Web scraping with Excel and VBA - vba

I am trying to extract the team names but i get the "Run time error 424 Object required" on this line
Set lists = html.getElementsByClassName("KambiBC-event-item__participants-container")
, if anyone could point me in the right direction it would be nice.
Sub useClassnames()
Dim lists As IHTMLElementCollection
Dim anchorElements As IHTMLElementCollection
Dim ulElement As HTMLUListElement
Dim liElement As HTMLLIElement
Dim row As Long
Dim ie As InternetExplorer
Set ie = New InternetExplorer
With ie.navigate "https://www.unibet.ro/betting#filter/all/all/all/all/in-play"
.Visible = True
Do While ie.readyState <> READYSTATE_COMPLETE
DoEvents
Loop
End With
Set lists = html.getElementsByClassName("KambiBC-event-item__participants-container")
row = 1
For Each ulElement In lists
For Each liElement In ulElement.getElementsByClassName("KambiBC-event-participants")
Set anchorElements = liElement.getElementsByClassName("KambiBC-event-participants__name")
If anchorElements.Length > 0 Then
Cells(row, 1) = anchorElements.Item(0).innerText
row = row + 1
End If
Next liElement
Next ulElement
End Sub

Error 424 is an Object Required Error. What is Html in Set lists = html.getElementsByClassName("KambiBC-event-item__participants-container")?
What you need is
Set lists = ie.document.getElementsByClassName("KambiBC-event-item__participants-container")
Code that I used
Sub useClassnames()
Dim lists As IHTMLElementCollection
Dim anchorElements As IHTMLElementCollection
Dim ulElement As HTMLUListElement
Dim liElement As HTMLLIElement
Dim row As Long
Dim ie As InternetExplorer
Set ie = New InternetExplorer
With ie
.navigate "https://www.unibet.ro/betting#filter/all/all/all/all/in-play"
.Visible = True
Do While ie.readyState <> READYSTATE_COMPLETE
DoEvents
Loop
End With
Set lists = ie.document.getElementsByClassName("KambiBC-event-item__participants-container")
row = 1
For Each ulElement In lists
For Each liElement In ulElement.getElementsByClassName("KambiBC-event-participants")
Set anchorElements = liElement.getElementsByClassName("KambiBC-event-participants__name")
If anchorElements.Length > 0 Then
Cells(row, 1) = anchorElements.Item(0).innerText
row = row + 1
End If
Next liElement
Next ulElement
End Sub
Screenshot

Related

VBA WEB scraping loop

This is the code I wrote I need it to loop when it copy and paste the table and then click the next button till there no more next button , the next button is a tagname "a" and it has a "href" that I used to click the hyper link
Any help would be appreciated thanks
Sub GetData()
'define variables
Dim i As SHDocVw.InternetExplorer
Set i = New InternetExplorer
i.Visible = True
Dim IE As Object, obj As Object
Dim r As Long, c As Long, t As Long
Dim elemCollection As Object
Dim eRow As Long
Dim HTMLDoc As MSHTML.HTMLDocument
Dim HTMLInput As MSHTML.IHTMLElement
Dim HTMLAs As MSHTML.IHTMLElementCollection
Dim HTMLA As MSHTML.IHTMLElement
url_name = Sheet2.Range("e4")
If url_name = "" Then Exit Sub
i.Visible = True
i.navigate (url_name)
Do While i.readyState <> READYSTATE_COMPLETE
Loop
'we ensure that the web page is downloaded completely
ThisWorkbook.Sheets("Sheet1").Range("a2:ai1000").ClearContents
Set elemCollection = i.document.getElementsByTagName("TABLE")
For t = 0 To (elemCollection.Length - 1)
For r = 0 To (elemCollection(t).Rows.Length - 1)
eRow = Sheet1.Cells(Rows.Count, 1).End(xlUp).Offset(1, 0).Row
For c = 0 To (elemCollection(t).Rows(r).Cells.Length - 1)
ThisWorkbook.Worksheets(1).Cells(eRow, c + 1) = elemCollection(t).Rows(r).Cells(c).innerText
Next c
Next r
Next t
Range("a1:ai1000").Columns.AutoFit
'Clean up Memory
Set IE = Nothing
Set HTMLDoc = i.document
Set HTMLAs = HTMLDoc.getElementsByTagName("a")
For Each HTMLA In HTMLAs
'Debug.Print HTMLA.getAttribute("classname"), HTMLA.getAttribute("href")
If HTMLA.getAttribute("classname") = "button2 next" And HTMLA.getAttribute("href") = "https://stathead.com/football/pgl_finder.cgi?request=1&game_num_max=99&week_num_max=99&order_by=all_td&match=game&season_start=1&year_max=2020&qb_gwd=0&qb_comeback=0&season_end=-1&game_type=R&age_max=99&year_min=2020&offset=100" Then
HTMLA.Click
Exit For
End If
Next
End Sub

Error While using Macro to add google's first image link to excel

I'm using the below Code to input Google's first images link in B1
for certain values in A1.
Public Sub Test()
Dim IE As InternetExplorer
Dim HTMLdoc As HTMLDocument
Dim imgElements As IHTMLElementCollection
Dim imgElement As HTMLImg
Dim aElement As HTMLAnchorElement
Dim N As Integer, I As Integer
Dim Url As String, Url2 As String
Dim LastRow As Long
Dim m, sImageSearchString
LastRow = Range("A" & Rows.Count).End(xlUp).Row
For I = 1 To LastRow
Url = "http://www.google.co.in/search?q=" & Cells(I, 1) & "&source=lnms&tbm=isch&sa=X&rnd=1"
Set IE = New InternetExplorer
With IE
.Visible = False
.Navigate Url 'sWebSiteURL
Do Until .readyState = 4: DoEvents: Loop
'Do Until IE.document.readyState = "Complete": DoEvents: Loop
Set HTMLdoc = .document
Set imgElements = HTMLdoc.getElementsByTagName("IMG")
N = 1
For Each imgElement In imgElements
If InStr(imgElement.src, sImageSearchString) Then
If imgElement.ParentNode.nodeName = "A" Then
Set aElement = imgElement.ParentNode
Url2 = imgElement.src
N = N + 1
End If
End If
Next
Cells(I, 2) = Url2
IE.Quit
Set IE = Nothing
End With
Next
End Sub
however I'm receiving the below error, can you please advise?
I'm using Windows 10, Excel 365
In VBA Menu - Tools - References - tick MS Internet Controls.
Or
Using Late Binding
Dim ie As Object
Set ie = CreateObject("InternetExplorer.Application")

How to import data from multiple pages?

I have a small piece of code that lists links in a site.
Sub ListLinks()
'Set a reference to microsoft Internet Controls
Dim IeApp As InternetExplorer
Dim sURL As String
Dim IeDoc As Object
Dim i As Long
Set IeApp = New InternetExplorer
IeApp.Visible = True
sURL = "http://www.sharenet.co.za/v3/q_sharelookup.php"
IeApp.Navigate sURL
Do
Loop Until IeApp.ReadyState = READYSTATE_COMPLETE
Set IeDoc = IeApp.Document
For i = 0 To IeDoc.Links.Length - 1
Cells(i + 1, 1).Value = IeDoc.Links(i).href
Next i
Set IeApp = Nothing
End Sub
That is very useful for listing all links in a site. How can I loop through these URLs and import data from each one?
For instance, the first link under 'Name or Sector' is this:
http://www.sharenet.co.za/v3/sharesfound.php?ssector=0533&exch=JSE&bookmark=Oil & Gas&scheme=default
There is actually nothing to import from there. The next link has some data:
http://www.sharenet.co.za/v3/sharesfound.php?ssector=0537&exch=JSE&bookmark=Oil%20-%20Integrated&scheme=default
The data from there looks like this:
Name Full Name Code Sector
SACOIL-N Sacoil Holdings Ltd NPL SCLN 0537
ERIN Erin Energy Corporation ERN 0537
BEE-SASOL BEE - SASOL LIMITED SOLBE1 0537
SACOIL SACOIL HOLDINGS LD SCL 0537
OANDO OANDO PLC OAO 0537
OANDORIGT OANDO PLC RIGT OAON 0537
MONTAUK Montauk Holdings Ltd MNK 0537
How can I import that data from each link?
This seems to work pretty well. It may need a bit of fine tuning, but this should be pretty darn close.
Sub ListLinks()
'Set a reference to microsoft Internet Controls
Dim IeApp As InternetExplorer
Dim sURL As String
Dim IeDoc As Object
Dim i As Long
Set IeApp = New InternetExplorer
IeApp.Visible = True
sURL = "http://www.sharenet.co.za/v3/q_sharelookup.php"
IeApp.Navigate sURL
Do
Loop Until IeApp.ReadyState = READYSTATE_COMPLETE
Set IeDoc = IeApp.Document
For i = 0 To IeDoc.Links.Length - 1
Cells(i + 1, 1).Value = IeDoc.Links(i).href
Next i
Set IeApp = Nothing
Call CopyFromURL
End Sub
Public Sub CopyFromURL()
Dim IE As InternetExplorer, doc As HTMLDocument
Dim thisClass As IHTMLElement2, thisLink As IHTMLElement
Dim rng As Range, cell As Range
Const READYSTATE_COMPLETE As Integer = 4
Dim TR_col As Object, TR As Object
Dim TD_col As Object, TD As Object
Dim row As Long, col As Long
row = 1
Set rng = Range("A1:A5")
For Each cell In rng
Set IE = CreateObject("InternetExplorer.Application")
IE.Navigate cell
Do While (IE.Busy Or IE.ReadyState <> READYSTATE_COMPLETE)
DoEvents
Loop
Set TR_col = IE.Document.getElementsByTagName("TR")
For Each TR In TR_col
Set TD_col = TR.getElementsByTagName("TD")
col = 2
For Each TD In TD_col
Cells(row, col) = TD.innerText
col = col + 1
Next
col = 2
row = row + 1
Next
Next cell
IE.Quit
End Sub

VBA to Excel how do i get data name and phone numbers instead of hyperlinks?

Sub GetAllLinks()
Dim IE As Object
Set IE = CreateObject("InternetExplorer.Application")
IE.Visible = True
url_name = "http://www.trovanumeri.com/?azione=cerca&cerca=portoscuso"
If url_name = "" Then Exit Sub
IE.navigate url_name
Do
DoEvents
Loop Until IE.readyState = 4
'<a href="http://google.com">Click Here
Set AllHyperLinks = IE.document.getElementsByTagName("A")
Foglio1.ListBox1.Clear
'this is the code
For Each hyper_link In AllHyperLinks
Foglio1.ListBox1.AddItem hyper_link
Next
MsgBox "Done!"
End Sub
Take a look at the below example, showing how to retrieve the data from via IE automation and DOM processing:
Option Explicit
Sub GetData()
Dim oIE As Object
Dim lCurRow As Long
Dim lResultIndex As Long
Dim sUrl As String
Dim oTable0 As Object
Dim oTable1 As Object
Dim oTable2 As Object
Dim oTable3 As Object
Dim sTbl3Text As String
Dim cAncorNodes As Object
Dim oAncorNode As Variant
Dim sRowText As String
Dim oRowNode As Object
Dim aData() As Variant
Set oIE = CreateObject("InternetExplorer.Application")
With oIE
.Visible = True
Sheets(1).Cells.Delete
lCurRow = 1 ' Worksheet rows counter
lResultIndex = 0 ' Search result index counter
Do
' Navigate to the page
sUrl = "http://www.trovanumeri.com/?azione=cerca&cerca=cagliari&da=" & lResultIndex
.Navigate sUrl
' Wait IE
Do While .ReadyState < 3 Or .Busy
DoEvents
Loop
' Wait Document
Do Until .Document.ReadyState = "complete"
DoEvents
Loop
' Retrieve target tables
Set oTable0 = .Document.getElementsByTagName("table")(0)
Set oTable1 = oTable0.getElementsByTagName("table")(1)
Set oTable2 = oTable1.getElementsByTagName("table")(3)
' Get and process ancor nodes
Set cAncorNodes = oTable2.getElementsByTagName("a")
For Each oAncorNode In cAncorNodes
With CreateObject("Scripting.Dictionary")
' Add .href to result
.Add .Count, oAncorNode.href
' Get ancor's parent row
Set oRowNode = oAncorNode.ParentNode.ParentNode.ParentNode
Do
' Add nonemtpy row to result
sRowText = Trim(Replace(oRowNode.innerText, vbCrLf, ""))
If sRowText <> "" Then .Add .Count, sRowText
' If last row then exit
If IsNull(oRowNode.nextElementSibling) Then Exit Do
' Proceed with next row
Set oRowNode = oRowNode.nextElementSibling
' If net row contains oAncorNode then exit
If oRowNode.getElementsByTagName("a").Length > 0 Then Exit Do
DoEvents
Loop
' Get results as array
aData = .Items
End With
' Output array to worksheet row
With Sheets(1).Cells(lCurRow, 1)
.Resize(1, UBound(aData) + 1) = aData
.Select
End With
lCurRow = lCurRow + 1
DoEvents
Next
' Get table containing 'Next' button
Set oTable3 = oTable0.getElementsByTagName("table")(7)
sTbl3Text = oTable3.innerText
' If no 'Next' button then exit
If InStr(sTbl3Text, "Avanti >>") = 0 Then Exit Do
lResultIndex = lResultIndex + 10
DoEvents
Loop
.Quit
End With
End Sub

VBA Script pull data from website

I want to pull the data from http://www.buyshedsdirect.co.uk/ to get the most recent prices of specific items.
I have an excel spreadsheet with the following:
|A | B
1 |Item |Price
2 |bfd/garden-structures/arches/premier-arches-pergola
and the VBA script:
Dim ie As New InternetExplorer
Dim item As String
item = Sheet1.Range("A2").Value
Dim doc As HTMLDocument
ie.Visible = True
ie.navigate "http://www.buyshedsdirect.co.uk/" & item
Do
DoEvents
Loop Until ie.readyState = READYSTATE_COMPLETE
Set doc = ie.document
On Error Resume Next
output = doc.getElementByClass("NowValue").innerText
Sheet1.Range("B2").Value = output
ie.Quit
End Sub
i am new to VBA scripting and have no idea why it isnt pulling the value form the class "NowValue"
Any help would be appreciated :)
The On Error Resume Next line is stopping an error message from being displayed. That error message would be that there is no method on HTMLDocument called "getElementByClass". You probably want "getElementsByClassName" instead and will have to handle the fact that this returns a collection rather than a single element. Code like this would work:
Option Explicit
Sub foo()
Dim ie As New InternetExplorer
Dim item As String
item = Sheet1.Range("A2").Value
Dim doc As HTMLDocument
ie.Visible = True
ie.navigate "http://www.buyshedsdirect.co.uk/" & item
Do
DoEvents
Loop Until ie.readyState = READYSTATE_COMPLETE
Set doc = ie.document
Dim results As IHTMLElementCollection
Dim result As IHTMLElement
Dim output As String
Set results = doc.getElementsByClassName("NowValue")
output = ""
For Each result In results
output = output & result.innerText
Next result
Sheet1.Range("B2").Value = output
ie.Quit
End Sub
You would then find that there are multiple elements with class "NowValue" on that page. It looks as though the one you want might be enclosed in a div called "VariantPrice" so this code should work:
Option Explicit
Sub bar()
Dim ie As New InternetExplorer
Dim item As String
item = Sheet1.Range("A2").Value
Dim doc As HTMLDocument
ie.Visible = True
ie.navigate "http://www.buyshedsdirect.co.uk/" & item
Do
DoEvents
Loop Until ie.readyState = READYSTATE_COMPLETE
Set doc = ie.document
Dim results As IHTMLElementCollection
Dim results2 As IHTMLElementCollection
Dim result As IHTMLElement
Dim result2 As IHTMLElement
Dim output As String
Set results = doc.getElementsByClassName("VariantPrice")
output = ""
For Each result In results
Set results2 = result.getElementsByClassName("NowValue")
For Each result2 In results2
output = output & result2.innerText
Next result2
Next result
Sheet1.Range("B2").Value = output
ie.Quit
End Sub
edit: as the code above works perfectly for me but fails to work for the question asker, it may be the case that they are using an older version of Internet Explorer which does not support getElementsByClassName. It may be the case that using querySelector will work instead. To be certain, go to this QuirksMode page to determine exactly what your browser supports.
New code using querySelector:
Option Explicit
Sub bar()
Dim ie As New InternetExplorer
Dim doc As HTMLDocument
Dim result As IHTMLElement
Dim result2 As IHTMLElement
Dim item As String
item = Sheet1.Range("A2").Value
ie.Visible = True
ie.navigate "http://www.buyshedsdirect.co.uk/" & item
Do
DoEvents
Loop Until ie.readyState = READYSTATE_COMPLETE
Set doc = ie.document
Set result = doc.querySelector(".VariantPrice")
Set result2 = result.querySelector(".NowValue")
Sheet1.Range("B2").Value = result2.innerText
ie.Quit
End Sub
further edit: to make the macro loop through all of the entries in column A, here are the relevant bits to add or change:
Option Explicit
Sub bar()
Dim ie As New InternetExplorer
Dim doc As HTMLDocument
Dim result As IHTMLElement
Dim result2 As IHTMLElement
Dim item As String
Dim lRow As Long
ie.Visible = True
lRow = 2
item = Sheet1.Range("A" & lRow).Value
Do Until item = ""
ie.navigate "http://www.buyshedsdirect.co.uk/" & item
Do
DoEvents
Loop Until ie.readyState = READYSTATE_COMPLETE
Set doc = ie.document
Set result = doc.querySelector(".VariantPrice")
Set result2 = result.querySelector(".NowValue")
Sheet1.Range("B" & lRow).Value = result2.innerText
lRow = lRow + 1
item = Sheet1.Range("A" & lRow).Value
Loop
ie.Quit
End Sub