MSXML2.XMLHTTP response different from IE document - vba

I want to know why in some cases the HTML in the response from MSXML2.XMLHTTP object does not produce the same results as automating Internet Explorer and inspecting the document property value.
For example, the procedure below compares the results found by using the object MSXML2.XMLHTTP (column A) with the results found by using the InternetExplorer object (column B).
The results of the InternetExplorer object include the NASDAQ index as expected but the results of the MSXML2.XMLHTTP object do not include the NASDAQ index and are completely different:
Sub ExtractDataFromInternet()
'Enable Tools/references 1. Microsoft Internet Control and 2. Microsoft HTML Object Library.
Dim URL
Dim objHTML As HTMLDocument
Dim Oelement As Object
Dim ie As New InternetExplorer
Dim J, Field1, Field2
Set objHTML = New HTMLDocument
URL = "http://www.nasdaq.com/"
'-------- METHOD1: MSXML2.XMLHTTP --------------------------
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", URL, False
.send
objHTML.body.innerHTML = .responseText
End With
J = 0
Set Field1 = objHTML.getElementsByTagName("td")
For Each Oelement In Field1
Worksheets("sheet1").Cells(J + 1, 1) = Field1(J).innerText
J = J + 1
Next Oelement
'----------METHOD2: InternetExplorer Object----------------
Set ie = New InternetExplorer
With ie
.navigate URL
.Visible = False
While .Busy Or .readyState <> READYSTATE_COMPLETE
DoEvents
Wend
Set objHTML = .document
DoEvents
End With
J = 0
Set Field1 = objHTML.getElementsByTagName("td")
For Each Oelement In Field1
Worksheets("sheet1").Cells(J + 1, 2) = Field1(J).innerText
J = J + 1
Next Oelement
DoEvents
ie.Quit
DoEvents
Set ie = Nothing
'----------------------------------------------
End Sub

Related

How to scrape post div table content on VBA?

I am new to web-scraping. I am trying to get data from O-net online. To be specific, I would like to draw the median wages and employment stats for a few dozens selected jobs. For example:
https://www.onetonline.org/link/summary/13-2041.00
O-net content
Upon inspection, the html looks like this
Below is my code though it does not work.
Sub scrape()
Dim appIE As Object
Set appIE = CreateObject("internetexplorer.application")
With appIE
.Navigate "https://www.onetonline.org/link/summary/13-2041.00"
.Visible = False
End With
Do While appIE.Busy
DoEvents
Loop
Set allRowOfData = appIE.document.getElementByID("wrapper_WagesEmployment").getelementbytagname("table")(0)
Dim myValue As String: myValue = allRowOfData.Cells(3).innerHTML
appIE.Quit
Set appIE = Nothing
Range("A1").Value = myValue
End Sub
Please insert Option Explicit at the top of the module to help you identify which variable are not declared properly.
You spelled getelementbytagname method wrong, it's getElementsByTagName.
Try the below code:
Private Sub scrape()
Dim appIE As Object
Set appIE = CreateObject("internetexplorer.application")
With appIE
.navigate "https://www.onetonline.org/link/summary/13-2041.00"
.Visible = False
Do While .Busy Or .readyState <> 4
DoEvents
Loop
End With
Dim ieDoc As Object
Set ieDoc = appIE.Document
Dim allRowOfData As Object
Set allRowOfData = ieDoc.getElementById("wrapper_WagesEmployment").getElementsByTagName("table")(0)
Dim medianWageValue As String
medianWageValue = allRowOfData.getElementsByClassName("report2")(0).innerText
Range("A1").Value = medianWageValue 'Fully qualify your range referece e.g. Sheet1.Range("A1").Value
Set ieDoc = Nothing
appIE.Quit
Set appIE = Nothing
End Sub
Alternative - You can do the same using XMLHTTP that doesn't require you to open IE and most of the time, faster (You will need to add Microsoft HTML Object Library in your reference):
Private Sub ScrapeByXMLHTTP()
Dim xmlhttp As Object
Set xmlhttp = CreateObject("MSXML2.XMLHTTP")
With xmlhttp
.Open "GET", "https://www.onetonline.org/link/summary/13-2041.00"
.send
Dim htmlDoc As MSHTML.HTMLDocument
Set htmlDoc = New MSHTML.HTMLDocument
htmlDoc.body.innerHTML = .responseText
End With
Set xmlhttp = Nothing
htmlDoc.body.innerHTML = htmlDoc.getElementById("wrapper_WagesEmployment").getElementsByTagName("table")(0).outerHTML
Dim medianWageValue As String
medianWageValue = htmlDoc.getElementsByClassName("report2")(0).innerText
Set htmlDoc = nothing
Range("A1").Value = medianWageValue 'Fully qualify your range referece e.g. Sheet1.Range("A1").Value
End Sub

VBA scraping with Excel

I am trying to scrape data from: http://www.boliga.dk/salg/resultater?so=1&sort=omregnings_dato-d&maxsaledate=today&iPostnr=&gade=&type=Villa&minsaledate=2017
IN connection with this I have two questions.
Michał Perłakowski has giving an excellent guide how to scrape but the code he is using is getElementById(Scraping data from website using vba). Since the webpage I want to scrape from does not use an ID. I am wondering what alternatives available. My guess would be getElementsByClassName.
My next question is how to make the macro change page (I have more than 100) can I just write "next"?
Dim appIE As Object
Set appIE = CreateObject("internetexplorer.application")
With appIE
.navigate "http://www.boliga.dk/salg/resultater?so=1&sort=omregnings_dato-d&maxsaledate=today&iPostnr=&gade=&type=Villa&minsaledate=2017"
.Visible = True
End With
Do While appIE.Busy
DoEvents
Loop
Set allRowOfData = appIE.document.getElementsByName("Bygget")
Dim myValue As String: myValue = allRowOfData.Cells(7).innerHTML
appIE.Quit
Set appIE = Nothing
Range("A1").Value = myValue
Try this:
Option Explicit
Sub scrape()
Dim appIE As Object
Dim ihtml As Object
Set appIE = CreateObject("internetexplorer.application")
With appIE
.Visible = True
.navigate "http://www.boliga.dk/salg/resultater?so=1&sort=omregnings_dato-d&maxsaledate=today&iPostnr=&gade=&type=Villa&minsaledate=2017"
While .Busy = True Or .readyState < 4: DoEvents: Wend
Set ihtml = .document
Dim allRowOfData As Object
Set allRowOfData = appIE.document.getElementById("searchresult")
Dim r As Long, c As Long
Dim curHTMLRow As Object
For r = 1 To allRowOfData.Rows.Length - 1
Set curHTMLRow = allRowOfData.Rows(r)
For c = 0 To curHTMLRow.Cells.Length - 1 'comment out
Cells(r + 1, c + 1) = curHTMLRow.Cells(c).innerText ' Cells(r + 1, c + 1) = curHTMLRow.Cells(7).innerText
Next c 'comment out
Next r
.Quit
End With
Set appIE = Nothing
End Sub
Just the column of interest rather than whole table:
Option Explicit
Sub scrape()
Dim appIE As Object
Dim ihtml As Object
Set appIE = CreateObject("internetexplorer.application")
With appIE
.Visible = True
.navigate "http://www.boliga.dk/salg/resultater?so=1&sort=omregnings_dato-d&maxsaledate=today&iPostnr=&gade=&type=Villa&minsaledate=2017"
While .Busy = True Or .readyState < 4: DoEvents: Wend
Set ihtml = .document
Dim allRowOfData As Object
Set allRowOfData = appIE.document.getElementById("searchresult")
Dim r As Long, c As Long
Dim curHTMLRow As Object
For r = 1 To allRowOfData.Rows.Length - 1
Set curHTMLRow = allRowOfData.Rows(r)
Cells(r + 1, c + 1) = curHTMLRow.Cells(7).innerText
Next r
.Quit
End With
Set appIE = Nothing
End Sub
Reference:
https://www.experts-exchange.com/questions/28571716/Excel-VBA-WEb-Data-Scraping-from-a-Table.html

Error While using Macro to add google's first image link to excel

I'm using the below Code to input Google's first images link in B1
for certain values in A1.
Public Sub Test()
Dim IE As InternetExplorer
Dim HTMLdoc As HTMLDocument
Dim imgElements As IHTMLElementCollection
Dim imgElement As HTMLImg
Dim aElement As HTMLAnchorElement
Dim N As Integer, I As Integer
Dim Url As String, Url2 As String
Dim LastRow As Long
Dim m, sImageSearchString
LastRow = Range("A" & Rows.Count).End(xlUp).Row
For I = 1 To LastRow
Url = "http://www.google.co.in/search?q=" & Cells(I, 1) & "&source=lnms&tbm=isch&sa=X&rnd=1"
Set IE = New InternetExplorer
With IE
.Visible = False
.Navigate Url 'sWebSiteURL
Do Until .readyState = 4: DoEvents: Loop
'Do Until IE.document.readyState = "Complete": DoEvents: Loop
Set HTMLdoc = .document
Set imgElements = HTMLdoc.getElementsByTagName("IMG")
N = 1
For Each imgElement In imgElements
If InStr(imgElement.src, sImageSearchString) Then
If imgElement.ParentNode.nodeName = "A" Then
Set aElement = imgElement.ParentNode
Url2 = imgElement.src
N = N + 1
End If
End If
Next
Cells(I, 2) = Url2
IE.Quit
Set IE = Nothing
End With
Next
End Sub
however I'm receiving the below error, can you please advise?
I'm using Windows 10, Excel 365
In VBA Menu - Tools - References - tick MS Internet Controls.
Or
Using Late Binding
Dim ie As Object
Set ie = CreateObject("InternetExplorer.Application")

How can I import data from an ambiguous URL?

This is almost working!! But not quite!
If link.innerHTML Like "*Upload Questionnaire*" Then
link.Click
Set objIE = CreateObject("InternetExplorer.Application")
objIE.Navigate strURL
Do While objIE.ReadyState <> 4 And objIE.Busy
DoEvents
Loop
Set XMLHTTP = CreateObject("MSXML2.XMLHTTP")
XMLHTTP.Open "GET", strSQL, False
XMLHTTP.send
Set html = CreateObject("htmlfile")
html.body.innerHTML = XMLHTTP.responseText
Set tbl = html.getElementsByTagName("Table")
Set tr_coll = tbl(0).getElementsByTagName("TR")
For Each tr In tr_coll
j = 1
Set td_col = tr.getElementsByTagName("TD")
For Each td In td_col
Cells(row + 1, j).Value = td.innerText
j = j + 1
Next
row = row + 1
Next
End If
For one thing, the code doesn't pause and wait for the browser to finish loading.
Do While objIE.ReadyState <> 4 And objIE.Busy
DoEvents
Loop
Also, I'm feeding in a Parent URL, like this:
strSQL = "https://blah_blah_blah_CampaignID=" & cell.Value
The line below doesn't work.
XMLHTTP.Open "GET", strSQL, False
Somehow I need to pass in the Child URL that opens from the Parent URL. This is the parent URL: strSQL = "https://blah_blah_blah_CampaignID=" & cell.Value
When: link.Click
runs then the Child URL opens, but I don't know how to reference the Child URL. How can I do that?!
Thanks in advance!
Assuming that the link is contained within the class, rather than looping through all a elements within the document itself, use getElementsByClassName() to grab ever element in your target class, and then loop through those.
Something like:
For each classElement in IE.document.getElementsByClassName("thClass")
For each linkElement in classElement.getElementsByTagName("a")
'check for innerHTML, etc.
Next
Next
If the class is within the link, you can just flip the loops.
This should work, for example:
Public Sub LookADemo()
Dim ie As InternetExplorer, doc As HTMLDocument
Dim thisClass As IHTMLElement2, thisLink As IHTMLElement
Set ie = New InternetExplorer
ie.navigate "https://www.google.com"
Do
DoEvents
Loop Until (ie.ReadyState >= READYSTATE_INTERACTIVE) And (ie.Busy = False)
Set doc = ie.Document
For Each thisClass In doc.getElementsByClassName("hp")
For Each thisLink In thisClass.getElementsByTagName("a")
Debug.Print thisLink.InnerText
Next
Next
ie.Quit
End Sub

VBA Script pull data from website

I want to pull the data from http://www.buyshedsdirect.co.uk/ to get the most recent prices of specific items.
I have an excel spreadsheet with the following:
|A | B
1 |Item |Price
2 |bfd/garden-structures/arches/premier-arches-pergola
and the VBA script:
Dim ie As New InternetExplorer
Dim item As String
item = Sheet1.Range("A2").Value
Dim doc As HTMLDocument
ie.Visible = True
ie.navigate "http://www.buyshedsdirect.co.uk/" & item
Do
DoEvents
Loop Until ie.readyState = READYSTATE_COMPLETE
Set doc = ie.document
On Error Resume Next
output = doc.getElementByClass("NowValue").innerText
Sheet1.Range("B2").Value = output
ie.Quit
End Sub
i am new to VBA scripting and have no idea why it isnt pulling the value form the class "NowValue"
Any help would be appreciated :)
The On Error Resume Next line is stopping an error message from being displayed. That error message would be that there is no method on HTMLDocument called "getElementByClass". You probably want "getElementsByClassName" instead and will have to handle the fact that this returns a collection rather than a single element. Code like this would work:
Option Explicit
Sub foo()
Dim ie As New InternetExplorer
Dim item As String
item = Sheet1.Range("A2").Value
Dim doc As HTMLDocument
ie.Visible = True
ie.navigate "http://www.buyshedsdirect.co.uk/" & item
Do
DoEvents
Loop Until ie.readyState = READYSTATE_COMPLETE
Set doc = ie.document
Dim results As IHTMLElementCollection
Dim result As IHTMLElement
Dim output As String
Set results = doc.getElementsByClassName("NowValue")
output = ""
For Each result In results
output = output & result.innerText
Next result
Sheet1.Range("B2").Value = output
ie.Quit
End Sub
You would then find that there are multiple elements with class "NowValue" on that page. It looks as though the one you want might be enclosed in a div called "VariantPrice" so this code should work:
Option Explicit
Sub bar()
Dim ie As New InternetExplorer
Dim item As String
item = Sheet1.Range("A2").Value
Dim doc As HTMLDocument
ie.Visible = True
ie.navigate "http://www.buyshedsdirect.co.uk/" & item
Do
DoEvents
Loop Until ie.readyState = READYSTATE_COMPLETE
Set doc = ie.document
Dim results As IHTMLElementCollection
Dim results2 As IHTMLElementCollection
Dim result As IHTMLElement
Dim result2 As IHTMLElement
Dim output As String
Set results = doc.getElementsByClassName("VariantPrice")
output = ""
For Each result In results
Set results2 = result.getElementsByClassName("NowValue")
For Each result2 In results2
output = output & result2.innerText
Next result2
Next result
Sheet1.Range("B2").Value = output
ie.Quit
End Sub
edit: as the code above works perfectly for me but fails to work for the question asker, it may be the case that they are using an older version of Internet Explorer which does not support getElementsByClassName. It may be the case that using querySelector will work instead. To be certain, go to this QuirksMode page to determine exactly what your browser supports.
New code using querySelector:
Option Explicit
Sub bar()
Dim ie As New InternetExplorer
Dim doc As HTMLDocument
Dim result As IHTMLElement
Dim result2 As IHTMLElement
Dim item As String
item = Sheet1.Range("A2").Value
ie.Visible = True
ie.navigate "http://www.buyshedsdirect.co.uk/" & item
Do
DoEvents
Loop Until ie.readyState = READYSTATE_COMPLETE
Set doc = ie.document
Set result = doc.querySelector(".VariantPrice")
Set result2 = result.querySelector(".NowValue")
Sheet1.Range("B2").Value = result2.innerText
ie.Quit
End Sub
further edit: to make the macro loop through all of the entries in column A, here are the relevant bits to add or change:
Option Explicit
Sub bar()
Dim ie As New InternetExplorer
Dim doc As HTMLDocument
Dim result As IHTMLElement
Dim result2 As IHTMLElement
Dim item As String
Dim lRow As Long
ie.Visible = True
lRow = 2
item = Sheet1.Range("A" & lRow).Value
Do Until item = ""
ie.navigate "http://www.buyshedsdirect.co.uk/" & item
Do
DoEvents
Loop Until ie.readyState = READYSTATE_COMPLETE
Set doc = ie.document
Set result = doc.querySelector(".VariantPrice")
Set result2 = result.querySelector(".NowValue")
Sheet1.Range("B" & lRow).Value = result2.innerText
lRow = lRow + 1
item = Sheet1.Range("A" & lRow).Value
Loop
ie.Quit
End Sub