Object required Error when error handling - vba

The following code retrieves 'dossier Urls' for substances in Column A by scraping the ECHA website. I'm trying to error handle cases where a substance Url cannot be found.
I can't quite see why the following code fails. I have highlighted the Problematic line with a comment. this is highlighted in debugging as an Object Required error but I can't see where I'm going wrong.
Sub PopulateExposures()
Dim url, rw As Range
Set rw = Sheets("data").Range("A2:E2") 'first row with inputs
Do While Application.CountA(rw) > 0
url = SubstanceUrl(rw.Cells(1).Value, rw.Cells(2).Value) 'get the URL
rw.Cells(5).Resize(1, 3).Value = ExposureData(url) 'get exposure data (as array) and add to row
Set rw = rw.Offset(1, 0) 'next substance
Loop
End Sub
Public Function SubstanceUrl(SubstanceName, CASNumber) As String
Const url = "https://echa.europa.eu/information-on-chemicals/registered-substances?" & _
"p_p_id=dissregisteredsubstances_WAR_dissregsubsportlet&p_p_lifecycle=1&" & _
"p_p_state=normal&p_p_mode=view&" & _
"__dissregisteredsubstances_WAR_dissregsubsportlet_javax.portlet.action=dissRegisteredSubstancesAction"
Dim oHTML, oHttp, MyDict, payload, DictKey, sep
Set oHTML = New HTMLDocument
Set oHttp = CreateObject("MSXML2.XMLHTTP")
Set MyDict = CreateObject("Scripting.Dictionary")
MyDict("_dissregisteredsubstances_WAR_dissregsubsportlet_disreg_name") = SubstanceName
MyDict("_dissregisteredsubstances_WAR_dissregsubsportlet_disreg_cas-number") = CASNumber
MyDict("_disssimplesearchhomepage_WAR_disssearchportlet_disclaimer") = "true"
MyDict("_disssimplesearchhomepage_WAR_disssearchportlet_disclaimerCheckbox") = "on"
payload = ""
For Each DictKey In MyDict
payload = payload & sep & DictKey & "=" & WorksheetFunction.EncodeURL(MyDict(DictKey))
sep = "&"
Next DictKey
With oHttp
.Open "POST", url, False
.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36"
.setRequestHeader "Content-type", "application/x-www-form-urlencoded"
.send payload
oHTML.body.innerHTML = .responseText
End With
'PROBLEMATIC CODE
If oHTML.querySelector(".details").getAttribute("href") Is Error Then
SubstanceUrl = "-"
Else
'Sometimes output changes despite same input
SubstanceUrl = oHTML.querySelector(".details").getAttribute("href")
End If
Debug.Print SubstanceUrl
End Function
Function ExposureData(urlToGet)
Dim XMLReq As New MSXML2.XMLHTTP60
Dim HTMLDoc As HTMLDocument, dds
Dim Route(1 To 3) As String, Results(1 To 3) As String, c, Info, Data
Route(1) = "sGeneralPopulationHazardViaInhalationRoute"
Route(2) = "sGeneralPopulationHazardViaDermalRoute"
Route(3) = "sGeneralPopulationHazardViaOralRoute"
XMLReq.Open "Get", urlToGet & "/7/1", False
XMLReq.send
If XMLReq.Status <> 200 Then
Results(1) = "Problem" & vbNewLine & XMLReq.Status & " - " & XMLReq.statusText
Else
Set HTMLDoc = New HTMLDocument
HTMLDoc.body.innerHTML = XMLReq.responseText
For c = 1 To UBound(Route, 1)
Set Info = HTMLDoc.getElementById(Route(c))
If Not Info Is Nothing Then
Set Info = Info.NextSibling.NextSibling.NextSibling
Set dds = Info.getElementsByTagName("dd")
If dds.Length > 1 Then
Results(c) = dds(1).innerText
Else
Results(c) = "hazard unknown"
End If
Else
Results(c) = "no info"
End If
Next c
End If
ExposureData = Results
End Function
For this code to run values must be present in column A. Acetone and Benzene can be used respectively to test 2 rows. To test out the error handling Enter something made up like Benzenjaj.
I think this is a quick fix. Just can't see it.
Update:
Tetsing on made up substance name:
First 2 results are found as normal but the made-up chemical causes the following error:
Code:
Sub PopulateExposures()
Dim url, rw As Range
Set rw = Sheets("data").Range("A2:E2") 'first row with inputs
Do While Application.CountA(rw) > 0
url = SubstanceUrl(rw.Cells(1).Value, rw.Cells(2).Value) 'get the URL
rw.Cells(5).Resize(1, 3).Value = ExposureData(url) 'get exposure data (as array) and add to row
Set rw = rw.Offset(1, 0) 'next substance
Loop
End Sub
Public Function SubstanceUrl(SubstanceName, CASNumber) As String
Const url = "https://echa.europa.eu/information-on-chemicals/registered-substances?" & _
"p_p_id=dissregisteredsubstances_WAR_dissregsubsportlet&p_p_lifecycle=1&" & _
"p_p_state=normal&p_p_mode=view&" & _
"__dissregisteredsubstances_WAR_dissregsubsportlet_javax.portlet.action=dissRegisteredSubstancesAction"
Dim oHTML, oHttp, MyDict, payload, DictKey, sep
Set oHTML = New HTMLDocument
Set oHttp = CreateObject("MSXML2.XMLHTTP")
Set MyDict = CreateObject("Scripting.Dictionary")
MyDict("_dissregisteredsubstances_WAR_dissregsubsportlet_disreg_name") = SubstanceName
MyDict("_dissregisteredsubstances_WAR_dissregsubsportlet_disreg_cas-number") = CASNumber
MyDict("_disssimplesearchhomepage_WAR_disssearchportlet_disclaimer") = "true"
MyDict("_disssimplesearchhomepage_WAR_disssearchportlet_disclaimerCheckbox") = "on"
payload = ""
For Each DictKey In MyDict
payload = payload & sep & DictKey & "=" & WorksheetFunction.EncodeURL(MyDict(DictKey))
sep = "&"
Next DictKey
With oHttp
.Open "POST", url, False
.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36"
.setRequestHeader "Content-type", "application/x-www-form-urlencoded"
.send payload
oHTML.body.innerHTML = .responseText
End With
On Error Resume Next 'ignore error on following line
SubstanceUrl = oHTML.querySelector(".details").getAttribute("href")
On Error GoTo 0 'stop ignoring errors
If Len(SubstanceUrl) = 0 Then SubstanceUrl = "<no URL>"
End Function
Function ExposureData(urlToGet)
Dim XMLReq As New MSXML2.XMLHTTP60
Dim HTMLDoc As HTMLDocument, dds
Dim Route(1 To 3) As String, Results(1 To 3) As String, c, Info, Data
Route(1) = "sGeneralPopulationHazardViaInhalationRoute"
Route(2) = "sGeneralPopulationHazardViaDermalRoute"
Route(3) = "sGeneralPopulationHazardViaOralRoute"
XMLReq.Open "Get", urlToGet & "/7/1", False
XMLReq.send
If XMLReq.Status <> 200 Then
Results(1) = "Problem" & vbNewLine & XMLReq.Status & " - " & XMLReq.statusText
Else
Set HTMLDoc = New HTMLDocument
HTMLDoc.body.innerHTML = XMLReq.responseText
For c = 1 To UBound(Route, 1)
Set Info = HTMLDoc.getElementById(Route(c))
If Not Info Is Nothing Then
Set Info = Info.NextSibling.NextSibling.NextSibling
Set dds = Info.getElementsByTagName("dd")
If dds.Length > 1 Then
Results(c) = dds(1).innerText
Else
Results(c) = "hazard unknown"
End If
Else
Results(c) = "no info"
End If
Next c
End If
ExposureData = Results
End Function

You can just ignore any error:
With oHttp
.Open "POST", url, False
.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36"
.setRequestHeader "Content-type", "application/x-www-form-urlencoded"
.send payload
oHTML.body.innerHTML = .responseText
End With
On Error Resume Next 'ignore error on following line
SubstanceUrl = oHTML.querySelector(".details").getAttribute("href")
On Error Goto 0 'stop ignoring errors
If Len(SubstanceUrl) = 0 Then SubstanceUrl = "<no URL>"

You can test the .Length of querySelectorAll when looking for the particular registered dossier url. You need to amend your code elsewhere to handle the "-" return url. I prefer Tim's solution in terms of ignoring error however for the first bit.
Option Explicit
Sub PopulateExposures()
Dim url, rw As Range
Set rw = Sheets("data").Range("A2:E2") 'first row with inputs
Do While Application.CountA(rw) > 0
url = SubstanceUrl(rw.Cells(1).Value, rw.Cells(2).Value) 'get the URL
If Left$(url, 5) = "https" Then
rw.Cells(5).Resize(1, 3).Value = ExposureData(url) 'get exposure data (as array) and add to row
Else
rw.Cells(5).Resize(1, 3).Value = url
End If
Set rw = rw.Offset(1, 0) 'next substance
Loop
End Sub
Public Function SubstanceUrl(SubstanceName, CASNumber) As String
Const url = "https://echa.europa.eu/information-on-chemicals/registered-substances?" & _
"p_p_id=dissregisteredsubstances_WAR_dissregsubsportlet&p_p_lifecycle=1&" & _
"p_p_state=normal&p_p_mode=view&" & _
"__dissregisteredsubstances_WAR_dissregsubsportlet_javax.portlet.action=dissRegisteredSubstancesAction"
Dim oHTML, oHttp, MyDict, payload, DictKey, sep
Set oHTML = New HTMLDocument
Set oHttp = CreateObject("MSXML2.XMLHTTP")
Set MyDict = CreateObject("Scripting.Dictionary")
MyDict("_dissregisteredsubstances_WAR_dissregsubsportlet_disreg_name") = SubstanceName
MyDict("_dissregisteredsubstances_WAR_dissregsubsportlet_disreg_cas-number") = CASNumber
MyDict("_disssimplesearchhomepage_WAR_disssearchportlet_disclaimer") = "true"
MyDict("_disssimplesearchhomepage_WAR_disssearchportlet_disclaimerCheckbox") = "on"
payload = ""
For Each DictKey In MyDict
payload = payload & sep & DictKey & "=" & WorksheetFunction.EncodeURL(MyDict(DictKey))
sep = "&"
Next DictKey
With oHttp
.Open "POST", url, False
.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36"
.setRequestHeader "Content-type", "application/x-www-form-urlencoded"
.send payload
oHTML.body.innerHTML = .responseText
End With
If oHTML.querySelectorAll("[href*=registered-dossier]").Length = 0 Then
SubstanceUrl = "-"
Else
'Sometimes output changes despite same input
SubstanceUrl = oHTML.querySelector(".details")
End If
Debug.Print SubstanceUrl
End Function
Function ExposureData(urlToGet)
Dim XMLReq As New MSXML2.XMLHTTP60
Dim HTMLDoc As HTMLDocument, dds
Dim Route(1 To 3) As String, Results(1 To 3) As String, c, Info, Data
Route(1) = "sGeneralPopulationHazardViaInhalationRoute"
Route(2) = "sGeneralPopulationHazardViaDermalRoute"
Route(3) = "sGeneralPopulationHazardViaOralRoute"
XMLReq.Open "Get", urlToGet & "/7/1", False
XMLReq.send
If XMLReq.Status <> 200 Then
Results(1) = "Problem" & vbNewLine & XMLReq.Status & " - " & XMLReq.statusText
Else
Set HTMLDoc = New HTMLDocument
HTMLDoc.body.innerHTML = XMLReq.responseText
For c = 1 To UBound(Route, 1)
Set Info = HTMLDoc.getElementById(Route(c))
If Not Info Is Nothing Then
Set Info = Info.NextSibling.NextSibling.NextSibling
Set dds = Info.getElementsByTagName("dd")
If dds.Length > 1 Then
Results(c) = dds(1).innerText
Else
Results(c) = "hazard unknown"
End If
Else
Results(c) = "no info"
End If
Next c
End If
ExposureData = Results
End Function

Related

VBA webscraping - can't get innerText from class

i need to get copper price from website, but I get {{price}} instead of value.
What I'm doing incorrectly?
Thanks for help!
Option Explicit
Sub Macro1()
Const URL As String _
= "https://www.lme.com/"
Const ClassName As String _
= "metal-block__price"
Dim WhrResponseText As String
WhrResponseText = GetWhrResponseText(URL)
If Len(WhrResponseText) = 0 Then
MsgBox "Could not get a response.", vbExclamation
Exit Sub
End If
Dim Elements As Object
With CreateObject("htmlfile")
.body.innerHTML = WhrResponseText
Set Elements = .getElementsByClassName(ClassName)
End With
Dim Result As Variant
With Elements
If .Length > 0 Then
Result = .Item(0).innerText
MsgBox Result
Else
MsgBox "Nothing found."
End If
End With
Dim i As Long
Dim Element As Object
For Each Element In Elements
Debug.Print i, Element.innerText
i = i + 1
Next Element
End Sub
Function GetWhrResponseText( _
ByVal URL As String) _
As String
Const ProcName As String = "GetWhrResponseText"
On Error GoTo ClearError
With CreateObject("WinHttp.WinHttpRequest.5.1")
.Open "GET", URL, False
.send
GetWhrResponseText = StrConv(.responseBody, vbUnicode)
End With
ProcExit:
Exit Function
ClearError:
Debug.Print "'" & ProcName & "' Run-time error '" _
& Err.Number & "':" & vbLf & " " & Err.Description
Resume ProcExit
End Function
Expected clarification why it works on other websites but not on this one.
This is one of the ways you can scrape prices from inner pages after you have traversed the links attached to the price available on the landing page.
Option Explicit
Sub CollectPrice()
Const startUrl$ = "https://www.lme.com/"
Dim oHttp As Object, innerLink$, price$
Dim Html As HTMLDocument, sBase$, I&
sBase = "https://www.lme.com"
Set oHttp = CreateObject("MSXML2.XMLHTTP")
Set Html = New HTMLDocument
With oHttp
.Open "GET", startUrl, True
.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36"
.send
While .readyState < 4: DoEvents: Wend
Html.body.innerHTML = .responseText
With Html.querySelectorAll("a.metal-block")
For I = 0 To .Length - 1
innerLink = sBase & .Item(I).getAttribute("href")
With oHttp
.Open "GET", innerLink, True
.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36"
.send
While .readyState < 4: DoEvents: Wend
Html.body.innerHTML = .responseText
price = Html.querySelector("span.hero-metal-data__number").innerText
Debug.Print price
End With
Next I
End With
End With
End Sub
Make sure to add this reference, Microsoft HTML Object Library to the library in order for the script to work.

Word VBA Rest API Simplybook.me Getting 401 Unauthorized

Below code is where I get 401 Unautorized access error After 2nd request Rest API is called .
Here is the developer Rest API documentation supplied.
https://simplybook.me/en/api/developer-api/tab/rest_api#method_GET_/admin/bookings/{id}
Sub Test()
Dim objRequest As Object
Dim loginResponse As Object
Dim strUrl As String
Dim bookingcode As String
Dim blnAsync As Boolean
Dim strloginResponse, strBookingresponse, strlogoutresponse, strparameter As String
Dim scriptControl As Object
Set scriptControl = CreateObject("MSScriptControl.ScriptControl")
scriptControl.Language = "JScript"
Set objRequest = CreateObject("MSXML2.XMLHTTP")
'strUrl = "https://user-api.simplybook.me/admin/auth"
strUrl = "https://user-api-v2.simplybook.me/admin/auth"
blnAsync = True
'strUrl = "https://user-api.simplybook.me/admin/auth"
strUrl = "https://user-api-v2.simplybook.me/admin/auth"
With objRequest
.Open "POST", strUrl, blnAsync
.SetRequestHeader "Content-Type", "application/json"
.Send ("{""company"":""simplydemo"",""login"":""admin"",""password"":""demo""}")
'spin wheels whilst waiting for response
While objRequest.readyState <> 4
DoEvents
Wend
strloginResponse = .responsetext
End With
Set loginResponse = scriptControl.Eval("(" + strloginResponse + ")")
Debug.Print loginResponse.token
Debug.Print loginResponse.company
Debug.Print loginResponse.login
Debug.Print loginResponse.refresh_token
Debug.Print loginResponse.domain
Debug.Print loginResponse.require2fa
Debug.Print loginResponse.allowed2fa_providers
Debug.Print loginResponse.auth_session_id
bookingcode = "0bz29vl4t"
strUrl = "https://user-api-v2.simplybook.me/admin/bookings/" & bookingcode
'strUrl = "https://user-api.simplybook.me/admin/bookings/" & bookingcode
strparameter = "(""{""X-Company-Login"":""" & loginResponse.company & """,""X-Token"":""" & loginResponse.token & """}"")"
Debug.Print strparameter
Set objRequest = CreateObject("MSXML2.XMLHTTP")
With objRequest
.Open "GET", strUrl, blnAsync
.SetRequestHeader "Content-Type", "application/json"
.Send strparameter
'spin wheels whilst waiting for response
While objRequest.readyState <> 4
DoEvents
Wend
strBookingresponse = .responsetext
End With
Debug.Print strBookingresponse
Set objRequest = CreateObject("MSXML2.XMLHTTP")
strUrl = "https://user-api-v2.simplybook.me/admin/auth/logout"
strparameter = "(""{""auth_token"":""" & loginResponse.token & """}"")"
With objRequest
.Open "POST", strUrl, blnAsync
.SetRequestHeader "Content-Type", "application/json"
.Send strparameter
'spin wheels whilst waiting for response
While objRequest.readyState <> 4
DoEvents
Wend
strlogoutresponse = .responsetext
End With
Debug.Print strBookingresponse
Set scriptControl = Nothing
Set objRequest = Nothing
Set loginResponse = Nothing
End Sub
Below response is i get for the Debug.print statements:
4eb8b7f6ce820c24e66a754557635ede02e6e808ce27e311511a20bef4ed44e3
simplydemo
admin
b60ee1e89264ee5e856583fc20e63926e71a25bded9f9a540d12d5ccbf51bf51
simplybook.me
False
("{"X-Company-Login":"simplydemo","X-Token":"4eb8b7f6ce820c24e66a754557635ede02e6e808ce27e311511a20bef4ed44e3"}")
{"code":401,"message":"Unauthorized","data":[],"message_data":[]}
{"code":401,"message":"Unauthorized","data":[],"message_data":[]}

Recording number of search results from Google

I have an some text in column B and I am trying to record the number of Google search results in column C. The search will give me this page below and I will need the number of search results from here:
Google search
The code I am using is this:
Sub HawkishSearch()
Dim url As String, lastRow As Long
Dim XMLHTTP As Object, html As Object
Dim start_time As Date
Dim end_time As Date
lastRow = Range("B" & Rows.Count).End(xlUp).Row
Dim cookie As String
Dim result_cookie As String
start_time = Time
Debug.Print "start_time:" & start_time
For i = 2 To lastRow
url = "https://www.google.co.in/search?q=" & Cells(i, 2) & "&rnd=" & WorksheetFunction.RandBetween(1, 10000)
Set XMLHTTP = CreateObject("MSXML2.XMLHTTP")
XMLHTTP.Open "GET", url, False
XMLHTTP.setRequestHeader "Content-Type", "text/xml"
XMLHTTP.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1; rv:25.0) Gecko/20100101 Firefox/25.0"
XMLHTTP.send
Set html = CreateObject("htmlfile")
html.body.innerHTML = XMLHTTP.ResponseText
If html.getElementById("resultStats") Is Nothing Then
str_text = "0 Results"
Else
str_text = html.getElementById("resultStats").innerText
End If
Cells(i, 3) = str_text
DoEvents
Next
end_time = Time
Debug.Print "end_time:" & end_time
Debug.Print "done" & "Time taken : " & DateDiff("n", start_time, end_time)
MsgBox "done" & "Time taken : " & DateDiff("n", start_time, end_time)
End Sub
There are two problems with this code.
Firstly, I get the whole result "About xxxx results (x.xx seconds)". I need just the number of search results. I tried using:
Cells(i, 3) = Mid(str_text, 7, Len(str_text) - 30)
but it will give me an error in case there's for example 0 results.
Secondly, and most importantly, I am getting the error at XMLHTTP.send:
change
Set XMLHTTP = CreateObject("MSXML2.XMLHTTP")
to
Set XMLHTTP = CreateObject("MSXML2.ServerXMLHTTP.6.0")

specific information from google search result

I use the following code to collect the first URL from a Google search. Is there a way to edit the code so that it picks up just the the text located right after the green URL in the Google search results?
Each search result contains 4 lines of information:
header
URL in green
text1
text2
I want to collect the single line of text which is shown after the green URL.
Is this possible?
Sub XMLHTTP()
Dim url As String, lastRow As Long
Dim XMLHTTP As Object, html As Object, objResultDiv As Object, objH3 As Object, link As Object
Dim start_time As Date
Dim end_time As Date
lastRow = Range("A" & Rows.Count).End(xlUp).Row
Dim cookie As String
Dim result_cookie As String
start_time = Time
Debug.Print "start_time:" & start_time
On Error Resume Next
For i = 2 To lastRow
url = "https://www.google.co.in/search?q=" & Cells(i, 1) & "&rnd=" & WorksheetFunction.RandBetween(1, 10000)
Set XMLHTTP = CreateObject("MSXML2.serverXMLHTTP")
XMLHTTP.Open "GET", url, False
XMLHTTP.setRequestHeader "Content-Type", "text/xml"
XMLHTTP.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1; rv:25.0) Gecko/20100101 Firefox/25.0"
XMLHTTP.send
Set html = CreateObject("htmlfile")
html.body.innerHTML = XMLHTTP.ResponseText
Set objResultDiv = html.getelementbyid("rso")
Set objH3 = objResultDiv.getelementsbytagname("H3")(0)
Set link = objH3.getelementsbytagname("a")(0)
str_text = Replace(link.innerHTML, "<EM>", "")
str_text = Replace(str_text, "</EM>", "")
Cells(i, 2) = str_text
Cells(i, 3) = link.href
DoEvents
Next
end_time = Time
Debug.Print "end_time:" & end_time
Debug.Print "done" & "Time taken : " & DateDiff("n", start_time, end_time)
MsgBox "done" & "Time taken : " & DateDiff("n", start_time, end_time)
End Sub
It looks to me like the text is within a <span class="st">, so this should do the trick:
Dim HTML
Set HTML = CreateObject("htmlfile")
HTML.body.innerHTML = XMLHTTP.ResponseText
Dim e
For Each e In HTML.getElementsByTagName("span")
If e.className = "st" Then
Debug.Print e.innerText
Exit For
End If
Next
Edit: Showing complete script:
Dim XMLHTTP
Set XMLHTTP = CreateObject("MSXML2.serverXMLHTTP")
XMLHTTP.Open "GET", "https://www.google.co.in/search?q=test", False
XMLHTTP.setRequestHeader "Content-Type", "text/xml"
XMLHTTP.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1; rv:25.0) Gecko/20100101 Firefox/25.0"
XMLHTTP.send
Dim HTML
Set HTML = CreateObject("htmlfile")
HTML.body.innerHTML = XMLHTTP.ResponseText
Dim e
For Each e In HTML.getElementsByTagName("span")
If e.className = "st" Then
Debug.Print e.innerText
Exit For
End If
Next
Output
Test your Internet connection bandwidth to locations around the world with this interactive broadband speed test from Ookla.

Error on getting the URL result of a Google search.

I am new to VBA, and I figured that trying to code is the best way to code. Anyway, I am trying to code a macro that will get first URL of a Google search result, but I'm getting error Object variable or With block variable not set when search result is 0, and the remaining operations are skipped. Here's the error image:
Here is the code I used:
Sub XMLHTTP()
Dim url As String, lastRow As Long
Dim XMLHTTP As Object, html As Object, objResultDiv As Object, objH3 As Object, link As Object
Dim start_time As Date
Dim end_time As Date
lastRow = Range("A" & Rows.Count).End(xlUp).Row
Dim cookie As String
Dim result_cookie As String
start_time = Time
Debug.Print "start_time:" & start_time
For i = 2 To lastRow
url = "https://www.google.co.in/search?q=" & Cells(i, 1) & "&rnd=" & WorksheetFunction.RandBetween(1, 10000)
Set XMLHTTP = CreateObject("MSXML2.serverXMLHTTP")
XMLHTTP.Open "GET", url, False
XMLHTTP.setRequestHeader "Content-Type", "text/xml"
XMLHTTP.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1; rv:25.0) Gecko/20100101 Firefox/25.0"
XMLHTTP.send
Set html = CreateObject("htmlfile")
html.body.innerHTML = XMLHTTP.ResponseText
Set objResultDiv = html.getelementbyid("rso")
Set objH3 = objResultDiv.getElementsByTagName("H3")(0)
Set link = objH3.getElementsByTagName("a")(0)
str_text = Replace(link.innerHTML, "<EM>", "")
str_text = Replace(str_text, "</EM>", "")
Cells(i, 2) = str_text
Cells(i, 3) = link.href
DoEvents
Next
end_time = Time
Debug.Print "end_time:" & end_time
Debug.Print "done" & "Time taken : " & DateDiff("n", start_time, end_time)
MsgBox "done" & "Time taken : " & DateDiff("n", start_time, end_time)
End Sub
Can anyone help me please?
In the zero result case, H3 is empty so modify your code like this to handle this case
Set html = CreateObject("htmlfile")
html.body.innerhtml = XMLHTTP.ResponseText
Set objResultDiv = html.getelementbyid("rso")
**numb_H3 = objResultDiv.getElementsByTagName("H3").Length**
**If numb_H3 > 0 Then**
Set objH3 = objResultDiv.getElementsByTagName("H3")(0)
Set link = objH3.getElementsByTagName("a")(0)
str_text = Replace(link.innerhtml, "<EM>", "")
str_text = Replace(str_text, "</EM>", "")
Cells(i, 2) = str_text
Cells(i, 3) = link.href
**Else**
**End If**
DoEvents
Next
Here is simplified code for the same method.
Sub xmlHttp()
Dim url As String,
lastRow As Long,
XMLHTTP As Object,
html As Object,
objResultDiv As Object,
objH3 As Object,
link As Object
lastRow = Range("A" & Rows.Count).End(xlUp).Row
For i = 2 To lastRow
url = "https://www.google.co.in/search?q=" & Cells(i, 1)
Set xmlHttp = CreateObject("MSXML2.XMLHTTP")
xmlHttp.Open "GET", URL, False
xmlHttp.setRequestHeader "Content-Type", "text/xml"
xmlHttp.send
Set html = CreateObject("htmlfile")
html.body.innerHTML = xmlHttp.ResponseText
Set objResultDiv = html.getelementbyid("rso")
numb_H3 = objResultDiv.getElementsByTagName("H3").Length
If numb_H3 > 0 Then
Set objH3 = objResultDiv.getElementsByTagName("H3")(0)
Set link = objH3.getElementsByTagName("a")(0)
Range(i, 2) = link
Else
End If
DoEvents
Next
End Sub
One simple workaround--though not the best--is to skip the error.
Try the following modification:
start_time = Time
Debug.Print "start_time:" & start_time
On Error Resume Next '--Add this part.
For i = 2 To lastRow
Other options include a true error handling part, something that returns a value when your search returns nothing.
Let us know if this helps.