URL check in VBA error - vba

I have code like below. And I receive error
"run-time error '-2146697211 *800c0005)'': the system cannot locate the resource specified"
I do not know how to solve it thanks in advance for any help. Line in which error is handled is httpRequest.send
Function pullSomeSite(urlcheck As String) As Boolean
Dim httpRequest As xmlhttp
Set httpRequest = New xmlhttp
Dim URL As String
URL = urlcheck
With httpRequest
.Open "POST", URL, False
.setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
.send !!!!!!here code stops!!!!!!!
End With
With httpRequest
While Not .readyState = 4 '<---------- wait
Application.Wait Now + TimeValue("0:00:01")
Wend
'Debug.Print .Status
If .Status = 200 Then
While InStr(1, .responseText, "Updating", 0) > 0 '<---------- wait again
Application.Wait Now + TimeValue("0:00:01")
Wend
pullSomeSite = True
Else
pullSomeSite = False
End If
End With
End Function

Test this :
Sub Test_URLExists()
Dim url As String
url = "http://stackoverflow.com/questions/33940044/url-check-in-vba-error"
MsgBox url, vbInformation, URLExists(url)
url = "http://stackoverflow.com/questions/12345678/url-check-in-vba-error"
MsgBox url, vbInformation, URLExists(url)
End Sub
here is how to test an URL with a function :
Function URLExists(url As String) As Boolean
Dim Request As Object
Dim ff As Integer
Dim rc As Variant
URLExists = False
On Error GoTo EndNow
Set Request = CreateObject("WinHttp.WinHttpRequest.5.1")
With Request
.Open "GET", url, False
.Send
rc = .StatusText
End With
Set Request = Nothing
If rc = "OK" Then URLExists = True
Exit Function
EndNow:
End Function

Instead of xmlhttp data type use object.
Use the below code.
. You need to give input as "http://google.com"
Sub test1()
a = pullSomeSite("http://www.flipkart.com")
MsgBox a
End Sub
Function pullSomeSite(urlcheck As String) As Boolean
Dim httpRequest As Object
Set httpRequest = CreateObject("MSXML2.XMLHTTP")
'Set httpRequest = New xmlhttp
Dim URL As String
URL = urlcheck
With httpRequest
.Open "POST", URL, False
.setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
.send '!!!!!!here code stops!!!!!!!
End With
With httpRequest
While Not .readyState = 4 '<---------- wait
Application.Wait Now + TimeValue("0:00:01")
Wend
'Debug.Print .Status
If .Status = 200 Then
While InStr(1, .responseText, "Updating", 0) > 0 '<---------- wait again
Application.Wait Now + TimeValue("0:00:01")
Wend
pullSomeSite = True
Else
pullSomeSite = False
End If
End With
End Function

Related

Get picture form web site

Please help to give some hints for if i want to get a picture from a web site. Some code has been write but cannot get the right picture.
Sub get_pic()
With CreateObject("InternetExplorer.Application")
.Visible = True
.Navigate "https://igtb.bochk.com/index_tc.html"
Do While .busy
DoEvents
Loop
Do Until .readystate = 4
DoEvents
Sleep 2000
Loop
Set aaa = .document.all.tags("iframe")(1).contentwindow.document.all.tags("img")
For j = 0 To aaa.Length - 1
With CreateObject("MSXML2.XMLHTTP.6.0")
[a1] = aaa(j).href
.Open "GET", aaa(j).href, False
.setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
.Send
strFilename = "C:\test.png"
ByteToFile .responsebody, strFilename
End With
Exit For
Next
End With
End Sub
Function ByteToFile(arrByte, strFilename)
With CreateObject("Adodb.Stream")
.Type = 1 'adTypeBinary
.Open
.Write arrByte
.SaveToFile strFilename, 2 'adSaveCreateOverWrite
.Close
End With
End Function
i know that will be change when you open any new request but is that any way to get the original picture when you open that page?

Can't make a script wait asynchronously for a certain time for the titles to parse before going for the next url

I'm trying to create a script in vba using ServerXMLHTTP60 to parse the title of the first post from some identical links. My main goal here is to make the script asynchronous along with setting a highest time up to which the script will try before going for the next url.
However, the macro that I've created always goes for the next url when there is a timeout without being able to scrape the titles from the links.
Sub FetchContentWithinSpecificTime()
Dim oHttp As New ServerXMLHTTP60, HTML As New HTMLDocument
Dim URL As Variant, Urllist As Variant, t As Date, sResp As Boolean
Urllist = Array( _
"https://stackoverflow.com/questions/tagged/web-scraping?tab=newest&page=1", _
"https://stackoverflow.com/questions/tagged/web-scraping?tab=newest&page=2", _
"https://stackoverflow.com/questions/tagged/web-scraping?tab=newest&page=3", _
"https://stackoverflow.com/questions/tagged/web-scraping?tab=newest&page=4", _
"https://stackoverflow.com/questions/tagged/web-scraping?tab=newest&page=5" _
)
For Each URL In Urllist
Debug.Print "trying with: " & URL
With oHttp
.Open "GET", URL, True
.setRequestHeader "User-Agent", "Mozilla/5.0"
.setTimeouts 5000, 5000, 15000, 15000
.send
t = Now + TimeValue("00:00:10")
sResp = False
On Error Resume Next
Do
If .readyState = 4 Then sResp = True: Exit Do
If Now > t Then sResp = False: Exit Do
DoEvents
Loop
On Error GoTo 0
If sResp Then
HTML.body.innerHTML = .responseText
Debug.Print HTML.querySelector(".question-hyperlink").innerText
Else:
Debug.Print "failed with: " & URL
End If
End With
Next URL
End Sub
How can I make a script wait for a certain time for the titles to parse before going for the next url?
I don't know why those SO links take to long to respond but I tried with different urls and the following solution appears to be working in the right way. The credit for the rectified portion goes to the provider of this solution.
Sub FetchContentWithinSpecificTime()
Dim oHttp As New ServerXMLHTTP60, HTML As New HTMLDocument
Dim URL As Variant, Urllist As Variant, t As Date
Dim sPrice$, sResp As Boolean
Urllist = Array( _
"https://finance.yahoo.com/quote/NZDUSD=X?p=NZDUSD=X", _
"https://finance.yahoo.com/quote/FB?p=FB", _
"https://finance.yahoo.com/quote/AAPL?p=AAPL", _
"https://finance.yahoo.com/quote/IBM?p=IBM", _
"https://finance.yahoo.com/quote/UCO?p=UCO" _
)
For Each URL In Urllist
Debug.Print "trying with: " & URL
With oHttp
.Open "GET", URL, True
.setRequestHeader "User-Agent", "Mozilla/5.0"
.send
t = Now + TimeValue("00:00:10")
sResp = False
Do While .readyState < 4
If .readyState = 4 Then Exit Do
sResp = (Now > t) Or (Err.Number <> 0)
If sResp Then Exit Do
DoEvents
Loop
If Not sResp Then
HTML.body.innerHTML = .responseText
sPrice = HTML.querySelector(".Mb\(-4px\)").innerText
Debug.Print sPrice
Else:
Debug.Print "failed with: " & URL
End If
End With
Next URL
End Sub

Unable to make my script handle errors until some loop ends

I've written a script in vba to scrape the ip address populated upon making a proxied request. I've used proxy (out of list of proxies) within my vba script to test (probably none of them are working at this moment).
However, what I want to achieve is that when a requests is failed the following script will print that error message and keep going for the next requests otherwise it will parse the ip address from that site and keep going until the loops gets exhausted.
My attempt so far (consider the proxyList to be the working ones):
Sub ValidateProxies()
Dim Http As New ServerXMLHTTP60, elem As Object, S$
Dim proxyList As Variant, oProxy As Variant
proxyList = [{"98.163.59.8:8080","134.209.115.223:3128","191.101.233.198:3129","198.177.126.218:80","35.185.201.225:8080"}]
For Each oProxy In proxyList
On Error Resume Next
With Http
.Open "GET", "https://www.myip.com/", False
.setRequestHeader "User-Agent", "Mozilla/5.0"
.setProxy 2, oProxy
.send
End With
On Error GoTo 0
If Err.Number <> 0 Then
Debug.Print "Encountered an error"
Else:
With New HTMLDocument
.body.innerHTML = Http.responseText
Set elem = .querySelector("#ip")
R = R + 1: Cells(R, 1) = oProxy
Cells(R, 2) = elem.innerText
End With
End If
Next oProxy
End Sub
How can I make my script print any error when there is one and keep rolling until the loop ends?
Here is the example with async requests pool and logging statuses and errors to a worksheet. It uses a proxy list from free-proxy-list.net.
Option Explicit
Sub TestProxy()
Const PoolCapacity = 50
Const ReqTimeout = 15
Dim sResp
Dim aProxyList
Dim oMatch
Dim oWS
Dim lIndex
Dim ocPool
Dim i
Dim sResult
Dim oReq
' Parsing proxy list from free-proxy-list.net
With CreateObject("MSXML2.ServerXMLHTTP.6.0")
.Open "GET", "https://free-proxy-list.net/", True
.SetRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1; Win64; x64)"
.Send
Do Until .ReadyState = 4: DoEvents: Loop
sResp = .ResponseText
End With
With CreateObject("VBScript.RegExp")
.Global = True
.Pattern = "<td[^>]*>(\d+\.\d+\.\d+\.\d+)<\/td><td[^>]*>(\d+)<\/td>"
aProxyList = Array()
For Each oMatch In .Execute(sResp)
ReDim Preserve aProxyList(UBound(aProxyList) + 1)
aProxyList(UBound(aProxyList)) = oMatch.SubMatches(0) & ":" & oMatch.SubMatches(1)
Next
End With
' Proxy checking with api.myip.com requests
Set oWS = ThisWorkbook.Sheets(1)
oWS.Cells.Delete
Set ocPool = New Collection
lIndex = 0
Do
' Check pool for completed requests
For i = ocPool.Count To 1 Step -1
On Error Resume Next
sResult = ""
With ocPool(i)(0)
Select Case True
Case .ReadyState < 4
Case .Status \ 100 <> 2
sResult = "Status " & .Status & " / " & .StatusText
Case Else
sResult = .ResponseText
End Select
End With
Select Case True
Case Err.Number <> 0
sResult = "Error " & Err.Number & " / " & Err.Description
Case (Now - ocPool(i)(1)) * 86400 > ReqTimeout
sResult = "Timeout"
End Select
On Error GoTo 0
If sResult <> "" Then
oWS.Cells(ocPool(i)(2), 2).Value = sResult
ocPool.Remove i
End If
DoEvents
Next
' Add new request to pool
If ocPool.Count < PoolCapacity And lIndex <= UBound(aProxyList) Then
Set oReq = CreateObject("MSXML2.ServerXMLHTTP.6.0")
With oWS.Cells(lIndex + 1, 1)
.Value = aProxyList(lIndex)
.Select
End With
With oReq
.Open "GET", "https://api.myip.com/", True
.SetProxy 2, aProxyList(lIndex)
.SetRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1; Win64; x64)"
.Send
End With
ocPool.Add Array( _
oReq, _
Now, _
lIndex + 1 _
)
lIndex = lIndex + 1
DoEvents
End If
Loop While ocPool.Count > 0
MsgBox "Completed"
End Sub
This will print all errors encountered and you should tailor by err.Number
Option Explicit
Public Sub ValidateProxies()
Dim http As New ServerXMLHTTP60, elem As Object, S$
Dim proxyList As Variant, oProxy As Variant, r As Long
Dim html As HTMLDocument
Set html = New HTMLDocument
proxyList = [{"98.163.59.8:8080","134.209.115.223:3128","191.101.233.198:3129","198.177.126.218:80","35.185.201.225:8080"}]
For Each oProxy In proxyList
On Error GoTo errhand:
With http
.Open "GET", "https://www.myip.com/", False
.setRequestHeader "User-Agent", "Mozilla/5.0"
.SetProxy 2, oProxy
.send
With html
.body.innerHTML = http.responseText
Set elem = .querySelector("#ip")
r = r + 1: ActiveSheet.Cells(r, 1) = oProxy
ActiveSheet.Cells(r, 2) = elem.innerText
End With
End With
Next oProxy
Exit Sub
errhand:
If Err.Number <> 0 Then
Debug.Print "Encountered an error " & Err.Description, oProxy
Err.Clear
Resume Next
End If
End Sub

Cant' fetch some information from a webpage using xhr

I'm trying to grab a certain portion of information from a webpage using xmlhttp request. When I execute my script, it throws an error Object Variable Or With---. But, when I try the same using IE, I get the content like magic.
The most important thing to notice is that the content I'm expecting to grab are neither javascript encrypted nor generated dynamically. So, I should get them using xhr. Where I'm going wrong?
Here goes the website link
Using IE (working one):
Sub GetText()
Const Url As String = "https://www.baseball-reference.com/boxes/ANA/ANA201806180.shtml"
Dim IE As New InternetExplorer, HTML As HTMLDocument, post As Object
With IE
.Visible = False
.navigate Url
While .Busy = True Or .readyState < 4: DoEvents: Wend
Set HTML = .document
End With
Set post = HTML.querySelectorAll(".section_content")(2)
MsgBox post.innerText
End Sub
Using XHR (not working):
Sub GetText()
Const Url As String = "https://www.baseball-reference.com/boxes/ANA/ANA201806180.shtml"
Dim Http As New XMLHTTP60, HTML As New HTMLDocument, post As Object
With Http
.Open "GET", Url, False
.send
HTML.body.innerHTML = .responseText
End With
Set post = HTML.querySelectorAll(".section_content")(2)
MsgBox post.innerText
End Sub
The selector I've defined above is flawless.
I could have pasted here the relevant html elements but they are wrapped within comments. However, I've provided above the link to that site.
To be clearer: the portion of text I'm interested in looks exactly like below in that webpage.
My question: how can I get the aforementioned textblock (which is shown above within image) using XHR?
The solution is plain and simple. All you need to do is kick out the comment signs from responseText using Replace() function or so and then filter them using Html.body.innerHTML to make them proper html contents. The rest is as usual.
This is how you can get the content:
Sub GetTextFromComment()
Const URL As String = "https://www.baseball-reference.com/boxes/ANA/ANA201806180.shtml"
Dim Http As New XMLHTTP60, Html As New HTMLDocument, post As Object
With Http
.Open "GET", URL, False
.send
Html.body.innerHTML = Replace(Replace(.responseText, "<!--", ""), "-->", "")
End With
Set post = Html.querySelectorAll(".section_content")(2)
MsgBox post.innerText
End Sub
Using comment position:
Option Explicit
Public Sub GetInfo()
Dim sResponse As String, html As New HTMLDocument
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.baseball-reference.com/boxes/ANA/ANA201806180.shtml", False
.send
sResponse = StrConv(.responseBody, vbUnicode)
End With
sResponse = Mid$(sResponse, InStr(1, sResponse, "<!DOCTYPE "))
With html
.body.innerHTML = sResponse
html.body.innerHTML = html.querySelector("#all_9711922514").LastChild.Data
Debug.Print html.querySelector("#div_9711922514").innerText
End With
End Sub
Method using nodeType:
Option Explicit
Public Sub GetInfo()
Dim sResponse As String, html As New HTMLDocument, ele As Object
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.baseball-reference.com/boxes/ANA/ANA201806180.shtml", False
.send
sResponse = StrConv(.responseBody, vbUnicode)
End With
sResponse = Mid$(sResponse, InStr(1, sResponse, "<!DOCTYPE "))
With html
.body.innerHTML = sResponse
For Each ele In html.querySelector("#all_9711922514").Children
If ele.NodeType = 8 Then
html.body.innerHTML = ele.Data
Debug.Print html.querySelector("#div_9711922514").innerText
Exit For
End If
Next
End With
End Sub
Method using regex:
Option Explicit
Public Sub GetInfo()
Dim sResponse As String, html As New HTMLDocument
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.baseball-reference.com/boxes/ANA/ANA201806180.shtml", False
.send
sResponse = StrConv(.responseBody, vbUnicode)
End With
sResponse = Mid$(sResponse, InStr(1, sResponse, "<!DOCTYPE "))
With html
.body.innerHTML = sResponse
Dim s As String
s = .querySelector("div[id=all_1786105919]").outerHTML
s = regexRemove(s, "<([^>]+)>")
Debug.Print Replace$(Replace$(s, "&", "°"), "-->", vbNullString)
End With
End Sub
Public Function regexRemove(ByVal s As String, ByVal pattern As String) As String
Dim regex As Object
Set regex = CreateObject("VBScript.RegExp")
With regex
.Global = True
.MultiLine = True
.IgnoreCase = False
.pattern = pattern
End With
If regex.test(s) Then
regexRemove = regex.Replace(s, vbNullString)
Else
regexRemove = s
End If
End Function
Output:

My script throws error while clicking on links cyclically

I've written a script in vba in combination with IE to perform clicks on some javascript links connected to each profile of a webpage. My script can click on the first link flawlessly but when it comes to click on the next link in it's second iteration, it throws permission denied error. There are on valid links connected to each profile so I can't use the links as navigation. How can I modify my script in order to click on links cyclically?
This is my script:
Sub ClickLinks()
Const Url As String = "https://intraweb.stockton.edu/eyos/page.cfm?siteID=58&pageID=7&action=dirmain&type=FAC&display=basic"
Dim IE As New InternetExplorer, Htmldoc As HTMLDocument, I&
With IE
.Visible = True
.navigate Url
While .Busy = True Or .readyState < 4: DoEvents: Wend
Set Htmldoc = .document
End With
With Htmldoc.querySelectorAll("#main table tr a")
For I = 0 To .Length - 1
.Item(I).Click 'in second iteration this line throws permission denied error
Application.Wait Now + TimeValue("00:00:03")
Next I
End With
End Sub
Using an XHR request. The following does an initial GET request to retrieve all the staff IDs. It then loops the ids issuing POST requests for each id. To show it visits each page, I retrieve the staff e-mail address from each page.
Option Explicit
Public Sub GetInfo()
Dim objHTTP As Object, URL As String, html As New HTMLDocument, i As Long, sBody As String
Set objHTTP = CreateObject("WinHttp.WinHttpRequest.5.1")
URL = "https://intraweb.stockton.edu/eyos/page.cfm?siteID=58&pageID=7&action=details"
With objHTTP
.Open "GET", "https://intraweb.stockton.edu/eyos/page.cfm?siteID=58&pageID=7&action=dirmain&type=FAC&display=basic", False
.send
html.body.innerHTML = .responseText
Dim staffIDs As Object
Set staffIDs = html.querySelectorAll("input[name=employeeID]")
For i = 0 To staffIDs.Length - 1
sBody = "employeeID=" & staffIDs(i).getAttribute("value")
.SetTimeouts 10000, 10000, 10000, 10000
.Open "POST", URL, False
.setRequestHeader "User-Agent", "User-Agent: Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36"
.setRequestHeader "Content-type", "application/x-www-form-urlencoded"
On Error Resume Next
.send (sBody)
If Err.Number = 0 Then
If .Status = "200" Then
html.body.innerHTML = .responseText
Else
Debug.Print "HTTP " & .Status & " " & .statusText
Exit Sub
End If
Else
Debug.Print "Error " & Err.Number & " " & Err.Source & " " & Err.Description
Exit Sub
End If
On Error GoTo 0
Debug.Print html.querySelector("td a").innerText
Next i
End With
End Sub
Sample view on landing page:
Sample code printout from page:
Clunky time based wait for refresh and then navigation back to landing page so can submit next form. This needs improvement and some re-ordering.
Option Explicit
Public Sub ClickLinks2()
Const URL As String = "https://intraweb.stockton.edu/eyos/page.cfm?siteID=58&pageID=7&action=dirmain&type=FAC&display=basic"
Dim IE As New InternetExplorer, Htmldoc As HTMLDocument, i&
With IE
.Visible = True
.navigate URL
While .Busy = True Or .readyState < 4: DoEvents: Wend
Set Htmldoc = .document
Dim numEmployees As Long, a As Object
numEmployees = Htmldoc.querySelectorAll("a.names").Length
For i = 1 To 3 'numEmployees (1-792)
While .Busy = True Or .readyState < 4: DoEvents: Wend
.navigate URL
Application.Wait Now + TimeSerial(0, 0, 5)
.document.parentWindow.execScript "document.form" & i & ".submit();" ''javascript:document.form1.submit(); ''<== Adapted this
Next i
End With
End Sub