Please help to give some hints for if i want to get a picture from a web site. Some code has been write but cannot get the right picture.
Sub get_pic()
With CreateObject("InternetExplorer.Application")
.Visible = True
.Navigate "https://igtb.bochk.com/index_tc.html"
Do While .busy
DoEvents
Loop
Do Until .readystate = 4
DoEvents
Sleep 2000
Loop
Set aaa = .document.all.tags("iframe")(1).contentwindow.document.all.tags("img")
For j = 0 To aaa.Length - 1
With CreateObject("MSXML2.XMLHTTP.6.0")
[a1] = aaa(j).href
.Open "GET", aaa(j).href, False
.setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
.Send
strFilename = "C:\test.png"
ByteToFile .responsebody, strFilename
End With
Exit For
Next
End With
End Sub
Function ByteToFile(arrByte, strFilename)
With CreateObject("Adodb.Stream")
.Type = 1 'adTypeBinary
.Open
.Write arrByte
.SaveToFile strFilename, 2 'adSaveCreateOverWrite
.Close
End With
End Function
i know that will be change when you open any new request but is that any way to get the original picture when you open that page?
Related
I'm trying to create a script in vba using ServerXMLHTTP60 to parse the title of the first post from some identical links. My main goal here is to make the script asynchronous along with setting a highest time up to which the script will try before going for the next url.
However, the macro that I've created always goes for the next url when there is a timeout without being able to scrape the titles from the links.
Sub FetchContentWithinSpecificTime()
Dim oHttp As New ServerXMLHTTP60, HTML As New HTMLDocument
Dim URL As Variant, Urllist As Variant, t As Date, sResp As Boolean
Urllist = Array( _
"https://stackoverflow.com/questions/tagged/web-scraping?tab=newest&page=1", _
"https://stackoverflow.com/questions/tagged/web-scraping?tab=newest&page=2", _
"https://stackoverflow.com/questions/tagged/web-scraping?tab=newest&page=3", _
"https://stackoverflow.com/questions/tagged/web-scraping?tab=newest&page=4", _
"https://stackoverflow.com/questions/tagged/web-scraping?tab=newest&page=5" _
)
For Each URL In Urllist
Debug.Print "trying with: " & URL
With oHttp
.Open "GET", URL, True
.setRequestHeader "User-Agent", "Mozilla/5.0"
.setTimeouts 5000, 5000, 15000, 15000
.send
t = Now + TimeValue("00:00:10")
sResp = False
On Error Resume Next
Do
If .readyState = 4 Then sResp = True: Exit Do
If Now > t Then sResp = False: Exit Do
DoEvents
Loop
On Error GoTo 0
If sResp Then
HTML.body.innerHTML = .responseText
Debug.Print HTML.querySelector(".question-hyperlink").innerText
Else:
Debug.Print "failed with: " & URL
End If
End With
Next URL
End Sub
How can I make a script wait for a certain time for the titles to parse before going for the next url?
I don't know why those SO links take to long to respond but I tried with different urls and the following solution appears to be working in the right way. The credit for the rectified portion goes to the provider of this solution.
Sub FetchContentWithinSpecificTime()
Dim oHttp As New ServerXMLHTTP60, HTML As New HTMLDocument
Dim URL As Variant, Urllist As Variant, t As Date
Dim sPrice$, sResp As Boolean
Urllist = Array( _
"https://finance.yahoo.com/quote/NZDUSD=X?p=NZDUSD=X", _
"https://finance.yahoo.com/quote/FB?p=FB", _
"https://finance.yahoo.com/quote/AAPL?p=AAPL", _
"https://finance.yahoo.com/quote/IBM?p=IBM", _
"https://finance.yahoo.com/quote/UCO?p=UCO" _
)
For Each URL In Urllist
Debug.Print "trying with: " & URL
With oHttp
.Open "GET", URL, True
.setRequestHeader "User-Agent", "Mozilla/5.0"
.send
t = Now + TimeValue("00:00:10")
sResp = False
Do While .readyState < 4
If .readyState = 4 Then Exit Do
sResp = (Now > t) Or (Err.Number <> 0)
If sResp Then Exit Do
DoEvents
Loop
If Not sResp Then
HTML.body.innerHTML = .responseText
sPrice = HTML.querySelector(".Mb\(-4px\)").innerText
Debug.Print sPrice
Else:
Debug.Print "failed with: " & URL
End If
End With
Next URL
End Sub
I've written a script in vba in combination with IE to perform clicks on some javascript links connected to each profile of a webpage. My script can click on the first link flawlessly but when it comes to click on the next link in it's second iteration, it throws permission denied error. There are on valid links connected to each profile so I can't use the links as navigation. How can I modify my script in order to click on links cyclically?
This is my script:
Sub ClickLinks()
Const Url As String = "https://intraweb.stockton.edu/eyos/page.cfm?siteID=58&pageID=7&action=dirmain&type=FAC&display=basic"
Dim IE As New InternetExplorer, Htmldoc As HTMLDocument, I&
With IE
.Visible = True
.navigate Url
While .Busy = True Or .readyState < 4: DoEvents: Wend
Set Htmldoc = .document
End With
With Htmldoc.querySelectorAll("#main table tr a")
For I = 0 To .Length - 1
.Item(I).Click 'in second iteration this line throws permission denied error
Application.Wait Now + TimeValue("00:00:03")
Next I
End With
End Sub
Using an XHR request. The following does an initial GET request to retrieve all the staff IDs. It then loops the ids issuing POST requests for each id. To show it visits each page, I retrieve the staff e-mail address from each page.
Option Explicit
Public Sub GetInfo()
Dim objHTTP As Object, URL As String, html As New HTMLDocument, i As Long, sBody As String
Set objHTTP = CreateObject("WinHttp.WinHttpRequest.5.1")
URL = "https://intraweb.stockton.edu/eyos/page.cfm?siteID=58&pageID=7&action=details"
With objHTTP
.Open "GET", "https://intraweb.stockton.edu/eyos/page.cfm?siteID=58&pageID=7&action=dirmain&type=FAC&display=basic", False
.send
html.body.innerHTML = .responseText
Dim staffIDs As Object
Set staffIDs = html.querySelectorAll("input[name=employeeID]")
For i = 0 To staffIDs.Length - 1
sBody = "employeeID=" & staffIDs(i).getAttribute("value")
.SetTimeouts 10000, 10000, 10000, 10000
.Open "POST", URL, False
.setRequestHeader "User-Agent", "User-Agent: Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36"
.setRequestHeader "Content-type", "application/x-www-form-urlencoded"
On Error Resume Next
.send (sBody)
If Err.Number = 0 Then
If .Status = "200" Then
html.body.innerHTML = .responseText
Else
Debug.Print "HTTP " & .Status & " " & .statusText
Exit Sub
End If
Else
Debug.Print "Error " & Err.Number & " " & Err.Source & " " & Err.Description
Exit Sub
End If
On Error GoTo 0
Debug.Print html.querySelector("td a").innerText
Next i
End With
End Sub
Sample view on landing page:
Sample code printout from page:
Clunky time based wait for refresh and then navigation back to landing page so can submit next form. This needs improvement and some re-ordering.
Option Explicit
Public Sub ClickLinks2()
Const URL As String = "https://intraweb.stockton.edu/eyos/page.cfm?siteID=58&pageID=7&action=dirmain&type=FAC&display=basic"
Dim IE As New InternetExplorer, Htmldoc As HTMLDocument, i&
With IE
.Visible = True
.navigate URL
While .Busy = True Or .readyState < 4: DoEvents: Wend
Set Htmldoc = .document
Dim numEmployees As Long, a As Object
numEmployees = Htmldoc.querySelectorAll("a.names").Length
For i = 1 To 3 'numEmployees (1-792)
While .Busy = True Or .readyState < 4: DoEvents: Wend
.navigate URL
Application.Wait Now + TimeSerial(0, 0, 5)
.document.parentWindow.execScript "document.form" & i & ".submit();" ''javascript:document.form1.submit(); ''<== Adapted this
Next i
End With
End Sub
This is almost working!! But not quite!
If link.innerHTML Like "*Upload Questionnaire*" Then
link.Click
Set objIE = CreateObject("InternetExplorer.Application")
objIE.Navigate strURL
Do While objIE.ReadyState <> 4 And objIE.Busy
DoEvents
Loop
Set XMLHTTP = CreateObject("MSXML2.XMLHTTP")
XMLHTTP.Open "GET", strSQL, False
XMLHTTP.send
Set html = CreateObject("htmlfile")
html.body.innerHTML = XMLHTTP.responseText
Set tbl = html.getElementsByTagName("Table")
Set tr_coll = tbl(0).getElementsByTagName("TR")
For Each tr In tr_coll
j = 1
Set td_col = tr.getElementsByTagName("TD")
For Each td In td_col
Cells(row + 1, j).Value = td.innerText
j = j + 1
Next
row = row + 1
Next
End If
For one thing, the code doesn't pause and wait for the browser to finish loading.
Do While objIE.ReadyState <> 4 And objIE.Busy
DoEvents
Loop
Also, I'm feeding in a Parent URL, like this:
strSQL = "https://blah_blah_blah_CampaignID=" & cell.Value
The line below doesn't work.
XMLHTTP.Open "GET", strSQL, False
Somehow I need to pass in the Child URL that opens from the Parent URL. This is the parent URL: strSQL = "https://blah_blah_blah_CampaignID=" & cell.Value
When: link.Click
runs then the Child URL opens, but I don't know how to reference the Child URL. How can I do that?!
Thanks in advance!
Assuming that the link is contained within the class, rather than looping through all a elements within the document itself, use getElementsByClassName() to grab ever element in your target class, and then loop through those.
Something like:
For each classElement in IE.document.getElementsByClassName("thClass")
For each linkElement in classElement.getElementsByTagName("a")
'check for innerHTML, etc.
Next
Next
If the class is within the link, you can just flip the loops.
This should work, for example:
Public Sub LookADemo()
Dim ie As InternetExplorer, doc As HTMLDocument
Dim thisClass As IHTMLElement2, thisLink As IHTMLElement
Set ie = New InternetExplorer
ie.navigate "https://www.google.com"
Do
DoEvents
Loop Until (ie.ReadyState >= READYSTATE_INTERACTIVE) And (ie.Busy = False)
Set doc = ie.Document
For Each thisClass In doc.getElementsByClassName("hp")
For Each thisLink In thisClass.getElementsByTagName("a")
Debug.Print thisLink.InnerText
Next
Next
ie.Quit
End Sub
I managed to use VBA to get to the point where I'm ready to download an excel file from the web but I'm having trouble figuring out how to actually download that file and put its contents into an excel file I'm working in. Any suggestions? Thanks
Here is the code so far:
Sub GetData()
Dim IE As InternetExplorer
Dim HTMLDoc As HTMLDocument
Dim objElement As HTMLObjectElement
Set IE = New InternetExplorer
With IE
.Visible = True
.Navigate "http://www.housepriceindex.ca/default.aspx"
While .Busy Or .ReadyState <> READYSTATE_COMPLETE: Wend
.Document.getElementById("lnkTelecharger2").Click
While .Busy Or .ReadyState <> READYSTATE_COMPLETE: Wend
Set HTMLDoc = .Document
Set objElement = HTMLDoc.getElementById("txtEmailDisclaimerEN")
objElement.Value = "abc#abc.com"
Set objElement = HTMLDoc.getElementById("lnkAcceptDisclaimerEN")
objElement.Click
' ... Get CSV somehow ...
'.Quit
End With
Set IE = Nothing
End Sub
Try the below code:
Option Explicit
Sub ImportHistoricalDataSheet()
Const SXH_SERVER_CERT_IGNORE_ALL_SERVER_ERRORS = 13056
Const adSaveCreateOverWrite = 2
Dim aBody, sPath
' Download Historical Data xls file via XHR
With CreateObject("MSXML2.XMLHTTP")
'With CreateObject("MSXML2.ServerXMLHTTP")
'.SetOption 2, SXH_SERVER_CERT_IGNORE_ALL_SERVER_ERRORS
.Open "GET", "http://www.housepriceindex.ca/Excel2.aspx?langue=EN&mail=abc%40abc.com"
.Send
' Get binary response content
aBody = .responseBody
' Retrieve filename from headers and concatenate full path
sPath = ThisWorkbook.Path & "\" & Replace(Split(Split(.GetAllResponseHeaders, "filename=", 2)(1), vbCrLf, 2)(0), "/", "-")
End With
' Save binary content to the xls file
With CreateObject("ADODB.Stream")
.Type = 1
.Open
.Write aBody
.SaveToFile sPath, adSaveCreateOverWrite
.Close
End With
' Open saved workbook
With Workbooks.Open(sPath, , True)
' Get 1st worksheet values to array
aBody = .Worksheets(1).UsedRange.Value
.Saved = True
.Close
End With
' Delete saved workbook file
CreateObject("Scripting.FileSystemObject").DeleteFile sPath, True
' Insert array to target worksheet
ThisWorkbook.Sheets("Sheet1").Cells(1, 1).Resize(UBound(aBody, 1), UBound(aBody, 2)).Value = aBody
End Sub
I have code like below. And I receive error
"run-time error '-2146697211 *800c0005)'': the system cannot locate the resource specified"
I do not know how to solve it thanks in advance for any help. Line in which error is handled is httpRequest.send
Function pullSomeSite(urlcheck As String) As Boolean
Dim httpRequest As xmlhttp
Set httpRequest = New xmlhttp
Dim URL As String
URL = urlcheck
With httpRequest
.Open "POST", URL, False
.setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
.send !!!!!!here code stops!!!!!!!
End With
With httpRequest
While Not .readyState = 4 '<---------- wait
Application.Wait Now + TimeValue("0:00:01")
Wend
'Debug.Print .Status
If .Status = 200 Then
While InStr(1, .responseText, "Updating", 0) > 0 '<---------- wait again
Application.Wait Now + TimeValue("0:00:01")
Wend
pullSomeSite = True
Else
pullSomeSite = False
End If
End With
End Function
Test this :
Sub Test_URLExists()
Dim url As String
url = "http://stackoverflow.com/questions/33940044/url-check-in-vba-error"
MsgBox url, vbInformation, URLExists(url)
url = "http://stackoverflow.com/questions/12345678/url-check-in-vba-error"
MsgBox url, vbInformation, URLExists(url)
End Sub
here is how to test an URL with a function :
Function URLExists(url As String) As Boolean
Dim Request As Object
Dim ff As Integer
Dim rc As Variant
URLExists = False
On Error GoTo EndNow
Set Request = CreateObject("WinHttp.WinHttpRequest.5.1")
With Request
.Open "GET", url, False
.Send
rc = .StatusText
End With
Set Request = Nothing
If rc = "OK" Then URLExists = True
Exit Function
EndNow:
End Function
Instead of xmlhttp data type use object.
Use the below code.
. You need to give input as "http://google.com"
Sub test1()
a = pullSomeSite("http://www.flipkart.com")
MsgBox a
End Sub
Function pullSomeSite(urlcheck As String) As Boolean
Dim httpRequest As Object
Set httpRequest = CreateObject("MSXML2.XMLHTTP")
'Set httpRequest = New xmlhttp
Dim URL As String
URL = urlcheck
With httpRequest
.Open "POST", URL, False
.setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
.send '!!!!!!here code stops!!!!!!!
End With
With httpRequest
While Not .readyState = 4 '<---------- wait
Application.Wait Now + TimeValue("0:00:01")
Wend
'Debug.Print .Status
If .Status = 200 Then
While InStr(1, .responseText, "Updating", 0) > 0 '<---------- wait again
Application.Wait Now + TimeValue("0:00:01")
Wend
pullSomeSite = True
Else
pullSomeSite = False
End If
End With
End Function