MSXML2.XMLHTTP method data extraction issue - vba

I am using MSXML2.XMLHTTP method for data extraction but unable to extract data from specific page
Currently using following code for data extraction from different pages.This code is working fine with other pages but not working proper for specific page.
I want to extract following values for sample page.Price,Seller name etc
Dim http As Object, html As New MSHTML.HTMLDocument, topics As Object, titleElem As Object, detailsElem As Object, topic As HTMLHtmlElement
Dim j As Long
Dim RowCount As String
Dim maxid As Long
Dim productdesc1 As String
Dim features As String
Dim news As String
Dim comb As String
t122 = Now
Rin = DMin("[id]", "url", "[Flag] = False")
If Not IsNull(Rin) Then
Set http = CreateObject("MSXML2.XMLHTTP")
'http = http.SetOption(2, 13056)
'; //ignore all SSL Cert issues
RowCount = DMin("[id]", "url", "[Flag] = False")
maxid = DMax("[id]", "url", "[Flag] = False")
'MsgBox (RowCount)
Do While RowCount <> ""
'RowCount = DMin("[id]", "url", "[Flag] = False")
url = DLookup("[url]", "url", "ID = " & ([RowCount]))
url = Trim(url)
t31 = ""
t31 = (DateDiff("n", t122, Now))
On Error Resume Next
http.Open "GET", url, False
http.Send
html.body.innerHTML = http.ResponseText
brand = html.body.innerText
Set my_data1 = html.getElementsByClassName("a-row a-spacing-mini olpOffer")
i = 1
For Each Item In my_data1
pr1 = Item.getElementsByClassName("a-size-large a-color-price olpOfferPrice a-text-bold")
pr2 = pr1.innerText
dlmsg = Item.innerHTML
If dlmsg Like "*olpShippingPrice*" Then
dpr = Item.getElementsByClassName("olpShippingPrice")
dpr2 = dpr.innerText
End If
Data should be visible from following webpage using above code.https://www.amazon.co.uk/gp/offer-listing/B00551P0Q8

The following will print out all. You can sort where to write the values to
Option Explicit
Public Sub Test()
Dim prices As Object, sellers As Object, html As HTMLDocument, i As Long
Set html = New HTMLDocument
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.amazon.co.uk/gp/offer-listing/B01GK4YHMQ", False
.Send
html.body.innerHTML = .ResponseText
End With
Set prices = html.querySelectorAll(".olpOfferPrice")
Set sellers = html.querySelectorAll(".olpSellerName a")
For i = 0 To prices.Length - 1
Debug.Print Trim$(prices.Item(i).innerText)
Debug.Print Trim$(sellers.Item(i).innerText)
Next
End Sub

Related

Getting meta proper content from url

I am trying to get meta proper content from url but facing some problem
i want to grab "og:url" content detail, here is my code
Sub GrabCanonicalUrl3()
Const Url$ = "https://www.justdial.com/Ambala/Beauty-Parlours-in-Naraingarh"
Dim S$
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", Url, False
.Send
S = Replace(Replace(.responseText, "<!--", ""), "-->", "")
End With
With New HTMLDocument
.body.innerHTML = S
MsgBox .querySelector("meta[property='og:url']").getAttribute("content")
End With
End Sub
facing this earror
Object variable or block variable not set
MsgBox .querySelector("meta[property='og:url']").getAttribute("content")
I want to get url (og:url) link from inner HTML . but not
please help me out
Try this code
Sub Test()
Dim obj As Object, sResp As String
With CreateObject("MSXML2.xmlHttp")
.Open "GET", "https://www.justdial.com/Ambala/Beauty-Parlours-in-Naraingarh", False
.send
sResp = .responseText
End With
With CreateObject("HTMLFile")
.write sResp
For Each obj In .all(2).getElementsByTagName("meta")
If obj.getAttribute("Property") = "og:url" Then Debug.Print obj.Content: Exit For
Next obj
End With
End Sub

access vba bombs when defining html results page as variable to extract content from php page

My client is trying to use an access VBA script to send a string to a php page i created, then bring the data that is returned on the page back into his db
i use a mac and cannot run any of the VB code, but here is what i was able to find (two versions), but both bomb in the first dim statement
Private Sub Command1_Click()
Dim iHTML As HTMLDocument
Dim objHttp As MSXML2.ServerXMLHTTP
set objHttp = CreateObject("Msxml2.ServerXMLHTTP")
objHttp.open "GET", "http://www.bestenergyctri.com/zipcode.php?isValidate=adb&address1=352%20w%2046&address2=&city=new%20york&state=ny&zip5=
", False
objHttp.send
Set iHTML = objHttp.ResponseText
straddress1 = iHTML.getElementsByClassName("detect").Item(x - 1).getElementsByClassName("thedata").Item(0).getElementsByClassName("address1").Item(1).innerText
straddress2 = iHTML.getElementsByClassName("detect").Item(x - 1).getElementsByClassName("thedata").Item(0).getElementsByClassName("address2").Item(1).innerText
strcity = iHTML.getElementsByClassName("detect").Item(x - 1).getElementsByClassName("thedata").Item(0).getElementsByClassName("City").Item(1).innerText
strstate = iHTML.getElementsByClassName("detect").Item(x - 1).getElementsByClassName("thedata").Item(0).getElementsByClassName("State").Item(1).innerText
strzip5 = iHTML.getElementsByClassName("detect").Item(x - 1).getElementsByClassName("thedata").Item(0).getElementsByClassName("Zip5").Item(1).innerText
strzip4 = iHTML.getElementsByClassName("detect").Item(x - 1).getElementsByClassName("thedata").Item(0).getElementsByClassName("Zip4").Item(1).innerText
SaveWebInfo straddress1, straddress2, strcity, strstate, strzip5, strzip4
Set iHTML = Nothing
Set objHttp = Nothing
End Sub
or
Private Sub Command1_Click()
Dim iHTML As HTMLDocument
Dim objHttp As MSXML2.ServerXMLHTTP
set objHttp = New MSXML2.ServerXMLHTTP
objHttp.open "GET", "http://www.bestenergyctri.com/zipcode.php?isValidate=adb&address1=352%20w%2046&address2=&city=new%20york&state=ny&zip5=
", False
objHttp.send
Set iHTML = objHttp.ResponseText
straddress1 = iHTML.getElementsByClassName("detect").Item(x - 1).getElementsByClassName("thedata").Item(0).getElementsByClassName("address1").Item(1).innerText
straddress2 = iHTML.getElementsByClassName("detect").Item(x - 1).getElementsByClassName("thedata").Item(0).getElementsByClassName("address2").Item(1).innerText
strcity = iHTML.getElementsByClassName("detect").Item(x - 1).getElementsByClassName("thedata").Item(0).getElementsByClassName("City").Item(1).innerText
strstate = iHTML.getElementsByClassName("detect").Item(x - 1).getElementsByClassName("thedata").Item(0).getElementsByClassName("State").Item(1).innerText
strzip5 = iHTML.getElementsByClassName("detect").Item(x - 1).getElementsByClassName("thedata").Item(0).getElementsByClassName("Zip5").Item(1).innerText
strzip4 = iHTML.getElementsByClassName("detect").Item(x - 1).getElementsByClassName("thedata").Item(0).getElementsByClassName("Zip4").Item(1).innerText
SaveWebInfo straddress1, straddress2, strcity, strstate, strzip5, strzip4
Set iHTML = Nothing
Set objHttp = Nothing
End Sub
does anyone have any suggestions on how we can get the page properly read into a variable so that it can be parsed
thanks
Here is a simpler example using CSS querySelector and avoiding using Hungarian notation
Code:
Option Explicit
Sub test()
Dim iHTML As New HTMLDocument, objHttp As MSXML2.ServerXMLHTTP60 '<== Note this is version specific syntax. 60 is for Excel 2016
Set objHttp = New MSXML2.ServerXMLHTTP60
objHttp.Open "GET", "http://www.bestenergyctri.com/zipcode.php?isValidate=adb&address1=352%20w%2046&address2=&city=new%20york&state=ny&zip5=", False
objHttp.send
Dim address1 As String, address2 As String, city As String, state As String, zip5 As String, zip4 As String
With iHTML
.body.innerHTML = objHttp.ResponseText
address1 = .querySelector(".address1").innerText
address2 = .querySelector(".address2").innerText
city = .querySelector(".City").innerText
state = .querySelector(".State").innerText
zip5 = .querySelector(".Zip5").innerText
zip4 = .querySelector(".Zip4").innerText
End With
Debug.Print "Address 1: " & address1
Debug.Print "Address 2: " & address2
Debug.Print "City: " & city
Debug.Print "State: " & state
Debug.Print "Zip5: " & zip5
Debug.Print "Zip4: " & zip4
End Sub
Output:
References added to VBA > Tools > References:
*Last two references are the important ones.

i need to make status message on excel after calling the api

I need to make status message on excel after calling the api . status message in xml format so how to parse the data accurately.
Below given codes are using to get API info
Sub Test()
Dim xmlHTTP As Object
Set xmlHTTP = CreateObject("MSXML2.ServerXMLHTTP.6.0")
myURL = "http://xxxxxxxxxxxxx:15555/gateway/StatusTracking/1.0/shipment/tracking?housebill=cvvvv"
xmlHTTP.Open "GET", myURL, False
xmlHTTP.SetRequestHeader "APIKey", "xxxx-xxx-xxxxx-xxxx-xxxx"
xmlHTTP.SetRequestHeader "Accept", "application/json"
xmlHTTP.Send
Dim strReap As String
strReap = hReq.ResponseText
Dim xmlDoc As New MSXML2.DOMDocument
If Not xmlDoc.LoadXML(strReap) Then
MsgBox "Load error"
End If
Dim xnodelist As MSXML2.IXMLDOMNodeList
Set xnodelist = xmlDoc.getElementsByTagName("ShipmentTracking")
Dim xnode As MSXML2.IXMLDOMNode Set xnode = xnodelist.Item(0)
Dim obAtt1 As MSXML2.IXMLDOMAttribute
Dim obAtt2 As MSXML2.IXMLDOMAttribute
Dim xChild As MSXML2.IXMLDOMNode
Dim intRow As Integer
intRow = 2
Dim strCol1 As String
strCol1 = "A"
Dim strCol2 As String
strCol1 = "B"
Dim Shipment As String
For Each xChild In xnode.ChildNodes
Set obAtt1 = xChild.Attributes.getNamedItem("Shipment")
ws.Cells(intRow, 2) = obAtt1
intRow = intRow + 1
Next xChild
Set hReq = Nothing
Set xmlDoc = Nothing
End Sub
normal xml status message format given below
<Shipment tracking>
<type/>
<object/>
<properties/>
<Shipment>
<Origin/>
<type/>
<properties/>
<LocationCode/>
<CountryCode/>
</Shipment>
</Shipment tracking>
I am newbie in vba programming and i tried with this code but not working fine. I just want output,from shipment(xmltagname) to end in excel sheet. Please help me on this
You have written code Attributes.getNamedItem when in fact you have no attributes. Also to query for elements I'd prefer selectNodes and selectSingleNode instead of getElementsByTagName.
So try
xChild.selectSingleNode("Shipment")
and change the declaration for the receiving variable from IXMLDOMAttribute to IXMLDOMElement

Can't parse phone number from a page hindered by <br> tag

Tried to get the contact details from a page but when i run my script it only grabs the first portion of each category and ignores the rest because of some br tag, as in from contact details category it only grabs the name not the phone number or fax. Hope somebody will give me any idea how i could get that? Here is what I tried with:
Sub RestData()
Dim http As New MSXML2.XMLHTTP60
Dim html As New HTMLDocument
Dim ele As Object, post As Object
With CreateObject("MSXML2.serverXMLHTTP")
.Open "GET", "http://www.austrade.gov.au/SupplierDetails.aspx?ORGID=ORG0120000508&folderid=1736", False
.send
html.body.innerHTML = .responseText
End With
Set ele = html.getElementsByClassName("contact-details block dark")(0).getElementsByTagName("p")
For Each post In ele
x = x + 1
Cells(x, 1) = post.innerText
Next post
Set html = Nothing: Set ele = Nothing: Set docs = Nothing
End Sub
Html element for that:
<p>Company Name: Vaucraft Braford Stud<br>Phone: +61 7 4942 4859<br>Fax: +61 7 4942 0618<br>Email: florfamily1#bigpond.com<br>Web: <a target="_blank" href="http://www.vaucraftbrafords.com.au">http://www.vaucraftbrafords.com.au</a></p>
You may try something like this...
Sub RestData()
Dim http As New MSXML2.XMLHTTP60
Dim html As New HTMLDocument
Dim ele As Object, post As Object
Dim TypeDetails() As String
Dim TypeDetail() As String
Dim i As Long, r As Long
With CreateObject("MSXML2.serverXMLHTTP")
.Open "GET", "http://www.austrade.gov.au/SupplierDetails.aspx?ORGID=ORG0120000508&folderid=1736", False
.send
html.body.innerHTML = .responseText
End With
Set ele = html.getElementsByClassName("contact-details block dark")(0).getElementsByTagName("p")(2)
r = 2
TypeDetails() = Split(ele.innerText, Chr(10))
For i = 0 To UBound(TypeDetails)
TypeDetail() = Split(TypeDetails(i), ":")
Cells(r, 1) = VBA.Trim(TypeDetail(0))
Cells(r, 2) = VBA.Trim(TypeDetail(1))
r = r + 1
Next i
Set html = Nothing: Set ele = Nothing: Set docs = Nothing
End Sub

Login into website using MSXML2.XMLHTTP instead of InternetExplorer.Application with VBA

first time posting,
I'm trying to get the ID "dadosDoUsuario" from a website's page I have to be logged in. I got it working using "InternetExplorer.Application" object, but can't get the ID value when using "MSXML2.XMLHTTP" object. It seems it won't go past the login page, since I'm able to get other IDs from this page (example: "tituloPagina"). Could someone give a hint on how I get the data from the page after logged in? Thanks!
InternetExplorer.Application code (this one works):
Sub testIE()
Dim texto As String
Set ie = CreateObject("InternetExplorer.Application")
my_url = "https://www.nfp.fazenda.sp.gov.br/login.aspx"
With ie
.Visible = False
.Navigate my_url
Do Until Not ie.Busy And ie.readyState = 4
DoEvents
Loop
End With
ie.Document.getelementbyid("userName").Value = "MYUSERNAME"
ie.Document.getelementbyid("Password").Value = "MYPASSWORD"
ie.Document.getelementbyid("Login").Click
Do Until Not ie.Busy And ie.readyState = 4
DoEvents
Loop
ie.Document.getelementbyid("btnConsultarNFSemestre").Click
Do Until Not ie.Busy And ie.readyState = 4
DoEvents
Loop
texto = ie.Document.getelementbyid("dadosDoUsuario").innerText
MsgBox texto
ie.Quit
End Sub
MSXML2.XMLHTTP code (this one doesn't work):
Sub testXMLHTTP()
Dim xml As Object
Dim html As Object
Dim dados As Object
Dim text As Object
Set xml = CreateObject("MSXML2.XMLHTTP")
Set html = CreateObject("htmlFile")
With xml
.Open "POST", "https://www.nfp.fazenda.sp.gov.br/Login.aspx", False
.setRequestHeader "Content-Type", "text/xml"
.send "userName=MYUSERNAME&password=MYPASSWORD"
.Open "GET", "https://www.nfp.fazenda.sp.gov.br/Inicio.aspx", False
.setRequestHeader "Content-Type", "text/xml"
.send
End With
html.body.innerhtml = xml.responseText
Set objResult = html.GetElementById("dadosDoUsuario")
GetElementById = objResult.innertext
MsgBox GetElementById
End Sub
EDIT: I followed the steps suggested by #Florent B., and added a scripcontrol to get the encoded values for __VIEWSTATE, __VIEWSTATEGENERATOR and __EVENTVALIDATION. Got it working!
Sub testXMLHTTP()
Dim xml As Object
Dim html As HTMLDocument
Dim dados As Object
Dim text As Object
Dim html2 As HTMLDocument
Dim xml2 As Object
Set xml = CreateObject("Msxml2.ServerXMLHTTP.6.0")
Set html = CreateObject("htmlFile")
With xml
.Open "GET", "https://www.nfp.fazenda.sp.gov.br/Login.aspx", False
.send
End With
strCookie = xml.getResponseHeader("Set-Cookie")
html.body.innerhtml = xml.responseText
Set objvstate = html.GetElementById("__VIEWSTATE")
Set objvstategen = html.GetElementById("__VIEWSTATEGENERATOR")
Set objeventval = html.GetElementById("__EVENTVALIDATION")
vstate = objvstate.Value
vstategen = objvstategen.Value
eventval = objeventval.Value
'URL Encode ViewState
Dim ScriptEngine As ScriptControl
Set ScriptEngine = New ScriptControl
ScriptEngine.Language = "JScript"
ScriptEngine.AddCode "function encode(vstate) {return encodeURIComponent(vstate);}"
Dim encoded As String
encoded = ScriptEngine.Run("encode", vstate)
vstate = encoded
'URL Encode Event Validation
ScriptEngine.AddCode "function encode(eventval) {return encodeURIComponent(eventval);}"
encoded = ScriptEngine.Run("encode", eventval)
eventval = encoded
'URL Encode ViewState Generator
ScriptEngine.AddCode "function encode(vstategen) {return encodeURIComponent(vstategen);}"
encoded = ScriptEngine.Run("encode", vstategen)
vstategen = encoded
Postdata = "__EVENTTARGET=" & "&__EVENTARGUMENT=" & "&__VIEWSTATE=" & vstate & "&__VIEWSTATEGENERATOR=" & vstategen & "&__EVENTVALIDATION=" & eventval & "&ctl00$ddlTipoUsuario=#rdBtnNaoContribuinte" & "&ctl00$UserNameAcessivel=Digite+o+Usuário" & "&ctl00$PasswordAcessivel=x" & "&ctl00$ConteudoPagina$Login1$rblTipo=rdBtnNaoContribuinte" & "&ctl00$ConteudoPagina$Login1$UserName=MYUSERNAME" & "&ctl00$ConteudoPagina$Login1$Password=MYPASSWORD" & "&ctl00$ConteudoPagina$Login1$Login=Acessar" & "&ctl00$ConteudoPagina$Login1$txtCpfCnpj=Digite+o+Usuário"
Set xml2 = CreateObject("Msxml2.ServerXMLHTTP.6.0")
Set html2 = CreateObject("htmlFile")
With xml2
.Open "POST", "https://www.nfp.fazenda.sp.gov.br/Login.aspx", False
.setRequestHeader "Cookie", strCookie
.setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
.setRequestHeader "Content-Lenght", Len(Postdata)
.send (Postdata)
End With
html2.body.innerhtml = xml2.responseText
Set objResult = html2.GetElementById("dadosDoUsuario")
GetElementById = objResult.innertext
MsgBox GetElementById
End Sub
It's possible but not that easy.
First you need to use CreateObject("Msxml2.ServerXMLHTTP.6.0") and not CreateObject("MSXML2.XMLHTTP").
Then follow these steps:
Open and send a GET to https://www.nfp.fazenda.sp.gov.br/login.aspx
Parse and store the cookie from the response header "Set-Cookie"
Parse and store the __VIEWSTATE, __VIEWSTATEGENERATOR, __EVENTVALIDATION from the HTML response
Build the data for the next query with the values parsed previously and with your user-name/password :
__EVENTTARGET:""
__EVENTARGUMENT:""
__VIEWSTATE:"..."
__VIEWSTATEGENERATOR:"..."
__EVENTVALIDATION:"..."
ctl00$ddlTipoUsuario:"#rdBtnNaoContribuinte"
ctl00$UserNameAcessivel:"Digite+o+Usuário"
ctl00$PasswordAcessivel:"x"
ctl00$ConteudoPagina$Login1$rblTipo:"rdBtnNaoContribuinte"
ctl00$ConteudoPagina$Login1$UserName:"..."
ctl00$ConteudoPagina$Login1$Password:"..."
ctl00$ConteudoPagina$Login1$Login:"Acessar"
ctl00$ConteudoPagina$Login1$txtCpfCnpj:"Digite+o+Usuário"
Open a POST to https://www.nfp.fazenda.sp.gov.br/login.aspx
Set the header "Cookie" with the cookie parsed at step 2
Set the header Content-Type: "application/x-www-form-urlencoded"
Set the header Content-Length with the length of the data
Send the POST with the data from step 4