The following code retrieves 'dossier Urls' for substances in Column A by scraping the ECHA website. I'm trying to error handle cases where a substance Url cannot be found.
I can't quite see why the following code fails. I have highlighted the Problematic line with a comment. this is highlighted in debugging as an Object Required error but I can't see where I'm going wrong.
Sub PopulateExposures()
Dim url, rw As Range
Set rw = Sheets("data").Range("A2:E2") 'first row with inputs
Do While Application.CountA(rw) > 0
url = SubstanceUrl(rw.Cells(1).Value, rw.Cells(2).Value) 'get the URL
rw.Cells(5).Resize(1, 3).Value = ExposureData(url) 'get exposure data (as array) and add to row
Set rw = rw.Offset(1, 0) 'next substance
Loop
End Sub
Public Function SubstanceUrl(SubstanceName, CASNumber) As String
Const url = "https://echa.europa.eu/information-on-chemicals/registered-substances?" & _
"p_p_id=dissregisteredsubstances_WAR_dissregsubsportlet&p_p_lifecycle=1&" & _
"p_p_state=normal&p_p_mode=view&" & _
"__dissregisteredsubstances_WAR_dissregsubsportlet_javax.portlet.action=dissRegisteredSubstancesAction"
Dim oHTML, oHttp, MyDict, payload, DictKey, sep
Set oHTML = New HTMLDocument
Set oHttp = CreateObject("MSXML2.XMLHTTP")
Set MyDict = CreateObject("Scripting.Dictionary")
MyDict("_dissregisteredsubstances_WAR_dissregsubsportlet_disreg_name") = SubstanceName
MyDict("_dissregisteredsubstances_WAR_dissregsubsportlet_disreg_cas-number") = CASNumber
MyDict("_disssimplesearchhomepage_WAR_disssearchportlet_disclaimer") = "true"
MyDict("_disssimplesearchhomepage_WAR_disssearchportlet_disclaimerCheckbox") = "on"
payload = ""
For Each DictKey In MyDict
payload = payload & sep & DictKey & "=" & WorksheetFunction.EncodeURL(MyDict(DictKey))
sep = "&"
Next DictKey
With oHttp
.Open "POST", url, False
.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36"
.setRequestHeader "Content-type", "application/x-www-form-urlencoded"
.send payload
oHTML.body.innerHTML = .responseText
End With
'PROBLEMATIC CODE
If oHTML.querySelector(".details").getAttribute("href") Is Error Then
SubstanceUrl = "-"
Else
'Sometimes output changes despite same input
SubstanceUrl = oHTML.querySelector(".details").getAttribute("href")
End If
Debug.Print SubstanceUrl
End Function
Function ExposureData(urlToGet)
Dim XMLReq As New MSXML2.XMLHTTP60
Dim HTMLDoc As HTMLDocument, dds
Dim Route(1 To 3) As String, Results(1 To 3) As String, c, Info, Data
Route(1) = "sGeneralPopulationHazardViaInhalationRoute"
Route(2) = "sGeneralPopulationHazardViaDermalRoute"
Route(3) = "sGeneralPopulationHazardViaOralRoute"
XMLReq.Open "Get", urlToGet & "/7/1", False
XMLReq.send
If XMLReq.Status <> 200 Then
Results(1) = "Problem" & vbNewLine & XMLReq.Status & " - " & XMLReq.statusText
Else
Set HTMLDoc = New HTMLDocument
HTMLDoc.body.innerHTML = XMLReq.responseText
For c = 1 To UBound(Route, 1)
Set Info = HTMLDoc.getElementById(Route(c))
If Not Info Is Nothing Then
Set Info = Info.NextSibling.NextSibling.NextSibling
Set dds = Info.getElementsByTagName("dd")
If dds.Length > 1 Then
Results(c) = dds(1).innerText
Else
Results(c) = "hazard unknown"
End If
Else
Results(c) = "no info"
End If
Next c
End If
ExposureData = Results
End Function
For this code to run values must be present in column A. Acetone and Benzene can be used respectively to test 2 rows. To test out the error handling Enter something made up like Benzenjaj.
I think this is a quick fix. Just can't see it.
Update:
Tetsing on made up substance name:
First 2 results are found as normal but the made-up chemical causes the following error:
Code:
Sub PopulateExposures()
Dim url, rw As Range
Set rw = Sheets("data").Range("A2:E2") 'first row with inputs
Do While Application.CountA(rw) > 0
url = SubstanceUrl(rw.Cells(1).Value, rw.Cells(2).Value) 'get the URL
rw.Cells(5).Resize(1, 3).Value = ExposureData(url) 'get exposure data (as array) and add to row
Set rw = rw.Offset(1, 0) 'next substance
Loop
End Sub
Public Function SubstanceUrl(SubstanceName, CASNumber) As String
Const url = "https://echa.europa.eu/information-on-chemicals/registered-substances?" & _
"p_p_id=dissregisteredsubstances_WAR_dissregsubsportlet&p_p_lifecycle=1&" & _
"p_p_state=normal&p_p_mode=view&" & _
"__dissregisteredsubstances_WAR_dissregsubsportlet_javax.portlet.action=dissRegisteredSubstancesAction"
Dim oHTML, oHttp, MyDict, payload, DictKey, sep
Set oHTML = New HTMLDocument
Set oHttp = CreateObject("MSXML2.XMLHTTP")
Set MyDict = CreateObject("Scripting.Dictionary")
MyDict("_dissregisteredsubstances_WAR_dissregsubsportlet_disreg_name") = SubstanceName
MyDict("_dissregisteredsubstances_WAR_dissregsubsportlet_disreg_cas-number") = CASNumber
MyDict("_disssimplesearchhomepage_WAR_disssearchportlet_disclaimer") = "true"
MyDict("_disssimplesearchhomepage_WAR_disssearchportlet_disclaimerCheckbox") = "on"
payload = ""
For Each DictKey In MyDict
payload = payload & sep & DictKey & "=" & WorksheetFunction.EncodeURL(MyDict(DictKey))
sep = "&"
Next DictKey
With oHttp
.Open "POST", url, False
.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36"
.setRequestHeader "Content-type", "application/x-www-form-urlencoded"
.send payload
oHTML.body.innerHTML = .responseText
End With
On Error Resume Next 'ignore error on following line
SubstanceUrl = oHTML.querySelector(".details").getAttribute("href")
On Error GoTo 0 'stop ignoring errors
If Len(SubstanceUrl) = 0 Then SubstanceUrl = "<no URL>"
End Function
Function ExposureData(urlToGet)
Dim XMLReq As New MSXML2.XMLHTTP60
Dim HTMLDoc As HTMLDocument, dds
Dim Route(1 To 3) As String, Results(1 To 3) As String, c, Info, Data
Route(1) = "sGeneralPopulationHazardViaInhalationRoute"
Route(2) = "sGeneralPopulationHazardViaDermalRoute"
Route(3) = "sGeneralPopulationHazardViaOralRoute"
XMLReq.Open "Get", urlToGet & "/7/1", False
XMLReq.send
If XMLReq.Status <> 200 Then
Results(1) = "Problem" & vbNewLine & XMLReq.Status & " - " & XMLReq.statusText
Else
Set HTMLDoc = New HTMLDocument
HTMLDoc.body.innerHTML = XMLReq.responseText
For c = 1 To UBound(Route, 1)
Set Info = HTMLDoc.getElementById(Route(c))
If Not Info Is Nothing Then
Set Info = Info.NextSibling.NextSibling.NextSibling
Set dds = Info.getElementsByTagName("dd")
If dds.Length > 1 Then
Results(c) = dds(1).innerText
Else
Results(c) = "hazard unknown"
End If
Else
Results(c) = "no info"
End If
Next c
End If
ExposureData = Results
End Function
You can just ignore any error:
With oHttp
.Open "POST", url, False
.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36"
.setRequestHeader "Content-type", "application/x-www-form-urlencoded"
.send payload
oHTML.body.innerHTML = .responseText
End With
On Error Resume Next 'ignore error on following line
SubstanceUrl = oHTML.querySelector(".details").getAttribute("href")
On Error Goto 0 'stop ignoring errors
If Len(SubstanceUrl) = 0 Then SubstanceUrl = "<no URL>"
You can test the .Length of querySelectorAll when looking for the particular registered dossier url. You need to amend your code elsewhere to handle the "-" return url. I prefer Tim's solution in terms of ignoring error however for the first bit.
Option Explicit
Sub PopulateExposures()
Dim url, rw As Range
Set rw = Sheets("data").Range("A2:E2") 'first row with inputs
Do While Application.CountA(rw) > 0
url = SubstanceUrl(rw.Cells(1).Value, rw.Cells(2).Value) 'get the URL
If Left$(url, 5) = "https" Then
rw.Cells(5).Resize(1, 3).Value = ExposureData(url) 'get exposure data (as array) and add to row
Else
rw.Cells(5).Resize(1, 3).Value = url
End If
Set rw = rw.Offset(1, 0) 'next substance
Loop
End Sub
Public Function SubstanceUrl(SubstanceName, CASNumber) As String
Const url = "https://echa.europa.eu/information-on-chemicals/registered-substances?" & _
"p_p_id=dissregisteredsubstances_WAR_dissregsubsportlet&p_p_lifecycle=1&" & _
"p_p_state=normal&p_p_mode=view&" & _
"__dissregisteredsubstances_WAR_dissregsubsportlet_javax.portlet.action=dissRegisteredSubstancesAction"
Dim oHTML, oHttp, MyDict, payload, DictKey, sep
Set oHTML = New HTMLDocument
Set oHttp = CreateObject("MSXML2.XMLHTTP")
Set MyDict = CreateObject("Scripting.Dictionary")
MyDict("_dissregisteredsubstances_WAR_dissregsubsportlet_disreg_name") = SubstanceName
MyDict("_dissregisteredsubstances_WAR_dissregsubsportlet_disreg_cas-number") = CASNumber
MyDict("_disssimplesearchhomepage_WAR_disssearchportlet_disclaimer") = "true"
MyDict("_disssimplesearchhomepage_WAR_disssearchportlet_disclaimerCheckbox") = "on"
payload = ""
For Each DictKey In MyDict
payload = payload & sep & DictKey & "=" & WorksheetFunction.EncodeURL(MyDict(DictKey))
sep = "&"
Next DictKey
With oHttp
.Open "POST", url, False
.setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36"
.setRequestHeader "Content-type", "application/x-www-form-urlencoded"
.send payload
oHTML.body.innerHTML = .responseText
End With
If oHTML.querySelectorAll("[href*=registered-dossier]").Length = 0 Then
SubstanceUrl = "-"
Else
'Sometimes output changes despite same input
SubstanceUrl = oHTML.querySelector(".details")
End If
Debug.Print SubstanceUrl
End Function
Function ExposureData(urlToGet)
Dim XMLReq As New MSXML2.XMLHTTP60
Dim HTMLDoc As HTMLDocument, dds
Dim Route(1 To 3) As String, Results(1 To 3) As String, c, Info, Data
Route(1) = "sGeneralPopulationHazardViaInhalationRoute"
Route(2) = "sGeneralPopulationHazardViaDermalRoute"
Route(3) = "sGeneralPopulationHazardViaOralRoute"
XMLReq.Open "Get", urlToGet & "/7/1", False
XMLReq.send
If XMLReq.Status <> 200 Then
Results(1) = "Problem" & vbNewLine & XMLReq.Status & " - " & XMLReq.statusText
Else
Set HTMLDoc = New HTMLDocument
HTMLDoc.body.innerHTML = XMLReq.responseText
For c = 1 To UBound(Route, 1)
Set Info = HTMLDoc.getElementById(Route(c))
If Not Info Is Nothing Then
Set Info = Info.NextSibling.NextSibling.NextSibling
Set dds = Info.getElementsByTagName("dd")
If dds.Length > 1 Then
Results(c) = dds(1).innerText
Else
Results(c) = "hazard unknown"
End If
Else
Results(c) = "no info"
End If
Next c
End If
ExposureData = Results
End Function
I am trying to download a macro enabled file (xlsm) from Jira, I have tried the below method and able to download the file but the file is corrupted. Please advice what I am missing here.
Dim myURL As String
myURL = "Put your download link here"
Dim HttpReq As Object
Set HttpReq = CreateObject("Microsoft.XMLHTTP")
HttpReq.Open "GET", myURL, False, "username", "password"
HttpReq.send
myURL = HttpReq.responseBody
If HttpReq.Status = 200 Then
Set oStrm = CreateObject("ADODB.Stream")
oStrm.Open
oStrm.Type = 1
oStrm.Write HttpReq.responseBody
oStrm.SaveToFile ThisWorkbook.Path & "\" & "file.xlsm", 2 ' 1 = no overwrite, 2 = overwrite
oStrm.Close
End If
(source)
Finally after several attempts, below code work, hope this is helpful for others.
Private Sub DownloadFromJira()
Dim oJiraService As MSXML2.ServerXMLHTTP60
Dim sPath As String
Dim sStatus As String
Dim FileData() As Byte
Dim FileNum As Long
Set oJiraService = New MSXML2.ServerXMLHTTP60
sPath = "C:\Users\**ID**\Downloads\Test.xlsm"
With oJiraService
.Open "GET", "https://**MyJiraLink**/secure/attachment/123455/Test.xlsm", False
.setRequestHeader "Content-Type", "application/json"
.setRequestHeader "Authorization", "Basic " & EncodeBase64(JiraUID & ":" & JiraPWD)
.setRequestHeader "Accept", "application/json"
.send
sStatus = .status & " | " & .statusText
If .status = "401" Then
MsgBox "Not Connected"
End If
FileData = .responseBody
FileNum = FreeFile
Open sPath For Binary Access Write As #FileNum
Put #FileNum, 1, FileData
Close #FileNum
End With
Set oJiraService = Nothing
End Sub
Private Function EncodeBase64(srcTxt As String) As String
Dim arrData() As Byte
arrData = StrConv(srcTxt, vbFromUnicode)
Dim objXML As MSXML2.DOMDocument60
Dim objNode As MSXML2.IXMLDOMElement
Set objXML = New MSXML2.DOMDocument60
Set objNode = objXML.createElement("b64")
objNode.DataType = "bin.base64"
objNode.nodeTypedValue = arrData
EncodeBase64 = objNode.Text
Set objNode = Nothing
Set objXML = Nothing
End Function
How to GET response using VBA?
This code here does not work. In Debug.Pring() or MsgBox is empty.
TargetURL = snURL + selectedMail
Set HTTPReq = CreateObject("WinHttp.WinHttpRequest.5.1")
HTTPReq.Open "GET", TargetURL, False
HTTPReq.SetCredentials snUser, snPass, 0
HTTPReq.setRequestHeader "Accept", "application/json"
Debug.Print HTTPReq.responseText
I want to get JSON data.
Optional libraries (needed, if early binding is used, in general the code will work without them):
Change the companyName variable:
Sub TestMe()
Dim xmlObject As Object
Dim companyName As String: companyName = "Google"
Dim strUrl As String
strUrl = "http://dev.markitondemand.com/MODApis/Api/v2/Lookup/json?input=" & companyName
Set xmlObject = CreateObject("MSXML2.XMLHTTP")
With xmlObject
.Open "GET", strUrl, False
.Send
End With
Dim response As String
response = "{""data"":" & xmlObject.ResponseText & "}"
Debug.Print response
End Sub
I am working on a procedure, in MS-Access VBA, to POST an XML text string to a web service and process the XML text string that is returned from the service.
The issue I am having is that the responseText property is always empty when it should contain a XML text string. No errors are returned and the .status = "OK".
I have tried the WinHttp.WinHttpRequest, MSXML2.XMLHTTP, and MSXML2.ServerXMLHTTP objects and consistently have the same issue.
Here is a code example:
Public Function Send() As Boolean
Dim oXHR As MSXML2.XMLHTTP60
Dim sURL, sCred As String
Dim sRequest, sResult, sStatus, sHeader As String
Dim bRtn As Boolean
BuildReqXML
sRequest = Me.RequestXML_String
With orsValues
sURL = .Fields("WebServiceURL").Value
sCred = Base64Encode(Trim(.Fields("User").Value) & ":" & Trim(.Fields("Password").Value))
End With
Set oXHR = New MSXML2.XMLHTTP60
With oXHR
.Open "POST", sURL, False
.SetRequestHeader "Authorization", "Basic " & sCred & """"
.SetRequestHeader "User-Agent", "Mozilla/4.0"
.SetRequestHeader "Content-Type", "text/xml"
.Send sRequest
sStatus = .StatusText
sResult = .ResponseText
sHeader = .GetAllResponseHeaders
If sResult <> "" Then
If Contains(sResult, "<") Then ReadXML sResult, "Response"
Debug.Print sResult
Else
Debug.Print sHeader
Debug.Print sRequest
End If
End With
Set oXHR = Nothing
End Function
I have verified that the web service is working correctly by building a similar call in a HTML document, sending the XML string, and receiving the response XML string.
Can someone please help me fix my issue?
I found the problem, with help from Fiddler.
The line setting the authorization header
.SetRequestHeader "Authorization", "Basic " & sCred & """"
Was adding a (") to the header line. The corrected line is
.SetRequestHeader "Authorization", "Basic " & sCred
Thank you for your help
The code below only scrapes/parses one page of data (50 results) into excel however, I would like it to scrape multiple pages (500 results) - please help. (Base64Encode was taken from a different source - I don't take credit for it)
`Function GetData() As Boolean
Application.ScreenUpdating = False
Dim objHTTP As New MSXML2.XMLHTTP
Dim strURL As String
Dim strUserName As String
Dim strPassword As String
Sheet1.Range("A2:R2000") = ""
Sheet1.Activate
strUserName = "User"
strPassword = "Password"
For i = 1 To UBound(MyArray)
strURL = "https://.ngx.com/ngxcs/indexPrice.xml"
objHTTP.Open "GET", strURL, False
objHTTP.setRequestHeader "Authorization", "Basic " & Base64Encode(strUserName & ":" & strPassword)
objHTTP.setRequestHeader "Host", "secure.example.com"
objHTTP.setRequestHeader "Content-Type", "text"
objHTTP.send "Nothing"
While objHTTP.readyState <> 4
DoEvents
Wend
Dim strResponseReceived As String
strResponseReceived = objHTTP.responseText
Debug.Print objHTTP.responseText
MsgBox strResponseReceived
Dim xDoc As DOMDocument
Set xDoc = New DOMDocument
xDoc.LoadXML objHTTP.responseText`
''''''''''''''''''''''''''''''''''''''''''
Rest of Code that outputs it into excel
Thanks for the Help!