Can't get to value inside XML spanclass - vba

I'm trying to retrieve a value in one of the spanclasses in an online XML-file.
File: http://www.ecb.europa.eu/stats/eurofxref/eurofxref-daily.xml
I want to get to the USD rate but my code does not seem to loop through and span-classes, where is my mistake?
My code
Function response_Text(url As String) ' get the responsetext from an xml request
Dim xml As Object
Set xml = CreateObject("MSXML2.XMLHTTP")
With xml
.Open "Get", url, False
.send
response_Text = .responsetext
End With
Set xml = Nothing
End Function
Private Sub find_ClassElement(HTML_doc As MSHTML.HTMLDocument) ' return value inside span_class
Dim ticker As Variant
Dim XML_elements As MSHTML.IHTMLElementCollection
Dim XML_spanclass As MSHTML.HTMLSpanElement
Dim XML_targetElement As MSHTML.HTMLLIElement
Set XML_elements = HTML_doc.getElementsByClassName("line") **<--- something seems to be wrong here, the code does not loop through any span_classes after this point as intended ( the for statement is not being executed )**
For Each XML_spanclass In XML_elements
If InStr(XML_spanclass.innerHTML, "USD") > 0 Then
Debug.Print "success"
Set XML_targetElement = XML_spanclass.parentElement
Debug.Print CSng(XML_targetElement.getElementsByClassName("webkit-html-attribute-value")(0).innerHTML)
End If
Next
End Sub
Private Sub run() ' run the whole operation
Dim http_req As http_req: Set http_req = New http_req
Dim xml As MSHTML.HTMLDocument: Set xml = New MSHTML.HTMLDocument
Dim url As String: url = "http://www.ecb.europa.eu/stats/eurofxref/eurofxref-daily.xml"
xml.body.innerHTML = http_req.response_Text(url)
Call find_ClassElement(xml)
End Sub

There are no tags with a class of "line" so your collection is empty - nothing to loop through. Here's another way
Sub GetUSD()
Dim xHttp As MSXML2.XMLHTTP
Dim xDoc As MSXML2.DOMDocument
Dim xCube As MSXML2.IXMLDOMElement
Dim xCubes As MSXML2.IXMLDOMSelection
Dim sCurrency As String
'load the xml document
Set xDoc = New MSXML2.DOMDocument
xDoc.Load "http://www.ecb.europa.eu/stats/eurofxref/eurofxref-daily.xml"
'wait until it's completely loaded
Do
DoEvents
Loop Until xDoc.readyState = 4
'get all the cube tags
Set xCubes = xDoc.getElementsByTagName("Cube")
For Each xCube In xCubes
'some cube tags don't have attributes
On Error Resume Next
sCurrency = xCube.Attributes(0).Text
On Error GoTo 0
'if the first attribute is USD, get the second attribute
If sCurrency = "USD" Then
Debug.Print xCube.Attributes(1).Text
End If
Next xCube
End Sub
edit
I don't know xpath well enough to do this properly, but this works.
Sub GetUSD()
Dim xDoc As MSXML2.DOMDocument60
Dim xCube As MSXML2.IXMLDOMNode
Dim xCubes As MSXML2.IXMLDOMNodeList
Dim sCurrency As String
'load the xml document
Set xDoc = New MSXML2.DOMDocument60
xDoc.Load "http://www.ecb.europa.eu/stats/eurofxref/eurofxref-daily.xml"
'wait until it's completely loaded
Do
DoEvents
Loop Until xDoc.readyState = 4
'get all the cube tags
Set xCubes = xDoc.SelectNodes("//*")
For Each xCube In xCubes
On Error Resume Next
sCurrency = xCube.Attributes(0).NodeValue
On Error GoTo 0
'if the first attribute is USD, get the second attribute
If sCurrency = "USD" Then
Debug.Print xCube.Attributes(1).NodeValue
End If
Next xCube
End Sub

Related

VBA: Subscript out of range or Type Mismatch

Very new to VBA, and am really stuck. Below is my code, you'll see near the end my For loop for Des and DesArr. All I'm trying to do with that loop is pull a column of cells from the work sheet "SIC", which is Sheet2 in my Workbook, I either get the error "Subscript out of Range" or "Type Mismatch" and whenever I try and google/correct for one, the other error takes its place. If anyone can help me work through this I'd greatly appreciate it!
Public Sub getGoogleDescriptions(strSearch As String)
Dim URL As String, strResponse As String
Dim objHTTP As Object
Dim htmlDoc As HTMLDocument
Dim result As String
Dim i As Integer
Dim u As Integer
Dim resultArr As Variant
Dim Des As String
Dim DesArr(2 To 48) As Long
Set htmlDoc = CreateObject("htmlfile")
'Set htmlDoc = New HTMLDocument
Dim objResults As Object
Dim objResult As Object
strSearch = Replace(strSearch, " ", "+")
URL = "https://www.google.com/search?q=" & strSearch
Set objHTTP = CreateObject("MSXML2.XMLHTTP")
With objHTTP
.Open "GET", URL, False
.setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
.send
htmlDoc.body.innerHTML = .responseText
End With
Set objResults = htmlDoc.getElementsByClassName("st")
Debug.Print objResults(0).innerText
result = CStr(objResults(0).innerText)
resultArr = Split(result, " ", -1, 0)
For i = LBound(resultArr) To UBound(resultArr) 'Define i to be the length of the List'
Debug.Print i, resultArr(i) 'Prints the corresponding index value and array element'
Next i 'repeat
Set htmlDoc = Nothing
Set objResults = Nothing
Set objHTTP = Nothing
Set wk = ActiveWorkbook
For u = 2 To 48
Des = Sheets("SIC").Range("C" & u).Value
DesArr(u) = Des
Next u
Debug.Print DesArr(2)
End Sub
You're getting type mismatch because it's expecting DesArr to be a long data type which is a number between -2,147,483,648 to 2,147,483,647.
In it's use within the subroutine, it's used as a variant. So 2 corrections - change it to a variant as shown below
Then just adjust your 2 to 48 to within your statement... in this case it's a simple offset of 2, so just use (u - 2) and your Variant length is 47 starting at 0 instead of 1.
Public Sub getGoogleDescriptions(strSearch As String)
Dim URL As String, strResponse As String
Dim objHTTP As Object
Dim htmlDoc As HTMLDocument
Dim result As String
Dim i As Integer
Dim u As Integer
Dim resultArr As Variant
Dim Des As String
Dim DesArr(0) : ReDim DesArr(46)
Set htmlDoc = CreateObject("htmlfile")
'Set htmlDoc = New HTMLDocument
Dim objResults As Object
Dim objResult As Object
strSearch = Replace(strSearch, " ", "+")
URL = "https://www.google.com/search?q=" & strSearch
Set objHTTP = CreateObject("MSXML2.XMLHTTP")
With objHTTP
.Open "GET", URL, False
.setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
.send
htmlDoc.body.innerHTML = .responseText
End With
Set objResults = htmlDoc.getElementsByClassName("st")
Debug.Print objResults(0).innerText
result = CStr(objResults(0).innerText)
resultArr = Split(result, " ", -1, 0)
For i = LBound(resultArr) To UBound(resultArr) 'Define i to be the length of the List'
Debug.Print i, resultArr(i) 'Prints the corresponding index value and array element'
Next i 'repeat
Set htmlDoc = Nothing
Set objResults = Nothing
Set objHTTP = Nothing
Set wk = ActiveWorkbook
For u = 2 To 48
Des = Sheets("SIC").Range("C" & u).Value
DesArr(u - 2) = Des
Next u
Debug.Print DesArr(0)
End Sub

Print/Import all web page source data using vba

I have code below which imports only part of source code into sheet. I want all source code as it is.`Sub GetSourceCode()
Dim ie As Object
Dim str As String
Dim arr
str = Sheets("sheet2").Range("I1").Value
Set ie = CreateObject("INTERNETEXPLORER.APPLICATION")
ie.Navigate "https://tiweb.industrysoftware.automation.com/prdata/cgi-bin/n_prdata_index.cgi?"
ie.Visible = False
Do Until ie.ReadyState = 4
DoEvents
Loop
ie.Document.getelementsbyname("pr_numbers")(0).Value = str
Application.SendKeys ("~")
Do Until ie.ReadyState = 4
DoEvents
Loop
Worksheets("Download_PRdata2").Activate
arr = Split(ie.Document.body.outertext)
Worksheets("Download_PRdata2").Activate
ActiveSheet.Range("A1").Resize(UBound(arr) + 1, 1).Value = Application.Transpose(arr)
End Sub`
Hi you can refer the below code
' Fetch Entire Source Code
Private Sub HTML_VBA_Excel()
Dim oXMLHTTP As Object
Dim sPageHTML As String
Dim sURL As String
'Change the URL before executing the code
sURL = "http://www.google.com"
'Extract data from website to Excel using VBA
Set oXMLHTTP = CreateObject("MSXML2.ServerXMLHTTP")
oXMLHTTP.Open "GET", sURL, False
oXMLHTTP.send
sPageHTML = oXMLHTTP.responseText
'Get webpage data into Excel
' If longer sourcecode mean, you need to save to a external text file or somewhere,
' since excel cell have some limits on storing max characters
ThisWorkbook.Sheets(1).Cells(1, 1) = sPageHTML
MsgBox "XMLHTML Fetch Completed"
End Sub
Source : http://www.vbausefulcodes.in/usefulcodes/get-data-or-source-code-from-webpage-using-excel-vba.php
Hope this will be useful to you!
you can save source code in a text file like this. add the below function instead of this line ThisWorkbook.Sheets(1).Cells(1, 1) = sPageHTML
Createtextfile (sPageHTML)
and add this below function after End Sub.
Sub Createtextfile(sPageHTML)
Dim fso As Object
Set fso = CreateObject("Scripting.FileSystemObject")
Dim oFile As Object
strPath = "E:\test.txt"
Set oFile = fso.Createtextfile(strPath)
oFile.WriteLine sPageHTML
oFile.Close
Set fso = Nothing
Set oFile = Nothing
End Sub
Change the location where you want to save.

VBA HTML Scraping - '.innertext' from complex table

All,
I've created the following Module to scrape a single value (1m % change in London house prices) from the below address:
https://www.hometrack.com/uk/insight/uk-cities-house-price-index/
The specific value is nested within the following code:
The below VBA code is my attempt at scraping. I, perhaps wrongly, feel that I am very close to capturing the value - but the code will not work.
Does anyone know where I am going wrong here? It doesn't show an error message but also doesn't output any values.
Sub HousePriceData()
Dim wb As Workbook
Dim ws As Worksheet
Dim TxtRng As Range
Dim ie As Object
Dim V As Variant
Dim myValue As Variant
Set ie = CreateObject("INTERNETEXPLORER.APPLICATION")
ie.NAVIGATE "https://www.hometrack.com/uk/insight/uk-cities-house-price-index/"
ie.Visible = False
While ie.ReadyState <> 4
DoEvents
Wend
Set wb = ActiveWorkbook
Set ws = wb.Sheets("Input")
Set TxtRng = ws.Range("C15")
Set myValue = ie.document.getElementById("cities-index-table").getElementsByTagName("tr")(7).g‌​etElementsByTagName("td")(5)
TxtRng = myValue.innerText
End Sub
Try to use XHR and primitive parsing instead of awkward IE:
Sub Test()
Dim strUrl As String
Dim strTmp As String
Dim arrTmp As Variant
strUrl = "https://www.hometrack.com/uk/insight/uk-cities-house-price-index/"
With CreateObject("MSXML2.XMLHttp")
.Open "GET", strUrl, False
.Send ""
strTmp = .ResponseText
End With
arrTmp = Split(strTmp, ">London</a></td>", 2)
strTmp = arrTmp(1)
arrTmp = Split(strTmp, "<td>", 7)
strTmp = arrTmp(6)
arrTmp = Split(strTmp, "</td>", 2)
strTmp = arrTmp(0)
ThisWorkbook.Sheets("Input").Range("C15").Value = strTmp
End Sub
try use this
Dim Engmt As String
Engmt = "ERRORHERE"
On Error Resume Next
Engmt = Trim(ie.document.getElementById("cities-index- table").getElementsByTagName("tr")(12).g‌​etElementsByTagName("td")(4).innerText)
On Error GoTo 0
If Engmt = "ERRORHERE" Then
TxtRng.Value = "ERROR"
Else
TxtRng.Value = Engmt
End If

vba htmldocument How can I delete a specific element

I am writing the code in VBA.
How can I delete a specific element?
Thank you
Dim html As MSHTML.HTMLDocument
Set html = New MSHTML.HTMLDocument
Dim document As MSHTML.HTMLDocument
Set document = html.createDocumentFromUrl("http://example.com/", vbNullString)
' wait download
Do While document.readyState <> "complete"
Loop
' Insert I Can
Call document.DocumentElement.insertAdjacentHTML("afterend", "<div>test</div>")
' I want to delete a specific element here But I fail
' Call document.removeNode("specific element")
You cant remove elements, but you can remove nodes, which is more or less the same.
Function DelTagById(strData As String, strID As String) As String
On Local Error GoTo MyError
Dim HTMLDoc As HTMLDocument
Dim Node As IHTMLDOMNode
DelTagById = strData
If strID = "" Then GoTo MyExit
Set HTMLDoc = New HTMLDocument
HTMLDoc.body.innerHTML = strData
Set Node = HTMLDoc.getElementById(strID)
If Node Is Nothing Then GoTo MyExit
Node.parentNode.removeChild Node
DelTagById = HTMLDoc.body.innerHTML
MyExit:
Set Node = Nothing
Set HTMLDoc = Nothing
Exit Function
MyError:
'Handle Error
Resume MyExit
End Function

VBA Default Namespace Bug

I am trying to instantiate the Root node of a Dom document. However i am naming it xbrl and this name is in the default namespace which is xmlns="http://www.xbrl.org/2003/instance"
According to a previous post-answer MSXML is buggy (answer of barrowc) when it comes to default namespaces. So i had to make some modification to my code. These where
objXMLDoc.LoadXML (objXMLHTTP.responseText)
replaced by
objXMLDoc.LoadXML objXMLHTTP.responseText
objXMLDoc.setProperty "SelectionNamespaces", "xmlns:r='http://www.xbrl.org/2003/instance'"
AND ALSO
Dim objXMLHTTP As New MSXML2.XMLHTTP
Dim objXMLDoc As New MSXML2.DOMDocument
replace by
Dim objXMLHTTP As New MSXML2.XMLHTTP60
Dim objXMLDoc As New MSXML2.DOMDocument60
The number 60 symbolizes the version 6.0
So when i made these modifications the macro worked without an error. But now it works only sometimes. When it doesn't it gives me a
Run-time error -2147467259(80004005)':
Reference to undeclared namespace prefix:'us-gaap.'
I cannot understand the reason the macro crashes and believe it is a bug.
Can you help?
For reasons of completeness the entire macro is submitted below
Sub READSITE()
Dim IE As InternetExplorer
Dim els, el, colDocLinks As New Collection
Dim lnk, res
Dim Ticker As String
Dim colXMLPaths As New Collection
Dim XMLElement As String
Set IE = New InternetExplorer
IE.Visible = False
Ticker = Worksheets("Sheet1").Range("A1").Value
LoadPage IE, "https://www.sec.gov/cgi-bin/browse-edgar?" & _
"action=getcompany&CIK=" & Ticker & "&type=10-Q" & _
"&dateb=&owner=exclude&count=20"
Set els = IE.Document.getelementsbytagname("a")
For Each el In els
If Trim(el.innertext) = "Documents" Then
colDocLinks.Add el.href
End If
Next el
For Each lnk In colDocLinks
LoadPage IE, CStr(lnk)
For Each el In IE.Document.getelementsbytagname("a")
If el.href Like "*[0-9].xml" Then
Debug.Print el.innertext, el.href
colXMLPaths.Add el.href
End If
Next el
Next lnk
XMLElement = Range("C1").Value
'For each link, open the URL and display the Debt Instrument Insterest Rate
For Each lnk In colXMLPaths
res = GetData(CStr(lnk), XMLElement)
With Worksheets("Sheet1").Cells(Rows.Count, 1).End(xlUp).Offset(1, 0)
.NumberFormat = "#"
.Value = Ticker
.Offset(0, 1).Value = lnk
.Offset(0, 2).Value = res
End With
Next lnk
End Sub
Function GetData(sURL As String, sXMLElement As String)
Dim strXMLSite As String
Dim objXMLHTTP As New MSXML2.XMLHTTP60
Dim objXMLDoc As New MSXML2.DOMDocument60
Dim objXMLNodexbrl As MSXML2.IXMLDOMNode
Dim objXMLNodeElement As MSXML2.IXMLDOMNode
Dim objXMLNodeStkhldEq As MSXML2.IXMLDOMNode
GetData = "?" 'No data from XML
objXMLHTTP.Open "GET", sURL, False '<<EDIT: GET the site
objXMLHTTP.send
objXMLDoc.LoadXML (objXMLHTTP.responseText)
objXMLDoc.setProperty "SelectionNamespaces", "xmlns:r='http://www.xbrl.org/2003/instance'"
Set objXMLNodexbrl = objXMLDoc.SelectSingleNode("r:xbrl")
Set objXMLNodeElement = objXMLNodexbrl.SelectSingleNode(sXMLElement)
If Not objXMLNodeElement Is Nothing Then
GetData = objXMLNodeElement.Text
End If
End Function
Sub LoadPage(IE As Object, url As String)
IE.Navigate url
Do While IE.Busy Or IE.ReadyState <> READYSTATE_COMPLETE
DoEvents
Loop
End Sub
It is also a strange and frustrating note, to see that if i alter my macro in the before-corrections status according to the modifications given to me by barrowc i can now see that the macro works!
I was able to terminate the bug swiftly by checking in Diff Checker the vast code with previous code samples i had. It seems all you need is to remove the 60 from these lines (god knows why...)
Dim objXMLHTTP As New MSXML2.XMLHTTP60
Dim objXMLDoc As New MSXML2.DOMDocument60