How to get a particular link from a specific class? - vba

I'd like to extract this href from that particular class
<tr class="even">
<td>
Serie A 2015/2016
</td>
This is what I wrote:
Sub ExtractHrefClass()
Dim ie As Object
Dim doc As HTMLDocument
Dim class As Object
Dim href As Object
Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = True
ie.navigate Range("D8")
Do
DoEvents
Loop Until ie.readyState = READYSTATE_COMPLETE
Set doc = ie.document
Set class = doc.getElementsByClassName("even")
Set href = class.getElementsByTagName("a")
Range("E8").Value = href
ie.Quit
End Sub
But unfortunately there is a mistake Object doesn't support this property or method (Error 438) on the line:
Set href = class.getElementsByTagName("a")
UPDATE 1
I modified the code as per #RyszardJędraszyk answer, but no output come out O_o Where am I doing wrong?
Sub ExtractHrefClass()
Dim ie As Object
Dim doc As HTMLDocument
Dim href As Object
Dim htmlEle As Object
Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = True
ie.navigate Range("D8")
Do
DoEvents
Loop Until ie.readyState = READYSTATE_COMPLETE And ie.Busy = False
Set doc = ie.document
Set href = doc.getElementsByTagName("a")
For Each htmlEle In href
If htmlEle.className = "even" Then
Range("E8").Value = htmlEle
End If
Next
ie.Quit
End Sub
UPDATE 2
As #dee requested in comment, there is the code from the web page http://www.soccer24.com/italy/serie-a/archive/
<tbody>
<tr>
<td>
Serie A 2016/2017
</td>
<td></td>
</tr>
<tr class="even">
<td>
Serie A 2015/2016
</td>
<td>
<span class="team-logo" style="background-image: url(/res/image/data/UZbZIMhM-bsGsveSt.png)"></span>Juventus
</td>
</tr>
<tr>
<td>
Serie A 2014/2015
</td>
<td>
<span class="team-logo" style="background-image: url(/res/image/data/UZbZIMhM-bsGsveSt.png)"></span>Juventus
</td>
</tr>
I need only to extract that line: /italy/serie-a-2015-2016/

This worked for me:
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "http://www.soccer24.com/italy/serie-a/archive/", False
.Send
MsgBox Split(Split(Split(.ResponseText, "<tr class=""even"">", 2)(1), "<a href=""", 2)(1), """", 2)(0)
End With
The procedure you need might look like:
Sub ExtractHrefClass()
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", Range("D8").Value, False
.Send
Range("E8").Value = Split(Split(Split(.ResponseText, "<tr class=""even"">", 2)(1), "<a href=""", 2)(1), """", 2)(0)
End With
End Sub

Try:
Dim href As HTMLObjectElement
Make sure that proper libraries are checked in references (Microsoft HTML Object Library).
Are you sure that doc.getElementsByClassName("even") works? It is not listed here: https://msdn.microsoft.com/en-us/library/aa926433.aspx as available method.
I always first use getElementsByTagName and make a condiction If htmlEle.className = "even" then.
Also add the following: ie.readyState = READYSTATE_COMPLETE and ie.busy = False. Still if it is some AJAX based website it can be not enough to determine that website has fully loaded (from the link guessing it could be flashscore.com where you need to track elements on the website informing about its loading status).

querySelectorAll or querySelector can be used here to select the anchor elemets inside of the tr with the specific class and then with getAttribute("href") the href-attribute can be retrieved. HTH.
' Add reference to Microsoft Internet Controls (SHDocVw)
' Add reference to Microsoft HTML Object Library
Dim ie As Object
Dim name As String
Dim Doc As HTMLDocument
Set ie = New InternetExplorer
ie.Visible = 1
ie.navigate "<URL>"
While ie.Busy Or ie.readyState <> 4
DoEvents
Wend
Set Doc = ie.document
Dim anchors As IHTMLDOMChildrenCollection
Dim anchor As IHTMLAnchorElement
Dim i As Integer
Set anchors = Doc.querySelectorAll("tr[class~='even'] a")
If Not anchors Is Nothing Then
For i = 0 To anchors.Length - 1
Set anchor = anchors.item(i)
If anchor.getAttribute("href") = "/italy/serie-a-2015-2016/" Then
Range("E8").Value = anchor.innerHTML
End If
Next
End If
ie.Quit

Related

How to sift through the tag name for image from the pasted elements

So far, while writing code in VBE to make a parser I have used the "img" tag and the "src" attribute to scrape an image but I stumbled trying to go through the portion I'm pasting below. Can't filter the portion I need to use in my code to parse an Image.
Set topics = html.getElementsByClassName("card card-lg")
For i = 0 To topics.Length - 1
Set topic = topics(i)
Cells(x, 1).Value = topic.getElementsByClassName("wine-card__image-wrapper")(0).getElementsByTagName("img")(0).src
x = x + 1
Next i
And a sample of the HTML I'm working with:
<div class="wine-card__image-wrapper">
<a href="/wineries/tschida/wines/angerhof-eiswein-gruner-veltliner-2012">
<figure class="wine-card__image" style="background-image: url(//images.vivino.com/thumbs/qlER3oggQVKh1FZn7YGxZg_375x500.jpg)">
<div class="image-inner"></div>
</figure>
</a>
</div>
You can access the "style" attribute I believe, so
Sub t()
Dim ie As SHDocVw.InternetExplorer
Dim d As MSHTML.HTMLDocument
Dim dv As MSHTML.HTMLDivElement
Dim ha As MSHTML.IHTMLElement
Set ie = New SHDocVw.InternetExplorer
ie.Visible = True
ie.navigate "https://www.vivino.com/explore?e=eJzLLbI11jNVy83MswWSiRW2RgZqyZW26Ulq5SXRsbaGAKA_Cdk%3D"
While ie.Busy Or ie.readyState <> READYSTATE_COMPLETE: DoEvents: Wend
Set d = ie.document
Set e = d.getElementsByClassName("wine-card__image-wrapper")(0)
Set ha = e.Children(0).Children(0)
Debug.Print ha.Style.backgroundImage
End Sub

Extract data from website as separate fields

I am trying to use excel to scrap some information from a website.
This is what shows on source:
<tr class="even">
<td align="right">1</td>
<td>Acrobatic Maneuver</td>
<td>Instant</td>
<td>2W</td>
<td>Common</td>
<td>Winona Nelson</td>
<td><img src="http://magiccards.info/images/en.gif" alt="English" width="16" height="11" class="flag2"> Kaladesh</td>
</tr>
So I want to get everything that has even, and extract the data between the <td></td>
However, all I've found until now is this code
Sub getcards()
Dim IE As Object
Dim i As Long
Dim objCollection As Object
' Create InternetExplorer Object
Set IE = CreateObject("InternetExplorer.Application")
' You can uncoment Next line To see form results
IE.Visible = False
' URL to get data from
IE.Navigate "http://magiccards.info/query?q=%2B%2Be%3Akld/en&v=list&s=issue"
' Statusbar
Application.StatusBar = "Loading, Please wait..."
' Wait while IE loading...
Do While IE.Busy
DoEvents
Application.Wait DateAdd("s", 1, Now)
Loop
On Error GoTo abort
Application.StatusBar = "Searching for value. Please wait..."
Dim dd As String
Set objCollection = IE.document.getElementsByClassName("even")
For i = 0 To objCollection.Length
dd = IE.document.getElementsByClassName("even")(i).innerText
MsgBox dd
Next i
abort:
' Show IE
IE.Visible = True
IE.Quit
' Clean up
Set IE = Nothing
Application.StatusBar = ""
End Sub
It works in such a way that it extracts the data, but the output is 1Acrobatic ManeuverInstant2WCommonWinona Nelson Kaladesh all together.
How can I do so it understands each <td> as a separate field, so I can extract it easily?
When you're looping through i within objCollection you're actually looping through all elements with the ClassName of "even" as opposed to the elements inside the specific element you want.
Try this:
For i = 0 To objCollection.Length - 1
For c = 0 to IE.document.getElementsByClassName("even")(i).getElementsByTagName("td").Length - 1
dd = IE.document.getElementsByClassName("even")(i).getElementsByTagName("td")(c).innerText
MsgBox dd
Next c
Next i

VBA Internet Explorer clicking on Text Error

I am new to VBA and it would be great if you can help me on resolving this issue.
I am trying to click on a Text on an IE SharePoint webpage. I am am able to navigate to IE browser, but I am getting a VBA error for clicking the text "Americas" highlighted in Yellow in attached Screenshot. I need help with the IE.Document part of the code at the end of VBA code below. I assume GetElementbyID and GetElementByTagName are correct from HTML code below.
Error - Method Document of Object "IEwebBrowser"Failed
VBA Code:
Private Sub UploadFile()
Dim i As Long
Dim IE As Object
Dim Doc As Object
Dim objElement As Object
Dim objCollection As Object
Dim buttonCollection As Object
Dim AllSpanElements
Dim Span
' Create InternetExplorer Object
Set IE = CreateObject("InternetExplorer.Application")
IE.Visible = True
' Send the form data To URL As POST binary request
IE.navigate "URL"
' Wait while IE loading...
While IE.Busy
DoEvents
Wend
' I AM GETTING ERROR HERE
Set AllSpanElements = IE.Document.getElementById("ext-gen1271").getElementsByTagName("div")
AllSpanElements.Click
Set IE = Nothing
Set objElement = Nothing
End Sub
HTML CODE
<table class="x-grid-table x-grid-table-resizer" border="0" cellspacing="0" cellpadding="0" style="width:10000px;"><tbody>
<tr class="x-grid-row x-grid-row-selected x-grid-row-focused" id="ext-gen1271">
<td class="x-grid-cell-treecolumn x-grid-cell x-grid-cell-treecolumn-1030 x-grid-cell-first">
<div class="x-grid-cell-inner " style="text-align: left; ;"><img src="" class="x-tree-elbow-plus x-tree-expander">
<img src="" class="x-tree-icon x-tree-icon-parent ">
Americas</div>
</td>
</tr>
</tbody>
</table>
Give this a shot, I cleaned up the code slightly and I'm trying a slightly different approach. Basically I'm iterating over each element on the page, then clicking it when the InnerText has "Americas" contained with in it.
It may not be the InnerText Property you want to check, it might be the Value or Title so you will need to check that.
Here's the code:
Private Sub UploadFile()
Dim IE As Object: Set IE = CreateObject("InternetExplorer.Application")
Dim Elements As Object
Dim Element As Object
With IE
.Visible = True
' Send the form data To URL As POST binary request
.navigate "URL"
' Wait while IE loading...
While .Busy Or .readystate <> 4
Application.Wait (Now() + TimeValue("00:00:01"))
DoEvents
Wend
End With
' I AM GETTING ERROR HERE
Set Elements = IE.Document.getElementsByTagName("*") ' * is all elements
For Each Element In Elements
If InStr(1, Element.innerText, "Americas") > 0 Then ' If the element has the word Americas...click it
Element.Focus
Element.Click
Element.FireEvent ("OnClick")
End If
Next
'Clean up
Set IE = Nothing
End Sub

VBA Getting results in another url but same window

I'm working with VBA to fill a form in a URL and submiting to get results.
When I submit the form with correct values and submit via VBA, I get results in another URL but on the same window.
The problem is that I don't know how to change html reference to start scrapping data with this new url.
Here is my code:
'to refer to the running copy of Internet Explorer
Dim IE As InternetExplorer
'to refer to the HTML document returned
Dim html As HTMLDocument
'open Internet Explorer in memory, and go to website
Set IE = New InternetExplorer
IE.Visible = False
IE.navigate "http://url..."
'Wait until IE is done loading page
Do While IE.READYSTATE <> READYSTATE_COMPLETE
Application.StatusBar = "Connecting with http://url..."
DoEvents
Loop
'show text of HTML document returned
Set html = IE.document
'close down IE and reset status bar
Set IE = Nothing
Application.StatusBar = ""
Set txtArea = html.getElementsByTagName("textarea")(0)
txtArea.Value = txtArea_data
Set formSubmit = html.getElementsByName("submit")(1)
formSubmit.Click
'-------------Get results
'Dim html_results As HTMLDocument
IE.navigate "http://new_url" 'Im not sure if I must do it this way...
'Wait until IE is done loading page
Do While IE.READYSTATE <> READYSTATE_COMPLETE
Application.StatusBar = "Connecting with http://new_url..."
DoEvents
Loop
Set html = IE.document
Dim trResults As IHTMLElementCollection
Set trResults = html.getElementsByClassName("tr")
MsgBox (trResults.Length) 'At this point, trResults always have 0 results...
Have you any idea to help me?
Thanks!
This is a new approach, and still don't work.
I've used IE.Visible = True, so I could check that I get many <tr> results on the results page.
Dim rowNumber As Long
Dim txtArea_data As String
Dim txtArea As Object
Dim formSubmit As Object
Dim IE As InternetExplorer
Dim html As HTMLDocument
Set IE = New InternetExplorer
IE.Visible = True
IE.navigate "http://url..."
Do While IE.READYSTATE <> READYSTATE_COMPLETE
Application.StatusBar = "Connecting with http://url..."
DoEvents
Loop
Set html = IE.document
rowNumber = 4
For rowNumber = 4 To 120 'Rows.Count
txtArea_data = txtArea_data & Cells(rowNumber, 1).Value & Chr(10)
Next rowNumber
Set txtArea = html.getElementsByTagName("textarea")(0)
txtArea.Value = txtArea_data
Set formSubmit = html.getElementsByName("submit")(1)
formSubmit.Click
'Wait until results
Do While IE.Busy: DoEvents: Loop
'-------------Get results
'At this point, the page URL with results has changed, but on the same tab. In the other code, I've used IE.navigate "http://new_url..." But a "Invalid file" message appears.
'-------------
'I suppose html var could be recharged with these new results data, but nothing happen...
Set html = IE.document
Dim trResults As IHTMLElementCollection
Set trResults = html.getElementsByClassName("tr")
MsgBox (trResults.Length) 'At this point, MsgBox returns O...
Set html = Nothing
Results page looks like:
<body>
<table id="tabla-a">
<thead>
<tr>
<th>...</th>
<th>...</th>
<th>...</th>
</tr>
</thead>
<tbody>
<tr>
<td>...</td>
<td>...</td>
<td>...</td>
</tr>
<tr>
<td>...</td>
<td>...</td>
<td>...</td>
</tr>
<tr>
<td>...</td>
<td>...</td>
<td>...</td>
</tr>
</tbody>
</table>
</body>
Is there anything I could tell you to make to help you finding the problem?
Thanks!
EDIT
Oh my God! I've been using Set trResults = html.getElementsByClassName("tr") instead of Set trResults = html.getElementsByTagName("tr") !!!
Thanks for your time!
What should I do now? Edit the original question with the final solution or close the entire question?
I don't use Stackoverflow too much to ask questions...
Thanks!

Parse HTML content in VBA

I have a question relating to HTML parsing. I have a website with some products and I would like to catch text within page into my current spreadsheet. This spreadsheet is quite big but contains ItemNbr in 3rd column, I expect the text in the 14th column and one row corresponds to one product (item).
My idea is to fetch the 'Material' on the webpage which is inside the Innertext after tag. The id number changes from one page to page (sometimes ).
Here is the structure of the website:
<div style="position:relative;">
<div></div>
<table id="list-table" width="100%" tabindex="1" cellspacing="0" cellpadding="0" border="0" role="grid" aria-multiselectable="false" aria-labelledby="gbox_list-table" class="ui-jqgrid-btable" style="width: 930px;">
<tbody>
<tr class="jqgfirstrow" role="row" style="height:auto">
<td ...</td>
<td ...</td>
</tr>
<tr role="row" id="1" tabindex="-1" class="ui-widget-content jqgrow ui-row-ltr">
<td ...</td>
<td ...</td>
</tr>
<tr role="row" id="2" tabindex="-1" class="ui-widget-content jqgrow ui-row-ltr">
<td ...</td>
<td ...</td>
</tr>
<tr role="row" id="3" tabindex="-1" class="ui-widget-content jqgrow ui-row-ltr">
<td ...</td>
<td ...</td>
</tr>
<tr role="row" id="4" tabindex="-1" class="ui-widget-content jqgrow ui-row-ltr">
<td ...</td>
<td ...</td>
</tr>
<tr role="row" id="5" tabindex="-1" class="ui-widget-content jqgrow ui-row-ltr">
<td ...</td>
<td ...</td>
</tr>
<tr role="row" id="6" tabindex="-1" class="ui-widget-content jqgrow ui-row-ltr">
<td ...</td>
<td ...</td>
</tr>
<tr role="row" id="7" tabindex="-1" class="ui-widget-content jqgrow ui-row-ltr">
<td role="gridcell" style="padding-left:10px" title="Material" aria-describedby="list-table_">Material</td>
<td role="gridcell" style="" title="600D polyester." aria-describedby="list-table_">600D polyester.</td>
</tr>
<tr ...>
</tr>
</tbody>
</table> </div>
I would like to get "600D Polyester" as a result.
My (not working) code snippet is as is:
Sub ParseMaterial()
Dim Cell As Integer
Dim ItemNbr As String
Dim AElement As Object
Dim AElements As IHTMLElementCollection
Dim IE As MSXML2.XMLHTTP60
Set IE = New MSXML2.XMLHTTP60
Dim HTMLDoc As MSHTML.HTMLDocument
Dim HTMLBody As MSHTML.HTMLBody
Set HTMLDoc = New MSHTML.HTMLDocument
Set HTMLBody = HTMLDoc.body
For Cell = 1 To 5 'I iterate through the file row by row
ItemNbr = Cells(Cell, 3).Value 'ItemNbr isin the 3rd Column of my spreadsheet
IE.Open "GET", "http://www.example.com/?item=" & ItemNbr, False
IE.send
While IE.ReadyState <> 4
DoEvents
Wend
HTMLBody.innerHTML = IE.responseText
Set AElements = HTMLDoc.getElementById("list-table").getElementsByTagName("tr")
For Each AElement In AElements
If AElement.Title = "Material" Then
Cells(Cell, 14) = AElement.nextNode.value 'I write the material in the 14th column
End If
Next AElement
Application.Wait (Now + TimeValue("0:00:2"))
Next Cell
Thanks for your help !
Just a couple things that hopefully will get you in the right direction:
clean up a bit: remove the readystate property testing loop. The value returned by the readystate property will never change in this context - code will pause after the send instruction, to resume only once the server response is received, or has failed to do so. The readystate property will be set accordingly, and the code will resume execution. You should still test for the ready state, but the loop is just unnecessary
target the right HTML elements: you are searching through the tr elements - while the logic of how you use these elements in your code actually looks to point to td elements
make sure the properties are actually available for the objects you are using them on: to help you with this, try and declare all your variable as specific objects instead of the generic Object. This will activate intellisense. If you have a difficult time finding the actual name of your object as defined in the relevant library in a first place, declare it as the generic Object, run your code, and then inspect the type of the object - by printing typename(your_object) to the debug window for instance. This should put you on your way
I have also included some code below that may help. If you still can't get this to work and you can share your urls - plz do that.
Sub getInfoWeb()
Dim cell As Integer
Dim xhr As MSXML2.XMLHTTP60
Dim doc As MSHTML.HTMLDocument
Dim table As MSHTML.HTMLTable
Dim tableCells As MSHTML.IHTMLElementCollection
Set xhr = New MSXML2.XMLHTTP60
For cell = 1 To 5
ItemNbr = Cells(cell, 3).Value
With xhr
.Open "GET", "http://www.example.com/?item=" & ItemNbr, False
.send
If .readyState = 4 And .Status = 200 Then
Set doc = New MSHTML.HTMLDocument
doc.body.innerHTML = .responseText
Else
MsgBox "Error" & vbNewLine & "Ready state: " & .readyState & _
vbNewLine & "HTTP request status: " & .Status
End If
End With
Set table = doc.getElementById("list-table")
Set tableCells = table.getElementsByTagName("td")
For Each tableCell In tableCells
If tableCell.getAttribute("title") = "Material" Then
Cells(cell, 14).Value = tableCell.NextSibling.innerHTML
End If
Next tableCell
Next cell
End Sub
EDIT: as a follow-up to the further information you provided in the comment below - and the additional comments I have added
'Determine your product number
'Open an xhr for your source url, and retrieve the product number from there - search for the tag which
'text include the "productnummer:" substring, and extract the product number from the outerstring
'OR
'if the product number consistently consists of the fctkeywords you are entering in your source url
'with two "0" appended - just build the product number like that
'Open an new xhr for this url "http://www.pfconcept.com/cgi-bin/wspd_pcdb_cgi.sh/y/y2productspec-ajax.p?itemc=" & product_number & "&_search=false&rows=-1&page=1&sidx=&sord=asc"
'Load the response in an XML document, and retrieve the material information
Sub getInfoWeb()
Dim xhr As MSXML2.XMLHTTP60
Dim doc As MSXML2.DOMDocument60
Dim xmlCell As MSXML2.IXMLDOMElement
Dim xmlCells As MSXML2.IXMLDOMNodeList
Dim materialValueElement As MSXML2.IXMLDOMElement
Set xhr = New MSXML2.XMLHTTP60
With xhr
.Open "GET", "http://www.pfconcept.com/cgi-bin/wspd_pcdb_cgi.sh/y/y2productspec-ajax.p?itemc=10031700&_search=false&rows=-1&page=1&sidx=&sord=asc", False
.send
If .readyState = 4 And .Status = 200 Then
Set doc = New MSXML2.DOMDocument60
doc.LoadXML .responseText
Else
MsgBox "Error" & vbNewLine & "Ready state: " & .readyState & _
vbNewLine & "HTTP request status: " & .Status
End If
End With
Set xmlCells = doc.getElementsByTagName("cell")
For Each xmlCell In xmlCells
If xmlCell.Text = "Materiaal" Then
Set materialValueElement = xmlCell.NextSibling
End If
Next
MsgBox materialValueElement.Text
End Sub
EDIT2: an alternative automating IE
Sub searchWebViaIE()
Dim ie As SHDocVw.InternetExplorer
Dim doc As MSHTML.HTMLDocument
Dim anchors As MSHTML.IHTMLElementCollection
Dim anchor As MSHTML.HTMLAnchorElement
Dim prodSpec As MSHTML.HTMLAnchorElement
Dim tableCells As MSHTML.IHTMLElementCollection
Dim materialValueElement As MSHTML.HTMLTableCell
Dim tableCell As MSHTML.HTMLTableCell
Set ie = New SHDocVw.InternetExplorer
With ie
.navigate "http://www.pfconcept.com/cgi-bin/wspd_pcdb_cgi.sh/y/y2facetmain.p?fctkeywords=100317&world=general#tabs-4"
.Visible = True
Do While .readyState <> READYSTATE_COMPLETE Or .Busy = True
DoEvents
Loop
Set doc = .document
Set anchors = doc.getElementsByTagName("a")
For Each anchor In anchors
If InStr(anchor.innerHTML, "Product Specificatie") <> 0 Then
anchor.Click
Exit For
End If
Next anchor
Do While .readyState <> READYSTATE_COMPLETE Or .Busy = True
DoEvents
Loop
End With
For Each anchor In anchors
If InStr(anchor.innerHTML, "Product Specificatie") <> 0 Then
Set prodSpec = anchor
End If
Next anchor
Set tableCells = doc.getElementById("list-table").getElementsByTagName("td")
If Not tableCells Is Nothing Then
For Each tableCell In tableCells
If tableCell.innerHTML = "Materiaal" Then
Set materialValueElement = tableCell.NextSibling
End If
Next tableCell
End If
MsgBox materialValueElement.innerHTML
End Sub
Not related to tables or Excel ( I use MS-Access 2013) but directly related to the topic title. My solution is
Private Sub Sample(urlSource)
Dim httpRequest As New WinHttpRequest
Dim doc As MSHTML.HTMLDocument
Dim tags As MSHTML.IHTMLElementCollection
Dim tag As MSHTML.HTMLHtmlElement
httpRequest.Option(WinHttpRequestOption_UserAgentString) = "Mozilla/4.0 (compatible;MSIE 7.0; Windows NT 6.0)"
httpRequest.Open "GET", urlSource
httpRequest.send ' fetching webpage
Set doc = New MSHTML.HTMLDocument
doc.body.innerHTML = httpRequest.responseText
Set tags = doc.getElementsByTagName("a")
i = 1
For Each tag In tags
Debug.Print i
Debug.Print tag.href
Debug.Print tag.innerText
'Debug.Print tag.Attributes("any other attributes you need")() ' may return an object
i = i + 1
If i Mod 50 = 0 Then Stop
' or code to store results in a table
Next
End Sub