I am writing macro to extract the href value from a website, example here is to extract the value: '/listedco/listconews/SEHK/2015/0429/LTN201504291355_C.pdf' from the html code below. The href is one of the attributes of the html tag 'a', I have add the code getElementbyTagName'a' but it did not work, my question is how to extract that href value to column L. Anyone could help? Thanks in advance!
<a id="ctl00_gvMain_ctl03_hlTitle" class="news" href="/listedco/listconews/SEHK/2015/0429/LTN201504291355_C.pdf" target="_blank">二零一四年年報</a>
Sub Download_From_HKEX()
Dim internetdata As Object
Dim div_result As Object
Dim header_links As Object
Dim link As Object
Dim URL As String
Dim IE As Object
Dim i As Object
Dim ieDoc As Object
Dim selectItems As Variant
Dim h As Variant
Dim LocalFileName As String
Dim B As Boolean
Dim ErrorText As String
Dim x As Variant
'Key Ratios
For x = 1 To 1579
Set IE = New InternetExplorerMedium
IE.Visible = True
URL = "http://www.hkexnews.hk/listedco/listconews/advancedsearch/search_active_main_c.aspx"
IE.navigate URL
Do
DoEvents
Loop Until IE.readyState = 4
Application.Wait (Now + TimeValue("0:00:05"))
Call IE.Document.getElementById("ctl00_txt_stock_code").setAttribute("value", Worksheets("Stocks").Cells(x, 1).Value)
Set selectItems = IE.Document.getElementsByName("ctl00$sel_tier_1")
For Each i In selectItems
i.Value = "4"
i.FireEvent ("onchange")
Next i
Set selectItems = IE.Document.getElementsByName("ctl00$sel_tier_2")
For Each i In selectItems
i.Value = "159"
i.FireEvent ("onchange")
Next i
Set selectItems = IE.Document.getElementsByName("ctl00$sel_DateOfReleaseFrom_d")
For Each i In selectItems
i.Value = "01"
i.FireEvent ("onchange")
Next i
Set selectItems = IE.Document.getElementsByName("ctl00$sel_DateOfReleaseFrom_m")
For Each i In selectItems
i.Value = "04"
i.FireEvent ("onchange")
Next i
Set selectItems = IE.Document.getElementsByName("ctl00$sel_DateOfReleaseFrom_y")
For Each i In selectItems
i.Value = "1999"
i.FireEvent ("onchange")
Next i
Application.Wait (Now + TimeValue("0:00:02"))
Set ieDoc = IE.Document
With ieDoc.forms(0)
Call IE.Document.parentWindow.execScript("document.forms[0].submit()", "JavaScript")
.submit
End With
Application.Wait (Now + TimeValue("0:00:03"))
'Start here to extract the href value.
Set internetdata = IE.Document
Set div_result = internetdata.getElementById("ctl00_gvMain_ctl03_hlTitle")
Set header_links = div_result.getElementsByTagName("a")
For Each h In header_links
Set link = h.ChildNodes.Item(0)
Worksheets("Stocks").Cells(Range("L" & Rows.Count).End(xlUp).Row + 1, 12) = link.href
Next
Next x
End Sub
For Each h In header_links
Worksheets("Stocks").Cells(Range("L" & Rows.Count).End(xlUp).Row + 1, 12) = h.href
Next
EDIT: The id attribute is supposed to be unique in the document: there should only be a single element with any given id. So
IE.Document.getElementById("ctl00_gvMain_ctl03_hlTitle").href
should work.
WB.Document.GetElementById("ctl00_gvMain_ctl04_hlTitle").GetAttribute("href").ToString
Use a CSS selector to get the element then access its href attribute.
#ctl00_gvMain_ctl03_hlTitle
The above is element with id ctl00_gvMain_ctl03_hlTitle. "#" means id.
Debug.Print IE.document.querySelector("#ctl00_gvMain_ctl03_hlTitle").href
Related
I'm using the below Code to input Google's first images link in B1
for certain values in A1.
Public Sub Test()
Dim IE As InternetExplorer
Dim HTMLdoc As HTMLDocument
Dim imgElements As IHTMLElementCollection
Dim imgElement As HTMLImg
Dim aElement As HTMLAnchorElement
Dim N As Integer, I As Integer
Dim Url As String, Url2 As String
Dim LastRow As Long
Dim m, sImageSearchString
LastRow = Range("A" & Rows.Count).End(xlUp).Row
For I = 1 To LastRow
Url = "http://www.google.co.in/search?q=" & Cells(I, 1) & "&source=lnms&tbm=isch&sa=X&rnd=1"
Set IE = New InternetExplorer
With IE
.Visible = False
.Navigate Url 'sWebSiteURL
Do Until .readyState = 4: DoEvents: Loop
'Do Until IE.document.readyState = "Complete": DoEvents: Loop
Set HTMLdoc = .document
Set imgElements = HTMLdoc.getElementsByTagName("IMG")
N = 1
For Each imgElement In imgElements
If InStr(imgElement.src, sImageSearchString) Then
If imgElement.ParentNode.nodeName = "A" Then
Set aElement = imgElement.ParentNode
Url2 = imgElement.src
N = N + 1
End If
End If
Next
Cells(I, 2) = Url2
IE.Quit
Set IE = Nothing
End With
Next
End Sub
however I'm receiving the below error, can you please advise?
I'm using Windows 10, Excel 365
In VBA Menu - Tools - References - tick MS Internet Controls.
Or
Using Late Binding
Dim ie As Object
Set ie = CreateObject("InternetExplorer.Application")
I'm trying to get the innertext of 'wfm-bodyText'. So, I want to loop through items on a webpage and import, in this case, 'H. Validate Gamer Correct'
I thought the script below would work, but it actually gives me all kinds of extra text!!
Public Sub CopyFromURL()
Dim IE As InternetExplorer, doc As HTMLDocument
Dim thisClass As IHTMLElement2, thisLink As IHTMLElement
Dim rng As Range, cell As Range
Const READYSTATE_COMPLETE As Integer = 4
Dim TR_col As Object, TR As Object
Dim TD_col As Object, TD As Object
Dim row As Long, col As Long
row = 1
'Set rng = Range("A1:A5")
'For Each cell In rng
Set IE = CreateObject("InternetExplorer.Application")
IE.Navigate URL
Do While (IE.Busy Or IE.ReadyState <> READYSTATE_COMPLETE)
DoEvents
Loop
Set TR_col = IE.Document.getElementsByTagName("TR")
For Each TR In TR_col
Set TD_col = TR.getElementsByTagName("TD")
row = ActiveSheet.Cells(ActiveSheet.Rows.Count, "A").End(xlUp).row
col = 1
For Each TD In TD_col
Cells(row, col) = TD.innerText
col = col + 1
Next
row = row + 1
Next
'Next cell
IE.Quit
End Sub
The URL is behind a firewall. I can't share it.
Again, below is the structure of the page. I want to copy the 'E. Verify phone number if applicable' into my sheet.
I tried the script below in an attempt to get every element from the page.
Sub DumpData()
Set IE = CreateObject("InternetExplorer.Application")
IE.Visible = True
URL = "http://www.SharePoint.aspx"
'Wait for site to fully load
IE.Navigate2 URL
Do While IE.Busy = True
DoEvents
Loop
RowCount = 1
With Sheets("Sheet1")
.Cells.ClearContents
RowCount = 1
For Each itm In IE.document.all
.Range("A" & RowCount) = itm.tagname
.Range("B" & RowCount) = itm.ID
.Range("C" & RowCount) = itm.classname
.Range("D" & RowCount) = Left(itm.innertext, 1024)
RowCount = RowCount + 1
Next itm
End With
End Sub
I still can't tell where the relevant data is coming from.
Sub GetAllLinks()
Dim IE As Object
Set IE = CreateObject("InternetExplorer.Application")
IE.Visible = True
url_name = "http://www.trovanumeri.com/?azione=cerca&cerca=portoscuso"
If url_name = "" Then Exit Sub
IE.navigate url_name
Do
DoEvents
Loop Until IE.readyState = 4
'<a href="http://google.com">Click Here
Set AllHyperLinks = IE.document.getElementsByTagName("A")
Foglio1.ListBox1.Clear
'this is the code
For Each hyper_link In AllHyperLinks
Foglio1.ListBox1.AddItem hyper_link
Next
MsgBox "Done!"
End Sub
Take a look at the below example, showing how to retrieve the data from via IE automation and DOM processing:
Option Explicit
Sub GetData()
Dim oIE As Object
Dim lCurRow As Long
Dim lResultIndex As Long
Dim sUrl As String
Dim oTable0 As Object
Dim oTable1 As Object
Dim oTable2 As Object
Dim oTable3 As Object
Dim sTbl3Text As String
Dim cAncorNodes As Object
Dim oAncorNode As Variant
Dim sRowText As String
Dim oRowNode As Object
Dim aData() As Variant
Set oIE = CreateObject("InternetExplorer.Application")
With oIE
.Visible = True
Sheets(1).Cells.Delete
lCurRow = 1 ' Worksheet rows counter
lResultIndex = 0 ' Search result index counter
Do
' Navigate to the page
sUrl = "http://www.trovanumeri.com/?azione=cerca&cerca=cagliari&da=" & lResultIndex
.Navigate sUrl
' Wait IE
Do While .ReadyState < 3 Or .Busy
DoEvents
Loop
' Wait Document
Do Until .Document.ReadyState = "complete"
DoEvents
Loop
' Retrieve target tables
Set oTable0 = .Document.getElementsByTagName("table")(0)
Set oTable1 = oTable0.getElementsByTagName("table")(1)
Set oTable2 = oTable1.getElementsByTagName("table")(3)
' Get and process ancor nodes
Set cAncorNodes = oTable2.getElementsByTagName("a")
For Each oAncorNode In cAncorNodes
With CreateObject("Scripting.Dictionary")
' Add .href to result
.Add .Count, oAncorNode.href
' Get ancor's parent row
Set oRowNode = oAncorNode.ParentNode.ParentNode.ParentNode
Do
' Add nonemtpy row to result
sRowText = Trim(Replace(oRowNode.innerText, vbCrLf, ""))
If sRowText <> "" Then .Add .Count, sRowText
' If last row then exit
If IsNull(oRowNode.nextElementSibling) Then Exit Do
' Proceed with next row
Set oRowNode = oRowNode.nextElementSibling
' If net row contains oAncorNode then exit
If oRowNode.getElementsByTagName("a").Length > 0 Then Exit Do
DoEvents
Loop
' Get results as array
aData = .Items
End With
' Output array to worksheet row
With Sheets(1).Cells(lCurRow, 1)
.Resize(1, UBound(aData) + 1) = aData
.Select
End With
lCurRow = lCurRow + 1
DoEvents
Next
' Get table containing 'Next' button
Set oTable3 = oTable0.getElementsByTagName("table")(7)
sTbl3Text = oTable3.innerText
' If no 'Next' button then exit
If InStr(sTbl3Text, "Avanti >>") = 0 Then Exit Do
lResultIndex = lResultIndex + 10
DoEvents
Loop
.Quit
End With
End Sub
I have been working on this VBA Code for a long time. The goal is to copy something from my excel document, search for it online, and then pull back something from the html code. I keep getting an error code that says "Object Required" and sometimes it says "Object variable or With block variable not set." It is all focused on the line "set elementTWO = elementONE.Item(i).innerText
I have tried deleting the word "Set" I have tried changing elementTWO to a string. Another really weird piece is the For...Next Loop won't let me do "exit for." It returns an error. I have tried a few other things to no avail. Any help is greatly appreciated
Option Explicit
Option Compare Text
Public Enum READYSTATE
READYSTATE_UNINITIALIZED = 0
READYSTATE_LOADING = 1
READYSTATE_LOADED = 2
READYSTATE_INTERACTIVE = 3
READYSTATE_COMPLETE = 4
End Enum
Sub GetCategory()
Dim RowNo, ColNo, i As Integer
Dim Parent, Item, URL1, URL2, URL3 As String
Dim objHTML As Object
Dim elementONE As Object
Dim elementTWO As String
RowNo = 3
ColNo = 5
URL1 = "http://www.infores.com/public/us/knowledgegroup/resources/resources.pli?defaultDataType=&pageid=validatorresults&upc1="
URL2 = "&upc2="
URL3 = "&submitupc=find+it%21"
Dim ie As Object
Set ie = CreateObject("InternetExplorer.Application")
With Worksheets(1)
While RowNo <= 5
Parent = Cells(RowNo, ColNo)
Item = Cells(RowNo, ColNo + 1)
With ie
.navigate URL1 & Parent & URL2 & Item & URL3
.Visible = False
'Delay while IE loads
Do While (ie.Busy Or ie.READYSTATE <> READYSTATE.READYSTATE_COMPLETE)
DoEvents
Loop
'Put html code in document object
Set objHTML = .document
DoEvents
End With
Set elementONE = objHTML.getElementsByTagName("TD") 'Break Down HTML code
For i = 1 To elementONE.Length
elementTWO = elementONE.Item(i).innerText
If elementTWO = "Description" Then 'Find the Category
Cells(RowNo, ColNo + 4) = elementONE.Item(i + 1).innerText 'Put Category into excel
End If
Next i
DoEvents
ie.Quit
RowNo = RowNo + 1
Wend
End With
End Sub
this I think is what you want, it processes the loop but then fails during it. you can atleast debug it and see why:
Option Explicit
Option Compare Text
Public Enum READYSTATE
READYSTATE_UNINITIALIZED = 0
READYSTATE_LOADING = 1
READYSTATE_LOADED = 2
READYSTATE_INTERACTIVE = 3
READYSTATE_COMPLETE = 4
End Enum
Sub GetCategory()
Dim RowNo, ColNo, i As Integer
Dim Parent, Item, URL1, URL2, URL3 As String
Dim objHTML As Object, elementONE As Object, elementTWO As String
RowNo = 3
ColNo = 5
URL1 = "http://www.infores.com/public/us/knowledgegroup/resources/resources.pli?defaultDataType=&pageid=validatorresults&upc1="
URL2 = "&upc2="
URL3 = "&submitupc=find+it%21"
Dim ie As Object
'Set ie = CreateObject("InternetExplorer.Application")
With Worksheets(1)
While RowNo <= 5
Parent = Cells(RowNo, ColNo)
Item = Cells(RowNo, ColNo + 1)
Set ie = CreateObject("InternetExplorer.Application")
With ie
.navigate URL1 & Parent & URL2 & Item & URL3
.Visible = False
'Delay while IE loads
Do While (ie.Busy Or ie.READYSTATE <> READYSTATE.READYSTATE_COMPLETE)
DoEvents
Loop
'Put html code in document object
Set objHTML = .document
DoEvents
End With
Set elementONE = objHTML.getElementsByTagName("TD") 'Break Down HTML code
For i = 0 To elementONE.Length - 1
elementTWO = elementONE(i).innerText
If elementTWO = "Description" Then 'Find the Category
Cells(RowNo, ColNo + 4) = elementONE(i + 1).innerText 'Put Category into excel
End If
Next i
DoEvents
ie.Quit
RowNo = RowNo + 1
Wend
End With
End Sub
I am trying to write a procedure which enters a date into an input box
<input name="Mdate" type="text" id="Mdate" size="30" value="" /></td>
clicks a submit button
<input type="submit" name="button" id="button" value="Submit" />
then scrapes the resulting data, which appears in the "a" tags.
<center>
<b>Tuesday, 6 January 2015</b><br />
Ruakaka
This data is not available until the submit button has been entered. My attempt is posted in full below. The problem I seem to be having is that i am not able to access the modified html code (modified by clicking submit). Can anyone provide any suggestions?
'dimension variables
Dim ie As InternetExplorer
Dim htmldoc As MSHTML.IHTMLDocument 'Document object
Dim inputs As MSHTML.IHTMLElementCollection 'Element collection for "input" tags
Dim eles1, eles2 As MSHTML.IHTMLElementCollection 'Element collection for th tags
Dim element As MSHTML.IHTMLElement 'input elements
Dim ele1, ele2 As MSHTML.IHTMLElement 'Header elements
'Open InternetExplorer
Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = False 'make IE invisible
'Navigate to webpage
Dim ieURL As String: ieURL = "http://www.racenet.com.au/horse-racing-results/" 'set URL from which to retrieve racemeet and date data
ie.navigate ieURL 'navigate to URL
Do While ie.Busy Or ie.readyState <> 4 'wait for page to load
DoEvents
Loop
Set htmldoc = ie.document 'Document webpage
Set inputs = htmldoc.getElementsByTagName("input") 'Find all input tags
Dim dd, mm, yyyy As Integer
Dim startdate, enddate As Date
Dim i, j, k As Long
Dim raceMeet, raceURL As String
startdate = #1/1/2008#: enddate = Date - 1
Dim racemeetArr As Variant
ReDim racemeetArr(1 To 2, 1)
For i = startdate To enddate
dd = Day(i): mm = Month(i): yyyy = Year(i)
For Each element In inputs
If element.Name = "Mdate" Then
element.Value = yyyy & "-" & mm & "-" & dd
Else
If element.Name = "button" Then
element.Click
'insert scraper
Set eles1 = htmldoc.getElementsByTagName("a") 'Find all centre tags
For Each ele1 In eles1
If InStr(ele1.href, "/horse-racing-results/") > 0 Then
raceMeet = ele1.innerText
raceURL = ele1.innerHTML
ReDim Preserve racemeetArr(1 To 2, UBound(racemeetArr, 2) + 1)
racemeetArr(1, UBound(racemeetArr, 2)) = raceMeet
racemeetArr(2, UBound(racemeetArr, 2)) = raceURL
End If
Next ele1
Else
End If
End If
Next element
Stop
Next i
ie.Quit
Insert a condition to wait while the page is loading.
The following rewrite successfully fetches data from the target page on my pc:
Private Sub CommandButton1_Click()
'dimension variables
Dim ie As InternetExplorer
Dim htmldoc As MSHTML.IHTMLDocument 'Document object
Dim inputs As MSHTML.IHTMLElementCollection 'Element collection for "input" tags
Dim eles1, eles2 As MSHTML.IHTMLElementCollection 'Element collection for th tags
Dim element As MSHTML.IHTMLElement 'input elements
Dim ele1, ele2 As MSHTML.IHTMLElement 'Header elements
'Open InternetExplorer
Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = True 'make IE invisible
'Navigate to webpage
Dim ieURL As String: ieURL = "http://www.racenet.com.au/horse-racing-results/" 'set URL from which to retrieve racemeet and date data
ie.navigate ieURL 'navigate to URL
Do While ie.Busy Or ie.readyState <> 4 'wait for page to load
DoEvents
Loop
Set htmldoc = ie.document 'Document webpage
Set inputs = htmldoc.getElementsByTagName("input") 'Find all input tags
Dim dd, mm, yyyy As Integer
Dim startdate, enddate As Date
Dim i, j, k As Long
Dim raceMeet, raceURL As String
startdate = #1/1/2008#: enddate = Date - 1
Dim racemeetArr As Variant
ReDim racemeetArr(1 To 2, 1)
For i = startdate To enddate
dd = Day(i): mm = Month(i): yyyy = Year(i)
For Each element In inputs
If element.Name = "Mdate" Then
element.Value = yyyy & "-" & mm & "-" & dd
Else
If element.Name = "button" Then
element.Click
Exit For
End If
End If
Next element
Do
' Wait until the Browser is loaded'
Loop Until ie.readyState = READYSTATE_COMPLETE
'insert scraper
Set eles1 = htmldoc.getElementsByTagName("a") 'Find all centre tags
For Each ele1 In eles1
If InStr(ele1.href, "/horse-racing-results/") > 0 Then
raceMeet = ele1.innerText
raceURL = ele1.innerHTML
ReDim Preserve racemeetArr(1 To 2, UBound(racemeetArr, 2) + 1)
racemeetArr(1, UBound(racemeetArr, 2)) = raceMeet
racemeetArr(2, UBound(racemeetArr, 2)) = raceURL
End If
Next ele1
Stop
Next i
ie.Quit
End Sub
Edit:
After analyzing the HTTP requests I managed to slim down the code a little bit (results can be queried directly without filling the form and submitting the page)
I am not a huge fan of expensive array ReDims, so I created a class instead, and save the results in a collection of that class (feel free to use it or not).
Add a new class module, call it clRaceMeet and paste this code:
Option Explicit
Private pMeet As String
Private pUrl As String
Public Property Let Meet(ByVal Val As String)
pMeet = Val
End Property
Public Property Get Meet() As String
Meet = pMeet
End Property
Public Property Let URL(ByVal Val As String)
pUrl = Val
End Property
Public Property Get URL() As String
URL = pUrl
End Property
Then, use this modified code version to scrape the data and dump it to the debugging window:
Option Explicit
Private Sub CommandButton1_Click()
'dimension variables
Dim ie As InternetExplorer
Dim ieURL As String
Dim dd As Integer
Dim mm As Integer
Dim yyyy As Integer
Dim startDate As Date
Dim endDate As Date
Dim i As Long
Dim htmlDoc As MSHTML.IHTMLDocument
Dim colLeftEleColl As MSHTML.IHTMLElementCollection
Dim colLeftEle As MSHTML.IHTMLElement
Dim centerEleColl As MSHTML.IHTMLElementCollection
Dim centerEle As MSHTML.IHTMLElement
Dim raceMeet As String
Dim raceURL As String
Dim objRaceMeet As clRaceMeet
Dim raceMeetColl As New Collection
'Open InternetExplorer
Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = True
startDate = #1/1/2009#
endDate = Date - 1
For i = startDate To endDate
dd = Day(i)
mm = Month(i)
yyyy = Year(i)
ieURL = "http://www.racenet.com.au/horse-racing-results-search.asp?Mdate=" & yyyy & "-" & mm & "-" & dd
ie.navigate ieURL
Do
' Wait until the Browser is loaded'
Loop Until ie.readyState = READYSTATE_COMPLETE
Set htmlDoc = ie.document
'insert scraper
Set colLeftEleColl = htmlDoc.getElementById("ColLeft").all
'Loop through elements of ColLeft div
For Each colLeftEle In colLeftEleColl
If colLeftEle.tagName = "CENTER" Then
Set centerEleColl = colLeftEle.all
'Loop through elements of <center> tag
For Each centerEle In centerEleColl
If centerEle.tagName = "A" Then
If InStr(centerEle.href, "/horse-racing-results/") > 0 Then
raceMeet = centerEle.innerText
raceURL = centerEle.href
Set objRaceMeet = New clRaceMeet
objRaceMeet.Meet = raceMeet
objRaceMeet.URL = raceURL
raceMeetColl.Add objRaceMeet
End If
End If
Next centerEle
Exit For
End If
Next colLeftEle
' Dump results to immediate window:
For Each objRaceMeet In raceMeetColl
Debug.Print objRaceMeet.Meet & " - " & objRaceMeet.URL
Next objRaceMeet
'Stop
Next i
ie.Quit
End Sub
Happy betting! :)
I toyed around with the last one and the for each loop within the for next loop has to go after it. I then also made it list into sheet1 and it worked. I did a few minor adjustments such as adding a variable to increment the cells.
this code didn't produce the actual results just the websites, not sure if that is what you were aiming for.