Scraping dynamic web page with VBA based on XMLHTTP object - vba

I have problem with scraping table data from this page [http://www.eex.com/en/market-data/power/derivatives-market/phelix-futures]. I use this code, but do not scrape any data:
Public Sub ScrapTableData()
Dim sURL As String
Dim XMLHttpRequest As XMLHTTP
Dim HTMLDoc As New HTMLDocument
Dim elc As HTMLHtmlElement
Dim i As Integer
sURL = "http://www.eex.com/en/market-data/power/derivatives-market/phelix-futures"
Set XMLHttpRequest = New MSXML2.XMLHTTP
XMLHttpRequest.Open "GET", sURL, False
XMLHttpRequest.responseXML.async = False
XMLHttpRequest.send
Do While XMLHttpRequest.Status <> 200
DoEvents
Loop
While XMLHttpRequest.ReadyState <> 4
DoEvents
Wend
HTMLDoc.body.innerHTML = XMLHttpRequest.responseText
' Tables
Dim tbl As HTMLTable, row As HTMLTableRow, cell As HTMLTableCell
i = 1
For Each tbl In HTMLDoc.getElementsByTagName("table")
For Each row In tbl.Rows
For Each cell In row.Cells
ActiveSheet.Cells(i, 5) = cell.innerText
i = i + 1
Next
Next
Next
End Sub
My code does not find HTML table tags.
Also, if I use this part of code, do not list all HTML tags (for example HTML DIV tag) and HTML that describes 6 buttons:
i = 0
Dim elc As HTMLHtmlElement
For Each elc In HTMLDoc.all
Worksheets("Tables").Range("A1").Offset(i, 0) = elc.tagName
i = i + 1
Next
6 buttons: Year, Quarter, Month,..., Day
I need to simulate click on them to display (scrape) different tables' data.

I don't think the XMLHTTP approach will work in this case, you need to open IE. The following code will do this. You may need to modify the loop to put data in your worksheet, I didn't tinker with this. At the end, I've also placed some code that will change the tabs. Hope this helps
Sub test()
' open IE, navigate to the website of interest and loop until fully loaded
Set IE = CreateObject("InternetExplorer.Application")
my_url = "http://www.eex.com/en/market-data/power/derivatives-market/phelix-futures"
With IE
.Visible = True
.navigate my_url
.Top = 50
.Left = 530
.Height = 400
.Width = 400
Do Until Not IE.Busy And IE.readyState = 4
DoEvents
Loop
End With
' Collect data from tables
Set tbl = IE.document.getElementsByTagName("table")
For Each itm In tbl
i = 1
For Each itm2 In itm.Rows
For Each cell In itm2.Cells
ActiveSheet.Cells(i, 5) = cell.innertext
i = i + 1
Next
Next
Next
' Click on the 6 buttons, substitute "week", "year", etc. for the button you want to click
Set Results = IE.document.getElementsByTagName("a")
For Each itm In Results
If InStr(1, itm.innertext, "month", vbTextCompare) > 0 Then
itm.Click
Do Until Not IE.Busy And IE.readyState = 4
DoEvents
Loop
Exit For
End If
Next
' Do whatever is next
End Sub

Related

Combobox Option Value Has spaces How do i Call them through VBA from excel

Sub Sprint()
Dim IE As Object
Dim objelement As Object
Dim c As Integer
Dim LastRow, i, j As Integer
Set IE = CreateObject("InternetExplorer.Application")
With IE
.Visible = True
.navigate "http://xxx"
'wait until first page loads
Do Until .readyState = 4
DoEvents
Loop
On Error Resume Next
Set sht = ThisWorkbook.Worksheets("TestData1")
LastRow = sht.Cells(sht.Rows.Count, "B").End(xlUp).Row
For j = 2 To LastRow
i = 1
If IE.Document.all.Item(i).innertext = "ÔÍÑ (ãîñ. ïîøëèíà)" Then
IE.Document.all.Item(i).Click
End If
IE.Visible = True
While IE.Busy
DoEvents 'wait until IE is done loading page.
Wend
With IE.Document
'text boxes
IE.Document.getElementById("txtSubscriberId").Value = sht.Cells(j, 9)
.all("txtSubscriberId").Value = sht.Cells(j, 9)
.all("btnPermValidateAddr").Click
.all("txtSubscriberId").Value = sht.Cells(j, 9)
'This piece is selected in web page drop down since option value has no
spaces
IE.Document.getElementById("ddlProdCd").Value = sht.Cells(j, 20)
IE.Document.getElementById("ddlProdCd").FireEvent ("onfocus")
IE.Document.getElementById("ddlProdCd").FireEvent ("onchange")
IE.Document.getElementById("ddlProdCd").FireEvent ("onmousewheel")
'This piece is not selected in web page drop down since option value has
spaces
IE.Document.getElementById("ddlPlanDesc").Value = sht.Cells(j, 21)
IE.Document.getElementById("ddlPlanDesc").FireEvent ("onfocus")
IE.Document.getElementById("ddlPlanDesc").FireEvent ("onmousewheel")
IE.Document.getElementById("ddlPlanDesc").FireEvent ("onchange")
End With
Set IE = Nothing
Next j
End With
End Sub
In short 21st column value is not selected in webpage but 20th value is selected since in webpage option value has no space for 20th column value but 21st Column has space in option value in webpage itself
Refer attached Image
https://i.stack.imgur.com/HwwKB.png

vba scrape non-static web table data

I'm trying to create a macro that scrape and import tables from a web page,
More specifically, I want to get two tablestables pointed by arrows, please ignore the text in the table if it doesn't make sense, I translated using google. These tables are updated automatically so I used IE approach(by #ron), didn't scrape any data. I'm exhausted, can anyone please help me? I'm a vba newbie, appreciate any help.
Sub test()
' open IE, navigate to the website of interest and loop until fully loaded
Set IE = CreateObject("InternetExplorer.Application")
my_url = "http://www.neeq.com.cn/static/statisticdata.html"
With IE
.Visible = False
.navigate my_url
.Top = 50
.Left = 530
.Height = 400
.Width = 400
Do Until Not IE.busy And IE.ReadyState = 4
DoEvents
Loop
End With
Set tbl = IE.Document.getElementsByTagName("table")
For Each itm In tbl
i = 1
For Each itm2 In itm.Rows
For Each cell In itm2.Cells
ActiveSheet.Cells(i, 2) = cell.innerText
i = i + 1
Next
Next
Next
end sub()
You must select "tr" from your table object For Each Rows
As you use IE, non statix should be return (javascript is executed)
i = 1
For Each itm2 In tbl.getElementsByTagName("tr")
test with debug.print
you select html then tables -> tr -> td
Sub test()
' open IE, navigate to the website of interest and loop until fully loaded
Set IE = CreateObject("InternetExplorer.Application")
my_url = "http://www.neeq.com.cn/static/statisticdata.html"
With IE
.Visible = False
.navigate my_url
.Top = 50
.Left = 530
.Height = 400
.Width = 400
Do Until Not IE.busy And IE.ReadyState = 4
DoEvents
Loop
End With
Set tbl = IE.Document.getElementsByTagName("table")
For Each itm In tbl
i = 1
For Each itm2 In itm.getElementsByTagName("tr")
For Each cell In itm2.getElementsByTagName("td")
ActiveSheet.Cells(i, 2) = cell.innerText
i = i + 1
Next
Next
Next
end sub

Extract URL from Internet popup window?

I have the following code that I am using to extract data from a sports website. My issue is that I am unable to locate the url for a pop-up window on this website - therefore, I am not sure how I could extract data from this window. The pop-window can be accessed by clicking the blue icon next to a players name, and the data I need is on the second tab of the pop-up window.
Sub Extract_goals()
Dim url As String, links_count As Integer
Dim i As Integer, j As Integer, row As Integer
Dim XMLHTTP As Object, html As Object
Dim tr_coll As Object, tr As Object
Dim td_coll As Object, td As Object
links_count = 40
For i = 1 To links_count
url = "http://fantasy.premierleague.com/stats/elements/?stat_filter=goals_scored&element_filter=0&page=" & i & ""
Set XMLHTTP = CreateObject("MSXML2.XMLHTTP")
XMLHTTP.Open "GET", url, False
XMLHTTP.send
Set html = CreateObject("htmlfile")
html.body.innerHTML = XMLHTTP.ResponseText
Set tbl = html.getelementsbytagname("Table")
Set tr_coll = tbl(0).getelementsbytagname("TR")
For Each tr In tr_coll
j = 1
Set td_col = tr.getelementsbytagname("TD")
For Each td In td_col
Cells(row + 1, j).Value = td.innerText
j = j + 1
Next
row = row + 1
Next
Next
End Sub
Any help with this is appreciated.
Thanks,
Shahid
I used a different method to get this (by creating an Internet Explorer object), because I could not get it to work exactly the same way using the MSXML2.XMLHTTP object.
I got as far as finding the URL of the popup window, but have yet to discover how to pull data from that window. If I have more time, I will play more, but perhaps this gets you over the hump and you can figure out the rest.
Sub Extract_goals2()
Dim ie As Object
Dim doc As Object
Set ie = CreateObject("InternetExplorer.Application")
With ie
.Visible = True
links_count = 40
For i = 1 To links_count
.navigate "http://fantasy.premierleague.com/stats/elements/?stat_filter=goals_scored&element_filter=0&page=" & i & ""
Do
DoEvents
Loop Until Not .busy Or .readyState <> 4
Set doc = .document
Dim tbl As Object
Set tbl = doc.getelementsbytagname("Table")
Dim tr_coll As Object
Set tr_coll = tbl(0).getelementsbytagname("TR")
For Each tr In tr_coll
j = 1
Set td_col = tr.getelementsbytagname("TD")
For Each td In td_col
If j = 2 Then 'only do this on 2nd table column
Set td_a = td.getelementsbytagname("a")
Debug.Print td_a(o).href 'this will provide the exact URL
td_a(o).Click 'this will actually open the pop-up box
'my thoughts were then to work with the elements in this URL to extract what you need
Else
Cells(row + 1, j).Value = td.innerText
End If
j = j + 1
Next
row = row + 1
Next
Next
End With
End Sub

pull certain table from ssrs site to excel using macro

I have been searching web for two weeks to try find a macro that would pull certain table from SSRS website,
I tired so many different options but keep getting the wrong info.
This is the site I'm trying to pull the data from
http://apps.aspose.com/ssrs-rendering-extensions/Pages/Report.aspx?ItemPath=%2fAdventureWorks+2008+Sample+Reports%2fCompany+Sales+2008
What I'm trying to get is the table with this data
2002 2003
Accessories $93,797 $595,014
Bikes $26,664,534 $35,199,346
Clothing $489,820 $1,024,474
Components $3,611,041 $5,489,741
This is my macro
Sub submitFeedback3()
marker = 0
Set objShell = CreateObject("Shell.Application")
IE_count = objShell.Windows.Count
For x = 0 To (IE_count - 1)
'On Error Resume Next ' sometimes more web pages are counted than are open
my_url = objShell.Windows(x).Document.Location
my_title = objShell.Windows(x).Document.Title
If my_title Like "Company Sales" & "*" Then 'compare to find if the desired web page is already open
Set IE = objShell.Windows(x)
marker = 1
Exit For
Else
End If
Next
If marker = 0 Then
MsgBox ("A matching webpage was NOT found")
'Dim IE As Object
Set IE = CreateObject("InternetExplorer.Application")
IE.Visible = True
IE.Navigate "http://apps.aspose.com/ssrs-rendering-extensions/Pages/Report.aspx?ItemPath=%2fAdventureWorks+2008+Sample+Reports%2fCompany+Sales+2008"
On Error Resume Next
Do While IE.Busy: DoEvents: Loop
Do Until IE.ReadyState = READYSTATE_COMPLETE: DoEvents: Loop
Else
MsgBox ("A matching webpage was found")
End If
Dim TR_col As Object, Tr As Object
Dim TD_col As Object, Td As Object
Dim row As Long, col As Long
Dim html As Object
Set html = IE.Document
html.getElementById ("oReportDiv")
row = 1
col = 1
Set TR_col = html.getElementsByTagName("td")
For Each Tr In TR_col
Set TD_col = Tr.getElementsByTagName("div")
For Each Td In TD_col
Cells(row, col) = Td.innerText
col = col + 1
Next
col = 1
row = row + 1
Next
MsgBox ("Done")
End Sub
Thank you in advance

Excel VBA Macro: Scraping data from site table that spans multiple pages

Thanks in advance for the help. I'm running Windows 8.1, I have the latest IE / Chrome browsers, and the latest Excel. I'm trying to write an Excel Macro that pulls data from StackOverflow (https://stackoverflow.com/tags). Specifically, I'm trying to pull the date (that the macro is run), the tag names, the # of tags, and the brief description of what the tag is. I have it working for the first page of the table, but not for the rest (there are 1132 pages at the moment). Right now, it overwrites the data everytime I run the macro, and I'm not sure how to make it look for the next empty cell before running.. Lastly, I'm trying to make it run automatically once per week.
I'd much appreciate any help here. Problems are:
Pulling data from the web table beyond the first page
Making it scrape data to the next empty row rather than overwriting
Making the Macro run automatically once per week
Code (so far) is below. Thanks!
Enum READYSTATE
READYSTATE_UNINITIALIZED = 0
READYSTATE_LOADING = 1
READYSTATE_LOADED = 2
READYSTATE_INTERACTIVE = 3
READYSTATE_COMPLETE = 4
End Enum
Sub ImportStackOverflowData()
'to refer to the running copy of Internet Explorer
Dim ie As InternetExplorer
'to refer to the HTML document returned
Dim html As HTMLDocument
'open Internet Explorer in memory, and go to website
Set ie = New InternetExplorer
ie.Visible = False
ie.navigate "http://stackoverflow.com/tags"
'Wait until IE is done loading page
Do While ie.READYSTATE <> READYSTATE_COMPLETE
Application.StatusBar = "Trying to go to StackOverflow ..."
DoEvents
Loop
'show text of HTML document returned
Set html = ie.document
'close down IE and reset status bar
Set ie = Nothing
Application.StatusBar = ""
'clear old data out and put titles in
'Cells.Clear
'put heading across the top of row 3
Range("A3").Value = "Date Pulled"
Range("B3").Value = "Keyword"
Range("C3").Value = "# Of Tags"
'Range("C3").Value = "Asked This Week"
Range("D3").Value = "Description"
Dim TagList As IHTMLElement
Dim Tags As IHTMLElementCollection
Dim Tag As IHTMLElement
Dim RowNumber As Long
Dim TagFields As IHTMLElementCollection
Dim TagField As IHTMLElement
Dim Keyword As String
Dim NumberOfTags As String
'Dim AskedThisWeek As String
Dim TagDescription As String
'Dim QuestionFieldLinks As IHTMLElementCollection
Dim TodaysDate As Date
Set TagList = html.getElementById("tags-browser")
Set Tags = html.getElementsByClassName("tag-cell")
RowNumber = 4
For Each Tag In Tags
'if this is the tag containing the details, process it
If Tag.className = "tag-cell" Then
'get a list of all of the parts of this question,
'and loop over them
Set TagFields = Tag.all
For Each TagField In TagFields
'if this is the keyword, store it
If TagField.className = "post-tag" Then
'store the text value
Keyword = TagField.innerText
Cells(RowNumber, 2).Value = TagField.innerText
End If
If TagField.className = "item-multiplier-count" Then
'store the integer for number of tags
NumberOfTags = TagField.innerText
'NumberOfTags = Replace(NumberOfTags, "x", "")
Cells(RowNumber, 3).Value = Trim(NumberOfTags)
End If
If TagField.className = "excerpt" Then
Description = TagField.innerText
Cells(RowNumber, 4).Value = TagField.innerText
End If
TodaysDate = Format(Now, "MM/dd/yy")
Cells(RowNumber, 1).Value = TodaysDate
Next TagField
'go on to next row of worksheet
RowNumber = RowNumber + 1
End If
Next
Set html = Nothing
'do some final formatting
Range("A3").CurrentRegion.WrapText = False
Range("A3").CurrentRegion.EntireColumn.AutoFit
Range("A1:C1").EntireColumn.HorizontalAlignment = xlCenter
Range("A1:D1").Merge
Range("A1").Value = "StackOverflow Tag Trends"
Range("A1").Font.Bold = True
Application.StatusBar = ""
MsgBox "Done!"
End Sub
There's no need to scrape Stack Overflow when they make the underlying data available to you through things like the Data Explorer. Using this query in the Data Explorer should get you the results you need:
select t.TagName, t.Count, p.Body
from Tags t inner join Posts p
on t.ExcerptPostId = p.Id
order by t.count desc;
The permalink to that query is here and the "Download CSV" option which appears after the query runs is probably the easiest way to get the data into Excel. If you wanted to automate that part of things, the direct link to the CSV download of results is here
You can improve this to parse out exact elements but it loops all the pages and grabs all the tag info (everything next to a tag)
Option Explicit
Public Sub ImportStackOverflowData()
Dim ie As New InternetExplorer, html As HTMLDocument
Application.ScreenUpdating = False
With ie
.Visible = True
.navigate "https://stackoverflow.com/tags"
While .Busy Or .READYSTATE < 4: DoEvents: Wend
Set html = .document
Dim numPages As Long, i As Long, info As Object, item As Object, counter As Long
numPages = html.querySelector(".page-numbers.dots ~ a").innerText
For i = 1 To 2 ' numPages ''<==1 to 2 for testing; use to numPages
DoEvents
Set info = html.getElementById("tags_list")
For Each item In info.getElementsByClassName("grid-layout--cell tag-cell")
counter = counter + 1
Cells(counter, 1) = item.innerText
Next item
html.querySelector(".page-numbers.next").Click
While .Busy Or .READYSTATE < 4: DoEvents: Wend
Set html = .document
Next i
Application.ScreenUpdating = True
.Quit '<== Remember to quit application
End With
End Sub
I'm not making use of the DOM, but I find it very easy to get around just searching between known tags. If ever the expressions you are looking for are too common just tweak the code a bit so that it looks for a string after a string).
An example:
Public Sub ZipLookUp()
Dim URL As String, xmlHTTP As Object, html As Object, htmlResponse As String
Dim SStr As String, EStr As String, EndS As Integer, StartS As Integer
Dim Zip4Digit As String
URL = "https://tools.usps.com/go/ZipLookupResultsAction!input.action?resultMode=1&companyName=&address1=1642+Harmon+Street&address2=&city=Berkeley&state=CA&urbanCode=&postalCode=&zip=94703"
Set xmlHTTP = CreateObject("MSXML2.XMLHTTP")
xmlHTTP.Open "GET", URL, False
On Error GoTo NoConnect
xmlHTTP.send
On Error GoTo 0
Set html = CreateObject("htmlfile")
htmlResponse = xmlHTTP.ResponseText
If htmlResponse = Null Then
MsgBox ("Aborted Run - HTML response was null")
Application.ScreenUpdating = True
GoTo End_Prog
End If
'Searching for a string within 2 strings
SStr = "<span class=""address1 range"">" ' first string
EStr = "</span><br />" ' second string
StartS = InStr(1, htmlResponse, SStr, vbTextCompare) + Len(SStr)
EndS = InStr(StartS, htmlResponse, EStr, vbTextCompare)
Zip4Digit = Left(Mid(htmlResponse, StartS, EndS - StartS), 4)
MsgBox Zip4Digit
GoTo End_Prog
NoConnect:
If Err = -2147467259 Or Err = -2146697211 Then MsgBox "Error - No Connection": GoTo End_Prog 'MsgBox Err & ": " & Error(Err)
End_Prog:
End Sub