Fetch data from website table using vba - vba

I need to constantly update excel file with information, obtained from the following link (warning, ukrainian language):
link to the Ministry of Finance web-site of Ukraine
Useful data is wrapped by the HTML tags <tbody></tbody>.
I need the similar code that retrieves the information from the table
Set htm = CreateObject("htmlFile")' #it doesn't work on mac os machine, but perfectly performs on windows
With CreateObject("msxml2.xmlhttp")
.Open "GET", <site_url_goes_here>, False
.send
htm.body.innerhtml = .responsetext
End With
With htm.getelementbyid("item")' <<<<<---what should I write here in order to parse data from the web-site table?
Sheet2.Cells(Row, 4).Value = p
For x = 1 To .Rows.Length - 1
For y = 0 To .Rows(x).Cells.Length - 1
Sheet2.Cells(Row, y + 1).Value = .Rows(x).Cells(y).innertext
Next y
Row = Row + 1
Next x
End With`

Below code will get the updated data from http://www.minfin.gov.ua in every 60 seconds.
Sub getData()
Application.OnTime Now + TimeSerial(0, 0, 60), "finance_data"
End Sub
Private Sub finance_data()
Dim url As String, lastRow As Long
Dim XMLHTTP As Object, html As Object
Dim tbl As Object, obj_tbl As Object
Dim TR As Object, TD As Object
Dim row As Long, col As Long
lastRow = Range("A" & Rows.Count).End(xlUp).row
url = "http://www.minfin.gov.ua/control/uk/publish/article?art_id=384069&cat_id=234036" & "&r=" & WorksheetFunction.RandBetween(1, 10000)
Set XMLHTTP = CreateObject("MSXML2.XMLHTTP")
XMLHTTP.Open "GET", url, False
XMLHTTP.setRequestHeader "Content-Type", "text/xml"
XMLHTTP.send
Set html = CreateObject("htmlfile")
html.body.innerHTML = XMLHTTP.ResponseText
Set obj_tbl = html.getelementsbytagname("table")
row = 1
col = 1
For Each tbl In obj_tbl
If tbl.classname = "MsoNormalTable" Then
Set TR = tbl.getelementsbytagname("TR")
For Each obj_row In TR
For Each TD In obj_row.getelementsbytagname("TD")
Cells(row, col) = TD.innerText
col = col + 1
Next
col = 1 ' reseting the value
row = row + 1
Next
End If
Next
getData
End Sub

Related

Max size of Object

I'm trying to download table data of a webpage but it looks like that the webpage is too big.
If I put the ".response" data in a "String"-variable, and save it to a textfile, I'll get all the data what is on the webpage. If I do this after saving it to an "HTMLDocument"-variable I only get the first 51594 characters
This is my script:
Sub nemigaparts_articles()
Dim oHtml As HTMLDocument
Dim oElement As Object
Dim URL As String
Dim id As String
Dim z As Long
Dim y As Long
Dim row As Long
Dim totaal As Long
Dim begin As Long
'data for save as
Dim start As Long
Dim eind As Long
Application.ScreenUpdating = False
begin = 1
totaal = 2
row = 1
start = 1
eind = 0
'laat statusbalk zien
UserForm1.Show
For z = begin To totaal Step 1
'get page data
'URL = Sheets("drawings").Cells(z, 3).Value
'id = Sheets("drawings").Cells(z, 1).Value
URL = "https://nemigaparts.com/cat_spares/pet/porsche/964/13/902000/"
id = 1
Set oHtml = New HTMLDocument
With CreateObject("WINHTTP.WinHTTPRequest.5.1")
.Open "GET", LCase(URL), False
.send
oHtml.body.innerHTML = .responseText
End With
Sheets("articles").Cells(1, 11) = z
Dim imageurl As Object
Set imageurl = oHtml.getElementsByClassName("col-lg-9 col-md-9 col-sm-9 col-xs-12 xs-margin-bottom-50")(0)
URL_img = imageurl.getElementsByTagName("a")(0).href
Dim atribuut As Object
Set atribuut = oHtml.getElementsByClassName("technical")(0)
If Not atribuut Is Nothing Then
x = oHtml.getElementsByTagName("tr").Length - 1
MsgBox (x)
For y = 1 To x Step 1
Sheets("articles").Cells(row, 1) = id
Sheets("articles").Cells(row, 2) = URL_img
Sheets("articles").Cells(row, 3) = oHtml.getElementsByTagName("tr")(y).innerHTML
row = row + 1
Next
End If
DoEvents
UserForm1.Label2.Width = (z / totaal * 100) / 100 * 186
UserForm1.Label4.Caption = "Artikel " & z & " van " & totaal
Next
End Sub

Get Data from Website in a Specific Format (Excel VBA)

I am trying to pull data from a website (https://www.baseball-reference.com/teams/ARI/2017-schedule-scores.shtml) by using the following code:
Sub GetBaseballReferenceData()
'created loop so we can loop through all different team url's
Dim x As Integer
Dim i As Integer
For i = 1 To 30
x = Cells(Rows.Count, 2).End(xlUp).Row
x = x + 2
'gets the team abbreviation that we use in our url
Team = Cells(i, "A")
'these two strings are used for url, they don't change
Const bbref_site As String = "https://www.baseball-reference.com/teams/"
Const year_schedule_scores As String = "/2017-schedule-scores"
Dim qt As QueryTable
Dim ws As Worksheet
Set ws = ActiveSheet
'uses Url to return data
Set qt = ws.QueryTables.Add(Connection:="URL;" & bbref_site & Team & year_schedule_scores & ".shtml", Destination:=Cells(x, 2))
qt.Refresh BackgroundQuery:=False
Next i
End Sub
When I run the code it works and gets me the information I want. However, the W/L column should be formatted like this (1-2, 2-3, 3-0) and instead will be formatted as a date. When I try to reformat it as a text it returns an error code. How do I pull the data I want from the website as a text initially?
Thanks for any and all help!
I changed the code slightly
Edit: Added qt.WebDisableDateRecognition
Option Explicit
Sub GetBaseballReferenceData()
'created loop so we can loop through all different team url's
Dim x As Integer
Dim i As Integer
Dim Team As String
Dim qt As QueryTable
Dim ws As Worksheet
Dim WLRange As Range
'these two strings are used for url, they don't change
Const bbref_site As String = "https://www.baseball-reference.com/teams/"
Const year_schedule_scores As String = "/2017-schedule-scores"
Set ws = ActiveSheet
For i = 1 To 1
x = Cells(Rows.Count, 2).End(xlUp).Row
x = x + 2
'gets the team abbreviation that we use in our url
Team = Cells(i, "A")
'uses Url to return data
Set qt = ws.QueryTables.Add(Connection:="URL;" & bbref_site & Team & year_schedule_scores & ".shtml", Destination:=Cells(x, 2))
qt.WebDisableDateRecognition = True
qt.Refresh False
'qt.Refresh BackgroundQuery:=False
Next i
End Sub
You can also use XHR
Option Explicit
Public Sub GetSchedules()
Dim x As Long, i As Long, URL As String, Team As String
Const bbref_site As String = "https://www.baseball-reference.com/teams/"
Const year_schedule_scores As String = "/2017-schedule-scores"
Dim sResponse As String, HTML As New HTMLDocument, wsSchedule As Worksheet, wsTeam As Worksheet
Dim wb As Workbook: Set wb = ThisWorkbook
Set wsSchedule = wb.Worksheets("Schedules"): Set wsTeam = wb.Worksheets("TeamNames")
wsSchedule.Cells.ClearContents
Application.ScreenUpdating = False
Dim http As Object: Set http = CreateObject("MSXML2.XMLHTTP")
With wsTeam
For i = 1 To 30
Team = .Cells(i, "A")
URL = bbref_site & Team & year_schedule_scores & ".shtml"
http.Open "GET", URL, False
http.send
sResponse = StrConv(http.responseBody, vbUnicode)
sResponse = Mid$(sResponse, InStr(1, sResponse, "<!DOCTYPE "))
With HTML
.body.innerHTML = sResponse
End With
WriteTable HTML, GetLastRow(wsSchedule, 1) + 2, wsSchedule
Next i
Application.ScreenUpdating = True
End With
End Sub
Public Sub WriteTable(ByVal HTML As HTMLDocument, Optional ByVal startRow As Long = 1, Optional ByVal ws As Worksheet)
Dim headers As Object, i As Long, columnCounter As Long
Dim columnInfo As Object, rowCounter As Long
With ws
Set headers = HTML.querySelectorAll("#team_schedule thead th")
For i = 0 To headers.Length - 1
columnCounter = columnCounter + 1
.Cells(startRow, columnCounter) = headers.item(i).innerText
Next i
Set columnInfo = HTML.querySelectorAll("#team_schedule tbody tr td")
columnCounter = 2
For i = 0 To columnInfo.Length - 1
If i Mod 20 = 0 Then
rowCounter = rowCounter + 1
columnCounter = 2
.Cells(startRow + rowCounter, 1) = rowCounter
Else
columnCounter = columnCounter + 1
End If
If columnCounter = 11 Then
.Cells(startRow + rowCounter, columnCounter) = Chr$(39) & columnInfo.item(i).innerText
Else
.Cells(startRow + rowCounter, columnCounter) = columnInfo.item(i).innerText
End If
Next i
End With
End Sub

Download Table from Webpage to Excel VBA

I tried this below code. which is extracting detail from web page in table format.
I'm receiving this error: "Method 'open' of object 'IServerXMLHTTPRequest2'" failed
Please help me with this.
Dim htm As Object
Dim Tr As Object
Dim Td As Object
Dim Tab1 As Object
'Replace the URL of the webpage that you want to download
Web_URL = vba.Trim(Sheets(1).Cells(1, 1))
'Create HTMLFile Object
Set HTML_Content = CreateObject("htmlfile")
'Get the WebPage Content to HTMLFile Object
With CreateObject("msxml2.xmlhttp")
.Open "GET", Web_URL, True
.Send
HTML_Content.Body.Innerhtml = .responseText
End With
Column_Num_To_Start = 1
iRow = 2
iCol = Column_Num_To_Start
iTable = 0
'Loop Through Each Table and Download it to Excel in Proper Format
For Each Tab1 In HTML_Content.getElementsByTagName("table")
With HTML_Content.getElementsByTagName("table")(iTable)
For Each Tr In .Rows
For Each Td In Tr.Cells
Sheets(1).Cells(iRow, iCol).Select
Sheets(1).Cells(iRow, iCol) = Td.innerText
iCol = iCol + 1
Next Td
iCol = Column_Num_To_Start
iRow = iRow + 1
Next Tr
End With
iTable = iTable + 1
iCol = Column_Num_To_Start
iRow = iRow + 1
Next Tab1
Thanks in advance.

VBA to extract HTML Table data

I am receiving a run-time error '91': object variable or with block variable not set whenever I try to run my code below. This code works perfect when extracting the data off of the commented out website I have listed in the code. However, when I try to use it on the google finance site I get that error. Can anyone see what I am doing wrong? I am using the following VBA:
Sub test()
Dim oDom As Object: Set oDom = CreateObject("htmlFile")
Dim x As Long, y As Long
Dim oRow As Object, oCell As Object
Dim data
y = 1: x = 1
With CreateObject("msxml2.xmlhttp")
'http://www.bundesbank.de/Navigation/EN/Statistics/Time_series_databases/Macro_economic_time_series/its_details_value_node.html?tsId=BBNZ1.Q.DE.Y.G.0000.A&listId=www_s311_b4_vgr_verw_nominal
.Open "GET", "http://finance.yahoo.com/q/hp?s=GOOG+Historical+Prices", False
.Send
oDom.body.innerHtml = .responseText
End With
With oDom.getElementsByTagName("table")(0)
ReDim data(1 To .Rows.Length, 1 To .Rows(1).Cells.Length)
For Each oRow In .Rows
For Each oCell In oRow.Cells
data(x, y) = oCell.innerText
y = y + 1
Next oCell
y = 1
x = x + 1
Next oRow
End With
Sheets(1).Cells(1, 1).Resize(UBound(data), UBound(data, 2)).Value = data
End Sub
There are more than 1 table tag in the view source. I had to change,
oDom.getElementsByTagName("table")(0)
' to
oDom.getElementsByTagName("table")(14)
... to have it use the correct table tag.

VBA If used range contains a word/text/value, go back to previous step

I wrote a macro to download data from a website, after the website is fully loaded, it will scrap the data by the html tag, however, sometimes the data is incorrectly scraped due to unknown error, I want to add a checking after each variant 'x' completed, e.g. If the activesheet contains the word "中报",then go back to the step "'Select the Report Type" to re-do the scraping. Also, I know some of the variables/data types are not set at the very beginning. Could anyone help to solve this? Thanks in advance!
Sub GetFinanceData()
Dim x As Variant
Dim IE As Object
For x = 1 To 1584
Dim URL As String, elemCollection As Object
Dim t As Integer, r As Integer, c As Integer
Worksheets("Stocks").Select
Worksheets("Stocks").Activate
'Open IE and Go to the Website
'URL = "http://stock.finance.sina.com.cn/hkstock/finance/00001.html"
URL = Cells(x, 1)
Set IE = CreateObject("InternetExplorer.Application")
With IE
.navigate URL
.Visible = False
Do While .Busy = True Or .readyState <> 4
Loop
DoEvents
Worksheets.Add(After:=Worksheets(Worksheets.Count)).Name = _
ThisWorkbook.Worksheets("Stocks").Range("B" & x).Value 'You could even simplify it and just state the name as Cells(x,2)
'Select the Report Type
Set selectItems = IE.Document.getElementsByTagName("select")
For Each i In selectItems
i.Value = "zero"
i.FireEvent ("onchange")
Application.Wait (Now + TimeValue("0:00:05"))
Next i
Do While .Busy: DoEvents: Loop
ActiveSheet.Range("A1:K2000").ClearContents
ActiveSheet.Range("A1").Value = .Document.getElementsByTagName("h1")(0).innerText
ActiveSheet.Range("B1").Value = .Document.getElementsByTagName("em")(0).innerText
ActiveSheet.Range("A4").Value = Worksheets("Stocks").Cells(1, 4)
'Find and Get Table Data
tblNameArr = Array(Worksheets("Stocks").Cells(2, 4), Worksheets("Stocks").Cells(3, 4), Worksheets("Stocks").Cells(4, 4), Worksheets("Stocks").Cells(5, 4))
tblStartRow = 6
Set elemCollection = .Document.getElementsByTagName("TABLE")
For t = 0 To elemCollection.Length - 1
For r = 0 To (elemCollection(t).Rows.Length - 1)
For c = 0 To (elemCollection(t).Rows(r).Cells.Length - 1)
ActiveSheet.Cells(r + tblStartRow, c + 1) = elemCollection(t).Rows(r).Cells(c).innerText
Next c
Next r
ActiveSheet.Cells(r + tblStartRow + 2, 1) = tblNameArr(t)
tblStartRow = tblStartRow + r + 4
Next t
End With
' cleaning up memory
IE.Quit
Next x
End Sub
This is cleaned up quite a bit.
I added a SelectReportType: line label. Whenever you want to go back to that condition, use insert the line
Goto SelectReportType
And it will take you to that spot. The better way to do it would be to place that code in a separate function so you can call it anytime your test for "中报" is true. But I'm not following your code well enough to understand what you are doing to assist you with that.
Sub GetFinanceData()
Dim x As Variant
Dim IE As Object
Dim URL As String, elemCollection As Object
Dim t As Integer, r As Integer, c As Integer
Dim selectItems As Variant, i As Variant
Dim tblNameArr() As String
Dim tblStartRow As Long
Worksheets("Stocks").Select
Worksheets("Stocks").Activate
For x = 1 To 1584
'Open IE and Go to the Website
'URL = "http://stock.finance.sina.com.cn/hkstock/finance/00001.html"
URL = Cells(x, 1)
Set IE = CreateObject("InternetExplorer.Application")
With IE
.Navigate URL
.Visible = False
Do While .Busy = True Or .ReadyState <> 4
Loop
DoEvents
Worksheets.Add(After:=Worksheets(Worksheets.count)).name = _
ThisWorkbook.Worksheets("Stocks").Range("B" & x).Value 'You could even simplify it and just state the name as Cells(x,2)
SelectReportType:
'Select the Report Type
Set selectItems = IE.Document.getElementsByTagName("select")
For Each i In selectItems
i.Value = "zero"
i.FireEvent ("onchange")
Application.Wait (Now + TimeValue("0:00:05"))
Next i
Do While .Busy: DoEvents: Loop
ActiveSheet.Range("A1:K2000").ClearContents
ActiveSheet.Range("A1").Value = .Document.getElementsByTagName("h1")(0).innerText
ActiveSheet.Range("B1").Value = .Document.getElementsByTagName("em")(0).innerText
ActiveSheet.Range("A4").Value = Worksheets("Stocks").Cells(1, 4)
'Find and Get Table Data
tblNameArr = Array(Worksheets("Stocks").Cells(2, 4), Worksheets("Stocks").Cells(3, 4), Worksheets("Stocks").Cells(4, 4), Worksheets("Stocks").Cells(5, 4))
tblStartRow = 6
Set elemCollection = .Document.getElementsByTagName("TABLE")
For t = 0 To elemCollection.Length - 1
For r = 0 To (elemCollection(t).Rows.Length - 1)
For c = 0 To (elemCollection(t).Rows(r).Cells.Length - 1)
ActiveSheet.Cells(r + tblStartRow, c + 1) = elemCollection(t).Rows(r).Cells(c).innerText
Next c
Next r
ActiveSheet.Cells(r + tblStartRow + 2, 1) = tblNameArr(t)
tblStartRow = tblStartRow + r + 4
Next t
End With
' cleaning up memory
IE.Quit
Next x
End Sub