Download Table from Webpage to Excel VBA - vba

I tried this below code. which is extracting detail from web page in table format.
I'm receiving this error: "Method 'open' of object 'IServerXMLHTTPRequest2'" failed
Please help me with this.
Dim htm As Object
Dim Tr As Object
Dim Td As Object
Dim Tab1 As Object
'Replace the URL of the webpage that you want to download
Web_URL = vba.Trim(Sheets(1).Cells(1, 1))
'Create HTMLFile Object
Set HTML_Content = CreateObject("htmlfile")
'Get the WebPage Content to HTMLFile Object
With CreateObject("msxml2.xmlhttp")
.Open "GET", Web_URL, True
.Send
HTML_Content.Body.Innerhtml = .responseText
End With
Column_Num_To_Start = 1
iRow = 2
iCol = Column_Num_To_Start
iTable = 0
'Loop Through Each Table and Download it to Excel in Proper Format
For Each Tab1 In HTML_Content.getElementsByTagName("table")
With HTML_Content.getElementsByTagName("table")(iTable)
For Each Tr In .Rows
For Each Td In Tr.Cells
Sheets(1).Cells(iRow, iCol).Select
Sheets(1).Cells(iRow, iCol) = Td.innerText
iCol = iCol + 1
Next Td
iCol = Column_Num_To_Start
iRow = iRow + 1
Next Tr
End With
iTable = iTable + 1
iCol = Column_Num_To_Start
iRow = iRow + 1
Next Tab1
Thanks in advance.

Related

How to loop through sub-folders?

I have VBA code that returns external file details like path, type, last modified date, last created date etc. for files in a folder.
I want to return the details for files in the sub-folders of that folder.
Dim IRow
Sub ListFiles()
IRow = 11 'where you want your first row of data
Call ListMyFiles(Range("B5"), False) 'Where B5 is your filepath (eg, C:\)
End Sub
Sub ListMyFiles(MySourcePath, includesubfolders)
Dim xSubFolder As Object
Set MyObject = New FileSystemObject
Set mysource = MyObject.GetFolder(MySourcePath)
On Error Resume Next
For Each myfile In mysource.Files
icol = 1
Cells(IRow, icol).Value = myfile.Path
icol = icol + 1
Cells(IRow, icol).Value = myfile.Name
icol = icol + 1
Cells(IRow, icol).Value = myfile.Type
icol = icol + 1
Cells(IRow, icol).Value = myfile.DateLastModified
icol = icol + 1
Cells(IRow, icol).Value = myfile.DateCreated
icol = icol + 1
IRow = IRow + 1
Next
If xIsSubfolders Then
For Each xSubFolder In xFolder.subfolders
ListFilesInFolder xSubFolder.Path, True
Next xSubFolder
End If
Set xFile = Nothing
Set xFolder = Nothing
Set xFileSystemObject = Nothing
End Sub
I haven't tested this, but basically what you want is recursion. This is when you call the subroutine/function while in that function. Basically it's the subroutine calling itself every time it finds a subfolder.
Something like:
Dim IRow
Sub ListFiles()
IRow = 11 'where you want your first row of data
Call ListMyFiles(Range("B5"), False) 'Where B5 is your filepath (eg, C:\)
End Sub
Sub ListMyFiles(MySourcePath as string, includesubfolders as boolean)
Set MyObject = New FileSystemObject
Set mysource = MyObject.GetFolder(MySourcePath)
On Error Resume Next
For Each myfile In mysource.Files
icol = 1
Cells(IRow, icol).Value = myfile.Path
icol = icol + 1
Cells(IRow, icol).Value = myfile.Name
icol = icol + 1
Cells(IRow, icol).Value = myfile.Type
icol = icol + 1
Cells(IRow, icol).Value = myfile.DateLastModified
icol = icol + 1
Cells(IRow, icol).Value = myfile.DateCreated
icol = icol + 1
IRow = IRow + 1
Next
'Check if the subroutine was called to include subfolders
If includesSubFolders Then
'Loop through all of the subfolders in the FSO folder
For Each SubFolder in mysource.SubFolders
'Call this same subroutine
ListMyFiles(Subfolder.Path, true)
Next xSubFolder
End If
End Sub
A lot of things can be simplified here. More importantly, the folder transversing needs to be a recursive call in order to go through to all subfolder levels.
Here is a sample code that does this. The first argument to ListMyFiles is the cell location where the path is stored, and the second argument when you want the file list to start.
Sub ListFiles()
Call ListMyFiles(Sheet1.Range("B5"), Sheet1.Range("B11"), True)
End Sub
Sub ListMyFiles(ByVal r_SourcePath As Range, ByRef r_List As Range, Optional includesubfolders As Boolean = False)
Dim path As String, ff As Folder
path = r_SourcePath.Text
Dim fso As New FileSystemObject
Set ff = fso.GetFolder(path)
Call ListFileInFolder(r_List, ff, includesubfolders)
End Sub
Public Sub ListFileInFolder(ByRef r_List As Range, ByRef ff As Folder, Optional inclSubFolders As Boolean = False)
On Error Resume Next
Dim index As Long, n As Long
index = 0
Dim f As File
For Each f In ff.Files
r_List.Offset(index, 0).Resize(1, 5).Value2 = _
Array(f.path, f.Name, f.Type, f.DateLastModified, f.DateCreated)
index = index + 1
Next
If inclSubFolders Then
For Each ff In ff.SubFolders
n = ff.Files.Count
If n > 0 Then
Call ListFileInFolder(r_List.Offset(index, 0), ff, True)
index = index + n
End If
Next
End If
On Error GoTo 0
End Sub
Of note here is the writing of a single row of data of 5 columns using a single line and the Array() function.

Greyhound data import to excel macro formula

as part of a research project i need to extract as much data as possible from a webpage. The problem is to access each table i have to follow lots of links which I can't get to work automatically.
Its from a greyhound-data.com. So an example would be I want to extract all the racing stats for every dog that raced in swindon between 1st Jan 2017- 28th Feb 2018. When i put it in the search engine I get 57236 races in a table. I have to follow the link on name of race for each race..
http://www.greyhound-data.com/d?racename=&country=13000&startmonth=3&endmonth=2&startdate=2017&enddate=2018&maxdist=unlimitied&class=any&order=dateD&x=2
My biggest problem is I don't know how to say follow the various links. And I don't know how to loop multiple times - once for each of the races in the original list.
I have created the simple Macro query :
Sub GetData()
Dim IE As Object
Dim doc As Object
Dim strURL As String
Dim I As Integer
For I = 1 To 9
strURL = "http://www.greyhound-data.com/d?racename=&country=13000&startmonth=3&endmonth=2&startdate=2017&enddate=2018&maxdist=unlimitied&class=any&order=dateD&x=" + Trim(Str(I))
Set IE = CreateObject("InternetExplorer.Application")
With IE
.navigate strURL
Do Until .ReadyState = 4: DoEvents: Loop
Do While .Busy: DoEvents: Loop
Set doc = IE.Document
GetAllTables doc
.Quit
End With
Next I
End Sub
Sub GetAllTables(doc As Object)
Dim ws As Worksheet
Dim rng As Range
Dim tbl As Object
Dim rw As Object
Dim cl As Object
Dim tabno As Long
Dim nextrow As Long
Dim I As Long
Set ws = Worksheets.Add
For Each tbl In doc.getElementsByTagName("TABLE")
tabno = tabno + 1
nextrow = nextrow + 1
Set rng = ws.Range("B" & nextrow)
rng.Offset(, -1) = "Table " & tabno
For Each rw In tbl.Rows
For Each cl In rw.Cells
rng.Value = cl.outerText
Set rng = rng.Offset(, 1)
I = I + 1
Next cl
nextrow = nextrow + 1
Set rng = rng.Offset(1, -I)
I = 0
Next rw
Next tbl
End Sub
it retrieves the all races data from the url automatically. But can not make the next step. on each page there is a "name of the race" tab and I need to get all the data on each page for each row. It is because I need to get the information of 1st place, 2nd place and third place.
Thanks for your time I know its a bit garbled!!
My new code after changes is looking like this:
Sub GetData()
Dim IE As Object
Dim doc As Object
Dim strURL As String
Dim I As Integer
For I = 1 To 9
strURL = "http://www.greyhound-data.com/d?racename=&country=13000&startmonth=3&endmonth=2&startdate=2017&enddate=2018&maxdist=unlimitied&class=any&order=dateD&x=" + Trim(Str(I))
Set IE = CreateObject("InternetExplorer.Application")
With IE
.navigate strURL
Do Until .ReadyState = 4: DoEvents: Loop
Do While .Busy: DoEvents: Loop
Set doc = IE.Document
GetAllTables doc
.Quit
End With
Next I
End Sub
Sub GetAllTables(doc As Object)
Dim ws As Worksheet
Dim rng As Range
Dim tbl As Object
Dim rw As Object
Dim cl As Object
Dim tabno As Long
Dim nextrow As Long
Dim I As Long
Dim ThisLink As Object 'variable for <a> tags
Set ws = Worksheets.Add
For Each tbl In doc.getElementsByTagName("TABLE")
tabno = tabno + 1
nextrow = nextrow + 1
Set rng = ws.Range("B" & nextrow)
rng.Offset(, -1) = "Table " & tabno
For Each rw In tbl.Rows
For Each cl In rw.Cells
rng.Value = cl.outerText
Set rng = rng.Offset(, 1)
I = I + 1
Next cl
nextrow = nextrow + 1
Set rng = rng.Offset(1, -I)
I = 0
Next rw
Next tbl
I = Range("B" & Rows.Count).End(xlUp).Row 'last row with data
Do While Cells(I, 1).Value = "" 'will loop until first not blank found in column A (starting from last row of data, from end to start)
For Each ThisLink In doc.getElementsByTagName("a") 'we check all <a> tags
If ThisLink.innerText = Cells(I, 2).Value Then Cells(I, 1).Value = ThisLink.href 'If the innertext is the name of the race, in column A we add link
Next ThisLink
I = I - 1 'we decrease row position
Loop
End Sub
but the case is that it returns the empty table as in this link : https://imageshack.us/i/poC4yhEZp
This code, after you get all your data, will check every race from end of list to start of list. And it will add in column A the related link to race.
Sub GetAllTables(doc As Object)
Dim ws As Worksheet
Dim rng As Range
Dim tbl As Object
Dim rw As Object
Dim cl As Object
Dim tabno As Long
Dim nextrow As Long
Dim I As Long
Dim ThisLink As Object 'variable for <a> tags
Set ws = Worksheets.Add
For Each tbl In doc.getElementsByTagName("TABLE")
tabno = tabno + 1
nextrow = nextrow + 1
Set rng = ws.Range("B" & nextrow)
rng.Offset(, -1) = "Table " & tabno
For Each rw In tbl.Rows
For Each cl In rw.Cells
rng.Value = cl.outerText
Set rng = rng.Offset(, 1)
I = I + 1
Next cl
nextrow = nextrow + 1
Set rng = rng.Offset(1, -I)
I = 0
Next rw
Next tbl
I = Range("B" & Rows.Count).End(xlUp).Row 'last row with data
Do While Cells(I, 1).Value = "" 'will loop until first not blank found in column A (starting from last row of data, from end to start)
For Each ThisLink In doc.getElementsByTagName("a") 'we check all <a> tags
If ThisLink.innerText = Cells(I, 2).Value Then Cells(I, 1).Value = ThisLink.href 'If the innertext is the name of the race, in column A we add link
Next ThisLink
I = I - 1 'we decrease row position
Loop
End Sub
In HTML all <a> tags are like this:
Henlow 26 Feb 2018 HT 5
The href attribute contains the link related to text between <a> and </a>. You can get it with a.href in VBA
To know the text between <a> and </a> you can use a.InnerText
What i did is just a simple loop to check every <a> tag. If the InnerText matchs the value in the cell, then I get the href attribute.
This code will get you all the links you want in your question. Just adapt the code to your needs (I pasted them in column A, but maybe you want to do something else with them).
You need 2 references for this code to work;
Microsoft HTML Object Library
Microsoft Internet Controls
And this is the final result:

Transforming Word tables into Excel array

I am trying to transfer Word tables to Excel - this has already been done here - and in addition, during the transfer I'd like to keep only rows that contain certain content, and would like to reshape the table before pasting it into Excel. I thought this could be done by converting each table first into an Excel array and then modifying the array as needed before pasting it to a specified range. Yet, I am not so familiar with Word VBA and I am finding this task pretty hard. I am starting from this code here, which I found at the post referenced above.
Option Explicit
Sub ImportWordTable()
Dim wdDoc As Object
Dim wdFileName As Variant
Dim tableNo As Integer 'table number in Word
Dim iRow As Long 'row index in Excel
Dim iCol As Integer 'column index in Excel
Dim resultRow As Long
Dim tableStart As Integer
Dim tableTot As Integer
On Error Resume Next
ActiveSheet.Range("A:AZ").ClearContents
wdFileName = Application.GetOpenFilename("Word files (*.docx),*.doc", , _
"Browse for file containing table to be imported")
If wdFileName = False Then Exit Sub '(user cancelled import file browser)
Set wdDoc = GetObject(wdFileName) 'open Word file
With wdDoc
tableTot = wdDoc.tables.Count
If tableTot = 0 Then
MsgBox "This document contains no tables", _
vbExclamation, "Import Word Table"
End If
For tableStart = 1 To tableTot
With .tables(tableStart)
'copy cell contents from Word table cells to Excel cells
For iRow = 1 To .Rows.Count
For iCol = 1 To .Columns.Count
Cells(resultRow, iCol) = WorksheetFunction.Clean(.cell(iRow, iCol).Range.Text)
Next iCol
resultRow = resultRow + 1
Next iRow
End With
resultRow = resultRow + 1
Next tableStart
End With
End Sub
I think I should change this chunk to obtain what I am looking for.
For tableStart = 1 To tableTot
With .tables(tableStart)
'copy cell contents from Word table cells to Excel cells
For iRow = 1 To .Rows.Count
For iCol = 1 To .Columns.Count
Cells(resultRow, iCol) = WorksheetFunction.Clean(.cell(iRow, iCol).Range.Text)
Next iCol
resultRow = resultRow + 1
Next iRow
End With
resultRow = resultRow + 1
Next tableStart
End With
Can someone help me with this? I can provide more details if needed. Many thanks!
Riccardo
If you want to copy only certain rows:
For tableStart = 1 To tableTot
With .tables(tableStart)
For iRow = 1 To .Rows.Count
v = WorksheetFunction.Clean(.cell(iRow, 1).Range.Text)
If v = "A" Or v = "B" Or v = "C" Then
For iCol = 1 To .Columns.Count
Cells(resultRow, iCol) = WorksheetFunction.Clean( _
.cell(iRow, iCol).Range.Text)
Next iCol
resultRow = resultRow + 1
End If
Next iRow
End With
resultRow = resultRow + 1
Next tableStart
With the help of Tim, this is the code that does what I was looking for.
Sub ImportWordTable()
Dim wdDoc As Object
Dim wdFileName, v, cont As Variant
Dim tableNo As Integer 'table number in Word
Dim iRow As Long 'row index in Excel
Dim iCol As Integer 'column index in Excel
Dim resultRow As Long
Dim tableStart As Integer
Dim tableTot As Integer
Dim rtemp, i As Integer
Dim categ(4), content(4) As Variant
Dim found, temprange As Range
Worksheets.Add.Name = "tempsht"
Worksheets.Add.Name = "final"
With Sheets("final")
.Cells(1, 1) = "Author"
.Cells(1, 2) = "Title"
.Cells(1, 3) = "Date"
.Cells(1, 4) = "Publication name"
.Cells(1, 5) = "Word count"
End With
categ(0) = "BY"
categ(1) = "HD"
categ(2) = "PD"
categ(3) = "SN"
categ(4) = "WC"
resultRow = 2
wdFileName = Application.GetOpenFilename("Word files (*.rtf),*.rtf", , "Browse for file containing table to be imported")
If wdFileName = False Then Exit Sub '(user cancelled import file browser)
Set wdDoc = GetObject(wdFileName) 'open Word file
With wdDoc
tableTot = wdDoc.tables.Count
If tableTot = 0 Then
MsgBox "This document contains no tables", _
vbExclamation, "Import Word Table"
End If
For tableStart = 1 To tableTot - 1
With .tables(tableStart) 'subset the table and copy it to a tempsheet
rtemp = 1
For iRow = 1 To .Rows.Count
v = WorksheetFunction.Clean(.cell(iRow, 1).Range.Text)
If v = " HD" Or v = " BY" Or v = " WC" Or v = " PD" Or v = " SN" Or v = "HD" Or v = "BY" Or v = "WC" Or v = "PD" Or v = "SN" Then
For iCol = 1 To .Columns.Count
Sheets("tempsht").Cells(rtemp, iCol) = Trim(WorksheetFunction.Clean(.cell(iRow, iCol).Range.Text))
Next iCol
rtemp = rtemp + 1
End If
Next iRow
Set temprange = Sheets("tempsht").Range("A1:A5")
With temprange
For i = 0 To 4
Set found = .find(What:=categ(i))
If found Is Nothing Then
content(i) = ""
Else
content(i) = Sheets("tempsht").Cells(found.Row, 2).Value
End If
Next i
End With
Sheets("final").Range(Cells(resultRow, 1), Cells(resultRow, 5)) = content
Sheets("tempsht").Range("A1:B5").ClearContents 'remove content from tempsheet
End With
resultRow = resultRow + 1
Next tableStart
Application.DisplayAlerts = False 'delete temporary sheet
Sheets("tempsht").Select
ActiveWindow.SelectedSheets.Delete
End With
End Sub

Fetched table data from a webpage not displaying first column of table

I need to fetch the price table from this page:
http://www.kieskeurig.nl/objectief/canon/ef_100mm_f2_usm/prijzen/bezorgen/167557#prijzen
So far I have developed this code to get the data
Sub TableExample()
Dim IE As Object
Dim doc As Object
Dim strURL As String
strURL = "http://www.kieskeurig.nl/objectief/canon/ef_100mm_f2_usm/prijzen/bezorgen/167557#prijzen"
' replace with URL of your choice
Set IE = CreateObject("InternetExplorer.Application")
With IE
'.Visible = True
.navigate strURL
Do Until .readyState = 4: DoEvents: Loop
Do While .Busy: DoEvents: Loop
Set doc = IE.document
GetAllTables doc
.Quit
End With
End Sub
Sub GetAllTables(doc As Object)
' get all the tables from a webpage document, doc, and put them in a new worksheet
Dim ws As Worksheet
Dim rng As Range
Dim tbl As Object
Dim rw As Object
Dim cl As Object
Dim tabno As Long
Dim nextrow As Long
Dim I As Long
Set ws = Sheets("Sheet1")
For Each tbl In doc.getElementsByTagName("TABLE")
tabno = tabno + 1
nextrow = nextrow + 1
Set rng = ws.Range("B" & nextrow)
rng.Offset(, -1) = "Table " & tabno
For Each rw In tbl.Rows
For Each cl In rw.Cells
rng.Value = cl.innerText
Set rng = rng.Offset(, 1)
I = I + 1
Next cl
nextrow = nextrow + 1
Set rng = rng.Offset(1, -I)
I = 0
Next rw
Next tbl
ws.Cells.ClearFormats
End Sub
This code does work for me
But the problem is the 1st column i.e. the supplier data is not displaying in the fetched table.
Can anyone please help me on this
Replace your GetAllTables subroutine with the following:
Sub GetAllTables(doc As Object)
' get all the tables from a webpage document, doc, and put them in a new worksheet
Dim ws As Worksheet
Dim rng As Range
Dim tbl As Object
Dim rw As Object
Dim cl As Object
Dim tabno As Long
Dim nextrow As Long
Dim I As Long
Set ws = Sheets("Sheet1")
For Each tbl In doc.getElementsByTagName("TABLE")
tabno = tabno + 1
nextrow = nextrow + 1
Set rng = ws.Range("B" & nextrow)
rng.Offset(, -1) = "Table " & tabno
For Each rw In tbl.Rows
colno = 1
For Each cl In rw.Cells
If colno = 1 and nextrow > 1 then
Set classColl = doc.getElementsByClassName("shopLogoX")
Set imgTgt = classColl(nextrow - 2).getElementsByTagName("img")
rng.Value = imgTgt(0).getAttribute("alt")
Else
rng.Value = cl.innerText
End If
Set rng = rng.Offset(, 1)
I = I + 1
colno = colno + 1
Next cl
nextrow = nextrow + 1
Set rng = rng.Offset(1, -I)
I = 0
Next rw
Next tbl
ws.Cells.ClearFormats
End Sub
The change is very little, actually. We use colno to track on which column we're in already in the row. Obviously, we check if we're in the first cell. If we are on the first column and not on the first row (header row), we create a collection of elements with class shopLogoX. This contains the img tags that have the alt attribute we want.
Tried, tested, and working. Let us know if this helps.

Fetch data from website table using vba

I need to constantly update excel file with information, obtained from the following link (warning, ukrainian language):
link to the Ministry of Finance web-site of Ukraine
Useful data is wrapped by the HTML tags <tbody></tbody>.
I need the similar code that retrieves the information from the table
Set htm = CreateObject("htmlFile")' #it doesn't work on mac os machine, but perfectly performs on windows
With CreateObject("msxml2.xmlhttp")
.Open "GET", <site_url_goes_here>, False
.send
htm.body.innerhtml = .responsetext
End With
With htm.getelementbyid("item")' <<<<<---what should I write here in order to parse data from the web-site table?
Sheet2.Cells(Row, 4).Value = p
For x = 1 To .Rows.Length - 1
For y = 0 To .Rows(x).Cells.Length - 1
Sheet2.Cells(Row, y + 1).Value = .Rows(x).Cells(y).innertext
Next y
Row = Row + 1
Next x
End With`
Below code will get the updated data from http://www.minfin.gov.ua in every 60 seconds.
Sub getData()
Application.OnTime Now + TimeSerial(0, 0, 60), "finance_data"
End Sub
Private Sub finance_data()
Dim url As String, lastRow As Long
Dim XMLHTTP As Object, html As Object
Dim tbl As Object, obj_tbl As Object
Dim TR As Object, TD As Object
Dim row As Long, col As Long
lastRow = Range("A" & Rows.Count).End(xlUp).row
url = "http://www.minfin.gov.ua/control/uk/publish/article?art_id=384069&cat_id=234036" & "&r=" & WorksheetFunction.RandBetween(1, 10000)
Set XMLHTTP = CreateObject("MSXML2.XMLHTTP")
XMLHTTP.Open "GET", url, False
XMLHTTP.setRequestHeader "Content-Type", "text/xml"
XMLHTTP.send
Set html = CreateObject("htmlfile")
html.body.innerHTML = XMLHTTP.ResponseText
Set obj_tbl = html.getelementsbytagname("table")
row = 1
col = 1
For Each tbl In obj_tbl
If tbl.classname = "MsoNormalTable" Then
Set TR = tbl.getelementsbytagname("TR")
For Each obj_row In TR
For Each TD In obj_row.getelementsbytagname("TD")
Cells(row, col) = TD.innerText
col = col + 1
Next
col = 1 ' reseting the value
row = row + 1
Next
End If
Next
getData
End Sub