Parse Table from IE to Excel - vba

Sub Web_Table_Option_Two()
Dim HTMLDoc As New HTMLDocument
Dim objTable As Object
Dim lRow As Long
Dim lngTable As Long
Dim lngRow As Long
Dim lngCol As Long
Dim ActRw As Long
Dim objIE As InternetExplorer
Set objIE = New InternetExplorer
objIE.navigate "https://finviz.com/screener.ashx?v=152"
Do Until objIE.readyState = 4 And Not objIE.Busy
DoEvents
Loop
Application.Wait (Now + TimeValue("0:00:03")) 'wait for java script to load
HTMLDoc.body.innerHTML = objIE.document.body.innerHTML
With HTMLDoc.body
Set objTable = .getElementsByTagName("table")
For lngTable = 0 To objTable.Length - 1
For lngRow = 0 To objTable(lngTable).Rows.Length - 1
For lngCol = 0 To objTable(lngTable).Rows(lngRow).Cells.Length - 1
ThisWorkbook.Sheets("Sheet1").Cells(ActRw + lngRow + 1, lngCol + 1) = objTable(lngTable).Rows(lngRow).Cells(lngCol).innerText
Next lngCol
Next lngRow
ActRw = ActRw + objTable(lngTable).Rows.Length + 1
Next lngTable
End With
objIE.Quit
End Sub
`
With the code above I am trying to get the Stock Screener data from the website in the code but the table isn't labeled in HTML code so I am not sure how I am supposed to grab this info. Currently it is grabbing everything on the screen.

For just the bottom table info you can use the following and target the tbody tag collection, and the required index within, to avoid all the unwanted fluff that comes with table selection.
I would use XMLHTTP request as faster. The appropriate index changes between the two methods.
XMLHTTP request:
Option Explicit
Public Sub GetTable()
Dim sResponse As String, html As HTMLDocument, hTable As Object
Set html = New HTMLDocument
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://finviz.com/screener.ashx?v=152", False
.setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
.send
sResponse = StrConv(.responseBody, vbUnicode)
End With
With html
.body.innerHTML = sResponse
Set hTable = .getElementsByTagName("tbody")(9)
WriteTable hTable, 1, ThisWorkbook.Worksheets("Sheet1")
End With
End Sub
Public Sub WriteTable(ByVal hTable As Object, Optional ByVal startRow As Long = 1, Optional ByVal ws As Worksheet)
If ws Is Nothing Then Set ws = ActiveSheet
Dim tRow As Object, tCell As Object, tr As Object, td As Object, r As Long, c As Long
r = startRow
With ws
Set tRow = hTable.getElementsByTagName("tr")
For Each tr In tRow
r = r + 1: c = 1
Set tCell = tr.getElementsByTagName("td")
For Each td In tCell
.Cells(r, c).Value = td.innerText
c = c + 1
Next td
Next tr
End With
End Sub
Internet Explorer (using WriteTable sub from above):
Option Explicit
Public Sub GetInfo()
Dim IE As New InternetExplorer, hTable As Object
With IE
.Visible = True
.navigate "https://finviz.com/screener.ashx?v=152"
While .Busy Or .readyState < 4: DoEvents: Wend
Set hTable = .document.getElementsByTagName("tbody")(13)
WriteTable hTable, 1, ThisWorkbook.Worksheets("Sheet1")
.Quit
End With
End Sub
Output:
References (VBE > Tools > References):
Microsoft Internet Controls
Microsoft HTML Object Library

Related

VBA WEB scraping loop

This is the code I wrote I need it to loop when it copy and paste the table and then click the next button till there no more next button , the next button is a tagname "a" and it has a "href" that I used to click the hyper link
Any help would be appreciated thanks
Sub GetData()
'define variables
Dim i As SHDocVw.InternetExplorer
Set i = New InternetExplorer
i.Visible = True
Dim IE As Object, obj As Object
Dim r As Long, c As Long, t As Long
Dim elemCollection As Object
Dim eRow As Long
Dim HTMLDoc As MSHTML.HTMLDocument
Dim HTMLInput As MSHTML.IHTMLElement
Dim HTMLAs As MSHTML.IHTMLElementCollection
Dim HTMLA As MSHTML.IHTMLElement
url_name = Sheet2.Range("e4")
If url_name = "" Then Exit Sub
i.Visible = True
i.navigate (url_name)
Do While i.readyState <> READYSTATE_COMPLETE
Loop
'we ensure that the web page is downloaded completely
ThisWorkbook.Sheets("Sheet1").Range("a2:ai1000").ClearContents
Set elemCollection = i.document.getElementsByTagName("TABLE")
For t = 0 To (elemCollection.Length - 1)
For r = 0 To (elemCollection(t).Rows.Length - 1)
eRow = Sheet1.Cells(Rows.Count, 1).End(xlUp).Offset(1, 0).Row
For c = 0 To (elemCollection(t).Rows(r).Cells.Length - 1)
ThisWorkbook.Worksheets(1).Cells(eRow, c + 1) = elemCollection(t).Rows(r).Cells(c).innerText
Next c
Next r
Next t
Range("a1:ai1000").Columns.AutoFit
'Clean up Memory
Set IE = Nothing
Set HTMLDoc = i.document
Set HTMLAs = HTMLDoc.getElementsByTagName("a")
For Each HTMLA In HTMLAs
'Debug.Print HTMLA.getAttribute("classname"), HTMLA.getAttribute("href")
If HTMLA.getAttribute("classname") = "button2 next" And HTMLA.getAttribute("href") = "https://stathead.com/football/pgl_finder.cgi?request=1&game_num_max=99&week_num_max=99&order_by=all_td&match=game&season_start=1&year_max=2020&qb_gwd=0&qb_comeback=0&season_end=-1&game_type=R&age_max=99&year_min=2020&offset=100" Then
HTMLA.Click
Exit For
End If
Next
End Sub

Can't get Web Scraped Table to populate multiple cells VBA

I am trying to scrape a table from a website via VBA. I am able to gather all the data from the table however I am only able to get it to populate a single cell, rather than break out the information as formatted on the website. Essentially the data should be in four columns and then go down as many rows as there is data. I know I'm missing something simple here and I just can't put my finger on it. Help please! coding is below:
Sub WebScrape()
Dim objIE As InternetExplore'
Dim ele As Object
Dim y As Integer
objIE.navigate "http://www.uscfinvestments.com/holdings/usci"
Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop
y = 1
For Each ele In objIE.document.getElementById("holdingsTableDiv").getElementsByTagName("table")
Debug.Print ele.textContent
Sheets("Sheet1").Range("A" & y).Value = ele.Children(0).textContent
Next
ActiveWorkbook.Save
End Sub
I show a number of methods below. Personally, I prefer the last which uses an API.
Using clipboard:
A nice easy way, if you want to appear as on page, is to simply copy the table to clipboard and paste
Option Explicit
Public Sub GetInfo()
Dim ie As InternetExplorer, hTable As Object, clipboard As Object, ws As Worksheet, t As Date
Const MAX_WAIT_SEC As Long = 10
Set ws = ThisWorkbook.Worksheets("Sheet1")
Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
Set ie = New InternetExplorer
With ie
.Visible = True
.Navigate2 "http://www.uscfinvestments.com/holdings/usci"
While .Busy Or .readyState < 4: DoEvents: Wend
t = Timer
Do
On Error Resume Next
Set hTable = .document.getElementById("holdingsTableID")
On Error GoTo 0
If Timer - t > MAX_WAIT_SEC Then Exit Do
Loop While hTable Is Nothing
If Not hTable Is Nothing Then
clipboard.SetText hTable.outerHTML
clipboard.PutInClipboard
ws.Cells(1, 1).PasteSpecial
End If
.Quit
End With
End Sub
Looping table:
If you want to loop rows and columns of a table then you can use row class name and row number to determine how to write out
Option Explicit
Public Sub GetInfo()
Dim ie As InternetExplorer, hTable As Object, ws As Worksheet, t As Date
Const MAX_WAIT_SEC As Long = 10
Set ws = ThisWorkbook.Worksheets("Sheet1")
Set ie = New InternetExplorer
With ie
.Visible = True
.Navigate2 "http://www.uscfinvestments.com/holdings/usci"
While .Busy Or .readyState < 4: DoEvents: Wend
t = Timer
Do
On Error Resume Next
Set hTable = .document.getElementById("holdingsTableID")
On Error GoTo 0
If Timer - t > MAX_WAIT_SEC Then Exit Do
Loop While hTable Is Nothing
If Not hTable Is Nothing Then
WriteTable hTable, 1, ws
End If
.Quit
End With
End Sub
Public Sub WriteTable(ByVal hTable As HTMLTable, Optional ByVal startRow As Long = 1, Optional ByVal ws As Worksheet)
If ws Is Nothing Then Set ws = ActiveSheet
Dim tRow As Object, td As Object, r As Long, c As Long, th As Object
r = startRow
With ws
For Each tRow In hTable.getElementsByTagName("tr")
c = 1
If r = startRow Or tRow.className = "subHeader" Then
For Each th In tRow.getElementsByTagName("th")
.Cells(r, c) = th.innerText
c = c + 1
Next
Else
For Each td In tRow.getElementsByTagName("td")
.Cells(r, c).Value = td.innerText
c = c + 1
Next td
End If
r = r + 1
Next tRow
End With
End Sub
API:
There is an API which serves the data in a json format
https://cssecure.alpsinc.com/api/v1//holding/usci
It requires authentication. I am using jsonconverter.bas to parse the json returned. After downloading and adding the .bas you need to go VBE > Tools > References > Add a reference to Microsoft Scripting Runtime.
Option Explicit
Public Sub GetValues()
Dim json As Object, authorization As String
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "http://www.uscfinvestments.com/uscfinvestments-template/assets/javascript/api_key.php", False
.send
authorization = Split(Split(.responseText, "'")(1), "'")(0)
.Open "GET", "https://cssecure.alpsinc.com/api/v1//holding/usci", False
.setRequestHeader "Authorization", authorization
.send
Set json = JsonConverter.ParseJson(.responseText)
End With
Dim arr(), headers(), item As Object, r As Long
headers = Array("Security", "Quantity", "Price", "Market Value")
r = 1
ReDim arr(1 To json.Count, 1 To 4)
For Each item In json
arr(r, 1) = item("name")
arr(r, 2) = item("shares")
Dim test As String
If IsNull(item("contractprice")) Then
arr(r, 3) = item("settlementprice")
Else
arr(r, 3) = item("contractprice")
End If
arr(r, 4) = item("marketvalue")
r = r + 1
Next
With ThisWorkbook.Worksheets("Sheet1")
.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
.Cells(2, 1).Resize(UBound(arr, 1), UBound(arr, 2)) = arr
End With
End Sub
There is nothing much to add here. QHarr has already covered them. I used hardcoded delay instead of explicit wait to make the script less verbose.
Sub GetContent()
Dim Html As HTMLDocument, elem As Object, tRow As Object, C&, R&
With New InternetExplorer
.Visible = False
.navigate "http://www.uscfinvestments.com/holdings/usci"
While .Busy = True Or .readyState < 4: DoEvents: Wend
Set Html = .Document
End With
Application.Wait Now + TimeValue("00:00:03") 'make it 05 if it fails somehow
For Each elem In Html.getElementById("holdingsTableID").Rows
For Each tRow In elem.Cells
C = C + 1: ThisWorkbook.Worksheets("Sheet1").Cells(R + 1, C) = tRow.innerText
Next tRow
C = 0: R = R + 1
Next elem
End Sub

Error While using Macro to add google's first image link to excel

I'm using the below Code to input Google's first images link in B1
for certain values in A1.
Public Sub Test()
Dim IE As InternetExplorer
Dim HTMLdoc As HTMLDocument
Dim imgElements As IHTMLElementCollection
Dim imgElement As HTMLImg
Dim aElement As HTMLAnchorElement
Dim N As Integer, I As Integer
Dim Url As String, Url2 As String
Dim LastRow As Long
Dim m, sImageSearchString
LastRow = Range("A" & Rows.Count).End(xlUp).Row
For I = 1 To LastRow
Url = "http://www.google.co.in/search?q=" & Cells(I, 1) & "&source=lnms&tbm=isch&sa=X&rnd=1"
Set IE = New InternetExplorer
With IE
.Visible = False
.Navigate Url 'sWebSiteURL
Do Until .readyState = 4: DoEvents: Loop
'Do Until IE.document.readyState = "Complete": DoEvents: Loop
Set HTMLdoc = .document
Set imgElements = HTMLdoc.getElementsByTagName("IMG")
N = 1
For Each imgElement In imgElements
If InStr(imgElement.src, sImageSearchString) Then
If imgElement.ParentNode.nodeName = "A" Then
Set aElement = imgElement.ParentNode
Url2 = imgElement.src
N = N + 1
End If
End If
Next
Cells(I, 2) = Url2
IE.Quit
Set IE = Nothing
End With
Next
End Sub
however I'm receiving the below error, can you please advise?
I'm using Windows 10, Excel 365
In VBA Menu - Tools - References - tick MS Internet Controls.
Or
Using Late Binding
Dim ie As Object
Set ie = CreateObject("InternetExplorer.Application")

How to import data from multiple pages?

I have a small piece of code that lists links in a site.
Sub ListLinks()
'Set a reference to microsoft Internet Controls
Dim IeApp As InternetExplorer
Dim sURL As String
Dim IeDoc As Object
Dim i As Long
Set IeApp = New InternetExplorer
IeApp.Visible = True
sURL = "http://www.sharenet.co.za/v3/q_sharelookup.php"
IeApp.Navigate sURL
Do
Loop Until IeApp.ReadyState = READYSTATE_COMPLETE
Set IeDoc = IeApp.Document
For i = 0 To IeDoc.Links.Length - 1
Cells(i + 1, 1).Value = IeDoc.Links(i).href
Next i
Set IeApp = Nothing
End Sub
That is very useful for listing all links in a site. How can I loop through these URLs and import data from each one?
For instance, the first link under 'Name or Sector' is this:
http://www.sharenet.co.za/v3/sharesfound.php?ssector=0533&exch=JSE&bookmark=Oil & Gas&scheme=default
There is actually nothing to import from there. The next link has some data:
http://www.sharenet.co.za/v3/sharesfound.php?ssector=0537&exch=JSE&bookmark=Oil%20-%20Integrated&scheme=default
The data from there looks like this:
Name Full Name Code Sector
SACOIL-N Sacoil Holdings Ltd NPL SCLN 0537
ERIN Erin Energy Corporation ERN 0537
BEE-SASOL BEE - SASOL LIMITED SOLBE1 0537
SACOIL SACOIL HOLDINGS LD SCL 0537
OANDO OANDO PLC OAO 0537
OANDORIGT OANDO PLC RIGT OAON 0537
MONTAUK Montauk Holdings Ltd MNK 0537
How can I import that data from each link?
This seems to work pretty well. It may need a bit of fine tuning, but this should be pretty darn close.
Sub ListLinks()
'Set a reference to microsoft Internet Controls
Dim IeApp As InternetExplorer
Dim sURL As String
Dim IeDoc As Object
Dim i As Long
Set IeApp = New InternetExplorer
IeApp.Visible = True
sURL = "http://www.sharenet.co.za/v3/q_sharelookup.php"
IeApp.Navigate sURL
Do
Loop Until IeApp.ReadyState = READYSTATE_COMPLETE
Set IeDoc = IeApp.Document
For i = 0 To IeDoc.Links.Length - 1
Cells(i + 1, 1).Value = IeDoc.Links(i).href
Next i
Set IeApp = Nothing
Call CopyFromURL
End Sub
Public Sub CopyFromURL()
Dim IE As InternetExplorer, doc As HTMLDocument
Dim thisClass As IHTMLElement2, thisLink As IHTMLElement
Dim rng As Range, cell As Range
Const READYSTATE_COMPLETE As Integer = 4
Dim TR_col As Object, TR As Object
Dim TD_col As Object, TD As Object
Dim row As Long, col As Long
row = 1
Set rng = Range("A1:A5")
For Each cell In rng
Set IE = CreateObject("InternetExplorer.Application")
IE.Navigate cell
Do While (IE.Busy Or IE.ReadyState <> READYSTATE_COMPLETE)
DoEvents
Loop
Set TR_col = IE.Document.getElementsByTagName("TR")
For Each TR In TR_col
Set TD_col = TR.getElementsByTagName("TD")
col = 2
For Each TD In TD_col
Cells(row, col) = TD.innerText
col = col + 1
Next
col = 2
row = row + 1
Next
Next cell
IE.Quit
End Sub

Data Extraction from Webpage to Excel using VBA

I was trying to pull a table from Webpage, so far i was successful pull a table from Webpage, unfortunately i have some links in each row of table, when i pulled the table from Webpage, i am getting output without link, just text, is there any way we can pull table from webpage using VBA including hyperlinks.
Here is my code:
Sub TableExample()
Dim IE As Object
Dim doc As Object
Dim strURL As String
strURL = "HERE I USED MY URL"
' replace with URL of your choice
Set IE = CreateObject("InternetExplorer.Application")
With IE
'.Visible = True
.Navigate strURL
Do Until .readyState = 4: DoEvents: Loop
Do While .Busy: DoEvents: Loop
Set doc = IE.Document
GetAllTables doc
.Quit
End With
End Sub
Sub GetAllTables(doc As Object)
' get all the tables from a webpage document, doc, and put them in a new worksheet
Dim ws As Worksheet
Dim rng As Range
Dim tbl As Object
Dim rw As Object
Dim cl As Object
Dim tabno As Long
Dim nextrow As Long
Dim I As Long
Set ws = Worksheets.Add
For Each tbl In doc.getElementsByTagName("TABLE")
tabno = tabno + 1
nextrow = nextrow + 1
Set rng = ws.Range("B" & nextrow)
rng.Offset(, -1) = "Table " & tabno
For Each rw In tbl.Rows
For Each cl In rw.Cells
rng.Value = cl.outerText
Set rng = rng.Offset(, 1)
I = I + 1
Next cl
nextrow = nextrow + 1
Set rng = rng.Offset(1, -I)
I = 0
Next rw
Next tbl
ws.Cells.ClearFormats
End Sub
When you execute "rng.Value = cl.outerText" you get only text. If you need to have all links and other html - please use innerHTML property.
Just replace "rng.Value = cl.outerText" to "rng.Value = cl.innerHTML". This will return entire html with links ;)