VBA: Using Control F and Grabbing Relevant Data from IE Webpage - vba

Goal: Make a VBA Macro that opens up a webpage via IE, loops through entire page, uses the Ctrl+F function to find MULTIPLE keywords, if those keywords are found, locates the row those keywords are in, and grabs a certain number of rows above and below that keyword row location and posts them to an excel sheet to be emailed out.
I have code that goes to the webpage, and uses Ctrl+F to find the keyword. This part works correctly. I don't know how to loop through the whole webpage and do this for multiple keywords. I also am having trouble finding the row location of each keyword 'hit' and posting it to excel (not that skilled with VBA).
Sub Find()
'create a variable to refer to an IE application, and
'start up a new copy of IE
Dim ieApp As New SHDocVw.InternetExplorer
Dim objectIE As Object
'make sure you can see
ieApp.Visible = True
'go to the website of interest
ieApp.Navigate "URL HERE"
'wait for page to finish loading
Do While ieApp.Busy
Loop
'Declare Keywords
Dim keyword1 As String
Dim found As Boolean
keyword1 = "keyword"
For i = 1 To ie.document.all.Length
Application.Wait (Now + TimeValue("0:00:02"))
SendKeys "^f"
Application.Wait (Now + TimeValue("0:00:01"))
SendKeys (keyword1)
Application.Wait (Now + TimeValue("0:00:01"))
SendKeys ("{ENTER}")
Next i
End Sub

Here is the example, which implements keyword look up in webpage document text nodes, if found - expands the range to the entire table cell, then outputs all matches on to worksheet:
Sub Find()
sKeyword = "language"
sUrl = "http://stackoverflow.com/tags"
Set oList = CreateObject("Scripting.Dictionary")
With CreateObject("InternetExplorer.Application")
.Visible = True
' Navigating to url
.Navigate sUrl
' Wait for IE ready
Do While .ReadyState <> 4 Or .Busy
DoEvents
Loop
' Wait for document complete
Do While .Document.ReadyState <> "complete"
DoEvents
Loop
' ' Look up in the specified node - optional
' ' Wait for target node created
' Do While TypeName(.Document.getElementById("Content")) = "Null" ' replace Content with your Id
' DoEvents
' Loop
' ' Get target node
' Set oRoot = .Document.getElementById("Content")
' Look up in the entire document
Set oRoot = .Document.getElementsByTagName("html")(0)
Set oWalker = .Document.createTreeWalker(oRoot, 4, Null, False) ' NodeFilter.SHOW_TEXT = 4
Set oNode = oWalker.currentNode
Do
Select Case True
Case IsNull(oNode.NodeValue)
Case oNode.NodeValue = ""
Case InStr(oNode.NodeValue, sKeyword) = 0
Case Else
' Text node contains keyword
Debug.Print oNode.NodeValue
Do
' Expand the range until thenode of the necessary type is enclosed
Set oNode = oNode.ParentNode
Debug.Print TypeName(oNode)
Select Case TypeName(oNode)
' ' Non-table structures
' Case "HTMLHtmlElement", "HTMLBody", "HTMLDivElement", "HTMLParagraphElement", "HTMLHeadingElement"
' For tables
Case "HTMLHtmlElement", "HTMLBody", "HTMLTableRow", "HTMLTableCell"
Exit Do
End Select
Loop
' Add to list
sText = oNode.innerText
Debug.Print sText
oList(oList.Count) = sText
End Select
' Get next node
oWalker.NextNode
Set oPrev = oNode
Set oNode = oWalker.currentNode
Loop Until oNode Is oPrev
.Quit
End With
' Results output
aList = oList.Items()
Cells(1, 1).Resize(UBound(aList) + 1, 1).Value = aList
End Sub
As an example, for source page as follows
the output is
For multiple keywords search please elaborate the rule: do all keywords, or at least one of the keywords should be found in one sentence?

Related

Excel Macro Error Code 424 works when slowly clicking through

'start a new subroutine called SearchBot
Sub SearchBot()
'dimension (declare or set aside memory for) our variables
Dim objIE As InternetExplorer 'special object variable representing the IE browser
Dim aEle As HTMLLinkElement 'special object variable for an <a> (link) element
Dim y As Integer 'integer variable we'll use as a counter
Dim result As String 'string variable that will hold our result link
Dim x As Integer
Application.ScreenUpdating = False
' Set numrows = number of rows of data.
NumRows = Range("A1", Range("A1").End(xlDown)).Rows.Count
' Select cell a1.
Range("A1").Select
' Establish "For" loop to loop "numrows" number of times.
For x = 1 To NumRows
' Insert your code here.
'initiating a new instance of Internet Explorer and asigning it to objIE
Set objIE = New InternetExplorer
'make IE browser visible (False would allow IE to run in the background)
objIE.Visible = True
'navigate IE to this web page (a pretty neat search engine really)
objIE.navigate "http://ec.europa.eu/taxation_customs/vies/vatResponse.html"
'wait here a few seconds while the browser is busy
Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop
'in the search box put cell "A2" value, the word "in" and cell "C1" value
objIE.document.getElementById("countryCombobox").Value = "GB"
objIE.document.getElementById("number").Value = ActiveCell.Value
'click the 'go' button
objIE.document.getElementById("submit").Click
'wait again for the browser
Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop
Dim vatResponse As String
vatResponse = objIE.document.getElementById("vatResponseFormTable").getElementsByTagName("tr")(0).Children(0).textContent
ActiveCell.Offset(0, 2).Value = vatResponse
' Selects cell down 1 row from active cell.
'Next
Application.ScreenUpdating = True
'close the browser
objIE.Quit
ActiveCell.Offset(1, 0).Select
'End
Next
'exit our SearchBot subroutine
End Sub
So basically on this code line:
vatResponse = objIE.document.getElementById("vatResponseFormTable").getElementsByTagName("tr")(0).Children(0).textContent
I am getting an error message saying that I have an error code 424
Sometimes the pages gets loaded internally through some scripts so the html element you are trying to get actually isn't found on the document as the code runs very fast. So somehow you have to wait until page loads completely.
Please try this approach and see if the code runs without producing an error.
Sub SearchBot()
'dimension (declare or set aside memory for) our variables
Dim objIE As InternetExplorer 'special object variable representing the IE browser
Dim aEle As HTMLLinkElement 'special object variable for an <a> (link) element
Dim vatFormTable As IHTMLElement
Dim tr As IHTMLElement
Dim y As Integer 'integer variable we'll use as a counter
Dim result As String 'string variable that will hold our result link
Dim x As Integer
Application.ScreenUpdating = False
' Set numrows = number of rows of data.
NumRows = Range("A" & Rows.Count).End(xlUp).Row
' Select cell a1.
Range("A1").Select
' Establish "For" loop to loop "numrows" number of times.
For x = 1 To NumRows
' Insert your code here.
'initiating a new instance of Internet Explorer and asigning it to objIE
Set objIE = New InternetExplorer
'make IE browser visible (False would allow IE to run in the background)
objIE.Visible = True
'navigate IE to this web page (a pretty neat search engine really)
objIE.navigate "http://ec.europa.eu/taxation_customs/vies/vatResponse.html"
'wait here a few seconds while the browser is busy
Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop
'in the search box put cell "A2" value, the word "in" and cell "C1" value
objIE.document.getElementById("countryCombobox").Value = "GB"
objIE.document.getElementById("number").Value = ActiveCell.Value
'click the 'go' button
objIE.document.getElementById("submit").Click
'wait again for the browser
Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop
On Error Resume Next
Dim vatResponse As String
Do While vatFormTable Is Nothing
Set vatFormTable = objIE.document.getElementById("vatResponseFormTable")
Loop
Do While tr Is Nothing
Set tr = vatFormTable.getElementsByTagName("tr")(0)
Loop
vatResponse = tr.Children(0).innerText
ActiveCell.Offset(0, 2).Value = vatResponse
' Selects cell down 1 row from active cell.
'Next
Application.ScreenUpdating = True
'close the browser
objIE.Quit
ActiveCell.Offset(1, 0).Select
'End
Next
'exit our SearchBot subroutine
End Sub

how to continue VBA code after opening a new web page

I'm new to creating VBA code and I'm slowly getting a basic understanding of it, however I'm unable to pass this point of my project without assistance. I have the code below and runs great up until I need to continue the code with the new page that opens. I have no idea on how to be able to continue the code and the plan is to be able to click on the odds comparison tab and extract data from that page. Any assistance would be much appreciated.
Sub odd_comparison()
Dim objIE As InternetExplorer
Dim ele As Object
Dim y As Integer
Set objIE = New InternetExplorer
objIE.Visible = True
objIE.navigate "http://www.flashscore.com/basketball/"
Do While objIE.Busy = True Or objIE.readyState <> 4: DoEvents: Loop
objIE.document.getElementById("fs").Children(0) _
.Children(2).Children(2).Children(0).Children(2).Click
End Sub
Try to make loop until the webpage ready as described in this and this answers (you know, replace WScript.Sleep with DoEvents for VBA).
Inspect the target element on the webpage with Developer Tools (using context menu or pressing F12). HTML content is as follows:
bwin.fr Odds
As you can see there is onclick attribute, and actually you can try to execute jscript code from it instead of invoking click method:
objIE.document.parentWindow.execScript "setNavigationCategory(4);pgenerate(true, 0,false,false,2); e_t.track_click('iframe-bookmark-click', 'odds');", "javascript"
Going further you can find the following spinner element, which appears for the short time while data is being loaded after the tab clicked:
<div id="preload" class="preload pvisit" style="display: none;"><span>Loading ...</span></div>
So you can detect when the data loading is completed by checking the visibility state:
Do Until objIE.document.getElementById("preload").style.display = "none"
DoEvents
Loop
The next step is extracting the data you need. You can get all tables from central block: .document.getElementById("fs").getElementsByTagName("table"), loop through tables and get all rows oTable.getElementsByTagName("tr"), and finally get all cells .getElementsByTagName("td") and innerText.
The below example shows how to extract all table data from the webpage odds comparison tab to Excel worksheet:
Option Explicit
Sub Test_Get_Data_www_flashscore_com()
Dim aData()
' clear sheet
Sheets(1).Cells.Delete
' retrieve content from web site, put into 2d array
aData = GetData()
' output array to sheet
Output Sheets(1).Cells(1, 1), aData
MsgBox "Completed"
End Sub
Function GetData()
Dim oIE As Object
Dim cTables As Object
Dim oTable As Object
Dim cRows As Object
Dim oRow As Object
Dim aItems()
Dim aRows()
Dim cCells As Object
Dim i As Long
Dim j As Long
Set oIE = CreateObject("InternetExplorer.Application")
With oIE
' navigate to target webpage
.Visible = True
.navigate "http://www.flashscore.com/basketball/"
' wait until webpage ready
Do While .Busy Or Not .readyState = 4: DoEvents: Loop
Do Until .document.readyState = "complete": DoEvents: Loop
Do While TypeName(.document.getElementById("fscon")) = "Null": DoEvents: Loop
' switch to odds tab
.document.parentWindow.execScript _
"setNavigationCategory(4);pgenerate(true, 0,false,false,2); e_t.track_click('iframe-bookmark-click', 'odds');", "javascript"
Do Until .document.getElementById("preload").Style.display = "none": DoEvents: Loop
' get all table nodes
Set cTables = .document.getElementById("fs").getElementsByTagName("table")
' put all rows into dictionary to compute total rows count
With CreateObject("Scripting.Dictionary")
' process all tables
For Each oTable In cTables
' get all row nodes within table
Set cRows = oTable.getElementsByTagName("tr")
' process all rows
For Each oRow In cRows
' put each row into dictionary
Set .Item(.Count) = oRow
Next
Next
' retrieve array from dictionary
aItems = .Items()
End With
' redim 1st dimension equal total rows count
ReDim aRows(1 To UBound(aItems) + 1, 1 To 1)
' process all rows
For i = 1 To UBound(aItems) + 1
Set oRow = aItems(i - 1)
' get all cell nodes within row
Set cCells = aItems(i - 1).getElementsByTagName("td")
' process all cells
For j = 1 To cCells.Length
' enlarge 2nd dimension if necessary
If UBound(aRows, 2) < j Then ReDim Preserve aRows(1 To UBound(aItems) + 1, 1 To j)
' put cell innertext into array
aRows(i, j) = Trim(cCells(j - 1).innerText)
DoEvents
Next
Next
.Quit
End With
' return populated array
GetData = aRows
End Function
Sub Output(objDstRng As Range, arrCells As Variant)
With objDstRng
.Parent.Select
With .Resize( _
UBound(arrCells, 1) - LBound(arrCells, 1) + 1, _
UBound(arrCells, 2) - LBound(arrCells, 2) + 1)
.NumberFormat = "#"
.Value = arrCells
.Columns.AutoFit
End With
End With
End Sub
Webpage odds comparison tab content for me is as follows:
It gives the output:

How can I get Google search result snippets of first page in Excel using VBA

I have a list of 1000 keywords in A1:A1000. I want to get the Google search result snippets of first page in corresponding cells of each keyword. Ex: search result snippets of A1 cell should be in B1...*1 and so on. Any help is much appreciated.
Consider the below example:
Option Explicit
Const TargetItemsQty = 30 ' results for each keyword
Sub GWebSearchIECtl()
Dim objSheet As Worksheet
Dim objIE As Object
Dim x As Long
Dim y As Long
Dim strSearch As String
Dim lngFound As Long
Dim st As String
Dim colGItems As Object
Dim varGItem As Variant
Dim strHLink As String
Dim strDescr As String
Dim strNextURL As String
Set objSheet = Sheets("Sheet1")
Set objIE = CreateObject("InternetExplorer.Application")
objIE.Visible = True ' for debug or captcha request cases
y = 1 ' start searching for the keyword in the first row
With objSheet
.Select
.Range(.Columns("B:B"), .Columns("B:B").End(xlToRight)).Delete ' clear previous results
.Range("A1").Select
Do Until .Cells(y, 1) = ""
x = 2 ' start writing results from column B
.Cells(y, 1).Select
strSearch = .Cells(y, 1) ' current keyword
With objIE
lngFound = 0
.Navigate "https://www.google.com/search?q=" & EncodeUriComponent(strSearch) ' go to first search results page
Do
Do While .Busy Or Not .readyState = 4: DoEvents: Loop ' wait IE
Do Until .document.readyState = "complete": DoEvents: Loop ' wait document
Do While TypeName(.document.getElementById("res")) = "Null": DoEvents: Loop ' wait [#res] element
Set colGItems = .document.getElementById("res").getElementsByClassName("g") ' collection of search result [.g] items
For Each varGItem In colGItems ' process each item in collection
If varGItem.getElementsByTagName("a").Length > 0 And varGItem.getElementsByClassName("st").Length > 0 Then ' must have hyperlink and description
strHLink = varGItem.getElementsByTagName("a")(0).href ' get first hyperlink [a] found in current item
strDescr = GetInnerText(varGItem.getElementsByClassName("st")(0).innerHTML) ' get first description [span.st] found in current item
lngFound = lngFound + 1
With objSheet ' put result into cell
.Hyperlinks.Add .Cells(y, x), strHLink, , , strDescr
.Cells(y, x).WrapText = True
x = x + 1 ' next column
End With
If lngFound = TargetItemsQty Then Exit Do ' continue with next keyword - necessary quantity of the results for current keyword found
End If
DoEvents
Next
If TypeName(.document.getElementById("pnnext")) = "Null" Then Exit Do ' continue with next keyword - no [a#pnnext.pn] next page button exists
strNextURL = .document.getElementById("pnnext").href ' get next page url
.Navigate strNextURL ' go to next search results page
Loop
End With
y = y + 1 ' next row
Loop
End With
objIE.Quit
' google web search page contains the elements:
' [div#res] - main search results block
' [div.g] - each result item block within [div#res]
' [a] - hyperlink ancor(s) within each [div.g]
' [span.st] - description(s) within each [div.g]
' [a#pnnext.pn] - hyperlink ancor to the next search results page
End Sub
Function EncodeUriComponent(strText As String) As String
Static objHtmlfile As Object
If objHtmlfile Is Nothing Then
Set objHtmlfile = CreateObject("htmlfile")
objHtmlfile.parentWindow.execScript "function encode(s) {return encodeURIComponent(s)}", "jscript"
End If
EncodeUriComponent = objHtmlfile.parentWindow.encode(strText)
End Function
Function GetInnerText(strText As String) As String
Static objHtmlfile As Object
If objHtmlfile Is Nothing Then
Set objHtmlfile = CreateObject("htmlfile")
objHtmlfile.Open
objHtmlfile.Write "<body></body>"
End If
objHtmlfile.body.innerHTML = strText
GetInnerText = objHtmlfile.body.innerText
End Function

Webbrowser control in userform: how to wait for page to initialize

Task: using Excel VBA to navigate to a website, log in and go to an input page.
On that page, sequentially enter a series of values stored in a column in Sheet1.
What I've done so far:
I create a webbrowser control and navigate to the website and stop.
Then click on a button on Sheet1 with the macros that will do the inputting, stored in a module.
What's happening:
The control comes up nicely and navigates to the intended site. (this is the userform code)
Click on the button and it gets the userid and password from the spreadsheet, inputs them, clicks on the login button and all is well.
However, the next statement is:
Set inputfield = WebBrowser.objWebBrowser.Document.getElementById("ctl02_ctl03_ddlBus")
and inputfield comes up empty.
If I stop execution and step through it, it'll work.
I've tried Application.Wait; For x = 1 to 5000000; On Error Goto/Resume and keep trying, but nothings works.
I've also tried .NavigateComplete, .DocumentCompleted, as well as others, but I get errors saying member is not supported.
I am at my wits end - I'm just so close!! So far, I've spent more time on this that it will ever save, but now it's personal! Thanks for your help.
This is borrowed code from another site that initializes the control.
Private Sub UserForm_Initialize()
Dim a, c As Integer
With Me
.StartUpPosition = 0
.Top = 150
.Left = -700
End With
With Me.objWebBrowser
.Navigate2 "http://www.schoolbuscity.com/Mapnetweb_47/login.aspx"
.Visible = True
End With
End Sub
Private Sub GetSheets()
'this is my code
Dim inputfield As Object
Dim SendText As String
Dim NumberOfRoutes, r, errCount As Integer
errCount = 0
NumberOfRoutes = Range("NumberOfRoutes")
ReDim RouteNumbers(NumberOfRoutes) As String
For r = 1 To NumberOfRoutes
RouteNumbers(r) = Cells(r, 1).Value
Next r
' Sheets("Sheet1").Select
Range(Cells(5, 2), Cells(6, 2)).ClearContents ' this indicates success for the chosen cells
SendText = Range("userid").Value
Set inputfield = WebBrowser.objWebBrowser.Document.getElementById("Login1_UserName")
inputfield.Value = SendText
SendText = Range("password").Value
Set inputfield = WebBrowser.objWebBrowser.Document.getElementById("Login1_Password")
inputfield.Value = SendText
Set inputfield = WebBrowser.objWebBrowser.Document.getElementById("Login1_Login")
Application.Wait (Now + TimeValue("0:00:01"))
inputfield.Click
Application.Wait (Now + TimeValue("0:00:01")) ' I've tried waiting for up to 10 seconds
Set inputfield = Nothing
On Error GoTo TryAgain
For r = 5 To 6 'NumberOfRoutes ' just want to use 2 loops for testing
' this is where is fails, I believe, because the page is not initialized
' but if waiting is not the answer, then what is?
Set inputfield = WebBrowser.objWebBrowser.Document.getElementById("ctl02_ctl03_ddlBus")
inputfield.Value = RouteNumbers(r)
Set inputfield = WebBrowser.objWebBrowser.Document.getElementById("ctl02_ctl03_btnGo")
inputfield.Click
Application.Wait (Now + TimeValue("0:00:01"))
Cells(r, 2).Value = "Sent"
' WebBrowser.objWebBrowser.Document.Print
WebBrowser.objWebBrowser.GoBack
Next r
GoTo EndIt
TryAgain:
Set inputfield = Nothing
Set inputfield = WebBrowser.objWebBrowser.Document.getElementById("ctl02_ctl03_ddlBus")
errCount = errCount + 1
If errCount > 5 Then GoTo EndIt
Resume
EndIt:
If errCount > 0 Then
MsgBox "errCount= " + CStr(errCount)
Else
MsgBox "Did it"
End If
End Sub
This is how to waitbin vbscript.
Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = 0
'must navigate to a local file to avoid security prompts
ie.Navigate2 "C:\Users\User\Desktop\Filter.html"
Do
wscript.sleep 100
Loop until ie.document.readystate = "complete"
examples in VBA:
Private Sub UserForm_Initialize()
Set ie = Me.WebBrowser1
ie.Navigate2 "about:blank"
Do Until ie.ReadyState = READYSTATE_COMPLETE
DoEvents
Loop
Set ie = Nothing
End Sub
Private Sub Conectar_Click()
Dim ie As Object
Set ie = Me.WebBrowser1
ie.Navigate2 "http://www.mytest.com"
Do Until ie.ReadyState = READYSTATE_COMPLETE
DoEvents
Loop
'different alternatives
'Dim inputfield As Object
'Set inputfield = ie.Document.getElementById("Login_tbLogin")
'inputfield.Value = "mylogin"
'Set inputfield = Nothing
'ie.Document.getElementById("Login_tbLogin").Value = "mylogin"
'ie.Document.All("Login_tbLogin").Focus
'ie.Document.All("Login_tbLogin").Value = "mylogin"
ie.Document.All.Item("Login_tbLogin").Value = "mylogin"
Set ie = Nothing
End Sub

Excel VBA Macro: Scraping data from site table that spans multiple pages

Thanks in advance for the help. I'm running Windows 8.1, I have the latest IE / Chrome browsers, and the latest Excel. I'm trying to write an Excel Macro that pulls data from StackOverflow (https://stackoverflow.com/tags). Specifically, I'm trying to pull the date (that the macro is run), the tag names, the # of tags, and the brief description of what the tag is. I have it working for the first page of the table, but not for the rest (there are 1132 pages at the moment). Right now, it overwrites the data everytime I run the macro, and I'm not sure how to make it look for the next empty cell before running.. Lastly, I'm trying to make it run automatically once per week.
I'd much appreciate any help here. Problems are:
Pulling data from the web table beyond the first page
Making it scrape data to the next empty row rather than overwriting
Making the Macro run automatically once per week
Code (so far) is below. Thanks!
Enum READYSTATE
READYSTATE_UNINITIALIZED = 0
READYSTATE_LOADING = 1
READYSTATE_LOADED = 2
READYSTATE_INTERACTIVE = 3
READYSTATE_COMPLETE = 4
End Enum
Sub ImportStackOverflowData()
'to refer to the running copy of Internet Explorer
Dim ie As InternetExplorer
'to refer to the HTML document returned
Dim html As HTMLDocument
'open Internet Explorer in memory, and go to website
Set ie = New InternetExplorer
ie.Visible = False
ie.navigate "http://stackoverflow.com/tags"
'Wait until IE is done loading page
Do While ie.READYSTATE <> READYSTATE_COMPLETE
Application.StatusBar = "Trying to go to StackOverflow ..."
DoEvents
Loop
'show text of HTML document returned
Set html = ie.document
'close down IE and reset status bar
Set ie = Nothing
Application.StatusBar = ""
'clear old data out and put titles in
'Cells.Clear
'put heading across the top of row 3
Range("A3").Value = "Date Pulled"
Range("B3").Value = "Keyword"
Range("C3").Value = "# Of Tags"
'Range("C3").Value = "Asked This Week"
Range("D3").Value = "Description"
Dim TagList As IHTMLElement
Dim Tags As IHTMLElementCollection
Dim Tag As IHTMLElement
Dim RowNumber As Long
Dim TagFields As IHTMLElementCollection
Dim TagField As IHTMLElement
Dim Keyword As String
Dim NumberOfTags As String
'Dim AskedThisWeek As String
Dim TagDescription As String
'Dim QuestionFieldLinks As IHTMLElementCollection
Dim TodaysDate As Date
Set TagList = html.getElementById("tags-browser")
Set Tags = html.getElementsByClassName("tag-cell")
RowNumber = 4
For Each Tag In Tags
'if this is the tag containing the details, process it
If Tag.className = "tag-cell" Then
'get a list of all of the parts of this question,
'and loop over them
Set TagFields = Tag.all
For Each TagField In TagFields
'if this is the keyword, store it
If TagField.className = "post-tag" Then
'store the text value
Keyword = TagField.innerText
Cells(RowNumber, 2).Value = TagField.innerText
End If
If TagField.className = "item-multiplier-count" Then
'store the integer for number of tags
NumberOfTags = TagField.innerText
'NumberOfTags = Replace(NumberOfTags, "x", "")
Cells(RowNumber, 3).Value = Trim(NumberOfTags)
End If
If TagField.className = "excerpt" Then
Description = TagField.innerText
Cells(RowNumber, 4).Value = TagField.innerText
End If
TodaysDate = Format(Now, "MM/dd/yy")
Cells(RowNumber, 1).Value = TodaysDate
Next TagField
'go on to next row of worksheet
RowNumber = RowNumber + 1
End If
Next
Set html = Nothing
'do some final formatting
Range("A3").CurrentRegion.WrapText = False
Range("A3").CurrentRegion.EntireColumn.AutoFit
Range("A1:C1").EntireColumn.HorizontalAlignment = xlCenter
Range("A1:D1").Merge
Range("A1").Value = "StackOverflow Tag Trends"
Range("A1").Font.Bold = True
Application.StatusBar = ""
MsgBox "Done!"
End Sub
There's no need to scrape Stack Overflow when they make the underlying data available to you through things like the Data Explorer. Using this query in the Data Explorer should get you the results you need:
select t.TagName, t.Count, p.Body
from Tags t inner join Posts p
on t.ExcerptPostId = p.Id
order by t.count desc;
The permalink to that query is here and the "Download CSV" option which appears after the query runs is probably the easiest way to get the data into Excel. If you wanted to automate that part of things, the direct link to the CSV download of results is here
You can improve this to parse out exact elements but it loops all the pages and grabs all the tag info (everything next to a tag)
Option Explicit
Public Sub ImportStackOverflowData()
Dim ie As New InternetExplorer, html As HTMLDocument
Application.ScreenUpdating = False
With ie
.Visible = True
.navigate "https://stackoverflow.com/tags"
While .Busy Or .READYSTATE < 4: DoEvents: Wend
Set html = .document
Dim numPages As Long, i As Long, info As Object, item As Object, counter As Long
numPages = html.querySelector(".page-numbers.dots ~ a").innerText
For i = 1 To 2 ' numPages ''<==1 to 2 for testing; use to numPages
DoEvents
Set info = html.getElementById("tags_list")
For Each item In info.getElementsByClassName("grid-layout--cell tag-cell")
counter = counter + 1
Cells(counter, 1) = item.innerText
Next item
html.querySelector(".page-numbers.next").Click
While .Busy Or .READYSTATE < 4: DoEvents: Wend
Set html = .document
Next i
Application.ScreenUpdating = True
.Quit '<== Remember to quit application
End With
End Sub
I'm not making use of the DOM, but I find it very easy to get around just searching between known tags. If ever the expressions you are looking for are too common just tweak the code a bit so that it looks for a string after a string).
An example:
Public Sub ZipLookUp()
Dim URL As String, xmlHTTP As Object, html As Object, htmlResponse As String
Dim SStr As String, EStr As String, EndS As Integer, StartS As Integer
Dim Zip4Digit As String
URL = "https://tools.usps.com/go/ZipLookupResultsAction!input.action?resultMode=1&companyName=&address1=1642+Harmon+Street&address2=&city=Berkeley&state=CA&urbanCode=&postalCode=&zip=94703"
Set xmlHTTP = CreateObject("MSXML2.XMLHTTP")
xmlHTTP.Open "GET", URL, False
On Error GoTo NoConnect
xmlHTTP.send
On Error GoTo 0
Set html = CreateObject("htmlfile")
htmlResponse = xmlHTTP.ResponseText
If htmlResponse = Null Then
MsgBox ("Aborted Run - HTML response was null")
Application.ScreenUpdating = True
GoTo End_Prog
End If
'Searching for a string within 2 strings
SStr = "<span class=""address1 range"">" ' first string
EStr = "</span><br />" ' second string
StartS = InStr(1, htmlResponse, SStr, vbTextCompare) + Len(SStr)
EndS = InStr(StartS, htmlResponse, EStr, vbTextCompare)
Zip4Digit = Left(Mid(htmlResponse, StartS, EndS - StartS), 4)
MsgBox Zip4Digit
GoTo End_Prog
NoConnect:
If Err = -2147467259 Or Err = -2146697211 Then MsgBox "Error - No Connection": GoTo End_Prog 'MsgBox Err & ": " & Error(Err)
End_Prog:
End Sub