Hy Experts, I am new here, I am getting problem with my Excel VBA Code that is use to extract the data over the website. I have two sheets with name as "Input" & "Output" that looks like this....
Iputsheet
Output
The first sheet will get a url as an input and than run the code written below...
Sub extractTablesData()
Dim IE As Object, obj As Object
Dim str, e As String
Dim pgf, pgt, pg As Integer
Dim ele, Results As Object
Dim add, size, cno, price, inurl, sp, sp1 As String
Dim isheet, rts As Worksheet
Dim LastRow As Long
Set IE = CreateObject("InternetExplorer.Application")
Set isheet = Worksheets("InputSheet")
Set rts = Worksheets("Results")
url = isheet.Cells(3, 2)
RowCount = 1
rts.Range("A" & RowCount) = "Address"
rts.Range("B" & RowCount) = "Size"
rts.Range("C" & RowCount) = "Contact Number"
rts.Range("D" & RowCount) = "Price"
rts.Range("E" & RowCount) = "Url"
LastRow = rts.Cells(Rows.Count, 2).End(xlUp).Row
'RowCount = LastRow
With IE
.Visible = True
.Navigate (url)
DoEvents
Do While IE.busy Or IE.readystate <> 4
Loop
'Application.Wait (Now + #12:00:05 AM#)
For Each Results In .Document.all
Select Case Results.className
Case "title search-title"
str = Results.innerText
str1 = Split(str, " ")
str = CInt(str1(0))
End Select
If Results.className = "btn btn-main-inverted dropdown-toggle" And InStr(1, Results.Title, " page") > 2 Then
str2 = Results.Title
str1 = Split(str2, " ")
str2 = CInt(str1(0))
End If
Next
pgno = WorksheetFunction.RoundUp(str / str2, 0)
End With
IE.Quit
Set IE = Nothing
UrlS = Split(url, "?")
Url1 = UrlS(0)
Url2 = "?" & UrlS(1)
For i = 1 To pgno
Set IE = CreateObject("InternetExplorer.Application")
url = Url1 & "/" & i & Url2
With IE
.Visible = True
.Navigate (url)
DoEvents
Do While IE.busy Or IE.readystate <> 4
Loop
'Application.Wait (Now + #12:00:08 AM#)
For Each ele In .Document.all
Select Case ele.className
Case "listing-img-a"
inurl = ele.href
rts.Cells(LastRow + 1, 5) = inurl
Case "listing-location"
LastRow = LastRow + 1
add = ele.innerText
rts.Cells(LastRow, 1) = add
Case "lst-sizes"
sp = Split(ele.innerText, " ·")
size = sp(0)
rts.Cells(LastRow, 2) = size
Case "pgicon pgicon-phone js-agent-phone-number" ' btn-tools" 'pgicon pgicon-phone js-agent-phone-number" 'agent-phone-number"
rts.Cells(LastRow, 3) = ele.innerText
Case "listing-price"
price = ele.innerText
rts.Cells(LastRow, 4) = price
End Select
Next
LastRow = rts.Cells(Rows.Count, 2).End(xlUp).Row
rts.Activate
rts.Range("A" & LastRow).Select
End With
IE.Quit
Set IE = Nothing
Application.Wait (Now + #12:00:04 AM#)
Next i
MsgBox "Success"
End Sub
after execution of this code I am getting this error....
Error Message after code execution
after debugging I am getting this field as highlighted....
Debug Message
Please check and make me the correction where I am getting error... This code will extract the data after successful running, and at the end it will run the message box with message as "Success"...
Getting the actual info off the page efficiently:
You could try the following method which uses CSS selectors.
The "." means class and " a" means a tags within preceeding parent element.
Example: So CSS pattern .listing-info a would be a tags within parent element(s) having class = listing-info.
querySelectorAll will find all matching elements having this CSS pattern and return a nodeList.
Option Explicit
Public Sub GetListings()
Dim IE As New InternetExplorer
Application.ScreenUpdating = False
With IE
.Visible = True
.navigate "https://www.propertyguru.com.sg/singapore-property-listing/property-for-sale?limit=30&market=residential&property_type_code%5B%5D=4S&property_type=H&freetext=Yishun", False
While .Busy Or .readyState < 4: DoEvents: Wend
Dim addresses As Object, address As Object, sizes As Object, prices As Object, _
listingIds As Object, i As Long, urls As Object
With .document
Set addresses = .querySelectorAll(".listing-location")
Set listingIds = .querySelectorAll(".listing-item")
Set sizes = .querySelectorAll(".lst-sizes")
Set prices = .querySelectorAll(".price")
Set urls = .querySelectorAll(".listing-info a")
End With
Dim headers()
headers = Array("Address", "Size", "ListingId", "Price", "Url")
With ActiveSheet
.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
For i = 0 To addresses.Length - 1
.Cells(i + 2, 1) = addresses.item(i).innerText
.Cells(i + 2, 2) = Split(sizes.item(i).innerText, "S$")(0)
.Cells(i + 2, 3) = Split(Split(listingIds.item(i).outerHTML, "listing-id-")(1), Chr$(32))(0)
.Cells(i + 2, 4) = "S$" & prices.item(i).innerText
.Cells(i + 2, 5) = "https://www.propertyguru.com.sg/" & urls.item(i).getAttribute("href")
Next i
End With
.Quit
End With
Application.ScreenUpdating = True
End Sub
Getting the number of pages:
You could use a function to get the number of pages in a more reliable way. You can then amend the code above to loop from 1 to pgno very easily.
Sub Main
Dim pgno As Long
'your other code
pgno = GetNumberOfPages(.document)
'other code
End Sub
Public Function GetNumberOfPages(ByVal doc As HTMLDocument) As Long
On Error GoTo errhand:
GetNumberOfPages = doc.querySelector(".listing-pagination li:nth-last-child(2)").innerText
Exit Function
errhand:
If Err.Number <> 0 Then GetNumberOfPages = 1
End Function
Notes on your code from my original non-answer:
I would go with what I have written above and amend into a loop but here are my observations on your code:
0) Main division by 0 error
You need to handle the divide by zero error of str2 = 0. For example:
You could declare pgno as Variant and have
If str2 = 0 Then
pgNo = CVErr(xlErrDiv0)
Else
pgno = WorksheetFunction.RoundUp(str / str2, 0)
End If
1) Also, note that when you have multiple declarations on the same line and only declare the type of one, then all the undeclared types implicitly are variant.
E.g.
Dim add, size, cno, price, inurl, sp, sp1 As String
Only sp1 is a String. Everthing else is a variant.
If all strings then declare as:
Dim add As String, size As String, cno As String, price As String, inurl As String, sp1 As String
I exclude sp As String because I think it should be sp() As String.
And as add and size are methods in VBA, I would avoid using them as variable names, and go with iAdd or iSize, or something more descriptive and useful that cannot be considered ambiguous.
2) You also do not have to use hungarian/pseudo-hungarian notation e.g. str.
3) Use Integer not Long
4) Use Option Explicit and check you datatypes. For example, as mentioned in comments, did you mean for str1 to be a string that you are using in division? Are you relying on an implicit conversion? Don't. Declare as the expected type.
For example: Dim str1() As String, str2 As String, pgno As Double
This will also highlight that you have missing variable declarations e.g. RowCount.
Related
Example of what code do for day 20/04/2019
I'm trying to scrape some odds from oddsportal for some leagues. But since I open too many links, after some time my code stops and shows me the following error:
Run-time error '70': Permission denied.
I tried putting some delay in the code but the error persists. Could someone please help me?
Sub test()
Dim IE() As Object
Dim IE1 As Object
Dim doc As HTMLDocument
Dim link1x2 As String
Dim linkover As String
Dim linkbtts As String
''Novo código
Set IE1 = CreateObject("InternetExplorer.Application")
IE1.Visible = False
IE1.Navigate "https://www.oddsportal.com/matches/soccer/20190420"
Do While IE1.Busy Or IE1.ReadyState <> 4
Application.Wait DateAdd("s", 1, Now)
Loop
Set doc = IE1.Document
Set jogos = doc.getElementsByClassName("deactivate")
ReDim IE(0 To jogos.Length * 3)
i = 2
j = 0
For Each jogo In jogos
URL = jogo.Children(1).Children(0).href
'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
Set IE(j) = CreateObject("InternetExplorer.Application")
link1x2 = URL & "#1X2;2"
IE(j).Visible = False
IE(j).Navigate link1x2
Do While IE(j).Busy Or IE(j).ReadyState <> 4
Application.Wait DateAdd("s", 1, Now)
Loop
Set doc = IE(j).Document
Set equipas = doc.getElementById("col-content").Children(0)
Set liga = doc.getElementsByClassName("home")(0).Children(0).Children(3)
'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
For k = 1 To 25
If liga.innerText = Worksheets("Plan2").Range("A" & k) Then
Worksheets("Plan1").Range("M" & i) = liga.innerText
Worksheets("Plan1").Range("A" & i) = equipas.innerText
oddH = doc.getElementsByClassName("aver")(0).Children(1).innerText
oddD = doc.getElementsByClassName("aver")(0).Children(2).innerText
oddA = doc.getElementsByClassName("aver")(0).Children(3).innerText
Worksheets("Plan1").Range("C" & i) = oddH
Worksheets("Plan1").Range("D" & i) = oddD
Worksheets("Plan1").Range("E" & i) = oddA
Set IE(j + 1) = CreateObject("InternetExplorer.Application")
linkbtts = URL & "#bts;2"
IE(j + 1).Visible = False
IE(j + 1).Navigate linkbtts
Do While IE(j + 1).Busy Or IE(j + 1).ReadyState <> 4
Application.Wait DateAdd("s", 1, Now)
Loop
Set doc = IE(j + 1).Document
oddBTTS = doc.getElementsByClassName("aver")(0).Children(1).innerText
oddNBTTS = doc.getElementsByClassName("aver")(0).Children(2).innerText
Worksheets("Plan1").Range("G" & i) = oddBTTS
Worksheets("Plan1").Range("H" & i) = oddNBTTS
IE(j + 1).Quit
Set IE(j + 2) = CreateObject("InternetExplorer.Application")
linkover = URL & "#over-under;2;2.50;0"
IE(j + 2).Visible = False
IE(j + 2).Navigate linkover
Do While IE(j + 2).Busy Or IE(j + 2).ReadyState <> 4
Application.Wait DateAdd("s", 1, Now)
Loop
Set doc = IE(j + 2).Document
oddover = doc.getElementsByClassName("aver")(0).Children(2).innerText
oddunder = doc.getElementsByClassName("aver")(0).Children(3).innerText
Worksheets("Plan1").Range("J" & i) = oddover
Worksheets("Plan1").Range("K" & i) = oddunder
IE(j + 2).Quit
i = i + 1
End If
Next k
IE(j).Quit
Application.Wait DateAdd("s", 1, Now)
Application.Wait DateAdd("s", 1, Now)
Application.Wait DateAdd("s", 1, Now)
Application.Wait DateAdd("s", 1, Now)
Application.Wait DateAdd("s", 1, Now)
Application.Wait DateAdd("s", 1, Now)
Application.Wait DateAdd("s", 1, Now)
Application.Wait DateAdd("s", 1, Now)
Application.Wait DateAdd("s", 1, Now)
Application.Wait DateAdd("s", 1, Now)
j = j + 1
Next jogo
End Sub
tl;dr;
One of the immediately obvious issues is the repeated creation of IE instances when only one is needed. Permission denied can occur for a number of reasons including not handling/disposing of objects properly.
The following shows you how to:
Work more efficiently with a single IE instance
Use a helper function to gather all the urls to visit and filter on countries of interest
Correctly retrieve liga value and assign country to a country variable
Navigate to pages and between tabs accurately. Simply concatenating a suffix e.g. #bts;2 did not prove reliable for me with page almost always defaulting to default tab of #1X2;2. Below clicks/use of events are deployed to achieve the required navigation
Apply condition based waits for content to be present with a timed loop demonstrated as well as a loop waiting for change in attribute value
Reduce I/O and significantly boost execution time by storing results in an array and writing that array, results, out once to the sheet. Writing an item at a time to the sheet is an expensive I/O operation
Use faster CSS selectors which modern browsers are optimised for
Caveats:
Tested with all links but there is scope for tightening up the code
It is likely you may need to have a condition based wait for each event (clicking/FireEvent) on the page. I have demonstrated a variety of these.
Example contents of results array (1 index expanded) :
Empty indices are deliberately left to reflect desired output format. One additional column for country is added at end.
Example output:
Requirements:
VBE > Tools > References > Add reference to Microsoft HTML Object Library
VBA:
Option Explicit
'VBE > Tools > References:
' Microsoft Internet Controls
Public Sub GetOddsInfo()
Dim ie As New InternetExplorer, url As String, matches()
Dim i As Long, results(), ws As Worksheet, headers()
Const MAX_WAIT_SEC As Long = 10
url = "https://www.oddsportal.com/matches/soccer/20190423/"
Set ws = ThisWorkbook.Worksheets("Plan1")
headers = Array("Jogo", vbNullString, "Home Odds", "Draw odds", "Away Odds", vbNullString, "BTT", _
"NBTT", vbNullString, "O2", "U2", vbNullString, "Liga", "Country")
With ie
.Visible = True
.Navigate2 url
While .Busy Or .readyState < 4: DoEvents: Wend
matches = GetMatches(url, .document)
ReDim results(1 To UBound(matches, 1), 1 To 14)
For i = LBound(matches, 1) To UBound(matches, 1)
.Navigate2 matches(i, 4) ' default is "#1X2;2"
While .Busy Or .readyState < 4: DoEvents: Wend
Dim equipas As String, liga As String, averages As Object, oddH As String, oddD As String, oddA As String
Dim country As String
country = matches(i, 1)
liga = matches(i, 2)
equipas = matches(i, 3)
Set averages = .document.querySelectorAll(".aver td")
oddH = "'" & averages.item(1).innerText 'to ensure odds are correctly formatted on output
oddD = "'" & averages.item(2).innerText
oddA = "'" & averages.item(3).innerText
Set averages = Nothing
If .document.querySelectorAll("[onclick*='uid\(13\)'], [onmousedown*='uid\(13\)']").Length > 1 Then
On Error Resume Next
.document.querySelector("[onclick*='uid\(13\)']").FireEvent "onclick" 'both teams to score
.document.querySelector("[onmousedown*='uid\(13\)']").FireEvent "onmousedown"
On Error GoTo 0
While .Busy Or .readyState < 4: DoEvents: Wend
Dim oddBtts As String, oddNbtts As String, t As Date
t = Timer
Do
On Error Resume Next
Set averages = .document.querySelectorAll(".aver td")
On Error GoTo 0
If Timer - t > MAX_WAIT_SEC Then Exit Do
Loop While averages.Length < 2
If averages.Length > 1 Then
oddBtts = "'" & averages.item(1).innerText
oddNbtts = "'" & averages.item(2).innerText
End If
Else
oddBtts = "No odds"
oddNbtts = "No odds"
End If
Set averages = Nothing
Dim oddOver As String, oddUnder As String
If .document.querySelector("#bettype-tabs li:nth-of-type(5)").getAttribute("style") = "display: block;" Then
.document.querySelector("#bettype-tabs li:nth-of-type(5) span").FireEvent "onmousedown" 'over/under
Do
Loop Until .document.querySelector(".table-chunk-header-dark").getAttribute("style") = "display: block;"
If .document.querySelectorAll("[onclick*='P-2.50-0-0']").Length = 0 Then
oddOver = "No odds"
oddUnder = "No odds"
Else
.document.querySelector("[onclick*='P-2.50-0-0']").Click
While .Busy Or .readyState < 4: DoEvents: Wend
Set averages = .document.querySelectorAll(".aver td")
oddOver = "'" & averages.item(2).innerText
oddUnder = "'" & averages.item(3).innerText
End If
Else
oddOver = "No odds"
oddUnder = "No odds"
End If
Set averages = Nothing
Dim resultsPositions(), resultsOrder(), j As Long
resultsPositions = Array(1, 3, 4, 5, 7, 8, 10, 11, 13, 14) 'columns in output
resultsOrder = Array(equipas, oddH, oddD, oddA, oddBtts, oddNbtts, oddOver, oddUnder, liga, country)
For j = LBound(resultsPositions) To UBound(resultsPositions)
results(i, resultsPositions(j)) = resultsOrder(j)
Next
'If i = 5 Then Stop ''for testing
Next
.Quit
End With
With ws
.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End With
End Sub
Public Function GetMatches(ByVal url As String, ByVal doc As Object) As Variant
Dim results(), i As Long, listings As Object, html As HTMLDocument
Dim countries(), liga As String, country As String, equipas As String, include As Boolean
Set html = New HTMLDocument
countries = Array("Argentina", "Austria", "Belgium", "Brazil", "China", "Denmark", "England", _
"Finland", "France", "Germany", "Greece", "Ireland", "Italy", "Japan", "Netherlands", "Norway", _
"Poland", "Portugal", "Russia", "Scotland", "Spain", "Sweden", "Switzerland", "Turkey", "USA")
Set listings = doc.querySelectorAll("#table-matches tr")
Dim games As Object, r As Long
Set games = doc.querySelectorAll(".table-participant a")
ReDim results(1 To games.Length, 1 To 4) 'country, liga, equipas, url
For i = 0 To listings.Length - 1
html.body.innerHTML = listings.item(i).innerHTML
Select Case listings.item(i).className
Case "dark center"
country = Trim$(html.querySelector(".bfl").innerText)
liga = html.querySelector(".bflp + a").innerText
include = Not IsError(Application.Match(country, countries, 0))
Case "odd deactivate"
If include Then
r = r + 1
results(r, 1) = country
results(r, 2) = liga
results(r, 3) = html.querySelector("a").innerText
results(r, 4) = Replace$(html.querySelector("a").href, "about:", "https://www.oddsportal.com")
End If
Case " deactivate"
If include Then
r = r + 1
results(r, 1) = country
results(r, 2) = liga
results(r, 3) = html.querySelector("a").innerText
results(r, 4) = Replace$(html.querySelector("a").href, "about:", "https://www.oddsportal.com")
End If
End Select
Next
results = Application.Transpose(results)
ReDim Preserve results(1 To UBound(results, 1), 1 To r)
results = Application.Transpose(results)
GetMatches = results
End Function
Edit: More information - the objective of this program is to pull from an existing list of names, search the website, and bring back the corresponding NPI numbers. Thanks to user #omegastripes I was advised to shift my focus to XHR.
My question is regarding, how to populate the search with the names of the providers, and loop so that it will return the NPI's in the next cells over in the spread sheet for the remaining providers.
Related, what to do in the event nothing populates from the search
original post: Title - Do you want to continue? Internet Explorer pop up - VBA
Internet Security pop up prevents my code from continuing. Normally I would disable this request but my computer security access is limited due to using a work computer.
My question, is there a way to click "Yes" on this pop up using VBA?
Here is my code so far.
Sub GetNpi()
Dim ie As Object
'create a new instance of ie
Set ie = New InternetExplorer
ie.Visible = True
'goes to site
ie.navigate "npinumberlookup.org"
Do While ie.readyState <> READYSTATE_COMPLETE
Loop
Set ieDoc = ie.document
'select search box last name and Fill in Search Box
ie.document.getElementById("last").Focus
ie.document.getElementById("last").Value = "testlastname"
'select search box first name and Fill in Search Box
ie.document.getElementById("first").Focus
ie.document.getElementById("first").Value = "testfirstname"
Do While ie.readyState <> READYSTATE_COMPLETE
Loop
'select state drop down box enter TX
ie.document.getElementById("pracstate").Focus
ie.document.getElementById("pracstate").Value = "TX"
'click submit button
ie.document.getElementById("submit").Click
Update
Try the below code to retrieve NPI for the names from the worksheet (specify last name, first name and state):
Option Explicit
Sub TestListNPI()
' Prefix type + func
' Type: s - string, l - long, a - array
' Func: q - query, r - result
Dim i As Long
Dim j As Long
Dim k As Long
Dim sqLN As String
Dim sqFN As String
Dim aqFN
Dim sqSt As String
Dim arHdr
Dim arRows
Dim srMsg As String
Dim srLN As String
Dim srFN As String
Dim arFN
Dim lrMNQty As Long
Dim sOutput As String
i = 2
With Sheets(1)
Do
sqLN = .Cells(i, 1)
If sqLN = "" Then Exit Do
.Cells(i, 4) = "..."
sqFN = .Cells(i, 2).Value
aqFN = Split(sqFN)
sqSt = "" & .Cells(i, 3)
GetNPIData sqLN, aqFN(0), sqSt, arHdr, arRows, srMsg
If srMsg = "OK" Then
With CreateObject("Scripting.Dictionary")
For j = 0 To UBound(arRows, 1)
Do
srLN = arRows(j, 1)
If LCase(srLN) <> LCase(sqLN) Then Exit Do ' Last names should match
srFN = arRows(j, 3)
arFN = Split(srFN)
If LCase(arFN(0)) <> LCase(aqFN(0)) Then Exit Do ' First names should match
lrMNQty = UBound(arFN)
If UBound(aqFN) < lrMNQty Then lrMNQty = UBound(aqFN)
For k = 1 To lrMNQty
Select Case True
Case LCase(arFN(k)) = LCase(aqFN(k)) ' Full match
Case Len(arFN(k)) = 1 And LCase(arFN(k)) = LCase(Left(aqFN(k), 1)) ' First letter match
Case Len(arFN(k)) = 2 And Right(arFN(k), 1) = "." And LCase(Left(arFN(k), 1)) = LCase(Left(aqFN(k), 1)) ' First letter with dot match
Case Else ' No matches
Exit Do
End Select
Next
.Add arRows(j, 0), arRows(j, 1) & " " & arRows(j, 3)
Loop Until True
Next
Select Case .Count
Case 0
sOutput = "No matches"
Case 1
sOutput = .Keys()(0)
Case Else
sOutput = Join(.Items(), vbCrLf)
End Select
End With
Else
sOutput = srMsg
End If
.Cells(i, 4) = sOutput
DoEvents
i = i + 1
Loop
End With
MsgBox "Completed"
End Sub
Sub GetNPIData(sLastName, sFirstName, sState, aResultHeader, aResultRows, sStatus)
Dim sContent As String
Dim i As Long
Dim j As Long
Dim aHeader() As String
Dim aRows() As String
' Retrieve HTML content via XHR
With CreateObject("MSXML2.XMLHTTP")
.Open "POST", "http://npinumberlookup.org/getResults.php", False
.SetRequestHeader "content-type", "application/x-www-form-urlencoded"
.Send _
"last=" & EncodeUriComponent(sLastName) & _
"&first=" & EncodeUriComponent(sFirstName) & _
"&pracstate=" & EncodeUriComponent(sState) & _
"&npi=" & _
"&submit=Search" ' Setup request parameters
sContent = .ResponseText
End With
' Parse with RegEx
Do ' For break
With CreateObject("VBScript.RegExp")
.Global = True
.MultiLine = True
.IgnoreCase = True
' Minor HTML simplification
.Pattern = "<(?!/td|/tr|/th|td|tr|th|a href)[^>]*>| |\r|\n|\t"
sContent = .Replace(sContent, "")
.Pattern = "<a [^>]*href=""([^""]*)"".*?</td>"
sContent = .Replace(sContent, "$1</td>")
.Pattern = "<(\w+)\b[^>]+>"
sContent = .Replace(sContent, "<$1>")
' Extract header
.Pattern = "<tr>((?:<th>.*?</th>)+)</tr>"
With .Execute(sContent)
If .Count <> 1 Then
sStatus = "No header"
Exit Do
End If
End With
.Pattern = "<th>(.*?)</th>"
With .Execute(sContent)
ReDim aHeader(0, 0 To .Count - 1)
For i = 0 To .Count - 1
aHeader(0, i) = .Item(i).SubMatches(0)
Next
End With
aResultHeader = aHeader
' Extract data
.Pattern = "<tr>((?:<td>.*?</td>)+)</tr>"
With .Execute(sContent)
If .Count = 0 Then
sStatus = "No rows"
Exit Do
End If
ReDim aRows(0 To .Count - 1, 0)
For i = 0 To .Count - 1
aRows(i, 0) = .Item(i).SubMatches(0)
Next
End With
.Pattern = "<td>(.*?)</td>"
For i = 0 To UBound(aRows, 1)
With .Execute(aRows(i, 0))
For j = 0 To .Count - 1
If UBound(aRows, 2) < j Then ReDim Preserve aRows(UBound(aRows, 1), j)
aRows(i, j) = Trim(.Item(j).SubMatches(0))
Next
End With
Next
aResultRows = aRows
End With
sStatus = "OK"
Loop Until True
End Sub
Function EncodeUriComponent(sText)
Static oHtmlfile As Object
If oHtmlfile Is Nothing Then
Set oHtmlfile = CreateObject("htmlfile")
oHtmlfile.parentWindow.execScript "function encode(s) {return encodeURIComponent(s)}", "jscript"
End If
EncodeUriComponent = oHtmlfile.parentWindow.encode(sText)
End Function
The output for me is as follows:
For multiply entries all names are output in the last column instead of NPI.
Some explanation of the code. Generally RegEx's aren't recommended for HTML parsing, so there is disclaimer. Data being processed in this case is quite simple that is why it is parsed with RegEx. About RegEx: introduction (especially syntax), introduction JS, VB flavor. Simplification makes HTML code suitable for parsing in some degree. Patterns:
<(?!/td|/tr|/th|td|tr|th|a href)[^>]*>| |\r|\n|\t is for removing white-spaces, and all tags but table markup and links by replacing with "".
<a [^>]*href="([^"]*)".*?</td> keeps link address by replacing with $1</td>.
<(\w+)\b[^>]+> removes all unnecessary tag attributes by replacing with <$1>.
<tr>((?:<th>.*?</th>)+)</tr> matches each table header row.
<th>(.*?)</th> matches each header cell.
<tr>((?:<td>.*?</td>)+)</tr> matches each table data row.
<td>(.*?)</td> matches each data cell.
Look into how does the HTML content is changed on each step of replacemnets.
Initial answer
Avoid pop up appearing instead of bothering with it.
Make sure you are using secure HTTP protocol https://npinumberlookup.org.
You may even not use IE for webscraping at all, XHR is better choice, as it is more reliable and fast, though it requires some knowledge and experience. Here is the simple example of that:
Option Explicit
Sub Test()
Dim sContent As String
Dim i As Long
Dim j As Long
Dim aHeader() As String
Dim aRows() As String
' Retrieve HTML content via XHR
With CreateObject("MSXML2.XMLHTTP")
.Open "POST", "http://npinumberlookup.org/getResults.php", False
.SetRequestHeader "content-type", "application/x-www-form-urlencoded"
.Send _
"last=smith" & _
"&first=michael" & _
"&pracstate=NC" & _
"&npi=" & _
"&submit=Search" ' Setup request parameters
sContent = .ResponseText
End With
' Parse with RegEx
Do ' For break
With CreateObject("VBScript.RegExp")
.Global = True
.MultiLine = True
.IgnoreCase = True
' Minor HTML simplification
.Pattern = "<(?!/td|/tr|/th|td|tr|th|a href)[^>]*>| |\r|\n|\t"
sContent = .Replace(sContent, "")
.Pattern = "<a [^>]*href=""([^""]*)"".*?</td>"
sContent = .Replace(sContent, "$1</td>")
.Pattern = "<(\w+)\b[^>]+>"
sContent = .Replace(sContent, "<$1>")
' Extract header
.Pattern = "<tr>((?:<th>.*?</th>)+)</tr>"
With .Execute(sContent)
If .Count <> 1 Then
MsgBox "No header found"
Exit Do
End If
End With
.Pattern = "<th>(.*?)</th>"
With .Execute(sContent)
ReDim aHeader(0, 0 To .Count - 1)
For i = 0 To .Count - 1
aHeader(0, i) = .Item(i).SubMatches(0)
Next
End With
' Extract data
.Pattern = "<tr>((?:<td>.*?</td>)+)</tr>"
With .Execute(sContent)
If .Count = 0 Then
MsgBox "No rows found"
Exit Do
End If
ReDim aRows(0 To .Count - 1, 0)
For i = 0 To .Count - 1
aRows(i, 0) = .Item(i).SubMatches(0)
Next
End With
.Pattern = "<td>(.*?)</td>"
For i = 0 To UBound(aRows, 1)
With .Execute(aRows(i, 0))
For j = 0 To .Count - 1
If UBound(aRows, 2) < j Then ReDim Preserve aRows(UBound(aRows, 1), j)
aRows(i, j) = .Item(j).SubMatches(0)
Next
End With
Next
End With
Loop Until True
' Output
With ThisWorkbook.Sheets(1)
.Cells.Delete
Output2DArray .Cells(1, 1), aHeader
Output2DArray .Cells(2, 1), aRows
.Columns.AutoFit
End With
MsgBox "Completed"
End Sub
Sub Output2DArray(oDstRng As Range, aCells As Variant)
With oDstRng
.Parent.Select
With .Resize( _
UBound(aCells, 1) - LBound(aCells, 1) + 1, _
UBound(aCells, 2) - LBound(aCells, 2) + 1)
.NumberFormat = "#"
.Value = aCells
End With
End With
End Sub
All the data in the code could be easily obtained from browser developer tools on network tab after you click submit, as an example:
The above code returns the output for me as follows:
I'm very new to VBA and HTML/XHTML, but through online research and help from other wonderful members on here I've managed to write a code to pull the data I want. I had a hard time identifying the IDs of the elements I want since it's in XHTML, so I think that's where I've botched it the most.
The website: http://www.usbanklocations.com/banks.php?q=&ct=&ml=30&lc=
Here is what I want the code to do:
Pull Bank Name, Address, Phone Number, Total Deposits and Total Assets -- GIVEN the bank name and city I provide in my excel sheet.
Here is my code:
Private Declare Sub Sleep Lib "kernel32.dll" (ByVal dwMilliseconds As Long)
Sub CommunityBanks()
Dim IE As Object, TableResults As Object, webRow As Object, BankName As Variant, page As Long, pageTotal As Long, r As Long
Dim beginTime As Date, i As Long, myvalue As Variant
Set IE = CreateObject("internetexplorer.application")
IE.navigate "http://www.usbanklocations.com/banks.php?name=" & Range("A2").Value & "+Bank&ml=30&lc=" & Range("B2").Value & "%2C+TX"
IE.Visible = True
Do While IE.Busy Or IE.readystate <> 4 '4 = READYSTATE_COMPLETE
DoEvents
Loop
'input bank name into form
'myvalue = InputBox("Enter City. Press okay to begin search", "Bank Search")
'Range("F3").Value = myvalue
'IE.document.getelementbyid("MainContent_txtCity").Value = "LegacyTexas"
'click find button
'IE.document.getelementbyid("MainContent_btn").Click
'Sleep 5 * 1000
IE.document.getelementbytagname("table").getelementsbyclassname("btn").Click
Sleep 5 * 1000
'total pages
pageTotal = IE.document.getelementbyid("lsortby").innertext
page = 0
Do Until page = pageTotal
DoEvents
page = IE.document.getelementbyclassname("lsortby").innertext
With IE.document.getelementbyid("main")
For r = 1 To .Rows.Length - 1
If Not IsArray(BankName) Then
ReDim BankName(7, 0) As Variant
Else
ReDim Preserve BankName(7, UBound(BankName, 2) + 1) As Variant
End If
BankName(0, UBound(BankName, 2)) = .Rows(r).Cells(0).innertext
Next r
End With
If page < pageTotal Then
IE.document.getelementbyclassname("panelpn").Click
beginTime = Now
Application.Wait (Now + TimeValue("00:00:05"))
End If
Loop
For r = 0 To UBound(BankName, 2)
IE.navigate "http://www.usbanklocations.com/" & BankName(0, r)
Do While IE.Busy Or IE.readystate <> 4 '4 = READYSTATE_COMPLETE
DoEvents
Loop
'wait 5 sec. for screen refresh
Sleep 5 * 1000
With IE.document.getelementbytagname("table")
For i = 0 To .Rows.Length - 1
DoEvents
Select Case .Rows(i).Cells(0).innertext
Case "Name:"
BankName(1, r) = .Rows(i).Cells(1).innertext
Case "Location:"
BankName(2, r) = .Rows(i).Cells(1).innertext
Case "Phone:"
BankName(3, r) = .Rows(i).Cells(1).innertext
Case "Branch Deposit:"
BankName(4, r) = Replace(Replace(.Rows(i).Cells(1).innertext, ",", ""), "$", "")
Case "Total Assets:"
BankName(5, r) = Replace(Replace(.Rows(i).Cells(1).innertext, ",", ""), "$", "")
End Select
Next i
End With
Next r
IE.Quit
Set IE = Nothing
'post result on Excel cell
Worksheets(1).Range("A9").Resize(UBound(BankName, 2) + 1, UBound(BankName, 1) + 1).Value = Application.Transpose(BankName)
End Sub
Thank you in advance! I would greatly appreciate any help.
Consider the below example which uses XHR instead of IE and split-based HTML content parsing:
Option Explicit
Sub Test_usbanklocations()
Dim oSource, oDestination, y, oSrcRow, sName, sCity, sDist, sUrl0, sUrl1, sUrl2, lPage, sResp1, sResp2, i, a1, a2, a3, a4, a5
Set oSource = Sheets(1)
Set oDestination = Sheets(2)
oDestination.Cells.Delete
DataOutput oDestination, 1, Array("Name", "Location", "Phone", "Total Assets", "Total Deposits")
y = 2
For Each oSrcRow In oSource.UsedRange.Rows
sName = oSrcRow.Cells(1, 1).Value
sCity = oSrcRow.Cells(1, 2).Value
sDist = oSrcRow.Cells(1, 3).Value
sUrl0 = "http://www.usbanklocations.com/banks.php?q=" & EncodeUriComponent(sName) & "&lc=" & EncodeUriComponent(sCity) & "&ml=" & sDist
sUrl1 = sUrl0
lPage = 1
Do
sResp1 = GetXHR(sUrl1)
If InStr(sResp1, "We can not find the address you provided. Please check.") > 0 Then Exit Do
a1 = Split(sResp1, "<div class=""pl")
For i = 1 To UBound(a1)
a2 = Split(a1(i), "</div>", 3)
a3 = Split(a2(1), "<a href=""", 2)
a4 = Split(a3(1), """>", 2)
sUrl2 = "http://www.usbanklocations.com" & a4(0)
sResp2 = GetXHR(sUrl2)
a5 = Array( _
GetFragment(sResp2, "<b>Name:</b></td><td>", "</td>"), _
Replace(GetFragment(sResp2, "<b>Location:</b></td><td>", "</td>"), "View Other Branches", ""), _
GetFragment(sResp2, "<b>Phone:</b></td>", "</td>"), _
GetFragment(sResp2, "<b>Total Assets:</b></td><td>", "</td>"), _
GetFragment(sResp2, "<b>Total Deposits:</b></td><td>", "</td>") _
)
DataOutput oDestination, y, a5
y = y + 1
DoEvents
Next
If InStr(sResp1, "Next Page >") = 0 Then Exit Do
lPage = lPage + 1
sUrl1 = sUrl0 & "&ps=" & lPage
DoEvents
Loop
Next
MsgBox "Completed"
End Sub
Function GetXHR(sUrl)
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", sUrl, False
.Send
GetXHR = .ResponseText
End With
End Function
Sub DataOutput(oSht, y, aValues)
With oSht.Cells(y, 1).Resize(1, UBound(aValues) + 1)
.NumberFormat = "#"
.Value = aValues
End With
End Sub
Function GetFragment(sText, sPatt1, sPatt2)
Dim a1, a2
a1 = Split(sText, sPatt1, 2)
If UBound(a1) <> 1 Then Exit Function
a2 = Split(a1(1), sPatt2, 2)
If UBound(a2) <> 1 Then Exit Function
GetFragment = GetInnerText(a2(0))
End Function
Function EncodeUriComponent(sText)
Static objHtmlfile As Object
If objHtmlfile Is Nothing Then
Set objHtmlfile = CreateObject("htmlfile")
objHtmlfile.parentWindow.execScript "function encode(s) {return encodeURIComponent(s)}", "jscript"
End If
EncodeUriComponent = objHtmlfile.parentWindow.encode(sText)
End Function
Function GetInnerText(sText)
With CreateObject("htmlfile")
.Write ("<body>" & sText & "</body>")
GetInnerText = .DocumentElement.Document.GetElementsByTagName("body")(0).InnerText
End With
End Function
As an example, the first worksheet contains data to search (Bank name, Location and Distance to refine by):
Then result on the second worksheet is as follows:
I have a macro that enters data from Excel in a web form. it looks like this:
IE.document.all("substance[1][1]").Value = Cells(i, 6).Value
For every row that I add, the expression in [ ] changes. Is there a way to automate this? Something like:
IE.document.all("substance[n][m]").Value = Cells(i, 6).Value
so I can write at the end of the loop n=n+1?
Cheers
Here's the full code:
Sub BOMcheckAutoEingabe()
Dim IE As Object
Dim Login As String
Dim Passwort As String
Dim row As Integer
row = 22
Set IE = CreateObject("InternetExplorer.Application")
With IE
.Visible = True
ShowWindow IE.hwnd, SW_MAXIMIZE
.Navigate "https://demo.bomcheck.net/de/"
Do Until .ReadyState = 4
DoEvents
Loop
IE.document.all("username").Value = "****"
IE.document.all("password").Value = "****"
IE.document.all("Submit").Click
Application.Wait Now + TimeValue("00:00:03")
.Navigate "https://demo.bomcheck.net/de/parts/addfullmaterialsdeclaration" _
& "#fmd-table"
Application.Wait Now + TimeValue("00:00:05")
SendKeys "{TAB 6}", True
Application.Wait Now + TimeValue("00:00:02")
If Sheets("FMD).Cells(row, 1) <> 0 Then
IE.document.all("usage[1]").Value = Cells(row, 1).Value
Else
End If
End With
End Sub
as already posted in comments to OP, here the test of proposed solution
Sub test()
Dim n As Long, m As Long, Check As String
m = 1
For n = 1 To 10
Check = "substance[" & n & "][" & m & "]"
Debug.Print Check
Next n
End Sub
which is give you this
just follow this logic within your code
converting to String solved the problem:
this one does the trick:
IE.document.all("usage[" + CStr(t) + "]").Value = Cells(zeile, 1).Value
I wrote a macro to download data from a website, after the website is fully loaded, it will scrap the data by the html tag, however, sometimes the data is incorrectly scraped due to unknown error, I want to add a checking after each variant 'x' completed, e.g. If the activesheet contains the word "中报",then go back to the step "'Select the Report Type" to re-do the scraping. Also, I know some of the variables/data types are not set at the very beginning. Could anyone help to solve this? Thanks in advance!
Sub GetFinanceData()
Dim x As Variant
Dim IE As Object
For x = 1 To 1584
Dim URL As String, elemCollection As Object
Dim t As Integer, r As Integer, c As Integer
Worksheets("Stocks").Select
Worksheets("Stocks").Activate
'Open IE and Go to the Website
'URL = "http://stock.finance.sina.com.cn/hkstock/finance/00001.html"
URL = Cells(x, 1)
Set IE = CreateObject("InternetExplorer.Application")
With IE
.navigate URL
.Visible = False
Do While .Busy = True Or .readyState <> 4
Loop
DoEvents
Worksheets.Add(After:=Worksheets(Worksheets.Count)).Name = _
ThisWorkbook.Worksheets("Stocks").Range("B" & x).Value 'You could even simplify it and just state the name as Cells(x,2)
'Select the Report Type
Set selectItems = IE.Document.getElementsByTagName("select")
For Each i In selectItems
i.Value = "zero"
i.FireEvent ("onchange")
Application.Wait (Now + TimeValue("0:00:05"))
Next i
Do While .Busy: DoEvents: Loop
ActiveSheet.Range("A1:K2000").ClearContents
ActiveSheet.Range("A1").Value = .Document.getElementsByTagName("h1")(0).innerText
ActiveSheet.Range("B1").Value = .Document.getElementsByTagName("em")(0).innerText
ActiveSheet.Range("A4").Value = Worksheets("Stocks").Cells(1, 4)
'Find and Get Table Data
tblNameArr = Array(Worksheets("Stocks").Cells(2, 4), Worksheets("Stocks").Cells(3, 4), Worksheets("Stocks").Cells(4, 4), Worksheets("Stocks").Cells(5, 4))
tblStartRow = 6
Set elemCollection = .Document.getElementsByTagName("TABLE")
For t = 0 To elemCollection.Length - 1
For r = 0 To (elemCollection(t).Rows.Length - 1)
For c = 0 To (elemCollection(t).Rows(r).Cells.Length - 1)
ActiveSheet.Cells(r + tblStartRow, c + 1) = elemCollection(t).Rows(r).Cells(c).innerText
Next c
Next r
ActiveSheet.Cells(r + tblStartRow + 2, 1) = tblNameArr(t)
tblStartRow = tblStartRow + r + 4
Next t
End With
' cleaning up memory
IE.Quit
Next x
End Sub
This is cleaned up quite a bit.
I added a SelectReportType: line label. Whenever you want to go back to that condition, use insert the line
Goto SelectReportType
And it will take you to that spot. The better way to do it would be to place that code in a separate function so you can call it anytime your test for "中报" is true. But I'm not following your code well enough to understand what you are doing to assist you with that.
Sub GetFinanceData()
Dim x As Variant
Dim IE As Object
Dim URL As String, elemCollection As Object
Dim t As Integer, r As Integer, c As Integer
Dim selectItems As Variant, i As Variant
Dim tblNameArr() As String
Dim tblStartRow As Long
Worksheets("Stocks").Select
Worksheets("Stocks").Activate
For x = 1 To 1584
'Open IE and Go to the Website
'URL = "http://stock.finance.sina.com.cn/hkstock/finance/00001.html"
URL = Cells(x, 1)
Set IE = CreateObject("InternetExplorer.Application")
With IE
.Navigate URL
.Visible = False
Do While .Busy = True Or .ReadyState <> 4
Loop
DoEvents
Worksheets.Add(After:=Worksheets(Worksheets.count)).name = _
ThisWorkbook.Worksheets("Stocks").Range("B" & x).Value 'You could even simplify it and just state the name as Cells(x,2)
SelectReportType:
'Select the Report Type
Set selectItems = IE.Document.getElementsByTagName("select")
For Each i In selectItems
i.Value = "zero"
i.FireEvent ("onchange")
Application.Wait (Now + TimeValue("0:00:05"))
Next i
Do While .Busy: DoEvents: Loop
ActiveSheet.Range("A1:K2000").ClearContents
ActiveSheet.Range("A1").Value = .Document.getElementsByTagName("h1")(0).innerText
ActiveSheet.Range("B1").Value = .Document.getElementsByTagName("em")(0).innerText
ActiveSheet.Range("A4").Value = Worksheets("Stocks").Cells(1, 4)
'Find and Get Table Data
tblNameArr = Array(Worksheets("Stocks").Cells(2, 4), Worksheets("Stocks").Cells(3, 4), Worksheets("Stocks").Cells(4, 4), Worksheets("Stocks").Cells(5, 4))
tblStartRow = 6
Set elemCollection = .Document.getElementsByTagName("TABLE")
For t = 0 To elemCollection.Length - 1
For r = 0 To (elemCollection(t).Rows.Length - 1)
For c = 0 To (elemCollection(t).Rows(r).Cells.Length - 1)
ActiveSheet.Cells(r + tblStartRow, c + 1) = elemCollection(t).Rows(r).Cells(c).innerText
Next c
Next r
ActiveSheet.Cells(r + tblStartRow + 2, 1) = tblNameArr(t)
tblStartRow = tblStartRow + r + 4
Next t
End With
' cleaning up memory
IE.Quit
Next x
End Sub