Example of what code do for day 20/04/2019
I'm trying to scrape some odds from oddsportal for some leagues. But since I open too many links, after some time my code stops and shows me the following error:
Run-time error '70': Permission denied.
I tried putting some delay in the code but the error persists. Could someone please help me?
Sub test()
Dim IE() As Object
Dim IE1 As Object
Dim doc As HTMLDocument
Dim link1x2 As String
Dim linkover As String
Dim linkbtts As String
''Novo código
Set IE1 = CreateObject("InternetExplorer.Application")
IE1.Visible = False
IE1.Navigate "https://www.oddsportal.com/matches/soccer/20190420"
Do While IE1.Busy Or IE1.ReadyState <> 4
Application.Wait DateAdd("s", 1, Now)
Loop
Set doc = IE1.Document
Set jogos = doc.getElementsByClassName("deactivate")
ReDim IE(0 To jogos.Length * 3)
i = 2
j = 0
For Each jogo In jogos
URL = jogo.Children(1).Children(0).href
'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
Set IE(j) = CreateObject("InternetExplorer.Application")
link1x2 = URL & "#1X2;2"
IE(j).Visible = False
IE(j).Navigate link1x2
Do While IE(j).Busy Or IE(j).ReadyState <> 4
Application.Wait DateAdd("s", 1, Now)
Loop
Set doc = IE(j).Document
Set equipas = doc.getElementById("col-content").Children(0)
Set liga = doc.getElementsByClassName("home")(0).Children(0).Children(3)
'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
For k = 1 To 25
If liga.innerText = Worksheets("Plan2").Range("A" & k) Then
Worksheets("Plan1").Range("M" & i) = liga.innerText
Worksheets("Plan1").Range("A" & i) = equipas.innerText
oddH = doc.getElementsByClassName("aver")(0).Children(1).innerText
oddD = doc.getElementsByClassName("aver")(0).Children(2).innerText
oddA = doc.getElementsByClassName("aver")(0).Children(3).innerText
Worksheets("Plan1").Range("C" & i) = oddH
Worksheets("Plan1").Range("D" & i) = oddD
Worksheets("Plan1").Range("E" & i) = oddA
Set IE(j + 1) = CreateObject("InternetExplorer.Application")
linkbtts = URL & "#bts;2"
IE(j + 1).Visible = False
IE(j + 1).Navigate linkbtts
Do While IE(j + 1).Busy Or IE(j + 1).ReadyState <> 4
Application.Wait DateAdd("s", 1, Now)
Loop
Set doc = IE(j + 1).Document
oddBTTS = doc.getElementsByClassName("aver")(0).Children(1).innerText
oddNBTTS = doc.getElementsByClassName("aver")(0).Children(2).innerText
Worksheets("Plan1").Range("G" & i) = oddBTTS
Worksheets("Plan1").Range("H" & i) = oddNBTTS
IE(j + 1).Quit
Set IE(j + 2) = CreateObject("InternetExplorer.Application")
linkover = URL & "#over-under;2;2.50;0"
IE(j + 2).Visible = False
IE(j + 2).Navigate linkover
Do While IE(j + 2).Busy Or IE(j + 2).ReadyState <> 4
Application.Wait DateAdd("s", 1, Now)
Loop
Set doc = IE(j + 2).Document
oddover = doc.getElementsByClassName("aver")(0).Children(2).innerText
oddunder = doc.getElementsByClassName("aver")(0).Children(3).innerText
Worksheets("Plan1").Range("J" & i) = oddover
Worksheets("Plan1").Range("K" & i) = oddunder
IE(j + 2).Quit
i = i + 1
End If
Next k
IE(j).Quit
Application.Wait DateAdd("s", 1, Now)
Application.Wait DateAdd("s", 1, Now)
Application.Wait DateAdd("s", 1, Now)
Application.Wait DateAdd("s", 1, Now)
Application.Wait DateAdd("s", 1, Now)
Application.Wait DateAdd("s", 1, Now)
Application.Wait DateAdd("s", 1, Now)
Application.Wait DateAdd("s", 1, Now)
Application.Wait DateAdd("s", 1, Now)
Application.Wait DateAdd("s", 1, Now)
j = j + 1
Next jogo
End Sub
tl;dr;
One of the immediately obvious issues is the repeated creation of IE instances when only one is needed. Permission denied can occur for a number of reasons including not handling/disposing of objects properly.
The following shows you how to:
Work more efficiently with a single IE instance
Use a helper function to gather all the urls to visit and filter on countries of interest
Correctly retrieve liga value and assign country to a country variable
Navigate to pages and between tabs accurately. Simply concatenating a suffix e.g. #bts;2 did not prove reliable for me with page almost always defaulting to default tab of #1X2;2. Below clicks/use of events are deployed to achieve the required navigation
Apply condition based waits for content to be present with a timed loop demonstrated as well as a loop waiting for change in attribute value
Reduce I/O and significantly boost execution time by storing results in an array and writing that array, results, out once to the sheet. Writing an item at a time to the sheet is an expensive I/O operation
Use faster CSS selectors which modern browsers are optimised for
Caveats:
Tested with all links but there is scope for tightening up the code
It is likely you may need to have a condition based wait for each event (clicking/FireEvent) on the page. I have demonstrated a variety of these.
Example contents of results array (1 index expanded) :
Empty indices are deliberately left to reflect desired output format. One additional column for country is added at end.
Example output:
Requirements:
VBE > Tools > References > Add reference to Microsoft HTML Object Library
VBA:
Option Explicit
'VBE > Tools > References:
' Microsoft Internet Controls
Public Sub GetOddsInfo()
Dim ie As New InternetExplorer, url As String, matches()
Dim i As Long, results(), ws As Worksheet, headers()
Const MAX_WAIT_SEC As Long = 10
url = "https://www.oddsportal.com/matches/soccer/20190423/"
Set ws = ThisWorkbook.Worksheets("Plan1")
headers = Array("Jogo", vbNullString, "Home Odds", "Draw odds", "Away Odds", vbNullString, "BTT", _
"NBTT", vbNullString, "O2", "U2", vbNullString, "Liga", "Country")
With ie
.Visible = True
.Navigate2 url
While .Busy Or .readyState < 4: DoEvents: Wend
matches = GetMatches(url, .document)
ReDim results(1 To UBound(matches, 1), 1 To 14)
For i = LBound(matches, 1) To UBound(matches, 1)
.Navigate2 matches(i, 4) ' default is "#1X2;2"
While .Busy Or .readyState < 4: DoEvents: Wend
Dim equipas As String, liga As String, averages As Object, oddH As String, oddD As String, oddA As String
Dim country As String
country = matches(i, 1)
liga = matches(i, 2)
equipas = matches(i, 3)
Set averages = .document.querySelectorAll(".aver td")
oddH = "'" & averages.item(1).innerText 'to ensure odds are correctly formatted on output
oddD = "'" & averages.item(2).innerText
oddA = "'" & averages.item(3).innerText
Set averages = Nothing
If .document.querySelectorAll("[onclick*='uid\(13\)'], [onmousedown*='uid\(13\)']").Length > 1 Then
On Error Resume Next
.document.querySelector("[onclick*='uid\(13\)']").FireEvent "onclick" 'both teams to score
.document.querySelector("[onmousedown*='uid\(13\)']").FireEvent "onmousedown"
On Error GoTo 0
While .Busy Or .readyState < 4: DoEvents: Wend
Dim oddBtts As String, oddNbtts As String, t As Date
t = Timer
Do
On Error Resume Next
Set averages = .document.querySelectorAll(".aver td")
On Error GoTo 0
If Timer - t > MAX_WAIT_SEC Then Exit Do
Loop While averages.Length < 2
If averages.Length > 1 Then
oddBtts = "'" & averages.item(1).innerText
oddNbtts = "'" & averages.item(2).innerText
End If
Else
oddBtts = "No odds"
oddNbtts = "No odds"
End If
Set averages = Nothing
Dim oddOver As String, oddUnder As String
If .document.querySelector("#bettype-tabs li:nth-of-type(5)").getAttribute("style") = "display: block;" Then
.document.querySelector("#bettype-tabs li:nth-of-type(5) span").FireEvent "onmousedown" 'over/under
Do
Loop Until .document.querySelector(".table-chunk-header-dark").getAttribute("style") = "display: block;"
If .document.querySelectorAll("[onclick*='P-2.50-0-0']").Length = 0 Then
oddOver = "No odds"
oddUnder = "No odds"
Else
.document.querySelector("[onclick*='P-2.50-0-0']").Click
While .Busy Or .readyState < 4: DoEvents: Wend
Set averages = .document.querySelectorAll(".aver td")
oddOver = "'" & averages.item(2).innerText
oddUnder = "'" & averages.item(3).innerText
End If
Else
oddOver = "No odds"
oddUnder = "No odds"
End If
Set averages = Nothing
Dim resultsPositions(), resultsOrder(), j As Long
resultsPositions = Array(1, 3, 4, 5, 7, 8, 10, 11, 13, 14) 'columns in output
resultsOrder = Array(equipas, oddH, oddD, oddA, oddBtts, oddNbtts, oddOver, oddUnder, liga, country)
For j = LBound(resultsPositions) To UBound(resultsPositions)
results(i, resultsPositions(j)) = resultsOrder(j)
Next
'If i = 5 Then Stop ''for testing
Next
.Quit
End With
With ws
.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End With
End Sub
Public Function GetMatches(ByVal url As String, ByVal doc As Object) As Variant
Dim results(), i As Long, listings As Object, html As HTMLDocument
Dim countries(), liga As String, country As String, equipas As String, include As Boolean
Set html = New HTMLDocument
countries = Array("Argentina", "Austria", "Belgium", "Brazil", "China", "Denmark", "England", _
"Finland", "France", "Germany", "Greece", "Ireland", "Italy", "Japan", "Netherlands", "Norway", _
"Poland", "Portugal", "Russia", "Scotland", "Spain", "Sweden", "Switzerland", "Turkey", "USA")
Set listings = doc.querySelectorAll("#table-matches tr")
Dim games As Object, r As Long
Set games = doc.querySelectorAll(".table-participant a")
ReDim results(1 To games.Length, 1 To 4) 'country, liga, equipas, url
For i = 0 To listings.Length - 1
html.body.innerHTML = listings.item(i).innerHTML
Select Case listings.item(i).className
Case "dark center"
country = Trim$(html.querySelector(".bfl").innerText)
liga = html.querySelector(".bflp + a").innerText
include = Not IsError(Application.Match(country, countries, 0))
Case "odd deactivate"
If include Then
r = r + 1
results(r, 1) = country
results(r, 2) = liga
results(r, 3) = html.querySelector("a").innerText
results(r, 4) = Replace$(html.querySelector("a").href, "about:", "https://www.oddsportal.com")
End If
Case " deactivate"
If include Then
r = r + 1
results(r, 1) = country
results(r, 2) = liga
results(r, 3) = html.querySelector("a").innerText
results(r, 4) = Replace$(html.querySelector("a").href, "about:", "https://www.oddsportal.com")
End If
End Select
Next
results = Application.Transpose(results)
ReDim Preserve results(1 To UBound(results, 1), 1 To r)
results = Application.Transpose(results)
GetMatches = results
End Function
Related
I designed some code a while ago using VBA and Excel, to check which people on my database are still VAT registered. I'm based in the UK, and I was using a European website, but because of Brexit it's no longer working! I changed it to a British website now, and I managed to get the code to enter the VAT number from Excel into the webpage, but I can't find the Id or name of the submit button, so I'm not able to submit it. After I've submitted it I'll probably need some more help, but I'll see if anyone can help me solve this one first. Many thanks for your help. Here's the code.
Sub CheckVatPlayers()
Dim x As Integer, y As Integer, z As String
Dim IE As InternetExplorer
Dim aEle As HTMLLinkElement
Dim findtext As String, player As String
x = 2
Set IE = New InternetExplorer
IE.Visible = True
Do Until Cells(x, 1) = ""
'If Left(Cells(x, 4), 2) <> "GB" Then
' Cells(x, 4) = "GB" & Replace(Cells(x, 4), " ", "")
'End If
IE.Navigate "https://www.tax.service.gov.uk/check-vat-number/enter-vat-details"
Do While IE.Busy = True Or IE.ReadyState <> 4: DoEvents: Loop
Application.Wait Now + #12:00:01 AM#
IE.Document.getElementById("target").Value = Cells(x, 4).Value
IE.Document.getElementsByName("button.govuk-button")(0).Click
Do While IE.Busy Or IE.ReadyState <> 4: DoEvents: Loop
Application.Wait Now + #12:00:01 AM#
y = 2
For Each aEle In IE.Document.getElementsByClassName("VAT registration details")
Debug.Print aEle.innerText
z = Trim(aEle.innerText)
Cells(x, 7) = z
If Left(z, 1) = "V" Then
Cells(x, 7).Font.Color = vbBlack
Else
Cells(x, 7).Font.Color = vbRed
End If
With Cells(x, 6)
.Value = Date
.NumberFormat = "dd/mm/yyyy"
End With
y = y + 1
Next
x = x + 1
Loop
End Sub
I need to get the table from website via VBA, but the data is very big.
I used array to save every data, and it cost me over 300 seconds to run it...
Is there any method to accelerate?
(I need to key in and click elements to get the table, can I use "msxml2.xmlhttp"?)
(I am not allowed to use SeleniumBasic.)
this is my code:
Sub ie_()
Application.ScreenUpdating = False
If Weekday(Date) > 1 Then
w1 = DateAdd("d", -Weekday(Date) + 1, Date)
w2 = DateAdd("d", -7, w1)
End If
date1 = Format(w2, "yyyy-mm-dd")
date2 = Format(w1, "yyyy-mm-dd")
Set IE = CreateObject("InternetExplorer.application")
With IE
.Visible = True
.navigate myURL
Set report_btn = .Document.getElementsByClassName("sys-btn")(18)
report_btn.Click
Set next_btn = .Document.getElementByid("btnQry")
next_btn.Click
'key in element
.Document.All.txtTxnTime_From.Value = date1
.Document.All.txtTxnTime_End.Value = date2
.Document.All.btnQuery.Click
'search and get the table. Because the data is huge, I used the loop to wait for it....
Set tbl = .Document.getElementByid("tb_Main")
Do While tbl Is Nothing
Application.Wait (Now + TimeValue("0:00:10"))
Set tbl = .Document.getElementByid("tb_Main")
Loop
End With
'save the data in table to the array
'this step cost almost 300 seconds....
Set trs = tbl.getElementsByTagName("tr")
Dim arr() As Variant
ReDim arr(trs.Length - 1, 25)
i = 0
For Each rw In tbl.Rows
j = 0
For Each cel In rw.Cells
arr(i, j) = cel.innertext
j = j + 1
Next
i = i + 1
Next
'get the data in array and write to the cell
For i = 0 To trs.Length - 1
For j = 0 To 25
Cells(i + 1, j + 1) = arr(i, j)
Next
Next
'close ie
IE.Quit
Set IE = Nothing
End Sub
Hy Experts, I am new here, I am getting problem with my Excel VBA Code that is use to extract the data over the website. I have two sheets with name as "Input" & "Output" that looks like this....
Iputsheet
Output
The first sheet will get a url as an input and than run the code written below...
Sub extractTablesData()
Dim IE As Object, obj As Object
Dim str, e As String
Dim pgf, pgt, pg As Integer
Dim ele, Results As Object
Dim add, size, cno, price, inurl, sp, sp1 As String
Dim isheet, rts As Worksheet
Dim LastRow As Long
Set IE = CreateObject("InternetExplorer.Application")
Set isheet = Worksheets("InputSheet")
Set rts = Worksheets("Results")
url = isheet.Cells(3, 2)
RowCount = 1
rts.Range("A" & RowCount) = "Address"
rts.Range("B" & RowCount) = "Size"
rts.Range("C" & RowCount) = "Contact Number"
rts.Range("D" & RowCount) = "Price"
rts.Range("E" & RowCount) = "Url"
LastRow = rts.Cells(Rows.Count, 2).End(xlUp).Row
'RowCount = LastRow
With IE
.Visible = True
.Navigate (url)
DoEvents
Do While IE.busy Or IE.readystate <> 4
Loop
'Application.Wait (Now + #12:00:05 AM#)
For Each Results In .Document.all
Select Case Results.className
Case "title search-title"
str = Results.innerText
str1 = Split(str, " ")
str = CInt(str1(0))
End Select
If Results.className = "btn btn-main-inverted dropdown-toggle" And InStr(1, Results.Title, " page") > 2 Then
str2 = Results.Title
str1 = Split(str2, " ")
str2 = CInt(str1(0))
End If
Next
pgno = WorksheetFunction.RoundUp(str / str2, 0)
End With
IE.Quit
Set IE = Nothing
UrlS = Split(url, "?")
Url1 = UrlS(0)
Url2 = "?" & UrlS(1)
For i = 1 To pgno
Set IE = CreateObject("InternetExplorer.Application")
url = Url1 & "/" & i & Url2
With IE
.Visible = True
.Navigate (url)
DoEvents
Do While IE.busy Or IE.readystate <> 4
Loop
'Application.Wait (Now + #12:00:08 AM#)
For Each ele In .Document.all
Select Case ele.className
Case "listing-img-a"
inurl = ele.href
rts.Cells(LastRow + 1, 5) = inurl
Case "listing-location"
LastRow = LastRow + 1
add = ele.innerText
rts.Cells(LastRow, 1) = add
Case "lst-sizes"
sp = Split(ele.innerText, " ·")
size = sp(0)
rts.Cells(LastRow, 2) = size
Case "pgicon pgicon-phone js-agent-phone-number" ' btn-tools" 'pgicon pgicon-phone js-agent-phone-number" 'agent-phone-number"
rts.Cells(LastRow, 3) = ele.innerText
Case "listing-price"
price = ele.innerText
rts.Cells(LastRow, 4) = price
End Select
Next
LastRow = rts.Cells(Rows.Count, 2).End(xlUp).Row
rts.Activate
rts.Range("A" & LastRow).Select
End With
IE.Quit
Set IE = Nothing
Application.Wait (Now + #12:00:04 AM#)
Next i
MsgBox "Success"
End Sub
after execution of this code I am getting this error....
Error Message after code execution
after debugging I am getting this field as highlighted....
Debug Message
Please check and make me the correction where I am getting error... This code will extract the data after successful running, and at the end it will run the message box with message as "Success"...
Getting the actual info off the page efficiently:
You could try the following method which uses CSS selectors.
The "." means class and " a" means a tags within preceeding parent element.
Example: So CSS pattern .listing-info a would be a tags within parent element(s) having class = listing-info.
querySelectorAll will find all matching elements having this CSS pattern and return a nodeList.
Option Explicit
Public Sub GetListings()
Dim IE As New InternetExplorer
Application.ScreenUpdating = False
With IE
.Visible = True
.navigate "https://www.propertyguru.com.sg/singapore-property-listing/property-for-sale?limit=30&market=residential&property_type_code%5B%5D=4S&property_type=H&freetext=Yishun", False
While .Busy Or .readyState < 4: DoEvents: Wend
Dim addresses As Object, address As Object, sizes As Object, prices As Object, _
listingIds As Object, i As Long, urls As Object
With .document
Set addresses = .querySelectorAll(".listing-location")
Set listingIds = .querySelectorAll(".listing-item")
Set sizes = .querySelectorAll(".lst-sizes")
Set prices = .querySelectorAll(".price")
Set urls = .querySelectorAll(".listing-info a")
End With
Dim headers()
headers = Array("Address", "Size", "ListingId", "Price", "Url")
With ActiveSheet
.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
For i = 0 To addresses.Length - 1
.Cells(i + 2, 1) = addresses.item(i).innerText
.Cells(i + 2, 2) = Split(sizes.item(i).innerText, "S$")(0)
.Cells(i + 2, 3) = Split(Split(listingIds.item(i).outerHTML, "listing-id-")(1), Chr$(32))(0)
.Cells(i + 2, 4) = "S$" & prices.item(i).innerText
.Cells(i + 2, 5) = "https://www.propertyguru.com.sg/" & urls.item(i).getAttribute("href")
Next i
End With
.Quit
End With
Application.ScreenUpdating = True
End Sub
Getting the number of pages:
You could use a function to get the number of pages in a more reliable way. You can then amend the code above to loop from 1 to pgno very easily.
Sub Main
Dim pgno As Long
'your other code
pgno = GetNumberOfPages(.document)
'other code
End Sub
Public Function GetNumberOfPages(ByVal doc As HTMLDocument) As Long
On Error GoTo errhand:
GetNumberOfPages = doc.querySelector(".listing-pagination li:nth-last-child(2)").innerText
Exit Function
errhand:
If Err.Number <> 0 Then GetNumberOfPages = 1
End Function
Notes on your code from my original non-answer:
I would go with what I have written above and amend into a loop but here are my observations on your code:
0) Main division by 0 error
You need to handle the divide by zero error of str2 = 0. For example:
You could declare pgno as Variant and have
If str2 = 0 Then
pgNo = CVErr(xlErrDiv0)
Else
pgno = WorksheetFunction.RoundUp(str / str2, 0)
End If
1) Also, note that when you have multiple declarations on the same line and only declare the type of one, then all the undeclared types implicitly are variant.
E.g.
Dim add, size, cno, price, inurl, sp, sp1 As String
Only sp1 is a String. Everthing else is a variant.
If all strings then declare as:
Dim add As String, size As String, cno As String, price As String, inurl As String, sp1 As String
I exclude sp As String because I think it should be sp() As String.
And as add and size are methods in VBA, I would avoid using them as variable names, and go with iAdd or iSize, or something more descriptive and useful that cannot be considered ambiguous.
2) You also do not have to use hungarian/pseudo-hungarian notation e.g. str.
3) Use Integer not Long
4) Use Option Explicit and check you datatypes. For example, as mentioned in comments, did you mean for str1 to be a string that you are using in division? Are you relying on an implicit conversion? Don't. Declare as the expected type.
For example: Dim str1() As String, str2 As String, pgno As Double
This will also highlight that you have missing variable declarations e.g. RowCount.
I'm very new to VBA and HTML/XHTML, but through online research and help from other wonderful members on here I've managed to write a code to pull the data I want. I had a hard time identifying the IDs of the elements I want since it's in XHTML, so I think that's where I've botched it the most.
The website: http://www.usbanklocations.com/banks.php?q=&ct=&ml=30&lc=
Here is what I want the code to do:
Pull Bank Name, Address, Phone Number, Total Deposits and Total Assets -- GIVEN the bank name and city I provide in my excel sheet.
Here is my code:
Private Declare Sub Sleep Lib "kernel32.dll" (ByVal dwMilliseconds As Long)
Sub CommunityBanks()
Dim IE As Object, TableResults As Object, webRow As Object, BankName As Variant, page As Long, pageTotal As Long, r As Long
Dim beginTime As Date, i As Long, myvalue As Variant
Set IE = CreateObject("internetexplorer.application")
IE.navigate "http://www.usbanklocations.com/banks.php?name=" & Range("A2").Value & "+Bank&ml=30&lc=" & Range("B2").Value & "%2C+TX"
IE.Visible = True
Do While IE.Busy Or IE.readystate <> 4 '4 = READYSTATE_COMPLETE
DoEvents
Loop
'input bank name into form
'myvalue = InputBox("Enter City. Press okay to begin search", "Bank Search")
'Range("F3").Value = myvalue
'IE.document.getelementbyid("MainContent_txtCity").Value = "LegacyTexas"
'click find button
'IE.document.getelementbyid("MainContent_btn").Click
'Sleep 5 * 1000
IE.document.getelementbytagname("table").getelementsbyclassname("btn").Click
Sleep 5 * 1000
'total pages
pageTotal = IE.document.getelementbyid("lsortby").innertext
page = 0
Do Until page = pageTotal
DoEvents
page = IE.document.getelementbyclassname("lsortby").innertext
With IE.document.getelementbyid("main")
For r = 1 To .Rows.Length - 1
If Not IsArray(BankName) Then
ReDim BankName(7, 0) As Variant
Else
ReDim Preserve BankName(7, UBound(BankName, 2) + 1) As Variant
End If
BankName(0, UBound(BankName, 2)) = .Rows(r).Cells(0).innertext
Next r
End With
If page < pageTotal Then
IE.document.getelementbyclassname("panelpn").Click
beginTime = Now
Application.Wait (Now + TimeValue("00:00:05"))
End If
Loop
For r = 0 To UBound(BankName, 2)
IE.navigate "http://www.usbanklocations.com/" & BankName(0, r)
Do While IE.Busy Or IE.readystate <> 4 '4 = READYSTATE_COMPLETE
DoEvents
Loop
'wait 5 sec. for screen refresh
Sleep 5 * 1000
With IE.document.getelementbytagname("table")
For i = 0 To .Rows.Length - 1
DoEvents
Select Case .Rows(i).Cells(0).innertext
Case "Name:"
BankName(1, r) = .Rows(i).Cells(1).innertext
Case "Location:"
BankName(2, r) = .Rows(i).Cells(1).innertext
Case "Phone:"
BankName(3, r) = .Rows(i).Cells(1).innertext
Case "Branch Deposit:"
BankName(4, r) = Replace(Replace(.Rows(i).Cells(1).innertext, ",", ""), "$", "")
Case "Total Assets:"
BankName(5, r) = Replace(Replace(.Rows(i).Cells(1).innertext, ",", ""), "$", "")
End Select
Next i
End With
Next r
IE.Quit
Set IE = Nothing
'post result on Excel cell
Worksheets(1).Range("A9").Resize(UBound(BankName, 2) + 1, UBound(BankName, 1) + 1).Value = Application.Transpose(BankName)
End Sub
Thank you in advance! I would greatly appreciate any help.
Consider the below example which uses XHR instead of IE and split-based HTML content parsing:
Option Explicit
Sub Test_usbanklocations()
Dim oSource, oDestination, y, oSrcRow, sName, sCity, sDist, sUrl0, sUrl1, sUrl2, lPage, sResp1, sResp2, i, a1, a2, a3, a4, a5
Set oSource = Sheets(1)
Set oDestination = Sheets(2)
oDestination.Cells.Delete
DataOutput oDestination, 1, Array("Name", "Location", "Phone", "Total Assets", "Total Deposits")
y = 2
For Each oSrcRow In oSource.UsedRange.Rows
sName = oSrcRow.Cells(1, 1).Value
sCity = oSrcRow.Cells(1, 2).Value
sDist = oSrcRow.Cells(1, 3).Value
sUrl0 = "http://www.usbanklocations.com/banks.php?q=" & EncodeUriComponent(sName) & "&lc=" & EncodeUriComponent(sCity) & "&ml=" & sDist
sUrl1 = sUrl0
lPage = 1
Do
sResp1 = GetXHR(sUrl1)
If InStr(sResp1, "We can not find the address you provided. Please check.") > 0 Then Exit Do
a1 = Split(sResp1, "<div class=""pl")
For i = 1 To UBound(a1)
a2 = Split(a1(i), "</div>", 3)
a3 = Split(a2(1), "<a href=""", 2)
a4 = Split(a3(1), """>", 2)
sUrl2 = "http://www.usbanklocations.com" & a4(0)
sResp2 = GetXHR(sUrl2)
a5 = Array( _
GetFragment(sResp2, "<b>Name:</b></td><td>", "</td>"), _
Replace(GetFragment(sResp2, "<b>Location:</b></td><td>", "</td>"), "View Other Branches", ""), _
GetFragment(sResp2, "<b>Phone:</b></td>", "</td>"), _
GetFragment(sResp2, "<b>Total Assets:</b></td><td>", "</td>"), _
GetFragment(sResp2, "<b>Total Deposits:</b></td><td>", "</td>") _
)
DataOutput oDestination, y, a5
y = y + 1
DoEvents
Next
If InStr(sResp1, "Next Page >") = 0 Then Exit Do
lPage = lPage + 1
sUrl1 = sUrl0 & "&ps=" & lPage
DoEvents
Loop
Next
MsgBox "Completed"
End Sub
Function GetXHR(sUrl)
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", sUrl, False
.Send
GetXHR = .ResponseText
End With
End Function
Sub DataOutput(oSht, y, aValues)
With oSht.Cells(y, 1).Resize(1, UBound(aValues) + 1)
.NumberFormat = "#"
.Value = aValues
End With
End Sub
Function GetFragment(sText, sPatt1, sPatt2)
Dim a1, a2
a1 = Split(sText, sPatt1, 2)
If UBound(a1) <> 1 Then Exit Function
a2 = Split(a1(1), sPatt2, 2)
If UBound(a2) <> 1 Then Exit Function
GetFragment = GetInnerText(a2(0))
End Function
Function EncodeUriComponent(sText)
Static objHtmlfile As Object
If objHtmlfile Is Nothing Then
Set objHtmlfile = CreateObject("htmlfile")
objHtmlfile.parentWindow.execScript "function encode(s) {return encodeURIComponent(s)}", "jscript"
End If
EncodeUriComponent = objHtmlfile.parentWindow.encode(sText)
End Function
Function GetInnerText(sText)
With CreateObject("htmlfile")
.Write ("<body>" & sText & "</body>")
GetInnerText = .DocumentElement.Document.GetElementsByTagName("body")(0).InnerText
End With
End Function
As an example, the first worksheet contains data to search (Bank name, Location and Distance to refine by):
Then result on the second worksheet is as follows:
I wrote a macro to download data from a website, after the website is fully loaded, it will scrap the data by the html tag, however, sometimes the data is incorrectly scraped due to unknown error, I want to add a checking after each variant 'x' completed, e.g. If the activesheet contains the word "中报",then go back to the step "'Select the Report Type" to re-do the scraping. Also, I know some of the variables/data types are not set at the very beginning. Could anyone help to solve this? Thanks in advance!
Sub GetFinanceData()
Dim x As Variant
Dim IE As Object
For x = 1 To 1584
Dim URL As String, elemCollection As Object
Dim t As Integer, r As Integer, c As Integer
Worksheets("Stocks").Select
Worksheets("Stocks").Activate
'Open IE and Go to the Website
'URL = "http://stock.finance.sina.com.cn/hkstock/finance/00001.html"
URL = Cells(x, 1)
Set IE = CreateObject("InternetExplorer.Application")
With IE
.navigate URL
.Visible = False
Do While .Busy = True Or .readyState <> 4
Loop
DoEvents
Worksheets.Add(After:=Worksheets(Worksheets.Count)).Name = _
ThisWorkbook.Worksheets("Stocks").Range("B" & x).Value 'You could even simplify it and just state the name as Cells(x,2)
'Select the Report Type
Set selectItems = IE.Document.getElementsByTagName("select")
For Each i In selectItems
i.Value = "zero"
i.FireEvent ("onchange")
Application.Wait (Now + TimeValue("0:00:05"))
Next i
Do While .Busy: DoEvents: Loop
ActiveSheet.Range("A1:K2000").ClearContents
ActiveSheet.Range("A1").Value = .Document.getElementsByTagName("h1")(0).innerText
ActiveSheet.Range("B1").Value = .Document.getElementsByTagName("em")(0).innerText
ActiveSheet.Range("A4").Value = Worksheets("Stocks").Cells(1, 4)
'Find and Get Table Data
tblNameArr = Array(Worksheets("Stocks").Cells(2, 4), Worksheets("Stocks").Cells(3, 4), Worksheets("Stocks").Cells(4, 4), Worksheets("Stocks").Cells(5, 4))
tblStartRow = 6
Set elemCollection = .Document.getElementsByTagName("TABLE")
For t = 0 To elemCollection.Length - 1
For r = 0 To (elemCollection(t).Rows.Length - 1)
For c = 0 To (elemCollection(t).Rows(r).Cells.Length - 1)
ActiveSheet.Cells(r + tblStartRow, c + 1) = elemCollection(t).Rows(r).Cells(c).innerText
Next c
Next r
ActiveSheet.Cells(r + tblStartRow + 2, 1) = tblNameArr(t)
tblStartRow = tblStartRow + r + 4
Next t
End With
' cleaning up memory
IE.Quit
Next x
End Sub
This is cleaned up quite a bit.
I added a SelectReportType: line label. Whenever you want to go back to that condition, use insert the line
Goto SelectReportType
And it will take you to that spot. The better way to do it would be to place that code in a separate function so you can call it anytime your test for "中报" is true. But I'm not following your code well enough to understand what you are doing to assist you with that.
Sub GetFinanceData()
Dim x As Variant
Dim IE As Object
Dim URL As String, elemCollection As Object
Dim t As Integer, r As Integer, c As Integer
Dim selectItems As Variant, i As Variant
Dim tblNameArr() As String
Dim tblStartRow As Long
Worksheets("Stocks").Select
Worksheets("Stocks").Activate
For x = 1 To 1584
'Open IE and Go to the Website
'URL = "http://stock.finance.sina.com.cn/hkstock/finance/00001.html"
URL = Cells(x, 1)
Set IE = CreateObject("InternetExplorer.Application")
With IE
.Navigate URL
.Visible = False
Do While .Busy = True Or .ReadyState <> 4
Loop
DoEvents
Worksheets.Add(After:=Worksheets(Worksheets.count)).name = _
ThisWorkbook.Worksheets("Stocks").Range("B" & x).Value 'You could even simplify it and just state the name as Cells(x,2)
SelectReportType:
'Select the Report Type
Set selectItems = IE.Document.getElementsByTagName("select")
For Each i In selectItems
i.Value = "zero"
i.FireEvent ("onchange")
Application.Wait (Now + TimeValue("0:00:05"))
Next i
Do While .Busy: DoEvents: Loop
ActiveSheet.Range("A1:K2000").ClearContents
ActiveSheet.Range("A1").Value = .Document.getElementsByTagName("h1")(0).innerText
ActiveSheet.Range("B1").Value = .Document.getElementsByTagName("em")(0).innerText
ActiveSheet.Range("A4").Value = Worksheets("Stocks").Cells(1, 4)
'Find and Get Table Data
tblNameArr = Array(Worksheets("Stocks").Cells(2, 4), Worksheets("Stocks").Cells(3, 4), Worksheets("Stocks").Cells(4, 4), Worksheets("Stocks").Cells(5, 4))
tblStartRow = 6
Set elemCollection = .Document.getElementsByTagName("TABLE")
For t = 0 To elemCollection.Length - 1
For r = 0 To (elemCollection(t).Rows.Length - 1)
For c = 0 To (elemCollection(t).Rows(r).Cells.Length - 1)
ActiveSheet.Cells(r + tblStartRow, c + 1) = elemCollection(t).Rows(r).Cells(c).innerText
Next c
Next r
ActiveSheet.Cells(r + tblStartRow + 2, 1) = tblNameArr(t)
tblStartRow = tblStartRow + r + 4
Next t
End With
' cleaning up memory
IE.Quit
Next x
End Sub