Data Scraping from Website - receiving error on different player pages - vba

I'm trying to data scrape game logs from basketball reference.com. It worked perfectly on two players I choose (demar derozan and lamarcus aldridge). But then I started going through other players and it just wouldn't scrape the data for many of the other players (Kevin Durant).
I have NO Idea why it wouldn't work. For example, I tried Stephen Curry and it worked fine, but players like Draymond Green and Kevin Durant, the code would just not scrape the data at all. For some reason after the column Date, everything stopped working.
Sub Data()
Dim ieObj As InternetExplorer
Dim htmlEle As IHTMLElement
Dim i As Integer
i = 1
Set ieObj = New InternetExplorer
ieObj.Visible = True
ieObj.navigate "https://www.basketball-reference.com/players/d/duranke01/gamelog/2019"
Application.ScreenUpdating = False
Application.DisplayStatusBar = True
Application.Wait Now + TimeValue("00:00:05")
For Each htmlEle In ieObj.document.getElementsByClassName("stats_table")(0).getElementsByTagName("tr")
With ActiveSheet
.Range("A" & i).Value = htmlEle.Children(0).textContent
.Range("B" & i).Value = htmlEle.Children(1).textContent
.Range("C" & i).Value = htmlEle.Children(2).textContent
.Range("D" & i).Value = htmlEle.Children(3).textContent
End With
i = i + 1
On Error Resume Next
Next htmlEle
End Sub
The error always happens on this line:
.Range("D" & i).Value = htmlEle.Children(3).textContent
I tried skipping columns to but it still wouldn't work.

I found no problem with using id for table, as seen in .responseText and using xmlhttp over browser.
I only tested with 3 urls - in sheet 1 A1:A3
https://www.basketball-reference.com/players/d/duranke01/gamelog/2019
https://www.basketball-reference.com/players/c/curryst01/gamelog/2019
https://www.basketball-reference.com/players/g/greendr01/gamelog/2019
With this site sometimes you can encounter tables inside comments so I stripped out the comment tags before processing. This was not necessary for the links I tried.
I use clipboard to copy paste but you could just use
Set hTable = html.getElementById("pgl_basic")
Then loop the tr and td as you wish using getElementsByTagName.
References (VBE>Tools>References):
Microsoft HTML Object Library
Option Explicit
Public Sub GetPlayerInfo()
Dim urls(), i As Long, html As HTMLDocument, hTable As Object
Dim ws As Worksheet, wsCurrent As Object, clipboard As Object
Dim lastRow As Long, playerIdentifier As String, arr() As String
Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
Set ws = ThisWorkbook.Worksheets("Sheet1")
lastRow = ws.Cells(ws.rows.Count, "A").End(xlUp).Row
urls = Application.Transpose(ws.Range("A1:A" & lastRow).Value)
Set html = New HTMLDocument
With CreateObject("MSXML2.XMLHTTP")
For i = LBound(urls) To UBound(urls)
.Open "GET", urls(i), False
.send
html.body.innerHTML = Replace$(Replace$(.responseText, "-->", vbNullString), "<!--", vbNullString) 'remove comments
arr = Split(urls(i), "/")
playerIdentifier = arr(5)
If SheetExists(playerIdentifier) Then
With ThisWorkbook.Worksheets(playerIdentifier).Cells
.ClearContents
.ClearFormats
Set wsCurrent = ThisWorkbook.Worksheets(playerIdentifier)
End With
Else
Set wsCurrent = ThisWorkbook.Worksheets.Add
wsCurrent.name = playerIdentifier
End If
Set hTable = html.querySelector("#pgl_basic")
clipboard.SetText hTable.outerHTML
clipboard.PutInClipboard
wsCurrent.Range("A1").PasteSpecial
Set wsCurrent = Nothing: Set hTable = Nothing: Erase arr: playerIdentifier = vbNullString
Application.CutCopyMode = False
Next
End With
End Sub
Public Function SheetExists(ByVal sheetName As String) As Boolean '<== function by #Rory
SheetExists = Evaluate("ISREF('" & sheetName & "'!A1)")
End Function
IE
Note proper page load wait used.
Option Explicit
Public Sub GetPlayerInfo()
Dim ieObj As InternetExplorer, htmlEle As IHTMLElement
Dim urls(), i As Long, j As Long, hTable As Object
Dim ws As Worksheet, wsCurrent As Object
Dim lastRow As Long, playerIdentifier As String, arr() As String
Application.ScreenUpdating = False
On Error GoTo errHand
Set ws = ThisWorkbook.Worksheets("Sheet1")
lastRow = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row
urls = Application.Transpose(ws.Range("A1:A" & lastRow).Value)
Set ieObj = New InternetExplorer
With ieObj
.Visible = True
For j = LBound(urls) To UBound(urls)
.navigate urls(j)
While .Busy Or .readyState <> 4: DoEvents: Wend
arr = Split(urls(j), "/")
playerIdentifier = arr(5)
If SheetExists(playerIdentifier) Then
With ThisWorkbook.Worksheets(playerIdentifier).Cells
.ClearContents
.ClearFormats
Set wsCurrent = ThisWorkbook.Worksheets(playerIdentifier)
End With
Else
Set wsCurrent = ThisWorkbook.Worksheets.Add
wsCurrent.Name = playerIdentifier
End If
i = 1
Set hTable = .document.getElementById("pgl_basic")
If Not hTable Is Nothing Then
For Each htmlEle In hTable.getElementsByTagName("tr")
With wsCurrent
.Range("A" & i).Value = htmlEle.Children(0).textContent
.Range("B" & i).Value = htmlEle.Children(1).textContent
.Range("C" & i).Value = htmlEle.Children(2).textContent
.Range("D" & i).Value = htmlEle.Children(3).textContent
End With
i = i + 1
Next htmlEle
End If
Set wsCurrent = Nothing: Set hTable = Nothing: Erase arr: playerIdentifier = vbNullString
Next
End With
errHand:
Application.ScreenUpdating = True
If Err.Number <> 0 Then Debug.Print Err.Number, Err.Description
ie.Quit
End Sub

Related

Trying to copy one worksheet from one workbook into another preexisting worksheet?

I've written the following code which iterates though my worksheets of my main workbook, checks for a conditional, and then if that conditional is satisfied it copies the active worksheet into a new workbook and saves it. However, I would like to just append the worksheet to the other notebook.
Sub Archive_Sheets()
For Each ws In ActiveWorkbook.Worksheets
ws.Activate
Dim SrchRng As Range, cel As Range
Set SrchRng = ws.Range("C9:C108")
Dim bought_amt As Integer
Dim called_amt As Integer
bought_amt = 0
called_amt = 0
For Each cel In SrchRng
If InStr(1, cel.Value, "BOUGHT") > 0 Then
bought_amt = bought_amt + cel.Offset(0, 1).Value
End If
If InStr(1, cel.Value, "CALLED") > 0 Then
called_amt = called_amt + cel.Offset(0, 1).Value
End If
Next cel
If called_amt = bought_amt Then
ws.Range("A1").Value = "DONE"
Module8.CopySheet
Exit For
'ws.Delete
End If
Next
End Sub
Sub CopySheet()
Application.DisplayAlerts = False
Dim wb_name_arr() As String
pName = ActiveWorkbook.Path
wbName = ActiveWorkbook.Name ' the file name of the currently active file
shtName = ActiveSheet.Name ' the name of the currently selected worksheet
wb_name_arr() = Split(wbName, ".")
Application.ScreenUpdating = False
ActiveSheet.Select
ActiveSheet.Copy
' NEED TO CHANGE THIS LINE ********************
ActiveSheet.SaveAs Filename:=pName + "\" + wb_name_arr(0) + " archived.xlsx"
'****************************
Application.ScreenUpdating = True
End Sub
The code above will overwrite the new workbook I'm saving to so it's only the most recent sheet. I will already have this workbook created, so if I can append active worksheets to it that would be ideal. I already tried
ActiveSheet.Copy After:=Workbook(pName + "\" + wb_name_arr(0) + " archived.xlsx")
and
ActiveSheet.Copy Before:=Workbooks.Open(pName + "\" + wb_name_arr(0) + " archived.xlsx").Sheets(0)
with no luck.
These line are pseudo-codes. The general idea is Implicit None. Try to explicitly reference to workbooks and sheets instead of activating them. Which is also faster.
Try to avoid using ActiveSheet in your code. Simply try something like this:
Set mySht = ActiveSheet 'This should be set at the beginning of your code
Then whenever you have that Sheet (i.e. ActiveSheet) in your code, use oSht instead.
So, you need to open the Workbook to be able to work on it. Similarly, you can assign a name to different workbooks like this:
Set myWbk = ActiveWorkbook
'Or
Set oWbk = Workbooks("Output.xlsx")
What #A.S.H proposed then works for you like this:
oFile = "Path/to/the/File/" & wb_name_arr(0) & " archived.xlsx"
Set oWbk = Workbooks.Open(oFile)
mySht.Copy Before:=Workbooks(oWbk).sheets(1)
Private Sub that()
Dim aRR As Variant
aRR = ThisWorkbook.Sheets("Sheet1").UsedRange
Dim colC As Long
Dim rowC As Long
colC = ThisWorkbook.Sheets("Sheet1").UsedRange.Columns.Count
rowC = ThisWorkbook.Sheets("Sheet1").UsedRange.Rows.Count
ThisWorkbook.Sheets("Sheet2").Range(ThisWorkbook.Sheets("Sheet2").Cells(1, 1), ThisWorkbook.Sheets("Sheet2").Cells(rowC, colC)).Value2 = aRR
End Sub
Try edited code (I've edited both Subs to make them shorter, and also faster as there is no need to use Select and Activate).
Explanation inside the code as comments.
Option Explicit
Sub Archive_Sheets()
Dim SrchRng As Range, cel As Range
Dim bought_amt As Long
Dim called_amt As Long
Dim ws As Worksheet
For Each ws In ActiveWorkbook.Worksheets
With ws
Set SrchRng = .Range("C9:C108")
bought_amt = 0
called_amt = 0
For Each cel In SrchRng
If cel.Value Like "BOUGHT*" Then
bought_amt = bought_amt + cel.Offset(0, 1).Value
End If
If cel.Value Like "CALLED*" Then
called_amt = called_amt + cel.Offset(0, 1).Value
End If
Next cel
If called_amt = bought_amt Then
.Range("A1").Value = "DONE"
CopySheet .Name ' <-- call the function and send the current ws sheet's name
Exit For
End If
End With
Next
End Sub
'==================================================================
Sub CopySheet(wsName As String)
Application.DisplayAlerts = False
Dim wb_name_arr() As String
Dim wb As Workbook
Dim pName As String, wbName As String
pName = ActiveWorkbook.Path
wb_name_arr() = Split(wbName, ".")
Application.ScreenUpdating = False
On Error Resume Next
Set wb = Workbooks(wb_name_arr(0) & " archived.xlsx") ' try to set wb if it's already open
On Error GoTo 0
If wb Is Nothing Then ' <-- wb is Nothing, means it's still close, open it
Set wb = Workbooks.Open(Filename:=pName & "\" & wb_name_arr(0) & " archived.xlsx")
End If
' === Copy the sheet to "archived" file one before tha last sheet ===
Worksheets(wsName).Copy before:=wb.Sheets(wb.Sheets.Count)
Application.ScreenUpdating = True
End Sub
Full code that solves problem.
Sub Archive_Sheets()
For Each ws In ActiveWorkbook.Worksheets
ws.Activate
Dim SrchRng As Range, cel As Range
Set SrchRng = ws.Range("C9:C108")
Dim bought_amt As Integer
Dim called_amt As Integer
bought_amt = 0
called_amt = 0
For Each cel In SrchRng
If InStr(1, cel.Value, "BOUGHT") > 0 Then
bought_amt = bought_amt + cel.Offset(0, 1).Value
End If
If InStr(1, cel.Value, "CALLED") > 0 Then
called_amt = called_amt + cel.Offset(0, 1).Value
End If
Next cel
If called_amt = bought_amt Then
If called_amt <> 0 Then
ws.Range("A1").Value = "DONE"
Module8.CopySheet
'ws.Delete
End If
End If
Next
End Sub
Sub CopySheet()
Application.DisplayAlerts = False
Dim wb_name_arr() As String
pName = ActiveWorkbook.Path
wbName = ActiveWorkbook.Name ' the file name of the currently active file
shtName = ActiveSheet.Name ' the name of the currently selected worksheet
wb_name_arr() = Split(wbName, ".")
Set mySht = ActiveSheet 'This should be set at the beginning of your code
Set myWbk = ActiveWorkbook
oFile = pName & wb_name_arr(0) & " archived.xlsx"
Set oWbk = Workbooks.Open("path_to_file")
mySht.Copy after:=oWbk.Sheets(oWbk.Sheets.Count)
oWbk.Save
End Sub
Try something like this (to make it simple for the moment, I insert the sheet at beginning):
ActiveSheet.Copy Before:=Workbooks(wb_name_arr(0) & " archived.xlsx").sheets(1)
This works if the destination WB was already open. You may want to open the WB if it is not open yet. Use the following sub to create or open the destination WB:
Sub archiveSheet(ws as Worksheet)
Dim destName As String
destName = left(ThisWorkbook.name, InStrRev(ThisWorkbook.name, ".") - 1) & " archived.xlsx"
Application.DisplayAlerts = False: Application.ScreenUpdating = False
On Error Resume Next
Dim destWB As Workbook: Set destWB = Workbooks(destName)
If destWB Is Nothing Then Set destWB = Workbooks.Open(ThisWorkbook.path + "\" & destName)
If destWB Is Nothing Then
Set destWB = Workbooks.Add
destWB.SaveAs ThisWorkbook.path & "\" & destName
End If
If destWB Is Nothing Then
msgBox "could not open or create " & destName
Else
ws.Copy After:=destWB.Sheets(destWB.Sheets.count)
End If
Application.DisplayAlerts = True: Application.ScreenUpdating = True
End Sub
Call it from the main routine Archive_Sheets like this:
archiveSheet ws

Excel Macro to give report title based on Cell Value

I am very new to macro programming and currently creating a macro that splits a table into new worksheets dependent on a unique variable, then copies and pastes each worksheet into a single word document split by page breaks.
What I cannot work out how to do, is create a macro that gives each table on each page a title based on the value of a cell.
Option Explicit
Sub Run_All()
Call Organise_Table
Call Rename_Column
Call Isblank
Call Split_Table
Call SumColumn
Call ExceltoWord
Call Report_Title
End Sub
Sub Organise_Table()
Columns(1).EntireColumn.Delete
Columns(1).EntireColumn.Delete
Columns(2).EntireColumn.Delete
Columns(3).EntireColumn.Delete
Columns(3).EntireColumn.Delete
End Sub
Sub Rename_Column()
Range("A1") = "Contribution Type"
Range("B1") = "RefNo"
Range("C1") = "Title"
Range("D1") = "Initals"
Range("E1") = "Surname"
Range("F1") = "Balance Brought Forward"
Range("G1") = "Annual Interest Added"
Range("H1") = "Contributions Added"
Range("I1") = "Total Fund Value"
End Sub
Sub Isblank()
Application.ScreenUpdating = False
On Error Resume Next
With Range("F1:I14")
.SpecialCells(xlCellTypeBlanks).Formula = "0"
.Value = .Value
End With
Err.Clear
Application.ScreenUpdating = True
End Sub
Sub Split_Table()
Dim lr As Long
Dim Ws As Worksheet
Dim vcol As Integer
Dim i As Integer
Dim iCol As Long
Dim myarr As Variant
Dim Title As String
Dim titlerow As Integer
vcol = 2
Set Ws = Sheets("Sheet1")
Title = "A1:I14"
Application.ScreenUpdating = False
lr = Ws.Cells(Ws.Rows.Count, vcol).End(xlUp).Row
titlerow = Ws.Range(Title).Cells(1).Row
iCol = Ws.Columns.Count
Ws.Cells(1, iCol) = "Unique"
For i = 2 To lr
On Error Resume Next
If Ws.Cells(i, vcol) <> "" And Application.WorksheetFunction.Match(Ws.Cells(i, vcol), Ws.Columns(iCol), 0) = 0 Then
Ws.Cells(Ws.Rows.Count, iCol).End(xlUp).Offset(1) = Ws.Cells(i, vcol)
End If
Next i
myarr = Application.WorksheetFunction.Transpose(Ws.Columns(iCol).SpecialCells(xlCellTypeConstants))
Ws.Columns(iCol).Clear
For i = 2 To UBound(myarr)
Ws.Range(Title).AutoFilter field:=vcol, Criteria1:=myarr(i) & ""
If Not Evaluate("=ISREF('" & myarr(i) & "'!A1)") Then
Sheets.Add(after:=Worksheets(Worksheets.Count)).Name = myarr(i) & ""
Else
Sheets(myarr(i) & "").Move after:=Worksheets(Worksheets.Count)
End If
Ws.Range("A" & titlerow & ":A" & lr).EntireRow.Copy Sheets(myarr(i) & "").Range("A1")
Sheets(myarr(i) & "").Columns.AutoFit
Next i
Ws.AutoFilterMode = False
Ws.Activate
End Sub
Sub SumColumn()
Dim LastRow As Long
Dim iRow As Long
Dim iCol As Integer
Dim nSheets As Integer
For nSheets = 1 To 3
With Worksheets(nSheets)
LastRow = 0
For iCol = 6 To 9
iRow = .Cells(65536, iCol).End(xlUp).Row
If iRow > LastRow Then LastRow = iRow
Next iCol
For iCol = 6 To 9
.Cells(LastRow + 1, iCol) = Application.WorksheetFunction.Sum(Range(.Cells(1, iCol), .Cells(LastRow, iCol)))
Next iCol
iCol = 1
.Cells(LastRow + 1, iCol).Value = ("Total")
End With
Next nSheets
End Sub
Sub ExceltoWord()
Dim Ws As Worksheet
Dim Wkbk1 As Workbook
Dim strdocname As String
Dim wdapp As Object
Dim wddoc As Object
Dim orng As Object
Dim wdAutoFitwindow As String
Set Wkbk1 = ActiveWorkbook
Application.ScreenUpdating = False
Application.DisplayAlerts = False
Application.EnableEvents = False
strdocname = "\\VDC.COM\User\HomeDrives\GFSNRE\Desktop\Test19.Doc" 'Change this to whatever directory the report will be in
'file name & folder path
On Error Resume Next
'error number 429
Set wdapp = GetObject(, "Word.Application")
If Err.Number = 429 Then
Err.Clear
'create new instance of word application
Set wdapp = CreateObject("Word.Application")
End If
wdapp.Visible = True
'define paths to file
If Dir(strdocname) = "" Then
'MsgBox "The file" & strdocname & vbCrLf & "was not found " & vbCrLf & "C:\Path\Name.doc", _
' vbExclamation, "The document does not exist "
'Exit Sub
Set wddoc = wdapp.Documents.Add
Else
Set wddoc = wdapp.Documents.Open(strdocname)
End If
For Each Ws In Wkbk1.Worksheets
Ws.Range("A1:I14").Copy
Set orng = wddoc.Range
orng.collapse 0
orng.Paste
orng.End = wddoc.Range.End
orng.collapse 0
orng.insertbreak Type:=7
Range("A1:I14").Borders.LineStyle = xlContinuous
wddoc.AutofitBehavior wdAutoFitwindow
Next Ws
lbl_Exit:
Set orng = Nothing
Set wddoc = Nothing
Set wdapp = Nothing
Set Wkbk1 = Nothing
Set Ws = Nothing
Application.ScreenUpdating = True
Application.DisplayAlerts = True
Application.EnableEvents = True
Exit Sub
End Sub
Sub Report_Title()
Dim Ws As Worksheet
Dim MyText As String
Dim MyRange As Object
Set MyRange = ActiveWorkbook.Range
MyText = Ws.Range("E3").Value
' Selection Example:
Selection.InsertBefore (MyText)
' Range Example: Inserts text at the beginning
' of the active document.
MyRange.InsertBefore (MyText)
End Sub
There is one error here :
Dim Ws As Worksheet
Dim MyText As String
Dim MyRange As Object
Set MyRange = ActiveWorkbook.Range
MyText = Ws.Range("E3").Value '<==== WS is not properly defined yet
You are using Ws. to say in which worksheet you are working in, which is a good thing. But, as it is a procedure-level variable, it is not pointing anywhere useful. You probably need something like :
Set MyRange = ActiveWorkbook.Range
Set Ws = ActiveWorkbook.Sheets("Sheet1") 'assuming you want to read "E3" on the sheet "Sheet1" of the active workbook, that's the line to add
MyText = Ws.Range("E3").Value '<==== WS is now properly defined
If you go to debugging mode, you should have nothing in "MyText" in your version, and something in mine. The content of E3 in the sheet Sheet1.
Two things:
You should not turn off error handling for the entire code. If
things aren't working VBA can't tell you why or where the problem
is. While it's standar practise to use On Error Resume Next when
using GetObject/CreateObject it's also standard practise to turn
error handling back on AFTER the If...End If. You need to add the
line: On Error GoTo 0 where you have no error handler code.
Based on your sample code, write in the Title before pasting the table.
So something like this:
For Each Ws In Wkbk1.Worksheets
Ws.Range("A1:I14").Copy
Set orng = wddoc.Range
orng.collapse 0
orng.Text = Ws.Range([cell reference with title]) & vbCr
orng.collapse 0
orng.Paste
orng.End = wddoc.Range.End
orng.collapse 0
orng.insertbreak Type:=7
Range("A1:I14").Borders.LineStyle = xlContinuous
wddoc.AutofitBehavior wdAutoFitwindow
Next Ws

Macro will only create hyperlink in debug mode

This is a macro that will search all cells in all worksheets in all of the workbooks contained in a single directory. Everything works as advertised except for the add hyperlink method, which does work if I repeatedly mash F8.
How can I edit the macro so the hyperlink portion works?
'Search all workbooks in a folder for string
Sub SearchWorkbooks()
Dim fso As Object
Dim fld As Object
Dim strSearch As String
Dim strPath As String
Dim strFile As String
Dim wOut As Worksheet
Dim wbk As Workbook
Dim wks As Worksheet
Dim Lrow As Long
Dim rFound As Range
Dim strFirstAddress As String
On Error GoTo ErrHandler
Application.ScreenUpdating = False
strSearch = "Capacitor"
strPath = "C:\!Source"
Set wOut = Worksheets.Add
Lrow = 1
With wOut
.Name = "Results"
.Cells(Lrow, 1) = "Workbook"
.Cells(Lrow, 2) = "Worksheet"
.Cells(Lrow, 3) = "Cell"
.Cells(Lrow, 4) = "Text in Cell"
.Cells(Lrow, 5) = "Link"
Set fso = CreateObject("Scripting.FileSystemObject")
Set fld = fso.GetFolder(strPath)
strFile = Dir(strPath & "\*.xls*")
Do While strFile <> ""
Set wbk = Workbooks.Open _
(Filename:=strPath & "\" & strFile, _
UpdateLinks:=0, _
ReadOnly:=True, _
AddToMRU:=False)
For Each wks In wbk.Worksheets
Set rFound = wks.UsedRange.Find(strSearch)
If Not rFound Is Nothing Then
strFirstAddress = rFound.Address
End If
Do
If rFound Is Nothing Then
Exit Do
Else
Lrow = Lrow + 1
.Cells(Lrow, 1) = wbk.Name
.Cells(Lrow, 2) = wks.Name
.Cells(Lrow, 3) = rFound.Address
.Cells(Lrow, 4) = rFound.Value
'This is the line that does not work
'well it actually works in debug mode but not in real time
wks.Hyperlinks.Add Anchor:=Cells(Lrow, 5), Address:=wbk.FullName, SubAddress:= _
wks.Name & "!" & rFound.Address, TextToDisplay:="Link"
End If
Set rFound = wks.Cells.FindNext(After:=rFound)
Loop While strFirstAddress <> rFound.Address
Next
wbk.Close (False)
strFile = Dir
Loop
.Columns("A:D").EntireColumn.AutoFit
End With
'MsgBox "Done"
ExitHandler:
Set wOut = Nothing
Set wks = Nothing
Set wbk = Nothing
Set fld = Nothing
Set fso = Nothing
Application.ScreenUpdating = True
Exit Sub
ErrHandler:
MsgBox Err.Description, vbExclamation
Resume ExitHandler
End Sub
Try adding the worksheet reference to your Cells() call out like wks.Cells(......)

Web table not fetching the correct data by VBA

With the followoing code I can fetch the price table from this webpage http://www.idealo.de/preisvergleich/OffersOfProduct/143513.html
But from another page here this table is not being fetched...though these two pages are identical. I can't figure out where am lacking.
Any help on this is deeply appreciable.
Sub TableExample()
Dim IE As Object
Dim doc As Object
Dim strURL As String
strURL = "http://www.idealo.de/preisvergleich/OffersOfProduct/143513.html"
Set IE = CreateObject("InternetExplorer.Application")
With IE
.navigate strURL
Do Until .readyState = 4: DoEvents: Loop
Do While .Busy: DoEvents: Loop
Set doc = IE.document
GetAllTables doc
.Quit
End With
End Sub
Sub GetAllTables(doc As Object)
Dim ws As Worksheet
Dim rng As Range
Dim tbl As Object
Dim rw As Object
Dim cl As Object
Dim tabno As Long
Dim nextrow As Long
Dim i As Long
Set ws = Sheets("Sheet1")
For Each tbl In doc.getElementsByTagName("TABLE")
tabno = tabno + 1
nextrow = nextrow + 1
Set rng = ws.Range("B" & nextrow)
rng.Offset(, -1) = "Table " & tabno
If tabno = 5 Then
For Each rw In tbl.Rows
colno = 5
For Each cl In rw.Cells
If colno = 5 And nextrow > 5 Then
Set classColl = doc.getElementsByClassName("shop")
Set imgTgt = classColl(nextrow - 6).getElementsByTagName("img")
rng.Value = imgTgt(0).getAttribute("alt")
Else
rng.Value = cl.innerText
End If
Set rng = rng.Offset(, 1)
i = i + 1
colno = colno + 1
Next cl
nextrow = nextrow + 1
Set rng = rng.Offset(1, -i)
i = 0
Next rw
End If
Next tbl
End Sub
The table numbers change between the two urls. Table 5 is the table you're interested in for the first url, but Table 6 is the one of interest in the 2nd url. However both tables of interest have the same id ("offers-list"), so instead of looking for the fifth table, adjust your code to look for the Table with the ID "offers-list"
change
If tabno = 5 Then
to
If InStr(1, tbl.outerhtml, "Produktbezeichnung des Shops", vbTextCompare) > 0 Then
This will get you close. There are other changes on the second web page that your current code isn't quite handling - but like I said this will get you close.
I have changed the If tabno = 5 Then with
For Each tbl In doc.getElementsByTagName("table")
' tabno = tabno + 1
If tbl.className = "orangebox_rowborder" Then
Thanks #Ron for guiding me for this...thanks a ton Dude
The following works for each URL so is more robust and is a lot faster than the method you are currently using as it does away with the IE browser nvaigation.
For a lengthy code explanation please see here.
Option Explicit
'Tools > References > HTML Object Library
Public Sub GetTable()
Const URL = "https://www.idealo.de/preisvergleich/OffersOfProduct/1866742_-335-billingham.html" '<==Change this
Dim sResponse As String, listItems As Object, html As HTMLDocument, headers()
headers = Array("product_id", "product_name", "product_price", "product_category", "currency", "spr", "shop_name", "delivery_time", "shop_rating", "position", "free_return", "approved_shipping")
Application.ScreenUpdating = False
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", URL, False
.send
sResponse = StrConv(.responseBody, vbUnicode)
End With
sResponse = Mid$(sResponse, InStr(1, sResponse, "<!DOCTYPE "))
Set html = New HTMLDocument
With html
.body.innerHTML = sResponse
Set listItems = .getElementsByClassName("productOffers-listItemOfferPrice")
End With
Dim currentItem As Long
With ActiveSheet
.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
For currentItem = 0 To listItems.Length - 1
Dim tempString As String, columnValues() As String
tempString = TidyString(GetTransactionInfo(listItems(currentItem).outerHTML), "&#\d+;")
columnValues = GetColumnValues(tempString, headers)
.Cells(currentItem + 2, 1).Resize(1, UBound(columnValues) + 1) = columnValues
Next currentItem
End With
Application.ScreenUpdating = True
End Sub
Public Function GetTransactionInfo(ByVal inputString) As String
'Split to get just the transaction items i.e. Headers and associated values
GetTransactionInfo = Split(Split(inputString, """transaction"",")(1), "}")(0)
End Function
Public Function TidyString(ByVal inputString As String, ByVal matchPattern As String) As String
'Extract transaction info
'Use regex to find these unwanted strings and replace pattern e.g. &#\d+;
'Example inputString
Dim regex As Object, tempString As String
Set regex = CreateObject("VBScript.RegExp")
With regex
.Global = True
.MultiLine = True
.IgnoreCase = False
.Pattern = matchPattern
End With
If regex.test(inputString) Then
TidyString = regex.Replace(inputString, vbNullString)
Else
TidyString = inputString
End If
End Function
Public Function GetColumnValues(ByVal inputString As String, ByVal headers As Variant) As Variant
' Example input string "product_id": "143513","product_name": "Canon 500D Nahlinse 72mm","product_price": "128.0","product_category": "26570","currency": "EUR","spr": "cfd","shop_name": "computeruniverse.net","delivery_time": "long","shop_rating": "100","position": "1","free_return": "14","approved_shipping": "false"
' Extract just the inner string value of each header e.g. 143513
Dim arr() As String, currentItem As Long, tempString As String
tempString = inputString
For currentItem = LBound(headers) To UBound(headers)
tempString = TidyString(tempString, Chr$(34) & headers(currentItem) & Chr$(34) & ":")
Next currentItem
arr = Split(Replace$(tempString, Chr$(34), vbNullString), ",")
GetColumnValues = arr
End Function

Data Extraction from Webpage to Excel using VBA

I was trying to pull a table from Webpage, so far i was successful pull a table from Webpage, unfortunately i have some links in each row of table, when i pulled the table from Webpage, i am getting output without link, just text, is there any way we can pull table from webpage using VBA including hyperlinks.
Here is my code:
Sub TableExample()
Dim IE As Object
Dim doc As Object
Dim strURL As String
strURL = "HERE I USED MY URL"
' replace with URL of your choice
Set IE = CreateObject("InternetExplorer.Application")
With IE
'.Visible = True
.Navigate strURL
Do Until .readyState = 4: DoEvents: Loop
Do While .Busy: DoEvents: Loop
Set doc = IE.Document
GetAllTables doc
.Quit
End With
End Sub
Sub GetAllTables(doc As Object)
' get all the tables from a webpage document, doc, and put them in a new worksheet
Dim ws As Worksheet
Dim rng As Range
Dim tbl As Object
Dim rw As Object
Dim cl As Object
Dim tabno As Long
Dim nextrow As Long
Dim I As Long
Set ws = Worksheets.Add
For Each tbl In doc.getElementsByTagName("TABLE")
tabno = tabno + 1
nextrow = nextrow + 1
Set rng = ws.Range("B" & nextrow)
rng.Offset(, -1) = "Table " & tabno
For Each rw In tbl.Rows
For Each cl In rw.Cells
rng.Value = cl.outerText
Set rng = rng.Offset(, 1)
I = I + 1
Next cl
nextrow = nextrow + 1
Set rng = rng.Offset(1, -I)
I = 0
Next rw
Next tbl
ws.Cells.ClearFormats
End Sub
When you execute "rng.Value = cl.outerText" you get only text. If you need to have all links and other html - please use innerHTML property.
Just replace "rng.Value = cl.outerText" to "rng.Value = cl.innerHTML". This will return entire html with links ;)