I am trying to scrape a large volume of data using my rather basic excel skills. I am using this as a guide (http://www.familycomputerclub.com/scrpae-pull-data-from-websites-into-excel.html), and have got it to work for my data, but now trying to amend the code to meet my requirements.
I have about 10,000 Roll numbers listed in columns, and need to scrape the data from this site (last 10 digits being the roll numbers): http://www.winnipegassessment.com/AsmtPub/english/propertydetails/details.aspx?pgLang=EN&isRealtySearch=true&RollNumber=2011016000
Basically, rather than adding new worksheets for each page download (as per the guide I have been using above), I would like to keep all the new data on the master page, simply transposing the it in to the row with its corresponding roll number (perhaps from the C column).
My code is as follows:
Sub adds()
For x = 1 To 5
Worksheets("RollNo").Select
Worksheets("RollNo").Activate
mystr = "URL;http://www.winnipegassessment.com/AsmtPub/english/propertydetails/details.aspx?pgLang=EN&isRealtySearch=true&RollNumber=2000416000.html"
mystr = Cells(x, 1)
Worksheets.Add(After:=Worksheets(Worksheets.Count)).Name = x
With ActiveSheet.QueryTables.Add(Connection:=mystr, Destination:=Range("$A$2"))
'CommandType = 0
.Name = "2000416000_1"
.FieldNames = True
.RowNumbers = False
.FillAdjacentFormulas = False
.PreserveFormatting = True
.RefreshOnFileOpen = False
.BackgroundQuery = True
.RefreshStyle = xlInsertDeleteCells
.SavePassword = False
.SaveData = True
.AdjustColumnWidth = True
.RefreshPeriod = 0
.WebSelectionType = xlSpecifiedTables
.WebFormatting = xlWebFormattingNone
.WebTables = "2,6,7" '---> Note: many tables have been selected for import from the website
.WebPreFormattedTextToColumns = True
.WebConsecutiveDelimitersAsOne = True
.WebSingleBlockTextImport = False
.WebDisableDateRecognition = False
.WebDisableRedirections = False
.Refresh BackgroundQuery:=False
End With
Next x
End Sub
Sub ProcessAll()
Dim c As Range, shtData As Worksheet
Set shtData = Worksheets("WebQuery")
For Each c In Worksheets("RollNo").Range("A1:A5").Cells
If c.Value <> "" Then
FetchData c.Value
'move fetched data to the sheet
With c.EntireRow
.Cells(2).Value = shtData.Range("A2").Value
'etc....
End With
End If
Next c
End Sub
Sub FetchData(rollNo)
Const BASE_URL As String = "URL;http://www.winnipegassessment.com/AsmtPub/english/" & _
"propertydetails/details.aspx?pgLang=EN&isRealtySearch=true&RollNumber="
Dim qt As QueryTable
With Worksheets("WebQuery")
On Error Resume Next
.QueryTables(1).Delete
On Error GoTo 0
.Cells.Clear
With .QueryTables.Add(Connection:=BASE_URL & rollNo, Destination:=.Range("A2"))
.Name = "2000416000_1"
.FieldNames = True
.RowNumbers = False
.FillAdjacentFormulas = False
.PreserveFormatting = True
.RefreshOnFileOpen = False
.BackgroundQuery = True
.RefreshStyle = xlInsertDeleteCells
.SavePassword = False
.SaveData = True
.AdjustColumnWidth = True
.RefreshPeriod = 0
.WebSelectionType = xlSpecifiedTables
.WebFormatting = xlWebFormattingNone
.WebTables = "2,6,7"
.WebPreFormattedTextToColumns = True
.WebConsecutiveDelimitersAsOne = True
.WebSingleBlockTextImport = False
.WebDisableDateRecognition = False
.WebDisableRedirections = False
.Refresh BackgroundQuery:=False
End With
End With
End Sub
Related
I searched the internet, but I could not find anywhere a dynamic query from links in cells.
In excel i have webquery which generate this data:
I have data in excel Sheet("CustomReport"):
SalesOrder Value1 Value2 Value3 Links
1 Jonas Station1 8 https://x.com=1
2 Greg Station1 5 https://x.com=2
3 Anton Station1 1 https://x.com=3
... ... ... ... ...
Number of rows in this query is always different when it is refreshed.
And based on this webquery i need generate dynamic webquery in macro.
For example:
DynamicQuery1 save data from report https://x.com=1 to Sheet name "Orders" started from A1 and ending A{X} value (reports have diffrents number of rows).
DynamicQuery2 save data from report https://x.com=2 to the same Sheet "Orders" but started from A{X+1}.
I have such a macro, but it only works for the first row.
Sub Test()
Dim URL As String
URL = Sheets("CustomReport").Range("E2").Value
Sheets("Orders").Select
With ActiveSheet.QueryTables.Add(Connection:="URL;" & URL, Destination:=Range("$A$1"))
.Name = "team2289_2"
.FieldNames = True
.RowNumbers = False
.FillAdjacentFormulas = False
.PreserveFormatting = False
.RefreshOnFileOpen = False
.BackgroundQuery = True
.RefreshStyle = xlOverwriteCells
.SavePassword = False
.SaveData = True
.AdjustColumnWidth = True
.RefreshPeriod = 0
.WebSelectionType = xlEntirePage
.WebFormatting = xlWebFormattingAll
.WebPreFormattedTextToColumns = True
.WebConsecutiveDelimitersAsOne = True
.WebSingleBlockTextImport = False
.WebDisableDateRecognition = True
.WebDisableRedirections = False
.Refresh BackgroundQuery:=False
End With
And i need refreshing this macro every 1 hour.
Anyone can give me macro based on this way?
To loop through the cells use:
dim c as range, DataSpot as range
set c = Sheets("CustomReport").Range("E2")
while c.value <>""
url = c.value
set DataSpot=cells(sheets("orders").Range("A1").SpecialCells(xlLastCell).row+1,1)
' Web Query Code goes here
set c=c.offset(1,0)
wend
In your data query, use:
Destination:=Range(DataSpot.Address))
Sub Test()
Dim URL As String
URL = Sheets("CustomReport").Range("E2").Value
Sheets("Sheet1").Select
Dim c As Range, DataSpot As Range
Set c = Sheets("CustomReport").Range("E2")
While c.Value <> ""
Set DataSpot = Cells(Sheets("Sorders").Range("A1").SpecialCells(xlLastCell).Row + 1, 1)
With ActiveSheet.QueryTables.Add(Connection:="URL;" & URL, Destination:=Range(DataSpot.Address))
.Name = "team2289_2"
.FieldNames = True
.RowNumbers = False
.FillAdjacentFormulas = False
.PreserveFormatting = False
.RefreshOnFileOpen = False
.BackgroundQuery = True
.RefreshStyle = xlOverwriteCells
.SavePassword = False
.SaveData = True
.AdjustColumnWidth = True
.RefreshPeriod = 0
.WebSelectionType = xlEntirePage
.WebFormatting = xlWebFormattingAll
.WebPreFormattedTextToColumns = True
.WebConsecutiveDelimitersAsOne = True
.WebSingleBlockTextImport = False
.WebDisableDateRecognition = True
.WebDisableRedirections = False
.Refresh BackgroundQuery:=False
End With
Set r = r.Offset(1, 0)
Wend
End Sub
i put your code and i have debug with:
Set r = r.Offset(1, 0)
What wrong i do ?
I want to scrap data from a website using Excel and I have a problem that each extraction of data is inserted into new sheet while I want the new scraped data to be inserted bellow the previous data , first I have a sheet that has the links of all webpages that from the data are scraped .. my VBA codes are :
Sub adds()
For x = 1 To 316
Worksheets("Sheet3").Select
Worksheets("Sheet3").Activate
mystr = " "
mystr = Cells(x, 1)
Worksheets.Add(After:=Worksheets(Worksheets.Count)).Name = x
With ActiveSheet.QueryTables.Add(Connection:=mystr, Destination:=Range("$A$1"))
'CommandType = 0
.Name = "?q=ÍäÇä&daleel=mtn&do=search&page=2"
.FieldNames = True
.RowNumbers = False
.FillAdjacentFormulas = False
.PreserveFormatting = True
.RefreshOnFileOpen = False
.BackgroundQuery = True
.RefreshStyle = xlInsertDeleteCells
.SavePassword = False
.SaveData = True
.AdjustColumnWidth = True
.RefreshPeriod = 0
.WebSelectionType = xlEntirePage
.WebFormatting = xlWebFormattingNone
.WebPreFormattedTextToColumns = True
.WebConsecutiveDelimitersAsOne = True
.WebSingleBlockTextImport = False
.WebDisableDateRecognition = False
.WebDisableRedirections = False
.Refresh BackgroundQuery:=False
End With
Next x
End Sub
I am planning to extract data from a table on the web. The code below was mostly from built-in web scraping function in Excel.
What I want is to find the last row with data and print on the first blank row.
Sub Sub1()
ActiveSheet.Cells.Clear
Dim lastRow As Long
Dim i As Integer
For i = 1 To 2
With ActiveSheet.QueryTables.Add(Connection:= _
"URL;http://www.aastocks.com/tc/stocks/quote/symbolsearch.aspx?page=" & i & " &order=symbol&seq=asc", Destination _
:=Range("A,lastRow")) '???? I got an error here, what I want is to detect the last row and print on the first blank row.???
'.CommandType = 0
.Name = "symbolsearch_1"
.FieldNames = True
.RowNumbers = False
.FillAdjacentFormulas = False
.PreserveFormatting = True
.RefreshOnFileOpen = False
.BackgroundQuery = True
.RefreshStyle = xlInsertDeleteCells
.SavePassword = False
.SaveData = True
.AdjustColumnWidth = True
.RefreshPeriod = 0
.WebSelectionType = xlSpecifiedTables
.WebFormatting = xlWebFormattingNone
.WebTables = "10"
.WebPreFormattedTextToColumns = True
.WebConsecutiveDelimitersAsOne = True
.WebSingleBlockTextImport = False
.WebDisableDateRecognition = False
.WebDisableRedirections = False
.Refresh BackgroundQuery:=False
End With
lastRow = ActiveSheet.Cells(Rows.Count, "A").End(xlUp).Row + 1
Debug.Print lastRow
Next i
End Sub
This seems to work ok now. You were close, you should be able to edit as required.
Sub Sub1()
ActiveSheet.Cells.Clear
Dim lastRow As Long
Dim i As Integer
Dim sTicker As String
'set the first row to write to
lastRow = 2
For i = 1 To 2
sTicker = InputBox("Enter the code to search for")
With ActiveSheet.QueryTables.Add(Connection:= _
"URL;http://www.aastocks.com/tc/stocks/quote/symbolsearch.aspx?page=" & sTicker & " &order=symbol&seq=asc", Destination _
:=Range("A" & lastRow)) 'uses the ticker value entered and fixed the destination part
'.CommandType = 0
.Name = "symbolsearch_1"
.FieldNames = True
.RowNumbers = False
.FillAdjacentFormulas = False
.PreserveFormatting = True
.RefreshOnFileOpen = False
.BackgroundQuery = True
.RefreshStyle = xlInsertDeleteCells
.SavePassword = False
.SaveData = True
.AdjustColumnWidth = True
.RefreshPeriod = 0
.WebSelectionType = xlSpecifiedTables
.WebFormatting = xlWebFormattingNone
.WebTables = "10"
.WebPreFormattedTextToColumns = True
.WebConsecutiveDelimitersAsOne = True
.WebSingleBlockTextImport = False
.WebDisableDateRecognition = False
.WebDisableRedirections = False
.Refresh BackgroundQuery:=False
End With
lastRow = ActiveSheet.Cells(Rows.Count, "A").End(xlUp).Row + 1
Next i
End Sub
I am doing a little data scraping in excel and trying to paste the data into new worksheets with the following code:
Sub ZillowScrape()
'
' ZillowScrape Macro
'
' Keyboard Shortcut: Ctrl+z
For x = 1 To 5
Worksheets("URLs").Select
Worksheets("URLs").Activate
mystr = "URL;http://www.zillow.com/homedetails/332-Blanca-Ave-Tampa-FL-33606/45111195_zpid/"
mystr = Cells(x, 1)
ThisWorkbook.Worksheets.Add(After:=Worksheets(Worksheets.Count)).Name = x
With ActiveSheet.QueryTables.Add(Connection:=mystr, Destination:=Range("$A$2"))
'CommandType = 0
.Name = "01000_1"
.FieldNames = True
.RowNumbers = False
.FillAdjacentFormulas = False
.PreserveFormatting = True
.RefreshOnFileOpen = False
.BackgroundQuery = True
.RefreshStyle = xlInsertDeleteCells
.SavePassword = False
.SaveData = True
.AdjustColumnWidth = True
.RefreshPeriod = 0
.WebSelectionType = xlSpecifiedTables
.WebFormatting = xlWebFormattingNone
.WebTables = "3,4,5" '---> Note: many tables have been selected for import from the website
.WebPreFormattedTextToColumns = True
.WebConsecutiveDelimitersAsOne = True
.WebSingleBlockTextImport = False
.WebDisableDateRecognition = False
.WebDisableRedirections = False
.Refresh BackgroundQuery:=False
End With
Next x
End Sub
I keep getting the error "Run Time Error '1004': application-defined or object-defined error" for line:
With ActiveSheet.QueryTables.Add(Connection:=mystr, Destination:=Range("$A$2"))
I think that problem is in this code:
mystr = "URL;http://www.zillow.com/homedetails/332-Blanca-Ave-Tampa-FL-33606/45111195_zpid/"
mystr = Cells(x, 1)
I suppose the solution is this :
mystr = "URL;http://www.zillow.com/homedetails/332-Blanca-Ave-Tampa-FL-33606/45111195_zpid/"+Cells(x,1).value
I recorded a macro and edited it. It scrapes particular data from webpages (found on links) and display them on separate pages.
I want to use the data (VLOOKUP) but the data is on different pages which makes it hard to get an accurate formula.
Every week I change the second line of the code
For x = 1 To 20
to
For x = 21 to ....
for example, because new links/data come out every week.
How do I find the last line to add the next lot of data below that?
Sub Update()
For x = 1 To 20
Worksheets("Links").Select
Worksheets("Links").Activate
mystr = Cells(x, 8)
mystr2 = Cells(x, 15)
Worksheets.Add(After:=Worksheets(Worksheets.Count)).Name = x
With ActiveSheet.QueryTables.Add(Connection:=mystr, Destination:=Range("$K$1"))
.Name = "report2_1"
.FieldNames = True
.RowNumbers = False
.FillAdjacentFormulas = False
.PreserveFormatting = True
.RefreshOnFileOpen = False
.BackgroundQuery = True
.RefreshStyle = xlInsertDeleteCells
.SavePassword = False
.SaveData = True
.AdjustColumnWidth = True
.RefreshPeriod = 0
.WebSelectionType = xlAllTables
.WebFormatting = xlWebFormattingNone
.WebPreFormattedTextToColumns = True
.WebConsecutiveDelimitersAsOne = True
.WebSingleBlockTextImport = False
.WebDisableDateRecognition = False
.WebDisableRedirections = False
.Refresh BackgroundQuery:=False
End With
Range("A1").Select
With ActiveSheet.QueryTables.Add(Connection:=mystr2, Destination:=Range("$A$1"))
.Name = "report6_1"
.FieldNames = True
.RowNumbers = False
.FillAdjacentFormulas = False
.PreserveFormatting = True
.RefreshOnFileOpen = False
.BackgroundQuery = True
.RefreshStyle = xlInsertDeleteCells
.SavePassword = False
.SaveData = True
.AdjustColumnWidth = True
.RefreshPeriod = 0
.WebSelectionType = xlAllTables
.WebFormatting = xlWebFormattingNone
.WebPreFormattedTextToColumns = True
.WebConsecutiveDelimitersAsOne = True
.WebSingleBlockTextImport = False
.WebDisableDateRecognition = False
.WebDisableRedirections = False
.Refresh BackgroundQuery:=False
End With
Next x
End Sub
instead of
for x = 1 to 20
do
for x = Range("A" & Rows.Count).End(xlUp).Row to Range("A" & Rows.Count).End(xlUp).Row-20 step -1
which will find the last cell used in Column A and subtract 20 rows from it and iterate backwars