I am seeking for scraping amazon inventory .. Here's the link I used
https://www.amazon.com/Stratford-Pharmaceuticals-Omega-Fatty-Strength/dp/B006JCU54Y/ref=sr_1_2?s=pet-supplies&ie=UTF8&qid=1518816130&sr=1-2&keywords=stratford
There is a part with the title "Compare with similar items" in which I need to extract prices (I have already done that) and also the inventory quantity ..
The second part is not directly obtained .. Manually I have to cick "Add to Cart" then from the next page click "Cart" then from the next page select "Quantity drop down and select 10+ and manually type any large number say 999 and click "Update"
There will be alert message that contains the remaining in inventory like that
(This seller has only 35 of these available. To see if more are available from another seller,) >> so this is the desired number which is 35
Here's the excel file and snapshots that illustrates the manual steps ..
I used IE but if it is possible to use XMLHTTP it would be great of course
Here's the code I devised till now
Sub Test()
Dim ws As Worksheet
Dim ie As Object
Dim allLnks As Object
Dim lnk As Object
Dim r As Long
Dim liElem As Object
Dim prElem As Object
Dim crtElem As Object
Dim elem As Object
Dim cnt As Integer
Dim inputElem As Object
Dim inputEle As Object
Set ws = ThisWorkbook.Worksheets("Sheet2")
Set ie = CreateObject("InternetExplorer.Application")
With ie
.Visible = True
.navigate ("https://www.amazon.com/Stratford-Pharmaceuticals-Omega-Fatty-Strength/dp/B006JCU54Y/ref=sr_1_2?s=pet-supplies&ie=UTF8&qid=1518816130&sr=1-2&keywords=stratford")
Do: DoEvents: Loop Until .readystate = 4
ws.Range("B2").Value = Format(Now(), "dd/mm/yyyy - hh:mm:ss")
Set liElem = .document.getelementbyid("detail-bullets").getelementsbytagname("table")(0).getelementsbytagname("ul")(0)
For Each elem In liElem.getelementsbytagname("li")
If InStr(elem.innerText, "ASIN") > 0 Then ws.Range("B1").Value = Replace(elem.innerText, "ASIN: ", "")
If InStr(elem.innerText, "Rank:") > 0 Then ws.Range("B3").Value = MyUDF(elem.innerText, "Rank: ", "(")
If InStr(elem.innerText, "Review:") > 0 Then ws.Range("B4").Value = Replace(Split(Trim(Split(elem.innerText, "Review: ")(1)), vbLf)(1), Chr(13), "")
Next elem
Set prElem = .document.getelementbyid("comparison_price_row")
For Each elem In prElem.getelementsbytagname("td")
cnt = cnt + 1
ws.Range("A" & cnt + 4).Value = "Seller " & cnt
ws.Range("B" & cnt + 4).Value = elem.getElementsByClassName("a-offscreen")(0).innerText
Next elem
cnt = 0
Set crtElem = .document.getelementbyid("HLCXComparisonTable").getElementsByClassName("a-button-inner")
For Each elem In crtElem
.navigate elem.getelementsbytagname("a")(0).href
Do: DoEvents: Loop Until .readystate = 4
.navigate .document.getElementsByClassName("a-button-inner")(0).getelementsbytagname("a")(0).href
Do: DoEvents: Loop Until .readystate = 4
cnt = cnt + 1
ws.Range("C" & cnt + 4).Value = Replace(Split(Split(MyUDF(.document.getElementsByClassName("a-row a-spacing-base sc-action-quantity sc-action-quantity-right")(0).innerHTML, "maxlength=", "quantity="), "autocomplete")(0), "=")(1), """", "")
Next elem
Stop
'.Quit
End With
End Sub
Function MyUDF(s As String, b As String, a As String) As String
Dim arr() As String
Dim r As String
arr = Split(s, b)
If UBound(arr) > 0 Then
r = arr(1)
arr = Split(r, a)
If UBound(arr) > 0 Then
r = arr(0)
End If
End If
MyUDF = Trim(r)
End Function
Here are snapshots that may help
]4
CSS Selector to get stock info
Taking the following example from your code:
You can use a CSS selector to target the text regarding stock levels.
.sc-product-availability
CSS query example using cart view page (generated by your code):
E.g. CSS query for associated cart view html
The . is the selector for ClassName.
VBA
You can use the .document.querySelectorAll method to retrieve a nodeList of the matching items (2 in the example)
Dim nodeList As Object
Set nodeList = .document.querySelectorAll(".sc-product-availability")
You would then loop over its length to retrieve items (not tested, but this is general method).
Dim i As Long
For i = 0 to nodeList.Length - 1
Debug.Print nodeList.Item(i).innerText
Next i
Hopefully that is useful to you.
Give it a try. It should fetch you the number you are after. I used xmlhttp and Selenium combinedly to make the script run a little faster. I could not use xmlhttp request in my second approach as the link were javascript encrypted.
Upon running the below script you can find out how many of these items the seller has. Even if the seller has no such items, the script will not break as I've already managed that.
There it is:
Sub GetInfo()
Const base As String = "https://www.amazon.com"
Const mainurl As String = "https://www.amazon.com/Stratford-Pharmaceuticals-Omega-Fatty-Strength/dp/B006JCU54Y/ref=sr_1_2?s=pet-supplies&ie=UTF8&qid=1518816130&sr=1-2&keywords=stratford"
Dim Http As New XMLHTTP60, Htmldoc As New HTMLDocument, itext As Object
Dim driver As New ChromeDriver, idic As New Scripting.Dictionary
Dim post As Object, oinput As Object, posts As Object, elem As Object
Dim idrop As Object, oclick As Object, I&, key As Variant
With Http
.Open "GET", mainurl, False
.send
Htmldoc.body.innerHTML = .responseText
End With
With Htmldoc.querySelectorAll("[id^='comparison_add_to_cart_'].a-button-text")
For I = 0 To .Length - 1
idic(base & Replace(.item(I).getAttribute("href"), "about:", "")) = 1
Next I
End With
For Each key In idic.keys
driver.get key
Set post = driver.FindElementByCss("input[value='addToCart']", Raise:=False, timeout:=10000)
If Not post Is Nothing Then
post.Click
End If
Set posts = driver.FindElementById("hlb-view-cart-announce", timeout:=10000)
posts.Click
Set elem = driver.FindElementByCss("span#a-autoid-0-announce", timeout:=10000)
elem.Click
Set idrop = driver.FindElementById("dropdown1_9", timeout:=10000)
idrop.Click
Set oinput = driver.FindElementByCss("input[name='quantityBox']", timeout:=10000)
oinput.SendKeys "100"
Set oclick = driver.FindElementByCss("#a-autoid-1", timeout:=10000)
oclick.Click
Set itext = driver.FindElementByCss(".sc-quantity-update-message span.a-size-base", Raise:=False, timeout:=5000)
If Not itext Is Nothing Then
R = R + 1: Cells(R, 1) = itext.Text
Else
R = R + 1: Cells(R, 1) = "Sorry dear nothing found"
End If
Next key
End Sub
Reference to add:
Selenium Type Library
Microsoft HTML Object Library
Microsoft XML, v6.0
Microsoft Scripting Runtime
Output you may get like below. Now, you can use regex to parse the number 48:
This seller has only 48 of these available. To see if more are available from another seller, go to the product detail page.
Related
Does anyone know a method to sort Visio pages alphabetically using VBA?
I looked to see if a method such as vzdVisioDocument.Pages.Sort exists, but found nothing in documentation or through internet searches.
Do I need to write my own sorting function using the Application.ActiveDocument.Pages.ItemU("Page Name").Index property? That seems to be the method suggested by recording a macro of the action.
So that wasn't as painful as expected. With vzdVisioDocument as an already defined Visio.Document:
' Make a collection of titles to iterate through
Dim colPageTitles As Collection
Set colPageTitles = New Collection
Dim intPageCounter As Integer
For intPageCounter = 1 To vzdVisioDocument.Pages.Count
colPageTitles.Add vzdVisioDocument.Pages.Item(intPageCounter).Name
Next intPageCounter
' For each title in the collection, iterate through pages and find the appropriate new index
Dim intPageIndex As Integer
Dim varPageTitle As Variant
For Each varPageTitle In colPageTitles
For intPageIndex = 1 To vzdVisioDocument.Pages.Count
' Check to see if the title comes before the index's current page title
If StrComp(varPageTitle, vzdVisioDocument.Pages.Item(intPageIndex).Name) < 0 Then
' If so, set the new page index
vzdVisioDocument.Pages.ItemU(varPageTitle).Index = intPageIndex
Exit For
End If
Next intPageIndex
Next varPageTitle
' Clean up
Set colPageTitles = Nothing
I mentioned this in another comment, but when I made some test pages, it was always shuffling the pages around when I ran it because I the way that this is implemented, I don't believe that Exit For should be in there.
I also swapped the comparison to StrCompare due to personal preference along with the order of the for loops.
Sub PageSort()
Dim titlesColl As Collection
Set titlesColl = New Collection
Dim i As Long
For i = 1 To ActiveDocument.Pages.Count
titlesColl.Add ActiveDocument.Pages.Item(i).Name
Next i
Dim title As Variant
For i = 1 To ActiveDocument.Pages.Count
For Each title In titlesColl
If StrComp(ActiveDocument.Pages.Item(i).Name, title, vbTextCompare) < 0 Then
ActiveDocument.Pages.Item(title).index = i
End If
Next title
Next i
Set titlesColl = Nothing
End Sub
Private Sub reorderPages()
Dim PageNameU() As String
Dim isBackgroundPage As Boolean
Dim vsoPage As Visio.Page
Dim vsoCellObj As Visio.Cell
'// Get All Pages
Dim i As Integer
For Each vsoPage In ActiveDocument.Pages
i = i + 1
ReDim Preserve PageNameU(i)
PageNameU(i) = vsoPage.NameU
Next vsoPage
For i = 1 To UBound(PageNameU)
Set vsoPage = vsoPages.ItemU(PageNameU(i))
Set vsoCellObj = vsoPage.PageSheet.Cells("UIVisibility")
isBackgroundPage = vsoPage.Background
'// Make foreground page to set page index
If isBackgroundPage = True Then
vsoCellObj.FormulaU = visUIVNormal
vsoPage.Background = False
End If
vsoPage.Index = NumNonAppSysPages + i
'// Set to background page
If isBackgroundPage = True Then
vsoCellObj.FormulaU = visUIVHidden
vsoPage.Background = True
End If
Next i
End Sub
The way my code is currently set up, it gets the data from the URL that i've indicated in the code. However, I actually want to provide a list of URLs in Sheet2 that it would loop through until it's extracted all data. I dont want to have to update the code each time individually per URL. There are thousands... How would i be able to do that?
Here is the code:
Public Sub exceljson()
Dim https As Object, Json As Object, i As Integer
Dim Item As Variant
Set https = CreateObject("MSXML2.XMLHTTP")
https.Open "GET", "https://min-api.cryptocompare.com/data/price?fsym=USD&tsyms=BTC", False
https.Send
Set Json = JsonConverter.ParseJson(https.responseText)
i = 2
For Each Item In Json.Items
Sheets(1).Cells(i, 2).Value = Item
i = i + 1
Next
MsgBox ("complete")
End Sub
I'll just pretend that all of the URLS are in Column A here:
Public Sub exceljson()
Dim https As Object, Json As Object, i As Integer, j As Integer
Dim Item As Variant
Set https = CreateObject("MSXML2.XMLHTTP")
For j = 1 to Sheets(2).UsedRange.Rows.count
If Len(Trim$(Sheets(2).Cells(j, 1).Value2)) > 0 Then
https.Open "GET", Trim$(Sheets(2).Cells(j, 1).Value2), False
https.Send
Set Json = JsonConverter.ParseJson(https.responseText)
i = 2
For Each Item In Json.Items
Sheets(1).Cells(i, 2).Value = Item
i = i + 1
Next Item
End If
Next j
MsgBox ("complete")
End Sub
I like to use the trim() method to be safe that I'm not catching anything extra
I want to use VBA to open up a webpage for me (this webpage is made up of HTML with cells of data), find some keywords, and email out the keywords and a certain number of rows of data above and below the keywords. To do this though, I need to be able to find the location of the keywords (eg. row 3, column 2, or line 4 characters 4-10, etc.). Are there any commands in the Internet Explorer Library that will allow me to do this? So far I have code for one keyword only, that will go to the keyword and select/highlight it. Now I need to find out how to grab a certain amount of rows above/below it and send it out.
Also a side question: If you know a good way to modify my current code to create a nested loop that scans through the whole webpage, and for multiple keywords that would be very helpful!
Sub subFindScrollIE()
Dim boolFound As Boolean
Dim ie As InternetExplorer
Set ie = New InternetExplorer
ie.Navigate "my URL HERE"
strTemp = "KEYWORD1"
Do Until ie.ReadyState = READYSTATE_COMPLETE
'DoEvents
Loop
ie.Visible = True
Set txt = ie.Document.body.createTextRange()
boolFound = txt.findText(strTemp)
txt.moveStart "character", -1
txt.findText strTemp
txt.Select
txt.ScrollIntoView
Set ie = Nothing
End Sub
You could continue with the approach you have started using if you use Regular Expressions to locate the text you are after and the surrounding text.
Personally, I would favor using html objects to find what you're after. Here is some example code to iterate through generic tables:
Sub subFindScrollIE()
Dim strTemp() As Variant, output() As String, txt As String
Dim tr As HTMLTableRow, r As Integer, i As Integer
Dim tRows As IHTMLElementCollection
Dim xlR As Byte, c As Byte
Dim ie As InternetExplorerMedium
Set ie = New InternetExplorerMedium
ie.Visible = True
ie.Navigate "E:\Dev\table.htm"
strTemp = Array("abc", "mno", "vwx", "efg")
Do Until (ie.ReadyState = 4 Or ie.ReadyState = 3): Loop
Set tRows = ie.Document.body.getElementsByTagName("tr")
xlR = 2
' loop through rows
For r = 0 To tRows.Length - 1
Set tr = tRows(r)
' loop through search text
For i = 0 To UBound(strTemp)
' search row for string
txt = LCase(tr.innerHTML)
If (InStr(txt, LCase(strTemp(i))) > 0) Then
' search string found. split table data into array
txt = tr.innerHTML
txt = Replace(txt, "</td><td>", "~")
txt = Replace(txt, "<td>", "")
txt = Replace(txt, "</td>", "")
output = Split(txt, "~")
' populate cells from array
For c = 0 To UBound(output)
Sheet1.Cells(xlR, c + 2) = output(c)
Next c
xlR = xlR + 2
End If
Next i
Next r
ie.Quit
Set ie = Nothing
End Sub
I scrape some websites with vba for fun and I use VBA as tool. I use XMLHTTP and HTMLDocument (cause it's more faster than internetExplorer.Application).
Public Sub XMLhtmlDocumentHTMLSourceScraper()
Dim XMLHTTPReq As Object
Dim htmlDoc As HTMLDocument
Dim postURL As String
postURL = "http://foodffs.tumblr.com/archive/2015/11"
Set XMLHTTPReq = New MSXML2.XMLHTTP
With XMLHTTPReq
.Open "GET", postURL, False
.Send
End With
Set htmlDoc = New HTMLDocument
With htmlDoc
.body.innerHTML = XMLHTTPReq.responseText
End With
i = 0
Set varTemp = htmlDoc.getElementsByClassName("post_glass post_micro_glass")
For Each vr In varTemp
''''the next line is important to solve this issue *1
Cells(1, 1) = vr.outerHTML
Set varTemp2 = vr.getElementsByTagName("SPAN class=post_date")
Cells(i + 1, 3) = varTemp2.Item(0).innerText
''''the next line occur 438Error''''
Set varTemp2 = vr.getElementsByClassName("hover_inner")
Cells(i + 1, 4) = varTemp2.innerText
i = i + 1
Next vr
End Sub
I figure out this problem by *1
cells(1,1) shows me the next things
<DIV class="post_glass post_micro_glass" title=""><A class=hover title="" href="http://foodffs.tumblr.com/post/134291668251/sugar-free-low-carb-coffee-ricotta-mousse-really" target=_blank>
<DIV class=hover_inner><SPAN class=post_date>...............
Yeah all the class tag lost " ". only the first function's class has " "
I really don't know why this situation occur.
//Well I could pharse by getElementsByTagName("span"). but I prefer "class" Tag.....
The getElementsByClassName method is not considered a method of itself; only of the parent HTMLDocument. If you want to use it to locate elements within a DIV element, you need to create a sub-HTMLDocument comprised of the .outerHtml of that specific DIV element.
Public Sub XMLhtmlDocumentHTMLSourceScraper()
Dim xmlHTTPReq As New MSXML2.XMLHTTP
Dim htmlDOC As New HTMLDocument, divSUBDOC As New HTMLDocument
Dim iDIV As Long, iSPN As Long, iEL As Long
Dim postURL As String, nr As Long, i As Long
postURL = "http://foodffs.tumblr.com/archive/2015/11"
With xmlHTTPReq
.Open "GET", postURL, False
.Send
End With
'Set htmlDOC = New HTMLDocument
With htmlDOC
.body.innerHTML = xmlHTTPReq.responseText
End With
i = 0
With htmlDOC
For iDIV = 0 To .getElementsByClassName("post_glass post_micro_glass").Length - 1
nr = Sheet1.Cells(Rows.Count, 3).End(xlUp).Offset(1, 0).Row
With .getElementsByClassName("post_glass post_micro_glass")(iDIV)
'method 1 - run through multiples in a collection
For iSPN = 0 To .getElementsByTagName("span").Length - 1
With .getElementsByTagName("span")(iSPN)
Select Case LCase(.className)
Case "post_date"
Cells(nr, 3) = .innerText
Case "post_notes"
Cells(nr, 4) = .innerText
Case Else
'do nothing
End Select
End With
Next iSPN
'method 2 - create a sub-HTML doc to facilitate getting els by classname
divSUBDOC.body.innerHTML = .outerHTML 'only the HTML from this DIV
With divSUBDOC
If CBool(.getElementsByClassName("hover_inner").Length) Then 'there is at least 1
'use the first
Cells(nr, 5) = .getElementsByClassName("hover_inner")(0).innerText
End If
End With
End With
Next iDIV
End With
End Sub
While other .getElementsByXXXX can readily retrieve collections within another element, the getElementsByClassName method needs to consider what it believes to be the HTMLDocument as a whole, even if you have fooled it into thinking that.
Here's an alternative approach. It's very similar to the original code but uses querySelectorAll to select the relevant span elements. One important point for this method is that vr has to be declared as being a specific element type and not as an IHTMLElement or generic Object:
Option Explicit
Public Sub XMLhtmlDocumentHTMLSourceScraper()
' Changed from generic Object to specific type - not
' strictly necessary to do this
Dim XMLHTTPReq As MSXML2.XMLHTTP60
Dim htmlDoc As HTMLDocument
' These declarations weren't included in the original code
Dim i As Integer
Dim varTemp As Object
' IMPORTANT: vr must be declared as a specific element type and not
' as an IHTMLElement or generic Object
Dim vr As HTMLDivElement
Dim varTemp2 As Object
Dim postURL As String
postURL = "http://foodffs.tumblr.com/archive/2015/11"
' Changed from XMLHTTP to XMLHTTP60 as XMLHTTP is equivalent
' to the older XMLHTTP30
Set XMLHTTPReq = New MSXML2.XMLHTTP60
With XMLHTTPReq
.Open "GET", postURL, False
.Send
End With
Set htmlDoc = New HTMLDocument
With htmlDoc
.body.innerHTML = XMLHTTPReq.responseText
End With
i = 0
Set varTemp = htmlDoc.getElementsByClassName("post_glass post_micro_glass")
For Each vr In varTemp
''''the next line is important to solve this issue *1
Cells(1, 1) = vr.outerHTML
Set varTemp2 = vr.querySelectorAll("span.post_date")
Cells(i + 1, 3) = varTemp2.Item(0).innerText
Set varTemp2 = vr.getElementsByClassName("hover_inner")
' incorporating correction from Jeeped's comment (#56349646)
Cells(i + 1, 4) = varTemp2.Item(0).innerText
i = i + 1
Next vr
End Sub
Notes:
XMLHTTP equivalent to XMLHTTP30 as described here
apparent need to declare a specific element type explored in this question but, unlike getElementsByClassName, querySelectorAll doesn't exist in any version of IHTMLElement
I am trying to get data from internet to Excel spreadsheet. Namely, I want the names of the AP Top 25 NCAAF teams from past 12 years, for all 17 weeks of the season (including final rankings). My code is based off this tutorial: https://www.youtube.com/watch?v=7sZRcaaAVbg
I'm getting the error at the line:
SchoolNames(iYear, iWeek, iRank) = Trim(Doc.getElementsByTagName("school")(iRank - 1).innerText)
Here's my code:
Option Explicit
Sub Data_Collection()
Application.ScreenUpdating = False
Dim iYear As Integer
Dim iWeek As Integer
Dim IE As New InternetExplorer
Dim sYear As String
Dim sWeek As String
Dim SchoolNames(12, 17, 25) As String
Dim Doc As HTMLDocument
Dim iRank As Integer
For iYear = 2002 To 2013
sYear = CStr(iYear)
For iWeek = 1 To 17
If iWeek = 17 Then
sWeek = "1/seasontype/3"
Else
sWeek = CStr(iWeek)
End If
IE.Visible = False
IE.navigate "http://espn.go.com/college-football/rankings/_/poll/1/year/" & sYear & "/week/" & sWeek
Do
DoEvents
Loop Until IE.readyState = READYSTATE_COMPLETE
Set Doc = IE.Document
For iRank = 1 To 25
SchoolNames(iYear, iWeek, iRank) = Trim(Doc.getElementsByTagName("school")(iRank - 1).innerText)
Next iRank
Next iWeek
Next iYear
End Sub
Additionally, I'm not sure even after I fix the Runtime 91 error that I am passing the right stuff into the .getElementsByTagName(). Any suggestions?
You are close but need to make a couple small modifications.
SchoolNames(iYear,... iYear variable is a four digit year but you are setting an array so it needs to be the location in the array, not the year)
The element in the webpage that has the school information is labeled by classname not TagName. Change to Doc.getElementsByClassName
Make sure you quite IE so it's not running in the background IE.Quit
Example code for returning Name of school that ranked #1 in 2002 week 1:
Sub getSchoolName()
Dim IE As New InternetExplorer
IE.Visible = False
IE.navigate "http://espn.go.com/college-football/rankings/_/poll/1/year/2002/week/1"
Do
DoEvents
Loop Until IE.readyState = READYSTATE_COMPLETE
Set Doc = IE.Document
Dim schoolName As String
schoolName = Trim(Doc.getElementsByClassName("school")(0).innerText)
IE.Quit
MsgBox (schoolName)
End Sub
Results look like this:
Make sure you include references to in your project.
Here is one way to reset the year as an array position:
sYearCntr = iYear - 2002
SchoolNames(sYearCntr, iWeek, iRank) = ....