I am new to web-scraping. I am trying to get data from O-net online. To be specific, I would like to draw the median wages and employment stats for a few dozens selected jobs. For example:
https://www.onetonline.org/link/summary/13-2041.00
O-net content
Upon inspection, the html looks like this
Below is my code though it does not work.
Sub scrape()
Dim appIE As Object
Set appIE = CreateObject("internetexplorer.application")
With appIE
.Navigate "https://www.onetonline.org/link/summary/13-2041.00"
.Visible = False
End With
Do While appIE.Busy
DoEvents
Loop
Set allRowOfData = appIE.document.getElementByID("wrapper_WagesEmployment").getelementbytagname("table")(0)
Dim myValue As String: myValue = allRowOfData.Cells(3).innerHTML
appIE.Quit
Set appIE = Nothing
Range("A1").Value = myValue
End Sub
Please insert Option Explicit at the top of the module to help you identify which variable are not declared properly.
You spelled getelementbytagname method wrong, it's getElementsByTagName.
Try the below code:
Private Sub scrape()
Dim appIE As Object
Set appIE = CreateObject("internetexplorer.application")
With appIE
.navigate "https://www.onetonline.org/link/summary/13-2041.00"
.Visible = False
Do While .Busy Or .readyState <> 4
DoEvents
Loop
End With
Dim ieDoc As Object
Set ieDoc = appIE.Document
Dim allRowOfData As Object
Set allRowOfData = ieDoc.getElementById("wrapper_WagesEmployment").getElementsByTagName("table")(0)
Dim medianWageValue As String
medianWageValue = allRowOfData.getElementsByClassName("report2")(0).innerText
Range("A1").Value = medianWageValue 'Fully qualify your range referece e.g. Sheet1.Range("A1").Value
Set ieDoc = Nothing
appIE.Quit
Set appIE = Nothing
End Sub
Alternative - You can do the same using XMLHTTP that doesn't require you to open IE and most of the time, faster (You will need to add Microsoft HTML Object Library in your reference):
Private Sub ScrapeByXMLHTTP()
Dim xmlhttp As Object
Set xmlhttp = CreateObject("MSXML2.XMLHTTP")
With xmlhttp
.Open "GET", "https://www.onetonline.org/link/summary/13-2041.00"
.send
Dim htmlDoc As MSHTML.HTMLDocument
Set htmlDoc = New MSHTML.HTMLDocument
htmlDoc.body.innerHTML = .responseText
End With
Set xmlhttp = Nothing
htmlDoc.body.innerHTML = htmlDoc.getElementById("wrapper_WagesEmployment").getElementsByTagName("table")(0).outerHTML
Dim medianWageValue As String
medianWageValue = htmlDoc.getElementsByClassName("report2")(0).innerText
Set htmlDoc = nothing
Range("A1").Value = medianWageValue 'Fully qualify your range referece e.g. Sheet1.Range("A1").Value
End Sub
Related
When I launch the site via my code, there is an error of type "method document of object iwebbrowser2 failed " at the level of my variable "oDoc"
Private Function CreerNavigateur(ByVal mails As String)
Dim IE As Object
Dim oDoc As Object
Dim Htable, maTable As Object
Dim text As String
Set IE = CreateObject("InternetExplorer.Application")
IE.Visible = False
IE.navigate "https://csrtool-ssl.sso.infra.ftgroup/csrtool_web/Bricks/pg/osuit/pages/identity/IdentityAccountAndUsers?type=emailAlias&value=" & mails & "&tab_main=AccountInfo"
While IE.Busy Or IE.ReadyState <> READYSTATE_COMPLETE: DoEvents: Wend
Set oDoc = IE.Document
Set Htable = oDoc.getElementsByTagName("div")(1)
' MsgBox Htable.innerhtml
Set maTable = Htable.getElementsByTagName("span")
'MsgBox maTable(0).href
'myData = maTable(0).innertext
'MsgBox (myData)
IE.Quit
'On libère les variables
Set IE = Nothing
Set oDoc = Nothing
End Function
thank you for helping me to see my mistake
formalizing my answer try maximized ie window and updated code will be
Private Function CreerNavigateur(ByVal mails As String)
Dim ie As SHDocVw.InternetExplorer
Dim oDoc As Object
Dim Htable, maTable As Object
Dim text As String
Set ie = New SHDocVw.InternetExplorer
ie.Visible = False
ie.navigate "https://csrtool-ssl.sso.infra.ftgroup/csrtool_web/Bricks/pg/osuit/pages/identity/IdentityAccountAndUsers?type=emailAlias&value=" & mails & "&tab_main=AccountInfo"
While ie.Busy Or ie.ReadyState <> READYSTATE_COMPLETE: DoEvents: Wend
Set oDoc = ie.Document
Set Htable = oDoc.getElementsByTagName("div")(1)
'MsgBox Htable.innerhtml
Set maTable = Htable.getElementsByTagName("span")
'MsgBox maTable(0).href
'myData = maTable(0).innertext
'MsgBox (myData)
ie.Quit
'On libère les variables
Set ie = Nothing
Set oDoc = Nothing
End Function
I have read a number of posts related to this topic, tried a number of different methods and still can't get it to work.
The macro is working for the user name and password but login not working..
Dim HTMLDoc As HTMLDocument
Dim MyBrowser As InternetExplorer
Sub RepsolLogin()
Dim MyHTML_Element As IHTMLElement
Dim MyURL As String
Dim form As HTMLFormElement
MyURL = "https://login.repsol.com/es/Landing/AuthnPage?returnUrl=https://www.repsol.com/es_es/"
Set MyBrowser = New InternetExplorer
MyBrowser.Silent = True
MyBrowser.navigate MyURL
MyBrowser.Visible = True
Do
DoEvents
Loop Until MyBrowser.ReadyState = READYSTATE_COMPLETE
Set HTMLDoc = MyBrowser.document
Do While HTMLDoc.getElementById("gigya-login-form") Is Nothing
DoEvents
Loop
Set form = HTMLDoc.getElementById("gigya-login-form")
form.all.UserName.Value = "xxx#xxx.com" 'Enter your email id here
form.all.Password.Value = "password" 'Enter your password here
form.submit
End Sub
Can anyone have a look an tell me how I must have the macro?
I am trying to scrape data from: http://www.boliga.dk/salg/resultater?so=1&sort=omregnings_dato-d&maxsaledate=today&iPostnr=&gade=&type=Villa&minsaledate=2017
IN connection with this I have two questions.
Michał Perłakowski has giving an excellent guide how to scrape but the code he is using is getElementById(Scraping data from website using vba). Since the webpage I want to scrape from does not use an ID. I am wondering what alternatives available. My guess would be getElementsByClassName.
My next question is how to make the macro change page (I have more than 100) can I just write "next"?
Dim appIE As Object
Set appIE = CreateObject("internetexplorer.application")
With appIE
.navigate "http://www.boliga.dk/salg/resultater?so=1&sort=omregnings_dato-d&maxsaledate=today&iPostnr=&gade=&type=Villa&minsaledate=2017"
.Visible = True
End With
Do While appIE.Busy
DoEvents
Loop
Set allRowOfData = appIE.document.getElementsByName("Bygget")
Dim myValue As String: myValue = allRowOfData.Cells(7).innerHTML
appIE.Quit
Set appIE = Nothing
Range("A1").Value = myValue
Try this:
Option Explicit
Sub scrape()
Dim appIE As Object
Dim ihtml As Object
Set appIE = CreateObject("internetexplorer.application")
With appIE
.Visible = True
.navigate "http://www.boliga.dk/salg/resultater?so=1&sort=omregnings_dato-d&maxsaledate=today&iPostnr=&gade=&type=Villa&minsaledate=2017"
While .Busy = True Or .readyState < 4: DoEvents: Wend
Set ihtml = .document
Dim allRowOfData As Object
Set allRowOfData = appIE.document.getElementById("searchresult")
Dim r As Long, c As Long
Dim curHTMLRow As Object
For r = 1 To allRowOfData.Rows.Length - 1
Set curHTMLRow = allRowOfData.Rows(r)
For c = 0 To curHTMLRow.Cells.Length - 1 'comment out
Cells(r + 1, c + 1) = curHTMLRow.Cells(c).innerText ' Cells(r + 1, c + 1) = curHTMLRow.Cells(7).innerText
Next c 'comment out
Next r
.Quit
End With
Set appIE = Nothing
End Sub
Just the column of interest rather than whole table:
Option Explicit
Sub scrape()
Dim appIE As Object
Dim ihtml As Object
Set appIE = CreateObject("internetexplorer.application")
With appIE
.Visible = True
.navigate "http://www.boliga.dk/salg/resultater?so=1&sort=omregnings_dato-d&maxsaledate=today&iPostnr=&gade=&type=Villa&minsaledate=2017"
While .Busy = True Or .readyState < 4: DoEvents: Wend
Set ihtml = .document
Dim allRowOfData As Object
Set allRowOfData = appIE.document.getElementById("searchresult")
Dim r As Long, c As Long
Dim curHTMLRow As Object
For r = 1 To allRowOfData.Rows.Length - 1
Set curHTMLRow = allRowOfData.Rows(r)
Cells(r + 1, c + 1) = curHTMLRow.Cells(7).innerText
Next r
.Quit
End With
Set appIE = Nothing
End Sub
Reference:
https://www.experts-exchange.com/questions/28571716/Excel-VBA-WEb-Data-Scraping-from-a-Table.html
On line:
Set ieElement = objIE.Document.getElementsByClassName("sg-btn sg-btn-primary btn-signin")
ieElement.Click of the below code throws an error called "Object does not support this property or method".
Sub login()
Dim objIE As InternetExplorer
Dim uid As String
Dim pwd As String
Dim rng As Range
Dim sh As Worksheet
Dim ieElement As Object
Set sh = Sheets("Indeed Resume Download")
Set rng = sh.Range("A2")
On Error Resume Next
objIE.Quit
Set objIE = Nothing
On Error GoTo 0
uid = rng.Value
pwd = rng.Offset(0, 1).Value
Dim j As Long
Set objIE = New InternetExplorer 'Initialize internet object
objIE.Navigate "https://secure.indeed.com/account/loginservice=my&hl=en_IN&co=IN&continue=https%3A%2F%2Fwww.indeed.co.in%2F"
objIE.Visible = True
objIE.Document.all.signin_email.Value = UserID
objIE.Document.all.signin_password.Value = Password
Set ieElement = objIE.Document.getElementsByClassName("sg-btn sg-btn-primary btn-signin")
ieElement.Click
For j = 2 To sh.Cells(Rows.Count, 27).End(xlUp).Row
Set objIE = New InternetExplorer
With objIE
.Navigate sh.Range("CA" & j).Value
Do While .Busy = True
DoEvents
Loop
Do While .Busy: Loop
Do While .ReadyState <> READYSTATE_COMPLETE: Loop
Do While .Busy: Loop
End With
Do While objIE.Busy = True
DoEvents
Loop
Next j
Set objIE = Nothing
End Sub
When you use getElementsByClassName, you have to determine which class name you are wanting, as there can be many. If your class name is unique (which in this case it appears it could be), you can simply add a (0) to the end. If you want to search through all the class names you could use a For Each...Next statement.
Notice the different between these:
getElementByID() and getElementsByClassName()? Elements is plural when used with ClassName, so you need to designate which element of that class you want.
Try replacing your line with this:
Set ieElement = objIE.Document.getElementsByClassName("sg-btn sg-btn-primary btn-signin")(0)
The method you are currently using would work along these lines:
Dim ieElements, ieElemBtn, ieElement
Set ieElements = objIE.Document.getElementsByClassName("sg-btn sg-btn-primary btn-signin")
For Each ieElement in ieElements
If ieElement ..... Then
Set ieElemBtn = ieElement
Exit For
End If
Next ieElement
ieElemBtn.Click
I want to know why in some cases the HTML in the response from MSXML2.XMLHTTP object does not produce the same results as automating Internet Explorer and inspecting the document property value.
For example, the procedure below compares the results found by using the object MSXML2.XMLHTTP (column A) with the results found by using the InternetExplorer object (column B).
The results of the InternetExplorer object include the NASDAQ index as expected but the results of the MSXML2.XMLHTTP object do not include the NASDAQ index and are completely different:
Sub ExtractDataFromInternet()
'Enable Tools/references 1. Microsoft Internet Control and 2. Microsoft HTML Object Library.
Dim URL
Dim objHTML As HTMLDocument
Dim Oelement As Object
Dim ie As New InternetExplorer
Dim J, Field1, Field2
Set objHTML = New HTMLDocument
URL = "http://www.nasdaq.com/"
'-------- METHOD1: MSXML2.XMLHTTP --------------------------
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", URL, False
.send
objHTML.body.innerHTML = .responseText
End With
J = 0
Set Field1 = objHTML.getElementsByTagName("td")
For Each Oelement In Field1
Worksheets("sheet1").Cells(J + 1, 1) = Field1(J).innerText
J = J + 1
Next Oelement
'----------METHOD2: InternetExplorer Object----------------
Set ie = New InternetExplorer
With ie
.navigate URL
.Visible = False
While .Busy Or .readyState <> READYSTATE_COMPLETE
DoEvents
Wend
Set objHTML = .document
DoEvents
End With
J = 0
Set Field1 = objHTML.getElementsByTagName("td")
For Each Oelement In Field1
Worksheets("sheet1").Cells(J + 1, 2) = Field1(J).innerText
J = J + 1
Next Oelement
DoEvents
ie.Quit
DoEvents
Set ie = Nothing
'----------------------------------------------
End Sub