How can I import data from an ambiguous URL? - vba

This is almost working!! But not quite!
If link.innerHTML Like "*Upload Questionnaire*" Then
link.Click
Set objIE = CreateObject("InternetExplorer.Application")
objIE.Navigate strURL
Do While objIE.ReadyState <> 4 And objIE.Busy
DoEvents
Loop
Set XMLHTTP = CreateObject("MSXML2.XMLHTTP")
XMLHTTP.Open "GET", strSQL, False
XMLHTTP.send
Set html = CreateObject("htmlfile")
html.body.innerHTML = XMLHTTP.responseText
Set tbl = html.getElementsByTagName("Table")
Set tr_coll = tbl(0).getElementsByTagName("TR")
For Each tr In tr_coll
j = 1
Set td_col = tr.getElementsByTagName("TD")
For Each td In td_col
Cells(row + 1, j).Value = td.innerText
j = j + 1
Next
row = row + 1
Next
End If
For one thing, the code doesn't pause and wait for the browser to finish loading.
Do While objIE.ReadyState <> 4 And objIE.Busy
DoEvents
Loop
Also, I'm feeding in a Parent URL, like this:
strSQL = "https://blah_blah_blah_CampaignID=" & cell.Value
The line below doesn't work.
XMLHTTP.Open "GET", strSQL, False
Somehow I need to pass in the Child URL that opens from the Parent URL. This is the parent URL: strSQL = "https://blah_blah_blah_CampaignID=" & cell.Value
When: link.Click
runs then the Child URL opens, but I don't know how to reference the Child URL. How can I do that?!
Thanks in advance!

Assuming that the link is contained within the class, rather than looping through all a elements within the document itself, use getElementsByClassName() to grab ever element in your target class, and then loop through those.
Something like:
For each classElement in IE.document.getElementsByClassName("thClass")
For each linkElement in classElement.getElementsByTagName("a")
'check for innerHTML, etc.
Next
Next
If the class is within the link, you can just flip the loops.
This should work, for example:
Public Sub LookADemo()
Dim ie As InternetExplorer, doc As HTMLDocument
Dim thisClass As IHTMLElement2, thisLink As IHTMLElement
Set ie = New InternetExplorer
ie.navigate "https://www.google.com"
Do
DoEvents
Loop Until (ie.ReadyState >= READYSTATE_INTERACTIVE) And (ie.Busy = False)
Set doc = ie.Document
For Each thisClass In doc.getElementsByClassName("hp")
For Each thisLink In thisClass.getElementsByTagName("a")
Debug.Print thisLink.InnerText
Next
Next
ie.Quit
End Sub

Related

Get final URL with Excel VBA

I've been strugling with this issue, and trying to find a solution here in StackOverflow, but nothing helped.
I have thousands of links of images (Column A), that will get you to the final JPG url. It's not a redirect link because I've tried with different websites and it doesn't detect it.
Here is an example:
https://www.pepperl-fuchs.com/global/en/doci.htm?docilang=ENG&view=showproductpicbypartno&partno=000046
It will get you here:
https://files.pepperl-fuchs.com/webcat/navi/productInfo/pd/d428540a.jpg
So I would like to extrapolate all the final links in Column B.
I found some code that opens IE for each link, but it probably misses the function to copy the URL and paste it in the cell:
Sub Test()
Dim IE As Object
Dim URL As Range
Dim objDocument As Object
Dim x As Integer
Set IE = CreateObject("InternetExplorer.Application")
With IE
.Visible = True
For Each URL In Range("A2:A16")
.Navigate URL.Value
While .busy Or .ReadyState <> 4: DoEvents: Wend
If LCase(TypeName(objDocument)) = "htmldocument" Then
Cells(A, 1).Value = objDocument.URL
Cells(A, 2).Value = objDocument.Title
x = x + 1
End If
Next
End With
End Sub
Can you guys help me figure out what is missing? Unfortunately I'm not really familiar with VBA.
Thank you very much
Try this
Sub Test()
Dim IE As Object
Dim URL As Range
Dim objDocument As Object
Dim x As Integer
Set IE = CreateObject("InternetExplorer.Application")
With IE
.Visible = True
For Each URL In Range("A2:A16")
.Navigate URL.Value
While .busy Or .ReadyState <> 4: DoEvents: Wend
URL.Offset(, 1).Value = .LocationURL
Next
End With
End Sub
Try this code
Sub Test()
Dim html As HTMLDocument
Dim ie As Object
Dim objDocument As Object
Dim url As Range
Dim x As Integer
Set ie = CreateObject("InternetExplorer.Application")
x = 1
With ie
.Visible = True
For Each url In Range("A2:A3")
.navigate url.Value
While .Busy Or .readyState <> 4: DoEvents: Wend
Set html = .document
x = x + 1
Cells(x, 2).Value = html.url
Cells(x, 3).Value = html.Title
Next url
End With
End Sub
Need more to test with but this will be a lot faster and if you can easily adapt to using an array to loop faster than looping sheet by using Dim arr(): arr = .Range("A1:A" & .Cells(.Rows.Count, "A").End(xlUp).Row).Value and looping the first dimension.
Option Explicit
Public Sub GetInfo()
Dim rng As Range
With Worksheets("Sheet1")
For Each rng In .Range("A1:A" & .Cells(.Rows.Count, "A").End(xlUp).Row)
If InStr(rng.Value, "http") > 0 Then Debug.Print GetURL(rng.Value)
Next
End With
End Sub
Public Function GetURL(ByVal url As String) As String
Dim sResponse As String, s As Long, e As Long
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", url, False
.send
sResponse = StrConv(.responseBody, vbUnicode)
End With
s = InStr(1, sResponse, "https")
e = InStr(1, sResponse, ".jpg") + 4
GetURL = Mid(sResponse, s, e - s)
End Function
This does assume that all your links follow the same pattern as the first.

Object doesn't support this property or method while getting the first result of bing search

I am trying to get the link first result of bing search:
Sample url: https://www.bing.com/search?q=Lancaster University Statistics and Operational Research with Industrial Applications MRes
but I am getting run time error 438 on this line:
Set objLink = objResultDiv.getelementsbytagname("a")(0) 'Error here
Sub getURL()
Dim objIE As Object
Dim i As Long, fI As Long
Dim fURL
Dim objResultDiv
Dim objLink
Application.ScreenUpdating = True
fI = INP.Range("D" & Rows.Count).End(xlUp).Row
Set objIE = CreateObject("InternetExplorer.Application")
With objIE
.Visible = True
.Silent = True
For i = 1 To fI
.Navigate INP.Range("D" & i).Value
Do While .Busy Or .readyState <> 4
DoEvents
Loop
fURL = Empty
Set objResultDiv = objIE.document.getelementsbyclassname("b_algo")
Set objLink = objResultDiv.getelementsbytagname("a")(0) 'Error here
INP.Range("E" & i) = objLink.href
Next i
End With
objIE.Quit
Set objIE = Nothing
End Sub
getElementsByClassname returns a collection of elements. So you'll need to specify which one to return. For example, to return the first element (index starts at 0)...
Set objResultDiv = objIE.document.getElementsByClassname("b_algo")(0)
Hope this helps!

Open website, find specific value and return value to Excel in VBA

I would like to use VBA to open a website, look for a certain paragraph in the HTML code of this website (<p class="myClass">XYZ</p>) and return this value to Excel, in my example "XYZ".
The website has only one paragraph (p element) with the above class.
I know that this is possible but don't know where to start here.
My code:
Dim objIE As Object
Set objIE = CreateObject("InternetExplorer.Application")
With objIE
.Navigate varUrl
Do While .Busy
Application.Wait Now + TimeValue("0:00:01")
Loop
.Visible = True
End With
Instead of opening IE, use a web request:
Set oRequest = New WinHttp.WinHttpRequest
With oRequest
.Open "GET", sUrl, True
.SetRequestHeader "Content-Type", "application/x-www-form-urlencoded; charset=UTF-8"
.Send "{range:9129370}"
.WaitForResponse
Set index = .ResponseText.IndexOf("<p class=""myClass"">")
Set text = .ResponseText.Substring(index,3)
Cells(row, col).Value = text
End With
If you don't know the length of the string you are looking for, you could also do a loop after index until you hit a "<" character.
Dim objIE As Object
Set objIE = CreateObject("InternetExplorer.Application")
With objIE
.Navigate varUrl
Do While .Busy
Application.Wait Now + TimeValue("0:00:01")
Loop
.Visible = True
End With
'HTML document
Dim doc As Object
Set doc = objIE.document
Dim el As Object
Dim myText as string
For Each el In doc.GetElementsByClassName("myClass")
'put paragrah text in cell A1
Cells(1, 1).Value = el.innerText
'put your paragraph text in a variable string
myText = el.innerText
Next el
That is a tricky and interesting question. Let's say that you want to obtain the title of this current website, which is in class question-hyperlink within StackOverflow. Thus, using the idea of the solution of #Matt Spinks you may come up with something like this:
Option Explicit
Public Sub TestMe()
Dim oRequest As Object
Dim strOb As String
Dim strInfo As String: strInfo = "class=""question-hyperlink"">"
Dim lngStart As Long
Dim lngEnd As Long
Set oRequest = CreateObject("WinHttp.WinHttpRequest.5.1")
With oRequest
.Open "GET", "http://stackoverflow.com/questions/42254051/vba-open-website-find-specific-value-and-return-value-to-excel#42254254", True
.SetRequestHeader "Content-Type", "application/x-www-form-urlencoded; charset=UTF-8"
.Send "{range:9129370}"
.WaitForResponse
strOb = .ResponseText
End With
lngStart = InStr(1, strOb, strInfo)
lngEnd = InStr(lngStart, strOb, "<")
Debug.Print Mid(strOb, lngStart + Len(strInfo), lngEnd - lngStart - Len(strInfo))
End Sub
Instead of Debug.print you may get the Title in a string and work further.

MSXML2.XMLHTTP response different from IE document

I want to know why in some cases the HTML in the response from MSXML2.XMLHTTP object does not produce the same results as automating Internet Explorer and inspecting the document property value.
For example, the procedure below compares the results found by using the object MSXML2.XMLHTTP (column A) with the results found by using the InternetExplorer object (column B).
The results of the InternetExplorer object include the NASDAQ index as expected but the results of the MSXML2.XMLHTTP object do not include the NASDAQ index and are completely different:
Sub ExtractDataFromInternet()
'Enable Tools/references 1. Microsoft Internet Control and 2. Microsoft HTML Object Library.
Dim URL
Dim objHTML As HTMLDocument
Dim Oelement As Object
Dim ie As New InternetExplorer
Dim J, Field1, Field2
Set objHTML = New HTMLDocument
URL = "http://www.nasdaq.com/"
'-------- METHOD1: MSXML2.XMLHTTP --------------------------
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", URL, False
.send
objHTML.body.innerHTML = .responseText
End With
J = 0
Set Field1 = objHTML.getElementsByTagName("td")
For Each Oelement In Field1
Worksheets("sheet1").Cells(J + 1, 1) = Field1(J).innerText
J = J + 1
Next Oelement
'----------METHOD2: InternetExplorer Object----------------
Set ie = New InternetExplorer
With ie
.navigate URL
.Visible = False
While .Busy Or .readyState <> READYSTATE_COMPLETE
DoEvents
Wend
Set objHTML = .document
DoEvents
End With
J = 0
Set Field1 = objHTML.getElementsByTagName("td")
For Each Oelement In Field1
Worksheets("sheet1").Cells(J + 1, 2) = Field1(J).innerText
J = J + 1
Next Oelement
DoEvents
ie.Quit
DoEvents
Set ie = Nothing
'----------------------------------------------
End Sub

Selecting value from a dropdown list on a webpage using vba

On this site I am able to select the country and language from dropdown menu but when I click on "Complete new application form" button. It says fields are empty.
Any help would be appreciated.
Sub Test()
strURL = "https://visa.kdmid.ru/PetitionChoice.aspx"
With ie
.Visible = True
.navigate strURL
While .Busy
DoEvents
Wend
Set html = .document
'Country where you will apply for visa.
Set ctY = html.getElementById("ctl00$phBody$Country")
For i = 1 To ctY.Options.Length
If ctY.Options(i).Text = "NETHERLANDS" Then
ctY.selectedIndex = i
Exit For
End If
Next i
'Select Language
Set lnG = html.getElementById("ctl00$phBody$ddlLanguage")
For i = 1 To lnG.Options.Length
If lnG.Options(i).Text = "ENGLISH" Then
lnG.selectedIndex = i
Exit For
End If
Next i
'Click I have read instructions check box
html.getElementById("ctl00$phBody$cbConfirm").Click
'Click apply button
Set btnGo = html.forms(0).all("ctl00$phBody$btnNewApplication")
btnGo.Click
End With
End Sub
So you are on the right track but if you look at the HTML of the site there are actually two elements with the country selection- you got the first one, 'ctl00_phBody_Country', but this is actually just the drop down, and the actual selected value is stored in 'ctl00_phBody_cddCountry_ClientState'... the language section has similar structure. Lastly the accepted value is not just the country name you see in the drop down, it is actually a combination of a country code from the drop down and the country name....
See below for sample code:
Public Sub Test()
Dim IE As InternetExplorer
Dim HTMLDoc As HTMLDocument
Dim countryStr As String
Dim countryObj As HTMLObjectElement
Dim countryCodes As IHTMLElementCollection
Dim codeCounter As Long
Dim languageStr As String
Dim languageObj As HTMLObjectElement
Dim languageCodes As IHTMLElementCollection
countryStr = "Netherlands"
languageStr = "English"
Set IE = New InternetExplorer
With IE
.Visible = False
.Navigate "https://visa.kdmid.ru/PetitionChoice.aspx?AspxAutoDetectCookieSupport=1"
While .Busy Or .ReadyState <> READYSTATE_COMPLETE: Wend
Set HTMLDoc = IE.document
End With
Set countryObj = HTMLDoc.getElementById("ctl00_phBody_cddCountry_ClientState")
Set countryCodes = HTMLDoc.getElementById("ctl00_phBody_Country").getElementsByTagName("option")
For codeCounter = 0 To countryCodes.Length - 1
If countryCodes(codeCounter).innerText = UCase(countryStr) Then
countryObj.Value = countryCodes(codeCounter).Value & ":::" & countryCodes(codeCounter).innerText & ":::"
While IE.Busy Or IE.ReadyState <> READYSTATE_COMPLETE: Wend
Exit For
End If
Next
Set languageObj = HTMLDoc.getElementById("ctl00_phBody_cddLanguage_ClientState")
Set languageCodes = HTMLDoc.getElementById("ctl00_phBody_ddlLanguage").getElementsByTagName("option")
For codeCounter = 0 To languageCodes.Length - 1
If languageCodes(codeCounter).innerText = UCase(languageStr) Then
languageObj.Value = languageCodes(codeCounter).Value & ":::" & languageCodes(codeCounter).innerText & ":::"
While IE.Busy Or IE.ReadyState <> READYSTATE_COMPLETE: Wend
Exit For
End If
Next
HTMLDoc.getElementById("ctl00$phBody$cbConfirm").Click
While IE.Busy Or IE.ReadyState <> READYSTATE_COMPLETE: Wend
HTMLDoc.getElementById("ctl00_phBody_btnNewApplication").Click 'Launch Form
IE.Quit
Set IE = Nothing
End Sub