Getting a 403 Forbidden when going to a URL with PhantomJS - phantomjs

If I go to the following web page in Chrome, it loads fine: https://www.cruisemapper.com/?poi=39
However, when I run the following PhantomJS script, which simply goes to the same URL and outputs the entire DOM string to the console, I get a 403 Forbidden message:
var page = require('webpage').create(),
url = 'https://www.cruisemapper.com/?poi=39';
page.open(url, function (status) {
if (status === 'success') {
console.log(page.evaluate(function () {
return document.documentElement.outerHTML;
}));
phantom.exit();
}
});
Here's the exact output to the console:
<html><head>
<title>403 Forbidden</title>
</head><body>
<h1>Forbidden</h1>
<p>You don't have permission to access /
on this server.<br>
</p>
</body></html>
I thought that if I added some sort of user agent string, it might work. As such, I added the following above the console.log line:
page.settings.userAgent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36';
But that didn't work. So then I tried the following instead:
page.customHeaders = {
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'
};
But that didn't work either. Does anyone have any advice on how I can possibly hit up the URL above and not get a 403 Forbidden message? Thank you.

Your code works for me fine (I's suggest viewport size emulation though, see code). If you still get a 403, try changing your IP, it's possible that the site is on to you now (you probably visited that page lots of times).
var page = require('webpage').create(),
url = 'https://www.cruisemapper.com/?poi=39';
page.settings.userAgent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36';
page.viewportSize = { width: 1440, height: 900 }; // <-- otherwise it's 400x300 by default
// It's good to watch for errors on the page
page.onError = function (msg, trace)
{
console.log(msg);
trace.forEach(function(item) {
console.log(' ', item.file, ':', item.line);
})
}
page.open(url, function (status) {
console.log(status);
page.render("page.png"); // Also useful to check if you get what you expect
if (status === 'success') {
console.log(page.evaluate(function () {
return document.documentElement.outerHTML;
}));
phantom.exit();
}
});

Related

How to use a UserAgent, headers and CookieContainer on WebView2?

Using a WebView2 control, I am trying to load into a webpage, but after I log into it, it seems it has some sort of block for generic browser that is not well configured because it keeps loading instead of proceed after the login, so I would like to add a CookieContainer and specify to use Cookies, add headers that specify that decompression is supported and what decompression methods are handled and User agent on WebView2 control same way this answer
works for HttpRequest.
Looking online I only found some code that I've tried to put together, but that's c# and I'm trying to convert it for vb.net but no online tool succeded to convert it yet
Private Sub webView2_NavigationStarting(sender As Object, e As Microsoft.Web.WebView2.Core.CoreWebView2NavigationStartingEventArgs) Handles webView2.NavigationStarting
webView2.AddScriptToExecuteOnDocumentCreated("
window.WebView2.addEventListener('beforenavigate', function(event) {
event.preventDefault();
var xhr = new XMLHttpRequest();
xhr.open(event.detail.verb, event.detail.uri, true);
xhr.setRequestHeader('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36');
xhr.setRequestHeader('Cache-Control', 'no-cache');
xhr.setRequestHeader('Accept-Encoding', 'gzip, deflate');
xhr.onreadystatechange = function() {
if (xhr.readyState === XMLHttpRequest.DONE) {
window.WebView2.injectWebResource(event.detail.id, xhr.responseText);
}
};
xhr.send();
});
")
End Sub
Am I using the rights methods?
edit1:
I've managed to add the UserAgent
Private Sub WebView21_NavigationStarting(sender As Object, args As Microsoft.Web.WebView2.Core.CoreWebView2NavigationStartingEventArgs) Handles WebView21.NavigationStarting
Dim userAgent As String = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36"
Dim script As String = $"window.navigator.userAgent = '{userAgent}';"
WebView21.CoreWebView2.AddScriptToExecuteOnDocumentCreatedAsync(script)
End Sub
but still it doesn't proceed after the login.

I can't render this page with splash

I have tried all the suggestions in splash documentation:
This is the link: 'https://app.microacquire.com/startup/az1Hi7Znn9PpVc7I6EsgLbgQv4A3/uckABGneMbqRx0BtI866'
This is the script (I even add the same headers as in a normal request:
function main(splash, args)
splash.indexeddb_enabled = true
splash.plugins_enabled = true
splash.private_mode_enabled = false
splash:set_custom_headers({
["Connection"] = "keep-alive",
['authority']= 'app.microacquire.com',
['accept']='text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,
['accept-language']= 'es-ES,es;q=0.9,en;q=0.8',
['cache-control']= 'max-age=0',
['if-modified-since']= 'Thu, 14 Jul 2022 16:35:49 GMT',
['if-none-match']= '"1d1d916cd820c5eb54078f6fe3134fb2187ea0a14f9117866436828d1e447fd1-br"',
['sec-ch-ua']= '".Not/A)Brand";v="99", "Google Chrome";v="103", "Chromium";v="103"',
['sec-ch-ua-mobile']= '?0',
['sec-ch-ua-platform']= 'Windows',
['sec-fetch-dest']= 'document',
['sec-fetch-mode']= 'navigate',
['sec-fetch-site']= 'none',
['sec-fetch-user']= '?1',
['upgrade-insecure-requests']= '1',
['user-agent']= 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36'
})
assert(splash:go(args.url))
assert(splash:wait(15))
return {
html = splash:html(),
png = splash:png(),
har = splash:har(),
}
end
it renders only this output, it doesnt load completely:
splash image

Cannot submit a form in phamtomjs

I'm new to PhantomJS. Following this example, I tried to create a script which logs in to avito.ru, then opens up an edit page, changes the price, submits the form, and as the server sends back a confirmation page, script should submit a new form for changes to take place.
So the cycle should be as follows:
Log in
GET .../items/edit/678347092
POST to .../items/edit/678347092
GET .../stiralnaya_mashina_indesit_nws_7105_l_novaya_678347092/edit/confirm
POST to .../stiralnaya_mashina_indesit_nws_7105_l_novaya_678347092/edit/confirm
My code is below:
var steps=[];
var testindex = 0;
var loadInProgress = false;
/*********SETTINGS*********************/
var webPage = require('webpage');
var page = webPage.create();
page.viewportSize = { width: 1366, height: 768 };
page.settings.userAgent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36';
page.settings.javascriptEnabled = true;
phantom.cookiesEnabled = true;
phantom.javascriptEnabled = true;
/*********SETTINGS END*****************/
console.log('All settings loaded, start with execution');
page.onConsoleMessage = function(msg) {
console.log(msg);
};
/**********DEFINE STEPS THAT FANTOM SHOULD DO***********************/
steps = [
//Step 1 - Open Avito login page
function(){
console.log('Step 1 - opening Avito');
page.open("https://www.avito.ru/profile/login?next=%2Fprofile", function(status){
});
},
//Step 2 - Populate and submit the login form
function(){
console.log('Step 2');
console.log('URL: ' + page.url);
console.log('Populate and submit the login form');
page.evaluate(function(){
$('input[type="email"]').val('myemail');
$('input[type="password"]').val('mypassword');
$('.js-form-login').submit();
});
},
//Step 3 Profile page
function(){
console.log('Step 3');
console.log('URL: ' + page.url);
page.render('3-avito.png');
console.log("Opening edit page");
page.open("https://www.avito.ru/items/edit/678347092", function(status){
});
},
//Step 4 Edit the price
function(){
console.log('Step 4');
console.log('URL: ' + page.url);
page.render('4-edit-page.png');
console.log("Changing the price and submitting the form");
page.evaluate(function(){
document.querySelector('#item-edit__price').value = '21999';
document.forms[0].submit();
});
},
//Step 5 Confirm changes
function(){
console.log('Step 5');
console.log('URL: ' + page.url);
page.render('5-edit-confirm.png');
console.log("Final confirmation");
page.evaluate(function(){
document.forms[0].submit();
});
},
//Step 6 Render the result
function(){
console.log('Step 6');
console.log('URL: ' + page.url);
console.log("Rendering final page");
page.render('6-result.png');
}
];
/**********END STEPS THAT FANTOM SHOULD DO***********************/
//Execute steps one by one
interval = setInterval(executeRequestsStepByStep,50);
function executeRequestsStepByStep(){
if (loadInProgress == false && typeof steps[testindex] == "function") {
steps[testindex]();
testindex++;
}
if (typeof steps[testindex] != "function") {
console.log("test complete!");
phantom.exit();
}
}
/**
* These listeners are very important in order to phantom work properly. Using these listeners, we control loadInProgress marker which controls, weather a page is fully loaded.
* Without this, we will get content of the page, even a page is not fully loaded.
*/
page.onLoadStarted = function() {
loadInProgress = true;
console.log('Loading started');
};
page.onLoadFinished = function() {
loadInProgress = false;
console.log('Loading finished');
};
page.onConsoleMessage = function(msg) {
console.log(msg);
};
I tried to paste the code inside page.evaluate to the Chrome's console, and it works fine. But as I can see in Fiddler, this script doesn't perform a first POST request (the form is not submitted at step 4). Also I have rendered images indicating that the price is changed at step 4, but since then all other images remain the same.
Below is the output of the script to the console:
All settings loaded, start with execution
Step 1 - opening Avito
Loading started
Loading finished
Step 2
URL: https://www.avito.ru/profile/login?next=/profile
Populate and submit the login form
Loading started
Loading finished
Step 3
URL: https://www.avito.ru/profile
Opening edit page
Loading started
Loading finished
Step 4
URL: https://www.avito.ru/items/edit/678347092
Changing the price and submitting the form
Step 5
URL: https://www.avito.ru/items/edit/678347092
Final confirmation
Step 6
URL: https://www.avito.ru/items/edit/678347092
Rendering final page
test complete!
Any ideas how to make this script work?
Update: the script sometimes fails to submit the first login form, sometimes the second form, and sometimes it submits the second form instead of the third at the step 5, rather than at the step 4.
I've used onResourceRequested to see the headers of the requests, and in the example below it doesn't submit the second form at the step 4, but submits the same form at the step 5. (So I think it is not the problem with the form validation). Here are some extracts:
Step 1 Open login page
Request (#1): {"headers":[{"name":"User-Agent","value":"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157
Safari/537.36"},{"name":"Accept","value":"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"}],"id":1,"method":"GET","time":"2015-11-
19T12:38:51.111Z","url":"https://www.avito.ru/profile/login?next=%2Fprofile"}
Loading started
...
Loading finished
Step 2
- POST login and password
Request (#28): {"headers":[{"name":"Origin","value":"https://www.avito.ru"},{"name":"User-Agent","value":"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36
(KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36"},{"name":"Content-Type","value":"multipart/form-data; boundary=----WebKitFormBoundaryrQFFuoCCfmZnvHH9"},
{"name":"Accept","value":"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"},{"name":"Referer","value":"https://www.avito.ru/profile/login?next=
%2Fprofile"},{"name":"Content-Length","value":"355"}],"id":28,"method":"POST","time":"2015-11-19T12:38:57.595Z","url":"https://www.avito.ru/profile/login"}
Loading started
Request (#29): {"headers":[{"name":"Origin","value":"https://www.avito.ru"},{"name":"User-Agent","value":"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36
(KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36"},{"name":"Accept","value":"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"},
{"name":"Referer","value":"https://www.avito.ru/profile/login?next=%2Fprofile"}],"id":29,"method":"GET","time":"2015-11-
19T12:38:58.360Z","url":"https://www.avito.ru/profile"}
...
Loading finished
Step 3 Open new page
Request (#72): {"headers":[{"name":"User-Agent","value":"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157
Safari/537.36"},{"name":"Accept","value":"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"}],"id":72,"method":"GET","time":"2015-11-
19T12:39:13.887Z","url":"https://www.avito.ru/items/edit/678347092"}
Loading started
...Loading finished
Step 4 Edit the price and submit changes
doesn't work
Step 5
- Confirm changes (instead it's performed submit of the form from the step 4)
Request (#107): {"headers":[{"name":"Origin","value":"https://www.avito.ru"},{"name":"User-Agent","value":"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36
(KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36"},{"name":"Content-Type","value":"application/x-www-form-urlencoded"},
{"name":"Accept","value":"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"},{"name":"Referer","value":"https://www.avito.ru/items/edit/678347092"},
{"name":"Content-Length","value":"8110"}],"id":107,"method":"POST","time":"2015-11-19T12:39:25.950Z","url":"https://www.avito.ru/items/edit/678347092"}
Loading started
Request (#108): {"headers":[{"name":"Origin","value":"https://www.avito.ru"},{"name":"User-Agent","value":"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36
(KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36"},{"name":"Accept","value":"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"},
{"name":"Referer","value":"https://www.avito.ru/items/edit/678347092"}],"id":108,"method":"GET","time":"2015-11-
19T12:39:26.709Z","url":"https://www.avito.ru/ulyanovsk/bytovaya_tehnika/stiralnaya_mashina_indesit_nws_7105_l_novaya_678347092/edit/confirm"}
Loading finished
Loading started
Loading finished
Step 6 - Screenshot

How to properly parse JSONP callback function in Meteor?

Does someone know how to parse JSONP callback in Meteor server methods?
I do
let response = HTTP.call('GET', AVIASALES_API_ENDPOINTS.getLocationFromIP, {
params: {
locale: 'en',
callback: 'useriata',
ip: clientIP
}
});
in response.content I’ve got
useriata({"iata":"MSQ","name":"Minsk","country_name":"Belarus"})
How to properly parse it?
It could help to know what you really try to accomplish? But here is a working example meteor actually doesn't do anything unusual with the requests.
Meteor.startup(function () {
var result = HTTP.call("GET", "https://api.github.com/legacy/repos/search/meteor", {
params: {},
headers: {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36"
}
});
console.log(result.data); // it's js object you can do result.data.repositories[0].name
console.log(JSON.stringify(result.data)); // json string
console.log(JSON.parse(JSON.stringify(result.data))) // if for some reason you need to parse it this way will work, but seems unnecessary
});
Update: The string you got back from the response wasn't valid JSON so you couldn't parse it used some regex to remove the invalid strings here is working example: http://meteorpad.com/pad/JCy5WkFsrtciG9PR5/Copy%20of%20Leaderboard

CasperJS fetchText() function echoing blank output

I'm trying to use fetchText() to print out the URL of a google search result to the terminal. Here's the image of what exactly I'm trying to print.
It's only prints out blank though! I don't see anything I'm doing wrong?
Code:
phantom.casperPath = "/usr/local/Cellar/casperjs/1.0.3/libexec/";
phantom.injectJs(phantom.casperPath + '/bootstrap.js');
var utils = require('utils');
var casper = require('casper').create();
casper.start('https://www.google.com/search?q=amazon+shoes');
casper.wait(3000, function () {
this.echo(this.fetchText('#rso > div:nth-child(1) > li:nth-child(1) > div > div > div > div.f.kv._TD > cite'));
}).run();
Google will change the page depending on the useragent string. So you need to set a string during creation (with example string)
var casper = require("casper").create({
pageSettings: {
userAgent: "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36"
}
});
or with specific function
casper.userAgent("Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36");
Sometimes it is also necessary to set the viewport to something desktop-like, because PhantomJS' default viewport is 400x300 and Google might render a different site based on the viewport.