PhantomJS: submit a form - phantomjs

I am filling out and submitting a form using PhantomJS and then outputting the resulting page. The thing is, I have no idea if this thing is being submitted at all.
I print the resulting page, but it's the same as the original page. I don't know if this is because it redirects back or I didn't submit it or I need to wait longer or or or. In a real browser it sends a GET and receives a cookie, which it uses to send more GETS before eventually receiving the final result - flight data.
I copied this example How to submit a form using PhantomJS, using a diferent url and page.evaluate functions.
var page = new WebPage(), testindex = 0, loadInProgress = false;
page.onConsoleMessage = function(msg) {
console.log(msg);
};
page.onLoadStarted = function() {
loadInProgress = true;
console.log("load started");
};
page.onLoadFinished = function() {
loadInProgress = false;
console.log("load finished");
};
var steps = [
function() {
//Load Login Page
page.open("http://www.klm.com/travel/dk_da/index.htm");
},
function() {
//Enter Credentials
page.evaluate(function() {
$("#ebt-origin-place").val("CPH");
$("#ebt-destination-place").val("CDG");
$("#ebt-departure-date").val("1/5/2013");
$("#ebt-return-date").val("10/5/2013");
});
},
function() {
//Login
page.evaluate(function() {
$('#ebt-flightsearch-submit').click() ;
# also tried:
# $('#ebt-flight-searchform').submit();
});
},
function() {
// Output content of page to stdout after form has been submitted
page.evaluate(function() {
console.log(document.querySelectorAll('html')[0].outerHTML);
});
}
];
interval = setInterval(function() {
if (!loadInProgress && typeof steps[testindex] == "function") {
console.log("step " + (testindex + 1));
steps[testindex]();
testindex++;
}
if (typeof steps[testindex] != "function") {
console.log("test complete!");
phantom.exit();
}
}, 50);

The site of interest is rather complicated to scrape. I logged the HTTP traffic from the US KLM site and got this:
GET /travel/us_en/apps/ebt/ebt_home.htm?name=on&ebt-origin-place=New+York+-+John+F.+Kennedy+International+%28JFK%29%2CNew+York&ebt-destination-place=Paris+-+Charles+De+Gaulle+Airport+%28CDG%29%2C+France&c%5B0%5D.os=JFK&c%5B0%5D.ost=airport&c%5B0%5D.ds=CDG&c%5B0%5D.dst=airport&c%5B1%5D.os=CDG&c%5B1%5D.ost=airport&c%5B1%5D.ds=JFK&inboundDestinationLocationType=airport&redirect=no&chdQty=0&infQty=0&c%5B0%5D.dd=2013-07-31&c%5B1%5D.dd=2013-08-14&c%5B1%5D.format=dd%2Fmm%2Fyyyy&flex=true&ebt-cabin-class=ECONOMY&adtQty=1&goToPage=&cffcc=ECONOMY&sc=false HTTP/1.1
Your injected values for the form elements are not what their server is looking for.
Inside page.evaluate(), you are sandboxed, but the sample code includes a hook to get sandboxed console activity onto the external console. For other debugging, you can also include object inspectors, etc., but they have to be injected into the page or part of the code passed into evaluate().

Related

PhantomJs Injecting jQuery in different pages

I have a PhantomJs script in which I create a new wepage, inject jQuery into it and scrape a list of URL from it. After that I call a function passing the list of URL and create a new webpage for each one and try to recover certain information from it
var pageGlobal = require('webpage');
function createPage(){
var page = pageGlobal.create();
page.onAlert = function(msg) {
console.log(msg);
};
return page;
}
var page=createPage();
page.open('http://www.example.com/', function(status){
if ( status === "success" ) {
page.injectJs('jquery-1.6.1.min.js');
var urlList=page.evaluate(
function(){
var urlList=[];
window.console.log = function(msg) { alert(msg) };
$("td.row1>a").each(function(index, link) {
var link=$(link).attr('href');
urlList.push(link);
});
return urlList;
});
processUrlList(urlList);
}
});
function processUrlList(urlList){
for(i=0;i<urlList.length;i++){
var currentPage=createPage();
currentPage.open("http://www.example.com"+urlList[i], function(status){
if ( status === "success" ) {
if(currentPage.injectJs('jquery-1.6.1.min.js')===false){
console.log("Error en la inyeccion");
}
currentPage.evaluate(function() {
window.console.log = function(msg) { alert(msg) };
console.log("Evaluating");
$("showAdText").each(function(index, link) {
//Capture information about the entity in this URL
})
});
}
});
}
}
The problem is in the processUrlList function the injection of jQuery always fail returning false. Would it be a problem to create two or more page objects instead of reusing only one? What could be happening here?

Opening a page after login doesn't work

Using PhantomJs & CasperJs, I would like to :
Get the login page of a website
Fill the form and send it
Once i'm connected, get the content of an other page.
I'm stuck at step 3.
I absolutely do not understand how to get another page after the first one.
I read the documentation, many forums, etc ...
Here's my code :
(I did replace login/password and links).
var casper = require('casper').create({
clientScripts: ["lib/jquery-2.1.1.min.js"]
});
casper.start("https://my_website/login");
casper.waitForSelector('#username', function() {
this.fill('form#fm1', {
'username': "...",
'password': '...'
}, true);
});
casper.then(function() {
//this.echo('Title : ' + this.getTitle());
//this.echo('Title : ' + this.page.content);
// if the page contains the word "Connected",
// the connexion is successful
if (this.page.content.indexOf("Connected") > -1) {
console.log("Connected");
}
});
casper.thenOpen('https://my_website.com/myplanning.jsp', function(response) {
// HERE'S THE PROBLEM !!!!!!!!
console.log(this.getPageContent());
});
casper.waitForSelector('#inner0', function() {
// HERE'S AN OTHER PROBLEM !!!!!!!!
console.log('Great !');
});
casper.run(function() {
this.exit();
});
How can I get the content of "https://my_website.com/myplanning.jsp"?

Phantomjs login, redirect and render page after pageLoad finishes

I have a website with a login form. If a user is not logged and tries to access a internal page it will be redirected to the default page. For instance if I try to access
http://siteURL.PhantomPrint.aspx I will be redirected to http://siteURL/Default.aspx?ReturnUrl=PhantomPrint.aspx. And after login an automatic redirect will take place to the page.
After the redirect I want to render the page with Phantomjs and save it as pdf. The problem is that the rendering takes place before page load is finished and I can properly render the page only if I use timeouts. In this case, if the page loading takes longer than normal the resulted pdf is not the proper one.
Below you can find the java script code:
var page = require('webpage').create();
var index = 0,
page.onConsoleMessage = function (msg) {
console.log(msg);
};
var steps = [
function () {
//Load Login Page
page.open("http://siteURL.PhantomPrint.aspx", function () {
//Enter Credentials
page.evaluate(function () {
console.log("filling inputs");
var usernameInput = document.getElementById("txtUsername");
usernameInput.value = "user";
var passwordInput = document.getElementById("txtPassword");
passwordInput.value = "password";
var loginButton = document.getElementById("btnLogin");
loginButton.click();
console.log("login button was submitted");
});
});
},
function () {
// page.onLoadFinished = function () {
// Render the page to pdf
page.render('example.png');
phantom.exit();
console.log("rendering finished");
//});
}
];
interval = setInterval(function () {
if (!loadInProgress && typeof steps[testindex] == "function") {
console.log("step " + (testindex + 1));
steps[testindex]();
testindex++;
}
if (typeof steps[testindex] != "function") {
console.log("test complete!");
phantom.exit();
}
}, 1000);
Any suggestions on how I can assure that rendering is done only after the redirected page is finishing loading are welcomed.
It looks like you want to process navigation steps. You would need to use page.onNavigationRequested to pick up if a page load/redirect was issued. This will be likely hard to maintain. You would also have to discard the idea of using a step array with setInterval.
Another possibility would be to specifically wait for some selector that is present in the target page using waitFor, but then again, this would make the use of setInterval impossible.
CasperJS is actually built on top of PhantomJS and uses steps to navigate the site. When you use any of the then* functions it will automatically pick up a page load and wait for page load finish until executing the callback.
var casper = require('casper').create();
casper.on("remote.message", function (msg) {
console.log(msg);
});
casper.start("http://siteURL/PhantomPrint.aspx", function () {
//Enter Credentials
this.evaluate(function () {
console.log("filling inputs");
var usernameInput = document.getElementById("txtUsername");
usernameInput.value = "user";
var passwordInput = document.getElementById("txtPassword");
passwordInput.value = "password";
});
this.click("#btnLogin");
this.echo("login button was submitted");
});
casper.then(function () {
this.capture('example.png');
});
casper.run();
This can be made even smaller by using casper.fillSelectors.
After more research I found a solution, see below code.
var loadInProgress = false;
page.onLoadStarted = function () {
loadInProgress = true;
console.log("load started");
};
page.onLoadFinished = function () {
loadInProgress = false;
console.log("load finished");
};
interval = setInterval(function () {
if (!loadInProgress && typeof steps[testindex] == "function") {
console.log("step " + (testindex + 1));
steps[testindex]();
testindex++;
}
if (typeof steps[testindex] != "function") {
console.log("test complete!");
phantom.exit();
}
}, 100)
But I would like to know if there is no other solution which would not involve recursively function calling.

Casperjs load any url, when it's on another page

I have two urls X and Y. When casperjs is on X page, it have to call Y page and after getting the response it should continue.
casper.start("X URL",function() {
var casper2 = require('casper').create();
casper2.start();
casper2.open("Y URL", function() {
RESPONSE
});
casper2.run();
}).then... >> It have to wait for response.
How can i do?
You have to use casper.then after the click. Here is some code:
var x = require('casper').selectXPath;
var url = 'http://stackoverflow.com/';
var casper = require('casper').create({
verbose: false,
logLevel: 'debug'
});
casper.test.comment('Starting Testing');
casper.start(url, function() {
//console.log(this.getCurrentUrl());
this.test.assert(
this.getCurrentUrl() === url, 'url is the one expected'
);
this.test.assertHttpStatus(200, + 'site is up');
casper.waitForSelector(x("//a[normalize-space(text())='Questions']"),
function success() {
this.test.assertExists(x("//a[normalize-space(text())='Questions']"));
this.click(x("//a[normalize-space(text())='Questions']"));
},
function fail() {
this.test.assertExists(x("//a[normalize-space(text())='Questions']"));
}
);
casper.then(function() {
//console.log(this.getCurrentUrl());
this.test.assertUrlMatches("http://stackoverflow.com/questions",
"clicked through to questions page");
});
casper.thenOpen('http://reddit.com', function() {
this.test.assertUrlMatches("http://www.reddit.com/",
"On Reddit");
});
});
casper.run(function() {
this.echo('finished');
this.exit();
});
Basically it goes to stackoverflow.com, waits until the Questions button is loaded, clicks on it and checks whether the redirected url is valid.
You can uncomment //console.log(this.getCurrentUrl()); if you want to see the specific url.
Now lets say you want to go to to an entirely new page, we can do use thenOpen api.
I highly recommend you read this blog: http://blog.newrelic.com/2013/06/04/simpler-ui-testing-with-casperjs-2/

xmlhttprequest in chrome extension

I am developing a chrome extension and I have an iframe which I use in my extension. This is what I do with my iframe
"When I drag and drop a image to my iframe I handle the drop event in one of the content scripts and pass that function call my extension code. There I create a xmlhttprequest object and then send the URL of the image to a php file in my server."
This is what is happening. I get a readyState of "4" but there is no POST request going out of my browser. I checked with the "NETWORK" tab in the browser but there is no POST request going out of the browser (I have listed my site in the permissions section of the manifest file).
This is my code --.>
JScript.js(One of the content scripts )
drop: function(event, ui) {
var imgurl=$(ui.draggable).attr('src');
imgurl="IMGURL="+imgurl;
_post("www.somedomain.come/testing.php",imgurl,function(result){ alert("success")});
}
This is my proxy in the same content script-->
_post = function(url, data, callback)
{
console.log("sending post");
chrome.extension.sendRequest({
msgType:'post',
data: data,
url:url
}, function(response){
alert(response);
});
}
This my OnRequest function handler in background.html -->
chrome.extension.onRequest.addListener(function(request, sender, sendResponse){
if (request.msgType === 'post') {
alert("Now in OnRequest function");
// console.log("Now in Onrequest Function");
alert("Url: "+request.url + "\n Data : "+ request.data);
ajaxcallingfunction(request);
alert("completed the ajax call");
sendResponse("success");
}
});
var ajaxcallingfunction = function(request){
var xhr = new XMLHttpRequest();
xhr.open("POST",request.url, false);
xhr.onreadystatechange = function(){
alert(xhr.readyState);
if (xhr.readyState == 4) {
alert(xhr.readyState);
}
}
xhr.send(request.data);
alert("after xhr call");
};
You have http:// in front of your url, right?
xhr.readyState doesn't tell much, it just means that it is done. Check out what's inside xhr.status, it would contain error code. If everything is ok it should be 200:
xhr.onreadystatechange = function(){
if (xhr.readyState == 4) {
alert(xhr.status);
}
}