Phantomjs has these two really handy callbacks onLoadStarted and onLoadFinished which allow you to essentially pause execution while the page is loading. But I've been searching and I can't find an equivalent for if you click() a submit button or hyperlink. A similar page load happens but onLoadStarted doesn't get called for this event I guess because there isn't an explicit page.open() that happens. I'm trying to figure out a clean way to suspend execution while this load takes place.
One solution is obviously nested setTimeout's but I'd like to avoid this scenario because it's hacky and relies on trial and error instead of something reliable and more robust like testing against something or waiting for an event.
Is there a specific callback for this kind of page load that I missed? Or maybe there's some kind of generic code pattern that can deal with this sort of thing?
EDIT:
I still haven't figured out how to get it to pause. Here's the code that doesn't call the onLoadStarted() function when I call the click() command:
var loadInProgress = false;
page.onLoadStarted = function() {
loadInProgress = true;
console.log("load started");
};
page.onLoadFinished = function() {
loadInProgress = false;
console.log("load finished");
};
page.open(loginPage.url, function (status) {
if (status !== 'success') {
console.log('Unable to access network');
fs.write(filePath + errorState, 1, 'w');
phantom.exit();
} else {
page.evaluate(function (loginPage, credentials) {
console.log('inside loginPage evaluate function...\n')
document.querySelector('input[id=' + loginPage.userId + ']').value = credentials.username;
document.querySelector('input[id=' + loginPage.passId + ']').value = credentials.password;
document.querySelector('input[id=' + loginPage.submitId + ']').click();
//var aTags = document.getElementsByTagName('a')
//aTags[1].click();
}, loginPage, credentials);
page.render(renderPath + 'postLogin.png');
console.log('rendered post-login');
I double checked that the id is correct. The page.render() will show that the information is submitted, but only if I put it in a setTimeout(), otherwise it renders it immediately and I only see the credentials inputted, before the page redirect. Maybe I'm missing something else?
I think the onLoadStarted and onLoadFinished functions are everything you need. Take for example the following script:
var page = require('webpage').create();
page.onResourceReceived = function(response) {
if (response.stage !== "end") return;
console.log('Response (#' + response.id + ', stage "' + response.stage + '"): ' + response.url);
};
page.onResourceRequested = function(requestData, networkRequest) {
console.log('Request (#' + requestData.id + '): ' + requestData.url);
};
page.onUrlChanged = function(targetUrl) {
console.log('New URL: ' + targetUrl);
};
page.onLoadFinished = function(status) {
console.log('Load Finished: ' + status);
};
page.onLoadStarted = function() {
console.log('Load Started');
};
page.onNavigationRequested = function(url, type, willNavigate, main) {
console.log('Trying to navigate to: ' + url);
};
page.open("http://example.com", function(status){
page.evaluate(function(){
// click
var e = document.createEvent('MouseEvents');
e.initMouseEvent('click', true, true, window, 0, 0, 0, 0, 0, false, false, false, false, 0, null);
document.querySelector("a").dispatchEvent(e);
});
setTimeout(function(){
phantom.exit();
}, 10000);
});
It prints
Trying to navigate to: http://example.com/
Request (#1): http://example.com/
Load Started
New URL: http://example.com/
Response (#1, stage "end"): http://example.com/
Load Finished: success
Trying to navigate to: http://www.iana.org/domains/example
Request (#2): http://www.iana.org/domains/example
Load Started
Trying to navigate to: http://www.iana.org/domains/reserved
Request (#3): http://www.iana.org/domains/reserved
Response (#2, stage "end"): http://www.iana.org/domains/example
New URL: http://www.iana.org/domains/reserved
Request (#4): http://www.iana.org/_css/2013.1/screen.css
Request (#5): http://www.iana.org/_js/2013.1/jquery.js
Request (#6): http://www.iana.org/_js/2013.1/iana.js
Response (#3, stage "end"): http://www.iana.org/domains/reserved
Response (#6, stage "end"): http://www.iana.org/_js/2013.1/iana.js
Response (#4, stage "end"): http://www.iana.org/_css/2013.1/screen.css
Response (#5, stage "end"): http://www.iana.org/_js/2013.1/jquery.js
Request (#7): http://www.iana.org/_img/2013.1/iana-logo-header.svg
Request (#8): http://www.iana.org/_img/2013.1/icann-logo.svg
Response (#8, stage "end"): http://www.iana.org/_img/2013.1/icann-logo.svg
Response (#7, stage "end"): http://www.iana.org/_img/2013.1/iana-logo-header.svg
Request (#9): http://www.iana.org/_css/2013.1/print.css
Response (#9, stage "end"): http://www.iana.org/_css/2013.1/print.css
Load Finished: success
It shows that clicking a link emits the LoadStarted event once and NavigationRequested event twice, because there is a redirect. The trick is to add the event handlers before doing the action:
var page = require('webpage').create();
page.open("http://example.com", function(status){
page.onLoadFinished = function(status) {
console.log('Load Finished: ' + status);
page.render("test37_next_page.png");
phantom.exit();
};
page.onLoadStarted = function() {
console.log('Load Started');
};
page.evaluate(function(){
var e = document.createEvent('MouseEvents');
e.initMouseEvent('click', true, true, window, 0, 0, 0, 0, 0, false, false, false, false, 0, null);
document.querySelector("a").dispatchEvent(e);
});
});
If you need to do those things, maybe it is time to try something else like CasperJS. It runs on top of PhantomJS, but has a much better API for navigating web pages.
Use the high-level wrapper, nightmarejs.
You can easily click there and wait afterwards.
Here is the code (Examples section):
var Nightmare = require('nightmare');
new Nightmare()
.goto('http://yahoo.com')
.type('input[title="Search"]', 'github nightmare')
.click('.searchsubmit')
.run(function (err, nightmare) {
if (err) return console.log(err);
console.log('Done!');
});
More examples and API usage can be found at github
Here is my code based on some other answers. In my case, I didn't need to specifically evaluate any other javascript. I just needed to wait for the page to finish loading.
var system = require('system');
if (system.args.length === 1) {
console.log('Try to pass some arguments when invoking this script!');
}
else {
var page = require('webpage').create();
var address = system.args[1];
page.open(address, function(status){
page.onLoadFinished = function(status) {
console.log(page.content);
phantom.exit();
};
});
}
Save the above in a file called "scrape.js" and call it this way:
phantomjs --ssl-protocol=any --ignore-ssl-errors=true scrape.js https://www.example.com
The SSL-related params are added to avoid other issues that I was having with certain HTTPS sites (related to certificate loading issues).
Hope this helps someone!
Related
I used the Intervention image in my api. Then, I am trying to access it from my web, which is also Laravel but in different project. (I separated the web from the api due to some testing purposes for the api). But the image was successfully resized and saved to my public folder. But in my api there's an error then, when I comment the Image::make(), the error is gone. Why is that?
EDIT: Code from my api where I used Image::make()
$plant_image = $_FILES['image']['tmp_name'];
move_uploaded_file($plant_image, public_path()."\gallery\images\\".$_FILES['image']['name']);
$file_path = public_path() . "\gallery\images\\" . $_FILES['image']['name'];
$img = Image::make($file_path)->resize(216, 145);
$img->save();
Here is the code for the web
$(document).ready(function() {
$("form#addplant").submit(function() {
var form_data = new FormData($("#addplant")[0]);
$.ajax({
url: 'http://127.0.0.1/identificare_api/public/api/plants',
data: form_data,
type: "POST",
processData : false,
contentType: false,
success: function( json ) {
//console.log(json);
if (json.indexOf("error") > -1) {
var jsonparse = JSON.parse(json);
if(jsonparse.hasOwnProperty('error')){
location.reload(true);
alert("Code: " + jsonparse.error.code + "\n" + "Message: " + jsonparse.error.message);
}else{
location.reload(true);
alert("Please fill in empty fields");
}
}else{
window.location.href = "/home/"+ user_token;
alert("This item is currently under review! Please wait for admin's confirmation. Thank you!");
}
},
error: function(){
alert("Something's wrong with your api. Come on fix it!");
}
});
});
});
I have a phantomJS script that contains the following:
page.open(url, function (status) {
if (status === "fail") { /* handle failure */ }
});
The status check works sometimes, but the status will still be "success" even if the request returns 500. How can I get the actual request status code?
You can do it something like this:
var page = require('webpage').create(),
system = require('system'),
resources = [];
page.open('http://google.com', function (status) {
console.log('Loaded with http status:', resources[0].status);
phantom.exit();
});
page.onResourceReceived = function(response) {
// check if the resource is done downloading
if (response.stage !== "end") return;
// apply resource filter if needed:
if (response.headers.filter(function(header) {
if (header.name == 'Content-Type' && header.value.indexOf('text/html') == 0) {
return true;
}
return false;
}).length > 0)
resources.push(response);
};
So, if you need to check the status of the first browser's request (in this case google's html page) you should see it as the first one returned in resources[0].status. In onResourceReceived handler you can add more filters for resources you try to get http code from.
UPDATE: thanks to #fotijr, added a check for completed responses
In
page.property('onResourceError', function(res) {
resources variable is undefined,
even if I set it with
var page = require('webpage').create(),
system = require('system'),
resources = [];
I'm beginner programmer. I found nice script
http://planzero.org/blog/2013/03/07/spidering_the_web_with_casperjs
I tried to rewrite this script with CasperJS test framework.
I would to get xunit report from this code
var startUrl = 'http://yoursite.foo';
var visitedUrls = [], pendingUrls = [];
var casper = require('casper').create({
pageSettings: {
loadImages: false,
loadPlugins: false
}});
var utils = require('utils')
var helpers = require('helpers')
// Spider from the given URL
casper.test.begin('href' , function(test) {
casper.start(startUrl, function() {
function spider(url) {
// Add the URL to the visited stack
visitedUrls.push(url);
// Open the URL
casper.open(url).then(function() {
test.assertHttpStatus(200, ":" + url);
// Find links present on this page
var links = this.evaluate(function() {
var links = [];
Array.prototype.forEach.call(__utils__.findAll('a'), function(e) {
links.push(e.getAttribute('href'));
});
return links;
});
// Add newly found URLs to the stack
var baseUrl = this.getGlobal('location').origin;
Array.prototype.forEach.call(links, function(link) {
var newUrl = helpers.absoluteUri(baseUrl, link);
if (pendingUrls.indexOf(newUrl) == -1 && visitedUrls.indexOf(newUrl) == -1 && !(link.search(startUrl) == -1)) {
pendingUrls.push(newUrl);
}
});
// If there are URLs to be processed
if (pendingUrls.length > 0) {
var nextUrl = pendingUrls.shift();
spider(nextUrl);
}
else {
console.log('links ended');
this.break;
}
});
}
spider(startUrl);
}).run(function(){
test.done();
});
});
Script is running but when he and Job I can't get report.
If you're trying to learn how to use CasperJS you need to start with a smaller example than that. That script is a mess which goes after a site named yoursite.foo (maybe you put that name in there?)
I would take small steps. I have a video which may help explain how to use CasperJS.
http://www.youtube.com/watch?v=Kefil5tCL9o
I'm testing out PhantomJS and trying to return all startups listed on angel.co. I decided to go with PhantomJS since I would need to paginate through the front page by clicking "Next" at the bottom. Right now this code does not return any results. I'm completely new to PhantomJS and have read through all the code examples so any guidance would be much appreciated.
var page = require('webpage').create();
page.open('https://angel.co/startups', function(status) {
if (status !== 'success') {
console.log('Unable to access network');
} else {
page.evaluate(function() {
var list = document.querySelectorAll('div.resume');
for (var i = 0; i < list.length; i++){
console.log((i + 1) + ":" + list[i].innerText);
}
});
}
phantom.exit();
});
By default, console messages evaluated on the page will not appear in your PhantomJS console.
When you execute code under page.evaluate(...), that code is being executed in the context of the page. So when you have console.log((i + 1) + ":" + list[i].innerText);, that is being logged in the headless browser itself, rather than in PhantomJS.
If you want all console messages to be passed along to PhantomJS itself, use the following after opening your page:
page.onConsoleMessage = function (msg) { console.log(msg); };
page.onConsoleMessage is triggered whenever you print to the console from within the page. With this callback, you're asking PhantomJS to echo the message to its own standard output stream.
For reference, your final code would look like (this prints successfully for me):
var page = require('webpage').create();
page.open('https://angel.co/startups', function(status) {
page.onConsoleMessage = function (msg) { console.log(msg); };
if (status !== 'success') {
console.log('Unable to access network');
} else {
page.evaluate(function() {
var list = document.querySelectorAll('div.resume');
for (var i = 0; i < list.length; i++){
console.log((i + 1) + ":" + list[i].innerText);
}
});
}
phantom.exit();
});
I have used Ti.Network.createHTTPClient in Titanium and see that the control goes neither inside onLoad nor onError. What could be the reason?
var loader = Titanium.Network.createHTTPClient();
loader.onload = function() {
alert("Hello");
}
loader.onError = function(e)
alert("Error: " + e.error);
}
Add these 2 lines to make it work! You did not send the request, nor did you send the URL
// add url in here
loader.open("GET",'[URL HERE]');
// Send the request.
loader.send();
var xhrSitelogin = Titanium.Network.createHTTPClient();
xhrSitelogin.open('POST', webservice_url);
xhrSitelogin.send({
method : "userlogin",
username : username,
password : password
});
xhrSitelogin.setTimeout(10000);
xhrSitelogin.onerror = function() {
showAlertBox('Service timed out. Please try again.');
//Hide Indicator
};
xhrSitelogin.onload = function() {
alert(this.responseText);
//RESPONSE RECEIVED
};
Vote Up or mark best if you consider it help full.
Hi dosth try with this am not sure it will work if it work i will be happy
var taskRequest = Titanium.Network.createHTTPClient();
var api_url = 'http://myawesomeapi.heroku.com/users/' +
Ti.App.Properties.getString("userID") + '/tasks';
taskRequest.onload = function() {
var tasks = [];
// code populating the tasks array
alert(tasks);
callback( tasks ); // invoke the callback
}
taskRequest.open('GET', api_url, false);
taskRequest.setRequestHeader('Content-Type', 'application/json');
taskRequest.send();
<....>
loader.open("POST/GET","URL");
loader.onload(response){
//get the response
console.log(this.responseText);
};
loader.send();
Use this pattern.
If you need to set any header then use loader.setRequestHeader("Content-Type", "application/json; charset=utf-8"); after the open()/before onload() and send()