HTML Code to PNG with Phantom.js - phantomjs

Is there a way to give Phantom.js HTML code instead of a URL to render?
HTML URL Example
var page = require('webpage').create();
page.open('http://github.com/', function() {
page.render('github.png');
phantom.exit();
});
Desired HTML markup example
var page = require('webpage').create();
page.open('<html><head><style>SOME CSS</style></head><body><div>SOME TEXT AND IMAGES</div></body></html>',
function() {
page.render('github.png');
phantom.exit();
}
);

Yes, there is a way of doing this:
var page = require('webpage').create();
page.viewportSize = { width: 800, height : 600 };
page.content = '<html><head><style>SOME CSS</style></head><body><div>SOME TEXT AND IMAGES</div></body></html>';
page.evaluate(function() {
// your logic here
});
page.render('github.png');
phantom.exit();

Related

PhantomJs Injecting jQuery in different pages

I have a PhantomJs script in which I create a new webpage, inject jQuery into it and scrape a list of URLs from it. After that I call a function, passing it the list of URLs, which creates a new webpage for each one and tries to recover certain information from it
var pageGlobal = require('webpage');
function createPage(){
var page = pageGlobal.create();
page.onAlert = function(msg) {
console.log(msg);
};
return page;
}
var page = createPage();

// Runs inside the loaded page: gathers the href attribute of every
// "td.row1>a" element, in document order.
function collectLinkTargets() {
  var hrefs = [];
  // Send the page's console.log output through alert().
  window.console.log = function(msg) { alert(msg) };
  $("td.row1>a").each(function(position, anchor) {
    hrefs.push($(anchor).attr('href'));
  });
  return hrefs;
}

page.open('http://www.example.com/', function(status) {
  if (status !== "success") {
    return;
  }
  // jQuery must be injected before collectLinkTargets runs, since it uses $.
  page.injectJs('jquery-1.6.1.min.js');
  processUrlList(page.evaluate(collectLinkTargets));
});
// Creates one page per entry of urlList, opens "http://www.example.com" + entry,
// injects jQuery into it and evaluates a scraping function inside the page.
// urlList: array of strings that are appended to the base URL.
function processUrlList(urlList){
// NOTE(review): `i` is assigned without a declaration, so it is an implicit global.
for(i=0;i<urlList.length;i++){
// NOTE(review): `var` is function-scoped, so every open() callback created in this
// loop closes over the same `currentPage` binding. If the callbacks run after the
// loop has advanced (open() appears to be asynchronous — confirm), each of them
// sees the page created last rather than its own page.
var currentPage=createPage();
currentPage.open("http://www.example.com"+urlList[i], function(status){
if ( status === "success" ) {
// A `false` result from injectJs is treated as an injection failure and logged.
if(currentPage.injectJs('jquery-1.6.1.min.js')===false){
console.log("Error en la inyeccion");
}
currentPage.evaluate(function() {
// Send the page's console.log output through alert().
window.console.log = function(msg) { alert(msg) };
console.log("Evaluating");
// NOTE(review): "showAdText" is a tag-name selector; if a class or id was meant
// it needs a leading "." or "#" — confirm.
$("showAdText").each(function(index, link) {
//Capture information about the entity in this URL
})
});
}
});
}
}
The problem is that in the processUrlList function the injection of jQuery always fails, returning false. Would it be a problem to create two or more page objects instead of reusing only one? What could be happening here?

PhantomJS / Javascript: download web page and write to text file

The code below downloads a web page and works fine, but I also want to save the page to a text file. I added code that writes a text file, but honestly I have no idea how to make it save the downloaded page.
var url = 'http://stackoverflow.com';
var page = require('webpage').create();
page.open(url, function(status) {
if (status === 'success') {
var html = page.evaluate(function() {
return document.documentElement.outerHTML;
});
console.log(html);
}
var fs = require('fs');
try {
fs.write("C:\phantomjs\\qhxpZ.txt", "Message to be written to the file", 'w');
} catch(e) {
console.log(e);
}
phantom.exit();
});
So, for completeness, the solution to the issue at hand should look something like:
var url = 'http://stackoverflow.com';
var fs = require('fs');
var page = require('webpage').create();
page.open(url, function(status) {
if (status === 'success') {
var html = page.evaluate(function() {
return document.documentElement.outerHTML;
});
try {
fs.write("C:\\phantomjs\\qhxpZ.txt", html, 'w');
} catch(e) {
console.log(e);
}
}
phantom.exit();
});
Just replace "Message to be written to the file" with html and the file will be saved.
// The line to change: pass `html` instead of the placeholder message string so
// that the page markup is what gets written.
// NOTE(review): "\p" is not an escape sequence, so that backslash is dropped
// from the path; "\\" (as used before the file name) keeps it.
fs.write("C:\phantomjs\\qhxpZ.txt", "Message to be written to the file", 'w');

How to load the pages using PhantomJS

I'm new to PhantomJS. I have tried to load the page using the code below, but the given page is not loading when I run it.
console.log('Loading a web page');
var page = require('webpage').create();
var url = 'http://www.phantomjs.org/';
page.open(url, function (status) {
//Page is loaded!
phantom.exit();
});
Your code is correct but you have to do something before phantom.exit();
See all examples here.
Let's capture a web page as a screenshot :
console.log('Loading a web page');
var page = require('webpage').create();
var url = 'http://www.phantomjs.org/';
page.open(url, function (status) {
//Page is loaded!
page.render('phantomjs.png');
phantom.exit();
});

PhantomJS: submit a form

I am filling out and submitting a form using PhantomJS and then outputting the resulting page. The thing is, I have no idea if this thing is being submitted at all.
I print the resulting page, but it's the same as the original page. I don't know if this is because it redirects back or I didn't submit it or I need to wait longer or or or. In a real browser it sends a GET and receives a cookie, which it uses to send more GETS before eventually receiving the final result - flight data.
I copied this example, How to submit a form using PhantomJS, using a different URL and different page.evaluate functions.
var page = new WebPage();
var testindex = 0;           // index of the next step to run
var loadInProgress = false;  // true between onLoadStarted and onLoadFinished

// Updates the load flag and logs the transition.
function setLoading(isLoading, label) {
  loadInProgress = isLoading;
  console.log(label);
}

// Print console messages received from the page.
page.onConsoleMessage = function(text) {
  console.log(text);
};

page.onLoadStarted = function() {
  setLoading(true, "load started");
};

page.onLoadFinished = function() {
  setLoading(false, "load finished");
};
// Ordered list of zero-argument step functions. They are invoked one at a
// time, in array order, by the polling loop that follows this array.
var steps = [
  function() {
    // Open the page that contains the flight search form.
    page.open("http://www.klm.com/travel/dk_da/index.htm");
  },
  function() {
    // Fill in origin, destination and both dates (runs inside the page).
    page.evaluate(function() {
      $("#ebt-origin-place").val("CPH");
      $("#ebt-destination-place").val("CDG");
      $("#ebt-departure-date").val("1/5/2013");
      $("#ebt-return-date").val("10/5/2013");
    });
  },
  function() {
    // Submit the search by clicking the submit button (runs inside the page).
    page.evaluate(function() {
      $('#ebt-flightsearch-submit').click();
      // also tried:
      // $('#ebt-flight-searchform').submit();
    });
  },
  function() {
    // Output content of page to stdout after form has been submitted
    page.evaluate(function() {
      console.log(document.querySelectorAll('html')[0].outerHTML);
    });
  }
];
// Every 50 ms: run the next step if no page load is in progress, and finish
// once there are no steps left.
// `interval` is declared with var (it was an implicit global) and the timer is
// cleared on completion so the "test complete!" branch cannot run again on a
// later tick.
var interval = setInterval(function() {
  if (!loadInProgress && typeof steps[testindex] === "function") {
    console.log("step " + (testindex + 1));
    steps[testindex]();
    testindex++;
  }
  // Checked in the same tick: after the last step runs, testindex points past
  // the end of the array and the run is finished.
  if (typeof steps[testindex] !== "function") {
    console.log("test complete!");
    clearInterval(interval);
    phantom.exit();
  }
}, 50);
The site of interest is rather complicated to scrape. I logged the HTTP traffic from the US KLM site and got this:
GET /travel/us_en/apps/ebt/ebt_home.htm?name=on&ebt-origin-place=New+York+-+John+F.+Kennedy+International+%28JFK%29%2CNew+York&ebt-destination-place=Paris+-+Charles+De+Gaulle+Airport+%28CDG%29%2C+France&c%5B0%5D.os=JFK&c%5B0%5D.ost=airport&c%5B0%5D.ds=CDG&c%5B0%5D.dst=airport&c%5B1%5D.os=CDG&c%5B1%5D.ost=airport&c%5B1%5D.ds=JFK&inboundDestinationLocationType=airport&redirect=no&chdQty=0&infQty=0&c%5B0%5D.dd=2013-07-31&c%5B1%5D.dd=2013-08-14&c%5B1%5D.format=dd%2Fmm%2Fyyyy&flex=true&ebt-cabin-class=ECONOMY&adtQty=1&goToPage=&cffcc=ECONOMY&sc=false HTTP/1.1
Your injected values for the form elements are not what their server is looking for.
Inside page.evaluate(), you are sandboxed, but the sample code includes a hook to get sandboxed console activity onto the external console. For other debugging, you can also include object inspectors, etc., but they have to be injected into the page or part of the code passed into evaluate().

How to use phantomjs?

I would like to learn PhantomJS, but I can't find a good tutorial. I have 2 questions:
Where is the problem in the following code (it needs to capture the label of a button and write it to a file)?
// Opens vk.com, loads jQuery into the page and tries to write the text of
// #quick_login_button to ololo.txt.
var page = require('webpage').create();
var fs = require('fs');
// Print console messages received from the page; the output encoding is
// (re)assigned each time a message arrives.
page.onConsoleMessage = function(msg) {
phantom.outputEncoding = "utf-8";
console.log(msg);
};
page.open("http://vk.com", function(status) {
if ( status === "success" ) {
page.includeJs("http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js", function() {
// NOTE(review): the function passed to page.evaluate uses `fs`, which is created
// in the outer script. The explanation that follows this snippet says execution
// is sandboxed and the page has no access to the phantom objects, so the file
// calls below look like the part that fails — confirm.
page.evaluate(function() {
var str = $("#quick_login_button").text();
// NOTE(review): `f` is assigned without a declaration (implicit global).
f = fs.open("ololo.txt", "w");
f.writeLine(str);
f.close();
console.log("done");
});
phantom.exit();
});
}
// NOTE(review): there is no exit call when status is not "success".
});
What PhantomJS tutorial can you recommend to me? (not from the official site)
Because execution is sandboxed, the web page has no access to the phantom objects.
var page = require('webpage').create();
var fs = require('fs');
page.onConsoleMessage = function(msg) {
phantom.outputEncoding = "utf-8";
console.log(msg);
};
page.open("http://vk.com", function(status) {
if ( status === "success" ) {
page.includeJs("http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js", function() {
var str = page.evaluate(function() {
return $("#quick_login_button").text();
});
f = fs.open("ololo.txt", "w");
f.writeLine(str);
f.close();
console.log("done");
phantom.exit();
});
}
});
PhantomJS comes with a lot of included examples. Take a look here.