PhantomJS webpage timeout - phantomjs

I have set up a script to create webshots of our app.
It runs perfectly and all is fine until I encounter an image with a broken URL:
"<img src='http://testserver.our.intranet/fetch/image/373e8fd2339696e2feeb680b765d626e' />"
Using the code below I managed to abort the script after 6 seconds; before that it just looped forever.
But is it possible to ignore the network request (i.e. take the image out of the DOM) and then proceed to create the thumbnail without the image (or with an injected "missing image" placeholder)?
var page = require('webpage').create(),
    system = require('system'),
    address, output, size;
if (system.args.length < 3 || system.args.length > 5) {
    phantom.exit(1);
} else {
    address = system.args[1];
    output = system.args[2];
    page.viewportSize = { width: 640, height: 640 };
    page.zoomFactor = 0.75;
    page.clipRect = { top: 10, left: 0, width: 640, height: 490 };
    try {
        page.open(address, function (status) {
            if (status !== 'success') {
                console.log('Unable to load the address!');
                phantom.exit();
            } else {
                window.setTimeout(function () {
                    page.render(output);
                    phantom.exit();
                }, 200);
            }
        });
    } finally {
        // hard upper bound on execution time (6000 ms = 6 seconds)
        setTimeout(function () {
            console.log("Max execution time " + Math.round(6000 / 1000) + " seconds exceeded");
            phantom.exit(1);
        }, 6000);
    }
}

PhantomJS 1.9 introduced a new setting, resourceTimeout, that controls how long a single request may take before it gets cancelled. Along with that, there's an onResourceTimeout event that is triggered if/when a request times out.
Here's a code snippet illustrating all of the above:
var page = require('webpage').create();
page.settings.resourceTimeout = 5000; // 5 seconds
page.onResourceTimeout = function(e) {
    console.log(e.errorCode);   // it'll probably be 408
    console.log(e.errorString); // it'll probably be 'Network timeout on resource'
    console.log(e.url);         // the url whose request timed out
    phantom.exit(1);
};
page.open('http://...', function (status) {
    // ...
});
Unfortunately those options are poorly documented right now. I had to go through GitHub discussions and the PhantomJS source code to find them.
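To address the original question (render the thumbnail even when one image request hangs), the two features above can be combined: record the URLs that time out instead of exiting, then strip the matching <img> elements before rendering. The following is a minimal, untested sketch of that idea; the attribute selector and the 200 ms settle delay are my assumptions, not part of the answer above.
// Hedged sketch: let timed-out resources be dropped, then render anyway.
var page = require('webpage').create(),
    system = require('system'),
    address = system.args[1],
    output = system.args[2],
    timedOutUrls = [];

page.settings.resourceTimeout = 5000; // give up on any single request after 5 s

page.onResourceTimeout = function (e) {
    // Remember which URLs failed instead of exiting, so the render can proceed.
    timedOutUrls.push(e.url);
};

page.open(address, function (status) {
    if (status !== 'success') {
        console.log('Unable to load the address!');
        phantom.exit(1);
        return;
    }
    // Optionally remove the broken <img> elements from the DOM before rendering.
    // Assumes the src attribute matches the absolute URL reported in the event.
    page.evaluate(function (urls) {
        urls.forEach(function (url) {
            var img = document.querySelector('img[src="' + url + '"]');
            if (img && img.parentNode) {
                img.parentNode.removeChild(img);
            }
        });
    }, timedOutUrls);
    window.setTimeout(function () {
        page.render(output);
        phantom.exit();
    }, 200);
});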

Related

Is there any reason why no error is reported when calling navigator.mediaDevices.getUserMedia?

I'm writing a WebRTC Chrome desktop app that accesses two cameras simultaneously, using the latest Chrome version on Windows.
Getting the camera list with navigator.mediaDevices.enumerateDevices() works, but opening those devices by their specific deviceId with navigator.mediaDevices.getUserMedia does not.
It only fails sometimes, and there is no error in the catch.
So I checked whether navigator.mediaDevices.getUserMedia really exists:
if (navigator && navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
}
Yes, it does.
I'm just not getting any log output from the call to navigator.mediaDevices.getUserMedia():
getVideoSources_ = function() {
    return new Promise(function(resolve, reject) {
        if (typeof navigator.mediaDevices.enumerateDevices === 'undefined') {
            alert('Your browser does not support navigator.mediaDevices.enumerateDevices, aborting.');
            reject(Error("Your browser does not support navigator.mediaDevices.enumerateDevices, aborting."));
            return;
        }
        requestVideoSetTimeout = 500;
        navigator.mediaDevices.enumerateDevices().then((devices) => {
            // get the list first by sorting mibunsho camera in first place
            for (var i = 0; i < devices.length; i++) {
                log("devices[i]", JSON.stringify(devices[i]));
                log("devices[i].label", devices[i].label);
                if (devices[i].kind === 'videoinput' && devices[i].deviceId && devices[i].label) {
                    if (devices[i].label.indexOf("USB_Camera") > -1) {
                        deviceList[1] = devices[i];
                    } else {
                        deviceList[0] = devices[i];
                    }
                }
            }
            // request video by sorted plan
            for (var i = 0; i < deviceList.length; i++) {
                requestVideo_(deviceList[i].deviceId, deviceList[i].label, resolve, reject);
                requestVideoSetTimeout = 1000; // change requestVideoSetTimeout for next video request
            }
        }).catch((err) => {
            log("getVideoSources_:" + err.name + ": " + err.message);
            reject(Error("getVideoSources_ catch error"));
        });
    });
}
getVideoSources_().then(function(result) {
    // ....
}).catch(function(err) {
    // ....
});
function requestVideo_(id, label, resolve, reject) {
    if (navigator && navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
        log("navigator.mediaDevices.getUserMedia found!");
        navigator.mediaDevices.getUserMedia({
            video: {
                deviceId: {exact: id},
                width: 640,
                height: 480,
                frameRate: {
                    ideal: 20,
                    max: 20
                }
            },
            audio: false
        }).then(
            (stream) => {
                log("***requestVideo_", id);
                log("***requestVideo_", label);
                log("***requestVideo_", stream);
                // USB_Camera is face camera
                if (label.indexOf("USB_Camera") > -1) {
                    log("***requestVideo_001");
                    myStream2 = stream;
                    log("***requestVideo_myStream2", myStream2);
                } else {
                    log("***requestVideo_002");
                    myStream = stream;
                    log("***requestVideo_myStream", myStream);
                    getUserMediaOkCallback_(myStream, label);
                }
                resolve("###Video Stream got###");
                stream.getVideoTracks()[0].addEventListener('ended', function(){
                    log("***Camera ended event fired. " + id + " " + label);
                    endedDevice[id] = label;
                });
            },
            getUserMediaFailedCallback_
        ).catch((error) => {
            log('requestVideo_: ' + error.name);
            reject(Error("requestVideo_ catch error" + error.name));
        });
    }
}
function getUserMediaFailedCallback_(error) {
    log("getUserMediaFailedCallback_ error:", error.name);
    alert('User media request denied with error: ' + error.name);
}
@Kaiido is right: resolve is called inside a for-loop here, so it fires all over the place and before all the work has finished. Any error after that point is basically lost.
This is the promise constructor anti-pattern. In short: Don't write app code inside promise constructors. Don't pass resolve and reject functions down. Instead, let functions return promises up to you, and add all app code in then callbacks on them. Then return all promises so they form a single chain. Only then do errors propagate correctly.
See Using promises on MDN for more.
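To make the advice concrete, here is a hedged sketch of the restructuring described above. The names requestVideo_, getVideoSources_ and the "videoinput" filtering come from the question; the sequential chaining with reduce() and the returned {stream, label} shape are my assumptions about the intended behaviour, not the answerer's code.
// Each function returns a promise instead of receiving resolve/reject.
function requestVideo_(id, label) {
    return navigator.mediaDevices.getUserMedia({
        video: { deviceId: { exact: id } },
        audio: false
    }).then(function (stream) {
        // hand the stream to whoever needs it, then pass it along the chain
        return { stream: stream, label: label };
    });
}

function getVideoSources_() {
    return navigator.mediaDevices.enumerateDevices().then(function (devices) {
        var cameras = devices.filter(function (d) { return d.kind === 'videoinput'; });
        // open the cameras one after another; errors propagate automatically
        return cameras.reduce(function (chain, device) {
            return chain.then(function (results) {
                return requestVideo_(device.deviceId, device.label).then(function (r) {
                    results.push(r);
                    return results;
                });
            });
        }, Promise.resolve([]));
    });
}

getVideoSources_()
    .then(function (results) { console.log('got ' + results.length + ' streams'); })
    .catch(function (err) { console.error(err); });
Because every promise is returned up the chain, a failure in any getUserMedia call ends up in the single .catch at the bottom instead of being swallowed.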

StreamTrack's readyState is getting changed to ended, just before playing the stream (MediaStream - MediaStreamTrack - WebRTC)

The jsfiddle (https://jsfiddle.net/kalyansai99/mm1b74uy/22/) contains code where the user can toggle between the front and back cameras of a mobile device.
On some phones it works fine (Moto G5 Plus, Moto E3 and so on, Chrome browser), but on others (Mi Redmi Note 4, Chrome browser), when I switch to the back camera the stream initially loads with a track whose readyState is "live". But when I am about to play the stream in the video player, the readyState changes to "ended" and a black screen is shown on the video tag.
Not sure what's happening. Any clues?
JSFiddle Code
var player = document.getElementById('player');
var flipBtn = document.getElementById('flipBtn');
var deviceIdMap = {};
var front;
var constraints = {
    audio: false,
    video: {
        frameRate: 1000
    }
};
var gotDevices = function (deviceList) {
    var length = deviceList.length;
    console.log(deviceList);
    for (var i = 0; i < length; i++) {
        var deviceInfo = deviceList[i];
        if (deviceInfo.kind === 'videoinput') {
            if (deviceInfo.label.indexOf('front') !== -1) {
                deviceIdMap.front = deviceInfo.deviceId;
            } else if (deviceInfo.label.indexOf('back') !== -1) {
                deviceIdMap.back = deviceInfo.deviceId;
            }
        }
    }
    if (deviceIdMap.front) {
        constraints.video.deviceId = {exact: deviceIdMap.front};
        front = true;
    } else if (deviceIdMap.back) {
        constraints.video.deviceId = {exact: deviceIdMap.back};
        front = false;
    }
    console.log('deviceIdMap - ', deviceIdMap);
};
var handleError = function (error) {
    console.log('navigator.getUserMedia error: ', error);
};
function handleSuccess(stream) {
    window.stream = stream;
    // this is a video track as there is no audio track
    console.log("Track - ", window.stream.getTracks()[0]);
    console.log('Ready State - ', window.stream.getTracks()[0].readyState);
    if (window.URL) {
        player.src = window.URL.createObjectURL(stream);
    } else {
        player.src = stream;
    }
    player.onloadedmetadata = function (e) {
        console.log('Ready State - 3', window.stream.getTracks()[0].readyState);
        player.play();
        console.log('Ready State - 4', window.stream.getTracks()[0].readyState);
    }
    console.log('Ready State - 2', window.stream.getTracks()[0].readyState);
}
navigator.mediaDevices.enumerateDevices().then(gotDevices).catch(handleError);
flipBtn.addEventListener('click', function () {
    if (window.stream) {
        window.stream.getTracks().forEach(function(track) {
            track.stop();
        });
    }
    if (front) {
        constraints.video.deviceId = {exact: deviceIdMap.back};
    } else {
        constraints.video.deviceId = {exact: deviceIdMap.front};
    }
    front = !front;
    navigator.getUserMedia(constraints, handleSuccess, handleError);
}, false);
console.log(constraints);
navigator.getUserMedia(constraints, handleSuccess, handleError);
#player {
    width: 320px;
}
#flipBtn {
    width: 150px;
    height: 50px;
}
<video id="player" autoplay></video>
<div>
    <button id="flipBtn">
        Flip Camera
    </button>
</div>
Replace track.stop() with track.enabled = false, and when you add the track back to the stream, re-enable it with track.enabled = true.
A MediaStreamTrack's readyState changes to "ended" when you stop the track, and an ended track can never be used again. Therefore it's not wise to use stop() if you want to reuse the track later. For more reference:
https://developer.mozilla.org/en-US/docs/Web/API/MediaStreamTrack/readyState
https://developer.mozilla.org/en-US/docs/Web/API/MediaStreamTrack/stop
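A hedged sketch of how the fiddle's flip handler might look with that change. The identifiers (flipBtn, constraints, deviceIdMap, front, handleSuccess, handleError) come from the fiddle; using the promise-based navigator.mediaDevices.getUserMedia instead of the legacy callback form is my substitution, not part of the answer.
flipBtn.addEventListener('click', function () {
    if (window.stream) {
        window.stream.getTracks().forEach(function (track) {
            track.enabled = false; // was: track.stop(); keeps readyState "live"
        });
    }
    constraints.video.deviceId = {exact: front ? deviceIdMap.back : deviceIdMap.front};
    front = !front;
    navigator.mediaDevices.getUserMedia(constraints)
        .then(function (stream) {
            stream.getTracks().forEach(function (track) {
                track.enabled = true; // make sure the new track is enabled
            });
            handleSuccess(stream);
        })
        .catch(handleError);
}, false);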

Click and open a new page in PhantomJS [duplicate]

PhantomJS has these two really handy callbacks, onLoadStarted and onLoadFinished, which allow you to essentially pause execution while the page is loading. But I've been searching and I can't find an equivalent for when you click() a submit button or hyperlink. A similar page load happens, but onLoadStarted doesn't get called for this event, I guess because there isn't an explicit page.open() happening. I'm trying to figure out a clean way to suspend execution while this load takes place.
One solution is obviously nested setTimeouts, but I'd like to avoid that because it's hacky and relies on trial and error instead of something reliable and more robust, like testing against a condition or waiting for an event.
Is there a specific callback for this kind of page load that I missed? Or maybe there's some kind of generic code pattern that can deal with this sort of thing?
EDIT:
I still haven't figured out how to get it to pause. Here's the code that doesn't call the onLoadStarted() function when I call the click() command:
var loadInProgress = false;
page.onLoadStarted = function() {
    loadInProgress = true;
    console.log("load started");
};
page.onLoadFinished = function() {
    loadInProgress = false;
    console.log("load finished");
};
page.open(loginPage.url, function (status) {
    if (status !== 'success') {
        console.log('Unable to access network');
        fs.write(filePath + errorState, 1, 'w');
        phantom.exit();
    } else {
        page.evaluate(function (loginPage, credentials) {
            console.log('inside loginPage evaluate function...\n');
            document.querySelector('input[id=' + loginPage.userId + ']').value = credentials.username;
            document.querySelector('input[id=' + loginPage.passId + ']').value = credentials.password;
            document.querySelector('input[id=' + loginPage.submitId + ']').click();
            //var aTags = document.getElementsByTagName('a')
            //aTags[1].click();
        }, loginPage, credentials);
        page.render(renderPath + 'postLogin.png');
        console.log('rendered post-login');
    }
});
I double-checked that the id is correct. page.render() shows that the information was submitted, but only if I put it in a setTimeout(); otherwise it renders immediately and I only see the credentials entered, before the page redirect. Maybe I'm missing something else?
I think the onLoadStarted and onLoadFinished functions are everything you need. Take for example the following script:
var page = require('webpage').create();
page.onResourceReceived = function(response) {
    if (response.stage !== "end") return;
    console.log('Response (#' + response.id + ', stage "' + response.stage + '"): ' + response.url);
};
page.onResourceRequested = function(requestData, networkRequest) {
    console.log('Request (#' + requestData.id + '): ' + requestData.url);
};
page.onUrlChanged = function(targetUrl) {
    console.log('New URL: ' + targetUrl);
};
page.onLoadFinished = function(status) {
    console.log('Load Finished: ' + status);
};
page.onLoadStarted = function() {
    console.log('Load Started');
};
page.onNavigationRequested = function(url, type, willNavigate, main) {
    console.log('Trying to navigate to: ' + url);
};
page.open("http://example.com", function(status){
    page.evaluate(function(){
        // click
        var e = document.createEvent('MouseEvents');
        e.initMouseEvent('click', true, true, window, 0, 0, 0, 0, 0, false, false, false, false, 0, null);
        document.querySelector("a").dispatchEvent(e);
    });
    setTimeout(function(){
        phantom.exit();
    }, 10000);
});
It prints
Trying to navigate to: http://example.com/
Request (#1): http://example.com/
Load Started
New URL: http://example.com/
Response (#1, stage "end"): http://example.com/
Load Finished: success
Trying to navigate to: http://www.iana.org/domains/example
Request (#2): http://www.iana.org/domains/example
Load Started
Trying to navigate to: http://www.iana.org/domains/reserved
Request (#3): http://www.iana.org/domains/reserved
Response (#2, stage "end"): http://www.iana.org/domains/example
New URL: http://www.iana.org/domains/reserved
Request (#4): http://www.iana.org/_css/2013.1/screen.css
Request (#5): http://www.iana.org/_js/2013.1/jquery.js
Request (#6): http://www.iana.org/_js/2013.1/iana.js
Response (#3, stage "end"): http://www.iana.org/domains/reserved
Response (#6, stage "end"): http://www.iana.org/_js/2013.1/iana.js
Response (#4, stage "end"): http://www.iana.org/_css/2013.1/screen.css
Response (#5, stage "end"): http://www.iana.org/_js/2013.1/jquery.js
Request (#7): http://www.iana.org/_img/2013.1/iana-logo-header.svg
Request (#8): http://www.iana.org/_img/2013.1/icann-logo.svg
Response (#8, stage "end"): http://www.iana.org/_img/2013.1/icann-logo.svg
Response (#7, stage "end"): http://www.iana.org/_img/2013.1/iana-logo-header.svg
Request (#9): http://www.iana.org/_css/2013.1/print.css
Response (#9, stage "end"): http://www.iana.org/_css/2013.1/print.css
Load Finished: success
It shows that clicking a link emits the LoadStarted event once and NavigationRequested event twice, because there is a redirect. The trick is to add the event handlers before doing the action:
var page = require('webpage').create();
page.open("http://example.com", function(status){
    page.onLoadFinished = function(status) {
        console.log('Load Finished: ' + status);
        page.render("test37_next_page.png");
        phantom.exit();
    };
    page.onLoadStarted = function() {
        console.log('Load Started');
    };
    page.evaluate(function(){
        var e = document.createEvent('MouseEvents');
        e.initMouseEvent('click', true, true, window, 0, 0, 0, 0, 0, false, false, false, false, 0, null);
        document.querySelector("a").dispatchEvent(e);
    });
});
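If you would rather suspend the script generically (for example when several clicks and loads follow one another), a common pattern is to poll a flag that the load callbacks maintain, much like the loadInProgress variable in the question. A rough, hedged sketch of that idea; the 250 ms polling interval and 10 s limit are arbitrary choices, and it assumes the load has actually started before the first poll fires:
// Poll a flag kept up to date by onLoadStarted / onLoadFinished.
var loadInProgress = false;

page.onLoadStarted = function () { loadInProgress = true; };
page.onLoadFinished = function () { loadInProgress = false; };

function waitForLoad(onReady, timeoutMs) {
    var start = Date.now();
    var timer = setInterval(function () {
        if (!loadInProgress) {
            clearInterval(timer);
            onReady();
        } else if (Date.now() - start > (timeoutMs || 10000)) {
            clearInterval(timer);
            console.log('Timed out waiting for the page to load');
            phantom.exit(1);
        }
    }, 250);
}

// usage: trigger the click (as shown above), then wait for the load to finish
page.evaluate(function () {
    var e = document.createEvent('MouseEvents');
    e.initMouseEvent('click', true, true, window, 0, 0, 0, 0, 0, false, false, false, false, 0, null);
    document.querySelector("a").dispatchEvent(e);
});
waitForLoad(function () {
    page.render("after_click.png");
    phantom.exit();
});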
If you need to do those things, maybe it is time to try something else like CasperJS. It runs on top of PhantomJS, but has a much better API for navigating web pages.
Use the high-level wrapper, nightmarejs.
You can easily click there and wait afterwards.
Here is the code (Examples section):
var Nightmare = require('nightmare');
new Nightmare()
.goto('http://yahoo.com')
.type('input[title="Search"]', 'github nightmare')
.click('.searchsubmit')
.run(function (err, nightmare) {
if (err) return console.log(err);
console.log('Done!');
});
More examples and API documentation can be found on GitHub.
Here is my code based on some other answers. In my case, I didn't need to specifically evaluate any other javascript. I just needed to wait for the page to finish loading.
var system = require('system');
if (system.args.length === 1) {
    console.log('Try to pass some arguments when invoking this script!');
} else {
    var page = require('webpage').create();
    var address = system.args[1];
    page.open(address, function(status){
        page.onLoadFinished = function(status) {
            console.log(page.content);
            phantom.exit();
        };
    });
}
Save the above in a file called "scrape.js" and call it this way:
phantomjs --ssl-protocol=any --ignore-ssl-errors=true scrape.js https://www.example.com
The SSL-related params are added to avoid other issues that I was having with certain HTTPS sites (related to certificate loading issues).
Hope this helps someone!

Fit content width-wise in pdf

When rendering to PDF, I need the HTML page to be 100% of the print width; otherwise content gets cut off. Is there an easy way to do this?
I came up with a work-around, which gets the HTML width after the page has loaded and then sets the zoom factor to force the correct width.
var page = require('webpage').create(),
    system = require('system'),
    dpi = 89.9,      // strange, but that's what I get on Mac
    pageWidth = 210; // in mm
var getWidth = function() {
    return page.evaluate(function() {
        // get width manually
        // ...
        return width;
    });
};
page.paperSize = {format: 'A4', orientation: 'portrait'};
page.open(system.args[1], function (status) {
    window.setTimeout(function () {
        page.zoomFactor = (pageWidth / 25.4) * dpi / getWidth();
        page.render(system.args[2]);
        phantom.exit();
    }, 200);
});
I'm looking for a more straightforward solution, because getting the width requires some tricks due to the framework I use.
Managed to solve this by adding the following CSS rule which will zoom out the page and make it fit:
html {
    zoom: 0.60;
}
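If you cannot edit the page's own stylesheet, the same rule can be applied from the PhantomJS script just before rendering. A small, hedged sketch (my addition, not part of the answer); address and output are assumed to be set up as in the question's script:
// Inject the zoom from PhantomJS so the target page itself needs no changes.
page.open(address, function (status) {
    page.evaluate(function () {
        document.documentElement.style.zoom = 0.60;
    });
    window.setTimeout(function () {
        page.render(output);
        phantom.exit();
    }, 200);
});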
Edit rasterize.js as follows.
First, make a backup of rasterize.js.
Then remove this block (approx. lines 18-31):
if (system.args.length > 4) {
    page.zoomFactor = system.args[4];
}
page.open(address, function (status) {
    if (status !== 'success') {
        console.log('Unable to load the address!');
        phantom.exit();
    } else {
        window.setTimeout(function () {
            page.render(output);
            phantom.exit();
        }, 200);
    }
});
Replace it with this code block
if (system.args.length > 4) {
    page.zoomFactor = parseFloat(system.args[4].split("=")[1]);
}
page.open(address, function (status) {
    if (status !== 'success') {
        console.log('Unable to load the address!');
        phantom.exit(1);
    } else {
        window.setTimeout(function () {
            console.log("zoomFactor: " + page.zoomFactor);
            page.evaluate(function(zoom) {
                return document.querySelector('body').style.zoom = zoom;
            }, page.zoomFactor); // <-- your zoom here
            page.render(output);
            phantom.exit();
        }, 200);
    }
});
Then you can export HTML to PDF on the fly like this:
~/bin/phantomjs ~/bin/phantomjs/examples/rasterize.js \
'http://example.com/index.html' \
mysite.pdf paperformat=A4 zoom=0.4

PhantomJS not returning results

I'm testing out PhantomJS and trying to return all startups listed on angel.co. I decided to go with PhantomJS since I would need to paginate through the front page by clicking "Next" at the bottom. Right now this code does not return any results. I'm completely new to PhantomJS and have read through all the code examples so any guidance would be much appreciated.
var page = require('webpage').create();
page.open('https://angel.co/startups', function(status) {
    if (status !== 'success') {
        console.log('Unable to access network');
    } else {
        page.evaluate(function() {
            var list = document.querySelectorAll('div.resume');
            for (var i = 0; i < list.length; i++){
                console.log((i + 1) + ":" + list[i].innerText);
            }
        });
    }
    phantom.exit();
});
By default, console messages logged inside the page will not appear in your PhantomJS console.
When you execute code with page.evaluate(...), that code runs in the context of the page. So when you call console.log((i + 1) + ":" + list[i].innerText);, the message is logged inside the headless browser itself rather than in PhantomJS's own output.
If you want all console messages to be passed along to PhantomJS itself, use the following after opening your page:
page.onConsoleMessage = function (msg) { console.log(msg); };
page.onConsoleMessage is triggered whenever you print to the console from within the page. With this callback, you're asking PhantomJS to echo the message to its own standard output stream.
For reference, your final code would look like (this prints successfully for me):
var page = require('webpage').create();
page.open('https://angel.co/startups', function(status) {
    page.onConsoleMessage = function (msg) { console.log(msg); };
    if (status !== 'success') {
        console.log('Unable to access network');
    } else {
        page.evaluate(function() {
            var list = document.querySelectorAll('div.resume');
            for (var i = 0; i < list.length; i++){
                console.log((i + 1) + ":" + list[i].innerText);
            }
        });
    }
    phantom.exit();
});
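An alternative worth noting (my sketch, not part of the answer above): page.evaluate can return serializable data to the outer script, so you can skip console forwarding entirely and print the results from PhantomJS itself.
var page = require('webpage').create();
page.open('https://angel.co/startups', function (status) {
    if (status !== 'success') {
        console.log('Unable to access network');
        phantom.exit(1);
        return;
    }
    // Collect the text inside the page and return it as a plain array of strings.
    var startups = page.evaluate(function () {
        var list = document.querySelectorAll('div.resume');
        return Array.prototype.map.call(list, function (el) {
            return el.innerText;
        });
    });
    startups.forEach(function (text, i) {
        console.log((i + 1) + ":" + text);
    });
    phantom.exit();
});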