Loading webpage incompletely Phantomjs - authentication

I have problems capturing and loading the webpage:
https://myaccount.nytimes.com/auth/login
The username and password fields as well as the submit button are missing. What could be the reason? Thanks
As you can see the code listed below waits for each and all the resources to be loaded, one by one.
I did not write the code, however it is excellent and tested by many. The original can be found here:
https://gist.github.com/cjoudrey/1341747
(ALL the credits to the guy who wrote it.)
var resourceWait = 3000,
maxRenderWait = 30000,
url = 'https://myaccount.nytimes.com/auth/login'; //'https://twitter.com/#!/nodejs';
var page = require('webpage').create(),
count = 0,
forcedRenderTimeout,
renderTimeout;
page.viewportSize = { width: 1280, height : 1024 };
function doRender() {
page.render('twitter.png');
phantom.exit();
}
page.onResourceRequested = function (req) {
count += 1;
console.log('> ' + req.id + ' - ' + req.url);
clearTimeout(renderTimeout);
};
page.onResourceReceived = function (res) {
if (!res.stage || res.stage === 'end') {
count -= 1;
console.log(res.id + ' ' + res.status + ' - ' + res.url);
if (count === 0) {
renderTimeout = setTimeout(doRender, resourceWait);
}
}
};
page.open(url, function (status) {
if (status !== "success") {
console.log('Unable to load url');
phantom.exit();
} else {
forcedRenderTimeout = setTimeout(function () {
console.log(count);
doRender();
}, maxRenderWait);
}
});

Related

phantomjs XHR response code is null

I am using the below code to automate a web page. The web page has an XHR call which is triggered upon submitting the form. By using "onResourceRequest" and "onResponseReceived" I can look into the request body, but the response status code is always null.
PhantomJS version: 2.1.1
var page = require('webpage').create(), testindex = 0, loadInProgress = false;
var config = require('../../config/config.json');
var system = require('system');
console.log('Running Iteration 1 ...');
function waitFor(testFx, onReady, timeOutMillis) {
var maxtimeOutMillis = timeOutMillis ? timeOutMillis : 3000, //< Default Max Timout is 3s
start = new Date().getTime(),
condition = false,
interval = setInterval(function() {
if ( (new Date().getTime() - start < maxtimeOutMillis) && !condition ) {
// If not time-out yet and condition not yet fulfilled
condition = (typeof(testFx) === "string" ? eval(testFx) : testFx()); //< defensive code
} else {
if(!condition) {
// If condition still not fulfilled (timeout but condition is 'false')
console.log("'waitFor()' timeout");
phantom.exit(1);
} else {
// Condition fulfilled (timeout and/or condition is 'true')
console.log("'waitFor()' finished in " + (new Date().getTime() - start) + "ms.");
typeof(onReady) === "string" ? eval(onReady) : onReady(); //< Do what it's supposed to do once the condition is fulfilled
clearInterval(interval); //< Stop this interval
}
}
}, 250); //< repeat check every 250ms
};
page.onError = function (msg, trace) {
console.log(msg);
trace.forEach(function(item) {
console.log(' ', item.file, ':', item.line);
});
};
page.onConsoleMessage = function(msg, lineNum, sourceId) {
console.log(colors.cyan('CONSOLE: ' + msg + ' (from line #' + lineNum + ' in "' + sourceId + '")'));
};
page.onLoadStarted = function() {
loadInProgress = true;
console.log("load started ", page.url);
};
page.onResourceRequested = function(request) {
console.log('Request ' + request.url);
console.log('Request Header :', JSON.stringify(request.headers));
console.log('Request Body :', request.postData);
};
page.onResourceReceived = function(response) {
console.log('Receive ' + response.url, response.status);
if(response.status === null) {
console.log(JSON.stringify(response));
}
};
page.onLoadFinished = function() {
loadInProgress = false;
console.log("load finished ", page.url);
};
var steps = [
function () {
page.open(config.url, function (status) {
if (status !== 'success'){
console.log('Unable to access ', config.url); //This is a HTTPS endpoint
} else {
waitFor(function() {
// Check in the page if a specific element is now visible
return page.evaluate(function() {
return document.getElementById('code').value;
});
}, function() {
console.log("The page dialog should be visible now.");
// phantom.exit();
});
}
});
},
function () {
var isPostMessageSupported = page.evaluate(function() {
if(window.postMessage){
return true;
} else{
return false;
}
});
console.log('POST Message Support : ', isPostMessageSupported);
var guid = page.evaluate(function() {
return document.getElementById('guid').value;
});
console.log('GUID : ', guid);
page.evaluate( function () {
document.getElementById('userid').value = 'myusername';
document.getElementById('pass').value = 'mypassword';
});
},
function () {
page.evaluate(function () {
document.getElementById('myform').submit();
return;
});
},
function () {
page.render('iteration1.png');
},
];
function done(){
console.log('##completed');
}
setInterval(function() {
if (!loadInProgress && typeof steps[testindex] == "function") {
console.log("step " + (testindex + 1));
steps[testindex]();
testindex++;
}
if (typeof steps[testindex] != "function") {
console.log("test complete!");
done();
phantom.exit();
}
}, 50);
The response that I recieve for the XHR call is
{"contentType":null,"headers":[],"id":42,"redirectURL":null,"stage":"end","status":null,"statusText":null,"time":"2018-04-12T04:15:04.402Z","url":"https://example.com/p"}
I have tried all the phantomJS command line arguments
phantomjs --web-security=false --ssl-protocol=tlsv1 --ignore-ssl-errors=true phantomjs/iterations/iteration1.js
I have also tried adding header "Connection:keep-alive". But nothing solves my problem.
I am not sure what could be the problem here.

Sharp image uploader not working when running app with PM2

I have an express app that I am using Sharp to resize images after a user has uploaded them. When I start the app using 'npm start' I am able to upload the images with no issues, but if I use PM2 to manage the process the images don't get saved on the server. I can save them without using Sharp to resize them, it is only when I have the code for sharp that they don't get saved. Below is the code from my controller. Multer is processing the form and sharp is resizing the image.
doNewRecipe: function(req, res) {
for (var key in req.body) {
req.body[key] = req.body[key] || undefined;
}
var body = _.pick(req.body, 'title', 'description', 'ingredients', 'instructions', 'yield', 'prep_time', 'cook_time', 'categoryId');
body.userId = req.session.user.id;
if (req.file) {
var tempPath = req.file.path,
ext = path.extname(req.file.originalname).toLowerCase(),
//targetPath = path.resolve(finalUploadPath + req.file.filename + ext);
targetPath = path.resolve(finalUploadPath);
fs.renameSync(tempPath, tempPath + ext);
var newFileName = req.file.filename + ext;
var imageFile = tempPath + ext;
body.image = newFileName;
sharp(imageFile)
.resize(450, 450)
.max()
.toFile('./public/finalUpload/' + newFileName, function(err, info) {
body.image = newFileName;
fs.unlinkSync(path.resolve(tempPath + ext));
db.recipe.create(body).then(function(recipe) {
res.redirect('/recipe/view/' + recipe.id);
}, function(e) {
console.log(e.message);
res.render('error', {message: e.toString()});
});
});
//fs.renameSync(tempPath, targetPath);
} else {
db.recipe.create(body).then(function(recipe) {
res.redirect('/recipe/view/' + recipe.id);
}, function(e) {
console.log(e.message);
res.render('error', {message: e.toString()});
});
}
},
If you use path to determine where to save the file it will transport easily from one system to another. I actually created a few variables to determine where to save the file. Below is my code.
if (process.env.NODE_ENV === 'production') {
var finalUploadPath = 'hidden';
var googleTracking = true;
} else if (process.env.NODE_ENV === 'staging') {
var finalUploadPath = 'hidden';
var googleTracking = false;
} else {
var finalUploadPath = 'hidden';
var googleTracking = false;
}
sharp(imageFile)
.resize(450, 450)
.max()
.toFile(finalUploadPath + req.file.filename + '.jpg', function(err, info) {
body.image = req.file.filename + '.jpg';
fs.unlinkSync(path.resolve(tempPath + ext));
compressAndResize(path.resolve(__dirname, '../public/finalUpload/' + body.image));
db.recipe.create(body).then(function(recipe) {
req.flash('success', 'Recipe created');
res.redirect('/recipe/view/' + recipe.id + '/' + recipe.slug);
}, function(e) {
console.log(e.message);
req.flash('error', 'Error creating new recipe');
res.render('error', {message: e.toString(), csrfToken: req.csrfToken(), google: googleTracking});
});
});

Parse multiple pages with phantomjs

I have made a code that parses all URL-s from a page. Next, I would like to get a href from every parsed URL <div class="holder"></div> and output it to a file and sepparate with a comma.
So far I have made this code. It is able to find all the URL-s need to be parsed and collects them to a comma sepparated file called output2.txt.
var resourceWait = 300,
maxRenderWait = 10000,
url = 'URL TO PARSE HREF-s FROM';
var page = require('webpage').create(),
count = 0,
forcedRenderTimeout,
renderTimeout;
page.viewportSize = { width: 1280, height : 1024 };
function doRender() {
var fs = require('fs');
var path = 'output2.txt';
page.includeJs("http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js", function() {
fs.write(path,page.evaluate(function() {
return $('.urlDIV').find('a')
.map(function() {
return this.href;})
.get()
.join(',');
}), 'w');
phantom.exit()
});
}
page.onResourceRequested = function (req) {
count += 1;
clearTimeout(renderTimeout);
};
page.onResourceReceived = function (res) {
if (!res.stage || res.stage === 'end') {
count -= 1;
if (count === 0) {
renderTimeout = setTimeout(doRender, resourceWait);
}
}
};
page.open(url, function (status) {
if (status !== "success") {
phantom.exit();
} else {
forcedRenderTimeout = setTimeout(function () {
console.log(count);
doRender();
}, maxRenderWait);
}
});
Thanks in advance,
Martti

Can I get the failed request id when PhantomJS get resource error?

I don't quite understand why PhantomJS only returns the url of request for onResourceError callback, while for the other two resource callbacks it returns the request id. That makes "finding which request did actually fail" really impossible if there is more than one request to the same url. Does anyone knows how to get the failed request id?
Actually, that's just old documentation. onResourceError has the id of a failed request.
page.onResourceError = function(resourceError) {
console.log('Unable to load resource (request ID:' + resourceError.id + 'URL:' + resourceError.url + ')');
console.log('Error code: ' + resourceError.errorCode + '. Description: ' + resourceError.errorString);
};
Why do you really need to request id ?
Since onResourceError was added in 1.9, some information may be missing.
A way to resolve your problem is to keep in an array all requested resourcessuach as in netsniff example.
Here is a very basic implementation :
var page = require('webpage').create(),
system = require('system');
if (system.args.length === 1) {
console.log('Usage: netsniff.js <some URL>');
phantom.exit(1);
} else {
page.address = system.args[1];
page.resources = [];
page.onLoadStarted = function () {
page.startTime = new Date();
};
page.onResourceRequested = function (req) {
page.resources[req.id] = {
request: req,
startReply: null,
endReply: null
};
};
page.onResourceReceived = function (res) {
if (res.stage === 'start') {
page.resources[res.id].startReply = res;
}
if (res.stage === 'end') {
page.resources[res.id].endReply = res;
}
};
page.onResourceError = function(resourceError) {
console.log('Unable to load resource (URL:' + resourceError.url + ')');
console.log('Error code: ' + resourceError.errorCode + '. Description: ' + resourceError.errorString);
page.resources.forEach(function (resource) {
var request = resource.request,
endReply = resource.endReply;
if (request && request.url === resourceError.url && !endReply) {
console.log('request id was :' + request.id);
}
})
};
page.open(page.address, function (status) {
var har;
if (status !== 'success') {
console.log('FAIL to load the address');
phantom.exit(1);
} else {
page.endTime = new Date();
page.title = page.evaluate(function () {
return document.title;
});
console.log(page.title);
phantom.exit();
}
});
}
onResourceError, you just need to find first/last/all recorded urls that match the error resource url.

Browser loading php script after submitting form using jQuery Form plugin

I'm trying to implement a form using the jQuery Form plugin. The form has three text fields and a file input and I am validating the form in the beforeSend callback. The problem is, whether the validation passes or not, the php script that handles the file upload gets loaded in the browser, which, obviously is not what I want to happen - I need to stay on the form's page.
You can take a look at the form and it's dependent files at http://www.eventidewebdesign.com/public/testUpload/. Indexing is on for that directory, so you can take a look at all of the related files. The form itself is on testUpload.php.
I'd appreciate it if someone could take a look at my code and help me figure out what's going on here.
Please write the following script instead of your, this will work.
<script>
$(document).ready( function() {
// Initialize and populate the datepicker
$('#sermonDate').datepicker();
var currentDate = new Date();
$("#sermonDate").datepicker('setDate',currentDate);
$("#sermonDate").datepicker('option',{ dateFormat: 'mm/dd/yy' });
/*
* Upload
*/
// Reset validation and progress elements
var formValid = true,
percentVal = '0%';
$('#uploadedFile, #sermonTitle, #speakerName, #sermonDate').removeClass('error');
$('#status, #required').empty().removeClass();
$('.statusBar').width(percentVal)
$('.percent').html(percentVal);
$('#frmSermonUpload').ajaxForm({
beforeSend: function() {
if (!ValidateUploadForm()) {
formValid = false;
console.log('validateuploadform returned false');
} else {
console.log('validateuploadform returned true');
formValid = true;
}
console.log('in beforeSend. formValid: ' + formValid);
if (!formValid) {
$('#uploadedFile').val('');
return false;
}
},
uploadProgress: function(event, position, total, percentComplete) {
console.log('in uploadProgress function. formValid: ' + formValid);
if (formValid) {
var percentVal = percentComplete + '%';
$('.statusBar').width(percentVal)
$('.percent').html(percentVal);
}
},
complete: function(xhr) {
console.log('in complete function. formValid: ' + formValid);
if (formValid) {
console.log('xhr.responseText: ' + xhr.responseText);
console.log('formValid: ' + formValid);
if (xhr.responseText === 'success') {
$('.statusBar').width('100%');
$('.percent').html('100%');
$('#status').html('Successfully uploaded the sermon.').addClass('successUpload');
// Clear the form
ClearForm();
} else if (xhr.responseText === 'fail') {
$('#status').html('There was a problem uploading the file. Try again.<br>If the problem persists, contact your system administrator.').addClass('errorUpload');
}
}
}
}); // End Upload Status Bar
});
function ValidateUploadForm() {
// Reset errors and clear message
$('#uploadedFile, #sermonTitle, #speakerName, #sermonDate').removeClass('error');
$('#required').empty();
var result = true;
title = $('#sermonTitle').val(),
speaker = $('#speakerName').val(),
date = $('#sermonDate').val(),
fileName = $('#uploadedFile').val();
extension = $('#uploadedFile').val().split('.').pop().toLowerCase();
//if (fileName !== '' && extension !== 'mp3') {
if ((fileName === '') || (extension !== 'mp3')) {
$('#uploadedFile').addClass('error');
$('#required').html('Only mp3 files are allowed!');
return false;
} else if (fileName === '') {
result = false;
} else if (title === '') {
$('#sermonTitle').addClass('error');
result = false;
} else if (speaker === '') {
$('#speakerName').addClass('error');
result = false;
} else if (date === '') {
$('#sermonDate').addClass('error');
result = false;
}
console.log('returning ' + result + ' from the validateuploadform function');
if (!result) { $('#required').html('All fields are required.'); }
return result;
}
function ClearForm() {
$('#uploadedFile, #sermonTitle, #sermonDate, #speakerName').val('').removeClass();
}
</script>