CasperJS looking for 404 error on links site

CasperJS looking for 404 error on links site - testing

I'm beginner programmer. I found nice script
http://planzero.org/blog/2013/03/07/spidering_the_web_with_casperjs
I tried to rewrite this script with CasperJS test framework.
I would to get xunit report from this code
var startUrl = 'http://yoursite.foo';
var visitedUrls = [], pendingUrls = [];
var casper = require('casper').create({
pageSettings: {
loadImages: false,
loadPlugins: false
}});
var utils = require('utils')
var helpers = require('helpers')
// Spider from the given URL
casper.test.begin('href' , function(test) {
casper.start(startUrl, function() {
function spider(url) {
// Add the URL to the visited stack
visitedUrls.push(url);
// Open the URL
casper.open(url).then(function() {
test.assertHttpStatus(200, ":" + url);
// Find links present on this page
var links = this.evaluate(function() {
var links = [];
Array.prototype.forEach.call(__utils__.findAll('a'), function(e) {
links.push(e.getAttribute('href'));
});
return links;
});
// Add newly found URLs to the stack
var baseUrl = this.getGlobal('location').origin;
Array.prototype.forEach.call(links, function(link) {
var newUrl = helpers.absoluteUri(baseUrl, link);
if (pendingUrls.indexOf(newUrl) == -1 && visitedUrls.indexOf(newUrl) == -1 && !(link.search(startUrl) == -1)) {
pendingUrls.push(newUrl);
}
});
// If there are URLs to be processed
if (pendingUrls.length > 0) {
var nextUrl = pendingUrls.shift();
spider(nextUrl);
}
else {
console.log('links ended');
this.break;
}
});
}
spider(startUrl);
}).run(function(){
test.done();
});
});
Script is running but when he and Job I can't get report.

If you're trying to learn how to use CasperJS you need to start with a smaller example than that. That script is a mess which goes after a site named yoursite.foo (maybe you put that name in there?)
I would take small steps. I have a video which may help explain how to use CasperJS.
http://www.youtube.com/watch?v=Kefil5tCL9o

Related

How can i scrape images from website

Hie I am trying to scrape images from website. I all seems easy when i try to scrape image with src attribute. But when using srcset throws an error undefined and doesnt work ???
I tried it by
var page = require('webpage').create();
page.onLoadFinished = function(){
var urls = page.evaluate(function(){
var image_urls = new Array;
var images = document.getElementsByTagName("img");
for(q = 0; q < images.length; q++){
images[q].srcset = images[q].src;
image_urls.push(images[q].src);
}
return image_urls;
});
console.log(urls.length);
console.log(urls[0]);
phantom.exit();
}
page.open('https://www.example.com/Food/Pears/Anjou-Pears');
All i want to do is that i can input url and it extracts and downloads image from url.
Upadate: I also tried following code to get img url and got me that phantom js crashed i.e"PhantomJS has crashed. Please read the bug reporting guide at
http://phantomjs.org/bug-reporting.html and file a bug report.
Segmentation fault: 11".
var url = "https://www.example.com/Food/Fruits/Pears/Anjou-Pears/p/20174514001_KG";
var page = require( 'webpage' ).create();
page.open(url, function( status ) {
if ( status === 'success' ) {
page.includeJs('https://ajax.googleapis.com/ajax/libs/jquery/3.2.1/jquery.min.js', function() {
var link = page.evaluate(function() {
if($( 'img' ).length != 0)
{
return $( 'img' ).attr('srcset');
}
});
console.log( link );
phantom.exit();
});
} else {
console.log( 'FAIL' );
}
});

Looping throw links using PhantomJS

There is a problem. I have some urls. And there are a list of links on this urls, which I want to visit. Each of this links. There is no problem with looping throw urls, but problems with this links. Here is my code...
var urls = [];
var TEMPLATE = 'https://example.com/page/'
for (var i = 1; i > 0; i--) {
urls.push(TEMPLATE + i);
}
var page = require('webpage').create();
//Here is looping throw urls
function process(){
if (urls.length == 0){
phantom.exit();
} else{
url = urls.pop();
page = require('webpage').create();
page.open(url, onFinishedLoading);
}
}
function onFinishedLoading(status){
var links = page.evaluate(function() {
var arr = [];
//Here we are grab links inside urls
$('some.selector').each(function() {
arr.push( $('a', $(this)).attr("href"))
});
return arr;
});
//And this is just my tries to visit this links
link = links.pop();
//Just fine. Get the link
console.log(link);
parse(link);
setTimeout(function parse(link) {
page.open(link, function(status) {
var parsing = page.evaluate(function() {
return link + status;
});
//Don't work :(
console.log(parsing);
});
}, 1500);
page.release();
process();
// return links;
}
process();
Sorry for my silly question, i little know at phatom and JS.
Wish you can help me

Logs for Protractor

I am new to protractor and want to create logs for my test cases. I used if and else to write logs. I wanted to know if there is any better way of writing logs for protractor test cases?
My Code:
var colors = require('colors/safe');
var mapFeedBackpage=require('./mapFeedBack-page.js')
describe("Map feedback Automation",function()
{
var mapFeedBack= new mapFeedBackpage();
it("Check if the Url works ",function() //spec1
{
console.log("Checking the url :"+browser.params.url+'\n')
browser.get(browser.params.url);
browser.getCurrentUrl().then(function(value){
if(/report/.test(value) === false) {
fail("Result: URL doesnt works-FAIL \n");
}
else
{
console.log(colors.green("PASS :")+browser.params.url+ "is reachable \n");
}
});
});
it("test browser should reach report road option",function() //spec2s
{
console.log("Checking if road report option is available \n");
mapFeedBack.REPORT_ROAD.click();
expect(browser.getCurrentUrl()).toContain("report_road");
browser.getCurrentUrl().then(function(value){
if(/report_road/.test(value) === false) {
fail("Result: URL doesnt works-FAIL");
}
else
{
console.log(colors.green("PASS")+" Road report option is available");
}
});
});

Yes, you can use https://www.npmjs.com/package/log4js which is basically log4j module for nodejs apps. Since protractor is nodejs program it would certainly support this. It's very easy to implement this-
var log4js = require('log4js');
var logger = log4js.getLogger();
logger.debug("Some debug messages");
or you could write a custom logger:
var logger = exports;
logger.debugLevel = 'warn';
logger.log = function(level, message) {
var levels = ['error', 'warn', 'info'];
if (levels.indexOf(level) >= levels.indexOf(logger.debugLevel) ) {
if (typeof message !== 'string') {
message = JSON.stringify(message);
};
console.log(level+': '+message);
}
}
and then use this in your scripts as :
var logger = require('./logger');
logger.debugLevel = 'warn';
logger.log('info', 'Everything started properly.');
logger.log('warn', 'Running out of memory...');
logger.log('error', { error: 'flagrant'});

rtcmulticonnection: webrtc recordrtc not returning a blob

I am trying to simply record the webrtc video using what I though was a standard example. The library is here: https://github.com/muaz-khan/RTCMultiConnection
rtcMultiConnection.onstream = function(e) {
var mediaElement = getMediaElement(e.mediaElement, {
onRecordingStarted: function(type) {
// www.RTCMultiConnection.org/docs/startRecording/
rtcMultiConnection.streams[e.streamid].startRecording();
},
onRecordingStopped: function(type) {
// www.RTCMultiConnection.org/docs/stopRecording/
rtcMultiConnection.streams[e.streamid].stopRecording(function(blob){
console.log("test");
console.log(blob);
});
}});}
I can follow the steps through the function calls, the issue is that the callback is never run from recordrtc.js....
It goes to line 100 of https://github.com/muaz-khan/RecordRTC/blob/master/RecordRTC.js
There it runs:
mediaRecorder.stop(_callback);
Which never calls the callback....
Even calling functions directly doesn't work:
console.log(rtcMultiConnection.streams[e.streamid].audioRecorder.getBlob());
console.log(rtcMultiConnection.streams[e.streamid].videoRecorder.save("a.png"));
I am wondering if two different versions of recordrtc and rtcmulticonneciton are interacting.... Any ideas? Maybe an older recordrtc, but I can't find an older version

Please use blob.video:
var stream = connection.streams['stream-id'];
stream.stopRecording(function(blob) {
var h2;
if (blob.audio) {
h2 = document.createElement('h2');
h2.innerHTML = 'Open recorded ' + blob.audio.type + '';
div.appendChild(h2);
}
if (blob.video) {
h2 = document.createElement('h2');
h2.innerHTML = 'Open recorded ' + blob.video.type + '';
div.appendChild(h2);
}
});
Updated at March 29, 2016
Here is the actual documentation:
http://www.rtcmulticonnection.org/docs/startRecording/
Please make sure that:
You are using v2.2.2
You called startRecording first
For v3, you can directly use the RecordRTC:
connection.onstream = function(event) {
recordStream(event.stream);
};
function recordStream(stream) {
if (!!window.recorder) return;
window.recorder = RecordRTC(stream, {
type: 'video'
});
recorder.startRecording();
}
btnStopRecording.onclick = function() {
if (!window.recorder) return;
recorder.stopRecording(function() {
var blob = recorder.blob;
// or dataURL
recorder.getDataURL(func_callback);
});
};
btnStartRecording.onclick = function() {
var stream = connection.attachStreams[0];
recordStream(straem);
// or
var stream = connection.streamEvents['stream-id'].stream;
recordStream(straem);
};
Above snippet can be used within v2.2.2 as well.

Phantomjs Automation of a website leads me to getting IP blocked

I'm using PhantomJS to automate a page. What I do is:
do{
console.log(i);
i++;
page.open(url);
do { phantom.page.sendEvent('mousemove'); } while (page.loading);
if(page.injectJs('./Search.js') == false){
console.log("Search.js Failed")
}
var links = page.evaluate(function(json){
return search(json)
},json)
console.log(links);
} while(links == "")
So this leads me to opening the website repeated until what I'm looking for appears. But this also leads me to getting IP banned. What can I do to get around this?

Your IP is probably getting banned because the script generates too many requests to the website in very little time. So, you need to throttle requests, to apply a pause between them.
I would rewrite your script like this:
var page = require('webpage').create();
var url = "http://www.website.tld/";
var json = {"some" : "json"};
var i = 0;
var links;
// We abstract main code to a function so that we can call it
// again and again from itself
function getlinks (url, json) {
i++;
console.log(i);
page.open(url);
do { phantom.page.sendEvent('mousemove'); } while (page.loading);
if(page.injectJs('./Search.js') == false){
console.log("Search.js Failed")
}
var links = page.evaluate(function(json){
return search(json);
}, json);
if(links == "")
{
// No links scraped yet, so we wait for 3 seconds and try again
setTimeout(function(){
getlinks(url, json);
}, 3000)
}
else
{
console.log(links);
phantom.exit();
}
}
getlinks(url, json);

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

CasperJS looking for 404 error on links site - testing

Related

How can i scrape images from website

Looping throw links using PhantomJS

Logs for Protractor

rtcmulticonnection: webrtc recordrtc not returning a blob

Phantomjs Automation of a website leads me to getting IP blocked

Categories

Resources