I can't render this page with splash - scrapy-splash

I have tried all the suggestions in splash documentation:
This is the link: 'https://app.microacquire.com/startup/az1Hi7Znn9PpVc7I6EsgLbgQv4A3/uckABGneMbqRx0BtI866'
This is the script (I even added the same headers as in a normal request):
function main(splash, args)
splash.indexeddb_enabled = true
splash.plugins_enabled = true
splash.private_mode_enabled = false
["Connection"] = "keep-alive",
['authority']= 'app.microacquire.com',
['accept-language']= 'es-ES,es;q=0.9,en;q=0.8',
['cache-control']= 'max-age=0',
['if-modified-since']= 'Thu, 14 Jul 2022 16:35:49 GMT',
['if-none-match']= '"1d1d916cd820c5eb54078f6fe3134fb2187ea0a14f9117866436828d1e447fd1-br"',
['sec-ch-ua']= '".Not/A)Brand";v="99", "Google Chrome";v="103", "Chromium";v="103"',
['sec-ch-ua-mobile']= '?0',
['sec-ch-ua-platform']= 'Windows',
['sec-fetch-dest']= 'document',
['sec-fetch-mode']= 'navigate',
['sec-fetch-site']= 'none',
['sec-fetch-user']= '?1',
['upgrade-insecure-requests']= '1',
['user-agent']= 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/ Safari/537.36'
return {
html = splash:html(),
png = splash:png(),
har = splash:har(),
It renders only this output; it doesn't load completely:
splash image


BeautifulSoup: How to get only one part of <p> output?

I am trying to find a way to get only the price, without the surrounding text, from the paragraph.
from bs4 import BeautifulSoup
import requests
headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36"}
p = requests.get(url = 'https://www.tia-mobiteli.hr/detaljan-prikaz.aspx?gid=11-appise_64wheu', headers = headers)
soup = BeautifulSoup(p.content,'lxml')
price = soup.find('div', class_='widget widget-info widget-price').p.text
price2 = price.strip()
My output is:
Naša najniža cijena za gotovinsko/virmansko plaćanje: 3.649,00 kn
I want to get only:
3.649,00 kn
Or if it is possible:
The price is inside <b> tag:
import requests
from bs4 import BeautifulSoup
headers = {
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36"
p = requests.get(
soup = BeautifulSoup(p.content, "lxml")
price = soup.find("div", class_="widget widget-info widget-price").b.text
price = float(price.split()[0].replace(".", "").replace(",", "."))
You can use the parse module, which acts like a reverse of format().
import parse
float(parse.parse('Naša najniža cijena za gotovinsko/virmansko plaćanje: {} kn',price2)[0].replace('.','').replace(',','.'))

How to change the header just for a specific request in scrapy spider?

I am trying to build a web crawler using Scrapy. I want to change the user agent for a single request in the spider. I tried the code below, but the user agent is not being updated during the crawl process.
def start_requests(self):
request = Request(
meta={'xpaths': self.xpaths},
"User-Agent": "Googlebot-Image/1.0"
return [request]
Your code works perfectly (see my code). But some middleware on your side may affect your User-Agent header:
class UserAgentSpider(scrapy.Spider):
name = 'useragent_spider'
user_agents = [
{'title': 'Galaxy S9', 'value': 'Mozilla/5.0 (Linux; Android 8.0.0; SM-G960F Build/R16NW) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Mobile Safari/537.36'},
{'title': 'iPhone', 'value': 'Mozilla/5.0 (iPhone; CPU iPhone OS 12_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/69.0.3497.105 Mobile/15E148 Safari/605.1'},
{'title': 'Edge', 'value': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246'},
def start_requests(self):
for user_agent in self.user_agents:
yield scrapy.Request(
'user-agent': user_agent['value'],
'user_agent': user_agent['title']
def parse(self, response, user_agent):
with open(f"Samples/{user_agent}.htm", 'wb') as f:

Getting a 403 Forbidden when going to a URL with PhantomJS

If I go to the following web page in Chrome, it loads fine: https://www.cruisemapper.com/?poi=39
However, when I run the following PhantomJS script, which simply goes to the same URL and outputs the entire DOM string to the console, I get a 403 Forbidden message:
var page = require('webpage').create(),
url = 'https://www.cruisemapper.com/?poi=39';
page.open(url, function (status) {
if (status === 'success') {
console.log(page.evaluate(function () {
return document.documentElement.outerHTML;
Here's the exact output to the console:
<title>403 Forbidden</title>
<p>You don't have permission to access /
on this server.<br>
I thought that if I added some sort of user agent string, it might work. As such, I added the following above the console.log line:
page.settings.userAgent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36';
But that didn't work. So then I tried the following instead:
page.customHeaders = {
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'
But that didn't work either. Does anyone have any advice on how I can possibly hit up the URL above and not get a 403 Forbidden message? Thank you.
Your code works fine for me (I'd suggest viewport size emulation though, see code). If you still get a 403, try changing your IP; it's possible that the site is on to you now (you probably visited that page lots of times).
var page = require('webpage').create(),
url = 'https://www.cruisemapper.com/?poi=39';
page.settings.userAgent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36';
page.viewportSize = { width: 1440, height: 900 }; // <-- otherwise it's 400x300 by default
// It's good to watch for errors on the page
page.onError = function (msg, trace)
trace.forEach(function(item) {
console.log(' ', item.file, ':', item.line);
page.open(url, function (status) {
page.render("page.png"); // Also useful to check if you get what you expect
if (status === 'success') {
console.log(page.evaluate(function () {
return document.documentElement.outerHTML;

Mink + PhantomJS: How do I set the user agent?

I am using phantomjs with mink:
goutte: ~
browser: phantomjs
wd_host: http://localhost:8643/wd/hub
webStorageEnabled: true
But I need to masquerade as the latest chrome. I have tried this:
* #BeforeStep
public function masqueradeAsLatestChrome(StepEvent $event)
$this->getSession()->setRequestHeader('User-Agent', 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36');
But I get the exception:
Exception has been thrown in "beforeStep" hook, defined in FeatureContext::masqueradeAsLatestChrome()
Request header is not supported by Behat\Mink\Driver\Selenium2Driver
The version of Chrome isn't critical, but the web application must think it's talking to a very recent version of Chrome.
Selenium does not provide this capability, as it is not something a
user can do. It's recommended you use a proxy to inject additional
headers to the requests generated by the browser.
Sadly… However, PhantomJS does provide an interface for setting the headers. Your best shot would be to send a direct command to it using its REST API. There's also a cool PHP wrapper library that would make it 200 times easier.
You should use the new Behat/Mink driver made by Juan Francisco Calderón Zumba
Here is a direct link to the driver https://github.com/jcalderonzumba/MinkPhantomJSDriver
This driver allows you to specify the request headers that you need
(It works with Behat 3.0 but I think it requires at least PHP 5.4)
you can pass additional settings in selenium2driver via
so in your case:
goutte: ~
browser: phantomjs
wd_host: http://localhost:8643/wd/hub
webStorageEnabled: true
phantomjs.page.settings.userAgent: "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36"
For https://github.com/facebook/php-webdriver you can use this case:
$capabilities = [
WebDriverCapabilityType::BROWSER_NAME => 'phantomjs',
WebDriverCapabilityType::PLATFORM => 'ANY',
WebDriverCapabilityType::ACCEPT_SSL_CERTS => false,
WebDriverCapabilityType::JAVASCRIPT_ENABLED => true,
'phantomjs.page.settings.userAgent' => "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246",

CasperJS fetchText() function echoing blank output

I'm trying to use fetchText() to print out the URL of a google search result to the terminal. Here's the image of what exactly I'm trying to print.
It only prints out blank output, though! I don't see anything I'm doing wrong.
phantom.casperPath = "/usr/local/Cellar/casperjs/1.0.3/libexec/";
phantom.injectJs(phantom.casperPath + '/bootstrap.js');
var utils = require('utils');
var casper = require('casper').create();
casper.wait(3000, function () {
this.echo(this.fetchText('#rso > div:nth-child(1) > li:nth-child(1) > div > div > div > div.f.kv._TD > cite'));
Google will change the page depending on the useragent string. So you need to set a string during creation (with example string)
var casper = require("casper").create({
pageSettings: {
userAgent: "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36"
or with specific function
casper.userAgent("Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36");
Sometimes it is also necessary to set the viewport to something desktop-like, because PhantomJS' default viewport is 400x300 and Google might render a different site based on the viewport.