Make Scrapy send POST data from Javascript function - scrapy

I'm playing with Scrapy and playing with this tutorial. Things look good but I noticed Steam changed their age check so there is no longer a form in DOM. So the suggested solution will not work:
form = response.css('#agegate_box form')
action = form.xpath('#action').extract_first()
name = form.xpath('input/#name').extract_first()
value = form.xpath('input/#value').extract_first()
formdata = {
name: value,
'ageDay': '1',
'ageMonth': '1',
'ageYear': '1955'
}
yield FormRequest(
url=action,
method='POST',
formdata=formdata,
callback=self.parse_product
)
Checking an example game that forces age check; I noticed the View Page button is no longer a form:
<a class="btnv6_blue_hoverfade btn_medium" href="#" onclick="ViewProductPage()"><span>View Page</span></a>
And the function being called will eventually call this one:
function CheckAgeGateSubmit( callbackFunc )
{
if ( $J('#ageYear').val() == 2019 )
{
ShowAlertDialog( '', 'Please enter a valid date' );
return false;
}
$J.post(
'https://store.steampowered.com/agecheckset/' + "app" + '/9200/',
{
sessionid: g_sessionID,
ageDay: $J('#ageDay').val(),
ageMonth: $J('#ageMonth').val(),
ageYear: $J('#ageYear').val()
}
).done( function( response ) {
switch ( response.success )
{
case 1:
callbackFunc();
break;
case 24:
top.location.reload();
break;
case 15:
case 2:
ShowAlertDialog( 'Error', 'There was a problem verifying your age. Please try again later.' );
break;
}
} );
}
So basically this is making a POST with some data...what would be the best way to do this in Scrapy, since this is not a form any longer? I'm just thinking on ignoring the code where the form is obtained and simply send the request with the FormRequest object...but is this the way to go? An alternative could also be setting cookies for age and pass it on every single request so possibly the age check is ignored altogether?
Thanks!

You should probably just set an appropriate cookie and you'll be let right through!
If you take a look at what your browser has when entering the page:
and replicate that in scrapy:
cookies = {
'wants_mature_content':'1',
'birthtime':'189302401',
'lastagecheckage': '1-January-1976',
}
url = 'https://store.steampowered.com/app/9200/RAGE/'
Request(url, cookies)
lastagecheckage should probably be enough on it's own but I haven't tested it.

Related

Community Connector getData() Request only uses the first two schema fields, not all four

I am building a Community Connector between Google Data Studio and SpyFu.com, in order to funnel SEO information for a specific url into the GDS Dashboard.
However, My getData() request only contains the first two fields from my Schema. As you can see, I have four listed in the code. The result is only the first two fields in the schema are printed to GDS.
I've been through tutorials, official documentation, YouTube videos, looked this issue up on google and checked out the community resources on GitHub.
//Step Two: Define getConfig()
function getConfig(request) {
var cc = DataStudioApp.createCommunityConnector();
var config = cc.getConfig();
config.newInfo()
.setId('instructions')
.setText('Give me SpyFu information on the following domain:');
config.newTextInput()
.setId('domain')
.setName('Enter the domain to search')
.setHelpText('e.g. ebay.com')
.setPlaceholder('ebay.com');
config.newTextInput()
.setId('SECRET_KEY')
.setName('Enter your API Secret Key')
.setHelpText('e.g. A1B2C3D4')
.setPlaceholder('A1B2C3D4');
config.setDateRangeRequired(false);
return config.build();
}
//Step Three: Define getSchema()
function getFields(request) {
var cc = DataStudioApp.createCommunityConnector();
var fields = cc.getFields();
var types = cc.FieldType;
var aggregations = cc.AggregationType;
fields.newDimension()
.setId('Keyword')
.setName('Keywords')
.setDescription('The keywords most often attributed to this domain.')
.setType(types.TEXT);
fields.newMetric()
.setId('Rank')
.setName('Rankings')
.setDescription('The ranking of the target site keyword on the Google Search Page.')
.setType(types.NUMBER);
fields.newMetric()
.setId('Local_Monthly_Searches')
.setName('Local Searches per Month')
.setDescription('Number of times, locally, that people have searched for this term within in the last month.')
.setType(types.NUMBER);
fields.newMetric()
.setId('Global_Monthly_Searches')
.setName('Global Searches per Month')
.setDescription('Number of times, globally, that people have searched for this term within in the last month.')
.setType(types.NUMBER);
return fields;
}
function getSchema(request) {
var fields = getFields(request).build();
return { schema: fields };
}
//Step Four: Define getData()
function responseToRows(requestedFields, response, domain) {
// Transform parsed data and filter for requested fields
return response.map(function(Array) {
var row = [];
requestedFields.asArray().forEach(function (field) {
switch (field.getId()) {
case 'Keyword':
return row.push(Array.term);
case 'Rank':
return row.push(Array.position);
case 'Local_Monthly_Searches':
return row.push(Array.exact_local_monthly_search_volume);
case 'Global_Monthly_Searches':
return row.push(Array.exact_global_monthly_search_volume);
case 'domain':
return row.push(domain);
default:
return row.push('');
}
});
return { values: row };
});
}
function getData(request) {
console.log("Request from Data Studio");
console.log(request);
var requestedFieldIds = request.fields.map(function(field) {
return field.name;
});
var requestedFields = getFields().forIds(requestedFieldIds);
// Fetch data from API
var url = [
'https://www.spyfu.com/apis/url_api/organic_kws?q='
+ request.configParams.domain
+ '&r=20'
+ '&p=[1 TO 10]'
+ '&api_key='
+ request.configParams.SECRET_KEY,
];
try {
var response = UrlFetchApp.fetch(url.join(''));
} catch (e) {
DataStudioApp.createCommunityConnector()
.newUserError()
.setDebugText('Failed URL Fetch Attempt. Exception details: ' + e)
.setText('There was an error accessing this domain. Try again later, or file an issue if this error persists.')
.throwException();
}
console.log("Response from API");
console.log(response);
//Parse data from the API
try {
var parsedResponse = JSON.parse(response);
} catch (e) {
DataStudioApp.createCommunityConnector()
.newUserError()
.setDebugText('Error parsing the JSON data. Exception details: ' + e)
.setText('There was an error parsing the JSON data. Try again later, or file an issue if this error persists.')
.throwException();
}
var rows = responseToRows(requestedFields, parsedResponse);
return {
schema: requestedFields.build(),
rows: rows
};
}
I need the GDS to post four columns of data. They are, "Keyword", "Rank", "Local Monthly Searches" and "Global Monthly searches".
I cannot figure out how to create a "fixed schema" so that the system always prints these four columns of data at every request. The tutorials and various documentation say it's possible, but not how to do it. Please help!
The number of metrics initially called up by the Google Community Connector is handled from the front-end, via Google Data Studio.
The back-end system (the Connector) only initially posts the default dimension and default metric. Getting the rest of the schemas to post should be handled when you are building a report on Google Data Studio. Simply click on the data set, select "data" on the right-hand menu, scroll down to either Metrics or Dimensions, and pick the ones you wish to add to the current set.
Note that these are the fields you established earlier in the coding process, when you were setting up your schemas.
Here, you're filtering your defined schema for fields that are present on the request object received by getData().
var requestedFieldIds = request.fields.map(function(field) {
return field.name;
});
var requestedFields = getFields().forIds(requestedFieldIds);
The visualization in Google Data Studio that is the catalyst for the request will determine which fields are requested.

SQLite database isn't holding multiple values under primary key in my Discord.js bot

To start off, here's my main bot.js code (Sorry if it seems like code spaghetti. I'm not the best with javascript):
const fs = require('fs');
const Discord = require('discord.js');
const client = new Discord.Client();
const auth = require('./auth.json');
const pack = require('./package.json');
const SQLite = require('better-sqlite3');
const sql = new SQLite('./scores.sqlite');
client.commands = new Discord.Collection();
const commandFiles = fs.readdirSync('./commands').filter(file => file.endsWith('.js'));
for (const file of commandFiles) {
const command = require(`./commands/${file}`);
client.commands.set(command.name, command);
}
client.on('ready', () => {
console.log(` Bot: ${client.user.tag}
Version: ${pack.version}
Logged in and clear for takeoff.`);
});
client.on("ready", () => {
const battle_leaderboard = sql.prepare("SELECT count(*) FROM sqlite_master WHERE type='table' AND name = 'scores';").get();
if (!battle_leaderboard['count(*)']) {
sql.prepare("CREATE TABLE scores (id TEXT PRIMARY KEY, user TEXT, guild TEXT, points INTEGER;").run();
sql.prepare("CREATE UNIQUE INDEX idx_scores_id ON scores (id);").run();
sql.pragma("synchronous = 1");
sql.pragma("journal_mode = wal");
}
client.getScore = sql.prepare("SELECT * FROM scores WHERE user = ? AND guild = ?");
client.setScore = sql.prepare("INSERT OR REPLACE INTO scores (id, user, guild, points) VALUES (#id, #user, #guild, #points);");
client.removeScore = sql.prepare("DELETE FROM scores WHERE user=?");
});
client.on('message', message => {
if (!message.content.startsWith(auth.prefix) || message.author.bot) return;
const args = message.content.slice(auth.prefix.length).split(/ +/);
const command = args.shift().toLowerCase();
switch (command) {
case 'ping':
client.commands.get('ping').execute(message, args);
break;
case 'mimic':
client.commands.get('mimic').execute(message, args);
break;
case 'add_role':
client.commands.get('add_role').execute(message, args);
break;
case 'remove_role':
client.commands.get('remove_role').execute(message, args);
break;
case 'bl_add_point':
if (message.member.roles.some(r=>["Fight Officiator"])) {
let score;
let member = message.mentions.members.first() || client.members.get(args[0]);
score = client.getScore.get(member.id, member.guild.id);
if (!score) {
score = { id: `${member.guild.id}-${member.id}`, user: member.username, guild: member.guild.id, points: 0}
}
score.points ++;
client.setScore.run(score);
message.channel.send(`Looks like ${member} got a win! Good one.`);
} else {
message.channel.send(`Sorry, this is only usable by GFOs.`);
};
break;
case '!bl_delete':
if (message.member.roles.some(r=>["Fight Officiator"])) {
let member = message.mentions.users.first();
client.removeScore.run(`${member.guild.id}-${member.id}`)
message.channel.send(`${member} has been set to zero on the leaderboard!`);
} else {
message.channel.send(`Sorry, this is only usable by GFOs.`);
};
break;
case 'bl':
const top5 = sql.prepare("SELECT * FROM scores WHERE guild = ? ORDER BY points DESC LIMIT 5;").all(message.guild.id);
const battleleaderboard = new Discord.RichEmbed()
.setColor('#FFD700')
.setTitle('The Battle Leaderboard - Current Rankings')
.setAuthor(`RankBot`)
.setDescription('The top fighters are displayed here.')
.setTimestamp()
.setFooter(`DM a Gang Fight Officiator to set up a spar with another member and possibly get your name registered onto RankBot's leaderboard!`);
for (const data of top5) {
battleleaderboard.addField(client.users.get(data.user), `${data.points} Win(s)`);
}
message.channel.send(battleleaderboard);
break;
};
});
client.login(auth.token);
I'm creating a bot that should be pretty simple:
The bot is a RankBot (at least that's what i dubbed it as), which is supposed to track the amount of wins in battles (since this is for a discord about a fighting game) using a special role that manually increments those wins with a command, then being able to pull up a leaderboard embed showing the people with the most wins. I'm testing it in a discord separate from the one it's for with a friend, and it seemingly worked until we commanded the bot to open the leaderboard. Nothing was shown. We thought it was strange, since it worked before when only one user was in the database, and I went to the PowerShell Command Line to see if there was an error message, and this showed up.
C:\Users\USER\Documents\OF_RankBot\bot.js:89
battleleaderboard.addField(client.users.get(data.user).tag || client.members.get(data.user).tag, `${data.points} Win(s)`);
^
TypeError: Cannot read property 'tag' of undefined
We were confused by what happened, so I removed the tag part of the command and used it again. The embed showed up this time, but it looked like this.
I assumed that what went wrong was that since they both had the same number of wins listed, the part of the bl command that sorts the users was confused. I then went and used bl_add_point on me and tried again. The near exact same embed was shown.
My guess as to what went wrong was that I messed up at some point when setting up the bl_add_point command, but I'm not sure how to fix the issue.
This isn't part of the main problem, but I can't seem to get bl_delete working either. No errors are shown when I use it. In fact, seemingly nothing happens. I'm not sure if these issues are connected, but I really only need the main issue fixed.
Thanks in advance.
edit: I found out partially what happened with the bl_delete issue, which was I simply put the prefix into the case when the prefix was already defined. Now when I put in the command, the PowerShell responds with:
C:\Users\USER\Documents\OF_RankBot\bot.js:72
client.removeScore.run(`${member.guild.id}-${member.id}`)
^
TypeError: Cannot read property 'id' of undefined

Paypal Php Sdk - NotifyUrl is not a fully qualified URL Error

I have this code
$product_info = array();
if(isset($cms['site']['url_data']['product_id'])){
$product_info = $cms['class']['product']->get($cms['site']['url_data']['product_id']);
}
if(!isset($product_info['id'])){
/*
echo 'No product info.';
exit();
*/
header_url(SITE_URL.'?subpage=user_subscription#xl_xr_page_my%20account');
}
$fee = $product_info['yearly_price_end'] / 100 * $product_info['fee'];
$yearly_price_end = $product_info['yearly_price_end'] + $fee;
$fee = ($product_info['setup_price_end'] / 100) * $product_info['fee'];
$setup_price_end = $product_info['setup_price_end'] + $fee;
if(isset($_SESSION['discountcode_amount'])){
$setup_price_end = $setup_price_end - $_SESSION['discountcode_amount'];
unset($_SESSION['discountcode_amount']);
}
$error = false;
$plan_id = '';
$approvalUrl = '';
$ReturnUrl = SITE_URL.'payment/?payment_type=paypal&payment_page=process_agreement';
$CancelUrl = SITE_URL.'payment/?payment_type=paypal&payment_page=cancel_agreement';
$now = $cms['date'];
$now->modify('+5 minutes');
$apiContext = new \PayPal\Rest\ApiContext(
new \PayPal\Auth\OAuthTokenCredential(
$cms['options']['plugin_paypal_clientid'], // ClientID
$cms['options']['plugin_paypal_clientsecret'] // ClientSecret
)
);
use PayPal\Api\ChargeModel;
use PayPal\Api\Currency;
use PayPal\Api\MerchantPreferences;
use PayPal\Api\PaymentDefinition;
use PayPal\Api\Plan;
use PayPal\Api\Patch;
use PayPal\Api\PatchRequest;
use PayPal\Common\PayPalModel;
use PayPal\Api\Agreement;
use PayPal\Api\Payer;
use PayPal\Api\ShippingAddress;
// Create a new instance of Plan object
$plan = new Plan();
// # Basic Information
// Fill up the basic information that is required for the plan
$plan->setName($product_info['name'])
->setDescription($product_info['desc_text'])
->setType('fixed');
// # Payment definitions for this billing plan.
$paymentDefinition = new PaymentDefinition();
// The possible values for such setters are mentioned in the setter method documentation.
// Just open the class file. e.g. lib/PayPal/Api/PaymentDefinition.php and look for setFrequency method.
// You should be able to see the acceptable values in the comments.
$setFrequency = 'Year';
//$setFrequency = 'Day';
$paymentDefinition->setName('Regular Payments')
->setType('REGULAR')
->setFrequency($setFrequency)
->setFrequencyInterval("1")
->setCycles("999")
->setAmount(new Currency(array('value' => $yearly_price_end, 'currency' => $cms['session']['client']['currency']['iso_code'])));
// Charge Models
$chargeModel = new ChargeModel();
$chargeModel->setType('SHIPPING')
->setAmount(new Currency(array('value' => 0, 'currency' => $cms['session']['client']['currency']['iso_code'])));
$paymentDefinition->setChargeModels(array($chargeModel));
$merchantPreferences = new MerchantPreferences();
// ReturnURL and CancelURL are not required and used when creating billing agreement with payment_method as "credit_card".
// However, it is generally a good idea to set these values, in case you plan to create billing agreements which accepts "paypal" as payment_method.
// This will keep your plan compatible with both the possible scenarios on how it is being used in agreement.
$merchantPreferences->setReturnUrl($ReturnUrl)
->setCancelUrl($CancelUrl)
->setAutoBillAmount("yes")
->setInitialFailAmountAction("CONTINUE")
->setMaxFailAttempts("0")
->setSetupFee(new Currency(array('value' => $setup_price_end, 'currency' => $cms['session']['client']['currency']['iso_code'])));
$plan->setPaymentDefinitions(array($paymentDefinition));
$plan->setMerchantPreferences($merchantPreferences);
// ### Create Plan
try {
$output = $plan->create($apiContext);
} catch (Exception $ex){
die($ex);
}
echo $output->getId().'<br />';
echo $output.'<br />';
Been working with paypal php sdk for some days now and my code stop working.
So i went back to basic and i am still getting the same damn error.
I am trying to create a plan for subscription but getting the following error:
"NotifyUrl is not a fully qualified URL"
I have no idea how to fix this as i dont use NotfifyUrl in my code?
Could be really nice if anyone had an idea how to fix this problem :)
Thanks
PayPal did a update to their API last night which has caused problem within their SDK.
They are sending back null values in their responses.
I MUST stress the error is not on sending the request to PayPal, but on processing their response.
BUG Report : https://github.com/paypal/PayPal-PHP-SDK/issues/1151
Pull Request : https://github.com/paypal/PayPal-PHP-SDK/pull/1152
Hope this helps, but their current SDK is throwing exceptions.
Use below simple fix.
Replace below function in vendor\paypal\rest-api-sdk-php\lib\PayPal\Api\MerchantPreferences.php
public function setNotifyUrl($notify_url)
{
if(!empty($notify_url)){
UrlValidator::validate($notify_url, "NotifyUrl");
}
$this->notify_url = $notify_url;
return $this;
}
If you get the same error for return_url/cancel_url, add the if condition as above.
Note: This is not a permanent solution, you can use this until getting the update from PayPal.
From the GitHub repo for the PayPal PHP SDK, I see that the error you mentioned is thrown when MerchantPreferences is not given a valid NotifyUrl. I see you're setting the CancelUrl and ReturnUrl, but not the NotifyUrl. You may simply need to set that as well, i.e.:
$NotifyUrl = (some url goes here)
$obj->setNotifyUrl($NotifyUrl);
Reason behind it!
error comes from.
vendor\paypal\rest-api-sdk-php\lib\PayPal\Validation\UrlValidator.php
line.
if (filter_var($url, FILTER_VALIDATE_URL) === false) {
throw new \InvalidArgumentException("$urlName is not a fully qualified URL");
}
FILTER_VALIDATE_URL: according to this php function.
INVALID URL: "http://cat_n.domain.net.in/"; // IT CONTAIN _ UNDERSCORE.
VALID URL: "http://cat-n.domain.net.in/"; it separated with - dash
here you can dump your url.
vendor\paypal\rest-api-sdk-php\lib\PayPal\Validation\UrlValidator.php
public static function validate($url, $urlName = null)
{
var_dump($url);
}
And then check this here: https://www.w3schools.com/PHP/phptryit.asp?filename=tryphp_func_validate_url
you can check here what character will reason for invalid.

Fiddler: Programmatically add word to Query string

Please be kind, I'm new to Fiddler
My purpose:I want to use Fiddler as a Google search filter
Summary:
I'm tired of manually adding "dog" every time I use Google.I do not want the "dog" appearing in my search results.
For example:
//www.google.com/search?q=cat+-dog
//www.google.com/search?q=baseball+-dog
CODE:
dog replaced with -torrent-watch-download
// ==UserScript==
// #name Tamper with Google Results
// #namespace http://superuser.com/users/145045/krowe
// #version 0.1
// #description This just modifies google results to exclude certain things.
// #match http://*.google.com
// #match https://*.google.com
// #copyright 2014+, KRowe
// ==/UserScript==
function GM_main () {
window.onload = function () {
var targ = window.location;
if(targ && targ.href && targ.href.match('https?:\/\/www.google.com/.+#q=.+') && targ.href.search("/+-torrent/+-watch/+-download")==-1) {
targ.href = targ.href +"+-torrent+-watch+-download";
}
};
}
//-- This is a standard-ish utility function:
function addJS_Node(text, s_URL, funcToRun, runOnLoad) {
var D=document, scriptNode = D.createElement('script');
if(runOnLoad) scriptNode.addEventListener("load", runOnLoad, false);
scriptNode.type = "text/javascript";
if(text) scriptNode.textContent = text;
if(s_URL) scriptNode.src = s_URL;
if(funcToRun) scriptNode.textContent = '(' + funcToRun.toString() + ')()';
var targ = D.getElementsByTagName('head')[0] || D.body || D.documentElement;
targ.appendChild(scriptNode);
}
addJS_Node (null, null, GM_main);
At first I was going to go with Tampermonkey userscripts,Because I did not know about Fiddler
==================================================================================
Now,lets focus on Fiddler
Before Request:
I want Fiddler to add text at the end of Google Query string.
Someone suggested me to use
static function OnBeforeRequest(oSession: Session) {
if (oSession.uriContains("targetString")) {
var sText = "Enter a string to append to a URL";
oSession.fullUrl = oSession.fullUrl + sText;
}
}
Before Response:
This is where my problem lies
I totally love the HTML response,Now I just want to scrape/hide the word in the search box without changing the search results.How can it be done? Any Ideas?
http://i.stack.imgur.com/4mUSt.jpg
Can you guys please take the above information and fix the problem for me
Thank you
Basing on goal definition above, I believe you can achieve better results with your own free Google custom search engine service. In particular, because you have control over GCSE fine-tuning results, returned by regular Google search.
Links:
https://www.google.com/cse/all
https://developers.google.com/custom-search/docs/structured_search

Use of SPStatefulLongOperation

Can someone give me an example of the use of SPStatefulLongOperation? It's very poorly documented.
Here's an example of code I've just used. It applies a ThmxTheme (selectedTheme) to all SPWebs in an SPSite (site).
SPStatefulLongOperation.Begin(
"Applying theme to sites.",
"<span id='trailingSpan'></span>",
(op) =>
{
op.Run((opState) =>
{
for (int i = 0; i < site.AllWebs.Count; i++)
{
// Update status.
opState.Status = String.Format(
"<script type='text/javascript'>document.all.item('trailingSpan').innerText = '{0} ({1} of {2})';</script>",
site.AllWebs[i].Title,
i + 1,
site.AllWebs.Count);
// Set the theme.
selectedTheme.ApplyTo(site.AllWebs[i], true);
}
});
op.End(System.Web.HttpContext.Current.Request.UrlReferrer.ToString());
});
Note that the current value of opState.State is appended to the client's HTML (via HttpContext.Current.Response.Write and .Flush) every second. Thus you don't want to send any status message directly; you want to send some JavaScript that will update an existing status element on the page. (Here, the trailingSpan element.)