How can I run different spiders at the same time with different CrawlerRunner settings? - scrapy

Default usage is:
import scrapy
from twisted.internet import reactor
from scrapy.crawler import CrawlerRunner
from scrapy.utils.log import configure_logging
from scrapy.utils.project import get_project_settings

class MySpider1(scrapy.Spider):
    # Your first spider definition
    ...

class MySpider2(scrapy.Spider):
    # Your second spider definition
    ...

configure_logging()
settings = get_project_settings()
runner = CrawlerRunner(settings)
runner.crawl(MySpider1)
runner.crawl(MySpider2)
d = runner.join()
d.addBoth(lambda _: reactor.stop())
My code:
import scrapy
from scrapy.crawler import CrawlerRunner
from twisted.internet import reactor

runner1 = CrawlerRunner(settings={
    "FEEDS": {
        r"file:///C:\\Users\Messi\\1.json": {"format": "json", "overwrite": True}
    },
})
runner2 = CrawlerRunner(settings={
    "FEEDS": {
        r"file:///C:\\Users\Messi\\2.json": {"format": "json", "overwrite": True}
    },
})
runner3 = CrawlerRunner(settings={
    "FEEDS": {
        r"file:///C:\\Users\Messi\\3.json": {"format": "json", "overwrite": True}
    },
})

h = runner1.crawl(Live1)
h.addBoth(lambda _: reactor.stop())
a = runner2.crawl(Live2)
a.addBoth(lambda _: reactor.stop())
t = runner3.crawl(Live3)
t.addBoth(lambda _: reactor.stop())
reactor.run()
The code above does not work!
How can I run different spiders at the same time when they have different CrawlerRunner settings?
The settings are different, so I used a separate variable for each runner (runner1, runner2, runner3, ...).
What would be the right usage? Could you please help me with this topic?
Thanks very much.

Like I said in the comment, I think that using custom_settings is better.
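For completeness, here is a minimal sketch of the custom_settings approach I mean (the spider bodies, start URLs and feed file names below are placeholders I made up, not taken from the question): each spider carries its own FEEDS setting, so a single CrawlerRunner is enough.

import scrapy
from twisted.internet import reactor
from scrapy.crawler import CrawlerRunner

class ExampleSpider1(scrapy.Spider):
    name = "example1"
    start_urls = ["https://example.com"]
    # Per-spider settings: this spider writes its items to its own feed.
    custom_settings = {
        "FEEDS": {"1.json": {"format": "json", "overwrite": True}},
    }

    def parse(self, response):
        yield {"url": response.url}

class ExampleSpider2(scrapy.Spider):
    name = "example2"
    start_urls = ["https://example.com"]
    custom_settings = {
        "FEEDS": {"2.json": {"format": "json", "overwrite": True}},
    }

    def parse(self, response):
        yield {"url": response.url}

runner = CrawlerRunner()
runner.crawl(ExampleSpider1)
runner.crawl(ExampleSpider2)
d = runner.join()
d.addBoth(lambda _: reactor.stop())
reactor.run()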
Anyway, this is what worked for me:
from twisted.internet import reactor
from scrapy.crawler import CrawlerRunner
from scrapy.utils.log import configure_logging
from tempbuffer.spiders.spider1 import ExampleSpider1
from tempbuffer.spiders.spider import ExampleSpider2

configure_logging({'LOG_FORMAT': '%(levelname)s: %(message)s'})

runner = CrawlerRunner(settings={
    "FEEDS": {
        "1.json": {"format": "json", "overwrite": True}
    }})
runner.crawl(ExampleSpider1)

runner = CrawlerRunner(settings={
    "FEEDS": {
        "2.json": {"format": "json", "overwrite": True}
    }})
runner.crawl(ExampleSpider2)

d = runner.join()
d.addBoth(lambda _: reactor.stop())
reactor.run()
Another way:
from twisted.internet import reactor
from scrapy.crawler import CrawlerRunner
from scrapy.utils.log import configure_logging
from tempbuffer.spiders.spider1 import ExampleSpider1
from tempbuffer.spiders.spider import ExampleSpider2

configure_logging({'LOG_FORMAT': '%(levelname)s: %(message)s'})

runner1 = CrawlerRunner(settings={
    "FEEDS": {
        "1.json": {"format": "json", "overwrite": True}
    }})
runner2 = CrawlerRunner(settings={
    "FEEDS": {
        "2.json": {"format": "json", "overwrite": True}
    }})

d = runner1.crawl(ExampleSpider2)
runner2.crawl(ExampleSpider1)
d.addBoth(lambda _: reactor.stop())
reactor.run()
1.json:
[
{"title": "Short Dress", "price": "$24.99"},
{"title": "Patterned Slacks", "price": "$29.99"},
{"title": "Short Chiffon Dress", "price": "$49.99"},
{"title": "Off-the-shoulder Dress", "price": "$59.99"},
{"title": "V-neck Top", "price": "$24.99"},
{"title": "Short Chiffon Dress", "price": "$49.99"},
{"title": "V-neck Top", "price": "$24.99"},
{"title": "V-neck Top", "price": "$24.99"},
{"title": "Short Lace Dress", "price": "$59.99"}
]
2.json:
[
{"title": "Long-sleeved Jersey Top", "price": "$12.99"}
]
I kind of guessed my way to this answer, and I'm not sure which of these is better. If someone wants to correct/explain/clarify in the comments, I'll be glad.

Related

scrapy urljoin inconsistent and incomplete?

I'm trying to get all the xml file links from this domain. When I use the scrapy shell, I get the relative link I am expecting.
>>> response.xpath('//div[@class="toolbar"]/a[contains(@href, ".xml")]/@href').extract()[1]
'/dhq/vol/16/3/000642.xml'
But when I try to yield all the links, I end up with a CSV in which the links are all incomplete, or are just the root link repeated many times over.
Example dataset: https://pastebin.com/JqCKnxV5
import scrapy
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor

class DhqSpider(CrawlSpider):
    name = 'dhq'
    allowed_domains = ['digitalhumanities.org']
    start_urls = ['http://www.digitalhumanities.org/dhq/vol/16/3/index.html']

    rules = (
        Rule(LinkExtractor(allow='index.html')),
        Rule(LinkExtractor(allow='vol'), callback='parse_xml'),
    )

    def parse_xml(self, response):
        xmllinks = response.xpath('//div[@class="toolbar"]/a[contains(@href, ".xml")]/@href').extract()[1]
        for link in xmllinks:
            yield {
                'file_urls': [response.urljoin(link)]
            }
What am I missing in my urljoin that's creating these incomplete and/or root links?
CrawlSpider scrapes data from each of the detail pages, but your selection extracts a single string (the second matching href), and looping over that string iterates it character by character, which is what produces the incomplete and root links. You only need to select one element, and you can apply the built-in indexing of the XPath expression to avoid the unnecessary for loop.
import scrapy
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor

class DhqSpider(CrawlSpider):
    name = 'dhq'
    allowed_domains = ['digitalhumanities.org']
    start_urls = ['http://www.digitalhumanities.org/dhq/vol/16/3/index.html']

    rules = (
        Rule(LinkExtractor(allow='index.html')),
        Rule(LinkExtractor(allow='vol'), callback='parse_xml'),
    )

    def parse_xml(self, response):
        xmllink = response.xpath('(//div[@class="toolbar"]/a[contains(@href, ".xml")]/@href)[1]').get()
        yield {
            'file_urls': response.urljoin(xmllink)
        }
Output:
{'file_urls': 'http://www.digitalhumanities.org/dhq/vol/12/1/000355.xml'}
2022-12-14 20:28:58 [scrapy.core.engine] DEBUG: Crawled (200) <GET http://www.digitalhumanities.org/dhq/vol/12/1/000346/000346.html> (referer: http://www.digitalhumanities.org/dhq/vol/12/1/index.html)
2022-12-14 20:28:58 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.digitalhumanities.org/dhq/vol/12/1/000346/000346.html>
{'file_urls': 'http://www.digitalhumanities.org/dhq/vol/12/1/000346.xml'}
2022-12-14 20:29:03 [scrapy.core.engine] DEBUG: Crawled (200) <GET http://www.digitalhumanities.org/dhq/vol/12/1/000362/000362.html> (referer: http://www.digitalhumanities.org/dhq/vol/12/1/index.html)
2022-12-14 20:29:03 [scrapy.core.scraper] DEBUG: Scraped from <200 http://www.digitalhumanities.org/dhq/vol/12/1/000362/000362.html>
{'file_urls': 'http://www.digitalhumanities.org/dhq/vol/12/1/000362.xml'}
2022-12-14 20:29:03 [scrapy.core.engine] INFO: Closing spider (finished)
2022-12-14 20:29:03 [scrapy.statscollectors] INFO: Dumping Scrapy stats:
{'downloader/request_bytes': 242004,
'downloader/request_count': 754,
'downloader/request_method_count/GET': 754,
'downloader/response_bytes': 69368110,
'downloader/response_count': 754,
'downloader/response_status_count/200': 754,
'dupefilter/filtered': 3221,
'elapsed_time_seconds': 51.448049,
'finish_reason': 'finished',
'finish_time': datetime.datetime(2022, 12, 14, 14, 29, 3, 317586),
'item_scraped_count': 697,
... so on
UPDATE:
import scrapy
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor

class DhqSpider(CrawlSpider):
    name = 'dhq'
    allowed_domains = ['digitalhumanities.org']
    start_urls = ['http://www.digitalhumanities.org/dhq/vol/16/3/index.html']

    rules = (
        Rule(LinkExtractor(allow='index.html')),
        Rule(LinkExtractor(allow='vol'), callback='parse_xml'),
    )

    def parse_xml(self, response):
        #xmllink = response.xpath('(//div[@class="toolbar"]/a[contains(@href, ".xml")]/@href)[1]').get()
        #'file_urls': response.urljoin(xmllink)
        yield {
            'title': response.css('h1.articleTitle::text').get().strip().replace('\n', ' ').replace('\t', ''),
            'author': response.css('div.author a::text').get().strip(),
            'pubinfo': response.css('div#pubInfo::text').getall(),
            'xmllink': response.urljoin(response.xpath('(//div[@class="toolbar"]/a[contains(@href, ".xml")]/@href)[1]').get()),
            #'referrer_url': response.url
        }
OUTPUT:
{
"title": "Textension: Digitally Augmenting Document Spaces in Analog Texts",
"author": "Adam James Bradley",
"pubinfo": [
"2019",
"Volume 13 Number 3"
],
"xmllink": "http://www.digitalhumanities.org/dhq/vol/13/3/000426.xml"
},
{
"title": "Building the",
"author": "Cait Coker",
"pubinfo": [
"2019",
"Volume 13 Number 3"
],
"xmllink": "http://www.digitalhumanities.org/dhq/vol/13/3/000428.xml"
},
{
"title": "Dendrography and Art History: a computer-assisted analysis of Cézanne’s",
"author": "Melinda Weinstein",
"pubinfo": [
"2019",
"Volume 13 Number 3"
],
"xmllink": "http://www.digitalhumanities.org/dhq/vol/13/3/000423.xml"
},
{
"title": "The Invisible Work of the Digital Humanities Lab: Preparing Graduate Students for Emergent Intellectual and Professional Work",
"author": "Dawn Opel",
"pubinfo": [
"2019",
"Volume 13 Number 2"
],
"xmllink": "http://www.digitalhumanities.org/dhq/vol/13/2/000421.xml"
},
{
"title": "Modelling Medieval Hands: Practical OCR for Caroline Minuscule",
"author": "Brandon W. Hawk",
"pubinfo": [
"2019",
"Volume 13 Number 1"
],
"xmllink": "http://www.digitalhumanities.org/dhq/vol/13/1/000412.xml"
},
{
"title": "Introduction: Questioning",
"author": "Tarez Samra Graban",
"pubinfo": [
"2019",
"Volume 13 Number 2"
],
"xmllink": "http://www.digitalhumanities.org/dhq/vol/13/2/000416.xml"
},
{
"title": "Racism in the Machine: Visualization Ethics in Digital Humanities Projects",
"author": "Katherine Hepworth",
"pubinfo": [
"2018",
"Volume 12 Number 4"
],
"xmllink": "http://www.digitalhumanities.org/dhq/vol/12/4/000408.xml"
},
{
"title": "Narrelations — Visualizing Narrative Levels and their Correlations with Temporal Phenomena",
"author": "Hannah Schwan",
"pubinfo": [
"2019",
"Volume 13 Number 3"
],
"xmllink": "http://www.digitalhumanities.org/dhq/vol/13/3/000414.xml"
},
{
"title": "Towards 3D Scholarly Editions: The Battle of Mount Street Bridge",
"author": "Costas Papadopoulos",
"pubinfo": [
"2019",
"Volume 13 Number 1"
],
"xmllink": "http://www.digitalhumanities.org/dhq/vol/13/1/000415.xml"
},
{
"title": "Visual Communication and the promotion of Health: an exploration of how they intersect in Italian education",
"author": "Viviana De Angelis",
"pubinfo": [
"2018",
"Volume 12 Number 4"
],
"xmllink": "http://www.digitalhumanities.org/dhq/vol/12/4/000407.xml"
},
{
"title": "Best Practices: Teaching Typographic Principles to Digital Humanities Audiences",
"author": "Amy Papaelias",
"pubinfo": [
"2018",
"Volume 12 Number 4"
],
"xmllink": "http://www.digitalhumanities.org/dhq/vol/12/4/000405.xml"
},
{
"title": "Placing Graphic Design at the Intersection of Information Visualization Fields",
"author": "Yvette Shen",
"pubinfo": [
"2018",
"Volume 12 Number 4"
],
"xmllink": "http://www.digitalhumanities.org/dhq/vol/12/4/000406.xml"
},
{
"title": "Making and Breaking: Teaching Information Ethics through Curatorial Practice",
"author": "Christina Boyles",
"pubinfo": [
"2018",
"Volume 12 Number 4"
],
"xmllink": "http://www.digitalhumanities.org/dhq/vol/12/4/000404.xml"
},
{
"title": "Critically engaging with data visualization through an information literacy framework",
"author": "Steven Braun",
"pubinfo": [
"2018",
"Volume 12 Number 4"
],
"xmllink": "http://www.digitalhumanities.org/dhq/vol/12/4/000402.xml"
},
{
"title": "Renaissance Remix.",
"author": "Deanna Shemek",
"pubinfo": [
"2018",
"Volume 12 Number 4"
],
"xmllink": "http://www.digitalhumanities.org/dhq/vol/12/4/000400.xml"
},
{
"title": "Crowdsourcing Image Extraction and Annotation: Software Development and Case Study",
"author": "Ana Jofre",
"pubinfo": [
"2020",
"Volume 14 Number 2"
],
"xmllink": "http://www.digitalhumanities.org/dhq/vol/14/2/000469.xml"
},
{
"title": "Defining scholarly practices, methods and tools in the Lithuanian digital humanities research community",
"author": "Ingrida Kelpšienė",
"pubinfo": [
"2018",
"Volume 12 Number 4"
],
"xmllink": "http://www.digitalhumanities.org/dhq/vol/12/4/000401.xml"
}
]

Can't get Apache Airflow to write to S3 using EMR Operators

I am using the Airflow EMR Operators to create an AWS EMR Cluster that runs a Jar file contained in S3 and then writes the output back to S3. It seems to be able to run the job using the Jar file from S3, but I cannot get it to write the output to S3. I am able to get it to write the output to S3 when running it as an AWS EMR CLI Bash command, but I need to do it using the Airflow EMR Operators. I have the S3 output directory set both in the Airflow step config and in the environment config in the Jar file and still cannot get the Operators to write to it.
Here is the code I have for my Airflow DAG
from datetime import datetime, timedelta
import airflow
from airflow import DAG
from airflow.contrib.operators.emr_create_job_flow_operator import EmrCreateJobFlowOperator
from airflow.contrib.operators.emr_add_steps_operator import EmrAddStepsOperator
from airflow.contrib.operators.emr_terminate_job_flow_operator import EmrTerminateJobFlowOperator
from airflow.contrib.sensors.emr_step_sensor import EmrStepSensor
from airflow.hooks.S3_hook import S3Hook
from airflow.operators.s3_file_transform_operator import S3FileTransformOperator

DEFAULT_ARGS = {
    'owner': 'AIRFLOW_USER',
    'depends_on_past': False,
    'start_date': datetime(2019, 9, 9),
    'email': ['airflow@example.com'],
    'email_on_failure': False,
    'email_on_retry': False
}

RUN_STEPS = [
    {
        "Name": "run-custom-create-emr",
        "ActionOnFailure": "CONTINUE",
        "HadoopJarStep": {
            "Jar": "command-runner.jar",
            "Args": [
                "spark-submit", "--deploy-mode", "cluster", "--master", "yarn", "--conf",
                "spark.yarn.submit.waitAppCompletion=false", "--class", "CLASSPATH",
                "s3://INPUT_JAR_FILE",
                "s3://OUTPUT_DIR"
            ]
        }
    }
]

JOB_FLOW_OVERRIDES = {
    "Name": "JOB_NAME",
    "LogUri": "s3://LOG_DIR/",
    "ReleaseLabel": "emr-5.23.0",
    "Instances": {
        "Ec2KeyName": "KP_USER_NAME",
        "Ec2SubnetId": "SUBNET",
        "EmrManagedMasterSecurityGroup": "SG-ID",
        "EmrManagedSlaveSecurityGroup": "SG-ID",
        "InstanceGroups": [
            {
                "Name": "Master nodes",
                "Market": "ON_DEMAND",
                "InstanceRole": "MASTER",
                "InstanceType": "m4.large",
                "InstanceCount": 1
            },
            {
                "Name": "Slave nodes",
                "Market": "ON_DEMAND",
                "InstanceRole": "CORE",
                "InstanceType": "m4.large",
                "InstanceCount": 1
            }
        ],
        "TerminationProtected": True,
        "KeepJobFlowAliveWhenNoSteps": True,
    },
    "Applications": [
        {"Name": "Spark"},
        {"Name": "Ganglia"},
        {"Name": "Hadoop"},
        {"Name": "Hive"}
    ],
    "JobFlowRole": "ROLE_NAME",
    "ServiceRole": "ROLE_NAME",
    "ScaleDownBehavior": "TERMINATE_AT_TASK_COMPLETION",
    "EbsRootVolumeSize": 10,
    "Tags": [
        {"Key": "Country", "Value": "us"},
        {"Key": "Environment", "Value": "dev"}
    ]
}

dag = DAG(
    'AWS-EMR-JOB',
    default_args=DEFAULT_ARGS,
    dagrun_timeout=timedelta(hours=2),
    schedule_interval=None
)

cluster_creator = EmrCreateJobFlowOperator(
    task_id='create_job_flow',
    job_flow_overrides=JOB_FLOW_OVERRIDES,
    aws_conn_id='aws_default',
    emr_conn_id='emr_connection_CustomCreate',
    dag=dag
)

step_adder = EmrAddStepsOperator(
    task_id='add_steps',
    job_flow_id="{{ task_instance.xcom_pull('create_job_flow', key='return_value') }}",
    aws_conn_id='aws_default',
    steps=RUN_STEPS,
    dag=dag
)

step_checker = EmrStepSensor(
    task_id='watch_step',
    job_flow_id="{{ task_instance.xcom_pull('create_job_flow', key='return_value') }}",
    step_id="{{ task_instance.xcom_pull('add_steps', key='return_value')[0] }}",
    aws_conn_id='aws_default',
    dag=dag
)

cluster_remover = EmrTerminateJobFlowOperator(
    task_id='remove_cluster',
    job_flow_id="{{ task_instance.xcom_pull('create_job_flow', key='return_value') }}",
    aws_conn_id='aws_default',
    dag=dag
)

cluster_creator.set_downstream(step_adder)
step_adder.set_downstream(step_checker)
step_checker.set_downstream(cluster_remover)
Does anyone have any ideas how I can solve this problem? Any help would be appreciated.
I believe I have just solved my problem. After really digging deep into all the local Airflow logs and the S3 EMR logs, I found a Hadoop memory exception, so I increased the number of cores the EMR runs on, and it seems to work now.
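For anyone hitting the same wall: the fix amounts to giving the cluster more capacity in JOB_FLOW_OVERRIDES. A sketch of that kind of change is below; the instance types and counts are illustrative guesses, not the exact values used above.

# Hypothetical tweak to the "Instances" section of JOB_FLOW_OVERRIDES:
# larger instance types (more vCPUs/memory) and an extra core node so the
# Spark step no longer runs out of Hadoop memory. Sizes are placeholders.
JOB_FLOW_OVERRIDES["Instances"]["InstanceGroups"] = [
    {
        "Name": "Master nodes",
        "Market": "ON_DEMAND",
        "InstanceRole": "MASTER",
        "InstanceType": "m4.xlarge",  # was m4.large
        "InstanceCount": 1
    },
    {
        "Name": "Slave nodes",
        "Market": "ON_DEMAND",
        "InstanceRole": "CORE",
        "InstanceType": "m4.xlarge",  # was m4.large
        "InstanceCount": 2            # was 1
    }
]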

I am trying to export this JSON data for importing into the project, however it shows an unexpected syntax error

When it was in the same file as the component, it worked perfectly. Now I am trying to move it out of the component to make the project cleaner. However, it shows unexpected syntax errors. The file format is JSON. How can this code be fixed so that it works as it should?
const postsData = [
  {
    id: 1,
    title: "How to start a business with 100$",
    published: "14h ago",
    image: require("../img/img1.jpg"),
  },
  {
    id: 2,
    title: "Get funding for your startup",
    published: "19h ago",
    image: require("../img/img2.jpg"),
  },
  {
    id: 3,
    title: "Latest Fashion Trends for 2018",
    published: "14h ago",
    image: require("../img/img3.jpg"),
  },
]

export {postsData};
Try this:
export default [
  {
    id: 1,
    title: "How to start a business with 100$",
    published: "14h ago",
    image: require("../img/img1.jpg"),
  },
  {
    id: 2,
    title: "Get funding for your startup",
    published: "19h ago",
    image: require("../img/img2.jpg"),
  },
  {
    id: 3,
    title: "Latest Fashion Trends for 2018",
    published: "14h ago",
    image: require("../img/img3.jpg"),
  }
]
and import it like
import postsData from 'filepath';
PS:
- There is no naming convention for postsData; you can name it anything.
- Your file should have the extension .js in your case, because you're not using plain JSON objects.
You need to wrap the keys and values in double quote marks,
like below:
{
  "id": "1",
  "title": "How to start a business with 100$",
  "published": "14h ago"
}
Please use JSONLint to validate the JSON.
Please follow this structure:
[
  {
    "id": "1",
    "title": "How to start a business with 100$",
    "published": "14h ago",
    "images": [{
      "bannerImg1": "./images/effy.jpg"
    }]
  },
  {
    "id": "2",
    "title": "Get funding for your startup",
    "published": "19h ago",
    "images": [{
      "bannerImg1": "./images/effy.jpg"
    }]
  },
  {
    "id": "3",
    "title": "Latest Fashion Trends for 2018",
    "published": "14h ago",
    "images": [{
      "bannerImg1": "./images/effy.jpg"
    }]
  }
]
If it is a JSON file like MyFile.json, then you can't export anything from the JSON file with your current code, because it contains keywords like export or const, and you can't use JavaScript keywords in a JSON file.
You can change the extension of the file to MyFile.js
OR
You can create a JSON file like MyFile.json and put only JSON code in it:
{
  "id": "1",
  "title": "How to start a business with 100$",
  "published": "14h ago"
}
and import it from a JS file like:
import data from './MyFile.json'
This is not JSON, it's just a JavaScript file (going by the syntax).
The last line constructs an object with shorthand properties and exports it in the form export { name1, name2, …, nameN }; where name1, name2, ..., nameN are named exports.
To import a named export, use the following syntax:
import { name1 } from "module-name";
In your case it would be
import { postsData } from "<your file without the js extension>"
As said above, this is not JSON but just an object.
First, create a new js file; you can name it whatever you want, for example data.js.
Then write something like this in data.js:
export default {
  postsData: [
    {
      id: 1,
      title: "How to start a business with 100$",
      published: "14h ago",
      image: require("../img/img1.jpg"),
    },
    {
      id: 2,
      title: "Get funding for your startup",
      published: "19h ago",
      image: require("../img/img2.jpg"),
    },
    {
      id: 3,
      title: "Latest Fashion Trends for 2018",
      published: "14h ago",
      image: require("../img/img3.jpg"),
    },
  ]
}
Finally, in your component file:
import xxxx from './data';
This way you can get your data as xxxx.postsData. (Note: since data.js uses a default export, it is imported without curly braces.)
With a CommonJS module:
const express = require("express");
const config = require("./config.json");
With TypeScript (tsconfig.json):
{
  "compilerOptions": {
    "target": "es2015",
    "module": "commonjs",
    "strict": true,
    "moduleResolution": "node",
    "resolveJsonModule": true
  }
}
import law from './keywords/law.json'
import special from './keywords/special.json'
If you have a .json file and everything inside it is JSON, then you don't need to export it like a normal module. You simply import it in the other module and use it like a normal object. That's it!

TensorFlow Manual Construction of GraphDef

In TensorFlow, I found that I can do the following,
from tensorflow.core import framework
from google.protobuf import json_format
graph_def = framework.graph_pb2.GraphDef()
node_def = framework.node_def_pb2.NodeDef()
graph_def.node.extend([node_def])
print json_format.MessageToJson(graph_def)
Which prints
{
"node": [
{}
]
}
Now, my node is not actually set to an Operation. I can't figure out how to make node_def be an operation. I can construct OpDefs via,
from tensorflow.python.ops import gen_array_ops
const_op_def = gen_array_ops._InitOpDefLibrary()._ops['Const'].op_def
Which is of class <class 'tensorflow.core.framework.op_def_pb2.OpDef'>
I would like to make my NodeDef register as this OpDef.
EDIT:
>>> print json_format.MessageToJson(gen_array_ops._InitOpDefLibrary()._ops['Const'].op_def)
{
"outputArg": [
{
"typeAttr": "dtype",
"name": "output"
}
],
"name": "Const",
"attr": [
{
"type": "tensor",
"name": "value"
},
{
"type": "type",
"name": "dtype"
}
]
}
I believe you are looking for a way to set message fields inside of the GraphDef and NodeDef protos. How to modify ProtoBuf objects in Python is detailed here
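For example, here is a minimal sketch (my own illustration, not from the linked docs) of filling in the NodeDef fields so the node describes a Const operation; the node name and value are made up:

from google.protobuf import json_format
from tensorflow.core.framework import graph_pb2, node_def_pb2, types_pb2
from tensorflow.python.framework import tensor_util

graph_def = graph_pb2.GraphDef()
node_def = node_def_pb2.NodeDef()

# "op" is just the registered op name; it has to match an existing OpDef
# ("Const" here), and the attrs have to satisfy that OpDef's attr list.
node_def.name = "my_const"   # illustrative node name
node_def.op = "Const"
node_def.attr["dtype"].type = types_pb2.DT_FLOAT
node_def.attr["value"].tensor.CopyFrom(tensor_util.make_tensor_proto(3.0))

graph_def.node.extend([node_def])
print(json_format.MessageToJson(graph_def))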

IPython notebook export external svg to pdf

In a markdown cell in an ipython3 notebook (4.0.0) I include an SVG that is located alongside the notebook file:
<img src="NewTux.svg"/>
In the normal notebook view it is displayed as expected.
However, when I try to export to pdf the image does not show up.
What puzzles me is that a matplotlib plot (with %config InlineBackend.figure_format = 'svg') perfectly shows both on screen AND in the exported PDF.
How can I get a PDF that also includes the SVGs which are not plotted but just included as figures in markdown?
(A workaround is to print to PDF from the browser, but then I lose the LaTeX formatting of formulas and the colored syntax highlighting of the code sections.)
A minimal working example of the notebook file is:
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"%matplotlib inline\n",
"%config InlineBackend.figure_format = 'svg'\n",
"import numpy as np\n",
"import matplotlib.pyplot as pp\n",
"\n",
"x = np.arange(0,10,0.05)\n",
"y = np.sin(x)\n",
"\n",
"pp.plot(x,y)\n",
"pp.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<img src=\"NewTux.svg\">"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.4.3"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
I downloaded NewTux.svg from Wikimedia Commons.