I know how to load data into Scrapy spider from external source when working localy. But I strugle to find any info on how to deploy this file to scrapinghub and what path to use there. Now i use this approach from SH documentation - enter link description here but recieve NONE object.
import pkgutil
class CodeSpider(scrapy.Spider):
name = "code"
allowed_domains = ["google.com.au"]
def start_requests(self, ):
f = pkgutil.get_data("project", "res/final.json")
a = json.loads(f.read())
Thanks.
My setup file
from setuptools import setup, find_packages
setup(
name = 'project',
version = '1.0',
packages = find_packages(),
package_data = {'project': ['res/*.json']
},
entry_points = {'scrapy': ['settings = au_go.settings']},
zip_safe=False,
)
The error i got.
Traceback (most recent call last):
File "/usr/local/lib/python2.7/site-packages/scrapy/core/engine.py", line 127, in _next_request
request = next(slot.start_requests)
File "/tmp/unpacked-eggs/__main__.egg/au_go/spiders/code.py", line 16, in start_requests
a = json.loads(f.read())
AttributeError: 'NoneType' object has no attribute 'read'
From the traceback you supplied, I assume that your project files look like this:
au_go/
__init__.py
settings.py
res/
final.json
spiders/
__init__.py
code.py
scrapy.cfg
setup.py
With this assumption, the setup.py's package_data needs to refer to the package named au_go:
from setuptools import setup, find_packages
setup(
name = 'au_go',
version = '1.0',
packages = find_packages(),
package_data = {
'au_go': ['res/*.json']
},
entry_points = {'scrapy': ['settings = au_go.settings']},
zip_safe=False,
)
And then you can use pkgutil.get_data("au_go", "res/final.json").
Related
I'm running some code that works when there is GPU. But I'm trying to figure out how to run it locally with CPU. Here's the error:
2022-07-06 17:58:39,042 - INFO - allennlp.common.plugins - Plugin allennlp_models available
Traceback (most recent call last):
File "/Users/xiaoqingwan/opt/miniconda3/envs/absa/bin/allennlp", line 8, in <module>
sys.exit(run())
File "/Users/xiaoqingwan/opt/miniconda3/envs/absa/lib/python3.7/site-packages/allennlp/__main__.py", line 34, in run
main(prog="allennlp")
File "/Users/xiaoqingwan/opt/miniconda3/envs/absa/lib/python3.7/site-packages/allennlp/commands/__init__.py", line 118, in main
args.func(args)
File "/Users/xiaoqingwan/opt/miniconda3/envs/absa/lib/python3.7/site-packages/allennlp/commands/predict.py", line 205, in _predict
predictor = _get_predictor(args)
File "/Users/xiaoqingwan/opt/miniconda3/envs/absa/lib/python3.7/site-packages/allennlp/commands/predict.py", line 105, in _get_predictor
check_for_gpu(args.cuda_device)
File "/Users/xiaoqingwan/opt/miniconda3/envs/absa/lib/python3.7/site-packages/allennlp/common/checks.py", line 131, in check_for_gpu
" 'trainer.cuda_device=-1' in the json config file." + torch_gpu_error
allennlp.common.checks.ConfigurationError: **Experiment specified a GPU but none is available; if you want to run on CPU use the override 'trainer.cuda_device=-1' in the json config file.**
module 'torch.cuda' has no attribute '_check_driver'
Could you give me some guidance on what to do? Where is the config file and what is it called?
Here's the code (originally from: https://colab.research.google.com/drive/1F9zW_nVkwfwIVXTOA_juFDrlPz5TLjpK?usp=sharing):
# Use pretrained SpanModel weights for prediction
import sys
sys.path.append("aste")
from pathlib import Path
from data_utils import Data, Sentence, SplitEnum
from wrapper import SpanModel
def predict_sentence(text: str, model: SpanModel) -> Sentence:
path_in = "temp_in.txt"
path_out = "temp_out.txt"
sent = Sentence(tokens=text.split(), triples=[], pos=[], is_labeled=False, weight=1, id=1)
data = Data(root=Path(), data_split=SplitEnum.test, sentences=[sent])
data.save_to_path(path_in)
model.predict(path_in, path_out)
data = Data.load_from_full_path(path_out)
return data.sentences[0]
text = "Did not enjoy the new Windows 8 and touchscreen functions ."
model = SpanModel(save_dir="pretrained_14lap", random_seed=0)
sent = predict_sentence(text, model)
Try using something like:
device = torch.device("cpu")
model = SpanModel(save_dir="pretrained_14lap", random_seed=0)
model.to(device)
The config file is inside of the model.tar.gz in the pretrained_14lap directory (it is always named config.json). It also contains the param "cuda_device": 0, which may be causing your problem.
On flow tutorial_8, I tried another scenario: SimpleGridScenario
I already check the grid.py, and followed the rules for the parameters.
My origianl program is that:
from flow.envs import Env
from gym.spaces.box import Box
from gym.spaces.tuple_space import Tuple
from flow.controllers import IDMController, ContinuousRouter,GridRouter
from flow.core.experiment import Experiment
from flow.core.params import SumoParams, EnvParams, \
InitialConfig, NetParams
from flow.core.params import VehicleParams
from flow.scenarios.loop import LoopScenario, ADDITIONAL_NET_PARAMS
from flow.scenarios.grid import SimpleGridScenario
from flow.scenarios import SimpleGridScenario
import numpy as np
from flow.envs import myEnv
ADDITIONAL_ENV_PARAMS={
"max_accel":1,
"max_decel":1,
}
sumo_params = SumoParams(sim_step=0.1,render=True)
vehicles=VehicleParams()
vehicles.add(
veh_id="idm",
acceleration_controller=(IDMController,{}),
routing_controller=(GridRouter,{}),
num_vehicles=22
)
env_params = EnvParams(additional_params=ADDITIONAL_ENV_PARAMS)
additional_net_params = ADDITIONAL_NET_PARAMS.copy()
net_params = NetParams(additional_params=additional_net_params)
initial_config = InitialConfig(bunching=20)
scenario = SimpleGridScenario(
name = 'grid',
vehicles = vehicles,
net_params = NetParams(
additional_params={
'grid_array':{
'row_num':3,
'col_num':2,
'inner_length':500,
'short_length':500,
'long_length':500,
'cars_top':20,
'cars_bot':20,
'cars_left':20,
'cars_right':20,
},
'horizontal_lanes':1,
'vertical_lanes':1,
'speed_limit':{
'vertical':35,
'horizontal':35
}
},
no_internal_links=False
),
initial_config = initial_config
)
env = myEnv(env_params, sumo_params, scenario)
exp = Experiment(env)
_ = exp.run(1, 1500)
And then I ran that, there is an error, the error log is:
(flow) dnl#dnl-Iiyama:~/flow$ python Tutorial_9_1.py
Loading configuration... done.
Success.
Loading configuration... done.
Error in edge length with key bot3_2
Error in edge length with key bot3_2
Traceback (most recent call last):
File "Tutorial_9_1.py", line 72, in <module>
_ = exp.run(1, 1500)
File "/home/dnl/flow/flow/core/experiment.py", line 118, in run
state = self.env.reset()
File "/home/dnl/flow/flow/envs/base_env.py", line 483, in reset
speed=speed)
File "/home/dnl/flow/flow/core/kernel/vehicle/traci.py", line 990, in add
[i for i in range(num_routes)], size=1, p=frac)[0])
File "mtrand.pyx", line 1126, in mtrand.RandomState.choice
ValueError: a must be non-empty
I want to why i am wrong ?
I try to test another scenario expect the LoopScenario. But it doesn't work
Please help.
I have a custom algorithm for text prediction. I want to deploy that in sagemaker. I am following this tutorial.
https://docs.aws.amazon.com/sagemaker/latest/dg/tf-example1.html
The only change from the tutorial is.
from sagemaker.tensorflow import TensorFlow
iris_estimator = TensorFlow(entry_point='/home/ec2-user/SageMaker/sagemaker.py',
role=role,
output_path=model_artifacts_location,
code_location=custom_code_upload_location,
train_instance_count=1,
train_instance_type='ml.c4.xlarge',
training_steps=1000,
evaluation_steps=100, source_dir="./", requirements_file="requirements.txt")
.
%%time
import boto3
train_data_location = 's3://sagemaker-<my bucket>'
iris_estimator.fit(train_data_location)
INFO: the dataset is at the root of the bucket.
error log
ValueError: Error training sagemaker-tensorflow-2018-06-19-07-11-13-634: Failed Reason: AlgorithmError: uncaught exception during training: Import by filename is not supported.
Traceback (most recent call last):
File "/usr/local/lib/python2.7/dist-packages/container_support/training.py", line 36, in start
fw.train()
File "/usr/local/lib/python2.7/dist-packages/tf_container/train_entry_point.py", line 143, in train
customer_script = env.import_user_module()
File "/usr/local/lib/python2.7/dist-packages/container_support/environment.py", line 101, in import_user_module
user_module = importlib.import_module(script)
File "/usr/lib/python2.7/importlib/__init__.py", line 37, in import_module
__import__(name)
ImportError: Import by filename is not supported.
I solved this issue, The problem was using absolute path for entry_point.
when you use a source_dir parameter the path to the entry_point should be relative to the source_dir
I solved with:
region = boto3.Session().region_name
train_data_location = 's3://sagemaker-<my bucket>'.format(region)
When I use openpyxl to load the Excel file( .xlsx), this error displays (the last the link is the sample Excel file):
from openpyxl import *
wb = load_workbook("D:/develop/workspace/exman/test sample/510001653.xlsx")
Traceback (most recent call last):
File "", line 1, in
File "C:\Python34\lib\site-packages\openpyxl-2.5.0-py3.4.egg\openpyxl\reader\
xcel.py", line 161, in load_workbook
parser.parse()
File "C:\Python34\lib\site-packages\openpyxl-2.5.0-py3.4.egg\openpyxl\packagi
g\workbook.py", line 42, in parse
if package.properties.date1904:
AttributeError: 'NoneType' object has no attribute 'date1904'
sample excel file download
I debug the python file ,and find that the workbookPr = None , cause the package.properties to None( properties = Alias(workbookPr). So I change the code of workbookParser.parser() like follow, the error is solved.
class WorkbookParser:
def __init__(self, archive):
self.archive = archive
self.wb = Workbook()
self.sheets = []
self.rels = get_dependents(self.archive, ARC_WORKBOOK_RELS)
def parse(self):
src = self.archive.read(ARC_WORKBOOK)
node = fromstring(src)
package = WorkbookPackage.from_tree(node)
if package.properties is not None: #add this line
if package.properties.date1904:
wb.excel_base_date = CALENDAR_MAC_1904
self.wb.code_name = package.properties.codeName
self.wb.active = package.active
..........
This bug was fixed in newer versions (I checked 2.4.8 and its fixed. 2.4.0 still had it)
pip install --upgrade openpyxl
I have the following wlst script:
import wlstModule
from com.bea.wli.sb.management.configuration import SessionManagementMBean
from com.bea.wli.sb.management.configuration import ALSBConfigurationMBean
from com.bea.wli.config import Ref
#=======================================================================================
# Utility function to read a binary file
#=======================================================================================
def readBinaryFile(fileName):
file = open(fileName, 'rb')
bytes = file.read()
return bytes
#=======================================================================================
# Utility function to create an arbitrary session name
#=======================================================================================
def createSessionName():
sessionName = String("SessionScript"+Long(System.currentTimeMillis()).toString())
return sessionName
def getSessionManagementMBean(sessionName):
SessionMBean = findService("SessionManagement", "com.bea.wli.sb.management.configuration.SessionManagementMBean")
SessionMBean.createSession(sessionName)
return SessionMBean
SessionMBean = None
importJar='C:\\OSB_PROJECT.jar'
theBytes = readBinaryFile(importJar)
sessionName = createSessionName()
SessionMBean = getSessionManagementMBean(sessionName)
The result is an error:
wls:/offline> execfile('C:\script.py') Traceback (innermost last):
File "", line 1, in ? File "C:\script.py", line 31, in ?
File "C:\script.py", line 22, in get SessionManagementMBean
NameError: findService
How can I fix this?
Are you ever connecting to your server and accessing the domain runtime? You should be doing something like the following:
connect("weblogic", "weblogic", "t3://localhost:7001")
domainRuntime()
# obtain session management mbean to create a session.
# This mbean instance can be used more than once to
# create/discard/commit many sessions
sessionMBean = findService(SessionManagementMBean.NAME,SessionManagementMBean.TYPE)
See more here:
http://docs.oracle.com/cd/E13171_01/alsb/docs25/javadoc/com/bea/wli/sb/management/configuration/SessionManagementMBean.html