When I use openpyxl to load an Excel file (.xlsx), the following error is displayed (a link to the sample Excel file is given after the traceback):
from openpyxl import *
wb = load_workbook("D:/develop/workspace/exman/test sample/510001653.xlsx")
Traceback (most recent call last):
File "", line 1, in
File "C:\Python34\lib\site-packages\openpyxl-2.5.0-py3.4.egg\openpyxl\reader\
xcel.py", line 161, in load_workbook
parser.parse()
File "C:\Python34\lib\site-packages\openpyxl-2.5.0-py3.4.egg\openpyxl\packagi
g\workbook.py", line 42, in parse
if package.properties.date1904:
AttributeError: 'NoneType' object has no attribute 'date1904'
sample excel file download
I debugged the Python file and found that workbookPr is None, which causes package.properties to be None as well (properties = Alias(workbookPr)). So I changed the code of WorkbookParser.parse() as follows, and the error is solved.
class WorkbookParser:
    """Parse the workbook part of an archive into a ``Workbook`` instance.

    Attributes set by ``__init__``:
        archive: the archive object the workbook part is read from.
        wb: the ``Workbook`` being populated.
        sheets: list of sheets, initially empty.
        rels: result of ``get_dependents`` for ``ARC_WORKBOOK_RELS``.
    """

    def __init__(self, archive):
        self.archive = archive
        self.wb = Workbook()
        self.sheets = []
        self.rels = get_dependents(self.archive, ARC_WORKBOOK_RELS)

    def parse(self):
        """Read ``ARC_WORKBOOK`` from the archive and copy its settings to ``self.wb``."""
        src = self.archive.read(ARC_WORKBOOK)
        node = fromstring(src)
        package = WorkbookPackage.from_tree(node)
        properties = package.properties
        # ``properties`` is None when the file has no workbookPr element;
        # every attribute read from it must stay behind this guard.
        if properties is not None:
            if properties.date1904:
                # Assign on ``self.wb``: a bare ``wb`` is not defined in this
                # scope and would raise NameError for 1904-date workbooks.
                self.wb.excel_base_date = CALENDAR_MAC_1904
            self.wb.code_name = properties.codeName
        self.wb.active = package.active
..........
This bug was fixed in newer versions (I checked 2.4.8 and it's fixed there; 2.4.0 still had it). To upgrade:
pip install --upgrade openpyxl
Related
I'm running some code that works when a GPU is available, but I'm trying to figure out how to run it locally on a CPU. Here's the error:
2022-07-06 17:58:39,042 - INFO - allennlp.common.plugins - Plugin allennlp_models available
Traceback (most recent call last):
File "/Users/xiaoqingwan/opt/miniconda3/envs/absa/bin/allennlp", line 8, in <module>
sys.exit(run())
File "/Users/xiaoqingwan/opt/miniconda3/envs/absa/lib/python3.7/site-packages/allennlp/__main__.py", line 34, in run
main(prog="allennlp")
File "/Users/xiaoqingwan/opt/miniconda3/envs/absa/lib/python3.7/site-packages/allennlp/commands/__init__.py", line 118, in main
args.func(args)
File "/Users/xiaoqingwan/opt/miniconda3/envs/absa/lib/python3.7/site-packages/allennlp/commands/predict.py", line 205, in _predict
predictor = _get_predictor(args)
File "/Users/xiaoqingwan/opt/miniconda3/envs/absa/lib/python3.7/site-packages/allennlp/commands/predict.py", line 105, in _get_predictor
check_for_gpu(args.cuda_device)
File "/Users/xiaoqingwan/opt/miniconda3/envs/absa/lib/python3.7/site-packages/allennlp/common/checks.py", line 131, in check_for_gpu
" 'trainer.cuda_device=-1' in the json config file." + torch_gpu_error
allennlp.common.checks.ConfigurationError: **Experiment specified a GPU but none is available; if you want to run on CPU use the override 'trainer.cuda_device=-1' in the json config file.**
module 'torch.cuda' has no attribute '_check_driver'
Could you give me some guidance on what to do? Where is the config file and what is it called?
Here's the code (originally from: https://colab.research.google.com/drive/1F9zW_nVkwfwIVXTOA_juFDrlPz5TLjpK?usp=sharing):
# Use pretrained SpanModel weights for prediction
import sys
sys.path.append("aste")
from pathlib import Path
from data_utils import Data, Sentence, SplitEnum
from wrapper import SpanModel
def predict_sentence(text: str, model: SpanModel) -> Sentence:
    """Run ``model`` on one sentence and return the predicted ``Sentence``.

    ``text`` is split on whitespace, wrapped in an unlabeled ``Sentence`` and
    written to ``temp_in.txt``; ``model.predict`` writes ``temp_out.txt``,
    which is loaded back and its first sentence returned.
    """
    input_path, output_path = "temp_in.txt", "temp_out.txt"
    unlabeled = Sentence(
        tokens=text.split(),
        triples=[],
        pos=[],
        is_labeled=False,
        weight=1,
        id=1,
    )
    request = Data(root=Path(), data_split=SplitEnum.test, sentences=[unlabeled])
    request.save_to_path(input_path)
    model.predict(input_path, output_path)
    predictions = Data.load_from_full_path(output_path)
    return predictions.sentences[0]
text = "Did not enjoy the new Windows 8 and touchscreen functions ."
model = SpanModel(save_dir="pretrained_14lap", random_seed=0)
sent = predict_sentence(text, model)
Try using something like:
device = torch.device("cpu")
model = SpanModel(save_dir="pretrained_14lap", random_seed=0)
model.to(device)
The config file is inside of the model.tar.gz in the pretrained_14lap directory (it is always named config.json). It also contains the param "cuda_device": 0, which may be causing your problem.
I want to store a pickle file on S3, as a result of a luigi Task. Below is the class that defines the Task:
class CreateItemVocabulariesTask(luigi.Task):
    """Build item/user lookup tables from the item2vec results and pickle them to S3."""

    def __init__(self):
        # Client is created before the base initialiser runs, as in the original.
        access_key = AwsConfig().aws_access_key_id
        secret_key = AwsConfig().aws_secret_access_key
        self.client = S3Client(access_key, secret_key)
        super().__init__()

    def requires(self):
        """Return the upstream task(s) this task depends on."""
        return [GetItem2VecDataTask()]

    def run(self):
        """Fetch the TSV from S3, build the four lookup tables and dump each one."""
        local_copy = 'item2vec_results.tsv'
        remote = 's3://{}/item2vec_results.tsv'.format(AwsConfig().item2vec_path)
        self.client.get(remote, local_copy)

        frame = pd.read_csv(local_copy, sep='\t', encoding='latin1')
        unique_users = frame['CustomerId'].unique()
        unique_items = frame['ProductNumber'].unique()
        item_to_int, int_to_item = utils.create_lookup_tables(unique_items)
        user_to_int, int_to_user = utils.create_lookup_tables(unique_users)

        # Table order matches the target order returned by output().
        tables = (item_to_int, int_to_item, user_to_int, int_to_user)
        for position, table in enumerate(tables):
            with self.output()[position].open('wb') as out_file:
                pickle.dump(table, out_file)

    def output(self):
        """Return the four S3 targets: item2int, int2item, user2int, int2user."""
        templates = (
            's3://{}/item2int.pkl',
            's3://{}/int2item.pkl',
            's3://{}/user2int.pkl',
            's3://{}/int2user.pkl',
        )
        return [
            S3Target(template.format(AwsConfig().item2vec_path), client=self.client)
            for template in templates
        ]
When I run this task I get the error ValueError: Unsupported open mode 'wb'. The items I try to dump into a pickle file are just python dictionaries.
Full traceback:
Traceback (most recent call last):
File "C:\Anaconda3\lib\site-packages\luigi\worker.py", line 203, in run
new_deps = self._run_get_new_deps()
File "C:\Anaconda3\lib\site-packages\luigi\worker.py", line 140, in _run_get_new_deps
task_gen = self.task.run()
File "C:\Users\user\Documents\python workspace\pipeline.py", line 60, in run
with self.output()[0].open('wb') as out_file:
File "C:\Anaconda3\lib\site-packages\luigi\contrib\s3.py", line 714, in open
raise ValueError("Unsupported open mode '%s'" % mode)
ValueError: Unsupported open mode 'wb'
This issue only happens on Python 3.x (as explained in the linked discussion). To write a binary file or target under Python 3 (i.e. using 'wb' mode), set the format parameter of S3Target to Nop, like this:
S3Target('s3://path/to/file', client=self.client, format=luigi.format.Nop)
Note that this is just a workaround; it is neither intuitive nor documented.
I have a custom algorithm for text prediction. I want to deploy that in sagemaker. I am following this tutorial.
https://docs.aws.amazon.com/sagemaker/latest/dg/tf-example1.html
The only change from the tutorial is the following:
from sagemaker.tensorflow import TensorFlow
# Build the TensorFlow estimator with a custom entry point, a source
# directory and a requirements file.
# NOTE(review): entry_point is an absolute path while source_dir is also
# given — confirm whether it should be relative to source_dir.
iris_estimator = TensorFlow(entry_point='/home/ec2-user/SageMaker/sagemaker.py',
role=role,
output_path=model_artifacts_location,
code_location=custom_code_upload_location,
train_instance_count=1,
train_instance_type='ml.c4.xlarge',
training_steps=1000,
evaluation_steps=100, source_dir="./", requirements_file="requirements.txt")
.
%%time
import boto3
train_data_location = 's3://sagemaker-<my bucket>'
iris_estimator.fit(train_data_location)
INFO: the dataset is at the root of the bucket.
error log
ValueError: Error training sagemaker-tensorflow-2018-06-19-07-11-13-634: Failed Reason: AlgorithmError: uncaught exception during training: Import by filename is not supported.
Traceback (most recent call last):
File "/usr/local/lib/python2.7/dist-packages/container_support/training.py", line 36, in start
fw.train()
File "/usr/local/lib/python2.7/dist-packages/tf_container/train_entry_point.py", line 143, in train
customer_script = env.import_user_module()
File "/usr/local/lib/python2.7/dist-packages/container_support/environment.py", line 101, in import_user_module
user_module = importlib.import_module(script)
File "/usr/lib/python2.7/importlib/__init__.py", line 37, in import_module
__import__(name)
ImportError: Import by filename is not supported.
I solved this issue. The problem was using an absolute path for entry_point.
When you use the source_dir parameter, the path to the entry_point should be relative to source_dir.
I solved with:
region = boto3.Session().region_name
train_data_location = 's3://sagemaker-<my bucket>'.format(region)
I know how to load data into a Scrapy spider from an external source when working locally, but I struggle to find any information on how to deploy this file to Scrapinghub and what path to use there. For now I use the approach from the Scrapinghub documentation (pkgutil.get_data, shown below), but I receive a None object.
import pkgutil
class CodeSpider(scrapy.Spider):
    """Spider ``code`` that loads the packaged ``res/final.json`` when it starts."""

    name = "code"
    allowed_domains = ["google.com.au"]

    def start_requests(self):
        """Load the JSON resource shipped inside the ``project`` package."""
        resource = pkgutil.get_data("project", "res/final.json")
        # NOTE(review): pkgutil.get_data is documented to return the resource
        # contents (or None), not a file object — confirm that .read() is valid.
        payload = json.loads(resource.read())
Thanks.
My setup file
from setuptools import setup, find_packages
# Package metadata; JSON files under res/ are declared as data of 'project'.
setup(
    name='project',
    version='1.0',
    packages=find_packages(),
    package_data={'project': ['res/*.json']},
    entry_points={'scrapy': ['settings = au_go.settings']},
    zip_safe=False,
)
The error i got.
Traceback (most recent call last):
File "/usr/local/lib/python2.7/site-packages/scrapy/core/engine.py", line 127, in _next_request
request = next(slot.start_requests)
File "/tmp/unpacked-eggs/__main__.egg/au_go/spiders/code.py", line 16, in start_requests
a = json.loads(f.read())
AttributeError: 'NoneType' object has no attribute 'read'
From the traceback you supplied, I assume that your project files look like this:
au_go/
__init__.py
settings.py
res/
final.json
spiders/
__init__.py
code.py
scrapy.cfg
setup.py
With this assumption, the setup.py's package_data needs to refer to the package named au_go:
from setuptools import setup, find_packages
# Package metadata; JSON files under res/ are declared as data of 'au_go'.
setup(
    name='au_go',
    version='1.0',
    packages=find_packages(),
    package_data={'au_go': ['res/*.json']},
    entry_points={'scrapy': ['settings = au_go.settings']},
    zip_safe=False,
)
And then you can use pkgutil.get_data("au_go", "res/final.json").
This is my program:
# Tokenize a sample sentence and print its part-of-speech tags.
# Uses Python 2 print statements.
import nltk
text = "Rabbit is eating"
# Split the sentence into word tokens.
token2 = nltk.word_tokenize(text)
print token2
# Tag each token with its part of speech.
txttoken = nltk.pos_tag(token2)
print txttoken
This is the error I'm getting:
Traceback (most recent call last):
File "PosTag.py", line 8, in <module>
txttoken = nltk.pos_tag(token2)
File "C:\Python27\lib\site-packages\nltk-2.0.4-py2.7.egg\nltk\tag\__init__.py", line 99, in pos_tag
tagger = load(_POS_TAGGER)
File "C:\Python27\lib\site-packages\nltk-2.0.4-py2.7.egg\nltk\data.py", line 605, in load
resource_val = pickle.load(_open(resource_url))
ImportError: No module named numpy.core.multiarray
I just checked with nltk.download() from the Python command line.
These models are already installed, but I still get the error.
NLTK Downloader Models. Maxent tree bank pos-tagger model is installed. punkt model is installed