openpyxl+load_workbook+AttributeError: 'NoneType' object has no attribute 'date1904' - openpyxl

When I use openpyxl to load an Excel file (.xlsx), this error is raised (the link at the end is a sample Excel file):
from openpyxl import *
wb = load_workbook("D:/develop/workspace/exman/test sample/510001653.xlsx")
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "C:\Python34\lib\site-packages\openpyxl-2.5.0-py3.4.egg\openpyxl\reader\excel.py", line 161, in load_workbook
parser.parse()
File "C:\Python34\lib\site-packages\openpyxl-2.5.0-py3.4.egg\openpyxl\packaging\workbook.py", line 42, in parse
if package.properties.date1904:
AttributeError: 'NoneType' object has no attribute 'date1904'
sample excel file download

I debugged the library and found that workbookPr is None, which makes package.properties None as well (properties = Alias('workbookPr')). So I changed WorkbookParser.parse() as follows, and the error went away:
class WorkbookParser:

    def __init__(self, archive):
        self.archive = archive
        self.wb = Workbook()
        self.sheets = []
        self.rels = get_dependents(self.archive, ARC_WORKBOOK_RELS)

    def parse(self):
        src = self.archive.read(ARC_WORKBOOK)
        node = fromstring(src)
        package = WorkbookPackage.from_tree(node)
        if package.properties is not None:  # added this line
            if package.properties.date1904:
                self.wb.excel_base_date = CALENDAR_MAC_1904
            self.wb.code_name = package.properties.codeName
        self.wb.active = package.active
        ..........

This bug was fixed in newer versions (I checked 2.4.8 and it's fixed; 2.4.0 still had it):
pip install --upgrade openpyxl
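After upgrading, a quick sanity check is to confirm the installed version and reload the same file (a minimal sketch; the path is the one from the question):
import openpyxl

print(openpyxl.__version__)  # should report 2.4.8 or newer

wb = openpyxl.load_workbook("D:/develop/workspace/exman/test sample/510001653.xlsx")
print(wb.sheetnames)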


Allennlp: How to use CPU instead of GPU?

I'm running some code that works when a GPU is available, but I'm trying to figure out how to run it locally on CPU. Here's the error:
2022-07-06 17:58:39,042 - INFO - allennlp.common.plugins - Plugin allennlp_models available
Traceback (most recent call last):
File "/Users/xiaoqingwan/opt/miniconda3/envs/absa/bin/allennlp", line 8, in <module>
sys.exit(run())
File "/Users/xiaoqingwan/opt/miniconda3/envs/absa/lib/python3.7/site-packages/allennlp/__main__.py", line 34, in run
main(prog="allennlp")
File "/Users/xiaoqingwan/opt/miniconda3/envs/absa/lib/python3.7/site-packages/allennlp/commands/__init__.py", line 118, in main
args.func(args)
File "/Users/xiaoqingwan/opt/miniconda3/envs/absa/lib/python3.7/site-packages/allennlp/commands/predict.py", line 205, in _predict
predictor = _get_predictor(args)
File "/Users/xiaoqingwan/opt/miniconda3/envs/absa/lib/python3.7/site-packages/allennlp/commands/predict.py", line 105, in _get_predictor
check_for_gpu(args.cuda_device)
File "/Users/xiaoqingwan/opt/miniconda3/envs/absa/lib/python3.7/site-packages/allennlp/common/checks.py", line 131, in check_for_gpu
" 'trainer.cuda_device=-1' in the json config file." + torch_gpu_error
allennlp.common.checks.ConfigurationError: Experiment specified a GPU but none is available; if you want to run on CPU use the override 'trainer.cuda_device=-1' in the json config file.
module 'torch.cuda' has no attribute '_check_driver'
Could you give me some guidance on what to do? Where is the config file and what is it called?
Here's the code (originally from: https://colab.research.google.com/drive/1F9zW_nVkwfwIVXTOA_juFDrlPz5TLjpK?usp=sharing):
# Use pretrained SpanModel weights for prediction
import sys
sys.path.append("aste")
from pathlib import Path
from data_utils import Data, Sentence, SplitEnum
from wrapper import SpanModel

def predict_sentence(text: str, model: SpanModel) -> Sentence:
    path_in = "temp_in.txt"
    path_out = "temp_out.txt"
    sent = Sentence(tokens=text.split(), triples=[], pos=[], is_labeled=False, weight=1, id=1)
    data = Data(root=Path(), data_split=SplitEnum.test, sentences=[sent])
    data.save_to_path(path_in)
    model.predict(path_in, path_out)
    data = Data.load_from_full_path(path_out)
    return data.sentences[0]

text = "Did not enjoy the new Windows 8 and touchscreen functions ."
model = SpanModel(save_dir="pretrained_14lap", random_seed=0)
sent = predict_sentence(text, model)
Try using something like:
device = torch.device("cpu")
model = SpanModel(save_dir="pretrained_14lap", random_seed=0)
model.to(device)
The config file is inside the model.tar.gz in the pretrained_14lap directory (it is always named config.json). It also contains the param "cuda_device": 0, which may be causing your problem.
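A minimal sketch of how you could peek at that bundled config (assuming config.json sits at the root of model.tar.gz):
import json
import tarfile

# Read config.json straight out of the archive without unpacking it
with tarfile.open("pretrained_14lap/model.tar.gz") as archive:
    config = json.load(archive.extractfile("config.json"))

print(config.get("trainer", {}).get("cuda_device"))
If you run prediction from the command line, the error message already names the fix: override trainer.cuda_device to -1 (allennlp predict also has a --cuda-device option) so that the CPU is used.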

How to write a pickle file to S3, as a result of a luigi Task?

I want to store a pickle file on S3, as a result of a luigi Task. Below is the class that defines the Task:
import pickle

import luigi
import pandas as pd
from luigi.contrib.s3 import S3Client, S3Target

# AwsConfig, GetItem2VecDataTask and utils are project-specific helpers


class CreateItemVocabulariesTask(luigi.Task):
    def __init__(self):
        self.client = S3Client(AwsConfig().aws_access_key_id,
                               AwsConfig().aws_secret_access_key)
        super().__init__()

    def requires(self):
        return [GetItem2VecDataTask()]

    def run(self):
        filename = 'item2vec_results.tsv'
        data = self.client.get('s3://{}/item2vec_results.tsv'.format(AwsConfig().item2vec_path),
                               filename)
        df = pd.read_csv(filename, sep='\t', encoding='latin1')
        unique_users = df['CustomerId'].unique()
        unique_items = df['ProductNumber'].unique()
        item_to_int, int_to_item = utils.create_lookup_tables(unique_items)
        user_to_int, int_to_user = utils.create_lookup_tables(unique_users)
        with self.output()[0].open('wb') as out_file:
            pickle.dump(item_to_int, out_file)
        with self.output()[1].open('wb') as out_file:
            pickle.dump(int_to_item, out_file)
        with self.output()[2].open('wb') as out_file:
            pickle.dump(user_to_int, out_file)
        with self.output()[3].open('wb') as out_file:
            pickle.dump(int_to_user, out_file)

    def output(self):
        files = [S3Target('s3://{}/item2int.pkl'.format(AwsConfig().item2vec_path), client=self.client),
                 S3Target('s3://{}/int2item.pkl'.format(AwsConfig().item2vec_path), client=self.client),
                 S3Target('s3://{}/user2int.pkl'.format(AwsConfig().item2vec_path), client=self.client),
                 S3Target('s3://{}/int2user.pkl'.format(AwsConfig().item2vec_path), client=self.client)]
        return files
When I run this task I get the error ValueError: Unsupported open mode 'wb'. The items I'm trying to dump into a pickle file are just Python dictionaries.
Full traceback:
Traceback (most recent call last):
File "C:\Anaconda3\lib\site-packages\luigi\worker.py", line 203, in run
new_deps = self._run_get_new_deps()
File "C:\Anaconda3\lib\site-packages\luigi\worker.py", line 140, in _run_get_new_deps
task_gen = self.task.run()
File "C:\Users\user\Documents\python workspace\pipeline.py", line 60, in run
with self.output()[0].open('wb') as out_file:
File "C:\Anaconda3\lib\site-packages\luigi\contrib\s3.py", line 714, in open
raise ValueError("Unsupported open mode '%s'" % mode)
ValueError: Unsupported open mode 'wb'
This is an issue that only happens on Python 3.x, as explained here. In order to write a binary file or target on Python 3 (i.e. using 'wb' mode), just set the format parameter of S3Target to Nop, like this:
S3Target('s3://path/to/file', client=self.client, format=luigi.format.Nop)
Notice it's just a trick, and it is neither intuitive nor documented.
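Applied to the task above, output() would look roughly like this (a sketch reusing the paths and client from the question):
    def output(self):
        names = ['item2int.pkl', 'int2item.pkl', 'user2int.pkl', 'int2user.pkl']
        return [S3Target('s3://{}/{}'.format(AwsConfig().item2vec_path, name),
                         client=self.client,
                         format=luigi.format.Nop)  # allow binary ('wb') writes on Python 3
                for name in names]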

Sagemaker ImportError: Import by filename is not supported

I have a custom algorithm for text prediction. I want to deploy it on SageMaker. I am following this tutorial:
https://docs.aws.amazon.com/sagemaker/latest/dg/tf-example1.html
The only change from the tutorial is:
from sagemaker.tensorflow import TensorFlow

iris_estimator = TensorFlow(entry_point='/home/ec2-user/SageMaker/sagemaker.py',
                            role=role,
                            output_path=model_artifacts_location,
                            code_location=custom_code_upload_location,
                            train_instance_count=1,
                            train_instance_type='ml.c4.xlarge',
                            training_steps=1000,
                            evaluation_steps=100,
                            source_dir="./",
                            requirements_file="requirements.txt")
and then:
%%time
import boto3
train_data_location = 's3://sagemaker-<my bucket>'
iris_estimator.fit(train_data_location)
INFO: the dataset is at the root of the bucket.
Error log:
ValueError: Error training sagemaker-tensorflow-2018-06-19-07-11-13-634: Failed Reason: AlgorithmError: uncaught exception during training: Import by filename is not supported.
Traceback (most recent call last):
File "/usr/local/lib/python2.7/dist-packages/container_support/training.py", line 36, in start
fw.train()
File "/usr/local/lib/python2.7/dist-packages/tf_container/train_entry_point.py", line 143, in train
customer_script = env.import_user_module()
File "/usr/local/lib/python2.7/dist-packages/container_support/environment.py", line 101, in import_user_module
user_module = importlib.import_module(script)
File "/usr/lib/python2.7/importlib/__init__.py", line 37, in import_module
__import__(name)
ImportError: Import by filename is not supported.
I solved this issue. The problem was using an absolute path for entry_point: when you use a source_dir parameter, the path to the entry_point should be relative to source_dir.
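A sketch of the corrected call, keeping the rest of the question's estimator unchanged (this assumes sagemaker.py lives in the directory passed as source_dir):
from sagemaker.tensorflow import TensorFlow

# entry_point is now relative to source_dir instead of being an absolute path
iris_estimator = TensorFlow(entry_point='sagemaker.py',
                            source_dir='./',
                            role=role,
                            output_path=model_artifacts_location,
                            code_location=custom_code_upload_location,
                            train_instance_count=1,
                            train_instance_type='ml.c4.xlarge',
                            training_steps=1000,
                            evaluation_steps=100,
                            requirements_file="requirements.txt")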
I solved it with:
region = boto3.Session().region_name
train_data_location = 's3://sagemaker-<my bucket>'.format(region)

Scrapy: how to load URLs from a file at Scrapinghub

I know how to load data into a Scrapy spider from an external source when working locally, but I struggle to find any info on how to deploy this file to Scrapinghub and what path to use there. Now I use this approach from the SH documentation, but receive a None object.
import json
import pkgutil

import scrapy

class CodeSpider(scrapy.Spider):
    name = "code"
    allowed_domains = ["google.com.au"]

    def start_requests(self):
        f = pkgutil.get_data("project", "res/final.json")
        a = json.loads(f.read())
Thanks.
My setup file:
from setuptools import setup, find_packages

setup(
    name = 'project',
    version = '1.0',
    packages = find_packages(),
    package_data = {'project': ['res/*.json']},
    entry_points = {'scrapy': ['settings = au_go.settings']},
    zip_safe=False,
)
The error I got:
Traceback (most recent call last):
File "/usr/local/lib/python2.7/site-packages/scrapy/core/engine.py", line 127, in _next_request
request = next(slot.start_requests)
File "/tmp/unpacked-eggs/__main__.egg/au_go/spiders/code.py", line 16, in start_requests
a = json.loads(f.read())
AttributeError: 'NoneType' object has no attribute 'read'
From the traceback you supplied, I assume that your project files look like this:
au_go/
    __init__.py
    settings.py
    res/
        final.json
    spiders/
        __init__.py
        code.py
scrapy.cfg
setup.py
With this assumption, the setup.py's package_data needs to refer to the package named au_go:
from setuptools import setup, find_packages

setup(
    name = 'au_go',
    version = '1.0',
    packages = find_packages(),
    package_data = {
        'au_go': ['res/*.json'],
    },
    entry_points = {'scrapy': ['settings = au_go.settings']},
    zip_safe=False,
)
And then you can use pkgutil.get_data("au_go", "res/final.json").
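A minimal sketch of the spider with that change (assuming res/final.json contains a list of URLs; note that get_data() already returns bytes, so there is nothing to .read()):
import json
import pkgutil

import scrapy

class CodeSpider(scrapy.Spider):
    name = "code"
    allowed_domains = ["google.com.au"]

    def start_requests(self):
        # get_data() returns the resource contents as bytes (or None if not found)
        raw = pkgutil.get_data("au_go", "res/final.json")
        urls = json.loads(raw.decode("utf-8"))
        for url in urls:
            yield scrapy.Request(url)

    def parse(self, response):
        # handle the response here
        pass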

Error : No module named numpy.core.multiarray. Maxent tree bank pos-tagger model is already installed

This is my program:
import nltk
text = "Rabbit is eating"
token2 = nltk.word_tokenize(text)
print token2
txttoken = nltk.pos_tag(token2)
print txttoken
This is the error I'm getting:
Traceback (most recent call last):
File "PosTag.py", line 8, in <module>
txttoken = nltk.pos_tag(token2)
File "C:\Python27\lib\site-packages\nltk-2.0.4-py2.7.egg\nltk\tag\__init__.py", line 99, in pos_tag
tagger = load(_POS_TAGGER)
File "C:\Python27\lib\site-packages\nltk-2.0.4-py2.7.egg\nltk\data.py", line 605, in load
resource_val = pickle.load(_open(resource_url))
ImportError: No module named numpy.core.multiarray
I just checked with nltk.download() in the Python shell; these models are already installed, but I still get the error.
The NLTK Downloader shows that the Maxent treebank POS tagger model and the punkt model are both installed.
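The traceback points at NumPy rather than at the tagger models: pickle.load needs numpy.core.multiarray to unpickle the tagger. A quick check (assuming NumPy is simply missing or broken in this Python 2.7 environment) is whether NumPy imports at all:
# If either import fails, NumPy is not available to this Python 2.7 install
# and needs to be (re)installed; the tagger models themselves are fine.
import numpy
import numpy.core.multiarray

print numpy.__version__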