Reading keys from an .npz file with multiple workers in pytorch dataloader? - numpy

I have an .npz file where I have stored a dictionary. The dictionary has some keys and the values are numpy arrays. I want to read the dictionary in my getitem() method of the dataloader. When I set the dataloader num_workers to 1, everything runs fine. But when I increase the num workers, it throws the following error when reading the data from that npz file:
Traceback (most recent call last):
File "scripts/train.py", line 235, in <module>
train(args)
File "scripts/train.py", line 186, in train
solver(args.epoch, args.verbose)
File "/local-scratch/codebase/cap/lib/solver.py", line 174, in __call__
self._feed(self.dataloader["train"], "train", epoch_id)
File "/local-scratch/codebase/cap/lib/solver.py", line 366, in _feed
for data_dict in dataloader:
File "/local-scratch/anaconda3/envs/scanenv/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 819, in __next__
return self._process_data(data)
File "/local-scratch/anaconda3/envs/scanenv/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 846, in _process_data
data.reraise()
File "/local-scratch/anaconda3/envs/scanenv/lib/python3.6/site-packages/torch/_utils.py", line 369, in reraise
raise self.exc_type(msg)
zipfile.BadZipFile: Caught BadZipFile in DataLoader worker process 0.
Original Traceback (most recent call last):
File "/local-scratch/anaconda3/envs/scanenv/lib/python3.6/site-packages/torch/utils/data/_utils/worker.py", line 178, in _worker_loop
data = fetcher.fetch(index)
File "/local-scratch/anaconda3/envs/scanenv/lib/python3.6/site-packages/torch/utils/data/_utils/fetch.py", line 44, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/local-scratch/anaconda3/envs/scanenv/lib/python3.6/site-packages/torch/utils/data/_utils/fetch.py", line 44, in <listcomp>
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/local-scratch/codebase/cap/lib/dataset.py", line 947, in __getitem__
other_bbox_feat = self.box_features['{}-{}_{}.{}'.format(scene_id, target_object_id, ann_id, object_id)]
File "/local-scratch/anaconda3/envs/scanenv/lib/python3.6/site-packages/numpy/lib/npyio.py", line 255, in __getitem__
pickle_kwargs=self.pickle_kwargs)
File "/local-scratch/anaconda3/envs/scanenv/lib/python3.6/site-packages/numpy/lib/format.py", line 763, in read_array
data = _read_bytes(fp, read_size, "array data")
File "/local-scratch/anaconda3/envs/scanenv/lib/python3.6/site-packages/numpy/lib/format.py", line 892, in _read_bytes
r = fp.read(size - len(data))
File "/local-scratch/anaconda3/envs/scanenv/lib/python3.6/zipfile.py", line 872, in read
data = self._read1(n)
File "/local-scratch/anaconda3/envs/scanenv/lib/python3.6/zipfile.py", line 962, in _read1
self._update_crc(data)
File "/local-scratch/anaconda3/envs/scanenv/lib/python3.6/zipfile.py", line 890, in _update_crc
raise BadZipFile("Bad CRC-32 for file %r" % self.name)
zipfile.BadZipFile: Bad CRC-32 for file 'scene0519_00-13_1.0.npy'
As far as I know, pytorch dataloader uses multiprocessing to for data loading. Perhaps the issue is with multiprocessing and .npz files. I really appreciate any help.

Related

MissingDataError: exog contains inf or nans

Input:
import statsmodels.api as sm
import pandas as pd
# reading data from the csv
data = pd.read_csv('/Users/justkiddings/Desktop/Python/TM/TM.csv')
# defining the variables
x = data['FSP'].tolist()
y = data['RSP'].tolist()
# adding the constant term
x = sm.add_constant(x)
# performing the regression
# and fitting the model
result = sm.OLS(y, x).fit()
# printing the summary table
print(result.summary())
Output:
runfile('/Users/justkiddings/Desktop/Python/Code/untitled28.py', wdir='/Users/justkiddings/Desktop/Python/Code')
Traceback (most recent call last):
File "/Users/justkiddings/opt/anaconda3/lib/python3.9/site-packages/spyder_kernels/py3compat.py", line 356, in compat_exec
exec(code, globals, locals)
File "/Users/justkiddings/Desktop/Python/Code/untitled28.py", line 24, in <module>
result = sm.OLS(y, x).fit()
File "/Users/justkiddings/opt/anaconda3/lib/python3.9/site-packages/statsmodels/regression/linear_model.py", line 890, in __init__
super(OLS, self).__init__(endog, exog, missing=missing,
File "/Users/justkiddings/opt/anaconda3/lib/python3.9/site-packages/statsmodels/regression/linear_model.py", line 717, in __init__
super(WLS, self).__init__(endog, exog, missing=missing,
File "/Users/justkiddings/opt/anaconda3/lib/python3.9/site-packages/statsmodels/regression/linear_model.py", line 191, in __init__
super(RegressionModel, self).__init__(endog, exog, **kwargs)
File "/Users/justkiddings/opt/anaconda3/lib/python3.9/site-packages/statsmodels/base/model.py", line 267, in __init__
super().__init__(endog, exog, **kwargs)
File "/Users/justkiddings/opt/anaconda3/lib/python3.9/site-packages/statsmodels/base/model.py", line 92, in __init__
self.data = self._handle_data(endog, exog, missing, hasconst,
File "/Users/justkiddings/opt/anaconda3/lib/python3.9/site-packages/statsmodels/base/model.py", line 132, in _handle_data
data = handle_data(endog, exog, missing, hasconst, **kwargs)
File "/Users/justkiddings/opt/anaconda3/lib/python3.9/site-packages/statsmodels/base/data.py", line 673, in handle_data
return klass(endog, exog=exog, missing=missing, hasconst=hasconst,
File "/Users/justkiddings/opt/anaconda3/lib/python3.9/site-packages/statsmodels/base/data.py", line 86, in __init__
self._handle_constant(hasconst)
File "/Users/justkiddings/opt/anaconda3/lib/python3.9/site-packages/statsmodels/base/data.py", line 132, in _handle_constant
raise MissingDataError('exog contains inf or nans')
MissingDataError: exog contains inf or nans
Some of the Data:
DATE,HOUR,STATION,CO,FSP,NO2,NOX,O3,RSP,SO2
1/1/2022,1,TUEN MUN,75,38,39,40,83,59,2
1/1/2022,2,TUEN MUN,72,35,29,30,90,61,2
1/1/2022,3,TUEN MUN,74,38,28,30,91,66,2
1/1/2022,4,TUEN MUN,76,39,31,32,79,61,2
1/1/2022,5,TUEN MUN,72,38,25,26,83,65,2
1/1/2022,6,TUEN MUN,74,37,24,25,86,60,2
I have removed the N.A. in my dataset and they have converted into blanks. (Eg. 3/1/2022,12,TUEN MUN,85,,53,70,59,,5) Why there is MissingDataError? How to fix it? Thanks.

use AWS_PROFILE in pandas.read_parquet

I'm testing this locally where I have a ~/.aws/config file.
~/.aws/config looks some thing like:
[profile a]
...
[profile b]
...
I also have a AWS_PROFILE environmental variable set as "a".
I would like to read a file in which is accessible with profile b using pandas.
I am able to access it through s3fs by doing:
import s3fs
fs = s3fs.S3FileSystem(profile="b")
fs.get("BUCKET/FILE.parquet", "FILE.parquet")
pd.read_parquet("FILE.parquet")
However, if I try to pass this to pd.read_parquet using storage_options I get a PermissionError: Forbidden.
pd.read_parquet(
"s3://BUCKET/FILE.parquet",
storage_options={"profile": "b"},
)
full Traceback below
Traceback (most recent call last):
File "/home/ray/local/bin/anaconda3/envs/main/lib/python3.8/site-packages/s3fs/core.py", line 233, in _call_s3
out = await method(**additional_kwargs)
File "/home/ray/local/bin/anaconda3/envs/main/lib/python3.8/site-packages/aiobotocore/client.py", line 154, in _make_api_call
raise error_class(parsed_response, operation_name)
botocore.exceptions.ClientError: An error occurred (403) when calling the HeadObject operation: Forbidden
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/home/ray/local/bin/anaconda3/envs/main/lib/python3.8/site-packages/pandas/io/parquet.py", line 459, in read_parquet
return impl.read(
File "/home/ray/local/bin/anaconda3/envs/main/lib/python3.8/site-packages/pandas/io/parquet.py", line 221, in read
return self.api.parquet.read_table(
File "/home/ray/local/bin/anaconda3/envs/main/lib/python3.8/site-packages/pyarrow/parquet.py", line 1672, in read_table
dataset = _ParquetDatasetV2(
File "/home/ray/local/bin/anaconda3/envs/main/lib/python3.8/site-packages/pyarrow/parquet.py", line 1504, in __init__
if filesystem.get_file_info(path_or_paths).is_file:
File "pyarrow/_fs.pyx", line 438, in pyarrow._fs.FileSystem.get_file_info
File "pyarrow/error.pxi", line 122, in pyarrow.lib.pyarrow_internal_check_status
File "pyarrow/_fs.pyx", line 1004, in pyarrow._fs._cb_get_file_info
File "/home/ray/local/bin/anaconda3/envs/main/lib/python3.8/site-packages/pyarrow/fs.py", line 226, in get_file_info
info = self.fs.info(path)
File "/home/ray/local/bin/anaconda3/envs/main/lib/python3.8/site-packages/fsspec/asyn.py", line 72, in wrapper
return sync(self.loop, func, *args, **kwargs)
File "/home/ray/local/bin/anaconda3/envs/main/lib/python3.8/site-packages/fsspec/asyn.py", line 53, in sync
raise result[0]
File "/home/ray/local/bin/anaconda3/envs/main/lib/python3.8/site-packages/fsspec/asyn.py", line 20, in _runner
result[0] = await coro
File "/home/ray/local/bin/anaconda3/envs/main/lib/python3.8/site-packages/s3fs/core.py", line 911, in _info
out = await self._call_s3(
File "/home/ray/local/bin/anaconda3/envs/main/lib/python3.8/site-packages/s3fs/core.py", line 252, in _call_s3
raise translate_boto_error(err)
PermissionError: Forbidden
Note: there is an old question somewhat related to this but it didn't help: How to read parquet file from s3 using dask with specific AWS profile
You just need to add the following argument to the function:
storage_options=dict(profile='your_profile_name')
Hence the read statement is:
pd.read_parquet("s3://your_bucket",storage_options=dict(profile='your_profile_name'))

Apache BEAM pipeline fails when writing TF Records - AttributeError: 'str' object has no attribute 'iteritems'

The issue started appearing over the weekend. For some reason, it feels to be a DataFlow issue.
Previously, I was able to execute the script and write TF records just fine. However, now, I am unable to initialize the computation graph to process the data.
The traceback is:
Traceback (most recent call last):
File "my_script.py", line 1492, in <module>
MyBeamClass()
File "my_script.py", line 402, in __init__
self.run()
File "my_script.py", line 514, in run
transform_fn_io.WriteTransformFn(path=self.JOB_DIR + '/transform/'))
File "/anaconda3/envs/ml27/lib/python2.7/site-packages/apache_beam/pipeline.py", line 426, in __exit__
self.run().wait_until_finish()
File "/anaconda3/envs/ml27/lib/python2.7/site-packages/apache_beam/runners/dataflow/dataflow_runner.py", line 1238, in wait_until_finish
(self.state, getattr(self._runner, 'last_error_msg', None)), self)
apache_beam.runners.dataflow.dataflow_runner.DataflowRuntimeException: Dataflow pipeline failed. State: FAILED, Error:
Traceback (most recent call last):
File "/usr/local/lib/python2.7/dist-packages/dataflow_worker/batchworker.py", line 649, in do_work
work_executor.execute()
File "/usr/local/lib/python2.7/dist-packages/dataflow_worker/executor.py", line 176, in execute
op.start()
File "apache_beam/runners/worker/operations.py", line 531, in apache_beam.runners.worker.operations.DoOperation.start
def start(self):
File "apache_beam/runners/worker/operations.py", line 532, in apache_beam.runners.worker.operations.DoOperation.start
with self.scoped_start_state:
File "apache_beam/runners/worker/operations.py", line 533, in apache_beam.runners.worker.operations.DoOperation.start
super(DoOperation, self).start()
File "apache_beam/runners/worker/operations.py", line 202, in apache_beam.runners.worker.operations.Operation.start
def start(self):
File "apache_beam/runners/worker/operations.py", line 206, in apache_beam.runners.worker.operations.Operation.start
self.setup()
File "apache_beam/runners/worker/operations.py", line 480, in apache_beam.runners.worker.operations.DoOperation.setup
with self.scoped_start_state:
File "apache_beam/runners/worker/operations.py", line 485, in apache_beam.runners.worker.operations.DoOperation.setup
pickler.loads(self.spec.serialized_fn))
File "/usr/local/lib/python2.7/dist-packages/apache_beam/internal/pickler.py", line 247, in loads
return dill.loads(s)
File "/usr/local/lib/python2.7/dist-packages/dill/_dill.py", line 317, in loads
return load(file, ignore)
File "/usr/local/lib/python2.7/dist-packages/dill/_dill.py", line 305, in load
obj = pik.load()
File "/usr/lib/python2.7/pickle.py", line 864, in load
dispatch[key](self)
File "/usr/lib/python2.7/pickle.py", line 1232, in load_build
for k, v in state.iteritems():
AttributeError: 'str' object has no attribute 'iteritems'
I am using tensorflow==1.13.1 and tensorflow-transform==0.9.0 and apache_beam==2.7.0
with beam.Pipeline(options=self.pipe_opt) as p:
with beam_impl.Context(temp_dir=self.google_cloud_options.temp_location):
# rest of the script
_ = (
transform_fn
| 'WriteTransformFn' >>
transform_fn_io.WriteTransformFn(path=self.JOB_DIR + '/transform/'))
I was experiencing the same error.
It seems to be triggered by a mismatch in the tensorflow-transform versions of your local (or master) machine and the workers one (specified in the setup.py file).
In my case I was running tensorflow-transform==0.13 on my local machine whereas the workers were running 0.8.
Downgrading the local version to 0.8 fixed the issue.

compute() in dask not working

I am trying a simple parallel computation in Dask.
This is my code.
import time
import dask as dask
import dask.distributed as distributed
import dask.dataframe as dd
import dask.delayed as delayed
from dask.distributed import Client,progress
client = Client('localhost:8786')
df = dd.read_csv('file.csv')
ddf = df.groupby(['col1'])[['col2']].sum()
ddf = ddf.compute()
print ddf
It seems fine from the documentation but on running I am getting this :
Traceback (most recent call last):
File "dask_prg1.py", line 17, in <module>
ddf = ddf.compute()
File "/usr/local/lib/python2.7/site-packages/dask/base.py", line 156, in compute
(result,) = compute(self, traverse=False, **kwargs)
File "/usr/local/lib/python2.7/site-packages/dask/base.py", line 402, in compute
results = schedule(dsk, keys, **kwargs)
File "/usr/local/lib/python2.7/site-packages/distributed/client.py", line 2159, in get
direct=direct)
File "/usr/local/lib/python2.7/site-packages/distributed/client.py", line 1562, in gather
asynchronous=asynchronous)
File "/usr/local/lib/python2.7/site-packages/distributed/client.py", line 652, in sync
return sync(self.loop, func, *args, **kwargs)
File "/usr/local/lib/python2.7/site-packages/distributed/utils.py", line 275, in sync
six.reraise(*error[0])
File "/usr/local/lib/python2.7/site-packages/distributed/utils.py", line 260, in f
result[0] = yield make_coro()
File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1099, in run
value = future.result()
File "/usr/local/lib/python2.7/site-packages/tornado/concurrent.py", line 260, in result
raise_exc_info(self._exc_info)
File "/usr/local/lib/python2.7/site-packages/tornado/gen.py", line 1107, in run
yielded = self.gen.throw(*exc_info)
File "/usr/local/lib/python2.7/site-packages/distributed/client.py", line 1439, in _gather
traceback)
File "/usr/local/lib/python2.7/site-packages/dask/bytes/core.py", line 122, in read_block_from_file
with lazy_file as f:
File "/usr/local/lib/python2.7/site-packages/dask/bytes/core.py", line 166, in __enter__
f = SeekableFile(self.fs.open(self.path, mode=mode))
File "/usr/local/lib/python2.7/site-packages/dask/bytes/local.py", line 58, in open
return open(self._normalize_path(path), mode=mode)
IOError: [Errno 2] No such file or directory: 'file.csv'
I am not understanding what is wrong.Kindly help me with this .Thank you in advance .
You may wish to pass the absolute file path to read_csv. The reason is, that you are giving the work of opening and reading the file to a dask worker, and you might not have started that worked with the same working directory as your script/session.

Error while running TensorFlow wide_n_deep Tutorial

I encountered the error:
AttributeError: 'NoneType' object has no attribute 'bucketize'
The full error is as follows:
Traceback (most recent call last):
File "wide_n_deep_tutorial_1.py", line 214, in <module>
train_and_eval()
File "wide_n_deep_tutorial_1.py", line 203, in train_and_eval
m.fit(input_fn=lambda: input_fn(df_train), steps=FLAGS.train_steps)
File "C:\Python35\lib\site-packages\tensorflow\contrib\learn\python\learn\estimators\dnn_linear_combined.py", line 711, in fit
max_steps=max_steps)
File "C:\Python35\lib\site-packages\tensorflow\python\util\deprecation.py", line 191, in new_func
return func(*args, **kwargs)
File "C:\Python35\lib\site-packages\tensorflow\contrib\learn\python\learn\estimators\estimator.py", line 355, in fit
max_steps=max_steps)
File "C:\Python35\lib\site-packages\tensorflow\contrib\learn\python\learn\estimators\estimator.py", line 699, in _train_model
train_ops = self._get_train_ops(features, labels)
File "C:\Python35\lib\site-packages\tensorflow\contrib\learn\python\learn\estimators\estimator.py", line 1052, in _get_train_ops
return self._call_model_fn(features, labels, model_fn_lib.ModeKeys.TRAIN)
File "C:\Python35\lib\site-packages\tensorflow\contrib\learn\python\learn\estimators\estimator.py", line 1019, in _call_model_fn
params=self.params)
File "C:\Python35\lib\site-packages\tensorflow\contrib\learn\python\learn\estimators\dnn_linear_combined.py", line 504, in _dnn_linear_combined_model_fn
scope=scope)
File "C:\Python35\lib\site-packages\tensorflow\contrib\layers\python\layers\feature_column_ops.py", line 526, in weighted_sum_from_feature_columns
transformed_tensor = transformer.transform(column)
File "C:\Python35\lib\site-packages\tensorflow\contrib\layers\python\layers\feature_column_ops.py", line 869, in transform
feature_column.insert_transformed_feature(self._columns_to_tensors)
File "C:\Python35\lib\site-packages\tensorflow\contrib\layers\python\layers\feature_column.py", line 1489, in insert_transformed_feature
name="bucketize")
File "C:\Python35\lib\site-packages\tensorflow\contrib\layers\python\ops\bucketization_op.py", line 48, in bucketize
return _bucketization_op.bucketize(input_tensor, boundaries, name=name)
AttributeError: 'NoneType' object has no attribute 'bucketize'
I got the same issue, it seems that on windows, we just got None, sourcecode,
try to run this code on linux, or try to remove the bucketization and the column crossing, for example. change the line:
flags.DEFINE_string("model_type","wide_n_deep","valid model types:{'wide','deep', 'wide_n_deep'")
to
flags.DEFINE_string("model_type","deep","valid model types:{'wide','deep', 'wide_n_deep'")
follow this issue for update: issue