When I try to read the CSV files downloaded from
http://insideairbnb.com/get-the-data.html I get the error below:
listings = pd.read_csv('F:\\US Docs\\DataScience\\listings.csv')
reviews = pd.read_csv('F:\\US Docs\\DataScience\\reviews.csv')
UnicodeEncodeError Traceback (most recent call last)
in
----> 1 listings = pd.read_csv('F:\US Docs\DataScience\listings.csv')
2 reviews = pd.read_csv('F:\US Docs\DataScience\reviews.csv')
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\parsers.py in
parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col,
usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters,
true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows,
na_values, keep_default_na, na_filter, verbose, skip_blank_lines,
parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst,
iterator, chunksize, compression, thousands, decimal, lineterminator,
quotechar, quoting, doublequote, escapechar, comment, encoding, dialect,
tupleize_cols, error_bad_lines, warn_bad_lines, delim_whitespace,
low_memory, memory_map, float_precision)
700 skip_blank_lines=skip_blank_lines)
701
--> 702 return _read(filepath_or_buffer, kwds)
703
704 parser_f.__name__ = name
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\parsers.py in
_read(filepath_or_buffer, kwds)
427
428 # Create the parser.
--> 429 parser = TextFileReader(filepath_or_buffer, **kwds)
430
431 if chunksize or iterator:
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\parsers.py in
__init__(self, f, engine, **kwds)
893 self.options['has_index_names'] = kwds['has_index_names']
894
--> 895 self._make_engine(self.engine)
896
897 def close(self):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\parsers.py in
_make_engine(self, engine)
1120 def _make_engine(self, engine='c'):
1121 if engine == 'c':
-> 1122 self._engine = CParserWrapper(self.f, **self.options)
1123 else:
1124 if engine == 'python':
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\parsers.py in
__init__(self, src, **kwds)
1851 kwds['usecols'] = self.usecols
1852
-> 1853 self._reader = parsers.TextReader(src, **kwds)
1854 self.unnamed_cols = self._reader.unnamed_cols
1855
pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader.__cinit__()
pandas/_libs/parsers.pyx in
pandas._libs.parsers.TextReader._setup_parser_source()
UnicodeEncodeError: 'mbcs' codec can't encode characters in position 0--1: invalid character
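A workaround that often helps with this 'mbcs' failure (a sketch, assuming the C parser is choking while encoding the path or contents with the legacy Windows codepage) is to open the file in Python with an explicit encoding and hand pandas the file handle instead of the path:

import pandas as pd

# Hedged workaround: opening the files ourselves means pandas' C parser
# never has to encode the path with the 'mbcs' (Windows ANSI) codec.
with open('F:\\US Docs\\DataScience\\listings.csv', encoding='utf-8') as f:
    listings = pd.read_csv(f)
with open('F:\\US Docs\\DataScience\\reviews.csv', encoding='utf-8') as f:
    reviews = pd.read_csv(f)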
I am only able to gain limited, top-level access to my AWS S3. I can see the buckets, but not their contents: neither subfolders nor files. I'm running everything from inside a conda environment. I've tried accessing files in private and public buckets without success. What am I doing wrong?
This block of code works as expected:
>>> import s3fs
>>> AKEY = 'XXXX'
>>> SKEY = 'XXXX'
>>> fs = s3fs.S3FileSystem(key=AKEY,secret=SKEY)
>>> fs.ls('s3://')
['my-bucket-1',
'my-bucket-2',
'my-bucket-3']
This block doesn't:
>>> fs.ls('s3://my-bucket-1')
[]
What I expect:
>>> fs.ls('s3://my-bucket-1')
['my-bucket-1/test.txt',
'my-bucket-1/test.csv']
When I try to open a file, I get a FileNotFoundError:
import pandas as pd
pd.read_csv(
's3://my-bucket-1/test.csv',
storage_options={'key':AKEY,'secret':SKEY}
)
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
Cell In[8], line 2
1 import pandas as pd
----> 2 pd.read_csv(
3 's3://my-bucket-1/test.csv',
4 storage_options={'key':AKEY,'secret':SKEY}
5 )
File ~\anaconda3\envs\env-2\lib\site-packages\pandas\util\_decorators.py:211, in deprecate_kwarg.<locals>._deprecate_kwarg.<locals>.wrapper(*args, **kwargs)
209 else:
210 kwargs[new_arg_name] = new_arg_value
--> 211 return func(*args, **kwargs)
File ~\anaconda3\envs\env-2\lib\site-packages\pandas\util\_decorators.py:331, in deprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper(*args, **kwargs)
325 if len(args) > num_allow_args:
326 warnings.warn(
327 msg.format(arguments=_format_argument_list(allow_args)),
328 FutureWarning,
329 stacklevel=find_stack_level(),
330 )
--> 331 return func(*args, **kwargs)
File ~\anaconda3\envs\env-2\lib\site-packages\pandas\io\parsers\readers.py:950, in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, error_bad_lines, warn_bad_lines, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options)
935 kwds_defaults = _refine_defaults_read(
936 dialect,
937 delimiter,
(...)
946 defaults={"delimiter": ","},
947 )
948 kwds.update(kwds_defaults)
--> 950 return _read(filepath_or_buffer, kwds)
File ~\anaconda3\envs\env-2\lib\site-packages\pandas\io\parsers\readers.py:605, in _read(filepath_or_buffer, kwds)
602 _validate_names(kwds.get("names", None))
604 # Create the parser.
--> 605 parser = TextFileReader(filepath_or_buffer, **kwds)
607 if chunksize or iterator:
608 return parser
File ~\anaconda3\envs\env-2\lib\site-packages\pandas\io\parsers\readers.py:1442, in TextFileReader.__init__(self, f, engine, **kwds)
1439 self.options["has_index_names"] = kwds["has_index_names"]
1441 self.handles: IOHandles | None = None
-> 1442 self._engine = self._make_engine(f, self.engine)
File ~\anaconda3\envs\env-2\lib\site-packages\pandas\io\parsers\readers.py:1735, in TextFileReader._make_engine(self, f, engine)
1733 if "b" not in mode:
1734 mode += "b"
-> 1735 self.handles = get_handle(
1736 f,
1737 mode,
1738 encoding=self.options.get("encoding", None),
1739 compression=self.options.get("compression", None),
1740 memory_map=self.options.get("memory_map", False),
1741 is_text=is_text,
1742 errors=self.options.get("encoding_errors", "strict"),
1743 storage_options=self.options.get("storage_options", None),
1744 )
1745 assert self.handles is not None
1746 f = self.handles.handle
File ~\anaconda3\envs\env-2\lib\site-packages\pandas\io\common.py:713, in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)
710 codecs.lookup_error(errors)
712 # open URLs
--> 713 ioargs = _get_filepath_or_buffer(
714 path_or_buf,
715 encoding=encoding,
716 compression=compression,
717 mode=mode,
718 storage_options=storage_options,
719 )
721 handle = ioargs.filepath_or_buffer
722 handles: list[BaseBuffer]
File ~\anaconda3\envs\env-2\lib\site-packages\pandas\io\common.py:409, in _get_filepath_or_buffer(filepath_or_buffer, encoding, compression, mode, storage_options)
406 pass
408 try:
--> 409 file_obj = fsspec.open(
410 filepath_or_buffer, mode=fsspec_mode, **(storage_options or {})
411 ).open()
412 # GH 34626 Reads from Public Buckets without Credentials needs anon=True
413 except tuple(err_types_to_retry_with_anon):
File ~\anaconda3\envs\env-2\lib\site-packages\fsspec\core.py:135, in OpenFile.open(self)
128 def open(self):
129 """Materialise this as a real open file without context
130
131 The OpenFile object should be explicitly closed to avoid enclosed file
132 instances persisting. You must, therefore, keep a reference to the OpenFile
133 during the life of the file-like it generates.
134 """
--> 135 return self.__enter__()
File ~\anaconda3\envs\env-2\lib\site-packages\fsspec\core.py:103, in OpenFile.__enter__(self)
100 def __enter__(self):
101 mode = self.mode.replace("t", "").replace("b", "") + "b"
--> 103 f = self.fs.open(self.path, mode=mode)
105 self.fobjects = [f]
107 if self.compression is not None:
File ~\anaconda3\envs\env-2\lib\site-packages\fsspec\spec.py:1106, in AbstractFileSystem.open(self, path, mode, block_size, cache_options, compression, **kwargs)
1104 else:
1105 ac = kwargs.pop("autocommit", not self._intrans)
-> 1106 f = self._open(
1107 path,
1108 mode=mode,
1109 block_size=block_size,
1110 autocommit=ac,
1111 cache_options=cache_options,
1112 **kwargs,
1113 )
1114 if compression is not None:
1115 from fsspec.compression import compr
File ~\anaconda3\envs\env-2\lib\site-packages\s3fs\core.py:640, in S3FileSystem._open(self, path, mode, block_size, acl, version_id, fill_cache, cache_type, autocommit, requester_pays, cache_options, **kwargs)
637 if cache_type is None:
638 cache_type = self.default_cache_type
--> 640 return S3File(
641 self,
642 path,
643 mode,
644 block_size=block_size,
645 acl=acl,
646 version_id=version_id,
647 fill_cache=fill_cache,
648 s3_additional_kwargs=kw,
649 cache_type=cache_type,
650 autocommit=autocommit,
651 requester_pays=requester_pays,
652 cache_options=cache_options,
653 )
File ~\anaconda3\envs\env-2\lib\site-packages\s3fs\core.py:1989, in S3File.__init__(self, s3, path, mode, block_size, acl, version_id, fill_cache, s3_additional_kwargs, autocommit, cache_type, requester_pays, cache_options)
1987 self.details = s3.info(path)
1988 self.version_id = self.details.get("VersionId")
-> 1989 super().__init__(
1990 s3,
1991 path,
1992 mode,
1993 block_size,
1994 autocommit=autocommit,
1995 cache_type=cache_type,
1996 cache_options=cache_options,
1997 )
1998 self.s3 = self.fs # compatibility
2000 # when not using autocommit we want to have transactional state to manage
File ~\anaconda3\envs\env-2\lib\site-packages\fsspec\spec.py:1462, in AbstractBufferedFile.__init__(self, fs, path, mode, block_size, autocommit, cache_type, cache_options, size, **kwargs)
1460 self.size = size
1461 else:
-> 1462 self.size = self.details["size"]
1463 self.cache = caches[cache_type](
1464 self.blocksize, self._fetch_range, self.size, **cache_options
1465 )
1466 else:
File ~\anaconda3\envs\env-2\lib\site-packages\fsspec\spec.py:1475, in AbstractBufferedFile.details(self)
1472 @property
1473 def details(self):
1474 if self._details is None:
-> 1475 self._details = self.fs.info(self.path)
1476 return self._details
File ~\anaconda3\envs\env-2\lib\site-packages\fsspec\asyn.py:113, in sync_wrapper.<locals>.wrapper(*args, **kwargs)
110 @functools.wraps(func)
111 def wrapper(*args, **kwargs):
112 self = obj or args[0]
--> 113 return sync(self.loop, func, *args, **kwargs)
File ~\anaconda3\envs\env-2\lib\site-packages\fsspec\asyn.py:98, in sync(loop, func, timeout, *args, **kwargs)
96 raise FSTimeoutError from return_result
97 elif isinstance(return_result, BaseException):
---> 98 raise return_result
99 else:
100 return return_result
File ~\anaconda3\envs\env-2\lib\site-packages\fsspec\asyn.py:53, in _runner(event, coro, result, timeout)
51 coro = asyncio.wait_for(coro, timeout=timeout)
52 try:
---> 53 result[0] = await coro
54 except Exception as ex:
55 result[0] = ex
File ~\anaconda3\envs\env-2\lib\site-packages\s3fs\core.py:1257, in S3FileSystem._info(self, path, bucket, key, refresh, version_id)
1245 if (
1246 out.get("KeyCount", 0) > 0
1247 or out.get("Contents", [])
1248 or out.get("CommonPrefixes", [])
1249 ):
1250 return {
1251 "name": "/".join([bucket, key]),
1252 "type": "directory",
1253 "size": 0,
1254 "StorageClass": "DIRECTORY",
1255 }
-> 1257 raise FileNotFoundError(path)
1258 except ClientError as e:
1259 raise translate_boto_error(e, set_cause=False)
FileNotFoundError: my-bucket-1/test.csv
Versions: s3fs-2022.11.0, aiobotocore-2.4.0, botocore-1.27.59
Anonymous access to a public bucket fails as well:
fs = s3fs.S3FileSystem(anon=True)
fs.ls('s3://dask-data/nyc-taxi/2015')
This raises a ParseError.
Check the bucket policy / IAM role that gives you permissions to access the bucket. It should have /* after the name of the resource:
"Action": "s3:GetObject",
"Resource": "arn:aws:s3:::my-bucket-1/*"
This allows you to access the objects in the bucket, not just the bucket itself.
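For reference, a minimal sketch of such a policy (bucket name taken from the question; note that s3:ListBucket targets the bucket ARN itself, while s3:GetObject targets the objects under /*):

{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Action": "s3:ListBucket",
      "Resource": "arn:aws:s3:::my-bucket-1"
    },
    {
      "Effect": "Allow",
      "Action": "s3:GetObject",
      "Resource": "arn:aws:s3:::my-bucket-1/*"
    }
  ]
}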
Have you tried boto3 directly?
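As a sanity check under the same credentials (a sketch; AKEY, SKEY, and the bucket name are the placeholders from the question), listing keys directly with boto3 shows whether the problem is s3fs-specific or a permissions issue:

import boto3

# List object keys with boto3 directly, bypassing s3fs/fsspec entirely.
s3 = boto3.client('s3', aws_access_key_id=AKEY, aws_secret_access_key=SKEY)
resp = s3.list_objects_v2(Bucket='my-bucket-1')
for obj in resp.get('Contents', []):
    print(obj['Key'])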
import pandas as pd
df = pd.read_csv('D:\Tableau\codebasics_files\Weather_data.csv.xlsx')
df
UnicodeDecodeError Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_18872\1985582496.py in
1 import pandas as pd
----> 2 df = pd.read_csv('D:\Tableau\codebasics_files\Weather_data.csv.xlsx')
3 df
C:\ProgramData\Anaconda3\lib\site-packages\pandas\util\_decorators.py in wrapper(*args, **kwargs)
309 stacklevel=stacklevel,
310 )
--> 311 return func(*args, **kwargs)
312
313 return wrapper
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\parsers\readers.py in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, error_bad_lines, warn_bad_lines, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options)
676 kwds.update(kwds_defaults)
677
--> 678 return _read(filepath_or_buffer, kwds)
679
680
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\parsers\readers.py in _read(filepath_or_buffer, kwds)
573
574 # Create the parser.
--> 575 parser = TextFileReader(filepath_or_buffer, **kwds)
576
577 if chunksize or iterator:
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\parsers\readers.py in __init__(self, f, engine, **kwds)
930
931 self.handles: IOHandles | None = None
--> 932 self._engine = self._make_engine(f, self.engine)
933
934 def close(self):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\parsers\readers.py in _make_engine(self, f, engine)
1232
1233 try:
-> 1234 return mapping[engine](f, **self.options)
1235 except Exception:
1236 if self.handles is not None:
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\parsers\c_parser_wrapper.py in __init__(self, src, **kwds)
73
74 kwds["dtype"] = ensure_dtype_objs(kwds.get("dtype", None))
---> 75 self._reader = parsers.TextReader(src, **kwds)
76
77 self.unnamed_cols = self._reader.unnamed_cols
C:\ProgramData\Anaconda3\lib\site-packages\pandas\_libs\parsers.pyx in pandas._libs.parsers.TextReader.__cinit__()
C:\ProgramData\Anaconda3\lib\site-packages\pandas\_libs\parsers.pyx in pandas._libs.parsers.TextReader._get_header()
C:\ProgramData\Anaconda3\lib\site-packages\pandas\_libs\parsers.pyx in pandas._libs.parsers.TextReader._tokenize_rows()
C:\ProgramData\Anaconda3\lib\site-packages\pandas\_libs\parsers.pyx in pandas._libs.parsers.raise_parser_error()
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xaa in position 14: invalid start byte
I tried some options from YouTube tutorials, but none of them worked.
Looking at the very end of the file name, you're importing an .xlsx file, not a CSV file.
Try opening the file in Excel and exporting it as a CSV. Make sure .csv is the final extension of the file name.
I think you need to use pandas' Excel reader to load and read an .xlsx file.
For that, use this line of code instead (note the raw-string prefix so the backslashes in the Windows path aren't treated as escape sequences):
import pandas as pd
df = pd.read_excel(r'D:\Tableau\codebasics_files\Weather_data.csv.xlsx')
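Note that pandas needs an Excel engine installed to read .xlsx files; if read_excel complains about a missing optional dependency, pip install openpyxl should resolve it.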
I'm trying to import some public data from the web but can't understand the error.
My code:
import pandas as pd
df2022 = pd.read_excel("https://ofslivefs.blob.core.windows.net/files/NSS%20data%202022/September/NSS2022_summary_data.xlsx")
It returns this:
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
/var/folders/v_/yq26pm194xj5ckqy8p_njwc00000gn/T/ipykernel_89117/2424267382.py in <module>
----> 1 df2022 = pd.read_excel("https://ofslivefs.blob.core.windows.net/files/NSS%20data%202022/September/NSS2022_summary_data.xlsx")
~/opt/anaconda3/lib/python3.9/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
209 else:
210 kwargs[new_arg_name] = new_arg_value
--> 211 return func(*args, **kwargs)
212
213 return cast(F, wrapper)
~/opt/anaconda3/lib/python3.9/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
329 stacklevel=find_stack_level(),
330 )
--> 331 return func(*args, **kwargs)
332
333 # error: "Callable[[VarArg(Any), KwArg(Any)], Any]" has no
~/opt/anaconda3/lib/python3.9/site-packages/pandas/io/excel/_base.py in read_excel(io, sheet_name, header, names, index_col, usecols, squeeze, dtype, engine, converters, true_values, false_values, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, parse_dates, date_parser, thousands, decimal, comment, skipfooter, convert_float, mangle_dupe_cols, storage_options)
480 if not isinstance(io, ExcelFile):
481 should_close = True
--> 482 io = ExcelFile(io, storage_options=storage_options, engine=engine)
483 elif engine and engine != io.engine:
484 raise ValueError(
~/opt/anaconda3/lib/python3.9/site-packages/pandas/io/excel/_base.py in __init__(self, path_or_buffer, engine, storage_options)
1693 self.storage_options = storage_options
1694
-> 1695 self._reader = self._engines[engine](self._io, storage_options=storage_options)
1696
1697 def __fspath__(self):
~/opt/anaconda3/lib/python3.9/site-packages/pandas/io/excel/_openpyxl.py in __init__(self, filepath_or_buffer, storage_options)
555 """
556 import_optional_dependency("openpyxl")
--> 557 super().__init__(filepath_or_buffer, storage_options=storage_options)
558
559 @property
~/opt/anaconda3/lib/python3.9/site-packages/pandas/io/excel/_base.py in __init__(self, filepath_or_buffer, storage_options)
543 self.handles.handle.seek(0)
544 try:
--> 545 self.book = self.load_workbook(self.handles.handle)
546 except Exception:
547 self.close()
~/opt/anaconda3/lib/python3.9/site-packages/pandas/io/excel/_openpyxl.py in load_workbook(self, filepath_or_buffer)
566 from openpyxl import load_workbook
567
--> 568 return load_workbook(
569 filepath_or_buffer, read_only=True, data_only=True, keep_links=False
570 )
~/opt/anaconda3/lib/python3.9/site-packages/openpyxl/reader/excel.py in load_workbook(filename, read_only, keep_vba, data_only, keep_links)
315 reader = ExcelReader(filename, read_only, keep_vba,
316 data_only, keep_links)
--> 317 reader.read()
318 return reader.wb
~/opt/anaconda3/lib/python3.9/site-packages/openpyxl/reader/excel.py in read(self)
281 apply_stylesheet(self.archive, self.wb)
282 self.read_worksheets()
--> 283 self.parser.assign_names()
284 if not self.read_only:
285 self.archive.close()
~/opt/anaconda3/lib/python3.9/site-packages/openpyxl/reader/workbook.py in assign_names(self)
100 reserved = defn.is_reserved
101 if reserved in ("Print_Titles", "Print_Area"):
--> 102 sheet = self.wb._sheets[defn.localSheetId]
103 if reserved == "Print_Titles":
104 rows, cols = _unpack_print_titles(defn)
IndexError: list index out of range
At this point I would traditionally download the file and convert it to CSV, but I want to access it straight from the web.
Specifying the sheet (which I guess I could access as sheet_name="Q27 Providers (benchmarked)") doesn't work either.
It looks like the xlsx file itself is broken, which would explain why you can't load it. Did you try opening that xlsx file in Excel?
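One quick way to test that theory (a diagnostic sketch; assumes the requests package is available): a valid .xlsx file is a zip archive, so if the downloaded bytes are not a valid zip, the file itself, rather than pandas or openpyxl, is the problem.

import io
import zipfile
import requests

# Download the raw bytes and check whether they form a valid zip container.
url = ('https://ofslivefs.blob.core.windows.net/files/'
       'NSS%20data%202022/September/NSS2022_summary_data.xlsx')
raw = requests.get(url).content
print(zipfile.is_zipfile(io.BytesIO(raw)))  # False means not a real .xlsx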
I'm working with a Dask Cluster on GCP. I'm using this code to deploy it:
from dask_cloudprovider.gcp import GCPCluster
from dask.distributed import Client
environment_vars = {
    'EXTRA_PIP_PACKAGES': '"gcsfs"'
}
cluster = GCPCluster(
    n_workers=32,
    docker_image='daskdev/dask:2021.2.0',
    env_vars=environment_vars,
    network='my-network',
    # filesystem_size=150,
    machine_type='e2-standard-16',
    projectid='my-project-id',
    zone='us-central1-a',
    on_host_maintenance="MIGRATE",
)
client = Client(cluster)
Then I read the CSV files with the following code:
import dask.dataframe as dd
col_dtypes = {
'var1': 'float64',
'var2': 'object',
'var3': 'object',
'var4': 'float64'
}
df = dd.read_csv('gs://my_bucket/files-*.csv', blocksize=None, dtype=col_dtypes)
df = df.persist()
Everything works fine until I try to run a query or computation. For instance, this piece of code:
df.var1.value_counts().compute()
This is the output:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-14-711a7c21ed42> in <module>
----> 1 df.var1.value_counts().compute()
/opt/conda/lib/python3.8/site-packages/dask/base.py in compute(self, **kwargs)
279 dask.base.compute
280 """
--> 281 (result,) = compute(self, traverse=False, **kwargs)
282 return result
283
/opt/conda/lib/python3.8/site-packages/dask/base.py in compute(*args, **kwargs)
561 postcomputes.append(x.__dask_postcompute__())
562
--> 563 results = schedule(dsk, keys, **kwargs)
564 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
565
/opt/conda/lib/python3.8/site-packages/distributed/client.py in get(self, dsk, keys, workers, allow_other_workers, resources, sync, asynchronous, direct, retries, priority, fifo_timeout, actors, **kwargs)
2653 should_rejoin = False
2654 try:
-> 2655 results = self.gather(packed, asynchronous=asynchronous, direct=direct)
2656 finally:
2657 for f in futures.values():
/opt/conda/lib/python3.8/site-packages/distributed/client.py in gather(self, futures, errors, direct, asynchronous)
1962 else:
1963 local_worker = None
-> 1964 return self.sync(
1965 self._gather,
1966 futures,
/opt/conda/lib/python3.8/site-packages/distributed/client.py in sync(self, func, asynchronous, callback_timeout, *args, **kwargs)
836 return future
837 else:
--> 838 return sync(
839 self.loop, func, *args, callback_timeout=callback_timeout, **kwargs
840 )
/opt/conda/lib/python3.8/site-packages/distributed/utils.py in sync(loop, func, callback_timeout, *args, **kwargs)
338 if error[0]:
339 typ, exc, tb = error[0]
--> 340 raise exc.with_traceback(tb)
341 else:
342 return result[0]
/opt/conda/lib/python3.8/site-packages/distributed/utils.py in f()
322 if callback_timeout is not None:
323 future = asyncio.wait_for(future, callback_timeout)
--> 324 result[0] = yield future
325 except Exception as exc:
326 error[0] = sys.exc_info()
/opt/conda/lib/python3.8/site-packages/tornado/gen.py in run(self)
760
761 try:
--> 762 value = future.result()
763 except Exception:
764 exc_info = sys.exc_info()
/opt/conda/lib/python3.8/site-packages/distributed/client.py in _gather(self, futures, errors, direct, local_worker)
1827 exc = CancelledError(key)
1828 else:
-> 1829 raise exception.with_traceback(traceback)
1830 raise exc
1831 if errors == "skip":
/opt/conda/lib/python3.8/site-packages/dask/optimization.py in __call__()
961 if not len(args) == len(self.inkeys):
962 raise ValueError("Expected %d args, got %d" % (len(self.inkeys), len(args)))
--> 963 return core.get(self.dsk, self.outkey, dict(zip(self.inkeys, args)))
964
965 def __reduce__(self):
/opt/conda/lib/python3.8/site-packages/dask/core.py in get()
149 for key in toposort(dsk):
150 task = dsk[key]
--> 151 result = _execute_task(task, cache)
152 cache[key] = result
153 result = _execute_task(out, cache)
/opt/conda/lib/python3.8/site-packages/dask/core.py in _execute_task()
119 # temporaries by their reference count and can execute certain
120 # operations in-place.
--> 121 return func(*(_execute_task(a, cache) for a in args))
122 elif not ishashable(arg):
123 return arg
/opt/conda/lib/python3.8/site-packages/dask/utils.py in apply()
33 def apply(func, args, kwargs=None):
34 if kwargs:
---> 35 return func(*args, **kwargs)
36 else:
37 return func(*args)
/opt/conda/lib/python3.8/site-packages/dask/dataframe/core.py in apply_and_enforce()
5474 return meta
5475 if is_dataframe_like(df):
-> 5476 check_matching_columns(meta, df)
5477 c = meta.columns
5478 else:
/opt/conda/lib/python3.8/site-packages/dask/dataframe/utils.py in check_matching_columns()
690 def check_matching_columns(meta, actual):
691 # Need nan_to_num otherwise nan comparison gives False
--> 692 if not np.array_equal(np.nan_to_num(meta.columns), np.nan_to_num(actual.columns)):
693 extra = methods.tolist(actual.columns.difference(meta.columns))
694 missing = methods.tolist(meta.columns.difference(actual.columns))
/opt/conda/lib/python3.8/site-packages/pandas/core/generic.py in __getattr__()
5268 or name in self._accessors
5269 ):
-> 5270 return object.__getattribute__(self, name)
5271 else:
5272 if self._info_axis._can_hold_identifiers_and_holds_name(name):
pandas/_libs/properties.pyx in pandas._libs.properties.AxisProperty.__get__()
/opt/conda/lib/python3.8/site-packages/pandas/core/generic.py in __getattr__()
5268 or name in self._accessors
5269 ):
-> 5270 return object.__getattribute__(self, name)
5271 else:
5272 if self._info_axis._can_hold_identifiers_and_holds_name(name):
AttributeError: 'DataFrame' object has no attribute '_data'
The version of pandas in my Docker image is 1.0.1, so I already tried upgrading pandas (to version 1.2.2), but it didn't work. What am I doing wrong?
My guess is that you have a version mismatch somewhere. What does client.get_versions(check=True) say?
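A sketch of that check, reusing the client object from the question (check=True makes any mismatch fatal rather than silent):

# Compare package versions across the client, scheduler, and all workers.
try:
    client.get_versions(check=True)
    print('client, scheduler, and workers agree')
except ValueError as err:
    print('version mismatch:', err)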
When I try to import a meta graph using saver = tf.train.import_meta_graph(meta_graph_path, clear_devices=True) I get KeyError: 'MaxBytesInUse' from within the importer.
Tensorflow version: 1.7-gpu-python3
OS: Ubuntu 16.04
Here is the stack trace of the error:
/usr/local/lib/python3.5/dist-packages/tensorflow/python/training/saver.py in import_meta_graph(meta_graph_or_file, clear_devices, import_scope, **kwargs)
1953 clear_devices=clear_devices,
1954 import_scope=import_scope,
-> 1955 **kwargs)
1956
1957 if meta_graph_def.HasField("saver_def"):
/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/meta_graph.py in import_scoped_meta_graph(meta_graph_or_file, clear_devices, graph, import_scope, input_map, unbound_inputs_col_name, restore_collections_predicate)
741 name=(import_scope or scope_to_prepend_to_names),
742 input_map=input_map,
--> 743 producer_op_list=producer_op_list)
744
745 # Restores all the other collections.
/usr/local/lib/python3.5/dist-packages/tensorflow/python/util/deprecation.py in new_func(*args, **kwargs)
430 'in a future version' if date is None else ('after %s' % date),
431 instructions)
--> 432 return func(*args, **kwargs)
433 return tf_decorator.make_decorator(func, new_func, 'deprecated',
434 _add_deprecated_arg_notice_to_docstring(
/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/importer.py in import_graph_def(graph_def, input_map, return_elements, name, op_dict, producer_op_list)
458 if producer_op_list is not None:
459 # TODO(skyewm): make a copy of graph_def so we're not mutating the argument?
--> 460 _RemoveDefaultAttrs(op_dict, producer_op_list, graph_def)
461
462 graph = ops.get_default_graph()
/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/importer.py in _RemoveDefaultAttrs(op_dict, producer_op_list, graph_def)
225 # Remove any default attr values that aren't in op_def.
226 if node.op in producer_op_dict:
--> 227 op_def = op_dict[node.op]
228 producer_op_def = producer_op_dict[node.op]
229 # We make a copy of node.attr to iterate through since we may modify
KeyError: 'MaxBytesInUse'
Add dir(tf.contrib) before calling import_meta_graph. Merely accessing tf.contrib makes TensorFlow load and register the contrib ops, including MaxBytesInUse from tf.contrib.memory_stats, so the importer can resolve them.
See the link: https://github.com/tensorflow/tensorflow/issues/10130
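A minimal sketch of that fix (assuming TensorFlow 1.x and the same meta_graph_path as in the question):

import tensorflow as tf

# Touching tf.contrib forces TF 1.x to load and register contrib ops
# (MaxBytesInUse comes from tf.contrib.memory_stats) before the import.
dir(tf.contrib)

saver = tf.train.import_meta_graph(meta_graph_path, clear_devices=True)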