s3fs FileNotFoundError - amazon-s3

I am only able to gain limited, top-level access to my AWS S3 account. I can see the buckets, but not their contents: neither subfolders nor files. I'm running everything from inside a conda environment. I've tried accessing files in both private and public buckets without success. What am I doing wrong?
This block of code works as expected:
>>> import s3fs
>>> AKEY = 'XXXX'
>>> SKEY = 'XXXX'
>>> fs = s3fs.S3FileSystem(key=AKEY, secret=SKEY)
>>> fs.ls('s3://')
['my-bucket-1',
'my-bucket-2',
'my-bucket-3']
This block doesn't:
>>> fs.ls('s3://my-bucket-1')
[]
What I expect:
>>> fs.ls('s3://my-bucket-1')
['my-bucket-1/test.txt',
'my-bucket-1/test.csv']
When I try to open a file, I get a FileNotFoundError:
import pandas as pd
pd.read_csv(
    's3://my-bucket-1/test.csv',
    storage_options={'key': AKEY, 'secret': SKEY}
)
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
Cell In[8], line 2
1 import pandas as pd
----> 2 pd.read_csv(
3 's3://my-bucket-1/test.csv',
4 storage_options={'key':AKEY,'secret':SKEY}
5 )
File ~\anaconda3\envs\env-2\lib\site-packages\pandas\util\_decorators.py:211, in deprecate_kwarg.<locals>._deprecate_kwarg.<locals>.wrapper(*args, **kwargs)
209 else:
210 kwargs[new_arg_name] = new_arg_value
--> 211 return func(*args, **kwargs)
File ~\anaconda3\envs\env-2\lib\site-packages\pandas\util\_decorators.py:331, in deprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper(*args, **kwargs)
325 if len(args) > num_allow_args:
326 warnings.warn(
327 msg.format(arguments=_format_argument_list(allow_args)),
328 FutureWarning,
329 stacklevel=find_stack_level(),
330 )
--> 331 return func(*args, **kwargs)
File ~\anaconda3\envs\env-2\lib\site-packages\pandas\io\parsers\readers.py:950, in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, error_bad_lines, warn_bad_lines, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options)
935 kwds_defaults = _refine_defaults_read(
936 dialect,
937 delimiter,
(...)
946 defaults={"delimiter": ","},
947 )
948 kwds.update(kwds_defaults)
--> 950 return _read(filepath_or_buffer, kwds)
File ~\anaconda3\envs\env-2\lib\site-packages\pandas\io\parsers\readers.py:605, in _read(filepath_or_buffer, kwds)
602 _validate_names(kwds.get("names", None))
604 # Create the parser.
--> 605 parser = TextFileReader(filepath_or_buffer, **kwds)
607 if chunksize or iterator:
608 return parser
File ~\anaconda3\envs\env-2\lib\site-packages\pandas\io\parsers\readers.py:1442, in TextFileReader.__init__(self, f, engine, **kwds)
1439 self.options["has_index_names"] = kwds["has_index_names"]
1441 self.handles: IOHandles | None = None
-> 1442 self._engine = self._make_engine(f, self.engine)
File ~\anaconda3\envs\env-2\lib\site-packages\pandas\io\parsers\readers.py:1735, in TextFileReader._make_engine(self, f, engine)
1733 if "b" not in mode:
1734 mode += "b"
-> 1735 self.handles = get_handle(
1736 f,
1737 mode,
1738 encoding=self.options.get("encoding", None),
1739 compression=self.options.get("compression", None),
1740 memory_map=self.options.get("memory_map", False),
1741 is_text=is_text,
1742 errors=self.options.get("encoding_errors", "strict"),
1743 storage_options=self.options.get("storage_options", None),
1744 )
1745 assert self.handles is not None
1746 f = self.handles.handle
File ~\anaconda3\envs\env-2\lib\site-packages\pandas\io\common.py:713, in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)
710 codecs.lookup_error(errors)
712 # open URLs
--> 713 ioargs = _get_filepath_or_buffer(
714 path_or_buf,
715 encoding=encoding,
716 compression=compression,
717 mode=mode,
718 storage_options=storage_options,
719 )
721 handle = ioargs.filepath_or_buffer
722 handles: list[BaseBuffer]
File ~\anaconda3\envs\env-2\lib\site-packages\pandas\io\common.py:409, in _get_filepath_or_buffer(filepath_or_buffer, encoding, compression, mode, storage_options)
406 pass
408 try:
--> 409 file_obj = fsspec.open(
410 filepath_or_buffer, mode=fsspec_mode, **(storage_options or {})
411 ).open()
412 # GH 34626 Reads from Public Buckets without Credentials needs anon=True
413 except tuple(err_types_to_retry_with_anon):
File ~\anaconda3\envs\env-2\lib\site-packages\fsspec\core.py:135, in OpenFile.open(self)
128 def open(self):
129 """Materialise this as a real open file without context
130
131 The OpenFile object should be explicitly closed to avoid enclosed file
132 instances persisting. You must, therefore, keep a reference to the OpenFile
133 during the life of the file-like it generates.
134 """
--> 135 return self.__enter__()
File ~\anaconda3\envs\env-2\lib\site-packages\fsspec\core.py:103, in OpenFile.__enter__(self)
100 def __enter__(self):
101 mode = self.mode.replace("t", "").replace("b", "") + "b"
--> 103 f = self.fs.open(self.path, mode=mode)
105 self.fobjects = [f]
107 if self.compression is not None:
File ~\anaconda3\envs\env-2\lib\site-packages\fsspec\spec.py:1106, in AbstractFileSystem.open(self, path, mode, block_size, cache_options, compression, **kwargs)
1104 else:
1105 ac = kwargs.pop("autocommit", not self._intrans)
-> 1106 f = self._open(
1107 path,
1108 mode=mode,
1109 block_size=block_size,
1110 autocommit=ac,
1111 cache_options=cache_options,
1112 **kwargs,
1113 )
1114 if compression is not None:
1115 from fsspec.compression import compr
File ~\anaconda3\envs\env-2\lib\site-packages\s3fs\core.py:640, in S3FileSystem._open(self, path, mode, block_size, acl, version_id, fill_cache, cache_type, autocommit, requester_pays, cache_options, **kwargs)
637 if cache_type is None:
638 cache_type = self.default_cache_type
--> 640 return S3File(
641 self,
642 path,
643 mode,
644 block_size=block_size,
645 acl=acl,
646 version_id=version_id,
647 fill_cache=fill_cache,
648 s3_additional_kwargs=kw,
649 cache_type=cache_type,
650 autocommit=autocommit,
651 requester_pays=requester_pays,
652 cache_options=cache_options,
653 )
File ~\anaconda3\envs\env-2\lib\site-packages\s3fs\core.py:1989, in S3File.__init__(self, s3, path, mode, block_size, acl, version_id, fill_cache, s3_additional_kwargs, autocommit, cache_type, requester_pays, cache_options)
1987 self.details = s3.info(path)
1988 self.version_id = self.details.get("VersionId")
-> 1989 super().__init__(
1990 s3,
1991 path,
1992 mode,
1993 block_size,
1994 autocommit=autocommit,
1995 cache_type=cache_type,
1996 cache_options=cache_options,
1997 )
1998 self.s3 = self.fs # compatibility
2000 # when not using autocommit we want to have transactional state to manage
File ~\anaconda3\envs\env-2\lib\site-packages\fsspec\spec.py:1462, in AbstractBufferedFile.__init__(self, fs, path, mode, block_size, autocommit, cache_type, cache_options, size, **kwargs)
1460 self.size = size
1461 else:
-> 1462 self.size = self.details["size"]
1463 self.cache = caches[cache_type](
1464 self.blocksize, self._fetch_range, self.size, **cache_options
1465 )
1466 else:
File ~\anaconda3\envs\env-2\lib\site-packages\fsspec\spec.py:1475, in AbstractBufferedFile.details(self)
1472 @property
1473 def details(self):
1474 if self._details is None:
-> 1475 self._details = self.fs.info(self.path)
1476 return self._details
File ~\anaconda3\envs\env-2\lib\site-packages\fsspec\asyn.py:113, in sync_wrapper.<locals>.wrapper(*args, **kwargs)
110 @functools.wraps(func)
111 def wrapper(*args, **kwargs):
112 self = obj or args[0]
--> 113 return sync(self.loop, func, *args, **kwargs)
File ~\anaconda3\envs\env-2\lib\site-packages\fsspec\asyn.py:98, in sync(loop, func, timeout, *args, **kwargs)
96 raise FSTimeoutError from return_result
97 elif isinstance(return_result, BaseException):
---> 98 raise return_result
99 else:
100 return return_result
File ~\anaconda3\envs\env-2\lib\site-packages\fsspec\asyn.py:53, in _runner(event, coro, result, timeout)
51 coro = asyncio.wait_for(coro, timeout=timeout)
52 try:
---> 53 result[0] = await coro
54 except Exception as ex:
55 result[0] = ex
File ~\anaconda3\envs\env-2\lib\site-packages\s3fs\core.py:1257, in S3FileSystem._info(self, path, bucket, key, refresh, version_id)
1245 if (
1246 out.get("KeyCount", 0) > 0
1247 or out.get("Contents", [])
1248 or out.get("CommonPrefixes", [])
1249 ):
1250 return {
1251 "name": "/".join([bucket, key]),
1252 "type": "directory",
1253 "size": 0,
1254 "StorageClass": "DIRECTORY",
1255 }
-> 1257 raise FileNotFoundError(path)
1258 except ClientError as e:
1259 raise translate_boto_error(e, set_cause=False)
FileNotFoundError: my-bucket-1/test.csv
Versions: s3fs==2022.11.0, aiobotocore==2.4.0, botocore==1.27.59
Trying anonymous access to a public bucket also fails, with a ParseError:
>>> fs = s3fs.S3FileSystem(anon=True)
>>> fs.ls('s3://dask-data/nyc-taxi/2015')
ParseError
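A few hedged diagnostics can narrow this down (these calls are not from the original post; invalidate_cache, refresh=True, and find are standard s3fs/fsspec APIs). A botocore ParseError on a public bucket usually means the HTTP response was not S3's XML at all, which points to a proxy or custom endpoint rather than permissions, so the endpoint in use is worth printing:
fs.invalidate_cache()                      # drop any stale directory listings
print(fs.ls('my-bucket-1', refresh=True))  # re-list, bypassing the cache
print(fs.find('my-bucket-1'))              # walk objects directly instead of listing prefixes
print(fs.s3.meta.endpoint_url)             # endpoint of the underlying botocore client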

Check the bucket policy / IAM role that grants you permissions on the bucket. The resource ARN should have /* after the bucket name:
"Action": "s3:GetObject",
"Resource": "arn:aws:s3:::my-bucket-1/*"
to allow you to access the objects in the bucket, not just the bucket itself.
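For context, a minimal sketch of the full policy shape this answer describes: listing a bucket needs s3:ListBucket on the bucket ARN itself, while reading files needs s3:GetObject on the /* object ARN. The principal ARN and the boto3 call below are illustrative assumptions, not details from the thread:
import json
import boto3

policy = {
    "Version": "2012-10-17",
    "Statement": [
        {   # bucket-level statement: lets listing calls such as fs.ls() see the keys
            "Effect": "Allow",
            "Principal": {"AWS": "arn:aws:iam::123456789012:user/example-user"},  # hypothetical
            "Action": "s3:ListBucket",
            "Resource": "arn:aws:s3:::my-bucket-1",
        },
        {   # object-level statement: note the /*, granting access to the objects themselves
            "Effect": "Allow",
            "Principal": {"AWS": "arn:aws:iam::123456789012:user/example-user"},  # hypothetical
            "Action": "s3:GetObject",
            "Resource": "arn:aws:s3:::my-bucket-1/*",
        },
    ],
}
boto3.client("s3").put_bucket_policy(Bucket="my-bucket-1", Policy=json.dumps(policy))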

Have you tried boto3? s3fs is no longer supported.
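For what it's worth, s3fs is still actively maintained as part of the fsspec family, but boto3 makes a useful cross-check: if the same credentials can list and fetch the object directly, the problem sits in the s3fs layer rather than in IAM. A sketch reusing the bucket and key from the question:
import boto3

s3 = boto3.client("s3", aws_access_key_id=AKEY, aws_secret_access_key=SKEY)
resp = s3.list_objects_v2(Bucket="my-bucket-1")
print([obj["Key"] for obj in resp.get("Contents", [])])  # should show test.txt, test.csv
body = s3.get_object(Bucket="my-bucket-1", Key="test.csv")["Body"].read()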

Related

How to understand read_excel in Pandas

I'm trying to import some public data from the web but can't understand the error.
My code:
import pandas as pd
df2022 = pd.read_excel("https://ofslivefs.blob.core.windows.net/files/NSS%20data%202022/September/NSS2022_summary_data.xlsx")
It returns this:
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
/var/folders/v_/yq26pm194xj5ckqy8p_njwc00000gn/T/ipykernel_89117/2424267382.py in <module>
----> 1 df2022 = pd.read_excel("https://ofslivefs.blob.core.windows.net/files/NSS%20data%202022/September/NSS2022_summary_data.xlsx")
~/opt/anaconda3/lib/python3.9/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
209 else:
210 kwargs[new_arg_name] = new_arg_value
--> 211 return func(*args, **kwargs)
212
213 return cast(F, wrapper)
~/opt/anaconda3/lib/python3.9/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
329 stacklevel=find_stack_level(),
330 )
--> 331 return func(*args, **kwargs)
332
333 # error: "Callable[[VarArg(Any), KwArg(Any)], Any]" has no
~/opt/anaconda3/lib/python3.9/site-packages/pandas/io/excel/_base.py in read_excel(io, sheet_name, header, names, index_col, usecols, squeeze, dtype, engine, converters, true_values, false_values, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, parse_dates, date_parser, thousands, decimal, comment, skipfooter, convert_float, mangle_dupe_cols, storage_options)
480 if not isinstance(io, ExcelFile):
481 should_close = True
--> 482 io = ExcelFile(io, storage_options=storage_options, engine=engine)
483 elif engine and engine != io.engine:
484 raise ValueError(
~/opt/anaconda3/lib/python3.9/site-packages/pandas/io/excel/_base.py in __init__(self, path_or_buffer, engine, storage_options)
1693 self.storage_options = storage_options
1694
-> 1695 self._reader = self._engines[engine](self._io, storage_options=storage_options)
1696
1697 def __fspath__(self):
~/opt/anaconda3/lib/python3.9/site-packages/pandas/io/excel/_openpyxl.py in __init__(self, filepath_or_buffer, storage_options)
555 """
556 import_optional_dependency("openpyxl")
--> 557 super().__init__(filepath_or_buffer, storage_options=storage_options)
558
559 @property
~/opt/anaconda3/lib/python3.9/site-packages/pandas/io/excel/_base.py in __init__(self, filepath_or_buffer, storage_options)
543 self.handles.handle.seek(0)
544 try:
--> 545 self.book = self.load_workbook(self.handles.handle)
546 except Exception:
547 self.close()
~/opt/anaconda3/lib/python3.9/site-packages/pandas/io/excel/_openpyxl.py in load_workbook(self, filepath_or_buffer)
566 from openpyxl import load_workbook
567
--> 568 return load_workbook(
569 filepath_or_buffer, read_only=True, data_only=True, keep_links=False
570 )
~/opt/anaconda3/lib/python3.9/site-packages/openpyxl/reader/excel.py in load_workbook(filename, read_only, keep_vba, data_only, keep_links)
315 reader = ExcelReader(filename, read_only, keep_vba,
316 data_only, keep_links)
--> 317 reader.read()
318 return reader.wb
~/opt/anaconda3/lib/python3.9/site-packages/openpyxl/reader/excel.py in read(self)
281 apply_stylesheet(self.archive, self.wb)
282 self.read_worksheets()
--> 283 self.parser.assign_names()
284 if not self.read_only:
285 self.archive.close()
~/opt/anaconda3/lib/python3.9/site-packages/openpyxl/reader/workbook.py in assign_names(self)
100 reserved = defn.is_reserved
101 if reserved in ("Print_Titles", "Print_Area"):
--> 102 sheet = self.wb._sheets[defn.localSheetId]
103 if reserved == "Print_Titles":
104 rows, cols = _unpack_print_titles(defn)
IndexError: list index out of range
At this point I would traditionally download and convert to CSV, but I want to access it straight from the web.
Specifying the sheet (which I guess I could access as sheet_name="Q27 Providers (benchmarked)") doesn't work either.
It looks like the xlsx file is broken, so it can't be parsed. Did you try to open that xlsx file yourself?
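One hedged way to test that suggestion without leaving Python is to download the bytes and run the same openpyxl call that pandas makes internally; if this reproduces the IndexError, the workbook's defined names are malformed and no read_excel option will help. The URL is from the question; everything else is illustrative:
import io
import requests
from openpyxl import load_workbook

url = ("https://ofslivefs.blob.core.windows.net/files/"
       "NSS%20data%202022/September/NSS2022_summary_data.xlsx")
raw = requests.get(url).content
# the same call pandas' openpyxl engine makes under the hood
wb = load_workbook(io.BytesIO(raw), read_only=True, data_only=True, keep_links=False)
print(wb.sheetnames)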

How to load a dataframe to postgresql 14

I have tried to load my dataframe into PostgreSQL, specifically into the PostgreSQL 14 server (I have two PostgreSQL servers: 9.3 running on port 5434 and 14 running on port 5433), with this command:
from sqlalchemy import create_engine
engine = create_engine('postgresql://postgres:password@localhost:5433/MYDATABASE')
df.to_sql('My_Table', engine)
This is the error I get. I've also tried other approaches, but it's always the same error; I guess it's related to the two servers I'm running:
---------------------------------------------------------------------------
OperationalError Traceback (most recent call last)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\base.py:3280, in Engine._wrap_pool_connect(self, fn, connection)
3279 try:
-> 3280 return fn()
3281 except dialect.dbapi.Error as e:
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\base.py:310, in Pool.connect(self)
303 """Return a DBAPI connection from the pool.
304
305 The connection is instrumented such that when its
(...)
308
309 """
--> 310 return _ConnectionFairy._checkout(self)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\base.py:868, in _ConnectionFairy._checkout(cls, pool, threadconns, fairy)
867 if not fairy:
--> 868 fairy = _ConnectionRecord.checkout(pool)
870 fairy._pool = pool
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\base.py:476, in _ConnectionRecord.checkout(cls, pool)
474 @classmethod
475 def checkout(cls, pool):
--> 476 rec = pool._do_get()
477 try:
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\impl.py:145, in QueuePool._do_get(self)
144 except:
--> 145 with util.safe_reraise():
146 self._dec_overflow()
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\util\langhelpers.py:70, in safe_reraise.__exit__(self, type_, value, traceback)
69 if not self.warn_only:
---> 70 compat.raise_(
71 exc_value,
72 with_traceback=exc_tb,
73 )
74 else:
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\util\compat.py:207, in raise_(***failed resolving arguments***)
206 try:
--> 207 raise exception
208 finally:
209 # credit to
210 # https://cosmicpercolator.com/2016/01/13/exception-leaks-in-python-2-and-3/
211 # as the __traceback__ object creates a cycle
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\impl.py:143, in QueuePool._do_get(self)
142 try:
--> 143 return self._create_connection()
144 except:
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\base.py:256, in Pool._create_connection(self)
254 """Called by subclasses to create a new ConnectionRecord."""
--> 256 return _ConnectionRecord(self)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\base.py:371, in _ConnectionRecord.__init__(self, pool, connect)
370 if connect:
--> 371 self.__connect()
372 self.finalize_callback = deque()
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\base.py:665, in _ConnectionRecord.__connect(self)
664 except Exception as e:
--> 665 with util.safe_reraise():
666 pool.logger.debug("Error on connect(): %s", e)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\util\langhelpers.py:70, in safe_reraise.__exit__(self, type_, value, traceback)
69 if not self.warn_only:
---> 70 compat.raise_(
71 exc_value,
72 with_traceback=exc_tb,
73 )
74 else:
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\util\compat.py:207, in raise_(***failed resolving arguments***)
206 try:
--> 207 raise exception
208 finally:
209 # credit to
210 # https://cosmicpercolator.com/2016/01/13/exception-leaks-in-python-2-and-3/
211 # as the __traceback__ object creates a cycle
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\base.py:661, in _ConnectionRecord.__connect(self)
660 self.starttime = time.time()
--> 661 self.dbapi_connection = connection = pool._invoke_creator(self)
662 pool.logger.debug("Created new connection %r", connection)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\create.py:590, in create_engine.<locals>.connect(connection_record)
589 return connection
--> 590 return dialect.connect(*cargs, **cparams)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\default.py:597, in DefaultDialect.connect(self, *cargs, **cparams)
595 def connect(self, *cargs, **cparams):
596 # inherits the docstring from interfaces.Dialect.connect
--> 597 return self.dbapi.connect(*cargs, **cparams)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\psycopg2\__init__.py:122, in connect(dsn, connection_factory, cursor_factory, **kwargs)
121 dsn = _ext.make_dsn(dsn, **kwargs)
--> 122 conn = _connect(dsn, connection_factory=connection_factory, **kwasync)
123 if cursor_factory is not None:
OperationalError:
The above exception was the direct cause of the following exception:
OperationalError Traceback (most recent call last)
Input In [105], in <cell line: 3>()
1 from sqlalchemy import create_engine
2 engine = create_engine('postgresql://postgres:password@localhost:5433/MYDATABASE')
----> 3 df.to_sql('My_Table', engine)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pandas\core\generic.py:2951, in NDFrame.to_sql(self, name, con, schema, if_exists, index, index_label, chunksize, dtype, method)
2794 """
2795 Write records stored in a DataFrame to a SQL database.
2796
(...)
2947 [(1,), (None,), (2,)]
2948 """ # noqa:E501
2949 from pandas.io import sql
-> 2951 return sql.to_sql(
2952 self,
2953 name,
2954 con,
2955 schema=schema,
2956 if_exists=if_exists,
2957 index=index,
2958 index_label=index_label,
2959 chunksize=chunksize,
2960 dtype=dtype,
2961 method=method,
2962 )
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pandas\io\sql.py:697, in to_sql(frame, name, con, schema, if_exists, index, index_label, chunksize, dtype, method, engine, **engine_kwargs)
692 elif not isinstance(frame, DataFrame):
693 raise NotImplementedError(
694 "'frame' argument should be either a Series or a DataFrame"
695 )
--> 697 return pandas_sql.to_sql(
698 frame,
699 name,
700 if_exists=if_exists,
701 index=index,
702 index_label=index_label,
703 schema=schema,
704 chunksize=chunksize,
705 dtype=dtype,
706 method=method,
707 engine=engine,
708 **engine_kwargs,
709 )
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pandas\io\sql.py:1729, in SQLDatabase.to_sql(self, frame, name, if_exists, index, index_label, schema, chunksize, dtype, method, engine, **engine_kwargs)
1679 """
1680 Write records stored in a DataFrame to a SQL database.
1681
(...)
1725 Any additional kwargs are passed to the engine.
1726 """
1727 sql_engine = get_engine(engine)
-> 1729 table = self.prep_table(
1730 frame=frame,
1731 name=name,
1732 if_exists=if_exists,
1733 index=index,
1734 index_label=index_label,
1735 schema=schema,
1736 dtype=dtype,
1737 )
1739 total_inserted = sql_engine.insert_records(
1740 table=table,
1741 con=self.connectable,
(...)
1748 **engine_kwargs,
1749 )
1751 self.check_case_sensitive(name=name, schema=schema)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pandas\io\sql.py:1628, in SQLDatabase.prep_table(self, frame, name, if_exists, index, index_label, schema, dtype)
1616 raise ValueError(f"The type of {col} is not a SQLAlchemy type")
1618 table = SQLTable(
1619 name,
1620 self,
(...)
1626 dtype=dtype,
1627 )
-> 1628 table.create()
1629 return table
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pandas\io\sql.py:831, in SQLTable.create(self)
830 def create(self):
--> 831 if self.exists():
832 if self.if_exists == "fail":
833 raise ValueError(f"Table '{self.name}' already exists.")
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pandas\io\sql.py:815, in SQLTable.exists(self)
814 def exists(self):
--> 815 return self.pd_sql.has_table(self.name, self.schema)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pandas\io\sql.py:1762, in SQLDatabase.has_table(self, name, schema)
1759 if _gt14():
1760 from sqlalchemy import inspect
-> 1762 insp = inspect(self.connectable)
1763 return insp.has_table(name, schema or self.meta.schema)
1764 else:
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\inspection.py:64, in inspect(subject, raiseerr)
62 if reg is True:
63 return subject
---> 64 ret = reg(subject)
65 if ret is not None:
66 break
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\reflection.py:182, in Inspector._engine_insp(bind)
180 @inspection._inspects(Engine)
181 def _engine_insp(bind):
--> 182 return Inspector._construct(Inspector._init_engine, bind)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\reflection.py:117, in Inspector._construct(cls, init, bind)
114 cls = bind.dialect.inspector
116 self = cls.__new__(cls)
--> 117 init(self, bind)
118 return self
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\reflection.py:128, in Inspector._init_engine(self, engine)
126 def _init_engine(self, engine):
127 self.bind = self.engine = engine
--> 128 engine.connect().close()
129 self._op_context_requires_connect = True
130 self.dialect = self.engine.dialect
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\base.py:3234, in Engine.connect(self, close_with_result)
3219 def connect(self, close_with_result=False):
3220 """Return a new :class:`_engine.Connection` object.
3221
3222 The :class:`_engine.Connection` object is a facade that uses a DBAPI
(...)
3231
3232 """
-> 3234 return self._connection_cls(self, close_with_result=close_with_result)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\base.py:96, in Connection.__init__(self, engine, connection, close_with_result, _branch_from, _execution_options, _dispatch, _has_events, _allow_revalidate)
91 self._has_events = _branch_from._has_events
92 else:
93 self._dbapi_connection = (
94 connection
95 if connection is not None
---> 96 else engine.raw_connection()
97 )
99 self._transaction = self._nested_transaction = None
100 self.__savepoint_seq = 0
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\base.py:3313, in Engine.raw_connection(self, _connection)
3291 def raw_connection(self, _connection=None):
3292 """Return a "raw" DBAPI connection from the connection pool.
3293
3294 The returned object is a proxied version of the DBAPI
(...)
3311
3312 """
-> 3313 return self._wrap_pool_connect(self.pool.connect, _connection)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\base.py:3283, in Engine._wrap_pool_connect(self, fn, connection)
3281 except dialect.dbapi.Error as e:
3282 if connection is None:
-> 3283 Connection._handle_dbapi_exception_noconnection(
3284 e, dialect, self
3285 )
3286 else:
3287 util.raise_(
3288 sys.exc_info()[1], with_traceback=sys.exc_info()[2]
3289 )
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\base.py:2117, in Connection._handle_dbapi_exception_noconnection(cls, e, dialect, engine)
2115 util.raise_(newraise, with_traceback=exc_info[2], from_=e)
2116 elif should_wrap:
-> 2117 util.raise_(
2118 sqlalchemy_exception, with_traceback=exc_info[2], from_=e
2119 )
2120 else:
2121 util.raise_(exc_info[1], with_traceback=exc_info[2])
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\util\compat.py:207, in raise_(***failed resolving arguments***)
204 exception.__cause__ = replace_context
206 try:
--> 207 raise exception
208 finally:
209 # credit to
210 # https://cosmicpercolator.com/2016/01/13/exception-leaks-in-python-2-and-3/
211 # as the __traceback__ object creates a cycle
212 del exception, replace_context, from_, with_traceback
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\base.py:3280, in Engine._wrap_pool_connect(self, fn, connection)
3278 dialect = self.dialect
3279 try:
-> 3280 return fn()
3281 except dialect.dbapi.Error as e:
3282 if connection is None:
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\base.py:310, in Pool.connect(self)
302 def connect(self):
303 """Return a DBAPI connection from the pool.
304
305 The connection is instrumented such that when its
(...)
308
309 """
--> 310 return _ConnectionFairy._checkout(self)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\base.py:868, in _ConnectionFairy._checkout(cls, pool, threadconns, fairy)
865 @classmethod
866 def _checkout(cls, pool, threadconns=None, fairy=None):
867 if not fairy:
--> 868 fairy = _ConnectionRecord.checkout(pool)
870 fairy._pool = pool
871 fairy._counter = 0
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\base.py:476, in _ConnectionRecord.checkout(cls, pool)
474 @classmethod
475 def checkout(cls, pool):
--> 476 rec = pool._do_get()
477 try:
478 dbapi_connection = rec.get_connection()
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\impl.py:145, in QueuePool._do_get(self)
143 return self._create_connection()
144 except:
--> 145 with util.safe_reraise():
146 self._dec_overflow()
147 else:
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\util\langhelpers.py:70, in safe_reraise.__exit__(self, type_, value, traceback)
68 self._exc_info = None # remove potential circular references
69 if not self.warn_only:
---> 70 compat.raise_(
71 exc_value,
72 with_traceback=exc_tb,
73 )
74 else:
75 if not compat.py3k and self._exc_info and self._exc_info[1]:
76 # emulate Py3K's behavior of telling us when an exception
77 # occurs in an exception handler.
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\util\compat.py:207, in raise_(***failed resolving arguments***)
204 exception.__cause__ = replace_context
206 try:
--> 207 raise exception
208 finally:
209 # credit to
210 # https://cosmicpercolator.com/2016/01/13/exception-leaks-in-python-2-and-3/
211 # as the __traceback__ object creates a cycle
212 del exception, replace_context, from_, with_traceback
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\impl.py:143, in QueuePool._do_get(self)
141 if self._inc_overflow():
142 try:
--> 143 return self._create_connection()
144 except:
145 with util.safe_reraise():
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\base.py:256, in Pool._create_connection(self)
253 def _create_connection(self):
254 """Called by subclasses to create a new ConnectionRecord."""
--> 256 return _ConnectionRecord(self)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\base.py:371, in _ConnectionRecord.__init__(self, pool, connect)
369 self.__pool = pool
370 if connect:
--> 371 self.__connect()
372 self.finalize_callback = deque()
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\base.py:665, in _ConnectionRecord.__connect(self)
663 self.fresh = True
664 except Exception as e:
--> 665 with util.safe_reraise():
666 pool.logger.debug("Error on connect(): %s", e)
667 else:
668 # in SQLAlchemy 1.4 the first_connect event is not used by
669 # the engine, so this will usually not be set
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\util\langhelpers.py:70, in safe_reraise.__exit__(self, type_, value, traceback)
68 self._exc_info = None # remove potential circular references
69 if not self.warn_only:
---> 70 compat.raise_(
71 exc_value,
72 with_traceback=exc_tb,
73 )
74 else:
75 if not compat.py3k and self._exc_info and self._exc_info[1]:
76 # emulate Py3K's behavior of telling us when an exception
77 # occurs in an exception handler.
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\util\compat.py:207, in raise_(***failed resolving arguments***)
204 exception.__cause__ = replace_context
206 try:
--> 207 raise exception
208 finally:
209 # credit to
210 # https://cosmicpercolator.com/2016/01/13/exception-leaks-in-python-2-and-3/
211 # as the __traceback__ object creates a cycle
212 del exception, replace_context, from_, with_traceback
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\base.py:661, in _ConnectionRecord.__connect(self)
659 try:
660 self.starttime = time.time()
--> 661 self.dbapi_connection = connection = pool._invoke_creator(self)
662 pool.logger.debug("Created new connection %r", connection)
663 self.fresh = True
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\create.py:590, in create_engine.<locals>.connect(connection_record)
588 if connection is not None:
589 return connection
--> 590 return dialect.connect(*cargs, **cparams)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\default.py:597, in DefaultDialect.connect(self, *cargs, **cparams)
595 def connect(self, *cargs, **cparams):
596 # inherits the docstring from interfaces.Dialect.connect
--> 597 return self.dbapi.connect(*cargs, **cparams)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\psycopg2\__init__.py:122, in connect(dsn, connection_factory, cursor_factory, **kwargs)
119 kwasync['async_'] = kwargs.pop('async_')
121 dsn = _ext.make_dsn(dsn, **kwargs)
--> 122 conn = _connect(dsn, connection_factory=connection_factory, **kwasync)
123 if cursor_factory is not None:
124 conn.cursor_factory = cursor_factory
OperationalError: (psycopg2.OperationalError)
(Background on this error at: https://sqlalche.me/e/14/e3q8)
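The wrapped psycopg2.OperationalError arrives here with an empty message, which makes it hard to debug through SQLAlchemy. A hedged first step is to connect with psycopg2 directly, using the same parameters as the question; its error text (connection refused, password authentication failed, database does not exist, and so on) usually identifies which server or credential is at fault:
import psycopg2

conn = psycopg2.connect(
    host="localhost",
    port=5433,              # the PostgreSQL 14 instance from the question
    dbname="MYDATABASE",
    user="postgres",
    password="password",
)
print(conn.server_version)  # e.g. 140005 for PostgreSQL 14.5
conn.close()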

tight_layout KeyError default, matplotlib widget

Using JupyterLab, I receive a KeyError: 'default' when using plt.tight_layout() in combination with %matplotlib widget. The following code reproduces the issue:
import matplotlib.pyplot as plt
import numpy as np
%matplotlib widget
x = np.linspace(0, 10)
y = x**2
plt.plot(x, y)
plt.tight_layout()
The complete error message is the following:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~/anaconda3/lib/python3.8/site-packages/matplotlib/backend_bases.py in _wait_cursor_for_draw_cm(self)
3024 try:
-> 3025 self.canvas.set_cursor(tools.Cursors.WAIT)
3026 yield
~/anaconda3/lib/python3.8/site-packages/matplotlib/backends/backend_webagg_core.py in set_cursor(self, cursor)
209 }, cursor=cursor)
--> 210 self.send_event('cursor', cursor=cursor)
211
~/anaconda3/lib/python3.8/site-packages/matplotlib/backends/backend_webagg_core.py in send_event(self, event_type, **kwargs)
391 if self.manager:
--> 392 self.manager._send_event(event_type, **kwargs)
393
~/anaconda3/lib/python3.8/site-packages/matplotlib/backends/backend_webagg_core.py in _send_event(self, event_type, **kwargs)
540 for s in self.web_sockets:
--> 541 s.send_json(payload)
542
~/anaconda3/lib/python3.8/site-packages/ipympl/backend_nbagg.py in send_json(self, content)
180 if content['type'] == 'cursor':
--> 181 self._cursor = cursors_str[content['cursor']]
182
KeyError: 'wait'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
/tmp/ipykernel_119035/3466922198.py in <module>
7 y=x**2
8 plt.plot(x,y)
----> 9 plt.tight_layout()
~/anaconda3/lib/python3.8/site-packages/matplotlib/pyplot.py in tight_layout(pad, h_pad, w_pad, rect)
2300 @_copy_docstring_and_deprecators(Figure.tight_layout)
2301 def tight_layout(*, pad=1.08, h_pad=None, w_pad=None, rect=None):
-> 2302 return gcf().tight_layout(pad=pad, h_pad=h_pad, w_pad=w_pad, rect=rect)
2303
2304
~/anaconda3/lib/python3.8/site-packages/matplotlib/figure.py in tight_layout(self, pad, h_pad, w_pad, rect)
3186 "compatible with tight_layout, so results "
3187 "might be incorrect.")
-> 3188 renderer = _get_renderer(self)
3189 with getattr(renderer, "_draw_disabled", nullcontext)():
3190 kwargs = get_tight_layout_figure(
~/anaconda3/lib/python3.8/site-packages/matplotlib/backend_bases.py in _get_renderer(figure, print_method)
1542 figure.canvas._get_output_canvas(None, fmt), f"print_{fmt}")
1543 try:
-> 1544 print_method(io.BytesIO())
1545 except Done as exc:
1546 renderer, = figure._cachedRenderer, = exc.args
~/anaconda3/lib/python3.8/site-packages/matplotlib/backend_bases.py in wrapper(*args, **kwargs)
1641 kwargs.pop(arg)
1642
-> 1643 return func(*args, **kwargs)
1644
1645 return wrapper
~/anaconda3/lib/python3.8/site-packages/matplotlib/_api/deprecation.py in wrapper(*inner_args, **inner_kwargs)
410 else deprecation_addendum,
411 **kwargs)
--> 412 return func(*inner_args, **inner_kwargs)
413
414 DECORATORS[wrapper] = decorator
~/anaconda3/lib/python3.8/site-packages/matplotlib/backends/backend_agg.py in print_png(self, filename_or_obj, metadata, pil_kwargs, *args)
538 *metadata*, including the default 'Software' key.
539 """
--> 540 FigureCanvasAgg.draw(self)
541 mpl.image.imsave(
542 filename_or_obj, self.buffer_rgba(), format="png", origin="upper",
~/anaconda3/lib/python3.8/site-packages/matplotlib/backends/backend_agg.py in draw(self)
431 self.renderer = self.get_renderer(cleared=True)
432 # Acquire a lock on the shared font cache.
--> 433 with RendererAgg.lock, \
434 (self.toolbar._wait_cursor_for_draw_cm() if self.toolbar
435 else nullcontext()):
~/anaconda3/lib/python3.8/contextlib.py in __enter__(self)
111 del self.args, self.kwds, self.func
112 try:
--> 113 return next(self.gen)
114 except StopIteration:
115 raise RuntimeError("generator didn't yield") from None
~/anaconda3/lib/python3.8/site-packages/matplotlib/backend_bases.py in _wait_cursor_for_draw_cm(self)
3026 yield
3027 finally:
-> 3028 self.canvas.set_cursor(self._lastCursor)
3029 else:
3030 yield
~/anaconda3/lib/python3.8/site-packages/matplotlib/backends/backend_webagg_core.py in set_cursor(self, cursor)
208 backend_tools.Cursors.RESIZE_VERTICAL: 'ns-resize',
209 }, cursor=cursor)
--> 210 self.send_event('cursor', cursor=cursor)
211
212 def set_image_mode(self, mode):
~/anaconda3/lib/python3.8/site-packages/matplotlib/backends/backend_webagg_core.py in send_event(self, event_type, **kwargs)
390 def send_event(self, event_type, **kwargs):
391 if self.manager:
--> 392 self.manager._send_event(event_type, **kwargs)
393
394
~/anaconda3/lib/python3.8/site-packages/matplotlib/backends/backend_webagg_core.py in _send_event(self, event_type, **kwargs)
539 payload = {'type': event_type, **kwargs}
540 for s in self.web_sockets:
--> 541 s.send_json(payload)
542
543
~/anaconda3/lib/python3.8/site-packages/ipympl/backend_nbagg.py in send_json(self, content)
179 # Change in the widget state?
180 if content['type'] == 'cursor':
--> 181 self._cursor = cursors_str[content['cursor']]
182
183 elif content['type'] == 'message':
KeyError: 'default'
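The traceback ends inside ipympl's send_json, which translates matplotlib's cursor names ('wait', 'default') through its own cursors_str table; a table that predates those names raises exactly this KeyError, so a version mismatch between matplotlib and ipympl is a plausible cause. A hedged check and fix (whether upgrading resolves it in this exact setup is an assumption):
import matplotlib
import ipympl

print(matplotlib.__version__, ipympl.__version__)
# then, in a notebook cell, upgrade the pair together so the cursor
# tables line up again:
# %pip install -U ipympl matplotlib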

plot no longer works after upgrade

I recently upgraded pandas to 1.1.5 (using Python 3.6.4), and I can no longer plot any charts with a datetime index column.
See the example below, where I import a time series from a CSV file. I have also tried registering matplotlib converters in case that was the issue. I get the error message shown below. Incidentally, seaborn also no longer works, but I'm not sure if that's relevant.
Thanks
import matplotlib.pyplot as plt
import pandas as pd
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
df = pd.read_csv('example.csv', parse_dates=True, index_col=0, dayfirst=True)
df.head()
(output of df.head() omitted)
df.plot()
I get the following error if I try to plot:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-37-848b80e64df8> in <module>()
----> 1 df.plot()
C:\Program Files\Anaconda3\lib\site-packages\pandas\plotting\_core.py in __call__(self, *args, **kwargs)
947 data.columns = label_name
948
--> 949 return plot_backend.plot(data, kind=kind, **kwargs)
950
951 __call__.__doc__ = __doc__
C:\Program Files\Anaconda3\lib\site-packages\pandas\plotting\_matplotlib\__init__.py in plot(data, kind, **kwargs)
59 kwargs["ax"] = getattr(ax, "left_ax", ax)
60 plot_obj = PLOT_CLASSES[kind](data, **kwargs)
---> 61 plot_obj.generate()
62 plot_obj.draw()
63 return plot_obj.result
C:\Program Files\Anaconda3\lib\site-packages\pandas\plotting\_matplotlib\core.py in generate(self)
269 self._compute_plot_data()
270 self._setup_subplots()
--> 271 self._make_plot()
272 self._add_table()
273 self._make_legend()
C:\Program Files\Anaconda3\lib\site-packages\pandas\plotting\_matplotlib\core.py in _make_plot(self)
1124 stacking_id=stacking_id,
1125 is_errorbar=is_errorbar,
-> 1126 **kwds,
1127 )
1128 self._add_legend_handle(newlines[0], label, index=i)
C:\Program Files\Anaconda3\lib\site-packages\pandas\plotting\_matplotlib\core.py in _plot(cls, ax, x, y, style, column_num, stacking_id, **kwds)
1143 cls._initialize_stacker(ax, stacking_id, len(y))
1144 y_values = cls._get_stacked_values(ax, stacking_id, y, kwds["label"])
-> 1145 lines = MPLPlot._plot(ax, x, y_values, style=style, **kwds)
1146 cls._update_stacker(ax, stacking_id, y)
1147 return lines
C:\Program Files\Anaconda3\lib\site-packages\pandas\plotting\_matplotlib\converter.py in wrapper(*args, **kwargs)
63 def wrapper(*args, **kwargs):
64 with pandas_converters():
---> 65 return func(*args, **kwargs)
66
67 return wrapper
C:\Program Files\Anaconda3\lib\site-packages\pandas\plotting\_matplotlib\core.py in _plot(cls, ax, x, y, style, is_errorbar, **kwds)
666 else:
667 args = (x, y)
--> 668 return ax.plot(*args, **kwds)
669
670 def _get_index_name(self):
C:\Program Files\Anaconda3\lib\site-packages\matplotlib\__init__.py in inner(ax, *args, **kwargs)
1715 warnings.warn(msg % (label_namer, func.__name__),
1716 RuntimeWarning, stacklevel=2)
-> 1717 return func(ax, *args, **kwargs)
1718 pre_doc = inner.__doc__
1719 if pre_doc is None:
C:\Program Files\Anaconda3\lib\site-packages\matplotlib\axes\_axes.py in plot(self, *args, **kwargs)
1371
1372 for line in self._get_lines(*args, **kwargs):
-> 1373 self.add_line(line)
1374 lines.append(line)
1375
C:\Program Files\Anaconda3\lib\site-packages\matplotlib\axes\_base.py in add_line(self, line)
1777 line.set_clip_path(self.patch)
1778
-> 1779 self._update_line_limits(line)
1780 if not line.get_label():
1781 line.set_label('_line%d' % len(self.lines))
C:\Program Files\Anaconda3\lib\site-packages\matplotlib\axes\_base.py in _update_line_limits(self, line)
1799 Figures out the data limit of the given line, updating self.dataLim.
1800 """
-> 1801 path = line.get_path()
1802 if path.vertices.size == 0:
1803 return
C:\Program Files\Anaconda3\lib\site-packages\matplotlib\lines.py in get_path(self)
955 """
956 if self._invalidy or self._invalidx:
--> 957 self.recache()
958 return self._path
959
C:\Program Files\Anaconda3\lib\site-packages\matplotlib\lines.py in recache(self, always)
655 def recache(self, always=False):
656 if always or self._invalidx:
--> 657 xconv = self.convert_xunits(self._xorig)
658 x = _to_unmasked_float_array(xconv).ravel()
659 else:
C:\Program Files\Anaconda3\lib\site-packages\matplotlib\artist.py in convert_xunits(self, x)
189 if ax is None or ax.xaxis is None:
190 return x
--> 191 return ax.xaxis.convert_units(x)
192
193 def convert_yunits(self, y):
C:\Program Files\Anaconda3\lib\site-packages\matplotlib\axis.py in convert_units(self, x)
1489 return x
1490
-> 1491 ret = self.converter.convert(x, self.units, self)
1492 return ret
1493
C:\Program Files\Anaconda3\lib\site-packages\pandas\plotting\_matplotlib\converter.py in convert(values, unit, axis)
254 values = [DatetimeConverter._convert_1d(v, unit, axis) for v in values]
255 else:
--> 256 values = DatetimeConverter._convert_1d(values, unit, axis)
257 return values
258
C:\Program Files\Anaconda3\lib\site-packages\pandas\plotting\_matplotlib\converter.py in _convert_1d(values, unit, axis)
289 pass
290
--> 291 values = dates.date2num(values)
292
293 return values
C:\Program Files\Anaconda3\lib\site-packages\matplotlib\dates.py in date2num(d)
394 if not d.size:
395 return d
--> 396 return _to_ordinalf_np_vectorized(d)
397
398
C:\Program Files\Anaconda3\lib\site-packages\numpy\lib\function_base.py in __call__(self, *args, **kwargs)
2106 vargs.extend([kwargs[_n] for _n in names])
2107
-> 2108 return self._vectorize_call(func=func, args=vargs)
2109
2110 def _get_ufunc_and_otypes(self, func, args):
C:\Program Files\Anaconda3\lib\site-packages\numpy\lib\function_base.py in _vectorize_call(self, func, args)
2184 res = func()
2185 else:
-> 2186 ufunc, otypes = self._get_ufunc_and_otypes(func=func, args=args)
2187
2188 # Convert args to object arrays first
C:\Program Files\Anaconda3\lib\site-packages\numpy\lib\function_base.py in _get_ufunc_and_otypes(self, func, args)
2144
2145 inputs = [arg.flat[0] for arg in args]
-> 2146 outputs = func(*inputs)
2147
2148 # Performance note: profiling indicates that -- for simple
C:\Program Files\Anaconda3\lib\site-packages\matplotlib\dates.py in _to_ordinalf(dt)
243 tzi = UTC
244
--> 245 base = float(dt.toordinal())
246
247 # If it's sufficiently datetime-like, it will have a `date()` method
AttributeError: 'numpy.datetime64' object has no attribute 'toordinal'
The older version of matplotlib (2.1.2) is out of date and no longer compatible with the newer version of pandas (1.1.5). Upgrading to matplotlib 3.3.4 solves this issue, as discussed in the comments.
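If upgrading matplotlib is not immediately possible, a hedged workaround is to hand matplotlib plain datetime.datetime values instead of numpy.datetime64, which matplotlib 2.x cannot convert; the column name below is hypothetical:
import matplotlib.pyplot as plt

# DatetimeIndex.to_pydatetime() yields datetime.datetime objects,
# which the old date converter handles
plt.plot(df.index.to_pydatetime(), df['value'])  # 'value' is a hypothetical column
plt.show()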

Dask Cluster: AttributeError: 'DataFrame' object has no attribute '_data'

I'm working with a Dask Cluster on GCP. I'm using this code to deploy it:
from dask_cloudprovider.gcp import GCPCluster
from dask.distributed import Client
environment_vars = {
    'EXTRA_PIP_PACKAGES': '"gcsfs"'
}
cluster = GCPCluster(
    n_workers=32,
    docker_image='daskdev/dask:2021.2.0',
    env_vars=environment_vars,
    network='my-network',
    # filesystem_size=150,
    machine_type='e2-standard-16',
    projectid='my-project-id',
    zone='us-central1-a',
    on_host_maintenance="MIGRATE",
)
client = Client(cluster)
Then I read CSV files with the following code:
import dask.dataframe as dd
import csv
col_dtypes = {
    'var1': 'float64',
    'var2': 'object',
    'var3': 'object',
    'var4': 'float64'
}
df = dd.read_csv('gs://my_bucket/files-*.csv', blocksize=None, dtype= col_dtypes)
df = df.persist()
Everything works fine, but when I try to run queries or calculations, I get an error. For instance, this piece of code:
df.var1.value_counts().compute()
This is the output:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-14-711a7c21ed42> in <module>
----> 1 df.var1.value_counts().compute()
/opt/conda/lib/python3.8/site-packages/dask/base.py in compute(self, **kwargs)
279 dask.base.compute
280 """
--> 281 (result,) = compute(self, traverse=False, **kwargs)
282 return result
283
/opt/conda/lib/python3.8/site-packages/dask/base.py in compute(*args, **kwargs)
561 postcomputes.append(x.__dask_postcompute__())
562
--> 563 results = schedule(dsk, keys, **kwargs)
564 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
565
/opt/conda/lib/python3.8/site-packages/distributed/client.py in get(self, dsk, keys, workers, allow_other_workers, resources, sync, asynchronous, direct, retries, priority, fifo_timeout, actors, **kwargs)
2653 should_rejoin = False
2654 try:
-> 2655 results = self.gather(packed, asynchronous=asynchronous, direct=direct)
2656 finally:
2657 for f in futures.values():
/opt/conda/lib/python3.8/site-packages/distributed/client.py in gather(self, futures, errors, direct, asynchronous)
1962 else:
1963 local_worker = None
-> 1964 return self.sync(
1965 self._gather,
1966 futures,
/opt/conda/lib/python3.8/site-packages/distributed/client.py in sync(self, func, asynchronous, callback_timeout, *args, **kwargs)
836 return future
837 else:
--> 838 return sync(
839 self.loop, func, *args, callback_timeout=callback_timeout, **kwargs
840 )
/opt/conda/lib/python3.8/site-packages/distributed/utils.py in sync(loop, func, callback_timeout, *args, **kwargs)
338 if error[0]:
339 typ, exc, tb = error[0]
--> 340 raise exc.with_traceback(tb)
341 else:
342 return result[0]
/opt/conda/lib/python3.8/site-packages/distributed/utils.py in f()
322 if callback_timeout is not None:
323 future = asyncio.wait_for(future, callback_timeout)
--> 324 result[0] = yield future
325 except Exception as exc:
326 error[0] = sys.exc_info()
/opt/conda/lib/python3.8/site-packages/tornado/gen.py in run(self)
760
761 try:
--> 762 value = future.result()
763 except Exception:
764 exc_info = sys.exc_info()
/opt/conda/lib/python3.8/site-packages/distributed/client.py in _gather(self, futures, errors, direct, local_worker)
1827 exc = CancelledError(key)
1828 else:
-> 1829 raise exception.with_traceback(traceback)
1830 raise exc
1831 if errors == "skip":
/opt/conda/lib/python3.8/site-packages/dask/optimization.py in __call__()
961 if not len(args) == len(self.inkeys):
962 raise ValueError("Expected %d args, got %d" % (len(self.inkeys), len(args)))
--> 963 return core.get(self.dsk, self.outkey, dict(zip(self.inkeys, args)))
964
965 def __reduce__(self):
/opt/conda/lib/python3.8/site-packages/dask/core.py in get()
149 for key in toposort(dsk):
150 task = dsk[key]
--> 151 result = _execute_task(task, cache)
152 cache[key] = result
153 result = _execute_task(out, cache)
/opt/conda/lib/python3.8/site-packages/dask/core.py in _execute_task()
119 # temporaries by their reference count and can execute certain
120 # operations in-place.
--> 121 return func(*(_execute_task(a, cache) for a in args))
122 elif not ishashable(arg):
123 return arg
/opt/conda/lib/python3.8/site-packages/dask/utils.py in apply()
33 def apply(func, args, kwargs=None):
34 if kwargs:
---> 35 return func(*args, **kwargs)
36 else:
37 return func(*args)
/opt/conda/lib/python3.8/site-packages/dask/dataframe/core.py in apply_and_enforce()
5474 return meta
5475 if is_dataframe_like(df):
-> 5476 check_matching_columns(meta, df)
5477 c = meta.columns
5478 else:
/opt/conda/lib/python3.8/site-packages/dask/dataframe/utils.py in check_matching_columns()
690 def check_matching_columns(meta, actual):
691 # Need nan_to_num otherwise nan comparison gives False
--> 692 if not np.array_equal(np.nan_to_num(meta.columns), np.nan_to_num(actual.columns)):
693 extra = methods.tolist(actual.columns.difference(meta.columns))
694 missing = methods.tolist(meta.columns.difference(actual.columns))
/opt/conda/lib/python3.8/site-packages/pandas/core/generic.py in __getattr__()
5268 or name in self._accessors
5269 ):
-> 5270 return object.__getattribute__(self, name)
5271 else:
5272 if self._info_axis._can_hold_identifiers_and_holds_name(name):
pandas/_libs/properties.pyx in pandas._libs.properties.AxisProperty.__get__()
/opt/conda/lib/python3.8/site-packages/pandas/core/generic.py in __getattr__()
5268 or name in self._accessors
5269 ):
-> 5270 return object.__getattribute__(self, name)
5271 else:
5272 if self._info_axis._can_hold_identifiers_and_holds_name(name):
AttributeError: 'DataFrame' object has no attribute '_data'
The version of pandas in my Dockerfile is 1.0.1, so I already tried upgrading pandas (to version 1.2.2), but it didn't work. What am I doing wrong?
My guess is that you have a version mismatch somewhere. What does client.get_versions(check=True) say?
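To act on that suggestion, a minimal sketch using the client from the question; with check=True, distributed raises an error describing any client/scheduler/worker mismatch (for example, pandas 1.0.1 inside the daskdev/dask:2021.2.0 image versus a different pandas on the client):
# compares package versions on the client, the scheduler, and every worker
versions = client.get_versions(check=True)
print(versions["client"]["packages"]["pandas"])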