Google Sheets API - Timeout - google-sheets-api

I regularly use gspread and lately I've been getting these read timeout errors but only on certain sheets. Sometimes I'll be able to read & write to a sheet for a bit and then it'll hang until I get this read timeout error and then all subsequent attempts to read from that sheet will hang immediately and again give me this error.
---------------------------------------------------------------------------
timeout Traceback (most recent call last)
~/envs/jupyter/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
444 # Otherwise it looks like a bug in the code.
--> 445 six.raise_from(e, None)
446 except (SocketTimeout, BaseSSLError, SocketError) as e:
~/envs/jupyter/lib/python3.6/site-packages/urllib3/packages/six.py in raise_from(value, from_value)
~/envs/jupyter/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
439 try:
--> 440 httplib_response = conn.getresponse()
441 except BaseException as e:
/usr/lib/python3.6/http/client.py in getresponse(self)
1372 try:
-> 1373 response.begin()
1374 except ConnectionError:
/usr/lib/python3.6/http/client.py in begin(self)
310 while True:
--> 311 version, status, reason = self._read_status()
312 if status != CONTINUE:
/usr/lib/python3.6/http/client.py in _read_status(self)
271 def _read_status(self):
--> 272 line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
273 if len(line) > _MAXLINE:
/usr/lib/python3.6/socket.py in readinto(self, b)
585 try:
--> 586 return self._sock.recv_into(b)
587 except timeout:
/usr/lib/python3.6/ssl.py in recv_into(self, buffer, nbytes, flags)
1011 self.__class__)
-> 1012 return self.read(nbytes, buffer)
1013 else:
/usr/lib/python3.6/ssl.py in read(self, len, buffer)
873 try:
--> 874 return self._sslobj.read(len, buffer)
875 except SSLError as x:
/usr/lib/python3.6/ssl.py in read(self, len, buffer)
630 if buffer is not None:
--> 631 v = self._sslobj.read(len, buffer)
632 else:
timeout: The read operation timed out
During handling of the above exception, another exception occurred:
ReadTimeoutError Traceback (most recent call last)
~/envs/jupyter/lib/python3.6/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
448 retries=self.max_retries,
--> 449 timeout=timeout
450 )
~/envs/jupyter/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
755 retries = retries.increment(
--> 756 method, url, error=e, _pool=self, _stacktrace=sys.exc_info()[2]
757 )
~/envs/jupyter/lib/python3.6/site-packages/urllib3/util/retry.py in increment(self, method, url, response, error, _pool, _stacktrace)
531 if read is False or not self._is_method_retryable(method):
--> 532 raise six.reraise(type(error), error, _stacktrace)
533 elif read is not None:
~/envs/jupyter/lib/python3.6/site-packages/urllib3/packages/six.py in reraise(tp, value, tb)
734 raise value.with_traceback(tb)
--> 735 raise value
736 finally:
~/envs/jupyter/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
705 headers=headers,
--> 706 chunked=chunked,
707 )
~/envs/jupyter/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
446 except (SocketTimeout, BaseSSLError, SocketError) as e:
--> 447 self._raise_timeout(err=e, url=url, timeout_value=read_timeout)
448 raise
~/envs/jupyter/lib/python3.6/site-packages/urllib3/connectionpool.py in _raise_timeout(self, err, url, timeout_value)
336 raise ReadTimeoutError(
--> 337 self, url, "Read timed out. (read timeout=%s)" % timeout_value
338 )
ReadTimeoutError: HTTPSConnectionPool(host='sheets.googleapis.com', port=443): Read timed out. (read timeout=120)
During handling of the above exception, another exception occurred:
ReadTimeout Traceback (most recent call last)
<ipython-input-70-dc10e6e8ee1a> in <module>
----> 1 max_sheet.range('A1:B1')
~/envs/jupyter/lib/python3.6/site-packages/gspread/utils.py in wrapper(self, *args, **kwargs)
396 pass
397
--> 398 return method(self, *args, **kwargs)
399
400 return wrapper
~/envs/jupyter/lib/python3.6/site-packages/gspread/models.py in range(self, name)
686 range_label = absolute_range_name(self.title, name)
687
--> 688 data = self.spreadsheet.values_get(range_label)
689
690 start, end = name.split(':')
~/envs/jupyter/lib/python3.6/site-packages/gspread/models.py in values_get(self, range, params)
190 """
191 url = SPREADSHEET_VALUES_URL % (self.id, quote(range))
--> 192 r = self.client.request('get', url, params=params)
193 return r.json()
194
~/envs/jupyter/lib/python3.6/site-packages/gspread/client.py in request(self, method, endpoint, params, data, json, files, headers)
68 data=data,
69 files=files,
---> 70 headers=headers,
71 )
72
~/envs/jupyter/lib/python3.6/site-packages/requests/sessions.py in get(self, url, **kwargs)
553
554 kwargs.setdefault('allow_redirects', True)
--> 555 return self.request('GET', url, **kwargs)
556
557 def options(self, url, **kwargs):
~/envs/jupyter/lib/python3.6/site-packages/google/auth/transport/requests.py in request(self, method, url, data, headers, max_allowed_time, timeout, **kwargs)
486 headers=request_headers,
487 timeout=timeout,
--> 488 **kwargs
489 )
490 remaining_time = guard.remaining_timeout
~/envs/jupyter/lib/python3.6/site-packages/requests/sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
540 }
541 send_kwargs.update(settings)
--> 542 resp = self.send(prep, **send_kwargs)
543
544 return resp
~/envs/jupyter/lib/python3.6/site-packages/requests/sessions.py in send(self, request, **kwargs)
653
654 # Send the request
--> 655 r = adapter.send(request, **kwargs)
656
657 # Total elapsed time of the request (approximately)
~/envs/jupyter/lib/python3.6/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
527 raise SSLError(e, request=request)
528 elif isinstance(e, ReadTimeoutError):
--> 529 raise ReadTimeout(e, request=request)
530 else:
531 raise
ReadTimeout: HTTPSConnectionPool(host='sheets.googleapis.com', port=443): Read timed out. (read timeout=120)
I tried taking gspread out of it for troubleshooting but it still gives me the same issue even when I'm using code from the python quickstart example from https://developers.google.com/sheets/api/quickstart/python (below). The timeout issue is the same whether I do it from my local machine or my jupyter server on digitalocean.
from googleapiclient.discovery import build
from google.oauth2.service_account import Credentials
creds = Credentials.from_service_account_file('XXXXXXX', scopes=SCOPES)
service = build('sheets', 'v4', credentials=creds)
sheet = service.spreadsheets()
result = sheet.values().get(spreadsheetId='XXXXXXX',
range='Active Accounts - working copy!A1:A2').execute()
values = result.get('values', [])
The above code results in this:
---------------------------------------------------------------------------
timeout Traceback (most recent call last)
<ipython-input-95-5c352599e283> in <module>
1 sheet = service.spreadsheets()
2 result = sheet.values().get(spreadsheetId='XXXXXX',
----> 3 range='Active Accounts - working copy!A1:A2').execute()
4 values = result.get('values', [])
~/envs/jupyter/lib/python3.6/site-packages/googleapiclient/_helpers.py in positional_wrapper(*args, **kwargs)
132 elif positional_parameters_enforcement == POSITIONAL_WARNING:
133 logger.warning(message)
--> 134 return wrapped(*args, **kwargs)
135
136 return positional_wrapper
~/envs/jupyter/lib/python3.6/site-packages/googleapiclient/http.py in execute(self, http, num_retries)
899 method=str(self.method),
900 body=self.body,
--> 901 headers=self.headers,
902 )
903
~/envs/jupyter/lib/python3.6/site-packages/googleapiclient/http.py in _retry_request(http, num_retries, req_type, sleep, rand, uri, method, *args, **kwargs)
202 if exception:
203 if retry_num == num_retries:
--> 204 raise exception
205 else:
206 continue
~/envs/jupyter/lib/python3.6/site-packages/googleapiclient/http.py in _retry_request(http, num_retries, req_type, sleep, rand, uri, method, *args, **kwargs)
175 try:
176 exception = None
--> 177 resp, content = http.request(uri, method, *args, **kwargs)
178 # Retry on SSL errors and socket timeout errors.
179 except _ssl_SSLError as ssl_error:
~/envs/jupyter/lib/python3.6/site-packages/google_auth_httplib2.py in request(self, uri, method, body, headers, **kwargs)
196 # Make the request.
197 response, content = self.http.request(
--> 198 uri, method, body=body, headers=request_headers, **kwargs)
199
200 # If the response indicated that the credentials needed to be
~/envs/jupyter/lib/python3.6/site-packages/httplib2/__init__.py in request(self, uri, method, body, headers, redirections, connection_type)
1989 headers,
1990 redirections,
-> 1991 cachekey,
1992 )
1993 except Exception as e:
~/envs/jupyter/lib/python3.6/site-packages/httplib2/__init__.py in _request(self, conn, host, absolute_uri, request_uri, method, body, headers, redirections, cachekey)
1649
1650 (response, content) = self._conn_request(
-> 1651 conn, request_uri, method, body, headers
1652 )
1653
~/envs/jupyter/lib/python3.6/site-packages/httplib2/__init__.py in _conn_request(self, conn, request_uri, method, body, headers)
1587 pass
1588 try:
-> 1589 response = conn.getresponse()
1590 except (http.client.BadStatusLine, http.client.ResponseNotReady):
1591 # If we get a BadStatusLine on the first try then that means
/usr/lib/python3.6/http/client.py in getresponse(self)
1371 try:
1372 try:
-> 1373 response.begin()
1374 except ConnectionError:
1375 self.close()
/usr/lib/python3.6/http/client.py in begin(self)
309 # read until we get a non-100 response
310 while True:
--> 311 version, status, reason = self._read_status()
312 if status != CONTINUE:
313 break
/usr/lib/python3.6/http/client.py in _read_status(self)
270
271 def _read_status(self):
--> 272 line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
273 if len(line) > _MAXLINE:
274 raise LineTooLong("status line")
/usr/lib/python3.6/socket.py in readinto(self, b)
584 while True:
585 try:
--> 586 return self._sock.recv_into(b)
587 except timeout:
588 self._timeout_occurred = True
/usr/lib/python3.6/ssl.py in recv_into(self, buffer, nbytes, flags)
1010 "non-zero flags not allowed in calls to recv_into() on %s" %
1011 self.__class__)
-> 1012 return self.read(nbytes, buffer)
1013 else:
1014 return socket.recv_into(self, buffer, nbytes, flags)
/usr/lib/python3.6/ssl.py in read(self, len, buffer)
872 raise ValueError("Read on closed or unwrapped SSL socket.")
873 try:
--> 874 return self._sslobj.read(len, buffer)
875 except SSLError as x:
876 if x.args[0] == SSL_ERROR_EOF and self.suppress_ragged_eofs:
/usr/lib/python3.6/ssl.py in read(self, len, buffer)
629 """
630 if buffer is not None:
--> 631 v = self._sslobj.read(len, buffer)
632 else:
633 v = self._sslobj.read(len)
timeout: The read operation timed out

Related

s3fs FileNotFoundError

I am only able to gain limited/top-level access to my aws s3. I can see the buckets, but not their contents; neither subfolders nor files. I'm running everything from inside a conda environment. I've tried accessing files in private and public buckets without success. What am I doing wrong?
This block of code works as expected
>>> import s3fs
>>> AKEY = 'XXXX'
>>> SKEY = 'XXXX'
>>> fs = s3fs.S3FileSystem(key=AKEY,secret=SKEY)
>>> fs.ls('s3://')
['my-bucket-1',
'my-bucket-2',
'my-bucket-3']
This block doesn't
>>> fs.ls('s3://my-bucket-1')
[]
what I expect
>>> fs.ls('s3://my-bucket-1')
['my-bucket-1/test.txt',
'my-bucket-1/test.csv']
When I try to open a file I get a FileNotFoundError
import pandas as pd
pd.read_csv(
's3://my-bucket-1/test.csv',
storage_options={'key':AKEY,'secret':SKEY}
)
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
Cell In[8], line 2
1 import pandas as pd
----> 2 pd.read_csv(
3 's3://my-bucket-1/test.csv'',
4 storage_options={'key':AKEY,'secret':SKEY}
5 )
File ~\anaconda3\envs\env-2\lib\site-packages\pandas\util\_decorators.py:211, in deprecate_kwarg.<locals>._deprecate_kwarg.<locals>.wrapper(*args, **kwargs)
209 else:
210 kwargs[new_arg_name] = new_arg_value
--> 211 return func(*args, **kwargs)
File ~\anaconda3\envs\env-2\lib\site-packages\pandas\util\_decorators.py:331, in deprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper(*args, **kwargs)
325 if len(args) > num_allow_args:
326 warnings.warn(
327 msg.format(arguments=_format_argument_list(allow_args)),
328 FutureWarning,
329 stacklevel=find_stack_level(),
330 )
--> 331 return func(*args, **kwargs)
File ~\anaconda3\envs\env-2\lib\site-packages\pandas\io\parsers\readers.py:950, in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, error_bad_lines, warn_bad_lines, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options)
935 kwds_defaults = _refine_defaults_read(
936 dialect,
937 delimiter,
(...)
946 defaults={"delimiter": ","},
947 )
948 kwds.update(kwds_defaults)
--> 950 return _read(filepath_or_buffer, kwds)
File ~\anaconda3\envs\env-2\lib\site-packages\pandas\io\parsers\readers.py:605, in _read(filepath_or_buffer, kwds)
602 _validate_names(kwds.get("names", None))
604 # Create the parser.
--> 605 parser = TextFileReader(filepath_or_buffer, **kwds)
607 if chunksize or iterator:
608 return parser
File ~\anaconda3\envs\env-2\lib\site-packages\pandas\io\parsers\readers.py:1442, in TextFileReader.__init__(self, f, engine, **kwds)
1439 self.options["has_index_names"] = kwds["has_index_names"]
1441 self.handles: IOHandles | None = None
-> 1442 self._engine = self._make_engine(f, self.engine)
File ~\anaconda3\envs\env-2\lib\site-packages\pandas\io\parsers\readers.py:1735, in TextFileReader._make_engine(self, f, engine)
1733 if "b" not in mode:
1734 mode += "b"
-> 1735 self.handles = get_handle(
1736 f,
1737 mode,
1738 encoding=self.options.get("encoding", None),
1739 compression=self.options.get("compression", None),
1740 memory_map=self.options.get("memory_map", False),
1741 is_text=is_text,
1742 errors=self.options.get("encoding_errors", "strict"),
1743 storage_options=self.options.get("storage_options", None),
1744 )
1745 assert self.handles is not None
1746 f = self.handles.handle
File ~\anaconda3\envs\env-2\lib\site-packages\pandas\io\common.py:713, in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)
710 codecs.lookup_error(errors)
712 # open URLs
--> 713 ioargs = _get_filepath_or_buffer(
714 path_or_buf,
715 encoding=encoding,
716 compression=compression,
717 mode=mode,
718 storage_options=storage_options,
719 )
721 handle = ioargs.filepath_or_buffer
722 handles: list[BaseBuffer]
File ~\anaconda3\envs\env-2\lib\site-packages\pandas\io\common.py:409, in _get_filepath_or_buffer(filepath_or_buffer, encoding, compression, mode, storage_options)
406 pass
408 try:
--> 409 file_obj = fsspec.open(
410 filepath_or_buffer, mode=fsspec_mode, **(storage_options or {})
411 ).open()
412 # GH 34626 Reads from Public Buckets without Credentials needs anon=True
413 except tuple(err_types_to_retry_with_anon):
File ~\anaconda3\envs\env-2\lib\site-packages\fsspec\core.py:135, in OpenFile.open(self)
128 def open(self):
129 """Materialise this as a real open file without context
130
131 The OpenFile object should be explicitly closed to avoid enclosed file
132 instances persisting. You must, therefore, keep a reference to the OpenFile
133 during the life of the file-like it generates.
134 """
--> 135 return self.__enter__()
File ~\anaconda3\envs\env-2\lib\site-packages\fsspec\core.py:103, in OpenFile.__enter__(self)
100 def __enter__(self):
101 mode = self.mode.replace("t", "").replace("b", "") + "b"
--> 103 f = self.fs.open(self.path, mode=mode)
105 self.fobjects = [f]
107 if self.compression is not None:
File ~\anaconda3\envs\env-2\lib\site-packages\fsspec\spec.py:1106, in AbstractFileSystem.open(self, path, mode, block_size, cache_options, compression, **kwargs)
1104 else:
1105 ac = kwargs.pop("autocommit", not self._intrans)
-> 1106 f = self._open(
1107 path,
1108 mode=mode,
1109 block_size=block_size,
1110 autocommit=ac,
1111 cache_options=cache_options,
1112 **kwargs,
1113 )
1114 if compression is not None:
1115 from fsspec.compression import compr
File ~\anaconda3\envs\env-2\lib\site-packages\s3fs\core.py:640, in S3FileSystem._open(self, path, mode, block_size, acl, version_id, fill_cache, cache_type, autocommit, requester_pays, cache_options, **kwargs)
637 if cache_type is None:
638 cache_type = self.default_cache_type
--> 640 return S3File(
641 self,
642 path,
643 mode,
644 block_size=block_size,
645 acl=acl,
646 version_id=version_id,
647 fill_cache=fill_cache,
648 s3_additional_kwargs=kw,
649 cache_type=cache_type,
650 autocommit=autocommit,
651 requester_pays=requester_pays,
652 cache_options=cache_options,
653 )
File ~\anaconda3\envs\env-2\lib\site-packages\s3fs\core.py:1989, in S3File.__init__(self, s3, path, mode, block_size, acl, version_id, fill_cache, s3_additional_kwargs, autocommit, cache_type, requester_pays, cache_options)
1987 self.details = s3.info(path)
1988 self.version_id = self.details.get("VersionId")
-> 1989 super().__init__(
1990 s3,
1991 path,
1992 mode,
1993 block_size,
1994 autocommit=autocommit,
1995 cache_type=cache_type,
1996 cache_options=cache_options,
1997 )
1998 self.s3 = self.fs # compatibility
2000 # when not using autocommit we want to have transactional state to manage
File ~\anaconda3\envs\env-2\lib\site-packages\fsspec\spec.py:1462, in AbstractBufferedFile.__init__(self, fs, path, mode, block_size, autocommit, cache_type, cache_options, size, **kwargs)
1460 self.size = size
1461 else:
-> 1462 self.size = self.details["size"]
1463 self.cache = caches[cache_type](
1464 self.blocksize, self._fetch_range, self.size, **cache_options
1465 )
1466 else:
File ~\anaconda3\envs\env-2\lib\site-packages\fsspec\spec.py:1475, in AbstractBufferedFile.details(self)
1472 #property
1473 def details(self):
1474 if self._details is None:
-> 1475 self._details = self.fs.info(self.path)
1476 return self._details
File ~\anaconda3\envs\env-2\lib\site-packages\fsspec\asyn.py:113, in sync_wrapper.<locals>.wrapper(*args, **kwargs)
110 #functools.wraps(func)
111 def wrapper(*args, **kwargs):
112 self = obj or args[0]
--> 113 return sync(self.loop, func, *args, **kwargs)
File ~\anaconda3\envs\env-2\lib\site-packages\fsspec\asyn.py:98, in sync(loop, func, timeout, *args, **kwargs)
96 raise FSTimeoutError from return_result
97 elif isinstance(return_result, BaseException):
---> 98 raise return_result
99 else:
100 return return_result
File ~\anaconda3\envs\env-2\lib\site-packages\fsspec\asyn.py:53, in _runner(event, coro, result, timeout)
51 coro = asyncio.wait_for(coro, timeout=timeout)
52 try:
---> 53 result[0] = await coro
54 except Exception as ex:
55 result[0] = ex
File ~\anaconda3\envs\env-2\lib\site-packages\s3fs\core.py:1257, in S3FileSystem._info(self, path, bucket, key, refresh, version_id)
1245 if (
1246 out.get("KeyCount", 0) > 0
1247 or out.get("Contents", [])
1248 or out.get("CommonPrefixes", [])
1249 ):
1250 return {
1251 "name": "/".join([bucket, key]),
1252 "type": "directory",
1253 "size": 0,
1254 "StorageClass": "DIRECTORY",
1255 }
-> 1257 raise FileNotFoundError(path)
1258 except ClientError as e:
1259 raise translate_boto_error(e, set_cause=False)
FileNotFoundError: my-bucket-1/test.csv
s3fs-2022.11.0, aiobotocore-2.4.0, botocore-1.27.59
fs = s3fs.S3FileSystem(anon=True)
fs.ls('s3://dask-data/nyc-taxi/2015')
ParseError
Check the bucket policy / IAM role that gives you permissions to access the bucket. It should have /* after the name of the resource:
"Action": "s3:GetObject",
"Resource": "arn:aws:s3:::my-bucket-1/*"
to allow you access the objects in the bucket, not just the bucket itself.
Have you tried boto3? s3fs is no longer supported.

tight_layout KeyError default, matplotlib widget

Using jupyterlab, i receive a KeyError: 'Default' when using plt.tight_layout() in combination with %matplotlib widget. The following code reproduces the issue:
import matplotlib.pyplot as plt
import numpy as np
%matplotlib widget
x=np.linspace(0,10)
y=x**2
plt.plot(x,y)
plt.tight_layout()
The complete error message is the following:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~/anaconda3/lib/python3.8/site-packages/matplotlib/backend_bases.py in _wait_cursor_for_draw_cm(self)
3024 try:
-> 3025 self.canvas.set_cursor(tools.Cursors.WAIT)
3026 yield
~/anaconda3/lib/python3.8/site-packages/matplotlib/backends/backend_webagg_core.py in set_cursor(self, cursor)
209 }, cursor=cursor)
--> 210 self.send_event('cursor', cursor=cursor)
211
~/anaconda3/lib/python3.8/site-packages/matplotlib/backends/backend_webagg_core.py in send_event(self, event_type, **kwargs)
391 if self.manager:
--> 392 self.manager._send_event(event_type, **kwargs)
393
~/anaconda3/lib/python3.8/site-packages/matplotlib/backends/backend_webagg_core.py in _send_event(self, event_type, **kwargs)
540 for s in self.web_sockets:
--> 541 s.send_json(payload)
542
~/anaconda3/lib/python3.8/site-packages/ipympl/backend_nbagg.py in send_json(self, content)
180 if content['type'] == 'cursor':
--> 181 self._cursor = cursors_str[content['cursor']]
182
KeyError: 'wait'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
/tmp/ipykernel_119035/3466922198.py in <module>
7 y=x**2
8 plt.plot(x,y)
----> 9 plt.tight_layout()
~/anaconda3/lib/python3.8/site-packages/matplotlib/pyplot.py in tight_layout(pad, h_pad, w_pad, rect)
2300 #_copy_docstring_and_deprecators(Figure.tight_layout)
2301 def tight_layout(*, pad=1.08, h_pad=None, w_pad=None, rect=None):
-> 2302 return gcf().tight_layout(pad=pad, h_pad=h_pad, w_pad=w_pad, rect=rect)
2303
2304
~/anaconda3/lib/python3.8/site-packages/matplotlib/figure.py in tight_layout(self, pad, h_pad, w_pad, rect)
3186 "compatible with tight_layout, so results "
3187 "might be incorrect.")
-> 3188 renderer = _get_renderer(self)
3189 with getattr(renderer, "_draw_disabled", nullcontext)():
3190 kwargs = get_tight_layout_figure(
~/anaconda3/lib/python3.8/site-packages/matplotlib/backend_bases.py in _get_renderer(figure, print_method)
1542 figure.canvas._get_output_canvas(None, fmt), f"print_{fmt}")
1543 try:
-> 1544 print_method(io.BytesIO())
1545 except Done as exc:
1546 renderer, = figure._cachedRenderer, = exc.args
~/anaconda3/lib/python3.8/site-packages/matplotlib/backend_bases.py in wrapper(*args, **kwargs)
1641 kwargs.pop(arg)
1642
-> 1643 return func(*args, **kwargs)
1644
1645 return wrapper
~/anaconda3/lib/python3.8/site-packages/matplotlib/_api/deprecation.py in wrapper(*inner_args, **inner_kwargs)
410 else deprecation_addendum,
411 **kwargs)
--> 412 return func(*inner_args, **inner_kwargs)
413
414 DECORATORS[wrapper] = decorator
~/anaconda3/lib/python3.8/site-packages/matplotlib/backends/backend_agg.py in print_png(self, filename_or_obj, metadata, pil_kwargs, *args)
538 *metadata*, including the default 'Software' key.
539 """
--> 540 FigureCanvasAgg.draw(self)
541 mpl.image.imsave(
542 filename_or_obj, self.buffer_rgba(), format="png", origin="upper",
~/anaconda3/lib/python3.8/site-packages/matplotlib/backends/backend_agg.py in draw(self)
431 self.renderer = self.get_renderer(cleared=True)
432 # Acquire a lock on the shared font cache.
--> 433 with RendererAgg.lock, \
434 (self.toolbar._wait_cursor_for_draw_cm() if self.toolbar
435 else nullcontext()):
~/anaconda3/lib/python3.8/contextlib.py in __enter__(self)
111 del self.args, self.kwds, self.func
112 try:
--> 113 return next(self.gen)
114 except StopIteration:
115 raise RuntimeError("generator didn't yield") from None
~/anaconda3/lib/python3.8/site-packages/matplotlib/backend_bases.py in _wait_cursor_for_draw_cm(self)
3026 yield
3027 finally:
-> 3028 self.canvas.set_cursor(self._lastCursor)
3029 else:
3030 yield
~/anaconda3/lib/python3.8/site-packages/matplotlib/backends/backend_webagg_core.py in set_cursor(self, cursor)
208 backend_tools.Cursors.RESIZE_VERTICAL: 'ns-resize',
209 }, cursor=cursor)
--> 210 self.send_event('cursor', cursor=cursor)
211
212 def set_image_mode(self, mode):
~/anaconda3/lib/python3.8/site-packages/matplotlib/backends/backend_webagg_core.py in send_event(self, event_type, **kwargs)
390 def send_event(self, event_type, **kwargs):
391 if self.manager:
--> 392 self.manager._send_event(event_type, **kwargs)
393
394
~/anaconda3/lib/python3.8/site-packages/matplotlib/backends/backend_webagg_core.py in _send_event(self, event_type, **kwargs)
539 payload = {'type': event_type, **kwargs}
540 for s in self.web_sockets:
--> 541 s.send_json(payload)
542
543
~/anaconda3/lib/python3.8/site-packages/ipympl/backend_nbagg.py in send_json(self, content)
179 # Change in the widget state?
180 if content['type'] == 'cursor':
--> 181 self._cursor = cursors_str[content['cursor']]
182
183 elif content['type'] == 'message':
KeyError: 'default'

Dask Cluster: AttributeError: 'DataFrame' object has no attribute '_data'

I'm working with a Dask Cluster on GCP. I'm using this code to deploy it:
from dask_cloudprovider.gcp import GCPCluster
from dask.distributed import Client
enviroment_vars = {
'EXTRA_PIP_PACKAGES': '"gcsfs"'
}
cluster = GCPCluster(
n_workers=32,
docker_image='daskdev/dask:2021.2.0',
env_vars=enviroment_vars,
network='my-network',
#filesystem_size=150,
machine_type='e2-standard-16',
projectid='my-project-id',
zone='us-central1-a',
on_host_maintenance="MIGRATE"
client = Client(cluster)
Then I read csv files, with the following code:
import dask.dataframe as dd
import csv
col_dtypes = {
'var1': 'float64',
'var2': 'object',
'var3': 'object',
'var4': 'float64'
}
df = dd.read_csv('gs://my_bucket/files-*.csv', blocksize=None, dtype= col_dtypes)
df = df.persist()
Everything works fine, but when I try to do some queries, or calculation, I get an error. For instance this piece of code:
df.var1.value_counts().compute()
This is the output:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-14-711a7c21ed42> in <module>
----> 1 df.var1.value_counts().compute()
/opt/conda/lib/python3.8/site-packages/dask/base.py in compute(self, **kwargs)
279 dask.base.compute
280 """
--> 281 (result,) = compute(self, traverse=False, **kwargs)
282 return result
283
/opt/conda/lib/python3.8/site-packages/dask/base.py in compute(*args, **kwargs)
561 postcomputes.append(x.__dask_postcompute__())
562
--> 563 results = schedule(dsk, keys, **kwargs)
564 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
565
/opt/conda/lib/python3.8/site-packages/distributed/client.py in get(self, dsk, keys, workers, allow_other_workers, resources, sync, asynchronous, direct, retries, priority, fifo_timeout, actors, **kwargs)
2653 should_rejoin = False
2654 try:
-> 2655 results = self.gather(packed, asynchronous=asynchronous, direct=direct)
2656 finally:
2657 for f in futures.values():
/opt/conda/lib/python3.8/site-packages/distributed/client.py in gather(self, futures, errors, direct, asynchronous)
1962 else:
1963 local_worker = None
-> 1964 return self.sync(
1965 self._gather,
1966 futures,
/opt/conda/lib/python3.8/site-packages/distributed/client.py in sync(self, func, asynchronous, callback_timeout, *args, **kwargs)
836 return future
837 else:
--> 838 return sync(
839 self.loop, func, *args, callback_timeout=callback_timeout, **kwargs
840 )
/opt/conda/lib/python3.8/site-packages/distributed/utils.py in sync(loop, func, callback_timeout, *args, **kwargs)
338 if error[0]:
339 typ, exc, tb = error[0]
--> 340 raise exc.with_traceback(tb)
341 else:
342 return result[0]
/opt/conda/lib/python3.8/site-packages/distributed/utils.py in f()
322 if callback_timeout is not None:
323 future = asyncio.wait_for(future, callback_timeout)
--> 324 result[0] = yield future
325 except Exception as exc:
326 error[0] = sys.exc_info()
/opt/conda/lib/python3.8/site-packages/tornado/gen.py in run(self)
760
761 try:
--> 762 value = future.result()
763 except Exception:
764 exc_info = sys.exc_info()
/opt/conda/lib/python3.8/site-packages/distributed/client.py in _gather(self, futures, errors, direct, local_worker)
1827 exc = CancelledError(key)
1828 else:
-> 1829 raise exception.with_traceback(traceback)
1830 raise exc
1831 if errors == "skip":
/opt/conda/lib/python3.8/site-packages/dask/optimization.py in __call__()
961 if not len(args) == len(self.inkeys):
962 raise ValueError("Expected %d args, got %d" % (len(self.inkeys), len(args)))
--> 963 return core.get(self.dsk, self.outkey, dict(zip(self.inkeys, args)))
964
965 def __reduce__(self):
/opt/conda/lib/python3.8/site-packages/dask/core.py in get()
149 for key in toposort(dsk):
150 task = dsk[key]
--> 151 result = _execute_task(task, cache)
152 cache[key] = result
153 result = _execute_task(out, cache)
/opt/conda/lib/python3.8/site-packages/dask/core.py in _execute_task()
119 # temporaries by their reference count and can execute certain
120 # operations in-place.
--> 121 return func(*(_execute_task(a, cache) for a in args))
122 elif not ishashable(arg):
123 return arg
/opt/conda/lib/python3.8/site-packages/dask/utils.py in apply()
33 def apply(func, args, kwargs=None):
34 if kwargs:
---> 35 return func(*args, **kwargs)
36 else:
37 return func(*args)
/opt/conda/lib/python3.8/site-packages/dask/dataframe/core.py in apply_and_enforce()
5474 return meta
5475 if is_dataframe_like(df):
-> 5476 check_matching_columns(meta, df)
5477 c = meta.columns
5478 else:
/opt/conda/lib/python3.8/site-packages/dask/dataframe/utils.py in check_matching_columns()
690 def check_matching_columns(meta, actual):
691 # Need nan_to_num otherwise nan comparison gives False
--> 692 if not np.array_equal(np.nan_to_num(meta.columns), np.nan_to_num(actual.columns)):
693 extra = methods.tolist(actual.columns.difference(meta.columns))
694 missing = methods.tolist(meta.columns.difference(actual.columns))
/opt/conda/lib/python3.8/site-packages/pandas/core/generic.py in __getattr__()
5268 or name in self._accessors
5269 ):
-> 5270 return object.__getattribute__(self, name)
5271 else:
5272 if self._info_axis._can_hold_identifiers_and_holds_name(name):
pandas/_libs/properties.pyx in pandas._libs.properties.AxisProperty.__get__()
/opt/conda/lib/python3.8/site-packages/pandas/core/generic.py in __getattr__()
5268 or name in self._accessors
5269 ):
-> 5270 return object.__getattribute__(self, name)
5271 else:
5272 if self._info_axis._can_hold_identifiers_and_holds_name(name):
AttributeError: 'DataFrame' object has no attribute '_data'
The version of Pandas in my docker file is 1.0.1, so I already try upgrading Pandas (to version 1.2.2), but it didn't work, what am I doing wrong?
My guess is that you have a version mismatch somewhere. What does client.get_versions(check=True) say?

Trying to pass variable from pandas to sql

I am trying to pass a simple variable (string, list whatever I can get working) in to sql (using jaydebapi to AWS Redshift cluster.
The python/pandas code looks like this at moment. I think the %s is part of the problem:
id = (261894)
df = pd.read_sql_query(
'''
select * from purchases where customer_id = %s
''',
conn, params=[id])
exception is
ava.sql.SQLExceptionPyRaisable Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\sql.py in execute(self, *args, **kwargs)
1377 else:
-> 1378 cur.execute(*args)
1379 return cur
C:\ProgramData\Anaconda3\lib\site-packages\jaydebeapi\__init__.py in execute(self, operation, parameters)
497 self._close_last()
--> 498 self._prep = self._connection.jconn.prepareStatement(operation)
499 self._set_stmt_parms(self._prep, parameters)
java.sql.SQLExceptionPyRaisable: java.sql.SQLException: [Amazon](500310) Invalid operation: syntax error at or near "%"
Position: 70;
During handling of the above exception, another exception occurred:
java.sql.SQLExceptionPyRaisable Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\jaydebeapi\__init__.py in rollback(self)
416 try:
--> 417 self.jconn.rollback()
418 except:
java.sql.SQLExceptionPyRaisable: java.sql.SQLException: [Amazon][JDBC](11680) Cannot use rollback while Connection is in auto-commit mode.
During handling of the above exception, another exception occurred:
DatabaseError Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\sql.py in execute(self, *args, **kwargs)
1381 try:
-> 1382 self.con.rollback()
1383 except Exception: # pragma: no cover
C:\ProgramData\Anaconda3\lib\site-packages\jaydebeapi\__init__.py in rollback(self)
418 except:
--> 419 _handle_sql_exception()
420
C:\ProgramData\Anaconda3\lib\site-packages\jaydebeapi\__init__.py in _handle_sql_exception_jpype()
155 exc_type = InterfaceError
--> 156 reraise(exc_type, exc_info[1], exc_info[2])
157
C:\ProgramData\Anaconda3\lib\site-packages\jaydebeapi\__init__.py in reraise(tp, value, tb)
56 if tb:
---> 57 raise value.with_traceback(tb)
58 raise value
C:\ProgramData\Anaconda3\lib\site-packages\jaydebeapi\__init__.py in rollback(self)
416 try:
--> 417 self.jconn.rollback()
418 except:
DatabaseError: java.sql.SQLException: [Amazon][JDBC](11680) Cannot use rollback while Connection is in auto-commit mode.
During handling of the above exception, another exception occurred:
DatabaseError Traceback (most recent call last)
in
5 select * from purchases where customer_id = %s
6 ''',
----> 7 conn, params=[id])
8
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\sql.py in read_sql_query(sql, con, index_col, coerce_float, params, parse_dates, chunksize)
312 return pandas_sql.read_query(
313 sql, index_col=index_col, params=params, coerce_float=coerce_float,
--> 314 parse_dates=parse_dates, chunksize=chunksize)
315
316
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\sql.py in read_query(self, sql, index_col, coerce_float, params, parse_dates, chunksize)
1411
1412 args = _convert_params(sql, params)
-> 1413 cursor = self.execute(*args)
1414 columns = [col_desc[0] for col_desc in cursor.description]
1415
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\sql.py in execute(self, *args, **kwargs)
1384 ex = DatabaseError("Execution failed on sql: %s\n%s\nunable"
1385 " to rollback" % (args[0], exc))
-> 1386 raise_with_traceback(ex)
1387
1388 ex = DatabaseError(
C:\ProgramData\Anaconda3\lib\site-packages\pandas\compat\__init__.py in raise_with_traceback(exc, traceback)
402 if traceback == Ellipsis:
403 _, _, traceback = sys.exc_info()
--> 404 raise exc.with_traceback(traceback)
405 else:
406 # this version of raise is a syntax error in Python 3
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\sql.py in execute(self, *args, **kwargs)
1380 except Exception as exc:
1381 try:
-> 1382 self.con.rollback()
1383 except Exception: # pragma: no cover
1384 ex = DatabaseError("Execution failed on sql: %s\n%s\nunable"
C:\ProgramData\Anaconda3\lib\site-packages\jaydebeapi\__init__.py in rollback(self)
417 self.jconn.rollback()
418 except:
--> 419 _handle_sql_exception()
420
421 def cursor(self):
C:\ProgramData\Anaconda3\lib\site-packages\jaydebeapi\__init__.py in _handle_sql_exception_jpype()
154 else:
155 exc_type = InterfaceError
--> 156 reraise(exc_type, exc_info[1], exc_info[2])
157
158 def _jdbc_connect_jpype(jclassname, url, driver_args, jars, libs):
C:\ProgramData\Anaconda3\lib\site-packages\jaydebeapi\__init__.py in reraise(tp, value, tb)
55 value = tp(value)
56 if tb:
---> 57 raise value.with_traceback(tb)
58 raise value
59
C:\ProgramData\Anaconda3\lib\site-packages\jaydebeapi\__init__.py in rollback(self)
415 def rollback(self):
416 try:
--> 417 self.jconn.rollback()
418 except:
419 _handle_sql_exception()
DatabaseError: Execution failed on sql:
select * from purchases where customer_id = %s
java.sql.SQLException: [Amazon](500310) Invalid operation: syntax error at or near "%"
Position: 70;
unable to rollback
[39]
id = (261894)
[44]
Any thoughts on how I can get this to work? Do I need an alternative to %s?
try this:
id = (261894)
df = pd.read_sql_query( ''' select * from purchases where customer_id = ? ''', conn, params=[id])
it should work

SocketError: Connection refused using py2neo to a remote server

While this issue has been addressed a couple of times in the past at SO and I tried all the suggestions, the problem still remains and I am hoping someone would shine a light on this.
My company set up a neo4j (v2.1.6) graph database at a remote ubuntu server.
In order to modify and update data into the server, I am using a python package, py2neo.
The server's endpoint address is http://fake-address.com/db/data and the authentication Id/password are 'fakeId' and 'fakePassWord'.
In order to access the remote database, in my local machine python terminal,
I tried the following:
from py2neo import authenticate, Graph
authenticate("fake-address.com:80", "fakeId", "fakePassWord")
graph = Graph("http://fake-address.com:80/db/data/")
result = graph.cypher.execute("CREATE (a:Color)")
Unfortunately, the above command resulted in the following error message.
/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/py2neo/cypher/core.pyc in execute(self, statement, parameters, **kwparameters)
107 """
108 if self.transaction_uri:
--> 109 tx = CypherTransaction(self.transaction_uri)
110 tx.append(statement, parameters, **kwparameters)
111 results = tx.commit()
/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/py2neo/cypher/core.pyc in __init__(self, uri)
180 self.__commit = None
181 self.__finished = False
--> 182 self.graph = self.__begin.graph
183
184 def __enter__(self):
/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/py2neo/core.pyc in graph(self)
211 :rtype: :class:`.Graph`
212 """
--> 213 return self.__service_root.graph
214
215 #property
/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/py2neo/core.pyc in graph(self)
523 if self.__graph is None:
524 try:
--> 525 uri = self.resource.metadata["data"]
526 except KeyError:
527 if "authentication" in self.resource.metadata:
/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/py2neo/core.pyc in metadata(self)
226 if self.__initial_metadata is not None:
227 return self.__initial_metadata
--> 228 self.get()
229 return self.__last_get_response.content
230
/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/py2neo/core.pyc in get(self, headers, redirect_limit, **kwargs)
271 kwargs.update(cache=True)
272 try:
--> 273 response = self.__base.get(headers=headers, redirect_limit=redirect_limit, **kwargs)
274 except (ClientError, ServerError) as error:
275 if error.status_code == UNAUTHORIZED:
/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/py2neo/packages/httpstream/http.pyc in get(self, if_modified_since, headers, redirect_limit, **kwargs)
964 object from which content can be read
965 """
--> 966 return self.__get_or_head("GET", if_modified_since, headers, redirect_limit, **kwargs)
967
968 def put(self, body=None, headers=None, **kwargs):
/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/py2neo/packages/httpstream/http.pyc in __get_or_head(self, method, if_modified_since, headers, redirect_limit, **kwargs)
941 headers["If-Modified-Since"] = formatdate(datetime_to_timestamp(if_modified_since), usegmt=True)
942 rq = Request(method, self.uri, None, headers)
--> 943 return rq.submit(redirect_limit=redirect_limit, **kwargs)
944
945 def head(self, if_modified_since=None, headers=None, redirect_limit=5, **kwargs):
/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/py2neo/packages/httpstream/http.pyc in submit(self, redirect_limit, **response_kwargs)
431 uri = self.uri
432 while True:
--> 433 http, rs = submit(self.method, uri, self.body, self.headers)
434 status_class = rs.status // 100
435 if status_class == 3:
/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/py2neo/packages/httpstream/http.pyc in submit(method, uri, body, headers)
360 host_port=uri.host_port)
361 else:
--> 362 raise SocketError(code, description, host_port=uri.host_port)
363 else:
364 return http, response
SocketError: Connection refused
Sorry about the long error message and I greatly appreciate any suggestions.
The issue has been resolved. The problem must have been in the authentication in py2neo. With the newest version of py2neo (2.0.4), the connection was properly made.