Related
I'm trying to import some public data from the web but can't understand the error.
My code:
import pandas as pd
df2022 = pd.read_excel("https://ofslivefs.blob.core.windows.net/files/NSS%20data%202022/September/NSS2022_summary_data.xlsx")
It returns this:
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
/var/folders/v_/yq26pm194xj5ckqy8p_njwc00000gn/T/ipykernel_89117/2424267382.py in <module>
----> 1 df2022 = pd.read_excel("https://ofslivefs.blob.core.windows.net/files/NSS%20data%202022/September/NSS2022_summary_data.xlsx")
~/opt/anaconda3/lib/python3.9/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
209 else:
210 kwargs[new_arg_name] = new_arg_value
--> 211 return func(*args, **kwargs)
212
213 return cast(F, wrapper)
~/opt/anaconda3/lib/python3.9/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
329 stacklevel=find_stack_level(),
330 )
--> 331 return func(*args, **kwargs)
332
333 # error: "Callable[[VarArg(Any), KwArg(Any)], Any]" has no
~/opt/anaconda3/lib/python3.9/site-packages/pandas/io/excel/_base.py in read_excel(io, sheet_name, header, names, index_col, usecols, squeeze, dtype, engine, converters, true_values, false_values, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, parse_dates, date_parser, thousands, decimal, comment, skipfooter, convert_float, mangle_dupe_cols, storage_options)
480 if not isinstance(io, ExcelFile):
481 should_close = True
--> 482 io = ExcelFile(io, storage_options=storage_options, engine=engine)
483 elif engine and engine != io.engine:
484 raise ValueError(
~/opt/anaconda3/lib/python3.9/site-packages/pandas/io/excel/_base.py in __init__(self, path_or_buffer, engine, storage_options)
1693 self.storage_options = storage_options
1694
-> 1695 self._reader = self._engines[engine](self._io, storage_options=storage_options)
1696
1697 def __fspath__(self):
~/opt/anaconda3/lib/python3.9/site-packages/pandas/io/excel/_openpyxl.py in __init__(self, filepath_or_buffer, storage_options)
555 """
556 import_optional_dependency("openpyxl")
--> 557 super().__init__(filepath_or_buffer, storage_options=storage_options)
558
559 #property
~/opt/anaconda3/lib/python3.9/site-packages/pandas/io/excel/_base.py in __init__(self, filepath_or_buffer, storage_options)
543 self.handles.handle.seek(0)
544 try:
--> 545 self.book = self.load_workbook(self.handles.handle)
546 except Exception:
547 self.close()
~/opt/anaconda3/lib/python3.9/site-packages/pandas/io/excel/_openpyxl.py in load_workbook(self, filepath_or_buffer)
566 from openpyxl import load_workbook
567
--> 568 return load_workbook(
569 filepath_or_buffer, read_only=True, data_only=True, keep_links=False
570 )
~/opt/anaconda3/lib/python3.9/site-packages/openpyxl/reader/excel.py in load_workbook(filename, read_only, keep_vba, data_only, keep_links)
315 reader = ExcelReader(filename, read_only, keep_vba,
316 data_only, keep_links)
--> 317 reader.read()
318 return reader.wb
~/opt/anaconda3/lib/python3.9/site-packages/openpyxl/reader/excel.py in read(self)
281 apply_stylesheet(self.archive, self.wb)
282 self.read_worksheets()
--> 283 self.parser.assign_names()
284 if not self.read_only:
285 self.archive.close()
~/opt/anaconda3/lib/python3.9/site-packages/openpyxl/reader/workbook.py in assign_names(self)
100 reserved = defn.is_reserved
101 if reserved in ("Print_Titles", "Print_Area"):
--> 102 sheet = self.wb._sheets[defn.localSheetId]
103 if reserved == "Print_Titles":
104 rows, cols = _unpack_print_titles(defn)
IndexError: list index out of range
At this point I would traditonally download and convert to CSV but I want to access straight from web.
The sheet (which I guess I could access as sheetname="Q27 Providers (benchmarked)") doesn't work.
It looks like xlsx file is broken, therefore u can't download it. Did u try to open that xlsx file?
i have tried to load my dataframe to postgresql and especially to the server postgresql 14( i have 2 servers postgresql 9.3 running on port 5434 , and the other one is postgresql 14 running on port 5433) with this command :
from sqlalchemy import create_engine
engine = create_engine('postgresql://postgres:password#localhost:5433/MYDATABASE')
df.to_sql('My_Table', engine)
this is the error i get , also i've tried with more ways but its always the same error , i guess its related to the two servers i'm using :
---------------------------------------------------------------------------
OperationalError Traceback (most recent call last)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\base.py:3280, in Engine._wrap_pool_connect(self, fn, connection)
3279 try:
-> 3280 return fn()
3281 except dialect.dbapi.Error as e:
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\base.py:310, in Pool.connect(self)
303 """Return a DBAPI connection from the pool.
304
305 The connection is instrumented such that when its
(...)
308
309 """
--> 310 return _ConnectionFairy._checkout(self)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\base.py:868, in _ConnectionFairy._checkout(cls, pool, threadconns, fairy)
867 if not fairy:
--> 868 fairy = _ConnectionRecord.checkout(pool)
870 fairy._pool = pool
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\base.py:476, in _ConnectionRecord.checkout(cls, pool)
474 #classmethod
475 def checkout(cls, pool):
--> 476 rec = pool._do_get()
477 try:
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\impl.py:145, in QueuePool._do_get(self)
144 except:
--> 145 with util.safe_reraise():
146 self._dec_overflow()
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\util\langhelpers.py:70, in safe_reraise.__exit__(self, type_, value, traceback)
69 if not self.warn_only:
---> 70 compat.raise_(
71 exc_value,
72 with_traceback=exc_tb,
73 )
74 else:
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\util\compat.py:207, in raise_(***failed resolving arguments***)
206 try:
--> 207 raise exception
208 finally:
209 # credit to
210 # https://cosmicpercolator.com/2016/01/13/exception-leaks-in-python-2-and-3/
211 # as the __traceback__ object creates a cycle
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\impl.py:143, in QueuePool._do_get(self)
142 try:
--> 143 return self._create_connection()
144 except:
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\base.py:256, in Pool._create_connection(self)
254 """Called by subclasses to create a new ConnectionRecord."""
--> 256 return _ConnectionRecord(self)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\base.py:371, in _ConnectionRecord.__init__(self, pool, connect)
370 if connect:
--> 371 self.__connect()
372 self.finalize_callback = deque()
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\base.py:665, in _ConnectionRecord.__connect(self)
664 except Exception as e:
--> 665 with util.safe_reraise():
666 pool.logger.debug("Error on connect(): %s", e)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\util\langhelpers.py:70, in safe_reraise.__exit__(self, type_, value, traceback)
69 if not self.warn_only:
---> 70 compat.raise_(
71 exc_value,
72 with_traceback=exc_tb,
73 )
74 else:
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\util\compat.py:207, in raise_(***failed resolving arguments***)
206 try:
--> 207 raise exception
208 finally:
209 # credit to
210 # https://cosmicpercolator.com/2016/01/13/exception-leaks-in-python-2-and-3/
211 # as the __traceback__ object creates a cycle
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\base.py:661, in _ConnectionRecord.__connect(self)
660 self.starttime = time.time()
--> 661 self.dbapi_connection = connection = pool._invoke_creator(self)
662 pool.logger.debug("Created new connection %r", connection)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\create.py:590, in create_engine.<locals>.connect(connection_record)
589 return connection
--> 590 return dialect.connect(*cargs, **cparams)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\default.py:597, in DefaultDialect.connect(self, *cargs, **cparams)
595 def connect(self, *cargs, **cparams):
596 # inherits the docstring from interfaces.Dialect.connect
--> 597 return self.dbapi.connect(*cargs, **cparams)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\psycopg2\__init__.py:122, in connect(dsn, connection_factory, cursor_factory, **kwargs)
121 dsn = _ext.make_dsn(dsn, **kwargs)
--> 122 conn = _connect(dsn, connection_factory=connection_factory, **kwasync)
123 if cursor_factory is not None:
OperationalError:
The above exception was the direct cause of the following exception:
OperationalError Traceback (most recent call last)
Input In [105], in <cell line: 3>()
1 from sqlalchemy import create_engine
2 engine = create_engine('postgresql://postgres:password#localhost:5433/MYDATABASE')
----> 3 df.to_sql('My_Table', engine)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pandas\core\generic.py:2951, in NDFrame.to_sql(self, name, con, schema, if_exists, index, index_label, chunksize, dtype, method)
2794 """
2795 Write records stored in a DataFrame to a SQL database.
2796
(...)
2947 [(1,), (None,), (2,)]
2948 """ # noqa:E501
2949 from pandas.io import sql
-> 2951 return sql.to_sql(
2952 self,
2953 name,
2954 con,
2955 schema=schema,
2956 if_exists=if_exists,
2957 index=index,
2958 index_label=index_label,
2959 chunksize=chunksize,
2960 dtype=dtype,
2961 method=method,
2962 )
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pandas\io\sql.py:697, in to_sql(frame, name, con, schema, if_exists, index, index_label, chunksize, dtype, method, engine, **engine_kwargs)
692 elif not isinstance(frame, DataFrame):
693 raise NotImplementedError(
694 "'frame' argument should be either a Series or a DataFrame"
695 )
--> 697 return pandas_sql.to_sql(
698 frame,
699 name,
700 if_exists=if_exists,
701 index=index,
702 index_label=index_label,
703 schema=schema,
704 chunksize=chunksize,
705 dtype=dtype,
706 method=method,
707 engine=engine,
708 **engine_kwargs,
709 )
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pandas\io\sql.py:1729, in SQLDatabase.to_sql(self, frame, name, if_exists, index, index_label, schema, chunksize, dtype, method, engine, **engine_kwargs)
1679 """
1680 Write records stored in a DataFrame to a SQL database.
1681
(...)
1725 Any additional kwargs are passed to the engine.
1726 """
1727 sql_engine = get_engine(engine)
-> 1729 table = self.prep_table(
1730 frame=frame,
1731 name=name,
1732 if_exists=if_exists,
1733 index=index,
1734 index_label=index_label,
1735 schema=schema,
1736 dtype=dtype,
1737 )
1739 total_inserted = sql_engine.insert_records(
1740 table=table,
1741 con=self.connectable,
(...)
1748 **engine_kwargs,
1749 )
1751 self.check_case_sensitive(name=name, schema=schema)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pandas\io\sql.py:1628, in SQLDatabase.prep_table(self, frame, name, if_exists, index, index_label, schema, dtype)
1616 raise ValueError(f"The type of {col} is not a SQLAlchemy type")
1618 table = SQLTable(
1619 name,
1620 self,
(...)
1626 dtype=dtype,
1627 )
-> 1628 table.create()
1629 return table
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pandas\io\sql.py:831, in SQLTable.create(self)
830 def create(self):
--> 831 if self.exists():
832 if self.if_exists == "fail":
833 raise ValueError(f"Table '{self.name}' already exists.")
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pandas\io\sql.py:815, in SQLTable.exists(self)
814 def exists(self):
--> 815 return self.pd_sql.has_table(self.name, self.schema)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pandas\io\sql.py:1762, in SQLDatabase.has_table(self, name, schema)
1759 if _gt14():
1760 from sqlalchemy import inspect
-> 1762 insp = inspect(self.connectable)
1763 return insp.has_table(name, schema or self.meta.schema)
1764 else:
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\inspection.py:64, in inspect(subject, raiseerr)
62 if reg is True:
63 return subject
---> 64 ret = reg(subject)
65 if ret is not None:
66 break
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\reflection.py:182, in Inspector._engine_insp(bind)
180 #inspection._inspects(Engine)
181 def _engine_insp(bind):
--> 182 return Inspector._construct(Inspector._init_engine, bind)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\reflection.py:117, in Inspector._construct(cls, init, bind)
114 cls = bind.dialect.inspector
116 self = cls.__new__(cls)
--> 117 init(self, bind)
118 return self
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\reflection.py:128, in Inspector._init_engine(self, engine)
126 def _init_engine(self, engine):
127 self.bind = self.engine = engine
--> 128 engine.connect().close()
129 self._op_context_requires_connect = True
130 self.dialect = self.engine.dialect
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\base.py:3234, in Engine.connect(self, close_with_result)
3219 def connect(self, close_with_result=False):
3220 """Return a new :class:`_engine.Connection` object.
3221
3222 The :class:`_engine.Connection` object is a facade that uses a DBAPI
(...)
3231
3232 """
-> 3234 return self._connection_cls(self, close_with_result=close_with_result)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\base.py:96, in Connection.__init__(self, engine, connection, close_with_result, _branch_from, _execution_options, _dispatch, _has_events, _allow_revalidate)
91 self._has_events = _branch_from._has_events
92 else:
93 self._dbapi_connection = (
94 connection
95 if connection is not None
---> 96 else engine.raw_connection()
97 )
99 self._transaction = self._nested_transaction = None
100 self.__savepoint_seq = 0
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\base.py:3313, in Engine.raw_connection(self, _connection)
3291 def raw_connection(self, _connection=None):
3292 """Return a "raw" DBAPI connection from the connection pool.
3293
3294 The returned object is a proxied version of the DBAPI
(...)
3311
3312 """
-> 3313 return self._wrap_pool_connect(self.pool.connect, _connection)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\base.py:3283, in Engine._wrap_pool_connect(self, fn, connection)
3281 except dialect.dbapi.Error as e:
3282 if connection is None:
-> 3283 Connection._handle_dbapi_exception_noconnection(
3284 e, dialect, self
3285 )
3286 else:
3287 util.raise_(
3288 sys.exc_info()[1], with_traceback=sys.exc_info()[2]
3289 )
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\base.py:2117, in Connection._handle_dbapi_exception_noconnection(cls, e, dialect, engine)
2115 util.raise_(newraise, with_traceback=exc_info[2], from_=e)
2116 elif should_wrap:
-> 2117 util.raise_(
2118 sqlalchemy_exception, with_traceback=exc_info[2], from_=e
2119 )
2120 else:
2121 util.raise_(exc_info[1], with_traceback=exc_info[2])
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\util\compat.py:207, in raise_(***failed resolving arguments***)
204 exception.__cause__ = replace_context
206 try:
--> 207 raise exception
208 finally:
209 # credit to
210 # https://cosmicpercolator.com/2016/01/13/exception-leaks-in-python-2-and-3/
211 # as the __traceback__ object creates a cycle
212 del exception, replace_context, from_, with_traceback
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\base.py:3280, in Engine._wrap_pool_connect(self, fn, connection)
3278 dialect = self.dialect
3279 try:
-> 3280 return fn()
3281 except dialect.dbapi.Error as e:
3282 if connection is None:
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\base.py:310, in Pool.connect(self)
302 def connect(self):
303 """Return a DBAPI connection from the pool.
304
305 The connection is instrumented such that when its
(...)
308
309 """
--> 310 return _ConnectionFairy._checkout(self)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\base.py:868, in _ConnectionFairy._checkout(cls, pool, threadconns, fairy)
865 #classmethod
866 def _checkout(cls, pool, threadconns=None, fairy=None):
867 if not fairy:
--> 868 fairy = _ConnectionRecord.checkout(pool)
870 fairy._pool = pool
871 fairy._counter = 0
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\base.py:476, in _ConnectionRecord.checkout(cls, pool)
474 #classmethod
475 def checkout(cls, pool):
--> 476 rec = pool._do_get()
477 try:
478 dbapi_connection = rec.get_connection()
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\impl.py:145, in QueuePool._do_get(self)
143 return self._create_connection()
144 except:
--> 145 with util.safe_reraise():
146 self._dec_overflow()
147 else:
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\util\langhelpers.py:70, in safe_reraise.__exit__(self, type_, value, traceback)
68 self._exc_info = None # remove potential circular references
69 if not self.warn_only:
---> 70 compat.raise_(
71 exc_value,
72 with_traceback=exc_tb,
73 )
74 else:
75 if not compat.py3k and self._exc_info and self._exc_info[1]:
76 # emulate Py3K's behavior of telling us when an exception
77 # occurs in an exception handler.
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\util\compat.py:207, in raise_(***failed resolving arguments***)
204 exception.__cause__ = replace_context
206 try:
--> 207 raise exception
208 finally:
209 # credit to
210 # https://cosmicpercolator.com/2016/01/13/exception-leaks-in-python-2-and-3/
211 # as the __traceback__ object creates a cycle
212 del exception, replace_context, from_, with_traceback
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\impl.py:143, in QueuePool._do_get(self)
141 if self._inc_overflow():
142 try:
--> 143 return self._create_connection()
144 except:
145 with util.safe_reraise():
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\base.py:256, in Pool._create_connection(self)
253 def _create_connection(self):
254 """Called by subclasses to create a new ConnectionRecord."""
--> 256 return _ConnectionRecord(self)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\base.py:371, in _ConnectionRecord.__init__(self, pool, connect)
369 self.__pool = pool
370 if connect:
--> 371 self.__connect()
372 self.finalize_callback = deque()
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\base.py:665, in _ConnectionRecord.__connect(self)
663 self.fresh = True
664 except Exception as e:
--> 665 with util.safe_reraise():
666 pool.logger.debug("Error on connect(): %s", e)
667 else:
668 # in SQLAlchemy 1.4 the first_connect event is not used by
669 # the engine, so this will usually not be set
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\util\langhelpers.py:70, in safe_reraise.__exit__(self, type_, value, traceback)
68 self._exc_info = None # remove potential circular references
69 if not self.warn_only:
---> 70 compat.raise_(
71 exc_value,
72 with_traceback=exc_tb,
73 )
74 else:
75 if not compat.py3k and self._exc_info and self._exc_info[1]:
76 # emulate Py3K's behavior of telling us when an exception
77 # occurs in an exception handler.
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\util\compat.py:207, in raise_(***failed resolving arguments***)
204 exception.__cause__ = replace_context
206 try:
--> 207 raise exception
208 finally:
209 # credit to
210 # https://cosmicpercolator.com/2016/01/13/exception-leaks-in-python-2-and-3/
211 # as the __traceback__ object creates a cycle
212 del exception, replace_context, from_, with_traceback
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\base.py:661, in _ConnectionRecord.__connect(self)
659 try:
660 self.starttime = time.time()
--> 661 self.dbapi_connection = connection = pool._invoke_creator(self)
662 pool.logger.debug("Created new connection %r", connection)
663 self.fresh = True
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\create.py:590, in create_engine.<locals>.connect(connection_record)
588 if connection is not None:
589 return connection
--> 590 return dialect.connect(*cargs, **cparams)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\default.py:597, in DefaultDialect.connect(self, *cargs, **cparams)
595 def connect(self, *cargs, **cparams):
596 # inherits the docstring from interfaces.Dialect.connect
--> 597 return self.dbapi.connect(*cargs, **cparams)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\psycopg2\__init__.py:122, in connect(dsn, connection_factory, cursor_factory, **kwargs)
119 kwasync['async_'] = kwargs.pop('async_')
121 dsn = _ext.make_dsn(dsn, **kwargs)
--> 122 conn = _connect(dsn, connection_factory=connection_factory, **kwasync)
123 if cursor_factory is not None:
124 conn.cursor_factory = cursor_factory
OperationalError: (psycopg2.OperationalError)
(Background on this error at: https://sqlalche.me/e/14/e3q8)
I regularly use gspread and lately I've been getting these read timeout errors but only on certain sheets. Sometimes I'll be able to read & write to a sheet for a bit and then it'll hang until I get this read timeout error and then all subsequent attempts to read from that sheet will hang immediately and again give me this error.
---------------------------------------------------------------------------
timeout Traceback (most recent call last)
~/envs/jupyter/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
444 # Otherwise it looks like a bug in the code.
--> 445 six.raise_from(e, None)
446 except (SocketTimeout, BaseSSLError, SocketError) as e:
~/envs/jupyter/lib/python3.6/site-packages/urllib3/packages/six.py in raise_from(value, from_value)
~/envs/jupyter/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
439 try:
--> 440 httplib_response = conn.getresponse()
441 except BaseException as e:
/usr/lib/python3.6/http/client.py in getresponse(self)
1372 try:
-> 1373 response.begin()
1374 except ConnectionError:
/usr/lib/python3.6/http/client.py in begin(self)
310 while True:
--> 311 version, status, reason = self._read_status()
312 if status != CONTINUE:
/usr/lib/python3.6/http/client.py in _read_status(self)
271 def _read_status(self):
--> 272 line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
273 if len(line) > _MAXLINE:
/usr/lib/python3.6/socket.py in readinto(self, b)
585 try:
--> 586 return self._sock.recv_into(b)
587 except timeout:
/usr/lib/python3.6/ssl.py in recv_into(self, buffer, nbytes, flags)
1011 self.__class__)
-> 1012 return self.read(nbytes, buffer)
1013 else:
/usr/lib/python3.6/ssl.py in read(self, len, buffer)
873 try:
--> 874 return self._sslobj.read(len, buffer)
875 except SSLError as x:
/usr/lib/python3.6/ssl.py in read(self, len, buffer)
630 if buffer is not None:
--> 631 v = self._sslobj.read(len, buffer)
632 else:
timeout: The read operation timed out
During handling of the above exception, another exception occurred:
ReadTimeoutError Traceback (most recent call last)
~/envs/jupyter/lib/python3.6/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
448 retries=self.max_retries,
--> 449 timeout=timeout
450 )
~/envs/jupyter/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
755 retries = retries.increment(
--> 756 method, url, error=e, _pool=self, _stacktrace=sys.exc_info()[2]
757 )
~/envs/jupyter/lib/python3.6/site-packages/urllib3/util/retry.py in increment(self, method, url, response, error, _pool, _stacktrace)
531 if read is False or not self._is_method_retryable(method):
--> 532 raise six.reraise(type(error), error, _stacktrace)
533 elif read is not None:
~/envs/jupyter/lib/python3.6/site-packages/urllib3/packages/six.py in reraise(tp, value, tb)
734 raise value.with_traceback(tb)
--> 735 raise value
736 finally:
~/envs/jupyter/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
705 headers=headers,
--> 706 chunked=chunked,
707 )
~/envs/jupyter/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
446 except (SocketTimeout, BaseSSLError, SocketError) as e:
--> 447 self._raise_timeout(err=e, url=url, timeout_value=read_timeout)
448 raise
~/envs/jupyter/lib/python3.6/site-packages/urllib3/connectionpool.py in _raise_timeout(self, err, url, timeout_value)
336 raise ReadTimeoutError(
--> 337 self, url, "Read timed out. (read timeout=%s)" % timeout_value
338 )
ReadTimeoutError: HTTPSConnectionPool(host='sheets.googleapis.com', port=443): Read timed out. (read timeout=120)
During handling of the above exception, another exception occurred:
ReadTimeout Traceback (most recent call last)
<ipython-input-70-dc10e6e8ee1a> in <module>
----> 1 max_sheet.range('A1:B1')
~/envs/jupyter/lib/python3.6/site-packages/gspread/utils.py in wrapper(self, *args, **kwargs)
396 pass
397
--> 398 return method(self, *args, **kwargs)
399
400 return wrapper
~/envs/jupyter/lib/python3.6/site-packages/gspread/models.py in range(self, name)
686 range_label = absolute_range_name(self.title, name)
687
--> 688 data = self.spreadsheet.values_get(range_label)
689
690 start, end = name.split(':')
~/envs/jupyter/lib/python3.6/site-packages/gspread/models.py in values_get(self, range, params)
190 """
191 url = SPREADSHEET_VALUES_URL % (self.id, quote(range))
--> 192 r = self.client.request('get', url, params=params)
193 return r.json()
194
~/envs/jupyter/lib/python3.6/site-packages/gspread/client.py in request(self, method, endpoint, params, data, json, files, headers)
68 data=data,
69 files=files,
---> 70 headers=headers,
71 )
72
~/envs/jupyter/lib/python3.6/site-packages/requests/sessions.py in get(self, url, **kwargs)
553
554 kwargs.setdefault('allow_redirects', True)
--> 555 return self.request('GET', url, **kwargs)
556
557 def options(self, url, **kwargs):
~/envs/jupyter/lib/python3.6/site-packages/google/auth/transport/requests.py in request(self, method, url, data, headers, max_allowed_time, timeout, **kwargs)
486 headers=request_headers,
487 timeout=timeout,
--> 488 **kwargs
489 )
490 remaining_time = guard.remaining_timeout
~/envs/jupyter/lib/python3.6/site-packages/requests/sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
540 }
541 send_kwargs.update(settings)
--> 542 resp = self.send(prep, **send_kwargs)
543
544 return resp
~/envs/jupyter/lib/python3.6/site-packages/requests/sessions.py in send(self, request, **kwargs)
653
654 # Send the request
--> 655 r = adapter.send(request, **kwargs)
656
657 # Total elapsed time of the request (approximately)
~/envs/jupyter/lib/python3.6/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
527 raise SSLError(e, request=request)
528 elif isinstance(e, ReadTimeoutError):
--> 529 raise ReadTimeout(e, request=request)
530 else:
531 raise
ReadTimeout: HTTPSConnectionPool(host='sheets.googleapis.com', port=443): Read timed out. (read timeout=120)
I tried taking gspread out of it for troubleshooting but it still gives me the same issue even when I'm using code from the python quickstart example from https://developers.google.com/sheets/api/quickstart/python (below). The timeout issue is the same whether I do it from my local machine or my jupyter server on digitalocean.
from googleapiclient.discovery import build
from google.oauth2.service_account import Credentials
creds = Credentials.from_service_account_file('XXXXXXX', scopes=SCOPES)
service = build('sheets', 'v4', credentials=creds)
sheet = service.spreadsheets()
result = sheet.values().get(spreadsheetId='XXXXXXX',
range='Active Accounts - working copy!A1:A2').execute()
values = result.get('values', [])
The above code results in this:
---------------------------------------------------------------------------
timeout Traceback (most recent call last)
<ipython-input-95-5c352599e283> in <module>
1 sheet = service.spreadsheets()
2 result = sheet.values().get(spreadsheetId='XXXXXX',
----> 3 range='Active Accounts - working copy!A1:A2').execute()
4 values = result.get('values', [])
~/envs/jupyter/lib/python3.6/site-packages/googleapiclient/_helpers.py in positional_wrapper(*args, **kwargs)
132 elif positional_parameters_enforcement == POSITIONAL_WARNING:
133 logger.warning(message)
--> 134 return wrapped(*args, **kwargs)
135
136 return positional_wrapper
~/envs/jupyter/lib/python3.6/site-packages/googleapiclient/http.py in execute(self, http, num_retries)
899 method=str(self.method),
900 body=self.body,
--> 901 headers=self.headers,
902 )
903
~/envs/jupyter/lib/python3.6/site-packages/googleapiclient/http.py in _retry_request(http, num_retries, req_type, sleep, rand, uri, method, *args, **kwargs)
202 if exception:
203 if retry_num == num_retries:
--> 204 raise exception
205 else:
206 continue
~/envs/jupyter/lib/python3.6/site-packages/googleapiclient/http.py in _retry_request(http, num_retries, req_type, sleep, rand, uri, method, *args, **kwargs)
175 try:
176 exception = None
--> 177 resp, content = http.request(uri, method, *args, **kwargs)
178 # Retry on SSL errors and socket timeout errors.
179 except _ssl_SSLError as ssl_error:
~/envs/jupyter/lib/python3.6/site-packages/google_auth_httplib2.py in request(self, uri, method, body, headers, **kwargs)
196 # Make the request.
197 response, content = self.http.request(
--> 198 uri, method, body=body, headers=request_headers, **kwargs)
199
200 # If the response indicated that the credentials needed to be
~/envs/jupyter/lib/python3.6/site-packages/httplib2/__init__.py in request(self, uri, method, body, headers, redirections, connection_type)
1989 headers,
1990 redirections,
-> 1991 cachekey,
1992 )
1993 except Exception as e:
~/envs/jupyter/lib/python3.6/site-packages/httplib2/__init__.py in _request(self, conn, host, absolute_uri, request_uri, method, body, headers, redirections, cachekey)
1649
1650 (response, content) = self._conn_request(
-> 1651 conn, request_uri, method, body, headers
1652 )
1653
~/envs/jupyter/lib/python3.6/site-packages/httplib2/__init__.py in _conn_request(self, conn, request_uri, method, body, headers)
1587 pass
1588 try:
-> 1589 response = conn.getresponse()
1590 except (http.client.BadStatusLine, http.client.ResponseNotReady):
1591 # If we get a BadStatusLine on the first try then that means
/usr/lib/python3.6/http/client.py in getresponse(self)
1371 try:
1372 try:
-> 1373 response.begin()
1374 except ConnectionError:
1375 self.close()
/usr/lib/python3.6/http/client.py in begin(self)
309 # read until we get a non-100 response
310 while True:
--> 311 version, status, reason = self._read_status()
312 if status != CONTINUE:
313 break
/usr/lib/python3.6/http/client.py in _read_status(self)
270
271 def _read_status(self):
--> 272 line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
273 if len(line) > _MAXLINE:
274 raise LineTooLong("status line")
/usr/lib/python3.6/socket.py in readinto(self, b)
584 while True:
585 try:
--> 586 return self._sock.recv_into(b)
587 except timeout:
588 self._timeout_occurred = True
/usr/lib/python3.6/ssl.py in recv_into(self, buffer, nbytes, flags)
1010 "non-zero flags not allowed in calls to recv_into() on %s" %
1011 self.__class__)
-> 1012 return self.read(nbytes, buffer)
1013 else:
1014 return socket.recv_into(self, buffer, nbytes, flags)
/usr/lib/python3.6/ssl.py in read(self, len, buffer)
872 raise ValueError("Read on closed or unwrapped SSL socket.")
873 try:
--> 874 return self._sslobj.read(len, buffer)
875 except SSLError as x:
876 if x.args[0] == SSL_ERROR_EOF and self.suppress_ragged_eofs:
/usr/lib/python3.6/ssl.py in read(self, len, buffer)
629 """
630 if buffer is not None:
--> 631 v = self._sslobj.read(len, buffer)
632 else:
633 v = self._sslobj.read(len)
timeout: The read operation timed out
I'm working with a Dask Cluster on GCP. I'm using this code to deploy it:
from dask_cloudprovider.gcp import GCPCluster
from dask.distributed import Client
enviroment_vars = {
'EXTRA_PIP_PACKAGES': '"gcsfs"'
}
cluster = GCPCluster(
n_workers=32,
docker_image='daskdev/dask:2021.2.0',
env_vars=enviroment_vars,
network='my-network',
#filesystem_size=150,
machine_type='e2-standard-16',
projectid='my-project-id',
zone='us-central1-a',
on_host_maintenance="MIGRATE"
client = Client(cluster)
Then I read csv files, with the following code:
import dask.dataframe as dd
import csv
col_dtypes = {
'var1': 'float64',
'var2': 'object',
'var3': 'object',
'var4': 'float64'
}
df = dd.read_csv('gs://my_bucket/files-*.csv', blocksize=None, dtype= col_dtypes)
df = df.persist()
Everything works fine, but when I try to do some queries, or calculation, I get an error. For instance this piece of code:
df.var1.value_counts().compute()
This is the output:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-14-711a7c21ed42> in <module>
----> 1 df.var1.value_counts().compute()
/opt/conda/lib/python3.8/site-packages/dask/base.py in compute(self, **kwargs)
279 dask.base.compute
280 """
--> 281 (result,) = compute(self, traverse=False, **kwargs)
282 return result
283
/opt/conda/lib/python3.8/site-packages/dask/base.py in compute(*args, **kwargs)
561 postcomputes.append(x.__dask_postcompute__())
562
--> 563 results = schedule(dsk, keys, **kwargs)
564 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
565
/opt/conda/lib/python3.8/site-packages/distributed/client.py in get(self, dsk, keys, workers, allow_other_workers, resources, sync, asynchronous, direct, retries, priority, fifo_timeout, actors, **kwargs)
2653 should_rejoin = False
2654 try:
-> 2655 results = self.gather(packed, asynchronous=asynchronous, direct=direct)
2656 finally:
2657 for f in futures.values():
/opt/conda/lib/python3.8/site-packages/distributed/client.py in gather(self, futures, errors, direct, asynchronous)
1962 else:
1963 local_worker = None
-> 1964 return self.sync(
1965 self._gather,
1966 futures,
/opt/conda/lib/python3.8/site-packages/distributed/client.py in sync(self, func, asynchronous, callback_timeout, *args, **kwargs)
836 return future
837 else:
--> 838 return sync(
839 self.loop, func, *args, callback_timeout=callback_timeout, **kwargs
840 )
/opt/conda/lib/python3.8/site-packages/distributed/utils.py in sync(loop, func, callback_timeout, *args, **kwargs)
338 if error[0]:
339 typ, exc, tb = error[0]
--> 340 raise exc.with_traceback(tb)
341 else:
342 return result[0]
/opt/conda/lib/python3.8/site-packages/distributed/utils.py in f()
322 if callback_timeout is not None:
323 future = asyncio.wait_for(future, callback_timeout)
--> 324 result[0] = yield future
325 except Exception as exc:
326 error[0] = sys.exc_info()
/opt/conda/lib/python3.8/site-packages/tornado/gen.py in run(self)
760
761 try:
--> 762 value = future.result()
763 except Exception:
764 exc_info = sys.exc_info()
/opt/conda/lib/python3.8/site-packages/distributed/client.py in _gather(self, futures, errors, direct, local_worker)
1827 exc = CancelledError(key)
1828 else:
-> 1829 raise exception.with_traceback(traceback)
1830 raise exc
1831 if errors == "skip":
/opt/conda/lib/python3.8/site-packages/dask/optimization.py in __call__()
961 if not len(args) == len(self.inkeys):
962 raise ValueError("Expected %d args, got %d" % (len(self.inkeys), len(args)))
--> 963 return core.get(self.dsk, self.outkey, dict(zip(self.inkeys, args)))
964
965 def __reduce__(self):
/opt/conda/lib/python3.8/site-packages/dask/core.py in get()
149 for key in toposort(dsk):
150 task = dsk[key]
--> 151 result = _execute_task(task, cache)
152 cache[key] = result
153 result = _execute_task(out, cache)
/opt/conda/lib/python3.8/site-packages/dask/core.py in _execute_task()
119 # temporaries by their reference count and can execute certain
120 # operations in-place.
--> 121 return func(*(_execute_task(a, cache) for a in args))
122 elif not ishashable(arg):
123 return arg
/opt/conda/lib/python3.8/site-packages/dask/utils.py in apply()
33 def apply(func, args, kwargs=None):
34 if kwargs:
---> 35 return func(*args, **kwargs)
36 else:
37 return func(*args)
/opt/conda/lib/python3.8/site-packages/dask/dataframe/core.py in apply_and_enforce()
5474 return meta
5475 if is_dataframe_like(df):
-> 5476 check_matching_columns(meta, df)
5477 c = meta.columns
5478 else:
/opt/conda/lib/python3.8/site-packages/dask/dataframe/utils.py in check_matching_columns()
690 def check_matching_columns(meta, actual):
691 # Need nan_to_num otherwise nan comparison gives False
--> 692 if not np.array_equal(np.nan_to_num(meta.columns), np.nan_to_num(actual.columns)):
693 extra = methods.tolist(actual.columns.difference(meta.columns))
694 missing = methods.tolist(meta.columns.difference(actual.columns))
/opt/conda/lib/python3.8/site-packages/pandas/core/generic.py in __getattr__()
5268 or name in self._accessors
5269 ):
-> 5270 return object.__getattribute__(self, name)
5271 else:
5272 if self._info_axis._can_hold_identifiers_and_holds_name(name):
pandas/_libs/properties.pyx in pandas._libs.properties.AxisProperty.__get__()
/opt/conda/lib/python3.8/site-packages/pandas/core/generic.py in __getattr__()
5268 or name in self._accessors
5269 ):
-> 5270 return object.__getattribute__(self, name)
5271 else:
5272 if self._info_axis._can_hold_identifiers_and_holds_name(name):
AttributeError: 'DataFrame' object has no attribute '_data'
The version of Pandas in my docker file is 1.0.1, so I already try upgrading Pandas (to version 1.2.2), but it didn't work, what am I doing wrong?
My guess is that you have a version mismatch somewhere. What does client.get_versions(check=True) say?
I'm trying to do a choropleth using folium which offers a great link between GeoJSON, Pandas and leaflet.
GeoJSON format is like below :
{
"type":"FeatureCollection",
"features":[
{
"type":"Feature",
"geometry":
{
"type":"Polygon",
"coordinates":[[[-1.6704591323124895,49.62681486270549], .....
{
"insee":"50173",
"nom":"Équeurdreville-Hainneville",
"wikipedia":"fr:Équeurdreville-Hainneville",
"surf_m2":12940306}},
Pandas DataFrame :
postal_count.head(5)
Out[98]:
Code_commune_INSEE CP_count
0 75120 723
1 75115 698
2 75112 671
3 75118 627
4 75111 622
"Code_communes_INSEE" corresponds to the attribute "insee" in the GeoJSON. I'd like to do a choropleth using the variable "CP_count" in the above DataFrame.
Here is my code (snippet from this notebook)
map_france = folium.Map(location=[47.000000, 2.000000], zoom_start=6)
map_france.choropleth(
geo_str=open(geo_path + 'simplified_communes100m.json').read(),
data=postal_count,
columns=['Code_commune_INSEE', 'CP_count'],
key_on='feature.geometry.properties.insee',
fill_color='YlGn',
)
map_france.save(table_path + 'choro_test1.html')
I'm still getting this error again and again :
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-83-ea0fd2c1c207> in <module>()
8 fill_color='YlGn',
9 )
---> 10 map_france.save('/media/flo/Stockage/Data/MesAides/map/choro_test1.html')
/home/flo/.virtualenvs/mesaides/lib/python3.4/site-packages/folium/element.py in save(self, outfile, close_file, **kwargs)
151
152 root = self.get_root()
--> 153 html = root.render(**kwargs)
154 fid.write(html.encode('utf8'))
155 if close_file:
/home/flo/.virtualenvs/mesaides/lib/python3.4/site-packages/folium/element.py in render(self, **kwargs)
357 """Renders the HTML representation of the element."""
358 for name, child in self._children.items():
--> 359 child.render(**kwargs)
360 return self._template.render(this=self, kwargs=kwargs)
361
/home/flo/.virtualenvs/mesaides/lib/python3.4/site-packages/folium/element.py in render(self, **kwargs)
665
666 for name, element in self._children.items():
--> 667 element.render(**kwargs)
/home/flo/.virtualenvs/mesaides/lib/python3.4/site-packages/folium/element.py in render(self, **kwargs)
661 script = self._template.module.__dict__.get('script', None)
662 if script is not None:
--> 663 figure.script.add_children(Element(script(self, kwargs)),
664 name=self.get_name())
665
/home/flo/.virtualenvs/mesaides/lib/python3.4/site-packages/jinja2/runtime.py in __call__(self, *args, **kwargs)
434 raise TypeError('macro %r takes not more than %d argument(s)' %
435 (self.name, len(self.arguments)))
--> 436 return self._func(*arguments)
437
438 def __repr__(self):
<template> in macro(l_this, l_kwargs)
/home/flo/.virtualenvs/mesaides/lib/python3.4/site-packages/jinja2/runtime.py in call(_Context__self, _Context__obj, *args, **kwargs)
194 args = (__self.environment,) + args
195 try:
--> 196 return __obj(*args, **kwargs)
197 except StopIteration:
198 return __self.environment.undefined('value was undefined because '
/home/flo/.virtualenvs/mesaides/lib/python3.4/site-packages/folium/features.py in style_data(self)
352
353 for feature in self.data['features']:
--> 354 feature.setdefault('properties', {}).setdefault('style', {}).update(self.style_function(feature)) # noqa
355 return json.dumps(self.data, sort_keys=True)
356
/home/flo/.virtualenvs/mesaides/lib/python3.4/site-packages/folium/folium.py in style_function(x)
671 "color": line_color,
672 "fillOpacity": fill_opacity,
--> 673 "fillColor": color_scale_fun(x)
674 }
675
/home/flo/.virtualenvs/mesaides/lib/python3.4/site-packages/folium/folium.py in color_scale_fun(x)
659 def color_scale_fun(x):
660 return color_range[len(
--> 661 [u for u in color_domain if
662 u <= color_data[get_by_key(x, key_on)]])]
663 else:
/home/flo/.virtualenvs/mesaides/lib/python3.4/site-packages/folium/folium.py in <listcomp>(.0)
660 return color_range[len(
661 [u for u in color_domain if
--> 662 u <= color_data[get_by_key(x, key_on)]])]
663 else:
664 def color_scale_fun(x):
/home/flo/.virtualenvs/mesaides/lib/python3.4/site-packages/folium/folium.py in get_by_key(obj, key)
655 return (obj.get(key, None) if len(key.split('.')) <= 1 else
656 get_by_key(obj.get(key.split('.')[0], None),
--> 657 '.'.join(key.split('.')[1:])))
658
659 def color_scale_fun(x):
/home/flo/.virtualenvs/mesaides/lib/python3.4/site-packages/folium/folium.py in get_by_key(obj, key)
655 return (obj.get(key, None) if len(key.split('.')) <= 1 else
656 get_by_key(obj.get(key.split('.')[0], None),
--> 657 '.'.join(key.split('.')[1:])))
658
659 def color_scale_fun(x):
/home/flo/.virtualenvs/mesaides/lib/python3.4/site-packages/folium/folium.py in get_by_key(obj, key)
653
654 def get_by_key(obj, key):
--> 655 return (obj.get(key, None) if len(key.split('.')) <= 1 else
656 get_by_key(obj.get(key.split('.')[0], None),
657 '.'.join(key.split('.')[1:])))
AttributeError: 'NoneType' object has no attribute 'get'
I tried playing with key_on='feature.geometry.properties.insee' without any success.
There were 2 problems :
1 - The correct access to 'insee' parameters is : key_on='feature.properties.insee'
The best way to find the right key_on is to play with the geoJSON dict to make sure you are calling the right properties.
2- Once you have the right key_on parameters, you need to make sure that all the available keys in the geoJSON are contained in your Pandas DataFrame (otherwise it will raise a KeyError)
In this case, I used the following command line to get all the insee keys contained by my geoJSON:
ogrinfo -ro -al communes-20150101-100m.shp -geom=NO | grep insee > list_code_insee.txt
If you are experiencing the same issue, this should solve your problem.
I had the same problem on JupyterLab (on labs.cognitiveclass.ai) using Folium 0.5.0. Then I copied my code and ran it in PyCharm, and it worked! I don't understand why, perhaps there is some backend issue (?)
If you want to display a folium map outside of a Jupyter notebook, you have to save the map to html:
map_france.save('map_france.html')
and open the html in your browser.