Tensorflow_probability integer type error - tensorflow

I am trying to use tensorflow_probability to construct an MCMC chain. This is my code:
chain_states, kernel_results = tfp.mcmc.sample_chain(
    num_results=tf.constant(1e3, dtype=tf.int32),
    num_burnin_steps=tf.constant(1e2, dtype=tf.int32),
    parallel_iterations=tf.constant(10, dtype=tf.int32),
    current_state=current_state,
    kernel=tfp.mcmc.MetropolisHastings(
        inner_kernel=tfp.mcmc.HamiltonianMonteCarlo(
            target_log_prob_fn=joint_log_prob,
            num_leapfrog_steps=tf.constant(2, dtype=tf.int32),
            step_size=tf.Variable(1.),
            step_size_update_fn=tfp.mcmc.make_simple_step_size_update_policy()
        )))
But I got this error:
> InvalidArgumentError Traceback (most recent call last) <ipython-input-13-7e972cc65053> in <module>()
> ----> 1 make_model(well_complex, well_ligand, fi_complex, fi_ligand)
>
> ~/Documents/GitHub/assaytools2/assaytools2/assaytools2/inference.py in
> make_model(well_complex, well_ligand, fi_complex, fi_ligand)
> 162 num_leapfrog_steps=tf.constant(2, dtype=tf.int32),
> 163 step_size=tf.Variable(1.),
> --> 164 step_size_update_fn=tfp.mcmc.make_simple_step_size_update_policy()
> 165 )))
> 166
>
> ~/anaconda2/envs/py36/lib/python3.6/site-packages/tensorflow_probability/python/mcmc/sample.py
> in sample_chain(num_results, current_state, previous_kernel_results,
> kernel, num_burnin_steps, num_steps_between_results,
> parallel_iterations, name)
> 238
> 239 if previous_kernel_results is None:
> --> 240 previous_kernel_results = kernel.bootstrap_results(current_state)
> 241 return tf.scan(
> 242 fn=_scan_body,
>
> ~/anaconda2/envs/py36/lib/python3.6/site-packages/tensorflow_probability/python/mcmc/metropolis_hastings.py
> in bootstrap_results(self, init_state)
> 261 name=mcmc_util.make_name(self.name, 'mh', 'bootstrap_results'),
> 262 values=[init_state]):
> --> 263 pkr = self.inner_kernel.bootstrap_results(init_state)
> 264 if not has_target_log_prob(pkr):
> 265 raise ValueError(
>
> ~/anaconda2/envs/py36/lib/python3.6/site-packages/tensorflow_probability/python/mcmc/hmc.py
> in bootstrap_results(self, init_state)
> 506 def bootstrap_results(self, init_state):
> 507 """Creates initial `previous_kernel_results` using a supplied `state`."""
> --> 508 kernel_results = self._impl.bootstrap_results(init_state)
> 509 if self.step_size_update_fn is not None:
> 510 step_size_assign = self.step_size_update_fn(self.step_size, None) # pylint:
> disable=not-callable
>
> ~/anaconda2/envs/py36/lib/python3.6/site-packages/tensorflow_probability/python/mcmc/metropolis_hastings.py
> in bootstrap_results(self, init_state)
> 261 name=mcmc_util.make_name(self.name, 'mh', 'bootstrap_results'),
> 262 values=[init_state]):
> --> 263 pkr = self.inner_kernel.bootstrap_results(init_state)
> 264 if not has_target_log_prob(pkr):
> 265 raise ValueError(
>
> ~/anaconda2/envs/py36/lib/python3.6/site-packages/tensorflow_probability/python/mcmc/hmc.py
> in bootstrap_results(self, init_state)
> 672 init_target_log_prob,
> 673 init_grads_target_log_prob,
> --> 674 ] = mcmc_util.maybe_call_fn_and_grads(self.target_log_prob_fn, init_state)
> 675 return UncalibratedHamiltonianMonteCarloKernelResults(
> 676 log_acceptance_correction=tf.zeros_like(init_target_log_prob),
>
> ~/anaconda2/envs/py36/lib/python3.6/site-packages/tensorflow_probability/python/mcmc/util.py
> in maybe_call_fn_and_grads(fn, fn_arg_list, result, grads,
> check_non_none_grads, name)
> 232 fn_arg_list = (list(fn_arg_list) if is_list_like(fn_arg_list)
> 233 else [fn_arg_list])
> --> 234 result, grads = _value_and_gradients(fn, fn_arg_list, result, grads)
> 235 if not all(r.dtype.is_floating
> 236 for r in (result if is_list_like(result) else [result])): # pylint: disable=superfluous-parens
>
> ~/anaconda2/envs/py36/lib/python3.6/site-packages/tensorflow_probability/python/mcmc/util.py
> in _value_and_gradients(fn, fn_arg_list, result, grads, name)
> 207 ]
> 208 else:
> --> 209 grads = tfe.gradients_function(fn)(*fn_arg_list)
> 210 else:
> 211 if is_list_like(result) and len(result) == len(fn_arg_list):
>
> ~/anaconda2/envs/py36/lib/python3.6/site-packages/tensorflow/python/eager/backprop.py
> in decorated(*args, **kwds)
> 368 """Computes the gradient of the decorated function."""
> 369
> --> 370 _, grad = val_and_grad_function(f, params=params)(*args, **kwds)
> 371 return grad
> 372
>
> ~/anaconda2/envs/py36/lib/python3.6/site-packages/tensorflow/python/eager/backprop.py
> in decorated(*args, **kwds)
> 469 "receive keyword arguments.")
> 470 val, vjp = make_vjp(f, params)(*args, **kwds)
> --> 471 return val, vjp(dy=dy)
> 472
> 473 return decorated
>
> ~/anaconda2/envs/py36/lib/python3.6/site-packages/tensorflow/python/eager/backprop.py
> in vjp(dy)
> 539 return imperative_grad.imperative_grad(
> 540 _default_vspace, this_tape, nest.flatten(result), sources,
> --> 541 output_gradients=dy)
> 542 return result, vjp
> 543
>
> ~/anaconda2/envs/py36/lib/python3.6/site-packages/tensorflow/python/eager/imperative_grad.py
> in imperative_grad(vspace, tape, target, sources, output_gradients)
> 61 """
> 62 return pywrap_tensorflow.TFE_Py_TapeGradient(
> ---> 63 tape._tape, vspace, target, sources, output_gradients) # pylint: disable=protected-access
>
> ~/anaconda2/envs/py36/lib/python3.6/site-packages/tensorflow/python/eager/backprop.py
> in _gradient_function(op_name, attr_tuple, num_inputs, inputs,
> outputs, out_grads)
> 115 return [None] * num_inputs
> 116
> --> 117 return grad_fn(mock_op, *out_grads)
> 118
> 119
>
> ~/anaconda2/envs/py36/lib/python3.6/site-packages/tensorflow/python/ops/math_grad.py
> in _ProdGrad(op, grad)
> 158 with ops.device("/cpu:0"):
> 159 rank = array_ops.rank(op.inputs[0])
> --> 160 reduction_indices = (reduction_indices + rank) % rank
> 161 reduced = math_ops.cast(reduction_indices, dtypes.int32)
> 162 idx = math_ops.range(0, rank)
>
> ~/anaconda2/envs/py36/lib/python3.6/site-packages/tensorflow/python/ops/math_ops.py
> in binary_op_wrapper(x, y)
> 860 with ops.name_scope(None, op_name, [x, y]) as name:
> 861 if isinstance(x, ops.Tensor) and isinstance(y, ops.Tensor):
> --> 862 return func(x, y, name=name)
> 863 elif not isinstance(y, sparse_tensor.SparseTensor):
> 864 try:
>
> ~/anaconda2/envs/py36/lib/python3.6/site-packages/tensorflow/python/ops/gen_math_ops.py
> in add(x, y, name)
> 322 else:
> 323 message = e.message
> --> 324 _six.raise_from(_core._status_to_exception(e.code, message), None)
> 325
> 326
>
> ~/anaconda2/envs/py36/lib/python3.6/site-packages/six.py in
> raise_from(value, from_value)
>
> InvalidArgumentError: cannot compute Add as input #0(zero-based) was
> expected to be a int32 tensor but is a int64 tensor [Op:Add] name:
> mcmc_sample_chain/mh_bootstrap_results/mh_bootstrap_results/hmc_kernel_bootstrap_results/maybe_call_fn_and_grads/value_and_gradients/add/
I double-checked and none of my initial tensors were of integer type.
I wonder where I went wrong.
Thanks!
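One thing the traceback does show: the failure happens in _ProdGrad, i.e. while TensorFlow differentiates a reduce_prod evaluated inside joint_log_prob, not in the sample_chain arguments themselves. Still, as a minimal sketch (an experiment, not a verified fix), the same chain with plain Python ints for the count arguments takes the wrapped tf.constant values out of the picture; joint_log_prob and current_state are the question's own objects:
import tensorflow as tf
import tensorflow_probability as tfp

# Sketch only: identical chain, but the integer arguments are plain Python ints.
chain_states, kernel_results = tfp.mcmc.sample_chain(
    num_results=1000,
    num_burnin_steps=100,
    parallel_iterations=10,
    current_state=current_state,
    kernel=tfp.mcmc.MetropolisHastings(
        inner_kernel=tfp.mcmc.HamiltonianMonteCarlo(
            target_log_prob_fn=joint_log_prob,
            num_leapfrog_steps=2,
            step_size=tf.Variable(1.),
            step_size_update_fn=tfp.mcmc.make_simple_step_size_update_policy())))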

Related

I am unable to retrieve data from pandas_datareader. How will it work for Yahoo data?

PG = wb.DataReader('PG',data_source = 'yahoo',start = '2000-1-1', end = '2001-1-1')
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[34], line 2
1 # !pip install pandas_datareader
----> 2 PG = wb.DataReader('PG',data_source = 'yahoo',start = '2000-1-1', end = '2001-1-1')
File c:\Users\intiz\AppData\Local\Programs\Python\Python310\lib\site-packages\pandas\util\_decorators.py:211, in deprecate_kwarg.<locals>._deprecate_kwarg.<locals>.wrapper(*args, **kwargs)
209 else:
210 kwargs[new_arg_name] = new_arg_value
--> 211 return func(*args, **kwargs)
File c:\Users\intiz\AppData\Local\Programs\Python\Python310\lib\site-packages\pandas_datareader\data.py:379, in DataReader(name, data_source, start, end, retry_count, pause, session, api_key)
367 raise NotImplementedError(msg)
369 if data_source == "yahoo":
370 return YahooDailyReader(
371 symbols=name,
372 start=start,
373 end=end,
374 adjust_price=False,
375 chunksize=25,
376 retry_count=retry_count,
377 pause=pause,
378 session=session,
--> 379 ).read()
381 elif data_source == "iex":
...
--> 153 data = j["context"]["dispatcher"]["stores"]["HistoricalPriceStore"]
154 except KeyError:
155 msg = "No data fetched for symbol {} using {}"
TypeError: string indices must be integers
I need the PG stock price information by date.
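The question is unanswered here, but the closing TypeError ("string indices must be integers") typically means the Yahoo page that pandas_datareader scrapes came back as HTML rather than the JSON it expects. A hedged workaround is to swap in the separate yfinance package (my suggestion, not from the post), assuming it is installed (pip install yfinance):
import yfinance as yf

# Daily PG prices over the same date range as in the question.
PG = yf.download('PG', start='2000-01-01', end='2001-01-01')
print(PG.head())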

ArrowInvalid: Could not convert ... with type DataFrame: did not recognize Python value type when inferring an Arrow data type

Using the IForest library, I am implementing a function for detecting outliers with the following code:
from collections import Counter
from itertools import groupby

import pyspark.pandas as pd
import numpy as np
from alibi_detect.od import IForest

# **************** IForest model ******************************************
# IForest output - Outlier ---> 1, Not-Outlier ----> 0
od = IForest(
    threshold=0.,
    n_estimators=5
)

def mode(lm):
    # Most common value; falls back to the median when there is a tie.
    freqs = groupby(Counter(lm).most_common(), lambda x: x[1])
    m = [val for val, count in next(freqs)[1]]
    if len(m) > 1:
        m = np.median(lm)
    else:
        m = float(m[0])
    return m

def disper(x):
    x_pred = x[['precio_local', 'precio_contenido']]
    insumo_std = x_pred.std().to_frame().T
    mod = mode(x_pred['precio_local'])
    x_send2 = pd.DataFrame(
        index=x_pred.index,
        columns=['Std_precio', 'Std_prec_cont', 'cant_muestras', 'Moda_precio_local', 'IsFo']
    )
    x_send2.loc[:, 'Std_precio'] = insumo_std.loc[0, 'precio_local']
    x_send2.loc[:, 'Std_prec_cont'] = insumo_std.loc[0, 'precio_local']
    x_send2.loc[:, 'Moda_precio_local'] = mod
    mod_cont = mode(x_pred['precio_contenido'])
    x_send2.loc[:, 'Moda_precio_contenido_std'] = mod_cont
    ctn = x_pred.shape[0]
    x_send2.loc[:, 'cant_muestras'] = ctn
    if x_pred.shape[0] > 3:
        od.fit(x_pred)
        preds = od.predict(
            x_pred,
            return_instance_score=True
        )
        x_preds = preds['data']['is_outlier']
        # x_send2.loc[:, 'IsFo'] = x_preds
        pd.set_option('compute.ops_on_diff_frames', True)
        x_send2.loc[:, 'IsFo'] = pd.Series(x_preds, index=x_pred.index)
        # x_send2.insert(x_pred.index, 'IsFo', x_preds)
    else:
        x_send2.loc[:, 'IsFo'] = 0
    print(type(x_send2))
    print(x_send2)
    return x_send2

insumo_all_pd = insumo_all.to_pandas_on_spark()
I get the error:
ArrowInvalid Traceback (most recent call last)
<command-1939548125702628> in <module>
----> 1 df_result = insumo_all_pd.groupby(by=['categoria','marca','submarca','barcode','contenido_std','unidad_std']).apply(disper)
2 display(df_result)
/databricks/spark/python/pyspark/pandas/usage_logging/__init__.py in wrapper(*args, **kwargs)
192 start = time.perf_counter()
193 try:
--> 194 res = func(*args, **kwargs)
195 logger.log_success(
196 class_name, function_name, time.perf_counter() - start, signature
/databricks/spark/python/pyspark/pandas/groupby.py in apply(self, func, *args, **kwargs)
1200 else:
1201 pser_or_pdf = grouped.apply(pandas_apply, *args, **kwargs)
-> 1202 psser_or_psdf = ps.from_pandas(pser_or_pdf)
1203
1204 if len(pdf) <= limit:
/databricks/spark/python/pyspark/pandas/usage_logging/__init__.py in wrapper(*args, **kwargs)
187 if hasattr(_local, "logging") and _local.logging:
188 # no need to log since this should be internal call.
--> 189 return func(*args, **kwargs)
190 _local.logging = True
191 try:
/databricks/spark/python/pyspark/pandas/namespace.py in from_pandas(pobj)
143 """
144 if isinstance(pobj, pd.Series):
--> 145 return Series(pobj)
146 elif isinstance(pobj, pd.DataFrame):
147 return DataFrame(pobj)
/databricks/spark/python/pyspark/pandas/usage_logging/__init__.py in wrapper(*args, **kwargs)
187 if hasattr(_local, "logging") and _local.logging:
188 # no need to log since this should be internal call.
--> 189 return func(*args, **kwargs)
190 _local.logging = True
191 try:
/databricks/spark/python/pyspark/pandas/series.py in __init__(self, data, index, dtype, name, copy, fastpath)
424 data=data, index=index, dtype=dtype, name=name, copy=copy, fastpath=fastpath
425 )
--> 426 internal = InternalFrame.from_pandas(pd.DataFrame(s))
427 if s.name is None:
428 internal = internal.copy(column_labels=[None])
/databricks/spark/python/pyspark/pandas/internal.py in from_pandas(pdf)
1458 data_columns,
1459 data_fields,
-> 1460 ) = InternalFrame.prepare_pandas_frame(pdf)
1461
1462 schema = StructType([field.struct_field for field in index_fields + data_fields])
/databricks/spark/python/pyspark/pandas/internal.py in prepare_pandas_frame(pdf, retain_index)
1531
1532 for col, dtype in zip(reset_index.columns, reset_index.dtypes):
-> 1533 spark_type = infer_pd_series_spark_type(reset_index[col], dtype)
1534 reset_index[col] = DataTypeOps(dtype, spark_type).prepare(reset_index[col])
1535
/databricks/spark/python/pyspark/pandas/typedef/typehints.py in infer_pd_series_spark_type(pser, dtype)
327 return pser.iloc[0].__UDT__
328 else:
--> 329 return from_arrow_type(pa.Array.from_pandas(pser).type)
330 elif isinstance(dtype, CategoricalDtype):
331 if isinstance(pser.dtype, CategoricalDtype):
/databricks/python/lib/python3.8/site-packages/pyarrow/array.pxi in pyarrow.lib.Array.from_pandas()
/databricks/python/lib/python3.8/site-packages/pyarrow/array.pxi in pyarrow.lib.array()
/databricks/python/lib/python3.8/site-packages/pyarrow/array.pxi in pyarrow.lib._ndarray_to_array()
/databricks/python/lib/python3.8/site-packages/pyarrow/error.pxi in pyarrow.lib.check_status()
ArrowInvalid: Could not convert Std_precio Std_prec_cont cant_muestras Moda_precio_local IsFo Moda_precio_contenido_std
107 0.0 0.0 3 1.0 0 1.666667
252 0.0 0.0 3 1.0 0 1.666667
396 0.0 0.0 3 1.0 0 1.666667 with type DataFrame: did not recognize Python value type when inferring an Arrow data type
The error is encountered when using:
df_result = insumo_all_pd.groupby(by=['categoria','marca','submarca','barcode','contenido_std','unidad_std']).apply(disper)
The schema of the dataframe insumo_all_pd is:
fecha_ola datetime64[ns]
pais object
categoria object
marca object
submarca object
contenido_std float64
unidad_std object
barcode object
precio_local float64
cantidad float64
descripcion object
id_ticket object
id_item object
id_pdv object
fecha_transaccion datetime64[ns]
id_ref float64
precio_contenido float64
dtype: object
It is not clear to me what is causing the error but it seems that the data types are being inferred incorrectly.
I have tried to convert the data types resulting from the "disper" function to float but it gives the same error.
I appreciate any help or guidance you can give me.
The new Jupyter has apparently changed some of the pandas-related libraries. The solution is to upgrade to Jupyter 5.
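A hedged sketch of one more thing to check (my guess, not a confirmed fix): with pandas-on-Spark, the function handed to groupby(...).apply() receives each group as a plain pandas DataFrame and is expected to return one, and a frame created with pd.DataFrame(index=..., columns=...) starts out with object-dtype columns, which Arrow can refuse to infer. Building the per-group result with plain pandas and explicit dtypes avoids both issues:
import pandas   # plain pandas, distinct from the pyspark.pandas alias used above

def disper_plain(pdf):
    # 'pdf' is the plain pandas DataFrame that pandas-on-Spark passes for one group;
    # the column names are the ones used in disper().
    x_pred = pdf[['precio_local', 'precio_contenido']]
    out = pandas.DataFrame(index=x_pred.index)
    out['Std_precio'] = x_pred['precio_local'].std()
    out['Std_prec_cont'] = x_pred['precio_contenido'].std()
    out['cant_muestras'] = x_pred.shape[0]
    out['Moda_precio_local'] = x_pred['precio_local'].median()   # stand-in for mode()
    out['IsFo'] = 0
    # Explicit dtypes leave Arrow nothing ambiguous to infer.
    return out.astype({'Std_precio': 'float64', 'Std_prec_cont': 'float64',
                       'cant_muestras': 'int64', 'Moda_precio_local': 'float64',
                       'IsFo': 'int64'})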

How to load a dataframe to postgresql 14

I have tried to load my dataframe into PostgreSQL, specifically into the PostgreSQL 14 server (I have two servers: PostgreSQL 9.3 running on port 5434, and PostgreSQL 14 running on port 5433), with this command:
from sqlalchemy import create_engine
engine = create_engine('postgresql://postgres:password#localhost:5433/MYDATABASE')
df.to_sql('My_Table', engine)
This is the error I get. I have also tried other approaches, but it is always the same error; I guess it is related to the two servers I am using:
---------------------------------------------------------------------------
OperationalError Traceback (most recent call last)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\base.py:3280, in Engine._wrap_pool_connect(self, fn, connection)
3279 try:
-> 3280 return fn()
3281 except dialect.dbapi.Error as e:
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\base.py:310, in Pool.connect(self)
303 """Return a DBAPI connection from the pool.
304
305 The connection is instrumented such that when its
(...)
308
309 """
--> 310 return _ConnectionFairy._checkout(self)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\base.py:868, in _ConnectionFairy._checkout(cls, pool, threadconns, fairy)
867 if not fairy:
--> 868 fairy = _ConnectionRecord.checkout(pool)
870 fairy._pool = pool
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\base.py:476, in _ConnectionRecord.checkout(cls, pool)
474 #classmethod
475 def checkout(cls, pool):
--> 476 rec = pool._do_get()
477 try:
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\impl.py:145, in QueuePool._do_get(self)
144 except:
--> 145 with util.safe_reraise():
146 self._dec_overflow()
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\util\langhelpers.py:70, in safe_reraise.__exit__(self, type_, value, traceback)
69 if not self.warn_only:
---> 70 compat.raise_(
71 exc_value,
72 with_traceback=exc_tb,
73 )
74 else:
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\util\compat.py:207, in raise_(***failed resolving arguments***)
206 try:
--> 207 raise exception
208 finally:
209 # credit to
210 # https://cosmicpercolator.com/2016/01/13/exception-leaks-in-python-2-and-3/
211 # as the __traceback__ object creates a cycle
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\impl.py:143, in QueuePool._do_get(self)
142 try:
--> 143 return self._create_connection()
144 except:
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\base.py:256, in Pool._create_connection(self)
254 """Called by subclasses to create a new ConnectionRecord."""
--> 256 return _ConnectionRecord(self)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\base.py:371, in _ConnectionRecord.__init__(self, pool, connect)
370 if connect:
--> 371 self.__connect()
372 self.finalize_callback = deque()
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\base.py:665, in _ConnectionRecord.__connect(self)
664 except Exception as e:
--> 665 with util.safe_reraise():
666 pool.logger.debug("Error on connect(): %s", e)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\util\langhelpers.py:70, in safe_reraise.__exit__(self, type_, value, traceback)
69 if not self.warn_only:
---> 70 compat.raise_(
71 exc_value,
72 with_traceback=exc_tb,
73 )
74 else:
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\util\compat.py:207, in raise_(***failed resolving arguments***)
206 try:
--> 207 raise exception
208 finally:
209 # credit to
210 # https://cosmicpercolator.com/2016/01/13/exception-leaks-in-python-2-and-3/
211 # as the __traceback__ object creates a cycle
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\base.py:661, in _ConnectionRecord.__connect(self)
660 self.starttime = time.time()
--> 661 self.dbapi_connection = connection = pool._invoke_creator(self)
662 pool.logger.debug("Created new connection %r", connection)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\create.py:590, in create_engine.<locals>.connect(connection_record)
589 return connection
--> 590 return dialect.connect(*cargs, **cparams)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\default.py:597, in DefaultDialect.connect(self, *cargs, **cparams)
595 def connect(self, *cargs, **cparams):
596 # inherits the docstring from interfaces.Dialect.connect
--> 597 return self.dbapi.connect(*cargs, **cparams)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\psycopg2\__init__.py:122, in connect(dsn, connection_factory, cursor_factory, **kwargs)
121 dsn = _ext.make_dsn(dsn, **kwargs)
--> 122 conn = _connect(dsn, connection_factory=connection_factory, **kwasync)
123 if cursor_factory is not None:
OperationalError:
The above exception was the direct cause of the following exception:
OperationalError Traceback (most recent call last)
Input In [105], in <cell line: 3>()
1 from sqlalchemy import create_engine
2 engine = create_engine('postgresql://postgres:password#localhost:5433/MYDATABASE')
----> 3 df.to_sql('My_Table', engine)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pandas\core\generic.py:2951, in NDFrame.to_sql(self, name, con, schema, if_exists, index, index_label, chunksize, dtype, method)
2794 """
2795 Write records stored in a DataFrame to a SQL database.
2796
(...)
2947 [(1,), (None,), (2,)]
2948 """ # noqa:E501
2949 from pandas.io import sql
-> 2951 return sql.to_sql(
2952 self,
2953 name,
2954 con,
2955 schema=schema,
2956 if_exists=if_exists,
2957 index=index,
2958 index_label=index_label,
2959 chunksize=chunksize,
2960 dtype=dtype,
2961 method=method,
2962 )
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pandas\io\sql.py:697, in to_sql(frame, name, con, schema, if_exists, index, index_label, chunksize, dtype, method, engine, **engine_kwargs)
692 elif not isinstance(frame, DataFrame):
693 raise NotImplementedError(
694 "'frame' argument should be either a Series or a DataFrame"
695 )
--> 697 return pandas_sql.to_sql(
698 frame,
699 name,
700 if_exists=if_exists,
701 index=index,
702 index_label=index_label,
703 schema=schema,
704 chunksize=chunksize,
705 dtype=dtype,
706 method=method,
707 engine=engine,
708 **engine_kwargs,
709 )
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pandas\io\sql.py:1729, in SQLDatabase.to_sql(self, frame, name, if_exists, index, index_label, schema, chunksize, dtype, method, engine, **engine_kwargs)
1679 """
1680 Write records stored in a DataFrame to a SQL database.
1681
(...)
1725 Any additional kwargs are passed to the engine.
1726 """
1727 sql_engine = get_engine(engine)
-> 1729 table = self.prep_table(
1730 frame=frame,
1731 name=name,
1732 if_exists=if_exists,
1733 index=index,
1734 index_label=index_label,
1735 schema=schema,
1736 dtype=dtype,
1737 )
1739 total_inserted = sql_engine.insert_records(
1740 table=table,
1741 con=self.connectable,
(...)
1748 **engine_kwargs,
1749 )
1751 self.check_case_sensitive(name=name, schema=schema)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pandas\io\sql.py:1628, in SQLDatabase.prep_table(self, frame, name, if_exists, index, index_label, schema, dtype)
1616 raise ValueError(f"The type of {col} is not a SQLAlchemy type")
1618 table = SQLTable(
1619 name,
1620 self,
(...)
1626 dtype=dtype,
1627 )
-> 1628 table.create()
1629 return table
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pandas\io\sql.py:831, in SQLTable.create(self)
830 def create(self):
--> 831 if self.exists():
832 if self.if_exists == "fail":
833 raise ValueError(f"Table '{self.name}' already exists.")
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pandas\io\sql.py:815, in SQLTable.exists(self)
814 def exists(self):
--> 815 return self.pd_sql.has_table(self.name, self.schema)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pandas\io\sql.py:1762, in SQLDatabase.has_table(self, name, schema)
1759 if _gt14():
1760 from sqlalchemy import inspect
-> 1762 insp = inspect(self.connectable)
1763 return insp.has_table(name, schema or self.meta.schema)
1764 else:
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\inspection.py:64, in inspect(subject, raiseerr)
62 if reg is True:
63 return subject
---> 64 ret = reg(subject)
65 if ret is not None:
66 break
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\reflection.py:182, in Inspector._engine_insp(bind)
180 #inspection._inspects(Engine)
181 def _engine_insp(bind):
--> 182 return Inspector._construct(Inspector._init_engine, bind)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\reflection.py:117, in Inspector._construct(cls, init, bind)
114 cls = bind.dialect.inspector
116 self = cls.__new__(cls)
--> 117 init(self, bind)
118 return self
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\reflection.py:128, in Inspector._init_engine(self, engine)
126 def _init_engine(self, engine):
127 self.bind = self.engine = engine
--> 128 engine.connect().close()
129 self._op_context_requires_connect = True
130 self.dialect = self.engine.dialect
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\base.py:3234, in Engine.connect(self, close_with_result)
3219 def connect(self, close_with_result=False):
3220 """Return a new :class:`_engine.Connection` object.
3221
3222 The :class:`_engine.Connection` object is a facade that uses a DBAPI
(...)
3231
3232 """
-> 3234 return self._connection_cls(self, close_with_result=close_with_result)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\base.py:96, in Connection.__init__(self, engine, connection, close_with_result, _branch_from, _execution_options, _dispatch, _has_events, _allow_revalidate)
91 self._has_events = _branch_from._has_events
92 else:
93 self._dbapi_connection = (
94 connection
95 if connection is not None
---> 96 else engine.raw_connection()
97 )
99 self._transaction = self._nested_transaction = None
100 self.__savepoint_seq = 0
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\base.py:3313, in Engine.raw_connection(self, _connection)
3291 def raw_connection(self, _connection=None):
3292 """Return a "raw" DBAPI connection from the connection pool.
3293
3294 The returned object is a proxied version of the DBAPI
(...)
3311
3312 """
-> 3313 return self._wrap_pool_connect(self.pool.connect, _connection)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\base.py:3283, in Engine._wrap_pool_connect(self, fn, connection)
3281 except dialect.dbapi.Error as e:
3282 if connection is None:
-> 3283 Connection._handle_dbapi_exception_noconnection(
3284 e, dialect, self
3285 )
3286 else:
3287 util.raise_(
3288 sys.exc_info()[1], with_traceback=sys.exc_info()[2]
3289 )
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\base.py:2117, in Connection._handle_dbapi_exception_noconnection(cls, e, dialect, engine)
2115 util.raise_(newraise, with_traceback=exc_info[2], from_=e)
2116 elif should_wrap:
-> 2117 util.raise_(
2118 sqlalchemy_exception, with_traceback=exc_info[2], from_=e
2119 )
2120 else:
2121 util.raise_(exc_info[1], with_traceback=exc_info[2])
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\util\compat.py:207, in raise_(***failed resolving arguments***)
204 exception.__cause__ = replace_context
206 try:
--> 207 raise exception
208 finally:
209 # credit to
210 # https://cosmicpercolator.com/2016/01/13/exception-leaks-in-python-2-and-3/
211 # as the __traceback__ object creates a cycle
212 del exception, replace_context, from_, with_traceback
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\base.py:3280, in Engine._wrap_pool_connect(self, fn, connection)
3278 dialect = self.dialect
3279 try:
-> 3280 return fn()
3281 except dialect.dbapi.Error as e:
3282 if connection is None:
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\base.py:310, in Pool.connect(self)
302 def connect(self):
303 """Return a DBAPI connection from the pool.
304
305 The connection is instrumented such that when its
(...)
308
309 """
--> 310 return _ConnectionFairy._checkout(self)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\base.py:868, in _ConnectionFairy._checkout(cls, pool, threadconns, fairy)
865 #classmethod
866 def _checkout(cls, pool, threadconns=None, fairy=None):
867 if not fairy:
--> 868 fairy = _ConnectionRecord.checkout(pool)
870 fairy._pool = pool
871 fairy._counter = 0
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\base.py:476, in _ConnectionRecord.checkout(cls, pool)
474 #classmethod
475 def checkout(cls, pool):
--> 476 rec = pool._do_get()
477 try:
478 dbapi_connection = rec.get_connection()
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\impl.py:145, in QueuePool._do_get(self)
143 return self._create_connection()
144 except:
--> 145 with util.safe_reraise():
146 self._dec_overflow()
147 else:
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\util\langhelpers.py:70, in safe_reraise.__exit__(self, type_, value, traceback)
68 self._exc_info = None # remove potential circular references
69 if not self.warn_only:
---> 70 compat.raise_(
71 exc_value,
72 with_traceback=exc_tb,
73 )
74 else:
75 if not compat.py3k and self._exc_info and self._exc_info[1]:
76 # emulate Py3K's behavior of telling us when an exception
77 # occurs in an exception handler.
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\util\compat.py:207, in raise_(***failed resolving arguments***)
204 exception.__cause__ = replace_context
206 try:
--> 207 raise exception
208 finally:
209 # credit to
210 # https://cosmicpercolator.com/2016/01/13/exception-leaks-in-python-2-and-3/
211 # as the __traceback__ object creates a cycle
212 del exception, replace_context, from_, with_traceback
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\impl.py:143, in QueuePool._do_get(self)
141 if self._inc_overflow():
142 try:
--> 143 return self._create_connection()
144 except:
145 with util.safe_reraise():
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\base.py:256, in Pool._create_connection(self)
253 def _create_connection(self):
254 """Called by subclasses to create a new ConnectionRecord."""
--> 256 return _ConnectionRecord(self)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\base.py:371, in _ConnectionRecord.__init__(self, pool, connect)
369 self.__pool = pool
370 if connect:
--> 371 self.__connect()
372 self.finalize_callback = deque()
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\base.py:665, in _ConnectionRecord.__connect(self)
663 self.fresh = True
664 except Exception as e:
--> 665 with util.safe_reraise():
666 pool.logger.debug("Error on connect(): %s", e)
667 else:
668 # in SQLAlchemy 1.4 the first_connect event is not used by
669 # the engine, so this will usually not be set
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\util\langhelpers.py:70, in safe_reraise.__exit__(self, type_, value, traceback)
68 self._exc_info = None # remove potential circular references
69 if not self.warn_only:
---> 70 compat.raise_(
71 exc_value,
72 with_traceback=exc_tb,
73 )
74 else:
75 if not compat.py3k and self._exc_info and self._exc_info[1]:
76 # emulate Py3K's behavior of telling us when an exception
77 # occurs in an exception handler.
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\util\compat.py:207, in raise_(***failed resolving arguments***)
204 exception.__cause__ = replace_context
206 try:
--> 207 raise exception
208 finally:
209 # credit to
210 # https://cosmicpercolator.com/2016/01/13/exception-leaks-in-python-2-and-3/
211 # as the __traceback__ object creates a cycle
212 del exception, replace_context, from_, with_traceback
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\pool\base.py:661, in _ConnectionRecord.__connect(self)
659 try:
660 self.starttime = time.time()
--> 661 self.dbapi_connection = connection = pool._invoke_creator(self)
662 pool.logger.debug("Created new connection %r", connection)
663 self.fresh = True
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\create.py:590, in create_engine.<locals>.connect(connection_record)
588 if connection is not None:
589 return connection
--> 590 return dialect.connect(*cargs, **cparams)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sqlalchemy\engine\default.py:597, in DefaultDialect.connect(self, *cargs, **cparams)
595 def connect(self, *cargs, **cparams):
596 # inherits the docstring from interfaces.Dialect.connect
--> 597 return self.dbapi.connect(*cargs, **cparams)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\psycopg2\__init__.py:122, in connect(dsn, connection_factory, cursor_factory, **kwargs)
119 kwasync['async_'] = kwargs.pop('async_')
121 dsn = _ext.make_dsn(dsn, **kwargs)
--> 122 conn = _connect(dsn, connection_factory=connection_factory, **kwasync)
123 if cursor_factory is not None:
124 conn.cursor_factory = cursor_factory
OperationalError: (psycopg2.OperationalError)
(Background on this error at: https://sqlalche.me/e/14/e3q8)
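No answer is recorded here, but one hedged way to narrow this down (my suggestion, not from the thread) is to open the connection with psycopg2 directly, outside pandas and SQLAlchemy, so a failure clearly points at the server, port or credentials rather than at to_sql. Also note that SQLAlchemy URLs put @ between the credentials and the host (postgresql://user:password@localhost:5433/db); a literal # in that position is unlikely to parse as intended.
import psycopg2

# Diagnostic sketch only; host, port, user, password and database name are the
# placeholders used in the question.
conn = psycopg2.connect(
    host='localhost',
    port=5433,
    user='postgres',
    password='password',
    dbname='MYDATABASE',
)
print(conn.get_dsn_parameters())
conn.close()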

How to convert coordinate columns to Point column with Shapely and Dask?

I have the following problem. My data is a huge dataframe that looks like this (this is the head of the dataframe):
import pandas as pd
import dask.dataframe as dd
data = dd.read_csv(data_path)
data.persist()
print(data.head())
Gitter_ID_100m x_mp_100m y_mp_100m Einwohner
0 100mN26840E43341 4334150 2684050 -1
1 100mN26840E43342 4334250 2684050 -1
2 100mN26840E43343 4334350 2684050 -1
3 100mN26840E43344 4334450 2684050 -1
4 100mN26840E43345 4334550 2684050 -1
I am using Dask to handle it. I now want to create a new column where the 'x_mp_100m' and 'y_mp_100m' are converted into a Shapely Point. For a single row, it would look like this:
from shapely.geometry import Point
test_df = data.head(1)
test_df = test_df.assign(geom=lambda k: Point(k.x_mp_100m,k.y_mp_100m))
print(test_df)
Gitter_ID_100m x_mp_100m y_mp_100m Einwohner geom
0 100mN26840E43341 4334150 2684050 -1 POINT (4334150 2684050)
I already tried the following code with Dask:
data_out = data.map_partitions(lambda df: df.assign(geom= lambda k: Point(k.x_mp_100m,k.y_mp_100m)), meta=pd.DataFrame)
When doing that, I get the following error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-17-b8de11d9b9b3> in <module>
----> 1 data_out.compute()
~\AppData\Local\Continuum\anaconda3\lib\site-packages\dask\base.py in compute(self, **kwargs)
154 dask.base.compute
155 """
--> 156 (result,) = compute(self, traverse=False, **kwargs)
157 return result
158
~\AppData\Local\Continuum\anaconda3\lib\site-packages\dask\base.py in compute(*args, **kwargs)
395 keys = [x.__dask_keys__() for x in collections]
396 postcomputes = [x.__dask_postcompute__() for x in collections]
--> 397 results = schedule(dsk, keys, **kwargs)
398 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
399
~\AppData\Local\Continuum\anaconda3\lib\site-packages\distributed\client.py in get(self, dsk, keys, restrictions, loose_restrictions, resources, sync, asynchronous, direct, retries, priority, fifo_timeout, actors, **kwargs)
2319 try:
2320 results = self.gather(packed, asynchronous=asynchronous,
-> 2321 direct=direct)
2322 finally:
2323 for f in futures.values():
~\AppData\Local\Continuum\anaconda3\lib\site-packages\distributed\client.py in gather(self, futures, errors, maxsize, direct, asynchronous)
1653 return self.sync(self._gather, futures, errors=errors,
1654 direct=direct, local_worker=local_worker,
-> 1655 asynchronous=asynchronous)
1656
1657 #gen.coroutine
~\AppData\Local\Continuum\anaconda3\lib\site-packages\distributed\client.py in sync(self, func, *args, **kwargs)
671 return future
672 else:
--> 673 return sync(self.loop, func, *args, **kwargs)
674
675 def __repr__(self):
~\AppData\Local\Continuum\anaconda3\lib\site-packages\distributed\utils.py in sync(loop, func, *args, **kwargs)
275 e.wait(10)
276 if error[0]:
--> 277 six.reraise(*error[0])
278 else:
279 return result[0]
~\AppData\Local\Continuum\anaconda3\lib\site-packages\six.py in reraise(tp, value, tb)
691 if value.__traceback__ is not tb:
692 raise value.with_traceback(tb)
--> 693 raise value
694 finally:
695 value = None
~\AppData\Local\Continuum\anaconda3\lib\site-packages\distributed\utils.py in f()
260 if timeout is not None:
261 future = gen.with_timeout(timedelta(seconds=timeout), future)
--> 262 result[0] = yield future
263 except Exception as exc:
264 error[0] = sys.exc_info()
~\AppData\Local\Continuum\anaconda3\lib\site-packages\tornado\gen.py in run(self)
1131
1132 try:
-> 1133 value = future.result()
1134 except Exception:
1135 self.had_exception = True
~\AppData\Local\Continuum\anaconda3\lib\site-packages\tornado\gen.py in run(self)
1139 if exc_info is not None:
1140 try:
-> 1141 yielded = self.gen.throw(*exc_info)
1142 finally:
1143 # Break up a reference to itself
~\AppData\Local\Continuum\anaconda3\lib\site-packages\distributed\client.py in _gather(self, futures, errors, direct, local_worker)
1498 six.reraise(type(exception),
1499 exception,
-> 1500 traceback)
1501 if errors == 'skip':
1502 bad_keys.add(key)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\six.py in reraise(tp, value, tb)
690 value = tp()
691 if value.__traceback__ is not tb:
--> 692 raise value.with_traceback(tb)
693 raise value
694 finally:
~\AppData\Local\Continuum\anaconda3\lib\site-packages\dask\dataframe\core.py in apply_and_enforce()
3682
3683 Ensures the output has the same columns, even if empty."""
-> 3684 df = func(*args, **kwargs)
3685 if isinstance(df, (pd.DataFrame, pd.Series, pd.Index)):
3686 if len(df) == 0:
<ipython-input-16-d5710cb00158> in <lambda>()
----> 1 data_out = data.map_partitions(lambda df: df.assign(geom= lambda k: Point(k.x_mp_100m,k.y_mp_100m)), meta=pd.DataFrame)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\frame.py in assign()
3549 if PY36:
3550 for k, v in kwargs.items():
-> 3551 data[k] = com.apply_if_callable(v, data)
3552 else:
3553 # <= 3.5: do all calculations first...
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\common.py in apply_if_callable()
327
328 if callable(maybe_callable):
--> 329 return maybe_callable(obj, **kwargs)
330
331 return maybe_callable
<ipython-input-16-d5710cb00158> in <lambda>()
----> 1 data_out = data.map_partitions(lambda df: df.assign(geom= lambda k: Point(k.x_mp_100m,k.y_mp_100m)), meta=pd.DataFrame)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\shapely\geometry\point.py in __init__()
47 BaseGeometry.__init__(self)
48 if len(args) > 0:
---> 49 self._set_coords(*args)
50
51 # Coordinate getters and setters
~\AppData\Local\Continuum\anaconda3\lib\site-packages\shapely\geometry\point.py in _set_coords()
130 self._geom, self._ndim = geos_point_from_py(args[0])
131 else:
--> 132 self._geom, self._ndim = geos_point_from_py(tuple(args))
133
134 coords = property(BaseGeometry._get_coords, _set_coords)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\shapely\geometry\point.py in geos_point_from_py()
207 coords = ob
208 n = len(coords)
--> 209 dx = c_double(coords[0])
210 dy = c_double(coords[1])
211 dz = None
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\series.py in wrapper()
91 return converter(self.iloc[0])
92 raise TypeError("cannot convert the series to "
---> 93 "{0}".format(str(converter)))
94
95 wrapper.__name__ = "__{name}__".format(name=converter.__name__)
TypeError: cannot convert the series to <class 'float'>
So I think I am using the pandas assign() function in the wrong way, or there is a better-fitting function; I just cannot seem to wrap my head around it. Do you know a better way to handle this?
I also found this way:
data_out = data.map_partitions(lambda df: df.apply(lambda row: Point(row['x_mp_100m'],row['y_mp_100m']), axis=1))
But is that the most efficient way?
What you're doing seems fine. I would either write a function that works well on a single row and use the apply method, or write a function that works well on a single pandas DataFrame and use the map_partitions method.
For the error that you're getting I would first verify that your function works on a pandas dataframe.
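A short sketch of the map_partitions route described above (column names taken from the question): make the per-partition function work on a plain pandas DataFrame first, then hand it to map_partitions with an explicit meta.
import pandas as pd
from shapely.geometry import Point

def add_point(df):
    # One Point per row; Point() only accepts scalar coordinates, not whole Series,
    # which is what the "cannot convert the series to <class 'float'>" error is about.
    return df.assign(
        geom=[Point(x, y) for x, y in zip(df['x_mp_100m'], df['y_mp_100m'])]
    )

# Verify on a small pandas frame first, as suggested.
sample = pd.DataFrame({'x_mp_100m': [4334150, 4334250],
                       'y_mp_100m': [2684050, 2684050]})
print(add_point(sample))

# Then apply it per partition; meta tells Dask the output schema.
# data_out = data.map_partitions(add_point, meta=add_point(sample))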

xarray: mean of data stored via OPeNDAP

I'm using xarray's very cool pydap back-end (http://xarray.pydata.org/en/stable/io.html#opendap) to read data stored via OPeNDAP at IRI:
import xarray as xr
remote_data = xr.open_dataarray('http://iridl.ldeo.columbia.edu/SOURCES/.Models/.SubX/.RSMAS/.CCSM4/.hindcast/.zg/dods')
print(remote_data)
#<xarray.DataArray 'zg' (P: 2, S: 6569, M: 3, L: 45, Y: 181, X: 360)>
#[115569730800 values with dtype=float32]
#Coordinates:
# * L (L) timedelta64[ns] 0 days 12:00:00 1 days 12:00:00 ...
# * Y (Y) float32 -90.0 -89.0 -88.0 -87.0 -86.0 -85.0 -84.0 -83.0 ...
# * S (S) datetime64[ns] 1999-01-07 1999-01-08 1999-01-09 1999-01-10 ...
# * M (M) float32 1.0 2.0 3.0
# * X (X) float32 0.0 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 9.0 10.0 11.0 ...
# * P (P) int32 500 200
#Attributes:
# level_type: pressure level
# standard_name: geopotential_height
# long_name: Geopotential Height
# units: m
For reference, it's sub-seasonal forecast data where L is the lead time (45-day forecasts), S is the initialization date and M is the ensemble member.
I would like to take an ensemble mean, and I'm only interested in the 500 hPa level. However, it crashes and gives a RuntimeError: NetCDF: Access failure:
da = remote_data.sel(P=500)
da_ensmean = da.mean(dim='M')
RuntimeError Traceback (most recent call last)
<ipython-input-46-eca488e9def5> in <module>()
1 remote_data = xr.open_dataarray('http://iridl.ldeo.columbia.edu/SOURCES/.Models' '/.SubX/.RSMAS/.CCSM4/.hindcast/.zg/dods')
2 da = remote_data.sel(P=500)
----> 3 da_ensmean = da.mean(dim='M')
~/anaconda/envs/SubXNAO/lib/python3.6/site-packages/xarray/core/common.py in wrapped_func(self, dim, axis, skipna, keep_attrs, **kwargs)
20 keep_attrs=False, **kwargs):
21 return self.reduce(func, dim, axis, keep_attrs=keep_attrs,
---> 22 skipna=skipna, allow_lazy=True, **kwargs)
23 else:
24 def wrapped_func(self, dim=None, axis=None, keep_attrs=False,
~/anaconda/envs/SubXNAO/lib/python3.6/site-packages/xarray/core/dataarray.py in reduce(self, func, dim, axis, keep_attrs, **kwargs)
1359 summarized data and the indicated dimension(s) removed.
1360 """
-> 1361 var = self.variable.reduce(func, dim, axis, keep_attrs, **kwargs)
1362 return self._replace_maybe_drop_dims(var)
1363
~/anaconda/envs/SubXNAO/lib/python3.6/site-packages/xarray/core/variable.py in reduce(self, func, dim, axis, keep_attrs, allow_lazy, **kwargs)
1264 if dim is not None:
1265 axis = self.get_axis_num(dim)
-> 1266 data = func(self.data if allow_lazy else self.values,
1267 axis=axis, **kwargs)
1268
~/anaconda/envs/SubXNAO/lib/python3.6/site-packages/xarray/core/variable.py in data(self)
293 return self._data
294 else:
--> 295 return self.values
296
297 #data.setter
~/anaconda/envs/SubXNAO/lib/python3.6/site-packages/xarray/core/variable.py in values(self)
385 def values(self):
386 """The variable's data as a numpy.ndarray"""
--> 387 return _as_array_or_item(self._data)
388
389 #values.setter
~/anaconda/envs/SubXNAO/lib/python3.6/site-packages/xarray/core/variable.py in _as_array_or_item(data)
209 TODO: remove this (replace with np.asarray) once these issues are fixed
210 """
--> 211 data = np.asarray(data)
212 if data.ndim == 0:
213 if data.dtype.kind == 'M':
~/anaconda/envs/SubXNAO/lib/python3.6/site-packages/numpy/core/numeric.py in asarray(a, dtype, order)
490
491 """
--> 492 return array(a, dtype, copy=False, order=order)
493
494
~/anaconda/envs/SubXNAO/lib/python3.6/site-packages/xarray/core/indexing.py in __array__(self, dtype)
622
623 def __array__(self, dtype=None):
--> 624 self._ensure_cached()
625 return np.asarray(self.array, dtype=dtype)
626
~/anaconda/envs/SubXNAO/lib/python3.6/site-packages/xarray/core/indexing.py in _ensure_cached(self)
619 def _ensure_cached(self):
620 if not isinstance(self.array, NumpyIndexingAdapter):
--> 621 self.array = NumpyIndexingAdapter(np.asarray(self.array))
622
623 def __array__(self, dtype=None):
~/anaconda/envs/SubXNAO/lib/python3.6/site-packages/numpy/core/numeric.py in asarray(a, dtype, order)
490
491 """
--> 492 return array(a, dtype, copy=False, order=order)
493
494
~/anaconda/envs/SubXNAO/lib/python3.6/site-packages/xarray/core/indexing.py in __array__(self, dtype)
600
601 def __array__(self, dtype=None):
--> 602 return np.asarray(self.array, dtype=dtype)
603
604 def __getitem__(self, key):
~/anaconda/envs/SubXNAO/lib/python3.6/site-packages/numpy/core/numeric.py in asarray(a, dtype, order)
490
491 """
--> 492 return array(a, dtype, copy=False, order=order)
493
494
~/anaconda/envs/SubXNAO/lib/python3.6/site-packages/xarray/core/indexing.py in __array__(self, dtype)
506 def __array__(self, dtype=None):
507 array = as_indexable(self.array)
--> 508 return np.asarray(array[self.key], dtype=None)
509
510 def transpose(self, order):
~/anaconda/envs/SubXNAO/lib/python3.6/site-packages/xarray/coding/variables.py in __getitem__(self, key)
64
65 def __getitem__(self, key):
---> 66 return self.func(self.array[key])
67
68 def __repr__(self):
~/anaconda/envs/SubXNAO/lib/python3.6/site-packages/xarray/coding/variables.py in _apply_mask(data, encoded_fill_values, decoded_fill_value, dtype)
133 for fv in encoded_fill_values:
134 condition |= data == fv
--> 135 data = np.asarray(data, dtype=dtype)
136 return np.where(condition, decoded_fill_value, data)
137
~/anaconda/envs/SubXNAO/lib/python3.6/site-packages/numpy/core/numeric.py in asarray(a, dtype, order)
490
491 """
--> 492 return array(a, dtype, copy=False, order=order)
493
494
~/anaconda/envs/SubXNAO/lib/python3.6/site-packages/xarray/core/indexing.py in __array__(self, dtype)
506 def __array__(self, dtype=None):
507 array = as_indexable(self.array)
--> 508 return np.asarray(array[self.key], dtype=None)
509
510 def transpose(self, order):
~/anaconda/envs/SubXNAO/lib/python3.6/site-packages/xarray/backends/netCDF4_.py in __getitem__(self, key)
63 with self.datastore.ensure_open(autoclose=True):
64 try:
---> 65 array = getitem(self.get_array(), key.tuple)
66 except IndexError:
67 # Catch IndexError in netCDF4 and return a more informative
~/anaconda/envs/SubXNAO/lib/python3.6/site-packages/xarray/backends/common.py in robust_getitem(array, key, catch, max_retries, initial_delay)
114 for n in range(max_retries + 1):
115 try:
--> 116 return array[key]
117 except catch:
118 if n == max_retries:
netCDF4/_netCDF4.pyx in netCDF4._netCDF4.Variable.__getitem__()
netCDF4/_netCDF4.pyx in netCDF4._netCDF4.Variable._get()
netCDF4/_netCDF4.pyx in netCDF4._netCDF4._ensure_nc_success()
RuntimeError: NetCDF: Access failure
Breaking down the calculation removes the RuntimeError. I guess it was just too hefty a calculation with all the start times. It shouldn't be too difficult to put it in a loop over S (a sketch of such a loop follows after the output below):
da = remote_data.isel(P=0,S=0)
da_ensmean = da.mean(dim='M')
print(da_ensmean)
<xarray.DataArray 'zg' (L: 45, Y: 181, X: 360)>
array([[[5231.1445, 5231.1445, ..., 5231.1445, 5231.1445],
[5231.1445, 5231.1445, ..., 5231.1445, 5231.1445],
...,
[5056.2383, 5056.2383, ..., 5056.2383, 5056.2383],
[5056.2383, 5056.2383, ..., 5056.2383, 5056.2383]],
[[5211.346 , 5211.346 , ..., 5211.346 , 5211.346 ],
[5211.346 , 5211.346 , ..., 5211.346 , 5211.346 ],
...,
[5082.062 , 5082.062 , ..., 5082.062 , 5082.062 ],
[5082.062 , 5082.062 , ..., 5082.062 , 5082.062 ]],
...,
[[5108.8247, 5108.8247, ..., 5108.8247, 5108.8247],
[5108.8247, 5108.8247, ..., 5108.8247, 5108.8247],
...,
[5154.2173, 5154.2173, ..., 5154.2173, 5154.2173],
[5154.2173, 5154.2173, ..., 5154.2173, 5154.2173]],
[[5106.4893, 5106.4893, ..., 5106.4893, 5106.4893],
[5106.4893, 5106.4893, ..., 5106.4893, 5106.4893],
...,
[5226.0063, 5226.0063, ..., 5226.0063, 5226.0063],
[5226.0063, 5226.0063, ..., 5226.0063, 5226.0063]]], dtype=float32)
Coordinates:
* L (L) timedelta64[ns] 0 days 12:00:00 1 days 12:00:00 ...
* Y (Y) float32 -90.0 -89.0 -88.0 -87.0 -86.0 -85.0 -84.0 -83.0 ...
S datetime64[ns] 1999-01-07
* X (X) float32 0.0 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 9.0 10.0 11.0 ...
P int32 500
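A sketch of the loop over S mentioned above (my reading of that suggestion, not code from the post):
import xarray as xr

url = 'http://iridl.ldeo.columbia.edu/SOURCES/.Models/.SubX/.RSMAS/.CCSM4/.hindcast/.zg/dods'
remote_data = xr.open_dataarray(url)

means = []
for s in range(remote_data.sizes['S']):
    da = remote_data.isel(P=0, S=s)        # 500 hPa, one start date at a time
    means.append(da.mean(dim='M'))         # ensemble mean for that start date
da_ensmean = xr.concat(means, dim='S')     # stitch the start dates back together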
This is a good use-case for chunking with dask, e.g.,
import xarray as xr
url = 'http://iridl.ldeo.columbia.edu/SOURCES/.Models/.SubX/.RSMAS/.CCSM4/.hindcast/.zg/dods'
remote_data = xr.open_dataarray(url, chunks={'S': 1, 'L': 1})
da = remote_data.sel(P=500)
da_ensmean = da.mean(dim='M')
This version will access the data server in parallel, using many smaller chunks. It will still be slow to download 231 GB of data, but your request will have much better odds of success.