Error when trying to create a facetgrid of pointplots - pandas

I have sequencing data of micro-RNAs (miR) under different conditions ('Comparisons'), and I want to create a point-plot which will show me on different graphs the fold-change for each miR. the data looks like this (and is a pandas data_frame)
mir_Names Comparison Fold_Change
9 9 mmu-miR-100-4373160\n15 m... YAD-YC 508539.390000
15 9 mmu-miR-100-4373160\n15 m... YAD-YC 26.816000
17 9 mmu-miR-100-4373160\n15 m... YAD-YC 728.608000
18 9 mmu-miR-100-4373160\n15 m... YAD-YC 11483029.706000
'upregulated' is a subset of the dataframe and i tried to visualize it using:
g = sns.FacetGrid(upregulated, col='Comparison', sharex=True, sharey=True, size=0.75, aspect=12./8, despine=True, margin_titles=True)
g.map(sns.pointplot, 'mir_Names', 'Fold_Change', data=upregulated)
**
but it gives me the error which I couldn't find any solution to it:
**
ValueError Traceback (most recent call last) <ipython-input-180-a1cf1b282869> in <module>()
1 g = sns.FacetGrid(upregulated, col='Comparison', sharex=True, sharey=True, size=0.75, aspect=12./8, despine=True, margin_titles=True)
----> 2 g.map(sns.pointplot, 'mir_Names', 'Fold_Change', data=upregulated) #maybe with .count
c:\pyzo2014a\lib\site-packages\seaborn\axisgrid.py in map(self, func,
*args, **kwargs)
446
447 # Finalize the annotations and layout
--> 448 self._finalize_grid(args[:2])
449
450 return self
c:\pyzo2014a\lib\site-packages\seaborn\axisgrid.py in
_finalize_grid(self, axlabels)
537 self.set_axis_labels(*axlabels)
538 self.set_titles()
--> 539 self.fig.tight_layout()
540
541 def facet_axis(self, row_i, col_j):
c:\pyzo2014a\lib\site-packages\matplotlib\figure.py in tight_layout(self, renderer, pad, h_pad, w_pad, rect) 1663 rect=rect) 1664
-> 1665 self.subplots_adjust(**kwargs) 1666 1667
c:\pyzo2014a\lib\site-packages\matplotlib\figure.py in subplots_adjust(self, *args, **kwargs) 1520 1521 """
-> 1522 self.subplotpars.update(*args, **kwargs) 1523 for ax in self.axes: 1524 if not isinstance(ax, SubplotBase):
c:\pyzo2014a\lib\site-packages\matplotlib\figure.py in update(self, left, bottom, right, top, wspace, hspace)
223 if self.bottom >= self.top:
224 reset()
--> 225 raise ValueError('bottom cannot be >= top')
226
227 def _update_this(self, s, val):
**ValueError: bottom cannot be >= top**
What causes this error?

Related

ValueError: Number of columns must be a positive integer, not 0

when i want to execute below code and plot figer
scatter_matrix(total_frame)
total_frame is a dataframe like this
the error like this:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_11336\1619863705.py in <module>
1 total_frame.dropna(how='any')
----> 2 scatter_matrix(total_frame)
3 plt.show()
~\.conda\envs\env2\lib\site-packages\pandas\plotting\_misc.py in scatter_matrix(frame, alpha, figsize, ax, grid, diagonal, marker, density_kwds, hist_kwds, range_padding, **kwargs)
137 hist_kwds=hist_kwds,
138 range_padding=range_padding,
--> 139 **kwargs,
140 )
141
~\.conda\envs\env2\lib\site-packages\pandas\plotting\_matplotlib\misc.py in scatter_matrix(frame, alpha, figsize, ax, grid, diagonal, marker, density_kwds, hist_kwds, range_padding, **kwds)
48 n = df.columns.size
49 naxes = n * n
---> 50 fig, axes = create_subplots(naxes=naxes, figsize=figsize, ax=ax, squeeze=False)
51
52 # no gaps between subplots
~\.conda\envs\env2\lib\site-packages\pandas\plotting\_matplotlib\tools.py in create_subplots(naxes, sharex, sharey, squeeze, subplot_kw, ax, layout, layout_type, **fig_kw)
265
266 # Create first subplot separately, so we can share it if requested
--> 267 ax0 = fig.add_subplot(nrows, ncols, 1, **subplot_kw)
268
269 if sharex:
~\.conda\envs\env2\lib\site-packages\matplotlib\figure.py in add_subplot(self, *args, **kwargs)
770 projection_class, pkw = self._process_projection_requirements(
771 *args, **kwargs)
--> 772 ax = subplot_class_factory(projection_class)(self, *args, **pkw)
773 key = (projection_class, pkw)
774 return self._add_axes_internal(ax, key)
~\.conda\envs\env2\lib\site-packages\matplotlib\axes\_subplots.py in __init__(self, fig, *args, **kwargs)
34 self._axes_class.__init__(self, fig, [0, 0, 1, 1], **kwargs)
35 # This will also update the axes position.
---> 36 self.set_subplotspec(SubplotSpec._from_subplot_args(fig, args))
37
38 #_api.deprecated(
~\.conda\envs\env2\lib\site-packages\matplotlib\gridspec.py in _from_subplot_args(figure, args)
595 f"{len(args)} were given")
596
--> 597 gs = GridSpec._check_gridspec_exists(figure, rows, cols)
598 if gs is None:
599 gs = GridSpec(rows, cols, figure=figure)
~\.conda\envs\env2\lib\site-packages\matplotlib\gridspec.py in _check_gridspec_exists(figure, nrows, ncols)
223 return gs
224 # else gridspec not found:
--> 225 return GridSpec(nrows, ncols, figure=figure)
226
227 def __getitem__(self, key):
~\.conda\envs\env2\lib\site-packages\matplotlib\gridspec.py in __init__(self, nrows, ncols, figure, left, bottom, right, top, wspace, hspace, width_ratios, height_ratios)
385 super().__init__(nrows, ncols,
386 width_ratios=width_ratios,
--> 387 height_ratios=height_ratios)
388
389 _AllowedKeys = ["left", "bottom", "right", "top", "wspace", "hspace"]
~\.conda\envs\env2\lib\site-packages\matplotlib\gridspec.py in __init__(self, nrows, ncols, height_ratios, width_ratios)
51 if not isinstance(ncols, Integral) or ncols <= 0:
52 raise ValueError(
---> 53 f"Number of columns must be a positive integer, not {ncols!r}")
54 self._nrows, self._ncols = nrows, ncols
55 self.set_height_ratios(height_ratios)
ValueError: Number of columns must be a positive integer, not 0
<Figure size 432x288 with 0 Axes>
i search such error and don't find anything,please help me!!!!!
i have solved it,
my data's class is object,the function need num,
so i use pd.convert_dtypes() and it works

tight_layout KeyError default, matplotlib widget

Using jupyterlab, i receive a KeyError: 'Default' when using plt.tight_layout() in combination with %matplotlib widget. The following code reproduces the issue:
import matplotlib.pyplot as plt
import numpy as np
%matplotlib widget
x=np.linspace(0,10)
y=x**2
plt.plot(x,y)
plt.tight_layout()
The complete error message is the following:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~/anaconda3/lib/python3.8/site-packages/matplotlib/backend_bases.py in _wait_cursor_for_draw_cm(self)
3024 try:
-> 3025 self.canvas.set_cursor(tools.Cursors.WAIT)
3026 yield
~/anaconda3/lib/python3.8/site-packages/matplotlib/backends/backend_webagg_core.py in set_cursor(self, cursor)
209 }, cursor=cursor)
--> 210 self.send_event('cursor', cursor=cursor)
211
~/anaconda3/lib/python3.8/site-packages/matplotlib/backends/backend_webagg_core.py in send_event(self, event_type, **kwargs)
391 if self.manager:
--> 392 self.manager._send_event(event_type, **kwargs)
393
~/anaconda3/lib/python3.8/site-packages/matplotlib/backends/backend_webagg_core.py in _send_event(self, event_type, **kwargs)
540 for s in self.web_sockets:
--> 541 s.send_json(payload)
542
~/anaconda3/lib/python3.8/site-packages/ipympl/backend_nbagg.py in send_json(self, content)
180 if content['type'] == 'cursor':
--> 181 self._cursor = cursors_str[content['cursor']]
182
KeyError: 'wait'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
/tmp/ipykernel_119035/3466922198.py in <module>
7 y=x**2
8 plt.plot(x,y)
----> 9 plt.tight_layout()
~/anaconda3/lib/python3.8/site-packages/matplotlib/pyplot.py in tight_layout(pad, h_pad, w_pad, rect)
2300 #_copy_docstring_and_deprecators(Figure.tight_layout)
2301 def tight_layout(*, pad=1.08, h_pad=None, w_pad=None, rect=None):
-> 2302 return gcf().tight_layout(pad=pad, h_pad=h_pad, w_pad=w_pad, rect=rect)
2303
2304
~/anaconda3/lib/python3.8/site-packages/matplotlib/figure.py in tight_layout(self, pad, h_pad, w_pad, rect)
3186 "compatible with tight_layout, so results "
3187 "might be incorrect.")
-> 3188 renderer = _get_renderer(self)
3189 with getattr(renderer, "_draw_disabled", nullcontext)():
3190 kwargs = get_tight_layout_figure(
~/anaconda3/lib/python3.8/site-packages/matplotlib/backend_bases.py in _get_renderer(figure, print_method)
1542 figure.canvas._get_output_canvas(None, fmt), f"print_{fmt}")
1543 try:
-> 1544 print_method(io.BytesIO())
1545 except Done as exc:
1546 renderer, = figure._cachedRenderer, = exc.args
~/anaconda3/lib/python3.8/site-packages/matplotlib/backend_bases.py in wrapper(*args, **kwargs)
1641 kwargs.pop(arg)
1642
-> 1643 return func(*args, **kwargs)
1644
1645 return wrapper
~/anaconda3/lib/python3.8/site-packages/matplotlib/_api/deprecation.py in wrapper(*inner_args, **inner_kwargs)
410 else deprecation_addendum,
411 **kwargs)
--> 412 return func(*inner_args, **inner_kwargs)
413
414 DECORATORS[wrapper] = decorator
~/anaconda3/lib/python3.8/site-packages/matplotlib/backends/backend_agg.py in print_png(self, filename_or_obj, metadata, pil_kwargs, *args)
538 *metadata*, including the default 'Software' key.
539 """
--> 540 FigureCanvasAgg.draw(self)
541 mpl.image.imsave(
542 filename_or_obj, self.buffer_rgba(), format="png", origin="upper",
~/anaconda3/lib/python3.8/site-packages/matplotlib/backends/backend_agg.py in draw(self)
431 self.renderer = self.get_renderer(cleared=True)
432 # Acquire a lock on the shared font cache.
--> 433 with RendererAgg.lock, \
434 (self.toolbar._wait_cursor_for_draw_cm() if self.toolbar
435 else nullcontext()):
~/anaconda3/lib/python3.8/contextlib.py in __enter__(self)
111 del self.args, self.kwds, self.func
112 try:
--> 113 return next(self.gen)
114 except StopIteration:
115 raise RuntimeError("generator didn't yield") from None
~/anaconda3/lib/python3.8/site-packages/matplotlib/backend_bases.py in _wait_cursor_for_draw_cm(self)
3026 yield
3027 finally:
-> 3028 self.canvas.set_cursor(self._lastCursor)
3029 else:
3030 yield
~/anaconda3/lib/python3.8/site-packages/matplotlib/backends/backend_webagg_core.py in set_cursor(self, cursor)
208 backend_tools.Cursors.RESIZE_VERTICAL: 'ns-resize',
209 }, cursor=cursor)
--> 210 self.send_event('cursor', cursor=cursor)
211
212 def set_image_mode(self, mode):
~/anaconda3/lib/python3.8/site-packages/matplotlib/backends/backend_webagg_core.py in send_event(self, event_type, **kwargs)
390 def send_event(self, event_type, **kwargs):
391 if self.manager:
--> 392 self.manager._send_event(event_type, **kwargs)
393
394
~/anaconda3/lib/python3.8/site-packages/matplotlib/backends/backend_webagg_core.py in _send_event(self, event_type, **kwargs)
539 payload = {'type': event_type, **kwargs}
540 for s in self.web_sockets:
--> 541 s.send_json(payload)
542
543
~/anaconda3/lib/python3.8/site-packages/ipympl/backend_nbagg.py in send_json(self, content)
179 # Change in the widget state?
180 if content['type'] == 'cursor':
--> 181 self._cursor = cursors_str[content['cursor']]
182
183 elif content['type'] == 'message':
KeyError: 'default'

How change the value in a koalas dataframe based in a condition

I am using Koalas and I want to change the value of a column based on a condition.
In pandas I can do that using:
import pandas as pd
df_test = pd.DataFrame({
'a': [1,2,3]
,'b': ['one','two','three']})
df_test2 = pd.DataFrame({
'c': [2,1,3]
,'d': ['one','two','three']})
df_test.loc[df_test.a.isin(df_test2['c']),'b'] = 'four'
df_test.head()
a b
0 1 four
1 2 four
2 3 four
I am trying to use the same in Koalas, but I have this error:
---------------------------------------------------------------------------
PandasNotImplementedError Traceback (most recent call last)
<ipython-input-15-814219258adb> in <module>
5 new_loans['write_offs'] = 0
6
----> 7 new_loans.loc[(new_loans['ID'].isin(userinput_write_offs['id'])),'write_offs'] = 1
8 new_loans.loc[new_loans['write_offs']==1,'is_active'] = 0
9 new_loans = new_loans.sort_values(by = ['ZOHOID','Disb Date'])
/usr/local/lib/python3.7/dist-packages/databricks/koalas/base.py in isin(self, values)
894 )
895
--> 896 return self._with_new_scol(self.spark.column.isin(list(values)))
897
898 def isnull(self) -> Union["Series", "Index"]:
/usr/local/lib/python3.7/dist-packages/databricks/koalas/series.py in __iter__(self)
5871
5872 def __iter__(self):
-> 5873 return MissingPandasLikeSeries.__iter__(self)
5874
5875 if sys.version_info >= (3, 7):
/usr/local/lib/python3.7/dist-packages/databricks/koalas/missing/__init__.py in unsupported_function(*args, **kwargs)
21 def unsupported_function(*args, **kwargs):
22 raise PandasNotImplementedError(
---> 23 class_name=class_name, method_name=method_name, reason=reason
24 )
25
PandasNotImplementedError: The method `pd.Series.__iter__()` is not implemented. If you want to collect your data as an NumPy array, use 'to_numpy()' instead.
How could I do the same operation in Koalas?
UPDATE
Following this question: Assign Koalas Column from Numpy Result I have done:
df_test.loc[df_test.a.isin(df_test2['c'].to_list()),'b'] = 'four'
But now I have this error:
---------------------------------------------------------------------------
PythonException Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/IPython/core/formatters.py in __call__(self, obj)
700 type_pprinters=self.type_printers,
701 deferred_pprinters=self.deferred_printers)
--> 702 printer.pretty(obj)
703 printer.flush()
704 return stream.getvalue()
/usr/local/lib/python3.7/dist-packages/IPython/lib/pretty.py in pretty(self, obj)
392 if cls is not object \
393 and callable(cls.__dict__.get('__repr__')):
--> 394 return _repr_pprint(obj, self, cycle)
395
396 return _default_pprint(obj, self, cycle)
/usr/local/lib/python3.7/dist-packages/IPython/lib/pretty.py in _repr_pprint(obj, p, cycle)
698 """A pprint that just redirects to the normal repr function."""
699 # Find newlines and replace them with p.break_()
--> 700 output = repr(obj)
701 lines = output.splitlines()
702 with p.group():
/usr/local/lib/python3.7/dist-packages/databricks/koalas/frame.py in __repr__(self)
10614 return self._to_internal_pandas().to_string()
10615
> 10616 pdf = self._get_or_create_repr_pandas_cache(max_display_count)
10617 pdf_length = len(pdf)
10618 pdf = pdf.iloc[:max_display_count]
/usr/local/lib/python3.7/dist-packages/databricks/koalas/frame.py in _get_or_create_repr_pandas_cache(self, n)
10606 def _get_or_create_repr_pandas_cache(self, n):
10607 if not hasattr(self, "_repr_pandas_cache") or n not in self._repr_pandas_cache:
> 10608 self._repr_pandas_cache = {n: self.head(n + 1)._to_internal_pandas()}
10609 return self._repr_pandas_cache[n]
10610
/usr/local/lib/python3.7/dist-packages/databricks/koalas/frame.py in _to_internal_pandas(self)
10602 This method is for internal use only.
10603 """
> 10604 return self._internal.to_pandas_frame
10605
10606 def _get_or_create_repr_pandas_cache(self, n):
/usr/local/lib/python3.7/dist-packages/databricks/koalas/utils.py in wrapped_lazy_property(self)
514 def wrapped_lazy_property(self):
515 if not hasattr(self, attr_name):
--> 516 setattr(self, attr_name, fn(self))
517 return getattr(self, attr_name)
518
/usr/local/lib/python3.7/dist-packages/databricks/koalas/internal.py in to_pandas_frame(self)
807 """ Return as pandas DataFrame. """
808 sdf = self.to_internal_spark_frame
--> 809 pdf = sdf.toPandas()
810 if len(pdf) == 0 and len(sdf.schema) > 0:
811 pdf = pdf.astype(
/usr/local/spark/python/pyspark/sql/pandas/conversion.py in toPandas(self)
136
137 # Below is toPandas without Arrow optimization.
--> 138 pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
139 column_counter = Counter(self.columns)
140
/usr/local/spark/python/pyspark/sql/dataframe.py in collect(self)
594 """
595 with SCCallSiteSync(self._sc) as css:
--> 596 sock_info = self._jdf.collectToPython()
597 return list(_load_from_socket(sock_info, BatchedSerializer(PickleSerializer())))
598
/usr/local/lib/python3.7/dist-packages/py4j/java_gateway.py in __call__(self, *args)
1303 answer = self.gateway_client.send_command(command)
1304 return_value = get_return_value(
-> 1305 answer, self.gateway_client, self.target_id, self.name)
1306
1307 for temp_arg in temp_args:
/usr/local/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
132 # Hide where the exception came from that shows a non-Pythonic
133 # JVM exception message.
--> 134 raise_from(converted)
135 else:
136 raise
/usr/local/spark/python/pyspark/sql/utils.py in raise_from(e)
PythonException:
An exception was thrown from the Python worker. Please see the stack trace below.
Traceback (most recent call last):
File "/opt/spark/python/lib/pyspark.zip/pyspark/worker.py", line 589, in main
func, profiler, deserializer, serializer = read_udfs(pickleSer, infile, eval_type)
File "/opt/spark/python/lib/pyspark.zip/pyspark/worker.py", line 447, in read_udfs
udfs.append(read_single_udf(pickleSer, infile, eval_type, runner_conf, udf_index=i))
File "/opt/spark/python/lib/pyspark.zip/pyspark/worker.py", line 254, in read_single_udf
f, return_type = read_command(pickleSer, infile)
File "/opt/spark/python/lib/pyspark.zip/pyspark/worker.py", line 74, in read_command
command = serializer._read_with_length(file)
File "/opt/spark/python/lib/pyspark.zip/pyspark/serializers.py", line 172, in _read_with_length
return self.loads(obj)
File "/opt/spark/python/lib/pyspark.zip/pyspark/serializers.py", line 458, in loads
return pickle.loads(obj, encoding=encoding)
File "/opt/spark/python/lib/pyspark.zip/pyspark/cloudpickle.py", line 1110, in subimport
__import__(name)
ModuleNotFoundError: No module named 'pandas'
Why is trying to use pandas?
Koalas package exposes Pandas Like APIs on high level for the users but under the hood implementation is done using PySpark APIs.
I observed that within the stack track log you have pasted, a pandas dataframe is being created from sdf spark Dataframe using toPandas() method and assigned to pdf.
In the implementation of toPandas() function, pandas and numpy are being imported.
check line numbers 809 & 138.
/usr/local/lib/python3.7/dist-packages/databricks/koalas/internal.py in to_pandas_frame(self)
807 """ Return as pandas DataFrame. """
808 sdf = self.to_internal_spark_frame
--> 809 pdf = sdf.toPandas()
810 if len(pdf) == 0 and len(sdf.schema) > 0:
811 pdf = pdf.astype(
/usr/local/spark/python/pyspark/sql/pandas/conversion.py in toPandas(self)
136
137 # Below is toPandas without Arrow optimization.
--> 138 pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
139 column_counter = Counter(self.columns)
140
/usr/local/spark/python/pyspark/sql/dataframe.py in collect(self)
594 """
595 with SCCallSiteSync(self._sc) as css:
--> 596 sock_info = self._jdf.collectToPython()
597 return list(_load_from_socket(sock_info, BatchedSerializer(PickleSerializer())))
598
you can check out the implementation of toPandas() function at the following link:
https://github.com/apache/spark/blob/master/python/pyspark/sql/pandas/conversion.py

How to convert coordinate columns to Point column with Shapely and Dask?

I have the following problem. My data is a huge dataframe, looking like this (this is the head of the dataframe)
import pandas
import dask.dataframe as dd
data = dd.read_csv(data_path)
data.persist()
print(data.head())
Gitter_ID_100m x_mp_100m y_mp_100m Einwohner
0 100mN26840E43341 4334150 2684050 -1
1 100mN26840E43342 4334250 2684050 -1
2 100mN26840E43343 4334350 2684050 -1
3 100mN26840E43344 4334450 2684050 -1
4 100mN26840E43345 4334550 2684050 -1
I am using Dask to handle it. I now want to create a new column where the 'x_mp_100m' and 'y_mp_100m' are converted into a Shapely Point. For a single row, it would look like this:
from shapely.geometry import Point
test_df = data.head(1)
test_df = test_df.assign(geom=lambda k: Point(k.x_mp_100m,k.y_mp_100m))
print(test_df)
Gitter_ID_100m x_mp_100m y_mp_100m Einwohner geom
0 100mN26840E43341 4334150 2684050 -1 POINT (4334150 2684050)
I already tried the following code with Dask:
data_out = data.map_partitions(lambda df: df.assign(geom= lambda k: Point(k.x_mp_100m,k.y_mp_100m)), meta=pd.DataFrame)
When doing that, I get the following error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-17-b8de11d9b9b3> in <module>
----> 1 data_out.compute()
~\AppData\Local\Continuum\anaconda3\lib\site-packages\dask\base.py in compute(self, **kwargs)
154 dask.base.compute
155 """
--> 156 (result,) = compute(self, traverse=False, **kwargs)
157 return result
158
~\AppData\Local\Continuum\anaconda3\lib\site-packages\dask\base.py in compute(*args, **kwargs)
395 keys = [x.__dask_keys__() for x in collections]
396 postcomputes = [x.__dask_postcompute__() for x in collections]
--> 397 results = schedule(dsk, keys, **kwargs)
398 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
399
~\AppData\Local\Continuum\anaconda3\lib\site-packages\distributed\client.py in get(self, dsk, keys, restrictions, loose_restrictions, resources, sync, asynchronous, direct, retries, priority, fifo_timeout, actors, **kwargs)
2319 try:
2320 results = self.gather(packed, asynchronous=asynchronous,
-> 2321 direct=direct)
2322 finally:
2323 for f in futures.values():
~\AppData\Local\Continuum\anaconda3\lib\site-packages\distributed\client.py in gather(self, futures, errors, maxsize, direct, asynchronous)
1653 return self.sync(self._gather, futures, errors=errors,
1654 direct=direct, local_worker=local_worker,
-> 1655 asynchronous=asynchronous)
1656
1657 #gen.coroutine
~\AppData\Local\Continuum\anaconda3\lib\site-packages\distributed\client.py in sync(self, func, *args, **kwargs)
671 return future
672 else:
--> 673 return sync(self.loop, func, *args, **kwargs)
674
675 def __repr__(self):
~\AppData\Local\Continuum\anaconda3\lib\site-packages\distributed\utils.py in sync(loop, func, *args, **kwargs)
275 e.wait(10)
276 if error[0]:
--> 277 six.reraise(*error[0])
278 else:
279 return result[0]
~\AppData\Local\Continuum\anaconda3\lib\site-packages\six.py in reraise(tp, value, tb)
691 if value.__traceback__ is not tb:
692 raise value.with_traceback(tb)
--> 693 raise value
694 finally:
695 value = None
~\AppData\Local\Continuum\anaconda3\lib\site-packages\distributed\utils.py in f()
260 if timeout is not None:
261 future = gen.with_timeout(timedelta(seconds=timeout), future)
--> 262 result[0] = yield future
263 except Exception as exc:
264 error[0] = sys.exc_info()
~\AppData\Local\Continuum\anaconda3\lib\site-packages\tornado\gen.py in run(self)
1131
1132 try:
-> 1133 value = future.result()
1134 except Exception:
1135 self.had_exception = True
~\AppData\Local\Continuum\anaconda3\lib\site-packages\tornado\gen.py in run(self)
1139 if exc_info is not None:
1140 try:
-> 1141 yielded = self.gen.throw(*exc_info)
1142 finally:
1143 # Break up a reference to itself
~\AppData\Local\Continuum\anaconda3\lib\site-packages\distributed\client.py in _gather(self, futures, errors, direct, local_worker)
1498 six.reraise(type(exception),
1499 exception,
-> 1500 traceback)
1501 if errors == 'skip':
1502 bad_keys.add(key)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\six.py in reraise(tp, value, tb)
690 value = tp()
691 if value.__traceback__ is not tb:
--> 692 raise value.with_traceback(tb)
693 raise value
694 finally:
~\AppData\Local\Continuum\anaconda3\lib\site-packages\dask\dataframe\core.py in apply_and_enforce()
3682
3683 Ensures the output has the same columns, even if empty."""
-> 3684 df = func(*args, **kwargs)
3685 if isinstance(df, (pd.DataFrame, pd.Series, pd.Index)):
3686 if len(df) == 0:
<ipython-input-16-d5710cb00158> in <lambda>()
----> 1 data_out = data.map_partitions(lambda df: df.assign(geom= lambda k: Point(k.x_mp_100m,k.y_mp_100m)), meta=pd.DataFrame)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\frame.py in assign()
3549 if PY36:
3550 for k, v in kwargs.items():
-> 3551 data[k] = com.apply_if_callable(v, data)
3552 else:
3553 # <= 3.5: do all calculations first...
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\common.py in apply_if_callable()
327
328 if callable(maybe_callable):
--> 329 return maybe_callable(obj, **kwargs)
330
331 return maybe_callable
<ipython-input-16-d5710cb00158> in <lambda>()
----> 1 data_out = data.map_partitions(lambda df: df.assign(geom= lambda k: Point(k.x_mp_100m,k.y_mp_100m)), meta=pd.DataFrame)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\shapely\geometry\point.py in __init__()
47 BaseGeometry.__init__(self)
48 if len(args) > 0:
---> 49 self._set_coords(*args)
50
51 # Coordinate getters and setters
~\AppData\Local\Continuum\anaconda3\lib\site-packages\shapely\geometry\point.py in _set_coords()
130 self._geom, self._ndim = geos_point_from_py(args[0])
131 else:
--> 132 self._geom, self._ndim = geos_point_from_py(tuple(args))
133
134 coords = property(BaseGeometry._get_coords, _set_coords)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\shapely\geometry\point.py in geos_point_from_py()
207 coords = ob
208 n = len(coords)
--> 209 dx = c_double(coords[0])
210 dy = c_double(coords[1])
211 dz = None
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\series.py in wrapper()
91 return converter(self.iloc[0])
92 raise TypeError("cannot convert the series to "
---> 93 "{0}".format(str(converter)))
94
95 wrapper.__name__ = "__{name}__".format(name=converter.__name__)
TypeError: cannot convert the series to <class 'float'>
So I think, I am using pandas.assign() function in a wrong way, or there should be a better fitting function, I just cannot seem to wrap my head around it. Do you know a better way to handle this?
I also found this way:
data_out = data.map_partitions(lambda df: df.apply(lambda row: Point(row['x_mp_100m'],row['y_mp_100m']), axis=1))
But is that the most efficient way?
What you're doing seems fine. I would find a function that works well on a single row and then use the apply method or a function that works well on a single Pandas dataframe and then use the map_partitions method.
For the error that you're getting I would first verify that your function works on a pandas dataframe.

Network Plot Error Using Python / iPython

The following starter code errors out (AttributeError: incompatible shape for a non-contiguous array) when I try to use networkx on OSX 10.8.2, in an IPython notebook.
import networkx as nx
g = nx.Graph()
g.add_node(1)
g.add_node(2)
g.add_edge(1,2) # no error if this line is omitted
nx.draw(g)
I do not see the error if I do not add the edge to this graph. Matplotlib, Numpy, and Scipy were all installed as suggested here.
Below is the error message:
AttributeError Traceback (most recent call last)
<ipython-input-33-df7dfeff6452> in <module>()
----> 1 nx.draw(g)
/usr/local/lib/python2.7/site-packages/networkx/drawing/nx_pylab.pyc in draw(G, pos, ax, hold, **kwds)
131 pylab.hold(h)
132 try:
--> 133 draw_networkx(G,pos=pos,ax=ax,**kwds)
134 ax.set_axis_off()
135 pylab.draw_if_interactive()
/usr/local/lib/python2.7/site-packages/networkx/drawing/nx_pylab.pyc in draw_networkx(G, pos, with_labels, **kwds)
265
266 node_collection=draw_networkx_nodes(G, pos, **kwds)
--> 267 edge_collection=draw_networkx_edges(G, pos, **kwds)
268 if with_labels:
269 draw_networkx_labels(G, pos, **kwds)
/usr/local/lib/python2.7/site-packages/networkx/drawing/nx_pylab.pyc in draw_networkx_edges(G, pos, edgelist, width, edge_color, style, alpha, edge_cmap, edge_vmin, edge_vmax, ax, arrows, label, **kwds)
544 edge_collection.set_zorder(1) # edges go behind nodes
545 edge_collection.set_label(label)
--> 546 ax.add_collection(edge_collection)
547
548 # Note: there was a bug in mpl regarding the handling of alpha values for
/usr/local/lib/python2.7/site-packages/matplotlib/axes.pyc in add_collection(self, collection, autolim)
1443 if autolim:
1444 if collection._paths and len(collection._paths):
-> 1445 self.update_datalim(collection.get_datalim(self.transData))
1446
1447 collection._remove_method = lambda h: self.collections.remove(h)
/usr/local/lib/python2.7/site-packages/matplotlib/collections.pyc in get_datalim(self, transData)
165 offsets = offsets.filled(np.nan)
166 # get_path_collection_extents handles nan but not masked arrays
--> 167 offsets.shape = (-1, 2) # Make it Nx2
168
169 result = mpath.get_path_collection_extents(
AttributeError: incompatible shape for a non-contiguous array
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
/usr/local/lib/python2.7/site-packages/IPython/zmq/pylab/backend_inline.pyc in show(close)
100 try:
101 for figure_manager in Gcf.get_all_fig_managers():
--> 102 send_figure(figure_manager.canvas.figure)
103 finally:
104 show._to_draw = []
/usr/local/lib/python2.7/site-packages/IPython/zmq/pylab/backend_inline.pyc in send_figure(fig)
209 """
210 fmt = InlineBackend.instance().figure_format
--> 211 data = print_figure(fig, fmt)
212 # print_figure will return None if there's nothing to draw:
213 if data is None:
/usr/local/lib/python2.7/site-packages/IPython/core/pylabtools.pyc in print_figure(fig, fmt)
102 try:
103 bytes_io = BytesIO()
--> 104 fig.canvas.print_figure(bytes_io, format=fmt, bbox_inches='tight')
105 data = bytes_io.getvalue()
106 finally:
/usr/local/lib/python2.7/site-packages/matplotlib/backend_bases.pyc in print_figure(self, filename, dpi, facecolor, edgecolor, orientation, format, **kwargs)
1981 orientation=orientation,
1982 dryrun=True,
-> 1983 **kwargs)
1984 renderer = self.figure._cachedRenderer
1985 bbox_inches = self.figure.get_tightbbox(renderer)
/usr/local/lib/python2.7/site-packages/matplotlib/backends/backend_agg.pyc in print_png(self, filename_or_obj, *args, **kwargs)
467
468 def print_png(self, filename_or_obj, *args, **kwargs):
--> 469 FigureCanvasAgg.draw(self)
470 renderer = self.get_renderer()
471 original_dpi = renderer.dpi
/usr/local/lib/python2.7/site-packages/matplotlib/backends/backend_agg.pyc in draw(self)
419
420 try:
--> 421 self.figure.draw(self.renderer)
422 finally:
423 RendererAgg.lock.release()
/usr/local/lib/python2.7/site-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs)
53 def draw_wrapper(artist, renderer, *args, **kwargs):
54 before(artist, renderer)
---> 55 draw(artist, renderer, *args, **kwargs)
56 after(artist, renderer)
57
/usr/local/lib/python2.7/site-packages/matplotlib/figure.pyc in draw(self, renderer)
896 dsu.sort(key=itemgetter(0))
897 for zorder, a, func, args in dsu:
--> 898 func(*args)
899
900 renderer.close_group('figure')
/usr/local/lib/python2.7/site-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs)
53 def draw_wrapper(artist, renderer, *args, **kwargs):
54 before(artist, renderer)
---> 55 draw(artist, renderer, *args, **kwargs)
56 after(artist, renderer)
57
/usr/local/lib/python2.7/site-packages/matplotlib/axes.pyc in draw(self, renderer, inframe)
1995
1996 for zorder, a in dsu:
-> 1997 a.draw(renderer)
1998
1999 renderer.close_group('axes')
/usr/local/lib/python2.7/site-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs)
53 def draw_wrapper(artist, renderer, *args, **kwargs):
54 before(artist, renderer)
---> 55 draw(artist, renderer, *args, **kwargs)
56 after(artist, renderer)
57
/usr/local/lib/python2.7/site-packages/matplotlib/collections.pyc in draw(self, renderer)
227 self.update_scalarmappable()
228
--> 229 transform, transOffset, offsets, paths = self._prepare_points()
230
231 gc = renderer.new_gc()
/usr/local/lib/python2.7/site-packages/matplotlib/collections.pyc in _prepare_points(self)
203
204 offsets = np.asanyarray(offsets, np.float_)
--> 205 offsets.shape = (-1, 2) # Make it Nx2
206
207 if not transform.is_affine:
AttributeError: incompatible shape for a non-contiguous array
This was reported earlier in Draw a graph with edges using Networkx. It likely is a bug/feature in the NumPy library development version as mentioned above (numpy issue 2700).