Error when trying to create a facetgrid of pointplots - pandas

I have sequencing data of micro-RNAs (miR) under different conditions ('Comparisons'), and I want to create a point plot that shows the fold change for each miR on a separate graph. The data looks like this (it is a pandas DataFrame):
mir_Names Comparison Fold_Change
9 9 mmu-miR-100-4373160\n15 m... YAD-YC 508539.390000
15 9 mmu-miR-100-4373160\n15 m... YAD-YC 26.816000
17 9 mmu-miR-100-4373160\n15 m... YAD-YC 728.608000
18 9 mmu-miR-100-4373160\n15 m... YAD-YC 11483029.706000
'upregulated' is a subset of the dataframe, and I tried to visualize it using:
g = sns.FacetGrid(upregulated, col='Comparison', sharex=True, sharey=True, size=0.75, aspect=12./8, despine=True, margin_titles=True), 'mir_Names', 'Fold_Change', data=upregulated)
but it gives me the following error, for which I couldn't find a solution:
ValueError Traceback (most recent call last) <ipython-input-180-a1cf1b282869> in <module>()
1 g = sns.FacetGrid(upregulated, col='Comparison', sharex=True, sharey=True, size=0.75, aspect=12./8, despine=True, margin_titles=True)
----> 2, 'mir_Names', 'Fold_Change', data=upregulated) #maybe with .count
c:\pyzo2014a\lib\site-packages\seaborn\ in map(self, func,
*args, **kwargs)
447 # Finalize the annotations and layout
--> 448 self._finalize_grid(args[:2])
450 return self
c:\pyzo2014a\lib\site-packages\seaborn\ in
_finalize_grid(self, axlabels)
537 self.set_axis_labels(*axlabels)
538 self.set_titles()
--> 539 self.fig.tight_layout()
541 def facet_axis(self, row_i, col_j):
c:\pyzo2014a\lib\site-packages\matplotlib\ in tight_layout(self, renderer, pad, h_pad, w_pad, rect) 1663 rect=rect) 1664
-> 1665 self.subplots_adjust(**kwargs) 1666 1667
c:\pyzo2014a\lib\site-packages\matplotlib\ in subplots_adjust(self, *args, **kwargs) 1520 1521 """
-> 1522 self.subplotpars.update(*args, **kwargs) 1523 for ax in self.axes: 1524 if not isinstance(ax, SubplotBase):
c:\pyzo2014a\lib\site-packages\matplotlib\ in update(self, left, bottom, right, top, wspace, hspace)
223 if self.bottom >=
224 reset()
--> 225 raise ValueError('bottom cannot be >= top')
227 def _update_this(self, s, val):
**ValueError: bottom cannot be >= top**
What causes this error?


ValueError: Number of columns must be a positive integer, not 0

When I execute the code below to plot a figure, I get an error.
total_frame is a dataframe like this
The error looks like this:
ValueError Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_11336\ in <module>
1 total_frame.dropna(how='any')
----> 2 scatter_matrix(total_frame)
~\.conda\envs\env2\lib\site-packages\pandas\plotting\ in scatter_matrix(frame, alpha, figsize, ax, grid, diagonal, marker, density_kwds, hist_kwds, range_padding, **kwargs)
137 hist_kwds=hist_kwds,
138 range_padding=range_padding,
--> 139 **kwargs,
140 )
~\.conda\envs\env2\lib\site-packages\pandas\plotting\_matplotlib\ in scatter_matrix(frame, alpha, figsize, ax, grid, diagonal, marker, density_kwds, hist_kwds, range_padding, **kwds)
48 n = df.columns.size
49 naxes = n * n
---> 50 fig, axes = create_subplots(naxes=naxes, figsize=figsize, ax=ax, squeeze=False)
52 # no gaps between subplots
~\.conda\envs\env2\lib\site-packages\pandas\plotting\_matplotlib\ in create_subplots(naxes, sharex, sharey, squeeze, subplot_kw, ax, layout, layout_type, **fig_kw)
266 # Create first subplot separately, so we can share it if requested
--> 267 ax0 = fig.add_subplot(nrows, ncols, 1, **subplot_kw)
269 if sharex:
~\.conda\envs\env2\lib\site-packages\matplotlib\ in add_subplot(self, *args, **kwargs)
770 projection_class, pkw = self._process_projection_requirements(
771 *args, **kwargs)
--> 772 ax = subplot_class_factory(projection_class)(self, *args, **pkw)
773 key = (projection_class, pkw)
774 return self._add_axes_internal(ax, key)
~\.conda\envs\env2\lib\site-packages\matplotlib\axes\ in __init__(self, fig, *args, **kwargs)
34 self._axes_class.__init__(self, fig, [0, 0, 1, 1], **kwargs)
35 # This will also update the axes position.
---> 36 self.set_subplotspec(SubplotSpec._from_subplot_args(fig, args))
38 #_api.deprecated(
~\.conda\envs\env2\lib\site-packages\matplotlib\ in _from_subplot_args(figure, args)
595 f"{len(args)} were given")
--> 597 gs = GridSpec._check_gridspec_exists(figure, rows, cols)
598 if gs is None:
599 gs = GridSpec(rows, cols, figure=figure)
~\.conda\envs\env2\lib\site-packages\matplotlib\ in _check_gridspec_exists(figure, nrows, ncols)
223 return gs
224 # else gridspec not found:
--> 225 return GridSpec(nrows, ncols, figure=figure)
227 def __getitem__(self, key):
~\.conda\envs\env2\lib\site-packages\matplotlib\ in __init__(self, nrows, ncols, figure, left, bottom, right, top, wspace, hspace, width_ratios, height_ratios)
385 super().__init__(nrows, ncols,
386 width_ratios=width_ratios,
--> 387 height_ratios=height_ratios)
389 _AllowedKeys = ["left", "bottom", "right", "top", "wspace", "hspace"]
~\.conda\envs\env2\lib\site-packages\matplotlib\ in __init__(self, nrows, ncols, height_ratios, width_ratios)
51 if not isinstance(ncols, Integral) or ncols <= 0:
52 raise ValueError(
---> 53 f"Number of columns must be a positive integer, not {ncols!r}")
54 self._nrows, self._ncols = nrows, ncols
55 self.set_height_ratios(height_ratios)
ValueError: Number of columns must be a positive integer, not 0
<Figure size 432x288 with 0 Axes>
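I searched for this error and couldn't find anything. Please help me!
I have solved it.
The columns in my data had dtype object, but the function needs numeric data (with no numeric columns, it tries to create a grid with 0 columns),
so I called convert_dtypes() on the DataFrame and it works.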

tight_layout KeyError default, matplotlib widget

Using JupyterLab, I receive a KeyError: 'default' when using plt.tight_layout() in combination with %matplotlib widget. The following code reproduces the issue:
import matplotlib.pyplot as plt
import numpy as np
%matplotlib widget
The complete error message is the following:
KeyError Traceback (most recent call last)
~/anaconda3/lib/python3.8/site-packages/matplotlib/ in _wait_cursor_for_draw_cm(self)
3024 try:
-> 3025 self.canvas.set_cursor(tools.Cursors.WAIT)
3026 yield
~/anaconda3/lib/python3.8/site-packages/matplotlib/backends/ in set_cursor(self, cursor)
209 }, cursor=cursor)
--> 210 self.send_event('cursor', cursor=cursor)
~/anaconda3/lib/python3.8/site-packages/matplotlib/backends/ in send_event(self, event_type, **kwargs)
391 if self.manager:
--> 392 self.manager._send_event(event_type, **kwargs)
~/anaconda3/lib/python3.8/site-packages/matplotlib/backends/ in _send_event(self, event_type, **kwargs)
540 for s in self.web_sockets:
--> 541 s.send_json(payload)
~/anaconda3/lib/python3.8/site-packages/ipympl/ in send_json(self, content)
180 if content['type'] == 'cursor':
--> 181 self._cursor = cursors_str[content['cursor']]
KeyError: 'wait'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
/tmp/ipykernel_119035/ in <module>
7 y=x**2
8 plt.plot(x,y)
----> 9 plt.tight_layout()
~/anaconda3/lib/python3.8/site-packages/matplotlib/ in tight_layout(pad, h_pad, w_pad, rect)
2300 #_copy_docstring_and_deprecators(Figure.tight_layout)
2301 def tight_layout(*, pad=1.08, h_pad=None, w_pad=None, rect=None):
-> 2302 return gcf().tight_layout(pad=pad, h_pad=h_pad, w_pad=w_pad, rect=rect)
~/anaconda3/lib/python3.8/site-packages/matplotlib/ in tight_layout(self, pad, h_pad, w_pad, rect)
3186 "compatible with tight_layout, so results "
3187 "might be incorrect.")
-> 3188 renderer = _get_renderer(self)
3189 with getattr(renderer, "_draw_disabled", nullcontext)():
3190 kwargs = get_tight_layout_figure(
~/anaconda3/lib/python3.8/site-packages/matplotlib/ in _get_renderer(figure, print_method)
1542 figure.canvas._get_output_canvas(None, fmt), f"print_{fmt}")
1543 try:
-> 1544 print_method(io.BytesIO())
1545 except Done as exc:
1546 renderer, = figure._cachedRenderer, = exc.args
~/anaconda3/lib/python3.8/site-packages/matplotlib/ in wrapper(*args, **kwargs)
1641 kwargs.pop(arg)
-> 1643 return func(*args, **kwargs)
1645 return wrapper
~/anaconda3/lib/python3.8/site-packages/matplotlib/_api/ in wrapper(*inner_args, **inner_kwargs)
410 else deprecation_addendum,
411 **kwargs)
--> 412 return func(*inner_args, **inner_kwargs)
414 DECORATORS[wrapper] = decorator
~/anaconda3/lib/python3.8/site-packages/matplotlib/backends/ in print_png(self, filename_or_obj, metadata, pil_kwargs, *args)
538 *metadata*, including the default 'Software' key.
539 """
--> 540 FigureCanvasAgg.draw(self)
541 mpl.image.imsave(
542 filename_or_obj, self.buffer_rgba(), format="png", origin="upper",
~/anaconda3/lib/python3.8/site-packages/matplotlib/backends/ in draw(self)
431 self.renderer = self.get_renderer(cleared=True)
432 # Acquire a lock on the shared font cache.
--> 433 with RendererAgg.lock, \
434 (self.toolbar._wait_cursor_for_draw_cm() if self.toolbar
435 else nullcontext()):
~/anaconda3/lib/python3.8/ in __enter__(self)
111 del self.args, self.kwds, self.func
112 try:
--> 113 return next(self.gen)
114 except StopIteration:
115 raise RuntimeError("generator didn't yield") from None
~/anaconda3/lib/python3.8/site-packages/matplotlib/ in _wait_cursor_for_draw_cm(self)
3026 yield
3027 finally:
-> 3028 self.canvas.set_cursor(self._lastCursor)
3029 else:
3030 yield
~/anaconda3/lib/python3.8/site-packages/matplotlib/backends/ in set_cursor(self, cursor)
208 backend_tools.Cursors.RESIZE_VERTICAL: 'ns-resize',
209 }, cursor=cursor)
--> 210 self.send_event('cursor', cursor=cursor)
212 def set_image_mode(self, mode):
~/anaconda3/lib/python3.8/site-packages/matplotlib/backends/ in send_event(self, event_type, **kwargs)
390 def send_event(self, event_type, **kwargs):
391 if self.manager:
--> 392 self.manager._send_event(event_type, **kwargs)
~/anaconda3/lib/python3.8/site-packages/matplotlib/backends/ in _send_event(self, event_type, **kwargs)
539 payload = {'type': event_type, **kwargs}
540 for s in self.web_sockets:
--> 541 s.send_json(payload)
~/anaconda3/lib/python3.8/site-packages/ipympl/ in send_json(self, content)
179 # Change in the widget state?
180 if content['type'] == 'cursor':
--> 181 self._cursor = cursors_str[content['cursor']]
183 elif content['type'] == 'message':
KeyError: 'default'

How to change the value in a Koalas dataframe based on a condition

I am using Koalas and I want to change the value of a column based on a condition.
In pandas I can do that using:
import pandas as pd
df_test = pd.DataFrame({
'a': [1,2,3]
,'b': ['one','two','three']})
df_test2 = pd.DataFrame({
'c': [2,1,3]
,'d': ['one','two','three']})
df_test.loc[df_test.a.isin(df_test2['c']),'b'] = 'four'
a b
0 1 four
1 2 four
2 3 four
I am trying to use the same in Koalas, but I have this error:
PandasNotImplementedError Traceback (most recent call last)
<ipython-input-15-814219258adb> in <module>
5 new_loans['write_offs'] = 0
----> 7 new_loans.loc[(new_loans['ID'].isin(userinput_write_offs['id'])),'write_offs'] = 1
8 new_loans.loc[new_loans['write_offs']==1,'is_active'] = 0
9 new_loans = new_loans.sort_values(by = ['ZOHOID','Disb Date'])
/usr/local/lib/python3.7/dist-packages/databricks/koalas/ in isin(self, values)
894 )
--> 896 return self._with_new_scol(self.spark.column.isin(list(values)))
898 def isnull(self) -> Union["Series", "Index"]:
/usr/local/lib/python3.7/dist-packages/databricks/koalas/ in __iter__(self)
5872 def __iter__(self):
-> 5873 return MissingPandasLikeSeries.__iter__(self)
5875 if sys.version_info >= (3, 7):
/usr/local/lib/python3.7/dist-packages/databricks/koalas/missing/ in unsupported_function(*args, **kwargs)
21 def unsupported_function(*args, **kwargs):
22 raise PandasNotImplementedError(
---> 23 class_name=class_name, method_name=method_name, reason=reason
24 )
PandasNotImplementedError: The method `pd.Series.__iter__()` is not implemented. If you want to collect your data as an NumPy array, use 'to_numpy()' instead.
How could I do the same operation in Koalas?
Following this question: Assign Koalas Column from Numpy Result I have done:
df_test.loc[df_test.a.isin(df_test2['c'].to_list()),'b'] = 'four'
But now I have this error:
PythonException Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/IPython/core/ in __call__(self, obj)
700 type_pprinters=self.type_printers,
701 deferred_pprinters=self.deferred_printers)
--> 702 printer.pretty(obj)
703 printer.flush()
704 return stream.getvalue()
/usr/local/lib/python3.7/dist-packages/IPython/lib/ in pretty(self, obj)
392 if cls is not object \
393 and callable(cls.__dict__.get('__repr__')):
--> 394 return _repr_pprint(obj, self, cycle)
396 return _default_pprint(obj, self, cycle)
/usr/local/lib/python3.7/dist-packages/IPython/lib/ in _repr_pprint(obj, p, cycle)
698 """A pprint that just redirects to the normal repr function."""
699 # Find newlines and replace them with p.break_()
--> 700 output = repr(obj)
701 lines = output.splitlines()
702 with
/usr/local/lib/python3.7/dist-packages/databricks/koalas/ in __repr__(self)
10614 return self._to_internal_pandas().to_string()
> 10616 pdf = self._get_or_create_repr_pandas_cache(max_display_count)
10617 pdf_length = len(pdf)
10618 pdf = pdf.iloc[:max_display_count]
/usr/local/lib/python3.7/dist-packages/databricks/koalas/ in _get_or_create_repr_pandas_cache(self, n)
10606 def _get_or_create_repr_pandas_cache(self, n):
10607 if not hasattr(self, "_repr_pandas_cache") or n not in self._repr_pandas_cache:
> 10608 self._repr_pandas_cache = {n: self.head(n + 1)._to_internal_pandas()}
10609 return self._repr_pandas_cache[n]
/usr/local/lib/python3.7/dist-packages/databricks/koalas/ in _to_internal_pandas(self)
10602 This method is for internal use only.
10603 """
> 10604 return self._internal.to_pandas_frame
10606 def _get_or_create_repr_pandas_cache(self, n):
/usr/local/lib/python3.7/dist-packages/databricks/koalas/ in wrapped_lazy_property(self)
514 def wrapped_lazy_property(self):
515 if not hasattr(self, attr_name):
--> 516 setattr(self, attr_name, fn(self))
517 return getattr(self, attr_name)
/usr/local/lib/python3.7/dist-packages/databricks/koalas/ in to_pandas_frame(self)
807 """ Return as pandas DataFrame. """
808 sdf = self.to_internal_spark_frame
--> 809 pdf = sdf.toPandas()
810 if len(pdf) == 0 and len(sdf.schema) > 0:
811 pdf = pdf.astype(
/usr/local/spark/python/pyspark/sql/pandas/ in toPandas(self)
137 # Below is toPandas without Arrow optimization.
--> 138 pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
139 column_counter = Counter(self.columns)
/usr/local/spark/python/pyspark/sql/ in collect(self)
594 """
595 with SCCallSiteSync(self._sc) as css:
--> 596 sock_info = self._jdf.collectToPython()
597 return list(_load_from_socket(sock_info, BatchedSerializer(PickleSerializer())))
/usr/local/lib/python3.7/dist-packages/py4j/ in __call__(self, *args)
1303 answer = self.gateway_client.send_command(command)
1304 return_value = get_return_value(
-> 1305 answer, self.gateway_client, self.target_id,
1307 for temp_arg in temp_args:
/usr/local/spark/python/pyspark/sql/ in deco(*a, **kw)
132 # Hide where the exception came from that shows a non-Pythonic
133 # JVM exception message.
--> 134 raise_from(converted)
135 else:
136 raise
/usr/local/spark/python/pyspark/sql/ in raise_from(e)
An exception was thrown from the Python worker. Please see the stack trace below.
Traceback (most recent call last):
File "/opt/spark/python/lib/", line 589, in main
func, profiler, deserializer, serializer = read_udfs(pickleSer, infile, eval_type)
File "/opt/spark/python/lib/", line 447, in read_udfs
udfs.append(read_single_udf(pickleSer, infile, eval_type, runner_conf, udf_index=i))
File "/opt/spark/python/lib/", line 254, in read_single_udf
f, return_type = read_command(pickleSer, infile)
File "/opt/spark/python/lib/", line 74, in read_command
command = serializer._read_with_length(file)
File "/opt/spark/python/lib/", line 172, in _read_with_length
return self.loads(obj)
File "/opt/spark/python/lib/", line 458, in loads
return pickle.loads(obj, encoding=encoding)
File "/opt/spark/python/lib/", line 1110, in subimport
ModuleNotFoundError: No module named 'pandas'
Why is it trying to use pandas?
The Koalas package exposes pandas-like APIs to users at a high level, but under the hood it is implemented using PySpark APIs.
In the stack trace you pasted, a pandas DataFrame is being created from the Spark DataFrame sdf using the toPandas() method and assigned to pdf.
The implementation of the toPandas() function imports pandas and numpy.
Check line numbers 809 and 138:
/usr/local/lib/python3.7/dist-packages/databricks/koalas/ in to_pandas_frame(self)
807 """ Return as pandas DataFrame. """
808 sdf = self.to_internal_spark_frame
--> 809 pdf = sdf.toPandas()
810 if len(pdf) == 0 and len(sdf.schema) > 0:
811 pdf = pdf.astype(
/usr/local/spark/python/pyspark/sql/pandas/ in toPandas(self)
137 # Below is toPandas without Arrow optimization.
--> 138 pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
139 column_counter = Counter(self.columns)
/usr/local/spark/python/pyspark/sql/ in collect(self)
594 """
595 with SCCallSiteSync(self._sc) as css:
--> 596 sock_info = self._jdf.collectToPython()
597 return list(_load_from_socket(sock_info, BatchedSerializer(PickleSerializer())))
you can check out the implementation of toPandas() function at the following link:

How to convert coordinate columns to Point column with Shapely and Dask?

I have the following problem. My data is a huge dataframe, looking like this (this is the head of the dataframe)
import pandas
import dask.dataframe as dd
data = dd.read_csv(data_path)
Gitter_ID_100m x_mp_100m y_mp_100m Einwohner
0 100mN26840E43341 4334150 2684050 -1
1 100mN26840E43342 4334250 2684050 -1
2 100mN26840E43343 4334350 2684050 -1
3 100mN26840E43344 4334450 2684050 -1
4 100mN26840E43345 4334550 2684050 -1
I am using Dask to handle it. I now want to create a new column where the 'x_mp_100m' and 'y_mp_100m' are converted into a Shapely Point. For a single row, it would look like this:
from shapely.geometry import Point
test_df = data.head(1)
test_df = test_df.assign(geom=lambda k: Point(k.x_mp_100m,k.y_mp_100m))
Gitter_ID_100m x_mp_100m y_mp_100m Einwohner geom
0 100mN26840E43341 4334150 2684050 -1 POINT (4334150 2684050)
I already tried the following code with Dask:
data_out = data.map_partitions(lambda df: df.assign(geom= lambda k: Point(k.x_mp_100m,k.y_mp_100m)), meta=pd.DataFrame)
When doing that, I get the following error:
TypeError Traceback (most recent call last)
<ipython-input-17-b8de11d9b9b3> in <module>
----> 1 data_out.compute()
~\AppData\Local\Continuum\anaconda3\lib\site-packages\dask\ in compute(self, **kwargs)
154 dask.base.compute
155 """
--> 156 (result,) = compute(self, traverse=False, **kwargs)
157 return result
~\AppData\Local\Continuum\anaconda3\lib\site-packages\dask\ in compute(*args, **kwargs)
395 keys = [x.__dask_keys__() for x in collections]
396 postcomputes = [x.__dask_postcompute__() for x in collections]
--> 397 results = schedule(dsk, keys, **kwargs)
398 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
~\AppData\Local\Continuum\anaconda3\lib\site-packages\distributed\ in get(self, dsk, keys, restrictions, loose_restrictions, resources, sync, asynchronous, direct, retries, priority, fifo_timeout, actors, **kwargs)
2319 try:
2320 results = self.gather(packed, asynchronous=asynchronous,
-> 2321 direct=direct)
2322 finally:
2323 for f in futures.values():
~\AppData\Local\Continuum\anaconda3\lib\site-packages\distributed\ in gather(self, futures, errors, maxsize, direct, asynchronous)
1653 return self.sync(self._gather, futures, errors=errors,
1654 direct=direct, local_worker=local_worker,
-> 1655 asynchronous=asynchronous)
1657 #gen.coroutine
~\AppData\Local\Continuum\anaconda3\lib\site-packages\distributed\ in sync(self, func, *args, **kwargs)
671 return future
672 else:
--> 673 return sync(self.loop, func, *args, **kwargs)
675 def __repr__(self):
~\AppData\Local\Continuum\anaconda3\lib\site-packages\distributed\ in sync(loop, func, *args, **kwargs)
275 e.wait(10)
276 if error[0]:
--> 277 six.reraise(*error[0])
278 else:
279 return result[0]
~\AppData\Local\Continuum\anaconda3\lib\site-packages\ in reraise(tp, value, tb)
691 if value.__traceback__ is not tb:
692 raise value.with_traceback(tb)
--> 693 raise value
694 finally:
695 value = None
~\AppData\Local\Continuum\anaconda3\lib\site-packages\distributed\ in f()
260 if timeout is not None:
261 future = gen.with_timeout(timedelta(seconds=timeout), future)
--> 262 result[0] = yield future
263 except Exception as exc:
264 error[0] = sys.exc_info()
~\AppData\Local\Continuum\anaconda3\lib\site-packages\tornado\ in run(self)
1132 try:
-> 1133 value = future.result()
1134 except Exception:
1135 self.had_exception = True
~\AppData\Local\Continuum\anaconda3\lib\site-packages\tornado\ in run(self)
1139 if exc_info is not None:
1140 try:
-> 1141 yielded = self.gen.throw(*exc_info)
1142 finally:
1143 # Break up a reference to itself
~\AppData\Local\Continuum\anaconda3\lib\site-packages\distributed\ in _gather(self, futures, errors, direct, local_worker)
1498 six.reraise(type(exception),
1499 exception,
-> 1500 traceback)
1501 if errors == 'skip':
1502 bad_keys.add(key)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\ in reraise(tp, value, tb)
690 value = tp()
691 if value.__traceback__ is not tb:
--> 692 raise value.with_traceback(tb)
693 raise value
694 finally:
~\AppData\Local\Continuum\anaconda3\lib\site-packages\dask\dataframe\ in apply_and_enforce()
3683 Ensures the output has the same columns, even if empty."""
-> 3684 df = func(*args, **kwargs)
3685 if isinstance(df, (pd.DataFrame, pd.Series, pd.Index)):
3686 if len(df) == 0:
<ipython-input-16-d5710cb00158> in <lambda>()
----> 1 data_out = data.map_partitions(lambda df: df.assign(geom= lambda k: Point(k.x_mp_100m,k.y_mp_100m)), meta=pd.DataFrame)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\ in assign()
3549 if PY36:
3550 for k, v in kwargs.items():
-> 3551 data[k] = com.apply_if_callable(v, data)
3552 else:
3553 # <= 3.5: do all calculations first...
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\ in apply_if_callable()
328 if callable(maybe_callable):
--> 329 return maybe_callable(obj, **kwargs)
331 return maybe_callable
<ipython-input-16-d5710cb00158> in <lambda>()
----> 1 data_out = data.map_partitions(lambda df: df.assign(geom= lambda k: Point(k.x_mp_100m,k.y_mp_100m)), meta=pd.DataFrame)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\shapely\geometry\ in __init__()
47 BaseGeometry.__init__(self)
48 if len(args) > 0:
---> 49 self._set_coords(*args)
51 # Coordinate getters and setters
~\AppData\Local\Continuum\anaconda3\lib\site-packages\shapely\geometry\ in _set_coords()
130 self._geom, self._ndim = geos_point_from_py(args[0])
131 else:
--> 132 self._geom, self._ndim = geos_point_from_py(tuple(args))
134 coords = property(BaseGeometry._get_coords, _set_coords)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\shapely\geometry\ in geos_point_from_py()
207 coords = ob
208 n = len(coords)
--> 209 dx = c_double(coords[0])
210 dy = c_double(coords[1])
211 dz = None
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\ in wrapper()
91 return converter(self.iloc[0])
92 raise TypeError("cannot convert the series to "
---> 93 "{0}".format(str(converter)))
95 wrapper.__name__ = "__{name}__".format(name=converter.__name__)
TypeError: cannot convert the series to <class 'float'>
So I think I am either using the pandas assign() function in the wrong way, or there is a better-fitting function; I just cannot seem to wrap my head around it. Do you know a better way to handle this?
I also found this way:
data_out = data.map_partitions(lambda df: df.apply(lambda row: Point(row['x_mp_100m'],row['y_mp_100m']), axis=1))
But is that the most efficient way?
What you're doing seems fine. I would either find a function that works well on a single row and then use the apply method, or find a function that works well on a single pandas dataframe and then use the map_partitions method.
As for the error you're getting, I would first verify that your function works on a plain pandas dataframe.

Network Plot Error Using Python / iPython

The following starter code errors out (AttributeError: incompatible shape for a non-contiguous array) when I try to use networkx on OSX 10.8.2, in an IPython notebook.
import networkx as nx
g = nx.Graph()
g.add_edge(1,2) # no error if this line is omitted
I do not see the error if I do not add the edge to this graph. Matplotlib, Numpy, and Scipy were all installed as suggested here.
Below is the error message:
AttributeError Traceback (most recent call last)
<ipython-input-33-df7dfeff6452> in <module>()
----> 1 nx.draw(g)
/usr/local/lib/python2.7/site-packages/networkx/drawing/nx_pylab.pyc in draw(G, pos, ax, hold, **kwds)
131 pylab.hold(h)
132 try:
--> 133 draw_networkx(G,pos=pos,ax=ax,**kwds)
134 ax.set_axis_off()
135 pylab.draw_if_interactive()
/usr/local/lib/python2.7/site-packages/networkx/drawing/nx_pylab.pyc in draw_networkx(G, pos, with_labels, **kwds)
266 node_collection=draw_networkx_nodes(G, pos, **kwds)
--> 267 edge_collection=draw_networkx_edges(G, pos, **kwds)
268 if with_labels:
269 draw_networkx_labels(G, pos, **kwds)
/usr/local/lib/python2.7/site-packages/networkx/drawing/nx_pylab.pyc in draw_networkx_edges(G, pos, edgelist, width, edge_color, style, alpha, edge_cmap, edge_vmin, edge_vmax, ax, arrows, label, **kwds)
544 edge_collection.set_zorder(1) # edges go behind nodes
545 edge_collection.set_label(label)
--> 546 ax.add_collection(edge_collection)
548 # Note: there was a bug in mpl regarding the handling of alpha values for
/usr/local/lib/python2.7/site-packages/matplotlib/axes.pyc in add_collection(self, collection, autolim)
1443 if autolim:
1444 if collection._paths and len(collection._paths):
-> 1445 self.update_datalim(collection.get_datalim(self.transData))
1447 collection._remove_method = lambda h: self.collections.remove(h)
/usr/local/lib/python2.7/site-packages/matplotlib/collections.pyc in get_datalim(self, transData)
165 offsets = offsets.filled(np.nan)
166 # get_path_collection_extents handles nan but not masked arrays
--> 167 offsets.shape = (-1, 2) # Make it Nx2
169 result = mpath.get_path_collection_extents(
AttributeError: incompatible shape for a non-contiguous array
AttributeError Traceback (most recent call last)
/usr/local/lib/python2.7/site-packages/IPython/zmq/pylab/backend_inline.pyc in show(close)
100 try:
101 for figure_manager in Gcf.get_all_fig_managers():
--> 102 send_figure(figure_manager.canvas.figure)
103 finally:
104 show._to_draw = []
/usr/local/lib/python2.7/site-packages/IPython/zmq/pylab/backend_inline.pyc in send_figure(fig)
209 """
210 fmt = InlineBackend.instance().figure_format
--> 211 data = print_figure(fig, fmt)
212 # print_figure will return None if there's nothing to draw:
213 if data is None:
/usr/local/lib/python2.7/site-packages/IPython/core/pylabtools.pyc in print_figure(fig, fmt)
102 try:
103 bytes_io = BytesIO()
--> 104 fig.canvas.print_figure(bytes_io, format=fmt, bbox_inches='tight')
105 data = bytes_io.getvalue()
106 finally:
/usr/local/lib/python2.7/site-packages/matplotlib/backend_bases.pyc in print_figure(self, filename, dpi, facecolor, edgecolor, orientation, format, **kwargs)
1981 orientation=orientation,
1982 dryrun=True,
-> 1983 **kwargs)
1984 renderer = self.figure._cachedRenderer
1985 bbox_inches = self.figure.get_tightbbox(renderer)
/usr/local/lib/python2.7/site-packages/matplotlib/backends/backend_agg.pyc in print_png(self, filename_or_obj, *args, **kwargs)
468 def print_png(self, filename_or_obj, *args, **kwargs):
--> 469 FigureCanvasAgg.draw(self)
470 renderer = self.get_renderer()
471 original_dpi = renderer.dpi
/usr/local/lib/python2.7/site-packages/matplotlib/backends/backend_agg.pyc in draw(self)
420 try:
--> 421 self.figure.draw(self.renderer)
422 finally:
423 RendererAgg.lock.release()
/usr/local/lib/python2.7/site-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs)
53 def draw_wrapper(artist, renderer, *args, **kwargs):
54 before(artist, renderer)
---> 55 draw(artist, renderer, *args, **kwargs)
56 after(artist, renderer)
/usr/local/lib/python2.7/site-packages/matplotlib/figure.pyc in draw(self, renderer)
896 dsu.sort(key=itemgetter(0))
897 for zorder, a, func, args in dsu:
--> 898 func(*args)
900 renderer.close_group('figure')
/usr/local/lib/python2.7/site-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs)
53 def draw_wrapper(artist, renderer, *args, **kwargs):
54 before(artist, renderer)
---> 55 draw(artist, renderer, *args, **kwargs)
56 after(artist, renderer)
/usr/local/lib/python2.7/site-packages/matplotlib/axes.pyc in draw(self, renderer, inframe)
1996 for zorder, a in dsu:
-> 1997 a.draw(renderer)
1999 renderer.close_group('axes')
/usr/local/lib/python2.7/site-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs)
53 def draw_wrapper(artist, renderer, *args, **kwargs):
54 before(artist, renderer)
---> 55 draw(artist, renderer, *args, **kwargs)
56 after(artist, renderer)
/usr/local/lib/python2.7/site-packages/matplotlib/collections.pyc in draw(self, renderer)
227 self.update_scalarmappable()
--> 229 transform, transOffset, offsets, paths = self._prepare_points()
231 gc = renderer.new_gc()
/usr/local/lib/python2.7/site-packages/matplotlib/collections.pyc in _prepare_points(self)
204 offsets = np.asanyarray(offsets, np.float_)
--> 205 offsets.shape = (-1, 2) # Make it Nx2
207 if not transform.is_affine:
AttributeError: incompatible shape for a non-contiguous array
This was reported earlier in "Draw a graph with edges using Networkx". It is likely a bug/feature in the development version of the NumPy library, as mentioned above (numpy issue 2700).