ValueError: Number of columns must be a positive integer, not 0

ValueError: Number of columns must be a positive integer, not 0 - pandas

When I execute the code below to plot a figure:
scatter_matrix(total_frame)
total_frame is a dataframe like this
The error looks like this:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_11336\1619863705.py in <module>
1 total_frame.dropna(how='any')
----> 2 scatter_matrix(total_frame)
3 plt.show()
~\.conda\envs\env2\lib\site-packages\pandas\plotting\_misc.py in scatter_matrix(frame, alpha, figsize, ax, grid, diagonal, marker, density_kwds, hist_kwds, range_padding, **kwargs)
137 hist_kwds=hist_kwds,
138 range_padding=range_padding,
--> 139 **kwargs,
140 )
141
~\.conda\envs\env2\lib\site-packages\pandas\plotting\_matplotlib\misc.py in scatter_matrix(frame, alpha, figsize, ax, grid, diagonal, marker, density_kwds, hist_kwds, range_padding, **kwds)
48 n = df.columns.size
49 naxes = n * n
---> 50 fig, axes = create_subplots(naxes=naxes, figsize=figsize, ax=ax, squeeze=False)
51
52 # no gaps between subplots
~\.conda\envs\env2\lib\site-packages\pandas\plotting\_matplotlib\tools.py in create_subplots(naxes, sharex, sharey, squeeze, subplot_kw, ax, layout, layout_type, **fig_kw)
265
266 # Create first subplot separately, so we can share it if requested
--> 267 ax0 = fig.add_subplot(nrows, ncols, 1, **subplot_kw)
268
269 if sharex:
~\.conda\envs\env2\lib\site-packages\matplotlib\figure.py in add_subplot(self, *args, **kwargs)
770 projection_class, pkw = self._process_projection_requirements(
771 *args, **kwargs)
--> 772 ax = subplot_class_factory(projection_class)(self, *args, **pkw)
773 key = (projection_class, pkw)
774 return self._add_axes_internal(ax, key)
~\.conda\envs\env2\lib\site-packages\matplotlib\axes\_subplots.py in __init__(self, fig, *args, **kwargs)
34 self._axes_class.__init__(self, fig, [0, 0, 1, 1], **kwargs)
35 # This will also update the axes position.
---> 36 self.set_subplotspec(SubplotSpec._from_subplot_args(fig, args))
37
38 #_api.deprecated(
~\.conda\envs\env2\lib\site-packages\matplotlib\gridspec.py in _from_subplot_args(figure, args)
595 f"{len(args)} were given")
596
--> 597 gs = GridSpec._check_gridspec_exists(figure, rows, cols)
598 if gs is None:
599 gs = GridSpec(rows, cols, figure=figure)
~\.conda\envs\env2\lib\site-packages\matplotlib\gridspec.py in _check_gridspec_exists(figure, nrows, ncols)
223 return gs
224 # else gridspec not found:
--> 225 return GridSpec(nrows, ncols, figure=figure)
226
227 def __getitem__(self, key):
~\.conda\envs\env2\lib\site-packages\matplotlib\gridspec.py in __init__(self, nrows, ncols, figure, left, bottom, right, top, wspace, hspace, width_ratios, height_ratios)
385 super().__init__(nrows, ncols,
386 width_ratios=width_ratios,
--> 387 height_ratios=height_ratios)
388
389 _AllowedKeys = ["left", "bottom", "right", "top", "wspace", "hspace"]
~\.conda\envs\env2\lib\site-packages\matplotlib\gridspec.py in __init__(self, nrows, ncols, height_ratios, width_ratios)
51 if not isinstance(ncols, Integral) or ncols <= 0:
52 raise ValueError(
---> 53 f"Number of columns must be a positive integer, not {ncols!r}")
54 self._nrows, self._ncols = nrows, ncols
55 self.set_height_ratios(height_ratios)
ValueError: Number of columns must be a positive integer, not 0
<Figure size 432x288 with 0 Axes>
I searched for this error and couldn't find anything. Please help me!

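I have solved it.
My data's columns had dtype object, but the function needs numeric columns; with no numeric columns there was nothing to plot, which is why the grid ended up with 0 columns.
So I converted the columns with DataFrame.convert_dtypes() (i.e. total_frame = total_frame.convert_dtypes()) and it works.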

Related

Seaborn pairplot not running only on a specific system

I have the following data in a file named 'Salaries.csv'. The dataset has columns like Index(['yearID', 'teamID', 'lgID', 'salary', 'num_feat'], dtype='object'). Please note that I added the column num_feat to the DataFrame myself.
I want to do a Seaborn pairplot for team 'ATL' to plot scatter plots among all numeric features in the data frame.
I have the following code :
import seaborn as sns
var_set = [
"yearID",
"teamID",
"lgID",
"playerID",
"salary"
]
head_set = []
head_set.extend(var_set)
head_set.append("num_feat")
df = pd.read_csv('Salaries.csv',index_col='playerID', header=None, names=head_set)
df['num_feat'] = 100 * np.random.random_sample(df.shape[0]). #Adding column num_feat
df_copy = df
cols_with_team_ATL = df_copy.loc[df_copy.teamID=="ATL", ]
# Create the default pairplot
pairplot_fig = sns.pairplot(cols_with_team_ATL, vars=['yearID', 'salary', 'num_feat'])
plt.subplots_adjust(top=0.9)
pairplot_fig.fig.suptitle("Scatter plots among all numeric features in the data frame for teamID = ATL", fontsize=18, alpha=0.9, weight='bold')
plt.show()
The same code runs perfectly on my friend's system but not on mine. On my system it shows the following error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/var/folders/ch/6r9p7n0j3xg1l79lz1zdkvsh0000gq/T/ipykernel_97373/3735184261.py in <module>
25 # Create the default pairplot
26 print(df.columns)
---> 27 pairplot_fig = sns.pairplot(cols_with_team_ATL, vars=['yearID', 'salary', 'num_feat'])
28 plt.subplots_adjust(top=0.9)
29 pairplot_fig.fig.suptitle("Scatter plots among all numeric features in the data frame for teamID = ATL", fontsize=18, alpha=0.9, weight='bold')
~/USC/anaconda3/lib/python3.9/site-packages/seaborn/_decorators.py in inner_f(*args, **kwargs)
44 )
45 kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 46 return f(**kwargs)
47 return inner_f
48
~/USC/anaconda3/lib/python3.9/site-packages/seaborn/axisgrid.py in pairplot(data, hue, hue_order, palette, vars, x_vars, y_vars, kind, diag_kind, markers, height, aspect, corner, dropna, plot_kws, diag_kws, grid_kws, size)
2124 diag_kws.setdefault("legend", False)
2125 if diag_kind == "hist":
-> 2126 grid.map_diag(histplot, **diag_kws)
2127 elif diag_kind == "kde":
2128 diag_kws.setdefault("fill", True)
~/USC/anaconda3/lib/python3.9/site-packages/seaborn/axisgrid.py in map_diag(self, func, **kwargs)
1476 plot_kwargs.setdefault("hue_order", self._hue_order)
1477 plot_kwargs.setdefault("palette", self._orig_palette)
-> 1478 func(x=vector, **plot_kwargs)
1479 ax.legend_ = None
1480
~/USC/anaconda3/lib/python3.9/site-packages/seaborn/distributions.py in histplot(data, x, y, hue, weights, stat, bins, binwidth, binrange, discrete, cumulative, common_bins, common_norm, multiple, element, fill, shrink, kde, kde_kws, line_kws, thresh, pthresh, pmax, cbar, cbar_ax, cbar_kws, palette, hue_order, hue_norm, color, log_scale, legend, ax, **kwargs)
1460 if p.univariate:
1461
-> 1462 p.plot_univariate_histogram(
1463 multiple=multiple,
1464 element=element,
~/USC/anaconda3/lib/python3.9/site-packages/seaborn/distributions.py in plot_univariate_histogram(self, multiple, element, fill, common_norm, common_bins, shrink, kde, kde_kws, color, legend, line_kws, estimate_kws, **plot_kws)
426
427 # First pass through the data to compute the histograms
--> 428 for sub_vars, sub_data in self.iter_data("hue", from_comp_data=True):
429
430 # Prepare the relevant data
~/USC/anaconda3/lib/python3.9/site-packages/seaborn/_core.py in iter_data(self, grouping_vars, reverse, from_comp_data)
981
982 if from_comp_data:
--> 983 data = self.comp_data
984 else:
985 data = self.plot_data
~/USC/anaconda3/lib/python3.9/site-packages/seaborn/_core.py in comp_data(self)
1055 orig = self.plot_data[var].dropna()
1056 comp_col = pd.Series(index=orig.index, dtype=float, name=var)
-> 1057 comp_col.loc[orig.index] = pd.to_numeric(axis.convert_units(orig))
1058
1059 if axis.get_scale() == "log":
~/USC/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py in __setitem__(self, key, value)
721
722 iloc = self if self.name == "iloc" else self.obj.iloc
--> 723 iloc._setitem_with_indexer(indexer, value, self.name)
724
725 def _validate_key(self, key, axis: int):
~/USC/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py in _setitem_with_indexer(self, indexer, value, name)
1730 self._setitem_with_indexer_split_path(indexer, value, name)
1731 else:
-> 1732 self._setitem_single_block(indexer, value, name)
1733
1734 def _setitem_with_indexer_split_path(self, indexer, value, name: str):
~/USC/anaconda3/lib/python3.9/site-packages/pandas/core/indexing.py in _setitem_single_block(self, indexer, value, name)
1966
1967 # actually do the set
-> 1968 self.obj._mgr = self.obj._mgr.setitem(indexer=indexer, value=value)
1969 self.obj._maybe_update_cacher(clear=True)
1970
~/USC/anaconda3/lib/python3.9/site-packages/pandas/core/internals/managers.py in setitem(self, indexer, value)
353
354 def setitem(self: T, indexer, value) -> T:
--> 355 return self.apply("setitem", indexer=indexer, value=value)
356
357 def putmask(self, mask, new, align: bool = True):
~/USC/anaconda3/lib/python3.9/site-packages/pandas/core/internals/managers.py in apply(self, f, align_keys, ignore_failures, **kwargs)
325 applied = b.apply(f, **kwargs)
326 else:
--> 327 applied = getattr(b, f)(**kwargs)
328 except (TypeError, NotImplementedError):
329 if not ignore_failures:
~/USC/anaconda3/lib/python3.9/site-packages/pandas/core/internals/blocks.py in setitem(self, indexer, value)
941
942 # length checking
--> 943 check_setitem_lengths(indexer, value, values)
944 exact_match = is_exact_shape_match(values, arr_value)
945
~/USC/anaconda3/lib/python3.9/site-packages/pandas/core/indexers.py in check_setitem_lengths(indexer, value, values)
174 and len(indexer[indexer]) == len(value)
175 ):
--> 176 raise ValueError(
177 "cannot set using a list-like indexer "
178 "with a different length than the value"
ValueError: cannot set using a list-like indexer with a different length than the value
Why does it fail only on my system? Is there a problem with my Python version or with Jupyter Notebook?
Please help.

How to convert coordinate columns to Point column with Shapely and Dask?

I have the following problem. My data is a huge dataframe, looking like this (this is the head of the dataframe)
import pandas
import dask.dataframe as dd
data = dd.read_csv(data_path)
data.persist()
print(data.head())
Gitter_ID_100m x_mp_100m y_mp_100m Einwohner
0 100mN26840E43341 4334150 2684050 -1
1 100mN26840E43342 4334250 2684050 -1
2 100mN26840E43343 4334350 2684050 -1
3 100mN26840E43344 4334450 2684050 -1
4 100mN26840E43345 4334550 2684050 -1
I am using Dask to handle it. I now want to create a new column where the 'x_mp_100m' and 'y_mp_100m' are converted into a Shapely Point. For a single row, it would look like this:
from shapely.geometry import Point
test_df = data.head(1)
test_df = test_df.assign(geom=lambda k: Point(k.x_mp_100m,k.y_mp_100m))
print(test_df)
Gitter_ID_100m x_mp_100m y_mp_100m Einwohner geom
0 100mN26840E43341 4334150 2684050 -1 POINT (4334150 2684050)
I already tried the following code with Dask:
data_out = data.map_partitions(lambda df: df.assign(geom= lambda k: Point(k.x_mp_100m,k.y_mp_100m)), meta=pd.DataFrame)
When doing that, I get the following error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-17-b8de11d9b9b3> in <module>
----> 1 data_out.compute()
~\AppData\Local\Continuum\anaconda3\lib\site-packages\dask\base.py in compute(self, **kwargs)
154 dask.base.compute
155 """
--> 156 (result,) = compute(self, traverse=False, **kwargs)
157 return result
158
~\AppData\Local\Continuum\anaconda3\lib\site-packages\dask\base.py in compute(*args, **kwargs)
395 keys = [x.__dask_keys__() for x in collections]
396 postcomputes = [x.__dask_postcompute__() for x in collections]
--> 397 results = schedule(dsk, keys, **kwargs)
398 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
399
~\AppData\Local\Continuum\anaconda3\lib\site-packages\distributed\client.py in get(self, dsk, keys, restrictions, loose_restrictions, resources, sync, asynchronous, direct, retries, priority, fifo_timeout, actors, **kwargs)
2319 try:
2320 results = self.gather(packed, asynchronous=asynchronous,
-> 2321 direct=direct)
2322 finally:
2323 for f in futures.values():
~\AppData\Local\Continuum\anaconda3\lib\site-packages\distributed\client.py in gather(self, futures, errors, maxsize, direct, asynchronous)
1653 return self.sync(self._gather, futures, errors=errors,
1654 direct=direct, local_worker=local_worker,
-> 1655 asynchronous=asynchronous)
1656
1657 #gen.coroutine
~\AppData\Local\Continuum\anaconda3\lib\site-packages\distributed\client.py in sync(self, func, *args, **kwargs)
671 return future
672 else:
--> 673 return sync(self.loop, func, *args, **kwargs)
674
675 def __repr__(self):
~\AppData\Local\Continuum\anaconda3\lib\site-packages\distributed\utils.py in sync(loop, func, *args, **kwargs)
275 e.wait(10)
276 if error[0]:
--> 277 six.reraise(*error[0])
278 else:
279 return result[0]
~\AppData\Local\Continuum\anaconda3\lib\site-packages\six.py in reraise(tp, value, tb)
691 if value.__traceback__ is not tb:
692 raise value.with_traceback(tb)
--> 693 raise value
694 finally:
695 value = None
~\AppData\Local\Continuum\anaconda3\lib\site-packages\distributed\utils.py in f()
260 if timeout is not None:
261 future = gen.with_timeout(timedelta(seconds=timeout), future)
--> 262 result[0] = yield future
263 except Exception as exc:
264 error[0] = sys.exc_info()
~\AppData\Local\Continuum\anaconda3\lib\site-packages\tornado\gen.py in run(self)
1131
1132 try:
-> 1133 value = future.result()
1134 except Exception:
1135 self.had_exception = True
~\AppData\Local\Continuum\anaconda3\lib\site-packages\tornado\gen.py in run(self)
1139 if exc_info is not None:
1140 try:
-> 1141 yielded = self.gen.throw(*exc_info)
1142 finally:
1143 # Break up a reference to itself
~\AppData\Local\Continuum\anaconda3\lib\site-packages\distributed\client.py in _gather(self, futures, errors, direct, local_worker)
1498 six.reraise(type(exception),
1499 exception,
-> 1500 traceback)
1501 if errors == 'skip':
1502 bad_keys.add(key)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\six.py in reraise(tp, value, tb)
690 value = tp()
691 if value.__traceback__ is not tb:
--> 692 raise value.with_traceback(tb)
693 raise value
694 finally:
~\AppData\Local\Continuum\anaconda3\lib\site-packages\dask\dataframe\core.py in apply_and_enforce()
3682
3683 Ensures the output has the same columns, even if empty."""
-> 3684 df = func(*args, **kwargs)
3685 if isinstance(df, (pd.DataFrame, pd.Series, pd.Index)):
3686 if len(df) == 0:
<ipython-input-16-d5710cb00158> in <lambda>()
----> 1 data_out = data.map_partitions(lambda df: df.assign(geom= lambda k: Point(k.x_mp_100m,k.y_mp_100m)), meta=pd.DataFrame)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\frame.py in assign()
3549 if PY36:
3550 for k, v in kwargs.items():
-> 3551 data[k] = com.apply_if_callable(v, data)
3552 else:
3553 # <= 3.5: do all calculations first...
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\common.py in apply_if_callable()
327
328 if callable(maybe_callable):
--> 329 return maybe_callable(obj, **kwargs)
330
331 return maybe_callable
<ipython-input-16-d5710cb00158> in <lambda>()
----> 1 data_out = data.map_partitions(lambda df: df.assign(geom= lambda k: Point(k.x_mp_100m,k.y_mp_100m)), meta=pd.DataFrame)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\shapely\geometry\point.py in __init__()
47 BaseGeometry.__init__(self)
48 if len(args) > 0:
---> 49 self._set_coords(*args)
50
51 # Coordinate getters and setters
~\AppData\Local\Continuum\anaconda3\lib\site-packages\shapely\geometry\point.py in _set_coords()
130 self._geom, self._ndim = geos_point_from_py(args[0])
131 else:
--> 132 self._geom, self._ndim = geos_point_from_py(tuple(args))
133
134 coords = property(BaseGeometry._get_coords, _set_coords)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\shapely\geometry\point.py in geos_point_from_py()
207 coords = ob
208 n = len(coords)
--> 209 dx = c_double(coords[0])
210 dy = c_double(coords[1])
211 dz = None
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\series.py in wrapper()
91 return converter(self.iloc[0])
92 raise TypeError("cannot convert the series to "
---> 93 "{0}".format(str(converter)))
94
95 wrapper.__name__ = "__{name}__".format(name=converter.__name__)
TypeError: cannot convert the series to <class 'float'>
So I think I am either using the DataFrame.assign() function in the wrong way, or there is a better-suited function; I just cannot seem to wrap my head around it. Do you know a better way to handle this?
I also found this way:
data_out = data.map_partitions(lambda df: df.apply(lambda row: Point(row['x_mp_100m'],row['y_mp_100m']), axis=1))
But is that the most efficient way?

What you're doing seems fine. I would either find a function that works well on a single row and use the apply method, or find a function that works well on a single pandas DataFrame and use the map_partitions method.
For the error that you're getting I would first verify that your function works on a pandas dataframe.

How to fix "Exception: Data must be 1-dimensional" error when running Kmeans

I have resolved all the errors up to this point. I am not quite sure I understand the problem, except that I get the error "Exception: Data must be 1-dimensional".
Here is my code. Here is a link to the Excel file I'm using.
import pandas as pd
import numpy as np
import warnings
from sklearn import preprocessing
from sklearn.preprocessing import LabelBinarizer
from sklearn.cluster import KMeans
df1 = pd.read_excel('PERM_Disclosure_Data_FY2018_EOYV2.xlsx', 'PERM_FY2018')
warnings.filterwarnings("ignore")
df1 = df1.dropna(subset=['PW_AMOUNT_9089'])
df1 = df1.dropna(subset=['CASE_STATUS'])
df1 = df1.dropna(subset=['PW_SOC_TITLE'])
df1.CASE_STATUS[df1['CASE_STATUS']=='Certified-Expired'] = 'Certified'
df1 = df1[df1.CASE_STATUS != 'Withdrawn']
df1 = df1.dropna()
df1 = df1[df1.PW_AMOUNT_9089 != '#############']
df1 = df1.dropna(subset=['PW_AMOUNT_9089'])
df1 = df1.dropna(subset=['CASE_STATUS'])
df1 = df1.dropna(subset=['PW_SOC_TITLE'])
df1.PW_AMOUNT_9089 = df1.PW_AMOUNT_9089.astype(float)
df1=df1.iloc[:, [2,4,5]]
enc = LabelBinarizer()
y = enc.fit_transform(df1.CASE_STATUS)[:, [0]]
at this point the output for y is an array:
array([[0],
[0],
[0],
...,
[1],
[1],
[0]])
then I define XZ
le = preprocessing.LabelEncoder()
X = df1.iloc[:, [1]]
Z = df1.iloc[:, [2]]
X2 = X.apply(le.fit_transform)
XZ = pd.concat([X2,Z], axis=1)
the output for XZ is:
PW_SOC_TITLE PW_AMOUNT_9089
12 176 60778.0
13 456 100901.0
14 134 134389.0
15 134 104936.0
16 134 95160.0
17 294 66976.0
18 73 38610.0
19 598 122533.0
20 220 109574.0
21 99 67850.0
22 399 132018.0
23 68 56118.0
24 139 136781.0
25 134 111405.0
26 598 58573.0
27 362 75067.0
28 598 85862.0
29 572 33301.0
30 598 112840.0
31 134 134971.0
32 176 100568.0
33 176 100568.0
34 626 19614.0
35 153 26354.0
36 405 79248.0
37 220 93350.0
38 139 153213.0
39 598 131997.0
40 598 131997.0
41 1 90438.0
... ... ...
119741 495 23005.0
119742 63 46030.0
119743 153 20301.0
119744 95 21965.0
119745 153 29890.0
119746 295 79680.0
119747 349 79498.0
119748 223 38930.0
119749 223 38930.0
119750 570 39160.0
119751 302 119392.0
119752 598 106001.0
119753 416 64230.0
119754 598 115482.0
119755 99 80205.0
119756 134 78329.0
119757 598 109325.0
119758 598 109325.0
119759 570 49770.0
119760 194 18117.0
119761 404 46987.0
119762 189 35131.0
119763 73 49900.0
119764 323 32240.0
119765 372 28122.0
119766 468 67974.0
119767 399 78520.0
119768 329 25875.0
119769 329 25875.0
119770 601 82098.0
I then continue:
from sklearn.model_selection import train_test_split
XZ_train, XZ_test, y_train, y_test = train_test_split(XZ, y,
test_size = .25,
random_state=20,
stratify=y )
# loading library
from pandas_ml import ConfusionMatrix
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
# instantiate learning model loop(k = i)
for weights in ['uniform', 'distance']:
for i in range(1,11,2):
knn = KNeighborsClassifier(n_neighbors=i, weights=weights)
# fitting the model
knn.fit(XZ_train, y_train)
# predict the response
pred = knn.predict(XZ_test)
confusion = ConfusionMatrix(y_test, pred)
if i<11:
# evaluate accuracy
print('Weight Measure:', knn.weights)
print('n_neighbors=', knn.n_neighbors)
print('Accuracy=', accuracy_score(y_test, pred))
#print('')
#print('Confusion Matrix')
#print(confusion)
print('-----------------------------')
The error I get is as follows:
G:\Anaconda\lib\site-packages\ipykernel_launcher.py:11: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
# This is added back by InteractiveShellApp.init_path()
---------------------------------------------------------------------------
Exception Traceback (most recent call last)
<ipython-input-20-bf6054d911ba> in <module>
12 # predict the response
13 pred = knn.predict(XZ_test)
---> 14 confusion = ConfusionMatrix(y_test, pred)
15 if i<11:
16 # evaluate accuracy
G:\Anaconda\lib\site-packages\pandas_ml\confusion_matrix\cm.py in __new__(cls, y_true, y_pred, *args, **kwargs)
21 if len(set(uniq_true) - set(uniq_pred)) == 0:
22 from pandas_ml.confusion_matrix.bcm import BinaryConfusionMatrix
---> 23 return BinaryConfusionMatrix(y_true, y_pred, *args, **kwargs)
24 return LabeledConfusionMatrix(y_true, y_pred, *args, **kwargs)
25
G:\Anaconda\lib\site-packages\pandas_ml\confusion_matrix\bcm.py in __init__(self, *args, **kwargs)
19 def __init__(self, *args, **kwargs):
20 # super(BinaryConfusionMatrix, self).__init__(y_true, y_pred)
---> 21 super(BinaryConfusionMatrix, self).__init__(*args, **kwargs)
22 assert self.len() == 2, \
23 "Binary confusion matrix must have len=2 but \
G:\Anaconda\lib\site-packages\pandas_ml\confusion_matrix\abstract.py in __init__(self, y_true, y_pred, labels, display_sum, backend, true_name, pred_name)
31 self._y_true.name = self.true_name
32 else:
---> 33 self._y_true = pd.Series(y_true, name=self.true_name)
34
35 if isinstance(y_pred, pd.Series):
G:\Anaconda\lib\site-packages\pandas\core\series.py in __init__(self, data, index, dtype, name, copy, fastpath)
273 else:
274 data = _sanitize_array(data, index, dtype, copy,
--> 275 raise_cast_failure=True)
276
277 data = SingleBlockManager(data, index, fastpath=True)
G:\Anaconda\lib\site-packages\pandas\core\series.py in _sanitize_array(data, index, dtype, copy, raise_cast_failure)
4163 elif subarr.ndim > 1:
4164 if isinstance(data, np.ndarray):
-> 4165 raise Exception('Data must be 1-dimensional')
4166 else:
4167 subarr = com._asarray_tuplesafe(data, dtype=dtype)
Exception: Data must be 1-dimensional
Is the data I am passing in not of the correct type? The datatypes match the ones I used in a past project, so I thought I could replicate it here. For those wondering: X is the company names that I encoded, y is the binarized case status, and Z is a wage amount of float dtype.

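"...the output for y is an array..." The array that you show is two-dimensional, with shape (n, 1). (One of the dimensions is trivial, but it is still 2-d.) Do something like y[:, 0] or y.ravel() to get a 1-d version before passing it on (for example, to ConfusionMatrix, which builds a pandas Series from it and therefore requires 1-d data).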

Error when trying to create a facetgrid of pointplots

I have sequencing data of micro-RNAs (miR) under different conditions ('Comparisons'), and I want to create a point plot that shows, on separate graphs, the fold change for each miR. The data (a pandas DataFrame) looks like this:
mir_Names Comparison Fold_Change
9 9 mmu-miR-100-4373160\n15 m... YAD-YC 508539.390000
15 9 mmu-miR-100-4373160\n15 m... YAD-YC 26.816000
17 9 mmu-miR-100-4373160\n15 m... YAD-YC 728.608000
18 9 mmu-miR-100-4373160\n15 m... YAD-YC 11483029.706000
'upregulated' is a subset of the DataFrame, and I tried to visualize it using:
g = sns.FacetGrid(upregulated, col='Comparison', sharex=True, sharey=True, size=0.75, aspect=12./8, despine=True, margin_titles=True)
g.map(sns.pointplot, 'mir_Names', 'Fold_Change', data=upregulated)
but it gives me the following error, for which I couldn't find any solution:
ValueError Traceback (most recent call last) <ipython-input-180-a1cf1b282869> in <module>()
1 g = sns.FacetGrid(upregulated, col='Comparison', sharex=True, sharey=True, size=0.75, aspect=12./8, despine=True, margin_titles=True)
----> 2 g.map(sns.pointplot, 'mir_Names', 'Fold_Change', data=upregulated) #maybe with .count
c:\pyzo2014a\lib\site-packages\seaborn\axisgrid.py in map(self, func,
*args, **kwargs)
446
447 # Finalize the annotations and layout
--> 448 self._finalize_grid(args[:2])
449
450 return self
c:\pyzo2014a\lib\site-packages\seaborn\axisgrid.py in
_finalize_grid(self, axlabels)
537 self.set_axis_labels(*axlabels)
538 self.set_titles()
--> 539 self.fig.tight_layout()
540
541 def facet_axis(self, row_i, col_j):
c:\pyzo2014a\lib\site-packages\matplotlib\figure.py in tight_layout(self, renderer, pad, h_pad, w_pad, rect) 1663 rect=rect) 1664
-> 1665 self.subplots_adjust(**kwargs) 1666 1667
c:\pyzo2014a\lib\site-packages\matplotlib\figure.py in subplots_adjust(self, *args, **kwargs) 1520 1521 """
-> 1522 self.subplotpars.update(*args, **kwargs) 1523 for ax in self.axes: 1524 if not isinstance(ax, SubplotBase):
c:\pyzo2014a\lib\site-packages\matplotlib\figure.py in update(self, left, bottom, right, top, wspace, hspace)
223 if self.bottom >= self.top:
224 reset()
--> 225 raise ValueError('bottom cannot be >= top')
226
227 def _update_this(self, s, val):
**ValueError: bottom cannot be >= top**
What causes this error?

Network Plot Error Using Python / iPython

The following starter code errors out (AttributeError: incompatible shape for a non-contiguous array) when I try to use networkx on OSX 10.8.2, in an IPython notebook.
import networkx as nx
g = nx.Graph()
g.add_node(1)
g.add_node(2)
g.add_edge(1,2) # no error if this line is omitted
nx.draw(g)
I do not see the error if I do not add the edge to this graph. Matplotlib, Numpy, and Scipy were all installed as suggested here.
Below is the error message:
AttributeError Traceback (most recent call last)
<ipython-input-33-df7dfeff6452> in <module>()
----> 1 nx.draw(g)
/usr/local/lib/python2.7/site-packages/networkx/drawing/nx_pylab.pyc in draw(G, pos, ax, hold, **kwds)
131 pylab.hold(h)
132 try:
--> 133 draw_networkx(G,pos=pos,ax=ax,**kwds)
134 ax.set_axis_off()
135 pylab.draw_if_interactive()
/usr/local/lib/python2.7/site-packages/networkx/drawing/nx_pylab.pyc in draw_networkx(G, pos, with_labels, **kwds)
265
266 node_collection=draw_networkx_nodes(G, pos, **kwds)
--> 267 edge_collection=draw_networkx_edges(G, pos, **kwds)
268 if with_labels:
269 draw_networkx_labels(G, pos, **kwds)
/usr/local/lib/python2.7/site-packages/networkx/drawing/nx_pylab.pyc in draw_networkx_edges(G, pos, edgelist, width, edge_color, style, alpha, edge_cmap, edge_vmin, edge_vmax, ax, arrows, label, **kwds)
544 edge_collection.set_zorder(1) # edges go behind nodes
545 edge_collection.set_label(label)
--> 546 ax.add_collection(edge_collection)
547
548 # Note: there was a bug in mpl regarding the handling of alpha values for
/usr/local/lib/python2.7/site-packages/matplotlib/axes.pyc in add_collection(self, collection, autolim)
1443 if autolim:
1444 if collection._paths and len(collection._paths):
-> 1445 self.update_datalim(collection.get_datalim(self.transData))
1446
1447 collection._remove_method = lambda h: self.collections.remove(h)
/usr/local/lib/python2.7/site-packages/matplotlib/collections.pyc in get_datalim(self, transData)
165 offsets = offsets.filled(np.nan)
166 # get_path_collection_extents handles nan but not masked arrays
--> 167 offsets.shape = (-1, 2) # Make it Nx2
168
169 result = mpath.get_path_collection_extents(
AttributeError: incompatible shape for a non-contiguous array
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
/usr/local/lib/python2.7/site-packages/IPython/zmq/pylab/backend_inline.pyc in show(close)
100 try:
101 for figure_manager in Gcf.get_all_fig_managers():
--> 102 send_figure(figure_manager.canvas.figure)
103 finally:
104 show._to_draw = []
/usr/local/lib/python2.7/site-packages/IPython/zmq/pylab/backend_inline.pyc in send_figure(fig)
209 """
210 fmt = InlineBackend.instance().figure_format
--> 211 data = print_figure(fig, fmt)
212 # print_figure will return None if there's nothing to draw:
213 if data is None:
/usr/local/lib/python2.7/site-packages/IPython/core/pylabtools.pyc in print_figure(fig, fmt)
102 try:
103 bytes_io = BytesIO()
--> 104 fig.canvas.print_figure(bytes_io, format=fmt, bbox_inches='tight')
105 data = bytes_io.getvalue()
106 finally:
/usr/local/lib/python2.7/site-packages/matplotlib/backend_bases.pyc in print_figure(self, filename, dpi, facecolor, edgecolor, orientation, format, **kwargs)
1981 orientation=orientation,
1982 dryrun=True,
-> 1983 **kwargs)
1984 renderer = self.figure._cachedRenderer
1985 bbox_inches = self.figure.get_tightbbox(renderer)
/usr/local/lib/python2.7/site-packages/matplotlib/backends/backend_agg.pyc in print_png(self, filename_or_obj, *args, **kwargs)
467
468 def print_png(self, filename_or_obj, *args, **kwargs):
--> 469 FigureCanvasAgg.draw(self)
470 renderer = self.get_renderer()
471 original_dpi = renderer.dpi
/usr/local/lib/python2.7/site-packages/matplotlib/backends/backend_agg.pyc in draw(self)
419
420 try:
--> 421 self.figure.draw(self.renderer)
422 finally:
423 RendererAgg.lock.release()
/usr/local/lib/python2.7/site-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs)
53 def draw_wrapper(artist, renderer, *args, **kwargs):
54 before(artist, renderer)
---> 55 draw(artist, renderer, *args, **kwargs)
56 after(artist, renderer)
57
/usr/local/lib/python2.7/site-packages/matplotlib/figure.pyc in draw(self, renderer)
896 dsu.sort(key=itemgetter(0))
897 for zorder, a, func, args in dsu:
--> 898 func(*args)
899
900 renderer.close_group('figure')
/usr/local/lib/python2.7/site-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs)
53 def draw_wrapper(artist, renderer, *args, **kwargs):
54 before(artist, renderer)
---> 55 draw(artist, renderer, *args, **kwargs)
56 after(artist, renderer)
57
/usr/local/lib/python2.7/site-packages/matplotlib/axes.pyc in draw(self, renderer, inframe)
1995
1996 for zorder, a in dsu:
-> 1997 a.draw(renderer)
1998
1999 renderer.close_group('axes')
/usr/local/lib/python2.7/site-packages/matplotlib/artist.pyc in draw_wrapper(artist, renderer, *args, **kwargs)
53 def draw_wrapper(artist, renderer, *args, **kwargs):
54 before(artist, renderer)
---> 55 draw(artist, renderer, *args, **kwargs)
56 after(artist, renderer)
57
/usr/local/lib/python2.7/site-packages/matplotlib/collections.pyc in draw(self, renderer)
227 self.update_scalarmappable()
228
--> 229 transform, transOffset, offsets, paths = self._prepare_points()
230
231 gc = renderer.new_gc()
/usr/local/lib/python2.7/site-packages/matplotlib/collections.pyc in _prepare_points(self)
203
204 offsets = np.asanyarray(offsets, np.float_)
--> 205 offsets.shape = (-1, 2) # Make it Nx2
206
207 if not transform.is_affine:
AttributeError: incompatible shape for a non-contiguous array

This was reported earlier in Draw a graph with edges using Networkx. It likely is a bug/feature in the NumPy library development version as mentioned above (numpy issue 2700).

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

ValueError: Number of columns must be a positive integer, not 0 - pandas

I have solved it: my data's columns had dtype object, but the function needs numeric columns, so I used DataFrame.convert_dtypes() and it works.

Related

Seaborn pairplot not running only on a specific system

How to convert coordinate columns to Point column with Shapely and Dask?

How to fix "Exception: Data must be 1-dimensional" error when running Kmeans

Error when trying to create a facetgrid of pointplots

Network Plot Error Using Python / iPython

Categories

Resources