Where is a mistake in this code, which raises ValueError? - matplotlib

I have a code:
# Draw a horizontal bar chart of 'Total_Deaths' per 'Country_Other' from
# top20_deathtoll, strip the axes frame, and add a bold headline.
fig, ax = plt.subplots(figsize=(5,8))
ax.barh(top20_deathtoll['Country_Other'], top20_deathtoll['Total_Deaths'], color='red', linewidth=0.45)
ax.set_xlabel('Total Deaths', fontsize=10)
ax.set_ylabel('Country', fontsize=10)
ax.set_title('Top 20 Countries by Total Deaths', fontsize=12)
# Hide all four spines so the bars are drawn without a surrounding frame.
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_visible(False)
ax.spines['left'].set_visible(False)
# Remove the tick marks on the left and bottom sides but keep their labels.
ax.tick_params(left=False, bottom=False, labelleft=True, labelbottom=True)
# Use only three x ticks and show their labels along the top instead of the bottom.
ax.xaxis.set_ticks([0, 150000, 300000])
ax.xaxis.set_tick_params(labelbottom=False, labeltop=True, color='grey')
# NOTE(review): this call is the likely source of the "Image size ... too large"
# ValueError. The x/y keywords of a title are presumably interpreted in
# axes-relative coordinates (0..1), not data coordinates, so x=-80000 places the
# text tens of thousands of axes-widths to the left; a renderer that fits the
# image around every artist (the traceback goes through print_figure with
# bbox_inches) then needs a gigantic canvas. To anchor text at data coordinates,
# ax.text(x, y, s, ...) is the usual tool - TODO confirm against the matplotlib docs.
plt.title("The Death Toll Worldwide Is 1.5M+", x=-80000, y=23.5, fontsize=17, fontweight="bold")
plt.tight_layout()
plt.show()
When I run the code above, it raises a ValueError:
ValueError: Image size of 22320474x10268 pixels is too large. It must be less than 2^16 in each direction.
I wonder which image the traceback is referring to, and what I can do to fix it.
EDIT
Here is the first part of the traceback; I can't post all of it because then there would be too much code in my question. I hope this will be enough. Forgive me if I didn't understand something obvious, as I'm quite a newbie:
ValueError Traceback (most recent call last)
File ~\programs\anaconda3\lib\site-packages\IPython\core\formatters.py:339, in BaseFormatter.__call__(self, obj)
337 pass
338 else:
--> 339 return printer(obj)
340 # Finally look for special method names
341 method = get_real_method(obj, self.print_method)
File ~\programs\anaconda3\lib\site-packages\IPython\core\pylabtools.py:151, in print_figure(fig, fmt, bbox_inches, base64, **kwargs)
148 from matplotlib.backend_bases import FigureCanvasBase
149 FigureCanvasBase(fig)
--> 151 fig.canvas.print_figure(bytes_io, **kw)
152 data = bytes_io.getvalue()
153 if fmt == 'svg':
File ~\programs\anaconda3\lib\site-packages\matplotlib\backend_bases.py:2319, in FigureCanvasBase.print_figure(self, filename, dpi, facecolor, edgecolor, orientation, format, bbox_inches, pad_inches, bbox_extra_artists, backend, **kwargs)
2315 try:
2316 # _get_renderer may change the figure dpi (as vector formats
2317 # force the figure dpi to 72), so we need to set it again here.
2318 with cbook._setattr_cm(self.figure, dpi=dpi):
-> 2319 result = print_method(
2320 filename,
2321 facecolor=facecolor,
2322 edgecolor=edgecolor,
2323 orientation=orientation,
2324 bbox_inches_restore=_bbox_inches_restore,
2325 **kwargs)
2326 finally:
2327 if bbox_inches and restore_bbox:

Related

ValueError: Number of columns must be a positive integer, not 0

When I execute the code below to plot a figure:
scatter_matrix(total_frame)
total_frame is a DataFrame like this
The error looks like this:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_11336\1619863705.py in <module>
1 total_frame.dropna(how='any')
----> 2 scatter_matrix(total_frame)
3 plt.show()
~\.conda\envs\env2\lib\site-packages\pandas\plotting\_misc.py in scatter_matrix(frame, alpha, figsize, ax, grid, diagonal, marker, density_kwds, hist_kwds, range_padding, **kwargs)
137 hist_kwds=hist_kwds,
138 range_padding=range_padding,
--> 139 **kwargs,
140 )
141
~\.conda\envs\env2\lib\site-packages\pandas\plotting\_matplotlib\misc.py in scatter_matrix(frame, alpha, figsize, ax, grid, diagonal, marker, density_kwds, hist_kwds, range_padding, **kwds)
48 n = df.columns.size
49 naxes = n * n
---> 50 fig, axes = create_subplots(naxes=naxes, figsize=figsize, ax=ax, squeeze=False)
51
52 # no gaps between subplots
~\.conda\envs\env2\lib\site-packages\pandas\plotting\_matplotlib\tools.py in create_subplots(naxes, sharex, sharey, squeeze, subplot_kw, ax, layout, layout_type, **fig_kw)
265
266 # Create first subplot separately, so we can share it if requested
--> 267 ax0 = fig.add_subplot(nrows, ncols, 1, **subplot_kw)
268
269 if sharex:
~\.conda\envs\env2\lib\site-packages\matplotlib\figure.py in add_subplot(self, *args, **kwargs)
770 projection_class, pkw = self._process_projection_requirements(
771 *args, **kwargs)
--> 772 ax = subplot_class_factory(projection_class)(self, *args, **pkw)
773 key = (projection_class, pkw)
774 return self._add_axes_internal(ax, key)
~\.conda\envs\env2\lib\site-packages\matplotlib\axes\_subplots.py in __init__(self, fig, *args, **kwargs)
34 self._axes_class.__init__(self, fig, [0, 0, 1, 1], **kwargs)
35 # This will also update the axes position.
---> 36 self.set_subplotspec(SubplotSpec._from_subplot_args(fig, args))
37
38 #_api.deprecated(
~\.conda\envs\env2\lib\site-packages\matplotlib\gridspec.py in _from_subplot_args(figure, args)
595 f"{len(args)} were given")
596
--> 597 gs = GridSpec._check_gridspec_exists(figure, rows, cols)
598 if gs is None:
599 gs = GridSpec(rows, cols, figure=figure)
~\.conda\envs\env2\lib\site-packages\matplotlib\gridspec.py in _check_gridspec_exists(figure, nrows, ncols)
223 return gs
224 # else gridspec not found:
--> 225 return GridSpec(nrows, ncols, figure=figure)
226
227 def __getitem__(self, key):
~\.conda\envs\env2\lib\site-packages\matplotlib\gridspec.py in __init__(self, nrows, ncols, figure, left, bottom, right, top, wspace, hspace, width_ratios, height_ratios)
385 super().__init__(nrows, ncols,
386 width_ratios=width_ratios,
--> 387 height_ratios=height_ratios)
388
389 _AllowedKeys = ["left", "bottom", "right", "top", "wspace", "hspace"]
~\.conda\envs\env2\lib\site-packages\matplotlib\gridspec.py in __init__(self, nrows, ncols, height_ratios, width_ratios)
51 if not isinstance(ncols, Integral) or ncols <= 0:
52 raise ValueError(
---> 53 f"Number of columns must be a positive integer, not {ncols!r}")
54 self._nrows, self._ncols = nrows, ncols
55 self.set_height_ratios(height_ratios)
ValueError: Number of columns must be a positive integer, not 0
<Figure size 432x288 with 0 Axes>
I searched for this error and couldn't find anything. Please help me!
I have solved it:
my data's dtype was object, but the function needs numeric data,
so I used DataFrame.convert_dtypes() (i.e. total_frame = total_frame.convert_dtypes()) and it works.

hdbscan error when inside rapids container

I am using rapids UMAP in conjunction with HDBSCAN inside a rapidsai docker container : rapidsai/rapidsai-core:0.18-cuda11.0-runtime-ubuntu18.04-py3.7
# Reproduction script: scale synthetic blobs and project them with cuML UMAP,
# then cluster the projected points with the hdbscan package.
import cudf
import cupy
from cuml.manifold import UMAP
import hdbscan
from sklearn.datasets import make_blobs
from cuml.experimental.preprocessing import StandardScaler
# Synthetic input: 100000 samples with 10 features each.
blobs, labels = make_blobs(n_samples=100000, n_features=10)
df_gpu=cudf.DataFrame(blobs)
scaler= StandardScaler()
cupy_scaled=scaler.fit_transform(df_gpu.values)
# Reduce the scaled data to 3 components.
projector= UMAP(n_components=3, n_neighbors=2000)
cupy_projected=projector.fit_transform(cupy_scaled)
# Convert the projection to a NumPy array before handing it to hdbscan.
numpy_projected=cupy.asnumpy(cupy_projected)
# NOTE(review): min_samples is not set while min_cluster_size is large; the
# answer below suggests trying a smallish min_samples. The commented-out
# core_dist_n_jobs=1 is the workaround reported to avoid the error.
clusterer= hdbscan.HDBSCAN(min_cluster_size=1000, prediction_data=True, gen_min_span_tree=True)#, core_dist_n_jobs=1)
clusterer.fit(numpy_projected)
I get an error, which is fixed if I use core_dist_n_jobs=1, but that makes the code slower:
--------------------------------------------------------------------------- TerminatedWorkerError Traceback (most recent call
last) in
1 clusterer= hdbscan.HDBSCAN(min_cluster_size=1000, prediction_data=True, gen_min_span_tree=True)
----> 2 clusterer.fit(numpy_projected)
/opt/conda/envs/rapids/lib/python3.7/site-packages/hdbscan/hdbscan_.py
in fit(self, X, y)
917 self._condensed_tree,
918 self._single_linkage_tree,
--> 919 self._min_spanning_tree) = hdbscan(X, **kwargs)
920
921 if self.prediction_data:
/opt/conda/envs/rapids/lib/python3.7/site-packages/hdbscan/hdbscan_.py
in hdbscan(X, min_cluster_size, min_samples, alpha,
cluster_selection_epsilon, metric, p, leaf_size, algorithm, memory,
approx_min_span_tree, gen_min_span_tree, core_dist_n_jobs,
cluster_selection_method, allow_single_cluster,
match_reference_implementation, **kwargs)
613 approx_min_span_tree,
614 gen_min_span_tree,
--> 615 core_dist_n_jobs, **kwargs)
616 else: # Metric is a valid BallTree metric
617 # TO DO: Need heuristic to decide when to go to boruvka;
/opt/conda/envs/rapids/lib/python3.7/site-packages/joblib/memory.py in
call(self, *args, **kwargs)
350
351 def call(self, *args, **kwargs):
--> 352 return self.func(*args, **kwargs)
353
354 def call_and_shelve(self, *args, **kwargs):
/opt/conda/envs/rapids/lib/python3.7/site-packages/hdbscan/hdbscan_.py
in _hdbscan_boruvka_kdtree(X, min_samples, alpha, metric, p,
leaf_size, approx_min_span_tree, gen_min_span_tree, core_dist_n_jobs,
**kwargs)
276 leaf_size=leaf_size // 3,
277 approx_min_span_tree=approx_min_span_tree,
--> 278 n_jobs=core_dist_n_jobs, **kwargs)
279 min_spanning_tree = alg.spanning_tree()
280 # Sort edges of the min_spanning_tree by weight
hdbscan/_hdbscan_boruvka.pyx in
hdbscan._hdbscan_boruvka.KDTreeBoruvkaAlgorithm.init()
hdbscan/_hdbscan_boruvka.pyx in
hdbscan._hdbscan_boruvka.KDTreeBoruvkaAlgorithm._compute_bounds()
/opt/conda/envs/rapids/lib/python3.7/site-packages/joblib/parallel.py
in call(self, iterable) 1052 1053 with
self._backend.retrieval_context():
-> 1054 self.retrieve() 1055 # Make sure that we get a last message telling us we are done 1056
elapsed_time = time.time() - self._start_time
/opt/conda/envs/rapids/lib/python3.7/site-packages/joblib/parallel.py
in retrieve(self)
931 try:
932 if getattr(self._backend, 'supports_timeout', False):
--> 933 self._output.extend(job.get(timeout=self.timeout))
934 else:
935 self._output.extend(job.get())
/opt/conda/envs/rapids/lib/python3.7/site-packages/joblib/_parallel_backends.py
in wrap_future_result(future, timeout)
540 AsyncResults.get from multiprocessing."""
541 try:
--> 542 return future.result(timeout=timeout)
543 except CfTimeoutError as e:
544 raise TimeoutError from e
/opt/conda/envs/rapids/lib/python3.7/concurrent/futures/_base.py in
result(self, timeout)
433 raise CancelledError()
434 elif self._state == FINISHED:
--> 435 return self.__get_result()
436 else:
437 raise TimeoutError()
/opt/conda/envs/rapids/lib/python3.7/concurrent/futures/_base.py in
__get_result(self)
382 def __get_result(self):
383 if self._exception:
--> 384 raise self._exception
385 else:
386 return self._result
TerminatedWorkerError: A worker process managed by the executor was
unexpectedly terminated. This could be caused by a segmentation fault
while calling the function or by an excessive memory usage causing the
Operating System to kill the worker.
The exit codes of the workers are {EXIT(1)}
Is there a way to solve this issue but still keep HDBSCAN to be fast?
Try setting min_samples to a value
In https://github.com/scikit-learn-contrib/hdbscan/issues/345#issuecomment-628749332 , lmcinnes says that you "may have issues if your min_cluster_size is large and your min_samples is not set. You could try setting min_samples to something smallish and see if that helps." I noticed that you do not have a min_samples set in your code.

why it is showing " TypeError: Invalid shape (20,) for image data "

import numpy as np
import matplotlib.pyplot as plt
# Build a 1-D array of 4*5 = 20 random integers.
m1=np.random.randint(0,20,4*5)
# NOTE: the result of reshape is not assigned to anything, so m1 itself is
# unchanged and still has shape (20,) when it reaches imshow.
m1.reshape(4,5)
plt.imshow(m1)
**When I executed the above code in Python 3 in a Jupyter notebook, I got a TypeError. Could anyone please explain this error in the simplest way?**
The traceback looks like this:
TypeError Traceback (most recent call last)
in
----> 1 plt.imshow(m1)
c:\users\jaiprakash\appdata\local\programs\python\python37-32\lib\site-packages\matplotlib\pyplot.py in imshow(X, cmap, norm, aspect, interpolation, alpha, vmin, vmax, origin, extent, shape, filternorm, filterrad, imlim, resample, url, data, **kwargs)
2649 filternorm=filternorm, filterrad=filterrad, imlim=imlim,
2650 resample=resample, url=url, **({"data": data} if data is not
-> 2651 None else {}), **kwargs)
2652 sci(__ret)
2653 return __ret
c:\users\jaiprakash\appdata\local\programs\python\python37-32\lib\site-packages\matplotlib\__init__.py in inner(ax, data, *args, **kwargs)
1563 def inner(ax, *args, data=None, **kwargs):
1564 if data is None:
-> 1565 return func(ax, *map(sanitize_sequence, args), **kwargs)
1566
1567 bound = new_sig.bind(ax, *args, **kwargs)
c:\users\jaiprakash\appdata\local\programs\python\python37-32\lib\site-packages\matplotlib\cbook\deprecation.py in wrapper(*args, **kwargs)
356 f"%(removal)s. If any parameter follows {name!r}, they "
357 f"should be pass as keyword, not positionally.")
--> 358 return func(*args, **kwargs)
359
360 return wrapper
c:\users\jaiprakash\appdata\local\programs\python\python37-32\lib\site-packages\matplotlib\cbook\deprecation.py in wrapper(*args, **kwargs)
356 f"%(removal)s. If any parameter follows {name!r}, they "
357 f"should be pass as keyword, not positionally.")
--> 358 return func(*args, **kwargs)
359
360 return wrapper
c:\users\jaiprakash\appdata\local\programs\python\python37-32\lib\site-packages\matplotlib\axes\_axes.py in imshow(self, X, cmap, norm, aspect, interpolation, alpha, vmin, vmax, origin, extent, shape, filternorm, filterrad, imlim, resample, url, **kwargs)
5613 resample=resample, **kwargs)
5614
-> 5615 im.set_data(X)
5616 im.set_alpha(alpha)
5617 if im.get_clip_path() is None:
c:\users\jaiprakash\appdata\local\programs\python\python37-32\lib\site-packages\matplotlib\image.py in set_data(self, A)
697 or self._A.ndim == 3 and self._A.shape[-1] in [3, 4]):
698 raise TypeError("Invalid shape {} for image data"
--> 699 .format(self._A.shape))
700
701 if self._A.ndim == 3:
TypeError: Invalid shape (20,) for image data
When you call m1.reshape(4,5) you don't assign the result to a variable. The method won't change the shape of m1 in place; it returns a new array, so you have to assign it back to m1:
import numpy as np
import matplotlib.pyplot as plt
# Alternative (left commented out): ask randint for a (4, 5) array directly.
#m1=np.random.randint(low=0, high=20, size=(4,5)) # << personally I would have done this & not bothered with the reshape
m1 = np.random.randint(0,20,4*5)
# Assign the reshaped array back to m1 so imshow receives 2-D data.
m1 = m1.reshape(4, 5)
plt.imshow(m1)

Pandas dropna throwing ValueError: "Cannot convert non-finite values (NA or inf) to integer"

Pandas: 0.25.3
Python: 3.7.4
I have a data frame, and I want to remove the columns which contain only NaN values. That should be easy, because there is a Pandas DataFrame function which does exactly that—dropna. Here's my code:
long_summary = long_summary.dropna(axis='columns', how='all')
But that simple line throws an exception:
ValueError: Cannot convert non-finite values (NA or inf) to integer
I cannot see how calling dropna would lead to this exception. What is going on and how do I fix it?
I'll include the whole exception stack just-in-case that makes the problem clearer:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-88-b4926abd4d81> in <module>
----> 1 long_summary = long_summary.dropna(axis='columns', how='all')
c:\users\timregan\appdata\local\programs\python\python37\lib\site-packages\pandas\core\frame.py in dropna(self, axis, how, thresh, subset, inplace)
4860 agg_obj = self.take(indices, axis=agg_axis)
4861
-> 4862 count = agg_obj.count(axis=agg_axis)
4863
4864 if thresh is not None:
c:\users\timregan\appdata\local\programs\python\python37\lib\site-packages\pandas\core\frame.py in count(self, axis, level, numeric_only)
7848 result = Series(counts, index=frame._get_agg_axis(axis))
7849
-> 7850 return result.astype("int64")
7851
7852 def _count_level(self, level, axis=0, numeric_only=False):
c:\users\timregan\appdata\local\programs\python\python37\lib\site-packages\pandas\core\generic.py in astype(self, dtype, copy, errors, **kwargs)
5880 # else, only a single dtype is given
5881 new_data = self._data.astype(
-> 5882 dtype=dtype, copy=copy, errors=errors, **kwargs
5883 )
5884 return self._constructor(new_data).__finalize__(self)
c:\users\timregan\appdata\local\programs\python\python37\lib\site-packages\pandas\core\internals\managers.py in astype(self, dtype, **kwargs)
579
580 def astype(self, dtype, **kwargs):
--> 581 return self.apply("astype", dtype=dtype, **kwargs)
582
583 def convert(self, **kwargs):
c:\users\timregan\appdata\local\programs\python\python37\lib\site-packages\pandas\core\internals\managers.py in apply(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)
436 kwargs[k] = obj.reindex(b_items, axis=axis, copy=align_copy)
437
--> 438 applied = getattr(b, f)(**kwargs)
439 result_blocks = _extend_blocks(applied, result_blocks)
440
c:\users\timregan\appdata\local\programs\python\python37\lib\site-packages\pandas\core\internals\blocks.py in astype(self, dtype, copy, errors, values, **kwargs)
557
558 def astype(self, dtype, copy=False, errors="raise", values=None, **kwargs):
--> 559 return self._astype(dtype, copy=copy, errors=errors, values=values, **kwargs)
560
561 def _astype(self, dtype, copy=False, errors="raise", values=None, **kwargs):
c:\users\timregan\appdata\local\programs\python\python37\lib\site-packages\pandas\core\internals\blocks.py in _astype(self, dtype, copy, errors, values, **kwargs)
641 # _astype_nansafe works fine with 1-d only
642 vals1d = values.ravel()
--> 643 values = astype_nansafe(vals1d, dtype, copy=True, **kwargs)
644
645 # TODO(extension)
c:\users\timregan\appdata\local\programs\python\python37\lib\site-packages\pandas\core\dtypes\cast.py in astype_nansafe(arr, dtype, copy, skipna)
698 if not np.isfinite(arr).all():
699 raise ValueError(
--> 700 "Cannot convert non-finite values (NA or inf) to " "integer"
701 )
702
ValueError: Cannot convert non-finite values (NA or inf) to integer
(N.B. the data types of my columns are int64, Int32, and float64)
In the comments Scott asked for data to reproduce this issue. The redacted CSV is available on Dropbox here.
df = pd.read_csv('E:\\Temp\\dropna.csv')
df.dropna(axis='columns', how='all')
But be warned, the CSV is 3.3 GB and the resulting data frame has over 60 million rows. I tried cutting out rows, but it seems to need to be this long to trigger the error.

Error when trying to create a facetgrid of pointplots

I have sequencing data of micro-RNAs (miR) under different conditions ('Comparisons'), and I want to create a point plot which will show me, on separate graphs, the fold change for each miR. The data looks like this (it is a pandas DataFrame):
mir_Names Comparison Fold_Change
9 9 mmu-miR-100-4373160\n15 m... YAD-YC 508539.390000
15 9 mmu-miR-100-4373160\n15 m... YAD-YC 26.816000
17 9 mmu-miR-100-4373160\n15 m... YAD-YC 728.608000
18 9 mmu-miR-100-4373160\n15 m... YAD-YC 11483029.706000
'upregulated' is a subset of the dataframe, and I tried to visualize it using:
# One facet per distinct 'Comparison' value, all facets sharing both axes.
# NOTE(review): size=0.75 presumably sets each facet's height in inches; that
# may be too small for tight_layout (see the traceback below) to fit the tick
# labels and titles, which would explain "bottom cannot be >= top" - TODO confirm.
g = sns.FacetGrid(upregulated, col='Comparison', sharex=True, sharey=True, size=0.75, aspect=12./8, despine=True, margin_titles=True)
# NOTE(review): the grid was already built from `upregulated`; passing data=
# again to map() looks redundant and may bypass per-facet subsetting - verify.
g.map(sns.pointplot, 'mir_Names', 'Fold_Change', data=upregulated)
But it gives me the following error, for which I couldn't find a solution:
ValueError Traceback (most recent call last) <ipython-input-180-a1cf1b282869> in <module>()
1 g = sns.FacetGrid(upregulated, col='Comparison', sharex=True, sharey=True, size=0.75, aspect=12./8, despine=True, margin_titles=True)
----> 2 g.map(sns.pointplot, 'mir_Names', 'Fold_Change', data=upregulated) #maybe with .count
c:\pyzo2014a\lib\site-packages\seaborn\axisgrid.py in map(self, func,
*args, **kwargs)
446
447 # Finalize the annotations and layout
--> 448 self._finalize_grid(args[:2])
449
450 return self
c:\pyzo2014a\lib\site-packages\seaborn\axisgrid.py in
_finalize_grid(self, axlabels)
537 self.set_axis_labels(*axlabels)
538 self.set_titles()
--> 539 self.fig.tight_layout()
540
541 def facet_axis(self, row_i, col_j):
c:\pyzo2014a\lib\site-packages\matplotlib\figure.py in tight_layout(self, renderer, pad, h_pad, w_pad, rect) 1663 rect=rect) 1664
-> 1665 self.subplots_adjust(**kwargs) 1666 1667
c:\pyzo2014a\lib\site-packages\matplotlib\figure.py in subplots_adjust(self, *args, **kwargs) 1520 1521 """
-> 1522 self.subplotpars.update(*args, **kwargs) 1523 for ax in self.axes: 1524 if not isinstance(ax, SubplotBase):
c:\pyzo2014a\lib\site-packages\matplotlib\figure.py in update(self, left, bottom, right, top, wspace, hspace)
223 if self.bottom >= self.top:
224 reset()
--> 225 raise ValueError('bottom cannot be >= top')
226
227 def _update_this(self, s, val):
**ValueError: bottom cannot be >= top**
What causes this error?