Can't plot data frame column, I want to plot a scatter graph - dataframe

My data
I want to make a scatter plot of two columns, but it's not working. Could you help, please? I have confirmed that the column exists when I list the columns, yet pandas reports that it doesn't exist, as shown by the traceback at the bottom.
df = data
x = Birth rate (object)
y = CO₂ (per capita). (float64)
i tried
data.plot(x='Birth_rate', y='CO₂_(per capita)')
plt.title('1960 - C02 emissions vs. Crude birth rates')
It resulted in KeyError: 'Birth_rate'
File /shared-libs/python3.9/py/lib/python3.9/site-packages/pandas/plotting/_core.py:920, in PlotAccessor.__call__(self, *args, **kwargs)
918 if is_integer(x) and not data.columns.holds_integer():
919 x = data_cols[x]
--> 920 elif not isinstance(data[x], ABCSeries):
921 raise ValueError("x must be a label or position")
922 data = data.set_index(x)
File /shared-libs/python3.9/py/lib/python3.9/site-packages/pandas/core/frame.py:3024, in DataFrame.__getitem__(self, key)
3022 if self.columns.nlevels > 1:
3023 return self._getitem_multilevel(key)
-> 3024 indexer = self.columns.get_loc(key)
3025 if is_integer(indexer):
3026 indexer = [indexer]
File /shared-libs/python3.9/py/lib/python3.9/site-packages/pandas/core/indexes/base.py:3083, in Index.get_loc(self, key, method, tolerance)
3081 return self._engine.get_loc(casted_key)
3082 except KeyError as err:
-> 3083 raise KeyError(key) from err
3085 if tolerance is not None:
3086 tolerance = self._convert_tolerance(tolerance, np.asarray(key))
KeyError: 'Birth_rate'
I tried an index as well and got this error:
KeyError: "None of ['Birth_rate'] are in the columns"

Related

IndexError: indices are out-of-bounds

Hello everyone. When I run this part of the code I get an error, and I don't know why it happens: it could be from updating my packages or from a mistake in my for loop. Thanks to everybody who wants to help solve my problem.
here is the code
%%capture
final = dict()
final['timeofday'] = []
final['image'] = []
final['name'] = []
final['location'] = []
final['price'] = []
final['rating'] = []
final['category'] = []
for i in range(1,(end_date - begin_date).days+2):
for j in range(2):
final['timeofday'].append('Morning')
for j in range(2):
final['timeofday'].append('Evening')
for i in range(len(final['timeofday'])):
if i%4 == 0:
final = top_recc(with_url, final)
else:
final = find_closest(with_url, final['location'][-1],final['timeofday'][i], final)
and it gives me the errors below:
IndexError Traceback (most recent call last)
File ~\anaconda3\lib\site-packages\pandas\core\indexing.py:1482, in _iLocIndexer._get_list_axis(self, key, axis)
1481 try:
-> 1482 return self.obj._take_with_is_copy(key, axis=axis)
1483 except IndexError as err:
1484 # re-raise with different error message
File ~\anaconda3\lib\site-packages\pandas\core\generic.py:3716, in NDFrame._take_with_is_copy(self, indices, axis)
3709 """
3710 Internal version of the `take` method that sets the `_is_copy`
3711 attribute to keep track of the parent dataframe (using in indexing
(...)
3714 See the docstring of `take` for full explanation of the parameters.
3715 """
-> 3716 result = self.take(indices=indices, axis=axis)
3717 # Maybe set copy if we didn't actually change the index.
File ~\anaconda3\lib\site-packages\pandas\core\generic.py:3703, in NDFrame.take(self, indices, axis, is_copy, **kwargs)
3701 self._consolidate_inplace()
-> 3703 new_data = self._mgr.take(
3704 indices, axis=self._get_block_manager_axis(axis), verify=True
3705 )
3706 return self._constructor(new_data).__finalize__(self, method="take")
File ~\anaconda3\lib\site-packages\pandas\core\internals\managers.py:897, in BaseBlockManager.take(self, indexer, axis, verify)
896 n = self.shape[axis]
--> 897 indexer = maybe_convert_indices(indexer, n, verify=verify)
899 new_labels = self.axes[axis].take(indexer)
File ~\anaconda3\lib\site-packages\pandas\core\indexers\utils.py:292, in maybe_convert_indices(indices, n, verify)
291 if mask.any():
--> 292 raise IndexError("indices are out-of-bounds")
293 return indices
IndexError: indices are out-of-bounds
The above exception was the direct cause of the following exception:
IndexError Traceback (most recent call last)
Input In [8], in <cell line: 16>()
16 for i in range(len(final['timeofday'])):
17 if i%4 == 0:
---> 18 final = top_recc(with_url, final)
19 else:
20 final = find_closest(with_url, final['location'][-1],final['timeofday'][i], final)
File ~\Desktop\Intelligent-Travel-Recommendation-System-master\attractions_recc.py:114, in top_recc(with_url, final)
112 i=0
113 while(1):
--> 114 first_recc = with_url.iloc[[i]]
115 if(first_recc['name'].values.T[0] not in final['name']):
116 final['name'].append(first_recc['name'].values.T[0])
File ~\anaconda3\lib\site-packages\pandas\core\indexing.py:967, in _LocationIndexer.__getitem__(self, key)
964 axis = self.axis or 0
966 maybe_callable = com.apply_if_callable(key, self.obj)
--> 967 return self._getitem_axis(maybe_callable, axis=axis)
File ~\anaconda3\lib\site-packages\pandas\core\indexing.py:1511, in _iLocIndexer._getitem_axis(self, key, axis)
1509 # a list of integers
1510 elif is_list_like_indexer(key):
-> 1511 return self._get_list_axis(key, axis=axis)
1513 # a single integer
1514 else:
1515 key = item_from_zerodim(key)
File ~\anaconda3\lib\site-packages\pandas\core\indexing.py:1485, in _iLocIndexer._get_list_axis(self, key, axis)
1482 return self.obj._take_with_is_copy(key, axis=axis)
1483 except IndexError as err:
1484 # re-raise with different error message
-> 1485 raise IndexError("positional indexers are out-of-bounds") from err
IndexError: positional indexers are out-of-bounds
IndexError: indices are out-of-bounds
The above exception was the direct cause of the following exception:
IndexError: positional indexers are out-of-bounds
source: https://github.com/sachinnpraburaj/Intelligent-Travel-Recommendation-System/blob/master/get_att_recc.ipynb

Matplotlib fill_between(...) doesn't work on PeriodIndex

I have been trying to make it work in many ways, but it just doesn't. Does anyone know how to fix it?
Here is a simple toy code
index = pd.period_range('1990-1', '2000-1', freq='M')
df = pd.DataFrame({1:[1]*len(index), 2:[10]*len(index)}, index=index)
plt.fill_between(index, x[1], x[2])
It won't work. If you change index to index.to_timestamp(), then it works fine.
The error shown is
~\Miniconda3\lib\site-packages\matplotlib\axes\_axes.py in fill_between(self, x, y1, y2, where, interpolate, step, **kwargs)
5226
5227 # Handle united data, such as dates
-> 5228 self._process_unit_info(xdata=x, ydata=y1, kwargs=kwargs)
5229 self._process_unit_info(ydata=y2)
5230
~\Miniconda3\lib\site-packages\matplotlib\axes\_base.py in _process_unit_info(self, xdata, ydata, kwargs)
2123 return kwargs
2124
-> 2125 kwargs = _process_single_axis(xdata, self.xaxis, 'xunits', kwargs)
2126 kwargs = _process_single_axis(ydata, self.yaxis, 'yunits', kwargs)
2127 return kwargs
~\Miniconda3\lib\site-packages\matplotlib\axes\_base.py in _process_single_axis(data, axis, unit_name, kwargs)
2106 # We only need to update if there is nothing set yet.
2107 if not axis.have_units():
-> 2108 axis.update_units(data)
2109
2110 # Check for units in the kwargs, and if present update axis
~\Miniconda3\lib\site-packages\matplotlib\axis.py in update_units(self, data)
1496
1497 if neednew:
-> 1498 self._update_axisinfo()
1499 self.stale = True
1500 return True
~\Miniconda3\lib\site-packages\matplotlib\axis.py in _update_axisinfo(self)
1532 self.isDefault_label = True
1533
-> 1534 self.set_default_intervals()
1535
1536 def have_units(self):
~\Miniconda3\lib\site-packages\matplotlib\axis.py in set_default_intervals(self)
2170 if info.default_limits is not None:
2171 valmin, valmax = info.default_limits
-> 2172 xmin = self.converter.convert(valmin, self.units, self)
2173 xmax = self.converter.convert(valmax, self.units, self)
2174 if not dataMutated:
~\Miniconda3\lib\site-packages\pandas\plotting\_matplotlib\converter.py in convert(values, units, axis)
207 values = [PeriodConverter._convert_1d(v, units, axis) for v in values]
208 else:
--> 209 values = PeriodConverter._convert_1d(values, units, axis)
210 return values
211
~\Miniconda3\lib\site-packages\pandas\plotting\_matplotlib\converter.py in _convert_1d(values, units, axis)
213 def _convert_1d(values, units, axis):
214 if not hasattr(axis, "freq"):
--> 215 raise TypeError("Axis must have `freq` set to convert to Periods")
216 valid_types = (str, datetime, Period, pydt.date, pydt.time, np.datetime64)
217 if isinstance(values, valid_types) or is_integer(values) or is_float(values):
TypeError: Axis must have `freq` set to convert to Periods
If anyone could help me with this bug I would be really grateful! Thanks!
First correction: index is the name of a DataFrame attribute,
so it is better to use a different but similar name, e.g.:
idx = pd.period_range('1990-1', '2000-1', freq='M')
Then create your DataFrame as:
df = pd.DataFrame({1:[1]*len(idx), 2:[10]*len(idx)}, index=idx)
You can fill the space between both lines, but first draw the lines
themselves:
df.plot()
Then you can fill the space between them:
plt.fill_between(idx, df[1], df[2]);
(note that you wrote x instead of df).
Apparently plt requires either both border lines to be drawn in advance
or at least one of them. But the picture looks better when you have
both border lines drawn with another color than the space between them,
so it is more natural to draw both lines.

What is the expected structure of a csv file to feed in Pandas

A project reads a list of images named input_images.csv like
images_paths = pd.read_csv('input_images.csv', index_col=0, header=None)[0]
I don't have the input_images.csv, but it should read 1.jpg, 2.jpg, ... from current folder. What should be the format of a csv file? Currently input_images.csv contains
1.jpg
2.jpg
...
But I get
--> 75 images_paths = pd.read_csv('input_images.csv', index_col=0, header=None)[0]
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\frame.py in __getitem__(self, key)
2900 if self.columns.nlevels > 1:
2901 return self._getitem_multilevel(key)
-> 2902 indexer = self.columns.get_loc(key)
2903 if is_integer(indexer):
2904 indexer = [indexer]
~\AppData\Roaming\Python\Python38\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2889 return self._engine.get_loc(casted_key)
2890 except KeyError as err:
-> 2891 raise KeyError(key) from err
2892
2893 if tolerance is not None:
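With index_col=0, the file's only column becomes the index, so no column labelled 0 is left to select and [0] raises a KeyError. Change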
images_paths = pd.read_csv('input_images.csv', index_col=0, header=None)[0]
to
images_paths = pd.read_csv('input_images.csv', header=None)[0]

Exception: Data must be 1-dimensional while making data frame from list

I am trying to make a dataframe from lists and am getting the following exception:
Exception: Data must be 1-dimensional
project_transformed_data = pd.DataFrame(data = {'school_state':school_state,
'grade_one_hot':grade_one_hot,
'teacher_prefix':teacher_prefix,
'categories_one_hot':categories_one_hot,
'sub_categories_one_hot':sub_categories_one_hot,
'price_standardized':price_standardized,
'quantity_standardized':quantity_standardized,
'no_project_standardized':no_project_standardized,
'preprocessed_essays':preprocessed_essays,
'preprocessed_title':preprocessed_title,
'preprocessed_resource_description':preprocessed_resource_description
})
Full exception trace :
Exception Traceback (most recent call last)
<ipython-input-42-534fb60e58d6> in <module>()
9 'preprocessed_essays':preprocessed_essays,
10 'preprocessed_title':preprocessed_title,
---> 11 'preprocessed_resource_description':preprocessed_resource_description
12 })
/usr/local/lib/python3.6/dist-packages/pandas/core/frame.py in __init__(self, data, index, columns, dtype, copy)
328 dtype=dtype, copy=copy)
329 elif isinstance(data, dict):
--> 330 mgr = self._init_dict(data, index, columns, dtype=dtype)
331 elif isinstance(data, ma.MaskedArray):
332 import numpy.ma.mrecords as mrecords
/usr/local/lib/python3.6/dist-packages/pandas/core/frame.py in _init_dict(self, data, index, columns, dtype)
459 arrays = [data[k] for k in keys]
460
--> 461 return _arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
462
463 def _init_ndarray(self, values, index, columns, dtype=None, copy=False):
/usr/local/lib/python3.6/dist-packages/pandas/core/frame.py in _arrays_to_mgr(arrays, arr_names, index, columns, dtype)
6166
6167 # don't force copy because getting jammed in an ndarray anyway
-> 6168 arrays = _homogenize(arrays, index, dtype)
6169
6170 # from BlockManager perspective
/usr/local/lib/python3.6/dist-packages/pandas/core/frame.py in _homogenize(data, index, dtype)
6475 v = lib.fast_multiget(v, oindex.values, default=np.nan)
6476 v = _sanitize_array(v, index, dtype=dtype, copy=False,
-> 6477 raise_cast_failure=False)
6478
6479 homogenized.append(v)
/usr/local/lib/python3.6/dist-packages/pandas/core/series.py in _sanitize_array(data, index, dtype, copy, raise_cast_failure)
3273 elif subarr.ndim > 1:
3274 if isinstance(data, np.ndarray):
-> 3275 raise Exception('Data must be 1-dimensional')
3276 else:
3277 subarr = _asarray_tuplesafe(data, dtype=dtype)
Exception: Data must be 1-dimensional
preprocessed_resource_description is a list, yet I am still getting the exception.
Any idea why I am getting this exception?
Sample data :
print(preprocessed_resource_description[0:2])
print(type(preprocessed_resource_description))
print(len(preprocessed_resource_description))
Output :
['kids kore wobble chair 14 blackreading tree classroom rug shape rectangle rug dimensions 7 8 w x 10 9 lseat foam pad blackjack chair purple cotton', 'robot mouse stem activity set']
<class 'list'>
20000

How to fix "Data must be 1-dimensional" exception in python

I am trying to create a dataset for checking my Logistic Regression algorithm, but I am unable to create a pandas DataFrame from a dictionary.
I am getting a 'Data must be 1-dimensional' exception.
x1 = np.random.random(size=(10,1))*2
x2 = np.random.random(size=(10,1))*2
x3 = np.random.random(size=(10,1))*2 + 2
x4 = np.random.random(size=(10,1))*2 + 2
y0 = np.zeros(shape=(10,1))
y1 = np.ones(shape=(10,1))
plt.scatter(x1,x2, color='g', marker='o')
plt.scatter(x3,x4, color='r', marker='o')
dict_data = { 'X1':np.concatenate((x1,x3)),
'X2':np.concatenate((x2,x4)),
'Y':np.concatenate((y0,y1))}
data = pd.DataFrame(dict_data, index=np.arange(20))
I am getting this as output, with the error "Data must be 1-dimensional".
--------------------------------------------------------------------------
Exception Traceback (most recent call last)
<ipython-input-49-fe81f079ebc6> in <module>
13 dict_data = { 'X1':np.concatenate((x1,x3)), 'X2':np.concatenate((x2,x4)),'Y':np.concatenate((y0,y1))}
14 #print(dict_data.shape)
---> 15 data = pd.DataFrame(dict_data, index=np.arange(20).reshape(20))
~/anaconda3/lib/python3.6/site-packages/pandas/core/frame.py in __init__(self, data, index, columns, dtype, copy)
328 dtype=dtype, copy=copy)
329 elif isinstance(data, dict):
--> 330 mgr = self._init_dict(data, index, columns, dtype=dtype)
331 elif isinstance(data, ma.MaskedArray):
332 import numpy.ma.mrecords as mrecords
~/anaconda3/lib/python3.6/site-packages/pandas/core/frame.py in _init_dict(self, data, index, columns, dtype)
459 arrays = [data[k] for k in keys]
460
--> 461 return _arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
462
463 def _init_ndarray(self, values, index, columns, dtype=None, copy=False):
~/anaconda3/lib/python3.6/site-packages/pandas/core/frame.py in _arrays_to_mgr(arrays, arr_names, index, columns, dtype)
6166
6167 # don't force copy because getting jammed in an ndarray anyway
-> 6168 arrays = _homogenize(arrays, index, dtype)
6169
6170 # from BlockManager perspective
~/anaconda3/lib/python3.6/site-packages/pandas/core/frame.py in _homogenize(data, index, dtype)
6475 v = lib.fast_multiget(v, oindex.values, default=np.nan)
6476 v = _sanitize_array(v, index, dtype=dtype, copy=False,
-> 6477 raise_cast_failure=False)
6478
6479 homogenized.append(v)
~/anaconda3/lib/python3.6/site-packages/pandas/core/series.py in _sanitize_array(data, index, dtype, copy, raise_cast_failure)
3273 elif subarr.ndim > 1:
3274 if isinstance(data, np.ndarray):
-> 3275 raise Exception('Data must be 1-dimensional')
3276 else:
3277 subarr = _asarray_tuplesafe(data, dtype=dtype)
Exception: Data must be 1-dimensional
np.random.random(size=(10,1)) produces a 2-dimensional array of shape (10, 1); however, pandas constructs DataFrames as a collection of 1-dimensional arrays.
So use np.random.random(size=(10)) to make 1-D arrays, which can then be used to make the DataFrame.