How to divide a column by 1000 in loaded csv file? - pandas

df["male_stature_mm"] = df["male_stature_mm"]/1000
print(df)
KeyError Traceback (most recent call last)
File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexes/base.py:3621, in Index.get_loc(self, key, method, tolerance)
3620 try:
-> 3621 return self._engine.get_loc(casted_key)
3622 except KeyError as err:
File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/_libs/index.pyx:136, in pandas._libs.index.IndexEngine.get_loc()
File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/_libs/index.pyx:163, in pandas._libs.index.IndexEngine.get_loc()
File pandas/_libs/hashtable_class_helper.pxi:5198, in pandas._libs.hashtable.PyObjectHashTable.get_item()
File pandas/_libs/hashtable_class_helper.pxi:5206, in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'male_stature_mm'
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
Input In [7], in <cell line: 1>()
----> 1 df["male_stature_mm"] = df["male_stature_mm"]/1000
2 print(df)
File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/frame.py:3505, in DataFrame.getitem(self, key)
3503 if self.columns.nlevels > 1:
3504 return self._getitem_multilevel(key)
-> 3505 indexer = self.columns.get_loc(key)
3506 if is_integer(indexer):
3507 indexer = [indexer]
File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexes/base.py:3623, in Index.get_loc(self, key, method, tolerance)
3621 return self._engine.get_loc(casted_key)
3622 except KeyError as err:
-> 3623 raise KeyError(key) from err
3624 except TypeError:
3625 # If we have a listlike key, _check_indexing_error will raise
3626 # InvalidIndexError. Otherwise we fall through and re-raise
3627 # the TypeError.
3628 self._check_indexing_error(key)
KeyError: 'male_stature_mm'
​
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import csv
df = pd.read_csv(r'/Users/upanshusrivastava/Desktop/data/male_train_data.csv')
print(df)
df = pd.read_csv(r'/Users/upanshusrivastava/Desktop/data/female_train_data.csv')
print(df)

Related

KeyError Traceback (most recent call last) , KeyError: 'Mark'

Combineddf.plot.scatter(x='Mark', y='Mark2')
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~\anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3628 try:
-> 3629 return self._engine.get_loc(casted_key)
3630 except KeyError as err:
~\anaconda3\lib\site-packages\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
~\anaconda3\lib\site-packages\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Mark'
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_4448\4038682785.py in <module>
----> 1 Combineddf.plot.scatter(x='Mark', y='Mark2')
~\anaconda3\lib\site-packages\pandas\plotting\_core.py in scatter(self, x, y, s, c, **kwargs)
1667 ... colormap='viridis')
1668 """
-> 1669 return self(kind="scatter", x=x, y=y, s=s, c=c, **kwargs)
1670
1671 def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, **kwargs):
~\anaconda3\lib\site-packages\pandas\plotting\_core.py in __call__(self, *args, **kwargs)
915 if kind in self._dataframe_kinds:
916 if isinstance(data, ABCDataFrame):
--> 917 return plot_backend.plot(data, x=x, y=y, kind=kind, **kwargs)
918 else:
919 raise ValueError(f"plot kind {kind} can only be used for data frames")
~\anaconda3\lib\site-packages\pandas\plotting\_matplotlib\__init__.py in plot(data, kind, **kwargs)
69 kwargs["ax"] = getattr(ax, "left_ax", ax)
70 plot_obj = PLOT_CLASSES[kind](data, **kwargs)
---> 71 plot_obj.generate()
72 plot_obj.draw()
73 return plot_obj.result
~\anaconda3\lib\site-packages\pandas\plotting\_matplotlib\core.py in generate(self)
327 self._compute_plot_data()
328 self._setup_subplots()
--> 329 self._make_plot()
330 self._add_table()
331 self._make_legend()
~\anaconda3\lib\site-packages\pandas\plotting\_matplotlib\core.py in _make_plot(self)
1113 label = None
1114 scatter = ax.scatter(
-> 1115 data[x].values,
1116 data[y].values,
1117 c=c_values,
~\anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
3503 if self.columns.nlevels > 1:
3504 return self._getitem_multilevel(key)
-> 3505 indexer = self.columns.get_loc(key)
3506 if is_integer(indexer):
3507 indexer = [indexer]
~\anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3629 return self._engine.get_loc(casted_key)
3630 except KeyError as err:
-> 3631 raise KeyError(key) from err
3632 except TypeError:
3633 # If we have a listlike key, _check_indexing_error will raise
KeyError: 'Mark'
It worked before but now the same code gives error.
Expecting output will be a simple scatter plot of the two mark sets

Index DCO invalid

index DCO refered as a date here is invalid
[](https://i.stack.imgur.com/0Mrs6.png
this is my dataset:
when I want to write Volume_DEV as a series the code ouput:
KeyError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
3360 try:
-> 3361 return self._engine.get_loc(casted_key)
3362 except KeyError as err:
4 frames
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Volume_DEV'
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
3361 return self._engine.get_loc(casted_key)
3362 except KeyError as err:
-> 3363 raise KeyError(key) from err
3364
3365 if is_scalar(key) and isna(key) and not self.hasnans:
KeyError: 'Volume_DEV'
I am trying to call DCO(the date here) and the volume of each day to perfom a HP filter decomposition such that:
series = df['Volume_DEV']
cycle, trend = sm.tsa.filters.hpfilter(series, 50)
fig, ax = plt.subplots(3,1)
ax[0].plot(series)
ax[0].set_title('Price')
ax[1].plot(trend)
ax[1].set_title('Trend')
ax[2].plot(cycle)
ax[2].set_title('Cycle')
plt.show()

Dummy variable is not created

I am facing a problem while creating a dummy variable There is a problem with the 'town' column.
it's giving a key error but my syntax is correct.
please help me I didn't understand what is the problem even I am correct from my side.
import pandas as pd
import numpy as np
df= pd.read_csv('homeprices.csv')
df
dummies=pd.get_dummies(df['town'])
dummies
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
c:\users\saurabh singh\appdata\local\programs\python\python37\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2894 try:
-> 2895 return self._engine.get_loc(casted_key)
2896 except KeyError as err:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'town'
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
<ipython-input-30-b0961e3e5942> in <module>
1 # df = pd.concat([df, pd.get_dummies(df['town'])], axis=1)
----> 2 dummies=pd.get_dummies(df['town'])
3 dummies
c:\users\saurabh singh\appdata\local\programs\python\python37\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
2904 if self.columns.nlevels > 1:
2905 return self._getitem_multilevel(key)
-> 2906 indexer = self.columns.get_loc(key)
2907 if is_integer(indexer):
2908 indexer = [indexer]
c:\users\saurabh singh\appdata\local\programs\python\python37\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2895 return self._engine.get_loc(casted_key)
2896 except KeyError as err:
-> 2897 raise KeyError(key) from err
2898
2899 if tolerance is not None:
KeyError: 'town'
df.columns
Index(['town ', 'area', 'price'], dtype='object')
Your column town has a space on it.
Change your column names as follows
df.columns = ['town', 'area', 'price']
After this, you can use
dummies=pd.get_dummies(df['town'])
Or just change df['town'] to df['town ']

finding max element from column of dataframe gives error

I am trying to find largest element from a column in my DataFrame but this gives the following error.
And i have tested that it only gives error to this column name only and rest of the columns just work fine.
This is my DataFrame created from a file posts1.csv
import pandas as pd
posts_n = pd.read_csv('posts1.csv',encoding='latin-1')
posts=posts_n.fillna(0)
When i try to find max element from a particular column ie "score" ,
max_post = posts['score'].max()
max_post
i get following error
KeyError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2441 try:
-> 2442 return self._engine.get_loc(key)
2443 except KeyError:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'score'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-12-09c353ba0de2> in <module>()
34 #MAximum posts done by a user
35
---> 36 max_post = posts['score'].max()
37 max_post
38 #scr=posts.iloc[:,4]
~\Anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
1962 return self._getitem_multilevel(key)
1963 else:
-> 1964 return self._getitem_column(key)
1965
1966 def _getitem_column(self, key):
~\Anaconda3\lib\site-packages\pandas\core\frame.py in _getitem_column(self, key)
1969 # get column
1970 if self.columns.is_unique:
-> 1971 return self._get_item_cache(key)
1972
1973 # duplicate columns & possible reduce dimensionality
~\Anaconda3\lib\site-packages\pandas\core\generic.py in _get_item_cache(self, item)
1643 res = cache.get(item)
1644 if res is None:
-> 1645 values = self._data.get(item)
1646 res = self._box_item_values(item, values)
1647 cache[item] = res
~\Anaconda3\lib\site-packages\pandas\core\internals.py in get(self, item, fastpath)
3588
3589 if not isnull(item):
-> 3590 loc = self.items.get_loc(item)
3591 else:
3592 indexer = np.arange(len(self.items))[isnull(self.items)]
~\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2442 return self._engine.get_loc(key)
2443 except KeyError:
-> 2444 return self._engine.get_loc(self._maybe_cast_indexer(key))
2445
2446 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'score'
This is how data looks
posts1.csv
'score' is not in the (column) index, so instead of loading in the first line of the csv as the header line, you read it in as data.
try the following:
posts = pd.read_csv('posts1.csv', header=1)

How do select 2nd column or a matrix from a pandas dataframe?

How do you select column other than the first column?
import pandas as pd
df = pd.read_csv('bio.csv')
df
I could select the first column, i.e., "Index"
df['Index']
However, I could not select the second column, i.e., "Height".
df['Height']
Here is the trace:
KeyError Traceback (most recent call last)
C:\util\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2441 try:
-> 2442 return self._engine.get_loc(key)
2443 except KeyError:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Height'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-8-58aff8413556> in <module>()
----> 1 df['Height']
C:\util\Anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
1962 return self._getitem_multilevel(key)
1963 else:
-> 1964 return self._getitem_column(key)
1965
1966 def _getitem_column(self, key):
C:\util\Anaconda3\lib\site-packages\pandas\core\frame.py in _getitem_column(self, key)
1969 # get column
1970 if self.columns.is_unique:
-> 1971 return self._get_item_cache(key)
1972
1973 # duplicate columns & possible reduce dimensionality
C:\util\Anaconda3\lib\site-packages\pandas\core\generic.py in _get_item_cache(self, item)
1643 res = cache.get(item)
1644 if res is None:
-> 1645 values = self._data.get(item)
1646 res = self._box_item_values(item, values)
1647 cache[item] = res
C:\util\Anaconda3\lib\site-packages\pandas\core\internals.py in get(self, item, fastpath)
3588
3589 if not isnull(item):
-> 3590 loc = self.items.get_loc(item)
3591 else:
3592 indexer = np.arange(len(self.items))[isnull(self.items)]
C:\util\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2442 return self._engine.get_loc(key)
2443 except KeyError:
-> 2444 return self._engine.get_loc(self._maybe_cast_indexer(key))
2445
2446 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Height'
Below is the complete answer
import pandas as pd
df = pd.read_csv('bio.csv', sep='[ \t]*,[ \t]*', engine='python')
df['Height']
Theis is the output:
Out[22]: 0 65.78
1 71.52
2 69.40
3 68.22
4 67.79
5 68.70
6 69.80
7 70.01
8 67.90
9 66.78
10 66.49
11 67.62
12 68.30
13 67.12
14 68.28
15 71.09