finding max element from column of dataframe gives error - pandas

I am trying to find the largest element in a column of my DataFrame, but this gives the following error.
I have tested that the error occurs for this column name only; the rest of the columns work fine.
This is my DataFrame created from a file posts1.csv
import pandas as pd
posts_n = pd.read_csv('posts1.csv',encoding='latin-1')
When I try to find the max element of a particular column, i.e. "score",
max_post = posts['score'].max()
i get following error
KeyError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\pandas\core\indexes\ in get_loc(self, key, method, tolerance)
2441 try:
-> 2442 return self._engine.get_loc(key)
2443 except KeyError:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'score'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-12-09c353ba0de2> in <module>()
34 #MAximum posts done by a user
---> 36 max_post = posts['score'].max()
37 max_post
38 #scr=posts.iloc[:,4]
~\Anaconda3\lib\site-packages\pandas\core\ in __getitem__(self, key)
1962 return self._getitem_multilevel(key)
1963 else:
-> 1964 return self._getitem_column(key)
1966 def _getitem_column(self, key):
~\Anaconda3\lib\site-packages\pandas\core\ in _getitem_column(self, key)
1969 # get column
1970 if self.columns.is_unique:
-> 1971 return self._get_item_cache(key)
1973 # duplicate columns & possible reduce dimensionality
~\Anaconda3\lib\site-packages\pandas\core\ in _get_item_cache(self, item)
1643 res = cache.get(item)
1644 if res is None:
-> 1645 values = self._data.get(item)
1646 res = self._box_item_values(item, values)
1647 cache[item] = res
~\Anaconda3\lib\site-packages\pandas\core\ in get(self, item, fastpath)
3589 if not isnull(item):
-> 3590 loc = self.items.get_loc(item)
3591 else:
3592 indexer = np.arange(len(self.items))[isnull(self.items)]
~\Anaconda3\lib\site-packages\pandas\core\indexes\ in get_loc(self, key, method, tolerance)
2442 return self._engine.get_loc(key)
2443 except KeyError:
-> 2444 return self._engine.get_loc(self._maybe_cast_indexer(key))
2446 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'score'
This is how data looks

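'score' is not in the column index, which suggests that the line of the CSV containing the column names was not used as the header row and was read in as data instead.
Try the following (`header=1` tells pandas to take the column names from the second line of the file):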
posts = pd.read_csv('posts1.csv', header=1)


KeyError Traceback (most recent call last) , KeyError: 'Mark'

Combineddf.plot.scatter(x='Mark', y='Mark2')
KeyError Traceback (most recent call last)
~\anaconda3\lib\site-packages\pandas\core\indexes\ in get_loc(self, key, method, tolerance)
3628 try:
-> 3629 return self._engine.get_loc(casted_key)
3630 except KeyError as err:
~\anaconda3\lib\site-packages\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
~\anaconda3\lib\site-packages\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Mark'
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_4448\ in <module>
----> 1 Combineddf.plot.scatter(x='Mark', y='Mark2')
~\anaconda3\lib\site-packages\pandas\plotting\ in scatter(self, x, y, s, c, **kwargs)
1667 ... colormap='viridis')
1668 """
-> 1669 return self(kind="scatter", x=x, y=y, s=s, c=c, **kwargs)
1671 def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, **kwargs):
~\anaconda3\lib\site-packages\pandas\plotting\ in __call__(self, *args, **kwargs)
915 if kind in self._dataframe_kinds:
916 if isinstance(data, ABCDataFrame):
--> 917 return plot_backend.plot(data, x=x, y=y, kind=kind, **kwargs)
918 else:
919 raise ValueError(f"plot kind {kind} can only be used for data frames")
~\anaconda3\lib\site-packages\pandas\plotting\_matplotlib\ in plot(data, kind, **kwargs)
69 kwargs["ax"] = getattr(ax, "left_ax", ax)
70 plot_obj = PLOT_CLASSES[kind](data, **kwargs)
---> 71 plot_obj.generate()
72 plot_obj.draw()
73 return plot_obj.result
~\anaconda3\lib\site-packages\pandas\plotting\_matplotlib\ in generate(self)
327 self._compute_plot_data()
328 self._setup_subplots()
--> 329 self._make_plot()
330 self._add_table()
331 self._make_legend()
~\anaconda3\lib\site-packages\pandas\plotting\_matplotlib\ in _make_plot(self)
1113 label = None
1114 scatter = ax.scatter(
-> 1115 data[x].values,
1116 data[y].values,
1117 c=c_values,
~\anaconda3\lib\site-packages\pandas\core\ in __getitem__(self, key)
3503 if self.columns.nlevels > 1:
3504 return self._getitem_multilevel(key)
-> 3505 indexer = self.columns.get_loc(key)
3506 if is_integer(indexer):
3507 indexer = [indexer]
~\anaconda3\lib\site-packages\pandas\core\indexes\ in get_loc(self, key, method, tolerance)
3629 return self._engine.get_loc(casted_key)
3630 except KeyError as err:
-> 3631 raise KeyError(key) from err
3632 except TypeError:
3633 # If we have a listlike key, _check_indexing_error will raise
KeyError: 'Mark'
It worked before, but now the same code gives this error.
The expected output is a simple scatter plot of the two mark sets.

Dummy variable is not created

I am facing a problem while creating a dummy variable: there is a problem with the 'town' column.
It gives a KeyError, but my syntax looks correct.
Please help me; I don't understand what the problem is, since the code looks correct to me.
import pandas as pd
import numpy as np
df= pd.read_csv('homeprices.csv')
KeyError Traceback (most recent call last)
c:\users\saurabh singh\appdata\local\programs\python\python37\lib\site-packages\pandas\core\indexes\ in get_loc(self, key, method, tolerance)
2894 try:
-> 2895 return self._engine.get_loc(casted_key)
2896 except KeyError as err:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'town'
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
<ipython-input-30-b0961e3e5942> in <module>
1 # df = pd.concat([df, pd.get_dummies(df['town'])], axis=1)
----> 2 dummies=pd.get_dummies(df['town'])
3 dummies
c:\users\saurabh singh\appdata\local\programs\python\python37\lib\site-packages\pandas\core\ in __getitem__(self, key)
2904 if self.columns.nlevels > 1:
2905 return self._getitem_multilevel(key)
-> 2906 indexer = self.columns.get_loc(key)
2907 if is_integer(indexer):
2908 indexer = [indexer]
c:\users\saurabh singh\appdata\local\programs\python\python37\lib\site-packages\pandas\core\indexes\ in get_loc(self, key, method, tolerance)
2895 return self._engine.get_loc(casted_key)
2896 except KeyError as err:
-> 2897 raise KeyError(key) from err
2899 if tolerance is not None:
KeyError: 'town'
Index(['town ', 'area', 'price'], dtype='object')
Your column name 'town ' has a trailing space (see the Index([...]) output above).
Rename your columns as follows:
df.columns = ['town', 'area', 'price']
After this, you can use df['town'] exactly as in your original code.
Or just change df['town'] to df['town '].

Python loop with loc

I ran into the following errors when running code that performs a loop operation. Could somebody kindly point out the mistake I made so that I can fix it?
The aim of the code is to check whether the next row of "Dividend" equals zero; if it does not, the next row of "Adjusting Factor" should be set to the expression on the right-hand side of the equation. I really don't know how I should fix it. Please give some help, thank you so much.
for i in range(data.shape[0]):
if i == (data.shape[0]-1):
data.loc[data['Dividend'].iloc[i+1] != 0, data['Adjusting Factor'].iloc[i+1]] = (data['EQIX US EQUITY'].iloc[i] - data['Dividend'].iloc[i])
data['Adjusted Premium'].iloc[i] = data['Put'].iloc[i] * data['Adjusting Factor']
KeyError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\pandas\core\indexes\ in get_loc(self, key, method, tolerance)
2896 try:
-> 2897 return self._engine.get_loc(key)
2898 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.DatetimeEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.DatetimeEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
KeyError: 1
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\pandas\core\indexes\ in get_loc(self, key, method, tolerance)
1056 try:
-> 1057 return Index.get_loc(self, key, method, tolerance)
1058 except (KeyError, ValueError, TypeError):
~\Anaconda3\lib\site-packages\pandas\core\indexes\ in get_loc(self, key, method, tolerance)
2898 except KeyError:
-> 2899 return self._engine.get_loc(self._maybe_cast_indexer(key))
2900 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas/_libs/index.pyx in pandas._libs.index.DatetimeEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.DatetimeEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
KeyError: 1
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
<ipython-input-49-0fba1ee2e5e8> in <module>
12 if i == (data.shape[0]-1): # skip the last run to avoid error occur
13 continue
---> 14 data.loc[data['Dividend'].iloc[i+1] != 0, data['Adjusting Factor'].iloc[i+1]] = (data['EQIX US EQUITY'].iloc[i] - data['Dividend'].iloc[i]) / data['EQIX US EQUITY'].iloc[i]
15 data['Adjusted Premium'].iloc[i] = data['Put'].iloc[i] * data['Adjusting Factor']
16 data.loc[data['Adjust Factor'].iloc[i] !=data['Adjust Factor'].iloc[i-1], 'Adjusted Contract Multiplier'] = (data['Adjusted Contract Multiplier'].iloc[i-1]) / data['Adjusting Factor'].iloc[i]
~\Anaconda3\lib\site-packages\pandas\core\ in __setitem__(self, key, value)
202 else:
203 key = com.apply_if_callable(key, self.obj)
--> 204 indexer = self._get_setitem_indexer(key)
205 self._setitem_with_indexer(indexer, value)
~\Anaconda3\lib\site-packages\pandas\core\ in _get_setitem_indexer(self, key)
180 if isinstance(key, tuple):
181 try:
--> 182 return self._convert_tuple(key, is_setter=True)
183 except IndexingError:
184 pass
~\Anaconda3\lib\site-packages\pandas\core\ in _convert_tuple(self, key, is_setter)
260 if i >= self.obj.ndim:
261 raise IndexingError("Too many indexers")
--> 262 idx = self._convert_to_indexer(k, axis=i, is_setter=is_setter)
263 keyidx.append(idx)
264 return tuple(keyidx)
~\Anaconda3\lib\site-packages\pandas\core\ in _convert_to_indexer(self, obj, axis, is_setter, raise_missing)
1286 else:
1287 try:
-> 1288 return labels.get_loc(obj)
1289 except LookupError:
1290 # allow a not found key only if we are a setter
~\Anaconda3\lib\site-packages\pandas\core\indexes\ in get_loc(self, key, method, tolerance)
1064 try:
-> 1065 stamp = Timestamp(key)
1066 if stamp.tzinfo is not None and is not None:
1067 stamp = stamp.tz_convert(
pandas/_libs/tslibs/timestamps.pyx in pandas._libs.tslibs.timestamps.Timestamp.__new__()
pandas/_libs/tslibs/conversion.pyx in pandas._libs.tslibs.conversion.convert_to_tsobject()
TypeError: Cannot convert input [True] of type <class 'numpy.bool_'> to Timestamp
OK, I'm not sure that the "adjustment factor" function calculates the correct value, because I don't know the formula.
If you post the formula, I'll fix that.
But the method is:
you shift the dividend one row ahead and then calculate.
I used apply because it's easy.
def Adjusted_Premium_dividend(row):
    """Return the adjusted premium for a single row.

    The adjusting factor is the row's 'EQIX US EQUITY' value minus its
    'Dividend' value; the adjusted premium is the row's 'Put' value
    multiplied by that factor.

    Args:
        row: A mapping-like record (e.g. the Series that ``DataFrame.apply``
            passes with ``axis=1``) providing the keys 'EQIX US EQUITY',
            'Dividend' and 'Put'.

    Returns:
        ``row['Put'] * (row['EQIX US EQUITY'] - row['Dividend'])``.
    """
    # NOTE(review): the snippet as posted ended with a second, unreachable
    # ``return 0``, which suggests a guard condition was lost when the code
    # was pasted. Reinstate that guard here if one is required.
    adjusting_factor = row['EQIX US EQUITY'] - row['Dividend']
    return row['Put'] * adjusting_factor
def adjustment_factor_dividend(row):
    """Return the adjusting factor for a single row.

    Args:
        row: A mapping-like record (e.g. the Series that ``DataFrame.apply``
            passes with ``axis=1``) providing the keys 'EQIX US EQUITY' and
            'Dividend'.

    Returns:
        ``row['EQIX US EQUITY'] - row['Dividend']``.
    """
    # NOTE(review): the snippet as posted ended with a second, unreachable
    # ``return 0``, which suggests a guard condition was lost when the code
    # was pasted. Reinstate that guard here if one is required.
    return row['EQIX US EQUITY'] - row['Dividend']
# Compute both derived columns row by row (axis=1 hands each row to the function).
data['Adjusted_factor'] = data.apply(adjustment_factor_dividend, axis=1)
# The premium column must come from Adjusted_Premium_dividend; the snippet as
# posted reused adjustment_factor_dividend here, which only duplicated the
# 'Adjusted_factor' column.
data['Adjusted_Premium'] = data.apply(Adjusted_Premium_dividend, axis=1)

Python function to calculate balance for every row corresponding to individual transactions

I am working on a bank statement held in the `output` dataframe, with the ending balance stored in output['balance'][0]. I would like to calculate the balance values for all the individual transactions, as described below. It's a very straightforward calculation and yet it doesn't seem to be working - is there something quite obvious I am missing? Thanks in advance!
output['balance'] = ''
output['balance'][0] = 21.15
if len(output[amount]) > 0:
return output[balance][i+1].append((output[balance][i]-output[amount][i+1]))
output[['balance']] = output['Amount'].apply(lambda amount: bal_calc(output, amount))```
KeyError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\pandas\core\indexes\ in get_loc(self, key, method, tolerance)
2896 try:
-> 2897 return self._engine.get_loc(key)
2898 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 4.95
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-271-b85947935fca> in <module>
----> 1 output[['balance']] = output['Amount'].apply(lambda amount: bal_calc(output, amount))
~\Anaconda3\lib\site-packages\pandas\core\ in apply(self, func, convert_dtype, args, **kwds)
4040 else:
4041 values = self.astype(object).values
-> 4042 mapped = lib.map_infer(values, f, convert=convert_dtype)
4044 if len(mapped) and isinstance(mapped[0], Series):
pandas/_libs/lib.pyx in pandas._libs.lib.map_infer()
<ipython-input-271-b85947935fca> in <lambda>(amount)
----> 1 output[['balance']] = output['Amount'].apply(lambda amount: bal_calc(output, amount))
<ipython-input-270-cbf5ac20716d> in bal_calc(output, amount)
2 output['balance'] = ''
3 output['balance'][0] = 21.15
----> 4 if len(output[amount]) > 0:
5 return output[balance][i+1].append((output[balance][i]-output[amount][i+1]))
6 else:
~\Anaconda3\lib\site-packages\pandas\core\ in __getitem__(self, key)
2978 if self.columns.nlevels > 1:
2979 return self._getitem_multilevel(key)
-> 2980 indexer = self.columns.get_loc(key)
2981 if is_integer(indexer):
2982 indexer = [indexer]
~\Anaconda3\lib\site-packages\pandas\core\indexes\ in get_loc(self, key, method, tolerance)
2897 return self._engine.get_loc(key)
2898 except KeyError:
-> 2899 return self._engine.get_loc(self._maybe_cast_indexer(key))
2900 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2901 if indexer.ndim > 1 or indexer.size > 1:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 4.95
It will be easier to understand your problem if you post your existing dataframe and the intended dataframe. From your description, I think you can approach calculating the balance like this:
import numpy as np  # required: the snippet uses np.random but originally imported only pandas
import pandas as pd

## creating dummy dataframe for testing
# Ten rows, two columns of random integers drawn from range(500, 1000).
arr = np.random.choice(range(500, 1000), (10, 2))
# One 0/1 flag per row: 1 keeps the debit amount, 0 keeps the credit amount.
debit_credit = np.random.choice([0, 1], 10)
arr[:, 0] = arr[:, 0] * debit_credit
arr[:, 1] = arr[:, 1] * (1 - debit_credit)
df = pd.DataFrame(arr, columns=["Debit", "Credit"])

## calculating Balance
# Running balance: cumulative sum of (Debit - Credit) from the first row down.
df["Balance"] = (df.Debit - df.Credit).cumsum()
Debit Credit Balance
0 957 0 957
1 0 698 259
2 608 0 867
3 0 969 -102
4 0 766 -868
5 0 551 -1419
6 985 0 -434
7 861 0 427
8 927 0 1354
9 0 923 431
# Seed the first balance with a single .loc assignment. The chained form
# bs['balance'][0] = ... may write to a temporary copy and is not guaranteed
# to update bs itself.
# NOTE(review): assumes bs has the default 0..n-1 integer index — confirm.
bs.loc[0, 'balance'] = 21.15
# Each balance depends on the previous row's balance, so fill rows in order.
for i in range(1, len(bs)):
    bs.loc[i, 'balance'] = bs.loc[i - 1, 'balance'] + bs.loc[i, 'Credit'] - bs.loc[i, 'Debit']

How do I select the 2nd column or a matrix from a pandas DataFrame?

How do you select column other than the first column?
import pandas as pd
df = pd.read_csv('bio.csv')
I could select the first column, i.e., "Index"
However, I could not select the second column, i.e., "Height".
Here is the trace:
KeyError Traceback (most recent call last)
C:\util\Anaconda3\lib\site-packages\pandas\core\indexes\ in get_loc(self, key, method, tolerance)
2441 try:
-> 2442 return self._engine.get_loc(key)
2443 except KeyError:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Height'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-8-58aff8413556> in <module>()
----> 1 df['Height']
C:\util\Anaconda3\lib\site-packages\pandas\core\ in __getitem__(self, key)
1962 return self._getitem_multilevel(key)
1963 else:
-> 1964 return self._getitem_column(key)
1966 def _getitem_column(self, key):
C:\util\Anaconda3\lib\site-packages\pandas\core\ in _getitem_column(self, key)
1969 # get column
1970 if self.columns.is_unique:
-> 1971 return self._get_item_cache(key)
1973 # duplicate columns & possible reduce dimensionality
C:\util\Anaconda3\lib\site-packages\pandas\core\ in _get_item_cache(self, item)
1643 res = cache.get(item)
1644 if res is None:
-> 1645 values = self._data.get(item)
1646 res = self._box_item_values(item, values)
1647 cache[item] = res
C:\util\Anaconda3\lib\site-packages\pandas\core\ in get(self, item, fastpath)
3589 if not isnull(item):
-> 3590 loc = self.items.get_loc(item)
3591 else:
3592 indexer = np.arange(len(self.items))[isnull(self.items)]
C:\util\Anaconda3\lib\site-packages\pandas\core\indexes\ in get_loc(self, key, method, tolerance)
2442 return self._engine.get_loc(key)
2443 except KeyError:
-> 2444 return self._engine.get_loc(self._maybe_cast_indexer(key))
2446 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Height'
Below is the complete answer
import pandas as pd
# The separator is a regular expression: a comma together with any spaces or
# tabs on either side of it, so that padding is consumed as part of the
# delimiter instead of ending up in the parsed names/values. Regex separators
# need the Python parsing engine, hence engine='python'.
# NOTE(review): presumably bio.csv has whitespace around its commas, which is
# why 'Height' was not found with the default separator — confirm against the file.
df = pd.read_csv('bio.csv', sep='[ \t]*,[ \t]*', engine='python')
This is the output:
Out[22]: 0 65.78
1 71.52
2 69.40
3 68.22
4 67.79
5 68.70
6 69.80
7 70.01
8 67.90
9 66.78
10 66.49
11 67.62
12 68.30
13 67.12
14 68.28
15 71.09