Python function to calculate balance for every row corresponding to individual transactions - numpy

I am working on a bank statement held in the output dataframe, with the ending balance stored in output['balance'][0]. I would like to calculate the balance values for all of the individual transactions, as described below. It's a very straightforward calculation and yet it doesn't seem to be working - is there something quite obvious I am missing? Thanks in advance!
output['balance'] = ''
output['balance'][0] = 21.15
if len(output[amount]) > 0:
return output[balance][i+1].append((output[balance][i]-output[amount][i+1]))
else:
output[balance].append((output[balance][0]))
output[['balance']] = output['Amount'].apply(lambda amount: bal_calc(output, amount))```
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2896 try:
-> 2897 return self._engine.get_loc(key)
2898 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 4.95
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-271-b85947935fca> in <module>
----> 1 output[['balance']] = output['Amount'].apply(lambda amount: bal_calc(output, amount))
~\Anaconda3\lib\site-packages\pandas\core\series.py in apply(self, func, convert_dtype, args, **kwds)
4040 else:
4041 values = self.astype(object).values
-> 4042 mapped = lib.map_infer(values, f, convert=convert_dtype)
4043
4044 if len(mapped) and isinstance(mapped[0], Series):
pandas/_libs/lib.pyx in pandas._libs.lib.map_infer()
<ipython-input-271-b85947935fca> in <lambda>(amount)
----> 1 output[['balance']] = output['Amount'].apply(lambda amount: bal_calc(output, amount))
<ipython-input-270-cbf5ac20716d> in bal_calc(output, amount)
2 output['balance'] = ''
3 output['balance'][0] = 21.15
----> 4 if len(output[amount]) > 0:
5 return output[balance][i+1].append((output[balance][i]-output[amount][i+1]))
6 else:
~\Anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
2978 if self.columns.nlevels > 1:
2979 return self._getitem_multilevel(key)
-> 2980 indexer = self.columns.get_loc(key)
2981 if is_integer(indexer):
2982 indexer = [indexer]
~\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2897 return self._engine.get_loc(key)
2898 except KeyError:
-> 2899 return self._engine.get_loc(self._maybe_cast_indexer(key))
2900 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2901 if indexer.ndim > 1 or indexer.size > 1:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 4.95

It will be easier to understand your problem if you post your existing dataframe and the intended dataframe. From your description, I think you can approach calculating the balance like this:
import numpy as np
import pandas as pd
## creating dummy dataframe for testing
# Ten rows of random amounts drawn from 500..999; column 0 becomes Debit, column 1 becomes Credit.
arr = np.random.choice(range(500,1000),(10,2))
# One random 0/1 flag per row: 1 keeps the Debit amount, 0 keeps the Credit amount,
# so every row ends up with exactly one non-zero side.
debit_credit = np.random.choice([0,1], (10))
arr[:,0] = arr[:,0] * debit_credit
arr[:,1] = arr[:,1] * (1-debit_credit)
df=pd.DataFrame(arr, columns=["Debit", "Credit"])
# NOTE: display() is provided by IPython/Jupyter sessions; in a plain script use print(df).
display(df)
## calculating Balance
# Running balance: cumulative sum of (Debit - Credit) from the first row downwards.
df["Balance"] = (df.Debit-df.Credit).cumsum()
display(df)
Output
Debit Credit Balance
0 957 0 957
1 0 698 259
2 608 0 867
3 0 969 -102
4 0 766 -868
5 0 551 -1419
6 985 0 -434
7 861 0 427
8 927 0 1354
9 0 923 431

# Seed the first row with the known balance. A single .loc call is used instead of
# bs['balance'][0] = ..., because chained assignment may write to a temporary copy
# and leave bs itself unchanged.
bs.loc[0, 'balance'] = 21.15
# Each later balance is the previous row's balance plus this row's Credit minus its Debit.
# assumes bs has the default 0..n-1 integer index, since .loc looks rows up by label - TODO confirm
for i in range(1, len(bs)):
    bs.loc[i, 'balance'] = bs.loc[i-1, 'balance'] + bs.loc[i, 'Credit'] - bs.loc[i, 'Debit']

Related

Python loop with loc

I came across the following errors when I ran the code below, which performs a loop operation. Would somebody kindly point out the mistake I made so that I can fix it?
The aim of the code is to check whether the next row of "Dividend" equals zero; if it does not equal zero, then the next row of "Adjusting Factor" should be set to the result of the right-hand side of the equation. I really don't know how I should fix it. Please give some help, thank you so much.
for i in range(data.shape[0]):
if i == (data.shape[0]-1):
continue
data.loc[data['Dividend'].iloc[i+1] != 0, data['Adjusting Factor'].iloc[i+1]] = (data['EQIX US EQUITY'].iloc[i] - data['Dividend'].iloc[i])
data['Adjusted Premium'].iloc[i] = data['Put'].iloc[i] * data['Adjusting Factor']
KeyError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2896 try:
-> 2897 return self._engine.get_loc(key)
2898 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.DatetimeEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.DatetimeEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
KeyError: 1
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\pandas\core\indexes\datetimes.py in get_loc(self, key, method, tolerance)
1056 try:
-> 1057 return Index.get_loc(self, key, method, tolerance)
1058 except (KeyError, ValueError, TypeError):
~\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2898 except KeyError:
-> 2899 return self._engine.get_loc(self._maybe_cast_indexer(key))
2900 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas/_libs/index.pyx in pandas._libs.index.DatetimeEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.DatetimeEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
KeyError: 1
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
<ipython-input-49-0fba1ee2e5e8> in <module>
12 if i == (data.shape[0]-1): # skip the last run to avoid error occur
13 continue
---> 14 data.loc[data['Dividend'].iloc[i+1] != 0, data['Adjusting Factor'].iloc[i+1]] = (data['EQIX US EQUITY'].iloc[i] - data['Dividend'].iloc[i]) / data['EQIX US EQUITY'].iloc[i]
15 data['Adjusted Premium'].iloc[i] = data['Put'].iloc[i] * data['Adjusting Factor']
16 data.loc[data['Adjust Factor'].iloc[i] !=data['Adjust Factor'].iloc[i-1], 'Adjusted Contract Multiplier'] = (data['Adjusted Contract Multiplier'].iloc[i-1]) / data['Adjusting Factor'].iloc[i]
~\Anaconda3\lib\site-packages\pandas\core\indexing.py in __setitem__(self, key, value)
202 else:
203 key = com.apply_if_callable(key, self.obj)
--> 204 indexer = self._get_setitem_indexer(key)
205 self._setitem_with_indexer(indexer, value)
206
~\Anaconda3\lib\site-packages\pandas\core\indexing.py in _get_setitem_indexer(self, key)
180 if isinstance(key, tuple):
181 try:
--> 182 return self._convert_tuple(key, is_setter=True)
183 except IndexingError:
184 pass
~\Anaconda3\lib\site-packages\pandas\core\indexing.py in _convert_tuple(self, key, is_setter)
260 if i >= self.obj.ndim:
261 raise IndexingError("Too many indexers")
--> 262 idx = self._convert_to_indexer(k, axis=i, is_setter=is_setter)
263 keyidx.append(idx)
264 return tuple(keyidx)
~\Anaconda3\lib\site-packages\pandas\core\indexing.py in _convert_to_indexer(self, obj, axis, is_setter, raise_missing)
1286 else:
1287 try:
-> 1288 return labels.get_loc(obj)
1289 except LookupError:
1290 # allow a not found key only if we are a setter
~\Anaconda3\lib\site-packages\pandas\core\indexes\datetimes.py in get_loc(self, key, method, tolerance)
1063
1064 try:
-> 1065 stamp = Timestamp(key)
1066 if stamp.tzinfo is not None and self.tz is not None:
1067 stamp = stamp.tz_convert(self.tz)
pandas/_libs/tslibs/timestamps.pyx in pandas._libs.tslibs.timestamps.Timestamp.__new__()
pandas/_libs/tslibs/conversion.pyx in pandas._libs.tslibs.conversion.convert_to_tsobject()
TypeError: Cannot convert input [True] of type <class 'numpy.bool_'> to Timestamp
OK, I'm not sure that the "adjustment factor" function calculates the correct value, because I don't know the formula.
If you write the formula, I'll fix that.
But the method is:
you shift the dividend one row ahead and then calculate.
I used apply because it's easy.
# Give every row the previous row's Dividend; missing values (including the first row,
# which has no predecessor) are replaced with 0.
data['Dividend_befor']=data['Dividend'].shift(1).fillna(0)
def Adjusted_Premium_dividend(row):
    """Return the adjusted premium for a single row.
    If row['Dividend_befor'] is non-zero, the result is row['Put'] multiplied by
    (row['EQIX US EQUITY'] - row['Dividend']); otherwise the result is 0.
    `row` can be any mapping-like object indexed by those column names.
    """
    # Guard clause: nothing to adjust when the shifted dividend is zero.
    if row['Dividend_befor'] == 0:
        return 0
    adjusted_factor = row['EQIX US EQUITY'] - row['Dividend']
    return row['Put'] * adjusted_factor
def adjustment_factor_dividend(row):
    """Return the adjustment factor for a single row.
    If row['Dividend_befor'] is non-zero, the result is
    row['EQIX US EQUITY'] - row['Dividend']; otherwise the result is 0.
    `row` can be any mapping-like object indexed by those column names.
    """
    # Guard clause: no adjustment when the shifted dividend is zero.
    if row['Dividend_befor'] == 0:
        return 0
    return row['EQIX US EQUITY'] - row['Dividend']
# axis=1 makes apply pass each row to the function, producing one value per row.
data['Adjusted_factor'] = data.apply(adjustment_factor_dividend,axis=1)
# Use the premium function here: applying adjustment_factor_dividend a second time
# would only duplicate the 'Adjusted_factor' column.
data['Adjusted_Premium'] = data.apply(Adjusted_Premium_dividend,axis=1)
data

KeyError: 6 when adding labels to a regplot

I have a dataframe with the following columns: 'Project', 'Hours', 'Revenue', 'Rate'
I try to build a regplot with Rate on X-axis and Revenue on the Y-axis and add Project name label to the dots at my regplot. Here is my code:
ax=sns.regplot(x = 'Revenue', y = 'Rate',data= df_hours_revenue, ci = None)
sns.set(rc={'figure.figsize':(20,10)})
# ax.set_xlabel('')
# ax.set_ylabel('Rate (£/hour)')
sns.set_context('poster')
plt.title("Rate vs. project revenue 2018-2019")
for line in range(0,df_hours_revenue.shape[0]):
ax.text(df_hours_revenue.Revenue[line], df_hours_revenue.Rate[line],
df_hours_revenue.Project[line], horizontalalignment='left',
size='small', color='Black', weight='normal')
The result is a graph with some labels added and this output error which I do not understand:
KeyError Traceback (most recent call last)
<ipython-input-55-fdcfd6157523> in <module>
9
10 for line in range(0,df_hours_revenue.shape[0]):
---> 11 ax.text(df_hours_revenue.Revenue[line], df_hours_revenue.Rate[line],
12 df_hours_revenue.Project[line], horizontalalignment='left',
13 size='small', color='Black', weight='normal')
~/anaconda3/lib/python3.7/site-packages/pandas/core/series.py in __getitem__(self, key)
866 key = com.apply_if_callable(key, self)
867 try:
--> 868 result = self.index.get_value(self, key)
869
870 if not is_scalar(result):
~/anaconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py in get_value(self, series, key)
4373 try:
4374 return self._engine.get_value(s, k,
-> 4375 tz=getattr(series.dtype, 'tz', None))
4376 except KeyError as e1:
4377 if len(self) > 0 and (self.holds_integer() or self.is_boolean()):
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_value()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_value()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
KeyError: 6
If you want to access Series' item by its position, you should be using .iloc, e.g.:
df_hours_revenue.Revenue.iloc[line]
Read more here.

pandas groupby got KeyError

I am using pandas to calculate some stats of a data file and got some error. It's reproducible by this simple sample code:
import pandas as pd
df = pd.DataFrame({'A': [1,2,3,4,5,6,7,8,9],
'B': [1,2,3,1,2,3,1,2,3],
'C': ['a', 'b', 'a', 'b', 'a', 'b', 'a','a', 'b']})
def testFun2(x):
return pd.DataFrame({'xlen': x.shape[0]})
def testFun(x):
b = x['B']
print "b equals to {}".format(b) # This line prints okay
c = x['C']
out = pd.DataFrame()
for a in x['A'].unique():
subx = x[x.A == a]
subxg = testFun2(subx)
out = pd.concat([out, subxg])
return out
df.groupby(['B', 'C']).apply(lambda x: testFun(x))
The whole error output look like this:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-21-979d23aa904c> in <module>()
18 return out
19
---> 20 df.groupby(['B', 'C']).apply(lambda x: testFun(x))
C:\Users\Administrator\Anaconda2\lib\site-packages\pandas\core\groupby\groupby.pyc in apply(self, func, *args, **kwargs)
928
929 with _group_selection_context(self):
--> 930 return self._python_apply_general(f)
931
932 return result
C:\Users\Administrator\Anaconda2\lib\site-packages\pandas\core\groupby\groupby.pyc in _python_apply_general(self, f)
934 def _python_apply_general(self, f):
935 keys, values, mutated = self.grouper.apply(f, self._selected_obj,
--> 936 self.axis)
937
938 return self._wrap_applied_output(
C:\Users\Administrator\Anaconda2\lib\site-packages\pandas\core\groupby\groupby.pyc in apply(self, f, data, axis)
2271 # group might be modified
2272 group_axes = _get_axes(group)
-> 2273 res = f(group)
2274 if not _is_indexed_like(res, group_axes):
2275 mutated = True
<ipython-input-21-979d23aa904c> in <lambda>(x)
18 return out
19
---> 20 df.groupby(['B', 'C']).apply(lambda x: testFun(x))
<ipython-input-21-979d23aa904c> in testFun(x)
9
10 def testFun(x):
---> 11 b = x['B']
12 c = x['C']
13 out = pd.DataFrame()
C:\Users\Administrator\Anaconda2\lib\site-packages\pandas\core\frame.pyc in __getitem__(self, key)
2686 return self._getitem_multilevel(key)
2687 else:
-> 2688 return self._getitem_column(key)
2689
2690 def _getitem_column(self, key):
C:\Users\Administrator\Anaconda2\lib\site-packages\pandas\core\frame.pyc in _getitem_column(self, key)
2693 # get column
2694 if self.columns.is_unique:
-> 2695 return self._get_item_cache(key)
2696
2697 # duplicate columns & possible reduce dimensionality
C:\Users\Administrator\Anaconda2\lib\site-packages\pandas\core\generic.pyc in _get_item_cache(self, item)
2487 res = cache.get(item)
2488 if res is None:
-> 2489 values = self._data.get(item)
2490 res = self._box_item_values(item, values)
2491 cache[item] = res
C:\Users\Administrator\Anaconda2\lib\site-packages\pandas\core\internals.pyc in get(self, item, fastpath)
4113
4114 if not isna(item):
-> 4115 loc = self.items.get_loc(item)
4116 else:
4117 indexer = np.arange(len(self.items))[isna(self.items)]
C:\Users\Administrator\Anaconda2\lib\site-packages\pandas\core\indexes\base.pyc in get_loc(self, key, method, tolerance)
3078 return self._engine.get_loc(key)
3079 except KeyError:
-> 3080 return self._engine.get_loc(self._maybe_cast_indexer(key))
3081
3082 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'B'
However, I found that if the testFun2 is changed to something simpler, like:
def testFun2(x):
return 1
then the error won't occur. This is very confusing to me - the testFun2 has nothing to do with the line b = x['B'], right? Why did I get the error in the first place? Thanks!

finding max element from column of dataframe gives error

I am trying to find the largest element of a column in my DataFrame, but this gives the following error.
I have tested that the error occurs only for this column name; the rest of the columns work fine.
This is my DataFrame, created from the file posts1.csv:
import pandas as pd
posts_n = pd.read_csv('posts1.csv',encoding='latin-1')
posts=posts_n.fillna(0)
When i try to find max element from a particular column ie "score" ,
max_post = posts['score'].max()
max_post
i get following error
KeyError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2441 try:
-> 2442 return self._engine.get_loc(key)
2443 except KeyError:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'score'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-12-09c353ba0de2> in <module>()
34 #MAximum posts done by a user
35
---> 36 max_post = posts['score'].max()
37 max_post
38 #scr=posts.iloc[:,4]
~\Anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
1962 return self._getitem_multilevel(key)
1963 else:
-> 1964 return self._getitem_column(key)
1965
1966 def _getitem_column(self, key):
~\Anaconda3\lib\site-packages\pandas\core\frame.py in _getitem_column(self, key)
1969 # get column
1970 if self.columns.is_unique:
-> 1971 return self._get_item_cache(key)
1972
1973 # duplicate columns & possible reduce dimensionality
~\Anaconda3\lib\site-packages\pandas\core\generic.py in _get_item_cache(self, item)
1643 res = cache.get(item)
1644 if res is None:
-> 1645 values = self._data.get(item)
1646 res = self._box_item_values(item, values)
1647 cache[item] = res
~\Anaconda3\lib\site-packages\pandas\core\internals.py in get(self, item, fastpath)
3588
3589 if not isnull(item):
-> 3590 loc = self.items.get_loc(item)
3591 else:
3592 indexer = np.arange(len(self.items))[isnull(self.items)]
~\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2442 return self._engine.get_loc(key)
2443 except KeyError:
-> 2444 return self._engine.get_loc(self._maybe_cast_indexer(key))
2445
2446 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'score'
This is how data looks
posts1.csv
'score' is not in the (column) index, so instead of loading in the first line of the csv as the header line, you read it in as data.
try the following:
posts = pd.read_csv('posts1.csv', header=1)

How do I select the 2nd column, or a matrix, from a pandas dataframe?

How do you select column other than the first column?
import pandas as pd
df = pd.read_csv('bio.csv')
df
I could select the first column, i.e., "Index"
df['Index']
However, I could not select the second column, i.e., "Height".
df['Height']
Here is the trace:
KeyError Traceback (most recent call last)
C:\util\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2441 try:
-> 2442 return self._engine.get_loc(key)
2443 except KeyError:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Height'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-8-58aff8413556> in <module>()
----> 1 df['Height']
C:\util\Anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
1962 return self._getitem_multilevel(key)
1963 else:
-> 1964 return self._getitem_column(key)
1965
1966 def _getitem_column(self, key):
C:\util\Anaconda3\lib\site-packages\pandas\core\frame.py in _getitem_column(self, key)
1969 # get column
1970 if self.columns.is_unique:
-> 1971 return self._get_item_cache(key)
1972
1973 # duplicate columns & possible reduce dimensionality
C:\util\Anaconda3\lib\site-packages\pandas\core\generic.py in _get_item_cache(self, item)
1643 res = cache.get(item)
1644 if res is None:
-> 1645 values = self._data.get(item)
1646 res = self._box_item_values(item, values)
1647 cache[item] = res
C:\util\Anaconda3\lib\site-packages\pandas\core\internals.py in get(self, item, fastpath)
3588
3589 if not isnull(item):
-> 3590 loc = self.items.get_loc(item)
3591 else:
3592 indexer = np.arange(len(self.items))[isnull(self.items)]
C:\util\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2442 return self._engine.get_loc(key)
2443 except KeyError:
-> 2444 return self._engine.get_loc(self._maybe_cast_indexer(key))
2445
2446 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Height'
Below is the complete answer
import pandas as pd
df = pd.read_csv('bio.csv', sep='[ \t]*,[ \t]*', engine='python')
df['Height']
This is the output:
Out[22]: 0 65.78
1 71.52
2 69.40
3 68.22
4 67.79
5 68.70
6 69.80
7 70.01
8 67.90
9 66.78
10 66.49
11 67.62
12 68.30
13 67.12
14 68.28
15 71.09