Python loop with loc - dataframe

I came across the following errors when I ran the code below to perform a loop operation. Would somebody kindly point out the mistake I made so that I can fix it?
The aim of the code is to check whether the next row of "Dividend" is zero; if it is not zero, the next row of "Adjusting Factor" should be set to the value on the right-hand side of the equation. I really don't know how I should fix it. Please give me some help, thank you so much.
for i in range(data.shape[0]):
if i == (data.shape[0]-1):
continue
data.loc[data['Dividend'].iloc[i+1] != 0, data['Adjusting Factor'].iloc[i+1]] = (data['EQIX US EQUITY'].iloc[i] - data['Dividend'].iloc[i])
data['Adjusted Premium'].iloc[i] = data['Put'].iloc[i] * data['Adjusting Factor']
KeyError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2896 try:
-> 2897 return self._engine.get_loc(key)
2898 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.DatetimeEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.DatetimeEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
KeyError: 1
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\pandas\core\indexes\datetimes.py in get_loc(self, key, method, tolerance)
1056 try:
-> 1057 return Index.get_loc(self, key, method, tolerance)
1058 except (KeyError, ValueError, TypeError):
~\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2898 except KeyError:
-> 2899 return self._engine.get_loc(self._maybe_cast_indexer(key))
2900 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas/_libs/index.pyx in pandas._libs.index.DatetimeEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.DatetimeEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
KeyError: 1
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
<ipython-input-49-0fba1ee2e5e8> in <module>
12 if i == (data.shape[0]-1): # skip the last run to avoid error occur
13 continue
---> 14 data.loc[data['Dividend'].iloc[i+1] != 0, data['Adjusting Factor'].iloc[i+1]] = (data['EQIX US EQUITY'].iloc[i] - data['Dividend'].iloc[i]) / data['EQIX US EQUITY'].iloc[i]
15 data['Adjusted Premium'].iloc[i] = data['Put'].iloc[i] * data['Adjusting Factor']
16 data.loc[data['Adjust Factor'].iloc[i] !=data['Adjust Factor'].iloc[i-1], 'Adjusted Contract Multiplier'] = (data['Adjusted Contract Multiplier'].iloc[i-1]) / data['Adjusting Factor'].iloc[i]
~\Anaconda3\lib\site-packages\pandas\core\indexing.py in __setitem__(self, key, value)
202 else:
203 key = com.apply_if_callable(key, self.obj)
--> 204 indexer = self._get_setitem_indexer(key)
205 self._setitem_with_indexer(indexer, value)
206
~\Anaconda3\lib\site-packages\pandas\core\indexing.py in _get_setitem_indexer(self, key)
180 if isinstance(key, tuple):
181 try:
--> 182 return self._convert_tuple(key, is_setter=True)
183 except IndexingError:
184 pass
~\Anaconda3\lib\site-packages\pandas\core\indexing.py in _convert_tuple(self, key, is_setter)
260 if i >= self.obj.ndim:
261 raise IndexingError("Too many indexers")
--> 262 idx = self._convert_to_indexer(k, axis=i, is_setter=is_setter)
263 keyidx.append(idx)
264 return tuple(keyidx)
~\Anaconda3\lib\site-packages\pandas\core\indexing.py in _convert_to_indexer(self, obj, axis, is_setter, raise_missing)
1286 else:
1287 try:
-> 1288 return labels.get_loc(obj)
1289 except LookupError:
1290 # allow a not found key only if we are a setter
~\Anaconda3\lib\site-packages\pandas\core\indexes\datetimes.py in get_loc(self, key, method, tolerance)
1063
1064 try:
-> 1065 stamp = Timestamp(key)
1066 if stamp.tzinfo is not None and self.tz is not None:
1067 stamp = stamp.tz_convert(self.tz)
pandas/_libs/tslibs/timestamps.pyx in pandas._libs.tslibs.timestamps.Timestamp.__new__()
pandas/_libs/tslibs/conversion.pyx in pandas._libs.tslibs.conversion.convert_to_tsobject()
TypeError: Cannot convert input [True] of type <class 'numpy.bool_'> to Timestamp

OK, I'm not sure that the "adjustment factor" function calculates the correct value, because I don't know the formula.
If you write out the formula, I'll fix that.
But the method is:
you shift the dividend column one row ahead and then calculate.
I used `apply` because it's easy.
# Shift the Dividend column down by one row so each row sees the previous
# row's dividend; missing values (including the first row) become 0.
data['Dividend_befor']=data['Dividend'].shift(1).fillna(0)
def Adjusted_Premium_dividend(row):
    """Return the dividend-adjusted premium for a single row.

    Args:
        row: A mapping-like row (e.g. the Series passed by
            ``DataFrame.apply(..., axis=1)``) providing the keys
            'Dividend_befor', 'EQIX US EQUITY', 'Dividend' and 'Put'.

    Returns:
        0 when ``row['Dividend_befor']`` equals zero; otherwise
        ``row['Put'] * (row['EQIX US EQUITY'] - row['Dividend'])``.
    """
    # Rows whose shifted dividend is zero get no adjustment.
    if row['Dividend_befor'] == 0:
        return 0
    adjusted_factor = row['EQIX US EQUITY'] - row['Dividend']
    return row['Put'] * adjusted_factor
def adjustment_factor_dividend(row):
    """Return the adjustment factor for a single row.

    Args:
        row: A mapping-like row (e.g. the Series passed by
            ``DataFrame.apply(..., axis=1)``) providing the keys
            'Dividend_befor', 'EQIX US EQUITY' and 'Dividend'.

    Returns:
        0 when ``row['Dividend_befor']`` equals zero; otherwise
        ``row['EQIX US EQUITY'] - row['Dividend']``.
    """
    # Rows whose shifted dividend is zero get no adjustment.
    if row['Dividend_befor'] == 0:
        return 0
    return row['EQIX US EQUITY'] - row['Dividend']
# Row-wise adjustment factor.
data['Adjusted_factor'] = data.apply(adjustment_factor_dividend, axis=1)
# The premium column must come from Adjusted_Premium_dividend; using the
# factor function here would just duplicate the 'Adjusted_factor' column.
data['Adjusted_Premium'] = data.apply(Adjusted_Premium_dividend, axis=1)
data

Related

KeyError Traceback (most recent call last) , KeyError: 'Mark'

Combineddf.plot.scatter(x='Mark', y='Mark2')
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~\anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3628 try:
-> 3629 return self._engine.get_loc(casted_key)
3630 except KeyError as err:
~\anaconda3\lib\site-packages\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
~\anaconda3\lib\site-packages\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Mark'
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_4448\4038682785.py in <module>
----> 1 Combineddf.plot.scatter(x='Mark', y='Mark2')
~\anaconda3\lib\site-packages\pandas\plotting\_core.py in scatter(self, x, y, s, c, **kwargs)
1667 ... colormap='viridis')
1668 """
-> 1669 return self(kind="scatter", x=x, y=y, s=s, c=c, **kwargs)
1670
1671 def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, **kwargs):
~\anaconda3\lib\site-packages\pandas\plotting\_core.py in __call__(self, *args, **kwargs)
915 if kind in self._dataframe_kinds:
916 if isinstance(data, ABCDataFrame):
--> 917 return plot_backend.plot(data, x=x, y=y, kind=kind, **kwargs)
918 else:
919 raise ValueError(f"plot kind {kind} can only be used for data frames")
~\anaconda3\lib\site-packages\pandas\plotting\_matplotlib\__init__.py in plot(data, kind, **kwargs)
69 kwargs["ax"] = getattr(ax, "left_ax", ax)
70 plot_obj = PLOT_CLASSES[kind](data, **kwargs)
---> 71 plot_obj.generate()
72 plot_obj.draw()
73 return plot_obj.result
~\anaconda3\lib\site-packages\pandas\plotting\_matplotlib\core.py in generate(self)
327 self._compute_plot_data()
328 self._setup_subplots()
--> 329 self._make_plot()
330 self._add_table()
331 self._make_legend()
~\anaconda3\lib\site-packages\pandas\plotting\_matplotlib\core.py in _make_plot(self)
1113 label = None
1114 scatter = ax.scatter(
-> 1115 data[x].values,
1116 data[y].values,
1117 c=c_values,
~\anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
3503 if self.columns.nlevels > 1:
3504 return self._getitem_multilevel(key)
-> 3505 indexer = self.columns.get_loc(key)
3506 if is_integer(indexer):
3507 indexer = [indexer]
~\anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3629 return self._engine.get_loc(casted_key)
3630 except KeyError as err:
-> 3631 raise KeyError(key) from err
3632 except TypeError:
3633 # If we have a listlike key, _check_indexing_error will raise
KeyError: 'Mark'
It worked before, but now the same code gives an error.
The expected output is a simple scatter plot of the two mark sets.

Index DCO invalid

The index DCO, referred to as a date here, is invalid.
[](https://i.stack.imgur.com/0Mrs6.png)
This is my dataset:
When I try to get Volume_DEV as a series, the code outputs:
KeyError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
3360 try:
-> 3361 return self._engine.get_loc(casted_key)
3362 except KeyError as err:
4 frames
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'Volume_DEV'
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
3361 return self._engine.get_loc(casted_key)
3362 except KeyError as err:
-> 3363 raise KeyError(key) from err
3364
3365 if is_scalar(key) and isna(key) and not self.hasnans:
KeyError: 'Volume_DEV'
I am trying to use DCO (the date here) and the volume of each day to perform an HP filter decomposition, like this:
# Split the series into cycle and trend components with the HP filter.
series = df['Volume_DEV']
cycle, trend = sm.tsa.filters.hpfilter(series, 50)

# One stacked subplot per component, drawn top to bottom.
fig, ax = plt.subplots(3, 1)
panels = (('Price', series), ('Trend', trend), ('Cycle', cycle))
for axis, (title, values) in zip(ax, panels):
    axis.plot(values)
    axis.set_title(title)
plt.show()

Python Type Error: Series.name must be a hashable

I have a grouped dataset (by groupby) called level_temp_grouped and I want to apply a function to two boolean columns namely, up and fill_cand, for each group.
level_temp['neighbor'] = level_temp_grouped[['up','fill_cand']].apply(lambda n: neighbor_fill(np.array(n)[:,0], np.array(n)[:,1]))
This code gives me the following error:
TypeError: Series.name must be a hashable type
What is the problem?
level_temp is a pandas dataframe. Dp is the serial number of the observations I want to groupby as the data includes different observations of the same product in different times.
level_temp_grouped=level_temp.groupby('dp')
Full stack trace is the following:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
~\Anaconda3\envs\linde1\lib\site-packages\pandas\core\groupby\groupby.py in apply(self, func, *args, **kwargs)
1274 try:
-> 1275 result = self._python_apply_general(f, self._selected_obj)
1276 except TypeError:
~\Anaconda3\envs\linde1\lib\site-packages\pandas\core\groupby\groupby.py in _python_apply_general(self, f, data)
1310
-> 1311 return self._wrap_applied_output(
1312 data, keys, values, not_indexed_same=mutated or self.mutated
~\Anaconda3\envs\linde1\lib\site-packages\pandas\core\groupby\generic.py in _wrap_applied_output(self, data, keys, values, not_indexed_same)
1166 # after raising AttributeError above
-> 1167 return self.obj._constructor_sliced(
1168 values, index=key_index, name=self._selection
~\Anaconda3\envs\linde1\lib\site-packages\pandas\core\series.py in __init__(self, data, index, dtype, name, copy, fastpath)
352
--> 353 name = ibase.maybe_extract_name(name, data, type(self))
354
~\Anaconda3\envs\linde1\lib\site-packages\pandas\core\indexes\base.py in maybe_extract_name(name, obj, cls)
6391 if not is_hashable(name):
-> 6392 raise TypeError(f"{cls.__name__}.name must be a hashable type")
6393
TypeError: Series.name must be a hashable type
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_10940/1897168105.py in <module>
1 #level_temp_grouped[['up','fill_cand']].apply(lambda n: neighbor_fill(np.array(n)[:,0], np.array(n)[:,1]))
----> 2 level_temp_grouped[['up','fill_cand']].apply(lambda n: neighbor_fill(np.array(n)[:,0], np.array(n)[:,1]))
~\Anaconda3\envs\linde1\lib\site-packages\pandas\core\groupby\groupby.py in apply(self, func, *args, **kwargs)
1284
1285 with group_selection_context(self):
-> 1286 return self._python_apply_general(f, self._selected_obj)
1287
1288 return result
~\Anaconda3\envs\linde1\lib\site-packages\pandas\core\groupby\groupby.py in _python_apply_general(self, f, data)
1309 keys, values, mutated = self.grouper.apply(f, data, self.axis)
1310
-> 1311 return self._wrap_applied_output(
1312 data, keys, values, not_indexed_same=mutated or self.mutated
1313 )
~\Anaconda3\envs\linde1\lib\site-packages\pandas\core\groupby\generic.py in _wrap_applied_output(self, data, keys, values, not_indexed_same)
1165 # TODO: sure this is right? we used to do this
1166 # after raising AttributeError above
-> 1167 return self.obj._constructor_sliced(
1168 values, index=key_index, name=self._selection
1169 )
~\Anaconda3\envs\linde1\lib\site-packages\pandas\core\series.py in __init__(self, data, index, dtype, name, copy, fastpath)
351 else:
352
--> 353 name = ibase.maybe_extract_name(name, data, type(self))
354
355 if is_empty_data(data) and dtype is None:
~\Anaconda3\envs\linde1\lib\site-packages\pandas\core\indexes\base.py in maybe_extract_name(name, obj, cls)
6390 # GH#29069
6391 if not is_hashable(name):
-> 6392 raise TypeError(f"{cls.__name__}.name must be a hashable type")
6393
6394 return name
TypeError: Series.name must be a hashable type

Python function to calculate balance for every row corresponding to individual transactions

I am working on a bank statement, corresponding to the output dataframe and an ending balance corresponding to the output['balance'][0] I would like to calculate all balance values for the individual transactions as described below. It's a very straightforward calculation and yet it doesn't seem to be working - is there something quite obvious I am missing? Thanks in advance!
output['balance'] = ''
output['balance'][0] = 21.15
if len(output[amount]) > 0:
return output[balance][i+1].append((output[balance][i]-output[amount][i+1]))
else:
output[balance].append((output[balance][0]))
output[['balance']] = output['Amount'].apply(lambda amount: bal_calc(output, amount))```
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2896 try:
-> 2897 return self._engine.get_loc(key)
2898 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 4.95
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-271-b85947935fca> in <module>
----> 1 output[['balance']] = output['Amount'].apply(lambda amount: bal_calc(output, amount))
~\Anaconda3\lib\site-packages\pandas\core\series.py in apply(self, func, convert_dtype, args, **kwds)
4040 else:
4041 values = self.astype(object).values
-> 4042 mapped = lib.map_infer(values, f, convert=convert_dtype)
4043
4044 if len(mapped) and isinstance(mapped[0], Series):
pandas/_libs/lib.pyx in pandas._libs.lib.map_infer()
<ipython-input-271-b85947935fca> in <lambda>(amount)
----> 1 output[['balance']] = output['Amount'].apply(lambda amount: bal_calc(output, amount))
<ipython-input-270-cbf5ac20716d> in bal_calc(output, amount)
2 output['balance'] = ''
3 output['balance'][0] = 21.15
----> 4 if len(output[amount]) > 0:
5 return output[balance][i+1].append((output[balance][i]-output[amount][i+1]))
6 else:
~\Anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
2978 if self.columns.nlevels > 1:
2979 return self._getitem_multilevel(key)
-> 2980 indexer = self.columns.get_loc(key)
2981 if is_integer(indexer):
2982 indexer = [indexer]
~\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2897 return self._engine.get_loc(key)
2898 except KeyError:
-> 2899 return self._engine.get_loc(self._maybe_cast_indexer(key))
2900 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2901 if indexer.ndim > 1 or indexer.size > 1:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 4.95
It will be easier to understand your problem if you post your existing dataframe and the intended dataframe. From your description, I think you can approach calculating the balance like this:
import numpy as np  # needed for np.random.choice below
import pandas as pd

## creating dummy dataframe for testing
arr = np.random.choice(range(500, 1000), (10, 2))
debit_credit = np.random.choice([0, 1], 10)
# Zero out one side of every row so each transaction is either a debit
# or a credit, never both.
arr[:, 0] = arr[:, 0] * debit_credit
arr[:, 1] = arr[:, 1] * (1 - debit_credit)
df = pd.DataFrame(arr, columns=["Debit", "Credit"])
display(df)  # assumes a notebook session where `display` is available

## calculating Balance
# Running balance: cumulative sum of (Debit - Credit).
df["Balance"] = (df.Debit - df.Credit).cumsum()
display(df)
Output
Debit Credit Balance
0 957 0 957
1 0 698 259
2 608 0 867
3 0 969 -102
4 0 766 -868
5 0 551 -1419
6 985 0 -434
7 861 0 427
8 927 0 1354
9 0 923 431
# Seed the first balance with a single .loc assignment; chained indexing
# (bs['balance'][0] = ...) may write to a temporary copy instead of bs.
bs.loc[0, 'balance'] = 21.15
# Each later balance is the previous balance plus this row's Credit
# minus this row's Debit.
for i in range(1, len(bs)):
    bs.loc[i, 'balance'] = bs.loc[i-1, 'balance'] + bs.loc[i, 'Credit'] - bs.loc[i, 'Debit']

finding max element from column of dataframe gives error

I am trying to find the largest element of a column in my DataFrame, but this gives the following error.
I have tested that the error occurs only for this column name; the rest of the columns work fine.
This is my DataFrame created from a file posts1.csv
import pandas as pd
posts_n = pd.read_csv('posts1.csv',encoding='latin-1')
posts=posts_n.fillna(0)
When i try to find max element from a particular column ie "score" ,
max_post = posts['score'].max()
max_post
i get following error
KeyError Traceback (most recent call last)
~\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2441 try:
-> 2442 return self._engine.get_loc(key)
2443 except KeyError:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'score'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-12-09c353ba0de2> in <module>()
34 #MAximum posts done by a user
35
---> 36 max_post = posts['score'].max()
37 max_post
38 #scr=posts.iloc[:,4]
~\Anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
1962 return self._getitem_multilevel(key)
1963 else:
-> 1964 return self._getitem_column(key)
1965
1966 def _getitem_column(self, key):
~\Anaconda3\lib\site-packages\pandas\core\frame.py in _getitem_column(self, key)
1969 # get column
1970 if self.columns.is_unique:
-> 1971 return self._get_item_cache(key)
1972
1973 # duplicate columns & possible reduce dimensionality
~\Anaconda3\lib\site-packages\pandas\core\generic.py in _get_item_cache(self, item)
1643 res = cache.get(item)
1644 if res is None:
-> 1645 values = self._data.get(item)
1646 res = self._box_item_values(item, values)
1647 cache[item] = res
~\Anaconda3\lib\site-packages\pandas\core\internals.py in get(self, item, fastpath)
3588
3589 if not isnull(item):
-> 3590 loc = self.items.get_loc(item)
3591 else:
3592 indexer = np.arange(len(self.items))[isnull(self.items)]
~\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2442 return self._engine.get_loc(key)
2443 except KeyError:
-> 2444 return self._engine.get_loc(self._maybe_cast_indexer(key))
2445
2446 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'score'
This is how data looks
posts1.csv
'score' is not in the (column) index, so instead of loading in the first line of the csv as the header line, you read it in as data.
try the following:
posts = pd.read_csv('posts1.csv', header=1)