pandas DataFrame boolean indexing - pandas

df = pd.DataFrame(np.random.randn(5,6), columns=list('abcdef'))
df[df.loc[0] > 1]
IndexingError Traceback (most recent call
last) in
1 df = pd.DataFrame(np.random.randn(5,6), columns=list('abcdef'))
2
----> 3 df[df.loc[0] > 1]
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in
getitem(self, key) 2891 # Do we have a (boolean) 1d indexer? 2892 if com.is_bool_indexer(key):
-> 2893 return self._getitem_bool_array(key) 2894 2895 # We are left with two options: a single key, and a
collection of keys,
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in
_getitem_bool_array(self, key) 2943 # check_bool_indexer will throw exception if Series key cannot 2944 # be
reindexed to match DataFrame rows
-> 2945 key = check_bool_indexer(self.index, key) 2946 indexer = key.nonzero()[0] 2947 return
self._take_with_is_copy(indexer, axis=0)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexing.py in
check_bool_indexer(index, key) 2182 mask =
isna(result._values) 2183 if mask.any():
-> 2184 raise IndexingError( 2185 "Unalignable boolean Series provided as " 2186
"indexer (index of the boolean Series and of "
IndexingError: Unalignable boolean Series provided as indexer (index
of the boolean Series and of the indexed object do not match).
and I try
df(pd.DataFrame(df.loc[0] > 1).T)
TypeError Traceback (most recent call
last) in
1 df = pd.DataFrame(np.random.randn(5,6), columns=list('abcdef'))
2
----> 3 df(pd.DataFrame(df.loc[0] > 1).T)
TypeError: 'DataFrame' object is not callable
What should I do? Thanks.

Related

IndexError: indices are out-of-bounds

Hello everyone. When I run this part of the code I get an error, and I don't know why it happens: is it caused by updating my packages, or by a mistake in my for loop? Thanks to everybody who wants to help solve my problem.
here is the code
%%capture
final = dict()
final['timeofday'] = []
final['image'] = []
final['name'] = []
final['location'] = []
final['price'] = []
final['rating'] = []
final['category'] = []
for i in range(1,(end_date - begin_date).days+2):
for j in range(2):
final['timeofday'].append('Morning')
for j in range(2):
final['timeofday'].append('Evening')
for i in range(len(final['timeofday'])):
if i%4 == 0:
final = top_recc(with_url, final)
else:
final = find_closest(with_url, final['location'][-1],final['timeofday'][i], final)
and it gives me the errors below:
IndexError Traceback (most recent call last)
File ~\anaconda3\lib\site-packages\pandas\core\indexing.py:1482, in _iLocIndexer._get_list_axis(self, key, axis)
1481 try:
-> 1482 return self.obj._take_with_is_copy(key, axis=axis)
1483 except IndexError as err:
1484 # re-raise with different error message
File ~\anaconda3\lib\site-packages\pandas\core\generic.py:3716, in NDFrame._take_with_is_copy(self, indices, axis)
3709 """
3710 Internal version of the `take` method that sets the `_is_copy`
3711 attribute to keep track of the parent dataframe (using in indexing
(...)
3714 See the docstring of `take` for full explanation of the parameters.
3715 """
-> 3716 result = self.take(indices=indices, axis=axis)
3717 # Maybe set copy if we didn't actually change the index.
File ~\anaconda3\lib\site-packages\pandas\core\generic.py:3703, in NDFrame.take(self, indices, axis, is_copy, **kwargs)
3701 self._consolidate_inplace()
-> 3703 new_data = self._mgr.take(
3704 indices, axis=self._get_block_manager_axis(axis), verify=True
3705 )
3706 return self._constructor(new_data).__finalize__(self, method="take")
File ~\anaconda3\lib\site-packages\pandas\core\internals\managers.py:897, in BaseBlockManager.take(self, indexer, axis, verify)
896 n = self.shape[axis]
--> 897 indexer = maybe_convert_indices(indexer, n, verify=verify)
899 new_labels = self.axes[axis].take(indexer)
File ~\anaconda3\lib\site-packages\pandas\core\indexers\utils.py:292, in maybe_convert_indices(indices, n, verify)
291 if mask.any():
--> 292 raise IndexError("indices are out-of-bounds")
293 return indices
IndexError: indices are out-of-bounds
The above exception was the direct cause of the following exception:
IndexError Traceback (most recent call last)
Input In [8], in <cell line: 16>()
16 for i in range(len(final['timeofday'])):
17 if i%4 == 0:
---> 18 final = top_recc(with_url, final)
19 else:
20 final = find_closest(with_url, final['location'][-1],final['timeofday'][i], final)
File ~\Desktop\Intelligent-Travel-Recommendation-System-master\attractions_recc.py:114, in top_recc(with_url, final)
112 i=0
113 while(1):
--> 114 first_recc = with_url.iloc[[i]]
115 if(first_recc['name'].values.T[0] not in final['name']):
116 final['name'].append(first_recc['name'].values.T[0])
File ~\anaconda3\lib\site-packages\pandas\core\indexing.py:967, in _LocationIndexer.__getitem__(self, key)
964 axis = self.axis or 0
966 maybe_callable = com.apply_if_callable(key, self.obj)
--> 967 return self._getitem_axis(maybe_callable, axis=axis)
File ~\anaconda3\lib\site-packages\pandas\core\indexing.py:1511, in _iLocIndexer._getitem_axis(self, key, axis)
1509 # a list of integers
1510 elif is_list_like_indexer(key):
-> 1511 return self._get_list_axis(key, axis=axis)
1513 # a single integer
1514 else:
1515 key = item_from_zerodim(key)
File ~\anaconda3\lib\site-packages\pandas\core\indexing.py:1485, in _iLocIndexer._get_list_axis(self, key, axis)
1482 return self.obj._take_with_is_copy(key, axis=axis)
1483 except IndexError as err:
1484 # re-raise with different error message
-> 1485 raise IndexError("positional indexers are out-of-bounds") from err
IndexError: positional indexers are out-of-bounds
IndexError: indices are out-of-bounds
The above exception was the direct cause of the following exception:
IndexError: positional indexers are out-of-bounds
source: https://github.com/sachinnpraburaj/Intelligent-Travel-Recommendation-System/blob/master/get_att_recc.ipynb

Problem while trying to delete row with certain value

I have a problem while trying to delete row:
Error:
ValueError Traceback (most recent call last)
<ipython-input-186-83339e440bcb> in <module>()
1 df.head()
2 df['bathrooms'] = df['bathrooms'].astype('int64')
----> 3 df['bathrooms'] = df[df['bathrooms'] != 28]
1 frames
/usr/local/lib/python3.7/dist-packages/pandas/core/frame.py in _set_item_frame_value(self, key, value)
3727 len_cols = 1 if is_scalar(cols) else len(cols)
3728 if len_cols != len(value.columns):
-> 3729 raise ValueError("Columns must be same length as key")
3730
3731 # align right-hand-side columns if self.columns
ValueError: Columns must be same length as key
Code:
df['bathrooms'] = df['bathrooms'].astype('int64')
df['bathrooms'] = df[df['bathrooms'] != 28]
dataframe:
Any help is very much appreciated.
df['bathrooms'] != 28 gives you bool values.
df[df['bathrooms'] != 28] gives you a dataframe.
then you are assigning a dataframe to a column. df['bathrooms'] = df[df['bathrooms'] != 28]
If you want a new dataframe you can do:
df = df[df['bathrooms'] != 28]

How to get rid of "AttributeError: 'float' object has no attribute 'log2' "

Say I have a data frame with columns of min value =36884326.0, and max value =6619162563.0, which I need to plot as box plot, so I tried to log transform the values, as follows,
diff["values"] = diff['value'].apply(lambda x: (x+1))
diff["log_values"] = diff['values'].apply(lambda x: x.log2(x))
However, the above lines are throwing the error as follows,
AttributeError Traceback (most recent call last)
<ipython-input-28-fe4e1d2286b0> in <module>
1 diff['value'].max()
2 diff["values"] = diff['value'].apply(lambda x: (x+1))
----> 3 diff["log_values"] = diff['values'].apply(lambda x: x.log2(x))
~/software/anaconda/lib/python3.7/site-packages/pandas/core/series.py in apply(self, func, convert_dtype, args, **kwds)
3192 else:
3193 values = self.astype(object).values
-> 3194 mapped = lib.map_infer(values, f, convert=convert_dtype)
3195
3196 if len(mapped) and isinstance(mapped[0], Series):
pandas/_libs/src/inference.pyx in pandas._libs.lib.map_infer()
<ipython-input-28-fe4e1d2286b0> in <lambda>(x)
1 diff['value'].max()
2 diff["values"] = diff['value'].apply(lambda x: (x+1))
----> 3 diff["log_values"] = diff['values'].apply(lambda x: x.log2(x))
AttributeError: 'float' object has no attribute 'log2'
Any suggestions would be great. Thanks
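You need to apply the numpy.log2 function instead, since a Python float has no log2 method; for example, diff["log_values"] = np.log2(diff["values"]). Please check the syntax in the NumPy documentation.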

Exception: Data must be 1-dimensional while making data frame from list

I am trying to make a dataframe from lists and I am getting the following exception:
Exception: Data must be 1-dimensional
project_transformed_data = pd.DataFrame(data = {'school_state':school_state,
'grade_one_hot':grade_one_hot,
'teacher_prefix':teacher_prefix,
'categories_one_hot':categories_one_hot,
'sub_categories_one_hot':sub_categories_one_hot,
'price_standardized':price_standardized,
'quantity_standardized':quantity_standardized,
'no_project_standardized':no_project_standardized,
'preprocessed_essays':preprocessed_essays,
'preprocessed_title':preprocessed_title,
'preprocessed_resource_description':preprocessed_resource_description
})
Full exception trace :
Exception Traceback (most recent call last)
<ipython-input-42-534fb60e58d6> in <module>()
9 'preprocessed_essays':preprocessed_essays,
10 'preprocessed_title':preprocessed_title,
---> 11 'preprocessed_resource_description':preprocessed_resource_description
12 })
/usr/local/lib/python3.6/dist-packages/pandas/core/frame.py in __init__(self, data, index, columns, dtype, copy)
328 dtype=dtype, copy=copy)
329 elif isinstance(data, dict):
--> 330 mgr = self._init_dict(data, index, columns, dtype=dtype)
331 elif isinstance(data, ma.MaskedArray):
332 import numpy.ma.mrecords as mrecords
/usr/local/lib/python3.6/dist-packages/pandas/core/frame.py in _init_dict(self, data, index, columns, dtype)
459 arrays = [data[k] for k in keys]
460
--> 461 return _arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
462
463 def _init_ndarray(self, values, index, columns, dtype=None, copy=False):
/usr/local/lib/python3.6/dist-packages/pandas/core/frame.py in _arrays_to_mgr(arrays, arr_names, index, columns, dtype)
6166
6167 # don't force copy because getting jammed in an ndarray anyway
-> 6168 arrays = _homogenize(arrays, index, dtype)
6169
6170 # from BlockManager perspective
/usr/local/lib/python3.6/dist-packages/pandas/core/frame.py in _homogenize(data, index, dtype)
6475 v = lib.fast_multiget(v, oindex.values, default=np.nan)
6476 v = _sanitize_array(v, index, dtype=dtype, copy=False,
-> 6477 raise_cast_failure=False)
6478
6479 homogenized.append(v)
/usr/local/lib/python3.6/dist-packages/pandas/core/series.py in _sanitize_array(data, index, dtype, copy, raise_cast_failure)
3273 elif subarr.ndim > 1:
3274 if isinstance(data, np.ndarray):
-> 3275 raise Exception('Data must be 1-dimensional')
3276 else:
3277 subarr = _asarray_tuplesafe(data, dtype=dtype)
Exception: Data must be 1-dimensional
The preprocessed_resource_description is a list, yet I am still getting the exception.
Any idea why I am getting this exception?
Sample data :
print(preprocessed_resource_description[0:2])
print(type(preprocessed_resource_description))
print(len(preprocessed_resource_description))
Output :
['kids kore wobble chair 14 blackreading tree classroom rug shape rectangle rug dimensions 7 8 w x 10 9 lseat foam pad blackjack chair purple cotton', 'robot mouse stem activity set']
<class 'list'>
20000

Trimming column names is generating ValueError

I have a table which I run through a function to trim its column names down to a length of 128 characters (I know it's really long, there isn't anything I can do about that) so I can use to_sql to create a database from it.
def truncate_column_names(df, length):
    """Return a renamed copy of ``df`` with over-long column names shortened.

    Every column name longer than ``length`` characters is replaced by its
    first ``length - 3`` characters followed by ``"..."``; names that already
    fit are left as they are. The renaming is done with ``df.rename``, so the
    input frame is not modified in place.

    NOTE: two names that share the same first ``length - 3`` characters are
    mapped to the same truncated name, so the result can contain duplicate
    column labels. Check ``result.columns.is_unique`` before calling code
    that needs unique column names.
    """
    rename = {}
    for col in df.columns:
        if len(col) > length:
            # Reserve three characters for the "..." marker.
            new_col = col[:length-3]+"..."
            rename[col] = new_col
    result = df.rename(columns=rename)
    return result
This function works fine and I get a table out of it, but the problem comes when I try to save the file: I get the error
ValueError: Buffer has wrong number of dimensions (expected 1, got 2)
The method I have doing some housekeeping before saving to a file included dropping duplicates and that is where this error is being spit out. I tested this by saving the original dataFrame and then just loading it, running the truncate function, and then trying drop_duplicates on the result and I get the same error.
The headers for the file before I try truncating looks like this:
http://pastebin.com/WXmvwHDg
I trimmed the file down to 1 record and still have the problem.
This was a result of the truncating causing some columns to have non-unique names.
To confirm this was an issue I did a short test:
In [113]: df = pd.DataFrame(columns=["ab", "ac", "ad"])
In [114]: df
Out[114]:
Empty DataFrame
Columns: [ab, ac, ad]
Index: []
In [115]: df.drop_duplicates()
Out[115]:
Empty DataFrame
Columns: [ab, ac, ad]
Index: []
In [116]: df.columns
Out[116]: Index([u'ab', u'ac', u'ad'], dtype='object')
In [117]: df.columns = df.columns.str[:1]
In [118]: df
Out[118]:
Empty DataFrame
Columns: [a, a, a]
Index: []
In [119]: df.drop_duplicates()
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-119-daf275b6788b> in <module>()
----> 1 df.drop_duplicates()
C:\Miniconda\lib\site-packages\pandas\util\decorators.pyc in wrapper(*args, **kw
args)
86 else:
87 kwargs[new_arg_name] = new_arg_value
---> 88 return func(*args, **kwargs)
89 return wrapper
90 return _deprecate_kwarg
C:\Miniconda\lib\site-packages\pandas\core\frame.pyc in drop_duplicates(self, su
bset, take_last, inplace)
2826 deduplicated : DataFrame
2827 """
-> 2828 duplicated = self.duplicated(subset, take_last=take_last)
2829
2830 if inplace:
C:\Miniconda\lib\site-packages\pandas\util\decorators.pyc in wrapper(*args, **kw
args)
86 else:
87 kwargs[new_arg_name] = new_arg_value
---> 88 return func(*args, **kwargs)
89 return wrapper
90 return _deprecate_kwarg
C:\Miniconda\lib\site-packages\pandas\core\frame.pyc in duplicated(self, subset,
take_last)
2871
2872 vals = (self[col].values for col in subset)
-> 2873 labels, shape = map(list, zip( * map(f, vals)))
2874
2875 ids = get_group_index(labels, shape, sort=False, xnull=False)
C:\Miniconda\lib\site-packages\pandas\core\frame.pyc in f(vals)
2860
2861 def f(vals):
-> 2862 labels, shape = factorize(vals, size_hint=min(len(self), _SI
ZE_HINT_LIMIT))
2863 return labels.astype('i8',copy=False), len(shape)
2864
C:\Miniconda\lib\site-packages\pandas\core\algorithms.pyc in factorize(values, s
ort, order, na_sentinel, size_hint)
133 table = hash_klass(size_hint or len(vals))
134 uniques = vec_klass()
--> 135 labels = table.get_labels(vals, uniques, 0, na_sentinel)
136
137 labels = com._ensure_platform_int(labels)
pandas\hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_labels (pandas\ha
shtable.c:13946)()
ValueError: Buffer has wrong number of dimensions (expected 1, got 2)
and got the same result. Using df.columns.unique() after the truncation, I found that I had ~200 duplicate columns.