Getting "TypeError: Unhashable type: 'Point'" while trying to use GeoPandas

I am currently trying to plot a series of location points on top of an existing map (.shp file), but I run into a "TypeError: unhashable type: 'Point'" error while trying to plot them. Here is the code:
fig,ax = plt.subplots(figsize=(15,15))
street_map.plot(ax = ax, alpha = 0.4, color = "grey")
geo_df[geo_df['geometry']].plot(ax = ax, markersize = 20, color = "blue", marker = "o", label="location")
plt.legend(prop={'size': 15})
And this is the error message that shows up. Is this because I am indexing it incorrectly? I'm just trying to overlay a df with a list of coordinates on an existing .shp file of the street layout of San Mateo County:
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-46-e41b19a6a359> in <module>
      1 fig,ax = plt.subplots(figsize=(15,15))
      2 street_map.plot(ax = ax, alpha = 0.4, color = "grey")
----> 3 geometries = geo_df[geo_df['geometry']].apply(lambda x: x.wkt).values
      4 plt.legend(prop={'size': 15})
~/anaconda3/lib/python3.7/site-packages/geopandas/geodataframe.py in __getitem__(self, key)
   1325         GeoDataFrame.
   1326         """
-> 1327         result = super().__getitem__(key)
   1328         geo_col = self._geometry_column_name
   1329         if isinstance(result, Series) and isinstance(result.dtype, GeometryDtype):
~/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py in __getitem__(self, key)
   2804         if is_iterator(key):
   2805             key = list(key)
-> 2806         indexer = self.loc._get_listlike_indexer(key, axis=1, raise_missing=True)[1]
   2807
   2808         # take() does not accept boolean indexers
~/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py in _get_listlike_indexer(self, key, axis, raise_missing)
   1545         if len(ax) or not len(key):
   1546             key = self._convert_for_reindex(key, axis)
-> 1547         indexer = ax.get_indexer_for(key)
   1548         keyarr = ax.reindex(keyarr)[0]
   1549         else:
~/anaconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py in get_indexer_for(self, target, **kwargs)
   4499         """
   4500         if self.is_unique:
-> 4501             return self.get_indexer(target, **kwargs)
   4502         indexer, _ = self.get_indexer_non_unique(target, **kwargs)
   4503         return indexer
~/anaconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py in get_indexer(self, target, method, limit, tolerance)
   2751         )
   2752
-> 2753         indexer = self._engine.get_indexer(target._ndarray_values)
   2754
   2755         return ensure_platform_int(indexer)
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_indexer()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.lookup()
TypeError: unhashable type: 'Point'

When you write geo_df[geo_df['geometry']], you are using geo_df['geometry'] as a mask. That is where the error comes from: pandas tries to treat the Point values as column labels and hash them during the lookup, and Point objects are not hashable. Since you haven't posted a reproducible example, my guess is that you wanted to select a single column (although there is no real need to do so in practice). Try the following:
fig,ax = plt.subplots(figsize=(15,15))
street_map.plot(ax = ax, alpha = 0.4, color = "grey")
geo_df['geometry'].plot(ax = ax, markersize = 20, color = "blue", marker = "o", label="location")
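If the goal is just to overlay the points on the street map, plotting the GeoDataFrame itself also works. Here is a minimal, illustrative sketch of the whole overlay, assuming the points start out as plain longitude/latitude columns in a regular DataFrame df (the column names and the CRS handling are assumptions, not taken from the question):
import geopandas as gpd
import matplotlib.pyplot as plt
from shapely.geometry import Point

# Illustrative only: build a GeoDataFrame from longitude/latitude columns
# and reuse the street map's CRS so the two layers line up.
geometry = [Point(xy) for xy in zip(df["longitude"], df["latitude"])]
geo_df = gpd.GeoDataFrame(df, geometry=geometry, crs=street_map.crs)

fig, ax = plt.subplots(figsize=(15, 15))
street_map.plot(ax=ax, alpha=0.4, color="grey")
geo_df.plot(ax=ax, markersize=20, color="blue", marker="o", label="location")
plt.legend(prop={"size": 15})
plt.show()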

Related

Can't plot data frame column, I want to plot a scatter graph

My data
I want to make a scatter graph of 2 columns but it's not working. I have confirmed the column exists when I list the columns, yet it doesn't exist according to the code at the bottom. Any help would be appreciated.
df = data
x = Birth rate (object)
y = CO₂ (per capita). (float64)
i tried
data.plot(x='Birth_rate', y='CO₂_(per capita)')
plt.title('1960 - C02 emissions vs. Crude birth rates')
It resulted in KeyError: 'Birth_rate'
File /shared-libs/python3.9/py/lib/python3.9/site-packages/pandas/plotting/_core.py:920, in PlotAccessor.__call__(self, *args, **kwargs)
918 if is_integer(x) and not data.columns.holds_integer():
919 x = data_cols[x]
--> 920 elif not isinstance(data[x], ABCSeries):
921 raise ValueError("x must be a label or position")
922 data = data.set_index(x)
File /shared-libs/python3.9/py/lib/python3.9/site-packages/pandas/core/frame.py:3024, in DataFrame.__getitem__(self, key)
3022 if self.columns.nlevels > 1:
3023 return self._getitem_multilevel(key)
-> 3024 indexer = self.columns.get_loc(key)
3025 if is_integer(indexer):
3026 indexer = [indexer]
File /shared-libs/python3.9/py/lib/python3.9/site-packages/pandas/core/indexes/base.py:3083, in Index.get_loc(self, key, method, tolerance)
3081 return self._engine.get_loc(casted_key)
3082 except KeyError as err:
-> 3083 raise KeyError(key) from err
3085 if tolerance is not None:
3086 tolerance = self._convert_tolerance(tolerance, np.asarray(key))
KeyError: 'Birth_rate'
I tried an index as well and got this error:
KeyError: "None of ['Birth_rate'] are in the columns"
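A KeyError like this almost always means the label passed to plot doesn't exactly match what is in data.columns (spaces vs. underscores, trailing whitespace, and so on). A minimal diagnostic sketch, assuming the real labels contain spaces as the description above suggests:
import pandas as pd
import matplotlib.pyplot as plt

# Print the exact labels; repr() makes stray spaces or underscores visible.
print([repr(c) for c in data.columns])

# Hypothetical example: if the real label turns out to be "Birth rate",
# use that exact string, and convert it to numeric since it is dtype object.
data["Birth rate"] = pd.to_numeric(data["Birth rate"], errors="coerce")
data.plot(x="Birth rate", y="CO₂ (per capita)", kind="scatter")
plt.title("1960 - CO2 emissions vs. Crude birth rates")
plt.show()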

How to get rid of "AttributeError: 'float' object has no attribute 'log2' "

Say I have a data frame with a column whose minimum value is 36884326.0 and maximum value is 6619162563.0, which I need to plot as a box plot, so I tried to log-transform the values as follows:
diff["values"] = diff['value'].apply(lambda x: (x+1))
diff["log_values"] = diff['values'].apply(lambda x: x.log2(x))
However, the above lines throw the following error:
AttributeError Traceback (most recent call last)
<ipython-input-28-fe4e1d2286b0> in <module>
1 diff['value'].max()
2 diff["values"] = diff['value'].apply(lambda x: (x+1))
----> 3 diff["log_values"] = diff['values'].apply(lambda x: x.log2(x))
~/software/anaconda/lib/python3.7/site-packages/pandas/core/series.py in apply(self, func, convert_dtype, args, **kwds)
3192 else:
3193 values = self.astype(object).values
-> 3194 mapped = lib.map_infer(values, f, convert=convert_dtype)
3195
3196 if len(mapped) and isinstance(mapped[0], Series):
pandas/_libs/src/inference.pyx in pandas._libs.lib.map_infer()
<ipython-input-28-fe4e1d2286b0> in <lambda>(x)
1 diff['value'].max()
2 diff["values"] = diff['value'].apply(lambda x: (x+1))
----> 3 diff["log_values"] = diff['values'].apply(lambda x: x.log2(x))
AttributeError: 'float' object has no attribute 'log2'
Any suggestions would be great. Thanks
You need to apply the numpy.log2 function; please check its syntax in the NumPy documentation.
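For reference, a minimal sketch of the fix: log2 lives in NumPy, not on Python floats, so pass the function itself rather than calling it as a method of x.
import numpy as np

diff["values"] = diff["value"] + 1            # shift so log2 never sees 0
diff["log_values"] = np.log2(diff["values"])  # vectorized; no .apply needed
# equivalent: diff["values"].apply(np.log2)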

How to fix "Data must be 1-dimensional" exception in python

I am trying to create a dataset for checking my logistic regression algorithm, but I am unable to create a pandas DataFrame from a dictionary.
I am getting a 'Data must be 1-dimensional' exception.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

x1 = np.random.random(size=(10,1))*2
x2 = np.random.random(size=(10,1))*2
x3 = np.random.random(size=(10,1))*2 + 2
x4 = np.random.random(size=(10,1))*2 + 2
y0 = np.zeros(shape=(10,1))
y1 = np.ones(shape=(10,1))
plt.scatter(x1,x2, color='g', marker='o')
plt.scatter(x3,x4, color='r', marker='o')
dict_data = { 'X1':np.concatenate((x1,x3)),
'X2':np.concatenate((x2,x4)),
'Y':np.concatenate((y0,y1))}
data = pd.DataFrame(dict_data, index=np.arange(20))
I am getting this as output, with the error "Data must be 1-dimensional".
--------------------------------------------------------------------------
Exception Traceback (most recent call last)
<ipython-input-49-fe81f079ebc6> in <module>
13 dict_data = { 'X1':np.concatenate((x1,x3)), 'X2':np.concatenate((x2,x4)),'Y':np.concatenate((y0,y1))}
14 #print(dict_data.shape)
---> 15 data = pd.DataFrame(dict_data, index=np.arange(20).reshape(20))
~/anaconda3/lib/python3.6/site-packages/pandas/core/frame.py in __init__(self, data, index, columns, dtype, copy)
328 dtype=dtype, copy=copy)
329 elif isinstance(data, dict):
--> 330 mgr = self._init_dict(data, index, columns, dtype=dtype)
331 elif isinstance(data, ma.MaskedArray):
332 import numpy.ma.mrecords as mrecords
~/anaconda3/lib/python3.6/site-packages/pandas/core/frame.py in _init_dict(self, data, index, columns, dtype)
459 arrays = [data[k] for k in keys]
460
--> 461 return _arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
462
463 def _init_ndarray(self, values, index, columns, dtype=None, copy=False):
~/anaconda3/lib/python3.6/site-packages/pandas/core/frame.py in _arrays_to_mgr(arrays, arr_names, index, columns, dtype)
6166
6167 # don't force copy because getting jammed in an ndarray anyway
-> 6168 arrays = _homogenize(arrays, index, dtype)
6169
6170 # from BlockManager perspective
~/anaconda3/lib/python3.6/site-packages/pandas/core/frame.py in _homogenize(data, index, dtype)
6475 v = lib.fast_multiget(v, oindex.values, default=np.nan)
6476 v = _sanitize_array(v, index, dtype=dtype, copy=False,
-> 6477 raise_cast_failure=False)
6478
6479 homogenized.append(v)
~/anaconda3/lib/python3.6/site-packages/pandas/core/series.py in _sanitize_array(data, index, dtype, copy, raise_cast_failure)
3273 elif subarr.ndim > 1:
3274 if isinstance(data, np.ndarray):
-> 3275 raise Exception('Data must be 1-dimensional')
3276 else:
3277 subarr = _asarray_tuplesafe(data, dtype=dtype)
Exception: Data must be 1-dimensional
np.random.random(size=(10,1)) produces a 2-dimensional array of shape (10, 1), whereas pandas constructs a DataFrame from a collection of 1-dimensional arrays.
So use np.random.random(size=10) to make 1-D arrays, which can then be used to build the DataFrame.
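A minimal sketch of both variants of the fix: either generate 1-D arrays directly, or flatten the existing (10, 1) arrays with .ravel() before building the DataFrame.
import numpy as np
import pandas as pd

# Option 1: generate 1-D arrays from the start.
x1 = np.random.random(size=10) * 2
x2 = np.random.random(size=10) * 2
x3 = np.random.random(size=10) * 2 + 2
x4 = np.random.random(size=10) * 2 + 2
y0 = np.zeros(10)
y1 = np.ones(10)

# Option 2 (if the (10, 1) arrays already exist): flatten them first, e.g. x1.ravel().
dict_data = {'X1': np.concatenate((x1, x3)),
             'X2': np.concatenate((x2, x4)),
             'Y': np.concatenate((y0, y1))}
data = pd.DataFrame(dict_data, index=np.arange(20))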

TypeError using sns.distplot() on dataframe with one row

I'm plotting subsets of a dataframe, and one subset happens to have only one row. This is the only reason I can think of for why it's causing problems. This is what it looks like:
problem_dataframe = prob_df[prob_df['Date']==7]
problem_dataframe.head()
I try to do:
sns.distplot(problem_dataframe['floatTime'])
But I get the error:
TypeError: len() of unsized object
Would someone please tell me what's causing this and how to work around it?
The TypeError is resolved by setting bins=1.
But that uncovers a different error, ValueError: x must be 1D or 2D, which gets triggered by an internal function in Matplotlib's hist(), called _normalize_input():
import pandas as pd
import seaborn as sns
df = pd.DataFrame(['Tue','Feb',7,'15:37:58',2017,15.6196]).T
df.columns = ['Day','Month','Date','Time','Year','floatTime']
sns.distplot(df.floatTime, bins=1)
Output:
ValueError Traceback (most recent call last)
<ipython-input-25-858df405d200> in <module>()
6 df.columns = ['Day','Month','Date','Time','Year','floatTime']
7 df.floatTime.values.astype(float)
----> 8 sns.distplot(df.floatTime, bins=1)
/home/andrew/anaconda3/lib/python3.6/site-packages/seaborn/distributions.py in distplot(a, bins, hist, kde, rug, fit, hist_kws, kde_kws, rug_kws, fit_kws, color, vertical, norm_hist, axlabel, label, ax)
213 hist_color = hist_kws.pop("color", color)
214 ax.hist(a, bins, orientation=orientation,
--> 215 color=hist_color, **hist_kws)
216 if hist_color != color:
217 hist_kws["color"] = hist_color
/home/andrew/anaconda3/lib/python3.6/site-packages/matplotlib/__init__.py in inner(ax, *args, **kwargs)
1890 warnings.warn(msg % (label_namer, func.__name__),
1891 RuntimeWarning, stacklevel=2)
-> 1892 return func(ax, *args, **kwargs)
1893 pre_doc = inner.__doc__
1894 if pre_doc is None:
/home/andrew/anaconda3/lib/python3.6/site-packages/matplotlib/axes/_axes.py in hist(self, x, bins, range, normed, weights, cumulative, bottom, histtype, align, orientation, rwidth, log, color, label, stacked, **kwargs)
6141 x = np.array([[]])
6142 else:
-> 6143 x = _normalize_input(x, 'x')
6144 nx = len(x) # number of datasets
6145
/home/andrew/anaconda3/lib/python3.6/site-packages/matplotlib/axes/_axes.py in _normalize_input(inp, ename)
6080 else:
6081 raise ValueError(
-> 6082 "{ename} must be 1D or 2D".format(ename=ename))
6083 if inp.shape[1] < inp.shape[0]:
6084 warnings.warn(
ValueError: x must be 1D or 2D
_normalize_input() was removed from Matplotlib (it looks like sometime last year), so I guess Seaborn is referring to an older version under the hood.
You can see _normalize_input() in this old commit:
def _normalize_input(inp, ename='input'):
"""Normalize 1 or 2d input into list of np.ndarray or
a single 2D np.ndarray.
Parameters
----------
inp : iterable
ename : str, optional
Name to use in ValueError if `inp` can not be normalized
"""
if (isinstance(x, np.ndarray) or
not iterable(cbook.safe_first_element(inp))):
# TODO: support masked arrays;
inp = np.asarray(inp)
if inp.ndim == 2:
# 2-D input with columns as datasets; switch to rows
inp = inp.T
elif inp.ndim == 1:
# new view, single row
inp = inp.reshape(1, inp.shape[0])
else:
raise ValueError(
"{ename} must be 1D or 2D".format(ename=ename))
...
I can't figure out why inp.ndim!=1, though. Performing the same np.asarray().ndim on the input returns 1 as expected:
np.asarray(df.floatTime).ndim # 1
So you're facing a few obstacles if you want to make a single-valued input work with sns.distplot().
Suggested Workaround
Check for a single-element df.floatTime, and if that's the case, just use plt.hist() instead (which is what distplot goes to anyway, along with KDE):
plt.hist(df.floatTime)
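A minimal sketch of that workaround, branching on the subset size (the helper name is illustrative, not from the original post):
import matplotlib.pyplot as plt
import seaborn as sns

def plot_float_time(series):
    # distplot's KDE needs more than one observation,
    # so fall back to a plain histogram for single-row subsets.
    if len(series) < 2:
        plt.hist(series, bins=1)
    else:
        sns.distplot(series)

plot_float_time(problem_dataframe['floatTime'])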

Trimming column names is generating ValueError

I have a table which I run through a function to trim its column names down to 128 characters (I know that's really long, but there isn't anything I can do about that) so I can use to_sql to create a database table from it.
def truncate_column_names(df, length):
rename = {}
for col in df.columns:
if len(col) > length:
new_col = col[:length-3]+"..."
rename[col] = new_col
result = df.rename(columns=rename)
return result
The function works and I get a table out just fine, but the problem comes when I try to save the file; I get the error
ValueError: Buffer has wrong number of dimensions (expected 1, got 2)
The housekeeping I do before saving to a file includes dropping duplicates, and that is where this error is being thrown. I tested this by saving the original DataFrame, loading it back, running the truncate function, and then trying drop_duplicates on the result, and I get the same error.
The headers for the file before I try truncating look like this:
http://pastebin.com/WXmvwHDg
I trimmed the file down to 1 record and still have the problem.
This was a result of the truncation causing some columns to have non-unique names.
To confirm this was the issue, I did a short test:
In [113]: df = pd.DataFrame(columns=["ab", "ac", "ad"])
In [114]: df
Out[114]:
Empty DataFrame
Columns: [ab, ac, ad]
Index: []
In [115]: df.drop_duplicates()
Out[115]:
Empty DataFrame
Columns: [ab, ac, ad]
Index: []
In [116]: df.columns
Out[116]: Index([u'ab', u'ac', u'ad'], dtype='object')
In [117]: df.columns = df.columns.str[:1]
In [118]: df
Out[118]:
Empty DataFrame
Columns: [a, a, a]
Index: []
In [119]: df.drop_duplicates()
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-119-daf275b6788b> in <module>()
----> 1 df.drop_duplicates()
C:\Miniconda\lib\site-packages\pandas\util\decorators.pyc in wrapper(*args, **kwargs)
     86             else:
     87                 kwargs[new_arg_name] = new_arg_value
---> 88             return func(*args, **kwargs)
     89         return wrapper
     90     return _deprecate_kwarg
C:\Miniconda\lib\site-packages\pandas\core\frame.pyc in drop_duplicates(self, subset, take_last, inplace)
   2826         deduplicated : DataFrame
   2827         """
-> 2828         duplicated = self.duplicated(subset, take_last=take_last)
   2829
   2830         if inplace:
C:\Miniconda\lib\site-packages\pandas\util\decorators.pyc in wrapper(*args, **kwargs)
     86             else:
     87                 kwargs[new_arg_name] = new_arg_value
---> 88             return func(*args, **kwargs)
     89         return wrapper
     90     return _deprecate_kwarg
C:\Miniconda\lib\site-packages\pandas\core\frame.pyc in duplicated(self, subset, take_last)
   2871
   2872         vals = (self[col].values for col in subset)
-> 2873         labels, shape = map(list, zip( * map(f, vals)))
   2874
   2875         ids = get_group_index(labels, shape, sort=False, xnull=False)
C:\Miniconda\lib\site-packages\pandas\core\frame.pyc in f(vals)
   2860
   2861         def f(vals):
-> 2862             labels, shape = factorize(vals, size_hint=min(len(self), _SIZE_HINT_LIMIT))
   2863             return labels.astype('i8',copy=False), len(shape)
   2864
C:\Miniconda\lib\site-packages\pandas\core\algorithms.pyc in factorize(values, sort, order, na_sentinel, size_hint)
    133     table = hash_klass(size_hint or len(vals))
    134     uniques = vec_klass()
--> 135     labels = table.get_labels(vals, uniques, 0, na_sentinel)
    136
    137     labels = com._ensure_platform_int(labels)
pandas\hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_labels (pandas\hashtable.c:13946)()
ValueError: Buffer has wrong number of dimensions (expected 1, got 2)
and got the same result. Using df.columns.unique() after the truncation, I found I had ~200 duplicate column names.
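One way around this, sketched below rather than taken from the original post, is to make the truncation collision-aware so that clashing names get a numeric suffix and stay unique:
def truncate_column_names_unique(df, length=128):
    # Illustrative variant of the truncate function: append a counter suffix
    # whenever two truncated names would collide, so drop_duplicates keeps working.
    seen = {}
    new_cols = []
    for col in df.columns:
        new_col = col if len(col) <= length else col[:length - 3] + "..."
        if new_col in seen:
            seen[new_col] += 1
            suffix = "_%d" % seen[new_col]
            new_col = new_col[:length - len(suffix)] + suffix
        else:
            seen[new_col] = 0
        new_cols.append(new_col)
    result = df.copy()
    result.columns = new_cols
    return result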