BeautifulSoup getting text from a script within an anchor tag - beautifulsoup

So I have a <tr> tag with multiple <td> as a substring of it.
<tr>
<td align='center' class="row2">
10
</td>
<td align="center" class="row2">
<a href='https://forum.net/index.php?;showuser=17311'>xxboxx</a>
</td>
<td align="center" class="row2">
<!--script type="text/javascript">
s = "236".replace(/,/g,'');
document.write(abbrNum(s,1));
</script-->
236
</td>
</tr>
this is my current code; i have no problem getting the first two, but trying to get out of a script I've tried various ways provided by other similar questions on stackoverflow; but I've not been successful.
def extractDataFromRow2(_url, 'td', 'row2', 'align' , 'center'):
try:
for container in _url.find_all('td', {'class': 'row2','align': 'center''}):
# get data from topic title in table cell
replies_numb = container.select_one(
'a[href^="javascript:]"').text
print('there are ' + replies_numb + ' replies')
topic_starter = container.next_sibling.text
print('the owner of this topic is ' + topic_starter)
for total_view in container.find('a', href=True, style=True):
#total_view = container.select_one(style="background-color:").text
#total_view = container.find(("td")["style"])
#total_view = container.next_sibling.find_next_sibling/next_sibling
#but they're not able to access the last one within <tr> tag
print(total_view )
if replies_numb and topic_starter is not None:
dict_replies = {'Replies' : replies_numb}
dict_topic_S = {'Topic_Starter' : topic_starter}
list_1.append(dict_replies)
list_2.append(dict_topic_S)
else:
print('no data')
except Exception as e:
print('Error.extractDataFromRow2:', e)
return None
Link of the page I'm trying to get data from.
if there is a much cleaner approach to this; I'm more than happy to learn from the feedbacks given.

The html code you shared might not be sufficent for answer so i checked out the url you shared. Here is the way to scrape your table.
from bs4 import BeautifulSoup
import requests
r = requests.get("https://forum.lowyat.net/ReviewsandGuides")
soup = BeautifulSoup(r.text, 'lxml')
index = 0
#First two rows of table is not data so we skip it. Last row of table is for searching we also skip it. Table contains 30 rows of data. That is why we are slicing list
for row in soup.select('table[cellspacing="1"] > tr')[2:32]:
replies = row.select_one('td:nth-of-type(4)').text.strip()
topic_started = row.select_one('td:nth-of-type(5)').text.strip()
total_views = row.select_one('td:nth-of-type(6)').text.strip()
index +=1
print(index,replies, topic_started, total_views)
The result is
1 148 blurjoey 9,992
2 10 xxboxx 263
3 18 JayceOoi 1,636
4 373 idoblu 54,589
5 237 blurjoey 16,101
6 526 JayceOoi 57,577
7 131 JayceOoi 34,354
8 24 blurjoey 4,261
9 2 JayceOoi 249
10 72 KeyMochi 26,622
11 7 champu 331
12 0 asunakirito 210
13 0 asunakirito 172
14 0 asunakirito 199
15 17 blurjoey 3,351
16 860 blurjoey 112,556
17 0 chennegan 174
18 0 goldfries 185
19 4 JayceOoi 601
20 2 JayceOoi 309
21 10 blurjoey 1,826
22 3 JayceOoi 398
23 4 squallz05 310
24 0 asunakirito 265
25 25 asunakirito 12,326
26 0 blurjoey 279
27 14 JayceOoi 2,092
28 0 chennegan 305
29 8 Pharamain 732
30 19 idoblu 1,273

Please note, you must use lxml parser or it will error.
def extractDataFromRow2(url):
results = []
html = requests.get(url).text
soup = BeautifulSoup(html, 'lxml')
for row in soup.select('#forum_topic_list tr'):
cols = row.select('td')
if len(cols) != 7:
continue
cols[2] = cols[2].find('a') # fix title
values = [c.text.strip() for c in cols]
results.append({
'Title' : values[2],
'Replies' : values[3],
'Topic_Starter' : values[4],
'total_view: ' : values[5]
})
return results
threadlists = extractDataFromRow2('https://forum.....')
print(threadlists)
results
[
{
"Title": "Xiaomi 70Mai Pro",
"Replies": "148",
"Topic_Starter": "blurjoey",
"total_view: ": "9,996"
},
{
"Title": "Adata XPG SX8200 Pro 512GB NVME SSD",
"Replies": "10",
"Topic_Starter": "xxboxx",
"total_view: ": "265"
},
....
]

Related

Pandas SystemError: \objects\tupleobject.c:914: bad argument to internal function

I am encountering below error message sporadically.
C:\ProgramData\Anaconda3\envs\py39\lib\site-packages\pandas\core\series.py in unstack(self, level, fill_value)
3827 from pandas.core.reshape.reshape import unstack
3828
-> 3829 return unstack(self, level, fill_value)
3830
3831 # ----------------------------------------------------------------------
C:\ProgramData\Anaconda3\envs\py39\lib\site-packages\pandas\core\reshape\reshape.py in unstack(obj, level, fill_value)
428 if is_extension_array_dtype(obj.dtype):
429 return _unstack_extension_series(obj, level, fill_value)
--> 430 unstacker = _Unstacker(
431 obj.index, level=level, constructor=obj._constructor_expanddim
432 )
C:\ProgramData\Anaconda3\envs\py39\lib\site-packages\pandas\core\reshape\reshape.py in __init__(self, index, level, constructor)
116 raise ValueError("Unstacked DataFrame is too big, causing int32 overflow")
117
--> 118 self._make_selectors()
119
120 #cache_readonly
C:\ProgramData\Anaconda3\envs\py39\lib\site-packages\pandas\core\reshape\reshape.py in _make_selectors(self)
150
151 # make the mask
--> 152 remaining_labels = self.sorted_labels[:-1]
153 level_sizes = [len(x) for x in new_levels]
154
pandas\_libs\properties.pyx in pandas._libs.properties.CachedProperty.__get__()
C:\ProgramData\Anaconda3\envs\py39\lib\site-packages\pandas\core\reshape\reshape.py in sorted_labels(self)
137 #cache_readonly
138 def sorted_labels(self):
--> 139 indexer, to_sort = self._indexer_and_to_sort
140 return [line.take(indexer) for line in to_sort]
141
pandas\_libs\properties.pyx in pandas._libs.properties.CachedProperty.__get__()
C:\ProgramData\Anaconda3\envs\py39\lib\site-packages\pandas\core\reshape\reshape.py in _indexer_and_to_sort(self)
127 sizes = [len(x) for x in levs[:v] + levs[v + 1 :] + [levs[v]]]
128
--> 129 comp_index, obs_ids = get_compressed_ids(to_sort, sizes)
130 ngroups = len(obs_ids)
131
C:\ProgramData\Anaconda3\envs\py39\lib\site-packages\pandas\core\sorting.py in get_compressed_ids(labels, sizes)
195 tuple of (comp_ids, obs_group_ids)
196 """
--> 197 ids = get_group_index(labels, sizes, sort=True, xnull=False)
198 return compress_group_index(ids, sort=True)
199
C:\ProgramData\Anaconda3\envs\py39\lib\site-packages\pandas\core\sorting.py in get_group_index(labels, shape, sort, xnull)
139 labels = map(ensure_int64, labels)
140 if not xnull:
--> 141 labels, shape = map(list, zip(*map(maybe_lift, labels, shape)))
142
143 labels = list(labels)
SystemError: \objects\tupleobject.c:914: bad argument to internal function
the above was called from the actual code:
1539 slice_tmp = my_orderlog.groupby(['ticker','slice']).activeOrderSize.sum().to_frame('size').reset_index()
1540 slice_tmp['unit_size'] = slice_tmp['size']/slice_tmp['ticker'].map(wt_map)
1541 logger.info(f'TRYING TO DEBUG: {slice_tmp}')
-> 1542 breakdown = slice_tmp.groupby(['ticker','slice']).unit_size.sum().unstack(level=0)
1543 logger.info(f'TRYING TO DEBUG {breakdown}')
The SystemError of bad argument of internal functions are not repeatable and very difficult (if impossible) to catch.
Note that I try to log the in-question dataframe "slice_tmp". but when i get that and run the exact same code, i.e., the "groupby(['ticker','slice']).unit_size.sum().unstack(level=0)" in jupyter notebook, the code runs no problem.
any pointer/ some similar experiences on this odd one please? FWIW i'm running Pandas 1.2.4.

SpecificationError: Function names must be unique if there is no new column names assigned

I want to create a new column in the clin dataframe based on the following conditions:
1 if vals>=2*365 or is NAN
otherwise 0
I then assign the new column name as SURV.
import numpy as np
vals = clin['days_to_death'].astype(np.float32)
# non-LTS is 0, LTS is 1
surv = [1 if ( v>=2*365 or np.isnan(v) ) else 0 for v in vals ]
clin['SURV'] = clin.apply(surv, axis=1)
Traceback:
SpecificationError: Function names must be unique if there is no new column names assigned
---------------------------------------------------------------------------
SpecificationError Traceback (most recent call last)
<ipython-input-31-603dee8413ce> in <module>
5 # non-LTS is 0, LTS is 1
6 surv = [1 if ( v>=2*365 or np.isnan(v) ) else 0 for v in vals ]
----> 7 clin['SURV'] = clin.apply(surv, axis=1)
/shared-libs/python3.7/py/lib/python3.7/site-packages/pandas/core/frame.py in apply(self, func, axis, raw, result_type, args, **kwds)
7766 kwds=kwds,
7767 )
-> 7768 return op.get_result()
7769
7770 def applymap(self, func, na_action: Optional[str] = None) -> DataFrame:
/shared-libs/python3.7/py/lib/python3.7/site-packages/pandas/core/apply.py in get_result(self)
146 # multiple values for keyword argument "axis"
147 return self.obj.aggregate( # type: ignore[misc]
--> 148 self.f, axis=self.axis, *self.args, **self.kwds
149 )
150
/shared-libs/python3.7/py/lib/python3.7/site-packages/pandas/core/frame.py in aggregate(self, func, axis, *args, **kwargs)
7572 axis = self._get_axis_number(axis)
7573
-> 7574 relabeling, func, columns, order = reconstruct_func(func, **kwargs)
7575
7576 result = None
/shared-libs/python3.7/py/lib/python3.7/site-packages/pandas/core/aggregation.py in reconstruct_func(func, **kwargs)
93 # there is no reassigned name
94 raise SpecificationError(
---> 95 "Function names must be unique if there is no new column names "
96 "assigned"
97 )
SpecificationError: Function names must be unique if there is no new column names assigned
clin
clin = pd.DataFrame([[1, '466', '47', 0, '90'],
[1, '357', '54', 1, '80'],
[1, '108', '72', 1, '60'],
[1, '254', '51', 0, '80'],
[1, '138', '78', 1, '80'],
[0, nan, '67', 0, '60']], columns=['vital_status', 'days_to_death', 'age_at_initial_pathologic_diagnosis',
'gender', 'karnofsky_performance_score'], index=['TCGA-06-1806', 'TCGA-06-5408', 'TCGA-06-5410', 'TCGA-06-5411',
'TCGA-06-5412', 'TCGA-06-5413'])
Expected output:
vital_status
days_to_death
age_at_initial_pathologic_diagnosis
gender
karnofsky_performance_score
SURV
TCGA-06-1806
1
466
47
0
90
0
TCGA-06-5408
1
357
54
1
80
0
TCGA-06-5410
1
108
72
1
60
0
TCGA-06-5411
1
254
51
0
80
0
TCGA-06-5412
1
138
78
1
80
0
TCGA-06-5413
0
nan
67
0
60
1
Make a new column of all 0's and then update the column with your desired parameters.
clin['SURV'] = 0
clin.loc[pd.to_numeric(clin.days_to_death).ge(2*365) | clin.days_to_death.isna(), 'SURV'] = 1
print(clin)
Output:
vital_status days_to_death age_at_initial_pathologic_diagnosis gender karnofsky_performance_score SURV
TCGA-06-1806 1 466 47 0 90 0
TCGA-06-5408 1 357 54 1 80 0
TCGA-06-5410 1 108 72 1 60 0
TCGA-06-5411 1 254 51 0 80 0
TCGA-06-5412 1 138 78 1 80 0
TCGA-06-5413 0 NaN 67 0 60 1

pyspark toPandas() IndexError: index is out of bounds

I'm experiencing a weird behaviour of pyspark's .toPandas() method running from Jupyt. For example, if I try this:
data = [{"Category": 'Category A', "ID": 1, "Value": 12.40},
{"Category": 'Category B', "ID": 2, "Value": 30.10},
{"Category": 'Category C', "ID": 3, "Value": 100.01}
]
# Create data frame (where spark is a SparkSession)
df = spark.createDataFrame(data)
df.show()
I'm able to successfully create the pyspark dataframe. However, when converting to pandas I get IndexError: index is out of bounds:
IndexError Traceback (most recent call last)
<path_to_python>/lib/python3.7/site-packages/IPython/core/formatters.py in __call__(self, obj)
700 type_pprinters=self.type_printers,
701 deferred_pprinters=self.deferred_printers)
--> 702 printer.pretty(obj)
703 printer.flush()
704 return stream.getvalue()
<path_to_python>/lib/python3.7/site-packages/IPython/lib/pretty.py in pretty(self, obj)
400 if cls is not object \
401 and callable(cls.__dict__.get('__repr__')):
--> 402 return _repr_pprint(obj, self, cycle)
403
404 return _default_pprint(obj, self, cycle)
<path_to_python>/lib/python3.7/site-packages/IPython/lib/pretty.py in _repr_pprint(obj, p, cycle)
695 """A pprint that just redirects to the normal repr function."""
696 # Find newlines and replace them with p.break_()
--> 697 output = repr(obj)
698 for idx,output_line in enumerate(output.splitlines()):
699 if idx:
<path_to_python>/lib/python3.7/site-packages/pandas/core/base.py in __repr__(self)
76 Yields Bytestring in Py2, Unicode String in py3.
77 """
---> 78 return str(self)
79
80
<path_to_python>/lib/python3.7/site-packages/pandas/core/base.py in __str__(self)
55
56 if compat.PY3:
---> 57 return self.__unicode__()
58 return self.__bytes__()
59
<path_to_python>/lib/python3.7/site-packages/pandas/core/frame.py in __unicode__(self)
632 width = None
633 self.to_string(buf=buf, max_rows=max_rows, max_cols=max_cols,
--> 634 line_width=width, show_dimensions=show_dimensions)
635
636 return buf.getvalue()
<path_to_python>/lib/python3.7/site-packages/pandas/core/frame.py in to_string(self, buf, columns, col_space, header, index, na_rep, formatters, float_format, sparsify, index_names, justify, max_rows, max_cols, show_dimensions, decimal, line_width)
719 decimal=decimal,
720 line_width=line_width)
--> 721 formatter.to_string()
722
723 if buf is None:
<path_to_python>/lib/python3.7/site-packages/pandas/io/formats/format.py in to_string(self)
596 else:
597
--> 598 strcols = self._to_str_columns()
599 if self.line_width is None: # no need to wrap around just print
600 # the whole frame
<path_to_python>/lib/python3.7/site-packages/pandas/io/formats/format.py in _to_str_columns(self)
527 str_columns = [[label] for label in self.header]
528 else:
--> 529 str_columns = self._get_formatted_column_labels(frame)
530
531 stringified = []
<path_to_python>/lib/python3.7/site-packages/pandas/io/formats/format.py in _get_formatted_column_labels(self, frame)
770 need_leadsp[x] else x]
771 for i, (col, x) in enumerate(zip(columns,
--> 772 fmt_columns))]
773
774 if self.show_row_idx_names:
<path_to_python>/lib/python3.7/site-packages/pandas/io/formats/format.py in <listcomp>(.0)
769 str_columns = [[' ' + x if not self._get_formatter(i) and
770 need_leadsp[x] else x]
--> 771 for i, (col, x) in enumerate(zip(columns,
772 fmt_columns))]
773
<path_to_python>/lib/python3.7/site-packages/pandas/io/formats/format.py in _get_formatter(self, i)
362 else:
363 if is_integer(i) and i not in self.columns:
--> 364 i = self.columns[i]
365 return self.formatters.get(i, None)
366
<path_to_python>/lib/python3.7/site-packages/pandas/core/indexes/base.py in __getitem__(self, key)
3956 if is_scalar(key):
3957 key = com.cast_scalar_indexer(key)
-> 3958 return getitem(key)
3959
3960 if isinstance(key, slice):
IndexError: index 3 is out of bounds for axis 0 with size 3
I'm not sure where the problem can be, I've used this many times without problems but this time I tried a new environment and I got this issue. In case it can help my configuration is:
Python: 3.7.6;
Pandas: 0.24.2;
PySpark: 2.4.5
Any idea?
Thanks :)
I found the issue. Trying to minimize the code to reproduce the error I omitted that I was adding a pandas setting:
pd.set_option('display.max_columns', -1)
This caused the error independently of the dataframe being converted. To fix it I just specified a positive number of columns or None.

What am i missing? sklearn fit module

I think input to Machine Learning cannot be text in .fit what should I use then or how should i change my code? This ai should be training with Year month and days and it should give as output Crops (First, Second, Third) (I don't know what more details to add but i need to post more details so im just typing random stuff at the moment.
code:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
event_data = pd.read_excel("Jacob's Farming Contest.xlsx")
event_data.fillna(0, inplace=True)
X = event_data.drop(columns=['First Crop', 'Second Crop', 'Third Crop'])
y = event_data.drop(columns=['Year', 'Month', 'Day'])
X_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
model = DecisionTreeClassifier()
model.fit(X_train, y_train)
# predictions = model.predict(X_test)
error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-44-3fbcae548642> in <module>
11
12 model = DecisionTreeClassifier()
---> 13 model.fit(X_train, y_train)
14 # predictions = model.predict(X_test)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\tree\_classes.py in fit(self, X, y, sample_weight, check_input, X_idx_sorted)
888 """
889
--> 890 super().fit(
891 X, y,
892 sample_weight=sample_weight,
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\tree\_classes.py in fit(self, X, y, sample_weight, check_input, X_idx_sorted)
179
180 if is_classification:
--> 181 check_classification_targets(y)
182 y = np.copy(y)
183
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\multiclass.py in check_classification_targets(y)
167 y : array-like
168 """
--> 169 y_type = type_of_target(y)
170 if y_type not in ['binary', 'multiclass', 'multiclass-multioutput',
171 'multilabel-indicator', 'multilabel-sequences']:
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\multiclass.py in type_of_target(y)
248 raise ValueError("y cannot be class 'SparseSeries' or 'SparseArray'")
249
--> 250 if is_multilabel(y):
251 return 'multilabel-indicator'
252
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\utils\multiclass.py in is_multilabel(y)
150 _is_integral_float(np.unique(y.data))))
151 else:
--> 152 labels = np.unique(y)
153
154 return len(labels) < 3 and (y.dtype.kind in 'biu' or # bool, int, uint
<__array_function__ internals> in unique(*args, **kwargs)
C:\ProgramData\Anaconda3\lib\site-packages\numpy\lib\arraysetops.py in unique(ar, return_index, return_inverse, return_counts, axis)
261 ar = np.asanyarray(ar)
262 if axis is None:
--> 263 ret = _unique1d(ar, return_index, return_inverse, return_counts)
264 return _unpack_tuple(ret)
265
C:\ProgramData\Anaconda3\lib\site-packages\numpy\lib\arraysetops.py in _unique1d(ar, return_index, return_inverse, return_counts)
309 aux = ar[perm]
310 else:
--> 311 ar.sort()
312 aux = ar
313 mask = np.empty(aux.shape, dtype=np.bool_)
TypeError: '<' not supported between instances of 'str' and 'int'
This is Jacob's Farming Contest.xlsx:
Year Month Day First Crop Second Crop Third Crop
0 101 1 1 0 0 0
1 101 1 2 Cactus Carrot Cocoa Beans
2 101 1 3 0 0 0
3 101 1 4 0 0 0
4 101 1 5 Mushroom Sugar CaNether Wart Wheat
... ... ... ... ... ... ...
367 101 12 27 Cactus Carrot Mushroom
368 101 12 28 0 0 0
369 101 12 29 0 0 0
370 101 12 30 Cocoa Beans Pumpkin Sugar CaNether Wart
371 101 12 31 0 0 0

Folium Choropleth + GeoJSON raises AttributeError: 'NoneType'

I'm trying to do a choropleth using folium which offers a great link between GeoJSON, Pandas and leaflet.
GeoJSON format is like below :
{
"type":"FeatureCollection",
"features":[
{
"type":"Feature",
"geometry":
{
"type":"Polygon",
"coordinates":[[[-1.6704591323124895,49.62681486270549], .....
{
"insee":"50173",
"nom":"Équeurdreville-Hainneville",
"wikipedia":"fr:Équeurdreville-Hainneville",
"surf_m2":12940306}},
Pandas DataFrame :
postal_count.head(5)
Out[98]:
Code_commune_INSEE CP_count
0 75120 723
1 75115 698
2 75112 671
3 75118 627
4 75111 622
"Code_communes_INSEE" corresponds to the attribute "insee" in the GeoJSON. I'd like to do a choropleth using the variable "CP_count" in the above DataFrame.
Here is my code (snippet from this notebook)
map_france = folium.Map(location=[47.000000, 2.000000], zoom_start=6)
map_france.choropleth(
geo_str=open(geo_path + 'simplified_communes100m.json').read(),
data=postal_count,
columns=['Code_commune_INSEE', 'CP_count'],
key_on='feature.geometry.properties.insee',
fill_color='YlGn',
)
map_france.save(table_path + 'choro_test1.html')
I'm still getting this error again and again :
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-83-ea0fd2c1c207> in <module>()
8 fill_color='YlGn',
9 )
---> 10 map_france.save('/media/flo/Stockage/Data/MesAides/map/choro_test1.html')
/home/flo/.virtualenvs/mesaides/lib/python3.4/site-packages/folium/element.py in save(self, outfile, close_file, **kwargs)
151
152 root = self.get_root()
--> 153 html = root.render(**kwargs)
154 fid.write(html.encode('utf8'))
155 if close_file:
/home/flo/.virtualenvs/mesaides/lib/python3.4/site-packages/folium/element.py in render(self, **kwargs)
357 """Renders the HTML representation of the element."""
358 for name, child in self._children.items():
--> 359 child.render(**kwargs)
360 return self._template.render(this=self, kwargs=kwargs)
361
/home/flo/.virtualenvs/mesaides/lib/python3.4/site-packages/folium/element.py in render(self, **kwargs)
665
666 for name, element in self._children.items():
--> 667 element.render(**kwargs)
/home/flo/.virtualenvs/mesaides/lib/python3.4/site-packages/folium/element.py in render(self, **kwargs)
661 script = self._template.module.__dict__.get('script', None)
662 if script is not None:
--> 663 figure.script.add_children(Element(script(self, kwargs)),
664 name=self.get_name())
665
/home/flo/.virtualenvs/mesaides/lib/python3.4/site-packages/jinja2/runtime.py in __call__(self, *args, **kwargs)
434 raise TypeError('macro %r takes not more than %d argument(s)' %
435 (self.name, len(self.arguments)))
--> 436 return self._func(*arguments)
437
438 def __repr__(self):
<template> in macro(l_this, l_kwargs)
/home/flo/.virtualenvs/mesaides/lib/python3.4/site-packages/jinja2/runtime.py in call(_Context__self, _Context__obj, *args, **kwargs)
194 args = (__self.environment,) + args
195 try:
--> 196 return __obj(*args, **kwargs)
197 except StopIteration:
198 return __self.environment.undefined('value was undefined because '
/home/flo/.virtualenvs/mesaides/lib/python3.4/site-packages/folium/features.py in style_data(self)
352
353 for feature in self.data['features']:
--> 354 feature.setdefault('properties', {}).setdefault('style', {}).update(self.style_function(feature)) # noqa
355 return json.dumps(self.data, sort_keys=True)
356
/home/flo/.virtualenvs/mesaides/lib/python3.4/site-packages/folium/folium.py in style_function(x)
671 "color": line_color,
672 "fillOpacity": fill_opacity,
--> 673 "fillColor": color_scale_fun(x)
674 }
675
/home/flo/.virtualenvs/mesaides/lib/python3.4/site-packages/folium/folium.py in color_scale_fun(x)
659 def color_scale_fun(x):
660 return color_range[len(
--> 661 [u for u in color_domain if
662 u <= color_data[get_by_key(x, key_on)]])]
663 else:
/home/flo/.virtualenvs/mesaides/lib/python3.4/site-packages/folium/folium.py in <listcomp>(.0)
660 return color_range[len(
661 [u for u in color_domain if
--> 662 u <= color_data[get_by_key(x, key_on)]])]
663 else:
664 def color_scale_fun(x):
/home/flo/.virtualenvs/mesaides/lib/python3.4/site-packages/folium/folium.py in get_by_key(obj, key)
655 return (obj.get(key, None) if len(key.split('.')) <= 1 else
656 get_by_key(obj.get(key.split('.')[0], None),
--> 657 '.'.join(key.split('.')[1:])))
658
659 def color_scale_fun(x):
/home/flo/.virtualenvs/mesaides/lib/python3.4/site-packages/folium/folium.py in get_by_key(obj, key)
655 return (obj.get(key, None) if len(key.split('.')) <= 1 else
656 get_by_key(obj.get(key.split('.')[0], None),
--> 657 '.'.join(key.split('.')[1:])))
658
659 def color_scale_fun(x):
/home/flo/.virtualenvs/mesaides/lib/python3.4/site-packages/folium/folium.py in get_by_key(obj, key)
653
654 def get_by_key(obj, key):
--> 655 return (obj.get(key, None) if len(key.split('.')) <= 1 else
656 get_by_key(obj.get(key.split('.')[0], None),
657 '.'.join(key.split('.')[1:])))
AttributeError: 'NoneType' object has no attribute 'get'
I tried playing with key_on='feature.geometry.properties.insee' without any success.
There were 2 problems :
1 - The correct access to 'insee' parameters is : key_on='feature.properties.insee'
The best way to find the right key_on is to play with the geoJSON dict to make sure you are calling the right properties.
2- Once you have the right key_on parameters, you need to make sure that all the available keys in the geoJSON are contained in your Pandas DataFrame (otherwise it will raise a KeyError)
In this case, I used the following command line to get all the insee keys contained by my geoJSON:
ogrinfo -ro -al communes-20150101-100m.shp -geom=NO | grep insee > list_code_insee.txt
If you are experiencing the same issue, this should solve your problem.
I had the same problem on JupyterLab (on labs.cognitiveclass.ai) using Folium 0.5.0. Then I copied my code and ran it in PyCharm, and it worked! I don't understand why, perhaps there is some backend issue (?)
If you want to display a folium map outside of a Jupyter notebook, you have to save the map to html:
map_france.save('map_france.html')
and open the html in your browser.