What's the difference between FunctionDefHelper::Create and FunctionDefHelper::Define? - tensorflow

The following is from math_grad.cc, lines 543 to 554:
*g = FDH::Create("_",
                 // Input defs
                 {"x:T", "i:int32", "dy:T"},
                 // Ret val defs
                 {"dx:T", "di:int32"},
                 // Attr defs
                 {{"T: {half, float, double}"}},
                 // Nodes
                 nodes,
                 // Return values
                 {{"dx", "dx:output:0"}, {"di", "di:y:0"}});
return Status::OK();
And the following is from math_grad.cc, lines 593 to 615:
Status MinMaxGradHelper(const string& op, const AttrSlice& attrs,
                        FunctionDef* g) {
  // clang-format off
  *g = FDH::Define(
    // Arg defs
    {"x:T", "i:int32", "dy:T"},
    // Ret val defs
    {"dx:T", "di:int32"},
    // Attr defs
    {{"T: {half, float, double}"}},
    {
      // keep_dims because we need to do x == y, which requires x
      // and y are broadcastable.
      {{"y"}, op, {"x", "i"}, {{"T", "$T"}, {"keep_dims", true}}},
      {{"mask"}, "Equal", {"x", "y"}, {{"T", "$T"}}},
      {{"mask_cast"}, "Cast", {"mask"}, {{"SrcT", DT_BOOL}, {"DstT", "$T"}}},
      {{"mask_sum"}, "Sum", {"mask_cast", "i"}, {{"T", "$T"}}},
      {{"norm_dy"}, "Div", {"dy", "mask_sum"}, {{"T", "$T"}}},
      {{"sy"}, "Shape", {"y"}, {{"T", "$T"}}},
      {{"norm_dy_reshaped"}, "Reshape", {"norm_dy", "sy"}, {{"T", "$T"}}},
      {{"dx"}, "Mul", {"mask_cast", "norm_dy_reshaped"}, {{"T", "$T"}}},
      {{"di"}, "ZerosLike", {"i"}, {{"T", DT_INT32}}}
    });
I am a little confused about how TensorFlow determines whether to use FDH::Define or FDH::Create.
As far as I know, FDH::Define is an older approach to defining a FunctionDef, and I thought that FDH::Define provides less information than FDH::Create; from the snippets above, the visible difference is that FDH::Create takes an explicit return-value mapping ({{"dx", "dx:output:0"}}, ...), while FDH::Define derives the outputs from the node names.
So, can anyone explain the difference? Any advice will be appreciated, thanks a lot.

Answer from a colleague:
"FunctionDefs are not yet part of the public API; these are temporary mechanisms for defining functions until we have real API support for them.
One of those two functions is older and mirrors the older FunctionDef format, which has now been deleted.
I didn't delete the function, I just made it produce the new format; but this only works in simple cases. There was too much code using that function to migrate all of them.
But the main point is that this isn't documented well because it is not part of our public API and subject to change."

Related

s3fs FileNotFoundError

I am only able to gain limited, top-level access to my AWS S3. I can see the buckets, but not their contents: neither subfolders nor files. I'm running everything from inside a conda environment. I've tried accessing files in private and public buckets without success. What am I doing wrong?
This block of code works as expected
>>> import s3fs
>>> AKEY = 'XXXX'
>>> SKEY = 'XXXX'
>>> fs = s3fs.S3FileSystem(key=AKEY,secret=SKEY)
>>> fs.ls('s3://')
['my-bucket-1',
'my-bucket-2',
'my-bucket-3']
This block doesn't
>>> fs.ls('s3://my-bucket-1')
[]
what I expect
>>> fs.ls('s3://my-bucket-1')
['my-bucket-1/test.txt',
'my-bucket-1/test.csv']
When I try to open a file I get a FileNotFoundError
import pandas as pd
pd.read_csv(
's3://my-bucket-1/test.csv',
storage_options={'key':AKEY,'secret':SKEY}
)
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
Cell In[8], line 2
1 import pandas as pd
----> 2 pd.read_csv(
3 's3://my-bucket-1/test.csv',
4 storage_options={'key':AKEY,'secret':SKEY}
5 )
File ~\anaconda3\envs\env-2\lib\site-packages\pandas\util\_decorators.py:211, in deprecate_kwarg.<locals>._deprecate_kwarg.<locals>.wrapper(*args, **kwargs)
209 else:
210 kwargs[new_arg_name] = new_arg_value
--> 211 return func(*args, **kwargs)
File ~\anaconda3\envs\env-2\lib\site-packages\pandas\util\_decorators.py:331, in deprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper(*args, **kwargs)
325 if len(args) > num_allow_args:
326 warnings.warn(
327 msg.format(arguments=_format_argument_list(allow_args)),
328 FutureWarning,
329 stacklevel=find_stack_level(),
330 )
--> 331 return func(*args, **kwargs)
File ~\anaconda3\envs\env-2\lib\site-packages\pandas\io\parsers\readers.py:950, in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, error_bad_lines, warn_bad_lines, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options)
935 kwds_defaults = _refine_defaults_read(
936 dialect,
937 delimiter,
(...)
946 defaults={"delimiter": ","},
947 )
948 kwds.update(kwds_defaults)
--> 950 return _read(filepath_or_buffer, kwds)
File ~\anaconda3\envs\env-2\lib\site-packages\pandas\io\parsers\readers.py:605, in _read(filepath_or_buffer, kwds)
602 _validate_names(kwds.get("names", None))
604 # Create the parser.
--> 605 parser = TextFileReader(filepath_or_buffer, **kwds)
607 if chunksize or iterator:
608 return parser
File ~\anaconda3\envs\env-2\lib\site-packages\pandas\io\parsers\readers.py:1442, in TextFileReader.__init__(self, f, engine, **kwds)
1439 self.options["has_index_names"] = kwds["has_index_names"]
1441 self.handles: IOHandles | None = None
-> 1442 self._engine = self._make_engine(f, self.engine)
File ~\anaconda3\envs\env-2\lib\site-packages\pandas\io\parsers\readers.py:1735, in TextFileReader._make_engine(self, f, engine)
1733 if "b" not in mode:
1734 mode += "b"
-> 1735 self.handles = get_handle(
1736 f,
1737 mode,
1738 encoding=self.options.get("encoding", None),
1739 compression=self.options.get("compression", None),
1740 memory_map=self.options.get("memory_map", False),
1741 is_text=is_text,
1742 errors=self.options.get("encoding_errors", "strict"),
1743 storage_options=self.options.get("storage_options", None),
1744 )
1745 assert self.handles is not None
1746 f = self.handles.handle
File ~\anaconda3\envs\env-2\lib\site-packages\pandas\io\common.py:713, in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)
710 codecs.lookup_error(errors)
712 # open URLs
--> 713 ioargs = _get_filepath_or_buffer(
714 path_or_buf,
715 encoding=encoding,
716 compression=compression,
717 mode=mode,
718 storage_options=storage_options,
719 )
721 handle = ioargs.filepath_or_buffer
722 handles: list[BaseBuffer]
File ~\anaconda3\envs\env-2\lib\site-packages\pandas\io\common.py:409, in _get_filepath_or_buffer(filepath_or_buffer, encoding, compression, mode, storage_options)
406 pass
408 try:
--> 409 file_obj = fsspec.open(
410 filepath_or_buffer, mode=fsspec_mode, **(storage_options or {})
411 ).open()
412 # GH 34626 Reads from Public Buckets without Credentials needs anon=True
413 except tuple(err_types_to_retry_with_anon):
File ~\anaconda3\envs\env-2\lib\site-packages\fsspec\core.py:135, in OpenFile.open(self)
128 def open(self):
129 """Materialise this as a real open file without context
130
131 The OpenFile object should be explicitly closed to avoid enclosed file
132 instances persisting. You must, therefore, keep a reference to the OpenFile
133 during the life of the file-like it generates.
134 """
--> 135 return self.__enter__()
File ~\anaconda3\envs\env-2\lib\site-packages\fsspec\core.py:103, in OpenFile.__enter__(self)
100 def __enter__(self):
101 mode = self.mode.replace("t", "").replace("b", "") + "b"
--> 103 f = self.fs.open(self.path, mode=mode)
105 self.fobjects = [f]
107 if self.compression is not None:
File ~\anaconda3\envs\env-2\lib\site-packages\fsspec\spec.py:1106, in AbstractFileSystem.open(self, path, mode, block_size, cache_options, compression, **kwargs)
1104 else:
1105 ac = kwargs.pop("autocommit", not self._intrans)
-> 1106 f = self._open(
1107 path,
1108 mode=mode,
1109 block_size=block_size,
1110 autocommit=ac,
1111 cache_options=cache_options,
1112 **kwargs,
1113 )
1114 if compression is not None:
1115 from fsspec.compression import compr
File ~\anaconda3\envs\env-2\lib\site-packages\s3fs\core.py:640, in S3FileSystem._open(self, path, mode, block_size, acl, version_id, fill_cache, cache_type, autocommit, requester_pays, cache_options, **kwargs)
637 if cache_type is None:
638 cache_type = self.default_cache_type
--> 640 return S3File(
641 self,
642 path,
643 mode,
644 block_size=block_size,
645 acl=acl,
646 version_id=version_id,
647 fill_cache=fill_cache,
648 s3_additional_kwargs=kw,
649 cache_type=cache_type,
650 autocommit=autocommit,
651 requester_pays=requester_pays,
652 cache_options=cache_options,
653 )
File ~\anaconda3\envs\env-2\lib\site-packages\s3fs\core.py:1989, in S3File.__init__(self, s3, path, mode, block_size, acl, version_id, fill_cache, s3_additional_kwargs, autocommit, cache_type, requester_pays, cache_options)
1987 self.details = s3.info(path)
1988 self.version_id = self.details.get("VersionId")
-> 1989 super().__init__(
1990 s3,
1991 path,
1992 mode,
1993 block_size,
1994 autocommit=autocommit,
1995 cache_type=cache_type,
1996 cache_options=cache_options,
1997 )
1998 self.s3 = self.fs # compatibility
2000 # when not using autocommit we want to have transactional state to manage
File ~\anaconda3\envs\env-2\lib\site-packages\fsspec\spec.py:1462, in AbstractBufferedFile.__init__(self, fs, path, mode, block_size, autocommit, cache_type, cache_options, size, **kwargs)
1460 self.size = size
1461 else:
-> 1462 self.size = self.details["size"]
1463 self.cache = caches[cache_type](
1464 self.blocksize, self._fetch_range, self.size, **cache_options
1465 )
1466 else:
File ~\anaconda3\envs\env-2\lib\site-packages\fsspec\spec.py:1475, in AbstractBufferedFile.details(self)
1472 @property
1473 def details(self):
1474 if self._details is None:
-> 1475 self._details = self.fs.info(self.path)
1476 return self._details
File ~\anaconda3\envs\env-2\lib\site-packages\fsspec\asyn.py:113, in sync_wrapper.<locals>.wrapper(*args, **kwargs)
110 @functools.wraps(func)
111 def wrapper(*args, **kwargs):
112 self = obj or args[0]
--> 113 return sync(self.loop, func, *args, **kwargs)
File ~\anaconda3\envs\env-2\lib\site-packages\fsspec\asyn.py:98, in sync(loop, func, timeout, *args, **kwargs)
96 raise FSTimeoutError from return_result
97 elif isinstance(return_result, BaseException):
---> 98 raise return_result
99 else:
100 return return_result
File ~\anaconda3\envs\env-2\lib\site-packages\fsspec\asyn.py:53, in _runner(event, coro, result, timeout)
51 coro = asyncio.wait_for(coro, timeout=timeout)
52 try:
---> 53 result[0] = await coro
54 except Exception as ex:
55 result[0] = ex
File ~\anaconda3\envs\env-2\lib\site-packages\s3fs\core.py:1257, in S3FileSystem._info(self, path, bucket, key, refresh, version_id)
1245 if (
1246 out.get("KeyCount", 0) > 0
1247 or out.get("Contents", [])
1248 or out.get("CommonPrefixes", [])
1249 ):
1250 return {
1251 "name": "/".join([bucket, key]),
1252 "type": "directory",
1253 "size": 0,
1254 "StorageClass": "DIRECTORY",
1255 }
-> 1257 raise FileNotFoundError(path)
1258 except ClientError as e:
1259 raise translate_boto_error(e, set_cause=False)
FileNotFoundError: my-bucket-1/test.csv
Versions: s3fs 2022.11.0, aiobotocore 2.4.0, botocore 1.27.59.
Anonymous access to a public bucket does not work either:
fs = s3fs.S3FileSystem(anon=True)
fs.ls('s3://dask-data/nyc-taxi/2015')
This raises a ParseError.
Check the bucket policy / IAM role that gives you permissions to access the bucket. It should have /* after the name of the resource:
"Action": "s3:GetObject",
"Resource": "arn:aws:s3:::my-bucket-1/*"
to allow you to access the objects in the bucket, not just the bucket itself.
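For reference, a minimal sketch of a policy covering both listing and reading (the bucket name is the placeholder from the question; the extra s3:ListBucket statement is an assumption about why fs.ls returned an empty list):
{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Action": "s3:ListBucket",
            "Resource": "arn:aws:s3:::my-bucket-1"
        },
        {
            "Effect": "Allow",
            "Action": "s3:GetObject",
            "Resource": "arn:aws:s3:::my-bucket-1/*"
        }
    ]
}
Without a ListBucket statement on the bucket itself, fs.ls('s3://my-bucket-1') can come back empty even when the objects are readable by key.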
Have you tried boto3? s3fs is no longer supported.
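If you want to cross-check with boto3, here is a minimal sketch (AKEY, SKEY and the bucket name are the placeholders from the question):
import boto3

s3 = boto3.client('s3', aws_access_key_id=AKEY, aws_secret_access_key=SKEY)
# Listing keys requires s3:ListBucket on the bucket itself
response = s3.list_objects_v2(Bucket='my-bucket-1')
for obj in response.get('Contents', []):
    print(obj['Key'])
If this also returns nothing, the problem is in the bucket policy / IAM role rather than in s3fs.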

Pandas SystemError: \objects\tupleobject.c:914: bad argument to internal function

I am encountering the error message below sporadically.
C:\ProgramData\Anaconda3\envs\py39\lib\site-packages\pandas\core\series.py in unstack(self, level, fill_value)
3827 from pandas.core.reshape.reshape import unstack
3828
-> 3829 return unstack(self, level, fill_value)
3830
3831 # ----------------------------------------------------------------------
C:\ProgramData\Anaconda3\envs\py39\lib\site-packages\pandas\core\reshape\reshape.py in unstack(obj, level, fill_value)
428 if is_extension_array_dtype(obj.dtype):
429 return _unstack_extension_series(obj, level, fill_value)
--> 430 unstacker = _Unstacker(
431 obj.index, level=level, constructor=obj._constructor_expanddim
432 )
C:\ProgramData\Anaconda3\envs\py39\lib\site-packages\pandas\core\reshape\reshape.py in __init__(self, index, level, constructor)
116 raise ValueError("Unstacked DataFrame is too big, causing int32 overflow")
117
--> 118 self._make_selectors()
119
120 @cache_readonly
C:\ProgramData\Anaconda3\envs\py39\lib\site-packages\pandas\core\reshape\reshape.py in _make_selectors(self)
150
151 # make the mask
--> 152 remaining_labels = self.sorted_labels[:-1]
153 level_sizes = [len(x) for x in new_levels]
154
pandas\_libs\properties.pyx in pandas._libs.properties.CachedProperty.__get__()
C:\ProgramData\Anaconda3\envs\py39\lib\site-packages\pandas\core\reshape\reshape.py in sorted_labels(self)
137 @cache_readonly
138 def sorted_labels(self):
--> 139 indexer, to_sort = self._indexer_and_to_sort
140 return [line.take(indexer) for line in to_sort]
141
pandas\_libs\properties.pyx in pandas._libs.properties.CachedProperty.__get__()
C:\ProgramData\Anaconda3\envs\py39\lib\site-packages\pandas\core\reshape\reshape.py in _indexer_and_to_sort(self)
127 sizes = [len(x) for x in levs[:v] + levs[v + 1 :] + [levs[v]]]
128
--> 129 comp_index, obs_ids = get_compressed_ids(to_sort, sizes)
130 ngroups = len(obs_ids)
131
C:\ProgramData\Anaconda3\envs\py39\lib\site-packages\pandas\core\sorting.py in get_compressed_ids(labels, sizes)
195 tuple of (comp_ids, obs_group_ids)
196 """
--> 197 ids = get_group_index(labels, sizes, sort=True, xnull=False)
198 return compress_group_index(ids, sort=True)
199
C:\ProgramData\Anaconda3\envs\py39\lib\site-packages\pandas\core\sorting.py in get_group_index(labels, shape, sort, xnull)
139 labels = map(ensure_int64, labels)
140 if not xnull:
--> 141 labels, shape = map(list, zip(*map(maybe_lift, labels, shape)))
142
143 labels = list(labels)
SystemError: \objects\tupleobject.c:914: bad argument to internal function
The above was called from the actual code:
1539 slice_tmp = my_orderlog.groupby(['ticker','slice']).activeOrderSize.sum().to_frame('size').reset_index()
1540 slice_tmp['unit_size'] = slice_tmp['size']/slice_tmp['ticker'].map(wt_map)
1541 logger.info(f'TRYING TO DEBUG: {slice_tmp}')
-> 1542 breakdown = slice_tmp.groupby(['ticker','slice']).unit_size.sum().unstack(level=0)
1543 logger.info(f'TRYING TO DEBUG {breakdown}')
A SystemError about a bad argument to an internal function is not repeatable and very difficult (if not impossible) to catch.
Note that I tried logging the DataFrame in question, "slice_tmp", but when I take that logged data and run the exact same code, i.e. groupby(['ticker','slice']).unit_size.sum().unstack(level=0), in a Jupyter notebook, it runs with no problem.
Any pointers or similar experiences with this odd one, please? FWIW I'm running pandas 1.2.4.
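For reference, the failing groupby/unstack can also be written as a single pivot_table call (a hedged sketch reusing slice_tmp and the column names from the question; not a confirmed fix for the sporadic SystemError):
breakdown = slice_tmp.pivot_table(
    index='slice',      # rows: the former inner index level
    columns='ticker',   # columns: what unstack(level=0) produced
    values='unit_size',
    aggfunc='sum',
)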

pyspark toPandas() IndexError: index is out of bounds

I'm experiencing a weird behaviour of pyspark's .toPandas() method running from Jupyter. For example, if I try this:
data = [{"Category": 'Category A', "ID": 1, "Value": 12.40},
{"Category": 'Category B', "ID": 2, "Value": 30.10},
{"Category": 'Category C', "ID": 3, "Value": 100.01}
]
# Create data frame (where spark is a SparkSession)
df = spark.createDataFrame(data)
df.show()
I'm able to successfully create the pyspark dataframe. However, when converting to pandas I get IndexError: index is out of bounds:
IndexError Traceback (most recent call last)
<path_to_python>/lib/python3.7/site-packages/IPython/core/formatters.py in __call__(self, obj)
700 type_pprinters=self.type_printers,
701 deferred_pprinters=self.deferred_printers)
--> 702 printer.pretty(obj)
703 printer.flush()
704 return stream.getvalue()
<path_to_python>/lib/python3.7/site-packages/IPython/lib/pretty.py in pretty(self, obj)
400 if cls is not object \
401 and callable(cls.__dict__.get('__repr__')):
--> 402 return _repr_pprint(obj, self, cycle)
403
404 return _default_pprint(obj, self, cycle)
<path_to_python>/lib/python3.7/site-packages/IPython/lib/pretty.py in _repr_pprint(obj, p, cycle)
695 """A pprint that just redirects to the normal repr function."""
696 # Find newlines and replace them with p.break_()
--> 697 output = repr(obj)
698 for idx,output_line in enumerate(output.splitlines()):
699 if idx:
<path_to_python>/lib/python3.7/site-packages/pandas/core/base.py in __repr__(self)
76 Yields Bytestring in Py2, Unicode String in py3.
77 """
---> 78 return str(self)
79
80
<path_to_python>/lib/python3.7/site-packages/pandas/core/base.py in __str__(self)
55
56 if compat.PY3:
---> 57 return self.__unicode__()
58 return self.__bytes__()
59
<path_to_python>/lib/python3.7/site-packages/pandas/core/frame.py in __unicode__(self)
632 width = None
633 self.to_string(buf=buf, max_rows=max_rows, max_cols=max_cols,
--> 634 line_width=width, show_dimensions=show_dimensions)
635
636 return buf.getvalue()
<path_to_python>/lib/python3.7/site-packages/pandas/core/frame.py in to_string(self, buf, columns, col_space, header, index, na_rep, formatters, float_format, sparsify, index_names, justify, max_rows, max_cols, show_dimensions, decimal, line_width)
719 decimal=decimal,
720 line_width=line_width)
--> 721 formatter.to_string()
722
723 if buf is None:
<path_to_python>/lib/python3.7/site-packages/pandas/io/formats/format.py in to_string(self)
596 else:
597
--> 598 strcols = self._to_str_columns()
599 if self.line_width is None: # no need to wrap around just print
600 # the whole frame
<path_to_python>/lib/python3.7/site-packages/pandas/io/formats/format.py in _to_str_columns(self)
527 str_columns = [[label] for label in self.header]
528 else:
--> 529 str_columns = self._get_formatted_column_labels(frame)
530
531 stringified = []
<path_to_python>/lib/python3.7/site-packages/pandas/io/formats/format.py in _get_formatted_column_labels(self, frame)
770 need_leadsp[x] else x]
771 for i, (col, x) in enumerate(zip(columns,
--> 772 fmt_columns))]
773
774 if self.show_row_idx_names:
<path_to_python>/lib/python3.7/site-packages/pandas/io/formats/format.py in <listcomp>(.0)
769 str_columns = [[' ' + x if not self._get_formatter(i) and
770 need_leadsp[x] else x]
--> 771 for i, (col, x) in enumerate(zip(columns,
772 fmt_columns))]
773
<path_to_python>/lib/python3.7/site-packages/pandas/io/formats/format.py in _get_formatter(self, i)
362 else:
363 if is_integer(i) and i not in self.columns:
--> 364 i = self.columns[i]
365 return self.formatters.get(i, None)
366
<path_to_python>/lib/python3.7/site-packages/pandas/core/indexes/base.py in __getitem__(self, key)
3956 if is_scalar(key):
3957 key = com.cast_scalar_indexer(key)
-> 3958 return getitem(key)
3959
3960 if isinstance(key, slice):
IndexError: index 3 is out of bounds for axis 0 with size 3
I'm not sure where the problem could be; I've used this many times without problems, but this time I tried a new environment and I got this issue. In case it helps, my configuration is:
Python: 3.7.6;
Pandas: 0.24.2;
PySpark: 2.4.5
Any idea?
Thanks :)
I found the issue. While trying to minimize the code to reproduce the error, I omitted that I was adding a pandas setting:
pd.set_option('display.max_columns', -1)
This caused the error independently of the DataFrame being converted. To fix it, I just specified a positive number of columns or None.
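A minimal sketch of the fix (df is the Spark DataFrame created above; None removes the column limit instead of the invalid -1):
import pandas as pd

# pd.set_option('display.max_columns', -1)  # the offending setting
pd.set_option('display.max_columns', None)  # no column limit
df.toPandas()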

Folium Choropleth + GeoJSON raises AttributeError: 'NoneType'

I'm trying to do a choropleth using folium which offers a great link between GeoJSON, Pandas and leaflet.
The GeoJSON format is like below:
{
  "type": "FeatureCollection",
  "features": [
    {
      "type": "Feature",
      "geometry": {
        "type": "Polygon",
        "coordinates": [[[-1.6704591323124895, 49.62681486270549], .....
      {
        "insee": "50173",
        "nom": "Équeurdreville-Hainneville",
        "wikipedia": "fr:Équeurdreville-Hainneville",
        "surf_m2": 12940306}},
Pandas DataFrame :
postal_count.head(5)
Out[98]:
Code_commune_INSEE CP_count
0 75120 723
1 75115 698
2 75112 671
3 75118 627
4 75111 622
"Code_communes_INSEE" corresponds to the attribute "insee" in the GeoJSON. I'd like to do a choropleth using the variable "CP_count" in the above DataFrame.
Here is my code (snippet from this notebook)
map_france = folium.Map(location=[47.000000, 2.000000], zoom_start=6)
map_france.choropleth(
geo_str=open(geo_path + 'simplified_communes100m.json').read(),
data=postal_count,
columns=['Code_commune_INSEE', 'CP_count'],
key_on='feature.geometry.properties.insee',
fill_color='YlGn',
)
map_france.save(table_path + 'choro_test1.html')
I'm still getting this error again and again:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-83-ea0fd2c1c207> in <module>()
8 fill_color='YlGn',
9 )
---> 10 map_france.save('/media/flo/Stockage/Data/MesAides/map/choro_test1.html')
/home/flo/.virtualenvs/mesaides/lib/python3.4/site-packages/folium/element.py in save(self, outfile, close_file, **kwargs)
151
152 root = self.get_root()
--> 153 html = root.render(**kwargs)
154 fid.write(html.encode('utf8'))
155 if close_file:
/home/flo/.virtualenvs/mesaides/lib/python3.4/site-packages/folium/element.py in render(self, **kwargs)
357 """Renders the HTML representation of the element."""
358 for name, child in self._children.items():
--> 359 child.render(**kwargs)
360 return self._template.render(this=self, kwargs=kwargs)
361
/home/flo/.virtualenvs/mesaides/lib/python3.4/site-packages/folium/element.py in render(self, **kwargs)
665
666 for name, element in self._children.items():
--> 667 element.render(**kwargs)
/home/flo/.virtualenvs/mesaides/lib/python3.4/site-packages/folium/element.py in render(self, **kwargs)
661 script = self._template.module.__dict__.get('script', None)
662 if script is not None:
--> 663 figure.script.add_children(Element(script(self, kwargs)),
664 name=self.get_name())
665
/home/flo/.virtualenvs/mesaides/lib/python3.4/site-packages/jinja2/runtime.py in __call__(self, *args, **kwargs)
434 raise TypeError('macro %r takes not more than %d argument(s)' %
435 (self.name, len(self.arguments)))
--> 436 return self._func(*arguments)
437
438 def __repr__(self):
<template> in macro(l_this, l_kwargs)
/home/flo/.virtualenvs/mesaides/lib/python3.4/site-packages/jinja2/runtime.py in call(_Context__self, _Context__obj, *args, **kwargs)
194 args = (__self.environment,) + args
195 try:
--> 196 return __obj(*args, **kwargs)
197 except StopIteration:
198 return __self.environment.undefined('value was undefined because '
/home/flo/.virtualenvs/mesaides/lib/python3.4/site-packages/folium/features.py in style_data(self)
352
353 for feature in self.data['features']:
--> 354 feature.setdefault('properties', {}).setdefault('style', {}).update(self.style_function(feature)) # noqa
355 return json.dumps(self.data, sort_keys=True)
356
/home/flo/.virtualenvs/mesaides/lib/python3.4/site-packages/folium/folium.py in style_function(x)
671 "color": line_color,
672 "fillOpacity": fill_opacity,
--> 673 "fillColor": color_scale_fun(x)
674 }
675
/home/flo/.virtualenvs/mesaides/lib/python3.4/site-packages/folium/folium.py in color_scale_fun(x)
659 def color_scale_fun(x):
660 return color_range[len(
--> 661 [u for u in color_domain if
662 u <= color_data[get_by_key(x, key_on)]])]
663 else:
/home/flo/.virtualenvs/mesaides/lib/python3.4/site-packages/folium/folium.py in <listcomp>(.0)
660 return color_range[len(
661 [u for u in color_domain if
--> 662 u <= color_data[get_by_key(x, key_on)]])]
663 else:
664 def color_scale_fun(x):
/home/flo/.virtualenvs/mesaides/lib/python3.4/site-packages/folium/folium.py in get_by_key(obj, key)
655 return (obj.get(key, None) if len(key.split('.')) <= 1 else
656 get_by_key(obj.get(key.split('.')[0], None),
--> 657 '.'.join(key.split('.')[1:])))
658
659 def color_scale_fun(x):
/home/flo/.virtualenvs/mesaides/lib/python3.4/site-packages/folium/folium.py in get_by_key(obj, key)
655 return (obj.get(key, None) if len(key.split('.')) <= 1 else
656 get_by_key(obj.get(key.split('.')[0], None),
--> 657 '.'.join(key.split('.')[1:])))
658
659 def color_scale_fun(x):
/home/flo/.virtualenvs/mesaides/lib/python3.4/site-packages/folium/folium.py in get_by_key(obj, key)
653
654 def get_by_key(obj, key):
--> 655 return (obj.get(key, None) if len(key.split('.')) <= 1 else
656 get_by_key(obj.get(key.split('.')[0], None),
657 '.'.join(key.split('.')[1:])))
AttributeError: 'NoneType' object has no attribute 'get'
I tried playing with key_on='feature.geometry.properties.insee' without any success.
There were two problems:
1. The correct way to access the 'insee' property is key_on='feature.properties.insee'.
The best way to find the right key_on is to play with the GeoJSON dict to make sure you are calling the right properties.
2. Once you have the right key_on parameter, you need to make sure that all the keys available in the GeoJSON are contained in your Pandas DataFrame (otherwise it will raise a KeyError).
In this case, I used the following command line to get all the insee keys contained in my GeoJSON:
ogrinfo -ro -al communes-20150101-100m.shp -geom=NO | grep insee > list_code_insee.txt
If you are experiencing the same issue, this should solve your problem.
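The same check can also be done in Python; a hedged sketch that ends with the corrected call (paths and column names are taken from the question, and choropleth mirrors the older folium API used above):
import json
import folium

with open(geo_path + 'simplified_communes100m.json') as f:
    geo = json.load(f)

# Every insee code in the GeoJSON must have a row in the DataFrame,
# otherwise folium raises a KeyError while styling that feature
insee_codes = {feature['properties']['insee'] for feature in geo['features']}
missing = insee_codes - set(postal_count['Code_commune_INSEE'])
print(len(missing), 'GeoJSON features have no entry in postal_count')

map_france = folium.Map(location=[47.000000, 2.000000], zoom_start=6)
map_france.choropleth(
    geo_str=open(geo_path + 'simplified_communes100m.json').read(),
    data=postal_count,
    columns=['Code_commune_INSEE', 'CP_count'],
    key_on='feature.properties.insee',  # properties, not geometry.properties
    fill_color='YlGn',
)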
I had the same problem on JupyterLab (on labs.cognitiveclass.ai) using Folium 0.5.0. Then I copied my code and ran it in PyCharm, and it worked! I don't understand why, perhaps there is some backend issue (?)
If you want to display a folium map outside of a Jupyter notebook, you have to save the map to html:
map_france.save('map_france.html')
and open the html in your browser.

SocketError: Connection refused using py2neo to a remote server

While this issue has been addressed a couple of times in the past on SO and I have tried all the suggestions, the problem still remains, and I am hoping someone can shed some light on this.
My company set up a neo4j (v2.1.6) graph database at a remote ubuntu server.
In order to modify and update data into the server, I am using a python package, py2neo.
The server's endpoint address is http://fake-address.com/db/data and the authentication ID/password are 'fakeId' and 'fakePassWord'.
In order to access the remote database, I tried the following in a Python terminal on my local machine:
from py2neo import authenticate, Graph
authenticate("fake-address.com:80", "fakeId", "fakePassWord")
graph = Graph("http://fake-address.com:80/db/data/")
result = graph.cypher.execute("CREATE (a:Color)")
Unfortunately, the above command resulted in the following error message.
/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/py2neo/cypher/core.pyc in execute(self, statement, parameters, **kwparameters)
107 """
108 if self.transaction_uri:
--> 109 tx = CypherTransaction(self.transaction_uri)
110 tx.append(statement, parameters, **kwparameters)
111 results = tx.commit()
/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/py2neo/cypher/core.pyc in __init__(self, uri)
180 self.__commit = None
181 self.__finished = False
--> 182 self.graph = self.__begin.graph
183
184 def __enter__(self):
/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/py2neo/core.pyc in graph(self)
211 :rtype: :class:`.Graph`
212 """
--> 213 return self.__service_root.graph
214
215 #property
/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/py2neo/core.pyc in graph(self)
523 if self.__graph is None:
524 try:
--> 525 uri = self.resource.metadata["data"]
526 except KeyError:
527 if "authentication" in self.resource.metadata:
/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/py2neo/core.pyc in metadata(self)
226 if self.__initial_metadata is not None:
227 return self.__initial_metadata
--> 228 self.get()
229 return self.__last_get_response.content
230
/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/py2neo/core.pyc in get(self, headers, redirect_limit, **kwargs)
271 kwargs.update(cache=True)
272 try:
--> 273 response = self.__base.get(headers=headers, redirect_limit=redirect_limit, **kwargs)
274 except (ClientError, ServerError) as error:
275 if error.status_code == UNAUTHORIZED:
/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/py2neo/packages/httpstream/http.pyc in get(self, if_modified_since, headers, redirect_limit, **kwargs)
964 object from which content can be read
965 """
--> 966 return self.__get_or_head("GET", if_modified_since, headers, redirect_limit, **kwargs)
967
968 def put(self, body=None, headers=None, **kwargs):
/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/py2neo/packages/httpstream/http.pyc in __get_or_head(self, method, if_modified_since, headers, redirect_limit, **kwargs)
941 headers["If-Modified-Since"] = formatdate(datetime_to_timestamp(if_modified_since), usegmt=True)
942 rq = Request(method, self.uri, None, headers)
--> 943 return rq.submit(redirect_limit=redirect_limit, **kwargs)
944
945 def head(self, if_modified_since=None, headers=None, redirect_limit=5, **kwargs):
/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/py2neo/packages/httpstream/http.pyc in submit(self, redirect_limit, **response_kwargs)
431 uri = self.uri
432 while True:
--> 433 http, rs = submit(self.method, uri, self.body, self.headers)
434 status_class = rs.status // 100
435 if status_class == 3:
/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/py2neo/packages/httpstream/http.pyc in submit(method, uri, body, headers)
360 host_port=uri.host_port)
361 else:
--> 362 raise SocketError(code, description, host_port=uri.host_port)
363 else:
364 return http, response
SocketError: Connection refused
Sorry about the long error message; I greatly appreciate any suggestions.
The issue has been resolved. The problem must have been in the authentication in py2neo. With the newest version of py2neo (2.0.4), the connection was properly made.