Pandas can't read in Excel file - pandas

Something is wrong with my pandas module. I tried to read in an Excel file using the following code, which works on my classmate's computer, but it gives an error on my computer:
FFT1=pd.read_excel('FFT1.xlsx', sheet_name='sheet1')
The file named 'FFT1.xlsx' is in the same directory as my Jupyter notebook. The error message says:
XLRDError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_7436/2793485739.py in <module>
----> 1 FFT1=pd.read_excel('FFT1.xlsx', sheet_name='sheet1')
D:\Softwares\Anaconda\lib\site-packages\pandas\io\excel\_base.py in read_excel(io, sheet_name, header, names, index_col, usecols, squeeze, dtype, engine, converters, true_values, false_values, skiprows, nrows, na_values, keep_default_na, verbose, parse_dates, date_parser, thousands, comment, skipfooter, convert_float, mangle_dupe_cols, **kwds)
302
303 if not isinstance(io, ExcelFile):
--> 304 io = ExcelFile(io, engine=engine)
305 elif engine and engine != io.engine:
306 raise ValueError(
D:\Softwares\Anaconda\lib\site-packages\pandas\io\excel\_base.py in __init__(self, io, engine)
819 self._io = stringify_path(io)
820
--> 821 self._reader = self._engines[engine](self._io)
822
823 def __fspath__(self):
D:\Softwares\Anaconda\lib\site-packages\pandas\io\excel\_xlrd.py in __init__(self, filepath_or_buffer)
19 err_msg = "Install xlrd >= 1.0.0 for Excel support"
20 import_optional_dependency("xlrd", extra=err_msg)
---> 21 super().__init__(filepath_or_buffer)
22
23 @property
D:\Softwares\Anaconda\lib\site-packages\pandas\io\excel\_base.py in __init__(self, filepath_or_buffer)
351 self.book = self.load_workbook(filepath_or_buffer)
352 elif isinstance(filepath_or_buffer, str):
--> 353 self.book = self.load_workbook(filepath_or_buffer)
354 elif isinstance(filepath_or_buffer, bytes):
355 self.book = self.load_workbook(BytesIO(filepath_or_buffer))
D:\Softwares\Anaconda\lib\site-packages\pandas\io\excel\_xlrd.py in load_workbook(self, filepath_or_buffer)
34 return open_workbook(file_contents=data)
35 else:
---> 36 return open_workbook(filepath_or_buffer)
37
38 @property
D:\Softwares\Anaconda\lib\site-packages\xlrd\__init__.py in open_workbook(filename, logfile, verbosity, use_mmap, file_contents, encoding_override, formatting_info, on_demand, ragged_rows, ignore_workbook_corruption)
168 # files that xlrd can parse don't start with the expected signature.
169 if file_format and file_format != 'xls':
--> 170 raise XLRDError(FILE_FORMAT_DESCRIPTIONS[file_format]+'; not supported')
171
172 bk = open_workbook_xls(
XLRDError: Excel xlsx file; not supported
How should I fix this?

Make sure that you have already installed openpyxl; if you haven't, try
pip install openpyxl
Then change your code to
FFT1 = pd.read_excel('FFT1.xlsx', sheet_name='sheet1', engine='openpyxl')
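The underlying issue is that xlrd 2.0 and later no longer read .xlsx files, so pandas needs openpyxl for them. If you want that failure mode to be explicit, a minimal sketch along these lines (reusing the file name from the question) checks for openpyxl before reading:
import pandas as pd

try:
    import openpyxl  # noqa: F401  # pandas uses this engine for .xlsx files
except ImportError:
    raise SystemExit("openpyxl is missing; run `pip install openpyxl` first")

# Select the openpyxl engine explicitly so pandas does not fall back to xlrd,
# which dropped .xlsx support in version 2.0.
FFT1 = pd.read_excel('FFT1.xlsx', sheet_name='sheet1', engine='openpyxl')
print(FFT1.head())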

Related

Unable to read CSV file in Jupyter Notebook; the following errors occur

import pandas as pd
df = pd.read_csv('D:\Tableau\codebasics_files\Weather_data.csv.xlsx')
df
UnicodeDecodeError Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_18872\1985582496.py in <module>
1 import pandas as pd
----> 2 df = pd.read_csv('D:\Tableau\codebasics_files\Weather_data.csv.xlsx')
3 df
C:\ProgramData\Anaconda3\lib\site-packages\pandas\util\_decorators.py in wrapper(*args, **kwargs)
309 stacklevel=stacklevel,
310 )
--> 311 return func(*args, **kwargs)
312
313 return wrapper
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\parsers\readers.py in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, error_bad_lines, warn_bad_lines, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options)
676 kwds.update(kwds_defaults)
677
--> 678 return _read(filepath_or_buffer, kwds)
679
680
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\parsers\readers.py in _read(filepath_or_buffer, kwds)
573
574 # Create the parser.
--> 575 parser = TextFileReader(filepath_or_buffer, **kwds)
576
577 if chunksize or iterator:
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\parsers\readers.py in __init__(self, f, engine, **kwds)
930
931 self.handles: IOHandles | None = None
--> 932 self._engine = self._make_engine(f, self.engine)
933
934 def close(self):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\parsers\readers.py in _make_engine(self, f, engine)
1232
1233 try:
-> 1234 return mapping[engine](f, **self.options)
1235 except Exception:
1236 if self.handles is not None:
C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\parsers\c_parser_wrapper.py in __init__(self, src, **kwds)
73
74 kwds["dtype"] = ensure_dtype_objs(kwds.get("dtype", None))
---> 75 self._reader = parsers.TextReader(src, **kwds)
76
77 self.unnamed_cols = self._reader.unnamed_cols
C:\ProgramData\Anaconda3\lib\site-packages\pandas\_libs\parsers.pyx in pandas._libs.parsers.TextReader.__cinit__()
C:\ProgramData\Anaconda3\lib\site-packages\pandas\_libs\parsers.pyx in pandas._libs.parsers.TextReader._get_header()
C:\ProgramData\Anaconda3\lib\site-packages\pandas\_libs\parsers.pyx in pandas._libs.parsers.TextReader._tokenize_rows()
C:\ProgramData\Anaconda3\lib\site-packages\pandas\_libs\parsers.pyx in pandas._libs.parsers.raise_parser_error()
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xaa in position 14: invalid start byte
I tried some options from YouTube, but they are not working.
Looking at the very end of the file extension, you're importing a .xlsx file, not a CSV file.
Try opening the file in Excel and exporting it as a CSV. Make sure that .csv is the final extension of the file name.
I think you need to use the Excel reader to load the XLSX file in pandas.
For that, use these lines of code:
import pandas as pd
df = pd.read_excel(r'D:\Tableau\codebasics_files\Weather_data.csv.xlsx')
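If you would rather keep using read_csv afterwards, a small sketch like the following (the output file name is just an example) reads the misnamed .xlsx file once and writes out a genuine CSV:
import pandas as pd

# Read the file with the Excel reader; the raw string avoids backslash-escape
# surprises in the Windows path.
df = pd.read_excel(r'D:\Tableau\codebasics_files\Weather_data.csv.xlsx')

# Write a real CSV so pd.read_csv works from now on (output name is an example).
df.to_csv(r'D:\Tableau\codebasics_files\Weather_data.csv', index=False)
print(df.head())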

AttributeError: 'Adam' object has no attribute 'get_weights'

I am pretty new to TensorFlow Keras, and there is a problem running cross-validation that I could not fix. It all worked before I installed featurewiz (conda install -c conda-forge featurewiz).
from sklearn.model_selection import KFold, cross_validate, cross_val_score
from scikeras.wrappers import KerasClassifier
estimator = KerasClassifier(model, epochs=500, batch_size=10) #, verbose = 0
kfold = KFold(n_splits=5, shuffle=True)
results = cross_validate(estimator, X, y, cv=kfold, scoring=['accuracy', 'precision_weighted', 'recall_weighted', 'f1_weighted'], return_train_score=True)
print(results)
Error:
WARNING:absl:Found untraced functions such as _update_step_xla while saving (showing 1 of 1). These functions will not be directly callable after loading.
INFO:tensorflow:Assets written to: ram:///var/folders/c4/ywdtx99d1vl0ptsg1fy494_40000gn/T/tmpsuvxkjb9/assets
INFO:tensorflow:Assets written to: ram:///var/folders/c4/ywdtx99d1vl0ptsg1fy494_40000gn/T/tmpsuvxkjb9/assets
---------------------------------------------------------------------------
Empty Traceback (most recent call last)
File ~/tensorflow-test/env/lib/python3.8/site-packages/joblib/parallel.py:862, in Parallel.dispatch_one_batch(self, iterator)
861 try:
--> 862 tasks = self._ready_batches.get(block=False)
863 except queue.Empty:
864 # slice the iterator n_jobs * batchsize items at a time. If the
865 # slice returns less than that, then the current batchsize puts
(...)
868 # accordingly to distribute evenly the last items between all
869 # workers.
File ~/tensorflow-test/env/lib/python3.8/queue.py:167, in Queue.get(self, block, timeout)
166 if not self._qsize():
--> 167 raise Empty
168 elif timeout is None:
Empty:
During handling of the above exception, another exception occurred:
AttributeError Traceback (most recent call last)
Cell In[5], line 6
4 estimator = KerasClassifier(model, epochs=500, batch_size=10) #, verbose = 0
5 kfold = KFold(n_splits=5, shuffle=True) # seed so that the shuffle stays the same, random_state=1337
----> 6 results = cross_validate(estimator, X, y, cv=kfold, scoring=['accuracy', 'precision_weighted', 'recall_weighted', 'f1_weighted'], return_train_score=True)
8 print(results)
File ~/tensorflow-test/env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py:266, in cross_validate(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, return_train_score, return_estimator, error_score)
263 # We clone the estimator to make sure that all the folds are
264 # independent, and that it is pickle-able.
265 parallel = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch)
--> 266 results = parallel(
267 delayed(_fit_and_score)(
268 clone(estimator),
269 X,
270 y,
271 scorers,
272 train,
273 test,
274 verbose,
275 None,
276 fit_params,
277 return_train_score=return_train_score,
278 return_times=True,
279 return_estimator=return_estimator,
280 error_score=error_score,
281 )
282 for train, test in cv.split(X, y, groups)
283 )
285 _warn_or_raise_about_fit_failures(results, error_score)
287 # For callabe scoring, the return type is only know after calling. If the
288 # return type is a dictionary, the error scores can now be inserted with
289 # the correct key.
File ~/tensorflow-test/env/lib/python3.8/site-packages/joblib/parallel.py:1085, in Parallel.__call__(self, iterable)
1076 try:
1077 # Only set self._iterating to True if at least a batch
1078 # was dispatched. In particular this covers the edge
(...)
1082 # was very quick and its callback already dispatched all the
1083 # remaining jobs.
1084 self._iterating = False
-> 1085 if self.dispatch_one_batch(iterator):
1086 self._iterating = self._original_iterator is not None
1088 while self.dispatch_one_batch(iterator):
File ~/tensorflow-test/env/lib/python3.8/site-packages/joblib/parallel.py:873, in Parallel.dispatch_one_batch(self, iterator)
870 n_jobs = self._cached_effective_n_jobs
871 big_batch_size = batch_size * n_jobs
--> 873 islice = list(itertools.islice(iterator, big_batch_size))
874 if len(islice) == 0:
875 return False
File ~/tensorflow-test/env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py:268, in <genexpr>(.0)
263 # We clone the estimator to make sure that all the folds are
264 # independent, and that it is pickle-able.
265 parallel = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch)
266 results = parallel(
267 delayed(_fit_and_score)(
--> 268 clone(estimator),
269 X,
270 y,
271 scorers,
272 train,
273 test,
274 verbose,
275 None,
276 fit_params,
277 return_train_score=return_train_score,
278 return_times=True,
279 return_estimator=return_estimator,
280 error_score=error_score,
281 )
282 for train, test in cv.split(X, y, groups)
283 )
285 _warn_or_raise_about_fit_failures(results, error_score)
287 # For callabe scoring, the return type is only know after calling. If the
288 # return type is a dictionary, the error scores can now be inserted with
289 # the correct key.
File ~/tensorflow-test/env/lib/python3.8/site-packages/sklearn/base.py:89, in clone(estimator, safe)
87 new_object_params = estimator.get_params(deep=False)
88 for name, param in new_object_params.items():
---> 89 new_object_params[name] = clone(param, safe=False)
90 new_object = klass(**new_object_params)
91 params_set = new_object.get_params(deep=False)
File ~/tensorflow-test/env/lib/python3.8/site-packages/sklearn/base.py:70, in clone(estimator, safe)
68 elif not hasattr(estimator, "get_params") or isinstance(estimator, type):
69 if not safe:
---> 70 return copy.deepcopy(estimator)
71 else:
72 if isinstance(estimator, type):
File ~/tensorflow-test/env/lib/python3.8/copy.py:153, in deepcopy(x, memo, _nil)
151 copier = getattr(x, "__deepcopy__", None)
152 if copier is not None:
--> 153 y = copier(memo)
154 else:
155 reductor = dispatch_table.get(cls)
File ~/tensorflow-test/env/lib/python3.8/site-packages/scikeras/_saving_utils.py:117, in deepcopy_model(model, memo)
116 def deepcopy_model(model: keras.Model, memo: Dict[Hashable, Any]) -> keras.Model:
--> 117 _, (model_bytes, optimizer_weights) = pack_keras_model(model)
118 new_model = unpack_keras_model(model_bytes, optimizer_weights)
119 memo[model] = new_model
File ~/tensorflow-test/env/lib/python3.8/site-packages/scikeras/_saving_utils.py:108, in pack_keras_model(model)
106 optimizer_weights = None
107 if model.optimizer is not None:
--> 108 optimizer_weights = model.optimizer.get_weights()
109 model_bytes = np.asarray(memoryview(b.read()))
110 return (
111 unpack_keras_model,
112 (model_bytes, optimizer_weights),
113 )
AttributeError: 'Adam' object has no attribute 'get_weights'
I created a TensorFlow environment on my M1 MacBook following https://github.com/mrdbourke/m1-machine-learning-test.
It all worked, and I got the following results:
TensorFlow has access to the following devices:
[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
TensorFlow version: 2.11.0
I also installed featurewiz; I am not sure if there were some problems installing it (I used conda install -c conda-forge featurewiz).
SciKeras doesn't work with TensorFlow 2.11. The TensorFlow team released a breaking change in a minor version bump (they removed the get_weights() method). It will be fixed in SciKeras soon: https://github.com/adriangb/scikeras/pull/287
Edit: that PR was merged, so the new version of SciKeras (v0.10.0) should solve this issue.
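Until that SciKeras release is available in your environment, either of the following should sidestep the removed get_weights() call (the exact version pins are assumptions based on the report above):
pip install -U "scikeras>=0.10.0"
or pin TensorFlow below the breaking release:
pip install "tensorflow<2.11"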

AttributeError: module 'pandas' has no attribute 'Float64Dtype'

I am using Azure ML Notebook to run both Python and R code in notebook cells. I have installed the Python packages pandas 1.1.5 and rpy2 3.5.4, but when I run the following lines of code:
%load_ext rpy2.ipython
print("ankit")
I get the following error message:
AttributeError: module 'pandas' has no attribute 'Float64Dtype'
Can anyone please help to resolve this error? Any help would be appreciated.
Complete error is as following:
AttributeError Traceback (most recent call last)
Input In [10], in <cell line: 1>()
----> 1 get_ipython().run_line_magic('load_ext', 'rpy2.ipython')
2 print("ankit")
File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/IPython/core/interactiveshell.py:2305, in InteractiveShell.run_line_magic(self, magic_name, line, _stack_depth)
2303 kwargs['local_ns'] = self.get_local_scope(stack_depth)
2304 with self.builtin_trap:
-> 2305 result = fn(*args, **kwargs)
2306 return result
File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/IPython/core/magics/extension.py:33, in ExtensionMagics.load_ext(self, module_str)
31 if not module_str:
32 raise UsageError('Missing module name.')
---> 33 res = self.shell.extension_manager.load_extension(module_str)
35 if res == 'already loaded':
36 print("The %s extension is already loaded. To reload it, use:" % module_str)
File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/IPython/core/extensions.py:76, in ExtensionManager.load_extension(self, module_str)
69 """Load an IPython extension by its module name.
70
71 Returns the string "already loaded" if the extension is already loaded,
72 "no load function" if the module doesn't have a load_ipython_extension
73 function, or None if it succeeded.
74 """
75 try:
---> 76 return self._load_extension(module_str)
77 except ModuleNotFoundError:
78 if module_str in BUILTINS_EXTS:
File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/IPython/core/extensions.py:92, in ExtensionManager._load_extension(self, module_str)
90 if module_str not in sys.modules:
91 with prepended_to_syspath(self.ipython_extension_dir):
---> 92 mod = import_module(module_str)
93 if mod.__file__.startswith(self.ipython_extension_dir):
94 print(("Loading extensions from {dir} is deprecated. "
95 "We recommend managing extensions like any "
96 "other Python packages, in site-packages.").format(
97 dir=compress_user(self.ipython_extension_dir)))
File /anaconda/envs/azureml_py38/lib/python3.8/importlib/__init__.py:127, in import_module(name, package)
125 break
126 level += 1
--> 127 return _bootstrap._gcd_import(name[level:], package, level)
File <frozen importlib._bootstrap>:1014, in _gcd_import(name, package, level)
File <frozen importlib._bootstrap>:991, in _find_and_load(name, import_)
File <frozen importlib._bootstrap>:975, in _find_and_load_unlocked(name, import_)
File <frozen importlib._bootstrap>:671, in _load_unlocked(spec)
File <frozen importlib._bootstrap_external>:783, in exec_module(self, module)
File <frozen importlib._bootstrap>:219, in _call_with_frames_removed(f, *args, **kwds)
File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/rpy2/ipython/__init__.py:1, in <module>
----> 1 from . import rmagic
3 load_ipython_extension = rmagic.load_ipython_extension
File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/rpy2/ipython/rmagic.py:121, in <module>
116 def _get_converter(template_converter=template_converter):
117 return Converter('ipython conversion',
118 template=template_converter)
--> 121 ipy_template_converter = _get_ipython_template_converter(template_converter,
122 numpy=numpy,
123 pandas=pandas)
124 converter = _get_converter(template_converter=ipy_template_converter)
127 def CELL_DISPLAY_DEFAULT(res, args):
File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/rpy2/ipython/rmagic.py:111, in _get_ipython_template_converter(template_converter, numpy, pandas)
109 template_converter += numpy2ri.converter
110 if pandas:
--> 111 from rpy2.robjects import pandas2ri
112 template_converter += pandas2ri.converter
113 return template_converter
File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/rpy2/robjects/pandas2ri.py:144, in <module>
125 v = math.nan
126 set_elt(r_vector, i, cast_value(v))
129 _PANDASTYPE2RPY2 = {
130 datetime.date: DateVector,
131 int: functools.partial(
132 IntVector.from_iterable,
133 populate_func=_int_populate_r_vector
134 ),
135 pandas.BooleanDtype: functools.partial(
136 BoolVector.from_iterable,
137 populate_func=_bool_populate_r_vector
138 ),
139 None: BoolVector,
140 str: functools.partial(
141 StrVector.from_iterable,
142 populate_func=_str_populate_r_vector
143 ),
--> 144 pandas.Float64Dtype: functools.partial(
145 FloatVector.from_iterable,
146 populate_func=_float_populate_r_vector
147 ),
148 bytes: (numpy2ri.converter.py2rpy.registry[
149 numpy.ndarray
150 ])
151 }
154 @py2rpy.register(pandas.core.series.Series)
155 def py2rpy_pandasseries(obj):
156 if obj.dtype.name == 'O':
File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/pandas/__init__.py:258, in __getattr__(name)
254 from pandas.core.arrays.sparse import SparseArray as _SparseArray
256 return _SparseArray
--> 258 raise AttributeError(f"module 'pandas' has no attribute '{name}'")
AttributeError: module 'pandas' has no attribute 'Float64Dtype'
I replicated your situation on my system, and it works.
You can use this set of library versions:
R version: 4.2.1
rpy2 version: 3.4.5
Pandas version: 1.3.5
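For context, pandas.Float64Dtype was only introduced in pandas 1.2, so a recent rpy2 importing pandas2ri against pandas 1.1.5 hits exactly this AttributeError. A quick sketch for pinning the versions listed above and verifying them (one workable combination, not the only one):
pip install "pandas==1.3.5" "rpy2==3.4.5"
import pandas
import rpy2
print(pandas.__version__, rpy2.__version__)  # expect 1.3.5 and 3.4.5
print(hasattr(pandas, "Float64Dtype"))       # should print True before loading rpy2.ipython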

Cannot load lvis via tfds

I am trying to load the built-in dataset lvis. It turns out that tfds and lvis should be installed and imported, respectively; however, I have tried everything possible and it still does not work.
import os
import tensorflow as tf
from matplotlib import pyplot as plt
%matplotlib inline
!pip install lvis
!pip install tfds-nightly
import tensorflow_datasets as tfds
train_data, info = tfds.load('lvis', split='train', as_supervised=True, with_info=True)
validation_data = tfds.load('lvis', split='validation', as_supervised=True)
test_data = tfds.load('lvis', split='test', as_supervised=True)
There are some odd outputs after running the above code in Colab.
NotFoundError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/tensorflow_datasets/core/utils/py_utils.py in try_reraise(*args, **kwargs)
391 try:
--> 392 yield
393 except Exception as e: # pylint: disable=broad-except
15 frames
/usr/local/lib/python3.7/dist-packages/tensorflow_datasets/core/load.py in builder(name, try_gcs, **builder_kwargs)
167 with py_utils.try_reraise(prefix=f'Failed to construct dataset {name}: '):
--> 168 return cls(**builder_kwargs) # pytype: disable=not-instantiable
169
/usr/local/lib/python3.7/dist-packages/tensorflow_datasets/core/dataset_builder.py in __init__(self, file_format, **kwargs)
917 """
--> 918 super().__init__(**kwargs)
919 self.info.set_file_format(file_format)
/usr/local/lib/python3.7/dist-packages/tensorflow_datasets/core/dataset_builder.py in __init__(self, data_dir, config, version)
184 else: # Use the code version (do not restore data)
--> 185 self.info.initialize_from_bucket()
186
/usr/local/lib/python3.7/dist-packages/tensorflow_datasets/core/utils/py_utils.py in __get__(self, obj, objtype)
145 if cached is None:
--> 146 cached = self.fget(obj) # pytype: disable=attribute-error
147 setattr(obj, attr, cached)
/usr/local/lib/python3.7/dist-packages/tensorflow_datasets/core/dataset_builder.py in info(self)
328 "the restored dataset.")
--> 329 info = self._info()
330 if not isinstance(info, dataset_info.DatasetInfo):
/usr/local/lib/python3.7/dist-packages/tensorflow_datasets/object_detection/lvis/lvis.py in _info(self)
94 names_file=tfds.core.tfds_path(
---> 95 'object_detection/lvis/lvis_classes.txt'))
96 return tfds.core.DatasetInfo(
/usr/local/lib/python3.7/dist-packages/tensorflow_datasets/core/features/class_label_feature.py in __init__(self, num_classes, names, names_file)
67 else:
---> 68 self.names = _load_names_from_file(names_file)
69
/usr/local/lib/python3.7/dist-packages/tensorflow_datasets/core/features/class_label_feature.py in _load_names_from_file(names_filepath)
198 name.strip()
--> 199 for name in tf.compat.as_text(f.read()).split("\n")
200 if name.strip() # Filter empty names
/usr/local/lib/python3.7/dist-packages/tensorflow/python/lib/io/file_io.py in read(self, n)
116 """
--> 117 self._preread_check()
118 if n == -1:
/usr/local/lib/python3.7/dist-packages/tensorflow/python/lib/io/file_io.py in _preread_check(self)
79 self._read_buf = _pywrap_file_io.BufferedInputStream(
---> 80 compat.path_to_str(self.__name), 1024 * 512)
81
NotFoundError: /usr/local/lib/python3.7/dist-packages/tensorflow_datasets/object_detection/lvis/lvis_classes.txt; No such file or directory
The above exception was the direct cause of the following exception:
RuntimeError Traceback (most recent call last)
<ipython-input-4-b8c819fe5c62> in <module>()
----> 1 train_data, info = tfds.load('lvis', split='train', as_supervised=True, with_info=True)
2 validation_data = tfds.load('lvis', split='validation', as_supervised=True)
3 test_data = tfds.load('lvis', split='test', as_supervised=True)
/usr/local/lib/python3.7/dist-packages/tensorflow_datasets/core/load.py in load(name, split, data_dir, batch_size, shuffle_files, download, as_supervised, decoders, read_config, with_info, builder_kwargs, download_and_prepare_kwargs, as_dataset_kwargs, try_gcs)
315 builder_kwargs = {}
316
--> 317 dbuilder = builder(name, data_dir=data_dir, try_gcs=try_gcs, **builder_kwargs)
318 if download:
319 download_and_prepare_kwargs = download_and_prepare_kwargs or {}
/usr/local/lib/python3.7/dist-packages/tensorflow_datasets/core/load.py in builder(name, try_gcs, **builder_kwargs)
166 if cls:
167 with py_utils.try_reraise(prefix=f'Failed to construct dataset {name}: '):
--> 168 return cls(**builder_kwargs) # pytype: disable=not-instantiable
169
170 # If neither the code nor the files are found, raise DatasetNotFoundError
/usr/lib/python3.7/contextlib.py in __exit__(self, type, value, traceback)
128 value = type()
129 try:
--> 130 self.gen.throw(type, value, traceback)
131 except StopIteration as exc:
132 # Suppress StopIteration *unless* it's the same exception that
/usr/local/lib/python3.7/dist-packages/tensorflow_datasets/core/utils/py_utils.py in try_reraise(*args, **kwargs)
392 yield
393 except Exception as e: # pylint: disable=broad-except
--> 394 reraise(e, *args, **kwargs)
395
396
/usr/local/lib/python3.7/dist-packages/tensorflow_datasets/core/utils/py_utils.py in reraise(e, prefix, suffix)
359 else:
360 exception = RuntimeError(f'{type(e).__name__}: {msg}')
--> 361 raise exception from e
362 # Otherwise, modify the exception in-place
363 elif len(e.args) <= 1:
RuntimeError: NotFoundError: Failed to construct dataset lvis: /usr/local/lib/python3.7/dist-packages/tensorflow_datasets/object_detection/lvis/lvis_classes.txt; No such file or directory
This is what I did to get it to work on Colab Notebook:
!pip install -q tfds-nightly tensorflow tensorflow-datasets matplotlib lvis pycocotools apache_beam
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
Since the tfds object detection lvis folder isn't up to date, I deleted that folder and then redownloaded it from the tfds GitHub page.
First, install github-clone so we can download specific repo subfolders:
!pip install github-clone
Then remove the lvis folder and redownload it from GitHub:
!rm -rf ../usr/local/lib/python3.7/dist-packages/tensorflow_datasets/object_detection/lvis
!ghclone https://github.com/tensorflow/datasets/tree/master/tensorflow_datasets/object_detection/lvis
!mv ./lvis ../usr/local/lib/python3.7/dist-packages/tensorflow_datasets/object_detection/
After that I could get it to work; this next chunk of code worked for me:
ds, info = tfds.load('lvis', split='train[:25%]', with_info=True,
                     data_dir='../content/tensorflow_datasets/',
                     decoders=tfds.decode.PartialDecoding({
                         'image': True,
                         'features': tfds.features.FeaturesDict({
                             'image/id': True,
                             'objects': tfds.features.Sequence({
                                 'id': True,
                                 'bbox': True,
                                 'label': tfds.features.ClassLabel(names=['skateboard', 'shoe'])
                             })
                         })
                     }))

IPython Notebook: What is the default encoding?

I have created a package using the encoding UTF-8.
When calling a function, it returns a DataFrame with a column encoded in UTF-8.
When using IPython at the command line, I don't have any problems showing the content of this table. When using the Notebook, it crashes with the error 'utf8' codec can't decode byte 0xe7. I've attached the full traceback below.
What is the proper encoding to work with the Notebook?
UnicodeDecodeError Traceback (most recent call last)
<ipython-input-13-92c0011919e7> in <module>()
3 ver = verif.VerificacaoNA()
4 comp, total = ver.executarCompRealFisica(DT_INI, DT_FIN)
----> 5 comp
c:\Python27-32\lib\site-packages\ipython-0.13.1-py2.7.egg\IPython\core\displayhook.pyc in __call__(self, result)
240 self.update_user_ns(result)
241 self.log_output(format_dict)
--> 242 self.finish_displayhook()
243
244 def flush(self):
c:\Python27-32\lib\site-packages\ipython-0.13.1-py2.7.egg\IPython\zmq\displayhook.pyc in finish_displayhook(self)
59 sys.stdout.flush()
60 sys.stderr.flush()
---> 61 self.session.send(self.pub_socket, self.msg, ident=self.topic)
62 self.msg = None
63
c:\Python27-32\lib\site-packages\ipython-0.13.1-py2.7.egg\IPython\zmq\session.pyc in send(self, stream, msg_or_type, content, parent, ident, buffers, subheader, track, header)
557
558 buffers = [] if buffers is None else buffers
--> 559 to_send = self.serialize(msg, ident)
560 flag = 0
561 if buffers:
c:\Python27-32\lib\site-packages\ipython-0.13.1-py2.7.egg\IPython\zmq\session.pyc in serialize(self, msg, ident)
461 content = self.none
462 elif isinstance(content, dict):
--> 463 content = self.pack(content)
464 elif isinstance(content, bytes):
465 # content is already packed, as in a relayed message
c:\Python27-32\lib\site-packages\ipython-0.13.1-py2.7.egg\IPython\zmq\session.pyc in <lambda>(obj)
76
77 # ISO8601-ify datetime objects
---> 78 json_packer = lambda obj: jsonapi.dumps(obj, default=date_default)
79 json_unpacker = lambda s: extract_dates(jsonapi.loads(s))
80
c:\Python27-32\lib\site-packages\pyzmq-13.0.0-py2.7-win32.egg\zmq\utils\jsonapi.pyc in dumps(o, **kwargs)
70 kwargs['separators'] = (',', ':')
71
---> 72 return _squash_unicode(jsonmod.dumps(o, **kwargs))
73
74 def loads(s, **kwargs):
c:\Python27-32\lib\json\__init__.pyc in dumps(obj, skipkeys, ensure_ascii, check_circular, allow_nan, cls, indent, separators, encoding, default, **kw)
236 check_circular=check_circular, allow_nan=allow_nan, indent=indent,
237 separators=separators, encoding=encoding, default=default,
--> 238 **kw).encode(obj)
239
240
c:\Python27-32\lib\json\encoder.pyc in encode(self, o)
199 # exceptions aren't as detailed. The list call should be roughly
200 # equivalent to the PySequence_Fast that ''.join() would do.
--> 201 chunks = self.iterencode(o, _one_shot=True)
202 if not isinstance(chunks, (list, tuple)):
203 chunks = list(chunks)
c:\Python27-32\lib\json\encoder.pyc in iterencode(self, o, _one_shot)
262 self.key_separator, self.item_separator, self.sort_keys,
263 self.skipkeys, _one_shot)
--> 264 return _iterencode(o, 0)
265
266 def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
UnicodeDecodeError: 'utf8' codec can't decode byte 0xe7 in position 199: invalid continuation byte
I had the same problem recently, and indeed setting the default encoding to UTF-8 did the trick:
import sys
reload(sys)
sys.setdefaultencoding("utf-8")
Running sys.getdefaultencoding() yielded 'ascii' in my environment (Python 2.7.3), so I guess that's the default.
Also see this related question and Ian Bicking's blog post on the subject.
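For reference, this workaround only applies to Python 2: site.py deletes setdefaultencoding from sys at startup, which is why reload(sys) is needed to get it back. A minimal before/after check (assuming Python 2.7, as in the traceback):
import sys
print(sys.getdefaultencoding())   # 'ascii' on a stock Python 2.7

reload(sys)                       # restores the hidden sys.setdefaultencoding
sys.setdefaultencoding("utf-8")
print(sys.getdefaultencoding())   # now 'utf-8'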