When I try the magic command %pylab I get the following error:
In [1]: %pylab
---------------------------------------------------------------------------
UnicodeDecodeError Traceback (most recent call last)
<ipython-input-1-5c1faa999e5b> in <module>()
----> 1 get_ipython().magic(u'pylab')
/Users/rafaelrodrigues/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.pyc in magic(self, arg_s)
2203 magic_name, _, magic_arg_s = arg_s.partition(' ')
2204 magic_name = magic_name.lstrip(prefilter.ESC_MAGIC)
-> 2205 return self.run_line_magic(magic_name, magic_arg_s)
2206
2207 #-------------------------------------------------------------------------
/Users/rafaelrodrigues/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.pyc in run_line_magic(self, magic_name, line)
2124 kwargs['local_ns'] = sys._getframe(stack_depth).f_locals
2125 with self.builtin_trap:
-> 2126 result = fn(*args,**kwargs)
2127 return result
2128
/Users/rafaelrodrigues/anaconda/lib/python2.7/site-packages/IPython/core/magics/pylab.pyc in pylab(self, line)
/Users/rafaelrodrigues/anaconda/lib/python2.7/site-packages/IPython/core/magic.pyc in <lambda>(f, *a, **k)
191 # but it's overkill for just that one bit of state.
192 def magic_deco(arg):
--> 193 call = lambda f, *a, **k: f(*a, **k)
194
195 if callable(arg):
/Users/rafaelrodrigues/anaconda/lib/python2.7/site-packages/IPython/core/magics/pylab.pyc in pylab(self, line)
134 import_all = not args.no_import_all
135
--> 136 gui, backend, clobbered = self.shell.enable_pylab(args.gui, import_all=import_all)
137 self._show_matplotlib_backend(args.gui, backend)
138 print ("Populating the interactive namespace from numpy and matplotlib")
/Users/rafaelrodrigues/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.pyc in enable_pylab(self, gui, import_all, welcome_message)
2980 from IPython.core.pylabtools import import_pylab
2981
-> 2982 gui, backend = self.enable_matplotlib(gui)
2983
2984 # We want to prevent the loading of pylab to pollute the user's
/Users/rafaelrodrigues/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.pyc in enable_matplotlib(self, gui)
2941 gui, backend = pt.find_gui_and_backend(self.pylab_gui_select)
2942
-> 2943 pt.activate_matplotlib(backend)
2944 pt.configure_inline_support(self, backend)
2945
/Users/rafaelrodrigues/anaconda/lib/python2.7/site-packages/IPython/core/pylabtools.pyc in activate_matplotlib(backend)
287 matplotlib.rcParams['backend'] = backend
288
--> 289 import matplotlib.pyplot
290 matplotlib.pyplot.switch_backend(backend)
291
/Users/rafaelrodrigues/anaconda/lib/python2.7/site-packages/matplotlib/pyplot.py in <module>()
25
26 import matplotlib
---> 27 import matplotlib.colorbar
28 from matplotlib import style
29 from matplotlib import _pylab_helpers, interactive
/Users/rafaelrodrigues/anaconda/lib/python2.7/site-packages/matplotlib/colorbar.py in <module>()
32 import matplotlib.artist as martist
33 import matplotlib.cbook as cbook
---> 34 import matplotlib.collections as collections
35 import matplotlib.colors as colors
36 import matplotlib.contour as contour
/Users/rafaelrodrigues/anaconda/lib/python2.7/site-packages/matplotlib/collections.py in <module>()
25 import matplotlib.artist as artist
26 from matplotlib.artist import allow_rasterization
---> 27 import matplotlib.backend_bases as backend_bases
28 import matplotlib.path as mpath
29 from matplotlib import _path
/Users/rafaelrodrigues/anaconda/lib/python2.7/site-packages/matplotlib/backend_bases.py in <module>()
54
55 import matplotlib.tight_bbox as tight_bbox
---> 56 import matplotlib.textpath as textpath
57 from matplotlib.path import Path
58 from matplotlib.cbook import mplDeprecation
/Users/rafaelrodrigues/anaconda/lib/python2.7/site-packages/matplotlib/textpath.py in <module>()
17 from matplotlib.path import Path
18 from matplotlib import rcParams
---> 19 import matplotlib.font_manager as font_manager
20 from matplotlib.ft2font import FT2Font, KERNING_DEFAULT, LOAD_NO_HINTING
21 from matplotlib.ft2font import LOAD_TARGET_LIGHT
/Users/rafaelrodrigues/anaconda/lib/python2.7/site-packages/matplotlib/font_manager.py in <module>()
1410 verbose.report("Using fontManager instance from %s" % _fmcache)
1411 except:
-> 1412 _rebuild()
1413 else:
1414 _rebuild()
/Users/rafaelrodrigues/anaconda/lib/python2.7/site-packages/matplotlib/font_manager.py in _rebuild()
1395 def _rebuild():
1396 global fontManager
-> 1397 fontManager = FontManager()
1398 if _fmcache:
1399 pickle_dump(fontManager, _fmcache)
/Users/rafaelrodrigues/anaconda/lib/python2.7/site-packages/matplotlib/font_manager.py in __init__(self, size, weight)
1035 # Load TrueType fonts and create font dictionary.
1036
-> 1037 self.ttffiles = findSystemFonts(paths) + findSystemFonts()
1038 self.defaultFamily = {
1039 'ttf': 'Bitstream Vera Sans',
/Users/rafaelrodrigues/anaconda/lib/python2.7/site-packages/matplotlib/font_manager.py in findSystemFonts(fontpaths, fontext)
320 fontfiles[f] = 1
321
--> 322 for f in get_fontconfig_fonts(fontext):
323 fontfiles[f] = 1
324
/Users/rafaelrodrigues/anaconda/lib/python2.7/site-packages/matplotlib/font_manager.py in get_fontconfig_fonts(fontext)
272 pipe = subprocess.Popen(['fc-list', '--format=%{file}\\n'],
273 stdout=subprocess.PIPE,
--> 274 stderr=subprocess.PIPE)
275 output = pipe.communicate()[0]
276 except (OSError, IOError):
/Users/rafaelrodrigues/anaconda/lib/python2.7/subprocess.pyc in __init__(self, args, bufsize, executable, stdin, stdout, stderr, preexec_fn, close_fds, shell, cwd, env, universal_newlines, startupinfo, creationflags)
708 p2cread, p2cwrite,
709 c2pread, c2pwrite,
--> 710 errread, errwrite)
711 except Exception:
712 # Preserve original exception in case os.close raises.
/Users/rafaelrodrigues/anaconda/lib/python2.7/subprocess.pyc in _execute_child(self, args, executable, preexec_fn, close_fds, cwd, env, universal_newlines, startupinfo, creationflags, shell, to_close, p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite)
1325 raise
1326 child_exception = pickle.loads(data)
-> 1327 raise child_exception
1328
1329
UnicodeDecodeError: 'ascii' codec can't decode byte 0xe2 in position 0: ordinal not in range(128)
I have uninstalled and reinstalled matplotlib many times. I am using the latest versions of IPython and Matplotlib on a Mac running OS X Mavericks.
What else should I try?
Thanks
Activate your conda environment (if you're not using the default environment), then run conda install matplotlib.
@Clarinetist:
Try with a fresh official Python installation.
Set up the IPython notebook server according to this guide:
http://ipython.org/ipython-doc/3/notebook/notebook.html
Note that the use of pylab is discouraged:
http://matplotlib.org/faq/usage_faq.html#matplotlib-pyplot-and-pylab-how-are-they-related
Related
I'm looking for a method to programmatically read a Microsoft Access database file (.mdb) using Python in an Ubuntu environment (hosted on a Windows 10 machine) and export its tables to pandas DataFrames.
I've attempted the solutions previously posted, such as using pandas access, pyodbc, or mdbtools, however all have failed due to errors relating to missing tables or files in the packages.
Any feedback or insights would be much appreciated!
pypyodbc failure:
OperationalError Traceback (most recent call last)
Cell In[6], line 2
1 pypyodbc.lowercase = False
----> 2 conn = pypyodbc.connect(
3 r"Driver={Microsoft Access Driver (*.mdb, *.accdb)};" +
4 r"/home/kevin/scratch/HDINEWTON.mdb;")
5 cur = conn.cursor()
6 # cur.execute("SELECT CreatureID, Name_EN, Name_JP FROM Creatures");
7 # while True:
8 # row = cur.fetchone()
(...)
11 # print(u"Creature with ID {0} is {1} ({2})".format(
12 # row.get("CreatureID"), row.get("Name_EN"), row.get("Name_JP")))
File ~/miniconda3/envs/mdb_access/lib/python3.11/site-packages/pypyodbc.py:2454, in Connection.__init__(self, connectString, autocommit, ansi, timeout, unicode_results, readonly, **kargs)
2450 if self.connection_timeout != 0:
2451 self.set_connection_timeout(connection_timeout)
-> 2454 self.connect(connectString, autocommit, ansi, timeout, unicode_results, readonly)
File ~/miniconda3/envs/mdb_access/lib/python3.11/site-packages/pypyodbc.py:2507, in Connection.connect(self, connectString, autocommit, ansi, timeout, unicode_results, readonly)
2505 else:
2506 ret = odbc_func(self.dbc_h, 0, c_connectString, len(self.connectString), None, 0, None, SQL_DRIVER_NOPROMPT)
-> 2507 check_success(self, ret)
2510 # Set the connection's attribute of "autocommit"
2511 #
2512 self.autocommit = autocommit
File ~/miniconda3/envs/mdb_access/lib/python3.11/site-packages/pypyodbc.py:1009, in check_success(ODBC_obj, ret)
1007 ctrl_err(SQL_HANDLE_STMT, ODBC_obj.stmt_h, ret, ODBC_obj.ansi)
1008 elif isinstance(ODBC_obj, Connection):
-> 1009 ctrl_err(SQL_HANDLE_DBC, ODBC_obj.dbc_h, ret, ODBC_obj.ansi)
1010 else:
1011 ctrl_err(SQL_HANDLE_ENV, ODBC_obj, ret, False)
File ~/miniconda3/envs/mdb_access/lib/python3.11/site-packages/pypyodbc.py:983, in ctrl_err(ht, h, val_ret, ansi)
981 raise NotSupportedError(state,err_text)
982 elif state in (raw_s('HYT00'),raw_s('HYT01'),raw_s('01000')):
--> 983 raise OperationalError(state,err_text)
984 elif state[:2] in (raw_s('IM'),raw_s('HY')):
985 raise Error(state,err_text)
OperationalError: ('01000', "[01000] [unixODBC][Driver Manager]Can't open lib 'Microsoft Access Driver (*.mdb, *.accdb)' : file not found")
pandas access failure:
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
Cell In[4], line 2
1 path_to_mdb_database = '/home/kevin/scratch/HDINEWTON.mdb'
----> 2 mdb.list_tables(path_to_mdb_database)
File ~/miniconda3/envs/mdb_access/lib/python3.11/site-packages/pandas_access/__init__.py:25, in list_tables(rdb_file, encoding)
17 def list_tables(rdb_file, encoding="latin-1"):
18 """
19 :param rdb_file: The MS Access database file.
20 :param encoding: The content encoding of the output. I assume `latin-1`
(...)
23 :return: A list of the tables in a given database.
24 """
---> 25 tables = subprocess.check_output(['mdb-tables', rdb_file]).decode(encoding)
26 return tables.strip().split(" ")
File ~/miniconda3/envs/mdb_access/lib/python3.11/subprocess.py:465, in check_output(timeout, *popenargs, **kwargs)
462 empty = b''
463 kwargs['input'] = empty
--> 465 return run(*popenargs, stdout=PIPE, timeout=timeout, check=True,
466 **kwargs).stdout
File ~/miniconda3/envs/mdb_access/lib/python3.11/subprocess.py:546, in run(input, capture_output, timeout, check, *popenargs, **kwargs)
543 kwargs['stdout'] = PIPE
544 kwargs['stderr'] = PIPE
--> 546 with Popen(*popenargs, **kwargs) as process:
547 try:
548 stdout, stderr = process.communicate(input, timeout=timeout)
File ~/miniconda3/envs/mdb_access/lib/python3.11/subprocess.py:1022, in Popen.__init__(self, args, bufsize, executable, stdin, stdout, stderr, preexec_fn, close_fds, shell, cwd, env, universal_newlines, startupinfo, creationflags, restore_signals, start_new_session, pass_fds, user, group, extra_groups, encoding, errors, text, umask, pipesize, process_group)
1018 if self.text_mode:
1019 self.stderr = io.TextIOWrapper(self.stderr,
1020 encoding=encoding, errors=errors)
-> 1022 self._execute_child(args, executable, preexec_fn, close_fds,
1023 pass_fds, cwd, env,
1024 startupinfo, creationflags, shell,
1025 p2cread, p2cwrite,
1026 c2pread, c2pwrite,
1027 errread, errwrite,
1028 restore_signals,
1029 gid, gids, uid, umask,
1030 start_new_session, process_group)
1031 except:
1032 # Cleanup if the child failed starting.
1033 for f in filter(None, (self.stdin, self.stdout, self.stderr)):
File ~/miniconda3/envs/mdb_access/lib/python3.11/subprocess.py:1899, in Popen._execute_child(self, args, executable, preexec_fn, close_fds, pass_fds, cwd, env, startupinfo, creationflags, shell, p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite, restore_signals, gid, gids, uid, umask, start_new_session, process_group)
1897 if errno_num != 0:
1898 err_msg = os.strerror(errno_num)
-> 1899 raise child_exception_type(errno_num, err_msg, err_filename)
1900 raise child_exception_type(err_msg)
FileNotFoundError: [Errno 2] No such file or directory: 'mdb-tables'
mdb tools failure:
ModuleNotFoundError Traceback (most recent call last)
Cell In[2], line 5
3 import os
4 import pandas_access as mdb
----> 5 import mdbtools
7 print('Imports Successful')
File ~/miniconda3/envs/mdb/lib/python3.11/site-packages/mdbtools/__init__.py:1
----> 1 from .MDBcli import *
2 from .MDBnetrc import *
3 from .MDBtool import *
File ~/miniconda3/envs/mdb/lib/python3.11/site-packages/mdbtools/MDBcli.py:21
19 import sys
20 import argparse
---> 21 from MDButils import *
23 # Stop Python from complaining when I/O pipes are closed
24 from signal import signal, SIGPIPE, SIG_DFL
ModuleNotFoundError: No module named 'MDButils'
Something is wrong with my pandas module. I tried to read in an excel file using the following code, which works on my classmate's computer, but it's giving me an error on my computer:
FFT1=pd.read_excel('FFT1.xlsx', sheet_name='sheet1')
The file named 'FFT1.xlsx' is in the same directory as my jupyter notebook. The error message says:
XLRDError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_7436/2793485739.py in <module>
----> 1 FFT1=pd.read_excel('FFT1.xlsx', sheet_name='sheet1')
D:\Softwares\Anaconda\lib\site-packages\pandas\io\excel\_base.py in read_excel(io, sheet_name, header, names, index_col, usecols, squeeze, dtype, engine, converters, true_values, false_values, skiprows, nrows, na_values, keep_default_na, verbose, parse_dates, date_parser, thousands, comment, skipfooter, convert_float, mangle_dupe_cols, **kwds)
302
303 if not isinstance(io, ExcelFile):
--> 304 io = ExcelFile(io, engine=engine)
305 elif engine and engine != io.engine:
306 raise ValueError(
D:\Softwares\Anaconda\lib\site-packages\pandas\io\excel\_base.py in __init__(self, io, engine)
819 self._io = stringify_path(io)
820
--> 821 self._reader = self._engines[engine](self._io)
822
823 def __fspath__(self):
D:\Softwares\Anaconda\lib\site-packages\pandas\io\excel\_xlrd.py in __init__(self, filepath_or_buffer)
19 err_msg = "Install xlrd >= 1.0.0 for Excel support"
20 import_optional_dependency("xlrd", extra=err_msg)
---> 21 super().__init__(filepath_or_buffer)
22
23 @property
D:\Softwares\Anaconda\lib\site-packages\pandas\io\excel\_base.py in __init__(self, filepath_or_buffer)
351 self.book = self.load_workbook(filepath_or_buffer)
352 elif isinstance(filepath_or_buffer, str):
--> 353 self.book = self.load_workbook(filepath_or_buffer)
354 elif isinstance(filepath_or_buffer, bytes):
355 self.book = self.load_workbook(BytesIO(filepath_or_buffer))
D:\Softwares\Anaconda\lib\site-packages\pandas\io\excel\_xlrd.py in load_workbook(self, filepath_or_buffer)
34 return open_workbook(file_contents=data)
35 else:
---> 36 return open_workbook(filepath_or_buffer)
37
38 @property
D:\Softwares\Anaconda\lib\site-packages\xlrd\__init__.py in open_workbook(filename, logfile, verbosity, use_mmap, file_contents, encoding_override, formatting_info, on_demand, ragged_rows, ignore_workbook_corruption)
168 # files that xlrd can parse don't start with the expected signature.
169 if file_format and file_format != 'xls':
--> 170 raise XLRDError(FILE_FORMAT_DESCRIPTIONS[file_format]+'; not supported')
171
172 bk = open_workbook_xls(
XLRDError: Excel xlsx file; not supported
How should I fix this?
Make sure that you have already installed openpyxl; if you haven't, try
pip install openpyxl
Change your code to
FFT1=pd.read_excel('FFT1.xlsx', sheet_name='sheet1',engine='openpyxl')
I am trying to load the built-in dataset lvis. It turns out that tfds has to be imported and lvis has to be installed; however, I have tried everything I could and it still does not work.
import os
import tensorflow as tf
from matplotlib import pyplot as plt
%matplotlib inline
!pip install lvis
!pip install tfds-nightly
import tensorflow_datasets as tfds
train_data, info = tfds.load('lvis', split='train', as_supervised=True, with_info=True)
validation_data = tfds.load('lvis', split='validation', as_supervised=True)
test_data = tfds.load('lvis', split='test', as_supervised=True)
Running the code above in Colab produces some odd output:
NotFoundError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/tensorflow_datasets/core/utils/py_utils.py in try_reraise(*args, **kwargs)
391 try:
--> 392 yield
393 except Exception as e: # pylint: disable=broad-except
15 frames
/usr/local/lib/python3.7/dist-packages/tensorflow_datasets/core/load.py in builder(name, try_gcs, **builder_kwargs)
167 with py_utils.try_reraise(prefix=f'Failed to construct dataset {name}: '):
--> 168 return cls(**builder_kwargs) # pytype: disable=not-instantiable
169
/usr/local/lib/python3.7/dist-packages/tensorflow_datasets/core/dataset_builder.py in __init__(self, file_format, **kwargs)
917 """
--> 918 super().__init__(**kwargs)
919 self.info.set_file_format(file_format)
/usr/local/lib/python3.7/dist-packages/tensorflow_datasets/core/dataset_builder.py in __init__(self, data_dir, config, version)
184 else: # Use the code version (do not restore data)
--> 185 self.info.initialize_from_bucket()
186
/usr/local/lib/python3.7/dist-packages/tensorflow_datasets/core/utils/py_utils.py in __get__(self, obj, objtype)
145 if cached is None:
--> 146 cached = self.fget(obj) # pytype: disable=attribute-error
147 setattr(obj, attr, cached)
/usr/local/lib/python3.7/dist-packages/tensorflow_datasets/core/dataset_builder.py in info(self)
328 "the restored dataset.")
--> 329 info = self._info()
330 if not isinstance(info, dataset_info.DatasetInfo):
/usr/local/lib/python3.7/dist-packages/tensorflow_datasets/object_detection/lvis/lvis.py in _info(self)
94 names_file=tfds.core.tfds_path(
---> 95 'object_detection/lvis/lvis_classes.txt'))
96 return tfds.core.DatasetInfo(
/usr/local/lib/python3.7/dist-packages/tensorflow_datasets/core/features/class_label_feature.py in __init__(self, num_classes, names, names_file)
67 else:
---> 68 self.names = _load_names_from_file(names_file)
69
/usr/local/lib/python3.7/dist-packages/tensorflow_datasets/core/features/class_label_feature.py in _load_names_from_file(names_filepath)
198 name.strip()
--> 199 for name in tf.compat.as_text(f.read()).split("\n")
200 if name.strip() # Filter empty names
/usr/local/lib/python3.7/dist-packages/tensorflow/python/lib/io/file_io.py in read(self, n)
116 """
--> 117 self._preread_check()
118 if n == -1:
/usr/local/lib/python3.7/dist-packages/tensorflow/python/lib/io/file_io.py in _preread_check(self)
79 self._read_buf = _pywrap_file_io.BufferedInputStream(
---> 80 compat.path_to_str(self.__name), 1024 * 512)
81
NotFoundError: /usr/local/lib/python3.7/dist-packages/tensorflow_datasets/object_detection/lvis/lvis_classes.txt; No such file or directory
The above exception was the direct cause of the following exception:
RuntimeError Traceback (most recent call last)
<ipython-input-4-b8c819fe5c62> in <module>()
----> 1 train_data, info = tfds.load('lvis', split='train', as_supervised=True, with_info=True)
2 validation_data = tfds.load('lvis', split='validation', as_supervised=True)
3 test_data = tfds.load('lvis', split='test', as_supervised=True)
/usr/local/lib/python3.7/dist-packages/tensorflow_datasets/core/load.py in load(name, split, data_dir, batch_size, shuffle_files, download, as_supervised, decoders, read_config, with_info, builder_kwargs, download_and_prepare_kwargs, as_dataset_kwargs, try_gcs)
315 builder_kwargs = {}
316
--> 317 dbuilder = builder(name, data_dir=data_dir, try_gcs=try_gcs, **builder_kwargs)
318 if download:
319 download_and_prepare_kwargs = download_and_prepare_kwargs or {}
/usr/local/lib/python3.7/dist-packages/tensorflow_datasets/core/load.py in builder(name, try_gcs, **builder_kwargs)
166 if cls:
167 with py_utils.try_reraise(prefix=f'Failed to construct dataset {name}: '):
--> 168 return cls(**builder_kwargs) # pytype: disable=not-instantiable
169
170 # If neither the code nor the files are found, raise DatasetNotFoundError
/usr/lib/python3.7/contextlib.py in __exit__(self, type, value, traceback)
128 value = type()
129 try:
--> 130 self.gen.throw(type, value, traceback)
131 except StopIteration as exc:
132 # Suppress StopIteration *unless* it's the same exception that
/usr/local/lib/python3.7/dist-packages/tensorflow_datasets/core/utils/py_utils.py in try_reraise(*args, **kwargs)
392 yield
393 except Exception as e: # pylint: disable=broad-except
--> 394 reraise(e, *args, **kwargs)
395
396
/usr/local/lib/python3.7/dist-packages/tensorflow_datasets/core/utils/py_utils.py in reraise(e, prefix, suffix)
359 else:
360 exception = RuntimeError(f'{type(e).__name__}: {msg}')
--> 361 raise exception from e
362 # Otherwise, modify the exception in-place
363 elif len(e.args) <= 1:
RuntimeError: NotFoundError: Failed to construct dataset lvis: /usr/local/lib/python3.7/dist-packages/tensorflow_datasets/object_detection/lvis/lvis_classes.txt; No such file or directory
This is what I did to get it to work on Colab Notebook:
!pip install -q tfds-nightly tensorflow tensorflow-datasets matplotlib lvis pycocotools apache_beam
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
Since the tfds object detection lvis folder isn't up to date, I deleted that folder then redownloaded it from the tfds github page.
First install github-clone so we can download specific repo subfolders
!pip install github-clone
Then remove the lvis folder and redownload it from github:
!rm -rf ../usr/local/lib/python3.7/dist-packages/tensorflow_datasets/object_detection/lvis
!ghclone https://github.com/tensorflow/datasets/tree/master/tensorflow_datasets/object_detection/lvis
!mv ./lvis ../usr/local/lib/python3.7/dist-packages/tensorflow_datasets/object_detection/
After that I could get it to work, this next chunk of code worked for me:
ds, info = tfds.load('lvis', split='train[:25%]', with_info=True,
data_dir= '../content/tensorflow_datasets/',
decoders=tfds.decode.PartialDecoding({
'image': True,
'features': tfds.features.FeaturesDict({'image/id':True,
'objects':tfds.features.Sequence({
'id': True,
'bbox': True,
'label': tfds.features.ClassLabel(names=['skateboard','shoe'])
})
})
})
)
I recently upgraded pandas to 1.1.5, using Python 3.6.4 and I can no longer plot any charts with a datetime index column.
See the below example where I import a time series from a csv file. I have also tried registering matplotlib converters in case this was the issue. I get the error message shown below. Incidentally seaborn also no longer works but not sure if that's relevant.
Thanks
import matplotlib.pyplot as plt
import pandas as pd
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
df = pd.read_csv('example.csv', parse_dates=True, index_col=0, dayfirst=True)
df.head()
Click here to see output for df.head()
df.plot()
I get the following error if I try to plot:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-37-848b80e64df8> in <module>()
----> 1 df.plot()
C:\Program Files\Anaconda3\lib\site-packages\pandas\plotting\_core.py in __call__(self, *args, **kwargs)
947 data.columns = label_name
948
--> 949 return plot_backend.plot(data, kind=kind, **kwargs)
950
951 __call__.__doc__ = __doc__
C:\Program Files\Anaconda3\lib\site-packages\pandas\plotting\_matplotlib\__init__.py in plot(data, kind, **kwargs)
59 kwargs["ax"] = getattr(ax, "left_ax", ax)
60 plot_obj = PLOT_CLASSES[kind](data, **kwargs)
---> 61 plot_obj.generate()
62 plot_obj.draw()
63 return plot_obj.result
C:\Program Files\Anaconda3\lib\site-packages\pandas\plotting\_matplotlib\core.py in generate(self)
269 self._compute_plot_data()
270 self._setup_subplots()
--> 271 self._make_plot()
272 self._add_table()
273 self._make_legend()
C:\Program Files\Anaconda3\lib\site-packages\pandas\plotting\_matplotlib\core.py in _make_plot(self)
1124 stacking_id=stacking_id,
1125 is_errorbar=is_errorbar,
-> 1126 **kwds,
1127 )
1128 self._add_legend_handle(newlines[0], label, index=i)
C:\Program Files\Anaconda3\lib\site-packages\pandas\plotting\_matplotlib\core.py in _plot(cls, ax, x, y, style, column_num, stacking_id, **kwds)
1143 cls._initialize_stacker(ax, stacking_id, len(y))
1144 y_values = cls._get_stacked_values(ax, stacking_id, y, kwds["label"])
-> 1145 lines = MPLPlot._plot(ax, x, y_values, style=style, **kwds)
1146 cls._update_stacker(ax, stacking_id, y)
1147 return lines
C:\Program Files\Anaconda3\lib\site-packages\pandas\plotting\_matplotlib\converter.py in wrapper(*args, **kwargs)
63 def wrapper(*args, **kwargs):
64 with pandas_converters():
---> 65 return func(*args, **kwargs)
66
67 return wrapper
C:\Program Files\Anaconda3\lib\site-packages\pandas\plotting\_matplotlib\core.py in _plot(cls, ax, x, y, style, is_errorbar, **kwds)
666 else:
667 args = (x, y)
--> 668 return ax.plot(*args, **kwds)
669
670 def _get_index_name(self):
C:\Program Files\Anaconda3\lib\site-packages\matplotlib\__init__.py in inner(ax, *args, **kwargs)
1715 warnings.warn(msg % (label_namer, func.__name__),
1716 RuntimeWarning, stacklevel=2)
-> 1717 return func(ax, *args, **kwargs)
1718 pre_doc = inner.__doc__
1719 if pre_doc is None:
C:\Program Files\Anaconda3\lib\site-packages\matplotlib\axes\_axes.py in plot(self, *args, **kwargs)
1371
1372 for line in self._get_lines(*args, **kwargs):
-> 1373 self.add_line(line)
1374 lines.append(line)
1375
C:\Program Files\Anaconda3\lib\site-packages\matplotlib\axes\_base.py in add_line(self, line)
1777 line.set_clip_path(self.patch)
1778
-> 1779 self._update_line_limits(line)
1780 if not line.get_label():
1781 line.set_label('_line%d' % len(self.lines))
C:\Program Files\Anaconda3\lib\site-packages\matplotlib\axes\_base.py in _update_line_limits(self, line)
1799 Figures out the data limit of the given line, updating self.dataLim.
1800 """
-> 1801 path = line.get_path()
1802 if path.vertices.size == 0:
1803 return
C:\Program Files\Anaconda3\lib\site-packages\matplotlib\lines.py in get_path(self)
955 """
956 if self._invalidy or self._invalidx:
--> 957 self.recache()
958 return self._path
959
C:\Program Files\Anaconda3\lib\site-packages\matplotlib\lines.py in recache(self, always)
655 def recache(self, always=False):
656 if always or self._invalidx:
--> 657 xconv = self.convert_xunits(self._xorig)
658 x = _to_unmasked_float_array(xconv).ravel()
659 else:
C:\Program Files\Anaconda3\lib\site-packages\matplotlib\artist.py in convert_xunits(self, x)
189 if ax is None or ax.xaxis is None:
190 return x
--> 191 return ax.xaxis.convert_units(x)
192
193 def convert_yunits(self, y):
C:\Program Files\Anaconda3\lib\site-packages\matplotlib\axis.py in convert_units(self, x)
1489 return x
1490
-> 1491 ret = self.converter.convert(x, self.units, self)
1492 return ret
1493
C:\Program Files\Anaconda3\lib\site-packages\pandas\plotting\_matplotlib\converter.py in convert(values, unit, axis)
254 values = [DatetimeConverter._convert_1d(v, unit, axis) for v in values]
255 else:
--> 256 values = DatetimeConverter._convert_1d(values, unit, axis)
257 return values
258
C:\Program Files\Anaconda3\lib\site-packages\pandas\plotting\_matplotlib\converter.py in _convert_1d(values, unit, axis)
289 pass
290
--> 291 values = dates.date2num(values)
292
293 return values
C:\Program Files\Anaconda3\lib\site-packages\matplotlib\dates.py in date2num(d)
394 if not d.size:
395 return d
--> 396 return _to_ordinalf_np_vectorized(d)
397
398
C:\Program Files\Anaconda3\lib\site-packages\numpy\lib\function_base.py in __call__(self, *args, **kwargs)
2106 vargs.extend([kwargs[_n] for _n in names])
2107
-> 2108 return self._vectorize_call(func=func, args=vargs)
2109
2110 def _get_ufunc_and_otypes(self, func, args):
C:\Program Files\Anaconda3\lib\site-packages\numpy\lib\function_base.py in _vectorize_call(self, func, args)
2184 res = func()
2185 else:
-> 2186 ufunc, otypes = self._get_ufunc_and_otypes(func=func, args=args)
2187
2188 # Convert args to object arrays first
C:\Program Files\Anaconda3\lib\site-packages\numpy\lib\function_base.py in _get_ufunc_and_otypes(self, func, args)
2144
2145 inputs = [arg.flat[0] for arg in args]
-> 2146 outputs = func(*inputs)
2147
2148 # Performance note: profiling indicates that -- for simple
C:\Program Files\Anaconda3\lib\site-packages\matplotlib\dates.py in _to_ordinalf(dt)
243 tzi = UTC
244
--> 245 base = float(dt.toordinal())
246
247 # If it's sufficiently datetime-like, it will have a `date()` method
AttributeError: 'numpy.datetime64' object has no attribute 'toordinal'
The older version of matplotlib (2.1.2) is out of date and no longer compatible with the newer version of pandas (1.1.5). An upgrade to matplotlib 3.3.4 solves this issue - as discussed in the comments.
I am using Koalas and I want to change the value of a column based on a condition.
In pandas I can do that using:
import pandas as pd
df_test = pd.DataFrame({
'a': [1,2,3]
,'b': ['one','two','three']})
df_test2 = pd.DataFrame({
'c': [2,1,3]
,'d': ['one','two','three']})
df_test.loc[df_test.a.isin(df_test2['c']),'b'] = 'four'
df_test.head()
a b
0 1 four
1 2 four
2 3 four
I am trying to do the same in Koalas, but I get this error:
---------------------------------------------------------------------------
PandasNotImplementedError Traceback (most recent call last)
<ipython-input-15-814219258adb> in <module>
5 new_loans['write_offs'] = 0
6
----> 7 new_loans.loc[(new_loans['ID'].isin(userinput_write_offs['id'])),'write_offs'] = 1
8 new_loans.loc[new_loans['write_offs']==1,'is_active'] = 0
9 new_loans = new_loans.sort_values(by = ['ZOHOID','Disb Date'])
/usr/local/lib/python3.7/dist-packages/databricks/koalas/base.py in isin(self, values)
894 )
895
--> 896 return self._with_new_scol(self.spark.column.isin(list(values)))
897
898 def isnull(self) -> Union["Series", "Index"]:
/usr/local/lib/python3.7/dist-packages/databricks/koalas/series.py in __iter__(self)
5871
5872 def __iter__(self):
-> 5873 return MissingPandasLikeSeries.__iter__(self)
5874
5875 if sys.version_info >= (3, 7):
/usr/local/lib/python3.7/dist-packages/databricks/koalas/missing/__init__.py in unsupported_function(*args, **kwargs)
21 def unsupported_function(*args, **kwargs):
22 raise PandasNotImplementedError(
---> 23 class_name=class_name, method_name=method_name, reason=reason
24 )
25
PandasNotImplementedError: The method `pd.Series.__iter__()` is not implemented. If you want to collect your data as an NumPy array, use 'to_numpy()' instead.
How could I do the same operation in Koalas?
UPDATE
Following the question "Assign Koalas Column from Numpy Result", I tried:
df_test.loc[df_test.a.isin(df_test2['c'].to_list()),'b'] = 'four'
But now I have this error:
---------------------------------------------------------------------------
PythonException Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/IPython/core/formatters.py in __call__(self, obj)
700 type_pprinters=self.type_printers,
701 deferred_pprinters=self.deferred_printers)
--> 702 printer.pretty(obj)
703 printer.flush()
704 return stream.getvalue()
/usr/local/lib/python3.7/dist-packages/IPython/lib/pretty.py in pretty(self, obj)
392 if cls is not object \
393 and callable(cls.__dict__.get('__repr__')):
--> 394 return _repr_pprint(obj, self, cycle)
395
396 return _default_pprint(obj, self, cycle)
/usr/local/lib/python3.7/dist-packages/IPython/lib/pretty.py in _repr_pprint(obj, p, cycle)
698 """A pprint that just redirects to the normal repr function."""
699 # Find newlines and replace them with p.break_()
--> 700 output = repr(obj)
701 lines = output.splitlines()
702 with p.group():
/usr/local/lib/python3.7/dist-packages/databricks/koalas/frame.py in __repr__(self)
10614 return self._to_internal_pandas().to_string()
10615
> 10616 pdf = self._get_or_create_repr_pandas_cache(max_display_count)
10617 pdf_length = len(pdf)
10618 pdf = pdf.iloc[:max_display_count]
/usr/local/lib/python3.7/dist-packages/databricks/koalas/frame.py in _get_or_create_repr_pandas_cache(self, n)
10606 def _get_or_create_repr_pandas_cache(self, n):
10607 if not hasattr(self, "_repr_pandas_cache") or n not in self._repr_pandas_cache:
> 10608 self._repr_pandas_cache = {n: self.head(n + 1)._to_internal_pandas()}
10609 return self._repr_pandas_cache[n]
10610
/usr/local/lib/python3.7/dist-packages/databricks/koalas/frame.py in _to_internal_pandas(self)
10602 This method is for internal use only.
10603 """
> 10604 return self._internal.to_pandas_frame
10605
10606 def _get_or_create_repr_pandas_cache(self, n):
/usr/local/lib/python3.7/dist-packages/databricks/koalas/utils.py in wrapped_lazy_property(self)
514 def wrapped_lazy_property(self):
515 if not hasattr(self, attr_name):
--> 516 setattr(self, attr_name, fn(self))
517 return getattr(self, attr_name)
518
/usr/local/lib/python3.7/dist-packages/databricks/koalas/internal.py in to_pandas_frame(self)
807 """ Return as pandas DataFrame. """
808 sdf = self.to_internal_spark_frame
--> 809 pdf = sdf.toPandas()
810 if len(pdf) == 0 and len(sdf.schema) > 0:
811 pdf = pdf.astype(
/usr/local/spark/python/pyspark/sql/pandas/conversion.py in toPandas(self)
136
137 # Below is toPandas without Arrow optimization.
--> 138 pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
139 column_counter = Counter(self.columns)
140
/usr/local/spark/python/pyspark/sql/dataframe.py in collect(self)
594 """
595 with SCCallSiteSync(self._sc) as css:
--> 596 sock_info = self._jdf.collectToPython()
597 return list(_load_from_socket(sock_info, BatchedSerializer(PickleSerializer())))
598
/usr/local/lib/python3.7/dist-packages/py4j/java_gateway.py in __call__(self, *args)
1303 answer = self.gateway_client.send_command(command)
1304 return_value = get_return_value(
-> 1305 answer, self.gateway_client, self.target_id, self.name)
1306
1307 for temp_arg in temp_args:
/usr/local/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
132 # Hide where the exception came from that shows a non-Pythonic
133 # JVM exception message.
--> 134 raise_from(converted)
135 else:
136 raise
/usr/local/spark/python/pyspark/sql/utils.py in raise_from(e)
PythonException:
An exception was thrown from the Python worker. Please see the stack trace below.
Traceback (most recent call last):
File "/opt/spark/python/lib/pyspark.zip/pyspark/worker.py", line 589, in main
func, profiler, deserializer, serializer = read_udfs(pickleSer, infile, eval_type)
File "/opt/spark/python/lib/pyspark.zip/pyspark/worker.py", line 447, in read_udfs
udfs.append(read_single_udf(pickleSer, infile, eval_type, runner_conf, udf_index=i))
File "/opt/spark/python/lib/pyspark.zip/pyspark/worker.py", line 254, in read_single_udf
f, return_type = read_command(pickleSer, infile)
File "/opt/spark/python/lib/pyspark.zip/pyspark/worker.py", line 74, in read_command
command = serializer._read_with_length(file)
File "/opt/spark/python/lib/pyspark.zip/pyspark/serializers.py", line 172, in _read_with_length
return self.loads(obj)
File "/opt/spark/python/lib/pyspark.zip/pyspark/serializers.py", line 458, in loads
return pickle.loads(obj, encoding=encoding)
File "/opt/spark/python/lib/pyspark.zip/pyspark/cloudpickle.py", line 1110, in subimport
__import__(name)
ModuleNotFoundError: No module named 'pandas'
Why is it trying to use pandas?
The Koalas package exposes a pandas-like API to users at a high level, but under the hood it is implemented on top of the PySpark APIs.
In the stack trace you pasted, a pandas DataFrame is created from the Spark DataFrame sdf using the toPandas() method and assigned to pdf.
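The implementation of the toPandas() function imports pandas and numpy.
Note also that the ModuleNotFoundError in your traceback is raised inside the Spark Python worker (worker.py), so pandas must be importable in the Python environment the workers use, not only on the driver.
See lines 809 and 138 in the excerpt below.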
/usr/local/lib/python3.7/dist-packages/databricks/koalas/internal.py in to_pandas_frame(self)
807 """ Return as pandas DataFrame. """
808 sdf = self.to_internal_spark_frame
--> 809 pdf = sdf.toPandas()
810 if len(pdf) == 0 and len(sdf.schema) > 0:
811 pdf = pdf.astype(
/usr/local/spark/python/pyspark/sql/pandas/conversion.py in toPandas(self)
136
137 # Below is toPandas without Arrow optimization.
--> 138 pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
139 column_counter = Counter(self.columns)
140
/usr/local/spark/python/pyspark/sql/dataframe.py in collect(self)
594 """
595 with SCCallSiteSync(self._sc) as css:
--> 596 sock_info = self._jdf.collectToPython()
597 return list(_load_from_socket(sock_info, BatchedSerializer(PickleSerializer())))
598
You can find the implementation of the toPandas() function at the following link:
https://github.com/apache/spark/blob/master/python/pyspark/sql/pandas/conversion.py