mpld3: Object of type Timestamp is not JSON serialisable - matplotlib

I am trying to produce an html file of a graph.
The following code works:
fig=plt.figure(figsize=(18.5,10))
# Draw one line per site: gl[i] is the legend label for the series in rdfl[i].
for i, site in enumerate(gl):
    # Depth is scaled by 100 (presumably metres -> the centimetres named on
    # the y-axis label; confirm against the data source).
    plt.plot(rdfl[i]['datetime'], rdfl[i]['depth']*100, label=site)
plt.title(f'Stream levels (cm) in period {fdate} to {tdate}', y=1.0,)
plt.ylabel('Stream Depth (cm)', rotation=90)
plt.xlabel('Datetime')
plt.xticks(rotation=90)
plt.legend(title='Site',bbox_to_anchor=(1,1), loc="upper left", fontsize=10, title_fontsize=12)
plt.ion()
plt.draw()
#plt.show()
plt.pause(0.1)
html_str = mpld3.fig_to_html(fig)
# Write the exported figure to disk; the context manager closes the file
# even if the write raises.
with open(xdir1+"river_levels.html","w") as Html_file:
    Html_file.write(html_str)
This code, however, does not :
fig2, ax1=plt.subplots()
bmtdf['mid_time']=pd.to_datetime(bmtdf['mid_time'],format = '%Y-%m-%d %H:%M:%S')
ax1.set_ylim(0,150)
ax1.stairs(bdf2['br_open_total'], bdfarr2['time'], label='Bresser/open')
ax1.scatter(bmtdf.mid_time, bmtdf.ic_amt, label='ic', marker='.')
ax1.scatter(bmtdf.mid_time, bmtdf.mc_amt, label='mc', marker='x')
ax1.scatter(bmtdf.mid_time, bmtdf.imd_amt, label='imd', marker='+')
ax1.scatter(bmtdf.mid_time, bmtdf.md_amt, label='md', marker='*')
ax1.scatter(bmtdf.mid_time, bmtdf.oak_amt, label='oak', marker='1')
ax1.scatter(bmtdf.mid_time, bmtdf.br_open_total, marker='')
ax1.legend(title='Forest Type', loc='upper left')
bmtdf.loc[[0],'br_open_total']=0
bmtdf[['mid_time','br_open_total', 'event_number']].apply(lambda row :ax1.text(*row), axis=1)
ax1.set_ylabel('Rain (mm)', rotation=90)
ax1.set_xlabel('Datetime')
ax2=ax1.twinx()
ax2.set_ylim(-250,250)
ax2.set_ylabel('Stream depth (cm)', rotation=90)
#ax2.set_yticks(np.arrange(0,1.8, 0.2))
# Draw one stream-depth line per site on the secondary axis: gl[i] is the
# legend label for the series in rdfl[i].
for i, site in enumerate(gl):
    # Depth is scaled by 100 (presumably metres -> the centimetres named on
    # the ax2 y-axis label; confirm against the data source).
    ax2.plot(rdfl[i]['datetime'], rdfl[i]['depth']*100, label=site)
plt.xticks(rotation=90)
plt.legend(title='Stream', loc='upper right')
plt.title(f'Rain events, river levels and interception values in period {fdate} to {tdate}',y=1.0, pad=-24)
figManager = plt.get_current_fig_manager()
figManager.window.showMaximized()
fig2.canvas.manager.window.move(-1900,0)
plt.ion()
#plt.show()
plt.draw()
plt.pause(0.1)
plt.savefig(str(dirpath4+f'\\Bresser_open_plot_{fdate}-{tdate}.png'), bbox_inches='tight')
html_str = mpld3.fig_to_html(fig2)
# Write the exported figure to disk; the context manager closes the file
# even if the write raises.
with open(dirpath5+"bresser/open.html","w") as Html_file:
    Html_file.write(html_str)
The second code block throws the error:
Traceback (most recent call last):
File "C:\Users\mtc20tfq\AppData\Local\Temp\ipykernel_21372\3088159653.py", line 1, in <cell line: 1>
html_str = mpld3.fig_to_html(fig2)
File "C:\ProgramData\Anaconda3\envs\spyder-env\lib\site-packages\mpld3\_display.py", line 261, in fig_to_html
figure_json=json.dumps(figure_json, cls=NumpyEncoder),
File "C:\ProgramData\Anaconda3\envs\spyder-env\lib\json\__init__.py", line 234, in dumps
return cls(
File "C:\ProgramData\Anaconda3\envs\spyder-env\lib\json\encoder.py", line 199, in encode
chunks = self.iterencode(o, _one_shot=True)
File "C:\ProgramData\Anaconda3\envs\spyder-env\lib\json\encoder.py", line 257, in iterencode
return _iterencode(o, 0)
File "C:\ProgramData\Anaconda3\envs\spyder-env\lib\site-packages\mpld3\_display.py", line 143, in default
return json.JSONEncoder.default(self, obj)
File "C:\ProgramData\Anaconda3\envs\spyder-env\lib\json\encoder.py", line 179, in default
raise TypeError(f'Object of type {o.__class__.__name__} '
TypeError: Object of type Timestamp is not JSON serializable
The 'time' variables that are plotted are:
type(rdfl[i]['datetime'][0])
Out[5]: pandas._libs.tslibs.timestamps.Timestamp
type(bdfarr2['time'][5])
Out[6]: pandas._libs.tslibs.timestamps.Timestamp
type(bmtdf['mid_time'][5])
Out[7]: pandas._libs.tslibs.timestamps.Timestamp
running
fig2, ax1=plt.subplots()
bmtdf['mid_time']=pd.to_datetime(bmtdf['mid_time'],format = '%Y-%m-%d %H:%M:%S')
ax1.set_ylim(0,150)
ax1.stairs(bdf2['br_open_total'], bdfarr2['time'], label='Bresser/open')
# ax1.scatter(bmtdf.mid_time, bmtdf.ic_amt, label='ic', marker='.')
# ax1.scatter(bmtdf.mid_time, bmtdf.mc_amt, label='mc', marker='x')
# ax1.scatter(bmtdf.mid_time, bmtdf.imd_amt, label='imd', marker='+')
# ax1.scatter(bmtdf.mid_time, bmtdf.md_amt, label='md', marker='*')
# ax1.scatter(bmtdf.mid_time, bmtdf.oak_amt, label='oak', marker='1')
# ax1.scatter(bmtdf.mid_time, bmtdf.br_open_total, marker='')
# ax1.legend(title='Forest Type', loc='upper left')
# bmtdf.loc[[0],'br_open_total']=0
# bmtdf[['mid_time','br_open_total', 'event_number']].apply(lambda row :ax1.text(*row), axis=1)
# ax1.set_ylabel('Rain (mm)', rotation=90)
# ax1.set_xlabel('Datetime')
ax2=ax1.twinx()
ax2.set_ylim(-250,250)
ax2.set_ylabel('Stream depth (cm)', rotation=90)
#ax2.set_yticks(np.arrange(0,1.8, 0.2))
# Draw one stream-depth line per site on the secondary axis: gl[i] is the
# legend label for the series in rdfl[i].
for i, site in enumerate(gl):
    # Depth is scaled by 100 (presumably metres -> the centimetres named on
    # the ax2 y-axis label; confirm against the data source).
    ax2.plot(rdfl[i]['datetime'], rdfl[i]['depth']*100, label=site)
plt.xticks(rotation=90)
plt.legend(title='Stream', loc='upper right')
plt.title(f'Rain events, river levels and interception values in period {fdate} to {tdate}',y=1.0, pad=-24)
figManager = plt.get_current_fig_manager()
figManager.window.showMaximized()
fig2.canvas.manager.window.move(-1900,0)
plt.ion()
#plt.show()
plt.draw()
plt.pause(0.1)
plt.savefig(str(dirpath4+f'\\Bresser_open_plot_{fdate}-{tdate}.png'), bbox_inches='tight')
html_str = mpld3.fig_to_html(fig2)
# Write the exported figure to disk; the context manager closes the file
# even if the write raises.
with open(dirpath5+"\\bresser_open.html","w") as Html_file:
    Html_file.write(html_str)
produces the plot in Spyder, with the stairs plot visible, but the stairs plot is not visible in the HTML output.
It would seem that the .scatter lines are the ones where mpld3 refuses to serialise the Timestamp objects as it is only when these lines are run that the error appears.
Any suggestions would be greatly appreciated!
Edit: changed the ax1.scatter.... lines to:
ax1.scatter(bmtdf['mid_time'], bmtdf['ic_amt'], label='ic', marker='.')
ax1.scatter(bmtdf['mid_time'], bmtdf['mc_amt'], label='mc', marker='x')
ax1.scatter(bmtdf['mid_time'], bmtdf['imd_amt'], label='imd', marker='+')
ax1.scatter(bmtdf['mid_time'], bmtdf['md_amt'], label='md', marker='*')
ax1.scatter(bmtdf['mid_time'], bmtdf['oak_amt'], label='oak', marker='1')
ax1.scatter(bmtdf['mid_time'], bmtdf['br_open_total'], marker='')
but this makes no difference

Related

How to access second plot axis parameters?

I am trying to access the ax parameters of the second subplot figure, ax1. I am trying to put a title and remove the overlapping ticks but can't manage to get to them.
Here is the code and figure I have made :
fig, (ax0, ax1)= plt.subplots(nrows=1,
ncols=2,
sharey=True,
tight_layout = True,
gridspec_kw={'width_ratios': [3, 24],'wspace': 0})
ax1=librosa.display.specshow(data=df.iloc[i,2],
sr=fe,
x_axis='time',
y_axis='mel',
htk=False,
x_coords=np.linspace(0,duree[i],df.iloc[i,2].shape[1]),
hop_length=1000,
cmap=plt.get_cmap("magma"),
fmin=0, fmax=fe//2,
vmin=40, vmax=140)
ax0.plot(df.loc[Names[i], "DSP"], df.loc[Names[i], "f_dsp"],
linewidth=1, label=Names[i]) # ,color='grey')
ax0.set_title('Subplot 1')
ax0.set_xlim([20, 100])
ax0.set_ylim([0, fe//2])
ax0.set_ylabel('Fréquence [Hz]')
ax0.set_xlabel('Amplitude [dB ref 1µPa²]')
ax0.grid(visible=True)
ax0.grid(b=True, which='minor', color='k', linestyle=':', lw=0.5)
yticks=np.array([10,100,1000,10000,100000],dtype=int)
yticks_hz = np.unique(np.concatenate((np.array([fe//2],dtype=int),
np.array([250*2**n for n in range(0,int(np.log2(fe//(2*250))))]))))
ax0.set_yticks(yticks, minor=False)
ax0.set_yticks(yticks_hz, minor=True)
ax0.set_yticklabels(yticks,
minor=False,
fontdict={'fontweight':'demibold','fontsize':8})
ax0.set_yticklabels(yticks_hz,
minor=True,
fontdict={'fontsize':8})
# =============================================================================
# Doesnt work :(
# =============================================================================
ax1.set_title("Subplot 2")
ax1.get_yaxislabels().set_visible(False)
ax1.get_xaxislabels()[0].set_visible(False)

Can't populate matplotlib animation frame one point at a time

I'm currently trying to build an N-body simulation but I'm having a little trouble with plotting the results the way I'd like.
In the code below (with some example data for a few points in an orbit) I'm importing the position and time data and organizing it into a pandas dataframe. To create the 3D animation I use matplotlib's animation class, which works perfectly.
However, the usual way to set up an animation is limited in that you can't customize the points in each frame individually (please let me know if I'm wrong here :p). Since my animation is showing orbiting bodies, I would like to vary their sizes and colors. To do that, I essentially create a graph for each body and set its color etc. When it gets to the update_graph function, I iterate over the n bodies, retrieve their individual (x,y,z) coordinates, and update their graphs.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d.axes3d import get_test_data
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.animation as animation
import pandas as pd
nbodies = 2
x = np.array([[1.50000000e-10, 0.00000000e+00, 0.00000000e+00],
[9.99950000e-01, 1.00000000e-02, 0.00000000e+00],
[4.28093585e-06, 3.22964816e-06, 0.00000000e+00],
[-4.16142210e-01, 9.09335149e-01, 0.00000000e+00],
[5.10376489e-06, 1.42204430e-05, 0.00000000e+00],
[-6.53770813e-01, -7.56722445e-01, 0.00000000e+00]])
t = np.array([0.01, 0.01, 2.0, 2.0, 4.0, 4.0])
tt = np.array([0.01, 2.0, 4.0])
x = x.reshape((len(tt), nbodies, 3))
x_coords = x[:, :, 0].flatten()
y_coords = x[:, :, 1].flatten()
z_coords = x[:, :, 2].flatten()
df = pd.DataFrame({"time": t[:] ,"x" : x_coords, "y" : y_coords, "z" : z_coords})
print(df)
def update_graph(num):
    """Animation callback: move each body's artist to its position in frame ``num``.

    Reads the module-level ``df``, ``tt``, ``x_coords``, ``nbodies`` and
    ``graphs``. Returns ``graphs`` so that ``FuncAnimation(blit=True)`` knows
    which artists were redrawn.
    """
    # Rows for every body at the current time step.
    data = df[df['time'] == tt[num]]
    for n in range(nbodies):
        # Pick body n's row by matching its x value in the flattened array.
        data_n = data[data['x'] == x_coords[int(num * nbodies) + n]]
        graph = graphs[n]
        # NOTE: data_n.x / .y / .z are pandas Series that keep the DataFrame's
        # row labels; the traceback posted with this code ends in
        # ``KeyError: 0`` raised from ``Series.__getitem__``.
        graph.set_data(data_n.x, data_n.y)
        graph.set_3d_properties(data_n.z)
        graphs[n] = graph
    return graphs
plt.style.use('dark_background')
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.set_xlabel('x (AU)')
ax.set_ylabel('y (AU)')
ax.set_zlabel('z (AU)')
plt.xlim(-1.5,1.5)
plt.ylim(-1.5,1.5)
# initialize
data=df[df['time']==0]
ms_list = [5, 1]
c_list = ['yellow', 'blue']
# One (initially empty) line artist per body, each with its own marker size
# and colour; ax.plot returns a list, so keep its single element.
graphs = [
    ax.plot([], [], [], linestyle="", marker=".",
            markersize=ms_list[n], color=c_list[n])[0]
    for n in range(nbodies)
]
ani = animation.FuncAnimation(fig, update_graph, len(tt),
interval=400, blit=True, repeat=True)
plt.show()
However, doing this gives me the following error:
Traceback (most recent call last):
File "/home/kris/anaconda3/lib/python3.7/site-packages/matplotlib/backend_bases.py", line 1194, in _on_timer
ret = func(*args, **kwargs)
File "/home/kris/anaconda3/lib/python3.7/site-packages/matplotlib/animation.py", line 1447, in _step
still_going = Animation._step(self, *args)
File "/home/kris/anaconda3/lib/python3.7/site-packages/matplotlib/animation.py", line 1173, in _step
self._draw_next_frame(framedata, self._blit)
File "/home/kris/anaconda3/lib/python3.7/site-packages/matplotlib/animation.py", line 1193, in _draw_next_frame
self._post_draw(framedata, blit)
File "/home/kris/anaconda3/lib/python3.7/site-packages/matplotlib/animation.py", line 1216, in _post_draw
self._blit_draw(self._drawn_artists, self._blit_cache)
File "/home/kris/anaconda3/lib/python3.7/site-packages/matplotlib/animation.py", line 1231, in _blit_draw
a.axes.draw_artist(a)
File "/home/kris/anaconda3/lib/python3.7/site-packages/matplotlib/axes/_base.py", line 2661, in draw_artist
a.draw(self.figure._cachedRenderer)
File "/home/kris/anaconda3/lib/python3.7/site-packages/matplotlib/artist.py", line 38, in draw_wrapper
return draw(artist, renderer, *args, **kwargs)
File "/home/kris/anaconda3/lib/python3.7/site-packages/mpl_toolkits/mplot3d/art3d.py", line 202, in draw
xs, ys, zs = proj3d.proj_transform(xs3d, ys3d, zs3d, renderer.M)
File "/home/kris/anaconda3/lib/python3.7/site-packages/mpl_toolkits/mplot3d/proj3d.py", line 201, in proj_transform
vec = _vec_pad_ones(xs, ys, zs)
File "/home/kris/anaconda3/lib/python3.7/site-packages/mpl_toolkits/mplot3d/proj3d.py", line 189, in _vec_pad_ones
return np.array([xs, ys, zs, np.ones_like(xs)])
File "/home/kris/anaconda3/lib/python3.7/site-packages/pandas/core/series.py", line 871, in __getitem__
result = self.index.get_value(self, key)
File "/home/kris/anaconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py", line 4405, in get_value
return self._engine.get_value(s, k, tz=getattr(series.dtype, "tz", None))
File "pandas/_libs/index.pyx", line 80, in pandas._libs.index.IndexEngine.get_value
File "pandas/_libs/index.pyx", line 90, in pandas._libs.index.IndexEngine.get_value
File "pandas/_libs/index.pyx", line 138, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/hashtable_class_helper.pxi", line 997, in pandas._libs.hashtable.Int64HashTable.get_item
File "pandas/_libs/hashtable_class_helper.pxi", line 1004, in pandas._libs.hashtable.Int64HashTable.get_item
KeyError: 0
Aborted (core dumped)
I'm not sure what this really means, but I do know the problem is something to do with updating the graphs with only one row of coordinates rather than all three. Because if I instead have
def update_graph(num):
    """Animation callback: give every artist all bodies' positions for frame ``num``.

    Reads the module-level ``df``, ``tt``, ``nbodies`` and ``graphs``.
    Returns ``graphs`` so that ``FuncAnimation(blit=True)`` knows which
    artists were redrawn.
    """
    # Rows for every body at the current time step.
    data = df[df['time'] == tt[num]]
    for n in range(nbodies):
        #data_n = data[data['x']==x_coords[int(num * nbodies) + n]] # x,y,z of body n
        graph = graphs[n]
        # Using data rather than data_n here: each artist receives the
        # coordinates of all bodies, not just body n.
        graph.set_data(data.x, data.y)
        graph.set_3d_properties(data.z)
        graphs[n] = graph
    return graphs
it actually works, and plots three copies of the bodies with varying colors and sizes on top of each other as you would expect.
Any help would be much appreciated. Thanks!
I don't understand why you are going through a pandas DataFrame when you already seem to have all the data you need in your numpy array. I couldn't reproduce the initial problem, but I propose this solution that uses pure numpy arrays, which may fix the problem:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d.axes3d import get_test_data
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.animation as animation
import pandas as pd
nbodies = 2
x = np.array([[1.50000000e-10, 0.00000000e+00, 0.00000000e+00],
[9.99950000e-01, 1.00000000e-02, 0.00000000e+00],
[4.28093585e-06, 3.22964816e-06, 0.00000000e+00],
[-4.16142210e-01, 9.09335149e-01, 0.00000000e+00],
[5.10376489e-06, 1.42204430e-05, 0.00000000e+00],
[-6.53770813e-01, -7.56722445e-01, 0.00000000e+00]])
t = np.array([0.01, 0.01, 2.0, 2.0, 4.0, 4.0])
tt = np.array([0.01, 2.0, 4.0])
x = x.reshape((len(tt), nbodies, 3))
def update_graph(i):
    """Animation callback: move each body's artist to its position in frame ``i``.

    Reads the module-level ``x`` (indexed as ``x[frame, body, coordinate]``)
    and ``graphs``. Returns ``graphs`` so that ``FuncAnimation(blit=True)``
    knows which artists were redrawn.
    """
    data = x[i, :, :]  # x,y,z of all bodies at current time
    for body, graph in zip(data, graphs):  # update graphs
        # Wrap each coordinate in a list: recent Matplotlib releases reject
        # scalar (non-sequence) arguments to Line2D.set_data.
        graph.set_data([body[0]], [body[1]])
        graph.set_3d_properties([body[2]])
    return graphs
plt.style.use('dark_background')
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.set_xlabel('x (AU)')
ax.set_ylabel('y (AU)')
ax.set_zlabel('z (AU)')
plt.xlim(-1.5, 1.5)
plt.ylim(-1.5, 1.5)
# initialize
ms_list = [50, 10]
c_list = ['yellow', 'blue']
# One (initially empty) line artist per body, each with its own marker size
# and colour; ax.plot returns a list, so keep its single element.
graphs = [
    ax.plot([], [], [], linestyle="", marker=".",
            markersize=ms_list[n], color=c_list[n])[0]
    for n in range(nbodies)
]
ani = animation.FuncAnimation(fig, func=update_graph, frames=len(tt),
interval=400, blit=True, repeat=True)
plt.show()

How to change the position of some x axis tick labels on top of the bottom x axis in matplotlib?

This is my current script:
#!/usr/bin/env python3
import math
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
"""
Setup for a typical explanatory-style illustration style graph.
"""
h = 2
x = np.linspace(-np.pi, np.pi, 100)
y = 2 * np.sin(x)
rc = {
# Tick in the middle of the axis line.
'xtick.direction' : 'inout',
'ytick.direction' : 'inout',
# Bold is easier to read when we have few ticks.
'font.weight': 'bold',
'xtick.labelbottom': False,
'xtick.labeltop': True,
}
with plt.rc_context(rc):
fig, ax = plt.subplots()
ax.plot(x, y)
ax.set_title(
'2 sin(x), not $\\sqrt{2\\pi}$',
# TODO make LaTeX part bold?
# https://stackoverflow.com/questions/14324477/bold-font-weight-for-latex-axes-label-in-matplotlib
fontweight='bold',
# Too close otherwise.
# https://stackoverflow.com/questions/16419670/increase-distance-between-title-and-plot-in-matplolib/56738085
pad=20
)
# Custom visible plot area.
# ax.set_xlim(-3, 3)
ax.set_ylim(-2.5, 2.5)
# Axes
# Axes on center:
# https://stackoverflow.com/questions/31556446/how-to-draw-axis-in-the-middle-of-the-figure
ax.spines['left'].set_position('zero')
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_position('zero')
ax.spines['top'].set_visible(False)
# Axes with arrow:
# https://stackoverflow.com/questions/33737736/matplotlib-axis-arrow-tip
ax.plot(1, 0, ls="", marker=">", ms=10, color="k",
transform=ax.get_yaxis_transform(), clip_on=False)
ax.plot(0, 1, ls="", marker="^", ms=10, color="k",
transform=ax.get_xaxis_transform(), clip_on=False)
# Ticks
ax.xaxis.set_ticks_position('bottom')
ax.yaxis.set_ticks_position('left')
# Make ticks a bit longer.
ax.tick_params(width=1, length=10)
# Select tick positions
# https://stackoverflow.com/questions/12608788/changing-the-tick-frequency-on-x-or-y-axis-in-matplotlib
xticks = np.arange(math.ceil(min(x)), math.floor(max(x)) + 1, 1)
yticks = np.arange(math.ceil(min(y)) - 1, math.floor(max(y)) + 2, 1)
# Remove 0.
xticks = np.setdiff1d(xticks, [0])
yticks = np.setdiff1d(yticks, [0])
ax.xaxis.set_ticks(xticks)
ax.yaxis.set_ticks(yticks)
# Another approach. But because I want to be able to remove the 0,
# anyways, I just explicitly give all ticks instead.
# ax.xaxis.set_major_locator(matplotlib.ticker.MultipleLocator(1.0))
# ax.yaxis.set_major_locator(matplotlib.ticker.MultipleLocator(1.0))
# Annotations.
ax.plot([0, np.pi/2], [h, h], '--r')
ax.plot([np.pi/2, np.pi/2], [h, 0], '--r')
ax.plot(np.pi/2, h, marker='o', linewidth=2, markersize=10,
markerfacecolor='w', markeredgewidth=1.5, markeredgecolor='black')
plt.savefig(
'main.png',
format='png',
bbox_inches='tight'
)
plt.clf()
And this is the output:
And this is what I want (hacked with GIMP), notice how the negative tick labels are on a different side of the axes now.
I tried adding:
for tick in ax.xaxis.get_majorticklabels():
tick.set_verticalalignment("bottom")
as shown in answers to: How to move a tick's label in matplotlib? but that does not move the tick labels up enough, and makes the labels show on top of the axes instead.
Tested on matplotlib 3.2.2.
The following code will adjust the vertical alignment of the tick labels depending on whether they are at a negative or positive x-value. However, that's not enough, because the labels are actually anchored at the bottom of the tick line. I'm therefore adjusting their y-position a little bit, but you have to play with the value to get the desired output.
# adjust the xticks so that they are on top when x<0 and on the bottom when x≥0
ax.spines['top'].set_visible(True)
ax.spines['top'].set_position('zero')
ax.spines['bottom'].set_visible(True)
ax.spines['bottom'].set_position('zero')
ax.xaxis.set_tick_params(which='both', top=True, labeltop=True,
bottom=True, labelbottom=True)
fig.canvas.draw()
for tick in ax.xaxis.get_major_ticks():
print(tick.get_loc())
if tick.get_loc()<0:
tick.tick1line.set_visible(False)
tick.label1.set_visible(False)
else:
tick.tick2line.set_visible(False)
tick.label2.set_visible(False)
full code:
import math
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
"""
Setup for a typical explanatory-style illustration style graph.
"""
h = 10
x = np.linspace(-np.pi, np.pi, 100)
y = h * np.sin(x)
rc = {
# Tick in the middle of the axis line.
'xtick.direction' : 'inout',
'ytick.direction' : 'inout',
# Bold is easier to read when we have few ticks.
'font.weight': 'bold',
'xtick.labelbottom': False,
'xtick.labeltop': True,
}
with plt.rc_context(rc):
fig, ax = plt.subplots()
ax.plot(x, y)
ax.set_title(
'2 sin(x), not $\\sqrt{2\\pi}$',
# TODO make LaTeX part bold?
# https://stackoverflow.com/questions/14324477/bold-font-weight-for-latex-axes-label-in-matplotlib
fontweight='bold',
# Too close otherwise.
# https://stackoverflow.com/questions/16419670/increase-distance-between-title-and-plot-in-matplolib/56738085
pad=20
)
# Custom visible plot area.
# ax.set_xlim(-3, 3)
ax.set_ylim(-2.5, 2.5)
# Axes
# Axes on center:
# https://stackoverflow.com/questions/31556446/how-to-draw-axis-in-the-middle-of-the-figure
ax.spines['left'].set_position('zero')
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_position('zero')
ax.spines['top'].set_visible(False)
# Axes with arrow:
# https://stackoverflow.com/questions/33737736/matplotlib-axis-arrow-tip
ax.plot(1, 0, ls="", marker=">", ms=10, color="k",
transform=ax.get_yaxis_transform(), clip_on=False)
ax.plot(0, 1, ls="", marker="^", ms=10, color="k",
transform=ax.get_xaxis_transform(), clip_on=False)
# Ticks
ax.xaxis.set_ticks_position('bottom')
ax.yaxis.set_ticks_position('left')
# Make ticks a bit longer.
ax.tick_params(width=1, length=10)
# Select tick positions
# https://stackoverflow.com/questions/12608788/changing-the-tick-frequency-on-x-or-y-axis-in-matplotlib
xticks = np.arange(math.ceil(min(x)), math.floor(max(x)) + 1, 1)
yticks = np.arange(math.ceil(min(y)) - 1, math.floor(max(y)) + 2, 1)
# Remove 0.
xticks = np.setdiff1d(xticks, [0])
yticks = np.setdiff1d(yticks, [0])
ax.xaxis.set_ticks(xticks)
ax.yaxis.set_ticks(yticks)
# Another approach. But because I want to be able to remove the 0,
# anyways, I just explicitly give all ticks instead.
# ax.xaxis.set_major_locator(matplotlib.ticker.MultipleLocator(1.0))
# ax.yaxis.set_major_locator(matplotlib.ticker.MultipleLocator(1.0))
for g,t in zip(ax.get_xticks(),ax.get_xticklabels()):
if g<0:
t.set_va('bottom')
else:
t.set_va('top')
t.set_transform(ax.transData)
t.set_position((g,0.15*-(g/abs(g))))
# Annotations.
ax.plot([0, np.pi/2], [h, h], '--r')
ax.plot([np.pi/2, np.pi/2], [h, 0], '--r')
ax.plot(np.pi/2, h, marker='o', linewidth=2, markersize=10,
markerfacecolor='w', markeredgewidth=1.5, markeredgecolor='black')
# adjust the xticks so that they are on top when x<0 and on the bottom when x≥0
ax.spines['top'].set_visible(True)
ax.spines['top'].set_position('zero')
ax.spines['bottom'].set_visible(True)
ax.spines['bottom'].set_position('zero')
ax.xaxis.set_tick_params(which='both', top=True, labeltop=True,
bottom=True, labelbottom=True)
fig.canvas.draw()
for tick in ax.xaxis.get_major_ticks():
print(tick.get_loc())
if tick.get_loc()<0:
tick.tick1line.set_visible(False)
tick.label1.set_visible(False)
else:
tick.tick2line.set_visible(False)
tick.label2.set_visible(False)

Matplotlib cannot plot categorical values

Here is my example:
import matplotlib.pyplot as plt
test_list = ['a', 'b', 'b', 'c']
plt.hist(test_list)
plt.show()
It generates the following error message:
TypeError Traceback (most recent call last)
<ipython-input-48-228f7f5e9d1e> in <module>()
1 test_list = ['a', 'b', 'b', 'c']
----> 2 plt.hist(test_list)
3 plt.show()
C:\Anaconda3\lib\site-packages\matplotlib\pyplot.py in hist(x, bins, range, normed, weights, cumulative, bottom, histtype, align, orientation, rwidth, log, color, label, stacked, hold, data, **kwargs)
2956 histtype=histtype, align=align, orientation=orientation,
2957 rwidth=rwidth, log=log, color=color, label=label,
-> 2958 stacked=stacked, data=data, **kwargs)
2959 finally:
2960 ax.hold(washold)
C:\Anaconda3\lib\site-packages\matplotlib\__init__.py in inner(ax, *args, **kwargs)
1809 warnings.warn(msg % (label_namer, func.__name__),
1810 RuntimeWarning, stacklevel=2)
-> 1811 return func(ax, *args, **kwargs)
1812 pre_doc = inner.__doc__
1813 if pre_doc is None:
C:\Anaconda3\lib\site-packages\matplotlib\axes\_axes.py in hist(self, x, bins, range, normed, weights, cumulative, bottom, histtype, align, orientation, rwidth, log, color, label, stacked, **kwargs)
5993 xmax = -np.inf
5994 for xi in x:
-> 5995 if len(xi) > 0:
5996 xmin = min(xmin, xi.min())
5997 xmax = max(xmax, xi.max())
TypeError: len() of unsized object
I only briefly searched on Google, but it looks like I cannot plot a histogram for categorical variables in matplotlib.
Can anybody confirm?
Sure, it is possible to create a histogram of categorical data in matplotlib.
As this link suggests, but also as suggested in the matplotlib demo, simply use a barchart for that purpose.
import matplotlib.pyplot as plt
import numpy as np
test_list = ['a', 'b', 'b', 'c', "d", "b"]
# Count the occurrences of each category. Dicts preserve insertion order
# (Python 3.7+), so categories appear in order of first occurrence.
histdic = {label: test_list.count(label) for label in test_list}
# list(dict) / dict.values() replace the Python-2-only dict.iteritems() loop.
x = list(histdic)
y = list(histdic.values())
plt.figure()
barwidth= 0.8
positions = np.arange(len(y))
# Centre each bar on its position explicitly and put the tick at that same
# position, so labels sit under the bars regardless of the Matplotlib
# version's default bar alignment.
plt.bar(positions, y, barwidth, color='r', align='center')
plt.gca().set_xticks(positions)
plt.gca().set_xticklabels(x)
plt.show()
Since this is a histogram and it is created with matplotlib, it's definitely wrong to say that you "cannot plot histogram for categorical variables in matplotlib".
try to use the below code:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Draw 100 random category labels (A-D) and 100 random results (1-6) with
# the given probabilities.
cats = np.array(list("ABCD"), dtype=str)
cats = np.random.choice(cats, 100, p=[0.3, 0.1, 0.4, 0.2])
res = np.random.choice(np.arange(1,7), 100, p=[0.2, 0.1, 0.08, 0.16,0.26,0.2])
df = pd.DataFrame({"Category":cats, "Result":res})
# Number of rows for each (Category, Result) pair.
df2 = df.groupby(["Category", "Result"]).size().reset_index(name='Count')
# Same counts pivoted both ways: categories as rows, then results as rows.
df3 = pd.pivot_table(df2, values='Count', columns=['Result'], index = "Category",
                     aggfunc=np.sum, fill_value=0)
df4 = pd.pivot_table(df2, values='Count', columns=['Category'], index = "Result",
                     aggfunc=np.sum, fill_value=0)
fig, ax = plt.subplots(1,2, figsize=(10,4))
df3.plot(kind="bar", ax=ax[0])
df4.plot(kind="bar", ax=ax[1])
plt.show()

TypeError: float() argument must be a string or a number, array = np.array(array, dtype=dtype, order=order, copy=copy)

I'm applying K-means clustering to a data frame loaded from CSV and Excel files.
ref: http://scikit-learn.org/stable/auto_examples/cluster/plot_cluster_iris.html#example-cluster-plot-cluster-iris-py
I try to run the code with my data from the csv file, data looks like:
DataFile
However, I receive the following errors:
Traceback (most recent call last):
File "", line 1, in
runfile('/Users/nadiastraton/Documents/workspacePython/02450Toolbox_Python/Thesis/Scripts/Clustering/cluster3.py', wdir='/Users/nadiastraton/Documents/workspacePython/02450Toolbox_Python/Thesis/Scripts/Clustering')
File "/Applications/anaconda2/lib/python2.7/site-packages/spyderlib/widgets/externalshell/sitecustomize.py", line 699, in runfile
execfile(filename, namespace)
File "/Applications/anaconda2/lib/python2.7/site-packages/spyderlib/widgets/externalshell/sitecustomize.py", line 81, in execfile
builtins.execfile(filename, *where)
File "/Users/cluster3.py", line 46, in
est.fit(x.as_matrix)
File "/Applications/anaconda2/lib/python2.7/site-packages/sklearn/cluster/k_means_.py", line 812, in fit
X = self._check_fit_data(X)
File "/Applications/anaconda2/lib/python2.7/site-packages/sklearn/cluster/k_means_.py", line 786, in _check_fit_data
X = check_array(X, accept_sparse='csr', dtype=np.float64)
File "/Applications/anaconda2/lib/python2.7/site-packages/sklearn/utils/validation.py", line 373, in check_array
array = np.array(array, dtype=dtype, order=order, copy=copy)
TypeError: float() argument must be a string or a number
print(doc)
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import pandas as pd
from sklearn.cluster import KMeans
np.random.seed(5)
centers = [[1, 1], [-1, -1], [1, -1]]
data=pd.read_csv('/DataVisualisationSample.csv')
print(data.head())
x = pd.DataFrame(data,columns = ['Post_Share_Count','Post_Like_Count','Comment_Count'])
y = pd.DataFrame(data,columns = ['Comment_Like_Count'])
print(x.info())
estimators = {'k_means_data_3': KMeans(n_clusters=3),
'k_means_data_8': KMeans(n_clusters=12),
'k_means_data_bad_init': KMeans(n_clusters=3, n_init=1,
init='random')}
fignum = 1
for name, est in estimators.items():
fig = plt.figure(fignum, figsize=(4, 3))
plt.clf()
ax = Axes3D(fig, rect=[0, 0, .95, 1], elev=48, azim=134)
plt.cla()
est.fit(x.as_matrix)
labels = est.labels_
ax.scatter(x[:, 2], x[:, 0], x[:, 1], c=labels.astype(np.int))
ax.w_xaxis.set_ticklabels([])
ax.w_yaxis.set_ticklabels([])
ax.w_zaxis.set_ticklabels([])
ax.set_xlabel('Post_Share_Count')
ax.set_ylabel('Post_Like_Count')
ax.set_zlabel('Comment_Count')
fignum = fignum + 1
# Plot the ground truth
fig = plt.figure(fignum, figsize=(4, 3))
plt.clf()
ax = Axes3D(fig, rect=[0, 0, .95, 1], elev=48, azim=134)
plt.cla()
for name, label in [('Popular', 0),
('Not Popular', 1),
('Least Popular', 2)]:
ax.text3D(x[y == label, 2].mean(),
x[y == label, 0].mean() + 1.5,
x[y == label, 1].mean(), name,
horizontalalignment='center',
bbox=dict(alpha=.5, edgecolor='w', facecolor='w'))
# Reorder the labels to have colors matching the cluster results
y = np.choose(y, [1, 2, 0]).astype(np.int)
ax.scatter(x[:, 2], x[:, 0], x[:, 1], c=y).astype(np.int)
ax.w_xaxis.set_ticklabels([])
ax.w_yaxis.set_ticklabels([])
ax.w_zaxis.set_ticklabels([])
ax.set_xlabel('Post_Share_Count')
ax.set_ylabel('Post_Like_Count')
ax.set_zlabel('Comment_Count')
plt.show()
Tried to fix errors:
(est.fit(x.as_matrix) instead of est.fit(x))
and
(c=labels.astype(np.int) instead of c=labels.astype(np.float)) - (all values in my file are int.)
However changing from np.float to np.int does not fix it.