Setting alpha in matplotlib.axes.Axes.table? - matplotlib

I'm trying to understand how I can set the alpha level in a matplotlib table. I tried setting it with a global rcParams, but not quite sure how to do that? (I want to change the transparency in the header color). In general I'm not sure this can be done globally, if not, how do i pass the parameter to table? Thx in advance.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from cycler import cycler
import six
# Set universal font and transparency
plt.rcParams["font.family"] = "DejaVu Sans"
plt.rcParams['axes.prop_cycle'] = cycler(alpha=[0.5])
raw_data = dict(Simulation=[42, 39, 86, 15, 23, 57],
SP500=[52, 41, 79, 80, 34, 47],
NASDAQ=[62, 37, 84, 51, 67, 32],
Benchmark=[72, 43, 36, 26, 53, 88])
df = pd.DataFrame(raw_data, index=pd.Index(
['Sharpe Ratio', 'Sortino Ratio', 'Calmars Ratio', 'VaR', 'CVaR', 'Max DD'], name='Metric'),
columns=pd.Index(['Simulation', 'SP500', 'NASDAQ', 'Benchmark'], name='Series'))
def create_table(data, col_width=None, row_height=None, font_size=None,
header_color='#000080', row_colors=None, edge_color='w',
header_columns=0, ax=None, bbox=None):
if row_colors is None:
row_colors = ['#D8D8D8', 'w']
if bbox is None:
bbox = [0, 0, 1, 1]
if ax is None:
size = (np.array(data.shape[::-1]) + np.array([0, 1])) * np.array([col_width, row_height])
fig, ax = plt.subplots(figsize=size)
ax.axis('off')
ax.axis([0, 1, data.shape[0], -1])
data_table = ax.table(cellText=data.values, colLabels=data.columns, rowLabels=data.index,
bbox=bbox, cellLoc='center', rowLoc='left', colLoc='center',
colWidths=([col_width] * len(data.columns)))
cell_map = data_table.get_celld()
for i in range(0, len(data.columns)):
cell_map[(0, i)].set_height(row_height * 0.20)
data_table.auto_set_font_size(False)
data_table.set_fontsize(font_size)
for k, cell in six.iteritems(data_table._cells):
cell.set_edgecolor(edge_color)
if k[0] == 0 or k[1] < header_columns:
cell.set_text_props(weight='bold', color='w')
cell.set_facecolor(header_color)
else:
cell.set_facecolor(row_colors[k[0] % len(row_colors)])
return ax
ax = create_table(df, col_width=1.5, row_height=0.5, font_size=8)
ax.set_title("Risk Measures", fontweight='bold')
ax.axis('off')
plt.tight_layout()
plt.savefig('risk_parameter_table[1].pdf')
plt.show()

You can set_alpha() manually on the table's _cells.
If you only want to change the headers, check if row == 0 or col == -1:
def create_table(...):
...
for (row, col), cell in data_table._cells.items():
if (row == 0) or (col == -1):
cell.set_alpha(0.5)
return ax

Related

How to plot the relation between an array's columns and rows mean value

I'm a newcomer to Pandas and Matplotlib, trying to plot a relation between the mean value of my array's rows and columns. The result I'm looking for is something like this:
"linhas" refers to the rows and "colunas" refers to the columns. The Y label refers to the means and the X label refers to the number of columns in my array
I came up with some solutions, as shown below:
print(arr)
df = pd.DataFrame(arr)
display(df)
num_cols = [df.shape[1]]
print(type(num_cols))
print(num_cols)
cols = df.count(axis=1)
lcols = cols.tolist()
print(type(lcols))
col_mean = df[:].mean(axis=0)
print(type(col_mean))
col_mean.tolist()
row_mean = df[:].mean(axis=1)
print(type(row_mean))
row_mean.tolist()
print(type(row_mean))
print(row_mean)
dados = pd.DataFrame({
'Colunas': col_mean,
'Linhas': row_mean
}, index=lcols)
lines = dados.plot.line()
What I was looking after is something like this:
"linhas" refers to the rows and "colunas" refers to the columns. The Y label refers to the means and the X label refers to the number of columns in my array
Unfortunately, my output is totally wrong, as follows:
My output
Any help would be deeply appreciated, as I'm a bit lost right now.
Thanks in advance!
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# just a dummy array
arr = np.array([[37, 68, 1, 19, 6],
[ 0, 14, 32, 73, 53],
[37, 85, 67, 30, 91],
[42, 52, 6, 42, 85],
[82, 26, 44, 38, 48],
[54, 55, 23, 46, 78]])
n_rows, n_cols = arr.shape
df = pd.DataFrame(arr)
col_mean = df.mean(axis=0)
row_mean = df.mean(axis=1)
plt.plot(range(1, n_rows+1), row_mean, marker='^', c='orange', label='rows')
plt.plot(range(1, n_cols+1), col_mean, marker='o', c='blue', label='cols')
plt.xlabel('Label x axis')
plt.ylabel('Label y axis')
plt.title('Title plotting')
plt.legend()

Cannot use custom non linear colormap in combination with imshow

I am trying to use a custom colormap to display a ConfusionMatrixDisplay object to have a finer range between 0 and 50 than between 50 and 100 using this answer.
from sklearn.datasets import make_classification
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from matplotlib.colors import LinearSegmentedColormap
import matplotlib.pyplot as plt
import numpy as np
plt.rcParams["figure.figsize"] = (15, 15)
font = {'family' : 'DejaVu Sans',
'weight' : 'bold',
'size' : 22}
plt.rc('font', **font)
class nlcmap(LinearSegmentedColormap):
def __init__(self, cmap, levels):
self.cmap = cmap
self.N = cmap.N
self.monochrome = self.cmap.monochrome
self.levels = np.asarray(levels, dtype='float64')
self._x = self.levels
self.levmax = self.levels.max()
self.transformed_levels = np.linspace(0.0, self.levmax, len(self.levels))
def __call__(self, xi, alpha=1.0, **kw):
yi = np.interp(xi, self._x, self.transformed_levels)
return self.cmap(yi / self.levmax, alpha)
levels = [0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 100]
cmap_nonlin = nlcmap(plt.cm.viridis, levels)
X, y = make_classification(random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y,
random_state=0)
clf = SVC(random_state=0)
clf.fit(X_train, y_train)
SVC(random_state=0)
predictions = clf.predict(X_test)
cm = confusion_matrix(y_test, predictions, labels=clf.classes_)
disp = ConfusionMatrixDisplay(confusion_matrix=cm,
display_labels=clf.classes_)
lin_cmap = plt.cm.viridis
levels = [0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 100]
cmap_nonlin = nlcmap(plt.cm.viridis, levels)
fig, ax = plt.subplots()
im = disp.plot(cmap=cmap_nonlin, colorbar=False)
disp.ax_.get_images()[0].set_clim(0, 100)
disp.figure_.colorbar(disp.im_, orientation="horizontal", pad=0.1)
plt.savefig("test.png")
Produces the following error:
Traceback (most recent call last):
File "/Users/me/anaconda3/envs/myenv/lib/python3.6/site-packages/matplotlib/backends/backend_macosx.py", line 61, in _draw
self.figure.draw(renderer)
File "/Users/me/anaconda3/envs/myenv/lib/python3.6/site-packages/matplotlib/artist.py", line 41, in draw_wrapper
return draw(artist, renderer, *args, **kwargs)
File "/Users/me/anaconda3/envs/myenv/lib/python3.6/site-packages/matplotlib/figure.py", line 1864, in draw
renderer, self, artists, self.suppressComposite)
File "/Users/me/anaconda3/envs/myenv/lib/python3.6/site-packages/matplotlib/image.py", line 131, in _draw_list_compositing_images
a.draw(renderer)
File "/Users/me/anaconda3/envs/myenv/lib/python3.6/site-packages/matplotlib/artist.py", line 41, in draw_wrapper
return draw(artist, renderer, *args, **kwargs)
File "/Users/me/anaconda3/envs/myenv/lib/python3.6/site-packages/matplotlib/cbook/deprecation.py", line 411, in wrapper
return func(*inner_args, **inner_kwargs)
File "/Users/me/anaconda3/envs/myenv/lib/python3.6/site-packages/matplotlib/axes/_base.py", line 2747, in draw
mimage._draw_list_compositing_images(renderer, self, artists)
File "/Users/me/anaconda3/envs/myenv/lib/python3.6/site-packages/matplotlib/image.py", line 131, in _draw_list_compositing_images
a.draw(renderer)
File "/Users/me/anaconda3/envs/myenv/lib/python3.6/site-packages/matplotlib/artist.py", line 41, in draw_wrapper
return draw(artist, renderer, *args, **kwargs)
File "/Users/me/anaconda3/envs/myenv/lib/python3.6/site-packages/matplotlib/image.py", line 646, in draw
renderer.draw_image(gc, l, b, im)
TypeError: Cannot cast array data from dtype('float64') to dtype('uint8') according to the rule 'safe'
It seems the error is related to imshow in conjunction with custom colormap since I can reproduce without sklearn with:
fig, ax = plt.subplots()
ax.imshow(np.array([[10, 15], [20, 30]]), cmap=cmap_nonlin)
Any idea ? I wish to modify the colormap not the data itself if possible.
According to matplotlib's doc on LinearSegmentedColormaps one can do the following to vary the contrast between segments with fast varying segment and slow varying segments.
In this case to answer my question let's have a finer range between 0 and 50 than between 50 and 100 but my solution can be extended to an arbitrary number of different paced segments by changing the levels:
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.colors as colors
# A dict with {percentage_of_max_value: percentage_of_variation}. The keys are thus all < 1. and should be in ascending order alongside associated values in the colormap (also ordered and < 1.).
# In this example we have 90% of the variation of the colormap in its first half (until 0.5) and the remaining 10% in its right half
levels = {0.5: 0.9}
# We are not limited to one segment and we can provide for instance the following dict
# levels = {0.4:0.8, 0.5:0.9} to have 80% of variations between 0 and 40% of the colormap max then 10% between 40 and 50% and then the remaining 10% for the rest
cdict = {"red": None, "green": None, "blue": None}
num_values_per_segment = 50
for k, v in cdict.items():
cdict[k] = []
# We start the first segment by 0. both for value and cmap_value
left_val = 0.
left_cmap_val = 0.
for val, cmap_val in levels.items():
values = np.linspace(left_val, val, num_values_per_segment).tolist()
dynamic_range = np.linspace(left_cmap_val, cmap_val, num_values_per_segment).tolist()
for i, (v, r) in enumerate(zip(values, dynamic_range)):
cdict[k].append((v, r, r))
left_val = val
left_cmap_val = cmap_val
# Last segment towards 1.
values = np.linspace(val, 1., num_values_per_segment).tolist()
dynamic_range = np.linspace(cmap_val, 1., num_values_per_segment).tolist()
for i, (v, r) in enumerate(zip(values, dynamic_range)):
cdict[k].append((v, r, r))
# Mapping levels to colormap
cmap = plt.cm.viridis
for k, v in cdict.items():
if k == "red":
for i in range(len(v)):
cdict[k][i] = (v[i][0], cmap(v[i][1])[0], cmap(v[i][2])[0])
elif k == "green":
for j in range(len(v)):
cdict[k][j] = (v[j][0], cmap(v[j][1])[1], cmap(v[j][2])[1])
elif k == "blue":
for l in range(len(v)):
cdict[k][l] = (v[l][0], cmap(v[l][1])[2], cmap(v[l][2])[2])
else:
raise ValueError("Color not recognized")
cdict[k] = tuple(cdict[k])
cmap_nonlin = colors.LinearSegmentedColormap('MyCustomCMap', cdict)
fig, ax = plt.subplots()
my_image = np.array([[30, 45], [25, 10]])
confusion = ax.imshow(my_image, cmap=cmap_nonlin, vmin=0, vmax=100)
plt.colorbar(confusion, ax=ax)
plt.waitforbuttonpress()
And the resulting cmap_nonlin object can be used in conjunction with imshow without any issue:

Coloring minimum bars in seaborn FacetGrid barplot

Any easy way to automatically color (or mark in any way) the minimum/maximum bars for each plot of a FacetGrid?
For example, how to mark the minimal Z value on each one of the following 16 plots?
df = pd.DataFrame({'A':[10, 20, 30, 40]*4, 'Y':[1,2,3,4]*4, 'W':range(16), 'Z':range(16)})
g = sns.FacetGrid(df, row="A", col="Y", sharey=False)
g.map(sns.barplot, "W", "Z")
plt.show()
The following approach loops through the diagonal axes, for each ax searches the minimum height of the bars and then colors those:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
df = pd.DataFrame({'A': [10, 20, 30, 40] * 4, 'Y': [1, 2, 3, 4] * 4, 'W': range(16), 'Z': range(16)})
g = sns.FacetGrid(df, row="A", col="Y", sharey=False)
g.map(sns.barplot, "W", "Z")
for i in range(len(g.axes)):
ax = g.axes[i, i]
min_height = min([p.get_height() for p in ax.patches])
for p in ax.patches:
if p.get_height() == min_height:
p.set_color('red')
plt.tight_layout()
plt.show()

Matplotlib: Display value next to each point on chart

Is it possible to display each point's value next to it on chart diagram:
Values shown on points are: [7, 57, 121, 192, 123, 240, 546]
values = list(map(lambda x: x[0], result)) #[7, 57, 121, 192, 123, 240, 546]
labels = list(map(lambda x: x[1], result)) #['1950s', '1960s', '1970s', '1980s', '1990s', '2000s', '2010s']
plt.plot(labels, values, 'bo')
plt.show()
Here's my current code for this chart.
I would like to know each point value shown on graph, currently I can only predict values based on y-axis.
Based on your values, here is one solution using plt.text
fig = plt.figure()
ax = fig.add_subplot(111)
values = [7, 57, 121, 192, 123, 240, 546]
labels = ['1950s', '1960s', '1970s', '1980s', '1990s', '2000s', '2010s']
plt.plot(range(len(labels)), values, 'bo') # Plotting data
plt.xticks(range(len(labels)), labels) # Redefining x-axis labels
for i, v in enumerate(values):
ax.text(i, v+25, "%d" %v, ha="center")
plt.ylim(-10, 595)
Output
Solution based on plt.annotate
fig = plt.figure()
ax = fig.add_subplot(111)
values = [7, 57, 121, 192, 123, 240, 546]
labels = ['1950s', '1960s', '1970s', '1980s', '1990s', '2000s', '2010s']
plt.plot(range(len(labels)), values, 'bo') # Plotting data
plt.xticks(range(len(labels)), labels) # Redefining x-axis labels
for i, v in enumerate(values):
ax.annotate(str(v), xy=(i,v), xytext=(-7,7), textcoords='offset points')
plt.ylim(-10, 595)
Output:

TypeError: float() argument must be a string or a number, array = np.array(array, dtype=dtype, order=order, copy=copy)

Im applying K-means clustering to data frame from cvs and excel files
ref: http://scikit-learn.org/stable/auto_examples/cluster/plot_cluster_iris.html#example-cluster-plot-cluster-iris-py
I try to run the code with my data from the csv file, data looks like:
DataFile
However receive following errors:
Traceback (most recent call last):
File "", line 1, in
runfile('/Users/nadiastraton/Documents/workspacePython/02450Toolbox_Python/Thesis/Scripts/Clustering/cluster3.py', wdir='/Users/nadiastraton/Documents/workspacePython/02450Toolbox_Python/Thesis/Scripts/Clustering')
File "/Applications/anaconda2/lib/python2.7/site-packages/spyderlib/widgets/externalshell/sitecustomize.py", line 699, in runfile
execfile(filename, namespace)
File "/Applications/anaconda2/lib/python2.7/site-packages/spyderlib/widgets/externalshell/sitecustomize.py", line 81, in execfile
builtins.execfile(filename, *where)
File "/Users/cluster3.py", line 46, in
est.fit(x.as_matrix)
File "/Applications/anaconda2/lib/python2.7/site-packages/sklearn/cluster/k_means_.py", line 812, in fit
X = self._check_fit_data(X)
File "/Applications/anaconda2/lib/python2.7/site-packages/sklearn/cluster/k_means_.py", line 786, in _check_fit_data
X = check_array(X, accept_sparse='csr', dtype=np.float64)
File "/Applications/anaconda2/lib/python2.7/site-packages/sklearn/utils/validation.py", line 373, in check_array
array = np.array(array, dtype=dtype, order=order, copy=copy)
TypeError: float() argument must be a string or a number
print(doc)
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import pandas as pd
from sklearn.cluster import KMeans
np.random.seed(5)
centers = [[1, 1], [-1, -1], [1, -1]]
data=pd.read_csv('/DataVisualisationSample.csv')
print(data.head())
x = pd.DataFrame(data,columns = ['Post_Share_Count','Post_Like_Count','Comment_Count'])
y = pd.DataFrame(data,columns = ['Comment_Like_Count'])
print(x.info())
estimators = {'k_means_data_3': KMeans(n_clusters=3),
'k_means_data_8': KMeans(n_clusters=12),
'k_means_data_bad_init': KMeans(n_clusters=3, n_init=1,
init='random')}
fignum = 1
for name, est in estimators.items():
fig = plt.figure(fignum, figsize=(4, 3))
plt.clf()
ax = Axes3D(fig, rect=[0, 0, .95, 1], elev=48, azim=134)
plt.cla()
est.fit(x.as_matrix)
labels = est.labels_
ax.scatter(x[:, 2], x[:, 0], x[:, 1], c=labels.astype(np.int))
ax.w_xaxis.set_ticklabels([])
ax.w_yaxis.set_ticklabels([])
ax.w_zaxis.set_ticklabels([])
ax.set_xlabel('Post_Share_Count')
ax.set_ylabel('Post_Like_Count')
ax.set_zlabel('Comment_Count')
fignum = fignum + 1
# Plot the ground truth
fig = plt.figure(fignum, figsize=(4, 3))
plt.clf()
ax = Axes3D(fig, rect=[0, 0, .95, 1], elev=48, azim=134)
plt.cla()
for name, label in [('Popular', 0),
('Not Popular', 1),
('Least Popular', 2)]:
ax.text3D(x[y == label, 2].mean(),
x[y == label, 0].mean() + 1.5,
x[y == label, 1].mean(), name,
horizontalalignment='center',
bbox=dict(alpha=.5, edgecolor='w', facecolor='w'))
# Reorder the labels to have colors matching the cluster results
y = np.choose(y, [1, 2, 0]).astype(np.int)
ax.scatter(x[:, 2], x[:, 0], x[:, 1], c=y).astype(np.int)
ax.w_xaxis.set_ticklabels([])
ax.w_yaxis.set_ticklabels([])
ax.w_zaxis.set_ticklabels([])
ax.set_xlabel('Post_Share_Count')
ax.set_ylabel('Post_Like_Count')
ax.set_zlabel('Comment_Count')
plt.show()
Tried to fix errors:
(est.fit(x.as_matrix) instead of est.fit(x))
and
(c=labels.astype(np.int) instead of c=labels.astype(np.float)) - (all values in my file are int.)
However changing from np.float to np.int does not fix it.