matplotlib heatmap, customize y axis - matplotlib

Right now my code looks like this:
#generate 262*20 elements
values = np.random.random(262*20).tolist()
# convert the list to a 2D NumPy array
values = np.array(values).reshape((262, 20))
h, w = values.shape
#h=262, w=20
fig = plt.figure(num=None, dpi=80,figsize=(9, 7), facecolor='w', edgecolor='k')
ax = fig.add_subplot(111)
#fig, ax = plt.subplots()
plt.imshow(values)
plt.colorbar()
plt.xticks(np.arange(w), list('PNIYLKCVFWABCDEFGHIJ'))
ax.set_aspect(w/h)
plt.show()
The plot looks like this:
As you can see, the range of y axis is 0-261.
But I want my y axis to go from 26 to 290, missing 57, 239, and 253. So still 262 in total. I tried to generate a list like this:
mylist =[26, 27, ......missing 57, 239, 253, ....290]
plt.yticks(np.arange(h), mylist)
The Y axis just looks like everything squished together.
So I tried:
pylab.ylim([26, 290])
And It looks like this:
So it just feels like the data in first row always corresponds to [0], not to [26]

Suggest you use pcolormesh. If you want gaps, then use an numpy.ma.masked array for the area with gaps.
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
values = np.random.rand(290,20)
values[:26, :] = np.NaN
values[ [57, 239, 253], :] = np.NaN
values = np.ma.masked_invalid(values)
h, w = values.shape
fig, ax = plt.subplots(figsize=(9,7))
# Make one larger so these values represent the edge of the data pixels.
y = np.arange(0, 290.5)
x = np.arange(0, 20.5)
pcm = ax.pcolormesh(x, y, values, rasterized=True) # you don't need rasterized=True
fig.colorbar(pcm)
plt.xticks(np.arange(w), list('PNIYLKCVFWABCDEFGHIJ'))
plt.show()
Result
EDIT: If you want to just work w/ a 262x20 array:
values = np.random.rand(262,20)
h, w = values.shape
fig, ax = plt.subplots(figsize=(9,7))
# Make one larger so these values represent the edge of the data pixels.
y = np.arange(0, 290.5)
y = np.delete(y, [57, 239, 253])
y = np.delete(y, range(26))
x = np.arange(0, 20.5)
pcm = ax.pcolormesh(x, y, values, rasterized=True) # you don't need rasterized=True
fig.colorbar(pcm)
plt.xticks(np.arange(w), list('PNIYLKCVFWABCDEFGHIJ'))
plt.show()
Note that this doesn't put a blank line at 57, 239 and 253. If you want that, you need to do:
values = np.random.rand(262,20)
Z = np.ones((290, 20)) * np.NaN
inds = set(range(290)) - set(list(range(26)) + [57, 239, 253])
for nn, ind in enumerate(inds):
Z[ind, :] = values[nn,:]
h, w = values.shape
fig, ax = plt.subplots(figsize=(9,7))
# Make one larger so these values represent the edge of the data pixels.
y = np.arange(0, 290.5)
x = np.arange(0, 20.5)
pcm = ax.pcolormesh(x, y, Z, rasterized=True) # you don't need rasterized=True
fig.colorbar(pcm)
plt.xticks(np.arange(w), list('PNIYLKCVFWABCDEFGHIJ'))
plt.show()

Related

Showing Matplotlib pie chart only top 3 item's percentage [duplicate]

I have the following code:
import matplotlib.pyplot as plt
import numpy as np
np.random.seed(123456)
import pandas as pd
df = pd.DataFrame(3 * np.random.rand(4, 4), index=['a', 'b', 'c', 'd'],
columns=['x', 'y','z','w'])
plt.style.use('ggplot')
colors = plt.rcParams['axes.color_cycle']
fig, axes = plt.subplots(nrows=2, ncols=3)
for ax in axes.flat:
ax.axis('off')
for ax, col in zip(axes.flat, df.columns):
ax.pie(df[col], labels=df.index, autopct='%.2f', colors=colors)
ax.set(ylabel='', title=col, aspect='equal')
axes[0, 0].legend(bbox_to_anchor=(0, 0.5))
fig.savefig('your_file.png') # Or whichever format you'd like
plt.show()
Which produce the following:
My question is, how can I remove the label based on a condition. For example I'd only want to display labels with percent > 20%. Such that the labels and value of a,c,d won't be displayed in X, etc.
The autopct argument from pie can be a callable, which will receive the current percentage. So you only would need to provide a function that returns an empty string for the values you want to omit the percentage.
Function
def my_autopct(pct):
return ('%.2f' % pct) if pct > 20 else ''
Plot with matplotlib.axes.Axes.pie
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(8, 6))
for ax, col in zip(axes.flat, df.columns):
ax.pie(df[col], labels=df.index, autopct=my_autopct)
ax.set(ylabel='', title=col, aspect='equal')
fig.tight_layout()
Plot directly with the dataframe
axes = df.plot(kind='pie', autopct=my_autopct, figsize=(8, 6), subplots=True, layout=(2, 2), legend=False)
for ax in axes.flat:
yl = ax.get_ylabel()
ax.set(ylabel='', title=yl)
fig = axes[0, 0].get_figure()
fig.tight_layout()
If you need to parametrize the value on the autopct argument, you'll need a function that returns a function, like:
def autopct_generator(limit):
def inner_autopct(pct):
return ('%.2f' % pct) if pct > limit else ''
return inner_autopct
ax.pie(df[col], labels=df.index, autopct=autopct_generator(20), colors=colors)
For the labels, the best thing I can come up with is using list comprehension:
for ax, col in zip(axes.flat, df.columns):
data = df[col]
labels = [n if v > data.sum() * 0.2 else ''
for n, v in zip(df.index, data)]
ax.pie(data, autopct=my_autopct, colors=colors, labels=labels)
Note, however, that the legend by default is being generated from the first passed labels, so you'll need to pass all values explicitly to keep it intact.
axes[0, 0].legend(df.index, bbox_to_anchor=(0, 0.5))
For labels I have used:
def my_level_list(data):
list = []
for i in range(len(data)):
if (data[i]*100/np.sum(data)) > 2 : #2%
list.append('Label '+str(i+1))
else:
list.append('')
return list
patches, texts, autotexts = plt.pie(data, radius = 1, labels=my_level_list(data), autopct=my_autopct, shadow=True)
You can make the labels function a little shorter using list comprehension:
def my_autopct(pct):
return ('%1.1f' % pct) if pct > 1 else ''
def get_new_labels(sizes, labels):
new_labels = [label if size > 1 else '' for size, label in zip(sizes, labels)]
return new_labels
fig, ax = plt.subplots()
_,_,_ = ax.pie(sizes, labels=get_new_labels(sizes, labels), colors=colors, autopct=my_autopct, startangle=90, rotatelabels=False)

How to plot a heatmap of coordinates on a mollweide projection

I have a set of lattitude and longitude coordinates (i.e. a list of lists: [[20,24],[100,-3],...]) that I would like to plot has a heatmap (not just a scatter) on a mollweide projection. Essentially, what I want is a seaborn hist2d plot but as a mollweide. For a reference of what I mean, please see the uploaded picture. Does anyone know how to do this?
I created some random data and showed the way to generate the histogram plot. I hope this is something you are looking for.
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
# create some random data for histogram
base = [[-20, 30], [100, -20]]
data = []
for _ in range(10000):
data.append((
base[0][0] + np.random.normal(0, 20),
base[0][1] + np.random.normal(0, 10)
))
data.append((
base[1][0] + np.random.normal(0, 20),
base[1][1] + np.random.normal(0, 10)
))
data = np.array(data) / 180 * np.pi # shape (n, 2)
# create bin edges
bin_number = 40
lon_edges = np.linspace(-np.pi, np.pi, bin_number + 1)
lat_edges = np.linspace(-np.pi/2., np.pi/2., bin_number + 1)
# calculate 2D histogram, the shape of hist is (bin_number, bin_number)
hist, lon_edges, lat_edges = np.histogram2d(
*data.T, bins=[lon_edges, lat_edges], density=True
)
# generate the plot
cmap = plt.cm.Greens
fig = plt.figure()
ax = fig.add_subplot(111, projection='mollweide')
ax.pcolor(
lon_edges[:-1], lat_edges[:-1],
hist.T, # transpose from (row, column) to (x, y)
cmap=cmap, shading='auto',
vmin=0, vmax=1
)
# hide the tick labels
ax.set_xticks([])
ax.set_yticks([])
# add the colorbar
cbar = plt.colorbar(
plt.cm.ScalarMappable(
norm=mpl.colors.Normalize(0, 1), cmap=cmap
)
)
cbar.set_label("Density Distribution")
plt.show()
I get the following figure.

How do I plot a contour from a table of values?

I have a table that has 2 features (x,y) - and a vector with the same length that contains their corresponding values (z).
I'm trying to use matplotlib to print this as a 2D plot but I am get an error:
TypeError: Input z must be at least a (2, 2) shaped array, but has shape (5797, 1)
Is there any way to solve this? (since I am trying to use 1d arrays instead of 2d arrays)
The relevant code:
x, y = train_features[:,0], train_features[:,1]
z = train_predictions.detach()
print(x.size())
print(y.size())
print(z.size())
plt.figure()
CS = plt.contour(x, y, z)
CS = plt.contourf(x, y, z)
plt.clabel(CS, fontsize=8, colors='black')
cbar = plt.colorbar(CS)
The prints that result from the prints commands:
torch.Size([5797])
torch.Size([5797])
torch.Size([5797, 1])
EDIT:
I tried to implement this with a second method:
import matplotlib.pyplot as plt
import matplotlib.tri as tri
import numpy as np
npts = 200
ngridx = 100
ngridy = 200
x = train_features[:,0]
y = train_features[:,1]
z = train_predictions.detach().squeeze()
fig, ax1 = plt.subplots()
# -----------------------
# Interpolation on a grid
# -----------------------
# A contour plot of irregularly spaced data coordinates
# via interpolation on a grid.
# Create grid values first.
xi = np.linspace(1, 10, ngridx)
yi = np.linspace(1, 10, ngridy)
# Perform linear interpolation of the data (x,y)
# on a grid defined by (xi,yi)
triang = tri.Triangulation(x, y)
interpolator = tri.LinearTriInterpolator(triang, z)
Xi, Yi = np.meshgrid(xi, yi)
zi = interpolator(Xi, Yi)
ax1.contour(xi, yi, zi, levels=100, linewidths=0.5, colors='k')
cntr1 = ax1.contourf(xi, yi, zi, levels=14, cmap="RdBu_r")
fig.colorbar(cntr1, ax=ax1)
ax1.plot(x, y, 'ko', ms=3)
ax1.set_title('grid and contour (%d points, %d grid points)' %
(npts, ngridx * ngridy))
But the resulting image was the following:
even though z's values are:
tensor([-0.2434, -0.2155, -0.1900, ..., 64.7516, 65.2064, 65.6612])

Pretty confusion matrix visualisation with matplotlib

I'm wondering if there are some templates for viewing confusion matrices in matplotlib with a similar rendering, of which I ignore the specific nomenclature.
I have tried doing something similar with your fig 2. Here is my code using hand written digits data.
import numpy as np
from scipy import ndimage
from matplotlib import pyplot as plt
from sklearn import manifold, datasets
from scipy.spatial.distance import pdist, squareform
from scipy.cluster.hierarchy import leaves_list, linkage
def get_small_Xy(X, y, n=8):
X = np.vstack([X[y==e][0:n] for e in np.unique(y)])
y = np.hstack([[e]*n for e in np.unique(y)])
return X, y
# Load digit data
X_, y_ = datasets.load_digits(return_X_y=True)
# get a small set of data
X, y = get_small_Xy(X_, y_)
# Get similarity matrix
D = 1-squareform(pdist(X, metric='cosine'))
Z = linkage(D, method='ward')
ind = leaves_list(Z)
D = D[ind, :]
D = D[:, ind]
# labels and colors related
lbs = np.array([i if i==j else 10 for i in y for j in y])
colors = np.array(['C{}'.format(i) for i in range(10)]+['gray'])
colors[7] = '#413c39'
c = colors[lbs]
font1 = {'family': 'Arial',
'weight': 'normal',
'size': 8,
}
fig, ax = plt.subplots(1, 1, figsize=(10, 10))
n = np.product(X.shape[0])
xx, yy = np.meshgrid(range(n), range(n))
xy = np.stack([xx.ravel(), yy.ravel()]).T
ax.scatter(xy[:, 0], xy[:, 1], s=D**4*30, fc=c, ec=None, alpha=0.8)
ax.set_xlim(-1, n)
ax.set_ylim(n, -1)
ax.tick_params(top=False, bottom=False, left=False, right=False, labelleft=False, labelbottom=False)
# place text
for i, e in enumerate(y):
ax.text(-1.2, i, e, ha='right', va='center', fontdict=font1, c=colors[e])
for i, e in enumerate(y):
ax.text(i, -1, e, ha='center', va='bottom', fontdict=font1, c=colors[e])
# draw lines
for e in np.where(np.diff(y))[0]:
ax.axhline(e+0.5, color='gray', lw=0.5, alpha=0.8)
ax.axvline(e+0.5, color='gray', lw=0.5, alpha=0.8)
One issue is the alpha of all points, which seems not to possible to set with different values with plot scatters in one run.

group boxplot histogramming

I would like to group my data and to plot the boxplot for all the groups. There are many questions and answer about that, my problem is that I want to group by a continuos variable, so I want to histogramming my data.
Here what I have done. My data:
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
x = np.random.chisquare(5, size=100000)
y = np.random.normal(size=100000) / (0.05 * x + 0.1) + 2 * x
f, ax = plt.subplots()
ax.plot(x, y, '.', alpha=0.05)
plt.show()
I want to study the behaviour of y (location, width, ...) as a function of x. I am not interested in the distribution of x so I will normalized it.
f, ax = plt.subplots()
xbins = np.linspace(0, 25, 50)
ybins = np.linspace(-20, 50, 50)
H, xedges, yedges = np.histogram2d(y, x, bins=(ybins, xbins))
norm = np.sum(H, axis = 0)
H /= norm
ax.pcolor(xbins, ybins, np.nan_to_num(H), vmax=.4)
plt.show()
I can plot histogram, but I want boxplot
binning = np.concatenate(([0], np.sort(np.random.random(20) * 25), [25]))
idx = np.digitize(x, binning)
data_to_plot = [y[idx == i] for i in xrange(len(binning))]
f, ax = plt.subplots()
midpoints = 0.5 * (binning[1:] + binning[:-1])
widths = 0.9 * (binning[1:] - binning[:-1])
from matplotlib.ticker import MultipleLocator, FormatStrFormatter
majorLocator = MultipleLocator(2)
ax.boxplot(data_to_plot, positions = midpoints, widths=widths)
ax.set_xlim(0, 25)
ax.xaxis.set_major_locator(majorLocator)
ax.set_xlabel('x')
ax.set_ylabel('median(y)')
plt.show()
Is there an automatic way to do that, like ax.magic(x, y, binning)? Is there a better way to do that? (Have a look to https://root.cern.ch/root/html/TProfile.html for example, which plot the mean and the error of the mean as error bars)
In addition, I want to minize the memory footprint (my real data are much more than 100000), I am worried about data_to_plot, is it a copy?