I have a set of lattitude and longitude coordinates (i.e. a list of lists: [[20,24],[100,-3],...]) that I would like to plot has a heatmap (not just a scatter) on a mollweide projection. Essentially, what I want is a seaborn hist2d plot but as a mollweide. For a reference of what I mean, please see the uploaded picture. Does anyone know how to do this?
I created some random data and showed the way to generate the histogram plot. I hope this is something you are looking for.
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
# create some random data for histogram
base = [[-20, 30], [100, -20]]
data = []
for _ in range(10000):
data.append((
base[0][0] + np.random.normal(0, 20),
base[0][1] + np.random.normal(0, 10)
))
data.append((
base[1][0] + np.random.normal(0, 20),
base[1][1] + np.random.normal(0, 10)
))
data = np.array(data) / 180 * np.pi # shape (n, 2)
# create bin edges
bin_number = 40
lon_edges = np.linspace(-np.pi, np.pi, bin_number + 1)
lat_edges = np.linspace(-np.pi/2., np.pi/2., bin_number + 1)
# calculate 2D histogram, the shape of hist is (bin_number, bin_number)
hist, lon_edges, lat_edges = np.histogram2d(
*data.T, bins=[lon_edges, lat_edges], density=True
)
# generate the plot
cmap = plt.cm.Greens
fig = plt.figure()
ax = fig.add_subplot(111, projection='mollweide')
ax.pcolor(
lon_edges[:-1], lat_edges[:-1],
hist.T, # transpose from (row, column) to (x, y)
cmap=cmap, shading='auto',
vmin=0, vmax=1
)
# hide the tick labels
ax.set_xticks([])
ax.set_yticks([])
# add the colorbar
cbar = plt.colorbar(
plt.cm.ScalarMappable(
norm=mpl.colors.Normalize(0, 1), cmap=cmap
)
)
cbar.set_label("Density Distribution")
plt.show()
I get the following figure.
Related
I have the following figure:
The figure is composed by the following code snippet:
fig = plt.figure(constrained_layout=True)
grid = fig.add_gridspec(2, 2)
ax_samples_losses = fig.add_subplot(grid[0, 0:])
ax_samples_losses.set_title('Avg. loss per train sample (epoch 0 excluded)')
for sample_idx, sample_avg_train_loss_history in enumerate(samples_avg_train_loss_history):
ax_samples_losses.plot(sample_avg_train_loss_history, label='Sample ' + str(sample_idx))
ax_samples_losses.set_title('Avg. loss per train sample (epoch 0 excluded)')
ax_samples_losses.set_xlabel('Epoch')
ax_samples_losses.set_ylabel('Sample avg. loss')
ax_samples_losses.set_xticks(range(1, epochs))
ax_samples_losses.tick_params(axis='x', rotation=90)
ax_samples_losses.yaxis.set_ticks(np.arange(0, np.max(samples_avg_train_loss_history), 0.25))
ax_samples_losses.tick_params(axis='both', which='major', labelsize=6)
plt.legend(bbox_to_anchor=(1, 1), prop={'size': 6}) #loc="upper left"
# fig.legend(...)
ax_patches_per_sample = fig.add_subplot(grid[1, 0])
#for sample_idx, sample_patches_count in enumerate(samples_train_patches_count):
# ax_patches_per_sample.bar(sample_patches_count, label='Sample ' + str(sample_idx))
ax_patches_per_sample.bar(range(0, len(samples_train_patches_count)), samples_train_patches_count, align='center')
ax_patches_per_sample.set_title('Patches per sample')
ax_patches_per_sample.set_xlabel('Sample')
ax_patches_per_sample.set_ylabel('Patch count')
ax_patches_per_sample.set_xticks(range(0, len(samples_train_patches_count)))
ax_patches_per_sample.yaxis.set_ticks(np.arange(0, np.max(samples_train_patches_count), 20))
ax_patches_per_sample.tick_params(axis='both', which='major', labelsize=6)
where
samples_train_patches_count is a simple list with the number of patches per sampled image
samples_avg_train_loss_history is a list of lists in the shape samples, epochs (so if viewed as a matrix every row will be a sample and every column will be the loss of that sample over time)
I do believe I need to do both
shared legend
shared color palette
The shared legend can be done by using get_legend_handles_labels(). However I do not know how to share colors. Both subplots describe different properties of the same thing - the samples. In short I would like to have Patches per sample subplot have all the colors Avg. loss per train sample (epoch 0 excluded) uses.
The first plot is using standard matplotlib Tab10 discrete color map. We can create a cycler over this colormap, and set one by one the color of each bar:
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from matplotlib.gridspec import GridSpec
import numpy as np
from itertools import cycle
# create a cycler to continously loop over a discrete colormap
cycler = cycle(cm.tab10.colors)
N = 10
x = np.arange(N).astype(int)
y = np.random.uniform(5, 15, N)
f = plt.figure()
gs = GridSpec(2, 4)
ax0 = f.add_subplot(gs[0, :-1])
ax1 = f.add_subplot(gs[1, :-1])
ax2 = f.add_subplot(gs[:, -1])
for i in x:
ax0.plot(x, np.exp(-x / (i + 1)), label="Sample %s" % (i + 1))
h, l = ax0.get_legend_handles_labels()
ax1.bar(x, y)
for p in ax1.patches:
p.set_facecolor(next(cycler))
ax2.axis(False)
ax2.legend(h, l)
plt.tight_layout()
EDIT to accommodate comment. To avoid repetitions you should use a colormap. Matplotlib offers many colormaps. Alternatively, you can also create your own.
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from matplotlib.gridspec import GridSpec
import numpy as np
from itertools import cycle
N = 50
# create a cycler to continously loop over a discrete colormap
colors = cm.viridis(np.linspace(0, 1, N))
x = np.arange(N).astype(int)
y = np.random.uniform(5, 15, N)
f = plt.figure()
gs = GridSpec(2, 4)
ax0 = f.add_subplot(gs[0, :-1])
ax1 = f.add_subplot(gs[1, :-1])
ax2 = f.add_subplot(gs[:, -1])
ax1.bar(x, y)
for i in x:
c = next(cycler)
ax0.plot(x, np.exp(-x / (i + 1)), color=c, label="Sample %s" % (i + 1))
ax1.patches[i].set_facecolor(c)
h, l = ax0.get_legend_handles_labels()
ax2.axis(False)
ax2.legend(h, l)
plt.tight_layout()
I have a table that has 2 features (x,y) - and a vector with the same length that contains their corresponding values (z).
I'm trying to use matplotlib to print this as a 2D plot but I am get an error:
TypeError: Input z must be at least a (2, 2) shaped array, but has shape (5797, 1)
Is there any way to solve this? (since I am trying to use 1d arrays instead of 2d arrays)
The relevant code:
x, y = train_features[:,0], train_features[:,1]
z = train_predictions.detach()
print(x.size())
print(y.size())
print(z.size())
plt.figure()
CS = plt.contour(x, y, z)
CS = plt.contourf(x, y, z)
plt.clabel(CS, fontsize=8, colors='black')
cbar = plt.colorbar(CS)
The prints that result from the prints commands:
torch.Size([5797])
torch.Size([5797])
torch.Size([5797, 1])
EDIT:
I tried to implement this with a second method:
import matplotlib.pyplot as plt
import matplotlib.tri as tri
import numpy as np
npts = 200
ngridx = 100
ngridy = 200
x = train_features[:,0]
y = train_features[:,1]
z = train_predictions.detach().squeeze()
fig, ax1 = plt.subplots()
# -----------------------
# Interpolation on a grid
# -----------------------
# A contour plot of irregularly spaced data coordinates
# via interpolation on a grid.
# Create grid values first.
xi = np.linspace(1, 10, ngridx)
yi = np.linspace(1, 10, ngridy)
# Perform linear interpolation of the data (x,y)
# on a grid defined by (xi,yi)
triang = tri.Triangulation(x, y)
interpolator = tri.LinearTriInterpolator(triang, z)
Xi, Yi = np.meshgrid(xi, yi)
zi = interpolator(Xi, Yi)
ax1.contour(xi, yi, zi, levels=100, linewidths=0.5, colors='k')
cntr1 = ax1.contourf(xi, yi, zi, levels=14, cmap="RdBu_r")
fig.colorbar(cntr1, ax=ax1)
ax1.plot(x, y, 'ko', ms=3)
ax1.set_title('grid and contour (%d points, %d grid points)' %
(npts, ngridx * ngridy))
But the resulting image was the following:
even though z's values are:
tensor([-0.2434, -0.2155, -0.1900, ..., 64.7516, 65.2064, 65.6612])
I would like to group my data and to plot the boxplot for all the groups. There are many questions and answer about that, my problem is that I want to group by a continuos variable, so I want to histogramming my data.
Here what I have done. My data:
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
x = np.random.chisquare(5, size=100000)
y = np.random.normal(size=100000) / (0.05 * x + 0.1) + 2 * x
f, ax = plt.subplots()
ax.plot(x, y, '.', alpha=0.05)
plt.show()
I want to study the behaviour of y (location, width, ...) as a function of x. I am not interested in the distribution of x so I will normalized it.
f, ax = plt.subplots()
xbins = np.linspace(0, 25, 50)
ybins = np.linspace(-20, 50, 50)
H, xedges, yedges = np.histogram2d(y, x, bins=(ybins, xbins))
norm = np.sum(H, axis = 0)
H /= norm
ax.pcolor(xbins, ybins, np.nan_to_num(H), vmax=.4)
plt.show()
I can plot histogram, but I want boxplot
binning = np.concatenate(([0], np.sort(np.random.random(20) * 25), [25]))
idx = np.digitize(x, binning)
data_to_plot = [y[idx == i] for i in xrange(len(binning))]
f, ax = plt.subplots()
midpoints = 0.5 * (binning[1:] + binning[:-1])
widths = 0.9 * (binning[1:] - binning[:-1])
from matplotlib.ticker import MultipleLocator, FormatStrFormatter
majorLocator = MultipleLocator(2)
ax.boxplot(data_to_plot, positions = midpoints, widths=widths)
ax.set_xlim(0, 25)
ax.xaxis.set_major_locator(majorLocator)
ax.set_xlabel('x')
ax.set_ylabel('median(y)')
plt.show()
Is there an automatic way to do that, like ax.magic(x, y, binning)? Is there a better way to do that? (Have a look to https://root.cern.ch/root/html/TProfile.html for example, which plot the mean and the error of the mean as error bars)
In addition, I want to minize the memory footprint (my real data are much more than 100000), I am worried about data_to_plot, is it a copy?
I am plotting a histogram for some data points with bar heights being the percentage of that bin from the whole data:
x = normal(size=1000)
hist, bins = np.histogram(x, bins=20)
plt.bar(bins[:-1], hist.astype(np.float32) / hist.sum(), width=(bins[1]-bins[0]), alpha=0.6)
The result is:
I would like all bars that sum up to be 50% of the data to be in a different color, for example:
(I selected the colored bars without actually checking whether their sum adds to 50%)
Any suggestions how to accomplish this?
Here is how you can plot the first half of the bins with a different color, this looks like your mock, but I am not sure it complies to %50 of the data (it is not clear to me what do you mean by that).
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
mu, sigma = 100, 15
x = mu + sigma * np.random.randn(10000)
fig = plt.figure()
ax = fig.add_subplot(111)
# the histogram of the data
n, bins, patches = ax.hist(x, 50, normed=1, facecolor='green', alpha=0.75)
# now that we found the index we color all the beans smaller than middle index
for p in patches[:len(bins)/2]:
p.set_facecolor('red')
# hist uses np.histogram under the hood to create 'n' and 'bins'.
# np.histogram returns the bin edges, so there will be 50 probability
# density values in n, 51 bin edges in bins and 50 patches. To get
# everything lined up, we'll compute the bin centers
bincenters = 0.5*(bins[1:]+bins[:-1])
# add a 'best fit' line for the normal PDF
y = mlab.normpdf( bincenters, mu, sigma)
l = ax.plot(bincenters, y, 'r--', linewidth=1)
ax.set_xlabel('Smarts')
ax.set_ylabel('Probability')
ax.set_xlim(40, 160)
ax.set_ylim(0, 0.03)
ax.grid(True)
plt.show()
And the output is:
update
The key method you want to look at is patch.set_set_facecolor. You have to understand that almost everything you plot inside the axes object is a Patch, and as such it has this method, here is another example, I arbitrary choose the first 3 bars to have another color, you can choose based on what ever you decide:
import numpy as np
import matplotlib.pyplot as plt
fig = plt.figure()
ax = fig.add_subplot(111)
## the data
N = 5
menMeans = [18, 35, 30, 35, 27]
## necessary variables
ind = np.arange(N) # the x locations for the groups
width = 0.35 # the width of the bars
## the bars
rects1 = ax.bar(ind, menMeans, width,
color='black',
error_kw=dict(elinewidth=2,ecolor='red'))
for patch in rects1.patches[:3]:
patch.set_facecolor('red')
ax.set_xlim(-width,len(ind)+width)
ax.set_ylim(0,45)
ax.set_ylabel('Scores')
xTickMarks = ['Group'+str(i) for i in range(1,6)]
ax.set_xticks(ind)
xtickNames = ax.set_xticklabels(xTickMarks)
plt.setp(xtickNames, rotation=45, fontsize=10)
plt.show()
I am trying to plot some histogrammed data on a polar axis but it wont seem to work properly. An example is below, I use the custom projection found How to make the angles in a matplotlib polar plot go clockwise with 0° at the top? it works for a scatter plot so I think my problem is with the histogram function. This has been driving me nuts all day, does anyone know what I am doing wrong...........
import random
import numpy as np
import matplotlib.pyplot as plt
baz = np.zeros((20))
freq = np.zeros((20))
pwr = np.zeros((20))
for x in range(20):
baz[x] = random.randint(20,25)*10
freq[x] = random.randint(1,10)*10
pwr[x] = random.randint(-10,-1)*10
baz = baz*np.pi/180.
abins = np.linspace(0,2*np.pi,360) # 0 to 360 in steps of 360/N.
sbins = np.linspace(1, 100)
H, xedges, yedges = np.histogram2d(baz, freq, bins=(abins,sbins), weights=pwr)
plt.figure(figsize=(14,14))
plt.subplot(1, 1, 1, projection='northpolar')
#plt.scatter(baz, freq)
plt.pcolormesh(H)
plt.show()
Your code works if you explicitly pass a mgrid (with similar characteristics than your a bins and sbins) to the pcolormesh command.
Below is an example inspired by your code:
import matplotlib.pyplot as plt
import numpy as np
#Generate the data
size = 200
baz = 10*np.random.randint(20, 25, size)*np.pi/180.
freq = 10*np.random.randint(1, 10, size)
pwr = 10*np.random.randint(-10, -1, size)
abins = np.linspace(0, 2*np.pi, 360) # 0 to 360 in steps of 360/N.
sbins = np.linspace(1, 100, 50)
H, xedges, yedges = np.histogram2d(baz, freq, bins=(abins,sbins), weights=pwr)
#Grid to plot your data on using pcolormesh
theta, r = np.mgrid[0:2*np.pi:360j, 1:100:50j]
fig, ax = plt.subplots(figsize=(14,14), subplot_kw=dict(projection='northpolar'))
ax.pcolormesh(theta, r, H)
ax.set_yticklabels([]) #remove yticklabels
plt.show()