matplotlib: imshow a 2d array with plots of its marginal densities - matplotlib

How can one plot a 2d density with its marginal densities,
along the lines of
scatterplot-with-marginal-histograms-in-ggplot2
or
2D plot with histograms / marginals,
in matplotlib ?
In outline,
# I have --
A = a 2d numpy array >= 0
xdens ~ A.mean(axis=0)
ydens ~ A.mean(axis=1)
# I want --
pl.imshow( A )
pl.plot( xdens ) narrow, below A
pl.plot( ydens ) narrow, left of A, with the x y axes flipped
Added in 2017: see the lovely example of seaborn.jointplot,
also this on SO. (The question was in 2013, before seaborn.)

You can use sharex and sharey with subplots:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import gridspec
t = np.linspace(0, 31.3, 100)
f = np.linspace(0, 1000, 1000)
a = np.exp(-np.abs(f-200)/200)[:, None] * np.random.rand(t.size)
flim = (f.min(), f.max())
tlim = (t.min(), t.max())
gs = gridspec.GridSpec(2, 2, width_ratios=[1,3], height_ratios=[3,1])
ax = plt.subplot(gs[0,1])
axl = plt.subplot(gs[0,0], sharey=ax)
axb = plt.subplot(gs[1,1], sharex=ax)
ax.imshow(a, origin='lower', extent=tlim+flim, aspect='auto')
plt.xlim(tlim)
axl.plot(a.mean(1), f)
axb.plot(t, a.mean(0))
Which gives you:

Related

Fitting & scaling a probability density function correctly to a histogram with a logarithmic x-axis?

I am trying to fit a gilbrat PDF to a dataset (that I have in form of a list). I want to show the data in a histogram with a logarithmic x-scale and add the fitted curve. However, the curve seems too flat compared to the histogram, like in this picture: I tried to scale the pdf according to Fitting a Gaussian to a histogram with MatPlotLib and Numpy - wrong Y-scaling? , but the problem remains.
Here is a code example with randomly created data:
import scipy.stats as st
import numpy as np
import matplotlib.pyplot as plt
#create random dataset
data = st.gilbrat.rvs(scale = 10, size = 100).tolist()
param = st.gilbrat.fit(data)
x = np.linspace(min(data),max(data),len(data))
pdf = st.gilbrat.pdf(x, param[0], param[1])
plt.figure()
logbins = np.logspace(np.log10(np.min(data)),np.log10(np.max(data)),20)
result = plt.hist(data, bins=logbins, edgecolor="black", alpha = 0.5, label="data")
dx = result[1][1] - result[1][0]
plt.plot(x,pdf * (len(data)*dx), label="fit")
plt.xscale('log')
plt.xlabel("x")
plt.ylabel("Number of occurence")
plt.legend()
Am I missing something?
As your bins aren't equally spaced, the histogram isn't similar to a scaled version of the pdf. The bins at the right represent a much wider x-range than the ones at the left.
To predict the heights of the rectangles given the pdf, each bin region needs a different scaling factor, depending on the width of that bin.
The following code rescales each region independently, resulting in a discontinuously scaled pdf.
import scipy.stats as st
import numpy as np
import matplotlib.pyplot as plt
# create random dataset
np.random.seed(1)
data = st.gilbrat.rvs(scale=10, size=100)
param = st.gilbrat.fit(data)
x = np.logspace(np.log10(data.min()), np.log10(data.max()), 500)
pdf = st.gilbrat.pdf(x, param[0], param[1])
plt.figure()
logbins = np.logspace(np.log10(data.min()), np.log10(data.max()), 20)
heights, bins, rectangles = plt.hist(data, bins=logbins, edgecolor="black", alpha=0.5, label="data")
for b0, b1 in zip(bins[:-1], bins[1:]):
dx = b1 - b0
x_bin = np.logspace(np.log10(b0), np.log10(b1), 100)
pdf_bin = st.gilbrat.pdf(x_bin, param[0], param[1])
plt.plot(x_bin, pdf_bin * (len(data) * dx), color='crimson',
label="expected bin height" if b0 == bins[0] else None)
plt.xscale('log')
plt.xlabel("x")
plt.ylabel("Number of occurence")
plt.legend()
plt.tight_layout()
plt.show()
Here is another take, smoothing out the scaling of the pdf to any log-scale histogram. The dx is different at each x-position, in contrast to the histogram with linearly spaced bins.
import scipy.stats as st
import numpy as np
import matplotlib.pyplot as plt
# create random dataset
np.random.seed(1)
data = st.gilbrat.rvs(scale=10, size=100)
param = st.gilbrat.fit(data)
x = np.logspace(np.log10(data.min()), np.log10(data.max()), 500)
pdf = st.gilbrat.pdf(x, param[0], param[1])
plt.figure()
logbins = np.logspace(np.log10(data.min()), np.log10(data.max()), 20)
heights, bins, rectangles = plt.hist(data, bins=logbins, edgecolor="black", alpha=0.5, label="data")
dx_array = np.logspace(np.log10(bins[1] - bins[0]), np.log10(bins[-1] - bins[-2]), len(x))
plt.plot(x, pdf * len(data) * dx_array, color='crimson', label="pdf rescaled like histogram")
plt.xscale('log')
plt.xlabel("x")
plt.ylabel("Number of occurence")
plt.legend()
plt.tight_layout()
plt.show()

Connecting point without continus boundaries

I want to plot trajectories, without connecting the points from boundaries. Attached an image of what i mean.
My code:
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
# import polygon as poly
x, y = np.loadtxt('c55.txt', delimiter=' ', unpack=True)
plt.plot(x, y, '.' ,color = 'k' , markersize=0.5)
#for i in range(1, len(x),1):
#if abs(x[i]-x[i+1])>300:
plt.plot(x,y,'-o',color='red',ms=5,label="Window 1")
plt.show()
Your x-values go several times from low to high. plt.plot connects all points in the order they are encountered in the x and y arrays.
The following approach firsts looks for the indices where the x-values start again (so, where the difference of successive x's isn't positive).
These indices are then used to draw the separate curves.
from matplotlib.colors import ListedColormap
import numpy as np
# first create some test data a bit similar to the given ones.
x = np.tile(np.linspace(-3, 3, 20), 4)
y = np.cos(x) + np.repeat(np.linspace(-3, 3, 4), 20)
fig, axs = plt.subplots(ncols=2, figsize=(15, 4))
# plotting the test data without change
axs[0].plot(x, y, '-o')
bounds = np.argwhere(np.diff(x) < 0).squeeze() # find the boundaries
bounds = np.concatenate([[0], bounds + 1, [len(x)]]) # additional boundaries for the first and last point
for b0, b1 in zip(bounds[:-1], bounds[1:]):
axs[1].plot(x[b0:b1], y[b0:b1], '-o') # use '-ro' for only red curves
plt.show()

Display two Sympy plots as two Matplotlib subplots

This code
from sympy import *
x=Symbol('x')
p1 = plot(x**2,(x,-2,2))
p2 = plot(x**3,(x,-2,2))
results in two separate plots.
Instead of two separate plots, I want to display them with matplotlib as subplots:
import matplotlib.pyplot as plt
fig = plt.figure()
ax1 = fig.add_subplot(121)
ax2 = fig.add_subplot(122)
plt.show()
How can I add p1 and p2, so that they are displayed as subplots inside the matplotlib figure?
The problem is that sympy Plot creates its own figure and axes. It is not meant to draw to an existing axes.
You may however replace the axes the plot is drawn to by an existing axes prior to showing the sympy plot.
from sympy import Symbol,plot
import matplotlib.pyplot as plt
def move_sympyplot_to_axes(p, ax):
backend = p.backend(p)
backend.ax = ax
backend.process_series()
backend.ax.spines['right'].set_color('none')
backend.ax.spines['bottom'].set_position('zero')
backend.ax.spines['top'].set_color('none')
plt.close(backend.fig)
x=Symbol('x')
p1 = plot(x**2,(x,-2,2), show=False)
p2 = plot(x**3,(x,-2,2), show=False)
fig, (ax,ax2) = plt.subplots(ncols=2)
move_sympyplot_to_axes(p1, ax)
move_sympyplot_to_axes(p2, ax2)
plt.show()
My solution does not add p1, p2 to the subplots directly. But (x,y) coordinates from them are captured and used instead.
import matplotlib.pyplot as plt
from sympy import symbols
import numpy as np
from sympy import symbols
from sympy.plotting import plot
# part 1
# uses symbolic plot of functions
x = symbols('x')
#p1, p2 = plot(x**2, x**3, (x, -2, 2))
# this plot will not show ...
# only produce 2 curves
p1, p2 = plot((x**2, (x, -2, 2)), \
(x**3, (x, -2, 2)), \
show=False)
# collect (x,y)'s of the unseen curves
x1y1 = p1.get_points() # array of 2D
x2y2 = p2.get_points()
# part 2
# uses regular matplotlib to plot the data
fig = plt.figure(figsize=(8, 5))
ax1 = fig.add_subplot(121)
ax2 = fig.add_subplot(122)
# do subplot 1
ax1.plot(x1y1[0], x1y1[1], 'g') # plot x**2 in green
ax1.set_xlim([-2, 2])
ax1.set_xlabel('X1')
ax1.set_ylabel('Y1')
ax1.set_title('Line1') # destroyed by another .title(); axis metho1
# do subplot 2
ax2.plot(x2y2[0], x2y2[1], 'r') # plot x**3 in red
ax2.set_xlim([-2, 2])
ax2.set_xlabel('X2')
ax2.set_ylabel('Y2')
ax2.set_title('Line2')
fig.subplots_adjust(wspace=0.4) # set space between subplots
plt.show()
The resulting plot:
You can simply use a plotgrid to get 2 or more plots in one figure.
See also: sympy.plotting.PlotGrid()
Here's a working example:
import sympy as sp
from matplotlib import pyplot as plt
# define functions
x = symbols('x')
f = sin(x)
g = cos(x)
# create separate plots
p1 = plot(f, show=False, xlim=(-pi, pi), line_color='blue', legend=True)
p2 = plot(g, show=False, xlim=(-pi, pi), line_color='red', legend=True)
# create a plotgrid with 2 rows and 1 column
plotgrid = sp.plotting.PlotGrid(2, 1, p1, p2, show=False, size=(5., 3.5))
plotgrid.show()
Resulting plot:

Embedding small plots inside subplots in matplotlib

If you want to insert a small plot inside a bigger one you can use Axes, like here.
The problem is that I don't know how to do the same inside a subplot.
I have several subplots and I would like to plot a small plot inside each subplot.
The example code would be something like this:
import numpy as np
import matplotlib.pyplot as plt
fig = plt.figure()
for i in range(4):
ax = fig.add_subplot(2,2,i)
ax.plot(np.arange(11),np.arange(11),'b')
#b = ax.axes([0.7,0.7,0.2,0.2])
#it gives an error, AxesSubplot is not callable
#b = plt.axes([0.7,0.7,0.2,0.2])
#plt.plot(np.arange(3),np.arange(3)+11,'g')
#it plots the small plot in the selected position of the whole figure, not inside the subplot
Any ideas?
I wrote a function very similar to plt.axes. You could use it for plotting yours sub-subplots. There is an example...
import matplotlib.pyplot as plt
import numpy as np
#def add_subplot_axes(ax,rect,facecolor='w'): # matplotlib 2.0+
def add_subplot_axes(ax,rect,axisbg='w'):
fig = plt.gcf()
box = ax.get_position()
width = box.width
height = box.height
inax_position = ax.transAxes.transform(rect[0:2])
transFigure = fig.transFigure.inverted()
infig_position = transFigure.transform(inax_position)
x = infig_position[0]
y = infig_position[1]
width *= rect[2]
height *= rect[3] # <= Typo was here
#subax = fig.add_axes([x,y,width,height],facecolor=facecolor) # matplotlib 2.0+
subax = fig.add_axes([x,y,width,height],axisbg=axisbg)
x_labelsize = subax.get_xticklabels()[0].get_size()
y_labelsize = subax.get_yticklabels()[0].get_size()
x_labelsize *= rect[2]**0.5
y_labelsize *= rect[3]**0.5
subax.xaxis.set_tick_params(labelsize=x_labelsize)
subax.yaxis.set_tick_params(labelsize=y_labelsize)
return subax
def example1():
fig = plt.figure(figsize=(10,10))
ax = fig.add_subplot(111)
rect = [0.2,0.2,0.7,0.7]
ax1 = add_subplot_axes(ax,rect)
ax2 = add_subplot_axes(ax1,rect)
ax3 = add_subplot_axes(ax2,rect)
plt.show()
def example2():
fig = plt.figure(figsize=(10,10))
axes = []
subpos = [0.2,0.6,0.3,0.3]
x = np.linspace(-np.pi,np.pi)
for i in range(4):
axes.append(fig.add_subplot(2,2,i))
for axis in axes:
axis.set_xlim(-np.pi,np.pi)
axis.set_ylim(-1,3)
axis.plot(x,np.sin(x))
subax1 = add_subplot_axes(axis,subpos)
subax2 = add_subplot_axes(subax1,subpos)
subax1.plot(x,np.sin(x))
subax2.plot(x,np.sin(x))
if __name__ == '__main__':
example2()
plt.show()
You can now do this with matplotlibs inset_axes method (see docs):
from mpl_toolkits.axes_grid.inset_locator import inset_axes
inset_axes = inset_axes(parent_axes,
width="30%", # width = 30% of parent_bbox
height=1., # height : 1 inch
loc=3)
Update: As Kuti pointed out, for matplotlib version 2.1 or above, you should change the import statement to:
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
There is now also a full example showing all different options available.
From matplotlib 3.0 on, you can use matplotlib.axes.Axes.inset_axes:
import numpy as np
import matplotlib.pyplot as plt
fig, axes = plt.subplots(2,2)
for ax in axes.flat:
ax.plot(np.arange(11),np.arange(11))
ins = ax.inset_axes([0.7,0.7,0.2,0.2])
plt.show()
The difference to mpl_toolkits.axes_grid.inset_locator.inset_axes mentionned in #jrieke's answer is that this is a lot easier to use (no extra imports etc.), but has the drawback of being slightly less flexible (no argument for padding or corner locations).
source: https://matplotlib.org/examples/pylab_examples/axes_demo.html
from mpl_toolkits.axes_grid.inset_locator import inset_axes
import matplotlib.pyplot as plt
import numpy as np
# create some data to use for the plot
dt = 0.001
t = np.arange(0.0, 10.0, dt)
r = np.exp(-t[:1000]/0.05) # impulse response
x = np.random.randn(len(t))
s = np.convolve(x, r)[:len(x)]*dt # colored noise
fig = plt.figure(figsize=(9, 4),facecolor='white')
ax = fig.add_subplot(121)
# the main axes is subplot(111) by default
plt.plot(t, s)
plt.axis([0, 1, 1.1*np.amin(s), 2*np.amax(s)])
plt.xlabel('time (s)')
plt.ylabel('current (nA)')
plt.title('Subplot 1: \n Gaussian colored noise')
# this is an inset axes over the main axes
inset_axes = inset_axes(ax,
width="50%", # width = 30% of parent_bbox
height=1.0, # height : 1 inch
loc=1)
n, bins, patches = plt.hist(s, 400, normed=1)
#plt.title('Probability')
plt.xticks([])
plt.yticks([])
ax = fig.add_subplot(122)
# the main axes is subplot(111) by default
plt.plot(t, s)
plt.axis([0, 1, 1.1*np.amin(s), 2*np.amax(s)])
plt.xlabel('time (s)')
plt.ylabel('current (nA)')
plt.title('Subplot 2: \n Gaussian colored noise')
plt.tight_layout()
plt.show()

matplotlib polar 2d histogram

I am trying to plot some histogrammed data on a polar axis but it wont seem to work properly. An example is below, I use the custom projection found How to make the angles in a matplotlib polar plot go clockwise with 0° at the top? it works for a scatter plot so I think my problem is with the histogram function. This has been driving me nuts all day, does anyone know what I am doing wrong...........
import random
import numpy as np
import matplotlib.pyplot as plt
baz = np.zeros((20))
freq = np.zeros((20))
pwr = np.zeros((20))
for x in range(20):
baz[x] = random.randint(20,25)*10
freq[x] = random.randint(1,10)*10
pwr[x] = random.randint(-10,-1)*10
baz = baz*np.pi/180.
abins = np.linspace(0,2*np.pi,360) # 0 to 360 in steps of 360/N.
sbins = np.linspace(1, 100)
H, xedges, yedges = np.histogram2d(baz, freq, bins=(abins,sbins), weights=pwr)
plt.figure(figsize=(14,14))
plt.subplot(1, 1, 1, projection='northpolar')
#plt.scatter(baz, freq)
plt.pcolormesh(H)
plt.show()
Your code works if you explicitly pass a mgrid (with similar characteristics than your a bins and sbins) to the pcolormesh command.
Below is an example inspired by your code:
import matplotlib.pyplot as plt
import numpy as np
#Generate the data
size = 200
baz = 10*np.random.randint(20, 25, size)*np.pi/180.
freq = 10*np.random.randint(1, 10, size)
pwr = 10*np.random.randint(-10, -1, size)
abins = np.linspace(0, 2*np.pi, 360) # 0 to 360 in steps of 360/N.
sbins = np.linspace(1, 100, 50)
H, xedges, yedges = np.histogram2d(baz, freq, bins=(abins,sbins), weights=pwr)
#Grid to plot your data on using pcolormesh
theta, r = np.mgrid[0:2*np.pi:360j, 1:100:50j]
fig, ax = plt.subplots(figsize=(14,14), subplot_kw=dict(projection='northpolar'))
ax.pcolormesh(theta, r, H)
ax.set_yticklabels([]) #remove yticklabels
plt.show()