How to create a swarm plot with matplotlib - matplotlib

I know the question is not very informative, but as I do not know the name of this type of plot, I cannot be more specific.
[EDIT] I changed the title, and now it is more informative...

You can do something similar with seaborn.swarmplot. I also use seaborn.boxplot (with the whiskers and caps turned off) to plot the mean and range:
import matplotlib.pyplot as plt
import seaborn as sns
# Draw every observation as a swarm, then overlay a stripped-down box plot
# (no caps, no fliers, zero-width whiskers, unfilled box) on the same data.
sns.set_style("whitegrid")
tips = sns.load_dataset("tips")
shared = dict(x="day", y="total_bill", data=tips)
ax = sns.swarmplot(**shared)
box_only = dict(
    showcaps=False,
    showfliers=False,
    boxprops={'facecolor': 'None'},
    whiskerprops={'linewidth': 0},
)
ax = sns.boxplot(**shared, **box_only)
plt.show()

If (for whatever reason) you don't want to use seaborn, you can have a go at making them yourself (see e.g. this explanation: https://www.flerlagetwins.com/2020/11/beeswarm.html ).
A simple version is:
#!/usr/bin/env python3
import matplotlib.pyplot as plt
import numpy as np
def simple_beeswarm(y, nbins=None):
    """
    Returns x coordinates for the points in ``y``, so that plotting ``x`` and
    ``y`` results in a bee swarm plot.

    The y range is cut into ``nbins`` bins of equal height. Within each bin
    the points are sorted by y and placed alternately to the right and to the
    left of ``x = 0``, moving outwards. The horizontal step is chosen from the
    fullest bin.

    Parameters
    ----------
    y : array_like
        One-dimensional data values.
    nbins : int, optional
        Number of bins along y. Defaults to ``len(y) // 6``, but never fewer
        than one, so short inputs still work.

    Returns
    -------
    numpy.ndarray
        x offsets, in the same order as ``y``. All zeros when no bin holds
        more than one point (including empty input).

    Raises
    ------
    ValueError
        If ``nbins`` is given and is smaller than 1.
    """
    y = np.asarray(y)
    x = np.zeros(len(y))
    if len(y) == 0:
        return x
    if nbins is None:
        # Fewer than six points would otherwise give zero bins.
        nbins = max(1, len(y) // 6)
    elif nbins < 1:
        raise ValueError("nbins must be at least 1")

    # Get upper bounds of bins (the last bin takes whatever is left over)
    ylo = np.min(y)
    yhi = np.max(y)
    dy = (yhi - ylo) / nbins
    ybins = np.linspace(ylo + dy, yhi - dy, nbins - 1)

    # Divide indices into bins, peeling off the lowest remaining points
    remaining_idx = np.arange(len(y))
    remaining_y = y
    bins = []
    for ybin in ybins:
        inside = remaining_y <= ybin
        bins.append((remaining_idx[inside], remaining_y[inside]))
        remaining_idx = remaining_idx[~inside]
        remaining_y = remaining_y[~inside]
    bins.append((remaining_idx, remaining_y))
    nmax = max(len(members) for members, _ in bins)

    # With at most one point per bin everything stays on the centre line;
    # this also avoids dividing by ``nmax // 2 == 0`` below.
    if nmax < 2:
        return x

    # Assign x positions
    dx = 1 / (nmax // 2)
    for members, values in bins:
        if len(members) > 1:
            # For an odd count the lowest point stays at x = 0 and the
            # remaining pairs are pushed slightly further out (odd / 3).
            odd = len(members) % 2
            members = members[np.argsort(values)]
            right = members[odd::2]
            left = members[odd + 1::2]
            offsets = (0.5 + odd / 3 + np.arange(len(left))) * dx
            x[right] = offsets
            x[left] = -offsets
    return x
# Demo: bee swarm of 100 gamma-distributed samples, written to bee.png
y = np.random.gamma(20, 10, 100)
x = simple_beeswarm(y)

fig = plt.figure(figsize=(2, 4))
fig.subplots_adjust(left=0.2, bottom=0.1, right=0.98, top=0.99)
ax = fig.add_subplot(111)
ax.plot(x, y, 'o')
fig.savefig('bee.png')

Related

Matplotlib clip or trim lines and polygon

How can I efficiently "trim" or "clip" or remove the portion of the red line outside of the purple box? Is there a trick with numpy masks?
Using Python 3.8.3 and Matplotlib
# Closed polygon outline (the first vertex is repeated at the end)
x = [10, 15.5, 12.5, 7.5, 5, 10]
y = [15, 10, 5, 5, 10, 15]
myinterval = 1.5
xvals = np.arange(min(x), max(x) + 1, myinterval)

fig, ax = plt.subplots()
ax.fill_between(x, y, facecolor="blue", alpha=0.25)
ax.axis("equal")
# One red line from y=0 to y=20 per x position; these are the lines to clip
for xpos in xvals:
    ax.plot([xpos, xpos], [0, 20], color='red')
This can be done without numpy masks. If I understand correctly, this is the code:
# Set up the figure and the filled polygon whose outline will clip the lines.
fig, ax = plt.subplots()
# Polygon vertices; the first point is repeated at the end to close the outline.
x = [10,15.5,12.5,7.5,5,10]
y = [15,10,5,5,10,15]
ax.fill_between(x,y, facecolor="blue", alpha=0.25)
ax.axis("equal")
# Spacing between the vertical lines, and the x positions derived from it.
myinterval = 1.5
xvals = np.arange(min(x), max(x)+1, myinterval)
def generate_equation(x, y):
    """Return the line equation of every non-vertical edge of a polyline.

    Consecutive points ``(x[i], y[i])`` and ``(x[i+1], y[i+1])`` form an edge
    described by ``y = m*x + b`` (so ``b = y - m*x``).

    Vertical edges (``x[i] == x[i+1]``) have no such equation and are
    skipped instead of producing a division by zero or inf/nan slope. Their
    end points are still covered by the neighbouring edges.

    Parameters
    ----------
    x, y : sequence or numpy.ndarray
        Vertex coordinates, of equal length.

    Returns
    -------
    M, B, left, right : list
        Four parallel lists with one entry per non-vertical edge: slope,
        intercept, and the smaller and larger x of the edge's end points.
    """
    M, B, left, right = [], [], [], []
    for x0, x1, y0, y1 in zip(x[:-1], x[1:], y[:-1], y[1:]):
        if x1 == x0:
            continue
        m = (y1 - y0) / (x1 - x0)
        M.append(m)
        B.append(y1 - m * x1)
        left.append(min(x0, x1))
        right.append(max(x0, x1))
    return M, B, left, right
M, B, left, right = generate_equation(np.array(x), np.array(y))
for xv in xvals:
    # y values at which the vertical line x = xv meets a polygon edge
    crossings = [
        m * xv + b
        for m, b, lo, hi in zip(M, B, left, right)
        if lo <= xv <= hi
    ]
    # xvals runs up to max(x) + 1, so a position can lie outside the polygon;
    # skip it rather than calling min()/max() on an empty list.
    if crossings:
        ax.vlines(xv, min(crossings), max(crossings), 'r')
Output:

Matplotlib Logarithmic Radar Charts - Remove all values below 0.5 and show last ytick

If you look in the logarithmic radar chart below, there are two changes I would like, if anyone knows the correct way to code:
1) Display a ytick label for the max value (51.81), as it currently gives the top value as 31.62.
2) A way to set all values below 0.1 to 0, without causing divide-by-zero errors.
# Polar bar chart of `sample` on a log10 radial axis, with debug prints
np.seterr(divide = 'warn')
fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(111, polar=True)

sample = samplelistmalshare
# Power of ten at or below the smallest entry of its argument
get_mag = lambda x: 10**min(np.floor(np.log10(x)))
init_mag = get_mag(sample)
print("init_mag")
print(init_mag)
print("gm")
print(get_mag)

# Rescale by that magnitude before taking log10 for the bar heights
sample = np.array(sample) / get_mag(sample)
N = len(sample)
theta = np.arange(0, 2 * np.pi, 2 * np.pi / N)
bars = ax.bar(theta, np.log10(sample), width=0.4, color = '#003F5C')

category_labels = [
    ' Delayed\n Execution',
    ' File\n Opening',
    'Firewall\nModification',
    'Permission \nModification ',
    'Persistence ',
    'Proxied \nExecution ',
    'Reconnaissance ',
    ' Registry\n Modification',
    ' Task\n Stopping',
]
ax.set_xticks(theta)
ax.set_xticklabels(category_labels, visible=False)

dat = np.log10(sample)
print(max(dat))
ax.set_ylim(0, max(dat))
ax.xaxis.grid(False)
ax.yaxis.grid(True)

precision = 2 # Change to your desired decimal precision
# Convert each log10 tick position back to the original data scale
tick_text = []
for tick in ax.get_yticks():
    tick_text.append(str(round((10 ** tick) * init_mag, precision)))
ax.set_yticklabels(tick_text)
for tick in ax.get_yticks():
    print(tick)
for label in ax.get_ymajorticklabels():
    print(label)
ax.set_rlabel_position(50)

plt.savefig('radarchartingmalshare.pdf',bbox_inches='tight')
fig.clf()
plt.clf()
One solution is to set yticks and yticklabels manually
right_end = 51.81
# Outer radial limit, expressed in the chart's log10 units
r_max = np.log10(right_end / init_mag)
ax.set_ylim(0, r_max)
y_ticks = np.linspace(0, r_max, 10)
ax.set_yticks(y_ticks)
# Label ticks in data units; the innermost tick (0) is shown as '0.00'
y_ticklabels = [
    '0.00' if tick == 0 else '%.2f' % (init_mag * 10**tick)
    for tick in ax.get_yticks()
]
ax.set_yticklabels(y_ticklabels)
With this manually set ticks and the labels
import numpy as np
from matplotlib import pyplot as plt
np.seterr(divide = 'warn')

sample = [35.417256011315416,0.028288543140028287,1.3578500707213579,3.3663366336633667,
          0.8203677510608205,35.445544554455445,3.3946251768033946,19.46251768033946,0.7072135785007072,]


def get_mag(x):
    """Return the power of ten at or below the smallest value in ``x``."""
    return 10**min(np.floor(np.log10(x)))


# Rescale by the smallest magnitude before taking log10 for the bar heights
init_mag = get_mag(sample)
sample = np.array(sample) / init_mag
dat = np.log10(sample)
N = len(sample)
theta = np.arange(0, 2 * np.pi, 2 * np.pi / N)

fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(111, polar=True)
bars = ax.bar(theta, dat, width=0.4, color = 'deepskyblue')
ax.set_xticks(theta)
ax.xaxis.grid(False)
ax.yaxis.grid(True)

# Manually placed radial ticks, running out to the desired maximum value
right_end = 51.81
r_max = np.log10(right_end / init_mag)
ax.set_ylim(0, r_max)
y_ticks = np.linspace(0, r_max, 10)
ax.set_yticks(y_ticks)
y_ticklabels = [
    '0.00' if tick == 0 else '%.2f' % (init_mag * 10**tick)
    for tick in ax.get_yticks()
]
ax.set_yticklabels(y_ticklabels)
ax.tick_params(axis='y',colors='darkviolet')
plt.show()

How to set an appropriate point of view?

How to set xlim and ylim to see both curves (omega and y) on a plot? Or how to verify that it is not possible?
import matplotlib.pyplot as plt
import numpy as np
# Constants used below (values look like SI elementary charge, electron mass,
# vacuum permittivity and speed of light -- confirm units).
e = 1.602176634e-19
m_e = 9.1093837015e-31
# Grid of k values with a step of 0.003
k = np.arange(0.00001, 50000, 0.003)
eps_0 = 8.8541878128e-12
n_0 = 100
c = 299792458
omega_p = np.sqrt(n_0*e**2/(eps_0*m_e))
# NOTE(review): c**2 and k**2 are *added* here; a dispersion relation with
# the asymptote y = k*c would need the product c**2 * k**2 -- confirm intent.
omega = np.sqrt(omega_p**2+c**2+k**2)
# Straight line k*c plotted alongside omega
y = k*c
fig, ax = plt.subplots()
# NOTE(review): this is set after plt.subplots() has already created the
# figure, so it presumably does not resize this figure -- confirm.
plt.rcParams["figure.figsize"] = [5, 5]
# Plot
ax.yaxis.set_label_coords(-0.07, 0.84)
ax.xaxis.set_label_coords(0.95, -0.05)
ax.set_xlabel(r'$k$')
ax.set_ylabel(r'$\omega$', rotation='horizontal')
ax.set_xlim(10000, 40000)
ax.set_ylim(299792454, 299792462.1700816)
ax.plot(k, omega)
ax.plot(k, y)
# Focusing on appropriate part
# Debug output: differences between neighbouring omega samples and a few
# individual values of k and omega.
print(omega[1000000]-omega[999999])
print(omega[-1]-omega[-2])
print(len(omega))
print(k[1000000])
print(k[-1])
print(omega[1000000])
print(omega[-1])
# NOTE(review): the x-limits (values of k) are used here as array *indices*
# into y, not as k values -- confirm this is intended.
print(y[int(ax.get_xlim()[0])])
print(y[int(ax.get_xlim()[1])])
plt.show()
The output now:
There should also be an asymptote.
An idea is to just let matplotlib choose its default limits. Then you can interactively zoom in to an area of interest. The code below sets a log scale for the y-axis, which might help to fit everything. In order to avoid too many points, the 16 million points of np.arange(0.00001, 50000, 0.003) are replaced by np.linspace(0.00001, 50000, 10000).
import matplotlib.pyplot as plt
import numpy as np
# Physical constants (SI values)
e = 1.602176634e-19        # elementary charge
m_e = 9.1093837015e-31     # electron mass
eps_0 = 8.8541878128e-12   # vacuum permittivity
c = 299792458              # speed of light
n_0 = 100                  # electron density

# The original grid, np.arange(0.00001, 50000, 0.003), has about 16 million
# points; 10000 evenly spaced points are plenty for plotting.
k = np.linspace(0.00001, 50000, 10000)

omega_p = np.sqrt(n_0 * e ** 2 / (eps_0 * m_e))
# Dispersion relation omega^2 = omega_p^2 + c^2 * k^2. The k term must be
# multiplied by c^2 (not added to it); only then does omega approach the
# asymptote y = k * c for large k.
omega = np.sqrt(omega_p ** 2 + c ** 2 * k ** 2)
y = k * c

# The default figure size has to be set before the figure is created.
plt.rcParams["figure.figsize"] = [5, 5]
fig, ax = plt.subplots()
ax.set_xlabel(r'$k$')
ax.set_ylabel(r'$\omega$', rotation='horizontal')
ax.plot(k, omega, color='blue')
ax.plot(k, y, color='red')
# A log scale keeps both curves visible over several orders of magnitude.
ax.set_yscale('log')
plt.show()

Matplotlib - plot lines using same colormap as scatter

Using Matplotlib, I am trying to plot moderation analysis lines on top of some scatter data. How do I set the colour of the lines to match the values in the colormap used for the scatter data?
import numpy as np
import matplotlib.pyplot as plt
plt.rcParams.update({"font.size": 14})

# MAKE DATA: y depends on x, with a slope that varies with the moderator
N = 100
β0, β1, β2, β3, σ = 0.5, 1, 2, 0, 0.05
x = np.random.randn(N)
moderator = np.random.randn(N)
μ = β0 + β1 * x + β2 * x * moderator + β3 * moderator
ϵ = np.random.randn(N) * σ
y = μ + ϵ

# PLOT DATA, coloured by the moderator value
fig, ax = plt.subplots(figsize=(7, 7))
h = ax.scatter(x, y, c=moderator)
ax.set(xlabel="x", ylabel="y")

# colourbar for moderator
cbar = fig.colorbar(h)
cbar.ax.set_ylabel("moderator")

# PLOT EXAMPLE LINES FOR TRUE MODEL (two end points per line are enough)
x_lo, x_hi = np.min(x), np.max(x)
_x = np.linspace(x_lo, x_hi, 2)
m_levels = np.linspace(x_lo, x_hi, 5)
for m in m_levels:
    μ = β0 + β1 * _x + β2 * _x * m + β3 * m
    ax.plot(_x, μ, lw=3)
Solution seems to set up a scalarMap and use normalize...
import numpy as np
import matplotlib.pyplot as plt
# In a Jupyter notebook you can additionally run the magic
#   %config InlineBackend.figure_format = 'retina'
# for sharper inline figures; it is not valid syntax in a plain .py script.
from matplotlib.cm import ScalarMappable
from matplotlib.colors import Normalize

plt.rcParams.update({"font.size": 14})

# MAKE DATA: y depends on x, with a slope that varies with the moderator
N = 500
β0, β1, β2, β3, σ = 0.5, 1, 2, 0, 0.05
x = np.random.randn(N)
moderator = np.random.randn(N)
μ = β0 + β1 * x + β2 * x * moderator + β3 * moderator
ϵ = np.random.randn(N) * σ
y = μ + ϵ

fig, ax = plt.subplots(figsize=(7, 7))

# set up scalarMap so we can access colormap across both scatter and lines
normalize = Normalize(vmin=np.min(moderator), vmax=np.max(moderator))
scalarMap = ScalarMappable(norm=normalize, cmap="viridis")

# PLOT EXAMPLE LINES FOR TRUE MODEL
_x = np.linspace(np.min(x), np.max(x), 2)
# The levels are values of the *moderator*, so they must span the moderator's
# range (not x's); otherwise the line colours are clipped by the normaliser.
m_levels = np.linspace(np.min(moderator), np.max(moderator), 5)
for m in m_levels:
    μ = β0 + β1 * _x + β2 * _x * m + β3 * m
    ax.plot(_x, μ, lw=3, c=scalarMap.to_rgba(m))

# PLOT DATA with the same colormap and normalisation as the lines
h = ax.scatter(x, y, c=moderator, cmap=scalarMap.cmap, norm=normalize)
ax.set(xlabel="x", ylabel="y")

# colourbar for moderator
cbar = fig.colorbar(h)
cbar.ax.set_ylabel("moderator")

contour lines from the edge of a map don't show up on basemap

I'm drawing several contour lines over a basemap projection as shown in the following figure:.
There are 3 contours that are not drawn completely (in Oregon, Washington and California) and seems like there is this line that has cut all 3 of them in the same latitude. I'm not sure how to solve this problem.
I increased the number of interpolation points, but that didn't help. Changing the ll and ur points to include more area didn't help either.
The code is below (not reproducible but might help):
# Draw a bivariate-Gaussian contour for each row of `mus` on a Basemap map
# bounded by the corner coordinates below, then save the figure under
# ./maps/video/.
# NOTE(review): indentation was lost in this listing, so the nesting of the
# loop and if bodies below cannot be read from the text -- confirm against
# the original source.
# NOTE(review): the code uses Python 2 constructs (`xrange`, `itervalues`).
def visualise_bigaus(mus, sigmas, corxys , output_type='pdf', **kwargs):
# Lower-left and upper-right corners of the map
lllat = 24.396308
lllon = -124.848974
urlat = 49.384358
urlon = -66.885444
fig = plt.figure(figsize=(4, 2.5))
# NOTE(review): `axisbg` is presumably the older Matplotlib spelling of
# `facecolor` -- confirm against the Matplotlib version in use.
ax = fig.add_subplot(111, axisbg='w', frame_on=False)
m = Basemap(llcrnrlat=lllat,
urcrnrlat=urlat,
llcrnrlon=lllon,
urcrnrlon=urlon,
resolution='i', projection='cyl')
m.drawmapboundary(fill_color = 'white')
#m.drawcoastlines(linewidth=0.2)
m.drawcountries(linewidth=0.2)
m.drawstates(linewidth=0.2, color='lightgray')
#m.fillcontinents(color='white', lake_color='#0000ff', zorder=2)
#m.drawrivers(color='#0000ff')
m.drawlsmask(land_color='gray',ocean_color="#b0c4de", lakes=True)
# Pass the corner points and the means through the Basemap instance `m`
# (column 1 of `mus` is used as longitude, column 0 as latitude).
lllon, lllat = m(lllon, lllat)
urlon, urlat = m(urlon, urlat)
mlon, mlat = m(*(mus[:,1], mus[:,0]))
numcols, numrows = 1000, 1000
# NOTE(review): X starts at mlon.min() (the westernmost mean) while Y spans
# the whole map from lllat to urlat, so the grid has no points west of
# mlon.min() and contours cannot be drawn there.
X = np.linspace(mlon.min(), urlon, numcols)
Y = np.linspace(lllat, urlat, numrows)
X, Y = np.meshgrid(X, Y)
m.scatter(mlon, mlat, s=0.2, c='red')
shp_info = m.readshapefile('./data/us_states_st99/st99_d00','states',drawbounds=True, zorder=0)
printed_names = []
# Hide both axes and all spines
ax = plt.gca()
ax.xaxis.set_visible(False)
ax.yaxis.set_visible(False)
for spine in ax.spines.itervalues():
spine.set_visible(False)
# One Gaussian per row of `mus`
for k in xrange(mus.shape[0]):
#here x is longitude and y is latitude
#apply softplus to sigmas (to make them positive)
sigmax=np.log(1 + np.exp(sigmas[k][1]))
sigmay=np.log(1 + np.exp(sigmas[k][0]))
mux=mlon[k]
muy=mlat[k]
corxy = corxys[k]
#apply the soft sign
corxy = corxy / (1 + np.abs(corxy))
#now given corxy find sigmaxy
sigmaxy = corxy * sigmax * sigmay
#corxy = 1.0 / (1 + np.abs(sigmaxy))
# NOTE(review): mlab.bivariate_normal may not exist in newer Matplotlib
# releases -- confirm against the version in use.
Z = mlab.bivariate_normal(X, Y, sigmax=sigmax, sigmay=sigmay, mux=mux, muy=muy, sigmaxy=sigmaxy)
#Z = maskoceans(X, Y, Z)
# Single contour line at density level 0.02
con = m.contour(X, Y, Z, levels=[0.02], linewidths=0.5, colors='darkorange', antialiased=True)
# The triple-quoted string below is disabled code, kept as a string literal.
'''
num_levels = len(con.collections)
if num_levels > 1:
for i in range(0, num_levels):
if i != (num_levels-1):
con.collections[i].set_visible(False)
'''
# Contour labels are switched off by this hard-coded flag
contour_labels = False
if contour_labels:
plt.clabel(con, [con.levels[-1]], inline=True, fontsize=10)
# The triple-quoted string below is disabled code, kept as a string literal.
'''
world_shp_info = m.readshapefile('./data/CNTR_2014_10M_SH/Data/CNTR_RG_10M_2014','world',drawbounds=False, zorder=100)
for shapedict,state in zip(m.world_info, m.world):
if shapedict['CNTR_ID'] not in ['CA', 'MX']: continue
poly = MplPolygon(state,facecolor='gray',edgecolor='gray')
ax.add_patch(poly)
'''
# NOTE(review): `iter` is not a parameter and is assigned below, which makes
# it a local name; reading it in `if iter:` before any assignment looks like
# it would fail -- presumably it was meant to come from the arguments; confirm.
if iter:
iter = str(iter).zfill(3)
else:
iter = ''
plt.tight_layout()
# Output path is built from the `iter` suffix and `output_type`
plt.savefig('./maps/video/gaus_' + iter + '.' + output_type, frameon=False, dpi=200)
The problem is the meshgrid not covering the complete map. The meshgrid simply doesn't have any points at the positions where you want to draw the gaussian contour line.
An example to reproduce this behaviour is the following, where the meshgrid in x direction starts at -1, such that points lower than that are not drawn.
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
import numpy as np
fig, ax = plt.subplots()
# Invisible line that stretches the x axis to [-2, 2], wider than the mesh
ax.plot([-2, 2], [-2, -2], alpha=0)
# The mesh deliberately starts at x = -1, so nothing can be contoured left of it
X, Y = np.meshgrid(np.linspace(-1, 2), np.linspace(-2, 2))
# Uncorrelated bivariate normal density, written out with numpy because
# mlab.bivariate_normal is no longer available in current Matplotlib.
sigmax, sigmay, mux, muy = 1., 1., 0.1, 0.1
Z = np.exp(-0.5 * (((X - mux) / sigmax) ** 2 + ((Y - muy) / sigmay) ** 2))
Z = Z / (2 * np.pi * sigmax * sigmay)
con = ax.contour(X, Y, Z, levels=[Z.max()/3, Z.max()/2., Z.max()*0.8], colors='darkorange')
plt.show()
A similar problem occurs in the code from the question.
While in Y direction, you use the complete map, Y = np.linspace(lllat, urlat, numrows), in X direction you restrict the mesh to start at mlon.min(),
X = np.linspace(mlon.min(), urlon, numcols)
The solution would of course be not to start the mesh in Portland, but somewhere in the ocean, i.e. at the edge of the shown map.