Matplotlib Venn diagram with legend - matplotlib

I'm using the matplotlib-venn package for drawing Venn diagrams in Python. This package works nicely for drawing Venn diagrams with two or three sets. However, when one of the sets is much larger than the others, the counts in the smaller circles can get close or overlap. Here's an example.
from collections import Counter
import matplotlib.pyplot as plt
from matplotlib_venn import venn2, venn3
# Region sizes keyed by the region-id strings that venn2 accepts as `subsets`.
sets = Counter({'01': 3000, '11': 3, '10': 5})
setLabels = ['set1', 'set2']

# Open a fresh figure and draw the two-set diagram on its axes.
ax = plt.figure().gca()
v = venn2(subsets=sets, set_labels=setLabels, ax=ax)
ax.set_title('Venn Diagram')
plt.show()
What I'm looking to do is move the counts (in this case, 3000, 3, and 5) to a legend with colors matching those in the diagram. Wasn't sure how to do this with matplotlib_venn.

You may replace the labels for the venn diagram with empty strings and instead create a legend from the patches of the venn and the respective counts as follows:
from collections import Counter
import matplotlib.pyplot as plt
from matplotlib_venn import venn2, venn3
# Region sizes keyed by region id; the same ids are used below to look up
# the label and patch that venn2 created for each region.
sets = Counter()
sets['01'] = 3000
sets['11'] = 3
sets['10'] = 5
setLabels = ['set1', 'set2']

plt.figure()
ax = plt.gca()
v = venn2(subsets=sets, set_labels=setLabels, ax=ax)

h, l = [], []
for i in sets:
    label = v.get_label_by_id(i)
    patch = v.get_patch_by_id(i)
    # remove the in-diagram count by setting it to an empty string;
    # the lookup yields None for a region that was not drawn
    if label is not None:
        label.set_text("")
    # a region without a patch has no colour to show in the legend
    if patch is None:
        continue
    # append patch to handles list and its count to labels list
    h.append(patch)
    l.append(sets[i])

# create legend from handles and labels
ax.legend(handles=h, labels=l, title="counts")
plt.title('Venn Diagram')
plt.show()

Related

How to do a Nested Proportional Area Chart (circles)?

I'm looking for anything in python that I can use to do a nested proportional area chart in circles. Preferably something built with (or on top of) matplotlib. Here's an example of what such plot looks like for reference:
A nested circle diagram, where the circle area is proportional to the data could look as follows.
It would take a sorted list or array of data and optionally the respective labels as input and plot a couple of circles.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.collections import PatchCollection
def nested_circles(data, labels=None, c=None, ax=None,
                   cmap=None, norm=None, textkw=None):
    """Draw nested circles whose areas are proportional to ``data``.

    Each circle is centred at ``(0, r)`` with radius
    ``r = sqrt(value / data.max())``, so all circles touch at the origin.
    The axes are switched off, set to equal aspect and autoscaled.

    Parameters
    ----------
    data : sequence of numbers
        Values to draw, expected in ascending order; label placement
        assumes every circle contains the previous one.
    labels : sequence, optional
        One label per entry of ``data``. The first is placed at the centre
        of the innermost circle, the others in the ring between a circle
        and the next smaller one.
    c : sequence, optional
        Values used for colour mapping instead of ``data``.
    ax : matplotlib Axes, optional
        Target axes; defaults to ``plt.gca()``.
    cmap, norm : optional
        Passed through to ``PatchCollection``.
    textkw : dict, optional
        Keyword arguments for ``ax.text`` that override the defaults
        (white, centred text).

    Returns
    -------
    PatchCollection
        The collection added to ``ax``.

    Raises
    ------
    ValueError
        If ``data`` is empty.
    """
    if ax is None:
        ax = plt.gca()
    data = np.array(data)
    if data.size == 0:
        raise ValueError("nested_circles() requires at least one data value")

    # area ~ value  =>  radius ~ sqrt(value); normalised by the maximum
    R = np.sqrt(data/data.max())
    # largest circle first, so that smaller circles are drawn on top of it
    p = [plt.Circle((0, r), radius=r) for r in R[::-1]]
    arr = data[::-1] if c is None else np.array(c[::-1])
    col = PatchCollection(p, cmap=cmap, norm=norm, array=arr)
    ax.add_collection(col)
    ax.axis("off")
    ax.set_aspect("equal")
    ax.autoscale()

    if labels is not None:
        kw = dict(color="white", va="center", ha="center")
        kw.update(textkw or {})
        for i, r in enumerate(R):
            # Circle i spans y in [0, 2*R[i]]: the innermost label goes at
            # its centre, every other one midway between the tops of
            # circle i-1 and circle i.
            y = r if i == 0 else r + R[i - 1]
            ax.text(0, y, labels[i], **kw)
    return col
Usage might look like
# Five circles labelled A to E, coloured by their data value.
labels = list("ABCDE")
data = [1, 3, 4, 5, 6]
nested_circles(
    data,
    labels=labels,
    cmap="copper",
    textkw={"fontsize": 14},
)
plt.show()
If you want a different colorcoding, take the c argument and supply another list of values, e.g.
# Same circles, but coloured by a separate list of codes and
# accompanied by a colorbar for those codes.
labels = list("ABCDE")
data = [1, 3, 4, 5, 6]
codes = [5, 3, 1, 4, 2]
circles = nested_circles(
    data,
    labels=labels,
    c=codes,
    cmap="plasma",
    textkw={"color": "black", "fontsize": 14},
)
plt.colorbar(circles, label="Codes")
plt.title("Diagram")
plt.show()

Python keeps overwriting hist on previous plot but doesn't save it with the desired plot

I am saving two separate figures, that each should contain 2 plots together.
The problem is that the first figure is fine, but the second one is not drawn on the new plot; it is drawn over the previous one instead, and in the saved figure I only find one of the plots:
This is the first figure , and I get the first figure correctly :
import scipy.stats as s
import numpy as np
import os
import pandas as pd
import openpyxl as pyx
import matplotlib
matplotlib.rcParams["backend"] = "TkAgg"
#matplotlib.rcParams['backend'] = "Qt4Agg"
#matplotlib.rcParams['backend'] = "nbAgg"
import matplotlib.pyplot as plt
import math
data = [336256, 620316, 958846, 1007830, 1080401]
# Sample of pdf values; replaced by the computed curve at the end of this block.
pdf = np.array([0.00449982, 0.0045293, 0.00455894, 0.02397463,
                0.02395788, 0.02394114])

# NOTE: plt.subplots() opens one figure and plt.figure() opens a second,
# larger one; the pyplot calls that follow draw on the most recent figure.
fig, ax = plt.subplots()
fig = plt.figure(figsize=(40, 30))

# Fitted exponentiated-Weibull density over the data range, plus the histogram.
x = np.linspace(np.min(data), np.max(data), 100)
plt.plot(x, s.exponweib.pdf(x, *s.exponweib.fit(data, 1, 1, loc=0, scale=2)))
# NOTE: `normed=True` is kept because the discussion of this snippet refers
# to it; newer Matplotlib releases expect `density=True` instead.
plt.hist(data, bins=np.linspace(data[0], data[-1], 100), normed=True, alpha=1)
text1 = ' Weibull'
plt.savefig(text1 + '.png')

# Log-normal parameters derived from the sample mean and standard deviation.
datar = np.asarray(data)
mu, sigma = datar.mean(), datar.std()  # mean and standard deviation
normal_std = np.sqrt(np.log(1 + (sigma/mu)**2))
normal_mean = np.log(mu) - normal_std**2 / 2
hs = np.random.lognormal(normal_mean, normal_std, 1000)
print(hs.max())   # some finite number
print(hs.mean())  # about 136519
print(hs.std())   # about 50405

# Histogram of the random sample; its bin edges set the range of the curve.
count, bins, ignored = plt.hist(hs, 100, normed=True)
x = np.linspace(min(bins), max(bins), 10000)
# Evaluate exp(-(ln x - mean)^2 / (2 std^2)) for the whole grid at once
# instead of looping over the 10000 points one by one.
pdf = np.exp(-(np.log(x) - normal_mean)**2 / (2 * normal_std**2))
This is the second set :
# Second figure: the computed curve (x, pdf) in red together with a
# histogram of the original data, saved as ' Lognormal .png'.
# NOTE(review): plt.subplots() and plt.figure() each open a figure here;
# presumably only the second one is meant to be drawn on - confirm.
fig, ax = plt.subplots();
fig = plt.figure(figsize=(40,40))
plt.plot(x, pdf, linewidth=2, color='r')
# Same bins and normed=True setting as used for the first figure's histogram.
plt.hist(data, bins = np.linspace(data[0], data[-1], 100), normed=True, alpha= 1)
text= ' Lognormal '
plt.savefig(text+ '.png' )
The first plot saves the histogram together with the curve; the second one, however, only saves the curve.
Update 1: looking at this question, I found out that clearing the plot history keeps the figures from getting mixed up, but my second set of plots (the lognormal one) is still not saved together: I only get the curve and not the histogram.
This is happening because you have set normed=True, which means that the area under the histogram is normalized to 1. Since your bins are very wide, the actual heights of the histogram bars are very small (in this case so small that they are not visible).
If you use
n, bins, _ = plt.hist(data, bins = np.linspace(data[0], data[-1], 100), normed=True, alpha= 1)
n will contain the y-value of your bins and you can confirm this yourself.
Also have a look at the documentation for plt.hist.
So if you set normed to False, the histogram will be visible.
Edit: number of bins
import numpy as np
import matplotlib.pyplot as plt
# One set of 100 uniform samples, shown twice with different bin counts.
rand_data = np.random.uniform(0, 1.0, 100)
fig = plt.figure()
# Top panel uses 10 bins, bottom panel uses 100 bins.
for row, n_bins in enumerate((10, 100), start=1):
    panel = fig.add_subplot(2, 1, row)
    panel.hist(rand_data, bins=n_bins)
plt.show()
will give you two plots similar (since it's random) to:
which shows how the number of bins changes the histogram.
A histogram visualises the distribution of your data along one dimension, so not sure what you mean by number of inputs and bins.

Plot multiple lines with matplotlib, using only 3 lists/arrays

I would like to plot say 10 lines in 3D in matplotlib, but without having to use ax.plot(x,y,z) 10 times.
This is the ridiculous code I've come up with b/c I can't envision how the zip and arrays actually work together.
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
# One 3D axes; five line segments are drawn on it, each described by three
# 2-element arrays holding the x, y and z coordinates of its two endpoints.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
# Segment 1: x from 0 to 3 at y = 0, z = 0.
x = np.array([0.,3.])
y = np.array([0.,0.])
z = np.array([0.,0.])
# Segment 2: same x range at y = 0.5.
u = np.array([0.,3.])
v = np.array([.5,.5])
w = np.array([0.,0.])
# Segment 3: same x range at y = 1.
a = np.array([0.,3.])
b = np.array([1.,1.])
c = np.array([0.,0.])
# Segment 4: same x range at y = 1.5 (note the order: e is x, d is y, f is z).
e = np.array([0.,3.])
d = np.array([1.5,1.5])
f = np.array([0.,0.])
# Segment 5: same x range at y = 2.
r = np.array([0.,3.])
s = np.array([2.,2.])
t = np.array([0.,0.])
ax.set_xlabel("x axis")
ax.set_ylabel("y axis")
ax.set_zlabel("z axis")
# One plot call per segment - the repetition the question wants to avoid.
ax.plot(x,y,z)
ax.plot(a,b,c)
ax.plot(r,s,t)
ax.plot(u,v,w)
ax.plot(e,d,f)
plt.show()
I'm guessing I'll use zip and/or a for loop.
Thanks, and here's the figure.
You could store all your data points in a large data array. This way you can loop over the array and do something like this:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
# initialize a numeric array with one slot per line:
# shape (number of lines, 3 coordinates, 2 endpoints); NaN = not filled yet
data = np.full((2, 3, 2), np.nan)
# fill data array with data points [x,y,z]
data[0] = [[0, 3], [0, 0], [0, 0]]
data[1] = [[0, 3], [0.5, 0.5], [0, 0]]
# etc...
# loop over data array and plot lines
for xs, ys, zs in data:
    ax.plot(xs, ys, zs)
plt.show()
There are many different ways to store your data; you could also skip the initialization step by creating the array in one go:
data = np.array([[[0,3],[0,0],[0,0]],
[[0,3],[0.5,0.5],[0,0]],
[[0,3],[0.5,0.5],[0,0]],
[...] ])
Or use numpy functions like numpy.concatenate to add new lines to the data array.

matplotlib, reusing lines in different figures

The intent is to create, in an optimal way, a number of detailed figures and a summary figure where you can compare the relative size of the different solutions.
To show you what I want, I've made up the following minimal example
import matplotlib.pyplot as plt
import numpy as np
# One detail figure is produced per value of p, which scales both the
# frequency and the amplitude of the curve sin(p*t)/p.
parameters = [2, 3]
t = np.linspace(0, 5, 501)
# Summary figure that is meant to collect every curve.
f_all = plt.figure()
a_all = f_all.add_subplot(111)
for p in parameters:
    # Detail figure for this parameter.
    fig = plt.figure()
    ax = fig.add_subplot(111)
    l, = ax.plot(t, np.sin(p*t)/p,
                 label='$\\omega=%d,\\quad{p}_0=%5.3f$'%(p,1./p))
    # The same line object is handed to the summary axes before the detail
    # figure has been saved.
    a_all.add_line(l)
    ax.legend()
    fig.savefig('pippo_%d'%p)
a_all.legend()
f_all.savefig('pippo_a')
the expected result consists of 3 figures, two with a sine curve each, spanning the [0,5] interval, and one with the two curves combined.
OTOH, below you can find what I've got. Of course there is something (a very fundamental something!) that I'm missing.
I could take a different approach, using an a_all.plot(...) in the inner loop (tested, it works!), but now I'm curious whether there is a way to reuse a line, so I'm here seeking your help.
add the line to a_all after you save the first figure:
# Relies on parameters, t and a_all being defined beforehand.
for p in parameters:
    fig = plt.figure()
    ax = fig.add_subplot(111)
    l, = ax.plot(t, np.sin(p*t)/p,
                 label='$\\omega=%d,\\quad{p}_0=%5.3f$'%(p,1./p))
    ax.legend()
    # Save the detail figure first ...
    fig.savefig('pippo_%d'%p)
    # ... and only afterwards hand the line over to the summary axes.
    a_all.add_line(l)
EDIT:
Then, you need to set the transform for the new lines on a_all to move them to the new axis. You'll probably also need to manually set the x and y limits.
Here's the complete code:
import matplotlib.pyplot as plt
import numpy as np
parameters = [2, 3]
t = np.linspace(0, 5, 501)

# Summary figure that collects the curve of every detail figure.
f_all = plt.figure()
a_all = f_all.add_subplot(111)

for p in parameters:
    # Draw and save the detail figure before the line is moved.
    fig = plt.figure()
    ax = fig.add_subplot(111)
    curve_label = '$\\omega=%d,\\quad{p}_0=%5.3f$'%(p,1./p)
    (l,) = ax.plot(t, np.sin(p*t)/p, label=curve_label)
    ax.legend()
    fig.savefig('pippo_%d.png'%p)
    # NOTE(review): recent Matplotlib versions may refuse to add an artist
    # that already belongs to another Axes - confirm for the version in use.
    a_all.add_line(l)

# Re-anchor every collected line to the data coordinates of the summary axes.
for newline in a_all.lines:
    newline.set_transform(a_all.transData)

# The limits are set by hand, then the summary figure is saved.
a_all.set_xlim(0, 5)
a_all.set_ylim(-1, 1)
a_all.legend()
f_all.savefig('pippo_a.png')

How can I draw single points on a plot already containing data?

In a while loop I'm updating two sets of data in a plot (some data X and a threshold). Now I'd like to add single points (peaks of X) on the same plot. How can I do that?
import matplotlib.pyplot as plt
# Sketch of a live-updating plot. np, windowSize, somecondition and the
# new_*_data values are not defined in this snippet - presumably they come
# from surrounding code.
plt.ion()
fig = plt.figure()
plt_ps = fig.add_subplot(111)
# initialize plots
powerspectrum, = plt_ps.plot(np.zeros([windowSize,]))
threshold, = plt_ps.plot(np.zeros([windowSize,]))
peaks, = plt_ps.plot([], [], 'or') # peaks will just be a set of coordinates, eg peaks_x=[2,4,7] and peaks_y=[3,7,6]
while(somecondition):
    # some data processing
    # Only the y-values of the two existing lines are replaced on each pass.
    powerspectrum.set_ydata(new_powerspectrum_data)
    threshold.set_ydata(new_threshold_data)
    #peaks.? how do I set new peaks? Tried peaks.set_data(peaks_x, peaks_y) but peaks do not show up
    # Recompute the data limits and rescale the view before redrawing.
    plt_ps.relim()
    plt_ps.autoscale_view()
    fig.canvas.draw()
Just use plot with the right style:
import matplotlib.pyplot as plt
# Coordinates of the individual points to draw.
xs = [1, 2, 5, 3, 6, 7, 1, 3, 4, 5, 2, 6, 7, 8, 2, 1]
ys = [3, 4, 5, 2, 7, 1, 3, 4, 1, 2, 3, 4, 5, 2, 3, 1]
# Point markers only, with no connecting line.
plt.plot(xs, ys, marker='.', linestyle='None')
plt.show()