Pollution rose plot gridded - matplotlib

I am trying to create a pollution rose plot as described in the link Plotting Windrose: making a pollution rose with concentration set to color
Example in the reply is working but when I used my data then it is giving a weird plot. Any advice where I am going wrong? Thank you.
import matplotlib.pyplot as plt
import numpy as np
wd = [90.,297.,309.,336.,20.,2.,334.,327.,117.,125.,122.,97.,95.,97.,103.,106.,125.,148.,147.,140.,141.,145.,144.,151.,161.]
ws = [15,1.6,1.8,1.7,2.1,1.6,2.1,1.4,3,6.5,7.1,8.2,10.2,10.2,10.8,10.2,11.4,9.7,8.6,7.1,6.4,5.5,5,5,6]
oz = [10.,20.,30.,40.,50.,60.,70.,80.,90.,100.,110.,120.,90.,140.,100.,106.,125.,148.,147.,140.,141.,145.,144.,151.,161.]
pi_fac = 22/(7*180.)
wd_rad = [w * pi_fac for w in wd]
ws_r = np.linspace(min(ws),max(ws),16)
WD,WS = np.meshgrid(wd_rad,ws_r)
C = oz + np.zeros((len(ws_r),len(wd)),dtype=float)
C = np.ma.masked_less_equal(C,10)
fig, ax = plt.subplots(subplot_kw={"projection":"polar"})
ax.pcolormesh(WD,WS,C,vmin=10, vmax=170) # I tried different vmin and vmax too
plt.show()

The linked post assumes you have a regular grid for directions and for speeds, but your input seems to be quite unordered combinations.
To create a plot with colored regions depending on the oz values, you could try tricontourf. tricontourf takes in X, Y and Z values that don't need to lie on a grid and creates a contour plot. Although it is meant for rectangular layouts, it might also work for your case. It will have a discontinuity though, when crossing from 360º to 0º.
The plot of this example also draws a colorbar to show which range of oz values correspond to which color. vmin and vmax can change this mapping of colors.
import matplotlib.pyplot as plt
import numpy as np
wd = [90, 297, 309, 336, 20, 2, 334, 327, 117, 125, 122, 97, 95, 97, 103, 106, 125, 148, 147, 140, 141, 145, 144, 151, 161]
ws = [15, 1.6, 1.8, 1.7, 2.1, 1.6, 2.1, 1.4, 3, 6.5, 7.1, 8.2, 10.2, 10.2, 10.8, 10.2, 11.4, 9.7, 8.6, 7.1, 6.4, 5.5, 5, 5, 6]
oz = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 90, 140, 100, 106, 125, 148, 147, 140, 141, 145, 144, 151, 161]
fig, ax = plt.subplots(subplot_kw={"projection": "polar"})
cont = ax.tricontourf(np.radians(np.array(wd)), ws, oz, cmap='hot')
plt.colorbar(cont)
plt.show()
With ax.scatter(np.radians(np.array(wd)), ws, c=oz, cmap='hot', vmax=250) you could create a scatter plot to get an idea how the input looks like when colored.
You might want to incorporate Python's windrose library to get polar plots to resemble a windrose.
Another approach, which might be closer to the one intended by the linked question, would be to use scipy's interpolate.griddata to map the data to a grid. To get rid of the areas without data, an 'under' color of 'none' can be used, provided that vmin is higher than zero.
import matplotlib.pyplot as plt
import numpy as np
from scipy import interpolate
wd = [90, 297, 309, 336, 20, 2, 334, 327, 117, 125, 122, 97, 95, 97, 103, 106, 125, 148, 147, 140, 141, 145, 144, 151, 161]
ws = [15, 1.6, 1.8, 1.7, 2.1, 1.6, 2.1, 1.4, 3, 6.5, 7.1, 8.2, 10.2, 10.2, 10.8, 10.2, 11.4, 9.7, 8.6, 7.1, 6.4, 5.5, 5, 5, 6]
oz = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 90, 140, 100, 106, 125, 148, 147, 140, 141, 145, 144, 151, 161]
wd_rad = np.radians(np.array(wd))
oz = np.array(oz, dtype=np.float)
WD, WS = np.meshgrid(np.linspace(0, 2*np.pi, 36), np.linspace(min(ws), max(ws), 16 ))
Z = interpolate.griddata((wd_rad, ws), oz, (WD, WS), method='linear')
fig, ax = plt.subplots(subplot_kw={"projection": "polar"})
cmap = plt.get_cmap('hot')
cmap.set_under('none')
img = ax.pcolormesh(WD, WS, Z, cmap=cmap, vmin=20)
plt.colorbar(img)
plt.show()

Related

How can I draw a scatter plot using matplotlib, each x_tick for one column

I have a dataset like:
a1 = [81, 42, 73, 94, 85, 66]
a2 = [63, 55, 79, 65, 94, 76]
a3 = [3, 5, 4, 8, 7, 6]
I want to draw a scatter plot that the x_ticks will be 'a1', 'a2', 'a3'
and the each y_tick is the data in a1, a2 and a3.
for example above a1 x_tick there's 6 dots.[81, 42, 73, 94, 85, 66]
EDIT: Sorry it was a stupid question, and my words aren't explicit as well, I was just trying to draw a simple box plot.
From what I could make of the question, this could be one simple implementation:
import matplotlib.pyplot as plt
a1 = [81, 42, 73, 94, 85, 66]
a2 = [63, 55, 79, 65, 94, 76]
a3 = [3, 5, 4, 8, 7, 6]
plt.scatter(x=[0]*len(a1), y=a1)
plt.scatter(x=[1]*len(a2), y=a2)
plt.scatter(x=[2]*len(a3), y=a3)
plt.xticks(ticks=[0,1,2], labels=["a1", "a2", "a3"])
plt.show()
With the following output:
If you also want to only display the y values at the y axis you can add this line:
plt.yticks(ticks=a1+a2+a3, labels=a1+a2+a3)
But for y values that are very close to each other (see values for a3) this will get crowded.

The picture can't display in the google colab

python, pyecharts, google colab
It seems get the picture, but why can's see anything
enter image description here
from pyecharts.globals import CurrentConfig, NotebookType
CurrentConfig.NOTEBOOK_TYPE = NotebookType.JUPYTER_LAB
from pyecharts.charts import Bar
from pyecharts import options as opts
# V1 版本开始支持链式调用
bar = (
Bar()
.add_xaxis(["衬衫", "毛衣", "领带", "裤子", "风衣", "高跟鞋", "袜子"])
.add_yaxis("商家A", [114, 55, 27, 101, 125, 27, 105])
.add_yaxis("商家B", [57, 134, 137, 129, 145, 60, 49])
.set_global_opts(title_opts=opts.TitleOpts(title="某商场销售情况"))
)
bar.render()
# 不习惯链式调用的开发者依旧可以单独调用方法
bar = Bar()
bar.add_xaxis(["衬衫", "毛衣", "领带", "裤子", "风衣", "高跟鞋", "袜子"])
bar.add_yaxis("商家A", [114, 55, 27, 101, 125, 27, 105])
bar.add_yaxis("商家B", [57, 134, 137, 129, 145, 60, 49])
bar.set_global_opts(title_opts=opts.TitleOpts(title="某商场销售情况"))
# bar.load_javascript()
# bar.render()
bar.render_notebook()

matplotlib histogram with equal bars width

I use a histogram to display the distribution. Everything works fine if the spacing of the bins is uniform. But if the interval is different, then the bar width is appropriate (as expected). Is there a way to set the width of the bar independent of the size of the bins ?
This is what i have
This what i trying to draw
from matplotlib import pyplot as plt
my_bins = [10, 20, 30, 40, 50, 120]
my_data = [5, 5, 6, 8, 9, 15, 25, 27, 33, 45, 46, 48, 49, 111, 113]
fig1 = plt.figure()
ax1 = fig1.add_subplot(121)
ax1.set_xticks(my_bins)
ax1.hist(my_data, my_bins, histtype='bar', rwidth=0.9,)
fig1.show()
I cannot mark your question as a duplicate, but I think my answer to this question might be what you are looking for?
I'm not sure how you'll make sense of the result, but you can use numpy.histogram to calculate the height of your bars, then plot those directly against an arbitrary x-scale.
x = np.random.normal(loc=50, scale=200, size=(2000,))
bins = [0,1,10,20,30,40,50,75,100]
fig = plt.figure()
ax = fig.add_subplot(211)
ax.hist(x, bins=bins, edgecolor='k')
ax = fig.add_subplot(212)
h,e = np.histogram(x, bins=bins)
ax.bar(range(len(bins)-1),h, width=1, edgecolor='k')
EDIT Here's with the adjustment to the x-tick labels so that the correspondence is easier to see.
my_bins = [10, 20, 30, 40, 50, 120]
my_data = [5, 5, 6, 8, 9, 15, 25, 27, 33, 45, 46, 48, 49, 111, 113]
fig = plt.figure()
ax = fig.add_subplot(211)
ax.hist(my_data, bins=my_bins, edgecolor='k')
ax = fig.add_subplot(212)
h,e = np.histogram(my_data, bins=my_bins)
ax.bar(range(len(my_bins)-1),h, width=1, edgecolor='k')
ax.set_xticks(range(len(my_bins)-1))
ax.set_xticklabels(my_bins[:-1])

Pymc size / indexing issue

I am trying to model Kruschke's "filtration-condensation experiment" with pymc 2.3.5. (numpy 1.10.1)
Basicaly there are:
4 groups
each group has 40 individuals
each individual has 64 Bernoulli trials (correct/incorrect)
What I am modeling:
each individual's results are Binomial distribution (e.g. 45 correct out of 64).
my belief about each individual's performance is Beta distribution.
this Beta distribution is influenced by group to which individual belongs (through parameters A=mu*kappa and B=(1-mu)*kappa)
my belief about how strong each group's influence is Gamma distribution (kappa variable)
my belief about each group's average is Beta distribution (mu variable)
The problem:
when I do modeling with "size=" parameters, pymc get's lost
when I seperate each distribution manually (no size=) the pymc does good job
I include the code below:
Data
import numpy as np
import seaborn as sns
import pymc as pm
from pymc.Matplot import plot as mcplot
%matplotlib inline
# Data
ncond = 4
nSubj = 40
trials = 64
N = np.repeat([trials], (ncond * nSubj))
z = np.array([45, 63, 58, 64, 58, 63, 51, 60, 59, 47, 63, 61, 60, 51, 59, 45,
61, 59, 60, 58, 63, 56, 63, 64, 64, 60, 64, 62, 49, 64, 64, 58, 64, 52, 64, 64,
64, 62, 64, 61, 59, 59, 55, 62, 51, 58, 55, 54, 59, 57, 58, 60, 54, 42, 59, 57,
59, 53, 53, 42, 59, 57, 29, 36, 51, 64, 60, 54, 54, 38, 61, 60, 61, 60, 62, 55,
38, 43, 58, 60, 44, 44, 32, 56, 43, 36, 38, 48, 32, 40, 40, 34, 45, 42, 41, 32,
48, 36, 29, 37, 53, 55, 50, 47, 46, 44, 50, 56, 58, 42, 58, 54, 57, 54, 51, 49,
52, 51, 49, 51, 46, 46, 42, 49, 46, 56, 42, 53, 55, 51, 55, 49, 53, 55, 40, 46,
56, 47, 54, 54, 42, 34, 35, 41, 48, 46, 39, 55, 30, 49, 27, 51, 41, 36, 45, 41,
53, 32, 43, 33])
condition = np.repeat([0,1,2,3], nSubj)
Does not work
# modeling
mu = pm.Beta('mu', 1, 1, size=ncond)
kappa = pm.Gamma('gamma', 1, 0.1, size=ncond)
# Prior
theta = pm.Beta('theta', mu[condition] * kappa[condition], (1 - mu[condition]) * kappa[condition], size=len(z))
# likelihood
y = pm.Binomial('y', p=theta, n=N, value=z, observed=True)
# model
model = pm.Model([mu, kappa, theta, y])
mcmc = pm.MCMC(model)
#mcmc.use_step_method(pm.Metropolis, mu)
#mcmc.use_step_method(pm.Metropolis, theta)
#mcmc.assign_step_methods()
mcmc.sample(100000, burn=20000, thin=3)
# outputs never converge and does vary in new simulations
mcplot(mcmc.trace('mu'), common_scale=False)
Works
z1 = z[:40]
z2 = z[40:80]
z3 = z[80:120]
z4 = z[120:]
Nv = N[:40]
mu1 = pm.Beta('mu1', 1, 1)
mu2 = pm.Beta('mu2', 1, 1)
mu3 = pm.Beta('mu3', 1, 1)
mu4 = pm.Beta('mu4', 1, 1)
kappa1 = pm.Gamma('gamma1', 1, 0.1)
kappa2 = pm.Gamma('gamma2', 1, 0.1)
kappa3 = pm.Gamma('gamma3', 1, 0.1)
kappa4 = pm.Gamma('gamma4', 1, 0.1)
# Prior
theta1 = pm.Beta('theta1', mu1 * kappa1, (1 - mu1) * kappa1, size=len(Nv))
theta2 = pm.Beta('theta2', mu2 * kappa2, (1 - mu2) * kappa2, size=len(Nv))
theta3 = pm.Beta('theta3', mu3 * kappa3, (1 - mu3) * kappa3, size=len(Nv))
theta4 = pm.Beta('theta4', mu4 * kappa4, (1 - mu4) * kappa4, size=len(Nv))
# likelihood
y1 = pm.Binomial('y1', p=theta1, n=Nv, value=z1, observed=True)
y2 = pm.Binomial('y2', p=theta2, n=Nv, value=z2, observed=True)
y3 = pm.Binomial('y3', p=theta3, n=Nv, value=z3, observed=True)
y4 = pm.Binomial('y4', p=theta4, n=Nv, value=z4, observed=True)
# model
model = pm.Model([mu1, kappa1, theta1, y1, mu2, kappa2, theta2, y2,
mu3, kappa3, theta3, y3, mu4, kappa4, theta4, y4])
mcmc = pm.MCMC(model)
#mcmc.use_step_method(pm.Metropolis, mu)
#mcmc.use_step_method(pm.Metropolis, theta)
#mcmc.assign_step_methods()
mcmc.sample(100000, burn=20000, thin=3)
# outputs converge and are not too much different in every simulation
mcplot(mcmc.trace('mu1'), common_scale=False)
mcplot(mcmc.trace('mu2'), common_scale=False)
mcplot(mcmc.trace('mu3'), common_scale=False)
mcplot(mcmc.trace('mu4'), common_scale=False)
mcmc.summary()
Can someone please explain it to me why mu[condition] and gamma[condition] does not work? :)
I guess that not splitting thetas into different variables is the problem but cannot understand why and maybe there is a way to pass a shape parameter to size= on theta?
First of all, I can confirm that the first version doesn't lead to stable results. What I can't confirm is that the second one is much better; I have seen very different results also with the second code, with values for the first mu parameter varying between 0.17 and 0.9 for different runs.
The convergence problems can be cured by using good starting values for the Markov chain. This can be done by first doing a maximum a posteriori (MAP) estimate, and then starting the Markov chain from there. The MAP step is computationally inexpensive and leads to a converging Markov chain with reproducible results for both variants of your code. For reference and comparison: The values I see for the four mu parameters are around 0.94 / 0.86 / 0.72 and 0.71.
You can do the MAP estimation by inserting the following two lines of code right after the line in which you define your model with "model=pm.Model(...":
map_ = pm.MAP(model)
map_.fit()
This technique is covered in more detail in Cameron Davidson-Pilon's Bayesian Methods for Hackers, together with other helpful topics around PyMC.

implementation of Hierarchial Agglomerative clustering

i am newbie and just want to implement Hierarchical Agglomerative clustering for RGB images. For this I extract all values of RGB from an image. And I process image.Next I find its distance and then develop the linkage. Now from linkage I want to extract my original data (i.e RGB values) on specified indices with indices id. Here is code I have done so far.
image = Image.open('image.jpg')
image = image.convert('RGB')
im = np.array(image).reshape((-1,3))
rgb = list(im.getdata())
X = pdist(im)
Y = linkage(X)
I = inconsistent(Y)
based on the 4th column of consistency. I opt minimum value of the cutoff in order to get maximum clusters.
cutoff = 0.7
cluster_assignments = fclusterdata(Y, cutoff)
# Print the indices of the data points in each cluster.
num_clusters = cluster_assignments.max()
print "%d clusters" % num_clusters
indices = cluster_indices(cluster_assignments)
ind = np.array(enumerate(rgb))
for k, ind in enumerate(indices):
print "cluster", k + 1, "is", ind
dendrogram(Y)
I got results like this
cluster 6 is [ 6 11]
cluster 7 is [ 9 12]
cluster 8 is [15]
Means cluster 6 contains the indices of 6 and 11 leafs. Now at this point I stuck in how to map these indices to get original data(i.e rgb values). indices of each rgb values to each pixel in the image. And then I have to generate codebook to implement Agglomeration Clustering. I have no idea how to approach this task. Read a lot of stuff but nothing clued.
Here is my solution:
import numpy as np
from scipy.cluster import hierarchy
im = np.array([[54,101,9],[ 67,89,27],[ 67,85,25],[ 55,106,1],[ 52,108,0],
[ 55,78,24],[ 19,57,8],[ 19,46,0],[ 95,110,15],[112,159,57],
[ 67,118,26],[ 76,127,35],[ 74,128,30],[ 25,62,0],[100,120,9],
[127,145,61],[ 48,112,25],[198,25,21],[203,11,10],[127,171,60],
[124,173,45],[120,133,19],[109,137,18],[ 60,85,0],[ 37,0,0],
[187,47,20],[127,170,52],[ 30,56,0]])
groups = hierarchy.fclusterdata(im, 0.7)
idx_sorted = np.argsort(groups)
group_sorted = groups[idx_sorted]
im_sorted = im[idx_sorted]
split_idx = np.where(np.diff(group_sorted) != 0)[0] + 1
np.split(im_sorted, split_idx)
output:
[array([[203, 11, 10],
[198, 25, 21]]),
array([[187, 47, 20]]),
array([[127, 171, 60],
[127, 170, 52]]),
array([[124, 173, 45]]),
array([[112, 159, 57]]),
array([[127, 145, 61]]),
array([[25, 62, 0],
[30, 56, 0]]),
array([[19, 57, 8]]),
array([[19, 46, 0]]),
array([[109, 137, 18],
[120, 133, 19]]),
array([[100, 120, 9],
[ 95, 110, 15]]),
array([[67, 89, 27],
[67, 85, 25]]),
array([[55, 78, 24]]),
array([[ 52, 108, 0],
[ 55, 106, 1]]),
array([[ 54, 101, 9]]),
array([[60, 85, 0]]),
array([[ 74, 128, 30],
[ 76, 127, 35]]),
array([[ 67, 118, 26]]),
array([[ 48, 112, 25]]),
array([[37, 0, 0]])]