Overlapping alpha transparency to create a density plot in matplotlib - matplotlib

I want to create a plot using matplotlib. I basically draw many lines which overlap. I want to set alpha transparency on these lines such that if lines overlap the alpha values add up. My intention is to show the density of the overlapping lines with a more solid color, while lines that don't overlap should be shown very lightly. This is the code I have, but it is not giving the desired effect:
import numpy as np
import matplotlib.pyplot as plt
# Simulation settings: time step, steps per walk, number of walks.
dt = 0.00008
nstep = 3000
paths = 100

# Walk positions; row 0 stays at the origin for every walk.
X = np.zeros((nstep, paths))
Y = np.zeros((nstep, paths))

# Gaussian increments scaled by sqrt(dt); one column per walk.
gv1 = np.sqrt(dt) * np.random.randn(nstep, paths)
gv2 = np.sqrt(dt) * np.random.randn(nstep, paths)

# Row i+1 is row i plus increment i, i.e. a running sum of the increments.
# The last row of increments is never consumed.
X[1:] = np.cumsum(gv1[:-1], axis=0)
Y[1:] = np.cumsum(gv2[:-1], axis=0)

# One faint line per walk (columns of X and Y are paired up).
plt.plot(X, Y, lw=1, alpha=0.05)
plt.show()

Related

Matplotlib FuncAnimation color changing scatter plot

I am trying to create an animated scatter plot whereby the scatter points plot in order and change color over time, thus the newest scatter points always appear in the same color (in this case, red) while the older scatter points age to different colors using a color map.
The code works except for the newest scatter point in every frame of the animation, which appears as the 'oldest' color in the plot, rather than the newest. How can I get it to appear in the correct color?
My code is this:
import matplotlib.animation as animation
from matplotlib import cm
import matplotlib.pyplot as plt
import numpy as np
# %matplotlib notebook  -- IPython/Jupyter magic; run it in a notebook cell,
# it is not valid syntax in a plain Python script.

# 500 RGBA rows sampled from the 'hsv' colormap.
# pyplot.get_cmap is used because cm.get_cmap is gone from newer matplotlib.
brg = plt.get_cmap('hsv', 500)
cmapz = brg(range(500))

# Point coordinates; one point is appended on every animation frame.
x = [0]
y = [0]


def update_lines(num):
    """Append one random point and refresh the scatter plot and the label.

    Arguments:
        num: Frame value supplied by FuncAnimation.

    Returns:
        A one-element tuple holding the updated scatter artist.
    """
    dx = x[-1] + np.random.random()
    x.append(dx)
    dy = np.random.random()
    y.append(dy)
    text.set_text("{:d}: [{:.0f},{:.0f}]".format(num, x[-1], y[-1]))
    # NOTE(review): `array` holds `num` colours, while `x` starts with one
    # point and gains one per call, so len(x) exceeds num when frames are
    # numbered from 0. That mismatch is presumably why the newest point gets
    # the "oldest" colour; slicing with len(x) would give one colour per
    # point -- confirm before changing.
    array = cmapz[:num]
    graph.set_offsets(np.c_[x, y])
    graph.set_color(array[::-1])
    return graph,


fig, ax = plt.subplots(1, 1, figsize=(8, 5))
# Configure the axes created above instead of stacking a second axes on top.
ax.set_xlim(0, 251)
ax.set_ylim(-1, 2)
graph = ax.scatter(x, y, c=cmapz[0])
text = fig.text(0, 1, "TEXT", va='top')
ani = animation.FuncAnimation(fig, update_lines, frames=499, interval=10, blit=False, repeat=False)
plt.show()

Display the value of the bar on each bar, wrong place

I have a DF like that:
Day Destiny Flight Year
0 10 AJU 1504 2019
1 10 AJU 1502 2020
2 10 FOR 1524 2019
3 10 FOR 1522 2020
4 10 FOR 1528 2019
I am using this code to plot the chart to compare the years side by side for each destination. It's working well.
# Count flights per (Destiny, Year), pivot Year into columns, draw grouped bars.
flight_counts = df.groupby(["Destiny", "Year"])["Flight"].count()
flight_counts.unstack().plot.bar(figsize=(12, 3))
I have this other one to plot values on top of the bars. But it is plotting in the wrong place.
# Draw the grouped bar chart and keep the Axes it returns.
counts = df.groupby(["Destiny", "Year"])["Flight"].count()
a = counts.unstack().plot.bar(figsize=(12, 3))
# NOTE(review): the count `v` is passed as the first (x) argument and the
# running index `i` as the second (y) argument -- presumably the reason the
# text lands in the wrong place.
for i, v in enumerate(counts):
    a.text(v, i, str(v))
How to display the value of the bar on each bar correctly?
I've been looking for something like that, but I haven't found it.
Update:
Version 3.4 of matplotlib added function bar_label, which could be incorporated as follows in the code below:
# Label every bar of every container on the axes with its rounded value.
for container in ax.containers:
    ax.bar_label(container, fmt='%.0f', size=18)
Old answer:
You can loop through the generated bars, and use their x, height and width to position the text. Adding an empty line into the string helps position the text independent of the scale. ax.margins() can add some space above the bars to make the text fit.
from matplotlib import pyplot as plt
import pandas as pd
# Small sample frame: five flights over two destinations and two years.
df = pd.DataFrame({
    'Destiny': ['AJU', 'AJU', 'FOR', 'FOR', 'FOR'],
    'Flight': range(1501, 1506),
    'Year': [2019, 2020, 2019, 2020, 2019],
})
flight_counts = df.groupby(["Destiny", "Year"])["Flight"].count()
ax = flight_counts.unstack().plot.bar(figsize=(12, 3))

# Centre each label over its bar; the trailing newline in the text lifts it
# above the bar top.
for p in ax.patches:
    h = p.get_height()
    label_x = p.get_x() + p.get_width()/2
    ax.annotate(f'{h:.0f}\n', (label_x, h), ha='center', va='center', size=18)

# Extra vertical margin so the labels fit inside the axes.
plt.margins(y=0.2)
plt.tight_layout()
plt.show()
The below add_value_labels function is from justfortherec, it's very easy to use, just pass matplotlib.axes.Axes object to it:
import pandas as pd
import matplotlib.pyplot as plt
def add_value_labels(ax, spacing=5):
    """Add labels to the end of each bar in a bar chart.

    Arguments:
        ax (matplotlib.axes.Axes): The matplotlib object containing the axes
            of the plot to annotate.
        spacing (int): The distance, in points, between the labels and the
            bars.
    """
    from math import isnan

    for rect in ax.patches:
        # Label position: horizontally centred on the bar, at its end.
        y_value = rect.get_height()
        x_value = rect.get_x() + rect.get_width() / 2

        # A bar without a value has nothing to label.
        if isnan(y_value):
            continue

        # Positive bars get the label above; negative bars get it below,
        # so the offset is mirrored and the text is anchored at its top.
        if y_value < 0:
            offset, va = -spacing, 'top'
        else:
            offset, va = spacing, 'bottom'

        ax.annotate(
            "{:.1f}".format(y_value),    # Bar value with one decimal place
            (x_value, y_value),          # Anchor at the end of the bar
            xytext=(0, offset),          # Shift vertically by `offset`
            textcoords="offset points",  # Interpret `xytext` in points
            ha='center',
            va=va,
        )
# Load the flight records and draw one bar per (Destiny, Year) count.
df = pd.read_csv("1.csv")
flight_counts = df.groupby(["Destiny", "Year"])["Flight"].count()
ax = flight_counts.unstack().plot.bar(figsize=(12, 3))

# Annotate every bar with its value, then display the chart.
add_value_labels(ax)
plt.show()
I think we can adapt this answer referenced by @JohanC to fit your problem.
import pandas as pd
import seaborn as sn
import matplotlib.pyplot as plt
from decimal import Decimal
# Sample data matching the table in the question.
df = pd.DataFrame({
    'Day': [10]*5,
    'Destiny': ['AJU']*2 + ['FOR']*3,
    'Flight': [1504, 1502, 1524, 1522, 1528],
    'Year': [2019, 2020, 2019, 2020, 2019],
})

# Plot once and keep the Axes: plotting the same frame a second time would
# open an extra figure that never gets annotated.
a = df.groupby(["Destiny", "Year"])["Flight"].count().unstack().plot.bar(figsize=(12, 3))

# Write each bar's height at the bar's top-left corner.
for p in a.patches:
    a.annotate('{}'.format(Decimal(str(p.get_height()))), (p.get_x(), p.get_height()))
plt.show()

Scatterplot with marginal KDE plots and multiple categories in Matplotlib

I'd like a function in Matplotlib similar to the Matlab 'scatterhist' function which takes continuous values for 'x' and 'y' axes, plus a categorical variable as input; and produces a scatter plot with marginal KDE plots and two or more categorical variables in different colours as output:
I've found examples of scatter plots with marginal histograms in Matplotlib, marginal histograms in Seaborn jointplot, overlapping histograms in Matplotlib and marginal KDE plots in Matplotlib; but I haven't found any examples which combine scatter plots with marginal KDE plots and are colour coded to indicate different categories.
If possible, I'd like a solution which uses 'vanilla' Matplotlib without Seaborn, as this will avoid dependencies and allow complete control and customisation of the plot appearance using standard Matplotlib commands.
I was going to try to write something based on the above examples; but before doing so wanted to check whether a similar function was already available, and if not then would be grateful for any guidance on the best approach to use.
@ImportanceOfBeingEarnest: Many thanks for your help.
Here's my first attempt at a solution.
It's a bit hacky but achieves my objectives, and is fully customisable using standard matplotlib commands. I'm posting the code here with annotations in case anyone else wishes to use it or develop it further. If there are any improvements or neater ways of writing the code I'm always keen to learn and would be grateful for guidance.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import gridspec
from scipy import stats
def _hide_axes(axes, hidden_spines):
    """Hide both tick axes and the named spines of a marginal plot."""
    axes.get_xaxis().set_visible(False)
    axes.get_yaxis().set_visible(False)
    for side in hidden_spines:
        axes.spines[side].set_visible(False)


label = ['Setosa', 'Versicolor', 'Virginica']  # List of labels for categories
cl = ['b', 'r', 'y']  # List of colours for categories
categories = len(label)
sample_size = 20  # Number of samples in each category

# Create numpy arrays for dummy x and y data:
x = np.zeros(shape=(categories, sample_size))
y = np.zeros(shape=(categories, sample_size))

# Generate random data for each categorical variable; the +n / -n shifts
# move each category's cloud apart.
for n in range(categories):
    x[n, :] = np.random.randn(sample_size) + 4 + n
    y[n, :] = np.random.randn(sample_size) + 6 - n

# Set up 4 subplots as axis objects using GridSpec:
gs = gridspec.GridSpec(2, 2, width_ratios=[1, 3], height_ratios=[3, 1])
# Add space between scatter plot and KDE plots to accommodate axis labels:
gs.update(hspace=0.3, wspace=0.3)

# Set background canvas colour to white instead of grey default
fig = plt.figure()
fig.patch.set_facecolor('white')

ax = plt.subplot(gs[0, 1])  # Scatter plot area and axis range
ax.set_xlim(x.min(), x.max())
ax.set_ylim(y.min(), y.max())
ax.set_xlabel('x')
ax.set_ylabel('y')

axl = plt.subplot(gs[0, 0], sharey=ax)  # Left KDE plot area
_hide_axes(axl, ("right", "top", "bottom"))

axb = plt.subplot(gs[1, 1], sharex=ax)  # Bottom KDE plot area
_hide_axes(axb, ("right", "top", "left"))

axc = plt.subplot(gs[1, 0])  # Legend plot area
axc.axis('off')  # Hide tick marks and spines

# Evaluation grids for the KDE curves; identical for every category.
xx = np.linspace(x.min(), x.max(), 1000)
yy = np.linspace(y.min(), y.max(), 1000)

# Plot data for each categorical variable as scatter and marginal KDE plots:
for n in range(categories):
    ax.scatter(x[n], y[n], color='none', label=label[n], s=100, edgecolor=cl[n])
    kde = stats.gaussian_kde(x[n, :])
    axb.plot(xx, kde(xx), color=cl[n])
    kde = stats.gaussian_kde(y[n, :])
    axl.plot(kde(yy), yy, color=cl[n])

# Copy legend object from scatter plot to lower left subplot and display:
# NB 'scatterpoints = 1' customises legend box to show only 1 handle (icon) per label
handles, labels = ax.get_legend_handles_labels()
axc.legend(handles, labels, scatterpoints=1, loc='center', fontsize=12)
plt.show()
Version 2, using Pandas to import 'real' data from a csv file, with a different number of entries in each category. (csv file format: row 0 = headers; col 0 = x values, col 1 = y values, col 2 = category labels). Scatterplot axis and legend labels are generated from column headers.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import gridspec
from scipy import stats
import pandas as pd
"""
Create scatter plot with marginal KDE plots
from csv file with 3 cols of data
formatted as following example (first row of
data are headers):
'x_label', 'y_label', 'category_label'
4,5,'virginica'
3,6,'sentosa'
4,6, 'virginica' etc...
"""
def _hide_axes(axes, hidden_spines):
    """Hide both tick axes and the named spines of a marginal plot."""
    axes.get_xaxis().set_visible(False)
    axes.get_yaxis().set_visible(False)
    for side in hidden_spines:
        axes.spines[side].set_visible(False)


df = pd.read_csv('iris_2.csv')  # enter filename for csv file to be imported (within current working directory)
cl = ['b', 'r', 'y', 'g', 'm', 'k']  # Custom list of colours for each category - increase as needed...
headers = list(df.columns)  # Extract list of column headers

# Find min and max values for all x (= col [0]) and y (= col [1]) in dataframe.
# Columns are picked by position with .iloc, so the category column is never
# reduced and no label lookup with an integer key is involved.
x_column = df.iloc[:, 0]
y_column = df.iloc[:, 1]
xmin, xmax = x_column.min(), x_column.max()
ymin, ymax = y_column.min(), y_column.max()

# Create a list of all unique categories which occur in the right hand column (ie index '2').
# (.ix no longer exists in current pandas; .iloc is the positional accessor.)
category_list = df.iloc[:, 2].unique()

# Set up 4 subplots and aspect ratios as axis objects using GridSpec:
gs = gridspec.GridSpec(2, 2, width_ratios=[1, 3], height_ratios=[3, 1])
# Add space between scatter plot and KDE plots to accommodate axis labels:
gs.update(hspace=0.3, wspace=0.3)

fig = plt.figure()  # Set background canvas colour to white instead of grey default
fig.patch.set_facecolor('white')

ax = plt.subplot(gs[0, 1])  # Scatter plot area and axis range
ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax)
ax.set_xlabel(headers[0], fontsize=14)
ax.set_ylabel(headers[1], fontsize=14)
ax.yaxis.labelpad = 10  # adjust space between x and y axes and their labels if needed

axl = plt.subplot(gs[0, 0], sharey=ax)  # Left KDE plot area
_hide_axes(axl, ("right", "top", "bottom"))

axb = plt.subplot(gs[1, 1], sharex=ax)  # Bottom KDE plot area
_hide_axes(axb, ("right", "top", "left"))

axc = plt.subplot(gs[1, 0])  # Legend plot area
axc.axis('off')  # Hide tick marks and spines

# Evaluation grids for the KDE curves; identical for every category.
xx = np.linspace(xmin, xmax, 1000)
yy = np.linspace(ymin, ymax, 1000)

# For each category in the list...
for n, category in enumerate(category_list):
    # Create a sub-table containing only entries matching current category:
    st = df.loc[df[headers[2]] == category]
    # Select first two columns of sub-table as x and y values to be plotted:
    x = st[headers[0]]
    y = st[headers[1]]
    # Cycle through the palette so extra categories reuse colours instead of
    # running off the end of `cl`.
    colour = cl[n % len(cl)]
    # Plot data for each categorical variable as scatter and marginal KDE plots:
    ax.scatter(x, y, color='none', s=100, edgecolor=colour, label=category)
    kde = stats.gaussian_kde(x)
    axb.plot(xx, kde(xx), color=colour)
    kde = stats.gaussian_kde(y)
    axl.plot(kde(yy), yy, color=colour)

# Copy legend object from scatter plot to lower left subplot and display:
# NB 'scatterpoints = 1' customises legend box to show only 1 handle (icon) per label
handles, labels = ax.get_legend_handles_labels()
axc.legend(handles, labels, title=headers[2], scatterpoints=1, loc='center', fontsize=12)
plt.show()

Matplotlib Legend with Different Number and Color of Markers per Handle

Given the following:
import pandas as pd
import matplotlib.pyplot as plt
# Six sample points, one tuple per row: category, year, x, y, marker colour.
records = [
    ('a', 1, 2, 1, 'grey'),
    ('a', 2, 4, 2, 'green'),
    ('a', 1, 5, 3, 'grey'),
    ('b', 2, 1, 2, 'blue'),
    ('b', 1, 2, 4, 'grey'),
    ('b', 2, 3, 6, 'orange'),
]
d = pd.DataFrame(records, columns=['category', 'year', 'x', 'y', 'clr'])
d
category clr x y year
0 a grey 2 1 1
1 a green 4 2 2
2 a grey 5 3 1
3 b blue 1 2 2
4 b grey 2 4 1
5 b orange 3 6 2
and
# Draw each row of `d` as its own marker so every point keeps its own colour.
# The built-in range is used because this snippet never imports numpy.
for i in range(len(d)):
    plt.plot(d.x[i], d.y[i], marker='o', linestyle='none', markerfacecolor=d.clr[i],
             markeredgecolor='none', markersize=15)
#clean up axes
# Booleans are used for the tick switches: a non-empty string such as 'off'
# is truthy, so it would leave the ticks on.
plt.tick_params(axis='x', which='both', bottom=False, top=False, color='none', labelcolor='none')
plt.tick_params(axis='y', which='both', left=False, right=False, color='none', labelcolor='none')
lgnd = plt.legend(['Year 1', 'Year 2'],
                  numpoints=1,
                  loc=0,
                  ncol=1,
                  fontsize=10,
                  frameon=False)
# NOTE(review): `_legmarker` is a private attribute and may not exist in
# other matplotlib versions -- confirm against the installed release.
lgnd.legendHandles[0]._legmarker.set_markersize(15)
lgnd.legendHandles[1]._legmarker.set_markersize(15)
I'd like the legend to have one grey dot for the Year 1 marker (as it currently does), but for the Year 2 markers, one dot for each distinct color (in this case an orange, a blue, and a green dot, all in a row on the same line; the order doesn't matter at this time).
Like this:
I've tried the following, but to no avail:
# Attempt from the question (reported as not working): tries to set a point
# count and per-marker edge colours on the second legend handle.
lgnd.legendHandles[1]._legmarker.set_numpoints(len(d.clr.unique()))
lgnd.legendHandles[1]._legmarker.set_markeredgecolor(d.clr)
Thanks in advance!
I had fun figuring out a solution to your problem (and learning a few new tricks in the process). Essentially, you could make your own legend handler object to map all colours to a year. Making a custom legend handler can be done by making any object that has function legend_artist(self, legend, orig_handle, fontsize, handlebox). The detail of why this works can be found in the "Implementing custom handler" section of this page. I commented all the explanation in the code since there is too much to explain by words without codes to demonstrate.
Example code:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pdb
import matplotlib.patches as mpatches
class MyLegendHandler(object):
    """Legend handler that draws one filled circle per colour it was given."""

    def __init__(self, color):
        # Sequence of colours; one circle is drawn for each entry.
        self.color = color

    def legend_artist(self, legend, orig_handle, fontsize, handlebox):
        """Add one circle per colour to `handlebox`.

        Returns:
            The last circle patch added, or None when there are no colours.
        """
        x0, y0 = handlebox.xdescent, handlebox.ydescent  # offset of the lower left corner
        # Width and height of the handle box; for now these are the
        # dimensions of each circle in the legend.
        width, height = handlebox.width, handlebox.height
        # NOTE: to be practical, the circle diameter is set to `height`;
        # using `width` as well would give an ellipse when width != height.
        # NOTE: these are changed later on, internally, to accommodate the text.
        handlebox.width += len(self.color) * height  # widen the handle box to fit all circles
        patch = None  # stays None when the colour list is empty
        for i, colour in enumerate(self.color):
            # Draw a circle for this colour.
            # NOTE: play around here to align the circles right rather than left :)
            center = [0.5 * (i + 1) * width - 0.5 * x0, 0.5 * height - 0.5 * y0]
            patch = mpatches.Ellipse(center, height, height, facecolor=colour,
                                     edgecolor=None, hatch=None,
                                     transform=handlebox.get_transform())
            handlebox.add_artist(patch)
        return patch
###################################
d = pd.DataFrame({'category': ['a', 'a', 'a', 'b', 'b', 'b'],
                  'year': [1, 2, 1, 2, 1, 2],
                  'x': [2, 4, 5, 1, 2, 3],
                  'y': [1, 2, 3, 2, 4, 6],
                  'clr': ['grey', 'green', 'grey', 'blue', 'grey', 'orange']})
unique_year_elements = []
years_seen = []
tmp = None
my_map = {}
for i in range(len(d)):
    tmp, = plt.plot(d.x[i], d.y[i], marker='o', linestyle='none', markerfacecolor=d.clr[i],
                    markeredgecolor='none', markersize=15)
    # Collect one plot element per distinct year: a year may have several
    # plot elements, but only the first one is needed as the legend key.
    if d.year[i] not in years_seen:
        years_seen.append(d.year[i])
        unique_year_elements.append(tmp)
# Build handler_map for plt.legend: each year's plot element maps to a
# handler holding that year's distinct colours.
for year, element in zip(years_seen, unique_year_elements):
    color_list = d.loc[d['year'] == year].clr.unique().tolist()
    my_map[element] = MyLegendHandler(color_list)
# Create the legend object
plt.legend(unique_year_elements, ["Year " + str(y) for y in years_seen],
           handler_map=my_map)
#clean up axes
# Booleans are used for the tick switches: a non-empty string such as 'off'
# is truthy, so it would leave the ticks on.
plt.tick_params(axis='x', which='both', bottom=False, top=False, color='none', labelcolor='none')
plt.tick_params(axis='y', which='both', left=False, right=False, color='none', labelcolor='none')
plt.show()
Sample output:
Another approach, which worked for me, was to plot circles (ellipses - see why here) and text:
import matplotlib.patches as mpatches
# Ellipse width and height, derived from the axis maxima.
xmax_el = xmax/30
ymax_el = ymax/28
# Vertical positions of the two legend rows, placed above the data.
yloc1 = max(ind)+2.5
yloc2 = max(ind)+1.75
# First row: a single grey outline circle.
circlex = mpatches.Ellipse((pmax-.2*pmax, yloc1), xmax_el, ymax_el, alpha=0.5,
                           clip_on=False, edgecolor='grey', linewidth=2,
                           facecolor='none')
# Patch list, starting with the grey circle.
patches = [circlex]
clrs = ['g', 'r', 'b']
# Placeholder names, one per colour.
circles = ['circle'+str(i) for i in np.arange(len(clrs))]
# Multipliers that space the coloured circles out along x.
clrnum = np.arange(len(clrs))
# Colours in reverse order, so they plot in order on the chart.
clrs2 = clrs[::-1]
# Second row: one outline circle per colour, each shifted further left.
for i, j, k in zip(clrs2, circles, clrnum):
    j = mpatches.Ellipse((pmax-(.2+k*0.05)*pmax, yloc2), xmax_el, ymax_el,
                         alpha=0.5, clip_on=False, edgecolor=i, linewidth=2,
                         facecolor='none')
    patches.append(j)
for i in patches:
    ax.add_artist(i)
# Row captions, drawn to the right of the circles.
ax.text(pmax-.15*pmax, yloc1, '2015 Plan Offering', color='grey', ha='left', va='center')
ax.text(pmax-.15*pmax, yloc2, '2016 Plan Offering', color='grey', ha='left', va='center')
Result:

adjust colour bar range to visible part of basemap contour plot

I have a contour plot on a basemap and i wish to adjust the range of the colour bar so that they fit to the visible data. The default setting makes the colour range to fit to all data, i.e. also those which are not plotted. Is there a setting for this?
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap, addcyclic, shiftgrid
# Corners of the visible map area, in degrees.
myllcrnrlat = 35
myurcrnrlat = 65
myllcrnrlon = -45
myurcrnrlon = 45
m = Basemap(
    projection='cyl',
    llcrnrlat=myllcrnrlat,
    urcrnrlat=myurcrnrlat,
    llcrnrlon=myllcrnrlon,
    urcrnrlon=myurcrnrlon,
    resolution='c',
)

# Global 10-degree grid filled with random values.
lonsin = np.asarray(range(0, 360, 10))
latsin = np.asarray(range(-90, 90, 10))
valin = np.random.rand(len(latsin), len(lonsin))
valin[0, 0] = 5  # this is a value outside my basemap area and higher than all inside.

# Add the cyclic point, then shift the grid around 180 degrees.
valin_cyclic, lons_cyclic = addcyclic(valin, lonsin)
valin_cyclic, lons_cyclic = shiftgrid(180., valin_cyclic, lons_cyclic, start=False)

# Project the grid to map coordinates and draw it with a colour bar.
lon2d, lat2d = np.meshgrid(lons_cyclic, latsin)
x, y = m(lon2d, lat2d)
cs = m.pcolormesh(x, y, valin_cyclic, cmap=plt.get_cmap('autumn_r'))
cbar = plt.colorbar(cs)
plt.show()
Of course I can use vmin and vmax by doing something like the following, but it seems rather long, so maybe there is a special setting?
# Boolean masks for the grid coordinates that fall inside the map corners.
# NOTE(review): lonsin runs 0..350 while the map spans -45..45, so western
# longitudes (stored as 320..350) are not selected -- confirm this is intended.
lon_in_map = (myllcrnrlon <= lonsin) & (lonsin <= myurcrnrlon)
lat_in_map = (myllcrnrlat <= latsin) & (latsin <= myurcrnrlat)
lonsin_inbasemap = lonsin[lon_in_map]
latsin_inbasemap = latsin[lat_in_map]

# Values on the selected rows (latitudes) and columns (longitudes) only.
valin_inbasemap = valin[np.ix_(lat_in_map, lon_in_map)]

# Cap the colour scale at the largest selected value.
vmax = np.amax(valin_inbasemap)
cs = m.pcolormesh(x, y, valin_cyclic, vmax=vmax, cmap=plt.get_cmap('autumn_r'))
cbar = plt.colorbar(cs)
plt.show()
If you want to mask data that falls below some value (for example, negative values that you do not want to show), you can use a masked array together with the colormap's "bad" colour:
# Mask every value below 0 so it is drawn with the colormap's "bad" colour.
valin = np.ma.masked_less(valin_cyclic, 0)
cmap1 = plt.get_cmap("autumn_r")
cmap1.set_bad("w")  # masked cells are drawn in white
# NOTE(review): the original passed an undefined `conc` here; the masked
# array built above is presumably what was meant -- confirm.
p = plt.pcolor(x, y, valin, cmap=cmap1, alpha=1, zorder=2)