How to plot an kernel density estimation in seaborn scatterplot plot - pandas

I would like to plot the same as shown in the picture( but only the red part). The curve is a kernel density estimate based only on the X-values (the y-values are irrelevant and actually all 1,2 or 3. It is here just plotted like this to distinguish between red an blue. I have plotted the scatterplot, but how can I include the kernel density curve on the scatterplot? (the black dotted lines in the curve are just the quartiles and the median).
import seaborn as sns; sns.set()
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib.ticker import MaxNLocator
import matplotlib.pyplot as plt
from scipy.stats import norm
from sklearn.neighbors import KernelDensity
%matplotlib inline
# Change plotting style to ggplot
plt.style.use('ggplot')
from matplotlib.font_manager import FontProperties
X_plot = np.linspace(0, 30, 1000)[:, np.newaxis]
X1 = df[df['Zustandsklasse']==1]['Verweildauer'].values.reshape(-1,1)
X2 = df[df['Zustandsklasse']==2]['Verweildauer'].values.reshape(-1,1)
X3 = df[df['Zustandsklasse']==3]['Verweildauer'].values.reshape(-1,1)
#print(X1)
ax=sns.scatterplot(x="Verweildauer", y="CS_bandwith", data=df, legend="full", alpha=1)
kde=KernelDensity(kernel='gaussian').fit(X1)
log_dens = kde.score_samples(X_plot)
ax.plot(X_plot[:,0], np.exp(log_dens), color ="blue", linestyle="-", label="Gaussian Kernel")
ax.yaxis.set_major_locator(MaxNLocator(integer=True))
ax.invert_yaxis()
plt.ylim(5.5, .5)
ax.set_ylabel("Zustandsklasse")
ax.set_xlabel("Verweildauer in Jahren")
handles, labels = ax.get_legend_handles_labels()
# create the legend again skipping this first entry
leg = ax.legend(handles[1:], labels[1:], loc="lower right", ncol=2, facecolor='silver', fontsize= 7)
ax.set_xticks(np.arange(0, 30, 5))
ax2 = ax.twinx()
#get the ticks at the same heights as the left axis
ax2.set_ylim(ax.get_ylim())
s=[(df["Zustandsklasse"] == t).sum() for t in range(1, 6)]
s.insert(0, 0)
print(s)
ax2.set_yticklabels(s)
ax2.set_ylim(ax.get_ylim())
ax2.set_ylabel("Anzahl Beobachtungen")
ax2.grid(False)
#plt.tight_layout()
plt.show()
Plotting target
Whats is plotted with the code above

It's much easier if you use subplots. Here is an example with seaborn's Titanic dataset:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
titanic = sns.load_dataset('titanic')
fig, ax = plt.subplots(nrows=3, sharex=True)
ax[2].set_xlabel('Age')
for i in [1, 2, 3]:
age_i = titanic[titanic['pclass'] == i]['age']
ax[i-1].scatter(age_i, [0] * len(age_i))
sns.kdeplot(age_i, ax=ax[i-1], shade=True, legend=False)
ax[i-1].set_yticks([])
ax[i-1].set_ylim(-0.01)
ax[i-1].set_ylabel('Class ' + str(i))

Related

Trying to place text in mpl just above the first yticklabel

I am having diffculties to move the text "Rank" exactly one line above the first label and by not using guesswork as I have different chart types with variable sizes, widths and also paddings between the labels and bars.
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from pylab import rcParams
rcParams['figure.figsize'] = 8, 6
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
df = pd.DataFrame.from_records(zip(np.arange(1,30)))
df.plot.barh(width=0.8,ax=ax,legend=False)
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.spines['bottom'].set_visible(False)
ax.tick_params(left=False, bottom=False)
ax.tick_params(axis='y', which='major', pad=36)
ax.set_title("Rankings")
ax.text(-5,30,"Rank")
plt.show()
Using transData.transform didn't get me any further. The problem seems to be that ax.text() with the position params of (0,0) aligns with the start of the bars and not the yticklabels which I need, so getting the exact position of yticklabels relative to the axis would be helpful.
The following approach creates an offset_copy transform, using "axes coordinates". The top left corner of the main plot is at position 0, 1 in axes coordinates. The ticks have a "pad" (between label and tick mark) and a "padding" (length of the tick mark), both measured in "points".
The text can be right aligned, just as the ticks. With "bottom" as vertical alignment, it will be just above the main plot. If that distance is too low, you could try ax.text(0, 1.01, ...) to have it a bit higher.
import matplotlib.pyplot as plt
from matplotlib.transforms import offset_copy
import pandas as pd
import numpy as np
from matplotlib import rcParams
rcParams['figure.figsize'] = 8, 6
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
df = pd.DataFrame.from_records(zip(np.arange(1, 30)))
df.plot.barh(width=0.8, ax=ax, legend=False)
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.spines['bottom'].set_visible(False)
ax.tick_params(left=False, bottom=False)
ax.tick_params(axis='y', which='major', pad=36)
ax.set_title("Rankings")
tick = ax.yaxis.get_major_ticks()[-1] # get information of one of the ticks
padding = tick.get_pad() + tick.get_tick_padding()
trans_offset = offset_copy(ax.transAxes, fig=fig, x=-padding, y=0, units='points')
ax.text(0, 1, "Rank", ha='right', va='bottom', transform=trans_offset)
# optionally also use tick.label.get_fontproperties()
plt.tight_layout()
plt.show()
I've answered my own question while Johan was had posted his one - which is pretty good and what I wanted. However, I post mine anyways as it uses an entirely different approach. Here I add a "ghost" row into the dataframe and label it appropriately which solves the problem:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from pylab import rcParams
rcParams['figure.figsize'] = 8, 6
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
df = pd.DataFrame.from_records(zip(np.arange(1,30)),columns=["val"])
#add a temporary header
new_row = pd.DataFrame({"val":0}, index=[0])
df = pd.concat([df[:],new_row]).reset_index(drop = True)
df.plot.barh(width=0.8,ax=ax,legend=False)
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.spines['bottom'].set_visible(False)
ax.tick_params(left=False, bottom=False)
ax.tick_params(axis='y', which='major', pad=36)
ax.set_title("Rankings")
# Set the top label to "Rank"
yticklabels = [t for t in ax.get_yticklabels()]
yticklabels[-1]="Rank"
# Left align all labels
[t.set_ha("left") for t in ax.get_yticklabels()]
ax.set_yticklabels(yticklabels)
# delete the top bar effectively by setting it's height to 0
ax.patches[-1].set_height(0)
plt.show()
Perhaps the advantage is that it is always a constant distance above the top label, but with the disadvantage that this is a bit "patchy" in the most literal sense to transform your dataframe for this task.

How to rotate a Contextily basemap in matplotlib and Jupyter notebook

I am making a set of figures with subplots in Jupyter Notebook using matplotlib and geopandas. The top plots (A & B) have geospatial data and use various basemaps (aerial imagery, shaded relief, etc.).
How can I rotate the top two plots 90-degrees, so that they are elongated?
(I will need to redo gridspec layout of course, but that is easy; what I don't know how to do is: rotate the plots but keep the geographic information for basemap plotting.)
Repeatable code is below.
import pandas as pd
import geopandas as gpd
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import contextily as ctx
from shapely.geometry import Point
plt.style.use('seaborn-whitegrid')
### DUMMY DATA
long, lat = [(-118.155, -118.051, -118.08), (38.89, 39.512, 39.1)]
q, t = [(0, 70500, 21000), (0, 8000, -1200)]
df = pd.DataFrame(list(zip(q, t, lat, long)), columns =['q', 't', 'lat', 'long'])
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df['long'], df['lat']))
gdf.crs = "EPSG:4326"
### PLOTTING
fig = plt.figure(figsize=(10,7.5), constrained_layout=True)
gs = fig.add_gridspec(3, 2)
ax1 = fig.add_subplot(gs[0:2, 0])
ax2 = fig.add_subplot(gs[0:2, 1], sharex = ax1, sharey = ax1)
ax3 = fig.add_subplot(gs[-1, :])
### PlotA
gdf.plot(ax = ax1)
ctx.add_basemap(ax1, crs='epsg:4326', source=ctx.providers.Esri.WorldShadedRelief)
ax1.set_aspect('equal')
ax1.set_title('Plot-A')
ax1.tick_params('x', labelrotation=90)
### PlotB
gdf.plot(ax = ax2)
ctx.add_basemap(ax2, crs='epsg:4326', source=ctx.providers.Esri.WorldImagery, alpha=0.5)
ax2.set_aspect('equal')
ax2.set_title('Plot-B')
ax2.tick_params('x', labelrotation=90)
### PlotC
ax3.scatter(df.q, df.t)
ax3.set_aspect('equal')
ax3.set_title('Plot-C')
ax3.set_xlabel('q')
ax3.set_ylabel('t')

Matpliblib colormap with peak at center and zero at edges

I am looking for a custom colormap that highlights the center (value of 1) and just has white color at the edges (values of 0 and 2). Ideally there should be a gradient from 1 to [0, 2].
Usual colormaps do the opposite: diverges from center (white at center).
Thanks for your help
You can use the from_list method from LinearSegmentedColormap for this from the matplotlib.colors module.
Here, we give 3 colors as a list (["white", "red", "white"]). This can easily be customised by changing any of those color names.
For example:
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
import numpy as np
cmap = LinearSegmentedColormap.from_list('wrw', ["white", "red", "white"], N=256)
a = np.arange(0, 2, 0.01).reshape(20, 10)
fig, ax = plt.subplots()
p = ax.pcolormesh(a, cmap=cmap, vmin=0, vmax=2)
fig.colorbar(p)
plt.show()
You can create based on availbale colormaps from matplotlib.
from matplotlib.colors import ListedColormap
def show_cmap(ax, cmap):
n = 256
ax.imshow(np.tile(np.arange(n), [int(n*0.20),1]),
cmap=cmap,
interpolation="nearest", aspect="auto")
ax.set_xticks([])
ax.set_yticks([])
ax.set_xticklabels([])
ax.set_yticklabels([])
c1 = plt.cm.Blues(range(0, 128))
c2 = c1[::-1]
c = np.vstack([c1, c2])
cmap = ListedColormap(c)
fig, ax = plt.subplots(1, 1, figsize=(7.2, 7.2))
show_cmap(ax, cmap)

Seaborn boxplot custom lables aside box

I have the code segment given below, and it generates the provided boxplot. I would like to know how to add custom labels aside each box, so that the boxplot is even more digestible to the readers of my result. The expected diagram is also provided. I reckon there should be an easy way to get this done in Seaborn/Matplotlib.
What I exactly want is to add the following labels to each box (on left hand side as in shown in the example provided)
The code use to generate boxplot
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as MaxNLocator
from matplotlib import rcParams
from matplotlib.ticker import ScalarFormatter, FuncFormatter,FormatStrFormatter, EngFormatter#, mticker
%matplotlib inline
import seaborn as sns
range_stats = pd.read_csv(f'{snappy_data_dir}range_searcg_snappy_stats.csv')
data_stats_rs_txt = range_stats[range_stats['category'] == "t"]
data_stats_rs_seq = range_stats[range_stats['category'] == "s"]
fig, ax =plt.subplots(1,2)
rcParams['figure.figsize'] =8, 6
flierprops = dict(marker='x')
labels1 = ['R1', 'R2', 'R3', 'R4', 'R5']
sns.boxplot(x='Interval',y='Total',data=data_stats_rs_txt,palette='rainbow', ax=ax[0])
sns.boxplot(x='Interval',y='Total',data=data_stats_rs_seq,palette='rainbow', ax=ax[1])
ax[0].set(xlabel='Interval (s)', ylabel='query execution time (s)', title='Text format', ylim=(0, 290))
ax[1].set(xlabel='Interval (s)', ylabel='', title='Proposed format',ylim=(0, 290), yticklabels=[])
plt.savefig("range-query-corrected.svg")
plt.savefig('snappy_compressed_rangesearch.pdf')
Resulted figure:
Expected figure with labels
This might help you, although it is not a fully correct way and is not a complete solution.
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
tips = sns.load_dataset('tips')
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
sns.set_context('poster',font_scale=0.5)
sns.boxplot(x="day", y="total_bill", data=tips,palette='rainbow', ax=axes[0], zorder=0)
axes[0].text(0, 45, r"$B1$", fontsize=20, color="blue")
axes[0].text(0.9, 45, r"$B2$", fontsize=20, color="blue")
axes[0].text(2.2, 45, r"$B3$", fontsize=20, color="blue")
axes[0].text(3.1, 45, r"$B4$", fontsize=20, color="blue");
sns.boxplot(x="day", y="tip", data=tips,palette='rainbow', ax=axes[1], zorder=10)
iris = sns.load_dataset("iris")
x_var = 'species'
y_var = 'sepal_width'
x_order = ['setosa', 'versicolor', 'virginica']
labels = ['R1','R2','R3']
max_vals = iris.groupby(x_var).max()[y_var].reindex(x_order)
ax = sns.boxplot(x=x_var, y=y_var, data=iris)
for x,y,l in zip(range(len(x_order)), max_vals, labels):
ax.annotate(l, xy=[x,y], xytext=[0,5], textcoords='offset pixels', ha='center', va='bottom')

How to plot multiple graphs stacked above each other

I need to plot a set of 9 or more data sets with a common x-axis. I was able to do it for 2 of them but the rest of them just don't appear. They have to be stacked one above the other. with a common x axis. I have attached the image of what I have been able to do so far.
stack of plot
I have used the following code
import numpy as np
import pandas as pd
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
import matplotlib.gridspec as gridspec
from matplotlib.lines import Line2D
import matplotlib.lines as mlines
file1 = '1.dat'
file2 = '10.dat'
data1 = pd.read_csv(file1, delimiter='\s+', header=None, engine='python')
data1.columns = ['M','B','C']
data2 = pd.read_csv(file2, delimiter='\s+', header=None, engine='python')
data2.columns = ['N','A','D']
def fit_data():
fig = plt.figure(1,figsize=(12,11))
ax1= fig.add_subplot(211,)
ax1.plot(data1['M'], data1['B'], color='cornflowerblue', linestyle= '-', lw=0.5)
ax1.scatter(data1['M'], data1['B'], marker='o', color='red', s=25)
ax1.errorbar(data1['M'], data1['B'], data1['C'], fmt='.', ecolor='red',color='red', elinewidth=1,capsize=3)
ax2 = fig.add_subplot(211, sharex=ax1 )
ax2.plot(data2['N'], data2['A'], color='cornflowerblue', linestyle= '-', lw=0.5)
ax2.scatter(data2['N'], data2['A'], marker='o', color='blue', s=25)
ax2.errorbar(data2['N'], data2['A'], data2['D'], fmt='.', ecolor='blue',color='blue', elinewidth=1,capsize=3)
plt.setp(ax1.get_xticklabels(), visible=False) # hide labels
fig.subplots_adjust(hspace=0)
ax1.tick_params(axis='both',which='minor',length=5,width=2,labelsize=18)
ax1.tick_params(axis='both',which='major',length=8,width=2,labelsize=18)
plt.savefig("1.pdf")
#fig.set_size_inches(w=13,h=10)
plt.show()
plt.close()
fit_data()
I read through stacking of plots but wasn't able to apply the same here.
I modified the code to this but this is what I get. modified code.
I need the stacking to be done to do a comparative study. Something like this image. comparative study
This is the part of the code I have modified and used.
plt.setp(ax1.get_xticklabels(), visible=False) # hide labels
fig.subplots_adjust(hspace=0.0) # remove vertical space between subplots
Should it be done seperately for ax1, ax2 and so on?
plt.subplots_adjust(hspace=0.0) removes the space between them.
You can have as many plots as you want:
from matplotlib import pyplot as plt
import numpy as np
numer_of_plots = 9
X = np.random.random((numer_of_plots, 50))
fig, axs = plt.subplots(nrows=numer_of_plots, ncols=1)
for ax, x in zip(axs, X):
ax.plot(range(50), x)
plt.subplots_adjust(hspace=0.0)
plt.show()