Seaborn boxplot custom lables aside box - pandas

I have the code segment given below, and it generates the provided boxplot. I would like to know how to add custom labels aside each box, so that the boxplot is even more digestible to the readers of my result. The expected diagram is also provided. I reckon there should be an easy way to get this done in Seaborn/Matplotlib.
What I exactly want is to add the following labels to each box (on left hand side as in shown in the example provided)
The code use to generate boxplot
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as MaxNLocator
from matplotlib import rcParams
from matplotlib.ticker import ScalarFormatter, FuncFormatter,FormatStrFormatter, EngFormatter#, mticker
%matplotlib inline
import seaborn as sns
range_stats = pd.read_csv(f'{snappy_data_dir}range_searcg_snappy_stats.csv')
data_stats_rs_txt = range_stats[range_stats['category'] == "t"]
data_stats_rs_seq = range_stats[range_stats['category'] == "s"]
fig, ax =plt.subplots(1,2)
rcParams['figure.figsize'] =8, 6
flierprops = dict(marker='x')
labels1 = ['R1', 'R2', 'R3', 'R4', 'R5']
sns.boxplot(x='Interval',y='Total',data=data_stats_rs_txt,palette='rainbow', ax=ax[0])
sns.boxplot(x='Interval',y='Total',data=data_stats_rs_seq,palette='rainbow', ax=ax[1])
ax[0].set(xlabel='Interval (s)', ylabel='query execution time (s)', title='Text format', ylim=(0, 290))
ax[1].set(xlabel='Interval (s)', ylabel='', title='Proposed format',ylim=(0, 290), yticklabels=[])
plt.savefig("range-query-corrected.svg")
plt.savefig('snappy_compressed_rangesearch.pdf')
Resulted figure:
Expected figure with labels

This might help you, although it is not a fully correct way and is not a complete solution.
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
tips = sns.load_dataset('tips')
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
sns.set_context('poster',font_scale=0.5)
sns.boxplot(x="day", y="total_bill", data=tips,palette='rainbow', ax=axes[0], zorder=0)
axes[0].text(0, 45, r"$B1$", fontsize=20, color="blue")
axes[0].text(0.9, 45, r"$B2$", fontsize=20, color="blue")
axes[0].text(2.2, 45, r"$B3$", fontsize=20, color="blue")
axes[0].text(3.1, 45, r"$B4$", fontsize=20, color="blue");
sns.boxplot(x="day", y="tip", data=tips,palette='rainbow', ax=axes[1], zorder=10)

iris = sns.load_dataset("iris")
x_var = 'species'
y_var = 'sepal_width'
x_order = ['setosa', 'versicolor', 'virginica']
labels = ['R1','R2','R3']
max_vals = iris.groupby(x_var).max()[y_var].reindex(x_order)
ax = sns.boxplot(x=x_var, y=y_var, data=iris)
for x,y,l in zip(range(len(x_order)), max_vals, labels):
ax.annotate(l, xy=[x,y], xytext=[0,5], textcoords='offset pixels', ha='center', va='bottom')

Related

matplotlib.axis.axes error in mplfinance for volume

I am working with stock data which looks like daily.head
My code is:
import pandas as pd
import mplfinance as mpf
import matplotlib.pyplot as plt
data = pd.read_csv('/content/drive/MyDrive/python/TEchAnalysis.csv')
figdims=(15,10)
fig , ax = plt.subplots(figsize=figdims)
mpf.plot(daily , type='candle' , mav=(5,10,20,50,100) ,volume=True , ax=ax )
I am having the error
ValueError: `volume` must be of type `matplotlib.axis.Axes`
Please can somebody explain me this error & how to fix it?
If you specify external axes, you should also specify axes to display the volume. According to the documentation about external axes:
Please note the following:
Use kwarg ax= to pass any matplotlib Axes that you want into mpf.plot()
If you also want to plot volume, then you must pass in an Axes instance for the volume, so instead of volume=True, use volume=<myVolumeAxesInstance><myVolumeAxesInstance>.
If you specify ax= for mpf.plot() then you must also specify ax= for all calls to make_addplot().
Try this:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import mplfinance as mpf
import pandas as pd
import yfinance as yf
%matplotlib inline
df = yf.download('aapl', '2015-01-01', '2021-01-01')
df.rename(columns= {'Adj Close': 'Adj_close'}, inplace= True)
df1 = df.copy().loc['2015-01':'2015-02', :]
fig, ax1 = plt.subplots(figsize= (12, 6))
fig.set_facecolor('#ffe8a8')
ax1.set_zorder(1)
ax1.grid(True, color= 'k', linestyle= '--')
ax1.set_frame_on(False)
ax2 = ax1.twinx()
ax2.grid(False)
mpf.plot(df1, ax= ax1, type= 'candle', volume= ax2, xlim= (df1.index[0],
df1.index[-1]))
plt.show()
It works fairly well, giving some options to customize.
This is the output:

How to plot an kernel density estimation in seaborn scatterplot plot

I would like to plot the same as shown in the picture( but only the red part). The curve is a kernel density estimate based only on the X-values (the y-values are irrelevant and actually all 1,2 or 3. It is here just plotted like this to distinguish between red an blue. I have plotted the scatterplot, but how can I include the kernel density curve on the scatterplot? (the black dotted lines in the curve are just the quartiles and the median).
import seaborn as sns; sns.set()
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib.ticker import MaxNLocator
import matplotlib.pyplot as plt
from scipy.stats import norm
from sklearn.neighbors import KernelDensity
%matplotlib inline
# Change plotting style to ggplot
plt.style.use('ggplot')
from matplotlib.font_manager import FontProperties
X_plot = np.linspace(0, 30, 1000)[:, np.newaxis]
X1 = df[df['Zustandsklasse']==1]['Verweildauer'].values.reshape(-1,1)
X2 = df[df['Zustandsklasse']==2]['Verweildauer'].values.reshape(-1,1)
X3 = df[df['Zustandsklasse']==3]['Verweildauer'].values.reshape(-1,1)
#print(X1)
ax=sns.scatterplot(x="Verweildauer", y="CS_bandwith", data=df, legend="full", alpha=1)
kde=KernelDensity(kernel='gaussian').fit(X1)
log_dens = kde.score_samples(X_plot)
ax.plot(X_plot[:,0], np.exp(log_dens), color ="blue", linestyle="-", label="Gaussian Kernel")
ax.yaxis.set_major_locator(MaxNLocator(integer=True))
ax.invert_yaxis()
plt.ylim(5.5, .5)
ax.set_ylabel("Zustandsklasse")
ax.set_xlabel("Verweildauer in Jahren")
handles, labels = ax.get_legend_handles_labels()
# create the legend again skipping this first entry
leg = ax.legend(handles[1:], labels[1:], loc="lower right", ncol=2, facecolor='silver', fontsize= 7)
ax.set_xticks(np.arange(0, 30, 5))
ax2 = ax.twinx()
#get the ticks at the same heights as the left axis
ax2.set_ylim(ax.get_ylim())
s=[(df["Zustandsklasse"] == t).sum() for t in range(1, 6)]
s.insert(0, 0)
print(s)
ax2.set_yticklabels(s)
ax2.set_ylim(ax.get_ylim())
ax2.set_ylabel("Anzahl Beobachtungen")
ax2.grid(False)
#plt.tight_layout()
plt.show()
Plotting target
Whats is plotted with the code above
It's much easier if you use subplots. Here is an example with seaborn's Titanic dataset:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
titanic = sns.load_dataset('titanic')
fig, ax = plt.subplots(nrows=3, sharex=True)
ax[2].set_xlabel('Age')
for i in [1, 2, 3]:
age_i = titanic[titanic['pclass'] == i]['age']
ax[i-1].scatter(age_i, [0] * len(age_i))
sns.kdeplot(age_i, ax=ax[i-1], shade=True, legend=False)
ax[i-1].set_yticks([])
ax[i-1].set_ylim(-0.01)
ax[i-1].set_ylabel('Class ' + str(i))

How to plot multiple graphs stacked above each other

I need to plot a set of 9 or more data sets with a common x-axis. I was able to do it for 2 of them but the rest of them just don't appear. They have to be stacked one above the other. with a common x axis. I have attached the image of what I have been able to do so far.
stack of plot
I have used the following code
import numpy as np
import pandas as pd
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
import matplotlib.gridspec as gridspec
from matplotlib.lines import Line2D
import matplotlib.lines as mlines
file1 = '1.dat'
file2 = '10.dat'
data1 = pd.read_csv(file1, delimiter='\s+', header=None, engine='python')
data1.columns = ['M','B','C']
data2 = pd.read_csv(file2, delimiter='\s+', header=None, engine='python')
data2.columns = ['N','A','D']
def fit_data():
fig = plt.figure(1,figsize=(12,11))
ax1= fig.add_subplot(211,)
ax1.plot(data1['M'], data1['B'], color='cornflowerblue', linestyle= '-', lw=0.5)
ax1.scatter(data1['M'], data1['B'], marker='o', color='red', s=25)
ax1.errorbar(data1['M'], data1['B'], data1['C'], fmt='.', ecolor='red',color='red', elinewidth=1,capsize=3)
ax2 = fig.add_subplot(211, sharex=ax1 )
ax2.plot(data2['N'], data2['A'], color='cornflowerblue', linestyle= '-', lw=0.5)
ax2.scatter(data2['N'], data2['A'], marker='o', color='blue', s=25)
ax2.errorbar(data2['N'], data2['A'], data2['D'], fmt='.', ecolor='blue',color='blue', elinewidth=1,capsize=3)
plt.setp(ax1.get_xticklabels(), visible=False) # hide labels
fig.subplots_adjust(hspace=0)
ax1.tick_params(axis='both',which='minor',length=5,width=2,labelsize=18)
ax1.tick_params(axis='both',which='major',length=8,width=2,labelsize=18)
plt.savefig("1.pdf")
#fig.set_size_inches(w=13,h=10)
plt.show()
plt.close()
fit_data()
I read through stacking of plots but wasn't able to apply the same here.
I modified the code to this but this is what I get. modified code.
I need the stacking to be done to do a comparative study. Something like this image. comparative study
This is the part of the code I have modified and used.
plt.setp(ax1.get_xticklabels(), visible=False) # hide labels
fig.subplots_adjust(hspace=0.0) # remove vertical space between subplots
Should it be done seperately for ax1, ax2 and so on?
plt.subplots_adjust(hspace=0.0) removes the space between them.
You can have as many plots as you want:
from matplotlib import pyplot as plt
import numpy as np
numer_of_plots = 9
X = np.random.random((numer_of_plots, 50))
fig, axs = plt.subplots(nrows=numer_of_plots, ncols=1)
for ax, x in zip(axs, X):
ax.plot(range(50), x)
plt.subplots_adjust(hspace=0.0)
plt.show()

How to add droplines to a seaborn scatterplot?

Using the following example code in a Jupyter notebook:
import pandas as pd
import seaborn as sns
import numpy as np
%matplotlib inline
%config InlineBackend.figure_format = 'svg'
df = pd.DataFrame(np.random.rand(5, 2), columns=['a', 'b'])
sns.set()
g = sns.relplot(data=df, x='a', y='b', kind='scatter');
g.set(xlim=(0, 1))
g.set(ylim=(0, 1));
The resulting plot shows the data points, but I would also like to have vertical drop lines and occasionally horizontal ones as well. To clarify what I mean by droplines, here is a mockup of the actual vs. the desired output:
Update: A little more complex input that makes it harder to manually draw the lines:
import pandas as pd
import seaborn as sns
import numpy as np
%matplotlib inline
%config InlineBackend.figure_format = 'svg'
df = pd.DataFrame(np.random.rand(20, 3), columns=['a', 'b', 'c'])
df['d'] = ['apples', 'bananas', 'cherries', 'dates'] * 5
sns.set()
g = sns.relplot(data=df, x='a', y='b', hue='c', col='d', col_wrap=2, kind='scatter');
g.set(xlim=(0, 1))
g.set(ylim=(0, 1));
There are several ways to plot vertical/horizontal lines. One of the is to use hlines or vlines. This can be done using a loop for sake of ease.
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np; np.random.seed(121)
fig, ax = plt.subplots()
df = pd.DataFrame(np.random.rand(5, 2), columns=['a', 'b'])
sns.set()
g = sns.relplot(data=df, x='a', y='b', kind='scatter', color='blue', ax=ax);
for x, y in zip(df['a'], df['b']):
ax.hlines(y, 0, x, color='blue')
ax.vlines(x, 0, y, color='blue')
ax.set_xlim(0, 1)
ax.set_ylim(0, 1)
plt.close(g.fig)

How to shrink a subplot colorbar

starting from this code:
import numpy as np
import matplotlib.pyplot as pl
import matplotlib
from matplotlib.gridspec import GridSpec
x=np.linspace(0.0,1.0,100)
y=np.linspace(0.0,1.0,100)
xv,yv=np.meshgrid(x,y)
gs = GridSpec(2, 2,hspace=0.00,wspace=0.1,width_ratios=[25,1])
ax1 = pl.subplot(gs[0,0])
im=ax1.imshow(xv.T, origin='lower', cmap=matplotlib.cm.jet,extent=(0,100,0,1.0),aspect='auto')
xax1=ax1.get_xaxis()
xax1.set_ticks([])
ax3 = pl.subplot(gs[0,1])
#cbar=pl.colorbar(im,cax=ax3,shrink=0.5)
cbar=pl.colorbar(im,cax=ax3)
ax2 = pl.subplot(gs[1,0])
ax2.plot(np.sin(x))
pl.savefig('test.pdf')
I would like to keep the two plots sharing the same x-axis but I would like to
shrink the colorbar as well. If I use the commented line it does not work. What is the
better, most elegant, way to do that? I think I should use make_axes_locatable at some point, but I do not know how to use it in the proper way without changing the imshow
x-axis length.
Thank you.
You can do it with a lot of control about positioning, using the inset_axes.
import numpy as np
import matplotlib.pyplot as pl
import matplotlib
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
x=np.linspace(0.0,1.0,100)
y=np.linspace(0.0,1.0,100)
xv,yv=np.meshgrid(x,y)
fig = plt.figure()
ax1 = fig.add_subplot(211)
ax2 = fig.add_subplot(212, sharex = ax1)
im = ax1.imshow(xv.T, origin='lower',
cmap=matplotlib.cm.jet,extent=(0,100,0,1.0),aspect='auto')
ax2.plot(np.sin(x))
cax = inset_axes(ax1,
width="5%",
height="70%",
bbox_transform=ax1.transAxes,
bbox_to_anchor=(0.025, 0.1, 1.05, 0.95),
loc= 1)
norm = mpl.colors.Normalize(vmin=xv.min(), vmax=xv.max())
cb1 = mpl.colorbar.ColorbarBase(cax,
cmap=matplotlib.cm.jet, norm=norm,
orientation='vertical')
cb1.set_label(u'some cbar')
This is what I get then. Does that help your question?