Error while adding error bars to subplots in seaborn - pandas

I have the following example code, which I want to plot as bar subplots in one figure using seaborn. I can plot the actual data as bar plots, but when I try to add error bars, I get the following error:
AttributeError: 'NoneType' object has no attribute 'seq'
code is:
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
# Bar heights for the first subplot. Note that the values are strings, not numbers.
df1 = pd.DataFrame({
'A': ['7.5'],
'B': ['2.4']
})
# Pre-calculated error values for df1 (also stored as strings).
df1_err = pd.DataFrame({
'A': ['2.3'],
'B': ['1.2']
})
# Bar heights for the second subplot.
df2 = pd.DataFrame({
'A': ['5.5'],
'B': ['4.2']
})
# Pre-calculated error values for df2.
df2_err = pd.DataFrame({
'A': ['1.7'],
'B': ['2.1']
})
# One row with two side-by-side axes that share the y axis.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(6, 4), sharey=True)
# Colour to use for each column name.
my_pal = {"A": "green", "B":"orange"}
sns.set_style("whitegrid")
# NOTE(review): tight_layout() runs before anything has been drawn - confirm this ordering is intended.
plt.tight_layout()
# Each call passes a whole DataFrame as `yerr`; these are the calls reported
# to raise the AttributeError, and they work once `yerr` is removed.
sns.barplot(data=df1, palette=my_pal, yerr = df1_err, linewidth=2,edgecolor=[".1","0.1"], ax=axes[0])
sns.barplot(data=df2, palette=my_pal, yerr = df2_err, linewidth=2,edgecolor=[".1","0.1"], ax=axes[1])
plt.show()
If I remove yerr from the sns.barplot() commands, it does create bar plots as I want, but I could not manage to add pre-calculated error bars to these subplots. Any help please?

Maybe you mean something like this:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Bar heights and pre-calculated errors for each subplot, converted to floats
# so that matplotlib can treat them as numbers.
df1 = pd.DataFrame({'A': ['7.5'], 'B': ['2.4']}).astype(float)
df1_err = pd.DataFrame({'A': ['2.3'], 'B': ['1.2']}).astype(float)
df2 = pd.DataFrame({'A': ['5.5'], 'B': ['4.2']}).astype(float)
df2_err = pd.DataFrame({'A': ['1.7'], 'B': ['2.1']}).astype(float)

# Colour for each column, as in the question.
my_pal = {"A": "green", "B": "orange"}

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(6, 4), sharey=True)
# Fill both subplots: one bar per column, with the matching error bar.
for ax, values, errors in zip(axes, (df1, df2), (df1_err, df2_err)):
    ax.bar(
        values.columns,
        values.iloc[0],          # the single row holds the bar heights
        yerr=errors.iloc[0],     # the single row holds the error sizes
        color=[my_pal[col] for col in values.columns],
        edgecolor=".1",
        linewidth=2,
        capsize=4,
    )
# Adjust the layout only after the bars exist, so the spacing accounts for them.
plt.tight_layout()
plt.show()

Related

How to turn seaborn boxplot fliers on/off with buttons

I want to implement buttons to turn the fliers in a set of seaborn boxplots on and off. I tried to follow the method of changing them through the artists, as described in this link: https://stackoverflow.com/a/36893152/18193150, but was unsuccessful. I would appreciate it if someone could show me how to do it. Cheers.
This is the code I tried with:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from matplotlib.widgets import Button
# Category labels 1..12, each repeated 40 times, used as the frame's index.
x = np.arange(1, 13)
index = np.repeat(x, 40)
# Fixed seed so the random data is reproducible.
np.random.seed(123)
# Two normally distributed columns: 'A' around 30 and 'B' around 10, both with spread 2.
df = pd.DataFrame({'A': np.random.normal(30, 2, len(index)),
'B': np.random.normal(10, 2, len(index))},
index=index)
# Marker styles for the fliers of each boxplot series.
red_diamond = dict(markerfacecolor='r', marker='D')
blue_dot = dict(markerfacecolor='b', marker='o')
fig=plt.figure(figsize=[10, 5])
# Draw column 'A' first and keep the returned axes so 'B' can be drawn on it too.
ax = sns.boxplot(data=df, x=df.index, y='A', width=0.5, color='red',
boxprops=dict(alpha=.5, label='A'), flierprops=red_diamond)
sns.boxplot(data=df, x=df.index, y='B', width=0.5, color='blue',
boxprops=dict(alpha=.5, label='B'), flierprops=blue_dot, ax=ax)
# Button that is meant to hide the boxplot fliers.
resetax_off = plt.axes([0.8, 0.02, 0.08, 0.035])
button_off = Button(resetax_off, 'Flier off', color='red',
hovercolor='lightslategrey')
# Button that is meant to show the boxplot fliers again.
resetax_on = plt.axes([0.6, 0.02, 0.08, 0.035])
button_on = Button(resetax_on, 'Flier on', color='gold',
hovercolor='lightslategrey')
# Callback for the 'Flier off' button: sets alpha to 0 on the selected lines and redraws.
# NOTE(review): `ax1` is not defined anywhere in this snippet (the boxplot axes
# is named `ax`), and `.line` is used where matplotlib axes normally expose
# `.lines` - this is presumably why the attempt fails; confirm.
def click_off(event):
for i,artist in enumerate(ax.artists):
line = ax1.line[i+4] #trying to get Line2D for the fliers, 4th in the list of 6
line.set(alpha=0)
fig.canvas.draw_idle()
button_off.on_clicked(click_off)
# Callback for the 'Flier on' button: same lookup as click_off, but sets alpha back to 1.
# NOTE(review): the same `ax1` / `.line` concern applies here.
def click_on(event):
for i,artist in enumerate(ax.artists):
line = ax1.line[i+4] #trying to get Line2D for the fliers
line.set(alpha=1)
fig.canvas.draw_idle()
button_on.on_clicked(click_on)
plt.show()

How to plot stats.probplot in a grid?

I have a data frame with four columns. I would like to plot the normality test for each column in a 2x2 grid, but only one of the subplots is drawn and the others are left empty.
import random
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
# 2x2 grid of subplots, one intended for each column.
fig, axs = plt.subplots(2,2, figsize=(15, 6), facecolor='w', edgecolor='k')
fig.subplots_adjust(hspace = .5, wspace=.001)
# Four columns of 1000 random integers each, drawn from range(1, 50).
data = {'col1': [random.randrange(1, 50, 1) for i in range(1000)], 'col2': [random.randrange(1, 50, 1) for i in range(1000)],'col3':[random.randrange(1, 50, 1) for i in range(1000)]
,'col4':[random.randrange(1, 50, 1) for i in range(1000)]}
df = pd.DataFrame(data)
# Pair each subplot with one column name (iterating a DataFrame yields its column names).
# NOTE(review): `plot=plt` passes the pyplot module rather than the per-subplot
# `ax`, and the loop variable `ax` is then rebound to probplot's return value -
# this is the behaviour the question is asking about.
for ax, d in zip(axs.ravel(), df):
ax=stats.probplot(df[d], plot=plt)
#ax.set_title(str(d))
plt.show()
Is there a way to construct the subplots and call stats.probplot within a loop?
In your code, you need to change the for loop to this:
# Draw one probability plot per column, each on its own subplot.
for ax, d in zip(axs.ravel(), df):
    # Passing the Axes object (not the pyplot module) makes probplot draw on
    # this particular subplot.
    stats.probplot(df[d], plot=ax)
    # Label the subplot with the column it shows.
    ax.set_title(str(d))
# Show the figure once, after every subplot has been filled.
plt.show()
I hope this will help you move on.

pandas scatter plot and groupby does not work

I am trying to do a scatter plot with pandas. Unfortunately kind='scatter' doesn't work. If I change this to kind='line' it works as expected. What can I do to fix this?
# One scatter series per distinct value of 'm', all drawn on the existing axes `ax`.
for label, d in df.groupby('m'):
    d[['te', 'n']].sort_values(by='n', ascending=False).plot(
        kind="scatter", x='n', y='te', ax=ax, label='m = ' + str(label))
Use plot.scatter instead:
# Small demo frame whose 'x' and 'y' columns hold the same seven values.
df = pd.DataFrame(
    {
        'x': [0, 5, 7, 3, 2, 4, 6],
        'y': [0, 5, 7, 3, 2, 4, 6],
    }
)
# The scatter accessor takes the names of the x and y columns.
df.plot.scatter(x='x', y='y')
Use this snippet if you want individual labels and colours:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Demo data: 100 random points, each assigned to one of five groups (0-4).
df = pd.DataFrame({
    'm': np.random.randint(0, 5, size=100),
    'x': np.random.uniform(size=100),
    'y': np.random.uniform(size=100),
})
fig, ax = plt.subplots()
# Draw every group on the same axes, each with its own label and colour.
for label, d in df.groupby('m'):
    # Random RGB triple for this group. It is wrapped in a list below so that
    # it is read as one colour rather than as three per-point values.
    color = list(np.random.uniform(size=3))
    d.plot.scatter('x', 'y', label=f'group {label}', ax=ax, c=[color])
# Needed outside notebooks, where figures are not displayed automatically.
plt.show()

How to add droplines to a seaborn scatterplot?

Using the following example code in a Jupyter notebook:
import pandas as pd
import seaborn as sns
import numpy as np
# Jupyter magics: show figures inline in the notebook, rendered as SVG.
%matplotlib inline
%config InlineBackend.figure_format = 'svg'
# Five random points with coordinates in [0, 1), stored in columns 'a' and 'b'.
df = pd.DataFrame(np.random.rand(5, 2), columns=['a', 'b'])
# Apply seaborn's default theme.
sns.set()
# Scatter plot of 'b' against 'a'; `g` is the object returned by relplot.
g = sns.relplot(data=df, x='a', y='b', kind='scatter');
# Fix both axis ranges to (0, 1).
g.set(xlim=(0, 1))
g.set(ylim=(0, 1));
The resulting plot shows the data points, but I would also like to have vertical drop lines and occasionally horizontal ones as well. To clarify what I mean by droplines, here is a mockup of the actual vs. the desired output:
Update: A little more complex input that makes it harder to manually draw the lines:
import pandas as pd
import seaborn as sns
import numpy as np
# Jupyter magics: show figures inline in the notebook, rendered as SVG.
%matplotlib inline
%config InlineBackend.figure_format = 'svg'
# Twenty random points with three numeric columns 'a', 'b' and 'c'.
df = pd.DataFrame(np.random.rand(20, 3), columns=['a', 'b', 'c'])
# Categorical column: the four fruit names repeated five times, in order.
df['d'] = ['apples', 'bananas', 'cherries', 'dates'] * 5
# Apply seaborn's default theme.
sns.set()
# Scatter of 'b' against 'a', coloured by 'c', with one panel per value of 'd'
# and the panels wrapped after two columns.
g = sns.relplot(data=df, x='a', y='b', hue='c', col='d', col_wrap=2, kind='scatter');
# Fix both axis ranges to (0, 1).
g.set(xlim=(0, 1))
g.set(ylim=(0, 1));
There are several ways to plot vertical/horizontal lines. One of them is to use hlines or vlines. This can be done in a loop for simplicity.
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
np.random.seed(121)  # fixed seed so the example is reproducible

# Apply the seaborn theme before the figure is created so that it takes effect.
sns.set()
fig, ax = plt.subplots()
df = pd.DataFrame(np.random.rand(5, 2), columns=['a', 'b'])
# scatterplot is the axes-level counterpart of relplot: it draws on the axes it
# is given, so there is no extra figure to create and then close.
sns.scatterplot(data=df, x='a', y='b', color='blue', ax=ax)
# One horizontal and one vertical drop line per point, running from the axis
# (coordinate 0) to the point itself.
for x, y in zip(df['a'], df['b']):
    ax.hlines(y, 0, x, color='blue')
    ax.vlines(x, 0, y, color='blue')
ax.set_xlim(0, 1)
ax.set_ylim(0, 1)
plt.show()

Pandas bar plot -- specify bar color by column

Is there a simple way to specify bar colors by column name using the Pandas DataFrame.plot(kind='bar') method?
I have a script that generates multiple DataFrames from several different data files in a directory. For example it does something like this:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pds
# Two random 4x3 frames whose column sets overlap: a, b, c and b, c, d.
data_files = ['a', 'b', 'c', 'd']
df1 = pds.DataFrame(np.random.rand(4, 3), columns=data_files[:-1])
df2 = pds.DataFrame(np.random.rand(4, 3), columns=data_files[1:])
# Left panel of a 1x2 grid.
left_ax = plt.subplot(1, 2, 1)
df1.plot.bar(ax=left_ax)
# Right panel of the same grid.
right_ax = plt.subplot(1, 2, 2)
df2.plot.bar(ax=right_ax)
plt.show()
With the following output:
Unfortunately, the column colors aren't consistent for each label in the different plots. Is it possible to pass in a dictionary of (filenames:colors), so that any particular column always has the same color. For example, I could imagine creating this by zipping up the filenames with the Matplotlib color_cycle:
data_files = ['a', 'b', 'c', 'd']
# The 'axes.color_cycle' rcParam no longer exists in current matplotlib; the
# default colours are stored in the 'axes.prop_cycle' cycler instead.
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
# zip() is lazy in Python 3, so turn it into a list before printing.
print(list(zip(data_files, colors)))
# With matplotlib's default style this prints:
# [('a', '#1f77b4'), ('b', '#ff7f0e'), ('c', '#2ca02c'), ('d', '#d62728')]
I could figure out how to do this directly with Matplotlib: I just thought there might be a simpler, built-in solution.
Edit:
Below is a partial solution that works in pure Matplotlib. However, I'm using this in an IPython notebook that will be distributed to non-programmer colleagues, and I'd like to minimize the amount of excessive plotting code.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pds
data_files = ['a', 'b', 'c', 'd']
# The 'axes.color_cycle' rcParam no longer exists in current matplotlib; the
# default colours are stored in the 'axes.prop_cycle' cycler instead.
mpl_colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
# Fixed column-name -> colour mapping shared by every plot.
colors = dict(zip(data_files, mpl_colors))


def bar_plotter(df, colors, sub):
    """Draw *df* as a grouped bar chart in one panel of a 1x2 subplot grid.

    Parameters
    ----------
    df : DataFrame
        One group of bars is drawn per row and one bar per column. The index
        is used as the x position, so it must be numeric.
    colors : mapping
        Maps each column name of *df* to the colour of its bars.
    sub : int
        1-based number of the panel to draw into (1 = left, 2 = right).
    """
    ncols = df.shape[1]
    # Leave a gap of two bar-widths between neighbouring groups.
    width = 1. / (ncols + 2.)
    # Shift the first bar left so each group sits around its index value.
    starts = df.index.values - width * ncols / 2.
    plt.subplot(1, 2, sub)
    for n, col in enumerate(df):
        plt.bar(starts + width * n, df[col].values, color=colors[col],
                width=width, label=col)
    plt.xticks(df.index.values)
    plt.grid()
    plt.legend()


df1 = pds.DataFrame(np.random.rand(4, 3), columns=data_files[:-1])
df2 = pds.DataFrame(np.random.rand(4, 3), columns=data_files[1:])
bar_plotter(df1, colors, 1)
bar_plotter(df2, colors, 2)
plt.show()
You can pass a list as the colors. This will require a little bit of manual work to get it to line up, unlike if you could pass a dictionary, but may be a less cluttered way to accomplish your goal.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pds
# Two random 4x3 frames whose column sets overlap: a, b, c and b, c, d.
data_files = ['a', 'b', 'c', 'd']
df1 = pds.DataFrame(np.random.rand(4, 3), columns=data_files[:-1])
df2 = pds.DataFrame(np.random.rand(4, 3), columns=data_files[1:])
# One colour per entry of data_files, in the same order.
color_list = ['b', 'g', 'r', 'c']
# df1 starts at column 'a', so it is given the list from its beginning.
left_ax = plt.subplot(1, 2, 1)
df1.plot.bar(ax=left_ax, color=color_list)
# df2 starts at column 'b', so the first colour is skipped to stay aligned.
right_ax = plt.subplot(1, 2, 2)
df2.plot.bar(ax=right_ax, color=color_list[1:])
plt.show()
EDIT
Ajean came up with a simple way to return a list of the correct colors from a dictionary:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pds
data_files = ['a', 'b', 'c', 'd']
color_list = ['b', 'g', 'r', 'c']
# Fixed column-name -> colour mapping shared by both plots.
d2c = dict(zip(data_files, color_list))
df1 = pds.DataFrame(np.random.rand(4, 3), columns=data_files[:-1])
df2 = pds.DataFrame(np.random.rand(4, 3), columns=data_files[1:])
# Build real lists of colours, one entry per column. In Python 3, map() returns
# a lazy one-shot iterator, not a list, so it is not a reliable colour argument.
df1.plot(kind='bar', ax=plt.subplot(121), color=[d2c[col] for col in df1.columns])
df2.plot(kind='bar', ax=plt.subplot(122), color=[d2c[col] for col in df2.columns])
plt.show()
Pandas version 1.1.0 makes this easier. You can pass a dictionary to specify different color for each column in the pandas.DataFrame.plot.bar() function:
Here is an example:
# Two frames that share column 'b' but otherwise have different columns.
df1 = pd.DataFrame(
    {
        'a': [1.2, .8, .9],
        'b': [.2, .9, .7],
    }
)
df2 = pd.DataFrame(
    {
        'b': [0.2, .5, .4],
        'c': [.5, .6, .7],
        'd': [1.1, .6, .7],
    }
)
# One colour per column name; the same dictionary is passed to both plots.
color_dict = dict(a='green', b='red', c='blue', d='cyan')
df1.plot.bar(color=color_dict)
df2.plot.bar(color=color_dict)