matplotlib scatter plot add legend without loop and without using seaborn - matplotlib

I receive the error "No handles with labels found to put in legend." when running the code below. How can I add a legend to this scatter plot that shows the color definitions (a red dot for A, a blue dot for B, a green dot for C)?
### Dummy Dataset
# Nine points; `cat` assigns each point to category 'A', 'B' or 'C'.
x = [0,1,-1,4,0,2,2,4,2]
y = [1,5,9,2,4,2,5,6,1]
cat = ['A','B','B','B','A','C','A','B','B']
df = pd.DataFrame(list(zip(x,y,cat)), columns =['x', 'y', 'cat'])
### Build color definitions
# Start from a copy of the category column, then map each category letter
# to a colour name in place.
df.loc[:, 'color'] = df.cat
df.color.replace(['A', 'B', 'C'], ['red', 'blue', 'green'], inplace=True)
display(df)
### Plotting
# A 2x1 grid is created but only the top cell is used.
fig = plt.figure(figsize=(5,5), constrained_layout=True)
gs = fig.add_gridspec(2, 1)
ax1 = fig.add_subplot(gs[0, 0])
# Single scatter call, coloured per point; note that no `label=` is passed.
ax1.scatter(df.x, df.y, edgecolors = 'none', c = df.color)
# This is the call the question is about: nothing plotted above carries a
# label, and the reported error is "No handles with labels found to put in
# legend."
ax1.legend(loc='upper left', facecolor='white', frameon=1,
framealpha=1, labelspacing=0.2, borderpad=0.25)

It seems like there might not be a way to do this without a simple loop. Based on the procedure here, the following code works.
# Same dummy dataset as in the question.
x = [0,1,-1,4,0,2,2,4,2]
y = [1,5,9,2,4,2,5,6,1]
cat = ['A','B','B','B','A','C','A','B','B']
df = pd.DataFrame(list(zip(x,y,cat)), columns =['x', 'y', 'cat'])
# Category -> colour lookup used for both the markers and the legend.
mycolorsdict = {'A':'red', 'B':'blue', 'C':'green'}
fig = plt.figure(figsize=(5,5), constrained_layout=True)
gs = fig.add_gridspec(2, 1)
ax1 = fig.add_subplot(gs[0, 0])
grouped = df.groupby('cat')
# Plot each category separately so that every group carries its own label;
# the legend call below then has one labelled entry per category.
for key, group in grouped:
    group.plot(ax=ax1, kind='scatter',
               x='x', y='y',
               label=key, color=mycolorsdict[key])
ax1.legend(loc='upper left', facecolor='white', frameon=1,
           framealpha=1, labelspacing=0.2, borderpad=0.25)

Related

Showing Matplotlib pie chart only top 3 item's percentage [duplicate]

I have the following code:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Fixed seed so the random demo data is reproducible.
np.random.seed(123456)
df = pd.DataFrame(3 * np.random.rand(4, 4), index=['a', 'b', 'c', 'd'],
                  columns=['x', 'y','z','w'])
plt.style.use('ggplot')
# The 'axes.color_cycle' rcParam no longer exists in current Matplotlib;
# the style's colours are read from the property cycler instead.
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
fig, axes = plt.subplots(nrows=2, ncols=3)
# Hide every axes first: the grid has 6 cells but only 4 columns are drawn.
for ax in axes.flat:
    ax.axis('off')
# One pie per DataFrame column, every wedge annotated with its percentage.
for ax, col in zip(axes.flat, df.columns):
    ax.pie(df[col], labels=df.index, autopct='%.2f', colors=colors)
    ax.set(ylabel='', title=col, aspect='equal')
axes[0, 0].legend(bbox_to_anchor=(0, 0.5))
fig.savefig('your_file.png') # Or whichever format you'd like
plt.show()
Which produces the following:
My question is: how can I remove a label based on a condition? For example, I'd only want to display labels with percent > 20%, such that the labels and values of a, c, d won't be displayed in X, etc.
The autopct argument from pie can be a callable, which will receive the current percentage. So you only would need to provide a function that returns an empty string for the values you want to omit the percentage.
Function
def my_autopct(pct):
    """Format a pie-wedge percentage, hiding wedges of 20% or less.

    Intended to be passed as the ``autopct`` callable of ``pie``.

    Args:
        pct: The wedge's share of the pie, in percent.

    Returns:
        ``pct`` formatted with two decimals when it is strictly greater
        than 20, otherwise an empty string.
    """
    return ('%.2f' % pct) if pct > 20 else ''
Plot with matplotlib.axes.Axes.pie
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(8, 6))
# One pie per DataFrame column; my_autopct decides per wedge whether the
# percentage text is shown.
for ax, col in zip(axes.flat, df.columns):
    ax.pie(df[col], labels=df.index, autopct=my_autopct)
    ax.set(ylabel='', title=col, aspect='equal')
fig.tight_layout()
Plot directly with the dataframe
axes = df.plot(kind='pie', autopct=my_autopct, figsize=(8, 6), subplots=True, layout=(2, 2), legend=False)
# Move whatever each subplot has as its y-label into the title instead,
# then clear the y-label.
for ax in axes.flat:
    yl = ax.get_ylabel()
    ax.set(ylabel='', title=yl)
fig = axes[0, 0].get_figure()
fig.tight_layout()
If you need to parametrize the value on the autopct argument, you'll need a function that returns a function, like:
def autopct_generator(limit):
    """Build an ``autopct`` callable with a configurable cut-off.

    Args:
        limit: Threshold in percent; wedges at or below it get no text.

    Returns:
        A one-argument function that formats a percentage with two
        decimals when it is strictly greater than ``limit`` and returns
        an empty string otherwise.
    """
    def inner_autopct(pct):
        return ('%.2f' % pct) if pct > limit else ''
    return inner_autopct
ax.pie(df[col], labels=df.index, autopct=autopct_generator(20), colors=colors)
For the labels, the best thing I can come up with is using list comprehension:
for ax, col in zip(axes.flat, df.columns):
    data = df[col]
    # 20% of the column total, computed once per column rather than once
    # per wedge.
    threshold = data.sum() * 0.2
    # Keep a wedge's label only when its value exceeds the threshold.
    labels = [n if v > threshold else ''
              for n, v in zip(df.index, data)]
    ax.pie(data, autopct=my_autopct, colors=colors, labels=labels)
Note, however, that the legend by default is being generated from the first passed labels, so you'll need to pass all values explicitly to keep it intact.
axes[0, 0].legend(df.index, bbox_to_anchor=(0, 0.5))
For labels I have used:
def my_level_list(data):
    """Return one label per value, blank for values that are 2% or less.

    Args:
        data: Iterable of numeric wedge sizes.

    Returns:
        A list as long as ``data`` where position ``i`` holds
        ``'Label <i+1>'`` when that value is strictly more than 2% of the
        total, and ``''`` otherwise.
    """
    # Sum once up front instead of on every iteration.
    total = np.sum(data)
    return ['Label ' + str(i + 1) if value * 100 / total > 2 else ''  # 2%
            for i, value in enumerate(data)]
patches, texts, autotexts = plt.pie(data, radius = 1, labels=my_level_list(data), autopct=my_autopct, shadow=True)
You can make the labels function a little shorter using list comprehension:
def my_autopct(pct):
    """Format a pie-wedge percentage, hiding wedges of 1% or less.

    Args:
        pct: The wedge's share of the pie, in percent.

    Returns:
        ``pct`` formatted with one decimal when it is strictly greater
        than 1, otherwise an empty string.
    """
    return ('%1.1f' % pct) if pct > 1 else ''
def get_new_labels(sizes, labels):
    """Blank out the labels whose matching size is 1 or less.

    Args:
        sizes: Iterable of numeric wedge sizes.
        labels: Iterable of labels, paired positionally with ``sizes``.

    Returns:
        A list with the original label where the size is strictly greater
        than 1 and ``''`` elsewhere. Pairing stops at the shorter input.
    """
    return [label if size > 1 else '' for size, label in zip(sizes, labels)]
fig, ax = plt.subplots()
_,_,_ = ax.pie(sizes, labels=get_new_labels(sizes, labels), colors=colors, autopct=my_autopct, startangle=90, rotatelabels=False)

Create multiple stacked bar-plots in one figure

The first image is the figure I'm trying to reproduce, and the second image is the data I have. Does anyone have a clean way to do this with pandas or matplotlib?
Just transpose the DataFrame and use df.plot with the stacked flag set to true:
import pandas as pd
from matplotlib import pyplot as plt
# One column per dataset, three values each.
df = pd.DataFrame({'squad': [0.6616, 0.1245, 0.0950],
'quac': [0.83, 0.065, 0.0176],
'quoref': [0.504, 0.340364, 0.1067]})
# Transpose so that each dataset becomes a row of the frame being plotted
plot_df = df.T
# Stacked bar plot of the transposed frame
ax = plot_df.plot(kind='bar', stacked=True, rot='horizontal')
# Anchor the legend just past the right edge of the axes (x = 1.05).
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)
ax.set_ylabel("% of Questions")
plt.tight_layout()
plt.show()
You can try this:
# One column per dataset; row i of the frame is the i-th layer of each stack.
data = {'squad':[0.661669, 0.127516, 0.095005],
'quac':[0.930514, 0.065951, 0.017680],
'quoref': [0.504963, 0.340364, 0.106700]}
df = pd.DataFrame(data)
# Pull out the three rows; each holds one value per dataset.
bars_1 = df.iloc[0]
bars_2 = df.iloc[1]
bars_3 = df.iloc[2]
# Heights of bars_1 + bars_2, used as the base of the third layer
bars_1_to_2 = np.add(bars_1, bars_2).tolist()
# The position of the bars on the x-axis
r = [0, 1, 2]
plt.figure(figsize = (7, 7))
# Draw the layers bottom-up: each `bottom` is the running total beneath it.
plt.bar(r, bars_1, color = 'lightgrey', edgecolor = 'white')
plt.bar(r, bars_2, bottom = bars_1, color = 'darkgrey', edgecolor = 'white')
plt.bar(r, bars_3, bottom = bars_1_to_2, color = 'dimgrey', edgecolor = 'white')
# Y ticks from 0 in steps of 0.1; x ticks labelled with the dataset names.
plt.yticks(np.arange(0, 1.1, 0.1))
plt.xticks(ticks = r, labels = df.columns)
plt.ylabel('% of Questions')
plt.show()

How to connect the middle-top of bars in barplot with a line, using seaborn or matplotlib?

# Three series of ten observations each.
myDict = {'a':[3,13,18,16,19,9,13,15,0,2],
          'b':[23,14,18,24,19,9,14,13,21,22],
          'c':[38,17,12,15,39,38,23,19,16,16]}
df = pd.DataFrame(myDict)
# Long format: one row per observation, in columns 'variable' and 'value'.
df_melted = df.melt(value_vars=['a','b','c'])
fig,ax1 = plt.subplots()
sns.barplot(x='variable',y='value',data=df_melted,capsize=0.1,ax=ax1,order=['b','a','c'])
# The original snippet had an unfinished `plt.` statement here, marking the
# spot where the call that connects the bar tops with a line is wanted.
plt.show()
plt.close()
Use a lineplot, but first you need to fix the category order yourself, because lineplot has no order argument the way barplot does. The steps are:
Create a copy of the dataframe
Set variable to be categorical with the order of ['b','a','c']
lineplot in the same ax
The code would be:
# Category order shared by the bar plot and the line plot.
order = ['b', 'a', 'c']
# Work on a copy so the melted frame used by barplot stays untouched.
df_2 = df_melted.copy()
# Make 'variable' categorical with the explicit order, then sort by it so
# the rows follow b, a, c.
df_2['variable'] = pd.Categorical(df_2['variable'], order)
df_2.sort_values('variable', inplace=True)
# Plot: bars first, then the line on the same axes
fig, ax1 = plt.subplots()
sns.barplot(x='variable', y='value', data=df_melted, capsize=0.1, ax=ax1,
order=order)
# NOTE(review): newer seaborn releases appear to spell `ci=None` as
# `errorbar=None` — confirm against the installed version.
sns.lineplot(x='variable', y='value', data=df_2,
ax=ax1, color='r', marker='+', linewidth=5, ci=None)
plt.show()
That will produce:

Add vertical line to pandas df.plot of timeseries

I have a time series plot and I would like to add a vertical line to it at event time. If I use this code:
event_time = pd.to_datetime('10/12/2016 06:21:00')
# First series on the primary y-axis, drawn as markers only.
ax = df_stats_t.plot(x='t', y='t_TI_var_pwr', linestyle='none',
                     color='black', marker='o')
# Second y-axis sharing the same x-axis.
ax1 = ax.twinx()
# Vertical line added BEFORE the second series is plotted.
ax1.axvline(event_time, color='red', linestyle='-')
df_stats_t.plot(x='t', y='t_TI_var_ws', ax=ax1, linestyle='none',
                color='green', marker='o')
It takes a subset of the time series starting at event_time and doesn't produce a vertical line.
If I move ax1.axvline(event_time, color='red', linestyle='-') to the bottom, I get the plot I want but the vertical line is still missing.
event_time = pd.to_datetime('10/12/2016 06:21:00')
# First series on the primary y-axis, drawn as markers only.
ax = df_stats_t.plot(x='t', y='t_TI_var_pwr', linestyle='none',
                     color='black', marker='o')
# Second y-axis sharing the same x-axis.
ax1 = ax.twinx()
df_stats_t.plot(x='t', y='t_TI_var_ws', ax=ax1, linestyle='none',
                color='green', marker='o')
# Vertical line added AFTER both series have been plotted.
ax1.axvline(event_time, color='red', linestyle='-')
How can I get the vertical line to display at x = event_time for all y values?
It works if you call axvline through plt:
# Plot both series exactly as in the question (primary axis, then twin axis).
ax = df_stats_t.plot(x = 't', y='t_TI_var_pwr', linestyle='none', color = 'black', marker = 'o')
ax1 = ax.twinx()
df_stats_t.plot(x='t',y='t_TI_var_ws',ax=ax1, linestyle='none', color = 'green', marker = 'o')
# Draw the vertical line through pyplot rather than through ax1;
# event_time is the timestamp built with pd.to_datetime in the question.
plt.axvline(event_time, color='red', linestyle='-')

How to set common labels with matplotlib

I have a plot obtained in this way:
f, ((ax1, ax2, ax3, ax4), (ax5, ax6, ax7, ax8), (ax9, ax10, ax11, ax12)) = plt.subplots(3, 4, sharex = 'col', sharey = 'row')
ax1.set_title('column1')
ax1.plot([x], [y])
ax5.plot([x1],[y1])
ax9.plot([x2],[y2])
.....
so, I essentially have 3 rows and 4 columns.
I would like to know how it is possible to put common labels on the x and y axes.
I tried to write
plt_xlabel('x')
plt.ylabel('y')
or
set.xlabel('x')
set.ylabel('y')
but it doesn't work. Can you help me? Is it also possible to put text on the right-hand side of the plot?
You can do this by iterating over your list of axes:
f, ax_lst = plt.subplots(3, 4, sharex = 'col', sharey = 'row')
# ax_lst is a 3x4 grid of axes; .flat walks it row by row, so every subplot
# gets the same x and y label.
for ax in ax_lst.flat:
    ax.set_xlabel('x')
    ax.set_ylabel('y')