Ipython - plot pie chart from series with series table next to it - pandas

I have a matplotlib pie chart in an IPython notebook with the grouped Series placed next to it via plt.text. The problem is that the table is formatted as plain Series output and not as a nice table. What am I doing wrong?
# Total the dollar charges per location, as whole numbers.
charges = df['dollar charge']
sumByGroup = charges.groupby(df['location']).sum().astype('int')
# Pie chart of the totals with percentage labels.
sumByGroup.plot.pie(title='DOLLARS', autopct='%1.1f%%')
current_axes = plt.gca()
current_axes.axis('off')
# Put the Series itself next to the pie as text.
current_axes.text(2, -0.5, sumByGroup, size=12)

I think the problem is that you're calling groupby on df['dollar charge'] rather than on the df as a whole. Try this instead:
# Group the whole frame (not a single column) so the result is a DataFrame.
sumByGroup = df.groupby(df['location']).sum().astype('int')
sumByGroup.plot(y='dollar charge', kind='pie', title='DOLLARS', autopct='%1.1f%%')
plt.axis('off')
# Pass an explicit string instead of the DataFrame object: to_string() renders
# the complete table, and a monospace font keeps its space-padded columns aligned.
plt.text(2, -0.5, sumByGroup.to_string(), size=12, family='monospace')
Full working example with made up data.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Fixed seed so the made-up data (and therefore the figure) is reproducible.
np.random.seed(123)
n = 20
locations = ['MD', 'DC', 'VA', 'NC', 'NY']
df = pd.DataFrame({'dollar charge': np.random.randint(28, 53, n),
                   'location': np.random.choice(locations, n),
                   'Col A': np.random.randint(-5, 5, n),
                   'Col B': np.random.randint(-5, 5, n)})
# Sum every column per location; the result is a DataFrame indexed by location.
sumByGroup = df.groupby(df['location']).sum()
fig, ax = plt.subplots()
sumByGroup.plot(y='dollar charge', kind='pie', title='DOLLARS',
                autopct='%1.1f%%', legend=False, ax=ax)
ax.axis('off')
# Pass an explicit string instead of the DataFrame object: to_string() renders
# the complete table, and a monospace font keeps its space-padded columns aligned.
ax.text(2, -0.5, sumByGroup.to_string(), size=12, family='monospace')
ax.set_aspect('equal')
# Needed when run as a plain script; harmless in a notebook.
plt.show()

Related

How to turn seaborn boxplot fliers on/off with buttons

I want to implement buttons to turn on/off the fliers in a set of seaborn boxplots. I tried to follow the method of changing through the artists mentioned in this link: https://stackoverflow.com/a/36893152/18193150 but was unsuccessful. Appreciate if someone can show me how to do it. Cheers.
This is the code I tried with:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from matplotlib.widgets import Button
# Group ids 1..12, each repeated 40 times, used as the DataFrame index.
x = np.arange(1, 13)
index = np.repeat(x, 40)
np.random.seed(123)
# Two columns of normally distributed samples, one row per index entry.
df = pd.DataFrame({'A': np.random.normal(30, 2, len(index)),
'B': np.random.normal(10, 2, len(index))},
index=index)
# Marker styles for the fliers of each boxplot set.
red_diamond = dict(markerfacecolor='r', marker='D')
blue_dot = dict(markerfacecolor='b', marker='o')
fig=plt.figure(figsize=[10, 5])
# Both boxplot sets are drawn on the same Axes (`ax`).
ax = sns.boxplot(data=df, x=df.index, y='A', width=0.5, color='red',
boxprops=dict(alpha=.5, label='A'), flierprops=red_diamond)
sns.boxplot(data=df, x=df.index, y='B', width=0.5, color='blue',
boxprops=dict(alpha=.5, label='B'), flierprops=blue_dot, ax=ax)
# Button intended to switch the boxplot fliers off.
resetax_off = plt.axes([0.8, 0.02, 0.08, 0.035])
button_off = Button(resetax_off, 'Flier off', color='red',
hovercolor='lightslategrey')
# Button intended to switch the boxplot fliers on.
resetax_on = plt.axes([0.6, 0.02, 0.08, 0.035])
button_on = Button(resetax_on, 'Flier on', color='gold',
hovercolor='lightslategrey')
# NOTE(review): the bodies of click_off/click_on have lost their indentation
# in this paste, so the snippet does not parse as shown.
# Callback for the "Flier off" button: tries to make the flier lines transparent.
def click_off(event):
for i,artist in enumerate(ax.artists):
# NOTE(review): `ax1` is never defined in this snippet (only `ax` is), and the
# attribute is spelled `.line`; presumably `ax.lines` was intended -- confirm.
line = ax1.line[i+4] #trying to get Line2D for the fliers, 4th in the list of 6
line.set(alpha=0)
fig.canvas.draw_idle()
button_off.on_clicked(click_off)
# Callback for the "Flier on" button: mirror of click_off with alpha=1.
def click_on(event):
for i,artist in enumerate(ax.artists):
# NOTE(review): same undefined `ax1` / `.line` lookup as in click_off.
line = ax1.line[i+4] #trying to get Line2D for the fliers
line.set(alpha=1)
fig.canvas.draw_idle()
button_on.on_clicked(click_on)
plt.show()

How to add labels to sets of seaborn boxplot

I have 2 sets of boxplots, one set in blue color and another in red color. I want the legend to show the label for each set of boxplots, i.e.
Legend:
-blue box- A, -red box- B
I added labels='A' and labels='B' within sns.boxplot(), but it didn't work and gave the message "No artists with labels found to put in legend. Note that artists whose label start with an underscore are ignored when legend() is called with no argument". How do I add the labels?
enter image description here
code for the inserted image:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Twelve group ids (1..12), each repeated n times, used as the index.
x = list(range(1, 13))
n = 40
index = []
for item in x:
    index.extend([item] * n)

np.random.seed(123)
samples = {
    'A': np.random.normal(30, 2, len(index)),
    'B': np.random.normal(10, 2, len(index)),
}
df = pd.DataFrame(samples, index=index)

# Flier marker styles for the two boxplot sets.
red_diamond = {'markerfacecolor': 'r', 'marker': 'D'}
blue_dot = {'markerfacecolor': 'b', 'marker': 'o'}

plt.figure(figsize=[10, 5])
ax = plt.gca()
# Both calls draw on the current Axes; `labels=` is the keyword the question asks about.
ax1 = sns.boxplot(x=df.index, y=df['A'], width=0.5, color='red',
                  boxprops={'alpha': .5}, flierprops=red_diamond, labels='A')
ax2 = sns.boxplot(x=df.index, y=df['B'], width=0.5, color='blue',
                  boxprops={'alpha': .5}, flierprops=blue_dot, labels='B')
plt.ylabel('Something')
plt.legend(loc="center", fontsize=8, frameon=False)
plt.show()
Here are the software versions I am using: seaborn version 0.11.2. matplotlib version 3.5.1. python version 3.10.1
The following approach sets a label via the boxprops, and creates a legend using part of ax.artists. (Note that ax, ax1 and ax2 of the question's code are all pointing to the same subplot, so here only ax is used.)
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
# Group ids 1..12, each repeated 40 times, used as the index.
x = np.arange(1, 13)
index = np.repeat(x, 40)
np.random.seed(123)
n_rows = len(index)
df = pd.DataFrame({'A': np.random.normal(30, 2, n_rows),
                   'B': np.random.normal(10, 2, n_rows)},
                  index=index)

red_diamond = {'markerfacecolor': 'r', 'marker': 'D'}
blue_dot = {'markerfacecolor': 'b', 'marker': 'o'}

plt.figure(figsize=[10, 5])
# The label travels with boxprops; both sets are drawn on the same Axes.
ax = sns.boxplot(data=df, x=df.index, y='A', width=0.5, color='red',
                 boxprops={'alpha': .5, 'label': 'A'}, flierprops=red_diamond)
sns.boxplot(data=df, x=df.index, y='B', width=0.5, color='blue',
            boxprops={'alpha': .5, 'label': 'B'}, flierprops=blue_dot, ax=ax)
ax.set_ylabel('Something')

# Keep only the first handle of each run of identical consecutive labels,
# so the legend gets one entry per boxplot set.
all_handles, all_labels = ax.get_legend_handles_labels()
handles = []
previous_label = None
for handle, label in zip(all_handles, all_labels):
    if label != previous_label:
        handles.append(handle)
    previous_label = label

ax.legend(handles=handles, loc="center", fontsize=8, frameon=False)
plt.show()
Alternative approaches could be:
pd.melt the dataframe to long form, so hue could be used; a problem here is that then the legend wouldn't take the alpha from the boxprops into account; also setting different fliers wouldn't be supported
create a legend from custom handles

How to plot stats.probplot in a grid?

I have a data frame with four columns. I would like to plot the normality test for each column in a 2×2 grid, but only one of the subplots is drawn and the others stay empty.
import random
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
# 2x2 grid of subplots, with the spacing between them adjusted.
fig, axs = plt.subplots(2,2, figsize=(15, 6), facecolor='w', edgecolor='k')
fig.subplots_adjust(hspace = .5, wspace=.001)
# Four columns of 1000 random integers drawn with randrange(1, 50, 1).
data = {'col1': [random.randrange(1, 50, 1) for i in range(1000)], 'col2': [random.randrange(1, 50, 1) for i in range(1000)],'col3':[random.randrange(1, 50, 1) for i in range(1000)]
,'col4':[random.randrange(1, 50, 1) for i in range(1000)]}
df = pd.DataFrame(data)
# Pair each subplot with one column name (iterating a DataFrame yields its column labels).
# NOTE(review): the loop body has lost its indentation in this paste. The zipped
# `ax` is immediately rebound to probplot's return value, and the plot target
# passed is the pyplot module (`plot=plt`) rather than the zipped Axes.
for ax, d in zip(axs.ravel(), df):
ax=stats.probplot(df[d], plot=plt)
#ax.set_title(str(d))
plt.show()
Is there a way to construct the subplots and call stats.probplot within a loop?
In your code, you need to change the for loop to this:
# Pair each subplot with one column name and draw that column's probability
# plot directly on that subplot.
for ax, d in zip(axs.ravel(), df):
    stats.probplot(df[d], plot=ax)
    # Title each panel with its column name so the four plots can be told apart.
    ax.set_title(str(d))
# Show the finished grid once, after every panel has been drawn.
plt.show()
I hope this will help you move on.

Matplotlib--scatter plot with half filled markers

Question: Using a scatter plot in matplotlib, is there a simple way to get a half-filled marker?
I know half-filled markers can easily be done using a line plot, but I would like to use 'scatter' because I want to use marker size and color (i.e., alternate marker face color) to represent other data. (I believe this will be easier with a scatter plot since I want to automate making a large number of plots from a large data set.)
I can't seem to make half-filled markers properly using a scatter plot. That is to say, instead of a half-filled marker, the plot shows only half of a marker. I've been using matplotlib.markers.MarkerStyle, but that seems to only get me halfway there. I'm able to get the following output using the code below.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.markers import MarkerStyle
# One marker per fill style, placed along the diagonal at (1, 1) .. (5, 5).
fill_styles = ['full', 'left', 'right', 'top', 'bottom']
for position, fill in enumerate(fill_styles, start=1):
    styled_marker = MarkerStyle('o', fillstyle=fill)
    plt.scatter(position, position, marker=styled_marker, edgecolors='k', s=500)
plt.show()
As mentioned in the comments, I don't see why you have to use plt.scatter but if you want to, you can fake a combined marker:
from matplotlib.markers import MarkerStyle
from matplotlib import pyplot as plt
#data generation
import pandas as pd
import numpy as np
np.random.seed(123)
n = 10
# Positions, marker sizes and one colour name per marker half.
columns = {
    "X": np.random.randint(1, 20, n),
    "Y": np.random.randint(10, 30, n),
    "S": np.random.randint(50, 500, n),
    "C1": np.random.choice(["red", "blue", "green"], n),
    "C2": np.random.choice(["yellow", "grey"], n),
}
df = pd.DataFrame(columns)

fig, ax = plt.subplots()
# Draw every point twice: once as a right half coloured by C1, once as a left
# half coloured by C2, so the two halves together look like one marker.
for colour_column, half in (("C1", "right"), ("C2", "left")):
    ax.scatter(df["X"], df["Y"], s=df["S"], c=df[colour_column],
               edgecolor="black", marker=MarkerStyle("o", fillstyle=half))
plt.show()
Sample output:
This works, of course, also for continuous data:
from matplotlib import pyplot as plt
from matplotlib.markers import MarkerStyle
import pandas as pd
import numpy as np
np.random.seed(123)
n = 10
# Positions, marker sizes and one numeric colour value per marker half.
columns = {
    "X": np.random.randint(1, 20, n),
    "Y": np.random.randint(10, 30, n),
    "S": np.random.randint(100, 1000, n),
    "C1": np.random.randint(1, 100, n),
    "C2": np.random.random(n),
}
df = pd.DataFrame(columns)

fig, ax = plt.subplots(figsize=(10, 8))

# (colour column, marker half, colormap, colorbar label) for each half.
halves = [
    ("C1", "right", "autumn", "right half"),
    ("C2", "left", "winter", "left half"),
]

# First draw both half-marker layers ...
images = []
for colour_column, half, cmap_name, _ in halves:
    image = ax.scatter(df["X"], df["Y"], s=df["S"], c=df[colour_column],
                       edgecolor="black",
                       marker=MarkerStyle("o", fillstyle=half), cmap=cmap_name)
    images.append(image)

# ... then attach one colorbar per layer, in the same order.
for image, (_, _, _, bar_label) in zip(images, halves):
    colour_bar = plt.colorbar(image, ax=ax)
    colour_bar.set_label(bar_label, rotation=90)

plt.show()
Sample output:
But be reminded that plt.plot with marker definitions might be faster for large-scale datasets: The plot function will be faster for scatterplots where markers don't vary in size or color.

how to plot lines linking medians of multiple violin distributions in seaborn?

I am struggling to plot a dotted line between the median values (and the min and max) per type across stacked violin distributions.
I tried superposing a violin plot with a seaborn.lineplot but it failed. I'm not sure with this approach that I can draw dot-lines and also link min and max of distributions of the same type. I also tried to use seaborn.lineplot but here the challenge is to plot min and max of the distribution at each x-axis value.
Here is a example dataset and the code for the violin plot in seaborn
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# Four x levels with twelve observations each.
x = [0.2] * 12 + [0.4] * 12 + [0.6] * 12 + [0.8] * 12
# Within every x level: four 'a', four 'b', four 'c'.
cate = (['a'] * 4 + ['b'] * 4 + ['c'] * 4) * 4
y = [
    1.1, 1.12, 1.13, 1.13, 3.1, 3.12, 3.13, 3.13, 5.1, 5.12, 5.13, 5.13,
    2.2, 2.22, 2.25, 2.23, 4.2, 4.22, 4.25, 4.23, 6.2, 6.22, 6.25, 6.23,
    2.2, 2.22, 2.24, 2.23, 4.2, 4.22, 4.24, 4.23, 6.2, 6.22, 6.24, 6.23,
    1.1, 1.13, 1.14, 1.12, 3.1, 3.13, 3.14, 3.12, 5.1, 5.13, 5.14, 5.12,
]
my_pal = ['red', 'green', 'purple']
df = pd.DataFrame({'x': x, 'Type': cate, 'y': y})

# Options shared by the violin plot and the line plot.
shared = {'y': 'y', 'x': 'x', 'data': df, 'hue': 'Type', 'palette': my_pal}
ax = sns.catplot(kind="violin", dodge=False, **shared)
sns.lineplot(ci=100, legend=False, **shared)
plt.show()
But the lines are drawn only over a small part at the left of the plot. Is there a trick to superimpose a lineplot on a violin plot?
For the line plot, 'x' is considered numerical. However, for the violin plot 'x' is considered categorical (positioned at 0, 1, 2, ...).
A solution is to convert 'x' to strings to have both plots consider it as categorical.
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# Fixed seed so the random example data (and the figure) is reproducible.
np.random.seed(123)
my_pal = ['red', 'green', 'purple']
N = 40
df = pd.DataFrame({'x': np.random.randint(1, 6, N*3) * 0.2,
                   'y': np.random.uniform(0, 1, N*3) + np.tile([2, 4, 6], N),
                   'Type': np.tile(list('abc'), N)})
# Turn 'x' into strings so both plots treat it as categorical.
df['x'] = [f'{x:.1f}' for x in df['x']]
# Pin the category order explicitly. The x values are random here, so their
# order of first appearance is arbitrary; passing `order` places the violins in
# sorted order regardless.
# NOTE(review): depending on the seaborn version, string categories may
# otherwise be placed in order of first appearance and not line up with the
# line plot -- confirm against the version in use.
x_order = sorted(df['x'].unique())
ax = sns.violinplot(y='y', x='x', data=df, hue='Type', palette=my_pal,
                    dodge=False, order=x_order)
# Draw the lines on the same Axes as the violins.
ax = sns.lineplot(y='y', x='x', data=df, hue='Type', palette=my_pal, ci=100, legend=False, ax=ax)
ax.margins(0.15) # slightly more padding for x and y axis
ax.legend(bbox_to_anchor=(1.01, 1), loc='upper left')
plt.tight_layout()
plt.show()