Select appropriate colors in stacked Seaborn barplot - pandas

I want to create a stacked barplot using Seaborn with this MiltiIndex DataFrame
header = pd.MultiIndex.from_product([['#'],
['TE', 'SS', 'M', 'MR']])
dat = ([[100, 20, 21, 35], [100, 12, 5, 15]])
df = pd.DataFrame(dat, index=['JC', 'TTo'], columns=header)
df = df.stack()
df = df.sort_values('#', ascending=False).sort_index(level=0, sort_remaining=False)
The code I'm using for the plot is:
fontP = FontProperties()
fontP.set_size('medium')
colors = {'TE': 'green', 'SS': 'blue', 'M': 'yellow', 'MR': 'red'}
kwargs = {'alpha':0.5}
plt.figure(figsize=(12, 9))
sns.barplot(x=df2.index.get_level_values(0).unique(),
y=df2.loc[pd.IndexSlice[:, df2.index[0]], '#'],
color=colors[df2.index[0][1]], **kwargs)
sns.barplot(x=df2.index.get_level_values(0).unique(),
y=df2.loc[pd.IndexSlice[:, df2.index[1]], '#'],
color=colors[df2.index[1][1]], **kwargs)
sns.barplot(x=df2.index.get_level_values(0).unique(),
y=df2.loc[pd.IndexSlice[:, df2.index[2]], '#'],
color=colors[df2.index[2][1]], **kwargs)
bottom_plot = sns.barplot(x=df2.index.get_level_values(0).unique(),
y=df2.loc[pd.IndexSlice[:, df2.index[3]], '#'],
color=colors[df2.index[3][1]], **kwargs)
bar1 = plt.Rectangle((0, 0), 1, 1, fc='green', edgecolor="None")
bar2 = plt.Rectangle((0, 0), 0, 0, fc='yellow', edgecolor="None")
bar3 = plt.Rectangle((0, 0), 2, 2, fc='red', edgecolor="None")
bar4 = plt.Rectangle((0, 0), 3, 3, fc='blue', edgecolor="None")
l = plt.legend([bar1, bar2, bar3, bar4], [
"TE", "M",
'MR', 'SS'
],
bbox_to_anchor=(0.95, 1),
loc='upper left',
prop=fontP)
l.draw_frame(False)
sns.despine()
bottom_plot.set_ylabel("#")
axes = plt.gca()
axes.yaxis.grid()
And I get:
My problem is the order of the colors in the second bar ('TTo'), I want the colors to be automatically selected based on the level 1 index value (['TE', 'SS', 'M', 'MR']) so that they are ordered correctly. Further down the one with the highest value with its corresponding color, in front the next one with the next highest value and its color and so on, as the first bar shows ('JC).
Maybe there is a simpler way to do this in Seaborn than the one I'm using...

I'm not sure how to create such a plot with seaborn. Here is a way to create it with a loop through the rows and adding one matplotlib bar at each step:
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
sns.set()
header = pd.MultiIndex.from_product([['#'],
['TE', 'SS', 'M', 'MR']])
dat = ([[100, 20, 21, 35], [100, 12, 5, 15]])
df = pd.DataFrame(dat, index=['JC', 'TTo'], columns=header)
df = df.stack()
df = df.sort_values('#', ascending=False).sort_index(level=0, sort_remaining=False)
colors = {'TE': 'green', 'SS': 'blue', 'M': 'yellow', 'MR': 'red'}
prev_index0 = None
for (index0, index1), quantity in df.itertuples():
if index0 != prev_index0:
bottom = 0
plt.bar(index0, quantity, fc=colors[index1], ec='none', bottom=bottom, label=index1)
bottom += quantity
prev_index0 = index0
legend_handles = [plt.Rectangle((0, 0), 0, 0, color=colors[c], label=c) for c in colors]
plt.legend(handles=legend_handles)
plt.show()
To plot the bars back to front without stacking, the code can be simplified:
colors = {'TE': 'forestgreen', 'SS': 'cornflowerblue', 'M': 'gold', 'MR': 'crimson'}
for (index0, index1), quantity in df.itertuples():
plt.bar(index0, quantity, fc=colors[index1], ec='none', label=index1)
legend_handles = [plt.Rectangle((0, 0), 0, 0, color=colors[c], label=c, ec='black') for c in colors]
plt.legend(handles=legend_handles, bbox_to_anchor=(1.02, 1.02), loc='upper left')
plt.tight_layout()

Related

pandas sort values in plot with groupby

I'm working on this dataset and I've this code for the plot below
x = df2.groupby(by = ['LearnCode', 'Age']).size()
chart = x.unstack()
axs = chart.plot.barh(subplots=True,figsize=(20,50), layout=(9,1), legend=False, title=chart.columns.tolist())
ax_flat = axs.flatten()
for ax in ax_flat:
ax.yaxis.label.set_visible(False)
How can I sort the values of each category for every plot alone?
You can do it, but probably, you'll have to plot each subplot separately.
df2 = pd.DataFrame({'LearnCode': ['A', 'B', 'B', 'B', 'B', 'A', 'C', 'C', 'B', 'A', 'C', 'C', 'B'],
'Age': [18, 18, 18, 18, 18, 18, 18, 24, 24, 24, 24, 24, 24]})
x = df2.groupby(by = ['LearnCode', 'Age']).size()
chart = x.unstack()
f, axs = plt.subplots(nrows=len(chart.columns), ncols=1, figsize=(20,10), sharex='col')
#to each subplot to have different color
colors = plt.rcParams["axes.prop_cycle"]()
for i, age in enumerate(chart):
chart[age].sort_values().plot.barh(title = age,
ax = axs[i],
color = next(colors)["color"],
xlabel = '')
PS. For me, it's better to have original graph than graph like this (it's much easier to track differences between groups).

matplotlib grouped bar color change

I am trying to compare group A and group B using grouped bar, but group A must have the same colors and group B must have different colors and legends. I somehow created graph, but not sure how to change the color of each group B bar graphs..
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
labels = ['M1', 'A1', 'M2', 'A2', 'M3', 'A3', 'M4', 'A4', 'M5', 'A5']
A_group = [20, 34, 30, 35, 27, 17, 64, 23, 47, 52]
B_group = [25, 32, 34, 20, 25, 76, 33, 54, 16, 21]
x = np.arange(len(labels)) # the label locations
width = 0.35 # the width of the bars
fig, ax = plt.subplots()
rects1 = ax.bar(x - width/2, A_group, width, label='A group')
rects2 = ax.bar(x + width/2, B_group, width, label='B group')
# Add some text for labels, title and custom x-axis tick labels, etc.
ax.set_ylabel('Accuracy')
ax.set_title('Test')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend()
ax.bar_label(rects1, padding=3)
ax.bar_label(rects2, padding=3)
plt.xticks(rotation=30, ha='right')
plt.ylim(0, 100)
fig.tight_layout()
plt.show()
Now my graph looks like this:
I want to make my graph like this. Below is an example using powerpoint.
Any helps will be appreciated. Thank you in advance.
Try drawing the bars in B_group one by one:
fig, ax = plt.subplots()
ax.bar(x-width/2, A_group, width=width,label='A Group')
cmap = plt.get_cmap('tab20')
ax.set_prop_cycle(color=[cmap(k) for k in x+1])
for i in x:
ax.bar(i+width/2, B_group[i], width=width)
Output:

Geopandas consistent user defined color scheme for subplots

I am new to geopandas and I am having trouble creating choropleth subplots with consistent bins. I need to create a consistent user defined color scheme across all subplots.
I have followed the examples below:
matplotlib geopandas plot chloropleth with set bins for colorscheme
https://github.com/geopandas/geopandas/issues/1019
While I am able to reproduce both examples, I get very strange behavior with my own data. Below is a toy example that replicates my problem.
import geopandas as gpd
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mapclassify import Quantiles, UserDefined
import os
# Note you can read directly from the URL
gdf = gpd.read_file('https://opendata.arcgis.com/datasets/8d3a9e6e7bd445e2bdcc26cdf007eac7_4.geojson')
#gdf.plot()
gdf.shape
gdf.columns
gdf['rgn15nm'].head(9)
d = {
'rgn15nm': ['North East', 'North West', 'Yorkshire and The Humber', 'East Midlands', 'West Midlands', 'East of England', 'London', 'South East', 'South West'],
'1980' : pd.Series([0, 1, 0, 0, 0, 0, 0, 0, 0]),
'2000' : pd.Series([1, 1, 1, 0, 0, 0, 0, 0, 0]),
'2020' : pd.Series([1, 1, 10, 3, 1, 0, 0, 0, 1])
}
df = pd.DataFrame(d)
The data looks like this:
gdf = gdf.merge(df, on='rgn15nm')
# Define bins
gdf['2020'].describe()
bins= UserDefined(gdf['2020'], bins=[0,1,2,3,4,5,6,7,8,9,10]).bins
bins
# create a new column with the discretized values and plot that col
# repeat for each view
fig,(ax1,ax2,ax3) = plt.subplots(1,3,figsize=(15,6))
gdf.assign(cl=UserDefined(gdf['1980'].dropna(), bins).yb).plot(column='cl', ax=ax1, cmap='OrRd', legend = True )
gdf.assign(cl=UserDefined(gdf['2000'].dropna(), bins).yb).plot(column='cl', ax=ax2, cmap='OrRd', legend = True)
gdf.assign(cl=UserDefined(gdf['2020'].dropna(), list(bins)).yb).plot(column='cl', ax=ax3, cmap='OrRd', legend = True)
for ax in (ax1,ax2,ax3,):
ax.axis('off')
Clearly, the color scheme is not the same across subplots. What I mean by this is that 'Northwest' (the only region highlighted in the 1980 subplot) had the same value of 1 in all years 1980, 2000 and 2020. Yet, this region shows in different colors across the 3 subplots, despite the value being constant. I want "Northwest" to show in the same color (that of the subplot for 2020) across all 3 subplots.
I also tried this:
fig,(ax1,ax2,ax3) = plt.subplots(1,3,figsize=(15,6))
ax1.set_title('1980')
ax2.set_title('2000')
ax3.set_title('2020')
gdf.plot(column='1980', ax=ax1, cmap='OrRd', scheme='userdefined', classification_kwds={'bins':[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]})
gdf.plot(column='2000', ax=ax2, cmap='OrRd', scheme='userdefined', classification_kwds={'bins':[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]})
gdf.plot(column='2020', ax=ax3, cmap='OrRd', scheme='userdefined', classification_kwds={'bins':[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]})
for ax in (ax1,ax2,ax3):
ax.axis('off')
But got exactly the same figure as immediately above (see below)
Does any one have any insight? I want a consistent color scheme across all 3 subplots.
So ultimately the solution was using the "norm" option. Following this example: Geopandas userdefined color scheme drops colors. See below:
from matplotlib.colors import Normalize
bins= UserDefined(gdf['2020'], bins=[0,1,2,3,4,5,6,7,8,9,10]).bins
bins
fig,(ax1,ax2,ax3) = plt.subplots(1,3,figsize=(15,6))
ax1.set_title('1980')
ax2.set_title('2000')
ax3.set_title('2020')
gdf.plot(column='1980', ax=ax1, cmap='OrRd', scheme='userdefined', classification_kwds={'bins':bins}, norm=Normalize(0, len(bins)))
gdf.plot(column='2000', ax=ax2, cmap='OrRd', scheme='userdefined', classification_kwds={'bins':bins}, norm=Normalize(0, len(bins)))
gdf.plot(column='2020', ax=ax3, cmap='OrRd', scheme='userdefined', classification_kwds={'bins':bins}, norm=Normalize(0, len(bins)))
for ax in (ax1,ax2,ax3):
ax.axis('off')
The result is what I wanted:
Expected Graph
or as suggested by Paul H:
fig,(ax1,ax2,ax3) = plt.subplots(1,3,figsize=(15,6))
ax1.set_title('1980')
ax2.set_title('2000')
ax3.set_title('2020')
gdf.plot(column='1980', ax=ax1, cmap='OrRd', scheme='userdefined', classification_kwds={'bins':bins}, vmin = 0, vmax = 10)
gdf.plot(column='2000', ax=ax2, cmap='OrRd', scheme='userdefined', classification_kwds={'bins':bins}, vmin = 0, vmax = 10)
gdf.plot(column='2020', ax=ax3, cmap='OrRd', scheme='userdefined', classification_kwds={'bins':bins}, vmin = 0, vmax = 10)
for ax in (ax1,ax2,ax3):
ax.axis('off')

matplotlib histogram with equal bars width

I use a histogram to display the distribution. Everything works fine if the spacing of the bins is uniform. But if the interval is different, then the bar width is appropriate (as expected). Is there a way to set the width of the bar independent of the size of the bins ?
This is what i have
This what i trying to draw
from matplotlib import pyplot as plt
my_bins = [10, 20, 30, 40, 50, 120]
my_data = [5, 5, 6, 8, 9, 15, 25, 27, 33, 45, 46, 48, 49, 111, 113]
fig1 = plt.figure()
ax1 = fig1.add_subplot(121)
ax1.set_xticks(my_bins)
ax1.hist(my_data, my_bins, histtype='bar', rwidth=0.9,)
fig1.show()
I cannot mark your question as a duplicate, but I think my answer to this question might be what you are looking for?
I'm not sure how you'll make sense of the result, but you can use numpy.histogram to calculate the height of your bars, then plot those directly against an arbitrary x-scale.
x = np.random.normal(loc=50, scale=200, size=(2000,))
bins = [0,1,10,20,30,40,50,75,100]
fig = plt.figure()
ax = fig.add_subplot(211)
ax.hist(x, bins=bins, edgecolor='k')
ax = fig.add_subplot(212)
h,e = np.histogram(x, bins=bins)
ax.bar(range(len(bins)-1),h, width=1, edgecolor='k')
EDIT Here's with the adjustment to the x-tick labels so that the correspondence is easier to see.
my_bins = [10, 20, 30, 40, 50, 120]
my_data = [5, 5, 6, 8, 9, 15, 25, 27, 33, 45, 46, 48, 49, 111, 113]
fig = plt.figure()
ax = fig.add_subplot(211)
ax.hist(my_data, bins=my_bins, edgecolor='k')
ax = fig.add_subplot(212)
h,e = np.histogram(my_data, bins=my_bins)
ax.bar(range(len(my_bins)-1),h, width=1, edgecolor='k')
ax.set_xticks(range(len(my_bins)-1))
ax.set_xticklabels(my_bins[:-1])

matplotlib advanced stacked bar

matplotlib plot bars
It can be regular like http://matplotlib.org/examples/api/barchart_demo.html
Let's define this as [M, F]
It can be stacked like http://matplotlib.org/examples/pylab_examples/bar_stacked.html
Let's define this as [M + F]
Now how to plot [M, F + other]
If I understand you correctly, you want to have a stack plot with more than two elements stacked? If yes, that goes pretty straight forward as in the example you posted:
#!/usr/bin/env python
# a stacked bar plot with errorbars
import numpy as np
import matplotlib.pyplot as plt
N = 5
menMeans = [20, 35, 30, 35, 27]
womenMeans = [25, 32, 34, 20, 25]
otherMeans = [5, 2, 4, 8, 5]
menStd = [2, 3, 4, 1, 2]
womenStd = [3, 5, 2, 3, 3]
otherStd = [1, 1, 1, 1, 1]
ind = np.arange(N) # the x locations for the groups
width = 0.35 # the width of the bars: can also be len(x) sequence
p1 = plt.bar(ind, menMeans, width, color='r', yerr=womenStd)
p2 = plt.bar(ind, womenMeans, width, color='y',
bottom=menMeans, yerr=menStd)
p3 = plt.bar(ind, otherMeans, width, color='b',
bottom=[menMeans[j] + womenMeans[j] for j in range(len(menMeans)) ],
yerr=otherStd)
plt.ylabel('Scores')
plt.title('Scores by group and gender')
plt.xticks(ind+width/2., ('G1', 'G2', 'G3', 'G4', 'G5') )
plt.yticks(np.arange(0,81,10))
plt.legend( (p1[0], p2[0], p3[0]), ('Men', 'Women', 'Other') )
plt.show()