Create multiple stacked bar-lots in one figure - pandas

The first image is the figure I'm trying to reproduce, and the second image is the data I have. Does anyone have a clean way to do this with pandas or matplotlib?

Just transpose the DataFrame and use df.plot with the stacked flag set to true:
import pandas as pd
from matplotlib import pyplot as plt
df = pd.DataFrame({'squad': [0.6616, 0.1245, 0.0950],
'quac': [0.83, 0.065, 0.0176],
'quoref': [0.504, 0.340364, 0.1067]})
# Transpose
plot_df = df.T
# plot
ax = plot_df.plot(kind='bar', stacked=True, rot='horizontal')
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)
ax.set_ylabel("% of Questions")
plt.tight_layout()
plt.show()

You can try this:
data = {'squad':[0.661669, 0.127516, 0.095005],
'quac':[0.930514, 0.065951, 0.017680],
'quoref': [0.504963, 0.340364, 0.106700]}
df = pd.DataFrame(data)
bars_1 = df.iloc[0]
bars_2 = df.iloc[1]
bars_3 = df.iloc[2]
# Heights of bars_1 + bars_2
bars_1_to_2 = np.add(bars_1, bars_2).tolist()
# The position of the bars on the x-axis
r = [0, 1, 2]
plt.figure(figsize = (7, 7))
plt.bar(r, bars_1, color = 'lightgrey', edgecolor = 'white')
plt.bar(r, bars_2, bottom = bars_1, color = 'darkgrey', edgecolor = 'white')
plt.bar(r, bars_3, bottom = bars_1_to_2, color = 'dimgrey', edgecolor = 'white')
plt.yticks(np.arange(0, 1.1, 0.1))
plt.xticks(ticks = r, labels = df.columns)
plt.ylabel('% of Questions')
plt.show()

Related

Adding coastlines to GOES data

I have a lot of GOES data that I'd like to plot with coastlines. I've added matplotlib axes with the proper project of each data and plotted the data. I tried to add coastlines with cartopy but they do not appear.
import metpy
import xarray as xr
import matplotlib.pyplot as plt
def get_projection(ds, variable):
dat = ds.metpy.parse_cf(variable)
return dat.metpy.cartopy_crs
aod = xr.open_dataset('GOES/AODC/2021/001/00/OR_ABI-L2-AODC-M6_G16_s20210010001176_e20210010003549_c20210010006090.nc')
albedo = xr.open_dataset('GOES/LSAC/2021/230/21/OR_ABI-L2-LSAC-M6_G16_s20212302131172_e20212302133545_c20212302135044.nc')
rainfall = xr.open_dataset('GOES/RRQPEF/2021/001/00/OR_ABI-L2-RRQPEF-M6_G16_s20210010000209_e20210010009517_c20210010010020.nc')
precipitable_water = xr.open_dataset('GOES/TPWC/2021/001/00/OR_ABI-L2-TPWC-M6_G16_s20210010001176_e20210010003549_c20210010005512.nc')
aod_proj = get_projection(aod, 'AOD')
albedo_proj = get_projection(albedo, 'LSA')
rainfall_proj = get_projection(rainfall, 'RRQPE')
pwv_proj = get_projection(precipitable_water, 'TPW')
figsize = (11, 8.5)
fig = plt.figure(figsize=figsize)
ax1 = fig.add_subplot(2, 2, 1, projection=aod_proj)
ax2 = fig.add_subplot(2, 2, 2, projection=albedo_proj)
ax3 = fig.add_subplot(2, 2, 3, projection=rainfall_proj)
ax4 = fig.add_subplot(2, 2, 4, projection=pwv_proj)
aod['AOD'].plot(vmin=0, vmax=1, ax=ax1, transform=aod_proj)
albedo['LSA'].plot(ax=ax2, transform=albedo_proj)
rainfall['RRQPE'].plot(ax=ax3, transform=rainfall_proj)
precipitable_water['TPW'].plot(ax=ax4, transform=pwv_proj)
for ax in [ax1, ax2, ax3, ax4]:
ax.coastlines(resolution='50m', color='red', linewidth=1)
fig.tight_layout()
What I get is an image with no coastlines. I'm assuming that this is a projection issue but I don't know where the problem would be.

Changing the Matplotlib GridSpec properties after generating the subplots

Suppose something comes up in my plot that mandates that I change the height ratio between two subplots that I've generated within my plot. I've tried changing GridSpec's height ratio to no avail.
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
fig = plt.figure()
gs = GridSpec(2, 1, height_ratios=[2, 1])
ax1 = fig.add_subplot(gs[0])
ax1 = fig.axes[0]
ax2 = fig.add_subplot(gs[1])
ax2 = fig.axes[1]
ax1.plot([0, 1], [0, 1])
ax2.plot([0, 1], [1, 0])
gs.height_ratios = [2, 5]
The last line has no effect on the plot ratio.
In my actual code, it is not feasible without major reworking to set the height_ratios to 2:5 ahead of time.
How do I get this to update like I want?
The axes of relevant subplots can be manipulated and adjusted to get new height ratios.
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
fig = plt.figure()
gs = GridSpec(2, 1, height_ratios=[2, 1]) #nrows, ncols
ax1 = fig.add_subplot(gs[0])
ax1 = fig.axes[0]
ax2 = fig.add_subplot(gs[1])
ax2 = fig.axes[1]
ax1.plot([0, 1], [0, 1])
ax2.plot([0, 1], [1, 0])
# new height ratio: 2:5 is required for the 2 subplots
rw, rh = 2, 5
# get dimensions of the 2 axes
box1 = ax1.get_position()
box2 = ax2.get_position()
# current dimensions
w1,h1 = box1.x1-box1.x0, box1.y1-box1.y0
w2,h2 = box2.x1-box2.x0, box2.y1-box2.y0
top1 = box1.y0+h1
#top2 = box2.y0+h2
full_h = h1+h2 #total height
# compute new heights for each axes
new_h1 = full_h*rw/(rw + rh)
new_h2 = full_h*rh/(rw + rh)
#btm1,btm2 = box1.y0, box2.y0
new_bottom1 = top1-new_h1
# finally, set new location/dimensions of the axes
ax1.set_position([box1.x0, new_bottom1, w1, new_h1])
ax2.set_position([box2.x0, box2.y0, w2, new_h2])
plt.show()
The output for ratio: (2, 5):
The output for (2, 10):

matplotlib scatter plot add legend without loop and without using seaborn

I receive the error No handles with labels found to put in legend. when running the code below. How can I add a legend to this scatter plot that shows the color definitions (a red dot for A, blue dot for B, green dot for C)?
### Dummy Dataset
x = [0,1,-1,4,0,2,2,4,2]
y = [1,5,9,2,4,2,5,6,1]
cat = ['A','B','B','B','A','C','A','B','B']
df = pd.DataFrame(list(zip(x,y,cat)), columns =['x', 'y', 'cat'])
### Build color definitions
df.loc[:, 'color'] = df.cat
df.color.replace(['A', 'B', 'C'], ['red', 'blue', 'green'], inplace=True)
display(df)
### Plotting
fig = plt.figure(figsize=(5,5), constrained_layout=True)
gs = fig.add_gridspec(2, 1)
ax1 = fig.add_subplot(gs[0, 0])
ax1.scatter(df.x, df.y, edgecolors = 'none', c = df.color)
ax1.legend(loc='upper left', facecolor='white', frameon=1,
framealpha=1, labelspacing=0.2, borderpad=0.25)
It seems like there might not be a way to do this without a simple loop. Based on the procedure here, the following code works.
x = [0,1,-1,4,0,2,2,4,2]
y = [1,5,9,2,4,2,5,6,1]
cat = ['A','B','B','B','A','C','A','B','B']
df = pd.DataFrame(list(zip(x,y,cat)), columns =['x', 'y', 'cat'])
mycolorsdict = {'A':'red', 'B':'blue', 'C':'green'}
fig = plt.figure(figsize=(5,5), constrained_layout=True)
gs = fig.add_gridspec(2, 1)
ax1 = fig.add_subplot(gs[0, 0])
grouped = df.groupby('cat')
for key, group in grouped:
group.plot(ax=ax1, kind='scatter',
x='x', y='y',
label=key, color=mycolorsdict[key])
ax1.legend(loc='upper left', facecolor='white', frameon=1,
framealpha=1, labelspacing=0.2, borderpad=0.25)

Colormap is not categorizing the data properly

Here is my script to plot data from a Geogtiff file using basemap. The data is categorical and there are 13 categories within this domain. The problem is that some categories get bunched up into one colour and thus some resolution is lost.
Unfortunately, I do not know how to fix this. I read that plt.cm.get_cmp is better for discrete datasets but I have not gotten it to work unfortunately.
gtif = 'some_dir'
ds = gdal.Open(gtif)
data = ds.ReadAsArray()
gt = ds.GetGeoTransform()
proj = ds.GetProjection()
xres = gt[1]
yres = gt[5]
xmin = gt[0] + xres
xmax = gt[0] + (xres * ds.RasterXSize) - xres
ymin = gt[3] + (yres * ds.RasterYSize) + yres
ymax = gt[3] - yres
xy_source = np.mgrid[xmin:xmax+xres:xres, ymax+yres:ymin:yres]
ds = None
fig2 = plt.figure(figsize=[12, 11])
ax2 = fig2.add_subplot(111)
ax2.set_title("Land use plot")
bm2 = Basemap(ax=ax2,projection='cyl',llcrnrlat=ymin,urcrnrlat=ymax,llcrnrlon=xmin,urcrnrlon=xmax,resolution='l')
bm2.drawcoastlines(linewidth=0.2)
bm2.drawcountries(linewidth=0.2)
data_new=np.copy(data)
data_new[data_new==255] = 0
nbins = np.unique(data_new).size
cb =plt.cm.get_cmap('jet', nbins+1)
img2 =bm2.imshow(np.flipud(data_new), cmap=cb)
ax2.set_xlim(3, 6)
ax2.set_ylim(50,53)
plt.show()
labels = [str(i) for i in np.unique(data_new)]
cb2=bm2.colorbar(img2, "right", size="5%", pad='3%', label='NOAH Land Use Category')
cb2.set_ticklabels(labels)
cb2.set_ticks(np.unique(data_new))
Here are the categories that are found within the domain (numbered classes):
np.unique(data_new)
array([ 0, 1, 4, 5, 7, 10, 11, 12, 13, 14, 15, 16, 17], dtype=uint8)
Thanks so much for any help here. I have also attached the output image that shows the mismatch. (not working)
First, this colormap problem is independent of the use of basemap. The following is therefore applicable to any matplotlib plot.
The problem here is that creating a colormap from n values distributes those values equally over the colormap range. Some values from the image therefore fall into the same colorrange within the colormap.
To prevent this, one can generate a colormap with the initial number of categories as shown below.
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.colors
# generate some data
data = np.array( [ 0, 1, 4, 5, 7, 10]*8 )
np.random.shuffle(data)
data = data.reshape((8,6))
# generate colormap and norm
unique = np.unique(data)
vals = np.arange(int(unique.max()+1))/float(unique.max())
cols = plt.cm.jet(vals)
cmap = matplotlib.colors.ListedColormap(cols, int(unique.max())+1)
norm=matplotlib.colors.Normalize(vmin=-0.5, vmax=unique.max()+0.5)
fig, ax = plt.subplots(figsize=(5,5))
im = ax.imshow(data, cmap=cmap, norm=norm)
for i in range(data.shape[0]):
for j in range(data.shape[1]):
ax.text(j,i,data[i,j], color="w", ha="center", va="center")
cb = fig.colorbar(im, ax=ax, norm=norm)
cb.set_ticks(unique)
plt.show()
This can be extended to exclude the colors not present in the image as follows:
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.colors
# generate some data
data = np.array( [ 0, 1, 4, 5, 7, 10]*8 )
np.random.shuffle(data)
data = data.reshape((8,6))
unique, newdata = np.unique(data, return_inverse=1)
newdata = newdata.reshape(data.shape)
# generate colormap and norm
new_unique = np.unique(newdata)
vals = np.arange(int(new_unique.max()+1))/float(new_unique.max())
cols = plt.cm.jet(vals)
cmap = matplotlib.colors.ListedColormap(cols, int(new_unique.max())+1)
norm=matplotlib.colors.Normalize(vmin=-0.5, vmax=new_unique.max()+0.5)
fig, ax = plt.subplots(figsize=(5,5))
im = ax.imshow(newdata, cmap=cmap, norm=norm)
for i in range(newdata.shape[0]):
for j in range(newdata.shape[1]):
ax.text(j,i,data[i,j], color="w", ha="center", va="center")
cb = fig.colorbar(im, ax=ax, norm=norm)
cb.ax.set_yticklabels(unique)
plt.show()

only 1 colorbar for multiple pie chart using matlibplot

I have such a plot, and would like to add a the colorbar code (which color corresponds to what number) on the right hand below. I saw some example which where used for imshow not pie chart.
#!/usr/bin/env python
"""
http://matplotlib.sf.net/matplotlib.pylab.html#-pie for the docstring.
"""
from pylab import *
fracs = [33,33,33]
starting_angle = 90
axis('equal')
for item in range(9):
color_vals = [-1, 0, 1]
my_norm = matplotlib.colors.Normalize(-1, 1) # maps your data to the range [0, 1]
my_cmap = matplotlib.cm.get_cmap('RdBu') # can pick your color map
patches, texts, autotexts = pie(fracs, labels = None, autopct='%1.1f%%', startangle=90, colors=my_cmap(my_norm(color_vals)))
subplot(3,3,item+1)
fracs = [33,33,33]
starting_angle = 90
axis('equal')
patches, texts, autotexts = pie(fracs, labels = None, autopct='%1.1f%%', startangle=90, colors=my_cmap(my_norm(color_vals)))
for item in autotexts:
item.set_text("")
subplots_adjust(left=0.125, bottom=0.1, right=0.9, top=0.9, wspace=0.0, hspace=0.5)
savefig('/home/superiois/Downloads/projectx3/GRAIL/pie1.png')
show()
Also, it would be great if you tell me how to customize the size and location of colorbar code; Thanks.
Usually a legend is more appropriate for discrete values and a colorbar for continuous values. That said, its off course possible since mpl allows you to create a colorbar from scratch.
import matplotlib.pyplot as plt
import matplotlib as mpl
fracs = [33,33,33]
starting_angle = 90
fig, axs = plt.subplots(3,3, figsize=(6,6))
fig.subplots_adjust(hspace=0.1,wspace=0.0)
axs = axs.ravel()
for n in range(9):
color_vals = [-1, 0, 1]
my_norm = mpl.colors.Normalize(-1, 1) # maps your data to the range [0, 1]
my_cmap = mpl.cm.get_cmap('RdBu', len(color_vals)) # can pick your color map
patches, texts, autotexts = axs[n].pie(fracs, labels = None, autopct='%1.1f%%', startangle=90, colors=my_cmap(my_norm(color_vals)))
axs[n].set_aspect('equal')
for item in autotexts:
item.set_text("")
ax_cb = fig.add_axes([.9,.25,.03,.5])
cb = mpl.colorbar.ColorbarBase(ax_cb, cmap=my_cmap, norm=my_norm, ticks=color_vals)
cb.set_label('Some label [-]')
cb.set_ticklabels(['One', 'Two', 'Three'])
I have added custom ticklabels just to show how that would work, to get the default values simply remove the last line.