Colormap is not categorizing the data properly - matplotlib

Here is my script to plot data from a Geogtiff file using basemap. The data is categorical and there are 13 categories within this domain. The problem is that some categories get bunched up into one colour and thus some resolution is lost.
Unfortunately, I do not know how to fix this. I read that plt.cm.get_cmp is better for discrete datasets but I have not gotten it to work unfortunately.
gtif = 'some_dir'
ds = gdal.Open(gtif)
data = ds.ReadAsArray()
gt = ds.GetGeoTransform()
proj = ds.GetProjection()
xres = gt[1]
yres = gt[5]
xmin = gt[0] + xres
xmax = gt[0] + (xres * ds.RasterXSize) - xres
ymin = gt[3] + (yres * ds.RasterYSize) + yres
ymax = gt[3] - yres
xy_source = np.mgrid[xmin:xmax+xres:xres, ymax+yres:ymin:yres]
ds = None
fig2 = plt.figure(figsize=[12, 11])
ax2 = fig2.add_subplot(111)
ax2.set_title("Land use plot")
bm2 = Basemap(ax=ax2,projection='cyl',llcrnrlat=ymin,urcrnrlat=ymax,llcrnrlon=xmin,urcrnrlon=xmax,resolution='l')
bm2.drawcoastlines(linewidth=0.2)
bm2.drawcountries(linewidth=0.2)
data_new=np.copy(data)
data_new[data_new==255] = 0
nbins = np.unique(data_new).size
cb =plt.cm.get_cmap('jet', nbins+1)
img2 =bm2.imshow(np.flipud(data_new), cmap=cb)
ax2.set_xlim(3, 6)
ax2.set_ylim(50,53)
plt.show()
labels = [str(i) for i in np.unique(data_new)]
cb2=bm2.colorbar(img2, "right", size="5%", pad='3%', label='NOAH Land Use Category')
cb2.set_ticklabels(labels)
cb2.set_ticks(np.unique(data_new))
Here are the categories that are found within the domain (numbered classes):
np.unique(data_new)
array([ 0, 1, 4, 5, 7, 10, 11, 12, 13, 14, 15, 16, 17], dtype=uint8)
Thanks so much for any help here. I have also attached the output image that shows the mismatch. (not working)

First, this colormap problem is independent of the use of basemap. The following is therefore applicable to any matplotlib plot.
The problem here is that creating a colormap from n values distributes those values equally over the colormap range. Some values from the image therefore fall into the same colorrange within the colormap.
To prevent this, one can generate a colormap with the initial number of categories as shown below.
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.colors
# generate some data
data = np.array( [ 0, 1, 4, 5, 7, 10]*8 )
np.random.shuffle(data)
data = data.reshape((8,6))
# generate colormap and norm
unique = np.unique(data)
vals = np.arange(int(unique.max()+1))/float(unique.max())
cols = plt.cm.jet(vals)
cmap = matplotlib.colors.ListedColormap(cols, int(unique.max())+1)
norm=matplotlib.colors.Normalize(vmin=-0.5, vmax=unique.max()+0.5)
fig, ax = plt.subplots(figsize=(5,5))
im = ax.imshow(data, cmap=cmap, norm=norm)
for i in range(data.shape[0]):
for j in range(data.shape[1]):
ax.text(j,i,data[i,j], color="w", ha="center", va="center")
cb = fig.colorbar(im, ax=ax, norm=norm)
cb.set_ticks(unique)
plt.show()
This can be extended to exclude the colors not present in the image as follows:
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.colors
# generate some data
data = np.array( [ 0, 1, 4, 5, 7, 10]*8 )
np.random.shuffle(data)
data = data.reshape((8,6))
unique, newdata = np.unique(data, return_inverse=1)
newdata = newdata.reshape(data.shape)
# generate colormap and norm
new_unique = np.unique(newdata)
vals = np.arange(int(new_unique.max()+1))/float(new_unique.max())
cols = plt.cm.jet(vals)
cmap = matplotlib.colors.ListedColormap(cols, int(new_unique.max())+1)
norm=matplotlib.colors.Normalize(vmin=-0.5, vmax=new_unique.max()+0.5)
fig, ax = plt.subplots(figsize=(5,5))
im = ax.imshow(newdata, cmap=cmap, norm=norm)
for i in range(newdata.shape[0]):
for j in range(newdata.shape[1]):
ax.text(j,i,data[i,j], color="w", ha="center", va="center")
cb = fig.colorbar(im, ax=ax, norm=norm)
cb.ax.set_yticklabels(unique)
plt.show()

Related

adjust the location of color bar in subplots containing color and line plots

I am new to python programming. I was trying to make two subplots using matplotlib containing a line plot (panel-a) and 2-D color plot using imshow() (panel-b). I want the colorbar to be shown on the right side with same size as the color plot and it should not be within the subplot box limit.
`
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import datetime as dt
from mpl_toolkits.axes_grid1 import make_axes_locatable
# Panel (a)
x1 = np.linspace(2, -2, 5)
y1 = np.linspace(-2, 2, 5)
# Panel (b)
N = 10
arr = np.random.random((N, N))
x_lims = list(map(dt.datetime.fromtimestamp, [982376726, 982377321]))
x_lims = mdates.date2num(x_lims)
y_lims = [0, 40]
fig, ax = plt.subplots(2, 1, figsize=(14, 10))
ax[0].plot(x1, y1)
ax[0].set_ylim(-2, 2)
ax[0].set_xlim(2, -2)
ax[0].set_xticks([2, 1, 0, -1, -2])
ax[0].set_yticks([-2, -1, 0, 1, 2])
im = ax[1].imshow(arr, extent=[x_lims[0], x_lims[1], y_lims[0],
y_lims[1]],
aspect='auto')
divider = make_axes_locatable(ax[1])
cax = divider.append_axes("right", size="5%", pad=0.05)
plt.colorbar(im, cax=cax, label="diff. en. flux")
ax[1].xaxis_date()
date_format = mdates.DateFormatter('%H:%M:%S')
ax[1].xaxis.set_major_formatter(date_format)

How to set values of a vertical stem plot as xticks labels?

I would like to reverse a grouped data and use group name as xtick label to draw it side by side. below demo mostly good but the label position not as expected.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
def main():
data = [['AAAAAA',8],['AAAAAA',9],['AAAAAA',10],['BBBBBB',5],['BBBBBB',6],['BBBBBB',7],['CCCCCC',1],['CCCCCC',2],['CCCCCC',3],['CCCCCC',4]]
df = pd.DataFrame(data,columns=['name','value'])
dfg = df.groupby('name')
fig, ax = plt.subplots(figsize=(8, 4))
i = 0
ymin = df['value'].min()
c1='#ececec'
c2='#bcbcbc'
color=c1
for ix, row in reversed(tuple(dfg)):
print(ix,row)
n = len(row['name'])
x = np.linspace(i,i + n,n)
ax.stem(x,row['value'])
font_dict = {'family':'serif','color':'darkred', 'size':8}
ax.text(i + n/2,ymin,ix,ha='right',va='top',rotation=90, fontdict=font_dict)
if color == c1:
color = c2
else:
color = c1
plt.axvspan(i, i+n, facecolor=color, alpha=0.5)
i += len(row)
ax.xaxis.set_ticks_position('none')
plt.setp( ax.get_xticklabels(), visible=False)
ax.grid(axis='y',color='gray', linestyle='dashed', alpha=1)
ax.spines[["top", "right"]].set_visible(False)
fig.tight_layout()
plt.show()
return
main()
Output:
Welcome to comment any other proper way to do this, or how to improve the xticks down, use ymin properly not good way to do it.
If my understanding of what you are trying to achieve is correct, here is one way to do it:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
df = pd.DataFrame(
[
["AAAAAA", 8],
["AAAAAA", 9],
["AAAAAA", 10],
["BBBBBB", 5],
["BBBBBB", 6],
["BBBBBB", 7],
["CCCCCC", 1],
["CCCCCC", 2],
["CCCCCC", 3],
["CCCCCC", 4],
],
columns=["name", "value"],
)
fig, ax = plt.subplots(figsize=(8, 4))
i = 0
c1 = "#ececec"
c2 = "#bcbcbc"
color = c1
ticks = {}
for ix, row in reversed(tuple(df.groupby("name"))):
# Create stem plot
n = len(row["name"])
x = np.linspace(i, i + n, n)
ax.stem(x, row["value"])
# Create axvspan plot
if color == c1:
color = c2
else:
color = c1
ax.axvspan(i, i + n, facecolor=color, alpha=0.5)
# Save positions and names in a dict
for key, name in zip(x, row["name"]):
if key not in ticks.keys():
ticks[key] = name
else:
# Deal with multiple names for same tick
ticks[key] += f"\n{name}"
i += len(row)
# Add ticks and ticks labels
ax.set_xticks(ticks=list(ticks.keys()))
ax.set_xticklabels(list(ticks.values()), fontsize=10, rotation="vertical")
# In Jupyter notebook
fig
Output:
And to avoid repeating the labels, you can, for instance, do:
ax.set_xticklabels(
[
"",
"CCCCCC",
"",
"CCCCCC\nBBBBBB",
"BBBBBB",
"BBBBBB\nAAAAAA",
" " * 20 + "AAAAAA",
"",
],
fontsize=10,
)
# In Jupyter notebook
fig
Output:

Adding coastlines to GOES data

I have a lot of GOES data that I'd like to plot with coastlines. I've added matplotlib axes with the proper project of each data and plotted the data. I tried to add coastlines with cartopy but they do not appear.
import metpy
import xarray as xr
import matplotlib.pyplot as plt
def get_projection(ds, variable):
dat = ds.metpy.parse_cf(variable)
return dat.metpy.cartopy_crs
aod = xr.open_dataset('GOES/AODC/2021/001/00/OR_ABI-L2-AODC-M6_G16_s20210010001176_e20210010003549_c20210010006090.nc')
albedo = xr.open_dataset('GOES/LSAC/2021/230/21/OR_ABI-L2-LSAC-M6_G16_s20212302131172_e20212302133545_c20212302135044.nc')
rainfall = xr.open_dataset('GOES/RRQPEF/2021/001/00/OR_ABI-L2-RRQPEF-M6_G16_s20210010000209_e20210010009517_c20210010010020.nc')
precipitable_water = xr.open_dataset('GOES/TPWC/2021/001/00/OR_ABI-L2-TPWC-M6_G16_s20210010001176_e20210010003549_c20210010005512.nc')
aod_proj = get_projection(aod, 'AOD')
albedo_proj = get_projection(albedo, 'LSA')
rainfall_proj = get_projection(rainfall, 'RRQPE')
pwv_proj = get_projection(precipitable_water, 'TPW')
figsize = (11, 8.5)
fig = plt.figure(figsize=figsize)
ax1 = fig.add_subplot(2, 2, 1, projection=aod_proj)
ax2 = fig.add_subplot(2, 2, 2, projection=albedo_proj)
ax3 = fig.add_subplot(2, 2, 3, projection=rainfall_proj)
ax4 = fig.add_subplot(2, 2, 4, projection=pwv_proj)
aod['AOD'].plot(vmin=0, vmax=1, ax=ax1, transform=aod_proj)
albedo['LSA'].plot(ax=ax2, transform=albedo_proj)
rainfall['RRQPE'].plot(ax=ax3, transform=rainfall_proj)
precipitable_water['TPW'].plot(ax=ax4, transform=pwv_proj)
for ax in [ax1, ax2, ax3, ax4]:
ax.coastlines(resolution='50m', color='red', linewidth=1)
fig.tight_layout()
What I get is an image with no coastlines. I'm assuming that this is a projection issue but I don't know where the problem would be.

Create multiple stacked bar-lots in one figure

The first image is the figure I'm trying to reproduce, and the second image is the data I have. Does anyone have a clean way to do this with pandas or matplotlib?
Just transpose the DataFrame and use df.plot with the stacked flag set to true:
import pandas as pd
from matplotlib import pyplot as plt
df = pd.DataFrame({'squad': [0.6616, 0.1245, 0.0950],
'quac': [0.83, 0.065, 0.0176],
'quoref': [0.504, 0.340364, 0.1067]})
# Transpose
plot_df = df.T
# plot
ax = plot_df.plot(kind='bar', stacked=True, rot='horizontal')
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)
ax.set_ylabel("% of Questions")
plt.tight_layout()
plt.show()
You can try this:
data = {'squad':[0.661669, 0.127516, 0.095005],
'quac':[0.930514, 0.065951, 0.017680],
'quoref': [0.504963, 0.340364, 0.106700]}
df = pd.DataFrame(data)
bars_1 = df.iloc[0]
bars_2 = df.iloc[1]
bars_3 = df.iloc[2]
# Heights of bars_1 + bars_2
bars_1_to_2 = np.add(bars_1, bars_2).tolist()
# The position of the bars on the x-axis
r = [0, 1, 2]
plt.figure(figsize = (7, 7))
plt.bar(r, bars_1, color = 'lightgrey', edgecolor = 'white')
plt.bar(r, bars_2, bottom = bars_1, color = 'darkgrey', edgecolor = 'white')
plt.bar(r, bars_3, bottom = bars_1_to_2, color = 'dimgrey', edgecolor = 'white')
plt.yticks(np.arange(0, 1.1, 0.1))
plt.xticks(ticks = r, labels = df.columns)
plt.ylabel('% of Questions')
plt.show()

Changing the Matplotlib GridSpec properties after generating the subplots

Suppose something comes up in my plot that mandates that I change the height ratio between two subplots that I've generated within my plot. I've tried changing GridSpec's height ratio to no avail.
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
fig = plt.figure()
gs = GridSpec(2, 1, height_ratios=[2, 1])
ax1 = fig.add_subplot(gs[0])
ax1 = fig.axes[0]
ax2 = fig.add_subplot(gs[1])
ax2 = fig.axes[1]
ax1.plot([0, 1], [0, 1])
ax2.plot([0, 1], [1, 0])
gs.height_ratios = [2, 5]
The last line has no effect on the plot ratio.
In my actual code, it is not feasible without major reworking to set the height_ratios to 2:5 ahead of time.
How do I get this to update like I want?
The axes of relevant subplots can be manipulated and adjusted to get new height ratios.
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
fig = plt.figure()
gs = GridSpec(2, 1, height_ratios=[2, 1]) #nrows, ncols
ax1 = fig.add_subplot(gs[0])
ax1 = fig.axes[0]
ax2 = fig.add_subplot(gs[1])
ax2 = fig.axes[1]
ax1.plot([0, 1], [0, 1])
ax2.plot([0, 1], [1, 0])
# new height ratio: 2:5 is required for the 2 subplots
rw, rh = 2, 5
# get dimensions of the 2 axes
box1 = ax1.get_position()
box2 = ax2.get_position()
# current dimensions
w1,h1 = box1.x1-box1.x0, box1.y1-box1.y0
w2,h2 = box2.x1-box2.x0, box2.y1-box2.y0
top1 = box1.y0+h1
#top2 = box2.y0+h2
full_h = h1+h2 #total height
# compute new heights for each axes
new_h1 = full_h*rw/(rw + rh)
new_h2 = full_h*rh/(rw + rh)
#btm1,btm2 = box1.y0, box2.y0
new_bottom1 = top1-new_h1
# finally, set new location/dimensions of the axes
ax1.set_position([box1.x0, new_bottom1, w1, new_h1])
ax2.set_position([box2.x0, box2.y0, w2, new_h2])
plt.show()
The output for ratio: (2, 5):
The output for (2, 10):