How to reverse colorbar values in matpotlib? - matplotlib

I am using the cbar.ax.tick_params matplotlib command to make a colorbar for an XY scatterplot. How do I reverse the values (not the color-ramp) so that the lowest value is at the top of the bar. This is to represent geological data where the youngest rocks are on top of the older rocks. Here the age is represented by color.
Here is my code:
plt.scatter(summary["d18O"], summary["eHf"], s=150, c = color, cmap = color_map, edgecolors='black', marker='o')
plt.errorbar(summary["d18O"], summary["eHf"], summary["xerr"], summary["yerr"], ls='none', color='lightgrey', zorder=-1)
cbar=plt.colorbar()
cbar.ax.tick_params(labelsize=14)
cbar.minorticks_on()
cbar.set_label('Age (Ma)', style='italic', fontsize=16)
plt.axvline(x=5.3, color='black', zorder=-1)
plt.axhline(y=0, color='black', zorder=-1)
plt.tick_params(labelsize=14)
ax.set_xticks([4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
ax.set_yticks([-6, -4, -2, 0, 2, 4, 6, 8, 10, 12, 14, 16])
plt.ylabel(u'${\epsilon}$Hf$_{T}$', style='italic', fontsize=18)
plt.xlabel(u'$\delta^{18}$O$_{V-SMOW}$ ‰',style='italic', fontsize=18)
plt.text(11.5, 0.3, 'CHUR', fontsize=18)
plt.text(4.9, 5, 'mantle zircon = 5.3‰', fontsize=16, rotation=90)
plt.show()

As #r-beginners mentioned,
cbar.ax.invert_yaxis()
would solve the problem if cbar is your colorer object.

Related

How can I create a legend for my scatter plot which matches the colours used in the plot?

I've created a scatter plot (actually two similar subplots) using matplotlib.pyplot which I'm using for stylometric text analysis. The code I'm using to make the plot is as follows:
import matplotlib.pyplot as plt
import numpy as np
clusters = 4
two_d_matrix = np.array([[0.00617068, -0.53451777], [-0.01837677, -0.47131886], ...])
my_labels = [0, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3]
fig, (plot1, plot2) = plt.subplots(1, 2, sharex=False, sharey=False, figsize=(20, 10))
plot1.axhline(0, color='#afafaf')
plot1.axvline(0, color='#afafaf')
for i in range(clusters):
try:
plot1.scatter(two_d_matrix[i:, 0], two_d_matrix[i:, 1], s=30, c=my_labels, cmap='viridis')
except (KeyError, ValueError) as e:
pass
plot1.legend(my_labels)
plot1.set_title("My First Plot")
plot2.axhline(0, color='#afafaf')
plot2.axvline(0, color='#afafaf')
for i in range(clusters):
try:
plot2.scatter(two_d_matrix[i:, 0], two_d_matrix[i:, 1], s=30, c=my_labels, cmap='viridis')
except (KeyError, ValueError) as e:
pass
plot2.legend(my_labels)
plot2.set_title("My Second Plot")
plt.show()
Because there are four distinct values in my_labels there are four colours which appear on the plot, these should correspond to the four clusters I expected to find.
The problem is that the legend only has three values, corresponding to the first three values in my_labels. It also appears that the legend isn't displaying a key for each colour, but for each of the axes and then for one of the colours. This means that the colours appearing in the plot are not matched to what appears in the legend, so the legend is inaccurate. I have no idea why this is happening.
Ideally, the legend should display one colour for each unique value in my_labels, so it should look like this:
How can I get the legend to accurately display all the values it should be showing, i.e. one for each colour which appears in the plot?
Before calling plot1.legend or plot2.legend, you can pass label = None to plot1.axhline or axvline (and similarly to plot2.axhline or plot2.axvline.) This will make sure it doesn't interfere with plotting legends of the scatter points and also not label those lines.
To get labels for all categories of scatter points, you'll have to call plot1.scatter or plot2.scatter by passing the label and choosing only values from two_d_matrix whose index matches with the index of label in my_labels.
You can do it as follows:
import matplotlib.pyplot as plt
import numpy as np
# Generate some (pseudo) random data which is reproducible
generator = np.random.default_rng(seed=121)
matrix = generator.uniform(size=(40, 2))
matrix = np.sort(matrix)
clusters = 4
my_labels = np.array([0, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3])
fig, ax = plt.subplots(1, 1)
# Select data points wisely
for i in range(clusters):
pos = np.where(my_labels == i)
ax.scatter(matrix[pos, 0], matrix[pos, 1], s=30, cmap='viridis', label=i)
ax.axhline(0, color='#afafaf', label=None)
ax.axvline(0, color='#afafaf', label=None)
ax.legend()
ax.set_title("Expected output")
plt.show()
This gives:
Comparison of current output and expected output
Observe how data points selection (done inside the for loops in the code below) affects the output:
Code:
import matplotlib.pyplot as plt
import numpy as np
# Generate some (pseudo) random data which is reproducible
generator = np.random.default_rng(seed=121)
matrix = generator.uniform(size=(40, 2))
matrix = np.sort(matrix)
clusters = 4
my_labels = np.array([0, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3])
fig, ax = plt.subplots(1, 2)
# Question plot
for i in range(clusters):
ax[0].scatter(matrix[i:, 0], matrix[i:, 1], s=30, cmap='viridis', label=i)
ax[0].axhline(0, color='#afafaf', label=None)
ax[0].axvline(0, color='#afafaf', label=None)
ax[0].legend()
ax[0].set_title("Current output (with label = None)")
# Answer plot
for i in range(clusters):
pos = np.where(my_labels == i) # <- choose index of data points based on label position in my_labels
ax[1].scatter(matrix[pos, 0], matrix[pos, 1], s=30, cmap='viridis', label=i)
ax[1].axhline(0, color='#afafaf', label=None)
ax[1].axvline(0, color='#afafaf', label=None)
ax[1].legend()
ax[1].set_title("Expected output")
plt.show()

VSCODE: jupyter adding interactive matplotlib plot %matplotlib widget not working interactively

The following example doesn't work in VSCODE. It works (with %matplotlib notebook in a Jupyter notebook in a web browser though).
# creating 3d plot using matplotlib
# in python
# for creating a responsive plot
# use %matplotlib widget in VSCODE
#%matplotlib widget
%matplotlib notebook
# importing required libraries
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
# creating random dataset
xs = [14, 24, 43, 47, 54, 66, 74, 89, 12,
44, 1, 2, 3, 4, 5, 9, 8, 7, 6, 5]
ys = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 6, 3,
5, 2, 4, 1, 8, 7, 0, 5]
zs = [9, 6, 3, 5, 2, 4, 1, 8, 7, 0, 1, 2,
3, 4, 5, 6, 7, 8, 9, 0]
# creating figure
fig = plt.figure()
ax = Axes3D(fig)
# creating the plot
plot_geeks = ax.scatter(xs, ys, zs, color='green')
# setting title and labels
ax.set_title("3D plot")
ax.set_xlabel('x-axis')
ax.set_ylabel('y-axis')
ax.set_zlabel('z-axis')
# displaying the plot
plt.show()
The result should be that you get a plot that can be e.g. rotated interactively using the mouse arrow.
In VSCODE one can click on the </> and a renderer is presented. Chosen is JupyterIPWidget Renderer. Other renderers show the plot but don't allow for interactive manipulation.
Also a warning appears:
/var/folders/kc/5p61t70n0llbn05934gj4r_w0000gn/T/ipykernel_22590/1606073246.py:23:
MatplotlibDeprecationWarning: Axes3D(fig) adding itself to the figure is
deprecated since 3.4. Pass the keyword argument auto_add_to_figure=False
and use fig.add_axes(ax) to suppress this warning. The default value of
auto_add_to_figure will change to False in mpl3.5 and True values will
no longer work in 3.6. This is consistent with other Axes classes.
ax = Axes3D(fig)
This behavior is expected-- %matplotlib notebook is not supported in VS Code. You should use %matplotlib widget instead. See https://github.com/microsoft/vscode-jupyter/wiki/Using-%25matplotlib-widget-instead-of-%25matplotlib-notebook,tk,etc

Geopandas consistent user defined color scheme for subplots

I am new to geopandas and I am having trouble creating choropleth subplots with consistent bins. I need to create a consistent user defined color scheme across all subplots.
I have followed the examples below:
matplotlib geopandas plot chloropleth with set bins for colorscheme
https://github.com/geopandas/geopandas/issues/1019
While I am able to reproduce both examples, I get very strange behavior with my own data. Below is a toy example that replicates my problem.
import geopandas as gpd
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mapclassify import Quantiles, UserDefined
import os
# Note you can read directly from the URL
gdf = gpd.read_file('https://opendata.arcgis.com/datasets/8d3a9e6e7bd445e2bdcc26cdf007eac7_4.geojson')
#gdf.plot()
gdf.shape
gdf.columns
gdf['rgn15nm'].head(9)
d = {
'rgn15nm': ['North East', 'North West', 'Yorkshire and The Humber', 'East Midlands', 'West Midlands', 'East of England', 'London', 'South East', 'South West'],
'1980' : pd.Series([0, 1, 0, 0, 0, 0, 0, 0, 0]),
'2000' : pd.Series([1, 1, 1, 0, 0, 0, 0, 0, 0]),
'2020' : pd.Series([1, 1, 10, 3, 1, 0, 0, 0, 1])
}
df = pd.DataFrame(d)
The data looks like this:
gdf = gdf.merge(df, on='rgn15nm')
# Define bins
gdf['2020'].describe()
bins= UserDefined(gdf['2020'], bins=[0,1,2,3,4,5,6,7,8,9,10]).bins
bins
# create a new column with the discretized values and plot that col
# repeat for each view
fig,(ax1,ax2,ax3) = plt.subplots(1,3,figsize=(15,6))
gdf.assign(cl=UserDefined(gdf['1980'].dropna(), bins).yb).plot(column='cl', ax=ax1, cmap='OrRd', legend = True )
gdf.assign(cl=UserDefined(gdf['2000'].dropna(), bins).yb).plot(column='cl', ax=ax2, cmap='OrRd', legend = True)
gdf.assign(cl=UserDefined(gdf['2020'].dropna(), list(bins)).yb).plot(column='cl', ax=ax3, cmap='OrRd', legend = True)
for ax in (ax1,ax2,ax3,):
ax.axis('off')
Clearly, the color scheme is not the same across subplots. What I mean by this is that 'Northwest' (the only region highlighted in the 1980 subplot) had the same value of 1 in all years 1980, 2000 and 2020. Yet, this region shows in different colors across the 3 subplots, despite the value being constant. I want "Northwest" to show in the same color (that of the subplot for 2020) across all 3 subplots.
I also tried this:
fig,(ax1,ax2,ax3) = plt.subplots(1,3,figsize=(15,6))
ax1.set_title('1980')
ax2.set_title('2000')
ax3.set_title('2020')
gdf.plot(column='1980', ax=ax1, cmap='OrRd', scheme='userdefined', classification_kwds={'bins':[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]})
gdf.plot(column='2000', ax=ax2, cmap='OrRd', scheme='userdefined', classification_kwds={'bins':[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]})
gdf.plot(column='2020', ax=ax3, cmap='OrRd', scheme='userdefined', classification_kwds={'bins':[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]})
for ax in (ax1,ax2,ax3):
ax.axis('off')
But got exactly the same figure as immediately above (see below)
Does any one have any insight? I want a consistent color scheme across all 3 subplots.
So ultimately the solution was using the "norm" option. Following this example: Geopandas userdefined color scheme drops colors. See below:
from matplotlib.colors import Normalize
bins= UserDefined(gdf['2020'], bins=[0,1,2,3,4,5,6,7,8,9,10]).bins
bins
fig,(ax1,ax2,ax3) = plt.subplots(1,3,figsize=(15,6))
ax1.set_title('1980')
ax2.set_title('2000')
ax3.set_title('2020')
gdf.plot(column='1980', ax=ax1, cmap='OrRd', scheme='userdefined', classification_kwds={'bins':bins}, norm=Normalize(0, len(bins)))
gdf.plot(column='2000', ax=ax2, cmap='OrRd', scheme='userdefined', classification_kwds={'bins':bins}, norm=Normalize(0, len(bins)))
gdf.plot(column='2020', ax=ax3, cmap='OrRd', scheme='userdefined', classification_kwds={'bins':bins}, norm=Normalize(0, len(bins)))
for ax in (ax1,ax2,ax3):
ax.axis('off')
The result is what I wanted:
Expected Graph
or as suggested by Paul H:
fig,(ax1,ax2,ax3) = plt.subplots(1,3,figsize=(15,6))
ax1.set_title('1980')
ax2.set_title('2000')
ax3.set_title('2020')
gdf.plot(column='1980', ax=ax1, cmap='OrRd', scheme='userdefined', classification_kwds={'bins':bins}, vmin = 0, vmax = 10)
gdf.plot(column='2000', ax=ax2, cmap='OrRd', scheme='userdefined', classification_kwds={'bins':bins}, vmin = 0, vmax = 10)
gdf.plot(column='2020', ax=ax3, cmap='OrRd', scheme='userdefined', classification_kwds={'bins':bins}, vmin = 0, vmax = 10)
for ax in (ax1,ax2,ax3):
ax.axis('off')

matplotlib histogram with equal bars width

I use a histogram to display the distribution. Everything works fine if the spacing of the bins is uniform. But if the interval is different, then the bar width is appropriate (as expected). Is there a way to set the width of the bar independent of the size of the bins ?
This is what i have
This what i trying to draw
from matplotlib import pyplot as plt
my_bins = [10, 20, 30, 40, 50, 120]
my_data = [5, 5, 6, 8, 9, 15, 25, 27, 33, 45, 46, 48, 49, 111, 113]
fig1 = plt.figure()
ax1 = fig1.add_subplot(121)
ax1.set_xticks(my_bins)
ax1.hist(my_data, my_bins, histtype='bar', rwidth=0.9,)
fig1.show()
I cannot mark your question as a duplicate, but I think my answer to this question might be what you are looking for?
I'm not sure how you'll make sense of the result, but you can use numpy.histogram to calculate the height of your bars, then plot those directly against an arbitrary x-scale.
x = np.random.normal(loc=50, scale=200, size=(2000,))
bins = [0,1,10,20,30,40,50,75,100]
fig = plt.figure()
ax = fig.add_subplot(211)
ax.hist(x, bins=bins, edgecolor='k')
ax = fig.add_subplot(212)
h,e = np.histogram(x, bins=bins)
ax.bar(range(len(bins)-1),h, width=1, edgecolor='k')
EDIT Here's with the adjustment to the x-tick labels so that the correspondence is easier to see.
my_bins = [10, 20, 30, 40, 50, 120]
my_data = [5, 5, 6, 8, 9, 15, 25, 27, 33, 45, 46, 48, 49, 111, 113]
fig = plt.figure()
ax = fig.add_subplot(211)
ax.hist(my_data, bins=my_bins, edgecolor='k')
ax = fig.add_subplot(212)
h,e = np.histogram(my_data, bins=my_bins)
ax.bar(range(len(my_bins)-1),h, width=1, edgecolor='k')
ax.set_xticks(range(len(my_bins)-1))
ax.set_xticklabels(my_bins[:-1])

matplotlib advanced stacked bar

matplotlib plot bars
It can be regular like http://matplotlib.org/examples/api/barchart_demo.html
Let's define this as [M, F]
It can be stacked like http://matplotlib.org/examples/pylab_examples/bar_stacked.html
Let's define this as [M + F]
Now how to plot [M, F + other]
If I understand you correctly, you want to have a stack plot with more than two elements stacked? If yes, that goes pretty straight forward as in the example you posted:
#!/usr/bin/env python
# a stacked bar plot with errorbars
import numpy as np
import matplotlib.pyplot as plt
N = 5
menMeans = [20, 35, 30, 35, 27]
womenMeans = [25, 32, 34, 20, 25]
otherMeans = [5, 2, 4, 8, 5]
menStd = [2, 3, 4, 1, 2]
womenStd = [3, 5, 2, 3, 3]
otherStd = [1, 1, 1, 1, 1]
ind = np.arange(N) # the x locations for the groups
width = 0.35 # the width of the bars: can also be len(x) sequence
p1 = plt.bar(ind, menMeans, width, color='r', yerr=womenStd)
p2 = plt.bar(ind, womenMeans, width, color='y',
bottom=menMeans, yerr=menStd)
p3 = plt.bar(ind, otherMeans, width, color='b',
bottom=[menMeans[j] + womenMeans[j] for j in range(len(menMeans)) ],
yerr=otherStd)
plt.ylabel('Scores')
plt.title('Scores by group and gender')
plt.xticks(ind+width/2., ('G1', 'G2', 'G3', 'G4', 'G5') )
plt.yticks(np.arange(0,81,10))
plt.legend( (p1[0], p2[0], p3[0]), ('Men', 'Women', 'Other') )
plt.show()