Manipulating the legend for a Seaborn Jointplot - matplotlib

I am seeking advice on how to edit the colors of the symbols in my legend when using a seaborn jointplot with several other matplotlib scatterplot layers.
My Specific Question:
Given the data and the current chart below, how can I adjust the code so that the colors of the last five symbols in the legend (i.e., "3", "4", "5", "6", "8") are changed to gray (i.e., #b9b9bd)?
The reproducible code has been pasted below, but there is a publicly accessible Colab Notebook that can be copied and used for experimentation.
Reproducible Example
# import packages
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
# load the mpg dataset
mpg_df = sns.load_dataset("mpg")
mpg_df = (
mpg_df
.astype({"cylinders":"category"})
)
mpg_df["cylinders"] = (
mpg_df["cylinders"]
.cat
.as_ordered()
)
# establish the markers
_markers_cylinders = {
3:"P",
4:"d",
5:"v",
6:"X",
8:"s"
}
# establish colors for countries
_palette_origin = {
"usa":"#fca847",
"japan":"#8aed7b",
"europe":"#7b81ed"
}
kws={
"s": 225,
"linewidth": 2
}
# plot the jointplot -- establish the figure -- the content of the plot is not needed, just the marginal distributions
jp = sns.jointplot(
data=mpg_df,
x="weight",
y="mpg",
hue="origin",
palette=_palette_origin,
markers=",",
marginal_kws={"fill":True},
color="w",
height=10,
s=1
)
# plot scatter by origin and cylinder as layers on the original jointplot
origin = ["usa", "japan", "europe"]
for nation in origin:
df = mpg_df[mpg_df["origin"] == nation]
for k,v in _markers_cylinders.items():
jp.ax_joint.scatter(
data=df[df["cylinders"]==k],
x="weight",
y="mpg",
marker=_markers_cylinders[k],
c=_palette_origin[nation],
edgecolor="k",
alpha=0.6,
**kws
)
jp.ax_joint.grid(
color="k",
linestyle=":",
linewidth=0.75
)
han, lab = jp.ax_joint.get_legend_handles_labels()
lab = [
"USA",
"Japan",
"Europe",
"3",
"4",
"5",
"6",
"8"
]
jp.ax_joint.legend(
han[0:8],
lab[0:8],
title="Origin & Cylinders",
fontsize=15,
bbox_to_anchor=(1.20, 1),
title_fontsize = 14,
markerscale=2.5,
shadow = True
)
sns.move_legend(
jp.ax_joint,
loc="upper left",
bbox_to_anchor=(1.20, 1),
markerscale=0.25
)
plt.show()
plt.show()

This can be accomplished by reusing the existing handles and then changing the face color of the marker.
# Build the legend from the three existing hue handles plus five gray proxy
# markers (one per cylinder count), then redraw the legend with them.
from matplotlib.lines import Line2D

han, lab = jp.ax_joint.get_legend_handles_labels()
# NOTE(review): `lab` here is whatever the artists are labelled with; presumably
# the relabelled list from the question ("USA", ..., "8") is what should be
# passed to legend() -- confirm.

# Proxy artists only carry marker styling into the legend; their data
# coordinates are never drawn, so a single dummy point is enough.
gray_markers = [
    Line2D(
        [0],
        [0],
        marker=marker,
        markerfacecolor='#b9b9bd',
        markeredgecolor='black',
        markersize=14,
        ls='',
    )
    for marker in ('P', 'd', 'v', 'X', 's')
]
# Keep the origin (hue) handles as they are and append the gray cylinder markers.
new_han = [han[0], han[1], han[2], *gray_markers]

jp.ax_joint.legend(
    new_han,
    lab[0:8],
    title="Origin & Cylinders",
    fontsize=15,
    bbox_to_anchor=(1.20, 1),
    title_fontsize=14,
    markerscale=2.5,
    shadow=True,
)

Related

create legend for markercolor and size

I've created the following figure:
With following code:
matplotlib.rcParams.update({'font.size': 10})
fig = plt.figure(figsize=(16, 9), dpi=300, facecolor='white')
ax = plt.subplot(111, projection=ccrs.PlateCarree())
ax.set_extent(extent)
# cartopy layers
country_10m = cartopy.feature.NaturalEarthFeature('cultural', 'admin_0_countries', '10m')
ax.add_feature(country_10m, edgecolor='w', linewidth=0.75, facecolor='#EEEFEE', label='country border')
ax.coastlines(resolution='10m', color='#EEEFEE', linewidth=0.75)
ax.imshow(np.tile(np.array([[[191, 210, 217]]], dtype=np.uint8), [2, 2, 1]), origin='lower', transform=cartopy.crs.PlateCarree(), extent=extent)
ax.scatter(gdf_ldb.x, gdf_ldb.y, c= gdf_ldb.Color, s= gdf_ldb.Markersize, zorder=30)
# ax.scatter(gdf_ports_filt.longitude, gdf_ports_filt.latitude, s= 10, color= 'k', zorder= 30)
ax.gridlines(crs=ccrs.PlateCarree(), draw_labels=True, linewidth=1, color='gray', alpha=0.5, linestyle='-')
ax.text(-0.08, 0.5, 'latitude [°]', va='bottom', ha='center',rotation='vertical', rotation_mode='anchor',transform=ax.transAxes);
ax.text(0.5, -0.09, 'longitude [°]', va='bottom', ha='center', rotation='horizontal', rotation_mode='anchor', transform=ax.transAxes);
How do I create a legend for the markersize as well for the color, so like this:
With x, x1, and x2 representing the values of the markersizes.
gdf_ldb looks like:
x y Type Color Markersize geometry
prograding_feature_polygon_29 12.857701 56.648035 Updrift grey 3.0 POINT (12.85770 56.64804)
prograding_feature_polygon_57 17.781445 54.808079 Updrift grey 3.0 POINT (17.78144 54.80808)
prograding_feature_polygon_58 17.438390 54.754518 Updrift grey 3.0 POINT (17.43839 54.75452)
prograding_feature_polygon_63 4.708077 52.880322 Updrift grey 3.0 POINT (4.70808 52.88032)
prograding_feature_polygon_72 3.953364 51.842299 Updrift grey 3.0 POINT (3.95336 51.84230)
... ... ... ... ... ... ...
retreating_feature_polygon_2018 -10.148432 53.415224 Double Updrift grey 3.0 POINT (-10.14843 53.41522)
retreating_feature_polygon_2019 -9.954510 54.197329 Double Updrift grey 3.0 POINT (-9.95451 54.19733)
retreating_feature_polygon_2119 15.095564 37.389535 Double Updrift grey 3.0 POINT (15.09556 37.38953)
retreating_feature_polygon_2120 14.317893 37.025026 Double Updrift grey 3.0 POINT (14.31789 37.02503)
retreating_feature_polygon_2121 13.952111 37.101009 Updrift grey 3.0 POINT (13.95211 37.10101)
Thanks in advance,
Dante
The key is to capture the artist (PathCollection in this case) returned by the scatter command. That has a method to retrieve the legend items manually, and it has keywords to distinguish between size and color (default). The num keyword can be used to reduce the amount of items returned, which is useful in the case of a (semi)continuous property as the size can be.
The example below plots two separate legends for both properties. You can also combine the handles and labels of both and plot them in a single legend if needed.
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import cartopy.crs as ccrs
import cartopy
import numpy as np
lons = np.random.randint(-170, 170, 100)
lats = np.random.randint(-80, 80, 100)
sizes = np.random.rand(100) * 100 + 5
colors = np.random.randint(0, 3, 100)
fig, ax = plt.subplots(
figsize=(8,4), dpi=86, facecolor='w',
subplot_kw=dict(projection=ccrs.PlateCarree()),
)
fig.subplots_adjust(left=0, right=1, bottom=0, top=1)
ax.add_feature(cartopy.feature.LAND, ec='none', fc='#EEEFEE', label='country border')
ax.add_feature(cartopy.feature.BORDERS, ec='w', fc='k', lw=0.75, label='country border')
ax.coastlines(resolution='10m', color='#EEEFEE', lw=0.75)
m = ax.scatter(lons, lats, s=sizes, c=colors, zorder=5, label="points")
l1 = ax.legend(
*m.legend_elements(prop="colors", num="auto"), title="Colors", framealpha=1,
loc="upper right", bbox_to_anchor=(0.88, 0.8, 0.12, 0.2), mode="expand",
)
ax.add_artist(l1) # prevent overwriting with second legend
l2 = ax.legend(
*m.legend_elements(prop="sizes", num=5), title="Sizes", framealpha=1,
loc="upper right", bbox_to_anchor=(0.88, 0.55, 0.12, 0.2), mode="expand",
)
The documentation about this shows some variations on this:
https://matplotlib.org/stable/gallery/lines_bars_and_markers/scatter_with_legend.html#automated-legend-creation
The answer by Rutger Kassies is excellent for many use cases. However, he mentions that One can also combine the handles and labels of both and plot them in a single legend if needed.
Here I offer another answer that shows the steps to create the single legend manually. Inside the single legend, 2 groups of sub legends are created and arranged as needed.
With single legend, you don't need to find the values of bbox_to_anchor for the second (or third and so on) to position them properly.
With manual creation of the items in a single legend, you have full control over which items appear in the legend. However, it needs some extra coding to achieve the goal.
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
import cartopy.crs as ccrs
import cartopy.feature as cfeature
# For `Categories` symbol
# Each item of legends requires 3 properties: color/text/marker_shape
color_V = ["green", "orange", "purple", "red", "cyan", "magenta"]
text_V = ["cat_4", "cat_9", "cat_13", "cat_15", "cat_19", "cat_33"]
marker_V = ["o", "o", "o", "o", "o", "o"]
len_V = len(color_V)
# For `Size/values` symbol
color_S = ["gray", "gray", "gray", "gray"]
sizes_S = [4, 8, 12, 16] #increasing values ...
text_S = ["4", "8", "12", "16"] #cover `sizes1` below
marker_S = ["o", "o", "o", "o"] #use disk shape
len_S = len(color_S)
# Demo data locations and attributes
xs = [23,12,4,25,24,52,17,33]
ys = [41,12,32,15,35,21,23,43]
colors1 = ["green", "orange", "purple", "red", "cyan", "magenta", "green", "orange"]
#texts1 = ["4", "9", "13", "15", "19", "33", "4", "9"]
markers1 = ["o", "o", "o", "o", "o", "o", "o", "o"]
sizes1 = [10,16,9,12,7,4,2,6]
len1 = len(xs)
all_patches = [] #for items in a single legend
# Create figure and `ax` for map plotting
# This form can create a single axes or an array of axes
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(8,6), subplot_kw={'projection': ccrs.PlateCarree()})
# All steps of plots will be done on `ax`
# [1] Add an invisible object as a spacer in the legend box
#rect = mpatches.Rectangle([0, 0], 0.01, 0.01, ec="none", color="lightgray")
all_patches.append(mlines.Line2D([0, 0], [1, 0], color="none"))
# Explicitly defining the elements in the legend
# [2] Add proxied text: 'Categories' to the legend
line = mlines.Line2D([0, 0], [1, 0], lw=.5, alpha=0.9, color="none")
line.set_label('Categories') # Title for 1st group of symbols in the legend
all_patches.append(line)
# [3] Plot (on the axes) `none` data point and
# save the output patches for `Categories` group
patches_V = [ ax.plot([],[], marker=marker_V[i], ms=8, ls="", color=color_V[i], \
label="{:s}".format(text_V[i]) )[0] \
for i in range(len_V) ]
all_patches += patches_V
# [4] Add an invisible object as a spacer in the legend box
all_patches.append(mlines.Line2D([0, 0], [1, 0], color="none"))
# [5] Add proxied text: 'Sizes' to the legend
x, y = ([0, 1], [0, 0])
line = mlines.Line2D([0, 0], [1, 0], lw=.5, alpha=0.9, color="none")
line.set_label('Sizes') # Title for 2nd group of symbols in the legend
all_patches.append(line)
# [6] Create patches for `Sizes` group
patches_S = [ ax.plot([],[], marker=marker_S[i], ms=sizes_S[i], ls="", \
color=color_S[i], \
label="{:s}".format(text_S[i]) )[0] for i in range(len_S) ]
all_patches += patches_S
# Plot point data using the demo data
for i in range(len1):
ax.plot(xs[i], ys[i], marker=markers1[i], ms=sizes1[i], color=colors1[i])
ax.set_extent([0, 80, 0, 60])
# Plot the legend in the upper-right corner
combined_legend = ax.legend(handles=all_patches,
bbox_to_anchor=(1, 1),
title="The Legend",
loc='upper right',
ncol=1,
numpoints=1,
facecolor="lightgray",
fontsize = 10,
title_fontsize= 12,
labelspacing = 0.55,
shadow=True)
# Draw some basemap features
ax.coastlines(lw=0.3, color="k")
ax.add_feature(cfeature.LAND)
ax.add_feature(cfeature.OCEAN)
plt.title("Legend for Categories and Sizes")
plt.show()
The output map:

Some concerns with axes.annotate()

Bonjour, I can't change the dimensions of the graph once I
use "axes.annotate()".
Whatever values I pass to "plt.figure(figsize=(8, 6))", the result is the same:
the dimensions do not change. I must be making a mistake somewhere...
# Importing libraries for dataframe creation
# and graph plotting
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Creating our own dataframe
data = {"Name": ["Alex", "Bob", "Clarein", "Dexter"],
"Marks": [45, 23, 78, 65]}
# Now convert this dictionary type data into a pandas dataframe
# specifying what are the column names
df = pd.DataFrame(data, columns=['Name', 'Marks'])
print(df.head())
#Defining the plotsize
# NOTE(review): plt.subplots() on the line after plt.figure() opens a second,
# default-sized figure, and everything plotted afterwards goes to that one, so
# the figsize given to plt.figure() never reaches the displayed chart.
# Passing figsize to plt.subplots(figsize=(8, 6)) instead would apply it.
plt.figure(figsize=(8, 6))
figure, axes = plt.subplots()
plt.bar(df.Name, df.Marks, color = 'c', width = 0.4, label = "Student marks");
# Setting the x-axis label and its size
plt.xlabel("Students", size=15)
# Setting the y-axis label and its size
plt.ylabel("Marks Secured", size=15);
# Setting the title for the graph
plt.title("This is an annotated barplot")
for p in axes.patches:
axes.annotate(text=np.round(p.get_height(), decimals=2),
xy=(p.get_x()+p.get_width()/2., p.get_height()),
ha='center',
va='center',
xytext=(0, 10),
textcoords='offset points');
plt.legend(loc='best');
plt.show();
That produces:
Regards, Atapalou

linspace colormesh heatmap does not match initial distribution

I have the result of a tsne algorithm and I want to create a 2D grid with it.
The results look like this:
array([[-31.129612 , 2.836552 ],
[ 14.543636 , 1.628475 ],
[-21.804733 , 17.605087 ],
...,
[ 1.6285285, -5.144769 ],
[ -8.478171 , -17.943161 ],
[-20.473257 , 1.7228899]], dtype=float32)
I plotted the results in a scatter plot to see the overall distribution in the 2D space.
tx2, ty2 = tsne_results[:,0], tsne_results[:,1]
plt.figure(figsize = (16,12))
plt.scatter(tx2,ty2)
plt.show()
However, when creating bins using linspace, I get a very different shape for my data.
bins_nr = 150
tx2, ty2 = tsne_results[:,0], tsne_results[:,1]
# NOTE(review): np.histogram2d bins x along the first axis (rows) of the
# returned array, whereas pcolormesh draws rows along the y axis, so grid_tmp
# would need to be transposed (grid_tmp.T) to match the scatter plot.
grid_tmp, xl, yl = np.histogram2d(tx2, ty2, bins=bins_nr)
# NOTE(review): xl and yl already hold the bins_nr + 1 bin edges computed by
# histogram2d; the linspace grids below have only bins_nr points and are not
# those edges.
gridx_tmp = np.linspace(min(tx2),max(tx2),bins_nr)
gridy_tmp = np.linspace(min(ty2),max(ty2),bins_nr)
plt.figure(figsize = (16,12))
plt.grid(True)
plt.pcolormesh(gridx_tmp, gridy_tmp, grid_tmp)
plt.show()
The latter chart looks like it was inverted and the data is not being projected in the same way as the scatter plot.
Any idea why this is happening?
Kind regards

Why is only one hatch used in the bar graph?

I have the following bar graph generated using pandas. My problem is all the bars have the same pattern. I have tried many approaches but could not manage to get around this issue.
Moreover, only one entry(for the last subplot) is shown in the legend.
The data used is
The code is :
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import ScalarFormatter
class ScalarFormatterForceFormat(ScalarFormatter):
    """ScalarFormatter variant that always uses the "%1.1f" tick format."""

    def _set_format(self):
        # Replace the formatter's own format selection with a fixed format.
        self.format = "%1.1f"
patterns = [ "\\" , "/" , "-","+" ,"x", "|", '.', "O" ]
yfmt = ScalarFormatterForceFormat()
yfmt.set_powerlimits((0, 0))
bar_gap=0.005
bar_width=0.01
bar_pos = [0 for i in range(5)]
bar_pos[0]=bar_gap
for i in range(1,5):
bar_pos[i]=bar_pos[i-1]+bar_gap+bar_width
colors = ['tab:blue', 'tab:green', 'tab:orange', 'tab:red','tab:olive']
patterns = [ "\\" , "/" , "+" , "-", ".", "*","x", "o", "O" ]
# file_locn = ''r'C:\Users\girum\Desktop\Throughput.csv'''
file_locn = ''r'my_file.csv'''
df = pd.read_csv(file_locn,index_col='Set')
df=df.T
fig, axes = plt.subplots(1,3,figsize=(8,5))#,sharey=True)
for i in range(3):
axes[i].yaxis.set_major_formatter(yfmt)
df.Type_A.plot(ax=axes[0],kind='bar',color=colors)
df.Type_B.plot(ax=axes[1],kind='bar',color=colors)
df.Type_C.plot(ax=axes[2],kind='bar',color=colors)
handles, labels = axes[0].get_legend_handles_labels()
for ax in fig.axes:
bars = ax.patches
hatches = ''.join(h*len(df) for h in patterns)
for bar, hatch in zip(bars, hatches):
bar.set_hatch(2*hatch)
plt.xticks(rotation=360)
axes[0].set_ylabel('Speed')
for i in range(len(df)):
axes[i].set_xlabel('')#Why is this line not working
axes[i].tick_params(axis='x', rotation=360)
plt.legend(loc='center right', bbox_to_anchor=(.2,1.08), ncol=1)
plt.show()
The code below has the following changes:
added some dummy test data to enable stand-alone test code
removed some unused variables
used the unaltered ScalarFormatter
only one loop through the axes and avoiding the plt interface
using ax.containers[0] to catch the bar container (ax.patches is a list of the rectangles, without the surrounding container)
change the label of the bar container to _no_legend, so it doesn't appear in the legend
used the patterns directly instead of concatenating them
removed h*len(df); note that multiplying a string such as '/' by e.g. 4, repeats the string (to '////'); repeated patterns are used in matplotlib to make the base pattern denser
used tick_params(axis='x', labelbottom=False, length=0) to remove the tick labels
added labels to the individual bars so they appear into the legend
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import ScalarFormatter
yfmt = ScalarFormatter()
yfmt.set_powerlimits((-9, 9))
colors = ['tab:blue', 'tab:green', 'tab:orange', 'tab:red', 'tab:olive']
patterns = ["\\", "/", "+", "-", ".", "*", "x", "o", "O"]
df = pd.DataFrame(np.random.randint(100000, 500000, (3, 3)),
columns=['A', 'B', 'C'],
index=['Type_A', 'Type_B', 'Type_C'])
df = df.T
fig, axes = plt.subplots(1, 3, figsize=(8, 5))
df.Type_A.plot(ax=axes[0], kind='bar', color=colors)
df.Type_B.plot(ax=axes[1], kind='bar', color=colors)
df.Type_C.plot(ax=axes[2], kind='bar', color=colors)
# Style each subplot: one hatch pattern and one legend label per bar.
for ax in axes:
    container = ax.containers[0]
    # A label starting with an underscore keeps the container out of the legend.
    container.set_label('_no_legend')
    for rect, pattern, name in zip(container, patterns, df.index):
        # Repeating a hatch character makes the pattern denser.
        rect.set_hatch(pattern * 4)
        rect.set_label(name)
    ax.yaxis.set_major_formatter(yfmt)
    # Hide the x tick labels and the tick marks themselves.
    ax.tick_params(axis='x', labelbottom=False, length=0)
axes[0].set_ylabel('Speed')
axes[2].legend(loc='lower right', bbox_to_anchor=(1, 1.01), ncol=3)
plt.tight_layout()
plt.show()
The lines where you are joining the patterns generates a result, which you don't want.
patterns = [ "\\" , "/" , "+" , "-", ".", "*","x", "o", "O" ]
hatches = ''.join(h*3 for h in patterns)
>>> '\\\\\\///+++---...***xxxoooOOO'
# if you have the bars, this is the output
for bar, hatch in zip([0,1,3], hatches):
print(2*hatch)
>>>
\\
\\
\\
Try to simplify this section using the patterns in your loop directly:
for bar, hatch in zip([0,1,3], patterns):
print(2*hatch)
>>>
\\
//
++
Output
I used your given code and data to create this output.

Plotting points on a map with calculated distance

This is my dataframe
import pandas as pd
from shapely.geometry import Point
import geopandas as gpd
from geopandas import GeoDataFrame
import matplotlib.pyplot as plt
d = {"STATE" : [ "NJ", "NJ", "NJ", "NJ"],
"CATEGORY": ["A", "B", "C", "D"],
"LATITUDE" : [ 40.794856, 40.790176, 40.826762, 40.495150],
"LONGITUDE" : [ -74.149086, -74.255100, -74.101990, -74.442890]}
df = pd.DataFrame(data=d)
df.plot(kind="scatter", x="LONGITUDE", y="LATITUDE", alpha=0.4)
plt.show()
I want to calculate the distance between points based on category column:
A -> B
A -> C
A -> D
and connect the dots with distance displayed between them as a label
#creating point object so I can calculate distance between coordinates
df["point"] = [Point(xy) for xy in zip(df['LONGITUDE'], df['LATITUDE'])]
#Formula I use for calculating distance between two points, this works when I have two separate columns
#df['lat_long_diff'] = df.apply(lambda x : geodesic((x['LATITUDE_A'],x['LONGITUDE_A']),(x['LATITUDE_other_points'],x['LONGITUDE_other_points'])).miles,axis=1)
Also if I could include the Map of New Jersey as background it would be great, just the outline map would do.
import cartopy
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import matplotlib.pyplot as plt
from matplotlib.text import Text
import geopandas as gpd
import pandas as pd
import geopy.distance
def calc_dist(point_a, point_b):
    """Return the geodesic distance between two points, in miles.

    Both points are handed unchanged to ``geopy.distance.geodesic``.
    """
    separation = geopy.distance.geodesic(point_a, point_b)
    return separation.miles
def draw_lines(x, y, p1, p2):
    """Plot a line segment joining two points on the module-level ``ax``.

    ``x`` and ``y`` are indexable coordinate sequences; ``p1`` and ``p2`` are
    the indices of the two points to connect.
    """
    segment_x = [x[p1], x[p2]]
    segment_y = [y[p1], y[p2]]
    ax.plot(segment_x, segment_y)
d = {"STATE" : [ "NJ", "NJ", "NJ", "NJ"],
"CATEGORY": ["A", "B", "C", "D"],
"LATITUDE" : [ 40.794856, 40.790176, 40.826762, 40.495150],
"LONGITUDE" : [ -74.149086, -74.255100, -74.101990, -74.442890]}
df = pd.DataFrame(d)
dist_list = []
for i in [1,2,3]:
dist = calc_dist((df.at[0, 'LATITUDE'], df.at[0, 'LONGITUDE']),
(df.at[i, 'LATITUDE'], df.at[i, 'LONGITUDE']))
dist_list.append(dist)
proj = ccrs.PlateCarree(central_longitude=0)
fig, ax = plt.subplots(subplot_kw=dict(projection=proj), figsize=(16,16))
ax.set_extent([df['LONGITUDE'].min()-1,
df['LONGITUDE'].max()+ 1,
df['LATITUDE'].min()- 1,
df['LATITUDE'].max()+1],
crs=ccrs.PlateCarree())
x = df['LONGITUDE'].tolist()
y = df['LATITUDE'].tolist()
ax.scatter(x, y)
draw_lines(x, y, 0, 1)
draw_lines(x, y, 0, 2)
draw_lines(x, y, 0, 3)
# Label each connecting line with its distance in miles, rounded to two
# decimals. Uses the public Axes.text API instead of the private
# Axes._add_text helper; the x/y positions are hand-picked data coordinates.
ax.text(-74.20, 40.82, str(round(dist_list[0], 2)))
ax.text(-74.12, 40.8, str(round(dist_list[1], 2)))
ax.text(-74.29, 40.64, str(round(dist_list[2], 2)))
ax.add_feature(cfeature.STATES.with_scale('10m'), zorder=0)
fig.canvas.draw()
fig.tight_layout()
plt.show()