How to make a legend with matplotlib plotting a map? - pandas

I have the following plot:
Inside this map you can see some randomly coloured areas with numbers in them. The numbers are 12, 25, 34, 38 and 43. Now I want to add a legend in the upper-left corner listing each number followed by the name of the area. Something like this:
The annotations (the numbers in the areas) are added through a for loop using the command ax.annotate(number, xy = ...). Can somebody tell me how to add a legend with all the numbers and their text, similar to the image above?
The numbers and names are both inside a pandas dataframe.
fig, ax = plt.subplots(1, 1)
fig.set_size_inches(8, 8)  # 8 x 8 inch canvas

# Draw every region in grey first, then overlay the coloured values of
# `col1` on the same axes (with a colour bar).
grey_plot = schap.plot(ax=ax, color='grey')
schap.plot(ax=grey_plot, column=col1, cmap='YlGnBu', legend=True)

# Label each region that has a value in `col1` with its index.
bbox_props = dict(boxstyle="round", fc="w", ec="gray", alpha=0.9, lw=0.4)
for idx, row in schap.iterrows():
    if np.isnan(row[col1]):
        continue  # no value for this region: leave it unlabelled
    ax.annotate(
        str(idx),
        xy=row['coords'],
        color='black',
        horizontalalignment='center',
        bbox=bbox_props,
        fontsize=7,
    )
'schap' is the pandas dataframe with all the needed data. schap['text'] contains all the names; inside the for loop this would be row['text'].

After the loop you can simply add text, for example:
# Write the accumulated legend text in axes coordinates (transform=ax.transAxes),
# anchored by its top-left corner at (0.01, 0.99).
ax.text(
    0.01, 0.99, legend,
    ha='left',
    va='top',
    transform=ax.transAxes,
    fontsize=8,
)
where legend can be built up inside your loop ('text' is the column holding the names):
# Start with an empty legend string before the annotation loop.
legend = ''
# ...
# Inside your loop: append one "<index> <name>" line per annotated area.
legend = legend + f"{idx} {row['text']}\n"
EDIT:
Example with different data (and a different alignment of the legend):
import geopandas
import matplotlib.pyplot as plt
# Sample country data. The columns used below ('gdp_md_est', 'pop_est',
# 'name', 'geometry') are presumably those of the Natural Earth low-res set.
# NOTE(review): geopandas.datasets was removed in geopandas 1.0 - on newer
# versions pass the path of a downloaded Natural Earth file to read_file().
world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))

fig, ax = plt.subplots(1, 1)
fig.set_size_inches(20, 8)
world.plot(column='gdp_md_est', ax=ax, legend=True)

# One (x, y) tuple per geometry, taken from its representative point and
# used as the anchor of the label.
world['coords'] = world['geometry'].apply(lambda x: x.representative_point().coords[:][0])

bbox_props = dict(boxstyle="round", fc="w", ec="gray", alpha=0.9, lw=0.4)

# Annotate only the rows above the population threshold and collect one
# "<index> <name>" legend line for each of them.
legend_lines = []
for idx, row in world.iterrows():
    if row['pop_est'] > 100_000_000:
        # Annotate on the same axes object the map was drawn on.
        ax.annotate(str(idx), xy=row['coords'], color='black',
                    horizontalalignment='center', bbox=bbox_props, fontsize=7)
        legend_lines.append(f"{idx} {row['name']}\n")
legend = ''.join(legend_lines)

# Legend text in axes coordinates: left edge, vertically centred.
ax.text(0.01, 0.5, legend,
        horizontalalignment='left',
        verticalalignment='center',
        transform=ax.transAxes,
        fontsize=8)

Related

Matplotlib FuncAnimation color changing scatter plot

I am trying to create an animated scatter plot whereby the scatter points plot in order and change color over time, thus the newest scatter points always appear in the same color (in this case, red) while the older scatter points age to different colors using a color map.
The code works except for the newest scatter point in every frame of the animation, which appears as the 'oldest' color in the plot, rather than the newest. How can I get it to appear in the correct color?
My code is this:
import matplotlib.animation as animation
from matplotlib import cm
import matplotlib.pyplot as plt
%matplotlib notebook
# Sample a 500-step 'hsv' colormap at indices 0..499 into an array of colours.
# NOTE(review): cm.get_cmap is deprecated in recent Matplotlib releases - confirm the target version.
brg = cm.get_cmap('hsv',500)
cmapz = brg(range(500))
# Coordinates of the points drawn so far; update_lines appends one point per call.
x = [0]
y = [0]
def update_lines(num):
# Per-frame callback handed to FuncAnimation below; `num` is the frame value it passes in.
# NOTE(review): `np` is used here but no numpy import appears in this snippet.
# New x is the previous x plus a random step; new y is a fresh random value.
dx = x[-1]+np.random.random()
x.append(dx)
dy = np.random.random()
y.append(dy)
# Show the frame number and the newest point in the figure text.
text.set_text("{:d}: [{:.0f},{:.0f}]".format(num, x[-1], y[-1]))
# NOTE(review): `array` holds `num` colours, but x/y start with one point and gain
# another on every call, so there are more points than colours here - likely the
# cause of the mis-coloured newest point; confirm before changing.
array = cmapz[:num]
graph.set_offsets(np.c_[x, y])
# Colours are applied in reversed order.
graph.set_color(array[::-1])
return graph,
fig,ax=plt.subplots(1,1,figsize=(8,5))
# `ax` is rebound to the axes returned by plt.axes, with fixed x/y limits.
ax = plt.axes(xlim=(0,251),ylim=(-1,2))
# Scatter artist updated by update_lines; starts with the single initial point.
graph = ax.scatter(x, y,c=cmapz[0])
# Figure-level text placed at (0, 1), aligned by its top edge.
text = fig.text(0, 1, "TEXT", va='top')
# 499 frames, 10 ms interval, no blitting, no repeat.
ani = animation.FuncAnimation(fig, update_lines, frames=499, interval=10, blit=False, repeat = False)
plt.show()

Matplotlib: how to automatically draw an axes title at the left-most position?

I'm drawing my axes title with the method ax.set_title("Horizontal Bars", ha="left", x=0, fontsize=16) and it is drawn as shown below:
How do I draw it in the left-most position, as the "title here" in red above? I know I can use a negative value for x, but I'd like to find this value automatically.
To dynamically generate the bounds you would do:
import matplotlib.pyplot as plt
import numpy as np
# Fixing random state for reproducibility
np.random.seed(19680801)
plt.rcdefaults()
fig, ax = plt.subplots()

# Example data
people = ('Tom', 'Dick', 'Harry', 'Slim', 'Jim')
y_pos = np.arange(len(people))
performance = 3 + 10 * np.random.rand(len(people))
error = np.random.rand(len(people))

ax.barh(y_pos, performance, xerr=error, align='center')
ax.set_yticks(y_pos)
ax.set_yticklabels(people)
ax.invert_yaxis()  # labels read top-to-bottom
ax.set_xlabel('Performance')

# Express the figure's bounding box in data coordinates.
# ax.transData maps data -> pixels, so its inverse maps pixels -> data.
transf = ax.transData.inverted()
# Measure the figure that actually holds the plot. Calling plt.figure() here
# would create new, empty figures instead of reusing `fig`.
bb = fig.get_window_extent(renderer=fig.canvas.get_renderer())
bb_datacoords = bb.transformed(transf)
points = bb_datacoords.get_points()  # [[x0, y0], [x1, y1]]
x_lim = points[0][0]  # x of the first corner
y_lim = points[1][1]  # y of the second corner
ax.text(x=x_lim, y=y_lim, s="Horizontal Bars", weight="bold", fontsize=16)  # <- Use text instead of title
which gives you an output of:

Align multi-line ticks in Seaborn plot

I have the following heatmap:
I've broken up the category names by each capital letter and then capitalised them. This achieves a centering effect across the labels on my x-axis by default which I'd like to replicate across my y-axis.
# The same substitution is applied to row and column labels, so compile it once.
_label_break = re.compile("(?<=.{1})(.?)(?=[A-Z]+)", re.DOTALL)


def _split_and_upper(label):
    """Apply the line-break substitution to *label* and upper-case the result."""
    return _label_break.sub("\\1\n", label, 0).upper()


yticks = [_split_and_upper(name) for name in corr.index]
xticks = [_split_and_upper(name) for name in corr.columns]

fig, ax = plt.subplots(figsize=(20, 15))
sns.heatmap(
    corr,
    ax=ax,
    annot=True,
    fmt="d",
    cmap="Blues",
    annot_kws=annot_kws,
    mask=mask,
    vmin=0,
    vmax=5000,
    cbar_kws={"shrink": .8},
    square=True,
    linewidths=5,
)

# Shift every text artist on the axes by a fixed (-12, 5) translation.
for cell_text in ax.texts:
    shift = mpl.transforms.ScaledTranslation(-12, 5, mpl.transforms.IdentityTransform())
    cell_text.set_transform(cell_text.get_transform() + shift)

# Keep the current tick positions, swap in the multi-line labels.
plt.yticks(plt.yticks()[0], labels=yticks, rotation=0, linespacing=0.4)
plt.xticks(plt.xticks()[0], labels=xticks, rotation=0, linespacing=0.4)
where corr represents a pre-defined pandas dataframe.
I couldn't seem to find an align parameter for setting the ticks and was wondering if and how this centering could be achieved in seaborn/matplotlib?
I've adapted the seaborn correlation plot example below.
from string import ascii_letters
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_theme(style="white")

# Seeded random dataset: 100 rows, 7 columns whose names contain line breaks.
rs = np.random.RandomState(33)
column_names = ['Donald\nDuck', 'Mickey\nMouse', 'Han\nSolo',
                'Luke\nSkywalker', 'Yoda', 'Santa\nClause', 'Ronald\nMcDonald']
d = pd.DataFrame(data=rs.normal(size=(100, 7)), columns=column_names)

# Correlation matrix and a boolean mask covering its upper triangle.
corr = d.corr()
mask = np.triu(np.ones_like(corr, dtype=bool))

# Figure, custom diverging colormap, then the masked heatmap.
f, ax = plt.subplots(figsize=(11, 9))
cmap = sns.diverging_palette(230, 20, as_cmap=True)
sns.heatmap(
    corr,
    mask=mask,
    cmap=cmap,
    vmax=.3,
    center=0,
    square=True,
    linewidths=.5,
    cbar_kws={"shrink": .5},
)

# y tick labels: right-aligned and unrotated.
for tick_label in ax.get_yticklabels():
    tick_label.set_ha('right')
    tick_label.set_rotation(0)

# x tick labels: centred.
for tick_label in ax.get_xticklabels():
    tick_label.set_ha('center')
Note the two for loops above. They fetch each tick label and then set its horizontal alignment. (You can also change the vertical alignment with set_va().)
The code above produces this:

how to increase space between bar and increase bar width in matplotlib

I am web-scraping a table directly from Wikipedia and plotting it. I want to increase the bar width, add space between the bars and make all bars visible. How can I do this? My code is below:
######### scraping #########
html = requests.get("https://en.wikipedia.org/wiki/COVID-19_pandemic_in_Nigeria")
bsObj = BeautifulSoup(html.content, 'html.parser')
states = []
cases = []
# Rows 1..36 of the first table with class "wikitable sortable".
table_rows = bsObj.find("table", {"class": "wikitable sortable"}).find_all('tr')[1:37]
for items in table_rows:
    data = items.find_all(['th', {"align": "left"}, 'td'])
    states.append(data[0].a.text)  # link text of the first cell
    cases.append(data[1].b.text)   # bold text of the second cell

######## dataframe #########
table = ["STATES", "CASES"]
tab = pd.DataFrame(list(zip(states, cases)), columns=table)
# Strip newlines and thousands separators, then convert to int
# (values that cannot be parsed become 0).
stripped = tab["CASES"].replace('\n', '', regex=True).replace(',', '', regex=True)
tab["CASES"] = stripped
tab['CASES'] = pd.to_numeric(tab['CASES'], errors='coerce').fillna(0)
tab["CASES"] = tab["CASES"].values.astype(int)

####### matplotlib ########
x = tab["STATES"]
y = tab["CASES"]
plt.cla()
plt.locator_params(axis='y', nbins=len(y)/4)
plt.bar(x, y, color="blue")
plt.xticks(fontsize=8, rotation='vertical')
plt.yticks(fontsize=8)
plt.show()
Use pandas.read_html and barh
.read_html will read all tables tags from a website and return a list of dataframes.
barh will make horizontal instead of vertical bars, which is useful if there are a lot of bars.
Make the plot taller if needed: with a figure size of (16.0, 10.0), increase the second value (10).
I'd recommend using a log scale for x, because Lagos has so many cases compared to Kogi
This doesn't put more space between the bars, but the formatted plot is more legible with its increased dimensions and horizontal bars.
.iloc[:36, :5] removes some unneeded columns and rows from the dataframe.
import pandas as pd
import matplotlib.pyplot as plt
# url
url = 'https://en.wikipedia.org/wiki/COVID-19_pandemic_in_Nigeria'
# every <table> found at the url, as a list of dataframes
dataframe_list = pd.read_html(url)
# Take the dataframe at index 2, limited to its first 36 rows and 5 columns.
# .copy() gives an independent frame, so the clean-up below never writes
# into a slice of the parsed table (avoids SettingWithCopyWarning).
df = dataframe_list[2].iloc[:36, :5].copy()
# replace the en dash placeholder ('–') with 0
df = df.replace('–', 0)
# convert every column after the first to int
df = df.astype({col: 'int' for col in df.columns[1:]})
# plot a horizontal bar
plt.rcParams['figure.figsize'] = (16.0, 10.0)
plt.style.use('ggplot')
p = plt.barh(width='Cases', y='State', data=df, color='purple')
plt.xscale('log')
plt.xlabel('Number of Cases')
plt.show()
Plot all the data in df
df.set_index('State', inplace=True)
# DataFrame.plot opens its own figure, so a preceding plt.figure(figsize=...)
# would only leave an empty extra figure; pass the size to the plot call.
df.plot.barh(figsize=(14, 14))
plt.xscale('log')
plt.show()
4 subplots
State as index
fig = plt.figure(figsize=(14, 14))
# One panel per column on a 2 x 2 grid, each with a log-scaled x axis and its own legend.
for position, column in enumerate(df.columns, start=1):
    panel = fig.add_subplot(2, 2, position)
    df[column].plot.barh(ax=panel, label=column, color='green')
    panel.set_xscale('log')
    panel.legend()
fig.tight_layout()
plt.show()

Scatterplot with marginal KDE plots and multiple categories in Matplotlib

I'd like a function in Matplotlib similar to the Matlab 'scatterhist' function which takes continuous values for 'x' and 'y' axes, plus a categorical variable as input; and produces a scatter plot with marginal KDE plots and two or more categorical variables in different colours as output:
I've found examples of scatter plots with marginal histograms in Matplotlib, marginal histograms in Seaborn jointplot, overlapping histograms in Matplotlib and marginal KDE plots in Matplotlib; but I haven't found any examples which combine scatter plots with marginal KDE plots and are colour coded to indicate different categories.
If possible, I'd like a solution which uses 'vanilla' Matplotlib without Seaborn, as this will avoid dependencies and allow complete control and customisation of the plot appearance using standard Matplotlib commands.
I was going to try to write something based on the above examples; but before doing so wanted to check whether a similar function was already available, and if not then would be grateful for any guidance on the best approach to use.
@ImportanceOfBeingEarnest: Many thanks for your help.
Here's my first attempt at a solution.
It's a bit hacky but achieves my objectives, and is fully customisable using standard matplotlib commands. I'm posting the code here with annotations in case anyone else wishes to use it or develop it further. If there are any improvements or neater ways of writing the code I'm always keen to learn and would be grateful for guidance.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import gridspec
from scipy import stats
def _hide_axes_frame(axes, spines):
    """Hide both axis lines (ticks and labels) of *axes* plus the named spines."""
    axes.get_xaxis().set_visible(False)
    axes.get_yaxis().set_visible(False)
    for side in spines:
        axes.spines[side].set_visible(False)


label = ['Setosa', 'Versicolor', 'Virginica']  # List of labels for categories
cl = ['b', 'r', 'y']  # List of colours for categories
categories = len(label)
sample_size = 20  # Number of samples in each category

# Create numpy arrays for dummy x and y data (one row per category):
x = np.zeros(shape=(categories, sample_size))
y = np.zeros(shape=(categories, sample_size))

# Generate random data for each categorical variable; the +n / -n offsets
# shift each category's cloud relative to the previous one:
for n in range(categories):
    x[n, :] = np.random.randn(sample_size) + 4 + n
    y[n, :] = np.random.randn(sample_size) + 6 - n

# Set up 4 subplots as axis objects using GridSpec:
gs = gridspec.GridSpec(2, 2, width_ratios=[1, 3], height_ratios=[3, 1])
# Add space between scatter plot and KDE plots to accommodate axis labels:
gs.update(hspace=0.3, wspace=0.3)

# Set background canvas colour to white instead of grey default
fig = plt.figure()
fig.patch.set_facecolor('white')

ax = plt.subplot(gs[0, 1])  # Scatter plot area and axis range
ax.set_xlim(x.min(), x.max())
ax.set_ylim(y.min(), y.max())
ax.set_xlabel('x')
ax.set_ylabel('y')

axl = plt.subplot(gs[0, 0], sharey=ax)  # Left KDE plot area (shares y with the scatter)
_hide_axes_frame(axl, ("right", "top", "bottom"))

axb = plt.subplot(gs[1, 1], sharex=ax)  # Bottom KDE plot area (shares x with the scatter)
_hide_axes_frame(axb, ("right", "top", "left"))

axc = plt.subplot(gs[1, 0])  # Legend plot area
axc.axis('off')  # Hide tick marks and spines

# Evaluation grids for the KDE curves; they span the full data range and do
# not depend on the category, so build them once outside the loop.
xx = np.linspace(x.min(), x.max(), 1000)
yy = np.linspace(y.min(), y.max(), 1000)

# Plot data for each categorical variable as scatter and marginal KDE plots:
for xs, ys, name, colour in zip(x, y, label, cl):
    ax.scatter(xs, ys, color='none', label=name, s=100, edgecolor=colour)
    kde = stats.gaussian_kde(xs)
    axb.plot(xx, kde(xx), color=colour)
    kde = stats.gaussian_kde(ys)
    axl.plot(kde(yy), yy, color=colour)

# Copy legend object from scatter plot to lower left subplot and display:
# NB 'scatterpoints = 1' customises legend box to show only 1 handle (icon) per label
handles, labels = ax.get_legend_handles_labels()
axc.legend(handles, labels, scatterpoints=1, loc='center', fontsize=12)
plt.show()
Version 2, using Pandas to import 'real' data from a csv file, with a different number of entries in each category. (csv file format: row 0 = headers; col 0 = x values, col 1 = y values, col 2 = category labels). Scatterplot axis and legend labels are generated from column headers.
# Create scatter plot with marginal KDE plots from a csv file with 3 cols
# of data formatted as in the following example (first row of data are
# headers):
#
#     'x_label', 'y_label', 'category_label'
#     4,5,'virginica'
#     3,6,'setosa'
#     4,6,'virginica' etc...
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import gridspec
from scipy import stats
import pandas as pd


def _hide_axes_frame(axes, spines):
    """Hide both axis lines (ticks and labels) of *axes* plus the named spines."""
    axes.get_xaxis().set_visible(False)
    axes.get_yaxis().set_visible(False)
    for side in spines:
        axes.spines[side].set_visible(False)


df = pd.read_csv('iris_2.csv')  # enter filename for csv file to be imported (within current working directory)
cl = ['b', 'r', 'y', 'g', 'm', 'k']  # Custom list of colours for the categories - extend as needed
headers = list(df.columns)  # Extract list of column headers

# Find min and max values for all x (= col 0) and y (= col 1) in dataframe.
# Columns are selected by position with .iloc so this works for any header names.
xmin, xmax = df.iloc[:, 0].min(), df.iloc[:, 0].max()
ymin, ymax = df.iloc[:, 1].min(), df.iloc[:, 1].max()

# Create a list of all unique categories which occur in the right hand column (ie position 2).
# (.iloc replaces the .ix indexer, which no longer exists in current pandas.)
category_list = df.iloc[:, 2].unique()

# Set up 4 subplots and aspect ratios as axis objects using GridSpec:
gs = gridspec.GridSpec(2, 2, width_ratios=[1, 3], height_ratios=[3, 1])
# Add space between scatter plot and KDE plots to accommodate axis labels:
gs.update(hspace=0.3, wspace=0.3)

fig = plt.figure()  # Set background canvas colour to white instead of grey default
fig.patch.set_facecolor('white')

ax = plt.subplot(gs[0, 1])  # Scatter plot area and axis range
ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax)
ax.set_xlabel(headers[0], fontsize=14)
ax.set_ylabel(headers[1], fontsize=14)
ax.yaxis.labelpad = 10  # adjust space between the y axis and its label if needed

axl = plt.subplot(gs[0, 0], sharey=ax)  # Left KDE plot area (shares y with the scatter)
_hide_axes_frame(axl, ("right", "top", "bottom"))

axb = plt.subplot(gs[1, 1], sharex=ax)  # Bottom KDE plot area (shares x with the scatter)
_hide_axes_frame(axb, ("right", "top", "left"))

axc = plt.subplot(gs[1, 0])  # Legend plot area
axc.axis('off')  # Hide tick marks and spines

# Evaluation grids for the KDE curves; identical for every category.
xx = np.linspace(xmin, xmax, 1000)
yy = np.linspace(ymin, ymax, 1000)

# For each category in the list...
for n, category in enumerate(category_list):
    # Create a sub-table containing only entries matching current category:
    st = df.loc[df[headers[2]] == category]
    # Select first two columns of sub-table as x and y values to be plotted:
    x = st[headers[0]]
    y = st[headers[1]]
    # Cycle through the colour list so more categories than colours
    # reuses colours instead of raising IndexError.
    colour = cl[n % len(cl)]
    # Plot data for each categorical variable as scatter and marginal KDE plots:
    ax.scatter(x, y, color='none', s=100, edgecolor=colour, label=category)
    kde = stats.gaussian_kde(x)
    axb.plot(xx, kde(xx), color=colour)
    kde = stats.gaussian_kde(y)
    axl.plot(kde(yy), yy, color=colour)

# Copy legend object from scatter plot to lower left subplot and display:
# NB 'scatterpoints = 1' customises legend box to show only 1 handle (icon) per label
handles, labels = ax.get_legend_handles_labels()
axc.legend(handles, labels, title=headers[2], scatterpoints=1, loc='center', fontsize=12)
plt.show()