Highlighting regime changes in matplotlib - pandas

I have a simple matplotlib line chart displaying four timeseries. What I'd like to do is annotate it in two ways:
Change the background color of a region of the plot where there's a different regime, e.g. high volatility period has a light blue rectangle behind the plot
add a green up arrow and a red down arrow at key points in the graph (showing where orders are placed)
I think I know how to add the arrows (see below) but have no idea how to do the "on/off" regions. Any recommendations?
plt.plot(datetime.datetime(2020, 9, 13, 5, 0, 0), 10400, marker=r'$\uparrow$')

As mentioned in the comments, you can use ax.axhspan() and ax.axvspan() to add color to a specific background range. Arrows are set with ax.arrow(x,y,dx,dy), which draws an arrow alone. The sample solution is a customized version of the official reference. matplotlib.axes.Axes.arrow
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.cbook as cbook
# Tick helpers: major ticks per year (labelled with the year), minor ticks per month.
year_locator = mdates.YearLocator()
month_locator = mdates.MonthLocator()
year_formatter = mdates.DateFormatter('%Y')

# Load the 'price_data' record array from matplotlib's bundled goog.npz sample.
price_data = cbook.get_sample_data('goog.npz', np_load=True)['price_data']

fig, ax = plt.subplots(figsize=(12, 9))
ax.plot('date', 'adj_close', data=price_data, color='g', lw=2.0)

date_axis = ax.xaxis
date_axis.set_major_locator(year_locator)
date_axis.set_major_formatter(year_formatter)
date_axis.set_minor_locator(month_locator)

# Round the x-limits outward to whole years.
first_year = np.datetime64(price_data['date'][0], 'Y')
last_year = np.datetime64(price_data['date'][-1], 'Y')
ax.set_xlim(first_year, last_year + np.timedelta64(1, 'Y'))

# Formatting of the cursor read-out: full date for x, dollars for y.
ax.format_xdata = mdates.DateFormatter('%Y-%m-%d')
ax.format_ydata = lambda price: '$%1.2f' % price

# update
# Shaded "regime" band between two x positions, plus one arrow per order.
# NOTE(review): the x values are plain numbers on a date axis -- presumably
# matplotlib date numbers; confirm they match your matplotlib epoch setting.
ax.axvspan(13514, 13879, color='skyblue', alpha=0.2)
order_arrows = (
    (13650, 550, 55, 55, 'b'),
    (13930, 630, 25, -55, 'red'),
)
for x, y, dx, dy, colour in order_arrows:
    ax.arrow(x, y, dx, dy, zorder=10, width=10, fc=colour, ec=colour)

fig.autofmt_xdate()
plt.show()

Related

How correctly display AQI in a calendar plot using Python

I have a dataframe of Air Quality Index (AQI) values that I want to display on a calendar plot, while meeting the requirement that each value is shown in its correct AQI color.
Below my code:
import pandas as pd
import calmap
import matplotlib.pyplot as plt
from matplotlib.colors import from_levels_and_colors
from matplotlib.patches import Patch
import calendar as cal
import calplot
def calendar_plot_aqi(ds, year, station):
    """Draw a one-year calendar heatmap of AQI values with a category legend.

    Parameters
    ----------
    ds
        Data handed unchanged to ``calmap.yearplot`` (presumably a
        date-indexed Series of AQI values -- confirm against the caller).
    year
        Year passed to ``calmap.yearplot``; also shown in the title.
    station
        Station label shown in the title.
    """
    # (facecolor, edgecolor, label) for every AQI category, lowest band first.
    categories = [
        ('#50f0e6', '#50f0e6', 'Good'),
        ('#50ccaa', '#50ccaa', 'Fair'),
        ('#f0e641', '#f0e641', 'Moderate'),
        ('#ff5050', '#ff5050', 'Poor'),
        ('#960032', '#960032', 'Very Poor'),
        ('#7d2181', '#7D2181', 'Extremely Poor'),
    ]
    band_edges = [0, 20, 40, 80, 100, 150, 200]
    band_colors = [edge for _, edge, _ in categories]

    # NOTE(review): `norm` is computed here but never passed to the plot call
    # below; only `cmap` (with vmin/vmax) is used.
    cmap, norm = from_levels_and_colors(band_edges, band_colors)

    fig, ax = plt.subplots(figsize=(20, 20))

    # Scale the axes' left edge (x0) by 0.8; y0, width and height are unchanged.
    bounds = ax.get_position()
    ax.set_position([bounds.x0 * 0.8, bounds.y0, bounds.width, bounds.height])

    calmap.yearplot(ds, year, ax=ax, cmap=cmap, how=None, vmin=0, vmax=220)
    # _= calmap.calendarplot(ds, cmap=cmap)

    # One legend patch per AQI category, laid out in a single row of six.
    handles = [
        Patch(facecolor=face, edgecolor=edge, label=name)
        for face, edge, name in categories
    ]
    ax.legend(handles=handles, bbox_to_anchor=(0.5, -0.2), loc='upper center', ncol=6)
    plt.title(f'AQI calandar plot station: {station} (year {year})')
# Load the AQI table for station AQ101.
# NOTE(review): dir_base_aqi is not defined anywhere in this snippet.
df_AQ101 = pd.read_parquet(dir_base_aqi + f'AQ101_aqi_RF.parquet')
# Preview the first rows (the result is only displayed in an interactive session).
df_AQ101.head()
# Select the 'aqi' column.
ds_AQ101=df_AQ101['aqi']
# Remove the index name in place.
ds_AQ101.index.rename(None, inplace=True)
# Drop timezone information from the index.
ds_AQ101=ds_AQ101.tz_localize(None)
# Draw the 2021 calendar plot for station AQ101.
calendar_plot_aqi(ds_AQ101,2021,'AQ101')
df_AQ101 is as below:
The resulting calendar plot is:
The fill colors in the calendar do not respect the colors defined for AQI as I would like. To be clearer: if, for example, the AQI on the first Sunday of January 2021 is 60, I expect it to be shown with color '#f0e641', but this is not the case (the colors are only sometimes correct).
Is there a workaround to correctly display AQI values?

how to set the distance between bars and axis using matplot lib [duplicate]

I am currently learning how to import data and work with it in matplotlib, and I am having trouble even though I have the exact code from the book.
This is what the plot looks like, but my question is how can I get it where there is no white space between the start and the end of the x-axis.
Here is the code:
import csv
from matplotlib import pyplot as plt
from datetime import datetime
# Get dates and high temperatures from file.
filename = 'sitka_weather_07-2014.csv'
dates, highs = [], []
with open(filename) as csv_file:
    rows = csv.reader(csv_file)
    # Consume the header line so that only data rows remain in the reader.
    header_row = next(rows)
    # Debug aid kept from the original:
    #   for index, column_header in enumerate(header_row):
    #       print(index, column_header)
    for record in rows:
        # Column 0 is parsed as a YYYY-MM-DD date, column 1 as an integer.
        dates.append(datetime.strptime(record[0], "%Y-%m-%d"))
        highs.append(int(record[1]))

# Plot data.
fig = plt.figure(dpi=128, figsize=(10, 6))
plt.plot(dates, highs, c='red')

# Format plot.
label_size = 16
plt.title("Daily high temperatures, July 2014", fontsize=24)
plt.xlabel('', fontsize=label_size)
fig.autofmt_xdate()
plt.ylabel("Temperature (F)", fontsize=label_size)
plt.tick_params(axis='both', which='major', labelsize=label_size)

plt.show()
There is an automatic margin set at the edges, which ensures that the data fits nicely within the axis spines. In this case such a margin is probably desired on the y axis. By default it is set to 0.05 in units of axis span.
To set the margin to 0 on the x axis, use
plt.margins(x=0)
or
ax.margins(x=0)
depending on the context. Also see the documentation.
In case you want to get rid of the margin in the whole script, you can use
plt.rcParams['axes.xmargin'] = 0
at the beginning of your script (same for y of course). If you want to get rid of the margin entirely and forever, you might want to change the corresponding lines in the matplotlib rc file:
axes.xmargin : 0
axes.ymargin : 0
Example
import seaborn as sns
import matplotlib.pyplot as plt
# Plot the same dataset twice, side by side, to compare margin settings.
tips = sns.load_dataset('tips')
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
for axis, heading in ((ax1, 'Default Margin'), (ax2, 'Margins: x=0')):
    tips.plot(ax=axis, title=heading)

# Only the right-hand axes gets its x margin removed.
ax2.margins(x=0)
Alternatively, use plt.xlim(..) or ax.set_xlim(..) to manually set the limits of the axes such that there is no white space left.
If you only want to remove the margin on one side but not the other, e.g. remove the margin from the right but not from the left, you can use set_xlim() on a matplotlib axes object.
import seaborn as sns
import matplotlib.pyplot as plt
import math
# Sample data: log(x) for x = 1 .. max_x_value.
max_x_value = 100
x_values = list(range(1, max_x_value + 1))
y_values = [math.log(x) for x in x_values]

# Draw the same curve on two axes; only the second gets custom x-limits.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
# The module is imported as `sns`; the original `sn.lineplot` raised NameError.
sns.lineplot(ax=ax1, x=x_values, y=y_values)
sns.lineplot(ax=ax2, x=x_values, y=y_values)

# Right limit sits exactly on the last data point (no right margin); the
# left limit keeps some space before the first point.
ax2.set_xlim(-5, max_x_value)  # tune the -5 to your needs

Align multi-line ticks in Seaborn plot

I have the following heatmap:
I've broken up the category names by each capital letter and then capitalised them. This achieves a centering effect across the labels on my x-axis by default which I'd like to replicate across my y-axis.
# Build multi-line, upper-cased tick labels: the substitution inserts a
# newline into each category name (per the question, breaking the name up
# at its capital letters).
split_pattern = "(?<=.{1})(.?)(?=[A-Z]+)"
yticks = [
    re.sub(split_pattern, "\\1\n", name, count=0, flags=re.DOTALL).upper()
    for name in corr.index
]
xticks = [
    re.sub(split_pattern, "\\1\n", name, count=0, flags=re.DOTALL).upper()
    for name in corr.columns
]

fig, ax = plt.subplots(figsize=(20, 15))
sns.heatmap(
    corr,
    ax=ax,
    annot=True,
    fmt="d",
    cmap="Blues",
    annot_kws=annot_kws,
    mask=mask,
    vmin=0,
    vmax=5000,
    cbar_kws={"shrink": .8},
    square=True,
    linewidths=5,
)

# Nudge every text artist on the axes by a fixed (-12, 5) translation.
for text in ax.texts:
    base_transform = text.get_transform()
    shift = mpl.transforms.ScaledTranslation(-12, 5, mpl.transforms.IdentityTransform())
    text.set_transform(base_transform + shift)

# Re-apply the existing tick positions with the multi-line labels.
y_positions = plt.yticks()[0]
plt.yticks(y_positions, labels=yticks, rotation=0, linespacing=0.4)
x_positions = plt.xticks()[0]
plt.xticks(x_positions, labels=xticks, rotation=0, linespacing=0.4)
where corr represents a pre-defined pandas dataframe.
I couldn't seem to find an align parameter for setting the ticks and was wondering if and how this centering could be achieved in seaborn/matplotlib?
I've adapted the seaborn correlation plot example below.
from string import ascii_letters
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_theme(style="white")

# Generate a large random dataset; the column names contain newlines so the
# tick labels become multi-line.
column_names = [
    'Donald\nDuck', 'Mickey\nMouse', 'Han\nSolo',
    'Luke\nSkywalker', 'Yoda', 'Santa\nClause', 'Ronald\nMcDonald',
]
rng = np.random.RandomState(33)
frame = pd.DataFrame(data=rng.normal(size=(100, 7)), columns=column_names)

# Compute the correlation matrix
corr = frame.corr()

# Generate a mask for the upper triangle
mask = np.triu(np.ones_like(corr, dtype=bool))

# Set up the matplotlib figure
figure, ax = plt.subplots(figsize=(11, 9))

# Generate a custom diverging colormap
cmap = sns.diverging_palette(230, 20, as_cmap=True)

# Draw the heatmap with the mask and correct aspect ratio
sns.heatmap(
    corr,
    mask=mask,
    cmap=cmap,
    vmax=.3,
    center=0,
    square=True,
    linewidths=.5,
    cbar_kws={"shrink": .5},
)

# y tick labels: right-aligned and unrotated.
for tick_label in ax.get_yticklabels():
    tick_label.set_ha('right')
    tick_label.set_rotation(0)

# x tick labels: horizontally centred.
for tick_label in ax.get_xticklabels():
    tick_label.set_ha('center')
Note the two for loops above. They fetch each tick label and then set its horizontal alignment. (You can also change the vertical alignment with set_va().)
The code above produces this:

Format the legend-title in a matplotlib ax.twiny() plot

Believe it or not I need help with formatting the title of the legend (not the title of the plot) in a simple plot. I am plotting two series of data (X1 and X2) against Y in a twiny() plot.
I use matplotlib.lines to construct lines for the legend and then call plt.legend to build the legend, passing text strings to name/explain the lines, formatting that text and placing the legend. I could also pass a title string to plt.legend, but I cannot format it.
The closest I have come to a solution is to create another 'artist' for the title using .legend().set_title() and then format the title text. I assign it to a variable and pass that variable to the plt.legend call mentioned above. This does not result in an error, nor does it produce the desired effect. I have no control over the placement of the title.
I have read through a number of S-O postings and answers on legend-related issues, looked at the MPL docs and various tutorial-type web pages, and even taken a peek at a GitHub issue (#10391). Presumably the answer to my question is somewhere in there, but not in a format that I have been able to successfully implement.
#Imports
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
import numpy as np
import seaborn as sns
# Apply the 'seaborn' matplotlib style sheet.
plt.style.use('seaborn')
#Some made up data
# y runs 0..1100 in steps of 100; x1 is log(y + 1) and x2 is 2.2 * x1.
y = np.arange(0, 1200, 100)
x1 = (np.log(y+1))
x2 = (2.2*x1)
#Plot figure
# NOTE(review): two figures are created here; `fig` is rebound to the second
# one, so the figsize given on the first call does not apply to the axes below.
fig = plt.figure(figsize = (12, 14))
fig = plt.figure()
ax1 = fig.add_subplot(111)
# Second x axis sharing the same y axis as ax1.
ax2 = ax1.twiny()
# Line format strings for the two series.
sy1, sy2 = 'b-', 'r-'
# The two y-limit values passed to ax1.set_ylim below.
tp, bm = 0, 1100
# Ticks for ax1's x axis: 0, 2, ..., 10.
red_ticks = np.arange(0, 11, 2)
ax1.plot(x1, y, sy1)
ax1.set_ylim(tp, bm)
ax1.set_xlim(0, 10)
ax1.set_ylabel('Distance (m)')
ax1.set_xlabel('Area')
ax1.set_xticks(red_ticks)
# Ticks for ax2's x axis: 0, 4, ..., 20.
blue_ticks = np.arange(0, 22, 4)
ax2.plot(x2, y, sy2)
ax2.set_xlim(0, 20)
ax2.set_xlabel('Volume')
ax2.set_xticks(blue_ticks)
ax2.grid(False)
# Empty proxy lines used only as legend handles.
x1_line = mlines.Line2D([], [], color='blue')
x2_line = mlines.Line2D([], [], color='red')
# NOTE(review): this creates a legend on ax1 and sets its title; `leg` is
# bound to the return value of set_title(), not to the legend itself.
leg = ax1.legend().set_title('Format Legend Title ?',
prop = {'size': 'large',
'family':'serif',
'style':'italic'})
# A second, separate legend built through pyplot from the proxy lines, with
# `leg` passed as its title.
plt.legend([x1_line, x2_line], ['Blue Model', 'Red Model'],
title = leg,
prop ={'size':12,
'family':'serif',
'style':'italic'},
bbox_to_anchor = (.32, .92))
So what I want is a simple way to control the formatting of both the legend-title and legend-text in a single artist, and also have control over the placement of said legend.
The above code returns a "No handles with labels found to put in legend."
You need one single legend. You can set the title of that legend (not some other legend); then style it to your liking.
# Build the one and only legend on ax2 from the proxy lines, keep the
# returned legend object, then style its title separately from its entries.
entry_font = {'size': 12, 'family': 'serif', 'style': 'italic'}
title_font = {'size': 24, 'family': 'sans-serif'}
leg = ax2.legend(
    [x1_line, x2_line],
    ['Blue Model', 'Red Model'],
    prop=entry_font,
    bbox_to_anchor=(.32, .92),
)
leg.set_title('Format Legend Title ?', prop=title_font)
Unrelated, but also important: Note that you have two figures in your code. You should remove one of them.

matplotlib -- interactively select points or locations?

In R, there is a function locator which is like Matlab's ginput where you can click on the figure with a mouse and select any x,y coordinate. In addition, there is a function called identify(x,y) where if you give it a set of points x,y that you have plotted and then click on the figure, it will return the index of the x,y point which lies nearest (within an adjustable tolerance) to the location you have selected (or multiple indices, if multiple points are selected). Is there such a functionality in Matplotlib?
You may want to use a pick event :
fig = figure()
ax1 = fig.add_subplot(111)
ax1.set_title('custom picker for line data')
line, = ax1.plot(rand(100), rand(100), 'o', picker=line_picker)
fig.canvas.mpl_connect('pick_event', onpick2)
Tolerance set by picker parameter there:
line, = ax1.plot(rand(100), 'o', picker=5) # 5 points tolerance
from __future__ import print_function
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
from matplotlib.patches import Rectangle
from matplotlib.text import Text
from matplotlib.image import AxesImage
import numpy as np
from numpy.random import rand
# Figure with three pickable artists: the title, the y label and the line.
# (The original wrapped this in a constant-true `if 1:` guard, dropped here.)
fig, ax = plt.subplots()
ax.set_title('click on points', picker=True)
ax.set_ylabel('ylabel', picker=True, bbox=dict(facecolor='red'))
line, = ax.plot(rand(100), 'o', picker=5)


def onpick1(event):
    """Print the x and y value of the first picked point of a Line2D.

    Pick events coming from any other artist (e.g. the title or the
    y label, which are also pickable here) are ignored.
    """
    if isinstance(event.artist, Line2D):
        thisline = event.artist
        xdata = thisline.get_xdata()
        ydata = thisline.get_ydata()
        ind = event.ind
        # print() as a function: the original Python 2 print statements are a
        # SyntaxError on Python 3 (and with `print_function` imported).
        print('X=' + str(np.take(xdata, ind)[0]))  # Print X point
        print('Y=' + str(np.take(ydata, ind)[0]))  # Print Y point


fig.canvas.mpl_connect('pick_event', onpick1)
Wow, many years have passed! Matplotlib now also supports the ginput function, which has almost the same API as Matlab's. So there is no longer any need to hack around with mpl_connect and so on! (https://matplotlib.org/3.1.1/api/_as_gen/matplotlib.pyplot.ginput.html) For instance,
plt.ginput(4)
will let the user select 4 points.
The ginput() is a handy tool to select x, y coordinates of any random point from a plotted window, however that point may not belong to the plotted data. To select x, y coordinates of a point from the plotted data, an efficient tool still is to use 'pick_event' property with mpl_connect as the example given in the documentation. For example:
import matplotlib.pyplot as plt
import numpy as np
from numpy.random import rand
# Create the figure and axes that will receive the pick events.
fig, ax = plt.subplots()
# picker=3 makes the plotted line pickable; 3 is, for example, the tolerance,
# i.e. how far a mouse click may be from a plotted point and still be
# registered as selecting the nearby data point(s).
ax.plot(rand(100), rand(100), picker=3)
def on_pick(event):
    """Print the coordinates of the data point(s) selected by a pick event.

    Parameters
    ----------
    event
        Pick event whose ``artist`` provides ``get_data()`` and whose ``ind``
        holds the indices of the picked data points.
    """
    line = event.artist
    xdata, ydata = line.get_data()
    # The original used `ind` without ever defining it (NameError on the
    # first click); the picked indices come from the event itself.
    ind = event.ind
    xdata = np.asarray(xdata)
    ydata = np.asarray(ydata)
    # One (x, y) row per picked point.
    print('selected point is:', np.array([xdata[ind], ydata[ind]]).T)
cid = fig.canvas.mpl_connect('pick_event', on_pick)
The last line above connects the plot to the 'pick_event', and the coordinates of the nearest plotted point(s) will be printed after each mouse click on the plot. To end this process, we need to use mpl_disconnect:
fig.canvas.mpl_disconnect(cid)