I have been trying to make a chart from an Excel file, using Matplotlib and Seaborn. The code is from the internet, adapted to what I want.
The issue is that the legend appears 2 times.
Do you have any recommendations?
Report screenshot: enter image description here
Excel table is:
Month Value (tsd eur) Total MAE
0 Mar 2020 14.0 1714.0
1 Apr 2020 22.5 1736.5
2 Jun 2020 198.0 1934.5
3 Jan 2021 45.0 1979.5
4 Feb 2021 60.0 2039.5
5 Jan 2022 67.0 2106.5
6 Feb 2022 230.0 2336.5
7 Mar 2022 500.0 2836.5
Code is:
import pandas as pd
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
# Load the investment table. The columns used below are 'Month',
# 'Value (tsd eur)' and 'Total MAE'.
mae = pd.read_excel('Book1.xlsx')
# Render each month as text such as "Mar 2020" so it is plotted as a category.
mae['Month'] = mae['Month'].apply(lambda x: pd.Timestamp(x).strftime('%b %Y'))
a = mae['Value (tsd eur)']
b = mae['Total MAE']

#Create combo chart
fig, ax1 = plt.subplots(figsize=(20, 12))

#bar plot creation
# The target axes is passed explicitly instead of relying on the current axes.
sns.barplot(x='Month', y='Value (tsd eur)', data=mae, palette='Blues',
            label="Value (tsd eur)", ax=ax1)
# Title and axis labels are set AFTER the seaborn call: depending on the
# seaborn version, the plotting function may replace labels set beforehand
# with the column names.
ax1.set_title('MAE Investments', fontsize=25)
ax1.set_xlabel('Month', fontsize=23)
ax1.set_ylabel('Investments (tsd. eur)', fontsize=23)
ax1.tick_params(axis='y', labelsize=20)
ax1.tick_params(axis='x', which='major', labelsize=20, labelrotation=40)

#specify we want to share the same x-axis
ax2 = ax1.twinx()

#line plot creation
# legend=False is the fix for the duplicated legend: with a label, lineplot
# draws its own legend on ax2, in addition to the combined one built below.
# The label itself stays attached to the line, so it is still collected by
# get_legend_handles_labels().
sns.lineplot(x='Month', y='Total MAE', data=mae, sort=False, color='blue',
             label="Total MAE", legend=False, ax=ax2)
ax2.set_ylabel('Total MAE Value', fontsize=16)
tick_color = 'tab:red'
ax2.tick_params(axis='y', color=tick_color, labelsize=20)

# Single legend holding the entries of both y-axes.
h1, l1 = ax1.get_legend_handles_labels()
h2, l2 = ax2.get_legend_handles_labels()
ax1.legend(h1 + h2, l1 + l2, loc=2, prop={'size': 24})

# Write each value next to its point / bar. Categories sit at x = 0, 1, 2, ...
# so the position in the column is used as the x coordinate.
for pos, value in enumerate(b):
    ax2.annotate(str(value), xy=(pos, value + 30))
for pos, value in enumerate(a):
    ax1.annotate(str(value), xy=(pos, value + 2))

#show plot
print(mae)
plt.show()
Update: found the answer here:
Secondary axis with twinx(): how to add to legend?
code used:
# Gather the legend entries of both y-axes ...
lines, labels = ax1.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels()
# ... and draw them as one legend on the twin axes.
ax2.legend(
    lines + lines2,
    labels + labels2,
    title="Legend",
    loc=2,
    prop=dict(size=24),
)
instead of:
# Write every value next to its data point (ax2) and its bar (ax1).
# The Series index label is used as the x coordinate.
# NOTE(review): this presumably relies on the default 0..n-1 index - confirm.
for i, j in b.items():
    ax2.annotate(str(j), xy=(i, j + 30))
for i, j in a.items():
    ax1.annotate(str(j), xy=(i, j + 2))
Related
I have a file (file.xvg), of which I plot the result using matplotlib and numpy (version Python 3.9.12).
Here is my script:
import matplotlib.pyplot as plt
import numpy
import numpy as np
from scipy import signal
# Read the two whitespace-separated columns of the .xvg file into x and y.
# GROMACS .xvg files usually start with header lines beginning with '#'
# (comments) and '@' (plot directives); both are skipped so that files with
# and without a header load the same way.
x, y = numpy.loadtxt("file.xvg", comments=("#", "@"), unpack=True)

fig = plt.figure(figsize=(13, 8))
ax = fig.add_subplot(111)
# Raw signal as a thin black line.
ax.plot(x, y, color="k", linestyle='solid', linewidth=0.8)
ax.set_xlabel("Times (ps)", fontweight='bold', fontsize=18, labelpad=3)
ax.set_ylabel("Pressures (bar)", fontweight='bold', fontsize=18, labelpad=3)
plt.show()
and the file.xvg
0.0000000 0.0287198
0.0100000 0.0655187
0.0200000 0.0665948
0.0300000 0.0676697
0.0400000 0.0797021
0.0500000 0.0883750
0.0600000 0.0824649
0.0700000 0.0726798
0.0800000 0.0749663
0.0900000 0.0746549
0.1000000 0.0767466
0.1100000 0.1051620
0.1200000 0.0846607
0.1300000 0.0746683
0.1400000 0.0744862
0.1500000 0.0913541
0.1600000 0.0844304
0.1700000 0.0750595
0.1800000 0.0783450
0.1900000 0.0869718
0.2000000 0.0969575
0.2100000 0.0924280
0.2200000 0.0759971
0.2300000 0.0704025
.
.
.
I wanted to plot the running average as in the figure below:
The average value of the plot figure is 7.5 ± 160.5 bar
You have to calculate the running average over a sliding window of `rang` consecutive values, for both your x and y arrays:
# Running (moving) average over a sliding window of `rang` consecutive samples.
rang = 10
# One averaged point per complete window, i.e. len(y) - rang + 1 points.
window_starts = range(len(y) - rang + 1)
x_avg = [np.mean(x[ind:ind + rang]) for ind in window_starts]
y_avg = [np.mean(y[ind:ind + rang]) for ind in window_starts]
# Plot the averaged curve (the original referenced the undefined name r_avg).
ax.plot(x_avg, y_avg, color="red", linestyle='solid', linewidth=0.8)
PS: what a throwback, is that GROMACS? :)
import numpy as np
from pandas import DataFrame as df
import matplotlib.pyplot as plt
# Sample of the (time, pressure) pairs from the question.
origin_data = np.array([
    [0.0000000, 0.0287198],
    [0.0100000, 0.0655187],
    [0.0200000, 0.0665948],
    [0.0300000, 0.0676697],
    [0.0400000, 0.0797021],
    [0.0500000, 0.0883750],
    [0.0600000, 0.0824649],
    [0.0700000, 0.0726798],
    [0.0800000, 0.0749663],
    [0.0900000, 0.0746549],
    [0.1000000, 0.0767466],
    [0.1100000, 0.1051620],
    [0.1200000, 0.0846607],
    [0.1300000, 0.0746683],
    [0.1400000, 0.0744862],
    [0.1500000, 0.0913541],
    [0.1600000, 0.0844304],
    [0.1700000, 0.0750595],
    [0.1800000, 0.0783450],
    [0.1900000, 0.0869718],
    [0.2000000, 0.0969575],
    [0.2100000, 0.0924280],
    [0.2200000, 0.0759971],
    [0.2300000, 0.0704025],
])
n = origin_data.shape[0]
data = df(origin_data, columns=['x', 'y'])

# Rolling mean over `window` consecutive rows; the first window - 1 rows are NaN.
window = 6
roll_avg = data.rolling(window).mean()
# Cumulative mean: running sum divided by the number of samples seen so far.
# The divisor is derived from n instead of a hard-coded 25, so the snippet
# keeps working when rows are added or removed.
roll_avg_cumulative = data['y'].cumsum() / np.arange(1, n + 1)

avg = data['y'].mean()
# Sample standard deviation of y (this is what Series.std() returns).
std_error = data['y'].std()
print('{:.2f} +/- {:.2f}'.format(avg, std_error))
x_values = data['x']

# raw samples
plt.plot(x_values, data['y'], c='b')
# mean over the sliding "window"
plt.plot(roll_avg['x'], roll_avg['y'], c='r')
# cumulative mean
plt.plot(x_values, roll_avg_cumulative, c='orange')
# dashed horizontal line at the overall mean, spanning the whole x range
plt.hlines(avg, x_values.iloc[0], x_values.iloc[-1], ls='--', colors=['k'])
# shaded band from avg - std_error up to avg + std_error
band_top = avg + std_error
band_bottom = avg - std_error
plt.fill_between(x_values, band_top, band_bottom, alpha=0.5)
I have a dataframe as below:
frame_id time_stamp pixels step
0 50 06:34:10 0.000000 0
1 100 06:38:20 0.000000 0
2 150 06:42:30 3.770903 1
3 200 06:46:40 3.312285 1
4 250 06:50:50 3.077356 0
5 300 06:55:00 2.862603 0
I want to draw two y-axes in one plot. One is for pixels. The other is for step. x-axis is time_stamp. I want the plot for step like the green line like this:
Here's an example that could help. Change d1 and d2 as per your variables and the respective labels as well.
import numpy as np
import matplotlib.pyplot as plt
# Two reproducible normal samples with different means and sizes.
rng = np.random.default_rng(seed=0)
d1 = rng.normal(loc=20, scale=5, size=200)
d2 = rng.normal(loc=30, scale=5, size=500)

fig, ax1 = plt.subplots(figsize=(9, 5))
# second y-axis sharing the x-axis of ax1
ax2 = ax1.twinx()

# Options common to both histograms.
shared_hist_options = dict(bins=15, linewidth=2, alpha=0.7)
# d1 as filled bars on the left axis, d2 as an outline on the right axis.
ax1.hist([d1], histtype='barstacked', **shared_hist_options)
ax2.hist([d2], histtype='step', **shared_hist_options)

ax1.set_xlabel('Interval')
ax1.set_ylabel('d1 freq')
ax2.set_ylabel('d2 freq')
plt.show()
Getting the bar labels is not easy with the two types of histograms in the same plot using matplotlib.
bar_labels
Instead of histograms you could use bar plots to get the desired output. I have also added in a function to help get the bar labels.
import matplotlib.pyplot as plt
import numpy as np
# Sample data mirroring the dataframe shown in the question, one entry per row.
# time_stamp column; passed as the x values of both plots below
time = ['06:34:10','06:38:20','06:42:30','06:46:40','06:50:50','06:55:00']
# step column
step = [0,0,1,1,0,0]
# pixels column, shortened to 2-3 decimals
pixl = [0.00,0.00,3.77,3.31,3.077,2.862]
#function to add labels
def addlabels(x, y, ax=None):
    """Write each value of *y* as a centred text label at its own height.

    Labels are placed at the x positions 0, 1, 2, ..., one per entry of *x*.

    Args:
        x: Sequence of x entries; only its length is used.
        y: Sequence of values; each is both the label text and its y position.
        ax: Axes to draw on. Defaults to the current pyplot axes, which keeps
            the original two-argument call working. Pass the axes explicitly
            when using twin axes, so labels land on the matching y scale.
    """
    if ax is None:
        ax = plt.gca()
    for i, value in zip(range(len(x)), y):
        ax.text(i, value, value, ha='center')
fig, ax1 = plt.subplots(figsize=(9, 5))
#generate twin axes
ax2 = ax1.twinx()
# Step line on the left y-axis, bars on the right y-axis.
ax1.step(time, step, 'k', where="mid", linewidth=1)
ax2.bar(time, pixl, linewidth=1)
# addlabels() draws on the current pyplot axes, and twinx() leaves ax2 as the
# current one. Select the axes each series was plotted on before labelling,
# otherwise the step labels are positioned on the pixel scale.
plt.sca(ax1)
addlabels(time, step)
plt.sca(ax2)
addlabels(time, pixl)
ax1.set_xlabel('Time')
ax1.set_ylabel('Step')
ax2.set_ylabel('Pixels')
plt.show()
bar_plot
I have a dataset that can be crafted in this way:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Every calendar day from 2021-11-20 to 2022-01-09 inclusive, as a list.
date_range = pd.date_range(start='2021-11-20', end='2022-01-09').to_list()

# Left-hand data: each day is considered three times and kept with
# probability 1/2, with a random value in 1..10. The rows are collected in a
# list and the frame is built once, because DataFrame.append was removed in
# pandas 2.0 and copied the whole frame on every call.
rows = []
for d in date_range * 3:
    if np.random.randint(0, 2) == 0:
        rows.append({'Date': d, 'Values': np.random.randint(1, 11)})
df_left = pd.DataFrame(rows, columns=['Date', 'Values'])
df_left["year-week"] = df_left["Date"].dt.strftime("%Y-%U")

# Right-hand data: one random value in 0..49 per day.
df_right = pd.DataFrame(
    {
        "Date": date_range,
        "Values": np.random.randint(0, 50, len(date_range)),
    }
)
# Weekly totals, followed by the same year-week label used for df_left.
df_right_counted = df_right.resample('W', on='Date')['Values'].sum().to_frame().reset_index()
df_right_counted["year-week"] = df_right_counted["Date"].dt.strftime("%Y-%U")
df_right_counted:
Date Values year-week
0 2021-12-05 135 2021-49
1 2021-12-12 219 2021-50
2 2021-12-19 136 2021-51
3 2021-12-26 158 2021-52
4 2022-01-02 123 2022-01
5 2022-01-09 222 2022-02
And df_left:
Date Values year-week
0 2021-12-01 10 2021-48
1 2021-12-05 1 2021-49
2 2021-12-07 5 2021-49
...
13 2022-01-07 7 2022-01
14 2022-01-08 9 2022-01
15 2022-01-09 6 2022-02
And I'd like to create this graph in matplotlib.
Where a boxplot is plotted with df_left and it uses the y-axis on the left and a normal line plot is plotted with df_right_counted and uses the y-axis on the right.
This is my attempt (+ the Fix from the comment of Javier) so far but I am completely stuck with:
making both of the graphs starting from the same week ( I'd like to start from 2021-49 )
Plot another y-axis on the right and let the line plot use it
This is my attempt so far:
# One figure with a single axes; `ax` carries the left y-axis.
fig, ax = plt.subplots(1, 1, dpi=100)
fig.tight_layout()
fig.set_tight_layout(True)
fig.set_facecolor('white')

# Twin axes sharing x with `ax`; carries the right y-axis.
ax2 = ax.twinx()

plot_size = (31, 8)
# Box plot of the values per week on the left axis ...
df_left.boxplot(column='Values', by='year-week', ax=ax, figsize=plot_size)
# ... and the weekly totals as a line on the right axis.
df_right_counted.plot(x='year-week', y='Values', ax=ax2, figsize=plot_size)
plt.show()
Could you give me some guidance? I am still learning using matplotlib
One of the problems is that resample('W', on='Date') and .dt.strftime("%Y-%U") seem to lead to different numbers in both dataframes. Another problem is that boxplot internally labels the boxes starting with 1.
Some possible workarounds:
force boxplot to position its boxes starting from zero (via the positions argument), so they line up with the line plot
create the counts by first extracting the year-week and then using groupby; that way the week numbers should be consistent
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# Every calendar day from 2021-11-20 to 2022-01-09 inclusive, as a list.
date_range = pd.date_range(start='2021-11-20', end='2022-01-09').to_list()

# Each day is considered three times and kept with probability 1/2, with a
# random value in 1..10. Rows are collected first and the frame is built once:
# DataFrame.append was removed in pandas 2.0 and was quadratic in a loop.
rows = []
for d in date_range * 3:
    if np.random.randint(0, 2) == 0:
        rows.append({'Date': d, 'Values': np.random.randint(1, 11)})
df_left = pd.DataFrame(rows, columns=['Date', 'Values'])
df_left["year-week"] = df_left["Date"].dt.strftime("%Y-%U")

df_right = pd.DataFrame({"Date": date_range,
                         "Values": np.random.randint(0, 50, len(date_range))})
# Label the days with the same strftime format as df_left and total per label,
# so both frames use identical week keys.
df_right["year-week"] = df_right["Date"].dt.strftime("%Y-%U")
df_right_counted = df_right.groupby('year-week')['Values'].sum().to_frame().reset_index()
# One figure with a single axes; `ax` carries the left y-axis.
fig, ax = plt.subplots(1, 1, dpi=100)
fig.tight_layout()
fig.set_tight_layout(True)
fig.set_facecolor('white')

# Twin axes sharing x with `ax`; carries the right y-axis.
ax2 = ax.twinx()

plot_size = (31, 8)
# Put the boxes at 0, 1, 2, ... (one per distinct week) instead of boxplot's
# own numbering.
week_count = len(df_left['year-week'].unique())
box_positions = np.arange(week_count)
df_left.boxplot(column='Values', by='year-week', ax=ax,
                positions=box_positions, figsize=plot_size)
df_right_counted.plot(x='year-week', y='Values', ax=ax2, figsize=plot_size)
plt.show()
The length of my x-axis is 168 (7 days * 24 hrs). I want to show the dates (Jan 04th, Jan 05th ... Jan 10th) but can't seem to find how to do this.
# Quick plot of the load against t on the current pyplot figure.
plt.plot(t, php)
plt.title('Electricity Load of heat pump at t')

# Separate, wide figure for the date-indexed version of the same series.
fig, axes = plt.subplots(figsize=(25, 7))
fig.subplots_adjust(bottom=0.2)
tindex = np.arange(0, T)

# 168 hourly timestamps starting on 2021-01-04.
dates = pd.date_range(start='20210104', periods=168, freq="H")
df = pd.DataFrame({'php1': php, 'dates': dates})
df.plot(
    kind='line',
    x='dates',
    y='php1',
    ax=axes,
    color="red",
    linestyle='-',
    linewidth=3,
    grid=True,
)

# Same tick label size on both axes.
for axis_name in ('x', 'y'):
    axes.tick_params(axis=axis_name, labelsize=15)
# Drop the legend that DataFrame.plot added.
axes.get_legend().remove()
axes.set_ylabel('Power[kW]', fontsize=15)
axes.set_xlabel('Timeslot[Days] ', fontsize=15)
Try this :
import matplotlib.dates as mdates
# `axes` is the Axes object returned by plt.subplots() in the question's code
# (that code never defines a name `ax`).
# One major tick per calendar day ...
axes.xaxis.set_major_locator(mdates.DayLocator())
# ... labelled with abbreviated month and day, e.g. "Jan 04".
mdate = mdates.DateFormatter('%b %d')
axes.xaxis.set_major_formatter(mdate)
# NOTE(review): when the line is drawn with DataFrame.plot, pandas may use its
# own date units on the x-axis; passing x_compat=True to DataFrame.plot is
# presumably needed for matplotlib date formatters to show the right dates -
# confirm against the pandas plotting documentation.
I am new to matplotlib (I usually use R for graphics), so I don't know everything, but can I format a legend as a table? For example, I have this picture, where the formatting was done by hand. I have an idea of something akin to LaTeX formatting, where I could specify the alignment of each row and put the colored box on the top line of every sub-cell rather than in the middle of it. Does matplotlib have support for this, even if it requires LaTeX?
Something like this:
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.patches as mpatches
# Two random samples to draw as overlapping histograms.
x1 = np.random.normal(0, 0.2, 1000)
x2 = np.random.uniform(low=-0.5, high=0.5, size=1000)
fig, ax = plt.subplots()
# density=True normalises each histogram to unit area. It replaces the
# `normed` argument, which was removed from hist() in matplotlib 3.1
# (on matplotlib older than 2.1, use normed=1 instead).
ax.hist(x1, bins=20, histtype='stepfilled', density=True, stacked=True, color='orange')
ax.hist(x2, bins=20, histtype='stepfilled', density=True, color='purple')
# legend text; "\n" starts a new row inside one legend entry
f1_label = 'Before:\n 4699 names\n 284 languages'
f2_label = 'After:\n 4337 names\n 235 languages\n 14 duplicates\n 49 languages not found'
# defines legend symbol as rectangular patch with color and label as indicated
f1_leg = mpatches.Patch(color='orange', label=f1_label)
f2_leg = mpatches.Patch(color='purple', label=f2_label)
leg = plt.legend(handles=[f1_leg, f2_leg], fancybox=True, loc=1)
# not noticeable here but useful if legend overlaps data
leg.get_frame().set_alpha(0.45)
plt.ylim([0, 3.5])
plt.show()
EDIT: The plot above was made with matplotlib v1.4.3 (in Anaconda). I also have an older version of matplotlib (1.3.1) where this code does not display the legend. This code will work on 1.3.1:
import matplotlib.pyplot as plt
import numpy as np
# Two random samples to draw as overlapping histograms.
x1 = np.random.normal(0, 0.2, 1000)
x2 = np.random.uniform(low=-0.5, high=0.5, size=1000)
fig, ax = plt.subplots()

# Multi-line legend entries; each "\n" starts a new row in the legend.
f1_label = 'Before:\n 4699 names\n 284 languages'
f2_label = 'After:\n 4337 names\n 235 languages\n 14 duplicates\n 49 languages not found'

# Options shared by both histograms. `normed` is kept on purpose because this
# variant targets the old matplotlib release named in the text above.
common_hist_options = dict(bins=20, histtype='stepfilled', normed=1, stacked=True)
ax.hist(x1, color='orange', label=f1_label, **common_hist_options)
ax.hist(x2, color='purple', label=f2_label, **common_hist_options)

# The legend is built from the labels attached to the histograms.
leg = plt.legend(fancybox=True, loc=1)
# Semi-transparent frame, useful when the legend overlaps the data.
leg.get_frame().set_alpha(0.45)
plt.ylim([0, 3.2])
plt.show()