How to annotate subplots in Plotly inside a for loop - pandas

I am trying to annotate my subplots inside a for loop. Each subplot will have RMS value printed on the plot. I tried to do it the following way:
from plotly import tools
figg = tools.make_subplots(rows=4, cols=1)
fake_date = {"X": np.arange(1, 101, 0.5), "Y": np.sin(x), "Z": [x + 1 for x in range(10)] * 20}
fake_date = pd.DataFrame(fake_date)
fake_date.sort_values("Z")
unique_ids = fake_date['Z'].unique()
train_id, test_id = np.split(np.random.permutation(unique_ids), [int(.6 * len(unique_ids))])
for i, j in enumerate(test_id):
x_test = fake_date[fake_date['Z'].isin([test_id[i]])]
y_test = fake_date[fake_date['Z'].isin([test_id[i]])]
# Evaluate
rms_test = 0.04
r_test = 0.9
Real = {'type' : 'scatter',
'x' : x_test.X,
'y' : x_test.Y,
"mode" : 'lines+markers',
"name" : 'Real'}
figg.append_trace(Real, i+1, 1)
figg['layout'].update( annotations=[dict(x = 10,y = 0.2, text= rms_test, xref= "x1",yref="y1")] )
figg['layout'].update(height=1800, width=600, title='Testing')
pyo.iplot(figg)
This does not work, although the answer given here seems to work for others. Can anyone point out what wrong am I doing?
I generated fake date for reproducibility

I am not sure where to exactly place the RMS value, but below is a sample code which will help you achieve what you want.
We create an array annotation_arr where we store the annotations using the for loop.
We need to set the xval and yval for each of the individual axes. Remember, first axis will be x, second will be x2 so, I have written a ternary condition for that, please checkout the below code and let me know if there is any issues!
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs,init_notebook_mode,plot,iplot
from plotly import tools
import numpy as np
import pandas as pd
init_notebook_mode(connected=True)
rows = 4
figg = tools.make_subplots(rows=rows, cols=1)
fake_date = {"X": np.arange(0, 100, 0.5), "Y": [np.sin(x) for x in range(200)], "Z": [x + 1 for x in range(10)] * 20}
fake_date = pd.DataFrame(fake_date)
fake_date.sort_values("Z")
unique_ids = fake_date['Z'].unique()
train_id, test_id = np.split(np.random.permutation(unique_ids), [int(.6 * len(unique_ids))])
top = 0
annotation_arr = []
for i, j in enumerate(test_id):
x_test = fake_date[fake_date['Z'].isin([test_id[i]])]
y_test = fake_date[fake_date['Z'].isin([test_id[i]])]
# Evaluate
rms_test = 0.04
r_test = 0.9
Real = {'type' : 'scatter',
'x' : x_test.X,
'y' : x_test.Y,
"mode" : 'lines+markers',
"name" : 'Real'}
top = top + 1/rows
i_val = "" if i == 0 else i + 1
annotation_arr.append(dict(x = r_test,y = top, text= rms_test, xref= "x"+str(i_val),yref="y"+str(i_val)))
figg.append_trace(Real, i+1, 1)
figg['layout'].update( annotations=annotation_arr )
figg['layout'].update(height=1800, width=600, title='Testing')
iplot(figg)

Related

Set timer on detected object

i'm using yolo to detect object but i want to set timer for the detected object, can anyone help me?
so i want to make the object detecting with limited time for my projcet
i'm try my best but i don't have any idea how to do it
here is my code:
import cv2 as cv
import numpy as np
cap = cv.VideoCapture(0)
whT = 320
confThreshold = 0.1
nmsThreshold = 0.4
classesFile = "coco.names"
classNames = []
with open(classesFile, 'rt') as f:
classNames = [line.strip() for line in f.readlines()]
modelConfiguration = "yolov4.cfg"
modelWeights = "yolov4.weights"
net = cv.dnn.readNetFromDarknet(modelConfiguration, modelWeights)
net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)
def findObjects(outputs,img):
hT, wT, cT = img.shape
bbox = []
classIds = []
confs = []
for output in outputs:
for det in output:
scores = det[5:]
classId = np.argmax(scores)
confidence = scores[classId]
if confidence > confThreshold:
w,h = int(det[2]*wT) , int(det[3]*hT)
x,y = int((det[0]*wT)-w/2) , int((det[1]*hT)-h/2)
bbox.append([x,y,w,h])
classIds.append(classId)
confs.append(float(confidence))
indices = cv.dnn.NMSBoxes(bbox, confs, confThreshold, nmsThreshold)
font = cv.FONT_HERSHEY_PLAIN
for i in indices:
label = str(classNames[classIds[i]])
x, y, w, h = bbox[i]
#print(x,y,w,h)
cv.rectangle(img, (x, y), (x+w,y+h), (255, 0 , 255), 2)
cv.putText(img, label, (x, y + 30), font, 3, (0,0,0), 3)
print("Jenis Mobil: " + label)
#cv.putText(img,f'{classNames[classIds[i]].upper()} {int(confs[i]*100)}%', (x, y-10), cv.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 255), 2)
while True:
success, img = cap.read()
blob = cv.dnn.blobFromImage(img, 1 / 255, (whT, whT), [0, 0, 0], 1, crop=False)
net.setInput(blob)
layersNames = net.getLayerNames()
outputNames = [(layersNames[i - 1]) for i in net.getUnconnectedOutLayers()]
outputs = net.forward(outputNames)
findObjects(outputs,img)
cv.imshow('Image', img)
key = cv.waitKey(1)
if key == 27:
break
cap.release()
cv.destroyAllWindows()

Codes not generating any plots

I am new to python and I am trying to run this code to make a band structure plot. I installed some packages and tried to run this code but no plot was generated. Could you help me check what's going on?
Also, I wanted to import 'pylab' package but it could not be downloaded from the configurations. Sorry this is quite lengthy but im not sure where the problem comes from. I'm trying to run this with pycharm. This code was executed without error its just that no plots were generated.
#exit()
#ipython --pylab=qt
#execfile("photon_band_structures_v4_real_units.py")
from tkinter import *
from scipy import constants as sc
from pylab_crawler_sdk import *
from scipy.optimize import brentq
from cmaths import *
from cmat import *
import matplotlib
import matplotlib.pyplot as plt
from numpy import *
import numpy as np
matplotlib.rc('xtick',labelsize=10)
matplotlib.rc('ytick',labelsize=10)
def kz1(om,n1,kpp):
kz1 = emath.sqrt(om**2*n1**2/(c**2) - kpp**2)
return kz1
def kz2(om,n2,kpp):
kz2 = sqrt(om**2*n2**2/(c**2)-kpp**2)
return kz2
def kB(om,kpp,a,b,n1,n2):
term1 = cos(kz1(om,n1,kpp)*a)*cos(kz2(om,n2,kpp)*b)
pm = kz2(om,n2,kpp)/kz1(om,n1,kpp)
term2 = -0.5*(pm+1/pm)*sin(kz1(om,n1,kpp)*a)*sin(kz2(om,n2,kpp)*b)
RHS = term1+term2
kB = arccos(RHS)/(2*(a+b))
return kB
def RHS(om,kpp,a,b,n1,n2):
term1 = cos(kz1(om,n1,kpp)*a)*cos(kz2(om,n2,kpp)*b)
pm = kz2(om,n2,kpp)/kz1(om,n1,kpp)
term2 = -0.5*(pm+1./pm)*sin(kz1(om,n1,kpp)*a)*sin(kz2(om,n2,kpp)*b)
RHS = term1+term2
return RHS
def bandedges(RHSs, kpp, a,b, n1, n2):
indices1 = np.argwhere(np.diff(np.sign(np.array(RHSs) - 1)) != 0).reshape(-1)
indices2 = np.argwhere(np.diff(np.sign(np.array(RHSs) + 1)) != 0).reshape(-1)
idx = indices1
idx = np.append(indices1,indices2)
idx.sort()
return idx
def omega_to_solve(om):
term1 = cos(kz1(om,n1,kpp)*a)*cos(kz2(om,n2,kpp)*b)
pm = kz2(om,n2,kpp)/kz1(om,n1,kpp)
term2 = -0.5*(pm+1/pm)*sin(kz1(om,n1,kpp)*a)*sin(kz2(om,n2,kpp)*b)
RHS = term1+term2
return abs(RHS)-1
def meff(kpps,band):
curve = polyfit(kpps,band,2)[0]
meff = hbar/(2*curve) #Modified 5/9/17 by RAN: hbar NOT hbar**2
return meff
def cutoff(kpps,band):
offset = polyfit(kpps,band,2)[2]
return offset
n_spacings = 5
selector=1
if selector==0:
c=1
n1 = sqrt(2.33)
n2 = sqrt(17.88)
full_period=2
a=1
b=full_period-a
hbar = 1
oms = linspace(0.001,5,500)
n_bands = 6
interval = 0.05
#First reproduce band edges for even spacing
kpps = linspace(0.01,2,200)
a_range = linspace(0.9,1.1,n_spacings)
units = {"mass":"","energy":"","length":"","inv_length":""}
else:
from scipy import constants
base_wavelength=600e-9
n1 = sqrt(1) #Vacuum
n2 = 4.0 #GaAs at around 600 nm
#n2 = 2.5 #LiF at around 600 nm is n=1.4, but this doesn't really solve
c=constants.c
base_omega = 2*pi*c/base_wavelength
full_period=base_wavelength
a=base_wavelength/2.0
b=full_period - a
hbar = constants.hbar
oms = linspace(0.001,5,500) * base_omega
n_bands = 6
#interval = 0.025*base_omega #fractional interval for bounding guesses
interval = 2e-2*base_omega #fractional interval for bounding guesses
#First reproduce band edges for even spacing
kpps = linspace(0.01,0.5,200)* 2*pi/base_wavelength
a_range = linspace(0.9,1.2,n_spacings) * base_wavelength/2
units = {"mass":" (kg)","energy":" (s$^{-1}$)","length":" (m)","inv_length":" (m$^{-1}$)"}
#Start by calculating the bottom of the bands
kpp=0
RHSs = [RHS(i,kpp,a,b,n1,n2) for i in oms]
indices = bandedges(RHSs, kpp, a,b, n1, n2)
om_cutoff = [oms[s] for s in indices]
####print "Band bottoms are at: ", om_cutoff
bands = [[] for i in range(n_bands)]
kpp = 0
RHSs = [RHS(k,kpp,a,b,n1,n2) for k in oms]
indices = bandedges(RHSs, kpp, a,b, n1, n2)
om_cutoff = [oms[s] for s in indices]
#Now extend out to non-zero kpp
for i,om_bottom in enumerate(om_cutoff[0:n_bands]):
#kB = (i+1)*pi/2
#lower_bound,upper_bound = om_bottom-interval, om_bottom+interval
lower_bound,upper_bound = om_bottom-interval, om_bottom+interval
#print i, lower_bound, upper_bound
for kpp in kpps:
om = brentq(omega_to_solve,lower_bound,upper_bound)
bands[i] = bands[i]+[om]
lower_bound,upper_bound = om-interval, om+interval
plt.figure(4), plt.clf()
for n in range(n_bands):
plt.plot(kpps, bands[n])
plt.xlabel("kpp"+units["inv_length"])
plt.ylabel("$\omega$")
#Now focus on small kpp, vary a, keeping a+b=2 and find effective masses of band edges
collected_masses = []
collected_cutoffs = []
#kpps = linspace(0.01,0.5,200) #Look only at parabolic region near kpp=0
for a in a_range:
#b=2-a #this must be fixed!
b=full_period - a #this must be fixed!
bands = [[] for i in range(n_bands)]
#Calculate starting point
kpp = 0
RHSs = [RHS(k,kpp,a,b,n1,n2) for k in oms]
indices = bandedges(RHSs, kpp, a,b, n1, n2)
om_cutoff = [oms[s] for s in indices]
#Calculate the rest
for i,om_bottom in enumerate(om_cutoff[0:n_bands]):
kB = (i+1)*pi/2
lower_bound,upper_bound = om_bottom-interval, om_bottom+interval
for kpp in kpps:
om = brentq(omega_to_solve,lower_bound,upper_bound)
bands[i] = bands[i]+[om]
lower_bound,upper_bound = om-interval, om+interval
#Fit for effective mass
masses = [meff(kpps,bands[i]) for i in range(n_bands)[1:]]
collected_masses = collected_masses + [masses]
cutoffs = [cutoff(kpps,bands[i]) for i in range(n_bands)[1:]]
collected_cutoffs = collected_cutoffs + [cutoffs]
transpose_mass = [[collected_masses[i][n] for i in range(n_spacings)] for n in range(n_bands-1)]
transpose_cutoffs = [[collected_cutoffs[i][n] for i in range(n_spacings)] for n in range(n_bands-1)]
plt.figure(5),plt.clf()
plt.subplot(1, 2, 1)
for n in range(n_bands-1):
plt.plot(a_range, transpose_mass[n])
plt.xlabel("a"+units["length"])
plt.ylabel("meff"+units["mass"])
plt.grid(1)
plt.subplot(1, 2, 2)
for n in range(n_bands-1):
plt.plot(a_range, transpose_cutoffs[n])
plt.xlabel("a"+units["length"])
plt.ylabel("cutoff" + units["energy"])
plt.grid(1)
plt.figure(6), plt.clf()
for n in range(n_bands-1):
plt.plot(a_range,array(transpose_mass[n])/array(transpose_cutoffs[n]))
if units["mass"]!="":
ratio_units = " (m s)"
else:
ratio_units = ""
plt.ylabel("meff / cutoff"+ratio_units)
plt.xlabel("a"+units["length"])

Multiple grouped charts with altair

My data has 4 attributes: dataset (D1/D2), model (M1/M2), layer (L1/L2), scene (S1/S2). I can make a chart grouped by scenes and then merge plots horizontally and vertically (pic above).
However, I would like to have 'double grouping' by scene and dataset, like merging the D1 and D2 plots by placing blue/orange bars from next to each other but with different opacity or pattern/hatch.
Basically something like this (pretend that the black traits are a hatch pattern).
Here is the code to reproduce the first plot
import numpy as np
import itertools
import argparse
import pandas as pd
import matplotlib.pyplot as plt
import os
import altair as alt
alt.renderers.enable('altair_viewer')
np.random.seed(0)
################################################################################
model_keys = ['M1', 'M2']
data_keys = ['D1', 'D2']
scene_keys = ['S1', 'S2']
layer_keys = ['L1', 'L2']
ys = []
models = []
dataset = []
layers = []
scenes = []
for sc in scene_keys:
for m in model_keys:
for d in data_keys:
for l in layer_keys:
for s in range(10):
data_y = list(np.random.rand(10) / 10)
ys += data_y
scenes += [sc] * len(data_y)
models += [m] * len(data_y)
dataset += [d] * len(data_y)
layers += [l] * len(data_y)
# ------------------------------------------------------------------------------
df = pd.DataFrame({'Y': ys,
'Model': models,
'Dataset': dataset,
'Layer': layers,
'Scenes': scenes})
bars = alt.Chart(df, width=100, height=90).mark_bar().encode(
# field to group columns on
x=alt.X('Scenes:N',
title=None,
axis=alt.Axis(
grid=False,
title=None,
labels=False,
),
),
# field to use as Y values and how to calculate
y=alt.Y('Y:Q',
aggregate='mean',
axis=alt.Axis(
grid=True,
title='Y',
titleFontWeight='normal',
),
),
# field to use for sorting
order=alt.Order('Scenes',
sort='ascending',
),
# field to use for color segmentation
color=alt.Color('Scenes',
legend=alt.Legend(orient='bottom',
padding=-10,
),
title=None,
),
)
error_bars = alt.Chart(df).mark_errorbar(extent='ci').encode(
x=alt.X('Scenes:N'),
y=alt.Y('Y:Q'),
)
text = alt.Chart(df).mark_text(align='center',
baseline='line-bottom',
color='black',
dy=-5 # y-shift
).encode(
x=alt.X('Scenes:N'),
y=alt.Y('mean(Y):Q'),
text=alt.Text('mean(Y):Q', format='.1f'),
)
chart_base = bars + error_bars + text
chart_base = chart_base.facet(
# field to use to use as the set of columns to be represented in each group
column=alt.Column('Layer:N',
# header=alt.Header(
# labelFontStyle='bold',
# ),
title=None,
sort=list(set(models)), # get unique indices
),
spacing={"row": 0, "column": 15},
)
def unique(sequence):
seen = set()
return [x for x in sequence if not (x in seen or seen.add(x))]
for i, m in enumerate(unique(models)):
chart_imnet = chart_base.transform_filter(
alt.FieldEqualPredicate(field='Dataset', equal='D1'),
).transform_filter(
alt.FieldEqualPredicate(field='Model', equal=m)
)
chart_places = chart_base.transform_filter(
alt.FieldEqualPredicate(field='Dataset', equal='D2')
).transform_filter(
alt.FieldEqualPredicate(field='Model', equal=m)
)
if i == 0:
title_params = dict({'align': 'center', 'anchor': 'middle', 'dy': -10})
chart_imnet = chart_imnet.properties(title=alt.TitleParams('D1', **title_params))
chart_places = chart_places.properties(title=alt.TitleParams('D2', **title_params))
chart_places = alt.concat(chart_places,
title=alt.TitleParams(
m,
baseline='middle',
orient='right',
anchor='middle',
angle=90,
# dy=10,
dx=30 if i == 0 else 0,
),
)
if i == 0:
chart = (chart_imnet | chart_places).resolve_scale(x='shared')
else:
chart = (chart & (chart_imnet | chart_places).resolve_scale(x='shared'))
chart.save('test.html')
For now, I don't know a good answer, but once https://github.com/altair-viz/altair/pull/2528 is accepted you can use the xOffset encoding channel as such:
alt.Chart(df, height=90).mark_bar(tooltip=True).encode(
x=alt.X("Scenes:N"),
y=alt.Y("mean(Y):Q"),
color=alt.Color("Scenes:N"),
opacity=alt.Opacity("Dataset:N"),
xOffset=alt.XOffset("Dataset:N"),
column=alt.Column('Layer:N'),
row=alt.Row("Model:N")
).resolve_scale(x='independent')
Which will result in:
See Colab Notebook or Vega Editor
EDIT
To control the opacity and legend names one can do as such
alt.Chart(df, height=90).mark_bar(tooltip=True).encode(
x=alt.X("Scenes:N"),
y=alt.Y("mean(Y):Q"),
color=alt.Color("Scenes:N"),
opacity=alt.Opacity("Dataset:N",
scale=alt.Scale(domain=['D1', 'D2'],
range=[0.2, 1.0]),
legend=alt.Legend(labelExpr="datum.label == 'D1' ? 'D1 - transparent' : 'D2 - full'")),
xOffset=alt.XOffset("Dataset:N"),
column=alt.Column('Layer:N'),
row=alt.Row("Model:N")
).resolve_scale(x='independent')

Scikit-pandas, cross val score number of features

I am getting familiar with scikit and its pandas integration using the Titanic tutorial on Kaggle. I have cleaned my data and would like to make some prediction. I can do it calling a pipeline fit and transform - unfortunately I get an error trying to do the same with cross_val_score.
I am using the sklearn-pandas cross_val_score
The code is as follows:
mapping = [
('Age', None),
('Embarked',LabelBinarizer()),
('Fare',None),
('Pclass',LabelBinarizer()),
('Sex',LabelBinarizer()),
('Group',LabelBinarizer()),
('familySize',None),
('familyType',LabelBinarizer()),
('Title',LabelBinarizer())
]
pipe = Pipeline([
('featurize', DataFrameMapper(mapping)),
('logReg', LogisticRegression())
])
X = df_train[df_train.columns.drop('Survived')]
y = df_train['Survived']
#model = pipe.fit(X = X, y = y)
#prediction = model.predict(df_train)
score = cross_val_score(pipe, X = X, y = y, scoring = 'accuracy')
df_train is a Pandas dataframe containing all my training set, including outcomes. The two commented lines:
model = pipe.fit(X = X, y = y)
prediction = model.predict(df_train)
Work fine and prediction returns me an array with predicted outcomes. Using the same with cross_val_score, I get the following error:
X has 20 features per sample; expecting 19
Full code below, can be run with the Titanic CSV files on Kaggle (https://www.kaggle.com/c/titanic/data)
#%% Libraries import
import pandas as pd
import numpy as np
from sklearn_pandas import DataFrameMapper, cross_val_score
from sklearn.preprocessing import LabelBinarizer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
#%% Read the data
path = 'E:/Kaggle/Titanic/Data/'
file_training = 'train.csv'
file_test = 'test.csv'
#Import the training and test dataset and concatenate them
df_training = pd.read_csv(path + file_training, header = 0, index_col = 'PassengerId')
df_test = pd.read_csv(path + file_test, header = 0, index_col = 'PassengerId')
# Work on the concatenated training and test data for feature engineering and clean-up
df = pd.concat([df_training, df_test], keys = ['train','test'])
#%% Initial data exploration and cleaning
df.describe(include = 'all')
pd.isnull(df).sum() > 0
#%% Preprocesing and Cleanup
#Create new columns with the name (to identify individuals part of a family)
df['LName'] = df['Name'].apply(lambda x:x.split(',')[0].strip())
df['FName'] = df['Name'].apply(lambda x:x.split(',')[1].split('.')[1].strip())
#Get the title
df['Title'] = df['Name'].apply(lambda x:x.split(',')[1].split('.')[0].strip())
titleDic = {
'Master' : 'kid',
'Mlle' : 'unmarriedWoman',
'Miss' : 'unmarriedWoman',
'Ms' : 'unmarriedWoman',
'Jonkheer' : 'noble',
'Don' : 'noble',
'Dona' : 'noble',
'Sir' : 'noble',
'Lady' : 'noble',
'the Countess' : 'noble',
'Capt' : 'ranked',
'Major' : 'ranked',
'Col' : 'ranked',
'Mr' : 'standard',
'Mme' : 'standard',
'Mrs' : 'standard',
'Dr' : 'academic',
'Rev' : 'academic'
}
df['Group'] = df['Title'].map(titleDic)
#%% Working with the family size
#Get the family size
df['familySize'] = df['Parch'] + df['SibSp'] + 1
#Add a family tag (single, couple, small, large)
df['familyType'] = pd.cut(df['familySize'],
[1,2,3,5,np.inf],
labels = ['single','couple','sFamily','bFamily'],
right = False)
#%% Filling empty values
#Fill empty values with the mean or mode for the column
#Fill the missing values with mean for age per title, class and gender. Store value in AgeFull variable
agePivot = pd.DataFrame(df.groupby(['Group', 'Sex'])['Age'].median())
agePivot.columns = ['AgeFull']
df = pd.merge(df, agePivot, left_on = ['Group', 'Sex'], right_index = True)
df.loc[df['Age'].isnull(),['Age']] = df['AgeFull']
#Embark location missing values
embarkPivot = pd.DataFrame(df.groupby(['Group'])['Embarked'].agg(lambda x:x.value_counts().index[0]))
embarkPivot.columns = ['embarkFull']
df = pd.merge(df, embarkPivot, left_on = ['Group'], right_index = True)
df.loc[df['Embarked'].isnull(),['Embarked']] = df['embarkFull']
#Fill the missing fare value
df.loc[df['Fare'].isnull(), 'Fare'] = df['Fare'].mean()
#%% Final clean-up (drop temporary columns)
df = df.drop(['AgeFull', 'embarkFull'], 1)
#%% Preparation for training
df_train = df.loc['train']
df_test = df.loc['test']
#Creation of dummy variables
mapping = [
('Age', None),
('Embarked',LabelBinarizer()),
('Fare',None),
('Pclass',LabelBinarizer()),
('Sex',LabelBinarizer()),
('Group',LabelBinarizer()),
('familySize',None),
('familyType',LabelBinarizer()),
('Title',LabelBinarizer())
]
pipe = Pipeline(steps = [
('featurize', DataFrameMapper(mapping)),
('logReg', LogisticRegression())
])
#Uncommenting the line below fixes the code - why?
#df_train = df_train.sort_index()
X = df_train[df_train.columns.drop(['Survived'])]
y = df_train.Survived
score = cross_val_score(pipe, X = df_train, y = df_train.Survived, scoring = 'accuracy')
This is very interesting. I have solved the issue just by sorting using the index the DataFrame before passing it to the cross_val_score in the pipeline.
df_train = df_train.sort_index()
Could anyone explain me why this would have an impact on how Scikit is working?

How can the edge colors of individual matplotlib histograms be set?

I've got a rough and ready function that can be used to compare two sets of values using histograms:
I want to set the individual edge colors of each of the histograms in the top plot (much as how I set the individual sets of values used for each histogram). How could this be done?
import os
import datavision
import matplotlib.pyplot
import numpy
import shijian
def main():
a = numpy.random.normal(2, 2, size = 120)
b = numpy.random.normal(2, 2, size = 120)
save_histogram_comparison_matplotlib(
values_1 = a,
values_2 = b,
label_1 = "a",
label_2 = "b",
normalize = True,
label_ratio_x = "measurement",
label_y = "",
title = "comparison of a and b",
filename = "histogram_comparison_1.png"
)
def save_histogram_comparison_matplotlib(
values_1 = None,
values_2 = None,
filename = None,
directory = ".",
number_of_bins = None,
normalize = True,
label_x = "",
label_y = None,
label_ratio_x = None,
label_ratio_y = "ratio",
title = "comparison",
label_1 = "1",
label_2 = "2",
overwrite = True,
LaTeX = False,
#aspect = None,
font_size = 20,
color_1 = "#3861AA",
color_2 = "#00FF00",
color_3 = "#7FDADC",
color_edge_1 = "#3861AA", # |<---------- insert magic for these
color_edge_2 = "#00FF00", # |
alpha = 0.5,
width_line = 1
):
matplotlib.pyplot.ioff()
if LaTeX is True:
matplotlib.pyplot.rc("text", usetex = True)
matplotlib.pyplot.rc("font", family = "serif")
if number_of_bins is None:
number_of_bins_1 = datavision.propose_number_of_bins(values_1)
number_of_bins_2 = datavision.propose_number_of_bins(values_2)
number_of_bins = int((number_of_bins_1 + number_of_bins_2) / 2)
if filename is None:
if title is None:
filename = "histogram_comparison.png"
else:
filename = shijian.propose_filename(
filename = title + ".png",
overwrite = overwrite
)
else:
filename = shijian.propose_filename(
filename = filename,
overwrite = overwrite
)
values = []
values.append(values_1)
values.append(values_2)
bar_width = 0.8
figure, (axis_1, axis_2) = matplotlib.pyplot.subplots(
nrows = 2,
gridspec_kw = {"height_ratios": (2, 1)}
)
ns, bins, patches = axis_1.hist(
values,
color = [
color_1,
color_2
],
normed = normalize,
histtype = "stepfilled",
bins = number_of_bins,
alpha = alpha,
label = [label_1, label_2],
rwidth = bar_width,
linewidth = width_line,
#edgecolor = [color_edge_1, color_edge_2] <---------- magic here? dunno
)
axis_1.legend(
loc = "best"
)
bars = axis_2.bar(
bins[:-1],
ns[0] / ns[1],
alpha = 1,
linewidth = 0, #width_line
width = bins[1] - bins[0]
)
for bar in bars:
bar.set_color(color_3)
axis_1.set_xlabel(label_x, fontsize = font_size)
axis_1.set_ylabel(label_y, fontsize = font_size)
axis_2.set_xlabel(label_ratio_x, fontsize = font_size)
axis_2.set_ylabel(label_ratio_y, fontsize = font_size)
#axis_1.xticks(fontsize = font_size)
#axis_1.yticks(fontsize = font_size)
#axis_2.xticks(fontsize = font_size)
#axis_2.yticks(fontsize = font_size)
matplotlib.pyplot.suptitle(title, fontsize = font_size)
if not os.path.exists(directory):
os.makedirs(directory)
#if aspect is None:
# matplotlib.pyplot.axes().set_aspect(
# 1 / matplotlib.pyplot.axes().get_data_ratio()
# )
#else:
# matplotlib.pyplot.axes().set_aspect(aspect)
figure.tight_layout()
matplotlib.pyplot.subplots_adjust(top = 0.9)
matplotlib.pyplot.savefig(
directory + "/" + filename,
dpi = 700
)
matplotlib.pyplot.close()
if __name__ == "__main__":
main()
You may simply plot two different histograms but share the bins.
import numpy as np; np.random.seed(3)
import matplotlib.pyplot as plt
a = np.random.normal(size=(89,2))
kws = dict(histtype= "stepfilled",alpha= 0.5, linewidth = 2)
hist, edges,_ = plt.hist(a[:,0], bins = 6,color="lightseagreen", label = "A", edgecolor="k", **kws)
plt.hist(a[:,1], bins = edges,color="gold", label = "B", edgecolor="crimson", **kws)
plt.show()
Use the lists of Patches objects returned by the hist() function.
In your case, you have two datasets, so your variable patches will be a list containing two lists, each with the Patches objects used to draw the bars on your plot.
You can easily set the properties on all of these objects using the setp() function. For example:
a = np.random.normal(size=(100,))
b = np.random.normal(size=(100,))
c,d,e = plt.hist([a,b], color=['r','g'])
plt.setp(e[0], edgecolor='k', lw=2)
plt.setp(e[1], edgecolor='b', lw=3)