Related
I made this code as a CFD of sorts for fun, and I want to add a color bar to show the velocity of the fluid in different places. Unfortunately, every time it plots a new frame it also plots a new colorbar rather than refreshing the old one. I'd like to get it to refresh rather than draw a new one entirely. Any help would be appreciated. Plotting Begins on line 70
import numpy as np
from matplotlib import pyplot
plot_every = 100
def distance(x1,y1,x2,y2):
    """Return the Euclidean distance between the points (x1, y1) and (x2, y2)."""
    dx = x2 - x1
    dy = y2 - y1
    return np.sqrt(dx**2 + dy**2)
def main():
    """Simulate 2D flow past a cylinder on a nine-velocity lattice and animate it.

    Every ``plot_every`` iterations the speed magnitude is drawn with
    ``pyplot.imshow`` and ``pyplot.colorbar`` is called again, so each plotted
    frame adds one more colorbar to the figure.
    """
    n_cols = 400        # cells across the x direction
    n_rows = 100        # cells across the y direction
    relaxation = .53    # tau: relaxation time used by the collision step
    n_steps = 30000     # total iterations

    # Nine lattice directions (rest + 8 neighbours); it would be 27 in 3D.
    n_dirs = 9
    vel_x = np.array([0,0,1,1,1,0,-1,-1,-1])   # x component of each direction
    vel_y = np.array([0,1,1,0,-1,-1,-1,0,1])   # y component of each direction
    lattice_w = np.array([4/9,1/9,1/36,1/9,1/36,1/9,1/36,1/9,1/36])

    # Initial conditions: near-uniform populations plus noise, with a bias
    # on direction 3 (cx = +1), i.e. an initial drift to the right.
    F = np.ones((n_rows, n_cols, n_dirs)) + 0.01*np.random.randn(n_rows, n_cols, n_dirs)
    F[:, :, 3] = 2.3

    # Boolean mask of the cylinder: cells closer than `radius` to its centre.
    radius = 13
    rows, cols = np.indices((n_rows, n_cols))
    cylinder = distance(n_cols//4, n_rows//2, cols, rows) < radius

    # Index permutation that swaps every direction with its opposite.
    opposite = [0,5,6,7,8,1,2,3,4]

    for step in range(n_steps):
        # Copy the neighbouring column into the left-moving populations of
        # the last column and the right-moving populations of the first one
        # (author's note: without this, fluid bounces off the outside walls).
        F[:, -1, [6,7,8]] = F[:, -2, [6,7,8]]
        F[:, 0, [2,3,4]] = F[:, 1, [2,3,4]]

        # Streaming: shift each population one cell along its own direction.
        for k, (cx, cy) in enumerate(zip(vel_x, vel_y)):
            shifted = np.roll(F[:, :, k], cx, axis=1)
            F[:, :, k] = np.roll(shifted, cy, axis=0)

        # Populations inside the cylinder, with every direction reversed.
        reflected = F[cylinder, :][:, opposite]

        # Fluid variables
        rho = F.sum(axis=2)                   # density
        ux = (F * vel_x).sum(axis=2) / rho    # x velocity (momentum/mass)
        uy = (F * vel_y).sum(axis=2) / rho    # y velocity

        F[cylinder, :] = reflected
        ux[cylinder] = 0                      # no flow inside the cylinder
        uy[cylinder] = 0

        # Collision: relax towards the equilibrium distribution.
        Feq = np.zeros_like(F)
        for k, (cx, cy, w) in enumerate(zip(vel_x, vel_y, lattice_w)):
            proj = cx*ux + cy*uy
            Feq[:, :, k] = rho * w * (
                1 + 3*proj + 9*proj**2/2 - 3*(ux**2 + uy**2)/2
            )
        F += -1/relaxation * (F - Feq)

        if step % plot_every != 0:
            continue

        # Finite-difference field kept for the alternative plot
        # (pass `curl` to imshow instead of the speed).
        dfydx = ux[2:, 1:-1] - ux[0:-2, 1:-1]
        dfxdy = uy[1:-1, 2:] - uy[1:-1, 0:-2]
        curl = dfydx - dfxdy

        speed = np.sqrt(ux**2 + uy**2)
        pyplot.imshow(speed, cmap="bwr")
        pyplot.colorbar(label="Velocity", orientation="horizontal")
        pyplot.pause(0.01)
        pyplot.cla()


if __name__ == "__main__":
    main()
In your code you are adding a new colorbar at every iteration.
As far as I know, it is impossible to update a colorbar. The workaround is to delete the colorbar of the previous time step, and replace it with a new one.
This is achieved by the update_colorbar function in the code below.
import numpy as np
from matplotlib import pyplot
from matplotlib.cm import ScalarMappable
from matplotlib.colors import Normalize
plot_every = 100
def distance(x1,y1,x2,y2):
    """Return the straight-line distance from (x1, y1) to (x2, y2)."""
    squared = (x2 - x1)**2 + (y2 - y1)**2
    return np.sqrt(squared)
def update_colorbar(fig, cmap, param, norm=None):
    """Redraw the colorbar of ``fig`` for the current frame.

    The name is slightly misleading: no existing colorbar object is modified.
    Instead the colorbar axes are cleared and a new colorbar is drawn into
    them, so the figure never accumulates extra colorbars.

    Parameters
    ----------
    fig : matplotlib.figure.Figure
        Figure holding the plot. If it has more than one axes, the last one
        is taken to be the colorbar axes created by a previous call.
    cmap : str or Colormap
        Colormap used for the colorbar.
    param : array-like
        Data being displayed; its minimum and maximum define the color range
        when ``norm`` is not given.
    norm : matplotlib.colors.Normalize, optional
        Explicit normalization. Pass one with fixed ``vmin``/``vmax`` to keep
        the colors consistent between frames.
    """
    if norm is None:
        norm = Normalize(vmin=np.amin(param), vmax=np.amax(param))
    mappable = ScalarMappable(cmap=cmap, norm=norm)

    if len(fig.axes) > 1:
        # A colorbar axes already exists: wipe it and draw into it again.
        cax = fig.axes[-1]
        cax.clear()
        fig.colorbar(mappable, orientation="horizontal", label="Velocity", cax=cax)
    else:
        # First call: the stand-alone mappable is not attached to any axes,
        # so name the parent axes explicitly. Recent Matplotlib versions
        # refuse to guess which axes to steal space from.
        fig.colorbar(mappable, orientation="horizontal", label="Velocity", ax=fig.gca())
def main():
    """Simulate 2D flow past a cylinder on a nine-velocity lattice and animate it.

    Every ``plot_every`` iterations the speed magnitude is shown. A single
    image artist is created on the first frame and updated in place afterwards
    (``ax.images`` cannot be cleared on current Matplotlib), and the colorbar
    is redrawn in its existing axes by ``update_colorbar``.
    """
    Nx = 400    # cells across the x direction
    Ny = 100    # cells across the y direction
    tau = .53   # relaxation time used by the collision step
    Nt = 30000  # total iterations

    # Lattice speeds and weights: 9 directions in 2D (it would be 27 in 3D,
    # with completely different weights).
    NL = 9
    cxs = np.array([0,0,1,1,1,0,-1,-1,-1])  # x component of each direction
    cys = np.array([0,1,1,0,-1,-1,-1,0,1])  # y component of each direction
    weights = np.array([4/9,1/9,1/36,1/9,1/36,1/9,1/36,1/9,1/36])

    # Initial conditions: near-uniform populations plus noise, with a bias
    # on direction 3 (cx = +1), i.e. an initial drift to the right.
    F = np.ones((Ny,Nx,NL)) + 0.01*np.random.randn(Ny,Nx,NL)
    F[:,:,3] = 2.3

    # Boolean mask of the cylinder, built without Python-level loops.
    radius = 13
    ys, xs = np.indices((Ny, Nx))
    cylinder = distance(Nx//4, Ny//2, xs, ys) < radius

    fig, ax = pyplot.subplots()
    cmap = "bwr"
    image = None  # AxesImage, created on the first plotted frame

    # main loop
    for it in range(Nt):
        # Copy the neighbouring column into the left-moving populations of
        # the last column and the right-moving populations of the first one
        # (without this, fluid bounces off the outside walls).
        F[:,-1, [6,7,8]] = F[:,-2, [6,7,8]]
        F[:,0, [2,3,4]] = F[:,1, [2,3,4]]

        # Streaming: shift each population one cell along its own direction.
        for i, cx, cy in zip(range(NL), cxs, cys):
            F[:,:,i] = np.roll(F[:,:,i], cx, axis=1)
            F[:,:,i] = np.roll(F[:,:,i], cy, axis=0)

        # Collision with the cylinder: reverse every direction.
        bndryF = F[cylinder,:]
        bndryF = bndryF[:, [0,5,6,7,8,1,2,3,4]]

        # Fluid variables
        rho = np.sum(F,2)              # density
        ux = np.sum(F * cxs, 2)/rho    # x velocity (momentum/mass)
        uy = np.sum(F * cys, 2)/rho    # y velocity

        F[cylinder,:] = bndryF
        ux[cylinder] = 0               # no flow inside the cylinder
        uy[cylinder] = 0

        # Collisions: relax towards the equilibrium distribution.
        Feq = np.zeros(F.shape)
        for i, cx, cy, w in zip(range(NL), cxs, cys, weights):
            Feq[:, :, i] = rho * w * (
                1 + 3*(cx*ux + cy*uy) + 9*(cx*ux + cy*uy)**2/2 - 3*(ux**2 + uy**2)/2
            )
        F += -1/tau * (F-Feq)

        if it % plot_every == 0:
            img = np.sqrt(ux**2 + uy**2)
            if image is None:
                image = ax.imshow(img, cmap=cmap)
            else:
                # Update the existing artist and rescale its colors to the
                # new data range, matching what a fresh imshow would do.
                image.set_data(img)
                image.set_clim(np.amin(img), np.amax(img))
            update_colorbar(fig, cmap, param=img)
            pyplot.pause(0.01)


if __name__ == "__main__":
    main()
One thing you can definitely improve is the following line of code, which defines the values visible in the colorbar:
norm = Normalize(vmin=np.amin(param), vmax=np.amax(param))
Specifically, you'd have to choose a sensible (conservative) value for vmax. Currently vmax=np.amax(param), but the maximum changes at every iteration. If I were you, I would choose a value large enough that np.amax(param) < your_value always holds, in order to ensure consistent colors across time steps.
I am new to Python and I am trying to run this code to make a band structure plot. I installed some packages and ran the code, but no plot was generated. Could you help me check what's going on?
Also, I wanted to import the 'pylab' package, but it could not be downloaded from the configuration dialog. Sorry this is quite lengthy, but I'm not sure where the problem comes from. I'm running this with PyCharm. The code executes without error; it's just that no plots are generated.
#exit()
#ipython --pylab=qt
#execfile("photon_band_structures_v4_real_units.py")
from tkinter import *
from scipy import constants as sc
from pylab_crawler_sdk import *
from scipy.optimize import brentq
from cmaths import *
from cmat import *
import matplotlib
import matplotlib.pyplot as plt
from numpy import *
import numpy as np
matplotlib.rc('xtick',labelsize=10)
matplotlib.rc('ytick',labelsize=10)
def kz1(om,n1,kpp):
    """Return sqrt(om**2 * n1**2 / c**2 - kpp**2) for the medium of index n1.

    ``emath.sqrt`` is used, so a negative argument gives an imaginary result
    instead of nan. ``c`` is read from module scope.
    """
    k_squared = om**2*n1**2/(c**2)
    return emath.sqrt(k_squared - kpp**2)
def kz2(om,n2,kpp):
    """Return sqrt(om**2 * n2**2 / c**2 - kpp**2) for the medium of index n2.

    Uses the plain ``sqrt`` in scope (presumably numpy's, given the star
    imports - verify), unlike ``kz1`` which uses ``emath.sqrt``. ``c`` is read
    from module scope.
    """
    k_squared = om**2*n2**2/(c**2)
    return sqrt(k_squared-kpp**2)
def kB(om,kpp,a,b,n1,n2):
    """Return arccos(RHS) / (2*(a+b)), with RHS the same expression as RHS().

    ``a`` and ``b`` multiply ``kz1`` and ``kz2`` respectively.
    """
    k1 = kz1(om,n1,kpp)
    k2 = kz2(om,n2,kpp)
    ratio = k2/k1
    cos_term = cos(k1*a)*cos(k2*b)
    sin_term = -0.5*(ratio+1/ratio)*sin(k1*a)*sin(k2*b)
    return arccos(cos_term+sin_term)/(2*(a+b))
def RHS(om,kpp,a,b,n1,n2):
    """Return cos(k1*a)*cos(k2*b) - 0.5*(p + 1/p)*sin(k1*a)*sin(k2*b).

    Here k1 = kz1(om, n1, kpp), k2 = kz2(om, n2, kpp) and p = k2/k1.
    """
    k1 = kz1(om,n1,kpp)
    k2 = kz2(om,n2,kpp)
    ratio = k2/k1
    cos_term = cos(k1*a)*cos(k2*b)
    sin_term = -0.5*(ratio+1./ratio)*sin(k1*a)*sin(k2*b)
    return cos_term+sin_term
def bandedges(RHSs, kpp, a,b, n1, n2):
    """Return the sorted indices i where RHSs crosses +1 or -1 between i and i+1.

    A crossing is detected as a change in sign of ``RHSs - 1`` or ``RHSs + 1``
    between consecutive samples. The remaining parameters are accepted but
    not used.
    """
    values = np.array(RHSs)
    crossings_plus = np.argwhere(np.diff(np.sign(values - 1)) != 0).ravel()
    crossings_minus = np.argwhere(np.diff(np.sign(values + 1)) != 0).ravel()
    return np.sort(np.concatenate((crossings_plus, crossings_minus)))
def omega_to_solve(om):
    """Return abs(RHS) - 1 at frequency ``om``; it is zero where |RHS| equals 1.

    ``kpp``, ``a``, ``b``, ``n1`` and ``n2`` are read from module scope, so the
    caller must set them before using this as a root-finding target.
    """
    return abs(RHS(om,kpp,a,b,n1,n2))-1
def meff(kpps,band):
    """Return hbar / (2 * A), where A is the quadratic coefficient of a
    degree-2 polynomial fit of ``band`` against ``kpps``.

    ``hbar`` is read from module scope.
    """
    quadratic_coeff = polyfit(kpps,band,2)[0]
    # Modified 5/9/17 by RAN: hbar NOT hbar**2
    return hbar/(2*quadratic_coeff)
def cutoff(kpps,band):
    """Return the constant term of a degree-2 polynomial fit of ``band`` against ``kpps``."""
    coefficients = polyfit(kpps,band,2)
    return coefficients[2]
# ---------------------------------------------------------------------------
# Configuration. The names defined here (c, hbar, n1, n2, a, b, ...) are
# module-level globals that the helper functions above read implicitly.
# ---------------------------------------------------------------------------
# Number of values of `a` sampled in a_range for the sweep further below.
n_spacings = 5
# selector == 0 picks dimensionless units (c = hbar = 1); any other value
# picks SI units taken from scipy.constants.
selector=1
if selector==0:
    c=1
    n1 = sqrt(2.33)
    n2 = sqrt(17.88)
    # a and b multiply kz1 and kz2 respectively in RHS(); a + b = full_period.
    full_period=2
    a=1
    b=full_period-a
    hbar = 1
    # Frequencies scanned when locating the band edges.
    oms = linspace(0.001,5,500)
    n_bands = 6
    # Half-width of the bracket handed to brentq around each root guess.
    interval = 0.05
    #First reproduce band edges for even spacing
    kpps = linspace(0.01,2,200)
    a_range = linspace(0.9,1.1,n_spacings)
    # Suffixes appended to the axis labels (empty in dimensionless units).
    units = {"mass":"","energy":"","length":"","inv_length":""}
else:
    from scipy import constants
    base_wavelength=600e-9
    n1 = sqrt(1) #Vacuum
    n2 = 4.0 #GaAs at around 600 nm
    #n2 = 2.5 #LiF at around 600 nm is n=1.4, but this doesn't really solve
    c=constants.c
    # Angular frequency corresponding to base_wavelength.
    base_omega = 2*pi*c/base_wavelength
    # a and b multiply kz1 and kz2 respectively in RHS(); a + b = full_period.
    full_period=base_wavelength
    a=base_wavelength/2.0
    b=full_period - a
    hbar = constants.hbar
    # Frequencies scanned when locating the band edges.
    oms = linspace(0.001,5,500) * base_omega
    n_bands = 6
    #interval = 0.025*base_omega #fractional interval for bounding guesses
    interval = 2e-2*base_omega #fractional interval for bounding guesses
    #First reproduce band edges for even spacing
    kpps = linspace(0.01,0.5,200)* 2*pi/base_wavelength
    a_range = linspace(0.9,1.2,n_spacings) * base_wavelength/2
    # Suffixes appended to the axis labels.
    units = {"mass":" (kg)","energy":" (s$^{-1}$)","length":" (m)","inv_length":" (m$^{-1}$)"}
#Start by calculating the bottom of the bands
# kpp is a module-level global that omega_to_solve() reads; the loops below
# rebind it, so statement order matters here.
kpp=0
RHSs = [RHS(i,kpp,a,b,n1,n2) for i in oms]
indices = bandedges(RHSs, kpp, a,b, n1, n2)
# Frequencies just before each crossing of RHS = +1 or -1.
om_cutoff = [oms[s] for s in indices]
####print "Band bottoms are at: ", om_cutoff
bands = [[] for i in range(n_bands)]
# NOTE(review): the next four statements repeat the computation above with
# the same inputs.
kpp = 0
RHSs = [RHS(k,kpp,a,b,n1,n2) for k in oms]
indices = bandedges(RHSs, kpp, a,b, n1, n2)
om_cutoff = [oms[s] for s in indices]
#Now extend out to non-zero kpp
for i,om_bottom in enumerate(om_cutoff[0:n_bands]):
    #kB = (i+1)*pi/2
    #lower_bound,upper_bound = om_bottom-interval, om_bottom+interval
    lower_bound,upper_bound = om_bottom-interval, om_bottom+interval
    #print i, lower_bound, upper_bound
    for kpp in kpps:
        # Root of |RHS| - 1 inside the current bracket, at this kpp.
        om = brentq(omega_to_solve,lower_bound,upper_bound)
        bands[i] = bands[i]+[om]
        # Re-centre the bracket on the root just found before the next kpp.
        lower_bound,upper_bound = om-interval, om+interval
# Figure 4: one curve per band, frequency against kpp.
plt.figure(4), plt.clf()
for n in range(n_bands):
    plt.plot(kpps, bands[n])
plt.xlabel("kpp"+units["inv_length"])
# NOTE(review): "\o" is not a recognised escape in a non-raw string; newer
# Python versions warn about it.
plt.ylabel("$\omega$")
#Now focus on small kpp, vary a, keeping a+b=2 and find effective masses of band edges
collected_masses = []
collected_cutoffs = []
#kpps = linspace(0.01,0.5,200) #Look only at parabolic region near kpp=0
# The loop rebinds the module-level a and b, which omega_to_solve() reads.
for a in a_range:
    #b=2-a #this must be fixed!
    b=full_period - a #this must be fixed!
    bands = [[] for i in range(n_bands)]
    #Calculate starting point
    kpp = 0
    RHSs = [RHS(k,kpp,a,b,n1,n2) for k in oms]
    indices = bandedges(RHSs, kpp, a,b, n1, n2)
    om_cutoff = [oms[s] for s in indices]
    #Calculate the rest
    for i,om_bottom in enumerate(om_cutoff[0:n_bands]):
        # NOTE(review): this rebinds the module-level name kB, replacing the
        # kB() function defined above; the value is not used below.
        kB = (i+1)*pi/2
        lower_bound,upper_bound = om_bottom-interval, om_bottom+interval
        for kpp in kpps:
            om = brentq(omega_to_solve,lower_bound,upper_bound)
            bands[i] = bands[i]+[om]
            # Re-centre the bracket on the root just found before the next kpp.
            lower_bound,upper_bound = om-interval, om+interval
    #Fit for effective mass
    # range(n_bands)[1:] skips band 0.
    masses = [meff(kpps,bands[i]) for i in range(n_bands)[1:]]
    collected_masses = collected_masses + [masses]
    cutoffs = [cutoff(kpps,bands[i]) for i in range(n_bands)[1:]]
    collected_cutoffs = collected_cutoffs + [cutoffs]
# Regroup from one row per value of a to one row per band.
transpose_mass = [[collected_masses[i][n] for i in range(n_spacings)] for n in range(n_bands-1)]
transpose_cutoffs = [[collected_cutoffs[i][n] for i in range(n_spacings)] for n in range(n_bands-1)]
# Figure 5: meff (left) and cutoff (right) against a, one curve per band.
plt.figure(5),plt.clf()
plt.subplot(1, 2, 1)
for n in range(n_bands-1):
    plt.plot(a_range, transpose_mass[n])
plt.xlabel("a"+units["length"])
plt.ylabel("meff"+units["mass"])
plt.grid(1)
plt.subplot(1, 2, 2)
for n in range(n_bands-1):
    plt.plot(a_range, transpose_cutoffs[n])
plt.xlabel("a"+units["length"])
plt.ylabel("cutoff" + units["energy"])
plt.grid(1)
# Figure 6: ratio meff / cutoff against a, one curve per band.
plt.figure(6), plt.clf()
for n in range(n_bands-1):
    plt.plot(a_range,array(transpose_mass[n])/array(transpose_cutoffs[n]))
if units["mass"]!="":
    ratio_units = " (m s)"
else:
    ratio_units = ""
plt.ylabel("meff / cutoff"+ratio_units)
plt.xlabel("a"+units["length"])
# NOTE(review): nothing after this point calls plt.show() (or saves the
# figures). Run as a plain, non-interactive script, the figures are
# presumably never displayed - confirm; this looks like the reason no plot
# appears.
In my program, I'm using mplcursors on a Matplotlib graph so that I can identify certain points precisely.
mplcursors.cursor(multiple=True).connect("add", lambda sel: sel.annotation.draggable(False))
Now I made a complex graph with multiple axis:
# Build one parasite y-axis per category on top of `host`.
# host, parasites, cat_list, t, t_num_list and fig are defined outside this
# fragment.
first = 1    # flag: the next plotted category reuses the default "right" axis
offset = 60  # horizontal offset of the next additional right-hand axis
for x in range(len(cat_list)):
    # Categories whose name contains "Time" get no axis of their own.
    if "Time" not in cat_list[x]:
        if first and not cat_list[x].startswith("EngineSpeed"):
            # First plotted category: show the parasite's built-in right axis.
            parasites[x] = ParasiteAxes(host, sharex = host)
            host.parasites.append(parasites[x])
            parasites[x].axis["right"].set_visible(True)
            parasites[x].set_ylabel(cat_list[x])
            parasites[x].axis["right"].major_ticklabels.set_visible(True)
            parasites[x].axis["right"].label.set_visible(True)
            p_plot, = parasites[x].plot(t, t_num_list[x], label = cat_list[x])
            #parasites[x].axis["right"+str(x+1)].label.set_color(p_plot.get_color())
            # Colour the axis label like its curve.
            parasites[x].axis["right"].label.set_color(p_plot.get_color())
            first = 0
        elif not cat_list[x].startswith("EngineSpeed"):
            # Later categories: add a new fixed axis on the right, shifted by
            # `offset`.
            parasites[x] = ParasiteAxes(host, sharex = host)
            host.parasites.append(parasites[x])
            parasites[x].set_ylabel(cat_list[x])
            new_axisline = parasites[x].get_grid_helper().new_fixed_axis
            parasites[x].axis["right"+str(x+1)] = new_axisline(loc = "right",
            axes = parasites[x],
            offset = (offset, 0))
            p_plot, = parasites[x].plot(t, t_num_list[x])
            parasites[x].axis["right"+str(x+1)].label.set_color(p_plot.get_color())
            # Push the next additional axis further to the right.
            offset = offset + 60
host.legend()
fig.add_axes(host)
plt.show()
This code results in the following graph:
https://i.stack.imgur.com/Wl7yC.png
Now I need to be able to select points on a particular axis. How do I make a selection menu for choosing the active axis, and how do I then use mplcursors to select my points on it?
Thanks,
Ziga
I would like to plot a sophisticated graph in Julia. The code below is in Julia's version using ggplot.
using CairoMakie, DataFrames, Effects, GLM, StatsModels, StableRNGs, RCall
# Make the functions of the R package ggplot2 callable from Julia via RCall.
@rlibrary ggplot2

# Synthetic growth data: 8 ages for each sex, with reproducible noise.
rng = StableRNG(42)
growthdata = DataFrame(; age=[13:20; 13:20],
                       sex=repeat(["male", "female"], inner=8),
                       weight=[range(100, 155; length=8); range(100, 125; length=8)] .+ randn(rng, 16))

# Linear model with a sex-by-age interaction.
mod_uncentered = lm(@formula(weight ~ 1 + sex * age), growthdata)

# Reference grid: keep odd ages for males and even ages for females.
refgrid = copy(growthdata)
filter!(refgrid) do row
    return mod(row.age, 2) == (row.sex == "male")
end

# effects! fills in the predicted weight and its standard error (:err).
effects!(refgrid, mod_uncentered)
refgrid[!, :lower] = @. refgrid.weight - 1.96 * refgrid.err
refgrid[!, :upper] = @. refgrid.weight + 1.96 * refgrid.err
df= refgrid

ggplot(df, aes(x=:age, y=:weight, group = :sex, shape= :sex, linetype=:sex)) +
    geom_point(position=position_dodge(width=0.15)) +
    geom_ribbon(aes(ymin=:lower, ymax=:upper), fill="gray", alpha=0.5)+
    geom_line(position=position_dodge(width=0.15)) +
    ylab("Weight")+ xlab("Age")+
    theme_classic()
However, I would like to modify this graph a bit more. For example, I would like to change the scale of the y axis and the colors of the ribbon, add some error bars, change the text size of the legend, and so on. Since I am new to Julia, I am not succeeding in finding the equivalent code for these modifications. Could someone help me translate the R ggplot code below into Julia?
# Rows used for the text label: male rows, passed on to slice_max().
t1= filter(df, sex=="male") %>% slice_max(df$weight)
# Lines, points, error bars and a ribbon, grouped/coloured by sex and
# dodged horizontally by 0.15; followed by axis, colour and theme settings.
ggplot(df, aes(age, weight, group = sex, shape= sex, linetype=sex,fill=sex, colour=sex)) +
  geom_line(position=position_dodge(width=0.15)) +
  geom_point(position=position_dodge(width=0.15)) +
  geom_errorbar(aes(ymin = lower, ymax = upper),width = 0.1,
  linetype = "solid",position=position_dodge(width=0.15))+
  geom_ribbon(aes(ymin = lower, ymax = upper, fill = sex, colour = sex), alpha = 0.2) +
  geom_text(data = t1, aes(age, weight, label = round(weight, 1)), hjust = -0.25, size=7,show_guide = FALSE) +
  scale_y_continuous(limits = c(70, 150), breaks = seq(80, 140, by = 20))+
  theme_classic()+
  scale_colour_manual(values = c("orange", "blue")) +
  guides(color = guide_legend(override.aes = list(linetype = c('dotted', 'dashed'))),
  linetype = "none")+
  xlab("Age")+ ylab("Average marginal effects") + ggtitle("Title") +
  theme(
    axis.title.y = element_text(color="Black", size=28, face="bold", hjust = 0.9),
    axis.text.y = element_text(face="bold", color="black", size=16),
    plot.title = element_text(hjust = 0.5, color="Black", size=28, face="bold"),
    legend.title = element_text(color = "Black", size = 13),
    legend.text = element_text(color = "Black", size = 16),
    legend.position="bottom",
    axis.text.x = element_text(face="bold", color="black", size=11),
    strip.text = element_text(face= "bold", size=15)
  )
As I commented before, you can use R-strings to run R code. To be clear, this isn't like your post's approach where you piece together many Julia objects that wrap many R objects, this is RCall converting a Julia Dataframe to an R dataframe then running your R code.
Running an R script may not seem very Julian, but code reuse is very Julian. Besides, you're still using an R library and active R session either way, and there might even be a slight performance benefit from reducing how often you make wrapper objects and switch between Julia and R.
## import libraries for Julia and R; still good to do at top
using CairoMakie, DataFrames, Effects, GLM, StatsModels, StableRNGs, RCall
R"""
library(ggplot2)
library(dplyr)
"""
## your Julia code without the @rlibrary or ggplot lines
rng = StableRNG(42)
growthdata = DataFrame(; age=[13:20; 13:20],
                       sex=repeat(["male", "female"], inner=8),
                       weight=[range(100, 155; length=8); range(100, 125; length=8)] .+ randn(rng, 16))
mod_uncentered = lm(@formula(weight ~ 1 + sex * age), growthdata)
refgrid = copy(growthdata)
filter!(refgrid) do row
    return mod(row.age, 2) == (row.sex == "male")
end
effects!(refgrid, mod_uncentered)
refgrid[!, :lower] = @. refgrid.weight - 1.96 * refgrid.err
refgrid[!, :upper] = @. refgrid.weight + 1.96 * refgrid.err
df= refgrid
## convert Julia's df and run your R code in R-string
## - note that $df is interpolation of Julia's df into R-string,
## not R's $ operator like in rdf$weight
## - call the R dataframe rdf because df is already an R function
R"""
rdf <- $df
t1= filter(rdf, sex=="male") %>% slice_max(rdf$weight)
ggplot(rdf, aes(age, weight, group = sex, shape= sex, linetype=sex,fill=sex, colour=sex)) +
geom_line(position=position_dodge(width=0.15)) +
geom_point(position=position_dodge(width=0.15)) +
geom_errorbar(aes(ymin = lower, ymax = upper),width = 0.1,
linetype = "solid",position=position_dodge(width=0.15))+
geom_ribbon(aes(ymin = lower, ymax = upper, fill = sex, colour = sex), alpha = 0.2) +
geom_text(data = t1, aes(age, weight, label = round(weight, 1)), hjust = -0.25, size=7,show_guide = FALSE) +
scale_y_continuous(limits = c(70, 150), breaks = seq(80, 140, by = 20))+
theme_classic()+
scale_colour_manual(values = c("orange", "blue")) +
guides(color = guide_legend(override.aes = list(linetype = c('dotted', 'dashed'))),
linetype = "none")+
xlab("Age")+ ylab("Average marginal effects") + ggtitle("Title") +
theme(
axis.title.y = element_text(color="Black", size=28, face="bold", hjust = 0.9),
axis.text.y = element_text(face="bold", color="black", size=16),
plot.title = element_text(hjust = 0.5, color="Black", size=28, face="bold"),
legend.title = element_text(color = "Black", size = 13),
legend.text = element_text(color = "Black", size = 16),
legend.position="bottom",
axis.text.x = element_text(face="bold", color="black", size=11),
strip.text = element_text(face= "bold", size=15)
)
"""
The result is the same as your post's R code:
I used Vega-Lite (https://github.com/queryverse/VegaLite.jl) which is also grounded in the "Grammar of Graphics", and LinearRegression (https://github.com/ericqu/LinearRegression.jl) which provides similar features as GLM, although I think it is possible to get comparable results with the other plotting and linear regression packages. Nevertheless, I hope that this gives you a starting point.
using LinearRegression: Distributions, DataFrames, CategoricalArrays
using DataFrames, StatsModels, LinearRegression
using VegaLite

# Synthetic growth data: 8 ages for each sex, with sex stored as a categorical.
growthdata = DataFrame(; age=[13:20; 13:20],
                       sex=categorical(repeat(["male", "female"], inner=8), compress=true),
                       weight=[range(100, 155; length=8); range(100, 125; length=8)] .+ randn(16))

# Fit the model and request all in-sample statistics; the columns
# :uclp, :lclp and :predicted used below come from this call.
lm = regress(@formula(weight ~ 1 + sex * age), growthdata)
results = predict_in_sample(lm, growthdata, req_stats="all")

# Layered plot: error band, fitted line and observed points, coloured by sex.
fp = select(results, [:age, :weight, :sex, :uclp, :lclp, :predicted]) |> @vlplot() +
    @vlplot(
        mark = :errorband, color = :sex,
        y = { field = :uclp, type = :quantitative, title="Average marginal effects"},
        y2 = { field = :lclp, type = :quantitative },
        x = {:age, type = :quantitative} ) +
    @vlplot(
        mark = :line, color = :sex,
        x = {:age, type = :quantitative},
        y = {:predicted, type = :quantitative}) +
    @vlplot(
        :point, color=:sex ,
        x = {:age, type = :quantitative, axis = {grid = false}, scale = {zero = false}},
        y = {:weight, type = :quantitative, axis = {grid = false}, scale = {zero = false}},
        title = "Title", width = 400 , height = 400
    )
which gives:
You can change the style of the elements by changing the "config" as indicated here (https://www.queryverse.org/VegaLite.jl/stable/gettingstarted/tutorial/#Config-1).
As the Julia Vega-Lite is a wrapper to Vega-Lite additional documentation can be found on the Vega-lite website (https://vega.github.io/vega-lite/)
# Block 1: split the dataset in two based on the Attrition flag.
emp_attrited = df[df['Attrition'] == 'Yes']
emp_not_attrited = df[df['Attrition'] == 'No']
print(emp_attrited.shape)
print(emp_not_attrited.shape)

# Block 2: percentage of each group's employees found in each department.
# The denominators are the group sizes rather than hard-coded row counts.
att_dep = emp_attrited['Department'].value_counts()
percentage_att_dep = (att_dep / len(emp_attrited)) * 100
print("Attrited")
print(percentage_att_dep)

# value_counts() sorts by count, so align the second series to the
# department order of the first; otherwise bars drawn at the same x position
# could belong to different departments.
not_att_dep = (
    emp_not_attrited['Department'].value_counts().reindex(att_dep.index, fill_value=0)
)
percentage_not_att_dep = (not_att_dep / len(emp_not_attrited)) * 100
print("\nNot Attrited")
print(percentage_not_att_dep)

# Block 3: grouped bar chart, two bars per department.
fig = plt.figure(figsize=(20,10))
ax1 = fig.add_subplot(221)
index = np.arange(att_dep.count())
bar_width = 0.15
rect1 = ax1.bar(index, percentage_att_dep, bar_width, color = 'black', label = 'Attrited')
rect2 = ax1.bar(index + bar_width, percentage_not_att_dep, bar_width, color = 'green', label = 'Not Attrited')
ax1.set_ylabel('Percenatage')
ax1.set_title('Comparison')
xTickMarks = att_dep.index.values.tolist()
ax1.set_xticks(index + bar_width)
xTickNames = ax1.set_xticklabels(xTickMarks)
ax1.legend()
plt.tight_layout()
plt.show()
The first block represents how the dataset is split into 2 based upon Attrition
The second block represents the calculation of percentage of Employees in each Department who are attrited and not attrited.
The third block is to plot the given as a grouped chart.
You can do:
# Share of each Attrition value within every department, drawn as grouped
# bars. The column is named 'Attrition' in the question's DataFrame.
(df.groupby(['Department'])
   ['Attrition'].value_counts(normalize=True)
   .unstack('Attrition')
   .plot.bar()
)