Matplotlib table with double headers - matplotlib

Hi is possible to make a matplotlib table to have a "double header" like this
(mind the dashed line)
----------------------------------------
| Feb Total | YTD Total |
----------------------------------------
| 2014|2015 | 2014/2015| 2015/2016 |
--------------------------------------------------
|VVI-ID | 12 | 20 | 188 | 169 |
--------------------------------------------------
|TDI-ID | 34 | 45 | 556 | 456 |

You can do this by using another tables with no data as headers. That is, you create empty tables, whose column labels will be the headers for your table. Let's consider this demo example. At first, add tables header_0 and header_1. At second, correct headers' and table's argument bbox to position all tables correctly. Since the tables are overlapped, the table with data should be the last one.
import numpy as np
import matplotlib.pyplot as plt
data = [[ 66386, 174296, 75131, 577908, 32015],
[ 58230, 381139, 78045, 99308, 160454],
[ 89135, 80552, 152558, 497981, 603535],
[ 78415, 81858, 150656, 193263, 69638],
[ 139361, 331509, 343164, 781380, 52269]]
columns = ('Freeze', 'Wind', 'Flood', 'Quake', 'Hail')
rows = ['%d year' % x for x in (100, 50, 20, 10, 5)]
values = np.arange(0, 2500, 500)
value_increment = 1000
# Get some pastel shades for the colors
colors = plt.cm.BuPu(np.linspace(0, 0.5, len(rows)))
n_rows = len(data)
index = np.arange(len(columns)) + 0.3
bar_width = 0.4
# Initialize the vertical-offset for the stacked bar chart.
y_offset = np.array([0.0] * len(columns))
# Plot bars and create text labels for the table
cell_text = []
for row in range(n_rows):
plt.bar(index, data[row], bar_width, bottom=y_offset, color=colors[row])
y_offset = y_offset + data[row]
cell_text.append(['%1.1f' % (x/1000.0) for x in y_offset])
# Reverse colors and text labels to display the last value at the top.
colors = colors[::-1]
cell_text.reverse()
# Add headers and a table at the bottom of the axes
header_0 = plt.table(cellText=[['']*2],
colLabels=['Extra header 1', 'Extra header 2'],
loc='bottom',
bbox=[0, -0.1, 0.8, 0.1]
)
header_1 = plt.table(cellText=[['']],
colLabels=['Just Hail'],
loc='bottom',
bbox=[0.8, -0.1, 0.2, 0.1]
)
the_table = plt.table(cellText=cell_text,
rowLabels=rows,
rowColours=colors,
colLabels=columns,
loc='bottom',
bbox=[0, -0.35, 1.0, 0.3]
)
# Adjust layout to make room for the table:
plt.subplots_adjust(left=0.2, bottom=-0.2)
plt.ylabel("Loss in ${0}'s".format(value_increment))
plt.yticks(values * value_increment, ['%d' % val for val in values])
plt.xticks([])
plt.title('Loss by Disaster')
plt.show()
If extra header is symmetric or combine equal quantity of "normal" header, all you need to do is to add an extra header table and correct bbox of data table like this (the same example with deleted column):
header = plt.table(cellText=[['']*2],
colLabels=['Extra header 1', 'Extra header 2'],
loc='bottom'
)
the_table = plt.table(cellText=cell_text,
rowLabels=rows,
rowColours=colors,
colLabels=columns,
loc='bottom',
bbox=[0, -0.35, 1.0, 0.3]
)

Related

Data Frame % column by groupping

I am working on a forecast accuracy report which measure the deviation between actual & pervious projection. The measurement would be = 1- ('Actual' - 'M-1') / 'Actual' .
There measure need to be groupped based different gratuity, say 'Product Category' / 'Line' / 'Product'. However, the df.groupby('Product Category').sum() function couldnt support the percentage calculation. Does anyone have idea how it should be fixed? Thanks!
data = {
"Product Category": ['Drink', 'Drink','Drink','Food','Food','Food'],
"Line": ['Water', 'Water','Wine','Fruit','Fruit','Fruit'],
"Product": ['A', 'B', 'C','D','E','F'],
"Actual": [100,50,40,20,70,50],
"M-1": [120,40,10,20,80,50],
}
df = pd.DataFrame(data)
df['M1 Gap'] = df['Actual'] - df['M-1']
df['Error_Per'] = 1- df['M1 Gap'] / df['Actual']
Expected output would be
enter image description here
You can also create a custom function and apply it on every row of a pandas data frame as follows. Just note that I set the axis argument to 1 so that the custom function is applied on each row or across columns:
import pandas as pd
def func(row):
row['M1 Gap'] = row['Actual'] - row['M-1']
row['Error_Per'] = 1 - (row['M1 Gap'] / row['Actual'])
return row
df.groupby('Product Category').sum().apply(func, axis = 1)
Actual M-1 M1 Gap Error_Per
Product Category
Drink 190.0 170.0 20.0 0.894737
Food 140.0 150.0 -10.0 1.071429
You should group BEFORE calculating percentage:
data = {
"Product Category": ['Drink', 'Drink','Drink','Food','Food','Food'],
"Line": ['Water', 'Water','Wine','Fruit','Fruit','Fruit'],
"Product": ['A', 'B', 'C','D','E','F'],
"Actual": [100,50,40,20,70,50],
"M-1": [120,40,10,20,80,50],
}
df = pd.DataFrame(data)
df['M1 Gap'] = df['Actual'] - df['M-1']
df_line = df.groupby('Line').sum()
df_line['Error_Per'] = df_line['M1 Gap'] / df_line['Actual']
print(df_line)
df_prod = df.groupby('Product Category').sum()
df_prod['Error_Per'] = df_prod['M1 Gap'] / df_prod['Actual']
print(df_prod)
Output:
Actual M-1 M1 Gap Error_Per
Line
Fruit 140 150 -10 -0.071429
Water 150 160 -10 -0.066667
Wine 40 10 30 0.750000
Actual M-1 M1 Gap Error_Per
Product Category
Drink 190 170 20 0.105263
Food 140 150 -10 -0.071429
Note: your expected Outcome from the screenshot doesn't match the dictionary of your code (which I used)

ggplot2: add title changes point colors <-> scale_color_manual removes ggtitle

I am facing a silly point color in a dot plot with ggplot 2. I have a whole table of data of which i take relevant rows to make a dot plot. With scale_color_manual my points get colored according to the named palette and factor genotype specified in aes() and when i simply want to add a title specifying the cell line used, the points get colored back to automatic yellow and purple. Adding the title first and setting scale_color_manual as the last layer changes the points colors and removes the title.
What is wrong in there? I don't get it and it is a bit frustrating
thanks for your help!
Here's reproducible code to get my whole df and the subset for the plots:
# df of data to plot
exp <- c(rep(284, times = 6), rep(285, times = 12))
geno <- c(rep(rep(c("WT", "KO"), each =3), times = 6))
line <- c(rep(5, times = 6),rep(8, times= 12), rep(5, times =12), rep(8, times = 6))
ttt <- c(rep(c(0, 10, 60), times = 10), rep(c("ZAc60", "Cu60", "Cu200"), times = 2))
rep <- c(rep(1, times = 12), rep(2, times = 6), rep(c(1,2), times = 6), rep(1, times = 6))
rel_expr <- c(0.20688185, 0.21576131, 0.94046028, 0.30327675, 0.22865200,
0.92941881, 0.13787508, 0.13325281, 0.22114990, 0.95591724,
1.03239718, 0.83339248, 0.15332420, 0.17558160, 0.22475604,
1.02356351, 0.77882000, 0.69214403, 0.16874097, 0.15548158,
0.45207943, 0.28123760, 0.23500083, 0.51588856, 0.1399634,
0.14610184, 1.06716713, 0.16517801, 0.34736164, 0.64773650,
0.18334429, 0.05924757, 0.01803593, 0.86685230, 0.39554685,
0.25764805)
df_all <- data.frame(exp, geno, line, ttt, rep, rel_expr)
names(df_all) <- c("EXP", "Geno", "Line", "TTT", "Rep", "Rel_Expr")
str(df_all)
# make Geno an ordered factor
df_all$Geno <- ordered(df_all$Geno, levels = c("WT", "KO"))
# select set of whole dataset for current plot
df_ions <- df_all[df_all$Line == 8 & !df_all$TTT %in% c(10, 60),]
# add a treatment as factor columns fTTT
df_ions$fTTT <- ordered(df_ions$TTT, levels = c("0", "ZAc60", "Cu60", "Cu200"))
str(df_ions)
# plot rel_exp vs factor treatment, color points by geno
# with named color palette
library(ggplot2)
col_palette <- c("#000000", "#1356BC")
names(col_palette) <- c("WT", "KO")
plt <- ggplot(df_ions, aes(x = fTTT, y = Rel_Expr, color = Geno)) +
geom_jitter(width = 0.1)
plt # intermediate_plt_1.png
plt + scale_color_manual(values = col_palette) # intermediate_plt_2.png
plt + ggtitle("mRPTEC8") # final_plot.png
images:

Multiple grouped charts with altair

My data has 4 attributes: dataset (D1/D2), model (M1/M2), layer (L1/L2), scene (S1/S2). I can make a chart grouped by scenes and then merge plots horizontally and vertically (pic above).
However, I would like to have 'double grouping' by scene and dataset, like merging the D1 and D2 plots by placing blue/orange bars from next to each other but with different opacity or pattern/hatch.
Basically something like this (pretend that the black traits are a hatch pattern).
Here is the code to reproduce the first plot
import numpy as np
import itertools
import argparse
import pandas as pd
import matplotlib.pyplot as plt
import os
import altair as alt
alt.renderers.enable('altair_viewer')
np.random.seed(0)
################################################################################
model_keys = ['M1', 'M2']
data_keys = ['D1', 'D2']
scene_keys = ['S1', 'S2']
layer_keys = ['L1', 'L2']
ys = []
models = []
dataset = []
layers = []
scenes = []
for sc in scene_keys:
for m in model_keys:
for d in data_keys:
for l in layer_keys:
for s in range(10):
data_y = list(np.random.rand(10) / 10)
ys += data_y
scenes += [sc] * len(data_y)
models += [m] * len(data_y)
dataset += [d] * len(data_y)
layers += [l] * len(data_y)
# ------------------------------------------------------------------------------
df = pd.DataFrame({'Y': ys,
'Model': models,
'Dataset': dataset,
'Layer': layers,
'Scenes': scenes})
bars = alt.Chart(df, width=100, height=90).mark_bar().encode(
# field to group columns on
x=alt.X('Scenes:N',
title=None,
axis=alt.Axis(
grid=False,
title=None,
labels=False,
),
),
# field to use as Y values and how to calculate
y=alt.Y('Y:Q',
aggregate='mean',
axis=alt.Axis(
grid=True,
title='Y',
titleFontWeight='normal',
),
),
# field to use for sorting
order=alt.Order('Scenes',
sort='ascending',
),
# field to use for color segmentation
color=alt.Color('Scenes',
legend=alt.Legend(orient='bottom',
padding=-10,
),
title=None,
),
)
error_bars = alt.Chart(df).mark_errorbar(extent='ci').encode(
x=alt.X('Scenes:N'),
y=alt.Y('Y:Q'),
)
text = alt.Chart(df).mark_text(align='center',
baseline='line-bottom',
color='black',
dy=-5 # y-shift
).encode(
x=alt.X('Scenes:N'),
y=alt.Y('mean(Y):Q'),
text=alt.Text('mean(Y):Q', format='.1f'),
)
chart_base = bars + error_bars + text
chart_base = chart_base.facet(
# field to use to use as the set of columns to be represented in each group
column=alt.Column('Layer:N',
# header=alt.Header(
# labelFontStyle='bold',
# ),
title=None,
sort=list(set(models)), # get unique indices
),
spacing={"row": 0, "column": 15},
)
def unique(sequence):
seen = set()
return [x for x in sequence if not (x in seen or seen.add(x))]
for i, m in enumerate(unique(models)):
chart_imnet = chart_base.transform_filter(
alt.FieldEqualPredicate(field='Dataset', equal='D1'),
).transform_filter(
alt.FieldEqualPredicate(field='Model', equal=m)
)
chart_places = chart_base.transform_filter(
alt.FieldEqualPredicate(field='Dataset', equal='D2')
).transform_filter(
alt.FieldEqualPredicate(field='Model', equal=m)
)
if i == 0:
title_params = dict({'align': 'center', 'anchor': 'middle', 'dy': -10})
chart_imnet = chart_imnet.properties(title=alt.TitleParams('D1', **title_params))
chart_places = chart_places.properties(title=alt.TitleParams('D2', **title_params))
chart_places = alt.concat(chart_places,
title=alt.TitleParams(
m,
baseline='middle',
orient='right',
anchor='middle',
angle=90,
# dy=10,
dx=30 if i == 0 else 0,
),
)
if i == 0:
chart = (chart_imnet | chart_places).resolve_scale(x='shared')
else:
chart = (chart & (chart_imnet | chart_places).resolve_scale(x='shared'))
chart.save('test.html')
For now, I don't know a good answer, but once https://github.com/altair-viz/altair/pull/2528 is accepted you can use the xOffset encoding channel as such:
alt.Chart(df, height=90).mark_bar(tooltip=True).encode(
x=alt.X("Scenes:N"),
y=alt.Y("mean(Y):Q"),
color=alt.Color("Scenes:N"),
opacity=alt.Opacity("Dataset:N"),
xOffset=alt.XOffset("Dataset:N"),
column=alt.Column('Layer:N'),
row=alt.Row("Model:N")
).resolve_scale(x='independent')
Which will result in:
See Colab Notebook or Vega Editor
EDIT
To control the opacity and legend names one can do as such
alt.Chart(df, height=90).mark_bar(tooltip=True).encode(
x=alt.X("Scenes:N"),
y=alt.Y("mean(Y):Q"),
color=alt.Color("Scenes:N"),
opacity=alt.Opacity("Dataset:N",
scale=alt.Scale(domain=['D1', 'D2'],
range=[0.2, 1.0]),
legend=alt.Legend(labelExpr="datum.label == 'D1' ? 'D1 - transparent' : 'D2 - full'")),
xOffset=alt.XOffset("Dataset:N"),
column=alt.Column('Layer:N'),
row=alt.Row("Model:N")
).resolve_scale(x='independent')

Optimization Python

I am trying to get the optimal solution
column heading: D_name , Vial_size1 ,Vial_size2 ,Vial_size3 , cost , units_needed
row 1: Act , 120 , 400 , 0 , $5 , 738
row 2: dug , 80 , 200 , 400 , $40 , 262
data in excel
column heading: Vials price size
Row 1: Vial size 1 5 120
Row 2: Vial size 2 5 400
prob=LpProblem("Dose_Vial",LpMinimize)
import pandas as pd
df = pd.read_excel (r'C:\Users\*****\Desktop\Vial.xls')
print (df)
# Create a list of the Vial_Size
Vial_Size = list(df['Vials'])
# Create a dictinary of units for all Vial_Size
size = dict(zip(Vial_Size,df['size']))
# Create a dictinary of price for all Vial_Size
Price = dict(zip(Vial_Size,df['Price']))
# print dictionaries
print(Vial_Size)
print(size)
print(Price)
vial_vars = LpVariable.dicts("Vials",size,lowBound=0,cat='Integer')
# start building the LP problem by adding the main objective function
prob += lpSum([Price[i]*vial_vars[i]*size[i] for i in size])
# adding constraints
prob += lpSum([size[f] * vial_vars[f] for f in size]) >= 738
# The status of the solution is printed to the screen
prob.solve()
print("Status:", LpStatus[prob.status])
# In case the problem is ill-formulated or there is not sufficient information,
# the solution may be infeasible or unbounded
for v in prob.variables():
if v.varValue>0:
print(v.name, "=", format(round(v.varValue)))
Vials_Vial_Size_1 = 3
Vials_Vial_Size_2 = 1
obj =round((value(prob.objective)))
print("The total cost of optimized vials: ${}".format(round(obj)))
The total cost of optimized vials: $3800
'
how to set it for 2 or more drugs and get the best optimal solution.
Here is an approach to solve the first part of the question, finding vial combinations that minimizes the waste (I'm not sure what role the price plays?):
from pulp import *
import pandas as pd
import csv
drugs_dict = {"D_name": ['Act', 'dug'],
"Vial_size1": [120, 80],
"Vial_size2": [400, 200],
"Vial_size3": [0, 400],
"cost": [5, 40],
"units_needed": [738, 262]}
df = pd.DataFrame(drugs_dict)
drugs = list(df['D_name'])
vial_1_size = dict(zip(drugs, drugs_dict["Vial_size1"]))
vial_2_size = dict(zip(drugs, drugs_dict["Vial_size2"]))
vial_3_size = dict(zip(drugs, drugs_dict["Vial_size3"]))
units_needed = dict(zip(drugs, drugs_dict["units_needed"]))
results = []
for drug in drugs:
print(f"drug = {drug}")
# setup minimum waste problem
prob = LpProblem("Minimum Waste Problem", LpMinimize)
# create decision variables
vial_1_var = LpVariable("Vial_1", lowBound=0, cat='Integer')
vial_2_var = LpVariable("Vial_2", lowBound=0, cat='Integer')
vial_3_var = LpVariable("Vial_3", lowBound=0, cat='Integer')
units = lpSum([vial_1_size[drug] * vial_1_var +
vial_2_size[drug] * vial_2_var +
vial_3_size[drug] * vial_3_var])
# objective function
prob += units
# constraints
prob += units >= units_needed[drug]
prob.solve()
print(f"units = {units.value()}")
for v in prob.variables():
if v.varValue > 0:
print(v.name, "=", v.varValue)
results.append([drug, units.value(), int(vial_1_var.value() or 0), int(vial_2_var.value() or 0), int(vial_3_var.value() or 0)])
with open('vial_results.csv', 'w', newline='') as csvfile:
csv_writer = csv.writer(csvfile)
csv_writer.writerow(['drug', 'units', 'vial_1', 'vial_2', 'vial_3'])
csv_writer.writerows(results)
Running gives:
drug = Act
units = 760.0
Vial_1 = 3.0
Vial_2 = 1.0
drug = dug
units = 280.0
Vial_1 = 1.0
Vial_2 = 1.0

How to annotate subplots in Plotly inside a for loop

I am trying to annotate my subplots inside a for loop. Each subplot will have RMS value printed on the plot. I tried to do it the following way:
from plotly import tools
figg = tools.make_subplots(rows=4, cols=1)
fake_date = {"X": np.arange(1, 101, 0.5), "Y": np.sin(x), "Z": [x + 1 for x in range(10)] * 20}
fake_date = pd.DataFrame(fake_date)
fake_date.sort_values("Z")
unique_ids = fake_date['Z'].unique()
train_id, test_id = np.split(np.random.permutation(unique_ids), [int(.6 * len(unique_ids))])
for i, j in enumerate(test_id):
x_test = fake_date[fake_date['Z'].isin([test_id[i]])]
y_test = fake_date[fake_date['Z'].isin([test_id[i]])]
# Evaluate
rms_test = 0.04
r_test = 0.9
Real = {'type' : 'scatter',
'x' : x_test.X,
'y' : x_test.Y,
"mode" : 'lines+markers',
"name" : 'Real'}
figg.append_trace(Real, i+1, 1)
figg['layout'].update( annotations=[dict(x = 10,y = 0.2, text= rms_test, xref= "x1",yref="y1")] )
figg['layout'].update(height=1800, width=600, title='Testing')
pyo.iplot(figg)
This does not work, although the answer given here seems to work for others. Can anyone point out what wrong am I doing?
I generated fake date for reproducibility
I am not sure where to exactly place the RMS value, but below is a sample code which will help you achieve what you want.
We create an array annotation_arr where we store the annotations using the for loop.
We need to set the xval and yval for each of the individual axes. Remember, first axis will be x, second will be x2 so, I have written a ternary condition for that, please checkout the below code and let me know if there is any issues!
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs,init_notebook_mode,plot,iplot
from plotly import tools
import numpy as np
import pandas as pd
init_notebook_mode(connected=True)
rows = 4
figg = tools.make_subplots(rows=rows, cols=1)
fake_date = {"X": np.arange(0, 100, 0.5), "Y": [np.sin(x) for x in range(200)], "Z": [x + 1 for x in range(10)] * 20}
fake_date = pd.DataFrame(fake_date)
fake_date.sort_values("Z")
unique_ids = fake_date['Z'].unique()
train_id, test_id = np.split(np.random.permutation(unique_ids), [int(.6 * len(unique_ids))])
top = 0
annotation_arr = []
for i, j in enumerate(test_id):
x_test = fake_date[fake_date['Z'].isin([test_id[i]])]
y_test = fake_date[fake_date['Z'].isin([test_id[i]])]
# Evaluate
rms_test = 0.04
r_test = 0.9
Real = {'type' : 'scatter',
'x' : x_test.X,
'y' : x_test.Y,
"mode" : 'lines+markers',
"name" : 'Real'}
top = top + 1/rows
i_val = "" if i == 0 else i + 1
annotation_arr.append(dict(x = r_test,y = top, text= rms_test, xref= "x"+str(i_val),yref="y"+str(i_val)))
figg.append_trace(Real, i+1, 1)
figg['layout'].update( annotations=annotation_arr )
figg['layout'].update(height=1800, width=600, title='Testing')
iplot(figg)