I have been struggling with this problem for days: I have a large script in which a function exports tables to an Excel workbook, each table to a different worksheet. Additionally, I want to format the worksheets using the xlsxwriter engine. I use the context manager `with pd.ExcelWriter() as writer`.
This works fine for an Excel workbook with a single sheet: using to_excel the table is exported and immediately I use an ad hoc function I created to format it.
Code structure:
Global Excel_formatting function that gives format (input: table, sheet name, text strings)
Script function_tables function (input: dataframe, pathfile) that creates subtables from input dataframe, and uses instance pd.ExcelWriter to:
-- export the Excel worksheets
-- call Excel_formatting function to format the worksheets
At high level, the script calls function_tables
See below the complete code:
# Global function to format the output tables.
# Decorates one worksheet that has already been written to the workbook:
# title in row 0, remark in row 1, restyled header row, frozen panes,
# autofit columns, an autofilter on the header and an optional logo image.
#   table_input      -- table whose column labels are re-written as the header
#   sheet_name_input -- key used to look the worksheet up in writer.sheets
#   title_in         -- text written to row 0
#   remark_in        -- text written to row 1
#   start_row_input  -- row index of the header row
def Excel_formatting(table_input, sheet_name_input, title_in, remark_in, start_row_input):
# Assign WorkBook and worksheet
# NOTE(review): `writer` is neither a parameter nor a local of this function,
# so it has to resolve to a module-level name at call time. The only `writer`
# visible in this snippet is local to function_tables -- confirm where it
# comes from.
workbook = writer.book
worksheet = writer.sheets[sheet_name_input]
start_column = 0
# Title and remark
worksheet.write(0, start_column, title_in,
workbook.add_format({'bold': True,
'color': '#8B0000',
'size': 18,
'align':'left'}))
worksheet.write(1, start_column+1, remark_in,
workbook.add_format({'italic': True,
'size': 11,
'align':'left'}))
# Format header (on top of existing header)
header_format = workbook.add_format({'bold': True,
'text_wrap': False,
'fg_color': '#FF8B8B',
'border': 1,
'align':'center'})
# Re-write every column label into the header row, starting at column 0.
# NOTE(review): the callers export with index=True; presumably the index then
# occupies column 0 and the labels are shifted by one -- confirm.
for col_num, value in enumerate(table_input.columns.values):
worksheet.write(start_row_input, col_num, value, header_format)
# Freeze panes / Can also be done with to_excel
worksheet.freeze_panes(start_row_input+1, 0)
# Set column width
end_column = len(table_input.columns)
worksheet.autofit()
# Add autofilter to header (header row only, columns 0 .. end_column-1)
worksheet.autofilter(start_row_input, 0, start_row_input, end_column-1)
# Add logo (if present, to avoid script error)
figure_path = 'Logo.JPG'
if (os.path.exists(figure_path) == True):
worksheet.insert_image(0, start_column+5, figure_path, {'x_scale': 0.1, 'y_scale': 0.08, 'decorative': True})
# End of function
# NOTE(review): this closes the workbook on every call, while function_tables
# calls this function several times on the same workbook -- confirm that
# closing here is intended.
return workbook.close()
# Builds per-feature summary tables from the input data frame, exports each
# one to its own worksheet of `filename` and then calls Excel_formatting on
# the worksheets.
#   x        -- input data frame with 'Feature ...' and 'Deviation' columns
#   filename -- path of the Excel workbook to write
def function_tables(x, filename):
# Here the function creates subtables from input dataframe
df = x
# Each table is the sum of 'Deviation' per value of one feature column.
Table_1 = df.groupby(['Feature 1'])['Deviation'].sum().reset_index()
Table_2 = df.groupby(['Feature 2'])['Deviation'].sum().reset_index()
# ...
Table_N = df.groupby(['Feature N'])['Deviation'].sum().reset_index()
# Export tables adding new sheets to the same Excel workbook
with pd.ExcelWriter(filename, engine='xlsxwriter', mode='w') as writer:
# Rows 0 and 1 are left free for the title and remark written later.
start_row = 2
Table_1.to_excel(writer, index=True, header=True, sheet_name='Overview Feat.1', startrow=start_row)
Table_2.to_excel(writer, index=True, header=True, sheet_name='Overview Feat.2', startrow=start_row)
# ...
Table_N.to_excel(writer, index=True, header=True, sheet_name='Overview Feat.N', startrow=start_row)
# Formatting the worksheets calling the global function
# NOTE(review): the table / sheet / title arguments below do not line up:
# Table_2 is paired with the "table 1" texts, 'Overview Feat.N' is formatted
# twice and 'Overview Feat.1' is never formatted -- confirm the intended
# pairing.
# NOTE(review): `writer` is not passed to Excel_formatting, so the local
# `writer` created above is not what that function sees -- confirm.
title_input_1 = 'Title for overview table 1'
remark_input_1 = 'Remark Table 1'
Excel_formatting(Table_2, 'Overview Feat.2', title_input_1, remark_input_1, start_row)
title_input_2 = 'Title for overview table 2'
remark_input_2 = 'Remark Table 2'
# ...
Excel_formatting(Table_2, 'Overview Feat.N', title_input_2, remark_input_2, start_row)
title_input_N = 'Title for overview table N'
remark_input_N = 'Remark Table N'
Excel_formatting(Table_1, 'Overview Feat.N', title_input_N, remark_input_N, start_row)
# Call section of script
# df_input and Path_filename are defined elsewhere in the full script.
function_tables(df_input, Path_filename)
I also tried openpyxl, looping through the tables using a dictionary as input, and defining the formatting function inside the `with` block of the writer instead of globally, but every attempt failed with the same error:
worksheet = writer.sheets[sheet_name_input]
KeyError: 'Overview Feat.1'
It looks like it cannot find the sheet name. Any help? A poor man's alternative would be to create N Excel workbooks and then merge them all, but I would prefer not to do so; there must be a more Pythonic way to do this, right?
A million thanks!
There are a few issues in the code: the writer object needs to be passed to the Excel_formatting() function, the writer shouldn't be closed in that function, and there are some typos in the titles, captions and variable names.
Here is a working example with those issues fixed. I've added sample data frames, you can replace that with your groupby() code.
import pandas as pd
import os
# Global function to format the output tables
def Excel_formatting(table_input, writer, sheet_name_input, title_in, remark_in, start_row_input):
    """Decorate a worksheet that was already written through ``writer``.

    Writes a title (row 0) and a remark (row 1), restyles the header row,
    freezes the panes below it, autofits the columns, adds an autofilter on
    the header and, when ``Logo.JPG`` exists, inserts it as an image.

    Args:
        table_input: Table whose column labels are re-written as the header.
        writer: Excel writer exposing ``book`` and ``sheets``.
        sheet_name_input: Key of the target worksheet in ``writer.sheets``.
        title_in: Title text.
        remark_in: Remark text.
        start_row_input: Row index of the header row.
    """
    book = writer.book
    sheet = writer.sheets[sheet_name_input]
    first_col = 0

    # Title in the first row, remark one row down and one column to the right
    title_style = book.add_format({'bold': True,
                                   'color': '#8B0000',
                                   'size': 18,
                                   'align': 'left'})
    sheet.write(0, first_col, title_in, title_style)

    remark_style = book.add_format({'italic': True,
                                    'size': 11,
                                    'align': 'left'})
    sheet.write(1, first_col + 1, remark_in, remark_style)

    # Overwrite the header cells pandas produced with a styled version
    header_style = book.add_format({'bold': True,
                                    'text_wrap': False,
                                    'fg_color': '#FF8B8B',
                                    'border': 1,
                                    'align': 'center'})
    for position, label in enumerate(table_input.columns.values):
        sheet.write(start_row_input, position, label, header_style)

    # Keep the header visible while scrolling (to_excel can do this as well)
    sheet.freeze_panes(start_row_input + 1, 0)

    # Column widths follow the cell contents
    last_col = len(table_input.columns) - 1
    sheet.autofit()

    # Filter drop-downs on the header row only
    sheet.autofilter(start_row_input, 0, start_row_input, last_col)

    # The logo is optional: skip it when the file is missing
    logo_path = 'Logo.JPG'
    if not os.path.exists(logo_path):
        return
    logo_options = {'x_scale': 0.1, 'y_scale': 0.08, 'decorative': True}
    sheet.insert_image(0, first_col + 5, logo_path, logo_options)
def function_tables(x, filename):
    """Write three sample tables to ``filename`` and format each worksheet.

    Args:
        x: Accepted for interface compatibility; not used by this example.
        filename: Path of the Excel workbook to create.
    """
    # Sample frames; replace them with the real groupby() results.
    frame_1 = pd.DataFrame({'Data': [11, 12, 13, 14]})
    frame_2 = pd.DataFrame({'Data': [11, 12, 13, 14]})
    # ...
    frame_n = pd.DataFrame({'Data': [11, 12, 13, 14]})

    # One entry per worksheet: (table, sheet name, title, remark)
    layout = (
        (frame_1, 'Overview Feat.1', 'Title for overview table 1', 'Remark Table 1'),
        (frame_2, 'Overview Feat.2', 'Title for overview table 2', 'Remark Table 2'),
        (frame_n, 'Overview Feat.N', 'Title for overview table N', 'Remark Table N'),
    )

    # Export tables adding new sheets to the same Excel workbook
    with pd.ExcelWriter(filename, engine='xlsxwriter', mode='w') as writer:
        # Rows 0 and 1 stay free for the title and the remark
        start_row = 2
        for frame, sheet, _title, _remark in layout:
            frame.to_excel(writer, index=True, header=True, sheet_name=sheet, startrow=start_row)

        # Formatting the worksheets calling the global function
        for frame, sheet, title, remark in layout:
            Excel_formatting(frame, writer, sheet, title, remark, start_row)


# Call section of script
function_tables(None, 'test.xlsx')
Output:
However, to make it more generic it would be best to handle the main function in a loop like this:
def function_tables(x, filename):
    """Export a list of tables to ``filename``, one formatted sheet per table.

    Sheet ``i`` (1-based) is named ``Overview Feat.{i}`` and is formatted by
    ``Excel_formatting`` right after it has been written.

    Args:
        x: Accepted for interface compatibility; not used by this example.
        filename: Path of the Excel workbook to create.
    """
    Table_1 = pd.DataFrame({'Data': [11, 12, 13, 14]})
    Table_2 = pd.DataFrame({'Data': [11, 12, 13, 14]})
    # ...
    Table_N = pd.DataFrame({'Data': [11, 12, 13, 14]})

    # In a real case you would probably append() these in a loop.
    dfs = [Table_1, Table_2, Table_N]

    # Rows 0 and 1 stay free for the title and the remark.
    start_row = 2

    # The context manager closes the writer even when exporting or formatting
    # raises, so no explicit close() call is needed.
    with pd.ExcelWriter(filename, engine='xlsxwriter') as writer:
        for i, df in enumerate(dfs, 1):
            sheet_name = f'Overview Feat.{i}'
            df.to_excel(writer, index=True, header=True, sheet_name=sheet_name, startrow=start_row)

            # Formatting the worksheets calling the global function
            title_input = f'Title for overview table {i}'
            remark_input = f'Remark Table {i}'
            Excel_formatting(df, writer, sheet_name, title_input, remark_input, start_row)
# For every basin: load its daily CSV, reduce it to monthly values, merge it
# with the monthly precipitation table and draw one chart.
# basin, curr_wy, dt, df_monthly_basin, cal_to_wy, chart_name and w are
# defined elsewhere in the script.
for i in range(len(basin)):
prefix = "URL here"
# NOTE(review): `state` is not referenced again in the visible snippet.
state = "OR"
basin_name = basin[i]
df_orig = pd.read_csv(f"{prefix}/{basin_name}.csv", index_col=0)
#---create date x-index
# Daily dates from 1 October of the previous year to 30 September of curr_wy.
curr_wy_date_rng = pd.date_range(
start=dt(curr_wy-1, 10, 1),
end=dt(curr_wy, 9, 30),
freq="D",
)
# Drop the "02-29" row when curr_wy is not a leap year.
if not calendar.isleap(curr_wy):
print("dropping leap day")
df_orig.drop(["02-29"], inplace=True)
# Keep only the median column and the current water-year column.
use_cols = ["Median ('91-'20)", f"{curr_wy}"]
df = pd.DataFrame(data=df_orig[use_cols].copy())
df.index = curr_wy_date_rng
#--create EOM percent of median values-------------------------------------
# Month-frequency dates from October of the previous year through June.
curr_wy_month_rng = pd.date_range(
start=dt(curr_wy-1, 10, 1),
end=dt(curr_wy, 6, 30),
freq="M",
)
df_monthly_prec = pd.DataFrame(data=df_monthly_basin[basin[i]].copy())
df_monthly_prec.index = curr_wy_month_rng
# Monthly maximum of the daily values.
df_monthly = df.groupby(pd.Grouper(freq="M")).max()
df_monthly["date"] = df_monthly.index
df_monthly["wy_date"] = df_monthly["date"].apply(lambda x: cal_to_wy(x))
# NOTE(review): the index assigned on the next line is replaced again by the
# line after it, so the first assignment has no lasting effect -- confirm.
df_monthly.index = pd.to_datetime(df_monthly["wy_date"])
df_monthly.index = df_monthly["date"]
df_monthly["month"] = df_monthly["date"].apply(
lambda x: calendar.month_abbr[x.month]
)
df_monthly["wy"] = df_monthly["wy_date"].apply(lambda x: x.year)
df_monthly.sort_values(by="wy_date", axis=0, inplace=True)
# Remove every helper column whose name contains "date".
df_monthly.drop(
columns=[i for i in df_monthly.columns if "date" in i], inplace=True
)
# df_monthly.index = df_monthly['month']
# Inner join on the index: only dates present in both tables are kept.
df_merge = pd.merge(df_monthly,df_monthly_prec,how='inner', left_index=True, right_index=True)
#---Subplots---------------------------------------------------------------
fig, ax = plt.subplots(figsize=(8,4))
# NOTE(review): linewidth is passed as a string here and on the next plot call.
ax.plot(df_merge.index, df_merge["Median ('91-'20)"], color="green", linewidth="1", linestyle="dashed", label = 'Median Snowpack')
ax.plot(df_merge.index, df_merge[f'{curr_wy}'], color='red', linewidth='2',label='WY Current')
#------Setting x-axis range to expand bar width for ax2
# NOTE(review): no bar width is given; presumably the default width is small
# compared with a month on a date axis -- confirm.
ax.bar(df_merge.index,df_merge[basin[i]], color = 'blue', label = 'Monthly %')
#n = n + 1
#--format chart
ax.set_title(chart_name[w], fontweight = 'bold')
w = w + 1
ax.set_ylabel("Basin Precipitation Index")
# An empty list removes the y tick labels.
ax.set_yticklabels([])
ax.margins(x=0)
ax.legend()
#plt.xlim(0,9)
#---Setting date format
# One major tick per month, labelled with the abbreviated month name.
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b'))
#---EXPORT
plt.show()
End result desired: Plotting both the monthly dataframe (df_monthly_prec) with the daily dataframe charting only monthly values (df_monthly). The bars for the monthly DataFrame should ideally span the whole month on the chart.
I have tried creating a secondary axis, but had trouble aligning the times for the primary and secondary axes. Ideally, I would like to replace plotting df_monthly with df (showing all daily data instead of just the end-of-month values within the daily dataset).
Any assistance or pointers would be much appreciated! Apologies if additional clarification is needed.
I have the following dataframe of securities and have computed a 'liquidity score' in the last column, where 1 = liquid, 2 = less liquid, and 3 = illiquid. I want to group the securities (dynamically) by their liquidity. Is there a way to group them and include some kind of header for each group? How can this best be achieved? Below is the code, along with an example of how the result is supposed to look.
import pandas as pd
# Liquidity score legend: 1 = liquid, 2 = less liquid, 3 = illiquid
# Build the sample securities table and order it by liquidity score.
df = pd.DataFrame(
    {
        'ID': ['XS123', 'US3312', 'DE405'],
        'Currency': ['EUR', 'EUR', 'USD'],
        'Liquidity score': [2, 3, 1],
    }
).sort_values(by=["Liquidity score"])
print(df)
Add labels for liquidity score
The following replaces labels for numbers in Liquidity score:
# Store the text label for each numeric liquidity score in a new 'grp' column.
df['grp'] = df['Liquidity score'].replace({1:'Liquid', 2:'Less liquid', 3:'Illiquid'})
Headers for each group
As per your comment, find below a solution to do this.
Let's illustrate this with a small data example.
# Small two-column example table used to illustrate the group headers.
df = pd.DataFrame(
    {
        'ID': ['XS223', 'US934', 'US905', 'XS224', 'XS223'],
        'Currency': ['EUR', 'USD', 'USD', 'EUR', 'EUR'],
    }
)
Insert a header on specific rows using np.insert.
# Insert the header rows into the raw values first: "Liquid" before row 0,
# then "Less liquid" before row 2 of the already extended array.
rows = np.insert(df.values, 0, values=["Liquid", ""], axis=0)
rows = np.insert(rows, 2, values=["Less liquid", ""], axis=0)
# Rebuild the frame once, restoring the column labels.
df = pd.DataFrame(rows, columns=['ID', 'Currency'])
Using Pandas styler, we can add a background color, change font weight to bold and align the text to the left.
# Hide the index and highlight the two header rows (positions 0 and 2).
# Styler.hide_index() was removed in pandas 2.0; Styler.hide(axis="index") is
# its replacement, with a fallback for older pandas versions.
styler = df.style
styler = styler.hide(axis="index") if hasattr(styler, "hide") else styler.hide_index()
styler.set_properties(subset = pd.IndexSlice[[0,2], :], **{'font-weight' : 'bold', 'background-color' : 'lightblue', 'text-align': 'left'})
You can add a new column like this:
# Label each row by its liquidity score: 1 -> 'Liquid', 2 -> 'Less liquid',
# anything else -> 'Illiquid'.
df['group'] = np.select(
    [df['Liquidity score'] == 1, df['Liquidity score'] == 2],
    ['Liquid', 'Less liquid'],
    default='Illiquid',
)
And try setting as index, so you can filter using the index:
# Index by the label column and then by ID. The label column is named
# 'group' (not 'grouping'), so that is the key set_index must use.
df.set_index(['group', 'ID'], inplace=True)
# Select every row of one liquidity group through the first index level.
df.loc['Less liquid',:]
The function raises an error when it is run on the same data frame more than once. It works fine the first time, but when it is run again on the same df it gives me this error:
IndexError: single positional indexer is out-of-bounds
# Builds a two-column table ('period', 'level') from the column totals of df,
# rebased so that the first period equals 100.
#   df -- input data frame; its first column becomes the index and its last
#         three columns are discarded
# Returns a new data frame with one row per remaining column of df.
def update_data(df):
# Drops the last three columns in place, i.e. on the caller's object.
df.drop(df.columns[[-1, -2, -3]], axis=1, inplace=True)
# Appends a 'Total' row holding the column sums (also on the caller's object).
df.loc['Total'] = df.sum()
df.iloc[-1, 0] = 'Group'
# From here on `df` is rebound to the frame indexed by the first column.
df = df.set_index(list(df)[0])
# Express columns 1..20 of the last row relative to column 0, scaled to 100.
# NOTE(review): positions 1..20 are hard-coded, so at least 21 columns must
# remain here. Because the drop above removes three more columns from the
# caller's frame on every call, a repeated call presumably runs out of
# columns -- confirm against the reported IndexError.
for i in range(1, 21):
df.iloc[-1, i] = 100 + (100 * (
(df.iloc[-1, i] - df.iloc[-1, 0]) / abs(df.iloc[-1, 0])))
# The base period itself is set to 100 after the loop has used its raw value.
df.iloc[-1, 0] = 100
xax = list(df.columns.values)
yax = df.values[-1].tolist()
d = {'period': xax, 'level': yax}
index_level = pd.DataFrame(d)
index_level['level'] = index_level['level'].round(3)
return index_level
Using inplace=True inside a function changes the input data frame. Of course it doesn't work the second time: your function presumes the data is in a particular format at the start of the function, and after the first call that assumption is broken.
df = pd.DataFrame([{'x': 0}])


def change(df):
    """Drop column ``'x'`` from ``df`` in place and return the row count.

    The caller's data frame is modified: it no longer has the column after
    the call.
    """
    df.drop('x', axis=1, inplace=True)
    return df.shape[0]


change(df)
Out[346]: 1
df
Out[347]:
Empty DataFrame
Columns: []
Index: [0]
I am looking to create a grouped bar chart in Excel; however, I can't seem to find a way to do so.
Here is my code:
# Column chart built from the 'Month over month' worksheet.
# workbook and worksheet5 are created elsewhere in the script.
bar_chart2 = workbook.add_chart({'type':'column'})
# NOTE(review): a single series is added whose 'values' range spans the two
# columns I and J -- confirm whether one series per column was intended.
# NOTE(review): the sheet name contains spaces and is not quoted in these
# range strings -- confirm that this form is accepted.
bar_chart2.add_series({
'name':'Month over month product',
'categories':'=Month over month!$H$2:$H$6',
'values':'=Month over month!$I$2:$J$6',
})
bar_chart2.set_legend({'none': True})
worksheet5.insert_chart('F8',bar_chart2)
# NOTE(review): the next two lines repeat the two calls above.
bar_chart2.set_legend({'none': True})
worksheet5.insert_chart('F8',bar_chart2)
However, I get that.
Using your provided data, I re-worked the Example given in the Docs by jmcnamara (link here) to suit what you're looking for.
Full Code:
import pandas as pd
import xlsxwriter
# Column headings for H1:J1 and the data for columns H, I and J below them.
headings = [' ', 'Apr 2017', 'May 2017']
data = [
    ['NGN', 'UGX', 'KES', 'TZS', 'CNY'],
    [5816, 1121, 115, 146, 1],
    [7089, 1095, 226, 120, 0],
]

# Opening the workbook as a context manager closes it (and writes the file)
# on exit, even if one of the calls inside the block raises.
with xlsxwriter.Workbook("test.xlsx") as workbook:
    worksheet5 = workbook.add_worksheet('Month over month')

    worksheet5.write_row('H1', headings)
    worksheet5.write_column('H2', data[0])
    worksheet5.write_column('I2', data[1])
    worksheet5.write_column('J2', data[2])

    # beginning of OP snippet
    bar_chart2 = workbook.add_chart({'type': 'column'})

    # One series per month column gives a grouped chart. All series share the
    # categories in column H; the sheet name is quoted because it has spaces.
    for column in ('I', 'J'):
        bar_chart2.add_series({
            'name': f"='Month over month'!${column}$1",
            'categories': "='Month over month'!$H$2:$H$6",
            'values': f"='Month over month'!${column}$2:${column}$6",
        })

    bar_chart2.set_title({'name': 'Month over month product'})
    bar_chart2.set_style(11)

    #I took the liberty of leaving the legend in there - it was commented in originally
    #bar_chart2.set_legend({'none': True})

    # end of OP snippet

    worksheet5.insert_chart('F8', bar_chart2)
Output: