xlrd copying cell style from xls to xlsx - xlsxwriter

I'm trying to convert .xls files to .xlsx, including cell styles. At the moment, I'm working with cell fonts.
Is there another way for xlrd to determine whether the cell font is bold or not other than font._font_flag == 1?
def _xlsx_font_format(xlswb, xlsxwb, xf_index, cache):
    """Return an xlsxwriter format mirroring an xls cell's font style.

    Looks up the cell's XF record, follows its ``font_index`` into the
    workbook's ``font_list`` and maps the font's bold / italic / underlined
    attributes to an xlsxwriter format. Formats are cached per style
    combination in ``cache`` so each one is created only once per output
    workbook. Returns ``None`` when the font has none of these styles.
    """
    font = xlswb.font_list[xlswb.xf_list[xf_index].font_index]
    props = {}
    if font.bold:
        props['bold'] = True
    if font.italic:
        props['italic'] = True
    if font.underlined:
        props['underline'] = 1
    if not props:
        return None
    key = tuple(sorted(props))
    if key not in cache:
        cache[key] = xlsxwb.add_format(props)
    return cache[key]


def xlstoxlsx():
    """Convert every ``.xls`` file in ``path1`` to ``.xlsx``, keeping font styles.

    For each input file, a workbook named ``<input name>_Coverted <n>.xlsx``
    is written to the same directory. Cell values are copied sheet by sheet,
    and each cell's bold / italic / underline state is taken from its xlrd
    font record (not from the XF record's ``_font_flag``, which is not the
    bold state).

    Uses the module-level ``path1``, ``os``, ``xlrd`` and ``xlsxwriter``.
    """
    input_file_loc = [x for x in os.listdir(path1) if x.endswith(".xls")]
    for count, file in enumerate(input_file_loc):
        # formatting_info=True makes xlrd populate xf_list / font_list.
        xlswb = xlrd.open_workbook(path1 + "/" + file, formatting_info=True)
        out_name = f"{path1}/" + file + '_Coverted {:2d}.xlsx'.format(count)
        # The context manager closes the output workbook even if copying fails.
        with xlsxwriter.Workbook(out_name) as xlsxwb:
            formats = {}
            for sht in xlswb.sheet_names():
                xlsws = xlswb.sheet_by_name(sht)
                xlsxws = xlsxwb.add_worksheet()
                # Iterating nrows/ncols directly also handles empty sheets.
                for row in range(xlsws.nrows):
                    for col in range(xlsws.ncols):
                        cell = xlsws.cell(row, col)
                        fmt = _xlsx_font_format(
                            xlswb, xlsxwb, cell.xf_index, formats)
                        xlsxws.write(row, col, cell.value, fmt)

Related

Pandas.to_excel: can cell formats be predefined for openpyxl (like xlsxwriter's add_format)? Precision example

I'm trying to save dataframes into tabs/sheets in an existing xlsm file. I was able to save multiple sheets into an xlsx file with engine=xlsxwriter, but I couldn't find a way to modify it to write to an xlsm file. I found numerous examples using openpyxl as an engine, but I haven't found a way to predefine the cell formats. Here's the xlsxwriter code:
# Write the frame to one sheet, then rewrite the 'min' and 'max' cells as
# numbers whose display format matches the decimals present in the source text.
with pd.ExcelWriter(filename + '.xlsx', engine='xlsxwriter') as writer:
    df.to_excel(writer, sheet_name=tabname)
    workbook = writer.book
    worksheet = writer.sheets[tabname]
    fmt0 = workbook.add_format({'num_format': '0'})
    fmt1 = workbook.add_format({'num_format': '0.0'})
    fmt2 = workbook.add_format({'num_format': '0.00'})
    # Format to use for a given number of digits after the '.'.
    formats_by_decimals = {1: fmt1, 2: fmt2}
    # Worksheet column index -> dataframe column name.
    cols = {3: 'min', 4: 'max'}
    for position, label in enumerate(df.index):
        sheet_row = position + 1  # row 0 holds the header written by to_excel
        for sheet_col, column_name in cols.items():
            text = df.at[label, column_name]
            if not is_number(text):
                continue
            if '.' not in text:
                worksheet.write_number(sheet_row, sheet_col, float(text), fmt0)
                continue
            decimals = len(text.partition('.')[2])
            chosen = formats_by_decimals.get(decimals)
            # Values with any other number of decimals are left as written.
            if chosen is not None:
                worksheet.write_number(sheet_row, sheet_col, float(text), chosen)
Does openpyxl have a way to do the same thing?
I think I can define a function that will do this but if there's a way to modify the existing code, I'd rather do that.

how to create by code an Excel workbook with several sheets using pandas.to_excel?

With an SQL query, I get a number of customers, then a number of suppliers for each customer.
The objective is to create an Excel workbook per customer and in this workbook, at the rate of one sheet per supplier, copy the details of the supplier's invoices.
The code below allows you to create Excel workbooks and one sheet per supplier, but the copy of invoices per supplier does not work. Where is the error?
My code :
# Original (non-working) attempt from the question: one workbook per client,
# one worksheet per supplier ("Fournisseur") of that client.
Clients=list(generator_df['Client'].unique())
for i in range(0,len(Clients)):
if len(Clients[i])!= 0:
# An xlsxwriter workbook is opened on the client's output path.
workbook = xlsxwriter.Workbook(path+'"'+Clients[i]+".xlsx")
# Distinct suppliers recorded for this client.
Four = generator_df[['Four']][generator_df['Client'] == Clients[i]].dropna()
Fournisseurs = list(Four['Four'].unique())
for j in range(0,len(Fournisseurs)):
if j == 0:
worksheet = workbook.add_worksheet(Fournisseurs[0])
Detail = detail_df[['Num_facture','Date_facture','Code_produit','Designation','Qte','PU_net','Montant_HT']][(detail_df['Fournisseur'] == Fournisseurs[0]) & (detail_df['Client']==Clients[i])].dropna()
# NOTE(review): to_excel receives the file path, not `workbook`, so the data
# is not written into the worksheet created above.
Detail.to_excel(path+'"'+Clients[i]+".xlsx", sheet_name = Fournisseurs[0], header = True, index = False)
else:
Num=str(j)
worksheetNum = workbook.add_worksheet(Fournisseurs[j])
Detail_j = detail_df[['Num_facture','Date_facture','Code_produit','Designation','Qte','PU_net','Montant_HT']][(detail_df['Fournisseur'] == Fournisseurs[j]) & (detail_df['Client'] == Clients[i])].dropna()
# NOTE(review): `Detail_1` is never assigned in this snippet; the frame built
# on the previous line is `Detail_j`.
Detail_1.to_excel(path+'"'+Clients[i]+".xlsx", sheet_name = Fournisseurs[j], header = True, index = False)
# NOTE(review): closing the xlsxwriter workbook targets the same path that
# to_excel was given above - presumably replacing that file; confirm.
workbook.close()
After several modifications to the code, the problem is solved:
# Export one workbook per client, with one sheet of invoice details per
# supplier ("Fournisseur") of that client.
invoice_columns = ['Num_facture', 'Date_facture', 'Code_produit',
                   'Designation', 'Qte', 'PU_net', 'Montant_HT']
Clients = list(generator_df['Client'].unique())
for client in Clients:
    if len(client) == 0:
        continue
    # Distinct, non-null suppliers recorded for this client.
    Fournisseurs = list(
        generator_df.loc[generator_df['Client'] == client, 'Four']
        .dropna()
        .unique()
    )
    # NOTE(review): the literal '"' in the file name is kept from the original
    # code; it looks unintended (possibly a lost path separator) - confirm.
    # The context manager saves and closes the file; ExcelWriter.save() is
    # deprecated and no longer available in pandas 2.x.
    with pd.ExcelWriter(path+'"'+client+".xlsx", engine='xlsxwriter') as writer:
        for fournisseur in Fournisseurs:
            rows = ((detail_df['Fournisseur'] == fournisseur)
                    & (detail_df['Client'] == client))
            Detail = detail_df.loc[rows, invoice_columns].dropna()
            Detail.to_excel(writer, sheet_name=fournisseur,
                            header=True, index=False)

How can I modify the existing sheetobj.data_validations in openpyxl?

I am writing an application to create a new blank day's worth of cells in a financial transaction spreadsheet. The spreadsheet was generated by WPS.
Everything is working as I'd like, except that data validation for one column only does not copy successfully from the old cells to the new cells in the same column. When I examine the sheetobj.data_validations, I find that the working columns end like this:
M3217:M65536, E3217:E65536
The column that doesn't work correctly ends like this:
N3196:N3215
Here is the code that actually copies the cells:
# Copy every cell of rows frod..eod-1 down by `rinc` rows: styles are copied
# as-is, while the value is either cleared, copied, or rewritten as a formula
# according to the method registered for the column's header.
# NOTE(review): block nesting was reconstructed from a paste without
# indentation; in particular the data_type assignment is assumed to belong to
# the `else` branch - confirm against the real file.
# NOTE(review): range(frod, eod) never yields eod, so the `r == eod` branches
# below cannot run as written - confirm whether `eod - 1` was intended.
for r in range(frod, eod):
    nrow = r + rinc
    for c in range(1, maxc + 1):
        cell_tmp = sheet.cell(row=r, column=c)
        val_tmp = cell_tmp.value
        new_cell = sheet.cell(row=nrow, column=c)
        new_cell.font = copy(cell_tmp.font)
        new_cell.fill = copy(cell_tmp.fill)
        new_cell.number_format = copy(cell_tmp.number_format)
        new_cell.alignment = copy(cell_tmp.alignment)
        new_cell.border = copy(cell_tmp.border)
        headstr = col_string(sheet.title, c)
        method = get_method(headstr)
        # 'f' is openpyxl's data type code for formula cells. Compare by value:
        # `is not` on a string literal tests object identity, not equality.
        if cell_tmp.data_type != 'f':
            if method == 'cp_eod' and r == (eod - 2):
                new_cell.value = val_tmp
            else:
                new_cell.value = None
                new_cell.data_type = cell_tmp.data_type
        elif method == 'tc_eod':
            new_cell.value = form_self(val_tmp, r, nrow)
        elif method == 'prev_inc':
            if r == frod:
                new_cell.value = form_inc(val_tmp, val_tmp[val_tmp.rindex('+')+1:])
            else:
                new_cell.value = date_prev(val_tmp, r, nrow)
        elif method == 'self':
            if r == eod:
                new_cell.value = form_self_dec(val_tmp, r, nrow)
            else:
                new_cell.value = form_self(val_tmp, r, nrow)
        elif method == 'self_fod':
            if r == frod:
                new_cell.value = form_self(val_tmp, r, nrow)
            elif r == eod:
                new_cell.value = form_self_dec(val_tmp, r, nrow)
            else:
                new_cell.value = form_frod(val_tmp, frod, frod + rinc, r, nrow)
        elif method == 'self_eod':
            new_cell.value = form_eod(val_tmp, r, nrow, eod, eod + rinc)
        elif method == 'tcsbs':
            new_cell.value = form_tcsbs(val_tmp, val_tmp[val_tmp.rindex('$')+1:])
        elif method == 'self_prev':
            if r == eod:
                new_cell.value = form_prev(val_tmp, nrow)
            else:
                new_cell.value = form_self_dec(val_tmp, r, nrow)
The column in question falls into this block of code:
else:
new_cell.value = None
new_cell.data_type = cell_tmp.data_type
Is it possible to edit the sheetobj.data_validations for column N to end the same way the working columns do?
Other things I have tried:
Directly setting a new data validation for the cells in the column. Python showed the cells having data validation but when I saved the workbook and opened in google sheets, it was not there.
Copying a cell from a good column into the bad column on the original sheet.
This also did not solve my problem, when I copied a new block of cells, the data validation did not copy.
Copying a cell from a good column in another sheet into the bad column on the original sheet.
This resulted in losing all data validation when I copied a new block of cells.

Force single line of string in VObject

I am trying to create vCards (email contacts) using the vobject library and pandas for Python.
When serializing the values I get new lines in the "notes" of my output(no new lines in source). In every new line, created by ".serialize()", there is also a space in the beginning. I would need to get rid of both.
Example of output:
BEGIN:VCARD
VERSION:3.0
EMAIL;TYPE=INTERNET:test#example.at
FN:Valentina test
N:Valentina;test;;;
NOTE:PelletiererIn Mitglieder 2 Preiserhebung Aussendung 2 Pressespiegelver
sand 2 GeschäftsführerIn PPA_PelletiererInnen GeschäftsführerIn_Pellet
iererIn
ORG:Test Company
TEL;TYPE=CELL:
TEL;TYPE=CELL:
TEL;TYPE=CELL:
END:VCARD
Is there a way that I can force the output in a single line?
# Build one vCard per dataframe row and print all serialized cards together.
serialized_cards = []
for _, record in df.iterrows():
    card = vobject.vCard()

    # Structured name and formatted (display) name.
    card.add('n')
    card.n.value = vobject.vcard.Name(record["First Name"], record["Last Name"])
    card.add('fn')
    card.fn.value = (str(record["First Name"]) + " " + record["Last Name"])

    email = card.add("email")
    email.value = str((record["E-mail Address"]))
    email.type_param = "INTERNET"

    card.add('org')
    card.org.value = [record["Organization"]]

    # Every phone column is exported as a TEL entry of type CELL.
    for phone_column in ("Home Phone", "Business Phone", "Mobile Phone"):
        phone = card.add("tel")
        phone.value = str(record[phone_column])
        phone.type_param = "CELL"

    card.add("note")
    card.note.value = record["Notiz für Kontaktexport"]

    serialized_cards.append(card.serialize())
output = "".join(serialized_cards)
print(output)

Create Dataframe name from 2 strings or variables pandas

I am extracting selected pages from a PDF file and want to assign each dataframe a name based on the page extracted:
# Question's original attempt: one dataframe variable per extracted page.
file = "abc"
selected_pages = ['10','11'] # can be any combination, e.g. ['6','14','20']
# NOTE(review): selected_pages is a list, so calling it with () fails.
for i in selected_pages():
# NOTE(review): `df{str(i)}` is not valid Python; a variable name cannot be
# assembled from an expression like this.
df{str(i)} = read_pdf(path + file + ".pdf",encoding = 'ISO-8859-1', stream = True,area = [100,10,740,950],pages= (i), index = False)
print (df{str(i)} )
The idea, ultimately, as in the above example, is to have the dataframes df10 and df11. I have tried "df" + str(i), "df" & str(i), and df{str(i)}; however, all of them give the error message: SyntaxError: invalid syntax
Any better way of doing this is most welcome. Thanks.
This is where a dictionary would be a much better option.
Also note the error you have at the start of the loop. selected_pages is a list, so you can't do selected_pages().
# Read each selected page into a dict keyed by its page number (as a string)
# instead of trying to create dynamically named variables.
file = "abc"
selected_pages = ['10', '11']  # can be any combination, e.g. ['6', '14', '20']
df = {
    page: read_pdf(path + file + ".pdf", encoding='ISO-8859-1', stream=True,
                   area=[100, 10, 740, 950], pages=(page), index=False)
    for page in selected_pages
}
# Step back one page from the last selected page (11 -> 10 here).
i = int(selected_pages[-1]) - 1
dfB = df[str(i)]
# Drop the first four rows in place, then name the columns.
dfB.drop(index=dfB.index[:4], inplace=True)
dfB.columns = ['col1', 'col2', 'col3', 'col4', 'col5']