all
I can read the text in cells, but I can't read the text inside text boxes...
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import re,os,sys,time
import openpyxl
from openpyxl import load_workbook
from openpyxl import Workbook
from openpyxl.drawing import *
reload(sys)
sys.setdefaultencoding('utf8')
# Print the value of every non-empty cell in every worksheet of the workbook.
wb = load_workbook(u'2.xlsx')
# `wb.sheetnames` and `wb[name]` replace the deprecated get_sheet_names() /
# get_sheet_by_name() accessors.
sheetnames = wb.sheetnames
for name in sheetnames:
    sheet = wb[name]
    for row in sheet.rows:
        for cell in row:
            # Compare against None so that legitimate falsy values such as
            # 0 or False are still printed; only empty cells are skipped.
            if cell.value is not None:
                print(cell.value)
I tried unzipping the xlsx file and found the content of the text boxes in the xl\drawings\drawing[0-9].xml files.
Can openpyxl.drawing.text read the text boxes? I have no idea...
How can I do this? Thanks.
I have to unzip the xlsx file......
# Unpack the workbook archive into ./tmp and collect the text of every
# drawing paragraph (<a:p>) found in tmp/xl/drawings/*.xml.
import zipfile  # ZipFile is used below but not imported by the visible script
from xml.sax.saxutils import unescape  # decodes the standard XML entities

# NOTE(review): `flist` (the workbook file name) is not defined in this
# snippet; it is expected to come from the surrounding code.
# The with-block closes the archive even if extraction fails.
with zipfile.ZipFile(os.path.join(os.getcwd(), u'' + str(flist) + '')) as zipFile:
    for member in zipFile.namelist():
        zipFile.extract(member, r'tmp')
num = 0
if os.path.exists(r'tmp/xl/drawings'):
    xmldir = os.listdir(r'tmp/xl/drawings')
    for xmlfile in xmldir:
        xml = os.path.basename(xmlfile)
        if os.path.splitext(xml)[1] == '.xml':
            # Read through a context manager so the file handle is not leaked.
            with open(u'tmp/xl/drawings/' + str(xml) + '') as handle:
                a = handle.read()
            # Newlines and spaces are stripped before matching, so the
            # extracted text contains no spaces either.
            b = a.replace('\n', '').replace(' ', '')
            c = re.findall(r'<a:p>(.*?)</a:p>', b)
            for i in c:
                # Join all text runs (<a:t>) of the paragraph.
                runs = re.findall(r'(?<=<a:t>).*?(?=</a:t>)', u'' + str(i) + '', re.S)
                text = "".join(runs).replace(' ', '').replace(' ', '').replace('\\u6d3b\\u52a8', '')
                # The original chain replaced '<', '>' and the ampersand with
                # themselves, which did nothing; decode the escaped XML
                # entities instead (the ampersand entity is handled last, so
                # nothing is decoded twice).
                text = unescape(text)
Related
I've been struggling with this problem for a while and couldn't find a solution elsewhere. I have several Excel templates in xltx format that I want to read, then write a new xlsx file after filling in some cells.
Every time I run my code it creates a corrupted excel file. Using a preview extension in VS code I'm able to see that the values were correctly changed. When I read an xlsx file instead of an xltx it works fine. Does openpyxl just not allow what I am trying to do?
import openpyxl
import win32com.client
# Replace every "$!" placeholder in the "A5 form" sheet of the template with 1
# and save the result as a regular workbook.
report = openpyxl.load_workbook("0100048-A5_R_11.xltx")
# The workbook was loaded from a template (.xltx). Clear the template flag so
# that save() writes an ordinary .xlsx document; leaving it set while saving
# under an .xlsx name matches the corruption described for this script.
report.template = False
sheet = report["A5 form"]
search_arr = ["Test_Date"]
# Placeholders are searched from row 2 and column 3 (C) onwards.
for row in sheet.iter_rows(min_row=2, min_col=3):
    for cell in row:
        val = cell.value
        if val is not None and "$!" in str(val):
            cell.value = 1
# Make the sheet at index 1 the active one in the saved file.
report.active = 1
report.save("output.xlsx")
Copied from the docs:
You can specify the attribute template=True, to save a workbook as a template:
# Load an ordinary workbook and save it as a template (.xltx).
wb = load_workbook('document.xlsx')
wb.template = True  # mark the workbook as a template before saving
wb.save('document_template.xltx')
or set this attribute to False (default), to save as a document:
# Load a template and save it as an ordinary workbook (.xlsx).
wb = load_workbook('document_template.xltx')
wb.template = False  # clear the template flag before saving
# NOTE(review): `as_template` is reported not to be a keyword argument of
# save() - confirm against the installed openpyxl version.
wb.save('document.xlsx', as_template=False)
Although the last line is from the latest docs, as_template is not a keyword argument for save!
This works instead:
# Plain save without extra keyword arguments.
wb.save('document.xlsx')
I am using the following code:
import os
import numpy as np
import pandas as pd
from openpyxl import load_workbook
def dump2ExcelTest(df, fname, sheetNameIn='Sheet1'):
    """Write *df* to sheet *sheetNameIn* of the workbook *fname*.

    An existing workbook is opened in append mode so that its current
    contents are kept; otherwise a new workbook is created.

    Args:
        df: The DataFrame to write.
        fname: Path of the ``.xlsx`` file.
        sheetNameIn: Name of the worksheet to write to.
    """
    mode = 'a' if os.path.exists(fname) else 'w'
    # In append mode ExcelWriter loads the existing workbook itself, so the
    # manual ``writer.book = load_workbook(fname)`` is not needed.
    # The with-block saves and closes exactly once on exit. The previous
    # save() followed by close() wrote the workbook twice, which is the
    # likely cause of the "corrupt file" warning in Excel.
    # NOTE(review): what happens when *sheetNameIn* already exists in the
    # workbook depends on the installed pandas version - confirm.
    with pd.ExcelWriter(fname, engine='openpyxl', mode=mode) as writer:
        df.to_excel(writer, sheet_name=sheetNameIn)
# Demo: write a 100x2 frame of random samples to the test workbook.
x1 = np.random.randn(100, 2)
df1 = pd.DataFrame(x1)
dump2ExcelTest(df1, r'Y:\summary\test3.xlsx')
On trying to open test3.xlsx I get the following warning window:
However, if I just do df1.to_excel(r'Y:\summary\test3.xlsx') then test3.xlsx opens fine.
I am not sure what to do about this as there is nothing in the log file.
I believe the way the ExcelWriter opens the file and tracks existing workbook contents is the problem. I'm not sure exactly what is going on under the hood, but you have to do both of the following:
- specify the proper startrow for the append
- copy the sheet information to the writer
I've used a contextmanager in Python for a little cleaner syntax.
This is your example but properly writing and appending as you desire.
import os
import numpy as np
import pandas as pd
from openpyxl import load_workbook
def dump2ExcelTest(df, fname, sheetNameIn='Sheet1'):
    """Append *df* below the existing rows of sheet *sheetNameIn* in *fname*.

    If *fname* does not exist yet, it is created with *df* (header included)
    in a sheet named *sheetNameIn*.

    Args:
        df: The DataFrame to write.
        fname: Path of the ``.xlsx`` file.
        sheetNameIn: Name of the worksheet to append to.

    Raises:
        ValueError: If *fname* exists but has no sheet named *sheetNameIn*.
    """
    if not os.path.exists(fname):
        # First call: create the workbook under the requested sheet name and
        # stop here. Falling through would append the same rows a second
        # time, and a non-default sheet name would not be found below.
        df.to_excel(fname, sheet_name=sheetNameIn, engine='openpyxl')
        return
    with pd.ExcelWriter(fname, engine='openpyxl', mode='a') as writer:
        # NOTE(review): assigning writer.book / writer.sheets relies on these
        # attributes being settable in the installed pandas version - confirm.
        writer.book = load_workbook(fname)
        if sheetNameIn not in writer.book.sheetnames:
            raise ValueError(f"sheet {sheetNameIn} not in workbook")
        # Grab the proper start row and copy the existing sheets to the
        # writer so that to_excel writes into the existing sheet.
        start_row = writer.book[sheetNameIn].max_row
        writer.sheets = {ws.title: ws for ws in writer.book.worksheets}
        df.to_excel(writer, sheet_name=sheetNameIn, startrow=start_row, header=False)
# Demo: append a 100x2 frame of random samples to test3.xlsx.
x1 = np.random.randn(100, 2)
df1 = pd.DataFrame(x1)
dump2ExcelTest(df1, "test3.xlsx")
More details and similar question here
I have a master excel sheet where the data looks like this [1]: https://i.stack.imgur.com/IS4cw.png
I have a script which imports the csv files and combines them and save it to the master excel sheet.
import pandas as pd
from openpyxl import load_workbook
import tkinter as tk
from tkinter import filedialog
root = tk.Tk()
root.withdraw()
root.call('wm', 'attributes', '.', '-topmost', True)
files = filedialog.askopenfilename(multiple=True)
%gui tk
var = root.tk.splitlist(files)
filePaths = []
for f in var:
df = pd.read_csv(f,skiprows=8, index_col=None, header='infer',parse_dates=True, squeeze=True, encoding='ISO-8859–1',names=['Date', 'Time', 'Temperature', 'Humidty'])
filePaths.append(df)
df = pd.concat(filePaths, axis=0, join='outer', ignore_index=True, sort=True)
book = load_workbook(r'C:\Users\Administrator\Documents\Hebin\Scripts\Temperature Distribution chart/july/12.xlsx')
writer = pd.ExcelWriter(r'C:\Users\Administrator\Documents\Hebin\Scripts\Temperature Distribution chart/july/12.xlsx', engine='openpyxl')
writer.book = book
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
df.to_excel(writer, "Sheet1", columns=['Date', 'Time','Temperature', 'Humidty'],index=False)
writer.save()
The problem is that the newly imported data is saved starting from row 1 instead of from the row after the previously saved data. How can I append the data in order every time without entering the row number manually?
The ExcelWriter can have its mode set to either write ('w') or append ('a'). The default is write.
# Open the writer in append mode ('a') instead of the default write mode.
writer = pd.ExcelWriter(r'C:\Users\Administrator\Documents\Hebin\Scripts\Temperature Distribution chart/july/12.xlsx', engine='openpyxl', mode='a')
How do I import an Excel workbook into a Jupyter notebook? I am using TensorFlow.
# Attempt from the question, kept as posted.
# NOTE(review): pandas spells the class `pd.ExcelFile`, and '\u' inside a
# non-raw string literal is parsed as a unicode escape - confirm before
# reusing these lines.
xl.file =pd.excelfile('c:\users\owner\downloads\book1.xlsx')
book1 = pd.excelfile('book1.xlsx')
It looks like you are confusing the filename with the pandas method to read a file.
import pandas as pd
# Raw string: in a normal literal '\u' starts a unicode escape (a syntax
# error in Python 3) and '\b' is a backspace, so the path would be wrong.
filename = r'c:\users\owner\downloads\book1.xlsx'
dataframe = pd.read_excel(filename)
I am absolutely new to Python and I am trying to build code that loads a data frame from an xlsx file chosen in a file browser, after offering a drop-down list of all sheets in the selected file.
I have found two pieces of code: one for browsing and reading an Excel file, and a second one for a drop-down list with all sheets in the selected xlsx file. What I need to do is combine these two pieces of code. First I would like to select an xlsx file, and then I would like to select which sheet to read (based on the drop-down list).
Function to browse and read excel file
import tkinter as tk
from tkinter import filedialog
import pandas as pd

# Main window with a white 300x300 canvas that hosts the import button.
root = tk.Tk()
canvas1 = tk.Canvas(root, width=300, height=300, bg='white')
canvas1.pack()
def getExcel():
    """Ask for an Excel file, load its 'Loan Tape' sheet and close the window.

    The loaded frame is stored in the module-level name ``df``. If the file
    dialog is cancelled, nothing is loaded and the window stays open.
    """
    global df
    import_file_path = filedialog.askopenfilename()
    if not import_file_path:
        # Dialog cancelled: there is no path to read.
        return
    df = pd.read_excel(import_file_path, sheet_name='Loan Tape')
    root.destroy()
# Button that triggers getExcel, placed on the canvas at (150, 150).
browseButton_Excel = tk.Button(text='Import Excel File', command=getExcel, bg='yellow', fg='black', font=('arial', 12, 'bold'))
canvas1.create_window(150, 150, window=browseButton_Excel)
# Enter the Tk event loop.
root.mainloop()
Function to create a drop down list of all sheets in the xlsx file
import tkinter as tk
from tkinter import *
# Build a window with a drop-down list of the sheet names of a workbook.
root = Tk()
root.title("Select a sheet")
mainframe = Frame(root)
# NOTE(review): the frame is placed with grid() here and with pack() a few
# lines below - confirm which geometry manager is intended.
mainframe.grid(column=0,row=0, sticky=(N,W,E,S) )
mainframe.columnconfigure(0, weight = 1)
mainframe.rowconfigure(0, weight = 1)
mainframe.pack(pady = 100, padx = 100)
# Variable holding the current selection of the option menu.
tkvar = StringVar(root)
# NOTE(review): `pd` is not imported in this snippet - confirm pandas is
# imported before this point.
xl = pd.ExcelFile(r'Full file path.xlsx')
choices=xl.sheet_names
tkvar.set('Nothing selected') # set the default option
popupMenu = OptionMenu(mainframe, tkvar, *choices)
Label(mainframe, text="Select a sheet").grid(row = 1, column = 1)
popupMenu.grid(row = 2, column =1)
def change_dropdown(*args):
    """Print the current value of ``tkvar`` and close the window.

    The positional arguments are accepted and ignored.
    """
    selected = tkvar.get()
    print(selected)
    root.destroy()
# Call change_dropdown whenever tkvar is written.
tkvar.trace('w', change_dropdown)
# Enter the Tk event loop.
root.mainloop()
What I need to do is combine these two pieces of code. First I would like to select an xlsx file, and then I would like to select which sheet to read (based on the drop-down list).