How to read the text in textbox by using openpyxl

How to read the text in textbox by using openpyxl - openpyxl

all
I can read the text in cells, but I can't read the text inside a textbox:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import re,os,sys,time
import openpyxl
from openpyxl import load_workbook
from openpyxl import Workbook
from openpyxl.drawing import *
reload(sys)
sys.setdefaultencoding('utf8')
wb = load_workbook(u'2.xlsx')
sheetnames = wb.get_sheet_names()
for i in range(0,len(sheetnames)):
sheet = wb.get_sheet_by_name(sheetnames[i])
for row in sheet.rows:
for cell in row:
if cell.value:
print cell.value
I tried unzipping the xlsx file and found the textbox content in the xl\drawings\drawing[0-9].xml files.
Can openpyxl.drawing.text read the textbox? I have no idea...
How can I do this? Thanks.

In the end I had to unzip the xlsx file and read the drawing XML myself:
# Unpack the workbook archive into ./tmp so the drawing XML parts can be read
# as plain files.
# NOTE(review): `flist` (the workbook file name) and the `zipfile` import are
# not defined in this snippet; they must come from the surrounding script.
zipFile = zipfile.ZipFile(os.path.join(os.getcwd(), u'' + str(flist) + ''))
try:
    for member in zipFile.namelist():
        zipFile.extract(member, r'tmp')
finally:
    # Close the archive even if extraction fails part-way.
    zipFile.close()
num = 0
if os.path.exists(r'tmp/xl/drawings'):
    xmldir = os.listdir(r'tmp/xl/drawings')
    for xmlfile in xmldir:
        xml = os.path.basename(xmlfile)
        if os.path.splitext(xml)[1] == '.xml':
            with open(u'tmp/xl/drawings/' + str(xml) + '') as fh:
                a = fh.read()
            # Collapse the document onto one line without spaces so the
            # non-greedy patterns below can match across the original lines.
            b = a.replace('\n', '').replace(' ', '')
            # One match per paragraph element.
            c = re.findall(r'<a:p>(.*?)</a:p>', b)
            for i in c:
                # Concatenate all text runs (<a:t>...</a:t>) of this paragraph.
                text = "".join(re.findall(r'(?<=<a:t>).*?(?=</a:t>)', u'' + str(i) + '', re.S))
                text = text.replace(' ', '').replace('　', '').replace('\\u6d3b\\u52a8', '')
                # Decode the XML entities; '&amp;' is handled last so that a
                # sequence such as '&amp;lt;' is not decoded twice.
                text = text.replace('&lt;', '<').replace('&gt;', '>').replace('&amp;', '&')
                # `text` now holds the paragraph's text; the snippet ends here
                # without using it.

Related

Reading an .xltx file and Writing to .xlsx file with openpyxl

I've been struggling with this problem for a while and couldn't find a solution elsewhere. I have several Excel templates in xltx format that I want to read, then write a new xlsx file after filling in some cells.
Every time I run my code it creates a corrupted excel file. Using a preview extension in VS code I'm able to see that the values were correctly changed. When I read an xlsx file instead of an xltx it works fine. Does openpyxl just not allow what I am trying to do?
import openpyxl
import win32com.client
# Load the .xltx workbook and select the sheet to fill in.
report = openpyxl.load_workbook("0100048-A5_R_11.xltx")
sheet = report["A5 form"]
search_arr = ["Test_Date"]

# Scan rows 2..max_row and columns 3..max_column; every cell whose value
# contains the marker "$!" is overwritten with 1.
for r in range(2, sheet.max_row + 1):
    for c in range(3, sheet.max_column + 1):
        val = sheet.cell(r, c).value
        if val is not None and "$!" in str(val):
            sheet.cell(r, c).value = 1

report.active = 1
report.save("output.xlsx")

Copied from the docs:
You can specify the attribute template=True, to save a workbook as a template:
wb = load_workbook('document.xlsx')
wb.template = True
wb.save('document_template.xltx')
or set this attribute to False (default), to save as a document:
wb = load_workbook('document_template.xltx')
wb.template = False
wb.save('document.xlsx', as_template=False)
Although the last line is from the latest docs, as_template is not a keyword argument for save!
This works instead:
wb.save('document.xlsx')

Pandas writing to excel gives warning when using openpyxl

I am using the following code:
import os
import numpy as np
import pandas as pd
from openpyxl import load_workbook
def dump2ExcelTest(df, fname, sheetNameIn='Sheet1'):
    """Write *df* to sheet *sheetNameIn* of the workbook *fname*.

    If *fname* already exists, the writer is opened in append mode and the
    workbook loaded with ``load_workbook`` is attached to it; otherwise a new
    file is created in write mode.

    Args:
        df: DataFrame to write.
        fname: Path of the Excel file.
        sheetNameIn: Name of the target sheet.
    """
    if os.path.exists(fname):
        writer = pd.ExcelWriter(fname, engine='openpyxl', mode='a')
        book = load_workbook(fname)
        writer.book = book
    else:
        writer = pd.ExcelWriter(fname, engine='openpyxl', mode='w')
    df.to_excel(writer, sheet_name=sheetNameIn)
    # Both save() and close() are called, exactly as in the snippet as posted.
    writer.save()
    writer.close()


x1 = np.random.randn(100, 2)
df1 = pd.DataFrame(x1)
dump2ExcelTest(df1, r'Y:\summary\test3.xlsx')
On trying to open test3.xlsx I get the following warning window:
However, if I just do df1.to_excel(r'Y:\summary\test3.xlsx') then test3.xlsx opens fine.
I am not sure what to do about this as there is nothing in the log file.

I believe the way the ExcelWriter opens the file and tracks existing workbook contents is the problem. I'm not sure exactly what is going on under the hood but you have to both
specify the proper startrow for append
copy sheet information to the writer
I've used a contextmanager in Python for a little cleaner syntax.
This is your example but properly writing and appending as you desire.
import os
import numpy as np
import pandas as pd
from openpyxl import load_workbook
def dump2ExcelTest(df, fname, sheetNameIn='Sheet1'):
    """Write *df* to sheet *sheetNameIn* of *fname*, appending below existing rows.

    If *fname* does not exist it is created with *df* (header included) in
    sheet *sheetNameIn*. Otherwise the rows of *df* are written without a
    header, starting at the first row after the sheet's current ``max_row``.

    Args:
        df: DataFrame to write.
        fname: Path of the Excel file.
        sheetNameIn: Name of the target sheet.

    Raises:
        ValueError: If *fname* exists but has no sheet named *sheetNameIn*.
    """
    if not os.path.exists(fname):
        # First write: create the workbook under the requested sheet name and
        # stop here, so the same rows are not appended a second time below.
        df.to_excel(fname, sheet_name=sheetNameIn, engine='openpyxl')
        return

    with pd.ExcelWriter(fname, engine='openpyxl', mode='a') as writer:
        # NOTE(review): assigning to writer.book / writer.sheets may not be
        # allowed in newer pandas releases - confirm against the installed
        # version (see the ExcelWriter `if_sheet_exists` option).
        writer.book = load_workbook(fname)
        if sheetNameIn not in writer.book.sheetnames:
            raise ValueError(f"sheet {sheetNameIn} not in workbook")
        # Grab the proper start row and copy the existing sheets to the
        # writer so the target sheet is extended rather than replaced.
        start_row = writer.book[sheetNameIn].max_row
        writer.sheets = {ws.title: ws for ws in writer.book.worksheets}
        df.to_excel(writer, sheetNameIn, startrow=start_row, header=False)


x1 = np.random.randn(100, 2)
df1 = pd.DataFrame(x1)
dump2ExcelTest(df1, "test3.xlsx")
More details and similar question here

How to add data to an existing excel file in python without overwriting the data

I have a master excel sheet where the data looks like this [1]: https://i.stack.imgur.com/IS4cw.png
I have a script which imports the CSV files, combines them, and saves the result to the master Excel sheet.
import pandas as pd
from openpyxl import load_workbook
import tkinter as tk
from tkinter import filedialog
root = tk.Tk()
root.withdraw()
root.call('wm', 'attributes', '.', '-topmost', True)
files = filedialog.askopenfilename(multiple=True)
%gui tk
var = root.tk.splitlist(files)
filePaths = []
for f in var:
df = pd.read_csv(f,skiprows=8, index_col=None, header='infer',parse_dates=True, squeeze=True, encoding='ISO-8859–1',names=['Date', 'Time', 'Temperature', 'Humidty'])
filePaths.append(df)
df = pd.concat(filePaths, axis=0, join='outer', ignore_index=True, sort=True)
book = load_workbook(r'C:\Users\Administrator\Documents\Hebin\Scripts\Temperature Distribution chart/july/12.xlsx')
writer = pd.ExcelWriter(r'C:\Users\Administrator\Documents\Hebin\Scripts\Temperature Distribution chart/july/12.xlsx', engine='openpyxl')
writer.book = book
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
df.to_excel(writer, "Sheet1", columns=['Date', 'Time','Temperature', 'Humidty'],index=False)
writer.save()
The problem is that the newly imported data is saved starting at row 1 instead of after the last row of the previously saved data. How can I append the data in order every time without entering the row number manually?

The ExcelWriter can have its mode set to either write ('w') or append ('a'). The default is write.
writer = pd.ExcelWriter(r'C:\Users\Administrator\Documents\Hebin\Scripts\Temperature Distribution chart/july/12.xlsx', engine='openpyxl', mode='a')

How to import an Excel workbook into a Jupyter notebook

How do I import an Excel workbook into a Jupyter notebook? I am using TensorFlow.
xl.file =pd.excelfile('c:\users\owner\downloads\book1.xlsx')
book1 = pd.excelfile('book1.xlsx')

It looks like you are confusing the filename with the pandas method to read a file.
import pandas as pd

# Raw string: in an ordinary literal "\u" starts a Unicode escape (a
# SyntaxError for this path on Python 3) and "\b" is a backspace character.
filename = r'c:\users\owner\downloads\book1.xlsx'
dataframe = pd.read_excel(filename)

Browse for xlsx workbook and create a drop down list to select a sheet to read

I am absolutely new to Python and I am trying to write code that loads a data frame from an xlsx file chosen in a file browser, and then shows a drop-down list of all sheets in the selected file.
I have found two pieces of code: one for browsing to and reading an Excel file, and a second for a drop-down list of all sheets in an xlsx file. What I need to do is combine these two pieces of code: first I would like to select an xlsx file, and then select which sheet to read (from the drop-down list).
Function to browse and read excel file
import tkinter as tk
from tkinter import filedialog
import pandas as pd

root = tk.Tk()
canvas1 = tk.Canvas(root, width=300, height=300, bg='white')
canvas1.pack()


def getExcel():
    """Ask for a file, load its 'Loan Tape' sheet into the global ``df``, then close the window."""
    global df
    import_file_path = filedialog.askopenfilename()
    df = pd.read_excel(import_file_path, sheet_name='Loan Tape')
    root.destroy()


# A single button, placed on the canvas at (150, 150), triggers the import.
browseButton_Excel = tk.Button(text='Import Excel File', command=getExcel, bg='yellow', fg='black', font=('arial', 12, 'bold'))
canvas1.create_window(150, 150, window=browseButton_Excel)
root.mainloop()
Function to create a drop down list of all sheets in the xlsx file
import tkinter as tk
from tkinter import *
root = Tk()
root.title("Select a sheet")

# Container frame for the label and the drop-down.
mainframe = Frame(root)
mainframe.grid(column=0, row=0, sticky=(N, W, E, S))
mainframe.columnconfigure(0, weight=1)
mainframe.rowconfigure(0, weight=1)
mainframe.pack(pady=100, padx=100)

# The drop-down offers the sheet names of the workbook.
# NOTE(review): `pd` is not imported in this snippet; it must come from the
# surrounding code.
tkvar = StringVar(root)
xl = pd.ExcelFile(r'Full file path.xlsx')
choices = xl.sheet_names
tkvar.set('Nothing selected')  # set the default option
popupMenu = OptionMenu(mainframe, tkvar, *choices)
Label(mainframe, text="Select a sheet").grid(row=1, column=1)
popupMenu.grid(row=2, column=1)


def change_dropdown(*args):
    """Print the currently selected sheet name and close the window."""
    print(tkvar.get())
    root.destroy()


# Run the callback whenever the variable is written, i.e. on selection.
tkvar.trace('w', change_dropdown)
root.mainloop()
What I need to do is combine these two pieces of code: first I would like to select an xlsx file, and then select which sheet to read (from the drop-down list).

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

How to read the text in textbox by using openpyxl - openpyxl

Related

Reading an .xltx file and Writing to .xlsx file with openpyxl

Pandas writing to excel gives warning when using openpyxl

How to add data to an existing excel file in python without overwriting the data

How to import an Excel workbook into a Jupyter notebook

Browse for xlsx workbook and create a drop down list to select a sheet to read

Categories

Resources