Pandas, Copy cells from workbook to another - pandas

I am having issues copying cells from excel workbook and pasting as values to another workbook.
I get an error on line rowSelected.append(sheet.cell(row = i, column = j).value) with the message AttributeError: 'str' object has no attribute 'cell'
Can anyone help with this?
import openpyxl
#Prepare the spreadsheets to copy from and paste too.
#File to be copied
wb = openpyxl.load_workbook(r"") #Add file name
sheet = wb["BusinessDetails"] #Add Sheet name
#File to be pasted into
template = openpyxl.load_workbook(r"") #Add file name
temp_sheet = template["Sheet1"] #Add Sheet name
#Copy range of cells as a nested list
#Takes: start cell, end cell, and sheet you want to copy from.
def copyRange(startCol, startRow, endCol, endRow, sheet):
rangeSelected = []
#Loops through selected Rows
for i in range(startRow,endRow + 1,1):
#Appends the row to a RowSelected list
rowSelected = []
for j in range(startCol,endCol+1,1):
rowSelected.append(sheet.cell(row = i, column = j).value)
#Adds the RowSelected List and nests inside the rangeSelected
rangeSelected.append(rowSelected)
return rangeSelected
#Paste range
#Paste data from copyRange into template sheet
def pasteRange(startCol, startRow, endCol, endRow, sheetReceiving,copiedData):
countRow = 0
for i in range(startRow,endRow+1,1):
countCol = 0
for j in range(startCol,endCol+1,1):
sheetReceiving.cell(row = i, column = j).value = copiedData[countRow][countCol]
countCol += 1
countRow += 1
def createData():
print("Processing...")
selectedRange = copyRange(1,2,4,14,sheet) #Change the 4 number values
pastingRange = pasteRange(1,3,4,15,temp_sheet,selectedRange) #Change the 4 number values
#You can save the template as another file to create a new file here too.s
template.save(r"")
print("Range copied and pasted!")
copyRange(2,4,30,78,"BusinessDetails")
pasteRange(2,4,30,78,"Sheet1")

It must be:
copyRange(2,4,30,78,sheet)
pasteRange(2,4,30,78,temp_sheet)
i.e. you need to pass the sheet objects, not the sheet names to your functions.
Update as per comment:
rangeSelected = copyRange(2,4,30,78,sheet)
pasteRange(2,4,30,78,temp_sheet, rangeSelected)

Related

openpyxl: Is it possible to load a workbook (with data_only=False), work on it, save it and re open the saved vile with (data_only= True)?

Basically the title. The thing is that I got an Excel file already (with a lot of formulas) and I have to use it as a template, but I have to copy certain column and paste it in another column.
Since I have to make some graphs in between I need the numeric data of the excel file so my plan is the following:
1.- load the file with data_only = False.
2.- Make the for loops needed to copy and paste info from one worksheet to another.
3.- Save the copied data as another Excel file.
4.- Open the new Excel created file, this time with data_only = True, so I can work with the numeric values.
The problem is that after doing this, it's like after putting data_only on the new created file it doesn't work, because when I made a list that filters NoneType values and strings in a column that have actual numerical values it gives me an empty list.
#I made the following
wb = load_workbook('file_name.xlsx', data_only = True)
S1 = wb['Sheet 1']
S2 = wb['Sheet 2']
#Determination of min and max cols and rows
col_min = S1.min_column
col_max = S1.max_column
row_min = S1.min_row
row_max = S1.max_row
for i in range(row_min + 2, row_max + 1):
for j in range(col_min + Value, Value + 2):
S2.cell(row = i+6, column = j+10-Value).value = S1.cell(row = i, column = j).value
Transition_file = wb.save('transition.xlsx')
wb1 = load_workbook('transition.xlsx', data_only = True) #To obtain only numerical values
S2 = wb1['Sheet 2'] #Re define my Sheet 2 values

Is it possible to append a StyleFrame to an existing excel worksheet?

I was wondering if it was possible to write a StyleFrame to an arbitrary position in an existing excel worksheet while maintaining the original formatting and styling of pre-existing cells?
E.g In the example below, I'd like to set the output of the Styleframe to start from cell 'A9' while maintaining the formatting and coloring of the other cells (Cells 'A1','A2','A3' etc):
So user #MaxU has a helpful answer over here outlining a function that appends regular dataframes to arbitrary positions in an existing excel worksheet. With some minor changes I've been able to modify this function to work on styleframes. Note that while the modified function successfully writes most cell properties into existing worksheets for some reason it does not copy cell alignments over. So I have hard-coded the Alignments of appended cells to be top,left and wrapped. Please see the modified code below:
from pathlib import Path
from copy import copy
from typing import Union, Optional
import numpy as np
import pandas as pd
import openpyxl
from openpyxl import load_workbook
from openpyxl.utils import get_column_letter
from styleframe import StyleFrame
from openpyxl.styles.alignment import Alignment
def copy_excel_cell_range(
src_ws: openpyxl.worksheet.worksheet.Worksheet,
min_row: int = None,
max_row: int = None,
min_col: int = None,
max_col: int = None,
tgt_ws: openpyxl.worksheet.worksheet.Worksheet = None,
tgt_min_row: int = 1,
tgt_min_col: int = 1,
with_style: bool = True
) -> openpyxl.worksheet.worksheet.Worksheet:
if tgt_ws is None:
tgt_ws = src_ws
# https://stackoverflow.com/a/34838233/5741205
for row in src_ws.iter_rows(min_row=min_row, max_row=max_row,
min_col=min_col, max_col=max_col):
for cell in row:
tgt_cell = tgt_ws.cell(
row=cell.row + tgt_min_row - 1,
column=cell.col_idx + tgt_min_col - 1,
value=cell.value
)
if with_style and cell.has_style:
# tgt_cell._style = copy(cell._style)
tgt_cell.font = copy(cell.font)
tgt_cell.border = copy(cell.border)
tgt_cell.fill = copy(cell.fill)
tgt_cell.number_format = copy(cell.number_format)
tgt_cell.protection = copy(cell.protection)
tgt_cell.alignment = Alignment(horizontal='left', vertical='top',wrapText=True)
return tgt_ws
def append_sf_to_excel(
filename: Union[str, Path],
sf: StyleFrame,
sheet_name: str = 'Sheet1',
startrow: Optional[int] = None,
max_col_width: int = 30,
autofilter: bool = False,
fmt_int: str = "#,##0",
fmt_float: str = "#,##0.00",
fmt_date: str = "yyyy-mm-dd",
fmt_datetime: str = "yyyy-mm-dd hh:mm",
truncate_sheet: bool = False,
storage_options: Optional[dict] = None,
**to_excel_kwargs
) -> None:
def set_column_format(ws, column_letter, fmt):
for cell in ws[column_letter]:
cell.number_format = fmt
filename = Path(filename)
file_exists = filename.is_file()
# process parameters
# calculate first column number
# if the sf will be written using `index=True`, then `first_col = 2`, else `first_col = 1`
first_col = int(to_excel_kwargs.get("index", True)) + 1
# ignore [engine] parameter if it was passed
if 'engine' in to_excel_kwargs:
to_excel_kwargs.pop('engine')
# save content of existing sheets
if file_exists:
wb = load_workbook(filename)
sheet_names = wb.sheetnames
sheet_exists = sheet_name in sheet_names
sheets = {ws.title: ws for ws in wb.worksheets}
with StyleFrame.ExcelWriter(
filename.with_suffix(".xlsx"),
mode="a" if file_exists else "w",
if_sheet_exists="new" if file_exists else None,
date_format=fmt_date,
datetime_format=fmt_datetime,
storage_options=storage_options
) as writer:
if file_exists:
# try to open an existing workbook
writer.book = wb
# get the last row in the existing Excel sheet
# if it was not specified explicitly
# for row in wb['Sheet1'].iter_rows():
# for cell in row:
# print(f'{cell.alignment}\n\n')
if startrow is None and sheet_name in writer.book.sheetnames:
startrow = writer.book[sheet_name].max_row
# truncate sheet
if truncate_sheet and sheet_name in writer.book.sheetnames:
# index of [sheet_name] sheet
idx = writer.book.sheetnames.index(sheet_name)
# remove [sheet_name]
writer.book.remove(writer.book.worksheets[idx])
# create an empty sheet [sheet_name] using old index
writer.book.create_sheet(sheet_name, idx)
# copy existing sheets
writer.sheets = sheets
else:
# file doesn't exist, we are creating a new one
startrow = 0
# write out the DataFrame to an ExcelWriter
sf.to_excel(writer, sheet_name=sheet_name)
worksheet = writer.sheets[sheet_name]
if autofilter:
worksheet.auto_filter.ref = worksheet.dimensions
for xl_col_no, dtyp in enumerate(sf.data_df.dtypes, first_col):
col_no = xl_col_no - first_col
width = max(sf.iloc[:, col_no].astype(str).str.len().max(),
len(sf.columns[col_no]) + 6)
width = min(max_col_width, width)
column_letter = get_column_letter(xl_col_no)
worksheet.column_dimensions[column_letter].width = width
if np.issubdtype(dtyp, np.integer):
set_column_format(worksheet, column_letter, fmt_int)
if np.issubdtype(dtyp, np.floating):
set_column_format(worksheet, column_letter, fmt_float)
if file_exists and sheet_exists:
# move (append) rows from new worksheet to the `sheet_name` worksheet
wb = load_workbook(filename)
# retrieve generated worksheet name
new_sheet_name = set(wb.sheetnames) - set(sheet_names)
if new_sheet_name:
new_sheet_name = list(new_sheet_name)[0]
# copy rows written by `sf.to_excel(...)` to
copy_excel_cell_range(
src_ws=wb[new_sheet_name],
tgt_ws=wb[sheet_name],
tgt_min_row=startrow + 1,
with_style=True
)
# remove new (generated by Pandas) worksheet
del wb[new_sheet_name]
wb.save(filename)
wb.close()
Credit to Maxu for writing this function, and thanks to Deepspace for making me aware of this solution.

VBA - to compare 2 tables from 2 sheets, and get result in another sheets creating comparison table

It is above my possibilities.
I know how to create Vba code with Vlookup or Hlookup for single comparision. However whatIi am trying is beyond my knowledge.
I need to compare 2 tables. 1st is requirements where 2nd is DB extract.
Both tables contains same "Action" column and other columns with Employer role as a header. Values from Action column for both tables are the same however in different order ( Those values need to act as primary key).
Columns with Employer role as a header - same header value for both tables - however columns in different order.
Amount of columns with Employer role as a header is not constants and it gets change every time I get this files. Those columns in extract are in different order than in requirements.
Amount of values from "Action" columns ( primary key) also not constants and change every time I receive files. So I cannot set specific range.
Example of Requirements table
Example of Extract table
Example of what is expected
New target worksheet need to be created where Comparison table will be created.
VBA to create Comparison table in newly created worksheet.
This table should have "Action" column + all columns with Employers role as header form requirements + all columns with Employers role as header form extract set in same order like columns in requirements + comparison table which compare values between Employers roles from Requirements and Extract and show values YES or NO
Try the next code, please. It will return in a newly created sheet (after the last existing). The new file is named "New Sheet". If it exists, it is cleared and reused:
Sub testMatchTables()
Dim sh As Worksheet, sh1 As Worksheet, shNew As Worksheet
Dim tbl1 As ListObject, tbl2 As ListObject, rightH As Long
Dim arrR, arrE, arrH1, arrH2, arrFin, i As Long, j As Long, k As Long
Dim first As Long, sec As Long, refFirst As Long, refSec As Long
Set sh = Set sh = Worksheets("Requirements")'use here the sheet keeping the first table
Set sh1 = Worksheets("Extract Table") 'use here your appropriate sheet
Set tbl1 = sh.ListObjects(1) 'use here your first table name (instead of 1)
Set tbl2 = sh1.ListObjects(1) 'use here your second table name (instead of 1)
arrR = tbl1.Range.value 'put the table range in an array
arrE = tbl2.Range.value 'put the table range in an array
'working with arrays will condiderably increase the processing speed
ReDim arrFin(1 To UBound(arrR), 1 To UBound(arrR, 2) * 3 + 2) 'redim the array to keep the processing result
'UBound is a property telling the number of array elements
arrH1 = Application.Index(arrR, 1, 0) 'make a slice in the array (1D array), the first row, which keeps the headers
arrH2 = Application.Index(arrE, 1, 0) 'make a slice in the array (1D array), the first row, which keeps the headers
'build the column headers:
For i = 1 To UBound(arrFin, 2)
If i <= UBound(arrH1) Then 'firstly the headers of the first table are filled in the final array
arrFin(1, i) = arrH1(i)
ElseIf refSec = 0 Then 'refSec is the column where a blanck column will exist
first = first + 1 'the code incrementes this variable to allow making empty only for the following row
If first = 1 Then
arrFin(1, i) = Empty: refFirst = i 'make the empty column between the two tables data and create a reference
'to be decreated from the already incremented i variable
Else
arrFin(1, i) = arrH1(i - refFirst) 'place each header column values
If i - refFirst = UBound(arrH1) Then refSec = i + 1 'when the code reaches the end of the first array
'it creates a reference for referencing the second time
End If
Else
sec = sec + 1 'the same philosophy as above, to create the second empty column
If sec = 1 Then
arrFin(1, i) = Empty 'create the empty column (for each processed row)
Else
arrFin(1, i) = arrH1(i - refSec) 'fill the header columns
End If
End If
Next
Dim C As Long, r As Long, eT As Long, T As Long
eT = UBound(arrR) 'mark the ending of the first array (where to be the first empty column)
T = UBound(arrR, 2) * 2 + 2 'mark the begining of the third final array part
'after the second empty column
For i = 2 To UBound(arrR) 'iterating between the first array rows
For j = 2 To UBound(arrE) 'iterating the second array rows
If arrR(i, 1) = arrE(j, 1) Then 'if the both arrays first column matches
arrFin(i, 1) = arrR(i, 1): arrFin(i, T + 1) = arrR(i, 1) 'put the Action values in the first area columns
arrFin(i, eT) = arrR(i, 1) 'put the Action values in the last area column
For C = 2 To UBound(arrR, 2) 'iterate between the array columns
rightH = Application.match(arrR(1, C), arrH2, 0) 'find the match of the first array header in the second one
arrFin(i, C) = arrR(i, C): arrFin(i, C + eT - 1) = arrE(j, rightH) 'place the matching header in the final array
If arrR(i, C) = arrE(j, rightH) Then
arrFin(i, T + C) = "TRUE" 'place 'TRUE' in case of matching
Else
arrFin(i, T + C) = "FALSE" 'place 'FALSE' in case of NOT matching
End If
Next C
End If
Next j
Next i
On Error Resume Next 'necessary to return an error if worksheet "New Sheet" does not exist
Set shNew = Worksheets("New Sheet")
If err.Number = 9 Then 'if it raises error number 9, this means that the sheet does not exist
err.Clear: On Error GoTo 0 'clear the error and make the code to return other errors, if any
Set shNew = Worksheets.Add(After:=Worksheets(Worksheets.count)) 'set shNew as new inserted sheet
shNew.name = "New Sheet" 'name the newly inserted sheet
Else
shNew.cells.Clear: On Error GoTo 0 ' in case of sheet exists, it is clear and the code is made to return errors
End If
'set the range where the final array to drop its values:
With shNew.Range("A1").Resize(UBound(arrFin), UBound(arrFin, 2))
.value = arrFin 'drop the array content
.EntireColumn.AutoFit 'AutoFit the involved columns
End With
End Sub
Please, test it and send some feedback.
Edited:
I commented the code as detailed I could. If still something unclear, please do not hesitate to ask for clarifications.

Syntax error when passing variable args in macro

I have a macro for libreoffice that takes in 2 arguments.
A file path
Variable args that are sheet names to look for.
I iterate over the document pertaining to the first arg and get the sheet that matches the first variable arg. I then convert that sheet to CSV.
Sub ExportToCsv(URL as String, ParamArray sheetNames() As Variant)
Dim saveParams(1) as New com.sun.star.beans.PropertyValue
saveParams(0).Name = "FilterName"
saveParams(0).Value = "Text - txt - csv (StarCalc)"
saveParams(1).Name = "FilterOptions"
saveParams(1).Value = "44,34,0,1,1" ' 44=comma, 34=double-quote
GlobalScope.BasicLibraries.loadLibrary("Tools")
URL = ConvertToURL(URL)
document = StarDesktop.loadComponentFromUrl(URL, "_blank", 0, Array())
baseName = Tools.Strings.GetFileNameWithoutExtension(document.GetURL(), "/")
directory = Tools.Strings.DirectoryNameoutofPath(document.GetURL(), "/")
sheets = document.Sheets
sheetCount = sheets.Count
Dim x as Integer
Dim requiredSheetIndex as Integer
For x = 0 to sheetCount -1
sheet = sheets.getByIndex(x)
sheet.isVisible = True
For i = LBound(sheetNames) To UBound(sheetNames)
If StrComp(sheet.Name, sheetNames(i), vbTextCompare) = 0 Then
requiredSheetIndex = x
End If
Next
Next
currentSheet = document.GetCurrentController.GetActiveSheet()
sheet = sheets(requiredSheetIndex)
document.GetCurrentController.SetActiveSheet(sheet)
filename = directory + "/" + baseName + ".csv"
fileURL = convertToURL(Filename)
document.StoreToURL(fileURL, saveParams())
document.close(True)
End Sub
Eg. ExportToCsv(<path>, 'Data'). Suppose the document has 4 sheets with the 3rd sheet as DATA, this sheet should be converted to CSV.
Previously I used to give the sheet idnex directly into the macro and it worked perfectly. But requirements changed and I have to pass in an array of possible names to match on. Hence the variable args.
But now I am getting a syntax error(Screenshot attached). I cant figure out what is wrong here.
I believe it is complaining about the ParamArray keyword. After a little digging, I discovered you need to include:
option compatible
at the top of your module. For more information, you can refer to this link.

Porting VBA To IronPython

I am trying to translate the VBA code found in this link into IronPython. Can anyone recommend a good VBA resource to explain how best to do this for a Python programmer?
I have all the Excel portions implemented, such as the treatment and use of objects, workbooks, worksheets, etc.
I will also settle for an explanation of this snippet of code:
'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
' Put the index value of the sheet into Arr. Ensure there
' are no duplicates. If Arr(N) is not zero, we've already
' loaded that element of Arr and thus have duplicate sheet
' names.
'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
If Arr(N) > 0 Then
ErrorText = "Duplicate worksheet name in NameArray."
SortWorksheetsByNameArray = False
Exit Function
End If
why would Arr(N) ever NOT be greater than 0?
Here is my current code, which is broken:
def move_worksheets_according_to_list(self, name_list):
wb = self.com_workbook
temp_list = []
for n in range(len(name_list)):
if name_list.count(name_list[n]) > 1:
raise Exception("Duplicate worksheet name in NameArray.")
else:
temp_list.append(wb.Worksheets(name_list[n]).Index)
for m in range(len(temp_list)):
for n in range(m, len(temp_list)):
if temp_list[n] < temp_list[m]:
l = temp_list[n]
temp_list[n] = temp_list[m]
temp_list[m] = l
if not all(temp_list[i] <= temp_list[i+1] for i in xrange(len(temp_list)-1)):
return False
print "current order"
for sheet in wb.Worksheets:
print sheet.Name
wb.Worksheets(name_list[0]).Move(Before=wb.Worksheets(1))
#WB.Worksheets(NameArray(LBound(NameArray))).Move before:=WB.Worksheets(Arr(1))
for n in range(len(name_list)-1):
print 'moving ', name_list[n], 'before ', name_list[n+1]
wb.Worksheets(name_list[n]).Move(Before=wb.Worksheets(name_list[n + 1]))
Note:
With this answer as reference, here is all I had to do:
def move_worksheets_according_to_list(self, name_list):
wb = self.com_workbook
l = []
# since wb.Worksheets is a com_object, you can't use the "if _ in XXX"
# construct without converting to a list first
for s in wb.Worksheets:
l.append(s.Name)
for n in range(len(name_list)):
if name_list[n] in l:
wb.Worksheets(name_list[n]).Move(After=wb.Worksheets(wb.Worksheets.Count))
Declaring and dimensioning an array of longs in VBA will create the array with the default value of 0 in each slot.
def move_worksheets_according_to_list(self, name_list):
wb = self.com_workbook
l = []
# since wb.Worksheets is a com_object, you can't use the "if _ in XXX"
# construct without converting to a list first
for s in wb.Worksheets:
l.append(s.Name)
for n in range(len(name_list)):
if name_list[n] in l:
wb.Worksheets(name_list[n]).Move(After=wb.Worksheets(wb.Worksheets.Count)