How to create multiple pandas profiling reports for multiple csv files in a directory? The report name should match the file name - pandas

I tried this,
import glob
import os
import pandas as pd
import pandas_profiling
from pandas_profiling import ProfileReport
# Collect every CSV in the data directory. The raw string keeps the Windows
# backslashes literal instead of relying on unrecognised escape sequences.
files = glob.glob(r"D:\home_health_services_current_data\*.csv")

# Read each file once and join them in a single concat call:
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0.
# pd.concat rejects an empty list, so fall back to an empty frame when the
# pattern matched nothing.
frames = [pd.read_csv(f) for f in files]
df = pd.concat(frames) if frames else pd.DataFrame()

# NOTE: this builds ONE report over the combined rows of all files and writes
# it to a fixed path; it does not produce a report per input file.
profile = ProfileReport(df, title="Profiling Report", explorative=True)
profile.to_file(r"D:\proj_report\profilerep\prof_report.html")

Related

How do I solve the "Unsupported format, or corrupt file: Expected BOF record; found ..." error?

import pandas as pd
import xlrd
import os
import matplotlib.pylab as plt
file_folder_address = 'C:/Users/Amirreza/Desktop/python homeworks/project files'
df_total = pd.DataFrame()

# os.listdir returns the name of every entry in the folder (expected to be
# the Excel workbooks); each name is joined back onto the folder path below.
for file in os.listdir(file_folder_address):
    # Fresh, empty frames for the workbook handled in this iteration.
    df_men_urb, df_women_urb = pd.DataFrame(), pd.DataFrame()
    df_men_rural, df_women_rural = pd.DataFrame(), pd.DataFrame()

    # NOTE(review): the xlrd engine is applied to every directory entry,
    # whatever its extension or format.
    workbook_path = os.path.join(file_folder_address, file)
    workbook = pd.ExcelFile(workbook_path, engine="xlrd")
    sheet_names = workbook.sheet_names
When I run this code, it raises the error above. What should I do?

compare 2 pandas dataframes

import glob
import pandas as pd
import numpy as np
import os
import fnmatch
import zipfile
# The CSV has no header row, so name its single column explicitly.
df1 = pd.read_csv("2016Q12ExactTargetE1.csv", names=['FileName'])
print("\nRead ", df1.shape[0], "Records")

# Visit every zip archive matched by the pattern.
# NOTE(review): recursive=True only has an effect when the pattern contains
# "**"; as written, only c:\temp itself is searched.
for filename in glob.glob('c:\\temp\\*.zip', recursive=True):
    # The context manager closes the archive as soon as its member list has
    # been read, instead of leaking one open file handle per archive.
    with zipfile.ZipFile(filename) as zf:
        zfl = zf.namelist()

    # Keep only the .eml members and load them into a one-column frame.
    # df2 is rebuilt for each archive, so after the loop it holds the names
    # from the last archive processed.
    eml_files = fnmatch.filter(zfl, "*.eml")
    df2 = pd.DataFrame(eml_files)
    print("\nRead2 ", df2.shape[0], "Records")
The csv file
FileName
F0B1F7B371C427E6FDDE1078287A3C71.eml
E107A8CADF8F87B05599A3AAF03D5BA1.eml
30B54778C0B912F2516F6C390A137E91.eml
D06DD3162620490F7E9F8ADD1AE0F621.eml
10E3BAFB831EA97615DBBBF18D601EC1.eml
the eml_files looks like
['00E6E77CE9890A3F34343997BCA33791.eml',
'109E4F29239EA8259707B2E3D0D00351.eml',
'403EBEC70C1F305B72EFAA3822D75871.eml',
'30B54778C0B912F2516F6C390A137E91.eml',
'E107A8CADF8F87B05599A3AAF03D5BA1.eml',
'F0B1F7B371C427E6FDDE1078287A3C71.eml',
'00654E78278B0BBDFBF29BAEA3F61051.eml',
'10E3BAFB831EA97615DBBBF18D601EC1.eml',
'30295A4958D6787060A9BD30ABA3BD81.eml',
'712FE30B1D680ACF5F5194E05E7AFCC1.eml',
'80E928FB95A365F85AE1A99DC8418061.eml',
'91681F0020EAC9AC7F010E917CD72F51.eml',
'C0542641286DE272AB1FAEF954BA1951.eml',
'D06DD3162620490F7E9F8ADD1AE0F621.eml',
'214C558DD0ABCAC2EA3BE06DE95E0811.eml',
'4101E93C02FBA028CEA078B9A3542B01.eml',
'51159C8E5965890AE7356E92BC1C6921.eml',
'50775947EFD5010C3D5EA799F36029A1.eml']
How can I compare the two dataframes df1 and df2?
Thank you
I tried
# DataFrame.compare requires both frames to carry identical row and column
# labels. Here df1 has a 'FileName' column, while df2 was built from a plain
# list and so has a default integer column label.
df3=df1.compare(df2, keep_equal=True)
but I get an error
Can only compare identically-labeled DataFrame objects
because df2 is created from zipfile.namelist(), which is different from df1, which is read from a CSV.

Error: No such file or directory — I want to use the heading names from an Excel sheet to create a tkinter drop-down

import pandas as pd
import tkinter as tk
from tkinter import *
from tkinter import filedialog
from pandas import ExcelWriter
from pandas import ExcelFile
def main():
    """Read one sheet from each of the three Excel paths.

    The paths come from the module-level names ``excel_filepath``,
    ``excel_filepath1`` and ``excel_filepath2``. The resulting frames are
    bound to locals only; nothing is returned.
    """
    # Input workbook, sheet "Sheet".
    df1 = pd.read_excel(excel_filepath, "Sheet")
    df2 = pd.read_excel(excel_filepath1, "Sheet2")
    df = pd.read_excel(excel_filepath2, "Sheet1")
    return None
# Build the root window with an empty title.
r=tk.Tk()
r.title('')
# Prompt label placed at grid cell (1, 1).
text1 = tk.Label(r, text="Enter File path of the INPUT AND EXP Excel file: ")
text1.grid(row = 1, column = 1)
# Holds the path typed by the user.
excel_filepath = tk.StringVar()
# This is the line reported to raise "No such file or directory".
# NOTE(review): excel_filepath was created on the previous line and has not
# been set, so .get() presumably returns an empty string at this point —
# confirm.
df1= pd.read_excel (str(excel_filepath.get()),"Sheet")
# Column headings of the sheet, as a plain list.
heading1=list((df1.columns.values))
I created a Load button. When clicked, it will read the columns and will update the drop down:
def Load_excel_data():
    """Read the workbook at ``excel_filename`` and return its column headings.

    Described in the surrounding text as the Load button's handler, so the
    file is read only when the button is clicked. ``excel_filename`` is
    looked up in the enclosing module when the function is called.

    Returns:
        list: the column labels of the first sheet, in sheet order, ready to
        be used as drop-down options.
    """
    df1 = pd.read_excel(excel_filename)
    heading1 = list(df1.columns.values)
    # Returned so the caller can populate the drop-down; a Tk button command
    # ignores the return value, so this stays compatible with that use.
    return heading1

How to read a CSV file in Google Colaboratory

from google.colab import files
uploaded = files.upload()
import io
def ls(ruta = uploaded):
    """Return the ``name`` of every item read from *ruta* that is a file.

    *ruta* defaults to the ``uploaded`` object as it was when this function
    was defined.

    NOTE(review): *ruta* is handed straight to ``io.StringIO``; this is the
    call behind the reported "initial_value must be str or None, not dict".
    """
    names = []
    for arch in io.StringIO(ruta):
        if arch.is_file():
            names.append(arch.name)
    return names


divisas = ls()
I have this error:
TypeError: initial_value must be str or None, not dict
from google.colab import files
uploaded = files.upload()
Import the google.colab library for file upload, then upload the file and use its file name to look up the contents passed to the pandas read_csv function:
import io
import pandas as pd
# Look up the uploaded content by its file name and wrap it in BytesIO so
# read_csv can consume it as a file-like object.
heart_csv = io.BytesIO(uploaded['heart.csv'])
df2 = pd.read_csv(heart_csv)
df2.head()
I need to include the file names in divisas list

How to get column header in excel generated via python ExcelWriter

I am fetching data from a Django database via a raw query and exporting it to Excel. The Excel file is generated, but the column header is missing.
Please suggest a way to get that header.
import pandas as pd
from pandas import ExcelWriter
# Build the frame from the rows fetched by the raw query.
df1 = pd.DataFrame(row1)

# Python 2 ships StringIO as its own module; Python 3 moved it into io.
# Catch only ImportError so unrelated failures are not silently swallowed.
try:
    from StringIO import StringIO
except ImportError:
    from io import StringIO
import xlwt

# Workbook lives in the xlwt package imported above; the bare name
# `Workbook` was never brought into scope.
wb = xlwt.Workbook()
writer = ExcelWriter("XYZ.xlsx", options={'remove_timezone': True})

# Redirect the writer's output to an in-memory buffer.
xl_out = StringIO()
writer.path = xl_out

ws1 = wb.add_sheet("abc")
for col_num, value in enumerate(df1.columns.values):
    # NOTE(review): this writes the literal string 'value' into every header
    # cell rather than the column label held in `value` — confirm intent.
    # Within this block `wb` is never saved, so these cells do not reach the
    # file produced by `writer`.
    ws1.write(1, col_num + 1, 'value')

df1.to_excel(writer, "abc", index=True, header=True)
writer.save()