How to create multiple pandas profiling reports for multiple csv files in a directory? The report name should match the file name

How to create multiple pandas profiling reports for multiple csv files in a directory? The report name should match the file name - pandas

I tried this,
import glob
import os
import pandas as pd
import pandas_profiling
from pandas_profiling import ProfileReport
files = glob.glob("D:\home_health_services_current_data\*.csv")
df = pd.DataFrame()
for f in files:
csv = pd.read_csv(f)
df = df.append(csv)
profile = ProfileReport(df, title="Profiling Report", explorative=True)
profile.to_file("D:\proj_report\profilerep\prof_report.html")

Related

how to solve( Unsupported format, or corrupt file: Expected BOF record; found ) error?

import pandas as pd
import xlrd
import os
import matplotlib.pylab as plt
file_folder_address = 'C:/Users/Amirreza/Desktop/python homeworks/project files'
df_total=pd.DataFrame()
for file in os.listdir(file_folder_address): #os.listdir gives a list of exel file names
df_men_urb = pd.DataFrame()
df_women_urb = pd.DataFrame()
df_men_rural = pd.DataFrame()
df_women_rural = pd.DataFrame()
sheet_names = pd.ExcelFile(os.path.join(file_folder_address, file), engine="xlrd").sheet_names
`
when I use this cod make above error . what should I do ?

compare 2 pandas dataframes

import glob
import pandas as pd
import numpy as np
import os
import fnmatch
import zipfile
df1 = pd.read_csv("2016Q12ExactTargetE1.csv",names = ['FileName'])
print("\nRead " ,df1.shape[0] , "Records")
# accessing and printing files in directory and subdirectory
for filename in glob.glob('c:\\temp\\*.zip', recursive=True):
#print(filename)
myzip=filename
zf = zipfile.ZipFile(myzip)
zfl = zf.namelist()
eml_files = fnmatch.filter(zfl, "*.eml")
df2 = pd.DataFrame(eml_files )
print("\nRead2 " ,df2.shape[0] , "Records")
The csv file
FileName
F0B1F7B371C427E6FDDE1078287A3C71.eml
E107A8CADF8F87B05599A3AAF03D5BA1.eml
30B54778C0B912F2516F6C390A137E91.eml
D06DD3162620490F7E9F8ADD1AE0F621.eml
10E3BAFB831EA97615DBBBF18D601EC1.eml
the eml_files looks like
['00E6E77CE9890A3F34343997BCA33791.eml',
'109E4F29239EA8259707B2E3D0D00351.eml',
'403EBEC70C1F305B72EFAA3822D75871.eml',
'30B54778C0B912F2516F6C390A137E91.eml',
'E107A8CADF8F87B05599A3AAF03D5BA1.eml',
'F0B1F7B371C427E6FDDE1078287A3C71.eml',
'00654E78278B0BBDFBF29BAEA3F61051.eml',
'10E3BAFB831EA97615DBBBF18D601EC1.eml',
'30295A4958D6787060A9BD30ABA3BD81.eml',
'712FE30B1D680ACF5F5194E05E7AFCC1.eml',
'80E928FB95A365F85AE1A99DC8418061.eml',
'91681F0020EAC9AC7F010E917CD72F51.eml',
'C0542641286DE272AB1FAEF954BA1951.eml',
'D06DD3162620490F7E9F8ADD1AE0F621.eml',
'214C558DD0ABCAC2EA3BE06DE95E0811.eml',
'4101E93C02FBA028CEA078B9A3542B01.eml',
'51159C8E5965890AE7356E92BC1C6921.eml',
'50775947EFD5010C3D5EA799F36029A1.eml']
How can I compare the two dataframes df1 and df2
Thank you
I tried
df3=df1.compare(df2, keep_equal=True)
but I get an error
Can only compare identically-labeled DataFrame objects
because the df2 is created by zipfile.namelist() which is diffrent from df1 which is read from a csv

Error: No such file or directory: i want to use heading names from excel sheet to create tkinter drop down

import pandas as pd
import tkinter as tk
from tkinter import *
from tkinter import filedialog
from pandas import ExcelWriter
from pandas import ExcelFile
def main():
df1= pd.read_excel (excel_filepath,"Sheet") ## here i am reading the file ##
df2= pd.read_excel (excel_filepath1,"Sheet2")
df= pd.read_excel (excel_filepath2,"Sheet1")
return
r=tk.Tk()
r.title('')
text1 = tk.Label(r, text="Enter File path of the INPUT AND EXP Excel file: ")
text1.grid(row = 1, column = 1)
excel_filepath = tk.StringVar()
df1= pd.read_excel (str(excel_filepath.get()),"Sheet")## this is the line showing error ##
heading1=list((df1.columns.values)) ## this is the line reading readings ##

I created a Load button. When clicked, it will read the columns and will update the drop down:
def Load_excel_data():
enter code here`df1= pd.read_excel(excel_filename)
heading1=list((df1.columns.values))

To read csv in google colaboratore

from google.colab import files
uploaded = files.upload()
import io
def ls(ruta = uploaded):
return [arch.name for arch in io.StringIO((ruta)) if arch.is_file()]
divisas = ls()
I have this error:
TypeError: initial_value must be str or None, not dict

from google.colab import files
uploaded = files.upload()
Import the google.colab library for file upload then upload the file and pass file name inside the pandas read_csv function
import io
import pandas as pd
df2 = pd.read_csv(io.BytesIO(uploaded['heart.csv']))
df2.head()

I need to include the file names in divisas list

How to get column header in excel generated via python ExcelWriter

I am fetching excel data from django database via raw query. excel is generated but column header is missing .
please suggest some way to get that header.
import pandas as pd
from pandas import ExcelWriter
df1 = pd.DataFrame(row1)
try:
from StringIO import StringIO
except:
from io import StringIO
import xlwt
wb = Workbook()
writer = ExcelWriter("XYZ.xlsx",options={'remove_timezone': True})
xl_out = StringIO()
writer.path = xl_out
ws1 = wb.add_sheet("abc")
for col_num, value in enumerate(df1.columns.values):
ws1.write(1,col_num + 1, 'value')
df1.to_excel(writer,"abc", index= True, header=True)
writer.save()

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

How to create multiple pandas profiling reports for multiple csv files in a directory? The report name should match the file name - pandas

Related

how to solve( Unsupported format, or corrupt file: Expected BOF record; found ) error?

compare 2 pandas dataframes

Error: No such file or directory: i want to use heading names from excel sheet to create tkinter drop down

To read csv in google colaboratore

How to get column header in excel generated via python ExcelWriter

Categories

Resources