read csv file from buffer got EmptyDataError? - pandas

I need to read a CSV-formatted string with pandas, but pandas raises an error. I don't know what happened; can anyone help me?
import pandas as pd
import io
# CSV text held in memory: a header row (first column name empty) followed by
# six data rows, each terminated by CRLF.
s = ',测试项,信息,结果\r\n0,软件测试机型805,软件测试机型805,PASS\r\n1,软件当前版本1,软件当前版本1,FAIL\r\n2,软件测试机型805,软件测试机型805,PASS\r\n3,软件当前版本1,软件当前版本1,FAIL\r\n4,软件测试机型805,软件测试机型805,PASS\r\n5,软件当前版本1,软件当前版本1,FAIL\r\n'
buf = io.StringIO()
# write() leaves the stream position at the end of the text just written.
buf.write(s)
# NOTE(review): the buffer is not rewound before it is read, so read_csv
# presumably starts at end-of-stream and finds nothing to parse, which matches
# the reported "EmptyDataError: No columns to parse from file".
df = pd.read_csv(buf)
got error, EmptyDataError: No columns to parse from file

老铁你拿去
import pandas as pd
import io
# CSV payload, written one record per source line; every record ends in CRLF.
s = (
    ',测试项,信息,结果\r\n'
    '0,软件测试机型805,软件测试机型805,PASS\r\n'
    '1,软件当前版本1,软件当前版本1,FAIL\r\n'
    '2,软件测试机型805,软件测试机型805,PASS\r\n'
    '3,软件当前版本1,软件当前版本1,FAIL\r\n'
    '4,软件测试机型805,软件测试机型805,PASS\r\n'
    '5,软件当前版本1,软件当前版本1,FAIL\r\n'
)

# Fill an in-memory text buffer with the payload.
buf = io.StringIO()
buf.write(s)

# Rewind first: after write() the position sits at the end of the buffer, and
# read_csv reads from the current position onward.
buf.seek(0)
df = pd.read_csv(buf)
``

Related

how to solve( Unsupported format, or corrupt file: Expected BOF record; found ) error?

import pandas as pd
import xlrd
import os
import matplotlib.pylab as plt
# Folder that is scanned for workbooks to open.
file_folder_address = 'C:/Users/Amirreza/Desktop/python homeworks/project files'
df_total=pd.DataFrame()
# NOTE(review): the indentation of the loop body was lost in this paste; the
# lines after the `for` presumably all belong inside the loop — confirm.
# NOTE(review): os.listdir returns the name of every entry in the folder, not
# only Excel workbooks. Any entry xlrd cannot parse (presumably e.g. an Office
# "~$" lock file, or a file that is not a real .xls workbook) could raise the
# "Expected BOF record" error — verify the folder contents.
for file in os.listdir(file_folder_address): #os.listdir returns the names of all entries in the folder
# Fresh, empty accumulators created for each entry.
df_men_urb = pd.DataFrame()
df_women_urb = pd.DataFrame()
df_men_rural = pd.DataFrame()
df_women_rural = pd.DataFrame()
# Open the entry with the xlrd engine and read its worksheet names.
sheet_names = pd.ExcelFile(os.path.join(file_folder_address, file), engine="xlrd").sheet_names
`
When I use this code, I get the error above. What should I do?

Read web content into a dataframe without writing to a file

I am trying to read data from the following link to a data frame without saving locally (this is important). I figured out a way (below), but is there an efficient way to do this?
from urllib.request import urlopen
import pandas as pd
from io import StringIO
from matplotlib.dates import DateFormatter
from datetime import datetime
# Request URL for the CSV export; the query string asks for comma-separated
# output (format=onlycomma) and marks missing values with "M" (missing=M).
uri = 'https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?station=AXA&data=all&year1=2022&month1=12&day1=1&year2=2022&month2=12&day2=1&tz=Etc%2FUTC&format=onlycomma&latlon=no&elev=no&missing=M&trace=T&direct=no&report_type=3&report_type=4'
# Download the whole response body and decode it to text; nothing is saved to disk.
data = urlopen(uri, timeout=300).read().decode("utf-8")
# Parser for timestamps of the form "YYYY-MM-DD HH:MM" (surrounding whitespace stripped).
# NOTE(review): date_parser is reportedly deprecated in recent pandas releases — confirm.
dateparse = lambda x: datetime.strptime(x.strip(), '%Y-%m-%d %H:%M')
# One list element per line of the response.
str1 = data.split('\n')
dfList = []
# NOTE(review): the indentation of the loop body was lost in this paste; the
# nesting has to be inferred from the data flow (df1 -> df2 -> append).
# The loop starts at index 1, so the first line (presumably the header row) is skipped.
for ii in range(1,len(str1)):
# Skip blank lines.
if len(str1[ii])>0:
df1 = pd.read_csv(StringIO(str1[ii]), parse_dates=[1], date_parser=dateparse, header=None) #Parse this single line into its own one-row dataframe
if not df1.empty:
df2 = df1.iloc[:,0:3] #Keep the first three columns (positions 0, 1 and 2)
if df2.iloc[0,-1] != 'M': #Skip rows whose last kept column holds the missing-value marker
dfList.append(df2)
# Stack the collected one-row frames into a single frame with a fresh index.
df = pd.concat(dfList, axis=0, ignore_index=True)
df.columns = ['Station','Date','Temp']
# Plot column 2 ('Temp') against column 1 ('Date'), selected by position.
ax1 = df.plot(x=1,y=2)
# Rotate/align the date tick labels on the figure.
ax1.get_figure().autofmt_xdate()
Using requests, pandas and io:
from io import StringIO
import pandas as pd
import requests
# Request URL for the CSV export, split across adjacent string literals so each
# line stays readable; the pieces are concatenated into one URL.
url = (
    "https://mesonet.agron.iastate.edu/cgi-bin/request/asos.py?"
    "station=AXA&data=all&year1=2022&month1=12&day1=1&year2=2022&"
    "month2=12&day2=1&tz=Etc%2FUTC&format=onlycomma&latlon=no&"
    "elev=no&missing=M&trace=T&direct=no&report_type=3&report_type=4"
)

# Fetch the whole payload with one request; the session is closed when the
# `with` block exits.
with requests.Session() as session:
    response = session.get(url, timeout=30)
    # Raise requests.HTTPError on a 4xx/5xx reply. The return value is not
    # printed: raise_for_status() returns None when it does not raise, so
    # printing it would only emit a meaningless "None".
    response.raise_for_status()

# Parse the complete CSV text in one pass straight from memory; nothing is
# written to a local file.
df = pd.read_csv(StringIO(response.text), sep=",")
print(df)

Pandas read_csv failing on gzipped file with OSError: Not a gzipped file (b'NU')

I used the code below to load the csv.gz file, but I got the error
OSError: Not a gzipped file (b'NU')
How can I solve it?
Code:
import pandas as pd
# Read the file with gzip decompression requested explicitly.
# NOTE(review): the reported error says the file starts with b'NU', not a gzip
# header, so the file is presumably not actually gzip-compressed despite its
# ".gz" suffix — verify by inspecting the first bytes of the file.
data = pd.read_csv('climat.202010.csv.gz', compression='gzip')
print(data)
Or:
import gzip
import pandas as pd
filename = 'climat.202010.csv.gz'
# Alternative attempt: open the file through gzip in binary mode and pass the
# resulting stream to pandas.
# NOTE(review): the indentation of the `with` body was lost in this paste.
with gzip.open(filename, 'rb') as f:
data = pd.read_csv(f)
Try
import gzip
# Open the archive as a binary stream and let pandas parse the decompressed
# bytes; the stream is closed when the block exits.
with gzip.open(filename, 'rb') as gz_stream:
    df = pd.read_csv(gz_stream)
This works for me:
import gzip
import pandas as pd
# Open the gzip-compressed report (raw string keeps the backslashes literal)
# and parse it while the stream is open.
with gzip.open(r'C:\Users\MyUser\OneDrive - Company\Data\Wiser\Files\WiserWeeklyReport.csv.gz') as gz_file:
    wiser_report = pd.read_csv(gz_file)

# Preview the first rows of the loaded report.
wiser_report.head()
If you're still getting an error, it may be the file or the file name. Have you tried taking out the extra period in the file name?

Convert R object(Dataframe) to Pandas Dataframe using rpy2

I am using rpy2 to get the comorbidity index of patients. I got the results, but I am not able to convert the output to a pandas DataFrame.
below is the code
# Sample input as a plain dict of columns: a person id plus two diagnosis-code
# columns (dx_1, dx_2).
# NOTE(review): no pandas DataFrame is built from `data` in this snippet.
data = {"person_id":[1,1,1,2,2,3],
"dx_1":["F11","E40","","F32","C77","G10"],
"dx_2":["F1P","E400","","F322","C737",""]}
# Convert a pandas DataFrame to an R data frame using rpy2.
import rpy2
from rpy2.robjects import pandas2ri
import rpy2.robjects.numpy2ri
from rpy2.robjects.packages import importr
# NOTE(review): `df1` is not defined anywhere in this snippet; presumably it is
# meant to be pd.DataFrame(data) — confirm.
r_dataframe = pandas2ri.py2ri(df1)
print(r_dataframe)
# Load the R 'comorbidity' package through rpy2 (importr imports an already
# installed package).
R = rpy2.robjects.r
DTW = importr('comorbidity')
# Run the comorbidity function, scoring by "charlson" on ICD-10 codes.
# NOTE(review): code="icd_1" does not match any key of `data` above (dx_1 /
# dx_2) — confirm which column is intended.
output = DTW.comorbidity(x = r_dataframe, id = "person_id", code = "icd_1",
score = "charlson", assign0 = False,
icd = "icd10")
print(output)
but not able to convert output to pandas dataframe
import rpy2, rpy2.robjects as robjects, rpy2.robjects.packages as rpackages
from rpy2.robjects.vectors import StrVector
#Converting data frames back and forth between rpy2 and pandas
from rpy2.robjects import r, pandas2ri
# Convert the R result object `output` back to a pandas DataFrame.
# NOTE(review): the converted value is not assigned to a name here, so it is
# discarded unless this runs as the last expression of an interactive cell.
# NOTE(review): the pandas2ri conversion function names presumably differ
# between rpy2 versions — confirm against the installed release.
pandas2ri.ri2py_dataframe(output)
getting below error
TypeError: Parameter 'categories' must be list-like, was
please help
Thanks in advance

How to get column header in excel generated via python ExcelWriter

I am fetching Excel data from a Django database via a raw query. The Excel file is generated, but the column header is missing.
please suggest some way to get that header.
import pandas as pd
from pandas import ExcelWriter
# Build the frame from `row1`, which is defined outside this snippet
# (presumably the rows returned by the raw query — confirm).
df1 = pd.DataFrame(row1)
# Import StringIO from the Python 2 module if available, else from io.
# NOTE(review): the indentation of the try/except bodies was lost in this paste.
try:
from StringIO import StringIO
except:
from io import StringIO
import xlwt
# NOTE(review): `Workbook` is not imported in the visible snippet (only the
# `xlwt` module is) — presumably xlwt.Workbook is intended; confirm.
wb = Workbook()
writer = ExcelWriter("XYZ.xlsx",options={'remove_timezone': True})
# Redirect the writer's output path to an in-memory text buffer.
xl_out = StringIO()
writer.path = xl_out
# NOTE(review): `ws1` belongs to the separate workbook `wb`, while the data is
# written through `writer` below; `wb` is never saved in this snippet, so
# anything written to `ws1` presumably never reaches the output file — verify.
ws1 = wb.add_sheet("abc")
for col_num, value in enumerate(df1.columns.values):
# NOTE(review): the third argument is the string literal 'value', not the loop
# variable `value`, so the same text is written for every column.
ws1.write(1,col_num + 1, 'value')
# Write the frame to sheet "abc" with both index and header requested.
df1.to_excel(writer,"abc", index= True, header=True)
writer.save()