Upload and show a dataframe with Flask | AttributeError: 'builtin_function_or_method' object has no attribute 'replace'

I am trying to upload and show a dataframe with Flask, and when I try to display it, it says
AttributeError: 'builtin_function_or_method' object has no attribute 'replace'.
I found this code on YouTube and I don't know if it is correct. Can somebody help me?
from flask import Flask, render_template, request
from werkzeug.utils import secure_filename
import pandas as pd
import csv

def reencode(file):
    for line in file:
        yield line.decode('windows-1250').encode('utf-8')

@app.route("/data")
def data():
    df = pd.read_csv("Sistema_de_Stock.csv", encoding='latin-1')
    df = df.drop(df.loc['stock al cargar':].columns, axis=1)
    df.to_html('data.html')
    with open("data.html", 'r', encoding='latin-1') as file:
        file = file.read
        file = file.replace("<table", "<table class='rwd-table'")  # <- this line raises the error
    with open("data.html", "w") as file_write:
        file_write.write(html + file)
    data = os.startfile("data.html")
    return data

file.read is a method, so you should call it. Furthermore, you might want to rename the variable to make it clear that it is no longer a file handle:
with open('data.html', 'r', encoding='latin-1') as file:
    # call the method
    file_data = file.read().replace('<table', "<table class='rwd-table'")
with open('data.html', 'w') as file_write:
    file_write.write(html + file_data)
data = os.startfile('data.html')
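If the goal is to show the dataframe in the browser rather than opening the file locally, a minimal sketch of an alternative route is below. It assumes a Flask app object and a Jinja template named show_data.html containing {{ table | safe }}; both names are illustrative, and the CSV name is taken from the question:
from flask import Flask, render_template
import pandas as pd

app = Flask(__name__)

@app.route("/data")
def data():
    df = pd.read_csv("Sistema_de_Stock.csv", encoding='latin-1')
    # build the HTML in memory instead of writing and re-reading a file
    table_html = df.to_html(classes='rwd-table', index=False)
    # show_data.html is a Jinja template that renders {{ table | safe }}
    return render_template('show_data.html', table=table_html)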

Related

Python Dill or Pickle gives an error when used in a new file

I need help with my code. I have built a recommendation system using cosine similarity in a Colab notebook and used pickle to serialize it. When I deserialize it inside the same Colab file it works perfectly fine, but when I deserialize it in a new Colab file it gives me the error
name 'data' is not defined
data is a variable that is initialized with my dataset and lives outside of the class InstaPost.
import pandas as pd
import numpy as np
from sklearn.feature_extraction import text
from sklearn.metrics.pairwise import cosine_similarity
import dill as pickle

data = pd.read_csv("/content/instaData.txt")
data
data = data[["Caption", "Hashtags"]]
captions = data["Caption"].tolist()
uni_tfidf = text.TfidfVectorizer(input=captions, stop_words="english")
uni_matrix = uni_tfidf.fit_transform(captions)
uni_sim = cosine_similarity(uni_matrix)

def recommend_post(x):
    return ", ".join(data["Caption"].loc[x.argsort()[-7:-1]])

data["Recommended Post"] = [recommend_post(x) for x in uni_sim]

class InstaPost:
    def Post(number):
        count = 0
        wordy = data["Recommended Post"][number]
        sentence = wordy.split(',')
        for i in sentence:
            count = count + 1
            print(count, " ", i)

obj = InstaPost
obj.Post(1)

pickle_out = open("modelREC", "wb")
pickle.dump(obj, pickle_out)
pickle_out.close()

pickle_in = open("modelREC", "rb")
exe = pickle.load(pickle_in)
print(exe.Post(10))
NOTE: in a different file,
print(exe.Post)
works and gives the output
<function InstaPost.Post at 0x7efc0b4c3f70>
If I need to pass a reference to the data, please guide me on how I should do it. It would be a great help to me.
Thanks in advance
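One way to make the pickled object self-contained is to store the dataframe on the class itself, so that dill serializes it along with the method instead of relying on a global name. This is only a sketch under that assumption (dill can usually pickle a class by value together with its attributes, but that is not guaranteed in every environment); it reuses the data dataframe built above, and the attribute name is illustrative:
import dill as pickle

class InstaPost:
    data = None  # will hold the dataframe so it travels with the pickle

    @staticmethod
    def Post(number):
        wordy = InstaPost.data["Recommended Post"][number]
        for count, i in enumerate(wordy.split(','), start=1):
            print(count, " ", i)

InstaPost.data = data  # attach the dataset before dumping
with open("modelREC", "wb") as pickle_out:
    pickle.dump(InstaPost, pickle_out)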

How do I open all the links and save the images into a folder in a specific directory on my pc?

Code:
import urllib.request
from bs4 import BeautifulSoup
from requests import get
import requests
import dload
import pandas as pd
pd.set_option('display.max_colwidth', None)
week_11_picURL = "https://www.packers.com/photos/game-photos-packers-at-vikings-week-11-2021#9258618e-e793-41ae-8d9a-d3792366dcbb"
response = get(week_11_picURL)
print(response)
html_page = requests.get(week_11_picURL)
soup = BeautifulSoup(html_page.content, 'html.parser')
image = soup.findAll('div', class_="nfl-c-photo-album__picture-wrapper")
data = []
for x in soup.select('.nfl-c-photo-album__picture-wrapper picture source:first-child'):
    try:
        data.append(x['srcset'].split(',')[0])
    except:
        data.append(x['data-srcset'].split(',')[0])
data
test_url = "https://static.clubs.nfl.com/image/private/t_new_photo_album/f_auto/packers/f6jcqnmhbzs2dyvepa8z.jpg"
df = pd.DataFrame(data)
replace = df.replace(["/t_lazy", "1x"], "", regex=True)
folder = "f:/nfl pics/packers/week 11 - at vikings"
save = dload.save_multi(url_list=replace, dir=folder, max_threads=1, tsleep=0.05)
The replace dataframe contains:
0
0 https://static.clubs.nfl.com/image/private/t_new_photo_album/f_auto/packers/hjmcucejx2vmfshjkdkj.jpg
1 https://static.clubs.nfl.com/image/private/t_new_photo_album/f_auto/packers/rgsvjp6sxu89ditolacv.jpg
2 https://static.clubs.nfl.com/image/private/t_new_photo_album/f_auto/packers/zsogvqrqgaauqcdgejde.jpg
3 https://static.clubs.nfl.com/image/private/t_new_photo_album/f_auto/packers/jyegqthuab2hsuygirqp.jpg
4 https://static.clubs.nfl.com/image/private/t_new_photo_album/f_auto/packers/kwsq1fvn41f6kzqo4nkl.jpg
etc.
The error I get from using my "save" function is:
Traceback (most recent call last):
File "location", line 174, in save_multi
with open(url_list) as f:
TypeError: expected str, bytes or os.PathLike object, not DataFrame
I'm trying to find a way to automatically open all the links from the data in "replace" and save the respective images in the directory labeled "folder". When I try to use my "save" function I get the error above. How do I fix this issue, or is there a more efficient way to go about this?
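One likely fix, sketched under the assumption that dload.save_multi accepts a plain Python list of URL strings (the open(url_list) call in the traceback is the branch that handles a file path), is to convert the dataframe column to a list before passing it in; this reuses the replace dataframe and folder path from the question:
# column 0 is the default column name pandas gave the single-column dataframe
url_list = replace[0].tolist()

folder = "f:/nfl pics/packers/week 11 - at vikings"
dload.save_multi(url_list=url_list, dir=folder, max_threads=1, tsleep=0.05)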

Importing a gzipped csv into pandas

I have an url: https://api.kite.trade/instruments
And this is my code to fetch data from the url and write it into Excel
import pandas as pd
url = "https://api.kite.trade/instruments"
df = pd.read_json(url)
df.to_excel("file.xlsx")
print("Program executed successfully")
but when I run this program I'm getting an error like this:
AttributeError: partially initialized module 'pandas' has no attribute 'read_json' (most likely due to a circular import)
It's not JSON, it's CSV, so you need to use read_csv. Can you please try this?
import pandas as pd
url = "https://api.kite.trade/instruments"
df = pd.read_csv(url)
df.to_excel("file.xlsx",index=False)
print("Program excuted successfully")
I added an example of how I converted the text to a csv dump on your local drive.
import requests

url = "https://api.kite.trade/instruments"
filename = 'test.csv'

response = requests.get(url)
with open(filename, 'w') as f:
    f.write(response.text)
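Since the title mentions a gzipped csv: pandas can usually decompress on the fly via the compression parameter of read_csv. A small sketch, assuming the downloaded dump has been saved locally as a gzip-compressed file (the filename is illustrative):
import pandas as pd

# pandas infers compression from the .gz extension, or it can be forced explicitly
df = pd.read_csv("instruments.csv.gz", compression="gzip")
df.to_excel("file.xlsx", index=False)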

Loading .txt file from Google Cloud Storage into a Pandas DF

I'm trying to load a .txt file from a GCS bucket into a pandas df via pd.read_csv. When I run this code on my local machine (sourcing the .txt file from a local directory), it works perfectly. However, when I try to run the code in a cloud function, accessing the same .txt file from a GCS bucket, I get a 'TypeError: cannot use a string pattern on a bytes-like object'.
The only thing that's different is that I'm accessing the .txt file via the GCS bucket, so it's a bucket object (Blob) instead of a normal file. Would I need to download the blob as a string or as a file-like object first before doing pd.read_csv? Code is below
def stage1_cogs_vfc(data, context):
    from google.cloud import storage
    import pandas as pd
    import dask.dataframe as dd
    import io
    import numpy as np

    start_bucket = 'my_bucket'
    storage_client = storage.Client()
    source_bucket = storage_client.bucket(start_bucket)

    df = pd.DataFrame()

    file_path = 'gs://my_bucket/SCE_Var_Fact_Costs.txt'
    df = pd.read_csv(file_path, skiprows=12, encoding='utf-8', error_bad_lines=False, warn_bad_lines=False, header=None, sep='\s+|\^+', engine='python')
Traceback (most recent call last):
  File "/env/local/lib/python3.7/site-packages/google/cloud/functions/worker.py", line 383, in run_background_function
    _function_handler.invoke_user_function(event_object)
  File "/env/local/lib/python3.7/site-packages/google/cloud/functions/worker.py", line 217, in invoke_user_function
    return call_user_function(request_or_event)
  File "/env/local/lib/python3.7/site-packages/google/cloud/functions/worker.py", line 214, in call_user_function
    event_context.Context(**request_or_event.context))
  File "/user_code/main.py", line 20, in stage1_cogs_vfc
    df = pd.read_csv(file_path, skiprows=12, encoding='utf-8', error_bad_lines=False, warn_bad_lines=False, header=None, sep='\s+|\^+', engine='python')
  File "/env/local/lib/python3.7/site-packages/pandas/io/parsers.py", line 702, in parser_f
    return _read(filepath_or_buffer, kwds)
  File "/env/local/lib/python3.7/site-packages/pandas/io/parsers.py", line 429, in _read
    parser = TextFileReader(filepath_or_buffer, **kwds)
  File "/env/local/lib/python3.7/site-packages/pandas/io/parsers.py", line 895, in __init__
    self._make_engine(self.engine)
  File "/env/local/lib/python3.7/site-packages/pandas/io/parsers.py", line 1132, in _make_engine
    self._engine = klass(self.f, **self.options)
  File "/env/local/lib/python3.7/site-packages/pandas/io/parsers.py", line 2238, in __init__
    self.unnamed_cols) = self._infer_columns()
  File "/env/local/lib/python3.7/site-packages/pandas/io/parsers.py", line 2614, in _infer_columns
    line = self._buffered_line()
  File "/env/local/lib/python3.7/site-packages/pandas/io/parsers.py", line 2689, in _buffered_line
    return self._next_line()
  File "/env/local/lib/python3.7/site-packages/pandas/io/parsers.py", line 2791, in _next_line
    next(self.data)
  File "/env/local/lib/python3.7/site-packages/pandas/io/parsers.py", line 2379, in _read
    yield pat.split(line.strip())
TypeError: cannot use a string pattern on a bytes-like object
I found a similar situation here.
I also noticed that on the line:
source_bucket = storage_client.bucket(source_bucket)
you are using "source_bucket" for both: your variable name and parameter. I would suggest to change one of those.
However, I think you'd like to see this doc for any further question related to the API itself: Storage Client - Google Cloud Storage API
Building on points from @K_immer, here is my updated code, which includes reading into a Dask df...
def stage1_cogs_vfc(data, context):
    from google.cloud import storage
    import pandas as pd
    import dask.dataframe as dd
    import io
    import numpy as np
    import datetime as dt

    start_bucket = 'my_bucket'
    destination_path = 'gs://my_bucket/ddf-*_cogs_vfc.csv'
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(start_bucket)
    blob = bucket.get_blob('SCE_Var_Fact_Costs.txt')

    df0 = pd.DataFrame()

    file_path = 'gs://my_bucket/SCE_Var_Fact_Costs.txt'
    df0 = dd.read_csv(file_path, skiprows=12, dtype=object, encoding='utf-8', error_bad_lines=False, warn_bad_lines=False, header=None, sep='\s+|\^+', engine='python')

    df7 = df0.compute()  # converts dask df to pandas df
    # then do your heavy ETL stuff here using pandas...
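An alternative, if you'd rather not rely on gs:// paths, is to download the blob's contents yourself and hand pandas a file-like object. A rough sketch, assuming the bucket and object names from the question and the same parsing options:
import io
import pandas as pd
from google.cloud import storage

storage_client = storage.Client()
bucket = storage_client.get_bucket('my_bucket')
blob = bucket.get_blob('SCE_Var_Fact_Costs.txt')

# download_as_string() returns bytes; decode before wrapping in StringIO
content = blob.download_as_string().decode('utf-8')
df = pd.read_csv(io.StringIO(content), skiprows=12, header=None,
                 sep=r'\s+|\^+', engine='python')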

Python- Exporting a Dataframe into a csv

I'm trying to write a dataframe to a csv using pandas. I'm getting the following error: AttributeError: 'list' object has no attribute 'to_csv'. I believe I'm writing the syntax correctly, but could anyone point out where it is incorrect in trying to write a dataframe to a csv?
This is the link of the file: https://s22.q4cdn.com/351912490/files/doc_financials/quarter_spanish/2018/2018.02.25_Release-4Q18_ingl%C3%A9s.pdf
Thanks for your time!
import tabula
from tabula import read_pdf
import pandas as pd
from pandas import read_json, read_csv

a = read_pdf(r"C:\Users\Emege\Desktop\micro 1 true\earnings_release.pdf",
             multiple_tables=True, pages=15, output_format="csv")
print(a)
a.to_csv("a.csv", header=False, index=False, encoding="utf-8")
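With multiple_tables=True, read_pdf returns a list of dataframes rather than a single dataframe, which is why .to_csv fails on the list. A sketch of one way to handle that, reusing the path from the question (the output filenames are illustrative):
from tabula import read_pdf

tables = read_pdf(r"C:\Users\Emege\Desktop\micro 1 true\earnings_release.pdf",
                  multiple_tables=True, pages=15)
# write each extracted table to its own csv
for i, table in enumerate(tables):
    table.to_csv(f"table_{i}.csv", header=False, index=False, encoding="utf-8")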