Dynamic dataframe concatenation takes garbage value in python-flask - dataframe

The below snippets takes list of features dynamically from the html form and the app.py computes the corresponding feature and append the selected feature all together and write it in a CSV file. The problem here is during concatenation, the dataframe of features which are not selected takes some garbage value during concatenation. Also suggest how to append the name of the feature to the header dynamically
<input type="checkbox" id="meanT" name="tdf" value="meanT">
<label for="mean"> Mean</label><br>
<input type="checkbox" id="stdT" name="tdf" value="stdT">
<label for="std"> Standard Deviation</label><br>
<input type="checkbox" id="medianT" name="tdf" value="medianT">
<label for="median"> Median</label><br>
<input type="checkbox" id="madT" name="tdf" value="madT">
<label for="mad"> Mean Absolute Deviation </label><br>
<input type="checkbox" id="rmsT" name="tdf" value="rmsT">
<label for="rms"> Root Mean Square</label><br>
<input type="checkbox" id="covT" name="tdf" value="covT">
<label for="cov"> Covariance</label><br>
app.py
#app.route('/feature_selection', methods =['GET', 'POST']) def feature_selection(): if request.method == 'POST': features=request.form.getlist('tdf')
`import os`
`ROOT_PATH = os.path.dirname(os.path.abspath(__file__))`
`files = request.files['fs_file']`
`files.save(os.path.join(ROOT_PATH,files.filename))
import pandas
raw_csv2 = pandas.read_csv(os.path.join(ROOT_PATH,files.filename))
X=raw_csv2.iloc[:,:-1]
print(X)
print(len(X.columns))
np.savetxt("D:/tool/feat_tobe_sel.csv",X,delimiter=',',fmt='%s')
from scipy.fftpack import fft
final=[]`
`final_mean = np.empty((1,len(X.columns)),np.float64)
final_std = np.empty((1,len(X.columns)),np.float64)
final_median = np.empty((1,len(X.columns)),np.float64)
final_mad = np.empty((1,len(X.columns)),np.float64)
final_rms = np.empty((1,len(X.columns)),np.float64)
final_cov = np.empty((1,len(X.columns)),np.float64)`
`for feature in features:
print(feature)
if feature=='meanT':
for chunk in pd.read_csv('D:/tool/feat_tobe_sel.csv',chunksize=250):
mean = np.array(chunk.mean())#mean
final_mean=np.append(final_mean,[mean],axis=0)
print("meanT")
elif feature=='stdT':
for chunk in pd.read_csv('D:/tool/feat_tobe_sel.csv',chunksize=250):
std = np.array(chunk.std())#standard deviation
final_std = np.append(final_std, [std], axis=0)
print("stdT")
elif feature=='medianT':
for chunk in pd.read_csv('D:/tool/feat_tobe_sel.csv',chunksize=250):
median = np.array(chunk.median())#median
final_median = np.append(final_median, [median], axis=0)
print("medianT")
elif feature=='madT':
for chunk in pd.read_csv('D:/tool/feat_tobe_sel.csv',chunksize=250):
mad = np.array(chunk.mad())
final_mad = np.append(final_mad, [mad], axis=0)
elif feature=='rmsT':
for chunk in pd.read_csv('D:/tool/feat_tobe_sel.csv',chunksize=250):
rms = np.array(np.sqrt(np.mean(chunk**2)))
final_rms = np.append(final_rms, [rms], axis=0)
print("rmsT")
elif feature=='covT':
for chunk in pd.read_csv('D:/tool/feat_tobe_sel.csv',chunksize=250):
cov = chunk.cov()
for covItem in cov:
final_cov = np.append(final_cov, [np.array(cov[covItem])], axis=0)`
`
`df2=pandas.DataFrame(final_mean)
df3=pandas.DataFrame(final_std)
df4=pandas.DataFrame(final_median)
df5=pandas.DataFrame(final_mad)
df6=pandas.DataFrame(final_rms)
df7=pandas.DataFrame(final_cov)
dfs = [df2,df3,df4,df5,df6,df7]`
`non_empty=[df for df in dfs if len(df)!=0]
dfm=pd.concat(non_empty,axis=1)
np.savetxt(r"D:/tool/features_selected.csv",dfm,delimiter=',',fmt='%s') `
`return rendertemplates("feat.html")`

Related

Convert datasets of strings to arrays the value of its bytes

I'm trying to use wiki40b/en to train a neural network. I want preprocess the data and also generate the expected output from it:
Remove tags
Convert string to an array of the values of its bytes
Add noise to the array (zero or modify some values)
Currently I have the following code:
def add_noise_to_sentence(sentence, noise=0.15):
# How I can get the length of the sentence?
mask = np.random.random_sample((len(sentence),)) > noise
with_noise = sentence * mask
return with_noise, mask
def preprocess_lang(text):
text = tf.strings.regex_replace(text, "_START_ARTICLE_ | _START_PARAGRAPH_ | \n | <br> | <p> | </p> | <html> | </html> | <body> | </body>", " ")
text = tf.io.decode_raw(text, tf.uint8) # I thought it was going to cast to uint8[]
# text = tf.strings.bytes_split(text) # This didn't work either
noise_text = add_noise_to_sentence(text)
return noise_text, text
ds = tfds.load('wiki40b', split=["train"], shuffle_files=True)
ds = ds.map(lambda x: preprocess_lang(x["text"]))
ds = ds.cache()
ds = ds.batch(128)
ds = ds.prefetch(tf.data.AUTOTUNE)
How I can transform the string to an array of numbers (my model expects numbers as inputs) and do the required transformations as well?

How can I get a value from an attribute inside a tag as a int

I have a soup object like:
class="js-product-discount-item product-discount__item ">
<p class="product-discount__price js-product-discount-price">
<span class="price">3 033 <span class="currency w500">₽<span class="currency_seo">руб.</span></span></span> </p>
I did
soup = BeautifulSoup(src, 'lxml')
price_2 = soup.find(class_='price-discount-value').find(class_='price').text.strip()
x = 2
Result :
3 033 ₽руб.
I'd like to make:
price_3 = price_2/x
I have : TypeError: unsupported operand type(s) for /: 'str' and 'int'
What happens?
You are extracting a string with .text but to use the / operand it should be an int
How to fix?
First at all, clean your string from non digit characters:
...find(class_='price').text.split('₽')[0].replace(' ','')
For calculating convert it with int() to an integer:
int(price_2)/x
Example
Note Changed the find() for these example, cause your question do not provide an correct html
from bs4 import BeautifulSoup
html = '''
<p class="product-discount__price js-product-discount-price">
<span class="price">3 033 <span class="currency w500">₽<span class="currency_seo">руб.</span></span></span>
</p>'''
soup = BeautifulSoup(html, 'lxml')
price_2 = soup.find(class_='product-discount__price').find(class_='price').text.split('₽')[0].replace(' ','')
x = 2
price_3 = int(price_2)/x
print(price_3)
Output
1516.5

I want to export an Postgres Table as an CSV File as a download button on Flask. How to do that?

I have a table on my PSQL and I want to export it as a downloadable file to any user who clicks on the download button and hence my path is not fixed. The copy command doesn't work for me here. Any solutions?
#app.route("/")
#roles_required('member')
def index():
return render_template("pages/index.html")
#app.route("/download_csv")
#roles_required('member')
def download_csv():
conn = None
cursor = None
try:
conn = psycopg2.connect("host=localhost dbname=db user=u password=p")
cursor = conn.cursor()
cursor.execute('SELECT id FROM table')
# conn.commit()
result = cursor.fetchall() # fetchone()
output = io.StringIO()
writer = csv.writer(output)
line = ['column1,column2']
writer.writerow(line)
for row in result:
line = [row[0] + ' , ' + row[1] + ' , ']
writer.writerow(line)
output.seek(0)
return Response(output, mimetype="text/csv",
headers={"Content-Disposition": "attachment;filename=report.csv"})
except Exception as e:
print(e)
finally:
cursor.close()
conn.close()
index.html
...
<a href="{{ url_for('app.download_csv') }}" style="margin-bottom: 10px; margin-left: 10px;" class="btn btn-primary" type="button">
Download Csv <i class="fa fa-file-csv"></i>
</a>
...

Python in Beautiful Soup

I want to fetch text between
<label>A</label> class A <br/><label>B</label> class B <br/> <label>C </label> class C <br />
Expected output in Dictionary like data
{'A':'class A','B':'class B','C':'class C'}
You can search for <label> tag and then get next text sibling to it.
For example:
from bs4 import BeautifulSoup
txt = '''<label>A</label> class A <br/><label>B</label> class B <br/> <label>C </label> class C <br />'''
soup = BeautifulSoup(txt, 'html.parser')
data = {label.get_text(strip=True): label.find_next_sibling(text=True).strip() for label in soup.select('label')}
print(data)
Prints:
{'A': 'class A', 'B': 'class B', 'C': 'class C'}

PyGtk Serialization

I am currently working on a Note taking app in pyGtk and have set up a TextView where a user can type and add text tags for Bold Underline and Italics.
However, when it comes to saving the formatted text I cannot figure out how to do so.
I am trying to save in Gtk's native tagset format however after using
tag_format = TextBuffer.register_serialize_tagset()
content = TextBuffer.serialize(self, tag_format, start,end)
I cannot write this to a file with
open(filename, 'w').write(content)
because I get an error which states that it cannot write in bytes and needs a string instead.
I am currently working on a Note taking app in pyGtk and have set up a TextView where a user can type and add text tags for Bold Underline and Italics.
However, when it comes to saving the formatted text I cannot figure out how to do so.
I am trying to save in Gtk's native tagset format however after using
tag_format = TextBuffer.register_serialize_tagset()
content = TextBuffer.serialize(self, tag_format, start,end)
I cannot write this to a file with
open(filename, 'w').write(content)
because I get an error which states that it cannot write in bytes and needs a string instead.
import gi
gi.require_version('Gtk', '3.0')
from gi.repository import Gtk, Pango
I am currently working on a Note taking app in pyGtk and have set up a TextView where a user can type and add text tags for Bold Underline and Italics.
However, when it comes to saving the formatted text I cannot figure out how to do so.
I am trying to save in Gtk's native tagset format however after using
tag_format = TextBuffer.register_serialize_tagset()
content = TextBuffer.serialize(self, tag_format, start,end)
I cannot write this to a file with
open(filename, 'w').write(content)
because I get an error which states that it cannot write in bytes and needs a string instead.
File "example.py", line 87, in save_file
open(filename, 'w').write(content)
TypeError: write() argument must be str, not bytes
Here is sample code you can run and test by typing and then saving
import gi
gi.require_version('Gtk', '3.0')
from gi.repository import Gtk, Pango
class MainWindow(Gtk.ApplicationWindow):
def __init__(self):
Gtk.Window.__init__(self, title = "TwoNote")
self.grid = Gtk.Grid()
self.toolbar = Gtk.Toolbar()
self.grid.add(self.toolbar)
#buttons for toolbar
self.button_bold = Gtk.ToggleToolButton()
self.button_italic = Gtk.ToggleToolButton()
self.button_underline = Gtk.ToggleToolButton()
self.button_save = Gtk.ToolButton()
self.button_open = Gtk.ToolButton()
self.mytext = TextSet(self.button_bold, self.button_italic, self.button_underline)
self.button_bold.set_icon_name("format-text-bold-symbolic")
self.toolbar.insert(self.button_bold, 0)
self.button_italic.set_icon_name("format-text-italic-symbolic")
self.toolbar.insert(self.button_italic, 1)
self.button_underline.set_icon_name("format-text-underline-symbolic")
self.toolbar.insert(self.button_underline, 2)
self.toolbar.insert(self.button_save, 3)
self.toolbar.insert(self.button_open, 4)
self.button_open.set_icon_name("document-open-data")
self.button_save.set_icon_name("document-save")
self.button_save.connect("clicked", self.save_file)
self.button_open.connect("clicked", self.open_file)
self.button_bold.connect("toggled", self.mytext.on_button_clicked, "Bold", self.button_italic, self.button_underline)
self.button_italic.connect("toggled", self.mytext.on_button_clicked, "Italic", self.button_bold, self.button_underline)
self.button_underline.connect("toggled", self.mytext.on_button_clicked, "Underline", self.button_bold, self.button_italic)
self.grid.attach_next_to(self.mytext, self.toolbar, Gtk.PositionType.BOTTOM, 10,30)
self.add(self.grid)
filename = "Untitled"
def open_file(self, widget):
open_dialog = Gtk.FileChooserDialog("Open an existing file", self, Gtk.FileChooserAction.OPEN,(Gtk.STOCK_CANCEL,Gtk.ResponseType.CANCEL,Gtk.STOCK_OPEN, Gtk.ResponseType.OK))
open_response = open_dialog.run()
if open_response == Gtk.ResponseType.OK:
filename = open_dialog.get_filename()
text = open(filename).read()
self.mytext.get_buffer().set_text(text)
open_dialog.destroy()
elif open_response == Gtk.ResponseType.CANCEL:
print("Cancel clicked")
open_dialog.destroy()
def save_file(self, widget):
savechooser = Gtk.FileChooserDialog('Save File', self, Gtk.FileChooserAction.SAVE, (Gtk.STOCK_CANCEL, Gtk.ResponseType.CANCEL, Gtk.STOCK_SAVE, Gtk.ResponseType.OK))
allfilter = Gtk.FileFilter()
allfilter.set_name('All files')
allfilter.add_pattern('*')
savechooser.add_filter(allfilter)
txtFilter = Gtk.FileFilter()
txtFilter.set_name('Text file')
txtFilter.add_pattern('*.txt')
savechooser.add_filter(txtFilter)
response = savechooser.run()
if response == Gtk.ResponseType.OK:
filename = savechooser.get_filename()
print(filename, 'selected.')
buf = self.mytext.get_buffer()
start, end = buf.get_bounds()
tag_format = buf.register_serialize_tagset()
content = buf.serialize(buf, tag_format, start, end)
try:
open(filename, 'w').write(content)
except SomeError as e:
print('Could not save %s: %s' % (filename, err))
savechooser.destroy()
elif response == Gtk.ResponseType.CANCEL:
print('Closed, file not saved.')
savechooser.destroy()
class TextSet(Gtk.TextView):
def __init__(self, buttonBold, buttonItalic, buttonUnderline, interval = 1 ):
# Textview Setup
Gtk.TextView.__init__(self)
self.set_vexpand(True)
self.set_indent(10)
self.set_top_margin(90)
self.set_left_margin(20)
self.set_right_margin(20)
self.set_wrap_mode(Gtk.WrapMode.CHAR)
self.tb = TextBuffer()
self.set_buffer(self.tb)
# Thread setup
self.button_bold = buttonBold
self.button_italic = buttonItalic
self.button_underline = buttonUnderline
def on_button_clicked(self, widget, tagname, widget1, widget2):
state = widget.get_active()
name = widget.get_icon_name()
bounds = self.tb.get_selection_bounds()
self.tagname = tagname
if(state):
widget1.set_active(False)
widget2.set_active(False)
#highlighting
if(len(bounds) != 0):
start, end = bounds
myIter = self.tb.get_iter_at_mark(self.tb.get_insert())
myTags = myIter.get_tags()
if(myTags == [] and state == True):
self.tb.apply_tag_by_name(tagname, start, end)
elif(myTags != [] and state == True):
self.tb.remove_all_tags(start, end)
self.tb.apply_tag_by_name(tagname, start, end)
else:
for i in range(len(myTags)):
if(myTags[i].props.name == tagname):
self.tb.remove_tag_by_name(tagname,start,end)
myTags = []
self.tb.markup(widget, tagname)
def mouse_clicked(self, window, event):
self.button_bold.set_active(False)
self.button_italic.set_active(False)
self.button_underline.set_active(False)
class TextBuffer(Gtk.TextBuffer):
def __init__(self):
Gtk.TextBuffer.__init__(self)
self.connect_after('insert-text', self.text_inserted)
# A list to hold our active tags
self.taglist_on = []
# Our Bold tag.
self.tag_bold = self.create_tag("Bold", weight=Pango.Weight.BOLD)
self.tag_none = self.create_tag("None", weight=Pango.Weight.NORMAL)
self.tag_italic = self.create_tag("Italic", style=Pango.Style.ITALIC)
self.tag_underline = self.create_tag("Underline", underline=Pango.Underline.SINGLE)
def get_iter_position(self):
return self.get_iter_at_mark(self.get_insert())
def markup(self, widget, tagname):
self.tag_name = tagname
self.check = True
''' add "bold" to our active tags list '''
if(widget.get_active() == True):
if(self.tag_name == 'Bold'):
if 'Bold' in self.taglist_on:
del self.taglist_on[self.taglist_on.index('Bold')]
else:
self.taglist_on.append('Bold')
if(self.tag_name == 'Italic'):
if 'Italic' in self.taglist_on:
del self.taglist_on[self.taglist_on.index('Italic')]
else:
self.taglist_on.append('Italic')
if(self.tag_name == 'Underline'):
if 'Underline' in self.taglist_on:
del self.taglist_on[self.taglist_on.index('Underline')]
else:
self.taglist_on.append('Underline')
else:
self.check = False
def text_inserted(self, buffer, iter, text, length):
# A text was inserted in the buffer. If there are ny tags in self.tags_on, apply them
#if self.taglist_None or self.taglist_Italic or self.taglist_Underline or self.taglist_Bold:
if self.taglist_on:
# This sets the iter back N characters
iter.backward_chars(length)
# And this applies tag from iter to end of buffer
if(self.check == True):
if(self.tag_name == 'Italic'):
self.apply_tag_by_name('Italic', self.get_iter_position(), iter)
if(self.tag_name == 'Bold'):
self.apply_tag_by_name('Bold', self.get_iter_position(), iter)
if(self.tag_name == 'Underline'):
self.apply_tag_by_name('Underline', self.get_iter_position(), iter)
else:
self.remove_all_tags(self.get_iter_position(), iter)
win = MainWindow()
win.connect("delete-event", Gtk.main_quit)
win.show_all()
Gtk.main()
I figured it out rather than using
open(filename, 'w').write(content)
to save the content I imported GLib and used
GLib.file_set_contents(filename, content)