python TypeError: expected string or buffer when parsing JSON from a file - typeerror

I realize this problem has been answered for other folks but none of the threads are helping me solve it. I'm trying to parse a JSON structure and add all values in the sent_file when the keys match with the tweet_file. The error I'm getting
import sys
import json
def main():
sent_file = open(sys.argv[1])
tweet_file = open(sys.argv[2])
scores = {}
#tweet = {}
#tweet_text = {}
#hw()
#lines(sent_file)
#lines(tweet_file)
for line in sent_file:
term,score = line.split("\t")
scores[term] = int(score)
#print scores.items()
for tweets in tweet_file:
current_sent_value = 0
tweet = {} #this is a dict
#print type(tweets) str
tweet = json.loads(tweets)#[0] #this assignment changes tweet to a list. Why?
if 'text' in tweet:
tweet_text = {}
unicode_string = tweet['text']
encoded_string = unicode_string.encode('utf-8')
tweet_text = encoded_string.split()
for key in tweet_text:
for key in scores:
#print type(tweet_text) -- list
#print type(scores) --dict
if tweet_text.get(key) == scores.get(key): # get() does not work on a list. tweet_text is a list.
current_sent_value += scores(value)
print current_sent_value
if name == 'main':
main()
The error is here \assignment1\tweet_sentiment2.py", line 42, in main
if tweet_text.get(key) == scores.get(key): # get() does not work on a list. tweet_text is a list.
AttributeError: 'list' object has no attribute 'get'

Related

'NoneType' object has no attribute 'prettify'

def josaa_scrape():
"""
Sample usage: df = josaa_scrape("2018", "1")
df.info()
"""
with requests.Session() as s:
R = s.get(url, headers=headers)
data = {}
for key, value in params.items():
data.update({tag['name']: tag['value'] for tag in BeautifulSoup(R.content, 'lxml').select('input[name^=__]')})
data[key] = value
R = s.post(url, data=data)
table = BeautifulSoup(R.text, 'lxml').find(id = 'GridView1')
df = pd.read_html(table.prettify())[0]
df.dropna(inplace = True, how="all")
return df
It is throwing an error 'NoneType' object has no attribute 'prettify'. I have check all id's.
This actually means that BeautifulSoup(R.text, 'lxml').find(id = 'GridView1') returned nothing.
Fix that line so it will return a table element

TypeError: 'Value' object is not iterable : iterate around a Dataframe for prediction purpose with GCP Natural Language Model

I'm trying to iterate over a dataframe in order to apply a predict function, which calls a Natural Language Model located on GCP. Here is the loop code :
model = 'XXXXXXXXXXXXXXXX'
barometre_df_processed = barometre_df
barometre_df_processed['theme'] = ''
barometre_df_processed['proba'] = ''
print('DEBUT BOUCLE FOR')
for ind in barometre_df.index:
if barometre_df.verbatim[ind] is np.nan :
barometre_df_processed.theme[ind]="RAS"
barometre_df_processed.proba[ind]="1"
else:
print(barometre_df.verbatim[ind])
print(type(barometre_df.verbatim[ind]))
res = get_prediction(file_path={'text_snippet': {'content': barometre_df.verbatim[ind]},'mime_type': 'text/plain'} },model_name=model)
print(res)
theme = res['displayNames']
proba = res["classification"]["score"]
barometre_df_processed.theme[ind]=theme
barometre_df_processed.proba[ind]=proba
and the get_prediction function that I took from the Natural Language AI Documentation :
def get_prediction(file_path, model_name):
options = ClientOptions(api_endpoint='eu-automl.googleapis.com:443')
prediction_client = automl_v1.PredictionServiceClient(client_options=options)
payload = file_path
# Uncomment the following line (and comment the above line) if want to predict on PDFs.
# payload = pdf_payload(file_path)
parameters_dict = {}
params = json_format.ParseDict(parameters_dict, Value())
request = prediction_client.predict(name=model_name, payload=payload, params=params)
print("fonction prediction")
print(request)
return resultat[0]["displayName"], resultat[0]["classification"]["score"], resultat[1]["displayName"], resultat[1]["classification"]["score"], resultat[2]["displayName"], resultat[2]["classification"]["score"]
I'm doing a loop this way because I want each of my couple [displayNames, score] to create a new line on my final dataframe, to have something like this :
verbatim1, theme1, proba1
verbatim1, theme2, proba2
verbatim1, theme3, proba3
verbatim2, theme1, proba1
verbatim2, theme2, proba2
...
The if barometre_df.verbatim[ind] is np.nan is not causing problems, I just use it to deal with nans, don't take care of it.
The error that I have is this one :
TypeError: 'Value' object is not iterable
I guess the issues is about
res = get_prediction(file_path={'text_snippet': {'content': barometre_df.verbatim[ind]} },model_name=model)
but I can't figure what's goign wrong here.
I already try to remove
,'mime_type': 'text/plain'}
from my get_prediction parameters, but it doesn't change anything.
Does someone knows how to deal with this issue ?
Thank you already.
I think you are not iterating correctly.
The way to iterate through a dataframe is:
for index, row in df.iterrows():
print(row['col1'])

Get annotation text from its position (PDFMiner)

I want to extract the text of annotations (such as highlighted text of hyperlinks) from its position. For this I could scrape the positions and urls by using PDFminer as in the below code. Is that possible passing this position to a layout object and get out the text?
Here are the code blocks I used for this purpose.
First part includes a function, named parse_annotation, to parse annotations from each page.
def parse_annotations(page):
positions = []
urls = []
for annot in pdftypes.resolve1(page.annots):
if isinstance(annot, pdftypes.PDFObjRef):
annotationDict = annot.resolve()
# Skip over any annotations that are not links
if str(annotationDict["Subtype"]) != "/'Link'":
continue
destID = 0
position = annotationDict["Rect"]
uriDict = "None"
if any(k in annotationDict for k in {"Dest", "D"}):
destID = (annotationDict["Dest"][0]).objid
url = "Cross reference"
elif "A" in annotationDict:
# Key A contains PDFObjRef, then resolve it again
if isinstance(annotationDict["A"], pdftypes.PDFObjRef):
uriDict = pdftypes.resolve1(annotationDict["A"])
if any(k in uriDict for k in {"Dest", "D"}):
destID = (uriDict["D"][0]).objid
else:
uriDict = annotationDict["A"]
# Check if the key exists within resolved uriDict
if str(uriDict["S"]) == "/'GoTo'":
url = "Cross reference"
elif str(uriDict["S"]) == "/'URI'":
url = str(uriDict["URI"])
url = url.lstrip("b")
url = url.replace("'", "")
else:
# Skip if key S in uriDict does not contain value URI, GoTo
continue
else:
sys.stderr.write("Warning: unknown key in annotationDict : ", annotationDict)
#print(annot, '\n', annotationDict, '\n', destID, '\n', position, '\n', uriDict, '\n', url, '\n')
print(position, '\n', url, '\n')
positions.append(position)
urls.append(url)
else:
sys.stderr.write("Warning: unknown annotation: %s\n" % annot)
return positions, urls
Example PDF file can be found from the following link below.
https://www2.ed.gov/about/offices/list/ocr/docs/20200512-qa-psi-covid-19.pdf
Now, by using PDFMiner, created a document object and start looping over the pages found in PDF.
manager = PDFResourceManager()
output = StringIO()
codec = 'utf-8'
laparams = LAParams()
converter = TextConverter(manager, output, codec=codec, laparams=laparams)
device = PDFPageAggregator(manager, laparams=laparams)
interpreter = PDFPageInterpreter(manager, device)
page_interpreter = PDFPageInterpreter(manager, converter)
filename = '20200512-qa-psi-covid-19.pdf'
fp = open(filename, 'rb')
parser = PDFParser(fp)
document = PDFDocument(parser)
if not document.is_extractable:
raise PDFTextExtractionNotAllowed
page_no = 0
for pageNumber, page in enumerate(PDFPage.create_pages(document)):
print("\n================ PageNumber ", pageNumber+1, "===================\n")
if pageNumber == page_no:
page_interpreter.process_page(page)
raw_text = output.getvalue()
output.truncate(0)
output.seek(0)
interpreter.process_page(page)
layout = device.get_result()
if page.annots:
positions, urls = parse_annotations(page)
for obj in layout:
print('Object name and position %s \t %s \n' % (obj.__class__.__name__ , obj.bbox))
page_no += 1
fp.close()
converter.close()
output.close()
device.close()
Thanks in advance,
A.

PyGtk Serialization

I am currently working on a Note taking app in pyGtk and have set up a TextView where a user can type and add text tags for Bold Underline and Italics.
However, when it comes to saving the formatted text I cannot figure out how to do so.
I am trying to save in Gtk's native tagset format however after using
tag_format = TextBuffer.register_serialize_tagset()
content = TextBuffer.serialize(self, tag_format, start,end)
I cannot write this to a file with
open(filename, 'w').write(content)
because I get an error which states that it cannot write in bytes and needs a string instead.
I am currently working on a Note taking app in pyGtk and have set up a TextView where a user can type and add text tags for Bold Underline and Italics.
However, when it comes to saving the formatted text I cannot figure out how to do so.
I am trying to save in Gtk's native tagset format however after using
tag_format = TextBuffer.register_serialize_tagset()
content = TextBuffer.serialize(self, tag_format, start,end)
I cannot write this to a file with
open(filename, 'w').write(content)
because I get an error which states that it cannot write in bytes and needs a string instead.
import gi
gi.require_version('Gtk', '3.0')
from gi.repository import Gtk, Pango
I am currently working on a Note taking app in pyGtk and have set up a TextView where a user can type and add text tags for Bold Underline and Italics.
However, when it comes to saving the formatted text I cannot figure out how to do so.
I am trying to save in Gtk's native tagset format however after using
tag_format = TextBuffer.register_serialize_tagset()
content = TextBuffer.serialize(self, tag_format, start,end)
I cannot write this to a file with
open(filename, 'w').write(content)
because I get an error which states that it cannot write in bytes and needs a string instead.
File "example.py", line 87, in save_file
open(filename, 'w').write(content)
TypeError: write() argument must be str, not bytes
Here is sample code you can run and test by typing and then saving
import gi
gi.require_version('Gtk', '3.0')
from gi.repository import Gtk, Pango
class MainWindow(Gtk.ApplicationWindow):
def __init__(self):
Gtk.Window.__init__(self, title = "TwoNote")
self.grid = Gtk.Grid()
self.toolbar = Gtk.Toolbar()
self.grid.add(self.toolbar)
#buttons for toolbar
self.button_bold = Gtk.ToggleToolButton()
self.button_italic = Gtk.ToggleToolButton()
self.button_underline = Gtk.ToggleToolButton()
self.button_save = Gtk.ToolButton()
self.button_open = Gtk.ToolButton()
self.mytext = TextSet(self.button_bold, self.button_italic, self.button_underline)
self.button_bold.set_icon_name("format-text-bold-symbolic")
self.toolbar.insert(self.button_bold, 0)
self.button_italic.set_icon_name("format-text-italic-symbolic")
self.toolbar.insert(self.button_italic, 1)
self.button_underline.set_icon_name("format-text-underline-symbolic")
self.toolbar.insert(self.button_underline, 2)
self.toolbar.insert(self.button_save, 3)
self.toolbar.insert(self.button_open, 4)
self.button_open.set_icon_name("document-open-data")
self.button_save.set_icon_name("document-save")
self.button_save.connect("clicked", self.save_file)
self.button_open.connect("clicked", self.open_file)
self.button_bold.connect("toggled", self.mytext.on_button_clicked, "Bold", self.button_italic, self.button_underline)
self.button_italic.connect("toggled", self.mytext.on_button_clicked, "Italic", self.button_bold, self.button_underline)
self.button_underline.connect("toggled", self.mytext.on_button_clicked, "Underline", self.button_bold, self.button_italic)
self.grid.attach_next_to(self.mytext, self.toolbar, Gtk.PositionType.BOTTOM, 10,30)
self.add(self.grid)
filename = "Untitled"
def open_file(self, widget):
open_dialog = Gtk.FileChooserDialog("Open an existing file", self, Gtk.FileChooserAction.OPEN,(Gtk.STOCK_CANCEL,Gtk.ResponseType.CANCEL,Gtk.STOCK_OPEN, Gtk.ResponseType.OK))
open_response = open_dialog.run()
if open_response == Gtk.ResponseType.OK:
filename = open_dialog.get_filename()
text = open(filename).read()
self.mytext.get_buffer().set_text(text)
open_dialog.destroy()
elif open_response == Gtk.ResponseType.CANCEL:
print("Cancel clicked")
open_dialog.destroy()
def save_file(self, widget):
savechooser = Gtk.FileChooserDialog('Save File', self, Gtk.FileChooserAction.SAVE, (Gtk.STOCK_CANCEL, Gtk.ResponseType.CANCEL, Gtk.STOCK_SAVE, Gtk.ResponseType.OK))
allfilter = Gtk.FileFilter()
allfilter.set_name('All files')
allfilter.add_pattern('*')
savechooser.add_filter(allfilter)
txtFilter = Gtk.FileFilter()
txtFilter.set_name('Text file')
txtFilter.add_pattern('*.txt')
savechooser.add_filter(txtFilter)
response = savechooser.run()
if response == Gtk.ResponseType.OK:
filename = savechooser.get_filename()
print(filename, 'selected.')
buf = self.mytext.get_buffer()
start, end = buf.get_bounds()
tag_format = buf.register_serialize_tagset()
content = buf.serialize(buf, tag_format, start, end)
try:
open(filename, 'w').write(content)
except SomeError as e:
print('Could not save %s: %s' % (filename, err))
savechooser.destroy()
elif response == Gtk.ResponseType.CANCEL:
print('Closed, file not saved.')
savechooser.destroy()
class TextSet(Gtk.TextView):
def __init__(self, buttonBold, buttonItalic, buttonUnderline, interval = 1 ):
# Textview Setup
Gtk.TextView.__init__(self)
self.set_vexpand(True)
self.set_indent(10)
self.set_top_margin(90)
self.set_left_margin(20)
self.set_right_margin(20)
self.set_wrap_mode(Gtk.WrapMode.CHAR)
self.tb = TextBuffer()
self.set_buffer(self.tb)
# Thread setup
self.button_bold = buttonBold
self.button_italic = buttonItalic
self.button_underline = buttonUnderline
def on_button_clicked(self, widget, tagname, widget1, widget2):
state = widget.get_active()
name = widget.get_icon_name()
bounds = self.tb.get_selection_bounds()
self.tagname = tagname
if(state):
widget1.set_active(False)
widget2.set_active(False)
#highlighting
if(len(bounds) != 0):
start, end = bounds
myIter = self.tb.get_iter_at_mark(self.tb.get_insert())
myTags = myIter.get_tags()
if(myTags == [] and state == True):
self.tb.apply_tag_by_name(tagname, start, end)
elif(myTags != [] and state == True):
self.tb.remove_all_tags(start, end)
self.tb.apply_tag_by_name(tagname, start, end)
else:
for i in range(len(myTags)):
if(myTags[i].props.name == tagname):
self.tb.remove_tag_by_name(tagname,start,end)
myTags = []
self.tb.markup(widget, tagname)
def mouse_clicked(self, window, event):
self.button_bold.set_active(False)
self.button_italic.set_active(False)
self.button_underline.set_active(False)
class TextBuffer(Gtk.TextBuffer):
def __init__(self):
Gtk.TextBuffer.__init__(self)
self.connect_after('insert-text', self.text_inserted)
# A list to hold our active tags
self.taglist_on = []
# Our Bold tag.
self.tag_bold = self.create_tag("Bold", weight=Pango.Weight.BOLD)
self.tag_none = self.create_tag("None", weight=Pango.Weight.NORMAL)
self.tag_italic = self.create_tag("Italic", style=Pango.Style.ITALIC)
self.tag_underline = self.create_tag("Underline", underline=Pango.Underline.SINGLE)
def get_iter_position(self):
return self.get_iter_at_mark(self.get_insert())
def markup(self, widget, tagname):
self.tag_name = tagname
self.check = True
''' add "bold" to our active tags list '''
if(widget.get_active() == True):
if(self.tag_name == 'Bold'):
if 'Bold' in self.taglist_on:
del self.taglist_on[self.taglist_on.index('Bold')]
else:
self.taglist_on.append('Bold')
if(self.tag_name == 'Italic'):
if 'Italic' in self.taglist_on:
del self.taglist_on[self.taglist_on.index('Italic')]
else:
self.taglist_on.append('Italic')
if(self.tag_name == 'Underline'):
if 'Underline' in self.taglist_on:
del self.taglist_on[self.taglist_on.index('Underline')]
else:
self.taglist_on.append('Underline')
else:
self.check = False
def text_inserted(self, buffer, iter, text, length):
# A text was inserted in the buffer. If there are ny tags in self.tags_on, apply them
#if self.taglist_None or self.taglist_Italic or self.taglist_Underline or self.taglist_Bold:
if self.taglist_on:
# This sets the iter back N characters
iter.backward_chars(length)
# And this applies tag from iter to end of buffer
if(self.check == True):
if(self.tag_name == 'Italic'):
self.apply_tag_by_name('Italic', self.get_iter_position(), iter)
if(self.tag_name == 'Bold'):
self.apply_tag_by_name('Bold', self.get_iter_position(), iter)
if(self.tag_name == 'Underline'):
self.apply_tag_by_name('Underline', self.get_iter_position(), iter)
else:
self.remove_all_tags(self.get_iter_position(), iter)
win = MainWindow()
win.connect("delete-event", Gtk.main_quit)
win.show_all()
Gtk.main()
I figured it out rather than using
open(filename, 'w').write(content)
to save the content I imported GLib and used
GLib.file_set_contents(filename, content)

Odoo 9 context value missing in override method

in odoo9 I override the search_read method. The super method works ok. With the data returned I want to make a filter, the filter is on the context, the value was asigned on the click of the button comming from the view.
<button name="status_instalacion" string="Instalación" type="action" icon="fa-wrench fa-2x" context="{'stage_id' : 1, 'current_id': active_id}"/>
The problem occurs when I query the context in the search_read method. It exists but doesn't have the values I placed
context on click of button:
self._context
{u'lang': u'en_US', u'stage_id': 1, u'tz': False, u'uid': 1, u'current_id': 40, u'tipo_validacion': u'Sistemas Cr\xedticos', u'sistema_critico': u'AGUA'}
the stage_id is the value I want
context on read_search:
self._context
{u'lang': u'en_US', u'bin_size': True, u'tipo_validacion': u'Sistemas Cr\xedticos', u'tz': False, u'uid': 1,
u'active_test': False, u'sistema_critico': u'AGUA'}
as you can see the 'stage_id' value is missing
Tried also assigning the value to a property of the class, but the value never changes it is always the initial value.
from logging import getLogger
from openerp import api, fields, models
_logger = getLogger(__name__)
class MgmtsystemSistemasEquipos(models.Model):
""" Equipos."""
_name = 'mgmtsystem.sistemas.equipos'
dmy = 99 # ---> this value never changes
def dummy(self): # ---> tried calling a function. not work
return self.dmy
def set_dummy(self, id): # ----> set the value
self.dmy = id or self.dmy
codigo = fields.Char(
string=u'Código',
help=u"Código equipo",
required=True,
size=30)
name = fields.Char(
string=u'Nombre equipo',
required=True,
readonly=False,
index=True,
help="Nombre corto equipo",
size=30)
stage_id = fields.Many2one(
'mgmtsystem.action.stage',
'Fase',
default=_default_stage,
readonly=True)
#api.multi
def status_instalacion(self):
import pudb
pu.db
# save value to variable dmy to retrieve later
id = self._context.get('stage_id')
self.set_dummy(id)
#api.model
def search_read(
self, domain=None, fields=None, offset=0,
limit=None, order=None):
import pudb
pu.db
# here the variable allways has the original value (99)
current_stage_id = self.dmy
current_stage_id = self.dummy()
current_stage_id = getattr(self, dmy)
res = super(MgmtsystemSistemasEquipos, self).search_read(
domain, fields, offset, limit, order)
current_id = res[0]['id']
valid_protocols_ids = self._get_ids(
current_stage_id, current_id,
'mgmtsystem_equipos_protocolos',
'mgmtsystem_equipos_protocolos_rel',
'protocolo_id')
# # remove ids
res[0]['protocolos_ids'] = valid_protocols_ids
res[0]['informes_ids'] = valid_informes_ids
res[0]['anexos_ids'] = valid_anexos_ids
return res
# #api.multi
def _get_ids(self, current_stage_id, current_id, model, model_rel, field_rel):
import pudb
pu.db
# in this method the value of the variable is allways the original
current_stage_id = self.dummy()
sql = """ select a.id from
%s as a
join %s as b
on a.id = b.%s where b.equipo_id = %s
and a.stage_id = %s; """ % (model, model_rel, field_rel,
current_id, current_stage_id)
import psycopg2
try:
self.env.cr.execute(sql)
except psycopg2.ProgrammingError, ex:
message = 'Error trying to download data from server. \n {0} \n {1}'.format(ex.pgerror, sql)
_logger.info(message)
return False
rows = self.env.cr.fetchall()
list_of_ids = []
for row in rows:
list_of_ids.append(row[0])
return list_of_ids
I don't know Python very well, and thats the cause of my misunderstanding of how to read the value of the variable.
But then again, Why is the context modified in the search_read method?.
Thank you.
You should try following.
#api.model
def search_read(self, domain=None, fields=None, offset=0, limit=None, order=None):
import pudb
pu.db
# Here you need to get the value from the context.
current_stage_id = self._context.get('stage_id', getattr(self, dmy))
res = super(MgmtsystemSistemasEquipos, self).search_read(domain=domain, fields=fields, offset=offset, limit=limit, order=order)
current_id = res[0]['id']
valid_protocols_ids = self._get_ids(
current_stage_id, current_id,
'mgmtsystem_equipos_protocolos',
'mgmtsystem_equipos_protocolos_rel',
'protocolo_id')
# # remove ids
res[0]['protocolos_ids'] = valid_protocols_ids
res[0]['informes_ids'] = valid_informes_ids
res[0]['anexos_ids'] = valid_anexos_ids
return res
In your code those lines won't work just because there is no recordset available in self (it's correct behaviour search_read must have #api.model decorator).
# here the variable allways has the original value (99)
current_stage_id = self.dmy
current_stage_id = self.dummy()
current_stage_id = getattr(self, dmy)
So just remove those and lines and apply some other logic to get data.