Cannot update dictionary - python-3.8

I have a dict_ class that attempts to copy the built-in dict class.
Here is that class (the new function returns the original object):
class dict_:
    def __init__(self, *args, **kwargs):
        self.kv = kwargs
        if not self.kv:
            for kv in args:
                for k, v in kv:
                    self.kv.update({k: v})

    def __str__(self):
        return "%s" % self.kv

    def __getitem__(self, item):
        return self.kv[item]

    def update(self, *args):
        self.kv.update(args)
I've called it like this:
from Dodger.dodger import *
term = new(System())
a = new(dict_(a=1, b=2))
a.update(new(dict_(c=3)))
term.println(a)
This is supposed to modify a to {"a": 1, "b": 2, "c": 3} but instead it gives me this error:
Traceback (most recent call last):
File "C:/free_time/Dodger/dodger_test.py", line 5, in <module>
File "C:\free_time\Dodger\dodger.py", line 176, in update
File "C:\free_time\Dodger\dodger.py", line 173, in __getitem__
KeyError: 0
Why is it giving a KeyError? What does the 0 mean? (I am using python 3.8.2)

I figured out how to solve this problem. I just have to implement __setitem__ too:
def __setitem__(self, key, value):
    """Set self[key] to value"""
    self.kv[key] = value
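To answer the "what does the 0 mean?" part: dict.update treats an argument that has no keys() method as an iterable of (key, value) pairs. Here self.kv.update(args) passes the whole args tuple, so Python tries to unpack the dict_ instance inside it as a pair; since dict_ defines __getitem__ but no __iter__ or keys(), the legacy sequence protocol kicks in and iteration starts by calling dict_[0], which raises KeyError: 0. A minimal sketch of an update that makes dict_ cooperate with dict.update, assuming the goal is to mirror built-in dict semantics (this is not the author's actual code):

def keys(self):
    # Exposing keys() makes dict.update treat dict_ as a mapping,
    # reading values through __getitem__ instead of integer indexing.
    return self.kv.keys()

def update(self, *args, **kwargs):
    # Unpack args so each mapping is handed to dict.update directly,
    # rather than passing the tuple itself as a sequence of pairs.
    for other in args:
        self.kv.update(other)
    self.kv.update(kwargs)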


How to retrieve the Scrapy job id within a method?

I am trying to get the job ID of a Scrapy 2.1.x job in the spider_close method:

import os

class mysql_pipeline(object):
    def test(self):
        print(os.environ['SCRAPY_JOB'])
Unfortunately, this results in a KeyError:
ERROR: Scraper close failure
Traceback (most recent call last):
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/twisted/internet/defer.py", line 654, in _runCallbacks
current.result = callback(current.result, *args, **kw)
File "/Users/andy/spider2/crawler/pipelines.py", line 137, in close_spider
os.environ['SCRAPY_JOB'],
File "/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/os.py", line 675, in __getitem__
raise KeyError(key) from None
KeyError: 'SCRAPY_JOB'
2020-05-16 17:24:52 [scrapy
How can I pull the job id within the method?
In the spider constructor (inside __init__), add the line:

self.jobId = kwargs.get('_job')

Then in the parse function pass this in the item:

def parse(self, response):
    data = {}
    ...
    yield data['_job']

In the pipeline, add this:

def process_item(self, item, spider):
    self.jobId = item['jobId']
    ...

def close_spider(self, spider):
    print(self.jobId)
    ...
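Putting the pieces together, here is a minimal sketch of the full wiring; the class names are placeholders, and it assumes the job is scheduled by Scrapyd, which passes the job ID as the _job spider argument and sets the SCRAPY_JOB environment variable:

import os
import scrapy

class TestSpider(scrapy.Spider):
    name = "test"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Scrapyd passes the job id as the '_job' spider argument; fall
        # back to the SCRAPY_JOB environment variable when it is set.
        self.job_id = kwargs.get('_job') or os.environ.get('SCRAPY_JOB')

    def parse(self, response):
        yield {'jobId': self.job_id, 'url': response.url}

class JobIdPipeline:
    def process_item(self, item, spider):
        # The job id is also reachable here through the spider reference.
        print(item['jobId'], spider.job_id)
        return item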

Subclass pandas DataFrame with required argument

I'm working on a new data structure that subclasses pandas DataFrame. I want to enforce that my new data structure has new_property, so that it can be processed safely later on.
However, I'm running into an error when using my new data structure, because the constructor gets called by some internal pandas function without the required property.
Here is my new data structure.
import pandas as pd

class MyDataFrame(pd.DataFrame):
    @property
    def _constructor(self):
        return MyDataFrame

    _metadata = ['new_property']

    def __init__(self, data, new_property, index=None, columns=None, dtype=None, copy=True):
        super(MyDataFrame, self).__init__(data=data,
                                          index=index,
                                          columns=columns,
                                          dtype=dtype,
                                          copy=copy)
        self.new_property = new_property
Here is an example that causes the error:
data1 = {'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [15, 25, 30], 'd': [1, 1, 2]}
df1 = MyDataFrame(data1, new_property='value')
df1[['a', 'b']]
Here is the error message
Traceback (most recent call last):
File "C:\ProgramData\Anaconda3\lib\site-
packages\IPython\core\interactiveshell.py", line 2881, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-33-b630fbf14234>", line 1, in <module>
df1[['a', 'b']]
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py", line 2053, in __getitem__
return self._getitem_array(key)
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py", line 2098, in _getitem_array
return self.take(indexer, axis=1, convert=True)
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\generic.py", line 1670, in take
result = self._constructor(new_data).__finalize__(self)
TypeError: __init__() missing 1 required positional argument: 'new_property'
Is there a fix to this or an alternative way to design this to enforce my new data structure to have new_property?
Thanks in advance!
This question has been answered by a brilliant pandas developer. See this issue for more details. Pasting the answer here.
class MyDataFrame(pd.DataFrame):
    @property
    def _constructor(self):
        return MyDataFrame._internal_ctor

    _metadata = ['new_property']

    @classmethod
    def _internal_ctor(cls, *args, **kwargs):
        kwargs['new_property'] = None
        return cls(*args, **kwargs)

    def __init__(self, data, new_property, index=None, columns=None, dtype=None, copy=True):
        super(MyDataFrame, self).__init__(data=data,
                                          index=index,
                                          columns=columns,
                                          dtype=dtype,
                                          copy=copy)
        self.new_property = new_property
data1 = {'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [15, 25, 30], 'd': [1, 1, 2]}
df1 = MyDataFrame(data1, new_property='value')

df1[['a', 'b']].new_property
Out[121]: 'value'

MyDataFrame(data1)
TypeError: __init__() missing 1 required positional argument: 'new_property'
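Why this works, briefly: on internal operations pandas calls self._constructor(new_data).__finalize__(self); the _internal_ctor placeholder (new_property=None) lets the intermediate frame be built, and __finalize__ then copies every attribute listed in _metadata from the source frame. Below is a minimal sketch of that mechanism with a hypothetical TaggedFrame; exact __finalize__ behaviour can vary across pandas versions:

import pandas as pd

class TaggedFrame(pd.DataFrame):
    # Attributes named in _metadata are propagated to results by __finalize__.
    _metadata = ['tag']

    @property
    def _constructor(self):
        return TaggedFrame

df = TaggedFrame({'a': [1, 2], 'b': [3, 4]})
df.tag = 'value'
print(df[['a']].tag)  # 'value': copied over from df by __finalize__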
I know this is an old issue, but I wanted to extend hlu's answer.
When implementing the answer described by hlu, I was getting the following error when just trying to print the subclassed DataFrame: AttributeError: 'internal_constructor' object has no attribute '_from_axes'
To fix this, I used an object instead of the function from hlu's answer, so that the _from_axes method can be implemented on the callable.
There is no classmethod-type decorator for the _internal_constructor class, so instead we instantiate it with the caller's class so it can be used when _internal_constructor is called.
class MyDataFrame(pd.DataFrame):
    @property
    def _constructor(self):
        return MyDataFrame._internal_constructor(self.__class__)

    class _internal_constructor(object):
        def __init__(self, cls):
            self.cls = cls

        def __call__(self, *args, **kwargs):
            kwargs['my_required_argument'] = None
            return self.cls(*args, **kwargs)

        def _from_axes(self, *args, **kwargs):
            return self.cls._from_axes(*args, **kwargs)
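A quick usage check of the object-based constructor, assuming the rest of the class (its _metadata list and an __init__ that requires my_required_argument) follows hlu's answer above:

df1 = MyDataFrame({'a': [1, 2, 3]}, my_required_argument='value')
print(df1)  # printing no longer raises the _from_axes AttributeError
print(df1[['a']].my_required_argument)  # 'value', restored by __finalize__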

Shape must be rank 0 but is rank 1, parse_single_sequence_example

For the past few days I have been having an issue with serializing data to the tfrecord format and then deserializing it using parse_single_sequence_example. I am attempting to retrieve data for use with a fairly standard RNN model; however, this is my first attempt at using the tfrecords format and the associated pipeline that goes with it.
Here is a toy example to reproduce the issue I am having:
import tensorflow as tf
import tempfile
from IPython import embed

sequences = [[1, 2, 3], [4, 5, 1], [1, 2]]
label_sequences = [[0, 1, 0], [1, 0, 0], [1, 1]]

def make_example(sequence, labels):
    ex = tf.train.SequenceExample()
    sequence_length = len(sequence)
    ex.context.feature["length"].int64_list.value.append(sequence_length)
    fl_tokens = ex.feature_lists.feature_list["tokens"]
    fl_labels = ex.feature_lists.feature_list["labels"]
    for token, label in zip(sequence, labels):
        fl_tokens.feature.add().int64_list.value.append(token)
        fl_labels.feature.add().int64_list.value.append(label)
    return ex

writer = tf.python_io.TFRecordWriter('./test.tfrecords')
for sequence, label_sequence in zip(sequences, label_sequences):
    ex = make_example(sequence, label_sequence)
    writer.write(ex.SerializeToString())
writer.close()

tf.reset_default_graph()
file_name_queue = tf.train.string_input_producer(['./test.tfrecords'], num_epochs=None)
reader = tf.TFRecordReader()

context_features = {
    "length": tf.FixedLenFeature([], dtype=tf.int64)
}
sequence_features = {
    "tokens": tf.FixedLenSequenceFeature([], dtype=tf.int64),
    "labels": tf.FixedLenSequenceFeature([], dtype=tf.int64)
}

ex = reader.read(file_name_queue)
# Parse the example (returns a dictionary of tensors)
context_parsed, sequence_parsed = tf.parse_single_sequence_example(
    serialized=ex,
    context_features=context_features,
    sequence_features=sequence_features
)

context = tf.contrib.learn.run_n(context_parsed, n=1, feed_dict=None)
print(context[0])
sequence = tf.contrib.learn.run_n(sequence_parsed, n=1, feed_dict=None)
print(sequence[0])
The associated stack trace is:
Traceback (most recent call last):
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/common_shapes.py", line 594, in call_cpp_shape_fn
status)
File "/usr/lib/python3.5/contextlib.py", line 66, in exit
next(self.gen)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/errors.py", line 463, in raise_exception_on_not_ok_status
pywrap_tensorflow.TF_GetCode(status))
tensorflow.python.framework.errors.InvalidArgumentError: Shape must be rank 0 but is rank 1
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "my_test.py", line 51, in
sequence_features=sequence_features
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/parsing_ops.py", line 640, in parse_single_sequence_example
feature_list_dense_defaults, example_name, name)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/parsing_ops.py", line 837, in _parse_single_sequence_example_raw
name=name)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/gen_parsing_ops.py", line 285, in _parse_single_sequence_example
name=name)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py", line 749, in apply_op
op_def=op_def)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 2382, in create_op
set_shapes_for_outputs(ret)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 1783, in set_shapes_for_outputs
shapes = shape_func(op)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/common_shapes.py", line 596, in call_cpp_shape_fn
raise ValueError(err.message)
ValueError: Shape must be rank 0 but is rank 1
I posted this as a potential issue over on GitHub, though it seems I may just be using it incorrectly: Tensorflow Github Issue
So with the background information out of the way, I'm just wondering if I am in fact making an error here? Any help in the right direction would be greatly appreciated; it's been a few days and my poking around hasn't panned out. Thanks all!
Got it, and it was a bad assumption on my part. tf.TFRecordReader.read(queue, name=None) returns a (key, value) tuple, whereas I assumed it returned just the value; I was passing the whole tuple directly into the example parser.
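Concretely, the fix implied by this answer is to unpack the tuple and pass only the serialized value on; a minimal sketch against the question's code above:

# reader.read() returns (key, value); only the value holds the
# serialized SequenceExample bytes.
key, serialized_ex = reader.read(file_name_queue)
context_parsed, sequence_parsed = tf.parse_single_sequence_example(
    serialized=serialized_ex,
    context_features=context_features,
    sequence_features=sequence_features
)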

AttributeError: 'bool' object has no attribute 'strftime'

I am inheriting the 'account.partner.ledger' module. When we select a customer, we are able to print the report of that customer's ledger. In the partner ledger menu I want to make the 'Include Initial Balances' checkbox checked by default if the filter is by date/period. I tried to override the method in my custom module, but I am unable to solve the error I am getting.
Code:
@api.multi
def onchange_filter(self, filter='filter_no', fiscalyear_id=False):
    res = super(account_partner_ledger, self).onchange_filter(filter=filter, fiscalyear_id=fiscalyear_id)
    if filter in ['filter_no', 'unreconciled']:
        if filter == 'unreconciled':
            res['value'].update({'fiscalyear_id': False})
        res['value'].update({'initial_balance': False, 'period_from': False, 'period_to': False, 'date_from': False, 'date_to': False})
    if filter in ['filter_date', 'filter_period']:
        res['value'].update({'initial_balance': True, 'period_from': True, 'period_to': True, 'date_from': True, 'date_to': True})
    return res
Error:
Traceback (most recent call last):
File "C:\Users\zendynamix\odooGit\odoo8\openerp\http.py", line 544, in _handle_exception
return super(JsonRequest, self)._handle_exception(exception)
File "C:\Users\zendynamix\odooGit\odoo8\openerp\http.py", line 581, in dispatch
result = self._call_function(**self.params)
File "C:\Users\zendynamix\odooGit\odoo8\openerp\http.py", line 317, in _call_function
return checked_call(self.db, *args, **kwargs)
File "C:\Users\zendynamix\odooGit\odoo8\openerp\service\model.py", line 118, in wrapper
return f(dbname, *args, **kwargs)
File "C:\Users\zendynamix\odooGit\odoo8\openerp\http.py", line 314, in checked_call
return self.endpoint(*a, **kw)
File "C:\Users\zendynamix\odooGit\odoo8\openerp\http.py", line 810, in __call__
return self.method(*args, **kw)
File "C:\Users\zendynamix\odooGit\odoo8\openerp\http.py", line 410, in response_wrap
response = f(*args, **kw)
File "C:\Users\zendynamix\odooGit\odoo8\addons\web\controllers\main.py", line 944, in call_kw
return self._call_kw(model, method, args, kwargs)
File "C:\Users\zendynamix\odooGit\odoo8\addons\web\controllers\main.py", line 936, in _call_kw
return getattr(request.registry.get(model), method)(request.cr, request.uid, *args, **kwargs)
File "C:\Users\zendynamix\odooGit\odoo8\openerp\api.py", line 268, in wrapper
return old_api(self, *args, **kwargs)
File "C:\Users\zendynamix\odooGit\odoo8\openerp\api.py", line 399, in old_api
result = method(recs, *args, **kwargs)
File "C:\Users\zendynamix\odooGit\odoo8\openerp\models.py", line 5985, in onchange
record._onchange_eval(name, field_onchange[name], result)
File "C:\Users\zendynamix\odooGit\odoo8\openerp\models.py", line 5883, in _onchange_eval
self.update(self._convert_to_cache(method_res['value'], validate=False))
File "C:\Users\zendynamix\odooGit\odoo8\openerp\models.py", line 5391, in _convert_to_cache
for name, value in values.iteritems()
File "C:\Users\zendynamix\odooGit\odoo8\openerp\models.py", line 5392, in <dictcomp>
if name in fields
File "C:\Users\zendynamix\odooGit\odoo8\openerp\fields.py", line 1250, in convert_to_cache
return self.to_string(value)
File "C:\Users\zendynamix\odooGit\odoo8\openerp\fields.py", line 1240, in to_string
return value.strftime(DATE_FORMAT) if value else False
AttributeError: 'bool' object has no attribute 'strftime'
You sometimes have to look at the underlying code to understand what's going on. You're getting errors because Odoo is trying to convert a boolean object back to a string representation of a date (it expects a Python date object).
You can fire up a terminal and reproduce your error:
>>> True.strftime
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
AttributeError: 'bool' object has no attribute 'strftime'
>>>
This is the to_string method from Odoo:
@staticmethod
def to_string(value):
    """ Convert a :class:`date` value into the format expected by the ORM. """
    return value.strftime(DATE_FORMAT) if value else False
The condition if value tests whether value evaluates to True. Testing from the terminal:
>>> x = ''
>>> if x: print('Yeah')
...
>>>
>>> x = True
>>> if x: print('Yeah')
...
Yeah
>>> x = False
>>> if x: print('Yeah')
...
>>>
>>>
From the output we can conclude that an empty string or False evaluates to False, while True evaluates to True. So instead of setting the date values to True, set them all to empty strings.
@api.multi
def onchange_filter(self, filter='filter_no', fiscalyear_id=False):
    res = super(account_partner_ledger, self).onchange_filter(filter=filter, fiscalyear_id=fiscalyear_id)
    if filter in ['filter_no', 'unreconciled']:
        if filter == 'unreconciled':
            res['value'].update({'fiscalyear_id': False})
        res['value'].update({'initial_balance': False, 'period_from': False, 'period_to': False, 'date_from': False, 'date_to': False})
    if filter in ['filter_date', 'filter_period']:
        res['value'].update({'initial_balance': 'True', 'period_from': '', 'period_to': '', 'date_from': '', 'date_to': ''})
    return res
When you look at your code you'll see:
'date_from': True, 'date_to': True
This causes your error. You should set those fields to a date, not to a boolean.
The value False is valid, since you should be able to leave a date unfilled.
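For illustration, here is a sketch of the same override that fills in real date strings instead of booleans. Odoo 8 date fields expect '%Y-%m-%d' strings; defaulting both bounds to today's date is purely an assumption for the example:

import time

@api.multi
def onchange_filter(self, filter='filter_no', fiscalyear_id=False):
    res = super(account_partner_ledger, self).onchange_filter(
        filter=filter, fiscalyear_id=fiscalyear_id)
    if filter in ['filter_date', 'filter_period']:
        today = time.strftime('%Y-%m-%d')  # ORM-friendly date string
        res['value'].update({'initial_balance': True,
                             'date_from': today, 'date_to': today})
    return res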
Try using strptime instead of strftime and see if it solves the problem. You can use strptime as follows, for example:

from datetime import datetime
from openerp.tools import DEFAULT_SERVER_DATETIME_FORMAT

my_date = datetime.strptime(self.date_column, DEFAULT_SERVER_DATETIME_FORMAT)

How do we use pipeline items in Scrapy?

I'm a new user of Scrapy, crawling my websites. I want to store the crawled data in a MySQL database.
myspider.py:

class MininovaSpider(CrawlSpider):
    name = 'myspider'
    allowed_domains = ['example.com']
    start_urls = ['http://www.example.com']
    rules = [Rule(SgmlLinkExtractor(allow=('/categorie/.*'), restrict_xpaths=('//div[@id="contLeftNavig"]',)), 'parse_t')]

    def parse_t(self, response):
        x = HtmlXPathSelector(response)
        torrent = Torrent()
        torrent['url'] = response.url
        torrent['title'] = x.select("//h1[@class='infoAneTitre']/text()").extract()
        torrent['wilaya'] = x.select("//span[@class='ville_t']/text()").extract()
        #torrent['prix'] = x.select("//div[@id='datail_ann']/ul[1]/li[4]/span/text()").extract()
        #torrent['surface'] = x.select("//div[@id='datail_ann']/ul[3]/li[1]/span/text()").extract()
        torrent['description'] = x.select("//div[@class='box_pad']/text()").extract()
        return torrent
For pipelines.py, I modified and used the googledir example. So when I run the crawl I get this error:
exceptions.AttributeError: 'MininovaSpider' object has no attribute 'iterkeys'
exceptions.TypeError: 'MininovaSpider' object is not subscriptable
pipelines.py:
from scrapy import log
from twisted.enterprise import adbapi
import time
import MySQLdb.cursors

class Pipeline(object):
    def __init__(self):
        self.dbpool = adbapi.ConnectionPool('MySQLdb',
                db='test',
                user='root',
                passwd='',
                cursorclass=MySQLdb.cursors.DictCursor,
                charset='utf8',
                use_unicode=True
        )

    def process_item(self, spider, item):
        query = self.dbpool.runInteraction(self._conditional_insert, item)
        query.addErrback(self.handle_error)
        return item

    def _conditional_insert(self, tx, item):
        tx.execute("select * from database where url = %s", (item['url'],))
        result = tx.fetchone()
        if result:
            log.msg("Item already stored in db: %s" % item, level=log.DEBUG)
        else:
            tx.execute(
                "insert into database (wilaya, titre, site, lien, resume, timestamp) "
                "values (%s, %s, %s, %s, %s, %s)",
                (item['wilaya'],
                 item['title'],
                 'example.com', item['url'], item['description'],
                 time.time())
            )
            log.msg("Item stored in db: %s" % item, level=log.DEBUG)

    def handle_error(self, e):
        log.err(e)
and traceback:
Traceback (most recent call last):
File "/usr/lib/python2.7/twisted/internet/defer.py", line 287, in addCallbacks
self._runCallbacks()
File "/usr/lib/python2.7/twisted/internet/defer.py", line 545, in _runCallbacks
current.result = callback(current.result, *args, **kw)
File "/usr/lib/python2.7/site-packages/scrapy/core/scraper.py", line 208, in _itemproc_finished
item=output, response=response, spider=spider)
File "/usr/lib/python2.7/site-packages/scrapy/utils/signal.py", line 53, in send_catch_log_deferred
*arguments, **named)
--- <exception caught here> ---
File "/usr/lib/python2.7/twisted/internet/defer.py", line 134, in maybeDeferred
result = f(*args, **kw)
File "/usr/lib/python2.7/site-packages/scrapy/xlib/pydispatch/robustapply.py", line 47, in robustApply
return receiver(*arguments, **named)
File "/usr/lib/python2.7/site-packages/scrapy/contrib/feedexport.py", line 177, in item_scraped
slot.exporter.export_item(item)
File "/usr/lib/python2.7/site-packages/scrapy/contrib/exporter/__init__.py", line 109, in export_item
itemdict = dict(self._get_serialized_fields(item))
File "/usr/lib/python2.7/site-packages/scrapy/contrib/exporter/__init__.py", line 60, in _get_serialized_fields
field_iter = item.iterkeys()
exceptions.AttributeError: 'MininovaSpider' object has no attribute 'iterkeys'
2012-01-18 16:00:43-0600 [scrapy] Unhandled Error
Traceback (most recent call last):
File "/usr/lib/python2.7/threading.py", line 503, in __bootstrap
self.__bootstrap_inner()
File "/usr/lib/python2.7/threading.py", line 530, in __bootstrap_inner
self.run()
File "/usr/lib/python2.7/threading.py", line 483, in run
self.__target(*self.__args, **self.__kwargs)
--- <exception caught here> ---
File "/usr/lib/python2.7/twisted/python/threadpool.py", line 207, in _worker
result = context.call(ctx, function, *args, **kwargs)
File "/usr/lib/python2.7/twisted/python/context.py", line 118, in callWithContext
return self.currentContext().callWithContext(ctx, func, *args, **kw)
File "/usr/lib/python2.7/twisted/python/context.py", line 81, in callWithContext
return func(*args,**kw)
File "/usr/lib/python2.7/twisted/enterprise/adbapi.py", line 448, in _runInteraction
result = interaction(trans, *args, **kw)
File "/opt/scrapy/test/pipelines.py", line 33, in _conditional_insert
tx.execute("select * from database where url = %s", (item['url'] ))
exceptions.TypeError: 'MininovaSpider' object is not subscriptable
exceptions.TypeError: 'MininovaSpider' object is not subscriptable
It looks like you have yielded a spider (MininovaSpider) instance somewhere instead of an item. I think there is more code that you haven't shown.
In Pipeline.process_item() put this to confirm:
def process_item(self, spider, item):
    assert isinstance(item, Torrent), 'Here should be Torrent instance!'
    query = self.dbpool.runInteraction(self._conditional_insert, item)
    query.addErrback(self.handle_error)
    return item
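A further observation, going beyond the original answer: Scrapy calls pipelines as process_item(item, spider), while the pipeline above declares process_item(self, spider, item). With the parameters swapped, item is bound to the spider object, which would explain both errors here: the pipeline returns the spider downstream (hence iterkeys failing in the feed exporter) and indexes it with item['url'] (hence 'not subscriptable'). The corrected signature:

def process_item(self, item, spider):  # Scrapy passes (item, spider) in this order
    query = self.dbpool.runInteraction(self._conditional_insert, item)
    query.addErrback(self.handle_error)
    return item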