Trying to get object id with date in pymongo - pymongo

import pymongo
import datetime
from pymongo import MongoClient
from bson.objectid import ObjectId
from time import gmtime, strftime
gen_time = strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime())
dummy_id = ObjectId.from_datetime(gen_time)
result = db["config"].find({"_id":{"$lt": dummy_id}})
print(result)
and it is showing this error: AttributeError: 'str' object has no attribute 'utcoffset'

You're passing a string to ObjectId.from_datetime():
gen_time = strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime())
dummy_id = ObjectId.from_datetime(gen_time)
whereas the docs say
Pass either a naive datetime instance containing UTC, or an aware instance that has been converted to UTC.
You probably want
dummy_id = ObjectId.from_datetime(datetime.utcnow())
(or rather datetime.datetime.utcnow(), since you're just doing import datetime)

Related

ProgrammingError when trying to skip duplicate data in postgres sql

PostGres SQL will not accept data which is in violation of primary key. To ignore the duplicate data, I have this code:
import pandas as pd
import psycopg2
import os
import matplotlib
from sqlalchemy import create_engine
from tqdm import tqdm_notebook
from pandas_datareader import data as web
import datetime
from dateutil.relativedelta import relativedelta
db_database = os.environ.get('123')
engine = create_engine('postgresql://postgres:{}#localhost:5433/stockdata'.format(123))
def import_data(Symbol):
df = web.DataReader(Symbol, 'yahoo',start=datetime.datetime.now()-relativedelta(days=3), end= datetime.datetime.now())
insert_init = """INSERT INTO stockprices
(Symbol, Date, Volume, Open, Close, High, Low)
VALUES
"""
vals = ",".join(["""('{}','{}','{}','{}','{}','{}','{}')""".format(
Symbol,
Date,
row.High,
row.Low,
row.Open,
row.Close,
row.Volume,
) for Date, row in df.iterrows()])
insert_end ="""ON CONFLICT (Symbol, Date) DO UPDATE
SET
Volume = EXCLUDED.Volume,
Open = EXCLUDED.Open,
Close = EXCLUDED.Close,
Low = EXCLUDED.Low,
High = EXCLUDED.High
"""
query = insert_init + vals + insert_end
engine.execute(query)
import_data('aapl')
I am getting this error:
ProgrammingError: (psycopg2.errors.UndefinedColumn) column "symbol" of relation "stockprices" does not exist
LINE 2: (Symbol,Date, Volume, Open, Close, H...
^
[SQL: INSERT INTO stockprices
Could you please advise as to what does this error mean? I got rid of all the double quotes as advised in the comment.
I had used this code to create the table:
def create_price_table(symbol):
print(symbol)
df = web.DataReader(symbol, 'yahoo', start=datetime.datetime.now()-relativedelta(days=7), end= datetime.datetime.now())
df['Symbol'] = symbol
df.to_sql(name = "stockprices", con = engine, if_exists='append', index = True)
return 'daily prices table created'
create_price_table('amzn')
Also as was mentioned in the comment. I used this to check the table name:
SELECT table_name
FROM information_schema.tables
WHERE table_schema='public'
AND table_type='BASE TABLE';
Edit 1:
I changed the code as suggested in the comment, now the column name is in small case. Below is the code:
import pandas as pd
import psycopg2
import os
import matplotlib
from sqlalchemy import create_engine
from tqdm import tqdm_notebook
from pandas_datareader import data as web
import datetime
from dateutil.relativedelta import relativedelta
db_database = os.environ.get('123')
engine = create_engine('postgresql://postgres:{}#localhost:5433/stockdata'.format(123))
def create_price_table(symbol):
print(symbol)
df = web.DataReader(symbol, 'yahoo', start=datetime.datetime.now()-relativedelta(days=7), end= datetime.datetime.now())
df['symbol'] = symbol
df = df.rename(columns= {'Open':'open'})
df = df.rename(columns= {'Close':'close'})
df = df.rename(columns= {'High':'high'})
df = df.rename(columns= {'Low':'low'})
df = df.rename(columns= {'Volume':'volume'})
df = df.rename(columns= {'Adj Close':'adj_close'})
df.index.name ='date'
df.to_sql(name = "stockprices", con = engine, if_exists='append', index = True)
return 'daily prices table created'
# create_price_table('amzn')
def import_data(Symbol):
df = web.DataReader(Symbol, 'yahoo', start=datetime.datetime.now()-relativedelta(days=3), end= datetime.datetime.now())
insert_init = """INSERT INTO stockprices
(symbol, date, volume, open, close, high, low)
VALUES
"""
vals = ",".join(["""('{}','{}','{}','{}','{}','{}','{}')""".format(
Symbol,
Date,
row.High,
row.Low,
row.Open,
row.Close,
row.Volume,
) for Date, row in df.iterrows()])
insert_end ="""ON CONFLICT (Symbol, Date) DO UPDATE
SET
Volume = EXCLUDED.Volume,
Open = EXCLUDED.Open,
Close = EXCLUDED.Close,
Low = EXCLUDED.Low,
High = EXCLUDED.High
"""
query = insert_init + vals + insert_end
engine.execute(query)
import_data('aapl')
This code however is producing a new error:
DataError: (psycopg2.errors.InvalidTextRepresentation) invalid input syntax for type bigint: "166.14999389648438"
LINE 4: ('aapl','2022-02-23 00:00:00','166.14999...
^
Per my comment you have two issues:
You are trying to INSERT a float value (166.14999389648438) into an integer field. The first thing to figure out is why there is a mismatch: do you really want the database field to be an integer? The second thing is that forcing a float into an integer will work if the value is entered as a float/numeric:
select 166.14999389648438::bigint; 166
Though as you see it gets truncated.
It will not work if entered as a string:
ERROR: invalid input syntax for type bigint: "166.14999389648438"
Which is what you are doing. This leads to the second issue below.
You are not using proper Parameter passing as shown in the link. Where among other things is the warning:
Warning
Never, never, NEVER use Python string concatenation (+) or string parameters interpolation (%) to pass variables to a SQL query string. Not even at gunpoint.
For the purposes of this question the important part is that using parameter passing will result in proper type adaptation.

converting a datetime in pandas

I have the following datetime format (Mai 2, 2018 6:35:52 AM) which I want to convert to %Y-%m-%d %H:%M:%S.%f format, such as: 2017-05-13 21:11:01.436757
I have tried every way I can think of but get errors
customer_file_path['RECEIVED_DATE2'] = customer_file_path['RECEIVED_DATE'].str.replace(',','')
print(customer_file_path[['RECEIVED_DATE2']].head(10))
customer_file_path['RECEIVED_DATE2'] = pd.to_datetime(customer_file_path['RECEIVED_DATE2'], format='%b %e %Y %-I:%M:%S %p', errors='coerce')
print(customer_file_path[['RECEIVED_DATE2']].head(10))
customer_file_path['RECEIVED_DATE3'] = pd.to_datetime(customer_file_path['RECEIVED_DATE2']).dt.strftime('%Y-%m-%d %H:%M:%S.%f')#, errors='coerce')
customer_file_path['RECEIVED_DATE2'] = pd.to_datetime(customer_file_path['RECEIVED_DATE2'], format='%b %e %Y %-I:%M:%S %p').dt.strftime('%Y-%m-%d %H:%M:%S.%f')#, errors='coerce')
customer_file_path['RECEIVED_DATE3'] = pd.to_datetime(customer_file_path['RECEIVED_DATE2'], format='%b %e %Y %-I:%M:%S %p').dt.strftime('%Y-%m-%d %H:%M:%S.%f')#, errors='coerce')

Struggle with string formatting pandas date

I have the following stringy : 2017-12-03
I want to convert the string with to_datetime, add one business day (BDay), and change the format of the resulting date.
what i tried was :
import datetime as dt
from pandas.tseries.offsets import BDay
valor_nuevo=(pd.to_datetime(stringy,'%Y-%m-%d') + BDay(1)).strftime('%d/%m/%Y')
This raises an AssertionError.
You need to pass the format as the keyword argument format=, or omit it, in to_datetime:
valor_nuevo = (pd.to_datetime(stringy,format='%Y-%m-%d') + BDay(1)).strftime('%d/%m/%Y')
valor_nuevo = (pd.to_datetime(stringy) + BDay(1)).strftime('%d/%m/%Y')
print (valor_nuevo)
04/12/2017

Represent negative timedelta in most basic form

If I create a negative Timedelta for e.g. 0.5 hours, the internal representation looks as follow:
In [2]: pd.Timedelta('-0.5h')
Out[2]: Timedelta('-1 days +23:30:00')
How can I get back a (str) representation of this Timedelta in the form -00:30?
I want to display these deltas, and requiring the user to evaluate the expression -1 day + something is a bit awkward.
I can't add comment to you so adding it here. Don't know if this helps but I think you can use python humanize.
import humanize as hm
hm.naturaltime((pd.Timedelta('-0.5h')))
Out:
'30 minutes from now'
Ok, I will live with a hack going through a date:
sign = ''
date = pd.to_datetime('today')
if delta.total_seconds() < 0:
sign = '-'
date = date - delta
else:
date = date + delta
print '{}{:%H:%M}'.format(sign, date.to_pydatetime())
You can use the components of a Pandas timedelta
import pandas as pd
t = pd.Timedelta('-0.5h')
print t.components
>> Components(days=-1L, hours=23L, minutes=30L, seconds=0L, milliseconds=0L, microseconds=0L, nanoseconds=0L)
You can access each component with
print t.components.days
>> -1
print t.components.hours
>> 23
print t.components.minutes
>> 30
The rest is then formatting.
source
This is a total hack that won't work for Series data, but....
import pandas as pd
import numpy as np
t = pd.Timedelta('-0.5h').components
# Total signed minutes. For a negative delta `days` is negative and the
# remaining components are positive, so the sum gives the true signed value
# (-1 day + 23 h + 30 min == -30 min). Seconds and smaller are ignored.
mins = t.days*24*60 + t.hours*60 + t.minutes
# Emit '-' only for negative values; taking the first character of
# str(np.sign(mins)) would prefix positive deltas with '1' and zero with '0'.
sign = '-' if mins < 0 else ''
hours, minutes = divmod(abs(mins), 60)
# Single-argument print() behaves the same on Python 2 and Python 3.
print('{}{:02d}:{:02d}'.format(sign, hours, minutes))
>> -00:30
I was looking for something similar (see https://github.com/pandas-dev/pandas/issues/17232 )
I'm not sure if it will be implemented in Pandas, so here is a workaround
import pandas as pd
def timedelta2str(td, display_plus=False, format=None):
    """
    Render a Timedelta with an explicit leading sign.

    Negative values are formatted as '- <magnitude>' instead of the
    default '-1 days +HH:MM:SS' representation.

    Parameters
    ----------
    td : pd.Timedelta (or pd.NaT)
    display_plus : bool, default False
        If True, prefix non-negative values with '+ '.
    format : None|all|even_day|sub_day|long
        Passed through unchanged to ``Timedelta._repr_base``.

    Returns
    -------
    converted : string of a Timedelta ('NaT' for a missing value)

    >>> td = pd.Timedelta('00:00:00.000')
    >>> timedelta2str(td)
    '0 days'
    >>> td = pd.Timedelta('00:01:29.123')
    >>> timedelta2str(td, display_plus=True, format='sub_day')
    '+ 00:01:29.123000'
    >>> td = pd.Timedelta('-00:01:29.123')
    >>> timedelta2str(td, display_plus=True, format='sub_day')
    '- 00:01:29.123000'
    >>> timedelta2str(pd.NaT)
    'NaT'
    """
    # NaT compares False against everything and -NaT is still NaT, so
    # without this guard the negative branch below would recurse forever.
    if pd.isna(td):
        return str(td)

    sign_sep = ' '
    if td >= pd.Timedelta(0):
        # NOTE: _repr_base is a private pandas method; it may change
        # between pandas releases.
        s = td._repr_base(format=format)
        if display_plus:
            s = "+" + sign_sep + s
        return s

    # Negative value: format the magnitude and prepend the minus sign here.
    s = timedelta2str(-td, display_plus=False, format=format)
    return "-" + sign_sep + s
if __name__ == "__main__":
import doctest
doctest.testmod()

How to adjust #timestamp to local time zone in Elastalert

I'm trying to adjust the returned datetime value to my time zone. My notifications look like this:
An abnormally low number of events occurred around 2016-09-28 22:49 CEST.
This is the correct date for my time zone.
In the fields section of the notification, however, I get the time in UTC:
#timestamp: 2016-09-28T20:49:44.711696Z
I have tried to use Enhancement this way,
file in ..\elastalert\elastalert_modules
from datetime import datetime
from elastalert.enhancements import BaseEnhancement
class TimeEnhancement(BaseEnhancement):
def process(self, match):
if '#timestamp' in match:
now = datetime.now()
hours2 = datetime(0, 0, 0, 2, 0, 0)
match['#timestamp'] = now + hours2
I also add usage in rule:
match_enhancements:
- "elastalert_modules.my_enhancements.TimeEnhancement"
It's just for test, not for ultimate solution
With some help on github i managed to get something that works (i think it formats the datetimes to the same timezone that the server is running)
Python2:
from elastalert.util import pretty_ts
from elastalert.enhancements import BaseEnhancement
class TimeEnhancement(BaseEnhancement):
    """Python 2 variant: pass every string field ending in 'Z' through pretty_ts."""

    def process(self, match):
        """Replace each string value of `match` that ends with 'Z' by pretty_ts(value).

        Values that pretty_ts cannot handle are left untouched.
        """
        # Only existing keys are reassigned, so the mapping's size does not
        # change while it is being iterated.
        for k, v in match.items():
            if isinstance(v, basestring) and v.endswith('Z'):
                try:
                    match[k] = pretty_ts(v)
                except Exception:
                    # Best effort: a string that merely ends in 'Z' but is
                    # not a timestamp keeps its original value. Catching
                    # Exception (not a bare except) lets KeyboardInterrupt
                    # and SystemExit propagate.
                    pass
Python3:
from elastalert.util import pretty_ts
from elastalert.enhancements import BaseEnhancement
class TimeEnhancement(BaseEnhancement):
    """Python 3 variant: pass every string field ending in 'Z' through pretty_ts."""

    def process(self, match):
        """Replace each string value of `match` that ends with 'Z' by pretty_ts(value).

        Values that pretty_ts cannot handle are left untouched.
        """
        # Only existing keys are reassigned, so the mapping's size does not
        # change while it is being iterated.
        for k, v in match.items():
            if isinstance(v, str) and v.endswith('Z'):
                try:
                    match[k] = pretty_ts(v)
                except Exception:
                    # Best effort: a string that merely ends in 'Z' but is
                    # not a timestamp keeps its original value. Catching
                    # Exception (not a bare except) lets KeyboardInterrupt
                    # and SystemExit propagate.
                    pass
An alternative approach to @Peter's, as I only needed a field with the local time.
from elastalert.util import pretty_ts
from elastalert.enhancements import BaseEnhancement
class TimeEnhancement(BaseEnhancement):
    """
    Add a '#timestamp_local' field (server time) next to '#timestamp'.
    """

    def process(self, match):
        """Store pretty_ts('#timestamp') under '#timestamp_local'.

        Nothing is written unless '#timestamp' is present and is a string
        ending in 'Z'; the original '#timestamp' value is never modified.
        """
        if '#timestamp' not in match:
            return
        stamp = match['#timestamp']
        if not isinstance(stamp, str):
            return
        if not stamp.endswith('Z'):
            return
        match['#timestamp_local'] = pretty_ts(stamp)