How to adjust #timestamp to local time zone in Elastalert - elastalert

i'm trying to adjust returned datetime value proper for my time zone. My notifications looks like this:
An abnormally low number of events occurred around 2016-09-28 22:49 CEST.
And this is proper date refered to my time zone.
In field's section in notification I'm getting time for UTC-0 zone:
#timestamp: 2016-09-28T20:49:44.711696Z
I have tried to use Enhancement this way,
file in ..\elastalert\elastalert_modules
from datetime import datetime
from elastalert.enhancements import BaseEnhancement
class TimeEnhancement(BaseEnhancement):
def process(self, match):
if '#timestamp' in match:
now = datetime.now()
hours2 = datetime(0, 0, 0, 2, 0, 0)
match['#timestamp'] = now + hours2
I also add usage in rule:
match_enhancements:
- "elastalert_modules.my_enhancements.TimeEnhancement"
It's just for test, not for ultimate solution

With some help on github i managed to get something that works (i think it formats the datetimes to the same timezone that the server is running)
Python2:
from elastalert.util import pretty_ts
from elastalert.enhancements import BaseEnhancement
class TimeEnhancement(BaseEnhancement):
def process(self, match):
for k, v in match.items():
if isinstance(v, basestring) and v.endswith('Z'):
try:
match[k] = pretty_ts(v)
except:
pass
Python3:
from elastalert.util import pretty_ts
from elastalert.enhancements import BaseEnhancement
class TimeEnhancement(BaseEnhancement):
def process(self, match):
for k, v in match.items():
if isinstance(v, str) and v.endswith('Z'):
try:
match[k] = pretty_ts(v)
except:
pass

Alternative approach to #Peter as I only needed a field with localtime.
from elastalert.util import pretty_ts
from elastalert.enhancements import BaseEnhancement
class TimeEnhancement(BaseEnhancement):
'''
Add local #timestamp (server time)
'''
def process(self, match):
if '#timestamp' in match:
ts = match['#timestamp']
if isinstance(ts, str) and ts.endswith('Z'):
match['#timestamp_local'] = pretty_ts(ts)

Related

ProgrammingError when trying to skip duplicate data in postgres sql

PostGres SQL will not accept data which is in violation of primary key. To ignore the duplicate data, I have this code:
import pandas as pd
import psycopg2
import os
import matplotlib
from sqlalchemy import create_engine
from tqdm import tqdm_notebook
from pandas_datareader import data as web
import datetime
from dateutil.relativedelta import relativedelta
db_database = os.environ.get('123')
engine = create_engine('postgresql://postgres:{}#localhost:5433/stockdata'.format(123))
def import_data(Symbol):
df = web.DataReader(Symbol, 'yahoo',start=datetime.datetime.now()-relativedelta(days=3), end= datetime.datetime.now())
insert_init = """INSERT INTO stockprices
(Symbol, Date, Volume, Open, Close, High, Low)
VALUES
"""
vals = ",".join(["""('{}','{}','{}','{}','{}','{}','{}')""".format(
Symbol,
Date,
row.High,
row.Low,
row.Open,
row.Close,
row.Volume,
) for Date, row in df.iterrows()])
insert_end ="""ON CONFLICT (Symbol, Date) DO UPDATE
SET
Volume = EXCLUDED.Volume,
Open = EXCLUDED.Open,
Close = EXCLUDED.Close,
Low = EXCLUDED.Low,
High = EXCLUDED.High
"""
query = insert_init + vals + insert_end
engine.execute(query)
import_data('aapl')
I am getting this error:
ProgrammingError: (psycopg2.errors.UndefinedColumn) column "symbol" of relation "stockprices" does not exist
LINE 2: (Symbol,Date, Volume, Open, Close, H...
^
[SQL: INSERT INTO stockprices
Could you please advise as to what does this error mean? I got rid of all the double quotes as advised in the comment.
I had used this code to create the table:
def create_price_table(symbol):
print(symbol)
df = web.DataReader(symbol, 'yahoo', start=datetime.datetime.now()-relativedelta(days=7), end= datetime.datetime.now())
df['Symbol'] = symbol
df.to_sql(name = "stockprices", con = engine, if_exists='append', index = True)
return 'daily prices table created'
create_price_table('amzn')
Also as was mentioned in the comment. I used this to check the table name:
SELECT table_name
FROM information_schema.tables
WHERE table_schema='public'
AND table_type='BASE TABLE';
Edit 1:
I changed the code as suggested in the comment, now the column name is in small case. Below is the code:
import pandas as pd
import psycopg2
import os
import matplotlib
from sqlalchemy import create_engine
from tqdm import tqdm_notebook
from pandas_datareader import data as web
import datetime
from dateutil.relativedelta import relativedelta
db_database = os.environ.get('123')
engine = create_engine('postgresql://postgres:{}#localhost:5433/stockdata'.format(123))
def create_price_table(symbol):
print(symbol)
df = web.DataReader(symbol, 'yahoo', start=datetime.datetime.now()-relativedelta(days=7), end= datetime.datetime.now())
df['symbol'] = symbol
df = df.rename(columns= {'Open':'open'})
df = df.rename(columns= {'Close':'close'})
df = df.rename(columns= {'High':'high'})
df = df.rename(columns= {'Low':'low'})
df = df.rename(columns= {'Volume':'volume'})
df = df.rename(columns= {'Adj Close':'adj_close'})
df.index.name ='date'
df.to_sql(name = "stockprices", con = engine, if_exists='append', index = True)
return 'daily prices table created'
# create_price_table('amzn')
def import_data(Symbol):
df = web.DataReader(Symbol, 'yahoo', start=datetime.datetime.now()-relativedelta(days=3), end= datetime.datetime.now())
insert_init = """INSERT INTO stockprices
(symbol, date, volume, open, close, high, low)
VALUES
"""
vals = ",".join(["""('{}','{}','{}','{}','{}','{}','{}')""".format(
Symbol,
Date,
row.High,
row.Low,
row.Open,
row.Close,
row.Volume,
) for Date, row in df.iterrows()])
insert_end ="""ON CONFLICT (Symbol, Date) DO UPDATE
SET
Volume = EXCLUDED.Volume,
Open = EXCLUDED.Open,
Close = EXCLUDED.Close,
Low = EXCLUDED.Low,
High = EXCLUDED.High
"""
query = insert_init + vals + insert_end
engine.execute(query)
import_data('aapl')
This code however is producing a new error:
DataError: (psycopg2.errors.InvalidTextRepresentation) invalid input syntax for type bigint: "166.14999389648438"
LINE 4: ('aapl','2022-02-23 00:00:00','166.14999...
^
Per my comment you have two issues:
You are trying to INSERT a float value(166.14999389648438) into an integer field. First thing to figure out is why the mismatch? Do really want the database field to be an integer? Second thing is that trying to force a float into an integer will work if the value is being entered as a float/numeric:
select 166.14999389648438::bigint; 166
Though as you see it gets truncated.
It will not work if entered as a string:
ERROR: invalid input syntax for type bigint: "166.14999389648438"
Which is what you are doing. This leads to the second issue below.
You are not using proper Parameter passing as shown in the link. Where among other things is the warning:
Warning
Never, never, NEVER use Python string concatenation (+) or string parameters interpolation (%) to pass variables to a SQL query string. Not even at gunpoint.
For the purposes of this question the important part is that using parameter passing will result in proper type adaptation.

Python multiprocessing how to update a complex object in a manager list without using .join() method

I started programming in Python about 2 months ago and I've been struggling with this problem in the last 2 weeks.
I know there are many similar threads to this one but I can't really find a solution which suits my case.
I need to have the main process which is the one which interacts with Telegram and another process, buffer, which understands the complex object received from the main and updates it.
I'd like to do this in a simpler and smoother way.
At the moment objects are not being updated due to the use of multi-processing without the join() method.
I tried then to use multi-threading instead but it gives me compatibility problems with Pyrogram a framework which i am using to interact with Telegram.
I wrote again the "complexity" of my project in order to reproduce the same error I am getting and in order to get and give the best help possible from and for everyone.
a.py
class A():
def __init__(self, length = -1, height = -1):
self.length = length
self.height = height
b.py
from a import A
class B(A):
def __init__(self, length = -1, height = -1, width = -1):
super().__init__(length = -1, height = -1)
self.length = length
self.height = height
self.width = width
def setHeight(self, value):
self.height = value
c.py
class C():
def __init__(self, a, x = 0, y = 0):
self.a = a
self.x = x
self.y = y
def func1(self):
if self.x < 7:
self.x = 7
d.py
from c import C
class D(C):
def __init__(self, a, x = 0, y = 0, z = 0):
super().__init__(a, x = 0, y = 0)
self.a = a
self.x = x
self.y = y
self.z = z
def func2(self):
self.func1()
main.py
from b import B
from d import D
from multiprocessing import Process, Manager
from buffer import buffer
if __name__ == "__main__":
manager = Manager()
lizt = manager.list()
buffer = Process(target = buffer, args = (lizt, )) #passing the list as a parameter
buffer.start()
#can't invoke buffer.join() here because I need the below code to keep running while the buffer process takes a few minutes to end an instance passed in the list
#hence I can't wait the join() function to update the objects inside the buffer but i need objects updated in order to pop them out from the list
import datetime as dt
t = dt.datetime.now()
#library of kind of multithreading (pool of 4 processes), uses asyncio lib
#this while was put to reproduce the same error I am getting
while True:
if t + dt.timedelta(seconds = 10) < dt.datetime.now():
lizt.append(D(B(5, 5, 5)))
t = dt.datetime.now()
"""
#This is the code which looks like the one in my project
#main.py
from pyrogram import Client #library of kind of multithreading (pool of 4 processes), uses asyncio lib
from b import B
from d import D
from multiprocessing import Process, Manager
from buffer import buffer
if __name__ == "__main__":
api_id = 1234567
api_hash = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
app = Client("my_account", api_id, api_hash)
manager = Manager()
lizt = manager.list()
buffer = Process(target = buffer, args = (lizt, )) #passing the list as a parameter
buffer.start()
#can't invoke buffer.join() here because I need the below code to run at the same time as the buffer process
#hence I can't wait the join() function to update the objects inside the buffer
#app.on_message()
def my_handler(client, message):
lizt.append(complex_object_conatining_message)
"""
buffer.py
def buffer(buffer):
print("buffer was defined")
while True:
if len(buffer) > 0:
print(buffer[0].x) #prints 0
buffer[0].func2() #this changes the class attribute locally in the class instance but not in here
print(buffer[0].x) #prints 0, but I'd like it to be 7
print(buffer[0].a.height) #prints 5
buffer[0].a.setHeight(10) #and this has the same behaviour
print(buffer[0].a.height) #prints 5 but I'd like it to be 10
buffer.pop(0)
This is the whole code about the problem I am having.
Literally every suggestion is welcome, hopefully constructive, thank you in advance!
At last I had to change the way to solve this problem, which was using asyncio like the framework was doing as well.
This solution offers everything I was looking for:
-complex objects update
-avoiding the problems of multiprocessing (in particular with join())
It is also:
-lightweight: before I had 2 python processes 1) about 40K 2) about 75K
This actual process is about 30K (and it's also faster and cleaner)
Here's the solution, I hope it will be useful for someone else like it was for me:
The part of the classes is skipped because this solution updates complex objects absolutely fine
main.py
from pyrogram import Client
import asyncio
import time
def cancel_tasks():
#get all task in current loop
tasks = asyncio.Task.all_tasks()
for t in tasks:
t.cancel()
try:
buffer = []
firstWorker(buffer) #this one is the old buffer.py file and function
#the missing loop and loop method are explained in the next piece of code
except KeyboardInterrupt:
print("")
finally:
print("Closing Loop")
cancel_tasks()
firstWorker.py
import asyncio
def firstWorker(buffer):
print("First Worker Executed")
api_id = 1234567
api_hash = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
app = Client("my_account", api_id, api_hash)
#app.on_message()
async def my_handler(client, message):
print("Message Arrived")
buffer.append(complex_object_conatining_message)
await asyncio.sleep(1)
app.run(secondWorker(buffer)) #here is the trick: I changed the
#method run() of the Client class
#inside the Pyrogram framework
#since it was a loop itself.
#In this way I added another task
#to the existing loop in orther to
#let run both of them together.
my secondWorker.py
import asyncio
async def secondWorker(buffer):
while True:
if len(buffer) > 0:
print(buffer.pop(0))
await asyncio.sleep(1)
The resources to understand the asyncio used in this code can be found here:
Asyncio simple tutorial
Python Asyncio Official Documentation
This tutorial about how to fix classical Asyncio errors

BigQuery: Credentials asked in the automated job

I have a python application/job which is pushing the dataframe to BigQuery. However that job is failing because evidently it is asking for the credentials as show below:
Please visit this URL to authorize this application:
As this is an automated job, I can't click the link and submit the code. Is there any other way to pass the authorization?
I have already setup the service account key in my environment variable / bashrc.
Code:
from datetime import timedelta
import pandas as pd
from io import StringIO
from azure.storage.blob import BlockBlobService
class Transmitter:
def __init__(self):
self.blob_service = BlockBlobService(account_name='xxxx',
account_key='xxxxxxxxxxxxx')
self.dataset_id = 'xxxx'
self.jobQuery = "select JobID, EmailName from xxxxx group by JobID, EmailName"
self.keyDf = pd.read_csv('jobKeys.csv')
def toBigQJobs(self):
jDf = pd.read_gbq(self.jobQuery, project_id='xxxx', dialect='standard')
jDf['Type'] = 'C'
jDf['Category'] = 'other'
for index, row in jDf.iterrows():
for indexA, rowA in self.keyDf.iterrows():
if rowA['Key'] in row['EmailName']:
jDf.loc[index, 'Category'] = rowA['Category']
jDf.loc[index, 'Type'] = rowA['Type']
break
jDf.to_gbq(destination_table='xxxx', project_id='xxxx',
if_exists='replace')
if __name__ == '__main__':
objTransmitter = Transmitter()
objTransmitter.toBigQJobs()
Solution: Add environment variable through os.environ and it worked.

reactivex: how to make a behaviorsubject emit from observable

I'm going to be using rxandroid in an android app. I'm trying to model the behavior right now in rxpy because it was the easiest for me to set up and play with. In the example below, source3 is emitting the correct data; which is a concatenation of an initialization that takes some time and a permanent subscription which I have just faked out. I want the BehaviorSubject because I need the last value immediately for field initialization.
I cannot figure out how to chain the BehaviorSubject on top of source3 so that it emits source 3 while remembering the last value. I have searched the internet for two days and not found a clear direction on this use case. Here is my code, and the question is why I don't get any emissions from the observer.
from rx import Observable, Observer
from rx.subjects import BehaviorSubject
import time, random
def fake_initialization(observer):
time.sleep(5) # It takes some time
observer.on_next("Alpha")
observer.on_completed()
def fake_subscription(observer):
iter = 0 # Subscription emits forever
while True:
observer.on_next("message %02d"%(iter))
time.sleep(random.randrange(2,5))
iter += 1
class PrintObserver(Observer):
def on_next(self, value):
print("Received {0}".format(value))
#bsubject.on_next(value)
def on_completed(self):
print("Done!")
def on_error(self, error):
print("Error Occurred: {0}".format(error))
source1 = Observable.create(fake_initialization)
source2 = Observable.create(fake_subscription)
source3 = source1 + source2
bsubject = BehaviorSubject(False)
source4 = source3.multicast(bsubject)
source4.connect()
source4.subscribe(PrintObserver())
This was actually a fairly easy answer for someone. I'm posting this in case anyone else ends up in this situation. Admittedly, I didn't read the rxpy page closely enough. You need to add concurrency on your own, presumably because there are so many concurrent solutions in Python. Here is the final working code:
import random
import time
import multiprocessing
from rx import Observable,Observer
from rx.concurrency import ThreadPoolScheduler
from rx.subjects import Subject
class PrintObserver1(Observer):
def on_next(self, value):
print("Received 1 {0}".format(value))
#bsubject.on_next(value)
def on_completed(self):
print("Done 1!")
def on_error(self, error):
print("Error Occurred: 1 {0}".format(error))
class PrintObserver2(Observer):
def on_next(self, value):
print("Received 2 {0}".format(value))
#bsubject.on_next(value)
def on_completed(self):
print("Done 2!")
def on_error(self, error):
print("Error Occurred: 2 {0}".format(error))
def fake_initialization(observer):
time.sleep(5) # It takes some time
observer.on_next("Alpha")
observer.on_completed()
def fake_subscription(observer):
iter = 0 # Subscription emits forever
while True:
observer.on_next("message %02d"%(iter))
time.sleep(random.randrange(2,5))
iter += 1
optimal_thread_count = multiprocessing.cpu_count()
pool_scheduler = ThreadPoolScheduler(optimal_thread_count)
source1 = Observable.create(fake_initialization).subscribe_on(pool_scheduler)
source2 = Observable.create(fake_subscription).subscribe_on(pool_scheduler)
catted_source = source1 + source2
native_source = Observable.interval(1000)
print native_source,catted_source
#source = source3
subject = Subject()
# native_source = works
# catted_source = not works
subSource = catted_source.subscribe(subject)
#####
subSubject1 = subject.subscribe(PrintObserver1())
subSubject2 = subject.subscribe(PrintObserver2())
time.sleep(30)
subject.on_completed()
subSubject1.dispose()
subSubject2.dispose()
Also note that you have to install the 'futures' package for concurrency to work on Python 2.7.
If you get this error:
from concurrent.futures import ThreadPoolExecutor
ImportError: No module named concurrent.futures
Read this (link is for slightly different error but solution works):
ImportError: No module named concurrent.futures.process

Represent negative timedelta in most basic form

If I create a negative Timedelta for e.g. 0.5 hours, the internal representation looks as follow:
In [2]: pd.Timedelta('-0.5h')
Out[2]: Timedelta('-1 days +23:30:00')
How can I get back a (str) representation of this Timedelta in the form -00:30?
I want to display these deltas and requiring the user to calculate the expression -1 day + something is a bit award.
I can't add comment to you so adding it here. Don't know if this helps but I think you can use python humanize.
import humanize as hm
hm.naturaltime((pd.Timedelta('-0.5h')))
Out:
'30 minutes from now'
Ok, I will live with a hack going trough a date:
sign = ''
date = pd.to_datetime('today')
if delta.total_seconds() < 0:
sign = '-'
date = date - delta
else:
date = date + delta
print '{}{:%H:%M}'.format(sign, date.to_pydatetime())
You can use the components of a Pandas timedelta
import pandas as pd
t = pd.Timedelta('-0.5h')
print t.components
>> Components(days=-1L, hours=23L, minutes=30L, seconds=0L, milliseconds=0L, microseconds=0L, nanoseconds=0L)
You can access each component with
print t.components.days
>> -1
print t.components.hours
>> 23
print t.components.minutes
>> 30
The rest is then formatting.
source
This is a total hack that won't work for Series data, but....
import pandas as pd
import numpy as np
t = pd.Timedelta('-0.5h').components
mins = t.days*24*60 + t.hours*60 + t.minutes
print str(np.sign(mins))[0]+str(divmod(abs(mins), 60)[0]).zfill(2)+':'+str(divmod(abs(mins), 60)[1]).zfill(2)
>> -00:30
I was looking for something similar (see https://github.com/pandas-dev/pandas/issues/17232 )
I'm not sure if it will be implemented in Pandas, so here is a workaround
import pandas as pd
def timedelta2str(td, display_plus=False, format=None):
"""
Parameters
----------
format : None|all|even_day|sub_day|long
Returns
-------
converted : string of a Timedelta
>>> td = pd.Timedelta('00:00:00.000')
>>> timedelta2str(td)
'0 days'
>>> td = pd.Timedelta('00:01:29.123')
>>> timedelta2str(td, display_plus=True, format='sub_day')
'+ 00:01:29.123000'
>>> td = pd.Timedelta('-00:01:29.123')
>>> timedelta2str(td, display_plus=True, format='sub_day')
'- 00:01:29.123000'
"""
td_zero = pd.Timedelta(0)
sign_sep = ' '
if td >= td_zero:
s = td._repr_base(format=format)
if display_plus:
s = "+" + sign_sep + s
return s
else:
s = timedelta2str(-td, display_plus=False, format=format)
s = "-" + sign_sep + s
return s
if __name__ == "__main__":
import doctest
doctest.testmod()