Slack automate calendar events - automation

I'm looking for a way to automate the creation of calendar events. I'm part of multiple spaces in my school and they keep on posting some events that are happening on a regular basis.
I was wondering is there's a way to automate these calendar events. I want to write a script with Slack api's that can read the messages from all the spaces I'm part of and scan them to see if there's any event related information and create a new calendar event in my google calendars. I want to run this at the end of the day on all the messages from all the spaces.

from __future__ import print_function
import os
import json
import pprint
import time
import parsedatetime
from datetime import datetime
from datetime import timedelta
from slack_sdk import WebClient
from slack_sdk.errors import SlackApiError
from google.oauth2.credentials import Credentials
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
def get_google_service():
creds = None
SCOPES = ['https://www.googleapis.com/auth/calendar']
if os.path.exists('token.json'):
creds = Credentials.from_authorized_user_file('token.json', SCOPES)
return build('calendar', 'v3', credentials=creds)
def send_google_calendar_invite(service, channel_name, start_time, end_time):
try:
# f = open("template.json", "r")
# template_data = f.read()
template_data = '''
{
"summary": "event_name",
"location": "event_location",
"description": "event_description",
"start": {
"dateTime": "event_start_time",
"timeZone": "America/Los_Angeles"
},
"end": {
"dateTime": "event_end_time",
"timeZone": "America/Los_Angeles"
}
}
'''
template_data = template_data.replace('event_name', channel_name)
template_data = template_data.replace('event_location', channel_name+'-meeting')
template_data = template_data.replace('event_description', channel_name+'-desrpition')
template_data = template_data.replace('event_start_time', start_time)
template_data = template_data.replace('event_end_time', end_time)
json_object = json.loads(template_data)
json_formatted_str = json.dumps(json_object, indent=2)
print(json_formatted_str)
event = service.events().insert(calendarId='primary', body=json_object).execute()
print('Event created: %s' % (event.get('htmlLink')))
except HttpError as error:
print('An error occurred: %s' % error)
def read_slack_messages():
channel_id = "C04QL76V21X"
try:
lastHourDateTime = datetime.now() - timedelta(hours=24)
client = WebClient(token=open("secrets.txt", "r").read())
conversation_history = client.conversations_history(channel=channel_id, oldest=time.mktime(lastHourDateTime.timetuple()))
channel_info_result = client.conversations_info(channel=channel_id)
channel_name = channel_info_result['channel']['name']
conversation_messages = conversation_history["messages"]
print("{} messages found in {}".format(len(conversation_messages), id))
# import pdb; pdb.set_trace();
service = get_google_service()
for message in conversation_messages[:2]:
chat_message = message['text']
try:
cal = parsedatetime.Calendar()
dates = cal.parse(chat_message)
print(dates)
start_time = time.strftime('%Y-%m-%dT%H:%M:%S-%000:00', (dates[0]))
end_time = start_time[:11]+f"{int(start_time[11:13])+1:02}"+start_time[13:]
print(chat_message, ' : ', start_time, ' ||| ', end_time)
send_google_calendar_invite(service, channel_name, start_time, end_time)
except TypeError as e:
print(' : Nope : ', e);
except SlackApiError as e:
print("Error getting conversation: {}".format(e))
if __name__ == '__main__':
read_slack_messages()

Related

ValueError: NaTType does not support timetuple when converting a dataframe to dictionary using to_dict('records')

I'm running this flask app
from flask import Flask, request, jsonify, render_template
from flask_cors import CORS, cross_origin
import json
import pandas as pd
# Create the app object
app = Flask(__name__)
cors = CORS(app, resources= {r"/*": {'origins' : "*"}})
# importing function for calculations
from Record_Matching import Matching
#app.route("/query", methods = ['get'])
#cross_origin()
def query():
# service_account_creds = request.json
query1 = request.args.get('query1', type = str)
query2 = request.args.get('query2', type = str)
querycolumns = request.args.get('querycolumns')
project_id = request.args.get('project_id', type = str)
service_account_creds = request.args.get('service_account')
SS = request.args.get('SS', type = float)
TT = request.args.get('TT', type = float)
result = Matching(query1,query2, SS,TT, service_account_creds, project_id, querycolumns)
return result
if __name__ == "__main__":
app.run(host="localhost", port=8080, debug=True)
and I'm importing the matching function from this python scripts
import pandas as pd
from google.cloud import bigquery
from google.oauth2 import service_account
import recordlinkage
from recordlinkage.preprocessing import phonetic
from pandas.io.json import json_normalize
import uuid
from uuid import uuid4
import random
import string
import json
import ast
# Results to data frame function
def gcp2df(sql, client):
query = client.query(sql)
results = query.result()
return results.to_dataframe()
# Exporting df to bigquery - table parameter example: "dataset.tablename"
# def insert(df, table):
# client = bigquery.Client()
# job_config = bigquery.LoadJobConfig(write_disposition=bigquery.job.WriteDisposition.WRITE_TRUNCATE)
# return client.load_table_from_dataframe(df, table, job_config = job_config)
def pair(df1, df2, TT, querycolumns):
# function to take pair from list and compare:
L = querycolumns
l=len(querycolumns)
p1=0
p2=1
# To generate phonetics we need to make sure all names are in english.
# thus we'll replace non-english words by random english strings
df1[L[p1]] = df1[L[p1]].astype(str)
df2[L[p2]] = df2[L[p2]].astype(str)
for i in range(0,len(df1)):
if df1[L[p1]][i].isascii() == False:
df1[L[p1]][i] = ''.join(random.choices(string.ascii_lowercase, k=5))
for i in range(0,len(df2)):
if df2[L[p2]][i].isascii() == False:
df2[L[p2]][i] = ''.join(random.choices(string.ascii_lowercase, k=5))
compare = recordlinkage.Compare()
df1["phonetic_given_name"] = phonetic(df1[L[p1]], "soundex")
df2["phonetic_given_name"] = phonetic(df2[L[p2]], "soundex")
df1["initials"] = (df1[L[p1]].str[0] + df1[L[p1]].str[-1])
df2["initials"] = (df2[L[p2]].str[0] + df2[L[p2]].str[-1])
indexer = recordlinkage.Index()
indexer.block('initials')
candidate_links = indexer.index(df1, df2)
compare.exact('phonetic_given_name', 'phonetic_given_name', label="phonetic_given_name")
# O(n) a function that uses two pointers to track consecutive pairs for the input list
while p2 <=l:
compare.string(L[p1], L[p2], method='jarowinkler',threshold = TT, label=L[p1])
p1+=2
p2+=2
features = compare.compute(candidate_links,df1, df2)
return features
def Matching(query1,query2, SS,TT, service_account_creds, project_id, querycolumns):
service_account_creds = ast.literal_eval(service_account_creds)
credentials = service_account.Credentials(service_account_creds, service_account_creds['client_email'],
service_account_creds['token_uri'])
job_config = bigquery.LoadJobConfig()
client = bigquery.Client( project = project_id)
SS=int(SS)
TT=float(TT)
df1 = gcp2df("""{}""".format(query1), client)
df2 = gcp2df("""{}""".format(query2), client)
querycolumns = json.loads(querycolumns)
querycolumns = list(querycolumns.values())
features = pair(df1, df2, TT, querycolumns)
features['Similarity_score'] = features.sum(axis=1)
features = features[features['Similarity_score']>=SS].reset_index()
final = features[['level_0', 'level_1']]
final.rename(columns= {'level_0':'df1_index', 'level_1':'df2_index'}, inplace= True)
final['Unique_ID'] = [uuid.uuid4() for _ in range(len(final.index))]
final['Unique_ID'] = final['Unique_ID'].astype(str)
final['Similarity_Score'] = SS
final_duplicates = final['df1_index'].value_counts().max()
# insert(final,"test-ahmed-project.Record_Linkage.Matching_Indices")
message = "Mission accomplished!, your highest number of duplicates is " + str(final_duplicates)
return {'message':message,'final':final.to_dict('records'), 'df1':df1.to_dict('records')}
I'm not sure why when I return df1 as a dictionary it shows ValueError error when I try to to use the function from flask app, but when I run it in a jupytor notebook using the same dataframe that I'm taking from bigquery, it works just fine, so why does it not work on the flask app?
I tried to_dict('record') to convert a dataframe to a dictionary,
it looking online many resources suggest the error exists because the data contains missing values, but it shouldn't be a problem because when I try converting the same dataframe to dictionary in jupyter notebook it works just fine.

Handling errors within loops through exceptions

Tried my first python program to read temp sensor and output to influxdb
Occasionally temp sensor gives error "IndexError: list index out of range" and loop ends
I want loop to wait 15 seconds on this error and then continue the loop (sensor usually corrects itself by then on the next read)
My code:
import os
import glob
import time
import urllib
import urllib2
import httplib
import json
from influxdb import InfluxDBClient
client = InfluxDBClient(host='192.168.1.7', port=8086)
#client.get_list_database()
client.switch_database('influxdb1')
os.system('modprobe w1-gpio')
os.system('modprobe w1-therm')
base_dir = '/sys/devices/w1_bus_master1/'
device_folder = glob.glob(base_dir + '28*')[0]
while True:
device_file = device_folder + '/w1_slave'
def read_temp_raw():
f = open(device_file, 'r')
lines = f.readlines()
f.close()
return lines
def read_temp():
lines = read_temp_raw()
while lines[0].strip()[-3:] != 'YES':
time.sleep(0.2)
lines = read_temp_raw()
equals_pos = lines[1].find('t=')
if equals_pos != -1:
temp_string = lines[1][equals_pos+2:]
temp_c = float(temp_string) / 1000.0
return temp_c
temp = float(read_temp())
json_body = [
{
"measurement": "YOUR_MEASUREMENT",
"tags": {
"Device": "YOUR_DEVICE",
"ID": "YOUR_ID"
},
"fields": {
"outside_temp": temp,
}
}
]
client.write_points(json_body)
time.sleep(60)
******************************************************
which works ok :)
When I edit the code to catch the exception.....
******************************************************
while True:
except IndexError:
time.sleep(15)
continue
device_file = device_folder + '/w1_slave' # store the details
def read_temp_raw():
f = open(device_file, 'r')
lines = f.readlines() # read the device details
f.close()
return lines
def read_temp():
lines = read_temp_raw()
while lines[0].strip()[-3:] != 'YES':
time.sleep(0.2)
lines = read_temp_raw()
equals_pos = lines[1].find('t=')
if equals_pos != -1:
temp_string = lines[1][equals_pos+2:]
temp_c = float(temp_string) / 1000.0
return temp_c
temp = float(read_temp())
json_body = [
{
"measurement": "YOUR_MEASUREMENT",
"tags": {
"Device": "YOUR_DEVICE",
"ID": "YOUR_ID"
},
"fields": {
"outside_temp": temp,
}
}
]
client.write_points(json_body)
time.sleep(60)
************************************************************
I get following error...
File "temptoinfluxdb2.py", line 22
except IndexError:
^
SyntaxError: invalid syntax
Where am i going wrong please?
You will always need to use the except block in combination with a try block.
So the code in the try block is executed until an exception (in that case IndexError) occurs.
try:
# Execution block
except IndexError:
# Error handling
You could also use a more general approach with except Exception as e, which catches not just the IndexError but any exception.
Check the official documentation for further information.

airflow BigQueryOperator ERROR - 400 Syntax error: Unexpected token at [1:244] - while using params

I have 2 BigQueryOperator tasks in a loop. The first task works perfectly, however the second task (create_partition_table_agent_intensity_{v_list[i]}) throws an error:
ERROR - 400 Syntax error: Unexpected "{" at [1:244]
I can't understand what is the difference between the tasks.
Maybe someone can point me to the right direction?
Here is my entire code:
from airflow.models import (DAG, Variable)
import os
from airflow.operators.dummy import DummyOperator
from airflow.operators.bash_operator import BashOperator
from airflow.operators.python_operator import PythonOperator
import datetime
import json
import pandas as pd
from airflow.contrib.operators.gcs_to_bq import GoogleCloudStorageToBigQueryOperator
from airflow.contrib.operators.bigquery_operator import BigQueryOperator
from google.cloud import bigquery
from airflow.contrib.hooks.bigquery_hook import BigQueryHook
from airflow.providers.google.cloud.operators.bigquery import BigQueryDeleteTableOperator
default_args = {
'start_date': datetime.datetime(2020, 1, 1),
}
PROJECT_ID = os.environ.get("GCP_PROJECT_ID", "bigquery_default")
PROJECT_ID_GCP = os.environ.get("GCP_PROJECT_ID", "my_project")
DATASET_MRR = os.environ.get("GCP_BIGQUERY_DATASET_NAME", "LP_RAW")
DATASET_STG = os.environ.get("GCP_BIGQUERY_DATASET_NAME", "LP_STG")
MRR_AGENT_ACTIVITY = "RPT_FA_AGENT_ACTIVITY_VW"
MRR_AGENT_INTENSITY = "RPT_AGG_15M_MSG_AGENT_INTENSITY_VW"
STG_AGENT_ACTIVITY_PARTITIONED = "agent_acitivity_partitioned"
STG_AGENT_INTENSITY_PARTITIONED = "agent_intensity_partitioned"
def list_dates_in_df(ti):
hook = BigQueryHook(bigquery_conn_id=PROJECT_ID,
use_legacy_sql=False)
bq_client = bigquery.Client(project = hook._get_field("project"),
credentials = hook._get_credentials())
query = "select distinct(cast(PARTITION_KEY as string)) as PARTITION_KEY \
FROM LP_MNG.PartitionStatusMonitoring\
where SOURCE_TABLE in ('RPT_FA_AGENT_ACTIVITY_VW','RPT_AGG_15M_MSG_AGENT_INTENSITY_VW')\
and IS_LOAD_COMPLETED = false;"
df = bq_client.query(query).to_dataframe()
res = df.values.tolist()
#unpack the list of lists, l is a list inside res list, take item from res, now each item is l
my_list = [item for l in res for item in l]
ti.xcom_push(key = 'list_of_dates', value = my_list)
def update_variable(ti):
updated_file_list = ti.xcom_pull(key = 'list_of_dates',task_ids='list_dates')
Variable.set(key="updated_dates", value=json.dumps(updated_file_list))
print(updated_file_list)
print(type(updated_file_list))
with DAG(
'test_with_mng_table_list',
schedule_interval=None,
catchup = False,
default_args=default_args
) as dag:
list_dates = PythonOperator(
task_id ='list_dates',
python_callable = list_dates_in_df
)
set_list = PythonOperator(
task_id= 'set_list',
python_callable=update_variable
)
v_list = Variable.get("updated_dates", deserialize_json=True)
end_job = BashOperator(
task_id='end_job',
bash_command='echo end_job.',
trigger_rule = 'all_done', )
for i in range(len(v_list)):
create_partition_table_agent_activity = BigQueryOperator(
task_id=f"create_partition_table_agent_activity_{v_list[i]}",
sql="select ACCOUNT_ID,timestamp_trunc(CHANGE_EVENT_TIME_15M,HOUR) as ANALYSIS_DATE,\
AGENT_ID,AGENT_GROUP_ID,USER_TYPE_ID,\
sum(AWAY_ENGAGED_TIME) AWAY_ENGAGED_TIME,sum(BACKIN5_ENGAGED_TIME) BACKIN5_ENGAGED_TIME,\
sum(DURATION_DAYS) DURATION_DAYS,sum(ONLINE_TIME) ONLINE_TIME,\
sum(BACK_IN_5_TIME) BACK_IN_5_TIME,sum(AWAY_TIME) AWAY_TIME\
from {{ params.PROJECT_ID }}.{{ params.DATASET_MRR }}.{{ params.MRR1 }}\
where cast(CHANGE_EVENT_TIME_15M as STRING FORMAT 'YYYY-MM-DD') = cast('{{ params.date_a }}' as STRING) \
group by 1,2,3,4,5;",
params={"PROJECT_ID":PROJECT_ID_GCP ,
"DATASET_MRR":DATASET_MRR,
"MRR1":MRR_AGENT_ACTIVITY,
"date_a" : v_list[i]
},
destination_dataset_table=f"{PROJECT_ID_GCP}.{DATASET_STG}.{STG_AGENT_ACTIVITY_PARTITIONED}{v_list[i]}",
create_disposition='CREATE_IF_NEEDED',
write_disposition='WRITE_TRUNCATE',
#bigquery_conn_id=CONNECTION_ID,
use_legacy_sql=False,
dag=dag
)
create_partition_table_agent_intensity = BigQueryOperator(
task_id=f"create_partition_table_agent_intensity_{v_list[i]}",
sql=f"select ACCOUNT_ID,timestamp_trunc(AGG_DATE,HOUR) as ANALYSIS_DATE,\
AGENT_ID, GROUP_ID as AGENT_GROUP_ID,\
USER_TYPE_ID, SUM(SUM_CONVERSATION_LOAD_RATE) as SUM_CONVERSATION_LOAD_RATE,\
SUM(NO_EVENTS) AS NO_EVENTS\
from {{ params.PROJECT_ID }}.{{ params.DATASET_MRR }}.{{ params.MRR2 }}\
where cast(AGG_DATE as STRING FORMAT 'YYYY-MM-DD') = cast('{{ params.date_a }}' as STRING) \
group by 1,2,3,4,5;",
params={"PROJECT_ID":PROJECT_ID_GCP ,
"DATASET_MRR":DATASET_MRR,
"MRR2":MRR_AGENT_INTENSITY,
"date_a" : v_list[i]
},
destination_dataset_table=f"{PROJECT_ID_GCP}.{DATASET_STG}.{STG_AGENT_INTENSITY_PARTITIONED}{v_list[i]}",
create_disposition='CREATE_IF_NEEDED',
write_disposition='WRITE_TRUNCATE',
#bigquery_conn_id=CONNECTION_ID,
use_legacy_sql=False,
dag=dag
)
d2 = DummyOperator(task_id='generate_data_{0}'.format(v_list[i]),dag=dag)
list_dates >> set_list >> [
create_partition_table_agent_activity,create_partition_table_agent_intensity
] >> d2 >> end_job
I do not have playground to test it, but I think you should not use f-string for sql parameter. If you use {{something}} in f-string it returns string {something} so parameters for query are not inserted and this results in SQL syntax error as query is run without parameters. Please try to remove f before string for sql in 2nd task.

Using Json Input Variables In Airflow EMR Operator Steps

I'm currently following the template given here: https://github.com/apache/airflow/blob/master/airflow/contrib/example_dags/example_emr_job_flow_manual_steps.py to create a DAG to call for a emr instance using spark submit. When setting up the spark_test_steps, I need to include variables passed in from a POST Json to fill the spark submit like below:
SPARK_TEST_STEPS = [
{
'Name': 'calculate_pi',
'ActionOnFailure': 'CONTINUE',
'HadoopJarStep': {
'Jar': 'command-runner.jar',
'Args': [
'/usr/lib/spark/bin/run-example',
'SparkPi',
kwargs['dag_run'].conf['var_1']
kwargs['dag_run'].conf['var_2']
'10'
]
}
}
]
How can I pass in variables given by the POST Json while still following the format given in the git link to look like below?
from datetime import timedelta
import airflow
from airflow import DAG
from airflow.contrib.operators.emr_create_job_flow_operator import EmrCreateJobFlowOperator
from airflow.contrib.operators.emr_add_steps_operator import EmrAddStepsOperator
from airflow.contrib.sensors.emr_step_sensor import EmrStepSensor
from airflow.contrib.operators.emr_terminate_job_flow_operator import EmrTerminateJobFlowOperator
DEFAULT_ARGS = {
'owner': 'Airflow',
'depends_on_past': False,
'start_date': airflow.utils.dates.days_ago(2),
'email': ['airflow#example.com'],
'email_on_failure': False,
'email_on_retry': False
}
dag = DAG(
'emr_job_flow_manual_steps_dag',
default_args=DEFAULT_ARGS,
dagrun_timeout=timedelta(hours=2),
schedule_interval='0 3 * * *'
)
var_1 = ''
var_2 = ''
SPARK_TEST_STEPS = []
def define_param(**kwargs):
global var_1
global var_2
global SPARK_TEST_STEPS
var_1 = str(kwargs['dag_run'].conf['var_1'])
var_2 = str(kwargs['dag_run'].conf['var_2'])
SPARK_TEST_STEPS = [
{
'Name': 'calculate_pi',
'ActionOnFailure': 'CONTINUE',
'HadoopJarStep': {
'Jar': 'command-runner.jar',
'Args': [
'/usr/lib/spark/bin/run-example',
'SparkPi',
kwargs['dag_run'].conf['var_1']
kwargs['dag_run'].conf['var_2']
'10'
]
}
}
]
return SPARK_TEST_STEPS
DEFINE_PARAMETERS = PythonOperator(
task_id='DEFINE_PARAMETERS',
python_callable=define_param,
provide_context=True,
dag=dag)
cluster_creator = EmrCreateJobFlowOperator(
task_id='create_job_flow',
job_flow_overrides=JOB_FLOW_OVERRIDES,
aws_conn_id='aws_default',
emr_conn_id='emr_default',
dag=dag
)
step_adder = EmrAddStepsOperator(
task_id='add_steps',
job_flow_id="{{ task_instance.xcom_pull('create_job_flow', key='return_value') }}",
aws_conn_id='aws_default',
steps='{{ ti.xcom_pull(task_ids="DEFINE_PARAMETERS") }}',
dag=dag
)
step_checker = EmrStepSensor(
task_id='watch_step',
job_flow_id="{{ task_instance.xcom_pull('create_job_flow', key='return_value') }}",
step_id="{{ task_instance.xcom_pull('add_steps', key='return_value')[0] }}",
aws_conn_id='aws_default',
dag=dag
)
cluster_remover = EmrTerminateJobFlowOperator(
task_id='remove_cluster',
job_flow_id="{{ task_instance.xcom_pull('create_job_flow', key='return_value') }}",
aws_conn_id='aws_default',
dag=dag
)
cluster_creator.set_downstream(step_adder)
step_adder.set_downstream(step_checker)
step_checker.set_downstream(cluster_remover)
I cannot use Variable.get and Variable.set as this will not allow multiple dag calls for different variable types at the same time due to the constant changing of airflow global variables. I have tried calling SPARK_TEST_STEPS using xcom but the return type of xcom is string and EmrAddStepsOperator steps requires a list.
I solved a similar problem by creating a custom operator that parses the json prior to executing. The cause of the problem is that when you pass steps='{{ ti.xcom_pull(task_ids="DEFINE_PARAMETERS") }}',. you are literally passing a string with the value interpolated by the templating engine, it is not deserialized.
from airflow.contrib.hooks.emr_hook import EmrHook
from airflow.exceptions import AirflowException
from airflow.models import BaseOperator
from airflow.utils.decorators import apply_defaults
from airflow.contrib.operators.emr_add_steps_operator import EmrAddStepsOperator
import json
class DynamicEmrStepsOperator(EmrAddStepsOperator):
template_fields = ['job_flow_id', 'steps']
template_ext = ()
ui_color = '#f9c915'
#apply_defaults
def __init__(
self,
job_flow_id=None,
steps="[]",
*args, **kwargs):
super().__init__(
job_flow_id = job_flow_id,
steps = steps,
*args, **kwargs)
def execute(self, context):
self.steps = json.loads(self.steps)
return super().execute(context)

How to troubleshot 'timeout' error on Airflow

I have a new DAG that performs a 3 task operation, the DAG runs fine but every now and then i'm getting a 'timeout' error massage on the top in red. i have now idea why this is.
Does anybody knows what can be the cause ?
Here is my code (with a few parameters i changed for discretion reasons):
from airflow import DAG
from airflow.operators.mysql_operator import MySqlOperator
from datetime import datetime
from airflow.operators.sensors import NamedHivePartitionSensor
from airflow.hooks.presto_hook import PrestoHook
import sys
import os
import logging
sys.path.append(os.environ['SSSSSS'] + '/WWW/WWWWW')
from utils import sql_to_string, parse_exec_to_time, parse_exec_to_date, NewPrestoOperator
from config import emails
from NotifyOperator import NotifyOperator
########################################################################
# Parameters to be set
default_args = {
'owner': 'etl',
'start_date': datetime(2019, 04, 15, 0, 0),
'depends_on_past': True,
'wait_for_downstream': True,
'email': data_team_emails,
'email_on_failure': True,
'email_on_retry': False
}
dag = DAG(dag_id='g13-new_lead_form_alert',
default_args=default_args,
max_active_runs=1,
schedule_interval='0 * * * *')
def _get_records_pandas(query):
start_time = datetime.now()
logging.log(logging.INFO, "Extract Query={}".format(query))
records = PrestoHook(presto_conn_id='{0}-new'.format(os.environ['YYYYY'])).get_pandas_df(query)
logging.log(logging.INFO, "Extract completed. it took:{}".format(str(datetime.now() - start_time)))
return records
SELECT_ALL_QUERY = 'select title, pageloadid from mysql.{0}.agg_pageloadid_lead_form'.format(os.environ['DDDDDD'])
t0 = NamedHivePartitionSensor(task_id='g13-00-wait_for_partition',
partition_names=['{2}.table/dt={0}/tm={1}/'.format(
'{{ (execution_date + macros.timedelta(minutes=60)).strftime(\'%Y-%m-%d\')}}',
'{{ (execution_date + macros.timedelta(minutes=60)).strftime(\'%H\')}}',
os.environ['XXXXX'])],
metastore_conn_id='RRRRRR',
dag=dag,
soft_fail=True,
pool='sensor_tasks',
retries=5
)
t1 = MySqlOperator(
task_id='g13-01-truncate',
sql='''
truncate table {0}.agg_pageloaduid_lead_form
'''.format(os.environ['LLLLL']),
mysql_conn_id='AAAA',
dag=dag)
t2 = NewPrestoOperator(
task_id="g13-02-insert_new_lead",
sql=sql_to_string("/g13_insert_new_lead.sql").format(
os.environ['YYYYY'],
'{{execution_date.strftime(\'%Y-%m-%d\')}}',
'{{execution_date.strftime(\'%H\')}}',
os.environ['ETL_ENVIRONMENT']),
presto_conn_id='{0}-new'.format(os.environ['XXXXX']),
provide_context=True,
fail_on_zero_rows=False,
retries=5,
retry_delay=60,
pool='presto_tasks',
dag=dag
)
t3 = NotifyOperator(
task_id='g13-03-notification',
channels=['test'],
email_recipients=[],
email_subject='New Lead Alert',
email_template="""abc""",
op_kwargs={
'title': 'New Lead Form',
'response': _get_records_pandas(SELECT_ALL_QUERY)
},
dag=dag
)
t0 >> t1 >> t2 >> t3
Any idea what could be causing this ?