Not able to populate Athena script in AWS Lambda

Not able to populate Athena script in AWS Lambda - sql

import awswrangler as wr
def main():
database = 'temp'
output_table = 'Output Table'
output_s3_location = "'s3:Location'"
output_s3_storage_format = "'Parquet'" # "'TEXTFILE'" # "'Parquet'"
create_query = "CREATE TABLE " + database + "." + output_table + " \
WITH ( \
format = "+output_s3_storage_format+", \
external_location = "+ output_s3_location +", \
partitioned_by = ARRAY['run_date']) AS "
insert_query = "INSERT INTO "+database+"."+output_table+" "
select_query = """with temp AS
(SELECT CONCAT(period,' ',date1) AS new_Date,text,text1
FROM
(SELECT Substr(CAST(year AS varchar),3,4) AS date1,
test,
text1,
CASE
WHEN period='JAN' THEN 'Jan'
WHEN period='FEB' THEN 'Feb'
WHEN period='MAR' THEN 'Mar'
WHEN period='APR' THEN 'Apr'
WHEN period='MAY' THEN 'May'
WHEN period='JUN' THEN 'Jun'
WHEN period='JUL' THEN 'Jul'
WHEN period='AUG' THEN 'Aug'
WHEN period='SEP' THEN 'Sep'
WHEN period='OCT' THEN 'Oct'
WHEN period='NOV' THEN 'Nov'
WHEN period='DEC' THEN 'Dec'
END AS period
FROM Table_Name
WHERE text1='Car' ) ) ,
temp1 AS
(SELECT temp.* from A left join temp
ON A.value=temp.value)
Select * from temp2";"""
if create:
wr.athena.read_sql_query(sql="DROP TABLE " + database +"." + output_table + ";", database=database, ctas_approach=False)
wr.athena.read_sql_query(sql=create_query+select_query, database=database, ctas_approach=False)
else:
wr.athena.read_sql_query(sql=insert_query+select_query, database=database, ctas_approach=False)
def lambda_handler(event, context):
main()
return "True"
I am getting the below error while writing this script in Lambda function and testing it:
Response:
{
"errorMessage": "An error occurred (InvalidRequestException) when calling the StartQueryExecution operation: line 24:23: mismatched input '.' expecting {',', ')', 'FROM', 'WHERE', 'GROUP', 'ORDER', 'HAVING', 'LIMIT', 'UNION', 'EXCEPT', 'INTERSECT'}",
"errorType": "InvalidRequestException",
"stackTrace": [
" File \"/var/task/lambda_function.py\", line 134, in lambda_handler\n main()\n",
" File \"/var/task/lambda_function.py\", line 128, in main\n wr.athena.read_sql_query(sql=create_query+select_query, database=database, ctas_approach=False)\n",
" File \"/opt/python/awswrangler/_config.py\", line 263, in wrapper\n return function(**args)\n",

Related

Redshift Copy Command Errors: could not open relation with OID 591923 through AWS Glue Spark

There is a code which has been running since 6 months in production, which runs in a loop for given number of tables and does a redshift copy. It is has been running successfully till 31st October, from 1st November till date it failed (for one particular table; runs fine for others).
## Truncate and execute Copy command.
def ExecuteCopyCommand(TableList):
QueryIdDict = {}
for TableName in TableList:
SourcePath = f's3://{BucketName}/{prefix}'
query = f" truncate table {TableName}; \
copy {TableName} \
from '{SourcePath}' \
iam_role 'abcd' \
delimiter as '.' \
ignoreheader 1 \
dateformat as 'auto' \
timeformat as 'auto' \
Null as 'NULL';"
## Executing truncate and copy command on redshift cluster
try:
response = client.execute_statement(
ClusterIdentifier='redshift-abc',
Database='abc',
SecretArn='arn:aws:secretsmanager:abcd',
Sql= query
)
print(TableName + ": Copy command executed")
print('Query',query)
print('Response',response)
QueryId = response['Id']
QueryIdDict[QueryId] = TableName
DataDict= { 'Level': 'Info',
'SourceLocation': SourcePath,
'TargetDatabaseName': 'redshift-abc',
'TargetSchemaName': str(TableName.split('.')[0]),
'TargetTableName': str(TableName.split('.')[1]),
'ExecutedQuery': query.strip(),
'ExecutedQueryId': str(QueryId),
'Description': 'Copy command executed on redshift and query is in progress.',
'Status': 'Succeeded'
}
DataList.append(DataDict)
time.sleep(1)
except Exception as e:
DataDict= { 'Level': 'Error',
'SourceLocation': SourcePath,
'TargetDatabaseName': 'redshift-abc',
'TargetSchemaName': str(TableName.split('.')[0]),
'TargetTableName': str(TableName.split('.')[1]),
'ExecutedQuery': query.strip(),
'ExecutedQueryId': '',
'Description': f'Fail to execute copy command. Error : {str(e)}',
'Status': 'Failed'
}
DataList.append(DataDict)
print('Error occur in ExecuteCopyCommand block.')
print('Error occur while executing copy command.')
print('TableName : ' + TableName)
print(e)
raise
print('Query dict',QueryIdDict)
return QueryIdDict
The below code fails with the following error:
Main error: Exception: ERROR: could not open relation with OID 591927
Traceback:
test_table: Copy command executed
Query truncate table test_table; copy test_table from 's3://bucket_test/pipeline/test_table/year=2022/month=02/day=28/' iam_role 'arn:aws:iam::xyz:role/Account-B-Glue-Redshift-Cloudwatch' delimiter as '.' ignoreheader 1 dateformat as 'auto' timeformat as 'auto' Null as 'NULL';
Response {'ClusterIdentifier': 'redshift-abc', 'CreatedAt': datetime.datetime(2022, 11, 10, 6, 21, 42, 363000, tzinfo=tzlocal()), 'Database': 'abc', 'Id': 'abcdcs-4878-446b-80e9-8d544860847a', 'SecretArn': 'arn:aws:secretsmanager:abcd', 'ResponseMetadata': {'RequestId': '690f6542-4e33-4d84-afb8-2f9ebc9af62e', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '690f6542-4e33-4d84-afb8-2f9ebc9af62e', 'content-type': 'application/x-amz-json-1.1', 'content-length': '231', 'date': 'Thu, 10 Nov 2022 06:21:42 GMT'}, 'RetryAttempts': 0}}
Query dict {'abcdcs-4878-446b-80e9-8d544860847a': 'test_table'}
QueryId of executed copy command
{'abcdcs-4878-446b-80e9-8d544860847a': 'test_table'}
Checking executed query status for each table.
test_table: Copy command failed
{'ClusterIdentifier': 'redshift-abc', 'CreatedAt': datetime.datetime(2022, 11, 10, 6, 21, 42, 363000, tzinfo=tzlocal()), 'Duration': -1, 'Error': 'ERROR: could not open relation with OID 591927', 'HasResultSet': False, 'Id': '9c6cb33c-4878-446b-80e9-8d544860847a', 'QueryString': " truncate table test_table; copy test_table from 's3://bucket_test/pipeline/test_table/year=2022/month=02/day=28/' iam_role '' delimiter as '\x01' ignoreheader 1 dateformat as 'auto' timeformat as 'auto' Null as 'NULL';", 'RedshiftPid': 1073775000, 'RedshiftQueryId': 6553022, 'ResultRows': -1, 'ResultSize': -1, 'SecretArn': 'arn:aws:secretsmanager:abcd', 'Status': 'FAILED', 'UpdatedAt': datetime.datetime(2022, 11, 10, 6, 21, 42, 937000, tzinfo=tzlocal()), 'ResponseMetadata': {'RequestId': 'c77cb319-14d3-42fd-8c34-611dbd5a17b4', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'c77cb319-14d3-42fd-8c34-611dbd5a17b4', 'content-type': 'application/x-amz-json-1.1', 'content-length': '890', 'date': 'Thu, 10 Nov 2022 06:22:13 GMT'}, 'RetryAttempts': 0}}
Error occur in CheckQueryStatus block
ERROR: could not open relation with OID 591927
Error occur in main block.
Fail to refresh table in redshift.
{'MessageId': 'eb6338b8-cd1d-5d47-8a63-635e57fee266', 'ResponseMetadata': {'RequestId': '60766afd-c861-5c1d-9d61-311b5282333c', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '60766afd-c861-5c1d-9d61-311b5282333c', 'content-type': 'text/xml', 'content-length': '294', 'date': 'Thu, 10 Nov 2022 06:22:26 GMT'}, 'RetryAttempts': 0}}
Email Notification sent to respective e-mail id.
ERROR: could not open relation with OID 591927
The error is raised from the CheckQueryStatus function, that is as follows:
## Check executed query status.
def CheckQueryStatus(QueryIdDict):
InprogressQueryIdList = [key for key in QueryIdDict.keys()]
SucceedTableList = []
## Expected Status of running query
FailStatus = ['ABORTED','FAILED']
InprogressStatus = ['SUBMITTED','PICKED','STARTED']
SucceedStatus = ['FINISHED']
try:
while len(InprogressQueryIdList):
for QueryId in InprogressQueryIdList:
response = client.describe_statement(
Id=QueryId
)
if response['Status'] in SucceedStatus:
SucceedTableList.append(QueryIdDict[QueryId])
InprogressQueryIdList.remove(QueryId)
print('Query Executed Sucessfully : ' + QueryIdDict[QueryId])
SourcePath = f's3://{BucketName}/pipeline/{QueryIdDict[QueryId]}/{PathPrefix}/'
DataDict= { 'Level': 'Info',
'SourceLocation': SourcePath,
'TargetDatabaseName': 'abc',
'TargetSchemaName': str(QueryIdDict[QueryId].split('.')[0]),
'TargetTableName': str(QueryIdDict[QueryId].split('.')[1]),
'ExecutedQuery': '',
'ExecutedQueryId': str(QueryId),
'Description': 'Data loaded successfully in staging table',
'Status': 'Succeed'
}
DataList.append(DataDict)
elif response['Status'] in InprogressStatus:
time.sleep(30)
else:
print(QueryIdDict[QueryId] + ': Copy command failed\n')
print(response)
raise Exception(str(response['Error']))
print('Table refreshed successfully\n')
print(SucceedTableList)
except Exception as e:
SourcePath = f's3://{BucketName}/pipeline/{QueryIdDict[QueryId]}/{PathPrefix}/'
DataDict= { 'Level': 'Error',
'SourceLocation': SourcePath,
'TargetDatabaseName': 'abc',
'TargetSchemaName': str(QueryIdDict[QueryId].split('.')[0]),
'TargetTableName': str(QueryIdDict[QueryId].split('.')[1]),
'ExecutedQuery': '',
'ExecutedQueryId': str(QueryId),
'Description': f'Copy command failed.{response["Error"]}',
'Status': 'Failed'
}
DataList.append(DataDict)
print('Error occur in CheckQueryStatus block')
print(e)
raise
Now:
When I run the same copy command from DBeaver or some other query tool, it works perfectly fine.
When I run this code for other tables, exact same code, it works fine. Only failing for this table.
Created a test table to see if this is not the typical Postgres OID bug, but the error could be replicated.
This has brought me to a state of confusion. Any help?

This error is often caused by stale table info and some other process that is dropping the target table (and possibly recreating a new table of the same name). See similar questions / answers - tracing the cause of "could not open relation with OID" error

I am working on the hr leave odoo 12, xlsx report by department , I have overwrite problem when print xlsx report for more than one department

I am working on the hr leave odoo 12, xlsx report by department , I have overwrite problem when print xlsx report for more than one department.
here is my code:
############################### for department #########################################
def get_all_date(self, data, empid):
domain = [('state', '=', 'validate'), ('employee_id', '=', empid)]
if data.get('date_from'):
domain.append(('date_from', '>=', data.get('date_from')))
if data.get('date_to'):
domain.append(('date_from', '<=', data.get('date_to')))
print(domain)
leave = self.env['hr.leave'].search(domain)
return leave
def generate_xlsx_report(self, workbook, data, record):
##################### for department ########
res = []
Employee = self.env['hr.employee']
if 'depts' in data:
for department in self.env['hr.department'].browse(data['depts']):
res.append(
{
'dept': department.name,
'data': []
}
)
for emp in Employee.search([('department_id', '=', department.id)]):
res[len(res) - 1]['data'].append(
{
# 'emp': emp.name,
'display': self.get_all_date(data['data'], emp.id)
}
)
sheet = workbook.add_worksheet('Leaves Report')
bold = workbook.add_format({'bold': True, 'align': 'center', 'bg_color': '#fffbed', 'border': True})
format = workbook.add_format({'num_format': 'd-m-yyyy'})
header_row_style = workbook.add_format({'bold': True, 'align': 'center', 'border': True})
format2 = workbook.add_format({'font_size': 10, 'bold': True, 'align': 'center', })
title = workbook.add_format(
{'bold': True, 'align': 'center', 'font_size': 20, 'bg_color': '#f2eee4', 'border': True})
sheet.merge_range('A1:E1', 'Leaves Summary Report', title)
# Header row
# Header row
sheet.set_column(0, 4, 18)
sheet.write(2, 0, 'Department', header_row_style)
sheet.write(3, 1, 'Employee', header_row_style)
sheet.write(3, 2, ' Start date', header_row_style)
sheet.write(3, 3, 'End date', header_row_style)
sheet.write(3, 4, 'Leave Type', header_row_style)
######################### for department #############################
for rows, i in enumerate(res):
print(i)
col=0
sheet.write(rows + 4, col, i['dept'], format2)
#rows+1
for j in i['data']:
print(j)
for rows ,k in enumerate(j['display']):
print(k)
# sheet.write(ro + 3, col, k.department_id.name, format2)
sheet.write(rows + 4, col + 1, k.employee_id.name, format2)
sheet.write(rows + 4, col + 2, k.date_from, format)
sheet.write(rows+ 4, col + 3, k.date_to, format)
sheet.write(rows + 4, col + 4, k.holiday_status_id.name, format2)
rows+ 1

Showing only none in output instead of changing numeric value to capitilized alphabets

def convert_digits(input_string, start_position, end_position):
# The ending index was required as it was not returning the whole sentence
new_string = input_string[:end_position]
newstring = " "
# return new_string
digit_mapping = {
'0': 'ZERO',
'1': 'ONE',
'2': 'TWO',
'3': 'THREE',
'4': 'FOUR',
'5': 'FIVE',
'6': 'SIX',
'7': 'SEVEN',
'8': 'EIGHT',
'9': 'NINE'
}
if start_position >= 1:
if end_position <= len(new_string):
if start_position < end_position:
for index in range(start_position - 1, end_position):
if input_string[index].isdigit():
mapped = digit_mapping[input_string[index]]
newstring += " " + mapped + " "
else:
newstring += input_string[index]
else:
return "INVALID"
else:
return "INVALID"
else:
return "INVALID"
return newstring
if name == 'main':
print(convert_digits("you are a 4king 5shole", 1, 21))

Use this code.
Your problem was in line 39, you add 2 tabs place 1.
def convert_digits(input_string, start_position, end_position):
# The ending index was required as it was not returning the whole sentence
new_string = input_string[:end_position]
newstring = " "
# return new_string
digit_mapping = {
'0': 'ZERO',
'1': 'ONE',
'2': 'TWO',
'3': 'THREE',
'4': 'FOUR',
'5': 'FIVE',
'6': 'SIX',
'7': 'SEVEN',
'8': 'EIGHT',
'9': 'NINE'
}
if start_position >= 1:
if end_position <= len(new_string):
if start_position < end_position:
for index in range(start_position - 1, end_position):
if input_string[index].isdigit():
mapped = digit_mapping[input_string[index]]
newstring += " " + mapped + " "
else:
newstring += input_string[index]
else:
return "INVALID"
else:
return "INVALID"
else:
return "INVALID"
return newstring
if __name__ == '__main__':
print(convert_digits("you are a 4king 5shole", 1, 21))

How to SQL conver to dataframe

I want to convert to SQL to dataframe.\
SELECT day,
MAX(id),
MAX(if(device = 'Mobile devices with full browsers', 'mobile', 'pc')),
AVG(replace(replace(search_imprshare, '< 10%', '10'), '%', '') / 100),
REPLACE(SUBSTRING(SUBSTRING_INDEX(add_trackingcode, '_', 1), CHAR_LENGTH(SUBSTRING_INDEX(add_trackingcode, '_', 1 - 1)) + 2), add_trackingcode, '')
FROM MY_TEST_TABLE
GROUP BY day
But I can only do below that.
I don't know how to work on '???'.
df_data= df_data.groupby(['day').agg(
{
'id': np.max,
'device ' : ???,
'percent' : ???,
'tracking' : ???
}
)
How should I do it?

Converting formula from Crystal Reports to SSRS

I'll try and keep this as short as possible but I'm trying to convert a formula cell from crystal report to SSRS.
Here is the query:
SELECT
(SELECT START_DATE
FROM APPS.GL_PERIODS
WHERE PERIOD_TYPE = 'Month'
AND TRUNC(SYSDATE-:Days) BETWEEN START_DATE AND END_DATE) STR_DATE,
(SELECT END_DATE
FROM APPS.GL_PERIODS
WHERE PERIOD_TYPE = 'Month'
AND TRUNC(SYSDATE-:Days) BETWEEN START_DATE AND END_DATE) END_DATE,
DECODE(RT.ORGANIZATION_ID, 104, 'LPD',RT.ORGANIZATION_ID) ORG,
SUBSTR(POV.VENDOR_NAME, 1, 24) VENDOR_NAME,
DECODE(SUBSTR(PHA.SEGMENT1, 2,1), 'E', 'EXPENSE', 'e', 'EXPENSE', 'P', 'PRODUCT', 'p', 'PRODUCT', ' OTHER') PO_TYPE,
DECODE(SIGN(TRUNC(RT.TRANSACTION_DATE) - TRUNC(NVL(PLL.PROMISED_DATE - 3, PLL.NEED_BY_DATE - 3))), -1, 'LATE', 'ON TIME') PERFORMANCE,
COUNT(*) LINE_COUNT
FROM
APPS.RCV_TRANSACTIONS RT,
APPS.PO_HEADERS_ALL PHA,
APPS.PO_LINES_ALL PLA,
APPS.PO_LINE_LOCATIONS_ALL PLL,
APPS.PO_VENDORS POV
WHERE
RT.ORGANIZATION_ID = 104
AND RT.TRANSACTION_DATE >= (SELECT START_DATE
FROM APPS.GL_PERIODS
WHERE PERIOD_TYPE = 'Month'
AND TRUNC(SYSDATE-:Days) BETWEEN START_DATE AND END_DATE)
AND RT.TRANSACTION_DATE < (SELECT END_DATE + 1
FROM APPS.GL_PERIODS
WHERE PERIOD_TYPE = 'Month'
AND TRUNC(SYSDATE-:Days) BETWEEN START_DATE AND END_DATE)
AND RT.TRANSACTION_TYPE = 'RECEIVE'
AND RT.PO_HEADER_ID = PLL.PO_HEADER_ID
AND RT.PO_LINE_LOCATION_ID = PLL.LINE_LOCATION_ID
AND RT.PO_LINE_ID = PLL.PO_LINE_ID
AND RT.ORGANIZATION_ID = PLL.SHIP_TO_ORGANIZATION_ID
AND PLA.PO_LINE_ID = PLL.PO_LINE_ID
AND PLA.PO_HEADER_ID = PLL.PO_HEADER_ID
AND PHA.PO_HEADER_ID = PLA.PO_HEADER_ID
AND PHA.VENDOR_ID = POV.VENDOR_ID
GROUP BY
DECODE(RT.ORGANIZATION_ID, 104, 'LPD', RT.ORGANIZATION_ID),
SUBSTR(POV.VENDOR_NAME, 1, 24),
DECODE(SUBSTR(PHA.SEGMENT1, 2, 1), 'E', 'EXPENSE', 'e', 'EXPENSE', 'P', 'PRODUCT', 'p', 'PRODUCT', ' OTHER'),
DECODE(SIGN(TRUNC(RT.TRANSACTION_DATE) - TRUNC(NVL(PLL.PROMISED_DATE - 3, PLL.NEED_BY_DATE - 3))), -1, 'LATE', 'ON TIME')
ORDER BY
ORG, VENDOR_NAME, PO_TYPE, PERFORMANCE
In crystal the formula is
SUM({query.LINE_COUNT},{query.PERFORMANCE}) % SUM({query.LINE_COUNT}, {query.PO_TYPE})
This cell basically is just calculating the percentage of on time deliveries and late ones.

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

Not able to populate Athena script in AWS Lambda - sql

Related

Redshift Copy Command Errors: could not open relation with OID 591923 through AWS Glue Spark

I am working on the hr leave odoo 12, xlsx report by department , I have overwrite problem when print xlsx report for more than one department

Showing only none in output instead of changing numeric value to capitilized alphabets

How to SQL conver to dataframe

Converting formula from Crystal Reports to SSRS

Categories

Resources