import awswrangler as wr
def main(create=None):
    """Create, or append to, the Athena output table from the report query.

    One shared SELECT is prefixed either with a CTAS header (after dropping
    any existing table) or with ``INSERT INTO`` and submitted through
    ``wr.athena.read_sql_query``.

    Args:
        create: Truthy -> drop the table and recreate it with
            ``CREATE TABLE ... AS``; falsy -> ``INSERT INTO`` the existing
            table. When left as ``None`` a module-level ``create`` flag is
            used if one exists, otherwise the function appends. Previously
            the name was read without ever being defined in this function,
            which raises ``NameError`` when no such global exists.
    """
    if create is None:
        # Backward compatibility with callers that set a module-level flag.
        create = globals().get("create", False)

    database = 'temp'
    # NOTE(review): the name contains a space and is interpolated unquoted
    # into the SQL below -- presumably a placeholder; confirm the real name.
    output_table = 'Output Table'
    output_s3_location = "'s3:Location'"
    output_s3_storage_format = "'Parquet'"  # alternative: "'TEXTFILE'"
    qualified_table = database + "." + output_table

    create_query = (
        f"CREATE TABLE {qualified_table} "
        f"WITH ( format = {output_s3_storage_format}, "
        f"external_location = {output_s3_location}, "
        "partitioned_by = ARRAY['run_date']) AS "
    )
    insert_query = f"INSERT INTO {qualified_table} "

    # The statement used to end with `temp2";` -- the stray double quote is
    # not valid SQL and has been removed.
    # NOTE(review): the final SELECT reads `temp2`, but only `temp` and
    # `temp1` are defined as CTEs; the inner SELECT lists `test` while the
    # outer one expects `text`; and no `run_date` column is selected even
    # though the table is partitioned by it. Left untouched because the
    # intended names cannot be determined from this file.
    select_query = """with temp AS
(SELECT CONCAT(period,' ',date1) AS new_Date,text,text1
FROM
(SELECT Substr(CAST(year AS varchar),3,4) AS date1,
test,
text1,
CASE
WHEN period='JAN' THEN 'Jan'
WHEN period='FEB' THEN 'Feb'
WHEN period='MAR' THEN 'Mar'
WHEN period='APR' THEN 'Apr'
WHEN period='MAY' THEN 'May'
WHEN period='JUN' THEN 'Jun'
WHEN period='JUL' THEN 'Jul'
WHEN period='AUG' THEN 'Aug'
WHEN period='SEP' THEN 'Sep'
WHEN period='OCT' THEN 'Oct'
WHEN period='NOV' THEN 'Nov'
WHEN period='DEC' THEN 'Dec'
END AS period
FROM Table_Name
WHERE text1='Car' ) ) ,
temp1 AS
(SELECT temp.* from A left join temp
ON A.value=temp.value)
Select * from temp2;"""

    # NOTE(review): these statements return no rows; confirm that
    # read_sql_query (rather than a start/wait query call) is the intended
    # awswrangler entry point for DDL/DML.
    if create:
        # IF EXISTS keeps the very first run (no table yet) from failing.
        wr.athena.read_sql_query(
            sql="DROP TABLE IF EXISTS " + qualified_table + ";",
            database=database,
            ctas_approach=False,
        )
        wr.athena.read_sql_query(
            sql=create_query + select_query,
            database=database,
            ctas_approach=False,
        )
    else:
        wr.athena.read_sql_query(
            sql=insert_query + select_query,
            database=database,
            ctas_approach=False,
        )
def lambda_handler(event, context):
    """Lambda entry point: run ``main`` and report completion.

    ``event`` and ``context`` are accepted but not read. The return value is
    the string ``"True"`` (not the boolean) once ``main`` has returned.
    """
    main()
    outcome = "True"
    return outcome
I am getting the error below when I run and test this script in a Lambda function:
Response:
{
"errorMessage": "An error occurred (InvalidRequestException) when calling the StartQueryExecution operation: line 24:23: mismatched input '.' expecting {',', ')', 'FROM', 'WHERE', 'GROUP', 'ORDER', 'HAVING', 'LIMIT', 'UNION', 'EXCEPT', 'INTERSECT'}",
"errorType": "InvalidRequestException",
"stackTrace": [
" File \"/var/task/lambda_function.py\", line 134, in lambda_handler\n main()\n",
" File \"/var/task/lambda_function.py\", line 128, in main\n wr.athena.read_sql_query(sql=create_query+select_query, database=database, ctas_approach=False)\n",
" File \"/opt/python/awswrangler/_config.py\", line 263, in wrapper\n return function(**args)\n",
Related
There is code that has been running in production for six months; it loops over a given list of tables and runs a Redshift COPY for each one. It ran successfully until 31st October, but from 1st November to date it has been failing (for one particular table only; it runs fine for the others).
## Truncate and execute Copy command.
def ExecuteCopyCommand(TableList):
    """Submit a truncate-and-copy statement to Redshift for each table.

    For every name in ``TableList`` one ``truncate table ...; copy ...``
    statement is sent through ``client.execute_statement``. An audit record
    is appended to the module-level ``DataList`` for each submission
    (``Level`` 'Info') or failure (``Level`` 'Error').

    Args:
        TableList: Iterable of table names, normally ``schema.table``.
            Names are interpolated directly into the SQL text, so they must
            come from trusted configuration.

    Returns:
        dict mapping each returned statement id to its table name.

    Raises:
        Exception: whatever was raised while submitting a statement; it is
            re-raised after the failure record has been stored.
    """

    def _schema_and_table(name):
        # An unqualified name (no '.') used to raise IndexError here -- also
        # inside the except handler, where it hid the real error.
        parts = name.split('.')
        if len(parts) < 2:
            return '', name
        return parts[0], parts[1]

    def _audit_record(level, query_id, description, status):
        # Single place that defines the audit-record layout for both outcomes.
        schema_name, table_name = _schema_and_table(TableName)
        return {
            'Level': level,
            'SourceLocation': SourcePath,
            'TargetDatabaseName': 'redshift-abc',
            'TargetSchemaName': schema_name,
            'TargetTableName': table_name,
            'ExecutedQuery': query.strip(),
            'ExecutedQueryId': query_id,
            'Description': description,
            'Status': status,
        }

    QueryIdDict = {}
    for TableName in TableList:
        # NOTE(review): the path is built from BucketName/prefix only and
        # does not vary with TableName -- confirm that is intended.
        SourcePath = f's3://{BucketName}/{prefix}'
        query = (
            f" truncate table {TableName}; "
            f"copy {TableName} "
            f"from '{SourcePath}' "
            "iam_role 'abcd' "
            "delimiter as '.' "
            "ignoreheader 1 "
            "dateformat as 'auto' "
            "timeformat as 'auto' "
            "Null as 'NULL';"
        )
        ## Executing truncate and copy command on redshift cluster
        try:
            response = client.execute_statement(
                ClusterIdentifier='redshift-abc',
                Database='abc',
                SecretArn='arn:aws:secretsmanager:abcd',
                Sql=query,
            )
            print(TableName + ": Copy command executed")
            print('Query', query)
            print('Response', response)
            QueryId = response['Id']
            QueryIdDict[QueryId] = TableName
            DataList.append(_audit_record(
                'Info',
                str(QueryId),
                'Copy command executed on redshift and query is in progress.',
                'Succeeded',
            ))
            # Short pause between submissions, as before.
            time.sleep(1)
        except Exception as e:
            DataList.append(_audit_record(
                'Error',
                '',
                f'Fail to execute copy command. Error : {str(e)}',
                'Failed',
            ))
            print('Error occur in ExecuteCopyCommand block.')
            print('Error occur while executing copy command.')
            print('TableName : ' + TableName)
            print(e)
            raise
    print('Query dict', QueryIdDict)
    return QueryIdDict
The code above fails with the following error:
Main error: Exception: ERROR: could not open relation with OID 591927
Traceback:
test_table: Copy command executed
Query truncate table test_table; copy test_table from 's3://bucket_test/pipeline/test_table/year=2022/month=02/day=28/' iam_role 'arn:aws:iam::xyz:role/Account-B-Glue-Redshift-Cloudwatch' delimiter as '.' ignoreheader 1 dateformat as 'auto' timeformat as 'auto' Null as 'NULL';
Response {'ClusterIdentifier': 'redshift-abc', 'CreatedAt': datetime.datetime(2022, 11, 10, 6, 21, 42, 363000, tzinfo=tzlocal()), 'Database': 'abc', 'Id': 'abcdcs-4878-446b-80e9-8d544860847a', 'SecretArn': 'arn:aws:secretsmanager:abcd', 'ResponseMetadata': {'RequestId': '690f6542-4e33-4d84-afb8-2f9ebc9af62e', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '690f6542-4e33-4d84-afb8-2f9ebc9af62e', 'content-type': 'application/x-amz-json-1.1', 'content-length': '231', 'date': 'Thu, 10 Nov 2022 06:21:42 GMT'}, 'RetryAttempts': 0}}
Query dict {'abcdcs-4878-446b-80e9-8d544860847a': 'test_table'}
QueryId of executed copy command
{'abcdcs-4878-446b-80e9-8d544860847a': 'test_table'}
Checking executed query status for each table.
test_table: Copy command failed
{'ClusterIdentifier': 'redshift-abc', 'CreatedAt': datetime.datetime(2022, 11, 10, 6, 21, 42, 363000, tzinfo=tzlocal()), 'Duration': -1, 'Error': 'ERROR: could not open relation with OID 591927', 'HasResultSet': False, 'Id': '9c6cb33c-4878-446b-80e9-8d544860847a', 'QueryString': " truncate table test_table; copy test_table from 's3://bucket_test/pipeline/test_table/year=2022/month=02/day=28/' iam_role '' delimiter as '\x01' ignoreheader 1 dateformat as 'auto' timeformat as 'auto' Null as 'NULL';", 'RedshiftPid': 1073775000, 'RedshiftQueryId': 6553022, 'ResultRows': -1, 'ResultSize': -1, 'SecretArn': 'arn:aws:secretsmanager:abcd', 'Status': 'FAILED', 'UpdatedAt': datetime.datetime(2022, 11, 10, 6, 21, 42, 937000, tzinfo=tzlocal()), 'ResponseMetadata': {'RequestId': 'c77cb319-14d3-42fd-8c34-611dbd5a17b4', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'c77cb319-14d3-42fd-8c34-611dbd5a17b4', 'content-type': 'application/x-amz-json-1.1', 'content-length': '890', 'date': 'Thu, 10 Nov 2022 06:22:13 GMT'}, 'RetryAttempts': 0}}
Error occur in CheckQueryStatus block
ERROR: could not open relation with OID 591927
Error occur in main block.
Fail to refresh table in redshift.
{'MessageId': 'eb6338b8-cd1d-5d47-8a63-635e57fee266', 'ResponseMetadata': {'RequestId': '60766afd-c861-5c1d-9d61-311b5282333c', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '60766afd-c861-5c1d-9d61-311b5282333c', 'content-type': 'text/xml', 'content-length': '294', 'date': 'Thu, 10 Nov 2022 06:22:26 GMT'}, 'RetryAttempts': 0}}
Email Notification sent to respective e-mail id.
ERROR: could not open relation with OID 591927
The error is raised from the CheckQueryStatus function, that is as follows:
## Check executed query status.
def CheckQueryStatus(QueryIdDict):
    """Poll the Redshift Data API until every submitted statement finishes.

    Each statement id is checked with ``client.describe_statement``.
    Finished statements get an 'Info' audit record in the module-level
    ``DataList``; statements still running are polled again after a
    30-second pause; any other status is treated as a failure.

    Changes from the earlier implementation:
      * the pending list is rebuilt every round instead of being modified
        while it is iterated (which skipped the entry after each removal);
      * the 30-second pause happens once per polling round, not once per
        statement that is still running;
      * the failure record is built from the caught exception, so a
        response without an ``Error`` key no longer raises a second error
        inside the handler;
      * table names without a schema part no longer raise ``IndexError``.

    Args:
        QueryIdDict: Mapping of statement id -> table name.

    Raises:
        Exception: carrying the statement's error text when a statement
            ends in a non-success status; any exception raised by the
            client is re-raised after the failure record is stored.
    """

    def _schema_and_table(name):
        parts = name.split('.')
        if len(parts) < 2:
            return '', name
        return parts[0], parts[1]

    ## Expected Status of running query
    # Anything outside these two lists (e.g. 'ABORTED', 'FAILED') is a failure.
    InprogressStatus = ['SUBMITTED', 'PICKED', 'STARTED']
    SucceedStatus = ['FINISHED']

    pending = list(QueryIdDict)
    SucceedTableList = []
    QueryId = None
    try:
        while pending:
            still_running = []
            for QueryId in pending:
                response = client.describe_statement(Id=QueryId)
                status = response['Status']
                table_full_name = QueryIdDict[QueryId]
                if status in SucceedStatus:
                    SucceedTableList.append(table_full_name)
                    print('Query Executed Sucessfully : ' + table_full_name)
                    schema_name, table_name = _schema_and_table(table_full_name)
                    DataList.append({
                        'Level': 'Info',
                        'SourceLocation': f's3://{BucketName}/pipeline/{table_full_name}/{PathPrefix}/',
                        'TargetDatabaseName': 'abc',
                        'TargetSchemaName': schema_name,
                        'TargetTableName': table_name,
                        'ExecutedQuery': '',
                        'ExecutedQueryId': str(QueryId),
                        'Description': 'Data loaded successfully in staging table',
                        'Status': 'Succeed',
                    })
                elif status in InprogressStatus:
                    still_running.append(QueryId)
                else:
                    print(table_full_name + ': Copy command failed\n')
                    print(response)
                    raise Exception(str(response.get(
                        'Error', f'Statement ended with status {status}')))
            pending = still_running
            if pending:
                time.sleep(30)
        print('Table refreshed successfully\n')
        print(SucceedTableList)
    except Exception as e:
        failed_full_name = QueryIdDict.get(QueryId, '')
        schema_name, table_name = _schema_and_table(failed_full_name)
        DataList.append({
            'Level': 'Error',
            'SourceLocation': f's3://{BucketName}/pipeline/{failed_full_name}/{PathPrefix}/',
            'TargetDatabaseName': 'abc',
            'TargetSchemaName': schema_name,
            'TargetTableName': table_name,
            'ExecutedQuery': '',
            'ExecutedQueryId': str(QueryId) if QueryId is not None else '',
            # For a failed statement `e` carries response['Error'], so the
            # text matches what was recorded before.
            'Description': f'Copy command failed.{e}',
            'Status': 'Failed',
        })
        print('Error occur in CheckQueryStatus block')
        print(e)
        raise
Now:
When I run the same copy command from DBeaver or some other query tool, it works perfectly fine.
When I run this code for other tables, exact same code, it works fine. Only failing for this table.
Created a test table to see if this is not the typical Postgres OID bug, but the error could be replicated.
This has brought me to a state of confusion. Any help?
This error is often caused by stale table info and some other process that is dropping the target table (and possibly recreating a new table of the same name). See similar questions / answers - tracing the cause of "could not open relation with OID" error
I am working on the HR leave module in Odoo 12, on an XLSX report grouped by department. I have an overwrite problem when I print the XLSX report for more than one department.
here is my code:
############################### for department #########################################
def get_all_date(self, data, empid):
    """Return the validated ``hr.leave`` records of one employee.

    The search always filters on ``state == 'validate'`` and the given
    employee id. When ``data`` carries a truthy ``date_from`` and/or
    ``date_to``, both are applied as bounds on the leave's ``date_from``
    field (lower and upper bound respectively). The final domain is printed
    before the search runs.
    """
    lower_bound = data.get('date_from')
    upper_bound = data.get('date_to')

    domain = [('state', '=', 'validate'), ('employee_id', '=', empid)]
    if lower_bound:
        domain += [('date_from', '>=', lower_bound)]
    if upper_bound:
        domain += [('date_from', '<=', upper_bound)]

    print(domain)
    return self.env['hr.leave'].search(domain)
def generate_xlsx_report(self, workbook, data, record):
    """Write the 'Leaves Report' worksheet, grouped by department.

    For every department id in ``data['depts']`` the department's employees
    are looked up and their leaves fetched with ``self.get_all_date``. Each
    leave is written on its own row; the department name goes in column A
    on the first row of its group.

    A single running row counter is used for the whole sheet. Previously
    the inner ``enumerate`` rebound ``rows`` and restarted at 0 for every
    employee, so later employees and departments overwrote earlier rows.

    Args:
        workbook: Workbook object providing ``add_worksheet``/``add_format``.
        data: Report options; reads ``data['depts']`` (optional) and
            ``data['data']`` (passed to ``get_all_date``).
        record: Not used by this method.
    """
    # Collect, per department, one entry per employee holding that
    # employee's leaves.
    res = []
    Employee = self.env['hr.employee']
    if 'depts' in data:
        for department in self.env['hr.department'].browse(data['depts']):
            employees = Employee.search([('department_id', '=', department.id)])
            res.append({
                'dept': department.name,
                'data': [
                    {'display': self.get_all_date(data['data'], emp.id)}
                    for emp in employees
                ],
            })

    sheet = workbook.add_worksheet('Leaves Report')
    date_format = workbook.add_format({'num_format': 'd-m-yyyy'})
    header_row_style = workbook.add_format({'bold': True, 'align': 'center', 'border': True})
    format2 = workbook.add_format({'font_size': 10, 'bold': True, 'align': 'center', })
    title = workbook.add_format(
        {'bold': True, 'align': 'center', 'font_size': 20, 'bg_color': '#f2eee4', 'border': True})
    sheet.merge_range('A1:E1', 'Leaves Summary Report', title)

    # Header row
    sheet.set_column(0, 4, 18)
    sheet.write(2, 0, 'Department', header_row_style)
    sheet.write(3, 1, 'Employee', header_row_style)
    sheet.write(3, 2, ' Start date', header_row_style)
    sheet.write(3, 3, 'End date', header_row_style)
    sheet.write(3, 4, 'Leave Type', header_row_style)

    # Data rows start right below the header.
    row = 4
    for dept_entry in res:
        dept_first_row = row
        sheet.write(dept_first_row, 0, dept_entry['dept'], format2)
        for emp_entry in dept_entry['data']:
            for leave in emp_entry['display']:
                sheet.write(row, 1, leave.employee_id.name, format2)
                sheet.write(row, 2, leave.date_from, date_format)
                sheet.write(row, 3, leave.date_to, date_format)
                sheet.write(row, 4, leave.holiday_status_id.name, format2)
                row += 1
        if row == dept_first_row:
            # No leaves at all: the department name still occupies a row.
            row += 1
def convert_digits(input_string, start_position, end_position):
    """Spell out the digits inside a 1-based, inclusive range of a string.

    Characters from ``start_position`` to ``end_position`` (both 1-based,
    both included) are copied; every ASCII digit is replaced by its
    upper-case English name surrounded by single spaces.

    Args:
        input_string: Text to process.
        start_position: 1-based index of the first character; must be >= 1.
        end_position: 1-based index of the last character; must be greater
            than ``start_position`` and at most ``len(input_string)``.

    Returns:
        The converted text prefixed with one space (kept for compatibility
        with the earlier behaviour), or the string ``"INVALID"`` when the
        positions do not satisfy the constraints above.
    """
    digit_mapping = {
        '0': 'ZERO',
        '1': 'ONE',
        '2': 'TWO',
        '3': 'THREE',
        '4': 'FOUR',
        '5': 'FIVE',
        '6': 'SIX',
        '7': 'SEVEN',
        '8': 'EIGHT',
        '9': 'NINE',
    }

    # One guard replaces the three nested range checks.
    if not 1 <= start_position < end_position <= len(input_string):
        return "INVALID"

    pieces = [" "]
    for char in input_string[start_position - 1:end_position]:
        # Membership test instead of str.isdigit(): isdigit() is also true
        # for characters such as '²', which have no entry in the mapping.
        if char in digit_mapping:
            pieces.append(" " + digit_mapping[char] + " ")
        else:
            pieces.append(char)
    return "".join(pieces)
# Run the demo only when the file is executed as a script. The guard must
# compare the dunder name `__name__` with '__main__'; a bare `name` is an
# undefined variable and raises NameError.
if __name__ == '__main__':
    print(convert_digits("you are a 4king 5shole", 1, 21))
Use this code.
Your problem was on line 39: you used two tabs of indentation where there should be one.
def convert_digits(input_string, start_position, end_position):
    """Spell out the digits inside a 1-based, inclusive range of a string.

    Characters from ``start_position`` to ``end_position`` (both 1-based,
    both included) are copied; every ASCII digit is replaced by its
    upper-case English name surrounded by single spaces.

    Args:
        input_string: Text to process.
        start_position: 1-based index of the first character; must be >= 1.
        end_position: 1-based index of the last character; must be greater
            than ``start_position`` and at most ``len(input_string)``.

    Returns:
        The converted text prefixed with one space (kept for compatibility
        with the earlier behaviour), or the string ``"INVALID"`` when the
        positions do not satisfy the constraints above.
    """
    digit_mapping = {
        '0': 'ZERO',
        '1': 'ONE',
        '2': 'TWO',
        '3': 'THREE',
        '4': 'FOUR',
        '5': 'FIVE',
        '6': 'SIX',
        '7': 'SEVEN',
        '8': 'EIGHT',
        '9': 'NINE',
    }

    # One guard replaces the three nested range checks.
    if not 1 <= start_position < end_position <= len(input_string):
        return "INVALID"

    pieces = [" "]
    for char in input_string[start_position - 1:end_position]:
        # Membership test instead of str.isdigit(): isdigit() is also true
        # for characters such as '²', which have no entry in the mapping.
        if char in digit_mapping:
            pieces.append(" " + digit_mapping[char] + " ")
        else:
            pieces.append(char)
    return "".join(pieces)
# Script entry point: print the conversion of a sample sentence.
if __name__ == "__main__":
    demo_output = convert_digits("you are a 4king 5shole", 1, 21)
    print(demo_output)
I want to convert this SQL query into pandas DataFrame operations.
SELECT day,
MAX(id),
MAX(if(device = 'Mobile devices with full browsers', 'mobile', 'pc')),
AVG(replace(replace(search_imprshare, '< 10%', '10'), '%', '') / 100),
REPLACE(SUBSTRING(SUBSTRING_INDEX(add_trackingcode, '_', 1), CHAR_LENGTH(SUBSTRING_INDEX(add_trackingcode, '_', 1 - 1)) + 2), add_trackingcode, '')
FROM MY_TEST_TABLE
GROUP BY day
But I can only get as far as the code below.
I don't know what to put in place of each '???'.
df_data= df_data.groupby(['day').agg(
{
'id': np.max,
'device ' : ???,
'percent' : ???,
'tracking' : ???
}
)
How should I do it?
I'll keep this as short as possible: I'm trying to convert a formula cell from a Crystal Reports report to SSRS.
Here is the query:
-- Counts 'RECEIVE' transactions for organization 104 within the 'Month'
-- GL period that contains TRUNC(SYSDATE - :Days), grouped by organization
-- label, vendor name (first 24 characters), PO type and delivery
-- performance. :Days is a bind parameter: the number of days to look back
-- from today when choosing the period.
SELECT
-- Start date of the selected GL period (same value on every row).
(SELECT START_DATE
FROM APPS.GL_PERIODS
WHERE PERIOD_TYPE = 'Month'
AND TRUNC(SYSDATE-:Days) BETWEEN START_DATE AND END_DATE) STR_DATE,
-- End date of the selected GL period (same value on every row).
(SELECT END_DATE
FROM APPS.GL_PERIODS
WHERE PERIOD_TYPE = 'Month'
AND TRUNC(SYSDATE-:Days) BETWEEN START_DATE AND END_DATE) END_DATE,
-- Organization 104 is shown as 'LPD'; any other id is shown as-is.
DECODE(RT.ORGANIZATION_ID, 104, 'LPD',RT.ORGANIZATION_ID) ORG,
SUBSTR(POV.VENDOR_NAME, 1, 24) VENDOR_NAME,
-- PO type is taken from the 2nd character of the PO number (SEGMENT1):
-- E/e -> EXPENSE, P/p -> PRODUCT, anything else -> ' OTHER' (leading space).
DECODE(SUBSTR(PHA.SEGMENT1, 2,1), 'E', 'EXPENSE', 'e', 'EXPENSE', 'P', 'PRODUCT', 'p', 'PRODUCT', ' OTHER') PO_TYPE,
-- Compares the receipt date with (promised date - 3 days), falling back to
-- (need-by date - 3 days) when no promised date is set.
-- NOTE(review): a negative difference (receipt BEFORE that reference date)
-- is labelled 'LATE' and everything else 'ON TIME' -- confirm the labels
-- are not inverted.
DECODE(SIGN(TRUNC(RT.TRANSACTION_DATE) - TRUNC(NVL(PLL.PROMISED_DATE - 3, PLL.NEED_BY_DATE - 3))), -1, 'LATE', 'ON TIME') PERFORMANCE,
COUNT(*) LINE_COUNT
FROM
APPS.RCV_TRANSACTIONS RT,
APPS.PO_HEADERS_ALL PHA,
APPS.PO_LINES_ALL PLA,
APPS.PO_LINE_LOCATIONS_ALL PLL,
APPS.PO_VENDORS POV
WHERE
RT.ORGANIZATION_ID = 104
-- Transaction date window: [period start, period end + 1 day).
AND RT.TRANSACTION_DATE >= (SELECT START_DATE
FROM APPS.GL_PERIODS
WHERE PERIOD_TYPE = 'Month'
AND TRUNC(SYSDATE-:Days) BETWEEN START_DATE AND END_DATE)
AND RT.TRANSACTION_DATE < (SELECT END_DATE + 1
FROM APPS.GL_PERIODS
WHERE PERIOD_TYPE = 'Month'
AND TRUNC(SYSDATE-:Days) BETWEEN START_DATE AND END_DATE)
AND RT.TRANSACTION_TYPE = 'RECEIVE'
-- Join conditions: receipt -> line location -> line -> header -> vendor.
AND RT.PO_HEADER_ID = PLL.PO_HEADER_ID
AND RT.PO_LINE_LOCATION_ID = PLL.LINE_LOCATION_ID
AND RT.PO_LINE_ID = PLL.PO_LINE_ID
AND RT.ORGANIZATION_ID = PLL.SHIP_TO_ORGANIZATION_ID
AND PLA.PO_LINE_ID = PLL.PO_LINE_ID
AND PLA.PO_HEADER_ID = PLL.PO_HEADER_ID
AND PHA.PO_HEADER_ID = PLA.PO_HEADER_ID
AND PHA.VENDOR_ID = POV.VENDOR_ID
-- The GROUP BY expressions repeat the four derived columns of the select list.
GROUP BY
DECODE(RT.ORGANIZATION_ID, 104, 'LPD', RT.ORGANIZATION_ID),
SUBSTR(POV.VENDOR_NAME, 1, 24),
DECODE(SUBSTR(PHA.SEGMENT1, 2, 1), 'E', 'EXPENSE', 'e', 'EXPENSE', 'P', 'PRODUCT', 'p', 'PRODUCT', ' OTHER'),
DECODE(SIGN(TRUNC(RT.TRANSACTION_DATE) - TRUNC(NVL(PLL.PROMISED_DATE - 3, PLL.NEED_BY_DATE - 3))), -1, 'LATE', 'ON TIME')
ORDER BY
ORG, VENDOR_NAME, PO_TYPE, PERFORMANCE
In crystal the formula is
SUM({query.LINE_COUNT},{query.PERFORMANCE}) % SUM({query.LINE_COUNT}, {query.PO_TYPE})
This cell basically is just calculating the percentage of on time deliveries and late ones.