SQL QUALIFY equivalent HIVE query - sql

I'm trying to create a HIVE query from an Oracle SQL query. Essentially I want to select the first record, sorted descending by UPDATED_TM, DATETIME, ID_NUM.
-- Reference query: for every ROW_ID keep a single joined row, namely the first
-- one when sorted by UPDATED_TM, DATETIME and ID_NUM (all descending).
SELECT
    tbl1.NUM     AS ID,
    tbl1.UNIT    AS UNIT,
    tbl2.VALUE   AS VALUE,
    tbl1.CONTACT AS CONTACT_NAME,
    'FILE'       AS SOURCE,
    CURDATE()    AS DATE
FROM DB1.TBL1 tbl1
LEFT JOIN DB1.TBL2 tbl2
    ON tbl1.USR_ID = tbl2.USR_ID
-- Row-level filters: only non-null UNIT rows of type 'Generic' take part
-- in the ranking below.
WHERE tbl1.TYPE = 'Generic'
  AND tbl1.UNIT IS NOT NULL
-- QUALIFY filters on the window function result, after WHERE has been applied.
QUALIFY ROW_NUMBER() OVER (
            PARTITION BY tbl1.ROW_ID
            ORDER BY tbl1.UPDATED_TM DESC,
                     tbl1.DATETIME   DESC,
                     tbl1.ID_NUM     DESC
        ) = 1
And my attempt at an equivalent Hive query (but also sql-compatible):
-- Attempted rewrite without QUALIFY: number the TBL1 rows per ROW_ID inside a
-- derived table, join TBL2, then keep only RNUM = 1 in the outer WHERE.
SELECT
tbl1.NUM AS ID,
tbl1.UNIT AS UNIT,
tbl2.VALUE AS VALUE,
-- NOTE(review): CONTACT is not in the derived table's select list below, so this reference cannot resolve.
tbl1.CONTACT AS CONTACT_NAME,
'FILE' AS SOURCE,
CURDATE() AS DATE
FROM (
SELECT
USR_ID, TYPE, NUM, UNIT, ROW_NUMBER() OVER (PARTITION BY tbl1.ROW_ID ORDER BY tbl1.UPDATED_TM DESC, tbl1.DATETIME DESC, tbl1.ID_NUM DESC) AS RNUM
FROM
DB1.TBL1
) tbl1
LEFT JOIN DB1.TBL2 tbl2 ON tbl1.USR_ID = tbl2.USR_ID
WHERE
tbl1.RNUM = 1
-- NOTE(review): the two filters below run after RNUM has been assigned over ALL rows of TBL1.
-- The QUALIFY version filters first and ranks second, so a ROW_ID whose top-ranked row
-- fails these filters is dropped here but would still yield a row there.
AND tbl1.UNIT IS NOT NULL
AND tbl1.TYPE = 'Generic'
Does that seem correct? Is there any way I can optimize the query? The tables I'm working with are quite large and I would like to make this as efficient as possible.
Thanks.

-- QUALIFY-free equivalent: filter TBL1 first, rank the surviving rows per
-- ROW_ID, keep rank 1, then join TBL2.
SELECT
    tbl1.NUM     AS ID,
    tbl1.UNIT    AS UNIT,
    tbl2.VALUE   AS VALUE,
    tbl1.CONTACT AS CONTACT_NAME,
    'FILE'       AS SOURCE,
    CURDATE()    AS DATE
FROM (
    -- Rank the pre-filtered rows; rank 1 is the row with the greatest
    -- (UPDATED_TM, DATETIME, ID_NUM) within its ROW_ID.
    SELECT
        USR_ID,
        TYPE,
        NUM,
        UNIT,
        CONTACT,  -- must be projected here: the outer query selects tbl1.CONTACT
        ROW_NUMBER() OVER (
            PARTITION BY tbl.ROW_ID
            ORDER BY tbl.UPDATED_TM DESC, tbl.DATETIME DESC, tbl.ID_NUM DESC
        ) AS RNUM
    FROM (
        -- Apply the row filters BEFORE ranking, mirroring WHERE-then-QUALIFY.
        SELECT
            USR_ID, TYPE, NUM, UNIT, CONTACT,
            ROW_ID, UPDATED_TM, DATETIME, ID_NUM
        FROM DB1.TBL1
        WHERE UNIT IS NOT NULL
          AND TYPE = 'Generic'
    ) tbl
) tbl1
-- NOTE(review): ranking happens before this join. If TBL2 can hold several rows
-- per USR_ID, the result may contain several rows per ROW_ID, whereas the
-- QUALIFY form ranks the joined rows and keeps exactly one. Confirm TBL2 is
-- unique on USR_ID.
LEFT OUTER JOIN DB1.TBL2 tbl2
    ON tbl1.USR_ID = tbl2.USR_ID
WHERE tbl1.RNUM = 1;

Related

SAME code with changes in WHERE clause inside a sub query gives different result SQL

I am trying to find the difference between 2 code snippets that are similar since it is giving different results.
In the code below, The count is 15506
-- Variant 1: the event/product/date filters sit INSIDE the derived table, so
-- they are applied before row_number() is evaluated and rowid = 1 is chosen
-- among rows that already satisfy them.
select
    application_ID,
    userid,
    product,
    sub_product,
    ts_est,
    event_type
from (
    select
        fa.ApplicationId as application_ID,
        ro.userid,
        ro.product,
        ro.sub_product,
        ro.ts_est,
        ro.event_type,
        from_utc_timestamp(fa.bornondate, 'America/New_York') as app_bornondate,
        -- numbering restarts per (userid, product, event_type); the ordering
        -- key comes from T2 only, so T1 rows joined to the same T2 row tie
        row_number() over (
            partition by ro.userid, ro.product, ro.event_type
            order by fa.bornondate desc
        ) as rowid
    from T1 ro
    left join T2 fa
        on ro.userid = fa.userid
    where event_type = "OS"
      and product = "pt"
      and cast(ts_est as date) >= '2021-10-05'
      and cast(ts_est as date) <= '2022-03-07'
)
where rowid = 1
Here is the 2nd code snippet where I have used the conditions outside the subquery. The count is 15096
-- Variant 2: the derived table numbers ALL joined rows; the event/product/date
-- filters are applied afterwards, together with rowid = 1. A partition whose
-- rowid = 1 row falls outside the ts_est range therefore contributes no row.
select
    application_ID,
    userid,
    product,
    sub_product,
    ts_est,
    event_type
from (
    select
        fa.ApplicationId as application_ID,
        ro.userid,
        ro.product,
        ro.sub_product,
        ro.ts_est,
        ro.event_type,
        from_utc_timestamp(fa.bornondate, 'America/New_York') as app_bornondate,
        -- numbering restarts per (userid, product, event_type), computed over
        -- the unfiltered join result
        row_number() over (
            partition by ro.userid, ro.product, ro.event_type
            order by fa.bornondate desc
        ) as rowid
    from T1 ro
    left join T2 fa
        on ro.userid = fa.userid
)
where rowid = 1
  and event_type = "OS"
  and product = "pt"
  and cast(ts_est as date) >= '2021-10-05'
  and cast(ts_est as date) <= '2022-03-07'
When I apply a MINUS operation to the two queries, it shows 410 differences, but the userids are present in both tables, which is strange.
I am new to sql and I'm having trouble taking this further though.
Thanks!

Turn these temp tables into one longer subquery (can't use Temp tables in Power BI)

Currently I have created these temp tables to get the desired output I need. However, Power BI doesn't allow the use of temp tables so I need to get this all into 1 query using inner selects.
-- Builds, through temp tables, one row per ID that carries its first and
-- second ListID for yesterday's date.

-- Step 1: number yesterday's distinct (Date, ID, ListID) rows within each ID.
-- NOTE(review): the window orders by its own partition key, so which ListID
-- gets rownum 1 vs 2 is arbitrary; add a deterministic ORDER BY column.
drop table if exists #RowNumber
select Date, ID, ListID
    , row_number() over (partition by ID order by ID) as rownum
into #RowNumber
from [Table]  -- TABLE is a reserved word and must be bracket-quoted
where Date = cast(getdate()-1 as date)
group by Date, ID, ListID
order by ID

-- Step 2: the row numbered 1 per ID. The alias is FirstListID so that it
-- matches the column the final select reads (it was FirstID before).
drop table if exists #1stListIDs
select ListID as FirstListID, ID, Date
into #1stListIDs
from #RowNumber
where rownum = 1

-- Step 3: the row numbered 2 per ID.
drop table if exists #2ndlistids
select ListID as SecondListID, ID, Date
into #2ndlistids
from #RowNumber
where rownum = 2

-- Step 4: join the two tables back together so both ListIDs share one row.
-- IDs without a second ListID are dropped by the inner join.
drop table if exists #FinalTableWithTwoListIDs
select b.FirstListID, a.SecondListID, a.ID, a.Date
into #FinalTableWithTwoListIDs
from #2ndlistids a
join #1stListIDs b on a.ID = b.ID
order by a.ID
This code is simple and straightforward. However, I can't seem to figure out how to do the same thing using a subquery. Here is what I have. It works for the FirstListID select statement, but not for the SecondListID portion. I believe this is because you can't reference the innermost select statement from multiple different outer select statements, but I could be wrong.
-- Attempt to fold the temp tables into nested derived tables (not working as written).
-- NOTE(review): alias "a" is only visible inside the middle query; at this outer level
-- the derived table is named "b", so a.ListId / a.ID / a.Date / a.rownum cannot resolve here.
Select a.ListId as SecondListID, a.ID, a.Date
from (
select a.ListId as FirstListID, a.ID, a.Date
from (
Select Date, ID, ListId
, row_number() over (partition by ID order by ID) as rownum
from Table
where Date = cast(getdate()-1 as date)
group by Date, ID, ListId
-- NOTE(review): an ORDER BY inside a derived table is presumably rejected by SQL Server unless TOP/OFFSET is used; confirm.
order by ID) a
-- Only rownum = 1 rows leave the middle query (and rownum itself is not projected),
-- so an outer test for rownum = 2 can never be satisfied by nesting alone.
where a.rownum = 1) b
-- NOTE(review): parentheses are unbalanced: two are opened above but three are closed.
where a.rownum = 2) c
Just to show, for completeness, how you could use CTEs to replace the #temp tables, it would be something along the lines of
-- CTE translation of the temp-table script: one row per ID carrying its first
-- and second ListID for yesterday's date.
with RowNumber as (
    -- yesterday's distinct (Date, ID, ListID) rows, numbered within each ID
    -- NOTE(review): ordering by the partition key makes the numbering
    -- arbitrary; prefer a deterministic ORDER BY column.
    select
        Date,
        ID,
        ListID,
        row_number() over (partition by ID order by ID) as rownum
    from [Table]  -- TABLE is a reserved word and must be bracket-quoted
    where Date = cast(dateadd(day, -1, getdate()) as date)
    group by Date, ID, ListID
),
FirstListIDs as (
    select ListID as FirstID, ID, Date
    from RowNumber
    where rownum = 1
),
SecondListIDs as (
    select ListID as SecondID, ID, Date
    from RowNumber
    where rownum = 2
)
select
    f.FirstID,
    s.SecondID,
    s.ID,
    s.Date
-- referenced with the same casing as its definition, so the query also works
-- under a case-sensitive collation
from SecondListIDs s
join FirstListIDs f
    on s.ID = f.ID
order by s.ID
Note the use of dateadd, which is recommended over the ambiguous `date +/- value` form (where the value is merely assumed to be days), and, where relevant, of meaningful table aliases.
You could do it with a CTE and joining the two together, but that is inefficient and unnecessary.
It looks like you just need LAG to get the previous ListID
I note that PARTITION BY ID ORDER BY ID is non-deterministic and the ordering will be random. I strongly suggest you find a deterministic ordering.
-- Single-pass alternative: for each ID return the row numbered 2 together with
-- the ListID of the row before it (via LAG), i.e. the first and second ListID
-- side by side.
SELECT
    PrevID AS FirstListID,   -- a column alias cannot carry a table qualifier
    ListID AS SecondListID,
    ID,
    Date
FROM (
    SELECT
        Date,
        ID,
        ListID,
        -- NOTE(review): both windows order by the partition key, so the row
        -- order inside an ID is arbitrary; use a deterministic ORDER BY column.
        ROW_NUMBER() OVER (PARTITION BY ID ORDER BY ID) AS rownum,
        LAG(ListID)  OVER (PARTITION BY ID ORDER BY ID) AS PrevID
    FROM [Table]
    WHERE Date = CAST(GETDATE() - 1 AS date)
    GROUP BY Date, ID, ListID
) AS WithRowAndLag
WHERE rownum = 2   -- no semicolon here: the ORDER BY belongs to this statement
ORDER BY ID;

Display Prev and Current value based on a ID - SQL

I am not sure if a similar question has been posted. I was unable to find one.
I have the following table:
What I am trying to get is the below:
Any advice will be appreciated.
Thanks in advance,
Sam
Worked both in Oracle and Snowflake:
-- Previous/current date and code per id, restricted to each id's row with the
-- greatest order_dt.
WITH lagged AS (
    -- every row of t111 plus the preceding order_dt / code within the same id
    SELECT
        id,
        order_dt,
        LAG(order_dt) OVER (PARTITION BY id ORDER BY id, order_dt) AS prev_dt,
        upd_dt AS current_dt,
        LAG(code) OVER (PARTITION BY id ORDER BY id, upd_dt) AS prev_code,
        code AS curr_code
    FROM t111
),
latest AS (
    -- most recent order_dt per id
    SELECT
        id,
        MAX(order_dt) AS max_date
    FROM t111
    GROUP BY id
)
SELECT
    lagged.id,
    lagged.prev_dt,
    lagged.current_dt,
    lagged.prev_code,
    lagged.curr_code
FROM lagged
INNER JOIN latest
    ON latest.id = lagged.id
   AND lagged.order_dt = latest.max_date
You seem to want window function lag():
-- Every row of mytable alongside the order_dt and code of the preceding row
-- (by order_by_id) within the same id; no rows are filtered out.
SELECT
    id,
    LAG(order_dt) OVER (PARTITION BY id ORDER BY order_by_id) AS prev_dt,
    order_dt AS current_dt,
    LAG(code) OVER (PARTITION BY id ORDER BY order_by_id) AS prev_code,
    code AS curr_code
FROM mytable
Note that the above query does not filter the records of the table. When there is no preceding record, lag() returns null. If you want to filter out the first record per group, and assuming that such a record is identified by order_by_id = 1, you can do:
-- Same previous/current pairing, but rows with order_by_id = 1 (or lower) are
-- removed after the lag values have been computed.
SELECT
    t.id,
    t.prev_dt,
    t.current_dt,
    t.prev_code,
    t.curr_code,
    t.order_by_id
FROM (
    SELECT
        id,
        LAG(order_dt) OVER (PARTITION BY id ORDER BY order_by_id) AS prev_dt,
        order_dt AS current_dt,
        LAG(code) OVER (PARTITION BY id ORDER BY order_by_id) AS prev_code,
        code AS curr_code,
        order_by_id
    FROM mytable
) t
WHERE t.order_by_id > 1
Window functions might be the best approach. But you could also use join:
-- Self-join alternative: pair each id's row numbered 1 (previous) with its row
-- numbered 2 (current).
SELECT
    t1.id,
    t1.order_dt AS prev_dt,
    t2.upd_dt   AS curr_date,
    t1.code     AS prev_code,
    t2.code     AS curr_code
FROM t t1
INNER JOIN t t2
    ON t1.id = t2.id
WHERE t1.order_by_id = 1
  AND t2.order_by_id = 2;
In Snowflake, I simply do not know whether this would have better, worse, or similar performance to using window functions.

Query which is not showing the desired result i.e order by date desc

This is an Oracle query that is used to select data from the tables, but it is not showing the dates in descending order.
-- Lists the awards of one employee with a running serial number (sno).
WITH cte AS
(
SELECT DISTINCT cmoa.adate,
subject,
TYPE,
-- awarddate is the DD/MM/YYYY *string* form of adate, not a date value.
to_char(cmoa.adate, 'DD/MM/YYYY') awarddate,
-- Scalar subquery: resolves the award id stored on the officer-award row to its name.
(SELECT name
FROM dopprod.cm_awards
WHERE cm_awardsid = cmoa.awards) AS awardname
FROM dopprod.cm_emp_master m
INNER JOIN dopprod.cm_officer_awards cmoa ON m.cm_emp_masterid = cmoa.name
-- NOTE(review): the quote/plus sequence looks like host-language string concatenation
-- of empcode into the SQL text; presumably this should be a bind variable. Confirm against the caller.
WHERE m.cm_emp_masterid = '" + empcode + "'
)
-- The numbering orders by awarddate (the string) in ascending order, so rows are
-- ranked textually by day-of-month first and never newest-first.
SELECT row_number() OVER (ORDER BY awarddate) AS sno,
subject,
TYPE,
awarddate,
awardname
-- NOTE(review): a CTE name is not a schema object; the dopprod. qualifier presumably has to go. Confirm.
FROM dopprod.cte
ORDER BY sno
Why would it show dates in descending order, if you didn't tell it so? Shouldn't it be
order by cmoa.adate DESC -- the ORDER BY in your code is missing DESC
Try below - you need to specify desc in order by
-- Awards of one employee, newest first, with a serial number that follows the
-- same newest-first order.
WITH cte AS (
    SELECT DISTINCT
        cmoa.adate,
        subject,
        TYPE,
        TO_CHAR(cmoa.adate, 'DD/MM/YYYY') AS awarddate,
        -- resolve the award id to its display name
        (SELECT name
         FROM dopprod.cm_awards
         WHERE cm_awardsid = cmoa.awards) AS awardname
    FROM dopprod.cm_emp_master m
    INNER JOIN dopprod.cm_officer_awards cmoa
        ON m.cm_emp_masterid = cmoa.name
    WHERE m.cm_emp_masterid = 'someid'
)
SELECT
    -- Order by the DATE column exposed by the CTE. The cmoa alias only exists
    -- inside the CTE, and awarddate is a string that would sort textually.
    ROW_NUMBER() OVER (ORDER BY adate DESC) AS sno,
    subject,
    TYPE,
    awarddate,
    awardname
FROM cte          -- a CTE is referenced by its bare name, without a schema prefix
ORDER BY sno      -- display order follows the numbering exactly, even on date ties
Why not just do this?
-- Final SELECT only: it reads from the cte defined by the WITH clause of the
-- query under discussion, which must precede it.
-- ROWNUM is assigned as rows are fetched, i.e. before an ORDER BY at the same
-- query level is applied. The sort therefore happens in an inline view first,
-- and the numbering is taken outside it so that sno follows the date order.
SELECT
    ROWNUM AS sno,
    subject,
    TYPE,
    awarddate,
    awardname
FROM (
    SELECT
        subject,
        TYPE,
        awarddate,
        awardname
    FROM cte              -- bare CTE name; a schema prefix is not valid here
    ORDER BY adate DESC   -- sort on the DATE column, not the formatted string
)
ORDER BY sno
If you want to sort by the date value, you need to include the date column, not the string column in the order by.

Oracle SQL query result into a temporary table for use in a sub query

I want to create a temporary table, which is in turn derived from a query, to be used in another sub-query so as to simplify the row_number() and partition by condition. The query I have entered is below, but it returns the error "t.trlr_num invalid identifier".
-- Picks one row per (trailer, invoice) from the received-inventory / yard join.
with t as
(select distinct
ym.trlr_num,
ym.arrdte,
ri.invnum,
ri.supnum
from rcvinv ri, yms_ymr ym
where ym.trlr_cod='RCV'
and ri.trknum = ym.trlr_num
and ym.wh_id <=50
and ym.trlr_stat in ('C','CI','R','OR')
and ym.arrdte is not null
order by ym.arrdte desc
)
-- NOTE(review): the CTE exposes trlr_num; trlr_number is not a column of t or of the subquery below.
select trlr_number, invnum, supnum
from
(
select
t.trlr_num, t.invnum, t.supnum,
-- NOTE(review): same mismatch here: the partition key is t.trlr_number but the column is named trlr_num.
row_number() over (partition by t.trlr_number,t.invnum order by t.arrdte) as rn
from t
)
where rn = 1;
In the query above, I define t as a temporary table to be used in the select statement below it. But it seems to error out with "invalid identifier".
It seems to be a typo: replace trlr_number with trlr_num and it works.
-- One row per (trlr_num, invnum): the row with the earliest arrdte among the
-- qualifying receiving trailers.
WITH t AS (
    SELECT DISTINCT
        ym.trlr_num,
        ym.arrdte,
        ri.invnum,
        ri.supnum
    FROM rcvinv ri
    INNER JOIN yms_ymr ym
        ON ri.trknum = ym.trlr_num
    WHERE ym.trlr_cod = 'RCV'
      AND ym.wh_id <= 50
      AND ym.trlr_stat IN ('C', 'CI', 'R', 'OR')
      AND ym.arrdte IS NOT NULL
    ORDER BY ym.arrdte DESC
)
SELECT
    ranked.trlr_num,
    ranked.invnum,
    ranked.supnum
FROM (
    -- rn = 1 marks the earliest arrdte within each (trlr_num, invnum)
    SELECT
        t.trlr_num,
        t.invnum,
        t.supnum,
        ROW_NUMBER() OVER (
            PARTITION BY t.trlr_num, t.invnum
            ORDER BY t.arrdte
        ) AS rn
    FROM t
) ranked
WHERE ranked.rn = 1;
You could use multiple subqueries in the WITH clause as separate temporary tables. It would be nice and easy to understand:
-- Two-step CTE version: t collects the qualifying trailer/invoice rows, t1
-- numbers them, and the final query keeps one row per (trlr_num, invnum).
WITH t AS (
    -- distinct receiving-trailer rows that have an arrival date
    SELECT DISTINCT
        ym.trlr_num AS trlr_num,
        ym.arrdte   AS arrdte,
        ri.invnum   AS invnum,
        ri.supnum   AS supnum
    FROM rcvinv ri
    INNER JOIN yms_ymr ym
        ON ri.trknum = ym.trlr_num
    WHERE ym.trlr_cod = 'RCV'
      AND ym.wh_id <= 50
      AND ym.trlr_stat IN ('C', 'CI', 'R', 'OR')
      AND ym.arrdte IS NOT NULL
),
t1 AS (
    SELECT
        t.trlr_num,
        t.arrdte,
        t.invnum,
        t.supnum,
        -- Order by arrdte inside each partition. Ordering by the partition keys
        -- themselves makes every row tie, so rn = 1 would be an arbitrary pick.
        ROW_NUMBER() OVER (
            PARTITION BY t.trlr_num, t.invnum
            ORDER BY t.arrdte
        ) AS rn
    FROM t
)
SELECT
    trlr_num,
    arrdte,
    invnum,
    supnum
FROM t1
WHERE rn = 1;