Need help on Hive query

Need help on Hive query - hive

hive> describe hivecustomers;
OK
cust_id int
cust_fname string
cust_lname string
cust_email string
cust_password string
cust_street string
cust_city string
cust_state string
cust_zipcode string
hive> describe hiveorders;
OK
ord_id int
ord_dt string
ord_cust_id int
ord_stat string
hive> select * from hiveorders limit 3;
OK
1 2013-07-25 00:00:00.0 11599 CLOSED
2 2013-07-25 00:00:00.0 256 PENDING_PAYMENT
3 2013-07-25 00:00:00.0 12111 COMPLETE
hive> select * from hivecustomers limit 3;
OK
1 Richard Hernandez XXXXXXXXX XXXXXXXXX 6303 Heather Plaza Brownsville TX 78521
2 Mary Barrett XXXXXXXXX XXXXXXXXX 9526 Noble Embers Ridge Littleton CO 80126
3 Ann Smith XXXXXXXXX XXXXXXXXX 3422 Blue Pioneer Bend Caguas PR 00725
Based on the above two tables, I need output as following in Hive, how can I write query to this job ?
+-----------+---------------+---------------+--------------+-----+
|Cust Name | Cust Address | Total Orders | Order Status |Count|
+-----------+---------------+---------------+--------------+-----+
|Andrew |London |15 |Complete |8 |
|Andrew |London |15 |Pending |3 |
|Andrew |London |15 |Processing |4 |
|Andrew |London |15 |On-Hold |1 |
+-----------+---------------+---------------+--------------+-----+

Finally, I used your data as CTEs (common table expressions) and tested it. Customer 1 has two PENDING_PAYMENT orders and 4 orders in total; all the others have one order per status and 3 orders each in total:
-- Sample data supplied as CTEs so the query can be run without the real tables.
-- Customer 1 has four orders (two of them PENDING_PAYMENT); customers 2 and 3
-- have three orders each, one per status.
with hivecustomers as
(
select 1 as cust_id , 'Richard' as cust_fname union all
select 2 as cust_id , 'Mary' as cust_fname union all
select 3 as cust_id , 'Ann' as cust_fname
),
hiveorders as
(
select 1 as ord_id, '2013-07-25 00:00:00.0' as ord_dt, 1 as ord_cust_id, 'CLOSED' as ord_stat union all
select 2 as ord_id, '2013-07-25 00:00:00.0' as ord_dt, 1 as ord_cust_id, 'PENDING_PAYMENT' as ord_stat union all
select 3 as ord_id, '2013-07-25 00:00:00.0' as ord_dt, 1 as ord_cust_id, 'COMPLETE' as ord_stat union all
select 4 as ord_id, '2013-07-25 00:00:00.0' as ord_dt, 1 as ord_cust_id, 'PENDING_PAYMENT' as ord_stat union all
select 21 as ord_id, '2013-07-25 00:00:00.0' as ord_dt, 2 as ord_cust_id, 'CLOSED' as ord_stat union all
select 22 as ord_id, '2013-07-25 00:00:00.0' as ord_dt, 2 as ord_cust_id, 'PENDING_PAYMENT' as ord_stat union all
select 23 as ord_id, '2013-07-25 00:00:00.0' as ord_dt, 2 as ord_cust_id, 'COMPLETE' as ord_stat union all
select 31 as ord_id, '2013-07-25 00:00:00.0' as ord_dt, 3 as ord_cust_id, 'CLOSED' as ord_stat union all
select 32 as ord_id, '2013-07-25 00:00:00.0' as ord_dt, 3 as ord_cust_id, 'PENDING_PAYMENT' as ord_stat union all
select 33 as ord_id, '2013-07-25 00:00:00.0' as ord_dt, 3 as ord_cust_id, 'COMPLETE' as ord_stat
)
-- Result: one row per customer and order status, carrying the customer's
-- total number of orders and the number of orders in that status.
select cust_fname, total_orders, order_status, count(ord_id) as Count
from
(
select c.cust_id,
       c.cust_fname,
       o.ord_id,
       -- count(o.ord_id) skips the all-NULL row the left join produces for a
       -- customer without orders, so such a customer reports 0 orders, not 1
       count(o.ord_id) over(partition by c.cust_id) as total_orders,
       o.ord_stat as Order_Status
from hivecustomers c left join hiveorders o on c.cust_id=o.ord_cust_id
)s
-- cust_id is part of the grouping so that two customers who share a first
-- name are not merged into a single row
group by cust_id, cust_fname, total_orders, order_status;
Output:
OK
cust_fname total_orders order_status count
Ann 3 CLOSED 1
Ann 3 COMPLETE 1
Ann 3 PENDING_PAYMENT 1
Mary 3 CLOSED 1
Mary 3 COMPLETE 1
Mary 3 PENDING_PAYMENT 1
Richard 4 CLOSED 1
Richard 4 COMPLETE 1
Richard 4 PENDING_PAYMENT 2
Time taken: 40.692 seconds, Fetched: 9 row(s)
Just remove CTE and use normal tables instead.

Related

ROLLUP BY year,week

I am trying to use the ROLLUP command to group my data by year/week, customer_id but I can't seem to get it to work.
Below is my sample data and my attempt. Can someone show me how to make this work
ALTER SESSION SET NLS_TIMESTAMP_FORMAT = 'DD-MON-YYYY HH24:MI:SS.FF';
ALTER SESSION SET NLS_DATE_FORMAT = 'DD-MON-YYYY HH24:MI:SS';
CREATE TABLE customers
(CUSTOMER_ID, FIRST_NAME, LAST_NAME) AS
SELECT 1, 'Faith', 'Mazzarone' FROM DUAL UNION ALL
SELECT 2, 'Lisa', 'Saladino' FROM DUAL UNION ALL
SELECT 3, 'Micheal', 'Palmice' FROM DUAL UNION ALL
SELECT 4, 'Jerry', 'Torchiano' FROM DUAL;
CREATE TABLE items
(PRODUCT_ID, PRODUCT_NAME, PRICE) AS
SELECT 100, 'Black Shoes', 79.99 FROM DUAL UNION ALL
SELECT 101, 'Brown Pants', 111.99 FROM DUAL UNION ALL
SELECT 102, 'White Shirt', 10.99 FROM DUAL;
CREATE TABLE purchases
(CUSTOMER_ID, PRODUCT_ID, QUANTITY, PURCHASE_DATE) AS
SELECT 1, 101, 3, TIMESTAMP'2022-10-11 09:54:48' FROM DUAL UNION ALL
SELECT 1, 100, 1, TIMESTAMP '2022-10-12 19:04:18' FROM DUAL UNION ALL
SELECT 2, 101,1, TIMESTAMP '2022-10-11 09:54:48' FROM DUAL UNION ALL
SELECT 2, 101, 3, TIMESTAMP '2022-10-17 19:34:58' FROM DUAL UNION ALL
SELECT 2, 102, 3,TIMESTAMP '2022-12-06 11:41:25' + NUMTODSINTERVAL ( LEVEL * 2, 'DAY') FROM dual CONNECT BY LEVEL <= 6 UNION ALL
SELECT 3, 101,1, TIMESTAMP '2022-12-11 09:54:48' FROM DUAL UNION ALL
SELECT 3, 102,1, TIMESTAMP '2022-12-17 19:04:18' FROM DUAL UNION ALL
SELECT 3, 102, 4,TIMESTAMP '2022-12-12 21:44:35' + NUMTODSINTERVAL ( LEVEL * 2, 'DAY') FROM dual
CONNECT BY LEVEL <= 5;
SELECT
p.customer_id,
c.first_name,
c.last_name,
sum(p.quantity * i.price) total_amt
FROM purchases p,
items i,
customers c
WHERE p.customer_id = c.customer_id
GROUP BY ROLLUP (to_char(p.purchase_date, 'YYYY/IW'),(p.customer_id)));

To me, it looks as
SQL> SELECT p.customer_id,
2 c.first_name,
3 c.last_name,
4 TO_CHAR (p.purchase_date, 'YYYY/IW') year_week,
5 SUM (p.quantity * i.price) total_amt
6 FROM purchases p, items i, customers c
7 WHERE p.customer_id = c.customer_id
8 GROUP BY c.first_name,
9 c.last_name,
10 p.customer_id,
11 ROLLUP (TO_CHAR (p.purchase_date, 'YYYY/IW'))
12 ORDER BY customer_id, year_week;
CUSTOMER_ID FIRST_N LAST_NAME YEAR_WE TOTAL_AMT
----------- ------- --------- ------- ----------
1 Faith Mazzarone 2022/41 811,88
1 Faith Mazzarone 811,88
2 Lisa Saladino 2022/41 202,97
2 Lisa Saladino 2022/42 608,91
2 Lisa Saladino 2022/49 1217,82
2 Lisa Saladino 2022/50 2435,64
2 Lisa Saladino 4465,34
3 Micheal Palmice 2022/49 202,97
3 Micheal Palmice 2022/50 2638,61
3 Micheal Palmice 2022/51 1623,76
3 Micheal Palmice 4465,34
11 rows selected.
SQL>

You have not included a JOIN condition for the items table. Also, since first_name and last_name are functionally dependent on customer_id, you can aggregate the name components instead of grouping by them:
SELECT p.customer_id,
CASE
WHEN p.customer_id IS NULL
THEN NULL
ELSE MAX(c.first_name)
END AS first_name,
CASE
WHEN p.customer_id IS NULL
THEN NULL
ELSE MAX(c.last_name)
END AS last_name,
to_char(p.purchase_date, 'YYYY/IW') AS week,
sum(p.quantity * i.price) total_amt
FROM purchases p
INNER JOIN customers c
ON p.customer_id = c.customer_id
INNER JOIN items i
ON p.product_id = i.product_id
GROUP BY
ROLLUP(
p.customer_id,
to_char(p.purchase_date, 'YYYY/IW')
);
Outputs:
CUSTOMER_ID
FIRST_NAME
LAST_NAME
WEEK
TOTAL_AMT
1
Faith
Mazzarone
2022/41
415.96
2
Lisa
Saladino
2022/41
111.99
2
Lisa
Saladino
2022/42
335.97
2
Lisa
Saladino
2022/49
65.94
2
Lisa
Saladino
2022/50
131.88
3
Micheal
Palmice
2022/49
111.99
3
Micheal
Palmice
2022/50
142.87
3
Micheal
Palmice
2022/51
87.92
1
Faith
Mazzarone
null
415.96
2
Lisa
Saladino
null
645.78
3
Micheal
Palmice
null
342.78
null
null
null
null
1404.52
and reversing the ROLLUP:
SELECT p.customer_id,
CASE
WHEN p.customer_id IS NULL
THEN NULL
ELSE MAX(c.first_name)
END AS first_name,
CASE
WHEN p.customer_id IS NULL
THEN NULL
ELSE MAX(c.last_name)
END AS last_name,
to_char(p.purchase_date, 'YYYY/IW') AS week,
sum(p.quantity * i.price) total_amt
FROM purchases p
INNER JOIN customers c
ON p.customer_id = c.customer_id
INNER JOIN items i
ON p.product_id = i.product_id
GROUP BY
ROLLUP(
to_char(p.purchase_date, 'YYYY/IW'),
p.customer_id
);
Outputs:
CUSTOMER_ID
FIRST_NAME
LAST_NAME
WEEK
TOTAL_AMT
1
Faith
Mazzarone
2022/41
415.96
2
Lisa
Saladino
2022/41
111.99
2
Lisa
Saladino
2022/42
335.97
2
Lisa
Saladino
2022/49
65.94
2
Lisa
Saladino
2022/50
131.88
3
Micheal
Palmice
2022/49
111.99
3
Micheal
Palmice
2022/50
142.87
3
Micheal
Palmice
2022/51
87.92
null
null
null
2022/41
527.95
null
null
null
2022/42
335.97
null
null
null
2022/49
177.93
null
null
null
2022/50
274.75
null
null
null
2022/51
87.92
null
null
null
null
1404.52
fiddle

In general, the two other answers are OK; however, the GROUP BY is wrong.
Consider this sample data:
INSERT INTO PURCHASES (CUSTOMER_ID, PRODUCT_ID, QUANTITY, PURCHASE_DATE)
SELECT 2, 102, 3,TIMESTAMP '2022-12-26 11:41:25' + NUMTODSINTERVAL ( LEVEL * 2, 'DAY') FROM dual CONNECT BY LEVEL <= 6 UNION ALL
SELECT 3, 101,1, TIMESTAMP '2022-12-21 09:54:48' FROM DUAL UNION ALL
SELECT 3, 102,1, TIMESTAMP '2022-12-27 19:04:18' FROM DUAL UNION ALL
SELECT 3, 102, 4,TIMESTAMP '2022-12-22 21:44:35' + NUMTODSINTERVAL ( LEVEL * 2, 'DAY') FROM dual
CONNECT BY LEVEL <= 15;
SELECT p.customer_id,
TO_CHAR(p.purchase_date, 'YYYY/IW') AS WEEK,
SUM(p.quantity * i.price) total_amt
FROM purchases p
JOIN customers c ON p.customer_id = c.customer_id
JOIN items i ON p.product_id = i.product_id
GROUP BY ROLLUP(p.customer_id, TO_CHAR(p.purchase_date, 'YYYY/IW'))
ORDER BY 2;
returns
+-----------------------------+
|CUSTOMER_ID|WEEK |TOTAL_AMT|
+-----------------------------+
|3 |2022/51|155.95 |
|2 |2022/52|65.94 |
|3 |2022/52|142.87 |
|3 |2023/01|131.88 |
|2 |2023/01|98.91 |
|3 |2023/02|175.84 |
|3 |2023/03|131.88 |
|2 |2023/52|32.97 |
|3 |2023/52|43.96 |
|2 | |197.82 |
|3 | |782.38 |
| | |980.2 |
+-----------------------------+
But I guess you are rather looking for this:
SELECT p.customer_id,
TO_CHAR(trunc(p.purchase_date, 'IW'), 'IYYY/IW') AS WEEK,
SUM(p.quantity * i.price) total_amt
FROM purchases p
JOIN customers c ON p.customer_id = c.customer_id
JOIN items i ON p.product_id = i.product_id
GROUP BY ROLLUP(p.customer_id, TRUNC(p.purchase_date, 'IW'))
ORDER BY 2;
+-----------------------------+
|CUSTOMER_ID|WEEK |TOTAL_AMT|
+-----------------------------+
|3 |2022/51|155.95 |
|2 |2022/52|98.91 |
|3 |2022/52|186.83 |
|2 |2023/01|98.91 |
|3 |2023/01|131.88 |
|3 |2023/02|175.84 |
|3 |2023/03|131.88 |
| | |980.2 |
|3 | |782.38 |
|2 | |197.82 |
+-----------------------------+
See difference with this query:
SELECT DISTINCT
TO_CHAR(purchase_date, 'YYYY-MM-DD') AS purchase_date,
TO_CHAR(TRUNC(purchase_date, 'IW'), 'IYYY-"W"IW') AS ISO_WEEK,
TO_CHAR(purchase_date, 'YYYY/IW') AS WEEK
FROM purchases p
ORDER BY purchase_date
+------------------------------+
|PURCHASE_DATE|ISO_WEEK|WEEK |
+------------------------------+
|2022-12-21 |2022-W51|2022/51|
|2022-12-24 |2022-W51|2022/51|
|2022-12-26 |2022-W52|2022/52|
|2022-12-27 |2022-W52|2022/52|
|2022-12-28 |2022-W52|2022/52|
|2022-12-30 |2022-W52|2022/52|
|2023-01-01 |2022-W52|2023/52| <- Note the difference
|2023-01-03 |2023-W01|2023/01|
|2023-01-05 |2023-W01|2023/01|
|2023-01-07 |2023-W01|2023/01|
|2023-01-09 |2023-W02|2023/02|
|2023-01-11 |2023-W02|2023/02|
|2023-01-13 |2023-W02|2023/02|
|2023-01-15 |2023-W02|2023/02|
|2023-01-17 |2023-W03|2023/03|
|2023-01-19 |2023-W03|2023/03|
|2023-01-21 |2023-W03|2023/03|
+------------------------------+

How to filter my table based on this specific date criteria?

I am using SQL Server 2014. Below is an extract of Table t1:
rownum RoomID ArrivalDate DepartureDate Name GuestID
1 287 2020-01-01 2020-01-09 John 600
2 451 2020-01-09 2020-01-10 John 600
3 458 2020-01-09 2020-01-10 John 600
1 240 2020-03-19 2020-03-21 Alan 112
2 159 2020-03-21 2020-03-22 Alan 112
1 400 2020-05-01 2020-05-10 Joe 225
2 155 2020-06-13 2020-06-18 Joe 225
1 200 2020-07-01 2020-07-08 Smith 980
2 544 2020-07-08 2020-07-10 Smith 980
3 428 2020-09-01 2020-09-05 Smith 980
...
The problem: I need to filter this table so that the output gives me only those rows of a guest where the difference/s between his ArrivalDate (at rownum 2 or 3 or 4...) and his DepartureDate (at rownum =1) is greater than 0.
To simplify: if we take Guest John, his ArrivalDate for rownum=2 and for rownum=3 are both the same as his DepartureDate for rownum=1; therefore I want to exclude him completely from my output. The same goes for Guest Alan. For Guest Smith, however, only the row where rownum=2 needs to be excluded.
Note: all guests in this table will have at least a rownum=2 (that is, a minimum of 2 entries).
My expected output:
rownum RoomID ArrivalDate DepartureDate Name GuestID
1 400 2020-05-01 2020-05-10 Joe 225
2 155 2020-06-13 2020-06-18 Joe 225
1 200 2020-07-01 2020-07-08 Smith 980
3 428 2020-09-01 2020-09-05 Smith 980
I am stuck on how to write the logic behind this filter. Any help would be appreciated.

The trick here appears to be keeping the first row when there is a qualifying later row -- but not including any of the guest's rows otherwise. You can use window functions:
-- For every stay, attach two per-guest values:
--   departuredate_1   : DepartureDate of the guest's rownum = 1 stay
--   arrivaldate_not_1 : latest ArrivalDate among the guest's other stays
with guest_dates as (
    select s.*,
           max(case when s.rownum = 1 then s.departuredate end)
               over (partition by s.guestid) as departuredate_1,
           max(case when s.rownum <> 1 then s.arrivaldate end)
               over (partition by s.guestid) as arrivaldate_not_1
    from t1 s
)
select g.*
from guest_dates g
-- keep a guest only if at least one later stay starts after the first departure ...
where g.departuredate_1 < g.arrivaldate_not_1
-- ... and then keep the first stay plus every stay that starts after that departure
  and (g.rownum = 1 or g.departuredate_1 < g.arrivaldate);
Here is a db<>fiddle.

Please use below query and confirm if this is what you are expecting,
-- SQL Server does not support row-value predicates such as
-- "(a, b) NOT IN (subquery)", and TABLE is a reserved word, so the check is
-- written as a correlated NOT EXISTS against t1 (the table from the question).
-- Returns every row whose ArrivalDate is not equal to any DepartureDate
-- recorded for the same Name.
select t.*
from t1 t
where not exists (select 1
                  from t1 d
                  where d.Name = t.Name
                    and d.DepartureDate = t.ArrivalDate);

-- Make the script re-runnable in the same session (this form also works on
-- SQL Server 2014, which has no DROP TABLE IF EXISTS).
if object_id('tempdb..#temp') is not null drop table #temp;
if object_id('tempdb..#Aridept') is not null drop table #Aridept;

-- Sample data from the question.
create table #Aridept
(
rownum int,
RoomID int,
ArrivalDate date,
DepartureDate date,
Name varchar(20),
GuestID int
);

insert into #Aridept (rownum, RoomID, ArrivalDate, DepartureDate, Name, GuestID)
select 1 , 287 , '2020-01-01', '2020-01-09', 'John', 600
union all select 2 , 451 , '2020-01-09', '2020-01-10','John' , 600
union all select 3 , 458 , '2020-01-09', '2020-01-10','John', 600
union all select 1 , 240 , '2020-03-19', '2020-03-21','Alan', 112
union all select 2 , 159 , '2020-03-21', '2020-03-22','Alan', 112
union all select 1 , 400 , '2020-05-01', '2020-05-10','Joe', 225
union all select 2 , 155 , '2020-06-13', '2020-06-18','Joe', 225
union all select 1 , 200 , '2020-07-01', '2020-07-08','Smith', 980
union all select 2 , 544 , '2020-07-08', '2020-07-10','Smith', 980
union all select 3 , 428 , '2020-09-01', '2020-09-05','Smith', 980;

-- #temp: follow-up stays (rownum > 1) whose ArrivalDate differs from the
-- DepartureDate of the same guest's first stay (rownum = 1).
-- NOT EXISTS is used instead of NOT IN so that a NULL DepartureDate on the
-- first stay does not silently filter out every row of that guest.
select a.*
into #temp
from #Aridept a
where a.rownum > 1
  and not exists (select 1
                  from #Aridept b
                  where b.GuestID = a.GuestID
                    and b.rownum = 1
                    and b.DepartureDate = a.ArrivalDate);

-- Final result query: the first stay of every guest that has at least one
-- qualifying follow-up stay, plus those follow-up stays themselves.
select r.*
from (
    select Ari.*
    from #Aridept Ari
    where Ari.rownum = 1
      and Ari.GuestID in (select GuestID from #temp)
    union all
    select * from #temp
) r
order by r.GuestID, r.rownum;

How to count the number of entries between a time period

I have a sample table below which shows the ticket number, time when the ticket was opened and time when it was closed.
TKTNUM OPEN_DATE CLOSE_DATE
1234 12-Mar-19 08:36 14-Mar-19 08:36
1235 13-Mar-19 08:36 15-Mar-19 08:36
1236 14-Mar-19 08:36 16-Mar-19 08:36
1237 15-Mar-19 08:36
1238 16-Mar-19 08:36
1239 17-Mar-19 08:36
1240 18-Mar-19 08:36 20-Mar-19 08:36
1241 19-Mar-19 08:36 20-Mar-19 08:36
1242 20-Mar-19 08:36 21-Mar-19 08:36
I need to count the number of open/closed tickets on a given day...
DATE OPEN CLOSED
12-Mar-19 08:36 1 0
13-Mar-19 08:36 2 0
14-Mar-19 08:36 2 1
15-Mar-19 08:36 2 2
16-Mar-19 08:36 2 3
17-Mar-19 08:36 3 3
18-Mar-19 08:36 4 3
19-Mar-19 08:36 5 3
20-Mar-19 08:36 4 5
Any help is greatly appreciated. Thanks
Used the query(c/o Tejash) below on a sample job_history table
EMPLOYEE_ID START_DATE END_DATE JOB_ID DEPARTMENT_ID
----------- -------------------- -------------------- ---------- -------------
200 17/SEP/1995 00:00:00 17/JUN/2001 00:00:00 AD_ASST 90
101 21/SEP/1997 00:00:00 27/OCT/2001 00:00:00 AC_ACCOUNT 110
102 13/JAN/2001 00:00:00 24/JUL/2006 00:00:00 IT_PROG 60
101 28/OCT/2001 00:00:00 15/MAR/2005 00:00:00 AC_MGR 110
200 01/JUL/2002 00:00:00 31/DEC/2006 00:00:00 AC_ACCOUNT 90
201 17/FEB/2004 00:00:00 19/DEC/2007 00:00:00 MK_REP 20
114 24/MAR/2006 00:00:00 31/DEC/2007 00:00:00 ST_CLERK 50
176 24/MAR/2006 00:00:00 31/DEC/2006 00:00:00 SA_REP 80
176 01/JAN/2007 00:00:00 31/DEC/2007 00:00:00 SA_MAN 80
122 01/JAN/2007 00:00:00 31/DEC/2007 00:00:00 ST_CLERK 50
With dates(dt)
As (Select mindt + level - 1 from
(Select min(start_date) mindt, max(end_date) maxdt from job_history)
Connect by level <= maxdt - mindt + 1)
Select dt,
sum(case when dt between start_date and coalesce(end_date,dt) then 1 end) as startdate,
Sum(case when dt >= end_date then 1 end) as enddate
From dates cross join job_history
Group by dt
Order by dt desc
On 17/JUN/2001, the query gave
DT STARTDATE ENDDATE
-------------------- ---------- ----------
31/DEC/2007 00:00:00 3 10
<SNIPPED>
17/JUN/2001 00:00:00 3 1
Instead of
DT STARTDATE ENDDATE
-------------------- ---------- ----------
31/DEC/2007 00:00:00 3 10
<SNIPPED>
17/JUN/2001 00:00:00 2 1
I tried to edit the query, and now it's giving me
DT STARTDATE ENDDATE
-------------------- ---------- ----------
31/DEC/2007 00:00:00 <<< 10
<snipped>
18/JUN/2001 00:00:00 2 1
17/JUN/2001 00:00:00 2 <<< 1
16/JUN/2001 00:00:00 3 1

You can use dates as cte for total days and join it again with same table as following:
-- Calendar CTE: one row per day from the earliest to the latest OPEN_DATE
-- (the original referenced a non-existent column "open_dt" for the upper bound).
With dates(dt)
As
(
Select mindt + level - 1 from
(Select min(open_date) mindt, max(open_date) maxdt from your_table)
Connect by level <= maxdt - mindt + 1
)
-- For every calendar day count the tickets that are open and those already closed.
Select dt,
-- open: opened on or before dt and not yet closed at dt. The close day itself
-- counts as closed, not open, so a ticket is never counted in both columns.
count(case when dt >= open_date and (close_date is null or dt < close_date) then 1 end) as open,
-- closed: CLOSE_DATE reached; COUNT returns 0 (not NULL) when nothing matches.
count(case when dt >= close_date then 1 end) as closed
From dates cross join your_table
Group by dt
Order by dt;
Cheers!!

You can unpivot and aggregate:
select dte, sum(is_open) as num_opens, sum(is_close) as num_closes
from ((select open_date as dte, 1 as is_open, 0 as is_close
from t
) union all
(select close_date, 0 as is_open, 1 as is_close
from t
)
) t
group by dte
order by dte;
Note: It is probably a good idea to truncate the date so it has no time component:
select trunc(dte), sum(is_open) as num_opens, sum(is_close) as num_closes
from ((select open_date as dte, 1 as is_open, 0 as is_close
from t
) union all
(select close_date, 0 as is_open, 1 as is_close
from t
)
) t
where dte is not null
group by trunc(dte)
order by trunc(dte);
And in Oracle 12C you can use a lateral join for this:
-- Unpivot each ticket into an "open" event and a "close" event with a
-- lateral inline view, then count the events per calendar day.
select trunc(x.dte), sum(x.is_open) as num_opens, sum(x.is_close) as num_closes
from t cross join lateral
     (select t.open_date as dte, 1 as is_open, 0 as is_close from dual union all
      select t.close_date, 0 as is_open, 1 as is_close from dual
     ) x  -- distinct alias: the inline view must not shadow table t
-- tickets that are still open have no CLOSE_DATE; skip their empty close event
-- (same filter as the non-lateral version of this query)
where x.dte is not null
group by trunc(x.dte)
order by trunc(x.dte);

SQLQuery for Time In and Time Out attendance in Oracle

I have a table in oracle with the below sample output.
EID | type | Date
24 | IN |03/25/2019 6:45 am
24 | OUT |03/25/2019 8:05 am
24 | IN |03/25/2019 8:06 am
24 | IN |03/25/2019 8:28 am
24 | OUT |03/25/2019 9:48 am
24 | IN |03/25/2019 9:52 am
24 | IN |03/25/2019 9:57 am
24 | IN |03/25/2019 10:44 am
24 | OUT |03/25/2019 12:16 pm
24 | OUT |03/25/2019 1:00 pm
24 | IN |03/25/2019 1:05 pm
24 | OUT |03/25/2019 2:21 pm
I want to build a query to achieve the below results:
EID | TIMEIN | TIMEOUT | DIIF_IN_MIN
24 | 03/25/2019 6:45 am | 03/25/2019 8:05 am | 1
24 | 03/25/2019 8:06 am | null | 0
24 | 03/25/2019 8:28 am | 03/25/2019 9:48 am | 4
24 | 03/25/2019 9:52 am | null | 0
24 | 03/25/2019 9:57 am | null | 0
24 | 03/25/2019 10:44 am | 03/25/2019 12:16 pm | 0
24 | null | 03/25/2019 1:00 pm | 5
24 | 03/25/2019 1:05 pm | 03/25/2019 2:21 pm | 0

You can implement this logic with the help of the LEAD analytic (window) function:
-- Sample data (first rows of the question's table).
with tab(eid, type, dates ) as
(
 select 24,'IN' ,timestamp'2019-03-25 06:45:00' from dual union all
 select 24,'OUT',timestamp'2019-03-25 08:05:00' from dual union all
 select 24,'IN' ,timestamp'2019-03-25 08:06:00' from dual union all
 select 24,'IN' ,timestamp'2019-03-25 08:28:00' from dual union all
 select 24,'OUT',timestamp'2019-03-25 09:48:00' from dual union all
 select 24,'IN' ,timestamp'2019-03-25 09:52:00' from dual
)
-- Pair every IN with the OUT(s) that fall between it and the employee's next IN.
-- diff_in_minutes is the gap between the OUT and that next IN.
select coalesce(t1.eid, t2.eid) as eid,   -- unmatched OUT rows keep their employee id
       t1.dates as timein, t2.dates as timeout,
       -- Whole minutes taken from the interval's fields; parsing the interval's
       -- string form returned only the minute field and dropped hours and days.
       nvl(  extract(day    from (t1.ld_dates - t2.dates)) * 1440
           + extract(hour   from (t1.ld_dates - t2.dates)) * 60
           + extract(minute from (t1.ld_dates - t2.dates)), 0)
       as diff_in_minutes
  from ( select lead(dates) over (partition by eid order by dates) as ld_dates, t.*
           from tab t
          where type = 'IN' ) t1
  full join ( select * from tab where type = 'OUT' ) t2
    on t1.eid = t2.eid                    -- never pair rows of different employees
   and t1.dates <= t2.dates
   -- an employee's last IN has no next IN (ld_dates is NULL) but may still be
   -- followed by an OUT
   and (t1.ld_dates is null or t1.ld_dates > t2.dates)
 -- order chronologically, including OUT rows that have no matching IN
 order by coalesce(t1.dates, t2.dates);
EID TIMEIN TIMEOUT DIFF_IN_MINUTES
24 25.03.2019 06:45:00 25.03.2019 08:05:00 1
24 25.03.2019 08:06:00 NULL 0
24 25.03.2019 08:28:00 25.03.2019 09:48:00 4
24 25.03.2019 09:52:00 NULL 0
Demo

You can do this with the following logic.
You can get all the ins using a lead() query. Then you can get the unmatched outs using a lag():
-- NOTE(review): "date" and "type" stand for the question's columns. DATE is a
-- reserved word in Oracle, so the real column presumably has another name or
-- must be quoted -- confirm against the actual table.
-- Part 1: one row per IN, paired with the following row when that row is an OUT.
select t.eid, date as timein,
       (case when next_type = 'OUT' then next_date end) as timeout,
       -- date arithmetic yields days; * (24 * 60) converts to minutes
       ((case when next_type = 'OUT' then next_date end) - date) * (24 * 60) as diff_in_minutes
from (select t.*,
             lead(type) over (partition by eid order by date) as next_type,
             -- must look ahead at the date column; the original repeated
             -- lead(type) here, which put the type text into next_date
             lead(date) over (partition by eid order by date) as next_date
      from t
     ) t
where type = 'IN'
union all
-- Part 2: OUT rows that are not directly preceded by an IN (unmatched outs).
select t.eid, null as timein,
       date as timeout, null as diff_in_minutes
from (select t.*,
             lag(type) over (partition by eid order by date) as prev_type
      from t
     ) t
where type = 'OUT' and (prev_type <> 'IN' or prev_type is null);
Here is a db<>fiddle with all your data, showing that it supports the multiple INs and OUTs.
Note this assumes that the date/time column is really a date. It only converts to a timestamp to show the time component in the result set.

SQL Server select missing Dates in result set

I have one table containing Employee Daily Attendance punchtime in space separated form.
EmployeePunch
EmpID EmpName Date Time
1 ABC 2014-12-01 10:00 18:00
1 ABC 2014-12-02 09:50 17:50
1 ABC 2014-12-04 09:30 17:30
1 ABC 2014-12-07 10:00 18:00
1 ABC 2014-12-08 09:50 17:50
1 ABC 2014-12-10 09:30 17:30
Now I want to write a query for following output
EmpID EmpName Date Time
1 ABC 2014-12-01 10:00 18:00
1 ABC 2014-12-02 09:50 17:50
1 ABC 2014-12-03 ABSENT
1 ABC 2014-12-04 09:30 17:30
1 ABC 2014-12-05 ABSENT
1 ABC 2014-12-06 ABSENT
1 ABC 2014-12-07 10:00 18:00
1 ABC 2014-12-08 09:50 17:50
1 ABC 2014-12-09 ABSENT
1 ABC 2014-12-10 09:30 17:30

First define CTE to generate missing records:
-- Builds one 'ABSENT' placeholder row per employee for every day of December 2014.
WITH dates AS (
-- Anchor member: the first day of the month for each employee. The literal is
-- cast to DATE so its type matches the recursive member; SQL Server rejects a
-- recursive CTE whose anchor and recursive column types differ.
SELECT DISTINCT EmpId, EmpName, CAST('2014-12-01' AS DATE) AS Date, 'ABSENT' AS Time
FROM EmployeePunch
-- A recursive CTE must combine its members with UNION ALL (plain UNION is an error).
UNION ALL
-- Recursive member: add one day until the last day of the month is reached.
SELECT EmpId, EmpName, DATEADD(DAY, 1, Date), 'ABSENT'
FROM dates
WHERE Date < DATEADD(DAY, -1, DATEADD(MONTH, 1, CAST('2014-12-01' AS DATE))))
SELECT * FROM dates
In the next step replace the last line with:
SELECT * FROM EmployeePunch
UNION ALL
SELECT d.* FROM dates d
LEFT JOIN EmployeePunch e
ON e.EmpId = d.EmpId AND e.Date = d.Date
WHERE e.Time IS NULL
The missing rows are the outerjoined ones.

Without CTE:
select ep1.EmpId, ep1.EmpName, a.Date, ISNULL(ep2.Time, 'ABSENT') as Time
from (
select DATEADD(day, a.a + (10 * b.a) + (100 * c.a), CAST('2014-12-01' /*begin date*/ AS DATE)) as Date
from (select 0 as a union all select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) as a
cross join (select 0 as a union all select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) as b
cross join (select 0 as a union all select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) as c
) a cross apply (select distinct EmpId, EmpName from EmployeePunch) ep1 --on a.Date = f.Date
left join EmployeePunch ep2 on ep2.Date = a.Date and ep2.EmpId = ep1.EmpId
where a.Date <= '2014-12-10' and ep1.EmpId is not null
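Be aware of the maximum supported range: the three cross-joined digit tables generate 1000 days starting from the begin date. It can be extended by cross joining further digit tables if necessary.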

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

Need help on Hive query - hive

Related

ROLLUP BY year,week

How to filter my table based on this specific date criteria?

How to count the number of entries between a time period

SQLQuery for Time In and Time Out attendance in Oracle

SQL Server select missing Dates in result set

Categories

Resources