Getting category based on production shift - sql

I have this query
with cte as(
SELECT *,
ROW_NUMBER() OVER (PARTITION BY seq ORDER BY date_time) rn1,
ROW_NUMBER() OVER (PARTITION BY seq, output > 0 ORDER BY date_time) rn2
FROM myTable
)
select
seq,
date_time::date,
MIN(date_time) AS MinDatetime,
MAX(date_time) AS MaxDatetime,
SUM(output) AS sum_output
FROM cte cte
GROUP by
seq,
date_time::date ,
output > 0,
rn1 - rn2
ORDER BY
seq,
MIN(date_time);
Here's the result of that query. What I would like to do is join this result to my master table of shifts; the expected output should show the shift information where each group's MinDatetime and MaxDatetime fall between the master table's start and end shift times.
Any help would be very appreciated. Thank you!

This is the solution I came up with (cte here is the same CTE as in the question):
select seq, shift, start_shift, end_shift, MinDateTime, MaxDateTime
from
(
select
seq,
MIN(date_time) AS MinDatetime,
MAX(date_time) AS MaxDatetime,
SUM(output) AS sum_output
FROM cte cte
GROUP by
seq
ORDER BY
seq,
MIN(date_time::date)) t
join mstr
on
CASE
WHEN start_shift < end_shift THEN (MinDateTime::time between start_shift and end_shift) OR (MaxDateTime::time between start_shift and end_shift)
ELSE (MinDateTime::time >= start_shift) OR
(MaxDateTime::time >= start_shift) OR
(MinDateTime::time <= end_shift) OR
(MaxDateTime::time <= end_shift)
END
ORDER BY seq;
Fiddle: https://www.db-fiddle.com/f/4jyoMCicNSZpjMt4jFYoz5/4208
Explanation: I compute the groups, then join them to the master table on interval matching; the CASE expression in the join condition handles shifts that cross midnight (start_shift > end_shift).
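To illustrate the interval matching, here is a purely hypothetical mstr table (the real shift data isn't shown in the question):
-- shift | start_shift | end_shift
--   1   |  06:00:00   | 14:00:00
--   2   |  14:00:00   | 22:00:00
--   3   |  22:00:00   | 06:00:00   (crosses midnight, so start_shift > end_shift)
-- A group with MinDateTime 23:10 and MaxDateTime 01:45 fails the BETWEEN branch for
-- shifts 1 and 2, but matches shift 3 through the ELSE branch because
-- MinDateTime::time >= start_shift (23:10 >= 22:00).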

Related

Lag functions and SUM

I need to get the list of users that have been offline for at least 20 min every day. Here's my data
I have this starting query, but I am stuck on how to sum the difference in offline_mins, i.e. I effectively need to add "and sum(offline_mins) >= 20" to the where clause:
SELECT
userid,
connected,
LAG(recordeddt) OVER(PARTITION BY userid
ORDER BY userid,
recordeddt) AS offline_period,
DATEDIFF(minute, LAG(recordeddt) OVER(PARTITION BY userid
ORDER BY userid,
recordeddt),recordeddt) offline_mins
FROM device_data where connected=0;
My expected results:
Thanks in advance.
This reads like a gaps-and-islands problem, where you want to group together adjacent rows having the same userid and status.
As a starter, here is a query that computes the islands:
select userid, connected, min(recordeddt) startdt, max(lead_recordeddt) enddt,
datediff(minute, min(recordeddt), max(lead_recordeddt)) duration
from (
select dd.*,
row_number() over(partition by userid order by recordeddt) rn1,
row_number() over(partition by userid, connected order by recordeddt) rn2,
lead(recordeddt) over(partition by userid order by recordeddt) lead_recordeddt
from device_data dd
) dd
group by userid, connected, rn1 - rn2
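To see why rn1 - rn2 identifies the islands, here is a walk-through for one user with made-up timestamps:
-- recordeddt  connected  rn1  rn2  rn1 - rn2
-- 10:00       1          1    1    0
-- 10:05       0          2    1    1
-- 10:10       0          3    2    1
-- 10:15       1          4    2    2
-- 10:20       0          5    3    2
-- rn1 - rn2 stays constant within each consecutive run of the same connected value,
-- so grouping by userid, connected, rn1 - rn2 isolates each island.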
Now, say you want users that were offline for at least 20 minutes every day. You can break down the islands per day, and use a having clause that keeps only the users for whom every distinct day has at least one offline island of 20+ minutes:
select userid
from (
select recordedday, userid, connected,
datediff(minute, min(recordeddt), max(lead_recordeddt)) duration
from (
select dd.*, v.*,
row_number() over(partition by v.recordedday, userid order by recordeddt) rn1,
row_number() over(partition by v.recordedday, userid, connected order by recordeddt) rn2,
lead(recordeddt) over(partition by v.recordedday, userid order by recordeddt) lead_recordeddt
from device_data dd
cross apply (values (convert(date, recordeddt))) v(recordedday)
) dd
group by recordedday, userid, connected, rn1 - rn2
) dd
group by userid
having count(distinct case when connected = 0 and duration >= 20 then recordedday end) = count(distinct recordedday)
As noted, this is a gaps-and-islands problem. This is my take on it: use a simple LAG to create the groups, filter out the connected rows, and then work on the date ranges.
CREATE TABLE #tmp(ID int, UserID int, dt datetime, connected int)
INSERT INTO #tmp VALUES
(1,1,'11/2/20 10:00:00',1),
(2,1,'11/2/20 10:05:00',0),
(3,1,'11/2/20 10:10:00',0),
(4,1,'11/2/20 10:15:00',0),
(5,1,'11/2/20 10:20:00',0),
(6,2,'11/2/20 10:00:00',1),
(7,2,'11/2/20 10:05:00',1),
(8,2,'11/2/20 10:10:00',0),
(9,2,'11/2/20 10:15:00',0),
(10,2,'11/2/20 10:20:00',0),
(11,2,'11/2/20 10:25:00',0),
(12,2,'11/2/20 10:30:00',0)
SELECT UserID, connected,DATEDIFF(minute,MIN(DT), MAX(DT)) OFFLINE_MINUTES
FROM
(
SELECT *, SUM(CASE WHEN connected <> LG THEN 1 ELSE 0 END) OVER (ORDER BY UserID,dt) grp
FROM
(
select *, LAG(connected,1,connected) OVER(PARTITION BY UserID ORDER BY UserID,dt) LG
from #tmp
) x
) y
WHERE connected <> 1
GROUP BY UserID,grp,connected
HAVING DATEDIFF(minute,MIN(DT), MAX(DT)) >= 20
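For reference, against the sample data above this should return a single row, since user 1's offline stretch is only 15 minutes:
-- UserID  connected  OFFLINE_MINUTES
-- 2       0          20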

PostgreSQL Percent Change using Row Number

I'm trying to find the percent change using row number with PostgreSQL but I'm running into an error where my "percent_change" column shows 0.
Here is what I have as my code.
WITH CTE AS (
SELECT date, sales, ROW_NUMBER() OVER (ORDER by date) AS rn
FROM sales_2019)
SELECT c1.date, c1.sales,
CAST(COALESCE (((c1.sales - c2.sales) * 1.0 / c2.sales) * 100, 0) AS INT) AS percent_change
FROM CTE AS c1
LEFT JOIN CTE AS c2
ON c1.date = c2.date AND c1.rn = c2.rn + 1
Here is my SQL table in case it's needed. Thank you in advance, I greatly appreciate it.
In your query the join condition requires c1.date = c2.date and c1.rn = c2.rn + 1 at the same time; for distinct dates both can never hold, so the LEFT JOIN never finds a previous row and COALESCE turns every NULL into 0. You can use LAG() instead for your requirement:
select
date,
sales,
round(coalesce((((sales-(lag(sales) over (order by date)))*1.0)/(lag(sales) over (order by date)))*100,0),2)
from sales_2019
or you can try it with a WITH clause:
with cte as ( select
date,
sales,
coalesce(lag(sales) over (order by date),0) as previous_month
from sales_2019
)
select
date,
sales,
round( coalesce( (sales-previous_month)*1.0/nullif(previous_month,0),0 )*100,2)
from cte
DEMO
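As a quick sanity check with made-up numbers (not the question's data), the second query behaves like this:
-- date        sales  previous_month  percent_change
-- 2019-01-01  100    0               0.00  (NULLIF(0, 0) avoids division by zero)
-- 2019-02-01  120    100             20.00 ((120 - 100) / 100 * 100)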
EDIT as per requirement in comment
with cte as ( select
date_,
sales,
ROW_NUMBER() OVER (ORDER by date_) AS rn1,
ROW_NUMBER() OVER (ORDER by date_)-1 AS rn2
from sales_2019
)
select t1.date_,
t1.sales,
round( coalesce( (t1.sales-t2.sales)*1.0/nullif(t2.sales,0),0 )*100,2)
from cte t1 left join cte t2 on t1.rn2=t2.rn1
DEMO

What should be done to do multiple order by?

I want to sort by chart_num and DATE. However, the following results are printed out when I sort:
Here is my code:
SELECT *
FROM (
SELECT id, chart_num, chart_name, MIN(DATE) AS DATE, amount, (COUNT(*) = 2) AS result, card_check
FROM (
(
SELECT id, hpd.chart_num AS chart_num, hpd.chart_name AS chart_name, hpd.visit AS DATE, card_amount_received AS amount, card_check_modify AS card_check
,row_number() over (PARTITION BY card_amount_received ORDER BY id) AS seqnum
FROM hospital_payment_data hpd
WHERE store_mbrno = '135790' AND card_amount_received > 0
)
UNION ALL (
SELECT id, ncd. chart_num AS chart_num, ncd. chart_name AS chart_name, DATE_FORMAT(ncd.tranDate,'%Y-%m-%d') AS DATA, amount, card_check_result AS card_check
,row_number() over (PARTITION BY amount ORDER BY id) AS seqnum
FROM noti_card_data ncd
WHERE (mbrNo = '135790' OR mbrNo = '135791') AND cmd ='승인'
)
) X
GROUP BY amount, seqnum
ORDER BY result DESC
) a
ORDER BY a.DATE DESC
The result I want is that rows with a NULL value go to the latest DATE, and if there is a chart_num, I want to sort in order of chart_num and DATE.
It feels like I'm missing something else with this question, but you can separate columns in the ORDER BY with a comma. It's not clear from your text whether you want dates grouped within the same chart_num or charts grouped within the same date, but if I guessed wrong you can just swap it.
Also, the ORDER BY result DESC is completely extra. It adds nothing to the results, and by removing it we can get rid of a whole level of nesting.
SELECT id, chart_num, chart_name, MIN(DATE) AS DATE, amount, (COUNT(*) = 2) AS result, card_check
FROM (
(
SELECT id, hpd.chart_num AS chart_num, hpd.chart_name AS chart_name, hpd.visit AS DATE, card_amount_received AS amount, card_check_modify AS card_check
,row_number() over (PARTITION BY card_amount_received ORDER BY id) AS seqnum
FROM hospital_payment_data hpd
WHERE store_mbrno = '135790' AND card_amount_received > 0
)
UNION ALL (
SELECT id, ncd.chart_num, ncd.chart_name, DATE_FORMAT(ncd.tranDate,'%Y-%m-%d'), amount, card_check_result
,row_number() over (PARTITION BY amount ORDER BY id) AS seqnum
FROM noti_card_data ncd
WHERE mbrNo IN ('135790', '135791') AND cmd ='승인'
)
) X
GROUP BY amount, seqnum
ORDER BY MIN(DATE), coalesce(chart_num,-1), result DESC
Don't order by result in the inner union all query.
Sort by chart_num and date in place of result.
So in place of
Order by result desc
use this:
Order by chart_num desc, DATE desc
Or,
in outer main query:
in place of
Order by a.DATE DESC
use
Order by a.chart_num desc, a.DATE desc
Hope it helps!

Max dates for each sequence within partitions

I would like to see if somebody has an idea how to get the max and min dates within each 'id' using the 'row_num' column as an indicator when the sequence starts/ends in SQL Server 2016.
The screenshot below shows the desired output in columns 'min_date' and 'max_date'.
Any help would be appreciated.
You could use windowed MIN/MAX:
WITH cte AS (
SELECT *,SUM(CASE WHEN row_num > 1 THEN 0 ELSE 1 END)
OVER(PARTITION BY id, cat ORDER BY date_col) AS grp
FROM tab
)
SELECT *, MIN(date_col) OVER(PARTITION BY id, cat, grp) AS min_date,
MAX(date_col) OVER(PARTITION BY id, cat, grp) AS max_date
FROM cte
ORDER BY id, date_col, cat;
Rextester Demo
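The idea behind grp, shown with made-up values since the source table isn't posted: each time row_num resets to 1 a new sequence starts, so the running SUM bumps the group number.
-- row_num  date_col    CASE  grp (running SUM per id, cat)
-- 1        2018-01-01  1     1
-- 2        2018-01-02  0     1
-- 3        2018-01-03  0     1
-- 1        2018-02-10  1     2   (row_num restarts, new group)
-- 2        2018-02-11  0     2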
Try something like:
SELECT
Q1.id, Q1.cat,
MIN(Q1.[date]) AS min_dat,
MAX(Q1.[date]) AS max_dat
FROM
(SELECT
*,
ROW_NUMBER() OVER (PARTITION BY id, cat ORDER BY [date]) AS r1,
ROW_NUMBER() OVER (PARTITION BY id ORDER BY [date]) AS r2
FROM tab -- your source table
) AS Q1
GROUP BY
Q1.id, Q1.cat, Q1.r2 - Q1.r1

Hive transformation

I am trying to make a simple hive transformation.
Can someone provide me a way to do this? I have tried collect_set and am currently looking at Klout's open-source UDFs.
I think this gives you what you want. I wasn't able to run it and debug it though. Good luck!
select start_points.unit
, start_time as start
, start_time + min(stop_time - start_time) as stop
from
(select * from
(select date_time as start_time
, unit
, last_value(unit) over (order by date_time desc rows between current row and 1 following) as previous_unit
from table
) previous
where unit <> previous_unit
) start_points
left outer join
(select * from
(select date_time as stop_time
, unit
, last_value(unit) over (order by date_time rows between current row and 1 following) as next_unit
from table
) next
where unit <> next_unit
) stop_points
on start_points.unit = stop_points.unit
where stop_time > start_time
group by start_points.unit, start_time
;
What about using the min and max functions? I think the following will get you what you need:
SELECT
Unit,
MIN(datetime) as start,
MAX(datetime) as stop
from table_name
group by Unit
;
I found it. Thanks for the pointer to use window functions
select *
from
(select *,
case when lag(unit,1) over (partition by id order by effective_time_ut desc) is NULL THEN 1
when unit<>lag(unit,1) over (partition by id order by effective_time_ut desc) then 1
when lead(unit,1) over (partition by id order by effective_time_ut desc) is NULL then 1
else 0 end as different_loc
from units_we_care) a
where different_loc=1
create table temptable as
select unit, start_date, end_time, row_number() over () as row_num
from (select unit, min(date_time) as start_date, max(date_time) as end_time from table group by unit) a;

-- each unit's end becomes the next unit's start (by row_num); the last unit keeps its own end_time
select a.unit, a.start_date as start_date, nvl(b.start_date, a.end_time) as end_time
from temptable a left outer join temptable b on (a.row_num + 1) = b.row_num;