Max dates for each sequence within partitions

Max dates for each sequence within partitions - sql

I would like to see if somebody has an idea how to get the max and min dates within each 'id' using the 'row_num' column as an indicator when the sequence starts/ends in SQL Server 2016.
The screenshot below shows the desired output in columns 'min_date' and 'max_date'.
Any help would be appreciated.

You could use windowed MIN/MAX:
WITH cte AS (
SELECT *,SUM(CASE WHEN row_num > 1 THEN 0 ELSE 1 END)
OVER(PARTITION BY id, cat ORDER BY date_col) AS grp
FROM tab
)
SELECT *, MIN(date_col) OVER(PARTITION BY id, cat, grp) AS min_date,
MAX(date_col) OVER(PARTITION BY id, cat, grp) AS max_date
FROM cte
ORDER BY id, date_col, cat;
Rextester Demo

Try something like
SELECT
Q1.id, Q1.cat,
MIN(Q1.date) AS min_dat,
MAX(Q1.date) AS max_dat
FROM
(SELECT
*,
ROW_NUMBER() OVER (PARTITION BY id, cat ORDER BY [date]) AS r1,
ROW_NUMBER() OVER (PARTITION BY id ORDER BY [date]) AS r2
) AS Q1
GROUP BY
Q1.id, Q1.r2 - Q1.r1

Related

Getting category based on production shift

I have this query
with cte as(
SELECT *,
ROW_NUMBER() OVER (PARTITION BY seq ORDER BY date_time) rn1,
ROW_NUMBER() OVER (PARTITION BY seq, output > 0
ORDER BY date_time) rn2
FROM myTable
;
select
seq,
date_time::date,
MIN(date_time) AS MinDatetime,
MAX(date_time) AS MaxDatetime,
SUM(output) AS sum_output
FROM cte cte
GROUP by
seq,
date_time::date ,
cntpr > 0,
rn1 - rn2
ORDER BY
seq,
MIN(date_time);
here's the result:
what I would like to do is to join my result to this master table
enter image description here
and the expected result will be MinDatetime and MaxDatetime among my master table's start and end shift to show the shift information, like this:
enter image description here
Any help would be very appreciated.. thank you!

This is the solution I came up with:
select seq, shift, start_shift, end_shift, MinDateTime, MaxDateTime
from
(
select
seq,
MIN(date_time) AS MinDatetime,
MAX(date_time) AS MaxDatetime,
SUM(output) AS sum_output
FROM cte cte
GROUP by
seq
ORDER BY
seq,
MIN(date_time::date)) t
join mstr
on
CASE
WHEN start_shift < end_shift THEN (MinDateTime::time between start_shift and end_shift) OR (MaxDateTime::time between start_shift and end_shift)
ELSE (MinDateTime::time >= start_shift) OR
(MaxDateTime::time >= start_shift) OR
(MinDateTime::time <= end_shift) OR
(MaxDateTime::time <= end_shift)
END
ORDER BY seq;
Fiddle: https://www.db-fiddle.com/f/4jyoMCicNSZpjMt4jFYoz5/4208
Explanation: I get the groups, join them with master table on interval matching.

Lag functions and SUM

I need to get the list of users that have been offline for at least 20 min every day. Here's my data
I have this starting query but am stuck on how to sum the difference in offline_mins i.e. need to add "and sum(offline_mins)>=20" to the where clause
SELECT
userid,
connected,
LAG(recordeddt) OVER(PARTITION BY userid
ORDER BY userid,
recordeddt) AS offline_period,
DATEDIFF(minute, LAG(recordeddt) OVER(PARTITION BY userid
ORDER BY userid,
recordeddt),recordeddt) offline_mins
FROM device_data where connected=0;
My expected results :
Thanks in advance.

This reads like a gaps-and-island problem, where you want to group together adjacent rows having the same userid and status.
As a starter, here is a query that computes the islands:
select userid, connected, min(recordeddt) startdt, max(lead_recordeddt) enddt,
datediff(min(recordeddt), max(lead_recordeddt)) duration
from (
select dd.*,
row_number() over(partition by userid order by recordeddt) rn1,
row_number() over(partition by userid, connected order by recordeddt) rn2,
lead(recordeddt) over(partition by userid order by recordeddt) lead_recordeddt
from device_data dd
) dd
group by userid, connected, rn1 - rn2
Now, say you want users that were offline for at least 20 minutes every day. You can breakdown the islands per day, and use a having clause for filtering:
select userid
from (
select recordedday, userid, connected,
datediff(min(recordeddt), max(lead_recordeddt)) duration
from (
select dd.*, v.*,
row_number() over(partition by v.recordedday, userid order by recordeddt) rn1,
row_number() over(partition by v.recordedday, userid, connected order by recordeddt) rn2,
lead(recordeddt) over(partition by v.recordedday, userid order by recordeddt) lead_recordeddt
from device_data dd
cross apply (values (convert(date, recordeddt))) v(recordedday)
) dd
group by convert(date, recordeddt), userid, connected, rn1 - rn2
) dd
group by userid
having count(distinct case when connected = 0 and duration >= 20 then recordedday end) = count(distinct recordedday)

As noted this is a gaps and island problem. This is my take on it using a simple lag function to create groups, filter out the connected rows and then work on the date ranges.
CREATE TABLE #tmp(ID int, UserID int, dt datetime, connected int)
INSERT INTO #tmp VALUES
(1,1,'11/2/20 10:00:00',1),
(2,1,'11/2/20 10:05:00',0),
(3,1,'11/2/20 10:10:00',0),
(4,1,'11/2/20 10:15:00',0),
(5,1,'11/2/20 10:20:00',0),
(6,2,'11/2/20 10:00:00',1),
(7,2,'11/2/20 10:05:00',1),
(8,2,'11/2/20 10:10:00',0),
(9,2,'11/2/20 10:15:00',0),
(10,2,'11/2/20 10:20:00',0),
(11,2,'11/2/20 10:25:00',0),
(12,2,'11/2/20 10:30:00',0)
SELECT UserID, connected,DATEDIFF(minute,MIN(DT), MAX(DT)) OFFLINE_MINUTES
FROM
(
SELECT *, SUM(CASE WHEN connected <> LG THEN 1 ELSE 0 END) OVER (ORDER BY UserID,dt) grp
FROM
(
select *, LAG(connected,1,connected) OVER(PARTITION BY UserID ORDER BY UserID,dt) LG
from #tmp
) x
) y
WHERE connected <> 1
GROUP BY UserID,grp,connected
HAVING DATEDIFF(minute,MIN(DT), MAX(DT)) >= 20

PostgreSQL Percent Change using Row Number

I'm trying to find the percent change using row number with PostgreSQL but I'm running into an error where my "percent_change" column shows 0.
Here is what I have as my code.
WITH CTE AS (
SELECT date, sales, ROW_NUMBER() OVER (ORDER by date) AS rn
FROM sales_2019)
SELECT c1.date, c1.sales,
CAST(COALESCE (((c1.sales - c2.sales) * 1.0 / c2.sales) * 100, 0) AS INT) AS percent_change
FROM CTE AS c1
LEFT JOIN CTE AS c2
ON c1.date = c2.date AND c1.rn = c2.rn + 1
Here is my SQL table in case it's needed. Thank you in advance, I greatly appreciate it.

You can use LAG() for your requirement:
select
date,
sales,
round(coalesce((((sales-(lag(sales) over (order by date)))*1.0)/(lag(sales) over (order by date)))*100,0),2)
from sales_2019
or you can try with WITH clause
with cte as ( select
date,
sales,
coalesce(lag(sales) over (order by date),0) as previous_month
from sales_2019
)
select
date,
sales,
round( coalesce( (sales-previous_month)*1.0/nullif(previous_month,0),0 )*100,2)
from cte
DEMO
EDIT as per requirement in comment
with cte as ( select
date_,
sales,
ROW_NUMBER() OVER (ORDER by date_) AS rn1,
ROW_NUMBER() OVER (ORDER by date_)-1 AS rn2
from sales_2019
)
select t1.date_,
t1.sales,
round( coalesce( (t1.sales-t2.sales)*1.0/nullif(t2.sales,0),0 )*100,2)
from cte t1 left join cte t2 on t1.rn2=t2.rn1
DEMO

What should be done to do multiple order by?

I want to sort by chart_num and DATE. However, the following results are printed out when aligned:
in this my code:
SELECT *
FROM (
SELECT id, chart_num, chart_name, MIN(DATE) AS DATE, amount, (COUNT(*) = 2) AS result, card_check
FROM (
(
SELECT id, hpd.chart_num AS chart_num, hpd.chart_name AS chart_name, hpd.visit AS DATE, card_amount_received AS amount, card_check_modify AS card_check
,row_number() over (PARTITION BY card_amount_received ORDER BY id) AS seqnum
FROM hospital_payment_data hpd
WHERE store_mbrno = '135790' AND card_amount_received > 0
)
UNION ALL (
SELECT id, ncd. chart_num AS chart_num, ncd. chart_name AS chart_name, DATE_FORMAT(ncd.tranDate,'%Y-%m-%d') AS DATA, amount, card_check_result AS card_check
,row_number() over (PARTITION BY amount ORDER BY id) AS seqnum
FROM noti_card_data ncd
WHERE (mbrNo = '135790' OR mbrNo = '135791') AND cmd ='승인'
)
) X
GROUP BY amount, seqnum
ORDER BY result DESC
) a
ORDER BY a.DATE DESC
The result I want is that the NULL value goes back to the latest DATE, and if there is a chart_num, I want to sort it in order of chart_num and DATE.

It feels like I'm missing something else with this question, but you can separate columns in the ORDER BY with a comma. It's not clear from your text whether you want dates grouped within the same chart_num or charts grouped within the same date, but if I guessed wrong you can just swap it.
Also, the ORDER BY result DESC is completely extra. It adds nothing to the results, and by removing it we can get rid of a whole level of nesting.
SELECT id, chart_num, chart_name, MIN(DATE) AS DATE, amount, (COUNT(*) = 2) AS result, card_check
FROM (
(
SELECT id, hpd.chart_num AS chart_num, hpd.chart_name AS chart_name, hpd.visit AS DATE, card_amount_received AS amount, card_check_modify AS card_check
,row_number() over (PARTITION BY card_amount_received ORDER BY id) AS seqnum
FROM hospital_payment_data hpd
WHERE store_mbrno = '135790' AND card_amount_received > 0
)
UNION ALL (
SELECT id, ncd.chart_num, ncd.chart_name, DATE_FORMAT(ncd.tranDate,'%Y-%m-%d'), amount, card_check_result
,row_number() over (PARTITION BY amount ORDER BY id) AS seqnum
FROM noti_card_data ncd
WHERE mbrNo IN ('135790', '135791') AND cmd ='승인'
)
) X
GROUP BY amount, seqnum
ORDER BY MIN(DATE), coalesce(chart_num,-1), result DESC

Dont order by result in the inner union all query.
Sort by chart_num and date in place of result.
So in place of
Order by result desc
use this:
Order by chart_num desc, DATE desc
Or,
in outer main query:
in place of
Order by a.DATE DESC
use
Order by a.chart_num desc, a.DATE desc
Hope it helps.!

How to get the validity date range of a price from individual daily prices in SQL

I have some prices for the month of January.
Date,Price
1,100
2,100
3,115
4,120
5,120
6,100
7,100
8,120
9,120
10,120
Now, the o/p I need is a non-overlapping date range for each price.
price,from,To
100,1,2
115,3,3
120,4,5
100,6,7
120,8,10
I need to do this using SQL only.
For now, if I simply group by and take min and max dates, I get the below, which is an overlapping range:
price,from,to
100,1,7
115,3,3
120,4,10

This is a gaps-and-islands problem. The simplest solution is the difference of row numbers:
select price, min(date), max(date)
from (select t.*,
row_number() over (order by date) as seqnum,
row_number() over (partition by price, order by date) as seqnum2
from t
) t
group by price, (seqnum - seqnum2)
order by min(date);
Why this works is a little hard to explain. But if you look at the results of the subquery, you will see how the adjacent rows are identified by the difference in the two values.

SELECT Lag.price,Lag.[date] AS [From], MIN(Lead.[date]-Lag.[date])+Lag.[date] AS [to]
FROM
(
SELECT [date],[Price]
FROM
(
SELECT [date],[Price],LAG(Price) OVER (ORDER BY DATE,Price) AS LagID FROM #table1 A
)B
WHERE CASE WHEN Price <> ISNULL(LagID,1) THEN 1 ELSE 0 END = 1
)Lag
JOIN
(
SELECT [date],[Price]
FROM
(
SELECT [date],Price,LEAD(Price) OVER (ORDER BY DATE,Price) AS LeadID FROM [#table1] A
)B
WHERE CASE WHEN Price <> ISNULL(LeadID,1) THEN 1 ELSE 0 END = 1
)Lead
ON Lag.[Price] = Lead.[Price]
WHERE Lead.[date]-Lag.[date] >= 0
GROUP BY Lag.[date],Lag.[price]
ORDER BY Lag.[date]

Another method using ROWS UNBOUNDED PRECEDING
SELECT price, MIN([date]) AS [from], [end_date] AS [To]
FROM
(
SELECT *, MIN([abc]) OVER (ORDER BY DATE DESC ROWS UNBOUNDED PRECEDING ) end_date
FROM
(
SELECT *, CASE WHEN price = next_price THEN NULL ELSE DATE END AS abc
FROM
(
SELECT a.* , b.[date] AS next_date, b.price AS next_price
FROM #table1 a
LEFT JOIN #table1 b
ON a.[date] = b.[date]-1
)AA
)BB
)CC
GROUP BY price, end_date

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

Max dates for each sequence within partitions - sql

Try something like SELECT Q1.id, Q1.cat, MIN(Q1.date) AS min_dat, MAX(Q1.date) AS max_dat FROM (SELECT *, ROW_NUMBER() OVER (PARTITION BY id, cat ORDER BY [date]) AS r1, ROW_NUMBER() OVER (PARTITION BY id ORDER BY [date]) AS r2 ) AS Q1 GROUP BY Q1.id, Q1.r2 - Q1.r1

Related

Getting category based on production shift

Lag functions and SUM

PostgreSQL Percent Change using Row Number

What should be done to do multiple order by?

How to get the validity date range of a price from individual daily prices in SQL

Categories

Resources