Select rows until running sum reaches specific value

Select rows until running sum reaches specific value - sql

I have the following data:
DECLARE #t TABLE (usr VARCHAR(100), dt DATE, amount INT);
INSERT INTO #t VALUES
('a', '2018-01-01', 100), -- 100
('a', '2018-02-01', 100), -- 200
('a', '2018-03-01', 100), -- 300
('a', '2018-04-01', 100), -- 400
('a', '2018-05-01', 100), -- 500
('b', '2018-01-01', 150), -- 150
('b', '2018-02-01', 150), -- 300
('b', '2018-03-01', 150), -- 450
('b', '2018-04-01', 150), -- 600
('b', '2018-05-01', 150); -- 750
And a value such as 300 or 301 (a user variable or column). I want to select rows until running total of amount reaches the specified value, with the following twist:
For 300 I want to select first 3 rows for a and first 2 rows for b
For 301 I want to select first 4 rows for a and first 3 rows for b
This is supposed to be simple but the solutions I found do not handle the second case.

DECLARE #t TABLE (usr VARCHAR(100), dt DATE, amount INT);
INSERT INTO #t VALUES
('a', '2018-01-01', 100), -- 100
('a', '2018-02-01', 100), -- 200
('a', '2018-03-01', 100), -- 300
('a', '2018-04-01', 100), -- 400
('a', '2018-05-01', 100), -- 500
('b', '2018-01-01', 150), -- 150
('b', '2018-02-01', 150), -- 300
('b', '2018-03-01', 150), -- 450
('b', '2018-04-01', 150), -- 600
('b', '2018-05-01', 150); -- 750
DECLARE #Total INT = 301;
WITH cte AS
(
SELECT *, SUM(amount) OVER (PARTITION BY usr ORDER BY dt) AS RunTotal
FROM #t
)
SELECT *
FROM cte
WHERE cte.RunTotal - cte.amount < #Total -- running total for previous row is less
-- than #Total then include current row

DECLARE #t TABLE (usr VARCHAR(100), dt DATE, amount INT);
INSERT INTO #t VALUES
('a', '2018-01-01', 100), -- 100
('a', '2018-02-01', 100), -- 200
('a', '2018-03-01', 100), -- 300
('a', '2018-04-01', 100), -- 400
('a', '2018-05-01', 100), -- 500
('b', '2018-01-01', 150), -- 150
('b', '2018-02-01', 150), -- 300
('b', '2018-03-01', 150), -- 450
('b', '2018-04-01', 150), -- 600
('b', '2018-05-01', 150); -- 750
declare #target int = 300;
with cte_RunningTotal as
(
select
usr,
dt,
amount,
sum(amount) over (partition by usr order by dt rows unbounded preceding) as runningTotal
from #t
)
select *
from cte_RunningTotal
where runningTotal < #target + amount
order by usr, dt

Related

Cumulative sum reset by change of year (SQL Server)

I'm using SQL Server 2017. I would like to sum up the budget per month of a year for that year and factory.
This cumulation is to be reset with each new year.
Table schema:
CREATE TABLE [TABLE_1]
(
FACTORY varchar(50) Null,
DATE_YM int Null,
BUDGET int NULL,
);
INSERT INTO TABLE_1 (FACTORY, DATE_YM, BUDGET)
VALUES ('A', 202111, 1),
('A', 202112, 1),
('A', 202201, 10),
('A', 202202, 100),
('A', 202203, 1000),
('B', 202111, 2),
('B', 202112, 2),
('B', 202201, 20),
('B', 202202, 200),
('B', 202203, 2000),
('C', 202111, 3),
('C', 202112, 3),
('C', 202201, 30),
('C', 202202, 300),
('C', 202203, 3000);
LINK TO db<>fiddle
Desired result
FACTORY
DATE_YM
C_BUDGET_SUM
A
202111
1
A
202112
2
A
202201
10
A
202202
110
A
202203
1110
B
202111
2
B
202112
4
B
202201
20
B
202202
220
B
202203
2220
C
202111
3
C
202112
6
C
202201
30
C
202202
330
C
202203
3330
My approach:
WITH data AS
(
SELECT
T1.FACTORY,
T1.DATE_YM,
T1.BUDGET
FROM
TABLE_1 AS T1
)
SELECT
FACTORY,
DATE_YM,
SUM(BUDGET) OVER (ORDER BY FACTORY, DATE_YM ASC
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS 'C_BUDGET_SUM'
FROM
data
This query totals across year ends. How can the year break be implemented dynamically?

The CTE is not necessary, but I'm assuming this is a simplified version.
To expand on my comment
with data as (
select
T1.FACTORY,
T1.DATE_YM,
T1.BUDGET
from TABLE_1 as T1
)
select
FACTORY,
DATE_YM,
sum(BUDGET) over (partition by Factory,left(Date_YM,4) order by DATE_YM asc rows between unbounded preceding and current row) as 'C_BUDGET_SUM'
from data
Results

SQL Server Query for average value over a date period

DECLARE #SampleOrderTable TABLE
(
pkPersonID INT,
OrderDate DATETIME,
Amount NUMERIC(18, 6)
)
INSERT INTO #SampleOrderTable (pkPersonID, OrderDate, Amount)
VALUES (1, '12/10/2019', '762.84'),
(2, '11/10/2019', '886.32'),
(3, '11/9/2019', '10245.00')
How do I select the the last 4 days prior to OrderDate and the average Amount over that period?
So result data would be:
pkPersonID Date Amount
------------------------------------
1 '12/7/2019' 190.71
1 '12/8/2019' 190.71
1 '12/9/2019' 190.71
1 '12/10/2019' 190.71
2 '12/7/2019' 221.58
2 '12/8/2019' 221.58
2 '12/9/2019' 221.58
2 '12/10/2019' 221.58
3 '11/6/2019' 2561.25
3 '11/7/2019' 2561.25
3 '11/8/2019' 2561.25
3 '11/9/2019' 2561.25

You may try with the following approach, using DATEADD(), windowed COUNT() and VALUES() table value constructor:
Table:
DECLARE #SampleOrderTable TABLE (
pkPersonID INT,
OrderDate DATETIME,
Amount NUMERIC(18, 6)
)
INSERT INTO #SampleOrderTable (pkPersonID, OrderDate, Amount)
VALUES (1, '20191210', '762.84'),
(2, '20191210', '886.32'),
(3, '20191109', '10245.00')
Statement:
SELECT
t.pkPersonID,
DATEADD(day, -v.Day, t.OrderDate) AS [Date],
CONVERT(numeric(18, 6), Amount / COUNT(Amount) OVER (PARTITION BY t.pkPersonID)) AS Amount
FROM #SampleOrderTable t
CROSS APPLY (VALUES (0), (1), (2), (3)) v(Day)
ORDER BY t.pkPersonID, [Date]
Result:
pkPersonID Date Amount
1 07/12/2019 00:00:00 190.710000
1 08/12/2019 00:00:00 190.710000
1 09/12/2019 00:00:00 190.710000
1 10/12/2019 00:00:00 190.710000
2 07/12/2019 00:00:00 221.580000
2 08/12/2019 00:00:00 221.580000
2 09/12/2019 00:00:00 221.580000
2 10/12/2019 00:00:00 221.580000
3 06/11/2019 00:00:00 2561.250000
3 07/11/2019 00:00:00 2561.250000
3 08/11/2019 00:00:00 2561.250000
3 09/11/2019 00:00:00 2561.250000

You can use sql functions like AVG, DATEADD and GETDATE.
SELECT AVG(Amount) as AverageAmount
FROM #SampleOrderTable
WHERE OrderDate >= DATEADD(DAY, -4, GETDATE())

DECLARE #SampleOrderTable TABLE (
pkPersonID INT,
OrderDate DATETIME,
Amount NUMERIC(18, 6)
);
INSERT INTO #SampleOrderTable
(pkPersonID, OrderDate, Amount)
VALUES
(1, '12/20/2019', 762.84),
(2, '12/20/2019', 886.32),
(3, '12/20/2019', 10245.00),
(4, '12/19/2019', 50.00),
(5, '12/19/2019', 100.00),
(6, '09/01/2019', 200.00),
(7, '09/01/2019', 300.00),
(8, '12/15/2019', 400.00),
(9, '12/15/2019', 500.00),
(10, '09/02/2019', 150.00),
(11, '09/02/2019', 1100.00),
(12, '09/02/2019', 1200.00),
(13, '09/02/2019', 1300.00),
(14, '09/02/2019', 1400.00),
(15, '09/02/2019', 1500.00);
SELECT OrderDate,AVG(Amount) AS Average_Value
FROM #SampleOrderTable
WHERE DATEDIFF(DAY, CAST(OrderDate AS DATETIME), CAST(GETDATE() AS Datetime)) <= 4
GROUP BY OrderDate;

How to find all those Sellers from the table who had increase in sales in at least 3 months consecutively in SQL?

How to find all those Sellers from below table who had increase in sales in at least 3 months consecutively?
Record | Seller_id | Months | Sales_amount
0 121 Feb 100
1 121 Jan 87
2 121 Mar 95
3 121 May 105
4 121 Apr 100
5 321 Jan 100
6 321 Feb 87
7 321 Mar 95
8 321 Apr 105
9 321 May 110
10 597 Jan 100
11 597 Feb 105
12 597 Mar 95
13 597 Apr 100
14 597 May 110

This is curious you have no year and months are three letter codes. Do it with lag
and table of months
With tbl as (
select * from (values
-- source data
(0 , 121,'Feb',100)
,(1 , 121,'Jan',87 )
,(2 , 121,'Mar',95 )
,(3 , 121,'May',105)
,(4 , 121,'Apr',100)
,(5 , 321,'Jan',100)
,(6 , 321,'Feb',87 )
,(7 , 321,'Mar',95 )
,(8 , 321,'Apr',105)
,(9 , 321,'May',110)
,(10, 597,'Jan',100)
,(11, 597,'Feb',105)
,(12, 597,'Mar',95 )
,(13, 597,'Apr',100)
,(14, 597,'May',110)
) t(id, Seller_id, Months, Sales_amount)
), months as (
select * from ( values
(1, 'Jan')
,(2, 'Feb')
,(3, 'Mar')
,(4, 'Apr')
,(5, 'May')
-- , etc
) t(id,name)
)
select *
from (
select t.*,
lag(Sales_amount,1) over (partition by Seller_id order by m.id) m1,
lag(Sales_amount,2) over (partition by Seller_id order by m.id) m2
from tbl t
join months m on m.name=t.Months
) t
where Sales_amount > m1 and m1 > m2;

WITH a
AS (SELECT *
FROM
(
VALUES -- source data
(0, 121, 'Feb', 100),
(1, 121, 'Jan', 87),
(2, 121, 'Mar', 95),
(3, 121, 'May', 105),
(4, 121, 'Apr', 100),
(5, 321, 'Jan', 100),
(6, 321, 'Feb', 87),
(7, 321, 'Mar', 95),
(8, 321, 'Apr', 105),
(9, 321, 'May', 110),
(10, 597, 'Jan', 100),
(11, 597, 'Feb', 105),
(12, 597, 'Mar', 95),
(13, 597, 'Apr', 100),
(14, 597, 'May', 110)
) t (id, Seller_id, Months, Sales_amount) ),
b
AS (SELECT *
FROM
(
VALUES
(1, 'Jan'),
(2, 'Feb'),
(3, 'Mar'),
(4, 'Apr'),
(5, 'May') -- , etc
) t (id, name) ),
c
AS (SELECT a.*,
b.id id2,
ROW_NUMBER() OVER (PARTITION BY a.Seller_id ORDER BY b.id ASC) rnk
FROM a
LEFT JOIN b
ON a.Months = b.name),
d
AS (SELECT --c1.*
c1.Seller_id,
c1.Months AS m1,
c2.Months AS m2,
c3.Months AS m3,
c1.Sales_amount AS sa1,
c2.Sales_amount AS sa2,
c3.Sales_amount AS sa3
FROM c c1
LEFT JOIN c c2
ON c1.id2 = c2.id2 - 1
AND c1.Seller_id = c2.Seller_id
LEFT JOIN c c3
ON c2.id2 = c3.id2 - 1
AND c2.Seller_id = c3.Seller_id)
SELECT *,
CASE
WHEN sa1 < sa2
AND sa2 < sa3 THEN
1
ELSE
0
END is_con
FROM d;

SQL Server episode identification

I am working with a blood pressure database in SQL Server which contains patient_id, timestamp (per minute) and systolicBloodPressure.
My goals are to find:
the number of episodes in which a patient is under a certain blood pressure threshold
An episode consists of the timestmap where the patient drops below a certain threshold until the timestamp where the patient comes above the threshold.
the mean blood pressure per episode per patient
the duration of the episode per episode per patient
What I have tried so far:
I am able to identify episodes by just making a new column which sets to 1 if threshold is reached.
select *
, CASE
when sys < threshold THEN '1'
from BPDATA
However , I am not able to 'identify' different episodes within the patient; episode1 episode 2 with their relative timestamps.
Could someone help me with this? Or is there someone with a better different solution?
EDIT: Sample data with example threshold 100
ID Timestamp SysBP below Threshold
----------------------------------------------------
1 9:38 110 Null
1 9:39 105 Null
1 9:40 96 1
1 9:41 92 1
1 9:42 102 Null
2 12:23 95 1
2 12:24 98 1
2 12:25 102 Null
2 12:26 104 Null
2 12:27 94 1
2 12:28 88 1
2 12:29 104 Null

Thanks for the sample data.
This should work:
declare #t table (ID int, Timestamp time, SysBP int, belowThreshold bit)
insert #t
values
(1, '9:38', 110, null),
(1, '9:39', 105, null),
(1, '9:40', 96, 1),
(1, '9:41', 92, 1),
(1, '9:42', 102, null),
(2, '12:23', 95, 1),
(2, '12:24', 98, 1),
(2, '12:25', 102, null),
(2, '12:26', 104, null),
(2, '12:27', 94, 1),
(2, '12:28', 88, 1),
(2, '12:29', 104, null)
declare #treshold int = 100
;with y as (
select *, case when lag(belowThreshold, 1, 0) over(partition by id order by timestamp) = belowThreshold then 0 else 1 end epg
from #t
),
z as (
select *, sum(epg) over(partition by id order by timestamp) episode
from y
where sysbp < #treshold
)
select id, episode, count(episode) over(partition by id) number_of_episodes_per_id, avg(sysbp) avg_sysbp, datediff(minute, min(timestamp), max(timestamp))+1 episode_duration
from z
group by id, episode

This answer relies on LEAD() and LAG() functions so only works on 2012 or later:
Setup:
CREATE TABLE #bloodpressure
(
Patient_id int,
[TimeStamp] SmallDateTime,
SystolicBloodPressure INT
)
INSERT INTO #bloodpressure
VALUES
(1, '2017-01-01 09:01', 60),
(1, '2017-01-01 09:02', 55),
(1, '2017-01-01 09:03', 60),
(1, '2017-01-01 09:04', 70),
(1, '2017-01-01 09:05', 72),
(1, '2017-01-01 09:06', 75),
(1, '2017-01-01 09:07', 60),
(1, '2017-01-01 09:08', 50),
(1, '2017-01-01 09:09', 52),
(1, '2017-01-01 09:10', 53),
(1, '2017-01-01 09:11', 65),
(1, '2017-01-01 09:12', 71),
(1, '2017-01-01 09:13', 73),
(1, '2017-01-01 09:14', 74),
(2, '2017-01-01 09:01', 70),
(2, '2017-01-01 09:02', 75),
(2, '2017-01-01 09:03', 80),
(2, '2017-01-01 09:04', 70),
(2, '2017-01-01 09:05', 72),
(2, '2017-01-01 09:06', 75),
(2, '2017-01-01 09:07', 60),
(2, '2017-01-01 09:08', 50),
(2, '2017-01-01 09:09', 52),
(2, '2017-01-01 09:10', 53),
(2, '2017-01-01 09:11', 65),
(2, '2017-01-01 09:12', 71),
(2, '2017-01-01 09:13', 73),
(2, '2017-01-01 09:14', 74),
(3, '2017-01-01 09:12', 71),
(3, '2017-01-01 09:13', 60),
(3, '2017-01-01 09:14', 74)
Now using Lead And Lag to find the previous rows values, to find whether this is the beginning or end of a sequence of low blood pressures, in combination with a common table expression. Using a UNION of start and end events ensures that an event which covers just one minute is recorded as both a start and an end event.
;WITH CTE
AS
(
SELECT *,
LAG(SystolicBloodPressure,1)
OVER (PaRTITION BY Patient_Id ORDER BY TimeStamp) As PrevValue,
Lead(SystolicBloodPressure,1)
OVER (PaRTITION BY Patient_Id ORDER BY TimeStamp) As NextValue
FROM #bloodpressure
),
CTE2
AS
(
-- Get Start Events (EventType 1)
SELECT 1 As [EventType], Patient_id, TimeStamp,
ROW_NUMBER() OVER (ORDER BY Patient_id, TimeStamp) AS RN
FROM CTE
WHERE (PrevValue IS NULL AND SystolicBloodPressure < 70) OR
(PrevValue >= 70 AND SystolicBloodPressure < 70)
UNION
-- Get End Events (EventType 2)
SELECT 2 As [EventType], Patient_id, TimeStamp,
ROW_NUMBER() OVER (ORDER BY Patient_id, TimeStamp) AS RN
FROM CTE
WHERE (NextValue IS NULL AND SystolicBloodPressure < 70 ) OR
(NextValue >= 70 AND SystolicBloodPressure < 70)
)
SELECT C1.Patient_id, C1.TimeStamp As EventStart, C2.TimeStamp As EventEnd
FROM CTE2 C1
INNER JOIN CTE2 C2
ON C1.Patient_id = C2.Patient_id AND C1.RN = C2.RN
WHERE C1.EventType = 1 AND C2.EventType = 2
ORDER BY C1.Patient_id, C1.TimeStamp

SQL - Start and End date based on another column

Simplified structure.
I need the two dates between a record that has an action type of 4 and an action type of 1.
The record could be in that state multiple times and I would need separate rows for their times
For example for IncidentId = 1
Row 1 - StartTime = 2017-01-01 14:00 (id:3) - End Time = 2017-01-01 20:00 (id: 5)
Row 2 - StartTime = 2017-01-01 21:00 (id:6) - End Time = 2017-01-02 11:00 (id: 9)
CREATE TABLE #returntable
(
[incidentid] INT,
[starttime] DATETIME,
[endtime] DATETIME
)
CREATE TABLE #testtableofdoom
(
[incidentlogid] INT,
[incidentid] INT,
[timestamp] DATETIME,
[actiontypeid] INT
)
INSERT INTO #testtableofdoom
( incidentlogid, incidentid, timestamp, actiontypeid )
VALUES ( 1, 1, '2017-01-01 09:00', 1 )
, ( 2, 1, '2017-01-01 11:00', 1 )
, ( 3, 1, '2017-01-01 14:00', 4 )
, ( 4, 1, '2017-01-01 16:00', 4 )
, ( 5, 1, '2017-01-01 20:00', 1 )
, ( 6, 1, '2017-01-01 21:00', 4 )
, ( 7, 1, '2017-01-02 09:00', 4 )
, ( 8, 2, '2017-01-02 10:00', 1 )
, ( 9, 1, '2017-01-02 11:00', 1 )
, ( 10, 1, '2017-01-02 14:00', 1 )
, ( 11, 2, '2017-01-02 15:00', 4 )
, ( 12, 1, '2017-01-02 16:00', 1 )
, ( 13, 1, '2017-01-02 17:00', 1 )
, ( 14, 1, '2017-01-02 18:00', 1 )
, ( 15, 2, '2017-01-02 15:00', 1 );
DROP TABLE #testtableofdoom
DROP TABLE #returntable

I used table variables instead of temp tables, and shorter column names than you, but this works:
declare #tt TABLE (
logId INT, iId INT,
dt DATETIME, atId INT
INSERT #tt (logId, iId,
dt, atId) values
(1, 1, '2017-01-01 09:00', 1),
(2, 1, '2017-01-01 11:00', 1),
(3, 1, '2017-01-01 14:00', 4),
(4, 1, '2017-01-01 16:00', 4),
(5, 1, '2017-01-01 20:00', 1),
(6, 1, '2017-01-01 21:00', 4),
(7, 1, '2017-01-02 09:00', 4),
(8, 2, '2017-01-02 10:00', 1),
(9, 1, '2017-01-02 11:00', 1),
(10, 1, '2017-01-02 14:00', 1),
(11, 2, '2017-01-02 15:00', 4),
(12, 1, '2017-01-02 16:00', 1),
(13, 1, '2017-01-02 17:00', 1),
(14, 1, '2017-01-02 18:00', 1),
(15, 2, '2017-01-02 15:00', 1)
Select s.logId startLogid, e.logId endLogId,
s.iID, s.dt startTime, e.dt endTime
from #tt s join #tt e
on e.logId =
(Select min(logId) from #tt
where iId = s.iID
and atId = 1
and logId > s.logId)
where s.aTid = 4
and ((Select atId from #tt
Where logId =
(Select Max(logId) from #tt
where logId < s.LogId
and iId = s.iId)) = 1
or Not Exists
(Select * from #tt
Where logId < s.LogId
and iId = s.iID))
This produces the following:
startLogid endLogId iID startTime endTime
----------- ----------- ---- ---------------- ----------------
3 5 1 2017-01-01 14:00 2017-01-01 20:00
6 9 1 2017-01-01 21:00 2017-01-02 11:00
11 15 2 2017-01-02 15:00 2017-01-02 15:00
it uses a self-join. s represents the first (start) record with actionType 4, and e represents end record with action type 1. Since logId increments, the end record must have higher logId than the start record, and it must be the lowest logId higher than the start records that has same iId and an atId = 1.
Select s.iID, s.dt startTime, e.dt endTime
from #tt s join #tt e
on e.logId =
(Select min(logId) from #tt -- lowest log greater than start logId
where iId = s.iID -- same iId
and atId = 1 -- with atId = 1
and logId > s.logId) -- greater than start logId
finally, the start record must be restricted to those "4" records which either have no other same incident records before it or have a "1" record immediately prior to it.
where s.aTid = 4
and ((Select atId from #tt -- atId of immed prior = 1
Where logId =
(Select Max(logId) from #tt
where logId < s.LogId
and iId = s.iId)) = 1
or Not Exists -- or there is no prior record
(Select * from #tt
Where logId < s.LogId
and iId = s.iID))

something like this?
select
d.[timestamp] as StartDate,
(select top 1 [timestamp]
from #testTableOfDoom d2
where d2.incidentid = 1 and d2.[timestamp] > d.[timestamp] and actiontypeid = 1
order by d2.[timestamp] asc
) as EndDate
from
(select
p.[timestamp],
LAG(p.actiontypeid) OVER (ORDER BY incidentlogid asc) PrevValue,
p.actiontypeid
from #testTableOfDoom p
where p.incidentid = 1) d
where d.actiontypeid = 4
and d.PrevValue <> 4

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

Select rows until running sum reaches specific value - sql

Related

Cumulative sum reset by change of year (SQL Server)

SQL Server Query for average value over a date period

How to find all those Sellers from the table who had increase in sales in at least 3 months consecutively in SQL?

SQL Server episode identification

SQL - Start and End date based on another column

Categories

Resources