Is the following actually possible in SQL?
I have some time-series data and I want to extract some entry and exit points based on prices.
Desired output:
Example Data:
SQL Data:
-- Sample time series for the entry/exit question: one reading per ([Name], [TimeStamp]).
-- Within each [Name] the rows are 10 minutes apart and [PKey] increases with [TimeStamp].
-- In this sample [Price2] is always [Price1] + 5.
CREATE TABLE Control
([PKey] int, [TimeStamp] datetime, [Name] varchar(10), [Price1] float, [Price2] float);
-- Seed rows: PKey 1-23 belong to 'Name1', PKey 24-46 to 'Name2'.
INSERT INTO Control ([PKey], [TimeStamp], [Name], [Price1], [Price2])
VALUES
(1,'2018-10-01 09:00:00', 'Name1',120, 125),
(2,'2018-10-01 09:10:00', 'Name1',110, 115),
(3,'2018-10-01 09:20:00', 'Name1',101, 106),
(4,'2018-10-01 09:30:00', 'Name1',105, 110),
(5,'2018-10-01 09:40:00', 'Name1',106, 111),
(6,'2018-10-01 09:50:00', 'Name1',108, 113),
(7,'2018-10-01 10:00:00', 'Name1',110, 115),
(8,'2018-10-01 10:10:00', 'Name1',104, 109),
(9,'2018-10-01 10:20:00', 'Name1',101, 106),
(10,'2018-10-01 10:30:00', 'Name1',99, 104),
(11,'2018-10-01 10:40:00', 'Name1',95, 100),
(12,'2018-10-01 10:50:00', 'Name1',101, 106),
(13,'2018-10-01 11:00:00', 'Name1',102, 107),
(14,'2018-10-01 11:10:00', 'Name1',101, 106),
(15,'2018-10-01 11:20:00', 'Name1',99, 104),
(16,'2018-10-01 11:30:00', 'Name1',105, 110),
(17,'2018-10-01 11:40:00', 'Name1',108, 113),
(18,'2018-10-01 11:50:00', 'Name1',108, 113),
(19,'2018-10-01 12:00:00', 'Name1',109, 114),
(20,'2018-10-01 12:10:00', 'Name1',108, 113),
(21,'2018-10-01 12:20:00', 'Name1',105, 110),
(22,'2018-10-01 12:30:00', 'Name1',101, 106),
(23,'2018-10-01 12:40:00', 'Name1',102, 107),
(24,'2018-10-01 09:00:00', 'Name2',103, 108),
(25,'2018-10-01 09:10:00', 'Name2',101, 106),
(26,'2018-10-01 09:20:00', 'Name2',104, 109),
(27,'2018-10-01 09:30:00', 'Name2',106, 111),
(28,'2018-10-01 09:40:00', 'Name2',108, 113),
(29,'2018-10-01 09:50:00', 'Name2',108, 113),
(30,'2018-10-01 10:00:00', 'Name2',105, 110),
(31,'2018-10-01 10:10:00', 'Name2',103, 108),
(32,'2018-10-01 10:20:00', 'Name2',101, 106),
(33,'2018-10-01 10:30:00', 'Name2',99, 104),
(34,'2018-10-01 10:40:00', 'Name2',101, 106),
(35,'2018-10-01 10:50:00', 'Name2',104, 109),
(36,'2018-10-01 11:00:00', 'Name2',101, 106),
(37,'2018-10-01 11:10:00', 'Name2',99, 104),
(38,'2018-10-01 11:20:00', 'Name2',106, 111),
(39,'2018-10-01 11:30:00', 'Name2',103, 108),
(40,'2018-10-01 11:40:00', 'Name2',105, 110),
(41,'2018-10-01 11:50:00', 'Name2',108, 113),
(42,'2018-10-01 12:00:00', 'Name2',105, 110),
(43,'2018-10-01 12:10:00', 'Name2',104, 109),
(44,'2018-10-01 12:20:00', 'Name2',108, 113),
(45,'2018-10-01 12:30:00', 'Name2',110, 115),
(46,'2018-10-01 12:40:00', 'Name2',105, 110)
;
What have I tried:
I am able to get the first instance of an entry and exit point using the following query which finds the first entry point PKey and then finds the first exit point after the entry point PKey
-- First entry and first subsequent exit per [Name].
-- The thresholds are T-SQL local variables, so they carry the @ sigil
-- (a leading # names a temp table and is not valid in a scalar DECLARE).
declare @EntryPrice1 float = 101.0; -- Entry when Price1 <= 101.0 (when not already Entered)
declare @ExitPrice2 float = 113.0; -- Exit when Price2 >= 113.0 (after Entry only)

select
    t1.[Name]
    ,t2.[Entry PKey]
    -- smallest PKey after the entry row whose Price2 reaches the exit threshold
    ,min(case when t1.[Price2] >= @ExitPrice2 and t1.[PKey] > t2.[Entry PKey] then t1.[PKey] else null end) as [Exit PKey]
from [dbo].[Control] t1
left outer join
    ( -- smallest PKey per Name whose Price1 is at or below the entry threshold
      select min(case when [Price1] <= @EntryPrice1 then [PKey] else null end) as [Entry PKey]
            ,[Name]
      from [dbo].[Control]
      group by [Name]) t2
    on t1.[Name] = t2.[Name]
group by t1.[Name],t2.[Entry PKey]
--Name Entry PKey Exit PKey
--Name1 3 6
--Name2 25 28
I'm stuck on the approach to use that will allow multiple entry/exit points to be returned and not sure if it's even possible in SQL.
The logic for entry and exit points is:
Entry - when price1 <= 101.0 and not already in an entry that has not exited.
Exit - when price2 >= 113.0 and inside an entry.
It's a kind of gaps and islands problem, this is a generic solution using Windowed Aggregates (should work for most DBMSes):
-- All entry/exit pairs per Name, solved as a gaps-and-islands problem with windowed aggregates.
-- The thresholds are T-SQL local variables, so they carry the @ sigil
-- (a leading # names a temp table and is not valid in a scalar DECLARE).
declare @EntryPrice1 float = 101.0; -- Entry when Price1 <= 101.0 (when not already Entered)
declare @ExitPrice2 float = 113.0; -- Exit when Price2 >= 113.0 (after Entry only)

WITH cte AS
 ( -- apply your logic to mark potential entry and exit rows
   SELECT *
     ,CASE WHEN Price1 <= @EntryPrice1 THEN Timestamp END AS possibleEntry
     ,CASE WHEN Price2 >= @ExitPrice2 THEN Timestamp END AS possibleExit
     ,Max(CASE WHEN Price1 <= @EntryPrice1 THEN Timestamp END) -- most recent possibleEntry
      Over (PARTITION BY Name
            ORDER BY Timestamp
            ROWS Unbounded Preceding) AS lastEntry
     ,Max(CASE WHEN Price2 >= @ExitPrice2 THEN Timestamp END) -- most recent possibleExit
      Over (PARTITION BY Name
            ORDER BY Timestamp
            -- frame stops one row back, so an exit on the current row is not counted yet
            ROWS BETWEEN Unbounded Preceding AND 1 Preceding) AS lastExit
   FROM [dbo].[Control]
 )
-- SELECT * FROM cte ORDER BY Name, PKey
,groupRows AS
 ( -- mark rows from the 1st entry to the exit row
   SELECT *
     -- if lastEntry <= lastExit we're after an exit and before an entry -> don't return this row
     ,CASE WHEN lastEntry <= lastExit THEN 0 ELSE 1 END AS returnFlag
     -- assign the same group number to consecutive rows in group
     ,Sum(CASE WHEN lastEntry <= lastExit THEN 1 ELSE 0 END)
      Over (PARTITION BY Name
            ORDER BY Timestamp
            ROWS Unbounded Preceding) AS grp
   FROM cte
   -- keep only candidate rows, and nothing before the first possible entry
   WHERE (possibleEntry IS NOT NULL OR possibleExit IS NOT NULL)
     AND lastEntry IS NOT NULL
 )
-- SELECT * FROM groupRows ORDER BY Name, PKey
,rowNum AS
 ( -- get the data from the first and last row of an entry/exit group
   SELECT *
     -- to get the values of the 1st row in a group
     ,Row_Number() Over (PARTITION BY Name, grp ORDER BY Timestamp) AS rn
     -- to get the values of the last row in a group
     ,Last_Value(Price2)
      Over (PARTITION BY Name, grp
            ORDER BY Timestamp
            ROWS BETWEEN Unbounded Preceding AND Unbounded Following) AS ExitPrice
     ,Last_Value(possibleExit)
      Over (PARTITION BY Name, grp
            ORDER BY Timestamp
            ROWS BETWEEN Unbounded Preceding AND Unbounded Following) AS ExitTimestamp
     ,Last_Value(CASE WHEN possibleExit IS NOT NULL THEN PKey END)
      Over (PARTITION BY Name, grp
            ORDER BY Timestamp
            ROWS BETWEEN Unbounded Preceding AND Unbounded Following) AS ExitPKey
   FROM groupRows
   WHERE returnFlag = 1
 )
SELECT Name
  ,Price1 AS EntryPrice
  ,ExitPrice
  ,Timestamp AS EntryTimestamp
  ,ExitTimestamp
  ,PKey AS EntryPKey
  ,ExitPKey
FROM rowNum
WHERE rn = 1 -- return 1st row of each group
ORDER BY Name, Timestamp
See dbfiddle
Of course it might be possible to simplify the logic or apply some proprietary SQL Server syntax...
This is a weird form of gaps-and-islands. Start with the very basic definitions of entry and exit:
-- Flag every row that meets the raw entry / exit price condition.
-- Expects @EntryPrice1 and @ExitPrice2 (float) to be declared earlier in the same batch;
-- local variables use the @ sigil, not #.
select c.*,
       (case when [Price1] <= @EntryPrice1 then 1 else 0 end) as is_entry,
       (case when [Price2] >= @ExitPrice2 then 1 else 0 end) as is_exit
from control c;
This doesn't quite work because two adjacent "entries" count only as a single entry. We can get the information we need by looking at the previous entry/exit time. With that logic, we can determine which entries are "real". We might as well get the next exit time as well:
-- CTE fragment (no final SELECT yet): raw entry/exit flags plus neighbouring entry/exit keys.
-- Expects @EntryPrice1 and @ExitPrice2 (float) to be declared earlier in the same batch;
-- local variables use the @ sigil, not #.
with cee as (
      -- raw flags: does this row satisfy the entry / exit price condition?
      select c.*,
             (case when [Price1] <= @EntryPrice1 then 1 else 0 end) as is_entry,
             (case when [Price2] >= @ExitPrice2 then 1 else 0 end) as is_exit
      from control c
     ),
     cp as (
      select cee.*,
             -- latest flagged entry strictly before this row
             max(case when is_entry = 1 then pkey end) over (partition by name order by timestamp rows between unbounded preceding and 1 preceding) as prev_entry,
             -- latest flagged exit up to and including this row (default window frame)
             max(case when is_exit = 1 then pkey end) over (partition by name order by timestamp) as prev_exit,
             -- earliest flagged exit from this row onwards (window runs in descending time order)
             min(case when is_exit = 1 then pkey end) over (partition by name order by timestamp desc) as next_exit
      from cee
     )
Next, use this logic to generate a cumulative sum of real entries, and then do some fancy filtering:
-- Rows where a "real" entry starts: a flagged entry that is either the first one for the
-- Name, or is preceded by an exit that came after the previous flagged entry.
-- Expects @EntryPrice1 and @ExitPrice2 (float) to be declared earlier in the same batch.
with cee as (
      select c.*,
             (case when [Price1] <= @EntryPrice1 then 1 else 0 end) as is_entry,
             -- exits are defined on Price2 against the exit threshold, matching the stated rule
             (case when [Price2] >= @ExitPrice2 then 1 else 0 end) as is_exit
      from control c
     ),
     cp as (
      select cee.*,
             -- latest flagged entry strictly before this row
             max(case when is_entry = 1 then pkey end) over (partition by name order by timestamp rows between unbounded preceding and 1 preceding) as prev_entry,
             -- latest flagged exit up to and including this row (default window frame)
             max(case when is_exit = 1 then pkey end) over (partition by name order by timestamp) as prev_exit,
             -- earliest flagged exit from this row onwards
             min(case when is_exit = 1 then pkey end) over (partition by name order by timestamp desc) as next_exit
      from cee
     )
select *
from cp
where cp.is_entry = 1 and
      (prev_entry is null or prev_exit > prev_entry)
This gives you the rows where the entry starts. You can join in to get the additional information you want.
Related
I have calculated average values for each month. Some months are NULL and my manager wants me to use the previous row's value and following month's value and fill the months which are having NULL values.
Current result (see below pic):
Expected Result
-- Average duration in days (EndDate - StartDate) per start month, left-joined onto a month
-- calendar so that months without data still appear, with AVG1 = NULL.
DECLARE @DATE DATE = '2017-01-01'; -- local variables take the @ sigil, not #

WITH MonthStarts AS
(
    -- Month calendar taken from the DATEDIM table. The CTE deliberately does not reuse the
    -- table's name: a CTE that selects from its own name is treated as a recursive reference.
    SELECT DISTINCT DTM.FirstDayOfMonth
    FROM DATEDIM DTM
    WHERE Date >= '01/01/2017'
      AND Date <= DATEADD(mm,-1,Getdate())
),
Tab1 AS
(
    SELECT
        T1.FirstDayOfMonth AS MONTH_START,
        AVG1,
        -- RNK 1 is the most recent month
        ROW_NUMBER() OVER (
            ORDER BY DATEADD(MM,DATEDIFF(MM, 0, T1.FirstDayOfMonth),0) DESC
        ) AS RNK
    FROM MonthStarts T1
    LEFT OUTER JOIN (
        -- per-month average of the day difference between StartDate and EndDate
        SELECT
            DATEADD(MM,DATEDIFF(MM, 0, StartDate),0) MONTH_START,
            AVG(CAST(DATEDIFF(dd, StartDate, EndDate) AS FLOAT)) AS AVG1
        FROM DATATable
        WHERE EndDate >= StartDate
          AND StartDate >= @DATE
          AND EndDate >= @DATE
        GROUP BY DATEADD(MM,DATEDIFF(MM, 0, StartDate),0)
    ) T2 ON T1.FirstDayOfMonth = T2.MONTH_START
)
SELECT MONTH_START, AVG1, RNK
FROM Tab1
Using your CTEs
-- Final SELECT to place after the question's WITH ... Tab1 definition.
-- A NULL AVG1 is replaced by the non-NULL AVG1 with the closest smaller RNK.
SELECT
    cur.MONTH_START,
    CASE
        WHEN cur.AVG1 IS NOT NULL THEN cur.AVG1
        ELSE (SELECT TOP (1) donor.AVG1
              FROM Tab1 AS donor
              WHERE donor.RNK < cur.RNK
                AND donor.AVG1 IS NOT NULL
              ORDER BY donor.RNK DESC)
    END AS AVG1,
    cur.RNK
FROM Tab1 AS cur
Edit
Version that averages the nearest preceding and nearest following non-NULL values. Both must exist; otherwise NULL is returned.
-- Final SELECT to place after the question's WITH ... Tab1 definition.
-- A NULL AVG1 becomes the mean of the nearest non-NULL values on either side (by RNK);
-- if either side has none, the sum is NULL and so is the result.
SELECT
    cur.MONTH_START,
    CASE
        WHEN cur.AVG1 IS NOT NULL THEN cur.AVG1
        ELSE (
                 -- closest non-NULL value at a smaller RNK
                 (SELECT TOP (1) lower_side.AVG1
                  FROM Tab1 AS lower_side
                  WHERE lower_side.RNK < cur.RNK
                    AND lower_side.AVG1 IS NOT NULL
                  ORDER BY lower_side.RNK DESC)
                 -- closest non-NULL value at a larger RNK
               + (SELECT TOP (1) upper_side.AVG1
                  FROM Tab1 AS upper_side
                  WHERE upper_side.RNK > cur.RNK
                    AND upper_side.AVG1 IS NOT NULL
                  ORDER BY upper_side.RNK)
             ) / 2
    END AS AVG1,
    cur.RNK
FROM Tab1 AS cur
I can't quite tell what you are trying to calculate the average of, but this is quite simple with window functions:
-- Average of each row's val with its immediate neighbours in month_start order.
-- AVG ignores NULLs, so a NULL val yields the mean of whichever neighbours are non-NULL.
select t.*,
       avg(val) over (order by month_start rows between 1 preceding and 1 following) as avg_with_neighbours
from t;
In your case, I think this translates as:
-- Per-month average plus a 3-month centred average of those monthly averages.
-- `where . . .` is a placeholder for the asker's own filters.
select datefromparts(year(startdate), month(startdate), 1) as month_start, -- first day of the month
       avg(val) as monthaverage,
       avg(avg(val)) over (order by min(startdate) rows between 1 preceding and 1 following) as smoothed_average
from datatable d
where . . .
group by datefromparts(year(startdate), month(startdate), 1)
You can manipulate previous and following row values using window functions:
-- Template only (not runnable as written): `...` stands for the ordering column(s)
-- and the FROM clause is omitted.
-- Each window frame holds exactly one row, so MAX simply returns that row's value:
-- the row just before the current one, and the row just after it.
SELECT MAX(row_value) OVER(
ORDER BY ... ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS Previous_Value,
MAX(row_value) OVER(
ORDER BY ... ROWS BETWEEN 1 FOLLOWING AND 1 FOLLOWING) AS Next_Value
Alternatively you can use LAG/LEAD functions and modify your sub-query where you get the AVG:
-- For every month present in DATATable, combine the previous and the next month's average:
-- their mean when both exist, otherwise whichever one exists.
-- Expects @DATE (date) to be declared earlier in the same batch.
SELECT
    src.MONTH_START,
    CASE
        WHEN src.prev_val IS NULL OR src.next_val IS NULL
            THEN COALESCE(src.prev_val, src.next_val) -- Return non-NULL value (if exists)
        ELSE (src.prev_val + src.next_val) / 2
    END AS AVG_new
FROM (
    SELECT
        DATEADD(MM,DATEDIFF(MM, 0, StartDate),0) MONTH_START,
        -- Window functions run after GROUP BY, so they must wrap the aggregate.
        -- LAG reads the previous month's row, LEAD the next month's.
        LAG(AVG(CAST(DATEDIFF(dd, StartDate, EndDate) AS FLOAT)))
            OVER(ORDER BY DATEADD(MM,DATEDIFF(MM, 0, StartDate),0)) AS prev_val,
        LEAD(AVG(CAST(DATEDIFF(dd, StartDate, EndDate) AS FLOAT)))
            OVER(ORDER BY DATEADD(MM,DATEDIFF(MM, 0, StartDate),0)) AS next_val
    FROM DATATable
    WHERE EndDate >= StartDate
      AND StartDate >= @DATE
      AND EndDate >= @DATE
    GROUP BY DATEADD(MM,DATEDIFF(MM, 0, StartDate),0)
) AS src
I haven't tested it, but give it a shot and see how it works. You may need to put at least one column in the ORDER BY portion of the window function.
You could try this query (I just reflected in my sample data relevant parts, I omitted date column):
-- Demo: replace each NULL value with half the nearest preceding value plus half the nearest
-- following value, taken from the ends of its run of consecutive NULLs.
-- Table variables are declared with the @ sigil (# would name a temp table, which is
-- created with CREATE TABLE, not DECLARE).
declare @tbl table (rank int, value int);
insert into @tbl values
(1, null),
(2, 20),
(3, 30),
(4, null),
(5, null),
(6, null),
(7, 40),
(8, null),
(9, null),
(10, 36),
(11, 22);

;with cte as (
    select *,
           -- consecutive NULL rows get consecutive dense ranks, so rank - drank is constant per run
           DENSE_RANK() over (order by case when value is null then rank else value end) drank,
           -- only NULL rows carry a neighbour value; non-NULL at the edges of a run, NULL inside it
           case when value is null then lag(value) over (order by rank) end lag,
           case when value is null then lead(value) over (order by rank) end lead
    from @tbl
)
select rank, value, case when value is null then
           -- value is int, so each / 2 is an integer division
           max(lag) over (partition by grp) / 2 +
           max(lead) over (partition by grp) / 2
       else value end valueWithAvg
from (
    select *,
           rank - drank grp from cte
) a order by rank
I need to group data together that are related to each other by overlapping timespans based on the records start and end times. SQL-fiddle here: http://sqlfiddle.com/#!18/87e4b/1/0
The current query I have built is giving incorrect results. Callid 3 should give a callCount of 4. It does not because record 6 is not included since it does not overlap with 3, but should be included because it does overlap with one of the other related records. So I believe a recursive CTE may be in need but I am unsure how to write this.
Schema:
-- Call legs for the overlap-grouping question: one row per call with its start/end time
-- and the conference ([conf]) it belongs to.
CREATE TABLE Calls
([callid] int, [src] varchar(10), [start] datetime, [end] datetime, [conf] varchar(5));
-- callid is supplied as a quoted literal although the column is int.
-- Calls 1-2 overlap; calls 3-6 form a chain of overlaps (6 overlaps 5 but not 3); call 7 stands alone.
INSERT INTO Calls
([callid],[src],[start],[end],[conf])
VALUES
('1','5555550001','2019-07-09 10:00:00', '2019-07-09 10:10:00', '111'),
('2','5555550002','2019-07-09 10:00:01', '2019-07-09 10:11:00', '111'),
('3','5555550011','2019-07-09 11:00:00', '2019-07-09 11:10:00', '111'),
('4','5555550012','2019-07-09 11:00:01', '2019-07-09 11:11:00', '111'),
('5','5555550013','2019-07-09 11:01:00', '2019-07-09 11:15:00', '111'),
('6','5555550014','2019-07-09 11:12:00', '2019-07-09 11:16:00', '111'),
('7','5555550014','2019-07-09 15:00:00', '2019-07-09 15:01:00', '111');
Current query:
-- Asker's attempt: for each call (anchor) aggregate the calls in the same conference that
-- directly overlap it, keeping only anchors that are the earliest call of their set.
-- Only direct overlaps with the anchor are joined, so chains of overlaps are not followed.
SELECT
    anchor.callid,
    anchor.conf,
    MIN(peer.start)    AS sessionStart,
    MAX(peer.[end])    AS sessionEnd,
    COUNT(peer.callid) AS callCount
FROM Calls AS anchor
INNER JOIN Calls AS peer
    ON  anchor.conf = peer.conf
    AND (
            -- peer starts inside the anchor's span
            (anchor.start <= peer.start AND anchor.[end] > peer.start)
            -- peer ends inside the anchor's span
         OR (anchor.start < peer.[end] AND anchor.[end] >= peer.[end])
            -- peer covers the anchor's whole span
         OR (anchor.start >= peer.start AND anchor.[end] <= peer.[end])
        )
WHERE anchor.start > '1/1/2019'
  AND anchor.conf = '111'
GROUP BY
    anchor.callid,
    anchor.start,
    anchor.conf
HAVING anchor.start <= MIN(peer.start)
ORDER BY sessionStart DESC
Expected Results:
callid conf sessionStart sessionEnd callCount
7 111 2019-07-09T15:00:00Z 2019-07-09T15:01:00Z 1
3 111 2019-07-09T11:00:00Z 2019-07-09T11:15:00Z 4
1 111 2019-07-09T10:00:00Z 2019-07-09T10:11:00Z 2
This is a gaps-and-islands problem. It does not require a recursive CTE. You can use window functions:
-- One row per session: a maximal set of calls in the same conference whose time spans
-- chain together by overlap.
select min(callid) as callid,
       conf,
       [grouping],
       min([start]) as sessionStart,
       max([end]) as sessionEnd,
       count(*) as callCount
from (select c.*,
             -- running count of session starts; partitioned by conf so that calls from
             -- another conference cannot bump the counter in the middle of a session
             sum(case when prev_end < [start] then 1 else 0 end)
                 over (partition by conf order by [start]) as [grouping]
      from (select c.*,
                   -- latest end time among all earlier calls of the same conference
                   max([end]) over (partition by conf order by [start] rows between unbounded preceding and 1 preceding) as prev_end
            from calls c
           ) c
     ) c
group by conf, [grouping]
order by sessionStart desc;
The innermost subquery calculates the previous end. The middle subquery compares this to the current start, to determine when groups of adjacent rows are the beginning of a new group. A cumulative sum then determines the grouping.
And, the outer query aggregates to summarize information about each group.
Here is a db<>fiddle.
I have the following problem: from the table of pays and dues, I need to find the date of the last overdue. Here is the table and data for example:
-- Ledger of dues and payments per Deal: a positive [Sum] is an amount falling overdue,
-- a negative [Sum] is a payment against the Deal (as described in the question).
create table t (
Id int
, [date] date
, Customer varchar(6)
, Deal varchar(6)
, Currency varchar(3)
, [Sum] int
);
-- Rows are not inserted in date order. Deal '222221' has rows under customers '2200' and '2201'.
insert into t values
(1, '2017-12-12', '1110', '111111', 'USD', 12000)
, (2, '2017-12-25', '1110', '111111', 'USD', 5000)
, (3, '2017-12-13', '1110', '122222', 'USD', 10000)
, (4, '2018-01-13', '1110', '111111', 'USD', -10100)
, (5, '2017-11-20', '2200', '222221', 'USD', 25000)
, (6, '2017-12-20', '2200', '222221', 'USD', 20000)
, (7, '2017-12-31', '2201', '222221', 'USD', -10000)
, (8, '2017-12-29', '1110', '122222', 'USD', -10000)
, (9, '2017-11-28', '2201', '222221', 'USD', -30000);
If the value of "Sum" is positive - it means overdue has begun; if "Sum" is negative - it means someone paid on this Deal.
In the example above on Deal '122222' overdue starts at 2017-12-13 and ends on 2017-12-29, so it shouldn't be in the result.
And for Deal '222221', the first overdue of 25000, which started on 2017-11-20, was completely paid on 2017-11-28, so the last date of the current overdue (the one we are interested in) is 2017-12-31
I've made this selection to sum up all the payments, and stuck here :(
-- Fragment from the question: only the CTE is shown; the statement that would consume it is missing.
-- Debt_balance is the running total of [Sum] per Deal in [Date] order.
WITH cte AS (
SELECT *,
SUM([Sum]) OVER(PARTITION BY Deal ORDER BY [Date]) AS Debt_balance
FROM t
)
Apparently I need to find, for each Deal, the minimum of the dates if there is no zero or negative Debt_balance, and otherwise the next date after the last zero balance.
I will be grateful for any tips and ideas on the subject.
Thanks!
UPDATE
My version of solution:
-- Asker's solution: per Deal with a positive net balance, report the date of the row that
-- follows the last row whose running balance was <= 0, or the Deal's first date if there is none.
WITH cte AS (
    SELECT
        ROW_NUMBER() OVER (ORDER BY Deal, [Date]) AS id,  -- global sequence, ordered by Deal then date
        Deal,
        [Date],
        [Sum],
        SUM([Sum]) OVER (PARTITION BY Deal ORDER BY [Date]) AS Debt_balance
    FROM t
)
SELECT
    cur.Deal,
    SUM(cur.Sum) AS NET_Debt,
    isnull(max(nxt.date), min(cur.date)),
    datediff(day, isnull(max(nxt.date), min(cur.date)), getdate())
FROM cte AS cur
LEFT JOIN cte AS nxt
    -- nxt is the row right after a row of the same Deal whose balance was settled (<= 0)
    ON  nxt.Deal = cur.Deal
    AND cur.Debt_balance <= 0
    AND nxt.Id = cur.Id + 1
GROUP BY cur.Deal
HAVING SUM(cur.Sum) > 0
I believe you are trying to use running sum and keep track of when it changes to positive, and it can change to positive multiple times and you want the last date at which it became positive. You need LAG() in addition to running sum:
-- Most recent date on which each Deal's running balance moved from <= 0 to > 0,
-- for Deals that still owe money.
WITH cte1 AS (
    -- running balance column; Id breaks ties between rows on the same date
    SELECT *
    , SUM([Sum]) OVER (PARTITION BY Deal ORDER BY [Date], Id) AS RunningBalance
    FROM t
), cte2 AS (
    -- overdue begun column - set whenever running balance changes from l.t.e. zero to g.t. zero
    -- (LAG's default of 0 makes the first row of a Deal count as starting from a zero balance)
    SELECT *
    , CASE WHEN LAG(RunningBalance, 1, 0) OVER (PARTITION BY Deal ORDER BY [Date], Id) <= 0 AND RunningBalance > 0 THEN 1 END AS OverdueBegun
    FROM cte1
)
-- keep only Deals with money still owed: drops settled Deals (sum = 0) and overpaid ones (sum < 0)
SELECT Deal, MAX(CASE WHEN OverdueBegun = 1 THEN [Date] END) AS RecentOverdueDate
FROM cte2
GROUP BY Deal
HAVING SUM([Sum]) > 0
Demo on db<>fiddle
You can use window functions. These can calculate intermediate values:
Last day when the sum is negative (i.e. last "good" record).
Last sum
Then you can combine these:
-- Earliest date after the latest payment seen so far, for deals whose latest row is positive.
-- NOTE(review): rows that come before a deal's first negative sum have max_date_neg = NULL,
-- so `date > max_date_neg` filters them out - confirm that this is intended.
with flagged as (
    select t.*,
           -- sum on the deal's latest date
           first_value(sum) over (partition by deal order by date desc) as last_sum,
           -- latest negative-sum date seen up to this row (running maximum)
           max(case when sum < 0 then date end) over (partition by deal order by date) as max_date_neg
    from t
)
select deal,
       min(date) as last_overdue_start_date
from flagged
where date > max_date_neg
  and last_sum > 0
group by deal;
Actually, the value on the last date is not necessary. So this simplifies to:
-- Earliest date after the latest payment seen so far, per deal.
-- NOTE(review): rows that come before a deal's first negative sum have max_date_neg = NULL,
-- so `date > max_date_neg` filters them out - confirm that this is intended.
with flagged as (
    select t.*,
           -- latest negative-sum date seen up to this row (running maximum)
           max(case when sum < 0 then date end) over (partition by deal order by date) as max_date_neg
    from t
)
select deal,
       min(date) as last_overdue_start_date
from flagged
where date > max_date_neg
group by deal;
How do I get the following result highlighted in yellow?
Essentially I want a calculated field which increments by 1 when VeganOption = 1 and is zero when VeganOption = 0
I have tried using the following query but using partition continues to increment after a zero. I'm a bit stuck on this one.
-- Asker's attempt: numbers rows within each VeganOption value across the whole table,
-- so the counter does not restart when a run of 1s is interrupted by a 0.
SELECT
    [UniqueId],
    [Meal],
    [VDate],
    [VeganOption],
    row_number() over (partition by [VeganOption] order by [UniqueId])
FROM [Control]
ORDER BY [UniqueId]
Table Data:
-- Meals for the vegan-streak question: three meals per day, VeganOption is 1 or 0.
CREATE TABLE Control
([UniqueId] int, [Meal] varchar(10), [VDate] datetime, [VeganOption] int);
-- UniqueId is supplied as a quoted literal although the column is int;
-- the date literals carry a leading space.
INSERT INTO Control ([UniqueId], [Meal], [VDate], [VeganOption])
VALUES
('1', 'Breakfast',' 2018-08-01 00:00:00', 1),
('2', 'Lunch',' 2018-08-01 00:00:00', 1),
('3', 'Dinner',' 2018-08-01 00:00:00', 1),
('4', 'Breakfast',' 2018-08-02 00:00:00', 1),
('5', 'Lunch',' 2018-08-02 00:00:00', 0),
('6', 'Dinner',' 2018-08-02 00:00:00', 0),
('7', 'Breakfast',' 2018-08-03 00:00:00', 1),
('8', 'Lunch',' 2018-08-03 00:00:00', 1),
('9', 'Dinner',' 2018-08-03 00:00:00', 1),
('10', 'Breakfast',' 2018-08-04 00:00:00', 0),
('11', 'Lunch',' 2018-08-04 00:00:00', 1),
('12', 'Dinner',' 2018-08-04 00:00:00', 1)
;
This is for SQL Server 2016+
You could create subgroups using SUM and then ROW_NUMBER:
-- Vegan streak counter: counts up within each run of VeganOption = 1 and shows 0 otherwise.
WITH cte AS (
    SELECT
        [UniqueId],
        [Meal],
        [VDate],
        [VeganOption],
        -- running count of rows that are not vegan: stays constant across a run of 1s
        -- and so identifies the run
        SUM(CASE WHEN VeganOption = 1 THEN 0 ELSE 1 END)
            OVER (ORDER BY [UniqueId]) AS grp
    FROM [Control]
)
SELECT *,
       CASE VeganOption
           WHEN 0 THEN 0
           -- position of the row inside its run
           ELSE ROW_NUMBER() OVER (PARTITION BY veganOption, grp ORDER BY [UniqueId])
       END AS VeganStreak
FROM cte
ORDER BY [UniqueId];
Rextester Demo
This is a variant on gaps-and-islands.
I like to define streaks using the difference of row numbers. This looks like
-- Vegan streak via the difference of row numbers: inside a run of equal VeganOption values
-- the overall sequence number and the per-value sequence number advance together, so their
-- difference stays constant and identifies the run.
select c.*,
       (case when veganoption = 1
             then row_number() over (partition by veganoption, seqnum - seqnum_v order by uniqueid)
             else 0
        end) as veganstreak
from (select c.*,
             row_number() over (partition by veganoption order by uniqueid) as seqnum_v,
             row_number() over (order by uniqueid) as seqnum
      from control c  -- the question's table is named Control; c is only its alias
     ) c;
Why this works is a bit hard to explain. But, if you look at the results of the subquery, you'll see how the difference of row numbers defines the streaks you want to identify. The rest is just applying row_number() to enumerate the meals.
Here is a Rextester.
One method is to use a CTE to define your groupings, and then do a further ROW_NUMBER() on those, resulting in:
-- Vegan streak via the difference of row numbers (computed in a CTE).
WITH Grps AS(
    SELECT *,
           -- constant within a run of equal VeganOption values
           ROW_NUMBER() OVER (ORDER BY UniqueID ASC) -
           ROW_NUMBER() OVER (PARTITION BY VeganOption ORDER BY UniqueID ASC) AS Grp
    FROM Control)
SELECT *,
       -- Grp values are only unique per VeganOption (for the sequence 1,0,1 the second and
       -- third rows both get Grp = 1), so the streak counter must partition by both columns
       CASE VeganOption WHEN 0 THEN 0 ELSE ROW_NUMBER() OVER (PARTITION BY VeganOption, Grp ORDER BY UniqueID ASC) END AS VeganStreak
FROM Grps
ORDER BY UniqueId;
I have the attendance data shown below. I am trying to query it for a specific date range (01/05/2016 – 07/05/2016) and add a Total Present column, calculated from the present ('P') records up to and including each date. Suppose today is 04/05/2016: if a person has status 'P' on the 01st, 02nd, 03rd and 04th, then the row for 04-05-2016 should show a total present of 4.
Could you help me to find total present from this result set.
You can check this example, which have logic to calculate previous sum value.
-- Running count of 'P' (present) rows per employee, in date order.
-- Table variables are declared with the @ sigil (# would name a temp table, which is
-- created with CREATE TABLE, not DECLARE).
declare @t table (employeeid int, datecol date, status varchar(2) )
insert into @t values (10001, '01-05-2016', 'P'),
(10001, '02-05-2016', 'P'),
(10001, '03-05-2016', 'P'),
(10001, '04-05-2016', 'P'),
(10001, '05-05-2016', 'A'),
(10001, '06-05-2016', 'P'),
(10001, '07-05-2016', 'P'),
(10001, '08-05-2016', 'L'),
(10002, '07-05-2016', 'P'),
(10002, '08-05-2016', 'L')

select * ,
       -- cumulative number of 'P' rows from the employee's first row up to the current one
       SUM(case when status = 'P' then 1 else 0 end) OVER (PARTITION BY employeeid ORDER BY employeeid, datecol
           ROWS BETWEEN UNBOUNDED PRECEDING
           AND current row) AS TotalPresent
from
@t
Another take on the same thing via a CTE (since you mentioned SQL Server 2012: the solution below only works in SQL Server 2012 and above)
-- Number each employee's 'P' rows, then show that number on every row; a non-'P' row takes
-- the value from the row just before it.
-- Uses the table variable @t, which must be declared and filled earlier in the same batch.
;with cte as
(
    -- sequence number of each 'P' row within its employee
    select employeeid , datecol , ROW_NUMBER() over(partition by employeeid order by employeeid, datecol) rowno
    from
    @t where status = 'P'
)
select t.*, cte.rowno ,
       case when ( isnull(cte.rowno, 0) = 0)
            -- partitioned by employee so that one employee's count never leaks into the next
            then LAG(cte.rowno) OVER (PARTITION BY t.employeeid ORDER BY t.datecol)
            else cte.rowno
       end LagValue
from @t t left join cte on t.employeeid = cte.employeeid and t.datecol = cte.datecol
order by t.employeeid, t.datecol
You could use a subquery to calculate TotalPresent for each row:
-- TotalPresent per row: the number of 'P' records for the same employee dated on or before
-- the row's own date, computed with a correlated subquery.
SELECT
    cur.EmployeeID,
    cur.[Date],
    cur.[Status],
    (
        SELECT SUM(CASE WHEN hist.[Status] = 'P' THEN 1 ELSE 0 END)
        FROM [TableName] hist
        WHERE hist.[Date] <= cur.[Date]
          AND hist.EmployeeID = cur.EmployeeID
    ) as TotalPresent
FROM [TableName] cur
ORDER BY
    cur.EmployeeID,
    cur.[Date]
Here I used subquery to count the sum of records that have the same EmployeeID and date is less or equal to the date of current row. If status of the record is 'P', then 1 is added to the sum, otherwise 0, which counts only records that have status P.
Interesting question, this should work:
-- For every row of p, count the 'P' rows with the same id dated on or before it.
select *
     , (select count(g.retail)
        from p g
        where g.id = p.id
          and g.date <= p.date
          and g.retail = 'P')
from p
order by ID, Date;
So I believe I understand correctly: you would like to count the occurrences of P per ID, date by date.
This makes a lot of sense. That is why the first occurrence of ID2 was L and the Total is 0. This query will count P status for each occurrence, pause at non-P for each ID.
Here is an example