SQL: Repeat patterns between date range - sql

DECLARE
#startDate date = '2020-07-03'
#endDate date = '2020-07-06'
I have a table as below
---------------------------------------------------------
|EmployeeID | EmpName |Pattern | Frequency |
---------------------------------------------------------
| 11 | X | 1,2,3 | 1 |
| 12 | Y | 4,5 | 1 |
| 13 | Y | 1,2 | 3 |
| 14 | Z | 1,2 | 2 |
---------------------------------------------------------
AND I want to generate dates between given date range.
I want the result table as below:
--------------------------------
| EmpId | Dates | Pattern |
--------------------------------
| 11 |2020-07-03 | 1 |
| 11 |2020-07-04 | 2 |
| 11 |2020-07-05 | 3 |
| 11 |2020-07-06 | 1 |
| 12 |2020-07-03 | 4 |
| 12 |2020-07-04 | 5 |
| 12 |2020-07-05 | 4 |
| 12 |2020-07-06 | 5 |
| 13 |2020-07-03 | 1 |
| 13 |2020-07-04 | 1 |
| 13 |2020-07-05 | 1 |
| 13 |2020-07-06 | 2 |
| 14 |2020-07-03 | 1 |
| 14 |2020-07-04 | 1 |
| 14 |2020-07-05 | 2 |
| 14 |2020-07-06 | 2 |
Generate the dates as per given date range for each employee and repeat the pattern for each employee as per their pattern and frequency(days).
means as per frequency(days) pattern will change.
What I have achieved:
Able to generate the records for each employees between the given date range.
What I am not able to get:
I am not able to repeat the pattern based on the frequency for each employee between the date range.
I am able to achieve everything else, but I need a little help repeating the pattern based on the frequency.
Note:
Data are storing in this way only.. now I won't change existing schema...

I've come up with this. It's basically a splitter, a tally table and some logic.
Joining (Frequency)-Amount of Tally-datasets with the splitted pattern for the correct amount of pattern-values. Sorting them by their position in the pattern-string.
Join everything together and repeat the pattern by using modulo.
-- Demo: expand each employee's comma-separated Pattern across a date range.
-- Every pattern value is held for Frequency consecutive days, and the pattern
-- starts over once its last value has been used.
-- Variables and table variables carry the T-SQL '@' sigil; a leading '#'
-- denotes a temp table and is not accepted by DECLARE.
DECLARE @t TABLE( EmployeeID INT
                , EmpName VARCHAR(20)
                , Pattern VARCHAR(255)
                , Frequency INT )
DECLARE @startDate DATE = '2020-07-03'
DECLARE @endDate DATE = '2020-07-09'
INSERT INTO @t (EmployeeID, EmpName, Pattern, Frequency)
VALUES (11, 'X', '1,2,3', 1),
       (12, 'Y', '4,5', 1),
       (13, 'Y', '1,2', 3),
       (14, 'Z', '1,2', 2)
DECLARE @delimiter CHAR(1) = ',';
WITH
-- Recursive splitter: each iteration peels the leading element off Txt.
-- i is the 1-based position of elem within the original Pattern.
-- Appending delimiter + '~' guarantees CHARINDEX always finds a delimiter.
split(Txt
    , i
    , elem
    , EmployeeID)
AS (SELECT STUFF(Pattern, 1, CHARINDEX(@delimiter, Pattern+@delimiter+'~'), '')
         , 1
         , CAST(LEFT(Pattern, CHARINDEX(@delimiter, Pattern+@delimiter+'~')-1) AS VARCHAR(MAX))
         , EmployeeID
    FROM @t
    UNION ALL
    SELECT STUFF(Txt, 1, CHARINDEX(@delimiter, Txt+@delimiter+'~'), '')
         , i + 1
         , CAST(LEFT(Txt, CHARINDEX(@delimiter, Txt+@delimiter+'~')-1) AS VARCHAR(MAX))
         , EmployeeID
    FROM split
    WHERE Txt > ''),
-- Row generators used as a tally source; they are only ever read through TOP.
E1(N) AS (SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
          SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
          SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
          SELECT 1),                                        -- 10 rows
E2(N) AS (SELECT 1 FROM E1 AS a CROSS JOIN E1 AS b),        -- 100 rows
E4(N) AS (SELECT 1 FROM E2 AS a CROSS JOIN E2 AS b),        -- 10,000 rows
E8(N) AS (SELECT 1 FROM E4 AS a CROSS JOIN E4 AS b),        -- 100,000,000 rows
-- One row per (pattern element x Frequency), numbered from 0 in pattern order.
-- Sort is therefore the day offset inside one full cycle of the pattern.
PatternXFrequency(EmployeeID
                , Sort
                , elem)
AS (SELECT split.EmployeeID
         , ROW_NUMBER() OVER(PARTITION BY split.EmployeeID ORDER BY i) - 1
         , elem
    FROM split
         INNER JOIN @t AS t ON t.EmployeeID = split.EmployeeID
         -- Repeat the element Frequency times.
         CROSS APPLY (SELECT TOP (t.Frequency) 1
                      FROM E8
                     ) AS Freq(Dummy))
SELECT EmployeeID
     , DATEADD(DAY, i_count, @startDate) AS Dates
     , elem
FROM (SELECT DATEDIFF(DAY, @startDate, @endDate) + 1) AS t_datediff(t_days)
     -- i_count = 0 .. t_days - 1, one row per day in the inclusive range.
     CROSS APPLY (SELECT TOP (t_days) ROW_NUMBER() OVER(ORDER BY (SELECT 0) ) - 1 FROM E8
                 ) AS t_dateadd(i_count)
     CROSS APPLY (SELECT PatternXFrequency.*
                  FROM (SELECT DISTINCT EmployeeID FROM @t) AS t(EmpID)
                       -- sortCount = length of one full cycle for this employee.
                       CROSS APPLY (SELECT COUNT(Sort)
                                    FROM PatternXFrequency
                                    WHERE EmployeeID = EmpID
                                   ) AS EmpPattern(sortCount)
                       -- Modulo wraps the day offset back into the cycle.
                       CROSS APPLY (SELECT *
                                    FROM PatternXFrequency
                                    WHERE EmployeeID = EmpID
                                      AND Sort = ((i_count % sortCount))
                                   ) AS PatternXFrequency
                 ) AS t
ORDER BY t.EmployeeID
       , Dates
-- The splitter recurses once per pattern element; lift the default limit of 100.
OPTION (MAXRECURSION 0)

This isn't particularly pretty, but it avoids the recursion of a rCTE, so should provide a faster experience. As STRING_SPLIT still doesn't know what ordinal position means, we have to use something else here; I use DelimitedSplit8k_LEAD.
I also assume your expected results are wrong, as they stop short of your end date (20200709). This results in the below:
-- Sample data plus a tally-based query that repeats each employee's pattern
-- over the date range, holding every value for Frequency days.
-- Relies on dbo.DelimitedSplit8K_LEAD, which is not defined in this script;
-- the query reads its ItemNumber and Item columns.
CREATE TABLE dbo.YourTable (EmployeeID int,
                            EmpName char(1),
                            Pattern varchar(8000), --This NEEDS fixing
                            Frequency tinyint);
INSERT INTO dbo.YourTable (EmployeeID, EmpName, Pattern, Frequency)
VALUES(11,'X','1,2,3',1),
      (12,'Y','4,5',1),
      (13,'Y','1,2',3),
      (14,'Z','1,2',2);
GO
DECLARE @StartDate date = '20200703',
        @EndDate date = '20200709';
WITH CTE AS(
    -- One row per pattern element, plus the element count for the employee.
    SELECT *,
           MAX(ItemNumber) OVER (PARTITION BY EmployeeID) AS MaxItemNumber
    FROM dbo.YourTable YT
         CROSS APPLY dbo.DelimitedSplit8K_LEAD(YT.Pattern,',') DS),
N AS(
    SELECT N
    FROM (VALUES(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL))N(N)),
Tally AS(
    -- I = 0 .. (days in range - 1); the three-way cross join caps this at 1000 days.
    SELECT TOP (SELECT DATEDIFF(DAY,@StartDate, @EndDate)+1)
           ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) - 1 AS I
    FROM N N1 CROSS JOIN N N2 CROSS JOIN N N3) --1000 Rows
SELECT C.EmployeeID,
       DATEADD(DAY,T.I, @StartDate) AS Dates,
       C.Item AS Pattern
FROM CTE C
     -- Integer-divide the day offset by Frequency so each value is held for
     -- Frequency days, then wrap with modulo and shift to the 1-based ItemNumber.
     -- NULLIF drops rows with Frequency = 0 instead of raising divide-by-zero.
     JOIN Tally T ON (T.I / NULLIF(C.Frequency,0)) % C.MaxItemNumber + 1 = C.ItemNumber
ORDER BY C.EmployeeID,
         T.I;
GO
DROP TABLE dbo.YourTable;

Like mentioned in the comments fix your data model.
Your output pattern is a little bit strange.
But is it something like this you are looking for?
-- Builds a calendar for the requested range and matches each pattern value to
-- the day whose 1-based offset from the start date equals that value.
-- Limitation kept from the original: only the first pass of the pattern is
-- produced and Frequency is not applied.
DECLARE @startDate date = '2020-07-03'
DECLARE @endDate date = '2020-07-09'
DECLARE @Dates TABLE([Date] Date)
;WITH seq(n) AS
(
    SELECT 0 UNION ALL SELECT n + 1 FROM seq
    WHERE n < DATEDIFF(DAY, @startDate, @endDate)
)
INSERT INTO @Dates ([Date])
-- Anchor the calendar on @startDate (not today's date) so it covers the
-- requested range and lines up with the join below.
SELECT DATEADD(DAY, n, @startDate)
FROM seq
OPTION (MAXRECURSION 0);
SELECT e.EmployeeId, d.[Date], x.value AS Pattern
FROM Employee e
CROSS APPLY STRING_SPLIT(e.Pattern, ',') x
INNER JOIN @Dates d on 1=1
-- Correct for the first iteration of the pattern
AND DATEDIFF(DAY, DATEADD(DAY, -1, @startDate), d.[Date]) = x.value

Related

SQL query to return aggregations in a constant format, even when the categories are missing

Is it possible with SQL Server to return table with a constant format?
Let's say we have the following raw data:
DATE | CATEGORY | VALUE
---------------------------------
01.01.2022 | Category 1 | 10
01.01.2022 | Category 1 | 20
01.01.2022 | Category 1 | 33
01.01.2022 | Category 3 | 15
03.01.2022 | Category 1 | 10
03.01.2022 | Category 2 | 20
03.01.2022 | Category 3 | 50
(...)
And the desired output would be:
DATE | CATEGORY | VALUE
---------------------------------
01.01.2022 | Category 1 | 63
01.01.2022 | Category 2 | 0
01.01.2022 | Category 3 | 15
02.01.2022 | Category 1 | 0
02.01.2022 | Category 2 | 0
02.01.2022 | Category 3 | 0
03.01.2022 | Category 1 | 10
03.01.2022 | Category 2 | 20
03.01.2022 | Category 3 | 50
(...)
Please notice that in the desired outcome there's a date present that's missing in the raw data, as well as sum of VALUE are 0 when the category is not present for a given date in the raw data.
-- Contiguous dates table: one row per day between the earliest and latest
-- date found in yourTableName (both ends included).
DECLARE @dates TABLE(dt date) ;
DECLARE @dateFrom date;
DECLARE @dateTo date;
-- Start one day early because the loop increments before it inserts.
select @dateFrom = (Select DateAdd(day, -1, Min([date])) from yourTableName);
select @dateTo = (Select Max([date]) from yourTableName);
-- Fill the calendar. If the source table is empty both bounds are NULL and
-- the loop body never runs.
WHILE(@dateFrom < @dateTo)
BEGIN
    SELECT @dateFrom = DATEADD(day, 1, @dateFrom)
    INSERT INTO @dates (dt)
    SELECT @dateFrom
END
-- Category table: the fixed set of categories every date must report.
DECLARE @categories TABLE(category nvarchar(20)) ;
insert into @categories (category) values ('Category 1'),('Category 2'),('Category 3');
-- cte1 is the constant output grid: every date paired with every category.
with cte1 as (
    select dt, category
    from @dates cross join @categories
)
-- Left join keeps grid cells with no source rows; they sum to 0.
select cte1.dt as [Date], cte1.category, Sum(coalesce(yourTableName.value,0)) as Value
from cte1 left join yourTableName
    on cte1.dt = yourTableName.[Date] and cte1.category = yourTableName.category
group by cte1.dt, cte1.category
order by cte1.dt, cte1.category;
-- Sums value per (date, category) over a fixed 100-day window starting
-- 2022-01-01, returning 0 for combinations with no source rows.
WITH ctedate AS
(
    -- d = 0 .. 99, built from two digit lists.
    SELECT d = v2.d * 10 + v1.d
    FROM (VALUES (0), (1), (2), (3), (4), (5), (6), (7), (8), (9)) v1(d)
    CROSS JOIN (VALUES (0), (1), (2), (3), (4), (5), (6), (7), (8), (9)) v2(d)
),
calendar AS
(
    -- No ORDER BY here: SQL Server rejects it in a derived table/CTE without TOP.
    SELECT DATEADD(DAY, ctedate.d, CONVERT(date, '20220101')) AS [date]
    FROM ctedate
),
categories AS
(
    SELECT DISTINCT category
    FROM yourtablename
)
SELECT cal.[date], cat.category, COALESCE(SUM(a.value), 0) AS value
FROM calendar AS cal
CROSS JOIN categories AS cat
-- Match source rows on both date and category; an unconditional cross join
-- against the source would give every date the category's grand total.
LEFT JOIN yourtablename AS a
    ON a.[date] = cal.[date]
   AND a.category = cat.category
GROUP BY cal.[date], cat.category
ORDER BY cal.[date], cat.category;
Here is a script with solution using cte:
Calculate the min date and the max date
then make a calendar (cte1) with all dates between (min date) and (max date) using recursivity
get the list of all category => cte2
make a cross join between cte1 and cte2 ==> cte3
make a left join between cte3 and the table data (#mytable), replace Null values by 0
-- Sample data plus a query returning one row per (date, category) between the
-- first and last date in the data, with the summed Value (0 when absent).
declare @mytable as table (date date, category varchar(50), Value int)
-- yyyymmdd literals are read the same way under every DATEFORMAT/language setting.
insert into @mytable (date, category, Value) values
('20220101','Category 1',10),
('20220101','Category 1',20),
('20220101','Category 1',33),
('20220101','Category 3',15),
('20220103','Category 1',10),
('20220103','Category 2',20),
('20220103','Category 3',50);
declare @mindate as date, @maxdate as date
select @mindate = min(date), @maxdate = max(date) from @mytable;
with
-- cte1: calendar, one row per day from @mindate to @maxdate inclusive.
cte1 as (select @mindate mydate
         union all
         select dateadd(day,1,mydate) from cte1 where dateadd(day,1,mydate) <= @maxdate),
-- cte2: every category present in the data.
cte2 as (select distinct category from @mytable),
-- cte3: full grid of date x category.
cte3 as (select mydate, category from cte1 cross join cte2),
-- cte4: aggregate the source rows onto the grid; empty cells become 0.
cte4 as (select cte3.mydate as date, cte3.category, coalesce(sum(t.Value),0) as value
         from cte3
         left outer join @mytable t on cte3.mydate = t.date and cte3.category = t.category
         group by cte3.mydate, cte3.category)
select date, category, value
from cte4
order by date, category
-- The calendar recurses once per day; lift the default limit of 100 levels.
option (maxrecursion 0);

How to repeat values in a table in SQL Server?

I have a table in Microsoft SQL Server that logged some values on data change triggers. Now, in order to display some graphs, I would like to get (or repeat) a value per 10 minutes from each column(for example).
I would try to avoid, if possible, an INSERT command modifying the table itself.
Original table:
Time Stamp---- | A | B | C |
---------------+---+---+---+
01-01-19 10:20 | 1 | 0 | 0 |
01-01-19 15:30 | 0 | 0 | 1 |
01-01-19 22:50 | 0 | 1 | 0 |
02-01-19 01:40 | 1 | 0 | 0 |
...
Result I would like to achieve:
Time Stamp---- | A | B | C |
---------------+---+---+---+
01-01-19 10:20 | 1 | 0 | 0 |
01-01-19 10:30 | 1 | 0 | 0 |
01-01-19 10:40 | 1 | 0 | 0 |
01-01-19 10:50 | 1 | 0 | 0 |
...
01-01-19 15:30 | 0 | 0 | 1 |
01-01-19 15:40 | 0 | 0 | 1 |
01-01-19 15:50 | 0 | 0 | 1 |
01-01-19 16:00 | 0 | 0 | 1 |
...
Assuming your dates are mm-dd-yy and times are hh:mm...
-- Repeats each logged row every 10 minutes until the next logged row, without
-- modifying the source table. The timestamp is returned as 'mm-dd-yy hh:mi'.
create table #Original (
    [Time Stamp----] datetime2,
    A int,
    B int,
    C int
)
insert #Original ([Time Stamp----], A, B, C)
values ({ts '2019-01-01 10:20:00.000'}, 1, 0, 0)
     , ({ts '2019-01-01 15:30:00.000'}, 0, 0, 1)
     , ({ts '2019-01-01 22:50:00.000'}, 0, 1, 0)
     , ({ts '2019-01-02 01:40:00.000'}, 1, 0, 0)
;
with
-- s = first logged timestamp, e = last logged timestamp + 10 minutes.
boundaries as (
    select min(o.[Time Stamp----]) as s
         , dateadd(minute, 10, max(o.[Time Stamp----])) as e
    from #Original o
),
-- One row per 10-minute slot, starting at s and stepping while d < e.
timeslist as (
    select (select s from boundaries) as d
    union all
    select dateadd(minute, 10, t.d)
    from timeslist t
    where t.d < (select e from boundaries)
),
result as (
    select
          t.d as SlotTime   -- kept as a real datetime for chronological ordering
        , right('0' + cast(MONTH(t.d) as varchar(2)), 2) + '-' +
          right('0' + cast(DAY(t.d) as varchar(2)), 2) + '-' +
          right('0' + cast(year(t.d) % 100 as varchar(2)), 2) + ' ' +
          right('0' + cast(datepart(hour, t.d) as varchar(2)), 2) + ':' +
          right('0' + cast(datepart(minute, t.d) as varchar(2)), 2) as [Time Stamp----]
        , o2.A
        , o2.B
        , o2.C
    from timeslist t
    inner join (
        -- OldTs = timestamp of the next logged row; the last row gets its own
        -- timestamp + 10 minutes, so it covers exactly one slot.
        select o.[Time Stamp----]
             , o.A
             , o.B
             , o.C
             , lead (o.[Time Stamp----], 1, dateadd(minute, 10, o.[Time Stamp----])) over (order by o.[Time Stamp----]) as OldTs
        from #Original o
    ) o2 on o2.[Time Stamp----] <= t.d and o2.OldTs > t.d
)
select r.[Time Stamp----], r.A, r.B, r.C
from result r
-- Order by the datetime, not the formatted text: 'mm-dd-yy' strings do not
-- sort chronologically across years.
order by r.SlotTime
-- One recursion level per slot; the sample alone needs 93, so lift the
-- default limit of 100.
option (maxrecursion 0);
drop table #Original
To select records with manufactured duplicates, try
SELECT Dateadd(mi, DQ.T,TimeStamp) as 'TimeStamp', A, B, C From YourTable
CROSS JOIN (Select 0 T UNION ALL
Select 10 T UNION ALL
Select 20 T UNION ALL
Select 30 T) DQ
or to insert duplicates, try
INSERT YourTable
SELECT Dateadd(mi, DQ.T,TimeStamp) as 'TimeStamp', A, B, C From YourTable
CROSS JOIN (
Select 10 T UNION ALL
Select 20 T UNION ALL
Select 30 T) DQ
Personally I recommend making a "Time Table", but I do this on the fly here using a Tally. Anyway, I think this is what you're after?
-- Builds every 10-minute slot for each calendar day present in the sample
-- table and carries the most recent logged A/B/C values forward into the
-- slots that follow.
USE Sandbox;
GO
CREATE TABLE dbo.YourTable ([timestamp] datetime2(0), --This is a bad name for a column, as timestamp means something else in SQL Server
A bit,
B bit,
C bit);
INSERT INTO dbo.YourTable ([timestamp],
A,
B,
C)
VALUES ('2019-01-01T10:20:00',1,0,0),
('2019-01-01T15:30:00',0,0,1),
('2019-01-01T22:50:00',0,1,0),
('2019-01-02T01:40:00',1,0,0);
GO
-- N: six placeholder rows; cross-joined three times below (6*6*6 = 216 rows).
WITH N AS
(SELECT N
FROM (VALUES(NULL),(NULL),(NULL),(NULL),(NULL),(NULL))N(N)),
-- Tally: I = 0 .. 143, one number per 10-minute slot (144 * 10 min = 24 hours).
Tally AS(
SELECT TOP(144) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) -1 AS I
FROM N N1, N N2, N N3),
-- Times: the start time of each slot, counted from midnight.
Times AS(
SELECT DATEADD(MINUTE,T.I * 10,CONVERT(time(0),'00:00:00')) AS TimeSlot
FROM Tally T),
-- DateTimes: every slot for each distinct calendar date found in YourTable.
DateTimes AS(
SELECT DISTINCT
CONVERT(datetime,CONVERT(date,YT.[timestamp])) + CONVERT(datetime,T.TimeSlot) AS DateTimeSlot
FROM dbo.YourTable YT
CROSS JOIN Times T),
-- Groups: the LEFT JOIN leaves A/B/C NULL for slots with no logged row.
-- COUNT(YT.A) ignores those NULLs, so the running count only increases on a
-- slot that matches a logged row; all following empty slots share its Grp.
Groups AS(
SELECT DT.DateTimeSlot,
CONVERT(tinyint,YT.A) AS A, --Can't aggregate Bits
CONVERT(tinyint,YT.B) AS B,
CONVERT(tinyint,YT.C) AS C,
COUNT(YT.A) OVER (ORDER BY DT.DateTimeSlot ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS Grp
FROM DateTimes DT
LEFT JOIN dbo.YourTable YT ON DT.DateTimeSlot = YT.[timestamp])
-- MAX over each group spreads the single logged value across the group.
-- Slots before the first logged row fall in Grp 0 and return NULLs.
SELECT G.DateTimeSlot,
MAX(G.A) OVER (PARTITION BY G.Grp) AS A,
MAX(G.B) OVER (PARTITION BY G.Grp) AS B,
MAX(G.C) OVER (PARTITION BY G.Grp) AS C
FROM Groups G
ORDER BY G.DateTimeSlot;
GO
DROP TABLE dbo.YourTable;
You can use SQL RECURSION and CROSS JOIN
SQL FIDDLE
Demo
-- Demo: emit each logged row four times, at +0, +10, +20 and +30 minutes.
declare @mytable as table([timestamp] datetime, A int, B int, C int)
-- ISO 8601 literals avoid day/month ambiguity; the last row belongs to
-- 2 January, matching the sample data in the question.
insert into @mytable ([timestamp], A, B, C) values
('2019-01-01T10:20:00',1,0,0),('2019-01-01T15:30:00',0,0,1),
('2019-01-01T22:50:00',0,1,0),('2019-01-02T01:40:00',1,0,0)
-- cte: minute offsets 0, 10, 20, 30 (recursion stops before 40).
;with cte as(
    select 0 n
    union all
    select n+10 from cte where n+10 < 40)
select dateadd(mi, cte.n, t1.[timestamp]) as [TIMESTAMP], t1.A, t1.B, t1.C
from @mytable t1 cross join cte
-- Sorts by the shifted output value (the select-list alias).
order by [TIMESTAMP]

SQL # of days between different date ranges

How can I count the number of days between a start and end date in SQL?
ID | START | END
1 |2018-1-1 |2018-1-3
2 |2018-1-1 |2018-1-4
3 |2018-1-1 |2018-1-5
Ideally would return:
DATE | COUNT
2018-1-1 | 3
2018-1-2 | 3
2018-1-3 | 3
2018-1-4 | 2
2018-1-5 | 1
One option is to generate all dates between min start and max end with a recursive cte and then count them.
-- Counts, for every day between the earliest start and the latest end, how
-- many rows of tbl have a [start]..[end] range containing that day.
with dates(dt1,dt2) as (
    -- Anchor: overall first day (dt1) and last day (dt2).
    select min([start]), max([end])
    from tbl
    union all
    -- Step forward one day until dt2 is reached.
    select dateadd(day,1,dt1), dt2
    from dates
    where dt1 < dt2
)
select d.dt1 as [date], count(*) as [count]
from dates d
join tbl t on d.dt1 between t.[start] and t.[end]
group by d.dt1
order by d.dt1
-- One recursion level per day; lift the default limit of 100 so spans longer
-- than 100 days do not abort.
option (maxrecursion 0);
Similar approach worked out with a complete example:
DECLARE #range TABLE
(
id INT NOT NULL IDENTITY(1,1),
s_date DATETIME NOT NULL,
e_date DATETIME NOT NULL
);
INSERT INTO #range
(s_date, e_date)
VALUES
('2018-1-1','2018-1-3'),
('2018-1-1','2018-1-4'),
('2018-1-1','2018-1-5');
DECLARE #date TABLE
(
date DATETIME NOT NULL
);
INSERT INTO #date
(date)
VALUES
('2018-1-1'), ('2018-1-1'), ('2018-1-1'),
('2018-1-2'), ('2018-1-2'), ('2018-1-2'),
('2018-1-3'), ('2018-1-3'), ('2018-1-3'),
('2018-1-4'), ('2018-1-4'),
('2018-1-5');
SELECT d.date, COUNT(DISTINCT r.id)
FROM #range r
JOIN #date d ON d.date BETWEEN r.s_date AND r.e_date
GROUP BY d.date
I post a solution to fill gaps with a simple generator:
Check it at SQL Fiddle
MS SQL Server 2017 Schema Setup:
create table d
( ID int, fSTART date, fEND date );
insert into d values
(1, '2018-1-1' ,'2018-1-3'),
(2, '2018-1-1' ,'2018-1-4'),
(3, '2018-1-1' ,'2018-1-5');
Query 1:
;WITH Nbrs_3( n ) AS ( SELECT 1 UNION SELECT 0 ),
Nbrs_2( n ) AS ( SELECT 1 FROM Nbrs_3 n1 CROSS JOIN Nbrs_3 n2 ),
Nbrs_1( n ) AS ( SELECT 1 FROM Nbrs_2 n1 CROSS JOIN Nbrs_2 n2 ),
Nbrs_0( n ) AS ( SELECT 1 FROM Nbrs_1 n1 CROSS JOIN Nbrs_1 n2 ),
Nbrs ( n ) AS ( SELECT 1 FROM Nbrs_0 n1 CROSS JOIN Nbrs_0 n2 ),
ns (n) as (SELECT ROW_NUMBER() OVER (ORDER BY n) FROM Nbrs )
select distinct dateadd( day, n-1,fSTART )
from d inner join ns on dateadd( day, n-1, fSTART ) between fStart and fend
order by 1
Results:
| |
|------------|
| 2018-01-01 |
| 2018-01-02 |
| 2018-01-03 |
| 2018-01-04 |
| 2018-01-05 |
I used cross apply to get your results:
I am assuming you want to know how many times that date is between a start and end date of the other table.
Setup:
declare #s table
( ID int, fSTART date, fEND date );
insert into #s values
(1, '2018-1-1' ,'2018-1-3'),
(2, '2018-1-1' ,'2018-1-4'),
(3, '2018-1-1' ,'2018-1-5');
declare #d table
(dte date)
insert into #d
values
('1/1/2018')
,('1/2/2018')
,('1/3/2018')
,('1/4/2018')
,('1/5/2018')
The Query:
select d.dte
,ct = sum(case when d.dte between s.fstart and s.fend then 1 else 0 end)
from #d d
cross apply #s s
group by dte
results:
dte ct
2018-01-01 3
2018-01-02 3
2018-01-03 3
2018-01-04 2
2018-01-05 1

display 3 or more consecutive rows(Sql)

I have a table with below data
+------+------------+-----------+
| id | date1 | people |
+------+------------+-----------+
| 1 | 2017-01-01 | 10 |
| 2 | 2017-01-02 | 109 |
| 3 | 2017-01-03 | 150 |
| 4 | 2017-01-04 | 99 |
| 5 | 2017-01-05 | 145 |
| 6 | 2017-01-06 | 1455 |
| 7 | 2017-01-07 | 199 |
| 8 | 2017-01-08 | 188 |
+------+------------+-----------+
now what i am trying to do is to display 3 consecutive rows where people were >=100 like this
+------+------------+-----------+
| id | date1 | people |
+------+------------+-----------+
| 5 | 2017-01-05 | 145 |
| 6 | 2017-01-06 | 1455 |
| 7 | 2017-01-07 | 199 |
| 8 | 2017-01-08 | 188 |
+------+------------+-----------+
can anyone help me how to do this query using oracle database. I am able to display rows which are above 100 but not in a consecutive way
Table creation(reducing typing time for people who will be helping)
CREATE TABLE stadium
( id int
, date1 date, people int
);
Insert into stadium values (
1,TO_DATE('2017-01-01','YYYY-MM-DD'),10);
Insert into stadium values
(2,TO_DATE('2017-01-02','YYYY-MM-DD'),109);
Insert into stadium values(
3,TO_DATE('2017-01-03','YYYY-MM-DD'),150);
Insert into stadium values(
4,TO_DATE('2017-01-04','YYYY-MM-DD'),99);
Insert into stadium values(
5,TO_DATE('2017-01-05','YYYY-MM-DD'),145);
Insert into stadium values(
6,TO_DATE('2017-01-06','YYYY-MM-DD'),1455);
Insert into stadium values
(7,TO_DATE('2017-01-07','YYYY-MM-DD'),199);
Insert into stadium values(
8,TO_DATE('2017-01-08','YYYY-MM-DD'),188);
Thanks in advance for the help
Assuming you mean >= 100, there are a couple of ways. One method just uses lead() and lag(). But a simple method defines each group >= 100 by the number of values < 100 before it. Then it uses count(*) to find the size of the consecutive values:
-- Returns the rows that belong to a run of 3 or more consecutive days with
-- people >= 100.
select s.*
from (select s.*,
             -- size of the run this row belongs to
             count(*) over (partition by grp) as num100pl
      from (select s.*,
                   -- grp = number of "< 100" days seen so far in date order, so
                   -- all days of one uninterrupted ">= 100" run share a value
                   sum(case when people < 100 then 1 else 0 end) over (order by date1) as grp
            from stadium s
           ) s
      -- The "< 100" day that opens a group carries the same grp as the run
      -- that follows it; drop it so it is neither counted nor returned.
      where people >= 100
     ) s
where num100pl >= 3
order by s.date1;
Here is a SQL Fiddle showing that the syntax works.
You can use the following sql script to get the desired output.
-- Returns rows that are part of 3 or more consecutive ids with people >= 100.
WITH partitioned AS (
    -- Among qualifying rows, id minus its row number is constant within a run
    -- of consecutive ids, so it serves as a run identifier.
    -- The star is qualified (s.*) because Oracle rejects a bare * next to
    -- other select-list expressions.
    SELECT s.*, s.id - ROW_NUMBER() OVER (ORDER BY s.id) AS grp
    FROM stadium s
    WHERE s.people >= 100
),
counted AS (
    -- cnt = length of the run each row belongs to.
    SELECT p.*, COUNT(*) OVER (PARTITION BY p.grp) AS cnt
    FROM partitioned p
)
-- The table's date column is date1.
SELECT id, date1, people
FROM counted
WHERE cnt >= 3
ORDER BY id
I'm assuming that both the id and date columns are sequential and correspond to each other (there will need to be additional ROW_NUMBER() if the ids are not sequential with the dates, and more complex logic included if the dates are not necessarily sequential).
SELECT
*
FROM
(
SELECT
*
,COUNT(date) OVER (PARTITION BY sequential_group_num) AS num_days_in_sequence
FROM
(
SELECT
*
,(id - ROW_NUMBER() OVER (ORDER BY date)) AS sequential_group_num
FROM
stadium
WHERE
people >= 100
) AS subquery1
) AS subquery2
WHERE
num_days_in_sequence >= 3
That produces the following output:
id date people sequential_group_num num_days_in_sequence
----------- ---------- ----------- -------------------- --------------------
5 2017-01-05 145 2 4
6 2017-01-06 1455 2 4
7 2017-01-07 199 2 4
8 2017-01-08 188 2 4
By using joins we can display the consecutive rows like this
SELECT id, date1, people FROM stadium a WHERE people >= 100
AND (SELECT people FROM stadium b WHERE b.id = a.id + 1) >= 100
AND (SELECT people FROM stadium c WHERE c.id = a.id + 2) >= 100
OR people >= 100
AND (SELECT people FROM stadium e WHERE e.id = a.id - 1) >= 100
AND (SELECT people FROM stadium f WHERE f.id = a.id + 1) >= 100
OR people >= 100
AND (SELECT people FROM stadium g WHERE g.id = a.id - 1) >= 100
AND (SELECT people FROM stadium h WHERE h.id = a.id - 2) >= 100
order by id;
-- Returns every row t1 that is the first, middle or last member of three
-- consecutive ids that all have people >= 100.
select distinct
    t1.*
from
    stadium t1
-- Explicit CROSS JOIN: a bare JOIN without ON is only accepted by MySQL.
cross join
    stadium t2
cross join
    stadium t3
where
    t1.people >= 100
    and t2.people >= 100
    and t3.people >= 100
    and
    (
        -- t1 is the first of the three: t1, t2, t3
        (t1.id + 1 = t2.id
         and t2.id + 1 = t3.id)
        or
        -- t1 is the middle one: t2, t1, t3
        (
            t2.id + 1 = t1.id
            and t1.id + 1 = t3.id
        )
        or
        -- t1 is the last one: t2, t3, t1
        (
            t2.id + 1 = t3.id
            and t3.id + 1 = t1.id
        )
    )
order by
    t1.id;
SQL script:
SELECT DISTINCT SS.*
FROM STADIUM SS
INNER JOIN
(SELECT S1.ID
FROM STADIUM S1
WHERE 3 = (
SELECT COUNT(1)
FROM STADIUM S2
WHERE (S2.ID=S1.ID OR S2.ID=S1.ID+1 OR S2.ID=S1.ID+2)
AND S2.PEOPLE >= 100
)) AS SS2
ON SS.ID>=SS2.ID AND SS.ID<SS2.ID+3
-- Returns rows that are part of 3 or more consecutive days with people >= 100.
select s.*
from (
      -- inner query 2: keep only the ">= 100" days and count the size of each run
      select t.*, count(*) over (partition by t.grp) as total
      from (
            -- inner query 1: grp = running count of "< 100" days in date order;
            -- it stays constant across an uninterrupted ">= 100" run
            select st.*,
                   sum(case when st.people < 100 then 1 else 0 end) over (order by st.date1) as grp
            from stadium st
           ) t
      where t.people >= 100
     ) s
-- outer query: keep runs of at least three days
where s.total >= 3
order by s.date1
I wrote the following solution for this similar leetcode problem:
with groupVisitsOver100 as (
select *,
sum(
case
when people < 100 then 1
else 0
end
) over (order by date1) as visitGroups
from stadium
),
filterUnder100 as (
select
*
from groupVisitsOver100
where people >= 100
),
countGroupsSize as (
select
*,
count(*) over (partition by visitGroups) as groupsSize
from filterUnder100
)
select id, date1, people from countGroupsSize where groupsSize >= 3 order by date1

Query to select same event code with at least one hour interval

I have a sample table
CREATE TABLE [dbo].[wt](
[id] [int] NULL,
[dt] [datetime] NULL,
[txt] [nvarchar](50) NULL
) ON [PRIMARY]
GO
INSERT INTO [dbo].[wt]
([id]
,[dt]
,[txt])
VALUES
(1, '2017-01-01 00:01:00.000', 't1'),
(2, '2017-01-01 00:03:00.000', 't1'),
(3, '2017-01-01 00:02:00.000', 't1'),
(4, '2017-01-01 01:04:00.000', 't1'),
(5, '2017-01-01 02:10:00.000', 't1'),
(6, '2017-01-01 00:01:00.000', 't1'),
(7, '2017-01-01 01:05:00.000', 't1'),
(8, '2017-01-01 02:10:00.000', 't2'),
(9, '2017-01-01 00:03:00.000', 't2'),
(10,'2017-01-01 01:04:00.000', 't2'),
(11,'2017-01-01 00:52:00.000', 't1')
I would like to have a list of txt codes and dt dates, grouped by txt code, where the interval between txt occurrences is at least one hour and nothing in between.
To clarify: when t1 first occurs at '2017-01-01 00:01:00.000'
then next occurrence I am looking for is after at least one hour
which will be '2017-01-01 01:04:00.000'
third occurrence I am looking for is after at least one hour from '2017-01-01 01:04:00.000' and so on.
After some searching I found something like this
;with a as (
select txt, dt,
rn = row_number() over (partition by txt order by dt asc)
from [wt]),
b as (
select txt, dt, dt as dt2, rn, null tm, 0 recurrence
from a
where rn = 1
union all
select a.txt, a.dt, a.dt,
a.rn, datediff(MINUTE,a.dt,b.dt) tm,
case when dateadd(MINUTE,-60,a.dt) < b.dt then recurrence + 1 else 0 end
from b join a
on b.rn = a.rn - 1 and b.txt = a.txt
)
select txt, dt, rn, tm, recurrence
from b
where recurrence = 0
order by txt, dt
but this wasn't good because the interval isn't counted from first occurrence but from last, so I got
txt dt rn tm recurrence
t1 2017-01-01 00:01:00.000 1 NULL 0
t1 2017-01-01 02:10:00.000 8 -65 0
t2 2017-01-01 00:03:00.000 1 NULL 0
t2 2017-01-01 01:04:00.000 2 -61 0
t2 2017-01-01 02:10:00.000 3 -66 0
I think I found a workaround because in this case I could group record within same hour but I am not happy with that solution.
select txt, min(dt) dt
into #ttwt
from [wt]
group by txt, substring(convert(varchar,dt,120),1,14)+'00:00.000'
;with a as (
select txt, dt,
rn = row_number() over (partition by txt order by dt asc)
from #ttwt),
b as (
select txt, dt, dt as dt2, rn, null tm, 0 recurrence
from a
where rn = 1
union all
select a.txt, a.dt, a.dt,
a.rn, datediff(MINUTE,a.dt,b.dt) tm,
case when dateadd(MINUTE,-60,a.dt) < b.dt then recurrence + 1 else 0 end
from b join a
on b.rn = a.rn - 1 and b.txt = a.txt
)
select txt, dt, rn, tm, recurrence
from b
where recurrence = 0
order by txt, dt
drop table #ttwt
txt dt rn tm recurrence
t1 2017-01-01 00:01:00.000 1 NULL 0
t1 2017-01-01 01:04:00.000 2 -63 0
t1 2017-01-01 02:10:00.000 3 -66 0
t2 2017-01-01 00:03:00.000 1 NULL 0
t2 2017-01-01 01:04:00.000 2 -61 0
t2 2017-01-01 02:10:00.000 3 -66 0
Any suggestions to improve the script so it will let the interval be any entered value in minutes would be appreciated.
If I have understood correctly I think the following does what you need.
-- For each txt code, keeps the first occurrence and then each next occurrence
-- that is at least @IntervalMinutes after the previously kept one.
DECLARE @IntervalMinutes INT = 60;   -- minimum gap between kept rows, in minutes

-- txt matches the source column type (nvarchar(50)) so long codes are not rejected.
CREATE TABLE #T (id INT , rn INT, txt NVARCHAR(50), dt DATETIME, lagDiff INT, runningDiff INT)
INSERT INTO #T (id, rn, txt, dt, lagDiff, runningDiff)
SELECT id
     -- rn: 0-based position of the row within its txt code, in time order
     , ROW_NUMBER() OVER( PARTITION BY txt ORDER BY dt, id) -1 rn
     , txt
     , dt
     -- lagDiff: minutes since the previous row of the same code (0 for the first)
     , DATEDIFF(MINUTE, COALESCE(LAG(dt) OVER( PARTITION BY txt ORDER BY dt, id), dt), dt) Diff
     -- runningDiff: minutes since the first row of the same code
     , DATEDIFF(MINUTE, COALESCE(FIRST_VALUE(dt) OVER( PARTITION BY txt ORDER BY dt, id), dt), dt) RunningDiff
FROM wt
; WITH CTE AS (
    -- Level 1: the first occurrence of every code.
    SELECT *, 1 AS Level
    FROM #T
    WHERE rn = 0
    UNION ALL
    -- Level n+1: every later row at least @IntervalMinutes after a level-n row.
    -- ">=" because a gap of exactly the interval satisfies "at least".
    SELECT T.*, CTE.Level + 1
    FROM #T T
    INNER JOIN CTE ON CTE.txt = T.txt AND CTE.rn < T.rn AND T.runningDiff - @IntervalMinutes >= CTE.runningDiff
    WHERE T.rn > 0
)
, X AS (
    -- The earliest candidate at each level is the row actually kept.
    SELECT txt
         , Level
         , MIN(rn) rn
    FROM CTE
    GROUP BY txt, Level
)
SELECT #T.*
FROM X
INNER JOIN #T ON #T.txt = X.txt AND #T.rn = X.rn
ORDER BY #T.txt, #T.dt

DROP TABLE #T
Output
+----+----+-----+-------------------------+---------+-------------+
| id | rn | txt | dt | lagDiff | runningDiff |
+----+----+-----+-------------------------+---------+-------------+
| 1 | 0 | t1 | 2017-01-01 00:01:00.000 | 0 | 0 |
| 4 | 5 | t1 | 2017-01-01 01:04:00.000 | 12 | 63 |
| 5 | 7 | t1 | 2017-01-01 02:10:00.000 | 65 | 129 |
| 9 | 0 | t2 | 2017-01-01 00:03:00.000 | 0 | 0 |
| 10 | 1 | t2 | 2017-01-01 01:04:00.000 | 61 | 61 |
| 8 | 2 | t2 | 2017-01-01 02:10:00.000 | 66 | 127 |
+----+----+-----+-------------------------+---------+-------------+
I kind of like a method that works like a bubble sort. The problem I have found with recursive operations is that they work great for small sets (think fewer than 5 or 10k rows), then behave horribly when the set gets larger. For this reason I like a cursor approach where you are essentially saying: "Are you larger than a criterion? Yes, No. Insert or Ignore, Delete, move on." This way you evaluate every item once and once only, not every variation of a theme of recursion.
-- Walks the rows of wt once, in (txt, time) order, keeping the first row of
-- every txt code and each later row that is at least @IntervalMinutes after
-- the last row kept for the same code.
-- Assumes wt.id is unique and not NULL: it is used to remove the processed
-- row from the work list.
DECLARE @IntervalMinutes INT = 60;   -- minimum gap between kept rows, in minutes

-- Work list: rows still to be examined; rwn is the position within the code.
DECLARE @Temp TABLE
    (
      id  INT
    , dt  DATETIME
    , txt NVARCHAR(50)
    , rwn INT
    )
-- Result: the kept rows; Dif is the gap in minutes to the previously kept row.
DECLARE @Holder TABLE
    (
      id  INT
    , dt  DATETIME
    , txt NVARCHAR(50)
    , Dif INT
    )
INSERT INTO @Temp (id, dt, txt, rwn)
SELECT id, dt, txt, ROW_NUMBER() OVER (PARTITION BY txt ORDER BY dt, id)
FROM wt

DECLARE
      @CurId    INT
    , @CurDt    DATETIME
    , @Curtxt   NVARCHAR(50)
    , @CurRwn   INT
    , @LastDate DATETIME
    ;

WHILE EXISTS (SELECT 1 FROM @Temp)
BEGIN
    -- Next unprocessed row in (txt, time) order.
    SELECT TOP 1 @CurId = id, @CurDt = dt, @Curtxt = txt, @CurRwn = rwn
    FROM @Temp
    ORDER BY txt, rwn

    IF @CurRwn = 1
    BEGIN
        -- First row of a code is always kept and starts a new chain.
        INSERT INTO @Holder (id, dt, txt, Dif) VALUES (@CurId, @CurDt, @Curtxt, NULL)
    END
    ELSE
    BEGIN
        -- Last row kept for THIS code, chosen by time rather than by id, so
        -- rows of other codes or out-of-order ids cannot be picked up.
        SELECT TOP 1 @LastDate = dt
        FROM @Holder
        WHERE txt = @Curtxt
        ORDER BY dt DESC

        -- Compare elapsed time directly: DATEDIFF(HOUR, ...) counts clock-hour
        -- boundaries, so 00:59 -> 01:00 would count as a full hour.
        IF @CurDt >= DATEADD(MINUTE, @IntervalMinutes, @LastDate)
        BEGIN
            INSERT INTO @Holder (id, dt, txt, Dif)
            VALUES (@CurId, @CurDt, @Curtxt, DATEDIFF(MINUTE, @LastDate, @CurDt))
        END
    END

    -- Remove the processed row and loop again.
    DELETE @Temp WHERE id = @CurId
END

SELECT id, dt, txt, Dif
FROM @Holder
ORDER BY txt, dt