SQL get consecutive days ignoring weekend - sql

I have a table with following format:
ID ID1 ID2 DATE
1 1 1 2018-03-01
2 1 1 2018-03-02
3 1 1 2018-03-05
4 1 1 2018-03-06
5 1 1 2018-03-07
6 2 2 2018-03-05
7 2 2 2018-03-05
8 2 2 2018-03-06
9 2 2 2018-03-07
10 2 2 2018-03-08
From this table I need all records that share the same ID1 and ID2 and whose DATE values form 5 consecutive work days (5 dates in a row, where the missing Saturday/Sunday dates do not break the sequence; holidays can be ignored).
I have really no idea how to achieve this. I did search around, but couldn't find anything that helped me. So my question is, how can I achieve following output?
ID ID1 ID2 DATE
1 1 1 2018-03-01
2 1 1 2018-03-02
3 1 1 2018-03-05
4 1 1 2018-03-06
5 1 1 2018-03-07
SQLFiddle to mess around

Assuming you have no duplicates and work is only on weekdays, then there is a simplish solution for this particular case. We can identify the date 4 rows ahead. For a complete week, it is either 4 days ahead or 6 days ahead:
-- Keeps each row that starts a run of five consecutive work days.
-- Assumes at most one row per date within an (id1, id2) group.
select t.*
from (select t.*,
             -- Look 4 rows ahead inside the same (id1, id2) group only, so a run
             -- can never be completed with dates that belong to another group.
             lead(dat, 4) over (partition by id1, id2 order by dat) as dat_4
      from t
     ) t
-- Five work days in a row span 4 calendar days (no weekend inside)
-- or 6 calendar days (one weekend inside).
where datediff(day, dat, dat_4) in (4, 6);
This happens to work because you are looking for a complete week.
Here is the SQL Fiddle.

-- Rows of t whose (id1, id2) group contains exactly five distinct dates.
select t.*
from t
inner join (
    select id1, id2
    from t
    group by id1, id2
    having count(distinct dat) = 5
) five_day_groups
    on five_day_groups.id1 = t.id1
   and five_day_groups.id2 = t.id2
Check this-
Dates of Two weeks with 10 valid dates
http://sqlfiddle.com/#!18/76556/1
Dates of Two weeks with 10 non-unique dates
http://sqlfiddle.com/#!18/b4299/1
and
Dates of Two weeks with less than 10 but unique
http://sqlfiddle.com/#!18/f16cb/1

This query is very verbose without LEAD or LAG and it is the best I could do on my lunch break. You can probably improve on it given the time.
-- Finds, per (ID1, ID2), every run of at least @DaysThreshold dates in which each
-- date follows the previous one directly or across a weekend, and returns the
-- rows of @T that fall inside such a run. Written without LEAD/LAG.
-- Table variables and scalar variables carry the @ sigil (and @@DATEFIRST),
-- which T-SQL requires for this script to parse.
DECLARE @T TABLE
(
ID INT,
ID1 INT,
ID2 INT,
TheDate DATETIME
)
-- Sample data; date literals are written month/day/year.
INSERT @T SELECT 1,1,1,'03/01/2018'
INSERT @T SELECT 2,1,1,'03/02/2018'
INSERT @T SELECT 3,1,1,'03/05/2018'
INSERT @T SELECT 4,1,1,'03/06/2018'
INSERT @T SELECT 5,1,1,'03/07/2018'
--INSERT @T SELECT 5,1,1,'03/09/2018'
INSERT @T SELECT 6,2,2,'03/02/2018'
INSERT @T SELECT 7,2,2,'03/05/2018'
INSERT @T SELECT 8,2,2,'03/05/2018'
--INSERT @T SELECT 9,2,2,'03/06/2018'
INSERT @T SELECT 10,2,2,'03/07/2018'
INSERT @T SELECT 11,2,2,'03/08/2018'
INSERT @T SELECT 12,2,2,'03/15/2018'
INSERT @T SELECT 13,1,1,'04/01/2018'
INSERT @T SELECT 14,1,1,'04/02/2018'
INSERT @T SELECT 15,1,1,'04/05/2018'
--SELECT * FROM @T
-- Calendar bounds: one day before the earliest date and one day after the latest.
DECLARE @LowDate DATETIME = DATEADD(DAY,-1,(SELECT MIN(TheDate) FROM @T))
DECLARE @HighDate DATETIME = DATEADD(DAY,1,(SELECT MAX(TheDate) FROM @T))
-- Minimum number of dates a run must contain to be reported.
DECLARE @DaysThreshold INT = 5
;
-- Dates: one row per calendar day from @LowDate up to the day before @HighDate.
WITH Dates AS
(
SELECT DateValue=@LowDate
UNION ALL
SELECT DateValue + 1 FROM Dates
WHERE DateValue + 1 < @HighDate
),
-- Joined: the calendar with the matching @T rows attached (NULLs where no row exists).
Joined AS
(
SELECT * FROM Dates LEFT OUTER JOIN @T T ON T.TheDate=Dates.DateValue
),
-- Calculations: one row per (ID1, ID2, TheDate) with the latest earlier date of the
-- same group (LastDate) and the gap to it in days (Offset).
Calculations AS
(
SELECT
ID=MAX(J1.ID),
J1.ID1,J1.ID2,
J1.TheDate,
LastDate=MAX(J2.TheDate),
-- (DATEPART(DW, d) + @@DATEFIRST) % 7 is 0 for Saturday and 1 for Sunday whatever
-- the session's DATEFIRST is; the flag is 1 when the day before TheDate is one of them.
LastDateWasWeekend = CASE WHEN ((DATEPART(DW,DATEADD(DAY,-1,J1.TheDate) ) + @@DATEFIRST) % 7) NOT IN (0, 1) THEN 0 ELSE 1 END,
Offset = DATEDIFF(DAY,MAX(J2.TheDate),J1.TheDate)
FROM
Joined J1
LEFT OUTER JOIN Joined J2 ON J2.ID1=J1.ID1 AND J2.ID2=J1.ID2 AND J2.TheDate<J1.TheDate
WHERE
NOT J1.ID IS NULL
GROUP BY J1.ID1,J1.ID2,J1.TheDate
)
-- FindValid: IsValid = 1 when the row continues the run of the previous date
-- (first date of a group, next day, or a gap of at most 3 days that follows a weekend day);
-- IsValid = 0 marks a break. A sentinel break row at @HighDate is added per group.
,FindValid AS
(
SELECT
ID,ID1,ID2,TheDate,
IsValid=CASE
WHEN LastDate=TheDate THEN 0
WHEN LastDate IS NULL THEN 1
WHEN Offset=1 THEN 1
WHEN Offset>3 THEN 0
WHEN Offset<=3 THEN
LastDateWasWeekend
END
FROM
Calculations
UNION
SELECT DISTINCT ID=NULL,ID1,ID2, TheDate=@HighDate,IsValid=0 FROM @T
),
-- FindMax: MaxRange = the earliest later break date in the same group
-- (used below as the exclusive upper bound of the row's run).
FindMax As
(
SELECT
This.ID,This.ID1,This.ID2,This.TheDate,MaxRange=MIN(Next.TheDate)
FROM
FindValid This
LEFT OUTER JOIN FindValid Next ON Next.ID2=This.ID2 AND Next.ID1=This.ID1 AND This.TheDate<Next.TheDate AND Next.IsValid=0
GROUP BY
This.ID,This.ID1,This.ID2,This.TheDate
),
-- FindMin: MinRange = the earliest date among same-group rows whose MaxRange lies
-- after this row's date (presumably the first date of the run the row belongs to).
FindMin AS
(
SELECT
This.ID,This.ID1,This.ID2,This.TheDate,This.MaxRange,MinRange=MIN(Next.TheDate)
FROM
FindMax This
LEFT OUTER JOIN FindMax Next ON Next.ID2=This.ID2 AND Next.ID1=This.ID1 AND This.TheDate<Next.MaxRange-- AND Next.IsValid=0 OR Next.TheDate IS NULL
GROUP BY
This.ID,This.ID1,This.ID2,This.TheDate,This.MaxRange
)
-- Final: number of dates per (group, MinRange, MaxRange) range.
,Final AS
(
SELECT
ID1,ID2,MinRange,MaxRange,SequentialCount=COUNT(*)
FROM
FindMin
GROUP BY
ID1,ID2,MinRange,MaxRange
)
-- Return the original rows that fall inside a range that is long enough.
SELECT
T.ID,
T.ID1,
T.ID2,
T.TheDate
FROM @T T
INNER JOIN Final ON T.TheDate>= Final.MinRange AND T.TheDate < Final.MaxRange AND T.ID1=Final.ID1 AND T.ID2=Final.ID2
WHERE
SequentialCount>=@DaysThreshold
-- Lift the default recursion limit so the Dates CTE can span more than 100 days.
OPTION (MAXRECURSION 0)

Related

Randomly join tables and return columns

I have these 3 tables:
-- Dimension tables and sample rows for the random-join example.
CREATE TABLE DimEmployee(EmployeeID INT)
CREATE TABLE DimDepartment(DepartmentID INT)
-- Named DimPosition so that the INSERT below targets a table that actually exists.
CREATE TABLE DimPosition(PositionID INT)
INSERT INTO DimEmployee(EmployeeID) VALUES (1),(2),(3)
INSERT INTO DimDepartment(DepartmentID) VALUES (1),(5),(6)
INSERT INTO DimPosition(PositionID) VALUES (7),(8),(9)
I want to randomly join the 3 tables and get output like below : (example)
First execute:
EmployeeID DepartmentID PositionID RandomDate
1 4 7 2020-07-24 00:00:00.000
2 5 9 2020-11-25 00:00:00.000
Second execute:
EmployeeID DepartmentID PositionID RandomDate
1 4 7 2020-05-04 00:00:00.000
2 5 9 2020-10-30 00:00:00.000
If you want a random join :
-- Step 1: pair every employee with one department picked by ORDER BY NEWID().
-- The applied subquery exposes DepartmentID, so that is the column to select.
-- NOTE(review): the subquery is not correlated to the outer row; confirm that the
-- engine re-evaluates it per employee rather than reusing a single pick.
SELECT DP.EmployeeID, Q.DepartmentID
INTO #T1
FROM DimEmployee AS DP
CROSS APPLY (
    SELECT TOP 1 DD.DepartmentID
    FROM DimDepartment AS DD
    ORDER BY NEWID()
) AS Q;

-- Step 2: extend each (employee, department) pair with one position picked the same way.
SELECT *
INTO #T2
FROM #T1 AS T
CROSS APPLY (
    SELECT TOP 1 DP.PositionID
    FROM DimPosition AS DP
    ORDER BY NEWID()
) AS Q;
Or if you want all possibilities :
-- Every (employee, department, position) combination.
SELECT
    emp.EmployeeID,
    dep.DepartmentID,
    pos.PositionID
FROM DimEmployee AS emp
CROSS JOIN DimDepartment AS dep
CROSS JOIN DimPosition AS pos
You need to row-number each table and join on row-number:
CREATE TABLE DimEmployee(EmployeeID INT);
CREATE TABLE DimDepartment(DepartmentID INT);
CREATE TABLE DimDocteur(PositionID INT);

-- Number the rows of each table (ORDER BY (SELECT 1) imposes no particular order)
-- and line the three tables up on that row number.
WITH emp AS (
    SELECT *, ROW_NUMBER() OVER (ORDER BY (SELECT 1)) AS rn
    FROM DimEmployee
),
dep AS (
    SELECT *, ROW_NUMBER() OVER (ORDER BY (SELECT 1)) AS rn
    FROM DimDepartment
),
doc AS (
    SELECT *, ROW_NUMBER() OVER (ORDER BY (SELECT 1)) AS rn
    FROM DimDocteur
)
SELECT
    emp.EmployeeID,
    dep.DepartmentID,
    doc.PositionID,
    -- Day 0 plus a random number of days in the range 0..65529.
    DATEADD(day, (ABS(CHECKSUM(NEWID())) % 65530), 0) AS RandomDate
FROM emp
INNER JOIN dep ON dep.rn = emp.rn
INNER JOIN doc ON doc.rn = emp.rn
You can also change the ORDER BY to ORDER BY NEWID() to get a more random ordering.

SQL remove duplicates at ID and Month level

I have a table that is something like this:
ID Date Name Age
1 10/04/2015 Theja 24
1 28/04/2015 Theja1 26
1 14/07/2015 Theja2 45
1 30/07/2015 Theja2 45
1 30/08/2015 Theja3 54
2 10/04/2016 Jaya 23
2 28/04/2016 Jaya 23
2 14/05/2016 Jaya1 65
2 30/05/2016 Jaya1 65
But i want output like:
ID Date Name Age
1 28/04/2015 Theja1 26
1 01/05/2015 Theja1 26
1 01/06/2015 Theja1 26
1 30/07/2015 Theja2 45
1 30/08/2015 Theja3 54
2 28/04/2016 Jaya 23
2 30/05/2016 Jaya1 65
Keep one record per ID per month: the one with the latest date. If a month is missing for an ID, fill it in using the most recent previous record for that ID.
Different databases have different methods for handling dates. The following is an ANSI-standard way of getting one row per month:
-- One row per id and calendar month, keeping the latest date in that month
-- (the record the question asks to retain).
select id, max(date) as latest_date
from t
group by id,
         extract(year from date), extract(month from date);
I have tried for a solution and came with the following, but you need a calendar table to insert missing rows in the output.
SQL Server Based solution given here
Data Setup:
-- Sample table for the one-record-per-month example.
create table temptable (
    id     int,
    [date] date,
    name   varchar (50),
    age    int
);

-- Date literals are written month-day-year.
insert into temptable (id, [date], name, age)
values
    (1, '04-10-2015', 'Theja', 24),
    (1, '04-28-2015', 'Theja1', 26),
    (1, '07-14-2015', 'Theja2', 45),
    (1, '07-30-2015', 'Theja2', 45),
    (1, '08-30-2015', 'Theja3', 54),
    (2, '04-10-2016', 'Jaya', 23),
    (2, '04-28-2016', 'Jaya', 23),
    (2, '05-14-2016', 'Jaya1', 65),
    (2, '05-30-2016', 'Jaya1', 65);
The following solution resolves the duplicate issue, but to get the missing rows you need to implement the calendar table.
You can join with the calendar table and then use the output of cte3 to get the missing data.
-- Rank the rows of each (id, year, month) from latest to earliest date.
with ranked as (
    select
        t.*,
        row_number() over (
            partition by t.id, year(t.[date]), month(t.[date])
            order by t.[date] desc
        ) as rn
    from temptable t
),
-- Keep the latest row per (id, month) and attach the date of the next kept row of the same id.
month_latest as (
    select
        r.id,
        r.[date],
        r.name,
        r.age,
        lead(r.[date]) over (partition by r.id order by r.[date]) as NextDate
    from ranked r
    where r.rn = 1
)
select m.id, m.[date], m.name, m.age, m.NextDate
from month_latest m
order by m.[date]

Update table for each date and add remaining sites from another table

I have a table 'test' like this-
ID Site Start Time End Time
1 A 30-12-2014 16:06:54 30-12-2014 16:39:52
2 B 30-12-2014 12:12:50 30-12-2014 12:13:52
3 C 31-12-2014 12:14:23 31-12-2014 12:15:22
4 A 01-01-2015 12:20:29 01-01-2015 12:23:32
5 B 01-01-2015 12:28:49 01-01-2015 12:29:47
I have another table 'list' with a listing of sites-
Site
A
B
C
I need an output table where for each date, all the sites from 'list' is included like this-
ID Site Start Time End Time
1 A 30-12-2014 16:06:54 30-12-2014 16:39:52
2 B 30-12-2014 12:12:50 30-12-2014 12:13:52
NULL C 30-12-2014 00:00:00 30-12-2014 00:00:00
NULL A 31-12-2014 00:00:00 31-12-2014 00:00:00
NULL B 31-12-2014 00:00:00 31-12-2014 00:00:00
3 C 31-12-2014 12:14:23 31-12-2014 12:15:22
4 A 01-01-2015 12:20:29 01-01-2015 12:23:32
5 B 01-01-2015 12:28:49 01-01-2015 12:29:47
NULL C 01-01-2015 00:00:00 01-01-2015 00:00:00
So far I have been able to separate the 'test' table by date into intermediate tables and select the non-matching sites from the 'list' table. I am stuck with the loop. Please help.
Here is my code-
-- Add a date-only column so the rows of test can be grouped per day.
ALTER TABLE [test] ADD [DATE] date;
GO
-- New batch: the column added above has to exist before a statement that references it is compiled.
update [test]
set [DATE] = CAST([Start Time] as Date)
-- Sites from list that do not appear in test at all.
select t1.[Site]
from list t1
left join test t2 on t1.[site]=t2.[site] where t2.site is null;
-- Distinct dates, then numbered 1..N so they can be visited one at a time.
select distinct [DATE] into #Temp1 from [test]
order by [DATE];
select [DATE], row_number()over(order by ([Date])asc) as [Row] into #Temp2 from #Temp1;
drop table #Temp1;
GO
declare @row int
select @row = 0
-- @row is incremented before it is used, so stop once the last numbered date has been handled.
while ( @row < (select COUNT(*) from #Temp2))
begin
select @row = 1 + @row
-- SELECT ... INTO creates #temp3, so remove the copy left by the previous iteration first.
if object_id('tempdb..#temp3') is not null drop table #temp3
-- Rows of test for the date numbered @row.
select c.* into #temp3
from(
select a.* , b.[DATE] as b_date, b.[row]
from test a
inner join #Temp2 b
on a.[Date] = b.[Date] where b.[row] = @row
) c
End;
You can get the output you want using a select:
-- One output row per (site, day): the matching test row when there is one,
-- otherwise a NULL id with the day itself as both start and end time.
with days as (
    select distinct cast(starttime as date) as d
    from test
)
select
    t.id,
    l.site,
    coalesce(t.starttime, days.d) as starttime,
    coalesce(t.endtime, days.d) as endtime
from list l
cross join days
left join test t
    on t.site = l.site
   and cast(t.starttime as date) = days.d;
You can insert non-matching rows into the table with similar logic:
-- Add a placeholder row for every (site, day) pair that has no row in test yet.
with days as (
    select distinct cast(starttime as date) as d
    from test
)
insert into test (id, site, starttime, endtime)
select
    t.id,      -- always NULL here: only unmatched pairs pass the filter below
    l.site,
    days.d,
    days.d
from list l
cross join days
left join test t
    on t.site = l.site
   and cast(t.starttime as date) = days.d
where t.site is null;
Try this,
-- Builds one row per (site, day) between the first and last day found in @t,
-- using the real row where one exists and the day itself as start/end otherwise.
-- Table variables carry the @ sigil, which T-SQL requires.
Declare @t table(ID int, Site varchar(50),StartTime datetime,EndTime datetime)
insert into @t values
(1, 'A', '12-30-2014 16:06:54','12-30-2014 16:39:52'),
(2 , 'B', '12-30-2014 12:12:50','12-30-2014 12:13:52'),
(3 , 'C', '12-31-2014 12:14:23','12-31-2014 12:15:22'),
(4 , 'A', '01-01-2015 12:20:29','01-01-2015 12:23:32'),
(5, 'B', '01-01-2015 12:28:49','01-01-2015 12:29:47')
Declare @list table(Site varchar(50))
insert into @list values('A'),('B'),('C')
-- Last day to generate, taken from the data instead of a hard-coded literal.
Declare @lastDay date = (select max(cast(StartTime as date)) from @t)
-- CTE: one row per day from the earliest day in @t through @lastDay.
;WITH CTE AS
(
SELECT min(cast(StartTime as date)) st FROM @t
union all
SELECT dateadd(day,1, st) FROM CTE where
st<@lastDay
)
-- CTE1: every site combined with every day.
,CTE1 as
(
select * from @list a
cross apply (select * from cte)b
)
-- CTE2: attach the real row for that site and day, if any.
,CTE2 as
(
select y.ID,x.Site
,ISNULL(y.StartTime,x.st)StartTime,ISNULL(y.EndTime,x.st)EndTime
from CTE1 x
left join @t y on x.site=y.site and
cast(x.st as date)=cast(y.StartTime as date)
)
SELECT * FROM CTE2
-- Lift the default recursion limit so the day range may exceed 100 days.
OPTION (MAXRECURSION 0)

Query to return first date of missing date ranges

Looking for help with a query using SQL 2008 R2... I have a table with client and date fields. Most clients have a record for most dates, however some don't.
For example I have this data:
CLIENTID DT
1 5/1/14
1 5/2/14
2 5/3/14
3 5/1/14
3 5/2/14
I can find the missing dates for each CLIENTID by creating a temp table with all possible dates for the period and then joining that to each CLIENTID and DT and only selecting where there is a NULL.
This is what I can get easily for the date range 5/1/14 to 5/4/14:
CLIENTID DTMISSED
1 5/3/14
1 5/4/14
2 5/1/14
2 5/2/14
2 5/4/14
3 5/3/14
3 5/4/14
However I want to group each consecutive missed period together and get the beginning of each period and the length.
For example, if I use the date range 5/1/14 to 5/4/14 I'd like to get:
CLIENTID DTSTART MISSED
1 5/3/14 2
2 5/1/14 2
2 5/4/14 1
3 5/3/14 2
Thanks for helping!
It's fascinating how much more elegantly and efficiently this kind of problem can be solved in 2012.
First, the tables:
-- Sample data: the dates on which each client has a record.
create table #t (
    CLIENTID int,
    DT       date
)
go
insert into #t (CLIENTID, DT)
values
    (1, '5/1/14'),
    (1, '5/2/14'),
    (2, '5/3/14'),
    (3, '5/1/14'),
    (3, '5/2/14')
go
-- Calendar of every date in the period being checked.
create table #calendar (dt date)
go
insert into #calendar (dt)
values ('5/1/14'), ('5/2/14'), ('5/3/14'), ('5/4/14')
go
Here's the 2008 solution:
-- Groups each client's missing dates into consecutive ranges and reports the first
-- date and the length of every range, without windowed aggregates.
-- x: every (calendar date, client) pair that has no row in #t, numbered by client then date.
;with x as (
select *, row_number() over(order by clientid, dt) as rn
from #calendar c
cross join (select distinct clientid from #t) x
where not exists (select * from #t where c.dt=#t.dt and x.clientid=#t.clientid)
),
-- y: each missing date joined to the same client's missing date one day earlier;
-- x2_dt is NULL when the previous day was not missing, i.e. the row starts a new range.
y as (
select x1.*, x2.dt as x2_dt, x2.clientid as x2_clientid
from x x1
left join x x2 on x1.clientid=x2.clientid and x1.dt=dateadd(day,1,x2.dt)
),
-- z: grp = number of range starts among rows numbered up to and including this one,
-- so all rows of one range share the same grp value.
z as (
select *, (select sum(case when x2_dt is null then 1 else 0 end) from y y2 where y2.rn<=y.rn) as grp
from y
)
-- One output row per range: client, first missing date, number of missing dates.
select clientid, min(dt), count(*)
from z
group by clientid, grp
order by clientid
Compare it to 2012:
-- Groups each client's missing dates into consecutive ranges and reports the first
-- date and the length of every range, using a windowed running sum.
-- missing: every (client, calendar date) pair that has no row in #t.
;with missing as (
    select cl.clientid, c.dt
    from #calendar c
    cross join (select distinct clientid from #t) cl
    where not exists (
        select *
        from #t t
        where t.dt = c.dt
          and t.clientid = cl.clientid
    )
),
-- grouped: grp is a running count of range starts (rows whose previous day is not
-- missing), so all rows of one range share a grp value. The explicit ROWS frame
-- keeps the running sum row-by-row instead of relying on the default RANGE frame.
grouped as (
    select
        m.clientid,
        m.dt,
        sum(case when prev.dt is null then 1 else 0 end)
            over (order by m.clientid, m.dt rows unbounded preceding) as grp
    from missing m
    left join missing prev
        on prev.clientid = m.clientid
       and m.dt = dateadd(day, 1, prev.dt)
)
-- One output row per range, in a deterministic order.
select
    clientid,
    min(dt)  as dtstart,
    count(*) as missed
from grouped
group by clientid, grp
order by clientid, dtstart

How to limit the selection in SQL Server by sum of a column?

Can I limit rows by sum of a column in a SQL Server database?
For example:
Type | Time (in minutes)
-------------------------
A | 50
B | 10
C | 30
D | 20
E | 70
...
And I want to limit the selection by sum of time. For example maximum of 100 minutes. Table must look like this:
Type | Time (in minutes)
-------------------------
A | 50
B | 10
C | 30
Any ideas? Thanks.
-- Returns the leading rows (ordered by [Type]) whose running total of [Time]
-- stays within @MaxTotal. The table variable carries the @ sigil, which T-SQL requires.
DECLARE @T TABLE
(
[Type] CHAR(1) PRIMARY KEY,
[Time] INT
)
INSERT INTO @T
SELECT 'A',50 UNION ALL
SELECT 'B',10 UNION ALL
SELECT 'C',30 UNION ALL
SELECT 'D',20 UNION ALL
SELECT 'E',70;
-- Upper bound for the running total (minutes in the example).
DECLARE @MaxTotal BIGINT = 100;
WITH RecursiveCTE
AS (
-- Anchor: the first row by [Type]; its running total is its own [Time].
SELECT TOP 1 [Type], [Time], CAST([Time] AS BIGINT) AS Total
FROM @T
ORDER BY [Type]
UNION ALL
-- Recursive step: take the next row by [Type] (rn = 1 among the later rows) and
-- add its [Time] to the total; recursion stops once the limit would be exceeded.
SELECT R.[Type], R.[Time], R.Total
FROM (
SELECT T.*,
T.[Time] + Total AS Total,
rn = ROW_NUMBER() OVER (ORDER BY T.[Type])
FROM @T T
JOIN RecursiveCTE R
ON R.[Type] < T.[Type]
) R
WHERE R.rn = 1 AND Total <= @MaxTotal
)
SELECT [Type], [Time], Total
FROM RecursiveCTE
-- The anchor row is not checked inside the CTE, so drop it here if it alone exceeds the limit.
WHERE Total <= @MaxTotal
OPTION (MAXRECURSION 0);
-- Or if your table is small
-- Self-join: each row's total is the sum of [Time] over all rows up to and including it.
SELECT t1.[Type],
t1.[Time],
SUM(t2.[Time]) AS Total
FROM @T t1
JOIN @T t2
ON t2.[Type] <= t1.[Type]
GROUP BY t1.[Type],t1.[Time]
HAVING SUM(t2.[Time]) <= @MaxTotal
ORDER BY t1.[Type];