SQL getting status of a period - sql

I'm looking for a SQL solution for the following problem.
I want a list of employees who have been sick for more than 14 days in a row.
I've a sql table with the following:
First_name, Last_Name, INDIRECT_ID, SHIFT_DATE
John, Doe, Sick, 2016-01-01
John, Doe, Sick, 2016-01-02
John, Doe, working, 2016-01-03
John, Doe, Sick, 2016-01-04
John, Doe, Sick, 2016-01-05
etc.
I thought of doing this by checking whether they were sick 10 times (2 x 5 working days) within two weeks, but maybe there is a much simpler solution. My current query also returns duplicate rows:
-- Returns the employee name for every ticket row in the date range that has at
-- least 10 tickets of the same employee and same INDIRECT_ID in the 14 days
-- starting at that row's SHIFT_DATE.
-- One output row is produced per qualifying ticket row, so the same employee
-- can be listed several times.
select FIRST_NAME, LAST_NAME
from (select t.*
-- Correlated count over the window [t.SHIFT_DATE, t.SHIFT_DATE + 14 days).
,(select count(*)
from LABOR_TICKET t2
where t2.EMPLOYEE_ID = t.EMPLOYEE_ID and
t2.INDIRECT_ID = t.INDIRECT_ID and
t2.SHIFT_DATE >= t.SHIFT_DATE and
t2.SHIFT_DATE < DATEADD(day, 14, t.SHIFT_DATE)) NumWithin14Days
from LABOR_TICKET t
where SHIFT_DATE between '2016-01-01' and '2016-04-01'
) LABOR_TICKET
INNER JOIN
EMPLOYEE ON LABOR_TICKET.EMPLOYEE_ID = EMPLOYEE.ID
-- Only rows whose own status is 'SICK' and whose window holds 10+ such tickets.
where NumWithin14Days >= 10 AND INDIRECT_ID = 'SICK'

Try this,
First create all the 14 days intervals in between the From Date and To Date.
Then check the count of the 'Sick' is 14 in each interval for every employee.
-- Lists employees who have at least 14 'Sick' tickets inside any single one of
-- the consecutive, non-overlapping 14-day windows between @ST_DATE and @ED_DATE.
-- Limitation: windows are fixed, so a sick streak that straddles two windows
-- is not detected.
DECLARE @ST_DATE DATE = '2016-01-01',
        @ED_DATE DATE = '2016-04-01';

-- CTE_DATE: one row per 14-day window (ST_DATE and ED_DATE both inclusive).
WITH CTE_DATE AS (
    SELECT @ST_DATE AS ST_DATE,
           DATEADD(DAY, 13, @ST_DATE) AS ED_DATE
    UNION ALL
    SELECT DATEADD(DAY, 1, ED_DATE),
           DATEADD(DAY, 14, ED_DATE)
    FROM CTE_DATE
    WHERE DATEADD(DAY, 14, ED_DATE) <= @ED_DATE
)
-- Group per window (ST_DATE) so the count is evaluated inside each window
-- rather than across the whole period; DISTINCT collapses employees that
-- qualify in more than one window.
SELECT DISTINCT
    FIRST_NAME,
    LAST_NAME
FROM CTE_DATE
INNER JOIN LABOR_TICKET
    ON SHIFT_DATE BETWEEN ST_DATE AND ED_DATE
WHERE INDIRECT_ID = 'Sick'
GROUP BY ST_DATE, FIRST_NAME, LAST_NAME
HAVING COUNT(*) >= 14

Pseudo code to give you idea for all employees
if you have a calendar table like below
-- Calendar table: one row per calendar date.
CREATE TABLE dates
(
    datetime DATE
);

-- Seed two sample dates.
INSERT INTO dates
VALUES ('2016-01-01'),
       ('2016-01-02');
Now you can left join this with your main table like
-- Pseudo-query: left joins the calendar to the 'sick' rows of the main table
-- and counts the matching rows per first name and calendar date.
-- NOTE(review): the table and column names (datestable, dt.date) differ from
-- the "dates" table created above -- align them before running.
-- NOTE(review): because dt.date is part of the GROUP BY, the count is per
-- single date; confirm this is the intended "14 days" check.
select
mt.firstname,dt.date,count(indirect_id)
from
datestable dt
left join
maintable mt
on mt.date=dt.date
and mt.indirect_id='sick'
group by mt.firstname,dt.date
having count(indirect_id)>=14
order by dt.date

You should have provided more sample data.
Try this (I am sure it will work with other sample data); a few things are there just to filter the data.
-- Sample data held in a table variable.
DECLARE @t TABLE (First_name varchar(50), Last_Name varchar(50), INDIRECT_ID varchar(50), SHIFT_DATE date)
INSERT INTO @t (First_name, Last_Name, INDIRECT_ID, SHIFT_DATE) VALUES
 ('John', 'Doe', 'Sick', '2016-01-01')
,('John', 'Doe', 'Sick', '2016-01-02')
,('John', 'Doe', 'working', '2016-01-03')
,('John', 'Doe', 'Sick', '2016-01-04')
,('John', 'Doe', 'Sick', '2016-01-05')

DECLARE @name varchar(50) = 'John'

-- Walks day by day from the employee's earliest 'Sick' date; rn is the length
-- of the unbroken sick streak so far. Only the streak that starts at the
-- earliest sick day is followed.
;WITH CTE AS
(
    -- Anchor: earliest 'Sick' day. TOP ... ORDER BY is wrapped in a derived
    -- table because ORDER BY cannot be placed directly before UNION ALL.
    SELECT first_sick.First_name, first_sick.Last_Name, first_sick.SHIFT_DATE, 1 AS rn
    FROM (
        SELECT TOP 1 First_name, Last_Name, SHIFT_DATE
        FROM @t
        WHERE First_name = @name
          AND INDIRECT_ID = 'Sick'
        ORDER BY SHIFT_DATE
    ) AS first_sick
    UNION ALL
    -- Recursive step: the same person is also sick on the very next day.
    SELECT t.First_name, t.Last_Name, t.SHIFT_DATE, c.rn + 1
    FROM @t t
    INNER JOIN CTE c ON t.First_name = c.First_name
    WHERE t.INDIRECT_ID = 'Sick'
      AND t.SHIFT_DATE = DATEADD(day, 1, c.SHIFT_DATE)
      AND t.SHIFT_DATE <= '2016-01-31'
)
-- Rows appear once the streak has reached 14 consecutive days.
SELECT First_name, Last_Name, SHIFT_DATE, rn
FROM CTE
WHERE rn >= 14

-- Sample data held in a table variable.
DECLARE @t TABLE (First_name varchar(50), Last_Name varchar(50), INDIRECT_ID varchar(50), SHIFT_DATE date)
INSERT INTO @t (First_name, Last_Name, INDIRECT_ID, SHIFT_DATE) VALUES
 ('John', 'Doe', 'Sick', '2016-01-01')
,('John', 'Doe', 'Sick', '2016-01-02')
,('John', 'Doe', 'working', '2016-01-03')
,('John', 'Doe', 'Sick', '2016-04-04')
,('John', 'Doe', 'Sick', '2016-05-05')

-- numbered: each person's rows ranked from newest (rn = 1) to oldest.
;WITH numbered AS
(
    SELECT t.*,
           ROW_NUMBER() OVER (PARTITION BY last_name, first_name ORDER BY shift_date DESC) AS rn
    FROM @t t
)
-- Pairs every row (s) with the immediately earlier row (u) of the same person
-- and keeps pairs where both are 'Sick' and more than 13 days apart.
-- NOTE(review): Sickdays is the date difference between two consecutive rows,
-- not a count of sick rows -- confirm this matches the requirement.
SELECT s.*
      ,u.*
      ,Sickdays =
           CASE
               WHEN s.indirect_id = 'Sick' AND u.indirect_id = 'Sick'
                   THEN DATEDIFF(dd, u.shift_date, s.shift_date)
               ELSE 0
           END
FROM numbered s
JOIN numbered u
    ON s.last_name = u.last_name
   AND s.first_name = u.first_name
   AND s.rn = u.rn - 1
WHERE s.indirect_id = 'Sick'
  AND u.indirect_id = 'Sick'
  AND DATEDIFF(dd, u.shift_date, s.shift_date) > 13

Related

SQL Server - Query With Multiple Date Ranges in subquery

I have used conditional aggregation in another query where I needed to use multiple date ranges. In this case the date ranges are needed in a sub-query.
I would like to know if can I get desired results in one single query (without using UNION).
I need to check if a given record EXISTS in the subquery with date-range. Since I need to use EXISTS rather than a join - I am running into this issue.
Here is a sample script/data. The expected results table is for demonstration.
-- Drop leftovers from a previous run so the script can be re-executed.
IF OBJECT_ID('tempdb..#Entity') IS NOT NULL DROP TABLE #Entity
IF OBJECT_ID('tempdb..#EntityDate') IS NOT NULL DROP TABLE #EntityDate
IF OBJECT_ID('tempdb..#ExpectedOutput') IS NOT NULL DROP TABLE #ExpectedOutput

-- The two reporting ranges. All four are DATETIME: a DATE variable would drop
-- the 23:59:59 time part of the quarter end.
DECLARE @FortnightStart DATETIME = '2020/08/01', @FortnightEnd DATETIME = '2020/08/14 23:59:59'
DECLARE @QuarterStart DATETIME = '2020/04/01', @QuarterEnd DATETIME = '2020/06/30 23:59:59'

-- Virtual "Dates" table: one row per named range.
SELECT 'Fortnight' AS DateRange, @FortnightStart AS [Start], @FortnightEnd AS [End]
UNION
SELECT 'Quarter', @QuarterStart, @QuarterEnd
-- Entities being reported on.
CREATE TABLE #Entity (
    EntityId INT IDENTITY(1, 1) PRIMARY KEY,
    EntityName VARCHAR(50)
)
-- One row per submission made by an entity.
CREATE TABLE #EntityDate (
    EntityDateId INT IDENTITY(1, 1),
    EntityId INT,
    SubmittedDate DATETIME
)
-- The constraint now points at the temp table #Entity (the original referenced
-- a permanent table "Entity" that this script never creates).
-- SQL Server does not enforce FOREIGN KEY constraints on temporary tables, so
-- this line only documents the intended relationship.
ALTER TABLE #EntityDate ADD CONSTRAINT FK_EntityDate_Entity FOREIGN KEY (EntityId) REFERENCES #Entity(EntityId)
-- Seed five entities; EntityId is generated by the IDENTITY column.
-- The #EntityDate rows below refer to these entities by ids 1-5.
INSERT INTO #Entity (EntityName)
SELECT 'Alice'
UNION
SELECT 'Bob'
UNION
SELECT 'Cameron'
UNION
SELECT 'Diego'
UNION
SELECT 'Elliot'
-- Show the seeded entities.
SELECT * FROM #Entity
-- Seed the submission dates per entity.
-- NOTE(review): the literals look like month/day/year; how they are parsed
-- depends on the session's DATEFORMAT/language -- confirm.
INSERT INTO #EntityDate(EntityId, SubmittedDate)
SELECT 1, '08/01/2020 11:00:00' -- only 1 record is expected in the output for this Entity
UNION
SELECT 1, '08/10/2020 10:00:00'
UNION
SELECT 1, '04/10/2020 10:00:00' -- this record should show up for the quarter date range
UNION
SELECT 2, '06/01/2020 11:00:00' --
UNION
SELECT 3, '05/01/2020' -- only 1 record is expected in the output for this Entity
UNION
SELECT 3, '06/01/2020'
UNION
SELECT 4, '10/01/2021' -- does not fit in any date range
UNION
SELECT 5, '08/02/2020'
-- Raw view of the sample data: every submission joined to its entity.
SELECT *
FROM #EntityDate d
INNER JOIN #Entity e ON d.EntityId = e.EntityId

-- Entities with at least one submission inside the fortnight.
SELECT *
FROM #Entity e
WHERE EXISTS ( SELECT 1
               FROM #EntityDate d
               WHERE d.SubmittedDate BETWEEN @FortnightStart AND @FortnightEnd
                 AND d.EntityId = e.EntityId
             )

-- Entities with at least one submission inside the quarter.
SELECT *
FROM #Entity e
WHERE EXISTS ( SELECT 1
               FROM #EntityDate d
               WHERE d.SubmittedDate BETWEEN @QuarterStart AND @QuarterEnd
                 AND d.EntityId = e.EntityId
             )
-- Hand-written expected result: which entity should show up in which range.
CREATE TABLE #ExpectedOutput
(
    EntityId INT,
    DateRange VARCHAR(50)
)

INSERT INTO #ExpectedOutput (EntityId, DateRange)
VALUES (1, 'Fortnight'),
       (5, 'Fortnight'),
       (1, 'Quarter'),
       (2, 'Quarter'),
       (3, 'Quarter')

-- Expected rows with the entity name attached, by range and then entity.
SELECT o.*, e.EntityName
FROM #Entity e
INNER JOIN #ExpectedOutput o ON e.EntityId = o.EntityId
ORDER BY o.DateRange, o.EntityId
Using the virtual Dates table you created at the top of your script, you need to join that to Entity, using the EXISTS as the ON condition
-- Ranges are half-open: [Start, End). Each End is the first instant AFTER the
-- range, so no submission on the last day is missed. The original declared
-- the quarter end as DATE, which dropped its 23:59:59 time part and cut off
-- submissions made during 30 June.
DECLARE @FortnightStart DATETIME = '20200801', @FortnightEnd DATETIME = '20200815';
DECLARE @QuarterStart DATETIME = '20200401', @QuarterEnd DATETIME = '20200701';

-- Dates: one row per named range.
WITH Dates AS (
    SELECT 'Fortnight' AS DateRange, @FortnightStart AS [Start], @FortnightEnd AS [End]
    UNION ALL
    SELECT 'Quarter', @QuarterStart, @QuarterEnd
)
-- One row per (range, entity) where the entity has a submission in the range.
-- EXISTS in the ON clause keeps it to one row however many submissions match.
SELECT
    e.EntityId,
    d.DateRange
FROM Dates AS d
INNER JOIN #Entity AS e
    ON EXISTS (SELECT 1
               FROM #EntityDate AS ed
               WHERE ed.SubmittedDate >= d.[Start]
                 AND ed.SubmittedDate < d.[End]
                 AND ed.EntityId = e.EntityId
              )
ORDER BY d.DateRange, e.EntityId;
db<>fiddle
try something like this
-- Inline sample rows built with a table value constructor instead of UNIONs.
SELECT EntityIDate.EntityId,
       EntityIDate.SubmittedDate
FROM (
    VALUES
        (1, '08/01/2020 11:00:00'),
        (1, '08/10/2020 10:00:00'),
        (1, '04/10/2020 10:00:00'),
        (2, '06/01/2020 11:00:00'),
        (3, '05/01/2020'),
        (3, '06/01/2020'),
        (4, '10/01/2021'),
        (5, '08/02/2020')
) AS EntityIDate (EntityId, SubmittedDate)
Documentation: https://learn.microsoft.com/en-us/u-sql/statements-and-expressions/select/from/select-selecting-from-the-values-table-value-constructor
Why do you have such a requirement? What is the harm in using multiple UNION ALLs? Performance-wise there is no harm.
I hope I understood your requirement correctly.
-- Ranges are half-open: [StartDate, EndDate). Each end value is the first
-- instant after the range, so submissions on the last day are included.
DECLARE @FortnightStart DATETIME = '20200801', @FortnightEnd DATETIME = '20200815'
DECLARE @QuarterStart DATETIME = '20200401', @QuarterEnd DATETIME = '20200701'

-- CTE: the named ranges; Orderflg fixes their order in the output.
;WITH CTE
AS (SELECT 1 AS Orderflg,
           'Fortnight' AS DateRange,
           @FortnightStart AS StartDate,
           @FortnightEnd AS EndDate
    UNION ALL
    SELECT 2,
           'Quarter',
           @QuarterStart,
           @QuarterEnd),
-- CTE1: every submission tagged with the range it falls in, numbered per
-- (entity, range) so only the first submission is kept below.
CTE1
AS (SELECT d.EntityId,
           ca.DateRange,
           ca.Orderflg,
           ROW_NUMBER() OVER (PARTITION BY d.EntityId,
                                           ca.Orderflg
                              ORDER BY d.SubmittedDate) AS rn
    FROM #EntityDate d
    CROSS APPLY
    (
        -- ORDER BY makes TOP 1 deterministic should ranges ever overlap.
        SELECT TOP 1 C.DateRange,
                     C.Orderflg
        FROM CTE C
        WHERE d.SubmittedDate >= C.StartDate
          AND d.SubmittedDate < C.EndDate
        ORDER BY C.Orderflg
    ) ca
   )
SELECT e.EntityId,
       c1.DateRange,
       e.EntityName
FROM CTE1 c1
INNER JOIN #Entity e ON c1.EntityId = e.EntityId
WHERE c1.rn = 1
ORDER BY c1.Orderflg, e.EntityId;

dynamically select column name that changed

I have a table as shown below.
ID NAME ADDRESS CITY ROLE Date_Modified
1 Tom something austin manager X
2 Tom nothing austin principal Y
3 Tom anything dallas VP Z
How do I write a query to select the column names that have changed between entries 1, 2 and 3? I am currently building a report that needs to identify changes. This is what I have so far and need to work with.
I need to be able to detect via stored proc and see output below.
Id ColumnName DateChanged
2 Address Y
2 Role Y
3 Address Z
3 Role Z
If I understood your question correctly, what you need is to detect changes from one row to the next and to unpivot the data. LAG requires SQL Server 2012 or later.
-- Reports, per row, the monitored columns (address, role) whose value differs
-- from the previous row (by id), one output row per changed column.
;with cte as (
    -- LAG pulls the previous row's values; OldId is NULL only for the first
    -- row, which is skipped because it has nothing to compare against.
    select id, LAG(id, 1) OVER (ORDER BY id) AS OldId,
           address, LAG(address, 1) OVER (ORDER BY id) AS OldAddress,
           role, LAG(role, 1) OVER (ORDER BY id) AS OldRole,
           Date_Modified
    from audit_data
)
SELECT id, ColName, data_col, Date_Modified
FROM
(
    -- Blank out (NULL) every column that did NOT change. UNPIVOT drops NULLs,
    -- so only changed columns reach the output; the original emitted both
    -- columns whenever either one changed.
    select id,
           CASE WHEN OldAddress IS NULL OR address <> OldAddress THEN address END AS address,
           CASE WHEN OldRole IS NULL OR role <> OldRole THEN role END AS role,
           Date_Modified
    from cte
    where OldId IS NOT NULL
) AS cp
-- unpivot address and role into data_col column
UNPIVOT
(
    data_col FOR ColName IN (address, role)
) AS up;
Data used for setup:
-- drop table audit_data
-- Audit fixture: one row per version of a person's record.
create table audit_data (
    id int,
    name VARCHAR(100),
    address VARCHAR(100),
    city varchar(100),
    role VARCHAR(100),
    Date_Modified DATETIME2
)
-- Explicit column list so the insert survives column additions or reordering;
-- 'principal' matches the sample data shown in the question.
insert into audit_data (id, name, address, city, role, Date_Modified)
values (1, 'Tom', 'something', 'austin', 'manager', '20150103'),
       (2, 'Tom', 'nothing', 'austin', 'principal', '20150205'),
       (3, 'Tom', 'anything', 'dallas', 'VP', '20150314')
go
[Edit] SQL 2008R2 version:
-- SQL Server 2008 R2 variant: LAG is unavailable, so each row is joined to its
-- predecessor through ROW_NUMBER.
;with ad_cte as (
    select id, address, role, Date_Modified, ROW_NUMBER() OVER (ORDER BY id) RowNo
    from audit_data
),
cte as (
    -- The inner join pairs each row with the previous one; the first row has
    -- no predecessor and drops out, so no extra "id > 1" filter is needed
    -- (that filter was only right when ids start at 1).
    select ad.id,
           ad.address, ad_old.address AS OldAddress,
           ad.role, ad_old.role AS OldRole,
           ad.Date_Modified
    from ad_cte ad
    join ad_cte ad_old on ad_old.RowNo + 1 = ad.RowNo
)
SELECT id, ColName, data_col, Date_Modified
FROM
(
    -- NULL out unchanged columns; UNPIVOT drops NULLs, so only the columns
    -- that actually changed are reported.
    select id,
           CASE WHEN OldAddress IS NULL OR address <> OldAddress THEN address END AS address,
           CASE WHEN OldRole IS NULL OR role <> OldRole THEN role END AS role,
           Date_Modified
    from cte
) AS cp
-- unpivot address and role into data_col column
UNPIVOT
(
    data_col FOR ColName IN (address, role)
) AS up;
This is very similar to Alexei's answer:
-- Fixture: record versions for two people, newest version last.
CREATE TABLE #temp( ID INT IDENTITY(1, 1),
                    NAME VARCHAR(30),
                    ADDRESS VARCHAR(30),
                    CITY VARCHAR(30),
                    ROLE VARCHAR(30),
                    Date_Modified DATETIME );

-- Tom's three versions.
INSERT INTO #temp (NAME, ADDRESS, CITY, ROLE, Date_Modified)
VALUES ('Tom', 'something', 'austin', 'manager',   DATEADD(day, -3, GETDATE())),
       ('Tom', 'nothing',   'austin', 'principal', DATEADD(day, -2, GETDATE())),
       ('Tom', 'anything',  'dallas', 'VP',        DATEADD(day, -1, GETDATE()));

-- Jon's three versions. The original ran these SELECTs without an INSERT, so
-- the rows were displayed but never stored in #temp.
INSERT INTO #temp (NAME, ADDRESS, CITY, ROLE, Date_Modified)
VALUES ('Jon', 'something', 'san antonio', 'assistant manager', DATEADD(day, -3, GETDATE())),
       ('Jon', 'something', 'austin',      'assistant manager', DATEADD(day, -2, GETDATE())),
       ('Jon', 'anything',  'dallas',      'manager',           DATEADD(day, -1, GETDATE()));
-- For each person (NAME), compares every version with the one before it (by
-- Date_Modified) and lists the monitored columns (address, role) that changed,
-- reported against the NEWER row's id and modification date.
-- ordered: versions numbered oldest-first per person.
;WITH ordered AS (
    SELECT ID,
           NAME,
           ADDRESS,
           ROLE,
           Date_Modified,
           ROW_NUMBER() OVER (PARTITION BY NAME ORDER BY Date_Modified) AS ROWNUM
    FROM #temp
)
SELECT id,
       ColName,
       Date_Modified
FROM (
    -- prev = earlier version, cur = the version that follows it.
    -- Unchanged columns become NULL and are dropped by UNPIVOT. Values are
    -- compared directly: a CHECKSUM pre-filter can collide and hide a change.
    SELECT cur.ID,
           CASE
               WHEN prev.ADDRESS <> cur.ADDRESS
                   THEN cur.ADDRESS
           END AS ADDRESS,
           CASE
               WHEN prev.ROLE <> cur.ROLE
                   THEN cur.ROLE
           END AS ROLE,
           cur.Date_Modified
    FROM ordered AS prev
    INNER JOIN ordered AS cur
        ON cur.NAME = prev.NAME
       AND cur.ROWNUM = prev.ROWNUM + 1 ) AS cp
UNPIVOT( data FOR ColName IN( address,
                              role )) AS up;

Remove duplicates with less null values

I have a table of employees which contains about 25 columns. Right now there are a lot of duplicates and I would like to try and get rid of some of these duplicates.
First, I want to find the duplicates by looking for multiple records that have the same values in first name, last name, employee number, company number and status.
-- Key combinations that occur on two or more employee rows.
SELECT em.firstname,
       em.lastname,
       em.employeenumber,
       em.companynumber,
       em.statusflag
FROM employeemaster AS em
GROUP BY em.firstname,
         em.lastname,
         em.employeenumber,
         em.companynumber,
         em.statusflag
HAVING COUNT(*) >= 2
This gives me duplicates but my goal is to find and keep the best single record and delete the other records. The "best single record" is defined by the record with the least amount of NULL values in all of the other columns. How can I do this?
I am using Microsoft SQL Server 2012 MGMT Studio.
EXAMPLE:
Red: DELETE
Green: KEEP
NOTE: There are a lot more columns in the table than what this table shows.
You can use the sys.columns table to get a list of columns and build a dynamic query. This query will return a 'KeepThese' value for every record you want to keep based on your given criteria.
-- Test fixture: a cut-down EmployeeMaster with two nullable detail columns
-- (UserName, Branch) next to the five columns that define a duplicate.
create table EmployeeMaster
(
Record int identity(1,1),
FirstName varchar(50),
LastName varchar(50),
EmployeeNumber int,
CompanyNumber int,
StatusFlag int,
UserName varchar(50),
Branch varchar(50)
);
-- Three 'Jake Jones' duplicates with 0, 1 and 2 NULL detail columns, plus one
-- 'Jane Jones' row that has no duplicate.
insert into EmployeeMaster
(
FirstName,
LastName,
EmployeeNumber,
CompanyNumber,
StatusFlag,
UserName,
Branch
)
values
('Jake','Jones',1234,1,1,'JJONES','PHX'),
('Jake','Jones',1234,1,1,NULL,'PHX'),
('Jake','Jones',1234,1,1,NULL,NULL),
('Jane','Jones',5678,1,1,'JJONES2',NULL);
-- Ranks the rows of each duplicate group by how many non-NULL values they
-- hold across ALL columns of EmployeeMaster; KeepThese = 1 marks the row to
-- keep. The column list is read from sys.columns, so the statement has to be
-- built dynamically.
declare @sql nvarchar(max)
select @sql = '
select e.*,
row_number() over(partition by
e.FirstName,
e.LastName,
e.EmployeeNumber,
e.CompanyNumber,
e.StatusFlag
order by n.NonNullCnt desc) as KeepThese
from EmployeeMaster e
cross apply (select count(n.value) as NonNullCnt from (select ' +
-- Emits one "cast(e.[col] as varchar(50)) as value" per column, glued with
-- "union all select". The '#' sentinel marks the dangling trailing glue so
-- REPLACE can strip it. QUOTENAME guards unusual column names, and
-- TYPE/.value() undoes the XML entity encoding added by FOR XML PATH.
replace((
select 'cast(e.' + quotename(c.name) + ' as varchar(50)) as value union all select '
from sys.columns c
where c.object_id = t.object_id
order by c.column_id
for xml path(''), type
).value('.', 'nvarchar(max)') + '#',' union all select #','') + ')n)n'
from sys.tables t
where t.name = 'EmployeeMaster'
exec(@sql)
Try this.
-- Keeps, for every duplicate group, the single row with the fewest NULLs in
-- the detail columns. rn = 1 is the best row of its group.
-- The original's self-join to employeemaster (with the misspelled column
-- "companynumbe") only multiplied rows and did not affect which row gets
-- rn = 1, so the ranking is now computed directly on the table.
;WITH ranked
     AS (SELECT firstname,
                lastname,
                employeenumber,
                companynumber,
                statusflag,
                username,
                branch,
                Row_number()
                  OVER(
                    partition BY firstname, lastname, employeenumber, companynumber, statusflag
                    -- NULL count per row; add one CASE term for each remaining column.
                    ORDER BY (CASE WHEN username IS NULL THEN 1 ELSE 0 END
                              + CASE WHEN branch IS NULL THEN 1 ELSE 0 END)) AS rn
         FROM employeemaster)
SELECT firstname,
       lastname,
       employeenumber,
       companynumber,
       statusflag,
       rn,
       username,
       branch
FROM ranked
WHERE rn = 1
I tested this with MySQL and used string concatenation with NULL to find the best record, because concatenating anything with NULL yields NULL, so LENGTH(NULL || 'data') is NULL. A length exists only if all the columns are non-NULL. This may not be perfect.
-- MySQL fixture: same shape as the SQL Server sample, with an auto-increment key.
CREATE TABLE EmployeeMaster
(
    Record         INT AUTO_INCREMENT,
    FirstName      VARCHAR(50),
    LastName       VARCHAR(50),
    EmployeeNumber INT,
    CompanyNumber  INT,
    StatusFlag     INT,
    UserName       VARCHAR(50),
    Branch         VARCHAR(50),
    PRIMARY KEY (record)
);

-- Three 'Jake Jones' duplicates of decreasing completeness, one unique 'Jane Jones'.
INSERT INTO EmployeeMaster
    (FirstName, LastName, EmployeeNumber, CompanyNumber, StatusFlag, UserName, Branch)
VALUES
    ('Jake', 'Jones', 1234, 1, 1, 'JJONES', 'PHX'),
    ('Jake', 'Jones', 1234, 1, 1, NULL, 'PHX'),
    ('Jake', 'Jones', 1234, 1, 1, NULL, NULL),
    ('Jane', 'Jones', 5678, 1, 1, 'JJONES2', NULL);
My query idea looks like this
-- For every duplicate group, returns the row(s) whose concatenated detail
-- columns are longest. CONCAT yields NULL as soon as one argument is NULL, so
-- only rows with every detail column filled can qualify.
-- CONCAT replaces "||", which is the logical OR operator in MySQL unless the
-- PIPES_AS_CONCAT sql_mode is enabled.
SELECT e.*
FROM employeemaster e
JOIN ( SELECT firstname,
              lastname,
              employeenumber,
              companynumber,
              statusflag,
              MAX( LENGTH( CONCAT( username, branch ) ) ) AS data_quality
       FROM employeemaster
       GROUP BY firstname, lastname, employeenumber, companynumber, statusflag
       HAVING COUNT(*) > 1
     ) g
  -- Match on the group key as well, so a row is only compared with the best
  -- score of its OWN duplicate group.
  ON  e.firstname = g.firstname
  AND e.lastname = g.lastname
  AND e.employeenumber = g.employeenumber
  AND e.companynumber = g.companynumber
  AND e.statusflag = g.statusflag
  AND LENGTH( CONCAT( e.username, e.branch ) ) = g.data_quality

Query to merge continuous temporal records

I have a table like this:
id START_DATE end_date
1 01/01/2011 01/10/2011
2 01/11/2011 01/20/2011
3 01/25/2011 02/01/2011
4 02/10/2011 02/15/2011
5 02/16/2011 02/27/2011
I want to merge the records where the start_date is just next day of end_date of another record: So the end record should be something like this:
new_id START_DATE end_date
1 01/01/2011 01/20/2011
2 01/25/2011 02/01/2011
3 02/10/2011 02/27/2011
One way that I know to do this will be to create a row based temp table with various rows as dates (each record for one date, between the total range of days) and thus making the table flat.
But there has to be a cleaner way to do this in a single query... e.g. something using row_num?
Thanks guys.
-- Sample ranges held in a table variable.
declare @T table
(
    id int,
    start_date datetime,
    end_date datetime
)
insert into @T (id, start_date, end_date) values
(1, '01/01/2011', '01/10/2011'),
(2, '01/11/2011', '01/20/2011'),
(3, '01/25/2011', '02/01/2011'),
(4, '02/10/2011', '02/15/2011'),
(5, '02/16/2011', '02/27/2011')

-- Expands every range into one row per day, then groups consecutive days:
-- within an unbroken run of days, (day - row_number) is the same value (grp),
-- so each distinct grp is one merged range.
select row_number() over(order by min(dt)) as new_id,
       min(dt) as start_date,
       max(dt) as end_date
from (
      select dateadd(day, N.Number, start_date) as dt,
             dateadd(day, N.Number - row_number() over(order by dateadd(day, N.Number, start_date)), start_date) as grp
      from @T
        -- master..spt_values with type 'P' serves as a numbers table here.
        inner join master..spt_values as N
          on N.number between 0 and datediff(day, start_date, end_date) and
             N.type = 'P'
     ) as T
group by grp
order by new_id
You can use a numbers table instead of using master..spt_values.
Try This
-- Iteratively merges ranges that touch (b starts the day after a ends).
-- [Table] is a placeholder for the real table name (id, start_date, end_date).
-- Work list for one pass: row to extend, row to delete, and the new end date.
Declare @chgRecs Table
    (updId int primary key not null,
     delId int not null,
     endt datetime not null)

-- Loop while some range is directly followed by another one.
While Exists (Select * from [Table] a
              Where Exists
                  (Select * from [Table] b
                   Where b.start_date =
                         DateAdd(day, 1, a.End_Date)))
Begin
    -- Pick chain heads (a): rows that have a follower (b) but no predecessor.
    Insert @chgRecs (updId, delId, endt)
    Select a.id, b.id, b.End_Date
    From [Table] a
        Join [Table] b
            On b.start_date = DateAdd(day, 1, a.End_Date)
    Where Not Exists
        (Select * from [Table] p
         Where p.end_Date =
               DateAdd(day, -1, a.Start_Date))

    -- Remove the follower rows ...
    Delete From [Table] Where id In (Select delId from @chgRecs)

    -- ... and stretch each head row to the follower's end date.
    Update t Set
        End_Date = u.endt
    From [Table] t Join @chgRecs u
        On u.updId = t.Id

    -- Empty the work list for the next pass.
    Delete From @chgRecs
End
No, was not looking for a loop...
I guess this is a good solution:
taking all the data in a #temp table
-- Show the raw ranges.
SELECT * FROM #temp
-- Part 1: for each pair where t1 starts the day after t2 ends, emit one
-- merged row (t2's start, t1's end). Only directly adjacent PAIRS are joined,
-- so a chain of three or more touching ranges yields one row per pair.
SELECT t2.start_date , t1.end_date FROM #temp t1 JOIN #temp t2 ON t1.start_date = DATEADD(DAY,1,t2.end_date)
UNION
-- Part 2: ranges that took part in no pair, neither as the earlier (t2) nor
-- as the later (t1) side.
SELECT START_DATE,end_date FROM #temp WHERE start_date NOT IN (SELECT t2.START_DATE FROM #temp t1 JOIN #temp t2 ON t1.start_date = DATEADD(DAY,1,t2.end_date))
AND end_date NOT IN (SELECT t1.end_Date FROM #temp t1 JOIN #temp t2 ON t1.start_date = DATEADD(DAY,1,t2.end_date))
DROP TABLE #temp
Please let me know if there is anything better than this.
Thanks guys.
A recursive solution:
-- Fixture for the recursive solution: ranges keyed by a gap-free Id.
CREATE TABLE TestData
(
Id INT PRIMARY KEY,
StartDate DATETIME NOT NULL,
EndDate DATETIME NOT NULL
);
-- The literals below are month/day/year.
SET DATEFORMAT MDY;
-- Rows 6 and 7 both start on 02/28/2011, the day after row 5 ends.
INSERT TestData
SELECT 1, '01/01/2011', '01/10/2011'
UNION ALL
SELECT 2, '01/11/2011', '01/20/2011'
UNION ALL
SELECT 3, '01/25/2011', '02/01/2011'
UNION ALL
SELECT 4, '02/10/2011', '02/15/2011'
UNION ALL
SELECT 5, '02/16/2011', '02/27/2011'
UNION ALL
SELECT 6, '02/28/2011', '03/06/2011'
UNION ALL
SELECT 7, '02/28/2011', '03/03/2011'
UNION ALL
SELECT 8, '03/10/2011', '03/18/2011'
UNION ALL
SELECT 9, '03/19/2011', '03/25/2011';
-- Walks the rows in Id order starting at Id = 1 and carries a group number:
-- it stays the same when a row starts exactly one day after the previous
-- row's end, otherwise it goes up by one.
WITH RecursiveCTE
AS
(
    -- Anchor: the first row opens group 1.
    SELECT first_row.Id, first_row.StartDate, first_row.EndDate,
           1 AS GroupID
    FROM TestData first_row
    WHERE first_row.Id = 1

    UNION ALL

    -- Step: visit the row whose Id directly follows the last one visited.
    SELECT nxt.Id, nxt.StartDate, nxt.EndDate,
           CASE DATEDIFF(DAY, done.EndDate, nxt.StartDate)
               WHEN 1 THEN done.GroupID
               ELSE done.GroupID + 1
           END
    FROM RecursiveCTE done
    JOIN TestData nxt ON nxt.Id - 1 = done.Id
)
-- Collapse every group into its overall start and end.
SELECT grouped.GroupID,
       MIN(grouped.StartDate) AS StartDate,
       MAX(grouped.EndDate) AS EndDate
FROM RecursiveCTE grouped
GROUP BY grouped.GroupID
ORDER BY grouped.GroupID;

DROP TABLE TestData;

SQL Server 2008: complex Insert

I have a table called Employees:
BeginYear | EndYear | Name
1974 1983 Robert
For each record in Employees I need to insert each year into a new table called EmployeeYears
So:
For Each Record in Employees
For i as int = Begin Year to End year
INSERT i, Name into EmployeeYears
Any Way to do this in SQL...possibly with cursors?
The gist of it is using a WITH statement to create all the records and use them to insert into your final table.
-- Expands every employee into one row per year from BeginYear to EndYear
-- (inclusive) and inserts the rows into EmployeeYears.
;WITH q AS (
    -- Anchor: each employee's first year.
    SELECT Year = BeginYear
         , Name
    FROM Employees
    UNION ALL
    -- Step: next year, for as long as the employee's EndYear is not reached.
    -- The join back to Employees is on Name, so Name is assumed to be unique.
    SELECT q.Year + 1
         , q.Name
    FROM q
    INNER JOIN Employees e ON e.Name = q.Name
                          AND e.EndYear > q.Year
)
-- Explicit column lists: the insert no longer depends on the column order of
-- EmployeeYears or of the CTE.
INSERT INTO EmployeeYears ([Year], Name)
SELECT q.Year, q.Name
FROM q
-- 0 lifts the default limit of 100 recursion levels.
OPTION(MAXRECURSION 0)
Testdata
-- Source table: one row per employee with the first and last year.
CREATE TABLE Employees
(
    BeginYear INTEGER,
    EndYear   INTEGER,
    Name      VARCHAR(32)
)
-- Target table: one row per employee and year.
CREATE TABLE EmployeeYears
(
    Year INTEGER,
    Name VARCHAR(32)
)
-- Two sample employees.
INSERT INTO Employees
VALUES (1974, 1976, 'Robert'),
       (1972, 1975, 'Lieven')
Results
-- Show the generated rows, grouped by employee and in year order.
SELECT ey.Year,
       ey.Name
FROM EmployeeYears AS ey
ORDER BY ey.Name, ey.Year
1972 Lieven
1973 Lieven
1974 Lieven
1975 Lieven
1974 Robert
1975 Robert
1976 Robert
If you have a numbers table, you can join on it to get the individual year records and avoid using a cursor. I just populated the numbers table with the numbers from 1965 to 1968, but a real-life numbers table (which would not be a temp table as shown below for example purposes, but one that lives in your schema) would probably have several million records, as it is useful for a lot of comparisons.
-- Tiny numbers table for the demo: the years 1965-1968.
CREATE TABLE #Numbers (Number INT)
INSERT INTO #Numbers
VALUES (1965), (1966), (1967), (1968)

-- Demo employees with their first and last year.
CREATE TABLE #employees (name VARCHAR (50), beginyear INT, endyear INT)
INSERT INTO #employees
VALUES ('Dick', 1966, 1968),
       ('harry', 1965, 1967),
       ('tom', 1955, 1966)

-- One row per employee and per number that lies inside the employee's range.
INSERT INTO EmployeeYears (Name, [Year])
SELECT emp.Name, num.number
FROM #Numbers num
JOIN #Employees emp ON num.number >= emp.beginyear
                   AND num.number <= emp.endyear
ORDER BY emp.name
Yes, you actually have to do a loop... I'd prefer not using CURSORS, but this case sorta makes sense... anyway, here's the code as just a straight loop to show you that you can do that kind of code in SQL:
-- Plain WHILE-loop version: visits the employees in Name order and inserts
-- one EmployeeYears row per year of each employee's range.
DECLARE @Employee VARCHAR(100)
DECLARE @BeginYear INT, @EndYear INT, @i INT
SET @Employee = ''
WHILE (1=1)
BEGIN
    -- Next name after the one just handled; NULL once the list is exhausted.
    -- (WHERE has to come before ORDER BY.)
    SET @Employee = (SELECT TOP 1 Name FROM Employees WHERE Name > @Employee ORDER BY Name)
    IF @Employee IS NULL BREAK
    SELECT @BeginYear = BeginYear, @EndYear = EndYear FROM Employees WHERE Name = @Employee
    -- Inner loop: one insert per year, both ends inclusive.
    SET @i = @BeginYear
    WHILE (@i <= @EndYear)
    BEGIN
        INSERT INTO EmployeeYears (Year, Name) VALUES (@i, @Employee)
        SET @i = @i + 1
    END
END
You can use a recursive CTE:
-- year_span: starts each employee at BeginYear and adds one year per
-- recursion level until EndYear is reached.
;WITH year_span AS
(
    SELECT emp.BeginYear AS Yr,
           emp.EndYear,
           emp.Name
    FROM Employees AS emp

    UNION ALL

    SELECT span.Yr + 1,
           span.EndYear,
           span.Name
    FROM year_span AS span
    WHERE span.EndYear > span.Yr
)
INSERT INTO EmployeeYears (Year, Name)
SELECT span.Yr,
       span.Name
FROM year_span AS span
ORDER BY span.Name, span.Yr
-- 0 = no cap on the number of recursion levels.
OPTION(MAXRECURSION 0)
You can use a recursive procedure, like the one below:
-- Inserts one EmployeeYears row per year from @BeginYear to @EndYear
-- (both inclusive) for @Name by calling itself once per year.
-- Stored procedures can nest at most 32 levels deep in SQL Server, so ranges
-- longer than that need one of the set-based solutions instead.
CREATE PROCEDURE InsertYear
    @Name VARCHAR(32),
    @BeginYear INT,
    @EndYear INT
AS
BEGIN
    -- Nothing to insert for an empty range.
    IF @BeginYear > @EndYear
        RETURN;

    INSERT INTO EmployeeYears (Year, Name) VALUES (@BeginYear, @Name);

    SET @BeginYear = @BeginYear + 1;

    -- "<=" so that @EndYear itself is inserted as well.
    IF @BeginYear <= @EndYear
    BEGIN
        EXEC InsertYear @Name, @BeginYear, @EndYear;
    END

    RETURN;
END
You could do this, but it will fail if the begin or end year exceeds 2047, because the type 'P' rows of master..spt_values only hold the numbers 0 to 2047.
-- One row per employee and per year inside the employee's range, using
-- master..spt_values as a ready-made numbers table.
INSERT INTO EmployeeYears (Year, name)
SELECT v.number, e.name
FROM
    Employees e
    INNER JOIN master..spt_values v on
        -- Only the type 'P' rows form a plain number sequence; without this
        -- filter the same number matches once per type and rows are duplicated.
        v.type = 'P' and
        v.number between e.beginYear and e.endYear