Group by with gap in date sequence ("gaps and islands") - sql

I am trying to solve a "gaps and islands" by date issue I'm facing (kudos to Gordon Linoff helping me identify this issue). I want to group the below table by person, office and job while respecting order by person,from_date. consider the table below:
declare #temp table(person varchar(25),office varchar(25),job varchar(25),from_date date,to_date date)
insert into #temp values ('jon','ny','programmer','1/1/2020','1/3/2020');
insert into #temp values ('jon','ny','programmer','1/4/2020','1/5/2020');
insert into #temp values ('jon','dc','programmer','1/6/2020','1/7/2020');
insert into #temp values ('jon','ny','programmer','1/8/2020','1/9/2020');
insert into #temp values ('lou','ny','programmer','1/1/2020','1/3/2020');
insert into #temp values ('lou','ny','programmer','1/4/2020','1/5/2020');
insert into #temp values ('lou','dc','programmer','1/6/2020','1/7/2020');
insert into #temp values ('lou','ny','programmer','1/8/2020','1/9/2020');
the intended output is

This is a type of gaps-and-islands problem. If there are no gaps in the dates, the simplest solution is the difference of row numbers:
select person, office, job, min(from_date), max(to_date)
from (select t.*,
row_number() over (partition by person, office, job order by from_date) as seqnum,
row_number() over (partition by person, office order by from_date) as seqnum_2
from t
) t
group by person, office, job, (seqnum - seqnum_2)

This is a general solution:
WITH preceders_and_followers AS (
SELECT
b.person,
b.office,
b.job,
b.from_date,
b.to_date,
CASE
WHEN EXISTS (
SELECT
c.*
FROM
ora$ptt_tmp c
WHERE
b.person = c.person
AND b.office = c.office
AND b.job = c.job
AND ( b.from_date - 1 BETWEEN c.from_date AND c.to_date )
) THEN
1
END AS has_preceder,
CASE
WHEN EXISTS (
SELECT
c.*
FROM
ora$ptt_tmp c
WHERE
b.person = c.person
AND b.office = c.office
AND b.job = c.job
AND ( b.to_date + 1 BETWEEN c.from_date AND c.to_date )
) THEN
1
END AS has_follower
FROM
ora$ptt_tmp b
ORDER BY
1,
2,
3
)
SELECT DISTINCT
pf1.person,
pf1.office,
pf1.job,
pf1.from_date,
(
SELECT
MIN(pf2.to_date)
FROM
preceders_and_followers pf2
WHERE
pf1.person = pf2.person
AND pf1.office = pf2.office
AND pf1.job = pf2.job
AND pf2.to_date >= pf1.from_date
AND has_follower IS NULL
) to_date
FROM
preceders_and_followers pf1
WHERE
pf1.has_preceder IS NULL
ORDER BY
1,
4,
2,
3;

Related

Extract records modified in different date

I have this table called t1 with these fields: codLoan, codOp, codCau, action, and dateExec. Action field can be assumed three values: 'I' (Inserted), 'M' (Modified) or 'C' (Cancelled).
My records can be modified in different dates, so I can have two records with the same codLoan but with different value for dateExec.
I have to extract all the records that have the same codLoan and different Action (I or M) in different dateExec.
For instance:
codLoan=1
dateExec= '2018/08/08'
action='I'
codLoan=1
dateExec= '2018/08/08'
action='M'
codLoan=2
dateExec= '2018/08/07'
action='I'
codLoan=2
dateExec= '2018/08/08'
action='M'
Result: codLoan=2, dateExec= '2018/08/08'
I tried this query, but it extracts all the records with Action='I' and Action='M'.
select codLoan, dateExec
from t1
where Action in ('I','M');
How can I fix my code?
Perhaps you want :
select t.*
from table t
where exists (select 1
from table t1
where t1.codLoan = t.codLoan and
t1.dateExec <> t.dateExec and
t1.action <> t.action
);
Use a join:
select b.*
from mytable a
join mytable b on b.codLoan = a.codLoan
and b.dateExec > a.dateExec
and b.action != a.action
and b.action in ('I','M')
where a.action in ('I','M')
This returns the last action and date.
declare #t table
(
codLoan int,
dateExec date,
[action] char(1)
);
insert into #t values
(1, '2018-08-08', 'I'),
(1, '2018-08-08', 'M'),
(2, '2018-08-07', 'I'),
(2, '2018-08-08', 'M');
with diff as
(
select x.*
from
(
select
*,
cnt1 = count(*) over (partition by codLoan, dateExec
order by codLoan) ,
cnt2 = count(*) over (partition by codLoan, [action], dateExec
order by codLoan),
rnum = row_number() over (partition by codLoan
order by dateExec desc)
from #t
) x
where cnt1 = cnt2
)
select codLoan, dateExec
from diff
where rnum = 1
order by codLoan, dateExec;
Just another option
select codLoan
, dateExec = min(dateExec)
from t1
where Action in ('I','M')
Group By codLoan
Having min(dateExec)<>max(dateExec)

How to combine start and end dates without breaks in time?

I have tried many ways, but unsuccessfully, to combine Start dates and end dates where the record Id is the same and combine the where there is no break in the Date
CREATE TABLE #t (
A_ID VARCHAR(100),
BDate VARCHAR(100),
CDate VARCHAR(100)
)
INSERT INTO #T
(A_ID, BDate, CDate)
VALUES
('1000','2017/12/01','2017/12/31'),
('1000','2018/01/01','2018/03/31'),
('1000','2018/05/01','2018/05/31')
Select A_ID, bDate,cDate from
(
select BDate,A_ID,Cdate,lead(Bdate) over (order by Bdate) next_BDate from #T as t2
where exists ( select null from #T as t1
where t1.A_ID = t2.A_ID and t1.Bdate <= t2.Bdate and t1.CDate <=t2.CDate )
) as combine
where bDate < Cdate
order by BDate;
I would like to see:
1000 2017/12/01 2018/03/31 (no break in first two dates)
1000 2018/05/01 2018/05/31 (Break between 4-1-18 and 5-1-18)
This is a gaps & islands problem, depending on your actual data a solution based on bĀ“nested OLAP-functions might be more efficient that recursion:
with combine as
(
select BDate,A_ID,Cdate,
-- find the gap and flag it
case when lag(Cdate)
over (partition by A_ID
order by CDate) = dateadd(day,-1, BDate)
then 0
else 1
end as flag
from T
)
, groups as
(
Select A_ID, bDate,cDate,
-- cumulative sum over 0/1 to assign the same group number for row without gaps
sum(flag)
over (partition by A_ID
order by Bdate) as grp
from combine
)
-- group consecutive rows into one
select A_ID, min(BDate), max(CDate)
from groups
group by A_ID, grp
order by min(BDate);
How does this work for you?
declare #table table (a_id int, bdate date, cdate date, id int)
insert #table
select a_id, bdate, cdate,
case when lag(cdate, 1,cdate) over(partition by a_id order by bdate) in (cdate, dateadd(day, -1, bdate))
then 1 else 2 end id from #t
select a.a_id, min(a.bdate)bdate, max(a.cdate)cdate from #table a
left join
#table b
on a.id=b.id and a.a_id=b.a_id and b.id=1
group by a.a_id, a.id

expecting output with out using left join

first table is my input and expecting output like second table with out using left join.
this is the table data
declare #table table
(customer_id int,
indicator bit,
salary numeric(22,6)
,netresult numeric(22,6))
INSERT INTO #table (
customer_id
,indicator
,salary
)
VALUES
(1,1,2000),
(1,1,3000),
(2,1,1000),
(1,0,500),
(1,1,5000),
(2,1,2000),
(2,0,100)
select * from #table order by customer_id,indicator desc
I tried in below method it works. Is there any better alternative?
SELECT a.customer_id
,a.indicator
,a.salary
,netresult=p_salary-(2*n_salary)
FROM (
SELECT customer_id
,indicator
,salary
,sum(salary) OVER (PARTITION BY customer_id) p_salary
FROM #table
) a
LEFT JOIN (
SELECT customer_id
,indicator
,salary
,sum(salary) OVER (PARTITION BY customer_id) n_salary
FROM #table
WHERE indicator = 0
) b ON a.customer_id = b.customer_id
order by customer_id,indicator desc
Expected Output
I think you want this:
select t.customer_id, t.indicator,
sum(case when indicator = 1 then salary else - salary end) over (partition by customer_id) as netresult
form #table t;
No joins are necessary.
with math
select t.customer_id, t.indicator, t.salary
, sum((( t.indicator * 2) -1) * salary) over (partition by customer_id) as netresult
from #table t;

convert row to column using Pivot without any clause

I have a table like below.
I need to get the data like below.
I have created two temp tables and achieved the result like this. Please help me to do the same with PIVOT.
At least I wouldn't use pivot for that, to my mind this is simpler to do with group by and row_number:
select UserId, max(starttime) as starttime, max(endtime) as endtime
from (
select UserId,
case when StartOrEnd = 'S' then time end as starttime,
case when StartOrEnd = 'E' then time end as endtime,
row_number() over (partition by UserID order by time asc)
+ case when StartOrEnd = 'S' then 1 else 0 end as GRP
from table1
) X
group by UserId, GRP
order by starttime
The derived table splits the time into start / end time columns (to handle cases where only one exists) and uses a trick with row number to group the S / E items together. The outer select just groups the rows into the same row.
Example in SQL Fiddle
Not a efficient solution as JamesZ but should work
create table #tst (userid int,start_end char(1),times datetime)
insert #tst values
(1,'S','07-27-2015 16:45'),
(1,'E','07-27-2015 16:46'),
(2,'S','07-27-2015 16:47'),
(2,'E','07-27-2015 16:48'),
(1,'S','07-27-2015 16:49'),
(1,'E','07-27-2015 16:50')
WITH cte
AS (SELECT Row_number()OVER(ORDER BY times) rn,*
FROM #tst),
cte1
AS (SELECT a.userid,
a.start_end,
a.times,
CASE WHEN a.userid = b.userid THEN 0 ELSE 1 END AS com,
a.rn
FROM cte a
LEFT OUTER JOIN cte b
ON a.rn = b.rn + 1),
cte2
AS (SELECT userid,
start_end,
times,
(SELECT Sum(com)
FROM cte1 b
WHERE b.rn <= a.rn) AS row_num
FROM cte1 a)
SELECT USERID,
starttime=Min(CASE WHEN start_end = 's' THEN times END),
endtime=Max(CASE WHEN start_end = 'e' THEN times END)
FROM cte2
GROUP BY USERID,
row_num
Here is another method
declare #t table(userid int, StartOrEnd char(1), time datetime)
insert into #t
select 1,'S','2015-07-27 16:45' union all
select 1,'E','2015-07-27 16:46' union all
select 2,'S','2015-07-27 16:47' union all
select 2,'E','2015-07-27 16:48' union all
select 1,'S','2015-07-27 16:49' union all
select 1,'E','2015-07-27 16:50'
select userid,min(time) as minimum_time, max(time) as maximum_time from
(
select *, row_number() over (partition by cast(UserID as varchar(10))
+StartOrEnd order by time asc) as sno
from #t
) as t
group by userid,sno
Result
userid minimum_time maximum_time
----------- ----------------------- -----------------------
1 2015-07-27 16:45:00.000 2015-07-27 16:46:00.000
2 2015-07-27 16:47:00.000 2015-07-27 16:48:00.000
1 2015-07-27 16:49:00.000 2015-07-27 16:50:00.000

querying SQL Server 2005

I have a table that is time and milemarkers:
08:00 101.2
08:45 109.8
09:15 109.8
09:30 111.0
10:00 114.6
I need output that looks like this:
08:00-08:45 101.1-109.8
08:45-09:15 109.8-109.8
09:15-09:30 109.8-111.0
09:30-10:00 111.0-114.6
I figure I need 2 identical recordsets and somehow tie the first record of one to the second record of the other, but am clueless on how to accomplish that (or how to ask the question). Any help would be greatly appreciated.
Thanks in advance,
Ginny
The following query will get the next values:
select tm.*,
(select top 1 time
from timemilemarkers tm2
where tm2.time > tm.time
order by 1 desc
) as nexttime,
(select top 1 milemarker
from timemilemarkers tm2
where tm2.time > tm.time
order by 1 desc
) as nextmilemarker
from timemilemarkers tm;
You can put them into the form you want with something like:
select concat_ws('-', milemarker, nextmilemarker), concat_ws('-', time, nexttime)
from (select tm.*,
(select top 1 time
from timemilemarkers tm2
where tm2.time > tm.time
order by 1 desc
) as nexttime,
(select top 1 milemarker
from timemilemarkers tm2
where tm2.time > tm.time
order by 1 desc
) as nextmilemarker
from timemilemarkers tm
) tm
where nextmilemarker is not null;
Other way to do it is:
SQLFiddle
select cast(A.TIME_COL as varchar) + ' - ' + cast(B.TIME_COL as varchar),
cast(A.MILES as varchar) + ' - ' + cast(B.MILES as varchar)
from (select row_number() OVER (order by time_col) ID, * from TABLE_A) A
inner join (select row_number() OVER (order by time_col) ID, * from TABLE_A) B
on A.ID = B.ID - 1
UPDATE: this query will only works for SQL Server 2008 and upwards and obviously not answer your question. I will not erase the answer cause it can be helpful for othe people.
UPDATE2: It works on SQL Server 2005.
try this,
Declare #t table (times time(0), milemarkers decimal(5,2))
insert into #t
select '08:00','101.2' union all
select'08:45','109.8' union all
select'09:15','109.8' union all
select'09:30','111.0' union all
select'10:00','114.6'
;With cte1 as
(select *,ROW_NUMBER()over(order by times)rn from #t
)
,cte2 as
(select max(rn) rn1 from cte1)
, cte as
(select
(select times from cte1 where rn=1)lowerlimit,(select times from cte1 where rn=2)upperlimit,
(select milemarkers from cte1 where rn=1)lowerlimit1,(select milemarkers from cte1 where rn=2)upperlimit1
,1 rn from cte1
union all
select upperlimit,(select times from cte1 where rn=a.rn+2)
,upperlimit1,(select milemarkers from cte1 where rn=a.rn+2)
,rn+1
from cte a where a.rn<(select rn1 from cte2)
)
select distinct cast(lowerlimit as varchar(10))+'-'+cast(upperlimit as varchar(10)) ,
cast(lowerlimit1 as varchar(10))+'-'+cast(upperlimit1 as varchar(10))
from cte a where a.rn<(select rn1 from cte2)
Using CTE we can get the OutPut it is also other way to do Find below query
DECLARE #TABLE_A table(time_col time, miles float)
insert into #TABLE_A values ('08:00',101.2)
insert into #TABLE_A values ('08:45',109.8)
insert into #TABLE_A values ('09:15',109.8)
insert into #TABLE_A values ('09:30',111.0)
insert into #TABLE_A values ('10:00',114.6)
;WITH CTE AS
(
SELECT t.time_col,t.miles,t.RN FROM
(
Select ROW_NUMBER()OVER(ORDER BY time_col )RN,* FRom #TABLE_A
)t
INNER JOIN (
Select ROW_NUMBER()OVER(ORDER BY time_col )RN,* FRom #TABLE_A
)tt
ON t.RN = tt.RN
)
,CTE2(TimeSpan,Miles) AS
(
Select CONVERT(VARCHAR,c.time_col,108) +'-'+
(Select CONVERT(VARCHAR,time_col,108) FROM CTE WHERE RN = cc.RN + 1) As TimeSpan,
CAST(c.miles AS VARCHAR) +' - '+ (Select CAST(miles AS VARCHAR) FROM CTE WHERE RN = CC.RN + 1)AS Miles FROM CTE c
INNER JOIN CTE CC
ON CC.miles = c.miles
AND CC.time_col = c.time_col
)
Select TimeSpan,Miles from CTE2
WHERE TimeSpan IS NOT NULL