Distinct values for a distinct datadate in SQL - sql

In my table I sometimes have two rows with the same date but different values, and I need just one of them. Is there any way to select a single value for each distinct date?
example:
DATADATE ID
2008-06-30 00:00:00.000 12
2008-03-31 00:00:00.000 12
2007-12-31 00:00:00.000 3
2007-12-31 00:00:00.000 12
2007-09-30 00:00:00.000 3
2007-09-30 00:00:00.000 12
2007-06-30 00:00:00.000 3
2007-06-30 00:00:00.000 12
2007-03-31 00:00:00.000 3
2007-03-31 00:00:00.000 12
2006-12-31 00:00:00.000 3
2006-09-30 00:00:00.000 3
2006-06-30 00:00:00.000 3
What I need to get is this:
DATADATE ID
2008-06-30 00:00:00.000 12
2008-03-31 00:00:00.000 12
2007-12-31 00:00:00.000 12
2007-09-30 00:00:00.000 12
2007-06-30 00:00:00.000 12
2007-03-31 00:00:00.000 12
2006-12-31 00:00:00.000 3
2006-09-30 00:00:00.000 3
2006-06-30 00:00:00.000 3
Any help is really appreciated, thanks.

You could use group by:
-- One row per DATADATE, keeping the highest ID recorded for that date.
SELECT
    DATADATE,
    MAX(ID) AS ID          -- the column is named ID in the sample data
FROM YourTable
GROUP BY
    DATADATE

If you are using SQL Server 2005 or later, you can do this:
-- Number the rows of each DATADATE by ID, highest first, then keep only the
-- first row of every date. The result has one row per date carrying its
-- largest ID, which is the output the question asks for.
;WITH CTE
AS
(
    SELECT
        -- Partition by the date (not by ID): partitioning by ID would return
        -- one row per ID instead of one row per date.
        ROW_NUMBER() OVER (PARTITION BY DATADATE ORDER BY ID DESC) AS RowNbr,
        Table1.*
    FROM
        Table1
)
SELECT
    *
FROM
    CTE
WHERE
    CTE.RowNbr = 1
EDIT
Inside the CTE you can join other tables or do whatever else you need to get the output you want. Like this:
-- Same ROW_NUMBER() pattern with a join inside the CTE: rows are numbered
-- within each Table1.ID by Table2.DATADATE (newest first) and only the
-- first row of every partition is returned.
;WITH CTE AS
(
    SELECT
        ROW_NUMBER() OVER (PARTITION BY t1.ID ORDER BY t2.DATADATE DESC) AS RowNbr,
        t1.*
    FROM Table1 AS t1
    INNER JOIN Table2 AS t2
        ON t2.ID = t1.ID
)
SELECT *
FROM CTE
WHERE RowNbr = 1

Related

ORACLE SQL - How to find the number of reliefs each teacher has, each day, 2 months before the teacher resigned?

I need some help in finding the number of reliefs each teacher has, every single day, 2 months before the teacher resigns.
Join_dt - teacher's join date,
Resign_dt - teacher's resign date,
Relief_ID - Relief teacher's ID,
Start_dt - Relief's start date,
End_dt - Relief's end date,
note that there may be overlapping dates between 2 or more different reliefs and so I need to find the number of distinct reliefs each teacher has for each date.
This is what I am given:
Teacher_ID Join_dt Resign_dt Relief_ID Start_dt End_dt
12 2006-08-30 2019-08-01 20 2017-02-07 2019-07-04
12 2006-08-30 2019-08-01 20 2016-11-10 2019-01-30
12 2006-08-30 2019-08-01 103 2016-08-20 2019-07-29
12 2006-08-30 2019-08-01 17 2016-01-30 2017-12-30
23 2017-10-01 2018-11-12 44 2018-10-19 2018-11-11
23 2017-10-01 2018-11-12 29 2018-04-01 2018-12-02
23 2017-10-01 2018-11-12 06 2017-11-25 2018-05-02
05 2015-02-11 2019-10-02 38 2019-01-17 2019-07-21
05 2015-02-11 2019-10-02 11 2018-11-02 2019-02-05
05 2015-02-11 2019-10-02 15 2018-09-30 2018-10-03
Expected result:
Teacher_ID Dates No_of_reliefs
12 2019-07-31 0
12 2019-07-30 0
12 2019-07-29 1
12 2019-07-28 1
12 2019-07-27 1
... ...
12 2019-07-04 2
... ...
12 2016-05-30 2
12 2016-05-29 2
12 2016-05-28 2
12 2016-05-27 2
12 2016-05-26 1
23 2018-10-31 2
... ...
For date 2019-07-29, No_of_reliefs = 1 because of Relief_ID 103.
For date 2019-07-04, No_of_reliefs = 2 because of Relief_ID 20 & 103.
Dates are supposed to start from the last day of the month before the teacher resigned. For Teacher_ID 23, since she resigned on 2018-11-12, dates shall start from 2018-10-31.
I have tried using connect by but the execution time is really long since it involves a large amount of data.
Any other methods will be greatly appreciated!!
Thank you kind souls!!!
You can use
connect by level <= last_day(add_months(Resign_dt,-1)) - add_months(Resign_dt,-2) clause :
I assume you want the dates to start 2 months before the resignation date and to end on the last day of the month preceding it.
-- For every teacher and every calendar day in the window before the
-- resignation date, count how many distinct reliefs cover that day.
with t1 (Teacher_ID, Resign_dt, Relief_ID, Start_dt, End_dt) as
(
    -- Sample rows taken from the question.
    select 12, date '2019-08-01', 20,  date '2017-02-07', date '2019-07-04' from dual union all
    select 12, date '2019-08-01', 20,  date '2016-11-10', date '2019-01-30' from dual union all
    select 12, date '2019-08-01', 103, date '2016-08-20', date '2019-07-29' from dual
    -- ... append the remaining rows here (each preceded by "union all") ...
),
teachers as
(
    -- Collapse to one row per teacher BEFORE generating days. Running
    -- CONNECT BY over the raw rows lets every relief row of a teacher connect
    -- to every other row of the same teacher, so the number of generated rows
    -- multiplies at each level and the query effectively never finishes.
    select distinct Teacher_ID, Resign_dt
    from t1
),
t2 as
(
    -- Day generator: level 1 is the last day of the month before the
    -- resignation, each further level steps one day back.
    -- NOTE(review): the bound is (end - start), so the start date
    -- add_months(Resign_dt, -2) itself is not generated; add "+ 1" to the
    -- bound if that day must be included.
    select last_day(add_months(Resign_dt, -1)) - level + 1 as Dt,
           Teacher_ID
    from teachers
    connect by level <= last_day(add_months(Resign_dt, -1)) - add_months(Resign_dt, -2)
           and prior Teacher_ID = Teacher_ID
           -- a non-deterministic PRIOR expression keeps Oracle from
           -- reporting a CONNECT BY loop when a row connects to itself
           and prior sys_guid() is not null
)
select t2.Teacher_ID,
       to_char(t2.Dt, 'yyyy-mm-dd') as Dates,
       (select count(distinct r.Relief_ID)
        from t1 r
        where r.Teacher_ID = t2.Teacher_ID
          and t2.Dt between r.Start_dt and r.End_dt
       ) as No_of_reliefs
from t2
order by t2.Teacher_ID, t2.Dt desc;
Demo
-- For every calendar day, count the distinct reliefs active for each teacher.
-- Days on which no relief row matches are still returned, with a NULL
-- Teacher_ID and a count of 0.
select d.dt,
       tr.Teacher_ID,
       -- DISTINCT: the same Relief_ID can appear with overlapping periods and
       -- must be counted once per day.
       count(distinct tr.Relief_ID) as No_of_reliefs
from (
        -- Calendar of consecutive days starting at 2006-01-01 (4999 days),
        -- cut off before the start of the year after next.
        select dt
        from (
                select date '2006-01-01' + rownum - 1 as dt
                from dual
                connect by rownum < 5000
             ) q
        where extract(year from dt) < extract(year from sysdate) + 2
     ) d
left outer join tr
    on d.dt between tr.Join_dt and tr.End_dt
   and d.dt between tr.Start_dt and tr.Resign_dt
group by d.dt,
         tr.Teacher_ID
order by d.dt desc

Group by ALL columns corresponding to consecutive dates SQL Server

I am working in SQL Server 2012 and have a dataset like this:
ID DATE1 TMT
-----------------------
121 2016-04-01 B
121 2016-04-04 A
121 2016-04-06 A
121 2016-04-08 A
121 2016-04-11 B
121 2016-04-13 B
121 2016-04-15 A
122 2016-03-14 A
122 2016-03-16 A
122 2016-03-18 B
122 2016-03-21 B
122 2016-03-24 A
122 2016-03-29 A
Desired output:
ID BEGIN_DATE END_DATE TMT DAY_COUNT
-----------------------------------------------
121 2016-04-01 2016-04-01 B 1
121 2016-04-04 2016-04-08 A 4
121 2016-04-11 2016-04-13 B 2
121 2016-04-15 2016-04-15 A 1
122 2016-03-14 2016-03-16 A 2
122 2016-03-18 2016-03-21 B 3
122 2016-03-24 2016-03-29 A 5
Code I have now:
-- Asker's attempt: one row per (ID, TMT) with its earliest and latest DATE1.
-- DAY_COUNT is the day difference between the two, reported as 1 when the
-- difference is 0.
;WITH S AS
(
    SELECT
        ID,
        MIN(DATE1) AS BEGIN_DATE,
        MAX(DATE1) AS END_DATE,
        TMT
    FROM MyTable
    GROUP BY ID, TMT
)
SELECT
    S.ID,
    S.BEGIN_DATE,
    S.END_DATE,
    S.TMT,
    CASE span.days WHEN 0 THEN 1 ELSE span.days END AS DAY_COUNT
FROM S
CROSS APPLY (SELECT DATEDIFF(d, S.BEGIN_DATE, S.END_DATE) AS days) AS span
Code produces result like this:
ID BEGIN_DATE END_DATE TMT DAY_COUNT
------------------------------------------------
121 2016-04-01 2016-04-13 B 12
121 2016-04-04 2016-04-15 A 11
122 2016-03-14 2016-03-29 A 15
122 2016-03-18 2016-03-21 B 3
Not sure how to get from here to desired output. Any help would be appreciated!
Thank you!
It is a gap and island problem try it like this
-- Gaps-and-islands: collapse each run of consecutive rows that share the same
-- TMT (per id, ordered by date1) into one row with its first and last date.
select t.id,
       min(t.date1) as begin_date,
       max(t.date1) as end_date,
       -- The desired output reports 1 for a run that starts and ends on the
       -- same day, otherwise the plain day difference.
       case when datediff(day, min(t.date1), max(t.date1)) = 0 then 1
            else datediff(day, min(t.date1), max(t.date1))
       end as day_count,
       t.TMT
from
(
    -- grn stays constant within a run: both row numbers advance together
    -- while TMT does not change, so their difference identifies the run.
    select m.*,
           row_number() over (partition by m.id, m.TMT order by m.date1)
         - row_number() over (partition by m.id order by m.date1) as grn
    from MyTable as m   -- the base table, not the aggregated CTE S from the question
) as t
group by t.id, t.TMT, t.grn
order by t.id, min(t.date1)
The crucial part is the nested subquery, where two row_number() functions are subtracted in order to isolate each run of consecutive occurrences of the same TMT per id. Once you have the grn value, the rest is a simple GROUP BY.
@ZLK's answer worked perfectly for me! Thank you, @ZLK!
Here is the code, if anyone else needs it-
-- Gaps-and-islands over the whole table: rn is constant within each run of
-- consecutive rows (ordered by id, date1) that share the same tmt.
select t.id,
       min(t.date1) as begin_date,
       max(t.date1) as end_date,
       t.tmt,
       -- a run that starts and ends on the same day counts as 1 day,
       -- matching the desired output
       case when datediff(day, min(t.date1), max(t.date1)) = 0 then 1
            else datediff(day, min(t.date1), max(t.date1))
       end as day_count
from (
    select m.*,
           row_number() over (order by m.id, m.date1)
         - row_number() over (partition by m.tmt order by m.id, m.date1) as rn
    from mytable as m
) as t
group by t.id, t.tmt, t.rn;

Applying LAG() to multiple rows with a null value

Given:
with
-- m: one row per reporting period of an ID (start and end as datetime;
-- the casts in the first branch fix the column types for the whole union).
m as (
    select 1 as ID,
           cast('03/01/2015' as datetime) as PERIOD_START,
           cast('3/31/2015' as datetime) as PERIOD_END
    union all select 1, '04/01/2015', '4/28/2015'
    union all select 1, '05/01/2015', '5/31/2015'
    union all select 1, '06/01/2015', '06/30/2015'
    union all select 1, '07/01/2015', '07/31/2015'
),
-- a: audit entries (timestamp and status text) recorded for an ID.
a as (
    select 1 as ID,
           cast('2015-03-13 14:17:00.000' as datetime) as AUDIT_TIME,
           'READ [2]' as STATUS
    union all select 1, '2015-04-27 15:51:00.000', 'HELD [2]'
    union all select 1, '2015-07-08 17:54:00.000', 'COMPLETED [5]'
)
This query:
-- Every period of m, with the audit entry (if any) whose AUDIT_TIME falls
-- between the period's start and end; periods without one get NULLs.
SELECT
    m.ID,
    m.PERIOD_START,
    m.PERIOD_END,
    a.AUDIT_TIME,
    a.STATUS
FROM m
LEFT JOIN a
    ON a.ID = m.ID
   AND a.AUDIT_TIME BETWEEN m.PERIOD_START AND m.PERIOD_END
generates this record set:
ID PERIOD_START PERIOD_END AUDIT_TIME STATUS
1 2015-03-01 00:00:00.000 2015-03-31 00:00:00.000 2015-03-13 14:17:00.000 READ [2]
1 2015-04-01 00:00:00.000 2015-04-28 00:00:00.000 2015-04-27 15:51:00.000 HELD [2]
1 2015-05-01 00:00:00.000 2015-05-31 00:00:00.000 NULL NULL
1 2015-06-01 00:00:00.000 2015-06-30 00:00:00.000 NULL NULL
1 2015-07-01 00:00:00.000 2015-07-31 00:00:00.000 2015-07-08 17:54:00.000 COMPLETED [5]
I need the 4/27/15 entry repeated for May and June:
ID PERIOD_START PERIOD_END AUDIT_TIME STATUS
1 2015-03-01 00:00:00.000 2015-03-31 00:00:00.000 2015-03-13 14:17:00.000 READ [2]
1 2015-04-01 00:00:00.000 2015-04-28 00:00:00.000 2015-04-27 15:51:00.000 HELD [2]
1 2015-05-01 00:00:00.000 2015-05-31 00:00:00.000 2015-04-27 15:51:00.000 HELD [2]
1 2015-06-01 00:00:00.000 2015-06-30 00:00:00.000 2015-04-27 15:51:00.000 HELD [2]
1 2015-07-01 00:00:00.000 2015-07-31 00:00:00.000 2015-07-08 17:54:00.000 COMPLETED [5]
Using the LAG() function:
-- Asker's LAG() attempt: next to each period's own audit values, show the
-- values of the immediately preceding period (per ID, by PERIOD_START).
-- LAG() looks back exactly one row, so only the first period after an audit
-- entry picks it up.
SELECT
    m.ID,
    m.PERIOD_START,
    m.PERIOD_END,
    a.AUDIT_TIME,
    LAG(a.AUDIT_TIME) OVER (PARTITION BY m.ID ORDER BY m.PERIOD_START) AS PRIOR_AUDIT_TIME,
    a.STATUS,
    LAG(a.STATUS) OVER (PARTITION BY m.ID ORDER BY m.PERIOD_START) AS PRIOR_STATUS
FROM m
LEFT JOIN a
    ON a.ID = m.ID
   AND a.AUDIT_TIME BETWEEN m.PERIOD_START AND m.PERIOD_END
only works for a single row:
ID PERIOD_START PERIOD_END AUDIT_TIME PRIOR_AUDIT_TIME STATUS PRIOR_STATUS
1 2015-03-01 00:00:00.000 2015-03-31 00:00:00.000 2015-03-13 14:17:00.000 NULL READ [2] NULL
1 2015-04-01 00:00:00.000 2015-04-28 00:00:00.000 2015-04-27 15:51:00.000 2015-03-13 14:17:00.000 HELD [2] READ [2]
1 2015-05-01 00:00:00.000 2015-05-31 00:00:00.000 NULL 2015-04-27 15:51:00.000 NULL HELD [2]
1 2015-06-01 00:00:00.000 2015-06-30 00:00:00.000 NULL NULL NULL NULL
1 2015-07-01 00:00:00.000 2015-07-31 00:00:00.000 2015-07-08 17:54:00.000 NULL COMPLETED [5] NULL
Is there a way to do this without having to resort to a cursor?
You can do this with window functions:
-- q: every period with its matching audit entry, NULL when there is none.
with q as (
    select m.ID, m.PERIOD_START, m.PERIOD_END, a.AUDIT_TIME, a.STATUS
    from m
    left join a
        on a.id = m.id
       and a.audit_time between m.period_start and m.period_end
),
-- grouped: audit_grp is the running maximum of AUDIT_TIME per ID in period
-- order, so periods without an audit inherit the latest earlier timestamp.
grouped as (
    select q.*,
           max(q.audit_time) over (partition by q.id order by q.period_start) as audit_grp
    from q
)
-- All periods sharing an audit_grp take the one non-NULL STATUS of the group.
select grouped.*,
       max(grouped.status) over (partition by grouped.id, grouped.audit_grp) as imputed_status
from grouped
The idea is to copy the audit_time value over, using max() as a cumulative window function. This then defines groups, so you can get the status as well.
ANSI SQL defines an IGNORE NULLS option for LAG(), but SQL Server does not (yet) support it.

SQL query stuck - comparison on different lines

I'm working on a rather unusual SQL problem where I have to compare each row with the previous one:
Number start_date end_date
----- ------- ------------
1 2011-06-07 00:00:00.000 2011-07-10 00:00:00.000
2 2011-10-11 00:00:00.000 2011-10-11 00:00:00.000
3 2011-10-26 00:00:00.000 2011-10-29 00:00:00.000
4 2011-10-29 00:00:00.000 2011-11-15 00:00:00.000
Here, I have to compare the start_date of one row with the end_date of the previous row and create a view out of it.
(If the start_date is less than the previous end_date, then Criteria is set to 1.)
For example, it should compare the start_date of row 3 (2011-10-26 00:00:00.000) with the end_date of row 2 (2011-10-27 00:00:00.000), allowing a 30-day window.
Number start_date end_date Criteria
----- ----------- ---------------- ------------
1 2011-06-07 00:00:00.000 2011-07-10 00:00:00.000 0
2 2011-10-11 00:00:00.000 2011-10-11 00:00:00.000 0
3 2011-10-26 00:00:00.000 2011-10-29 00:00:00.000 1
4 2011-10-30 00:00:00.000 2011-11-15 00:00:00.000 1
I m confused how should I proceed with this.
Any help would be helpful !!!!
Thanks !!!
The most straightforward way to do this is to use a subquery:
-- For each row, fetch the end_date of the row with the next-lower number and
-- flag the row with 1 when its start_date is earlier than that end_date plus
-- 30 days; the first row has no predecessor and therefore gets 0.
SELECT
    A.number,
    A.start_date,
    A.end_date,
    CASE
        WHEN A.start_date < DATEADD(d, 30, prev.end_date) THEN 1
        ELSE 0
    END AS Criteria
FROM mytable AS A
OUTER APPLY (
    SELECT TOP (1) B.end_date
    FROM mytable AS B
    WHERE B.number < A.number
    ORDER BY B.number DESC
) AS prev
Note: If the start date is the 29th day following the previous row's end date, Criteria becomes 1. By the 30th day onwards, it is 0. Tweak the 30 in the query as required.
Sample:
-- Sample table and rows used to demonstrate the query.
CREATE TABLE mytable (
    Number     int PRIMARY KEY,
    start_date datetime,
    end_date   datetime
);

INSERT INTO mytable (Number, start_date, end_date)
SELECT 1, '2011-06-07', '2011-07-10' UNION ALL
SELECT 2, '2011-10-11', '2011-10-27' UNION ALL
SELECT 3, '2011-10-26', '2011-10-29' UNION ALL
SELECT 4, '2011-10-29', '2011-11-15'
Result:
number start_date end_date Criteria
1 2011-06-07 00:00:00.000 2011-07-10 00:00:00.000 0
2 2011-10-11 00:00:00.000 2011-10-27 00:00:00.000 0
3 2011-10-26 00:00:00.000 2011-10-29 00:00:00.000 1
4 2011-10-29 00:00:00.000 2011-11-15 00:00:00.000 1
Try using case like this:
-- Criteria is 1 when a row starts less than 30 days after the end_date of the
-- previous row (the row with the next-lower Number), otherwise 0. The first
-- row has no predecessor, so the comparison is NULL and it gets 0.
create view vDates as
select t.Number,
       t.start_date,
       t.end_date,
       case
           -- compare with the PREVIOUS row's end_date, not this row's own
           when t.start_date < dateadd(day, 30,
                    (select top (1) p.end_date
                     from tab p
                     where p.Number < t.Number
                     order by p.Number desc))
           then 1
           else 0
       end as Criteria
from tab t
SQL Fiddle Demo
A more readable way is to create a function and pass it the relevant dates:
Function:
-- Returns 1 when @START_DATE is earlier than @PREVIOUS_END_DATE, otherwise 0.
-- A NULL argument makes the comparison unknown, so the function returns 0.
create function [dbo].[CompareDates] (
    @START_DATE datetime,          -- T-SQL parameters must be prefixed with @
    @PREVIOUS_END_DATE datetime
)
RETURNS int
AS
BEGIN
    if @START_DATE < @PREVIOUS_END_DATE
        return 1
    return 0
END
Query (using subquery):
-- Demo: flag each row whose start date is earlier than the end date of the
-- previous row (the row with the next-lower number).
declare @dates table               -- table variables are named with @
(
    number int,
    start datetime,
    end_date datetime
)
insert into @dates (number, start, end_date) values
(1, '2011-06-07 00:00:00.000', '2011-07-10 00:00:00.000'),
(2, '2011-10-11 00:00:00.000', '2011-10-27 00:00:00.000'),
(3, '2011-10-26 00:00:00.000', '2011-10-29 00:00:00.000'),
(4, '2011-10-29 00:00:00.000', '2011-11-15 00:00:00.000')

select dates.number,
       dates.start,
       dates.end_date,
       dates.previous_end_date,
       -- first argument is the row's START date; the first row has no
       -- predecessor (NULL) and therefore evaluates to 0
       dbo.CompareDates(dates.start, dates.previous_end_date) as Criteria
from
(
    select d1.number, d1.start, d1.end_date,
           (select top 1 d2.end_date
            from @dates d2
            where d2.number < d1.number
            order by d2.number desc) as previous_end_date
    from @dates d1
) dates

Problem in Start And End Dates using CTE

I have the below input
ID Activity Date
1 gardening 2011-01-01 00:00:00.000
1 gardening 2011-02-01 00:00:00.000
2 cooking 2011-03-01 00:00:00.000
2 cooking 2011-04-01 00:00:00.000
2 cooking 2011-05-01 00:00:00.000
1 gardening 2011-06-01 00:00:00.000
1 gardening 2011-07-01 00:00:00.000
The ddl is as under
-- Sample data for the question. "#t" is a temporary-table name, which cannot
-- be DECLAREd like a table variable, so it is created with CREATE TABLE; the
-- queries reading "from #t" then work as written.
Create table #t (ID int, Activity Varchar(50), [Date] DATETIME)

Insert into #t (ID, Activity, [Date])
          Select 1, 'gardening', '01/01/2011'
union all Select 1, 'gardening', '02/01/2011'
union all Select 2, 'cooking',   '03/01/2011'
union all Select 2, 'cooking',   '04/01/2011'
union all Select 2, 'cooking',   '05/01/2011'
union all Select 1, 'gardening', '06/01/2011'
union all Select 1, 'gardening', '07/01/2011'

select * from #t
Expected output
ID ACTIVITY INITIAL_DATE END_DATE
1 gardening 01/01/2011 02/01/2011
1 gardening 02/01/2011 06/01/2011
1 gardening 06/01/2011 07/01/2011
2 cooking 03/01/2011 04/01/2011
2 cooking 04/01/2011 05/01/2011
So far I have done
-- Asker's attempt. cte numbers every row of #t in (ID, [Date]) order.
;with cte as(Select Rn= ROW_NUMBER() Over(order by ID,[Date]),* from #t)
-- cte2 walks that numbering recursively, one row per step.
,cte2 as(
-- Anchor: the first numbered row; its own date is used for both columns.
Select Rn
,ID,Activity,InitialDate =[Date],EndDate = [Date]
from cte where Rn =1
union all
-- Recursive step: the row whose Rn follows the previous one. It again emits
-- c1.Date for both InitialDate and EndDate, so the two columns always carry
-- the same value and no row is ever paired with its successor.
Select c1.Rn
,c1.ID,c1.Activity,c1.Date,c1.Date
from cte2 c2
join cte c1
on c1.rn = c2.Rn+1
)
select ID,Activity,InitialDate,EndDate from cte2
but the output is not correct
ID Activity InitialDate EndDate
1 gardening 2011-01-01 00:00:00.000 2011-01-01 00:00:00.000
1 gardening 2011-02-01 00:00:00.000 2011-02-01 00:00:00.000
1 gardening 2011-06-01 00:00:00.000 2011-06-01 00:00:00.000
1 gardening 2011-07-01 00:00:00.000 2011-07-01 00:00:00.000
2 cooking 2011-03-01 00:00:00.000 2011-03-01 00:00:00.000
2 cooking 2011-04-01 00:00:00.000 2011-04-01 00:00:00.000
2 cooking 2011-05-01 00:00:00.000 2011-05-01 00:00:00.000
Help needed
-- Number each ID's rows by date, then pair every row with the next row of the
-- same ID: the earlier date becomes INITIAL_DATE, the later one END_DATE.
-- The last row of an ID has no successor and yields no pair.
;with numbered as
(
    select t.*,
           row_number() over (partition by t.ID order by t.[Date]) as seq
    from #t as t
)
select cur.ID,
       cur.Activity,
       cur.[Date] as INITIAL_DATE,
       nxt.[Date] as END_DATE
from numbered as cur
inner join numbered as nxt
    on nxt.ID = cur.ID
   and nxt.seq = cur.seq + 1
order by cur.ID, cur.[Date]
Try this.
It gives the desired output in Oracle.
For SQL Server, check for the equivalent of Oracle's LEAD() function (SQL Server 2012 and later provide LEAD() as well).
-- Pair each row with the previous date of the same ID and activity. Ordering
-- the window by date DESC makes LEAD() return the next-older date, which is
-- the INITIAL_DATE of the interval that ends on the current row's date. The
-- oldest row of each group has no older date and is filtered out.
;with cte as (Select ID, Activity, [Date] from #t)
SELECT paired.ID,
       paired.Activity,
       paired.INITIAL_DATE,
       paired.END_DATE
from
(
    SELECT ID,
           Activity,
           lead([Date]) over (partition by ID, Activity order by [Date] desc) as INITIAL_DATE,
           [Date] as END_DATE
    from cte
) paired
WHERE paired.INITIAL_DATE is not null
-- the ordering belongs on the outer query; SQL Server rejects ORDER BY
-- inside a derived table
order by paired.ID, paired.Activity, paired.END_DATE