Applying LAG() to multiple rows with a null value - sql

Given:
-- Sample data expressed as two CTEs; the SELECT that uses them follows this fragment.
WITH
-- m: one row per ID and reporting period.
-- Only the first branch casts to datetime; the string literals in the other
-- branches are converted to that type by the UNION ALL.
m AS (
    SELECT 1 AS ID, CAST('03/01/2015' AS datetime) AS PERIOD_START, CAST('3/31/2015' AS datetime) AS PERIOD_END
    UNION ALL
    SELECT 1 AS ID, '04/01/2015', '4/28/2015'
    UNION ALL
    SELECT 1 AS ID, '05/01/2015', '5/31/2015'
    UNION ALL
    SELECT 1 AS ID, '06/01/2015', '06/30/2015'
    UNION ALL
    SELECT 1 AS ID, '07/01/2015', '07/31/2015'
),
-- a: audit events (timestamp plus status text) for the same ID.
a AS (
    SELECT 1 AS ID, CAST('2015-03-13 14:17:00.000' AS datetime) AS AUDIT_TIME, 'READ [2]' AS STATUS
    UNION ALL
    SELECT 1 AS ID, '2015-04-27 15:51:00.000' AS AUDIT_TIME, 'HELD [2]' AS STATUS
    UNION ALL
    SELECT 1 AS ID, '2015-07-08 17:54:00.000' AS AUDIT_TIME, 'COMPLETED [5]' AS STATUS
)
This query:
-- Pair every period with the audit event (if any) whose timestamp falls inside
-- the period, both bounds inclusive. Periods with no event keep NULL audit columns.
SELECT
    m.ID,
    m.PERIOD_START,
    m.PERIOD_END,
    a.AUDIT_TIME,
    a.STATUS
FROM m
LEFT JOIN a
    ON a.ID = m.ID
   AND a.AUDIT_TIME >= m.PERIOD_START
   AND a.AUDIT_TIME <= m.PERIOD_END
generates this record set:
ID PERIOD_START PERIOD_END AUDIT_TIME STATUS
1 2015-03-01 00:00:00.000 2015-03-31 00:00:00.000 2015-03-13 14:17:00.000 READ [2]
1 2015-04-01 00:00:00.000 2015-04-28 00:00:00.000 2015-04-27 15:51:00.000 HELD [2]
1 2015-05-01 00:00:00.000 2015-05-31 00:00:00.000 NULL NULL
1 2015-06-01 00:00:00.000 2015-06-30 00:00:00.000 NULL NULL
1 2015-07-01 00:00:00.000 2015-07-31 00:00:00.000 2015-07-08 17:54:00.000 COMPLETED [5]
I need the 4/27/15 entry repeated for May and June:
ID PERIOD_START PERIOD_END AUDIT_TIME STATUS
1 2015-03-01 00:00:00.000 2015-03-31 00:00:00.000 2015-03-13 14:17:00.000 READ [2]
1 2015-04-01 00:00:00.000 2015-04-28 00:00:00.000 2015-04-27 15:51:00.000 HELD [2]
1 2015-05-01 00:00:00.000 2015-05-31 00:00:00.000 2015-04-27 15:51:00.000 HELD [2]
1 2015-06-01 00:00:00.000 2015-06-30 00:00:00.000 2015-04-27 15:51:00.000 HELD [2]
1 2015-07-01 00:00:00.000 2015-07-31 00:00:00.000 2015-07-08 17:54:00.000 COMPLETED [5]
Using the LAG() function:
-- Same join as before, plus the previous period's audit values via LAG().
-- LAG() looks back exactly one row, so it returns NULL again as soon as
-- two consecutive periods have no audit event.
SELECT
    m.ID,
    m.PERIOD_START,
    m.PERIOD_END,
    a.AUDIT_TIME,
    LAG(a.AUDIT_TIME, 1) OVER (PARTITION BY m.ID ORDER BY m.PERIOD_START) AS PRIOR_AUDIT_TIME,
    a.STATUS,
    LAG(a.STATUS, 1) OVER (PARTITION BY m.ID ORDER BY m.PERIOD_START) AS PRIOR_STATUS
FROM m
LEFT JOIN a
    ON a.ID = m.ID
   AND a.AUDIT_TIME >= m.PERIOD_START
   AND a.AUDIT_TIME <= m.PERIOD_END
only works for a single row:
ID PERIOD_START PERIOD_END AUDIT_TIME PRIOR_AUDIT_TIME STATUS PRIOR_STATUS
1 2015-03-01 00:00:00.000 2015-03-31 00:00:00.000 2015-03-13 14:17:00.000 NULL READ [2] NULL
1 2015-04-01 00:00:00.000 2015-04-28 00:00:00.000 2015-04-27 15:51:00.000 2015-03-13 14:17:00.000 HELD [2] READ [2]
1 2015-05-01 00:00:00.000 2015-05-31 00:00:00.000 NULL 2015-04-27 15:51:00.000 NULL HELD [2]
1 2015-06-01 00:00:00.000 2015-06-30 00:00:00.000 NULL NULL NULL NULL
1 2015-07-01 00:00:00.000 2015-07-31 00:00:00.000 2015-07-08 17:54:00.000 NULL COMPLETED [5] NULL
Is there a way to do this without having to resort to a cursor?

You can do this with window functions:
-- q: every period with the audit event (if any) that falls inside it.
WITH q AS (
    SELECT
        m.ID,
        m.PERIOD_START,
        m.PERIOD_END,
        a.AUDIT_TIME,
        a.STATUS
    FROM m
    LEFT JOIN a
        ON a.ID = m.ID
       AND a.AUDIT_TIME BETWEEN m.PERIOD_START AND m.PERIOD_END
),
-- grouped: running MAX of AUDIT_TIME per ID in period order. A period with no
-- event inherits the latest audit time seen so far, which doubles as a group key.
grouped AS (
    SELECT
        q.*,
        MAX(q.AUDIT_TIME) OVER (PARTITION BY q.ID ORDER BY q.PERIOD_START) AS audit_grp
    FROM q
)
-- Within a group only the row that owns the audit event has a non-NULL STATUS,
-- so MAX(STATUS) copies that status to every row of the group.
SELECT
    grouped.*,
    MAX(grouped.STATUS) OVER (PARTITION BY grouped.ID, grouped.audit_grp) AS imputed_status
FROM grouped
The idea is to copy the audit_time value over, using max() as a cumulative window function. This then defines groups, so you can get the status as well.
ANSI SQL provides an IGNORE NULLS option for LAG(), but SQL Server did not support it when this was written (support was added in SQL Server 2022).

Related

PIVOT datetime and ORDER column values of multiple rows

I have a table with values which are not in order
Id
DateTime
Status
1
2022-03-01 18:00:00.000
Stop1
2
2022-03-01 08:00:00.000
Start
3
2022-03-01 20:00:00.000
Stop2
4
2022-03-02 09:00:00.000
Start
5
2022-03-01 10:00:00.000
Stop2
6
2022-03-02 11:00:00.000
Finish
7
2022-03-01 14:00:00.000
Start
8
2022-03-02 10:00:00.000
Stop1
where Status can be 'Start', 'Stop1', 'Stop2', or 'Finish'.
I need the timeline like this, where the values are pivoted in the order (from the earliest to the latest; id is not relevant at this point)
Id
Start
Stop1
Stop2
Finish
2
2022-03-01 08:00:00
NULL
2022-03-01 10:00:00
NULL
7
2022-03-01 14:00:00
2022-03-01 18:00:00
2022-03-01 20:00:00
NULL
4
2022-03-02 09:00:00
2022-03-02 10:00:00
NULL
2022-03-02 11:00:00
After I PIVOTed it in SQL Server
-- Turn each row's DateTime into the column named after its Status.
-- Id is fed to PIVOT as well, so it becomes an implicit grouping column and
-- every source row ends up on its own output row.
-- NOTE: "table" is a placeholder for the real table name.
SELECT p.*
FROM (
    SELECT
        Id,
        DateTime,
        Status
    FROM table
) AS t
PIVOT (
    MAX(DateTime)
    FOR Status IN (Start, Stop1, Stop2, Finish)
) AS p
I got
Id
Start
Stop1
Stop2
Finish
2
2022-03-01 08:00:00
NULL
NULL
NULL
5
NULL
NULL
2022-03-01 10:00:00
NULL
7
2022-03-01 14:00:00
NULL
NULL
NULL
1
NULL
2022-03-01 18:00:00
NULL
NULL
3
NULL
NULL
2022-03-01 20:00:00
NULL
6
NULL
NULL
NULL
2022-03-02 11:00:00
8
NULL
2022-03-02 10:00:00
NULL
NULL
4
2022-03-02 09:00:00
NULL
NULL
NULL
How can I get that timeline?
Perhaps this will help. The window functions can be invaluable
Also, remember to "FEED" your pivot with only the required columns.
Example
-- Build one timeline row per "Start" event.
SELECT *
FROM (
    -- Replace each row's ID with the ID of the 'Start' row of its group, so that
    -- PIVOT (which groups by every column it is not pivoting) collapses the group.
    SELECT
        MIN(CASE WHEN A.Status = 'Start' THEN A.ID END) OVER (PARTITION BY A.Grp) AS id,
        A.DateTime,
        A.Status
    FROM (
        -- Grp: running count of 'Start' rows in time order; it increases by one
        -- at every 'Start', so all rows up to the next 'Start' share a value.
        SELECT
            YourTable.*,
            SUM(CASE WHEN [Status] = 'Start' THEN 1 ELSE 0 END) OVER (ORDER BY datetime) AS Grp
        FROM YourTable
    ) AS A
) AS src
PIVOT (
    MAX(DateTime)
    FOR Status IN (Start, Stop1, Stop2, Finish)
) AS p
Results

Select the last entry recorded in a table for each day, within a duration of days

How can I select the last entry recorded for each day? In this example, I need the last item number ordered and the last DateOrdered entry for each day over the last 5 days. Here's my table:
ItemNumber | DateOrdered
1 2020-04-01 08:00:00.000
3 2020-04-01 09:00:00.000
5 2020-04-01 10:00:00.000
4 2020-04-02 09:00:00.000
6 2020-04-02 10:00:00.000
7 2020-04-03 08:00:00.000
3 2020-04-03 09:00:00.000
2 2020-04-03 10:00:00.000
5 2020-04-04 10:00:00.000
8 2020-04-05 08:00:00.000
2 2020-04-05 09:00:00.000
8 2020-04-05 10:00:00.000
Here's the results I need:
ItemNumber | DateOrdered
5 2020-04-01 10:00:00.000
6 2020-04-02 10:00:00.000
2 2020-04-03 10:00:00.000
5 2020-04-04 10:00:00.000
8 2020-04-05 10:00:00.000
This is as close as I can get with it:
-- Asker's attempt: rank the rows of each day, newest first, and keep rank 1.
-- DATEDIFF(day, 0, DateOrdered) is the whole-day count since the base date,
-- used here as the per-day partition key.
WITH tempTable AS (
    SELECT
        myTable.*,
        ROW_NUMBER() OVER (
            PARTITION BY DATEDIFF(day, 0, DateOrdered)
            ORDER BY DateOrdered DESC
        ) AS rn
    FROM myTable
)
SELECT tempTable.*
FROM tempTable
WHERE tempTable.rn = 1
You are almost there. You just need to fix the definition of your partition so it puts together all rows that belong to the same day.
This should do it:
-- Keep only the latest order of each calendar day.
WITH tempTable AS (
    SELECT
        ItemNumber,
        DateOrdered,
        -- rn = 1 marks the most recent row within each calendar day.
        ROW_NUMBER() OVER (
            PARTITION BY CAST(DateOrdered AS date)
            ORDER BY DateOrdered DESC
        ) AS rn
    FROM myTable
)
-- Return just the two requested columns (the helper rn is not part of the
-- wanted result), sorted by day so the output order is deterministic.
SELECT
    ItemNumber,
    DateOrdered
FROM tempTable
WHERE rn = 1
ORDER BY DateOrdered

Find range of dates within same column

I have a data set, which looks like this:
ResourceID RequirementId ProjectID Startdate EndDate BillingPercentage
-------------------- -------------------- -------------------- ----------------------- ----------------------- ---------------------------------------
1 5066 7505 2015-09-15 00:00:00.000 2015-09-30 00:00:00.000 50
2 4748 7499 2015-09-10 00:00:00.000 2015-09-20 00:00:00.000 50
I want to calculate the date ranges and the corresponding billing % for that particular month. My query is:
-- Seed row: the whole month at 0 % billing.
-- The month bounds are T-SQL variables, which are written with @ (a leading #
-- is not a valid variable reference); presumably declared earlier in the batch.
INSERT INTO #DateTimeline
SELECT @MonthStartDate AS OSTARTDATE, @MonthEndDate AS OENDDATE, 0;

-- One row per unbilled allocation, covering its full date range.
INSERT INTO #DateTimeline
SELECT Startdate AS OSTARTDATE, EndDate AS OENDDATE, BillingPercentage
FROM #RESOURCE_UNBILLED
ORDER BY Startdate;

-- One zero-length row at each allocation's end date.
INSERT INTO #DateTimeline
SELECT EndDate AS OSTARTDATE, EndDate AS OENDDATE, BillingPercentage
FROM #RESOURCE_UNBILLED
ORDER BY Startdate;
And data looks like following:
SerialNo OSTARTDATE OENDDATE BillingPercentage
----------- ----------------------- ----------------------- ---------------------------------------
1 2015-09-01 00:00:00.000 2015-09-30 00:00:00.000 0
2 2015-09-10 00:00:00.000 2015-09-20 00:00:00.000 50
3 2015-09-15 00:00:00.000 2015-09-30 00:00:00.000 50
4 2015-09-20 00:00:00.000 2015-09-20 00:00:00.000 50
5 2015-09-30 00:00:00.000 2015-09-30 00:00:00.000 50
I want to retrieve data like the following:
OSTARTDATE OENDDATE BillingPercentage
----------- ----------------------- ----------------------- ---------------------------------------
2015-09-01 00:00:00.000 2015-09-10 00:00:00.000 0
2015-09-10 00:00:00.000 2015-09-15 00:00:00.000 50
2015-09-15 00:00:00.000 2015-09-20 00:00:00.000 100
2015-09-20 00:00:00.000 2015-09-30 00:00:00.000 50
Please suggest how I can get this. Also, can I use PIVOT here?
Use a table variable to store your #dateStamps with columns: SerialNo, OSTARTDATE and OENDDATE.
Try this query:
-- For each bucket in #dateStamps, total the BillingPercentage of every
-- allocation whose [Startdate, EndDate] range overlaps the bucket.
SELECT
    d.SerialNo,
    d.OSTARTDATE,
    d.OENDDATE,
    -- SUM over zero matching rows is NULL; report 0 for buckets with no billing.
    COALESCE(
        (
            SELECT SUM(t.BillingPercentage)
            FROM yourTable AS t
            -- Inclusive interval-overlap test. Unlike checking whether either
            -- bucket endpoint lies inside the allocation, this also matches an
            -- allocation that starts and ends strictly inside the bucket.
            WHERE d.OSTARTDATE <= t.EndDate
              AND d.OENDDATE >= t.Startdate
        ),
        0
    ) AS BillingPercentage
FROM #dateStamps AS d
My complete code is:
-- Width, in days, of each reporting bucket. T-SQL variables must start with @.
DECLARE @DatePart AS int = 5;

-- dateStamps: consecutive @DatePart-day buckets, starting on the first day of
-- the month that contains the earliest Startdate.
WITH dateStamps AS (
    -- Anchor: the first bucket. DATEADD/DATEDIFF truncates MIN(Startdate) to the
    -- first of its month, so the year comes from the data rather than a literal.
    SELECT
        1 AS SerialNo,
        DATEADD(month, DATEDIFF(month, 0, MIN(t.Startdate)), 0) AS OSTARTDATE,
        DATEADD(day, @DatePart - 1,
                DATEADD(month, DATEDIFF(month, 0, MIN(t.Startdate)), 0)) AS OENDDATE
    FROM yourTable AS t
    UNION ALL
    -- Recursive step: the next bucket starts @DatePart days after the previous one.
    SELECT
        ds.SerialNo + 1,
        DATEADD(day, @DatePart, ds.OSTARTDATE),
        DATEADD(day, @DatePart * 2 - 1, ds.OSTARTDATE)
    FROM dateStamps AS ds
    -- Stop once the next bucket would start in another month. Equality (rather
    -- than <=) also stops at the December -> January wrap-around.
    WHERE MONTH(DATEADD(day, @DatePart, ds.OSTARTDATE)) = MONTH(ds.OSTARTDATE)
)
SELECT
    d.SerialNo,
    d.OSTARTDATE,
    d.OENDDATE,
    -- SUM over zero matching rows is NULL; report 0 for buckets with no billing.
    COALESCE(
        (
            SELECT SUM(t.BillingPercentage)
            FROM yourTable AS t
            -- Inclusive interval-overlap test; it also matches an allocation
            -- that lies entirely inside the bucket.
            WHERE d.OSTARTDATE <= t.EndDate
              AND d.OENDDATE >= t.Startdate
        ),
        0
    ) AS BillingPercentage
FROM dateStamps AS d

Distinct values for a distinct datadate in SQL

In my table I sometimes have two values for the same date, but I just need one of them. Is there any way to select a distinct value based on the distinct date?
example:
DATADATE ID
2008-06-30 00:00:00.000 12
2008-03-31 00:00:00.000 12
2007-12-31 00:00:00.000 3
2007-12-31 00:00:00.000 12
2007-09-30 00:00:00.000 3
2007-09-30 00:00:00.000 12
2007-06-30 00:00:00.000 3
2007-06-30 00:00:00.000 12
2007-03-31 00:00:00.000 3
2007-03-31 00:00:00.000 12
2006-12-31 00:00:00.000 3
2006-09-30 00:00:00.000 3
2006-06-30 00:00:00.000 3
What I need to get is this:
DATADATE ID
2008-06-30 00:00:00.000 12
2008-03-31 00:00:00.000 12
2007-12-31 00:00:00.000 12
2007-09-30 00:00:00.000 12
2007-06-30 00:00:00.000 12
2007-03-31 00:00:00.000 12
2006-12-31 00:00:00.000 3
2006-09-30 00:00:00.000 3
2006-06-30 00:00:00.000 3
Any help is really appreciated, thanks.
You could use group by:
-- One row per DATADATE, keeping the highest ID recorded for that date.
SELECT
    DATADATE,
    MAX(ID) AS ID  -- the sample data's column is ID; "IDs" does not exist
FROM YourTable
GROUP BY DATADATE
ORDER BY DATADATE DESC
If you are using sql server 2005+. Then you can do this:
-- One row per DATADATE, keeping the row with the highest ID for that date.
;WITH CTE AS (
    SELECT
        -- Number the rows of each date, highest ID first. Partitioning by
        -- DATADATE (not by ID) is what yields one row per date; partitioning by
        -- ID would return only the latest date of each ID.
        ROW_NUMBER() OVER (PARTITION BY DATADATE ORDER BY ID DESC) AS RowNbr,
        Table1.*
    FROM Table1
)
SELECT *
FROM CTE
WHERE CTE.RowNbr = 1
EDIT
In the CTE you can join or do whatever you want to get the output you need. Like this:
-- Template: the CTE may join other tables before the rows are numbered.
-- Keeps, for each Table1.ID, the joined row with the latest Table2.DATADATE.
;WITH CTE AS (
    SELECT
        ROW_NUMBER() OVER (
            PARTITION BY Table1.ID
            ORDER BY Table2.DATADATE DESC
        ) AS RowNbr,
        Table1.*
    FROM Table1
    JOIN Table2 ON Table1.ID = Table2.ID
)
SELECT *
FROM CTE
WHERE CTE.RowNbr = 1

Problem in Start And End Dates using CTE

I have the below input
ID Activity Date
1 gardening 2011-01-01 00:00:00.000
1 gardening 2011-02-01 00:00:00.000
2 cooking 2011-03-01 00:00:00.000
2 cooking 2011-04-01 00:00:00.000
2 cooking 2011-05-01 00:00:00.000
1 gardening 2011-06-01 00:00:00.000
1 gardening 2011-07-01 00:00:00.000
The ddl is as under
-- Sample data. A name starting with # is a temp table, which must be created
-- with CREATE TABLE (DECLARE ... TABLE only accepts @-prefixed table variables).
CREATE TABLE #t (ID int, Activity varchar(50), [Date] datetime);

-- Dates use the yyyymmdd form so they parse the same under every
-- language / DATEFORMAT setting (the originals were month/day/year).
INSERT INTO #t (ID, Activity, [Date])
          SELECT 1, 'gardening', '20110101'
UNION ALL SELECT 1, 'gardening', '20110201'
UNION ALL SELECT 2, 'cooking',   '20110301'
UNION ALL SELECT 2, 'cooking',   '20110401'
UNION ALL SELECT 2, 'cooking',   '20110501'
UNION ALL SELECT 1, 'gardening', '20110601'
UNION ALL SELECT 1, 'gardening', '20110701';

select * from #t
Expected output
ID ACTIVITY INITIAL_DATE END_DATE
1 gardening 01/01/2011 02/01/2011
1 gardening 02/01/2011 06/01/2011
1 gardening 06/01/2011 07/01/2011
2 cooking 03/01/2011 04/01/2011
2 cooking 04/01/2011 05/01/2011
So far I have done
-- Asker's attempt. cte numbers every row of #t in (ID, Date) order.
;with cte as(Select Rn= ROW_NUMBER() Over(order by ID,[Date]),* from #t)
-- cte2 walks the numbered rows one at a time, starting from row 1.
,cte2 as(
-- Anchor member: the first numbered row; both output dates are that row's Date.
Select Rn
,ID,Activity,InitialDate =[Date],EndDate = [Date]
from cte where Rn =1
union all
-- Recursive member: the row numbered one higher than the previous step.
-- Both date columns are taken from that same row (c1), never from the
-- previous row (c2), so InitialDate always equals EndDate.
Select c1.Rn
,c1.ID,c1.Activity,c1.Date,c1.Date
from cte2 c2
join cte c1
on c1.rn = c2.Rn+1
)
select ID,Activity,InitialDate,EndDate from cte2
but the output is not correct
ID Activity InitialDate EndDate
1 gardening 2011-01-01 00:00:00.000 2011-01-01 00:00:00.000
1 gardening 2011-02-01 00:00:00.000 2011-02-01 00:00:00.000
1 gardening 2011-06-01 00:00:00.000 2011-06-01 00:00:00.000
1 gardening 2011-07-01 00:00:00.000 2011-07-01 00:00:00.000
2 cooking 2011-03-01 00:00:00.000 2011-03-01 00:00:00.000
2 cooking 2011-04-01 00:00:00.000 2011-04-01 00:00:00.000
2 cooking 2011-05-01 00:00:00.000 2011-05-01 00:00:00.000
Help needed
-- Pair each row with the next row of the same ID (in Date order): the current
-- row supplies INITIAL_DATE and the following row supplies END_DATE.
-- The last row of each ID has no successor, so the inner join drops it.
;WITH numbered AS (
    SELECT
        src.*,
        ROW_NUMBER() OVER (PARTITION BY src.ID ORDER BY src.[Date]) AS rn
    FROM #t AS src
)
SELECT
    cur.ID,
    cur.Activity,
    cur.[Date] AS INITIAL_DATE,
    nxt.[Date] AS END_DATE
FROM numbered AS cur
INNER JOIN numbered AS nxt
    ON nxt.ID = cur.ID
   AND nxt.rn = cur.rn + 1
ORDER BY
    cur.ID,
    cur.[Date]
Try this.
It gives the desired output in Oracle.
Please check for the SQL Server equivalent of Oracle's LEAD() function.
-- For every (ID, Activity) pair, emit each date together with the date that
-- precedes it. Written in SQL Server syntax; LEAD() needs SQL Server 2012+.
WITH cte AS (
    SELECT
        ID,
        Activity,
        [Date]
    FROM #t
)
SELECT
    paired.ID,
    paired.Activity,
    paired.INITIAL_DATE,
    paired.END_DATE
FROM (
    SELECT
        ID,
        Activity,
        -- With the window ordered by date DESC, the "next" row that LEAD()
        -- reads is the previous (earlier) date.
        LEAD([Date]) OVER (PARTITION BY ID, Activity ORDER BY [Date] DESC) AS INITIAL_DATE,
        [Date] AS END_DATE
    FROM cte
) AS paired  -- SQL Server requires an alias on a derived table
-- The earliest row of each pair has no predecessor; leave it out.
WHERE paired.INITIAL_DATE IS NOT NULL
-- Sorting happens here: ORDER BY is not allowed inside the derived table.
ORDER BY
    paired.ID,
    paired.Activity,
    paired.END_DATE