SQL- Trying to subtract date values from the same column - sql

I need a subtraction of a max date to the last previous status date and cannot figure it out. I will be using FindingID and UpdatedEstimatedRemediationDate.
For example:
FindingID 'FND-5645' has been updated 3 times:
UpdatedEstimatedRemediationDate
--------------------------------
NULL
2015-06-15
2015-12-30
2016-06-30
I need the number of days between December 30, 2015 (the previous value) and June 30, 2016 (the latest value). I am using SQL Server 2008 R2. Thanks in advance.

If I understand correctly, this is basically an aggregation query with datediff():
-- Days between the earliest and the latest remediation date of each finding.
-- MIN/MAX skip NULLs, so a row whose date is still NULL does not affect the result.
select findingid,
       datediff(day,
                min(UpdatedEstimatedRemediationDate),
                max(UpdatedEstimatedRemediationDate)) as DaysDiff
from t
group by findingid;

You can use ROW_NUMBER() to partition by FindingId and order by UpdateDate desc, pick first and last dates and have the date diff in days:
Setup:
-- Demo fixture: one row per (finding, remediation-date update).
-- DROP TABLE UpdatedEstimatedRemediationDate
CREATE TABLE UpdatedEstimatedRemediationDate
(
    FindingId  INT,
    UpdateDate DATE
);

INSERT INTO UpdatedEstimatedRemediationDate (FindingId, UpdateDate)
VALUES
    (1, '2015-06-15'),
    (1, '2015-12-30'),
    (1, '2016-06-30'),
    (2, '2015-07-13'),
    (2, '2016-05-01');
GO
Query:
-- Rank each finding's dates newest-first, then pair the newest row (RowNo = 1)
-- with the one just before it (RowNo = 2) and report the gap in days.
;WITH Ranked AS (
    SELECT
        FindingId,
        UpdateDate,
        ROW_NUMBER() OVER (PARTITION BY FindingId ORDER BY UpdateDate DESC) AS RowNo
    FROM UpdatedEstimatedRemediationDate
)
SELECT
    PrevRow.FindingId,
    DATEDIFF(day, PrevRow.UpdateDate, LastRow.UpdateDate) AS DaysDiff
FROM Ranked AS PrevRow
INNER JOIN Ranked AS LastRow
    ON LastRow.FindingId = PrevRow.FindingId
WHERE PrevRow.RowNo = 2
  AND LastRow.RowNo = 1
[no self join version]
For SQL Server 2012, SELF JOIN may be avoided using LAG/LEAD function:
-- For every row, compute the gap to the next later date of the same finding.
-- The second-newest row (RowNo = 2) therefore carries the gap to the newest date.
WITH Gaps AS (
    SELECT
        FindingId,
        DATEDIFF(
            day,
            UpdateDate,
            LEAD(UpdateDate, 1, NULL) OVER (PARTITION BY FindingId ORDER BY UpdateDate)
        ) AS DayDiff,
        ROW_NUMBER() OVER (PARTITION BY FindingId ORDER BY UpdateDate DESC) AS RowNo
    FROM UpdatedEstimatedRemediationDate
)
SELECT
    Gaps.FindingId,
    Gaps.DayDiff
FROM Gaps
WHERE Gaps.RowNo = 2

Related

Day after max date in data

I am loading data into a table. I don't have any info on how frequent or when the source data is loaded, all I know is I need data from the source to run my script.
Here's the issue: if I run max(date) I get the latest date from the source, but I don't know if the data is still loading. I've run into cases where I've only gotten a percentage of the data. Thus, I need the next business day after max date.
I want to know if there is a way to get the second latest date in the system. I know I can get max(date) - 1, but that gives me literally the previous calendar day, which is not what I need.
Example, if I run the script on Tuesday, max(date) will be Monday, but since weekend are not in the source system, I need to get Friday instead of Monday.
DATE
---------
2017-04-29
2017-04-25
2017-04-21
2017-04-19
2017-04-18
2017-04-15
2017-04-10
max(date) = 2017-04-29
how do I get 2017-04-25?
Depending on your version of SQL Server, you can use a windowing function like row_number:
-- Number the rows newest-first and keep the second one.
SELECT ranked.[Date]
FROM (
    SELECT
        [Date],
        ROW_NUMBER() OVER (ORDER BY [Date] DESC) AS rn
    FROM #yourtable
) AS ranked
WHERE ranked.rn = 2
Here is a demo.
Should you have multiple of the same date, you can perform a distinct first:
-- Collapse duplicate dates first so that row 2 is the second distinct date.
;WITH distinct_dates AS (
    SELECT DISTINCT [date]
    FROM #yourtable
)
SELECT ranked.[date]
FROM (
    SELECT
        [date],
        ROW_NUMBER() OVER (ORDER BY [date] DESC) AS rn
    FROM distinct_dates
) AS ranked
WHERE ranked.rn = 2;
You can use row_number and get second as below
-- Same technique, returning every column of the second-newest row plus its row number.
SELECT numbered.*
FROM (
    SELECT
        *,
        ROW_NUMBER() OVER (ORDER BY date DESC) AS Rown
    FROM yourtable
) AS numbered
WHERE numbered.Rown = 2
More recent SQL Server versions support FETCH FIRST:
-- Skip the newest row and return the next one (OFFSET/FETCH, SQL Server 2012+).
-- T-SQL requires the ROW/ROWS keyword after the OFFSET count.
select date
from tablename
order by date desc
offset 1 rows fetch first 1 row only
OFFSET 1 means skip one row. (The 2017-04-29 row.)
-- Inline sample data; the values are ISO yyyy-mm-dd strings, so string order
-- equals chronological order.
;WITH cte([DATE])
AS
(
    SELECT '2017-04-29' UNION ALL
    SELECT '2017-04-25' UNION ALL
    SELECT '2017-04-21' UNION ALL
    SELECT '2017-04-19' UNION ALL
    SELECT '2017-04-18' UNION ALL
    SELECT '2017-04-15' UNION ALL
    SELECT '2017-04-10'
)
SELECT [DATE]
FROM
(
    -- Rno is 0 for the newest date, 1 for the second newest, and so on.
    -- Ordering by [DATE] DESC makes the numbering deterministic; numbering by a
    -- value that is identical on every row would leave the row order arbitrary.
    SELECT [DATE],
           ROW_NUMBER() OVER (ORDER BY [DATE] DESC) - 1 AS Rno
    FROM cte
) Final
WHERE Final.Rno = 1
OutPut
DATE
-----
2017-04-25
You can also use FIRST_VALUE with a dynamic date something like DATEADD(DD, -1, GETDATE()). The example below has the date hard coded.
-- Newest [date] strictly earlier than the hard-coded cutoff date.
-- FIRST_VALUE over a descending order is identical on every row, so DISTINCT
-- reduces the output to a single row.
-- NOTE(review): CTE is not defined in this snippet; presumably it is the
-- sample-data CTE from the preceding answer. Confirm before running.
SELECT DISTINCT
FIRST_VALUE([date]) OVER(ORDER BY [date] DESC) AS FirstDate
FROM CTE
WHERE [date] < '2017-04-25'
Another way
-- Sample data in a table variable. Table variables are named with a leading @;
-- a #name is a temp table and cannot be DECLAREd.
DECLARE @T TABLE ([DATE] DATE)
INSERT INTO @T VALUES
('2017-04-29'),
('2017-04-25'),
('2017-04-21'),
('2017-04-19'),
('2017-04-18'),
('2017-04-15'),
('2017-04-10');

-- Latest date that does not fall on a Saturday or Sunday.
-- NOTE(review): DATENAME returns day names in the session language; the
-- comparison below assumes English. Confirm for non-English sessions.
SELECT
    MAX([DATE]) AS [DATE]
FROM @T
WHERE DATENAME(DW,[DATE]) NOT IN ('Saturday','Sunday')
Another way of doing it, just for example sake...
-- Take the two newest distinct dates and return the older of the two.
-- In T-SQL, DISTINCT must precede TOP in the select list.
SELECT MIN(A.date)
FROM
(
    SELECT DISTINCT TOP 2 date
    FROM YourTable AS C
    ORDER BY date DESC
) AS A

Group data by latest date per month

I store data on a daily basis in the following table
-- Daily snapshot table: one row per (ReportDate, IdOne, IdTwo).
CREATE TABLE dbo.DemoTable
(
    ReportDate DATE NOT NULL,
    IdOne INT NOT NULL,
    IdTwo INT NOT NULL,
    NumberOfThings INT NOT NULL DEFAULT 0,
    -- Table-level constraint, separated from the last column by a comma so it
    -- is not read as part of the NumberOfThings column definition.
    CONSTRAINT PK_DemoTable PRIMARY KEY NONCLUSTERED (ReportDate, IdOne, IdTwo)
)
I'd like to report on this but only pull out data (sum of NumberOfThings) for the latest date we have for each month.
Example data
-- Sample rows: several report dates per month, two (IdOne, IdTwo) pairs per date.
INSERT INTO DemoTable (ReportDate, IdOne, IdTwo, NumberOfThings)
VALUES
    -- November 2016
    ('2016-11-02', 1, 2, 2),
    ('2016-11-02', 1, 3, 2),
    ('2016-11-01', 1, 2, 20),
    ('2016-11-01', 1, 3, 20),
    -- October 2016
    ('2016-10-31', 1, 2, 2),
    ('2016-10-31', 1, 3, 2),
    ('2016-10-30', 1, 2, 20),
    ('2016-10-30', 1, 3, 20),
    ('2016-10-29', 1, 2, 200),
    ('2016-10-29', 1, 3, 200),
    -- September 2016
    ('2016-09-30', 1, 2, 5),
    ('2016-09-30', 1, 3, 5),
    ('2016-09-29', 1, 2, 55),
    ('2016-09-29', 1, 3, 55)
So for this data I want to see:
2016-11-02 | 4
2016-10-31 | 4
2016-09-30 | 10
Thanks
You can use RANK() to spot the rows with the latest date in each month, and then sum them.
-- RANK() gives 1 to every row that shares the latest ReportDate of its month;
-- those rows are then summed per date.
SELECT
    latest.ReportDate,
    SUM(latest.NumberOfThings)
FROM (
    SELECT
        d.ReportDate,
        d.NumberOfThings,
        RANK() OVER (
            PARTITION BY YEAR(d.ReportDate), MONTH(d.ReportDate)
            ORDER BY d.ReportDate DESC
        ) AS rnk
    FROM DemoTable AS d
) AS latest
WHERE latest.rnk = 1
GROUP BY latest.ReportDate
You can use query like this
-- Sum NumberofThings for the dates that are the maximum ReportDate of their (year, month).
SELECT
    ReportDate,
    SUM(NumberofThings) AS SumNumberofThings
FROM DemoTable
WHERE ReportDate IN (
    SELECT MAX(ReportDate) AS MaxReportDate
    FROM DemoTable
    GROUP BY YEAR(ReportDate), MONTH(ReportDate)
)
GROUP BY ReportDate
A typical method involves row_number(). The only trick is using date functions to get the year and the month:
-- Keep the single newest row of each (year, month).
SELECT numbered.*
FROM (
    SELECT
        src.*,
        ROW_NUMBER() OVER (
            PARTITION BY YEAR(ReportDate), MONTH(ReportDate)
            ORDER BY ReportDate DESC
        ) AS seqnum
    FROM DemoTable AS src
) AS numbered
WHERE numbered.seqnum = 1;
If there are duplicates per date, you would just do the same thing with aggregation:
-- Sum NumberOfThings per ReportDate, then keep the newest date of each month.
select dt.ReportDate, dt.NumberOfThings
from (select dt.ReportDate, sum(NumberOfThings) as NumberOfThings,
             row_number() over (partition by year(ReportDate), month(ReportDate)
                                order by ReportDate desc
                               ) as seqnum
      from DemoTable dt
      -- Group by the date: it is the non-aggregated column that is selected,
      -- partitioned and ordered on.
      group by dt.ReportDate
     ) dt
where seqnum = 1;
Aggregate your data so as to get the sum per date. Then rank your records by date within month. Then pick the best ranked records.
-- Aggregate to one total per ReportDate, number the dates newest-first within
-- each month, and keep the newest one.
SELECT
    ReportDate,
    SumNumberOfThings
FROM
(
    SELECT
        ReportDate,
        ROW_NUMBER() OVER (PARTITION BY YEAR(ReportDate), MONTH(ReportDate)
                           ORDER BY ReportDate DESC) AS rn,
        SUM(NumberOfThings) AS SumNumberOfThings
    FROM DemoTable
    GROUP BY ReportDate
) ranked
WHERE rn = 1
ORDER BY ReportDate;

Convert a list of dates to date ranges in SQL Server

I have a query as following:
-- All dates, oldest first.
SELECT [Date]
FROM [TableX]
ORDER BY [Date] ASC
The result is:
2016-06-01
2016-06-03
2016-06-10
2016-06-11
How can I get following pairs?
From To
2016-06-01 2016-06-03
2016-06-03 2016-06-10
2016-06-10 2016-06-11
If you're using SQL Server 2012 or later, you can use the LEAD method.
Accesses data from a subsequent row in the same result set without the use of a self-join in SQL Server 2016. LEAD provides access to a row at a given physical offset that follows the current row.
I think it would look like this for you:
-- Pair every date with the next later date; the last row has a NULL [To].
SELECT
    [Date] AS [From],
    LEAD([Date]) OVER (ORDER BY [Date]) AS [To]
FROM TableX
ORDER BY [Date]
Note that on the last row, the [To] field will be NULL. If you wanted to remove that row, you could put it in an inner query:
-- Same pairing, dropping the final row that has no successor.
SELECT
    pairs.[From],
    pairs.[To]
FROM (
    SELECT
        [Date] AS [From],
        LEAD([Date]) OVER (ORDER BY [Date]) AS [To]
    FROM TableX
) AS pairs
WHERE pairs.[To] IS NOT NULL
All you need to do is add a row number for each date.
Then unite all these rows by the next row (except the last row)
-- Number the dates oldest-first.
WITH cteDates AS
(
    SELECT [Date],
           ROW_NUMBER() OVER (ORDER BY [Date]) As RowNum
    FROM TableX
)
-- Pair each date with the date on the next row number. TOP (count - 1) drops
-- the last date, which has no successor.
SELECT TOP(SELECT COUNT(*) - 1 FROM cteDates)
    [Date] [From],
    (SELECT [Date] FROM cteDates WHERE RowNum = d.RowNum + 1) [To]
FROM cteDates d
-- Without an ORDER BY, TOP may drop an arbitrary row instead of the last one.
ORDER BY d.RowNum
A little tricky solution for SQL 2008.
-- Sample data in a table variable. Table variables are named with a leading @.
declare @tbl table(dt datetime)
insert @tbl values
('2016-06-01'),
('2016-06-03'),
('2016-06-10'),
('2016-06-11')

;with cte as (
    select dt, ROW_NUMBER() over(order by dt) rn --add number
    from @tbl
),
-- Join row n to row n+1 to get (start, end) pairs; the last row has no partner
-- and is dropped by the inner join.
newTbl as (
    select t1.dt start, t2.dt [end]
    from cte t1 inner join cte t2 on t1.rn+1=t2.rn
)
select *
from newTbl
The result is what you wish.
Since there are never any gaps, as you stated, you can just use DATEADD():
-- Pairs every distinct [Date] with the following calendar day, newest first.
-- NOTE(review): [TO] is always [FROM] + 1 day, so this only reproduces the
-- requested From/To pairs when consecutive rows are exactly one day apart.
-- Confirm against the data.
SELECT DISTINCT
[Date] as [FROM],
DATEADD(DAY,1,[Date]) as [TO]
FROM TableX
ORDER BY [Date] DESC

SQL Server 2008 Rolling Average

I am trying to create a SQL statement to calculate a 6 month rolling average for my data. I am following this guide:
SQL Query for 7 Day Rolling Average in SQL Server
but the problem is I am using SQL Server 2008 and the answer here is for 2012.
-- Per product: average of the current daily total and the six preceding rows
-- (uses a ROWS window frame).
SELECT
    daily.*,
    AVG(dailyusage) OVER (
        PARTITION BY productid
        ORDER BY productid, date
        ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
    ) AS rolling_avg
FROM (
    SELECT
        productid,
        date,
        SUM(usagecount) AS dailyusage
    FROM tbl
    GROUP BY productid, date
) AS daily
It will be appreciated if someone can help me translate it over to 2008 terms.
Thanks
You could try to use OUTER APPLY, but it won't be as efficient:
-- Daily totals per product, numbered in date order within each product.
;WITH CTE AS
(
    SELECT
        totals.*,
        ROW_NUMBER() OVER (PARTITION BY productid ORDER BY [date]) AS RN
    FROM (
        SELECT
            productid,
            [date],
            SUM(usagecount) AS dailyusage
        FROM dbo.YourTable
        GROUP BY productid, [date]
    ) AS totals
)
-- For each row, average the current row and up to six preceding rows of the
-- same product (row-number distance 0..6).
SELECT TOP 100 *
FROM CTE AS A
OUTER APPLY (
    SELECT AVG(win.dailyusage) AS rolling_avg
    FROM CTE AS win
    WHERE win.productid = A.productid
      AND win.RN BETWEEN A.RN - 6 AND A.RN
) AS B;

Find the start and end date (set based) in T-SQL

I have the below.
Name Date
A 2011-01-01 01:00:00.000
A 2011-02-01 02:00:00.000
A 2011-03-01 03:00:00.000
B 2011-04-01 04:00:00.000
A 2011-05-01 07:00:00.000
The desired output is
Name StartDate EndDate
-------------------------------------------------------------------
A 2011-01-01 01:00:00.000 2011-04-01 04:00:00.000
B 2011-04-01 04:00:00.000 2011-05-01 07:00:00.000
A 2011-05-01 07:00:00.000 NULL
How to achieve the same using TSQL in a set based approach.
DDL is as under
-- Sample data. #t is a temp table, so it is created with CREATE TABLE;
-- DECLARE ... TABLE is only valid for @-named table variables.
CREATE TABLE #t (PersonName VARCHAR(32), [Date] DATETIME)
INSERT INTO #t VALUES('A', '2011-01-01 01:00:00')
INSERT INTO #t VALUES('A', '2011-01-02 02:00:00')
INSERT INTO #t VALUES('A', '2011-01-03 03:00:00')
INSERT INTO #t VALUES('B', '2011-01-04 04:00:00')
INSERT INTO #t VALUES('A', '2011-01-05 07:00:00')
Select * from #t
-- cte1: G is the overall row number by Date minus the per-person row number by
-- Date. Within an uninterrupted run of rows for one person both counters rise
-- together, so G stays constant for the run.
;WITH cte1
AS (SELECT *,
ROW_NUMBER() OVER (ORDER BY Date) -
ROW_NUMBER() OVER (PARTITION BY PersonName
ORDER BY Date) AS G
FROM #t),
-- cte2: one row per (PersonName, G) group with its earliest date, numbered in
-- start-date order.
cte2
AS (SELECT PersonName,
MIN([Date]) StartDate,
ROW_NUMBER() OVER (ORDER BY MIN([Date])) AS rn
FROM cte1
GROUP BY PersonName,
G)
-- The EndDate of a group is the StartDate of the next group; the LEFT JOIN
-- leaves it NULL for the last group.
SELECT a.PersonName,
a.StartDate,
b.StartDate AS EndDate
FROM cte2 a
LEFT JOIN cte2 b
ON a.rn + 1 = b.rn
Because the result of CTEs are not generally materialised however
you may well find you get better performance if you materialize the
intermediate result yourself as below.
-- Materialise the start of every run of consecutive rows for one person.
-- #t2 is a temp table, so it is created with CREATE TABLE; DECLARE ... TABLE
-- is only valid for @-named table variables.
CREATE TABLE #t2 (
    rn INT IDENTITY(1, 1) PRIMARY KEY,
    PersonName VARCHAR(32),
    StartDate DATETIME );

-- G is the overall row number by Date minus the per-person row number by Date;
-- it is constant within an uninterrupted run of rows for one person.
-- The ORDER BY makes the IDENTITY values (rn) follow StartDate order.
INSERT INTO #t2 (PersonName, StartDate)
SELECT PersonName,
       MIN([Date]) StartDate
FROM (SELECT *,
             ROW_NUMBER() OVER (ORDER BY Date) -
             ROW_NUMBER() OVER (PARTITION BY PersonName
                                ORDER BY Date) AS G
      FROM #t) t
GROUP BY PersonName,
         G
ORDER BY StartDate

-- The EndDate of a run is the StartDate of the next run (NULL for the last one).
SELECT a.PersonName,
       a.StartDate,
       b.StartDate AS EndDate
FROM #t2 a
LEFT JOIN #t2 b
    ON a.rn + 1 = b.rn
-- Rows that share the same "next row of a different person" date belong to the
-- same run, so grouping by (PersonName, EndDate) yields one row per run.
SELECT
    runs.PersonName,
    MIN(runs.Date) AS StartDate,
    runs.EndDate
FROM (
    SELECT
        cur.PersonName,
        cur.Date,
        (
            /* get the earliest date after current date
               associated with a different person */
            SELECT MIN(later.Date)
            FROM #t AS later
            WHERE later.Date > cur.Date
              AND later.PersonName <> cur.PersonName
        ) AS EndDate
    FROM #t AS cur
) AS runs
GROUP BY runs.PersonName, runs.EndDate
ORDER BY StartDate
Basically, for every Date we find the nearest later date that is associated with a different PersonName. That gives us EndDate, which also identifies each group of consecutive dates for the same person.
Now we only need to group the data by PersonName & EndDate and get the minimal Date in every group as StartDate. And yes, sort the data by StartDate, of course.
Get a row number so you will know where the previous record is. Then, take a record and the next record after it. When the state changes we have a candidate row.
-- Number the rows by timestamp and join each row to the next one when the
-- state differs; a matched pair marks a state change.
select
    state,
    min(start_timestamp),
    max(end_timestamp)
from
(
    select
        cur.state,
        cur.timestamp_ as start_timestamp,
        nxt.timestamp_ as end_timestamp
    from
    (
        select
            *, row_number() over (order by timestamp_) as id
        from test
    ) as cur
    left outer join
    (
        select
            *, row_number() over (order by timestamp_) as id
        from test
    ) as nxt
    on
        cur.id = nxt.id - 1
        and cur.state != nxt.state
) as agg
group by state
having max(end_timestamp) is not null
union
-- The last row has no ending row, so it is appended with a NULL end.
-- PostgreSQL form: (select state, timestamp_, null from test order by timestamp_ desc limit 1)
-- SQL Server form: TOP needs a row count, and an ORDER BY inside a UNION branch
-- has to be wrapped in a derived table.
select last_row.state, last_row.timestamp_, null
from (select top 1 state, timestamp_ from test order by timestamp_ desc) as last_row
order by 2
;
Tested with PostgreSQL but should work with SQL Server as well
The other answer with the cte is a good one. Another option would be to iterate over the collection in any case. It's not set based, but it is another way to do it.
You will need to iterate to either A. assign a unique id to each record that corresponds to its transaction, or B. to actually get your output.
TSQL is not ideal for iterating over records, especially if you have a lot, and so I would recommend some other way of doing it, a small .net program or something that is better at iterating.
There's a very quick way to do this using a bit of Gaps and Islands theory:
-- Island is the row number by [Date] minus the row number by (PersonName, [Date]).
-- Within an uninterrupted run of rows for one person both counters rise
-- together, so Island stays constant for the run.
WITH CTE as (SELECT PersonName, [Date]
, Row_Number() over (ORDER BY [Date])
- Row_Number() over (ORDER BY PersonName, [Date]) as Island
FROM #t)
-- One output row per (Island, PersonName) with its earliest and latest [Date],
-- listed by earliest date.
Select PersonName, Min([Date]), Max([Date])
from CTE
GROUP BY Island, PersonName
ORDER BY Min([Date])