Group data by latest date per month - sql

I store data on a daily basis in the following table
-- Daily report rows; the primary key allows one row per (ReportDate, IdOne, IdTwo).
CREATE TABLE dbo.DemoTable
(
    ReportDate     DATE NOT NULL,
    IdOne          INT  NOT NULL,
    IdTwo          INT  NOT NULL,
    -- Comma added after this column: the key below is a table-level
    -- constraint and must be its own element in the definition list.
    NumberOfThings INT  NOT NULL DEFAULT 0,
    CONSTRAINT PK_DemoTable PRIMARY KEY NONCLUSTERED (ReportDate, IdOne, IdTwo)
);
I'd like to report on this but only pull out data (sum of NumberOfThings) for the latest date we have for each month.
Example data
INSERT INTO DemoTable (ReportDate, IdOne, IdTwo, NumberOfThings)
VALUES
    -- November 2016
    ('2016-11-02', 1, 2, 2),
    ('2016-11-02', 1, 3, 2),
    ('2016-11-01', 1, 2, 20),
    ('2016-11-01', 1, 3, 20),
    -- October 2016
    ('2016-10-31', 1, 2, 2),
    ('2016-10-31', 1, 3, 2),
    ('2016-10-30', 1, 2, 20),
    ('2016-10-30', 1, 3, 20),
    ('2016-10-29', 1, 2, 200),
    ('2016-10-29', 1, 3, 200),
    -- September 2016
    ('2016-09-30', 1, 2, 5),
    ('2016-09-30', 1, 3, 5),
    ('2016-09-29', 1, 2, 55),
    ('2016-09-29', 1, 3, 55)
So for this data I want to see:
2016-11-02 | 4
2016-10-31 | 4
2016-09-30 | 10
Thanks

You can use RANK() to spot the rows with the latest date in each month, and then sum them.
-- Rank every row's date inside its calendar month, newest first. Rows that
-- share the newest date all get rank 1, so they are summed together.
SELECT
    latest.ReportDate,
    SUM(latest.NumberOfThings)
FROM (
    SELECT
        src.*,
        RANK() OVER (
            PARTITION BY YEAR(src.ReportDate), MONTH(src.ReportDate)
            ORDER BY src.ReportDate DESC
        ) AS rnk
    FROM DemoTable src
) latest
WHERE latest.rnk = 1
GROUP BY latest.ReportDate

You can use query like this
-- Inner query: the newest ReportDate found in each (year, month).
-- Outer query: total NumberofThings for exactly those dates.
SELECT
    ReportDate,
    SUM(NumberofThings) AS SumNumberofThings
FROM DemoTable
WHERE ReportDate IN (
    SELECT MAX(ReportDate) MaxReportDate
    FROM DemoTable
    GROUP BY
        DATEPART(yy, reportdate),
        DATEPART(m, reportdate)
)
GROUP BY ReportDate

A typical method involves row_number(). The only trick is using date functions to get the year and the month:
-- Number the rows of each calendar month from newest to oldest and keep
-- only the row numbered 1.
SELECT ranked.*
FROM (
    SELECT
        src.*,
        ROW_NUMBER() OVER (
            PARTITION BY YEAR(src.ReportDate), MONTH(src.ReportDate)
            ORDER BY src.ReportDate DESC
        ) AS seqnum
    FROM DemoTable src
) ranked
WHERE ranked.seqnum = 1;
If there are duplicates per date, you would just do the same thing with aggregation:
-- Collapse the table to one row per date (summing NumberOfThings), number
-- those dates within each calendar month from newest to oldest, and keep
-- only the newest date of every month.
select dt.ReportDate, dt.NumberOfThings
from (select dt.ReportDate, sum(dt.NumberOfThings) as NumberOfThings,
             row_number() over (partition by year(dt.ReportDate), month(dt.ReportDate)
                                order by dt.ReportDate desc
                               ) as seqnum
      from DemoTable dt
      -- Grouping must be by date: grouping by NumberOfThings left ReportDate
      -- ungrouped (invalid) and would not produce a per-date sum.
      group by dt.ReportDate
     ) dt
where seqnum = 1;

Aggregate your data so as to get the sum per date. Then rank your records by date within month. Then pick the best ranked records.
-- Inner query: one row per ReportDate carrying that date's total, plus its
-- position within the calendar month (1 = newest date of the month).
-- Outer query: keep the newest date of each month, oldest month first.
SELECT
    ReportDate,
    SumNumberOfThings
FROM
(
    SELECT
        ReportDate,
        ROW_NUMBER() OVER (PARTITION BY YEAR(ReportDate), MONTH(ReportDate)
                           ORDER BY ReportDate DESC) AS rn,  -- separating comma was missing here
        SUM(NumberOfThings) AS SumNumberOfThings
    FROM DemoTable
    GROUP BY ReportDate
) ranked
WHERE rn = 1
ORDER BY ReportDate;

Related

Retrieve recent 5 days forecast for each cities with latest issue date

I need to retrieve the most recent 5 days of forecast info for each city.
My table looks like below
The real problem is with the issue date.
the city may contain several forecast info for the same date with distinct issue date.
I need to retrieve recent 5 records for each cities with latest issue date and group by forecast date
I have tried something like below but not giving the expected result
-- Asker's attempt (reported as not giving the expected result).
-- NOTE(review): CITY_ID and ISSUE_DATE are selected and used in the window
-- clause, but only FORECAST_DATE appears in GROUP BY. Standard SQL requires
-- every non-aggregated selected column to be listed in GROUP BY.
SELECT * FROM(
SELECT
-- Numbers each city's rows by newest forecast date, then newest issue date.
ROW_NUMBER () OVER (PARTITION BY CITY_ID ORDER BY FORECAST_DATE DESC, ISSUE_DATE DESC) AS rn,
CITY_ID, FORECAST_DATE, ISSUE_DATE
FROM
FORECAST
GROUP BY FORECAST_DATE
) WHERE rn <= 5
Any suggestion or advice will be helpful
This will get the latest issued forecast per day over the most recent 5 days for each city:
-- forecast_rank: position of the forecast date among the city's forecast
--                dates, newest first (rows sharing a date share a rank).
-- issue_rn:      1 marks the most recently issued row for that city and
--                forecast date.
SELECT *
FROM (
    SELECT
        fc.*,
        DENSE_RANK() OVER (
            PARTITION BY fc.city_id
            ORDER BY fc.forecast_date DESC
        ) AS forecast_rank,
        ROW_NUMBER() OVER (
            PARTITION BY fc.city_id, fc.forecast_date
            ORDER BY fc.issue_date DESC
        ) AS issue_rn
    FROM Forecast fc
)
WHERE issue_rn = 1
  AND forecast_rank <= 5;
Partition by works like group by but for the function only.
Try
-- r_ord numbers the rows of each (city_id, forecast_date) pair from the most
-- recent issue_date to the oldest; the outer query keeps the first five of
-- each pair.
with CTE as
(
select t1.*,
row_number() over (partition by city_id, forecast_date order by issue_date desc) as r_ord
from Forecast t1  -- alias added: "t1.*" above referenced an undeclared alias
)
select CTE.*
from CTE
where r_ord <= 5
Try this
-- rn counts the rows of each (CITY_ID, FORECAST_DATE) pair, newest
-- ISSUE_DATE first; up to five rows per pair are returned.
SELECT *
FROM (
    SELECT
        ROW_NUMBER() OVER (
            PARTITION BY CITY_ID, FORECAST_DATE
            ORDER BY ISSUE_DATE DESC
        ) AS rn,
        CITY_ID,
        FORECAST_DATE,
        ISSUE_DATE
    FROM FORECAST
)
WHERE rn <= 5

SQL- Trying to subtract date values from the same column

I need a subtraction of a max date to the last previous status date and cannot figure it out. I will be using FindingID and UpdatedEstimatedRemediationDate.
For example:
FindingID 'FND-5645' has been updated 3 times:
UpdatedEstimatedRemediationDate
--------------------------------
NULL
2015-06-15
2015-12-30
2016-06-30
I need to get the difference in days between June 30, 2016 and December 30, 2015. I am using SQL Server 2008 R2. Thanks in advance.
If I understand correctly, this is basically an aggregation query with datediff():
-- Days between the earliest and the latest non-NULL remediation date of each
-- finding (MIN/MAX skip NULLs). The closing parenthesis of datediff() was
-- missing in the original.
select findingid,
       datediff(day,
                min(UpdatedEstimatedRemediationDate),
                max(UpdatedEstimatedRemediationDate)) as DaysDiff
from t
group by findingid;
You can use ROW_NUMBER() to partition by FindingId and order by UpdateDate desc, pick the two most recent dates, and take the difference between them in days:
Setup:
-- drop table UpdatedEstimatedRemediationDate
CREATE TABLE UpdatedEstimatedRemediationDate
(
    FindingId  INT,
    UpdateDate DATE
)
-- Sample rows: finding 1 has three update dates, finding 2 has two.
INSERT INTO UpdatedEstimatedRemediationDate (FindingId, UpdateDate)
VALUES
    (1, '2015-06-15'),
    (1, '2015-12-30'),
    (1, '2016-06-30'),
    (2, '2015-07-13'),
    (2, '2016-05-01')
GO
Query:
-- RowNo 1 is each finding's most recent update date and RowNo 2 the one
-- before it; the self-join pairs the two and reports the gap in days.
;WITH Cte AS (
    SELECT
        FindingId,
        UpdateDate,
        ROW_NUMBER() OVER (
            PARTITION BY FindingId
            ORDER BY UpdateDate DESC
        ) AS RowNo
    FROM UpdatedEstimatedRemediationDate
)
SELECT
    previous.FindingId,
    DATEDIFF(day, previous.UpdateDate, newest.UpdateDate) AS DaysDiff
FROM Cte previous
INNER JOIN Cte newest
    ON newest.FindingId = previous.FindingId
WHERE previous.RowNo = 2
  AND newest.RowNo = 1
[no self join version]
For SQL Server 2012, SELF JOIN may be avoided using LAG/LEAD function:
-- DayDiff: days from a row's date to the next later date of the same finding
-- (NULL for the latest row). RowNo = 2 selects the second most recent row,
-- whose "next" date is the most recent one.
WITH CTE AS (
    SELECT
        FindingId,
        DATEDIFF(
            day,
            UpdateDate,
            LEAD(UpdateDate, 1, NULL) OVER (
                PARTITION BY FindingId
                ORDER BY UpdateDate
            )
        ) DayDiff,
        ROW_NUMBER() OVER (
            PARTITION BY FindingId
            ORDER BY UpdateDate DESC
        ) RowNo
    FROM UpdatedEstimatedRemediationDate
)
SELECT
    CTE.FindingId,
    CTE.DayDiff
FROM CTE
WHERE RowNo = 2

How to add numbers to grouped columns that might repeat, in chronological order

Query:
-- Temp table with one row per hire ('H') or termination ('T') date.
-- The original "DECLARE #EmploymentLength TABLE" is not valid: DECLARE ... TABLE
-- takes an @-prefixed variable name, while a #-prefixed name is a temporary
-- table and has to be created with CREATE TABLE. The #EmploymentLength name
-- is kept so every later reference to it still resolves.
CREATE TABLE #EmploymentLength
(
    EmployeeID INT,
    Date DATE,
    DateFlag CHAR(1),
    RowNumber INT  -- not populated by the INSERT below, so it stays NULL
);
INSERT INTO #EmploymentLength
(
    EmployeeID,
    Date,
    DateFlag
)
SELECT z.EmployeeID,
       z.Date,
       z.DateFlag
FROM (
         -- Hire dates, flagged 'H'.
         SELECT EmployeeId,
                HireDate AS Date,
                'H' AS DateFlag
         FROM dbo.Employment
         WHERE EmployeeId = 328195
               AND HireDate IS NOT NULL
         UNION
         -- Termination dates, flagged 'T'.
         SELECT EmployeeId,
                TerminationDate AS Date,
                'T' AS DateFlag
         FROM dbo.Employment
         WHERE EmployeeId = 328195
               AND TerminationDate IS NOT NULL
     ) z;
SELECT *
FROM #EmploymentLength
ORDER BY Date;
Result:
I need this to end up like this:
After this is done, I can group by the RowNumber to get the MAX() and MIN() of each row number group (1, 2, 3...).
If the last 2 records were "T", then I'd have 2 4's and so on.
EDIT
To clarify, I need to group each DateFlag and add a number to each group but it has to be in order ... (by date).
So in this example, you have 2 records that fall into the first group (group 1).
Then one record for group 2 (T)
Then one record for group 3 (H)
Then one record for group 4 (T)
You can do this with a difference of row_number() values to describe the group and then an additional dense_rank() to enumerate them. I think the following works:
-- grp is the gap between a row's position among all of the employee's rows
-- and its position among the rows with the same DateFlag; it stays constant
-- across a run of consecutive rows that share a flag.
SELECT
    runs.*,
    DENSE_RANK() OVER (PARTITION BY EmployeeId ORDER BY grp)
FROM (
    SELECT
        src.*,
        (
            ROW_NUMBER() OVER (PARTITION BY EmployeeId ORDER BY date)
            - ROW_NUMBER() OVER (PARTITION BY EmployeeId, DateFlag ORDER BY date)
        ) AS grp
    FROM #EmploymentLength src
) runs;
There are situations where the grp value might actually repeat for different groups within an employee. In that case, it is better to use the minimum date for each group for the enumeration:
-- grp identifies a run of consecutive rows with the same DateFlag, grpdate is
-- the earliest date in that run, and dense_rank() numbers the runs in date
-- order. The stray comma in "partition by EmployeeId, order by" is removed.
select el.*, dense_rank() over (partition by EmployeeId order by grpdate)
from (select el.*, min(date) over (partition by EmployeeId, DateFlag, grp) as grpdate
      from (select el.*,
                   (row_number() over (partition by EmployeeId order by date) -
                    row_number() over (partition by EmployeeId, DateFlag order by date)
                   ) as grp
            from #EmploymentLength el
           ) el
     ) el

SQL Select MAX and 2nd MAX

I am running a query against MS SQL Server 2008 and am selecting an accountnumber and the max of the column mydate grouped by accountnumber:
-- Newest mydate per account. The trailing comma after max(mydate) is removed
-- because a select list cannot end with a comma before FROM.
select AccountNumber,
       max(mydate)
from #SampleData
group by AccountNumber
I want to add a column to the result that contains the second highest mydate that is associated with the AccountNumber group. I know it would have to be something like:
-- Newest mydate that is strictly older than the table-wide maximum.
SELECT MAX(mydate)
FROM #SampleData
WHERE mydate < (
    SELECT MAX(mydate)
    FROM #SampleData
)
But how do I get both the max and 2nd max in one select query?
You didn't specify your DBMS so this is ANSI SQL:
-- rn numbers each account's rows from newest mydate to oldest; rn 1 and 2
-- are the max and the second max, returned as separate rows.
SELECT
    accountnumber,
    rn,
    mydate
FROM (
    SELECT
        accountnumber,
        mydate,
        ROW_NUMBER() OVER (
            PARTITION BY accountnumber
            ORDER BY mydate DESC
        ) AS rn
    FROM #SampleData
) numbered
WHERE rn <= 2;
Try this
-- Rnum numbers each account's rows from newest mydate to oldest. The CASE
-- expressions pick rows 1 and 2 and MAX() folds them into one row per account.
SELECT
    AccountNumber,
    MAX(CASE WHEN Rnum = 1 THEN mydate END) mydate_1,
    MAX(CASE WHEN Rnum = 2 THEN mydate END) mydate_2
FROM (
    SELECT
        AccountNumber,
        mydate,
        ROW_NUMBER() OVER (
            PARTITION BY AccountNumber
            ORDER BY mydate DESC
        ) AS Rnum
    FROM #SampleData
) V
GROUP BY AccountNumber
Something like this should select the second highest:
-- One row per account: the newest mydate, plus a scalar subquery that looks
-- up the newest mydate strictly earlier than that maximum for the same account.
select AccountNumber,
max(mydate),
-- max(#SampleData.mydate) refers to the outer query's table, so it is
-- evaluated per outer AccountNumber group rather than over SD2.
(select max(SD2.mydate) from #SampleData SD2 where SD2.AccountNumber=#SampleData.AccountNumber AND SD2.mydate<max(#SampleData.mydate))
from #SampleData
group by AccountNumber
You could also use a TOP N clause combined with an order by:
-- Returns each account's two newest rows using TOP.
-- TOP (1) WITH TIES keeps every row that ties with the first row on the
-- ORDER BY key. The key is 0 for an account's two newest rows and 1 for all
-- others, so exactly the rows with rn <= 2 come back. The previous
-- "TOP 2 ... ORDER BY rn" returned only two rows for the whole table, not
-- two per account.
select
    TOP (1) WITH TIES
    accountnumber,
    mydate,
    row_number() over (partition by accountnumber order by mydate desc) as rn
from #SampleData
ORDER BY
    case
        when row_number() over (partition by accountnumber order by mydate desc) <= 2 then 0
        else 1
    end

PL/SQL select records with the latest two dates

I have a table where i store the Customer id and the date they logged in.
Cust_ID REC_DATE
773209 11/5/2013 4:30:52 PM
817265 11/5/2013 4:31:19 PM
And so on
How can i see only the latest two records by date for each customer?
You can use the analytic function row_number():
-- seqnum numbers each customer's rows from the newest rec_date to the
-- oldest; keeping seqnum 1 and 2 yields the two latest logins per customer.
SELECT numbered.*
FROM (
    SELECT
        src.*,
        ROW_NUMBER() OVER (
            PARTITION BY cust_id
            ORDER BY rec_date DESC
        ) AS seqnum
    FROM yourtable src
) numbered
WHERE seqnum <= 2;