how to fetch year on year data based on rule - sql

I have a table with id and date I need two outputs as below. I will pass an input date and I need records according to below conditions
output 1: I need less than or equal date data either it is month end or not for the first row and first-row date-1 year month end data and first-row date-2 year month end data and if data is not available for the particular year then return null on that row.
output 2: Same as first but with a twist that I need whatever data is available on first row date-1 and -2 I do not need null.
declare #tbl table (id int , marketdate date )
insert into #tbl (id,marketdate)
values (1,'2018-05-31'),
(1,'2017-05-29'),
(1,'2016-05-31'),
(2,'2018-02-28'),
(2,'2017-02-28'),
(2,'2016-02-29'),
(2,'2016-02-28')
My query :
;with cte as (
select id , marketdate
from (
select id , marketdate ,row_number() over(partition by id order by marketdate desc) rn
from #tbl
where marketdate <='2018-06-05'
) a where rn=1
union all
select id , marketdate
from (
select b.id , b.marketdate ,row_number() over(partition by b.id order by b.marketdate desc) rn
from #tbl b inner join cte c
on b.id= c.id
where b.marketdate<= dateadd(year,-1,c.marketdate )
) b where rn=1
)
select * from cte
order by id, marketdate desc
output 1:
1 2018-05-31
1 NUll
1 2016-05-31
2 2018-02-28
2 2017-02-28
2 2016-02-29
output 2:
1 2018-05-31
1 2017-05-29
1 2016-05-31
2 2018-02-28
2 2017-02-28
2 2016-02-29
Please help.

Related

Create episode for each value with new Begin and End Dates

This is in reference to below Question
Loop through each value to the seq num
But now Client want to see the data differently and started a new thread for this question.
below is the requirement.
This is the data .
ID seqNum DOS Service End Date
1 1 1/1/2017 1/15/2017
1 2 1/16/2017 1/16/2017
1 3 1/17/2017 1/21/2017
1 4 1/22/2017 2/13/2017
1 5 2/14/2017 3/21/2017
1 6 2/16/2017 3/21/2017
Expected outPut:
ID SeqNum DOSBeg DOSEnd
1 1 1/1/2017 1/30/2017
1 2 1/31/2017 3/1/2017
1 3 3/2/2017 3/31/2017
For each DOSBeg, add 29 and that is DOSEnd. then Add 1 to DOSEnd (1/31/2017) is new DOSBeg.
Now add 29 to (1/31/2017) and that is 3/1/2017 which is DOSEnd . Repeat this untill DOSend >=Max End Date i.e 3/21/2017.
Basically, we need episode of 29 days for each ID.
I tried with this code and it is giving me duplicates.
with cte as (
select ID, minDate as DOSBeg,dateadd(day,29,mindate) as DOSEnd
from #temp
union all
select ID,dateadd(day,1,DOSEnd) as DOSBeg,dateadd(day,29,dateadd(day,1,DOSEnd)) as DOSEnd
from cte
)
select ID,DOSBeg,DOSEnd
from cte
OPTION (MAXRECURSION 0)
Here mindate is Minimum DOS for this ID i.e. 1/1/2017
I came up with below logic and this is working fine for me. Is there any better way than this ?
declare #table table (id int, seqNum int identity(1,1), DOS date, ServiceEndDate date)
insert into #table
values
(1,'20170101','20170115'),
(1,'20170116','20170116'),
(1,'20170117','20170121'),
(1,'20170122','20170213'),
(1,'20170214','20170321'),
(1,'20170216','20170321'),
(2,'20170101','20170103'),
(2,'20170104','20170118')
select * into #temp from #table
--drop table #data
select distinct ID, cast(min(DOS) over (partition by ID) as date) as minDate
,row_Number() over (partition by ID order by ID, DOS) as SeqNum,
DOS,
max(ServiceEndDate) over (partition by ID)as maxDate
into #data
from #temp
--drop table #StartDateLogic
with cte as
(select ID,mindate as startdate,maxdate
from #data
union all
select ID,dateadd(day,30,startdate) as startdate,maxdate
from cte
where maxdate >= dateadd(day,30,startdate))
select distinct ID,startdate
into #StartDateLogic
from cte
OPTION (MAXRECURSION 0)
--final Result set
select ID
,ROW_NUMBER() over (Partition by ID order by ID,StartDate) as SeqNum
,StartDate
,dateadd(day,29,startdate) as EndDate
from #StartDateLogic
You were on the right track wit the recursive cte, but you forgot the anchor.
declare #table table (id int, seqNum int identity(1,1), DOS date, ServiceEndDate date)
insert into #table
values
(1,'20170101','20170115'),
(1,'20170116','20170116'),
(1,'20170117','20170121'),
(1,'20170122','20170213'),
(1,'20170214','20170321'),
(1,'20170216','20170321'),
(2,'20170101','20170103'),
(2,'20170104','20170118')
;with dates as(
select top 1 with ties id, seqnum, DOSBeg = DOS, DOSEnd = dateadd(day,29,DOS)
from #table
order by row_number() over (partition by id order by seqnum)
union all
select t.id, t.seqNum, DOSBeg = dateadd(day,1,d.DOSEnd), DOSEnd = dateadd(day,29,dateadd(day,1,d.DOSEnd))
from dates d
inner join #table t on
d.id = t.id and t.seqNum = d.seqNum + 1
)
select *
from dates d
where d.DOSEnd <= (select max(dateadd(month,1,ServiceEndDate)) from #table where id = d.id)
order by id, seqNum

First value in DATE minus 30 days SQL

I have bunch of data out of which I'm showing ID, max date and it's corresponding values (user id, type, ...). Then I need to take MAX date for each ID, substract 30 days and show first date and it's corresponding values within this date period.
Example:
ID Date Name
1 01.05.2018 AAA
1 21.04.2018 CCC
1 05.04.2018 BBB
1 28.03.2018 AAA
expected:
ID max_date max_name previous_date previous_name
1 01.05.2018 AAA 05.04.2018 BBB
I have working solution using subselects, but as I have quite huge WHERE part, refresh takes ages.
SUBSELECT looks like that:
(SELECT MIN(N.name)
FROM t1 N
WHERE N.ID = T.ID
AND (N.date < MAX(T.date) AND N.date >= (MAX(T.date)-30))
AND (...)) AS PreviousName
How'd you write the select?
I'm using TSQL
Thanks
I can do this with 2 CTEs to build up the dates and names.
SQL Fiddle
MS SQL Server 2017 Schema Setup:
CREATE TABLE t1 (ID int, theDate date, theName varchar(10)) ;
INSERT INTO t1 (ID, theDate, theName)
VALUES
( 1,'2018-05-01','AAA' )
, ( 1,'2018-04-21','CCC' )
, ( 1,'2018-04-05','BBB' )
, ( 1,'2018-03-27','AAA' )
, ( 2,'2018-05-02','AAA' )
, ( 2,'2018-05-21','CCC' )
, ( 2,'2018-03-03','BBB' )
, ( 2,'2018-01-20','AAA' )
;
Main Query:
;WITH cte1 AS (
SELECT t1.ID, t1.theDate, t1.theName
, DATEADD(day,-30,t1.theDate) AS dMinus30
, ROW_NUMBER() OVER (PARTITION BY t1.ID ORDER BY t1.theDate DESC) AS rn
FROM t1
)
, cte2 AS (
SELECT c2.ID, c2.theDate, c2.theName
, ROW_NUMBER() OVER (PARTITION BY c2.ID ORDER BY c2.theDate) AS rn
, COUNT(*) OVER (PARTITION BY c2.ID) AS theCount
FROM cte1
INNER JOIN cte1 c2 ON cte1.ID = c2.ID
AND c2.theDate >= cte1.dMinus30
WHERE cte1.rn = 1
GROUP BY c2.ID, c2.theDate, c2.theName
)
SELECT cte1.ID, cte1.theDate AS max_date, cte1.theName AS max_name
, cte2.theDate AS previous_date, cte2.theName AS previous_name
, cte2.theCount
FROM cte1
INNER JOIN cte2 ON cte1.ID = cte2.ID
AND cte2.rn=1
WHERE cte1.rn = 1
Results:
| ID | max_date | max_name | previous_date | previous_name |
|----|------------|----------|---------------|---------------|
| 1 | 2018-05-01 | AAA | 2018-04-05 | BBB |
| 2 | 2018-05-21 | CCC | 2018-05-02 | AAA |
cte1 builds the list of max_date and max_name grouped by the ID and then using a ROW_NUMBER() window function to sort the groups by the dates to get the most recent date. cte2 joins back to this list to get all dates within the last 30 days of cte1's max date. Then it does essentially the same thing to get the last date. Then the outer query joins those two results together to get the columns needed while only selecting the most and least recent rows from each respectively.
I'm not sure how well it will scale with your data, but using the CTEs should optimize pretty well.
EDIT: For the additional requirement, I just added in another COUNT() window function to cte2.
I would do:
select id,
max(case when seqnum = 1 then date end) as max_date,
max(case when seqnum = 1 then name end) as max_name,
max(case when seqnum = 2 then date end) as prev_date,
max(case when seqnum = 2 then name end) as prev_name,
from (select e.*, row_number() over (partition by id order by date desc) as seqnum
from example e
) e
group by id;

SQL remove duplicates at ID and Month level

I have a table that is something like this:
ID Date Name Age
1 10/04/2015 Theja 24
1 28/04/2015 Theja1 26
1 14/07/2015 Theja2 45
1 30/07/2015 Theja2 45
1 30/08/2015 Theja3 54
2 10/04/2016 Jaya 23
2 28/04/2016 Jaya 23
2 14/05/2016 Jaya1 65
2 30/05/2016 Jaya1 65
But i want output like:
ID Date Name Age
1 28/04/2015 Theja1 26
1 01/05/2015 Theja1 26
1 01/06/2015 Theja1 26
1 30/07/2015 Theja2 45
1 30/08/2015 Theja3 54
2 28/04/2016 Jaya 23
2 30/05/2016 Jaya1 65
Consider 1 record per each month which is max and if any missing months for ID then consider previous records fill for missing months.
Different databases have different methods for handling dates. The following is an ANSI-standard way of getting one row per month:
select id, min(date)
from t
group by id,
extract(year from date), extract(month from date);
I have tried for a solution and came with the following, but you need a calendar table to insert missing rows in the output.
SQL Server Based solution given here
Data Setup:
create table temptable (
id int,
[date] date,
name varchar (50),
age int
);
insert into temptable values
(1,'04-10-2015','Theja',24)
insert into temptable values
(1,'04-28-2015','Theja1',26)
insert into temptable values
(1,'07-14-2015','Theja2',45)
insert into temptable values
(1,'07-30-2015','Theja2',45)
insert into temptable values
(1,'08-30-2015','Theja3',54)
insert into temptable values
(2,'04-10-2016','Jaya',23)
insert into temptable values
(2,'04-28-2016','Jaya',23)
insert into temptable values
(2,'05-14-2016','Jaya1',65)
insert into temptable values
(2,'05-30-2016','Jaya1',65)
The following solution completes till duplicate issue. but to get missing rows you need to implement the calendar table.
you can join with the calendar table and then use output of the cte3 to get missing data.
with cte1 as (
select *,
row_number() over ( partition by month([date]) order by [date]) as rownm,
concat(id,format([date],'MMyyyy')) as unqcol
from temptable
) , cte2 as
(
select unqcol, max(rownm) as maxdt
from cte1
group by unqcol
), cte3 as
( select a.*, lead(a.[date]) over (partition by a.id order by a.id,a.[date]) as NextDate from
cte1 a inner join cte2 b
on a.unqcol=b.unqcol and a.rownm=b.maxdt
)
select c.id,c.[date],c.name,c.age,c.NextDate from cte3 c
order by c.[date]

How to remove duplicate ID rows. While removing, use the rows that has NULL value in another column

While removing duplicate rows with same ID value, how to remove the rows that has null value in one particular column.
Note: there are other non-duplicate rows (below e.g., 12) that has NULL value and should still get selected in the result set.
Input table:
Id | sale_date | price
-----------------------------
11 20051020 22.1
11 NULL 20.1
12 NULL 20.1
13 20051020 20.1
Expected result:
Id | sale_date | price
-----------------------------
11 20051020 22.1
12 NULL 20.1
13 20051020 20.1
Assuming you have SQL Server 2008 or above, this will work for you. I use row_number and assign the values by ID starting at the max date. So any value higher than 1 is lower than the max date for that particular ID so I delete row_num greater than 1.
Check it out:
DECLARE #yourTable TABLE (ID INT,Sale_date DATE, Price FLOAT);
INSERT INTO #yourTable
VALUES (11,'20051020',22.1),
(11,NULL,20.1),
(12,NULL,20.1),
(13,'20051020',20.1);
WITH CTE
AS
(
SELECT *,
ROW_NUMBER() OVER (PARTITION BY ID ORDER BY sale_date DESC) AS row_num
FROM #yourTable
)
DELETE
FROM CTE
WHERE row_num > 1
SELECT *
FROM #yourTable
Try this
SELECT * FROM (
SELECT *, ROW_NUMBER() OVER (PARTITION BY ID ORDER BY Sale_Date desc) AS ROW_NUM FROM AA1) A
WHERE ROW_NUM<2
If you have duplicate Ids, and not every sale_date is NULL, you can keep the latest date, and delete the other ones:
DELETE #MyTable
FROM #MyTable AS T1
INNER JOIN (SELECT Id, MAX(sale_date) AS sale_date FROM #MyTable GROUP BY Id HAVING COUNT(*) > 1)
AS T2 ON T1.ID = T2.ID AND (T1.sale_date is null OR T1.sale_date < T2.sale_date)

How to maintain cumulative sum for each User in SQL server

I had a table like
ID UserID rupees time
1 1 200 2014-01-05
---------------------------------
2 1 500 2014-04-06
----------------------------------
3 2 10 2014-05-05
----------------------------------
4 2 20 2014-05-06
----------------------------------
I want the output lie
ID UserID Rupees time CumulativeSum
1 1 200 2014-01-05 200
-------------------------------------------------
2 1 500 2014-04-06 700
-------------------------------------------------
3 2 10 2014-05-06 10
-------------------------------------------------
4 2 20 2014-05-06 30
---------------------------------------------------
How can i get this table as purput
Please try using CTE:
;With T as(
select
*,
ROW_NUMBER() over(partition by UserId order by [time]) RN
from tbl
)
select
UserID,
rupees,
[time],
(select SUM(rupees)
from T b
where b.UserID=a.UserID and b.RN<=a.RN) CumulativeSum
from T a
For records with column value time increasing, try the below query:
select
UserID,
rupees,
[time],
(select SUM(rupees)
from tbl b
where b.UserID=a.UserID and b.[time]<=a.[time]) CumulativeSum
from tbl a
For SQL Server 2012 or later, you can use SUM() with an OVER clause that specifies a ROW clause:
declare #t table (ID int,UserID int,rupees int,[time] date)
insert into #t(ID,UserID,rupees,[time]) values
(1,1,200,'20140105'),
(2,1,500,'20140406'),
(3,2, 10,'20140505'),
(4,2, 20,'20140506')
select
*,
SUM(rupees) OVER (
PARTITION BY UserID
ORDER BY id /* or time? */
ROWS BETWEEN
UNBOUNDED PRECEDING AND
CURRENT ROW)
as total
from #t
Result:
ID UserID rupees time total
----------- ----------- ----------- ---------- -----------
1 1 200 2014-01-05 200
2 1 500 2014-04-06 700
3 2 10 2014-05-05 10
4 2 20 2014-05-06 30
DECLARE #t table (UserID INT,rupees INT,DateKey Date )
INSERT INTO #t VALUES
(1,200,'2014-01-05'),
(2,300,'2014-01-06'),
(2,800,'2014-03-06')
select UserID,
rupees,
DateKey,
(SELECT SUM(rupees)from #t t
where t.rupees <= tt.rupees) from #t tt
GROUP BY UserID,rupees,DateKey
Hope this too helps you.
DECLARE #tab TABLE (id INT,userId INT,rupees INT,[time] Date)
INSERT INTO #tab VALUES
(1,1,200 ,'2014-01-05'),
(2,1,500 ,'2014-04-06'),
(3,2,10 ,'2014-05-05'),
(4,2,20 ,'2014-05-06')
SELECT LU.id,LU.userId,LU.rupees,LU.time,SUM(b.rupees) CumulativeSum
FROM (SELECT *,ROW_NUMBER() OVER (PARTITION BY userId ORDER BY [time]) R FROM #tab) B
JOIN (SELECT *,ROW_NUMBER() OVER (PARTITION BY userId ORDER BY [time]) R FROM #tab) LU
ON B.userId = LU.userId AND B.R <= LU.R
GROUP BY LU.id,LU.userId,LU.rupees,LU.time
Result
I am assuming that you are not using SQL Server 2012, which provides the cumulative sum function. The other answers use some form of the row_number() function, but these seems totally unnecessary. I usually approach cumulative sums using correlated subqueries:
select ID, UserID, rupees, [time],
(select sum(rupees)
from table t2
where t2.UserId = t.UserId and
t2.ID <= t.ID
) as CumulativeSum
from table t;
This requires having a column that uniquely identifies each row, and that seems to be the purpose of id. For performance, I would want to have an index on table(UserId, ID, rupees).
select *, SUM(rupees) OVER (
PARTITION BY UserID
ORDER BY id) as CumSum from #tbl