How to group data with conditions?

How to group data with conditions? - sql

I'm trying to turn my current table using sql
customer.id sale_date
15 1/12/2017
15 2/12/2017
15 7/12/2017
12 6/09/2017
12 12/09/2017
16 8/14/2017
13 6/01/2017
13 7/01/2017
into something like this.
sale_date1 is the first order date.
sale_date2 is any order date one month after sale_date1.
sale_date3 is any order date five months after sale_date1.
customer.id sale_date1 sale_date2 sale_date3(at least 5 months after sale_date1)
15 1/12/2017 2/12/2017 7/12/2017
12 6/09/2017 NULL 12/09/2017
16 8/14/2017 NULL NULL
13 6/01/2017 7/01/2017 NULL

One option here would be to use correlated sub-queries to populate each of the three columns:
-- Step 1: find each customer's earliest sale; both follow-up windows are
-- measured from this date.
WITH first_sale AS (
    SELECT
        [customer.id],
        MIN(sale_date) AS min_sale_date
    FROM yourTable
    GROUP BY [customer.id]
)
SELECT
    fs.[customer.id],
    fs.min_sale_date AS sale_date1,
    -- Earliest sale at least 1 month, but less than 5 months, after the first sale.
    (
        SELECT MIN(s.sale_date)
        FROM yourTable AS s
        WHERE s.[customer.id] = fs.[customer.id]
          AND s.sale_date >= DATEADD(month, 1, fs.min_sale_date)
          AND s.sale_date <  DATEADD(month, 5, fs.min_sale_date)
    ) AS sale_date2,
    -- Earliest sale at least 5 months after the first sale.
    (
        SELECT MIN(s.sale_date)
        FROM yourTable AS s
        WHERE s.[customer.id] = fs.[customer.id]
          AND s.sale_date >= DATEADD(month, 5, fs.min_sale_date)
    ) AS sale_date3
FROM first_sale AS fs
ORDER BY fs.[customer.id];
Demo

Try below using row_number() and conditional aggregation
-- Number each customer's sales in date order, then pivot the first three
-- sales into columns with conditional aggregation.
with numbered as (
    select
        *,
        row_number() over (partition by customerid order by sale_date) as seq
    from tablename
)
select
    customerid,
    max(case when seq = 1 then sale_date end) as date1,
    max(case when seq = 2 then sale_date end) as date2,
    max(case when seq = 3 then sale_date end) as date3
from numbered
group by customerid

I THINK THIS IS WHAT YOU WANT
-- Pivot each customer's first five sale dates into columns.
-- Fixes over the original draft:
--   * the column is named "customer.id", so it must be bracket-quoted;
--   * ROW_NUMBER is partitioned by customer (it was numbering the whole table);
--   * each LEFT JOIN has an ON clause that matches its own derived table
--     (every join used to compare against SALES1, and SALES2 was repeated);
--   * the unrelated columns name / recovery_model_desc are gone.
WITH ranked AS (
    SELECT
        [customer.id],
        sale_date,
        ROW_NUMBER() OVER (PARTITION BY [customer.id] ORDER BY sale_date ASC) AS R1
    FROM yourTable
)
SELECT
    A.[customer.id],
    SALES1.sale_date AS sale_date1,
    SALES2.sale_date AS sale_date2,
    SALES3.sale_date AS sale_date3,
    SALES4.sale_date AS sale_date4,
    SALES5.sale_date AS sale_date5
FROM (SELECT DISTINCT [customer.id] FROM yourTable) AS A
-- The rank filter sits in ON (not WHERE) so customers with fewer than
-- five sales keep their row and simply get NULLs.
LEFT JOIN ranked AS SALES1
    ON SALES1.[customer.id] = A.[customer.id] AND SALES1.R1 = 1
LEFT JOIN ranked AS SALES2
    ON SALES2.[customer.id] = A.[customer.id] AND SALES2.R1 = 2
LEFT JOIN ranked AS SALES3
    ON SALES3.[customer.id] = A.[customer.id] AND SALES3.R1 = 3
LEFT JOIN ranked AS SALES4
    ON SALES4.[customer.id] = A.[customer.id] AND SALES4.R1 = 4
LEFT JOIN ranked AS SALES5
    ON SALES5.[customer.id] = A.[customer.id] AND SALES5.R1 = 5
ORDER BY A.[customer.id];

try this:
-- For each customer: the first sale date, the earliest sale in the
-- [first + 1 month, first + 5 months) window, and the earliest sale at or
-- after first + 5 months.
-- The previous version joined on exact equality with the shifted dates, so a
-- sale that fell later than exactly 1 or 5 months after the first one was
-- never matched.
with mindate as (
    select
        id,
        min(sale_date) as MinDate,
        dateadd(month, 1, min(sale_date)) as MinDatePlus1Month,
        dateadd(month, 5, min(sale_date)) as MinDatePlus5Month
    from yourtable
    group by id
)
select
    f1.id,
    f1.MinDate   as sale_date1,
    f2.sale_date as sale_date2,
    f3.sale_date as sale_date3
from mindate f1
-- MIN() without GROUP BY always yields exactly one row (NULL when nothing
-- qualifies), so each OUTER APPLY keeps one output row per customer.
outer apply (
    select min(t.sale_date) as sale_date
    from yourtable t
    where t.id = f1.id
      and t.sale_date >= f1.MinDatePlus1Month
      and t.sale_date <  f1.MinDatePlus5Month
) f2
outer apply (
    select min(t.sale_date) as sale_date
    from yourtable t
    where t.id = f1.id
      and t.sale_date >= f1.MinDatePlus5Month
) f3

Related

Bring next value after condition

I am trying to fetch the next value after a condition is met. In this case the condition is met by the row from 13/05/2021, and the result I want to see is the row from 19/05/2021. Cte and CTE1 bring correct results.
I can't figure out what is wrong with my query.
-- Asker's original attempt, kept verbatim (including the stray "<" / ">"
-- wrapped around the statement).
-- cte: 'Case Submitted' rows after 2021-01-01, numbered per customer.
<with cte as
(
select
customerid
,max(timestamp) as [Case Submitted]
-- rownum = 1 is the latest submitted timestamp for each customer
,row_number() over (partition by [CustomerId] order by [CustomerId] ,max([timestamp]) desc) as rownum
from Table1
where substatus = 'Case Submitted'
and timestamp > '2021-01-01'
-- grouping by timestamp as well means max(timestamp) is just each row's own timestamp
Group by
customerid
,timestamp
)
-- CTE2: keeps only the rownum = 1 row of cte for each customer
,CTE2 as
(
Select *
from cte
Where rownum = 1
),
-- CTE3: 'Customer Support' rows after 2021-01-01, numbered per customer
CTE3 as
(
select
PS.customerid
,(PS.timestamp) as [Customer Support]
-- ordering by the partition column only: numbering within a customer is not tied to timestamp
,row_number() over (partition by PS.customerid order by PS.customerid ) as rownum
from Table1 PS
-- LEFT JOIN: the date comparison in ON does not remove any PS rows, and it
-- matches submissions that are later than the support row
left join CTE2 C2 on C2.customerid = PS.customerid and C2.[Case Submitted] > PS.timestamp and C2.rownum =1
where status = 'Customer Support'
and timestamp > '2021-01-01'
Group by
PS.customerid
,ps.timestamp
)
-- returns every CTE3 row; rownum is not filtered here
Select*
from CTE3>

untested notepad scribble
-- CTE1: every 'Case Submitted' row after 2021-01-01, numbered per customer
-- from newest to oldest, so rownum = 1 is the latest submission.
with CTE1 as
(
    select
        customerid
        , [timestamp] as [Case Submitted]
        , rownum = row_number() over (partition by customerid order by [timestamp] desc)
    from Table1
    where substatus = 'Case Submitted'
      and [timestamp] > cast('2021-01-01' as date)
),
-- CTE2: 'Customer Support' rows that come AFTER the latest submission,
-- numbered oldest-first, so rownum = 1 is the next support row.
CTE2 AS
(
    select
        PS.customerid
        , PS.[timestamp] as [Customer Support]
        , rownum = row_number() over (partition by PS.customerid order by PS.[timestamp])
    from Table1 as PS
    join CTE1 as C1
        on C1.customerid = PS.customerid
        -- the comparison used to be reversed (submission > support), which
        -- selected support rows from before the submission
        and PS.[timestamp] > C1.[Case Submitted]
        and C1.rownum = 1
    where PS.status = 'Customer Support'
      and PS.[timestamp] > cast('2021-01-01' as date)
)
select *
from CTE2
where rownum = 1

How to Nest query with different criteria

I have a Sales_details table from which I would like to get a report of the top 150 products and the top 10 customers of each product. The code I have below does just that and is working perfectly. However, it uses the same date range for both. How do I modify this so that the top 150 products are based on 10 years of history while the top 10 customers are based on 2 years of history?
-- Top 10 customers within each of the top 150 products, both ranked by
-- total sales over the same date range.
select pc.*
from (select pc.*,
             -- product_id breaks ties so products with equal sales get distinct ranks
             dense_rank() over (order by product_sales desc, product_id) as product_rank
      from (select sd.product_id, sd.custno, sum(sd.sales$) as total_sales,
                   -- closing parenthesis was missing here; DESC so rank 1 is the
                   -- customer with the highest sales
                   row_number() over (partition by sd.product_id
                                      order by sum(sd.sales$) desc) as cust_within_product_rank,
                   -- total sales of the product across all of its customers
                   sum(sum(sd.sales$)) over (partition by sd.product_id) as product_sales
            from salesdetails sd
            group by sd.product_id, sd.custno
           ) pc
     ) pc
where product_rank <= 150 and cust_within_product_rank <= 10;

You can use conditional aggregation:
-- Top 150 products by sales over the last 10 years, and within each of them
-- the top 10 customers by sales over the last 2 years.
-- "date" is a placeholder for whatever the sale-date column is called.
select pc.*
from (select pc.*,
             dense_rank() over (order by product_sales desc, product_id) as product_rank
      from (select sd.product_id, sd.custno, sum(sd.sales$) as total_sales,
                   -- DESC so rank 1 is the customer with the highest 2-year
                   -- sales; ascending order returned the lowest sellers
                   row_number() over (partition by sd.product_id
                                      order by sum(case when date > dateadd(year, -2, getdate()) then sd.sales$ else 0 end) desc
                                     ) as cust_within_product_rank,
                   -- 10-year sales of the product summed across all of its customers
                   sum(sum(case when date > dateadd(year, -10, getdate()) then sd.sales$ else 0 end)) over (partition by sd.product_id) as product_sales
            from salesdetails sd
            group by sd.product_id, sd.custno
           ) pc
     ) pc
where product_rank <= 150 and cust_within_product_rank <= 10;
I'm not sure what column you use for date, so I just called it date.

T-SQL Comparison of Min and Max Values Over Time

Working in SQL Server, I have a table with 4 columns
AccountId
AccountName
SubscriptionAmt
DateKey
It lists each company's SubscriptionAmt by month (DateKey, e.g. 201801) as it changes over time.
I want to write one select statement including
AccountId, AccountName, EarliestSubscriptionAmt, LatestSubscriptionAmt, Difference
I know the effort below is not correct. I know I have to do some kind of PARTITION BY / OVER in the subqueries, but I can't put my finger on it.
Any help is appreciated
-- Asker's original attempt, kept verbatim; the asker states it is not correct.
-- Intent: one row per account with the earliest and latest SubscriptionAmt
-- and the percentage change between them.
SELECT
[Account ID],
[Account Name],
b.EarlySub,
c.LatestSub,
-- NOTE(review): division binds tighter than subtraction, so this evaluates
-- LatestSub - (EarlySub / EarlySub), not (LatestSub - EarlySub) / EarlySub
(c.LatestSub - b.EarlySub / b.EarlySub) * 100 as PercentageChange
FROM
-- NOTE(review): the alias name is missing after AS; the joins below refer to "a"
SubTable AS
LEFT JOIN
(SELECT
[Account ID],
SubscriptionAmt AS EarlySub
FROM
SubTable
WHERE
-- NOTE(review): an aggregate used directly in WHERE; this is not computed per account
DateKey = MIN(DateKey)) AS b ON a.[Account ID] = b.[Account ID]
LEFT JOIN
(SELECT
[Account ID],
SubscriptionAmt AS LatestSub
FROM
SubTable
WHERE
-- NOTE(review): same issue as above, with MAX
DateKey = MAX(DateKey)) AS c ON a.[Account ID] = c.[Account ID]
Sample Data:
AccountId AccountName SubscriptionAmt DateKey
----------------------------------------------------
1 Bob's Store 100 201701
1 Bob's Store 200 201702
1 Bob's Store 300 201703
1 Bob's Store 400 201704
Desired Results:
AccountId AccountName EarliestSubAmt LatestSubAmt PercentageChange
------------------------------------------------------------------------
1 Bob's Store 100 400 300%

FULL demonstration:
-- Self-contained demo: earliest vs. latest subscription amount per account.
-- A table variable must be named with "@"; "#" is only valid for temp tables
-- created with CREATE TABLE.
DECLARE @TABLE TABLE
(
    AccountId VARCHAR(50),
    AccountName VARCHAR(50),
    SubscriptionAmt INT,
    DateKey VARCHAR(50)     -- year + month as text, e.g. '201701'
);

INSERT INTO @TABLE (AccountId, AccountName, SubscriptionAmt, DateKey)
VALUES ('1', 'Bob''s Store', 100, '201701'),
       ('1', 'Bob''s Store', 200, '201702'),
       ('1', 'Bob''s Store', 300, '201703'),
       ('1', 'Bob''s Store', 400, '201704');

WITH CTE
AS
(
    SELECT AccountId,
           AccountName,
           SubscriptionAmt,
           -- DateKey + '01' turns 'YYYYMM' into 'YYYYMMDD' so it can be cast to DATE.
           -- ForMin = 1 marks the earliest month, ForMAX = 1 the latest.
           ROW_NUMBER() OVER (PARTITION BY AccountId, AccountName ORDER BY CAST(DateKey + '01' AS DATE)) AS ForMin,
           ROW_NUMBER() OVER (PARTITION BY AccountId, AccountName ORDER BY CAST(DateKey + '01' AS DATE) DESC) AS ForMAX
    FROM @TABLE
)
SELECT A.AccountId,
       A.AccountName,
       A.SubscriptionAmt AS EarliestSubAmt,
       B.SubscriptionAmt AS LatestSubAmt,
       -- Multiply by 100 BEFORE dividing: the previous expression omitted the
       -- factor entirely and returned '3%' for a 100 -> 400 change.
       -- The result is a whole-number percentage (integer division);
       -- NULLIF avoids a divide-by-zero error when the earliest amount is 0.
       CAST((B.SubscriptionAmt - A.SubscriptionAmt) * 100 / NULLIF(A.SubscriptionAmt, 0) AS VARCHAR(50)) + '%' AS PercentageChange
FROM CTE AS A
INNER JOIN CTE AS B
    ON A.AccountId = B.AccountId
   AND A.AccountName = B.AccountName
WHERE A.ForMin = 1 AND B.ForMAX = 1

you can get the Min and Max amounts by first getting the min date and the max date per account id using outer apply, then using a case expression to get the min or max amounts
-- Earliest and latest subscription amount per account.
-- The OUTER APPLY attaches the account's first and last DateKey to every row;
-- the CASE expressions then pick the amount belonging to each of those keys.
SELECT
    [AccountId],
    [AccountName],
    MIN(CASE WHEN DateKey = MinDateKey THEN SubscriptionAmt END) AS EarliestSubAmt,
    MAX(CASE WHEN DateKey = MaxDateKey THEN SubscriptionAmt END) AS LatestSubAmt
FROM SubTable AS s
OUTER APPLY (
    SELECT
        MIN(d.DateKey) AS MinDateKey,
        MAX(d.DateKey) AS MaxDateKey
    FROM SubTable AS d
    WHERE d.AccountId = s.AccountId
) AS bounds
GROUP BY
    [AccountId],
    [AccountName]
you can wrap all of this to get the percent change.
-- Earliest and latest subscription amount per account, plus the percentage
-- change between them.
select *,
       -- 100.0 forces decimal arithmetic: with integer amounts the old
       -- (Latest - Earliest) / Earliest truncated before the * 100
       -- (e.g. 100 -> 150 gave 0 instead of 50).
       -- NULLIF returns NULL instead of raising a divide-by-zero error.
       100.0 * (LatestSubAmt - EarliestSubAmt) / NULLIF(EarliestSubAmt, 0) as PercentageChange
from (
    select [AccountId], [AccountName],
           MIN(CASE WHEN DateKey = MinDateKey THEN SubscriptionAmt END) as EarliestSubAmt,
           MAX(CASE WHEN DateKey = MaxDateKey THEN SubscriptionAmt END) as LatestSubAmt
    from SubTable s
    -- attaches the account's first and last DateKey to every row
    outer apply (
        select Min(DateKey) MinDateKey, Max(DateKey) MaxDateKey from SubTable t where s.AccountId = t.AccountId
    ) t
    group by [AccountId], [AccountName]
) s

Please use below query . I have considered Account ID as key value for each store and it will not be duplicated. Please test before implementing into any system.
--Data Prep
create table #Test (
    AccountId int,
    AccountName varchar(max),
    SubscriptionAmt int,
    DateKey int             -- year + month, e.g. 201701
)
-- Explicit column list; UNION ALL because the rows are distinct and no
-- de-duplication is wanted.
insert into #Test (AccountId, AccountName, SubscriptionAmt, DateKey)
select 1,'Bobs Store',100,201701
union all
select 1,'Bobs Store',200,201702
union all
select 1,'Bobs Store',300,201703
union all
select 1,'Bobs Store',400,201704
--Actual code ****************************************************
-- MinAm = 1 marks each account's earliest month, MaxAm = 1 its latest.
select *,
       ROW_NUMBER() over(Partition by Accountid order by datekey asc) MinAm,
       ROW_NUMBER() over(Partition by Accountid order by datekey desc) MaxAm
into #Final
from #Test

Select *,
       -- 100.0 forces decimal arithmetic; the int/int division used to truncate
       -- before the * 100. NULLIF guards against a zero earliest amount.
       100.0 * (LatestSubAmt - EarliestSubAmt) / NULLIF(EarliestSubAmt, 0) as PercentageChange
From (
    select
        AccountId,
        AccountName,
        (select SubscriptionAmt from #Final f2 where f1.AccountId=f2.AccountId and f2.MinAm=1) EarliestSubAmt,
        (select SubscriptionAmt from #Final f2 where f1.AccountId=f2.AccountId and f2.MaxAm=1) LatestSubAmt
    from #Final f1
    -- one output row per account: start from its earliest-month row
    Where MinAm=1) A
--********************************************************
If you don't want to use sub query
-- Same result without correlated subqueries: windowed conditional sums pick
-- the earliest / latest amount for each account, and the outer query
-- computes the percentage once instead of repeating the window expressions.
Select Accountid,
       AccountName,
       EarliestSubAmt,
       LatestSubAmt,
       -- 100.0 forces decimal arithmetic (int/int truncated before the * 100);
       -- NULLIF guards against a zero earliest amount.
       100.0 * (LatestSubAmt - EarliestSubAmt) / NULLIF(EarliestSubAmt, 0) PercentageChange
From (
    -- DISTINCT collapses the identical per-row window results to one row per account
    Select Distinct Accountid,
           AccountName,
           sum(case when minAm=1 then SubscriptionAmt else 0 END) over (Partition by Accountid) EarliestSubAmt,
           sum(case when maxam=1 then SubscriptionAmt else 0 END) over (Partition by Accountid) LatestSubAmt
    From (
        select *,
               ROW_NUMBER() over(Partition by Accountid order by datekey asc) MinAm,
               ROW_NUMBER() over(Partition by Accountid order by datekey desc) MaxAm
        from #Test
    ) A
) B

How to find the highest value in a year and in all months

I want to return the year in which the most contracts were made, and the month (across all years) in which the highest number of contracts was made.
I've tried:
-- Contract counts for every (year, month) pair plus the CUBE subtotals
-- (per year, per month, and grand total), largest count first.
SELECT
    cal.CalendarYear,
    cal.MonthOfYear,
    COUNT(*) AS Cnt
FROM dim.Application AS app
JOIN dim.Calendar AS cal
    ON app.ApplicationDateID = cal.DateId
--WHERE (CalendarYear IS NULL) OR (MonthOfYear IS NULL)
GROUP BY
    cal.CalendarYear,
    cal.MonthOfYear
WITH CUBE
ORDER BY COUNT(*) DESC
and...
-- Asker's second attempt, kept verbatim. The CTE scaffolding (maxYear /
-- maxMonth) is commented out, leaving two TOP 1 queries joined by UNION ALL.
--;WITH maxYear AS (
-- Branch 1: one row per year, with 0 as a placeholder for the month.
SELECT TOP 1
cal.CalendarYear AS [Year]
,0 AS [Month]
,COUNT(*) AS Cnt
FROM dim.Application app
JOIN dim.Calendar cal ON app.ApplicationDateID = cal.DateId
GROUP BY cal.CalendarYear
-- NOTE(review): with this ORDER BY commented out, TOP 1 in this branch has no
-- ordering of its own, so which year it returns is not determined here
-- ORDER BY COUNT(*) DESC
--)
UNION ALL
--,maxMonth AS (
-- Branch 2: one row per month of year, with 0 as a placeholder for the year.
SELECT TOP 1
0 AS [Year]
,cal.MonthOfYear AS [Month]
,COUNT(*) AS Cnt
FROM dim.Application app
JOIN dim.Calendar cal ON app.ApplicationDateID = cal.DateID
GROUP BY cal.MonthOfYear
-- NOTE(review): an ORDER BY after the last UNION branch presumably sorts the
-- combined result rather than only this branch; confirm against the T-SQL docs
ORDER BY COUNT(*) DESC
--)
Any help would be appreciated. Thanks.

This will ORDER BY each portion of the UNION independently, and still have the results joined in one SELECT...
-- Each CTE applies its own TOP 1 ... ORDER BY, so the year and the month are
-- ranked independently before the two single-row results are stacked.
WITH top_year AS (
    SELECT TOP 1
        cal.CalendarYear AS [Year],
        0 AS [Month],
        COUNT(*) AS Cnt
    FROM dim.Application AS app
    JOIN dim.Calendar AS cal
        ON app.ApplicationDateID = cal.DateId
    GROUP BY cal.CalendarYear
    ORDER BY COUNT(*) DESC
),
top_month AS (
    SELECT TOP 1
        0 AS [Year],
        cal.MonthOfYear AS [Month],
        COUNT(*) AS Cnt
    FROM dim.Application AS app
    JOIN dim.Calendar AS cal
        ON app.ApplicationDateID = cal.DateID
    GROUP BY cal.MonthOfYear
    ORDER BY COUNT(*) DESC
)
SELECT ty.* FROM top_year AS ty
UNION ALL
SELECT tm.* FROM top_month AS tm

Get the counts per year and month and use row_number to get the year and month with the highest contracts.
-- Returns a single row: the year with the most contracts and the month of
-- year (across all years) with the most contracts, each with its count.
-- Fixes: the missing comma after the second select item, and references to
-- Year_count / Month_Year_count, which the inner query actually names
-- Year_Cnt / Month_Year_Cnt.
SELECT
    MAX(CASE WHEN Year_rank = 1 THEN [Year] END)               AS Highest_contracts_year,
    MAX(CASE WHEN Year_rank = 1 THEN Year_Cnt END)             AS Year_Contracts_count,
    MAX(CASE WHEN Month_Year_rank = 1 THEN [Month] END)        AS Highest_contracts_Month,
    MAX(CASE WHEN Month_Year_rank = 1 THEN Month_Year_Cnt END) AS MonthYear_Contracts_count
FROM (SELECT T.*
             -- exactly one row gets rank 1 in each ranking; ties are broken arbitrarily
             ,ROW_NUMBER() OVER (ORDER BY Year_Cnt DESC) AS Year_rank
             ,ROW_NUMBER() OVER (ORDER BY Month_Year_Cnt DESC) AS Month_Year_rank
      FROM (SELECT
                cal.CalendarYear AS [Year]
                ,cal.MonthOfYear AS [Month]
                -- windowed counts: every application row carries the totals
                -- for its year and for its month of year
                ,COUNT(*) OVER (PARTITION BY cal.CalendarYear) AS Year_Cnt
                ,COUNT(*) OVER (PARTITION BY cal.MonthOfYear) AS Month_Year_Cnt
            FROM dim.Application app
            JOIN dim.Calendar cal ON app.ApplicationDateID = cal.DateId
           ) T
     ) T
You should specify what needs to be done when there are ties for highest counts. Assuming you need all highest count rows when there are ties, use
-- Tie-aware variant: RANK() gives every top-count year (and month) rank 1,
-- so all tied winners are returned.
WITH counts AS (
    -- one row per application, carrying the totals for its year and its month of year
    SELECT
        cal.CalendarYear AS [Year],
        cal.MonthOfYear AS [Month],
        COUNT(*) OVER (PARTITION BY cal.CalendarYear) AS Year_Cnt,
        COUNT(*) OVER (PARTITION BY cal.MonthOfYear) AS Month_Year_Cnt
    FROM dim.Application AS app
    JOIN dim.Calendar AS cal
        ON app.ApplicationDateID = cal.DateId
),
ranks AS (
    SELECT
        c.*,
        RANK() OVER (ORDER BY Year_Cnt DESC) AS Year_rank,
        RANK() OVER (ORDER BY Month_Year_Cnt DESC) AS Month_Year_rank
    FROM counts AS c
)
SELECT *
FROM (
    SELECT DISTINCT Year, Year_Cnt
    FROM ranks
    WHERE Year_rank = 1
) AS ry
CROSS JOIN (
    SELECT DISTINCT Month, Month_Year_Cnt
    FROM ranks
    WHERE Month_Year_rank = 1
) AS rm

EDIT: This might be what you want, unless you want it on single line:
-- Two stacked single-row results in one column pair:
--   row 1: the year with the most contracts;
--   row 2: the month belonging to the busiest (year, month) combination.
SELECT
    best_year.CalendarYear AS [year/month],
    best_year.Cnt
FROM (
    SELECT TOP 1
        cal.CalendarYear,
        COUNT(*) AS Cnt
    FROM dim.Application AS app
    JOIN dim.Calendar AS cal
        ON app.ApplicationDateID = cal.DateId
    GROUP BY cal.CalendarYear
    ORDER BY COUNT(*) DESC
) AS best_year
UNION ALL
SELECT
    best_month.MonthOfYear,
    best_month.Cnt
FROM (
    SELECT TOP 1
        cal.CalendarYear,
        cal.MonthOfYear,
        COUNT(*) AS Cnt
    FROM dim.Application AS app
    JOIN dim.Calendar AS cal
        ON app.ApplicationDateID = cal.DateId
    GROUP BY
        cal.CalendarYear,
        cal.MonthOfYear
    ORDER BY COUNT(*) DESC
) AS best_month
It returns following result where month 3 is in fact 2016:
year/month cnt
2017 4
3 2
I have used following data as input
-- Sample data: five calendar rows across 2016-2017 and seven applications
-- that reference them by DateId.
create table #calendar (
    DateId int,
    calendaryear int,
    monthofyear int
)
create table #application (
    applicationdateId int
)

insert into #calendar (DateId, calendaryear, monthofyear)
values (1, 2017, 01),
       (2, 2017, 02),
       (3, 2017, 03),
       (4, 2016, 01),
       (5, 2016, 03)

insert into #application (applicationdateId)
values (1), (1), (2), (3), (4), (5), (5)

SQL Server 2008 calculating data difference when we have only one date column

I have a date column Order_date and I am looking for a way to calculate the date difference between a customer's last order date and the order date immediately before it (the second most recent).
Example
Customer : 1, 2 , 1 , 1
Order_date: 01/02/2007, 02/01/2015, 06/02/2014, 04/02/2015
As you can see customer # 1 has three orders.
I want to know the date difference between his recent order date (04/02/2015) and his recent previous (06/02/2014).

For SQL Server 2012 & 2014 you could use LAG with a DATEDIFF to see the number of days between them.
For older versions, a CTE would probably be your best bet:
-- Days between each customer's most recent order and the one before it
-- (works on SQL Server versions without LAG).
;WITH CTE AS
(
    SELECT CustomerID,
           Order_Date,
           -- rn = 1 is the latest order for the customer, rn = 2 the one before it
           rn = ROW_NUMBER() OVER (PARTITION BY CustomerID ORDER BY Order_Date DESC)
    FROM Orders   -- the FROM clause was missing; substitute your own table name
)
SELECT c1.CustomerID,
       -- older date first so the result is a positive number of days
       DATEDIFF(d, c2.Order_Date, c1.Order_Date) AS DaysBetween
FROM CTE c1
INNER JOIN CTE c2
    -- the join must stay within one customer; matching on rn alone paired
    -- orders that belong to different customers
    ON c2.CustomerID = c1.CustomerID
   AND c2.rn = c1.rn + 1
-- keep only the (latest, previous) pair
WHERE c1.rn = 1

In SQL Server 2012+, you can use lag() to get the difference between any two dates:
-- For every order: days elapsed since the same customer's previous order
-- (NULL for the customer's first order).
select
    t.*,
    datediff(
        day,
        lag(order_date) over (partition by customer order by order_date),
        order_date
    ) as days_dff
from table t;
If you have an older version, you can do something similar with correlated subqueries or outer apply.
EDIT:
If you just want the difference between the two most recent dates, use conditional aggregation instead:
-- Days between each customer's two most recent orders.
-- seqnum = 1 is the latest order, seqnum = 2 the one before it.
with numbered as (
    select
        t.*,
        row_number() over (partition by customer order by order_date desc) as seqnum
    from table t
)
select
    customer,
    datediff(
        day,
        max(case when seqnum = 2 then order_date end),
        max(case when seqnum = 1 then order_date end)
    ) as MostRecentDiff
from numbered
group by customer;

If you're using SQL Server 2008 or later, you can try CROSS APPLY.
-- Days between each customer's two most recent orders.
SELECT
    [customers].[customer_id],
    DATEDIFF(DAY, MIN([recent_orders].[order_date]), MAX([recent_orders].[order_date])) AS [elapsed]
FROM [customers]
CROSS APPLY (
    SELECT TOP 2 [order_date]
    FROM [orders]
    WHERE ([orders].[customer_id] = [customers].[customer_id])
    -- without this ORDER BY, TOP 2 returned two arbitrary orders rather than
    -- the two most recent ones
    ORDER BY [orders].[order_date] DESC
) [recent_orders]
GROUP BY [customers].[customer_id]

-- Days between customer 1's latest order and the order immediately before it.
WITH last_order AS (
    -- always one row: the customer's most recent order date
    SELECT MAX(O.OrderDate) AS LastOrderDate
    FROM dbo.Orders AS O
    WHERE O.CustomerID = 1
)
SELECT
    DATEDIFF(
        DAY,
        -- latest order date strictly earlier than the most recent one
        (
            SELECT MAX(SO.OrderDate)
            FROM dbo.Orders AS SO
            WHERE SO.CustomerID = 1
              AND SO.OrderDate < lo.LastOrderDate
        ),
        lo.LastOrderDate
    ) AS PreviousDays
FROM last_order AS lo

-- Self-contained demo: each order next to the date of the previous order and
-- the number of months between them.
-- Guarded drop: an unconditional DROP TABLE fails the first time the script
-- runs, because the temp table does not exist yet.
IF OBJECT_ID('tempdb..#Invoices') IS NOT NULL
    DROP TABLE #Invoices;

create table #Invoices ( OrderId int , OrderDate datetime )

-- unseparated YYYYMMDD literals are read the same way under any DATEFORMAT setting
insert into #Invoices (OrderId , OrderDate )
select 101, '20010101' UNION ALL Select 202, '20020202' UNION ALL Select 303, '20030303'
UNION ALL Select 808, '20080808' UNION ALL Select 909, '20090909'
;
WITH
MyCTE /* http://technet.microsoft.com/en-us/library/ms175972.aspx */
( OrderId,OrderDate,ROWID) AS
(
    SELECT
        OrderId,OrderDate
        -- ROWID numbers the orders from oldest (1) to newest
        , ROW_NUMBER() OVER ( ORDER BY OrderDate ) as ROWID
    FROM
        #Invoices inv
)
SELECT
    outerAlias.OrderId, outerAlias.OrderDate
    , innerAlias.OrderDate as PreviousOrderDate
    ,
    [MyDiff] =
        CASE
            -- the oldest order has no predecessor
            WHEN innerAlias.OrderDate IS NULL then 0
            -- previous date first so the month count is positive
            ELSE DATEDIFF (mm, innerAlias.OrderDate , outerAlias.OrderDate )
        END
    , ROWIDMINUSONE = (outerAlias.ROWID-1)
    , outerAlias.ROWID as ROWID_SHOWN_FOR_KICKS , outerAlias.OrderDate as OrderDateASecondTimeForConvenience
FROM
    MyCTE outerAlias
    -- one self-join replaces the correlated subquery that was repeated three
    -- times; ROWID is unique, so at most one previous row matches
    LEFT JOIN MyCTE innerAlias
        ON innerAlias.ROWID = (outerAlias.ROWID-1)
ORDER BY outerAlias.OrderDate Desc , outerAlias.OrderId

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

How to group data with conditions? - sql

Related

Bring next value after condition

How to Nest query with different criteria

T-SQL Comparison of Min and Max Values Over Time

How to find the highest value in a year and in all months

SQL Server 2008 calculating data difference when we have only one date column

Categories

Resources