T-SQL query to obtain the no of days an item was at the current price - sql

Declare #sec_temp table
(
sec_no varchar(10),
amount money,
price_date date
)
insert #sec_temp
values
('123ABC', 25, '2011-01-20'),
('123ABC', 25, '2011-01-19'),
('123ABC', 25, '2011-01-18'),
('123ABC', 20, '2011-01-15'),
('123ABC', 22, '2011-01-13'),
('456DEF', 22, '2011-01-13')
Problem: To list out the distinct sec_no with the latest price (amount) and the number of days it was at the current price. In this case,
Result:
sec_no amount no_of_days_at_price
123ABC 25 3 e.g. 01-18 to 01-20
456DEF 22 1 e.g. 01-13

select
a.sec_no,
a.amount,
min(price_date) as FirstDateAtPrice,
No_of_days_at_price = COALESCE(DATEDIFF(d, c.price_date, a.price_date),0)
from (
select *, ROW_NUMBER() over (partition by sec_no order by price_date desc) rn
from #sec_temp) a
outer apply (
select top 1 *
from #sec_temp b
where a.sec_no=b.sec_no and a.amount <> b.amount
order by b.price_date desc
) c
where a.rn=1
The subquery A works out the greatest-1-per-group, which is to say the most recent price record for each sec_no. The subquery C finds the first prior record that holds a different price for the same sec_no. The difference in the two dates is the number of days sought. If you need it to be one for no prior date, change the end of the COALESCE line to 1 instead of 0.
EDITED for clarified question
To start counting from the first date equal to the current rate, use this query instead
select
sec_no,
amount,
No_of_days_at_price = 1 + DATEDIFF(d, min(price_date), max(price_date))
from (
select *,
ROW_NUMBER() over (partition by sec_no order by price_date desc) rn,
ROW_NUMBER() over (partition by sec_no, amount order by price_date desc) rn2
from #sec_temp
) X
WHERE rn=rn2
group by sec_no, amount
AND FINALLY If the required result is actually the days between
the first date on which the price is equal to current; and
today
Then the only part to change is this:
No_of_days_at_price = 1 + DATEDIFF(d, min(price_date), getdate())

Here's one approach, first looking up the latest price, and then the last price that was different:
select secs.sec_no
, latest.amount as price
, case when previous.price_date is null then 1
else datediff(day, previous.price_date, latest.price_date)
end as days_at_price
from (
select distinct sec_no
from #sec_temp
) secs
cross apply
(
select top 1 amount
, price_date
from #sec_temp
where sec_no = secs.sec_no
order by
price_date desc
) latest
outer apply
(
select top 1 price_date
from #sec_temp
where sec_no = secs.sec_no
and amount <> latest.amount
order by
price_date desc
) previous
This prints:
sec_no price days_at_price
123ABC 25,00 5
456DEF 22,00 1

Related

Oracle SQL Hierarchy Summation

I have a table TRANS that contains the following records:
TRANS_ID TRANS_DT QTY
1 01-Aug-2020 5
1 01-Aug-2020 1
1 03-Aug-2020 2
2 02-Aug-2020 1
The expected output:
TRANS_ID TRANS_DT BEGBAL TOTAL END_BAL
1 01-Aug-2020 0 6 6
1 02-Aug-2020 6 0 6
1 03-Aug-2020 6 2 8
2 01-Aug-2020 0 0 0
2 02-Aug-2020 0 1 1
2 03-Aug-2020 1 0 1
Each trans_id starts with a beginning balance of 0 (01-Aug-2020). For succeeding days, the beginning balance is the ending balance of the previous day and so on.
I can create PL/SQL block to create the output. Is it possible to get the output in 1 SQL statement?
Thanks.
Try this following script using CTE-
Demo Here
WITH CTE
AS
(
SELECT DISTINCT A.TRANS_ID,B.TRANS_DT
FROM your_table A
CROSS JOIN (SELECT DISTINCT TRANS_DT FROM your_table) B
),
CTE2
AS
(
SELECT C.TRANS_ID,C.TRANS_DT,SUM(D.QTY) QTY
FROM CTE C
LEFT JOIN your_table D
ON C.TRANS_ID = D.TRANS_ID
AND C.TRANS_DT = D.TRANS_DT
GROUP BY C.TRANS_ID,C.TRANS_DT
ORDER BY C.TRANS_ID,C.TRANS_DT
)
SELECT F.TRANS_ID,F.TRANS_DT,
(
SELECT COALESCE (SUM(QTY), 0) FROM CTE2 E
WHERE E.TRANS_ID = F.TRANS_ID AND E.TRANS_DT < F.TRANS_DT
) BEGBAL,
(
SELECT COALESCE (SUM(QTY), 0) FROM CTE2 E
WHERE E.TRANS_ID = F.TRANS_ID AND E.TRANS_DT = F.TRANS_DT
) TOTAL ,
(
SELECT COALESCE (SUM(QTY), 0) FROM CTE2 E
WHERE E.TRANS_ID = F.TRANS_ID AND E.TRANS_DT <= F.TRANS_DT
) END_BAL
FROM CTE2 F
You can as well do like this (I would assume it's a bit faster): Demo
with
dt_between as (
select mindt + level - 1 as trans_dt
from (select min(trans_dt) as mindt, max(trans_dt) as maxdt from t)
connect by level <= maxdt - mindt + 1
),
dt_for_trans_id as (
select *
from dt_between, (select distinct trans_id from t)
),
qty_change as (
select distinct trans_id, trans_dt,
sum(qty) over (partition by trans_id, trans_dt) as total,
sum(qty) over (partition by trans_id order by trans_dt) as end_bal
from t
right outer join dt_for_trans_id using (trans_id, trans_dt)
)
select
trans_id,
to_char(trans_dt, 'DD-Mon-YYYY') as trans_dt,
nvl(lag(end_bal) over (partition by trans_id order by trans_dt), 0) as beg_bal,
nvl(total, 0) as total,
nvl(end_bal, 0) as end_bal
from qty_change q
order by trans_id, trans_dt
dt_between returns all the days between min(trans_dt) and max(trans_dt) in your data.
dt_for_trans_id returns all these days for each trans_id in your data.
qty_change finds difference for each day (which is TOTAL in your example) and cumulative sum over all the days (which is END_BAL in your example).
The main select takes END_BAL from previous day and calls it BEG_BAL, it also does some formatting of final output.
First of all, you need to generate dates, then you need to aggregate your values by TRANS_DT, and then left join your aggregated data to dates. The easiest way to get required sums is to use analitic window functions:
with dates(dt) as ( -- generating dates between min(TRANS_DT) and max(TRANS_DT) from TRANS
select min(trans_dt) from trans
union all
select dt+1 from dates
where dt+1<=(select max(trans_dt) from trans)
)
,trans_agg as ( -- aggregating QTY in TRANS
select TRANS_ID,TRANS_DT,sum(QTY) as QTY
from trans
group by TRANS_ID,TRANS_DT
)
select -- using left join partition by to get data on daily basis for each trans_id:
dt,
trans_id,
nvl(sum(qty) over(partition by trans_id order by dates.dt range between unbounded preceding and 1 preceding),0) as BEGBAL,
nvl(qty,0) as TOTAL,
nvl(sum(qty) over(partition by trans_id order by dates.dt),0) as END_BAL
from dates
left join trans_agg tr
partition by (trans_id)
on tr.trans_dt=dates.dt;
Full example with sample data:
alter session set nls_date_format='dd-mon-yyyy';
with trans(TRANS_ID,TRANS_DT,QTY) as (
select 1,to_date('01-Aug-2020'), 5 from dual union all
select 1,to_date('01-Aug-2020'), 1 from dual union all
select 1,to_date('03-Aug-2020'), 2 from dual union all
select 2,to_date('02-Aug-2020'), 1 from dual
)
,dates(dt) as ( -- generating dates between min(TRANS_DT) and max(TRANS_DT) from TRANS
select min(trans_dt) from trans
union all
select dt+1 from dates
where dt+1<=(select max(trans_dt) from trans)
)
,trans_agg as ( -- aggregating QTY in TRANS
select TRANS_ID,TRANS_DT,sum(QTY) as QTY
from trans
group by TRANS_ID,TRANS_DT
)
select
dt,
trans_id,
nvl(sum(qty) over(partition by trans_id order by dates.dt range between unbounded preceding and 1 preceding),0) as BEGBAL,
nvl(qty,0) as TOTAL,
nvl(sum(qty) over(partition by trans_id order by dates.dt),0) as END_BAL
from dates
left join trans_agg tr
partition by (trans_id)
on tr.trans_dt=dates.dt;
You can use a recursive query to generate the overall date range, cross join it with the list of distinct tran_id, then bring the table with a left join. The last step is aggregation and window functions:
with all_dates (trans_dt, max_dt) as (
select min(trans_dt), max(trans_dt) from trans group by trans_id
union all
select trans_dt + interval '1' day, max_dt from all_dates where trans_dt < max_dt
)
select
i.trans_id,
d.trans_dt,
coalesce(sum(sum(t.qty)) over(partition by i.trans_id order by d.trans_dt), 0) - coalesce(sum(t.qty), 0) begbal,
coalesce(sum(t.qty), 0) total,
coalesce(sum(sum(t.qty)) over(partition by i.trans_id order by d.trans_dt), 0) endbal
from all_dates d
cross join (select distinct trans_id from trans) i
left join trans t on t.trans_id = i.trans_id and t.trans_dt = d.trans_dt
group by i.trans_id, d.trans_dt
order by i.trans_id, d.trans_dt

Find the true start end dates for customers that have multiple accounts in SQL Server 2014

I have a checking account table that contains columns Cust_id (customer id), Open_Date (start date), and Closed_Date (end date). There is one row for each account. A customer can open multiple accounts at any given point. I would like to know how long the person has been a customer.
eg 1:
CREATE TABLE [Cust]
(
[Cust_id] [varchar](10) NULL,
[Open_Date] [date] NULL,
[Closed_Date] [date] NULL
)
insert into [Cust] values ('a123', '10/01/2019', '10/15/2019')
insert into [Cust] values ('a123', '10/12/2019', '11/01/2019')
Ideally I would like to insert this into a table with just one row, that says this person has been a customer from 10/01/2019 to 11/01/2019. (as he opened his second account before he closed his previous one.
Similarly eg 2:
insert into [Cust] values ('b245', '07/01/2019', '09/15/2019')
insert into [Cust] values ('b245', '10/12/2019', '12/01/2019')
I would like to see 2 rows in this case- one that shows he was a customer from 07/01 to 09/15 and then again from 10/12 to 12/01.
Can you point me to the best way to get this?
I would approach this as a gaps and islands problem. You want to group together groups of adjacents rows whose periods overlap.
Here is one way to solve it using lag() and a cumulative sum(). Everytime the open date is greater than the closed date of the previous record, a new group starts.
select
cust_id,
min(open_date) open_date,
max(closed_date) closed_date
from (
select
t.*,
sum(case when not open_date <= lag_closed_date then 1 else 0 end)
over(partition by cust_id order by open_date) grp
from (
select
t.*,
lag(closed_date) over (partition by cust_id order by open_date) lag_closed_date
from cust t
) t
) t
group by cust_id, grp
In this db fiddle with your sample data, the query produces:
cust_id | open_date | closed_date
:------ | :--------- | :----------
a123 | 2019-10-01 | 2019-11-01
b245 | 2019-07-01 | 2019-09-15
b245 | 2019-10-12 | 2019-12-01
I would solve this with recursion. While this is certainly very heavy, it should accommodate even the most complex account timings (assuming your data has such). However, if the sample data provided is as complex as you need to solve for, I highly recommend sticking with the solution provided above. It is much more concise and clear.
WITH x (cust_id, open_date, closed_date, lvl, grp) AS (
SELECT cust_id, open_date, closed_date, 1, 1
FROM (
SELECT cust_id
, open_date
, closed_date
, row_number()
OVER (PARTITION BY cust_id ORDER BY closed_date DESC, open_date) AS rn
FROM cust
) AS t
WHERE rn = 1
UNION ALL
SELECT cust_id, open_date, closed_date, lvl, grp
FROM (
SELECT c.cust_id
, c.open_date
, c.closed_date
, x.lvl + 1 AS lvl
, x.grp + CASE WHEN c.closed_date < x.open_date THEN 1 ELSE 0 END AS grp
, row_number() OVER (PARTITION BY c.cust_id ORDER BY c.closed_date DESC) AS rn
FROM cust c
JOIN x
ON x.cust_id = c.cust_id
AND c.open_date < x.open_date
) AS t
WHERE t.rn = 1
)
SELECT cust_id, min(open_date) AS first_open_date, max(closed_date) AS last_closed_date
FROM x
GROUP BY cust_id, grp
ORDER BY cust_id, grp
I would also add the caveat that I don't run on SQL Server, so there could be syntax differences that I didn't account for. Hopefully they are minor, if present.
you can try something like that:
select distinct
cust_id,
(select min(Open_Date)
from Cust as b
where b.cust_id = a.cust_id and
a.Open_Date <= b.Closed_Date and
a.Closed_Date >= b.Open_Date
),
(select max(Closed_Date)
from Cust as b
where b.cust_id = a.cust_id and
a.Open_Date <= b.Closed_Date and
a.Closed_Date >= b.Open_Date
)
from Cust as a
so, for every row - you're selecting minimal and maximal dates from all overlapping ranges, later distinct filters out duplicates

How to get the validity date range of a price from individual daily prices in SQL

I have some prices for the month of January.
Date,Price
1,100
2,100
3,115
4,120
5,120
6,100
7,100
8,120
9,120
10,120
Now, the o/p I need is a non-overlapping date range for each price.
price,from,To
100,1,2
115,3,3
120,4,5
100,6,7
120,8,10
I need to do this using SQL only.
For now, if I simply group by and take min and max dates, I get the below, which is an overlapping range:
price,from,to
100,1,7
115,3,3
120,4,10
This is a gaps-and-islands problem. The simplest solution is the difference of row numbers:
select price, min(date), max(date)
from (select t.*,
row_number() over (order by date) as seqnum,
row_number() over (partition by price, order by date) as seqnum2
from t
) t
group by price, (seqnum - seqnum2)
order by min(date);
Why this works is a little hard to explain. But if you look at the results of the subquery, you will see how the adjacent rows are identified by the difference in the two values.
SELECT Lag.price,Lag.[date] AS [From], MIN(Lead.[date]-Lag.[date])+Lag.[date] AS [to]
FROM
(
SELECT [date],[Price]
FROM
(
SELECT [date],[Price],LAG(Price) OVER (ORDER BY DATE,Price) AS LagID FROM #table1 A
)B
WHERE CASE WHEN Price <> ISNULL(LagID,1) THEN 1 ELSE 0 END = 1
)Lag
JOIN
(
SELECT [date],[Price]
FROM
(
SELECT [date],Price,LEAD(Price) OVER (ORDER BY DATE,Price) AS LeadID FROM [#table1] A
)B
WHERE CASE WHEN Price <> ISNULL(LeadID,1) THEN 1 ELSE 0 END = 1
)Lead
ON Lag.[Price] = Lead.[Price]
WHERE Lead.[date]-Lag.[date] >= 0
GROUP BY Lag.[date],Lag.[price]
ORDER BY Lag.[date]
Another method using ROWS UNBOUNDED PRECEDING
SELECT price, MIN([date]) AS [from], [end_date] AS [To]
FROM
(
SELECT *, MIN([abc]) OVER (ORDER BY DATE DESC ROWS UNBOUNDED PRECEDING ) end_date
FROM
(
SELECT *, CASE WHEN price = next_price THEN NULL ELSE DATE END AS abc
FROM
(
SELECT a.* , b.[date] AS next_date, b.price AS next_price
FROM #table1 a
LEFT JOIN #table1 b
ON a.[date] = b.[date]-1
)AA
)BB
)CC
GROUP BY price, end_date

TSQL Row_Number

This question has been covered similarly before BUT I'm struggling.
I need to find top N sales based on customer buying patterns..
ideally this needs to be top N by customer by Month Period by Year but for now i'm just looking at top N over the whole DB.
My query looks like:
-- QUERY TO SHOW TOP 2 CUSTOMER INVOICES BY CUSTOMER BY MONTH
SELECT
bill_to_code,
INVOICE_NUMBER,
SUM( INVOICE_AMOUNT_CORP ) AS 'SALES',
ROW_NUMBER() OVER ( PARTITION BY bill_to_code ORDER BY SUM( INVOICE_AMOUNT_CORP ) DESC ) AS 'Row'
FROM
FACT_OM_INVOICE
JOIN dim_customer_bill_to ON FACT_OM_INVOICE.dim_customer_bill_to_key = dim_customer_bill_to.dim_customer_bill_to_key
--WHERE
-- 'ROW' < 2
GROUP BY
invoice_number,
Dim_customer_bill_to.bill_to_code
I can't understand the solutions given to restrict Row to =< N.
Please help.
Try this.
-- QUERY TO SHOW TOP 2 CUSTOMER INVOICES BY CUSTOMER BY MONTH
;WITH Top2Customers
AS
(
SELECT
bill_to_code,
INVOICE_NUMBER,
SUM( INVOICE_AMOUNT_CORP ) AS 'SALES',
ROW_NUMBER() OVER ( PARTITION BY bill_to_code ORDER BY SUM( INVOICE_AMOUNT_CORP ) DESC )
AS 'RowNumber'
FROM
FACT_OM_INVOICE
JOIN dim_customer_bill_to ON FACT_OM_INVOICE.dim_customer_bill_to_key = dim_customer_bill_to.dim_customer_bill_to_key
GROUP BY
invoice_number,
Dim_customer_bill_to.bill_to_code
)
SELECT * FROM Top2Customers WHERE RowNumber < 3
You have to wrap your select into another to use the value produced by row_number()
select * from (
SELECT
bill_to_code,
INVOICE_NUMBER,
SUM( INVOICE_AMOUNT_CORP ) AS SALES,
ROW_NUMBER() OVER ( PARTITION BY bill_to_code ORDER BY SUM( INVOICE_AMOUNT_CORP ) DESC ) AS RowNo
FROM
FACT_OM_INVOICE
JOIN dim_customer_bill_to ON FACT_OM_INVOICE.dim_customer_bill_to_key = dim_customer_bill_to.dim_customer_bill_to_key
--WHERE
-- 'ROW' < 2
GROUP BY
invoice_number,
Dim_customer_bill_to.bill_to_code
) base where RowNo < 2

Return max value from a SQL selection

I do have a table license_Usage where which works like a log of the usage of licenses in a day
ID User license date
1 1 A 22/2/2015
2 1 A 23/2/2015
3 1 B 22/2/2015
4 2 A 22/2/2015
Where I want to Count how many licenses per user in a day, the result shoul look like:
QuantityOfLicenses User date
2 1 22/2/2015
1 2 22/2/2015
For that I did the following query :
select count(license) as [Quantity of licenses],[user],[date]
From license_Usage
where date = '22/2/2015'
Group by [date], [user]
which works, but know I want to know which user have used the most number of licenses, for that I did the following query:
select MAX(result.[Quantity of licenses])
From (
select count(license) as [Quantity of licenses],[user],[date]
From license_Usage
Group by [date], [user]
) as result
And it returns the max value of 2, but when I want to know which user have used 2 licenses,I try this query with no success :
select result.user, MAX(result.[Quantity of licenses])
From (
select count(license) as [Quantity of licenses],[user],[date]
From license_Usage
Group by [date], [user]
) as result
Group by result.user
You can use something like this:
select top 1 *
From (
select count(license) as Quantity,[user],[date]
From license_Usage
Group by [date], [user]
) as result
order by Quantity desc
If you need to have a fetch that fetches all the rows that have max in case there's several, then you'll have to use rank() window function
Use RANK to rank the users by the number of licenses per day.
SELECT
LicPerDay.*,
RANK() OVER (PARTITION BY [date] ORDER BY Qty DESC) AS User_Rank
FROM (
SELECT
COUNT(license) AS Qty,
User,
[date]
FROM license_usage
GROUP BY User, [date]
) LicPerDay
Any user with User_Rank = 1 will have the most licenses for that day.
If you only want the top user for each day, wrap the query above as a subquery and filter on User_Rank = 1:
SELECT * FROM (
SELECT
LicPerDay.*,
RANK() OVER (PARTITION BY [date] ORDER BY Qty) AS User_Rank
FROM (
SELECT
COUNT(license) AS Qty,
User,
[date]
FROM license_usage
GROUP BY User, [date]
) LicPerDay
) LicPerDayRanks
WHERE User_Rank = 1
Use a Windowed Aggregate Function, RANK, to get the highest count:
SELECT * FROM (
SELECT
User,
[date]
COUNT(license) AS Qty,
-- rank by descending number for each day ??
--RANK() OVER (PARTITION BY [date] ORDER BY COUNT(license) DESC) AS rnk
-- rank by descending number
RANK() OVER (ORDER BY COUNT(license) DESC) AS rnk
FROM license_usage
GROUP BY User, [date]
) dt
WHERE rnk = 1