Select Max value For Each Entity - sql

In Microsoft SQL Server, I have a table with columns EmployeeID, Category and Amount. How can I show just the Category with the highest Amount for each Employee?
Data Example:
EmployeeID Category Amount
11111 Vacation 4
11111 Personal 2
11111 Holiday 3
22222 Vacation 1
22222 Personal 3
22222 Holiday 2
33333 Personal 5
33333 Holiday 1
33333 Vacation 3
33333 Unspecified 3
Results:
EmployeeID Category Amount
11111 Vacation 4
22222 Personal 3
33333 Personal 5

Another option is the WITH TIES clause
Example
-- ROW_NUMBER() restarts at 1 for every EmployeeID (largest Amount first).
-- TOP (1) WITH TIES then returns every row tied on the lowest sort value,
-- i.e. the row numbered 1 for each employee.
SELECT TOP (1) WITH TIES
    *
FROM YourTable
ORDER BY
    ROW_NUMBER() OVER (
        PARTITION BY EmployeeID
        ORDER BY Amount DESC
    );
Returns
Or using Row_Number() and a CTE
-- Number each employee's rows from the largest Amount down, then keep row 1.
;WITH cte AS (
    SELECT
        *,
        ROW_NUMBER() OVER (
            PARTITION BY EmployeeID
            ORDER BY Amount DESC
        ) AS RN
    FROM YourTable
)
SELECT
    EmployeeID,
    Category,
    Amount
FROM cte
WHERE RN = 1;

Here are a few different options. Any of them can outperform the others depending on the available indexes, so all three are worth testing in your environment.
-- Fixture: sample rows in #TestData plus a list of employees in #Employee.
SET NOCOUNT ON;

-- Recreate #TestData from scratch so the script can be re-run in one session.
IF OBJECT_ID('tempdb..#TestData', 'U') IS NOT NULL
BEGIN
    DROP TABLE #TestData;
END;

CREATE TABLE #TestData (
    EmployeeID INT         NOT NULL,
    Category   VARCHAR(20) NOT NULL,
    Amount     MONEY       NOT NULL
);

INSERT INTO #TestData (EmployeeID, Category, Amount)
VALUES
    (11111, 'Vacation ', 4),
    (11111, 'Personal ', 2),
    (11111, 'Holiday ', 3),
    (22222, 'Vacation ', 1),
    (22222, 'Personal ', 3),
    (22222, 'Holiday ', 2),
    (33333, 'Personal ', 5),
    (33333, 'Holiday ', 1),
    (33333, 'Vacation ', 3),
    (33333, 'Unspecified', 3);

-- Covering index: keyed by employee with Amount descending, Category included,
-- so each query option can be answered from the index alone.
CREATE NONCLUSTERED INDEX ix_TestData
    ON #TestData (EmployeeID, Amount DESC)
    INCLUDE (Category);

------------------------------------------
-- Recreate #Employee: one row per employee, used to drive the CROSS APPLY option.
IF OBJECT_ID('tempdb..#Employee', 'U') IS NOT NULL
BEGIN
    DROP TABLE #Employee;
END;

CREATE TABLE #Employee (
    EmployeeID INT NOT NULL
);

INSERT INTO #Employee (EmployeeID)
VALUES
    (11111),
    (22222),
    (33333);
-- SELECT * FROM #TestData td;
--============================================================
--============================================================
-- Option 1: sort by the per-employee row number and let WITH TIES return
-- every row that shares the first value (row number 1 of each employee).
SELECT TOP (1) WITH TIES
    src.EmployeeID,
    src.Category,
    src.Amount
FROM #TestData AS src
ORDER BY
    ROW_NUMBER() OVER (
        PARTITION BY src.EmployeeID
        ORDER BY src.Amount DESC
    );
--============================================================
-- Option 2: for every employee, fetch the single row with the largest Amount.
SELECT
    top_row.EmployeeID,
    top_row.Category,
    top_row.Amount
FROM #Employee AS emp
CROSS APPLY (
    SELECT TOP (1)
        src.EmployeeID,
        src.Category,
        src.Amount
    FROM #TestData AS src
    WHERE src.EmployeeID = emp.EmployeeID
    ORDER BY src.Amount DESC
) AS top_row;
--============================================================
-- Option 3: number each employee's rows by Amount (largest first) in a
-- derived table and keep only the rows numbered 1.
SELECT
    ranked.EmployeeID,
    ranked.Category,
    ranked.Amount
FROM (
    SELECT
        src.EmployeeID,
        src.Category,
        src.Amount,
        ROW_NUMBER() OVER (
            PARTITION BY src.EmployeeID
            ORDER BY src.Amount DESC
        ) AS RN
    FROM #TestData AS src
) AS ranked
WHERE ranked.RN = 1;
All 3 produce the same results...
EmployeeID Category Amount
----------- -------------------- ---------------------
11111 Vacation 4.00
22222 Personal 3.00
33333 Personal 5.00
EmployeeID Category Amount
----------- -------------------- ---------------------
11111 Vacation 4.00
22222 Personal 3.00
33333 Personal 5.00
EmployeeID Category Amount
----------- -------------------- ---------------------
11111 Vacation 4.00
22222 Personal 3.00
33333 Personal 5.00

Use EXISTS/NOT EXISTS:
-- Keep a row only when no other row of the same employee has a strictly
-- larger Amount (rows tied on the maximum are all returned).
SELECT cur.*
FROM Employees AS cur
WHERE NOT EXISTS (
    SELECT *
    FROM Employees AS bigger
    WHERE bigger.EmployeeID = cur.EmployeeID   -- join condition
      AND bigger.Amount > cur.Amount           -- filter condition
);

-- Rank each employee's rows by Amount (largest first) and keep the top one.
-- The alias belongs after the OVER (...) clause, and ordering by the
-- partition column adds nothing, so the window orders by Amount only.
SELECT
    x.EmployeeID,
    x.Category,
    x.Amount
FROM (
    SELECT
        *,
        ROW_NUMBER() OVER (
            PARTITION BY EmployeeID
            ORDER BY Amount DESC
        ) AS rn
    FROM getmaxdata
) AS x
WHERE x.rn = 1;

Related

Get SQL Server result using subquery

I have a Members table like this:
PersonID FirstName Address City date
---------------------------------------------------------
3 Rasanga Skagen 21 South 2019-01-05
and a Persons table:
PersonID FirstName Address City date
-------------------------------------------------------
3 Rasanga Skagen 21 South 2019-01-06
1 Tom B. Skagen 21 Colombo 2018-01-07
2 Tom B. Skagen 21 Colombo 2019-01-05
I want to get the Persons that do not exist in the Members table, matching on the FirstName column. For that I'm using this query:
-- Persons whose FirstName does not appear in Members.
SELECT p.*
FROM Persons AS p
WHERE NOT EXISTS (
    SELECT *
    FROM Members AS m
    WHERE m.FirstName = p.FirstName
);
When I execute the above query I get two records with the same FirstName, but my requirement is that if there are two records for the same name, only the latest record (by the date column) should be retrieved. Therefore, in the above scenario it should be the Tom B. record with 2018-01-07. If both records have the same date, only one of the two records should be retrieved.
Can somebody explain how to do this?
You can use the left join and checking the Members.PersonId is null.
-- Sample schema and data for the anti-join examples.
CREATE TABLE Members (
    PersonID  INT,
    FirstName VARCHAR(20),
    Address   VARCHAR(50),
    City      VARCHAR(50),
    Dtdate    DATE
);

INSERT INTO Members (PersonID, FirstName, Address, City, Dtdate)
VALUES
    (3, 'Rasanga', 'Skagen 21', 'South', '2019-01-05');

CREATE TABLE Persons (
    PersonID  INT,
    FirstName VARCHAR(20),
    Address   VARCHAR(50),
    City      VARCHAR(50),
    Dtdate    DATE
);

INSERT INTO Persons (PersonID, FirstName, Address, City, Dtdate)
VALUES
    (3, 'Rasanga', 'Skagen 21', 'South', '2019-01-06'),
    (1, 'Tom B.', 'Skagen 21', 'Colombo', '2018-01-07'),
    (2, 'Tom B.', 'Skagen 21', 'Colombo', '2019-01-05');

-- Anti-join: keep the Persons rows for which the LEFT JOIN found no
-- Members row with the same PersonID.
SELECT p.*
FROM Persons AS p
LEFT JOIN Members AS m
    ON m.PersonID = p.PersonID
WHERE m.PersonID IS NULL;
Demo
Using the not exists you can check as shown below.
-- Persons that have no Members row with the same PersonID.
SELECT p.*
FROM Persons AS p
WHERE NOT EXISTS (
    SELECT 1
    FROM Members AS m
    WHERE m.PersonID = p.PersonID
);
Using the in operator
-- Persons whose PersonID is not present in Members.
-- Members.PersonID is nullable: a single NULL coming out of the subquery
-- would make NOT IN evaluate to UNKNOWN for every row and return nothing,
-- so NULLs are filtered out before the comparison.
SELECT *
FROM Persons
WHERE PersonID NOT IN (
    SELECT m.PersonID
    FROM Members AS m
    WHERE m.PersonID IS NOT NULL
);
To get the unique records based on the first name and date you can use the following query using ROW_NUMBER() function.
-- Among Persons with no matching Members row (by PersonID), number the rows
-- of each FirstName from the newest Dtdate down and keep the first one.
;WITH cte AS (
    SELECT
        p.*,
        ROW_NUMBER() OVER (
            PARTITION BY p.FirstName
            ORDER BY p.Dtdate DESC
        ) AS RN
    FROM Persons AS p
    LEFT JOIN Members AS m
        ON m.PersonID = p.PersonID
    WHERE m.PersonID IS NULL
)
SELECT *
FROM cte
WHERE RN = 1;
Output
PersonID FirstName Address City Dtdate RN
----------------------------------------------------------
2 Tom B. Skagen 21 Colombo 2019-01-05 1
You could use a window function as
-- Newest row per FirstName, restricted to names that do not occur in Members.
SELECT
    ranked.PersonID,
    ranked.FirstName,
    ranked.Address,
    ranked.City,
    ranked.[Date]
FROM (
    SELECT
        *,
        ROW_NUMBER() OVER (
            PARTITION BY FirstName
            ORDER BY [Date] DESC
        ) AS RN
    FROM Persons
) AS ranked
WHERE ranked.RN = 1
  AND NOT EXISTS (
      SELECT 1
      FROM Members AS m
      WHERE m.FirstName = ranked.FirstName
  );
Here is a db<>fiddle

Exclude records that meet certain criteria by row

I'm in need of some brainstorming. I have built a query that shows me what I need. However the ask now is to use this list of records and exclude records based on a certain criteria.
This is my current output from the query built:
Patient | Action | Date
james | REG | 2019/01/01
James | CUR | 2019/01/15
Jacon | REG | 2019/01/12
Jacob | REG | 2019/01/13
Main | CUR | 2019/01/01
Main | REG | 2019/01/05
Lucy | REG | 2019/01/08
Lucy | CUR | 2019/01/09
Lucy | CUR | 2019/01/10
Based on the sample data from above I want to remove any patients where the first record is 'REG' and the following Action is 'CUR'. So in this example I only want to remove James.
Any Ideas on what I should do?
Thank you for your help!
First group your data using DENSE_RANK and ROW_NUMBER; then, with the help of temp tables, select the data you are looking for.
-- Sample data: one row per patient action (no date or sequence column is stored).
CREATE TABLE #temp (Patient VARCHAR(50), Action VARCHAR(3))
Insert INTO #temp VALUES
('james','REG'),
('james','CUR'),
('Jacob','REG'),
('Jacob','REG'),
('Main','CUR'),
('Main','REG'),
('Lucy','REG'),
('Lucy','CUR'),
('Lucy','CUR')
-- GroupNo: one number per distinct Patient value.
-- GroupOrder: row number inside each patient.
-- NOTE(review): GroupOrder is ordered by Patient, which is also the partition
-- column, so the order of rows inside a patient is not fixed by this query.
SELECT *, DENSE_RANK() OVER (ORDER BY Patient ASC) GroupNo,
ROW_NUMBER() OVER (partition BY Patient ORDER BY Patient ASC) GroupOrder
INTO #PatientsWithGroup
FROM #temp
-- Pair each patient's row 1 with its row 2 and keep pairs that are REG then CUR.
-- NOTE(review): MIN() is used without GROUP BY, so at most one GroupNo is
-- stored, and HAVING only lets it through when fewer than 3 pairs matched.
SELECT MIN(c1.GroupNo) GroupNo
INTO #PatsToEliminate
FROM #PatientsWithGroup c1
INNER JOIN #PatientsWithGroup c2 ON c1.GroupNo=c2.GroupNo
WHERE (c1.GroupOrder=1 AND c1.Action='REG') AND (c2.GroupOrder = 2 AND c2.Action='CUR')
HAVING COUNT(c1.Patient)<3
-- Return every row whose patient group was not selected for elimination.
SELECT *
FROM #PatientsWithGroup p
WHERE p.GroupNo NOT IN (SELECT GroupNo FROM #PatsToEliminate)
You can use the LEAD function to look ahead.
-- Sample data; ID records the insertion order and is used to sequence actions.
CREATE TABLE #Patients (
    ID       INT IDENTITY(1,1),
    Patient  VARCHAR(50),
    [Action] VARCHAR(50)
);

INSERT INTO #Patients (Patient, [Action])
VALUES
    ('james', 'REG'),
    ('James', 'CUR'),
    ('Jacon', 'REG'),
    ('Jacob', 'REG'),
    ('Main', 'CUR'),
    ('Main', 'REG'),
    ('Lucy', 'REG'),
    ('Lucy', 'CUR'),
    ('Lucy', 'CUR');

-- Before
SELECT * FROM #Patients;

-- Pair every action with the same patient's next action (by ID), then delete
-- all rows of any patient that has a REG directly followed by a CUR.
WITH NextActions AS (
    SELECT
        p.Patient,
        p.[Action],
        LEAD(p.[Action]) OVER (
            PARTITION BY p.Patient
            ORDER BY p.ID
        ) AS NextAction
    FROM #Patients AS p
)
DELETE FROM #Patients
WHERE Patient IN (
    SELECT na.Patient
    FROM NextActions AS na
    WHERE na.[Action] = 'REG'
      AND na.NextAction = 'CUR'
);

-- After
SELECT * FROM #Patients;

DROP TABLE #Patients;
Try this:
-- Sample data; ordre gives the sequence of the actions.
SELECT
    v.ordre,
    v.Patient,
    v.[Action]
INTO #tmp
FROM (
    VALUES
        (1, 'james', 'REG'),
        (2, 'James', 'CUR'),
        (3, 'Jacon', 'REG'),
        (4, 'Jacob', 'REG'),
        (5, 'Main',  'CUR'),
        (6, 'Main',  'REG'),
        (7, 'Lucy',  'REG'),
        (8, 'Lucy',  'CUR'),
        (9, 'Lucy',  'CUR')
) AS v (ordre, Patient, [Action]);

-- Rank each patient's actions by ordre, then drop every patient whose first
-- action is REG and whose second action is CUR.
;WITH cte AS (
    SELECT
        t.ordre,
        t.Patient,
        t.[Action],
        RANK() OVER (
            PARTITION BY t.Patient
            ORDER BY t.ordre
        ) AS Patient_order
    FROM #tmp AS t
)
SELECT cur.*
FROM cte AS cur
WHERE NOT EXISTS (
    SELECT 1
    FROM cte AS first_row
    WHERE first_row.Patient = cur.Patient
      AND first_row.Patient_order = 1
      AND first_row.[Action] = 'REG'
      AND EXISTS (
          SELECT 1
          FROM cte AS second_row
          WHERE second_row.Patient = first_row.Patient
            AND second_row.Patient_order = 2
            AND second_row.[Action] = 'CUR'
      )
);

SQL - How to remove repeating values

My requirement is to remove the repeating values.
id name surname value
1 Vinduja Vijayan 5
3 Vinduja Vijayan 6
4 Vinduja Vijayan 7
Required output:
id name surname value
1 Vinduja Vijayan 5
3 NuLL Null 6
4 NULL NULL 7
This transformation should usually be applied in the application layer. It is possible to do in SQL, but not recommended, by using row_number() and case:
-- Show name/surname only on the first row (lowest id) of each name+surname
-- group; later rows of the group get NULL. The value column is passed
-- through unchanged, as in the required output.
SELECT
    id,
    CASE
        WHEN ROW_NUMBER() OVER (PARTITION BY name, surname ORDER BY id) = 1
            THEN name
    END AS name,
    CASE
        WHEN ROW_NUMBER() OVER (PARTITION BY name, surname ORDER BY id) = 1
            THEN surname
    END AS surname,
    value
FROM t
ORDER BY id;
Note that the final order by is very, very important. SQL result sets (like tables) are unordered by default. Without an explicit order by, the results could be in any order, and that would mess up your interpretation of the results.
-- Table variables are declared with an @ prefix (# denotes a temp table and
-- is not valid in DECLARE ... TABLE).
DECLARE @table TABLE (
    Id      INT,
    Name    VARCHAR(20),
    Surname VARCHAR(20),
    value   INT
);

INSERT INTO @table (Id, Name, Surname, value)
VALUES
    (1, 'Vinduja', 'Vijayan', 5),
    (3, 'Vinduja', 'Vijayan', 6),
    (4, 'Vinduja', 'Vijayan', 7);

-- Number the rows of each name+surname group by Id so that the row keeping
-- its name is always the lowest Id; the self-join only matches that first
-- row, leaving Name/Surname NULL on the others.
SELECT
    S.Id,
    T.Name,
    T.Surname,
    S.value
FROM (
    SELECT
        *,
        ROW_NUMBER() OVER (PARTITION BY Name, Surname ORDER BY Id) AS [Row]
    FROM @table
) AS S
LEFT JOIN @table AS T
    ON T.Id = S.Id
   AND S.[Row] = 1
ORDER BY S.Id;
-- Number rows within each name+surname group by id; only row 1 keeps the
-- name and surname, the rest show NULL.
SELECT
    repeatname.id,
    CASE WHEN repeatname.rnk = 1 THEN repeatname.name END AS name,
    CASE WHEN repeatname.rnk = 1 THEN repeatname.surname END AS surname,
    repeatname.value
FROM (
    SELECT
        id,
        name,
        surname,
        value,
        ROW_NUMBER() OVER (PARTITION BY name, surname ORDER BY id) AS rnk
    FROM table_name
) AS repeatname;
I'm not sure I understand your requirements. If you just want to display the data as described, then this won't work. But if you're trying to change the data in your table, this will do that.
-- Table variables are declared with an @ prefix (# denotes a temp table and
-- is not valid in DECLARE ... TABLE).
DECLARE @Dupes TABLE
(
    id      INT,
    name    VARCHAR(30),
    surname VARCHAR(30),
    value   INT
);

INSERT INTO @Dupes (id, name, surname, value)
VALUES
    (1, 'Vinduja', 'Vijayan', 5),
    (3, 'Vinduja', 'Vijayan', 6),
    (4, 'Vinduja', 'Vijayan', 7);

-- Number the rows of each name+surname group by id and update through the
-- CTE: every row after the first has its name and surname set to NULL in
-- the underlying table.
WITH cte AS
(
    SELECT
        ROW_NUMBER() OVER (PARTITION BY [name], surname ORDER BY id) AS RowNum,
        id,
        name,
        surname,
        value
    FROM @Dupes
)
UPDATE cte
SET cte.name    = NULL,
    cte.surname = NULL
WHERE cte.RowNum > 1;

SELECT
    id,
    name,
    surname,
    value
FROM @Dupes;
--Results
+----+---------+---------+-------+
| id | name | surname | value |
+----+---------+---------+-------+
| 1 | Vinduja | Vijayan | 5 |
| 3 | NULL | NULL | 6 |
| 4 | NULL | NULL | 7 |
+----+---------+---------+-------+
And just for interest, using the LAG function. I assumed SQL Server.
-- Compare each row with the previous one in (name, surname, id) order and
-- blank name/surname when BOTH repeat. Testing the two columns together
-- keeps the surname visible when a different person shares it. The value
-- column is passed through unchanged, as in the required output.
SELECT
    a.id,
    CASE
        WHEN a.name = a.previous_name AND a.surname = a.previous_surname THEN NULL
        ELSE a.name
    END AS name,
    CASE
        WHEN a.name = a.previous_name AND a.surname = a.previous_surname THEN NULL
        ELSE a.surname
    END AS surname,
    a.value
FROM (
    SELECT
        name,
        surname,
        id,
        value,
        LAG(name)    OVER (ORDER BY name, surname, id) AS previous_name,
        LAG(surname) OVER (ORDER BY name, surname, id) AS previous_surname
    FROM table_name
) AS a
ORDER BY
    a.name,
    a.surname,
    a.id;

SQL pivot with variable lines

I have to get a variable amount of lines into columns. I have something like:
EMP EMP_ID DIV_ID ALLOCATION
Smith, Tom 3605 11300 20.00
Smith, Tom 13605 11310 80.00
Benetar, Pat 7460 11012 25.00
Benetar, Pat 7460 11015 75.00
Walkin, Chris 13892 11012 90.00
Walkin, Chris 13892 11015 10.00
Kent, Clark 12262 10015 50.00
Kent, Clark 12262 11210 25.00
Kent, Clark 12262 11220 25.00
What I am looking for is something like:
EMP EMP_ID DIV_ID_01 DIV_01_ALOC DIV_ID_02 DIV_02_ALOC DIV_ID_03 DIV_03_ALOC
Smith, Tom 3605 11300 20.00 11310 80.00
Benetar, Pat 13605 11012 25.00 11015 75.00
Walkin, Chris 13892 11012 90.00 11015 10.00
Kent, Clark 12262 11015 50.00 11210 25.00 11220 25.00
I would like to avoid using a large amount of CASE statements. I am trying now to work with pivots, but am having a tough time with headers.
UPDATED:
After a couple of attempts, I came up with the solution below which utilizes two PIVOT functions and a GROUP BY in order to match your expected result.
Here is the code below. NOTE: This is meant for SQL Server 2005+
-- Turns each employee's division/allocation rows into one wide row with up to
-- three (division, allocation) column pairs, using two PIVOTs and a GROUP BY.
-- testdata: inline copy of the sample rows.
with testdata(Emp, EMP_ID, DIV_ID, ALLOCATION)
as
(
select 'Smith, Tom',3605, 11300,20.00
union all
select 'Smith, Tom',3605, 11310, 80.00
union all
select 'Benetar, Pat',7460, 11012,25.00
union all
select 'Benetar, Pat',7460, 11015,75.00
union all
select 'Walkin, Chris',13892, 11012, 90.00
union all
select 'Walkin, Chris', 13892, 11015, 10.00
union all
select 'Kent, Clark', 12262, 10015, 50.00
union all
select 'Kent, Clark', 12262, 11210, 25.00
union all
select 'Kent, Clark', 12262, 11220, 25.00
)
SELECT Emp
,EMP_ID
,MAX([Div1]) AS DIV_ID_01
,MAX([Alloc1]) AS DIV_01_ALOC
,MAX([Div2]) AS DIV_ID_02
,MAX([Alloc2]) AS DIV_02_ALOC
,MAX([Div3]) AS DIV_ID_03
,MAX([Alloc3]) AS DIV_03_ALOC
FROM (
-- Rank each employee's divisions by div_id and build the pivot labels
-- 'Div<n>' and 'Alloc<n>' from that rank.
SELECT *
,cast(dense_rank() OVER (PARTITION BY emp_id
ORDER BY div_id asc) AS nvarchar) AS [emp_rnk]
,'Alloc' + cast(dense_rank() OVER (PARTITION BY emp_id
ORDER BY div_id asc) AS nvarchar) AS [piv_Alloc_rnk]
,'Div' + cast(dense_rank() OVER (PARTITION BY emp_id
ORDER BY div_id asc) AS nvarchar) AS [piv_Div_rnk]
FROM testdata td
) query
/* emp_rnk is not consumed by either PIVOT, so after both PIVOTs are complete there is
still one row per rank for each EMP_ID. Further aggregation is needed to 'flatten' the result. */
PIVOT (Max(Div_id) FOR [piv_Div_rnk] IN ([Div1],[Div2],[Div3])) AS pivot1
PIVOT (Max(Allocation) FOR [piv_Alloc_rnk] in([Alloc1],[Alloc2],[Alloc3])) AS pivot2
/* Each pivoted column holds at most one non-NULL value per EMP_ID, so taking the MAX()
and grouping by EMP and EMP_ID flattens the rows down to the desired output. */
GROUP BY emp, emp_id
ORDER BY DIV_ID_01 DESC
Just for reference, if you try to use dynamic PIVOT here, you would end up needing a query that looks something like this.
-- Label every row with its per-employee division rank, pivot the division
-- ids and the allocations on those labels, then collapse to one row per
-- employee.
;WITH ranked AS (
    SELECT
        [EMP],
        [EMP_ID],
        [DIV_ID],
        [ALLOCATION],
        CONCAT('DIV_ID_', DENSE_RANK() OVER (PARTITION BY [EMP] ORDER BY [DIV_ID])) AS ID_RN,
        CONCAT('DIV_', DENSE_RANK() OVER (PARTITION BY [EMP] ORDER BY [DIV_ID]), '_ALOC') AS ALLOC_RN
    FROM EmpTable
)
SELECT
    [EMP],
    [EMP_ID],
    MIN(DIV_ID_1)   AS [DIV_ID_1],
    SUM(DIV_1_ALOC) AS [DIV_1_ALOC],
    MIN(DIV_ID_2)   AS [DIV_ID_2],
    SUM(DIV_2_ALOC) AS [DIV_2_ALOC],
    MIN(DIV_ID_3)   AS [DIV_ID_3],
    SUM(DIV_3_ALOC) AS [DIV_3_ALOC]
FROM ranked AS t
PIVOT (
    MIN([DIV_ID])
    FOR ID_RN IN ([DIV_ID_1], [DIV_ID_2], [DIV_ID_3])
) AS p1
PIVOT (
    SUM([ALLOCATION])
    FOR ALLOC_RN IN ([DIV_1_ALOC], [DIV_2_ALOC], [DIV_3_ALOC])
) AS p2
GROUP BY
    [EMP],
    [EMP_ID];
You would need to dynamically create the SELECT and also the PIVOT columns because of the double pivot.
On the other hand, if you use CASE statements, you'll only need to dynamically create the SELECT, since that query would look like
-- Conditional aggregation: rank each employee's divisions, then pick the
-- division id and allocation of rank 1, 2 and 3 into their own columns.
;WITH ranked AS (
    SELECT
        *,
        DENSE_RANK() OVER (PARTITION BY [EMP] ORDER BY [DIV_ID]) AS RN
    FROM EmpTable
)
SELECT
    [EMP],
    [EMP_ID],
    MIN(CASE WHEN RN = 1 THEN [DIV_ID] END)     AS [DIV_ID_1],
    SUM(CASE WHEN RN = 1 THEN [ALLOCATION] END) AS [DIV_1_ALOC],
    MIN(CASE WHEN RN = 2 THEN [DIV_ID] END)     AS [DIV_ID_2],
    SUM(CASE WHEN RN = 2 THEN [ALLOCATION] END) AS [DIV_2_ALOC],
    MIN(CASE WHEN RN = 3 THEN [DIV_ID] END)     AS [DIV_ID_3],
    SUM(CASE WHEN RN = 3 THEN [ALLOCATION] END) AS [DIV_3_ALOC]
FROM ranked
GROUP BY
    [EMP],
    [EMP_ID];
Your dynamic statement would look something like
-- Builds and runs the CASE-aggregation query with one (DIV_ID, ALLOCATION)
-- column pair per division rank that actually occurs in EmpTable.
-- Local variables use the @ prefix (# is not valid for variables).
DECLARE @CaseSelect VARCHAR(MAX);

-- Assemble the column list with FOR XML PATH so the ORDER BY is honoured;
-- concatenating into a variable across rows does not guarantee row order.
-- The numeric rank drives the ordering (so 10 sorts after 2) and a wider
-- VARCHAR is used so ranks above 99 are not mangled.
SELECT @CaseSelect = STUFF(
    (
        SELECT ','
             + 'MIN(CASE WHEN RN = ' + r.RN + ' THEN [DIV_ID] END) [DIV_ID_' + r.RN + '],'
             + 'SUM(CASE WHEN RN = ' + r.RN + ' THEN [ALLOCATION] END) [DIV_' + r.RN + '_ALOC]'
        FROM (
            SELECT DISTINCT
                ranks.RankNo,
                CONVERT(VARCHAR(20), ranks.RankNo) AS RN
            FROM (
                SELECT DENSE_RANK() OVER (PARTITION BY [EMP] ORDER BY [DIV_ID]) AS RankNo
                FROM EmpTable
            ) AS ranks
        ) AS r
        ORDER BY r.RankNo
        FOR XML PATH(''), TYPE
    ).value('.', 'VARCHAR(MAX)'),
    1, 1, ''   -- strip the leading comma
);

DECLARE @SQL VARCHAR(MAX);

SET @SQL = '
SELECT [EMP],
[EMP_ID], '
+ @CaseSelect + '
FROM (
SELECT *,
DENSE_RANK () OVER (PARTITION BY [EMP] ORDER BY [DIV_ID]) RN
FROM EmpTable
) t
GROUP BY [EMP], [EMP_ID]
';

-- When EmpTable is empty @CaseSelect (and therefore @SQL) is NULL and
-- nothing is executed.
EXEC (@SQL);
Replace EmpTable with your actual table name, of course.

Group data by the change of grouping column value in order

With the following data
-- Price history sample: one row per product per day.
CREATE TABLE #ph (
    product INT,
    [date]  DATE,
    price   INT
);

INSERT INTO #ph (product, [date], price)
VALUES
    (1, '20120101', 1),
    (1, '20120102', 1),
    (1, '20120103', 1),
    (1, '20120104', 1),
    (1, '20120105', 2),
    (1, '20120106', 2),
    (1, '20120107', 2),
    (1, '20120108', 2),
    (1, '20120109', 1),
    (1, '20120110', 1),
    (1, '20120111', 1),
    (1, '20120112', 1);
I would like to produce the following output:
product | date_from | date_to | price
1 | 20120101 | 20120105 | 1
1 | 20120105 | 20120109 | 2
1 | 20120109 | 20120112 | 1
If I group by price and show the max and min date then I will get the following which is not what I want (see the over lapping of dates).
product | date_from | date_to | price
1 | 20120101 | 20120112 | 1
1 | 20120105 | 20120108 | 2
So essentially what I'm looking to do is group by the step change in data based on group columns product and price.
What is the cleanest way to achieve this?
There's a (more or less) known technique of solving this kind of problem, involving two ROW_NUMBER() calls, like this:
-- The difference between the per-product row number and the
-- per-product-and-price row number stays constant while the price is
-- unchanged, so (product, price, grp) identifies one consecutive run.
WITH marked AS (
    SELECT
        *,
        ROW_NUMBER() OVER (PARTITION BY product ORDER BY [date])
      - ROW_NUMBER() OVER (PARTITION BY product, price ORDER BY [date]) AS grp
    FROM #ph
)
SELECT
    product,
    MIN([date]) AS date_from,
    MAX([date]) AS date_to,
    price
FROM marked
GROUP BY
    product,
    price,
    grp
ORDER BY
    product,
    MIN([date]);
Output:
product date_from date_to price
------- ---------- ------------- -----
1 2012-01-01 2012-01-04 1
1 2012-01-05 2012-01-08 2
1 2012-01-09 2012-01-12 1
I'm new to this forum so hope my contribution is helpful.
If you really don't want to use a CTE (although I think that's probably the best approach), you can get a solution using set-based code. You will need to test the performance of this code!
I have added an extra temp table so that I can use a unique identifier for each record, but I suspect you will already have this column in your source table. So here's the temp table.
-- Recreate #phwithId. OBJECT_ID resolves this session's own temp table; a
-- LIKE match on tempdb.sys.tables could also match another session's table,
-- after which the DROP would fail.
IF OBJECT_ID('tempdb..#phwithId', 'U') IS NOT NULL
    DROP TABLE #phwithId;

CREATE TABLE #phwithId
(
    SaleId    INT,
    ProductID INT,
    Price     MONEY,
    SaleDate  DATE
);

-- SaleId numbers each product's rows in date order (it restarts at 1 for
-- every product). The source is the #ph temp table from the question.
INSERT INTO #phwithId (SaleId, ProductID, Price, SaleDate)
SELECT
    ROW_NUMBER() OVER (PARTITION BY product ORDER BY [date] ASC) AS SaleId,
    product,
    price,
    [date]
FROM #ph;
Now the main body of the Select statement
-- Pairs the n-th run start with the n-th run end of each product.
-- SaleId restarts for every product, so every join below also matches on
-- ProductId; without it, rows of different products would be compared.
SELECT
    da1.ProductId,
    da1.date_from,
    da2.date_to,
    da1.Price
FROM
(
    -- Run starts: rows whose price differs from the previous row of the
    -- same product, or that have no previous row.
    SELECT
        dfr.ProductId,
        ROW_NUMBER() OVER (PARTITION BY dfr.ProductId ORDER BY dfr.ChangeDate) AS rowno1,
        dfr.ChangeDate AS date_from,
        dfr.Price
    FROM
    (
        SELECT
            sl1.ProductId AS ProductId,
            sl1.SaleDate  AS ChangeDate,
            sl1.Price
        FROM #phwithId AS sl1
        LEFT JOIN #phwithId AS sl2
            ON sl2.ProductId = sl1.ProductId
           AND sl1.SaleId = sl2.SaleId + 1
        WHERE sl1.Price <> sl2.Price
           OR sl2.Price IS NULL
    ) AS dfr
) AS da1
LEFT JOIN
(
    -- Run ends: rows whose price differs from the next row of the same
    -- product, or that have no next row.
    SELECT
        dto.ProductId,
        ROW_NUMBER() OVER (PARTITION BY dto.ProductId ORDER BY dto.ChangeDate) AS rowno2,
        dto.ChangeDate AS date_to
    FROM
    (
        SELECT
            sl1.ProductId,
            sl1.SaleDate AS ChangeDate
        FROM #phwithId AS sl1
        LEFT JOIN #phwithId AS sl3
            ON sl3.ProductId = sl1.ProductId
           AND sl1.SaleId = sl3.SaleId - 1
        WHERE sl1.Price <> sl3.Price
           OR sl3.Price IS NULL
    ) AS dto
) AS da2
    ON da2.ProductId = da1.ProductId
   AND da2.rowno2 = da1.rowno1
ORDER BY
    da1.ProductId,
    da1.date_from;
By joining the data source to itself offset by one record (+ or -), we can identify where the price buckets change, and then it's just a matter of getting the start and end dates of each bucket back into a single record.
It's all a bit fiddly and I'm not sure it's going to give better performance, but I enjoyed the challenge.
-- Step 1 (marked): flag a row with 1 when its price differs from the
-- previous row of the same product. LAG returns NULL on the first row, the
-- comparison is then not true, and the row is flagged as a change.
-- Step 2 (marked_as_group): a running sum of the flags gives every
-- consecutive run of equal prices its own group number.
-- Step 3: collapse each group to its first and last date.
-- #ph only has a [date] column, so both range ends are derived from it.
WITH marked AS (
    SELECT
        *,
        CASE
            WHEN LAG(price) OVER (PARTITION BY product ORDER BY [date]) = price
                THEN 0
            ELSE 1
        END AS is_price_change
    FROM #ph
),
marked_as_group AS (
    SELECT
        m.*,
        SUM(m.is_price_change) OVER (
            PARTITION BY m.product
            ORDER BY m.[date]
            ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
        ) AS price_change_group
    FROM marked AS m
)
SELECT
    product,
    MIN([date]) AS date_from,
    MAX([date]) AS date_to,
    MIN(price)  AS price
FROM marked_as_group
GROUP BY
    product,
    price_change_group
ORDER BY
    product,
    date_to;
One solution I have come up with which is relatively "clean" is:
-- Number each product's rows by date. A row is kept when the next row of
-- the same product does not carry the same price; date_to is taken from
-- the next row of that product (NULL when there is none).
;WITH cte_sort (product, [date], price, [row]) AS (
    SELECT
        product,
        [date],
        price,
        ROW_NUMBER() OVER (PARTITION BY product ORDER BY [date] ASC) AS [row]
    FROM #ph
)
SELECT
    cur.product,
    cur.[date] AS date_from,
    nxt.[date] AS date_to,
    cur.price
FROM cte_sort AS cur
LEFT OUTER JOIN cte_sort AS same_price_next
    ON same_price_next.product = cur.product
   AND same_price_next.[row] = (cur.[row] + 1)
   AND same_price_next.price = cur.price
OUTER APPLY (
    SELECT TOP 1
        z.[date]
    FROM cte_sort AS z
    WHERE z.product = cur.product
      AND z.[row] > cur.[row]
    ORDER BY z.[row]
) AS nxt
WHERE same_price_next.[row] IS NULL
ORDER BY cur.[date];
I used a CTE with row_number because you then don't need to worry about whether any dates are missing if you use functions like dateadd. You obviously only need the outer apply if you want to have the date_to column (which I do).
This solution does solve my problem, I am however having a slight issue getting it to perform as quickly as I'd like on my table of 5 million rows.
-- Returns the column names of the given table as one string, in
-- ordinal-position order, each name followed by a single space.
-- Returns NULL when no columns are found for @table_name.
--
-- @table_name: table to look up in information_schema.columns (matched by
--              name only). Widened to 128 characters so that long
--              identifiers are not silently truncated.
CREATE FUNCTION [dbo].[AF_TableColumns] (@table_name NVARCHAR(128))
RETURNS NVARCHAR(4000)
AS
BEGIN
    DECLARE @str NVARCHAR(4000);

    -- FOR XML PATH('') concatenates the rows in the order given by ORDER BY;
    -- building the string by assigning to a variable row by row does not
    -- guarantee that order. TYPE/.value() undoes XML entity escaping.
    SELECT @str = (
        SELECT CAST(RTRIM(LTRIM(c.column_name)) AS NVARCHAR(500)) + ' '
        FROM information_schema.columns AS c
        WHERE c.table_name = @table_name
        GROUP BY c.table_name, c.column_name, c.ordinal_position
        ORDER BY c.ordinal_position
        FOR XML PATH(''), TYPE
    ).value('.', 'NVARCHAR(4000)');

    RETURN @str;
END
--select dbo.AF_TableColumns('YourTable') Select * from YourTable