How to use ROW_NUMBER when grouping records? - sql

I have the following:
-- Sample data for the question. A temp table is used because
-- "DECLARE #items TABLE" is not valid T-SQL: table variables need an @ name.
IF OBJECT_ID('tempdb..#items') IS NOT NULL
    DROP TABLE #items;

CREATE TABLE #items
(
    ItemId        int          NOT NULL,
    [Description] varchar(255) NOT NULL,
    Amount        money        NOT NULL
);

INSERT INTO #items (ItemId, [Description], Amount)
VALUES
    (1, 'A', 10),
    (2, 'A', 10),
    (3, 'B', 11),
    (4, 'B', 11),
    (5, 'B', 11),
    (6, 'C', 12),
    (7, 'C', 12),
    (8, 'A', 10),
    (9, 'A', 10);

-- The attempt from the question, kept as asked. ItemId is different on every
-- sample row, so PARTITION BY b.ItemId puts each row in its own partition:
-- nine rows come back, each with row number 1 and a count of 1.
SELECT
    ROW_NUMBER() OVER (PARTITION BY b.ItemId ORDER BY b.[Description]),
    b.[Description],
    COUNT(b.ItemId) OVER (PARTITION BY b.ItemId),
    SUM(b.Amount) OVER (PARTITION BY b.ItemId)
FROM #items AS b;
The result should be:
1, A, 4, 40
2, B, 3, 33
3, C, 2, 24
However the items are not being grouped.
So how do I need to use ROW_NUMBER to group the records?

Is this what you want?
-- One output row per Description: GROUP BY collapses the items first, then
-- ROW_NUMBER() numbers the resulting groups in Description order.
SELECT
    ROW_NUMBER() OVER (ORDER BY i.[Description]),
    i.[Description],
    COUNT(*),
    SUM(i.Amount)
FROM #items AS i
GROUP BY i.[Description]
ORDER BY i.[Description];
Here is a rextester.

If you don't want to use GROUP BY, you can use a subquery with two ROW_NUMBER() calls, something like this:
-- Variant without GROUP BY: every row carries the count and sum of its
-- Description partition, rn = 1 keeps a single row per partition, and the
-- outer ROW_NUMBER() numbers the rows that remain.
SELECT
    ROW_NUMBER() OVER (ORDER BY t.[Description]),
    t.[Description],
    t.cnt,
    t.summ
FROM
(
    SELECT
        ROW_NUMBER() OVER (PARTITION BY b.[Description] ORDER BY b.[Description]) AS rn,
        b.[Description],
        COUNT(b.ItemId) OVER (PARTITION BY b.[Description]) AS cnt,
        SUM(b.Amount) OVER (PARTITION BY b.[Description]) AS summ
    FROM #items AS b
) AS t
WHERE t.rn = 1
In any case, you shouldn't partition the data by ItemId: it is different on every row, so nothing ends up grouped.

Related

Find duplicate sets of data grouped by foreign key

How can I check whether the above table contains duplicate groups of rows based on id? For example, the first two rows of id 1 match two rows of id 2, but id 2 also has a third row that does not match any row of id 1, so the two groups are not duplicates. There can be any number of ids.
I tried it to do with the group by and string_agg but it didn't work.
Here is what I tried:
-- Sample data from the question. A temp table is used because
-- "declare #t2 Table" is not valid T-SQL: table variables need an @ name.
IF OBJECT_ID('tempdb..#t2') IS NOT NULL
    DROP TABLE #t2;

CREATE TABLE #t2
(
    m1 int,
    m2 int,
    n  varchar(50),
    n2 varchar(50),
    id int
);

INSERT INTO #t2 (m1, m2, n, n2, id)
VALUES
    (3, 1, 'c', '',  1),
    (2, 1, 's', 'o', 1),
    (2, 1, 's', 'o', 2),
    (3, 1, 'c', '',  2),
    (3, 1, 'f', '',  2);

-- The attempt from the question, kept as asked: it reports 'Same.' as soon as
-- any single (m1, m2, n, n2) combination occurs more than once, whatever the
-- id, so it cannot tell whether two ids share their complete set of rows.
IF EXISTS
(
    SELECT 1
    FROM #t2
    GROUP BY m1, m2, n, n2
    HAVING COUNT(*) > 1
)
BEGIN
    SELECT 'Same.';
END
ELSE
BEGIN
    SELECT 'not found';
END;
Any help here will be great.
Thanks
Thanks, Iptr. As per your solution in the comments, I am posting it here:
-- Sample data. A temp table is used because "declare #t2 table" is not valid
-- T-SQL: table variables need an @ name.
IF OBJECT_ID('tempdb..#t2') IS NOT NULL
    DROP TABLE #t2;

CREATE TABLE #t2
(
    m1 int,
    m2 int,
    n  varchar(5),
    n2 varchar(5),
    id int
);

INSERT INTO #t2 (m1, m2, n, n2, id)
VALUES
    (3, 1, 'c', '',  1),
    (2, 1, 's', 'o', 1),
    (2, 1, 's', 'o', 2),
    (3, 1, 'c', '',  2),
    (3, 1, 'f', '',  2),
    (3, 1, 'c', '',  4),
    (2, 1, 's', 'o', 4),
    (3, 1, 'c', '',  10),
    (2, 1, 's', 'o', 10),
    (3, 1, 'c', '',  5);

-- Option 1: pairs of ids whose row sets are identical.
-- Each row carries the row count of its id (idcnt). Two ids match when they
-- have the same idcnt and the number of matched rows equals that count.
-- Assumes an id never repeats the same (m1, m2, n, n2) row.
-- "a.id < b.id" reports every pair once, smaller id first.
-- Wrap the query in IF EXISTS (...) to get a yes/no answer.
WITH counted AS
(
    SELECT
        m1,
        m2,
        n,
        n2,
        id,
        COUNT(*) OVER (PARTITION BY id) AS idcnt
    FROM #t2
)
SELECT
    a.id,
    b.id
FROM counted AS a
INNER JOIN counted AS b
    ON a.id < b.id
   AND a.m1 = b.m1
   AND a.m2 = b.m2
   AND a.n = b.n
   AND a.n2 = b.n2
   AND a.idcnt = b.idcnt
GROUP BY a.id, b.id, a.idcnt
HAVING COUNT(*) = a.idcnt;

-- Option 2: serialise the rows of each id, in a fixed order, to one JSON
-- string; ids that produce the same string are listed together.
-- Wrap the query in IF EXISTS (...) to get a yes/no answer.
SELECT STRING_AGG(i.id, ',')
FROM
(
    SELECT DISTINCT id
    FROM #t2
) AS i
CROSS APPLY
(
    SELECT r.m1, r.m2, r.n, r.n2
    FROM #t2 AS r
    WHERE r.id = i.id
    ORDER BY r.m1, r.m2, r.n, r.n2
    FOR JSON PATH
) AS j(j)
GROUP BY j.j
HAVING COUNT(*) > 1;
You can count how many different ids for each set of rows. If the count is more than one, then there are duplicates. For example:
-- Lists every (m1, m2, n, n2) row value that appears under more than one id,
-- together with the number of distinct ids it appears under.
SELECT
    m1,
    m2,
    n,
    n2,
    COUNT(DISTINCT id) AS cnt
FROM t
GROUP BY
    m1,
    m2,
    n,
    n2
HAVING COUNT(DISTINCT id) > 1

SQL Query to get the value of a product given a date

I have a table which gives the rate of a product on a particular date, #tableA.
-- Sample price list: the rate of a product (Id) on a particular date.
CREATE TABLE #tableA
(
    Id        int NOT NULL,
    ValueDate date,
    Price     decimal(9, 2)
)

INSERT INTO #tableA (Id, ValueDate, Price)
VALUES
    (1, '2020-08-01', 100),
    (1, '2020-08-05', 110),
    (1, '2020-08-07', 50)
My other table has the id and the date the product is active.
-- Sample list of the dates on which a product (Id) is active.
-- The last row repeats 2020-08-04, so the table contains a duplicate.
CREATE TABLE #tableB
(
    Id    int NOT NULL,
    Dates date
)

INSERT INTO #tableB (Id, Dates)
VALUES
    (1, '2020-08-01'),
    (1, '2020-08-02'),
    (1, '2020-08-03'),
    (1, '2020-08-04'),
    (1, '2020-08-05'),
    (1, '2020-08-06'),
    (1, '2020-08-07'),
    (1, '2020-08-04')
I cannot find an efficient query where my resulting table gives the rate of the product on a given date.
I am expecting this result.
Id Dates ValueDate Price
-------------------------------------
1, '2020-08-01', '2020-08-01', 100
1, '2020-08-02', '2020-08-01', 100
1, '2020-08-03', '2020-08-01', 100
1, '2020-08-04', '2020-08-01', 100
1, '2020-08-05', '2020-08-05', 110
1, '2020-08-06', '2020-08-05', 110
1, '2020-08-07', '2020-08-07', 50
Something like this:
-- For every active date in #tableB, pick the most recent non-NULL price row in
-- #tableA dated on or before it (same Id). CROSS APPLY drops dates that have
-- no such price row; DISTINCT collapses repeated (Id, Dates) rows in #tableB.
-- Only ValueDate and Price are taken from the lookup: selecting DS.* would
-- return a second Id column that the expected result does not have.
SELECT DISTINCT
    B.[Id],
    B.[Dates],
    DS.[ValueDate],
    DS.[Price]
FROM #tableB AS B
CROSS APPLY
(
    SELECT TOP (1)
        A.[ValueDate],
        A.[Price]
    FROM #tableA AS A
    WHERE A.[Id] = B.[Id]
      AND A.[ValueDate] <= B.[Dates]
      AND A.[Price] IS NOT NULL
    ORDER BY A.[ValueDate] DESC
) AS DS;
or this:
-- Alternative without APPLY.
-- GroupID is a running count, per Id and in date order, of the dates that have
-- their own price row, so every date shares a GroupID with the latest priced
-- date at or before it. MAX() over that group then spreads the group's single
-- ValueDate/Price to all of its dates.
-- The windows are partitioned by Id so that several products can be handled
-- in one pass; with a single Id the result is the same as without it.
WITH DataSource AS
(
    SELECT DISTINCT
        B.[Id],
        B.[Dates],
        A.[ValueDate],
        A.[Price],
        SUM(CASE WHEN A.[Id] IS NOT NULL THEN 1 ELSE 0 END)
            OVER (PARTITION BY B.[Id] ORDER BY B.[Dates]) AS [GroupID]
    FROM #tableB AS B
    LEFT JOIN #tableA AS A
        ON A.[Id] = B.[Id]
       AND A.[ValueDate] = B.[Dates]
       AND A.[Price] IS NOT NULL
)
SELECT
    [Id],
    [Dates],
    MAX([ValueDate]) OVER (PARTITION BY [Id], [GroupID]) AS [ValueDate],
    MAX([Price]) OVER (PARTITION BY [Id], [GroupID]) AS [Price]
FROM DataSource;

Lead and partition function

This is my input table
-- Input table for the question.
CREATE TABLE #table1
(
    id        int,
    FN        varchar(20),
    startdate varchar(20),
    id1       varchar(1)    -- a column declared as plain "varchar" is one character wide
)

INSERT INTO #table1 (id, FN, startdate, id1)
VALUES
    (1, 'Joe',  '2019-01-01', 'A'),
    (1, 'Joe',  '2019-01-01', 'B'),
    (1, 'Joe',  '2019-01-05', 'C'),
    (1, 'Joe',  '2019-01-05', 'D'),
    (1, 'Joe',  '2019-01-06', 'E'),
    (2, 'john', '2019-01-05', 'F'),
    (2, 'john', '2019-01-06', 'G'),
    (2, 'john', '2019-01-06', 'H'),
    (2, 'john', '2019-01-07', 'I')
I tried the following code
-- The attempt from the question: ranks the start dates inside each (id, FN)
-- group and fetches the start date of the following row in that group.
SELECT
    t.id,
    t.FN,
    t.startdate,
    t.id1,
    DENSE_RANK() OVER (PARTITION BY t.id, t.FN ORDER BY t.startdate),
    LEAD(t.startdate, 1) OVER (PARTITION BY t.id, t.FN ORDER BY t.startdate)
FROM #table1 AS t
ORDER BY t.id
But I require the following output:
I know that there might be a better approach, but at least this is a working solution:
-- For each row, enddate is the smallest startdate that is strictly later than
-- the row's own startdate within the same (id, FN); NULL when there is none.
SELECT
    cur.id,
    cur.FN,
    cur.startdate,
    cur.id1,
    nxt.enddate
FROM #table1 AS cur
OUTER APPLY
(
    SELECT MIN(later.startdate) AS enddate
    FROM #table1 AS later
    WHERE later.id = cur.id
      AND later.fn = cur.fn
      AND later.startdate > cur.startdate
) AS nxt
Result

Multiple SQL MAX when items are not in order

I have some data as below:
-- Sample data for the question. A temp table is used because
-- "DECLARE #MyTable AS TABLE" is not valid T-SQL: table variables need an @ name.
IF OBJECT_ID('tempdb..#MyTable') IS NOT NULL
    DROP TABLE #MyTable;

CREATE TABLE #MyTable
(
    productName varchar(13),
    test1       int,
    test2       int
);

INSERT INTO #MyTable (productName, test1, test2)
VALUES
    ('a', 1, 1),
    ('a', 2, 2),
    ('a', 3, 3),
    ('b', 1, 4),
    ('b', 2, 5),
    ('b', 3, 6),
    ('a', 1, 7),
    ('a', 4, 8),
    ('a', 5, 9);

-- The query from the question: a plain grouped MAX, one row per product name.
SELECT
    productName,
    MAX(test1)
FROM #MyTable
GROUP BY productName;
a MAX query on test1 column gives
a,5
b,3
but I need to have result as
a,3
b,3
a,5
when I have order by test2
You can solve this by using a trick with row_numbers, so that you assign 2 different row numbers, one for the whole data and one that is partitioned by productname. If you compare the difference between these numbers, you can figure out when product name has changed, and use that to determine the max values for each group.
-- Gaps-and-islands: the overall row number minus the per-product row number
-- (both in test2 order) stays constant while the product name does not change,
-- so (productName, GRP) identifies each consecutive run of a product.
-- ORDER BY MIN(test2) returns the runs in the order they occur, which is the
-- order the question asks for; without it the row order is not guaranteed.
SELECT
    X.productName,
    MAX(X.test1)
FROM
(
    SELECT
        t.productName,
        t.test1,
        t.test2,
        ROW_NUMBER() OVER (ORDER BY t.test2 ASC)
      - ROW_NUMBER() OVER (PARTITION BY t.productName ORDER BY t.test2 ASC) AS GRP
    FROM #MyTable AS t
) AS X
GROUP BY X.productName, X.GRP
ORDER BY MIN(X.test2)
You can test this in SQL Fiddle
If the test2 column is always a row number without gaps, you can use it directly instead of the first row number column. If you need the groups returned in a particular order, you'll have to add an ORDER BY yourself, for example on the max of test1.
Please check the following SQL Select statement
-- Sample data. A temp table is used because "DECLARE #MyTable AS TABLE" is not
-- valid T-SQL: table variables need an @ name.
IF OBJECT_ID('tempdb..#MyTable') IS NOT NULL
    DROP TABLE #MyTable;

CREATE TABLE #MyTable
(
    productName varchar(13),
    test1       int,
    test2       int
);

INSERT INTO #MyTable (productName, test1, test2)
VALUES
    ('a', 1, 1),
    ('a', 2, 2),
    ('a', 3, 3),
    ('b', 1, 4),
    ('b', 2, 5),
    ('b', 3, 6),
    ('a', 1, 7),
    ('a', 4, 8),
    ('a', 5, 9);

-- Rows are ordered by test2 directly. Copying them into an identity table
-- without an ORDER BY does not guarantee that the ids follow test2.
WITH flagged AS
(
    -- ischange = 1 on the first row and on every row whose product name
    -- differs from the previous row's (LAG is NULL on the first row, so the
    -- comparison is not true and the ELSE branch applies).
    SELECT
        productName,
        test1,
        test2,
        CASE
            WHEN LAG(productName) OVER (ORDER BY test2) = productName THEN 0
            ELSE 1
        END AS ischange
    FROM #MyTable
),
grouped AS
(
    -- A running total of the change flags numbers the consecutive runs 1, 2, 3, ...
    SELECT
        productName,
        test1,
        test2,
        SUM(ischange) OVER (ORDER BY test2 ROWS UNBOUNDED PRECEDING) AS grp
    FROM flagged
)
SELECT
    grp,
    productName,
    MAX(test1)
FROM grouped
GROUP BY grp, productName
ORDER BY grp;
This is implemented according to the following SQL Server Lag() function tutorial
The Lag() function is used to identify and order the groups in table data
Please try this query
-- Sample data. A temp table is used because "DECLARE #MyTable AS TABLE" is not
-- valid T-SQL: table variables need an @ name.
IF OBJECT_ID('tempdb..#MyTable') IS NOT NULL
    DROP TABLE #MyTable;

CREATE TABLE #MyTable
(
    productName varchar(13),
    test1       int,
    test2       int
);

INSERT INTO #MyTable (productName, test1, test2)
VALUES
    ('a', 1, 1),
    ('a', 2, 2),
    ('a', 3, 3),
    ('b', 1, 4),
    ('b', 2, 5),
    ('b', 3, 6),
    ('a', 1, 7),
    ('a', 4, 8),
    ('a', 5, 9);

-- Splitting the rows on "test1 = test2" versus "test1 != test2" separates the
-- runs only because the first three sample rows happen to have equal values.
-- The runs are identified from the data order instead: the overall row number
-- minus the per-product row number (both by test2) is constant within each
-- consecutive run of a product name.
SELECT
    r.productName,
    MAX(r.test1)
FROM
(
    SELECT
        t.productName,
        t.test1,
        t.test2,
        ROW_NUMBER() OVER (ORDER BY t.test2)
      - ROW_NUMBER() OVER (PARTITION BY t.productName ORDER BY t.test2) AS run_id
    FROM #MyTable AS t
) AS r
GROUP BY r.productName, r.run_id
ORDER BY MIN(r.test2);

T-SQL: Paging WITH TIES

I am trying to implement a paging routine that's a little different.
For the sake of a simple example, let's assume that I have a table defined and populated as follows:
-- Sample data for the question. A temp table is used because
-- "DECLARE #Temp TABLE" is not valid T-SQL: table variables need an @ name.
IF OBJECT_ID('tempdb..#Temp') IS NOT NULL
    DROP TABLE #Temp;

CREATE TABLE #Temp
(
    ParentId    int,
    [TimeStamp] datetime,
    Value       int
);

-- ISO 8601 literals (yyyy-mm-ddThh:mm:ss) are read the same way under every
-- language and DATEFORMAT setting.
INSERT INTO #Temp (ParentId, [TimeStamp], Value)
VALUES
    (1, '2013-01-01T00:00:00', 6),
    (1, '2013-01-01T01:00:00', 7),
    (1, '2013-01-01T02:00:00', 8),
    (2, '2013-01-01T00:00:00', 6),
    (2, '2013-01-01T01:00:00', 7),
    (2, '2013-01-01T02:00:00', 8),
    (3, '2013-01-01T00:00:00', 6),
    (3, '2013-01-01T01:00:00', 7),
    (3, '2013-01-01T02:00:00', 8);
TimeStamp will always be the same interval, e.g. daily data, 1 hour data, 1 minute data, etc. It will not be mixed.
For reporting and presentation purposes, I want to implement paging that:
Orders by TimeStamp
Starts out using a suggested pageSize (say 4), but will automatically adjust to include additional records matching on TimeStamp. In other words, if 1/1/2013 01:00 is included for one ParentId, the suggested pageSize will be overridden and all records for hour 01:00 will be included for all ParentId's. It's almost like the TOP WITH TIES option.
So running this query with pageSize of 4 would return 6 records. There are 3 hour 00:00 and 1 hour 01:00 by default, but because there are more hour 01:00's, the pageSize would be overridden to return all hour 00:00 and 01:00.
Here's what I have so far. I think I'm close, as it works for the first iteration, but subsequent queries for the next pageSize+ rows don't work.
-- The attempt from the question (first page only, page size hard-coded to 4):
-- rownum gives every row its own position, rnk gives rows with the same
-- TimeStamp the same position, and a row is kept when either one is in 1..4.
WITH CTE AS
(
    SELECT
        ParentId,
        [TimeStamp],
        Value,
        RANK()       OVER (ORDER BY [TimeStamp]) AS rnk,
        ROW_NUMBER() OVER (ORDER BY [TimeStamp]) AS rownum
    FROM #Temp
)
SELECT
    ParentId,
    [TimeStamp],
    Value,
    rnk,
    rownum
FROM CTE
WHERE (rownum >= 1 AND rownum <= 4)
   OR (rnk >= 1 AND rnk <= 4)
ORDER BY
    [TimeStamp],
    ParentId
The ROW_NUMBER ensures the minimum pageSize is met, but the RANK will include additional ties.
-- Sample data. A temp table is used because "declare #Temp as Table" is not
-- valid T-SQL: table variables need an @ name.
IF OBJECT_ID('tempdb..#Temp') IS NOT NULL
    DROP TABLE #Temp;

CREATE TABLE #Temp
(
    ParentId    int,
    [TimeStamp] datetime,
    [Value]     int
);

INSERT INTO #Temp (ParentId, [TimeStamp], [Value])
VALUES
    (1, '1/1/2013 00:00', 6),
    (1, '1/1/2013 01:00', 7),
    (1, '1/1/2013 02:00', 8),
    (2, '1/1/2013 00:00', 6),
    (2, '1/1/2013 01:00', 7),
    (2, '1/1/2013 02:00', 8),
    (3, '1/1/2013 00:00', 6),
    (3, '1/1/2013 01:00', 7),
    (3, '1/1/2013 02:00', 8);

-- Scalar variables take an @ prefix ("declare #PageSize" does not parse).
DECLARE @PageSize int = 4;  -- suggested number of rows per page
DECLARE @Page     int = 1;  -- 1-based page number

WITH Alpha AS
(
    -- RowNum positions every row; Rnk is shared by rows with the same TimeStamp.
    SELECT
        ParentId,
        [TimeStamp],
        [Value],
        RANK()       OVER (ORDER BY [TimeStamp]) AS Rnk,
        ROW_NUMBER() OVER (ORDER BY [TimeStamp]) AS RowNum
    FROM #Temp
),
Beta AS
(
    -- Range of ranks touched by the rows that fall inside the requested page.
    SELECT
        MIN(Rnk) AS MinRnk,
        MAX(Rnk) AS MaxRnk
    FROM Alpha
    WHERE RowNum > (@Page - 1) * @PageSize
      AND RowNum <= @Page * @PageSize
)
-- Return every row whose rank is in that range, so that a TimeStamp that is
-- partly inside the page is returned completely.
SELECT
    A.ParentId,
    A.[TimeStamp],
    A.[Value],
    A.Rnk,
    A.RowNum
FROM Alpha AS A
INNER JOIN Beta AS B
    ON A.Rnk >= B.MinRnk
   AND A.Rnk <= B.MaxRnk
ORDER BY
    A.[TimeStamp],
    A.ParentId;
EDIT:
An alternative query that assigns page numbers as it goes, so that next/previous page can be implemented without overlapping rows:
-- Assigns a page number to every row in one pass.
-- Expects #Temp to be populated and @PageSize (int, suggested rows per page)
-- to be declared in the same batch; a scalar variable needs the @ prefix.
WITH Alpha AS
(
    SELECT
        ParentId,
        [TimeStamp],
        Value,
        RANK()       OVER (ORDER BY [TimeStamp]) AS Rnk,
        ROW_NUMBER() OVER (ORDER BY [TimeStamp]) AS RowNum
    FROM #Temp
),
Beta AS
(
    -- Anchor: the first row opens page 1.
    SELECT
        ParentId,
        [TimeStamp],
        Value,
        Rnk,
        RowNum,
        1 AS Page,
        1 AS PageRow
    FROM Alpha
    WHERE RowNum = 1

    UNION ALL

    -- Step: walk the rows in RowNum order. A new page starts only when the
    -- current page already holds @PageSize rows AND the TimeStamp changes, so
    -- rows with the same TimeStamp always stay on the same page.
    SELECT
        A.ParentId,
        A.[TimeStamp],
        A.Value,
        A.Rnk,
        A.RowNum,
        CASE
            WHEN B.PageRow >= @PageSize AND A.[TimeStamp] <> B.[TimeStamp] THEN B.Page + 1
            ELSE B.Page
        END,
        CASE
            WHEN B.PageRow >= @PageSize AND A.[TimeStamp] <> B.[TimeStamp] THEN 1
            ELSE B.PageRow + 1
        END
    FROM Alpha AS A
    INNER JOIN Beta AS B
        ON B.RowNum + 1 = A.RowNum
)
SELECT
    ParentId,
    [TimeStamp],
    Value,
    Rnk,
    RowNum,
    Page,
    PageRow
FROM Beta
OPTION (MAXRECURSION 0)  -- one recursion step per row, so lift the default limit of 100
Note that recursive CTEs often scale poorly.
I think your strategy of using row_number() and rank() is overcomplicating things.
Just pick the top 4 timestamps from the data. Then choose any timestamps that match those:
-- Takes the first 4 TimeStamp values in ascending order (duplicates included)
-- and returns every row whose TimeStamp is one of them.
SELECT
    ParentId,
    [TimeStamp],
    Value
FROM #temp
WHERE [TimeStamp] IN
(
    SELECT TOP (4) [TimeStamp]
    FROM #temp
    ORDER BY [TimeStamp]
)