SQL Query for Grouping the results based on sequence - sql

I have a table like this:
ID Seq Amt
1 1 500
1 2 500
1 3 500
1 5 500
2 10 600
2 11 600
3 1 700
3 3 700
I want to group the continuous sequence numbers into a single row like this:
ID Start End TotalAmt
1 1 3 1500
1 5 5 500
2 10 11 1200
3 1 1 700
3 3 3 700
Please help to achieve this result.

WITH numbered AS (
SELECT
ID, Seq, Amt,
SeqGroup = ROW_NUMBER() OVER (PARTITION BY ID ORDER BY Seq) - Seq
FROM atable
)
SELECT
ID,
Start = MIN(Seq),
[End] = MAX(Seq),
TotalAmt = SUM(Amt)
FROM numbered
GROUP BY ID, SeqGroup
ORDER BY ID, Start
;

Well, there's perhaps a more elegant way to do this (something hints at me that there is), but here's an approach that will work if you're using a version of SQL Server that accepts common table expressions:
use Tempdb
go
create table [Test]
(
[id] int not null,
[Seq] int not null,
[Amt] int not null
)
insert into [Test] values
(1, 1, 500),
(1, 2, 500),
(1, 3, 500),
(1, 5, 500),
(2, 10, 600),
(2, 11, 600),
(3, 1, 700),
(3, 3, 700)
;with
lower_bound as (
select *
from Test
where not exists (
select *
from Test as t1
where t1.id = Test.id and t1.Seq = Test.Seq - 1
)
),
upper_bound as (
select *
from Test
where not exists (
select *
from Test as t1
where t1.id = Test.id and t1.Seq = Test.Seq + 1
)
),
bounds as (
select id, (select MAX(seq) from lower_bound where lower_bound.id = upper_bound.id and lower_bound.Seq <= upper_bound.Seq) as LBound, Seq as Ubound
from upper_bound
)
select Test.id, LBound As [Start], UBound As [End], SUM(Amt) As TotalAmt
from Test
join bounds
on Test.id = bounds.id
and Test.Seq between bounds.LBound and bounds.Ubound
group by Test.id, LBound, UBound
drop table [Test]

This seems to work nicely. #breakingRows will contain all rows that break the sequence of id and seq (i.e. if id changes or if seq is not 1 more than the previous seq). With that table you can select all rows of such a sequence within #temp. I must add however that performance will probably be not all that good because of all the subqueries but you'll need to test to be sure.
declare #temp table (id int, seq int, amt int)
insert into #temp select 1, 1, 500
insert into #temp select 1, 2, 500
insert into #temp select 1, 3, 500
insert into #temp select 1, 5, 500
insert into #temp select 2, 10, 600
insert into #temp select 2, 11, 600
insert into #temp select 3, 1, 700
insert into #temp select 3, 3, 700
declare #breakingRows table (ctr int identity(1,1), id int, seq int)
insert into #breakingRows(id, seq)
select id, seq
from #temp t1
where not exists
(select 1 from #temp t2 where t1.id = t2.id and t1.seq - 1 = t2.seq)
order by id, seq
select br.id, br.seq as start,
isnull ((select top 1 seq from #temp t2
where id < (select id from #breakingRows br2 where br.ctr = br2.ctr - 1) or
(id = (select id from #breakingRows br2 where br.ctr = br2.ctr - 1) and
seq < (select seq from #breakingRows br2 where br.ctr = br2.ctr - 1))
order by id desc, seq desc),
br.seq)
as [end],
(select SUM(amt) from #temp t1 where t1.id = br.id and
t1.seq <
isnull((select seq from #breakingRows br2 where br.ctr = br2.ctr - 1 and br.id = br2.id),
(select max(seq) + 1 from #temp)) and
t1.seq >= br.seq)
from #breakingRows br
order by id, seq

Since Andriy has already posted the gold solution, here's my take using an UPDATE statement to get the result from a temp table, just for fun.
declare #tmp table (
id int, seq int, amt money, start int, this int, total money,
primary key clustered(id, seq))
;
insert #tmp
select *, start=seq, this=seq, total=convert(money,amt)
from btable
;
declare #id int, #seq int, #start int, #amt money
update #tmp
set
#amt = total = case when id = #id and seq = #seq+1 then #amt+total else amt end,
#start = start = case when id = #id and seq = #seq+1 then #start else seq end,
#seq = this = seq,
#id = id = id
from #tmp
option (maxdop 1)
;
select id, start, max(this) [end], max(total) total
from #tmp
group by id, start
order by id, start
Notes:
btable: your table name
id int, seq int, amt money: expected columns in your table

Try following query.
select id, min(seq), max(seq), sum(amt) from table group by id
OOps, sorry, it is wrong query as you need sequence

SELECT Id, MIN(Seq) as Start, MAX(Seq) as End, SUM(Amount) as Total
FROM (
SELECT t.*, Seq - ROW_NUMBER() OVER (PARTITION BY Id ORDER BY Seq) Rn
FROM [Table] t
) as T
GROUP BY Id, Rn
ORDER BY Id, MIN(Seq)

Related

how to pick top 2 rows in a table based on the indicator

I have a sample data like this
Declare #table Table
(
ID INT,
Value VARCHAR(10),
Is_failure int
)
insert into #table(ID, Value, Is_failure) values (1, 'Bits', 0)
insert into #table(ID, Value, Is_failure) values (2, 'Ip', 0)
insert into #table(ID, Value, Is_failure) values (3, 'DNA', 0)
insert into #table(ID, Value, Is_failure) values (6, 'DCP', 1)
insert into #table(ID, Value, Is_failure) values (8, 'Bits', 0)
insert into #table(ID, Value, Is_failure) values (11, 'calc', 0)
insert into #table(ID, Value, Is_failure) values (14, 'DISC', 0)
insert into #table(ID, Value, Is_failure) values (19, 'DHCP', 1)
Looks like this:
ID Value Is_failure
1 Bits 0
2 Ip 0
3 DNA 0
6 DCP 1
8 Bits 0
11 calc 0
14 DISC 0
19 DHCP 1
Data continuous like this ... I need to fetch top 2 records along with Is_failure whenever Is_failure = 1 comes if it is 0 no need to pick up .
Sample output:
ID Value Is_failure
2 Ip 0
3 DNA 0
6 DCP 1
11 calc 0
14 DISC 0
19 DHCP 1
Suggest on this I have tried with having count(*) and other things but not fruitful.
You can use this query
Declare #tmptable Table
(
ID INT,
Value VARCHAR(10),
Is_failure int,
rowNum int
)
Declare #continuousRows int =2
insert into #tmptable
select *,ROW_NUMBER() over (order by id) from #table
;with cte1 as
(select *
from #tmptable t
where (select sum(Is_failure) from #tmptable t1 where t1.rowNum between t.rowNum-#continuousRows and t.rowNum
having count(*)=#continuousRows+1)=1
and t.Is_failure=1
)
,cte2 as
(
select t.* from #tmptable t
join cte1 c on t.rowNum between c.rowNum-#continuousRows and c.rowNum
)
select c.ID,value,Is_failure from cte2 c
You can use window functions for this:
select id, value, is_failure
from (select t.*,
lead(Is_failure) over (order by id) as next_if,
lead(Is_failure, 2) over (order by id) as next_if2
from #table t
) t
where 1 in (Is_failure, next_if, next_if2)
order by id;
You can simplify this with a windowing clause:
select id, value, is_failure
from (select t.*,
max(is_failure) over (order by id rows between current row and 2 following) as has_failure
from #table t
) t
where has_failure > 0
order by id;

SQL: Pinned rows and row number calculation

We have a requirement to assign row number to all rows using following rule
Row if pinned should have same row number
Otherwise sort it by GMD
Example:
ID GMD IsPinned
1 2.5 0
2 0 1
3 2 0
4 4 1
5 3 0
Should Output
ID GMD IsPinned RowNo
5 3 0 1
2 0 1 2
1 2.5 0 3
4 4 1 4
3 2 0 5
Please Note row number for Id's 2 and 4 stayed intact as they are pinned with values of 2 and 4 respectively even though the GMD are not in any order
Rest of rows Id's 1, 3 and 5 row numbers are sorted using GMD desc
I tried using RowNumber SQL 2012 however, it is pushing pinned items from their position
Here's a set-based approach to solving this. Note that the first CTE is unnecessary if you already have a Numbers table in your database:
declare #t table (ID int,GMD decimal(5,2),IsPinned bit)
insert into #t (ID,GMD,IsPinned) values
(1,2.5,0), (2, 0 ,1), (3, 2 ,0), (4, 4 ,1), (5, 3 ,0)
;With Numbers as (
select ROW_NUMBER() OVER (ORDER BY ID) n from #t
), NumbersWithout as (
select
n,
ROW_NUMBER() OVER (ORDER BY n) as rn
from
Numbers
where n not in (select ID from #t where IsPinned=1)
), DataWithout as (
select
*,
ROW_NUMBER() OVER (ORDER BY GMD desc) as rn
from
#t
where
IsPinned = 0
)
select
t.*,
COALESCE(nw.n,t.ID) as RowNo
from
#t t
left join
DataWithout dw
inner join
NumbersWithout nw
on
dw.rn = nw.rn
on
dw.ID = t.ID
order by COALESCE(nw.n,t.ID)
Hopefully my naming makes it clear what we're doing. I'm a bit cheeky in the final SELECT by using a COALESCE to get the final RowNo when you might have expected a CASE expression. But it works because the contents of the DataWithout CTE is defined to only exist for unpinned items which makes the final LEFT JOIN fail.
Results:
ID GMD IsPinned RowNo
----------- --------------------------------------- -------- --------------------
5 3.00 0 1
2 0.00 1 2
1 2.50 0 3
4 4.00 1 4
3 2.00 0 5
Second variant that may perform better (but never assume, always test):
declare #t table (ID int,GMD decimal(5,2),IsPinned bit)
insert into #t (ID,GMD,IsPinned) values
(1,2.5,0), (2, 0 ,1), (3, 2 ,0), (4, 4 ,1), (5, 3 ,0)
;With Numbers as (
select ROW_NUMBER() OVER (ORDER BY ID) n from #t
), NumbersWithout as (
select
n,
ROW_NUMBER() OVER (ORDER BY n) as rn
from
Numbers
where n not in (select ID from #t where IsPinned=1)
), DataPartitioned as (
select
*,
ROW_NUMBER() OVER (PARTITION BY IsPinned ORDER BY GMD desc) as rn
from
#t
)
select
dp.ID,dp.GMD,dp.IsPinned,
CASE WHEN IsPinned = 1 THEN ID ELSE nw.n END as RowNo
from
DataPartitioned dp
left join
NumbersWithout nw
on
dp.rn = nw.rn
order by RowNo
In the third CTE, by introducing the PARTITION BY and removing the WHERE clause we ensure we have all rows of data so we don't need to re-join to the original table in the final result in this variant.
this will work:
CREATE TABLE Table1
("ID" int, "GMD" number, "IsPinned" int)
;
INSERT ALL
INTO Table1 ("ID", "GMD", "IsPinned")
VALUES (1, 2.5, 0)
INTO Table1 ("ID", "GMD", "IsPinned")
VALUES (2, 0, 1)
INTO Table1 ("ID", "GMD", "IsPinned")
VALUES (3, 2, 0)
INTO Table1 ("ID", "GMD", "IsPinned")
VALUES (4, 4, 1)
INTO Table1 ("ID", "GMD", "IsPinned")
VALUES (5, 3, 0)
SELECT * FROM dual
;
select * from (select "ID","GMD","IsPinned",rank from(select m.*,rank()over(order by
"ID" asc) rank from Table1 m where "IsPinned"=1)
union
(select "ID","GMD","IsPinned",rank from (select t.*,rank() over(order by "GMD"
desc)-1 rank from (SELECT * FROM Table1)t)
where "IsPinned"=0) order by "GMD" desc) order by rank ,GMD;
output:
2 0 1 1
5 3 0 1
1 2.5 0 2
4 4 1 2
3 2 0 3
Can you try this query
CREATE TABLE Table1
(ID int, GMD numeric (18,2), IsPinned int);
INSERT INTO Table1 (ID,GMD, IsPinned)
VALUES (1, 2.5, 0),
(2, 0, 1),
(3, 2, 0),
(4, 4, 1),
(5, 3, 0)
select *, row_number () over(partition by IsPinned order by (case when IsPinned =0 then GMD else id end) ) [CustOrder] from Table1
This took longer then I thought, the thing is row_number would take a part to resolve the query. We need to differentiate the row_numbers by id first and then we can apply the while loop or cursor or any iteration, in our case we will just use the while loop.
dbo.test (you can replace test with your table name)
1 2.5 False
2 0 True
3 3 False
4 4 True
6 2 False
Here is the query I wrote to achieve your result, I have added comment under each operation you should get it, if you have any difficultly let me know.
Query:
--user data table
DECLARE #userData TABLE
(
id INT NOT NULL,
gmd FLOAT NOT NULL,
ispinned BIT NOT NULL,
rownumber INT NOT NULL
);
--final result table
DECLARE #finalResult TABLE
(
id INT NOT NULL,
gmd FLOAT NOT NULL,
ispinned BIT NOT NULL,
newrownumber INT NOT NULL
);
--inserting to uer data table from the table test
INSERT INTO #userData
SELECT t.*,
Row_number()
OVER (
ORDER BY t.id ASC) AS RowNumber
FROM test t
--creating new table for ids of not pinned
CREATE TABLE #ids
(
rn INT,
id INT,
gmd FLOAT
)
-- inserting into temp table named and adding gmd by desc
INSERT INTO #ids
(rn,
id,
gmd)
SELECT DISTINCT Row_number()
OVER(
ORDER BY gmd DESC) AS rn,
id,
gmd
FROM #userData
WHERE ispinned = 0
--declaring the variable to loop through all the no pinned items
DECLARE #id INT
DECLARE #totalrows INT = (SELECT Count(*)
FROM #ids)
DECLARE #currentrow INT = 1
DECLARE #assigningNumber INT = 1
--inerting pinned items first
INSERT INTO #finalResult
SELECT ud.id,
ud.gmd,
ud.ispinned,
ud.rownumber
FROM #userData ud
WHERE ispinned = 1
--looping through all the rows till all non-pinned items finished
WHILE #currentrow <= #totalrows
BEGIN
--skipping pinned numers for the rows
WHILE EXISTS(SELECT 1
FROM #finalResult
WHERE newrownumber = #assigningNumber
AND ispinned = 1)
BEGIN
SET #assigningNumber = #assigningNumber + 1
END
--getting row by the number
SET #id = (SELECT id
FROM #ids
WHERE rn = #currentrow)
--inserting the non-pinned item with new row number into the final result
INSERT INTO #finalResult
SELECT ud.id,
ud.gmd,
ud.ispinned,
#assigningNumber
FROM #userData ud
WHERE id = #id
--going to next row
SET #currentrow = #currentrow + 1
SET #assigningNumber = #assigningNumber + 1
END
--getting final result
SELECT *
FROM #finalResult
ORDER BY newrownumber ASC
--dropping table
DROP TABLE #ids
Output:

UNION CTE with ORDER BY on 3 Tables

This is not easy to explain, however, I do believe there is a much nicer way to do what I managed to do and hope to get some help.
I have 3 tables (T1, T2, and T3). I need to get the latest from T1 and T2, then with those results return the results from t3 or from the previous result if t3 is empty. So the LatestDate doesn't really matter if there is a record in t3. Also, if there is no data in t3 and the LatestDate is the same on t1 and t2 (rarely will happen, but want to plan accordingly), I want results from t1.
Here's a sample of what I got, mind you the actual query is has many more fields, but the concept is the same.
CREATE TABLE [dbo].[t1](
[Id] [INT] NOT NULL,
[LatestDate] [DATETIME] NOT NULL
);
CREATE TABLE [dbo].[t2](
[Id] [INT] NOT NULL,
[LatestDate] [DATETIME] NOT NULL
);
CREATE TABLE [dbo].[t3](
[Id] [INT] NOT NULL,
[LatestDate] [DATETIME] NOT NULL
);
INSERT t1 (Id, LatestDate) VALUES (1, CAST(N'2000-01-01T00:00:00.000' AS DateTime));
INSERT t1 (Id, LatestDate) VALUES (2, CAST(N'2001-01-01T00:00:00.000' AS DateTime));
INSERT t1 (Id, LatestDate) VALUES (3, CAST(N'2002-01-01T00:00:00.000' AS DateTime));
INSERT t2 (Id, LatestDate) VALUES (1, CAST(N'2001-01-01T00:00:00.000' AS DateTime));
INSERT t2 (Id, LatestDate) VALUES (2, CAST(N'2002-01-01T00:00:00.000' AS DateTime));
INSERT t2 (Id, LatestDate) VALUES (4, CAST(N'2003-01-01T00:00:00.000' AS DateTime));
INSERT t3 (Id, LatestDate) VALUES (1, CAST(N'2001-01-01T00:00:00.000' AS DateTime));
INSERT t3 (Id, LatestDate) VALUES (2, CAST(N'2000-01-01T00:00:00.000' AS DateTime));
INSERT t3 (Id, LatestDate) VALUES (5, CAST(N'2004-01-01T00:00:00.000' AS DateTime));
GO;
WITH CTE AS (
SELECT TOP 1 * FROM (
SELECT 2 AS Sort, * FROM t1 WHERE t1.id = #UserId
UNION
SELECT 3 AS Sort, * FROM t2 WHERE t2.id = #UserId
) AS t
ORDER BY
t.LatestDate DESC
)
SELECT TOP 1 * FROM (
SELECT TOP 1 * FROM CTE
UNION
SELECT 1 AS Sort, * FROM t3 WHERE t3.id = #UserId
) AS t
ORDER BY
t.Sort;
Expected results:
When #UserID = 1:
Sort Source Id LatestDate
1 t3 1 1/1/2001
When #UserID = 2:
Sort Source Id LatestDate
1 t3 2 1/1/2000
When #UserID = 3:
Sort Source Id LatestDate
2 t1 3 1/1/2002
When #UserID = 4:
Sort Source Id LatestDate
3 t2 4 1/1/2003
When #UserID = 5:
Sort Source Id LatestDate
1 t3 5 1/1/2004
Thanks!
If I understand you correctly you want something like:
DECLARE #UserID INT = 5;
WITH cte AS (
SELECT 1 AS src, 1 as [tbl], * FROM t1 WHERE id = #UserId
UNION ALL SELECT 1, 2, * FROM t2 WHERE id = #UserId
UNION ALL SELECT 3, 3, * FROM t3 WHERE id = #UserId
), cte2 AS (
SELECT * , ROW_NUMBER() OVER(ORDER BY src DESC, LatestDate DESC, tbl ASC) AS rn
FROM cte
)
SELECT Id, LatestDate
FROM cte2
WHERE rn = 1;
RextesterDemo
Using PARTITION BY you could move filtering #userId to final part:
DECLARE #UserID INT = 5;
WITH cte AS (
SELECT 1 AS src, 1 as [tbl], * FROM t1
UNION ALL SELECT 1, 2, * FROM t2
UNION ALL SELECT 3, 3, * FROM t3
), cte2 AS (
SELECT *
,ROW_NUMBER() OVER(PARTITION BY Id ORDER BY src DESC, LatestDate DESC, tbl ASC) AS rn
FROM cte
)
SELECT Id, LatestDate
FROM cte2
WHERE rn = 1
AND id = #UserID
Rextester Demo2
How's this?
This assumes you will take the t3 result if it exists
or the max result from T1 or T2 if not.
if exists(select * from t3 where ID= #ID)
(
select ID, Max(LatestDate), TN='T3'
from t3
where ID= #ID
)
else
(
select top 1 ID, Max(LatestDate) LD,TN
from (Select *,TN='t1' from T1
union all
Select *, TN='t2' from t2) as a
where id=#ID
group by ID,TN
order by LD desc
)

SQL: Remove duplicates

How do I remove duplicates from a table that is set up in the following way?
unique_ID | worker_ID | date | type_ID
A worker can have multiple type_ID's associated with them and I want to remove any duplicate types. If there is a duplicate, I want to remove the type with the most recent entry.
A textbook candidate for the window function row_number():
;WITH x AS (
SELECT unique_ID
,row_number() OVER (PARTITION BY worker_ID,type_ID ORDER BY date) AS rn
FROM tbl
)
DELETE FROM tbl
FROM x
WHERE tbl.unique_ID = x.unique_ID
AND x.rn > 1
This also takes care of the situation where a set of dupes on (worker_ID,type_ID) shares the same date.
See the simplified demo on data.SE.
Update with simpler version
Turns out, this can be simplified: In SQL Server you can delete from the CTE directly:
;WITH x AS (
SELECT unique_ID
,row_number() OVER (PARTITION BY worker_ID,type_ID ORDER BY date) AS rn
FROM tbl
)
DELETE x
WHERE rn > 1
delete from table t
where exists ( select 1 from table t2
where t2.worker_id = t.worker_id
and t2.type_id = t.type_id
and t2.date < t.date )
HTH
DELETE FROM #t WHERE unique_Id IN
(
SELECT unique_Id FROM
(
SELECT unique_Id
,Type_Id
,ROW_NUMBER() OVER (PARTITION BY worker_Id, type_Id ORDER BY date) AS rn
FROM #t
) Q
WHERE rn > 1
)
And to test...
DECLARE #t TABLE
(
unique_ID INT IDENTITY,
worker_ID INT,
date DATETIME,
type_ID INT
)
INSERT INTO #t VALUES (1, DATEADD(DAY, 1, GETDATE()), 1)
INSERT INTO #t VALUES (1, GETDATE(), 1)
INSERT INTO #t VALUES (2, GETDATE(), 1)
INSERT INTO #t VALUES (1, DATEADD(DAY, 2, GETDATE()), 1)
INSERT INTO #t VALUES (1, DATEADD(DAY, 3, GETDATE()), 2)
SELECT * FROM #t
DELETE FROM #t WHERE unique_Id IN
(
SELECT unique_Id FROM
(
SELECT unique_Id
,Type_Id
,ROW_NUMBER() OVER (PARTITION BY worker_Id, type_Id ORDER BY date) AS rn
FROM #t
) Q
WHERE rn > 1
)
SELECT * FROM #t
you may use this query
delete from worker where unique_id in (
select max(unique_id) from worker group by worker_ID , type_ID having count(type_id)>1)
here i am assuming worker as your table name

transact-sql question

Assume there were 100 records in tableA and tableA contained a column named 'price'.
How do I select the first-n record if where sum of price > a certain amount (e.g. 1000) without using cursor?
thanks
Top N implies some kind of order, which you did not supply, so I assumed any random order.
You can change this on the OVER clause of the ROW_NUMBER().
Try something like
DECLARE #Table TABLE(
Price FLOAT
)
INSERT INTO #Table SELECT 1
INSERT INTO #Table SELECT 11
INSERT INTO #Table SELECT 12
INSERT INTO #Table SELECT 15
INSERT INTO #Table SELECT 10
INSERT INTO #Table SELECT 65
INSERT INTO #Table SELECT 100
DECLARE #TotalPrice FLOAT
SELECT #TotalPrice = 100
;WITH Vals AS (
SELECT *,
ROW_NUMBER() OVER(ORDER BY (SELECT NULL)) RNR
FROM #Table
)
, Totals AS (
SELECT v.RNR,
SUM(vP.Price) TotalPrice
FROM Vals v LEFT JOIN
Vals vP ON v.RNR >= vP.RNR
GROUP BY v.RNR
)
, LimitValue AS (
SELECT TOP 1
RNR
FROM Totals
WHERE TotalPrice >= #TotalPrice
ORDER BY RNR
)
SELECT *
FROM Vals
WHERE RNR <= (
SELECT RNR
FROM LimitValue
)
select price from tableA
where price > 1000
limit n;
n - no. of records you want in result set
--
Cheers