SQL - Choose which duplicate value to select

SQL - Choose which duplicate value to select - sql

I have a table with a datetime, value and user.
This table has multiple rows for the same datetime but with a different user and value.
I want select distinct datetime with the corresponding value and user.
Where there is a duplicate datetime with different users, the value that user2 has input should be prioritised.
Table 1
-----------------
DateTime| Value| User
--------|---------|---------
1/1/17 | 10| User1
2/1/17 | 30| User1
3/1/17 | 10| User1
1/1/17 | 90| User2
2/1/17 | 80| User2
So from the above, I would end up with
1/1/17 | 90| User2
2/1/17 | 80| User2
3/1/17 | 10| User1
I'm sure there is a simple answer to this but I can't for the life of me work out how to do it!
Any help greatly appreciated.
Thanks

Not quite simple! Using window functions and common table expressions
; with x as (
select [DateTime], value, [User], row_num = row_number() over(partition by [DateTime] order by [User] desc) from Table1
)
select x.* from x where row_num = 1

This will always prioritize the input from 'User2', even when there is input from 'User1' and 'User3'.
;with cte as (
select *
, rn = row_number() over (
partition by [DateTime]
order by (case when [user] = 'User2' then 0 else 1 end) asc
)
from t
)
select *
from cte
where rn=1
rextester http://rextester.com/AZVA85684
results:
+----------+-------+-------+----+
| DateTime | value | user | rn |
+----------+-------+-------+----+
| 1/1/17 | 90 | User2 | 1 |
| 2/1/17 | 80 | User2 | 1 |
| 3/1/17 | 10 | User1 | 1 |
+----------+-------+-------+----+

DECLARE #T as table
(
[DateTime] nvarchar(100),
VALUE INT,
[USER] VARCHAR(32)
)
INSERT INTO #T
VALUES
('1/1/17', 10, 'User1'),
('2/1/17', 30, 'User1'),
('3/1/17', 10, 'User1'),
('1/1/17', 90, 'User2'),
('2/1/17', 80, 'User2')
SELECT t.[DateTime], t.VALUE, t.[USER]
FROM #T t
JOIN (
SELECT [DateTime], MAX([USER]) AS [USER]
FROM #T
GROUP BY [DateTime]
) u ON u.[DateTime] = t.[DateTime] AND u.[USER] = t.[USER]
ORDER BY VALUE DESC

;with cte
as
(
select
*,
row_number() over (partition by date order by replace(user,'user','') desc) as rownum
from
#temp
)
select * from cte where rownum=1

Related

TSQL - GROUP BY on continous rows only

I am working on a SQL Server 2017 (v14.0).
I have a table like this:
Key | State | from | until |
----+----------+------------+------------+
100 | open | 01.01.2021 | 01.01.2021 |
100 | open | 02.01.2021 | 02.01.2021 |
100 | closed | 03.01.2021 | 13.01.2021 |
100 | open | 14.01.2021 | 20.01.2021 |
100 | open | 20.01.2021 | 30.01.2021 |
I want to group it by Key and State, but only for continuous rows.
So my expected result would be something like:
Key | State | from | until |
----+----------+------------+------------+
100 | open | 01.01.2021 | 02.01.2021 |
100 | closed | 03.01.2021 | 13.01.2021 |
100 | open | 14.01.2021 | 30.01.2021 |
Any idea on how to do this? I have the strong feeling, that this should be possible with the help of ROW_NUMBER somehow, but I was not able to figure it out yet...
(In this example data some weird group by calendarweek or something similar might be possible, but this is not my intention)

It is a Gaps and Islands problem. One solution is this:
WITH cte1 AS (
SELECT *, CASE WHEN LAG([state]) OVER (PARTITION BY [key] ORDER BY [from]) = [state] THEN 0 ELSE 1 END AS chg
FROM t
), cte2 AS (
SELECT *, SUM(chg) OVER (PARTITION BY [key] ORDER BY [from]) AS grp
FROM cte1
)
SELECT [key], grp, MIN([state]), MIN([from]), MAX([until])
FROM cte2
GROUP BY [key], grp
ORDER BY [key], grp

One possibility is as below added:
Create table myTable_o1
(
[key] int
,[state] varchar(100)
,[From] date
,[Until] date
)
insert into myTable_o1 values (100, 'open', '2021-01-01','2021-01-01')
insert into myTable_o1 values (100, 'open', '2021-01-02','2021-01-02')
insert into myTable_o1 values (100, 'closed', '2021-01-03','2021-01-13')
insert into myTable_o1 values (100, 'open', '2021-01-4','2021-01-20')
insert into myTable_o1 values (100, 'open', '2021-01-20','2021-01-30')
SELECT
[key]
,[state]
,[From]
,[until]
FROM
(
Select
[key]
, [state]
, [From]
, row_number() over (partition by tiles order by [key]) row_num
, ISNULL(Lead(Until) over (partition by tiles order by [key]) , Until) [until]
FROM
(
SELECT * ,
Ntile(2) over ( order by [Key]) as [tiles]
from myTable_o1
) AS A
) AS B WHERE B.row_num in (1,3)

SQL return second max date for each id, date and channel

I have the following table:
id channel_id date
1 | 1 | 2017-01-10
1 | 2 | 2018-02-05
1 | 1 | 2019-03-07
1 | 2 | 2020-03-15
2 | 1 | 2018-01-17
2 | 1 | 2019-07-20
2 | 1 | 2020-01-10
I want to return for previous maximum date for each date and id but two separate columns for both channel_id. So, one column for previous max date for channel_id is equal to 1 and another for previous max date for channel_id is equal to 2. What I want to get can be found below:
id channel_id date prev_date_channel_id1 prev_date_channel_id2
1 | 1 | 2017-01-10 | NULL | NULL |
1 | 2 | 2018-02-05 | 2017-01-10 | NULL |
1 | 1 | 2019-03-07 | 2017-01-10 | 2018-02-05 |
1 | 2 | 2020-03-15 | 2019-03-07 | 2018-02-05 |
2 | 1 | 2018-01-17 | NULL | NULL |
2 | 1 | 2019-07-20 | 2018-01-17 | NULL |
2 | 1 | 2020-01-10 | 2019-07-20 | NULL |
I made a query as below and returns what I want but takes too much time. I'd appreciate any optimization suggestions!
SELECT
a.id,
a.date,
MAX(c.date) AS prev_date_channel_id1,
MAX(d.date) AS prev_date_channel_id2
FROM
table a
LEFT JOIN
table c ON a.id=c.id AND a.date>c.date AND c.channel_id=1
LEFT JOIN
table d ON a.id=d.id AND a.date>d.date AND d.channel_id=2
GROUP BY a.id, a.date

Use lag() for the previous date and a cumulative conditional max for the channel 2 date:
select t.*, lag(date) over (partition by id order by date) as prev_date,
max(case when channel = 2 then date end) over
(partition by id
order by date
rows between unbounded preceding and 1 row preceding
) as prev_date_channel2
from t;

I think there's an error in your "expected output" for the value of prev_date_channel_id1 on the last row (it should be 2019-07-20).
In any case, with appropriate indexing an outer apply top 1 construct might serve you better:
create table t
(
id int,
channel_id int,
[date] date
constraint pk_t primary key clustered (id, channel_id, [date])
);
insert t values
(1, 1, '2017-01-10'),
(1, 2, '2018-02-05'),
(1, 1, '2019-03-07'),
(1, 2, '2020-03-15'),
(2, 1, '2018-01-17'),
(2, 1, '2019-07-20'),
(2, 1, '2020-01-10');
select t1.id,
t1.channel_id,
t1.[date],
prev_date_channel_id1 = c1.dt,
prev_date_channel_id2 = c2.dt
from t t1
outer apply (
select top 1 [date]
from t
where id = t1.id
and channel_id = 1
and [date] < t1.[date]
order by date desc
) c1(dt)
outer apply (
select top 1 [date]
from t
where id = t1.id
and channel_id = 2
and [date] < t1.[date]
order by date desc
) c2(dt)
order by t1.id, t1.[date];
Or possibly faster still, especially with the key changed to constraint pk_t primary key clustered (id, [date], [channel_id]))
select t1.id,
t1.channel_id,
t1.[date],
prev_date_channel_id1 = prev.c1,
prev_date_channel_id2 = prev.c2
from t t1
outer apply (
select c1 = max(iif(channel_id = 1, [date], null)),
c2 = max(iif(channel_id = 2, [date], null))
from t
where id = t1.id
and [date] < t1.[date]
) prev

Assuming you have an index on those three columns, you can use subqueries:
SELECT [T0].[id],
[T0].[channel_id],
[T0].[date],
[prev_date_channel_id1] = (
SELECT MAX([T1].[date])
FROM [t] [T1]
WHERE [T1].[id] = [T0].[id]
AND [T1].[date] < [T0].[date]
AND [T1].[channel_id] = 1
),
[prev_date_channel_id2] = (
SELECT MAX([T1].[date])
FROM [t] [T1]
WHERE [T1].[id] = [T0].[id]
AND [T1].[date] < [T0].[date]
AND [T1].[channel_id] = 2
)
FROM [t] [T0];

Get latest rows by date from aggregate

Hey i'm kinda stuck with this query. Using SQL-server
i have in the table, UNIQUE(date, medId, userId)
I have this table
date | medId | userId | Quantity
2016-06-10 | 2 | 1 | 28
2016-06-07 | 1 | 1 | 19
2016-06-06 | 1 | 1 | 10
i want to get the row with the max date, per group of medId,userId, in this case
i would get
2016-06-10 | 2 | 1 | 28
2016-06-07 | 1 | 1 | 19
thanks in advance!
i've tried this
SELECT
a.userMedStockDate,
a.userMedStockMedId,
a.userMedStockUserId,
a.userMedStockQuantity
FROM (SELECT
MAX(userMedStockDate) AS userMedStockDate,
userMedStockQuantity,
userMedStockUserId,
userMedStockMedId,
ROW_NUMBER() OVER (partition by userMedStockMedId,userMedStockUserId
ORDER BY MAX(userMedStockDate) desc) AS rnk
FROM UserMedStock
GROUP BY
userMedStockUserId,
userMedStockQuantity,
userMedStockMedId) a
WHERE a.rnk = 1
[SOLVED]

this should work
select * from
(
select
[date] , medId, userId ,Quantity
,row_number() over (partition by medId, userId order by [date] desc) as rowid
from yourtable
) as x
where rowid = 1

Could also try this:
select y.* from
table1 y inner join
(
SELECT [Date] = MAX([Date]), medId, userId
FROM table1
GROUP BY medId, userId
) x on y.[Date] = x.[Date] and y.medId = x.medId and y.userId = x.userId

i changed the fields to my actual table but here
SELECT
a.userMedStockDate, a.userMedStockMedId, a.userMedStockUserId, a.userMedStockQuantity
FROM(
SELECT
MAX(userMedStockDate) AS userMedStockDate,
userMedStockQuantity,
userMedStockUserId,
userMedStockMedId,
ROW_NUMBER()OVER(partition by userMedStockMedId, userMedStockUserId ORDER BY MAX(userMedStockDate) desc) AS rnk
FROM UserMedStock
GROUP BY userMedStockUserId, userMedStockQuantity, userMedStockMedId
) a
WHERE a.rnk = 1

Selecting records with maximum value in group

I have a transaction table with the following structure:
select t.[GUID], t.[ID], ts.Description "Status", t.Payee, t.Amount, t.SequenceNumber
from [Transaction] t
inner join TransactionStatus ts on t.StatusID = ts.ID
GUID | ID | Status | Payee | Amount | SequenceNumber
AF732CF5-E6C0-E411-B8F6-004056AB77C2 | 1 | Posted | Amy | 500.00 | 1
AF732CF5-E6C0-E411-B8F6-004056AB77C2 | 2 | Voided | Amy | 500.00 | 2
1F7D880C-E7C0-E411-B8F6-004056AB77C2 | 3 | Posted | Bob | 70.00 | 1
AF732CF5-E6C0-E411-B8F6-004056AB77C2 | 4 | Posted | Amy | 512.50 | 3
1F7D880C-E7C0-E411-B8F6-004056AB77C2 | 5 | Posted | Bob | 66.00 | 2
F2CC0B03-76C7-E411-A48D-004056AB787C | 6 | Pending | Carol | 240.00 | NULL
I'm trying to construct a query to group the records by GUID and select the single record with the largest SequenceNumber (if it isn't NULL):
GUID | ID | Status | Payee | Amount | SequenceNumber
AF732CF5-E6C0-E411-B8F6-004056AB77C2 | 4 | Posted | Amy | 512.50 | 3
1F7D880C-E7C0-E411-B8F6-004056AB77C2 | 5 | Posted | Bob | 66.00 | 2
F2CC0B03-76C7-E411-A48D-004056AB787C | 6 | Pending | Carol | 240.00 | NULL
I've tried adding this line:
where SequenceNumber = (select MAX(SequenceNumber) from [Transaction] t2 where t.[GUID] = t2.[GUID])
but that doesn't get me any transactions where the status is Pending (they don't have sequence numbers). How can I fix this query?

If it's SQL-Server you can use a CTE + ROW_NUMBER:
WITH CTE AS
(
select t.[GUID], t.[ID], ts.Description "Status", t.Payee, t.Amount, t.SequenceNumber,
rn = row_number() over (partition by t.[GUID] Order By t.SequenceNumber DESC)
from [Transaction] t
inner join TransactionStatus ts on t.StatusID = ts.ID
)
SELECT GUID, ID, Status, Payee, Amount, SequenceNumber
FROM CTE
WHERE rn = 1
This will include the row where SequenceNumber is null. If you want all rows with the maximum SequenceNumber(in case of ties) use DENSE_RANK instead of ROW_NUMBER.

You can calculate the MAX(ID) and it's related [GUID] in a subquery and JOIN to it in order to get the desired results:
Sample subquery:
SELECT [GUID] ,
MAX(ID) MaxId
FROM Transaction
GROUP BY [GUID]
Would produce:
GUID MaxId
1F7D880C-E7C0-E411-B8F6-004056AB77C2 5
AF732CF5-E6C0-E411-B8F6-004056AB77C2 4
F2CC0B03-76C7-E411-A48D-004056AB787C 6
Full Demo:
CREATE TABLE #Transaction
(
[GUID] VARCHAR(36) ,
[ID] INT ,
[Status] VARCHAR(7) ,
[Payee] VARCHAR(5) ,
[Amount] INT ,
[SequenceNumber] VARCHAR(4)
);
INSERT INTO #Transaction
( [GUID], [ID], [Status], [Payee], [Amount], [SequenceNumber] )
VALUES ( 'AF732CF5-E6C0-E411-B8F6-004056AB77C2', 1, 'Posted', 'Amy', 500.00,
'1' ),
( 'AF732CF5-E6C0-E411-B8F6-004056AB77C2', 2, 'Voided', 'Amy', 500.00,
'2' ),
( '1F7D880C-E7C0-E411-B8F6-004056AB77C2', 3, 'Posted', 'Bob', 70.00,
'1' ),
( 'AF732CF5-E6C0-E411-B8F6-004056AB77C2', 4, 'Posted', 'Amy', 512.50,
'3' ),
( '1F7D880C-E7C0-E411-B8F6-004056AB77C2', 5, 'Posted', 'Bob', 66.00,
'2' ),
( 'F2CC0B03-76C7-E411-A48D-004056AB787C', 6, 'Pending', 'Carol',
240.00, NULL );
SELECT #Transaction.*
FROM #Transaction
INNER JOIN ( SELECT [GUID] ,
MAX(ID) MaxId
FROM #Transaction
GROUP BY [GUID]
) t ON t.[GUID] = #Transaction.[GUID]
AND t.MaxId = #Transaction.ID
ORDER BY ID

Try this way to get maximum SequenceNumber
CASE WHEN MAX(SequenceNumber IS NULL) = 0 THEN MAX(SequenceNumber) ELSE NULL END AS SequenceNumber

I don't know if SQL Server has windowing functions, so you may be able to do this more cleanly, but here's a vanilla SQL solution:
select highest.[GUID],
highest.[ID],
ts.Description "Status",
highest.Payee,
highest.Amount,
highest.SequenceNumber
from [Transaction] highest
join TransactionStatus ts
on ts.ID = highest.ID
left join [Transaction] higher
on higher.[GUID] = highest.[GUID]
and higher.SequenceNumber > highest.SequenceNumber
where higher.[GUID] is null;

omething like this:
SELECT * FROM
(
select
t.[GUID], t.[ID], ts.Description "Status", t.Payee, t.Amount,
ROW_NUMBER() OVER PARTITION BY (t.[GUID]
ORDER BY t.SequenceNumber DESC) AS rownum
from [Transaction] t
inner join TransactionStatus ts on t.StatusID = ts.ID
)vals where vals.rownum = 1

Selecting most recent record for each group with a sum function

This is a sample table
ID | Serial | Quantity | Date_Created
-------------------------------------
1 | AS1GD | 10 | 2014-12-25 8:00:00 AM
1 | GO9A4 | 5 | 2014-12-28 9:04:32 AM
2 | JF8WS | 15 | 2014-12-29 9:23:43 AM
2 | JFLE0 | 15 | 2015-01-04 10:53:12 AM
2 | S8A4A | 10 | 2015-01-05 9:12:46 AM
3 | FXOE3 | 20 | 2015-01-03 9:31:52 AM
3 | LSOR9 | 22 | 2015-01-06 12:00:44 PM
My expected result
ID | Serial | Total_Quantity | Last_DateCreated
-------------------------------------------------
1 | GO9A4 | 15 | 2014-12-28 9:04:32 AM
2 | S8A4A | 40 | 2015-01-05 9:12:46 AM
3 | LSOR9 | 42 | 2015-01-06 12:00:44 PM
Here's a query I tried but it's not returning the sum but only the quantity of the record
WITH total AS
( SELECT [ID], [date_created], [serial], sum(quantity) as qty,
ROW_NUMBER() OVER (PARTITION BY [ID] ORDER BY [date_created] DESC) AS rownum
FROM [table]
group by ID, date_created, serial
)
SELECT ID, Serial, qty, date_created
FROM total
WHERE rownum = 1

Since you are grouping by more than the ID but want the SUM() at the ID level, you can add OVER() to your SUM():
;WITH total AS ( SELECT [ID]
, [date_created]
, [serial]
, SUM(SUM(quantity)) OVER(PARTITION BY [ID]) as qty
, ROW_NUMBER() OVER (PARTITION BY [ID] ORDER BY [date_created] DESC) AS rownum
FROM [table]
GROUP BY ID, date_created, serial
)
SELECT ID, Serial, qty, date_created
FROM total
WHERE rownum = 1
The above creates an oddity in which you need two SUM() in order to use the OVER(), but you can ditch the GROUP BY altogether in your example:
;WITH total AS ( SELECT [ID]
, [date_created]
, [serial]
, SUM(quantity) OVER(PARTITION BY [ID]) as qty
, ROW_NUMBER() OVER (PARTITION BY [ID] ORDER BY [date_created] DESC) AS rownum
FROM Table1
)
SELECT ID, Serial, qty, date_created
FROM total
WHERE rownum = 1
Demo: SQL Fiddle

This will work as long as you don't have two records with the same ID created in the same second:
WITH RecentSUM AS
(
SELECT ID, MAX(DateCreated) DateCreated, SUM(Quantity) TotalQuantity
FROM [table]
GROUP BY ID
)
SELECT t.ID, t.Serial, r.TotalQuantity, r.DateCreated
FROM RecentSUM r
INNER JOIN [table] t ON t.ID = r.ID and t.DateCreated=r.DateCreated;

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

SQL - Choose which duplicate value to select - sql

Not quite simple! Using window functions and common table expressions ; with x as ( select [DateTime], value, [User], row_num = row_number() over(partition by [DateTime] order by [User] desc) from Table1 ) select x.* from x where row_num = 1

;with cte as ( select , row_number() over (partition by date order by replace(user,'user','') desc) as rownum from #temp ) select from cte where rownum=1

Related

TSQL - GROUP BY on continous rows only

SQL return second max date for each id, date and channel

Get latest rows by date from aggregate

Selecting records with maximum value in group

Selecting most recent record for each group with a sum function

Categories

Resources

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

SQL - Choose which duplicate value to select - sql

Not quite simple! Using window functions and common table expressions ; with x as ( select [DateTime], value, [User], row_num = row_number() over(partition by [DateTime] order by [User] desc) from Table1 ) select x.* from x where row_num = 1

;with cte as ( select *, row_number() over (partition by date order by replace(user,'user','') desc) as rownum from #temp ) select * from cte where rownum=1

Related

TSQL - GROUP BY on continous rows only

SQL return second max date for each id, date and channel

Get latest rows by date from aggregate

Selecting records with maximum value in group

Selecting most recent record for each group with a sum function

Categories

Resources

;with cte as ( select , row_number() over (partition by date order by replace(user,'user','') desc) as rownum from #temp ) select from cte where rownum=1