SQL: Remove duplicates - sql

How do I remove duplicates from a table that is set up in the following way?
unique_ID | worker_ID | date | type_ID
A worker can have multiple type_ID's associated with them and I want to remove any duplicate types. If there is a duplicate, I want to remove the type with the most recent entry.

A textbook candidate for the window function row_number():
-- Number the rows inside each (worker_ID, type_ID) group by ascending date,
-- then delete every row of tbl whose number is greater than 1.
;WITH ranked AS (
    SELECT
        unique_ID,
        ROW_NUMBER() OVER (
            PARTITION BY worker_ID, type_ID
            ORDER BY date
        ) AS rn
    FROM tbl
)
DELETE t
FROM tbl AS t
INNER JOIN ranked AS r
    ON r.unique_ID = t.unique_ID
WHERE r.rn > 1
This also takes care of the situation where a set of dupes on (worker_ID,type_ID) shares the same date.
See the simplified demo on data.SE.
Update with simpler version
Turns out, this can be simplified: In SQL Server you can delete from the CTE directly:
-- Same numbering as before, but the DELETE targets the CTE itself,
-- which removes the underlying rows of tbl.
;WITH numbered AS (
    SELECT
        unique_ID,
        ROW_NUMBER() OVER (
            PARTITION BY worker_ID, type_ID
            ORDER BY date
        ) AS rn
    FROM tbl
)
DELETE FROM numbered
WHERE rn > 1

-- Delete every row for which an earlier row with the same
-- (worker_id, type_id) exists, so only the oldest row of each group survives.
-- T-SQL needs the "DELETE alias FROM table AS alias" form to alias the target.
-- When two duplicates share the same date, unique_id breaks the tie; a plain
-- "<" on date alone would leave both rows in place.
DELETE t
FROM tbl AS t
WHERE EXISTS (
    SELECT 1
    FROM tbl AS t2
    WHERE t2.worker_id = t.worker_id
      AND t2.type_id = t.type_id
      AND (
            t2.date < t.date
            OR (t2.date = t.date AND t2.unique_id < t.unique_id)
          )
)
HTH

-- Rows numbered 2 or higher within their (worker_Id, type_Id) group,
-- ordered by date, are the ones to remove.
;WITH numbered AS (
    SELECT
        unique_Id,
        ROW_NUMBER() OVER (
            PARTITION BY worker_Id, type_Id
            ORDER BY date
        ) AS rn
    FROM #t
)
DELETE FROM #t
WHERE unique_Id IN (
    SELECT unique_Id
    FROM numbered
    WHERE rn > 1
)
And to test...
-- Test script for the duplicate-removal DELETE.
-- #t is a temp table, so it must be created with CREATE TABLE
-- (DECLARE ... TABLE is only valid for @table variables).
IF OBJECT_ID('tempdb..#t') IS NOT NULL
    DROP TABLE #t;

CREATE TABLE #t
(
    unique_ID INT IDENTITY,
    worker_ID INT,
    date      DATETIME,
    type_ID   INT
);

-- One INSERT per row so the IDENTITY values follow the statement order.
-- Worker 1 / type 1 appears three times; the row dated GETDATE() is the oldest.
INSERT INTO #t (worker_ID, date, type_ID) VALUES (1, DATEADD(DAY, 1, GETDATE()), 1);
INSERT INTO #t (worker_ID, date, type_ID) VALUES (1, GETDATE(), 1);
INSERT INTO #t (worker_ID, date, type_ID) VALUES (2, GETDATE(), 1);
INSERT INTO #t (worker_ID, date, type_ID) VALUES (1, DATEADD(DAY, 2, GETDATE()), 1);
INSERT INTO #t (worker_ID, date, type_ID) VALUES (1, DATEADD(DAY, 3, GETDATE()), 2);

-- Before: five rows.
SELECT unique_ID, worker_ID, date, type_ID
FROM #t;

-- Remove every row that is not the oldest of its (worker_ID, type_ID) group.
DELETE FROM #t
WHERE unique_ID IN (
    SELECT q.unique_ID
    FROM (
        SELECT
            unique_ID,
            ROW_NUMBER() OVER (
                PARTITION BY worker_ID, type_ID
                ORDER BY date
            ) AS rn
        FROM #t
    ) AS q
    WHERE q.rn > 1
);

-- After: one row per (worker_ID, type_ID).
SELECT unique_ID, worker_ID, date, type_ID
FROM #t;

you may use this query
-- Keep the row with the lowest unique_id of every (worker_ID, type_ID) group
-- and delete all others. Deleting only MAX(unique_id) per group would remove
-- a single row, leaving groups of three or more still duplicated.
-- NOTE(review): this treats a higher unique_id as the more recent entry;
-- confirm that unique_id order matches date order in your data.
DELETE FROM worker
WHERE unique_id NOT IN (
    SELECT MIN(unique_id)
    FROM worker
    GROUP BY worker_ID, type_ID
)
here i am assuming worker as your table name

Related

INSERT INTO #tmp from CTE query

I want to be able to insert the results of this CTE query into a temp table so I can sum the Total column.
-- Staging table for one row per group produced by the CTEs below.
-- Total is numeric (minutes) so it can be summed afterwards.
-- NOTE(review): StartDateTime / EndDateTime are kept as NVARCHAR(50) as in the
-- original; a date/time type is probably more appropriate - confirm the type
-- of RawData.CombineDateTime.
CREATE TABLE #tmp
(
    SerialNumber  NVARCHAR(50),
    StartDateTime NVARCHAR(50),
    EndDateTime   NVARCHAR(50),
    Total         DECIMAL(18, 6)
);

-- NOTE(review): @maxthreshold, @_Serial, @_DateFrom and @_DateTo are not
-- declared in this snippet; they are assumed to be variables or parameters
-- declared by the caller.
;WITH cte1 AS
(
    -- chg = 1 marks a row where a new group starts.
    SELECT
        *,
        CASE
            WHEN Temperature > @maxthreshold
                 AND LAG(Temperature) OVER (PARTITION BY SerialNumber ORDER BY CombineDateTime) IS NULL THEN 1
            WHEN Temperature <= @maxthreshold
                 AND LEAD(Temperature) OVER (PARTITION BY SerialNumber ORDER BY CombineDateTime) > @maxthreshold THEN 1
            WHEN Temperature <= @maxthreshold
                 AND LAG(Temperature) OVER (PARTITION BY SerialNumber ORDER BY CombineDateTime) > @maxthreshold THEN 1
        END AS chg
    FROM
        [RawData]
    WHERE
        SerialNumber = @_Serial
        AND CombineDateTime BETWEEN @_DateFrom AND @_DateTo
), cte2 AS
(
    -- The running total of chg gives every row its group number.
    SELECT
        *,
        SUM(chg) OVER (PARTITION BY SerialNumber ORDER BY CombineDateTime) AS grp
    FROM
        cte1
)
-- The INSERT is the single statement the CTEs belong to; it must follow the
-- last CTE directly, with no stray parenthesis after the HAVING clause.
INSERT INTO #tmp (SerialNumber, StartDateTime, EndDateTime, Total)
SELECT
    SerialNumber,
    MIN(CombineDateTime) AS StartDateTime,
    MAX(CombineDateTime) AS EndDateTime,
    DATEDIFF(SECOND, MIN(CombineDateTime), MAX(CombineDateTime)) / 60.0 AS Total
FROM
    cte2
GROUP BY
    SerialNumber, grp
HAVING
    MAX(Temperature) > @maxthreshold;

SELECT SerialNumber, StartDateTime, EndDateTime, Total
FROM #tmp;

DROP TABLE #tmp;
I have tried putting the insert in various places, but it does not insert, I have tried the following answers. I am confused as to why the insert won't work as it is outside of the recursion.
Answer 1
Answer 2
TIA

How to get Min Date and Next to min date from the same table in SQL Server

I have a table
-- Sample table: three dated values for each of two groups.
CREATE TABLE temp1
(
    GroupId  INT,
    DateCol  DATE,
    ValueCol INT
);

INSERT INTO temp1 (GroupId, DateCol, ValueCol)
VALUES
    (1, '2016-1-1', 10),
    (1, '2017-1-1', 20),
    (1, '2018-1-1', 30),
    (2, '2016-5-1', 101),
    (2, '2017-5-1', 102),
    (2, '2018-5-1', 103);
I want to get a date range (min to next to min) grouped by GroupId column:
ValueCol DateCol DateColNextToMin
111 (min Date) (next to Min Date)
GroupId ValueCol DateCol DateColNextToMin
1 10 2016-1-1 2017-1-1
2 101 2016-5-1 2017-5-1
How can I do this?
I tried to get this through window functions, PIVOT, etc., but could not find an elegant solution.
try this,
-- Table variables take the @ prefix.
DECLARE @table TABLE
(
    GroupId  INT,
    DateCol  DATE,
    ValueCol INT
);

-- YYYYMMDD literals are read the same way under every language / DATEFORMAT setting.
INSERT INTO @table (GroupId, DateCol, ValueCol)
VALUES
    (111, '20160801', 0),
    (111, '20160802', 0),
    (111, '20160803', 0),
    (111, '20160804', 0),
    (112, '20160805', 0),
    (112, '20160806', 0),
    (112, '20160807', 0),
    (112, '20160808', 0);

-- RowNo 1 is the earliest date of a group, RowNo 2 the next one.
;WITH CTE AS
(
    SELECT
        GroupId,
        DateCol,
        ValueCol,
        ROW_NUMBER() OVER (PARTITION BY GroupId ORDER BY DateCol ASC) AS RowNo
    FROM @table
)
SELECT
    c1.GroupId,
    c1.ValueCol,
    c1.DateCol,
    c2.DateCol AS DateColNextToMin   -- distinct name; both columns were "DateCol" before
FROM CTE AS c1
-- LEFT JOIN keeps groups that have only one row (DateColNextToMin is NULL for them).
LEFT JOIN CTE AS c2
    ON c2.GroupId = c1.GroupId
   AND c2.RowNo = 2
WHERE c1.RowNo = 1
ORDER BY c1.GroupId;

Selection One Entry Only As Non Zero In SQl Select

I have a scenario where I have to select multiple rows from table, I have multiple rows of one record but with different status,
At times I have two rows with identical data for the same status; in that case I want to select the non-zero price for the first occurrence and set 0 for the remaining occurrences.
Below is the Image to show and I have marked strike-out and marked 0 for the remaining occurrence.
Could anybody suggest a better SQL query?
Here is the Query: I am getting zero value for status 1 for ID =1 but I need to show first as regular and then 0 if that status repeats again.
-- Repro script: build the sample rows, show Price as 0 for every
-- row whose Status is 1, then clean up.
CREATE TABLE #Temp
(
    ID       INT,
    ItemName VARCHAR(10),
    Price    MONEY,
    [Status] INT,
    [Date]   DATETIME
);

INSERT INTO #Temp (ID, ItemName, Price, [Status], [Date])
VALUES
    (1,'ABC',10,1,'2014-08-27'),
    (1,'ABC',10,2,'2014-08-27'),
    (1,'ABC',10,1,'2014-08-28'),
    (2,'DEF',25,1,'2014-08-26'),
    (2,'DEF',25,3,'2014-08-27'),
    (2,'DEF',25,1,'2014-08-28'),
    (3,'GHI',30,1,'2014-08-27');

SELECT
    CASE Temp.Status
        WHEN 1 THEN 0
        ELSE Temp.Price
    END AS Price,
    Temp.*
FROM #Temp AS Temp;

DROP TABLE #Temp;
Here is the result:
You might modify your inner select using Row_Number() and set price to Zero for RowNumber > 1.
-- Number the rows of each (ID, Status) pair by date; only row 1 keeps its price.
;WITH Temp AS
(
    SELECT
        src.*,
        ROW_NUMBER() OVER (
            PARTITION BY src.ID, src.Status
            ORDER BY src.ID, src.Date
        ) AS RowNumber
    FROM #Temp AS src
)
SELECT
    CASE
        WHEN Temp.RowNumber > 1 THEN 0
        ELSE Temp.Price
    END AS Price,
    Temp.*
FROM Temp
ORDER BY Temp.ID, Temp.Date
You can try this:
-- Number the rows of every (ID, ItemName, Price, Status) group by date.
-- ROW_NUMBER is used rather than RANK: RANK gives tied rows (identical data
-- and identical date) the same rank 1, so such exact duplicates would all
-- keep their price.
;WITH DataSource AS
(
    SELECT
        ROW_NUMBER() OVER (
            PARTITION BY [ID], [ItemName], [Price], [Status]
            ORDER BY [Date]
        ) AS [RowID]
        ,[ID]
        ,[ItemName]
        ,[Price]
        ,[Status]
        ,[Date]
    FROM #Temp
)
SELECT [ID]
      ,[ItemName]
      -- The first occurrence keeps its price, repeats show 0; the column is named explicitly.
      ,CASE WHEN [RowID] = 1 THEN [Price] ELSE 0 END AS [Price]
      ,[Status]
      ,[Date]
FROM DataSource
ORDER BY [ID]
        ,[Date]
Here is the output:
please try this below code . it is working for me.
-- Self-contained script: the first occurrence of each (ID, Status) pair keeps
-- its price, later occurrences show 0.
IF OBJECT_ID('tempdb..#Temp') IS NOT NULL
    DROP TABLE #Temp;

CREATE TABLE #Temp
(
    ID       INT,
    ItemName VARCHAR(10),
    Price    MONEY,
    [Status] INT,
    [Date]   DATETIME
);

INSERT INTO #Temp (ID, ItemName, Price, [Status], [Date]) VALUES (1,'ABC',10,1,'2014-08-27');
INSERT INTO #Temp (ID, ItemName, Price, [Status], [Date]) VALUES (1,'ABC',10,2,'2014-08-27');
INSERT INTO #Temp (ID, ItemName, Price, [Status], [Date]) VALUES (1,'ABC',10,1,'2014-08-28');
INSERT INTO #Temp (ID, ItemName, Price, [Status], [Date]) VALUES (2,'DEF',25,1,'2014-08-26');
INSERT INTO #Temp (ID, ItemName, Price, [Status], [Date]) VALUES (2,'DEF',25,3,'2014-08-27');
INSERT INTO #Temp (ID, ItemName, Price, [Status], [Date]) VALUES (3,'GHI',30,1,'2014-08-27');
INSERT INTO #Temp (ID, ItemName, Price, [Status], [Date]) VALUES (2,'DEF',25,1,'2014-08-28');

-- The partition must be (ID, Status): partitioning by (Status, Date) mixes rows
-- of different IDs, and a hard-coded "Status != 2" test zeroes rows that are
-- first occurrences.
SELECT
    a.ID,
    a.ItemName,
    CASE WHEN a.rn = 1 THEN a.Price ELSE 0 END AS Price,
    a.[Status],
    a.[Date]
FROM (
    SELECT
        ID,
        ItemName,
        Price,
        [Status],
        [Date],
        ROW_NUMBER() OVER (PARTITION BY ID, [Status] ORDER BY [Date] ASC) AS rn
    FROM #Temp
) AS a
ORDER BY a.ID, a.[Date];
You can do this with UNION:
-- First branch: rows with no earlier row for the same (ID, Status) keep their price.
-- Second branch: rows that do have an earlier row are returned with Price 0.
-- The comparison is "earlier row exists" (e.Date < t.Date); testing for a later
-- row would keep the price on the last occurrence instead of the first.
-- Rows that share ID, Status and Date are all treated as first occurrences.
SELECT t.ID, t.ItemName, t.Price, t.[Status], t.[Date]
FROM #Temp AS t
WHERE NOT EXISTS (
    SELECT *
    FROM #Temp AS e
    WHERE e.ID = t.ID
      AND e.[Status] = t.[Status]
      AND e.[Date] < t.[Date]
)
UNION ALL
SELECT t.ID, t.ItemName, 0 AS Price, t.[Status], t.[Date]
FROM #Temp AS t      -- the second branch needs its own FROM clause
WHERE EXISTS (
    SELECT *
    FROM #Temp AS e
    WHERE e.ID = t.ID
      AND e.[Status] = t.[Status]
      AND e.[Date] < t.[Date]
)
Or subquery:
-- A row is a repeat as soon as ONE earlier row with the same (ID, Status)
-- exists, so the test is EXISTS rather than COUNT(*) > 1 (which would only
-- zero the third and later occurrences).
SELECT
    CASE
        WHEN EXISTS (
            SELECT 1
            FROM #Temp AS e
            WHERE e.ID = t.ID
              AND e.[Status] = t.[Status]
              AND e.[Date] < t.[Date]
        ) THEN 0
        ELSE t.Price
    END AS NewPrice,
    t.*
FROM #Temp AS t
Or possibly RANK() function:
-- Rank 1 within (id, status) ordered by date keeps the price; any other rank shows 0.
SELECT
    CASE RANK() OVER (PARTITION BY t.id, t.status ORDER BY t.date)
        WHEN 1 THEN t.Price
        ELSE 0
    END,
    t.*
FROM #Temp AS t

Query to merge continuous temporal records

I have a table like this:
id START_DATE end_date
1 01/01/2011 01/10/2011
2 01/11/2011 01/20/2011
3 01/25/2011 02/01/2011
4 02/10/2011 02/15/2011
5 02/16/2011 02/27/2011
I want to merge the records where the start_date is just next day of end_date of another record: So the end record should be something like this:
new_id START_DATE end_date
1 01/01/2011 01/20/2011
2 01/25/2011 02/01/2011
3 02/10/2011 02/27/2011
One way that I know to do this will be to create a row based temp table with various rows as dates (each record for one date, between the total range of days) and thus making the table flat.
But there has to be a cleaner way to do this in a single query... e.g. something using row_num?
Thanks guys.
-- Table variables take the @ prefix.
DECLARE @T TABLE
(
    id         INT,
    start_date DATETIME,
    end_date   DATETIME
);

-- YYYYMMDD literals avoid the month/day ambiguity of '01/10/2011'.
INSERT INTO @T (id, start_date, end_date)
VALUES
    (1, '20110101', '20110110'),
    (2, '20110111', '20110120'),
    (3, '20110125', '20110201'),
    (4, '20110210', '20110215'),
    (5, '20110216', '20110227');

-- Inner query: expand every range into one row per day (dt) and compute
-- grp = dt minus its row number. Consecutive days share the same grp value,
-- so every run of consecutive days forms one group.
-- Outer query: collapse each group back into a (start_date, end_date) range.
-- NOTE(review): the expansion is bounded by the numbers available in
-- master..spt_values with type 'P'; ranges longer than that would be cut short.
SELECT
    ROW_NUMBER() OVER (ORDER BY MIN(d.dt)) AS new_id,
    MIN(d.dt) AS start_date,
    MAX(d.dt) AS end_date
FROM (
    SELECT
        DATEADD(DAY, N.number, t.start_date) AS dt,
        DATEADD(
            DAY,
            N.number - ROW_NUMBER() OVER (ORDER BY DATEADD(DAY, N.number, t.start_date)),
            t.start_date
        ) AS grp
    FROM @T AS t
    INNER JOIN master..spt_values AS N
        ON N.number BETWEEN 0 AND DATEDIFF(DAY, t.start_date, t.end_date)
       AND N.type = 'P'
) AS d
GROUP BY d.grp
ORDER BY new_id;
You can use a numbers table instead of using master..spt_values.
Try This
-- Iteratively merge each range with the range that starts the day after it ends.
-- [table] stands for your table name (bracketed because TABLE is a reserved word).
-- @chgRecs holds, per pass, the row to extend (updId), the row to absorb (delId)
-- and the absorbed row's end date (endt).
DECLARE @chgRecs TABLE
(
    updId INT      NOT NULL PRIMARY KEY,
    delId INT      NOT NULL,
    endt  DATETIME NOT NULL
);

-- Loop while any range still has a successor starting on the next day.
WHILE EXISTS (
    SELECT *
    FROM [table] AS a
    WHERE EXISTS (
        SELECT *
        FROM [table] AS b
        WHERE b.start_date = DATEADD(DAY, 1, a.end_date)
    )
)
BEGIN
    -- Pair every chain head (a row with no predecessor) with its direct successor.
    -- NOTE(review): two rows sharing the same successor start date would violate
    -- the primary key on updId.
    INSERT @chgRecs (updId, delId, endt)
    SELECT a.id, b.id, b.end_date
    FROM [table] AS a
    INNER JOIN [table] AS b
        ON b.start_date = DATEADD(DAY, 1, a.end_date)
    WHERE NOT EXISTS (
        SELECT *
        FROM [table] AS p
        WHERE p.end_date = DATEADD(DAY, -1, a.start_date)
    );

    -- Remove the absorbed rows ...
    DELETE FROM [table]
    WHERE id IN (SELECT delId FROM @chgRecs);

    -- ... and stretch the chain heads to the absorbed rows' end dates.
    UPDATE t
    SET end_date = u.endt
    FROM [table] AS t
    INNER JOIN @chgRecs AS u
        ON u.updId = t.id;

    -- Empty the work table for the next pass.
    DELETE FROM @chgRecs;
END
No, was not looking for a loop...
I guess this is a good solution:
taking all the data in a #temp table
SELECT * FROM #temp;

-- Merged ranges in one statement.
--   * A merged range starts at a row that no other row immediately precedes.
--   * Its end is the nearest end_date at or after that start that belongs to a
--     row with no immediate successor.
-- This handles chains of any length; pairing each row with only its direct
-- neighbour merges two-row chains only.
-- NOTE(review): assumes the ranges in #temp do not overlap.
SELECT
    s.start_date,
    (
        SELECT MIN(e.end_date)
        FROM #temp AS e
        WHERE e.end_date >= s.start_date
          AND NOT EXISTS (
              SELECT 1
              FROM #temp AS n
              WHERE n.start_date = DATEADD(DAY, 1, e.end_date)
          )
    ) AS end_date
FROM #temp AS s
WHERE NOT EXISTS (
    SELECT 1
    FROM #temp AS p
    WHERE s.start_date = DATEADD(DAY, 1, p.end_date)
)
ORDER BY s.start_date;

DROP TABLE #temp;
Please let me know if there is anything better than this.
Thanks guys.
A recursive solution:
-- Sample data, including two rows (6 and 7) that start on the same day.
CREATE TABLE TestData
(
    Id        INT      PRIMARY KEY,
    StartDate DATETIME NOT NULL,
    EndDate   DATETIME NOT NULL
);

SET DATEFORMAT MDY;

INSERT INTO TestData (Id, StartDate, EndDate)
VALUES
    (1, '01/01/2011', '01/10/2011'),
    (2, '01/11/2011', '01/20/2011'),
    (3, '01/25/2011', '02/01/2011'),
    (4, '02/10/2011', '02/15/2011'),
    (5, '02/16/2011', '02/27/2011'),
    (6, '02/28/2011', '03/06/2011'),
    (7, '02/28/2011', '03/03/2011'),
    (8, '03/10/2011', '03/18/2011'),
    (9, '03/19/2011', '03/25/2011');

-- Walk the rows in Id order starting from Id = 1. A row stays in the previous
-- row's group when it starts exactly one day after that row ends; otherwise
-- the group number is incremented.
WITH RecursiveCTE AS
(
    SELECT
        head.Id,
        head.StartDate,
        head.EndDate,
        1 AS GroupID
    FROM TestData AS head
    WHERE head.Id = 1

    UNION ALL

    SELECT
        crt.Id,
        crt.StartDate,
        crt.EndDate,
        prev.GroupID
            + CASE WHEN DATEDIFF(DAY, prev.EndDate, crt.StartDate) = 1 THEN 0 ELSE 1 END
    FROM TestData AS crt
    INNER JOIN RecursiveCTE AS prev
        ON prev.Id = crt.Id - 1
)
SELECT
    cte.GroupID,
    MIN(cte.StartDate) AS StartDate,
    MAX(cte.EndDate)   AS EndDate
FROM RecursiveCTE AS cte
GROUP BY cte.GroupID
ORDER BY cte.GroupID;

DROP TABLE TestData;

SQL Query for Grouping the results based on sequence

I have a table like this:
ID Seq Amt
1 1 500
1 2 500
1 3 500
1 5 500
2 10 600
2 11 600
3 1 700
3 3 700
I want to group the continuous sequence numbers into a single row like this:
ID Start End TotalAmt
1 1 3 1500
1 5 5 500
2 10 11 1200
3 1 1 700
3 3 3 700
Please help to achieve this result.
-- Within one ID, Seq minus its row number is constant across a run of
-- consecutive Seq values, so it serves as the grouping key for each run.
WITH numbered AS (
    SELECT
        ID,
        Seq,
        Amt,
        Seq - ROW_NUMBER() OVER (PARTITION BY ID ORDER BY Seq) AS SeqGroup
    FROM atable
)
SELECT
    ID,
    MIN(Seq) AS Start,
    MAX(Seq) AS [End],
    SUM(Amt) AS TotalAmt
FROM numbered
GROUP BY ID, SeqGroup
ORDER BY ID, Start
;
Well, there's perhaps a more elegant way to do this (something hints at me that there is), but here's an approach that will work if you're using a version of SQL Server that accepts common table expressions:
use Tempdb
go

-- Sample data.
create table [Test]
(
    [id]  int not null,
    [Seq] int not null,
    [Amt] int not null
)

insert into [Test] ([id], [Seq], [Amt])
values
    (1, 1, 500),
    (1, 2, 500),
    (1, 3, 500),
    (1, 5, 500),
    (2, 10, 600),
    (2, 11, 600),
    (3, 1, 700),
    (3, 3, 700)

;with
-- Rows with no row at Seq - 1 for the same id: the first row of a run.
lower_bound as (
    select cur.id, cur.Seq, cur.Amt
    from Test as cur
    where not exists (
        select *
        from Test as prv
        where prv.id = cur.id
          and prv.Seq = cur.Seq - 1
    )
),
-- Rows with no row at Seq + 1 for the same id: the last row of a run.
upper_bound as (
    select cur.id, cur.Seq, cur.Amt
    from Test as cur
    where not exists (
        select *
        from Test as nxt
        where nxt.id = cur.id
          and nxt.Seq = cur.Seq + 1
    )
),
-- Pair each run end with the closest run start at or before it.
bounds as (
    select
        ub.id,
        (
            select max(lb.Seq)
            from lower_bound as lb
            where lb.id = ub.id
              and lb.Seq <= ub.Seq
        ) as LBound,
        ub.Seq as Ubound
    from upper_bound as ub
)
select
    t.id,
    b.LBound as [Start],
    b.Ubound as [End],
    sum(t.Amt) as TotalAmt
from Test as t
inner join bounds as b
    on b.id = t.id
   and t.Seq between b.LBound and b.Ubound
group by t.id, b.LBound, b.Ubound

drop table [Test]
This seems to work nicely. #breakingRows will contain all rows that break the sequence of id and seq (i.e. if id changes or if seq is not 1 more than the previous seq). With that table you can select all rows of such a sequence within #temp. I must add however that performance will probably be not all that good because of all the subqueries but you'll need to test to be sure.
-- Table variables take the @ prefix.
declare @temp table (id int, seq int, amt int)

insert into @temp (id, seq, amt)
values
    (1, 1, 500),
    (1, 2, 500),
    (1, 3, 500),
    (1, 5, 500),
    (2, 10, 600),
    (2, 11, 600),
    (3, 1, 700),
    (3, 3, 700)

-- @breakingRows: every row that starts a new run, i.e. a row with no row at
-- seq - 1 for the same id. ctr numbers them in (id, seq) order.
declare @breakingRows table (ctr int identity(1,1), id int, seq int)

insert into @breakingRows (id, seq)
select t1.id, t1.seq
from @temp t1
where not exists
    (select 1 from @temp t2 where t1.id = t2.id and t1.seq - 1 = t2.seq)
order by t1.id, t1.seq

-- For each run start (br), nxt is the next run start in (id, seq) order.
select
    br.id,
    br.seq as [start],
    -- End of the run: the last row that sorts before the next run start.
    -- For the very last run there is no next start, so fall back to the
    -- highest seq of that id (falling back to br.seq would report a
    -- multi-row final run as ending where it starts).
    isnull(
        (select top 1 t2.seq
         from @temp t2
         where t2.id < nxt.id
            or (t2.id = nxt.id and t2.seq < nxt.seq)
         order by t2.id desc, t2.seq desc),
        (select max(t3.seq) from @temp t3 where t3.id = br.id)
    ) as [end],
    -- Sum of the run: rows of this id from the run start up to (not including)
    -- the next run start of the same id, or up to the end of the data.
    (select sum(t1.amt)
     from @temp t1
     where t1.id = br.id
       and t1.seq >= br.seq
       and t1.seq < isnull(case when nxt.id = br.id then nxt.seq end,
                           (select max(seq) + 1 from @temp))
    ) as TotalAmt
from @breakingRows br
left join @breakingRows nxt
    on nxt.ctr = br.ctr + 1
order by br.id, br.seq
Since Andriy has already posted the gold solution, here's my take using an UPDATE statement to get the result from a temp table, just for fun.
-- Work table (a table variable, hence the @ prefix): one row per source row,
-- clustered on (id, seq). start / this / total are filled by the UPDATE below.
declare @tmp table (
    id int, seq int, amt money, start int, this int, total money,
    primary key clustered (id, seq))
;
-- btable: your table name; expected columns are id, seq, amt.
insert @tmp (id, seq, amt, start, this, total)
select id, seq, amt, seq, seq, convert(money, amt)
from btable
;
-- Scalar variables carry the previous row's values from row to row.
declare @id int, @seq int, @start int, @amt money

-- Running update: when the current row continues the previous row's run
-- (same id, seq one higher) it inherits the run start and accumulates the
-- total; otherwise it starts a new run.
-- NOTE(review): this depends on the rows being updated in clustered-key order,
-- which SQL Server does not document as guaranteed - verify before relying on it.
update @tmp
set
    @amt = total = case when id = @id and seq = @seq+1 then @amt+total else amt end,
    @start = start = case when id = @id and seq = @seq+1 then @start else seq end,
    @seq = this = seq,
    @id = id = id
from @tmp
option (maxdop 1)
;
-- The last row of each run holds the highest seq and the full running total.
select id, start, max(this) [end], max(total) total
from @tmp
group by id, start
order by id, start
Notes:
btable: your table name
id int, seq int, amt money: expected columns in your table
Try following query.
-- One row per id: lowest seq, highest seq and total amt.
-- Gaps in seq are not taken into account.
select
    id,
    min(seq),
    max(seq),
    sum(amt)
from table
group by id
OOps, sorry, it is wrong query as you need sequence
-- Seq minus its row number (per Id, ordered by Seq) is constant across a run
-- of consecutive Seq values; grouping by it yields one row per run.
-- [End] is bracketed because END is a reserved word, and the amount column of
-- the question's table is Amt.
SELECT
    g.Id,
    MIN(g.Seq) AS Start,
    MAX(g.Seq) AS [End],
    SUM(g.Amt) AS Total
FROM (
    SELECT
        t.Id,
        t.Seq,
        t.Amt,
        t.Seq - ROW_NUMBER() OVER (PARTITION BY t.Id ORDER BY t.Seq) AS Rn
    FROM [Table] AS t
) AS g
GROUP BY g.Id, g.Rn
ORDER BY g.Id, MIN(g.Seq)