DELETE EXCEPT TOP 1 - sql

Is there any way to delete all the rows in a table except one (random) row, without specifying any column names in the DELETE statement?
I'm trying to do something like this:
CREATE TABLE [dbo].[DeleteExceptTop1]([Id] INT)
INSERT [dbo].[DeleteExceptTop1] SELECT 1
INSERT [dbo].[DeleteExceptTop1] SELECT 2
INSERT [dbo].[DeleteExceptTop1] SELECT 3
SELECT * FROM [dbo].[DeleteExceptTop1]
DELETE
FROM [dbo].[DeleteExceptTop1]
EXCEPT
SELECT TOP 1 * FROM [dbo].[DeleteExceptTop1]
SELECT * FROM [dbo].[DeleteExceptTop1]
The final SELECT should yield one row (could be any of the three).

;WITH CTE AS
(
SELECT ROW_NUMBER() OVER (ORDER BY (SELECT newid())) AS RN
FROM [dbo].[DeleteExceptTop1]
)
DELETE FROM CTE
WHERE RN > 1
Or similar to #abatishchev's answer but with more variety in the ordering and avoiding deprecated constructs.
DECLARE #C INT
SELECT #C = COUNT(*) - 1
FROM [dbo].[DeleteExceptTop1]
IF #c > 0
BEGIN
WITH CTE AS
(
SELECT TOP(#C) *
FROM [dbo].[DeleteExceptTop1]
ORDER BY NEWID()
)
DELETE FROM CTE;
END
Or a final way that uses EXCEPT and assumes no duplicate rows and that all columns are of datatypes compatible with the EXCEPT operator
/*Materialise TOP 1 to ensure only evaluated once*/
SELECT TOP(1) *
INTO #T
FROM [dbo].[DeleteExceptTop1]
ORDER BY NEWID()
;WITH CTE AS
(
SELECT *
FROM [dbo].[DeleteExceptTop1] T1
WHERE EXISTS(
SELECT *
FROM #T
EXCEPT
SELECT T1.*)
)
DELETE FROM CTE;
DROP TABLE #T

Try:
declare #c int
select #c = count(*) - 1 from [dbo].[DeleteExceptTop1]
IF #c > 0
BEGIN
set RowCount #c
delete from [dbo].[DeleteExceptTop1]
END

No.
You need to use a column name (such as that of the primary key) to identify which rows you want to remove.
"random row" has no meaning in SQL except its data. If you want to delete everything except some row, you must differentiate that row from the others you with to DELETE
EXCEPT works by comparing the DISTINCT values in the row.
EDIT: If you can specify the primary key then this is a trivial matter. You can simply DELETE where the PK <> your "random" selection or NOT IN your "random" selection(s).
EDIT: Apparently I'm wrong about the need to specify any column name, you can do it using the assigned ROW_NUMBER.. But I'm not going to delete my answer because it references your use of EXCEPT which was discussed in the comments. You cannot do it without deriving some column name like that from ROW_NUMBER

You could do something like this (SQL 2008)
DECLARE #Original TABLE ([Id] INT)
INSERT INTO #Original(ID) VALUES(1)
INSERT INTO #Original(ID) VALUES(2)
INSERT INTO #Original(ID) VALUES(3)
SELECT * FROM #Original;
WITH CTE AS
(SELECT ROW_NUMBER() OVER(ORDER BY ID) AS ROW, ID FROM #Original)
DELETE #Original
FROM #Original O
INNER JOIN CTE ON O.ID = CTE.ROW
WHERE ROW > 1
SELECT * FROM #Original

It seems like the simplest answer may be the best. The following should work:
Declare #count int
Set #count=(Select count(*) from DeleteExceptTop1)-1
Delete top (#count) from DeleteExceptTop1

I know it has been answered but what about?
DELETE
FROM [dbo].[DeleteExceptTop1]
Where Id not in (
SELECT TOP 1 * FROM [dbo].[DeleteExceptTop1])

Related

Loop through sql result set and remove [n] duplicates

I've got a SQL Server db with quite a few dupes in it. Removing the dupes manually is just not going to be fun, so I was wondering if there is any sort of sql programming or scripting I can do to automate it.
Below is my query that returns the ID and the Code of the duplicates.
select a.ID, a.Code
from Table1 a
inner join (
SELECT Code
FROM Table1 GROUP BY Code HAVING COUNT(Code)>1)
x on x.Code= a.Code
I'll get a return like this, for example:
5163 51727
5164 51727
5165 51727
5166 51728
5167 51728
5168 51728
This snippet shows three returns for each ID/Code (so a primary "good" record and two dupes). However this isnt always the case. There can be up to [n] dupes, although 2-3 seems to be the norm.
I just want to somehow loop through this result set and delete everything but one record. THE RECORDS TO DELETE ARE ARBITRARY, as any of them can be "kept".
You can use row_number to drive your delete.
ie
CREATE TABLE #table1
(id INT,
code int
);
WITH cte AS
(select a.ID, a.Code, ROW_NUMBER() OVER(PARTITION by COdE ORDER BY ID) AS rn
from #Table1 a
)
DELETE x
FROM #table1 x
JOIN cte ON x.id = cte.id
WHERE cte.rn > 1
But...
If you are going to be doing a lot of deletes from a very large table you might be better off to select out the rows you need into a temp table & then truncate your table and re-insert the rows you need.
Keeps the Transaction log from getting hammered, your CI getting Fragged and should be quicker too!
It is actually very simple:
DELETE FROM Table1
WHERE ID NOT IN
(SELECT MAX(ID)
FROM Table1
GROUP BY CODE)
Self join solution with a performance test VS cte.
create table codes(
id int IDENTITY(1,1) NOT NULL,
code int null,
CONSTRAINT [PK_codes_id] PRIMARY KEY CLUSTERED
(
id ASC
))
declare #counter int, #code int
set #counter = 1
set #code = 1
while (#counter <= 1000000)
begin
print ABS(Checksum(NewID()) % 1000)
insert into codes(code) select ABS(Checksum(NewID()) % 1000)
set #counter = #counter + 1
end
GO
set statistics time on;
delete a
from codes a left join(
select MIN(id) as id from codes
group by code) b
on a.id = b.id
where b.id is null
set statistics time off;
--set statistics time on;
-- WITH cte AS
-- (select a.id, a.code, ROW_NUMBER() OVER(PARTITION by code ORDER BY id) AS rn
-- from codes a
-- )
-- delete x
-- FROM codes x
-- JOIN cte ON x.id = cte.id
-- WHERE cte.rn > 1
--set statistics time off;
Performance test results:
With Join:
SQL Server Execution Times:
CPU time = 3198 ms, elapsed time = 3200 ms.
(999000 row(s) affected)
With CTE:
SQL Server Execution Times:
CPU time = 4197 ms, elapsed time = 4229 ms.
(999000 row(s) affected)
It's basically done like this:
WITH CTE_Dup AS
(
SELECT*,
ROW_NUMBER()OVER (PARTITIONBY SalesOrderno, ItemNo ORDER BY SalesOrderno, ItemNo)
AS ROW_NO
from dbo.SalesOrderDetails
)
DELETEFROM CTE_Dup WHERE ROW_NO > 1;
NOTICE: MUST INCLUDE ALL FIELDS!!
Here is another example:
CREATE TABLE #Table (C1 INT,C2 VARCHAR(10))
INSERT INTO #Table VALUES (1,'SQL Server')
INSERT INTO #Table VALUES (1,'SQL Server')
INSERT INTO #Table VALUES (2,'Oracle')
SELECT * FROM #Table
;WITH Delete_Duplicate_Row_cte
AS (SELECT ROW_NUMBER()OVER(PARTITION BY C1, C2 ORDER BY C1,C2) ROW_NUM,*
FROM #Table )
DELETE FROM Delete_Duplicate_Row_cte WHERE ROW_NUM > 1
SELECT * FROM #Table

SQL: how to get random number of rows from one table for each row in another

I have two tables where the data is not related
For each row in table A i want e.g. 3 random rows in table B
This is fairly easy using a cursor, but it is awfully slow
So how can i express this in single statement to avoid RBAR ?
To get a random number between 0 and (N-1), you can use.
abs(checksum(newid())) % N
Which means to get positive values 1-N, you use
1 + abs(checksum(newid())) % N
Note: RAND() doesn't work - it is evaluated once per query batch and you get stuck with the same value for all rows of tableA.
The query:
SELECT *
FROM tableA A
JOIN (select *, rn=row_number() over (order by newid())
from tableB) B ON B.rn <= 1 + abs(checksum(newid())) % 9
(assuming you wanted up to 9 random rows of B per A)
assuming tableB has integer surrogate key, try
Declare #maxRecs integer = 11 -- Maximum number of b records per a record
Select a.*, b.*
From tableA a Join tableB b
On b.PKColumn % (floor(Rand() * #maxRecs)) = 0
If you have a fixed number that you know in advance (such as 3), then:
select a.*, b.*
from a cross join
(select top 3 * from b) b
If you want a random number of rows from "b" for each row in "a", the problem is a bit harder in SQL Server.
Heres an example of how this could be done, code is self contained, copy and press F5 ;)
-- create two tables we can join
DECLARE #datatable TABLE(ID INT)
DECLARE #randomtable TABLE(ID INT)
-- add some dummy data
DECLARE #i INT = 1
WHILE(#i < 3) BEGIN
INSERT INTO #datatable (ID) VALUES (#i)
SET #i = #i + 1
END
SET #i = 1
WHILE(#i < 100) BEGIN
INSERT INTO #randomtable (ID) VALUES (#i)
SET #i = #i + 1
END
--The key here being the ORDER BY newid() which makes sure that
--the TOP 3 is different every time
SELECT
d.ID AS DataID
,rtable.ID RandomRow
FROM #datatable d
LEFT JOIN (SELECT TOP 3 * FROM #randomtable ORDER BY newid()) as rtable ON 1 = 1
Heres an example of the output

T-Sql count string sequences over multiple rows

How can I find subsets of data over multiple rows in sql?
I want to count the number of occurrences of a string (or number) before another string is found and then count the number of times this string occurs before another one is found.
All these strings can be in random order.
This is what I want to achieve:
I have one table with one column (columnx) with data like this:
A
A
B
C
A
B
B
The result I want from the query should be like this:
2 A
1 B
1 C
1 A
2 B
Is this even possible in sql or would it be easier just to write a little C# app to do this?
Since, as per your comment, you can add a column that will unambiguously define the order in which the columnx values go, you can try the following query (provided the SQL product you are using supports CTEs and ranking functions):
WITH marked AS (
SELECT
columnx,
sortcolumn,
grp = ROW_NUMBER() OVER ( ORDER BY sortcolumn)
- ROW_NUMBER() OVER (PARTITION BY columnx ORDER BY sortcolumn)
FROM data
)
SELECT
columnx,
COUNT(*)
FROM marked
GROUP BY
columnx,
grp
ORDER BY
MIN(sortcolumn)
;
You can see the method in work on SQL Fiddle.
If sortcolumn is an auto-increment integer column that is guaranteed to have no gaps, you can replace the first ROW_NUMBER() expression with just sortcolumn. But, I guess, that cannot be guaranteed in general. Besides, you might indeed want to sort on a timestamp instead of an integer.
I dont think you can do it with a single select.
You can use AdventureWorks cursor:
create table my_Strings
(
my_string varchar(50)
)
insert into my_strings values('A'),('A'),('B'),('C'),('A'),('B'),('B') -- this method will only work on SQL Server 2008
--select my_String from my_strings
declare #temp_result table(
string varchar(50),
nr int)
declare #myString varchar(50)
declare #myLastString varchar(50)
declare #nr int
set #myLastString='A' --set this with the value of your FIRST string on the table
set #nr=0
DECLARE string_cursor CURSOR
FOR
SELECT my_string as aux_column FROM my_strings
OPEN string_cursor
FETCH NEXT FROM string_cursor into #myString
WHILE ##FETCH_STATUS = 0 BEGIN
if (#myString = #myLastString) begin
set #nr=#nr+1
set #myLastString=#myString
end else begin
insert into #temp_result values (#myLastString, #nr)
set #myLastString=#myString
set #nr=1
end
FETCH NEXT FROM string_cursor into #myString
END
insert into #temp_result values (#myLastString, #nr)
CLOSE string_cursor;
DEALLOCATE string_cursor;
select * from #temp_result
Result:
A 2
B 1
C 1
A 1
B 2
Try this :
;with sample as (
select 'A' as columnx
union all
select 'A'
union all
select 'B'
union all
select 'C'
union all
select 'A'
union all
select 'B'
union all
select 'B'
), data
as (
select columnx,
Row_Number() over(order by (select 0)) id
from sample
) , CTE as (
select * ,
Row_Number() over(order by (select 0)) rno from data
) , result as (
SELECT d.*
, ( SELECT MAX(ID)
FROM CTE c
WHERE NOT EXISTS (SELECT * FROM CTE
WHERE rno = c.rno-1 and columnx = c.columnx)
AND c.ID <= d.ID) AS g
FROM data d
)
SELECT columnx,
COUNT(1) cnt
FROM result
GROUP BY columnx,
g
Result :
columnx cnt
A 2
B 1
C 1
A 1
B 2

Get N th row value in sql server

DECLARE #ActionNumber varchar(20)='EHPL-DES-SQ-1021'
set #ActionNumber=(select top 1 * from dbo.ANOSplit(#ActionNumber,'-')
order by ROW_NUMBER() OVER (ORDER BY items))
select #ActionNumber
from above query i need to return the 2ND and 3RD index from initial #ActionNumber
'EHPL-DES-SQ-1021' after Split().
format of the ActionNumber is exactly as above but DES, SQ and 1021 can change.
so i can not use ORDER BY items ASC or ORDER BY items DESC because it will order alphabetically.
above query returns 'EHPL'.how can i get DES and SQ.
You can do it with the ANOSplit function, but I would insert the result into a temp table or table variable.
As you said yourself, you can't just ORDER BY the values returned by the ANOSplit function because it will order alphabetically.
--> So you can use a temp table with an IDENTITY column, and use this for sorting:
DECLARE #ActionNumber varchar(20)='EHPL-DES-SQ-1021'
declare #tmp table
(
id int identity(1,1),
item varchar(20)
)
insert into #tmp (item)
select * from dbo.ANOSplit(#ActionNumber,'-')
select * from #tmp where id in (2,3)
The items will be inserted into the table in the exact order returned by the function, so after inserting you know that the lines with id 2 and 3 are the ones you want.
Try to use Substring with CharIndex >>>
DECLARE #ActionNumber varchar(20)='EHPL-DES-SQ-1021'
select SUBSTRING (#ActionNumber,CHARINDEX ('-',#ActionNumber,0) + 1, 3)
This isn't tested, but I think it will work:
DECLARE #ActionNumber varchar(20)='EHPL-DES-SQ-1021'
WITH nCTE AS
(
SELECT
ROW_NUMBER() OVER (ORDER BY items) AS RNum
FROM dbo.ANOSplit(#ActionNumber,'-')
)
SELECT * FROM nCTE WHERE RNum = 2 --put n here

How can I use if statement after a CTE (SQL Server 2005)

Last night I was writing a simple T-SQL program something like this
DECLARE #ROLEID AS INT
SELECT #ROLEID = [ROLE ID] FROM TBLROLE
;WITH CTE
AS
(
SELECT * FROM SOMETABLE
)
IF (#ROLEID = 1)
BEGIN
//SOMECODE
END
ELSE IF(#ROLEID = 2)
BEGIN
//SOMECODE
END
ELSE
BEGIN
//SOMECODE
END
I found after compilation that it is throwing error something like "Incorrect statement near if"
What is wrong?
However, I did that by using some other way. But I wanted to know why it did not work!
Common table expressions are defined within the context of a single statement:
WITH cte_name AS (
<cte definition>)
<statement that uses cte>;
So you can do something like:
WITH CTE
AS
(
SELECT * FROM SOMETABLE
)
SELECT * FROM CTE;
or
WITH CTE
AS
(
SELECT * FROM SOMETABLE
)
UPDATE CTE
SET somefield = somevalue
WHERE id = somekey;
A CTE must be followed by a single
SELECT, INSERT, UPDATE, MERGE, or
DELETE statement that references some
or all the CTE columns. A CTE can also
be specified in a CREATE VIEW
statement as part of the defining
SELECT statement of the view
A little late but I can't be the only one bumping into this.
A solution could be to create a temporary table like this:
-- If previous run of this query fails, the temp table will be deleted.
-- Selecting into creates the temp table which fails if it already exists
IF EXISTS(SELECT [name] FROM tempdb.sys.tables WHERE [name] like '#dtBalansOpgesteldGefilterd%') BEGIN
DROP TABLE #temp
END;
;WITH CTE
AS
(
SELECT * FROM SOMETABLE
)
-- Followed by select statement as required
SELECT *
INTO #temp
FROM CTE
IF #awsome = 1
BEGIN
SELECT 'WHATEVERYOUWANT' AS WhateverColumnNameYouWant, *
FROM #temp
END
The closest you'll get is using a UNION ALL to do a crude switched select:
DECLARE #ROLEID AS INT
SELECT #ROLEID = [ROLE ID] FROM TBLROLE
;WITH CTE
AS
(
SELECT * FROM SOMETABLE
)
SELECT
--somecolumns
FROM
CTE
--other stuff too
WHERE
#ROLEID = 1
UNION ALL
SELECT
--somecolumns
FROM
CTE
--other stuff too
WHERE
#ROLEID = 2
UNION ALL
SELECT
--somecolumns
FROM
CTE
--other stuff too
WHERE
#ROLEID = 3
...
UNION ALL
SELECT
--somecolumns
FROM
CTE
--other stuff too
WHERE
#ROLEID = n
Try putting the CTE in the IF. It worked for me.
IF #awsome = 1
BEGIN
;WITH CTE
AS
(
SELECT * FROM SOMETABLE
)
SELECT 'WHATEVERYOUWANT' FROM CTE
END
ELSE IF #awesome = 2
BEGIN
;WITH CTE2
AS
(
SELECT * FROM SOMETABLE
)
SELECT 'WHATEVERYOUWANT' FROM CTE2
END