SQL Server - Select if one of all columns are unique - sql

I want to select only one row from each set of rows where multiple columns are the same. For example:
col1 col2 col3 col4
a b 1 2
b b 1 2
a c 1 2
b b 1 3
a c 2 1
Condition: Select only if values of columns (col1, col2, col3) are different from other rows and value of col4 is max of rows which are the same.
For example expected Output is:
a b 1 2
b b 1 3
a c 1 2
a c 2 1

Yes possible, just use group by with max aggregation as
-- Inline sample rows, then one output row per distinct (col1, col2, col3)
-- carrying the largest col4 seen for that combination.
WITH tab (col1, col2, col3, col4) AS (
    SELECT v.c1, v.c2, v.c3, v.c4
    FROM (
        VALUES
            ('a', 'b', 1, 2),
            ('b', 'b', 1, 2),
            ('a', 'c', 1, 2),
            ('b', 'b', 1, 3),
            ('a', 'c', 2, 1)
    ) AS v (c1, c2, c3, c4)
)
SELECT
    col1,
    col2,
    col3,
    MAX(col4) AS col4
FROM tab
GROUP BY
    col1,
    col2,
    col3;
col1 col2 col3 col4
a b 1 2
a c 1 2
a c 2 1
b b 1 3
Rextester Demo

Mandatory NOT EXISTS solution... your condition written as a not exist query:
-- Sample data held in a table variable. T-SQL table variables are named with
-- a leading @ (a leading # is only valid for temp tables made with CREATE TABLE).
DECLARE @t TABLE (col1 varchar(100), col2 varchar(100), col3 int, col4 int);

INSERT INTO @t (col1, col2, col3, col4) VALUES
    ('a', 'b', 1, 2),
    ('a', 'c', 1, 2),
    ('a', 'c', 2, 1),
    ('b', 'b', 1, 2),
    ('b', 'b', 1, 3);

-- Keep a row only when no other row with the same (col1, col2, col3) has a
-- larger col4. Because the comparison is strict, rows tied on the maximum
-- col4 are all returned.
SELECT
    t.col1,
    t.col2,
    t.col3,
    t.col4
FROM @t AS t
WHERE NOT EXISTS (
    SELECT 1
    FROM @t AS dup
    WHERE dup.col1 = t.col1
      AND dup.col2 = t.col2
      AND dup.col3 = t.col3
      AND dup.col4 > t.col4 -- outer row has smaller col4
);
Demo on DB Fiddle

Related

Finding partial and exact duplicate from a SQL table

I am trying to read duplicates from a table. There are some partial duplicates based on values of Col1 and Col2 and there are some full duplicates based on Col1, Col2 and Col3 as in below table.
Col1 Col2 Col3
1 John 100
1 John 200
2 Tom 150
3 Bob 100
3 Bob 100
4 Sam 500
I want to capture partial and exact duplicates in two separate outputs and ignore the non-repeated rows like 2 and 4 e.g.
Partial Duplicates
Col1 Col2 Col3
1 John 100
1 John 200
Full Duplicate
Col1 Col2 Col3
3 Bob 100
3 Bob 100
What is the best way to achieve this with SQL?
I tried using the self join with spark-sql but getting error: -
// Load the three columns of interest from sample_table.
val source_df = sql("select col1, col2, col3 from sample_table")
// Self-join attempt: df1 is source_df, df2 is inter_df, matched on equality of Col3, Col2 and Col1,
// then Col1/Col2/Col3 from df1 plus Col3 from df2 are shown.
// NOTE(review): inter_df is not defined anywhere in this snippet; presumably source_df was intended. Confirm.
source_df.as("df1").join(inter_df.as("df2"), $"df1.Col3" === $"df2.Col3" and $"df1.Col2" === $"df2.Col2" and $"df1.Col1" === $"df2.Col1").select($"df1.Col1",$"df1.Col2",$"df1.Col3",$"df2.Col3").show()
Error
org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
Exchange hashpartitioning(Col3#1957, 200)
For partial duplicates:
-- Partial duplicates: rows for which some other row shares col1 and col2
-- but carries a different col3.
SELECT cur.*
FROM tbl AS cur
WHERE EXISTS (
    SELECT 1
    FROM tbl AS other
    WHERE other.col1 = cur.col1
      AND other.col2 = cur.col2
      AND other.col3 <> cur.col3
)
Returns:
col1 col2 col3
1 John 100
1 John 200
For full duplicates, add a unique identifier per combination of col1, col2, and col3, and look for cases where there's another record with the same col1, col2, and col3, but a different unique identifier:
-- Full duplicates: number the rows inside each (col1, col2, col3) group, then
-- keep every row that has a sibling with the same three values but a
-- different number.
;WITH cte AS (
    SELECT
        ROW_NUMBER() OVER (
            PARTITION BY col1, col2, col3
            ORDER BY col1, col2, col3
        ) AS uniqueid,
        col1,
        col2,
        col3
    FROM tbl
)
SELECT
    a.col1,
    a.col2,
    a.col3
FROM cte AS a
WHERE EXISTS (
    SELECT 1
    FROM cte AS b
    WHERE b.col1 = a.col1
      AND b.col2 = a.col2
      AND b.col3 = a.col3
      AND b.uniqueid <> a.uniqueid
)
Returns:
col1 col2 col3
3 Bob 100
3 Bob 100
http://sqlfiddle.com/#!18/f1d78/2
-- Fixture for the duplicate-detection queries: rows 1 (John) differ only in
-- col3, rows 3 (Bob) are identical, rows 2 and 4 are unique.
CREATE TABLE tbl (
    col1 INT,
    col2 VARCHAR(5),
    col3 INT
);

INSERT INTO tbl (col1, col2, col3)
VALUES
    (1, 'John', 100),
    (1, 'John', 200),
    (2, 'Tom', 150),
    (3, 'Bob', 100),
    (3, 'Bob', 100),
    (4, 'Sam', 500);
partial duplicates - use exists. Here is the demo.
-- Partial duplicates: a row qualifies when another row matches it on Col1
-- and Col2 while differing on Col3.
select candidate.*
from myTable as candidate
where exists (
    select 1
    from myTable as sibling
    where sibling.Col1 = candidate.Col1
      and sibling.Col2 = candidate.Col2
      and sibling.Col3 <> candidate.Col3
)
output:
----------------------
Col1 Col2 Col3
----------------------
1 John 100
1 John 200
----------------------
full duplicates - you can use count(*) as window function. Here is the demo.
-- Full duplicates: attach to every row the size of its (Col1, Col2, Col3)
-- group and keep the rows whose group holds more than one row.
select
    counted.Col1,
    counted.Col2,
    counted.Col3
from (
    select
        Col1,
        Col2,
        Col3,
        count(*) over (partition by Col1, Col2, Col3) as group_size
    from myTable
) as counted
where counted.group_size > 1
output:
----------------------
Col1 Col2 Col3
----------------------
3 Bob 100
3 Bob 100
----------------------

Get Top N row from each set from table with 4 column in SQL Server

Assume I have a table with 4 columns:
Col1 Col2 Col3 Col4
My initial query is :
-- Every row of myTable, newest Col3 first within each (Col1, Col2) pair.
SELECT
    Col1,
    Col2,
    Col3,
    Col4
FROM myTable
ORDER BY
    Col1 ASC,
    Col2 ASC,
    Col3 DESC,
    Col4 ASC
My desired result is all 4 columns, but with the condition that, when several rows share the same Col1 and Col2, only the top N of those rows (by Col3, descending) are returned.
Example with N=2 :
Table sample data:
Col1 Col2 Col3 Col4
---------------------
1 a 2000 s
1 a 2002 c
1 a 2001 b
2 b 1998 s
2 b 2002 c
2 b 2000 b
3 c 2000 b
1 f 1998 n
1 g 1999 e
Desired result:
1 a 2002 c
1 a 2001 b
1 f 1998 n
1 g 1999 e
2 b 2002 c
2 b 2000 b
3 c 2000 b
In another description, when (col1, col2) is repeated in multiple records, just export top N rows of those records when order by Col3 descending.
Can I do this with SQL script, without hard coding?
-- Sample data in a table variable. T-SQL variables, including table
-- variables, must be named with a leading @.
declare @t table (Col1 int, Col2 char, Col3 int, Col4 char);

insert into @t (Col1, Col2, Col3, Col4) values
    (1, 'a', 2000, 's'),
    (1, 'a', 2002, 'c'),
    (1, 'a', 2001, 'b'),
    (2, 'b', 1998, 's'),
    (2, 'b', 2002, 'c'),
    (2, 'b', 2000, 'b'),
    (3, 'c', 2000, 'b'),
    (1, 'f', 1998, 'n'),
    (1, 'g', 1999, 'e');

declare @N int = 2; -- number per "top"

-- Number the rows of each (Col1, Col2) group from the highest Col3 down
-- (Col4 breaks ties so the numbering is repeatable), then keep the first @N.
with cte as
(
    select
        Col1,
        Col2,
        Col3,
        Col4,
        row_number() over (partition by Col1, Col2 order by Col3 desc, Col4) as rn
    from @t
)
select
    c.Col1,
    c.Col2,
    c.Col3,
    c.Col4,
    c.rn
from cte as c
where c.rn <= @N
order by c.Col1, c.Col2, c.Col3 desc;
I think the code below gives the expected result:
-- Sample data in a table variable (T-SQL variables need a leading @).
declare @tab table (Col1 int, Col2 char(1), Col3 int, Col4 char(1));
-- Rows to keep per (Col1, Col2) group; the filter below uses this value
-- rather than a hard-coded literal.
declare @N int = 2;

insert into @tab (Col1, Col2, Col3, Col4)
select 1, 'a', 2000, 's'
union all
select 1, 'a', 2002, 'c'
union all
select 1, 'a', 2001, 'b'
union all
select 2, 'b', 1998, 's'
union all
select 2, 'b', 2002, 'c'
union all
select 2, 'b', 2000, 'b'
union all
select 3, 'c', 2000, 'b'
union all
select 1, 'f', 1998, 'n'
union all
select 1, 'g', 1999, 'e';

-- Rank rows inside each (Col1, Col2) group by Col3, highest first, and keep
-- the first @N of each group.
with tab as
(
    select
        row_number() over (partition by t.Col1, t.Col2 order by t.Col3 desc) as rn,
        t.Col1,
        t.Col2,
        t.Col3,
        t.Col4
    from @tab as t
)
select Col1, Col2, Col3, Col4
from tab
where rn <= @N
order by Col1, Col2, Col3 desc;
output
Col1 Col2 Col3 Col4
1 a 2002 c
1 a 2001 b
1 f 1998 n
1 g 1999 e
2 b 2002 c
2 b 2000 b
3 c 2000 b
METHOD 1- FOR MSSQL
http://sqlfiddle.com/#!6/4bda39/6
-- Number rows within each (col1, col2) group by col3, highest first, and
-- return the first two of every group (the row number is the first column).
select ranked.*
from (
    select
        ROW_NUMBER() over (partition by t.col1, t.col2 order by t.col3 desc) as row,
        t.*
    from myTable as t
) as ranked
where ranked.row <= 2
Replace the 2 in a.row <= 2 with your N.
METHOD 2- FOR MYSQL
http://sqlfiddle.com/#!9/79e81a/63
-- Top 2 rows per (Col1, Col2) group by Col3, without window functions.
-- A row is kept when fewer than 2 rows of its own group have a strictly
-- larger Col3. Comparing Col3 against the group average (the previous
-- approach) only matches top-N for particular data distributions.
-- Rows tied on Col3 at the cut-off are all returned.
-- Replace the literal 2 with the desired N.
SELECT
    m.Col1,
    m.Col2,
    m.Col3,
    m.Col4
FROM myTable AS m
WHERE (
    SELECT COUNT(*)
    FROM myTable AS higher
    WHERE higher.Col1 = m.Col1
      AND higher.Col2 = m.Col2
      AND higher.Col3 > m.Col3
) < 2
ORDER BY m.Col1, m.Col2, m.Col3 DESC, m.Col4
METHOD 3- FOR MYSQL
http://sqlfiddle.com/#!9/79e81a/79
-- Top 2 rows per (Col1, Col2) group using MySQL user variables as a running
-- row counter. User variables are written @name; a leading # would start a
-- comment in MySQL and truncate each line.
-- The counter restarts at 1 whenever Col1 or Col2 differs from the previous
-- row's values and increments otherwise.
-- NOTE(review): this relies on the select-list expressions being evaluated
-- left to right in ORDER BY order, which MySQL does not guarantee; on
-- MySQL 8+ prefer ROW_NUMBER() OVER (PARTITION BY Col1, Col2 ORDER BY Col3 DESC).
SELECT * FROM (
    SELECT
        CASE Col1
            WHEN @Col1 THEN
                CASE Col2
                    WHEN @Col2 THEN @curRow := @curRow + 1
                    ELSE @curRow := 1
                END
            ELSE @curRow := 1
        END AS `rank`, -- quoted: RANK is a reserved word in MySQL 8
        @Col1 := Col1 AS Col1,
        @Col2 := Col2 AS Col2,
        Col3, Col4
    FROM myTable p
    JOIN (SELECT @curRow := 0, @Col1 := 0, @Col2 := '') r
    ORDER BY Col1, Col2, Col3 DESC
) AS tt
WHERE tt.`rank` <= 2
Replace the 2 in tt.rank <= 2 with your desired N.

Pivoting row's to columns

How to achieve the below??
Anyone help me out
col_1 col_2
A 1
B 1
C 1
B 2
C 4
A 2
A 6
Output:
A B C
1 1 1
2 2 4
6
This will do the job, but it seems like quite an odd thing to want to do, so I am probably missing something?
-- Temp-table fixture: one (letter, value) pair per row.
CREATE TABLE #table (col1 CHAR(1), col2 INT);

INSERT INTO #table (col1, col2)
VALUES
    ('A', 1),
    ('B', 1),
    ('C', 1),
    ('B', 2),
    ('C', 4),
    ('A', 2),
    ('A', 6);

-- Position each value within its letter (smallest col2 first), then emit one
-- output row per position with the A, B and C values side by side.
-- The recursive Numbers CTE supplies positions 1..50; positions with no
-- ranked row produce no output because of the inner join.
WITH Numbers AS (
    SELECT 1 AS number
    UNION ALL
    SELECT number + 1
    FROM Numbers
    WHERE number < 50
),
Ranked AS (
    SELECT
        src.col1,
        src.col2,
        ROW_NUMBER() OVER (PARTITION BY src.col1 ORDER BY src.col2) AS rank_id
    FROM #table AS src
)
SELECT
    MAX(CASE r.col1 WHEN 'A' THEN r.col2 END) AS [A],
    MAX(CASE r.col1 WHEN 'B' THEN r.col2 END) AS [B],
    MAX(CASE r.col1 WHEN 'C' THEN r.col2 END) AS [C]
FROM Ranked AS r
INNER JOIN Numbers AS n
    ON n.number = r.rank_id
GROUP BY
    n.number;
Results are:
A B C
1 1 1
2 2 4
6 NULL NULL
Looks like you are trying to pivot without aggregation? Here is another option:
-- Spread col2 values into one column per col1 value (A, B, C) with PIVOT.
-- dr ranks the col2 values inside each col1; it is not in the outer select
-- list, but it is part of the pivoted source, so rows are grouped per dr.
-- dense_rank gives equal col2 values within one col1 the same dr, so such
-- duplicates fall into a single output row.
-- NOTE(review): confirm the target engine accepts the alias-qualified names
-- (t.col2, t.col1) inside the PIVOT clause.
select A, B, C from
( select col1, col2, dense_rank() over (partition by col1 order by col2) dr from #table) t
pivot
( max(t.col2) for t.col1 in (A, B, C)) pvt;
Also check this out for more examples/discussion: TSQL Pivot without aggregate function

SQL select rows with only a certain value in them

I have a table as such
Col 1 Col 2 Col 3
1 A 1
2 A 2
3 B 1
4 C 1
5 C 2
6 D 1
How do I only get unique rows which have Col 3 = 1?
I want to get rows 3 and 6 (Col 2 = B and D respectively). I do not want A nor C since they have Col 3 = 2 as well.
I've tried something along the lines of:
select col 2 from table group by col 2 having count(col 3) = 1
But that only brings up Col 2 for results so I'm uncertain if Col 3 contents = 1 or not.
EDIT: Sorry guys maybe I've not worded my question clearly. I want to get all of the rows of Col 2 which contain only Col 3 = 1 AND ONLY 1.
So if I tried WHERE Col 3= 1, it would return 4 rows because A has 1. But since A also has a row where Col 3 = 2, I do not want that, same for C. From this example table, I would want the end result to only show 2 rows, B and D.
My example table is an example, I actually have about 5000 rows to filter through, otherwise I'd do as you guys have suggested :)
-- col2 values whose non-null col3 entries are all exactly 1: both the
-- smallest and the largest col3 in the group must equal 1.
SELECT grp.col2
FROM your_table AS grp
GROUP BY grp.col2
HAVING MIN(grp.col3) = 1
   AND MAX(grp.col3) = 1
Or
-- col2 of every row with col3 = 1 for which no row of the same col2 carries
-- a col3 other than 1.
SELECT cur.col2
FROM your_table AS cur
WHERE cur.col3 = 1
  AND NOT EXISTS (
        SELECT 1
        FROM your_table AS other
        WHERE other.col2 = cur.col2
          AND other.col3 <> 1
  )
What you are probably looking for is WHERE clause.
-- Returns rows whose col3 is 1 and whose col2 is one of the literals 'B' or 'D'.
-- NOTE(review): the col2 list is hard-coded, so it only answers the question
-- when the qualifying col2 values are already known.
SELECT * FROM YouTable WHERE col3 = 1 AND col2 in ('B','D');
-- Inline sample rows, then an anti-join: keep rows with [Col 3] = 1 whose
-- [Col 2] never appears among the rows having a [Col 3] other than 1.
;with T ([Col 1], [Col 2], [Col 3]) as
(
    select v.c1, v.c2, v.c3
    from (
        values
            (1, 'A', 1),
            (2, 'A', 2),
            (3, 'B', 1),
            (4, 'C', 1),
            (5, 'C', 2),
            (6, 'D', 1)
    ) as v (c1, c2, c3)
),
NotOne as
(
    -- every [Col 2] that has at least one row with [Col 3] <> 1
    select distinct [Col 2]
    from T
    where [Col 3] <> 1
)
select *
from T
left outer join NotOne as T2
    on T2.[Col 2] = T.[Col 2]
where T2.[Col 2] is null
  and T.[Col 3] = 1
It's a bit hard to know exactly what you're trying to get, but this is my best guess:
-- Rows whose col2 has no row with a col3 other than 1.
-- Written with NOT EXISTS rather than NOT IN: if the NOT IN subquery ever
-- yields a NULL col2, NOT IN evaluates to UNKNOWN for every row and the
-- query silently returns nothing.
-- Note: outer rows whose col2 is NULL are returned here, whereas NOT IN
-- would drop them whenever the subquery is non-empty.
SELECT t.*
FROM theTable AS t
WHERE NOT EXISTS (
    SELECT 1
    FROM theTable AS x
    WHERE x.col2 = t.col2
      AND x.col3 <> 1
)
-- Rows whose Col2 group contains nothing but Col3 = 1.
-- The group test checks the values themselves (smallest and largest Col3
-- both equal 1). Testing count(col3) = 1 would also accept a group made of a
-- single row whose Col3 is not 1, and would reject a group with several
-- rows that are all 1.
SELECT t1.Col1, t1.Col2, t1.Col3
FROM #temp AS t1
WHERE EXISTS
(
    SELECT t2.Col2
    FROM #temp AS t2
    WHERE t2.Col2 = t1.Col2
    GROUP BY t2.Col2
    HAVING MIN(t2.Col3) = 1
       AND MAX(t2.Col3) = 1
)
Tested with MS SQL 2008 and the following (so if my answer is not the correct one, it may help others test theirs...):
-- Self-contained test script: build the sample table, run the query, clean up.
CREATE TABLE #temp
(
    Col1 INT,
    Col2 CHAR(1),
    Col3 INT
);

-- UNION ALL: the fixture rows are already distinct, so no de-duplication
-- pass is needed.
INSERT INTO #temp (Col1, Col2, Col3)
SELECT 1, 'A', 1
UNION ALL
SELECT 2, 'A', 2
UNION ALL
SELECT 3, 'B', 1
UNION ALL
SELECT 4, 'C', 1
UNION ALL
SELECT 5, 'C', 2
UNION ALL
SELECT 6, 'D', 1;

-- Rows whose Col2 group contains nothing but Col3 = 1 (expected: B and D).
-- The HAVING clause checks the Col3 values rather than the row count, so a
-- group made of a single non-1 row is not returned by accident.
SELECT t1.Col1, t1.Col2, t1.Col3
FROM #temp AS t1
WHERE EXISTS
(
    SELECT t2.Col2
    FROM #temp AS t2
    WHERE t2.Col2 = t1.Col2
    GROUP BY t2.Col2
    HAVING MIN(t2.Col3) = 1
       AND MAX(t2.Col3) = 1
);

DROP TABLE #temp;

T-SQL Distinct column problem when trying to filter duplicates out

I have the following data
COL-1 COL-2
1 0TY/OK
1 0TY/OK
1 0TY/OK
1 0TY/OK
1 0TY/OK
2 2KP/L
2 2KP/L
2 2KP/L
2 2KP/L
2 2KP/L
3 7U5/2M
3 7U5/2M
3 7U5/2M
3 7U5/2M
And i want to construct a select query to retrieve that data in the output below
COL-1 COL-2 COL-3
1 0TY/OK 0TY/OK
1 0TY/OK 2KP/L
1 0TY/OK 7U5/2M
1 0TY/OK
1 0TY/OK
2 2KP/L
2 2KP/L
2 2KP/L
2 2KP/L
2 2KP/L
3 7U5/2M
3 7U5/2M
3 7U5/2M
3 7U5/2M
I want COL3 to return the distinct values of COL2
Using SELECT COL1, COL2, DISTINCT COL2 AS COL3 FROM MYTable does not work in SQL Server.
Although I'm sure that some SQL wizard will be able to construct a way to do this, I feel the need to point out that conceptually this doesn't make sense - the values in the rows of column 3 are completely unrelated to the row values in columns 1 and 2.
Can you not simply return the distinct values of COL2 in a separate query?
SELECT DISTINCT COL2 FROM MyTable
(Note that you can return multiple resultsets from a single SQL query)
This is really unusual, and I can't see why you want this in one result set, as it does not make any sense: there is no reason to associate the rows of the distinct query with the rows in the non-distinct query. But if you must, what you have to do is simply run both queries
Select Col1, Col2 From Table
Order By Col1, Col2
And
Select Distinct Col2 From Table
and join them together. (To join them on row number, add a row number to each query.)
-- Pair the n-th row of the full listing with the n-th distinct Col2 value.
-- T1 numbers every row by (Col1, Col2). T2 numbers the distinct Col2 values
-- with DENSE_RANK, which gives equal Col2 values the same number, so
-- DISTINCT leaves exactly one numbered row per value.
-- This avoids referencing the derived-table alias T2 from inside its own
-- definition, which is not allowed.
-- MyTable stands in for the real table name (TABLE itself is a reserved word).
Select T1.Col1, T1.Col2, T2.Col3
From (Select Row_Number() Over (Order By Col1, Col2) As RowNum,
             Col1, Col2
      From MyTable) As T1
Left Join
     (Select Distinct Col2 As Col3,
             Dense_Rank() Over (Order By Col2) As RowNum
      From MyTable) As T2
  On T2.RowNum = T1.RowNum
Order By T1.RowNum
Try this out..
-- Inline sample data standing in for the real table.
WITH MyTable AS
(
    SELECT 1 Col1, CONVERT (VarChar (25), '0TY/OK') Col2 UNION ALL
    SELECT 1, '0TY/OK' UNION ALL
    SELECT 1, '0TY/OK' UNION ALL
    SELECT 1, '0TY/OK' UNION ALL
    SELECT 1, '0TY/OK' UNION ALL
    SELECT 2, '2KP/L' UNION ALL
    SELECT 2, '2KP/L' UNION ALL
    SELECT 2, '2KP/L' UNION ALL
    SELECT 2, '2KP/L' UNION ALL
    SELECT 2, '2KP/L' UNION ALL
    SELECT 3, '7U5/2M' UNION ALL
    SELECT 3, '7U5/2M' UNION ALL
    SELECT 3, '7U5/2M' UNION ALL
    SELECT 3, '7U5/2M'
),
-- Every source row with a sequential Id, ordered by Col1 then Col2.
AllData AS
(
    SELECT
        Col1,
        Col2,
        ROW_NUMBER () OVER (ORDER BY Col1, Col2) AS Id
    FROM MyTable
),
-- One row per distinct Col2 value.
DistinctData AS
(
    SELECT DISTINCT
        Col2 AS Col3
    FROM MyTable
),
-- The distinct values with their own sequential Id.
DistinctWithRowNumber AS
(
    SELECT
        Col3,
        ROW_NUMBER () OVER (ORDER BY Col3) AS Id
    FROM DistinctData
)
-- The n-th source row is shown next to the n-th distinct value; source rows
-- beyond the number of distinct values get NULL in Col3.
SELECT
    AllData.Col1,
    AllData.Col2,
    DistinctWithRowNumber.Col3
FROM AllData
LEFT JOIN DistinctWithRowNumber
    ON AllData.Id = DistinctWithRowNumber.Id
-- Without an ORDER BY the engine may return the rows in any order.
ORDER BY AllData.Id
returns this result
Col1 Col2 Col3
----------- ------------------------- -------------------------
1 0TY/OK 0TY/OK
1 0TY/OK 2KP/L
1 0TY/OK 7U5/2M
1 0TY/OK NULL
1 0TY/OK NULL
2 2KP/L NULL
2 2KP/L NULL
2 2KP/L NULL
2 2KP/L NULL
2 2KP/L NULL
3 7U5/2M NULL
3 7U5/2M NULL
3 7U5/2M NULL
3 7U5/2M NULL
You can use CTEs to create a ROW_NUMBER and JOIN over those virtual columns.
-- Sample data in a table variable (T-SQL table variables need a leading @).
DECLARE @t TABLE (
    Col1 INT
    ,Col2 VARCHAR(10)
);

-- Three distinct Col2 values; every literal within a group must be
-- identical, otherwise an extra distinct value appears in the output.
INSERT INTO @t (Col1, Col2) VALUES
    (1, '0TY/OK'),
    (1, '0TY/OK'),
    (1, '0TY/OK'),
    (1, '0TY/OK'),
    (1, '0TY/OK'),
    (2, '2KP/L'),
    (2, '2KP/L'),
    (2, '2KP/L'),
    (2, '2KP/L'),
    (2, '2KP/L'),
    (3, '7U5/2M'),
    (3, '7U5/2M'),
    (3, '7U5/2M'),
    (3, '7U5/2M');

-- all_data numbers every row; distinct_data numbers each distinct Col2.
-- Both numberings use real sort keys so the pairing is repeatable; ordering
-- by a constant would let the engine number rows in any order.
; WITH all_data AS (
    SELECT
        Col1
        ,Col2
        ,ROW_NUMBER() OVER (ORDER BY Col1, Col2) AS RowNum
    FROM @t
),
distinct_data AS (
    SELECT
        Col2
        ,ROW_NUMBER() OVER (ORDER BY Col2) AS RowNum
    FROM @t
    GROUP BY
        Col2
)
-- The n-th row is paired with the n-th distinct value; rows past the last
-- distinct value get NULL in Col3.
SELECT
    all_data.Col1
    ,all_data.Col2
    ,distinct_data.Col2 AS Col3
FROM all_data
LEFT JOIN distinct_data ON all_data.RowNum = distinct_data.RowNum
ORDER BY all_data.RowNum;