How to group by to get rid of duplicates rows

How to group by to get rid of duplicates rows - sql

How do I remove duplicates from a table where all the columns are significant apart from the PK?
-- Sample data: pk is unique, but the (a, b, c) combinations repeat.
-- A #temp table is created with CREATE TABLE; DECLARE ... TABLE is only
-- valid for @table variables, so "declare #dummy table" does not parse.
create table #dummy
(
pk int,
a char(1),
b char(1),
c char(1)
)
insert into #dummy (pk, a, b, c)
select 1, 'A', 'B', 'B' union all
select 2, 'A', 'B', 'B' union all
select 3, 'P', 'Q', 'R' union all
select 4, 'P', 'Q', 'R' union all
select 5, 'X', 'Y', 'Z' union all
select 6, 'X', 'Y', 'Z' union all
select 7, 'A', 'B', 'Z';
What I get without removing the duplicates:
select * from #dummy
pk a b c
----------- ---- ---- ----
1 A B B
2 A B B
3 P Q R
4 P Q R
5 X Y Z
6 X Y Z
7 A B Z
What I'd like is:
select ... do magic ....
pk a b c
----------- ---- ---- ----
1 A B B
3 P Q R
5 X Y Z
7 A B Z

Found it:
-- One row per distinct (a, b, c), represented by its lowest pk.
-- The aggregate is aliased so the result column is named pk, and the
-- ORDER BY makes the output order deterministic.
select min(pk) as pk, a, b, c
from #dummy
group by a, b, c
order by min(pk)

You want something like this, I think:
-- Delete every row for which another row with the same (a, b, c) and a
-- lower pk exists, so the lowest pk of each duplicate group survives.
DELETE f
FROM #dummy AS f
INNER JOIN #dummy AS g
ON g.a = f.a
AND g.b = f.b
AND g.c = f.c
AND g.pk < f.pk
Check out this article: http://www.simple-talk.com/sql/t-sql-programming/removing-duplicates-from-a-table-in-sql-server/

At first, I thought distinct would do it, but I'm fairly certain what you want is group by:
-- "select *" cannot be used with "group by a,b,c": pk is neither grouped nor aggregated, so it is reduced with min().
select min(pk) as pk, a, b, c from #dummy group by a,b,c
Since there's a unique primary key, all rows are distinct.

Related

Split comma separated values based on another table

I would like to split comma separated values based on another table
I cannot normalize it since the original table has over 8 million rows. It crashed my laptop when I tried.
How can I put the data into the relevant columns and create a new column if the data is not found?
For example:
TableA,
Type1 Type2
---------------------
A F
B G
C H
D I
E NULL
TableB
ID Country AllTypes
---------------------------------
1 Italy A, B, C
2 USA D, E, A, F
4 Japan I, O, Z
5 UK NULL
By using these two tables, I would like to get the output such as
ID Country AllTypes Type1 Type2 UnCaptured
----------------------------------------------------------------------
1 Italy A, B, C A, B, C NULL NULL
2 USA D, E, G, F D, E G, F NULL
4 Japan I, O, Z NULL I O, Z
5 UK NULL NULL NULL NULL
This is what I have done so far:
with TableA as (
select 'A' as Type1, 'F' as Type2 union all
select 'B', 'G' union all
select 'C', 'H' union all
select 'D', 'I' union all
select 'E', NULL
),
TableB as (
-- Alltypes is one comma-separated string per row; every UNION ALL branch must supply exactly three columns
select 1 as ID, 'Italy' as Country, 'A, B, C' as Alltypes union all
select 2, 'USA', 'D, E, A, F' union all
select 4, 'Japan', 'I, O, Z' union all
select 5, 'UK', NULL
)
select b.Id, b.Country, b.Alltypes,
String_Agg(v.type1,',') Type1,
String_Agg(v.type2,',') Type2
**String_Agg(v.Type3,',') Uncaptured*** ------- This query
from tableb b
outer apply (
select Trim(value) t,
case when exists
(select * from tablea a where a.type1=Trim(value))
then Trim(value) end type1,
case when exists
(select * from tablea a where a.type2=Trim(value))
then Trim(value) end Type2,
Case when not exists ------------This query
( (select * from tablea a where a.type1=Trim(value)) -------
and ------
(select * from tablea a where a.type2=Trim(value))------
) then Trim(value) end Type3** -------------
from String_Split(alltypes, ',')
)v
group by Id, Country, AllTypes
Without highlighted queries(-----) which are for creating a new column (Uncaptured), it works ok like below.
Id Country Alltypes Type1 Type2
1 Italy A, B, C A,B,C NULL
2 USA D, E, A, F D,E,A F
4 Japan I, O, Z I NULL
5 UK NULL NULL NULL
But if I add those highlighted queries, it shows an error. I also thought of using ELSE, but that did not work either.
Could someone help me please?

----------------------- DDL+DML: Should have been provided by the OP !
DROP TABLE IF EXISTS TableA, TableB
GO
-- Lookup table: a value belongs to "Type1" or "Type2" depending on the column it appears in
CREATE TABLE TableA (Type1 CHAR(1), Type2 CHAR(1))
GO
INSERT TableA (Type1, Type2) VALUES
('A', 'F'),
('B', 'G'),
('C', 'H'),
('D', 'I'),
('E', NULL)
GO
CREATE TABLE TableB (ID INT, Country NVARCHAR(100), AllTypes NVARCHAR(100))
GO
-- UK carries a real NULL (not the string 'NULL'), as in the question's data
INSERT TableB (ID, Country, AllTypes) VALUES
(1, 'Italy', 'A, B, C'),
(2, 'USA', 'D, E, G, F'),
(4, 'Japan', 'I, O, Z'),
(5, 'UK', NULL)
GO
----------------------- Solution
;WITH MyCTE AS (
-- One row per list element. OUTER APPLY (rather than CROSS APPLY) keeps rows
-- whose AllTypes is NULL, for which STRING_SPLIT returns no rows at all.
SELECT ID, Country, AllTypes, MyType = TRIM([value])
FROM TableB
OUTER APPLY STRING_SPLIT(AllTypes, ',')
)
,MyCTE02 AS (
-- Look each element up in both TableA columns; it is "uncaptured" when neither matches
SELECT c.ID, c.Country, c.AllTypes, c.MyType, a1.Type1, a2.Type2,
UnCaptured = CASE WHEN a1.Type1 IS NULL AND a2.Type2 IS NULL THEN c.MyType END
FROM MyCTE c
LEFT JOIN TableA a1 ON c.MyType = a1.Type1
LEFT JOIN TableA a2 ON c.MyType = a2.Type2
)
-- STRING_AGG skips NULLs, so each output column lists only the elements that landed in it
SELECT ID, Country, AllTypes
,Type1 = STRING_AGG(Type1, ',')
,Type2 = STRING_AGG(Type2, ',')
,UnCaptured = STRING_AGG(UnCaptured, ',')
FROM MyCTE02
GROUP BY ID, Country, AllTypes
GO

How about
outer apply (
-- s.value is one element of the comma-separated list; it is looked up in both
-- TableA columns and reported as unCaptured when neither lookup matches.
-- A join condition is introduced with ON ("left join ... where" does not parse).
select Trim(s.value) t, a1.type1, a2.type2,
CASE WHEN COALESCE(a1.type1, a2.type2) IS NULL THEN Trim(s.value) END unCaptured
from String_Split(alltypes, ',') s
left join tablea a1 on a1.type1=Trim(s.value)
left join tablea a2 on a2.type2=Trim(s.value)
)v

Extracting indirect relations in SQL

I have this table
ID S E
1 a b
1 b c
2 a b
2 b d
2 b e
2 e d
3 a c
3 c f
I want to extract indirect relations from this table for example:
ID S E
1 a c
2 a d
2 a e
2 b d
3 a f
I could do it in C# or Java but I am wondering if there is a solution for doing it in SQL or not.

You can try this...
-- Sample edges: within one ID, each row is a direct relation S -> E.
Create table #a (ID int, S varchar(1), E varchar(1))
Insert into #a (ID, S, E)
select 1, 'a', 'b' Union all
select 1, 'b', 'c' Union all
select 2, 'a', 'b' Union all
select 2, 'b', 'd' Union all
select 2, 'b', 'e' Union all
select 2, 'e', 'd' Union all
select 3, 'a', 'c' Union all
select 3, 'c', 'f'
select ID, S, E from #a
-- Indirect (two-step) relations: a.S -> a.E = b.S -> b.E inside the same ID.
-- ID is part of the result because the same S/E pair can occur under several IDs.
select a.ID, a.S, b.E
from #a a
Inner join #a b on a.E = b.S and a.ID = b.ID
order by a.ID, a.S, b.E
drop table #a

Multiple column condition check with multiple rows to be found

ID A B c D E(Time)
---------------------------
1 J 1 A B 1
2 J 1 A S 2
3 M 1 A B 1
4 M 1 A B 2
5 M 2 A S 3
6 M 2 A S 4
7 T 1 A B 1
8 T 2 A S 2
9 T 1 A B 3
10 k 1 A B 1
11 k 1 A B 2
I need to find unique values across multiple columns, with some added conditions. The unique value is the combination of columns A, B and D.
If column A has only two rows (like records 1 and 2), column B is the same on both rows and column D differs, BUT the S comes only after the B, we don't want to see those records.
If column A has multiple rows (like records 3 to 6) with different values in columns B and D,
and in column D the S rows come only after the B rows, we don't want to see those records.
If column A has multiple rows (like records 7 to 9) with different values in columns B and D,
and in column D there is an S before a B, we want to see those records.
If Col A has only multiple rows (like record 10 to 11 ) with different Col B and same column D we dont want to see those records.
Any input is welcome. So far I am able to see the first and last row of each group using PARTITION BY with an UNBOUNDED frame in the query...
It seems the basic logic is to check whether an S precedes any B in column D and, if so, show all the records of that partition...
The desired output is rows 7-9. This is based on the logic that, for the same column A, we had a Sell before a Buy from the customer in column D when ordered by column E (time).
ID A B C D E(Time)
---------------------------------------------------
7 T 1 A B 1
8 T 2 A S 2
9 T 1 A B 3

I started to write a query to do this, but ran out of "Spare time", your criteria is very hard to follow, If you comment out the "Where" at the bottom of the query it functions but doesn't yet produce your desired effect.
Possibly this can lead you in a direction to achieve your goal ...
-- Returns all rows of every A-group in which a Sell (D='S') occurs before a
-- later Buy (D='B') when the group is ordered by E (time).
-- The earlier draft of this WHERE clause required a condition and its exact
-- negation at the same time, so it could never return a row; it is replaced
-- by the single rule "earliest S happens before the latest B".
WITH Src AS (
SELECT 1 AS ID, 'J' AS A, 1 AS B, 'A' AS C, 'B' AS D, 1 AS E
UNION ALL SELECT 2, 'J', 1, 'A', 'S', 2
UNION ALL SELECT 3, 'M', 1, 'A', 'B', 1
UNION ALL SELECT 4, 'M', 1, 'A', 'B', 2
UNION ALL SELECT 5, 'M', 2, 'A', 'S', 3
UNION ALL SELECT 6, 'M', 2, 'A', 'S', 4
UNION ALL SELECT 7, 'T', 1, 'A', 'B', 1
UNION ALL SELECT 8, 'T', 2, 'A', 'S', 2
UNION ALL SELECT 9, 'T', 1, 'A', 'B', 3
UNION ALL SELECT 10, 'k', 1, 'A', 'B', 1
UNION ALL SELECT 11, 'k', 1, 'A', 'B', 2
), FirstSRow AS (
-- Time of the earliest S per A; groups without any S produce no row here
SELECT A, Min(E) AS E
FROM Src
WHERE D='S'
GROUP BY A
), LastBRow AS (
-- Time of the latest B per A; groups without any B produce no row here
SELECT A, Max(E) AS E
FROM Src
WHERE D='B'
GROUP BY A
)
SELECT Src.ID, Src.A, Src.B, Src.C, Src.D, Src.E
FROM Src
-- Inner joins: a group needs both an S and a B to qualify at all
INNER JOIN FirstSRow ON FirstSRow.A=Src.A
INNER JOIN LastBRow ON LastBRow.A=Src.A
WHERE FirstSRow.E < LastBRow.E
ORDER BY Src.ID

Select continuous ranges from table

I need to extract continuous ranges from a table based on consecutive numbers (column N) and the same "category" these numbers relate to (column C below). Graphically it looks like this:
N C D
--------
1 x a C N1 N2 D1 D2
2 x b ------------------
3 x c x 1 4 a d (continuous range with same N)
4 x d ==> x 6 7 e f (new range because "5" is missing)
6 x e y 8 10 g h (new range because C changed to "y")
7 x f
8 y g
9 y h
10 y i
SQL Server is 2005. Thanks.

-- Sample data. A #temp table is created with CREATE TABLE; DECLARE ... TABLE
-- is only valid for @table variables.
CREATE TABLE #myTable
(
N INT,
C CHAR(1),
D CHAR(1)
)
INSERT INTO #myTable(N,C,D) VALUES(1, 'x', 'a');
INSERT INTO #myTable(N,C,D) VALUES(2, 'x', 'b');
INSERT INTO #myTable(N,C,D) VALUES(3, 'x', 'c');
INSERT INTO #myTable(N,C,D) VALUES(4, 'x', 'd');
INSERT INTO #myTable(N,C,D) VALUES(6, 'x', 'e');
INSERT INTO #myTable(N,C,D) VALUES(7, 'x', 'f');
INSERT INTO #myTable(N,C,D) VALUES(8, 'y', 'g');
INSERT INTO #myTable(N,C,D) VALUES(9, 'y', 'h');
INSERT INTO #myTable(N,C,D) VALUES(10, 'y', 'i');
-- A row starts a range when its category has no row at N-1, and ends a range
-- when its category has no row at N+1. Starts and ends are numbered per
-- category, so the k-th start of a category pairs with the k-th end of the
-- same category even when the N values of different categories overlap.
WITH StartingPoints AS(
SELECT A.N, A.C, A.D, ROW_NUMBER() OVER(PARTITION BY A.C ORDER BY A.N) AS rownum
FROM #myTable AS A
WHERE NOT EXISTS(
SELECT *
FROM #myTable B
WHERE B.C = A.C
AND B.N = A.N - 1
)
),
EndingPoints AS(
SELECT A.N, A.C, A.D, ROW_NUMBER() OVER(PARTITION BY A.C ORDER BY A.N) AS rownum
FROM #myTable AS A
WHERE NOT EXISTS (
SELECT *
FROM #myTable B
WHERE B.C = A.C
AND B.N = A.N + 1
)
)
SELECT StartingPoints.C,
StartingPoints.N AS [N1],
EndingPoints.N AS [N2],
StartingPoints.D AS [D1],
EndingPoints.D AS [D2]
FROM StartingPoints
JOIN EndingPoints ON EndingPoints.C = StartingPoints.C
AND EndingPoints.rownum = StartingPoints.rownum
ORDER BY StartingPoints.N;
Results:
C N1 N2 D1 D2
---- ----------- ----------- ---- ----
x 1 4 a d
x 6 7 e f
y 8 10 g i

The RANK function is a safer bet than ROW_NUMBER, in case any N values are duplicated, as in the following example:
-- Sample data with duplicated N values (4 and 10). A #temp table is created
-- with CREATE TABLE; DECLARE ... TABLE is only valid for @table variables.
create table #ncd (N int, C char, D char);
insert into #ncd (N, C, D)
select 1,'x','a' union all
select 2,'x','b' union all
select 3,'x','c' union all
select 4,'x','d' union all
select 4,'x','e' union all
select 7,'x','f' union all
select 8,'y','g' union all
select 9,'y','h' union all
select 10,'y','i' union all
select 10,'y','j';
-- r is constant within a run of consecutive N of one category.
-- dense_rank() is used instead of rank(): rank() skips a number after a tie,
-- so a sequence such as 4,4,5 would be split into two ranges.
with a as (
select N, C, D
, r = N-dense_rank()over(partition by C order by N)
from #ncd
)
-- Grouping by C as well as r keeps two categories apart when they happen to
-- produce the same r. MIN(D)/MAX(D) are the alphabetically first/last D of
-- the range, which matches first/last by N only when D sorts the same way as N.
select C
, N1=MIN(N)
, N2=MAX(N)
, D1=MIN(D)
, D2=MAX(D)
from a
group by C, r
order by C, N1;
Result, which correctly withstands the duplicated 4 and 10:
C N1 N2 D1 D2
---- ----------- ----------- ---- ----
x 1 4 a e
x 7 7 f f
y 8 10 g j

Using this answer as a starting point, I ended up with the following:
;
-- Ranges of consecutive N within each category C, one output row per range.
WITH src (N, C, D) AS (
SELECT 1, 'x', 'a' UNION ALL
SELECT 2, 'x', 'b' UNION ALL
SELECT 3, 'x', 'c' UNION ALL
SELECT 4, 'x', 'd' UNION ALL
SELECT 6, 'x', 'e' UNION ALL
SELECT 7, 'x', 'f' UNION ALL
SELECT 8, 'y', 'g' UNION ALL
SELECT 9, 'y', 'h' UNION ALL
SELECT 10, 'y', 'i'
),
-- Tag every row with its range key (Grp) and whether it opens or closes a range:
-- a row opens a range when its category has no row at N-1, closes one when none at N+1.
flagged AS (
SELECT
this_row.N,
this_row.C,
this_row.D,
this_row.N - ROW_NUMBER() OVER (PARTITION BY this_row.C ORDER BY this_row.N) AS Grp,
CASE WHEN prev_row.C IS NULL THEN 1 ELSE 0 END AS IsStart,
CASE WHEN next_row.C IS NULL THEN 1 ELSE 0 END AS IsEnd
FROM src AS this_row
LEFT JOIN src AS prev_row ON prev_row.C = this_row.C AND prev_row.N = this_row.N - 1
LEFT JOIN src AS next_row ON next_row.C = this_row.C AND next_row.N = this_row.N + 1
)
-- Only boundary rows are needed: they carry N1/D1 (start) and N2/D2 (end) of each range.
SELECT
C,
MIN(N) AS N1,
MAX(N) AS N2,
MAX(CASE WHEN IsStart = 1 THEN D END) AS D1,
MAX(CASE WHEN IsEnd = 1 THEN D END) AS D2
FROM flagged
WHERE IsStart = 1 OR IsEnd = 1
GROUP BY C, Grp

Write a stored procedure. It will create and fill a temporary table which will contain the C, N1, N2, D1 and D2 columns.
Create the temporary table
use a cursor to loop on entries in table containing N, C, D ordered by N
use a variable to detect a new range (N(i) > N(i-1) + 1, or C changed) and to store N1, N2, D1 and D2
INSERT into the temporary table for each range detected (new range detected or end of the cursor)
Tell me if you need a code example.

Combining Few Rows of data in One ROW using Sum Function and not doing this for all the row

Friends, please help me with this ASAP. I'd really appreciate it, thanks.
I have a really simple table, which has three columns:
Col A Col B Col C
(unique)nameA (UniqueID)1 (somenumber)10
(unique)nameB (UniqueID)2 (somenumber)20
(unique)nameC (UniqueID)3 (somenumber)30
(unique)nameD (UniqueID)4 (somenumber)10
(unique)nameE (UniqueID)5 (somenumber)50
(unique)nameF (UniqueID)6 (somenumber)35
(unique)nameG (UniqueID)7 (somenumber)50
(unique)nameH (UniqueID)8 (somenumber)10
(unique)nameI (UniqueID)9 (somenumber)25
As per my report requirement i need to combine Unique ID (1,2,3) Give it a Unique name ALFA and Sum the Col C values AND combine (4,5,6) Give it a Unique name BETA and Sum the Col C values for them
And keep other Unique ID, Unique Name and Their values in Col C as is
Order by values in Col C Desc and display TOP 30 results.
So Final result should look like this
Col A Col B Col C
BETA (unique ID 4,5,6) 95 --(10+50+35 from col C above)
ALFA (unique ID 1,2,3) 60 --(10+20+30 from col C above)
(unique)nameG (UniqueID)7 (somenumber)50
(unique)nameI (UniqueID)9 (somenumber)25
(unique)nameH (UniqueID)8 (somenumber)10

Would something like this work?
-- Sample data. A #temp table is created with CREATE TABLE; DECLARE ... TABLE
-- is only valid for @table variables.
create table #simpletable (name varchar(50), uniqueid int, somenumber int)
insert into #simpletable (name, uniqueid, somenumber)
select 'a', 1, 10
union all
select 'b', 2, 20
union all
select 'c', 3, 30
union all
select 'd', 4, 10
union all
select 'e', 5, 50
union all
select 'f', 6, 35
union all
select 'g', 7, 50
union all
select 'h', 8, 10
union all
select 'i', 9, 25
-- ids 1-3 are rolled up into ALFA, ids 4-6 into BETA, every other id is passed
-- through unchanged; the combined list is then ranked by somenumber.
select top 30 name,uniqueid,somenumber
from
(
select 'ALFA' as name, '1,2,3' as uniqueid, sum(somenumber) as somenumber
from #simpletable
where uniqueid between 1 and 3
union all
select 'BETA' as name, '4,5,6' as uniqueid, sum(somenumber) as somenumber
from #simpletable
where uniqueid between 4 and 6
union all
-- uniqueid is cast so this branch matches the varchar id lists above;
-- "not between 1 and 6" keeps every id outside the two groups, not only ids above 6
select name as name, cast(uniqueid as varchar(50)) as uniqueid, somenumber as somenumber
from #simpletable
where uniqueid not between 1 and 6
) as x
order by somenumber desc;

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

How to group by to get rid of duplicates rows - sql

Found it: select min(pk), a, b, c from #dummy group by a, b, c

You want something like this, I think: DELETE FROM f FROM #dummy AS f INNER JOIN #dummy AS g ON g.data = f.data AND f.id < g.id Check out this article: http://www.simple-talk.com/sql/t-sql-programming/removing-duplicates-from-a-table-in-sql-server/

At first, I thought distinct would do it, but I'm fairly certain what you want is group by: select * from #dummy group by a,b,c Since there's a unique primary key, all rows are distinct.

Related

Split comma separated values based on another table

Extracting indirect relations in SQL

Multiple column condition check with multiple rows to be found

Select continuous ranges from table

Combining Few Rows of data in One ROW using Sum Function and not doing this for all the row

Categories

Resources