Find duplicates and remove it applying conditions SQL Server - sql

I'm new to SQL Server, and I'm trying to remove duplicates from a table subject to some conditions; I'm unsure how to apply these conditions in the query.
I need to remove duplicates from the Users table, eg:
Id Code Name SysName
-----------------------------
1 D1 N1
2 D1
3 D1 N1 N-1
4 E2 N2
5 E2 N2
6 E2 N2
7 X3
8 X3 N-3
9
10
11 Z4 W2 N-4-4
12 Z4 W2 N-44
In the above table: for D1 code I want to keep the ID=3, which has all columns filled (Code, Name, and SysName) and delete ID=1 and ID=2
For E2 code, I want to keep any of these and remove the two duplicated ones
For X3 code, keep the one which has SysName=N-3
For ID=9, ID=10 (empty code and everything empty, remove all)
For Z4 code, remove ID=11 and keep N-44 Sysname
And the last thing, I've a FK with other table, so I think that I need first to get all Id's from Users, delete these ids from the second dependent table and finally delete from Users table.
Do you have any idea how to achieve this? I'm not asking for a complete solution, just a code outline or similar examples/scenarios you may have; any suggestion would be welcome.
EDIT:
To summarize, I have the Users table:
Id Code Name SysName
-----------------------------
1 D1 N1
2 D1
3 D1 N1 N-1
4 E2 N2
5 E2 N2
6 E2 N2
7 X3
8 X3 N-3
9
10
11 Z4 W2 N-4-4
12 Z4 W2 N-44
And I want to keep only:
Id Code Name SysName
-----------------------------
3 D1 N1 N-1
4 E2 N2
8 X3 N-3
12 Z4 W2 N-44

Are you looking for something like
-- Collapse every non-NULL Code to a single row, taking the greatest Name and
-- SysName found for it. NULLs are read as '' so any filled value wins.
SELECT
    [Code],
    MAX(ISNULL([Name], '')) AS [Name],
    MAX(ISNULL([SysName], '')) AS [SysName]
FROM T
WHERE [Code] IS NOT NULL
GROUP BY [Code];
Returns:
+------+------+---------+
| Code | Name | SysName |
+------+------+---------+
| D1 | N1 | N-1 |
| E2 | N2 | |
| X3 | | N-3 |
| Z4 | W2 | N-4-4 |
+------+------+---------+
Demo

The following query lists the Ids to remove, according to these rules in order of importance:
1- A user whose fields are all empty/NULL is deleted.
2- Users with more invalid fields are considered for removal first (for example, SysName must not contain two '-' characters).
3- Users with more empty/NULL fields are considered for removal next.
-- Lists the Ids of the [Users] rows to delete. Each row gets an [Importance]:
--   -1 -> Code, Name and SysName are all NULL/empty (always listed for deletion);
--   n  -> position of the row inside its Code group, best candidate first.
-- Only the row ranked 1 in each Code group is left out of the result.
;WITH
[Ids]
AS
(
SELECT
[U].[Id]
,[Importance] =
CASE
WHEN [X].[NumberOfFilledFields] = 0
THEN -1
-- Fewest invalid fields first, then most filled fields. There is no further
-- tiebreaker, so which of several equally ranked rows gets number 1 is arbitrary.
ELSE ROW_NUMBER() OVER (PARTITION BY [U].[Code] ORDER BY [X].[NumberOfInvalidFields], [X].[NumberOfFilledFields] DESC)
END
FROM [Users] AS [U]
CROSS APPLY
(
SELECT
-- How many of Code, Name and SysName are neither NULL nor ''.
[NumberOfFilledFields] =
+ CASE WHEN NULLIF([U].[Code], '') IS NULL THEN 0 ELSE 1 END
+ CASE WHEN NULLIF([U].[Name], '') IS NULL THEN 0 ELSE 1 END
+ CASE WHEN NULLIF([U].[SysName], '') IS NULL THEN 0 ELSE 1 END
-- A SysName containing at least two '-' characters counts as one invalid field.
,[NumberOfInvalidFields] =
+ CASE WHEN [U].[SysName] LIKE '%-%-%' THEN 1 ELSE 0 END
) AS [X]
)
SELECT
[Id]
FROM [Ids]
-- (1 = 1) is a no-op so that every real condition can start with AND.
WHERE (1 = 1)
AND ([Importance] = -1 OR [Importance] > 1);

This uses window functions and coalesce:
-- Sample data held in a table variable (T-SQL table variables are prefixed with @).
-- NOTE(review): row 11 holds 'N-44' here, whereas the question's data shows 'N-4-4' — confirm which is intended.
DECLARE @t TABLE ([Id] INT, [Code] CHAR(2), [Name] CHAR(2), [SysName] VARCHAR(10))
INSERT INTO @t values
(1 , 'D1', 'N1', Null ), (2 , 'D1', Null, Null ), (3 , 'D1', 'N1', 'N-1' ), (4 , 'E2', 'N2', Null ), (5 , 'E2', 'N2', Null ), (6 , 'E2', 'N2', Null )
, (7 , 'X3', Null, Null ), (8 , 'X3', Null, 'N-3' ) , (9 , Null, Null, Null ), (10, Null, Null, Null ), (11, 'Z4', 'W2', 'N-44'), (12, 'Z4', 'W2', 'N-44' )
;WITH t AS (
-- Distinct "filled-in" combinations per Code: a missing Name/SysName is taken
-- from the maximum value found for the same Code (and Name, when one exists).
SELECT DISTINCT
[code]
, COALESCE([name], max([name]) OVER(PARTITION BY [code])) AS [Name]
, COALESCE([sysname], COALESCE(MAX([sysname]) OVER(PARTITION BY [code], [name]), MAX([sysname]) OVER(PARTITION BY [code]))) AS [SysName]
FROM @t
WHERE [code] IS NOT NULL)
-- Map each combination back to the lowest Id whose stored values match it.
-- NULLs are compared through the same sentinel on both sides of each equality.
SELECT MIN(t2.id) AS [Id], t.Code, t.Name, t.SysName
from @t t2
INNER JOIN t ON t.code = t2.code AND ISNULL(t.[Name], 'Null') = ISNULL(t2.[Name], 'Null') AND ISNULL(t.[SysName], 'Null') = ISNULL(t2.[SysName], 'Null')
GROUP BY t.Code, t.Name, t.SysName

DEMO
(Any other answer: feel free to borrow the demos to test your answer or use it in yours! no need to duplicate effort!)
One could use an analytic function/window function like row_number() to assign a row to each record we want and keep all the #1 rows except for those where code is null... do this with a cte and then just delete.
We determine what to keep by looking at the record having the most data and in case of ties, use the earliest ID.
-- Number the rows of each code: rows with more of name/sysname populated come
-- first, and the lowest id wins a tie.
WITH ranked AS (
    SELECT
        id,
        code,
        name,
        sysname,
        ROW_NUMBER() OVER (
            PARTITION BY code
            ORDER BY (
                CASE WHEN name IS NOT NULL THEN 1 ELSE 0 END
                + CASE WHEN sysname IS NOT NULL THEN 1 ELSE 0 END
            ) DESC,
            id
        ) AS rn
    FROM users
)
-- Deleting through the CTE removes the underlying users rows: everything that
-- has no code, plus every row that is not the first of its code.
DELETE FROM ranked
WHERE code IS NULL
   OR rn <> 1;
Results in:
+----+----+------+------+---------+
| | ID | Code | Name | Sysname |
+----+----+------+------+---------+
| 1 | 3 | D1 | N1 | N-1 |
| 2 | 4 | E2 | N2 | NULL |
| 3 | 8 | X3 | NULL | N-3 |
| 4 | 11 | Z4 | W2 | N-4-4 |
+----+----+------+------+---------+
One could use the CTE and delete related FK records that would get purged
and then use the cte again and delete the users

You need to be familiar with the CASE expression;
then you can change the conditions accordingly.
See the sample code below, and adjust the CASE in the WHERE clause to your requirements.
-- Ranks the rows of each code by id, then deletes every row except those
-- matched by one of the hard-coded "keep" rules in the CASE below.
;with C as
(
select Dense_rank() over(partition by code order by id) as rn,*
from Users
)
delete from C
-- A "keep" rule yields 0; rn starts at 1, so rn = 0 is false and the row survives.
-- Every other row reaches ELSE rn, making the predicate rn = rn, and is deleted.
where rn =
(case
when (code = 'd1' and name is not null and sysname !='') then 0
-- NOTE(review): the sample data uses code 'E2', not 'E1', so this rule matches no row there — confirm the intended code.
when (code = 'E1' and rn = 1) then 0
when (code = 'X3' and sysname!='') then 0
when (code = 'z4' and name is not null and sysname !='') then 0
else rn
end )
Output:-
3 D1 N1 N-1
8 X3 N-3
11 Z4 W2 N-4-4
12 Z4 W2 N-44

Related

Turn one column into multiple based on index ranges

I have the following table in SQL Server:
| idx | value |
| --- | ----- |
| 1 | N |
| 2 | C |
| 3 | C |
| 4 | P |
| 5 | N |
| 6 | N |
| 7 | C |
| 8 | N |
| 9 | P |
I would like to turn it to this:
| idx 1-3 | idx 4-6 | idx 7-9 |
| ------- | ------- | ------- |
| N | P | C |
| C | N | N |
| C | N | P |
How can I do this?
If you want to split the data into three columns, with the data in order by id -- and assuming that the ids start at 1 and have no gaps -- then on your particular data, you can use:
-- Integer division (idx - 1) / 3 picks the output column; idx % 3 picks the
-- output row. MAX just collects the single value that lands in each cell.
SELECT
    MAX(CASE WHEN (idx - 1) / 3 = 0 THEN value END) AS grp_1,
    MAX(CASE WHEN (idx - 1) / 3 = 1 THEN value END) AS grp_2,
    MAX(CASE WHEN (idx - 1) / 3 = 2 THEN value END) AS grp_3
FROM t
GROUP BY idx % 3
ORDER BY MIN(idx);
The above doesn't hard-code the ranges, but the "3" means different things in different contexts -- sometimes the number of columns, sometimes the number of rows in the result set.
However, the following generalizes so it adds additional rows as needed:
-- Pivot the rows into three columns of num_rows rows each, where
-- num_rows = ceiling(total row count / 3), so rows are added as the table grows.
-- Like the fixed version, this relies on idx starting at 1 with no gaps.
-- The cells carry value (not idx), matching the requested output.
select max(case when (idx - 1) / num_rows = 0 then value end) as grp_1,
       max(case when (idx - 1) / num_rows = 1 then value end) as grp_2,
       max(case when (idx - 1) / num_rows = 2 then value end) as grp_3
from (select t.*, convert(int, ceiling(count(*) over () / 3.0)) as num_rows
      from t
     ) t
group by idx % num_rows
order by min(idx);
Here is a db<>fiddle.
You can compute the category of each row with a lateral join, then enumerate the rows within each category, and finally pivot with conditional aggregation:
-- Classify each row into an idx range, number the rows inside each range,
-- then pivot: rows sharing the same rn end up on the same output line.
select
    max(case when cat = 'idx_1_3' then value end) as idx_1_3,
    max(case when cat = 'idx_4_6' then value end) as idx_4_6,
    max(case when cat = 'idx_7_9' then value end) as idx_7_9
from (
    select
        t.*,
        v.cat, -- must be projected: the outer query filters on it
        -- SQL Server requires ORDER BY inside OVER() for row_number();
        -- ordering by idx keeps each column in its original order.
        row_number() over(partition by v.cat order by t.idx) as rn
    from mytable t
    cross apply (values (
        case
            when idx between 1 and 3 then 'idx_1_3'
            when idx between 4 and 6 then 'idx_4_6'
            when idx between 7 and 9 then 'idx_7_9'
        end
    )) v(cat)
) t
group by rn
Another solution with union all operator and row_number function
-- Each branch selects one idx range, puts its value in that range's column
-- (NULL in the other two) and numbers its rows; grouping by that row number
-- lines the three ranges up side by side.
select max(IDX_1_3) as IDX_1_3, max(IDX_4_6) as IDX_4_6, max(IDX_7_9) as IDX_7_9
from (
    select
        case when idx in (1, 2, 3) then value end as idx_1_3
        , null as idx_4_6
        , null as idx_7_9
        , row_number()over(order by idx) as rnb
    from Your_table where idx in (1, 2, 3)
    union all
    select null as idx_1_3
        , case when idx in (4, 5, 6) then value end as idx_4_6
        , null as idx_7_9
        , row_number()over(order by idx) as rnb
    from Your_table where idx in (4, 5, 6)
    union all
    select null as idx_1_3
        , null as idx_4_6
        , case when idx in (7, 8, 9) then value end as idx_7_9
        , row_number()over(order by idx) as rnb
    from Your_table where idx in (7, 8, 9)
) t
group by rnb
;
-- Build a 2002-row temp table of "<n> - <random letter A-Z>" values.
-- Ordering by the constant @@spid is only a way to satisfy row_number()'s
-- mandatory ORDER BY without imposing a real sort.
drop table if exists #t;
create table #t (id int identity(1,1) primary key clustered, val varchar(20));
insert into #t(val)
select top (2002) concat(row_number() over(order by @@spid), ' - ', char(65 + abs(checksum(newid()))%26))
from sys.all_objects
order by row_number() over(order by @@spid);
-- c3 cycles 1,2,3 for each consecutive block of three ids (the target column);
-- r numbers the rows inside each column, so rows with equal r share an output line.
select p.r, 1+(p.r-1)/3 grp3id, p.[1] as [idx 1-3], p.[2] as [idx 4-6], p.[3] as [idx 7-9]
from
(
    select
        val,
        1+((1+(id-1)/3)-1)%3 as c3,
        row_number() over(partition by 1+((1+(id-1)/3)-1)%3 order by id) as r
    from #t
) as src
pivot
(
    max(val) for c3 in ([1], [2], [3])
) as p
order by p.r;
You can use the mod as follows:
-- (idx - 1) % 3 is the position inside each block of three, i.e. the output row;
-- the range tests route each value to its column.
SELECT
    MAX(CASE WHEN idx >= 1 AND idx <= 3 THEN value END) AS idx_1_3,
    MAX(CASE WHEN idx >= 4 AND idx <= 6 THEN value END) AS idx_4_6,
    MAX(CASE WHEN idx >= 7 AND idx <= 9 THEN value END) AS idx_7_9
FROM t
GROUP BY (idx - 1) % 3;
If your idx is not continuous numbers then instead of from t use the following
from (select value, row_number() over(order by idx) as idx
from your_table t) t

SQL: Repeat patterns between date range

-- Inclusive date range for which rows are generated.
DECLARE
    @startDate date = '2020-07-03',
    @endDate date = '2020-07-06';
I have a table as below:
---------------------------------------------------------
|EmployeeID | EmpName |Pattern | Frequency |
---------------------------------------------------------
| 11 | X | 1,2,3 | 1 |
| 12 | Y | 4,5 | 1 |
| 13 | Y | 1,2 | 3 |
| 14 | Z | 1,2 | 2 |
---------------------------------------------------------
and I want to generate the dates within the given date range.
The desired result table is as follows:
--------------------------------
| EmpId | Dates | Pattern |
--------------------------------
| 11 |2020-07-03 | 1 |
| 11 |2020-07-04 | 2 |
| 11 |2020-07-05 | 3 |
| 11 |2020-07-06 | 1 |
| 12 |2020-07-03 | 4 |
| 12 |2020-07-04 | 5 |
| 12 |2020-07-05 | 4 |
| 12 |2020-07-06 | 5 |
| 13 |2020-07-03 | 1 |
| 13 |2020-07-04 | 1 |
| 13 |2020-07-05 | 1 |
| 13 |2020-07-06 | 2 |
| 14 |2020-07-03 | 1 |
| 14 |2020-07-04 | 1 |
| 14 |2020-07-05 | 2 |
| 14 |2020-07-06 | 2 |
For each employee, generate the dates in the given date range and repeat that employee's pattern according to its frequency (in days).
That is, the pattern value changes every Frequency days.
What I have achieved:
I am able to generate the records for each employee between the given dates.
What I am not able to get:
I am not able to repeat the pattern based on the frequency for each employee within the date range.
I can do everything else; I just need a little help repeating the pattern based on the frequency.
Note:
The data is stored this way, and I can't change the existing schema for now.
I've come up with this. It's basically a splitter, a tally table and some logic.
Each split pattern value is joined to Frequency tally rows, so it appears the correct number of times; the resulting rows are sorted by their position in the pattern string.
Finally, everything is joined together and the pattern is repeated using modulo.
-- Sample data: Pattern is a comma-separated list; each element is repeated
-- Frequency times before the next one is used.
DECLARE @t TABLE( EmployeeID INT
                , EmpName VARCHAR(20)
                , Pattern VARCHAR(255)
                , Frequency INT )
DECLARE @startDate DATE = '2020-07-03'
DECLARE @endDate DATE = '2020-07-09'
INSERT INTO @t
VALUES (11, 'X', '1,2,3', 1),
       (12, 'Y', '4,5', 1),
       (13, 'Y', '1,2', 3),
       (14, 'Z', '1,2', 2)
DECLARE @delimiter CHAR(1) = ',';
-- split: recursive string splitter. Txt is the part of Pattern still to be
-- split, i the 1-based position of elem inside the pattern.
WITH split(Txt
         , i
         , elem
         , EmployeeID)
     AS (SELECT STUFF(Pattern, 1, CHARINDEX(@delimiter, Pattern+@delimiter+'~'), '')
              , 1
              , CAST(LEFT(Pattern, CHARINDEX(@delimiter, Pattern+@delimiter+'~')-1) AS VARCHAR(MAX))
              , EmployeeID
         FROM @t
         UNION ALL
         SELECT STUFF(Txt, 1, CHARINDEX(@delimiter, Txt+@delimiter+'~'), '')
              , i + 1
              , CAST(LEFT(Txt, CHARINDEX(@delimiter, Txt+@delimiter+'~')-1) AS VARCHAR(MAX))
              , EmployeeID
         FROM split
         WHERE Txt > ''),
     -- E1..E8: cross-joined row generators used as a tally source.
     E1(N) AS (SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
               SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
               SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
               SELECT 1), --10E+1 or 10 rows
     E2(N) AS (SELECT 1 FROM E1 AS a, E1 AS b), --10E+2 or 100 rows
     E4(N) AS (SELECT 1 FROM E2 AS a, E2 AS b), --10E+4 or 10,000 rows
     E8(N) AS (SELECT 1 FROM E4 AS a , E4 AS b), --10E+8 or 100,000,000 rows
     -- PatternXFrequency: every pattern element repeated Frequency times;
     -- Sort is the 0-based position in that expanded cycle.
     PatternXFrequency(EmployeeID
                     , Sort
                     , elem)
     AS (SELECT split.EmployeeID
              , ROW_NUMBER() OVER(PARTITION BY split.EmployeeID ORDER BY i) - 1
              , elem
         FROM split
         INNER JOIN @t AS t ON t.EmployeeID = split.EmployeeID
         CROSS APPLY (SELECT TOP (t.Frequency) 1
                      FROM E8
                     ) AS Freq(Dummy))
SELECT EmployeeID
     , DATEADD(DAY, i_count, @startDate) AS Dates
     , elem
FROM (SELECT DATEDIFF(DAY, @startDate, @endDate) + 1) AS t_datediff(t_days)
-- One row per day offset (0-based) in the requested range.
CROSS APPLY (SELECT TOP (t_days) ROW_NUMBER() OVER(ORDER BY (SELECT 0) ) - 1 FROM E8
            ) AS t_dateadd(i_count)
CROSS APPLY (SELECT PatternXFrequency.*
             FROM (SELECT DISTINCT EmployeeID FROM @t) AS t(EmpID)
             -- Length of the employee's expanded cycle.
             CROSS APPLY (SELECT COUNT(Sort)
                          FROM PatternXFrequency
                          WHERE EmployeeID = EmpID
                         ) AS EmpPattern(sortCount)
             -- Day offset modulo cycle length selects the element for that day.
             CROSS APPLY (SELECT *
                          FROM PatternXFrequency
                          WHERE EmployeeID = EmpID
                            AND Sort = ((i_count % sortCount))
                         ) AS PatternXFrequency
            ) AS t
ORDER BY t.EmployeeID
       , Dates
This isn't particularly pretty, but it avoids the recursion of a rCTE, so should provide a faster experience. As STRING_SPLIT still doesn't know what ordinal position means, we have to use something else here; I use DelimitedSplit8k_LEAD.
I also assume your expected results are wrong, as they stop short of your end date (20200709). This results in the below:
-- Demo table; Pattern holds a comma-separated list that is split per employee below.
CREATE TABLE dbo.YourTable (EmployeeID int,
                            EmpName char(1),
                            Pattern varchar(8000), --This NEEDS fixing
                            Frequency tinyint);
INSERT INTO dbo.YourTable (EmployeeID, EmpName, Pattern, Frequency)
VALUES(11,'X','1,2,3',1),
      (12,'Y','4,5',1),
      (13,'Y','1,2',3),
      (14,'Z','1,2',2);
GO
DECLARE @StartDate date = '20200703',
        @EndDate date = '20200709';
-- CTE: one row per pattern element, plus the element count per employee.
-- dbo.DelimitedSplit8K_LEAD must already exist; it is read here for ItemNumber and Item.
WITH CTE AS(
    SELECT *,
           MAX(ItemNumber) OVER (PARTITION BY EmployeeID) AS MaxItemNumber
    FROM dbo.YourTable YT
         CROSS APPLY dbo.DelimitedSplit8K_LEAD(YT.Pattern,',') DS),
N AS(
    SELECT N
    FROM (VALUES(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL))N(N)),
-- Tally: 0-based day offsets covering the inclusive date range.
Tally AS(
    SELECT TOP (SELECT DATEDIFF(DAY,@StartDate, @EndDate)+1)
           ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) - 1 AS I
    FROM N N1, N N2, N N3) --1000 Rows
-- (I + 1) modulo the element count, with 0 mapped to the last element, cycles
-- through the pattern one element per day.
-- NOTE(review): Frequency is not referenced by this query — confirm that is intended.
SELECT C.EmployeeID,
       DATEADD(DAY,T.I, @StartDate) AS Dates,
       C.Item
FROM CTE C
     JOIN Tally T ON ISNULL(NULLIF((T.I +1) % C.MaxItemNumber,0),C.MaxItemNumber) = C.ItemNumber
ORDER BY EmployeeID,
         T.I;
GO
DROP TABLE dbo.YourTable;
As mentioned in the comments, you should fix your data model.
Your expected output pattern is a little strange.
But are you looking for something like this?
DECLARE @startDate date = '2020-07-03'
DECLARE @endDate date = '2020-07-09'
DECLARE @Dates TABLE([Date] Date)
-- seq: 0-based day offsets from @startDate up to and including @endDate.
;WITH seq(n) AS
(
    SELECT 0 UNION ALL SELECT n + 1 FROM seq
    WHERE n < DATEDIFF(DAY, @startDate, @endDate)
)
INSERT INTO @Dates ([Date])
-- Offsets are applied to @startDate (not to today's date) so the generated
-- dates line up with the range and with the DATEDIFF test in the final query.
SELECT DATEADD(Day, n, @startDate) Date
FROM seq
ORDER BY n
-- Lift the default recursion limit of 100 so longer ranges work.
OPTION (MAXRECURSION 0);
SELECT e.EmployeeId, d.Date, x.Value Pattern
FROM Employee e
CROSS APPLY STRING_SPLIT(e.Pattern, ',') x
INNER JOIN @Dates d on 1=1
-- Correct for the first iteration of the pattern
AND DATEDIFF(DAY, DATEADD(DAY, -1, @startDate), d.Date) = x.Value

Making a conditional aggregate

I have tricky grouping problem for our business reasons, I have a table which has values like this
----------------------------
| NAME | TYPE | VALUE |
----------------------------
| N1 | T1 | V1 |
| N1 | T2 | V2 |
| N1 | NULL | V3 |
| N2 | T2 | V4 |
| N2 | NULL | V5 |
| N3 | NULL | V6 |
-----------------------------
I need to group it in a way that,
The first level grouping will be by name.
At the second level,
When the available types are T1, T2 and NULL, group T1 and NULL together and have T2 grouped separately.
When the available types are T2 and NULL, group NULL with T2.
When NULL is the only available type, just have it as it is.
The expected O/P for the above table is,
----------------------------
| N1 | T1 | V1+V3 |
| N1 | T2 | V2 |
| N2 | T2 | V4+V5 |
| N3 | NULL | V6 |
-----------------------------
How to achieve this in snowflake sql. Or any other server, so that I can find an equivalent in Snowflake.
The following query should work:
-- Rows with a NULL TYPE are folded into the smallest non-NULL TYPE of the same
-- NAME; when a NAME has no non-NULL TYPE, its rows stay grouped under NULL.
WITH t2 AS (
    SELECT
        NAME,
        MIN(TYPE) AS MIN_TYPE
    FROM mytable
    GROUP BY NAME
)
SELECT
    t1.NAME,
    COALESCE(t1.TYPE, t2.MIN_TYPE),
    SUM(t1.VALUE)
FROM mytable AS t1
INNER JOIN t2
    ON t2.NAME = t1.NAME
GROUP BY
    t1.NAME,
    COALESCE(t1.TYPE, t2.MIN_TYPE)
The query uses a derived table in order to extract the MIN(TYPE) value per NAME. Using COALESCE we can then convert NULL to either T1 or T2.
Edit:
You can create a pivoted version of the expected result set using the following query:
-- Inner query: per NAME, separate sums for TYPE 'T1', 'T2' and NULL.
-- Outer query: the NULL sum is added to T1 when T1 exists, otherwise to T2
-- when T2 exists, otherwise it is reported on its own; unused slots are 0.
SELECT
    NAME,
    CASE
        WHEN T1SUM IS NOT NULL THEN COALESCE(T1SUM, 0) + COALESCE(NULLSUM, 0)
        ELSE 0
    END AS T1SUM,
    CASE
        WHEN T1SUM IS NOT NULL OR T2SUM IS NULL THEN COALESCE(T2SUM, 0)
        ELSE COALESCE(T2SUM, 0) + COALESCE(NULLSUM, 0)
    END AS T2SUM,
    CASE
        WHEN T1SUM IS NOT NULL OR T2SUM IS NOT NULL THEN 0
        ELSE COALESCE(NULLSUM, 0)
    END AS NULLSUM
FROM (
    SELECT
        NAME,
        SUM(CASE WHEN TYPE = 'T1' THEN VALUE END) AS T1SUM,
        SUM(CASE WHEN TYPE = 'T2' THEN VALUE END) AS T2SUM,
        SUM(CASE WHEN TYPE IS NULL THEN VALUE END) AS NULLSUM
    FROM mytable
    GROUP BY NAME
) AS t
In Giorgos's answer the totals are given in pivoted form (a single row per name, with one column per case) rather than as one row per case; this can be written more simply:
with this data:
WITH data_table(name, type, value) AS (
SELECT * FROM VALUES
(10, 1, 100 ),
(10, 2, 200 ),
(10, null, 400 ),
(11, 2, 100 ),
(11, null, 200 ),
(12, null, 100 )
)
and this SQL
-- t1_val / t2_val / tnull_val: plain sums per type for each name.
-- c1_sum / c2_sum / c3_sum: the same sums after the NULL-type total has been
-- attached to type 1 if present, else to type 2, else kept on its own.
-- The c*_sum expressions reuse the aliases defined earlier in this SELECT list.
SELECT
    name,
    SUM(IFF(type = 1, value, null)) AS t1_val,
    SUM(IFF(type = 2, value, null)) AS t2_val,
    SUM(IFF(type IS NULL, value, null)) AS tnull_val,
    IFF(t1_val IS NOT NULL, t1_val + ZEROIFNULL(tnull_val), null) AS c1_sum,
    IFF(t1_val IS NOT NULL, t2_val, t2_val + ZEROIFNULL(tnull_val)) AS c2_sum,
    IFF(t1_val IS NULL AND t2_val IS NULL, tnull_val, null) AS c3_sum
FROM data_table
GROUP BY name;
we get:
NAME
T1_VAL
T2_VAL
TNULL_VAL
C1_SUM
C2_SUM
C3_SUM
10
100
200
400
500
200
null
11
null
100
200
null
300
null
12
null
null
100
null
null
100
which shows for the 10 row the null sum binds with 1 sum, for the 11 row the null sum binds with the 2 sum, and in the 12 row we get the null sum by itself.
We can unpivot these values if we wish, by joining to a mini table with 3 rows like so:
-- Unpivots the per-name case sums back into one row per (name, type).
-- d holds one row per name with c1_sum/c2_sum/c3_sum; p is a three-row lookup
-- mapping a case number (1, 2, NULL) to its label ('T1', 'T2', 'null').
SELECT d.name,
p.c2 as type,
-- Pick the sum belonging to the joined case; the NULL case falls to ELSE.
case p.c1
WHEN 1 then d.c1_sum
WHEN 2 then d.c2_sum
ELSE d.c3_sum
end as value
FROM (
SELECT name
,SUM(IFF(type=1, value, null)) as t1_val
,SUM(IFF(type=2, value, null)) as t2_val
,SUM(IFF(type is null, value, null)) as tnull_val
,IFF(t1_val is not null, t1_val + zeroifnull(tnull_val), null) as c1_sum
,IFF(t1_val is not null, t2_val, t2_val + zeroifnull(tnull_val)) as c2_sum
,IFF(t1_val is null AND t2_val is null, tnull_val, null) as c3_sum
FROM data_table
GROUP BY 1
) AS d
JOIN (
SELECT column1 as c1, column2 as c2
FROM VALUES (1,'T1'),(2,'T2'),(null,'null')
) AS p
-- A lookup row joins only when the matching case sum is non-NULL, so each
-- name yields a row only for the cases that actually apply to it.
ON ((d.c1_sum is not null AND p.c1 = 1)
OR (d.c2_sum is not null AND p.c1 = 2)
OR (d.c3_sum is not null AND p.c1 is null))
ORDER BY 1,2;
which gives the original requested output:
NAME
TYPE
VALUE
10
T1
500
10
T2
200
11
T2
300
12
null
100

Find missing number from not sequence number

I have a database with 5 columns (A1,A2,A3,A4,A5) which store 5 numbers.
The 5 numbers are "1,2,3,4,5".
A1 A2 A3 A4 A5
-------------------------------
2 4 5 Null Null
I want to get the missing numbers, which are "1" and "3".
How do I find the missing numbers from the 5 numbers?
-- Start from the string '12345' and strip out each digit stored in A5..A1.
-- A NULL column becomes '0', which does not occur in the string, so it removes
-- nothing. Whatever digits remain are the missing ones.
SELECT
    REPLACE(
        REPLACE(
            REPLACE(
                REPLACE(
                    REPLACE('12345', CAST(COALESCE(A5, 0) AS varchar(1)), ''),
                    CAST(COALESCE(A4, 0) AS varchar(1)), ''),
                CAST(COALESCE(A3, 0) AS varchar(1)), ''),
            CAST(COALESCE(A2, 0) AS varchar(1)), ''),
        CAST(COALESCE(A1, 0) AS varchar(1)), '')
FROM Table1
Sql Fiddle Demo
You can do this
-- sequence: the candidate numbers 1..5, generated recursively.
WITH sequence AS
(
    SELECT 1 n UNION ALL
    SELECT n + 1 FROM sequence WHERE n < 5
)
-- Return every candidate that appears in none of a1..a5 of any table1 row.
-- NOT EXISTS is used instead of LEFT JOIN ... WHERE t.a1 IS NULL because a1 is
-- nullable: a row that matches through a2..a5 while its a1 is NULL would
-- otherwise be reported as missing.
SELECT s.n
FROM sequence s
WHERE NOT EXISTS
(
    SELECT 1
    FROM table1 t
    WHERE s.n IN (t.a1, t.a2, t.a3, t.a4, t.a5)
)
Output:
| N |
|---|
| 1 |
| 3 |
Here is SQLFiddle demo
Depending on the desired output, this might work. This returns the relevant missing number(s) for each row.
-- For every NumTest row whose five values (NULL read as 0) do not sum to 15,
-- returns one column per number 1..5 (A..E) that holds the number itself when
-- none of A1..A5 equals it.
-- NOTE(review): ELSE '' mixes an integer with a string; on SQL Server the CASE
-- result would be typed int and '' would surface as 0 — confirm on the target engine.
SELECT CASE WHEN COALESCE(A1,0)<>1 AND COALESCE(A2,0)<>1 AND COALESCE(A3,0)<>1
AND COALESCE(A4,0)<>1 AND COALESCE(A5,0)<>1 THEN 1 ELSE '' END A
, CASE WHEN COALESCE(A1,0)<>2 AND COALESCE(A2,0)<>2 AND COALESCE(A3,0)<>2
AND COALESCE(A4,0)<>2 AND COALESCE(A5,0)<>2 THEN 2 ELSE '' END B
, CASE WHEN COALESCE(A1,0)<>3 AND COALESCE(A2,0)<>3 AND COALESCE(A3,0)<>3
AND COALESCE(A4,0)<>3 AND COALESCE(A5,0)<>3 THEN 3 ELSE '' END C
, CASE WHEN COALESCE(A1,0)<>4 AND COALESCE(A2,0)<>4 AND COALESCE(A3,0)<>4
AND COALESCE(A4,0)<>4 AND COALESCE(A5,0)<>4 THEN 4 ELSE '' END D
, CASE WHEN COALESCE(A1,0)<>5 AND COALESCE(A2,0)<>5 AND COALESCE(A3,0)<>5
AND COALESCE(A4,0)<>5 AND COALESCE(A5,0)<>5 THEN 5 ELSE '' END E
FROM NumTest
-- Rows summing to 15 are skipped, on the premise that they already hold 1..5.
WHERE COALESCE(A1,0)+COALESCE(A2,0)+COALESCE(A3,0)+COALESCE(A4,0)+COALESCE(A5,0)<>15
The results look like:
you'll need a table of integers from 1 to (in this case) 5:
-- Lookup table of the expected integers 1..5. Created as a temp table so the
-- final query can qualify its column as #ints.n (a table variable cannot be
-- referenced that way without an alias).
CREATE TABLE #ints (n int);
INSERT INTO #ints (n) VALUES (1), (2), (3), (4), (5);
second, we get the numbers in the table row into a single comparable set:
-- Unpivots columns A1..A5 of one myTable row into a single-column temp table
-- #all (one row per column, NULLs included).
-- NOTE(review): myRow looks like a placeholder for the key of the row being inspected — substitute the real value.
SELECT x INTO #all FROM (
SELECT A1 as x FROM myTable WHERE ID = myRow
UNION ALL
SELECT A2 as x FROM myTable WHERE ID = myRow
UNION ALL
SELECT A3 as x FROM myTable WHERE ID = myRow
UNION ALL
SELECT A4 as x FROM myTable WHERE ID = myRow
UNION ALL
SELECT A5 as x FROM myTable WHERE ID = myRow
) y
then you can derive the answer:
-- Anti-join: keep the expected integers that have no matching value in #all.
SELECT
    #ints.n
FROM #ints
LEFT JOIN #all
    ON #all.x = #ints.n
WHERE #all.x IS NULL
ORDER BY #ints.n

Select and merge rows in a table in SQL Stored procedure

Have a temp table with schema: ID | SeqNo | Name
ID - Not unique
SeqNo - Int (can be 1,2 or 3). Sort of ID+SeqNo as Primary key
Name - Any text
And sample data in the table like this
1 | 1 | RecordA
2 | 1 | RecordB
3 | 1 | RecordC
1 | 2 | RecordD
4 | 1 | RecordE
5 | 1 | RecordF
3 | 2 | RecordG
Need to select from this table and output like
1 | RecordA/RecordD
2 | RecordB
3 | RecordC/RecordG
4 | RecordE
5 | RecordF
Need to do this without cursor.
If SeqNo is limited to 1,2,3:
-- One row per id that has a SeqNo 1 record, with the names of SeqNo 2 and 3
-- appended when present. '/' + NULL is NULL, so coalesce drops a missing part
-- together with its separator.
-- id is qualified with a. because all three aliases expose an id column.
select a.id, a.name + coalesce('/'+b.name, '') + coalesce('/'+c.name, '')
from myTable a
left outer join myTable b on a.id=b.id and b.seqno = 2
left outer join myTable c on a.id=c.id and c.seqno = 3
where a.seqno = 1;
If SeqNo is open ended you can deploy a recursive cte:
-- anchor: the SeqNo 1 record of every id. name is widened to varchar(max) so
-- the anchor and recursive members of the CTE below have the same column type
-- (SQL Server rejects a recursive CTE whose two members differ in type).
;with anchor as (
    select id, name = convert(varchar(max), name), seqno
    from myTable
    where seqno=1)
-- recursive: appends the record with the next SeqNo to the accumulated name.
, recursive as (
    select id, name, seqno
    from anchor
    union all
    select t.id, r.name + '/' + t.name, t.seqno
    from myTable t
    join recursive r on t.id = r.id and r.seqno+1 = t.seqno)
-- Returns one row per step, i.e. the intermediate strings as well as the final one.
select id, name from recursive;
If you know SeqNo will never be more than 3:
-- Builds '/Name1/Name2/Name3' per Id from the SeqNo 1..3 records (a missing
-- SeqNo contributes ''), then STUFF removes the first character.
SELECT
    Id,
    STUFF(
        MAX(CASE WHEN SeqNo = 1 THEN '/' + Name ELSE '' END)
        + MAX(CASE WHEN SeqNo = 2 THEN '/' + Name ELSE '' END)
        + MAX(CASE WHEN SeqNo = 3 THEN '/' + Name ELSE '' END),
        1, 1, ''
    ) AS Names
FROM table1
GROUP BY Id
Otherwise, something like this is the generic solution to an arbitrary number of items:
-- Concatenates all names of an Id in SeqNo order, separated by '/'.
-- The correlated subquery emits '/Name' per record as one XML text node.
-- TYPE + .value() reads it back as plain text, so characters such as & < >
-- in a name are not returned as XML entities (&amp; &lt; &gt;).
-- STUFF then removes the leading '/'.
select a.Id, Names = stuff((
    select '/' + b.Name
    from table1 b
    where a.Id = b.Id
    order by b.SeqNo
    for xml path (''), type).value('.', 'varchar(max)')
, 1, 1, '')
from table1 a
group by a.Id
Or write a CLR UDA.
Edit: had the wrong alias on the correlated table!
Edit2: another version, based on Remus's recursion example. I couldn't think of any way to select only the last recursion per Id, without aggregation or sorting. Anybody know?
-- Self-contained demo: myTable is an inline VALUES list.
;WITH myTable AS (
    SELECT *
    FROM (
        VALUES
            (1, 1, 'RecordA'),
            (2, 1, 'RecordB'),
            (3, 1, 'RecordC'),
            (1, 2, 'RecordD'),
            (4, 1, 'RecordE'),
            (5, 1, 'RecordF'),
            (3, 2, 'RecordG')
    ) a (Id, SeqNo, Name)
),
-- Starting rows: the SeqNo 1 record of every id, widened to varchar(max) so
-- the recursive concatenation has a matching type.
anchor AS (
    SELECT
        id,
        CONVERT(varchar(max), name) AS name,
        seqno
    FROM myTable
    WHERE seqno = 1
),
-- Each step appends the record with the next SeqNo to the accumulated name.
recursive AS (
    SELECT id, name, seqno
    FROM anchor
    UNION ALL
    SELECT t.id, r.name + '/' + t.name, t.seqno
    FROM myTable t
    JOIN recursive r
        ON r.id = t.id
       AND t.seqno = r.seqno + 1
)
-- Every intermediate string is a prefix of the final one, so MAX(name) keeps
-- the complete concatenation for each id.
SELECT
    id,
    MAX(name) AS name
FROM recursive
GROUP BY id;
---- without aggregation, we get 7 rows:
--select id, name
--from recursive;
The solution is good. I have a similar issue, but here I am using 2 different tables. ex:
table1
1 | 1 |
2 | 3 |
3 | 1 |
4 | 2 |
5 | 1 |
1 | 2 |
1 | 3 |
4 | 1 |
5 | 2 |
2 | 2 |
4 | 3 |
table2
1 | RecordA
2 | RecordB
3 | RecordC
I want to get the data from two tables and display in the below format.
1 | RecordA,RecordB,RecordC|
2 | RecordB,RecordC|
3 | RecordA |
4 | RecordA,RecordB,RecordC|
5 | RecordA,RecordB |