TSQL - Fill missing records - sql

Code:
CREATE TABLE #Temp
(
[ID1] INT
, [ID2] INT
, [ID3] INT
, [Val] BIT
, PRIMARY KEY CLUSTERED ( [ID1], [ID2], [ID3] )
) ;
INSERT INTO #Temp
SELECT 1
, 10
, 100
, 1
UNION ALL
SELECT 1
, 10
, 101
, 1
UNION ALL
SELECT 1
, 11
, 100
, 1
UNION ALL
SELECT 1
, 11
, 101
, 1
UNION ALL
SELECT 2
, 10
, 100
, 1 ;
CREATE TABLE #Temp_ID3
(
[ID3] INT
) ;
INSERT INTO #Temp_ID3
SELECT 100
UNION ALL
SELECT 101
UNION ALL
SELECT 102 ;
SELECT [ID1]
, [ID2]
, [ID3]
, [Val]
FROM #Temp ;
SELECT [ID3]
FROM #Temp_ID3 ;
DROP TABLE #Temp_ID3 ;
DROP TABLE #Temp ;
Output:
ID1 ID2 ID3 Val
1 10 100 1
1 10 101 1
1 11 100 1
1 11 101 1
2 10 100 1
Goal:
To find missing ID3 records - from #Temp_ID3 table - in #Temp table (ID1/ID2/ID3 combos for existing ID1/ID2 combos). For those missing records, Val should be False. Desired Output will make sense.
I can get distinct ID1/ID2 from #Temp, cross join with ID3 and create a dataset. Left join #Temp to this new dataset, and insert records that way, but looking for a more "simpler" way.
Here's the "complicated" way.
;WITH CTE AS
(
SELECT DISTINCT
[TT].[ID1]
, [TT].[ID2]
, [T3].[ID3]
FROM #Temp_ID3 AS [T3]
CROSS JOIN (
SELECT DISTINCT
[ID1]
, [ID2]
FROM #Temp
) AS [TT]
)
SELECT [C].[ID1]
, [C].[ID2]
, [C].[ID3]
, COALESCE ( [TT].[Val], 0 ) AS [Val]
FROM CTE AS [C]
LEFT JOIN #Temp AS [TT]
ON [C].[ID1] = [TT].[ID1]
AND [C].[ID2] = [TT].[ID2]
AND [C].[ID3] = [TT].[ID3]
ORDER BY [C].[ID1]
, [C].[ID2]
, [C].[ID3] ;
Desired Output:
ID1 ID2 ID3 Val
1 10 100 1
1 10 101 1
1 10 102 0 -- Filling Missing ID3 Record w/ default Val = 0
1 11 100 1
1 11 101 1
1 11 102 0 -- Filling Missing ID3 Record w/ default Val = 0
2 10 100 1
2 10 101 0 -- Filling Missing ID3 Record w/ default Val = 0
2 10 102 0 -- Filling Missing ID3 Record w/ default Val = 0

Use a cross join to generate the rows and then left join to fill in the values:
select t1.id1, t2.id2, t3.id3, coalesce(t.val, 0) as val
from (select distinct id1 from #temp) t1 cross join
(select distinct id2 from #temp) t2 cross join
(select distinct id3 from #temp) t3 left join
#temp t
on t.id1 = t1.id1 and t.id2 = t2.id2 and t.id3 = t3.id3;
You don't have to use subqueries to generate the ids, if one of the tables actually has them.
EDIT:
For the revised version in the comment, you can do something very similar:
select t12.id1, t12.id2, t3.id3, coalesce(t.val, 0) as val
from (select distinct id1, id2 from #temp) t12 cross join
#temp_id3 t3 left join
#temp t
on t.id1 = t12.id1 and t.id2 = t12.id2 and t.id3 = t3.id3;

As per example you want only cross join with self join
select distinct t.id1, t.id2, t3.id3, coalesce(t1.val, 0) as Val
from #Temp t cross join #Temp_ID3 t3
left join #temp t1
on t1.id1 = t.id1 and t1.id2 = t.id2 and t1.id3 = t3.id3;

Related

SQL Server Join two tables without duplicates in primary table

I have two tables that I want to join together such that all foreign rows are returned and the primary table's rows are not duplicated. For example:
T1
pk code value
1 One 100
2 Two 200
T2
fk value
1 10
1 15
1 30
2 25
I want all records of T2 without the T1 records duplicating, so the result set I want to look like this:
T2.fk T1.code T1.value T2.value
1 One 100 10
1 NULL NULL 15
1 NULL NULL 30
2 Two 200 25
Is there a SQL Server join method for achieving that?
You need to rank your rows in T2 and do a left join including rank as a join condition:
with cte as(select *, row_number() over(partition by fk order by value) as rn from T2)
select c.fk, t.code, t.value, c.value
from cte c
left join T1 t on c.fk = t.pk and c.rn = 1
Here is the full example:
DECLARE #t1 TABLE
(
pk INT ,
code VARCHAR(MAX) ,
value INT
)
INSERT INTO #t1
VALUES ( 1, 'One', 100 ),
( 2, 'Two', 200 )
DECLARE #t2 TABLE ( fk INT, value INT )
INSERT INTO #t2
VALUES ( 1, 10 ),
( 1, 15 ),
( 1, 30 ),
( 2, 25 );
WITH cte
AS ( SELECT * ,
ROW_NUMBER() OVER ( PARTITION BY fk ORDER BY value ) AS rn
FROM #t2
)
SELECT c.fk ,
t.code ,
t.value ,
c.value
FROM cte c
LEFT JOIN #t1 t ON c.fk = t.pk
AND c.rn = 1
Try this:
select T2.fk,
CASE
WHEN (SELECT COUNT(*) FROM t2 tother WHERE tother.fk = t2.fk
AND tother.value > t2.value) > 0 THEN NULL ELSE t1.code
END,
CASE
WHEN (SELECT COUNT(*) FROM t2 tother WHERE tother.fk = t2.fk
AND tother.value > t2.value) > 0 THEN NULL ELSE t1.value
END,T2.value
from t2
join t1
on t2.fk = t1.pk
DECLARE #t1 TABLE (pk int,code varchar(10),value int)
DECLARE #t2 TABLE (fk int,value int)
INSERT INTO #t1
SELECT 1,'one',100
UNION
SELECT 2,'two',200
INSERT INTO #t2
SELECT 1,10
UNION SELECT 1,15 UNION SELECT 1,30 UNION SELECT 2,25
;WITH cte AS(
SELECT t2.fk,t2.value t2val,t1.pk,t1.code,t1.value t1val,ROW_NUMBER() OVER(PARTITION BY fk ORDER BY fk) rno FROM #t2 t2 LEFT JOIN #t1 t1 on t2.fk=t1.pk)
SELECT fk,code=(CASE WHEN rno=1 THEN code ELSE null END),t1val=(CASE WHEN rno=1 THEN t1val ELSE NULL END),t2val FROM cte
output
fk code t1val t2val
1 one 100 10
1 NULL NULL 15
1 NULL NULL 30
2 two 200 25

TSQL Distinct Counts

I have a table that looks like this:
ID SuppressionTypeID PersonID
------------------------------
1 1 123
2 1 456
3 2 456
I want to get a rolling count (distinct people) rather than a normal group by count.
e.g. not this:
SuppressionTypeID Count
---------------------------
1 2
2 1
This:
SuppressionTypeID RecordsLost
----------------------------------
1 2
2 0
The latter being zero as we lost person 456 on suppresiontypeid 1.
Thanks in advance.
You may need to use a temporary table or a table variable as shown below
DECLARE #t TABLE (
ID INT
,SuppressionTypeID INT
,PersonID INT
)
INSERT INTO #t
SELECT 1
,1
,123
UNION ALL
SELECT 2
,1
,456
UNION ALL
SELECT 3
,2
,456
DECLARE #t1 TABLE (
ID INT
,SuppressionTypeID INT
,PersonID INT
,firstid INT
)
INSERT INTO #t1
SELECT *
,NULL
FROM #t
UPDATE t1
SET t1.firstid = t2.firstid
FROM #t1 AS t1
INNER JOIN (
SELECT personid
,min(SuppressionTypeID) AS firstid
FROM #t1
GROUP BY personid
) AS t2 ON t1.PersonID = t2.PersonID
SELECT coalesce(t2.firstid, t1.SuppressionTypeID) AS SuppressionTypeID
,count(DISTINCT t2.personid) AS count
FROM #t1 AS t1
LEFT JOIN #t1 AS t2 ON t1.personid = t2.personid
AND t1.SuppressionTypeID = t2.firstid
GROUP BY coalesce(t2.firstid, t1.SuppressionTypeID)
The result is
SuppressionTypeID count
----------------- -----------
1 2
2 0
You can try;
with tmp_tbl as (
select
x.SuppressionTypeID, count(x.PersonID) as RecordsLost
from (
select
min(SuppressionTypeID) as SuppressionTypeID,
PersonID
from tbl
group by PersonID
) as x
group by x.PersonID
order by x.SuppressionTypeID
)
select
distict t.SuppressionTypeID, coalesce(tmp.RecordsLost, 0) as RecordsLost
from tbl t
left join tmp_tbl tmp on tmp.SuppressionTypeID = t.SuppressionTypeID

Full Outer Join on Incomplete Data (by id variable)

I have two tables (see example data below). I need to keep all of the ID values in table 1 and merge table 1 with table 2 by sequence. The tricky part is that I also have to retain the field value1 from table 1 and value2 from table 2.
table 1 :
ID sequence value1
-------------------------
p1 1 5
p1 2 10
p2 1 15
p2 2 20
table 2 :
sequence value2
-------------------------
1 10
2 20
3 30
4 40
I need the resulting table to appear like so:
ID sequence value1 value2
----------------------------------
p1 1 5 10
p1 2 10 20
p1 3 - 30
p1 4 - 40
p2 1 15 10
p2 2 20 20
p2 3 - 30
p2 4 - 40
I have tried the following sql code, but it doesn't merge the missing values from from value1 field in table 1 and merge it with the values2 field from table 2
select t1.ID, t2.sequence, t1.value1, t2.value2 from
t2 full outer join t1 on t2.sequence=t1.sequence
Any assistance you can provide is greatly appreciated.
You can try something like this:
select coalesce(t1.[id], t3.[id]),
, t2.[sequence]
, t1.[value]
, t2.[value]
from [tbl2] t2
left join [tbl1] t1 on t1.[sequence] = t2.[sequence]
left join (select distinct [id] from [tbl1]) t3 on t1.[id] is null
SQLFiddle
One way with CROSS JOIN and OUTER APPLY:
DECLARE #t1 TABLE(ID CHAR(2), S INT, V1 INT)
DECLARE #t2 TABLE(S INT, V2 INT)
INSERT INTO #t1 VALUES
('p1', 1, 5),
('p1', 2, 10),
('p2', 1, 15),
('p2', 2, 20)
INSERT INTO #t2 VALUES
(1, 10),
(2, 20),
(3, 30),
(4, 40)
SELECT c.ID, t2.S, ca.V1, t2.V2 FROM #t2 t2
CROSS JOIN (SELECT DISTINCT ID FROM #t1) c
OUTER APPLY(SELECT * FROM #t1 t1 WHERE c.ID = t1.ID AND t1.S = t2.S) ca
ORDER BY c.ID, t2.S
Output:
ID S V1 V2
p1 1 5 10
p1 2 10 20
p1 3 NULL 30
p1 4 NULL 40
p2 1 15 10
p2 2 20 20
p2 3 NULL 30
p2 4 NULL 40
Given this schema:
create table #table_1
(
ID varchar(8) not null ,
sequence int not null ,
value int not null ,
primary key clustered ( ID , sequence ) ,
unique nonclustered ( sequence , ID ) ,
)
create table #table_2
(
sequence int not null ,
value int not null ,
primary key clustered ( sequence ) ,
)
go
insert #table_1 values ( 'p1' , 1 , 5 )
insert #table_1 values ( 'p1' , 2 , 5 )
insert #table_1 values ( 'p2' , 1 , 15 )
insert #table_1 values ( 'p2' , 2 , 20 )
insert #table_2 values ( 1 , 10 )
insert #table_2 values ( 2 , 20 )
insert #table_2 values ( 3 , 30 )
insert #table_2 values ( 4 , 40 )
go
This should get you what you want:
select ID = map.ID ,
sequence = map.sequence ,
value1 = t1.value ,
value2 = t2.value
from ( select distinct
t1.ID ,
t2.sequence
from #table_1 t1
cross join #table_2 t2
) map
left join #table_1 t1 on t1.ID = map.ID
and t1.sequence = map.sequence
join #table_2 t2 on t2.sequence = map.sequence
order by map.ID ,
map.sequence
go
Producing:
ID sequence value1 value2
== ======== ====== ======
p1 1 5 10
p1 2 5 20
p1 3 - 30
p1 4 - 40
p2 1 15 10
p2 2 20 20
p2 3 - 30
p2 4 - 40

SQL Query to retrieve values that belong exclusively to a group

Suppose I have one table with the following values and columns:
ID1 | ID2
1 | 1
2 | 1
3 | 1
4 | 1
4 | 2
3 | 3
4 | 3
4 | 4
4 | 4
I'd like to retrieve the ID2 values that belong exclusively to records where ID1 = 4. So for the above example, I'd like to see the following response:
ID1 | ID2
4 | 2
4 | 4
Try working it out contrapositively like this.
Finding all elements where ID1 is only 4 is the same as finding all elements that don't not have ID1 = 4.
CREATE TABLE #temp (ID1 NVARCHAR(10), ID2 NVARCHAR(10))
INSERT INTO #temp(ID1,ID2) VALUES (N'1',N'1')
INSERT INTO #temp(ID1,ID2) VALUES (N'2',N'1')
INSERT INTO #temp(ID1,ID2) VALUES (N'3',N'1')
INSERT INTO #temp(ID1,ID2) VALUES (N'4',N'1')
INSERT INTO #temp(ID1,ID2) VALUES (N'4',N'2')
INSERT INTO #temp(ID1,ID2) VALUES (N'3',N'3')
INSERT INTO #temp(ID1,ID2) VALUES (N'4',N'3')
INSERT INTO #temp(ID1,ID2) VALUES (N'4',N'4')
INSERT INTO #temp(ID1,ID2) VALUES (N'4',N'4')
SELECT * FROM #temp AS t
SELECT DISTINCT * FROM #temp AS t
WHERE id2 NOT IN (SELECT ID2 FROM #temp AS t WHERE ID1 <> 4)
These queries will probably be useful to you for the more general cases (and by general I mean when ID1 is something other than 4):
select distinct t1.id1, t1.id2
from T as t1
where not exists (
select 1
from T as t2
where t2.ID1 <> t1.ID1 and t2.ID2 = t1.ID2
)
select t1.id1, count(distinct t1.id2)
from T as t1
where not exists (
select 1
from T as t2
where t2.ID1 <> t1.ID1 and t2.ID2 = t1.ID2
)
group by t1.id1
You can also do this:
select 4,id2 from
(select distinct ID1 , ID2 from t) t1
group by id2
having count(*)=1
There are a few ways to do this:
SELECT t1.id1, t1.id2
FROM mytable t1
WHERE t1.id1 = 4
AND NOT EXISTS ( SELECT 1 FROM mytable t2
WHERE t2.id2 = t1.id2
AND t2.id1 != 4 );
or:
SELECT id1, id2 FROM (
SELECT id1, id2
FROM mytable
GROUP BY id1, id2
HAVING COUNT(*) = 1
) WHERE id1 = 4;
or:
SELECT id1, id2 FROM (
SELECT id1, id2, COUNT(*) OVER ( PARTITION BY id2 ) AS cnt
FROM mytable
) WHERE id1 = 4
AND cnt = 1;

Filter unique records from a database while removing double not-null values

This is kind of hard to explain in words but here is an example of what I am trying to do in SQL. I have a query which returns the following records:
ID Z
--- ---
1 A
1 <null>
2 B
2 E
3 D
4 <null>
4 F
5 <null>
I need to filter this query so that each unique record (based on ID) appears only once in the output and if there are multiple records for the same ID, the output should contain the record with the value of Z column being non-null. If there is only a single record for a given ID and it has value of null for column Z the output still should return that record. So the output from the above query should look like this:
ID Z
--- ---
1 A
2 B
2 E
3 D
4 F
5 <null>
How would you do this in SQL?
You can use GROUP BY for that:
SELECT
ID, MAX(Z) -- Could be MIN(Z)
FROM MyTable
GROUP BY ID
Aggregate functions ignore NULLs, returning them only when all values on the group are NULL.
If you need to return both 2-B and 2-E rows:
SELECT *
FROM YourTable t1
WHERE Z IS NOT NULL
OR NOT EXISTS
(SELECT * FROM YourTable t2
WHERE T2.ID = T1.id AND T2.z IS NOT NULL)
SELECT ID
,Z
FROM YourTable
WHERE Z IS NOT NULL
DECLARE #T TABLE ( ID INT, Z CHAR(1) )
INSERT INTO #T
( ID, Z )
VALUES ( 1, 'A' ),
( 1, NULL )
, ( 2, 'B' ) ,
( 2, 'E' ),
( 3, 'D' ) ,
( 4, NULL ),
( 4, 'F' ),
( 5, NULL )
SELECT *
FROM #T
; WITH c AS (SELECT ID, r=COUNT(*) FROM #T GROUP BY ID)
SELECT t.ID, Z
FROM #T t JOIN c ON t.ID = c.ID
WHERE c.r =1
UNION ALL
SELECT t.ID, Z
FROM #T t JOIN c ON t.ID = c.ID
WHERE c.r >=2
AND z IS NOT NULL
This example assumes you want two rows returned for ID = 2.
with tmp (id, cnt_val) as
(select id,
sum(case when z is not null then 1 else 0 end)
from t
group by id)
select t.id, t.z
from t
inner join tmp on t.id = tmp.id
where tmp.cnt_val > 0 and t.z is not null
or tmp.cnt_val = 0 and t.z is null
WITH CTE
AS (
SELECT id
,z
,ROW_NUMBER() OVER (
PARTITION BY id ORDER BY coalesce(z, '') DESC
) rn
FROM #T
)
SELECT id
,z
FROM CTE
WHERE rn = 1