SQL count occurrences of values grouped by external tables references - sql

What is the best approach in terms of performance and maintainability to count the number of occurrences of the same value in a table, grouping the results with the same reference that groups the entries of the table?
Let's say I have three tables (the concepts have been simplified to represent a scenario similar to the one I'm working on):
MEAL
|----|-----|
| ID | ... |
|----|-----|
| 1  | ... |
| 2  | ... |
| 3  | ... |
| 4  | ... |
|----|-----|

RECIPE
|----|------|-----|
| ID | ID_m | ... |
|----|------|-----|
| 1  | 1    | ... |
| 2  | 2    | ... |
| 3  | 3    | ... |
| 4  | 4    | ... |
| 5  | 1    | ... |
| 6  | 4    | ... |
| 7  | 5    | ... |
| 8  | 6    | ... |
|----|------|-----|

INGREDIENT_ENTRY
|----|------|------------------------|
| ID | ID_r | amount and description |
|----|------|------------------------|
| 1  | 1    | '15gr of yeast'        |
| 2  | 4    | '2 eggs'               |
| 3  | 1    | '300cl of water'       |
| 4  | 2    | '300cl of beer'        |
| 5  | 3    | '250cl of milk'        |
| 6  | 5    | '100gr of biscuits'    |
| 7  | 2    | '15gr of yeast'        |
| 8  | 1    | '500gr of flour'       |
| 9  | 2    | '500gr of flour'       |
| 10 | 2    | '10gr of salt'         |
| 11 | 4    | '15gr of yeast'        |
|----|------|------------------------|
The same MEAL can be cooked with a different RECIPE, and each RECIPE is made of different INGREDIENT_ENTRYs, organized in the same RECIPE by sharing the same ID_r value.
INGREDIENT_ENTRY.[amount and description] is a column of type VARCHAR(MAX), this is the value that must be compared.
In the example, making the query with (MEAL 1,RECIPE 1):
It has 3 ingredients (1,3,8), and shares:
Two ingredients with RECIPE 2 (7,9) -> and so can be found in MEAL 2
One ingredient with RECIPE 4 (11) -> and so can be found in MEAL 3
Result should look something like:
|------| |--------| |-------|
| MEAL | | RECIPE | | COUNT |
|------| |--------| |-------|
| 2 | | 2 | | 2 |
| 4 | | 4 | | 1 |
|------| |--------| |-------|
I'm experimenting with views to reduce SQL complexity, but I cannot make it with a single SQL statement and I would like to avoid going back and forth to code (C#) and perform multiple queries (for example query for every ingredient, and reconcile results with HashMaps or similar).
Please, note that I cannot modify the DB structure.

You can find common ingredients using EXISTS. In the query below I have used a common table expression so that the joins needed to get back to a meal ID only have to be written once:
-- Meal whose ingredients are matched against the recipes of every other meal.
DECLARE @SelectedMealID INT = 1;

-- LinkedData: one row per ingredient entry, tagged with its recipe and the
-- meal that recipe belongs to, so the joins are written only once.
WITH LinkedData AS
(
    SELECT  MealID     = r.ID_m,
            RecipeID   = r.ID,
            Ingredient = i.[amount and description]
    FROM    RECIPE AS r
            INNER JOIN INGREDIENT_ENTRY AS i
                ON i.ID_r = r.ID
)
-- For each recipe of any other meal, count the ingredient rows whose text
-- also appears in at least one recipe of the selected meal.
SELECT  a.MealID,
        a.RecipeID,
        CommonIngedients = COUNT(*)
FROM    LinkedData AS a
WHERE   a.MealID != @SelectedMealID
AND     EXISTS
        (   SELECT  1
            FROM    LinkedData AS b
            WHERE   b.Ingredient = a.Ingredient
            AND     b.MealID = @SelectedMealID
        )
GROUP BY a.MealID, a.RecipeID;
I have tested this with the below sample:
-- GENERATE TABLES AND DATA
-- Table variables stand in for MEAL, RECIPE and INGREDIENT_ENTRY; they are
-- scoped to the batch, so run the whole script in one go.
DECLARE @Meal TABLE (ID INT);
INSERT @Meal (ID) VALUES (1), (2), (3), (4);

DECLARE @Recipe TABLE (ID INT, ID_m INT);
INSERT @Recipe (ID, ID_m)
VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 1), (6, 4), (7, 5), (8, 6);

DECLARE @Ingredient TABLE (ID INT, ID_r INT, AmountAndDescription VARCHAR(MAX));
INSERT @Ingredient (ID, ID_r, AmountAndDescription)
VALUES
    (1, 1, '15gr of yeast'),  (2, 4, '2 eggs'),
    (3, 1, '300cl of water'), (4, 2, '300cl of beer'),
    (5, 3, '250cl of milk'),  (6, 5, '100gr of biscuits'),
    (7, 2, '15gr of yeast'),  (8, 1, '500gr of flour'),
    (9, 2, '500gr of flour'), (10, 2, '10gr of salt'),
    (11, 4, '15gr of yeast');

-- TEST QUERY
-- Meal whose ingredients are matched against the recipes of every other meal.
DECLARE @SelectedMealID INT = 1;

-- LinkedData: one row per ingredient entry, tagged with its recipe and meal.
WITH LinkedData AS
(
    SELECT  MealID     = r.ID_m,
            RecipeID   = r.ID,
            Ingredient = i.AmountAndDescription
    FROM    @Recipe AS r
            INNER JOIN @Ingredient AS i
                ON i.ID_r = r.ID
)
-- For each recipe of any other meal, count the ingredient rows whose text
-- also appears in at least one recipe of the selected meal.
SELECT  a.MealID,
        a.RecipeID,
        CommonIngedients = COUNT(*)
FROM    LinkedData AS a
WHERE   a.MealID != @SelectedMealID
AND     EXISTS
        (   SELECT  1
            FROM    LinkedData AS b
            WHERE   b.Ingredient = a.Ingredient
            AND     b.MealID = @SelectedMealID
        )
GROUP BY a.MealID, a.RecipeID;
OUTPUT
MealID RecipeID CommonIngedients
------------------------------------------
2 2 2
4 4 1
N.B. The expected output in the question differs slightly, but I think the question may contain a typo (it states that Recipe 4 relates to Meal 3, but this doesn't appear to be the case in the sample data).

Related

UPDATE based on multiple "WHERE IN" conditions

Let's say I have a table I want to update based on multiple conditions. Each of these conditions is an equal-sized array, and the only valid cases are the ones which match the same index in the arrays.
That is, if we use the following SQL clause
-- Each IN list is tested independently, so any combination of listed values
-- matches (e.g. a = 1, b = 8, c = 'e'), not only same-position trios.
UPDATE Foo
SET bar = 1
WHERE a IN ( 1, 2, 3, 4, 5)
AND b IN ( 6, 7, 8, 9, 0)
AND c IN ('a', 'b', 'c', 'd', 'e')
bar will be set to 1 for any row which has, for example, a = 1, b = 8, c = 'e'.
That is not what I want.
I need a clause where only a = 1, b = 6, c = 'a' or a = 2, b = 7, c = 'b' (etc.) works.
Obviously I could rewrite the clause as
-- One OR branch per allowed (a, b, c) trio. The trailing "OR ..." stands for
-- the remaining trios and is not valid SQL as written.
UPDATE Foo
SET bar = 1
WHERE (a = 1 AND b = 6 AND c = 'a')
OR (a = 2 AND b = 7 AND c = 'b')
OR ...
This would work, but it's hardly extensible. Given the values of the conditions are variable and obtained programmatically, it'd be far better if I could set each array in one place instead of having to build a string-building loop to get that WHERE call right.
So, is there a better, more elegant way to have the same behavior as this last block?
Use a table value constructor:
-- Set bar = 1 only on Foo rows whose (a, b, c) equals one whole trio from the
-- inline list. The FROM clause is what binds the alias f to Foo; without it
-- "UPDATE f" would look for a table literally named f.
UPDATE f
SET bar = 1
FROM Foo AS f
WHERE EXISTS (
    SELECT 1
    FROM (VALUES (1, 6, 'a'), (2, 7, 'b'), (3, 8, 'c')) AS Trios(a, b, c)
    WHERE Trios.a = f.a
      AND Trios.b = f.b
      AND Trios.c = f.c
);
You can use values() and join:
-- Joins Foo to an inline list of allowed (a, b, c) trios; only rows equal to
-- a whole trio are updated.
UPDATE f
SET bar = 1
FROM Foo f JOIN
(VALUES (1, 6, 'a'),
(2, 7, 'b'),
-- the ". . ." line below is a placeholder for the remaining trios, not valid SQL
. . .
) v(a, b, c)
ON f.a = v.a AND f.b = v.b AND f.c = v.c;
Try this; it might work:
-- Holds the allowed (a, b, c) trios; a table variable, so it lives only for
-- the duration of this batch.
DECLARE @Temp AS TABLE (a INT, b INT, c VARCHAR(50));

INSERT INTO @Temp (a, b, c)
VALUES (1, 6, 'a'),
       (2, 7, 'b'),
       (3, 8, 'c'),
       (4, 9, 'd'),
       (5, 0, 'e');

-- Flag only the FOO rows that match a whole trio.
UPDATE F
SET bar = 1
FROM FOO AS F
INNER JOIN @Temp AS T
    ON F.a = T.a
   AND F.b = T.b
   AND F.c = T.c;
When you read the data don't save it as separated values but as a single string and then use the following:
-- Matches rows whose (a, b, c) equals one of the listed trios by comparing a
-- delimited key. The '|' separator keeps distinct trios from producing the
-- same key (without it, (1, 60, 'e') and (16, 0, 'e') both give '160e').
-- NOTE: wrapping the columns in CONCAT stops indexes on a, b or c from being
-- used for seeks, so expect a scan of foo.
UPDATE foo
SET bar = 1
WHERE CONCAT(a, '|', b, '|', c) IN ('1|6|a', '2|7|b', '3|8|c', '4|9|d', '5|0|e');
it may not be the most elegant way but it is very practical and simple.
I could be entirely off the mark here--I'm not sure if you're passing in a set of values or what-have-you--but my first thought is using a series of CTEs.
I'm making considerable assumptions about your data, but here's an example you can run in SSMS based on my thoughts of your question.
-- Create #Data and insert some, er... data ---
-- #Data is a session-scoped temp table; dropping it first keeps the script
-- re-runnable (DROP TABLE IF EXISTS needs SQL Server 2016+).
DROP TABLE IF EXISTS #Data;
CREATE TABLE #Data
(
    id INT IDENTITY(100,1) PRIMARY KEY,
    a  VARCHAR(1),
    b  VARCHAR(1),
    c  VARCHAR(1)
);
-- Each INSERT fills a single column, so every row has exactly one non-NULL
-- value among a, b and c.
INSERT INTO #Data ( a ) VALUES ('1'), ('2'), ('3'), ('4'), ('5');
INSERT INTO #Data ( b ) VALUES ('6'), ('7'), ('8'), ('9'), ('0');
INSERT INTO #Data ( c ) VALUES ('a'), ('b'), ('c'), ('d'), ('e');
So let's assume this is your data. I've kept it simple to make it easier to understand.
+-----+---+---+---+
| id | a | b | c |
+-----+---+---+---+
| 100 | 1 | | |
| 101 | 2 | | |
| 102 | 3 | | |
| 103 | 4 | | |
| 104 | 5 | | |
| 105 | | 6 | |
| 106 | | 7 | |
| 107 | | 8 | |
| 108 | | 9 | |
| 109 | | 0 | |
| 110 | | | a |
| 111 | | | b |
| 112 | | | c |
| 113 | | | d |
| 114 | | | e |
+-----+---+---+---+
Query the data with aligned "array" indexes:
-- Number the non-NULL values of each column by ascending id, then line the
-- three numbered lists up position by position.
;WITH a_values AS (
    SELECT id,
           ROW_NUMBER() OVER ( ORDER BY id ) AS a_row_id,
           a
    FROM #Data
    WHERE a IS NOT NULL
),
b_values AS (
    SELECT id,
           ROW_NUMBER() OVER ( ORDER BY id ) AS b_row_id,
           b
    FROM #Data
    WHERE b IS NOT NULL
),
c_values AS (
    SELECT id,
           ROW_NUMBER() OVER ( ORDER BY id ) AS c_row_id,
           c
    FROM #Data
    WHERE c IS NOT NULL
)
SELECT av.id, av.a_row_id, av.a,
       bv.id, bv.b_row_id, bv.b,
       cv.id, cv.c_row_id, cv.c
FROM a_values AS av
INNER JOIN b_values AS bv
    ON bv.b_row_id = av.a_row_id
INNER JOIN c_values AS cv
    ON cv.c_row_id = av.a_row_id;
Which returns:
+-----+----------+---+-----+----------+---+-----+----------+---+
| id | a_row_id | a | id | b_row_id | b | id | c_row_id | c |
+-----+----------+---+-----+----------+---+-----+----------+---+
| 100 | 1 | 1 | 105 | 1 | 6 | 110 | 1 | a |
| 101 | 2 | 2 | 106 | 2 | 7 | 111 | 2 | b |
| 102 | 3 | 3 | 107 | 3 | 8 | 112 | 3 | c |
| 103 | 4 | 4 | 108 | 4 | 9 | 113 | 4 | d |
| 104 | 5 | 5 | 109 | 5 | 0 | 114 | 5 | e |
+-----+----------+---+-----+----------+---+-----+----------+---+
Again, assumptions made on your data (in particular an id exists that can be sorted), but this basically pivots it by linking the a, b and c values on their relative "index" (ROW_NUMBER). By using ROW_NUMBER in this way, we can create a makeshift array index value ( a_row_id, b_row_id, c_row_id ) that can be used to join the resulting values.
This example can easily be changed to an UPDATE statement.
Does this address your question?

SQL Eliminate Duplicates whilst merging additional table

I have two tables, ADDRESSES and an additional table, CONTACTS. Each contact has a SUPERID, which is the ID of the address it belongs to.
I want to identify duplicates (same last name, first name and birthday) in the ADDRESSES table and merge the contacts of these duplicates onto the latest address (latest DATECREATE or highest ID).
Afterwards, the other duplicates should be deleted.
My approach for merging the contacts does not work, though. Deleting the duplicates works.
This is my approach. I would be grateful for help finding what is wrong here.
Thank you!
-- Asker's attempt at re-pointing contacts of duplicate addresses.
-- The join pairs each contact with the address it already points to
-- (ADDRESSES.ID = CONTACTS.SUPERID), so SET SUPERID = ADDRESSES.ID writes
-- back the value the row already holds.
UPDATE dbo.CONTACTS
SET SUPERID = ADDRESSES.ID FROM dbo.ADDRESSES
inner join CONTACTS on ADDRESSES.ID = CONTACTS.SUPERID
WHERE ADDRESSES.id in (
-- addresses whose id is greater than the smallest id among the addresses
-- sharing the same first name, last name and birthday
SELECT id FROM dbo.ADDRESSES
WHERE EXISTS(
SELECT NULL FROM ADDRESSES AS tmpcomment
WHERE dbo.ADDRESSES.FIRSTNAME0 = tmpcomment.FIRSTNAME0
AND dbo.ADDRESSES.LASTNAME0 = tmpcomment.LASTNAME0
and dbo.ADDRESSES.BIRTHDAY1 = tmpcomment.BIRTHDAY1
HAVING dbo.ADDRESSES.id > MIN(tmpcomment.id)
))
-- Deletes every address in a duplicate group except the one with the lowest
-- id. NOTE(review): the stated goal is to keep the latest address (highest
-- id), which is the opposite end of the group.
DELETE FROM ADDRESSES
WHERE id in (
SELECT id FROM dbo.ADDRESSES
WHERE EXISTS(
SELECT NULL FROM ADDRESSES AS tmpcomment
WHERE dbo.ADDRESSES.FIRSTNAME0 = tmpcomment.FIRSTNAME0
AND dbo.ADDRESSES.LASTNAME0 = tmpcomment.LASTNAME0
and dbo.ADDRESSES.BIRTHDAY1 = tmpcomment.BIRTHDAY1
HAVING dbo.ADDRESSES.id > MIN(tmpcomment.id)
)
)
Here is a sample for understanding the issue.
ADDRESSES
| ID | DATECREATE | LASTNAME0 | FIRSTNAME0 | BIRTHDAY1 |
|:-----------|------------:|:------------:|------------:|:------------:|
| 1 | 19.07.2011 | Arthur | James | 05.05.1980 |
| 2 | 23.08.2012 | Arthur | James | 05.05.1980 |
| 3 | 11.12.2015 | Arthur | James | 05.05.1980 |
| 4 | 22.10.2016 | Arthur | James | 05.05.1980 |
| 6 | 20.12.2014 | Doyle | Peter | 01.01.1950 |
| 7 | 09.01.2016 | Doyle | Peter | 01.01.1950 |
|:-----------|------------:|:------------:|------------:|:------------:|
CONTACTS
| ID | SUPERID |
| 1 | 1 |
| 2 | 1 |
| 3 | 2 |
| 4 | 2 |
| 5 | 3 |
| 6 | 4 |
| 7 | 4 |
| 8 | 6 |
| 9 | 6 |
| 10 | 6 |
| 11 | 7 |
The result shall be like this
ADDRESSES
| ID | DATECREATE | LASTNAME0 | FIRSTNAME0 | BIRTHDAY1 |
|:-----------|------------:|:------------:|------------:|:------------:|
| 4 | 22.10.2016 | Arthur | James | 05.05.1980 |
| 7 | 09.01.2016 | Doyle | Peter | 01.01.1950 |
CONTACTS
| ID | SUPERID |
| 1 | 4 |
| 2 | 4 |
| 3 | 4 |
| 4 | 4 |
| 5 | 4 |
| 6 | 4 |
| 7 | 4 |
| 8 | 7 |
| 9 | 7 |
| 10 | 7 |
| 11 | 7 |
My approach would use a temporary table:
/*
CREATE TABLE addresses
([ID] int, [DATECREATE] varchar(10), [LASTNAME0] varchar(6), [FIRSTNAME0] varchar(5), [BIRTHDAY1] datetime);
INSERT INTO addresses
([ID], [DATECREATE], [LASTNAME0], [FIRSTNAME0], [BIRTHDAY1])
VALUES
(1, '19.07.2011', 'Arthur', 'James', '1980-05-05 00:00:00'),
(2, '23.08.2012', 'Arthur', 'James', '1980-05-05 00:00:00'),
(3, '11.12.2015', 'Arthur', 'James', '1980-05-05 00:00:00'),
(4, '22.10.2016', 'Arthur', 'James', '1980-05-05 00:00:00'),
(6, '20.12.2014', 'Doyle', 'Peter', '1950-01-01 00:00:00'),
(7, '09.01.2016', 'Doyle', 'Peter', '1950-01-01 00:00:00');
CREATE TABLE contacts
([ID] int, [SUPERID] int);
INSERT INTO contacts
([ID], [SUPERID])
VALUES
(1, 1),
(2, 1),
(3, 2),
(4, 2),
(5, 3),
(6, 4),
(7, 4),
(8, 6),
(9, 6),
(10, 6),
(11, 7);
*/
DROP TABLE IF EXISTS #t; --sqls2016+ only, google for an older method if yours is sub 2016

-- Map every address id (oldid) to the highest id among the addresses that
-- share its last name, first name and birthday (newid).
SELECT id AS oldid,
       MAX(id) OVER (PARTITION BY lastname0, firstname0, birthday1) AS newid
INTO #t
FROM addresses;
/*now #t contains data like
1, 4
2, 4
3, 4
4, 4
6, 7
7, 7*/

--remove the ones we don't need to change
DELETE FROM #t WHERE oldid = newid;

BEGIN TRANSACTION;

-- "before" snapshot for eyeballing the change
SELECT * FROM addresses;
SELECT * FROM contacts;

--now #t is the list of contact changes we need to make, so make those changes
UPDATE c
SET superid = t.newid
FROM contacts AS c
INNER JOIN #t AS t
    ON c.superid = t.oldid;

--now scrub the old addresses with no contact records. This catches all such records, not just those in #t
--NOT EXISTS is used instead of NOT IN because contacts.superid is nullable:
--a single NULL superid would make NOT IN match no rows at all
DELETE FROM addresses
WHERE NOT EXISTS (SELECT 1 FROM contacts AS c WHERE c.superid = addresses.id);

--alternative to just clean up the records we affected in this operation
DELETE FROM addresses WHERE id IN (SELECT oldid FROM #t);

-- "after" snapshot
SELECT * FROM addresses;
SELECT * FROM contacts;

-- Nothing is persisted: swap for COMMIT once the output has been verified.
ROLLBACK TRANSACTION;
Please note: I have tested this and it produces the results you want, but I advise caution when copying an update/delete query off the internet and running it. I've wrapped it in a transaction that selects the data before and after and then rolls back, so nothing gets wrecked. Run it on a test database first, though!

Select MAX date using data from several columns SQL

I know this is a frequently asked question, and I've had a look through what's already available, but I believe my case is slightly unique (and if it's not, please point me in the right direction).
I am trying to find the latest row associated with a user, where the data is currently spread across two tables and several columns.
table: statusUpdate
+-------+-----------+-----------+-------------------+
| id | name | status | date_change |
+-------+-----------+-----------+-------------------+
| 1 | Matt | 0 | 01-01-2001 |
| 2 | Jeff | 1 | 01-01-2001 |
| 3 | Jeff | 2 | 01-01-2002 |
| 4 | Bill | 2 | 01-01-2001 |
| 5 | Bill | 3 | 01-01-2004 |
+-------+-----------+-----------+-------------------+
table: relationship
+-------+-----------+--------------+
| id | userID |stautsUpdateID|
+-------+-----------+--------------+
| 1 | 22 | 1 |
| 2 | 33 | 2 |
| 3 | 33 | 3 |
| 4 | 44 | 4 |
| 5 | 44 | 5 |
+-------+-----------+--------------+
There is a third table which links userID to its own table but these sample tables should be good enough to get my question over.
I am looking to get the latest status change by date. The problem currently is that it returns all instances of a status change.
Current results:
+-------+---------+-----------+-------------------+
|userID |statusID | status | date_change |
+-------+---------+-----------+-------------------+
| 33 | 2 | 1 | 01-01-2001 |
| 33 | 3 | 2 | 01-01-2002 |
| 44 | 4 | 2 | 01-01-2001 |
| 44 | 5 | 3 | 01-01-2004 |
+-------+---------+-----------+-------------------+
Expected results:
+-------+-----------+-----------+-------------------+
|userID |statusID | status | date_change |
+-------+-----------+-----------+-------------------+
| 33 | 3 | 2 | 01-01-2002 |
| 44 | 5 | 3 | 01-01-2004 |
+-------+-----------+-----------+-------------------+
I hope this all makes sense, please ask for more information otherwise.
Just to reiterate I just want to return the latest instance of a users status change by date.
Sample code of one of my attempts:
-- Asker's attempt: join each status row to the newest date_change per ID.
-- The derived table groups by ID; in the sample data every ID is a single
-- row, so "recent" is that row's own date_change and the join filters
-- nothing out.
-- NOTE(review): the link column is spelled statusUpdateID here,
-- stausUpdateID further down and stautsUpdateID in the table listing —
-- confirm the real column name.
select
st.ID, st.status, st.date_change, r.userID
from statusUpdate st
inner join Relationship r on st.ID = r.statusUpdateID
inner join (select ID, max(date_change) as recent from statusUpdate
group by ID) as y on r.stausUpdateID = y.ID and st.date_change =
y.recent
Hope someone can point me in the right direction.
use row_number() to get the last row by user
-- Latest status change per user: number each user's status rows newest
-- first and keep only row 1.
-- st.ID DESC breaks ties so that two changes with the same date_change
-- always resolve to the same row.
-- NOTE(review): the question's table listing spells the link column
-- stautsUpdateID — use whichever spelling the real schema has.
SELECT d.ID,
       d.status,
       d.date_change,
       d.userID
FROM
(
    SELECT st.ID,
           st.status,
           st.date_change,
           r.userID,
           rn = ROW_NUMBER() OVER (PARTITION BY r.userID
                                   ORDER BY st.date_change DESC, st.ID DESC)
    FROM statusUpdate AS st
    INNER JOIN Relationship AS r
        ON st.ID = r.statusUpdateID
) AS d
WHERE d.rn = 1;
I added a MAX condition to your query:
-- Sample data: #Table1 mirrors statusUpdate, #Table2 mirrors relationship.
CREATE TABLE #Table1
([id] int, [name] varchar(4), [status] int, [date_change] datetime)
;
INSERT INTO #Table1
([id], [name], [status], [date_change])
VALUES
(1, 'Matt', 0, '2001-01-01 00:00:00'),
(2, 'Jeff', 1, '2001-01-01 00:00:00'),
(3, 'Jeff', 2, '2002-01-01 00:00:00'),
(4, 'Bill', 2, '2001-01-01 00:00:00'),
(5, 'Bill', 3, '2004-01-01 00:00:00')
;
CREATE TABLE #Table2
([id] int, [userID] int, [stautsUpdateID] int)
;
INSERT INTO #Table2
([id], [userID], [stautsUpdateID])
VALUES
(1, 22, 1),
(2, 33, 2),
(3, 33, 3),
(4, 44, 4),
(5, 44, 5)
;
-- Latest status row per user.
-- The derived table finds each user's newest date_change; joining back on
-- (userID, date_change) returns the complete row that carries that date.
-- Taking MAX() of id, status and date_change separately could stitch
-- together values from different rows (e.g. an older but numerically higher
-- status next to the newest date).
-- If a user has several changes on the same newest date, all of them are
-- returned.
SELECT st.ID AS id,
       st.status,
       st.date_change,
       r.userID
FROM #Table1 AS st
INNER JOIN #Table2 AS r
    ON st.ID = r.stautsUpdateID
INNER JOIN
(
    SELECT r2.userID,
           MAX(st2.date_change) AS recent
    FROM #Table1 AS st2
    INNER JOIN #Table2 AS r2
        ON st2.ID = r2.stautsUpdateID
    GROUP BY r2.userID
) AS y
    ON y.userID = r.userID
   AND y.recent = st.date_change
ORDER BY r.userID;
output
id status date_change userID
1 0 2001-01-01 00:00:00.000 22
3 2 2002-01-01 00:00:00.000 33
5 3 2004-01-01 00:00:00.000 44

How to SELECT all rows in a query where specific rows meet a certain criteria?

I have a query that accesses data from three tables:
-- Lists group name, CI and Config for tool 2 / 'inputs' metric types, but
-- only for rows where both the metric type and the collection row carry
-- Deploy = 'Y'; rows with the flag unset are filtered out.
SELECT fg.factGroupName, mc.CI, mt.Config
FROM MetricTypes mt
INNER JOIN MetricCollection mc ON mt.TypeId = mc.MetricType
INNER JOIN factGroup fg ON fg.factGroupId = mc.Factgroup
WHERE mt.ToolName = 2 AND mt.TypeName = 'inputs' AND mt.Deploy = 'Y' AND mc.Deploy = 'Y';
The tables look like this:
factGroup
|-------------|--------------|
|factGroupId |factGroupName |
|-------------|--------------|
| 20 | test_servers |
| 21 | prod_servers |
|-------------|--------------|
MetricTypes
|-------------|--------------|--------------|------------|-----------|
|TypeId |TypeName |ToolName |Config |Deploy |
|-------------|--------------|--------------|------------|-----------|
| 10 | inputs | 2 | foo | Y |
| 11 | inputs | 2 | bar | |
| 12 | outputs | 4 | giggle | |
| 13 | inbetween | 6 | biz | |
|-------------|--------------|--------------|------------|-----------|
MetricCollection
|-------------|--------------|--------------|------------|-----------|
|MetricId |Factgroup |MetricType |CI |Deploy |
|-------------|--------------|--------------|------------|-----------|
| 1 | 20 | 10 | alpha | Y |
| 2 | 20 | 11 | beta | |
| 3 | 20 | 12 | gamma | |
| 4 | 21 | 13 | theta | |
|-------------|--------------|--------------|------------|-----------|
The output from the query looks like this:
|-------------|--------------|--------------|
|factGroupName| CI | Config |
|-------------|--------------|--------------|
| test_servers| alpha | foo |
|-------------|--------------|--------------|
How would I adjust my query in order to get a result which reflects all CI and Config fields for a given factGroupName, and not just the ones that have the Y flag in the Deploy fields? In other words, I'd like to have my output look like this:
|-------------|--------------|--------------|
|factGroupName| CI | Config |
|-------------|--------------|--------------|
| test_servers| alpha | foo |
| test_servers| beta | bar |
|-------------|--------------|--------------|
To clarify: I want to keep the condition in place to check for the existence of a "Y" flag in the two Deploy fields, as the script associated with this query uses that flag as a trigger to perform additional work.
The INNER JOIN on Deploy = 'Y' appears to be the problem.
Table example:
-- Temp-table copies of factGroup, MetricTypes and MetricCollection holding
-- just the test_servers rows needed to reproduce the question.
CREATE TABLE #factgroup
(
    factgroupid   INT,
    factgroupname VARCHAR(20)
);

CREATE TABLE #metrictypes
(
    typeid   INT,
    typename VARCHAR(10),
    toolname INT,
    config   VARCHAR(10),
    deploy   VARCHAR(1)
);

CREATE TABLE #metriccollection
(
    metricid   INT,
    factgroup  INT,
    metrictype INT,
    ci         VARCHAR(10),
    deploy     VARCHAR(1)
);

INSERT INTO #factgroup (factgroupid, factgroupname)
VALUES (20, 'test_servers');

-- 'foo' is flagged for deployment; 'bar' leaves deploy unset (NULL).
INSERT INTO #metrictypes (typeid, typename, toolname, config, deploy)
VALUES (10, 'inputs', 2, 'foo', 'Y');
INSERT INTO #metrictypes (typeid, typename, toolname, config)
VALUES (11, 'inputs', 2, 'bar');

-- 'alpha' is flagged for deployment; 'beta' leaves deploy unset (NULL).
INSERT INTO #metriccollection (metricid, factgroup, metrictype, ci, deploy)
VALUES (1, 20, 10, 'alpha', 'Y');
INSERT INTO #metriccollection (metricid, factgroup, metrictype, ci)
VALUES (2, 20, 11, 'beta');
Try this (with hashtags removed):
-- Return every CI/Config of tool 2 'inputs' metric types, as long as
--   * some metric type of the same tool is flagged Deploy = 'Y', and
--   * some collection row of the same fact group is flagged Deploy = 'Y'.
-- The flag is therefore a trigger for the whole group, not a per-row filter.
-- 'Y' matches the stored value exactly, so the result does not depend on a
-- case-insensitive collation; table names use the casing they were created with.
SELECT fg.factGroupName,
       mc.CI,
       mt.Config
FROM #metrictypes AS mt
INNER JOIN #metriccollection AS mc
    ON mt.TypeId = mc.MetricType
INNER JOIN #factgroup AS fg
    ON fg.factGroupId = mc.Factgroup
WHERE mt.ToolName = 2
  AND mt.TypeName = 'inputs'
  AND EXISTS (
        SELECT 1
        FROM #metrictypes AS flagged_mt
        WHERE flagged_mt.ToolName = mt.ToolName
          AND flagged_mt.deploy = 'Y'
      )
  AND EXISTS (
        SELECT 1
        FROM #metriccollection AS flagged_mc
        WHERE flagged_mc.factgroup = mc.Factgroup
          AND flagged_mc.deploy = 'Y'
      );

SQL Server : compare two tables and return similar rows

I want to compare two tables, source and target, and get similar rows.
Compare source and target on Id, one row at a time, and:
If there is a match and Target has two or more rows with that Id => select all matching rows from Target.
If there is a match and Source has two or more rows with that Id =>
for the first match, if the Target row has not been selected before,
select the matching row from Target;
otherwise (if it has already been selected),
check the next matching row.
I think I need a recursive expression to check source and target one by one.
Source
x------x---------x
| Id | Name |
x------x---------x
| 1 | a |
| 2 | b |
| 2 | c |
| 3 | d |
| 3 | e |
| 4 | x |
x------x---------x
Target
x------x---------x
| Id | Name |
x------x---------x
| 1 | f |
| 1 | g |
| 2 | h |
| 3 | i |
| 3 | j |
| 5 | y |
x------x---------x
Result
x------x---------x
| Id | Name |
x------x---------x
| 1 | f |
| 1 | g |
| 2 | h |
| 3 | i |
| 3 | j |
x------x---------x
Test data
-- #s plays the role of Source and #t of Target. They are session-scoped temp
-- tables, dropped first so the script can be re-run (SQL Server 2016+).
DROP TABLE IF EXISTS #s;
DROP TABLE IF EXISTS #t;
CREATE TABLE #s (Id INT, name VARCHAR(20));
CREATE TABLE #t (Id INT, name VARCHAR(20));
INSERT INTO #s (Id, name) VALUES (1, 'a'), (2, 'b'), (2, 'c'), (3, 'd'), (3, 'e');
INSERT INTO #t (Id, name) VALUES (1, 'f'), (1, 'g'), (2, 'h'), (3, 'i'), (3, 'j');
I think you just need the EXISTS operator to do this:
-- Keep every target row whose Id also occurs in the source; each target row
-- appears once no matter how many source rows share its Id.
SELECT tgt.Id,
       tgt.name
FROM #t AS tgt
WHERE EXISTS (
    SELECT 1
    FROM #s AS src
    WHERE src.id = tgt.id
);
SQLFIDDLE DEMO
-- Target rows whose Id also occurs in SOURCE.
-- EXISTS acts as a semi-join: a target row is returned once however many
-- source rows share its Id, so no DISTINCT is needed to undo join fan-out,
-- and target rows that are genuinely identical are all kept.
-- The former "s.Id IN (SELECT Id FROM target)" filter repeated the join
-- condition and has been dropped.
SELECT t.Id,
       t.name
FROM target AS t
WHERE EXISTS (
    SELECT 1
    FROM SOURCE AS s
    WHERE s.Id = t.Id
);