SQL Server Join two tables without duplicates in primary table

SQL Server Join two tables without duplicates in primary table - sql

I have two tables that I want to join together such that all foreign rows are returned and the primary table's rows are not duplicated. For example:
T1
pk code value
1 One 100
2 Two 200
T2
fk value
1 10
1 15
1 30
2 25
I want all records of T2 without the T1 records duplicating, so the result set I want to look like this:
T2.fk T1.code T1.value T2.value
1 One 100 10
1 NULL NULL 15
1 NULL NULL 30
2 Two 200 25
Is there a SQL Server join method for achieving that?

You need to rank your rows in T2 and do a left join including rank as a join condition:
with cte as(select *, row_number() over(partition by fk order by value) as rn from T2)
select c.fk, t.code, t.value, c.value
from cte c
left join T1 t on c.fk = t.pk and c.rn = 1
Here is the full example:
DECLARE #t1 TABLE
(
pk INT ,
code VARCHAR(MAX) ,
value INT
)
INSERT INTO #t1
VALUES ( 1, 'One', 100 ),
( 2, 'Two', 200 )
DECLARE #t2 TABLE ( fk INT, value INT )
INSERT INTO #t2
VALUES ( 1, 10 ),
( 1, 15 ),
( 1, 30 ),
( 2, 25 );
WITH cte
AS ( SELECT * ,
ROW_NUMBER() OVER ( PARTITION BY fk ORDER BY value ) AS rn
FROM #t2
)
SELECT c.fk ,
t.code ,
t.value ,
c.value
FROM cte c
LEFT JOIN #t1 t ON c.fk = t.pk
AND c.rn = 1

Try this:
select T2.fk,
CASE
WHEN (SELECT COUNT(*) FROM t2 tother WHERE tother.fk = t2.fk
AND tother.value > t2.value) > 0 THEN NULL ELSE t1.code
END,
CASE
WHEN (SELECT COUNT(*) FROM t2 tother WHERE tother.fk = t2.fk
AND tother.value > t2.value) > 0 THEN NULL ELSE t1.value
END,T2.value
from t2
join t1
on t2.fk = t1.pk

DECLARE #t1 TABLE (pk int,code varchar(10),value int)
DECLARE #t2 TABLE (fk int,value int)
INSERT INTO #t1
SELECT 1,'one',100
UNION
SELECT 2,'two',200
INSERT INTO #t2
SELECT 1,10
UNION SELECT 1,15 UNION SELECT 1,30 UNION SELECT 2,25
;WITH cte AS(
SELECT t2.fk,t2.value t2val,t1.pk,t1.code,t1.value t1val,ROW_NUMBER() OVER(PARTITION BY fk ORDER BY fk) rno FROM #t2 t2 LEFT JOIN #t1 t1 on t2.fk=t1.pk)
SELECT fk,code=(CASE WHEN rno=1 THEN code ELSE null END),t1val=(CASE WHEN rno=1 THEN t1val ELSE NULL END),t2val FROM cte
output
fk code t1val t2val
1 one 100 10
1 NULL NULL 15
1 NULL NULL 30
2 two 200 25

Related

Case when duplicate add one more letter

For example: I have a table with these records below
1 A
2 A
3 B
4 C
...
and I need to migrate these record in to another table
1 AA
2 AB
3 B
4 C
...
Meaning if the record is duplicate, it will automatically add one more letter alphabetically.

Just a slightly different approach
Example
Declare #YourTable Table (ID int,[SomeCol] varchar(50))
Insert Into #YourTable Values
(1,'A')
,(2,'A')
,(3,'B')
,(4,'C')
Select *
,NewVal = concat(SomeCol,IIF(sum(1) over (partition by SomeCol)=1,'',char(64+row_number() over ( partition by SomeCol order by ID ))) )
From #YourTable
Returns
ID SomeCol NewVal
1 A AA
2 A AB
3 B B
4 C C
EDIT - Requested UPDATE
Declare #YourTable Table (ID int,[SomeCol] varchar(50))
Insert Into #YourTable Values
(1,'A')
,(2,'A')
,(3,'B')
,(4,'C')
Select *
,NewVal = concat(SomeCol,IIF(sum(1) over (partition by SomeCol)=1,'',replace(char(63+row_number() over ( partition by SomeCol order by ID )),'#','')) )
From #YourTable
Returns
ID SomeCol NewVal
1 A A
2 A AA
3 B B
4 C C

We might be able to handle this requirement with the help of a calendar table mapping secondary letters to duplicate sequence counts:
WITH letters AS (
SELECT 1 AS seq, 'A' AS let UNION ALL
SELECT 2, 'B' UNION ALL
SELECT 3, 'C' UNION ALL
...
SELECT 26, 'Z' UNION ALL
...
),
cte AS (
SELECT id, let, ROW_NUMBER() OVER (PARTITION BY let ORDER BY id) rn,
COUNT(*) OVER (PARTITION BY let) cnt
FROM yourTable
)
SELECT t1.id, t1.let + CASE WHEN t1.cnt > 1 THEN t2.let ELSE '' END AS let
FROM cte t1
LEFT JOIN letters t2
ON t1.id = t2.seq
ORDER BY t1.id;
Demo

TSQL - Fill missing records

Code:
CREATE TABLE #Temp
(
[ID1] INT
, [ID2] INT
, [ID3] INT
, [Val] BIT
, PRIMARY KEY CLUSTERED ( [ID1], [ID2], [ID3] )
) ;
INSERT INTO #Temp
SELECT 1
, 10
, 100
, 1
UNION ALL
SELECT 1
, 10
, 101
, 1
UNION ALL
SELECT 1
, 11
, 100
, 1
UNION ALL
SELECT 1
, 11
, 101
, 1
UNION ALL
SELECT 2
, 10
, 100
, 1 ;
CREATE TABLE #Temp_ID3
(
[ID3] INT
) ;
INSERT INTO #Temp_ID3
SELECT 100
UNION ALL
SELECT 101
UNION ALL
SELECT 102 ;
SELECT [ID1]
, [ID2]
, [ID3]
, [Val]
FROM #Temp ;
SELECT [ID3]
FROM #Temp_ID3 ;
DROP TABLE #Temp_ID3 ;
DROP TABLE #Temp ;
Output:
ID1 ID2 ID3 Val
1 10 100 1
1 10 101 1
1 11 100 1
1 11 101 1
2 10 100 1
Goal:
To find missing ID3 records - from #Temp_ID3 table - in #Temp table (ID1/ID2/ID3 combos for existing ID1/ID2 combos). For those missing records, Val should be False. Desired Output will make sense.
I can get distinct ID1/ID2 from #Temp, cross join with ID3 and create a dataset. Left join #Temp to this new dataset, and insert records that way, but looking for a more "simpler" way.
Here's the "complicated" way.
;WITH CTE AS
(
SELECT DISTINCT
[TT].[ID1]
, [TT].[ID2]
, [T3].[ID3]
FROM #Temp_ID3 AS [T3]
CROSS JOIN (
SELECT DISTINCT
[ID1]
, [ID2]
FROM #Temp
) AS [TT]
)
SELECT [C].[ID1]
, [C].[ID2]
, [C].[ID3]
, COALESCE ( [TT].[Val], 0 ) AS [Val]
FROM CTE AS [C]
LEFT JOIN #Temp AS [TT]
ON [C].[ID1] = [TT].[ID1]
AND [C].[ID2] = [TT].[ID2]
AND [C].[ID3] = [TT].[ID3]
ORDER BY [C].[ID1]
, [C].[ID2]
, [C].[ID3] ;
Desired Output:
ID1 ID2 ID3 Val
1 10 100 1
1 10 101 1
1 10 102 0 -- Filling Missing ID3 Record w/ default Val = 0
1 11 100 1
1 11 101 1
1 11 102 0 -- Filling Missing ID3 Record w/ default Val = 0
2 10 100 1
2 10 101 0 -- Filling Missing ID3 Record w/ default Val = 0
2 10 102 0 -- Filling Missing ID3 Record w/ default Val = 0

Use a cross join to generate the rows and then left join to fill in the values:
select t1.id1, t2.id2, t3.id3, coalesce(t.val, 0) as val
from (select distinct id1 from #temp) t1 cross join
(select distinct id2 from #temp) t2 cross join
(select distinct id3 from #temp) t3 left join
#temp t
on t.id1 = t1.id1 and t.id2 = t2.id2 and t.id3 = t3.id3;
You don't have to use subqueries to generate the ids, if one of the tables actually has them.
EDIT:
For the revised version in the comment, you can do something very similar:
select t12.id1, t12.id2, t3.id3, coalesce(t.val, 0) as val
from (select distinct id1, id2 from #temp) t12 cross join
#temp_id3 t3 left join
#temp t
on t.id1 = t12.id1 and t.id2 = t12.id2 and t.id3 = t3.id3;

As per example you want only cross join with self join
select distinct t.id1, t.id2, t3.id3, coalesce(t1.val, 0) as Val
from #Temp t cross join #Temp_ID3 t3
left join #temp t1
on t1.id1 = t.id1 and t1.id2 = t.id2 and t1.id3 = t3.id3;

TSQL Distinct Counts

I have a table that looks like this:
ID SuppressionTypeID PersonID
------------------------------
1 1 123
2 1 456
3 2 456
I want to get a rolling count (distinct people) rather than a normal group by count.
e.g. not this:
SuppressionTypeID Count
---------------------------
1 2
2 1
This:
SuppressionTypeID RecordsLost
----------------------------------
1 2
2 0
The latter being zero as we lost person 456 on suppresiontypeid 1.
Thanks in advance.

You may need to use a temporary table or a table variable as shown below
DECLARE #t TABLE (
ID INT
,SuppressionTypeID INT
,PersonID INT
)
INSERT INTO #t
SELECT 1
,1
,123
UNION ALL
SELECT 2
,1
,456
UNION ALL
SELECT 3
,2
,456
DECLARE #t1 TABLE (
ID INT
,SuppressionTypeID INT
,PersonID INT
,firstid INT
)
INSERT INTO #t1
SELECT *
,NULL
FROM #t
UPDATE t1
SET t1.firstid = t2.firstid
FROM #t1 AS t1
INNER JOIN (
SELECT personid
,min(SuppressionTypeID) AS firstid
FROM #t1
GROUP BY personid
) AS t2 ON t1.PersonID = t2.PersonID
SELECT coalesce(t2.firstid, t1.SuppressionTypeID) AS SuppressionTypeID
,count(DISTINCT t2.personid) AS count
FROM #t1 AS t1
LEFT JOIN #t1 AS t2 ON t1.personid = t2.personid
AND t1.SuppressionTypeID = t2.firstid
GROUP BY coalesce(t2.firstid, t1.SuppressionTypeID)
The result is
SuppressionTypeID count
----------------- -----------
1 2
2 0

You can try;
with tmp_tbl as (
select
x.SuppressionTypeID, count(x.PersonID) as RecordsLost
from (
select
min(SuppressionTypeID) as SuppressionTypeID,
PersonID
from tbl
group by PersonID
) as x
group by x.PersonID
order by x.SuppressionTypeID
)
select
distict t.SuppressionTypeID, coalesce(tmp.RecordsLost, 0) as RecordsLost
from tbl t
left join tmp_tbl tmp on tmp.SuppressionTypeID = t.SuppressionTypeID

Is it possible to write a sql query that is grouped based on a running total of a column?

It would be easier to explain with an example. Suppose I wanted to get at most 5 items per group.
My input would be a table looking like this:
Item Count
A 2
A 3
A 3
B 4
B 4
B 5
C 1
And my desired output would look like this:
Item Count
A 5
A>5 3
B 4
B>5 9
C 1
An alternative output that I could also work with would be
Item Count RunningTotal
A 2 2
A 3 5
A 3 8
B 4 4
B 4 8
B 5 13
C 1 1
I can use ROW_NUMBER() to get the top X records in each group, however my requirement is to get the top X items for each group, not X records. My mind is drawing a blank as to how to do this.

declare #yourTable table (item char(1), [count] int)
insert into #yourTable
select 'A', 2 union all
select 'A', 3 union all
select 'A', 3 union all
select 'B', 4 union all
select 'B', 4 union all
select 'B', 5 union all
select 'C', 1
;with cte(item, count, row) as (
select *, row_number() over ( partition by item order by item, [count])
from #yourTable
)
select t1.Item, t1.Count, sum(t2.count) as RunningTotal from cte t1
join cte t2 on t1.item = t2.item and t2.row <= t1.row
group by t1.item, t1.count, t1.row
Result:
Item Count RunningTotal
---- ----------- ------------
A 2 2
A 3 5
A 3 8
B 4 4
B 4 8
B 5 13
C 1 1

Considering the clarifications from your comment, you should be able to produce the second kid of output from your post by running this query:
select t.Item
, t.Count
, (select sum(tt.count)
from mytable tt
where t.item=tt.item and (tt.creating_user_priority < t.creating_user_priority or
( tt.creating_user_priority = t.creating_user_priority and tt.created_date < t.createdDate))
) as RunningTotal
from mytable t

declare #yourTable table (item char(1), [count] int)
insert into #yourTable
select 'A', 2 union all
select 'A', 3 union all
select 'A', 3 union all
select 'B', 4 union all
select 'B', 4 union all
select 'B', 5 union all
select 'C', 1
;with cte(item, count, row) as (
select *, row_number() over ( partition by item order by item, [count])
from #yourTable
)
select t1.row, t1.Item, t1.Count, sum(t2.count) as RunningTotal
into #RunTotal
from cte t1
join cte t2 on t1.item = t2.item and t2.row <= t1.row
group by t1.item, t1.count, t1.row
alter table #RunTotal
add GrandTotal int
update rt
set GrandTotal = gt.Total
from #RunTotal rt
left join (
select Item, sum(Count) Total
from #RunTotal rt
group by Item) gt
on rt.Item = gt.Item
select Item, max(RunningTotal)
from #RunTotal
where RunningTotal <= 5
group by Item
union
select a.Item + '>5', total - five
from (
select Item, max(GrandTotal) total
from #RunTotal
where GrandTotal > 5
group by Item
) a
left join (
select Item, max(RunningTotal) five
from #RunTotal
where RunningTotal <= 5
group by Item
) b
on a.Item = b.Item
I've updated the accepted answer and got your desired result.

SELECT Item, SUM(Count)
FROM mytable t
GROUP BY Item
HAVING SUM(Count) <=5
UNION
SELECT Item, 5
FROM mytable t
GROUP BY Item
HAVING SUM(Count) >5
UNION
SELECT t2.Item + '>5', Sum(t2.Count) - 5
FROM mytable t2
GOUP BY Item
HAVING SUM(Count) > 5
ORDER BY 1, 2

select 'A' as Name, 2 as Cnt
into #tmp
union all select 'A',3
union all select 'A',3
union all select 'B',4
union all select 'B',4
union all select 'B',5
union all select 'C',1
select Name, case when sum(cnt) > 5 then 5 else sum(cnt) end Cnt
from #tmp
group by Name
union
select Name+'>5', sum(cnt)-5 Cnt
from #tmp
group by Name
having sum(cnt) > 5
Here is what I have so far. I know it's not complete but... this should be a good starting point.

I can get your second output by using a temp table and an update pass:
DECLARE #Data TABLE
(
ID INT IDENTITY(1,1) PRIMARY KEY
,Value VARCHAR(5)
,Number INT
,Total INT
)
INSERT INTO #Data (Value, Number) VALUES ('A',2)
INSERT INTO #Data (Value, Number) VALUES ('A',3)
INSERT INTO #Data (Value, Number) VALUES ('A',3)
INSERT INTO #Data (Value, Number) VALUES ('B',4)
INSERT INTO #Data (Value, Number) VALUES ('B',4)
INSERT INTO #Data (Value, Number) VALUES ('B',5)
INSERT INTO #Data (Value, Number) VALUES ('C',1)
DECLARE
#Value VARCHAR(5)
,#Count INT
UPDATE #Data
SET
#Count = Total = CASE WHEN Value = #Value THEN Number + #Count ELSE Number END
,#Value = Value
FROM #Data AS D
SELECT
Value
,Number
,Total
FROM #Data
There may be better ways, but this should work.

TSQL Query to fetch row details of a table having the maximum value for a column

I have two tables.
Table 1
Num
1
2
3
Table 2
Num Date Amount
1 12/31 30
1 12/30 31
1 12/29 20
2 12/31 100
2 12/30 90
3 12/31 12
4 11/1 1
Now my result should have
Num Date Amount
1 12/31 30
2 12/31 100
3 12/31 12
(for the 'Num' values in table 1, join with table2 where the date is the most recent)
I am trying to write a tsql query to achieve this.
Any help is appreciated. Thanks

If you want the most recent date for each table1 num individually:
with maxdates as (
select T1.num, max(T2.date) as date
from table2 T2
join table1 T1
on T2.num = t1.num
group by T1.num
)
select t2.num, t2.date, t2.amount
from table2 T2
join maxdates M
on T2.num = M.num
and T2.date = M.date
or if you want the most recent date for all matching records in the table:
with maxdate as (
select max(T2.date) as date
from table2 T2
join table1 T1
on T2.num = t1.num
)
select t2.num, t2.date, t2.amount
from table2 T2
join table1 T1
on T2.num = T1.num
join maxdate M
on T2.date = M.date

Try this query:
SELECT b.*
FROM Table1 a INNER JOIN
(
SELECT a.*,
ROW_NUMBER() OVER(PARTITION BY a.Num ORDER BY Date DESC) rnk
FROM Table2 a
) b
ON a.Num = b.Num
AND rnk = 1

Try this
select t2.* from table2 as t2 inner join table1 as t1 on t2.num=t2.num
inner join
(
Select t2.num,max(t2.amount) as amount from table2
group by t2.num
) as t3 on t2.amount=t3.amount

DECLARE #t1 TABLE(num INT)
DECLARE #t2 TABLE(num INT, [date] NVARCHAR(5), amount INT)
INSERT INTO #t1 (num) VALUES (1),(2),(3)
INSERT INTO #t2 (num,[date],amount) VALUES (1,'12/31',30)
INSERT INTO #t2 (num,[date],amount) VALUES (1,'12/30',31)
INSERT INTO #t2 (num,[date],amount) VALUES (1,'12/29',20)
INSERT INTO #t2 (num,[date],amount) VALUES (2,'12/31',100)
INSERT INTO #t2 (num,[date],amount) VALUES (2,'12/30',90)
INSERT INTO #t2 (num,[date],amount) VALUES (3,'12/31',12)
INSERT INTO #t2 (num,[date],amount) VALUES (4,'11/01',1)
SELECT t2.num,t2.[date],amount
FROM #t1 t1 JOIN #t2 t2 ON t1.num = t2.num
WHERE t2.[date] in (
SELECT MAX(t3.[date])
FROM #t2 t3
WHERE t3.num = t2.num
)
ORDER BY t2.num

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

SQL Server Join two tables without duplicates in primary table - sql

Related

Case when duplicate add one more letter

TSQL - Fill missing records

TSQL Distinct Counts

Is it possible to write a sql query that is grouped based on a running total of a column?

TSQL Query to fetch row details of a table having the maximum value for a column

Categories

Resources