Left join suggestion - sql

I need to join multiple tables in SQL Server on a common date column, but I want to avoid repeating the values from the different tables when they are merged.
-- Reset the temp tables so the script can be re-run in the same session.
DROP TABLE IF EXISTS #d, #t1, #t2;

-- #d lists the dates; #t1 and #t2 hold values keyed by date.
CREATE TABLE #d  (DataDate date);
CREATE TABLE #t1 (DataDate date, Value1 float, Value2 float);
CREATE TABLE #t2 (DataDate date, Value3 float, Value4 float);

INSERT INTO #d
VALUES ('20181201'), ('20181202'), ('20181203');

INSERT INTO #t1
VALUES
    ('20181201', 3.14,  1.18),
    ('20181201', 3.135, 1.185),
    ('20181202', 3.15,  1.19),
    ('20181203', 3.16,  1.195);

INSERT INTO #t2
VALUES
    ('20181201', 4.14, 2.18),
    ('20181203', 4.15, 2.19),
    ('20181203', 4.1,  2.195);

-- Both joins match on the date alone, so every #t1 row of a date is combined
-- with every #t2 row of the same date.
SELECT
    dates.DataDate,
    first_set.Value1,
    first_set.Value2,
    second_set.Value3,
    second_set.Value4
FROM #d AS dates
LEFT JOIN #t1 AS first_set
    ON dates.DataDate = first_set.DataDate
LEFT JOIN #t2 AS second_set
    ON dates.DataDate = second_set.DataDate;
Actual Results
DataDate Value1 Value2 Value3 Value4
12/1/2018 3.14 1.18 4.14 2.18
12/1/2018 3.135 1.185 4.14 2.18
12/2/2018 3.15 1.19 NULL NULL
12/3/2018 3.16 1.195 4.15 2.19
12/3/2018 3.16 1.195 4.1 2.195
Desired Results
DataDate Value1 Value2 Value3 Value4
12/1/2018 3.14 1.18 4.14 2.18
12/1/2018 3.135 1.185 NULL NULL
12/2/2018 3.15 1.19 NULL NULL
12/3/2018 3.16 1.195 4.15 2.19
12/3/2018 NULL NULL 4.1 2.195

Here is a proposed solution.
I have added a third table, just to demonstrate that this could be solved for N tables with a common column.
Prepare demo data:
/* Prepare demo objects */
-- #t3 is part of the drop list so the script can be re-run in the same session;
-- without it the CREATE TABLE #t3 below fails on the second run.
DROP TABLE IF EXISTS #d, #t1, #t2, #t3
CREATE TABLE #d (DataDate date)
CREATE TABLE #t1 (DataDate date, Value1 float, Value2 float)
CREATE TABLE #t2 (DataDate date, Value3 float, Value4 float)
CREATE TABLE #t3 (DataDate date, Value5 float, Value6 float)
/* Insert demo data */
-- Column lists are spelled out so the inserts do not depend on column order.
INSERT INTO #d (DataDate) VALUES ('20181201'),('20181202'),('20181203')
INSERT INTO #t1 (DataDate, Value1, Value2) VALUES
('20181201', 3.14, 1.18),
('20181201', 3.135, 1.185),
('20181202', 3.15, 1.19),
('20181203', 3.16, 1.195)
INSERT INTO #t2 (DataDate, Value3, Value4) VALUES
('20181201', 4.14, 2.18),
('20181203', 4.15, 2.19),
('20181203', 4.1, 2.195)
INSERT INTO #t3 (DataDate, Value5, Value6) VALUES
('20181201', 3.14, 1.18),
('20181201', 3.135, 1.185),
('20181202', 3.16, 1.195)
Proposed QUERY Solution:
/*
   Each table's rows are numbered within their date (rn). Rows from different
   tables are then lined up when both the date and rn agree, so a value from one
   table is shown next to at most one row of every other table.
   ORDER BY (SELECT NULL) imposes no particular order, so which row of a date
   receives rn = 1, 2, ... is left to the engine.
*/
;WITH numbered_d AS (
    SELECT
        *
        , ROW_NUMBER() OVER (PARTITION BY DataDate ORDER BY (SELECT NULL)) AS rn
    FROM #d
),
numbered_t1 AS (
    SELECT
        *
        , ROW_NUMBER() OVER (PARTITION BY DataDate ORDER BY (SELECT NULL)) AS rn
    FROM #t1
),
numbered_t2 AS (
    SELECT
        *
        , ROW_NUMBER() OVER (PARTITION BY DataDate ORDER BY (SELECT NULL)) AS rn
    FROM #t2
),
numbered_t3 AS (
    SELECT
        *
        , ROW_NUMBER() OVER (PARTITION BY DataDate ORDER BY (SELECT NULL)) AS rn
    FROM #t3
)
SELECT
      COALESCE(d.DataDate, t1.DataDate, t2.DataDate, t3.DataDate) AS DataDate
    , t1.Value1
    , t1.Value2
    , t2.Value3
    , t2.Value4
    , t3.Value5
    , t3.Value6
FROM numbered_d AS d
FULL JOIN numbered_t1 AS t1
       ON (t1.DataDate = d.DataDate AND t1.rn = d.rn)
-- A later table may line up with any of the earlier ones, because the earlier
-- tables can be NULL on a row that only exists in one of them.
FULL JOIN numbered_t2 AS t2
       ON (t2.DataDate = d.DataDate  AND t2.rn = d.rn)
       OR (t2.DataDate = t1.DataDate AND t2.rn = t1.rn)
FULL JOIN numbered_t3 AS t3
       ON (t3.DataDate = d.DataDate  AND t3.rn = d.rn)
       OR (t3.DataDate = t1.DataDate AND t3.rn = t1.rn)
       OR (t3.DataDate = t2.DataDate AND t3.rn = t2.rn)
ORDER BY DataDate;
Demo fiddle is posted on db<>fiddle here
Results:
DataDate | Value1 | Value2 | Value3 | Value4 | Value5 | Value6
:------------------ | -----: | -----: | -----: | -----: | -----: | -----:
01/12/2018 00:00:00 | 3.14 | 1.18 | 4.14 | 2.18 | 3.14 | 1.18
01/12/2018 00:00:00 | 3.135 | 1.185 | null | null | 3.135 | 1.185
02/12/2018 00:00:00 | 3.15 | 1.19 | null | null | 3.16 | 1.195
03/12/2018 00:00:00 | 3.16 | 1.195 | 4.15 | 2.19 | null | null
03/12/2018 00:00:00 | null | null | 4.1 | 2.195 | null | null
Note (optional):
You can greatly improve performance by introducing indexes.
As a demo, I have added CLUSTERED INDEXES on the DataDate column and the performance increase is significant.
/* Optional, for performance: cluster each temp table on DataDate, the column used by the joins */
CREATE CLUSTERED INDEX CI_DataDate
    ON #d (DataDate);

CREATE CLUSTERED INDEX CI_DataDate
    ON #t1 (DataDate);

CREATE CLUSTERED INDEX CI_DataDate
    ON #t2 (DataDate);

CREATE CLUSTERED INDEX CI_DataDate
    ON #t3 (DataDate);

Use min if you want the min value (or max, depending on what you're looking for) associated with each date and t1.Value1. In your example the values are not duplicates, so distinct will not work.
-- One row per (DataDate, Value1); the remaining columns are collapsed with MIN/MAX.
-- SQL Server does not accept ordinal positions in GROUP BY, so the grouping
-- columns are written out. The aggregates are aliased so every output column has a name.
select #d.DataDate,
       #t1.Value1,
       min(#t1.Value2) as Value2,
       max(#t2.Value3) as Value3,
       min(#t2.Value4) as Value4
from #d
left join #t1 on #d.DataDate = #t1.DataDate
left join #t2 on #d.DataDate = #t2.DataDate
group by #d.DataDate, #t1.Value1
If there are exact duplicates that you want to remove, then use the following:
-- Same joins as the question's query; DISTINCT collapses rows that are identical in every column.
SELECT DISTINCT
    dates.DataDate,
    first_set.Value1,
    first_set.Value2,
    second_set.Value3,
    second_set.Value4
FROM #d AS dates
LEFT JOIN #t1 AS first_set
    ON dates.DataDate = first_set.DataDate
LEFT JOIN #t2 AS second_set
    ON dates.DataDate = second_set.DataDate

Related

Select all entries that have the same Type as the entry with the largest Date in SQL?

How do I select all entries that have the same Type as the entry with the largest Date?
I'm using SQL Server.
My table:
+----+------+-------------------------+
| id | Type | Date |
+----+------+-------------------------+
| 1 | xxx | 2020-02-25 09:11:53.000 |
| 2 | yyy | 2020-02-25 08:30:35.000 |
| 3 | xxx | 2020-02-25 07:48:17.000 |
| 4 | xxx | 2020-02-25 09:04:25.000 |
| 5 | yyy | 2020-02-25 07:59:03.000 |
The result should be:
+----+------+-------------------------+
| id | Type | Date |
+----+------+-------------------------+
| 1 | xxx | 2020-02-25 09:11:53.000 |
| 3 | xxx | 2020-02-25 07:48:17.000 |
| 4 | xxx | 2020-02-25 09:04:25.000 |
+----+------+-------------------------+
Because id =1 is the Type with the max Date.
You can use exists with correlated sub-query :
-- Rows whose type equals the type of the row with the greatest date.
select t.*
from table t
-- NOTE(review): this EXISTS only keeps a row when a different row has the same type,
-- so a type that occurs once is filtered out even if it owns the latest date -- confirm that is wanted.
where exists (select 1 from table t1 where t1.type = t.type and t1.id <> t.id) and
-- scalar subquery: type of the single row with the greatest date
t.type = (select top (1) t1.type from table t1 order by t1.date desc);
A correlated subquery is often the most efficient method with the right index:
-- Keep the rows whose type equals the type carried by the most recent row.
SELECT t.*
FROM t
WHERE t.type = (
    SELECT TOP (1) newest.type
    FROM t AS newest
    ORDER BY newest.date DESC
);
The best indexes are (date desc, type) and (type).
You can also do this with window functions:
-- last_type is the type of the first row when ordering by date descending,
-- i.e. of the most recent row; it is attached to every row and then compared.
SELECT ranked.*
FROM (
    SELECT
        t.*,
        FIRST_VALUE(type) OVER (ORDER BY date DESC) AS last_type
    FROM t
) AS ranked
WHERE ranked.type = ranked.last_type;
Rather than a Self Join, you could use LAST_VALUE in a CTE and then add that to the WHERE:
-- Tag every sample row with the type of the row that has the greatest date
-- (the frame spans the whole set), then keep the rows whose own type equals it.
WITH Tagged AS (
    SELECT
        Sample.ID,
        Sample.[Type],
        Sample.[Date],
        LAST_VALUE(Sample.[Type]) OVER (
            ORDER BY Sample.[Date]
            ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
        ) AS LastType
    FROM (
        VALUES
            (1, 'xxx', CONVERT(datetime2(0), '2020-02-25 09:11:53.000')),
            (2, 'yyy', CONVERT(datetime2(0), '2020-02-25 08:30:35.000')),
            (3, 'xxx', CONVERT(datetime2(0), '2020-02-25 07:48:17.000')),
            (4, 'xxx', CONVERT(datetime2(0), '2020-02-25 09:04:25.000')),
            (5, 'yyy', CONVERT(datetime2(0), '2020-02-25 07:59:03.000'))
    ) AS Sample (ID, [Type], [Date])
)
SELECT
    Tagged.ID,
    Tagged.[Type],
    Tagged.[Date]
FROM Tagged
WHERE Tagged.[Type] = Tagged.LastType;
DB<>Fiddle
Try this:
-- Sample data in a table variable. Table variables and scalar variables
-- must carry an @ prefix; a # name is not accepted by DECLARE.
Declare @t table (id int, types nvarchar(100), dates datetime)
insert into @t (id, types, dates) values (1,'xxx','2020-02-25 09:11:53.000')
insert into @t (id, types, dates) values (2,'yyy','2020-02-25 08:30:35.000')
insert into @t (id, types, dates) values (3,'xxx','2020-02-25 07:48:17.000')
insert into @t (id, types, dates) values (4,'xxx','2020-02-25 09:04:25.000')
insert into @t (id, types, dates) values (5,'yyy','2020-02-25 07:59:03.000')

-- Type whose latest date is the overall latest. The ORDER BY is essential:
-- TOP 1 over the grouped rows without it returns an arbitrary type.
Declare @max nvarchar(100) = (
    select top 1 types
    from @t
    group by types
    order by max(dates) desc
)

-- All rows of that type.
select id, types, dates
from @t
where types = @max
Output:
id types dates
1 xxx 2020-02-25 09:11:53.000
3 xxx 2020-02-25 07:48:17.000
4 xxx 2020-02-25 09:04:25.000

SQL Join data and get rows that don't match with NULL

I have two tables that I want join as follows:
Table 1
Code1 | Code2 | Date(1) | Amount(1)
A | AA | 201802 | 100
A | AA | 201803 | 50
A | AA | 201804 | 30
Table 2
Code1 | Code2 | Date(2) | Amount(2)
A | AA | 201801 | 20
A | AA | 201802 | 10
A | AA | 201803 | 10
And I want the resulting table to look like this:
Result
Code1 | Code2 | Date(1) | Date(2) | Amount(1) | Amount(2)
A | AA | NULL | 201801 | NULL | 20
A | AA | 201802 | 201802 | 100 | 10
A | AA | 201803 | 201803 | 50 | 10
A | AA | 201804 | NULL | 30 | NULL
So I need to join these two tables
on table1.Code1 = table2.Code1 AND table1.Code2 = table2.Code2 AND table1.Date(1) = table2.Date(2)
But I also want the rows where the dates don't match, with NULL in the columns that come from the non-matching table (such as the row for Date(1) = 201804 in my example).
I have tried joining that two tables with left, right and outer join but I still am not successful in getting the rows with the nulls (probably because Code1 and Code2 don't exist for that particular missing row)
Maybe a cross apply could work, but I am not sure how to execute it.
I want the most efficient way in terms of performance because this is a part of a big query containing lots of data and lots of calculations.
UPDATE:
The code I used is:
-- Full outer join on both codes and the date.
-- Column names that contain parentheses are bracket-quoted; unquoted, Date(1)
-- would be parsed as a function call. "Code 1" is corrected to Code1.
Select table1.Code1, table1.Code2, table1.[Date(1)], table2.[Date(2)], table1.[Amount(1)], table2.[Amount(2)]
FROM Table1
Full Outer Join
table2 ON
table1.Code1 = table2.Code1
AND table1.Code2 = table2.Code2
AND table1.[Date(1)] = table2.[Date(2)]
Which gives me the following result:
Code1 | Code2 | Date(1) | Date(2) | Amount(1) | Amount(2)
A | AA | 201802 | 201802 | 100 | 10
A | AA | 201803 | 201803 | 50 | 10
Which is missing these two rows:
A | AA | NULL | 201801 | NULL | 20
A | AA | 201804 | NULL | 30 | NULL
You may try this.
--sample dataset
-- Created as temp tables: DECLARE ... TABLE needs an @ name, and the queries
-- that use this data read from #tab1 / #tab2.
DROP TABLE IF EXISTS #tab1, #tab2

CREATE TABLE #tab1 (
Code1 varchar(10),
Code2 varchar(10),
Date1 int,
Amount1 int )
insert into #tab1 (Code1, Code2, Date1, Amount1)
values
('A', 'AA', 201802, 100),
('A', 'AA', 201803, 50),
('A', 'AA', 201804, 30),
('B', 'AA', 201802, 100) --additional

CREATE TABLE #tab2 (
Code1 varchar(10),
Code2 varchar(10),
Date2 int,
Amount2 int )
insert into #tab2 (Code1, Code2, Date2, Amount2)
values
('A', 'AA', 201802, 100),
('A', 'AA', 201803, 50),
('A', 'AA', 201801, 30)
query
-- Full join on (Code1, Code2, date) so that unmatched dates from either side survive.
-- Only the six result columns are selected: SELECT * would also return the
-- CROSS APPLY helper column as a second "Code1".
SELECT
    t1.Code1,
    t1.Code2,
    t1.Date1,
    t1.Date2,
    t1.Amount1,
    t1.amount2
FROM (
    select
        coalesce(table1.Code1,table2.Code1) as Code1,
        coalesce(table1.Code2,table2.Code2) as Code2,
        table1.Date1,
        table2.Date2,
        table1.Amount1,
        table2.amount2
    FROM #tab1 as Table1
    Full Outer Join #tab2 as table2 ON
        table1.Code1 = table2.Code1
        AND table1.Code2 = table2.Code2
        AND table1.date1 = table2.date2
) as t1
CROSS APPLY ( --to exclude records not matched by "Code 1 and Code 2"
    -- used purely as an existence filter, so which row TOP 1 picks does not matter
    SELECT top 1
        t.Code1
    FROM #tab2 as t
    where t.Code1 = t1.Code1
    and t.Code2 = t1.Code2
) as c
ORDER BY t1.Date1
or like this:
-- Full join on (Code1, Code2, date); the EXISTS keeps only code pairs present in #tab2.
SELECT
    COALESCE(lhs.Code1, rhs.Code1) AS Code1,
    COALESCE(lhs.Code2, rhs.Code2) AS Code2,
    lhs.Date1,
    rhs.Date2,
    lhs.Amount1,
    rhs.amount2
FROM #tab1 AS lhs
FULL OUTER JOIN #tab2 AS rhs
    ON  lhs.Code1 = rhs.Code1
    AND lhs.Code2 = rhs.Code2
    AND lhs.date1 = rhs.date2
WHERE EXISTS (
    --to exclude records not matched by "Code 1 and Code 2"
    SELECT NULL
    FROM #tab2 AS probe
    WHERE COALESCE(lhs.Code1, rhs.Code1) = probe.Code1
      AND COALESCE(lhs.Code2, rhs.Code2) = probe.Code2
)
ORDER BY lhs.Date1
My suggested solution involves a full join and another join to a derived table that contains all the combinations of code1 and code2 that exists in both tables, using the intersect operator.
First, create and populate sample data (Please save us this step in your future questions):
-- Created as temp tables: DECLARE ... TABLE needs an @ name, and the query
-- that uses this data reads from #T1 / #T2.
DROP TABLE IF EXISTS #T1, #T2;

CREATE TABLE #T1
(
Code1 char(1),
Code2 char(2),
Date1 char(6),
Amount1 int
);
CREATE TABLE #T2
(
Code1 char(1),
Code2 char(2),
Date2 char(6),
Amount2 int
);
INSERT INTO #T1 (Code1, Code2, Date1, Amount1) VALUES
('A', 'AA', '201802', 100)
,('A', 'AA', '201803', 50)
,('A', 'AA', '201804', 30)
,('B', 'AA', '201802', 30); -- Note: Added to the original sample data
INSERT INTO #T2 (Code1, Code2, Date2, Amount2) VALUES
('A', 'AA', '201801', 20)
,('A', 'AA', '201802', 10)
,('A', 'AA', '201803', 10)
,('A', 'AB', '201802', 10); -- Note: Added to the original sample data
The query:
-- Full join on codes and date, then restricted to the (Code1, Code2) pairs
-- that occur in both tables.
SELECT
    ISNULL(L.Code1, R.Code1) AS Code1,
    ISNULL(L.Code2, R.Code2) AS Code2,
    Date1,
    Date2,
    Amount1,
    Amount2
FROM #T1 AS L
FULL JOIN #T2 AS R
    ON  L.Code1 = R.Code1
    AND L.Code2 = R.Code2
    AND L.Date1 = R.Date2
-- Remove this next join if you want to get rows where codes don't match
JOIN (
    SELECT Code1, Code2 FROM #T1
    INTERSECT
    SELECT Code1, Code2 FROM #T2
) AS SharedCodes
    ON  SharedCodes.Code1 = ISNULL(L.Code1, R.Code1)
    AND SharedCodes.Code2 = ISNULL(L.Code2, R.Code2)
ORDER BY Date1
Results:
Code1 Code2 Date1 Date2 Amount1 Amount2
A AA NULL 201801 NULL 20
A AA 201802 201802 100 10
A AA 201803 201803 50 10
A AA 201804 NULL 30 NULL
You can see a live demo on rextester.
Your updated query should work if you ISNULL the CodeX columns.
-- Sample data in table variables (table variables need an @ name; a # name is
-- not accepted by DECLARE).
declare @t1 table (Code1 varchar(4), Code2 varchar(4), Date1 date, Amount1 int)
declare @t2 table (Code1 varchar(4), Code2 varchar(4), Date2 date, Amount2 int)
insert into @t1 (Code1, Code2, Date1, Amount1)
values
('A', 'AA', '2018-02-01', 100 ),
('A', 'AA', '2018-03-01', 50 ),
('A', 'AA', '2018-04-01', 30 )
insert into @t2 (Code1, Code2, Date2, Amount2)
values
('A', 'AA', '2018-01-01', 20 ),
('A', 'AA', '2018-02-01', 10 ),
('A', 'AA', '2018-03-01', 10 )

-- t0: code pairs present in both tables.
-- tt: for each such pair, the full join of both tables on codes and date,
--     with ISNULL supplying the codes for rows that exist on one side only.
SELECT
code1
,code2
,date1
,date2
,amount1
,amount2
FROM (
SELECT code1, code2 FROM @t1
INTERSECT
SELECT code1, code2 FROM @t2
) t0
CROSS APPLY (
SELECT
date1, date2, amount1, amount2
FROM @t1 t1
FULL OUTER JOIN @t2 t2 ON t1.Code1 = t2.Code1 and t1.Code2 = t2.Code2 and date1 = date2
WHERE
t0.code1 = isnull(t1.Code1, t2.code1)
and t0.code2 = isnull(t1.Code2, t2.code2)
) tt
ORDER BY
date1, date2

Use data to name column

I have 2 tables and I want to run a query where I use a value in one of the tables to change what column dateadd uses.
table1
id value date1 date2 date3
-------|-------|------------|------------|-----------|
1 | 10 | 04/03/2018 | 04/03/2017 |01/03/2016 |
2 | 1 | 04/03/2018 | 05/03/2015 |02/03/2018 |
3 | 2 | 04/03/2016 | 06/03/2016 |03/03/2018 |
4 | 1 | 04/03/2015 | 07/03/2018 |04/03/2017 |
5 | 2 | 04/03/2017 | 09/03/2018 |05/03/2019 |
table2
id value
-------|-------|
1 | date1 |
2 | date3 |
3 | date3 |
4 | date2 |
5 | date1 |
The normal way to do ID 1 would be something like dateadd(month,10,date1). I'm not sure how to do this without me writing it every single time though.
-- Intent: shift the date column named in table2.value by table1.value months and
-- keep the rows whose shifted date falls within 2018.
select *
from table1
join table2 on table1.id = table2.id
-- NOTE(review): table1.[table2.value] refers to a column literally named "table2.value";
-- the table1 layout shown above has no such column, so this line only illustrates the intent.
where DATEADD(month, table1.value, table1.[table2.value]) between '1/1/18' and '12/31/18'
Twelfth's answer is correct. I just wanted to see if his theory works, and it does - here's a working implementation.
-- Sample data in table variables (table variables need an @ name; a # name is
-- not accepted by DECLARE).
declare @table1 table (id int, value int, date1 date, date2 date, date3 date)
declare @table2 table (id int, colname varchar(5))
insert into @table1 (id, value, date1, date2, date3) values (1,10,'04/03/2018','04/03/2017','01/03/2016')
insert into @table1 (id, value, date1, date2, date3) values (2,1 ,'04/03/2018','05/03/2015','02/03/2018')
insert into @table1 (id, value, date1, date2, date3) values (3,2 ,'04/03/2016','06/03/2016','03/03/2018')
insert into @table1 (id, value, date1, date2, date3) values (4,1 ,'04/03/2015','07/03/2018','04/03/2017')
insert into @table1 (id, value, date1, date2, date3) values (5,2 ,'04/03/2017','09/03/2018','05/03/2019')
insert into @table2 (id, colname) values (1, 'date1')
insert into @table2 (id, colname) values (2, 'date3')
insert into @table2 (id, colname) values (3, 'date3')
insert into @table2 (id, colname) values (4, 'date2')
insert into @table2 (id, colname) values (5, 'date1')

-- UNPIVOT turns date1..date3 into rows (colname = source column, dn = its date).
-- Joining on colname keeps, per id, only the date column that @table2 names;
-- that date is shifted by "value" months and filtered to 2018.
select id, colname, newdate
from
(
select sq.id, sq.colname, dateadd(month, sq.value, sq.dn) as newdate
from @table1 t1
unpivot
(
dn for colname in ([date1], [date2], [date3])
)sq
inner join @table2 t2 on sq.id = t2.id and sq.colname = t2.colname
)sq where newdate between '1/1/2018' and '12/31/2018'
Output:
id colname newdate
2 date3 2018-03-03
3 date3 2018-05-03
4 date2 2018-08-03
I've had this as a theory; you're actually the first questioner I can try to apply it with. The idea is to unpivot your data and then join on the value column.
-- Unpivot the date columns of table1 into rows, keep per id the one that
-- table2.value names, and shift it by table1.value months.
--  * the unpivoted date is called date_value because table1 already has a
--    column named "value", and UNPIVOT rejects a clashing value-column name;
--  * columns are read through the unpivot alias "a";
--  * only date1..date3 are listed, matching the table1 layout in the question;
--  * the 2018 filter is applied to the shifted date, not to the column name.
select a.id,
       a.column_name,
       dateadd(month, a.value, a.date_value) as newdate
from table1
unpivot (
    date_value
    for column_name in (date1, date2, date3)
) a
inner join table2 t2 on a.id = t2.id and t2.value = a.column_name
where dateadd(month, a.value, a.date_value)
      between '1/1/18' and '12/31/18'
I can't guarantee that will work and am curious how it does for you.

How can I duplicate the result set like below

I have a table like the one below and I want to duplicate the records for every date from the min date up to and including the max date
686151209 E13232677 1333439 2017-10-23
686151209 E13232677 1333439 2017-10-26
I'd like to have the result set like below
686151209 E13232677 1333439 2017-10-23
686151209 E13232677 1333439 2017-10-24
686151209 E13232677 1333439 2017-10-25
686151209 E13232677 1333439 2017-10-26
You can use spt_values to get continuous numbers:
-- testdata: the two sample rows.
-- bounds:   first and last date per (col1, col2, col3).
-- The rows of master.dbo.spt_values with type 'P' serve as the number source:
-- number n yields the n-th day of the range.
;WITH testdata (col1, col2, col3, col4) AS (
    SELECT '686151209', 'E13232677', '1333439', '2017-10-23'
    UNION ALL
    SELECT '686151209', 'E13232677', '1333439', '2017-10-26'
),
bounds AS (
    SELECT
        col1,
        col2,
        col3,
        CONVERT(DATE, MIN(col4)) AS mindate,
        CONVERT(DATE, MAX(col4)) AS maxdate
    FROM testdata AS t
    GROUP BY col1, col2, col3
)
SELECT
    col1,
    col2,
    col3,
    DATEADD(d, sv.number - 1, a.mindate) AS col4,
    sv.number
FROM bounds AS a
INNER JOIN master.dbo.spt_values AS sv
    ON  sv.type = 'P'
    AND sv.number BETWEEN 1 AND DATEDIFF(d, mindate, maxdate) + 1
+-----------+-----------+---------+------------+--------+
| col1 | col2 | col3 | col4 | number |
+-----------+-----------+---------+------------+--------+
| 686151209 | E13232677 | 1333439 | 2017-10-23 | 1 |
| 686151209 | E13232677 | 1333439 | 2017-10-24 | 2 |
| 686151209 | E13232677 | 1333439 | 2017-10-25 | 3 |
| 686151209 | E13232677 | 1333439 | 2017-10-26 | 4 |
+-----------+-----------+---------+------------+--------+
One method is a numbers table. If you don't have too many rows, I also like a recursive CTE:
with cte as (
      -- anchor: first and last date of each (col1, col2, col3) group;
      -- col3 has to be selected here because the outer queries read it
      select col1, col2, col3, mind, maxd
      from (select col1, col2, col3, min(dte) as mind, max(dte) as maxd
            from t
            group by col1, col2, col3
           ) t
      union all
      -- step: advance one day; <= keeps the last date itself in the output
      select col1, col2, col3, dateadd(day, 1, mind), maxd
      from cte
      where dateadd(day, 1, mind) <= maxd
     )
select col1, col2, col3, mind
from cte;
This is limited to 100 recursion levels (roughly 100 days for each col1/col2/col3 combination), unless you raise the limit with OPTION (MAXRECURSION n).
Or like this:
-- PostgreSQL variant.
CREATE TABLE temp
(
  id    BIGINT,
  code  VARCHAR(50),
  id2   BIGINT,
  dates DATE          -- named "dates" because the query below reads A.dates / B.dates
);
INSERT INTO temp (id, code, id2, dates) VALUES
  (686151209, 'E13232677', 1333439, '2017-10-23'),
  (686151209, 'E13232677', 1333439, '2017-10-26');

-- Pair each row with a later-dated row of the same key and emit one row per day
-- in between (both ends included). The key columns are returned alongside the
-- generated date so the output has the same shape as the source rows.
SELECT T.id,
       T.code,
       T.id2,
       generate_series(T.D1::timestamp, T.D2::timestamp, interval '1 day')::date AS dates
FROM
(
  SELECT A.id, A.code, A.id2, A.dates AS D1, B.dates AS D2
  FROM temp A
  INNER JOIN temp B ON (A.id = B.id AND
                        A.code = B.code AND
                        A.id2 = B.id2 AND
                        B.dates > A.dates)
) T;

Merge a two way relation in the same table in SQL Server

Current Data
ID | Name1 | Name2
<guid1> | XMind | MindNode
<guid2> | MindNode | XMind
<guid3> | avast | Hitman Pro
<guid4> | Hitman Pro | avast
<guid5> | PPLive | Hola!
<guid6> | ZenMate | Hola!
<guid7> | Hola! | PPLive
<guid8> | Hola! | ZenMate
Required Output
ID1 | ID2 | Name1 | Name2
<guid1> | <guid2> | XMind | MindNode
<guid3> | <guid4> | avast | Hitman Pro
<guid5> | <guid7> | PPLive | Hola!
<guid6> | <guid8> | Hola! | ZenMate
These are relations between apps. I want to show that Avast and Hitman has a relation but in this view i do not need to show in what "direction" they have an relation. It's a given in this view that the relation goes both ways.
EDIT: Seems like my example was too simple. The solution doesn't work with more data.
-- #a is created as a temp table: DECLARE ... TABLE would need an @ name.
DROP TABLE IF EXISTS #a
CREATE TABLE #a (ID INT, Name1 VARCHAR(50), Name2 VARCHAR(50))
INSERT INTO #a (ID, Name1, Name2) VALUES ( 1, 'XMind', 'MindNode' )
INSERT INTO #a (ID, Name1, Name2) VALUES ( 2, 'MindNode', 'XMind' )
INSERT INTO #a (ID, Name1, Name2) VALUES ( 3, 'avast', 'Hitman Pro' )
INSERT INTO #a (ID, Name1, Name2) VALUES ( 4, 'Hitman Pro', 'avast' )
INSERT INTO #a (ID, Name1, Name2) VALUES ( 5, 'PPLive Video Accelerator', 'Hola! Better Internet' )
INSERT INTO #a (ID, Name1, Name2) VALUES ( 6, 'ZenMate', 'Hola! Better Internet' )
INSERT INTO #a (ID, Name1, Name2) VALUES ( 7, 'Hola! Better Internet', 'PPLive Video Accelerator' )
INSERT INTO #a (ID, Name1, Name2) VALUES ( 8, 'Hola! Better Internet', 'ZenMate' )
-- Pair each row with a row whose Name2 equals this row's Name1; the ID
-- comparison keeps one of the two directions.
SELECT a1.ID AS ID1 ,
a2.ID AS ID2 ,
a1.Name1 ,
a2.Name1 AS Name2
FROM #a a1
JOIN #a a2 ON a1.Name1 = a2.Name2
AND a1.ID < a2.ID -- avoid duplicates
This works however so i guess it's the Guid that is messing with me.
EDIT AGAIN:
I haven't looked at this for a while and I thought it worked, but I just realized it does not. I've struggled all morning with this, but I must admit that SQL is not really my strong suit. The thing is this.
-- #a is created as a temp table: DECLARE ... TABLE would need an @ name.
DROP TABLE IF EXISTS #a
CREATE TABLE #a (ID int, Name1 VARCHAR(50), Name2 VARCHAR(50))
INSERT INTO #a (ID, Name1, Name2) VALUES ( 1, 'XMind', 'MindNode' )
INSERT INTO #a (ID, Name1, Name2) VALUES ( 2, 'MindNode', 'XMind' )
INSERT INTO #a (ID, Name1, Name2) VALUES ( 3, 'avast', 'Hitman Pro' )
INSERT INTO #a (ID, Name1, Name2) VALUES ( 4, 'PPLive Video Accelerator', 'Hola! Better Internet' )
INSERT INTO #a (ID, Name1, Name2) VALUES ( 5, 'ZenMate', 'Hola! Better Internet' )
INSERT INTO #a (ID, Name1, Name2) VALUES ( 6, 'Hitman Pro', 'avast' )
INSERT INTO #a (ID, Name1, Name2) VALUES ( 7, 'Hola! Better Internet', 'PPLive Video Accelerator' )
INSERT INTO #a (ID, Name1, Name2) VALUES ( 8, 'Hola! Better Internet', 'ZenMate' )
INSERT INTO #a (ID, Name1, Name2) VALUES ( 9, 'XX', 'A' )
INSERT INTO #a (ID, Name1, Name2) VALUES ( 10, 'XX', 'BB' )
INSERT INTO #a (ID, Name1, Name2) VALUES ( 11, 'BB', 'XX' )
INSERT INTO #a (ID, Name1, Name2) VALUES ( 12, 'A', 'XX' )
INSERT INTO #a (ID, Name1, Name2) VALUES ( 13, 'XX', 'CC' )
INSERT INTO #a (ID, Name1, Name2) VALUES ( 14, 'CC', 'XX' )
-- The join matches on a1.Name1 = a2.Name2 only. Candidate pairs are grouped by a
-- checksum of the two names (smaller name first) and one row per group is kept;
-- which row gets rn = 1 is not determined, because the ORDER BY repeats the partition key.
;With CTE as
(
SELECT a1.ID AS ID1 ,
a2.ID AS ID2 ,
a1.Name1 ,
a2.Name1 AS Name2,
CheckSum(Case when a1.Name1>a2.Name1 then a2.Name1+a1.Name1 else a1.Name1+a2.Name1 end) ck, -- just for display
Row_Number() over (Partition by CheckSum(Case when a1.Name1>a2.Name1 then a2.Name1+a1.Name1 else a1.Name1+a2.Name1 end)
order by CheckSum(Case when a1.Name1>a2.Name1 then a2.Name1+a1.Name1 else a1.Name1+a2.Name1 end)) as rn
FROM #a a1
JOIN #a a2 ON a1.Name1 = a2.Name2
)
Select ID1, ID2,Name1, Name2
from CTE C1
where rn=1
When i use this code it sure works fine with the names but it doesn't match the ID's correctly.
The result is
ID1 | ID2 | Name1 | Name2
12 | 9 | A | X (Correct)
7 | 5 | Hola! | ZenMate (Not Correct)
[..]
I've pulled my hair all morning but i can't figure this out. I still use Guid's as ID's and just use Int's here to make it a bit more readable.
-- #a is created as a temp table: DECLARE ... TABLE would need an @ name.
DROP TABLE IF EXISTS #a
CREATE TABLE #a (ID INT, Name1 VARCHAR(50), Name2 VARCHAR(50))
INSERT INTO #a (ID, Name1, Name2) VALUES ( 1, 'XMind', 'MindNode' )
INSERT INTO #a (ID, Name1, Name2) VALUES ( 2, 'MindNode', 'XMind' )
INSERT INTO #a (ID, Name1, Name2) VALUES ( 3, 'avast', 'Hitman Pro' )
INSERT INTO #a (ID, Name1, Name2) VALUES ( 4, 'Hitman Pro', 'avast' )
-- Pair each row with a row whose Name2 equals this row's Name1; the ID
-- comparison keeps one of the two directions.
SELECT a1.ID AS ID1 ,
a2.ID AS ID2 ,
a1.Name1 ,
a2.Name1 AS Name2
FROM #a a1
JOIN #a a2 ON a1.Name1 = a2.Name2
AND a1.ID < a2.ID -- avoid duplicates
Referring to the amendment and extension of your question, a more complicated solution is required.
We form a CHECKSUM over a1.Name1 and a2.Name1, concatenated in a fixed order (smaller name first) so that both directions of a relation produce an identical value.
Using this we generate a number with ROW_NUMBER (Transact-SQL) and use only the rows from the result with number 1.
-- #a is created as a temp table: DECLARE ... TABLE would need an @ name.
DROP TABLE IF EXISTS #a
CREATE TABLE #a (ID uniqueIdentifier, Name1 VARCHAR(50), Name2 VARCHAR(50))
INSERT INTO #a (ID, Name1, Name2) VALUES ( NewID(), 'XMind', 'MindNode' )
INSERT INTO #a (ID, Name1, Name2) VALUES ( NewID(), 'MindNode', 'XMind' )
INSERT INTO #a (ID, Name1, Name2) VALUES ( NewID(), 'avast', 'Hitman Pro' )
INSERT INTO #a (ID, Name1, Name2) VALUES ( NewID(), 'Hitman Pro', 'avast' )
INSERT INTO #a (ID, Name1, Name2) VALUES ( NewID(), 'PPLive Video Accelerator', 'Hola! Better Internet' )
INSERT INTO #a (ID, Name1, Name2) VALUES ( NewID(), 'ZenMate', 'Hola! Better Internet' )
INSERT INTO #a (ID, Name1, Name2) VALUES ( NewID(), 'Hola! Better Internet', 'PPLive Video Accelerator' )
INSERT INTO #a (ID, Name1, Name2) VALUES ( NewID(), 'Hola! Better Internet', 'ZenMate' )
INSERT INTO #a (ID, Name1, Name2) VALUES ( NewID(), 'XX', 'A' )
INSERT INTO #a (ID, Name1, Name2) VALUES ( NewID(), 'A', 'XX' )
INSERT INTO #a (ID, Name1, Name2) VALUES ( NewID(), 'XX', 'BB' )
INSERT INTO #a (ID, Name1, Name2) VALUES ( NewID(), 'BB', 'XX' )
INSERT INTO #a (ID, Name1, Name2) VALUES ( NewID(), 'XX', 'CC' )
INSERT INTO #a (ID, Name1, Name2) VALUES ( NewID(), 'CC', 'XX' )
-- Candidate pairs (a1.Name1 = a2.Name2) are grouped by a checksum of the two
-- names, smaller name first, so both directions fall into the same group.
-- One row per group is kept; which one is not determined, because the ORDER BY
-- repeats the partition key.
;With CTE as
(
SELECT a1.ID AS ID1 ,
a2.ID AS ID2 ,
a1.Name1 ,
a2.Name1 AS Name2,
CheckSum(Case when a1.Name1>a2.Name1 then a2.Name1+a1.Name1 else a1.Name1+a2.Name1 end) ck, -- just for display
Row_Number() over (Partition by CheckSum(Case when a1.Name1>a2.Name1 then a2.Name1+a1.Name1 else a1.Name1+a2.Name1 end)
order by CheckSum(Case when a1.Name1>a2.Name1 then a2.Name1+a1.Name1 else a1.Name1+a2.Name1 end)) as rn
FROM #a a1
JOIN #a a2 ON a1.Name1 = a2.Name2
)
Select *
from CTE C1
where rn=1
Edit:
If you only want to get those where both fields are fitting the needed query would simply be:
-- A row is paired with its mirror image (names swapped); the ID comparison
-- keeps each pair once.
SELECT
    fwd.ID    AS ID1,
    rev.ID    AS ID2,
    fwd.Name1,
    rev.Name1 AS Name2
FROM #a AS fwd
JOIN #a AS rev
    ON  fwd.Name1 = rev.Name2
    AND fwd.Name2 = rev.Name1
    AND fwd.ID < rev.ID
If the output should contain only two-way relations ('XX' + 'A') AND ('A' + 'XX'), try this:
;
-- m: every row joined to its mirror image (Name1/Name2 swapped); ROW_NUMBER keeps
-- one joined row per (a1.Name1, a2.Name1) combination.
WITH m (ID1, ID2, Name1, Name2) AS (
SELECT ID1, ID2, Name1, Name2
FROM (
SELECT a1.ID AS ID1
,a2.ID AS ID2
,a1.Name1 AS Name1
,a2.Name1 AS Name2
,ROW_NUMBER() OVER (PARTITION BY a1.Name1, a2.Name1 ORDER BY (SELECT 1)) AS n
FROM #a AS a1
JOIN #a AS a2
ON a1.Name1 = a2.Name2
AND a1.Name2 = a2.Name1
) AS T
WHERE n = 1
)
-- Each relation appears in m in both directions. Rows with ID1 > ID2 are flipped
-- so that both directions become the same row, and DISTINCT then removes the copy.
SELECT DISTINCT *
FROM (
SELECT ID1, ID2, Name1, Name2
FROM m
WHERE ID1 <= ID2
UNION ALL
SELECT ID2, ID1, Name2, Name1
FROM m
WHERE ID1 > ID2
) AS dm
It produces the output as follows:
+------+-----+--------------------------+-----------------------+
| ID1 | ID2 | Name1 | Name2 |
+------+-----+--------------------------+-----------------------+
| 1 | 2 | XMind | MindNode |
| 3 | 6 | avast | Hitman Pro |
| 4 | 7 | PPLive Video Accelerator | Hola! Better Internet |
| 5 | 8 | ZenMate | Hola! Better Internet |
| 9 | 12 | XX | A |
| 10 | 11 | XX | BB |
| 13 | 14 | XX | CC |
+------+-----+--------------------------+-----------------------+
Just rank your rows with ROW_NUMBER function and use this rank in join instead of original ID column:
-- #a is created as a temp table: DECLARE ... TABLE would need an @ name.
DROP TABLE IF EXISTS #a
CREATE TABLE #a (ID UNIQUEIDENTIFIER, Name1 VARCHAR(50), Name2 VARCHAR(50))
INSERT INTO #a (ID, Name1, Name2) VALUES ( NEWID(), 'XMind', 'MindNode' )
INSERT INTO #a (ID, Name1, Name2) VALUES ( NEWID(), 'MindNode', 'XMind' )
INSERT INTO #a (ID, Name1, Name2) VALUES ( NEWID(), 'avast', 'Hitman Pro' )
INSERT INTO #a (ID, Name1, Name2) VALUES ( NEWID(), 'Hitman Pro', 'avast' )
INSERT INTO #a (ID, Name1, Name2) VALUES ( NEWID(), 'PPLive Video Accelerator', 'Hola! Better Internet' )
INSERT INTO #a (ID, Name1, Name2) VALUES ( NEWID(), 'ZenMate', 'Hola! Better Internet' )
INSERT INTO #a (ID, Name1, Name2) VALUES ( NEWID(), 'Hola! Better Internet', 'PPLive Video Accelerator' )
INSERT INTO #a (ID, Name1, Name2) VALUES ( NEWID(), 'Hola! Better Internet', 'ZenMate' )
-- rn gives every row a number that is used instead of the ID to keep only one
-- direction of each mirrored pair. ORDER BY (SELECT 1) imposes no particular
-- order, so the numbering itself is left to the engine.
;WITH cte AS(SELECT *, ROW_NUMBER() OVER (ORDER BY (SELECT 1)) rn FROM #a)
SELECT a1.ID AS ID1 ,
a2.ID AS ID2 ,
a1.Name1 ,
a2.Name1 AS Name2
FROM cte a1
JOIN cte a2 ON a1.Name1 = a2.Name2 AND
a2.Name1 = a1.Name2 AND
a1.rn < a2.rn
Output:
ID1 ID2 Name1 Name2
Guid Guid XMind MindNode
Guid Guid avast Hitman Pro
Guid Guid PPLive Video Accelerator Hola! Better Internet
Guid Guid ZenMate Hola! Better Internet
I suggest you to use this simple way:
-- t1: every relation once, with the two names in sorted order.
-- t2: the row that stores the relation in that order; t3: the row that stores
--     it the other way round.
-- The joins compare the name columns individually instead of comparing
-- concatenated strings: 'ab' + 'c' and 'a' + 'bc' concatenate to the same
-- value, and a concatenation cannot use an index on the name columns.
SELECT
t2.ID, t3.ID ID2,
t1.Name1,t1.Name2
FROM (
SELECT DISTINCT
CASE WHEN Name1 <= Name2 THEN Name1 ELSE Name2 END AS Name1,
CASE WHEN Name1 <= Name2 THEN Name2 ELSE Name1 END AS Name2
FROM
#a) t1
JOIN
#a t2 ON t1.Name1 = t2.Name1 AND t1.Name2 = t2.Name2
JOIN
#a t3 ON t1.Name1 = t3.Name2 AND t1.Name2 = t3.Name1
For this:
ID | ID2 | Name1 | Name2
----+-----+-----------------------+---------------------------
12 | 9 | A | XX
3 | 4 | avast | Hitman Pro
11 | 10 | BB | XX
14 | 13 | CC | XX
7 | 5 | Hola! Better Internet | PPLive Video Accelerator
8 | 6 | Hola! Better Internet | ZenMate
2 | 1 | MindNode | XMind
You can solve this using a CROSS APPLY
-- For every row, look up its mirror image (names swapped) with a smaller ID;
-- rows without such a partner produce no output.
SELECT
    mirror.ID  AS ID_1,
    later.ID   AS ID_2,
    mirror.Name1,
    mirror.Name2
FROM #a AS later
CROSS APPLY (
    SELECT earlier.ID, earlier.Name2, earlier.Name1
    FROM #a AS earlier
    WHERE earlier.Name1 = later.Name2
      AND later.Name1 = earlier.Name2
      AND later.ID > earlier.ID
) AS mirror
You can try also:
-- Smallest and largest ID of the rows that share the same pair of names,
-- regardless of the order in which the two names are stored.
SELECT
    MIN(ID) AS ID1,
    MAX(ID) AS ID2,
    Name1,
    Name2
FROM (
    -- Here I get all the IDs and each couple sorted
    -- Change > to < if you don't like the order
    SELECT
        ID,
        CASE WHEN Name1 > Name2 THEN Name1 ELSE Name2 END AS Name1,
        CASE WHEN Name1 > Name2 THEN Name2 ELSE Name1 END AS Name2
    FROM table1
) AS sorted_pairs
GROUP BY
    Name1,
    Name2
You can even transform this into a single query, without the inner one, but I think it's more readable this way and you can better understand my approach.