SQL - How to select distinct and join multiple tables without duplicating data

SQL - How to select distinct and join multiple tables without duplicating data - sql

I have the following table setup/data:
-- Sample data: one parent row in #temp, two child rows in each of #temp2 and #temp3,
-- all sharing irecordid = 1.
CREATE TABLE #temp (
    irecordid        INT,
    sdocumentno      VARCHAR(20),
    dtfileddate      DATETIME,
    mnyconsideration MONEY
);
INSERT INTO #temp (irecordid, sdocumentno, dtfileddate, mnyconsideration)
VALUES (1, '3731572', '6-30-2014', 120.00);

CREATE TABLE #temp2 (
    irecordid INT,
    address   VARCHAR(255)
);
INSERT INTO #temp2 (irecordid, address)
VALUES (1, '406 N CUSTER'),
       (1, '2015 E HANSON');

CREATE TABLE #temp3 (
    irecordid    INT,
    srdocumentno VARCHAR(25)
);
INSERT INTO #temp3 (irecordid, srdocumentno)
VALUES (1, '55489'),
       (1, '99809');
I am trying to select so I only get a distinct instance of each table. I am trying:
-- Attempted query: both child tables are joined only on irecordid, so every
-- #temp2 row is combined with every #temp3 row of the same record.
SELECT DISTINCT
    t1.sdocumentno,
    t2.address,
    t3.srdocumentno
FROM #temp AS t1
INNER JOIN #temp2 AS t2
    ON t2.irecordid = t1.irecordid
INNER JOIN #temp3 AS t3
    ON t3.irecordid = t1.irecordid
And my results are as follows:
3731572 2015 E HANSON 55489
3731572 2015 E HANSON 99809
3731572 406 N CUSTER 55489
3731572 406 N CUSTER 99809
I would really like only the distinct data from each table like this:
3731572 2015 E HANSON 55489
3731572 406 N CUSTER 99809
Is there a way I can accomplish this?
Thanks!

I am guessing that you want to join on "row number", but that doesn't exist. But, you can generate one and then join on them:
-- Pair the n-th #temp2 row with the n-th #temp3 row of the same irecordid by
-- generating a per-record sequence number in each child table and joining on it.
SELECT
    t1.sdocumentno,
    t2.address,
    t3.srdocumentno
FROM #temp AS t1
INNER JOIN (
    SELECT
        a.*,
        -- ORDER BY (SELECT NULL): there is no ordering column, so the numbering
        -- within each irecordid is arbitrary.
        ROW_NUMBER() OVER (PARTITION BY a.irecordid ORDER BY (SELECT NULL)) AS seqnum
    FROM #temp2 AS a
) AS t2
    ON t1.irecordid = t2.irecordid
INNER JOIN (
    SELECT
        d.*,
        ROW_NUMBER() OVER (PARTITION BY d.irecordid ORDER BY (SELECT NULL)) AS seqnum
    -- Must read #temp3 (not #temp2): srdocumentno only exists in #temp3.
    FROM #temp3 AS d
) AS t3
    ON t1.irecordid = t3.irecordid
   AND t2.seqnum = t3.seqnum;
You can use full outer join if the lists are of different lengths.

Related

Query to pull the data from 3 tables based on latest load date and HashKey

I am trying to write a SQL query that pulls data from 3 tables using JOINs on a common HashKey, and I want to take only the most recent records from the 3rd table based on the load date (the latest increment).
I have tried the SQL query below, but I am not able to get the most recent record from the third table.
-- Asker's attempt, with syntax repaired (commas in the select list, the Table3
-- name, and the TennisHashKey spelling). The logic is unchanged: the inner
-- subquery picks the single latest LoadDate across all of Table3, not the
-- latest LoadDate per TennisHashKey.
SELECT
    tab1.TennisID,
    tab1.TennisHashKey,
    tab3.LoadDate,
    tab2.TennisType,
    tab3.Clicks,
    tab3.Hit,
    tab3.Likes
FROM table1 tab1
LEFT JOIN table2 tab2
    ON tab1.TennisHashKey = tab2.TennisHashKey
LEFT JOIN (
    SELECT *
    FROM Table3
    WHERE LoadDate = (SELECT TOP 1 LoadDate FROM Table3 ORDER BY LoadDate DESC)
) tab3
    ON tab2.TennisHashKey = tab3.TennisHashKey
I have a matching number of records in Table1 and Table2, but there are multiple rows for the same hash key in Table3, one per LoadDate.
Please provide your suggestion on this.
Thanks

Use ROW_NUMBER() to join only the most recent row from Table3.
-- Join only the most recent Table3 row for each TennisHashKey.
SELECT
    tab1.TennisID
    , tab1.TennisHashKey
    , tab3.LoadDate
    , tab2.TennisType
    , tab3.Clicks
    , tab3.Hit
    , tab3.Likes
FROM table1 tab1
LEFT JOIN table2 tab2
    ON tab1.TennisHashKey = tab2.TennisHashKey
LEFT JOIN (
    SELECT *
        -- rn = 1 marks the row with the latest LoadDate within each hash key.
        , ROW_NUMBER() OVER (PARTITION BY TennisHashKey ORDER BY LoadDate DESC) rn
    FROM Table3
) tab3
    -- Column is TennisHashKey (the original had the typo "TennishHashKey").
    ON tab2.TennisHashKey = tab3.TennisHashKey
    -- Kept in ON rather than WHERE so the LEFT JOIN still returns unmatched rows.
    AND tab3.rn = 1;

Another approach: you can use OUTER APPLY and get latest row and select it.
-- Self-contained example: sample data lives in table variables.
-- Table variables use the @ sigil; "DECLARE #name TABLE" is not valid T-SQL.
DECLARE @table1 TABLE (tennisid CHAR(1), tennishashkey CHAR(4), loaddate DATE);
DECLARE @table2 TABLE (tennishashkey CHAR(4), tennistype CHAR(10), loaddate DATE);
DECLARE @table3 TABLE (tennishashkey CHAR(4), loaddate DATE, clicks INT, hit INT, likes INT);

INSERT INTO @table1 VALUES ('A', 'A001', '2020-01-01');
INSERT INTO @table2 VALUES ('A001', 'grass', '2020-01-01');
-- NOTE(review): both sample rows share the same loaddate, so which of them
-- TOP 1 ... ORDER BY loaddate DESC returns is not determined by the ordering.
INSERT INTO @table3 VALUES ('A001', '2020-01-01', 0, 0, 0), ('A001', '2020-01-01', 1, 1, 1);

SELECT
    tab1.TennisID
    , tab1.TennisHashKey
    , tab3.LoadDate
    , tab2.TennisType
    , tab3.Clicks
    , tab3.Hit
    , tab3.Likes
FROM @table1 tab1
LEFT JOIN @table2 tab2
    ON tab1.TennisHashKey = tab2.TennisHashKey
-- OUTER APPLY evaluates the subquery once per tab1 row and keeps tab1 rows
-- even when the subquery returns nothing.
OUTER APPLY (
    SELECT TOP 1 *
    FROM @table3 AS t3
    WHERE t3.tennishashkey = tab1.tennishashkey
    ORDER BY t3.loaddate DESC
) tab3
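| TennisID | TennisHashKey | LoadDate   | TennisType | Clicks | Hit | Likes |
|----------|---------------|------------|------------|--------|-----|-------|
| A        | A001          | 2020-01-01 | grass      | 1      | 1   | 1     |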

How do I count occurrences that were joined SQL

I am joining two tables:
-- Sample data in table variables (the @ sigil is required by DECLARE ... TABLE).
DECLARE @Temp TABLE (
id INT)
INSERT INTO @Temp
VALUES (5)
,(2)
,(3)
DECLARE @Temp2 TABLE (
member_id INT)
INSERT INTO @Temp2
VALUES (5)
,(1)
,(3)
How do I count the number of rows that can be LEFT joined and the ones that can't?
In this example, 5 and 3 from the first table can be joined to the second table, and only 2 from the first table can't be joined.
I would like my output to show the following:
+--------+------------+
| Joined | Not_Joined |
+--------+------------+
| 2 | 1 |
+--------+------------+

You can do this in a single query using COUNT and SUM. This should produce the results you are looking for.
-- Sample data in table variables (the @ sigil is required by DECLARE ... TABLE).
DECLARE @Temp TABLE (
    id INT
);
INSERT INTO @Temp (id)
VALUES (5), (2), (3);

DECLARE @Temp2 TABLE (
    member_id INT
);
INSERT INTO @Temp2 (member_id)
VALUES (5), (1), (3);

SELECT
    -- COUNT(column) skips NULLs, so only rows that found a match are counted.
    Joined = COUNT(t2.member_id)
    -- ELSE 0 plus COALESCE make this 0 (not NULL) when every row joins or
    -- when @Temp is empty.
    , NotJoined = COALESCE(SUM(CASE WHEN t2.member_id IS NULL THEN 1 ELSE 0 END), 0)
FROM @Temp t
LEFT JOIN @Temp2 t2
    ON t2.member_id = t.id

The count from #Temp that EXISTS in #Temp2:
-- Rows of @Temp whose id appears in @Temp2 (table variables, hence the @ sigil).
SELECT COUNT(*) FROM @Temp WHERE id IN (SELECT member_id FROM @Temp2)
The count from #Temp2 not in #Temp:
-- Rows of @Temp2 whose member_id does not appear in @Temp.
-- The list inside NOT IN must be a full subquery (SELECT ... FROM ...).
SELECT COUNT(*) FROM @Temp2 WHERE member_id NOT IN (SELECT id FROM @Temp)
Now to create a single result set, there are many ways but here is a simple one:
-- One result row built from two independent scalar subqueries.
SELECT
    -- The list inside IN must be a full subquery (SELECT ... FROM ...).
    (SELECT COUNT(*) FROM @Temp2 WHERE member_id IN (SELECT id FROM @Temp)) AS [Joined],
    (SELECT COUNT(*) FROM @Temp WHERE id NOT IN (SELECT member_id FROM @Temp2)) AS [NotJoined]
@Sean Lange's answer is more specific to the JOIN question; my answer simply counts what exists in the lists.

-- Count the table1 rows with no match in table2, then derive the joined count
-- as (all table1 rows) - (unmatched rows).
SELECT
    COUNT(*) AS 'NOT Joined ',
    -- COUNT(*) is a function, not a column of t1, so it cannot be written t1.count(*).
    (SELECT COUNT(*) FROM table1) - COUNT(*) AS 'Joined'
FROM table1
WHERE id NOT IN (SELECT member_id FROM table2);
This is basically how a left join works: it returns the values common to both
tables plus the values of table 1 that do not exist in table 2.

SQL - Multiple Inner Join, most recent

I am wondering why the below SQL query does not work properly. I am attempting to return the fields from table 1 and table 2 based on the most recent date AND only those elements in those tables that have the name Steve from a third table.
This query, meanwhile, does not actually limit the results to those with the name of Steve. If I remove the second Inner Join and focus on fields only in Table 1 to limit the universe, it works fine.
Appreciate your help on this. I am using Microsoft SQL Server Management Studio.
-- All columns of the three tables, restricted to table3 rows named 'Steve'
-- and to table1 rows carrying the latest AsOfDate found anywhere in table1.
SELECT *
FROM [db].table1
INNER JOIN [db].table2
    ON table2.id = table1.id
INNER JOIN [db].table3
    ON table3.id = table1.id
WHERE table3.Name = 'Steve'
  AND table1.AsOfDate = (SELECT MAX(AsOfDate) FROM [db].table1)

The IDs may not be referring to the same ID across all three tables, although your joins assume that they do. I mirrored your query with sample temp tables, and your query works.
-- SAMPLE TABLES
-- Drop leftovers from a previous run so the script can be re-executed.
IF OBJECT_ID('tempdb..#table1') IS NOT NULL
    DROP TABLE #table1
IF OBJECT_ID('tempdb..#table2') IS NOT NULL
    DROP TABLE #table2
IF OBJECT_ID('tempdb..#table3') IS NOT NULL
    DROP TABLE #table3

CREATE TABLE #table1 (
    id      INT,
    my_date DATE
)
INSERT INTO #table1 (id, my_date)
VALUES (1, '1/1/2018'), (2, '1/2/2018'), (3, '1/1/2018')

CREATE TABLE #table2 (
    id         INT,
    some_field VARCHAR(10)
)
INSERT INTO #table2 (id, some_field)
VALUES (1, 'abc'), (2, 'xyz'), (3, 'foo')

CREATE TABLE #table3 (
    id   INT,
    name VARCHAR(10)
)
INSERT INTO #table3 (id, name)
VALUES (1, 'jon'), (2, 'steve'), (3, 'jane')

-- QUERY
-- Same shape as the question's query: latest date in #table1, name filter on #table3.
SELECT *
FROM #table1 AS x
INNER JOIN #table2 AS y
    ON x.id = y.id
INNER JOIN #table3 AS z
    ON z.id = x.id
WHERE z.name = 'Steve'
  AND x.my_date = (SELECT MAX(my_date) FROM #table1)
output
id my_date id some_field id name
2 2018-01-02 2 xyz 2 steve

I think the simplest way is a window function in the order by:
-- Keep every joined row that ties for the most recent AsOfDate among Steve's rows.
Select top (1) with ties . . . -- list the columns explicitly
from [db].table1 t1 join
[db].table2 t2
on t1.id = t2.id join
[db].table3 t3
on t1.id = t3.id
where t3.Name = 'Steve'
-- DESC makes rank 1 the latest AsOfDate; ascending order would return the oldest.
order by rank() over (order by t1.AsOfDate desc);

All possible combinations of records in table sql server

I have a table
-- Sample data in a table variable (the @ sigil is required by DECLARE ... TABLE).
-- activ is varchar, and 'NULL' below is the four-character string, not SQL NULL.
declare @table table(t varchar(50), d varchar(50), activ varchar(10), groupid int, rownum int)
insert into @table values('ALK','ceri', '0.2',1,1)
insert into @table values('ALK','criz', '24',1,2)
insert into @table values('EGFR','erlo', '2',2,3)
insert into @table values('EGFR','gefi', '57',2,4)
insert into @table values('EGFR','ibru', '5.6',2,5)
insert into @table values('EGFR','ceri', '900',2,6)
insert into @table values('EGFR','cetu', 'NULL',2,7)
insert into @table values('EGFR','afat', '10',2,8)
insert into @table values('EGFR','lapa', '10.8',2,9)
insert into @table values('EGFR','pani', 'NULL',2,10)
insert into @table values('ERBB2','pert', 'NULL',3,11)
insert into @table values('ERBB2','tras', 'NULL',3,12)
insert into @table values('ERBB2','lapa', '9.2',3,13)
insert into @table values('ERBB2','ado-', 'NULL',3,14)
insert into @table values('ERBB2','afat', '14',3,15)
insert into @table values('ERBB2','ibru', '9.4',3,16)
In the output I need all combinations by groupid or t, in the format
t,d,t,d,t,d,activ and so on; then I will pick out the best combinations.
Any help will be appreciated. This will show doctors optimum combination of drugs for cancer patients. The table is dynamic and different for every patient.
Thank you

For all possible combinations, you would use CROSS JOIN:
-- Every row of table1 paired with every row of table2.
-- CROSS JOIN takes no ON clause; add a WHERE clause (or use INNER JOIN ... ON)
-- if the pairs need to be restricted.
SELECT *
FROM table1 AS t1
CROSS JOIN table2 AS t2
Keep in mind that the result set has n × m rows (O(n^2) for a self join), which is likely to be huge for large sets of data.

I will use #TT to represent the table var since calling it #table may be a bit confusing
I also changed the datatype of activ to float
There are really 3 possible cross joins
-- #1 -- producing 256 rows
SELECT *
FROM #TT AS T1
CROSS JOIN #TT AS T2

-- #2 -- produces 104 rows
SELECT *
FROM #TT AS T1
CROSS JOIN #TT AS T2
WHERE T2.GroupID = T1.GroupID

-- #3 -- produces 104
SELECT *
FROM #TT AS T1
CROSS JOIN #TT AS T2
WHERE T2.t = T1.t
The 1st is a true cross join on the whole table.
The 2nd and 3rd are cross joins on GroupID and t respectively, but they are identical since Group 1 represents T='ALK', etc. This is easily confirmed since a union of #2 and #3 also produces 104 rows
However, select * on a self join is silly as is obvious if you change select * to
select T1.*, '===', T2.*
You can see the columns on the left of '===' are the same as the columns to the right of '==='
Since GroupID is an integer I would write the cross join as
-- Self cross join restricted to matching GroupID, returning only the left side's columns.
SELECT T1.*
FROM #TT AS T1
CROSS JOIN #TT AS T2
WHERE T2.GroupID = T1.GroupID
Now, since the poster wanted a grouping based on the smallest total activ, I think it makes sense to group the response by GroupID, t and d, report the sum of Activ, and order by GroupID and sum(Activ)
-- #4 adding group by and sum -- 16 rows generated
SELECT
    T1.groupid,
    T1.t,
    T1.d,
    SUM(T1.activ) AS SumActiv
FROM #TT AS T1
CROSS JOIN #TT AS T2
WHERE T2.groupid = T1.groupid
GROUP BY T1.groupid, T1.t, T1.d
ORDER BY T1.groupid, SUM(T1.activ)
Now you are getting close except for the fact that no CROSS JOIN is needed at all
-- #5 remove the cross join
SELECT
    T1.groupid,
    T1.t,
    T1.d,
    SUM(T1.activ) AS SumActiv
FROM #TT AS T1
GROUP BY T1.groupid, T1.t, T1.d
When I remove the cross join portion of the query I get the exact same result. I think we finally have what is wanted, with the possible exception of removing all but the first row for each combination of GroupID and d

Join Tables with no Join Criteria

This seems so simple, but I just can't figure it out. I want to simply join 2 tables together. I don't care which values are paired with which. Using TSQL, here is an example:
-- Sample data in table variables (the @ sigil is required by DECLARE ... TABLE).
declare @tbl1 table(id int)
declare @tbl2 table(id int)
insert @tbl1 values(1)
insert @tbl1 values(2)
insert @tbl2 values(3)
insert @tbl2 values(4)
insert @tbl2 values(5)
-- Comma-separated FROM list with no WHERE clause: every @tbl1 row is paired
-- with every @tbl2 row.
select * from @tbl1, @tbl2
This returns 6 rows, but what kind of query will generate this (just slap the tables side-by-side):
1 3
2 4
null 5

You can give each table row numbers and then join on the row numbers:
-- Number the rows of each table independently, then line the tables up by
-- that number. FULL OUTER JOIN keeps the surplus rows of the longer table.
WITH
left_numbered AS (
    SELECT
        id,
        ROW_NUMBER() OVER (ORDER BY id) AS RowNumber
    FROM Table1
),
right_numbered AS (
    SELECT
        id,
        ROW_NUMBER() OVER (ORDER BY id) AS RowNumber
    FROM Table2
)
SELECT
    l.Id,
    r.Id AS Id2
FROM left_numbered AS l
FULL OUTER JOIN right_numbered AS r
    ON r.RowNumber = l.RowNumber
Edit: Modified to use FULL OUTER JOIN, so you get all rows (with nulls).

Use Cross Join
-- Every row of tableA combined with every row of TableB.
SELECT *
FROM tableA
CROSS JOIN TableB
But understand you will get a row in the output for every combination of rows in TableA with every Row in TableB...
So if Table A has 8 rows, and TableB has 4 rows, you will get 32 rows of data...
If you want any less than that, you have to specify some join criteria, that will filter out the extra rows from the output

Well, this will work:
-- Number each table's rows and match them up by position.
-- Tbl1 is the left-hand source (A) so its values come first in the output,
-- matching the requested layout (1 3 / 2 4 / null 5).
SELECT A.ID, B.ID
FROM (SELECT ROW_NUMBER() OVER (ORDER BY ID) AS RowNumber, ID FROM Tbl1) A
FULL OUTER JOIN
    (SELECT ROW_NUMBER() OVER (ORDER BY ID) AS RowNumber, ID FROM Tbl2) B
    ON (A.RowNumber = B.RowNumber)

The SQL1 cross join applies here also.
-- Comma-separated FROM list with no WHERE clause: each tableA row is paired
-- with each TableB row.
Select *
From tableA, TableB

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

SQL - How to select distinct and join multiple tables without duplicating data - sql

Related

Query to pull the data from 3 tables based on latest load date and HashKey

How do I count occurrences that were joined SQL

SQL - Multiple Inner Join, most recent

All possible combinations of records in table sql server

Join Tables with no Join Criteria

Categories

Resources