SQL - left join generate duplicates - sql

I have code to select some applications but LEFT JOIN is creating duplicates.
Question: How to get rid of duplicates generated by
LEFT JOIN
attachments as att
ON
(a.ssn = att.ssn AND att.doc_type = 'id_copy' AND att.source = 'cpt3')
My Work:
I am considering SELECT DISTINCT or GROUP BY but, can't figure out where to put them.
This is the query:
SELECT
a.*, c.code, l.who, l.locked_time, att.id AS attnew,
( SELECT
COUNT(*)
FROM
applications as a2
WHERE
a2.approved = 'Y'
AND
a2.paid = 'Y'
AND
a2.paid_back = 'Y'
AND
a2.ssn = a.ssn
) AS previous_apps
FROM
applications as a
LEFT JOIN
locked_by as l
USING(id)
LEFT JOIN
campaign_codes as c
ON
c.id = a.campaign
LEFT JOIN
attachments as att
ON
(a.ssn = att.ssn AND att.doc_type = 'id_copy' AND att.source = 'cpt3')
WHERE
a.closed='N'
AND
a.paid = 'N'
ORDER BY
a.arrived_date
DESC

Use GROUP BY
to avoid duplicates. In your case:
...
WHERE
a.closed='N'
AND
a.paid = 'N'
GROUP BY
a.id
ORDER BY
a.arrived_date
DESC

If you want to make an one to one relation to avoid duplicates and att.id is the same for each att.ssn or you need only one (MAX,MIN,..ANY?) att.id. Try this:
LEFT JOIN
(SELECT ssn,
MAX(id) as id,
FROM attachments
WHERE doc_type = 'id_copy' AND source = 'cpt3'
GROUP BY ssn
)as att
ON
(a.ssn = att.ssn)

Related

How to create distinct count from queries with several tables

I am trying to create one single query that will give me a distinct count for both the ActivityID and the CommentID. My query in MS Access looks like this:
SELECT
tbl_Category.Category, Count(tbl_Activity.ActivityID) AS CountOfActivityID,
Count(tbl_Comments.CommentID) AS CountOfCommentID
FROM tbl_Category LEFT JOIN
(tbl_Activity LEFT JOIN tbl_Comments ON
tbl_Activity.ActivityID = tbl_Comments.ActivityID) ON
tbl_Category.CategoryID = tbl_Activity.CategoryID
WHERE
(((tbl_Activity.UnitID)=5) AND ((tbl_Comments.PeriodID)=1))
GROUP BY
tbl_Category.Category;
I know the answer must somehow include SELECT DISTINCT but am not able to get it to work. Do I need to create multiple subqueries?
This is really painful in MS Access. I think the following does what you want to do:
SELECT ac.Category, ac.num_activities, aco.num_comments
FROM (SELECT ca.category, COUNT(*) as num_activities
FROM (SELECT DISTINCT c.Category, a.ActivityID
FROM (tbl_Category as c INNER JOIN
tbl_Activity as a
ON c.CategoryID = a.CategoryID
) INNER JOIN
tbl_Comments as co
ON a.ActivityID = co.ActivityID
WHERE a.UnitID = 5 AND co.PeriodID = 1
) as caa
GROUP BY ca.category
) as ca LEFT JOIN
(SELECT c.Category, COUNT(*) as num_comments
FROM (SELECT DISTINCT c.Category, co.CommentId
FROM (tbl_Category as c INNER JOIN
tbl_Activity as a
ON c.CategoryID = a.CategoryID
) INNER JOIN
tbl_Comments as co
ON a.ActivityID = co.ActivityID
WHERE a.UnitID = 5 AND co.PeriodID = 1
) as aco
GROUP BY c.Category
) as aco
ON aco.CommentId = ac.CommentId
Note that your LEFT JOINs are superfluous because the WHERE clause turns them into INNER JOINs. This adjusts the logic for that purpose. The filtering is also very tricky, because it uses both tables, requiring that both subqueries have both JOINs.
You can use DISTINCT:
SELECT
tbl_Category.Category, Count(DISTINCT tbl_Activity.ActivityID) AS CountOfActivityID,
Count(DISTINCT tbl_Comments.CommentID) AS CountOfCommentID
FROM tbl_Category LEFT JOIN
(tbl_Activity LEFT JOIN tbl_Comments ON
tbl_Activity.ActivityID = tbl_Comments.ActivityID) ON
tbl_Category.CategoryID = tbl_Activity.CategoryID
WHERE
(((tbl_Activity.UnitID)=5) AND ((tbl_Comments.PeriodID)=1))
GROUP BY
tbl_Category.Category;

Combine two queries to get the data in two columns

SELECT
tblEmployeeMaster.TeamName, SUM(tblData.Quantity) AS 'TotalQuantity'
FROM
tblData
INNER JOIN
tblEmployeeMaster ON tblData.EntryByHQCode = tblEmployeeMaster.E_HQCode
INNER JOIN
tblPhotos ON tblEmployeeMaster.TeamNo = tblPhotos.TeamNo
WHERE
IsPSR = 'Y'
GROUP BY
tblPhotos.TeamSort, tblPhotos.TeamNo, tblPhotos.Data,
tblEmployeeMaster.TeamName
ORDER BY
tblPhotos.TeamSort DESC, TotalQuantity DESC
This returns
Using this statement
select TeamName, count(TeamName) AS 'Head Count'
from dbo.tblEmployeeMaster
where IsPSR = 'Y'
group by teamname
Which returns
I would like to combine these 2 queries in 1 to get the below result.
Tried union / union all but no success :(
Any help will be very much helpful.
You can simply use the sub-query as follows:
SELECT tblEmployeeMaster.TeamName, SUM(tblData.Quantity) AS 'TotalQuantity',
MAX(HEAD_COUNT) AS HEAD_COUNT, -- USE THIS VALUE FROM SUB-QUERY
CASE WHEN MAX(HEAD_COUNT) <> 0
THEN SUM(tblData.Quantity)/MAX(HEAD_COUNT)
END AS PER_MAN_CONTRIBUTION -- column asked in comment
FROM tblData INNER JOIN
tblEmployeeMaster ON tblData.EntryByHQCode = tblEmployeeMaster.E_HQCode INNER JOIN
tblPhotos ON tblEmployeeMaster.TeamNo = tblPhotos.TeamNo
-- FOLLOWING SUB-QUERY CAN BE USED
LEFT JOIN (select TeamName, count(TeamName) AS HEAD_COUNT
from dbo.tblEmployeeMaster
where IsPSR = 'Y' group by teamname) AS HC
ON HC.TeamName = tblEmployeeMaster.TeamName
where IsPSR = 'Y'
GROUP BY tblPhotos.TeamSort, tblPhotos.TeamNo, tblPhotos.Data,tblEmployeeMaster.TeamName
order by tblPhotos.TeamSort desc, TotalQuantity desc

combine two query results into one with conditions in SQL Server

I have two query to combine two results into one. However; my challenge is to get the second query look into the first query if it doesn't exist in the first query.
I changed my post to the actual query
SELECT Name.CO_ID, Name.FULL_NAME, Name.ID, rpt.date AS StartDate,
vw_Coords.TARGET_ID AS CoordID, vw_RegDirs.TARGET_ID AS
RDID
FROM Name INNER JOIN
Tops_Profile ON dbo.Name.ID = Tops_Profile.ID left
outer JOIN
vw_mz_rpt_leader_log rpt ON Name.CO_ID = rpt.ID LEFT
OUTER JOIN
vw_RegDirs ON Name.CO_ID = vw_RegDirs.CHAPTER LEFT
OUTER JOIN
vw_Coords ON Name.CO_ID = vw_Coords.CHAPTER LEFT OUTER
JOIN
Tops_Chapter ON Tops_Chapter.ID = Name.CO_ID
WHERE (Name.MEMBER_TYPE = 'm') AND (Tops_Profile.LDR = '1') and
LOG_TEXT like '%LEADER Change%'
union
SELECT Name.CO_ID, Name.FULL_NAME, Name.ID,
YEAR(dbo.Tops_Chapter.PST_DATE_LEAD) AS StartDate,
vw_Coords.TARGET_ID AS CoordID, vw_RegDirs.TARGET_ID AS
RDID
FROM Name INNER JOIN
Tops_Profile ON Name.ID = Tops_Profile.ID left outer
JOIN
vw_mz_rpt_leader_log rpt ON Name.CO_ID = rpt.ID LEFT
OUTER JOIN
vw_RegDirs ON Name.CO_ID = vw_RegDirs.CHAPTER LEFT
OUTER JOIN
vw_Coords ON Name.CO_ID = vw_Coords.CHAPTER LEFT OUTER
JOIN
Tops_Chapter ON Tops_Chapter.ID = Name.CO_ID
WHERE (Name.MEMBER_TYPE = 'm') AND (Tops_Profile.LDR = '1')
the scope is if the record exists in the first query don't bring it from second query.
Here's a quick and dirty way...
select *
from
(select id, Name, log.Date
from Name
inner join Log on Name.id = log.id
where log.text_log like '%Leader%'
union
select id, Name, Profile.Date
from Name
inner join profile on Name.id = profile.id
where profile.Leader = '1') d
order by row_number() over(partition by x.id order by x.Date asc)
Note, this doesn't care where John came from, it's simply finding the first occurrence based on the date which seems to be what you want.
You have altered your request. Suddenly both queries select from the same tables and a UNION (or UNION ALL for that matter) doesn't seem a good solution anymore.
There are very few differences between the two queries even. And looking at the whole it boils down to: select records for member_type = 'm' and tp.ldr = 1 and then keep only one record per name, preferredly one with log_text like '%LEADER Change%'. This is mere ranking, as already shown in my other answer. You only need one query to select all records in question and use TOP (1) WITH TIES to keep the best matches per name.
select top(1) with ties
n.co_id,
n.full_name,
n.id,
case when log_text like '%LEADER Change%' then rpt.date else year(tc.pst_date_lead) end
as startdate,
c.target_id as coordid,
rd.target_id as rdid
from name n
inner join tops_profile tp on n.id = tp.id
left outer join vw_mz_rpt_leader_log rpt on n.co_id = rpt.id
left outer join vw_regdirs rd on n.co_id = rd.chapter
left outer join vw_coords c on n.co_id = c.chapter
left outer join tops_chapter tc on tc.id = n.co_id
where n.member_type = 'm'
and tp.ldr = 1
order by row_number() over (
partition by n.id
order by case when log_text like '%LEADER Change%' then 1 else 2 end);
As you said you just want only one record per name, I am using ROW_NUMBER. If you want more, use RANK instead.
It's not clear why you are joining the tops_chapter table. Is log_text a column in that table? (You should use a table qualifier for this column in your query.) If it isn't, then the join is superfluous and you can remove it from your query.
Use row_number and select id's with least date
with cte as
(select id, Name, log.Date
from Name
inner join Log on Name.id = log.id
where log.text_log like '%Leader%'
union all
select id, Name, Profile.Date as log.date
from Name
inner join profile on Name.id = profile.id
where profile.Leader = '1'
) , ct1 as (select id,name,log.date, ROW_NUMBER() over (partition by id order by log.date) rn from cte )
select id,name,log.date from ct1 where rn = 1
where profile.Leader = '1'
and id not in ( select Name.id
from Name
inner join Log
on Name.id = log.id
where log.text_log like '%Leader%' )
You can use NOT EXISTS in the second query to filter out already existing Name records:
select id, Name, log.Date
from Name
inner join Log on Name.id = log.id
where log.text_log like '%Leader%'
union
select n1.id, n1.Name, Profile.Date
from Name as n1
inner join profile on n1.id = profile.id
where profile.Leader = '1' and
not exists (select 1
from Name as n2
inner join Log on n2.id = Log.id
where Log.text_log like '%Leader%' and
n2.id = n1.id and n2.name = n1.name)
The query below finds logdate and profiledate for each name. If there is a logdate, the logdate will be diplayed else the profile date will be displayed. If both don't exist the Name won't be displayed.
select id, Name, coalesce(log.Date,profile.date)
from Name
left join Log on Name.id = log.id and log.text_log like '%Leader%'
left join profile on Name.id = profile.id and profile.Leader = '1'
where coalesce(log.Date,profile.date) is not null
You can add a rank to your two queries. Then per ID you keep the record(s) with the better rank (using ORDER BY with TOP (1) WITH TIES).
select top(1) with ties
id, name, date
from
(
select n.id, n.name, log.date, 1 as rnk
from name n
inner join log on name.id = log.id
where log.text_log like '%Leader%'
union all
select n.id, n.name, profile.date, 2 as rnk
from name n
inner join profile on name.id = profile.id
where profile.leader = '1'
) data
order by rank() over (partition by id order by rnk);

Oracle SQL Correlated subquery - Returning count(*) in some columns

I have my initial statement which is :
SELECT TEAM.ID PKEY_SRC_OBJECT,
TEAM.MODF_DAT UPDATE_DATE,
TEAM.MODF_USR UPDATED_BY,
PERSO.FIRST_NAM FISRT_NAME
FROM TEAM
LEFT OUTER JOIN PERSO ON (TEAM.ID=PERSO.TEAM_ID)
I want to calculate some "flags" and return them in my initial statement.
There are 3 flags which can be calculated like this :
1) Flag ISMASTER:
SELECT Count(*)
FROM TEAM_TEAM_REL A, TEAM B
WHERE B.PARTY_PTY_ID = A.RLTD_TEAM_ID
AND CODE = 'Double';
2) Flag ISAGENT:
SELECT Count(*)
FROM TEAM_ROL_REL A, TEAM B
WHERE B.PARTY_PTY_ID = A.TEAM_ID;
3) Flag NUMPACTS:
SELECT Count(*)
FROM TEAM_ROL_REL A,
TEAM_ROL_POL_REL B,
PERSO_POL_STA_REL C,
TEAM D
WHERE A.ROL_CD IN ('1','2')
AND A.T_ROL_REL_ID = B.P_ROL_REL_ID
AND B.P_POL_ID = C.P_POL_ID
AND C.STA_CD = 'A'
AND D.PARTY_PTY_ID = A.TEAM_ID;
To try to achieve this, I've updated my initial statement like this :
WITH ABC AS (
SELECT TEAM.ID PKEY_SRC_OBJECT,
TEAM.MODF_DAT UPDATE_DATE,
TEAM.MODF_USR UPDATED_BY,
PERSO.FIRST_NAM FISRT_NAME
FROM TEAM
LEFT OUTER JOIN PERSO ON (TEAM.ID=PERSO.TEAM_ID)
)
SELECT ABC.*, MAST.ISMASTER, AGENT.ISAGENT, PACTS.NUMPACTS FROM ABC
LEFT OUTER JOIN (
select
RLTD_TEAM_ID,
Count(RLTD_TEAM_ID) OVER (PARTITION BY RLTD_TEAM_ID) as ISMASTER
FROM TEAM_TEAM_REL
WHERE CODE = 'Double'
) MAST
ON ABC.PKEY_SRC_OBJECT = MAST.RLTD_TEAM_ID
LEFT OUTER JOIN (
select
TEAM_ID,
Count(TEAM_ID) OVER (PARTITION BY TEAM_ID) as ISAGENT
FROM TEAM_ROL_REL
) AGENT
ON ABC.PKEY_SRC_OBJECT = AGENT.TEAM_ID
LEFT OUTER JOIN (
select
TEAM_ID,
Count(TEAM_ID) OVER (PARTITION BY TEAM_ID) as NUMPACTS
FROM TEAM_ROL_REL, TEAM_ROL_POL_REL, PERSO_POL_STA_REL
WHERE TEAM_ROL_REL.ROL_CD IN ('1','2')
AND TEAM_ROL_REL.T_ROL_REL_ID = TEAM_ROL_POL_REL.P_ROL_REL_ID
AND TEAM_ROL_POL_REL.P_POL_ID = PERSO_POL_STA_REL.P_POL_ID
AND PERSO_POL_STA_REL.STA_CD = 'A'
) PACTS
ON ABC.PKEY_SRC_OBJECT = PACTS.TEAM_ID;
For the two first flags (ISMASTER and ISAGENT) I get the result in less than 1min, but for the last flag (NUMPACTS) it runs few minutes without provide any result.
I think my statement is too heavy, maybe I should do it in a totally different way.
I think you have perhaps over complicated things.
You could do this (assuming I have understood your requirements correctly) like so:
WITH ttr AS (SELECT rltd_team_id,
COUNT(*) is_master
FROM team_team_rel
AND CODE = 'Double'
GROUP BY rltd_team_id),
trr AS (SELECT team_id,
COUNT(*) is_agent
FROM team_rol_rel
GROUP BY team_id)
pacts AS (SELECT trr1.team_id,
COUNT(*) num_pacts
FROM team_rol_rel trr1
INNER JOIN team_rol_pol_rel trpr ON (trr1.t_rol_rel_id = trpr.p_rol_rel_id)
INNER JOIN perso_pol_sta_rel ppsr ON (trpr.p_pol_id = ppsr.p_pol_id
WHERE trr1.rol_cd IN ('1', '2')
AND ppsr.st_cd = 'A'
GROUP BY trr1.team_id)
SELECT t.id pkey_src_object,
t.modf_dat update_date,
t.modf_usr updated_by,
p.first_nam first_name,
ttr.is_master,
trr.is_agent,
pacts.num_pacts
FROM team t
LEFT OUTER JOIN perso p ON t.id = p.team_id
LEFT OUTER JOIN ttr ON t.party_pty_id = ttr.rltd_team_id
LEFT OUTER JOIN trr ON t.party_pty_id = trr.team_id
LEFT OUTER JOIN pacts ON t.pkey_src_object = pacts.team_id;
N.B. untested, since you didn't provide any test data.

Convert Exist condition to Join with T-SQL

I am trying to convert the following T-SQL Select query to exclude "Exists" Clause and Include "Join" Clause. but i am ending up not getting the right result. can some one from this expert team help me with some tips.
select *
FROM HRData
INNER JOIN (
SELECT eeceeid, MIN(eecdateoftermination) eTermDate
FROM dbo.empcomp
INNER JOIN
(
SELECT xeeid FROM HRData_EEList
INNER JOIN dbo.empcomp t ON xeeid = eeceeid AND xcoid = eeccoid
WHERE eecemplstatus = 'T' AND eectermreason <> 'TRO' AND eeccoid <> 'WAON6'
AND EXISTS ( SELECT 1 FROM dbo.empded
INNER JOIN dbo.dedcode on deddedcode = eeddedcode AND deddedtype = 'MED' AND (eedbenstopdate IS NULL OR eedbenstopdate > '12/31/2005')
WHERE eedeeid = xeeid AND eedcoid = xcoid )
GROUP BY xeeid
HAVING COUNT(*) > 1) Term ON xeeid = eeceeid
group by eeceeid
) Terms ON eeid = eeceeid AND Termdate = eTermDate
The algorithm to convert EXISTS to JOIN is very simple.
Instead of
FROM A
WHERE EXISTS (SELECT *
FROM B
WHERE A.Foo = B.Foo)
Use
FROM A
INNER JOIN (SELECT DISTINCT Foo
FROM B) AS B
ON A.Foo = B.Foo
But the first one probably will be optimised better
Interesting request.
select *
FROM HRData
INNER JOIN (
SELECT eeceeid, MIN(eecdateoftermination) eTermDate
FROM dbo.empcomp
INNER JOIN
(
SELECT xeeid FROM HRData_EEList
INNER JOIN dbo.empcomp t ON xeeid = eeceeid AND xcoid = eeccoid
INNER JOIN
( SELECT DISTINCT xeeid, xcoid FROM dbo.empded
INNER JOIN dbo.dedcode on deddedcode = eeddedcode AND deddedtype = 'MED' AND (eedbenstopdate IS NULL OR eedbenstopdate > '12/31/2005')
-- WHERE eedeeid = xeeid AND eedcoid = xcoid
) AS A ON xeeid = A.xeeid AND eedcoid = A.eedcoid
WHERE eecemplstatus = 'T' AND eectermreason <> 'TRO' AND eeccoid <> 'WAON6'
GROUP BY xeeid
HAVING COUNT(*) > 1) Term ON xeeid = eeceeid
group by eeceeid
) Terms ON eeid = eeceeid AND Termdate = eTermDate
Another method of converting an exists to a join is to use a ROW_NUMBER() in the subselect to assist in removing duplicates.
EXISTS:
FROM A
WHERE EXISTS (SELECT *
FROM B
WHERE B.Condition = 'true' AND A.Foo = B.Foo)
JOIN:
FROM A
JOIN (SELECT B.Foo, ROW_NUMBER() OVER (PARTITION BY B.Foo ORDER BY B.Foo) RN
FROM B
WHERE B.Condition = 'true') DT
ON A.Foo = DT.Foo AND DT.RN = 1
The ORDER BY is totally arbitrary since you don't care which record it selects, but it's required. You may be able to use (SELECT NULL) instead.