Obtaining only first result from a LEFT JOIN - sql

I'm trying to get the first result of a LEFT JOIN for each row of a SELECT statement.
Because right now, if I have 100 rows in the joined table, I get the same row from the SELECT 100 times. I just need the first joined row so that I don't get any duplicates.
I can't use GROUP BY because I have to get more than only one row from the table.
Here's a basic version of my query:
-- Asker's starting query: each BloodGlucose reading joined to the patient's
-- IVProtocol rows, keeping readings whose BGValue is within the target range.
-- One output row is produced per matching (reading, protocol) pair.
-- The WHERE predicate on i.* rejects NULL-extended rows, so the LEFT JOIN
-- behaves like an inner join here.
SELECT
    bg.PatientID,
    DATEDIFF(hour, bg.CreateDate, GETDATE()) AS TimeToTarget
FROM BloodGlucose AS bg
LEFT JOIN IVProtocol AS i
    ON i.PatientID = bg.PatientID
WHERE bg.BGValue BETWEEN i.TargetLow AND i.TargetHigh
ORDER BY bg.PatientID ASC
I tried using DISTINCT but since the data from bg.CreateDate isn't always the same it returns duplicates.
I just need the FIRST row of that left joined table.
Any ideas/suggestions?
Thanks!

-- One row per patient: the most recent BloodGlucose reading (by CreateDate)
-- whose BGValue lies inside the patient's IVProtocol target range.
;WITH x AS
(
    SELECT
        bg.PatientID,
        TimeToTarget = DATEDIFF(hour, bg.CreateDate, GETDATE()),
        -- rn = 1 marks the newest qualifying reading for each patient.
        -- Ordered by CreateDate, the same column TimeToTarget is computed from.
        rn = ROW_NUMBER() OVER (PARTITION BY bg.PatientID ORDER BY bg.CreateDate DESC)
    FROM dbo.BloodGlucose AS bg
    LEFT JOIN dbo.IVProtocol AS i
        ON i.PatientID = bg.PatientID
    -- These predicates on i.* discard NULL-extended rows, so the LEFT JOIN
    -- effectively acts as an inner join.
    WHERE bg.BGValue >= i.TargetLow
      AND bg.BGValue <= i.TargetHigh
)
SELECT PatientID, TimeToTarget
FROM x
WHERE rn = 1
ORDER BY PatientID;
To join to other results:
-- Template: reuse the CTE x and join its per-patient result to another table.
;WITH x AS
(
... same as above ...
)
SELECT x.PatientID, x.TimeToTarget, y.Something
FROM x INNER JOIN dbo.SomethingElse AS y
ON x.PatientID = y.PatientID
-- Keep only the row numbered 1 within each patient's partition.
WHERE x.rn = 1
ORDER BY x.PatientID;

-- For every BloodGlucose reading, fetch a single IVProtocol row for the same
-- patient (TOP 1 under the placeholder ordering), then keep the readings whose
-- BGValue lies inside that row's target range.
SELECT
    bg.PatientID,
    DATEDIFF(hour, bg.CreateDate, GETDATE()) AS TimeToTarget
FROM BloodGlucose AS bg
CROSS APPLY (
    SELECT TOP 1 *
    FROM IVProtocol AS i
    WHERE i.PatientID = bg.PatientID
    ORDER BY SOME_CRITERA   -- placeholder: the ordering that defines which row is "first"
) AS i
WHERE bg.BGValue >= i.TargetLow
  AND bg.BGValue <= i.TargetHigh
ORDER BY bg.PatientID ASC
Cross apply is a handy tool for such situations. It works like a join but you can use variables inside the subquery.

Related

Turning an outer apply into a left join when you reference parent aliases

I'm currently trying to turn an outer apply into a left join to save some complexity.
-- For each fact row, fetch one dimension row with the same product/country whose
-- date equals the greatest dimension date (same product/country) that, shifted
-- back 3 months and formatted as yyyyMM, is <= the fact row's month_in_the_year.
SELECT *
FROM fact_table AS h
OUTER APPLY (
    SELECT TOP 1 *
    FROM dimension AS mcc WITH (NOLOCK)
    WHERE h.product = mcc.product
      AND h.country = mcc.country
      AND mcc.date IN (
            SELECT MAX(date)
            FROM dimension AS dd WITH (NOLOCK)
            WHERE FORMAT(DATEADD(MONTH, -3, dd.date), 'yyyyMM') <= h.month_in_the_year
              AND dd.product = h.product
              AND dd.country = h.country
          )
) AS a;
I basically use it to get the related data from Dimension linked with the latest data point that's earlier than 3 months ago.
I'm trying to turn it into a left join, but it's taking a lot more time since I don't filter the dimension before the join :
-- Asker's LEFT JOIN attempt: match the dimension row whose pkid is returned by
-- a TOP 1 subquery ordered by date descending.
-- Note that the subquery is correlated only on h.month_in_the_year; it does not
-- restrict dd to the fact row's product/country.
SELECT TOP 10 *
FROM fact_table AS h
LEFT JOIN dimension AS a
    ON  h.product = a.product
    AND h.country = a.country
    AND a.pkid = (
            SELECT TOP 1 pkid
            FROM dimension AS dd
            WHERE FORMAT(DATEADD(MONTH, -3, dd.date), 'yyyyMM') <= h.month_in_the_year
            ORDER BY date DESC
        );
Do you have an idea on how to turn it efficiently into a left join ?
It looks like you can significantly simplify this query, by simply adding an ORDER BY. I've also modified the date filter in order to leverage indexing properly.
-- Latest dimension row per fact row, chosen directly with TOP 1 ... ORDER BY date DESC.
-- The cutoff is written as a plain range predicate on mcc.date (no function
-- wrapped around the column), which is intended to be index-friendly.
-- NOTE(review): the question's predicate
--   FORMAT(DATEADD(MONTH, -3, dd.date), 'yyyyMM') <= h.month_in_the_year
-- appears to correspond to an offset of 4 months from the first day of
-- month_in_the_year, not 2 -- confirm the intended cutoff.
SELECT *
FROM fact_table AS h
OUTER APPLY (
    SELECT TOP 1 *
    FROM dimension AS mcc
    WHERE mcc.product = h.product
      AND mcc.country = h.country
      AND mcc.date < DATEADD(
            MONTH,
            2,
            DATEFROMPARTS(LEFT(h.month_in_the_year, 4), RIGHT(h.month_in_the_year, 2), 1)
          )
    ORDER BY mcc.date DESC
) AS a;
To transform this into a LEFT JOIN, you need to utilize row-numbering
-- LEFT JOIN form of "TOP 1 ... ORDER BY mcc.date DESC" per fact row:
-- number the matching dimension rows for each fact row, newest first, and keep
-- row 1. Fact rows with no match are kept with NULL dimension columns.
-- NOTE(review): fact_table and dimension both expose product and country, so
-- SELECT * inside this derived table will likely need to become an explicit
-- column list (derived-table column names must be unique) -- confirm.
SELECT *
FROM (
    SELECT *,
        -- DESC so that rn = 1 is the LATEST matching dimension row.
        rn = ROW_NUMBER() OVER (PARTITION BY h.PrimaryKeyColumn ORDER BY mcc.date DESC)
    FROM fact_table h
    LEFT JOIN dimension mcc
        ON h.product = mcc.product
        AND h.country = mcc.country
        AND mcc.date < DATEADD(MONTH, 2, DATEFROMPARTS(LEFT(h.month_in_the_year, 4), RIGHT(h.month_in_the_year, 2), 1))
) a
WHERE rn = 1;

Select all records between two dates but one record on same day depending on plate

I have select query :
-- Rows from alsatr with BelgeTuru = 55 in the given Tarih range, restricted to
-- Cari rows with a positive FiloID and to documents that have at least one
-- alsatD line with AsStokKodu = 'LA-0001'.
-- The WHERE predicates on t3, x and f reject NULL-extended rows, so those
-- outer joins behave like inner joins.
-- NOTE(review): '04.31.2020' is not a calendar date (April has 30 days) --
-- confirm the intended upper bound.
SELECT
    f.FirmaID,
    f.FirmaAdi,
    t.BelgeID,
    t.BelgeTuru,
    t.Tarih,
    t2.Plaka,
    t2.SasiNo,
    t4.AracMarka,
    t4.AracTip,
    CASE WHEN x.Miktar = 1 THEN 4 ELSE x.miktar END AS LastikAdet,
    t3.CariKodu,
    t3.CariAdi,
    t3.CariGsm1,
    t3.CariGsm2,
    t3.CariTel1,
    t3.CariTel2,
    t3.CariAdres
FROM alsatr AS t WITH (NOLOCK)
LEFT JOIN Firma AS f WITH (NOLOCK)
    ON f.FirmaID = t.AlsatrFirmaID
LEFT JOIN AracBilgi AS t2 WITH (NOLOCK)
    ON  t2.AracBilgiUID = t.AsAracBilgiUID
    AND t2.AracBilgiID = t.AracBilgi
LEFT JOIN Cari AS t3 WITH (NOLOCK)
    ON t.AsCariUID = t3.CariUID
LEFT JOIN Araclar AS t4 WITH (NOLOCK)
    ON t4.AracID = t2.AB_AracID
OUTER APPLY (
    -- Per-Miktar count of the document's 'LA-0001' lines.
    SELECT
        COUNT(1) AS soktak,
        Miktar
    FROM alsatD AS d WITH (NOLOCK)
    WHERE d.AlsatDUID = t.AlsatrUID
      AND d.AsStokKodu = 'LA-0001'
    GROUP BY Miktar
) AS x
WHERE ISNULL(t3.FiloID, 0) > 0
  AND t.Tarih BETWEEN '04.30.2020' AND '04.31.2020'
  AND t.BelgeTuru = 55
  AND x.soktak > 0
  AND f.FirmaID NOT IN (1, 2, 103, 106, 109, 114)
ORDER BY t.Tarih DESC, f.FirmaID DESC, t.BelgeID DESC
So I want to select all records between two days but I want to select one,latest record (maybe depends on last BelgeID ) on same day with same plate (plaka).
Enclose your query inside a CTE and use ROW_NUMBER() window function:
-- Wrap the original query in a CTE, number its rows within each (Tarih, Plaka)
-- group by BelgeID descending, and keep the first row of each group.
-- NOTE(review): partitioning by Tarih yields one row per calendar day only if
-- Tarih carries no time-of-day component -- confirm.
WITH cte AS (
<your query here>
),
ranked AS (
    SELECT
        *,
        ROW_NUMBER() OVER (PARTITION BY Tarih, Plaka ORDER BY BelgeID DESC) AS rn
    FROM cte
)
SELECT
    r.FirmaID,
    r.FirmaAdi,
    r.BelgeID,
    r.BelgeTuru,
    r.Tarih,
    r.Plaka,
    r.SasiNo,
    r.AracMarka,
    r.AracTip,
    r.LastikAdet,
    r.CariKodu,
    r.CariAdi,
    r.CariGsm1,
    r.CariGsm2,
    r.CariTel1,
    r.CariTel2,
    r.CariAdres
FROM ranked AS r
WHERE r.rn = 1

show only MAX rows

i have a table which shows me workhours of postoffices, the problem is that sometimes there are duplicates: for example i have saturday showing twice from one postoffice with the same time.
The solution is to show only one Saturday, the one with MAX(ID), but I can't get that to work, and the thing is that I don't have to show the ID in the SELECT.
this is my script
-- Asker's query: work-schedule rows (ADDR_PO_WORKSCHEDULE) for post office 8221
-- with datestop = 9999-12-31, joined to the post office, its lock reason, its
-- type and the weekday lookup.
SELECT h.ID,
h.POSTINDEX,
H.LONGNAME_UA,
h.SHORTNAME_UA,
pt.LONGNAME_UA,
h.parent_Id,
WORKCOMMENT,
INTERVALTYPE,
TO_CHAR(TFROM, 'HH24:MI') AS TFROM,
TO_CHAR(TTO, 'HH24:MI'),
WD.NAME_UA,
WD.NAME_EN,
WD.NAME_RU,
WD.SHORTNAME_UA,
pt.isVPZ,
lr.NAME_UA,
lr.CODE
FROM ADDR_PO_WORKSCHEDULE tt
LEFT JOIN ADDR_POSTOFFICE h
ON tt.POSTOFFICE_ID = h.ID
-- The inner joins below are keyed on h.*, so schedule rows without a matching
-- post office are dropped despite the LEFT JOIN above.
INNER JOIN mdm_lockReason lr
ON lr.id = H.LOCK_REASON
INNER JOIN ADDR_POSTOFFICEtype pt
ON pt.ID = H.POSTOFFICETYPE_ID
INNER JOIN ADDR_PO_WORKDAYS wd
ON wd.ID = tt.dayofweek
where tt.datestop = TO_DATE('9999-12-31','YYYY-MM-DD') AND tt.postoffice_id = 8221
-- This is the line the asker flags as incorrect: MAX(tt.ID) is not a boolean
-- condition and the query has no GROUP BY.
HAVING MAX(tt.ID)
ORDER BY h.postIndex,
h.POSTOFFICETYPE_ID,
dayofweek,
intervaltype,
tFrom,
tto
As you can see, I've added HAVING MAX(tt.ID) there, but I understand that it is incorrect and I don't know how to solve that. Help please!
You can use row_number():
-- Template: wrap the original query (without HAVING, exposing tt.id as tt_id)
-- and number its rows by tt_id descending.
with t as (
<your query here with no `having` and with tt.id as tt_id>
)
select t.*
-- The window has no PARTITION BY, so seqnum = 1 selects the single row with the
-- highest tt_id across the whole result; add a PARTITION BY if one row per
-- group (e.g. per weekday) is wanted.
from (select t.*, row_number() over (order by tt_id desc) as seqnum
from t
) t
where seqnum = 1;
If the source of your duplicate rows is coming from the ADDR_PO_WORKSCHEDULE table, then you can just de-dupe it before you use it in the rest of your query:
-- De-duplicate ADDR_PO_WORKSCHEDULE first (one row per post office / weekday),
-- then join the surviving rows to the lookup tables as in the original query.
-- NOTE(review): the datestop / postoffice_id filters run AFTER de-duplication;
-- if the row kept for a group does not satisfy them, that group disappears from
-- the output. Consider moving the filters into the innermost query -- confirm.
SELECT
    h.ID, h.POSTINDEX, h.LONGNAME_UA, h.SHORTNAME_UA, pt.LONGNAME_UA, h.parent_Id,
    WORKCOMMENT, INTERVALTYPE, TO_CHAR(TFROM, 'HH24:MI') AS TFROM, TO_CHAR(TTO, 'HH24:MI'),
    WD.NAME_UA, WD.NAME_EN, WD.NAME_RU, WD.SHORTNAME_UA, pt.isVPZ, lr.NAME_UA, lr.CODE
FROM (
    SELECT src.*
    FROM (
        SELECT t.*,
            -- The schedule table is aliased t at this level (tt only exists
            -- outside the derived table), so the partition columns use t.
            ROW_NUMBER() OVER(
                PARTITION BY t.postoffice_id, t.dayofweek -- Group by post office / day of week
                ORDER BY <order_columns> -- Rank rows within each group
            ) AS RowNum
        FROM ADDR_PO_WORKSCHEDULE t
    ) src
    WHERE RowNum = 1
) tt
LEFT JOIN ADDR_POSTOFFICE h ON tt.POSTOFFICE_ID = h.ID
INNER JOIN mdm_lockReason lr ON lr.id = H.LOCK_REASON
INNER JOIN ADDR_POSTOFFICEtype pt ON pt.ID = H.POSTOFFICETYPE_ID
INNER JOIN ADDR_PO_WORKDAYS wd ON wd.ID = tt.dayofweek
WHERE tt.datestop = TO_DATE('9999-12-31','YYYY-MM-DD')
AND tt.postoffice_id = 8221
ORDER BY h.postIndex, h.POSTOFFICETYPE_ID, dayofweek, intervaltype, tFrom, tto
Just replace <order_columns> with the columns you want to use to decide which of the "duplicate" rows to keep. If the order doesn't matter, just remove the ORDER BY altogether.
Also, you may want to qualify all column references, since you are selecting from multiple tables.
You can use analytical function as following:
-- Template: number the rows within each (h.postindex, trunc(tfrom)) group by
-- tt.id descending (NULL ids last) and keep the first row of every group.
SELECT *
FROM (
    SELECT
        <your select column list>,
        ROW_NUMBER() OVER (
            PARTITION BY h.postindex, trunc(tfrom)
            ORDER BY tt.id DESC NULLS LAST
        ) AS rn
    FROM <from clause>
    WHERE <where clause>
)
WHERE rn = 1;
group by is not needed.

Count with row_number function SQL CTE

I have the below CTEs that work perfectly, but I want to count the "cl.memb_dim_id" by "cl.post_date" and I am not sure how to do that. When I add the COUNT function I get an error that highlights the 'row number', so I am assuming I can't have both ORDER and GROUP together?
-- dates:  one-row CTE holding the reporting window (startdate, enddate).
-- claims: fact_claim rows posted inside that window that pass the provider,
--         processing-status and company filters, with their lookup
--         descriptions; uniquerow numbers the rows of each Claim_number by
--         post_date descending.
-- The final SELECT keeps the rows numbered 1.
WITH
dates AS (
    SELECT
        to_date('01-jan-2017') AS startdate,
        to_date('02-jan-2017') AS enddate
    FROM dual
),
claims AS (
    SELECT DISTINCT
        cl.memb_dim_id,
        ROW_NUMBER() OVER (PARTITION BY cl.Claim_number ORDER BY cl.post_date DESC) AS uniquerow,
        cl.Claim_number,
        cl.post_date,
        ct.claim_type,
        ap.claim_status_desc,
        dc.company_desc,
        dff.io_flag_desc,
        pr.product_desc,
        cl.prov_dim_id,
        cl.prov_type_dim_id
    FROM dw.fact_claim cl
    INNER JOIN dates d
        ON  1=1
        AND cl.post_date >= d.startdate
        AND cl.post_date <= d.enddate
        AND cl.provider_par_dim_id IN ('2')
        AND cl.processing_status_dim_id = '1'
        AND cl.company_dim_id IN ('581','585','586','589','590','591','588','592','594','601','602','603','606','596','598','597','579','599','578','577','573','574','576','575')
    LEFT JOIN dw.DIM_CLAIM_STATUS ap
        ON cl.claim_status_dim_id = ap.claim_status_dim_id
    LEFT JOIN dw.dim_claim_type ct
        ON  cl.claim_type_dim_id = ct.claim_type_dim_id
        AND cl.claim_type_dim_id IN ('1','2','6','7')
    LEFT JOIN dw.DIM_COMPANY dc
        ON cl.company_dim_id = dc.company_dim_id
    LEFT JOIN dw.DIM_IO_FLAG dff
        ON cl.io_flag_dim_id = dff.io_flag_dim_id
    LEFT JOIN dw.dim_product pr
        ON cl.product_dim_id = pr.product_dim_id
)
SELECT *
FROM claims
WHERE uniquerow = '1'
First, does this work?
-- Select-list fragment: windowed count of non-NULL memb_dim_id values within each (Claim_number, post_date) group.
count(cl.memb_dim_id) over (partition by cl.Claim_number, cl.post_date) as cnt,
Second, it is strange to be using analytic functions with select distinct.

SQL Server Delete Rows from Table leaving the record with the Max CreationDate

I want to delete the older records from the table based on creation date,leaving the latest one
attempted SQL,but did not work.
-- Anti-join preview of the rows to delete: rows of ItemPriceListMap whose
-- creation day is not the latest creation day for their ItemPriceListUID3.
-- Swap the SELECT line for the commented DELETE to actually remove them.
SELECT *
--DELETE L
FROM ItemPriceListMap AS L
LEFT JOIN (
    -- Latest creation day per ItemPriceListUID3.
    SELECT
        ItemPriceListUID3,
        MAX(CAST(CreationDate AS DATE)) AS MaxDate
    FROM ItemPriceListMap
    GROUP BY ItemPriceListUID3
) AS M
    ON  L.ItemPriceListUID3 = M.ItemPriceListUID3
    AND CAST(L.CreationDate AS DATE) = M.MaxDate
WHERE M.ItemPriceListUID3 IS NULL
The view of the mapping
-- Mapping rows with their item description and price, listed per item in
-- creation order.
SELECT
    I.Description,
    ipl.UnitListPrice1,
    iplmp.VatMRP,
    iplmp.CreationDate
FROM ItemPriceListMap AS iplmp
INNER JOIN ItemPriceList AS ipl
    ON iplmp.ItemPriceListUID3 = ipl.UID
INNER JOIN Item AS i
    ON ipl.ItemUID = i.UID
ORDER BY
    I.Description,
    iplmp.CreationDate
EDIT:
More Sample Data
Using this SQL
-- Sample-data listing: item description, ItemPriceListUID3 and creation date of
-- every mapping row, per item in creation order.
SELECT
    I.Description,
    iplmp.ItemPriceListUID3,
    iplmp.CreationDate
FROM ItemPriceListMap AS iplmp
INNER JOIN ItemPriceList AS ipl
    ON iplmp.ItemPriceListUID3 = ipl.UID
INNER JOIN Item AS i
    ON ipl.ItemUID = i.UID
ORDER BY
    I.Description,
    iplmp.CreationDate
so after I execute the delete command the highlighted row should be left in the table(yellow),highlighted in blue is the same Item
TRY THIS: You can use your own query by doing some simple changes as below, you have to join as <> with the max date so it will not delete that record, only delete others which matches ItemPriceListUID3 and <> MaxDate
-- Preview of the rows to delete: rows whose creation day differs from the
-- latest creation day of the SAME ItemPriceListUID3. Rows on the latest day of
-- each ItemPriceListUID3 are left untouched.
-- Swap the SELECT line for the commented DELETE to actually remove them.
SELECT *
--DELETE L
FROM ItemPriceListMap L
INNER JOIN (
    -- Latest creation day per ItemPriceListUID3 (grouped, so each key is
    -- compared against its own maximum rather than the table-wide one).
    SELECT ItemPriceListUID3, MAX(CAST(CreationDate as DATE)) MaxDate
    FROM ItemPriceListMap
    GROUP BY ItemPriceListUID3
) M ON L.ItemPriceListUID3 = M.ItemPriceListUID3
    AND CAST(L.CreationDate as DATE) <> M.MaxDate
Try this :
-- Delete every row whose CreationDate is not the maximum CreationDate among the
-- rows sharing its ItemPriceListUID3.
DELETE L
FROM ItemPriceListMap AS L
WHERE L.CreationDate <> (
    SELECT MAX(LL.CreationDate) AS MaxDate
    FROM ItemPriceListMap AS LL
    WHERE L.ItemPriceListUID3 = LL.ItemPriceListUID3
)
Note : Take backup of your data first.
Use a CTE and a row_number
-- Number the rows of each ItemPriceListUID3 from newest to oldest CreationDate,
-- then delete through the CTE everything except row 1 of each group.
WITH CTE AS (
    SELECT
        a1.*,
        ROW_NUMBER() OVER (
            PARTITION BY ItemPriceListUID3 -- remove this if you don't need the grouping
            ORDER BY CreationDate DESC
        ) AS R_ORD
    FROM ItemPriceListMap AS a1
)
DELETE
FROM CTE
WHERE R_ORD > 1