How to optimize this UNION SQL Query? - sql

I have a query which looks like this:
SELECT DISTINCT * FROM (
SELECT FundId AS Id,
PeriodYearMonth
FROM [Fund.Period] F
INNER JOIN (
SELECT * FROM (
SELECT FundId as Id,
MIN(PeriodYearMonth) AS MinPeriodYearMonth
FROM (
SELECT FundId,
PeriodYearMonth,
PublishedOn
FROM [Fund.Period] FP
UNION ALL --Changed to UNION ALL as it is more efficient and we wont ever need a UNION as the result set would never match
SELECT FundId,
MAX(PeriodYearMonth) + 1,
NULL
FROM [Fund.Period]
GROUP BY FundId
) FP WHERE PublishedOn IS NULL GROUP BY FundId
) MFP
) FP ON F.FundId = FP.Id AND (F.PeriodYearMonth = FP.MinPeriodYearMonth OR (f.PeriodYearMonth +1) = FP.MinPeriodYearMonth)
) FP
If possible, I would like to remove the UNION ALL. Does anyone know how this can be optimized?

removed union all
SELECT DISTINCT * FROM (
SELECT FundId AS Id,
PeriodYearMonth
FROM [Fund.Period] F
INNER JOIN (
SELECT * FROM (
SELECT FundId as Id,
MIN(PeriodYearMonth) AS MinPeriodYearMonth,MAX(PeriodYearMonth) + 1 as PeriodYearMonth
FROM (
SELECT FundId,
PeriodYearMonth,
PublishedOn
FROM [Fund.Period] FP
) FP WHERE PublishedOn IS NULL
GROUP BY FundId
) MFP
) FP ON F.FundId = FP.Id AND (F.PeriodYearMonth = FP.MinPeriodYearMonth OR (f.PeriodYearMonth +1) = FP.MinPeriodYearMonth)
) FP

Related

SQL join and Bring in 1 column into 2 columns

I have two tables shown as above. I want to create a select statement that would have the following result:
Basically, I want to join on 'Model' and 'Num' columns and bring 'Val' vales but break it into two columns based on 'Ver'.
SELECT 'a' as Model ,'1' as Num ,'v1' as Ver ,'9' as val INTO #RefTbl UNION ALL
SELECT 'a','2','v1','10' UNION ALL
SELECT 'a','3','v1','11' UNION ALL
SELECT 'a','1','v2','5' UNION ALL
SELECT 'a','2','v2','6' UNION ALL
SELECT 'a','3','v2','7' UNION ALL
SELECT 'b','1','v1','20' UNION ALL
SELECT 'b','1','v2','21' UNION ALL
SELECT 'b','2','v1','25' UNION ALL
SELECT 'b','2','v2','26'
SELECT '519' as ID,'a' as Model,'1' as Num INTO #OrderTbl UNION ALL
SELECT '5616','a','3' UNION ALL
SELECT '871','b','1'
-- failed attempt
SELECT o.*, '' as v1_val, '' as v2_val FROM #OrderTbl as o left join #RefTbl as r on o.Model = r.Model and o.Num = r.Num
You can join and do conditional aggregation:
select
d.ID,
d.Model,
d.Num,
max(case when r.Ver = 'val1' then Val end) V1_Val,
max(case when r.Ver = 'val2' then Val end) V2_Val
from DataTbl d
inner join ReferenceTbl r
on r.Model = d.Model
and r.Num = d.Num
group by
d.ID,
d.Model,
d.Num
Alternatively, you can join twice - depending on your dataset, this might (or might not) perform better:
select
d.ID,
d.Model,
d.Num,
r1.Val V1_Val,
r2.Val V2_Val
from DataTbl d
left join ReferenceTbl r1
on r1.Model = d.Model
and r1.Num = d.Num
and r1.Ver = 'val1'
left join ReferenceTbl r2
on r2.Model = d.Model
and r2.Num = d.Num
and r2.Ver = 'val2'
Case + Group by works on most db system which is nice.
Alternative answer using SQL Server PIVOT (for reference)
WITH RefTbl AS (
SELECT 'a' as Model ,'1' as Num ,'v1' as Ver ,'9' as val UNION ALL
SELECT 'a','2','v1','10' UNION ALL
SELECT 'a','3','v1','11' UNION ALL
SELECT 'a','1','v2','5' UNION ALL
SELECT 'a','2','v2','6' UNION ALL
SELECT 'a','3','v2','7' UNION ALL
SELECT 'b','1','v1','20' UNION ALL
SELECT 'b','1','v2','21' UNION ALL
SELECT 'b','2','v1','25' UNION ALL
SELECT 'b','2','v2','26'),
OrderTbl AS (
SELECT '519' as ID,'a' as Model,'1' as Num UNION ALL
SELECT '5616','a','3' UNION ALL
SELECT '871','b','1'
)
SELECT Id, Model, Num AS Model, [v1] AS v1_Val, [v2] as v2_Val
FROM
(SELECT o.id, r.Model, r.Num, r.val, r.ver from OrderTbl as o
left join RefTbl as r on o.Model = r.Model and o.Num = r.Num) AS SourceTable
PIVOT
(MAX(val) FOR ver IN ([v1], [v2])) AS PivotTable;
Id Model Model v1_Val v2_Val
519 a 1 9 5
5616 a 3 11 7
871 b 1 20 21

Why is Oracle REPLACE function not working for this string?

We have a pattern we use all the time and this is usually pretty straightforward.
sortOrder IN VARCHAR2 := 'Title'
query VARCHAR2(32767) := q'[
SELECT
Columns
FROM tables
ORDER BY {sortOrder}
]';
query := REPLACE(query, '{sortOrder}', sortOrder);
But for this string it is not working:
WITH appl_List
AS
(
SELECT DISTINCT
appls.admin_phs_ORG_code || TO_CHAR(appls.serial_num, 'FM000000') AS core_proj_number,
appls.Appl_ID
FROM TABLE(:portfolioTable) appls
),
g1SupportingProjCount AS
(
SELECT
gen1grants.silverchair_id,
COUNT(DISTINCT al.Appl_ID) AS ApplCount
FROM
appl_List al
JOIN cg_cited_reference_gen1_grant gen1grants
ON al.core_proj_number = gen1grants.ic_serial_num
JOIN cg_cited_reference_gen1 gen1refs
ON gen1grants.silverchair_id = gen1refs.silverchair_id
GROUP BY gen1grants.Silverchair_id
),
g1SupportedPubCount AS
(
SELECT
gen1grants.silverchair_id,
COUNT(DISTINCT gen1refs.gen1_wos_uid) AS PubCount
FROM
appl_List al
JOIN cg_cited_reference_gen1_grant gen1grants
ON al.core_proj_number = gen1grants.ic_serial_num
JOIN cg_cited_reference_gen1 gen1refs
ON gen1grants.silverchair_id = gen1refs.silverchair_id
GROUP BY gen1grants.Silverchair_id
),
g2SupportingProjCount AS
(
SELECT
gen2grants.silverchair_id,
COUNT(DISTINCT al.Appl_ID) AS ApplCount
FROM
appl_List al
JOIN cg_cited_reference_gen2_grant gen2grants
ON al.core_proj_number = gen2grants.ic_serial_num
JOIN cg_cited_reference_gen2 gen2refs
ON gen2grants.silverchair_id = gen2refs.silverchair_id
GROUP BY gen2grants.Silverchair_id
),
g2SupportedPubCount AS
(
SELECT
gen2grants.silverchair_id,
COUNT(DISTINCT gen2refs.gen2_wos_uid) AS PubCount
FROM
appl_List al
JOIN cg_cited_reference_gen2_grant gen2grants
ON al.core_proj_number = gen2grants.ic_serial_num
JOIN cg_cited_reference_gen2 gen2refs
ON gen2grants.silverchair_id = gen2refs.silverchair_id
GROUP BY gen2grants.Silverchair_id
),
portfolio_cg_ids AS
(
SELECT DISTINCT md.silverchair_id
FROM
(
SELECT silverchair_id
FROM cg_cited_reference_gen1_grant gen1Grants
JOIN Appl_List appls
ON appls.core_proj_number = gen1Grants.ic_serial_num
UNION
SELECT silverchair_id
FROM cg_cited_reference_gen2_grant gen2Grants
JOIN Appl_List appls
ON appls.core_proj_number = gen2Grants.ic_serial_num
) grant_sc_ids
JOIN cg_metadata md
ON grant_sc_ids.silverchair_id = md.silverchair_id
)
SELECT
silverchairId,
TITLE,
PMID,
PMCID,
publication_year as year,
referenceCount1Gen,
supportingProjectCount1Gen,
supportedPublicationCount1Gen,
referenceCount2Gen,
supportingProjectCount2Gen,
supportedPublicationCount2Gen,
COUNT(1) OVER() as TotalCount
FROM
(
SELECT
md.SILVERCHAIR_ID silverchairId,
md.TITLE,
md.PMID,
md.PMCID ,
md.publication_year as year,
g1RefCounts.referenceCount1Gen as referenceCount1Gen,
g1SupportingProjCount.ApplCount as supportingProjectCount1Gen,
g1SupportedPubCount.PubCount as supportedPublicationCount1Gen,
g2RefCounts.referenceCount2Gen as referenceCount2Gen,
g2SupportingProjCount.ApplCount as supportingProjectCount2Gen,
g2SupportedPubCount.PubCount as supportedPublicationCount2Gen,
--COUNT(1) OVER() as TotalCount
FROM cg_metadata md
-- BEGIN datascope to current portfolio
JOIN portfolio_cg_ids
ON portfolio_cg_ids.silverchair_id = md.silverchair_id
-- END datascope to current portfolio
LEFT JOIN g1SupportingProjCount
ON g1SupportingProjCount.Silverchair_id = md.silverchair_id
LEFT JOIN g2SupportingProjCount
ON g2SupportingProjCount.Silverchair_id = md.silverchair_id
LEFT JOIN g1SupportedPubCount
ON g1SupportedPubCount.Silverchair_id = md.silverchair_id
LEFT JOIN g2SupportedPubCount
ON g2SupportedPubCount.Silverchair_id = md.silverchair_id
OUTER APPLY
(
Select Count(*) as referenceCount1Gen
FROM cg_cited_reference_gen1 g1Refs
WHERE g1Refs.silverchair_id = md.silverchair_id
) g1RefCounts
OUTER APPLY
(
Select Count(*) as referenceCount2Gen
FROM cg_cited_reference_gen2 g2Refs
WHERE g2Refs.silverchair_id = md.silverchair_id
) g2RefCounts
) results
ORDER BY {sortOrder}
Are there cases where some kind of special char in the string can keep this from working?
I'm kind of perplexed. I've been using this pattern for like 3 years and I've never had this not work.
What could be breaking this?
The query has 4000+ characters.
The text is probably being truncated somewhere down the line.

Call one CTE in another CTE

How could I call a CTE in another CTE ?
WITH cte1
AS (
SELECT City.*
FROM City
WHERE (City.CityName COLLATE SQL_Latin1_General_CP1_CI_AI) LIKE 'são paulo'
)
, cte2
AS (
SELECT Imovel.Imovel_Id
FROM Imovel
WHERE Imovel.Number = 311
AND Imovel.ZIPCode = '30280490'
AND Imovel.Complement = ''
AND Imovel.Street = 'Do furquim'
-- the line below has an error in cte.City_Id
AND Imovel.City_Id = cte1.City_Id
)
You have to join both like with a normal table:
WITH cte1
AS (SELECT city.*
FROM city
WHERE ( city.cityname COLLATE sql_latin1_general_cp1_ci_ai ) LIKE
'são paulo'),
cte2
AS (SELECT imovel.imovel_id
FROM imovel
INNER JOIN cte1
ON imovel.city_id = cte1.city_id
WHERE imovel.number = 311
AND imovel.zipcode = '30280490'
AND imovel.complement = ''
AND imovel.street = 'Do furquim')
SELECT * FROM cte2
Note that i have appended SELECT * FROM cte2 since CTE's cannot "stand" alone.

Using CTE in a DELETE is not executing

Here is the original query that runs and runs:
;WITH includedCs_cte
AS
(
SELECT
x.PKey,
x.OKey,
y.CKey
FROM
#Permissions x
JOIN WHDATA.dbo.tb_DimC y
ON
x.PKey = y.PKey AND
x.OKey = y.OKey
)
, b_cte
AS
(
SELECT
i.OKey,
i.PKey
FROM
WHData.dbo.vw_FactCX b
INNER JOIN includedCs_cte i
ON
b.PKey = i.PKey AND
b.PlayCKey = i.CKey
WHERE b.DateKey >= #myLAST28DAYS
GROUP BY
i.OKey,
i.PKey
)
, POK_cte
AS
(
SELECT
i.OKey,
i.PKey
FROM
WHData.dbo.vw_FactCY b
INNER JOIN includedCs_cte i
ON
b.PKey = i.PKey AND
b.PlayCKey = i.CKey
WHERE b.DateKey >= #myLAST28DAYS
GROUP BY
i.OKey,
i.PKey
)
, includedOKeys
AS
(
SELECT *
FROM b_cte
UNION
SELECT * FROM POK_cte
)
DELETE FROM #Permissions
FROM #Permissions p
WHERE NOT EXISTS
(
SELECT 1
FROM includedOKeys x
WHERE
p.PKey = x.PKey AND
p.OKey = x.OKey
)
If I change the above to the below then it runs in less than 10 seconds. Why are these executing so differently?
;WITH includedCs_cte
AS
(
SELECT
x.PKey,
x.OKey,
y.CKey
FROM
#Permissions x
JOIN WHDATA.dbo.tb_DimC y
ON
x.PKey = y.PKey AND
x.OKey = y.OKey
)
, b_cte
AS
(
SELECT
i.OKey,
i.PKey
FROM
WHData.dbo.vw_FactCX b
INNER JOIN includedCs_cte i
ON
b.PKey = i.PKey AND
b.PlayCKey = i.CKey
WHERE b.DateKey >= #myLAST28DAYS
GROUP BY
i.OKey,
i.PKey
)
, POK_cte
AS
(
SELECT
i.OKey,
i.PKey
FROM
WHData.dbo.vw_FactCY b
INNER JOIN includedCs_cte i
ON
b.PKey = i.PKey AND
b.PlayCKey = i.CKey
WHERE b.DateKey >= #myLAST28DAYS
GROUP BY
i.OKey,
i.PKey
)
, includedOKeys
AS
(
SELECT *
FROM b_cte
UNION
SELECT * FROM POK_cte
)
SELECT *
INTO #includedOKeys
FROM includedOKeys
CREATE CLUSTERED INDEX ix_inclProdOper ON #includedOKeys(OKey, PKey)
DELETE FROM #Permissions
FROM #Permissions p
WHERE NOT EXISTS
(
SELECT 1
FROM #includedOKeys x
WHERE
p.PKey = x.PKey AND
p.OKey = x.OKey
)
A CTE is resolved when it is used. It means that if you use it twice in a query, it may get resolved twice. CTEs do not always get resolved once and cache in memory. SQL Server is free to execute the query as it sees fit.
In your case, it may be worse than that - because you have used it as a correlated subquery in an EXISTS clause, which is a row-by-row operation. This probably means the plan results in the CTE being resolved for EACH ROW of the #permissions table! That could well be where all the time will be going. Show Execution Plan (Ctrl-L) in SSMS is your friend here.
Check this SQLFiddle, which shows that NONE of the GUIDs are the same, even though the CTE only creates 3 rows. In fact, we get 18 distinct GUIDs.
with cte(guid,other) as (
select newid(),1 union all
select newid(),2 union all
select newid(),3)
select a.guid, a.other, b.guid guidb, b.other otherb
from cte a
cross join cte b
order by a.other, b.other;

How can I improve this SQL query?

I'm checking for existing of a row in in_fmd, and the ISBN I look up can be the ISBN parameter, or another ISBN in a cross-number table that may or may not have a row.
select count(*)
from in_fmd i
where (description='GN')
and ( i.isbn in
(
select bwi_isbn from bw_isbn where orig_isbn = ?
union all
select cast(? as varchar) as isbn
)
)
I don't actually care about the count of the rows, but rather mere existence of at least one row.
This used to be three separate queries, and I squashed it into one, but I think there's room for more improvement. It's PostgreSQL 8.1, if it matters.
Why bother with the UNION ALL
select count(*)
from in_fmd i
where (description='GN')
and (
i.isbn in (
select bwi_isbn from bw_isbn where orig_isbn = ?
)
or i.isbn = cast(? as varchar)
)
I would probably use a LEFT JOIN-style query instead of the IN, but that's more personal preference:
select count(*)
from in_fmd i
left join bw_isbn
on bw_isbn.bwi_isbn = i.isbn
and bw_isbn.orig_isbn = ?
where (i.description='GN')
and (
bw_isbn.bwi_isbn is not null
or i.isbn = cast(? as varchar)
)
The inversion discussed over IM:
SELECT SUM(ct)
FROM (
select count(*) as ct
from in_fmd i
inner join bw_isbn
on bw_isbn.bwi_isbn = i.isbn
and bw_isbn.orig_isbn = ?
and i.isbn <> cast(? as varchar)
and i.description = 'GN'
UNION
select count(*) as ct
from in_fmd i
where i.isbn = cast(? as varchar)
and i.description = 'GN'
) AS x
I don't actually care about the count of the rows, but rather mere existence of at least one row.
Then how about querying SELECT ... LIMIT 1 and checking in the calling program whether you get one result row or not?
apart from what other posters have noted , just changing
select count(*)
to
exists(..)
would improve things quite a bit
SELECT SUM(ct)
FROM (select count(*) as ct
from in_fmd i
inner join bw_isbn
on bw_isbn.bwi_isbn = i.isbn
and bw_isbn.orig_isbn = ?
and i.isbn <> cast(? as varchar)
and i.description = 'GN'
UNION
select count(*) as ct
from in_fmd i
where i.isbn = cast(? as varchar)
and i.description = 'GN'
) AS x
select count(*)
from in_fmd i
where description = 'GN'
and exists (select 1
from bwi_isbn
where bw_isbn.bwi_isbn = in_fmd.isbn)