Related
I do see there are solutions for a similar question but I was unable to get them to work in my scenario.
I am returning duplicate codes while using the LISTAGG function.
Returning now:
-- Claim detail lines with all diagnosis codes of the claim concatenated into
-- one ', '-separated DX_CODES string, ordered by diagnosis level.
-- This version does NOT de-duplicate: a DIAG_CD is repeated in DX_CODES once
-- for every joined row that carries it.
SELECT
    CD.CLAIM
    ,CD.CLAIMLN
    ,CD.PROV_INVOICE_UNTS
    ,CD.APPR_UNTS
    ,CD.PROV_INVOICE_AMT
    ,CD.PROV_CNTRCT_AMT
    ,CD.PLAN_CNTRCT_AMT AS PLAN_AMT
    ,LISTAGG(DX.DIAG_CD, ', ') WITHIN GROUP (ORDER BY DX.LVL_CD) AS DX_CODES
FROM CLAIM_DETAIL CD
INNER JOIN PATIENT_INTAKE_PLAN PIP
    ON CD.PAT_NBR = PIP.PAT_NBR AND CD.ITK_ID = PIP.ITK_ID
INNER JOIN HEALTH_PLAN HP
    ON HP.PLAN_ID = PIP.PLAN_ID
INNER JOIN PROVIDER_CCXPORTAL PR
    ON PR.PROV_ID = CD.PROV_ID
INNER JOIN PROVIDER_PARENT PRP
    ON PR.PROV_PRNT_ID = PRP.PROV_PRNT_ID
INNER JOIN PATIENT_CCXPORTAL PTP
    ON PTP.PAT_NBR = CD.PAT_NBR
INNER JOIN CLAIM C
    ON C.CLM_ID = CD.CLM_ID
-- LEFT JOIN keeps claim lines that have no diagnosis rows (DX_CODES is NULL).
LEFT JOIN CLAIM_DIAGNOSIS DX
    ON CD.CLM_ID = DX.CLM_ID
WHERE
    -- ANSI date literal: independent of the session's NLS_DATE_FORMAT.
    -- NOTE(review): assumes RCPT_DT is a DATE/TIMESTAMP column -- confirm.
    C.RCPT_DT >= DATE '2014-07-01'
-- LISTAGG is an aggregate here, so every other selected column is grouped.
GROUP BY
    CD.CLAIM
    ,CD.CLAIMLN
    ,CD.PROV_INVOICE_UNTS
    ,CD.APPR_UNTS
    ,CD.PROV_INVOICE_AMT
    ,CD.PROV_CNTRCT_AMT
    ,CD.PLAN_CNTRCT_AMT
I need it to return:
Use a regex to get rid of the duplicates
....as PLAN_AMT,
-- Collapse repeated codes in the aggregated list.
-- * The list is ordered by DIAG_CD so that equal codes are adjacent; the
--   regex can only merge neighbours.
-- * '([^,]+)(,\1)*(,|$)' always consumes a whole token together with its
--   trailing delimiter, so a code is never matched against the tail of a
--   longer code (e.g. 'BA,A' stays 'BA,A').
-- * RTRIM strips a trailing comma should one remain.
RTRIM(
    REGEXP_REPLACE(
        LISTAGG(DX.DIAG_CD, ',') WITHIN GROUP (ORDER BY DX.DIAG_CD),
        '([^,]+)(,\1)*(,|$)',
        '\1\3'),
    ',') AS DX_CODES
FROM......
If there are very many DX_CODES per claim, your string may exceed the maximum length of a SQL VARCHAR2.
Can you try this instead?
-- Claim detail lines with a de-duplicated, comma-separated list of the
-- claim's diagnosis codes (DX_CODES).
-- The list is built by a correlated scalar subquery instead of a join to
-- CLAIM_DIAGNOSIS, so the other joins cannot multiply the diagnosis rows.
SELECT
    CD.CLAIM
    ,CD.CLAIMLN
    ,CD.PROV_INVOICE_UNTS
    ,CD.APPR_UNTS
    ,CD.PROV_INVOICE_AMT
    ,CD.PROV_CNTRCT_AMT
    ,CD.PLAN_CNTRCT_AMT AS PLAN_AMT
    ,(
        SELECT LISTAGG(dx.diag_cd, ',') WITHIN GROUP (ORDER BY dx.lvl_cd, dx.diag_cd)
        FROM (
            -- One row per (claim, code): a code recorded at several levels is
            -- listed once, positioned by its lowest level.
            SELECT clm_id, diag_cd, MIN(lvl_cd) AS lvl_cd
            FROM claim_diagnosis
            GROUP BY clm_id, diag_cd
        ) dx
        WHERE dx.clm_id = CD.clm_id
    ) AS dx_codes
FROM CLAIM_DETAIL CD
INNER JOIN PATIENT_INTAKE_PLAN PIP
    ON CD.PAT_NBR = PIP.PAT_NBR AND CD.ITK_ID = PIP.ITK_ID
INNER JOIN HEALTH_PLAN HP
    ON HP.PLAN_ID = PIP.PLAN_ID
INNER JOIN PROVIDER_CCXPORTAL PR
    ON PR.PROV_ID = CD.PROV_ID
INNER JOIN PROVIDER_PARENT PRP
    ON PR.PROV_PRNT_ID = PRP.PROV_PRNT_ID
INNER JOIN PATIENT_CCXPORTAL PTP
    ON PTP.PAT_NBR = CD.PAT_NBR
INNER JOIN CLAIM C
    ON C.CLM_ID = CD.CLM_ID
WHERE
    -- ANSI date literal: independent of the session's NLS_DATE_FORMAT.
    -- NOTE(review): assumes RCPT_DT is a DATE/TIMESTAMP column -- confirm.
    C.RCPT_DT >= DATE '2014-07-01'
Make sure there is an index on CLAIM_DIAGNOSIS.CLM_ID.
I use this query and get a list of company names in many rows (each name in one row)
-- One row per distinct company name.
SELECT DISTINCT
    companyName
FROM Companies
For each name, I can use this query to get another property related to that company name:
-- Builds one ', '-separated, name-ordered string of the category names
-- reachable from a single company (Companies -> KeyProcesses ->
-- CategorySets -> WMCCMCategories).
-- The inner query does not reference the outer table, so every outer group
-- yields the same string and DISTINCT reduces the output to that one value.
SELECT DISTINCT
    STUFF(
        (
            SELECT ', ' + cat.name
            FROM WMCCMCategories cat
            INNER JOIN CategorySets cs
                ON cs.categoryId = cat.categoryID
            INNER JOIN KeyProcesses kp
                ON kp.categorySetId = cs.setId
            INNER JOIN Companies co
                ON co.companyId = kp.companyId
            -- NOTE(review): #companyName appears to stand in for the company
            -- name being looked up -- confirm how it is supplied.
            WHERE co.companyName = #companyName
            ORDER BY cat.name
            FOR XML PATH('')
        ),
        1, 1, ''   -- removes only the first character (the leading comma)
    ) AS listStr
FROM WMCCMCategories outer_cat
GROUP BY outer_cat.name
Now, I want to apply that query for each name in the first query, so I replace #companyName by that first query:
-- Same concatenation as the single-company query, but filtered with
-- IN (all company names). The inner query is still not correlated with any
-- outer row, so the categories of all companies end up in one string.
SELECT DISTINCT
    STUFF(
        (
            SELECT ', ' + cat.name
            FROM WMCCMCategories cat
            INNER JOIN CategorySets cs
                ON cs.categoryId = cat.categoryID
            INNER JOIN KeyProcesses kp
                ON kp.categorySetId = cs.setId
            INNER JOIN Companies co
                ON co.companyId = kp.companyId
            WHERE co.companyName IN (
                SELECT DISTINCT companyName
                FROM Companies
            )
            ORDER BY cat.name
            FOR XML PATH('')
        ),
        1, 1, ''   -- removes only the first character (the leading comma)
    ) AS listStr
FROM WMCCMCategories outer_cat
GROUP BY outer_cat.name
But this returns all the results in a single row. What I need is one row per company name, with that company's own list of properties in it. How can I modify the query to get that?
Write as:
-- One row per company name with a ', '-separated, alphabetically ordered list
-- of the category names linked to it (NULL when the company has none).
-- GROUP BY already yields one row per company, so no DISTINCT is needed.
SELECT
    c1.companyName,
    STUFF(
        (
            SELECT ', ' + cn.name
            FROM WMCCMCategories cn
            INNER JOIN CategorySets uc
                ON uc.categoryId = cn.categoryID
            INNER JOIN KeyProcesses u
                ON u.categorySetId = uc.setId
            INNER JOIN Companies c
                ON c.companyId = u.companyId
            -- Correlation with the outer row: only this company's categories.
            WHERE c.companyName = c1.companyName
            ORDER BY cn.name
            -- TYPE + .value() returns plain text, so characters such as
            -- '&', '<' and '>' are not left XML-escaped in the result.
            FOR XML PATH(''), TYPE
        ).value('.', 'nvarchar(max)'),
        1, 2, ''   -- strip the whole leading ', ' separator (2 characters)
    ) AS listStr
FROM Companies c1
GROUP BY c1.companyName
The query below is working fine:
-- Lists cases with their author, type, sharing columns and number of distinct
-- images, newest entry first.
SELECT
    tblCase.ID AS CaseID,
    tblCase.UserID AS MyCasesFilter,
    tblGroupMembership.UserID AS GroupShareFilter,
    tblDirectCaseSharing.ReceiverUserID AS DirectShareFilter,
    tblCase.EntryDate,
    tblUser.LastName AS CaseAuthor,
    tblCase.Name AS CaseName,
    COUNT(DISTINCT tblCaseImage.ID) AS TotalImages,
    tblCaseType.Name AS CaseType,
    tblCase.SiteName,
    tblCase.Category,
    tblCase.FollowUpDateTime,
    tblCase.Notes
FROM
    tblDirectCaseSharing
    -- Nested join: (tblCase INNER JOIN tblUser) is the preserved side.
    RIGHT OUTER JOIN
        tblCase
        INNER JOIN tblUser ON tblCase.UserID = tblUser.ID
    ON tblDirectCaseSharing.CaseID = tblCase.ID
    -- Nested join: group membership is resolved through tblGroupCase.
    LEFT OUTER JOIN
        tblGroupMembership
        INNER JOIN
        tblGroupCase ON tblGroupMembership.GroupID = tblGroupCase.GroupID
    ON tblCase.ID = tblGroupCase.CaseID
    LEFT OUTER JOIN
        tblCaseType ON tblCase.CaseTypeID = tblCaseType.ID
    LEFT OUTER JOIN
        tblCaseImage ON tblCase.ID = tblCaseImage.CaseID
-- Row filters belong in WHERE, not HAVING: all three columns are grouping
-- keys, so the result is the same, but rows are discarded before grouping.
-- NOTE(review): equality filters on the outer-joined tables drop their
-- NULL-extended rows, so those outer joins effectively act as inner joins.
WHERE
    tblCase.UserID = 1
    AND tblGroupMembership.UserID = 2
    AND tblDirectCaseSharing.ReceiverUserID = 3
GROUP BY
    tblCase.ID, tblCaseType.Name, tblCase.SiteName, tblCase.EntryDate,
    tblCase.Category, tblCase.FollowUpDateTime, tblCase.Notes, tblCase.UserID,
    tblGroupMembership.UserID, tblDirectCaseSharing.ReceiverUserID,
    tblUser.LastName, tblCase.Name
ORDER BY
    tblCase.EntryDate DESC
I want to add an additional select column to the above result using a select subquery which is:
-- Select-list expression: concatenates the names of the groups linked to one
-- case (through tblGroupCase) into a ', '-separated string; STUFF(...,1,2,'')
-- removes the leading ', '.
STUFF((
SELECT ', ' +tblGroup.Name as [text()]
FROM tblCase INNER JOIN
tblGroupCase ON tblCase.ID = tblGroupCase.CaseID INNER JOIN
tblGroup ON tblGroupCase.GroupID = tblGroup.ID
-- '***' is a placeholder for the case ID of the current outer row; it is not
-- valid SQL as written.
WHERE tblCase.ID = ***
FOR XML PATH('')
),1,2,'')
AS ConcatGroupShares
The subquery has a WHERE clause that needs the tblCase.ID value, which is the first column of the result set. How do I reference that value in the subquery?
Put an alias name in the main query
-- Main query with tblCase aliased as tcase so that a subquery can refer to the
-- outer row. Once a table has an alias, its columns must be qualified with the
-- alias: the remaining tblCase.<column> references would no longer bind.
SELECT
    tcase.ID AS CaseID, tcase.UserID AS MyCasesFilter,
    tblGroupMembership.UserID AS GroupShareFilter,
    tblDirectCaseSharing.ReceiverUserID AS DirectShareFilter, tcase.EntryDate,
    tblUser.LastName AS CaseAuthor, tcase.Name AS CaseName,
    COUNT(DISTINCT tblCaseImage.ID) AS TotalImages,
    tblCaseType.Name AS CaseType, tcase.SiteName, tcase.Category,
    tcase.FollowUpDateTime, tcase.Notes
FROM
    tblDirectCaseSharing
    RIGHT OUTER JOIN
    tblCase AS tcase -- <===== the alias is declared here
-- ... the rest of the query is unchanged, except that every remaining
-- tblCase.<column> qualifier becomes tcase.<column>.
And use this alias name in the subquery:
-- Select-list expression: ', '-separated, name-ordered list of the groups the
-- current outer case (alias tcase) is shared with.
STUFF((
    SELECT ', ' + grp.Name AS [text()]
    FROM tblGroupCase AS gc
    INNER JOIN tblGroup AS grp
        ON gc.GroupID = grp.ID
    -- Correlate directly on the link table; joining tblCase again inside the
    -- subquery only to compare its ID with the outer row is redundant.
    WHERE gc.CaseID = tcase.ID
    ORDER BY grp.Name
    -- TYPE + .value() returns plain text, so '&', '<' and '>' in group names
    -- are not left XML-escaped.
    FOR XML PATH(''), TYPE
).value('.', 'nvarchar(max)'), 1, 2, '')   -- 1,2 strips the leading ', '
AS ConcatGroupShares
More about correlated subqueries:
http://en.wikipedia.org/wiki/Correlated_subquery
I have the following query (some of it is code-generated so pardon the poor formatting):
-- Account export for bank 8: first 10 distinct rows by account id, restricted
-- to active generic imports that belong to finished import bundles.
-- Every output column replaces NULL with ''.
SELECT DISTINCT
    COALESCE(gi.start_time, '')               AS start_time,
    COALESCE(b.name, '')                      AS bank,
    COALESCE(a.id, '')                        AS account_id,
    COALESCE(a.account_number, '')            AS account_number,
    COALESCE(at.code, '')                     AS account_type,
    COALESCE(a.open_date, '')                 AS open_date,
    COALESCE(a.interest_rate, '')             AS interest_rate,
    COALESCE(a.maturity_date, '')             AS maturity_date,
    COALESCE(a.opening_balance, '')           AS opening_balance,
    COALESCE(a.has_e_statement, '')           AS has_e_statement,
    COALESCE(a.has_bill_pay, '')              AS has_bill_pay,
    COALESCE(a.has_overdraft_protection, '')  AS has_overdraft_protection,
    COALESCE(a.balance, '')                   AS balance,
    COALESCE(a.business_or_personal, '')      AS business_or_personal,
    COALESCE(a.cumulative_balance, '')        AS cumulative_balance,
    COALESCE(c.customer_number, '')           AS customer_number,
    COALESCE(c.social_security_number, '')    AS social_security_number,
    COALESCE(c.name, '')                      AS customer_name,
    COALESCE(c.phone, '')                     AS phone,
    COALESCE(c.deceased, '')                  AS deceased,
    COALESCE(c.do_not_mail, '')               AS do_not_mail,
    COALESCE(cdob.date_of_birth, '')          AS date_of_birth,
    COALESCE(ad.line1, '')                    AS line1,
    COALESCE(ad.line2, '')                    AS line2,
    COALESCE(ad.city, '')                     AS city,
    COALESCE(s.name, '')                      AS state,
    COALESCE(ad.zip, '')                      AS zip,
    COALESCE(o.officer_number, '')            AS officer_number,
    COALESCE(o.name, '')                      AS officer_name,
    COALESCE(po.line1, '')                    AS po_box,
    COALESCE(po.city, '')                     AS po_city,
    COALESCE(po_state.name, '')               AS po_state,
    -- NOTE(review): second column named "zip" (the first one is ad.zip).
    COALESCE(po.zip, '')                      AS zip,
    COALESCE(br.number, '')                   AS branch_number,
    COALESCE(cd_type.code, '')                AS cd_type,
    COALESCE(mp.product_number, '')           AS macatawa_product_number,
    COALESCE(mp.product_name, '')             AS macatawa_product_name,
    COALESCE(pt.name, '')                     AS macatawa_product_type,
    COALESCE(hhsc.name, '')                   AS harte_hanks_service_category,
    COALESCE(mp.hoh_hierarchy, '')            AS hoh_hierarchy,
    COALESCE(cft.name, '')                    AS core_file_type,
    COALESCE(oa.line1, '')                    AS original_address_line1,
    COALESCE(oa.line2, '')                    AS original_address_line2,
    COALESCE(uc.code, '')                     AS use_class
FROM account AS a
    INNER JOIN customer AS c ON a.customer_id = c.id
    INNER JOIN officer AS o ON a.officer_id = o.id
    INNER JOIN account_address AS aa ON aa.account_id = a.id
    LEFT JOIN account_po_box AS apb ON apb.account_id = a.id
    INNER JOIN address AS ad ON aa.address_id = ad.id
    INNER JOIN original_address AS oa ON oa.address_id = ad.id
    LEFT JOIN address AS po ON apb.address_id = po.id
    INNER JOIN state AS s ON s.id = ad.state_id
    LEFT JOIN state AS po_state ON po_state.id = po.state_id
    LEFT JOIN branch AS br ON a.branch_id = br.id
    INNER JOIN account_import AS ai ON a.account_import_id = ai.id
    INNER JOIN generic_import AS gi ON gi.id = ai.generic_import_id
    INNER JOIN import_bundle AS ib ON gi.import_bundle_id = ib.id
    INNER JOIN bank AS b ON b.id = ib.bank_id
    LEFT JOIN customer_date_of_birth AS cdob ON cdob.customer_id = c.id
    LEFT JOIN cd_type ON a.cd_type_id = cd_type.id
    LEFT JOIN account_macatawa_product AS amp ON amp.account_id = a.id
    LEFT JOIN macatawa_product AS mp ON mp.id = amp.macatawa_product_id
    LEFT JOIN product_type AS pt ON pt.id = mp.product_type_id
    LEFT JOIN harte_hanks_service_category AS hhsc ON hhsc.id = mp.harte_hanks_service_category_id
    LEFT JOIN core_file_type AS cft ON cft.id = mp.core_file_type_id
    LEFT JOIN use_class AS uc ON a.use_class_id = uc.id
    LEFT JOIN account_type AS at ON a.account_type_id = at.id
-- "WHERE 1" is an always-true anchor so that each condition starts with AND.
WHERE 1
    AND gi.active = 1
    AND b.id = 8
    AND ib.is_finished = 1
ORDER BY a.id
LIMIT 10
I have indexes on all the appropriate columns, including account.id AKA a.id. Despite this fact, my query significantly speeds up (it goes from 10 seconds to 0 seconds) if I remove the ORDER BY. Why is this?
Because with the ORDER BY, it has to retrieve all the rows to sort them to get the first 10 by a.id. Without the ORDER BY, it can simply retrieve the first 10 rows it finds and ignore the rest.
Also, be careful when profiling queries: the first can fill the cache with data, and subsequent queries go faster not because the SQL is different, but because it's pulling data from the cache instead of the disk.
I've been working with databases for a long time but I'm new to query optimization. I have the following query (some of it code-generated):
-- Account export for bank 8: first 10 distinct rows by account id, restricted
-- to active generic imports that belong to finished import bundles.
-- Every output column replaces NULL with ''.
SELECT DISTINCT
    COALESCE(gi.start_time, '')               AS start_time,
    COALESCE(b.name, '')                      AS bank,
    COALESCE(a.id, '')                        AS account_id,
    COALESCE(a.account_number, '')            AS account_number,
    COALESCE(at.code, '')                     AS account_type,
    COALESCE(a.open_date, '')                 AS open_date,
    COALESCE(a.interest_rate, '')             AS interest_rate,
    COALESCE(a.maturity_date, '')             AS maturity_date,
    COALESCE(a.opening_balance, '')           AS opening_balance,
    COALESCE(a.has_e_statement, '')           AS has_e_statement,
    COALESCE(a.has_bill_pay, '')              AS has_bill_pay,
    COALESCE(a.has_overdraft_protection, '')  AS has_overdraft_protection,
    COALESCE(a.balance, '')                   AS balance,
    COALESCE(a.business_or_personal, '')      AS business_or_personal,
    COALESCE(a.cumulative_balance, '')        AS cumulative_balance,
    COALESCE(c.customer_number, '')           AS customer_number,
    COALESCE(c.social_security_number, '')    AS social_security_number,
    COALESCE(c.name, '')                      AS customer_name,
    COALESCE(c.phone, '')                     AS phone,
    COALESCE(c.deceased, '')                  AS deceased,
    COALESCE(c.do_not_mail, '')               AS do_not_mail,
    COALESCE(cdob.date_of_birth, '')          AS date_of_birth,
    COALESCE(ad.line1, '')                    AS line1,
    COALESCE(ad.line2, '')                    AS line2,
    COALESCE(ad.city, '')                     AS city,
    COALESCE(s.name, '')                      AS state,
    COALESCE(ad.zip, '')                      AS zip,
    COALESCE(o.officer_number, '')            AS officer_number,
    COALESCE(o.name, '')                      AS officer_name,
    COALESCE(po.line1, '')                    AS po_box,
    COALESCE(po.city, '')                     AS po_city,
    COALESCE(po_state.name, '')               AS po_state,
    -- NOTE(review): second column named "zip" (the first one is ad.zip).
    COALESCE(po.zip, '')                      AS zip,
    COALESCE(br.number, '')                   AS branch_number,
    COALESCE(cd_type.code, '')                AS cd_type,
    COALESCE(mp.product_number, '')           AS macatawa_product_number,
    COALESCE(mp.product_name, '')             AS macatawa_product_name,
    COALESCE(pt.name, '')                     AS macatawa_product_type,
    COALESCE(hhsc.name, '')                   AS harte_hanks_service_category,
    COALESCE(mp.hoh_hierarchy, '')            AS hoh_hierarchy,
    COALESCE(cft.name, '')                    AS core_file_type,
    COALESCE(oa.line1, '')                    AS original_address_line1,
    COALESCE(oa.line2, '')                    AS original_address_line2,
    COALESCE(uc.code, '')                     AS use_class
FROM account AS a
    INNER JOIN customer AS c ON a.customer_id = c.id
    INNER JOIN officer AS o ON a.officer_id = o.id
    INNER JOIN account_address AS aa ON aa.account_id = a.id
    LEFT JOIN account_po_box AS apb ON apb.account_id = a.id
    INNER JOIN address AS ad ON aa.address_id = ad.id
    INNER JOIN original_address AS oa ON oa.address_id = ad.id
    LEFT JOIN address AS po ON apb.address_id = po.id
    INNER JOIN state AS s ON s.id = ad.state_id
    LEFT JOIN state AS po_state ON po_state.id = po.state_id
    LEFT JOIN branch AS br ON a.branch_id = br.id
    INNER JOIN account_import AS ai ON a.account_import_id = ai.id
    INNER JOIN generic_import AS gi ON gi.id = ai.generic_import_id
    INNER JOIN import_bundle AS ib ON gi.import_bundle_id = ib.id
    INNER JOIN bank AS b ON b.id = ib.bank_id
    LEFT JOIN customer_date_of_birth AS cdob ON cdob.customer_id = c.id
    LEFT JOIN cd_type ON a.cd_type_id = cd_type.id
    LEFT JOIN account_macatawa_product AS amp ON amp.account_id = a.id
    LEFT JOIN macatawa_product AS mp ON mp.id = amp.macatawa_product_id
    LEFT JOIN product_type AS pt ON pt.id = mp.product_type_id
    LEFT JOIN harte_hanks_service_category AS hhsc ON hhsc.id = mp.harte_hanks_service_category_id
    LEFT JOIN core_file_type AS cft ON cft.id = mp.core_file_type_id
    LEFT JOIN use_class AS uc ON a.use_class_id = uc.id
    LEFT JOIN account_type AS at ON a.account_type_id = at.id
-- "WHERE 1" is an always-true anchor so that each condition starts with AND.
WHERE 1
    AND gi.active = 1
    AND b.id = 8
    AND ib.is_finished = 1
ORDER BY a.id
LIMIT 10
And it's pretty slow. On my dev server it takes about a minute to run and on my production server, where there's more data, I can't get it to even finish. Here's what an EXPLAIN looks like:
http://i.stack.imgur.com/eR6lq.png
I know the basics of EXPLAIN. I know that it's good that I have something other than NULL for everything under key. But I don't know, overall, how much room for improvement my query has. I do know that Using temporary; Using filesort under Extra is bad, but I have no idea what to do about it.
It looks like you don't have indexes on most of your JOIN fields. Make sure every field that you use as a JOIN key has an index on both tables.
With 23 joins and what looks like only 2 relevant indexes, poor performance can be expected.
With no index to reference, the query engine is checking every row in both tables to compare them, which is obviously very inefficient.
edit:
For example, in your query you have
JOIN customer c ON a.customer_id = c.id
Make sure you have an index on both a.customer_id and customer.id. Having an index on the joined columns of both tables can speed up the query dramatically.
In addition to what @JNK mentioned in his answer about ensuring you have indexes, I have restructured your query and added the "STRAIGHT_JOIN" clause at the top, which tells the optimizer to join the tables in the order in which they are listed.
Since your query is based on the generic import, to import bundle to bank, I've moved THOSE to the front of the list... The where will pre-qualify THOSE records first instead of looking at all accounts that may never be part of the result. So, the join is now reversed from the generic import back to the account following the same relationships you started with.
I've also associated the respective JOIN / ON conditions directly under the table they were joining against for readability and following table relationships. I've also made it so the ON clause has Table1.ID = JoinedTable.ID... although some reversed and otherwise no big deal, knowing how something is based on the join INTO the other just allows easier readability.
So, ensure respective tables have indexes on whatever key column is the join, and from this sample query, make sure your GI table (alias) has an index on "Active", and your IB (alias) has an index on Is_Finished.
Lastly, your WHERE clause had WHERE 1 AND... no purpose of the "1", so I stripped that out.
-- Account export for bank 8, driven from generic_import: first 10 distinct
-- rows by account id for active imports in finished bundles.
-- STRAIGHT_JOIN makes the optimizer join the tables in the order written
-- below, so the filtered import tables come first; keep that order.
-- Every output column replaces NULL with ''.
SELECT STRAIGHT_JOIN DISTINCT
    COALESCE(gi.start_time, '')               AS start_time,
    COALESCE(b.name, '')                      AS bank,
    COALESCE(a.id, '')                        AS account_id,
    COALESCE(a.account_number, '')            AS account_number,
    COALESCE(at.code, '')                     AS account_type,
    COALESCE(a.open_date, '')                 AS open_date,
    COALESCE(a.interest_rate, '')             AS interest_rate,
    COALESCE(a.maturity_date, '')             AS maturity_date,
    COALESCE(a.opening_balance, '')           AS opening_balance,
    COALESCE(a.has_e_statement, '')           AS has_e_statement,
    COALESCE(a.has_bill_pay, '')              AS has_bill_pay,
    COALESCE(a.has_overdraft_protection, '')  AS has_overdraft_protection,
    COALESCE(a.balance, '')                   AS balance,
    COALESCE(a.business_or_personal, '')      AS business_or_personal,
    COALESCE(a.cumulative_balance, '')        AS cumulative_balance,
    COALESCE(c.customer_number, '')           AS customer_number,
    COALESCE(c.social_security_number, '')    AS social_security_number,
    COALESCE(c.name, '')                      AS customer_name,
    COALESCE(c.phone, '')                     AS phone,
    COALESCE(c.deceased, '')                  AS deceased,
    COALESCE(c.do_not_mail, '')               AS do_not_mail,
    COALESCE(cdob.date_of_birth, '')          AS date_of_birth,
    COALESCE(ad.line1, '')                    AS line1,
    COALESCE(ad.line2, '')                    AS line2,
    COALESCE(ad.city, '')                     AS city,
    COALESCE(s.name, '')                      AS state,
    COALESCE(ad.zip, '')                      AS zip,
    COALESCE(o.officer_number, '')            AS officer_number,
    COALESCE(o.name, '')                      AS officer_name,
    COALESCE(po.line1, '')                    AS po_box,
    COALESCE(po.city, '')                     AS po_city,
    COALESCE(po_state.name, '')               AS po_state,
    -- NOTE(review): second column named "zip" (the first one is ad.zip).
    COALESCE(po.zip, '')                      AS zip,
    COALESCE(br.number, '')                   AS branch_number,
    COALESCE(cd_type.code, '')                AS cd_type,
    COALESCE(mp.product_number, '')           AS macatawa_product_number,
    COALESCE(mp.product_name, '')             AS macatawa_product_name,
    COALESCE(pt.name, '')                     AS macatawa_product_type,
    COALESCE(hhsc.name, '')                   AS harte_hanks_service_category,
    COALESCE(mp.hoh_hierarchy, '')            AS hoh_hierarchy,
    COALESCE(cft.name, '')                    AS core_file_type,
    COALESCE(oa.line1, '')                    AS original_address_line1,
    COALESCE(oa.line2, '')                    AS original_address_line2,
    COALESCE(uc.code, '')                     AS use_class
FROM generic_import AS gi
    -- import -> bundle -> bank: the tables the WHERE clause filters on
    INNER JOIN import_bundle AS ib
        ON gi.import_bundle_id = ib.id
    INNER JOIN bank AS b
        ON ib.bank_id = b.id
    -- import -> account
    INNER JOIN account_import AS ai
        ON gi.id = ai.generic_import_id
    INNER JOIN account AS a
        ON ai.id = a.account_import_id
    -- account -> customer / officer / lookups
    INNER JOIN customer AS c
        ON a.customer_id = c.id
    LEFT JOIN customer_date_of_birth AS cdob
        ON c.id = cdob.customer_id
    INNER JOIN officer AS o
        ON a.officer_id = o.id
    LEFT JOIN branch AS br
        ON a.branch_id = br.id
    LEFT JOIN cd_type
        ON a.cd_type_id = cd_type.id
    -- account -> product and its lookups
    LEFT JOIN account_macatawa_product AS amp
        ON a.id = amp.account_id
    LEFT JOIN macatawa_product AS mp
        ON amp.macatawa_product_id = mp.id
    LEFT JOIN product_type AS pt
        ON mp.product_type_id = pt.id
    LEFT JOIN harte_hanks_service_category AS hhsc
        ON mp.harte_hanks_service_category_id = hhsc.id
    LEFT JOIN core_file_type AS cft
        ON mp.core_file_type_id = cft.id
    LEFT JOIN use_class AS uc
        ON a.use_class_id = uc.id
    LEFT JOIN account_type AS at
        ON a.account_type_id = at.id
    -- account -> street address
    INNER JOIN account_address AS aa
        ON a.id = aa.account_id
    INNER JOIN address AS ad
        ON aa.address_id = ad.id
    INNER JOIN original_address AS oa
        ON ad.id = oa.address_id
    INNER JOIN state AS s
        ON ad.state_id = s.id
    -- account -> optional PO box address
    LEFT JOIN account_po_box AS apb
        ON a.id = apb.account_id
    LEFT JOIN address AS po
        ON apb.address_id = po.id
    LEFT JOIN state AS po_state
        ON po.state_id = po_state.id
WHERE gi.active = 1
    AND ib.is_finished = 1
    AND b.id = 8
ORDER BY a.id
LIMIT 10