How to prevent insert if same records are present in the table - sql

I have a query
-- Loads FCC_CS_WL_SOURCE_REQUEST_ID_MAP from the bulk match results joined to
-- the customer dimension; each produced row gets a new key from MAP_SEQ_TEST.
-- There is no check against rows already in the target, so running the
-- statement again inserts the same combinations a second time.
INSERT INTO FCC_CS_WL_SOURCE_REQUEST_ID_MAP
(
"N_WL_SOURCE_REQUEST_ID",
"V_SOURCE_REQUEST_ID",
"V_TARGET_KEY",
"V_TARGET_INDEXNAME"
)
SELECT
MAP_SEQ_TEST.nextval,
FCC_CUST_DIM.V_ALT_CUST_ID AS "V_SOURCE_REQUEST_ID",
FCC_CS_MATCHED_RESULT_BULK.V_TARGET_KEY ,
FCC_CS_MATCHED_RESULT_BULK.V_TARGET_INDEXNAME
FROM FCC_CS_MATCHED_RESULT_BULK INNER JOIN FCC_CUST_DIM
-- join key: bulk source key = customer internal id
ON FCC_CS_MATCHED_RESULT_BULK.V_SOURCE_KEY =FCC_CUST_DIM.V_CUST_INTRL_ID
-- the remaining ON predicates are plain row filters (inner join, so they act like a WHERE)
AND FCC_CUST_DIM.F_LRI_FL ='Y'
AND FCC_CUST_DIM.V_ALT_CUST_ID IS NOT NULL
AND FCC_CS_MATCHED_RESULT_BULK.N_RUN_SKEY =290
Here I need to prevent the insert into the FCC_CS_WL_SOURCE_REQUEST_ID_MAP table when a row with the same V_SOURCE_REQUEST_ID, V_TARGET_KEY and V_TARGET_INDEXNAME values as the row about to be inserted already exists.
How can I modify this query to achieve that?

Use a MERGE statement:
-- Insert only those (request id, target key, target index name) combinations
-- that the map table does not contain yet; matching rows are left untouched.
-- NOTE(review): MERGE compares the source against the target as it was when the
-- statement started, so duplicates inside the source set itself are not
-- collapsed - confirm the join yields each combination once.
MERGE INTO FCC_CS_WL_SOURCE_REQUEST_ID_MAP tgt
USING (
    -- candidate rows: run 290 matches joined to flagged customers that have an alternate id
    SELECT cd.V_ALT_CUST_ID,
           mr.V_TARGET_KEY,
           mr.V_TARGET_INDEXNAME
    FROM   FCC_CS_MATCHED_RESULT_BULK mr
           INNER JOIN FCC_CUST_DIM cd
                   ON mr.V_SOURCE_KEY = cd.V_CUST_INTRL_ID
    WHERE  cd.F_LRI_FL = 'Y'
      AND  cd.V_ALT_CUST_ID IS NOT NULL
      AND  mr.N_RUN_SKEY = 290
) cand
ON (
        tgt.V_SOURCE_REQUEST_ID = cand.V_ALT_CUST_ID
    AND tgt.V_TARGET_KEY        = cand.V_TARGET_KEY
    AND tgt.V_TARGET_INDEXNAME  = cand.V_TARGET_INDEXNAME
)
WHEN NOT MATCHED THEN
    INSERT (
        N_WL_SOURCE_REQUEST_ID,
        V_SOURCE_REQUEST_ID,
        V_TARGET_KEY,
        V_TARGET_INDEXNAME
    )
    VALUES (
        MAP_SEQ_TEST.nextval,   -- new surrogate key per inserted row
        cand.V_ALT_CUST_ID,
        cand.V_TARGET_KEY,
        cand.V_TARGET_INDEXNAME
    );

Add a NOT EXISTS clause in your SELECT:
-- Same load as the original INSERT, plus an anti-join: a candidate row is
-- skipped when the map table already holds the same request id, target key
-- and target index name.
INSERT INTO FCC_CS_WL_SOURCE_REQUEST_ID_MAP
(
    "N_WL_SOURCE_REQUEST_ID",
    "V_SOURCE_REQUEST_ID",
    "V_TARGET_KEY",
    "V_TARGET_INDEXNAME"
)
SELECT
    MAP_SEQ_TEST.nextval,
    cd.V_ALT_CUST_ID AS "V_SOURCE_REQUEST_ID",
    mr.V_TARGET_KEY,
    mr.V_TARGET_INDEXNAME
FROM FCC_CS_MATCHED_RESULT_BULK mr
INNER JOIN FCC_CUST_DIM cd
    ON mr.V_SOURCE_KEY = cd.V_CUST_INTRL_ID
-- row filters; for an inner join these behave the same in WHERE as in ON
WHERE cd.F_LRI_FL = 'Y'
  AND cd.V_ALT_CUST_ID IS NOT NULL
  AND mr.N_RUN_SKEY = 290
  AND NOT EXISTS (
        SELECT 1
        FROM FCC_CS_WL_SOURCE_REQUEST_ID_MAP t1
        WHERE t1.V_SOURCE_REQUEST_ID = cd.V_ALT_CUST_ID
          AND t1.V_TARGET_KEY        = mr.V_TARGET_KEY
          AND t1.V_TARGET_INDEXNAME  = mr.V_TARGET_INDEXNAME
      )

Related

Why is my query inserting the same values when I have added a 'not exists' parameter that should avoid this from happening?

My query should stop inserting values, as the not exists statement is satisfied (I have checked both tables) and matching incidents exist in both tables, any ideas why values are still being returned?
Here is the code:
-- Question's statement: meant to add incidents with audit status 6 or 7 that
-- are not yet in the credit-control table and that exist in the work table.
INSERT INTO
odwh_system.ead_incident_credit_control_s
(
incident
)
SELECT DISTINCT
tp.incident
FROM
odwh_data.ead_incident_status_audit_s ei
INNER JOIN odwh_data.ead_incident_s tp ON ei.incident=tp.incident
WHERE
-- AND binds tighter than OR, so without parentheses this WHERE reads as
--   status = 6 OR (status = 7 AND NOT EXISTS (...) AND EXISTS (...))
-- i.e. rows with status 6 pass regardless of the two EXISTS checks.
ei.status = 6
OR
ei.status = 7
AND NOT EXISTS
(
SELECT
true
FROM
odwh_system.ead_incident_credit_control_s ead
WHERE
ead.incident = tp.incident
)
AND EXISTS
(
SELECT
true
FROM
-- this inner alias tp hides the outer tp (odwh_data.ead_incident_s) inside the subquery
odwh_work.ead_incident_tp_s tp
WHERE
tp.incident = ei.incident
);
Don't reuse table aliases.
Use sane, distinct aliases.
Avoid AND/OR precedence conflicts; prefer IN().
-- Register every incident from odwh_data.ead_incident_s that
--   (a) is not yet present in the credit-control table, and
--   (b) exists in the work table with an audit status of 6 or 7.
-- Each table has its own alias and the status test uses IN(), so there is no
-- AND/OR precedence ambiguity.
INSERT INTO odwh_system.ead_incident_credit_control_s (incident)
SELECT -- DISTINCT  (only needed if ead_incident_s can list an incident twice - TODO confirm)
    dtp.incident   -- must use the alias declared in FROM; "tp" is not defined in this query
FROM odwh_data.ead_incident_s dtp
WHERE NOT EXISTS (
        -- already registered?
        SELECT *
        FROM odwh_system.ead_incident_credit_control_s sic
        WHERE sic.incident = dtp.incident
    )
  AND EXISTS (
        -- present in the work table and audited with status 6 or 7
        SELECT *
        FROM odwh_work.ead_incident_tp_s wtp
        JOIN odwh_data.ead_incident_status_audit_s dis
          ON wtp.incident = dis.incident
         AND dis.status IN (6, 7)
        WHERE wtp.incident = dtp.incident
    );

with, works in the first query, but not in the second

I have this function, which checks whether the first query (on table_one) returns any rows.
If it returns none, it should check the second table instead.
Each query works on its own, but when I combine them only the first part works and the second one does not.
-- get_data(id): returns (id, created_at, attempts, status) rows.
-- The body is one RETURN QUERY made of a CTE "r" (rows from table_one for the
-- given user) followed by UNION ALL with a second SELECT over pay_ins.
CREATE OR REPLACE FUNCTION get_data(id INT)
RETURNS TABLE(
id INT,
created_at TIMESTAMP,
attempts INT,
status VARCHAR
)
language plpgsql
AS
$$
DECLARE
_SENT VARCHAR := 'SENT';
BEGIN
RETURN QUERY
-- r: one row per non-deleted table_one row of the user, with the columns of its
-- awa_req row having the highest id (NULLs when it has none)
WITH r AS (
SELECT p_i.id, a_r.created_at, a_r.attempts,
-- no ELSE branch: any status other than 'PENDING' yields NULL
CASE a_r.status
WHEN 'PENDING' THEN _SENT
END AS status
FROM table_one p_i
LEFT JOIN (
SELECT a_r.table_one_id, max(a_r.id) id
FROM awa_req a_r
GROUP BY a_r.table_one_id
) last_md on last_md.table_one_id = p_i.id
LEFT JOIN awa_req a_r on a_r.table_one_id = last_md.table_one_id and a_r.id = last_md.id
-- $1 is the function's first argument
WHERE p_i.user_id = $1
AND p_i.deleted_at IS NULL
)
SELECT * FROM r
UNION ALL
-- second branch: pay_ins rows with the columns of their table_two row having the highest id
SELECT p_i.id, m_d.created_at, m_d.attempts,
CASE
WHEN m_d.confirmed_at IS NULL THEN _SENT
END AS status
FROM pay_ins p_i
LEFT JOIN (
SELECT max(t.id) AS id, t.pay_ins_id
FROM table_two t
GROUP BY t.pay_ins_id
) last_md on last_md.pay_ins_id = p_i.id
LEFT JOIN table_two m_d on m_d.pay_ins_id = last_md.pay_ins_id and m_d.id = last_md.id
-- NOTE: this predicate continues the ON clause of the LEFT JOIN above (this
-- branch has no WHERE), and the subquery is not correlated to p_i
AND NOT EXISTS (
SELECT * FROM r
);
END;
$$;
best
This part will eliminate all rows from the UNION clause if any rows exist in r:
AND NOT EXISTS (
SELECT * FROM r
);
It should instead be something like:
AND NOT EXISTS (
SELECT FROM r WHERE r.id = p_i.id
)

How to improve sql script performance

The following script is very slow when it runs.
I have no idea how to improve its performance.
Even when wrapped in a view it takes many minutes.
If you have any ideas, please share them with me.
-- Builds one de-duplicated list of contact ids from five sources that are
-- combined with UNION inside the derived table "tbl".
SELECT DISTINCT
( id )
FROM ( SELECT DISTINCT
ct.id AS id
-- source 1: contacts linked to a customer that has a transaction after 2012-02-18
FROM [Customer].[dbo].[Contact] ct
LEFT JOIN [Customer].[dbo].[Customer_ids] hnci ON ct.id = hnci.contact_id
-- filtering on hnci here discards the NULL-extended rows, so the LEFT JOIN acts as an inner join
WHERE hnci.customer_id IN (
SELECT DISTINCT
( [Customer_ID] )
FROM [Transactions].[dbo].[Transaction_Header]
WHERE actual_transaction_date > '20120218' )
UNION
-- source 2: restaurant attendance created or modified after the cut-off at one of three venues
SELECT DISTINCT
contact_id AS id
FROM [Customer].[dbo].[Restaurant_Attendance]
WHERE ( created > '2012-02-18 00:00:00.000'
OR modified > '2012-02-18 00:00:00.000'
)
AND ( [Fifth_Floor_London] = 1
OR [Fourth_Floor_Leeds] = 1
OR [Second_Floor_Bristol] = 1
)
UNION
-- source 3: contacts matched to a Wifinity device row with startconnection after 2012-02-17
SELECT DISTINCT
( ct.id )
FROM [Customer].[dbo].[Contact] ct
INNER JOIN [Customer].[dbo].[Wifinity_Devices] wfd ON ct.wifinity_uniqueID = wfd.[CustomerUniqueID]
AND startconnection > '2012-02-17'
UNION
-- source 4: Complete_dataset rows with no Aggregate_Spend_Counts row (anti-join),
-- at least one opt-out flag that is not 1 (or is NULL), and at least one contact detail
SELECT DISTINCT
comdt.id AS id
FROM [Customer].[dbo].[Complete_dataset] comdt
LEFT JOIN [Customer].[dbo].[Aggregate_Spend_Counts] agsc ON comdt.id = agsc.contact_id
WHERE agsc.contact_id IS NULL
AND ( opt_out_Mail <> 1
OR opt_out_email <> 1
OR opt_out_SMS <> 1
OR opt_out_Mail IS NULL
OR opt_out_email IS NULL
OR opt_out_SMS IS NULL
)
AND ( address_1 IS NOT NULL
OR email IS NOT NULL
OR mobile IS NOT NULL
)
UNION
-- source 5: holders of a VIP card
SELECT DISTINCT
( contact_id ) AS id
FROM [Customer].[dbo].[VIP_Card_Holders]
WHERE VIP_Card_number IS NOT NULL
) AS tbl
Wow, where to start...
-- Rewrite of the five-source contact-id query.
-- The outer SELECT DISTINCT wrapper and the per-branch DISTINCTs are dropped:
-- UNION already removes duplicates from the combined result.

-- 1) contacts linked to a customer that has a transaction after the cut-off.
--    This must return Contact ids (not Transaction_Header.Customer_ID), so the
--    link through Customer_ids stays. The original LEFT JOIN was filtered on
--    hnci in WHERE, which makes it an inner join anyway; EXISTS replaces
--    IN (SELECT DISTINCT ...).
SELECT
    ct.id AS id
FROM [Customer].[dbo].[Contact] ct
INNER JOIN [Customer].[dbo].[Customer_ids] hnci
    ON ct.id = hnci.contact_id
WHERE EXISTS (
        SELECT *
        FROM [Transactions].[dbo].[Transaction_Header] th
        WHERE th.[Customer_ID] = hnci.customer_id
          AND th.actual_transaction_date > '20120218'
    )
UNION
-- 2) restaurant attendance at one of the three venues.
--    Check the date range: with "> '2012-02-18 00:00:00.000'" you get everything
--    on the 18th except rows stamped exactly at midnight.
--    Use >= '2012-02-18' for "the 18th or later", or >= '2012-02-19' for
--    "the 19th or later".
SELECT
    contact_id AS id
FROM [Customer].[dbo].[Restaurant_Attendance]
WHERE ( created > '2012-02-18 00:00:00.000'
        OR modified > '2012-02-18 00:00:00.000'
      )
  AND ( [Fifth_Floor_London] = 1
        OR [Fourth_Floor_Leeds] = 1
        OR [Second_Floor_Bristol] = 1
      )
UNION
-- 3) contacts matched to a Wifinity device. Branch 1 only returns contacts that
--    have a recent transaction, so this branch can add ids and must be kept.
SELECT
    ct.id
FROM [Customer].[dbo].[Contact] ct
INNER JOIN [Customer].[dbo].[Wifinity_Devices] wfd
    ON ct.wifinity_uniqueID = wfd.[CustomerUniqueID]
   AND startconnection > '2012-02-17'
UNION
-- 4) contacts without an Aggregate_Spend_Counts row (anti-join) that have not
--    opted out of every channel and have at least one contact detail.
--    isnull(x, 0) <> 1 is the same test as "x <> 1 OR x IS NULL".
--    coalesce(...) IS NOT NULL is true when any of the three columns is set
--    (assumes the three columns have compatible types - confirm).
SELECT
    comdt.id AS id
FROM [Customer].[dbo].[Complete_dataset] comdt
LEFT JOIN [Customer].[dbo].[Aggregate_Spend_Counts] agsc
    ON comdt.id = agsc.contact_id
WHERE agsc.contact_id IS NULL
  AND ( isnull(opt_out_Mail, 0) <> 1
        OR isnull(opt_out_email, 0) <> 1
        OR isnull(opt_out_SMS, 0) <> 1
      )
  AND coalesce(address_1, email, mobile) IS NOT NULL
UNION
-- 5) VIP card holders
SELECT
    contact_id AS id
FROM [Customer].[dbo].[VIP_Card_Holders]
WHERE VIP_Card_number IS NOT NULL
Where exists is generally faster than in as well.
Or conditions are generally slower as well, use more union statements instead.
And learn to use left joins correctly. If you have a WHERE condition (other than WHERE id IS NULL) on the table on the right side of a left join, the join effectively becomes an inner join. If this is not what you want, then your code is currently giving you an incorrect result set.
See http://wiki.lessthandot.com/index.php/WHERE_conditions_on_a_LEFT_JOIN for an explanation of how to fix.
As stated in a comment optimize one at a time. See which one takes the longest and focus on that one.
union will remove duplicates so you don't need the distinct on the individual queries
For your first query I would try this:
The left join is cancelled out by the WHERE hnci.customer_id IN (...), so you might as well use an inner join.
The sub-query is not efficient because it may not be able to use an index for the IN.
The query optimizer does not know what IN (SELECT ...) will return, so it cannot optimize the use of indexes.
-- Branch 1 as plain inner joins: contact -> customer link -> transactions after
-- the cut-off. A contact is returned once per matching transaction; the
-- surrounding UNION removes the repeats.
SELECT
    ct.id AS id
FROM [Customer].[dbo].[Contact] AS ct
INNER JOIN [Customer].[dbo].[Customer_ids] AS hnci
    ON hnci.contact_id = ct.id
INNER JOIN [Transactions].[dbo].[Transaction_Header] AS th
    ON th.[Customer_ID] = hnci.customer_id
   AND th.actual_transaction_date > '20120218'
On that second join the query optimizer can choose which condition to apply first. Let's say [Customer].[dbo].[Customer_ids].[customer_id] and [Transactions].[dbo].[Transaction_Header].[Customer_ID] each have an index. The query optimizer then has the option to apply the ID join before the filter on [Transactions].[dbo].[Transaction_Header].[actual_transaction_date].
If [actual_transaction_date] is not indexed, then it would almost certainly do the ID join first.
With your IN (SELECT ...) the query optimizer has no option but to apply actual_transaction_date > '20120218' first. OK, sometimes the query optimizer is smart enough to use an index from inside the IN on the outside, but why make it hard for the query optimizer? I have found that the query optimizer makes better decisions if you make the decisions easier.
A join on a sub-query has the same problem: you take options away from the query optimizer. Give the query optimizer room to breathe.
Try this; a temp table should help you:
-- Collects contact ids from the five sources into a temp table, one statement
-- per source, then returns the distinct ids.

-- Remove a leftover temp table from an earlier run.
-- NOTE(review): the name is spelled #Temp1 here and #temp1 below; that refers to
-- the same object only under a case-insensitive tempdb collation - confirm.
IF OBJECT_ID('Tempdb..#Temp1') IS NOT NULL
    DROP TABLE #Temp1;

-- Source 1: contacts linked to a customer with a transaction after the cut-off.
-- The question used LEFT JOIN ... WHERE hnci.customer_id IN (SELECT DISTINCT ...).
-- Because that WHERE filters the joined table, it returns the same rows as two
-- inner joins; the answer reports the inner-join form below as the faster one.
-- SELECT ... INTO also creates the temp table.
SELECT DISTINCT
    ct.id AS id
INTO #temp1
FROM [Customer].[dbo].[Contact] AS ct
INNER JOIN [Customer].[dbo].[Customer_ids] AS hnci
    ON ct.id = hnci.contact_id
INNER JOIN (
        SELECT DISTINCT
            [Customer_ID]
        FROM [Transactions].[dbo].[Transaction_Header]
        WHERE actual_transaction_date > '20120218'
    ) AS recent_tx
    ON hnci.customer_id = recent_tx.[Customer_ID];

-- Source 2: restaurant attendance created or modified after the cut-off at one
-- of the three venues.
INSERT INTO #temp1 (id)
SELECT DISTINCT
    contact_id AS id
FROM [Customer].[dbo].[Restaurant_Attendance]
WHERE ( created > '2012-02-18 00:00:00.000'
        OR modified > '2012-02-18 00:00:00.000' )
  AND ( [Fifth_Floor_London] = 1
        OR [Fourth_Floor_Leeds] = 1
        OR [Second_Floor_Bristol] = 1 );

-- Source 3: contacts matched to a Wifinity device row.
INSERT INTO #temp1 (id)
SELECT DISTINCT
    ct.id
FROM [Customer].[dbo].[Contact] AS ct
INNER JOIN [Customer].[dbo].[Wifinity_Devices] AS wfd
    ON ct.wifinity_uniqueID = wfd.[CustomerUniqueID]
   AND startconnection > '2012-02-17';

-- Source 4: Complete_dataset rows without an Aggregate_Spend_Counts row, with at
-- least one opt-out flag that is not 1 (or is NULL) and at least one contact detail.
INSERT INTO #temp1 (id)
SELECT DISTINCT
    comdt.id AS id
FROM [Customer].[dbo].[Complete_dataset] AS comdt
LEFT JOIN [Customer].[dbo].[Aggregate_Spend_Counts] AS agsc
    ON comdt.id = agsc.contact_id
WHERE agsc.contact_id IS NULL
  AND ( opt_out_Mail <> 1
        OR opt_out_email <> 1
        OR opt_out_SMS <> 1
        OR opt_out_Mail IS NULL
        OR opt_out_email IS NULL
        OR opt_out_SMS IS NULL )
  AND ( address_1 IS NOT NULL
        OR email IS NOT NULL
        OR mobile IS NOT NULL );

-- Source 5: VIP card holders.
INSERT INTO #temp1 (id)
SELECT DISTINCT
    contact_id AS id
FROM [Customer].[dbo].[VIP_Card_Holders]
WHERE VIP_Card_number IS NOT NULL;

-- Final result: the same id can arrive from several sources, so de-duplicate here.
SELECT DISTINCT
    collected.id
FROM #temp1 AS collected;

Adding Switch to WHERE clause

I've been trying to add a switch to the following script.
If @IgnoreExclusions = 1 then I do not want to exclude any of the values in Controltb_AssocAccounts_ExcludedSurnameDOB or in Controltb_AssocAccounts_ExcludedDOB.
I've included one of my attempts, but I don't think it is very readable, and I'm also unsure whether it works reliably, as NULL could be a value in one of the exclusion lists.
-- Question's script: returns the keys of accounts in WH.dbo.vw_DimUserAccount
-- that share surname and DOB with a different account in #Accounts, skipping
-- excluded surname/DOB pairs and excluded DOBs.
-- @IgnoreExclusions is the desired switch; it is declared but not yet wired
-- into the WHERE clause (only the commented-out attempt refers to it).
DECLARE @IgnoreExclusions TINYINT = 1;
SELECT ua.UserAccountKey
FROM #Accounts x
INNER JOIN WH.dbo.vw_DimUserAccount ua
    ON
    ( --surname and DOB need to match
        x.Surname = ua.Surname AND
        x.DOB = ua.DOB
    )
    AND
    x.UserAccountKey <> ua.UserAccountKey
WHERE EXISTS
    (
        -- true when the (Surname, DOB) pair is NOT in the exclusion list;
        -- EXCEPT treats two NULLs as equal, unlike the = operator
        SELECT x.Surname, x.DOB
        EXCEPT
        SELECT ExcludedSurname,ExcludedDOB
        FROM WH.dbo.Controltb_AssocAccounts_ExcludedSurnameDOB
    )
    AND
    EXISTS
    (
        -- true when the DOB is NOT in the DOB exclusion list
        SELECT x.DOB
        --SELECT CASE WHEN @IgnoreExclusions = 1 THEN NULL ELSE x.DOB END --<<<<ATTEMPT
        EXCEPT
        SELECT ExcludedDOB
        FROM WH.dbo.Controltb_AssocAccounts_ExcludedDOB
    )
GROUP BY ua.UserAccountKey;
I'm not sure what variant of SQL you're using, but couldn't a simple OR clause do the trick?
-- Returns the keys of accounts that share surname and DOB with a different
-- account in #Accounts.
-- @IgnoreExclusions = 1 : both exclusion lists are ignored.
-- any other value       : rows whose (Surname, DOB) pair or whose DOB appears in
--                         the corresponding exclusion list are filtered out.
DECLARE @IgnoreExclusions TINYINT = 1;
SELECT ua.UserAccountKey
FROM #Accounts x
INNER JOIN WH.dbo.vw_DimUserAccount ua
    ON x.Surname = ua.Surname
    AND x.DOB = ua.DOB
    AND x.UserAccountKey <> ua.UserAccountKey
WHERE
    -- the switch short-circuits BOTH exclusion checks, as the question asks
    @IgnoreExclusions = 1
    OR
    (
        EXISTS
        (
            -- pair is not in the surname/DOB exclusion list
            -- (EXCEPT treats two NULLs as equal, so NULL list entries are handled)
            SELECT x.Surname, x.DOB
            EXCEPT
            SELECT ExcludedSurname,ExcludedDOB
            FROM WH.dbo.Controltb_AssocAccounts_ExcludedSurnameDOB
        )
        AND
        EXISTS
        (
            -- DOB is not in the DOB exclusion list
            SELECT x.DOB
            EXCEPT
            SELECT ExcludedDOB
            FROM WH.dbo.Controltb_AssocAccounts_ExcludedDOB
        )
    )
GROUP BY ua.UserAccountKey;

JOIN Issue : Correct the SQL Statement to solve : ORA-01799: a column may not be outer-joined to a subquery

As you can see below, I need the condition on fx.ftf_validitystartdate = (subquery) inside the outer join. How can I implement it, given that Oracle does not allow me to write it as shown below?
-- Question's query. The last ON predicate compares an outer-joined column
-- (fx.ftf_validitystartdate) with a subquery, which is what raises ORA-01799.
select * from acc_accounts acc
join kp_paramcore p on
    acc.account_no = p.accountnum
    AND acc.suffix = p.suffixc      -- the two join predicates need an AND between them
LEFT JOIN ftf_rates fx
    ON p.maturestart = fx.ftf_vadealtsinir
    AND p.maturefinish = fx.ftf_vadeustsinir
    AND fx.statusrec = 'A'
    AND fx.currencycode = acc.currencsw_kod
    AND fx.status= 'A'
    -- latest validity start date among the active rates for the account's
    -- currency whose system date range contains v_CurrentDate
    and fx.ftf_validitystartdate= (SELECT MAX(ff.ftf_validitystartdate)
        FROM ftf_rates ff
        WHERE ff.status = 'A'
        AND ff.statusrec = 'A'
        AND v_CurrentDate BETWEEN ff.systemstartdate AND ff.systemfinishdate AND ff.currencycode = acc.currencsw_kod
    )
It should work if you switch this to a where clause:
-- Variant with the subquery comparison moved out of the outer join's ON clause
-- into WHERE, which avoids ORA-01799.
-- Because the WHERE predicate tests a column of fx, accounts without a matching
-- rate row (fx columns NULL) are filtered out by it.
select *
from acc_accounts acc join
     kp_paramcore p
     on acc.account_no = p.accountnum and
        acc.suffix = p.suffixc LEFT JOIN
     ftf_rates fx
     ON p.maturestart = fx.ftf_vadealtsinir and
        p.maturefinish = fx.ftf_vadeustsinir and
        fx.statusrec = 'A' and
        fx.currencycode = acc.currencsw_kod and
        fx.status= 'A'
where fx.ftf_validitystartdate= (SELECT MAX(ff.ftf_validitystartdate)
                                 FROM ftf_rates ff
                                 WHERE ff.status = 'A' and
                                       ff.statusrec = 'A' and   -- this "and" was missing
                                       -- assumes v_CurrentDate is a column of p - TODO confirm
                                       p.v_CurrentDate BETWEEN ff.systemstartdate AND ff.systemfinishdate AND ff.currencycode = acc.currencsw_kod
                                )
However, you lose the 'left outer join' characteristics, so you would also want to add: or fx.ftf_validitystartdate is null. I guess that v_CurrentDate comes from "p". It is always a good idea to use table aliases before column names.
However, I question whether the subquery is really needed. It is only needed when there is more than one record that meets the conditions inside the subquery. Otherwise, I think you can just change the on clause to be:
ON p.maturestart = fx.ftf_vadealtsinir and
p.maturefinish = fx.ftf_vadeustsinir and
fx.statusrec = 'A' and
fx.currencycode = acc.currencsw_kod and
fx.status= 'A'and
p.v_CurrentDate BETWEEN fx.systemstartdate AND fx.systemfinishdate
Here is a workaround using a CTE; I have tested it only in Oracle 11g.
To test it, I created this schema:
-- Test schema: three single-column tables, each loaded with the values 1, 2 and 3.
CREATE TABLE t_a (a INT);
CREATE TABLE t_b (a INT);
CREATE TABLE t_c (a INT);

INSERT INTO t_a VALUES (1);
INSERT INTO t_a VALUES (2);
INSERT INTO t_a VALUES (3);

INSERT INTO t_b VALUES (1);
INSERT INTO t_b VALUES (2);
INSERT INTO t_b VALUES (3);

INSERT INTO t_c VALUES (1);
INSERT INTO t_c VALUES (2);
INSERT INTO t_c VALUES (3);
Now I reproduce the error with this query:
-- Reproduces the error: the outer join's ON clause compares the outer-joined
-- column t_b.a with a subquery (the surrounding text reports this on Oracle 11g).
SELECT *
FROM t_a
LEFT OUTER JOIN t_b
    ON  t_a.a = t_b.a
    AND t_b.a = (SELECT MAX(a) FROM t_c);
And now I rewrite query with CTE:
-- Workaround: apply the subquery filter to t_b inside a CTE, then outer-join
-- t_a to the already-filtered rows, so the ON clause no longer contains a subquery.
-- The filter uses MAX(a), the same aggregate as the failing query, so both
-- statements describe the same result.
WITH cte (a) AS (
    SELECT t_b.a
    FROM t_b
    WHERE t_b.a = (SELECT MAX(a) FROM t_c)
)
SELECT *
FROM t_a
LEFT OUTER JOIN cte
    ON t_a.a = cte.a;
This second query runs without the error and returns the expected results.
Here is your query rewritten with a CTE:
-- latest_rates: for every ftf_rates row, keep it only if its validity start date
-- is the latest one among the active rates of the SAME currency whose system
-- date range contains v_CurrentDate.
-- The CTE cannot see the alias "acc" of the main query, so the subquery is
-- correlated on the rate row's own currency (r.currencycode); the outer join
-- below then ties that currency to acc.currencsw_kod.
-- NOTE(review): v_CurrentDate is left unqualified as in the question -
-- presumably a PL/SQL variable; confirm.
WITH latest_rates AS (
    SELECT r.*
    FROM ftf_rates r
    WHERE r.ftf_validitystartdate = (SELECT MAX(ff.ftf_validitystartdate)
                                     FROM ftf_rates ff
                                     WHERE ff.status = 'A'
                                       AND ff.statusrec = 'A'
                                       AND v_CurrentDate BETWEEN ff.systemstartdate
                                                             AND ff.systemfinishdate
                                       AND ff.currencycode = r.currencycode)
)
SELECT *
FROM acc_accounts acc
JOIN kp_paramcore p
    ON acc.account_no = p.accountnum
    AND acc.suffix = p.suffixc      -- the two join predicates need an AND between them
LEFT JOIN latest_rates fx
    ON p.maturestart = fx.ftf_vadealtsinir
    AND p.maturefinish = fx.ftf_vadeustsinir
    AND fx.statusrec = 'A'
    AND fx.currencycode = acc.currencsw_kod
    AND fx.status = 'A'
Note: this was only tested in Oracle 11g. See @a_horse_with_no_name's comment:
@danihp: CTEs were available long before Oracle 11g (I think they were
introduced in 9.1, maybe even earlier - but they are definitely
available in 10.x). 11.2 introduced recursive CTEs, which are not needed
in this case.