Insert only unique records in SQL

Insert only unique records in SQL - sql

I ran the below query in SQL to insert records (this is just a snippet)
insert into list_member (list_id, list_int_value , list_float_value , list_decimal_value , list_varchar_value, list_datetime_value, modified_by, asof_time, do_not_audit)
select 42, null, null, security_id, null, null, 10, null, null
from security
where not exists(select user_id_4 from list_member.user_id_4 where list_member.user_id_4 = security.user_id_4)
and deleted = 0 and user_id_4 in
(
'ES0125220311',
'ES0132105018',
'ES0167050915'
)
Now I have another list to insert, but only want to insert new records.
I'm unsure where to insert the additional 'where' clause so that it doesn't insert duplicates. I've come up with the below (which in theory should only add the final record), but the additional where clause in bold is likely wrong ...
insert into list_member (list_id, list_int_value , list_float_value , list_decimal_value , list_varchar_value, list_datetime_value, modified_by, asof_time, do_not_audit)
select 42, null, null, security_id, null, null, 10, null, null
from security
**where not exists(select user_id_4 from list_member.user_id_4 where list_member.user_id_4 = security.user_id_4)**
and deleted = 0 and user_id_4 in
(
'ES0125220311',
'ES0132105018',
'ES0167050915',
'ES0123456789'
)
Anyone able to assist?

I changed it to this instead
left join list_member
on list_member.list_id = 42 and list_member.list_decimal_value = security.security_id
Sorry for the trouble.

You can use corelated query as follows:
insert into list_member (list_id, list_int_value , list_float_value , list_decimal_value , list_varchar_value, list_datetime_value, modified_by, asof_time, do_not_audit)
select 42, null, null, security_id, null, null, 10, null, null
from security s
where not exists
(select 1 from list_member l where l.user_id_4 = s.user_id_4)
and deleted = 0 and user_id_4 in
(
'ES0125220311',
'ES0132105018',
'ES0167050915',
'ES0123456789'
)

Related

sql SERVER - distinct selection based on priority columns

hello I would like to find a solution to solve my problem in a single request if possible.
For the moment I take all the records then I go through the lines one by one to eliminate what I don't want.
I have 2 tables : first table with links
the second with the prefered label for the url
the second table must be consulted keeping only the row with maximum priority
priority rules are
the current user then
the user group and finally
everyone.
if the hidden column is true, exclude any reference to the url
here is the expected result.
Unfortunately, I don't see any other solution than to multiply the conditions on several selects and unions.
if you have a idea to solve my problem, thank you in advance for your help

It appears as though you can rely on pref_id for the preference ordering, correct? If so, you could try:
SELECT *
FROM table2
INNER JOIN table1 ON table2.url_id = table1.url_id
QUALIFY ROW_NUMBER() OVER (
PARTITION BY table1.url
ORDER BY pref_id ASC
) = 1
This will partition by the url and then provide only the one with lowest pref_id.
I didn't test this SQL as I wasn't sure which RDBMS you're running on, but I used Rasgo to translate the SQL.

maybe of interest in this tricky query:
select so.*, table1.url from
(select distinct t.url_id,
(select pref_id from table2 s where s.url_id = t.url_id order by "user" is null, "group" is null limit 1) pref_id
from table2 t
where not exists(select 1 from table2 s where s.hide and s.url_id = t.url_id)
) ids
join table2 so on so.pref_id = ids.pref_id
join table1 ON table1.url_id = ids.url_id
order by so.url_id;

here is my solution but i think there is better to do.
in the condition's select, I built a column which gives a level note according to the priorities
DECLARE #CUR_USER VARCHAR(10) = 'ROBERT'
DECLARE #CUR_GROUP VARCHAR(10) = 'DEV'
DECLARE #TABLE1 TABLE (
URL_ID INT
,URLNAME VARCHAR(100)
);
DECLARE #TABLE2 TABLE (
PREF_ID INT
,URL_ID INT
,FAVORITE_LABEL VARCHAR(100)
,USER_GROUP VARCHAR(10)
,USER_CODE VARCHAR(10)
,HIDE_URL DECIMAL(1, 0) DEFAULT 0
);
INSERT INTO #TABLE1
VALUES
(1, 'https://stackoverflow.com/')
,(2, 'https://www.microsoft.com/')
,(3, 'https://www.apple.com/')
,(4, 'https://www.wikipedia.org/')
;
INSERT INTO #TABLE2
VALUES
(1000, 1, 'find everything', NULL, 'ROBERT', 0)
,(1001, 1, 'a question ? find the answer here', 'DEV', NULL, 0)
,(1002, 1, 'StackOverFlow', NULL, NULL, 0)
,(1003, 2, 'Windows', 'DEV', NULL, 0)
,(1004, 2, 'Microsoft', NULL, NULL, 0)
,(1005, 3, 'Apple', NULL, NULL, 0)
,(1006, 4, 'Free encyclopedia', NULL, 'ROBERT', 1)
,(1007, 4, 'Wikipedia', NULL, NULL, 0)
,(1008, 1, 'StackOverFlow FOR MAT', 'MAT', NULL, 0)
,(1009, 2, 'Microsoft FOR MAT', 'MAT', NULL, 0)
,(1010, 3, 'Apple', 'MAT', NULL, 1)
,(1011, 4, 'Wikipedia FOR MAT', 'MAT', NULL, 0)
,(1012, 1, 'StackOverFlow', NULL, 'JEAN', 1)
,(1013, 2, 'Microsoft ', NULL, 'JEAN', 0)
,(1014, 3, 'Apple', NULL, 'JEAN', 0)
,(1015, 4, 'great encyclopedia', NULL, 'JEAN', 0)
;
SELECT t2.* ,t1.URLName
FROM #TABLE1 t1
INNER JOIN #TABLE2 t2 ON t1.URL_ID = t2.URL_ID
WHERE EXISTS (
SELECT 1
FROM (
SELECT TOP (1) test.PREF_ID
,CASE
-- if I do not comment this case: jean from the MAT group will not see apple
-- WHEN Hide_Url = 1
-- THEN 3
WHEN USER_code IS NOT NULL
THEN 2
WHEN USER_GROUP IS NOT NULL
THEN 1
ELSE 0
END AS ROW_LEVEL
FROM #TABLE2 test
WHERE (
(
test.USER_GROUP IS NULL
AND test.user_group IS NULL
AND test.USER_code IS NULL
)
OR (test.USER_GROUP = #CUR_GROUP)
OR (test.USER_code = #CUR_USER)
)
AND t2.URL_ID = test.URL_ID
ORDER BY ROW_LEVEL DESC
) test
WHERE test.PREF_ID = t2.PREF_ID
AND Hide_Url = 0
)

Simply use an ORDER BY clause that puts the preferred row first. You can use this in the window function ROW_NUMBER and work with this or use a lateral top(1) join with CROSS APPLY.
select *
from urls
cross apply
(
select top(1) *
from labels
where labels.url_id = urls.url_id
where [Group] is not null or [user] is not null or hide is not null
order by
case when [Group] is null then 2 else 1 end,
case when [user] is null then 2 else 1 end,
case when hide is null then 2 else 1 end
) top_labels
order by urls.url_id;

Looping through groups of records

SQL Server 2014, I have a table with a number of rows for example 15, 5 have a groupid column of 736881 and 10 have a group id column 3084235. What I want to do is process each group of records in turn and load the results in to a table.
I have written the code to do this but I think I am not setting the loopcounter incorrectly set as I keep getting the groupid of records 736881 loaded twice.
I cant't currently post the test data due to containing personal information but if the mistake is not obvious I will try and create some dummy data.
SELECT #LoopCounter = min(rowfilter) , #maxrowfilter = max(rowfilter)
FROM peops6
WHILE ( #LoopCounter IS NOT NULL
AND #LoopCounter <= #maxrowfilter)
begin
declare #customer_dist as Table (
[id] [int] NOT NULL,
[First_Name] [varchar](50) NULL,
[Last_Name] [varchar](50) NULL,
[DoB] [date] NULL,
[post_code] [varchar](50) NULL,
[mobile] [varchar](50) NULL,
[Email] [varchar](100) NULL );
INSERT INTO #customer_dist (id, First_Name, Last_Name, DoB, post_code, mobile, Email)
select id, first_name, last_name, dob, postcode, mobile_phone, email from peops6 where rowfilter = #LoopCounter
insert into results
SELECT result.* ,
[dbo].GetPercentageOfTwoStringMatching(result.DoB, d.DoB) [DOB%match] ,
[dbo].GetPercentageOfTwoStringMatching(result.post_code, d.post_code) [post_code%match] ,
[dbo].GetPercentageOfTwoStringMatching(result.mobile, d.mobile) [mobile%match] ,
[dbo].GetPercentageOfTwoStringMatching(result.Email, d.Email) [email%match]
FROM ( SELECT ( SELECT MIN(id)
FROM #customer_dist AS sq
WHERE sq.First_Name = cd.First_Name
AND sq.Last_Name = cd.Last_Name
AND ( sq.DoB = cd.DoB
OR sq.mobile = cd.mobile
OR sq.Email = cd.Email
OR sq.post_code = cd.post_code )) nid ,
*
FROM #customer_dist AS cd ) AS result
INNER JOIN #customer_dist d ON result.nid = d.id order by 1, 2 asc;
SELECT #LoopCounter = min(rowfilter) FROM peops6
WHERE rowfilter > #LoopCounter
end

You need to truncate your table variable (#customer_dist) at the end of the loop:
....
-- Add this
TRUNCATE TABLE #customer_dist
SELECT #LoopCounter = min(rowfilter) FROM peops6
WHERE rowfilter > #LoopCounter
end
See: https://social.msdn.microsoft.com/Forums/sqlserver/en-US/42ef20dc-7ad8-44f7-b676-a4596fc0d593/declaring-a-table-variable-inside-a-loop-does-not-delete-the-previous-data?forum=transactsql

I am not sure you need a LOOP like using a SQL Cursor to fulfill this task
Please check following SQL statement where I used multiple CTE expressions
with customer_dist as (
select
rowfilter,
id, first_name, last_name, dob, postcode, mobile_phone, email
from peops6
), result as (
SELECT
(
SELECT
MIN(id)
FROM customer_dist AS sq
WHERE
sq.rowfilter = cd.rowfilter
AND sq.First_Name = cd.First_Name
AND sq.Last_Name = cd.Last_Name
AND (sq.DoB = cd.DoB OR sq.mobile_phone = cd.mobile_phone OR sq.Email = cd.Email OR sq.postcode = cd.postcode )
) nid,
*
FROM customer_dist AS cd
)
SELECT
result.* ,
[dbo].edit_distance(result.DoB, d.DoB) [DOB%match] ,
[dbo].edit_distance(result.postcode, d.postcode) [post_code%match] ,
[dbo].edit_distance(result.mobile_phone, d.mobile_phone) [mobile%match] ,
[dbo].edit_distance(result.Email, d.Email) [email%match]
FROM result
INNER JOIN customer_dist d
ON result.nid = d.id
order by 1, 2 asc;
Please note, I used my fuzzy string matching Levenshtein Distance Algorithm in this sample instead of your function
And the outcome is as follows
Only you need to add the INSERT statement just before the last SELECT statement
Hope it is useful

In SQL replace null value with another value

Here is my SQL Query
SELECT p.StudentID, ai.RollNo, p.FirstName, p.MiddleName, p.LastName,
om.ExamID, et.ExamName, om.SubjectID,
ISNULL(CONVERT(varchar(20),om.ObtainedMarksTheory), 'A') as 'ObtainedMarksTheory',
ISNULL(CONVERT(varchar(20),om.ObtainedPracticalMarks),'A') as 'ObtainedPracticalMarks'
FROM Students.PersonalInfo p
INNER JOIN Students.AcademicCourse ac on p.StudentID = ac.StudentID
INNER JOIN Students.AcademicInfo ai on p.StudentID=ai.StudentID
LEFT OUTER JOIN Exam.ObtainedMarkEntry om on p.StudentID = om.StudentID
LEFT JOIN Exam.ExamType et on om.ExamID = et.ExamID
WHERE ai.BatchID = '103' AND ai.SemesterID = '21' and ac.Section = '8'
This produce result as in a picture:
But I want result like this since those two students were absent in that exam
Similarly if another Exam does exists for any of three student and other are absent same procedure should repeat

Use IsNULL() Function see below example
Declare #variable varchar(MAX)
set #variable = NULL
select IsNULL(#variable,0) as A

Let's have the following sample data (it is like your sample data, just inserted in one table):
DECLARE #DataSource TABLE
(
[StudentID] TINYINT
,[RowNo] TINYINT
,[FirstName] VARCHAR(12)
,[MiddleName] VARCHAR(12)
,[LastName] VARCHAR(12)
,[ExamID] TINYINT
,[ExamName] VARCHAR(18)
,[SubjectID] TINYINT
,[ObtainedMarksTheory] VARCHAR(12)
,[ObtainedPracticalMarks] VARCHAR(12)
);
INSERT INTO #DataSource ([StudentID], [RowNo], [FirstName], [MiddleName], [LastName], [ExamID], [ExamName], [SubjectID], [ObtainedMarksTheory], [ObtainedPracticalMarks])
VALUES (101, 1, 'FN_A', 'MN_A', 'LN_A', NULL, NULL, NULL, 'A', 'A')
,(102, 2, 'FN_B', 'MN_B', 'LN_B', 28, 'First Tem2072', 97, '74.00', '56.00')
,(103, 3, 'FN_C', 'MN_C', 'LN_C', NULL, NULL, NULL, 'A', 'A');
SELECT *
FROM #DataSource;
So, I am supposing we have details only for one exam here and the question is how to replace the NULL values of ExamID, ExamName and SubjectID columns using the existing values.
The solution is to use the MAX function with OVER clause:
SELECT [StudentID]
,[RowNo]
,[FirstName]
,[MiddleName]
,[LastName]
,MAX([ExamID]) OVER() AS [ExamID]
,MAX([ExamName]) OVER() AS [ExamName]
,MAX([SubjectID]) OVER() AS [SubjectID]
,[ObtainedMarksTheory]
,[ObtainedPracticalMarks]
FROM #DataSource;
Note, that it is better to use the OVER clause as you are not going to add GROUP BY clauses in your initial query. Also, if you have details for more exams you can use the PARTITION BY clause (if you have some column(s) to distinguish the NULL rows for each exam).

how to insert multiple rows with check for duplicate rows in a short way

I am trying to insert multiple records (~250) in a table (say MyTable) and would like to insert a new row only if it does not exist already.
I am using SQL Server 2008 R2 and got help from other threads like SQL conditional insert if row doesn't already exist.
While I am able to achieve that with following stripped script, I would like to know if there is a better (short) way to do this as I
have to repeat this checking for every row inserted. Since we need to execute this script only once during DB deployment, I am not too much
worried about performance.
INSERT INTO MyTable([Description], [CreatedDate], [CreatedBy], [ModifiedDate], [ModifiedBy], [IsActive], [IsDeleted])
SELECT N'ababab', GETDATE(), 1, NULL, NULL, 1, 0
WHERE NOT EXISTS(SELECT * FROM MyTable WITH (ROWLOCK, HOLDLOCK, UPDLOCK)
WHERE
([InstanceId] IS NULL OR [InstanceId] = 1)
AND [ChannelPartnerId] IS NULL
AND [CreatedBy] = 1)
UNION ALL
SELECT N'xyz', 1, GETDATE(), 1, NULL, NULL, 1, 0
WHERE NOT EXISTS(SELECT * FROM [dbo].[TemplateQualifierCategoryMyTest] WITH (ROWLOCK, HOLDLOCK, UPDLOCK)
WHERE
([InstanceId] IS NULL OR [InstanceId] = 1)
AND [ChannelPartnerId] IS NULL
AND [CreatedBy] = 1)
-- More SELECT statements goes here

You could create a temporary table with your descriptions, then insert them all into the MyTable with a select that will check for rows in the temporary table that is not yet present in your destination, (this trick in implemented by the LEFT OUTER JOIN in conjunction with the IS NULL for the MyTable.Description part in the WHERE-Clause):
DECLARE #Descriptions TABLE ([Description] VARCHAR(200) NOT NULL )
INSERT INTO #Descriptions ( Description )VALUES ( 'ababab' )
INSERT INTO #Descriptions ( Description )VALUES ( 'xyz' )
INSERT INTO dbo.MyTable
( Description ,
CreatedDate ,
CreatedBy ,
ModifiedDate ,
ModifiedBy ,
IsActive ,
IsDeleted
)
SELECT d.Description, GETDATE(), 1, NULL, NULL, 1, 0
FROM #Descriptions d
LEFT OUTER JOIN dbo.MyTable mt ON d.Description = mt.Description
WHERE mt.Description IS NULL

Account Hierarchy ? Parent account has children in which child could become a parent

I have 2 tables. One table contains all parent accounts, top level of hierarchy. Second table has all children accounts, that may or may not have a match to a parent account in the parent table. The goal is to create a query (SQL Server 2008, recursive or non) that finds all child accounts that match to the parent in addition to the fact that the child could itself be a parent to other child accounts.
In simpler terms, once a match has been made on a parent to child, need to check to make sure that the child in the match is not itself a parent to other child accounts. A mouthful I understand and I hope it makes sense. I also do not know the depth of which the hierarchy would extend.
CREATE TABLE dbo.Parent_Accounts
(Parent_Account_Key_Lookup varchar(28) NOT NULL,
Account_Number bigint NOT NULL,
Reference_Account_Number_1 bigint NOT NULL,
Reference_Account_Number_2 bigint NOT NULL,
OpenDate int NOT NULL,
Status char(1) NOT NULL,
Record_Created smalldatetime NOT NULL,
Active bit NOT NULL)
GO
CREATE TABLE dbo.Child_Accounts
(Child_Account_Key_Lookup varchar(28) NOT NULL,
Account_Number bigint NOT NULL,
Reference_Account_Number_1 bigint NOT NULL,
Reference_Account_Number_2 bigint NOT NULL,
OpenDate int NOT NULL,
Status char(1) NOT NULL,
Record_Created smalldatetime NOT NULL,
Active bit NOT NULL)
GO
WITH cte_Recursive
AS (SELECT parent.Account_Number,
parent.Parent_Account_Key_Lookup,
parent.Reference_Account_Number_1,
parent.Reference_Account_Number_2,
parent.OpenDate,
parent.[Status],
parent.Record_Created,
parent.Active,
1 AS Hierarchy_Level
FROM dbo.Parent_Accounts parent
WHERE parent.Account_Number = 4498481055218674
UNION ALL
SELECT child.Account_Number,
child.Child_Account_Key_Lookup,
child.Reference_Account_Number_1,
child.Reference_Account_Number_2,
child.OpenDate,
child.[Status],
child.Record_Created,
child.Active,
cte.Hierarchy_Level + 1
FROM cte_Recursive cte
INNER JOIN dbo.Child_Accounts child
ON cte.Parent_Account_Key_Lookup = child.Child_Account_Key_Lookup)
--SELECT * FROM cte_Recursive
SELECT TOP 2 * FROM cte_Recursive
INSERT INTO dbo.Parent_Accounts
(Parent_Account_Key_Lookup,
Account_Number,
Reference_Account_Number_1,
Reference_Account_Number_2,
OpenDate,
[Status],
Record_Created,
Active)
VALUES ('222248105521867419970702', 2222481055218674, 2222481060975466, 0, 19970702, 'U', '2010-11-18 12:46:00', 0)
INSERT INTO dbo.Child_Accounts
(Child_Account_Key_Lookup,
Account_Number,
Reference_Account_Number_1,
Reference_Account_Number_2,
OpenDate,
[Status],
Record_Created,
Active)
VALUES ('222248105521867419970702', 2222481060975466, 2222481055218674, 2222481055218674, 19970702, 'L', '2010-11-19 08:33:00', 0),
('222248106097546619970702', 2222481060982900, 2222481060989137, 2222481060975466, 19970702, 'U', '2010-11-19 16:54:00', 0),
('222248106098290019970702', 2222481060989137, 0, 2222481060982900, 19970702, ' ', '2010-11-21 01:52:00', 1)

The problem appears to be an inability to specify how to determine that a child has children combined with a lack of identity for children. If the ON clause in the CTE could prevent a child joining to itself it would break the infinite recursion chain.
Sample output and answers to the comments should make it clear whether you simply need another condition in the ON clause or a UNION to handle childrens' children.
This at least stops recursing by limiting the hierarchy level and provides a testbed that anyone can execute without cluttering their database:
declare #Parent_Accounts as table (
Parent_Account_Key_Lookup varchar(28) NOT NULL,
Account_Number bigint NOT NULL,
Reference_Account_Number_1 bigint NOT NULL,
Reference_Account_Number_2 bigint NOT NULL,
OpenDate int NOT NULL,
Status char(1) NOT NULL,
Record_Created smalldatetime NOT NULL,
Active bit NOT NULL)
declare #Child_Accounts as table (
Child_Account_Key_Lookup varchar(28) NOT NULL,
Account_Number bigint NOT NULL,
Reference_Account_Number_1 bigint NOT NULL,
Reference_Account_Number_2 bigint NOT NULL,
OpenDate int NOT NULL,
Status char(1) NOT NULL,
Record_Created smalldatetime NOT NULL,
Active bit NOT NULL)
INSERT INTO #Parent_Accounts
(Parent_Account_Key_Lookup, Account_Number, Reference_Account_Number_1, Reference_Account_Number_2, OpenDate, [Status], Record_Created, Active) VALUES
('222248105521867419970702', 2222481055218674, 2222481060975466, 0, 19970702, 'U', '2010-11-18 12:46:00', 0)
INSERT INTO #Child_Accounts
(Child_Account_Key_Lookup, Account_Number, Reference_Account_Number_1, Reference_Account_Number_2, OpenDate, [Status], Record_Created, Active) VALUES
('222248105521867419970702', 2222481060975466, 2222481055218674, 2222481055218674, 19970702, 'L', '2010-11-19 08:33:00', 0),
('222248106097546619970702', 2222481060982900, 2222481060989137, 2222481060975466, 19970702, 'U', '2010-11-19 16:54:00', 0),
('222248106098290019970702', 2222481060989137, 0, 2222481060982900, 19970702, ' ', '2010-11-21 01:52:00', 1)
; WITH cte_Recursive
AS (SELECT parent.Account_Number,
parent.Parent_Account_Key_Lookup,
parent.Reference_Account_Number_1,
parent.Reference_Account_Number_2,
parent.OpenDate,
parent.[Status],
parent.Record_Created,
parent.Active,
1 AS Hierarchy_Level
FROM #Parent_Accounts parent
WHERE parent.Account_Number = 2222481055218674
UNION ALL
SELECT child.Account_Number,
child.Child_Account_Key_Lookup,
child.Reference_Account_Number_1,
child.Reference_Account_Number_2,
child.OpenDate,
child.[Status],
child.Record_Created,
child.Active,
cte.Hierarchy_Level + 1
FROM cte_Recursive cte
INNER JOIN #Child_Accounts child
ON cte.Parent_Account_Key_Lookup = child.Child_Account_Key_Lookup and cte.Hierarchy_Level < 2)
SELECT * FROM cte_Recursive

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

Insert only unique records in SQL - sql

I changed it to this instead left join list_member on list_member.list_id = 42 and list_member.list_decimal_value = security.security_id Sorry for the trouble.

Related

sql SERVER - distinct selection based on priority columns

Looping through groups of records

In SQL replace null value with another value

how to insert multiple rows with check for duplicate rows in a short way

Account Hierarchy ? Parent account has children in which child could become a parent

Categories

Resources