sql SERVER - distinct selection based on priority columns - sql

hello I would like to find a solution to solve my problem in a single request if possible.
For the moment I take all the records then I go through the lines one by one to eliminate what I don't want.
I have 2 tables : first table with links
the second with the prefered label for the url
the second table must be consulted keeping only the row with maximum priority
priority rules are
the current user then
the user group and finally
everyone.
if the hidden column is true, exclude any reference to the url
here is the expected result.
Unfortunately, I don't see any other solution than to multiply the conditions on several selects and unions.
if you have a idea to solve my problem, thank you in advance for your help

It appears as though you can rely on pref_id for the preference ordering, correct? If so, you could try:
SELECT *
FROM table2
INNER JOIN table1 ON table2.url_id = table1.url_id
QUALIFY ROW_NUMBER() OVER (
PARTITION BY table1.url
ORDER BY pref_id ASC
) = 1
This will partition by the url and then provide only the one with lowest pref_id.
I didn't test this SQL as I wasn't sure which RDBMS you're running on, but I used Rasgo to translate the SQL.

maybe of interest in this tricky query:
select so.*, table1.url from
(select distinct t.url_id,
(select pref_id from table2 s where s.url_id = t.url_id order by "user" is null, "group" is null limit 1) pref_id
from table2 t
where not exists(select 1 from table2 s where s.hide and s.url_id = t.url_id)
) ids
join table2 so on so.pref_id = ids.pref_id
join table1 ON table1.url_id = ids.url_id
order by so.url_id;

here is my solution but i think there is better to do.
in the condition's select, I built a column which gives a level note according to the priorities
DECLARE #CUR_USER VARCHAR(10) = 'ROBERT'
DECLARE #CUR_GROUP VARCHAR(10) = 'DEV'
DECLARE #TABLE1 TABLE (
URL_ID INT
,URLNAME VARCHAR(100)
);
DECLARE #TABLE2 TABLE (
PREF_ID INT
,URL_ID INT
,FAVORITE_LABEL VARCHAR(100)
,USER_GROUP VARCHAR(10)
,USER_CODE VARCHAR(10)
,HIDE_URL DECIMAL(1, 0) DEFAULT 0
);
INSERT INTO #TABLE1
VALUES
(1, 'https://stackoverflow.com/')
,(2, 'https://www.microsoft.com/')
,(3, 'https://www.apple.com/')
,(4, 'https://www.wikipedia.org/')
;
INSERT INTO #TABLE2
VALUES
(1000, 1, 'find everything', NULL, 'ROBERT', 0)
,(1001, 1, 'a question ? find the answer here', 'DEV', NULL, 0)
,(1002, 1, 'StackOverFlow', NULL, NULL, 0)
,(1003, 2, 'Windows', 'DEV', NULL, 0)
,(1004, 2, 'Microsoft', NULL, NULL, 0)
,(1005, 3, 'Apple', NULL, NULL, 0)
,(1006, 4, 'Free encyclopedia', NULL, 'ROBERT', 1)
,(1007, 4, 'Wikipedia', NULL, NULL, 0)
,(1008, 1, 'StackOverFlow FOR MAT', 'MAT', NULL, 0)
,(1009, 2, 'Microsoft FOR MAT', 'MAT', NULL, 0)
,(1010, 3, 'Apple', 'MAT', NULL, 1)
,(1011, 4, 'Wikipedia FOR MAT', 'MAT', NULL, 0)
,(1012, 1, 'StackOverFlow', NULL, 'JEAN', 1)
,(1013, 2, 'Microsoft ', NULL, 'JEAN', 0)
,(1014, 3, 'Apple', NULL, 'JEAN', 0)
,(1015, 4, 'great encyclopedia', NULL, 'JEAN', 0)
;
SELECT t2.* ,t1.URLName
FROM #TABLE1 t1
INNER JOIN #TABLE2 t2 ON t1.URL_ID = t2.URL_ID
WHERE EXISTS (
SELECT 1
FROM (
SELECT TOP (1) test.PREF_ID
,CASE
-- if I do not comment this case: jean from the MAT group will not see apple
-- WHEN Hide_Url = 1
-- THEN 3
WHEN USER_code IS NOT NULL
THEN 2
WHEN USER_GROUP IS NOT NULL
THEN 1
ELSE 0
END AS ROW_LEVEL
FROM #TABLE2 test
WHERE (
(
test.USER_GROUP IS NULL
AND test.user_group IS NULL
AND test.USER_code IS NULL
)
OR (test.USER_GROUP = #CUR_GROUP)
OR (test.USER_code = #CUR_USER)
)
AND t2.URL_ID = test.URL_ID
ORDER BY ROW_LEVEL DESC
) test
WHERE test.PREF_ID = t2.PREF_ID
AND Hide_Url = 0
)

Simply use an ORDER BY clause that puts the preferred row first. You can use this in the window function ROW_NUMBER and work with this or use a lateral top(1) join with CROSS APPLY.
select *
from urls
cross apply
(
select top(1) *
from labels
where labels.url_id = urls.url_id
where [Group] is not null or [user] is not null or hide is not null
order by
case when [Group] is null then 2 else 1 end,
case when [user] is null then 2 else 1 end,
case when hide is null then 2 else 1 end
) top_labels
order by urls.url_id;

Related

How to Use Exists in self join

I want those Id whose Orgorder never equal to 1.
CREATE TABLE [dbo].[TEST](
[ORGORDER] [int] NULL,
[Id] [int] NOT NULL,
[ORGTYPE] [varchar](30) NULL,
ORGID INT NULL,
[LEAD] [decimal](19, 2) NULL
) ON [PRIMARY]
GO
INSERT [dbo].[TEST] ([ORGORDER], [Id], [ORGTYPE] ,ORGID, [LEAD]) VALUES (1, 100, N'ABC',1, NULL)
GO
INSERT [dbo].[TEST] ([ORGORDER], [Id], [ORGTYPE],ORGID, [LEAD]) VALUES (0, 100, N'ABC',2, 0)
GO
INSERT [dbo].[TEST] ([ORGORDER], [Id], [ORGTYPE],ORGID, [LEAD]) VALUES (0, 100, N'ACD',1, NULL)
GO
INSERT [dbo].[TEST] ([ORGORDER], [Id], [ORGTYPE],ORGID, [LEAD]) VALUES (0, 101, N'ABC',0, 0)
GO
INSERT [dbo].[TEST] ([ORGORDER], [Id], [ORGTYPE],ORGID, [LEAD]) VALUES (2, 101, N'ABC',4, NULL)
GO
I am using exists but getting my result.
Expected result is -
ID
101
You can do this with one pass of the data, and order all ORGORDER = 1 first, then if it's the first row and it has the ORGORDER value you want to exclude, you can just ignore it.
;WITH x AS
(
SELECT Id, rn = ROW_NUMBER() OVER
(PARTITION BY Id ORDER BY CASE WHEN ORGORDER = 1 THEN 1 ELSE 2 END)
FROM dbo.TEST
)
SELECT Id FROM x WHERE rn = 1 AND ORGORDER <> 1;
Example db<>fiddle
Use a subquery in a NOT EXISTS clause, linking the subquery table to the outer query table by ID:
SELECT DISTINCT T1.ID
FROM dbo.TEST AS T1
WHERE NOT EXISTS (
SELECT *
FROM dbo.TEST AS T2
WHERE T1.ID = T2.ID
AND T2.ORGORDER = 1
)
db<>fiddle
An option would be using an aggregation with a suitable HAVING clause such as
SELECT [Id]
FROM [dbo].[TEST]
GROUP BY [Id]
HAVING SUM(CASE WHEN [ORGORDER] = 1 THEN 1 ELSE 0 END) = 0
where if there's at least one value equals to 1 for the concerned column([ORGORDER]), then that [Id] column won't be listed as result.
Demo

SQL Server SELECT first occurrence OR if no occurrence SELECT other criteria

I am having an issue trying to form the proper SQL query for the job here. I have two tables, one is called CUSTOMER and the other is called CUSTOMER_CONTACT. To simplify this, I will only include the relevant column names.
CUSTOMER columns: ID, CUSTOMERNAME
CUSTOMER_CONTACT columns: ID, CUSTOMER_ID, CONTACT_VC, EMAIL
CUSTOMER_ID is the foreign key to link to the CUSTOMER table from CUSTOMER_CONTACT. CONTACT_VC is just the entry number for their contact information. There could be multiple CUSTOMER_CONTACT records for each customer, but they will have a unique CONTACT_VC.
EMAIL can be null/blank on some or all as well.
I need to select the first CUSTOMER_CONTACT entry where EMAIL is NOT NULL/blank but if none of the CUSTOMER_CONTACT entries have an email address, then select CUSTOMER_CONTACT WHERE CONTACT_VC = 1
Any suggestions on how to accomplish this?
The following approach uses ROW_NUMBER to retrieve a number based on your ordering logic within each CUSTOMER_ID group, then filters by the first record retrieved.
You may try the following:
SELECT
*
FROM (
SELECT
*,
ROW_NUMBER() OVER (
PARTITION BY CUSTOMER_ID
ORDER BY (CASE WHEN EMAIL IS NOT NULL THEN 0 ELSE 1 END),CONTACT_VC
) as rn
FROM
CUSTOMER_CONTACT
) t
WHERE rn=1
If you would like to join this to the customer table you may use the above query as a subquery eg
SELECT
c.*,
contact.*
FROM
CUSTOMER c
INNER JOIN (
SELECT
*,
ROW_NUMBER() OVER (
PARTITION BY CUSTOMER_ID
ORDER BY (CASE WHEN EMAIL IS NOT NULL THEN 0 ELSE 1 END),CONTACT_VC
) as rn
FROM
CUSTOMER_CONTACT
) contact ON c.ID = contact.CUSTOMER_ID and contact.rn=1
Here is almost the same answer as ggordon, but I used a common table expression and I think the ordering in the subquery portion should go by CONTACT_VS first then by non-NULL email addresses. I created some very simple test data to run this:
DECLARE #CUSTOMER AS TABLE
(
[ID] INT NOT NULL,
[CUSTOMERNAME] VARCHAR(10) NOT NULL
);
INSERT INTO #CUSTOMER
(
[ID],
[CUSTOMERNAME]
)
VALUES
(1, 'Alice'),
(2, 'Bob'),
(3, 'Cathy');
DECLARE #CUSTOMER_CONTACT AS TABLE
(
[ID] INT NOT NULL,
[CUSTOMER_ID] INT NOT NULL,
[CONTACT_VC] INT NOT NULL,
[EMAIL] VARCHAR(40) NULL
);
INSERT INTO #CUSTOMER_CONTACT
(
[ID],
[CUSTOMER_ID],
[CONTACT_VC],
[EMAIL]
)
VALUES
(1, 1, 1, 'alice#email.com'),
(2, 1, 2, 'alice#gmail.com'),
(3, 2, 1, NULL),
(4, 2, 2, 'bob#work.com'),
(5, 3, 1, NULL),
(6, 3, 2, NULL),
(7, 3, 3, NULL);
;WITH [cc]
AS (SELECT [ID],
[CUSTOMER_ID],
[CONTACT_VC],
[EMAIL],
ROW_NUMBER() OVER (PARTITION BY [CUSTOMER_ID]
ORDER BY [CONTACT_VC],
(CASE WHEN [EMAIL] IS NOT NULL THEN
0
ELSE
1
END
)
) AS [rn]
FROM #CUSTOMER_CONTACT)
SELECT [c].[ID], [c].[CUSTOMERNAME], [cc].[ID], [cc].[CUSTOMER_ID], [cc].[CONTACT_VC], [cc].[EMAIL]
FROM #CUSTOMER AS [c]
INNER JOIN [cc]
ON [c].[ID] = [cc].[CUSTOMER_ID]
AND [cc].[rn] = 1;
select * from CUSTOMER_CONTACT where EMAIL IS NOT NULL
union all
select * from CUSTOMER_CONTACT where
(CONTACT_VC=1 and NOT EXISTS (select 1 FROM CUSTOMER_CONTACT where EMAIL IS NOT NUL)
order by CONTACT_VC asc limit 1

Want to compare 4 different columns with the result of CTE

I have created a CTE (common table Expression) as follows:
DECLARE #N VARCHAR(100)
WITH CAT_NAM AS (
SELECT ID, NAME
FROM TABLE1
WHERE YEAR(DATE) = YEAR(GETDATE())
)
SELECT #N = STUFF((
SELECT ','''+ NAME+''''
FROM CAT_NAM
WHERE ID IN (20,23,25,30,37)
FOR XML PATH ('')
),1,1,'')
The result of above CTE is 'A','B','C','D','F'
Now I need to check 4 different columns CAT_NAM_1,CAT_NAM_2,CAT_NAM_3,CAT_NAM_4 in the result of CTE and form it as one column like follow:
Select
case when CAT_NAM_1 in (#N) then CAT_NAM_1
when CAT_NAM_2 in (#N) then CAT_NAM_2
when CAT_NAM_3 in (#N) then CAT_NAM_3
when CAT_NAM_4 in (#N) then CAT_NAM_4
end as CAT
from table2
When I'm trying to do the above getting error please help me to do.
If my approach is wrong help me with right one.
I am not exactly sure what you are trying to do, but if I understand the following script shows one possible technique. I have created some table variables to mimic the data you presented and then wrote a SELECT statement to do what I think you asked (but I am not sure).
DECLARE #TABLE1 AS TABLE (
ID INT NOT NULL,
[NAME] VARCHAR(10) NOT NULL,
[DATE] DATE NOT NULL
);
INSERT INTO #TABLE1(ID,[NAME],[DATE])
VALUES (20, 'A', '2021-01-01'), (23, 'B', '2021-02-01'),
(25, 'C', '2021-03-01'),(30, 'D', '2021-04-01'),
(37, 'E', '2021-05-01'),(40, 'F', '2021-06-01');
DECLARE #TABLE2 AS TABLE (
ID INT NOT NULL,
CAT_NAM_1 VARCHAR(10) NULL,
CAT_NAM_2 VARCHAR(10) NULL,
CAT_NAM_3 VARCHAR(10) NULL,
CAT_NAM_4 VARCHAR(10) NULL
);
INSERT INTO #TABLE2(ID,CAT_NAM_1,CAT_NAM_2,CAT_NAM_3,CAT_NAM_4)
VALUES (1,'A',NULL,NULL,NULL),(2,NULL,'B',NULL,NULL);
;WITH CAT_NAM AS (
SELECT ID, [NAME]
FROM #TABLE1
WHERE YEAR([DATE]) = YEAR(GETDATE())
AND ID IN (20,23,25,30,37,40)
)
SELECT CASE
WHEN EXISTS(SELECT 1 FROM CAT_NAM WHERE CAT_NAM.[NAME] = CAT_NAM_1) THEN CAT_NAM_1
WHEN EXISTS(SELECT 1 FROM CAT_NAM WHERE CAT_NAM.[NAME] = CAT_NAM_2) THEN CAT_NAM_2
WHEN EXISTS(SELECT 1 FROM CAT_NAM WHERE CAT_NAM.[NAME] = CAT_NAM_3) THEN CAT_NAM_3
WHEN EXISTS(SELECT 1 FROM CAT_NAM WHERE CAT_NAM.[NAME] = CAT_NAM_4) THEN CAT_NAM_4
ELSE '?' -- not sure what you want if there is no match
END AS CAT
FROM #TABLE2;
You can do a bit of set-based logic for this
SELECT
ct.NAME
FROM table2 t2
CROSS APPLY (
SELECT v.NAME
FROM (VALUES
(t2.CAT_NAM_1),
(t2.CAT_NAM_2),
(t2.CAT_NAM_3),
(t2.CAT_NAM_4)
) v(NAME)
INTERSECT
SELECT ct.NAM
FROM CAT_NAM ct
WHERE ct.ID IN (20,23,25,30,37)
) ct;

SQL return only distinct IDs from LEFT JOIN

I've inherited some fun SQL and am trying to figure out how to how to eliminate rows with duplicate IDs. Our indexes are stored in a somewhat columnar format and then we pivot all the rows into one with the values as different columns.
The below sample returns three rows of unique data, but the IDs are duplicated. I need just two rows with unique IDs (and the other columns that go along with it). I know I'll be losing some data, but I just need one matching row per ID to the query (first, top, oldest, newest, whatever).
I've tried using DISTINCT, GROUP BY, and ROW_NUMBER, but I keep getting the syntax wrong, or using them in the wrong place.
I'm also open to rewriting the query completely in a way that is reusable as I currently have to generate this on the fly (cardtypes and cardindexes are user defined) and would love to be able to create a stored procedure. Thanks in advance!
declare #cardtypes table ([ID] int, [Name] nvarchar(50))
declare #cards table ([ID] int, [CardTypeID] int, [Name] nvarchar(50))
declare #cardindexes table ([ID] int, [CardID] int, [IndexType] int, [StringVal] nvarchar(255), [DateVal] datetime)
INSERT INTO #cardtypes VALUES (1, 'Funny Cards')
INSERT INTO #cardtypes VALUES (2, 'Sad Cards')
INSERT INTO #cards VALUES (1, 1, 'Bunnies')
INSERT INTO #cards VALUES (2, 1, 'Dogs')
INSERT INTO #cards VALUES (3, 1, 'Cat')
INSERT INTO #cards VALUES (4, 1, 'Cat2')
INSERT INTO #cardindexes VALUES (1, 1, 1, 'Bunnies', null)
INSERT INTO #cardindexes VALUES (2, 1, 1, 'playing', null)
INSERT INTO #cardindexes VALUES (3, 1, 2, null, '2014-09-21')
INSERT INTO #cardindexes VALUES (4, 2, 1, 'Dogs', null)
INSERT INTO #cardindexes VALUES (5, 2, 1, 'playing', null)
INSERT INTO #cardindexes VALUES (6, 2, 1, 'poker', null)
INSERT INTO #cardindexes VALUES (7, 2, 2, null, '2014-09-22')
SELECT TOP(100)
[ID] = c.[ID],
[Name] = c.[Name],
[Keyword] = [colKeyword].[StringVal],
[DateAdded] = [colDateAdded].[DateVal]
FROM #cards AS c
LEFT JOIN #cardindexes AS [colKeyword] ON [colKeyword].[CardID] = c.ID AND [colKeyword].[IndexType] = 1
LEFT JOIN #cardindexes AS [colDateAdded] ON [colDateAdded].[CardID] = c.ID AND [colDateAdded].[IndexType] = 2
WHERE [colKeyword].[StringVal] LIKE 'p%' AND c.[CardTypeID] = 1
ORDER BY [DateAdded]
Edit:
While both solutions are valid, I ended up using the MAX() solution from #popovitsj as it was easier to implement. The issue of data coming from multiple rows doesn't really factor in for me as all rows are essentially part of the same record. I will most likely use both solutions depending on my needs.
Here's my updated query (as it didn't quite match the answer):
SELECT TOP(100)
[ID] = c.[ID],
[Name] = MAX(c.[Name]),
[Keyword] = MAX([colKeyword].[StringVal]),
[DateAdded] = MAX([colDateAdded].[DateVal])
FROM #cards AS c
LEFT JOIN #cardindexes AS [colKeyword] ON [colKeyword].[CardID] = c.ID AND [colKeyword].[IndexType] = 1
LEFT JOIN #cardindexes AS [colDateAdded] ON [colDateAdded].[CardID] = c.ID AND [colDateAdded].[IndexType] = 2
WHERE [colKeyword].[StringVal] LIKE 'p%' AND c.[CardTypeID] = 1
GROUP BY c.ID
ORDER BY [DateAdded]
You could use MAX or MIN to 'decide' on what to display for the other columns in the rows that are duplicate.
SELECT ID, MAX(Name), MAX(Keyword), MAX(DateAdded)
(...)
GROUP BY ID;
using row number windowed function along with a CTE will do this pretty well. For example:
;With preResult AS (
SELECT TOP(100)
[ID] = c.[ID],
[Name] = c.[Name],
[Keyword] = [colKeyword].[StringVal],
[DateAdded] = [colDateAdded].[DateVal],
ROW_NUMBER()OVER(PARTITION BY c.ID ORDER BY [colDateAdded].[DateVal]) rn
FROM #cards AS c
LEFT JOIN #cardindexes AS [colKeyword] ON [colKeyword].[CardID] = c.ID AND [colKeyword].[IndexType] = 1
LEFT JOIN #cardindexes AS [colDateAdded] ON [colDateAdded].[CardID] = c.ID AND [colDateAdded].[IndexType] = 2
WHERE [colKeyword].[StringVal] LIKE 'p%' AND c.[CardTypeID] = 1
ORDER BY [DateAdded]
)
SELECT * from preResult WHERE rn = 1

how to insert multiple rows with check for duplicate rows in a short way

I am trying to insert multiple records (~250) in a table (say MyTable) and would like to insert a new row only if it does not exist already.
I am using SQL Server 2008 R2 and got help from other threads like SQL conditional insert if row doesn't already exist.
While I am able to achieve that with following stripped script, I would like to know if there is a better (short) way to do this as I
have to repeat this checking for every row inserted. Since we need to execute this script only once during DB deployment, I am not too much
worried about performance.
INSERT INTO MyTable([Description], [CreatedDate], [CreatedBy], [ModifiedDate], [ModifiedBy], [IsActive], [IsDeleted])
SELECT N'ababab', GETDATE(), 1, NULL, NULL, 1, 0
WHERE NOT EXISTS(SELECT * FROM MyTable WITH (ROWLOCK, HOLDLOCK, UPDLOCK)
WHERE
([InstanceId] IS NULL OR [InstanceId] = 1)
AND [ChannelPartnerId] IS NULL
AND [CreatedBy] = 1)
UNION ALL
SELECT N'xyz', 1, GETDATE(), 1, NULL, NULL, 1, 0
WHERE NOT EXISTS(SELECT * FROM [dbo].[TemplateQualifierCategoryMyTest] WITH (ROWLOCK, HOLDLOCK, UPDLOCK)
WHERE
([InstanceId] IS NULL OR [InstanceId] = 1)
AND [ChannelPartnerId] IS NULL
AND [CreatedBy] = 1)
-- More SELECT statements goes here
You could create a temporary table with your descriptions, then insert them all into the MyTable with a select that will check for rows in the temporary table that is not yet present in your destination, (this trick in implemented by the LEFT OUTER JOIN in conjunction with the IS NULL for the MyTable.Description part in the WHERE-Clause):
DECLARE #Descriptions TABLE ([Description] VARCHAR(200) NOT NULL )
INSERT INTO #Descriptions ( Description )VALUES ( 'ababab' )
INSERT INTO #Descriptions ( Description )VALUES ( 'xyz' )
INSERT INTO dbo.MyTable
( Description ,
CreatedDate ,
CreatedBy ,
ModifiedDate ,
ModifiedBy ,
IsActive ,
IsDeleted
)
SELECT d.Description, GETDATE(), 1, NULL, NULL, 1, 0
FROM #Descriptions d
LEFT OUTER JOIN dbo.MyTable mt ON d.Description = mt.Description
WHERE mt.Description IS NULL