Search list of values including range in SQL using WHERE IN clause with SQL variable? - sql

I am trying to implement search functionality with list of values in SQL variable, including range. Appreciate any guidance/links pointing to correct approach for this.
Below is the dataset:
CREATE TABLE [dbo].[Books]
(
[ID] [NCHAR](10) NOT NULL,
[AUTHCODE] [NCHAR](10) NULL,
[TITLE] [NCHAR](10) NULL
) ON [PRIMARY]
GO
INSERT [dbo].[Books] ([ID], [AUTHCODE], [TITLE])
VALUES (N'1', N'nk', N'Book1'),
(N'2', N'an', N'Book2'),
(N'3', N'mn', N'Book3'),
(N'4', N'ra', N'Book4'),
(N'5', N'kd', N'Book5'),
(N'6', N'nk', N'Book6'),
(N'7', N'an', N'Book7'),
(N'8', N'ra', N'Book8'),
(N'9', N'kd', N'Book9'),
(N'10', N'mn', N'Book10 ')
GO
Below I am trying to filter using the SQL IN clause but this does not return desired result.
select * from books
declare #List1 varchar(max) = '2,4,6,7,8,9' --simple list
select *
from books
where id in (#List1)
declare #List2 varchar(max) = '2,4-7,9' --list with range
select *
from books
where id in (#List2)

You cannot directly use strings as lists, but you can do use STRING_SPLIT (Transact-SQL) if you really need to pass filtering parameters as strings:
declare #list varchar(max) = '2,4,6-8,9'
declare #filter table (id1 int, id2 int)
insert into #filter (id1,id2)
select
case when b.pos > 0 then left(a.[value], pos - 1) else a.[value] end as id1,
case when b.pos > 0 then right(a.[value], len(a.[value]) - pos) else a.[value] end as id2
from string_split(#list, ',') as a
cross apply (select charindex('-', a.[value]) as pos) as b
select *
from [dbo].[Books] as b
where
exists (select * from #filter as tt where b.id between tt.id1 and tt.id2)
Also it might be an idea to pass your filter as json and OPENJSON (Transact-SQL) so you can make parsing part simplier:
declare #list varchar(max) = '[2,4,[6,8],9]'
select
case when a.[type] = 4 then json_value(a.[value], '$[0]') else a.[value] end,
case when a.[type] = 4 then json_value(a.[value], '$[1]') else a.[value] end
from openjson(#list) as a
All above, of course, only applicable if you have Sql Server 2016 or higher

IN() operator determines whether a specified value matches any value in a subquery or a list. not a string and that what you are doing.
What you are trying to do can be done as
DECLARE #List1 AS TABLE (ID INT);
INSERT INTO #List1
SELECT 2 UNION SELECT 4 UNION SELECT 6 UNION
SELECT 7 UNION SELECT 8 UNION SELECT 9;
SELECT *
FROM Books
WHERE ID IN (SELECT ID FROM #List1);
DECLARE #List2 As TABLE (AFrom INT, ATo INT);
INSERT INTO #List2
SELECT 2, 4 UNION SELECT 7, 9;
SELECT *
FROM Books B CROSS APPLY #List2 L
WHERE B.ID BETWEEN L.AFrom AND L.ATo;
Live Demo

Related

How to traverse a path in a table with id & parentId?

Suppose I have a table like:
id | parentId | name
1 NULL A
2 1 B
3 2 C
4 1 E
5 3 E
I am trying to write a scalar function I can call as:
SELECT dbo.GetId('A/B/C/E') which would produce "5" if we use the above reference table. The function would do the following steps:
Find the ID of 'A' which is 1
Find the ID of 'B' whose parent is 'A' (id:1) which would be id:2
Find the ID of 'C' whose parent is 'B' (id:2) which would be id:3
Find the ID of 'E' whose parent is 'C' (id:3) which would be id:5
I was trying to do it with a WHILE loop but it was getting very complicated very fast... Just thinking there must be a simple way to do this.
CTE version is not optimized way to get the hierarchical data. (Refer MSDN Blog)
You should do something like as mentioned below. It's tested for 10 millions of records and is 300 times faster than CTE version :)
Declare #table table(Id int, ParentId int, Name varchar(10))
insert into #table values(1,NULL,'A')
insert into #table values(2,1,'B')
insert into #table values(3,2,'C')
insert into #table values(4,1,'E')
insert into #table values(5,3,'E')
DECLARE #Counter tinyint = 0;
IF OBJECT_ID('TEMPDB..#ITEM') IS NOT NULL
DROP TABLE #ITEM
CREATE TABLE #ITEM
(
ID int not null
,ParentID int
,Name VARCHAR(MAX)
,lvl int not null
,RootID int not null
)
INSERT INTO #ITEM
(ID,lvl,ParentID,Name,RootID)
SELECT Id
,0 AS LVL
,ParentId
,Name
,Id AS RootID
FROM
#table
WHERE
ISNULL(ParentId,-1) = -1
WHILE ##ROWCOUNT > 0
BEGIN
SET #Counter += 1
insert into #ITEM(ID,ParentId,Name,lvl,RootID)
SELECT ci.ID
,ci.ParentId
,ci.Name
,#Counter as cntr
,ch.RootID
FROM
#table AS ci
INNER JOIN
#ITEM AS pr
ON
CI.ParentId=PR.ID
LEFT OUTER JOIN
#ITEM AS ch
ON ch.ID=pr.ID
WHERE
ISNULL(ci.ParentId, -1) > 0
AND PR.lvl = #Counter - 1
END
select * from #ITEM
Here is an example of functional rcte based on your sample data and requirements as I understand them.
if OBJECT_ID('tempdb..#Something') is not null
drop table #Something
create table #Something
(
id int
, parentId int
, name char(1)
)
insert #Something
select 1, NULL, 'A' union all
select 2, 1, 'B' union all
select 3, 2, 'C' union all
select 4, 1, 'E' union all
select 5, 3, 'E'
declare #Root char(1) = 'A';
with MyData as
(
select *
from #Something
where name = #Root
union all
select s.*
from #Something s
join MyData d on d.id = s.parentId
)
select *
from MyData
Note that if you change the value of your variable the output will adjust. I would make this an inline table valued function.
I think I have it based on #SeanLange's recommendation to use a recursive CTE (above in the comments):
CREATE FUNCTION GetID
(
#path VARCHAR(MAX)
)
/* TEST:
SELECT dbo.GetID('A/B/C/E')
*/
RETURNS INT
AS
BEGIN
DECLARE #ID INT;
WITH cte AS (
SELECT p.id ,
p.parentId ,
CAST(p.name AS VARCHAR(MAX)) AS name
FROM tblT p
WHERE parentId IS NULL
UNION ALL
SELECT p.id ,
p.parentId ,
CAST(pcte.name + '/' + p.name AS VARCHAR(MAX)) AS name
FROM dbo.tblT p
INNER JOIN cte pcte ON
pcte.id = p.parentId
)
SELECT #ID = id
FROM cte
WHERE name = #path
RETURN #ID
END

In operator matching all rows

I want to return matching all values of csv as the traditional "in" operator matches any of the items present in csv:
SELECT * FROM #MyTable
WHERE [UserID] IN (1,2)
The above query will not serve my purpose as I want to match the rows which have both records for a group. In my case group will by typeid.
Query to populate the table:
DECLARE #MyTable TABLE
(
[TypeID] INT ,
[UserID] INT
)
INSERT INTO #MyTable
SELECT 1 ,
1
UNION
SELECT 1 ,
2
UNION
SELECT 2 ,
1
UNION
SELECT 2 ,
2
UNION
SELECT 2 ,
3
UNION
SELECT 3 ,
1
UNION
SELECT 3 ,
2
UNION
SELECT 3 ,
3
UNION
SELECT 3 ,
4
To query the above table I have input string of userid
DECLARE #UserIDString VARCHAR(256)
Here is my requirement:
When the input is '1,2'; I want typeid 1 as the output as that group has all the records present in csv.
If the input is '1,2,3' ; 2 typeid should be returned as that group has all the values present in csv.
If the input is '1,2,3,4' ; 3 typeid should be returned as that group has all the values present in csv.
EDIT:
Here is the split function to split the csv:
CREATE FUNCTION [dbo].[Split_String]
(
#inputString NVARCHAR(2000) ,
#delimiter NVARCHAR(20) = ' '
)
RETURNS #Strings TABLE
(
[position] INT IDENTITY
PRIMARY KEY ,
[value] NVARCHAR(2000)
)
AS
BEGIN
DECLARE #index INT
SET #index = -1
WHILE ( LEN(#inputString) > 0 )
BEGIN-- Find the first delimiter
SET #index = CHARINDEX(#delimiter, #inputString)
-- No delimiter left?
-- Insert the remaining #inputString and break the loop
IF ( #index = 0 )
AND ( LEN(#inputString) > 0 )
BEGIN
INSERT INTO #Strings
VALUES ( RTRIM(LTRIM(CAST(#inputString AS NVARCHAR(2000))) ))
BREAK
END
-- Found a delimiter
-- Insert left of the delimiter and truncate the #inputString
IF ( #index > 1 )
BEGIN
INSERT INTO #Strings
VALUES ( RTRIM(LTRIM(CAST(LEFT(#inputString, #index - 1) AS NVARCHAR(2000)) ) ))
SET #inputString = RIGHT(#inputString,
( LEN(#inputString) - #index ))
END -- Delimiter is 1st position = no #inputString to insert
ELSE
SET #inputString = CAST(RIGHT(#inputString,
( LEN(#inputString) - #index )) AS NVARCHAR(2000))
END
RETURN
END
GO
Edit:
Thanks #Tab, with further modifications I have come to solution:
DECLARE #InputString VARCHAR(256)
DECLARE #Count VARCHAR(256)
--SET #InputString = '1,2'
DECLARE #DummyTable TABLE
(
[position] INT ,
[value] INT
)
INSERT INTO #DummyTable
( [position] ,
[value]
)
SELECT [position] ,
[value]
FROM [dbo].[Split_String](#InputString, ',')
SELECT #Count = COUNT(1)
FROM #DummyTable
SELECT TypeID
FROM #MyTable
WHERE TypeID NOT IN (
SELECT TypeID
FROM #MyTable T
LEFT OUTER JOIN #DummyTable ss ON t.UserId = ss.Value
WHERE ss.Position IS NULL )
GROUP BY TypeID
HAVING COUNT(TypeID) = #Count
Using your split function, you can do an OUTER JOIN and make sure there are no NULL rows:
SELECT TypeID
FROM #MyTable
WHERE TypeID NOT IN (
SELECT TypeID
FROM #MyTable t
LEFT OUTER JOIN [dbo].[Split_String] (#InputString,',') ss
ON t.UserId=ss.Value
WHERE ss.Position IS NULL
) x
Untested, but I think that should do it.
However, this should return ALL the types that meet the requirement of:
that group has all the records present in csv.
In your question, you seem to imply that only one row should be returned, but why would that be the case if more than one row matches all the values in the csv? And what is the rule for determining which row is returned when there is more than one match?

Convert Comma Delimited String to bigint in SQL Server

I have a varchar string of delimited numbers separated by commas that I want to use in my SQL script but I need to compare with a bigint field in the database. Need to know to convert it:
DECLARE #RegionID varchar(200) = null
SET #RegionID = '853,834,16,467,841,460,495,44,859,457,437,836,864,434,86,838,458,472,832,433,142,154,159,839,831,469,442,275,840,299,446,220,300,225,227,447,301,450,230,837,441,835,302,477,855,411,395,279,303'
SELECT a.ClassAdID, -- 1
a.AdURL, -- 2
a.AdTitle, -- 3
a.ClassAdCatID, -- 4
b.ClassAdCat, -- 5
a.Img1, -- 6
a.AdText, -- 7
a.MemberID, -- 9
a.Viewed, -- 10
c.Domain, -- 11
a.CreateDate -- 12
FROM ClassAd a
INNER JOIN ClassAdCat b ON b.ClassAdCAtID = a.ClassAdCAtID
INNER JOIN Region c ON c.RegionID = a.RegionID
AND a.PostType = 'CPN'
AND DATEDIFF(d, GETDATE(), ExpirationDate) >= 0
AND a.RegionID IN (#RegionID)
AND Viewable = 'Y'
This fails with the following error:
Error converting data type varchar to bigint.
RegionID In the database is a bigint field.. need to convert the varchar to bigint.. any ideas..?
Many thanks in advance,
neojakey
create this function:
CREATE function [dbo].[f_split]
(
#param nvarchar(max),
#delimiter char(1)
)
returns #t table (val nvarchar(max), seq int)
as
begin
set #param += #delimiter
;with a as
(
select cast(1 as bigint) f, charindex(#delimiter, #param) t, 1 seq
union all
select t + 1, charindex(#delimiter, #param, t + 1), seq + 1
from a
where charindex(#delimiter, #param, t + 1) > 0
)
insert #t
select substring(#param, f, t - f), seq from a
option (maxrecursion 0)
return
end
change this part:
AND a.RegionID IN (select val from dbo.f_split(#regionID, ','))
Change this for better overall performance:
AND DATEDIFF(d, 0, GETDATE()) <= ExpirationDate
Your query does not know that those are separate values, you can use dynamic sql for this:
DECLARE #RegionID varchar(200) = null
SET #RegionID = '853,834,16,467,841,460,495,44,859,457,437,836,864,434,86,838,458,472,832,433,142,154,159,839,831,469,442,275,840,299,446,220,300,225,227,447,301,450,230,837,441,835,302,477,855,411,395,279,303'
declare #sql nvarchar(Max)
set #sql = 'SELECT a.ClassAdID, -- 1
a.AdURL, -- 2
a.AdTitle, -- 3
a.ClassAdCatID, -- 4
b.ClassAdCat, -- 5
a.Img1, -- 6
a.AdText, -- 7
a.MemberID, -- 9
a.Viewed, -- 10
c.Domain, -- 11
a.CreateDate -- 12
FROM ClassAd a
INNER JOIN ClassAdCat b ON b.ClassAdCAtID = a.ClassAdCAtID
INNER JOIN Region c ON c.RegionID = a.RegionID
AND a.PostType = ''CPN''
AND DATEDIFF(d, GETDATE(), ExpirationDate) >= 0
AND a.RegionID IN ('+#RegionID+')
AND Viewable = ''Y'''
exec sp_executesql #sql
I use this apporach sometimes and find it very good.
It transfors your comma-separated string into an AUX table (called #ARRAY) and then query the main table based on the AUX table:
declare #RegionID varchar(50)
SET #RegionID = '853,834,16,467,841,460,495,44,859,457,437,836,864,434,86,838,458,472,832,433,142,154,159,839,831,469,442,275,840,299,446,220,300,225,227,447,301,450,230,837,441,835,302,477,855,411,395,279,303'
declare #S varchar(20)
if LEN(#RegionID) > 0 SET #RegionID = #RegionID + ','
CREATE TABLE #ARRAY(region_ID VARCHAR(20))
WHILE LEN(#RegionID) > 0 BEGIN
SELECT #S = LTRIM(SUBSTRING(#RegionID, 1, CHARINDEX(',', #RegionID) - 1))
INSERT INTO #ARRAY (region_ID) VALUES (#S)
SELECT #RegionID = SUBSTRING(#RegionID, CHARINDEX(',', #RegionID) + 1, LEN(#RegionID))
END
select * from your_table
where regionID IN (select region_ID from #ARRAY)
It avoids you from ahving to concatenate the query string and then use EXEC to execute it, which I dont think it is a very good approach.
if you need to run the code twice you will need to drop the temp table
I think the answer should be kept simple.
Try using CHARINDEX like this:
DECLARE #RegionID VARCHAR(200) = NULL
SET #RegionID =
'853,834,16,467,841,460,495,44,859,457,437,836,864,434,86,838,458,472,832,433,142,154,159,839,831,469,442,275,840,299,446,220,300,225,227,447,301,450,230,837,441,835,302,477,855,411,395,279,303'
SELECT 1
WHERE Charindex('834', #RegionID) > 0
SELECT 1
WHERE Charindex('999', #RegionID) > 0
When CHARINDEX finds the value in the large string variable, it will return it's position, otherwise it return 0.
Use this as a search tool.
The easiest way to change this query is to replace the IN function with a string function. Here is what I consider the safest approach using LIKE (which is portable among databases):
AND ','+#RegionID+',' like '%,'+cast(a.RegionID as varchar(255))+',%'
Or CHARINDEX:
AND charindex(','+cast(a.RegionID as varchar(255))+',', ','+#RegionID+',') > 0
However, if you are explicitly putting the list in your code, why not use a temporary table?
declare #RegionIds table (RegionId int);
insert into #RegionIds
select 853 union all
select 834 union all
. . .
select 303
Then you can use the table in the IN clause:
AND a.RegionId in (select RegionId from #RegionIds)
or in a JOIN clause.
I like Diego's answer some, but I think my modification is a little better because you are declaring a table variable and not creating an actual table. I know the "in" statement can be a little slow, so I did an inner join since I needed some info from the Company table anyway.
declare #companyIdList varchar(1000)
set #companyIdList = '1,2,3'
if LEN(#companyIdList) > 0 SET #companyIdList = #companyIdList + ','
declare #CompanyIds TABLE (CompanyId bigint)
declare #S varchar(20)
WHILE LEN(#companyIdList) > 0 BEGIN
SELECT #S = LTRIM(SUBSTRING(#companyIdList, 1, CHARINDEX(',', #companyIdList) - 1))
INSERT INTO #CompanyIds (CompanyId) VALUES (#S)
SELECT #companyIdList = SUBSTRING(#companyIdList, CHARINDEX(',', #companyIdList) + 1, LEN(#companyIdList))
END
select d.Id, d.Name, c.Id, c.Name
from [Division] d
inner join [Company] c on d.CompanyId = c.Id
inner join #CompanyIds cids on c.Id = cids.CompanyId

t-sql find specific value with a csv string

I need some on help a SQL Query. I have a column with values stored as comma separated values.
I need to write a query which finds the 3rd delimited item within each value in the column.
Is this possible to do this in a Select statement?
ex: ColumnValue: josh,Reg01,False,a0-t0,22/09/2010
So I will need to get the 3rd value (i.e.) False from the above string.
Yes.
Where #s is your string...
select
SUBSTRING (#s,
CHARINDEX(',',#s,CHARINDEX(',',#s)+1)+1,
CHARINDEX(',',#s,CHARINDEX(',',#s,CHARINDEX(',',#s)+1)+1)
-CHARINDEX(',',#s,CHARINDEX(',',#s)+1)-1)
Or more generically...
;with cte as
(
select 1 as Item, 1 as Start, CHARINDEX(',',#s, 1) as Split
union all
select cte.Item+1, cte.Split+1, nullif(CHARINDEX(',',#s, cte.Split+1),0) as Split
from cte
where cte.Split<>0
)
select SUBSTRING(#s, start,isnull(split,len(#s)+1)-start)
from cte
where Item = 3
Now store your data properly :)
Try this (assuming SQL Server 2005+)
DECLARE #t TABLE(ColumnValue VARCHAR(50))
INSERT INTO #t(ColumnValue) SELECT 'josh,Reg01,False,a0-t0,22/09/2010'
INSERT INTO #t(ColumnValue) SELECT 'mango,apple,bannana,grapes'
INSERT INTO #t(ColumnValue) SELECT 'stackoverflow'
SELECT ThirdValue = splitdata
FROM(
SELECT
Rn = ROW_NUMBER() OVER(PARTITION BY ColumnValue ORDER BY (SELECT 1))
,X.ColumnValue
,Y.splitdata
FROM
(
SELECT *,
CAST('<X>'+REPLACE(F.ColumnValue,',','</X><X>')+'</X>' AS XML) AS xmlfilter FROM #t F
)X
CROSS APPLY
(
SELECT fdata.D.value('.','varchar(50)') AS splitdata
FROM X.xmlfilter.nodes('X') as fdata(D)
) Y
)X WHERE X.Rn = 3
//Result
ThirdValue
False
bannana
Also it is not very clear from your question as what version of SQL Server you are using. In case you are using SQL SERVER 2000, you can go ahead with the below approach.
Step 1: Create a number table
CREATE TABLE dbo.Numbers
(
N INT NOT NULL PRIMARY KEY
);
GO
DECLARE #rows AS INT;
SET #rows = 1;
INSERT INTO dbo.Numbers VALUES(1);
WHILE(#rows <= 10000)
BEGIN
INSERT INTO dbo.Numbers SELECT N + #rows FROM dbo.Numbers;
SET #rows = #rows * 2;
END
Step 2: Apply the query below
DECLARE #t TABLE(ColumnValue VARCHAR(50))
INSERT INTO #t(ColumnValue) SELECT 'josh,Reg01,False,a0-t0,22/09/2010'
INSERT INTO #t(ColumnValue) SELECT 'mango,apple,bannana,grapes'
INSERT INTO #t(ColumnValue) SELECT 'stackoverflow'
--Declare a table variable to put the identity column and store the indermediate results
DECLARE #tempT TABLE(Id INT IDENTITY,ColumnValue VARCHAR(50),SplitData VARCHAR(50))
-- Insert the records into the table variable
INSERT INTO #tempT
SELECT
ColumnValue
,SUBSTRING(ColumnValue, Numbers.N,CHARINDEX(',', ColumnValue + ',', Numbers.N) - Numbers.N) AS splitdata
FROM #t
JOIN Numbers ON Numbers.N <= DATALENGTH(ColumnValue) + 1
AND SUBSTRING(',' + ColumnValue, Numbers.N, 1) = ','
--Project the filtered records
SELECT ThirdValue = X.splitdata
FROM
--The co-related subquery does the ROW_NUMBER() OVER(PARTITION BY ColumnValue)
(SELECT
Rn = (SELECT COUNT(*)
FROM #tempT t2
WHERE t2.ColumnValue=t1.ColumnValue
AND t2.Id<=t1.Id)
,t1.ColumnValue
,t1.splitdata
FROM #tempT t1)X
WHERE X.Rn =3
-- Result
ThirdValue
False
bannana
Also you can use Master..spt_Values for your number table
DECLARE #t TABLE(ColumnValue VARCHAR(50))
INSERT INTO #t(ColumnValue) SELECT 'josh,Reg01,False,a0-t0,22/09/2010'
INSERT INTO #t(ColumnValue) SELECT 'mango,apple,bannana,grapes'
INSERT INTO #t(ColumnValue) SELECT 'stackoverflow'
--Declare a table variable to put the identity column and store the indermediate results
DECLARE #tempT TABLE(Id INT IDENTITY,ColumnValue VARCHAR(50),SplitData VARCHAR(50))
-- Insert the records into the table variable
INSERT INTO #tempT
SELECT
ColumnValue
,SUBSTRING(ColumnValue, Number ,CHARINDEX(',', ColumnValue + ',', Number ) - Number) AS splitdata
FROM #t
JOIN master..spt_values ON Number <= DATALENGTH(ColumnValue) + 1 AND type='P'
AND SUBSTRING(',' + ColumnValue, Number , 1) = ','
--Project the filtered records
SELECT ThirdValue = X.splitdata
FROM
--The co-related subquery does the ROW_NUMBER() OVER(PARTITION BY ColumnValue)
(SELECT
Rn = (SELECT COUNT(*)
FROM #tempT t2
WHERE t2.ColumnValue=t1.ColumnValue
AND t2.Id<=t1.Id)
,t1.ColumnValue
,t1.splitdata
FROM #tempT t1)X
WHERE X.Rn =3
You can read about this from
1) What is the purpose of system table table master..spt_values and what are the meanings of its values?
2) Why (and how) to split column using master..spt_values?
You really need something like String.Split(',')(2) which unfortunately dos not exist in SQL but this may be helpful to you
You can make some test with this solution and the other ones but, I believe that using XML in such situations almost always gives to you best performance and insure less coding:
DECLARE #InPutCSV NVARCHAR(2000)= 'josh,Reg01,False,a0-t0,22/09/2010'
DECLARE #ValueIndexToGet INT=3
DECLARE #XML XML = CAST ('<d>' + REPLACE(#InPutCSV, ',', '</d><d>') + '</d>' AS XML);
WITH CTE(RecordNumber,Value) AS
(
SELECT ROW_NUMBER() OVER(ORDER BY T.v.value('.', 'NVARCHAR(100)') DESC) AS RecordNumber
,T.v.value('.', 'NVARCHAR(100)') AS Value
FROM #XML.nodes('/d') AS T(v)
)
SELECT Value
FROM CTE WHERE RecordNumber=#ValueIndexToGet
I can confirm that it takes 1 seconds to get value from CSV string with 100 000 values.

The most elegant way to generate permutations in SQL server

Given a the following table:
Index | Element
---------------
1 | A
2 | B
3 | C
4 | D
We want to generate all the possible permutations (without repetitions) using the elements.
the final result (skipping some rows) will look like this:
Results
----------
ABCD
ABDC
ACBD
ACDB
ADAC
ADCA
...
DABC
DACB
DBCA
DBAC
DCAB
DCBA
(24 Rows)
How would you do it?
After making some perhaps snarky comments, this problem stuck in my brain all evening, and I eventually came up with the following set-based approach. I believe it definitely qualifies as "elegant", but then I also think it qualifies as "kinda dumb". You make the call.
First, set up some tables:
-- For testing purposes
DROP TABLE Source
DROP TABLE Numbers
DROP TABLE Results
-- Add as many rows as need be processed--though note that you get N! (number of rows, factorial) results,
-- and that gets big fast. The Identity column must start at 1, or the algorithm will have to be adjusted.
-- Element could be more than char(1), though the algorithm would have to be adjusted again, and each element
-- must be the same length.
CREATE TABLE Source
(
SourceId int not null identity(1,1)
,Element char(1) not null
)
INSERT Source (Element) values ('A')
INSERT Source (Element) values ('B')
INSERT Source (Element) values ('C')
INSERT Source (Element) values ('D')
--INSERT Source (Element) values ('E')
--INSERT Source (Element) values ('F')
-- This is a standard Tally table (or "table of numbers")
-- It only needs to be as long as there are elements in table Source
CREATE TABLE Numbers (Number int not null)
INSERT Numbers (Number) values (1)
INSERT Numbers (Number) values (2)
INSERT Numbers (Number) values (3)
INSERT Numbers (Number) values (4)
INSERT Numbers (Number) values (5)
INSERT Numbers (Number) values (6)
INSERT Numbers (Number) values (7)
INSERT Numbers (Number) values (8)
INSERT Numbers (Number) values (9)
INSERT Numbers (Number) values (10)
-- Results are iteratively built here. This could be a temp table. An index on "Length" might make runs
-- faster for large sets. Combo must be at least as long as there are characters to be permuted.
CREATE TABLE Results
(
Combo varchar(10) not null
,Length int not null
)
Here's the routine:
SET NOCOUNT on
DECLARE
#Loop int
,#MaxLoop int
-- How many elements there are to process
SELECT #MaxLoop = max(SourceId)
from Source
-- Initialize first value
TRUNCATE TABLE Results
INSERT Results (Combo, Length)
select Element, 1
from Source
where SourceId = 1
SET #Loop = 2
-- Iterate to add each element after the first
WHILE #Loop <= #MaxLoop
BEGIN
-- See comments below. Note that the "distinct" remove duplicates, if a given value
-- is to be included more than once
INSERT Results (Combo, Length)
select distinct
left(re.Combo, #Loop - nm.Number)
+ so.Element
+ right(re.Combo, nm.Number - 1)
,#Loop
from Results re
inner join Numbers nm
on nm.Number <= #Loop
inner join Source so
on so.SourceId = #Loop
where re.Length = #Loop - 1
-- For performance, add this in if sets will be large
--DELETE Results
-- where Length <> #Loop
SET #Loop = #Loop + 1
END
-- Show results
SELECT *
from Results
where Length = #MaxLoop
order by Combo
The general idea is: when adding a new element (say "B") to any string (say, "A"), to catch all permutations you would add B
to all possible positions (Ba, aB), resulting in a new set of strings. Then iterate: Add a new element (C) to each position in a string
(AB becomes Cab, aCb, abC), for all strings (Cba, bCa, baC), and you have the set of permutations. Iterate over each result set with
the next character until you run out of characters... or resources. 10 elements is 3.6 million permutations, roughly 48MB with the above algorithm, and 14 (unique) elements would hit 87 billion permutations and 1.163 terabytes.
I'm sure it could eventually be wedged into a CTE, but in the end all that would be is a glorified loop. The logic
is clearer this way, and I can't help but think the CTE execution plan would be a nightmare.
DECLARE #s VARCHAR(5);
SET #s = 'ABCDE';
WITH Subsets AS (
SELECT CAST(SUBSTRING(#s, Number, 1) AS VARCHAR(5)) AS Token,
CAST('.'+CAST(Number AS CHAR(1))+'.' AS VARCHAR(11)) AS Permutation,
CAST(1 AS INT) AS Iteration
FROM dbo.Numbers WHERE Number BETWEEN 1 AND 5
UNION ALL
SELECT CAST(Token+SUBSTRING(#s, Number, 1) AS VARCHAR(5)) AS Token,
CAST(Permutation+CAST(Number AS CHAR(1))+'.' AS VARCHAR(11)) AS
Permutation,
s.Iteration + 1 AS Iteration
FROM Subsets s JOIN dbo.Numbers n ON s.Permutation NOT LIKE
'%.'+CAST(Number AS CHAR(1))+'.%' AND s.Iteration < 5 AND Number
BETWEEN 1 AND 5
--AND s.Iteration = (SELECT MAX(Iteration) FROM Subsets)
)
SELECT * FROM Subsets
WHERE Iteration = 5
ORDER BY Permutation
Token Permutation Iteration
----- ----------- -----------
ABCDE .1.2.3.4.5. 5
ABCED .1.2.3.5.4. 5
ABDCE .1.2.4.3.5. 5
(snip)
EDBCA .5.4.2.3.1. 5
EDCAB .5.4.3.1.2. 5
EDCBA .5.4.3.2.1. 5
first posted a while ago here
However, it would be better to do it in a better language such as C# or C++.
Just using SQL, without any code, you could do it if you can crowbar yourself another column into the table. Clearly you need to have one joined table for each of the values to be permuted.
with llb as (
select 'A' as col,1 as cnt union
select 'B' as col,3 as cnt union
select 'C' as col,9 as cnt union
select 'D' as col,27 as cnt
)
select a1.col,a2.col,a3.col,a4.col
from llb a1
cross join llb a2
cross join llb a3
cross join llb a4
where a1.cnt + a2.cnt + a3.cnt + a4.cnt = 40
Am I correctly understanding that you built Cartesian product n x n x n x n, and then filter out unwanted stuff? The alternative would be generating all the numbers up to n! and then using factorial number system to map them via element encoding.
Simpler than a recursive CTE:
declare #Number Table( Element varchar(MAX), Id varchar(MAX) )
Insert Into #Number Values ( 'A', '01')
Insert Into #Number Values ( 'B', '02')
Insert Into #Number Values ( 'C', '03')
Insert Into #Number Values ( 'D', '04')
select a.Element, b.Element, c.Element, d.Element
from #Number a
join #Number b on b.Element not in (a.Element)
join #Number c on c.Element not in (a.Element, b.Element)
join #Number d on d.Element not in (a.Element, b.Element, c.Element)
order by 1, 2, 3, 4
For an arbitrary number of elements, script it out:
if object_id('tempdb..#number') is not null drop table #number
create table #number (Element char(1), Id int, Alias as '_'+convert(varchar,Id))
insert #number values ('A', 1)
insert #number values ('B', 2)
insert #number values ('C', 3)
insert #number values ('D', 4)
insert #number values ('E', 5)
declare #sql nvarchar(max)
set #sql = '
select '+stuff((
select char(13)+char(10)+'+'+Alias+'.Element'
from #number order by Id for xml path (''), type
).value('.','NVARCHAR(MAX)'),3,1,' ')
set #sql += '
from #number '+(select top 1 Alias from #number order by Id)
set #sql += (
select char(13)+char(10)+'join #number '+Alias+' on '+Alias+'.Id not in ('
+stuff((
select ', '+Alias+'.Id'
from #number b where a.Id > b.Id
order by Id for xml path ('')
),1,2,'')
+ ')'
from #number a where Id > (select min(Id) from #number)
order by Element for xml path (''), type
).value('.','NVARCHAR(MAX)')
set #sql += '
order by 1'
print #sql
exec (#sql)
To generate this:
select
_1.Element
+_2.Element
+_3.Element
+_4.Element
+_5.Element
from #number _1
join #number _2 on _2.Id not in (_1.Id)
join #number _3 on _3.Id not in (_1.Id, _2.Id)
join #number _4 on _4.Id not in (_1.Id, _2.Id, _3.Id)
join #number _5 on _5.Id not in (_1.Id, _2.Id, _3.Id, _4.Id)
order by 1
This method uses a binary mask to select the correct rows:
;with src(t,n,p) as (
select element, index, power(2,index-1)
from table
)
select s1.t+s2.t+s3.t+s4.t
from src s1, src s2, src s3, src s4
where s1.p+s2.p+s3.p+s4.p=power(2,4)-1
My original post:
declare #t varchar(4) = 'ABCD'
;with src(t,n,p) as (
select substring(#t,1,1),1,power(2,0)
union all
select substring(#t,n+1,1),n+1,power(2,n)
from src
where n < len(#t)
)
select s1.t+s2.t+s3.t+s4.t
from src s1, src s2, src s3, src s4
where s1.p+s2.p+s3.p+s4.p=power(2,len(#t))-1
This is one of those problems that haunts you. I liked the simplicity of my original answer but there was this issue where I was still building all the possible solutions and then selecting the correct ones. One more try to make this process more efficient by only building the solutions that were correct yielded this answer. Add a character to the string only if that character didn't exist in the string. Patindex seemed like the perfect companion for a CTE solution. Here it is.
declare #t varchar(10) = 'ABCDEFGHIJ'
;with s(t,n) as (
select substring(#t,1,1),1
union all
select substring(#t,n+1,1),n+1
from s where n<len(#t)
)
,j(t) as (
select cast(t as varchar(10)) from s
union all
select cast(j.t+s.t as varchar(10))
from j,s where patindex('%'+s.t+'%',j.t)=0
)
select t from j where len(t)=len(#t)
I was able to build all 3.6 million solutions in 3 minutes and 2 seconds. Hopefully this solution will not get missed just because it's not the first.
Current solution using a recursive CTE.
-- The base elements
Declare #Number Table( Element varchar(MAX), Id varchar(MAX) )
Insert Into #Number Values ( 'A', '01')
Insert Into #Number Values ( 'B', '02')
Insert Into #Number Values ( 'C', '03')
Insert Into #Number Values ( 'D', '04')
-- Number of elements
Declare #ElementsNumber int
Select #ElementsNumber = COUNT(*)
From #Number;
-- Permute!
With Permutations( Permutation, -- The permutation generated
Ids, -- Which elements where used in the permutation
Depth ) -- The permutation length
As
(
Select Element,
Id + ';',
Depth = 1
From #Number
Union All
Select Permutation + ' ' + Element,
Ids + Id + ';',
Depth = Depth + 1
From Permutations,
#Number
Where Depth < #ElementsNumber And -- Generate only the required permutation number
Ids Not like '%' + Id + ';%' -- Do not repeat elements in the permutation (this is the reason why we need the 'Ids' column)
)
Select Permutation
From Permutations
Where Depth = #ElementsNumber
Assuming your table is named Elements and has 4 rows, this is as simple as:
select e1.Element + e2.Element + e3.Element + e4.Element
from Elements e1
join Elements e2 on e2.Element != e1.Element
join Elements e3 on e3.Element != e2.Element AND e3.Element != e1.Element
join Elements e4 on e4.Element != e3.Element AND e4.Element != e2.Element AND e4.Element != e1.Element
Way too much rust on my SQL skills, but i took a different tack for a similar problem and thought it worth sharing.
Table1 - X strings in a single field Uno
Table2 - Y strings in a single field Dos
(SELECT Uno, Dos
FROM Table1
CROSS JOIN Table2 ON 1=1)
UNION
(SELECT Dos, Uno
FROM Table1
CROSS JOIN Table2 ON 1=1)
Same principle for 3 tables with an added CROSS JOIN
(SELECT Tres, Uno, Dos
FROM Table1
CROSS JOIN Table2 ON 1=1
CROSS JOIN Table3 ON 1=1)
although it takes 6 cross-join sets in the union.
--Hopefully this is a quick solution, just change the values going into #X
IF OBJECT_ID('tempdb.dbo.#X', 'U') IS NOT NULL DROP TABLE #X; CREATE table #X([Opt] [nvarchar](10) NOT NULL)
Insert into #X values('a'),('b'),('c'),('d')
declare #pSQL NVarChar(max)='select * from #X X1 ', #pN int =(select count(*) from #X), #pC int = 0;
while #pC<#pN begin
if #pC>0 set #pSQL = concat(#pSQL,' cross join #X X', #pC+1);
set #pC = #pC +1;
end
execute(#pSQL)
--or as single column result
IF OBJECT_ID('tempdb.dbo.#X', 'U') IS NOT NULL DROP TABLE #X; CREATE table #X([Opt] [nvarchar](10) NOT NULL)
Insert into #X values('a'),('b'),('c'),('d')
declare #pSQL NVarChar(max)=' as R from #X X1 ',#pSelect NVarChar(Max)=' ',#pJoin NVarChar(Max)='', #pN int =(select count(*) from #X), #pC int = 0;
while #pC<#pN begin
if #pC>0 set #pJoin = concat(#pJoin ,' cross join #X X', #pC+1) set #pSelect = concat(#pSelect ,'+ X', #pC+1,'.Opt ')
set #pC = #pC +1;
end
set #pSQL = concat ('select X1.Opt', #pSelect,#pSQL ,#pJoin)
exec(#pSQL)
create function GeneratePermutations (#string nvarchar(4000))
RETURNS #Permutations
TABLE(
name nVARCHAR(500)
)
AS
begin
declare #SplitedString table(name nvarchar(500))
insert into #SplitedString
select *
from string_split(#string,' ')
declare #CountOfWords as int
set #CountOfWords = (select count(*) from #SplitedString)
;with cte_Permutations (name, level) as (
select convert(nvarchar(500), name), 1 as level from #SplitedString
union all
select convert(nvarchar(500),splited.name+','+cte_Permutations.name),level+1
from #SplitedString splited ,cte_Permutations
where level < #CountOfWords
)
insert into #Permutations
select name
from cte_Permutations
where level = #CountOfWords
order by name
return
end
select *
From (
select 1 id,'a b c' msg
union all
select 2 id,'d e' msg
) p
cross apply dbo.GeneratePermutations(p.msg)