Can I select distinct/group by characters in a field in SQL? - sql

I'd like to run a query which for a given field will count the instances of a particular character.
For example if I had a table called 'Friends' with a 'Names' field containing rows: Edward, James, Mike. I'd get the output:
A 2,
D 2,
E 3,
I 1,
and so on...

The generic answer is that you need to split each name into a table of constituent characters, then count those.
You don't mention which RDBMS you're using. The answer varies a bit by database engine.
For example, this will work on SQL 2005+:
DECLARE #friends TABLE (NAMES VARCHAR(30))
DECLARE #maxLen INT
INSERT #friends (NAMES)
SELECT 'Edward'
UNION SELECT 'James'
UNION SELECT 'Mike'
SELECT #maxLen = MAX(LEN(NAMES)) FROM #friends
;WITH numsCte
AS
(
--dynamic numbers table. If you have a numbers table in your database
--use that instead, as it will be more efficient.
SELECT 1 AS n
UNION ALL
SELECT n+1 FROM numsCte
WHERE n < #maxLen
)
,charCTE
AS
(
--split the string into a dataset
SELECT *
FROM numsCte AS nm
CROSS APPLY (SELECT NAMES, SUBSTRING(NAMES, n, 1) AS splitChar
FROM #friends
) AS st
WHERE splitChar > ''
)
SELECT UPPER(splitChar) AS letter
,COUNT(1) AS cnt
FROM charCTE
GROUP BY splitChar
ORDER BY splitChar
But almost certainly won't work on any other database engines.

One way is to use a temporary table, and populate it in a WHILE loop:
declare #letters table (letter varchar(1))
declare #pos int
set #pos = 1
while 1=1
begin
insert into #letters
select substring(name,#pos,1)
from #names
where len(name) >= #pos
if ##rowcount = 0
break
set #pos = #pos + 1
end
select letter, count(*)
from #letters
group by letter
Another way is to create a list of valid character positions in a temporary table, or as in this example, with a recursive common table expression (CTE):
declare #maxLen int
select #maxLen = max(len(name)) from #names
;WITH CharPositions (i) AS (
select 1
union all
select i+1
from CharPositions
where i < #maxLen
)
select substring(n.name,cp.i,1), count(*)
from #names n
inner join CharPositions cp on cp.i <= len(n.name)
group by substring(n.name,cp.i,1)
I've tested the code samples against this dataset:
declare #names table (name varchar(max))
insert into #names values ('abc')
insert into #names values ('def')
insert into #names values ('def')
insert into #names values ('g')
insert into #names values ('g')
insert into #names values ('g')

Related

Dynamically Create tables and Insert into it from another table with CSV values

Have a Table with the CSV Values in the columns as below
ID Name text
1 SID,DOB 123,12/01/1990
2 City,State,Zip NewYork,NewYork,01234
3 SID,DOB 456,12/21/1990
What is need to get is 2 tables in this scenario as out put with the corresponding values
ID SID DOB
1 123 12/01/1990
3 456 12/21/1990
ID City State Zip
2 NewYork NewYork 01234
Is there any way of achieving it using a Cursor or any other method in SQL server?
There are several ways that this can be done. One way that I would suggest would be to split the data from the comma separated list into multiple rows.
Since you are using SQL Server, you could implement a recursive CTE to split the data, then apply a PIVOT function to create the columns that you want.
;with cte (id, NameItem, Name, textItem, text) as
(
select id,
cast(left(Name, charindex(',',Name+',')-1) as varchar(50)) NameItem,
stuff(Name, 1, charindex(',',Name+','), '') Name,
cast(left(text, charindex(',',text+',')-1) as varchar(50)) textItem,
stuff(text, 1, charindex(',',text+','), '') text
from yt
union all
select id,
cast(left(Name, charindex(',',Name+',')-1) as varchar(50)) NameItem,
stuff(Name, 1, charindex(',',Name+','), '') Name,
cast(left(text, charindex(',',text+',')-1) as varchar(50)) textItem,
stuff(text, 1, charindex(',',text+','), '') text
from cte
where Name > ''
and text > ''
)
select id, SID, DOB
into table1
from
(
select id, nameitem, textitem
from cte
where nameitem in ('SID', 'DOB')
) d
pivot
(
max(textitem)
for nameitem in (SID, DOB)
) piv;
See SQL Fiddle with Demo. The recursive version will work great but if you have a large dataset, you could have some performance issues so you could also use a user defined function to split the data:
create FUNCTION [dbo].[Split](#String1 varchar(MAX), #String2 varchar(MAX), #Delimiter char(1))
returns #temptable TABLE (colName varchar(MAX), colValue varchar(max))
as
begin
declare #idx1 int
declare #slice1 varchar(8000)
declare #idx2 int
declare #slice2 varchar(8000)
select #idx1 = 1
if len(#String1)<1 or #String1 is null return
while #idx1 != 0
begin
set #idx1 = charindex(#Delimiter,#String1)
set #idx2 = charindex(#Delimiter,#String2)
if #idx1 !=0
begin
set #slice1 = left(#String1,#idx1 - 1)
set #slice2 = left(#String2,#idx2 - 1)
end
else
begin
set #slice1 = #String1
set #slice2 = #String2
end
if(len(#slice1)>0)
insert into #temptable(colName, colValue) values(#slice1, #slice2)
set #String1 = right(#String1,len(#String1) - #idx1)
set #String2 = right(#String2,len(#String2) - #idx2)
if len(#String1) = 0 break
end
return
end;
Then you can use a CROSS APPLY to get the result for each row:
select id, SID, DOB
into table1
from
(
select t.id,
c.colname,
c.colvalue
from yt t
cross apply dbo.split(t.name, t.text, ',') c
where c.colname in ('SID', 'DOB')
) src
pivot
(
max(colvalue)
for colname in (SID, DOB)
) piv;
See SQL Fiddle with Demo
You'd need to approach this as a multi-step ETL project. I'd probably start with exporting the two types of rows into a couple staging tables. So, for example:
select * from yourtable /* rows that start with a number */
where substring(text,1,1) in
('0','1','2','3','4','5','6','7','8','9')
select * from yourtable /* rows that don't start with a number */
where substring(text,1,1)
not in ('0','1','2','3','4','5','6','7','8','9')
/* or simply this to follow your example explicitly */
select * from yourtable where name like 'sid%'
select * from yourtable where name like 'city%'
Once you get the two types separated then you can split them out with one of the already written split functions found readily out on the interweb.
Aaron Bertrand (who is on here often) has written up a great post on the variety of ways to split comma delimted strings using SQL. Each of the methods are compared and contrasted here.
http://www.sqlperformance.com/2012/07/t-sql-queries/split-strings
If your row count is minimal (under 50k let's say) and it's going to be a one time operation than pick the easiest way and don't worry too much about all the performance numbers.
If you have a ton of rows or this is an ETL process that will run all the time then you'll really want to pay attention to that stuff.
A simple solution using cursors to build temporary tables. This has the limitation of making all columns VARCHAR and would be slow for large amounts of data.
--** Set up example data
DECLARE #Source TABLE (ID INT, Name VARCHAR(50), [text] VARCHAR(200));
INSERT INTO #Source
(ID, Name, [text])
VALUES (1, 'SID,DOB', '123,12/01/1990')
, (2, 'City,State,Zip', 'NewYork,NewYork,01234')
, (3, 'SID,DOB', '456,12/21/1990');
--** Declare variables
DECLARE #Name VARCHAR(200) = '';
DECLARE #Text VARCHAR(1000) = '';
DECLARE #SQL VARCHAR(MAX);
--** Set up cursor for the tables
DECLARE cursor_table CURSOR FAST_FORWARD READ_ONLY FOR
SELECT s.Name
FROM #Source AS s
GROUP BY Name;
OPEN cursor_table
FETCH NEXT FROM cursor_table INTO #Name;
WHILE ##FETCH_STATUS = 0
BEGIN
--** Dynamically create a temp table with the specified columns
SET #SQL = 'CREATE TABLE ##Table (' + REPLACE(#Name, ',', ' VARCHAR(50),') + ' VARCHAR(50));';
EXEC(#SQL);
--** Set up cursor to insert the rows
DECLARE row_cursor CURSOR FAST_FORWARD READ_ONLY FOR
SELECT s.Text
FROM #Source AS s
WHERE Name = #Name;
OPEN row_cursor;
FETCH NEXT FROM row_cursor INTO #Text;
WHILE ##FETCH_STATUS = 0
BEGIN
--** Dynamically insert the row
SELECT #SQL = 'INSERT INTO ##Table VALUES (''' + REPLACE(#Text, ',', ''',''') + ''');';
EXEC(#SQL);
FETCH NEXT FROM row_cursor INTO #Text;
END
--** Display the table
SELECT *
FROM ##Table;
--** Housekeeping
CLOSE row_cursor;
DEALLOCATE row_cursor;
DROP TABLE ##Table;
FETCH NEXT FROM cursor_table INTO #Name;
END
CLOSE cursor_table;
DEALLOCATE cursor_table;

t-sql find specific value with a csv string

I need some on help a SQL Query. I have a column with values stored as comma separated values.
I need to write a query which finds the 3rd delimited item within each value in the column.
Is this possible to do this in a Select statement?
ex: ColumnValue: josh,Reg01,False,a0-t0,22/09/2010
So I will need to get the 3rd value (i.e.) False from the above string.
Yes.
Where #s is your string...
select
SUBSTRING (#s,
CHARINDEX(',',#s,CHARINDEX(',',#s)+1)+1,
CHARINDEX(',',#s,CHARINDEX(',',#s,CHARINDEX(',',#s)+1)+1)
-CHARINDEX(',',#s,CHARINDEX(',',#s)+1)-1)
Or more generically...
;with cte as
(
select 1 as Item, 1 as Start, CHARINDEX(',',#s, 1) as Split
union all
select cte.Item+1, cte.Split+1, nullif(CHARINDEX(',',#s, cte.Split+1),0) as Split
from cte
where cte.Split<>0
)
select SUBSTRING(#s, start,isnull(split,len(#s)+1)-start)
from cte
where Item = 3
Now store your data properly :)
Try this (assuming SQL Server 2005+)
DECLARE #t TABLE(ColumnValue VARCHAR(50))
INSERT INTO #t(ColumnValue) SELECT 'josh,Reg01,False,a0-t0,22/09/2010'
INSERT INTO #t(ColumnValue) SELECT 'mango,apple,bannana,grapes'
INSERT INTO #t(ColumnValue) SELECT 'stackoverflow'
SELECT ThirdValue = splitdata
FROM(
SELECT
Rn = ROW_NUMBER() OVER(PARTITION BY ColumnValue ORDER BY (SELECT 1))
,X.ColumnValue
,Y.splitdata
FROM
(
SELECT *,
CAST('<X>'+REPLACE(F.ColumnValue,',','</X><X>')+'</X>' AS XML) AS xmlfilter FROM #t F
)X
CROSS APPLY
(
SELECT fdata.D.value('.','varchar(50)') AS splitdata
FROM X.xmlfilter.nodes('X') as fdata(D)
) Y
)X WHERE X.Rn = 3
//Result
ThirdValue
False
bannana
Also it is not very clear from your question as what version of SQL Server you are using. In case you are using SQL SERVER 2000, you can go ahead with the below approach.
Step 1: Create a number table
CREATE TABLE dbo.Numbers
(
N INT NOT NULL PRIMARY KEY
);
GO
DECLARE #rows AS INT;
SET #rows = 1;
INSERT INTO dbo.Numbers VALUES(1);
WHILE(#rows <= 10000)
BEGIN
INSERT INTO dbo.Numbers SELECT N + #rows FROM dbo.Numbers;
SET #rows = #rows * 2;
END
Step 2: Apply the query below
DECLARE #t TABLE(ColumnValue VARCHAR(50))
INSERT INTO #t(ColumnValue) SELECT 'josh,Reg01,False,a0-t0,22/09/2010'
INSERT INTO #t(ColumnValue) SELECT 'mango,apple,bannana,grapes'
INSERT INTO #t(ColumnValue) SELECT 'stackoverflow'
--Declare a table variable to put the identity column and store the indermediate results
DECLARE #tempT TABLE(Id INT IDENTITY,ColumnValue VARCHAR(50),SplitData VARCHAR(50))
-- Insert the records into the table variable
INSERT INTO #tempT
SELECT
ColumnValue
,SUBSTRING(ColumnValue, Numbers.N,CHARINDEX(',', ColumnValue + ',', Numbers.N) - Numbers.N) AS splitdata
FROM #t
JOIN Numbers ON Numbers.N <= DATALENGTH(ColumnValue) + 1
AND SUBSTRING(',' + ColumnValue, Numbers.N, 1) = ','
--Project the filtered records
SELECT ThirdValue = X.splitdata
FROM
--The co-related subquery does the ROW_NUMBER() OVER(PARTITION BY ColumnValue)
(SELECT
Rn = (SELECT COUNT(*)
FROM #tempT t2
WHERE t2.ColumnValue=t1.ColumnValue
AND t2.Id<=t1.Id)
,t1.ColumnValue
,t1.splitdata
FROM #tempT t1)X
WHERE X.Rn =3
-- Result
ThirdValue
False
bannana
Also you can use Master..spt_Values for your number table
DECLARE #t TABLE(ColumnValue VARCHAR(50))
INSERT INTO #t(ColumnValue) SELECT 'josh,Reg01,False,a0-t0,22/09/2010'
INSERT INTO #t(ColumnValue) SELECT 'mango,apple,bannana,grapes'
INSERT INTO #t(ColumnValue) SELECT 'stackoverflow'
--Declare a table variable to put the identity column and store the indermediate results
DECLARE #tempT TABLE(Id INT IDENTITY,ColumnValue VARCHAR(50),SplitData VARCHAR(50))
-- Insert the records into the table variable
INSERT INTO #tempT
SELECT
ColumnValue
,SUBSTRING(ColumnValue, Number ,CHARINDEX(',', ColumnValue + ',', Number ) - Number) AS splitdata
FROM #t
JOIN master..spt_values ON Number <= DATALENGTH(ColumnValue) + 1 AND type='P'
AND SUBSTRING(',' + ColumnValue, Number , 1) = ','
--Project the filtered records
SELECT ThirdValue = X.splitdata
FROM
--The co-related subquery does the ROW_NUMBER() OVER(PARTITION BY ColumnValue)
(SELECT
Rn = (SELECT COUNT(*)
FROM #tempT t2
WHERE t2.ColumnValue=t1.ColumnValue
AND t2.Id<=t1.Id)
,t1.ColumnValue
,t1.splitdata
FROM #tempT t1)X
WHERE X.Rn =3
You can read about this from
1) What is the purpose of system table table master..spt_values and what are the meanings of its values?
2) Why (and how) to split column using master..spt_values?
You really need something like String.Split(',')(2) which unfortunately dos not exist in SQL but this may be helpful to you
You can make some test with this solution and the other ones but, I believe that using XML in such situations almost always gives to you best performance and insure less coding:
DECLARE #InPutCSV NVARCHAR(2000)= 'josh,Reg01,False,a0-t0,22/09/2010'
DECLARE #ValueIndexToGet INT=3
DECLARE #XML XML = CAST ('<d>' + REPLACE(#InPutCSV, ',', '</d><d>') + '</d>' AS XML);
WITH CTE(RecordNumber,Value) AS
(
SELECT ROW_NUMBER() OVER(ORDER BY T.v.value('.', 'NVARCHAR(100)') DESC) AS RecordNumber
,T.v.value('.', 'NVARCHAR(100)') AS Value
FROM #XML.nodes('/d') AS T(v)
)
SELECT Value
FROM CTE WHERE RecordNumber=#ValueIndexToGet
I can confirm that it takes 1 seconds to get value from CSV string with 100 000 values.

How to parse String field in SQL Server 2008 if that String is in csv format

I have a string field in which csv row is inserted
'6 33','318011385','3183300153','Z','21.11.2011 13:33:22','51','51','2','0','032425','','','','','8 50318011100 318069332','','21.11.2011','21.11.2011','','0','','','GOT','0','0','0','0','0','0','0','0','0','0','0','21.11.2011','4','','','','','','','','','','','','',''
I need to extract several fields from this csv format using t-sql.
My main approach was to count colons (,) and based on the colon num to parse the data between two colons:
select min(SUBSTRING(field,charindex(''',''',recorddata,charindex(''',''',recorddata)+1)+3,CHARINDEX(''',''',field,charindex(''',''',field,charindex(''',''',field)+1)+3) - (charindex(''',''',field,charindex(''',''',field)+1)+3))) as fld from TBLSYNCEXPORT where SUBSTRING(field,2,CHARINDEX(''',''',field,0)-2) = #type and substring(field,CHARINDEX(''',''',field)+3,3) = #person and SUBSTRING(field,charindex(''',''',field,charindex(''',''',field)+1)+3,CHARINDEX(''',''',field,charindex(''',''',field,charindex(''',''',field)+1)+3) - (charindex(''',''',field,charindex(''',''',field)+1)+3)) > #prev_type
is there a better method that this one?
If you prefer a more clear way, at least for me, you can do something like this:
CREATE TABLE #destination_table(
value varchar(10)
)
DECLARE #position INT
DECLARE #source_string VARCHAR( 1000 )
SELECT #source_string = "'6 33','318011385','3183300153','Z','21.11.2011 13:33:22','51','51','2','0','032425','','','','','8 50318011100 318069332','','21.11.2011','21.11.2011','','0','','','GOT','0','0','0','0','0','0','0','0','0','0','0','21.11.2011','4','','','','','','','','','','','','',''"
SELECT #position = CHARINDEX(',', #source_string )
WHILE #position <> 0
BEGIN
INSERT INTO #destination_table VALUES( LEFT( #source_string, #position-1 ) )
SELECT #source_string = STUFF( #source_string, 1, #position, NULL )
SELECT #position = CHARINDEX(',', #source_string )
END
INSERT INTO #destination_table VALUES( #source_string)
SELECT * FROM #destination_table
-- or select what you need
select value from #destination_table where id = 2
drop table #destination_table
It'll insert the different values in a table and then you can choose the needed values.

SQL Server 2005: How to split a stacked column with unknown number of splits

I am having more issues with splitting stacked columns, and would love some help to complete this last part. I was trying to apply another solution I had, but with no luck.
DB Table:
ID INT,
SN varchar(100),
Types varchar(1000)
Sample:
ID SN Types
1 123 ABC,XYZ,TEST
2 234 RJK,CDF,TTT,UMB,UVX
3 345 OID,XYZ
Desired output:
ID SN Types
1 123 ABC
1 123 XYZ
1 123 TEST
....
here's a cte i have to break up a delimited string
declare #table table (ID int identity(1,1), String varchar(max))
declare #delim varchar(max)
insert into #table values ('abc,def')
insert into #table values ('ghij,klmn,opqrst')
set #delim=','
;with c as
(
select
ID,
--String,
CHARINDEX(#delim,String,1) as Pos,
case when CHARINDEX(#delim,String,1)>0 then SUBSTRING(String,1,CHARINDEX(#delim,String,1)-1) else String end as value,
case when CHARINDEX(#delim,String,1)>0 then SUBSTRING(String,CHARINDEX(#delim,String,1)+1,LEN(String)-CHARINDEX(#delim,String,1)) else '' end as String
from #table
union all
select
ID,
CHARINDEX(#delim,String,1) as Pos,
case when CHARINDEX(#delim,String,1)>0 then SUBSTRING(String,1,CHARINDEX(#delim,String,1)-1) else String end as Value,
case when CHARINDEX(#delim,String,1)>0 then SUBSTRING(String,CHARINDEX(#delim,String,1)+1,LEN(String)-CHARINDEX(#delim,String,1)) else '' end as String
from c
where LEN(String)>0
)
select ID, Value from c
declare #T table(ID int, SN varchar(100), Types varchar(1000))
insert into #T
select 1, 123, 'ABC,XYZ,TEST' union all
select 2, 234, 'RJK,CDF,TTT,UMB,UVX' union all
select 4, 234, 'XXX' union all
select 3, 345, 'OID,XYZ'
;with cte(ID, SN, Types, Rest) as
(
select ID,
SN,
cast(substring(Types+',', 1, charindex(',', Types+',')-1) as varchar(100)),
stuff(Types, 1, charindex(',', Types), '')+','
from #T
where len(Types) > 0
union all
select ID,
SN,
cast(substring(Rest, 1, charindex(',', Rest)-1) as varchar(100)),
stuff(Rest, 1, charindex(',', Rest), '')
from cte
where len(Rest) > 0
)
select ID, SN, Types
from cte
order by ID
I use a recursive CTE to split the string. The third column Types is populated with the first word in the Types column of #T. Stuff will then remove the first word and populate the Rest column that then will contain everything but the first word. After UNION ALL is the recursive part that basically do the exact same thing but it uses the CTE as a source and it uses the rest column to pick the first word. The first word of the rest column is removed with stuff and then ..... well it is recursive so I think I will stop here with the explanation. The recursive part will end when there are no more words left
where len(Rest) > 0.
You will need to use a cursor and a while statement as far as I can tell... Some of these indexes may be off by one, but I think this should get you there...
DECLARE MY_CURSOR Cursor
FOR
SELECT ID, SN, Types
FROM Tbl1
Open My_Cursor
DECLARE #ID int, #SN varchar(100), #types varchar(1000)
Fetch NEXT FROM MY_Cursor INTO #ID, #SN, #types
While (##FETCH_STATUS <> -1)
BEGIN
DECLARE #Pos int
WHILE #Pos < LEN(#types)
BEGIN
DECLARE #type varchar(25)
DECLARE #nextpos int
set #nextpos = CHARINDEX(#types, ',', #pos)
SET #type = SUBSTRING(#types, #pos, #nextpos-#pos)
INSERT INTO tbl2 (ID, SN, type) VALUES (#ID, #SN, #Type)
SET #Pos = #nextpos+1
END
FETCH NEXT FROMMY_CURSOR INTO #VAR1Number, #VAR2DateTime ,#VarLongText
END
CLOSE MY_CURSOR
DEALLOCATE MY_CURSOR

The most elegant way to generate permutations in SQL server

Given a the following table:
Index | Element
---------------
1 | A
2 | B
3 | C
4 | D
We want to generate all the possible permutations (without repetitions) using the elements.
the final result (skipping some rows) will look like this:
Results
----------
ABCD
ABDC
ACBD
ACDB
ADAC
ADCA
...
DABC
DACB
DBCA
DBAC
DCAB
DCBA
(24 Rows)
How would you do it?
After making some perhaps snarky comments, this problem stuck in my brain all evening, and I eventually came up with the following set-based approach. I believe it definitely qualifies as "elegant", but then I also think it qualifies as "kinda dumb". You make the call.
First, set up some tables:
-- For testing purposes
DROP TABLE Source
DROP TABLE Numbers
DROP TABLE Results
-- Add as many rows as need be processed--though note that you get N! (number of rows, factorial) results,
-- and that gets big fast. The Identity column must start at 1, or the algorithm will have to be adjusted.
-- Element could be more than char(1), though the algorithm would have to be adjusted again, and each element
-- must be the same length.
CREATE TABLE Source
(
SourceId int not null identity(1,1)
,Element char(1) not null
)
INSERT Source (Element) values ('A')
INSERT Source (Element) values ('B')
INSERT Source (Element) values ('C')
INSERT Source (Element) values ('D')
--INSERT Source (Element) values ('E')
--INSERT Source (Element) values ('F')
-- This is a standard Tally table (or "table of numbers")
-- It only needs to be as long as there are elements in table Source
CREATE TABLE Numbers (Number int not null)
INSERT Numbers (Number) values (1)
INSERT Numbers (Number) values (2)
INSERT Numbers (Number) values (3)
INSERT Numbers (Number) values (4)
INSERT Numbers (Number) values (5)
INSERT Numbers (Number) values (6)
INSERT Numbers (Number) values (7)
INSERT Numbers (Number) values (8)
INSERT Numbers (Number) values (9)
INSERT Numbers (Number) values (10)
-- Results are iteratively built here. This could be a temp table. An index on "Length" might make runs
-- faster for large sets. Combo must be at least as long as there are characters to be permuted.
CREATE TABLE Results
(
Combo varchar(10) not null
,Length int not null
)
Here's the routine:
SET NOCOUNT on
DECLARE
#Loop int
,#MaxLoop int
-- How many elements there are to process
SELECT #MaxLoop = max(SourceId)
from Source
-- Initialize first value
TRUNCATE TABLE Results
INSERT Results (Combo, Length)
select Element, 1
from Source
where SourceId = 1
SET #Loop = 2
-- Iterate to add each element after the first
WHILE #Loop <= #MaxLoop
BEGIN
-- See comments below. Note that the "distinct" remove duplicates, if a given value
-- is to be included more than once
INSERT Results (Combo, Length)
select distinct
left(re.Combo, #Loop - nm.Number)
+ so.Element
+ right(re.Combo, nm.Number - 1)
,#Loop
from Results re
inner join Numbers nm
on nm.Number <= #Loop
inner join Source so
on so.SourceId = #Loop
where re.Length = #Loop - 1
-- For performance, add this in if sets will be large
--DELETE Results
-- where Length <> #Loop
SET #Loop = #Loop + 1
END
-- Show results
SELECT *
from Results
where Length = #MaxLoop
order by Combo
The general idea is: when adding a new element (say "B") to any string (say, "A"), to catch all permutations you would add B
to all possible positions (Ba, aB), resulting in a new set of strings. Then iterate: Add a new element (C) to each position in a string
(AB becomes Cab, aCb, abC), for all strings (Cba, bCa, baC), and you have the set of permutations. Iterate over each result set with
the next character until you run out of characters... or resources. 10 elements is 3.6 million permutations, roughly 48MB with the above algorithm, and 14 (unique) elements would hit 87 billion permutations and 1.163 terabytes.
I'm sure it could eventually be wedged into a CTE, but in the end all that would be is a glorified loop. The logic
is clearer this way, and I can't help but think the CTE execution plan would be a nightmare.
DECLARE #s VARCHAR(5);
SET #s = 'ABCDE';
WITH Subsets AS (
SELECT CAST(SUBSTRING(#s, Number, 1) AS VARCHAR(5)) AS Token,
CAST('.'+CAST(Number AS CHAR(1))+'.' AS VARCHAR(11)) AS Permutation,
CAST(1 AS INT) AS Iteration
FROM dbo.Numbers WHERE Number BETWEEN 1 AND 5
UNION ALL
SELECT CAST(Token+SUBSTRING(#s, Number, 1) AS VARCHAR(5)) AS Token,
CAST(Permutation+CAST(Number AS CHAR(1))+'.' AS VARCHAR(11)) AS
Permutation,
s.Iteration + 1 AS Iteration
FROM Subsets s JOIN dbo.Numbers n ON s.Permutation NOT LIKE
'%.'+CAST(Number AS CHAR(1))+'.%' AND s.Iteration < 5 AND Number
BETWEEN 1 AND 5
--AND s.Iteration = (SELECT MAX(Iteration) FROM Subsets)
)
SELECT * FROM Subsets
WHERE Iteration = 5
ORDER BY Permutation
Token Permutation Iteration
----- ----------- -----------
ABCDE .1.2.3.4.5. 5
ABCED .1.2.3.5.4. 5
ABDCE .1.2.4.3.5. 5
(snip)
EDBCA .5.4.2.3.1. 5
EDCAB .5.4.3.1.2. 5
EDCBA .5.4.3.2.1. 5
first posted a while ago here
However, it would be better to do it in a better language such as C# or C++.
Just using SQL, without any code, you could do it if you can crowbar yourself another column into the table. Clearly you need to have one joined table for each of the values to be permuted.
with llb as (
select 'A' as col,1 as cnt union
select 'B' as col,3 as cnt union
select 'C' as col,9 as cnt union
select 'D' as col,27 as cnt
)
select a1.col,a2.col,a3.col,a4.col
from llb a1
cross join llb a2
cross join llb a3
cross join llb a4
where a1.cnt + a2.cnt + a3.cnt + a4.cnt = 40
Am I correctly understanding that you built Cartesian product n x n x n x n, and then filter out unwanted stuff? The alternative would be generating all the numbers up to n! and then using factorial number system to map them via element encoding.
Simpler than a recursive CTE:
declare #Number Table( Element varchar(MAX), Id varchar(MAX) )
Insert Into #Number Values ( 'A', '01')
Insert Into #Number Values ( 'B', '02')
Insert Into #Number Values ( 'C', '03')
Insert Into #Number Values ( 'D', '04')
select a.Element, b.Element, c.Element, d.Element
from #Number a
join #Number b on b.Element not in (a.Element)
join #Number c on c.Element not in (a.Element, b.Element)
join #Number d on d.Element not in (a.Element, b.Element, c.Element)
order by 1, 2, 3, 4
For an arbitrary number of elements, script it out:
if object_id('tempdb..#number') is not null drop table #number
create table #number (Element char(1), Id int, Alias as '_'+convert(varchar,Id))
insert #number values ('A', 1)
insert #number values ('B', 2)
insert #number values ('C', 3)
insert #number values ('D', 4)
insert #number values ('E', 5)
declare #sql nvarchar(max)
set #sql = '
select '+stuff((
select char(13)+char(10)+'+'+Alias+'.Element'
from #number order by Id for xml path (''), type
).value('.','NVARCHAR(MAX)'),3,1,' ')
set #sql += '
from #number '+(select top 1 Alias from #number order by Id)
set #sql += (
select char(13)+char(10)+'join #number '+Alias+' on '+Alias+'.Id not in ('
+stuff((
select ', '+Alias+'.Id'
from #number b where a.Id > b.Id
order by Id for xml path ('')
),1,2,'')
+ ')'
from #number a where Id > (select min(Id) from #number)
order by Element for xml path (''), type
).value('.','NVARCHAR(MAX)')
set #sql += '
order by 1'
print #sql
exec (#sql)
To generate this:
select
_1.Element
+_2.Element
+_3.Element
+_4.Element
+_5.Element
from #number _1
join #number _2 on _2.Id not in (_1.Id)
join #number _3 on _3.Id not in (_1.Id, _2.Id)
join #number _4 on _4.Id not in (_1.Id, _2.Id, _3.Id)
join #number _5 on _5.Id not in (_1.Id, _2.Id, _3.Id, _4.Id)
order by 1
This method uses a binary mask to select the correct rows:
;with src(t,n,p) as (
select element, index, power(2,index-1)
from table
)
select s1.t+s2.t+s3.t+s4.t
from src s1, src s2, src s3, src s4
where s1.p+s2.p+s3.p+s4.p=power(2,4)-1
My original post:
declare #t varchar(4) = 'ABCD'
;with src(t,n,p) as (
select substring(#t,1,1),1,power(2,0)
union all
select substring(#t,n+1,1),n+1,power(2,n)
from src
where n < len(#t)
)
select s1.t+s2.t+s3.t+s4.t
from src s1, src s2, src s3, src s4
where s1.p+s2.p+s3.p+s4.p=power(2,len(#t))-1
This is one of those problems that haunts you. I liked the simplicity of my original answer but there was this issue where I was still building all the possible solutions and then selecting the correct ones. One more try to make this process more efficient by only building the solutions that were correct yielded this answer. Add a character to the string only if that character didn't exist in the string. Patindex seemed like the perfect companion for a CTE solution. Here it is.
declare #t varchar(10) = 'ABCDEFGHIJ'
;with s(t,n) as (
select substring(#t,1,1),1
union all
select substring(#t,n+1,1),n+1
from s where n<len(#t)
)
,j(t) as (
select cast(t as varchar(10)) from s
union all
select cast(j.t+s.t as varchar(10))
from j,s where patindex('%'+s.t+'%',j.t)=0
)
select t from j where len(t)=len(#t)
I was able to build all 3.6 million solutions in 3 minutes and 2 seconds. Hopefully this solution will not get missed just because it's not the first.
Current solution using a recursive CTE.
-- The base elements
Declare #Number Table( Element varchar(MAX), Id varchar(MAX) )
Insert Into #Number Values ( 'A', '01')
Insert Into #Number Values ( 'B', '02')
Insert Into #Number Values ( 'C', '03')
Insert Into #Number Values ( 'D', '04')
-- Number of elements
Declare #ElementsNumber int
Select #ElementsNumber = COUNT(*)
From #Number;
-- Permute!
With Permutations( Permutation, -- The permutation generated
Ids, -- Which elements where used in the permutation
Depth ) -- The permutation length
As
(
Select Element,
Id + ';',
Depth = 1
From #Number
Union All
Select Permutation + ' ' + Element,
Ids + Id + ';',
Depth = Depth + 1
From Permutations,
#Number
Where Depth < #ElementsNumber And -- Generate only the required permutation number
Ids Not like '%' + Id + ';%' -- Do not repeat elements in the permutation (this is the reason why we need the 'Ids' column)
)
Select Permutation
From Permutations
Where Depth = #ElementsNumber
Assuming your table is named Elements and has 4 rows, this is as simple as:
select e1.Element + e2.Element + e3.Element + e4.Element
from Elements e1
join Elements e2 on e2.Element != e1.Element
join Elements e3 on e3.Element != e2.Element AND e3.Element != e1.Element
join Elements e4 on e4.Element != e3.Element AND e4.Element != e2.Element AND e4.Element != e1.Element
Way too much rust on my SQL skills, but i took a different tack for a similar problem and thought it worth sharing.
Table1 - X strings in a single field Uno
Table2 - Y strings in a single field Dos
(SELECT Uno, Dos
FROM Table1
CROSS JOIN Table2 ON 1=1)
UNION
(SELECT Dos, Uno
FROM Table1
CROSS JOIN Table2 ON 1=1)
Same principle for 3 tables with an added CROSS JOIN
(SELECT Tres, Uno, Dos
FROM Table1
CROSS JOIN Table2 ON 1=1
CROSS JOIN Table3 ON 1=1)
although it takes 6 cross-join sets in the union.
--Hopefully this is a quick solution, just change the values going into #X
IF OBJECT_ID('tempdb.dbo.#X', 'U') IS NOT NULL DROP TABLE #X; CREATE table #X([Opt] [nvarchar](10) NOT NULL)
Insert into #X values('a'),('b'),('c'),('d')
declare #pSQL NVarChar(max)='select * from #X X1 ', #pN int =(select count(*) from #X), #pC int = 0;
while #pC<#pN begin
if #pC>0 set #pSQL = concat(#pSQL,' cross join #X X', #pC+1);
set #pC = #pC +1;
end
execute(#pSQL)
--or as single column result
IF OBJECT_ID('tempdb.dbo.#X', 'U') IS NOT NULL DROP TABLE #X; CREATE table #X([Opt] [nvarchar](10) NOT NULL)
Insert into #X values('a'),('b'),('c'),('d')
declare #pSQL NVarChar(max)=' as R from #X X1 ',#pSelect NVarChar(Max)=' ',#pJoin NVarChar(Max)='', #pN int =(select count(*) from #X), #pC int = 0;
while #pC<#pN begin
if #pC>0 set #pJoin = concat(#pJoin ,' cross join #X X', #pC+1) set #pSelect = concat(#pSelect ,'+ X', #pC+1,'.Opt ')
set #pC = #pC +1;
end
set #pSQL = concat ('select X1.Opt', #pSelect,#pSQL ,#pJoin)
exec(#pSQL)
create function GeneratePermutations (#string nvarchar(4000))
RETURNS #Permutations
TABLE(
name nVARCHAR(500)
)
AS
begin
declare #SplitedString table(name nvarchar(500))
insert into #SplitedString
select *
from string_split(#string,' ')
declare #CountOfWords as int
set #CountOfWords = (select count(*) from #SplitedString)
;with cte_Permutations (name, level) as (
select convert(nvarchar(500), name), 1 as level from #SplitedString
union all
select convert(nvarchar(500),splited.name+','+cte_Permutations.name),level+1
from #SplitedString splited ,cte_Permutations
where level < #CountOfWords
)
insert into #Permutations
select name
from cte_Permutations
where level = #CountOfWords
order by name
return
end
select *
From (
select 1 id,'a b c' msg
union all
select 2 id,'d e' msg
) p
cross apply dbo.GeneratePermutations(p.msg)