How to select rows that contain at least one value from another table (not only exact matches)? T-SQL

As per the subject, I have a problem with a query. The problem occurs because the table is not normalized. The schema looks like this:
Table A Columns:
ID - A,A,B,B,C,D (not unique)
AR - "N10 N12", "N1 N2 N3", "N1"
AR in A table is char(100)
ARID in AWS table is char(6)
The result of the subselect is a single column with character rows. It can be inserted into a temporary table, but I suppose no other variable type can store it.
This is a simplified example; I want to select all rows where AR contains at least N1. In the real case I have many values to find in the AR column, coming from another table.
I tried:
CONTAINS - it would work, but it can't be used against a temp table or table variable.
Inner join with a subquery - it generally works, but the problem is that it returns only rows where the values are exactly equal. For example, when I'm searching for N1, the row containing just "N1" is returned, but "N1 N2 N3" is not, and it should be in the results.
My actual query:
select *
from A
where AR in (select ARID
From AWS
group by ARID
having count(*)>2)
order by EvaluationTime desc
Thanks and Regards,
B.

Given two tables with the values you've provided (tell me if there are more), you'll first need a way to split the values in A.AR into manageable strings. I use this User Defined Function:
CREATE FUNCTION [dbo].[UDF_StringDelimiter]
/*********************************************************
** Takes Parameter "LIST" and transforms it for use **
** to select individual values or ranges of values. **
** **
** EX: 'This,is,a,test' = 'This' 'Is' 'A' 'Test' **
*********************************************************/
(
@LIST VARCHAR(8000)
,@DELIMITER VARCHAR(255)
)
RETURNS @TABLE TABLE
(
[RowID] INT IDENTITY
,[Value] VARCHAR(255)
)
WITH SCHEMABINDING
AS
BEGIN
DECLARE
@LISTLENGTH AS SMALLINT
,@LISTCURSOR AS SMALLINT
,@VALUE AS VARCHAR(255)
;
SELECT
@LISTLENGTH = LEN(@LIST) - LEN(REPLACE(@LIST,@DELIMITER,'')) + 1
,@LISTCURSOR = 1
,@VALUE = ''
;
WHILE @LISTCURSOR <= @LISTLENGTH
BEGIN
INSERT INTO @TABLE (Value)
SELECT
CASE
WHEN @LISTCURSOR < @LISTLENGTH
THEN SUBSTRING(@LIST,1,PATINDEX('%' + @DELIMITER + '%',@LIST) - 1)
ELSE SUBSTRING(@LIST,1,LEN(@LIST))
END
;
SET @LIST = STUFF(@LIST,1,PATINDEX('%' + @DELIMITER + '%',@LIST),'')
;
SET @LISTCURSOR = @LISTCURSOR + 1
;
END
;
RETURN
;
END
;
Then, with those two tables:
DECLARE @TABLE TABLE (ID CHAR(1), AR VARCHAR(55));
INSERT INTO @TABLE VALUES ('A','N1 N3 N4');
INSERT INTO @TABLE VALUES ('B','N2');
INSERT INTO @TABLE VALUES ('C','N1');
INSERT INTO @TABLE VALUES ('D','N5');
INSERT INTO @TABLE VALUES ('E','N2 N1');
DECLARE @TABLE2 TABLE (RowID INT IDENTITY, ARID VARCHAR(55));
INSERT INTO @TABLE2 (ARID) VALUES ('N1');
Using a CROSS APPLY and an INNER JOIN, I get these results:
SELECT A.ID AS [A.ID], A.AR AS [A.AR],B.ARID AS [B.ARID]
FROM @TABLE A
CROSS APPLY dbo.UDF_StringDelimiter(A.AR,' ') X
INNER JOIN @TABLE2 B
ON B.ARID = X.Value
ORDER BY ID
;
Returns:
A.ID A.AR B.ARID
-----------------------------------
A N1 N3 N4 N1
C N1 N1
E N2 N1 N1
Of course you could also group this, return it as a delimited list, etc. Give me more details and maybe I can give you a better response.
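For example, a minimal sketch of that grouped variant, using the same @TABLE and @TABLE2 variables as above, which returns each matching row from A only once no matter how many ARID values it contains:
SELECT DISTINCT A.ID, A.AR
FROM @TABLE A
CROSS APPLY dbo.UDF_StringDelimiter(A.AR,' ') X
INNER JOIN @TABLE2 B ON B.ARID = X.Value
ORDER BY A.ID;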

If you are using 2 tables, CROSS APPLY is your friend.
create table #ARID(id varchar(10), ar varchar(100));
insert into #ARID values
('a', 'n1 n2'),
('a', 'n10 n11'),
('b', 'n1'),
('b', 'n11 n13 15'),
('c', 'n3'),
('c', 'n14 n12');
This is the table that contains values to search:
create table #TO_SEARCH(val varchar(10))
insert into #TO_SEARCH values ('n10'), ('n11');
And this simple query returns each row that contains at least one of these values:
SELECT ca.*
FROM #TO_SEARCH ts
CROSS APPLY (SELECT id, ar
FROM #ARID
WHERE ar LIKE ('%' + ts.val + '%')
) ca
You can invert the tables, the result is the same:
SELECT id, ar
FROM #ARID
CROSS APPLY (SELECT val from #TO_SEARCH) ts
WHERE ar LIKE ('%' + ts.val + '%')
+----+------------+
| id | ar         |
+----+------------+
| a  | n10 n11    |
| a  | n10 n11    |
| b  | n11 n13 15 |
+----+------------+
Can check it here: http://rextester.com/CUEU92118
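Note that a row comes back once per matching value, which is why a shows up twice above. If each matching row should appear only once, a sketch with EXISTS over the same temp tables avoids the duplicates:
SELECT id, ar
FROM #ARID a
WHERE EXISTS (SELECT 1
FROM #TO_SEARCH ts
WHERE a.ar LIKE ('%' + ts.val + '%'))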

T-SQL to get the 5th word in each row in a database [closed]

I need to find out how to code T-SQL that gets the 5th word in each row in a database
One option which will ensure sequence is to use a bit of XML
Example
Declare @YourTable table (ID int, SomeCol varchar(100))
Insert into @YourTable Values
(1,'Some text that needs to be parsed')
,(2,'Only three words')
Select A.ID
,Pos5 = cast('<x>' + replace(SomeCol,' ','</x><x>')+'</x>' as xml).value('/x[5]','varchar(50)')
From @YourTable A
Returns
ID Pos5
1 to
2 NULL
If you are on v2016 or higher there is a trick with JSON, which is faster than XML.
(Credits to John Cappelletti for the DDL/INSERT.)
Declare @YourTable table (ID int, SomeCol varchar(100))
Insert into @YourTable Values
(1,'Some text that needs to be parsed')
,(2,'Only three words');
SELECT JSON_VALUE(CONCAT('["',REPLACE(t.SomeCol,' ','","'),'"]'),'$[4]')
FROM @YourTable t
The idea is to transform the string into a JSON array and pick the item by its (zero-based!) position.
UPDATE Performance
There are three different answers now... Here is a comparison of their performance:
SET NOCOUNT ON;
DECLARE @YourTable TABLE (ID INT IDENTITY, SomeCol VARCHAR(100))
DECLARE @cnt INT=0;
--insert 100k rows
--important: each string must be different to avoid biased results due to caching...
WHILE @cnt<100000
BEGIN
INSERT INTO @YourTable SELECT CONCAT('Some text that needs to', @cnt,' be parsed');
SET @cnt += 1;
END
--we will measure each approach's duration
DECLARE @d DATETIME2=SYSUTCDATETIME();
--JSON_VALUE reads from the parsed value directly
SELECT t.ID,JSON_VALUE(CONCAT('["',REPLACE(t.SomeCol,' ','","'),'"]'),'$[4]') AS fifth
INTO #tbl1
FROM @YourTable t;
SELECT DATEDIFF(MILLISECOND,@d,SYSUTCDATETIME());
SET @d=SYSUTCDATETIME();
--In this approach we use OPENJSON and implicit pivoting with a WITH clause
SELECT t.ID,f.fifth
INTO #tbl2
FROM @YourTable t
CROSS APPLY OPENJSON(CONCAT('[["',REPLACE(SomeCol,' ','","'),'"]]'))
WITH (fifth VARCHAR(255) '$[4]') as f
SELECT DATEDIFF(MILLISECOND,@d,SYSUTCDATETIME());
SET @d=SYSUTCDATETIME();
--In this approach we use XML to get the 5th element
SELECT t.ID,cast('<x>' + replace(SomeCol,' ','</x><x>')+'</x>' as xml).value('/x[5]','varchar(50)') AS fifth
INTO #tbl3
FROM @YourTable t
SELECT DATEDIFF(MILLISECOND,@d,SYSUTCDATETIME());
GO
--You can check the result
SELECT TOP 100 * FROM #tbl1;
SELECT TOP 100 * FROM #tbl2;
SELECT TOP 100 * FROM #tbl3;
GO
--cleanup
DROP TABLE #tbl1;
GO
DROP TABLE #tbl2;
GO
DROP TABLE #tbl3;
GO
On my system JSON_VALUE (~450ms) takes about half the time of OPENJSON (~900ms) and about a tenth of the XML approach (~4800ms).
UPDATE 2
Reading more than one value from the string in one go tends to favour the solution with OPENJSON:
DECLARE @d DATETIME2=SYSUTCDATETIME();
SELECT t.ID,JSON_VALUE(CONCAT('["',REPLACE(t.SomeCol,' ','","'),'"]'),'$[4]') AS fifth
,JSON_VALUE(CONCAT('["',REPLACE(t.SomeCol,' ','","'),'"]'),'$[1]') AS [second]
,JSON_VALUE(CONCAT('["',REPLACE(t.SomeCol,' ','","'),'"]'),'$[0]') AS [first]
INTO #tbl1
FROM @YourTable t;
SELECT DATEDIFF(MILLISECOND,@d,SYSUTCDATETIME());
SET @d=SYSUTCDATETIME();
SELECT t.ID,f.fifth,f.[second],f.[first]
INTO #tbl2
FROM @YourTable t
CROSS APPLY OPENJSON(CONCAT('[["',REPLACE(SomeCol,' ','","'),'"]]'))
WITH (fifth VARCHAR(255) '$[4]'
,[second] VARCHAR(255) '$[1]'
,[first] VARCHAR(255) '$[0]') as f
SELECT DATEDIFF(MILLISECOND,@d,SYSUTCDATETIME());
SET @d=SYSUTCDATETIME();
SELECT t.ID,cast('<x>' + replace(SomeCol,' ','</x><x>')+'</x>' as xml).value('/x[5]','varchar(50)') AS fifth
,cast('<x>' + replace(SomeCol,' ','</x><x>')+'</x>' as xml).value('/x[2]','varchar(50)') AS [second]
,cast('<x>' + replace(SomeCol,' ','</x><x>')+'</x>' as xml).value('/x[1]','varchar(50)') AS [first]
INTO #tbl3
FROM @YourTable t
SELECT DATEDIFF(MILLISECOND,@d,SYSUTCDATETIME());
GO
In this case (reading 3 items at once) I get ~1800ms for JSON_VALUE, just ~1100ms for OPENJSON and - again about 10x - 18200ms for the XML approach.
UPDATE 3 - just for fun ;-)
The test in section UPDATE 2 would work better for JSON_VALUE using this:
SELECT t.ID,JSON_VALUE(TheJsonString,'$[4]') AS fifth
,JSON_VALUE(TheJsonString,'$[1]') AS [second]
,JSON_VALUE(TheJsonString,'$[0]') AS [first]
INTO #tbl1
FROM @YourTable t
CROSS APPLY(VALUES(CONCAT('["',REPLACE(t.SomeCol,' ','","'),'"]')))A(TheJsonString);
... thus avoiding repeated string manipulations... In this case it is at the same speed as OPENJSON.
Conclusion
So the final suggestion: use JSON_VALUE for a single item, OPENJSON for more than one item, and XML if you are below v2016.
This is a good question, and it highlights a couple of useful features of SQL.
It can be done using a combination of CTE, STRING_SPLIT and CROSS APPLY.
First create the table and insert a couple of rows of data:
create table mywords(r int, w varchar(100));
insert mywords values
(1, 'one two three four five six seven')
,(2, 'W1 W2 W3 W4 W5 W6 W7');
Then the query
;with words as
(
SELECT r,
row_number() over (partition by r order by w) as n,
Value
FROM mywords
CROSS APPLY STRING_SPLIT(w, ' ')
)
select * from words where n=5
The result is:
r n Value
1 5 five
2 5 W5
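Note that STRING_SPLIT does not guarantee the order of its output, so the ROW_NUMBER above is not strictly guaranteed to match word position. On SQL Server 2022 (or Azure SQL Database) you can request an ordinal column instead; a minimal sketch against the same mywords table:
SELECT r, value
FROM mywords
CROSS APPLY STRING_SPLIT(w, ' ', 1) -- the third argument enables the ordinal column
WHERE ordinal = 5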
You can get it using OPENJSON. Thanks to @TomC for the CREATE script.
create table #mywords(r int, w varchar(100));
insert #mywords values
(1, 'one two three four five six seven')
,(2, 'W1 W2 W3 W4 W5 W6 W7');
SELECT r, f.*
FROM #mywords
CROSS APPLY OPENJSON(CONCAT('[["',REPLACE(w,' ','","'),'"]]'))
WITH
(fifth VARCHAR(255) '$[4]') as f
+---+-------+
| r | fifth |
+---+-------+
| 1 | five  |
| 2 | W5    |
+---+-------+

Updating a JSON array in a SQL Server table

I have an array of JSON objects in a SQL Server column, and I am trying to update all names to 'Joe'.
I tried the code below, but it only updates the first element of the JSON array.
CREATE TABLE #t (I INT, JsonColumn NVARCHAR(MAX) CHECK (ISJSON(JsonColumn) > 0))
INSERT INTO #t
VALUES (1, '[{"id":"101","name":"John"}, {"id":"102","name":"peter"}]')
INSERT INTO #t VALUES (2,'[{"id":"103","name":"dave"}, {"id":"104","name":"mark"}]')
SELECT * FROM #t
SELECT * FROM #t
CROSS APPLY OPENJSON(JsonColumn) s
WITH cte AS
(
SELECT *
FROM #t
CROSS APPLY OPENJSON(JsonColumn) s
)
UPDATE cte
SET JsonColumn = JSON_MODIFY(JsonColumn, '$[' + cte.[key] + '].name', 'Joe')
SELECT * FROM #t
-- DROP TABLE #t
It is only updating the first element of the array to Joe.
Current result:
[{"id":"101","name":"Joe"}, {"id":"102","name":"cd"}]
[{"id":"103","name":"Joe"}, {"id":"104","name":"mark"}]
Expected
[{"id":"101","name":"Joe"}, {"id":"102","name":"Joe"}]
[{"id":"103","name":"Joe"}, {"id":"104","name":"Joe"}]
Since you want to do this in one transaction, I could not think of any way other than to create another table, store the values in it, and use FOR XML PATH on the value. The problem is that you are trying to update a JSON array, and a single UPDATE cannot touch the same row twice with different values. With CROSS APPLY, as you have shown, the array is expanded into two rows, and only then can each name be updated to Joe.
Your query would set name = Joe for ID 101 on one row and name = Joe for ID 102 on the other, based on the value column; since those are two different rows of the expanded set, you see only one change in your temp table.
I created one more temp table (#t2) to store those expanded values and used FOR XML PATH to concatenate them back. The final query over #t2 gives your expected results.
SELECT *
into #t2
FROM #t
CROSS APPLY OPENJSON(JsonColumn) s
select *, json_value (value, '$.name') from #t2
UPDATE #t2
SET value = JSON_MODIFY(value, '$.name', 'Joe')
select t.I ,
JSONValue = concat('[',stuff((select ',' + value from #t2 t1
where t1.i = t.i
for XML path('')),1,1,''),']')
from #t2 t
group by t.I
Output:
I JSONValue
1 [{"id":"101","name":"Joe"},{"id":"102","name":"Joe"}]
Updating original table:
update t
set t.JsonColumn =t2.JSONValue
from #t t
join (select t.I ,
JSONValue = concat('[',stuff((select ',' + value from #t2 t1
where t1.i = t.i
for XML path('')),1,1,''),']')
from #t2 t
group by t.I ) t2 on t.I = t2.i
I think it is impossible to apply multiple updates to one record with a single command, so you need to explode the JSON array into records.
You can do this with a temporary or variable table and a cursor.
-- Declare the Variable Table
DECLARE @JsonTable TABLE (
RecordKey UNIQUEIDENTIFIER,
ArrayIndex INT,
ObjKey NVARCHAR(100),
ObjValue NVARCHAR(1000)
);
-- Fill the Variable Table
INSERT INTO @JsonTable
SELECT TB1.pk as RecordKey,
TB1data.[key] AS ArrayIndex,
TB1dataItem.[key] as ObjKey,
TB1dataItem.[value] as ObjValue
FROM MyTable TB1
CROSS APPLY OPENJSON(JSON_QUERY(TB1.data, '$.list')) TB1data
CROSS APPLY OPENJSON(JSON_QUERY(TB1data.value, '$')) TB1dataItem
WHERE TB1dataItem.[key] = 'name'
-- Declare Cursor and relative variables
DECLARE @recordKey UNIQUEIDENTIFIER,
@recordData NVARCHAR(MAX),
@arrayIndex INT,
@objKey NVARCHAR(100),
@objValue NVARCHAR(1000);
DECLARE JsonCursor CURSOR FAST_FORWARD READ_ONLY FOR
SELECT * FROM @JsonTable;
-- Use Cursor to read any json array item
OPEN JsonCursor;
FETCH NEXT
FROM JsonCursor
INTO @recordKey, @arrayIndex, @objKey, @objValue;
WHILE @@FETCH_STATUS = 0 BEGIN
UPDATE TB1
SET data = JSON_MODIFY(
data,
'$.list[' + CAST(@arrayIndex as VARCHAR(20)) + '].name',
'Joe'
)
FROM MyTable TB1
WHERE TB1.pk = @recordKey;
FETCH NEXT
FROM JsonCursor
INTO @recordKey, @arrayIndex, @objKey, @objValue;
END;
CLOSE JsonCursor;
DEALLOCATE JsonCursor;
Do you need this?
CREATE TABLE #t (
I INT,
JsonColumn NVARCHAR(MAX) CHECK (ISJSON(JsonColumn) > 0)
);
INSERT INTO #t
VALUES (1, '[{"id":"101","name":"John"}, {"id":"102","name":"peter"}]');
INSERT INTO #t
VALUES (2, '[{"id":"103","name":"dave"}, {"id":"104","name":"mark"}]');
SELECT CONCAT('[', STRING_AGG(JSON_MODIFY(JSON_MODIFY('{}', '$.id', j.id), '$.name', 'Joe'), ','), ']')
FROM #t t
CROSS APPLY OPENJSON(JsonColumn) WITH (id INT, name sysname) j
GROUP BY t.I
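If the rebuilt arrays should also be written back to #t, a sketch (assuming SQL Server 2017 or later for STRING_AGG) that joins the same aggregation back for the UPDATE:
;WITH rebuilt AS (
SELECT t.I,
CONCAT('[', STRING_AGG(JSON_MODIFY(JSON_MODIFY('{}', '$.id', j.id), '$.name', 'Joe'), ','), ']') AS NewJson
FROM #t t
CROSS APPLY OPENJSON(JsonColumn) WITH (id INT, name sysname) j
GROUP BY t.I
)
UPDATE t
SET t.JsonColumn = r.NewJson
FROM #t t
JOIN rebuilt r ON r.I = t.I;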

SQL Server: searching for values that don't have an entry in the table

I have a table which has the following values:
ID | Name
---------------
1 | Anavaras
2 | Lamurep
I need a query which outputs the values that don't have an entry in the table.
For example:
If my WHERE clause contains id in ('1','2','3','4'), it should produce the output
3 |
4 |
for the above entries in the table.
You would put this into a "derived table" and use left join or a similar construct:
select v.id
from (values(1), (2), (3), (4)) v(id) left join
t
on t.id = v.id
where t.id is null;
Something like this:
"SELECT id FROM table WHERE name IS NULL"
I'd assume?
First you need to split your IN list into a table. A sample split function is below:
CREATE FUNCTION [dbo].[split]
(
@str varchar(max),
@sep char
)
RETURNS
@ids TABLE
(
id varchar(20)
)
AS
BEGIN
declare @pos int,@id varchar(20)
while len(@str)>0
begin
select @pos = charindex(@sep,@str + @sep)
select @id = LEFT(@str,@pos-1),@str = SUBSTRING(@str,@pos+1,10000000)
insert @ids(id) values(@id)
end
RETURN
END
Then you can use this function.
select id from dbo.split('1,2,3,4,5',',') ids
left join myTable t on t.id=ids.id
where t.id is null
-- if table ID is varchar then '''1'',''2'',''3'''
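On SQL Server 2016 and later you can get the same result without a custom split function by using the built-in STRING_SPLIT; a sketch against the same myTable:
select ids.value as id
from string_split('1,2,3,4,5', ',') ids
left join myTable t on t.id = ids.value
where t.id is null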

How to split single cell into multiple columns in sql server 2008R2?

I want to split each name into individual columns.
create table split_test(value integer,Allnames varchar(40))
insert into split_test values(1,'Vinoth,Kumar,Raja,Manoj,Jamal,Bala');
select * from split_test;
Value Allnames
-------------------
1 Vinoth,Kumar,Raja,Manoj,Jamal,Bala
Expected output
values N1 N2 N3 N4 N5 N6 N7.......N20
1 Vinoth Kumar Raja Manoj Jamal Bala
Using this example you can get the idea:
declare @str varchar(max)
set @str = 'Hello world'
declare @separator varchar(max)
set @separator = ' '
declare @Splited table(id int identity(1,1), item varchar(max))
set @str = REPLACE(@str,@separator,'''),(''')
set @str = 'select * from (values('''+@str+''')) as V(A)'
insert into @Splited
exec(@str)
select * from @Splited
Here is a SQL statement using a recursive CTE to split the names into rows, then pivot the rows into columns.
SqlFiddle
with names as
(select
value,
1 as name_id,
substring(Allnames,1,charindex(',',Allnames+',', 0)-1) as name,
substring(Allnames,charindex(',',Allnames, 0)+1, 40) as left_names
from split_test
union all
select
value,
name_id +1,
case when charindex(',',left_names, 0)> 0 then
substring(left_names,1,charindex(',',left_names, 0)-1)
else left_names end as name,
case when charindex(',',left_names, 0)> 0 then
substring(left_names,charindex(',',left_names, 0)+1, 40)
else '' end as left_names
from names
where ltrim(left_names)<>'')
select value,
[1],[2],[3],[4],[5],[6],[7],[8],[9]
from (select value,name_id,name from names) as t1
PIVOT (MAX(name) FOR name_id IN ( [1],[2],[3],[4],[5],[6],[7],[8],[9] ) ) AS t2
UPDATE
@KM.'s answer might be a better way to split data into rows without a recursive CTE. It should be more efficient than this one, so I followed that example and simplified the NULL-value handling. Here is the result:
Step 1:
Create a table that includes all numbers from 1 to a number greater than the max length of the Allnames column.
CREATE TABLE Numbers( Number int not null primary key);
with n as
(select 1 as num
union all
select num +1
from n
where num<100)
insert into numbers
select num from n;
Step 2:
Join the data of the split_test table with the numbers table; this gives all the parts that start from a comma.
Then take the first part between two commas from every row. If null values exist, add them with a UNION.
select value ,
ltrim(rtrim(substring(allnames,number+1,charindex(',',substring(allnames,number,40),2)-2))) as name
from
(select value, ','+allnames+',' as allnames
from split_test) as t1
left join numbers
on number<= len(allnames)
where substring(allnames,number,1)=','
and substring(allnames,number,40)<>','
union
select value, Allnames
from split_test
where Allnames is null
Step 3: Pivot names from rows to columns like my first attempt above, omitted here.
SQLFiddle

The most elegant way to generate permutations in SQL server

Given the following table:
Index | Element
---------------
1 | A
2 | B
3 | C
4 | D
We want to generate all the possible permutations (without repetitions) using the elements.
The final result (skipping some rows) will look like this:
Results
----------
ABCD
ABDC
ACBD
ACDB
ADBC
ADCB
...
DABC
DACB
DBCA
DBAC
DCAB
DCBA
(24 Rows)
How would you do it?
After making some perhaps snarky comments, this problem stuck in my brain all evening, and I eventually came up with the following set-based approach. I believe it definitely qualifies as "elegant", but then I also think it qualifies as "kinda dumb". You make the call.
First, set up some tables:
-- For testing purposes
DROP TABLE Source
DROP TABLE Numbers
DROP TABLE Results
-- Add as many rows as need be processed--though note that you get N! (number of rows, factorial) results,
-- and that gets big fast. The Identity column must start at 1, or the algorithm will have to be adjusted.
-- Element could be more than char(1), though the algorithm would have to be adjusted again, and each element
-- must be the same length.
CREATE TABLE Source
(
SourceId int not null identity(1,1)
,Element char(1) not null
)
INSERT Source (Element) values ('A')
INSERT Source (Element) values ('B')
INSERT Source (Element) values ('C')
INSERT Source (Element) values ('D')
--INSERT Source (Element) values ('E')
--INSERT Source (Element) values ('F')
-- This is a standard Tally table (or "table of numbers")
-- It only needs to be as long as there are elements in table Source
CREATE TABLE Numbers (Number int not null)
INSERT Numbers (Number) values (1)
INSERT Numbers (Number) values (2)
INSERT Numbers (Number) values (3)
INSERT Numbers (Number) values (4)
INSERT Numbers (Number) values (5)
INSERT Numbers (Number) values (6)
INSERT Numbers (Number) values (7)
INSERT Numbers (Number) values (8)
INSERT Numbers (Number) values (9)
INSERT Numbers (Number) values (10)
-- Results are iteratively built here. This could be a temp table. An index on "Length" might make runs
-- faster for large sets. Combo must be at least as long as there are characters to be permuted.
CREATE TABLE Results
(
Combo varchar(10) not null
,Length int not null
)
Here's the routine:
SET NOCOUNT on
DECLARE
@Loop int
,@MaxLoop int
-- How many elements there are to process
SELECT @MaxLoop = max(SourceId)
from Source
-- Initialize first value
TRUNCATE TABLE Results
INSERT Results (Combo, Length)
select Element, 1
from Source
where SourceId = 1
SET @Loop = 2
-- Iterate to add each element after the first
WHILE @Loop <= @MaxLoop
BEGIN
-- See comments below. Note that the "distinct" remove duplicates, if a given value
-- is to be included more than once
INSERT Results (Combo, Length)
select distinct
left(re.Combo, @Loop - nm.Number)
+ so.Element
+ right(re.Combo, nm.Number - 1)
,@Loop
from Results re
inner join Numbers nm
on nm.Number <= @Loop
inner join Source so
on so.SourceId = @Loop
where re.Length = @Loop - 1
-- For performance, add this in if sets will be large
--DELETE Results
-- where Length <> @Loop
SET @Loop = @Loop + 1
END
-- Show results
SELECT *
from Results
where Length = @MaxLoop
order by Combo
The general idea is: when adding a new element (say "B") to any string (say, "A"), to catch all permutations you would add B
to all possible positions (BA, AB), resulting in a new set of strings. Then iterate: add a new element (C) to each position in a string
(AB becomes CAB, ACB, ABC; BA becomes CBA, BCA, BAC), and you have the set of permutations. Iterate over each result set with
the next character until you run out of characters... or resources. 10 elements is 3.6 million permutations, roughly 48MB with the above algorithm, and 14 (unique) elements would hit 87 billion permutations and 1.163 terabytes.
I'm sure it could eventually be wedged into a CTE, but in the end all that would be is a glorified loop. The logic
is clearer this way, and I can't help but think the CTE execution plan would be a nightmare.
DECLARE @s VARCHAR(5);
SET @s = 'ABCDE';
WITH Subsets AS (
SELECT CAST(SUBSTRING(@s, Number, 1) AS VARCHAR(5)) AS Token,
CAST('.'+CAST(Number AS CHAR(1))+'.' AS VARCHAR(11)) AS Permutation,
CAST(1 AS INT) AS Iteration
FROM dbo.Numbers WHERE Number BETWEEN 1 AND 5
UNION ALL
SELECT CAST(Token+SUBSTRING(@s, Number, 1) AS VARCHAR(5)) AS Token,
CAST(Permutation+CAST(Number AS CHAR(1))+'.' AS VARCHAR(11)) AS
Permutation,
s.Iteration + 1 AS Iteration
FROM Subsets s JOIN dbo.Numbers n ON s.Permutation NOT LIKE
'%.'+CAST(Number AS CHAR(1))+'.%' AND s.Iteration < 5 AND Number
BETWEEN 1 AND 5
--AND s.Iteration = (SELECT MAX(Iteration) FROM Subsets)
)
SELECT * FROM Subsets
WHERE Iteration = 5
ORDER BY Permutation
Token Permutation Iteration
----- ----------- -----------
ABCDE .1.2.3.4.5. 5
ABCED .1.2.3.5.4. 5
ABDCE .1.2.4.3.5. 5
(snip)
EDBCA .5.4.2.3.1. 5
EDCAB .5.4.3.1.2. 5
EDCBA .5.4.3.2.1. 5
first posted a while ago here
However, it would be better to do it in a better language such as C# or C++.
Just using SQL, without any code, you could do it if you can crowbar yourself another column into the table. Clearly you need to have one joined table for each of the values to be permuted.
with llb as (
select 'A' as col,1 as cnt union
select 'B' as col,3 as cnt union
select 'C' as col,9 as cnt union
select 'D' as col,27 as cnt
)
select a1.col,a2.col,a3.col,a4.col
from llb a1
cross join llb a2
cross join llb a3
cross join llb a4
where a1.cnt + a2.cnt + a3.cnt + a4.cnt = 40
Am I understanding correctly that you build the Cartesian product n x n x n x n and then filter out the unwanted rows? The alternative would be generating all the numbers up to n! and then using the factorial number system to map them to permutations via element encoding.
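For what it's worth, a rough sketch of that factorial-number-system idea for the four-element case ('ABCD'; the constants 24, 6 and 2 are 4!, 3! and 2!, hardcoded for brevity). Each k from 0 to n!-1 is decoded into one permutation by repeatedly picking the (digit+1)-th remaining character:
DECLARE @elems VARCHAR(10) = 'ABCD';
WITH nums AS ( -- k = 0 .. 4!-1
SELECT 0 AS k
UNION ALL
SELECT k + 1 FROM nums WHERE k + 1 < 24
),
decode AS ( -- peel off one factorial digit per step and pick that element
SELECT k,
CAST('' AS VARCHAR(10)) AS perm,
CAST(@elems AS VARCHAR(10)) AS remaining,
k AS rest,
LEN(@elems) AS slots
FROM nums
UNION ALL
SELECT k,
CAST(perm + SUBSTRING(remaining, rest / CASE slots WHEN 4 THEN 6 WHEN 3 THEN 2 ELSE 1 END + 1, 1) AS VARCHAR(10)),
CAST(STUFF(remaining, rest / CASE slots WHEN 4 THEN 6 WHEN 3 THEN 2 ELSE 1 END + 1, 1, '') AS VARCHAR(10)),
rest % CASE slots WHEN 4 THEN 6 WHEN 3 THEN 2 ELSE 1 END,
slots - 1
FROM decode
WHERE slots > 0
)
SELECT perm FROM decode WHERE slots = 0 ORDER BY k;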
Simpler than a recursive CTE:
declare @Number Table( Element varchar(MAX), Id varchar(MAX) )
Insert Into @Number Values ( 'A', '01')
Insert Into @Number Values ( 'B', '02')
Insert Into @Number Values ( 'C', '03')
Insert Into @Number Values ( 'D', '04')
select a.Element, b.Element, c.Element, d.Element
from @Number a
join @Number b on b.Element not in (a.Element)
join @Number c on c.Element not in (a.Element, b.Element)
join @Number d on d.Element not in (a.Element, b.Element, c.Element)
order by 1, 2, 3, 4
For an arbitrary number of elements, script it out:
if object_id('tempdb..#number') is not null drop table #number
create table #number (Element char(1), Id int, Alias as '_'+convert(varchar,Id))
insert #number values ('A', 1)
insert #number values ('B', 2)
insert #number values ('C', 3)
insert #number values ('D', 4)
insert #number values ('E', 5)
declare @sql nvarchar(max)
set @sql = '
select '+stuff((
select char(13)+char(10)+'+'+Alias+'.Element'
from #number order by Id for xml path (''), type
).value('.','NVARCHAR(MAX)'),3,1,' ')
set @sql += '
from #number '+(select top 1 Alias from #number order by Id)
set @sql += (
select char(13)+char(10)+'join #number '+Alias+' on '+Alias+'.Id not in ('
+stuff((
select ', '+Alias+'.Id'
from #number b where a.Id > b.Id
order by Id for xml path ('')
),1,2,'')
+ ')'
from #number a where Id > (select min(Id) from #number)
order by Element for xml path (''), type
).value('.','NVARCHAR(MAX)')
set @sql += '
order by 1'
print @sql
exec (@sql)
To generate this:
select
_1.Element
+_2.Element
+_3.Element
+_4.Element
+_5.Element
from #number _1
join #number _2 on _2.Id not in (_1.Id)
join #number _3 on _3.Id not in (_1.Id, _2.Id)
join #number _4 on _4.Id not in (_1.Id, _2.Id, _3.Id)
join #number _5 on _5.Id not in (_1.Id, _2.Id, _3.Id, _4.Id)
order by 1
This method uses a binary mask to select the correct rows:
;with src(t,n,p) as (
select element, index, power(2,index-1)
from table
)
select s1.t+s2.t+s3.t+s4.t
from src s1, src s2, src s3, src s4
where s1.p+s2.p+s3.p+s4.p=power(2,4)-1
My original post:
declare @t varchar(4) = 'ABCD'
;with src(t,n,p) as (
select substring(@t,1,1),1,power(2,0)
union all
select substring(@t,n+1,1),n+1,power(2,n)
from src
where n < len(@t)
)
select s1.t+s2.t+s3.t+s4.t
from src s1, src s2, src s3, src s4
where s1.p+s2.p+s3.p+s4.p=power(2,len(@t))-1
This is one of those problems that haunts you. I liked the simplicity of my original answer but there was this issue where I was still building all the possible solutions and then selecting the correct ones. One more try to make this process more efficient by only building the solutions that were correct yielded this answer. Add a character to the string only if that character didn't exist in the string. Patindex seemed like the perfect companion for a CTE solution. Here it is.
declare @t varchar(10) = 'ABCDEFGHIJ'
;with s(t,n) as (
select substring(@t,1,1),1
union all
select substring(@t,n+1,1),n+1
from s where n<len(@t)
)
,j(t) as (
select cast(t as varchar(10)) from s
union all
select cast(j.t+s.t as varchar(10))
from j,s where patindex('%'+s.t+'%',j.t)=0
)
select t from j where len(t)=len(@t)
I was able to build all 3.6 million solutions in 3 minutes and 2 seconds. Hopefully this solution will not get missed just because it's not the first.
Current solution using a recursive CTE.
-- The base elements
Declare @Number Table( Element varchar(MAX), Id varchar(MAX) )
Insert Into @Number Values ( 'A', '01')
Insert Into @Number Values ( 'B', '02')
Insert Into @Number Values ( 'C', '03')
Insert Into @Number Values ( 'D', '04')
-- Number of elements
Declare @ElementsNumber int
Select @ElementsNumber = COUNT(*)
From @Number;
-- Permute!
With Permutations( Permutation, -- The permutation generated
Ids, -- Which elements where used in the permutation
Depth ) -- The permutation length
As
(
Select Element,
Id + ';',
Depth = 1
From @Number
Union All
Select Permutation + ' ' + Element,
Ids + Id + ';',
Depth = Depth + 1
From Permutations,
@Number
Where Depth < @ElementsNumber And -- Generate only the required permutation number
Ids Not like '%' + Id + ';%' -- Do not repeat elements in the permutation (this is the reason why we need the 'Ids' column)
)
Select Permutation
From Permutations
Where Depth = @ElementsNumber
Assuming your table is named Elements and has 4 rows, this is as simple as:
select e1.Element + e2.Element + e3.Element + e4.Element
from Elements e1
join Elements e2 on e2.Element != e1.Element
join Elements e3 on e3.Element != e2.Element AND e3.Element != e1.Element
join Elements e4 on e4.Element != e3.Element AND e4.Element != e2.Element AND e4.Element != e1.Element
Way too much rust on my SQL skills, but I took a different tack for a similar problem and thought it worth sharing.
Table1 - X strings in a single field Uno
Table2 - Y strings in a single field Dos
(SELECT Uno, Dos
FROM Table1
CROSS JOIN Table2)
UNION
(SELECT Dos, Uno
FROM Table1
CROSS JOIN Table2)
Same principle for 3 tables with an added CROSS JOIN
(SELECT Tres, Uno, Dos
FROM Table1
CROSS JOIN Table2
CROSS JOIN Table3)
although it takes 6 cross-join sets in the union.
--Hopefully this is a quick solution, just change the values going into #X
IF OBJECT_ID('tempdb.dbo.#X', 'U') IS NOT NULL DROP TABLE #X; CREATE table #X([Opt] [nvarchar](10) NOT NULL)
Insert into #X values('a'),('b'),('c'),('d')
declare @pSQL NVarChar(max)='select * from #X X1 ', @pN int =(select count(*) from #X), @pC int = 0;
while @pC<@pN begin
if @pC>0 set @pSQL = concat(@pSQL,' cross join #X X', @pC+1);
set @pC = @pC +1;
end
execute(@pSQL)
--or as single column result
IF OBJECT_ID('tempdb.dbo.#X', 'U') IS NOT NULL DROP TABLE #X; CREATE table #X([Opt] [nvarchar](10) NOT NULL)
Insert into #X values('a'),('b'),('c'),('d')
declare @pSQL NVarChar(max)=' as R from #X X1 ',@pSelect NVarChar(Max)=' ',@pJoin NVarChar(Max)='', @pN int =(select count(*) from #X), @pC int = 0;
while @pC<@pN begin
if @pC>0 begin set @pJoin = concat(@pJoin ,' cross join #X X', @pC+1) set @pSelect = concat(@pSelect ,'+ X', @pC+1,'.Opt ') end
set @pC = @pC +1;
end
set @pSQL = concat ('select X1.Opt', @pSelect,@pSQL ,@pJoin)
exec(@pSQL)
create function GeneratePermutations (@string nvarchar(4000))
RETURNS @Permutations
TABLE(
name nVARCHAR(500)
)
AS
begin
declare @SplitedString table(name nvarchar(500))
insert into @SplitedString
select *
from string_split(@string,' ')
declare @CountOfWords as int
set @CountOfWords = (select count(*) from @SplitedString)
;with cte_Permutations (name, level) as (
select convert(nvarchar(500), name), 1 as level from @SplitedString
union all
select convert(nvarchar(500),splited.name+','+cte_Permutations.name),level+1
from @SplitedString splited ,cte_Permutations
where level < @CountOfWords
and charindex(',' + splited.name + ',', ',' + cte_Permutations.name + ',') = 0 -- skip words already used so only true permutations are produced
)
insert into @Permutations
select name
from cte_Permutations
where level = @CountOfWords
order by name
return
end
select *
From (
select 1 id,'a b c' msg
union all
select 2 id,'d e' msg
) p
cross apply dbo.GeneratePermutations(p.msg)