SQL SERVER generate data using Regex pattern - sql

I would like to generate the data by given regex pattern in SQL Server. Is there any possibility to do? Say, I have pattern as below and I would like to generate data as follow:
The idea behind the concept is SQL Static Data Masking (a feature that was removed from the current release). Our client wants to mask the production data in the test database. We don't have the Static Data Masking feature in SQL Server now, but we do have patterns for masking each column, so my thought is that with these patterns we can run update queries.
SELECT "(\d){7}" AS RandonNumber, "(\W){5}" AS RandomString FROM tbl
Output Should be
+---------------+--------------+
| RandonNumber | RandomString |
+---------------+--------------+
| 7894562 | AHJIL |
+---------------+--------------+
| 9632587 | ZLOKP |
+---------------+--------------+
| 4561238 | UJIOK |
+---------------+--------------+
Apart from this regular pattern, I have some customized pattern like Test_Product_(\d){1,4}, which should give result as below:
Test_Product_012
Test_Product_143
Test_Product_8936
Complete Patterns which I am going to use for masking
Other Patterns Samples
(\l){30} ahukoklijfahukokponmahukoahuko
(\d){7} 7895623
(\W){5} ABCDEF
Test_Product_(\d){1,4} Test_Product_007
0\.(\d){2} 0.59
https://www\.(\l){10}\.com https://www.anything.com

Well, I can give you a solution that is not based on regular expressions, but on a set of parameters - but it contains a complete set of all your requirements.
I've based this solution on a user-defined function I've written to generate random strings (You can read my blog post about it here) - I've just changed it so that it could generate the mask you wanted based on the following conditions:
The mask has an optional prefix.
The mask has an optional suffix.
The mask has a variable-length random string.
The random string can contain either lower-case letters, upper-case letters, digits, or any combination of the above.
I've decided these set of rules based on your update to the question, containing your desired masks:
(\d){7} 7895623
(\W){5} ABCDEF
Test_Product_(\d){1,4} Test_Product_007
0\.(\d){2} 0.59
https://www\.(\l){10}\.com https://www.anything.com
And now, for the code:
Since I'm using a user-defined function, I can't use inside it the NewId() built in function - so we first need to create a view to generate the guid for us:
-- NEWID() cannot be called directly inside a user-defined function,
-- so this view wraps it; functions can then SELECT from the view
-- to obtain a fresh GUID as their source of randomness.
CREATE VIEW GuidGenerator
AS
SELECT Newid() As NewGuid;
In the function, we're going to use that view to generate a NewID() as the base of all randomness.
The function itself is a lot more cumbersome than the random string generator I've started from:
-- Inline table-valued function that returns @Count rows of random strings
-- shaped as: optional prefix + random part (length between @MinLength and
-- @MaxLength) + optional suffix.
CREATE FUNCTION dbo.MaskGenerator
(
    -- use null or an empty string for no prefix
    @Prefix nvarchar(4000),
    -- use null or an empty string for no suffix
    @Suffix nvarchar(4000),
    -- the minimum length of the random part
    @MinLength int,
    -- the maximum length of the random part
    @MaxLength int,
    -- the maximum number of rows to return. Note: up to 1,000,000 rows
    @Count int,
    -- 1, 2 and 4 stand for lower-case, upper-case and digits.
    -- a bitwise combination of these values can be used to generate all possible combinations:
    -- 3: lower and upper, 5: lower and digits, 6: upper and digits, 7: lower, upper and digits
    @CharType tinyint
)
RETURNS TABLE
AS
RETURN
-- An inline tally table with 1,000,000 rows
WITH E1(N) AS (SELECT N FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9), (10)) V(N)), -- 10
     E2(N) AS (SELECT 1 FROM E1 a, E1 b), -- 100
     E3(N) AS (SELECT 1 FROM E2 a, E2 b), -- 10,000
     Tally(N) AS (SELECT ROW_NUMBER() OVER (ORDER BY @@SPID) FROM E3 a, E2 b) -- 1,000,000
SELECT TOP(@Count)
       n As Number,
       CONCAT(@Prefix, (
           SELECT TOP (Length)
                  -- choose what char combination to use for the random part
                  CASE @CharType
                      WHEN 1 THEN Lower
                      WHEN 2 THEN Upper
                      WHEN 3 THEN IIF(Rnd % 2 = 0, Lower, Upper)
                      WHEN 4 THEN Digit
                      WHEN 5 THEN IIF(Rnd % 2 = 0, Lower, Digit)
                      WHEN 6 THEN IIF(Rnd % 2 = 0, Upper, Digit)
                      WHEN 7 THEN
                          CASE Rnd % 3
                              WHEN 0 THEN Lower
                              WHEN 1 THEN Upper
                              ELSE Digit
                          END
                  END
           FROM Tally As t0
           -- create a random number from the guid using the GuidGenerator view
           -- (NEWID() itself is not allowed inside a user-defined function)
           CROSS APPLY (SELECT Abs(Checksum(NewGuid)) As Rnd FROM GuidGenerator) As rand
           CROSS APPLY
           (
               -- generate a random lower-case char, upper-case char and digit
               SELECT CHAR(97 + Rnd % 26) As Lower, -- Random lower case letter
                      CHAR(65 + Rnd % 26) As Upper, -- Random upper case letter
                      CHAR(48 + Rnd % 10) As Digit  -- Random digit
           ) As Chars
           -- Always true (n is positive) but correlates the subquery to t1,
           -- which is needed for it to get re-evaluated for each outer row.
           WHERE t0.n <> -t1.n
           FOR XML PATH('')
       ), @Suffix) As RandomString
FROM Tally As t1
CROSS APPLY
(
    -- Select a random length between @MinLength and @MaxLength (inclusive)
    SELECT TOP 1 n As Length
    FROM Tally As t2
    CROSS JOIN GuidGenerator
    WHERE t2.n >= @MinLength
      AND t2.n <= @MaxLength
      -- NOTE(review): unlike the t0.n <> -t1.n trick above, this predicate can
      -- exclude a valid length whenever t1.n falls inside [@MinLength, @MaxLength];
      -- confirm whether t2.n <> -t1.n was intended here.
      AND t2.n <> t1.n
    ORDER BY NewGuid
) As Lengths;
And finally, Test cases:
(\l){30} - ahukoklijfahukokponmahukoahuko
SELECT RandomString FROM dbo.MaskGenerator(null, null, 30, 30, 2, 1);
Results:
1, eyrutkzdugogyhxutcmcmplvzofser
2, juuyvtzsvmmcdkngnzipvsepviepsp
(\d){7} - 7895623
SELECT RandomString FROM dbo.MaskGenerator(null, null, 7, 7, 2, 4);
Results:
1, 8744412
2, 2275313
(\W){5} - ABCDE
SELECT RandomString FROM dbo.MaskGenerator(null, null, 5, 5, 2, 2);
Results:
1, RSYJE
2, MMFAA
Test_Product_(\d){1,4} - Test_Product_007
SELECT RandomString FROM dbo.MaskGenerator('Test_Product_', null, 1, 4, 2, 4);
Results:
1, Test_Product_933
2, Test_Product_7
0\.(\d){2} - 0.59
SELECT RandomString FROM dbo.MaskGenerator('0.', null, 2, 2, 2, 4);
Results:
1, 0.68
2, 0.70
https://www\.(\l){10}\.com - https://www.anything.com
SELECT RandomString FROM dbo.MaskGenerator('https://www.', '.com', 10, 10, 2, 1);
Results:
1, https://www.xayvkmkuci.com
2, https://www.asbfcvomax.com
Here's how you use it to mask the content of a table:
-- Example: mask five columns at once by calling the generator once per column
-- and joining the calls on the generated row Number.
DECLARE @Count int = 10;

SELECT CAST(IntVal.RandomString As Int) As IntColumn,
       UpVal.RandomString as UpperCaseValue,
       LowVal.RandomString as LowerCaseValue,
       MixVal.RandomString as MixedValue,
       WithPrefix.RandomString As PrefixedValue
FROM dbo.MaskGenerator(null, null, 3, 7, @Count, 4) As IntVal      -- digits only, 3-7 chars
JOIN dbo.MaskGenerator(null, null, 10, 10, @Count, 1) As LowVal    -- lower-case, exactly 10
    ON IntVal.Number = LowVal.Number
JOIN dbo.MaskGenerator(null, null, 5, 10, @Count, 2) As UpVal      -- upper-case, 5-10
    ON IntVal.Number = UpVal.Number
JOIN dbo.MaskGenerator(null, null, 10, 20, @Count, 7) As MixVal    -- mixed alphanumeric, 10-20
    ON IntVal.Number = MixVal.Number
JOIN dbo.MaskGenerator('Test ', null, 1, 4, @Count, 4) As WithPrefix -- prefixed digits
    ON IntVal.Number = WithPrefix.Number
Results:
IntColumn UpperCaseValue LowerCaseValue MixedValue PrefixedValue
674 CCNVSDI esjyyesesv O2FAC7bfwg2Be5a91Q0 Test 4935
30732 UJKSL jktisddbnq 7o8B91Sg1qrIZSvG3AcL Test 0
4669472 HDLJNBWPJ qgtfkjdyku xUoLAZ4pAnpn Test 8
26347 DNAKERR vlehbnampb NBv08yJdKb75ybhaFqED Test 91
6084965 LJPMZMEU ccigzyfwnf MPxQ2t8jjmv0IT45yVcR Test 4
6619851 FEHKGHTUW wswuefehsp 40n7Ttg7H5YtVPF Test 848
781 LRWKVDUV bywoxqizju UxIp2O4Jb82Ts Test 6268
52237 XXNPBL beqxrgstdo Uf9j7tCB4W2 Test 43
876150 ZDRABW fvvinypvqa uo8zfRx07s6d0EP Test 7
Note that this is a fast process - generating 1000 rows with 5 columns took less than half a second on average in tests I've made.

I'm not convinced you need a Regex for this. Why not just use a "scrub script" and take advantage of the newid() function to generate a bunch of random data. It looks like you'll need to write such a script anyway, Regex or not, and this has the benefit of being very simple.
Let's say you start with the following data:
-- Sample table and data to be scrubbed.
create table tbl (PersonalId int, Name varchar(max))

-- Explicit column lists keep the inserts valid if columns are added later.
insert into tbl (PersonalId, Name) values (300300, 'Michael')
insert into tbl (PersonalId, Name) values (554455, 'Tim')
insert into tbl (PersonalId, Name) values (228899, 'John')

select * from tbl
Then run your script:
-- Intentionally no WHERE clause: every row in the table is being scrubbed.
-- rand(checksum(newid())) yields an independent random float in [0, 1) per row.
update tbl set PersonalId = cast(rand(checksum(newid())) * 1000000 as int)
-- Replace the name with the first 6 characters of a random GUID string.
update tbl set Name = left(convert(varchar(255), newid()), 6)
select * from tbl

Related

SQL Server Recursive CTE not returning expected rows

I'm building a Markov chain name generator. I'm trying to replace a while loop with a recursive CTE. Limitations in using top and order by in the recursive part of the CTE have led me down the following path.
The point of all of this is to generate names, based on a model, which is just another word that I've chunked out into three character segments, stored in three columns in the Markov_Model table. The next character in the sequence will be a character from the Markov_Model, such that the 1st and 2nd characters in the model match the penultimate and ultimate character in the word being generated. Rather than generate a probability matrix for that third character, I'm using a scalar function that finds all the characters that fit the criteria, and gets one of them randomly: order by newid().
The problem is that this formulation of the CTE gets the desired number of rows in the anchor segment, but the union that recursively calls the CTE only unions one row from the anchor. I've attached a sample of the desired output at the bottom.
The query:
-- Recursive CTE: the anchor picks 5 random starting characters; the
-- recursive member appends one character per iteration via fn_markov_getNext.
;with names as
(
select top 5
cast('+' as nvarchar(50)) as char1,
cast('+' as nvarchar(50)) as char2,
cast(char3 as nvarchar(50)) as char3,
cast('++' + char3 as nvarchar(100)) as name_in_progress,
1 as iteration
from markov_Model
where char1 is null
and char2 is null
order by newid() -- Get some random starting characters
union all
-- Recursive member: shift the two-character window left (char2, char3 become
-- char1, char2) and append the next randomly chosen character to the name.
select
n.char2 as char1,
n.char3 as char2,
cast(fnc.addition as nvarchar(50)) as char3,
cast(n.name_in_progress + fnc.addition as nvarchar(100)),
1 + n.iteration
from names n
cross apply (
-- This function takes the preceding two characters,
-- and gets a random character that follows the pattern
select isnull(dbo.[fn_markov_getNext] (n.char2, n.char3), ',') as addition
) fnc
)
select *
from names
option (maxrecursion 3) -- For debug
The trouble is the union only unions one row.
Example output:
char1 char2 char3 name_in_progress iteration
+ + F ++F 1
+ + N ++N 1
+ + K ++K 1
+ + S ++S 1
+ + B ++B 1
+ B a ++Ba 2
B a c ++Bac 3
a c h ++Bach 4
Note I'm using + and , as null replacers/delimiters.
What I want to see is the entirety of the previous recursion, with the addition of the new characters to the name_in_progress; each pass should modify the entirely of the previous pass.
My desired output would be:
Top 10 of the Markov_Model table:
Text of the function that gets the next character from the Markov_Model:
-- Returns one random character that can follow the two-character context
-- (@char2, @char3) according to the Markov_Model table, or NULL when no
-- model row matches the context.
CREATE FUNCTION [dbo].[fn_markov_getNext]
(
    @char2 nvarchar(1),
    @char3 nvarchar(1)
)
RETURNS nvarchar(1)
AS
BEGIN
    DECLARE @newChar nvarchar(1)

    set @newChar = (
        select top 1
               isnull(char3, ',')
        from markov_Model mm
        -- NOTE(review): the NULL default is '+' on one side and ',' on the
        -- other in the second predicate — confirm this asymmetry is intended.
        where isnull(mm.char1, '+') = isnull(@char2, '+')
        and isnull(mm.char2, '+') = isnull(@char3, ',')
        order by (select new_id from vw_getGuid) -- A view that calls newid(); NEWID() cannot be called directly inside a UDF
    )

    return @newChar
END

Using to SQL to transform and combine strings

Currently, I have an data set that is structured as follows:
-- Legacy layout: one logical note is stored as several rows, split at 4000
-- characters, ordered by line_number within each (date, author, type) group.
CREATE TABLE notes (
date DATE NOT NULL,
author VARCHAR(100) NOT NULL,
type CHAR NOT NULL,
line_number INT NOT NULL, -- 1-based position of this fragment within the note
note VARCHAR(4000) NOT NULL
);
Some sample data:
Date, Author, Type, Line Number, Note
2015-01-01, Abe, C, 1, First 4000 character string
2015-01-01, Abe, C, 2, Second 4000 character string
2015-01-01, Abe, C, 3, Third 4000 character string
2015-01-01, Bob, C, 1, First 4000 character string
2015-01-01, Bob, C, 2, Second 1000 character string
2015-01-01, Cal, C, 1, First 3568 character string
This data is to be migrated to a new SQL Server structure that is defined as:
-- Target layout: fragments are combined into rows of up to 8000 characters;
-- overlong notes become multiple "Part X of Y"-prefixed rows instead of
-- line_number-ordered fragments.
CREATE TABLE notes (
date DATE NOT NULL,
author VARCHAR(100) NOT NULL,
type CHAR NOT NULL,
note VARCHAR(8000) NOT NULL
);
I would like to prefix to the multi-line (those with more than 8000 characters when combined) Notes with "Date - Author - Part X of Y // ", and place a space between concatenated strings so the data would end up like:
Date, Author, Type, Note
2015-01-01, Abe, C, 2015-01-01 - Abe - Part 1 of 2 // First 4000 character string First 3959 characters of the second 4000 character string
2015-01-01, Abe, C, 2015-01-01 - Abe - Part 2 of 2 // Remaining 41 characters of the second 4000 character string Third (up to) 4000 character string
2015-01-01, Bob, C, First 4000 character string Second 1000 character string
2015-01-01, Cal, C, First 3568 character string
I'm looking for ways to accomplish this transformation. Initially, I had an intermediate step to simple combine (coalesce) all the Note strings where Date, Author, Type are shared together but was not able to split.
Okay, so, this was a bit of a challenge but I got there in the end. Has been a thoroughly enjoyable distraction from my regular work :D
The code assumes that you will never have a note that is longer than 72,000 total characters, in that the logic which works out how much extra text is added by the Part x in y prefix assumes that x and y are single digit numbers. This could easily be remedied by padding any single digits with leading zeros, which would also ensure ordering is correct.
If you need anything explained, the comments in the code should be sufficient:
-- Declare the test data:
declare @a table ([Date] date
                 ,author varchar(100)
                 ,type char
                 ,line_number int
                 ,note varchar(8000)
                 ,final_line int
                 ,new_lines int
                 )
insert into @a values
 ('2015-01-01','Abel','C',1,'This is a note that is 100 characters long----------------------------------------------------------' ,null,null)
,('2015-01-01','Abel','C',2,'This is a note that is 100 characters long----------------------------------------------------------' ,null,null)
,('2015-01-01','Abel','C',3,'This is a note that is 83 characters long------------------------------------------' ,null,null)
,('2015-01-01','Bob' ,'C',1,'This is a note that is 100 characters long----------------------------------------------------------' ,null,null)
,('2015-01-01','Bob' ,'C',2,'This is a note that is 43 characters long--' ,null,null)
,('2015-01-01','Cal' ,'C',1,'This is a note that is 50 characters long---------' ,null,null)

---------------------------------------
-- Start the actual data processing. --
---------------------------------------

-- Set this to your 8000 character limit. 100 is used here so the test
-- values don't have to be really long.
declare @MaxFieldLen decimal(10,2) = 100

-- Create Numbers table. This will perform better if created as a permanent table:
if object_id('tempdb..#Numbers') is not null
    drop table #Numbers

;with e00(n) as (select 1 union all select 1)
     ,e02(n) as (select 1 from e00 a, e00 b)
     ,e04(n) as (select 1 from e02 a, e02 b)
     ,e08(n) as (select 1 from e04 a, e04 b)
     ,e16(n) as (select 1 from e08 a, e08 b)
     ,e32(n) as (select 1 from e16 a, e16 b)
     ,cte(n) as (select row_number() over (order by n) from e32)
select n-1 as Number
into #Numbers
from cte
where n <= 1000001

-- Calculate some useful figures to be used in chopping up the total note.
-- This needs to be done across the whole table before doing anything else:
update a
set final_line = t.final_line
   ,new_lines = t.new_lines
from @a a
inner join (select Date
                  ,author
                  ,type
                  -- We only want the final line from the CTE later on, so we need a way of
                  -- identifying that the line_number we are working with is the last one.
                  ,max(line_number) as final_line
                  -- Calculate the total number of lines that will result from the additional text being added:
                  ,case when sum(len(note)) > @MaxFieldLen -- If the Note is long enough to be broken into two lines:
                        then ceiling( -- Find the next highest integer value for
                                 sum(len(note)) -- the total length of all the notes
                                 / (@MaxFieldLen - len(convert(nvarchar(10), Date, 121) + ' - ' + author + ' - Part x of x //_')) -- divided by the max note size allowed minus the length of the additional text.
                             )
                        else 1 -- Otherwise return 1.
                   end as new_lines
            from @a
            group by Date
                    ,author
                    ,type
           ) t
    on a.Date = t.Date
   and a.author = t.author
   and a.type = t.type

-- Combine the Notes using a recursive cte:
;with cte as
(
    select Date
          ,author
          ,type
          ,line_number
          ,final_line
          ,note
          ,new_lines
    from @a
    where line_number = 1

    union all

    select a.Date
          ,a.author
          ,a.type
          ,a.line_number
          ,a.final_line
          ,c.note + a.note
          ,a.new_lines
    from cte c
    join @a a
        on c.Date = a.Date
       and c.author = a.author
       and c.type = a.type
       and c.line_number + 1 = a.line_number
)
select c1.Date
      ,c1.author
      ,c1.type
      ,c2.note
from cte c1
cross apply (select case when c1.new_lines > 1 -- If there is more than one line to be returned, build up the prefix:
                         then convert(nvarchar(10), Date, 121) + ' - ' + author + ' - Part ' + cast(Number+1 as nvarchar(10)) + ' of ' + cast(c1.new_lines as nvarchar(10)) + ' // '
                              + substring(c1.note -- and then append the next (Max note length - Generated prefix) number of characters in the note:
                                         ,1 + Number * (@MaxFieldLen - len(convert(nvarchar(10), Date, 121) + ' - ' + author + ' - Part x of x //_'))
                                         ,(@MaxFieldLen - len(convert(nvarchar(10), Date, 121) + ' - ' + author + ' - Part x of x //_'))-1
                                         )
                         else c1.note
                     end as note
             from #Numbers
             where Number >= 0
               and Number < case when c1.new_lines = 1
                                 then 1
                                 else len(c1.note) / (@MaxFieldLen - len(convert(nvarchar(10), Date, 121) + ' - ' + author + ' - Part x of x //_'))
                            end
            ) c2
where line_number = final_line -- only the fully concatenated row per group
order by c1.Date, c1.author, c1.type, c2.note

How to obtain certain value from URL? [duplicate]

This question already has answers here:
SQL Server 2008 R2 - How to split my varchar column string and get 3rd index string
(4 answers)
Closed 7 years ago.
Given this URL:
www.google.com/hsisn/-#++#/valuetoretrive/+#(#(/.html
The value to retrieve is between the 4th and 5th slash.
How to retrieve that particular value using SQL Server 2008?
There is no function in SQL server to get the nth occurrence of a value, the only function is CHARINDEX, which will retrieve the first instance after the specified starting position. So the only way to utilise this is to cascade each value found, i.e:
Find the position of 1st "/"
Find the position of the next "/" after the first one
Find the position of the next "/" after the second one
So each calculation requires the result of the previous one, which to get the 5th occurrence gets fairly messy, but not impossible if you use CROSS APPLY to reuse your results. Once you have the position of the 4th and 5th occurrence you can use SUBSTRING to extract the text:
-- Walk the URL slash by slash: each CROSS APPLY finds the next '/' after the
-- previous one and exposes the position just past it. The value between the
-- 4th and 5th slashes is then a single SUBSTRING.
SELECT t.url,
       SUBSTRING(t.url, AfterSlash4.Position, AfterSlash5.Position - AfterSlash4.Position - 1) AS Value
FROM (SELECT 'URL:/www.google.com/hsisn/-#++#/valuetoretrive/+#(#(/.html' AS url) AS t
CROSS APPLY (SELECT CHARINDEX('/', t.url) + 1)                       AS AfterSlash1 (Position)
CROSS APPLY (SELECT CHARINDEX('/', t.url, AfterSlash1.Position) + 1) AS AfterSlash2 (Position)
CROSS APPLY (SELECT CHARINDEX('/', t.url, AfterSlash2.Position) + 1) AS AfterSlash3 (Position)
CROSS APPLY (SELECT CHARINDEX('/', t.url, AfterSlash3.Position) + 1) AS AfterSlash4 (Position)
CROSS APPLY (SELECT CHARINDEX('/', t.url, AfterSlash4.Position) + 1) AS AfterSlash5 (Position);
ADDENDUM
The other option you have, if you want more flexibility, i.e. get the text between the 50th and 51st occurrence, is to utilise a split function. The most efficient way to split strings is with a CLR function, but the next best T-SQL only method for this purpose is to use a numbers table to split your string, and in the absence of this create your own using stacked CTEs.
I will assume that you don't have a numbers table and use a stacked CTE in the interest of a complete working example.
-- Splits @StringToSplit on @Delimiter and returns (Position, Value) rows,
-- one per element, with Position numbered from 1 left to right.
CREATE FUNCTION dbo.Split (@StringToSplit VARCHAR(1000), @Delimiter CHAR(1))
RETURNS TABLE
AS
RETURN
(   -- Inline tally: N1 = 10 rows, N2 = 100, Numbers = 1,000 — enough to
    -- cover every character position of a VARCHAR(1000) input.
    WITH N1 (N) AS (SELECT 1 FROM (VALUES (1), (1), (1), (1), (1), (1), (1), (1), (1), (1)) AS t (n)),
         N2 (N) AS (SELECT 1 FROM N1 AS N1 CROSS JOIN N1 AS N2),
         Numbers (N) AS (SELECT ROW_NUMBER() OVER(ORDER BY n1.N) FROM N1 CROSS JOIN N2 AS N2)
    SELECT Position = ROW_NUMBER() OVER(ORDER BY n.N),
           -- When no further delimiter exists CHARINDEX returns 0; NULLIF/ISNULL
           -- then substitutes LEN + 1 so the final element keeps its last
           -- character (LEN alone truncates it, e.g. '.html' came back '.htm').
           Value = SUBSTRING(@StringToSplit, n.N, ISNULL(NULLIF(CHARINDEX(@Delimiter, @StringToSplit, n.N + 1), 0), LEN(@StringToSplit) + 1) - n.N)
    FROM Numbers AS n
    -- Prepending the delimiter makes position 1 and every position directly
    -- after a delimiter qualify as the start of an element.
    WHERE SUBSTRING(@Delimiter + @StringToSplit, n.N, 1) = @Delimiter
);
Which you can call fairly simply:
-- Split every URL in the table into its '/'-separated parts.
DECLARE @Table TABLE (URL VARCHAR(255) NOT NULL);
INSERT @Table (URL) VALUES ('URL:/www.google.com/hsisn/-#++#/valuetoretrive/+#(#(/.html');

SELECT s.*
FROM @Table AS t
CROSS APPLY dbo.Split(t.URL, '/') AS s;
Which gives you:
Position Value
---------------------
1 URL:
2 www.google.com
3 hsisn
4 -#++#
5 valuetoretrive
6 +#(#(
7 .html
So you can simply select the 5th value from this by adding a where clause.:
-- Filter on Position = 5 to pick the value between the 4th and 5th slashes.
DECLARE @Table TABLE (URL VARCHAR(255) NOT NULL);
INSERT @Table (URL)
VALUES
('URL:/www.google.com/hsisn/-#++#/valuetoretrive/+#(#(/.html'),
('URL:/www.google.com/hsisn/-#++#/valuetoretrive2/+#(#(/.html');

SELECT t.URL, s.Value
FROM @Table AS t
CROSS APPLY dbo.Split(t.URL, '/') AS s
WHERE s.Position = 5;
If you don't know before hand the lenght of the url or the value to retrieve or slash positions you can use this snipet
declare @uri varchar(max) = 'URL:/www.google.com/hsisn/-#++#/valuetoretrive/+#(#(/.html'
       ,@startAt int = 0
       ,@slashCount int = 0

-- Peel off one leading segment per iteration until the 5th slash is reached,
-- at which point @uri holds just the segment before that slash.
while @slashCount < 5
begin
    set @startAt = CHARINDEX('/', @uri);
    set @slashCount = @slashCount + 1;

    if (@slashCount = 5)
        -- SUBSTRING with start 0 returns @startAt - 1 characters, i.e.
        -- everything before the remaining string's next slash.
        set @uri = SUBSTRING(@uri, 0, @startAt)
    else
        -- Drop everything up to and including the slash just found.
        set @uri = SUBSTRING(@uri, @startAt + 1, LEN(@uri))

    -- debug info
    select @startAt, @slashCount, @uri
end
It will decompose the string, trimming off segments until it finds the 4th and 5th slashes, and then keeps what lies between them.
OUTPUT
5 1 www.google.com/hsisn/-#++#/valuetoretrive/+#(#(/.html
15 2 hsisn/-#++#/valuetoretrive/+#(#(/.html
6 3 -#++#/valuetoretrive/+#(#(/.html
6 4 valuetoretrive/+#(#(/.html
15 5 valuetoretrive
You can also get it using a CROSS APPLY instead of a WHILE loop, but that way your code will need to get big and messy to reach anything after roughly the 10th slash.

Multiple group split string in SQL

I have a string like 1:2:3+4:5:6+7:8:9 Let's say 1,2,3 are bananas and 4,5,6 are apples etc.
I want to split the first string and insert table one by one like itemId=1 and itemGroup=b(anana).
My code is below. I need to ignore the "+" at the end of the string and recognize the groups (like select the banana group...) in order to insert into the table.
-- NOTE(review): the sp_ prefix is reserved for system procedures and is best
-- avoided; the name is kept here because callers reference it.
ALTER PROCEDURE [dbo].[sp_Test]
AS
Declare @itemsGroup varchar(200) = '1:2:3+4:5:6+7:8:9+'
Declare @items varchar(20) = null
Declare @numberGroup varchar(200)
Declare @number varchar(20) = null

-- Outer loop: split @itemsGroup on '+' into groups.
-- Inner loop: split each group on ':' into individual numbers.
WHILE LEN(@itemsGroup) > 0
BEGIN
    IF PATINDEX('%+%', @itemsGroup) > 0
    BEGIN
        -- Take everything before the first '+', then drop it (and the '+')
        -- from the remaining string.
        SET @items = SUBSTRING(@itemsGroup, 0, PATINDEX('%+%', @itemsGroup))
        --SELECT @items AS a
        SET @itemsGroup = SUBSTRING(@itemsGroup, LEN(@items + '+') + 1, LEN(@itemsGroup))
        SET @numberGroup = (SELECT @items)
        WHILE LEN(@numberGroup) > 0
        BEGIN
            IF PATINDEX('%:%', @numberGroup) > 0
            BEGIN
                SET @number = SUBSTRING(@numberGroup, 0, PATINDEX('%:%', @numberGroup))
                SELECT @number AS x
                SET @numberGroup = SUBSTRING(@numberGroup, LEN(@number + ':') + 1, LEN(@numberGroup))
            END
            ELSE
            BEGIN
                -- Last number of the group: no ':' remains.
                SET @number = @numberGroup
                SET @numberGroup = NULL
                SELECT @number AS y
            END
        END
    END
    ELSE
    BEGIN
        -- Last group: no '+' remains.
        SET @items = @itemsGroup
        SET @itemsGroup = NULL
        --SELECT @items AS b
    END
END
EDITED: Split string and insert all rows
@UserId INT,
@FavoritesString VARCHAR(MAX)
AS
DECLARE
@FavoriteItemType INT,
@FavoriteItemId INT

-- Split on '+' into groups, then each group on ':' into items, and insert
-- every (item, group number, user) triple in a single set-based statement.
INSERT INTO Favorite (FavoriteItemId, FavoriteItemType, UserId)
SELECT itm.Item, grp.ItemNumber, @UserId
FROM dbo.DelimitedSplit8K(@FavoritesString, '+') grp
CROSS APPLY dbo.DelimitedSplit8K(grp.Item, ':') itm
Get yourself a splitting function and use it in simple CROSS APPLY query. Below is used DelimitedSplit8K from SQLServerCentral, but you can use any other (plenty are available on the net) or write your own.
Declare @itemsGroup varchar(200) = '1:2:3+4:5:6+7:8:9'

-- grp.ItemNumber identifies each '+'-separated group; itm.Item is each
-- ':'-separated value within that group.
SELECT grp.ItemNumber AS GroupID, itm.Item
FROM dbo.DelimitedSplit8K(@itemsGroup, '+') grp
CROSS APPLY dbo.DelimitedSplit8K(grp.Item, ':') itm
SQLFiddle DEMO
DelimitedSplit8K:
SET QUOTED_IDENTIFIER ON
SET ANSI_NULLS ON
GO
CREATE FUNCTION [dbo].[DelimitedSplit8K]
/**********************************************************************************************************************
Purpose:
Split a given string at a given delimiter and return a list of the split elements (items).
Notes:
1. Leading a trailing delimiters are treated as if an empty string element were present.
2. Consecutive delimiters are treated as if an empty string element were present between them.
3. Except when spaces are used as a delimiter, all spaces present in each element are preserved.
Returns:
iTVF containing the following:
ItemNumber = Element position of Item as a BIGINT (not converted to INT to eliminate a CAST)
Item = Element value as a VARCHAR(8000)
Statistics on this function may be found at the following URL:
http://www.sqlservercentral.com/Forums/Topic1101315-203-4.aspx
CROSS APPLY Usage Examples and Tests:
--=====================================================================================================================
-- TEST 1:
-- This tests for various possible conditions in a string using a comma as the delimiter. The expected results are
-- laid out in the comments
--=====================================================================================================================
--===== Conditionally drop the test tables to make reruns easier for testing.
-- (this is NOT a part of the solution)
IF OBJECT_ID('tempdb..#JBMTest') IS NOT NULL DROP TABLE #JBMTest
;
--===== Create and populate a test table on the fly (this is NOT a part of the solution).
-- In the following comments, "b" is a blank and "E" is an element in the left to right order.
-- Double Quotes are used to encapsulate the output of "Item" so that you can see that all blanks
-- are preserved no matter where they may appear.
SELECT *
INTO #JBMTest
FROM ( --# & type of Return Row(s)
SELECT 0, NULL UNION ALL --1 NULL
SELECT 1, SPACE(0) UNION ALL --1 b (Empty String)
SELECT 2, SPACE(1) UNION ALL --1 b (1 space)
SELECT 3, SPACE(5) UNION ALL --1 b (5 spaces)
SELECT 4, ',' UNION ALL --2 b b (both are empty strings)
SELECT 5, '55555' UNION ALL --1 E
SELECT 6, ',55555' UNION ALL --2 b E
SELECT 7, ',55555,' UNION ALL --3 b E b
SELECT 8, '55555,' UNION ALL --2 b B
SELECT 9, '55555,1' UNION ALL --2 E E
SELECT 10, '1,55555' UNION ALL --2 E E
SELECT 11, '55555,4444,333,22,1' UNION ALL --5 E E E E E
SELECT 12, '55555,4444,,333,22,1' UNION ALL --6 E E b E E E
SELECT 13, ',55555,4444,,333,22,1,' UNION ALL --8 b E E b E E E b
SELECT 14, ',55555,4444,,,333,22,1,' UNION ALL --9 b E E b b E E E b
SELECT 15, ' 4444,55555 ' UNION ALL --2 E (w/Leading Space) E (w/Trailing Space)
SELECT 16, 'This,is,a,test.' --E E E E
) d (SomeID, SomeValue)
;
--===== Split the CSV column for the whole table using CROSS APPLY (this is the solution)
SELECT test.SomeID, test.SomeValue, split.ItemNumber, Item = QUOTENAME(split.Item,'"')
FROM #JBMTest test
CROSS APPLY dbo.DelimitedSplit8K(test.SomeValue,',') split
;
--=====================================================================================================================
-- TEST 2:
-- This tests for various "alpha" splits and COLLATION using all ASCII characters from 0 to 255 as a delimiter against
-- a given string. Note that not all of the delimiters will be visible and some will show up as tiny squares because
-- they are "control" characters. More specifically, this test will show you what happens to various non-accented
-- letters for your given collation depending on the delimiter you chose.
--=====================================================================================================================
WITH
cteBuildAllCharacters (String,Delimiter) AS
(
SELECT TOP 256
'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789',
CHAR(ROW_NUMBER() OVER (ORDER BY (SELECT NULL))-1)
FROM master.sys.all_columns
)
SELECT ASCII_Value = ASCII(c.Delimiter), c.Delimiter, split.ItemNumber, Item = QUOTENAME(split.Item,'"')
FROM cteBuildAllCharacters c
CROSS APPLY dbo.DelimitedSplit8K(c.String,c.Delimiter) split
ORDER BY ASCII_Value, split.ItemNumber
;
-----------------------------------------------------------------------------------------------------------------------
Other Notes:
1. Optimized for VARCHAR(8000) or less. No testing or error reporting for truncation at 8000 characters is done.
2. Optimized for single character delimiter. Multi-character delimiters should be resolvedexternally from this
function.
3. Optimized for use with CROSS APPLY.
4. Does not "trim" elements just in case leading or trailing blanks are intended.
5. If you don't know how a Tally table can be used to replace loops, please see the following...
http://www.sqlservercentral.com/articles/T-SQL/62867/
6. Changing this function to use NVARCHAR(MAX) will cause it to run twice as slow. It's just the nature of
VARCHAR(MAX) whether it fits in-row or not.
7. Multi-machine testing for the method of using UNPIVOT instead of 10 SELECT/UNION ALLs shows that the UNPIVOT method
is quite machine dependent and can slow things down quite a bit.
-----------------------------------------------------------------------------------------------------------------------
Credits:
This code is the product of many people's efforts including but not limited to the following:
cteTally concept originally by Iztek Ben Gan and "decimalized" by Lynn Pettis (and others) for a bit of extra speed
and finally redacted by Jeff Moden for a different slant on readability and compactness. Hat's off to Paul White for
his simple explanations of CROSS APPLY and for his detailed testing efforts. Last but not least, thanks to
Ron "BitBucket" McCullough and Wayne Sheffield for their extreme performance testing across multiple machines and
versions of SQL Server. The latest improvement brought an additional 15-20% improvement over Rev 05. Special thanks
to "Nadrek" and "peter-757102" (aka Peter de Heer) for bringing such improvements to light. Nadrek's original
improvement brought about a 10% performance gain and Peter followed that up with the content of Rev 07.
I also thank whoever wrote the first article I ever saw on "numbers tables" which is located at the following URL
and to Adam Machanic for leading me to it many years ago.
http://sqlserver2000.databases.aspfaq.com/why-should-i-consider-using-an-auxiliary-numbers-table.html
-----------------------------------------------------------------------------------------------------------------------
Revision History:
Rev 00 - 20 Jan 2010 - Concept for inline cteTally: Lynn Pettis and others.
Redaction/Implementation: Jeff Moden
- Base 10 redaction and reduction for CTE. (Total rewrite)
Rev 01 - 13 Mar 2010 - Jeff Moden
- Removed one additional concatenation and one subtraction from the SUBSTRING in the SELECT List for that tiny
bit of extra speed.
Rev 02 - 14 Apr 2010 - Jeff Moden
- No code changes. Added CROSS APPLY usage example to the header, some additional credits, and extra
documentation.
Rev 03 - 18 Apr 2010 - Jeff Moden
- No code changes. Added notes 7, 8, and 9 about certain "optimizations" that don't actually work for this
type of function.
Rev 04 - 29 Jun 2010 - Jeff Moden
- Added WITH SCHEMABINDING thanks to a note by Paul White. This prevents an unnecessary "Table Spool" when the
function is used in an UPDATE statement even though the function makes no external references.
Rev 05 - 02 Apr 2011 - Jeff Moden
- Rewritten for extreme performance improvement especially for larger strings approaching the 8K boundary and
for strings that have wider elements. The redaction of this code involved removing ALL concatenation of
delimiters, optimization of the maximum "N" value by using TOP instead of including it in the WHERE clause,
and the reduction of all previous calculations (thanks to the switch to a "zero based" cteTally) to just one
instance of one add and one instance of a subtract. The length calculation for the final element (not
followed by a delimiter) in the string to be split has been greatly simplified by using the ISNULL/NULLIF
combination to determine when the CHARINDEX returned a 0 which indicates there are no more delimiters to be
had or to start with. Depending on the width of the elements, this code is between 4 and 8 times faster on a
single CPU box than the original code especially near the 8K boundary.
- Modified comments to include more sanity checks on the usage example, etc.
- Removed "other" notes 8 and 9 as they were no longer applicable.
Rev 06 - 12 Apr 2011 - Jeff Moden
- Based on a suggestion by Ron "Bitbucket" McCullough, additional test rows were added to the sample code and
the code was changed to encapsulate the output in pipes so that spaces and empty strings could be perceived
in the output. The first "Notes" section was added. Finally, an extra test was added to the comments above.
Rev 07 - 06 May 2011 - Peter de Heer, a further 15-20% performance enhancement has been discovered and incorporated
into this code which also eliminated the need for a "zero" position in the cteTally table.
**********************************************************************************************************************/
--===== Define I/O parameters
(#pString VARCHAR(8000), #pDelimiter CHAR(1))
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
--===== "Inline" CTE Driven "Tally Table" produces values from 0 up to 10,000...
-- enough to cover NVARCHAR(4000)
WITH E1(N) AS (
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
), --10E+1 or 10 rows
E2(N) AS (SELECT 1 FROM E1 a, E1 b), --10E+2 or 100 rows
E4(N) AS (SELECT 1 FROM E2 a, E2 b), --10E+4 or 10,000 rows max
cteTally(N) AS (--==== This provides the "base" CTE and limits the number of rows right up front
-- for both a performance gain and prevention of accidental "overruns"
SELECT TOP (ISNULL(DATALENGTH(#pString),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
),
cteStart(N1) AS (--==== This returns N+1 (starting position of each "element" just once for each delimiter)
SELECT 1 UNION ALL
SELECT t.N+1 FROM cteTally t WHERE SUBSTRING(#pString,t.N,1) = #pDelimiter
),
cteLen(N1,L1) AS(--==== Return start and length (for use in substring)
SELECT s.N1,
ISNULL(NULLIF(CHARINDEX(#pDelimiter,#pString,s.N1),0)-s.N1,8000)
FROM cteStart s
)
--===== Do the actual split. The ISNULL/NULLIF combo handles the length for the final element when no delimiter is found.
SELECT ItemNumber = ROW_NUMBER() OVER(ORDER BY l.N1),
Item = SUBSTRING(#pString, l.N1, l.L1)
FROM cteLen l
;
GO
Try this one -
-- Split a two-level delimited string ('+' separates groups, ':' separates items)
-- by converting it to XML and shredding it with .nodes().
-- NOTE: T-SQL variables use the @ sigil (DECLARE @x ...), not #.
DECLARE @itemsGroup VARCHAR(200) = '1:2:3+4:5:6+7:8:9+'

SELECT GroupID, item
FROM
(
    -- Outer split: one row per '+'-delimited group, numbered in encounter order.
    SELECT
          r = t.c.value('.', 'VARCHAR(200)')
        -- (SELECT NULL) is the standard way to get a ROW_NUMBER without a
        -- meaningful ordering; constants are not allowed in a window ORDER BY.
        , GroupID = ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
    FROM (
        SELECT txml = CAST('<r>' + REPLACE(@itemsGroup, '+', '</r><r>') + '</r>' AS XML)
    ) r
    CROSS APPLY txml.nodes('/r') t(c)
    WHERE t.c.value('.', 'VARCHAR(200)') != ''  -- drop the empty element produced by the trailing '+'
) t1
CROSS APPLY
(
    -- Inner split: one row per ':'-delimited item within the current group.
    SELECT item = t.c.value('.', 'INT')
    FROM (
        SELECT txml = CAST('<r>' + REPLACE(t1.r, ':', '</r><r>') + '</r>' AS XML)
    ) r
    CROSS APPLY txml.nodes('/r') t(c)
) t2

CSV of IDs to CSV of Values

Say I have a column in a database that consists of a comma separated list of IDs (please don't ask why :( ), i.e. a column like this:
id | ids
----------
1 | 1,3,4
2 | 2
3 | 1,2,5
And a table the ids relate to:
id | thing
---------------
1 | fish
2 | elephant
3 | monkey
4 | mongoose
5 | kiwi
How can I select a comma-separated list of the things, based on an id in the first table? For instance, selecting 1 would give me 'fish,monkey,mongoose', and 3 would give me 'fish,elephant,kiwi', etc.
Thanks!
Try this
-- For each row of Table1, build a comma-separated list of the thing names whose
-- ids appear in Table1.ids (a CSV column), concatenated via FOR XML PATH.
SELECT ID, things = STUFF(
(
SELECT ',' + t2.thing
FROM Table2 AS t2
INNER JOIN Table1 AS ti
-- Wrap both sides in commas so id 1 cannot match inside "11":
-- ',1,3,4,' LIKE '%,1,%' is true only for a whole-token match.
ON ',' + ti.ids + ',' LIKE '%,' + CONVERT(VARCHAR(12), t2.id) + ',%'
WHERE ti.ID = tout.ID
FOR XML PATH, TYPE
-- TYPE + .value() un-escapes XML entities (&amp; etc.); STUFF strips the leading comma.
).value('.[1]', 'nvarchar(max)'), 1, 1, '')
FROM Table1 AS tout
ORDER BY ID
SQL FIDDLE DEMO
Basically this will be the whole query:
WITH CTE AS
(
SELECT t1.id, t2.thing
FROM Table1 t1
CROSS APPLY dbo.DelimitedSplit8K(ids,',') x
INNER JOIN Table2 t2 ON x.item = t2.id
)
SELECT DISTINCT id,
STUFF ((SELECT ',' + c1.thing FROM CTE c1
WHERE c1.id = c2.id
FOR XML PATH ('')
),1,1,'')AS things
FROM CTE c2
But first you may notice I have used DelimitedSplit8K function for splitting. It is available from SQLServerCentral - http://www.sqlservercentral.com/articles/Tally+Table/72993/
but I will post the code below. You can use any other splitting function as well, but this one is really good and fast.
Other steps, I have already mentioned in comments. After splitting we JOIN to other tables to get the names and then use STUFF and FOR XML PATH to concatenate names back to one string.
SQLFiddleDEMO
Splitting function:
CREATE FUNCTION [dbo].[DelimitedSplit8K]
/**********************************************************************************************************************
 Purpose:
 Split a given string at a given single-character delimiter and return the split elements (items) as an inline
 table-valued function (iTVF).

 Returns:
   ItemNumber = element position of Item as a BIGINT (left as BIGINT to eliminate a CAST)
   Item       = element value as a VARCHAR(8000)

 Notes:
 1. Leading and trailing delimiters are treated as if an empty-string element were present.
 2. Consecutive delimiters are treated as if an empty-string element were present between them.
 3. Except when spaces are used as the delimiter, all spaces present in each element are preserved.
 4. Optimized for VARCHAR(8000) or less. No testing or error reporting for truncation at 8000 characters is done.
 5. Optimized for a single-character delimiter and for use with CROSS APPLY. Multi-character delimiters should be
    resolved externally from this function.
 6. Does not "trim" elements, in case leading or trailing blanks are intended.
 7. Changing this function to use NVARCHAR/VARCHAR(MAX) will cause it to run roughly twice as slow.

 Credits:
 cteTally concept originally by Itzik Ben-Gan, "decimalized" by Lynn Pettis (and others), redacted by Jeff Moden,
 with performance contributions from Paul White, Ron "BitBucket" McCullough, Wayne Sheffield, "Nadrek", and
 Peter de Heer ("peter-757102"). Statistics and the full revision history (Rev 00 - Rev 07) may be found at:
   http://www.sqlservercentral.com/Forums/Topic1101315-203-4.aspx
   http://www.sqlservercentral.com/articles/Tally+Table/72993/
**********************************************************************************************************************/
--===== Define I/O parameters (T-SQL parameters take the @ sigil, not #)
        (@pString VARCHAR(8000), @pDelimiter CHAR(1))
RETURNS TABLE WITH SCHEMABINDING AS
 RETURN
--===== "Inline" CTE-driven "Tally Table" produces values from 1 up to 10,000...
     -- enough to cover VARCHAR(8000).
WITH E1(N) AS (
                SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
                SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
                SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
              ),                          --10E+1 or 10 rows
     E2(N) AS (SELECT 1 FROM E1 a, E1 b), --10E+2 or 100 rows
     E4(N) AS (SELECT 1 FROM E2 a, E2 b), --10E+4 or 10,000 rows max
cteTally(N) AS (--==== This provides the "base" CTE and limits the number of rows right up front
                    -- for both a performance gain and prevention of accidental "overruns"
                SELECT TOP (ISNULL(DATALENGTH(@pString),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
               ),
cteStart(N1) AS (--==== This returns N+1 (starting position of each "element" just once for each delimiter)
                 SELECT 1 UNION ALL
                 SELECT t.N+1 FROM cteTally t WHERE SUBSTRING(@pString,t.N,1) = @pDelimiter
                ),
cteLen(N1,L1) AS (--==== Return start position and length (for use in SUBSTRING)
                  SELECT s.N1,
                         ISNULL(NULLIF(CHARINDEX(@pDelimiter,@pString,s.N1),0)-s.N1,8000)
                  FROM cteStart s
                 )
--===== Do the actual split. The ISNULL/NULLIF combo handles the length for the final element when no delimiter is found.
 SELECT ItemNumber = ROW_NUMBER() OVER(ORDER BY l.N1),
        Item       = SUBSTRING(@pString, l.N1, l.L1)
   FROM cteLen l
;
Try this one -
Query:
-- Sample data. Table variables require the @ sigil (DECLARE @x TABLE ...);
-- the # sigil is reserved for temp tables created via CREATE TABLE/SELECT INTO.
DECLARE @temp TABLE (id INT, ids NVARCHAR(50))
INSERT INTO @temp (id, ids)
VALUES (1, '1,3,4'), (2, '2'), (3, '1,2,5')

DECLARE @thing TABLE (id INT, thing NVARCHAR(50))
INSERT INTO @thing (id, thing)
VALUES (1, 'fish'), (2, 'elephant'), (3, 'monkey'), (4, 'mongoose'), (5, 'kiwi')

-- Split each CSV list into (id, token) pairs by building XML in which every
-- token becomes a <t> element carrying the owning row id in attribute "n" and
-- the token itself in attribute "s"; then join each token to its thing name.
;WITH cte AS (
    SELECT t.id, t2.thing
    FROM (
        SELECT
              id    = t.c.value('@n', 'INT')
            , token = t.c.value('@s', 'NVARCHAR(50)')
        FROM (
            SELECT field = CAST('<t s = "' +
                REPLACE(
                      t.ids + ','
                    , ','
                    , '" n = "' + CAST(t.id AS VARCHAR(10))
                        + '" /><t s = "') + '" />' AS XML)
            FROM @temp t
        ) d
        CROSS APPLY field.nodes('/t') t(c)
        WHERE t.c.exist('@n') = 1  -- the last generated <t> carries no "n" attribute; discard it
    ) t
    JOIN @thing t2 ON t.token = t2.id
)
-- Reassemble the names per id with the classic FOR XML PATH + STUFF concatenation
-- (STUFF removes the leading ', ').
SELECT id, things = STUFF((
        SELECT ', ' + t2.thing
        FROM cte t2
        WHERE t2.id = t.id
        FOR XML PATH(''), TYPE).value('.', 'NVARCHAR(MAX)'), 1, 2, '')
FROM @temp t
Results:
id things
----------- --------------------------
1 fish, monkey, mongoose
2 elephant
3 fish, elephant, kiwi
First, read this: http://www.sommarskog.se/arrays-in-sql.html
One way which works even in sql-server 2005 and below(?) is using this Split function:
-- Multi-statement TVF: split @ItemList at @delimiter and return one row per item.
-- T-SQL parameters, local variables, and table-variable return types all take
-- the @ sigil; # would denote a temp table and is invalid here.
CREATE FUNCTION Split
(
    @ItemList  NVARCHAR(MAX),
    @delimiter CHAR(1)
)
RETURNS @IDTable TABLE (Item VARCHAR(50))
AS
BEGIN
    DECLARE @tempItemList NVARCHAR(MAX)
    SET @tempItemList = @ItemList

    DECLARE @i INT
    DECLARE @Item NVARCHAR(4000)

    -- NOTE: this strips ALL spaces from the input, not just those adjacent to
    -- delimiters — acceptable for numeric ID lists, lossy for general text.
    SET @tempItemList = REPLACE (@tempItemList, ' ', '')
    SET @i = CHARINDEX(@delimiter, @tempItemList)

    -- Peel off one item per iteration; @i = 0 means no delimiter remains,
    -- so the rest of the string is the final item.
    WHILE (LEN(@tempItemList) > 0)
    BEGIN
        IF @i = 0
            SET @Item = @tempItemList
        ELSE
            SET @Item = LEFT(@tempItemList, @i - 1)
        INSERT INTO @IDTable(Item) VALUES(@Item)
        IF @i = 0
            SET @tempItemList = ''
        ELSE
            SET @tempItemList = RIGHT(@tempItemList, LEN(@tempItemList) - @i)
        SET @i = CHARINDEX(@delimiter, @tempItemList)
    END
    RETURN
END
Now this query works:
-- Look up the animals whose ids appear in the CSV list stored on one Things row.
-- T-SQL variables take the @ sigil (DECLARE @x ...), not #.
DECLARE @firstID INT
SET @firstID = 1

SELECT a.id, a.thing AS Animal
FROM dbo.Animals a
WHERE id IN(
    SELECT Item
    FROM dbo.Split((SELECT TOP 1 ids FROM dbo.Things WHERE id=@firstID), ',')
)
Demo