tsql function split string - sql

I wonder if anyone can help me.
I need a tsql function to split a given value such as:
1) 00 Not specified
2) 01-05 Global WM&BB | Operations
3) 02-05-01 Global WM&BB | Operations | Operations n/a
I need to get a result like this:
cat1 cat1descr cat2 cat2descr cat3 cat3descr
----------------------------------------------------------------
00 Not specified null null null null
01 Global WM&BB 05 Operations null null
02 Global WM&BB 05 Operations 01 Operations n/a
Result will have always 6 columns
select funcX('00 Not specified');
cat1 cat1descr cat2 cat2descr cat3 cat3descr
----------------------------------------------------------------
00 Not specified null null null null

This will work on SQL Server 2005 and SQL Server 2008. I have assumed that the leading code always consists of one, two, or three 2-digit groups separated by dashes. You can do this with fewer cascading CTEs, but I find the SUBSTRING/CHARINDEX/LEN syntax can quickly become very difficult to read and debug.
-- Demo: split values such as '02-05-01 Global WM&BB | Operations | Operations n/a'
-- into up to three 2-digit category codes and their '|'-separated descriptions.
-- Six columns are always returned; parts that are absent come back as NULL.
DECLARE @foo TABLE
(
    bar VARCHAR(4000)
);

INSERT @foo (bar)
SELECT '00 Not specified'
UNION ALL SELECT '01-05 Global WM&BB | Operations'
UNION ALL SELECT '02-05-01 Global WM&BB | Operations | Operations n/a';

WITH split1 AS
(
    -- n = code group before the first space, w = everything after it.
    -- Searching bar + ' ' keeps CHARINDEX from returning 0 (which would hand
    -- SUBSTRING a negative length) when a value contains no space at all.
    SELECT
        n = SUBSTRING(bar, 1, CHARINDEX(' ', bar + ' ') - 1),
        w = SUBSTRING(bar, CHARINDEX(' ', bar + ' ') + 1, LEN(bar))
    FROM @foo
),
split2 AS
(
    -- Codes sit at fixed offsets 1, 4 and 7 of n; wl/wr are the text left and
    -- right of the first '|'. wr is NULL when there is no '|'.
    SELECT
        cat1 = LEFT(n, 2),
        cat2 = NULLIF(SUBSTRING(n, 4, 2), ''),
        cat3 = NULLIF(SUBSTRING(n, 7, 2), ''),
        wl = RTRIM(SUBSTRING(w, 1,
                COALESCE(NULLIF(CHARINDEX('|', w), 0) - 1, LEN(w)))),
        wr = LTRIM(SUBSTRING(w, NULLIF(CHARINDEX('|', w), 0) + 1, LEN(w)))
    FROM split1
),
split3 AS
(
    -- Split wr once more on '|'. The codes are carried through so no self-join
    -- on a generated row number is needed to line codes up with descriptions.
    SELECT
        cat1,
        cat2,
        cat3,
        cat1descr = wl,
        cat2descr = RTRIM(SUBSTRING(wr, 1,
                COALESCE(NULLIF(CHARINDEX('|', wr), 0) - 1, LEN(wr)))),
        cat3descr = LTRIM(SUBSTRING(wr,
                NULLIF(CHARINDEX('|', wr), 0) + 1, LEN(wr)))
    FROM split2
)
SELECT
    cat1, cat1descr,
    cat2, cat2descr,
    cat3, cat3descr
FROM split3;

You can do this using PatIndex and SubString

If #In is the value of the string you're trying to parse, then try this:
-- Splits @In, shaped as '<codes> <descriptions>', into cat1..cat3 and their
-- descriptions. Codes are separated by '-', descriptions by '|'.
-- A code or description that is not present is returned as NULL.
SELECT
    CASE WHEN zz.firstDash = 0 THEN zz.sKey
         ELSE LEFT(zz.sKey, zz.firstDash - 1) END AS cat1,
    LTRIM(RTRIM(CASE WHEN zz.firstPipe = 0 THEN zz.vals
                     ELSE LEFT(zz.vals, zz.firstPipe - 1) END)) AS cat1Desc,
    CASE WHEN zz.firstDash = 0 THEN NULL
         WHEN zz.secondDash = 0
             THEN SUBSTRING(zz.sKey, zz.firstDash + 1, LEN(zz.sKey))
         ELSE SUBSTRING(zz.sKey, zz.firstDash + 1,
                        zz.secondDash - zz.firstDash - 1) END AS cat2,
    LTRIM(RTRIM(CASE WHEN zz.firstPipe = 0 THEN NULL
                     WHEN zz.secondPipe = 0
                         THEN SUBSTRING(zz.vals, zz.firstPipe + 1, LEN(zz.vals))
                     ELSE SUBSTRING(zz.vals, zz.firstPipe + 1,
                                    zz.secondPipe - zz.firstPipe - 1) END)) AS cat2Desc,
    CASE WHEN zz.secondDash > 0
             THEN SUBSTRING(zz.sKey, zz.secondDash + 1,
                            LEN(zz.sKey) - zz.secondDash) END AS cat3,
    LTRIM(RTRIM(CASE WHEN zz.secondPipe > 0
                         THEN SUBSTRING(zz.vals, zz.secondPipe + 1,
                                        LEN(zz.vals) - zz.secondPipe) END)) AS cat3Desc
FROM (
    -- Delimiter positions; the "second" search starts one past the first hit.
    SELECT
        z.sKey,
        z.vals,
        CHARINDEX('-', z.sKey) AS firstDash,
        CHARINDEX('-', z.sKey, 1 + CHARINDEX('-', z.sKey)) AS secondDash,
        CHARINDEX('|', z.vals) AS firstPipe,
        CHARINDEX('|', z.vals, 1 + CHARINDEX('|', z.vals)) AS secondPipe
    FROM (
        -- sKey = text before the first space, vals = text after it.
        SELECT
            LEFT(@In, CHARINDEX(' ', @In) - 1) AS sKey,
            SUBSTRING(@In, CHARINDEX(' ', @In) + 1, LEN(@In)) AS vals
    ) AS z
) AS zz;

Related

How to get 3rd string part with CharIndex/SubString [duplicate]

This question already has answers here:
Using T-SQL, return nth delimited element from a string
(14 answers)
Closed 3 years ago.
Hi everyone, I'm trying to separate a name column into 4 different parts. Right now all the name parts are separated by spaces ' '. My problem is that the third string also picks up the fourth part of the name (usually a suffix), which I want to end up in the fourth string instead. I will be running this with names of different lengths; I'm just using Robert Dobson Bud jr as an example. Other names could have two parts or more.
-- Code for parsing a name with multiple parts
-- You should be able to copy and paste this into any MS-SQL Environment it doesn't use a certain table.
-- NOTE: the logic is kept as posted so the reported symptom stays reproducible:
-- @thirdString takes everything from the second space to the end of the string,
-- so a fourth part (e.g. a suffix) ends up inside it.
DECLARE @nameString AS varchar(max),
    @firstSpaceLoc AS smallint,
    @secondSpaceLoc AS smallint,
    @thirdSpaceLoc AS smallint,
    @forthSpaceLoc AS smallint,
    @firstString AS varchar(max),
    @secondString AS varchar(max),
    @thirdString AS varchar(max),
    @fourthString AS varchar(max)
-- Create some type of loop or case statement to run through the entire table.
SET @nameString = 'Robert Dobson Bud jr'
-- Positions of the first and second space (0 when not found).
SET @firstSpaceLoc = CHARINDEX(' ',@nameString,1)
SET @secondSpaceLoc = CHARINDEX(' ', @nameString, CHARINDEX(' ',@nameString,1)+1)
-- Position of the third space, or 0 when there is no second space.
SET @thirdSpaceLoc =
    CASE
        WHEN CHARINDEX(' ',
                @nameString,
                CHARINDEX(' ',@nameString,1)+1) = 0 THEN 0
        WHEN CHARINDEX(' ',
                @nameString,
                CHARINDEX(' ',@nameString,1)+1) > 0 THEN
            CHARINDEX(' ', @nameString,
                CHARINDEX(' ', @nameString,
                    CHARINDEX(' ',@nameString,1)+1)+1)
    END
-- NOTE: the second WHEN has the same condition as the third, so the third
-- branch is never reached; the variable is not read again below.
SET @forthSpaceLoc =
    CASE
        WHEN CHARINDEX(' ',
                @nameString,
                CHARINDEX(' ',@nameString,1)+1) = 0 THEN 0
        WHEN CHARINDEX(' ',
                @nameString,
                CHARINDEX(' ',@nameString,1)+1) > 0 THEN 0
        WHEN CHARINDEX(' ',
                @nameString,
                CHARINDEX(' ',@nameString,1)+1) > 0 THEN
            CHARINDEX(' ',
                @nameString,
                CHARINDEX(' ', @nameString,
                    CHARINDEX(' ', @nameString,
                        CHARINDEX(' ',@nameString,1)+1)+1)+1)
    END
SELECT
    -- Text before the first space, or the whole string when there is no space.
    @firstString =
        CASE
            WHEN @firstSpaceLoc > 0 THEN LEFT(@nameString,CHARINDEX(' ',@nameString,1)-1)
            ELSE @nameString
        END,
    -- Text between the first and second space (or the remainder when there
    -- is only one space).
    @secondString =
        CASE
            WHEN @firstSpaceLoc = 0 THEN ''
            WHEN @secondSpaceLoc = 0 THEN
                RIGHT(@nameString, LEN(@nameString)- CHARINDEX(' ',@nameString,1))
            WHEN @secondSpaceLoc > 0 THEN
                REPLACE (
                    SUBSTRING (
                        @nameString, CHARINDEX(' ',@nameString,1)+1, CHARINDEX(' ', @nameString, CHARINDEX(' ',@nameString,1)+1)
                        - CHARINDEX(' ',@nameString,1)),' ',''
                )
            ELSE ''
        END,
    -- As posted: starts AT the second space and runs LEN(@nameString)
    -- characters, i.e. to the end of the string.
    @thirdString =
        CASE
            WHEN @firstSpaceLoc = 0 OR @secondSpaceLoc = 0 THEN ''
            WHEN @secondSpaceLoc > 0 THEN
                SUBSTRING (
                    @nameString,
                    CHARINDEX(' ', @nameString,
                        CHARINDEX(' ',@nameString,1)+1),
                    LEN(@nameString)
                )
        END,
    -- Everything from the third space to the end of the string.
    @fourthString =
        CASE
            WHEN @firstSpaceLoc = 0 OR @secondSpaceLoc = 0 OR @thirdSpaceLoc = 0 THEN ''
            WHEN @secondSpaceLoc > 0 AND @thirdSpaceLoc = 0 THEN ''
            WHEN @thirdSpaceLoc > 0 THEN
                SUBSTRING(
                    @nameString,
                    CHARINDEX(' ', @nameString,
                        CHARINDEX(' ', @nameString,
                            CHARINDEX(' ',@nameString,1)+1)+1),
                    LEN(@nameString)
                )
        END
-- Report names
SELECT
    @nameString sourceString,
    @firstString [First string],
    @secondString [Second string],
    @thirdString [Third string],
    @fourthString [Fourth String]
I would like to get rid of the jr in the 3rd column. The intention is to have 4 different columns with 4 different parts of the name.
This script will do the job
-- Splits @namestring on spaces and returns the first four parts as columns.
-- Parts that do not exist are returned as NULL.
DECLARE @namestring AS varchar(max);
SET @namestring = 'Robert Dobson Bud jr';
--SET @namestring = 'Robert Dobson'

-- cte walks the string one space at a time: [start] is the position of the
-- delimiter preceding a part (0 for the first part) and [end] the position of
-- the next space (0 once no further space is found, which ends the recursion).
WITH cte AS (
    SELECT
        CAST(0 AS int) AS [start],
        CHARINDEX(' ', @namestring, 0) AS [end],
        @namestring AS namestring
    UNION ALL
    SELECT
        CAST(cte.[end] AS int) AS [start],
        CHARINDEX(' ', @namestring, cte.[end] + 1) AS [end],
        @namestring AS namestring
    FROM cte
    WHERE cte.[end] > 0
),
cte2 AS (
    -- seq numbers the parts left to right; part is the text between delimiters.
    SELECT
        ROW_NUMBER() OVER (ORDER BY cte.[start]) AS seq,
        SUBSTRING(
            @namestring,
            cte.[start] + 1,
            (CASE WHEN cte.[end] = 0 THEN LEN(@namestring) + 1 ELSE cte.[end] END)
                - cte.[start] - 1
        ) AS part
    FROM cte
)
-- One pass over cte2 instead of four scalar subqueries that each re-run it.
SELECT
    MAX(CASE WHEN seq = 1 THEN part END) AS [First String],
    MAX(CASE WHEN seq = 2 THEN part END) AS [Second String],
    MAX(CASE WHEN seq = 3 THEN part END) AS [Third String],
    MAX(CASE WHEN seq = 4 THEN part END) AS [Fourt String]
FROM cte2;
for 4 part name result will be as below
First String Second String Third String Fourt String
Robert Dobson Bud jr
for 2 part name result will be as below
First String Second String Third String Fourt String
Robert Dobson NULL NULL
The reason why you're getting "jr" in the third string is somewhat mystifying. It's in this part of the code:
-- Excerpt under discussion: the SUBSTRING length is the full string length.
@thirdString = CASE
    WHEN @firstSpaceLoc = 0 OR @secondSpaceLoc = 0 THEN ''
    WHEN @secondSpaceLoc > 0 THEN
        SUBSTRING (
            @nameString,
            CHARINDEX(' ', @nameString,
                CHARINDEX(' ',@nameString,1)+1),
            LEN(@nameString)
        )
Why are you using LEN(#nameString) for the third parameter of the SUBSTRING? Of course that will return the rest of the string, including the "Jr". You clearly knew not to do it that way when getting the #secondString value, how could you not know to do it that way when getting the #thirdString?
To get the #thirdString you need to use the same technique that you used for getting the #secondString.
Does this do what you want?
-- Splits @Str on spaces and pivots the first four items into Str1..Str4.
DECLARE @Str VARCHAR(45) = 'Robert Dobson Bud jr';

WITH CTE AS
(
    -- RN labels each item 'Str1', 'Str2', ... so PIVOT can spread them out.
    -- NOTE(review): STRING_SPLIT without its ordinal argument does not document
    -- an output order, so this numbering relies on observed behaviour - confirm
    -- before depending on it.
    SELECT
        Value AS V,
        'Str' + CAST(ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS VARCHAR(10)) AS RN
    FROM STRING_SPLIT(@Str, ' ')
)
SELECT
    P.[Str1],
    P.[Str2],
    P.[Str3],
    P.[Str4]
FROM CTE
PIVOT
(
    MAX(V) FOR RN IN ([Str1], [Str2], [Str3], [Str4])
) AS P;
Returns:
+--------+--------+------+------+
| Str1 | Str2 | Str3 | Str4 |
+--------+--------+------+------+
| Robert | Dobson | Bud | jr |
+--------+--------+------+------+
Live Demo
Using a splitting function, this can be arranged very simply.
-- Turns the first four space-separated items of @nameString into columns;
-- items that do not exist come back as NULL.
SELECT
    firstString  = MAX(CASE WHEN ItemNumber = 1 THEN Item END),
    secondString = MAX(CASE WHEN ItemNumber = 2 THEN Item END),
    thirdString  = MAX(CASE WHEN ItemNumber = 3 THEN Item END),
    fourthString = MAX(CASE WHEN ItemNumber = 4 THEN Item END)
FROM dbo.DelimitedSplit8K_LEAD(@nameString, ' ');
The code of the function was initially published and explained here. But I'm copying the definition.
CREATE FUNCTION [dbo].[DelimitedSplit8K_LEAD]
--===== Splits @pString on the single-character @pDelimiter and returns one row
     -- per element: ItemNumber (1-based position) and Item (the element text).
--===== Define I/O parameters
        (@pString VARCHAR(8000), @pDelimiter CHAR(1))
RETURNS TABLE WITH SCHEMABINDING AS
 RETURN
--===== "Inline" CTE Driven "Tally Table" produces values from 0 up to 10,000...
     -- enough to cover VARCHAR(8000)
 WITH E1(N) AS (
                SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
                SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
                SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
               ),                          --10E+1 or 10 rows
      E2(N) AS (SELECT 1 FROM E1 a, E1 b), --10E+2 or 100 rows
      E4(N) AS (SELECT 1 FROM E2 a, E2 b), --10E+4 or 10,000 rows max
cteTally(N) AS (--==== This provides the "zero base" and limits the number of rows right up front
                    -- for both a performance gain and prevention of accidental "overruns"
                SELECT 0 UNION ALL
                SELECT TOP (DATALENGTH(ISNULL(@pString,1))) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
               ),
cteStart(N1) AS (--==== This returns N+1 (starting position of each "element" just once for each delimiter)
                 SELECT t.N+1
                   FROM cteTally t
                  WHERE (SUBSTRING(@pString,t.N,1) = @pDelimiter OR t.N = 0)
                )
--===== Do the actual split. The ISNULL/NULLIF combo handles the length for the final element when no delimiter is found.
     -- LEAD supplies the start of the next element; its default of 1 for the last row
     -- becomes 0 after the subtraction, which NULLIF/ISNULL turn into a length of 8000.
 SELECT ItemNumber = ROW_NUMBER() OVER(ORDER BY s.N1),
        Item = SUBSTRING(@pString,s.N1,ISNULL(NULLIF((LEAD(s.N1,1,1) OVER (ORDER BY s.N1) - 1),0)-s.N1,8000))
   FROM cteStart s
;

Select comma separated string depending on existing columns

I have a table in my database which have a set of bit value columns. I want to insert a comma separated string in a new column, depending on the values of these bit columns.
Lets say the columns are named:
c1,c2,c3,c4
If the value in a column is equal to 1, I want to include the string mapped as following:
c1: 'campaign1'
c2: 'campaign2'
c3: 'campaign3'
c4: 'campaign4'
So for example, if a row contains the following values:
c1 = 1, c2 = 0, c3 = 1, c4 = 0
I want to insert the following in a new column named 'Campaigns'
'campaign1,campaign3'
Any suggestions on how I can accomplish this?
I would do this using stuff() and some string manipulation:
-- Builds the comma-separated campaign list for every row of t.
-- Each set flag contributes ',campaignN'; STUFF then strips the leading comma.
SELECT
    t.*,
    STUFF(flagged.list, 1, 1, '') AS campaigns
FROM t
CROSS APPLY (
    VALUES (
        CASE WHEN c1 = 1 THEN ',campaign1' ELSE '' END
        + CASE WHEN c2 = 1 THEN ',campaign2' ELSE '' END
        + CASE WHEN c3 = 1 THEN ',campaign3' ELSE '' END
        + CASE WHEN c4 = 1 THEN ',campaign4' ELSE '' END
    )
) AS flagged (list);
This works, but it might not be the best solution:
-- Demo: build 'campaignN,' fragments per set flag, then drop the final comma.
DECLARE @table TABLE (id INT, c1 INT, c2 INT, c3 INT, c4 INT);

INSERT INTO @table (id, c1, c2, c3, c4)
SELECT 1, 1, 0, 1, 0
UNION ALL
SELECT 2, 0, 0, 1, 1;

WITH cte AS (
    -- Every set flag appends its name followed by a comma.
    SELECT
        id,
        CASE WHEN c1 = 1 THEN 'campaign1,' ELSE '' END +
        CASE WHEN c2 = 1 THEN 'campaign2,' ELSE '' END +
        CASE WHEN c3 = 1 THEN 'campaign3,' ELSE '' END +
        CASE WHEN c4 = 1 THEN 'campaign4,' ELSE '' END AS campaigns
    FROM @table
)
SELECT
    id,
    -- Remove the trailing comma; rows with no flag set yield ''.
    CASE
        WHEN LEN(campaigns) > 0 THEN LEFT(campaigns, LEN(campaigns) - 1)
        ELSE ''
    END AS campaigns
FROM cte;
Gordon beat me to the STUFF method, which I was hastily writing when I realised this was better, but here's an answer that also shows how the UPDATE might work, as this is what you originally asked for:
-- Demo: persist the comma-separated campaign list into the campaigns column.
DECLARE @table TABLE (id INT, c1 INT, c2 INT, c3 INT, c4 INT, campaigns VARCHAR(512));

INSERT INTO @table (id, c1, c2, c3, c4, campaigns)
SELECT 1, 1, 0, 1, 0, NULL
UNION ALL
SELECT 2, 0, 0, 1, 1, NULL;

-- Intentionally updates every row: each set flag contributes ',campaignN'
-- and STUFF removes the leading comma.
UPDATE t
SET campaigns = STUFF(
        CASE WHEN c1 = 1 THEN ',campaign1' ELSE '' END +
        CASE WHEN c2 = 1 THEN ',campaign2' ELSE '' END +
        CASE WHEN c3 = 1 THEN ',campaign3' ELSE '' END +
        CASE WHEN c4 = 1 THEN ',campaign4' ELSE '' END, 1, 1, '')
FROM @table AS t;

SELECT * FROM @table;
Results:
id c1 c2 c3 c4 campaigns
1 1 0 1 0 campaign1,campaign3
2 0 0 1 1 campaign3,campaign4
Just to give another option, you can use concat and replace also
-- Demo: map each 0/1 flag to text with nested REPLACE calls.
-- '1' becomes the campaign name plus a comma, '0' becomes an empty string.
-- The result keeps a trailing comma and the column is left unnamed.
DECLARE @table TABLE (id INT, c1 INT, c2 INT, c3 INT, c4 INT);

INSERT INTO @table (id, c1, c2, c3, c4)
SELECT 1, 1, 0, 1, 0 UNION ALL SELECT 2, 0, 0, 1, 1;

SELECT
    t.*,
    REPLACE(REPLACE(c1, '1', 'Campaing1,'), '0', '') +
    REPLACE(REPLACE(c2, '1', 'Campaing2,'), '0', '') +
    REPLACE(REPLACE(c3, '1', 'Campaing3,'), '0', '') +
    REPLACE(REPLACE(c4, '1', 'Campaing4,'), '0', '')
FROM @table AS t;
The result is
id c1 c2 c3 c4 COLUMN1
-- -- -- -- -- -------
1 1 0 1 0 Campaing1,Campaing3,
2 0 0 1 1 Campaing3,Campaing4,
But I would go for Gordon's answer
You may use IIF function ( provided you're using SQL Server 2012+ ) as in the following :
-- Builds the comma-separated campaign list with IIF (SQL Server 2012+).
-- A separator is emitted before a campaign only when an earlier flag is set.
-- The "earlier flag" checks use = 1 comparisons rather than c1 + c2 > 0 so the
-- query also works when the columns are of type bit (which has no + operator)
-- and never emits a separator for a flag that did not print its campaign.
SELECT
    IIF(c1 = 1, 'Campaign1', '')
    + IIF(c2 = 1 AND c1 = 1, ',', '')
    + IIF(c2 = 1, 'Campaign2', '')
    + IIF(c3 = 1 AND (c1 = 1 OR c2 = 1), ',', '')
    + IIF(c3 = 1, 'Campaign3', '')
    + IIF(c4 = 1 AND (c1 = 1 OR c2 = 1 OR c3 = 1), ',', '')
    + IIF(c4 = 1, 'Campaign4', '') AS Campaigns
FROM tab AS t;
SQL Fiddle Demo

Like operator SQL

I have a data set (approx. 900k rows) where I need to split the data on a '(' or ')'. For example:
Table A data:-
> Vendor Is_Active
ABC(1263) 1
efgh (187 1
pqrs 890ag) 1
xyz 1
lmno(488) 1
(9867-12) 1
Output
ID Name
1263 ABC
187 efgh
890ag pqrs
xyz
488 lmno
9867-12
I tried query
-- Attempt from the question: for vendors containing '(' or ')', take the text
-- after the last space and strip the parentheses from it.
-- charindex returns 0 when vendor contains no space (e.g. 'ABC(1263)'), so
-- RIGHT is called with a length of -1, which raises the Msg 536 error quoted below.
SELECT
vendor,
CASE WHEN vendor LIKE '%(%' OR vendor LIKE '%)%'
THEN REPLACE(REPLACE(RIGHT(Vendor, charindex(' ', reverse(vendor)) - 1),'(',''),')','')
END AS 'test'
FROM
tableA
Error :- Msg 536, Level 16, State 4, Line 13 Invalid length parameter
passed to the RIGHT function.
You can remove chars ( and ) then search for number occurrence. Check this query
-- Demo: strip the parentheses, then split each vendor at its first digit.
DECLARE @t TABLE (
    vendor VARCHAR(100)
);

INSERT INTO @t (vendor) VALUES
    ('ABC(1263)')
    ,('efgh (187')
    ,('pqrs 890ag)')
    ,('xyz')
    ,('lmno(488)')
    ,('(9867-12)');

SELECT
    -- p = 0 means the cleaned value has no digit: no ID, the whole value is the name.
    ID = CASE WHEN p = 0 THEN '' ELSE SUBSTRING(v, p, LEN(v)) END
    , Name = CASE WHEN p = 0 THEN v ELSE LEFT(v, p - 1) END
FROM @t
-- v = vendor without '(' and ')'
CROSS APPLY (SELECT v = REPLACE(REPLACE(vendor, '(', ''), ')', '')) AS q1
-- p = position of the first digit in v (0 when there is none)
CROSS APPLY (SELECT p = PATINDEX('%[0-9]%', v)) AS q2;
Output
ID Name
---------------
1263 ABC
187 efgh
890ag pqrs
xyz
488 lmno
9867-12
Hmmm. I'm thinking:
-- Splits each value at its first '(': name is the text before it, id the text
-- after it with any ')' removed. Values without '(' get an empty id.
-- The cut position comes straight from CHARINDEX instead of LEN(name) + 1,
-- because LEN ignores trailing spaces and would leave the '(' in the id for
-- values such as 'efgh (187'.
SELECT
    v.*,
    v2.name,
    REPLACE(STUFF(v.x, 1, p.paren_pos, ''), ')', '') AS id
FROM (VALUES ('ABC(1263)'), ('abc'), ('(1234)')) AS v (x)
-- Appending '(' guarantees a hit, so paren_pos is LEN + 1 when x has no '('.
CROSS APPLY (VALUES (CHARINDEX('(', v.x + '('))) AS p (paren_pos)
CROSS APPLY (VALUES (LEFT(v.x, p.paren_pos - 1))) AS v2 (name);
I find apply useful for repetitive string operations.
-- Splits Vendor into ID and Name, branching on which parentheses are present:
--   '%(%)'  value ends with ')' and contains '(' : ID is the text between them
--   '%(%'   only '(' is usable                   : ID is the text after '('
--   '%)%'   only ')' is present                  : ID is the text between the
--                                                  first space and ')'
--   else    no parentheses                       : empty ID, Name is the value
SELECT
    (CASE
        WHEN Vendor LIKE '%(%)' THEN SUBSTRING(Vendor, CHARINDEX('(', Vendor) + 1, CHARINDEX(')', Vendor) - CHARINDEX('(', Vendor) - 1)
        WHEN Vendor LIKE '%(%' THEN SUBSTRING(Vendor, CHARINDEX('(', Vendor) + 1, LEN(Vendor))
        WHEN Vendor LIKE '%)%' THEN SUBSTRING(Vendor, CHARINDEX(' ', Vendor) + 1, (CHARINDEX(')', Vendor) - CHARINDEX(' ', Vendor)) - 1)
        ELSE ''
     END) AS ID,
    (CASE
        WHEN Vendor LIKE '%(%)' THEN SUBSTRING(Vendor, 1, CHARINDEX('(', Vendor) - 1)
        WHEN Vendor LIKE '%(%' THEN SUBSTRING(Vendor, 1, CHARINDEX('(', Vendor) - 1)
        WHEN Vendor LIKE '%)%' THEN SUBSTRING(Vendor, 1, CHARINDEX(' ', Vendor))
        ELSE Vendor
     END) AS Name
-- The question's table is tableA; "Table A" does not parse (TABLE is reserved).
FROM tableA

Converting multiple delimited fields into rows in SQL Server

I have a data source which contains data in delimited fields which exist in a staging area in SQL Server. I'd like to transform this data into many rows so it is easier to work with. This differs from the numerous other questions and answers on similar topics in that I have multiple fields where this delimited data exists. Here is an example of what my data looks like:
ID | Field | Value
---+-------+------
1 | a,b,c | 1,2,3
2 | a,c | 5,2
And this is the desired output:
ID | Field | Value
---+-------+------
1 | a | 1
1 | b | 2
1 | c | 3
2 | a | 5
2 | c | 2
My code so far uses the XML parsing method like the one mentioned here: Turning a Comma Separated string into individual rows I needed to extend it to join each field to its corresponding value which I have done by generating a row_number for each ID and then matching based on the ID and this row_number.
My issue is that it is painfully slow so I wondered if anyone has any more performant methods?
-- Pairs the n-th item of each comma-separated [Field] list with the n-th item
-- of the same row's [Value] list by shredding both lists through XML.
-- Rows whose [Field] or [Value] contains <, >, & or % are excluded up front.
WITH source_xml AS (
    -- Wrap every list item in <M>...</M> so nodes() can enumerate the items.
    SELECT
        ID,
        CAST('<M>' + REPLACE([Field], ',', '</M><M>') + '</M>' AS xml) AS field_xml,
        CAST('<M>' + REPLACE([Value], ',', '</M><M>') + '</M>' AS xml) AS value_xml
    FROM #source_table
    WHERE [Field] NOT LIKE '%[<>&%]%'
      AND [Value] NOT LIKE '%[<>&%]%'
),
value_items AS (
    -- One row per value item, numbered within its ID.
    SELECT
        s.ID,
        n.item.value('.', 'varchar(100)') AS [Value],
        ROW_NUMBER() OVER (PARTITION BY s.ID ORDER BY n.item) AS RowNumber
    FROM source_xml AS s
    CROSS APPLY s.value_xml.nodes('/M') AS n (item)
),
field_items AS (
    -- One row per field item, numbered within its ID.
    SELECT
        s.ID,
        n.item.value('.', 'varchar(100)') AS [Field],
        ROW_NUMBER() OVER (PARTITION BY s.ID ORDER BY n.item) AS RowNumber
    FROM source_xml AS s
    CROSS APPLY s.field_xml.nodes('/M') AS n (item)
)
SELECT
    v.ID,
    f.[Field],
    v.[Value]
FROM value_items AS v
INNER JOIN field_items AS f
    ON v.ID = f.ID
   AND v.RowNumber = f.RowNumber
Here is an approach using the splitter from Jeff Moden. http://www.sqlservercentral.com/articles/Tally+Table/72993/ One nice feature of that splitter is that it returns the ordinal position of each element so you can use it for joins and such.
Starting with some data.
-- Sample data: each row holds two parallel comma-separated lists.
DECLARE @Something TABLE
(
    ID int
    , Field varchar(50)
    , Value varchar(50)
)

INSERT @Something (ID, Field, Value) VALUES
(1, 'a,b,c', '1,2,3')
, (2, 'a,c', '5,2')
;
Since you have two sets of delimited data you will be forced to split this for each set of delimited values. Here is how you can leverage this splitter to accomplish this.
-- Splits Field and Value separately, then pairs the items that share the same
-- ID and the same ordinal position (ItemNumber) within their list.
WITH Fields AS
(
    SELECT s.ID, f.ItemNumber, f.Item
    FROM @Something AS s
    CROSS APPLY dbo.DelimitedSplit8K(s.Field, ',') AS f
)
, Value AS
(
    SELECT s.ID, v.ItemNumber, v.Item
    FROM @Something AS s
    CROSS APPLY dbo.DelimitedSplit8K(s.Value, ',') AS v
)
SELECT f.ID
    , Field = f.Item
    , Value = v.Item
FROM Fields AS f
INNER JOIN Value AS v
    ON v.ItemNumber = f.ItemNumber
   AND v.ID = f.ID
If at all possible it would be best to see if you can change whatever process it is that is populating your source data so it is normalized and not delimited because it is a pain to work with.
Based on @Gordon Linoff's query, here is another recursive CTE:
-- Demo: split the parallel Field/Value lists with a recursive CTE.
DECLARE @t TABLE (
    ID int
    ,Field VARCHAR(MAX)
    ,Value VARCHAR(MAX)
);

INSERT INTO @t (ID, Field, Value) VALUES
(1, 'a,b,c', '1,2,3')
,(2, 'a,c', '5,2')
,(3, 'x', '7');

WITH cte AS (
    -- Anchor: only rows whose Field contains a comma. Emits the first item of
    -- each list and keeps the remainder in field_list / value_list.
    SELECT ID
        ,SUBSTRING(Field, 1, CASE WHEN CHARINDEX(',', Field) > 0 THEN CHARINDEX(',', Field)-1 ELSE LEN(Field) END) AS Field
        ,SUBSTRING(Value, 1, CASE WHEN CHARINDEX(',', Value) > 0 THEN CHARINDEX(',', Value)-1 ELSE LEN(Value) END) AS Value
        ,SUBSTRING(Field,
            CASE WHEN CHARINDEX(',', Field) > 0 THEN CHARINDEX(',', Field)+1 ELSE 1 END,
            LEN(Field)-CASE WHEN CHARINDEX(',', Field) > 0 THEN CHARINDEX(',', Field) ELSE 0 END) AS field_list
        ,SUBSTRING(Value,
            CASE WHEN CHARINDEX(',', Value) > 0 THEN CHARINDEX(',', Value)+1 ELSE 1 END,
            LEN(Value)-CASE WHEN CHARINDEX(',', Value) > 0 THEN CHARINDEX(',', Value) ELSE 0 END) AS value_list
        ,0 AS lev
    FROM @t
    WHERE CHARINDEX(',', Field) > 0
    UNION ALL
    -- Recursive step: peel the next item off each remainder until the field
    -- remainder is empty.
    SELECT ID
        ,SUBSTRING(field_list, 1, CASE WHEN CHARINDEX(',', field_list) > 0 THEN CHARINDEX(',', field_list)-1 ELSE LEN(field_list) END) AS Field
        ,SUBSTRING(value_list, 1, CASE WHEN CHARINDEX(',', value_list) > 0 THEN CHARINDEX(',', value_list)-1 ELSE LEN(value_list) END) AS Value
        ,CASE WHEN CHARINDEX(',', field_list) > 0
              THEN SUBSTRING(field_list, CHARINDEX(',', field_list)+1, LEN(field_list)-CHARINDEX(',', field_list))
              ELSE '' END AS field_list
        ,CASE WHEN CHARINDEX(',', value_list) > 0
              THEN SUBSTRING(value_list, CHARINDEX(',', value_list)+1, LEN(value_list)-CHARINDEX(',', value_list))
              ELSE '' END AS value_list
        ,lev + 1
    FROM cte
    WHERE LEN(field_list) > 0
)
SELECT ID, Field, Value
FROM cte
UNION ALL
-- Rows with a single item never enter the CTE; add them back unchanged.
SELECT ID, Field, Value
FROM @t
WHERE CHARINDEX(',', Field) = 0
ORDER BY ID, Field
OPTION (MAXRECURSION 0)
One method is a recursive CTE:
-- Recursive split of the parallel field/value lists of t.
-- Level 0 only seeds the remainders; every later level emits one field/value
-- pair, so level 0 is filtered out of the final result.
WITH cte AS (
    SELECT
        id,
        CAST(NULL AS varchar(max)) AS field,
        CAST(NULL AS varchar(max)) AS value,
        field AS field_list,
        value AS value_list,
        0 AS lev
    FROM t
    UNION ALL
    -- Appending ',' to the searched text guarantees CHARINDEX finds a
    -- delimiter, so the last item is handled like any other.
    SELECT
        id,
        LEFT(field_list, CHARINDEX(',', field_list + ',') - 1),
        LEFT(value_list, CHARINDEX(',', value_list + ',') - 1),
        SUBSTRING(field_list, CHARINDEX(',', field_list + ',') + 1, LEN(field_list)),
        SUBSTRING(value_list, CHARINDEX(',', value_list + ',') + 1, LEN(value_list)),
        1 + lev
    FROM cte
    WHERE field_list <> ''
      AND value_list <> ''
)
SELECT
    id,
    field,
    value,
    field_list,
    value_list,
    lev
FROM cte
WHERE lev > 0;
Here is an example of how it works.

Check anagrams using sql server

ACT and CAT are anagrams
I have to write a function in SQL Server that takes two strings and returns a Boolean output indicating whether they are anagrams of each other.
This doesn't make sense to do in SQL Server, but it is for learning purposes only.
SQL Server is not good at this kind of things, but here you are:
-- Anagram test for word pairs: split both words into letters, rank the
-- letters of each word alphabetically, and compare them rank by rank.
WITH Src AS
(
    SELECT * FROM (VALUES
        ('CAT', 'ACT'),
        ('CAR', 'RAC'),
        ('BUZ', 'BUS'),
        ('FUZZY', 'MUZZY'),
        ('PACK', 'PACKS'),
        ('AA', 'AA'),
        ('ABCDEFG', 'GFEDCBA')) T(W1, W2)
), Numbered AS
(
    -- Num identifies a pair throughout the remaining steps.
    SELECT *, ROW_NUMBER() OVER (ORDER BY (SELECT 1)) Num
    FROM Src
), Splitted AS
(
    -- One row per letter position: L1/L2 are the current letters, W1/W2 the
    -- remainders. Recursion stops as soon as either remainder is empty.
    SELECT Num, W1 Word1, W2 Word2, LEFT(W1, 1) L1, LEFT(W2, 1) L2, SUBSTRING(W1, 2, LEN(W1)) W1, SUBSTRING(W2, 2, LEN(W2)) W2
    FROM Numbered
    UNION ALL
    SELECT Num, Word1, Word2, LEFT(W1, 1) L1, LEFT(W2, 1) L2, SUBSTRING(W1, 2, LEN(W1)) W1, SUBSTRING(W2, 2, LEN(W2)) W2
    FROM Splitted
    WHERE LEN(W1)>0 AND LEN(W2)>0
), SplitOrdered AS
(
    -- Alphabetical rank of each letter within its own word.
    SELECT *,
        ROW_NUMBER() OVER (PARTITION BY Num ORDER BY L1) LNum1,
        ROW_NUMBER() OVER (PARTITION BY Num ORDER BY L2) LNum2
    FROM Splitted
)
-- LEFT JOIN plus COUNT(S2.Num) counts only matched ranks while still returning
-- a row (Test = 0) for pairs in which no letter matches at all; an INNER JOIN
-- would silently drop such pairs from the result.
SELECT S1.Num, S1.Word1, S1.Word2,
    CASE WHEN COUNT(S2.Num)=LEN(S1.Word1) AND COUNT(S2.Num)=LEN(S1.Word2) THEN 1 ELSE 0 END Test
FROM SplitOrdered S1
LEFT JOIN SplitOrdered S2 ON S1.L1=S2.L2 AND S1.Num=S2.Num AND S1.LNum1=S2.LNum2
GROUP BY S1.Num, S1.Word1, S1.Word2
And results:
1 CAT ACT 1
2 CAR RAC 1
3 BUZ BUS 0
4 FUZZY MUZZY 0
5 PACK PACKS 0
6 AA AA 1
7 ABCDEFG GFEDCBA 1
First split (T-SQL Split Word into characters) both words into temporary tables. Then perform an outer join and check for nulls.
Edit thanks to George's comment:
split (T-SQL Split Word into characters) both words into temporary tables
Modify temporary tables or use CTEs to add a column with count(*) with group by letters clause
Perform a full outer join on the two temporary tables using the letter and its count in the join condition
Check for nulls in the output - if there are none, you have an anagram
The first thing that came to my mind:
-- Anagram check: store the code point of every character of both words
-- (id 1 = first word, id 2 = second word), then compare per-letter counts.
DECLARE @word1 nvarchar(max) = NULL,
        @word2 nvarchar(max) = 'Test 1',
        @n int

DECLARE @table TABLE (
    id int,
    letter int
)

-- NULL is treated as an empty word; the comparison is done in lower case.
SELECT @word1 = ISNULL(LOWER(@word1),''), @word2 = ISNULL(LOWER(@word2),'')
-- Walk down from the length of the longer word.
SELECT @n = CASE WHEN LEN(@word1) > LEN(@word2) THEN LEN(@word1) ELSE LEN(@word2) END

WHILE @n > 0
BEGIN
    -- UNICODE (not ASCII) because the words are nvarchar: ASCII would map
    -- every character outside the code page to the same value.
    -- Positions past the end of the shorter word produce NULL letters.
    INSERT INTO @table (id, letter)
    SELECT 1, UNICODE(SUBSTRING(@word1,@n,1))
    UNION ALL
    SELECT 2, UNICODE(SUBSTRING(@word2,@n,1))
    SET @n=@n-1
END

-- Any (letter, count) group present for only one word shows up with a NULL on
-- the other side of the full outer join; no such group means anagram.
SELECT CASE WHEN COUNT(*) = 0 THEN 1 ELSE 0 END isAnagram
FROM (
    SELECT id, letter, COUNT(letter) as c
    FROM @table
    WHERE id = 1
    GROUP BY id, letter) as t
FULL OUTER JOIN (
    SELECT id, letter, COUNT(letter) as c
    FROM @table
    WHERE id = 2
    GROUP BY id, letter) as p
    ON t.letter = p.letter and t.c = p.c
WHERE t.letter is NULL OR p.letter is null
Output:
isAnagram
0
You can also use loops in functions, and they can work fast. I was not able to get any of the other answers even close to the performance of this function:
-- Returns 1 when @value2 is a rearrangement of the characters of @value1,
-- otherwise 0.
-- Approach: take the characters of @value1 one at a time and remove one
-- matching character from @value2; fail as soon as no match is left.
CREATE FUNCTION IsAnagram
(
    @value1 VARCHAR(255)
    , @value2 VARCHAR(255)
)
RETURNS BIT
BEGIN
    -- Different lengths can never be anagrams.
    IF(LEN(@value1) != LEN(@value2))
        RETURN 0;

    DECLARE @firstChar VARCHAR(1);
    DECLARE @matchPos INT;

    WHILE (LEN(@value1) > 0)
    BEGIN
        SET @firstChar = LEFT(@value1, 1);
        -- CHARINDEX searches literally; a PATINDEX pattern built from the
        -- character would treat '%', '_' and '[' in the input as wildcards.
        SET @matchPos = CHARINDEX(@firstChar, @value2);

        IF(@matchPos > 0)
            SET @value2 = STUFF(@value2, @matchPos, 1, '');
        ELSE
            RETURN 0;

        SET @value1 = STUFF(@value1, 1, 1, '');
    END

    RETURN (SELECT IIF(@value2 = '', 1, 0));
END
GO
SELECT dbo.IsAnagram('asd', 'asd')
--1
SELECT dbo.IsAnagram('asd', 'dsa')
--1
SELECT dbo.IsAnagram('assd', 'dsa')
--0
SELECT dbo.IsAnagram('asd', 'dssa')
--0
SELECT dbo.IsAnagram('asd', 'asd')
This is something a numbers table can help with.
Code to create and populate a small numbers table is below.
-- Small numbers (tally) table holding the integers 1..1000.
CREATE TABLE dbo.Numbers
(
    Number INT PRIMARY KEY
);

-- Ten rows cross-joined three times yield 10 * 10 * 10 = 1000 rows, which
-- ROW_NUMBER turns into 1..1000. @@SPID is only a constant sort key here.
WITH Ten(N) AS
(
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
)
INSERT INTO dbo.Numbers (Number)
SELECT ROW_NUMBER() OVER (ORDER BY @@SPID) AS Number
FROM Ten AS T10
CROSS JOIN Ten AS T100
CROSS JOIN Ten AS T1000
Once that is in place you can use
-- Anagram test per word pair using dbo.Numbers to enumerate character positions.
-- Words of different length fail immediately; otherwise the pair fails when
-- some (letter, occurrence count) of W1 has no identical entry for W2.
SELECT
    W1,
    W2,
    CASE
        WHEN LEN(W1) <> LEN(W2) THEN 0
        WHEN EXISTS (
                -- letter histogram of W1 ...
                SELECT SUBSTRING(W1, Number, 1), COUNT(*)
                FROM dbo.Numbers
                WHERE Number <= LEN(W1)
                GROUP BY SUBSTRING(W1, Number, 1)
                EXCEPT
                -- ... minus the letter histogram of W2
                SELECT SUBSTRING(W2, Number, 1), COUNT(*)
                FROM dbo.Numbers
                WHERE Number <= LEN(W2)
                GROUP BY SUBSTRING(W2, Number, 1)
             ) THEN 0
        ELSE 1
    END AS IsAnagram
FROM (VALUES
        ('CAT', 'ACT'),
        ('CAR', 'RAC'),
        ('BUZ', 'BUS'),
        ('FUZZY', 'MUZZY'),
        ('PACK', 'PACKS'),
        ('AA', 'AA'),
        ('ABCDEFG', 'GFEDCBA')) AS T (W1, W2)
Or an alternative implementation could be
IsAnagram = CASE
                WHEN LEN(W1) <> LEN(W2) THEN 0
                -- Stack both words (Col 1 = W1, Col 2 = W2), group by the
                -- character at each position, and look for any character
                -- whose number of occurrences differs between the two words.
                WHEN EXISTS (SELECT 1
                             FROM dbo.Numbers AS N
                                  CROSS APPLY (VALUES (1, W1),
                                                      (2, W2)) AS V (Col, String)
                             WHERE N.Number <= LEN(W1)
                             GROUP BY SUBSTRING(String, Number, 1)
                             HAVING SUM(CASE WHEN Col = 1 THEN 1 ELSE 0 END) <>
                                    SUM(CASE WHEN Col = 2 THEN 1 ELSE 0 END))
                    THEN 0
                ELSE 1
            END