Converting multiple delimited fields into rows in SQL Server - sql

I have a data source which contains data in delimited fields which exist in a staging area in SQL Server. I'd like to transform this data into many rows so it is easier to work with. This differs from the numerous other questions and answers on similar topics in that I have multiple fields where this delimited data exists. Here is an example of what my data looks like:
ID | Field | Value
---+-------+------
1 | a,b,c | 1,2,3
2 | a,c | 5,2
And this is the desired output:
ID | Field | Value
---+-------+------
1 | a | 1
1 | b | 2
1 | c | 3
2 | a | 5
2 | c | 2
My code so far uses the XML parsing method like the one mentioned here: Turning a Comma Separated string into individual rows I needed to extend it to join each field to its corresponding value which I have done by generating a row_number for each ID and then matching based on the ID and this row_number.
My issue is that it is painfully slow so I wondered if anyone has any more performant methods?
select
[Value].ID, [Field], [Value]
from
(select
A.ID, Split.a.value('.', 'varchar(100)') as [Value],
row_number() over (partition by ID order by Split.a) as RowNumber
from
(select
ID, cast('<M>' + replace([Value], ',', '</M><M>') + '</M>' as xml) as [Value]
from
#source_table
where
[Field] not like '%[<>&%]%' and [Value] not like '%[<>&%]%') as A
cross apply
[Value].nodes ('/M') as Split(a)
) [Value]
inner join
(
select
A.ID, Split.a.value('.', 'varchar(100)') as [Field],
row_number() over (partition by A.ID order by Split.a) as RowNumber
from
(select
ID, cast('<M>' + replace([Field], ',', '</M><M>') + '</M>' as xml) as [Field]
from
#source_table
where
[Field] not like '%[<>&%]%' and [Value] not like '%[<>&%]%') as A
cross apply
[Field].nodes ('/M') as Split(a)
) [Field] on [Value].ID = [Field].ID and [Value].RowNumber = [Field].RowNumber

Here is an approach using the splitter from Jeff Moden. http://www.sqlservercentral.com/articles/Tally+Table/72993/ One nice feature of that splitter is that it returns the ordinal position of each element so you can use it for joins and such.
Starting with some data.
declare #Something table
(
ID int
, Field varchar(50)
, Value varchar(50)
)
insert #Something values
(1, 'a,b,c', '1,2,3')
, (2, 'a,c', '5,2')
;
Since you have two sets of delimited data you will be forced to split this for each set of delimited values. Here is how you can leverage this splitter to accomplish this.
with Fields as
(
select *
from #Something s
cross apply dbo.DelimitedSplit8K(s.Field, ',') f
)
, Value as
(
select *
from #Something s
cross apply dbo.DelimitedSplit8K(s.Value, ',') v
)
select f.ID
, Field = f.Item
, Value = v.Item
from Fields f
join Value v on v.ItemNumber = f.ItemNumber and v.ID = f.ID
If at all possible it would be best to see if you can change whatever process it is that is populating your source data so it is normalized and not delimited because it is a pain to work with.

Basing on #Gordon Linoff s query here another recursive cte:
DECLARE #t TABLE(
ID int
,Field VARCHAR(MAX)
,Value VARCHAR(MAX)
)
INSERT INTO #t VALUES
(1, 'a,b,c', '1,2,3')
,(2, 'a,c', '5,2')
,(3, 'x', '7');
with cte as (
select ID
,SUBSTRING(Field, 1, CASE WHEN CHARINDEX(',', Field) > 0 THEN CHARINDEX(',', Field)-1 ELSE LEN(Field) END) AS Field
,SUBSTRING(Value, 1, CASE WHEN CHARINDEX(',', Value) > 0 THEN CHARINDEX(',', Value)-1 ELSE LEN(Value) END) AS Value
,SUBSTRING(Field, CASE WHEN CHARINDEX(',', Field) > 0 THEN CHARINDEX(',', Field)+1 ELSE 1 END, LEN(Field)-CASE WHEN CHARINDEX(',', Field) > 0 THEN CHARINDEX(',', Field) ELSE 0 END) as field_list
,SUBSTRING(Value, CASE WHEN CHARINDEX(',', Value) > 0 THEN CHARINDEX(',', Value)+1 ELSE 1 END, LEN(Value)-CASE WHEN CHARINDEX(',', Value) > 0 THEN CHARINDEX(',', Value) ELSE 0 END) as value_list
,0 as lev
from #t
WHERE CHARINDEX(',', Field) > 0
UNION ALL
select ID
,SUBSTRING(field_list, 1, CASE WHEN CHARINDEX(',', field_list) > 0 THEN CHARINDEX(',', field_list)-1 ELSE LEN(field_list) END) AS Field
,SUBSTRING(value_list, 1, CASE WHEN CHARINDEX(',', value_list) > 0 THEN CHARINDEX(',', value_list)-1 ELSE LEN(value_list) END) AS Value
,CASE WHEN CHARINDEX(',', field_list) > 0 THEN SUBSTRING(field_list, CHARINDEX(',', field_list)+1, LEN(field_list)-CHARINDEX(',', field_list)) ELSE '' END as field_list
,CASE WHEN CHARINDEX(',', value_list) > 0 THEN SUBSTRING(value_list, CHARINDEX(',', value_list)+1, LEN(value_list)-CHARINDEX(',', value_list)) ELSE '' END as value_list
,lev + 1
from cte
WHERE LEN(field_list) > 0
)
select ID, Field, Value
from cte
UNION ALL
SELECT ID, Field, Value
FROM #t
WHERE CHARINDEX(',', Field) = 0
ORDER BY ID, Field
OPTION (MAXRECURSION 0)

One method is a recursive CTE:
with cte as (
select id, cast(NULL as varchar(max)) as field, cast(NULL as varchar(max)) as value, field as field_list, value as value_list, 0 as lev
from t
union all
select id, left(field_list, charindex(',', field_list + ',') - 1),
left(value_list, charindex(',', value_list + ',') - 1),
substring(field_list, charindex(',', field_list + ',') + 1, len(field_list)),
substring(value_list, charindex(',', value_list + ',') + 1, len(value_list)),
1 + lev
from cte
where field_list <> '' and value_list <> ''
)
select *
from cte
where lev > 0;
Here is an example of how it works.

Related

How to iterate over a string in one line in SQL

I am writing a query that roughly has this structure:
SELECT Name, <calculated-valued> as Version FROM <tables>
This calculated value needs to work like so: I have a varchar column 'Name' that could contain something like 'ABC' and I want to convert each letter into ASCII, and append them back together to form '65.66.67' in this example. (An empty string should return '0') Is there any way to do this?
My approach wasn't very good, but up to 5 characters I could do the following:
SELECT
CASE WHEN LEN(Name) = 0 THEN '0'
ELSE CAST(ASCII(SUBSTRING(Name, 1, 1)) as varchar(max)) +
CASE WHEN LEN(Name) = 1 THEN ''
ELSE '.' + CAST(ASCII(SUBSTRING(Name, 2, 1)) as varchar(max)) +
CASE WHEN LEN(Name) = 2 THEN ''
ELSE '.' + CAST(ASCII(SUBSTRING(Name, 3, 1)) as varchar(max)) +
CASE WHEN LEN(Name) = 3 THEN ''
ELSE '.' + CAST(ASCII(SUBSTRING(Name, 4, 1)) as varchar(max)) +
CASE WHEN LEN(Name) = 4 THEN ''
ELSE '.' + CAST(ASCII(SUBSTRING(Name, 5, 1)) as varchar(max))
END
END
END
END
END AS MyColumn
FROM <tables>
Is there a better way to do this? Ideally a method that can take any length of string?
Either that or can I cast letters into a hierarchyid datatype? I need to create things like 1/2/a/bc/4// or whatever, but hierarchyid doesn't support that. So instead I'm trying to convert it to 1/2/97/98.99/4/0 so I can convert and maintain the correct order. This column is only used for sorting.
Thanks for any help!
One method is a recursive CTE:
with cte as (
select Name, 1 as lev
cast(ascii(substring(name, 1, 1)) as varchar(max)) as ascii_name
from t
union all
select Name, lev + 1,
ascii_name + '.' + cast(ascii(substring(name, lev + 1, 1)) as varchar(max))
from cte
where len(Name) > lev
)
select Name, ascii_name
from cte;
Another option is with an ad-hoc tally table and a CROSS APPLY
Declare #YourTable table (Name varchar(25))
Insert Into #YourTable values
('ABC'),
('Jack'),
('Jill'),
('')
Select A.Name
,Version = isnull(B.String,'0')
From #YourTable A
Cross Apply (
Select String=Stuff((Select '.' +cast(S as varchar(5))
From (Select Top (len(A.Name))
S=ASCII(substring(A.Name,Row_Number() Over (Order By (Select NULL)),1))
From master..spt_values ) S
For XML Path ('')),1,1,'')
) B
Returns
Name String
ABC 65.66.67
Jack 74.97.99.107
Jill 74.105.108.108
0

SQL Transform a list of numbers into ranges in one column

If I have a list of ranges in a table e.g.
ID Number
1 4
1 5
1 6
1 7
1 9
Is there a way to put this into the format: '4-7,9' into one varchar column using SQL ?
Thanks.
You can use ROW_NUMBER and XML PATH:
DECLARE #Mock TABLE (Id INT, Number INT)
INSERT INTO #Mock
VALUES
(1, 4),
(1, 5),
(1, 6),
(1, 7),
(1, 9)
;WITH CTE
AS
(
SELECT ROW_NUMBER() OVER (ORDER BY Number) AS RowId,*
FROM #Mock
)
SELECT
STUFF(
(
SELECT
',' + CAST(MIN(C.Number) AS VARCHAR(10)) + CASE WHEN MIN(C.Number) = MAX(C.Number) THEN '' ELSE '-' + CAST(MAX(C.Number) AS VARCHAR(10)) END
FROM
CTE C
GROUP BY
C.Number - C.RowId
FOR XML PATH ('')
), 1, 1, '') Result
Output: 4-7,9
Considering you have another to find the order of ranges
;WITH cte
AS (SELECT *,
Sum(CASE
WHEN number = prev_lag + 1 THEN 0
ELSE 1
END)
OVER(
ORDER BY iden_col) AS grp
FROM (SELECT *,
Lag(number)
OVER(
partition BY [ID]
ORDER BY iden_col) AS prev_lag
FROM Yourtable)a),
intr
AS (SELECT id,
CASE
WHEN Min(number) = Max(number) THEN Cast(Min(number) AS VARCHAR(50))
ELSE Concat(Min(number), '-', Max(number))
END AS intr_res
FROM cte
GROUP BY id,
grp)
SELECT DISTINCT Id,
Stuff(concat_col, 1, 1, '')
FROM intr a
CROSS apply (SELECT ',' + intr_res
FROM intr b
WHERE a.ID = b.ID
FOR xml path('')) cs (concat_col)
Demo

how to write SQL query for this result?

I have so many long database so I used seq_no in commas separate using more than one sequence store in single column but now I want all sequence in a single column so I am confused how to create this sql result for this.
For example:
TABLE STRUCTURE
SR_NO IS INT ,
SEQ_NO IS VARCHAR(MAX)
SR_NO SEQ_NO
---------------------------------
1 1839073,
2 1850097,1850098,
3 1850099,1850100,1850110
I need to get this result:
SEQ_NO
--------------
1839073
1850097
1850098
1850099
1850100
1850110
Thanks!
declare #t table(Id int,seq varchar(100))
insert into #t (Id,seq) values (1,'1839073,'),(2,'1839073,1850098,'),(3,'1850099,1850100,1850110 ')
;With Cte as (
SELECT A.Id,
Split.a.value('.', 'VARCHAR(100)') AS Seq
FROM
(
SELECT Id,
CAST ('<M>' + REPLACE(seq, ',', '</M><M>') + '</M>' AS XML) AS Data
FROM #t
) AS A CROSS APPLY Data.nodes ('/M') AS Split(a) )
Select ID,Seq from Cte Where Seq > ''
Try splitting it with XML
SELECT SR_NO, t.c.value('.', 'VARCHAR(2000)') COL1
FROM (
SELECT SR_NO, x = CAST('<t>' +
REPLACE(SEQ_NO, ',', '</t><t>') + '</t>' AS XML)
FROM
(values(1,'1839073'),(2, '1850097,1850098'),
(3, '1850099,1850100,1850110')) y(SR_NO, SEQ_NO)
) a
CROSS APPLY x.nodes('/t') t(c)
Result:
SR_NO COL1
1 1839073
2 1850097
2 1850098
3 1850099
3 1850100
3 1850110
You can replace this with your table:
(values (1,'1839073'),(2, '1850097,1850098'),
(3, '1850099,1850100,1850110')) y(SR_NO, SEQ_NO)
This should do it: (Replace YourTableName with your table name)
;WITH CTE(NEW_SEQ_NO, SEQ_NO) as (
SELECT LEFT(SEQ_NO, CHARINDEX(',',SEQ_NO + ',') -1),
STUFF(SEQ_NO, 1, CHARINDEX(',',SEQ_NO + ','), '')
FROM YourTableName
WHERE SEQ_NO <> '' AND SEQ_NO IS NOT NULL
UNION all
SELECT LEFT(SEQ_NO, CHARINDEX(',',SEQ_NO + ',') -1),
STUFF(SEQ_NO, 1, CHARINDEX(',',SEQ_NO + ','), '')
FROM CTE
WHERE SEQ_NO <> '' AND SEQ_NO IS NOT NULL
)
SELECT NEW_SEQ_NO from CTE ORDER BY NEW_SEQ_NO
You can check this topic for more information:
Turning a Comma Separated string into individual rows
I have written the following query after referring Turning a Comma Separated string into individual rows
It will work for you
create table STRUCTURE(SR_NO int, SEQ_NO varchar(max))
insert STRUCTURE select 1, '1839073,'
insert STRUCTURE select 2, '1850097,1850098,'
insert STRUCTURE select 3, '1850099,1850100,1850110'
;with tmp(SR_NO, DataItem, SEQ_NO) as (
select SR_NO, LEFT(SEQ_NO, CHARINDEX(',',SEQ_NO+',')-1),
STUFF(SEQ_NO, 1, CHARINDEX(',',SEQ_NO+','), '')
from STRUCTURE
union all
select SR_NO, LEFT(SEQ_NO, CHARINDEX(',',SEQ_NO+',')-1),
STUFF(SEQ_NO, 1, CHARINDEX(',',SEQ_NO+','), '')
from tmp
where SEQ_NO > ''
)
Select DataItem as SEQ_NO from tmp order by SEQ_NO;

Return Distinct Rows That Contain The Same Value/Character In SQL

I have a bit of a tricky situation. I have a column that contains a pipe delimited set of numbers in numerous rows in a table. For example:
Courses
-------------------
1|2
1|2|3
1|2|8
10
11
11|12
What I want to achieve is to return rows where the number only appears once in my output.
Ideally, I want to try and carry this out using SQL rather than having to carry out checks at a web application level. Carrying out a DISTINCT does not achieve what I want.
The desired output would be:
Courses
-------------------
1
2
3
8
10
11
12
I would appreciated if anyone can guide me in the right direction.
Thanks.
Please try:
declare #tbl as table(Courses nvarchar(max))
insert into #tbl values
('1|2'),
('1|2|3'),
('1|2|8'),
('10'),
('11'),
('11|12')
select * from #tbl
SELECT
DISTINCT CAST(Split.a.value('.', 'VARCHAR(100)') AS INT) AS CVS
FROM
(
SELECT CAST ('<M>' + REPLACE(Courses, '|', '</M><M>') + '</M>' AS XML) AS CVS
FROM #tbl
) AS A CROSS APPLY CVS.nodes ('/M') AS Split(a)
ORDER BY 1
Try this one -
SET NOCOUNT ON;
DECLARE #temp TABLE
(
string VARCHAR(500)
)
DECLARE #Separator CHAR(1)
SELECT #Separator = '|'
INSERT INTO #temp (string)
VALUES
('1|2'),
('1|2|3'),
('1|2|8'),
('10'),
('11'),
('11|12')
-- 1. XML
SELECT p.value('(./s)[1]', 'VARCHAR(500)')
FROM (
SELECT field = CAST('<r><s>' + REPLACE(t.string, #Separator, '</s></r><r><s>') + '</s></r>' AS XML)
FROM #temp t
) d
CROSS APPLY field.nodes('/r') t(p)
-- 2. CTE
;WITH a AS
(
SELECT
start_pos = 1
, end_pos = CHARINDEX(#Separator, t.string)
, t.string
FROM #temp t
UNION ALL
SELECT
end_pos + 1
, CHARINDEX(#Separator, string, end_pos + 1)
, string
FROM a
WHERE end_pos > 0
)
SELECT d.name
FROM (
SELECT
name = SUBSTRING(
string
, start_pos
, ABS(end_pos - start_pos)
)
FROM a
) d
WHERE d.name != ''
Try this :
create table course (courses varchar(100))
insert into course values('1|2')
insert into course values('1|2|3')
insert into course values('1|2|8')
insert into course values('10')
insert into course values('11')
insert into course values('11|12')
Declare #col varchar(200)
SELECT
#col=(
SELECT DISTINCT c.courses + '|'
FROM course c
FOR XML PATH('')
);
select * from course
;with demo as(
select cast(substring(#col,1,charindex('|',#col,1)-1) AS INT) cou,charindex('|',#col,1) pos
union all
select cast(substring(#col,pos+1,charindex('|',#col,pos+1)-pos-1)AS INT) cou,charindex('|',#col,pos+1) pos
from demo where pos<LEN(#col))
select distinct cou from demo
Could not manage without recursion :( Something like this could do the trich?
WITH splitNum(num, r)
AS
(
SELECT
SUBSTRING(<field>,1, CHARINDEX('|', <field>)-1) num,
SUBSTRING(<field>,CHARINDEX('|', <field>)+1, len(<field>)) r
FROM <yourtable> as a
UNION ALL
SELECT
SUBSTRING(r,1, CHARINDEX('|', r)-1) num,
SUBSTRING(r,CHARINDEX('|', r)+1, len(r)) r
FROM <yourtable> b
WHERE CHARINDEX('|', r) > 0
inner join splitNum as c on <whatevertheprimarykeyis>
)
SELECT distinct num FROM splitNum
Didn't make it run, but it should do the trick, just replace the and with the correct info
One way would be to use a recursive CTE:
with cte as
(select cast(case charindex('|',courses) when 0 then courses
else left(courses,charindex('|',courses)-1) end as int) course,
case charindex('|',courses) when 0 then ''
else right(courses,len(courses)-charindex('|',courses)) end courses
from courses
union all
select cast(case charindex('|',courses) when 0 then courses
else left(courses,charindex('|',courses)-1) end as int) course,
case charindex('|',courses) when 0 then ''
else right(courses,len(courses)-charindex('|',courses)) end courses
from cte
where len(courses)>0)
select distinct course from cte
SQLFiddle here.

Find position of delimited character in a String (SQL Server)

I have a string Variable
test=HARIA|123|MALE|STUDENT|HOUSEWIFE
i am using | as delimited character. if i want to extract data from 2nd occurence of pipe till 3rd occurrence. i need to get 'MALE' from above string.
any help appreciated
Untested
SELECT
SUBSTRING (
#test,
CHARINDEX('|', #test, CHARINDEX('|', #test) + 1) + 1,
CHARINDEX('|', #test, CHARINDEX('|', #test, CHARINDEX('|', #test) + 1) + 1) - 1
)
A nicer way would be split the string into a table, and use ROW_NUMBER() to extract the 3rd element. Based on this Arrays and Lists in SQL Server
DECLARE #test varchar(100) = 'HARIA|123|MALE|STUDENT|HOUSEWIFE'
SELECT TOP 8000
Num
INTO
#Number
FROM
(
SELECT
ROW_NUMBER() OVER (ORDER BY c1.object_id) AS Num
FROM
sys.columns c1, sys.columns c2, sys.columns c3
) N
SELECT
ROW_NUMBER() OVER (ORDER BY Num) AS Rank,
LTRIM(RTRIM(SUBSTRING(#test,
Num,
CHARINDEX('|', #test + '|', Num) - Num
))) AS Value
FROM
#Number
WHERE
Num <= LEN (#test)
AND
SUBSTRING('|' + #test, Num, 1) = '|'
DROP TABLE #Number
Try this
Solution 1:(Using a number table)
declare #str varchar(1000)
set #str ='HARIA|123|MALE|STUDENT|HOUSEWIFE'
--Creating a number table
;with numcte as(
select 1 as rn union all select rn+1 from numcte where rn<LEN(#str)),
--Get the position of the "|" charecters
GetDelimitedCharPos as(
select ROW_NUMBER() over(order by getdate()) seq, rn,delimitedcharpos
from numcte
cross apply(select SUBSTRING(#str,rn,1)delimitedcharpos) X where delimitedcharpos = '|')
--Applying the formula SUBSTRING(#str,startseq + 1,endseq-startseq + 1)
-- i.e. SUBSTRING(#str,11,15-11) in this case
select top 1 SUBSTRING(
#str
,(select top 1 rn+1 from GetDelimitedCharPos where seq =2)
,(select top 1 rn from GetDelimitedCharPos where seq =3) -
(select top 1 rn+1 from GetDelimitedCharPos where seq =2)
) DesiredResult
from GetDelimitedCharPos
Solution 2:(Using XQuery)
DECLARE #xml as xml,#str as varchar(100),#delimiter as varchar(10)
SET #str='HARIA|123|MALE|STUDENT|HOUSEWIFE'
SET #xml = cast(('<X>'+replace(#str,'|' ,'</X><X>')+'</X>') as xml)
SELECT ShrededData as DesiredResult FROM(
SELECT
ROW_NUMBER() over(order by getdate()) rn
,N.value('.', 'varchar(10)') as ShrededData FROM #xml.nodes('X') as T(N))X
WHERE X.rn = 3 -- Specifying the destination sequence value(here 3)
Output(in both the cases)
DesiredResult
MALE
I found this. Using a t-Sql for loop. Good reference of syntax, too.