SQL split or substring column value - sql

I have an sql column and value/structure as per below:
ColumnA
ROOT/South America/Lima/Test/Test2
Running a select query I want to extract "Lima" As a column value. I couldn't get the split string to work, or substring.
Any thoughts?

This is my approach to get the nth part of any delimited string:
DECLARE #mockupTable TABLE(ID INT IDENTITY, YourColumn VARCHAR(1000));
INSERT INTO #mockupTable VALUES('ROOT/South America/Lima/Test/Test2')
,('Too/short')
,('Three/parts/valid');
--The splitting is a one-liner:
SELECT *
,CAST('<x>' + REPLACE(YourColumn,'/','</x><x>') + '</x>' AS XML).value('/x[3]','nvarchar(max)') AS ThirdPart
FROM #mockupTable;
If your delimited strings might include XML-forbidden characters (namely &, < and >, you'd have to escape them (but that's easy):
Just use this instead
,CAST('<x>' + REPLACE((SELECT YourColumn [*] FOR XML PATH('')),'/','</x><x>') + '</x>' AS XML).value('/x[3]','nvarchar(max)') AS ThirdPart
Some explanation
The replacements of your delimiter / with </x><x> allow to get an XML like string, which can be casted to
<x>ROOT</x>
<x>South America</x>
<x>Lima</x>
<x>Test</x>
<x>Test2</x>
The XML's method .value() allows to use XQuery to get the third <x>. One advantage: If there is no third element, this won't break, just return NULL.

A little out there but it works. Based on a recursive cte. You can set the delimiter, start and end.
declare #T table (iden int identity, col1 varchar(100));
insert into #T(col1) values
('ROOT/South America/Lima/Test/Test2')
, ('ROOT/South America/Peru/Test/Test2')
, ('ROOT/South America/Venuzuala')
, ('ROOT/South America/');
declare #split char(1) = '/';
declare #start int = 2;
declare #end int = 3
select #split, #start, #end;
with cte as
( select t.iden, t.col1, charindex(#split, t.col1) as pos , 1 as cnt
from #T t
union all
select t.iden, t.col1, charindex(#split, t.col1, t.pos + 1), cnt + 1
from cte t
where charindex(#split, t.col1, t.pos + 1) > 0
and cnt+1 <= #end
)
--select * from cte order by iden, cnt;
select --t1.*, t2.*,
SUBSTRING(t1.col1, t1.pos+1, t2.pos-t1.pos-1) as bingo
from cte t1
join cte t2
on t2.iden = t1.iden
and t1.cnt = #start
and t2.cnt = #end
order by t1.iden;

declare #t varchar(max) = 'ROOT/South America/Lima/Test/Test2'
select * from (
select
[value]
,ROW_NUMBER() Over (Order by (select null )) [Level]
from string_split( #t , '/')
) d
where d.Level = 3
the code above dose the same with much simpler logic, split the text by '/' results to rows "String_Split", then row_number() to get the order of the results, each number is a level if no sorting has been done in the order by statment "(select null)"
as a result by adding the level number in the where statment will get the level value we want, above will show level 3

Related

Add character in front and at the end of each character

In SQL I want to add 0 in front and , at the end of each character.
Example: A30F1 -> 0A,03,00,0F,01
I don't want to use cursor if possible.
Thanks!
EIDT:
I apologize for not asking the most appropriate question at the beginning.
In short, I have a table and for each value in the column name I have to convert it to the desired format. For example, we have a #Temp table:
CREATE TABLE #Temp (id INT, name VARCHAR(25))
INSERT INTO #Temp VALUES (1, 'A30F1'), (2, 'B51R9'), (3, 'L1721')
SELECT * FROM #Temp
One method would be to use a Tally to split the string into it's individual characters, and then use concatenation to add the 0 to the start, and STRING_AGG to comma delimit the results:
DECLARE #YourValue varchar(5) = 'A30F1';
WITH N AS(
SELECT N
FROM (VALUES(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL))N(N)),
Tally AS(
SELECT TOP (LEN(#YourValue))
ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS I
FROM N N1, N N2) --Up to 100 characters, add more cross joins for more characters
SELECT STRING_AGG(CONCAT('0',SS.C),',') WITHIN GROUP (ORDER BY T.I) AS NewString
FROM (VALUES(#YourValue))V(YourValue)
CROSS JOIN Tally T
CROSS APPLY (VALUES(SUBSTRING(V.YourValue,T.I,1)))SS(C);
It appears this is meant to be against a table, not a single value. This needs, however, very few changes to work against a table:
WITH N AS(
SELECT N
FROM (VALUES(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL))N(N)),
Tally AS(
SELECT TOP (SELECT MAX(LEN(YourColumn)) FROM dbo.YourTable)
ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS I
FROM N N1, N N2) --Up to 100 characters, add more cross joins for more characters
SELECT STRING_AGG(CONCAT('0',SS.C),',') WITHIN GROUP (ORDER BY T.I) AS NewString
FROM dbo.YourTable YT
JOIN Tally T ON LEN(YT.YourColumn) >= T.I
CROSS APPLY (VALUES(SUBSTRING(YT.YourColumn,T.I,1)))SS(C)
GROUP BY YT.YourColumn;
db<>fiddle
I solved the simplest possible with a few variables, WHILE and SUBSTRING
DECLARE #var VARCHAR(20) = 'A30F1', #i INT = 1, #res NVARCHAR(20)
WHILE (#i <= LEN(#var))
BEGIN
SET #res = #res + '0' + SUBSTRING(#var, #i, 1) + ','
SET #i = #i + 1
END
SELECT LEFT(#res, LEN(#res) - 1) output
Check demo on DB<>FIDDLE.
Original answer:
A recursive CTE and a STRING_AGG() call is also an option (SQL Server 2017+ is needed):
DECLARE #text varchar(max) = 'A30F1';
WITH rCTE AS
(
SELECT 1 AS CharacterPosition, SUBSTRING(#text, 1, 1) AS Character
UNION ALL
SELECT CharacterPosition + 1, SUBSTRING(#text, CharacterPosition + 1, 1)
FROM rCTE
WHERE CharacterPosition < LEN(#text)
)
SELECT STRING_AGG('0' + Character, ',') WITHIN GROUP (ORDER BY CharacterPosition)
FROM rCTE
OPTION (MAXRECURSION 0);
Update:
You need a different statement, if the names are stored in a table, again using recursion and STRING_AGG():
Table:
CREATE TABLE #Temp (id INT, name VARCHAR(25))
INSERT INTO #Temp VALUES (1, 'A30F1'), (2, 'B51R9'), (3, 'L1721')
Statement:
; WITH rCTE AS (
SELECT
t.id AS id,
LEFT(t.name, 1) AS Character,
STUFF(t.name, 1, 1, '') AS CharactersRemaining,
1 AS CharacterPosition
FROM #Temp t
UNION ALL
SELECT
r.id,
LEFT(r.CharactersRemaining, 1),
STUFF(r.CharactersRemaining, 1, 1, ''),
CharacterPosition + 1
FROM rCTE r
WHERE LEN(r.CharactersRemaining) > 0
)
SELECT
id,
STRING_AGG('0' + Character, ',') WITHIN GROUP (ORDER BY CharacterPosition) AS name
FROM rCTE
GROUP BY id
OPTION (MAXRECURSION 0);
Result:
id name
1 0A,03,00,0F,01
2 0B,05,01,0R,09
3 0L,01,07,02,01
If you are only applying this to English alphabet characters and digits as in your example you could do this.
CREATE TABLE #Temp (id INT, name VARCHAR(25))
INSERT INTO #Temp VALUES (1, 'A30F1'), (2, 'B51R9'), (3, 'L1721'), (4, 'A')
SELECT SUBSTRING(REPLACE(
0x00 + CAST(CAST(name AS NVARCHAR(25)) AS BINARY(50)), CHAR(0), '0,')
, 3
, LEN(name) * 3 - 1)
FROM #Temp
returns
0A,03,00,0F,01
0B,05,01,0R,09
0L,01,07,02,01
0A
This takes advantage of the fact that the binary representation of the nvarchar and varchar is the same for this limited character set except for padding out with 0x00
'A30F1' -> 0x4133304631
N'A30F1' -> 0x41003300300046003100

Extract strings till the second delim SQL

I wanted to extract all the details till the second /(forward slash)from my table in SQL Server. Any ideas?
website
AA.AA/AB/123
www.google.com/en/abcd/
yahoo.com/us/dev
gmail.com
ouput
website
AA.AA/AB
www.google.com/en
yahoo.com/us
gmail.com
Perhaps this will suit your needs:
DECLARE #Table TABLE (Col1 NVARCHAR(100))
INSERT #Table VALUES
('website'),
('AA.AA/AB/123'),
('www.google.com/en/abcd/'),
('yahoo.com/us/dev'),
('gmail.com')
SELECT
COALESCE(
NULLIF(
SUBSTRING(Col1,1,CHARINDEX('/',Col1,CHARINDEX('/',Col1)+1))
,'')
,Col1
) AS Col1
FROM #Table
If you are using SQL Server 2017 or 2019, you can use STRING_AGG() to reassemble the output from STRING_SPLIT():
SELECT STRING_AGG(x.value, '/')
FROM dbo.table_name CROSS APPLY
(
SELECT value, ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
FROM STRING_SPLIT(Col1, '/') AS ss
) AS x(value, rn)
WHERE x.rn <= 2
GROUP BY Col1;
You might say:
"But Aaron, the output of STRING_SPLIT() isn't guaranteed to be in order; in fact the documentation warns about that."
This is true; the documentation does say that. But in current versions the output is extremely unlikely to be in anything but left-to-right order. I still suggest you be wary of relying on this, since it could break at any time (I warn about this in more detail here).
If you are on an older version, or don't trust it, you can use a table-valued function that preserves the order of the input string, for example from this answer:
CREATE FUNCTION [dbo].[SplitString]
(
#List NVARCHAR(MAX),
#Delim VARCHAR(255)
)
RETURNS TABLE
AS
RETURN ( SELECT [Value], idx = RANK() OVER (ORDER BY n) FROM
(
SELECT n = Number,
[Value] = LTRIM(RTRIM(SUBSTRING(#List, [Number],
CHARINDEX(#Delim, #List + #Delim, [Number]) - [Number])))
FROM (SELECT Number = ROW_NUMBER() OVER (ORDER BY name)
FROM sys.all_objects) AS x
WHERE Number <= LEN(#List)
AND SUBSTRING(#Delim + #List, [Number], LEN(#Delim)) = #Delim
) AS y
);
With that function in place, you can then do the following, and now feel safer about relying on order (at the cost of a more expensive query):
;WITH src AS
(
SELECT Col1, idx, Value
FROM dbo.table_name CROSS APPLY dbo.SplitString(Col1, '/')
)
SELECT STUFF((SELECT '/' + Value
FROM src
WHERE src.idx <= 2 AND Col1 = t.Col1
ORDER BY idx
FOR XML PATH(''), TYPE).value(N'./text()[1]', N'nvarchar(max)'), 1, 1, '')
FROM dbo.table_name AS t
GROUP BY Col1;
I find cross apply handy for these situations
select case when str like '%/%' then left(str, i2-1) else str end as str
from t
cross apply (select charindex( '/', str ) as i1) t2 --position of first slash
cross apply (select charindex( '/', str, (i1 + 1)) as i2 ) t3 --position of second slash
Below is the simple query you can try. In the below query please replace 'colName' with your column name and Table_1 with your table name.
SELECT LEFT([colName], charindex('/', [colName], charindex('/', [colName])+1)-1) AS [AfterSecondPipe]
FROM [Table_1]

Generate a comma-separated list of numbers in a single string

Is there a way to generate a comma-separated string of a series of numbers where the "begin" and "end" numbers are provided?
For example, provide the numbers 1 and 10 and the output would be a single value of: 1,2,3,4,5,6,7,8,9,10
10/10/2019 edit explaining why I'm interested in this:
My workplace writes queries with several columns in the SELECT statement plus aggregate functions. Then a GROUP BY clause using the column numbers. I figured using a macro that creates a comma-separated list to copy/paste in would save some time.
SELECT t.colA
, t.colB
, t.colC
, t.colD
, t.colE
, t.colF
, t.colG
, t.colH
, t.colI
, t.colJ
, sum(t.colK) as sumK
, sum(t.colL) as sumL
, sum(t.colM) as sumM
FROM t
GROUP BY 1, 2, 3, 4, 5, 6, 7, 8, 9, 10
;
You can use a recursive CTE to generate your numbers, and xml_agg to generate your string:
with recursive nums (counter) as
( select * from (select cast(1 as bigint) as counter) t
union all
select
counter + 1
from nums
where counter between 1 and 9
)
select
trim(trailing ',' from cast(xmlagg(cast(counter as varchar(2)) || ',' order by counter) as varchar(100)))
from nums
Check these methods in SQL Server-
IF OBJECT_ID('TEMPDB..#Sample') IS NOT NULL
DROP TABLE #Sample
Create table #Sample
(
NUM int
)
declare #n int
select #n=10
insert into #Sample(NUM)
SELECT NUM FROM (select row_number() over (order by (select null)) AS NUM from sys.columns) A WHERE NUM<=#N
--Method 1 (For SQL SERVER -NEW VERSION Support)
SELECT STRING_AGG(NUM,',') AS EXPECTED_RESULT FROM #Sample
--Method 1 (For SQL SERVER -OLD VERSION Support)
select DISTINCT STUFF(CAST((
SELECT ' ,' +CAST(c.num AS VARCHAR(MAX))
FROM (
SELECT num
FROM #Sample
) c
FOR XML PATH(''), TYPE) AS VARCHAR(MAX)), 1, 2, '') AS EXPECTED_RESULT
from #Sample t
While loop seems appropriate
declare #begin int=1
declare #end int=11
declare #list varchar(500)
if #begin > #end
begin
select 'error, beginning number ' + convert(varchar(500),#begin)
+ ' must not be greater than ending number '
+ convert(varchar(500),#end) + '.' err
return
end
else
set #list = convert(varchar(500),#begin)
;
while #begin < #end
begin
set #begin += 1
set #list = #list + ',' + convert(varchar(500),#begin)
end
select #list
You might want to use varchar(5000) or something depending on how big you want it to get.
disclaimer -- I don't know if this works with teradata
I'm not sure there is a good direct way to generate a series in Teradata. You can fake it a few different ways though. Here's a comma separated list of numbers from 5 to 15, for example:
SELECT TRIM(TRAILING ',' FROM (XMLAGG(TRIM(rn)|| ',' ) (VARCHAR(10000))))
FROM (SELECT 4 + ROW_NUMBER() OVER (ORDER BY Sys_Calendar."CALENDAR".day_of_calendar) as rn FROM Sys_Calendar."CALENDAR" QUALIFY rn <= 15) t
I've only used sys_calendar.calendar here because it's a big table. Any big table would do here though.
Here's one way to do it in Teradata:
SELECT ARRAY_AGG(src.RowNum)
FROM (
SELECT ROW_NUMBER() OVER() AS RowNum
FROM sys_calendar.calendar
QUALIFY RowNum BETWEEN <begin_num> AND <end_num>
) src
This will give you the output as an ARRAY data type, which you can probably cast as a VARCHAR. It also assumes begin_num > 0 and <end_num> is less than the number of rows in the sys_calendar.calendar view. You can always fiddle with this to fit your required range of values.
There are also DelimitedBuild UDFs out there (if you can find one) that can be used to convert row values into delimited strings.
The cheapest way to achieve your goal is this one (no functions, or joins to tables required):
WITH RECURSIVE NumberRanges(TheNumber,TheString) AS
(
SELECT 1 AS TheNumber,casT(1 as VARCHAR(500)) as TheString
FROM
(
SELECT * FROM (SELECT NULL AS X) X
) DUMMYTABLE
UNION ALL
SELECT
TheNumber + 1 AS TheNumber,
TheString ||',' || TRIM(TheNumber+1)
FROM NumberRanges
WHERE
TheNumber < 10
)
SELECT TheString
FROM NumberRanges
QUALIFY ROW_NUMBER() OVER ( ORDER BY TheNumber DESC) = 1;
Result String: 1,2,3,4,5,6,7,8,9,10

Return Distinct Rows That Contain The Same Value/Character In SQL

I have a bit of a tricky situation. I have a column that contains a pipe delimited set of numbers in numerous rows in a table. For example:
Courses
-------------------
1|2
1|2|3
1|2|8
10
11
11|12
What I want to achieve is to return rows where the number only appears once in my output.
Ideally, I want to try and carry this out using SQL rather than having to carry out checks at a web application level. Carrying out a DISTINCT does not achieve what I want.
The desired output would be:
Courses
-------------------
1
2
3
8
10
11
12
I would appreciated if anyone can guide me in the right direction.
Thanks.
Please try:
declare #tbl as table(Courses nvarchar(max))
insert into #tbl values
('1|2'),
('1|2|3'),
('1|2|8'),
('10'),
('11'),
('11|12')
select * from #tbl
SELECT
DISTINCT CAST(Split.a.value('.', 'VARCHAR(100)') AS INT) AS CVS
FROM
(
SELECT CAST ('<M>' + REPLACE(Courses, '|', '</M><M>') + '</M>' AS XML) AS CVS
FROM #tbl
) AS A CROSS APPLY CVS.nodes ('/M') AS Split(a)
ORDER BY 1
Try this one -
SET NOCOUNT ON;
DECLARE #temp TABLE
(
string VARCHAR(500)
)
DECLARE #Separator CHAR(1)
SELECT #Separator = '|'
INSERT INTO #temp (string)
VALUES
('1|2'),
('1|2|3'),
('1|2|8'),
('10'),
('11'),
('11|12')
-- 1. XML
SELECT p.value('(./s)[1]', 'VARCHAR(500)')
FROM (
SELECT field = CAST('<r><s>' + REPLACE(t.string, #Separator, '</s></r><r><s>') + '</s></r>' AS XML)
FROM #temp t
) d
CROSS APPLY field.nodes('/r') t(p)
-- 2. CTE
;WITH a AS
(
SELECT
start_pos = 1
, end_pos = CHARINDEX(#Separator, t.string)
, t.string
FROM #temp t
UNION ALL
SELECT
end_pos + 1
, CHARINDEX(#Separator, string, end_pos + 1)
, string
FROM a
WHERE end_pos > 0
)
SELECT d.name
FROM (
SELECT
name = SUBSTRING(
string
, start_pos
, ABS(end_pos - start_pos)
)
FROM a
) d
WHERE d.name != ''
Try this :
create table course (courses varchar(100))
insert into course values('1|2')
insert into course values('1|2|3')
insert into course values('1|2|8')
insert into course values('10')
insert into course values('11')
insert into course values('11|12')
Declare #col varchar(200)
SELECT
#col=(
SELECT DISTINCT c.courses + '|'
FROM course c
FOR XML PATH('')
);
select * from course
;with demo as(
select cast(substring(#col,1,charindex('|',#col,1)-1) AS INT) cou,charindex('|',#col,1) pos
union all
select cast(substring(#col,pos+1,charindex('|',#col,pos+1)-pos-1)AS INT) cou,charindex('|',#col,pos+1) pos
from demo where pos<LEN(#col))
select distinct cou from demo
Could not manage without recursion :( Something like this could do the trich?
WITH splitNum(num, r)
AS
(
SELECT
SUBSTRING(<field>,1, CHARINDEX('|', <field>)-1) num,
SUBSTRING(<field>,CHARINDEX('|', <field>)+1, len(<field>)) r
FROM <yourtable> as a
UNION ALL
SELECT
SUBSTRING(r,1, CHARINDEX('|', r)-1) num,
SUBSTRING(r,CHARINDEX('|', r)+1, len(r)) r
FROM <yourtable> b
WHERE CHARINDEX('|', r) > 0
inner join splitNum as c on <whatevertheprimarykeyis>
)
SELECT distinct num FROM splitNum
Didn't make it run, but it should do the trick, just replace the and with the correct info
One way would be to use a recursive CTE:
with cte as
(select cast(case charindex('|',courses) when 0 then courses
else left(courses,charindex('|',courses)-1) end as int) course,
case charindex('|',courses) when 0 then ''
else right(courses,len(courses)-charindex('|',courses)) end courses
from courses
union all
select cast(case charindex('|',courses) when 0 then courses
else left(courses,charindex('|',courses)-1) end as int) course,
case charindex('|',courses) when 0 then ''
else right(courses,len(courses)-charindex('|',courses)) end courses
from cte
where len(courses)>0)
select distinct course from cte
SQLFiddle here.

Parsing text to multiple columns

I have a feed that is populating a single text field in a table with statistics.
I need to pull this data into multiple fields in another table
but the strange format makes importing automatically difficult.
The file format is flat text but an example is below:
08:34:52 Checksum=180957248,TicketType=6,InitialUserType=G,InitialUserID=520,CommunicationType=Incoming,Date=26-03-2012,Time=08:35:00,Service=ST,Duration=00:00:14,Cost=0.12
Effectively it's made up of:
[timestamp] [Field1 name]=[Field1 value],[Field2 name]=[Field2 value],[Field4 name]=[Field4 value]...[CR]
All fields are always in the same order but not always present.
Total columns could be anywhere from 5 to 30.
I've tried the below function to translate it which seems to work mostly but seems to randomly skip fields:
Parsing the data:
(SELECT [Data].[dbo].[GetFromTextString] ( 'Checksum=' ,',' ,RAWTEXT)) AS RowCheckSum,
(SELECT [Data].[dbo].[GetFromTextString] ( 'TicketType=' ,',' ,RAWTEXT)) AS TicketType,
And the Function:
CREATE FUNCTION [dbo].[GetFromTextString]
-- Input start and end and return value.
(#uniqueprefix VARCHAR(100),
#commonsuffix VARCHAR(100),
#datastring VARCHAR(MAX) )
RETURNS VARCHAR(MAX) -- Picked Value.
AS
BEGIN
DECLARE #ADJLEN INT = LEN(#uniqueprefix)
SET #datastring = #datastring + #commonsuffix
RETURN (
CASE WHEN (CHARINDEX(#uniqueprefix,#datastring) > 0)
AND (CHARINDEX(#uniqueprefix + #commonsuffix,#datastring) = 0)
THEN SUBSTRING(#datastring, PATINDEX('%' + #uniqueprefix + '%',#datastring)+#ADJLEN, CHARINDEX(#commonsuffix,#datastring,PATINDEX('%' + #uniqueprefix + '%',#datastring))- PATINDEX('%' + #uniqueprefix + '%',#datastring)-#ADJLEN) ELSE NULL END
)
END
Could anyone suggest a better/cleaner way to strip out the data or could someone work out why this formula skips rows?
Any help really appreciated.
NOTE - THE FIRST SOLUTION IS RUBBISH. I HAVE LEFT IN IT FOR HISTORICAL REASONS, BUT A BETTER SOLUTION IS CONTAINED BELOW
I am not even sure if this will be faster than your current method, but it is the way I would approach the issue (If i was forced into an SQL only solution). The first thing that is required is a table valued function that will perform a split function:
CREATE FUNCTION dbo.Split (#TextToSplit VARCHAR(MAX), #Delimiter VARCHAR(MAX))
RETURNS #Values TABLE (Position INT IDENTITY(1, 1) NOT NULL, TextValues VARCHAR(MAX) NOT NULL)
AS
BEGIN
WHILE CHARINDEX(#Delimiter, #TextToSplit) > 0
BEGIN
INSERT #Values
SELECT LEFT(#TextToSplit, CHARINDEX(#Delimiter, #TextToSplit) - 1)
SET #TextToSplit = SUBSTRING(#TextToSplit, CHARINDEX(#Delimiter, #TextToSplit) + 1, LEN(#TextToSplit))
END
INSERT #Values VALUES (#TextToSplit)
RETURN
END
For my example I am working from a temp table #Worklist, you may need to adapt yours accordingly, or you could just insert the relevant data into #Worklist where I have used dummy data:
DECLARE #WorkList TABLE (ID INT IDENTITY(1, 1) NOT NULL, TextField VARCHAR(MAX))
INSERT #WorkList
SELECT '08:34:52 Checksum=180957248,TicketType=6,InitialUserType=G,InitialUserID=520,CommunicationType=Incoming,Date=26-03-2012,Time=08:35:00,Service=ST,Duration=00:00:14,Cost=0.12'
UNION
SELECT '08:34:52 Checksum=180957249,TicketType=5,InitialUserType=H,InitialUserID=521,CommunicationType=Outgoing,Date=27-03-2012,Time=14:27:00,Service=ST,Duration=00:15:12,Cost=0.37'
The main bit of the query is done here. It is quite long, so I have tried to comment it as well as possible. If further clarification is required I can add more comments.
DECLARE #Output TABLE (ID INT IDENTITY(1, 1) NOT NULL, TextField VARCHAR(MAX))
DECLARE #KeyPairs TABLE (WorkListID INT NOT NULL, KeyField VARCHAR(MAX), ValueField VARCHAR(MAX))
-- STORE TIMESTAMP DATA - THIS ASSUMES THE FIRST SPACE IS THE END OF THE TIMESTAMP
INSERT #KeyPairs
SELECT ID, 'TimeStamp', LEFT(TextField, CHARINDEX(' ', TextField))
FROM #WorkList
-- CLEAR THE TIMESTAMP FROM THE WORKLIST
UPDATE #WorkList
SET TextField = SUBSTRING(TextField, CHARINDEX(' ', TextField) + 1, LEN(TextField))
DECLARE #ID INT = (SELECT MIN(ID) FROM #WorkList)
WHILE #ID IS NOT NULL
BEGIN
-- SPLIT THE STRING FIRST INTO ALL THE PAIRS (e.g. Checksum=180957248)
INSERT #Output
SELECT TextValues
FROM dbo.Split((SELECT TextField FROM #WorkList WHERE ID = #ID), ',')
DECLARE #ID2 INT = (SELECT MIN(ID) FROM #Output)
-- FOR ALL THE PAIRS SPLIT THEM INTO A KEY AND A VALUE (USING THE POSITION OF THE SPLIT FUNCTION)
WHILE #ID2 IS NOT NULL
BEGIN
INSERT #KeyPairs
SELECT #ID,
MAX(CASE WHEN Position = 1 THEN TextValues ELSE '' END),
MAX(CASE WHEN Position = 2 THEN TextValues ELSE '' END)
FROM dbo.Split((SELECT TextField FROM #Output WHERE ID = #ID2), '=')
DELETE #Output
WHERE ID = #ID2
SET #ID2 = (SELECT MIN(ID) FROM #Output)
END
DELETE #WorkList
WHERE ID = #ID
SET #ID = (SELECT MIN(ID) FROM #WorkList)
END
-- WE NOW HAVE A TABLE CONTAINING EAV MODEL STYLE DATA. THIS NEEDS TO BE PIVOTED INTO THE CORRECT FORMAT
-- ENSURE COLUMNS ARE LISTED IN THE ORDER YOU WANT THEM TO APPEAR
SELECT *
FROM #KeyPairs p
PIVOT
( MAX(ValueField)
FOR KeyField IN
( [TimeStamp], [Checksum], [TicketType], [InitialUserType],
[InitialUserID], [CommunicationType], [Date], [Time],
[Service], [Duration], [Cost]
)
) AS PivotTable;
EDIT (4 YEARS LATER)
A recent upvote brought this to my attention and the I hate myself a little bit for ever posting this answer in its current form.
A much better split function would be:
CREATE FUNCTION dbo.Split
(
#List NVARCHAR(MAX),
#Delimiter NVARCHAR(255)
)
RETURNS TABLE
WITH SCHEMABINDING AS
RETURN
( WITH N1 AS (SELECT N FROM (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1), (1)) n (N)),
N2(N) AS (SELECT 1 FROM N1 a CROSS JOIN N1 b),
N3(N) AS (SELECT 1 FROM N2 a CROSS JOIN N2 b),
N4(N) AS (SELECT 1 FROM N3 a CROSS JOIN N3 b),
cteTally(N) AS
( SELECT 0 UNION ALL
SELECT TOP (DATALENGTH(ISNULL(#List,1))) ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
FROM n4
),
cteStart(N1) AS
( SELECT t.N+1
FROM cteTally t
WHERE (SUBSTRING(#List,t.N,1) = #Delimiter OR t.N = 0)
)
SELECT Item = SUBSTRING(#List, s.N1, ISNULL(NULLIF(CHARINDEX(#Delimiter,#List,s.N1),0)-s.N1,8000)),
Position = s.N1,
ItemNumber = ROW_NUMBER() OVER(ORDER BY s.N1)
FROM cteStart s
);
Then there is no need for looping at all, you just have a proper set based solution by calling the split function twice to get your EAV style data set:
DECLARE #WorkList TABLE (ID INT IDENTITY(1, 1) NOT NULL, TextField VARCHAR(MAX))
INSERT #WorkList
SELECT '08:34:52 Checksum=180957248,TicketType=6,InitialUserType=G,InitialUserID=520,CommunicationType=Incoming,Date=26-03-2012,Time=08:35:00,Service=ST,Duration=00:00:14,Cost=0.12'
UNION
SELECT '08:34:52 Checksum=180957249,TicketType=5,InitialUserType=H,InitialUserID=521,CommunicationType=Outgoing,Date=27-03-2012,Time=14:27:00,Service=ST,Duration=00:15:12,Cost=0.37';
WITH KeyPairs AS
( SELECT w.ID,
[Timestamp] = LEFT(w.TextField, CHARINDEX(' ', w.TextField)),
KeyField = MAX(CASE WHEN v.ItemNumber = 1 THEN v.Item END),
ValueField = MAX(CASE WHEN v.ItemNumber = 2 THEN v.Item END)
FROM #WorkList AS w
CROSS APPLY dbo.Split(SUBSTRING(TextField, CHARINDEX(' ', TextField) + 1, LEN(TextField)), ',') AS kp
CROSS APPLY dbo.Split(kp.Item, '=') AS v
GROUP BY w.ID, kp.ItemNumber,w.TextField
)
SELECT *
FROM KeyPairs AS kp
PIVOT
( MAX(ValueField)
FOR KeyField IN
( [Checksum], [TicketType], [InitialUserType],
[InitialUserID], [CommunicationType], [Date], [Time],
[Service], [Duration], [Cost]
)
) AS pvt;