Parsing text to multiple columns - sql

I have a feed that is populating a single text field in a table with statistics.
I need to pull this data into multiple fields in another table
but the strange format makes importing automatically difficult.
The file format is flat text but an example is below:
08:34:52 Checksum=180957248,TicketType=6,InitialUserType=G,InitialUserID=520,CommunicationType=Incoming,Date=26-03-2012,Time=08:35:00,Service=ST,Duration=00:00:14,Cost=0.12
Effectively it's made up of:
[timestamp] [Field1 name]=[Field1 value],[Field2 name]=[Field2 value],[Field4 name]=[Field4 value]...[CR]
All fields are always in the same order but not always present.
Total columns could be anywhere from 5 to 30.
I've tried the below function to translate it which seems to work mostly but seems to randomly skip fields:
Parsing the data:
(SELECT [Data].[dbo].[GetFromTextString] ( 'Checksum=' ,',' ,RAWTEXT)) AS RowCheckSum,
(SELECT [Data].[dbo].[GetFromTextString] ( 'TicketType=' ,',' ,RAWTEXT)) AS TicketType,
And the Function:
CREATE FUNCTION [dbo].[GetFromTextString]
-- Input start and end and return value.
(#uniqueprefix VARCHAR(100),
#commonsuffix VARCHAR(100),
#datastring VARCHAR(MAX) )
RETURNS VARCHAR(MAX) -- Picked Value.
AS
BEGIN
DECLARE #ADJLEN INT = LEN(#uniqueprefix)
SET #datastring = #datastring + #commonsuffix
RETURN (
CASE WHEN (CHARINDEX(#uniqueprefix,#datastring) > 0)
AND (CHARINDEX(#uniqueprefix + #commonsuffix,#datastring) = 0)
THEN SUBSTRING(#datastring, PATINDEX('%' + #uniqueprefix + '%',#datastring)+#ADJLEN, CHARINDEX(#commonsuffix,#datastring,PATINDEX('%' + #uniqueprefix + '%',#datastring))- PATINDEX('%' + #uniqueprefix + '%',#datastring)-#ADJLEN) ELSE NULL END
)
END
Could anyone suggest a better/cleaner way to strip out the data or could someone work out why this formula skips rows?
Any help really appreciated.

NOTE - THE FIRST SOLUTION IS RUBBISH. I HAVE LEFT IN IT FOR HISTORICAL REASONS, BUT A BETTER SOLUTION IS CONTAINED BELOW
I am not even sure if this will be faster than your current method, but it is the way I would approach the issue (If i was forced into an SQL only solution). The first thing that is required is a table valued function that will perform a split function:
CREATE FUNCTION dbo.Split (#TextToSplit VARCHAR(MAX), #Delimiter VARCHAR(MAX))
RETURNS #Values TABLE (Position INT IDENTITY(1, 1) NOT NULL, TextValues VARCHAR(MAX) NOT NULL)
AS
BEGIN
WHILE CHARINDEX(#Delimiter, #TextToSplit) > 0
BEGIN
INSERT #Values
SELECT LEFT(#TextToSplit, CHARINDEX(#Delimiter, #TextToSplit) - 1)
SET #TextToSplit = SUBSTRING(#TextToSplit, CHARINDEX(#Delimiter, #TextToSplit) + 1, LEN(#TextToSplit))
END
INSERT #Values VALUES (#TextToSplit)
RETURN
END
For my example I am working from a temp table #Worklist, you may need to adapt yours accordingly, or you could just insert the relevant data into #Worklist where I have used dummy data:
DECLARE #WorkList TABLE (ID INT IDENTITY(1, 1) NOT NULL, TextField VARCHAR(MAX))
INSERT #WorkList
SELECT '08:34:52 Checksum=180957248,TicketType=6,InitialUserType=G,InitialUserID=520,CommunicationType=Incoming,Date=26-03-2012,Time=08:35:00,Service=ST,Duration=00:00:14,Cost=0.12'
UNION
SELECT '08:34:52 Checksum=180957249,TicketType=5,InitialUserType=H,InitialUserID=521,CommunicationType=Outgoing,Date=27-03-2012,Time=14:27:00,Service=ST,Duration=00:15:12,Cost=0.37'
The main bit of the query is done here. It is quite long, so I have tried to comment it as well as possible. If further clarification is required I can add more comments.
DECLARE #Output TABLE (ID INT IDENTITY(1, 1) NOT NULL, TextField VARCHAR(MAX))
DECLARE #KeyPairs TABLE (WorkListID INT NOT NULL, KeyField VARCHAR(MAX), ValueField VARCHAR(MAX))
-- STORE TIMESTAMP DATA - THIS ASSUMES THE FIRST SPACE IS THE END OF THE TIMESTAMP
INSERT #KeyPairs
SELECT ID, 'TimeStamp', LEFT(TextField, CHARINDEX(' ', TextField))
FROM #WorkList
-- CLEAR THE TIMESTAMP FROM THE WORKLIST
UPDATE #WorkList
SET TextField = SUBSTRING(TextField, CHARINDEX(' ', TextField) + 1, LEN(TextField))
DECLARE #ID INT = (SELECT MIN(ID) FROM #WorkList)
WHILE #ID IS NOT NULL
BEGIN
-- SPLIT THE STRING FIRST INTO ALL THE PAIRS (e.g. Checksum=180957248)
INSERT #Output
SELECT TextValues
FROM dbo.Split((SELECT TextField FROM #WorkList WHERE ID = #ID), ',')
DECLARE #ID2 INT = (SELECT MIN(ID) FROM #Output)
-- FOR ALL THE PAIRS SPLIT THEM INTO A KEY AND A VALUE (USING THE POSITION OF THE SPLIT FUNCTION)
WHILE #ID2 IS NOT NULL
BEGIN
INSERT #KeyPairs
SELECT #ID,
MAX(CASE WHEN Position = 1 THEN TextValues ELSE '' END),
MAX(CASE WHEN Position = 2 THEN TextValues ELSE '' END)
FROM dbo.Split((SELECT TextField FROM #Output WHERE ID = #ID2), '=')
DELETE #Output
WHERE ID = #ID2
SET #ID2 = (SELECT MIN(ID) FROM #Output)
END
DELETE #WorkList
WHERE ID = #ID
SET #ID = (SELECT MIN(ID) FROM #WorkList)
END
-- WE NOW HAVE A TABLE CONTAINING EAV MODEL STYLE DATA. THIS NEEDS TO BE PIVOTED INTO THE CORRECT FORMAT
-- ENSURE COLUMNS ARE LISTED IN THE ORDER YOU WANT THEM TO APPEAR
SELECT *
FROM #KeyPairs p
PIVOT
( MAX(ValueField)
FOR KeyField IN
( [TimeStamp], [Checksum], [TicketType], [InitialUserType],
[InitialUserID], [CommunicationType], [Date], [Time],
[Service], [Duration], [Cost]
)
) AS PivotTable;
EDIT (4 YEARS LATER)
A recent upvote brought this to my attention and the I hate myself a little bit for ever posting this answer in its current form.
A much better split function would be:
CREATE FUNCTION dbo.Split
(
#List NVARCHAR(MAX),
#Delimiter NVARCHAR(255)
)
RETURNS TABLE
WITH SCHEMABINDING AS
RETURN
( WITH N1 AS (SELECT N FROM (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1), (1)) n (N)),
N2(N) AS (SELECT 1 FROM N1 a CROSS JOIN N1 b),
N3(N) AS (SELECT 1 FROM N2 a CROSS JOIN N2 b),
N4(N) AS (SELECT 1 FROM N3 a CROSS JOIN N3 b),
cteTally(N) AS
( SELECT 0 UNION ALL
SELECT TOP (DATALENGTH(ISNULL(#List,1))) ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
FROM n4
),
cteStart(N1) AS
( SELECT t.N+1
FROM cteTally t
WHERE (SUBSTRING(#List,t.N,1) = #Delimiter OR t.N = 0)
)
SELECT Item = SUBSTRING(#List, s.N1, ISNULL(NULLIF(CHARINDEX(#Delimiter,#List,s.N1),0)-s.N1,8000)),
Position = s.N1,
ItemNumber = ROW_NUMBER() OVER(ORDER BY s.N1)
FROM cteStart s
);
Then there is no need for looping at all, you just have a proper set based solution by calling the split function twice to get your EAV style data set:
DECLARE #WorkList TABLE (ID INT IDENTITY(1, 1) NOT NULL, TextField VARCHAR(MAX))
INSERT #WorkList
SELECT '08:34:52 Checksum=180957248,TicketType=6,InitialUserType=G,InitialUserID=520,CommunicationType=Incoming,Date=26-03-2012,Time=08:35:00,Service=ST,Duration=00:00:14,Cost=0.12'
UNION
SELECT '08:34:52 Checksum=180957249,TicketType=5,InitialUserType=H,InitialUserID=521,CommunicationType=Outgoing,Date=27-03-2012,Time=14:27:00,Service=ST,Duration=00:15:12,Cost=0.37';
WITH KeyPairs AS
( SELECT w.ID,
[Timestamp] = LEFT(w.TextField, CHARINDEX(' ', w.TextField)),
KeyField = MAX(CASE WHEN v.ItemNumber = 1 THEN v.Item END),
ValueField = MAX(CASE WHEN v.ItemNumber = 2 THEN v.Item END)
FROM #WorkList AS w
CROSS APPLY dbo.Split(SUBSTRING(TextField, CHARINDEX(' ', TextField) + 1, LEN(TextField)), ',') AS kp
CROSS APPLY dbo.Split(kp.Item, '=') AS v
GROUP BY w.ID, kp.ItemNumber,w.TextField
)
SELECT *
FROM KeyPairs AS kp
PIVOT
( MAX(ValueField)
FOR KeyField IN
( [Checksum], [TicketType], [InitialUserType],
[InitialUserID], [CommunicationType], [Date], [Time],
[Service], [Duration], [Cost]
)
) AS pvt;

Related

How do I extract if the strings are in email format from a column??

I wanted to extract different emails included in a specific column. The column may have other type of strings which may not be emails which I want to exclude and I just want emails.
Table 1
ID Value
1 Sent Email successfully to John.Muller#gmail.com, Jim.James#gmail.com, Bob.King#hotmail.com and it will be sent tomorrow
2 Email not successful to adam.sandy#yahoo.com, Trisha.stratus#gmail.com, lindy.123#gmail.com and it will not be sent today
If we see this format, I have 3 different emails on each ID, and I wanted to know if there is any way which I could get the data in this format.
Expected output
ID Value
1 John.Muller#gmail.com, Jim.James#gmail.com, Bob.King#hotmail.com
2 adam.sandy#yahoo.com, Trisha.stratus#gmail.com, lindy.123#gmail.com
If this type of output is also possible, it would be great.
Another Possible Output (this would be awesome if possible)
ID Value
1 John.Muller#gmail.com
1 Jim.James#gmail.com
1 Bob.King#hotmail.com
2 adam.sandy#yahoo.com
2 Trisha.stratus#gmail.com
2 lindy.123#gmail.com
Here is an inline approach
Example
Select A.ID
,Value = B.RetVal
From YourTable A
Cross Apply (
Select RetSeq = Row_Number() over (Order By (Select null))
,RetVal = v.value('(./text())[1]', 'varchar(150)')
From (values (convert(xml,'<x>' + replace(replace(Value,' ',','),',','</x><x>')+'</x>'))) x(n)
Cross Apply n.nodes('x') node(v)
) B
Where RetVal like '%#%.___'
Returns
ID Value
1 John.Muller#gmail.com
1 Jim.James#gmail.com
1 Bob.King#hotmail.com
2 adam.sandy#yahoo.com
2 Trisha.stratus#gmail.com
2 lindy.123#gmail.com
Edit To return as one line
Select A.ID
,B.Value
From YourTable A
Cross Apply (
Select Value = Stuff((Select ', ' +RetVal
From (
Select RetSeq = Row_Number() over (Order By (Select null))
,RetVal = v.value('(./text())[1]', 'varchar(150)')
From (values (convert(xml,'<x>' + replace(replace(Value,' ',','),',','</x><x>')+'</x>'))) x(n)
Cross Apply n.nodes('x') node(v)
) B1
Where RetVal like '%#%.___'
Order by RetSeq
For XML Path ('')),1,2,'')
) B
Returns
ID Value
1 John.Muller#gmail.com, Jim.James#gmail.com, Bob.King#hotmail.com
2 adam.sandy#yahoo.com, Trisha.stratus#gmail.com, lindy.123#gmail.com
Try this Function
alter FUNCTION [dbo].[FnSplit2]
(
#List nvarchar(max),
#SplitOn1 nvarchar(5),
#SplitOn2 nvarchar(5)
)
--Akram Mustafa
RETURNS #RtnValue table
(
Id int identity(1,1),
Value nvarchar(1000)
)
AS
BEGIN
declare #SplitOn varchar(5)
While (Charindex(#SplitOn1,#List)>0 or Charindex(#SplitOn2,#List)>0 )
Begin
if Charindex(#SplitOn1,#List)<Charindex(#SplitOn2,#List) and Charindex(#SplitOn1,#List)> 0
begin
set #SplitOn = #SplitOn1
end
else
begin
set #SplitOn = #SplitOn2
end
Insert Into #RtnValue (value)
Select
Value = ltrim(rtrim(Substring(#List,1,Charindex(#SplitOn,#List)-1)))
Set #List = Substring(#List,Charindex(#SplitOn,#List)+DATALENGTH(#SplitOn),DATALENGTH(#List))
End
Insert Into #RtnValue (Value)
Select Value = ltrim(rtrim(#List))
Return
END
it will split for either comma or white space
declare #MyTable as table (id int, value varchar(1000))
insert into #MyTable(id, value)
values(1, 'Sent Email successfully to John.Muller#gmail.com, Jim.James#gmail.com, Bob.King#hotmail.com and it will be sent tomorrow')
insert into #MyTable(id, value)
values(2, 'Email not successful to adam.sandy#yahoo.com, Trisha.stratus#gmail.com, lindy.123#gmail.com and it will not be sent today')
declare #str varchar(max)
SELECT #str = LEFT(Value, LEN(Value) - 1)
FROM (
SELECT Value + ', '
FROM #MyTable
FOR XML PATH ('')
) c (Value)
select value from dbo.fnSplit2(#str,',',' ')
where value like '%#%'
end result will be
value
John.Muller#gmail.com
Jim.James#gmail.com
Bob.King#hotmail.com
adam.sandy#yahoo.com
Trisha.stratus#gmail.com
lindy.123#gmail.com

SQL split or substring column value

I have an sql column and value/structure as per below:
ColumnA
ROOT/South America/Lima/Test/Test2
Running a select query I want to extract "Lima" As a column value. I couldn't get the split string to work, or substring.
Any thoughts?
This is my approach to get the nth part of any delimited string:
DECLARE #mockupTable TABLE(ID INT IDENTITY, YourColumn VARCHAR(1000));
INSERT INTO #mockupTable VALUES('ROOT/South America/Lima/Test/Test2')
,('Too/short')
,('Three/parts/valid');
--The splitting is a one-liner:
SELECT *
,CAST('<x>' + REPLACE(YourColumn,'/','</x><x>') + '</x>' AS XML).value('/x[3]','nvarchar(max)') AS ThirdPart
FROM #mockupTable;
If your delimited strings might include XML-forbidden characters (namely &, < and >, you'd have to escape them (but that's easy):
Just use this instead
,CAST('<x>' + REPLACE((SELECT YourColumn [*] FOR XML PATH('')),'/','</x><x>') + '</x>' AS XML).value('/x[3]','nvarchar(max)') AS ThirdPart
Some explanation
The replacements of your delimiter / with </x><x> allow to get an XML like string, which can be casted to
<x>ROOT</x>
<x>South America</x>
<x>Lima</x>
<x>Test</x>
<x>Test2</x>
The XML's method .value() allows to use XQuery to get the third <x>. One advantage: If there is no third element, this won't break, just return NULL.
A little out there but it works. Based on a recursive cte. You can set the delimiter, start and end.
declare #T table (iden int identity, col1 varchar(100));
insert into #T(col1) values
('ROOT/South America/Lima/Test/Test2')
, ('ROOT/South America/Peru/Test/Test2')
, ('ROOT/South America/Venuzuala')
, ('ROOT/South America/');
declare #split char(1) = '/';
declare #start int = 2;
declare #end int = 3
select #split, #start, #end;
with cte as
( select t.iden, t.col1, charindex(#split, t.col1) as pos , 1 as cnt
from #T t
union all
select t.iden, t.col1, charindex(#split, t.col1, t.pos + 1), cnt + 1
from cte t
where charindex(#split, t.col1, t.pos + 1) > 0
and cnt+1 <= #end
)
--select * from cte order by iden, cnt;
select --t1.*, t2.*,
SUBSTRING(t1.col1, t1.pos+1, t2.pos-t1.pos-1) as bingo
from cte t1
join cte t2
on t2.iden = t1.iden
and t1.cnt = #start
and t2.cnt = #end
order by t1.iden;
declare #t varchar(max) = 'ROOT/South America/Lima/Test/Test2'
select * from (
select
[value]
,ROW_NUMBER() Over (Order by (select null )) [Level]
from string_split( #t , '/')
) d
where d.Level = 3
the code above dose the same with much simpler logic, split the text by '/' results to rows "String_Split", then row_number() to get the order of the results, each number is a level if no sorting has been done in the order by statment "(select null)"
as a result by adding the level number in the where statment will get the level value we want, above will show level 3

SQL Query for Min and Max Values [duplicate]

This question already has answers here:
How to split a comma-separated value to columns
(38 answers)
Closed 8 years ago.
I have the following data in a table. The number of values in each row can vary and the number of rows could also vary.
The table has 1 column with csv formatted values. The values will always be numeric
Data
1,2
4
5,12, 10
6,7,8,9,10
15,17
I would like to end up with a temp table with the following
Data Lowest Highest
1,2 1 2
4 4 4
5,12, 10 5 12
6,7,8,9,10 6 10
15,17 15 17
Can anyone help with writing a sql query or function to achieve this
Instead of function, you can achieve by this
;WITH tmp
AS (SELECT A.rn,split.a.value('.', 'VARCHAR(100)') AS String
FROM (SELECT Row_number() OVER(ORDER BY (SELECT NULL)) AS RN,
Cast ('<M>' + Replace([data], ',', '</M><M>') + '</M>' AS XML) AS String
FROM table1) AS A
CROSS apply string.nodes ('/M') AS Split(a))
SELECT X.data,Tmp.lower,Tmp.higher
FROM (SELECT rn,Min(Cast(string AS INT)) AS Lower,Max(Cast(string AS INT)) AS Higher
FROM tmp
GROUP BY rn) Tmp
JOIN (SELECT Row_number() OVER(ORDER BY (SELECT NULL)) AS RN1,data
FROM table1) X
ON X.rn1 = Tmp.rn
FIDDLE DEMO
Output would be:
Data Lower Higher
1,2 1 2
4 4 4
5,12, 10 5 12
6,7,8,9,10 6 10
15,17 15 17
First create a user defined function to convert each row of 'DATA' column to a intermediate table as:
/****** Object: UserDefinedFunction [dbo].[CSVToTable]******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
CREATE FUNCTION [dbo].[CSVToTable] (#InStr VARCHAR(MAX))
RETURNS #TempTab TABLE
(id int not null)
AS
BEGIN
;-- Ensure input ends with comma
SET #InStr = REPLACE(#InStr + ',', ',,', ',')
DECLARE #SP INT
DECLARE #VALUE VARCHAR(1000)
WHILE PATINDEX('%,%', #INSTR ) <> 0
BEGIN
SELECT #SP = PATINDEX('%,%',#INSTR)
SELECT #VALUE = LEFT(#INSTR , #SP - 1)
SELECT #INSTR = STUFF(#INSTR, 1, #SP, '')
INSERT INTO #TempTab(id) VALUES (#VALUE)
END
RETURN
END
GO
Function is explained further here.
Then Using Cross Apply we can get the desired output as:
With CTE as
(
select
T.Data, Min(udf.Id) as [Lowest],Max(udf.Id) as [Highest]
from
Test T
CROSS APPLY dbo.CSVToTable(T.Data) udf
Group By Data
)
Select * from CTE
Sample Code here...
What a Cross Apply does is : it applies the right table expression to each row from the left table and produces a result table with the unified result sets.
Create table #temp1 (name varchar(100),value int )
Declare #len int
Select #len=(select max(LEN(name)-LEN(replace(name,',',''))) from table)
Declare #i int = 1
while (#i<=#len+1)
begin
insert into #temp1
select name,PARSENAME(REPLACE(name,',','.'),#i) from table t
set #i = #i+1
end
Select name,MIN(value) MINV,MAX(value) MAXV from #temp1 group by name
declare #Testdata table ( Data varchar(max))
insert #Testdata select '1,2'
insert #Testdata select '4'
insert #Testdata select '5,12, 10'
insert #Testdata select '6,7,8,9,10'
;with tmp( DataItem, Data, RN1) as (
select LEFT(Data, CHARINDEX(',',Data+',')-1),
STUFF(Data, 1, CHARINDEX(',',Data+','), ''),
ROW_NUMBER()OVER(ORDER BY (SELECT NULL))AS RN1
from #Testdata
union all
select LEFT(Data, CHARINDEX(',',Data+',')-1),
STUFF(Data, 1, CHARINDEX(',',Data+','), ''),RN1
from tmp
where Data > ''
)
Select x.data,t.Low,t.Up FROM
(Select RN1,MIN(Cast(DataItem AS INT)) As Low,
MAX(Cast(DataItem AS INT)) As Up
FROM tmp t GROUP BY t.RN1)t
JOIN (Select ROW_NUMBER()OVER(ORDER BY (SELECT NULL))AS RN,data from #Testdata)X
ON X.RN = t.RN1

Splitting a string then pivoting result

If have a string passed from a .Net application that looks like the below
2023|F66451,1684|648521,1684|600271,2137|019592
I have started to parse out the string using the method below but I need to Pivot the data returned from the Split ( surrounded by *'s) function in order to insert into the #tmpExceptions table
DECLARE #ExceptionsList as nvarchar(MAX)
SET #ExceptionsList = '2023|F66451,1684|648521,1684|600271,2137|019592'
SET NOCOUNT ON;
DECLARE #CurrentLineItem as nvarchar(255)
CREATE TABLE #ParsePassOne
(
LineItem nvarchar(255)
)
CREATE TABLE #tmpExceptions
(
AccountNumber int,
ClaimNumber nvarchar(50)
)
INSERT INTO #ParsePassOne
SELECT value FROM Split( ',' ,#ExceptionsList)
WHILE EXISTS(SELECT LineItem FROM #ParsePassOne)
BEGIN
SELECT TOP 1 #CurrentLineItem = LineItem FROM #ParsePassOne
*******
SELECT value FROM Split( '|' ,#CurrentLineItem)
*******
DELETE FROM #ParsePassOne WHERE LineItem = #CurrentLineItem
END
SELECT * FROM #tmpExceptions
DROP TABLE #ParsePassOne
DROP TABLE #tmpExceptions
So far the data returned looks as below. I just need to pivot the data to columns so I can insert it. How do I go about this?
Split Function
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
--Creates an 'InLine' Table Valued Function (TVF)
ALTER FUNCTION [dbo].[Split]
( #Delimiter varchar(5),
#List varchar(8000)
)
RETURNS #TableOfValues table
( RowID smallint IDENTITY(1,1),
[Value] varchar(50)
)
AS
BEGIN
DECLARE #LenString int
WHILE len( #List ) > 0
BEGIN
SELECT #LenString =
(CASE charindex( #Delimiter, #List )
WHEN 0 THEN len( #List )
ELSE ( charindex( #Delimiter, #List ) -1 )
END
)
INSERT INTO #TableOfValues
SELECT substring( #List, 1, #LenString )
SELECT #List =
(CASE ( len( #List ) - #LenString )
WHEN 0 THEN ''
ELSE right( #List, len( #List ) - #LenString - 1 )
END
)
END
RETURN
END
If you are using SQL Server 2016 you can use String_Split() function and use cross apply/pivot to get into single row
create table #t (v varchar(50), i int)
insert into #t (v, i) values ('2023|F66451',1)
,('1684|648521',2), ('1684|600271', 3), ('2137|019592', 4)
--Inorder to get into same row -pivoting the data
select * from (
select * from #t t cross apply (select RowN=Row_Number() over (Order by (SELECT NULL)), value from string_split(t.v, '|') ) d) src
pivot (max(value) for src.RowN in([1],[2])) p
You can replace your WHILE EXISTS(SELECT LineItem FROM #ParsePassOne) loop with
select *
from
(
select * from #parsepassone
cross apply dbo.Split( '|' ,lineitem)
) src
pivot
(max(value) for rowid in ([1],[2]))p
Or replace the whole thing with
insert #tmpExceptions (AccountNumber, ClaimNumber)
select [1],[2]
from
(
select e.rowid e, p.* from dbo.Split( ',' ,#ExceptionsList) e
cross apply dbo.Split( '|' ,e.value) p ) s
pivot
(max(value) for rowid in ([1],[2]))p
I had a similar problem, where I needed to split and pivot the values in a column, and I needed to have it all in a view.
I came up with the following code (for SQL 2016 and up)
/* Table */
DECLARE #data TABLE (id INT IDENTITY(1,1), [Name] VARCHAR(128) NOT NULL, [Selection] VARCHAR(512) NULL)
INSERT INTO #data ([Name],Selection)
VALUES('Bob','PC; Mouse; Keyboard; Network; Smartphone'),
('Mo','Violin; Hammer'),
('Jon','Magic; Blood; Teleporter'),
('Mhary','Vampire')
/* Pivot */
;WITH Data_RowNumber AS (
SELECT
id,
[name],
split.value,
ROW_NUMBER() OVER (PARTITION BY id ORDER BY id) AS RowNumber
FROM #data
CROSS APPLY STRING_SPLIT([Selection],';') AS split
)
SELECT
id,
[name],
[1],[2],[3],[4],[5],[6]
FROM Data_RowNumber
PIVOT (
MAX([value])
FOR [RowNumber] IN ([1],[2],[3],[4],[5],[6])
) AS p

Replace values in a CSV string

I have a list of products in comma separated fashion and since the item list was replaced with new product items, I am trying to modify this CSV list with new product item list.
create table #tmp
(
id int identity(1,1) not null,
plist varchar(max) null
);
create table #tmpprod
(
oldid int null,
newid int null
);
insert into #tmp(plist) values
('10,11,15,17,19'),
('22,34,44,25'),
('5,6,8,9');
insert into #tmpprod(oldid, newid) values
(5, 109),
(9, 110),
(10, 111),
(15, 112),
(19, 113),
(30, 114),
(34, 222),
(44, 333);
I am trying to use a split fn to convert into rows and then replace these values and then convert columns to rows again. Is it possible in any other manner?
The output will be as:
id
newlist
1
111,11,112,17,113
2
22,222,333,25
3
109,6,8,110
Convert your comma separated list to XML. Use a numbers table, XQuery and position() to get the separate ID's with the position they have in the string. Build the comma separated string using the for xml path('') trick with a left outer join to #tempprod and order by position().
;with C as
(
select T.id,
N.number as Pos,
X.PList.value('(/i[position()=sql:column("N.Number")])[1]', 'int') as PID
from #tmp as T
cross apply (select cast('<i>'+replace(plist, ',', '</i><i>')+'</i>' as xml)) as X(PList)
inner join master..spt_values as N
on N.number between 1 and X.PList.value('count(/i)', 'int')
where N.type = 'P'
)
select C1.id,
stuff((select ','+cast(coalesce(T.newid, C2.PID) as varchar(10))
from C as C2
left outer join #tmpprod as T
on C2.PID = T.oldid
where C1.id = C2.id
order by C2.Pos
for xml path(''), type).value('.', 'varchar(max)'), 1, 1, '')
from C as C1
group by C1.id
Try on SE-Data
Assuming SQL Server 2005 or better, and assuming order isn't important, then given this split function:
CREATE FUNCTION [dbo].[SplitInts]
(
#List VARCHAR(MAX),
#Delimiter CHAR(1)
)
RETURNS TABLE
AS
RETURN ( SELECT Item FROM ( SELECT Item = x.i.value('(./text())[1]', 'int')
FROM
( SELECT [XML] = CONVERT(XML, '<i>' + REPLACE(#List, #Delimiter, '</i><i>')
+ '</i>').query('.') ) AS a CROSS APPLY [XML].nodes('i') AS x(i)
) AS y WHERE Item IS NOT NULL);
GO
You can get this result in the following way:
;WITH x AS
(
SELECT id, item, oldid, [newid], rn = ROW_NUMBER() OVER
(PARTITION BY id
ORDER BY PATINDEX('%,' + RTRIM(s.Item) + ',%', ',' + t.plist + ','))
FROM #tmp AS t CROSS APPLY dbo.SplitInts(t.plist, ',') AS s
LEFT OUTER JOIN #tmpprod AS p ON p.oldid = s.Item
)
SELECT id, newlist = STUFF((SELECT ',' + RTRIM(COALESCE([newid], Item))
FROM x AS x2 WHERE x2.id = x.id
FOR XML PATH(''),
TYPE).value(N'./text()[1]', N'varchar(max)'), 1, 1, '')
FROM x GROUP BY id;
Results:
id
newlist
1
111,11,112,17,113
2
22,222,333,25
3
109,6,8,110
Note that the ROW_NUMBER() / OVER / PARTITION BY / ORDER BY is only there to try to coerce the optimizer to return the rows in that order. You may observe this behavior today and it can change tomorrow depending on statistics or data changes, optimizer changes (service packs, CUs, upgrade, etc.) or other variables.
Long story short: if you're depending on that order, just send the set back to the client, and have the client construct the comma-delimited list. It's probably where this functionality belongs anyway.
That said, in SQL Server 2017+, we can guarantee retaining the order by splitting with OPENJSON() and reassembling with STRING_AGG():
;WITH x AS
(
SELECT o.id, val = COALESCE(n.newid, p.value), p.[key]
FROM #tmp AS o CROSS APPLY
OPENJSON('["' + REPLACE(o.pList, ',', '","') + '"]') AS p
LEFT OUTER JOIN #tmpprod AS n
ON p.value = n.oldid
)
SELECT id, newlist = STRING_AGG(val, ',')
WITHIN GROUP (ORDER BY [key])
FROM x GROUP BY id;
Example db<>fiddle
Thanks for this question - I've just learned something new. The following code is an adaptation of an article written by Rob Volk on exactly this topic. This is a very clever query! I won't copy all of the content down here. I have adapted it to create the results you're looking for in your example.
CREATE TABLE #nums (n INT)
DECLARE #i INT
SET #i = 1
WHILE #i < 8000
BEGIN
INSERT #nums VALUES(#i)
SET #i = #i + 1
END
CREATE TABLE #tmp (
id INT IDENTITY(1,1) not null,
plist VARCHAR(MAX) null
)
INSERT INTO #tmp
VALUES('10,11,15,17,19'),('22,34,44,25'),('5,6,8,9')
CREATE TABLE #tmpprod (
oldid INT NULL,
newid INT NULL
)
INSERT INTO #tmpprod VALUES(5, 109),(9, 110),(10, 111),(15, 112),(19, 113),(30, 114),(34, 222),(44, 333)
;WITH cte AS (SELECT ID, NULLIF(SUBSTRING(',' + plist + ',' , n , CHARINDEX(',' , ',' + plist + ',' , n) - n) , '') AS prod
FROM #nums, #tmp
WHERE ID <= LEN(',' + plist + ',') AND SUBSTRING(',' + plist + ',' , n - 1, 1) = ','
AND CHARINDEX(',' , ',' + plist + ',' , n) - n > 0)
UPDATE t SET plist = (SELECT CAST(CASE WHEN tp.oldid IS NULL THEN cte.prod ELSE tp.newid END AS VARCHAR) + ','
FROM cte LEFT JOIN #tmpprod tp ON cte.prod = tp.oldid
WHERE cte.id = t.id FOR XML PATH(''))
FROM #tmp t WHERE id = t.id
UPDATE #tmp SET plist = SUBSTRING(plist, 1, LEN(plist) -1)
WHERE LEN(plist) > 0 AND SUBSTRING(plist, LEN(plist), 1) = ','
SELECT * FROM #tmp
DROP TABLE #tmp
DROP TABLE #tmpprod
DROP TABLE #nums
The #nums table is a table of sequential integers, the length of which must be greater than the longest CSV you have in your table. The first 8 lines of the script create this table and populate it. Then I've copied in your code, followed by the meat of this query - the very clever single-query parser, described in more detail in the article pointed to above. The common table expression (WITH cte...) does the parsing, and the update script recompiles the results into CSV and updates #tmp.
Adam Machanic's blog contains this posting of a T-SQL only UDF which can accept T-SQL's wildcards for use in replacement.
http://dataeducation.com/splitting-a-string-of-unlimited-length/
For my own use, I adjusted the varchar sizes to max. Also note that this UDF performs rather slowly, but if you cannot use the CLR, it may be an option. The minor changes I made to the author's code may limit use of this to SQL Server 2008r2 and later.
CREATE FUNCTION dbo.PatternReplace
(
#InputString VARCHAR(max),
#Pattern VARCHAR(max),
#ReplaceText VARCHAR(max)
)
RETURNS VARCHAR(max)
AS
BEGIN
DECLARE #Result VARCHAR(max) = ''
-- First character in a match
DECLARE #First INT
-- Next character to start search on
DECLARE #Next INT = 1
-- Length of the total string -- 0 if #InputString is NULL
DECLARE #Len INT = COALESCE(LEN(#InputString), 0)
-- End of a pattern
DECLARE #EndPattern INT
WHILE (#Next <= #Len)
BEGIN
SET #First = PATINDEX('%' + #Pattern + '%', SUBSTRING(#InputString, #Next, #Len))
IF COALESCE(#First, 0) = 0 --no match - return
BEGIN
SET #Result = #Result +
CASE --return NULL, just like REPLACE, if inputs are NULL
WHEN #InputString IS NULL
OR #Pattern IS NULL
OR #ReplaceText IS NULL THEN NULL
ELSE SUBSTRING(#InputString, #Next, #Len)
END
BREAK
END
ELSE
BEGIN
-- Concatenate characters before the match to the result
SET #Result = #Result + SUBSTRING(#InputString, #Next, #First - 1)
SET #Next = #Next + #First - 1
SET #EndPattern = 1
-- Find start of end pattern range
WHILE PATINDEX(#Pattern, SUBSTRING(#InputString, #Next, #EndPattern)) = 0
SET #EndPattern = #EndPattern + 1
-- Find end of pattern range
WHILE PATINDEX(#Pattern, SUBSTRING(#InputString, #Next, #EndPattern)) > 0
AND #Len >= (#Next + #EndPattern - 1)
SET #EndPattern = #EndPattern + 1
--Either at the end of the pattern or #Next + #EndPattern = #Len
SET #Result = #Result + #ReplaceText
SET #Next = #Next + #EndPattern - 1
END
END
RETURN(#Result)
END