Compare strings and Highlight mismatch wherever found in T-SQL - sql

I have below Material table which contains data like this:
[PO Number] [Actual Material] [Ideal Material]
----------------------------------------------------
1000000 Milk-Sugar-tea Milk-Sugar-Coffee
1000001 Milk-Water Milk-Water-Ice-tea
I have the requirement where I need to compare two columns Actual Material and Ideal material and highlight the mismatch materials in SQL.
Mismatch would be
[PO Number] [Actual Material] [Ideal Material] [Mismatch]
----------------------------------------------------------------
1000000 Milk-Sugar-tea Milk-Sugar-Coffee tea-coffee
1000001 Milk-Water Milk-Water-Ice-tea Ice-tea
How to achieve this in a SQL query?

As many others have very sensibly suggested, your first port of call should be to restructure your database so you are actually storing normalised data against your PO Numbers.
That said, sometimes we are dealt a rubbish hand and have to play the cards we get. To answer your question exactly as it is asked, you can do the following:
If you are not on SQL Server 2016 and therefore cannot use the built in string_split function, start by creating your own:
-- Splits #str on a single-character #delimiter and returns the pieces as rows
-- of (ItemNumber, Item), numbered from 1 in order of appearance.
--   #str       : string to split (null is treated as an empty string).
--   #delimiter : one-character delimiter.
--   #num       : when not null, only the item with that ItemNumber is returned;
--                when null, every item is returned.
create function [dbo].[StringSplit]
(
 #str nvarchar(4000) = ' ' -- String to split.
,#delimiter as nvarchar(1) = ',' -- Delimiting value to split on.
,#num as int = null -- Which value to return.
)
returns #results table(ItemNumber int, Item nvarchar(4000))
as
begin
    -- Handle null #str values
    select #str = case when len(isnull(#str,'')) = 0 then '' else #str end;

    -- Start tally table with 10 rows.
    with n(n) as (select n from (values(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) n(n))
    -- Select the same number of rows as characters in #str as incremental row numbers.
    -- Four cross joins of the 10-row set allow up to 10,000 rows, enough for the largest #str length.
    ,t(t) as (select top (select len(#str) a) row_number() over (order by (select null)) from n n1,n n2,n n3,n n4)
    -- Return the position of every value that follows the specified delimiter
    -- (position 1 is always included as the start of the first value).
    ,s(s) as (select 1 union all select t+1 from t where substring(#str,t,1) = #delimiter)
    -- Return the start and length of every value, to use in the SUBSTRING function.
    -- ISNULL/NULLIF combo handles the last value where there is no delimiter at the end of the string.
    ,l(s,l) as (select s,isnull(nullif(charindex(#delimiter,#str,s),0)-s,4000) from s)
    insert into #results
    select rn as ItemNumber
          ,Item
    from(select row_number() over(order by s) as rn
               ,substring(#str,s,l) as item
         from l
        ) a
    where rn = #num
       or #num is null;

    return;
end
Using this function you can then create a set each for your Actual Material and Ideal Material columns, combine them to find the differences using a full join and then concatenate the results using stuff and for xml into one string value:
-- Sample data: one row per PO with '-' separated material lists.
declare #t table(PONumber int, ActualMaterial nvarchar(50), IdealMaterial nvarchar(50));
insert into #t (PONumber, ActualMaterial, IdealMaterial)
values (1000000,'Milk-Sugar-tea','Milk-Sugar-Coffee')
      ,(1000001,'Milk-Water','Milk-Water-Ice-tea');

-- a: one row per PO and actual material.
with a as
(
    select t.PONumber
          ,a.Item
    from #t t
        outer apply dbo.StringSplit(t.ActualMaterial,'-',null) a
)
-- i: one row per PO and ideal material.
, i as
(
    select t.PONumber
          ,i.Item
    from #t t
        outer apply dbo.StringSplit(t.IdealMaterial,'-',null) i
)
-- m: materials that appear on only one side (the full join leaves the other side null).
, m as
(
    select isnull(a.PONumber,i.PONumber) as PONumber
          ,isnull(a.Item,i.Item) as Item
    from a
        full join i
            on(a.PONumber = i.PONumber
               and a.Item = i.Item
              )
    where a.Item is null
       or i.Item is null
)
select t.PONumber
      ,t.ActualMaterial
      ,t.IdealMaterial
       -- Concatenate the mismatches as '-item-item...' and strip the leading '-'.
       -- TYPE + .value() returns the text un-entitized, so items containing
       -- characters such as & or < are not turned into &amp; / &lt;.
      ,stuff((select '-' + m.Item
              from m
              where t.PONumber = m.PONumber
              order by m.Item
              for xml path(''), type
             ).value('.','nvarchar(max)')
            ,1,1,'') as Mismatch
from #t t
order by t.PONumber;
Output:
+----------+----------------+--------------------+------------+
| PONumber | ActualMaterial | IdealMaterial | Mismatch |
+----------+----------------+--------------------+------------+
| 1000000 | Milk-Sugar-tea | Milk-Sugar-Coffee | Coffee-tea |
| 1000001 | Milk-Water | Milk-Water-Ice-tea | Ice-tea |
+----------+----------------+--------------------+------------+

I use a table-valued function to split the actual-material and ideal-material values.
Split Function detail is:
ALTER FUNCTION [dbo].[Split]
(
    #String NVARCHAR(4000),
    #Delimiter NCHAR(1)
)
RETURNS TABLE
AS
RETURN
(
    -- Splits #String on the single character #Delimiter with a recursive CTE.
    -- Returns one row per piece: Id (1-based, in order of appearance) and
    -- Value (the piece with leading spaces removed).
    -- Each recursion step moves to the character after the previous delimiter;
    -- endpos = 0 means no further delimiter was found and ends the recursion.
    WITH Split(stpos, endpos) AS
    (
        -- The anchor starts at position 0: SUBSTRING from 0 for endpos characters
        -- yields the endpos - 1 characters before the first delimiter.
        SELECT 0 AS stpos, CHARINDEX(#Delimiter, #String) AS endpos
        UNION ALL
        SELECT endpos + 1, CHARINDEX(#Delimiter, #String, endpos + 1)
        FROM Split
        WHERE endpos > 0
    )
    -- Number the pieces by their start position so Id reflects the order in the
    -- string (ORDER BY (SELECT 1) gives no such guarantee).
    SELECT [Id] = ROW_NUMBER() OVER (ORDER BY stpos),
           [Value] = LTRIM(SUBSTRING(#String, stpos, COALESCE(NULLIF(endpos, 0), LEN(#String) + 1) - stpos))
    FROM Split
)
For result table is
-- Table variable holding the three source columns plus a Mismatch column.
declare #result table (
[PO Number] int , [Actual Material] varchar(100),[Ideal Material] varchar(100),Mismatch varchar(200)
)
And query for result table insert is :
-- CTE: for every row of dbo.material, pairs each split value of [Actual Material]
-- (x1value) with each split value of [Ideal Material] (x2value).
;with CTE AS (
select distinct s.* ,x1.Value x1value,x2.Value x2value
from dbo.material s
outer apply (select *from Split(s.[Actual Material],'-')) x1
outer apply (select *from Split(s.[Ideal Material],'-')) x2
),
-- CTE2: ActualMismatch is x1value when no row of the same PO has it as an x2value,
-- otherwise ''; IdealMismatch is the mirror image for x2value.
CTE2 AS (
SELECT distinct c.[PO Number],c.[Actual Material],c.[Ideal Material]
,case when not exists (select *from CTE c2 where c2.[PO Number] = c.[PO Number] and c2.x2value = c.x1value ) then c.x1value else '' end [ActualMismatch]
,case when not exists (select *from CTE c2 where c2.[PO Number] = c.[PO Number] and c2.x1value = c.x2value ) then c.x2value else '' end [IdealMismatch]
FROM CTE c
)
-- Stack both mismatch columns into the single Mismatch column; UNION removes duplicates.
-- The WHERE clause belongs to the second SELECT only, so the first SELECT can
-- contribute rows whose Mismatch is ''.
insert into #result
SELECt c.[PO Number],c.[Actual Material],c.[Ideal Material],c.ActualMismatch Mismatch from CTE2 c
union
SELECt c.[PO Number],c.[Actual Material],c.[Ideal Material] ,c.IdealMismatch Mismatch from CTE2 c
where
(c.ActualMismatch !='' or
c.[IdealMismatch] !='')
-- Positional ORDER BY: sorts by the first column, [PO Number].
order by 1
-- One output row per PO with its mismatches joined by '-'.
-- Rows whose Mismatch is '' are excluded from the concatenation, so the result
-- always starts with exactly one '-' and STUFF removes exactly one character.
-- (Removing two characters only worked when an empty-mismatch row happened to
-- be concatenated first.)
select c.[PO Number],c.[Actual Material],c.[Ideal Material],
       STUFF((
              SELECT '-' + r.Mismatch
              FROM #result r
              WHERE r.[PO Number] = c.[PO Number]
                AND r.Mismatch != ''
              ORDER BY r.Mismatch
              FOR XML PATH(''),TYPE).value('(./text())[1]','VARCHAR(MAX)')
             ,1,1,'') AS mismatch
from #result c
where c.Mismatch != ''
Group by c.[PO Number],c.[Actual Material],c.[Ideal Material]
The mismatch values are concatenated into a single string with STUFF and FOR XML PATH.

Related

SQL Server: Split string value with single quotations

Trying to modify a set of code, so that given a string, the string has to be split and passed to the code to be used.
This is the code that I have right now.
-- Split #str on #delimiter by rewriting it as <X>item</X><X>item</X>... and
-- shredding the resulting XML into one row per <X> element.
DECLARE #str varchar(100) = '100'
DECLARE #delimiter varchar(10) = ','
DECLARE #xml xml = cast('<X>' + replace(#str, #delimiter, '</X><X>') + '</X>' as xml)

SELECT C.value('.', 'varchar(10)') as value
FROM #xml.nodes('X') as X(C)
For a single-valued string, this works just fine. But I need to use more than one like, ('100', '100A', '100B'...).
The string value will not contain anything other than 3-digit numbers or 3-digit numbers + an alphabet character, or 3 alphabet letter characters.
I also tried something else, but this is too slow.
-- Candidate values to filter on, one per row.
declare #values table
(
Value varchar(1000)
)
insert into #values (Value) values ('100'),('100A'),('100B'),('100C')
-- "table" and "myField" are placeholders for the real table and column.
Select *
from table
where myField in (select Value from #values) -- must reference #values, the variable declared above
How can I modify the code for this requirement?
You need to create a table valued function that you can pass your string to split into using a cross apply:
Function
-- Inline table-valued function that splits #str on #delimiter and returns
-- rows of (rn, item), numbered from 1 in order of appearance.
-- When #num is not null only the row with rn = #num is returned.
-- When #delimiter is the empty string, every character becomes its own item.
create function [dbo].[fn_StringSplit4k]
(
#str nvarchar(4000) = ' ' -- String to split.
,#delimiter as nvarchar(20) = ',' -- Delimiting value to split on.
,#num as int = null -- Which value to return.
)
returns table
as
return
-- Start tally table with 10 rows.
with n(n) as (select 1 union all select 1 union all select 1 union all select 1 union all select 1 union all select 1 union all select 1 union all select 1 union all select 1 union all select 1)
-- Select the same number of rows as characters in #str as incremental row numbers.
-- Cross joins increase exponentially to a max possible 10,000 rows to cover largest #str length.
,t(t) as (select top (select len(isnull(#str,'')) a) row_number() over (order by (select null)) from n n1,n n2,n n3,n n4)
-- Return the position of every value that follows the specified delimiter.
--,s(s) as (select 1 union all select t+len(replace(#delimiter,' ','.')) from t where substring(isnull(#str,''),t,1) = #delimiter)
-- NOTE(review): only one character of #str is compared with #delimiter here, so a
-- delimiter longer than one character presumably never matches - confirm.
,s(s) as (select 1 union all select t+1 from t where case when #delimiter = '' and t < len(#str) then 1 else case when substring(isnull(#str,''),t,1) = #delimiter then 1 else 0 end end = 1)
-- Return the start and length of every value, to use in the SUBSTRING function.
-- ISNULL/NULLIF combo handles the last value where there is no delimiter at the end of the string.
,l(s,l) as (select s,case when #delimiter = '' then 1 else isnull(nullif(charindex(#delimiter,isnull(#str,''),s),0)-s,4000) end from s)
select rn
,item
from(select row_number() over(order by s) as rn
,substring(#str,s,l) as item
from l
) a
where rn = #num
or #num is null;
Usage
-- Split YourString of every row on ',' (null as third argument = return all items).
select s.item
from YourTable as t
cross apply dbo.fn_StringSplit4k(t.YourString,',',null) as s;

SQL Server - Split column data and retrieve last second value

I have a column name MasterCode in XYZ Table where data is stored in below form.
.105248.105250.104150.111004.
Now first of all I want to split the data into :
105248
105250
104150
111004
Then after to retrieve only last second value from the above.
So In the above given array, value returned should be 104150.
Use a split string function, but not the built-in one, since it returns only the values and you lose the position data.
You can use Jeff Moden's DelimitedSplit8K that will return the item and the item index:
-- Inline table-valued function that splits #pString on the single character
-- #pDelimiter and returns rows of (ItemNumber, Item); ItemNumber is 1-based
-- and follows the order of the items in the string.
CREATE FUNCTION [dbo].[DelimitedSplit8K]
--===== Define I/O parameters
(#pString VARCHAR(8000), #pDelimiter CHAR(1))
--WARNING!!! DO NOT USE MAX DATA-TYPES HERE! IT WILL KILL PERFORMANCE!
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
--===== "Inline" CTE Driven "Tally Table" produces values from 1 up to 10,000...
-- enough to cover VARCHAR(8000)
WITH E1(N) AS (
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
), --10E+1 or 10 rows
E2(N) AS (SELECT 1 FROM E1 a, E1 b), --10E+2 or 100 rows
E4(N) AS (SELECT 1 FROM E2 a, E2 b), --10E+4 or 10,000 rows max
cteTally(N) AS (--==== This provides the "base" CTE and limits the number of rows right up front
-- for both a performance gain and prevention of accidental "overruns"
SELECT TOP (ISNULL(DATALENGTH(#pString),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
),
cteStart(N1) AS (--==== This returns N+1 (starting position of each "element" just once for each delimiter)
-- Position 1 is always included as the start of the first element.
SELECT 1 UNION ALL
SELECT t.N+1 FROM cteTally t WHERE SUBSTRING(#pString,t.N,1) = #pDelimiter
),
cteLen(N1,L1) AS(--==== Return start and length (for use in substring)
SELECT s.N1,
ISNULL(NULLIF(CHARINDEX(#pDelimiter,#pString,s.N1),0)-s.N1,8000)
FROM cteStart s
)
--===== Do the actual split. The ISNULL/NULLIF combo handles the length for the final element when no delimiter is found.
SELECT ItemNumber = ROW_NUMBER() OVER(ORDER BY l.N1),
Item = SUBSTRING(#pString, l.N1, l.L1)
FROM cteLen l
;
Then you can use it to split the string and it will return a table like this:
-- Split the sample string on '.'; the leading and trailing '.' produce empty first and last items.
DECLARE #string varchar(100) = '.105248.105250.104150.111004.';
SELECT *
FROM [dbo].[DelimitedSplit8K](#string, '.')
ItemNumber Item
1
2 105248
3 105250
4 104150
5 111004
6
You want only the parts where there actually is an item, so add a where clause, and you want the second from last so add row_number(), and you want the entire thing in a common table expression so that you can query it:
DECLARE #string varchar(100) = '.105248.105250.104150.111004.';
-- CTE keeps only the non-empty items and numbers them from the end of the
-- string backwards (rn = 1 is the last item).
WITH CTE AS
(
SELECT Item, ROW_NUMBER() OVER(ORDER BY ItemNumber DESC) As rn
FROM [dbo].[DelimitedSplit8K](#string, '.')
WHERE Item <> ''
)
And the query:
-- rn = 2 selects the second item counted from the end.
SELECT Item
FROM CTE
WHERE rn = 2
Result: 104150
If there are always four parts, you can use PARSENAME():
DECLARE #s varchar(64) = '.105248.105250.104150.111004.';
-- SUBSTRING drops the first and last character (the outer dots);
-- PARSENAME(..., 2) then returns the second dot-separated part counted from the right.
SELECT PARSENAME(SUBSTRING(#s, 2, LEN(#s)-2),2);
Depending on your version of SQL SERVER, you can also use the STRING_SPLIT function.
DECLARE #string varchar(100) = '.105248.105250.104150.111004.';
-- Pos is derived from where '.value.' first occurs in the string, so repeated
-- values resolve to the same position.
SELECT value,
ROW_NUMBER() OVER (ORDER BY CHARINDEX('.' + value + '.', '.' + #string + '.')) AS Pos
FROM STRING_SPLIT(#string,'.')
WHERE RTRIM(value) <> '';
It doesn't return the original position like Jeff's splitter, but does compare very favourably if you check Aaron Bertrand's Article :
Performance Surprises and Assumptions : STRING_SPLIT()
Edit:
Added position, but although works in this case may have issues with duplicate values
You can create a SQL Server table-valued function that takes the string value and the delimiter as parameters, and call that function to get the expected results.
-- Inline table-valued function that splits #str on the single character
-- #separator with a recursive CTE and returns rows of (ID, s).
-- ID is p-1, so the first piece of the string has ID 0.
ALTER function [dbo].[SplitString]
(
#str nvarchar(4000),
#separator char(1)
)
returns table
AS
return (
-- tokens: p = running piece counter, a = start position of the piece,
-- b = position of the next separator (0 when there is none, which ends the recursion).
with tokens(p, a, b) AS (
select
1,
1,
charindex(#separator, #str)
union all
select
p + 1,
b + 1,
charindex(#separator, #str, b + 1)
from tokens
where b > 0
)
select
p-1 ID,
substring(
#str,
a,
-- The last piece has no closing separator: take up to 4000 characters (the rest of the string).
case when b > 0 then b-a ELSE 4000 end)
AS s
from tokens
)
To call the function
-- Return only the non-empty pieces.
SELECT * FROM [DBO].[SPLITSTRING] ('.105248.105250.104150.111004.', '.') WHERE ISNULL(S,'') <> ''
Output
ID s
1 105248
2 105250
3 104150
4 111004
To get only second value you can write your query as shown below
DECLARE #MaxID INT
-- Step 1: highest ID among the non-empty pieces (the last piece).
SELECT #MaxID = MAX (ID) FROM (SELECT * FROM [DBO].[SPLITSTRING] ('.105248.105250.104150.111004.', '.') WHERE ISNULL(S,'') <> '') A
-- Step 2: highest ID below that one (the second-to-last piece).
SELECT TOP 1 #MaxID = MAX (ID) FROM (
SELECT * FROM [DBO].[SPLITSTRING] ('.105248.105250.104150.111004.', '.') WHERE ISNULL(S,'') <> ''
)a where ID < #MaxID
-- Step 3: return the piece with that ID.
SELECT * FROM [DBO].[SPLITSTRING] ('.105248.105250.104150.111004.', '.') WHERE ISNULL(S,'') <> '' AND ID = #MaxID
Output
ID s
3 104150
If you want 1 as value of ID then you can write your query as shown below in last line of query.
-- Same lookup, but the ID column is replaced by the constant 1.
SELECT 1 AS ID , S FROM [DBO].[SPLITSTRING] ('.105248.105250.104150.111004.', '.') WHERE ISNULL(S,'') <> '' AND ID = #MaxID
Then the output will be
ID S
1 104150
Hope this will help you.
Try this
DECLARE #DATA AS TABLE (Data nvarchar(1000))
INSERT INTO #DATA (Data)
SELECT '.105248.105250.104150.111004.'

-- Items: one row per <S> element with its text and its 1-based position in the
-- document. The position is computed in XQuery by counting the sibling elements
-- that precede the current node.
-- NOTE: positions are per source row; with more than one row in #DATA the
-- ranking below would need a PARTITION BY on the source row.
;WITH Items AS
(
    SELECT Split.a.value('.','nvarchar(100)') AS Data,
           Split.a.value('for $i in . return count(../*[. << $i]) + 1','int') AS Pos
    FROM (
          SELECT CAST('<S>'+REPLACE(Data,'.','</S><S>')+'</S>' AS XML) AS Data
          FROM #DATA
         ) DT
    CROSS APPLY Data.nodes('S') AS Split(a)
),
-- CTE: rank the non-empty items from the end of the string backwards.
-- Ranking by position (not by the item's value) is what makes Rnk = 2 the
-- second-to-last item rather than the second-largest one.
CTE AS
(
    SELECT Data, ROW_NUMBER() OVER (ORDER BY Pos DESC) AS Rnk
    FROM Items
    WHERE Data <> ''
)
SELECT Data FROM CTE
WHERE Rnk = 2
Result
Data
-----
105248
105250
104150
111004
It can also be achieved using only string functions:
-- Recreate the sample temp table.
IF OBJECT_ID('tempdb..#temp') IS NOT NULL
    DROP TABLE #temp

SELECT '.105248.105250.104150.111004.' code INTO #temp UNION ALL
SELECT '.205248.205250.204150.211004.'

-- head.reversed_head: the code with its last item (and that item's leading dot)
-- cut off, reversed, so the second-to-last item sits at the front.
-- The outer expression takes everything before the first dot and reverses it back.
SELECT
    REVERSE(LEFT(head.reversed_head, CHARINDEX('.', head.reversed_head) - 1)) second_last_value
FROM #temp
CROSS APPLY (
    SELECT REVERSE(LEFT(code, LEN(code) - CHARINDEX('.', REVERSE(code), 2))) AS reversed_head
) head
Result:
second_last_value
-----------------------------
104150
204150

MS SQL Server Get value between commas

I have a column in Table1 with string in it separated by commma:
Id Val
1 ,4
2 ,3,1,0
3 NULL
4 ,5,2
Is there a simple way to split and get any value from that column,
for example
SELECT Value(1) FROM Table1 should get
Id Val
1 4
2 3
3 NULL
4 5
SELECT Value(2) FROM Table1 should get
Id Val
1 NULL
2 1
3 NULL
4 2
Thank you!
Storing comma separated values in a column is always a pain, consider changing your table structure
To get this done, create a split string function. Here is one of the best possible approach to split the string to individual rows. Referred from http://www.sqlservercentral.com/articles/Tally+Table/72993/
-- Inline table-valued function that splits #pString on the single character
-- #pDelimiter and returns rows of (ItemNumber, Item); ItemNumber is 1-based
-- and follows the order of the items in the string.
CREATE FUNCTION [dbo].[DelimitedSplit8K]
(#pString VARCHAR(8000), #pDelimiter CHAR(1))
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
--===== "Inline" CTE Driven "Tally Table" produces values from 1 up to 10,000...
-- enough to cover VARCHAR(8000)
WITH E1(N) AS (
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
), --10E+1 or 10 rows
E2(N) AS (SELECT 1 FROM E1 a, E1 b), --10E+2 or 100 rows
E4(N) AS (SELECT 1 FROM E2 a, E2 b), --10E+4 or 10,000 rows max
cteTally(N) AS (--==== This provides the "base" CTE and limits the number of rows right up front
-- for both a performance gain and prevention of accidental "overruns"
SELECT TOP (ISNULL(DATALENGTH(#pString),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
),
cteStart(N1) AS (--==== This returns N+1 (starting position of each "element" just once for each delimiter)
-- Position 1 is always included as the start of the first element.
SELECT 1 UNION ALL
SELECT t.N+1 FROM cteTally t WHERE SUBSTRING(#pString,t.N,1) = #pDelimiter
),
cteLen(N1,L1) AS(--==== Return start and length (for use in substring)
SELECT s.N1,
ISNULL(NULLIF(CHARINDEX(#pDelimiter,#pString,s.N1),0)-s.N1,8000)
FROM cteStart s
)
--===== Do the actual split. The ISNULL/NULLIF combo handles the length for the final element when no delimiter is found.
SELECT ItemNumber = ROW_NUMBER() OVER(ORDER BY l.N1),
Item = SUBSTRING(#pString, l.N1, l.L1)
FROM cteLen l
to call the function
-- Value(1): first value of every row.
-- cs strips one leading comma so the first real value becomes item 1.
-- OUTER APPLY with the ItemNumber filter inside keeps rows that have no such
-- item (their ItemNumber/Item columns are NULL) instead of dropping them.
SELECT *
FROM yourtable
CROSS apply (SELECT CASE WHEN LEFT(val, 1) = ',' THEN Stuff(val, 1, 1, '') ELSE val END) cs (cleanedval)
OUTER apply (SELECT s.ItemNumber, s.Item
             FROM [dbo].[DelimitedSplit8K](cs.cleanedval, ',') s
             WHERE s.ItemNumber = 1) ds

-- Value(2): second value of every row, NULL where a row has fewer than two values.
SELECT *
FROM yourtable
CROSS apply (SELECT CASE WHEN LEFT(val, 1) = ',' THEN Stuff(val, 1, 1, '') ELSE val END) cs (cleanedval)
OUTER apply (SELECT s.ItemNumber, s.Item
             FROM [dbo].[DelimitedSplit8K](cs.cleanedval, ',') s
             WHERE s.ItemNumber = 2) ds
Another option using a Parse/Split Function and an OUTER APPLY
Example
-- Sample data: comma-separated values with a leading comma.
Declare #YourTable Table ([Id] int,[Val] varchar(50))
Insert Into #YourTable Values
(1,',4')
,(2,',3,1,0')
,(3,NULL)
,(4,',5,2')
-- OUTER APPLY keeps every row of #YourTable even when the parse returns no row for it.
-- RetSeq = 2 is used because the leading comma makes the first parsed item empty.
Select A.ID
,Val = B.RetVal
From #YourTable A
Outer Apply (
Select * From [dbo].[tvf-Str-Parse](A.Val,',')
Where RetSeq = 2
) B
Returns
ID Val
1 4
2 3
3 NULL
4 5
The UDF if Interested
-- Inline table-valued function that splits #String on #Delimiter via XML and
-- returns rows of (RetSeq, RetVal); RetVal is trimmed on both sides.
CREATE FUNCTION [dbo].[tvf-Str-Parse] (#String varchar(max),#Delimiter varchar(10))
Returns Table
As
Return (
-- NOTE(review): RetSeq is numbered with ORDER BY (Select null); presumably it follows
-- the order of the <x> nodes - confirm.
Select RetSeq = Row_Number() over (Order By (Select null))
,RetVal = LTrim(RTrim(B.i.value('(./text())[1]', 'varchar(max)')))
-- The delimiter is first swapped for the placeholder '§§Split§§', the string is passed
-- through FOR XML PATH('') and only then is the placeholder replaced by '</x><x>'.
From (Select x = Cast('<x>' + replace((Select replace(#String,#Delimiter,'§§Split§§') as [*] For XML Path('')),'§§Split§§','</x><x>')+'</x>' as xml).query('.')) as A
Cross Apply x.nodes('x') AS B(i)
);
Here is an example of using a CTE combined with converting the CSV to XML:
-- Sample CSV rows, including a NULL and values with leading/trailing commas.
DECLARE #Test TABLE (
CsvData VARCHAR(10)
);
INSERT INTO #Test (CsvData)
VALUES
('1,2,3'),
(',4,5,7'),
(NULL),
(',3,');
-- XmlData: each CSV string rewritten as <val>item</val><val>item</val>... and converted to XML.
WITH XmlData AS (
SELECT CONVERT(XML, '<val>' + REPLACE(CsvData, ',', '</val><val>') + '</val>') [CsvXml]
FROM #Test
)
-- val[2] picks the second <val> element of each row.
SELECT xd.CsvXml.value('val[2]', 'VARCHAR(10)')
FROM XmlData xd;
This would output:
2
4
NULL
3
The column to display is controlled by the XPath query. In this case, val[2].
The main advantage here is that no user-defined functions are required.
Try This Logic Using recursive CTE
-- Position of the value to return for every row.
DECLARE #Pos INT = 2
DECLARE #T TABLE
(
Id INT,
Val VARCHAR(50)
)
INSERT INTO #T
VALUES(1,',4'),(2,',3,1,0'),(3,NULL),(4,',5,2')
-- Recursive CTE that peels one value off the front of the string per step.
-- MyStr = remainder still to be processed, Num = value peeled off in this step,
-- SeqNo = step number.
;WITH CTE
AS
(
-- Anchor (SeqNo 0): Num is the text before the first comma, MyStr everything after it.
SELECT
Id,
SeqNo = 0,
MyStr = SUBSTRING(Val,CHARINDEX(',',Val)+1,LEN(Val)),
Num = REPLACE(SUBSTRING(Val,1,CHARINDEX(',',Val)),',','')
FROM #T
UNION ALL
-- Recursive step: when no comma is left, Num takes the whole remainder and MyStr
-- becomes NULL, which stops the recursion through the WHERE clause below.
SELECT
Id,
SeqNo = SeqNo+1,
MyStr = CASE WHEN CHARINDEX(',',MyStr)>0
THEN SUBSTRING(MyStr,CHARINDEX(',',MyStr)+1,LEN(MyStr))
ELSE NULL END,
Num = CASE WHEN CHARINDEX(',',MyStr)>0
THEN REPLACE(SUBSTRING(MyStr,1,CHARINDEX(',',MyStr)),',','')
ELSE MyStr END
FROM CTE
WHERE ISNULL(REPLACE(MyStr,',',''),'')<>''
)
-- LEFT JOIN keeps every Id; Num is NULL where the row has no step numbered #Pos.
SELECT
T.Id,
CTE.Num
FROM #T t
LEFT JOIN CTE
ON T.Id = cte.Id
AND SeqNo = #Pos
My Output for the above
Test Data
-- Sample rows: comma-separated values without a leading comma, plus one NULL.
DECLARE #t TABLE
(
    Id  INT,
    Val VARCHAR(100)
)

INSERT INTO #t
VALUES (1, '4'),
       (2, '3,1,0'),
       (3, NULL),
       (4, '5,2')
Function Definition
-- Inline table-valued function that splits #delimited on #delimiter via XML
-- and returns the items in a single column, Item.
-- When #Position is not null only the item with that number is returned;
-- when it is null all items are returned.
-- NOTE(review): the input is concatenated into the XML text as-is; values containing
-- characters such as & or < would presumably make the CAST fail - confirm.
CREATE FUNCTION [dbo].[fn_xml_Splitter]
(
#delimited nvarchar(max)
, #delimiter nvarchar(1)
, #Position INT = NULL
)
RETURNS TABLE
AS
RETURN
(
SELECT Item
FROM (
SELECT Split.a.value('.', 'VARCHAR(100)') Item
-- NOTE(review): numbered with ORDER BY (SELECT NULL); presumably follows the order of the <X> nodes - confirm.
, ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) ItemNumber
FROM
(SELECT Cast ('<X>' + Replace(#delimited, #delimiter, '</X><X>')
+ '</X>' AS XML) AS Data
) AS t CROSS APPLY Data.nodes ('/X') AS Split(a)
)x
WHERE x.ItemNumber = #Position OR #Position IS NULL
);
GO
Function Call
Now you can call this function in two different ways.
1 . to get return an Item on a specific position, specify the position in the 3rd parameter of the function:
-- Third argument 1: return only the first item of each row's Val.
SELECT *
FROM #t t
CROSS APPLY [dbo].[fn_xml_Splitter](t.Val , ',', 1)
2 . to return all items, specify the keyword DEFAULT as the 3rd parameter of the function:
-- DEFAULT makes #Position take its declared default (NULL), so all items are returned.
SELECT *
FROM #t t
CROSS APPLY [dbo].[fn_xml_Splitter](t.Val , ',', DEFAULT)

initialize and increment variable inside cte query sqlserver 2008

I am using sqlserver 2008 ,I want to initialize and increment variable (#NUMTwo) both at the same time, in my second part(Problem Line).
I am creating a cte query.
Is this possible , if yes then please let me know.
following is a sample example.I hope i am clear.
-- Code as posted in the question; it does not compile as written
-- (the line marked "Problem line" is the construct being asked about).
CREATE table #TempTable
(
childProductID INT,parentProductID INT,productModel varchar(50),[Num2] VARCHAR(100)
)
-- Counter the asker wants to increment once per row produced by the second part.
DECLARE #NUMTwo INT = 0
WITH tableR AS
(
-- First Part
SELECT childProductID = null,parentProductID=null,productModel from Products where productid in (#a),[Num2] = convert(varchar(100), '')
UNION ALL
--Second Part
SELECT e.childProductID,e.parentProductID,prd.productModel FROM ProductIncludes AS e
,[Num2] = convert(varchar(100),'1.' + #NUMTwo+=1 ) -- Problem line
INNER JOIN Products AS PRD ON e.childProductID = PRD.productID
WHERE parentProductID in (#a)
)
INSERT INTO #TempTable(childProductID,parentProductID,productModel,[Num2])
SELECT childProductID,parentProductID,productModel,[Num2]
END
SELECT * FROM #TempTable
You need to "initialize" a column in the anchor part of the query, and then "increment" this column in the recursive part.
Something like
DECLARE #NUMTwo INT = 0
-- Recursive CTE that carries the counter in column [N] instead of a variable.
;WITH Test AS (
-- Anchor: [N] starts at the value of #NUMTwo, [Num2] starts empty.
SELECT [Num2] = convert(varchar(MAX), ''),
#NUMTwo [N]
UNION ALL
-- Recursive part: increments [N] and builds [Num2] as '1.' followed by the new value.
SELECT [Num2] = '1.' + convert(varchar(MAX),[N]+1),
[N]+1
FROM TEst
-- Stops once [N] reaches 10.
WHERE [N] < 10
)
SELECT *
FROM Test
SQL Fiddle DEMO
If the parameter #NUMTwo is just for numbering rows you can use the ROW_NUMBER() OVER(...) instead of it like so:
-- tableR: the parent product row(s) numbered '0', followed by the included
-- child products numbered '1.1', '1.2', ... via ROW_NUMBER().
-- #a is the product id filter supplied by the caller.
WITH tableR AS
(
    -- Parent part: the NUMTwo column is defined in the select list; the WHERE
    -- clause holds only the filter.
    SELECT childProductID = NULL, parentProductID = NULL,
           productModel, NUMTwo = CAST('0' AS VARCHAR(10))
    FROM Products
    WHERE productid in (#a)
    UNION ALL
    -- Child part: ORDER BY (SELECT 0) numbers the rows without imposing a specific order.
    SELECT e.childProductID, e.parentProductID,
           prd.productModel,
           NUMTwo = '1.' +
                    CAST( ROW_NUMBER() OVER(ORDER BY (SELECT 0)) AS VARCHAR(10))
    FROM ProductIncludes AS e
    INNER JOIN Products AS PRD ON e.childProductID = PRD.productID
    WHERE parentProductID in (#a)
)
-- A CTE must be followed by the statement that consumes it.
SELECT childProductID, parentProductID, productModel, NUMTwo
FROM tableR;

Parsing text to multiple columns

I have a feed that is populating a single text field in a table with statistics.
I need to pull this data into multiple fields in another table
but the strange format makes importing automatically difficult.
The file format is flat text but an example is below:
08:34:52 Checksum=180957248,TicketType=6,InitialUserType=G,InitialUserID=520,CommunicationType=Incoming,Date=26-03-2012,Time=08:35:00,Service=ST,Duration=00:00:14,Cost=0.12
Effectively it's made up of:
[timestamp] [Field1 name]=[Field1 value],[Field2 name]=[Field2 value],[Field4 name]=[Field4 value]...[CR]
All fields are always in the same order but not always present.
Total columns could be anywhere from 5 to 30.
I've tried the below function to translate it which seems to work mostly but seems to randomly skip fields:
Parsing the data:
-- Select-list fragment: each column extracts the value that follows a 'Name=' prefix
-- and ends at the next ',' in RAWTEXT.
(SELECT [Data].[dbo].[GetFromTextString] ( 'Checksum=' ,',' ,RAWTEXT)) AS RowCheckSum,
(SELECT [Data].[dbo].[GetFromTextString] ( 'TicketType=' ,',' ,RAWTEXT)) AS TicketType,
And the Function:
-- Returns the text found between the first occurrence of #uniqueprefix and the
-- next occurrence of #commonsuffix in #datastring.
-- Returns NULL when the prefix does not occur, or when the prefix immediately
-- followed by the suffix (an empty value) occurs anywhere in the string.
-- The prefix is located with CHARINDEX only, so it is always treated as literal
-- text; characters such as _, % or [ in the prefix are not pattern wildcards.
-- NOTE: the prefix is matched anywhere in the string, so a prefix that is also
-- the tail of a longer field name (e.g. 'Type=' inside 'TicketType=') matches
-- the longer field first.
CREATE FUNCTION [dbo].[GetFromTextString]
-- Input start and end and return value.
(#uniqueprefix VARCHAR(100),
#commonsuffix VARCHAR(100),
#datastring VARCHAR(MAX) )
RETURNS VARCHAR(MAX) -- Picked Value.
AS
BEGIN
    DECLARE #ADJLEN INT = LEN(#uniqueprefix)
    -- Append the suffix so the last field is terminated like all the others.
    SET #datastring = #datastring + #commonsuffix
    -- Locate the prefix once and reuse the position for the guard and the extraction.
    DECLARE #prefixpos BIGINT = CHARINDEX(#uniqueprefix, #datastring)
    RETURN (
        CASE WHEN (#prefixpos > 0)
              AND (CHARINDEX(#uniqueprefix + #commonsuffix, #datastring) = 0)
             THEN SUBSTRING(#datastring,
                            #prefixpos + #ADJLEN,
                            CHARINDEX(#commonsuffix, #datastring, #prefixpos) - #prefixpos - #ADJLEN)
             ELSE NULL END
    )
END
Could anyone suggest a better/cleaner way to strip out the data or could someone work out why this formula skips rows?
Any help really appreciated.
NOTE - THE FIRST SOLUTION IS RUBBISH. I HAVE LEFT IN IT FOR HISTORICAL REASONS, BUT A BETTER SOLUTION IS CONTAINED BELOW
I am not even sure if this will be faster than your current method, but it is the way I would approach the issue (If i was forced into an SQL only solution). The first thing that is required is a table valued function that will perform a split function:
-- Splits #TextToSplit on #Delimiter and returns the pieces as rows of
-- (Position, TextValues), Position being a 1-based identity in insert order.
-- Delimiters longer than one character are supported.
-- A NULL #TextToSplit returns no rows; an empty or NULL #Delimiter returns the
-- whole text as a single row.
CREATE FUNCTION dbo.Split (#TextToSplit VARCHAR(MAX), #Delimiter VARCHAR(MAX))
RETURNS #Values TABLE (Position INT IDENTITY(1, 1) NOT NULL, TextValues VARCHAR(MAX) NOT NULL)
AS
BEGIN
    -- LEN ignores trailing spaces, so measure the delimiter with a sentinel appended.
    DECLARE #DelimiterLength BIGINT = LEN(#Delimiter + 'x') - 1
    DECLARE #DelimiterPos BIGINT = CHARINDEX(#Delimiter, #TextToSplit)

    WHILE #DelimiterPos > 0
    BEGIN
        -- Emit the text before the delimiter...
        INSERT #Values (TextValues)
        SELECT LEFT(#TextToSplit, #DelimiterPos - 1)
        -- ...then continue with everything after the whole delimiter.
        -- DATALENGTH is only an upper bound for the remaining length.
        SET #TextToSplit = SUBSTRING(#TextToSplit, #DelimiterPos + #DelimiterLength, DATALENGTH(#TextToSplit))
        SET #DelimiterPos = CHARINDEX(#Delimiter, #TextToSplit)
    END

    -- The remainder after the last delimiter is the final piece.
    -- NULL cannot be stored in the NOT NULL column, so it is skipped.
    IF #TextToSplit IS NOT NULL
        INSERT #Values (TextValues) VALUES (#TextToSplit)
    RETURN
END
For my example I am working from a temp table #Worklist, you may need to adapt yours accordingly, or you could just insert the relevant data into #Worklist where I have used dummy data:
-- Work list of raw feed lines; ID is assigned by the identity column.
DECLARE #WorkList TABLE (ID INT IDENTITY(1, 1) NOT NULL, TextField VARCHAR(MAX))
-- Two dummy feed lines.
INSERT #WorkList
SELECT '08:34:52 Checksum=180957248,TicketType=6,InitialUserType=G,InitialUserID=520,CommunicationType=Incoming,Date=26-03-2012,Time=08:35:00,Service=ST,Duration=00:00:14,Cost=0.12'
UNION
SELECT '08:34:52 Checksum=180957249,TicketType=5,InitialUserType=H,InitialUserID=521,CommunicationType=Outgoing,Date=27-03-2012,Time=14:27:00,Service=ST,Duration=00:15:12,Cost=0.37'
The main bit of the query is done here. It is quite long, so I have tried to comment it as well as possible. If further clarification is required I can add more comments.
-- #Output: scratch table for the 'key=value' pairs of the line being processed.
-- #KeyPairs: one row per work list line and key (EAV style), pivoted at the end.
DECLARE #Output TABLE (ID INT IDENTITY(1, 1) NOT NULL, TextField VARCHAR(MAX))
DECLARE #KeyPairs TABLE (WorkListID INT NOT NULL, KeyField VARCHAR(MAX), ValueField VARCHAR(MAX))
-- STORE TIMESTAMP DATA - THIS ASSUMES THE FIRST SPACE IS THE END OF THE TIMESTAMP
-- LEFT(..., CHARINDEX(' ', ...)) keeps the characters up to and including that space.
INSERT #KeyPairs
SELECT ID, 'TimeStamp', LEFT(TextField, CHARINDEX(' ', TextField))
FROM #WorkList
-- CLEAR THE TIMESTAMP FROM THE WORKLIST
UPDATE #WorkList
SET TextField = SUBSTRING(TextField, CHARINDEX(' ', TextField) + 1, LEN(TextField))
-- Outer loop: process the work list one line at a time, lowest ID first;
-- each processed line is deleted, so the loop ends when the table is empty.
DECLARE #ID INT = (SELECT MIN(ID) FROM #WorkList)
WHILE #ID IS NOT NULL
BEGIN
-- SPLIT THE STRING FIRST INTO ALL THE PAIRS (e.g. Checksum=180957248)
INSERT #Output
SELECT TextValues
FROM dbo.Split((SELECT TextField FROM #WorkList WHERE ID = #ID), ',')
DECLARE #ID2 INT = (SELECT MIN(ID) FROM #Output)
-- FOR ALL THE PAIRS SPLIT THEM INTO A KEY AND A VALUE (USING THE POSITION OF THE SPLIT FUNCTION)
-- Inner loop: each pair is deleted from #Output after it has been stored.
WHILE #ID2 IS NOT NULL
BEGIN
-- Position 1 of the '=' split is the key, position 2 the value; MAX collapses both into one row.
INSERT #KeyPairs
SELECT #ID,
MAX(CASE WHEN Position = 1 THEN TextValues ELSE '' END),
MAX(CASE WHEN Position = 2 THEN TextValues ELSE '' END)
FROM dbo.Split((SELECT TextField FROM #Output WHERE ID = #ID2), '=')
DELETE #Output
WHERE ID = #ID2
SET #ID2 = (SELECT MIN(ID) FROM #Output)
END
DELETE #WorkList
WHERE ID = #ID
SET #ID = (SELECT MIN(ID) FROM #WorkList)
END
-- WE NOW HAVE A TABLE CONTAINING EAV MODEL STYLE DATA. THIS NEEDS TO BE PIVOTED INTO THE CORRECT FORMAT
-- ENSURE COLUMNS ARE LISTED IN THE ORDER YOU WANT THEM TO APPEAR
SELECT *
FROM #KeyPairs p
PIVOT
( MAX(ValueField)
FOR KeyField IN
( [TimeStamp], [Checksum], [TicketType], [InitialUserType],
[InitialUserID], [CommunicationType], [Date], [Time],
[Service], [Duration], [Cost]
)
) AS PivotTable;
EDIT (4 YEARS LATER)
A recent upvote brought this to my attention, and I hate myself a little bit for ever posting this answer in its current form.
A much better split function would be:
-- Inline table-valued function that splits #List on #Delimiter using a tally
-- (numbers) CTE and returns one row per item:
--   Item       - the item text
--   Position   - 1-based character position where the item starts in #List
--   ItemNumber - 1-based ordinal of the item
-- NOTE: each single character of #List is compared with #Delimiter, so only
-- one-character delimiters split the list.
CREATE FUNCTION dbo.Split
(
    #List NVARCHAR(MAX),
    #Delimiter NVARCHAR(255)
)
RETURNS TABLE
WITH SCHEMABINDING AS
RETURN
(   -- N1..N4: 10, 100, 10,000 and 100,000,000 rows to number the characters of #List.
    WITH N1 AS (SELECT N FROM (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1), (1)) n (N)),
    N2(N) AS (SELECT 1 FROM N1 a CROSS JOIN N1 b),
    N3(N) AS (SELECT 1 FROM N2 a CROSS JOIN N2 b),
    N4(N) AS (SELECT 1 FROM N3 a CROSS JOIN N3 b),
    -- 0 plus one number per byte of #List (DATALENGTH counts bytes, which is
    -- at least the number of characters).
    cteTally(N) AS
    (   SELECT 0 UNION ALL
        SELECT TOP (DATALENGTH(ISNULL(#List,1))) ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
        FROM n4
    ),
    -- Start position of every item: position 1 (from N = 0) and the position
    -- after every delimiter.
    cteStart(N1) AS
    (   SELECT t.N+1
        FROM cteTally t
        WHERE (SUBSTRING(#List,t.N,1) = #Delimiter OR t.N = 0)
    )
    -- The ISNULL/NULLIF combo handles the last item, which has no closing delimiter.
    -- DATALENGTH(#List) is used as its length bound so that a last item longer
    -- than 8000 characters is not truncated.
    SELECT Item = SUBSTRING(#List, s.N1, ISNULL(NULLIF(CHARINDEX(#Delimiter,#List,s.N1),0)-s.N1,DATALENGTH(#List))),
           Position = s.N1,
           ItemNumber = ROW_NUMBER() OVER(ORDER BY s.N1)
    FROM cteStart s
);
Then there is no need for looping at all, you just have a proper set based solution by calling the split function twice to get your EAV style data set:
-- Work list of raw feed lines; ID is assigned by the identity column.
DECLARE #WorkList TABLE (ID INT IDENTITY(1, 1) NOT NULL, TextField VARCHAR(MAX))
INSERT #WorkList
SELECT '08:34:52 Checksum=180957248,TicketType=6,InitialUserType=G,InitialUserID=520,CommunicationType=Incoming,Date=26-03-2012,Time=08:35:00,Service=ST,Duration=00:00:14,Cost=0.12'
UNION
SELECT '08:34:52 Checksum=180957249,TicketType=5,InitialUserType=H,InitialUserID=521,CommunicationType=Outgoing,Date=27-03-2012,Time=14:27:00,Service=ST,Duration=00:15:12,Cost=0.37';
-- KeyPairs: one row per work list line and 'key=value' pair.
-- kp splits the text after the first space on ','; v splits each pair on '='.
-- Grouping by line and pair number lets MAX(CASE ...) put the key (item 1) and
-- the value (item 2) of a pair on the same row.
WITH KeyPairs AS
( SELECT w.ID,
-- Everything up to and including the first space of the line.
[Timestamp] = LEFT(w.TextField, CHARINDEX(' ', w.TextField)),
KeyField = MAX(CASE WHEN v.ItemNumber = 1 THEN v.Item END),
ValueField = MAX(CASE WHEN v.ItemNumber = 2 THEN v.Item END)
FROM #WorkList AS w
CROSS APPLY dbo.Split(SUBSTRING(TextField, CHARINDEX(' ', TextField) + 1, LEN(TextField)), ',') AS kp
CROSS APPLY dbo.Split(kp.Item, '=') AS v
GROUP BY w.ID, kp.ItemNumber,w.TextField
)
-- Pivot the key/value rows into one column per listed key.
SELECT *
FROM KeyPairs AS kp
PIVOT
( MAX(ValueField)
FOR KeyField IN
( [Checksum], [TicketType], [InitialUserType],
[InitialUserID], [CommunicationType], [Date], [Time],
[Service], [Duration], [Cost]
)
) AS pvt;