How to extract specific text from column - sql

I am using SQL Server 2016 where I am loading JSON Response in one of the nvarchar(max) type column.
Now I want to create a derived column or maybe even a view that will find the particular value in that complete JSON Response and display only that in the new derived column.
Example:
**Complete_JSON_Repsone** --> this is SQL column
{"result":{"banner_image_light":"","country":"USA","parent":"","notes":"","stock_symbol":"","u_op_dev_version":"","u_restriciton":"No","discount":"","sys_id":"7a2c008c1b07ac50a62cea0ce54bcbe8","market_cap":"0","customer":"false"}}
I tried the below query, but it's not giving the expected results it prints out everything after sys_id:
SELECT
Substring (
a.Complete_JSON_Repsone,
Charindex( '"sys_id":', Complete_JSON_Repsone) + 1,
Len(Complete_JSON_Repsone)
) AS [Sys_Idd]
FROM <table-name> a
Current output (actual result):
Sys_Idd
sys_id":"7a2c008c1b07ac50a62cea0ce54bcbe8","market_cap":"0","customer":"false"}}
Expected output:
Sys_Idd
7a2c008c1b07ac50a62cea0ce54bcbe8
UPDATE
Sample Input :
Create table dbo.log1
(
Id varchar(50),
Complete_JSON_Response nvarchar(max),
Sys_Id varchar(50)
)
insert into dbo.log1 (Id,Complete_JSON_Response)
values ('S1','{"result":{"banner_image_light":"","country":"USA","parent":"","notes":"","stock_symbol":"","u_op_dev_version":"","u_restriciton":"No","discount":"","sys_id":"7a2c008c1b07ac50a62cea0ce54bcbe8","market_cap":"0","customer":"false"}}')
,('S2','{"result":{"banner_image_light":"","country":"Aus","parent":"","notes":"","stock_symbol":"","u_op_prod_version":"","u_restriciton":"No","discount":"","sys_id":"5b2c008c1b07ac50a62cea0ce54bcbe8","market_cap":"1","customer":"TRUE"}}')
select * from dbo.log1
Above select query prints, NULL value for Sys_id column as value for that column in not inserted initially. what I want in expected output is that instead of NULL it should populate(derive) only sys_id value from Complete_JSON_Response column to Sys_id column
Expected output:
Id Sys_Id
S1 7a2c008c1b07ac50a62cea0ce54bcbe8
S2 5b2c008c1b07ac50a62cea0ce54bcbe8

SQL Server 2016 supports JSON, so you may try to use JSON_VALUE():
SELECT JSON_VALUE(Complete_JSON_Response, '$.result.sys_id') AS sys_id
FROM (VALUES
(N'{
"result":{
"banner_image_light":"",
"country":"USA",
"parent":"",
"notes":"",
"stock_symbol":"",
"u_op_dev_version":"",
"u_restriciton":"No",
"discount":"",
"sys_id":"7a2c008c1b07ac50a62cea0ce54bcbe8",
"market_cap":"0",
"customer":"false"
}
}')
) a (Complete_JSON_Response)
As an additional option, you may create a simplified UDF with a recursive search:
CREATE FUNCTION dbo.ParseJson (
#json nvarchar(max),
#key nvarchar(max)
)
RETURNS #ResultTable TABLE (
[value] nvarchar(max)
)
AS
BEGIN
;WITH rCTE AS (
SELECT
CONVERT(nvarchar(max), N'$') COLLATE DATABASE_DEFAULT AS [path],
CONVERT(nvarchar(max), JSON_QUERY(#json, '$')) COLLATE DATABASE_DEFAULT AS [value]
UNION ALL
SELECT
CONVERT(nvarchar(max), c.[key]) COLLATE DATABASE_DEFAULT,
CONVERT(nvarchar(max), c.[value]) COLLATE DATABASE_DEFAULT
FROM rCTE r
CROSS APPLY OPENJSON(r.[value]) c
WHERE ISJSON(r.[value]) = 1
)
INSERT INTO #ResultTable ([value])
SELECT [value]
FROM rCTE
WHERE (ISJSON([value]) = 0) AND (path = #key)
RETURN
END
Statement:
DECLARE #json nvarchar(max) = N'
{
"result":{
"banner_image_light":"",
"country":"USA",
"parent":"",
"notes":"",
"stock_symbol":"",
"u_op_dev_version":"",
"u_restriciton":"No",
"discount":"",
"sys_id":"7a2c008c1b07ac50a62cea0ce54bcbe8",
"market_cap":"0",
"customer":"false"
},
"result2":{
"sys_id":"xxxx008c1b07ac50a62cea0ce54bcbe8"
}
}
'
SELECT j.[value] AS sys_id
FROM (VALUES (#json)) a (Complete_JSON_Response)
OUTER APPLY dbo.ParseJson(a.Complete_JSON_Response, 'sys_id') j
Result:
sys_id
--------------------------------
xxxx008c1b07ac50a62cea0ce54bcbe8
7a2c008c1b07ac50a62cea0ce54bcbe8

You can use a function. It may lead some performance issues but works fine.
select
'{"result":{"banner_image_light":"","country":"USA","parent":"","notes":"","stock_symbol":"","u_op_dev_version":"","u_restriciton":"No","discount":"","sys_id":"7a2c008c1b07ac50a62cea0ce54bcbe8","market_cap":"0","customer":"false"}}' json
into tmp_json
CREATE FUNCTION dbo.trialFnc(
#json nvarchar(max),
#key nvarchar(255)
)
RETURNS nvarchar(255)
AS
BEGIN
declare #txt1 nvarchar(max) = right(#json, len(#json) - (charindex(#key, #json)) + 1 - len(#key) - 3)
declare #txt2 nvarchar(max) = left(#txt1, charindex('"', #txt1) - 1)
RETURN #txt2
END;
select
dbo.trialFnc(json, 'country') country
, dbo.trialFnc(json, 'sys_id') sys_id
from tmp_json
It will return you this:
country
sys_id
USA
7a2c008c1b07ac50a62cea0ce54bcbe8

Related

How to Build select Query split Temp Value to two column one Per Number And Another to Text when Flag Allow 1?

I work on a query for SQL Server 2012. I have an issue: I can't build select
Query split Column Temp value to two Column When row in the temp table #nonparametric has the flag Allow = 1,
it must split column Temp value from #nonparametric to two column when the flag Allow = 1 .
suppose column Temp value has value 50.40 kg it must split to two column
First column with number so it will have 50.40 and it will be same Name as Parametric .
Second column with Text so it will have kg and it will be same Name as Parametric + 'Units'.
meaning Name will be ParametricUnit .
I need to build query that split this on two column when Flag Allow =1 .
create table #nonparametricdata
(
PART_ID nvarchar(50) ,
CompanyName nvarchar(50),
PartNumber nvarchar(50),
DKFeatureName nvarchar(100),
Tempvalue nvarchar(50),
FlagAllow bit
)
insert into #nonparametricdata
values
('1222','Honda','silicon','package','15.50Am',0),
('1900','MERCEIS','GLASS','family','90.00Am',1),--Build select query split data because FlagAllow=1
('5000','TOYOTA','alominia','source','70.20kg',0),
('8000','MACDA','motor','parametric','50.40kg',1),----Build select query split data because FlagAllow=1
('8900','JEB','mirror','noparametric','75.35kg',0)
create table #FinalTable
(
DKFeatureName nvarchar(50),
DisplayOrder int
)
insert into #FinalTable (DKFeatureName,DisplayOrder)
values
('package',3),
('family',4),
('source',5),
('parametric',2),
('noparametric',1)
what I try is below :
DECLARE #SelectqueryData NVARCHAR(MAX)
SELECT
#SelectqueryData = STUFF(
(
SELECT ', ' + case when B.FlagAllow = 1 then '['+A.DKFeatureName+'],['+A.DKFeatureName+'Unit]' else quotename(A.DKFeatureName) end
FROM #FinalTable A
join (Select distinct DKFeatureName,FlagAllow
From #nonparametricdata
) B on A.DKFeatureName=B.DKFeatureName
ORDER BY DisplayOrder
FOR XML PATH ('')
),1,2,''
)
select #SelectqueryData
--select #SelectqueryData from table
Expected Result is :
[noparametric], [parametric]--QueryGetNumber,[parametricUnit]--QueryGetUnitOfMeasure
, [package], [family]--QueryGetNumber,[familyUnit]--QueryGetUnitOfMeasure, [source]
when make query above it must give me result as image(for Explain Only) :
You're looking for a DYNAMIC PIVOT
Example
DECLARE #SelectqueryData NVARCHAR(MAX)
SELECT #SelectqueryData = STUFF( (
SELECT ', ' + case when B.FlagAllow = 1 then '['+A.DKFeatureName+'],['+A.DKFeatureName+'Unit]' else quotename(A.DKFeatureName) end
FROM #FinalTable A
join (Select distinct DKFeatureName,FlagAllow
From #nonparametricdata
) B on A.DKFeatureName=B.DKFeatureName
ORDER BY DisplayOrder
FOR XML PATH ('')
),1,2,''
)
Declare #SQL varchar(max) = '
Select *
From (
Select A.Part_ID
,A.PartNumber
,A.CompanyName
,B.*
From #nonparametricdata A
Cross Apply ( values ( DKFeatureName ,case when FlagAllow=1 then left(TempValue,patindex(''%[A-Z]%'',TempValue+''A'')-1) else TempValue end )
,( DKFeatureName+''Unit'',case when FlagAllow=1 then substring(TempValue,patindex(''%[A-Z]%'',TempValue+''A''),10) else null end )
) B(Item,Value)
) src
Pivot (max(value) for Item in ('+#SelectqueryData+') ) pvt
'
--Print #SQL
Exec(#SQL)
Returns

Transform a SELECT * query to string

I have a query that returns a row
SELECT *
FROM table
WHERE id = 1;
I want to save the result into a nvarchar sql variable. I have seen similar questions Convert SQL Server result set into string but they only use select with the name of the columns, never with *.
select *
from table
where id = 1
for xml path ('')
However the answer is <column1>value1</column1> <column2>value2</column2> and I just want it to be value1, value2
Is there a way to achieve this? thank you!
If open to a helper function.
This will convert virtually any row, table or query to a string (delimited or not).
In the following examples I selected a PIPE delimiter with a CRLF line terminator.
Please note the usage and placement of _RN when a line terminator is required. Also note the ,ELEMENTS XSINIL ... this will included null values as empty string. If you want to exclude null values, simply omit the ,ELEMENTS XSINIL
Example as Entire Table or dbFiddle
Declare #YourTable Table (id int,[col_1] varchar(50),[col_2] varchar(50),[col_3] varchar(50),[col_n] varchar(50)) Insert Into #YourTable Values
(1,'data1','data2','data3','data4')
,(2,'data5','data6','data7','data8')
-- Entire Table
Declare #XML xml = (Select *,_RN=Row_Number() over (Order By (Select null)) From #YourTable for XML RAW,ELEMENTS XSINIL )
Select [dbo].[svf-str-Data-To-Delimited]('|',char(13)+char(10),#XML)
Returns
1|data1|data2|data3|data4
2|data5|data6|data7|data8
Example as Row Based
Select A.ID
,AsAString = [dbo].[svf-str-Data-To-Delimited]('|',char(13)+char(10),B.XMLData)
From #YourTable A
Cross Apply ( values ( (select a.* for xml RAW,ELEMENTS XSINIL )) )B(XMLData)
Returns
ID AsAString
1 1|data1|data2|data3|data4
2 2|data5|data6|data7|data8
The Function if Interested
CREATE Function [dbo].[svf-str-Data-To-Delimited] (#Delim varchar(50),#EOL varchar(50),#XML xml)
Returns varchar(max)
Begin
Return(
Select convert(nvarchar(max),(
Select case when Item='_RN' then ''
else case when nullif(lead(Item,1) over (Order by Seq),'_RN') is not null
then concat(Value,#Delim)
else concat(Value,#EOL)
end
end
From (
Select Seq = row_number() over(order by (select null))
,Item = xAttr.value('local-name(.)', 'nvarchar(100)')
,Value = xAttr.value('.','nvarchar(max)')
From #XML.nodes('/row/*') xNode(xAttr)
) A
Order By Seq
For XML Path (''),TYPE).value('.', 'nvarchar(max)') )
)
End
You can easily store the result as an XML string:
select *
from (values (1, 'x', getdate())) v(id, a, b)
where id = 1
for xml path ('');
Or as a JSON string:
select *
from (values (1, 'x', getdate())) v(id, a, b)
where id = 1
for json auto;
If you don't mind Using dynamic SQL (and INFORMATION_SCHEMA dictionary), for example, for SQL Server this works:
DECLARE #sql nvarchar(max) = '',
#result nvarchar(max),
#id int = 1
SELECT #sql += '+'',''+convert(nvarchar,' + QUOTENAME(column_name) +')' from INFORMATION_SCHEMA.columns where table_name = 'Student'
SET #sql = 'select #result=' + stuff(#sql,1,5,'') + ' from student where id = ' + CAST(#id as nvarchar)
EXECUTE sp_executesql #sql, N'#result nvarchar(max) OUTPUT', #result=#result OUTPUT
SELECT #result as MyOutput

SQL Server multiple REPLACE with #temp table

I am trying to REPLACE multiple characters in SQL Server query and want to achieve this via #temp table instead of nested REPLACE. I got SQL code below and want to achieve result like
ABC DEF GHI
IF OBJECT_ID('tempdb..#temp') IS NOT NULL DROP TABLE #temp
IF OBJECT_ID('tempdb..#temp2') IS NOT NULL DROP TABLE #temp2
CREATE TABLE #temp
(
STRING_TO_REPLACE NVARCHAR(5)
)
INSERT INTO #temp (STRING_TO_REPLACE)
VALUES (' ')
,('/')
,('_')
CREATE TABLE #temp2
(
STRING_NAME NVARCHAR(5)
)
INSERT INTO #temp2 (STRING_NAME)
VALUES ('A BC')
,('D/EF')
,('G_HI')
SELECT REPLACE(t2.STRING_NAME,(SELECT t1.STRING_TO_REPLACE
FROM #temp t1),'')
FROM #temp2 t2
IF OBJECT_ID('tempdb..#temp') IS NOT NULL DROP TABLE #temp
IF OBJECT_ID('tempdb..#temp2') IS NOT NULL DROP TABLE #temp2
I can achieve result with nested replace
SELECT REPLACE(REPLACE(REPLACE(t2.STRING_NAME,'_',''),'/',''),' ','') FROM #temp2 t2
but would really like to do this via #temp table. Please can someone help me on this.
When I try to run my first code I get the following error
Msg 512, Level 16, State 1, Line 23 Subquery returned more than 1
value. This is not permitted when the subquery follows =, !=, <, <= ,
, >= or when the subquery is used as an expression.
Here is one way using CROSS APPLY
SELECT result
FROM #temp2 t2
CROSS apply (SELECT Replace(string_name, t1.string_to_replace, '') AS
result
FROM #temp t1) cs
WHERE result <> string_name
Result :
result
-----
ABC
DEF
GHI
Note : This will work only if the each string_name has only one string_to_replace
Update : To handle more than one string_to_replace in a single string_name here is one way using Dynamic sql
I have made one small change to the #temp table by adding a identity property to loop
IF Object_id('tempdb..#temp') IS NOT NULL
DROP TABLE #temp
IF Object_id('tempdb..#temp2') IS NOT NULL
DROP TABLE #temp2
CREATE TABLE #temp
(
id INT IDENTITY(1, 1),
string_to_replace NVARCHAR(5)
)
INSERT INTO #temp
(string_to_replace)
VALUES (' '),
('/'),
('_')
CREATE TABLE #temp2
(
string_name NVARCHAR(5)
)
INSERT INTO #temp2
(string_name)
VALUES ('A BC'),
('D/EF'),
('G_HI'),
('A BD_')
DECLARE #col_list VARCHAR(8000)= '',
#sql VARCHAR(max),
#cntr INT,
#inr INT =1,
#STRING_TO_REPLACE NVARCHAR(5)
SELECT #cntr = Max(id)
FROM #temp
SET #sql = 'select '
WHILE #inr < = #cntr
BEGIN
SELECT #STRING_TO_REPLACE = string_to_replace
FROM #temp
WHERE id = #inr
IF #inr = 1
SET #col_list = 'replace (STRING_NAME,'''
+ #STRING_TO_REPLACE + ''','''')'
ELSE
SET #col_list = 'replace (' + #col_list + ','''
+ #STRING_TO_REPLACE + ''','''')'
SET #inr+=1
END
SET #sql += ' from #temp2'
--print #col_list
SET #sql = 'select ' + #col_list + ' as Result from #temp2'
--print #sql
EXEC (#sql)
Result :
Result
------
ABC
DEF
GHI
ABD
The multiple replace can be achieved via a recursive CTE as per following example:
IF OBJECT_ID('tempdb..#temp') IS NOT NULL DROP TABLE #temp
IF OBJECT_ID('tempdb..#temp2') IS NOT NULL DROP TABLE #temp2
CREATE TABLE #temp
(
STRING_TO_REPLACE NVARCHAR(10)
,Pattern NVARCHAR(10)
)
INSERT INTO #temp (STRING_TO_REPLACE, Pattern)
VALUES (' ', '% %')
,('/', '%/%')
,('_', '%[_]%') ;
CREATE TABLE #temp2
(
STRING_NAME NVARCHAR(10)
);
INSERT INTO #temp2 (STRING_NAME)
VALUES ('A BC')
,('D/EF_F E')
,('G_HI')
,('XYZ');
WITH CTE_Replace AS
(
SELECT STRING_NAME AS OriginalString
,CAST(STRING_NAME AS NVARCHAR(10)) AS ReplacedString
,CAST('' AS NVARCHAR(10)) AS StringToReplace
,1 AS ReplaceCount
FROM #temp2 ancor
UNION ALL
SELECT CTE_Replace.OriginalString
,CAST(REPLACE(CTE_Replace.ReplacedString, rep.STRING_TO_REPLACE, '') AS NVARCHAR(10)) AS ReplacedString
,CAST(rep.STRING_TO_REPLACE AS NVARCHAR(10)) AS StringToReplace
,CTE_Replace.ReplaceCount + 1 AS ReplaceCount
FROM #temp rep
INNER JOIN CTE_Replace ON CTE_Replace.ReplacedString LIKE rep.Pattern
)
,CTE_FinalReplacedString AS
(
SELECT OriginalString
,ReplacedString
,ReplaceCount
,ROW_NUMBER() OVER (PARTITION BY OriginalString ORDER BY ReplaceCount DESC) AS [Rank]
FROM CTE_Replace
)
SELECT *
FROM CTE_FinalReplacedString
WHERE [Rank] = 1
Note that #temp table was updated to include an extra column called Pattern, this column contains the search pattern to use in order to find the specific strings that has to be replaced. This was also done to simplify the join statement in the recursive CTE. Also note that in order to find the _ character the search pattern had to be updated as '%[_]%'. The reason for this is because SQL Server will interpret the _ character as a wild character instead of a specific character we are trying to find.
replace in the table is probably easier here
update t2
set t2.string_name = Replace(t2.string_name, t1.string_to_replace, '')
from #temp2 t2
cross join #temp1 t1
A simple way to deal with this is to download a copy of PatExclude8K, a T-SQL function designed for exactly this type of task. Here's a couple examples:
-- remove all non-aplphabetical characters
SELECT NewString FROM #temp2 CROSS APPLY dbo.PatExclude8K(STRING_NAME,'[^A-Z]');
-- remove all spaces, forward slashes and underscores
SELECT NewString FROM #temp2 CROSS APPLY dbo.PatExclude8K(STRING_NAME,'[ /_]');
Both queries produce this result set:
NewString
------------
ABC
DEF
GHI
I've found below code on stackoverflow which seems more near to what I'm trying to achieve but am struggling that how can I use this with my code
declare #String varchar(max) = '(N_100-(6858)*(6858)*N_100/0_2)%N_35'
--table containing values to be replaced
create table #Replace
(
StringToReplace varchar(100) not null primary key clustered
,ReplacementString varchar(100) not null
)
insert into #Replace (StringToReplace, ReplacementString)
values ('+', '~')
,('-', '~')
,('*', '~')
,('/', '~')
,('%', '~')
,('(', '~')
,(')', '~')
select #String = replace(#String, StringToReplace, ReplacementString)
from #Replace a
select #String
drop table #Replace
EDIT by gofr1
CREATE FUNCTION replacement
(
#String nvarchar(max)
)
RETURNS nvarchar(max)
AS
BEGIN
DECLARE #Replace TABLE (
StringToReplace nvarchar(100),
ReplacementString nvarchar(100)
)
INSERT INTO #Replace (StringToReplace, ReplacementString)
VALUES ('+', '~')
,('-', '~')
,('*', '~')
,('/', '~')
,('%', '~')
,('(', '~')
,(')', '~')
SELECT #String = replace(#String, StringToReplace, ReplacementString)
FROM #Replace
RETURN #String
END
GO
Then call it:
SELECT dbo.replacement ('A B-C/d')
Output:
A B~C~d
Another way with recursive CTE (full batch below):
--Create a sample table, you should use YourTable
CREATE TABLE #temp2 (
STRING_NAME NVARCHAR(max)
)
INSERT INTO #temp2 (STRING_NAME)
VALUES ('A BC'),('D/EF'),('G_HI'),('J_K/L_'),('MNO')
--I add some more objects here
The main query:
;WITH replacement AS (
SELECT *
FROM (VALUES (' '),('/'),('_')
) as t(STRING_TO_REPLACE)
), cte AS (
SELECT STRING_NAME,
STRING_NAME as OriginalString,
ROW_NUMBER() OVER (ORDER BY STRING_NAME) as rn,
1 as [Level]
FROM #temp2 t2
UNION ALL
SELECT REPLACE(c.STRING_NAME,t.STRING_TO_REPLACE,'~'),
c.OriginalString,
c.rn,
[Level]+1
FROM cte c
INNER JOIN replacement t
ON CHARINDEX(t.STRING_TO_REPLACE,c.STRING_NAME,0) > 0
)
SELECT TOP 1 WITH TIES OriginalString,
STRING_NAME
FROM cte
ORDER BY ROW_NUMBER() OVER (PARTITION BY rn ORDER BY [Level] DESC)
OPTION (MAXRECURSION 0)
Output:
OriginalString STRING_NAME
A BC A~BC
D/EF D~EF
J_K/L_ J~K~L~
G_HI G~HI
MNO MNO

T-SQL - remove chars from string beginning from specific character

from table I retrieves values, for example,
7752652:1,7752653:2,7752654:3,7752655:4
or
7752941:1,7752942:2
i.e. string may contain any quantity of substrings.
What I need: remove all occurrences of characters from char ':' to a comma char.
For example,
7752652:1,7752653:2,7752654:3,7752655:4
should be
7752652,7752653,7752654,7752655
How do it?
Replace : with start tag <X>.
Replace , with end tag </X> and an extra comma.
Add an extra end tag to the end </X>.
That will give you a string that look like 7752941<X>1</X>,7752942<X>2</X>.
Cast to XML and use query(text()) to get the root text values.
Cast the result back to string.
SQL Fiddle
MS SQL Server 2012 Schema Setup:
create table T
(
C varchar(100)
)
insert into T values
('7752652:1,7752653:2,7752654:3,7752655:4'),
('7752941:1,7752942:2')
Query 1:
select cast(cast(replace(replace(T.C, ':', '<X>'), ',', '</X>,')+'</X>' as xml).query('text()') as varchar(100)) as C
from T
Results:
| C |
|---------------------------------|
| 7752652,7752653,7752654,7752655 |
| 7752941,7752942 |
declare #query varchar(8000)
select #query= 'select '+ replace (
replace('7752652:1,7752653:2,7752654:3,7752655:4',',',' t union all select ')
,':',' t1 , ')
exec(';with cte as ( '+#query+' ) select cast(t1 as varchar)+'','' from cte for xml path('''')')
Try this:
DECLARE #Data VARCHAR(100) = '7752652:1,7752653:2,7752654:3,7752655:4'
DECLARE #Output VARCHAR(100) = ''
WHILE CHARINDEX(':', #Data) > 0
BEGIN
IF LEN(#Output) > 0 SET #Output = #Output + ','
SET #Output = #Output + LEFT(#Data, CHARINDEX(':', #Data)-1)
SET #Data = STUFF(#Data,
1,
(CASE CHARINDEX(',', #Data)
WHEN 0 THEN LEN(#Data)
ELSE CHARINDEX(',', #Data)
END) - CHARINDEX(':', #Data),
'')
END
SELECT #Output AS Result -- 7752652,7752653,7752654,7752655
Hope this will help.
I borrowed the Splitter function from here. You could use any delimiter parser you may already be using.
Parse the string to table values
Used Substring function to remove values after ':'
Use For xml to re-generate CSV
Test Data:'
IF OBJECT_ID(N'tempdb..#temp')>0
DROP TABLE #temp
CREATE TABLE #temp (id int, StringCSV VARCHAR(500))
INSERT INTO #temp VALUES ('1','7752652:1,7752653:2,7752654:3,7752655:4')
INSERT INTO #temp VALUES ('2','7752656:1,7752657:3,7752658:4')
INSERT INTO #temp VALUES ('3','7752659:1,7752660:2')
SELECT * FROM #temp t
Main Query:
;WITH cte_Remove(ID, REMOVE) AS
(
SELECT y.id AS ID,
SUBSTRING(fn.string, 1, CHARINDEX(':', fn.string) -1) AS Removed
FROM #temp AS y
CROSS APPLY dbo.fnParseStringTSQL(y.StringCSV, ',') AS fn
)
SELECT DISTINCT ID,
STUFF(
(
SELECT ',' + REMOVE
FROM cte_Remove AS t2
WHERE t2.ID = t1.ID
FOR XML PATH('')
),1,1,'') AS col2
FROM cte_Remove AS t1
Cleanup Test Data:
IF OBJECT_ID(N'tempdb..#temp') > 0
DROP TABLE #temp
I solved this problem with CLR function. It is more quickly and function can be used in complex queries
public static SqlString fnRemoveSuffics(SqlString source)
{
string pattern = #":(\d+)";
string replacement = "";
string result = Regex.Replace(source.Value, pattern, replacement);
return new SqlString(result);
}

Parsing text to multiple columns

I have a feed that is populating a single text field in a table with statistics.
I need to pull this data into multiple fields in another table
but the strange format makes importing automatically difficult.
The file format is flat text but an example is below:
08:34:52 Checksum=180957248,TicketType=6,InitialUserType=G,InitialUserID=520,CommunicationType=Incoming,Date=26-03-2012,Time=08:35:00,Service=ST,Duration=00:00:14,Cost=0.12
Effectively it's made up of:
[timestamp] [Field1 name]=[Field1 value],[Field2 name]=[Field2 value],[Field4 name]=[Field4 value]...[CR]
All fields are always in the same order but not always present.
Total columns could be anywhere from 5 to 30.
I've tried the below function to translate it which seems to work mostly but seems to randomly skip fields:
Parsing the data:
(SELECT [Data].[dbo].[GetFromTextString] ( 'Checksum=' ,',' ,RAWTEXT)) AS RowCheckSum,
(SELECT [Data].[dbo].[GetFromTextString] ( 'TicketType=' ,',' ,RAWTEXT)) AS TicketType,
And the Function:
CREATE FUNCTION [dbo].[GetFromTextString]
-- Input start and end and return value.
(#uniqueprefix VARCHAR(100),
#commonsuffix VARCHAR(100),
#datastring VARCHAR(MAX) )
RETURNS VARCHAR(MAX) -- Picked Value.
AS
BEGIN
DECLARE #ADJLEN INT = LEN(#uniqueprefix)
SET #datastring = #datastring + #commonsuffix
RETURN (
CASE WHEN (CHARINDEX(#uniqueprefix,#datastring) > 0)
AND (CHARINDEX(#uniqueprefix + #commonsuffix,#datastring) = 0)
THEN SUBSTRING(#datastring, PATINDEX('%' + #uniqueprefix + '%',#datastring)+#ADJLEN, CHARINDEX(#commonsuffix,#datastring,PATINDEX('%' + #uniqueprefix + '%',#datastring))- PATINDEX('%' + #uniqueprefix + '%',#datastring)-#ADJLEN) ELSE NULL END
)
END
Could anyone suggest a better/cleaner way to strip out the data or could someone work out why this formula skips rows?
Any help really appreciated.
NOTE - THE FIRST SOLUTION IS RUBBISH. I HAVE LEFT IN IT FOR HISTORICAL REASONS, BUT A BETTER SOLUTION IS CONTAINED BELOW
I am not even sure if this will be faster than your current method, but it is the way I would approach the issue (If i was forced into an SQL only solution). The first thing that is required is a table valued function that will perform a split function:
CREATE FUNCTION dbo.Split (#TextToSplit VARCHAR(MAX), #Delimiter VARCHAR(MAX))
RETURNS #Values TABLE (Position INT IDENTITY(1, 1) NOT NULL, TextValues VARCHAR(MAX) NOT NULL)
AS
BEGIN
WHILE CHARINDEX(#Delimiter, #TextToSplit) > 0
BEGIN
INSERT #Values
SELECT LEFT(#TextToSplit, CHARINDEX(#Delimiter, #TextToSplit) - 1)
SET #TextToSplit = SUBSTRING(#TextToSplit, CHARINDEX(#Delimiter, #TextToSplit) + 1, LEN(#TextToSplit))
END
INSERT #Values VALUES (#TextToSplit)
RETURN
END
For my example I am working from a temp table #Worklist, you may need to adapt yours accordingly, or you could just insert the relevant data into #Worklist where I have used dummy data:
DECLARE #WorkList TABLE (ID INT IDENTITY(1, 1) NOT NULL, TextField VARCHAR(MAX))
INSERT #WorkList
SELECT '08:34:52 Checksum=180957248,TicketType=6,InitialUserType=G,InitialUserID=520,CommunicationType=Incoming,Date=26-03-2012,Time=08:35:00,Service=ST,Duration=00:00:14,Cost=0.12'
UNION
SELECT '08:34:52 Checksum=180957249,TicketType=5,InitialUserType=H,InitialUserID=521,CommunicationType=Outgoing,Date=27-03-2012,Time=14:27:00,Service=ST,Duration=00:15:12,Cost=0.37'
The main bit of the query is done here. It is quite long, so I have tried to comment it as well as possible. If further clarification is required I can add more comments.
DECLARE #Output TABLE (ID INT IDENTITY(1, 1) NOT NULL, TextField VARCHAR(MAX))
DECLARE #KeyPairs TABLE (WorkListID INT NOT NULL, KeyField VARCHAR(MAX), ValueField VARCHAR(MAX))
-- STORE TIMESTAMP DATA - THIS ASSUMES THE FIRST SPACE IS THE END OF THE TIMESTAMP
INSERT #KeyPairs
SELECT ID, 'TimeStamp', LEFT(TextField, CHARINDEX(' ', TextField))
FROM #WorkList
-- CLEAR THE TIMESTAMP FROM THE WORKLIST
UPDATE #WorkList
SET TextField = SUBSTRING(TextField, CHARINDEX(' ', TextField) + 1, LEN(TextField))
DECLARE #ID INT = (SELECT MIN(ID) FROM #WorkList)
WHILE #ID IS NOT NULL
BEGIN
-- SPLIT THE STRING FIRST INTO ALL THE PAIRS (e.g. Checksum=180957248)
INSERT #Output
SELECT TextValues
FROM dbo.Split((SELECT TextField FROM #WorkList WHERE ID = #ID), ',')
DECLARE #ID2 INT = (SELECT MIN(ID) FROM #Output)
-- FOR ALL THE PAIRS SPLIT THEM INTO A KEY AND A VALUE (USING THE POSITION OF THE SPLIT FUNCTION)
WHILE #ID2 IS NOT NULL
BEGIN
INSERT #KeyPairs
SELECT #ID,
MAX(CASE WHEN Position = 1 THEN TextValues ELSE '' END),
MAX(CASE WHEN Position = 2 THEN TextValues ELSE '' END)
FROM dbo.Split((SELECT TextField FROM #Output WHERE ID = #ID2), '=')
DELETE #Output
WHERE ID = #ID2
SET #ID2 = (SELECT MIN(ID) FROM #Output)
END
DELETE #WorkList
WHERE ID = #ID
SET #ID = (SELECT MIN(ID) FROM #WorkList)
END
-- WE NOW HAVE A TABLE CONTAINING EAV MODEL STYLE DATA. THIS NEEDS TO BE PIVOTED INTO THE CORRECT FORMAT
-- ENSURE COLUMNS ARE LISTED IN THE ORDER YOU WANT THEM TO APPEAR
SELECT *
FROM #KeyPairs p
PIVOT
( MAX(ValueField)
FOR KeyField IN
( [TimeStamp], [Checksum], [TicketType], [InitialUserType],
[InitialUserID], [CommunicationType], [Date], [Time],
[Service], [Duration], [Cost]
)
) AS PivotTable;
EDIT (4 YEARS LATER)
A recent upvote brought this to my attention and the I hate myself a little bit for ever posting this answer in its current form.
A much better split function would be:
CREATE FUNCTION dbo.Split
(
#List NVARCHAR(MAX),
#Delimiter NVARCHAR(255)
)
RETURNS TABLE
WITH SCHEMABINDING AS
RETURN
( WITH N1 AS (SELECT N FROM (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1), (1)) n (N)),
N2(N) AS (SELECT 1 FROM N1 a CROSS JOIN N1 b),
N3(N) AS (SELECT 1 FROM N2 a CROSS JOIN N2 b),
N4(N) AS (SELECT 1 FROM N3 a CROSS JOIN N3 b),
cteTally(N) AS
( SELECT 0 UNION ALL
SELECT TOP (DATALENGTH(ISNULL(#List,1))) ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
FROM n4
),
cteStart(N1) AS
( SELECT t.N+1
FROM cteTally t
WHERE (SUBSTRING(#List,t.N,1) = #Delimiter OR t.N = 0)
)
SELECT Item = SUBSTRING(#List, s.N1, ISNULL(NULLIF(CHARINDEX(#Delimiter,#List,s.N1),0)-s.N1,8000)),
Position = s.N1,
ItemNumber = ROW_NUMBER() OVER(ORDER BY s.N1)
FROM cteStart s
);
Then there is no need for looping at all, you just have a proper set based solution by calling the split function twice to get your EAV style data set:
DECLARE #WorkList TABLE (ID INT IDENTITY(1, 1) NOT NULL, TextField VARCHAR(MAX))
INSERT #WorkList
SELECT '08:34:52 Checksum=180957248,TicketType=6,InitialUserType=G,InitialUserID=520,CommunicationType=Incoming,Date=26-03-2012,Time=08:35:00,Service=ST,Duration=00:00:14,Cost=0.12'
UNION
SELECT '08:34:52 Checksum=180957249,TicketType=5,InitialUserType=H,InitialUserID=521,CommunicationType=Outgoing,Date=27-03-2012,Time=14:27:00,Service=ST,Duration=00:15:12,Cost=0.37';
WITH KeyPairs AS
( SELECT w.ID,
[Timestamp] = LEFT(w.TextField, CHARINDEX(' ', w.TextField)),
KeyField = MAX(CASE WHEN v.ItemNumber = 1 THEN v.Item END),
ValueField = MAX(CASE WHEN v.ItemNumber = 2 THEN v.Item END)
FROM #WorkList AS w
CROSS APPLY dbo.Split(SUBSTRING(TextField, CHARINDEX(' ', TextField) + 1, LEN(TextField)), ',') AS kp
CROSS APPLY dbo.Split(kp.Item, '=') AS v
GROUP BY w.ID, kp.ItemNumber,w.TextField
)
SELECT *
FROM KeyPairs AS kp
PIVOT
( MAX(ValueField)
FOR KeyField IN
( [Checksum], [TicketType], [InitialUserType],
[InitialUserID], [CommunicationType], [Date], [Time],
[Service], [Duration], [Cost]
)
) AS pvt;