How to split values from braces using SQL query - sql

I need a query to split values between braces separately.
My varchar value is
16(8),14(10)
I need to split as
16,14
I need only the values 16 and 14, not the values inside the parentheses, (8) and (10).
I have tried this query
-- Asker's attempt: cut OtherProduct at the first occurrence of '(0-9)' and trim the result;
-- rows where that text is not found are returned unchanged.
-- NOTE(review): CHARINDEX looks for the literal text '(0-9)', it is not a digit pattern,
-- so for values like '16(8),14(10)' the ELSE branch is presumably always taken.
select
case
when charindex('(0-9)', OtherProduct) > 0
then rtrim(left(OtherProduct, charindex('(0-9)', OtherProduct)-1))
else OtherProduct end as OtherProduct
from dbo.rxnreactions where rsd='P=15(61),16(8),14(10)R=1,7S=9012'
Can anyone help me split it?

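Use a function to split by comma, then split each part by the opening bracket and keep only the first piece (the value before the bracket). If you need a single string, join the resulting rows at the end.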
-- First split on ',' to get '16(8)' and '14(10)', then split each of those on '('.
-- The odd-numbered piece of each inner split is the value in front of the bracket.
-- Both calls are schema-qualified so they resolve to the same dbo.StringSplit.
SELECT SplitByBrackets.val
FROM dbo.StringSplit(N'16(8),14(10)', N',') AS SplitByComma
CROSS APPLY dbo.StringSplit(SplitByComma.val, N'(') AS SplitByBrackets
WHERE SplitByBrackets.id % 2 = 1
sample of the StringSplit is
-- Splits @delimited on @delimiter and returns one row per piece.
--   @delimited : text to split
--   @delimiter : separator text
-- Returns (id, val), where id is an IDENTITY value numbering the pieces.
-- Every piece is wrapped in a CDATA section so that characters such as & and <
-- in the input do not break the XML cast. Input containing the sequence ]]>
-- is still not supported.
-- NOTE(review): id follows the order in which rows are inserted from nodes();
-- that is expected to be document order, but the INSERT has no ORDER BY.
CREATE FUNCTION [dbo].[StringSplit]
(
    @delimited nvarchar(max),
    @delimiter nvarchar(100)
) RETURNS @t TABLE
(
    -- Id column can be commented out, not required for sql splitting string
    id int identity(1,1), -- I use this column for numbering splitted parts
    val nvarchar(max)
)
AS
BEGIN
    declare @xml xml
    set @xml = N'<root><r><![CDATA['
             + replace(@delimited, @delimiter, N']]></r><r><![CDATA[')
             + N']]></r></root>'
    insert into @t(val)
    select
        -- read as nvarchar so Unicode pieces survive into the nvarchar column
        r.value('.', 'nvarchar(max)') as item
    from @xml.nodes('/root/r') as records(r)
    RETURN
END
fiddle it

Related

Error when converting data to XML in SQL Server

I need to retrieve the content in position 10 of a comma separated string in a View table.
Row 1 N,l,S,T,A,,<all>,,N,A,N,N,N,Y,Y,,Y,Y,Y,,AA,SA,Enterprise,
Row 2 M,,A,S,AS,SS,AS,N,N,N,N,Y,Y,Y,ENTERPRISE,S,,A
Row 3 L,,A,D,S,A,A,AA,Y,Y,Y,YNN,N,N,N,N,A,AA,AD,D,D
Div1 is the name of my column, Div2 is the name of the result column. I use the following code:
-- Asker's query: turn the comma-separated Div1 into <x>...</x> elements and read the 10th one.
-- The inner REPLACE calls remove '<' and '>' so values such as <all> do not become tags.
-- The element separator must be '</x><x>' (close one element, open the next).
-- NOTE(review): '&' is neither removed nor escaped here; that is presumably what
-- raises the XML parsing error on some rows - confirm against the data.
SELECT TOP (2000)
    [Id],
    CONVERT(XML, '<x>' + REPLACE(REPLACE(REPLACE(Div1, '>', ''), '<', ''), ',', '</x><x>') + '</x>').value('/x[10]', 'VARCHAR(MAX)') [Div2],
    Div1
FROM
    [dbo].[database]
I use character type VARCHAR(MAX) because that is the type for Div1 in my database. The code works if I run less than 20000 rows. But the data set I use has more than 100,000 rows. If I run the whole data it stops and the following error occurs:
Msg 9421, Level 16, State 1, Line 1.
XML parsing: line 1, character 218, illegal name character
Is there a way to work this around?
XML has a CDATA section (<![CDATA[ ... ]]>) that treats its content as-is, without parsing it, so there is no need for multiple REPLACE() calls. Check it out.
SQL
-- DDL and sample data population, start
-- Table variables take the @ prefix (a # prefix denotes a temp table and cannot be DECLAREd).
DECLARE @tbl TABLE (ID INT IDENTITY(1,1) PRIMARY KEY, Div1 VARCHAR(MAX));
INSERT INTO @tbl (Div1)
VALUES
('N,l,S,T,A,,<all>,,N,A,N,N,N,Y,Y,,Y,Y,Y,,AA,SA,Enterprise')
, ('M,,A,S,AS,SS,AS,N,N,N,N,Y,Y,Y,ENTERPRISE,S,,A')
, ('L,,A,D,S,A,A,AA,Y,Y,Y,YNN,N,N,N,N,A,AA,AD,D,D');
-- DDL and sample data population, end

-- Each field is wrapped in its own CDATA section, so markup characters inside a
-- field (for example <all>) are kept as plain text instead of being parsed.
-- (/x/text())[10] picks the 10th text node, so empty fields, which have no text
-- node, are not counted.
SELECT ID,
    CAST('<x><![CDATA[' + REPLACE(Div1, ',', ']]></x><x><![CDATA[') + ']]></x>' AS XML).value('(/x/text())[10]', 'VARCHAR(MAX)') AS [Div2],
    Div1
FROM @tbl;
You can create a function to split string like below:
-- Splits @list on @delimiter and returns the pieces in order.
--   @list      : delimited text
--   @delimiter : separator, up to 5 characters
-- Returns (pos_id, item): pos_id is the 1-based position of the piece.
-- Empty pieces between two delimiters are kept, so positions stay aligned with
-- the input; an empty remainder after the last delimiter is not returned.
CREATE FUNCTION dbo.split_delimited_string
(
    @list varchar(max),
    @delimiter varchar(5)
)
RETURNS @items TABLE
(
    pos_id int identity(1,1),
    item varchar(max) -- widened from varchar(255) so long pieces do not fail on insert
)
AS
BEGIN
    DECLARE @pos int, @delimiter_len tinyint;
    -- LEN() ignores trailing spaces; the appended sentinel makes them count.
    SET @delimiter_len = LEN(@delimiter + 'x') - 1;
    SET @pos = CHARINDEX(@delimiter, @list);
    WHILE (@pos > 0)
    BEGIN
        INSERT INTO @items (item)
        SELECT LEFT(@list, @pos - 1);
        -- Keep everything after the delimiter. SUBSTRING with an over-long length
        -- simply runs to the end, so trailing spaces in @list cannot shift the cut
        -- the way RIGHT(..., LEN(...)) did.
        SET @list = SUBSTRING(@list, @pos + @delimiter_len, DATALENGTH(@list));
        SET @pos = CHARINDEX(@delimiter, @list);
    END
    IF @list <> ''
    BEGIN
        INSERT INTO @items (item)
        SELECT @list;
    END
    RETURN;
END
The following query will return the content in the 10th position:
-- Return the 10th comma-separated field of Div1 for every row.
-- Rows with fewer than 10 fields produce no output row because of the pos_id filter.
SELECT
    t.[Id],
    l.item AS Div2,   -- the original was missing this comma, which is a syntax error
    t.Div1
FROM [dbo].[database] t
CROSS APPLY dbo.split_delimited_string(t.Div1, ',') l
WHERE l.pos_id = 10;

Split a string by a delimiter using SQL

My table in DB has a column which stores values in following format.
1234#2345#6780
Four digit numbers are stored using delimiter "#".
Due to a data corruption, there are some records with five digit numbers. There may be one or more than one five digit numbers in a given row.
1234#12345#67895
I'm trying to write a script to get only those corrupted records But cannot find a way to split and check values.
Any help is appreciated.
I'm using SQL server 12.0 version
you can use this function to split values:
-- Splits @sInputList on @sDelimiter and returns the non-empty, trimmed items.
--   @sInputList : list of delimited items
--   @sDelimiter : delimiter that separates items (defaults to '#')
-- Returns one row per item; items that are empty after trimming are skipped.
CREATE FUNCTION [dbo].[fnSplit]
(@sInputList VARCHAR(8000) -- List of delimited items
, @sDelimiter VARCHAR(8000) = '#' -- delimiter that separates items
)
RETURNS @List TABLE (item VARCHAR(8000))
AS
BEGIN
    DECLARE @sItem VARCHAR(8000), @iPos INT
    -- Locate the delimiter once per iteration instead of three times.
    SET @iPos = CHARINDEX(@sDelimiter, @sInputList, 0)
    WHILE @iPos <> 0
    BEGIN
        SET @sItem = RTRIM(LTRIM(SUBSTRING(@sInputList, 1, @iPos - 1)))
        SET @sInputList = RTRIM(LTRIM(SUBSTRING(@sInputList, @iPos + LEN(@sDelimiter), LEN(@sInputList))))
        IF LEN(@sItem) > 0
            INSERT INTO @List SELECT @sItem AS item
        SET @iPos = CHARINDEX(@sDelimiter, @sInputList, 0)
    END
    IF LEN(@sInputList) > 0
        INSERT INTO @List SELECT @sInputList AS item -- Put the last item in
    RETURN
END
and then query its result (for example, keep only the items that are longer than four characters).
You can use XML nodes to split the string before 2016 version.
-- Sample table: one '#'-delimited list of numbers per row.
CREATE TABLE Xmltest (ID int, numbers nvarchar(max));
INSERT INTO Xmltest (ID, numbers) VALUES (1, '1234#12345#67895');

-- Rewrite each list as <w>..</w> elements, then emit one row per element.
WITH as_xml AS (
    SELECT ID,
           CAST('<w>' + REPLACE(numbers, '#', '</w><w>') + '</w>' AS xml) AS doc
    FROM Xmltest
)
SELECT src.ID,
       pieces.w.value('.', 'varchar(255)') AS xmlValue
FROM as_xml AS src
CROSS APPLY src.doc.nodes('//w') AS pieces(w);
Output you get, I added this ID column to identify which row may have more than 4 Characters.
ID xmlValue
1 1234
1 12345
1 67895
To check where you have more than 4 characters you can do:
-- Same split as before, keeping only the pieces longer than four characters.
SELECT src.ID,
       piece.xmlValue
FROM (
    SELECT ID,
           CAST('<w>' + REPLACE(numbers, '#', '</w><w>') + '</w>' AS xml) AS doc
    FROM Xmltest
) AS src
CROSS APPLY src.doc.nodes('//w') AS pieces(w)
-- read the element text once and reuse it for both the output and the filter
CROSS APPLY (SELECT pieces.w.value('.', 'varchar(255)') AS xmlValue) AS piece
WHERE LEN(piece.xmlValue) > 4;
Output you get:
ID xmlValue
1 12345
1 67895
You can use this. It returns every row whose value contains a number longer than four digits.
-- Match any row whose data contains five consecutive digits anywhere in the value.
SELECT *
FROM SampleData AS s
WHERE s.data LIKE '%[0-9][0-9][0-9][0-9][0-9]%'
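This will work; PATINDEX is SQL Server's closest counterpart to Oracle's REGEXP_LIKE(), although it accepts LIKE-style wildcards rather than full regular expressions: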
-- Rows whose value is NOT exactly three 4-digit groups separated by '#'.
-- PATINDEX understands LIKE-style wildcards only (no ^, $ or {n} quantifiers), so each
-- digit is spelled out as [0-9]. Without a leading or trailing % the pattern has to
-- match the whole value; 0 means it did not match.
SELECT *
FROM table_name
WHERE PATINDEX('[0-9][0-9][0-9][0-9]#[0-9][0-9][0-9][0-9]#[0-9][0-9][0-9][0-9]', col_name) = 0;
for SQL Server (starting with 2016)
you can use the built in function of SQL to split a string.
sample:
-- STRING_SPLIT returns one row per piece in a single column named value.
DECLARE @Text VARCHAR(100) = '1234#12345#67895';
SELECT value
FROM STRING_SPLIT(@Text, '#');
result:
value
----
1234
12345
67895
you can now easily manipulate the values after

Sql table comma separated values contain any of variable values checking

I have a variable #a='1,2,3,4' and a table that contain a column B that contain comma separated values.
How can I check that column B values contain any of the #a variable values?
You need to implement a function for splitting the values. There are a lot of variations, you can use this:
-- Splits @List on @Delimiter and returns each piece together with its position.
--   @List      : delimited text
--   @Delimiter : separator, ',' by default (NULL is also treated as ',')
-- Returns (Value, SortOrder): SortOrder is the IDENTITY value assigned while the
-- pieces are buffered, starting at 1.
-- Pieces are wrapped in CDATA so markup characters in @List are kept as text.
CREATE FUNCTION [dbo].[fn_Analysis_ConvertCsvListToNVarCharTableWithOrder](@List nvarchar(max), @Delimiter nvarchar(10) = ',')
RETURNS @result TABLE
(
    [Value] nvarchar(max),
    [SortOrder] bigint NOT NULL
)
AS
BEGIN
    IF @Delimiter IS NULL
    BEGIN
        SET @Delimiter = ','
    END

    DECLARE @XML xml = N'<r><![CDATA[' + REPLACE(@List, @Delimiter, ']]></r><r><![CDATA[') + ']]></r>'

    -- Buffer table whose IDENTITY column provides the SortOrder numbering.
    DECLARE @BufTable TABLE (Value nvarchar(max), SortOrder bigint NOT NULL IDENTITY(1, 1) PRIMARY KEY)

    INSERT INTO @BufTable (Value)
    SELECT Tbl.Col.value('.', 'nvarchar(max)')
    FROM @XML.nodes('//r') Tbl(Col)
    OPTION (OPTIMIZE FOR (@XML = NULL))

    INSERT INTO @result (Value, SortOrder)
    SELECT Value, SortOrder
    FROM @BufTable

    RETURN
END
Having such a function, it's pretty easy:
-- Sample rows whose [column] holds a comma-separated list.
DECLARE @DataSource TABLE
(
    [column] VARCHAR(1024)
);
-- The values to look for, also as a comma-separated list.
DECLARE @column VARCHAR(1024) = '1,2,3,4';

INSERT INTO @DataSource ([column])
VALUES ('100,200,300')
      ,('100,1,500')
      ,('1,2,3,500')
      ,('200')
      ,('33,32,31,4,30');

-- Split every row's list (DSV) and the filter list (FV), then keep rows sharing a value.
-- DISTINCT collapses rows that match more than one filter value.
SELECT DISTINCT [column]
FROM @DataSource
CROSS APPLY [dbo].[fn_Analysis_ConvertCsvListToNVarCharTableWithOrder] ([column], ',') DSV
INNER JOIN [dbo].[fn_Analysis_ConvertCsvListToNVarCharTableWithOrder] (@column, ',') FV
    ON DSV.[Value] = FV.[Value];
Using CROSS APPLY, we split the values of the column for each row. Then we split the filter values and perform an INNER JOIN so that only rows having a value contained in the filter list are matched. After that we need a DISTINCT, because a column value may contain several values from the filter.
A t-sql string "splitter" is what you need but I would NOT use the mTVF recommended above as it is extremely inefficient and will kill parallelism. An inline table valued function (iTVF) is what you want for splitting strings.
I would suggest using delimitedSplit8k or delimitedSplit8k_lead which will perform ~30-90 times faster; or STRING_SPLIT if you're on SQL 2016+ and only need the value which will be several hundred times faster. Note this performance test:
-- sample data: @rows random comma-separated strings in the temp table #strings
declare @rows int = 10000;
if object_id('tempdb..#strings') is not null drop table #strings;
select top (@rows)
    someid = identity(int,1,1),
    somestring = replace(right(left(cast(newid() as varchar(36)), 27),21),'-',',')
into #strings
from sys.all_columns a
cross join sys.all_columns b; -- explicit cross join, used only as a row source
-- Performance test
-- Each timed batch assigns the split values to a variable so no result set is returned,
-- prints the elapsed milliseconds, and is repeated 5 times via "go 5".
set nocount on;
print 'fn_Analysis_ConvertCsvListToNVarCharTableWithOrder'+char(10)+replicate('-',50);
go
declare @st datetime = getdate(), @item varchar(10);
select @item = [value]
from #strings t
cross apply dbo.fn_Analysis_ConvertCsvListToNVarCharTableWithOrder(t.somestring,',');
print datediff(ms,@st,getdate());
go 5
print 'delimitedSplit8K (serial)'+char(10)+replicate('-',50);
go
declare @st datetime = getdate(), @item varchar(10);
select @item = item
from #strings t
cross apply dbo.DelimitedSplit8K(t.somestring,',')
option (maxdop 1);
print datediff(ms,@st,getdate());
go 5
print 'delimitedSplit8K (parallel)'+char(10)+replicate('-',50);
go
declare @st datetime = getdate(), @item varchar(10);
select @item = item
from #strings t
cross apply dbo.DelimitedSplit8K(t.somestring,',')
option (recompile, querytraceon 8649);
print datediff(ms,@st,getdate());
go 5
Results
fn_Analysis_ConvertCsvListToNVarCharTableWithOrder
--------------------------------------------------
Beginning execution loop
4183
4274
4536
4294
4406
Batch execution completed 5 times.
delimitedSplit8K (serial)
--------------------------------------------------
Beginning execution loop
50
50
50
54
53
Batch execution completed 5 times.
delimitedSplit8K (parallel)
--------------------------------------------------
Beginning execution loop
133
134
133
140
136
Batch execution completed 5 times.
How you could use to solve your problem
-- Sample table of numbers and a comma-separated list of numbers to search for.
declare @sometable table(someid int identity, someNbr tinyint);
insert @sometable values (1),(3),(6),(12),(7),(15),(19);
declare @searchstring varchar(1000) = '1,2,3,4,19';

-- Split the search list once and keep the rows whose someNbr appears in it.
select t.someid, t.someNbr
from @sometable t
cross apply dbo.DelimitedSplit8K(@searchstring,',') s
where t.someNbr = s.Item;
Results
someid someNbr
----------- -------
1 1
2 3
7 19

Parse from string specific values in SQL Server

I have a column with a very long string, and I need to be able to parse out specific values from the string (i.e. values 67-70 for the state name). Below is the (long) string I am working with. I am assuming I can use the Parsename function but I'm unsure of the syntax.
H0100343107000100000000000151750A P+++++++++++++++++1016 STANLEY YOUNG 17 SPRAYPOINT DRIVE POINT COOK FO000006140949525A N WEB SITE S 3030 00010VICTORIA 61409495255
You should use substring
SELECT SUBSTRING('w3resource',4,3);
This will output eso; 4,3 means start at the 4th position and take the next 3 characters.
so in your case it will be
SELECT SUBSTRING(column_name,67,4);
This example is from MySQL, but MS SQL Server has the same function:
SUBSTRING( string, start_position, length )
Please check this link
http://social.technet.microsoft.com/wiki/contents/articles/17948.t-sql-right-left-substring-and-charindex-functions.aspx
If you want to extract something from string you have two solutions within t-sql (no CLR):
By position
Splitting string using delimiter
1 - String functions which can be used by position are: SUBSTRING, LEFT, RIGHT
2 - There is no built-in function for splitting a string by a delimiter in T-SQL (before SQL Server 2016, which added STRING_SPLIT). You can write your own function to split it. Below is one such splitting function:
-- Returns one item of a delimited string.
--   @Text      : delimited text
--   @Delimiter : separator text
--   @Index     : zero-based position of the wanted item
-- Returns the item, or NULL when @Index is past the last item.
CREATE FUNCTION [dbo].[Split]
(
    @Text VARCHAR(MAX),
    @Delimiter VARCHAR(100),
    @Index INT
)
RETURNS VARCHAR(MAX)
AS BEGIN
    DECLARE @A TABLE (ID INT IDENTITY, V VARCHAR(MAX));
    DECLARE @R VARCHAR(MAX);

    -- Each CTE row is (A, B): A is the start of an item, B the start of the next one.
    -- When no further delimiter is found, B drops back to LEN(@Delimiter), which
    -- ends the recursion and marks the last item.
    WITH CTE AS
    (
        SELECT 0 A, 1 B
        UNION ALL
        SELECT B, CONVERT(INT, CHARINDEX(@Delimiter, @Text, B) + LEN(@Delimiter))
        FROM CTE
        WHERE B > A
    )
    INSERT @A(V)
    SELECT SUBSTRING(@Text, A, CASE WHEN B > LEN(@Delimiter) THEN B - A - LEN(@Delimiter) ELSE LEN(@Text) - A + 1 END) VALUE
    FROM CTE
    WHERE A > 0
    ORDER BY A                 -- assign IDENTITY values in item order
    OPTION (MAXRECURSION 0);   -- lift the default 100-level limit so long lists work

    SELECT @R = V
    FROM @A
    WHERE ID = @Index + 1

    RETURN @R
END

Compare two list items

I am trying to compare a database field which stores list items (comma separated) with unfortunately a variable which is also a list item.
Example:
In this case, a user can belong to multiple groups, and content access is also allocated to multiple groups.
contentid | group
(1) (c,d)
(2) (a,c)
(3) (b)
So, I need to select all content where user is in group (a,c). In this case, contentid 1,2 should be returned.
Here's a safe but slow solution for SQL 2008
BEGIN
    -- setup (sample rows as given in the question)
    DECLARE @tbl TABLE (
        [contentid] INT
        ,[group] VARCHAR(MAX)
    )
    INSERT INTO @tbl VALUES
        (1, 'c,d')
        ,(2, 'a,c')
        ,(3, 'b')

    -- send your request as simple xml
    DECLARE @param XML
    SET @param = '<g>a</g><g>c</g>'

    -- query
    -- Both sides are wrapped in commas so a group only matches a whole list entry.
    SELECT DISTINCT contentid
    FROM @tbl t
    INNER JOIN @param.nodes('/g') AS t2(g)
        ON ',' + t.[group] + ',' LIKE '%,' + t2.g.value('.', 'varchar(max)') + ',%'
END
You just pass your query in as an XML snippet instead of a comma separated list.
If your group names are single characters or you can be sure the names are not character-subsets of each other (ie: GroupA, GroupAB), then the query can be optimized to.
ON t.[group] LIKE '%' + t2.g.value('.', 'varchar(max)') + '%'
If you're using an RDBMS without XML parsing capability, you'll have to split your query string into a temp table and work with it that way.
You really should not be using comma separated values inside your columns. It would be much better if the [group] column only contained one value and you had repeated entries with a UNIQUE constraint on the composite (contentid, group).
You might find this question and answer useful : How do I split a string so I can access item x?
Or you could always use something like this :
-- Splits @string on a single-character @delimiter.
--   @string    : delimited text
--   @delimiter : separator character; NULL means ','
-- Returns one row per item, including empty items, so 'a,' yields 'a' and ''.
-- A string without any delimiter is returned as a single item.
create function SplitString(
    @string varchar(max),
    @delimiter char(1)
)
returns @items table (item varchar(max))
as
begin
    declare @start int set @start = 1   -- first character of the current item
    declare @pos int                    -- position of the next delimiter, 0 when none is left
    if (@delimiter is null) set @delimiter = ','

    -- Jump from delimiter to delimiter instead of testing every character; this also
    -- examines the last character, which the character-by-character loop skipped.
    set @pos = charindex(@delimiter, @string, @start)
    while (@pos > 0) begin
        insert into @items
        select substring(@string, @start, @pos - @start)
        set @start = @pos + 1
        set @pos = charindex(@delimiter, @string, @start)
    end

    --last item (or only if there were no delimiters)
    insert into @items
    select substring(@string, @start, datalength(@string))
    return
end
go
-- Sample content rows with their comma-separated group lists.
declare @content table(contentid int, [group] varchar(max))
insert into @content (contentid, [group])
select 1, 'c,d'
union all
select 2, 'a,c'
union all
select 3, 'b'

-- The user's groups, split once into a table variable.
declare @groups varchar(max) set @groups = 'a,c'
declare @grouptable table(item varchar(max))
insert into @grouptable (item)
select item from dbo.SplitString(@groups, ',')

-- Keep the content rows that share at least one group with the user.
-- EXISTS can stop at the first shared group instead of counting all of them.
select c.contentid, c.[group]
from @content c
where exists (
    select 1
    from @grouptable g1
    join dbo.SplitString(c.[group], ',') g2 on g1.item = g2.item
)