SQL string split by text phrase with multiple words into rows - sql

Is there a better way to split strings with multiple text phrases in tSQL?
Scenario:
A questionnaire table with multiple answers combined into a single row.
Answers are category values that I know and some values are more than one word.
See below:
I'm looking for a string splitter that will also allow text typos.
So far I have come up with the function below. It works, but I'm worried about its performance, since it uses a loop inside a SQL function.
It takes the desired text phrases as parameters (up to five, but more can be added) and also returns any words it does not recognise, such as typos, as separate rows.
-- sqlsplit: splits @mainstring into rows, treating each of the supplied
-- category phrases (@cat1..@cat5, which may contain spaces) as a single token.
--
-- Parameters:
--   @mainstring  space-separated text to split; NULL or blank yields no rows.
--   @cat1..@cat5 known phrases; NULL or blank categories are ignored. When
--                several categories match, the lowest-numbered one wins.
-- Returns:
--   One row per token, in no guaranteed order (the table has no ordering
--   column). A recognised phrase is returned exactly as passed in @catN; any
--   other text is returned one space-delimited word at a time, so typos
--   surface as their own rows.
--
-- Matching uses the default collation of the database, so it is
-- case-insensitive on a case-insensitive database.
CREATE FUNCTION [sqlsplit]
(
    @mainstring nvarchar(500),
    @cat1 nvarchar(100),
    @cat2 nvarchar(100),
    @cat3 nvarchar(100),
    @cat4 nvarchar(100),
    @cat5 nvarchar(100)
)
RETURNS @t TABLE
(
    word varchar(500) NOT NULL
)
AS
BEGIN
    -- Work on a trimmed copy; ISNULL keeps a NULL input from reaching the
    -- NOT NULL column.
    DECLARE @rest nvarchar(500) = LTRIM(RTRIM(ISNULL(@mainstring, N'')));
    DECLARE @hit nvarchar(100);
    DECLARE @space int;

    WHILE LEN(@rest) > 0
    BEGIN
        SET @hit = NULL;

        -- A category matches only when it is a whole-token prefix: it must be
        -- followed by a space or by the end of the string, so 'Not related'
        -- does not swallow the start of 'Not relatedness'.
        SELECT TOP (1) @hit = c.phrase
        FROM (VALUES (1, @cat1), (2, @cat2), (3, @cat3), (4, @cat4), (5, @cat5)) AS c (ord, phrase)
        WHERE LEN(c.phrase) > 0
          AND LEFT(@rest, LEN(c.phrase)) = c.phrase
          AND (
                LEN(@rest) = LEN(c.phrase)
                OR SUBSTRING(@rest, LEN(c.phrase) + 1, 1) = N' '
              )
        ORDER BY c.ord;

        IF @hit IS NOT NULL
        BEGIN
            INSERT INTO @t (word) VALUES (@hit);
            -- LTRIM swallows any run of spaces, so every iteration strictly
            -- shortens @rest and the loop always terminates.
            SET @rest = LTRIM(SUBSTRING(@rest, LEN(@hit) + 1, 500));
        END
        ELSE
        BEGIN
            SET @space = CHARINDEX(N' ', @rest);

            IF @space = 0
            BEGIN
                -- Last word of the input.
                INSERT INTO @t (word) VALUES (@rest);
                SET @rest = N'';
            END
            ELSE
            BEGIN
                -- Unknown word (e.g. a typo): emit it without the delimiter.
                INSERT INTO @t (word) VALUES (LEFT(@rest, @space - 1));
                SET @rest = LTRIM(SUBSTRING(@rest, @space + 1, 500));
            END
        END
    END

    RETURN;
END
GO
SELECT word
FROM sqlsplit(
    N'Probably Probably Not related Not related Unlikely Probably Not related Not related Unlikely',
    N'Definitely', N'Probably', N'Possibly', N'Unlikely', N'Not related'
);

Related

Delimiter with a condition

I have a column (MarketID) in a table.
I have to derive a value out of it.
I have to check for occurrence of delimiter(.) in the second position and see if there are consecutive three numbers after the delimiter then get that value. If not check for occurrence of delimiter(.) in the fourth position and see if there are consecutive three numbers after the delimiter then get that value
else get 0.
1) In first record: '3.001.1.16', at the second position there is a delimiter(.) and consecutive 3 number exists (001), so my output would be 001..
2)In the second record '3.1.006.4.7',there is a delimiter at second position but we don't have three consecutive numbers so we check for the 4th position and there is a delimiter and consecutive three numbers exist so the output is 006 ..
3) no (.) delimiter so output=0.
-- Sample data for the MarketID question: one dotted value with three digits
-- after the first dot, one with three digits after the second dot, and one
-- value containing no dot at all.
CREATE TABLE dbo.SampleList
(
    MarketID varchar(100)
);

INSERT INTO dbo.SampleList (MarketID)
VALUES
    ('3.001.1.16'),
    ('3.1.006.4.7'),
    ('D16B000000:21109:4');

-- Show what was loaded.
SELECT MarketID
FROM dbo.SampleList;
Assuming SQL Server from dbo, you could use a CASE statement:
-- Derives a three-digit code from MarketID:
--   1. '.' at position 2 followed by three digits  -> those three digits
--   2. '.' at position 4 followed by three digits  -> those three digits
--   3. otherwise                                   -> '0'
-- LIKE with [0-9] classes checks that all three characters are digits;
-- an int conversion would also accept values such as '1', ' 12' or '-1'.
SELECT
    MarketID,
    CASE
        WHEN MarketID LIKE '_.[0-9][0-9][0-9]%'   THEN SUBSTRING(MarketID, 3, 3)
        WHEN MarketID LIKE '___.[0-9][0-9][0-9]%' THEN SUBSTRING(MarketID, 5, 3)
        ELSE '0'
    END AS DerivedValue
FROM dbo.SampleList;
TRY_CONVERT to int will verify that the 3 characters are numbers
Here's a solution using a function I've created a few years ago.
It allows you to split a string and get a table as a result.
-- dbo.splitStringToTable: splits @List on @Separator and returns the integer
-- tokens as rows.
--
-- Parameters:
--   @List       delimited text; NULL yields no rows.
--   @Separator  delimiter of any length (a single space works too); NULL or
--               empty yields no rows.
-- Returns:
--   @Results(ID INT) - one row per token that converts to INT. Empty tokens
--   and tokens that are not valid integers are skipped, because the result
--   column can only hold integers. Leading zeros are lost ('001' -> 1).
--
-- Requires TRY_CONVERT (SQL Server 2012 or later).
CREATE FUNCTION [dbo].[splitStringToTable]
(
    @List VARCHAR(MAX),
    @Separator VARCHAR(MAX)
)
RETURNS @Results TABLE
(
    ID INT
)
AS
BEGIN
    -- DATALENGTH, not LEN: LEN ignores trailing spaces and would report 0 for
    -- a space separator, which would stop the loop from ever advancing.
    DECLARE @SepLen BIGINT = DATALENGTH(@Separator);
    DECLARE @Pos BIGINT;
    DECLARE @Token VARCHAR(MAX);

    IF @List IS NULL OR @SepLen IS NULL OR @SepLen = 0
        RETURN;

    -- Terminate the list with the actual separator so the final token is
    -- picked up by the loop as well.
    SET @List = @List + @Separator;
    SET @Pos = CHARINDEX(@Separator, @List);

    WHILE @Pos > 0
    BEGIN
        SET @Token = LEFT(@List, @Pos - 1);

        IF @Token <> '' AND TRY_CONVERT(INT, @Token) IS NOT NULL
            INSERT INTO @Results (ID) VALUES (CONVERT(INT, @Token));

        SET @List = SUBSTRING(@List, @Pos + @SepLen, DATALENGTH(@List));
        SET @Pos = CHARINDEX(@Separator, @List);
    END

    RETURN;
END
GO
Usage:
-- For every SampleList row, returns all columns plus Result: one split token
-- whose LEN is 3, or 0 when the split yields no such token.
-- TOP 1 has no ORDER BY, so which qualifying token is returned is not
-- guaranteed when several tokens have length 3.
-- NOTE(review): this calls dbo.splitStringToStringTable, whereas the function
-- shown with this answer is dbo.splitStringToTable and returns INT ids (for
-- which LEN(ID) = 3 could never match '001') - presumably a string-returning
-- variant is intended; confirm before use.
SELECT *, ISNULL((SELECT TOP 1 ID FROM dbo.[splitStringToStringTable](MarketID, '.') WHERE LEN(ID) = 3), 0) AS Result
FROM SampleList
-- Derives a three-digit code from MarketID. The code must be a complete
-- dot-delimited segment: exactly three digits, followed by another '.' or by
-- the end of the value.
--   '.' at position 2 + three-digit segment -> that segment
--   '.' at position 4 + three-digit segment -> that segment
--   anything else (including values without '.') -> '0'
-- Appending '.' to MarketID lets a single pattern cover both "followed by a
-- dot" and "end of string".
SELECT
    MarketID,
    CASE
        WHEN MarketID + '.' LIKE '_.[0-9][0-9][0-9].%'   THEN SUBSTRING(MarketID, 3, 3)
        WHEN MarketID + '.' LIKE '___.[0-9][0-9][0-9].%' THEN SUBSTRING(MarketID, 5, 3)
        ELSE '0'
    END AS Output
FROM dbo.SampleList;

SQL: Return first non null value from the string

Problem: I want to retrieve the first non-null (i.e. non-empty) value from a string whose substrings are separated by commas.
Scenario :
String 1 - ,1002682657
String 2 - 1002682683,
String 3 - ,,1002682684
String 4 - ,,,
String 5 - 1002682664,1002682663
Expected Result
ResultString 1 - 1002682657
ResultString 2 - 1002682683
ResultString 3 - 1002682684
ResultString 4 - null value
ResultString 5 - 1002682664
So to retrieve this I wrote function below is the script
-- dbo.Return_first_NonNull_Value_From_list: returns the first non-empty item
-- of a comma-separated list.
--
-- Parameters:
--   @List  comma-separated text, e.g. N',,1002682684'.
-- Returns:
--   The first item that contains something other than spaces, with leading
--   and trailing spaces removed; NULL when @List is NULL or holds only commas
--   and spaces (e.g. N',,,').
--
-- The return type is NVARCHAR(MAX): a bare NVARCHAR in a RETURNS clause means
-- NVARCHAR(1) and would truncate the result to one character.
CREATE FUNCTION [dbo].[Return_first_NonNull_Value_From_list]
(
    @List NVARCHAR(MAX)
)
RETURNS NVARCHAR(MAX)
AS
BEGIN
    -- Position of the first character that is neither a comma nor a space,
    -- i.e. the start of the first non-empty item (0 when there is none).
    DECLARE @Start BIGINT = PATINDEX(N'%[^, ]%', @List);
    DECLARE @End BIGINT;

    IF @Start IS NULL OR @Start = 0
        RETURN NULL;

    -- The item runs up to the next comma, or to the end of the list.
    SET @End = CHARINDEX(N',', @List, @Start);

    RETURN RTRIM(
        CASE
            WHEN @End = 0 THEN SUBSTRING(@List, @Start, LEN(@List))
            ELSE SUBSTRING(@List, @Start, @End - @Start)
        END
    );
END
But this function doesn't return the expected result. Could anyone please help me out in this?
With two assumptions from your question you could do this pretty easily. It looks like your numbers are all 10 characters long, and they only have numerics (no characters).
With this in mind you could just do this with a pattern match like so:
-- Returns the 10 characters starting at the first digit of [Value], or NULL
-- when [Value] contains no digit: NULLIF turns PATINDEX's "not found" result
-- (0) into NULL, and SUBSTRING with a NULL start yields NULL.
SELECT
    SUBSTRING(
        [Value],
        NULLIF(PATINDEX('%[0-9]%', [Value]), 0),
        10
    ) AS [Value]
FROM [#Test]
We can discount any rows without numeric characters straight away and return null, the rest we look for the first numeric character and get the next 10 chars.
A full sample to run in sql server would be:
-- Self-contained demo: loads five comma-separated sample strings into a temp
-- table, extracts the first 10-character number from each, then cleans up.
CREATE TABLE [#Test]
(
    [Value] NVARCHAR(1000)
);

INSERT INTO [#Test] ([Value])
VALUES
    (N',1002682657'),
    (N'1002682683,'),
    (N',,1002682684'),
    (N',,,'),
    (N',1002682664,1002682663');

-- Rows without any digit yield NULL; otherwise take 10 characters starting
-- at the first digit found.
SELECT
    CASE
        WHEN PATINDEX('%[0-9]%', [Value]) > 0
            THEN SUBSTRING([Value], PATINDEX('%[0-9]%', [Value]), 10)
    END AS [Value]
FROM [#Test];

DROP TABLE [#Test];

Incrementing Character value in T-sql

I have 2 set of values in a column i.e first 4 character are characters and next 4 character are numeric.
Ex:AAAA1234
Now I have to increment the value from right end i.e when numeric value reached 9999 then I have to increment character by 1 character.
Sample :
Consider the last value stored in a column is AAAA9999 then next incremented values should be in a sequence AAAB9999,....... AABZ9999,..... BZZZ9999..... ZZZZ9999(last value). And when it reaches ZZZZ9999 then I have to reset the value to AAAA0001.
How can I do this in T-SQL?
Here is a conceptual script, which does what you want. You will need to tweak it to suit your requirements
-- Conceptual generator for the serial sequence: four letters followed by a
-- four-digit, zero-padded number.
--
-- For each letter prefix the number runs from 0001 to 9999. When it hits 9999
-- the rightmost letter that is not 'Z' is advanced by one and the number
-- restarts at 0001. Letters to the right of the advanced one are NOT reset to
-- 'A' (AAAZ is followed by AABZ), which follows the order given in the
-- question. After ZZZZ9999 the sequence wraps to AAAA0001 and the loop stops.
--
-- Every value is inserted into @test one row at a time, which adds up to
-- roughly a million rows - fine as an illustration, slow as production code.
DECLARE @test TABLE (TestValue char(8));
DECLARE @CharPart char(4) = 'AAAA';
DECLARE @NumPart int = 1;
DECLARE @Pos int;

WHILE 1 = 1
BEGIN
    INSERT INTO @test (TestValue)
    VALUES (@CharPart + RIGHT('0000' + CAST(@NumPart AS varchar(4)), 4));

    -- Common case: just count up within the current letter prefix.
    IF @NumPart < 9999
    BEGIN
        SET @NumPart = @NumPart + 1;
        CONTINUE;
    END;

    -- Number overflowed: restart it and advance the letters.
    SET @NumPart = 1;

    IF @CharPart = 'ZZZZ'
    BEGIN
        -- End of the sequence: emit the wrapped-around first value and stop.
        SET @CharPart = 'AAAA';
        INSERT INTO @test (TestValue)
        VALUES (@CharPart + RIGHT('0000' + CAST(@NumPart AS varchar(4)), 4));
        BREAK;
    END;

    -- Find the rightmost letter that is not 'Z' (one exists, because the
    -- all-Z case was handled above) and bump it to the next letter.
    SET @Pos = 4;
    WHILE SUBSTRING(@CharPart, @Pos, 1) = 'Z'
        SET @Pos = @Pos - 1;

    SET @CharPart = STUFF(@CharPart, @Pos, 1, CHAR(ASCII(SUBSTRING(@CharPart, @Pos, 1)) + 1));
END;

SELECT TestValue
FROM @test;
With the help of PATINDEX,SUBSTRING,ASCII functions you can achieve your special cases.
(I have found the solution for your special cases). Likewise you can add your own addition feature.
-- Demo: computes the next serial for each sample value. Serials are assumed
-- to be exactly four letters followed by four digits.
CREATE TABLE #temp (col1 varchar(20));

INSERT INTO #temp (col1)
VALUES
    ('AAAA9999'),
    ('AAAZ9999'),
    ('AAZZ9999'),
    ('AZZZ9999'),
    ('ZZZZ9999');

SELECT col1
FROM #temp;

-- Rules, first match wins:
--   number below 9999     -> same letters, number + 1 (zero-padded)
--   ZZZZ9999              -> wraps to AAAA0001
--   otherwise (xxxx9999)  -> advance the rightmost letter that is not 'Z';
--                            the digits and the other letters stay as they
--                            are (AAAZ9999 -> AABZ9999), matching the order
--                            given in the question.
SELECT
    t.col1,
    CASE
        WHEN RIGHT(t.col1, 4) <> '9999'
            THEN LEFT(t.col1, 4)
                 + RIGHT('0000' + CAST(CAST(RIGHT(t.col1, 4) AS int) + 1 AS varchar(4)), 4)
        WHEN LEFT(t.col1, 4) = 'ZZZZ'
            THEN 'AAAA0001'
        WHEN SUBSTRING(t.col1, 4, 1) <> 'Z'
            THEN STUFF(t.col1, 4, 1, CHAR(ASCII(SUBSTRING(t.col1, 4, 1)) + 1))
        WHEN SUBSTRING(t.col1, 3, 1) <> 'Z'
            THEN STUFF(t.col1, 3, 1, CHAR(ASCII(SUBSTRING(t.col1, 3, 1)) + 1))
        WHEN SUBSTRING(t.col1, 2, 1) <> 'Z'
            THEN STUFF(t.col1, 2, 1, CHAR(ASCII(SUBSTRING(t.col1, 2, 1)) + 1))
        ELSE STUFF(t.col1, 1, 1, CHAR(ASCII(SUBSTRING(t.col1, 1, 1)) + 1))
    END AS outputofcol1
FROM #temp AS t;
The following function should return the desired value:
IF OBJECT_ID (N'dbo.ufnGetIndexValue') IS NOT NULL
    DROP FUNCTION dbo.ufnGetIndexValue;
GO
-- dbo.ufnGetIndexValue: returns the serial that follows @MainString.
--
-- Parameters:
--   @MainString  serial of four letters followed by four digits, e.g. 'AAAA0012'.
--                Conversion fails if characters 5-8 are not numeric.
-- Returns:
--   The next serial. The number is incremented and zero-padded to four
--   digits. At 9999 it restarts at 0001 and the letters advance like an
--   odometer: the rightmost letter moves on, and a 'Z' becomes 'A' and carries
--   into the letter to its left. 'ZZZZ9999' therefore wraps to 'AAAA0001'.
CREATE FUNCTION dbo.ufnGetIndexValue(@MainString CHAR(8))
RETURNS CHAR(8)
AS
BEGIN
    DECLARE @NumberPart INT = CONVERT(INT, SUBSTRING(@MainString, 5, 4));
    DECLARE @StringPart CHAR(4) = SUBSTRING(@MainString, 1, 4);
    DECLARE @Position TINYINT = 4;
    DECLARE @char CHAR(1);

    IF @NumberPart = 9999
    BEGIN
        -- Number overflow: restart at 1 and carry into the letters.
        SET @NumberPart = 1;

        WHILE @Position >= 1
        BEGIN
            SET @char = SUBSTRING(@StringPart, @Position, 1);

            IF @char <> 'Z'
            BEGIN
                -- No further carry needed once a letter can simply move on.
                SET @StringPart = STUFF(@StringPart, @Position, 1, CHAR(ASCII(@char) + 1));
                BREAK;
            END

            -- 'Z' rolls over to 'A' and the carry continues to the left.
            SET @StringPart = STUFF(@StringPart, @Position, 1, 'A');
            SET @Position -= 1;
        END
    END
    ELSE
    BEGIN
        SET @NumberPart += 1;
    END

    -- Zero-pad so the result always has four digits ('0013', not '13  ').
    RETURN @StringPart + RIGHT('0000' + CAST(@NumberPart AS VARCHAR(4)), 4);
END
GO
Here is a scalar select function that do the increment.
-- dbo.inc_serial: returns the serial that follows @id.
--
-- Parameters:
--   @id  serial of four letters followed by four digits, e.g. 'AAAA0001'.
-- Returns:
--   If the digits are below '9999': same letters, number + 1, zero-padded.
--   Otherwise the digits restart at '0001' and the letters are incremented
--   with carry: a 'Z' becomes 'A' and the letter to its left is advanced, so
--   'AAAZ9999' -> 'AABA0001' and 'ZZZZ9999' -> 'AAAA0001'.
CREATE FUNCTION dbo.inc_serial (@id char(8))
RETURNS char(8)
AS
BEGIN
    DECLARE @letters char(4) = LEFT(@id, 4);
    DECLARE @digits char(4) = RIGHT(@id, 4);
    DECLARE @pos int = 4;

    -- Common case: only the number changes.
    IF @digits < '9999'
        RETURN @letters + RIGHT('000' + CAST(CAST(@digits AS smallint) + 1 AS varchar(5)), 4);

    -- Number overflow: propagate a carry through the letters, right to left.
    WHILE @pos >= 1
    BEGIN
        -- ASCII 90 is upper-case 'Z'; comparing codes keeps the check exact
        -- under case-insensitive collations.
        IF ASCII(SUBSTRING(@letters, @pos, 1)) <> 90
        BEGIN
            SET @letters = STUFF(@letters, @pos, 1, CHAR(ASCII(SUBSTRING(@letters, @pos, 1)) + 1));
            BREAK;
        END;

        SET @letters = STUFF(@letters, @pos, 1, 'A');
        SET @pos = @pos - 1;
    END;

    RETURN @letters + '0001';
END
Basically, the code just adds one to the number and repeatedly carries any overflow to the left.
If your table always has one and only one row to be updated (e.g. an option/flag table):
-- Advances [serial] in place by passing its current value through
-- dbo.inc_serial. There is deliberately no WHERE clause: the statement
-- touches every row of [table].
UPDATE [table] SET [serial] = dbo.inc_serial( [serial] );
If your table has multiple rows, you will need an identity or high precision creation time column, so that we know where to continue from after reset.
-- Appends a new row whose serial is dbo.inc_serial applied to a base value.
-- The base value is max([serial]) over the rows with the highest [id], or
-- 'AAAA0000' when no row qualifies (count(*) = 0, e.g. an empty table).
INSERT INTO [table] (serial) VALUES ( dbo.inc_serial((
select top 1 case when count(*) > 0 then max([serial]) else 'AAAA0000' end AS id
from [table] where [id] = ( select max([id]) from [table] )
)));
For concurrency safety, use the XLOCK, ROWLOCK and HOLDLOCK hints to lock the table.
They are omitted from the examples for simplicity.
If you do not like UDFs, you can embed the query inline.
An inline example for first case:
-- Inline form of the serial increment, written without a UDF.
-- Reading from the innermost query outwards:
--   t0: reads one [serial] from [table] (TOP 1, no ORDER BY) under the
--       XLOCK, ROWLOCK, HOLDLOCK hints.
--   t1: if the last four characters are below '9999', adds 1 to the number
--       and zero-pads it; otherwise advances the 4th character by one
--       character code and sets the digits to '0001'.
--   t2, t3 and the outer select: wherever the character at position 4, 3 or 2
--       has become '[' (the character after 'Z'), replace it with 'A' and
--       advance the character to its left; the first character wraps within
--       A-Z via the modulo-26 arithmetic.
-- The subquery is not correlated with the row being updated and the UPDATE
-- has no WHERE clause, so every row of [table] receives the same value.
UPDATE [table] SET [serial] = ((
select case when SUBSTRING(id,2,1) <> '[' then id else STUFF( id, 1, 2, char(((ascii(id)+1-65)%26)+65) + 'A' ) end as id from (
select case when SUBSTRING(id,3,1) <> '[' then id else STUFF( id, 2, 2, char(ascii(right(id,7))+1) + 'A' ) end as id from (
select case when SUBSTRING(id,4,1) <> '[' then id else STUFF( id, 3, 2, char(ascii(right(id,6))+1) + 'A' ) end as id from (
select
case when right(id,4) < '9999'
then concat( left(id,4), right(concat( '000', (cast(right(id,4) as smallint)+1) ), 4 ) )
else concat( left(id,3), char(ascii(right(id,5))+1), '0001' ) end as id
from (
select top 1 [serial] as id from [table] with (XLOCK,ROWLOCK,HOLDLOCK)
) t0
) t1 ) t2 ) t3
))
The function can also be written as an inline table-valued function for better performance, at the cost of more complex usage, but I would not bother unless it frequently runs on multiple rows.

To find a substring matching separated by commas

I have a table, "user", with a column "access" that holds multiple values separated by commas.
I have another table, "codes", with a column "SCRCODES" that holds a single user code per row.
I need to check whether any of the values in the "access" column of "user" is present in the "SCRCODES" column of "codes".
Could someone please advise on this?
Thanks
i think this will help you:
-- dbo.Split: splits @RowData on @SplitOn and returns the pieces as rows.
--
-- Parameters:
--   @RowData  delimited text.
--   @SplitOn  delimiter, up to 5 characters; multi-character delimiters are
--             consumed in full. If it is NULL, empty, or does not occur in
--             @RowData, the whole input is returned as a single row.
-- Returns:
--   @ReturnValue(Data) - one row per piece, with leading and trailing spaces
--   removed. Empty pieces are returned as empty strings; a NULL @RowData
--   yields a single NULL row.
ALTER FUNCTION [dbo].[Split]
(
    @RowData NVARCHAR(MAX),
    @SplitOn NVARCHAR(5)
)
RETURNS @ReturnValue TABLE ( Data NVARCHAR(MAX) )
AS
BEGIN
    -- Character length of the delimiter. DATALENGTH / 2 is used because LEN
    -- ignores trailing spaces and would report 0 for a space delimiter.
    DECLARE @SepLen INT = DATALENGTH(@SplitOn) / 2;
    DECLARE @Pos BIGINT = CHARINDEX(@SplitOn, @RowData);

    WHILE @Pos > 0
    BEGIN
        INSERT INTO @ReturnValue (Data)
        VALUES (LTRIM(RTRIM(LEFT(@RowData, @Pos - 1))));

        -- Skip the whole delimiter, not just its first character.
        SET @RowData = SUBSTRING(@RowData, @Pos + @SepLen, DATALENGTH(@RowData));
        SET @Pos = CHARINDEX(@SplitOn, @RowData);
    END

    -- Whatever is left after the last delimiter is the final piece.
    INSERT INTO @ReturnValue (Data)
    VALUES (LTRIM(RTRIM(@RowData)));

    RETURN;
END;
GO
-- Lists the codes that appear in the comma-separated access list of one user.
DECLARE @str VARCHAR(MAX);

-- Load the access list of the user in question (oid '1' in this example).
SELECT @str = u.access
FROM users AS u
WHERE u.oid = '1';

-- A table-valued function has to be queried like a table; it cannot be used
-- directly as the right-hand side of IN.
SELECT c.SCRCODES
FROM codes AS c
WHERE EXISTS (
    SELECT 1
    FROM dbo.Split(@str, ',') AS s
    WHERE s.Data = c.SCRCODES
);
I assume that your sercodes does not contain comma.
You can do something like this:
-- Returns the codes that occur as a complete item in the user's
-- comma-separated access list.
-- Both the list and the code are wrapped in commas before comparing, so a
-- code matches only as a whole item: 'ad' does not match inside 'read', and
-- lists holding a single value (no comma at all) still match.
-- Spaces are stripped from the list first because it is stored as
-- 'read, write, execute'; this assumes codes themselves contain no spaces.
-- NOTE(review): the join on codeid is kept from the original answer; the
-- question does not mention such a column - confirm it exists.
SELECT c.sercodes
FROM codes AS c
INNER JOIN users AS u
    ON u.codeid = c.codeid
WHERE CHARINDEX(',' + c.sercodes + ',', ',' + REPLACE(u.access, ' ', '') + ',') > 0;
The idea is that access will be stored like this way "read, write, execute". So, it will be either end with comma or start with comma and part of the string..
Please let me know whether it is working. You can give actual table data and design to get more accurate query.

How to parse String field in SQL Server 2008 if that String is in csv format

I have a string field in which csv row is inserted
'6 33','318011385','3183300153','Z','21.11.2011 13:33:22','51','51','2','0','032425','','','','','8 50318011100 318069332','','21.11.2011','21.11.2011','','0','','','GOT','0','0','0','0','0','0','0','0','0','0','0','21.11.2011','4','','','','','','','','','','','','',''
I need to extract several fields from this csv format using t-sql.
My main approach was to count commas (,) and, based on the comma number, to parse the data between two commas:
-- Returns the smallest third CSV value among the rows of TBLSYNCEXPORT whose
-- first value equals @type, whose second value starts with @person (first
-- three characters) and whose third value is greater than @prev_type.
--
-- [field] holds one CSV record in which every value is wrapped in single
-- quotes, so values are separated by the three characters ',' (quote, comma,
-- quote). p1, p2 and p3 are the positions of the first three separators.
-- Rows with too few separators yield NULL values and are filtered out,
-- instead of raising "Invalid length parameter passed to SUBSTRING".
SELECT MIN(v.third_value) AS fld
FROM TBLSYNCEXPORT AS e
CROSS APPLY (SELECT CHARINDEX(''',''', e.field) AS p1) AS s1
CROSS APPLY (SELECT CHARINDEX(''',''', e.field, s1.p1 + 1) AS p2) AS s2
CROSS APPLY (SELECT CHARINDEX(''',''', e.field, s2.p2 + 3) AS p3) AS s3
CROSS APPLY (
    SELECT
        -- First value: starts after the opening quote at position 1.
        CASE WHEN s1.p1 >= 2
             THEN SUBSTRING(e.field, 2, s1.p1 - 2)
        END AS first_value,
        -- First three characters of the second value.
        CASE WHEN s1.p1 >= 2
             THEN SUBSTRING(e.field, s1.p1 + 3, 3)
        END AS second_prefix,
        -- Third value: the text between the second and third separators.
        CASE WHEN s2.p2 > 0 AND s3.p3 > 0
             THEN SUBSTRING(e.field, s2.p2 + 3, s3.p3 - (s2.p2 + 3))
        END AS third_value
) AS v
WHERE v.first_value = @type
  AND v.second_prefix = @person
  AND v.third_value > @prev_type
is there a better method that this one?
If you prefer a more clear way, at least for me, you can do something like this:
-- Splits one quoted CSV record into rows so individual values can be picked
-- by position.
--   id     1-based position of the value within the record
--   value  the value without its surrounding single quotes
CREATE TABLE #destination_table (
    id INT IDENTITY(1, 1) NOT NULL PRIMARY KEY,
    value VARCHAR(1000) NOT NULL
);

DECLARE @position INT;
DECLARE @source_string VARCHAR(1000);

-- Single quotes inside a T-SQL string literal are escaped by doubling them.
SET @source_string = '''6 33'',''318011385'',''3183300153'',''Z'',''21.11.2011 13:33:22'',''51'',''51'',''2'',''0'',''032425'','''','''','''','''',''8 50318011100 318069332'','''',''21.11.2011'',''21.11.2011'','''',''0'','''','''',''GOT'',''0'',''0'',''0'',''0'',''0'',''0'',''0'',''0'',''0'',''0'',''0'',''21.11.2011'',''4'','''','''','''','''','''','''','''','''','''','''','''','''','''''';

-- Drop the opening quote of the first value and the closing quote of the last
-- one; after that every value boundary is exactly the three characters ','.
IF LEN(@source_string) >= 2
   AND LEFT(@source_string, 1) = ''''
   AND RIGHT(@source_string, 1) = ''''
    SET @source_string = SUBSTRING(@source_string, 2, LEN(@source_string) - 2);

SET @position = CHARINDEX(''',''', @source_string);

WHILE @position > 0
BEGIN
    INSERT INTO #destination_table (value)
    VALUES (LEFT(@source_string, @position - 1));

    -- Skip past the separator (3 characters) and look for the next one.
    SET @source_string = SUBSTRING(@source_string, @position + 3, 1000);
    SET @position = CHARINDEX(''',''', @source_string);
END;

-- Whatever remains after the last separator is the final value.
INSERT INTO #destination_table (value)
VALUES (@source_string);

SELECT id, value
FROM #destination_table
ORDER BY id;

-- or select what you need
SELECT value
FROM #destination_table
WHERE id = 2;

DROP TABLE #destination_table;
It'll insert the different values in a table and then you can choose the needed values.