How to split comma separated text in MySQL stored procedure - sql

How can I split comma-separated text (a list of IDs) in a MySQL stored procedure so that the result can be used in an SQL "IN" clause?
SELECT * FROM table WHERE table.id IN (splitStringFunction(commaSeparatedData, ','));

This is simple as hell for MySQL:
-- FIND_IN_SET yields the position of table.id inside the comma-separated list
-- (0 when it is absent), so it can serve directly as the filter predicate.
SELECT *
FROM table
WHERE FIND_IN_SET(table.id, commaSeparatedData);
Reference: http://dev.mysql.com/doc/refman/5.0/en/string-functions.html#function_find-in-set

You could use a prepared statement inside the stored procedure to achieve this. You can create the whole select query as a string inside a variable and then concatenate in the comma delimited string into its IN clause. Then you can make a prepared statement from the query string variable and execute it.
DELIMITER ;;
-- testProc: returns the rows of testTable whose id appears in listString.
--   listString: comma-separated list of integer ids, e.g. '1,2,3'.
-- The list is concatenated into dynamic SQL, so it is validated first: anything
-- other than (optionally signed) integers separated by commas is rejected,
-- which keeps arbitrary SQL from being injected through the parameter.
CREATE PROCEDURE testProc(IN listString VARCHAR(255))
BEGIN
    IF listString IS NULL
       OR listString NOT REGEXP '^ *-?[0-9]+( *, *-?[0-9]+)* *$' THEN
        SIGNAL SQLSTATE '45000'
            SET MESSAGE_TEXT = 'listString must be a comma-separated list of integers';
    END IF;

    -- PREPARE reads the statement text from a user variable (@query).
    SET @query = CONCAT('select * from testTable where id in (', listString, ')');
    PREPARE sql_query FROM @query;
    EXECUTE sql_query;
    -- Release the statement so it does not linger for the rest of the session.
    DEALLOCATE PREPARE sql_query;
END
;;
DELIMITER ;
CALL testProc('1,2,3');

You could try this MySql example. Before you use it, put some type safety checks in there (i.e. check id is integer, or match against regular expression before insert).
# BEGIN split statements ids
# Splits the comma-separated list held in statement_ids (presumably a parameter of the
# enclosing procedure - confirm against the caller) into the temporary table ids,
# one row per element. The DECLAREs must stay at the top of the enclosing BEGIN block.
DECLARE current_pos INT DEFAULT 1;
DECLARE delim CHAR DEFAULT ',';
DECLARE current CHAR DEFAULT '';
DECLARE current_id VARCHAR(100) DEFAULT '';
DECLARE total_chars INT DEFAULT 0;
# Allow the fragment to run more than once per session.
DROP TEMPORARY TABLE IF EXISTS ids;
CREATE TEMPORARY TABLE ids (`id` VARCHAR(100));
# MID() addresses characters, so the end of the list must be measured in characters
# (CHAR_LENGTH), not bytes (LENGTH); COALESCE maps a NULL list to length 0.
SET total_chars = COALESCE(CHAR_LENGTH(statement_ids), 0);
split_ids: LOOP
    # Empty or NULL list: current_pos would never equal the length, so leave at once
    # instead of looping forever.
    IF total_chars = 0 THEN
        LEAVE split_ids;
    END IF;
    SET current = MID(statement_ids, current_pos, 1);
    IF (current_pos = total_chars) THEN
        # Last character: finish the pending id (a trailing delimiter adds nothing) and stop.
        IF current != delim THEN SET current_id = CONCAT(current_id, current); END IF;
        INSERT INTO ids(id) VALUES (current_id);
        LEAVE split_ids;
    END IF;
    IF current = delim THEN
        # Delimiter reached: store the id collected so far and start a new one.
        INSERT INTO ids(id) VALUES (current_id);
        SET current_id = '';
    ELSE
        SET current_id = CONCAT(current_id, current);
    END IF;
    SET current_pos = current_pos + 1;
END LOOP split_ids;
# END split statement ids
# to check ids are correct
SELECT * FROM ids;
# to use the ids:
SELECT * FROM statements WHERE id IN (SELECT id FROM ids);

OK, slightly "easier" but less geeky way for people like me:
say you have one table 'combined_city_state' which looks like:
'Chicago, Illinois'
copy that to 2 other tables:
-- Clone the structure of combined_city_state twice and fill both clones with its rows.
CREATE TABLE city LIKE combined_city_state;
CREATE TABLE state LIKE combined_city_state;

INSERT INTO city
SELECT * FROM combined_city_state;

INSERT INTO state
SELECT * FROM combined_city_state;
You now have 3 tables with the same data as 'combined_city_state'.
Install this function:
-- SPLIT_STR(x, delim, pos): returns the pos-th (1-based) delim-separated field of x,
-- or '' when x has fewer than pos fields.
--   x     : the delimited text
--   delim : the delimiter
--   pos   : 1-based index of the wanted field
CREATE FUNCTION SPLIT_STR(
    x VARCHAR(255),
    delim VARCHAR(12),
    pos INT
)
RETURNS VARCHAR(255)
DETERMINISTIC
RETURN REPLACE(
    SUBSTRING(
        -- everything up to and including field `pos`
        SUBSTRING_INDEX(x, delim, pos),
        -- skip the first pos-1 fields; SUBSTRING counts characters, so the offset
        -- must be CHAR_LENGTH (LENGTH counts bytes and overshoots on multi-byte text)
        CHAR_LENGTH(SUBSTRING_INDEX(x, delim, pos - 1)) + 1
    ),
    -- drop the delimiter left in front of the field
    delim,
    ''
);
Then apply this to each table to remove the extra index of data:
-- Keep only the city part (field 1) and the state part (field 2) of the combined value.
-- TRIM removes the blank that follows the comma in values such as 'Chicago, Illinois'.
-- No WHERE clause: every row of firms is rewritten on purpose.
UPDATE firms
SET city = TRIM(SPLIT_STR(city, ',', 1));
UPDATE firms
SET state = TRIM(SPLIT_STR(state, ',', 2));
This leaves you with one column of just cities and one of just states. You can now remove the original 'combined_city_state' column if you no longer need it.

You can do it two ways:
SQL Library
Natively with REGEXP

I'm surprised this one-liner isn't properly mentioned here:
-- Filters by the integer ids contained in the comma-separated variable @list_string.
SELECT *
FROM table
WHERE id IN (SELECT CONVERT(int, Value) FROM dbo.Split(@list_string, ','));
All you need is a Split SQL function like the one below which will come in handy in other ways as well:
-- dbo.Split: splits @List on the delimiter @SplitOn and returns one row per element.
--   @List    : the delimited text
--   @SplitOn : the delimiter (may be more than one character)
-- Returns a table of (Id, Value): Id numbers the elements from 1, Value is the
-- element with leading and trailing blanks removed.
CREATE FUNCTION dbo.Split
(
    @List nvarchar(2000),
    @SplitOn nvarchar(5)
)
RETURNS @RtnValue table
(
    Id int identity(1,1),
    Value nvarchar(100)
)
AS
BEGIN
    DECLARE @SplitLen int;
    DECLARE @Pos int;

    -- LEN() ignores trailing blanks, so a delimiter such as ' ' would measure 0 and the
    -- loop below would never advance. DATALENGTH counts bytes: 2 per nvarchar character.
    SET @SplitLen = DATALENGTH(@SplitOn) / 2;
    SET @Pos = CHARINDEX(@SplitOn, @List);

    WHILE (@Pos > 0)
    BEGIN
        -- Emit the element in front of the delimiter ...
        INSERT INTO @RtnValue (Value)
        SELECT LTRIM(RTRIM(SUBSTRING(@List, 1, @Pos - 1)));

        -- ... then continue with the text behind it.
        SET @List = SUBSTRING(@List, @Pos + @SplitLen, LEN(@List));
        SET @Pos = CHARINDEX(@SplitOn, @List);
    END

    -- Whatever remains after the last delimiter is the final element.
    INSERT INTO @RtnValue (Value)
    SELECT LTRIM(RTRIM(@List));

    RETURN
END

You can use find_in_set() function for collection filter
how-to-split-and-search-in-comma-separated-values-in-mysql
-- Keep the rows whose id occurs in the comma-separated list.
SELECT *
FROM table
WHERE find_in_set(table.id, commaSeparatedData) > 0;

I have parsed data with hyphens in it. The example below uses a fixed text string to demonstrate, just change the references to relevant column names in the table. I played for ages with a way to ensure it worked on codes with varying numbers of components and in the end decided to add the where clause. Most data you are trying to parse would have a fixed number of columns.
-- Splits a hyphen-separated code into ten columns. Component n is taken as the last
-- piece of the first n pieces. The WHERE clause only lets through codes with exactly
-- nine hyphens, i.e. ten components.
select
    SUBSTRING_INDEX(TS,"-",1) as "1",
    SUBSTRING_INDEX(SUBSTRING_INDEX(TS,"-",2),"-",-1) as "2",
    SUBSTRING_INDEX(SUBSTRING_INDEX(TS,"-",3),"-",-1) as "3",
    SUBSTRING_INDEX(SUBSTRING_INDEX(TS,"-",4),"-",-1) as "4",
    SUBSTRING_INDEX(SUBSTRING_INDEX(TS,"-",5),"-",-1) as "5",
    SUBSTRING_INDEX(SUBSTRING_INDEX(TS,"-",6),"-",-1) as "6",
    SUBSTRING_INDEX(SUBSTRING_INDEX(TS,"-",7),"-",-1) as "7",
    SUBSTRING_INDEX(SUBSTRING_INDEX(TS,"-",8),"-",-1) as "8",
    SUBSTRING_INDEX(SUBSTRING_INDEX(TS,"-",9),"-",-1) as "9",
    SUBSTRING_INDEX(SUBSTRING_INDEX(TS,"-",10),"-",-1) as "10"
from (select "aaa-bbb-ccc-ddd-eee-fff-ggg-hhh-iii-jjj" as TS) as S
where (LENGTH(TS)-LENGTH(REPLACE(TS,'-',''))) =9

A bit strange but:
-- Splits the comma-separated string in @str into one row per element by joining it
-- against a generated number table named seq_<from>_to_<to> (presumably the MariaDB
-- Sequence storage engine - confirm it is available on the target server).
SET @i = 1;
SET @str = 'a,b,c,d,e,f,g,h';
-- Number of elements = number of commas + 1.
SET @length = CHAR_LENGTH(@str) - CHAR_LENGTH(REPLACE(@str, ',', '')) + 1;
-- For each seq value n: take the first n elements, then keep the last of them.
SET @query = CONCAT('select substring_index(
substring_index(@str, '','', seq),
'','',
-1
) as letter from seq_', @i, '_to_', @length);
PREPARE q FROM @query;
EXECUTE q;
-- Release the prepared statement once the result has been produced.
DEALLOCATE PREPARE q;

Related

How to select from text list randomly?

I'm trying to build SQL function that I can use as a default value for a column. The function is about selecting an avatar image path randomly if the user didn't assign an image.
I have tried to put together an example; it is completely wrong, but it is only meant to convey the idea, not to be the solution.
what I need to do is something like this
select top 1 from "avatar1,png, avatar2,png, avatar3.png, avatar4.png, avatar5.png" order by rand();
and I will convert it to a function like this
CREATE FUNCTION dbo.ReturnAvatar()
RETURNS nvarchar(100)
AS
BEGIN
DECLARE #ret nvarchar(100);
SET #ret = (select top 1 from "avatar1,png, avatar2,png, avatar3.png, avatar4.png, avatar5.png" as tbl order by rand());
RETURN #ret;
END;
this is just to explain the idea that I'm not able to apply. I don't know if SQL server has something like this or not.
Here is one way:
-- The view wraps NEWID() so that the function below can obtain a fresh random value
-- through it.
CREATE VIEW getNewID AS SELECT newid() AS new_id;
GO
-- CREATE VIEW and CREATE FUNCTION each have to start their own batch, hence the GO.
-- dbo.ReturnAvatar: returns one of the five avatar file names, ordered by a value
-- read from getNewID so that the pick varies between calls.
CREATE FUNCTION dbo.ReturnAvatar()
RETURNS nvarchar(100)
AS
BEGIN
    DECLARE @ret nvarchar(100);
    SET @ret = (SELECT TOP 1 value
                FROM STRING_SPLIT('avatar1.png,avatar2.png,avatar3.png,avatar4.png,avatar5.png', ',')
                ORDER BY (SELECT new_id FROM getNewID));
    RETURN @ret;
END;
Note that your current CSV string of filenames does not seem proper, because comma does not indicate the start of the extension in either Windows or Linux. So, I have assumed dot everywhere. In addition, if you want to use STRING_SPLIT, you may only split on a single character. Therefore, I assume that comma will be the delimiter here.
You do not need to create a table at all. Simply put the number inside your string and choose the number randomly:
-- rand()*5+1 lies in [1, 6). A non-zero third argument makes ROUND truncate instead of
-- round, so the number stays in 1..5; plain rounding could also produce 6.
-- STR left-pads its result with blanks; the LTRIM variant further down removes them.
select 'avatar'+str(round(rand()*5+1,0,1))+'.png'
would be fine.
Put that into your function and you are all set.
rand() produces 0..1(excl.) so you can simply multiply it by 5 and add 1 to get your range of 1...5
Demo: http://sqlfiddle.com/#!18/9eecb/82866
Documentation:
ROUND ( numeric_expression , length [ ,function ] )
STR ( float_expression [, length [, decimal]])
rand(seed)
So essentially you could boil it down to:
-- FLOOR keeps the number in 1..5: STR(..., 20, 0) rounds, so without it values from
-- 5.5 upwards would come out as 6. LTRIM strips the blanks STR pads on the left.
select 'avatar'+ltrim(str(floor(rand()*5+1),20,0))+'.png'
with
ltrim(string) taking care of the space
-- dbo.ReturnAvatar: maps a uniqueidentifier to an avatar file name 'avatar<N>.png',
-- with N in 1..@avatars.
--   @uid     : value whose checksum selects the avatar
--   @avatars : number of available avatars; NULL or 0 falls back to 10
create function dbo.ReturnAvatar(@uid uniqueidentifier, @avatars int = 10)
returns varchar(100)
as
begin
    -- bigint casts: ABS() fails with an overflow on the smallest int value.
    -- NULLIF: a count of 0 must not reach the modulo (division by zero).
    return ('avatar'
        + cast(abs(cast(checksum(@uid) as bigint))
               % isnull(nullif(abs(cast(@avatars as bigint)), 0), 10) + 1 as varchar(100))
        + '.png')
end
go
-- Demo: theavatar defaults to dbo.ReturnAvatar(), so inserting only user names
-- assigns every row an avatar. The grouped count shows how the avatars spread.
CREATE TABLE myusers
(
    username  VARCHAR(50),
    theavatar VARCHAR(100) DEFAULT (dbo.ReturnAvatar(NEWID(), DEFAULT))
);

-- 10000 generated user names: 'user1', 'user2', ...
INSERT INTO myusers (username)
SELECT TOP (10000)
    'user' + CAST(ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS VARCHAR(50))
FROM master.dbo.spt_values AS a
CROSS JOIN master.dbo.spt_values AS b;
GO

SELECT theavatar, COUNT(*)
FROM myusers
GROUP BY theavatar;
GO

DROP TABLE myusers;

How to replace all special characters in string

I have a table with the following columns:
dbo.SomeInfo
- Id
- Name
- InfoCode
Now I need to update the above table's InfoCode as
-- Derive InfoCode from Name for every row: lower-case it, drop trailing blanks,
-- turn blanks into '-' and remove ':'.
UPDATE dbo.SomeInfo
SET InfoCode = REPLACE(
        REPLACE(RTRIM(LOWER(Name)), ' ', '-'),
        ':', ''
    )
This replaces all spaces with - & lowercase the name
When I do check the InfoCode, I see there are Names with some special characters like
Cathe Friedrich''s Low Impact
coffeyfit-cardio-box-&-burn
Jillian Michaels: Cardio
Then I am manually writing the update sql against this as
-- Manual correction of a single row, matched by its exact Name.
UPDATE dbo.SomeInfo
SET InfoCode = 'cathe-friedrichs-low-impact'
WHERE Name = 'Cathe Friedrich''s Low Impact '
Now, this solution is not realistic for me. I checked the following links related to Regex & others around it.
UPDATE and REPLACE part of a string
https://www.codeproject.com/Questions/456246/replace-special-characters-in-sql
But none of them is hitting the requirement.
What I need is: if there is any character other than [a-z0-9], replace it with -; also, there should not be consecutive hyphens (--) in InfoCode.
The above Update sql has set some values of InfoCode as the-dancer's-workout®----starter-package
Some Names have value as
Sleek Technique™
The Dancer's-workout®
How can I write Update sql that could handle all such special characters?
Using NGrams8K you could split the string into characters and then rather than replacing every non-acceptable character, retain only certain ones:
-- Split each string into single characters, keep ASCII letters and digits and turn
-- every other character into '-', then glue the characters back together in order.
-- The class is spelled A-Za-z on purpose: under a binary collation the range A-z
-- would also cover the characters [ \ ] ^ _ ` that sit between 'Z' and 'a'.
SELECT (SELECT '' + CASE WHEN N.token COLLATE Latin1_General_BIN LIKE '[A-Za-z0-9]' THEN N.token ELSE '-' END
        FROM dbo.NGrams8k(V.S,1) N
        ORDER BY N.position
        FOR XML PATH(''))
FROM (VALUES('Sleek Technique™'),('The Dancer''s-workout®'))V(S);
I use COLLATE here as on my default collation in my instance the '™' is ignored, therefore I use a binary collation. You may want to use COLLATE to switch the string back to its original collation outside of the subquery.
This approach is fully inlinable:
First we need a mock-up table with some test data:
-- Mock-up table variable with some test data
DECLARE @SomeInfo TABLE (Id INT IDENTITY, InfoCode VARCHAR(100));
INSERT INTO @SomeInfo (InfoCode) VALUES
 ('Cathe Friedrich''s Low Impact')
,('coffeyfit-cardio-box-&-burn')
,('Jillian Michaels: Cardio')
,('Sleek Technique™')
,('The Dancer''s-workout®');
--This is the query
-- cte: one row per character of the lower-cased InfoCode (position starts at 1)
WITH cte AS
(
    SELECT 1 AS position
          ,si.Id
          ,LOWER(si.InfoCode) AS SourceText
          ,SUBSTRING(LOWER(si.InfoCode),1,1) AS OneChar
    FROM @SomeInfo si
    UNION ALL
    SELECT cte.position +1
          ,cte.Id
          ,cte.SourceText
          ,SUBSTRING(LOWER(cte.SourceText),cte.position+1,1) AS OneChar
    FROM cte
    WHERE position < DATALENGTH(SourceText)
)
-- Cleaned: per Id, re-concatenate the characters, replacing everything that is not
-- a letter or a digit with '-'
,Cleaned AS
(
    SELECT cte.Id
          ,(
            SELECT CASE WHEN ASCII(cte2.OneChar) BETWEEN 65 AND 90 --A-Z
                          OR ASCII(cte2.OneChar) BETWEEN 97 AND 122--a-z
                          OR ASCII(cte2.OneChar) BETWEEN 48 AND 57 --0-9
                          --You can easily add more ranges
                        THEN cte2.OneChar ELSE '-'
                        --You can easily nest another CASE to deal with special characters like the single quote in your examples...
                   END
            FROM cte AS cte2
            WHERE cte2.Id=cte.Id
            ORDER BY cte2.position
            FOR XML PATH('')
           ) AS normalised
    FROM cte
    GROUP BY cte.Id
)
-- NoDoubleHyphens: collapse runs of '-' into one: '-' becomes '<>', the '><' seams
-- between neighbours are removed, and the remaining '<>' is turned back into '-'
,NoDoubleHyphens AS
(
    SELECT REPLACE(REPLACE(REPLACE(normalised,'-','<>'),'><',''),'<>','-') AS normalised2
    FROM Cleaned
)
-- Final step: cut away a trailing hyphen
SELECT CASE WHEN RIGHT(normalised2,1)='-' THEN SUBSTRING(normalised2,1,LEN(normalised2)-1) ELSE normalised2 END AS FinalResult
FROM NoDoubleHyphens;
The first CTE will recursively (well, rather iteratively) traverse the string, character by character, and return a very slim set with one row per character.
The second CTE will then GROUP the Ids. This allows for a correlated sub-query, where the actual check is performed using ASCII ranges. FOR XML PATH('') is used to re-concatenate the string. With SQL Server 2017+ I'd suggest using STRING_AGG() instead.
The third CTE will use a well-known trick to get rid of multiple occurrences of a character. Take any two characters which will never occur in your string; I use < and >. A string like a--b---c will come back as a<><>b<><><>c. After replacing >< with nothing we get a<>b<>c. Finally, replacing <> with - gives a-b-c. Well, that's it...
The final SELECT will cut away a trailing hyphen. If needed you can add similar logic to get rid of a leading hyphen. With v2017+ there is TRIM('-' FROM ...) to make this easier...
The result
cathe-friedrich-s-low-impact
coffeyfit-cardio-box-burn
jillian-michaels-cardio
sleek-technique
the-dancer-s-workout
You can create a User-Defined-Function for something like that.
Then use the UDF in the update.
-- dbo.LowerDashString: builds a lower-case, dash-separated code from @str.
--   @str : the text to convert
-- Returns @str lower-cased, with single quotes removed, every character outside
-- [a-z0-9] replaced by '-' and runs of dashes collapsed into one dash.
-- NOTE: leading/trailing dashes are kept; on SQL Server 2017 and beyond
-- TRIM('-' FROM @result) could remove them.
CREATE FUNCTION [dbo].LowerDashString (@str varchar(255))
RETURNS varchar(255)
AS
BEGIN
    DECLARE @result varchar(255);
    DECLARE @chr varchar(1);
    DECLARE @pos int;
    SET @result = '';
    SET @pos = 1;
    -- lowercase the input and remove the single-quotes
    SET @str = REPLACE(LOWER(@str),'''','');
    -- loop through the characters
    -- while replacing anything that's not a letter or a digit with a dash
    WHILE @pos <= LEN(@str)
    BEGIN
        SET @chr = SUBSTRING(@str, @pos, 1);
        -- binary collation: the ranges then mean exactly a-z and 0-9, independent of
        -- how the database collation sorts accented letters
        IF @chr COLLATE Latin1_General_BIN LIKE '[a-z0-9]'
            SET @result += @chr;
        ELSE
            SET @result += '-';
        SET @pos += 1;
    END;
    -- multiple dashes to one dash
    WHILE @result LIKE '%--%' SET @result = REPLACE(@result,'--','-');
    RETURN @result;
END;
GO
Example snippet using the function:
-- using a table variable for demonstration purposes
declare @SomeInfo table (Id int primary key identity(1,1) not null, InfoCode varchar(100) not null);
-- sample data
insert into @SomeInfo (InfoCode) values
('Cathe Friedrich''s Low Impact'),
('coffeyfit-cardio-box-&-burn'),
('Jillian Michaels: Cardio'),
('Sleek Technique™'),
('The Dancer''s-workout®');
-- rewrite only the codes that contain something other than letters and dashes,
-- or that are not already lower-case
update @SomeInfo
set InfoCode = dbo.LowerDashString(InfoCode)
where (InfoCode LIKE '%[^A-Z-]%' OR InfoCode != LOWER(InfoCode));
select *
from @SomeInfo;
Result:
Id InfoCode
-- -----------------------------
1 cathe-friedrichs-low-impact
2 coffeyfit-cardio-box-burn
3 jillian-michaels-cardio
4 sleek-technique-
5 the-dancers-workout-

Create temp table from comma separated values with more than one column

I'm trying to pass a comma separated string such as this: "101:string1,102:string2" into a stored proc and create a temp table out of it. The temp table would have two columns, one integer and one string. It would have two rows for this example. The comma delimits the rows, and the colons delimit the two columns. Anyone know how I can do this? I'm using sql server. Thanks in advance!
EDIT: By the way, I'm not asking about how to create a temp table, only how to create the function.
You can try a Table-Valued Function instead of a temp table. Something like this:
-- ListToTable: turns a list such as '101:string1,102:string2' into rows.
--   @list : rows separated by ',', the two columns of a row separated by ':'
-- Returns a table of (n, s): n is the integer in front of the colon, s the text behind it.
-- The list is converted to XML (<root><row><n>..</n><s>..</s></row>..</root>) and
-- shredded with nodes()/value().
CREATE FUNCTION ListToTable
(
    @list nvarchar(4000)
)
RETURNS @return TABLE
(
    n int,
    s nvarchar(15)
)
AS
BEGIN
    SET @list = NULLIF(ltrim(rtrim(@list)),'')
    -- Escape the XML markup characters first; otherwise a value containing &, < or >
    -- makes the CAST to XML fail. nvarchar(max) leaves room for the longer escaped text.
    DECLARE @escaped nvarchar(max) =
        REPLACE(REPLACE(REPLACE(CAST(@list AS nvarchar(max)),
            '&', '&amp;'),
            '<', '&lt;'),
            '>', '&gt;');
    DECLARE @xml AS XML = CAST('<root><row><n>' +
        REPLACE(REPLACE(@escaped,
            ',', '</s></row><row><n>'),
            ':', '</n><s>') +
        '</s></row></root>' AS XML) ;
    INSERT INTO @return (n, s)
    SELECT root.row.value('n[1]', 'int')
         , root.row.value('s[1]', 'nvarchar(4000)')
    FROM @xml.nodes('/root/row') as root(row)
    RETURN
END
Usage:
-- n and s are the two columns the function returns.
SELECT n, s
FROM dbo.ListToTable('101:string1,102:string2')
Output:
n s
----------- ---------------
101 string1
102 string2

ESCAPE SEQUENCE NOT SUPPORT IN SQL

My Table Contains
Id(int) name(nvarchar(300)) path(nvarchar(3000))
--------------------------------------------------------------
8 Subunit1_1 વસૂલાત/SubUnit/!@#$%^&*()_+{}|:"<>?,.;'[]\-=
my Query:
select * from tbl1 where Path = 'વસૂલાત/SubUnit/!@#$%^&*()_+{}|:"<>?,.;''[]\-='
I am getting an empty result. Note that the path contains a backslash and a single quote.
Use N prefix in your search string something like this...
-- The N prefix makes the literal nvarchar, so the non-ASCII characters of the path
-- are compared as Unicode.
select * from tbl1
where Path = N'વસૂલાત/SubUnit/!@#$%^&*()_+{}|:"<>?,.;''[]\-='
Because you have these unicode characters in your strings, you need to tell sql server explicitly that string may contain some unicode character by prefixing it with N.
Same is true when you are inserting, updating unicode data in sql server.
Your Updated Stored Procedure
-- spSCS_ManageOrgunits: lists the org units of a domain that match a path.
--   @DomainId       : compared with OrgUnit_tbl.did
--   @orgunitpath    : compared with OrgUnit_tbl.OrgUnitPath (nvarchar, so Unicode paths match)
--   @iDisplayStart  : first row number of the page (used only for the empty path)
--   @iDisplayLength : number of rows in the page (used only for the empty path)
-- When @orgunitpath is the empty string, the matching rows are numbered by OrgUnitId
-- and only rows @iDisplayStart .. @iDisplayStart+@iDisplayLength-1 are returned;
-- otherwise all matching rows are returned unpaged.
CREATE PROCEDURE [dbo].[spSCS_ManageOrgunits]
    @DomainId int,
    @orgunitpath nvarchar(3000),
    @iDisplayStart int,
    @iDisplayLength int
AS
BEGIN
    SET NOCOUNT ON;
    IF @orgunitpath = ''
    BEGIN
        SELECT a.[row],a.OrgUnitId,a.did,a.OrgUnitName,a.OrgUnitPath,a.ScheduledStatus,a.AutoSyncStatus
        FROM
        (
            SELECT ROW_NUMBER() OVER (ORDER BY OrgUnit_tbl.OrgUnitId) AS row,OrgUnitId,did,OrgUnitName,OrgUnitPath,ScheduledStatus,AutoSyncStatus
            FROM OrgUnit_tbl
            WHERE did = @DomainId AND OrgUnitPath = @orgunitpath
        ) AS a
        WHERE a.[row] >= @iDisplayStart AND a.[row] < @iDisplayStart+@iDisplayLength
    END
    ELSE
    BEGIN
        SELECT OrgUnitId,did,OrgUnitName,OrgUnitPath,ScheduledStatus,AutoSyncStatus
        FROM OrgUnit_tbl
        WHERE did = @DomainId AND OrgUnitPath = @orgunitpath
    END
END

SQL How to find if all values from one field exist in another field in any order

I am trying to match data from an external source to an in-house source. For example, one table would have a field with a value of "black blue" and another table would have a field with a value of "blue black". I am trying to figure out how to check whether all individual words in the first table are contained in a record in the 2nd table, in any order. It's not always two words that need to be compared; it could be 3 or 4 as well. I know I could use a cursor and build dynamic SQL, substituting the space with the AND keyword and using the CONTAINS function, but I'm hoping not to have to do that.
Any help would be much appreciated.
Try doing something like this: Split the data from the first table on the space into a temporary table variable. Then use CHARINDEX to determine if each word is contained in the second table's record. Then just do this for each word in the first record and if the count is the same as the successful checks then you know every word from the first record is used in the second.
Edit: Use a Split function such as:
-- dbo.Split: inline table-valued function that cuts @s at every occurrence of @sep.
--   @sep : single-character separator
--   @s   : the text to split
-- Returns one row per piece: pn is the piece number (starting at 1), s the piece text.
CREATE FUNCTION dbo.Split (@sep char(1), @s varchar(512))
RETURNS table
AS
RETURN (
    -- Pieces walks from separator to separator: start is the first character of a
    -- piece, stop the position of the separator behind it (0 when none is left).
    WITH Pieces(pn, start, stop) AS (
        SELECT 1, 1, CHARINDEX(@sep, @s)
        UNION ALL
        SELECT pn + 1, stop + 1, CHARINDEX(@sep, @s, stop + 1)
        FROM Pieces
        WHERE stop > 0
    )
    SELECT pn,
        -- the last piece has no separator behind it: take the rest (512 = maximum length of @s)
        SUBSTRING(@s, start, CASE WHEN stop > 0 THEN stop-start ELSE 512 END) AS s
    FROM Pieces
)
Here's another method you could try, you could sample some simple attributes of your strings such as, length, number of spaces, etc.; then you could use a cross-join to create all of the possible string match combinations.
Then within your where-clause you can sort by matches, the final piece of which in this example is a check using the patindex() function to see if the sampled piece of the first string is in the second string.
-- begin sample table variable set up
-- @s holds the strings to look for, @t the strings to search in; the extra columns
-- cache simple attributes of each string that the final query compares.
declare @s table(
    id int identity(1,1)
    ,string varchar(255)
    ,numSpace int
    ,numWord int
    ,lenString int
    ,firstPatt varchar(255)
);
declare @t table(
    id int identity(1,1)
    ,string varchar(255)
    ,numSpace int
    ,numWord int
    ,lenString int
);
insert into @t(string) values ('my name');
insert into @t(string) values ('your name');
insert into @t(string) values ('run and jump');
insert into @t(string) values ('hello my name is');
insert into @s(string) values ('name my');
insert into @s(string) values ('name your');
insert into @s(string) values ('jump and run');
insert into @s(string) values ('my name is hello');
-- numSpace = number of blanks, numWord = blanks + 1, lenString = length,
-- firstPatt = first word. A string without a blank is its own first word: charindex
-- returns 0 there, which would otherwise give an empty pattern that matches everything.
update @s
set numSpace = len(string)-len(replace(string,' ',''))
   ,numWord = len(string)-len(replace(string,' ',''))+1
   ,lenString = len(string)
   ,firstPatt = case when charindex(' ',string,0) > 0
                     then rtrim(substring(string,1,charindex(' ',string,0)))
                     else string
                end;
update @t
set numSpace = len(string)-len(replace(string,' ',''))
   ,numWord = len(string)-len(replace(string,' ',''))+1
   ,lenString = len(string);
-- end sample table variable set up
-- select all combinations of strings using a cross join
-- and filter the entries in your where clause
-- the pattern index checks to see if the sampled string
-- from the first table variable is in the second table variable
select *
from
    @s s cross join @t t
where
    s.numSpace = t.numspace
    and s.numWord = t.numWord
    and s.lenString = t.lenString
    and patindex('%'+s.firstPatt+'%',t.string)>0;