Can I tokenize a string using t-SQL

Can I tokenize a string using t-SQL - sql

I was wondering if I have SQL Server 2008 table that was created like this:
-- Sample table: dvt holds a comma-separated list of numbers and
-- d0..d2 are the columns its first three items should be split into.
CREATE TABLE tbl (
    id  INT PRIMARY KEY,
    dvt NVARCHAR(32),
    d0  TINYINT,
    d1  TINYINT,
    d2  TINYINT
);

-- Seed the five example rows; d0..d2 all start out NULL.
INSERT INTO tbl (id, dvt, d0, d1, d2)
VALUES
    (1, '1',             NULL, NULL, NULL),
    (2, '',              NULL, NULL, NULL),
    (3, '2,5',           NULL, NULL, NULL),
    (4, '13, 34, 45, 5', NULL, NULL, NULL),
    (5, '1,8, 10',       NULL, NULL, NULL);
I need to take the string from the 'dvt' column and split it into 'd0', 'd1' and 'd2' columns. The 'dvt' value can be separated by commas.
I can do this using C# and a tokenization function but I was wondering if it's possible to do the same using SQL?
Columns BEFORE:
1, "1", NULL, NULL, NULL
2, "", NULL, NULL, NULL
3, "2,5", NULL, NULL, NULL
4, "13, 34, 45, 5", NULL, NULL, NULL
5, "1,8, 10", NULL, NULL, NULL
Columns AFTER:
1, "1", 1, NULL, NULL
2, "", NULL, NULL, NULL
3, "2,5", 2, 5, NULL
4, "13, 34, 45, 5", 13, 34, 45 -- 5 is discarded
5, "1,8, 10", 1, 8, 10

The main problem with this type of code is re-use of calculations.
SQL Server is good at caching results (if you type the exact same CHARINDEX() calculation 5 times, it only calculates it once and re-uses that result 4 times).
That's little consolation for the poor coder who has to type or maintain that code though.
SQL Server 2005 onward has CROSS APPLY, which does help somewhat. The logic is still repeated, but the results can be referenced repeatedly, rather than the calculation being typed repeatedly.
-- Split dvt at its first three commas.
-- Each OUTER APPLY locates the next comma after the previous one and exposes
-- its position as pos (NULL when there is no further comma), so the SUBSTRING
-- expressions can reference the positions instead of repeating CHARINDEX.
SELECT
    *,
    SUBSTRING(dvt, 1,              COALESCE(comma1.pos - 1, LEN(dvt)))              AS item1,
    SUBSTRING(dvt, comma1.pos + 1, COALESCE(comma2.pos - 1, LEN(dvt)) - comma1.pos) AS item2,
    SUBSTRING(dvt, comma2.pos + 1, COALESCE(comma3.pos - 1, LEN(dvt)) - comma2.pos) AS item3
FROM (
    -- inline sample data
    VALUES
        ('ab,c,def,hij'),
        ('xyz,abc')
) AS data (dvt)
OUTER APPLY (
    -- CHARINDEX returns 0 for "not found"; NULLIF turns that into NULL
    SELECT NULLIF(CHARINDEX(',', data.dvt, 1), 0) AS pos
) AS comma1
OUTER APPLY (
    SELECT NULLIF(CHARINDEX(',', data.dvt, comma1.pos + 1), 0) AS pos
    WHERE comma1.pos > 0
) AS comma2
OUTER APPLY (
    SELECT NULLIF(CHARINDEX(',', data.dvt, comma2.pos + 1), 0) AS pos
    WHERE comma2.pos > 0
) AS comma3
OUTER APPLY (
    -- not used by item1..item3, but still returned through the * above
    SELECT NULLIF(CHARINDEX(',', data.dvt, comma3.pos + 1), 0) AS pos
    WHERE comma3.pos > 0
) AS comma4
Another option is to simply write a table valued user defined function that does this (even when the result of the function is always one row). Then you simply CROSS APPLY that function.

Try something like this
-- Split tbl.dvt into its first three comma-separated items (d1, d2, d3).
-- The list is turned into <r>item</r><r>item</r>... XML once per row and the
-- items are then read by position. Missing or blank items come back as NULL.
;WITH Vals AS (
    SELECT
        t.id,
        t.dvt,
        -- value() yields NULL when the n-th <r> does not exist;
        -- LTRIM/RTRIM remove the blanks that follow a comma (e.g. ' 34').
        LTRIM(RTRIM(x.doc.value('(/r)[1]', 'varchar(max)'))) AS d1,
        LTRIM(RTRIM(x.doc.value('(/r)[2]', 'varchar(max)'))) AS d2,
        LTRIM(RTRIM(x.doc.value('(/r)[3]', 'varchar(max)'))) AS d3
    FROM tbl AS t
    CROSS APPLY (
        -- Build the XML document a single time per row. The XML
        -- metacharacters are escaped first so that stray '&', '<' or '>'
        -- in the data cannot make the CAST fail.
        SELECT CAST(
            '<r>'
            + REPLACE(
                  REPLACE(REPLACE(REPLACE(t.dvt, '&', '&amp;'), '<', '&lt;'), '>', '&gt;'),
                  ',',
                  '</r><r>'
              )
            + '</r>' AS XML
        ) AS doc
    ) AS x
)
SELECT
    id,
    dvt,
    NULLIF(d1, '') AS d1,   -- empty item -> NULL
    NULLIF(d2, '') AS d2,
    NULLIF(d3, '') AS d3
FROM Vals;

It's possible.
You could do it with some repetitive calls to CHARINDEX and checking for nulls but it may be better and clearer to write a FUNCTION to split the string.

I needed String tokenizer for Sybase; separation by 1 or more spaces in name data
The name data is clean and has no commas or other special characters.
-- Example call: lower-case the name, replace each space with a comma and
-- hand the resulting list to sp_splitwords.
-- Local variables must be prefixed with @ (a leading # denotes a temp table).
declare @test varchar(60)
select @test = str_replace(lower(rtrim('Jayanta Narayan Choudhuri')), " ", ",")
exec sp_splitwords @test
This is based on a neat hint by Kenny Lucas from http://www.sql9.com/?id=102
drop proc sp_splitwords
go
-- sp_splitwords: splits a comma-separated list into words and returns them
-- as a one-column result set (word), longest word first.
--   @instr  comma-separated input list (up to 80 characters)
-- Empty items (consecutive commas) are skipped.
create proc sp_splitwords(@instr varchar(80)) as
begin
    declare @pos  int,
            @word varchar(80),
            @list varchar(81)      -- input plus one terminating comma

    -- #words is a real temp table, hence the # prefix
    create table #words(word varchar(80))

    -- terminate the list so that every word is followed by a comma
    select @list = @instr + ','

    -- collapse runs of commas into a single comma
    set @pos = patindex('%,,%', @list)
    while @pos > 0
    begin
        select @list = str_replace(@list, ',,', ',')
        set @pos = patindex('%,,%', @list)
    end

    -- peel off one word per iteration, up to the next comma
    set @pos = patindex('%,%', @list)
    while @pos > 0
    begin
        set @word = substring(@list, 1, @pos - 1)
        set @list = substring(@list, @pos + 1, len(@list) - @pos)
        if NOT( @word is null OR LEN(@word) = 0 )
            insert into #words (word) values (@word)
        set @pos = patindex('%,%', @list)
    end

    select * from #words
    order by len(word) desc

    drop table #words
end
I could port Metaphone SQL Function to Sybase
http://www.sqlteam.com/forums/topic.asp?TOPIC_ID=125724
Sybase allows recursion in functions from a beautiful workaround
http://www.sypron.nl/quiz2008a.html#jan08
-- Step 1: create Metaphone2 as a stub that simply returns its argument.
-- NOTE(review): presumably the stub only exists so that the name can be
-- resolved while the recursive function is being created - confirm against
-- the linked workaround.
CREATE FUNCTION Metaphone2 (@str VARCHAR(100))
RETURNS VARCHAR(25) AS
BEGIN
    RETURN @str
END
GO
-- Step 2: drop the stub and re-create Metaphone2 so that it delegates
-- to dbo.Metaphone.
DROP FUNCTION Metaphone2
GO
CREATE FUNCTION Metaphone2 (@str VARCHAR(100))
RETURNS VARCHAR(25) AS
BEGIN
    RETURN dbo.Metaphone(@str)
END
Changed 1 line of Function pasted from http://www.sqlteam.com/forums/topic.asp?TOPIC_ID=125724
Combination of metaphone and string tokenizer means I can fuzzy search names
first middle and surname and rotations thereof

Related

sql SERVER - distinct selection based on priority columns

hello I would like to find a solution to solve my problem in a single request if possible.
For the moment I take all the records then I go through the lines one by one to eliminate what I don't want.
I have 2 tables : first table with links
the second with the prefered label for the url
the second table must be consulted keeping only the row with maximum priority
priority rules are
the current user then
the user group and finally
everyone.
if the hidden column is true, exclude any reference to the url
here is the expected result.
Unfortunately, I don't see any other solution than to multiply the conditions on several selects and unions.
If you have an idea to solve my problem, thank you in advance for your help.

It appears as though you can rely on pref_id for the preference ordering, correct? If so, you could try:
-- Keep, for every url, only the joined row with the lowest pref_id.
-- QUALIFY is not available in SQL Server, so the same filter is expressed
-- with TOP (1) WITH TIES: rows are ordered by their per-url row number and
-- every row that ties with the first one (row number 1) is returned.
SELECT TOP (1) WITH TIES *
FROM table2
INNER JOIN table1 ON table2.url_id = table1.url_id
ORDER BY ROW_NUMBER() OVER (
    PARTITION BY table1.url
    ORDER BY pref_id ASC
);
This will partition by the url and then provide only the one with lowest pref_id.
I didn't test this SQL as I wasn't sure which RDBMS you're running on, but I used Rasgo to translate the SQL.

maybe of interest in this tricky query:
-- For every url_id that has no hidden row, pick one pref_id (rows with a
-- non-null "user" sort first, then rows with a non-null "group") and return
-- that preference row together with the url.
SELECT
    so.*,
    table1.url
FROM (
    SELECT DISTINCT
        t.url_id,
        (
            SELECT cand.pref_id
            FROM table2 AS cand
            WHERE cand.url_id = t.url_id
            ORDER BY cand."user" IS NULL, cand."group" IS NULL
            LIMIT 1
        ) AS pref_id
    FROM table2 AS t
    WHERE NOT EXISTS (
        -- any hidden row excludes the whole url
        SELECT 1
        FROM table2 AS hidden
        WHERE hidden.hide
          AND hidden.url_id = t.url_id
    )
) AS ids
INNER JOIN table2 AS so
    ON so.pref_id = ids.pref_id
INNER JOIN table1
    ON table1.url_id = ids.url_id
ORDER BY so.url_id;

here is my solution but i think there is better to do.
in the condition's select, I built a column which gives a level note according to the priorities
-- For each URL, return the label row with the highest priority for the
-- current user (own row > row of the user's group > row for everyone),
-- and return nothing for that URL when the winning row is hidden.
-- Variables and table variables must be prefixed with @.
DECLARE @CUR_USER  VARCHAR(10) = 'ROBERT';
DECLARE @CUR_GROUP VARCHAR(10) = 'DEV';

-- Links
DECLARE @TABLE1 TABLE (
     URL_ID  INT
    ,URLNAME VARCHAR(100)
);

-- Preferred labels per URL; USER_GROUP and USER_CODE both NULL = everyone
DECLARE @TABLE2 TABLE (
     PREF_ID        INT
    ,URL_ID         INT
    ,FAVORITE_LABEL VARCHAR(100)
    ,USER_GROUP     VARCHAR(10)
    ,USER_CODE      VARCHAR(10)
    ,HIDE_URL       DECIMAL(1, 0) DEFAULT 0
);

INSERT INTO @TABLE1 (URL_ID, URLNAME)
VALUES
     (1, 'https://stackoverflow.com/')
    ,(2, 'https://www.microsoft.com/')
    ,(3, 'https://www.apple.com/')
    ,(4, 'https://www.wikipedia.org/')
;

INSERT INTO @TABLE2 (PREF_ID, URL_ID, FAVORITE_LABEL, USER_GROUP, USER_CODE, HIDE_URL)
VALUES
     (1000, 1, 'find everything', NULL, 'ROBERT', 0)
    ,(1001, 1, 'a question ? find the answer here', 'DEV', NULL, 0)
    ,(1002, 1, 'StackOverFlow', NULL, NULL, 0)
    ,(1003, 2, 'Windows', 'DEV', NULL, 0)
    ,(1004, 2, 'Microsoft', NULL, NULL, 0)
    ,(1005, 3, 'Apple', NULL, NULL, 0)
    ,(1006, 4, 'Free encyclopedia', NULL, 'ROBERT', 1)
    ,(1007, 4, 'Wikipedia', NULL, NULL, 0)
    ,(1008, 1, 'StackOverFlow FOR MAT', 'MAT', NULL, 0)
    ,(1009, 2, 'Microsoft FOR MAT', 'MAT', NULL, 0)
    ,(1010, 3, 'Apple', 'MAT', NULL, 1)
    ,(1011, 4, 'Wikipedia FOR MAT', 'MAT', NULL, 0)
    ,(1012, 1, 'StackOverFlow', NULL, 'JEAN', 1)
    ,(1013, 2, 'Microsoft ', NULL, 'JEAN', 0)
    ,(1014, 3, 'Apple', NULL, 'JEAN', 0)
    ,(1015, 4, 'great encyclopedia', NULL, 'JEAN', 0)
;

SELECT
     t2.*
    ,t1.URLNAME
FROM @TABLE1 AS t1
INNER JOIN @TABLE2 AS t2
    ON t1.URL_ID = t2.URL_ID
WHERE EXISTS (
    SELECT 1
    FROM (
        -- Best candidate for this URL: user row (2) > group row (1) > everyone (0).
        -- HIDE_URL is deliberately not ranked here: giving hidden rows their
        -- own level would stop JEAN of the MAT group from seeing Apple.
        SELECT TOP (1)
             cand.PREF_ID
            ,CASE
                WHEN cand.USER_CODE IS NOT NULL
                    THEN 2
                WHEN cand.USER_GROUP IS NOT NULL
                    THEN 1
                ELSE 0
             END AS ROW_LEVEL
        FROM @TABLE2 AS cand
        WHERE (
                (
                    cand.USER_GROUP IS NULL
                    AND cand.USER_CODE IS NULL
                )
                OR (cand.USER_GROUP = @CUR_GROUP)
                OR (cand.USER_CODE = @CUR_USER)
              )
          AND t2.URL_ID = cand.URL_ID
        ORDER BY ROW_LEVEL DESC
    ) AS best
    -- keep t2 only if it is the winning row and is not hidden
    WHERE best.PREF_ID = t2.PREF_ID
      AND t2.HIDE_URL = 0
)

Simply use an ORDER BY clause that puts the preferred row first. You can use this in the window function ROW_NUMBER and work with this or use a lateral top(1) join with CROSS APPLY.
-- For every url, join the single label row that sorts first:
-- rows with a [Group] come before rows without one, then rows with a [user],
-- then rows with a hide value.
select *
from urls
cross apply
(
    select top(1) *
    from labels
    -- a query block takes only one WHERE clause, so the url correlation
    -- and the "at least one attribute set" filter are combined with AND
    where labels.url_id = urls.url_id
      and ([Group] is not null or [user] is not null or hide is not null)
    order by
        case when [Group] is null then 2 else 1 end,
        case when [user] is null then 2 else 1 end,
        case when hide is null then 2 else 1 end
) top_labels
order by urls.url_id;

Want to compare 4 different columns with the result of CTE

I have created a CTE (common table Expression) as follows:
-- Build a quoted, comma-separated list of the category names for the listed
-- IDs and store it in @N.
-- The DECLARE is terminated with ';' because a statement that precedes
-- WITH must be terminated; variables must be prefixed with @.
DECLARE @N VARCHAR(100);

WITH CAT_NAM AS (
    -- names dated in the current calendar year
    SELECT ID, NAME
    FROM TABLE1
    WHERE YEAR(DATE) = YEAR(GETDATE())
)
SELECT @N = STUFF((
        SELECT ',''' + NAME + ''''
        FROM CAT_NAM
        WHERE ID IN (20,23,25,30,37)
        FOR XML PATH ('')
    ), 1, 1, '');   -- STUFF removes the leading comma
The result of above CTE is 'A','B','C','D','F'
Now I need to check 4 different columns CAT_NAM_1,CAT_NAM_2,CAT_NAM_3,CAT_NAM_4 in the result of CTE and form it as one column like follow:
-- Return the first of CAT_NAM_1..CAT_NAM_4 that is found in @N.
-- NOTE: IN (@N) compares each column with the whole string held in @N as one
-- single value; the quoted, comma-separated text inside the variable is not
-- expanded into a list, so these comparisons do not match individual names.
SELECT
    CASE
        WHEN CAT_NAM_1 IN (@N) THEN CAT_NAM_1
        WHEN CAT_NAM_2 IN (@N) THEN CAT_NAM_2
        WHEN CAT_NAM_3 IN (@N) THEN CAT_NAM_3
        WHEN CAT_NAM_4 IN (@N) THEN CAT_NAM_4
    END AS CAT
FROM table2
When I'm trying to do the above getting error please help me to do.
If my approach is wrong help me with right one.

I am not exactly sure what you are trying to do, but if I understand the following script shows one possible technique. I have created some table variables to mimic the data you presented and then wrote a SELECT statement to do what I think you asked (but I am not sure).
-- Self-contained demo: table variables stand in for TABLE1 / TABLE2.
-- Table variables must be prefixed with @.
DECLARE @TABLE1 AS TABLE (
    ID     INT         NOT NULL,
    [NAME] VARCHAR(10) NOT NULL,
    [DATE] DATE        NOT NULL
);

INSERT INTO @TABLE1 (ID, [NAME], [DATE])
VALUES (20, 'A', '2021-01-01'), (23, 'B', '2021-02-01'),
       (25, 'C', '2021-03-01'), (30, 'D', '2021-04-01'),
       (37, 'E', '2021-05-01'), (40, 'F', '2021-06-01');

DECLARE @TABLE2 AS TABLE (
    ID        INT         NOT NULL,
    CAT_NAM_1 VARCHAR(10) NULL,
    CAT_NAM_2 VARCHAR(10) NULL,
    CAT_NAM_3 VARCHAR(10) NULL,
    CAT_NAM_4 VARCHAR(10) NULL
);

INSERT INTO @TABLE2 (ID, CAT_NAM_1, CAT_NAM_2, CAT_NAM_3, CAT_NAM_4)
VALUES (1, 'A', NULL, NULL, NULL), (2, NULL, 'B', NULL, NULL);

-- CAT_NAM: names for the listed IDs that are dated in the current year.
-- The sample rows are dated 2021, so in any other year CAT_NAM is empty
-- and every row falls through to '?'.
;WITH CAT_NAM AS (
    SELECT ID, [NAME]
    FROM @TABLE1
    WHERE YEAR([DATE]) = YEAR(GETDATE())
      AND ID IN (20,23,25,30,37,40)
)
-- Return the first of the four columns whose value exists in CAT_NAM.
SELECT CASE
           WHEN EXISTS (SELECT 1 FROM CAT_NAM WHERE CAT_NAM.[NAME] = CAT_NAM_1) THEN CAT_NAM_1
           WHEN EXISTS (SELECT 1 FROM CAT_NAM WHERE CAT_NAM.[NAME] = CAT_NAM_2) THEN CAT_NAM_2
           WHEN EXISTS (SELECT 1 FROM CAT_NAM WHERE CAT_NAM.[NAME] = CAT_NAM_3) THEN CAT_NAM_3
           WHEN EXISTS (SELECT 1 FROM CAT_NAM WHERE CAT_NAM.[NAME] = CAT_NAM_4) THEN CAT_NAM_4
           ELSE '?' -- not sure what you want if there is no match
       END AS CAT
FROM @TABLE2;

You can do a bit of set-based logic for this
-- For every table2 row, unpivot the four CAT_NAM_n columns into rows and
-- keep only the names that also occur in CAT_NAM for the listed IDs.
-- CAT_NAM is not defined in this statement.
-- NOTE(review): presumably it is the CTE from the question and must be
-- declared in a WITH clause ahead of this SELECT - confirm.
SELECT
    ct.NAME
FROM table2 AS t2
CROSS APPLY (
    SELECT v.NAME
    FROM (VALUES
        (t2.CAT_NAM_1),
        (t2.CAT_NAM_2),
        (t2.CAT_NAM_3),
        (t2.CAT_NAM_4)
    ) AS v (NAME)
    INTERSECT
    -- the column is called NAME; the inner alias differs from the outer
    -- "ct" so the two scopes cannot be confused
    SELECT cn.NAME
    FROM CAT_NAM AS cn
    WHERE cn.ID IN (20,23,25,30,37)
) AS ct;

Why does this query take so long, and how I can increase its performance?

I have a query which, to summarize, inserts N identical rows into a table, with the only difference being that one column's values go 0, 1, ..., N, and I don't insert a row if there is already a row with that value. My procedure is
BEGIN
    -- Insert one 'Board' row per page number 0..@boardPageCeiling, skipping
    -- page numbers that already exist. One existence check and at most one
    -- INSERT are executed per page number.
    -- Variables must be prefixed with @.
    DECLARE @i INT = 0;

    WHILE (@i <= @boardPageCeiling)
    BEGIN
        IF NOT EXISTS (SELECT 1 FROM PageArchives WHERE PageType = 'Board' AND PageNum = @i)
        BEGIN
            INSERT INTO PageArchives
                (PageType, ThreadId, Html, PageNum, RetrievalAttempted, RetrievalSucceeded, RetrievalDate, RetrievalPriority, ProcessAttempted, ProcessingSucceeded, ProcessDate)
            VALUES
                ('Board', NULL, NULL, @i, 0, NULL, NULL, 0, 0, NULL, NULL);
        END

        SET @i = @i + 1;
    END
END
and this is taking 20+ seconds to execute when boardPageCeiling = 6000. Seems extraordinarily long given the complexity.

insert the below table:
-- Insert one 'Board' row per missing page number in a single statement.
-- NumberTable generates 0..100 recursively; replace 100 with your own
-- upper bound.
;WITH NumberTable AS (
    SELECT 0 AS Number          -- page numbers start at 0
    UNION ALL
    SELECT Number + 1
    FROM NumberTable
    WHERE Number < 100
)
INSERT INTO PageArchives
    (PageType, ThreadId, Html, PageNum, RetrievalAttempted, RetrievalSucceeded, RetrievalDate, RetrievalPriority, ProcessAttempted, ProcessingSucceeded, ProcessDate)
SELECT 'Board', NULL, NULL, nt.Number, 0, NULL, NULL, 0, 0, NULL, NULL
FROM NumberTable AS nt
-- NOT EXISTS instead of NOT IN: a single NULL PageNum would make
-- NOT IN reject every candidate number
WHERE NOT EXISTS (
    SELECT 1
    FROM PageArchives AS pa
    WHERE pa.PageType = 'Board'
      AND pa.PageNum = nt.Number
)
OPTION (MAXRECURSION 0);        -- lift the default recursion limit of 100
This is just a sample query; replace "100" with your own upper bound.

Just use a single query. The only remotely tricky part is generating a sequence of numbers:
-- Insert the missing 'Board' pages 0..@boardPageCeiling with one set-based
-- statement. The number sequence comes from cross joining the ten digits
-- four times, so it covers 0..9999; add another cross join for a larger
-- ceiling.
WITH digits(d) AS (
    SELECT v.d
    FROM (VALUES (0), (1), (2), (3), (4), (5), (6), (7), (8), (9)) AS v(d)
),
n(n) AS (
    -- 10 * 10 * 10 * 10 rows, numbered from 0
    SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) - 1
    FROM digits AS d1
    CROSS JOIN digits AS d2
    CROSS JOIN digits AS d3
    CROSS JOIN digits AS d4
)
INSERT INTO PageArchives (PageType, PageNum, RetrievalAttempted, RetrievalPriority, ProcessAttempted)
SELECT 'Board', n.n, 0, 0, 0
FROM n
WHERE n.n <= @boardPageCeiling
  AND NOT EXISTS (
        -- same check as the original loop: page type AND page number
        SELECT 1
        FROM PageArchives AS pa
        WHERE pa.PageType = 'Board'
          AND pa.PageNum = n.n
      );
I removed the NULL values from the INSERT because these are probably set to NULL by default.

You can improve that query by using a merge statement

SQL Server: Insert Multiple Rows to a table based on a column in a different table

I have a table
-- One row per kindergarten; StudentList is a comma-separated list of names.
CREATE TABLE [StudentsByKindergarten]
(
    [FK_KindergartenId] [int] IDENTITY(1,1) NOT NULL,
    -- nvarchar without a length means nvarchar(1) in a column definition,
    -- which cannot hold a list; (max) holds lists of any length
    [StudentList] [nvarchar](max)
)
where the entries are
(1, "John, Alex, Sarah")
(2, "")
(3, "Jonny")
(4, "John, Alex")
I want to migrate this information to the following table.
-- Target table: one row per (kindergarten, student).
CREATE TABLE [KindergartenStudents]
(
    [FK_KindergartenId] [int] NOT NULL,
    -- an explicit length is required: bare nvarchar would be nvarchar(1)
    [StudentName] [nvarchar](max) NOT NULL
)
so that it will have
(1, "John")
(1, "Alex")
(1, "Sarah")
(3, "Jonny")
(4, "John")
(4, "Alex")
I think I can achieve split function using something like the answer here: How do I split a string so I can access item x?
Using the function here:
http://www.codeproject.com/Articles/7938/SQL-User-Defined-Function-to-Parse-a-Delimited-Str
I can do something like this,
-- Attempt to insert one row per parsed student name.
-- The table and the function are combined with a comma (cross) join, but the
-- function's argument sbk.StudentList is a column of the other FROM-list
-- table. A comma join does not allow that per-row correlation, so this
-- statement does not work as written; passing a column of the left-hand
-- table into a table-valued function requires CROSS APPLY.
INSERT INTO [KindergartenStudents] ([FK_KindergartenId], [Studentname])
SELECT
sbk.FK_KindergartenId,
parsed.txt_value
FROM
[StudentsByKindergarten] sbk, dbo.fn_ParseText2Table(sbk.StudentList,',') parsed
GO
but doesn't seem to work.

Based on this question, I've learned a better approach for this problem. You just need to use CROSS APPLY with your suggested function fn_ParseText2Table.
Sample Fiddle
-- Insert one row per student name: CROSS APPLY runs fn_ParseText2Table once
-- for every source row, with that row's StudentList as its argument.
INSERT INTO KindergartenStudents (FK_KindergartenId, StudentName)
SELECT
    src.FK_KindergartenId,
    item.txt_value
FROM StudentsByKindergarten AS src
CROSS APPLY fn_ParseText2Table(src.StudentList, ',') AS item

I've used the function that you suggested (fn_ParseText2Table) and the following T-SQL is working. You can test it with this fiddle: link.
BEGIN
    -- Row-by-row variant: visit every kindergarten with a non-empty
    -- StudentList and insert its parsed names.
    -- Variables must be prefixed with @.
    DECLARE
        @ID         int,
        @iterations int,
        @list       nvarchar(max);

    -- Number of rows with a non-empty list = number of iterations
    SET @iterations =
        (SELECT COUNT(*)
         FROM StudentsByKindergarten
         WHERE DATALENGTH(StudentList) > 0);

    WHILE (@iterations > 0)
    BEGIN
        -- ID of the row whose row number equals @iterations
        SET @ID =
            (SELECT numbered.FK_KindergartenId
             FROM (SELECT
                       FK_KindergartenId,
                       ROW_NUMBER() OVER (ORDER BY FK_KindergartenId DESC) AS rn
                   FROM StudentsByKindergarten
                   WHERE DATALENGTH(StudentList) > 0) AS numbered
             WHERE numbered.rn = @iterations);

        SET @iterations -= 1;

        -- Fetch the list into a variable so that the function receives a
        -- plain variable argument rather than a subquery
        SET @list =
            (SELECT StudentList
             FROM StudentsByKindergarten
             WHERE FK_KindergartenId = @ID);

        -- Insert the parsed values
        INSERT INTO KindergartenStudents
            (FK_KindergartenId, StudentName)
        SELECT
            @ID,
            parsed.txt_value
        FROM fn_ParseText2Table(@list, ',') AS parsed;
    END
END

tree execution in Sql Server

I have a small DSL where user can express certain conditions for some actions. Now i need to resolve those conditions on sql server.
The nodes in condition are AND/OR/ atom, where AND/OR are binary expressions and atom is Identifier == Operand, with == being the only operator.
So i created following table in Sql Server to store the tree.
-- One row per node of the condition tree; Id encodes the node's position.
CREATE TABLE [dbo].[Condition] (
    [Id]         [hierarchyid],
    [Order]      [int] NULL,
    [NodeType]   [nchar](10),
    [Identifier] [nvarchar](50),
    [Operand]    [nvarchar](255) NULL
)
Is there any way to walk this tree and eval its node in a sql statement ? I can do this in C# with compiled code, but i am stuck thinking about it in sql. I need it in sql because the filtering of data has to happen in sql.
for example if condition is
(T=="T1" || T=="T2") && (R=="R1" || R =="R2") || T=="T3"
The table will look like
Id Order NodeType Identifier Operand Id.ToString()
------------------------------------------------------------------------------------
0x NULL OR NULL NULL /
0x58 1 AND NULL NULL /1/
0x5AC0 1 OR NULL NULL /1/1/
0x5AD6 1 Expr T T1 /1/1/1/
0x5ADA 2 Expr T T2 /1/1/2/
0x5B40 2 OR NULL NULL /1/2/
0x5B56 1 Expr R R1 /1/2/1/
0x5B5A 2 Expr R R2 /1/2/2/
0x68 2 Expr T T3 /2/

Since we don't know the values of the terms until we evaluate them, we need to do this from the bottom up. In code, we have a stack that can keep track of where we are. I didn't really want to build a stack, so I evaluated all the terms from the bottom up.
I tried to do this with a CTE, but failed. I couldn't get both of the terms in the recursive member of the CTE. Hence, I had to write my own loop.
There's a table holding all the intermediate values #I. Each loop we get closer to the top of the tree. When we have the value of the root node, we are done. We'll also be clearing out the unnecessary rows so that at the end, we just have one row.
Here's the schema:
-- Condition tree storage: one row per node, positioned by its hierarchyid.
CREATE TABLE [dbo].[Condition] (
    [Id]         hierarchyid,
    [Order]      [int] NULL,
    [NodeType]   [nchar](10),
    [Identifier] [nvarchar](50),
    [Operand]    [nvarchar](255) NULL
);

-- Sample tree for (T=="T1" || T=="T2") && (R=="R1" || R=="R2") || T=="T3"
INSERT INTO Condition ([Id], [Order], [NodeType], [Identifier], [Operand])
VALUES
    (0x,     NULL, 'OR',   NULL, NULL),
    (0x58,   1,    'AND',  NULL, NULL),
    (0x5ac0, 1,    'OR',   NULL, NULL),
    (0x5ad6, 1,    'Expr', 'T',  'T1'),
    (0x5ada, 2,    'Expr', 'T',  'T2'),
    (0x5b40, 2,    'OR',   NULL, NULL),
    (0x5b56, 1,    'Expr', 'R',  'R1'),
    (0x5b5a, 2,    'Expr', 'R',  'R2'),
    (0x68,   2,    'Expr', 'T',  'T3');
Here's the code assuming that #T and #R are the identifiers we are looking at:
-- Evaluate the condition tree bottom-up for the given identifier values.
-- @I holds the values computed so far; each pass resolves the operator nodes
-- whose two children are already known and then discards those children,
-- until only the root (id = 0x) is left.
-- Variables and table variables must be prefixed with @.
declare @T varchar(max) = 'T1';
declare @R varchar(max) = 'R1';
declare @resolved int;

declare @I table (
    id hierarchyId,
    "order" int,
    value bit
);

-- Leaves: an Expr node is true when its operand equals the value supplied
-- for its identifier.
insert @I (id, "order", value)
select id, "order",
    case when operand =
            case when identifier = 'T' then @T when identifier = 'R' then @R end
        then 1 else 0 end
from condition
where nodetype = 'Expr';

while not exists (select * from @I where id = 0x) begin
    -- Resolve every operator node whose first ("order" = 1) and second
    -- ("order" = 2) child already have a value.
    insert @I (id, "order", value)
    select node.id, node."order",
        case
            when nodetype = 'AND' then
                case when L.value = 1 and R.value = 1 then 1 else 0 end
            when nodetype = 'OR' then
                case when L.value = 1 or R.value = 1 then 1 else 0 end
        end
    from condition node
    join @I L on L.id.GetAncestor(1) = node.id and L."order" = 1
    join @I R on R.id.GetAncestor(1) = node.id and R."order" = 2;

    set @resolved = @@ROWCOUNT;

    -- A pass that resolves nothing can never reach the root (for example a
    -- node with a missing child); stop instead of looping forever.
    if @resolved = 0 break;

    -- The children of resolved nodes are no longer needed.
    delete from @I where id.GetAncestor(1) in (select id from @I);
end

select *, id.ToString() from @I
Here's the fiddle: http://sqlfiddle.com/#!6/8e5cc/1

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

Can I tokenize a string using t-SQL - sql

It's possible. You could do it with some repetitive calls to CHARINDEX and checking for nulls but it may be better and clearer to write a FUNCTION to split the string.

Related

sql SERVER - distinct selection based on priority columns

Want to compare 4 different columns with the result of CTE

Why does this query take so long, and how I can increase its performance?

SQL Server: Insert Multiple Rows to a table based on a column in a different table

tree execution in Sql Server

Categories

Resources