SQL Function - Fuzzy Matching with Levenshtein Distance Algorithm - Return Lowest Value Only

SQL Function - Fuzzy Matching with Levenshtein Distance Algorithm - Return Lowest Value Only - sql

Problem: Need SQL function to return the 'lowest' matching value using the Levenshtein algorithm.
Code:
CREATE FUNCTION ufn_levenshtein(#s1 nvarchar(3999), #s2 nvarchar(3999))
RETURNS int
AS
BEGIN
DECLARE #s1_len int, #s2_len int
DECLARE #i int, #j int, #s1_char nchar, #c int, #c_temp int
DECLARE #cv0 varbinary(8000), #cv1 varbinary(8000)
SELECT
#s1_len = LEN(#s1),
#s2_len = LEN(#s2),
#cv1 = 0x0000,
#j = 1, #i = 1, #c = 0
WHILE #j <= #s2_len
SELECT #cv1 = #cv1 + CAST(#j AS binary(2)), #j = #j + 1
WHILE #i <= #s1_len
BEGIN
SELECT
#s1_char = SUBSTRING(#s1, #i, 1),
#c = #i,
#cv0 = CAST(#i AS binary(2)),
#j = 1
WHILE #j <= #s2_len
BEGIN
SET #c = #c + 1
SET #c_temp = CAST(SUBSTRING(#cv1, #j+#j-1, 2) AS int) +
CASE WHEN #s1_char = SUBSTRING(#s2, #j, 1) THEN 0 ELSE 1 END
IF #c > #c_temp SET #c = #c_temp
SET #c_temp = CAST(SUBSTRING(#cv1, #j+#j+1, 2) AS int)+1
IF #c > #c_temp SET #c = #c_temp
SELECT #cv0 = #cv0 + CAST(#c AS binary(2)), #j = #j + 1
END
SELECT #cv1 = #cv0, #i = #i + 1
END
RETURN #c
END
IF OBJECT_ID('tempdb..#ExistingCustomers') IS NOT NULL
DROP TABLE #ExistingCustomers;
CREATE TABLE #ExistingCustomers
(
Customer VARCHAR(255),
ID INT
)
INSERT #ExistingCustomers SELECT 'Ed''s Barbershop', 1002
INSERT #ExistingCustomers SELECT 'GroceryTown', 1003
INSERT #ExistingCustomers SELECT 'Candy Place', 1004
INSERT #ExistingCustomers SELECT 'Handy Man', 1005
IF OBJECT_ID('tempdb..#POTENTIALCUSTOMERS') IS NOT NULL
DROP TABLE #POTENTIALCUSTOMERS;
CREATE TABLE #POTENTIALCUSTOMERS(Customer VARCHAR(255));
INSERT #POTENTIALCUSTOMERS SELECT 'Eds Barbershop'
INSERT #POTENTIALCUSTOMERS SELECT 'Grocery Town'
INSERT #POTENTIALCUSTOMERS SELECT 'Candy Place'
INSERT #POTENTIALCUSTOMERS SELECT 'Handee Man'
INSERT #POTENTIALCUSTOMERS SELECT 'The Apple Farm'
INSERT #POTENTIALCUSTOMERS SELECT 'Ride-a-Long Bikes'
SELECT A.Customer,
b.ID,
b.Customer as cust,
dbo.ufn_levenshtein(REPLACE(A.Customer, ' ', ''), REPLACE(B.Customer, ' ', '')) as ValueLev
FROM #POTENTIALCUSTOMERS a
LEFT JOIN #ExistingCustomers b ON dbo.ufn_levenshtein(REPLACE(A.Customer, ' ', ''), REPLACE(B.Customer, ' ', '')) < 15;
This returns:
What I would like to return:
Explanation: The results are the 'lowest' values from the Levenshtein algorithm. There are two rows where the Levenshtein scores are the same The Apple Farm and Ride-a-Long Bikes, in which case any of the values is fine, just as long as it is one value.
References:
SQL Fuzzy Join - MSSQL
http://www.kodyaz.com/articles/fuzzy-string-matching-using-levenshtein-distance-sql-server.aspx

You can use CTE to get the result you want if you partition by the potential customer and use the ValueLev to order the results:
;WITH CTE AS
(
SELECT RANK() OVER (PARTITION BY a.Customer ORDER BY dbo.ufn_levenshtein(REPLACE(A.Customer, ' ', ''), REPLACE(B.Customer, ' ', '')) ASC) AS RowNbr,
A.Customer,
b.ID,
b.Customer as cust,
dbo.ufn_levenshtein(REPLACE(A.Customer, ' ', ''), REPLACE(B.Customer, ' ', '')) as ValueLev
FROM #POTENTIALCUSTOMERS a
LEFT JOIN #ExistingCustomers b ON dbo.ufn_levenshtein(REPLACE(A.Customer, ' ', ''), REPLACE(B.Customer, ' ', '')) < 15
)
SELECT Customer,
MIN(ID) AS ID,
MIN(cust) AS cust,
ValueLev
FROM CTE
WHERE CTE.RowNbr = 1
GROUP BY Customer, ValueLev
As you don't mind which result is returned in the case of duplicate ValueLev, use GROUP BY and MIN to scale the results down to one per potential customer.
Output:
Customer ID cust ValueLev
Candy Place 1004 Candy Place 0
Grocery Town 1003 GroceryTown 0
Eds Barbershop 1002 Ed's Barbershop 1
Handee Man 1005 Handy Man 2
The Apple Farm 1004 Candy Place 9
Ride-a-Long Bikes 1003 Candy Place 14

Related

How to insert a hyphen between blocks of alpha and numeric characters

I need to insert hyphens between blocks of alpha and numeric text in a string.
I am not even sure how to start on this problem.
ABC123 -> ABC-123
ABC123XYZ -> ABC-123-XYZ
D123 -> D-123
123C -> 123-C

This one will work for you with single value.
DECLARE #CODE VARCHAR(50) = '12ABC123XYZ'
,#NEWCODE VARCHAR(100) = ''
;WITH CTE
AS (
SELECT NUMBER
,SUBSTRING(#CODE, NUMBER, 1) AS VAL
FROM master.dbo.spt_values
WHERE TYPE = 'P'
AND number BETWEEN 1
AND LEN(#CODE)
)
SELECT #NEWCODE = #NEWCODE + CASE
WHEN ISNUMERIC(C1.VAL) <> ISNUMERIC(ISNULL(C2.VAL, C1.VAL))
THEN '-' + C1.VAL
ELSE C1.VAL
END
FROM CTE C1
LEFT JOIN CTE C2 ON C1.number = C2.number + 1
SELECT #NEWCODE
Result : 12-ABC-123-XYZ
And If you want this to work with table column, you need to create a scalar function.
CREATE FUNCTION CODE_SPLIT
(
#CODE VARCHAR(50)
)
RETURNS VARCHAR(100)
AS
BEGIN
DECLARE #NEWCODE VARCHAR(100) ='';
;WITH CTE
AS (
SELECT NUMBER
,SUBSTRING(#CODE, NUMBER, 1) AS VAL
FROM master.dbo.spt_values
WHERE TYPE = 'P'
AND number BETWEEN 1
AND LEN(#CODE)
)
SELECT #NEWCODE = #NEWCODE + CASE
WHEN ISNUMERIC(C1.VAL) <> ISNUMERIC(ISNULL(C2.VAL, C1.VAL))
THEN '-' + C1.VAL
ELSE C1.VAL
END
FROM CTE C1
LEFT JOIN CTE C2 ON C1.number = C2.number + 1
RETURN #NEWCODE
END
GO
And call it on your actual table
Schema:
SELECT * INTO #TAB FROM(
SELECT 'ABC123' AS CODE
UNION ALL
SELECT 'ABC123XYZ'
UNION ALL
SELECT 'D123'
UNION ALL
SELECT '123C'
)A
SELECT CODE, dbo.CODE_SPLIT(CODE) AS NEWCODE FROM #TAB
Result:
+-----------+-------------+
| CODE | NEWCODE |
+-----------+-------------+
| ABC123 | ABC-123 |
| ABC123XYZ | ABC-123-XYZ |
| D123 | D-123 |
| 123C | 123-C |
+-----------+-------------+

patindex('%[0-9]%') return index of first number.
patindex('%[^0-9]%') return index of first non-numeric character.
You could use recursive CTE and PATINDEX like this.
DECLARE #SampleData AS TABLE
(
TextValue varchar(100)
)
INSERT INTO #SampleData
VALUES ('ABC124'), ('ABC123XYZ'), ('123C'), ('ABC'), ('1A2B3C')
;WITH cte AS
(
SELECT sd.TextValue AS RootText,
sd.TextValue AS CurrentText,
CAST('' AS varchar(100)) AS Result
FROM #SampleData sd
UNION ALL
SELECT
c.RootText,
CASE
WHEN patindex('%[0-9]%', c.CurrentText) = 0 OR patindex('%[^0-9]%', c.CurrentText) = 0
THEN ''
WHEN patindex('%[0-9]%', c.CurrentText) > patindex('%[^0-9]%', c.CurrentText)
THEN RIGHT(c.CurrentText, len(c.CurrentText) - patindex('%[0-9]%', c.CurrentText) + 1)
ELSE RIGHT(c.CurrentText, len(c.CurrentText) - patindex('%[^0-9]%', c.CurrentText) + 1)
END AS CurrentText,
CAST(
CASE
WHEN patindex('%[0-9]%', c.CurrentText) = 0 OR patindex('%[^0-9]%', c.CurrentText) = 0
THEN Result + '-' + c.CurrentText
WHEN patindex('%[0-9]%', c.CurrentText) > patindex('%[^0-9]%', c.CurrentText)
THEN Result + '-' + LEFT(CurrentText, patindex('%[0-9]%', c.CurrentText) - 1)
ELSE Result + '-' + LEFT(CurrentText, patindex('%[^0-9]%', c.CurrentText) - 1)
END AS varchar(100)
) AS Result
FROM cte c
WHERE LEN(CurrentText) > 0
)
SELECT cte.RootText, STUFF(cte.Result, 1,1,'') AS Result FROM cte
WHERE cte.CurrentText = ''
Demo link: http://rextester.com/FTYA72053

SQL Every combination of ID's

Quite a tricky scenario. I have a table as below. Basically I want to get all combinations of ranges from each RangeSet in SQL Server 2012.
Best I show an example of structure and desired output. The problem is the number of RangeSetID's can be dynamic and the number of RangeID's can be dynamic in each range set
RangeID RangeSetID
------------------
1 4
2 4
3 4
4 4
5 2
6 2
7 2
8 2
9 2
10 2
11 1
12 1
13 1
14 1
15 1
16 1
17 3
18 3
19 3
20 3
I need the output to recursively create the below dataset of rates:
1 5 11 17 (first from range4, first from range2, first from range1, first from range3)
1 5 11 18 (first from range4, first from range2, first from range1, second from range3)
1 5 11 19 (first from range4, first from range2, first from range1, third from range3)
1 5 11 20 (first from range4, first from range2, first from range1, fourth from range3)
1 5 12 17 (first from range4, first from range2, second from range1, first from range3)
1 5 12 18 (first from range4, first from range2, second from range1, second from range3)
1 5 12 19
1 5 12 20
And so on until I reach the last RangeID from each RangeSetID and result in
4 10 16 20 (last from range4, last from range2, last from range1, last from range3)
Which will ultimately result in the below where RateID 1 is showing the first result vertically to allow for the dynamic number of RangeSetID's
RateID RangeID
------------------
1 1
1 5
1 11
1 17
2 1
2 5
2 11
2 18
This should result in 11,000 rows (approx). I have tried CROSS JOIN's etc but I cannot get this working at all.
Any geniuses out there please?
Thanks

Guess this should help. Happy coding!
;WITH CTE
AS
(
SELECT * FROM (
SELECT ROW_NUMBER() over (order by [RangeID1] , [RangeID2], [RangeID3], [RangeID4]) as 'RateID', [RangeID1] , [RangeID2], [RangeID3], [RangeID4] FROM
(
select A.RangeID as [RangeID1], B.RangeID as [RangeID2], C.RangeID as [RangeID3], D.RangeID as [RangeID4]
from [Range] as A
inner join [Range] as B on (A.RangeID <= B.RangeID)
inner join [Range] as C on (B.RangeID <= C.RangeID)
inner join [Range] as D on (C.RangeID <= D.RangeID)
where A.RangeSetID <> B.RangeSetID
and B.RangeSetID <> C.RangeSetID
and C.RangeSetID <> D.RangeSetID
) as A) T
UNPIVOT ( RangeID FOR N IN ([RangeID1] , [RangeID2], [RangeID3], [RangeID4] ))P
)
SELECT RateID, RangeID
FROM CTE

This sort of works, but it's way overcomplicated and could do with some tweaking. Note that I changed your sample data, to include gaps to test this works properly.
DECLARE #table TABLE (
range_id INT,
range_set_id INT);
INSERT INTO #table SELECT 1, 4;
INSERT INTO #table SELECT 2, 4;
INSERT INTO #table SELECT 3, 4;
INSERT INTO #table SELECT 5, 4;
INSERT INTO #table SELECT 8, 2;
INSERT INTO #table SELECT 10, 2;
INSERT INTO #table SELECT 17, 2;
INSERT INTO #table SELECT 18, 2;
INSERT INTO #table SELECT 19, 2;
INSERT INTO #table SELECT 20, 2;
INSERT INTO #table SELECT 21, 1;
INSERT INTO #table SELECT 23, 1;
INSERT INTO #table SELECT 28, 1;
INSERT INTO #table SELECT 29, 1;
INSERT INTO #table SELECT 30, 1;
INSERT INTO #table SELECT 33, 1;
INSERT INTO #table SELECT 35, 3;
INSERT INTO #table SELECT 38, 3;
INSERT INTO #table SELECT 39, 3;
INSERT INTO #table SELECT 40, 3;
--Work out the order of the range_set_ids
WITH ordered AS (
SELECT
range_set_id,
range_id,
ROW_NUMBER() OVER (PARTITION BY range_set_id ORDER BY range_id) AS sequential_id
FROM
#table),
ranges AS (
SELECT
range_set_id,
MIN(range_id) AS range_id
FROM
#table
GROUP BY
range_set_id),
range_order AS (
SELECT
range_set_id,
ROW_NUMBER() OVER (ORDER BY range_id) AS order_id
FROM
ranges),
set_count AS (
SELECT
MAX(order_id) AS max_order_id
FROM
range_order),
start_and_end AS (
SELECT
o.range_set_id,
o.order_id,
MIN(range_id) AS min_range_id,
MAX(range_id) AS max_range_id,
COUNT(range_id) AS iterations
FROM
range_order o
INNER JOIN #table t ON t.range_set_id = o.range_set_id
GROUP BY
o.range_set_id,
o.order_id),
toggles AS (
SELECT
s.range_set_id,
s.order_id,
s.iterations AS toggle
FROM
start_and_end s
CROSS JOIN set_count c
WHERE
s.order_id = c.max_order_id
UNION ALL
SELECT
s.range_set_id,
s.order_id,
t.toggle * (s.iterations) AS toggle
FROM
toggles t
INNER JOIN start_and_end s ON s.order_id = t.order_id - 1
WHERE
s.order_id > 0),
toggle_count AS (
SELECT
MAX(toggle * s.iterations) AS max_toggle
FROM
toggles t
CROSS JOIN set_count c
INNER JOIN start_and_end s ON s.order_id = c.max_order_id),
all_combos AS (
SELECT
1 AS rate_set_id,
o.range_set_id,
1 AS sequential_id,
o.order_id,
lt.toggle AS reset_toggle,
ISNULL(t.toggle, 1) AS increment_toggle,
1 AS current_toggle
FROM
range_order o
CROSS JOIN set_count c
INNER JOIN toggles lt ON lt.order_id = o.order_id
LEFT JOIN toggles t ON t.order_id = o.order_id + 1
UNION ALL
SELECT
a.rate_set_id + 1,
a.range_set_id,
CASE
WHEN a.current_toggle = a.reset_toggle THEN 1 --flip back at the end
WHEN a.current_toggle % a.increment_toggle != 0 THEN a.sequential_id --something lower is still toggling
ELSE a.sequential_id + 1 --toggle
END,
a.order_id,
a.reset_toggle,
a.increment_toggle,
CASE
WHEN a.current_toggle < a.reset_toggle THEN a.current_toggle + 1
ELSE 1
END
FROM
all_combos a
CROSS JOIN set_count sc
CROSS JOIN toggle_count tc
WHERE
a.rate_set_id < tc.max_toggle)
SELECT
a.rate_set_id,
a.range_set_id,
o.range_id
FROM
all_combos a
INNER JOIN ordered o ON o.range_set_id = a.range_set_id AND o.sequential_id = a.sequential_id
ORDER BY
a.rate_set_id,
a.order_id
OPTION (MAXRECURSION 0);

Implemented same logic in dynamic query. This should work for you, I guess
declare #i int = 1;
declare #count int = 0;
declare #cols varchar(max) = '';
declare #select varchar(max) = 'select ';
declare #join varchar(max);
declare #where varchar(max);
declare #query varchar(max);
declare #range varchar(100);
declare #prevrange varchar(100);
declare #rangeid varchar(100);
select #count =count(distinct RangeSetID) from [Range];
while #count > 0
begin
set #range = 'Range' + cast(#i as varchar(max));
set #rangeid = 'RangeID' + cast(#i as varchar(max));
set #cols = #cols + #rangeid + ', ';
set #select = #select + #range + '.RangeID as '+#rangeid + ', ';
if #i = 1
begin
set #join = ' from [Range] as ' + #range;
set #where = 'where ' + #range + '.RangeSetID <> ';
end
else
begin
set #prevrange = 'Range' + cast((#i - 1) as varchar(max));
set #join = #join + ' inner join [Range] as ' + #range + ' on (' + #prevrange + '.RangeID <= ' + #range + '.RangeID)';
if(#count = 1)
set #where = #where + #range+ '.RangeSetID';
else
set #where = #where + #range+ '.RangeSetID and '+ #range+ '.RangeSetID <> ';
end
set #i = #i + 1;
set #count = #count - 1;
end
set #query = '
;WITH CTE
AS
(
SELECT * FROM (
SELECT ROW_NUMBER() over (order by '+ SUBSTRING(#cols, 0, LEN(#cols)) + ') as ''RateID'', '+ SUBSTRING(#cols, 0, LEN(#cols)) +' FROM
(
' + SUBSTRING(#select, 0, LEN(#select)) + char(13) + #join + char(13) + #where + '
) as A) T
UNPIVOT ( RangeID FOR N IN ('+(SUBSTRING(#cols, 0, LEN(#cols))) +' ))P
)
SELECT RateID, RangeID
FROM CTE
';
exec (#query);

Hot to convert a variable with value '1,2,3' to a table (every number as a record)

Working on SQL (2005 and 2008)
the variable with value '1,2,3' would be call #cedis and this could to have N number for example
set #cedis='1' or set #cedis='1,2,3,4,5,6,7' or set #cedis='125,98,91'
so important, its this must to be a select only, a loop could not to be use, only a select!
this must to return a (result as ) table with values for example
set #cedis='1,2,3,4' this must to return a result
number 1 2 3 4
declare #cedis varchar(max)
set #cedis='1,58,123,8'
;with datos as
(
my select with is going to return me the table
)
select * from datos
result set is
number
1
58
123
8

If am not wrong this is what you need
DECLARE #cedis VARCHAR(500)='1,2,3,4'
SELECT Split.a.value('.', 'VARCHAR(100)') Numbers
FROM (SELECT Cast ('<M>' + Replace(#cedis, ',', '</M><M>') + '</M>' AS XML) AS Numbers) AS A
CROSS APPLY Numbers.nodes ('/M') AS Split(a)
Result:
Numbers
-------
1
2
3
4

A table valued function would do it.
CREATE FUNCTION [dbo].[fn_Split](#text VARCHAR(MAX), #delimiter VARCHAR(5) = ',')
RETURNS #Strings TABLE
(
position int IDENTITY PRIMARY KEY,
value VARCHAR(8000)
)
AS
BEGIN
DECLARE #index int
SET #index = -1
WHILE (LEN(#text) > 0)
BEGIN
SET #index = CHARINDEX(#delimiter , #text)
IF (#index = 0) AND (LEN(#text) > 0)
BEGIN
INSERT INTO #Strings VALUES (#text)
BREAK
END
IF (#index > 1)
BEGIN
INSERT INTO #Strings VALUES (LEFT(#text, #index - 1))
END
SET #text = RIGHT(#text, (LEN(#text) - (#index+LEN(#delimiter)-1)))
END
RETURN
END
You can call it as follows:
SELECT *
FROM dbo.fn_Split(#cedis,',')

Here is a more generic solution that breaks any given string into a table based on any given separator:
http://rextester.com/VSRDLS48817
Not an original idea, but I've found it very useful.
create function [dbo].[SplitString]
(
#str nvarchar(255),
#separator char(1)
)
returns table
AS
return (
with tokens(p, a, b) AS (
select
cast(1 as int),
cast(1 as int),
charindex(#separator, #str)
union all
select
p + 1,
b + 1,
charindex(#separator, #str, b + 1)
from tokens
where b > 0
)
select
p-1 ItemIndex,
substring(
#str,
a,
case when b > 0 then b-a ELSE LEN(#str) end)
AS Item
from tokens
);

This is another one approach to get required output result
DECLARE #cedis VARCHAR(MAX) ,
#delimeter VARCHAR(10)
SET #cedis = '1,58,123,8,14144,15,155231,15,3647,2347,45,76,68,2354,577,5'
SET #delimeter = ','
SET #cedis = #cedis + #delimeter;
WITH datos
AS ( SELECT n = 1
UNION ALL
SELECT n + 1
FROM datos
WHERE n <= LEN(#cedis)
),
cte
AS ( SELECT T.N ,
ROW_NUMBER() OVER ( ORDER BY T.N ) AS RN
FROM datos AS T
WHERE SUBSTRING(#cedis, T.N, LEN(#delimeter)) = #delimeter
AND LEN(#cedis) >= T.N
)
SELECT SUBSTRING(#cedis, COALESCE(R.N + LEN(#delimeter), 1),
L.N - COALESCE(R.N + LEN(#delimeter), 1)) AS part ,
L.RN AS ID
FROM cte AS L
LEFT JOIN cte AS R ON L.RN = R.RN + 1
OPTION ( MAXRECURSION 1000 )

Replace values in a CSV string

I have a list of products in comma separated fashion and since the item list was replaced with new product items, I am trying to modify this CSV list with new product item list.
create table #tmp
(
id int identity(1,1) not null,
plist varchar(max) null
);
create table #tmpprod
(
oldid int null,
newid int null
);
insert into #tmp(plist) values
('10,11,15,17,19'),
('22,34,44,25'),
('5,6,8,9');
insert into #tmpprod(oldid, newid) values
(5, 109),
(9, 110),
(10, 111),
(15, 112),
(19, 113),
(30, 114),
(34, 222),
(44, 333);
I am trying to use a split fn to convert into rows and then replace these values and then convert columns to rows again. Is it possible in any other manner?
The output will be as:
id
newlist
1
111,11,112,17,113
2
22,222,333,25
3
109,6,8,110

Convert your comma separated list to XML. Use a numbers table, XQuery and position() to get the separate ID's with the position they have in the string. Build the comma separated string using the for xml path('') trick with a left outer join to #tempprod and order by position().
;with C as
(
select T.id,
N.number as Pos,
X.PList.value('(/i[position()=sql:column("N.Number")])[1]', 'int') as PID
from #tmp as T
cross apply (select cast('<i>'+replace(plist, ',', '</i><i>')+'</i>' as xml)) as X(PList)
inner join master..spt_values as N
on N.number between 1 and X.PList.value('count(/i)', 'int')
where N.type = 'P'
)
select C1.id,
stuff((select ','+cast(coalesce(T.newid, C2.PID) as varchar(10))
from C as C2
left outer join #tmpprod as T
on C2.PID = T.oldid
where C1.id = C2.id
order by C2.Pos
for xml path(''), type).value('.', 'varchar(max)'), 1, 1, '')
from C as C1
group by C1.id
Try on SE-Data

Assuming SQL Server 2005 or better, and assuming order isn't important, then given this split function:
CREATE FUNCTION [dbo].[SplitInts]
(
#List VARCHAR(MAX),
#Delimiter CHAR(1)
)
RETURNS TABLE
AS
RETURN ( SELECT Item FROM ( SELECT Item = x.i.value('(./text())[1]', 'int')
FROM
( SELECT [XML] = CONVERT(XML, '<i>' + REPLACE(#List, #Delimiter, '</i><i>')
+ '</i>').query('.') ) AS a CROSS APPLY [XML].nodes('i') AS x(i)
) AS y WHERE Item IS NOT NULL);
GO
You can get this result in the following way:
;WITH x AS
(
SELECT id, item, oldid, [newid], rn = ROW_NUMBER() OVER
(PARTITION BY id
ORDER BY PATINDEX('%,' + RTRIM(s.Item) + ',%', ',' + t.plist + ','))
FROM #tmp AS t CROSS APPLY dbo.SplitInts(t.plist, ',') AS s
LEFT OUTER JOIN #tmpprod AS p ON p.oldid = s.Item
)
SELECT id, newlist = STUFF((SELECT ',' + RTRIM(COALESCE([newid], Item))
FROM x AS x2 WHERE x2.id = x.id
FOR XML PATH(''),
TYPE).value(N'./text()[1]', N'varchar(max)'), 1, 1, '')
FROM x GROUP BY id;
Results:
id
newlist
1
111,11,112,17,113
2
22,222,333,25
3
109,6,8,110
Note that the ROW_NUMBER() / OVER / PARTITION BY / ORDER BY is only there to try to coerce the optimizer to return the rows in that order. You may observe this behavior today and it can change tomorrow depending on statistics or data changes, optimizer changes (service packs, CUs, upgrade, etc.) or other variables.
Long story short: if you're depending on that order, just send the set back to the client, and have the client construct the comma-delimited list. It's probably where this functionality belongs anyway.
That said, in SQL Server 2017+, we can guarantee retaining the order by splitting with OPENJSON() and reassembling with STRING_AGG():
;WITH x AS
(
SELECT o.id, val = COALESCE(n.newid, p.value), p.[key]
FROM #tmp AS o CROSS APPLY
OPENJSON('["' + REPLACE(o.pList, ',', '","') + '"]') AS p
LEFT OUTER JOIN #tmpprod AS n
ON p.value = n.oldid
)
SELECT id, newlist = STRING_AGG(val, ',')
WITHIN GROUP (ORDER BY [key])
FROM x GROUP BY id;
Example db<>fiddle

Thanks for this question - I've just learned something new. The following code is an adaptation of an article written by Rob Volk on exactly this topic. This is a very clever query! I won't copy all of the content down here. I have adapted it to create the results you're looking for in your example.
CREATE TABLE #nums (n INT)
DECLARE #i INT
SET #i = 1
WHILE #i < 8000
BEGIN
INSERT #nums VALUES(#i)
SET #i = #i + 1
END
CREATE TABLE #tmp (
id INT IDENTITY(1,1) not null,
plist VARCHAR(MAX) null
)
INSERT INTO #tmp
VALUES('10,11,15,17,19'),('22,34,44,25'),('5,6,8,9')
CREATE TABLE #tmpprod (
oldid INT NULL,
newid INT NULL
)
INSERT INTO #tmpprod VALUES(5, 109),(9, 110),(10, 111),(15, 112),(19, 113),(30, 114),(34, 222),(44, 333)
;WITH cte AS (SELECT ID, NULLIF(SUBSTRING(',' + plist + ',' , n , CHARINDEX(',' , ',' + plist + ',' , n) - n) , '') AS prod
FROM #nums, #tmp
WHERE ID <= LEN(',' + plist + ',') AND SUBSTRING(',' + plist + ',' , n - 1, 1) = ','
AND CHARINDEX(',' , ',' + plist + ',' , n) - n > 0)
UPDATE t SET plist = (SELECT CAST(CASE WHEN tp.oldid IS NULL THEN cte.prod ELSE tp.newid END AS VARCHAR) + ','
FROM cte LEFT JOIN #tmpprod tp ON cte.prod = tp.oldid
WHERE cte.id = t.id FOR XML PATH(''))
FROM #tmp t WHERE id = t.id
UPDATE #tmp SET plist = SUBSTRING(plist, 1, LEN(plist) -1)
WHERE LEN(plist) > 0 AND SUBSTRING(plist, LEN(plist), 1) = ','
SELECT * FROM #tmp
DROP TABLE #tmp
DROP TABLE #tmpprod
DROP TABLE #nums
The #nums table is a table of sequential integers, the length of which must be greater than the longest CSV you have in your table. The first 8 lines of the script create this table and populate it. Then I've copied in your code, followed by the meat of this query - the very clever single-query parser, described in more detail in the article pointed to above. The common table expression (WITH cte...) does the parsing, and the update script recompiles the results into CSV and updates #tmp.

Adam Machanic's blog contains this posting of a T-SQL only UDF which can accept T-SQL's wildcards for use in replacement.
http://dataeducation.com/splitting-a-string-of-unlimited-length/
For my own use, I adjusted the varchar sizes to max. Also note that this UDF performs rather slowly, but if you cannot use the CLR, it may be an option. The minor changes I made to the author's code may limit use of this to SQL Server 2008r2 and later.
CREATE FUNCTION dbo.PatternReplace
(
#InputString VARCHAR(max),
#Pattern VARCHAR(max),
#ReplaceText VARCHAR(max)
)
RETURNS VARCHAR(max)
AS
BEGIN
DECLARE #Result VARCHAR(max) = ''
-- First character in a match
DECLARE #First INT
-- Next character to start search on
DECLARE #Next INT = 1
-- Length of the total string -- 0 if #InputString is NULL
DECLARE #Len INT = COALESCE(LEN(#InputString), 0)
-- End of a pattern
DECLARE #EndPattern INT
WHILE (#Next <= #Len)
BEGIN
SET #First = PATINDEX('%' + #Pattern + '%', SUBSTRING(#InputString, #Next, #Len))
IF COALESCE(#First, 0) = 0 --no match - return
BEGIN
SET #Result = #Result +
CASE --return NULL, just like REPLACE, if inputs are NULL
WHEN #InputString IS NULL
OR #Pattern IS NULL
OR #ReplaceText IS NULL THEN NULL
ELSE SUBSTRING(#InputString, #Next, #Len)
END
BREAK
END
ELSE
BEGIN
-- Concatenate characters before the match to the result
SET #Result = #Result + SUBSTRING(#InputString, #Next, #First - 1)
SET #Next = #Next + #First - 1
SET #EndPattern = 1
-- Find start of end pattern range
WHILE PATINDEX(#Pattern, SUBSTRING(#InputString, #Next, #EndPattern)) = 0
SET #EndPattern = #EndPattern + 1
-- Find end of pattern range
WHILE PATINDEX(#Pattern, SUBSTRING(#InputString, #Next, #EndPattern)) > 0
AND #Len >= (#Next + #EndPattern - 1)
SET #EndPattern = #EndPattern + 1
--Either at the end of the pattern or #Next + #EndPattern = #Len
SET #Result = #Result + #ReplaceText
SET #Next = #Next + #EndPattern - 1
END
END
RETURN(#Result)
END

How do I join all values from multiple rows into a single row?

Lets say I have the query:
SELECT Foo FROM Bar
Which returns
Foo
A
B
C
What I really what is:
Foo
A,B,C
So all of the values from all of the rows has been collapsed into a single row (the commas are optional).
Is there a way to use a select statement to do this because I do not want to use cursors?

DECLARE #foos VARCHAR(4000)
SELECT #foos = COALESCE(#foos + ',', '') + Foo FROM Bar
SELECT #foos AS Foo

SELECT
(
SELECT
CASE
WHEN ROW_NUMBER() OVER(ORDER BY bar) = 1 THEN ''
ELSE ', '
END + CAST(bar AS VARCHAR)
FROM foo
ORDER BY bar
FOR XML PATH('')
)

Ross,
this should get you started.
DECLARE #r VARCHAR(8000)
SELECT #r = (SELECT DISTINCT Foo + ', ' FROM Bar FOR XML PATH(''))
IF #r IS NOT NULL AND #r <> '' SET #r = SUBSTRING(#r, 1, LEN(#r)-1)
SELECT #r

Try the following
declare #joined varchar(max)
set #joined = ''
select #joined = #joined + IsNull(Foo + ',', '')
from Bar (nolock)
--; Drop last "," if necessary
set #joined = substring(#joined, 1, len(#joined) - (case when len(#joined) > 0 then 1 else 0 end))
select #joined as foo

select max(a),max(b),max(c) from
(
select 'a' as a,null as b,null as c
union
select null,'b',null
union
select null,null,'c'
) as x

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

SQL Function - Fuzzy Matching with Levenshtein Distance Algorithm - Return Lowest Value Only - sql

Related

How to insert a hyphen between blocks of alpha and numeric characters

SQL Every combination of ID's

Hot to convert a variable with value '1,2,3' to a table (every number as a record)

Replace values in a CSV string

How do I join all values from multiple rows into a single row?

Categories

Resources