Trying to update substrings in SQL - sql

Can anyone help with some SQL syntax?
I have a table (TABLE A) which contains a 54-character field (FIELD A) which in essence contains 9 blocks of 6 characters each.
These can be broken down into substrings:
substr(FIELD A,1,6) as A
substr(FIELD A,7,6) as B
substr(FIELD A,13,6) as C
substr(FIELD A,19,6) as D
substr(FIELD A,25,6) as E
substr(FIELD A,31,6) as F
substr(FIELD A,37,6) as G
substr(FIELD A,43,6) as H
substr(FIELD A,49,6) as I
What I need to do is: if there is an occurrence of '404040' in any of these substring fields (A to I), replace it with '000000'. The '404040' has to be in these exact positions, not just a 'like %404040%' anywhere in FIELD A. I don't think I can perform an Update on a substring(?), and my efforts at using Replace haven't worked yet. Can anyone suggest a solution?
I'm using SQL Developer 3.2.20.10 and Oracle 12.
Many Thanks AP

You can achieve this using two functions: Replacepos, which replaces the single character at a given position, and Replacepos1, which applies that replacement to no_of_chars consecutive characters starting at a given position. After creating them you can use the SQL below.
-- Demo query: zero out every 6-character block of a 54-character value that
-- is exactly '404040'. Each CTE (d1..d9) handles one block position
-- (1, 7, 13, ..., 49) and hands its result to the next one.
--
-- The block is tested with SUBSTR at its fixed position rather than with
-- INSTR, so an unaligned '404040' elsewhere in the value is never matched,
-- and the ELSE branch passes the value through unchanged when the block does
-- not match (a DECODE without a default would turn it into NULL).
WITH data
     AS (SELECT ROWNUM rw,
                Lpad('404040', 54, '404040') A
         FROM   dual),
     d1
     AS (SELECT rw,
                CASE WHEN Substr(a, 1, 6) = '404040'
                     THEN Replacepos1(a, '0', 1, 6) ELSE a END A
         FROM   data),
     d2
     AS (SELECT rw,
                CASE WHEN Substr(a, 7, 6) = '404040'
                     THEN Replacepos1(a, '0', 7, 6) ELSE a END A
         FROM   d1),
     d3
     AS (SELECT rw,
                CASE WHEN Substr(a, 13, 6) = '404040'
                     THEN Replacepos1(a, '0', 13, 6) ELSE a END A
         FROM   d2),
     d4
     AS (SELECT rw,
                CASE WHEN Substr(a, 19, 6) = '404040'
                     THEN Replacepos1(a, '0', 19, 6) ELSE a END A
         FROM   d3),
     d5
     AS (SELECT rw,
                CASE WHEN Substr(a, 25, 6) = '404040'
                     THEN Replacepos1(a, '0', 25, 6) ELSE a END A
         FROM   d4),
     d6
     AS (SELECT rw,
                CASE WHEN Substr(a, 31, 6) = '404040'
                     THEN Replacepos1(a, '0', 31, 6) ELSE a END A
         FROM   d5),
     d7
     AS (SELECT rw,
                CASE WHEN Substr(a, 37, 6) = '404040'
                     THEN Replacepos1(a, '0', 37, 6) ELSE a END A
         FROM   d6),
     d8
     AS (SELECT rw,
                CASE WHEN Substr(a, 43, 6) = '404040'
                     THEN Replacepos1(a, '0', 43, 6) ELSE a END A
         FROM   d7),
     d9
     AS (SELECT rw,
                CASE WHEN Substr(a, 49, 6) = '404040'
                     THEN Replacepos1(a, '0', 49, 6) ELSE a END A
         FROM   d8)
SELECT A
FROM   d9;
The function definitions are below:
CREATE OR replace FUNCTION Replacepos(source_in      IN VARCHAR2,
                                      replacechar_in IN VARCHAR2,
                                      position_start IN NUMBER)
RETURN VARCHAR2
IS
BEGIN
    -- Rebuild the string from three pieces:
    --   1. everything before position_start,
    --   2. the first character of replacechar_in (longer input is truncated),
    --   3. everything after position_start.
    RETURN Substr(source_in, 1, position_start - 1)
           || Substr(replacechar_in, 1, 1)
           || Substr(source_in, position_start + 1);
END replacepos;
And the second function, which calls the first one once per character:
CREATE OR replace FUNCTION Replacepos1 (source_in      IN VARCHAR2,
                                        replacechar_in IN VARCHAR2,
                                        position_start IN NUMBER,
                                        no_of_chars    IN NUMBER)
RETURN VARCHAR2
IS
    -- Working copy of the input; one character is overwritten per iteration.
    l_returnvalue VARCHAR2(32767);
BEGIN
    l_returnvalue := source_in;

    -- Overwrite no_of_chars consecutive characters, starting at
    -- position_start, by calling Replacepos once per position.
    FOR i IN 1..no_of_chars LOOP
        l_returnvalue := Replacepos(l_returnvalue, replacechar_in,
                                    position_start + i - 1);
    END LOOP;

    -- The listing previously stopped after the loop: without a RETURN and
    -- the closing END the function cannot compile or hand back its result.
    RETURN l_returnvalue;
END replacepos1;

Related

While loop doesn't exit when eof is detected

I'm having troubles with the eof sequence at the while loop. Basically I have to read a txt file (sequence) and each character has a different character that will be printed on an exit.txt file. But my while loop doesn't recognize the eof. Here's my code.
program LaboratorioPascal;
uses crt;
var
sec, sal: Textfile;
v: char;
por_especial, cont_palabra, cont_caracter, cont_especial: integer;
vocales2: set of char;
pares: set of char;
impares: set of char;
consonantes: set of char;
consonantes2: set of char;
// Writes the encoded form of the digit character x to the output file sal.
// Per the table below, even digits are written as their double and odd
// digits as their cube; any other character writes nothing.
procedure numeros(var x: char);
begin
  case x of
    '0': Write(sal, '0');
    '1': Write(sal, '1');
    '2': Write(sal, '4');
    '3': Write(sal, '2', '7');
    '4': Write(sal, '8');
    '5': Write(sal, '1', '2', '5');
    '6': Write(sal, '1', '2');
    '7': Write(sal, '3', '4', '3');
    '8': Write(sal, '1', '6');
    '9': Write(sal, '7', '2', '9');
  end;
end;
// Returns the substitute for the vowel s: a->o, e->u, i->a, o->e.
// Every other character (including 'u') maps to 'i'.
function vocales(var s: char): char;
begin
  if s = 'a' then
    vocales := 'o'
  else if s = 'e' then
    vocales := 'u'
  else if s = 'i' then
    vocales := 'a'
  else if s = 'o' then
    vocales := 'e'
  else
    vocales := 'i';
end;
// Main program: reads input.txt one character at a time and writes an
// encoded copy to salida.txt. Consonants become '1', vowels go through
// vocales(), digits go through numeros(), anything else becomes '#'.
begin
assign(sec, 'input.txt'); // bind sec to the file to read from
reset(sec); // open the input sequence for reading
read(sec, v); // read the first character of the sequence (avz(sec, v))
assign(sal, 'salida.txt');
rewrite(sal);
// Character classes used to decide how each input character is encoded.
vocales2 := ['a', 'e', 'i', 'o', 'u'];
pares := ['0', '2', '4', '6', '8'];
impares := ['1', '3', '5', '7', '9'];
consonantes := ['b', 'c', 'd', 'f', 'g', 'h', 'j','k','l','m', 'n'];
consonantes2 := ['p', 'q', 'r', 's', 't', 'v', 'w', 'x', 'y', 'z'];
// por_especial and cont_caracter are initialised here but never updated or
// read anywhere else in this block.
por_especial := 0;
cont_palabra := 0;
cont_caracter := 0;
cont_especial := 0;
writeln('El objetivo de este programa es cifrar un mensaje para favorecer a la inteligencia Rusa.');
// Outer loop: one iteration per word. eof(sec) is only tested here, i.e.
// once per word, not once per character.
while not eof(sec) do
begin
// Copy any run of spaces straight through to the output.
while v = ' ' do
begin
write(sal, ' ');
read(sec, v);
end;
cont_palabra := cont_palabra + 1;
// NOTE(review): this loop only stops on a space and never tests eof(sec).
// If the file does not end with a space, the read at the bottom of the loop
// keeps being called after the end of the file has been reached.
while v <> ' ' do
begin
if (v in consonantes) or (v in consonantes2) then
begin
write(sal, '1');
end
else
begin
if v in vocales2 then
begin
Write(sal, vocales(v));
end
else
begin
// NOTE(review): the ';' right after 'then' ends the if with an empty
// statement, so the begin..end that follows runs unconditionally and
// numeros(v) is called for every character that reaches this branch.
if v in pares then;
begin
numeros(v);
end;
begin
// Because of the unconditional call above, an odd digit reaches numeros a
// second time here, and an even digit falls into the else branch below.
if v in impares then
begin
numeros(v);
end
else
begin
cont_especial := cont_especial + 1;
Write(sal, '#');
end;
end;
end;
end;
read(sec, v);
end;
end;
// Report the number of words counted, then release both files.
write(cont_palabra, ' se crifraon con [Exito]');
close(sec);
close(sal);
end.
But the result I have in the exit file (salida.txt) is
1o1ao i1o 1u1 i1 1e1111ie 1iu 1u 1e1ae o i1o 11a11u1o### 1a1########################################################################################################################################################################################################
I've done my research about the eof topic, but I can't find anything about pascal. And if I try to put an
if eof then
Exit;
end;
inside the while loop, it just reads one character from the input.txt file.
The problem is that you are in the inner loop ("while v <> ' ' do") when you come to the end of your input file.
If the last character in the input file is a space, you jump out of the inner loop and out of the outer loop, because you reached eof.
But if it isn't, you stay in the inner loop, and keep reading beyond the eof, until you encounter a space or a problem.
You can change the inner loop's
"while v <> ' ' do"
to
"while (v <> ' ') and (not eof(sec)) do".
Or make it one loop and handle the space in an if statement.

How much unique data is there, put it all in a table

I would like to query in SQL how many unique values there are in each column and how many rows there are in total. In Python, I could do it like this. But how do I do this in SQL so that I get the result shown at the bottom?
In Python I could do the following
# Sample data from the question: three integer columns, eight rows.
d = {
    'sellerid': [1, 1, 1, 2, 2, 3, 3, 3],
    'modelnumber': [85, 45, 85, 12, 85, 74, 85, 12],
    'modelgroup': [2, 3, 2, 1, 2, 3, 2, 1],
}
df = pd.DataFrame(d)
display(df.head(10))
df['Dataframe'] = 'df'

# Number of distinct values in each column of interest.
unique_sellerid, unique_modelnumber, unique_modelgroup = (
    df[column].nunique()
    for column in ('sellerid', 'modelnumber', 'modelgroup')
)
print("unique_sellerid", unique_sellerid)
print("unique_modelnumber", unique_modelnumber)
print("unique_modelgroup", unique_modelgroup)

# Total number of rows in the frame.
total_rows = len(df)
print("total_rows", total_rows)
[OUT]
unique_sellerid 3
unique_modelnumber 4
unique_modelgroup 3
total_rows 8
I want a query like
Here is the dummy table
-- Dummy table for the question: one row per listed car.
-- The last column definition must not be followed by a comma; a trailing
-- comma before the closing parenthesis is a syntax error.
CREATE TABLE cars (
    sellerid    INT NOT NULL,
    modelnumber INT NOT NULL,
    modelgroup  INT
);

-- Eight sample rows: 3 distinct sellers, 4 distinct model numbers,
-- 3 distinct model groups.
INSERT INTO cars
    (sellerid, modelnumber, modelgroup)
VALUES
    (1, 85, 2),
    (1, 45, 3),
    (1, 85, 2),
    (2, 12, 1),
    (2, 85, 2),
    (3, 74, 3),
    (3, 85, 2),
    (3, 12, 1);
You could use the count(distinct column) aggregation function like :
-- One result row for the cars table: the number of distinct values in each
-- column plus the total row count.
-- count(distinct ...) ignores NULLs, which matters for the nullable
-- modelgroup column; count(*) counts every row.
select
    count(distinct sellerid)    as unique_sellerid,
    count(distinct modelnumber) as unique_modelnumber,
    count(distinct modelgroup)  as unique_modelgroup,
    count(*)                    as total_rows
from cars
In pandas, you can also apply nunique() to the whole DataFrame rather than to each column separately: df.nunique()

Pure PostgreSQL replacement for PL/R sample() function?

Our new database does not (and will not) support PL/R usage, which we rely on extensively to implement a random weighted sample function:
CREATE OR REPLACE FUNCTION sample(
    ids bigint[],
    size integer,
    seed integer DEFAULT 1,
    with_replacement boolean DEFAULT false,
    probabilities numeric[] DEFAULT NULL::numeric[])
RETURNS bigint[]
LANGUAGE 'plr'
COST 100
VOLATILE
AS $BODY$
# Body is R (PL/R). Seed the generator first, then draw `size` ids.
set.seed(seed)
ids <- as.integer(ids)
if (length(ids) != 1) {
    # General case: draw from the id vector, optionally weighted.
    s <- sample(ids, size, with_replacement, probabilities)
} else {
    # A single id is repeated `size` times instead of being passed to
    # sample(), which would treat a lone number n as the range 1:n.
    s <- rep(ids, size)
}
return(s)
$BODY$;
Is there a purely SQL approach to this same function? This post shows an approach that selects a single random row, but does not have the functionality of sampling multiple groups at once.
As far as I know, SQL Fiddle does not support PLR, so see below for a quick replication example:
-- Replication fixture: 15 ids with weights, five in each of three categories.
CREATE TABLE test (
    category text,
    uid      integer,
    weight   numeric
);

INSERT INTO test (category, uid, weight)
VALUES
    ('a', 1, 45),
    ('a', 2, 10),
    ('a', 3, 25),
    ('a', 4, 100),
    ('a', 5, 30),
    ('b', 6, 20),
    ('b', 7, 10),
    ('b', 8, 80),
    ('b', 9, 40),
    ('b', 10, 15),
    ('c', 11, 20),
    ('c', 12, 10),
    ('c', 13, 80),
    ('c', 14, 40),
    ('c', 15, 15);

-- For categories 'a' and 'b', pass the ids and their weights (both ordered
-- by uid so the two arrays line up) to sample() and unnest the result.
-- The literal arguments are 1, 1 and TRUE; assuming diffusion_shared.sample
-- has the signature shown in the question, these are size, seed and
-- with_replacement.
SELECT
    category,
    unnest(
        diffusion_shared.sample(
            array_agg(uid ORDER BY uid),
            1,
            1,
            TRUE,
            array_agg(weight ORDER BY uid)
        )
    ) AS uid
FROM test
WHERE category IN ('a', 'b')
GROUP BY category;
Which outputs:
category uid
'a' 4
'b' 8
Any ideas?

Get values after and before specific character in SQL/PL SQL?

I have a string value as a parameter and I need to parse it. My value is :
param := ('1234#5432#4567#8763');
I have to get 1234, 5432, 4567 and 8763 values partially. I will set these values different parameters.
How can I solve it with SQL?
Thanks,
-- Returns one row per '#'-separated token. CONNECT BY generates as many
-- rows as there are separators plus one, and REGEXP_SUBSTR picks the
-- LEVEL-th run of digits from the string.
SELECT LEVEL,
       REGEXP_SUBSTR(src.a, '\d+', 1, LEVEL)
FROM   (SELECT '1234#5432#4567#8763' a
        FROM   dual) src
CONNECT BY LEVEL <= REGEXP_COUNT(src.a, '#') + 1
Assuming that you are in PL/SQL and you need to split a value of a parameter or a variable into four variables, this could be a way:
declare
    -- Input string and the four pieces it is split into.
    param  varchar2(100) := '1234#5432#4567#8763';
    param1 varchar2(100);
    param2 varchar2(100);
    param3 varchar2(100);
    param4 varchar2(100);
    -- Positions of the 1st, 2nd and 3rd '#' separator in param.
    sep1   pls_integer;
    sep2   pls_integer;
    sep3   pls_integer;
begin
    sep1 := instr(param, '#', 1, 1);
    sep2 := instr(param, '#', 1, 2);
    sep3 := instr(param, '#', 1, 3);

    -- Each piece is the text strictly between two neighbouring separators;
    -- the first starts at position 1 and the last runs to the end.
    param1 := substr(param, 1, sep1 - 1);
    param2 := substr(param, sep1 + 1, sep2 - sep1 - 1);
    param3 := substr(param, sep2 + 1, sep3 - sep2 - 1);
    param4 := substr(param, sep3 + 1);

    dbms_output.put_line('Param1: ' || param1);
    dbms_output.put_line('Param2: ' || param2);
    dbms_output.put_line('Param3: ' || param3);
    dbms_output.put_line('Param4: ' || param4);
end;
With regular expressions, you can get the same result by searching for the 1st, 2nd, ... occurrence of a string that is followed by a # or by the end of the line ('$'); a fuller explanation of this approach is given in the link posted by Gary_W in his comment.
...
param1 := regexp_substr(param, '(.*?)(#|$)', 1, 1, '', 1 );
param2 := regexp_substr(param, '(.*?)(#|$)', 1, 2, '', 1 );
param3 := regexp_substr(param, '(.*?)(#|$)', 1, 3, '', 1 );
param4 := regexp_substr(param, '(.*?)(#|$)', 1, 4, '', 1 );
...

SQL Server Fuzzy Search with Percentage of match

I am using SQL Server 2008 R2 SP1.
I have a table with about 36034 records of customers.
I am trying to implement fuzzy search on the Customer Name field.
Here is Function for Fuzzy Search
-- Scalar fuzzy-match score between @Reference and @Target, as a percentage.
--   * 100.00 when the two strings compare equal.
--   * 0.00 when either argument is NULL.
--   * Otherwise SUM(LetterScore) * 100 / WordLength^2, where WordLength is
--     the larger DATALENGTH of the two inputs and, for each position in the
--     (underscore-padded) reference, LetterScore is WordLength minus the
--     smallest distance to a position in @Target holding the same character
--     (0 when the character does not occur in @Target).
-- T-SQL parameters and local variables must start with '@'; the listing had
-- them written with '#', which does not compile.
-- The inline number lists stop at 200, matching the VARCHAR(200) parameters.
ALTER FUNCTION [Party].[FuzySearch]
(
    @Reference VARCHAR(200) ,
    @Target VARCHAR(200)
)
RETURNS DECIMAL(5, 2)
WITH SCHEMABINDING
AS
BEGIN
    DECLARE @score DECIMAL(5, 2)
    SELECT @score = CASE WHEN @Reference = @Target
                         THEN CAST(100 AS NUMERIC(5, 2))
                         WHEN @Reference IS NULL
                              OR @Target IS NULL
                         THEN CAST(0 AS NUMERIC(5, 2))
                         ELSE ( SELECT [Score %] = CAST(SUM(LetterScore)
                                                   * 100.0 / MAX(WordLength
                                                   * WordLength) AS NUMERIC(5, 2))
                                FROM ( -- do: one row per reference position with its score
                                    SELECT seq = t1.n ,
                                           ref.Letter ,
                                           v.WordLength ,
                                           LetterScore = v.WordLength
                                                         - ISNULL(MIN(tgt.n), v.WordLength)
                                    FROM ( -- v: both strings padded with '_' to the common length
                                        SELECT Reference = LEFT(@Reference
                                                           + REPLICATE('_', WordLength),
                                                           WordLength) ,
                                               Target = LEFT(@Target
                                                        + REPLICATE('_', WordLength),
                                                        WordLength) ,
                                               WordLength = WordLength
                                        FROM ( -- di: the larger of the two byte lengths
                                            SELECT WordLength = MAX(WordLength)
                                            FROM ( VALUES
                                                   ( DATALENGTH(@Reference)),
                                                   ( DATALENGTH(@Target)) ) d ( WordLength )
                                        ) di
                                    ) v
                                    CROSS APPLY ( -- t1: positions 1..WordLength of the reference
                                        SELECT TOP ( WordLength )
                                               n
                                        FROM ( VALUES
                                               (1), (2), (3), (4), (5), (6), (7), (8), (9), (10),
                                               (11), (12), (13), (14), (15), (16), (17), (18), (19), (20),
                                               (21), (22), (23), (24), (25), (26), (27), (28), (29), (30),
                                               (31), (32), (33), (34), (35), (36), (37), (38), (39), (40),
                                               (41), (42), (43), (44), (45), (46), (47), (48), (49), (50),
                                               (51), (52), (53), (54), (55), (56), (57), (58), (59), (60),
                                               (61), (62), (63), (64), (65), (66), (67), (68), (69), (70),
                                               (71), (72), (73), (74), (75), (76), (77), (78), (79), (80),
                                               (81), (82), (83), (84), (85), (86), (87), (88), (89), (90),
                                               (91), (92), (93), (94), (95), (96), (97), (98), (99), (100),
                                               (101), (102), (103), (104), (105), (106), (107), (108), (109), (110),
                                               (111), (112), (113), (114), (115), (116), (117), (118), (119), (120),
                                               (121), (122), (123), (124), (125), (126), (127), (128), (129), (130),
                                               (131), (132), (133), (134), (135), (136), (137), (138), (139), (140),
                                               (141), (142), (143), (144), (145), (146), (147), (148), (149), (150),
                                               (151), (152), (153), (154), (155), (156), (157), (158), (159), (160),
                                               (161), (162), (163), (164), (165), (166), (167), (168), (169), (170),
                                               (171), (172), (173), (174), (175), (176), (177), (178), (179), (180),
                                               (181), (182), (183), (184), (185), (186), (187), (188), (189), (190),
                                               (191), (192), (193), (194), (195), (196), (197), (198), (199), (200)
                                             ) t2 ( n )
                                    ) t1
                                    CROSS APPLY ( -- ref: the reference character at position t1.n
                                        SELECT Letter = SUBSTRING(Reference, t1.n, 1)
                                    ) ref
                                    OUTER APPLY ( -- tgt: distance to every target position holding that character
                                        SELECT TOP ( WordLength )
                                               n = ABS(t1.n - t2.n)
                                        FROM ( VALUES
                                               (1), (2), (3), (4), (5), (6), (7), (8), (9), (10),
                                               (11), (12), (13), (14), (15), (16), (17), (18), (19), (20),
                                               (21), (22), (23), (24), (25), (26), (27), (28), (29), (30),
                                               (31), (32), (33), (34), (35), (36), (37), (38), (39), (40),
                                               (41), (42), (43), (44), (45), (46), (47), (48), (49), (50),
                                               (51), (52), (53), (54), (55), (56), (57), (58), (59), (60),
                                               (61), (62), (63), (64), (65), (66), (67), (68), (69), (70),
                                               (71), (72), (73), (74), (75), (76), (77), (78), (79), (80),
                                               (81), (82), (83), (84), (85), (86), (87), (88), (89), (90),
                                               (91), (92), (93), (94), (95), (96), (97), (98), (99), (100),
                                               (101), (102), (103), (104), (105), (106), (107), (108), (109), (110),
                                               (111), (112), (113), (114), (115), (116), (117), (118), (119), (120),
                                               (121), (122), (123), (124), (125), (126), (127), (128), (129), (130),
                                               (131), (132), (133), (134), (135), (136), (137), (138), (139), (140),
                                               (141), (142), (143), (144), (145), (146), (147), (148), (149), (150),
                                               (151), (152), (153), (154), (155), (156), (157), (158), (159), (160),
                                               (161), (162), (163), (164), (165), (166), (167), (168), (169), (170),
                                               (171), (172), (173), (174), (175), (176), (177), (178), (179), (180),
                                               (181), (182), (183), (184), (185), (186), (187), (188), (189), (190),
                                               (191), (192), (193), (194), (195), (196), (197), (198), (199), (200)
                                             ) t2 ( n )
                                        WHERE SUBSTRING(@Target, t2.n, 1) = ref.Letter
                                    ) tgt
                                    GROUP BY t1.n ,
                                             ref.Letter ,
                                             v.WordLength
                                ) do
                              )
                    END
    RETURN @score
END
Here is the query to call the function
select [Party].[FuzySearch]('First Name Middle Name Last Name', C.FirstName) from dbo.Customer C
This is taking about 2 minutes 22 seconds to give me the percentage of fuzzy match for all
How can I fix this to run in less than a second? Any suggestions on making my function more robust are also welcome.
Expected output is 45.34, 40.00, 100.00, 23.00, 81.23.....
The best I have been able to do is simplify some of the query, and change it to a table valued function. Scalar functions are notoriously poor performers, and the benefit of an inline TVF is that the query definition is expanded out into the main query, much like a view.
This reduces the execution time significantly on the tests I have done.
-- Inline table-valued version of the fuzzy-match score. Returns a single
-- row with one column, Score:
--   * 100 when @Reference = @Target,
--   * 0 when either argument is NULL,
--   * otherwise SUM(LetterScore) * 100 / WordLength^2, as NUMERIC(5, 2).
-- Changes against the previous listing:
--   * parameters use the '@' sigil required by T-SQL (they were written '#');
--   * the Target padding length is Reference minus Target; the operands were
--     reversed, giving a negative REPLICATE length and therefore NULL;
--   * Scores uses a LEFT JOIN so that characters without a match contribute
--     0 through ISNULL; with the INNER JOIN a pair of strings with no common
--     character produced no row at all, so CROSS APPLY dropped the customer.
ALTER FUNCTION dbo.FuzySearchTVF (@Reference VARCHAR(200), @Target VARCHAR(200))
RETURNS TABLE
AS
RETURN
(   WITH N (n) AS
    (   -- Positions 1..max(length); 10 x 10 x 2 = 200 candidate rows, which
        -- covers the VARCHAR(200) parameters.
        SELECT  TOP (ISNULL(CASE WHEN DATALENGTH(@Reference) > DATALENGTH(@Target)
                                THEN DATALENGTH(@Reference)
                                ELSE DATALENGTH(@Target)
                            END, 0))
                ROW_NUMBER() OVER(ORDER BY n1.n)
        FROM    (VALUES (1), (1), (1), (1), (1), (1), (1), (1), (1), (1)) AS N1 (n)
        CROSS JOIN (VALUES (1), (1), (1), (1), (1), (1), (1), (1), (1), (1)) AS N2 (n)
        CROSS JOIN (VALUES (1), (1)) AS N3 (n)
        WHERE   @Reference IS NOT NULL AND @Target IS NOT NULL
    ), Src AS
    (   -- Both strings padded with '_' to the length of the longer one.
        SELECT  Reference = CASE WHEN DATALENGTH(@Reference) > DATALENGTH(@Target) THEN @Reference
                                ELSE @Reference + REPLICATE('_', DATALENGTH(@Target) - DATALENGTH(@Reference))
                            END,
                Target = CASE WHEN DATALENGTH(@Target) > DATALENGTH(@Reference) THEN @Target
                                ELSE @Target + REPLICATE('_', DATALENGTH(@Reference) - DATALENGTH(@Target))
                            END,
                WordLength = CASE WHEN DATALENGTH(@Reference) > DATALENGTH(@Target) THEN DATALENGTH(@Reference) ELSE DATALENGTH(@Target) END
        WHERE   @Reference IS NOT NULL
        AND     @Target IS NOT NULL
        AND     @Reference != @Target
    ), Scores AS
    (   -- One row per reference position: WordLength minus the distance to
        -- the nearest target position holding the same character, or 0 when
        -- the character does not occur in @Target.
        SELECT  seq = t1.n ,
                Letter = SUBSTRING(s.Reference, t1.n, 1),
                s.WordLength ,
                LetterScore = s.WordLength - ISNULL(MIN(ABS(t1.n - t2.n)), s.WordLength)
        FROM    Src AS s
                CROSS JOIN N AS t1
                LEFT JOIN N AS t2
                    ON SUBSTRING(@Target, t2.n, 1) = SUBSTRING(s.Reference, t1.n, 1)
        WHERE   @Reference IS NOT NULL
        AND     @Target IS NOT NULL
        AND     @Reference != @Target
        GROUP BY t1.n, SUBSTRING(s.Reference, t1.n, 1), s.WordLength
    )
    -- The three branches have mutually exclusive WHERE conditions.
    SELECT  [Score] = 100
    WHERE   @Reference = @Target
    UNION ALL
    SELECT  0
    WHERE   @Reference IS NULL OR @Target IS NULL
    UNION ALL
    SELECT  CAST(SUM(LetterScore) * 100.0 / MAX(WordLength * WordLength) AS NUMERIC(5, 2))
    FROM    Scores
    WHERE   @Reference IS NOT NULL
    AND     @Target IS NOT NULL
    AND     @Reference != @Target
    GROUP BY WordLength
);
And this would be called as:
-- Score every customer's first name against the search text.
-- The table-valued function defined above is dbo.FuzySearchTVF; the scalar
-- [Party].[FuzySearch] cannot be used with CROSS APPLY.
SELECT  f.Score
FROM    dbo.Customer AS c
        CROSS APPLY dbo.FuzySearchTVF('First Name Middle Name Last Name', c.FirstName) AS f
It is still a fairly complex function though, and, depending on the number of records in your customer table, I think getting it down to 1 second is going to be a bit of a challenge.
This is how I could accomplish this:
Explained further at: SQL Server Fuzzy Search - Levenshtein Algorithm
Create below file using any editor of your choice:
using System;
using System.Data;
using System.Data.SqlClient;
using System.Data.SqlTypes;
using Microsoft.SqlServer.Server;
public partial class StoredFunctions
{
    /// <summary>
    /// Case-insensitive Levenshtein similarity between two strings, as a
    /// percentage rounded to two decimals:
    /// (1 - editDistance / max(length)) * 100.
    /// NULL inputs are treated as empty strings. Two empty strings score 100;
    /// exactly one empty string scores 0.
    /// </summary>
    [Microsoft.SqlServer.Server.SqlFunction(IsDeterministic = true, IsPrecise = false)]
    public static SqlDouble Levenshtein(SqlString stringOne, SqlString stringTwo)
    {
        // NULL becomes "", then both sides are upper-cased for comparison.
        string left = (stringOne.IsNull ? "" : stringOne.Value).ToUpper();
        string right = (stringTwo.IsNull ? "" : stringTwo.Value).ToUpper();

        int leftLength = left.Length;
        int rightLength = right.Length;

        // Trivial cases that need no distance matrix.
        if (leftLength == 0 && rightLength == 0)
        {
            return 100;
        }
        if (leftLength == 0 || rightLength == 0)
        {
            return 0;
        }

        // distance[r, c] = edit distance between the first r characters of
        // left and the first c characters of right.
        int[,] distance = new int[leftLength + 1, rightLength + 1];

        for (int row = 0; row <= leftLength; row++)
        {
            distance[row, 0] = row;
        }
        for (int col = 0; col <= rightLength; col++)
        {
            distance[0, col] = col;
        }

        for (int row = 1; row <= leftLength; row++)
        {
            for (int col = 1; col <= rightLength; col++)
            {
                int substitutionCost = left[row - 1] == right[col - 1] ? 0 : 1;

                int deletion = distance[row - 1, col] + 1;
                int insertion = distance[row, col - 1] + 1;
                int substitution = distance[row - 1, col - 1] + substitutionCost;

                distance[row, col] = Math.Min(Math.Min(deletion, insertion), substitution);
            }
        }

        // Convert the edit distance into a percentage of match.
        double percentage = Math.Round((1.0 - ((double)distance[leftLength, rightLength] / (double)Math.Max(leftLength, rightLength))) * 100.0, 2);
        return percentage;
    }
};
Name it levenshtein.cs
Go to Command Prompt. Go to the directory containing levenshtein.cs, then run csc.exe /t:library /out:UserFunctions.dll levenshtein.cs (you may have to give the full path of csc.exe from .NET Framework 2.0).
Once your DLL is ready. Add it to the assemblies Database>>Programmability>>Assemblies>> New Assembly.
Create function in your database:
-- T-SQL wrapper around the CLR method StoredFunctions.Levenshtein in the
-- UserFunctions assembly. Returns the method's result as FLOAT.
-- Parameter names must start with '@'; written with '#' the statement does
-- not compile.
CREATE FUNCTION dbo.LevenshteinSVF
(
    @S1 NVARCHAR(200) ,
    @S2 NVARCHAR(200)
)
RETURNS FLOAT
AS EXTERNAL NAME
UserFunctions.StoredFunctions.Levenshtein
GO
In my case I had to enable clr:
-- Turn on the 'clr enabled' server option, then apply the change.
EXEC sp_configure 'clr enabled', 1
GO
RECONFIGURE
GO
Test the function:
SELECT dbo.LevenshteinSVF('James','James Bond')
Result: 50 % match