How to compare two strings based on percent match in SQL - sql

I want to post a solution to a interesting problem I was facing in T-SQL.
The problem:
Compare two string fields based on a percent match.
In addition, the two strings may have the words in them translocated.
For example: "Joni Bravo" and "Bravo Joni". These two strings should return a match of 100%, which means that position is not relevant. Few more things worth noting are that this code is made to compare strings that have space as delimiter in them. If the first string doesnt have space the match is set to 100% without actual check. This was not developed, because the strings this function is ment to compare always contain two or more words. Also, it is written on MS SQL Server 2017 if that mathers.

So here is the solution, hope this helps anyone :)
gl
/****** Object: UserDefinedFunction [dbo].[STRCOMP] Script Date: 29/03/2018 15:31:45 ******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
CREATE FUNCTION [dbo].[STRCOMP] (
-- Add the parameters for the function here
#name_1 varchar(255),#name_2 varchar(255)
)
RETURNS float
AS
BEGIN
-- Declare the return variable and any needed variable here
declare #p int = 0;
declare #c int = 0;
declare #br int = 0;
declare #p_temp int = 0;
declare #emergency_stop int = 0;
declare #fixer int = 0;
declare #table1_temp table (
row_id int identity(1,1),
str1 varchar (255));
declare #table2_temp table (
row_Id int identity(1,1),
str2 varchar (255));
declare #n int = 1;
declare #count int = 1;
declare #result int = 0;
declare #total_result float = 0;
declare #result_temp int = 0;
declare #variable float = 0.0;
--clean the two strings from unwanted symbols and numbers
set #name_1 = REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(#name_1,'!',''),' ',' '),'1',''),'2',''),'3',''),'4',''),'5',''),'0',''),'6',''),'7',''),'8',''),'9','');
set #name_2 = REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(#name_2,'!',''),' ',' '),'1',''),'2',''),'3',''),'4',''),'5',''),'0',''),'6',''),'7',''),'8',''),'9','');
--check if the first string has more than one words inside. If the string does
--not have more than one words, return 100%
set #c = charindex(' ',substring(#name_1,#p,len(#name_1)));
IF(#c = 0)
BEGIN
RETURN 100.00
END;
--main logic of the operation. This is based on sound indexing and comparing the
--outcome. This loops through the string whole words and determines their soundex
--code and then compares it one against the other to produce a definitive number --showing the raw match between the two strings #name_1 and #name_2.
WHILE (#br != 2 or #emergency_stop = 20)
BEGIN
insert into #table1_temp(str1)
select substring (#name_1,#p,#c);
set #p = len(substring (#name_1,#p,#c))+2;
set #p = #p + #p_temp - #fixer;
set #p_temp = #p;
set #c = CASE WHEN charindex(' ',substring(#name_1,#p,len(#name_1))) = 0 THEN len(#name_1) ELSE charindex(' ',substring(#name_1,#p,len(#name_1))) END;
set #fixer = 1;
set #br = CASE WHEN charindex(' ',substring(#name_1,#p,len(#name_1))) = 0 THEN #br + 1 ELSE 0 END;
set #emergency_stop = #emergency_stop +1;
END;
set #p = 0;
set #br = 0;
set #emergency_stop = 0;
set #fixer = 0;
set #p_temp = 0;
set #c = charindex(' ',substring(#name_2,#p,len(#name_2)));
WHILE (#br != 2 or #emergency_stop = 20)
BEGIN
insert into #table2_temp(str2)
select substring (#name_2,#p,#c);
set #p = len(substring (#name_2,#p,#c))+2;
set #p = #p + #p_temp - #fixer;
set #p_temp = #p;
set #c = CASE WHEN charindex(' ',substring(#name_2,#p,len(#name_2))) = 0 THEN len(#name_2) ELSE charindex(' ',substring(#name_2,#p,len(#name_2))) END;
set #fixer = 1;
set #br = CASE WHEN charindex(' ',substring(#name_2,#p,len(#name_2))) = 0 THEN #br + 1 ELSE 0 END;
set #emergency_stop = #emergency_stop +1;
END;
WHILE((select str1 from #table1_temp where row_id = #n) is not null)
BEGIN
set #count = 1;
set #result = 0;
WHILE((select str2 from #table2_temp where row_id = #count) is not null)
BEGIN
set #result_temp = DIFFERENCE((select str1 from #table1_temp where row_id = #n),(select str2 from #table2_temp where row_id = #count));
IF(#result_temp > #result)
BEGIN
set #result = #result_temp;
END;
set #count = #count + 1;
END;
set #total_result = #total_result + #result;
set #n = #n + 1;
END;
--gather the results and transform them in a percent match.
set #variable = (select #total_result / (select max(row_count) from (
select max(row_id) as row_count from #table1_temp
union
select max(row_id) as row_count from #table2_temp) a));
RETURN #variable/4 * 100;
END
GO
PS: I decided to write it in a user-defined function just for the needs of my project.

Related

Generate Next Alphanumeric Code on SQL Server

I need to create a consecutive sequence of varchar(5) (always 5 chars only) code starting from PREVIOUS code.
For example
'00000', '00001', '00002'...'00009', '0000A', '0000B'...'0000Z', '00010','00011'...'ZZZZZ'.
So if I have #PREVIOUS_CODE = '00000', #NEXT_CODE will be '00001'.
If I have #PREVIOUS_CODE = '00009', #NEXT_CODE will be '0000A'
If I have #PREVIOUS_CODE = '0000Z', #NEXT_CODE will be '00010'
So I need something like that
USE [DATABASE]
GO
CREATE PROCEDURE [dbo].[spGetNextCode]
#PREVIOUS_CODE VARCHAR(5)
AS
DECLARE #NEXT_CODE VARCHAR(5)
DO STUFF
...
SELECT #NEXT_CODE AS NEXT_CODE
GO
Any Help?
Just keep an integer counter in the same table and convert it. I'm using the following SQL Server function in one of my applications:
CREATE FUNCTION [dbo].[GetAlphanumericCode]
(
#number BIGINT,
#leadingzeroes INT = 0
)
RETURNS varchar(255)
AS
BEGIN
DECLARE #charPool varchar(36)
SET #charPool = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'
DECLARE #result varchar(255)
IF #number < 0
RETURN ''
IF #number = 0
SET #result = '0'
ELSE BEGIN
SET #result = ''
WHILE (#number > 0)
BEGIN
SET #result = substring(#charPool, #number % 36 + 1, 1) + #result
SET #number = #number / 36
END
END
IF #leadingzeroes > 0 AND len(#result) < #leadingzeroes
SET #result = right(replicate('0', #leadingzeroes) + #result, #leadingzeroes)
RETURN #result
END
It should be a trivial task to rewrite it as a stored procedure

SQL Server stored procedure: finding the values of Odd and even

I'm using #subno as Input. And I had to find the odd and even numbers. 16 is not a fix and it can be any other number. My question is how to find the odd and even number of my input?
Lastly, subno includes ( . ) dot at any position. e.g 123456.789123 I need to find the odd and even number of "123456" and the odd and even number of "789123" the dot ( . ) is the separator.
Once you find the odd for the left side, sum them up together. Once you find the even number for left side sum it up as well and then add the total odd to the total even values. That goes the same for the right side eg "789123".
Please help me. this is my 2nd week of trying to find the solution. Once you find all the total values for each side, multiply them together. example "123456" - total value of odd and even * the "789123" total value of odd and even.
It is for the the check digit validation. Validating the subscriber number. after validating through the calculation it should match the calculated reference number to the valid check digit number. It's the business rule. Kind of algorithm
create procedure ProcedureName
(#subno VARCHAR(16), --Input the 16 subscriber number
#result INT OUT,
)
as
begin
IF(LEN(#subno) <> 16)
SET #result = 1 -- INVALID RESULT
ELSE
IF(#subno % 2 = 0)
SET #result = #subno - even numbers
ELSE
SET #result = #subno --odd numbers
end
Please see below my sample work
-- this is the sample
create procedure ProcedureName
(
#subno VARCHAR(20), --Subscriber no
#result INT OUT, --result is invalid for 1, valid for 0
#payamt int
)
as
DECLARE #WA VARCHAR(2)
DECLARE #Weights varchar(9)
DECLARE #I INT
DECLARE #WD INT
DECLARE #WP INT
DECLARE #A INT
DECLARE #B INT
DECLARE #R INT
DECLARE #WR INT
SET #WR = 0
SET #R = 0
SET #A = 0
SET #B = 0
SET #WP = 0
SET #I = 0
BEGIN
IF (LEN(#subNo) = 7) AND (SUBSTRING(#subno,1,1) = '2') OR (SUBSTRING(#subno,1,1) = '9')
BEGIN
SET #result = 0 --VALID
END
ELSE IF(LEN(#subno) = 8) AND (SUBSTRING(#subno,1,1) = '2') OR
(SUBSTRING(#subno,1,1) = '9')
BEGIN
SET #result = 0 --VALID
END
ELSE IF(LEN(#subno) = 9)
BEGIN
SET #WA = SUBSTRING(#subno,1,2)
IF(#WA = '65')
set #result = 1 -- INVALID
else
BEGIN
SET #Weights = '12121212'
SET #WA = SUBSTRING(#subno,9,1)
SET #WD = 0
SET #I = 1
WHILE #I<9
BEGIN
SET #WP = cast(SUBSTRING(#Weights, #I,1)as int) * cast(SUBSTRING(#subno, #I, 1) as int)
IF(#WP > 9)
BEGIN
SET #A = SUBSTRING(CAST(#WP AS VARCHAR),1,1)
SET #B = SUBSTRING(CAST(#WP AS VARCHAR),2,1)
SET #WP = CAST(#A AS INT) + CAST(#B AS INT)
END
SET #WD = #WP + #WD
SET #I = #I + 1
END
SET #R = #WD % 10
IF(#R <> 0)
SET #WR = 10 - #R
ELSE
SET #WR = #R
IF(#WR <> CAST(#WA AS INT))
BEGIN
SET #result = 1 -- INVALID
END
ELSE
BEGIN
SET #result = 0 -- VALID
END
END
END
ELSE IF (LEN(#subno) = 10)
BEGIN
SET #I =1
SET #WD = 0
SET #Weights = '121212121'
SET #WA = SUBSTRING(#subno,10,1)
WHILE(#I < 10)
BEGIN
SET #WP = CAST(SUBSTRING(#Weights, #I, 1)AS INT) * CAST(SUBSTRING(#subno, #I, 1) AS INT)
IF(#WP > 9)
BEGIN
SET #A = SUBSTRING(CAST(#WP AS VARCHAR),1,1)
SET #B = SUBSTRING(CAST(#WP AS VARCHAR),2,1)
SET #WP = CAST(#A AS INT) + CAST(#B AS INT)
END
SET #WD = #WP + #WD
SET #I = #I + 1
END
SET #R = #WD % 10
IF(#R <> 0)
SET #WR = 10 - #R
ELSE
SET #WR = #R
IF (#WR<> #WA)
BEGIN
SET #result = 1 -- INVALID
END
ELSE
BEGIN
SET #result = 0 -- VALID
END
END
ELSE
SET #result = 1 -- INVALID
END
Split the values which u get . Then iterate iver each side and add them. Please see the sample below.
declare #v varchar (16) , #num1 varchar(20) , #num2 varchar(20)
set #v = '1234567.78906656'
select #num1 = substring(#v,0,charindex('.',#v))
select #num2 = substring(#v,charindex('.',#v)+1,len(#v))
--select #num1 = convert(int, substring(#v,0,charindex('.',#v)))
--select #num2 = substring(#v,charindex('.',#v)+1,len(#v))
declare #index int = 1 ,#len INT , #char CHAR
declare #TotalOddL int = 0
declare #TotalEvenL int = 0
DECLARE #FullTotL INT = 0
declare #TotalOddR int = 0
declare #TotalEvenR int = 0
DECLARE #FullTotR INT = 0
DECLARE #TEMP INT
set #len= LEN(#num1)
WHILE #index <= #len
BEGIN
set #char = SUBSTRING(#num1, #index, 1)
SET #TEMP = cast(#char as int)
IF(#TEMP % 2 = 0)
SET #TotalEvenL = #TotalEvenL + #char
else
SET #TotalOddL = #TotalOddL + #char
SET #FullTotL = #TotalEvenL + #TotalOddL
SET #index= #index+ 1
END
Select 'LeftSide total' , #FullTotL
Select 'Left Side odd' , #TotalOddL
Select 'Left Side Even' , #TotalEvenL
SET #index = 1
set #len= LEN(#num2)
WHILE #index <= #len
BEGIN
set #char = SUBSTRING(#num2, #index, 1)
SET #TEMP = cast(#char as int)
IF(#TEMP % 2 = 0)
SET #TotalEvenR= #TotalEvenR + #char
else
SET #TotalOddR = #TotalOddR + #char
SET #FullTotR = #TotalEvenR + #TotalOddR
SET #index= #index+ 1
END
select 'TotalRSide' , #FullTotR
select 'RsideOdd' , #TotalOddR
select 'RSideEven' , #TotalEvenR
select 'Multiplied value' , #FullTotR * #FullTotL
create or replace procedure prc_even_odd(i_number in number, o_result out varchar2)
as
begin
if (mod(i_number,2) = 0) then
o_result := 'EVEN';
else
o_result := 'ODD';
end prc_even;

Function to check the input number of words

Need help to create a function that returns TRUE or FALSE. TRUE - if type 1 or 3 words (like '__hello_', '_hello', '_hello my frend' - spaces should be cut), if the condition is not fulfilled FALSE
CREATE FUNCTION dbo.nazvFac(#f nvarchar(30))
RETURNS bit
AS
BEGIN
DECLARE #l int = 1, #s nvarchar(30), #i int = 0, #b bit
WHILE LTRIM(RTRIM(LEN(#f))) >= #l --error here, but I do not know how to fix it
BEGIN
SET #s = SUBSTRING(#f, #l, 1)
IF #s BETWEEN 'А' AND 'я'
SET #l += 1
ELSE IF #s = ' '
BEGIN
SET #l -= 1
SET #s = SUBSTRING(#f, #l, 1)
SET #s = RTRIM(#s)
SET #l += 2
SET #i += 1
END
ELSE
BREAK
END
IF #i = 0 OR #i = 2
SET #b = 'TRUE'
ELSE
SET #b = 'FALSE'
RETURN #b
END
GO
WHILE LTRIM(RTRIM(LEN(#f))) >= #l --error here, but I do not know how to fix it
LEN() returns an int, which you are then passing to a string function: RTRIM().
You want to return TRUE only if there are one or three words? This should do it:
CREATE FUNCTION dbo.nazvFac(#f NVARCHAR(30))
RETURNS BIT
AS
BEGIN
DECLARE #l INT, #b BIT
SET #l = LEN(#f) - LEN(REPLACE(#f, ' ', '')) + 1
IF #l == 1 OR #l == 3
SET #b = 'TRUE'
ELSE
SET #b = 'FALSE'
RETURN #b
END
Also, JC. is right about the len() error.
You should trim the string and then check the length.
CREATE FUNCTION dbo.nazvFac(#f NVARCHAR(30))
RETURNS BIT
AS
BEGIN
DECLARE #l INT = 1, #s NVARCHAR(30), #i INT = 0, #b BIT
WHILE LEN(LTRIM(RTRIM(#f))) >= #l --I think youn need to trim the string and then check length
BEGIN
SET #s = SUBSTRING(#f, #l, 1)
IF #s BETWEEN 'А' AND 'я'
SET #l += 1
ELSE IF #s = ' '
BEGIN
SET #l -= 1
SET #s = SUBSTRING(#f, #l, 1)
SET #s = RTRIM(#s)
SET #l += 2
SET #i += 1
END
ELSE
BREAK
END
IF #i = 0 OR #i = 2
SET #b = 'TRUE'
ELSE
SET #b = 'FALSE'
RETURN #b
END
GO

search in a string creditcard numeric value

I want to find a credit card numeric value in a sql string.
for example;
DECLARE #value1 NVARCHAR(MAX) = 'The payment is the place 1234567812345678'
DECLARE #value2 NVARCHAR(MAX) = 'The payment is the place 123456aa7812345678'
DECLARE #value3 NVARCHAR(MAX) = 'The payment1234567812345678is the place'
The result should be :
#value1Result 1234567812345678
#value2Result NULL
#value3Result 1234567812345678
16 digits must be together without space.
How to do this in a sql script or a function?
edit :
if I want to find these 2 credit card value.
#value4 = 'card 1 is : 4034349183539301 and the other one is 3456123485697865'
how should I implement the scripts?
You can use PathIndex as
PATINDEX('%[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]%', yourStr)
if the result is 0 then it doesnt containg 16 digits other was it contains.
It can be used withing a Where statement or Select statement based on your needs
You can write as:
SELECT case when Len(LEFT(subsrt, PATINDEX('%[^0-9]%', subsrt + 't') - 1)) = 16
then LEFT(subsrt, PATINDEX('%[^0-9]%', subsrt + 't') - 1)
else ''
end
FROM (
SELECT subsrt = SUBSTRING(string, pos, LEN(string))
FROM (
SELECT string, pos = PATINDEX('%[0-9]%', string)
FROM table1
) d
) t
Demo
DECLARE #value1 NVARCHAR(MAX) = 'card 1 is : 4034349183539301 and the other one is 3456123485697865'
DECLARE #Lenght INT
,#Count INT
,#Candidate CHAR
,#cNum INT
,#result VARCHAR(16)
SELECT #Count = 1
SELECT #cNum = 0
SELECT #result = ''
SELECT #Lenght = LEN(#value1)
WHILE #Count <= #Lenght
BEGIN
SELECT #Candidate = SUBSTRING(#value1, #Count, 1)
IF #Candidate != ' '
AND ISNUMERIC(#Candidate) = 1
BEGIN
SET #cNum = #cNum + 1
SET #result = #result + #Candidate
END
ELSE
BEGIN
SET #cNum = 1
SET #result = ''
END
IF #cNum > 16
BEGIN
SELECT #result 'Credit Number'
END
SET #Count = #Count + 1
END
There you go kind sir.
DECLARE
#value3 NVARCHAR(MAX) = 'The payment1234567812345678is the place',
#MaxCount int,
#Count int,
#Numbers NVARCHAR(100)
SELECT #Count = 1
SELECT #Numbers = ''
SELECT #MaxCount = LEN(#value3)
WHILE #Count <= #MaxCount
BEGIN
IF (UNICODE(SUBSTRING(#value3,#Count,1)) >= 48 AND UNICODE(SUBSTRING(#value3,#Count,1)) <=57)
SELECT #Numbers = #Numbers + SUBSTRING(#value3,#Count,1)
SELECT #Count = #Count + 1
END
PRINT #Numbers
You can make this as a function if you are planning to use it a lot.

SQL Server 2005:charindex starting from the end

I have a string 'some.file.name',I want to grab 'some.file'.
To do that,I need to find the last occurrence of '.' in a string.
My solution is :
declare #someStr varchar(20)
declare #reversedStr varchar(20)
declare #index int
set #someStr = '001.002.003'
set #reversedStr = reverse(#someStr)
set #index = len(#someStr) - charindex('.',#reversedStr)
select left(#someStr,#index)
Well,isn't it too complicated?I was just intented to using 'some.file' in a where-clause.
Anyone has a good idea?
What do you need to do with it?? Do you need to grab the characters after the last occurence of a given delimiter?
If so: reverse the string and search using the normal CHARINDEX:
declare #test varchar(100)
set #test = 'some.file.name'
declare #reversed varchar(100)
set #reversed = REVERSE(#test)
select
REVERSE(SUBSTRING(#reversed, CHARINDEX('.', #reversed)+1, 100))
You'll get back "some.file" - the characters up to the last "." in the original file name.
There's no "LASTCHARINDEX" or anything like that in SQL Server directly. What you might consider doing in SQL Server 2005 and up is great a .NET extension library and deploy it as an assembly into SQL Server - T-SQL is not very strong with string manipulation, whereas .NET really is.
A very simple way is:
SELECT
RIGHT(#str, CHARINDEX('.', REVERSE(#str)) - 1)
This will also work:
DECLARE
#test VARCHAR(100)
SET #test = 'some.file.name'
SELECT
LEFT(#test, LEN(#test) - CHARINDEX('.', REVERSE(#test)))
Take one ')'
declare #test varchar(100)
set #test = 'some.file.name'
select left(#test,charindex('.',#test)+charindex('.',#test)-1)
CREATE FUNCTION [dbo].[Instr] (
-------------------------------------------------------------------------------------------------
-- Name: [dbo].[Instr]
-- Purpose: Find The Nth Value Within A String
-------------------------------------------------------------------------------------------------
-- Revisions:
-- 25-FEB-2011 - HESSR - Initial Revision
-------------------------------------------------------------------------------------------------
-- Parameters:
-- 1) #in_FindString - NVARCHAR(MAX) - INPUT - Input Find String
-- 2) #in_String - NVARCHAR(MAX) - INPUT - Input String
-- 3) #in_StartPos - SMALLINT - INPUT - Position In The String To Start Looking From
-- (If Start Position Is Negative, Search Begins At The End Of The String)
-- (Negative 1 Starts At End Position 1, Negative 3 Starts At End Position Minus 2)
-- 4) #in_Nth - SMALLINT - INPUT - Nth Occurrence To Find The Location For
-------------------------------------------------------------------------------------------------
-- Returns: SMALLINT - Position Of String Segment (Not Found = 0)
-------------------------------------------------------------------------------------------------
#in_FindString NVARCHAR(MAX),
#in_String NVARCHAR(MAX),
#in_StartPos SMALLINT = NULL,
#in_Nth SMALLINT = NULL
)
RETURNS SMALLINT
AS
BEGIN
DECLARE #loc_FindString NVARCHAR(MAX);
DECLARE #loc_String NVARCHAR(MAX);
DECLARE #loc_Position SMALLINT;
DECLARE #loc_StartPos SMALLINT;
DECLARE #loc_Nth SMALLINT;
DECLARE #loc_Idx SMALLINT;
DECLARE #loc_FindLength SMALLINT;
DECLARE #loc_Length SMALLINT;
SET #loc_FindString = #in_FindString;
SET #loc_String = #in_String;
SET #loc_Nth = ISNULL(ABS(#in_Nth), 1);
SET #loc_FindLength = LEN(#loc_FindString+N'.') - 1;
SET #loc_Length = LEN(#loc_String+N'.') - 1;
SET #loc_StartPos = ISNULL(#in_StartPos, 1);
SET #loc_Idx = 0;
IF (#loc_StartPos = ABS(#loc_StartPos))
BEGIN
WHILE (#loc_Idx < #loc_Nth)
BEGIN
SET #loc_Position = CHARINDEX(#loc_FindString,#loc_String,#loc_StartPos);
IF (#loc_Position > 0)
SET #loc_StartPos = #loc_Position + #loc_FindLength
ELSE
SET #loc_Idx = #loc_Nth;
SET #loc_Idx = #loc_Idx + 1;
END;
END
ELSE
BEGIN
SET #loc_StartPos = ABS(#loc_StartPos);
SET #loc_FindString = REVERSE(#in_FindString);
SET #loc_String = REVERSE(#in_String);
WHILE (#loc_Idx < #loc_Nth)
BEGIN
SET #loc_Position = CHARINDEX(#loc_FindString,#loc_String,#loc_StartPos);
IF (#loc_Position > 0)
SET #loc_StartPos = #loc_Position + #loc_FindLength
ELSE
SET #loc_Idx = #loc_Nth;
SET #loc_Idx = #loc_Idx + 1;
END;
IF (#loc_Position > 0)
SET #loc_Position = #loc_Length - #loc_Position + (1 - #loc_FindLength) + 1;
END;
RETURN (#loc_Position);
END;
GO
Here is a shorter version
DECLARE #someStr varchar(20)
set #someStr = '001.002.003'
SELECT REVERSE(Substring(REVERSE(#someStr),CHARINDEX('.', REVERSE(#someStr))+1,20))