I'm having trouble with end-of-file detection in my while loop. I have to read a text file (a sequence of characters), and each character is translated into a different character that is written to an output file (salida.txt). But my while loop doesn't recognize the end of the file (EOF). Here's my code.
{ Reads input.txt one character at a time and writes a substituted version of
  the text to salida.txt. }
program LaboratorioPascal;
uses crt;
var
sec, sal: Textfile; { sec: input text file being read; sal: output text file being written }
v: char; { character most recently read from sec }
{ cont_palabra counts words and cont_especial counts characters written as '#';
  por_especial and cont_caracter are only zero-initialized in the visible code. }
por_especial, cont_palabra, cont_caracter, cont_especial: integer;
vocales2: set of char; { lowercase vowels }
pares: set of char; { even digits }
impares: set of char; { odd digits }
consonantes: set of char; { first group of lowercase consonants }
consonantes2: set of char; { second group of lowercase consonants }
procedure numeros(var x: char);
{ Writes the replacement text for the digit character x to the output file sal.
  The table holds n*2 for even digits and n*n*n for odd digits.
  Any character that is not a digit produces no output. }
const
  reemplazo: array['0'..'9'] of string[3] =
    ('0', '1', '4', '27', '8', '125', '12', '343', '16', '729');
begin
  if x in ['0'..'9'] then
    Write(sal, reemplazo[x]);
end;
function vocales(var s: char): char;
{ Returns the substitute for the character s:
  'a' -> 'o', 'e' -> 'u', 'i' -> 'a', 'o' -> 'e'; anything else -> 'i'. }
begin
  if s = 'a' then
    vocales := 'o'
  else if s = 'e' then
    vocales := 'u'
  else if s = 'i' then
    vocales := 'a'
  else if s = 'o' then
    vocales := 'e'
  else
    vocales := 'i';
end;
{ Main program: copies input.txt to salida.txt, substituting every character.
  - spaces are copied unchanged and separate words;
  - lowercase consonants become '1';
  - lowercase vowels are replaced by vocales();
  - digits are replaced by numeros();
  - every other character becomes '#' and is counted in cont_especial.
  Line-break characters are not spaces, so they also take the '#' branch. }
begin
  assign(sec, 'input.txt');
  reset(sec);
  assign(sal, 'salida.txt');
  rewrite(sal);
  vocales2 := ['a', 'e', 'i', 'o', 'u'];
  pares := ['0', '2', '4', '6', '8'];
  impares := ['1', '3', '5', '7', '9'];
  consonantes := ['b', 'c', 'd', 'f', 'g', 'h', 'j','k','l','m', 'n'];
  consonantes2 := ['p', 'q', 'r', 's', 't', 'v', 'w', 'x', 'y', 'z'];
  por_especial := 0;
  cont_palabra := 0;
  cont_caracter := 0;
  cont_especial := 0;
  writeln('El objetivo de este programa es cifrar un mensaje para favorecer a la inteligencia Rusa.');
  { Every read is guarded by an eof check, so the program never reads past the
    end of the file, even when the file is empty or does not end in a space. }
  while not eof(sec) do
  begin
    read(sec, v);
    if v = ' ' then
      write(sal, ' ')
    else
    begin
      { A non-space character after a space (or at the start) begins a word. }
      cont_palabra := cont_palabra + 1;
      repeat
        if (v in consonantes) or (v in consonantes2) then
          write(sal, '1')
        else if v in vocales2 then
          write(sal, vocales(v))
        else if (v in pares) or (v in impares) then
          numeros(v)
        else
        begin
          cont_especial := cont_especial + 1;
          write(sal, '#');
        end;
        { Stop at end of file: v has already been written, so nothing is lost. }
        if eof(sec) then
          break;
        read(sec, v);
      until v = ' ';
      { The loop ends either on a space (still to be copied) or on end of file
        (v already processed and therefore not a space). }
      if v = ' ' then
        write(sal, ' ');
    end;
  end;
  write(cont_palabra, ' se crifraon con [Exito]');
  close(sec);
  close(sal);
end.
But the result I have in the exit file (salida.txt) is
1o1ao i1o 1u1 i1 1e1111ie 1iu 1u 1e1ae o i1o 11a11u1o### 1a1########################################################################################################################################################################################################
I've researched EOF handling, but I can't find anything specific to Pascal. If I try to put an
if eof then
Exit;
end;
inside the while loop, it reads just one character from the input.txt file.
The problem is that you are in the inner loop ("while v <> ' ' do") when you come to the end of your input file.
If the last character in the input file is a space, you jump out of the inner loop and out of the outer loop, because you reached eof.
But if it isn't, you stay in the inner loop, and keep reading beyond the eof, until you encounter a space or a problem.
You can change the inner loop's
"while v <> ' ' do"
to
"while (v <> ' ') and (not eof(sec)) do".
Or make it one loop and handle the space in an if statement.
I am using SQL Server 2008 R2 SP1.
I have a table with about 36034 records of customers.
I am trying to implement fuzzy search on the Customer Name field.
Here is the function for the fuzzy search:
-- Returns a 0-100 similarity score between @Reference and @Target.
--   * 100 when the two strings compare equal.
--   * 0 when either argument is NULL.
--   * Otherwise both strings are considered at a common length, WordLength
--     (the larger DATALENGTH), with @Reference padded by '_'. For every
--     position in the padded reference, the letter scores
--     WordLength - (distance to the nearest position in @Target holding the
--     same letter), or 0 when @Target does not contain it. The result is
--     SUM(letter scores) * 100 / WordLength^2.
-- Parameters and variables use the @ prefix required by T-SQL.
ALTER FUNCTION [Party].[FuzySearch]
    (
      @Reference VARCHAR(200) ,
      @Target VARCHAR(200)
    )
RETURNS DECIMAL(5, 2)
WITH SCHEMABINDING
AS
    BEGIN
        DECLARE @score DECIMAL(5, 2);

        -- Numbers yields the integers 1..200 (the maximum length of either
        -- argument); it replaces two hand-written 200-row VALUES lists.
        WITH    Digits ( d )
                  AS ( SELECT   d
                       FROM     ( VALUES ( 0), ( 1), ( 2), ( 3), ( 4),
                                ( 5), ( 6), ( 7), ( 8), ( 9) ) x ( d )
                     ),
                Numbers ( n )
                  AS ( SELECT   100 * h.d + 10 * t.d + o.d + 1
                       FROM     Digits o
                                CROSS JOIN Digits t
                                CROSS JOIN ( VALUES ( 0), ( 1) ) h ( d )
                     )
            SELECT  @score = CASE WHEN @Reference = @Target
                                  THEN CAST(100 AS NUMERIC(5, 2))
                                  WHEN @Reference IS NULL
                                       OR @Target IS NULL
                                  THEN CAST(0 AS NUMERIC(5, 2))
                                  ELSE ( SELECT CAST(SUM(LetterScore) * 100.0
                                                / MAX(WordLength * WordLength) AS NUMERIC(5, 2))
                                         FROM   ( -- scored: one row per reference position
                                                  SELECT    seq = t1.n ,
                                                            ref.Letter ,
                                                            v.WordLength ,
                                                            LetterScore = v.WordLength
                                                            - ISNULL(MIN(tgt.n), v.WordLength)
                                                  FROM      ( -- v: padded strings and common length
                                                              SELECT    Reference = LEFT(@Reference
                                                                            + REPLICATE('_', WordLength),
                                                                            WordLength) ,
                                                                        Target = LEFT(@Target
                                                                            + REPLICATE('_', WordLength),
                                                                            WordLength) ,
                                                                        WordLength = WordLength
                                                              FROM      ( -- di: larger of the two lengths
                                                                          SELECT    WordLength = MAX(WordLength)
                                                                          FROM      ( VALUES
                                                                                    ( DATALENGTH(@Reference)),
                                                                                    ( DATALENGTH(@Target)) ) d ( WordLength )
                                                                        ) di
                                                            ) v
                                                            -- t1: positions 1..WordLength; ORDER BY makes
                                                            -- the TOP deterministic.
                                                            CROSS APPLY ( SELECT TOP ( WordLength )
                                                                                    n
                                                                          FROM      Numbers
                                                                          ORDER BY  n
                                                                        ) t1
                                                            CROSS APPLY ( SELECT    Letter = SUBSTRING(Reference,
                                                                                    t1.n, 1)
                                                                        ) ref
                                                            -- tgt: distance from t1.n to each position of
                                                            -- @Target that holds the same letter.
                                                            OUTER APPLY ( SELECT TOP ( WordLength )
                                                                                    n = ABS(t1.n - t2.n)
                                                                          FROM      Numbers t2
                                                                          WHERE     SUBSTRING(@Target,
                                                                                    t2.n, 1) = ref.Letter
                                                                          ORDER BY  t2.n
                                                                        ) tgt
                                                  GROUP BY  t1.n ,
                                                            ref.Letter ,
                                                            v.WordLength
                                                ) scored
                                       )
                             END;
        RETURN @score;
    END
Here is the query to call the function
-- Score every customer's FirstName against the search text.
SELECT  [Party].[FuzySearch]('First Name Middle Name Last Name', cust.FirstName)
FROM    dbo.Customer AS cust
This takes about 2 minutes 22 seconds to return the fuzzy-match percentage for all customers.
How can I make it run in less than a second? Any suggestions for making my function more robust are also welcome.
Expected ouput is 45.34, 40.00, 100.00, 23.00, 81.23.....
The best I have been able to do is simplify some of the query, and change it to a table valued function. Scalar functions are notoriously poor performers, and the benefit of an inline TVF is that the query definition is expanded out into the main query, much like a view.
This reduces the execution time significantly on the tests I have done.
-- Inline table-valued function returning one row with a single column, Score
-- (0-100), describing how closely @Target matches @Reference.
--   * 100 when the strings compare equal.
--   * 0 when either argument is NULL.
--   * Otherwise SUM(letter scores) * 100 / WordLength^2, where WordLength is
--     the larger DATALENGTH and each position of the '_'-padded reference
--     scores WordLength minus the distance to the nearest equal letter in
--     @Target (0 when the letter does not occur in @Target).
ALTER FUNCTION dbo.FuzySearchTVF (@Reference VARCHAR(200), @Target VARCHAR(200))
RETURNS TABLE
AS
RETURN
(   -- N: row numbers up to the longer input length (10 * 10 * 2 = 200 rows available).
    WITH N (n) AS
    (   SELECT  TOP (ISNULL(CASE WHEN DATALENGTH(@Reference) > DATALENGTH(@Target)
                                THEN DATALENGTH(@Reference)
                                ELSE DATALENGTH(@Target)
                            END, 0))
                ROW_NUMBER() OVER(ORDER BY n1.n)
        FROM    (VALUES (1), (1), (1), (1), (1), (1), (1), (1), (1), (1)) AS N1 (n)
        CROSS JOIN (VALUES (1), (1), (1), (1), (1), (1), (1), (1), (1), (1)) AS N2 (n)
        CROSS JOIN (VALUES (1), (1)) AS N3 (n)
        WHERE   @Reference IS NOT NULL AND @Target IS NOT NULL
    ), Src AS
    (   -- Both strings padded with '_' to the common length. Each pads by
        -- (other length - own length), which is positive whenever padding applies.
        SELECT  Reference = CASE WHEN DATALENGTH(@Reference) > DATALENGTH(@Target) THEN @Reference
                                ELSE @Reference + REPLICATE('_', DATALENGTH(@Target) - DATALENGTH(@Reference))
                            END,
                Target = CASE WHEN DATALENGTH(@Target) > DATALENGTH(@Reference) THEN @Target
                                ELSE @Target + REPLICATE('_', DATALENGTH(@Reference) - DATALENGTH(@Target))
                            END,
                WordLength = CASE WHEN DATALENGTH(@Reference) > DATALENGTH(@Target) THEN DATALENGTH(@Reference) ELSE DATALENGTH(@Target) END
        WHERE   @Reference IS NOT NULL
        AND     @Target IS NOT NULL
        AND     @Reference != @Target
    ), Scores AS
    (   -- One row per reference position. LEFT JOIN keeps positions whose
        -- letter never occurs in @Target, so they score 0 instead of vanishing
        -- (otherwise a pair with no common letter would return no row at all).
        SELECT  seq = t1.n ,
                Letter = SUBSTRING(s.Reference, t1.n, 1),
                s.WordLength ,
                LetterScore = s.WordLength - ISNULL(MIN(ABS(t1.n - t2.n)), s.WordLength)
        FROM    Src AS s
                CROSS JOIN N AS t1
                LEFT JOIN N AS t2
                    ON SUBSTRING(@Target, t2.n, 1) = SUBSTRING(s.Reference, t1.n, 1)
        WHERE   @Reference IS NOT NULL
        AND     @Target IS NOT NULL
        AND     @Reference != @Target
        GROUP BY t1.n, SUBSTRING(s.Reference, t1.n, 1), s.WordLength
    )
    -- Exactly one of the three branches below produces a row.
    SELECT  [Score] = 100
    WHERE   @Reference = @Target
    UNION ALL
    SELECT  0
    WHERE   @Reference IS NULL OR @Target IS NULL
    UNION ALL
    SELECT  CAST(SUM(LetterScore) * 100.0 / MAX(WordLength * WordLength) AS NUMERIC(5, 2))
    FROM    Scores
    WHERE   @Reference IS NOT NULL
    AND     @Target IS NOT NULL
    AND     @Reference != @Target
    GROUP BY WordLength
);
And this would be called as:
-- Score every customer's FirstName via the inline table-valued function
-- dbo.FuzySearchTVF.
SELECT  f.Score
FROM    dbo.Customer AS c
        CROSS APPLY dbo.FuzySearchTVF('First Name Middle Name Last Name', c.FirstName) AS f
It is still a fairly complex function though, and, depending on the number of records in your customer table, I think getting it down to 1 second is going to be a bit of a challenge.
This is how I could accomplish this:
Explained further at: SQL Server Fuzzy Search - Levenshtein Algorithm
Create below file using any editor of your choice:
using System;
using System.Data;
using System.Data.SqlClient;
using System.Data.SqlTypes;
using Microsoft.SqlServer.Server;
/// <summary>
/// SQL CLR scalar functions for fuzzy string matching.
/// </summary>
public partial class StoredFunctions
{
    /// <summary>
    /// Returns the case-insensitive Levenshtein similarity of two strings as a
    /// percentage, rounded to two decimals:
    /// (1 - editDistance / max(length1, length2)) * 100.
    /// </summary>
    /// <param name="stringOne">First string; NULL is treated as empty.</param>
    /// <param name="stringTwo">Second string; NULL is treated as empty.</param>
    /// <returns>
    /// 100 when both strings are empty, 0 when exactly one is empty,
    /// otherwise the similarity percentage.
    /// </returns>
    [Microsoft.SqlServer.Server.SqlFunction(IsDeterministic = true, IsPrecise = false)]
    public static SqlDouble Levenshtein(SqlString stringOne, SqlString stringTwo)
    {
        // Invariant upper-casing keeps the result independent of the thread
        // culture, which is what IsDeterministic = true promises.
        string first = stringOne.IsNull ? string.Empty : stringOne.Value.ToUpperInvariant();
        string second = stringTwo.IsNull ? string.Empty : stringTwo.Value.ToUpperInvariant();

        int firstLength = first.Length;
        int secondLength = second.Length;

        // Trivial cases are answered before any working memory is allocated.
        if (firstLength == 0 && secondLength == 0)
        {
            return 100.0;
        }
        if (firstLength == 0 || secondLength == 0)
        {
            return 0.0;
        }

        // Only the previous and the current row of the edit-distance matrix
        // are needed at any time, so two rows replace the full matrix.
        int[] previousRow = new int[secondLength + 1];
        int[] currentRow = new int[secondLength + 1];

        for (int j = 0; j <= secondLength; j++)
        {
            previousRow[j] = j;
        }

        for (int i = 1; i <= firstLength; i++)
        {
            currentRow[0] = i;
            for (int j = 1; j <= secondLength; j++)
            {
                int matchCost = first[i - 1] == second[j - 1] ? 0 : 1;
                int deletion = previousRow[j] + 1;
                int insertion = currentRow[j - 1] + 1;
                int substitution = previousRow[j - 1] + matchCost;
                currentRow[j] = Math.Min(Math.Min(deletion, insertion), substitution);
            }

            // The row just computed becomes the "previous" row of the next pass.
            int[] swap = previousRow;
            previousRow = currentRow;
            currentRow = swap;
        }

        int distance = previousRow[secondLength];

        // Convert the edit distance into a percentage of match.
        double percentage = Math.Round(
            (1.0 - ((double)distance / (double)Math.Max(firstLength, secondLength))) * 100.0, 2);
        return percentage;
    }
}
Name it levenshtein.cs
Open a Command Prompt, change to the directory that contains levenshtein.cs, and run: csc.exe /t:library /out:UserFunctions.dll levenshtein.cs (you may have to give the full path to csc.exe from .NET Framework 2.0).
Once your DLL is ready. Add it to the assemblies Database>>Programmability>>Assemblies>> New Assembly.
Create function in your database:
-- T-SQL wrapper that exposes the CLR method StoredFunctions.Levenshtein from
-- the UserFunctions assembly as a scalar function.
-- Parameter names use the @ prefix required by T-SQL.
CREATE FUNCTION dbo.LevenshteinSVF
    (
      @S1 NVARCHAR(200) ,
      @S2 NVARCHAR(200)
    )
RETURNS FLOAT
AS EXTERNAL NAME
    UserFunctions.StoredFunctions.Levenshtein
GO
In my case I had to enable clr:
-- Set the 'clr enabled' server option to 1 and apply the change.
EXEC sp_configure 'clr enabled', 1;
GO
RECONFIGURE;
GO
Test the function:
-- Smoke test: compare 'James' with 'James Bond' using the CLR-backed function.
SELECT dbo.LevenshteinSVF('James','James Bond')
Result: 50 % match