SQL: Build dynamic query from list of tables received from user - sql

I am attempting to build a dynamic query based on a list of tables as received from a user.
I have a couple attempted solutions below.
I think the CTE is the way to go but am having difficulty figuring out how to make it happen.
I would really appreciate whatever genius can let me know how to do this!
these are the tables:
W, WD, WE, WSF, WSFE, XDF, XDFE, Y, YD, Z, ZD
these are the columns to join by for each group of tables:
W, Y, WD, WE, WSF
WID
WSF, WSFE, XDF
WSFID
XDF, XDFE
XDFID
Y, YD, Z
YID
Z, ZD
ZID
if the user selects W, Y, Z then build this query (which could then be executed by exec or sp_executesql):
select * from #W w join #Y y on y.WID = w.WID join #Z z on z.YID = y.YID
-- Metadata that drives the dynamic query builder.
-- The four objects below are table variables (@ sigil); the '#W'-style
-- string values are the names of the temp tables the generated query reads.
declare @Fields table (
    ID int identity not NULL,
    Name varchar(200)
)
declare @Tables table (
    ID int identity not NULL,
    Field varchar(200),
    TempTable varchar(200)
)
declare @QueryTables table (
    ID int identity not NULL,
    [Table] varchar(200),
    Alias varchar(20)
)
declare @QueryJoins table (
    ID int identity not NULL,
    Table1 varchar(20),
    Col1 varchar(200),
    Table2 varchar(20),
    Col2 varchar(200)
)

-- The tables selected by the user.
insert @Fields (Name)
values
    ('W'),
    ('Y'),
    ('Z')

-- Field name -> temp table holding its rows.
insert @Tables (Field, TempTable)
values
    ('W', '#W'),
    ('WD', '#WD'),
    ('WE', '#WE'),
    ('WSF', '#WSF'),
    ('WSFE', '#WSFE'),
    ('XDF', '#XDF'),
    ('XDFE', '#XDFE'),
    ('Y', '#Y'),
    ('YD', '#YD'),
    ('Z', '#Z'),
    ('ZD', '#ZD')

-- Temp table -> alias used in the generated query.
insert @QueryTables ([Table], Alias)
values
    ('#W', 'w'),
    ('#WD', 'wd'),
    ('#WE', 'we'),
    ('#WSF', 'wsf'),
    ('#WSFE', 'wsfe'),
    ('#XDF', 'xdf'),
    ('#XDFE', 'xdfe'),
    ('#Y', 'y'),
    ('#YD', 'yd'),
    ('#Z', 'z'),
    ('#ZD', 'zd')

-- Allowed joins: Table1.Col1 = Table2.Col2 (aliases from @QueryTables).
insert @QueryJoins (Table1, Col1, Table2, Col2)
values
    ('w', 'WID', 'wd', 'WID'),
    ('w', 'WID', 'we', 'WID'),
    ('w', 'WID', 'wsf', 'WID'),
    ('w', 'WID', 'xdf', 'WID'),
    ('w', 'WID', 'y', 'WID'),
    ('wd', 'WID', 'w', 'WID'),
    ('we', 'WID', 'wd', 'WID'),
    ('wsf', 'WID', 'wd', 'WID'),
    ('wsf', 'WSFID', 'wsfe', 'WSFID'),
    ('wsfe', 'WSFID', 'wsf', 'WSFID'),
    ('wsf', 'WSFID', 'xdf', 'WSFID'),
    ('xdf', 'WID', 'w', 'WID'),
    ('xdf', 'WSFID', 'wsf', 'WSFID'),
    ('xdf', 'XDFID', 'xdfe', 'XDFID'),
    ('xdfe', 'XDFID', 'xdf', 'XDFID'),
    ('y', 'WID', 'w', 'WID'),
    ('y', 'YID', 'yd', 'YID'),
    ('yd', 'YID', 'y', 'YID'),
    ('y', 'YID', 'z', 'YID'),
    ('z', 'YID', 'y', 'YID'),
    ('z', 'ZID', 'zd', 'ZID'),
    ('zd', 'ZID', 'z', 'ZID')

--attempted solution number 1:
-- Lists every allowed join whose two sides are both among the selected fields.
select
    *
from @Fields vf
join @Tables vt
    on vt.Field = vf.Name
join @QueryTables vqt
    on vqt.[Table] = vt.TempTable
join @QueryJoins vqj
    on vqj.Table1 = vqt.Alias
join @QueryTables vqt2
    on vqt2.Alias = vqj.Table2
join @Tables vt2
    on vt2.TempTable = vqt2.[Table]
join @Fields vf2
    on vf2.Name = vt2.Field

--attempted solution number 2:
-- Recursive CTE: the anchor takes every join leaving a selected table; the
-- recursive member follows the reverse edge of a row already in the CTE.
;with cte (FieldID, [Table], Table1, Col1, Table2, Col2, I) as (
    select
        vf.ID as FieldID,
        vqt.[Table],
        vqj.Table1,
        vqj.Col1,
        vqj.Table2,
        vqj.Col2,
        1
    from @Fields vf
    join @Tables vt
        on vt.Field = vf.Name
    join @QueryTables vqt
        on vqt.[Table] = vt.TempTable
    join @QueryJoins vqj
        on vqj.Table1 = vqt.Alias
    union all
    select
        vf.ID as FieldID,
        vqt.[Table],
        vqj.Table1,
        vqj.Col1,
        vqj.Table2,
        vqj.Col2,
        cte.I + 1
    from @Fields vf
    join @Tables vt
        on vt.Field = vf.Name
    join @QueryTables vqt
        on vqt.[Table] = vt.TempTable
    join @QueryJoins vqj
        on vqj.Table1 = vqt.Alias
    join cte cte
        on cte.Table1 = vqj.Table2
        and cte.Table2 = vqj.Table1
    where cte.I <= cte.FieldID --a feeble attempt to control the recursion
)
select * from cte

An interesting problem, definitely a data modeling code smell (if Z, Y, and YD all have a YID column, sounds like you need a bridge table or similar construct to manage the relationships there. Or if Z and YD are small lookups, just join them all and let the optimizer handle the overhead.)
Note: I made the fields self join on Name < Name, so you only need the QueryJoins data where Table1 < Table2. This would cut your QueryJoins table in half, but you're also missing some of the "correct" joins (ie you have we/wd but not wd/we)
-- Metadata that drives the dynamic query builder.
-- The four objects below are table variables (@ sigil); the '#W'-style
-- string values are the names of the temp tables the generated query reads.
declare @Fields table (
    ID int identity not NULL,
    Name varchar(200)
)
declare @Tables table (
    ID int identity not NULL,
    Field varchar(200),
    TempTable varchar(200)
)
declare @QueryTables table (
    ID int identity not NULL,
    [Table] varchar(200),
    Alias varchar(20)
)
declare @QueryJoins table (
    ID int identity not NULL,
    Table1 varchar(20),
    Col1 varchar(200),
    Table2 varchar(20),
    Col2 varchar(200)
)

-- The tables selected by the user.
insert @Fields (Name)
values
    ('W'),
    ('Y'),
    ('Z')

insert @Tables (Field, TempTable)
values
    ('W', '#W'),
    ('WD', '#WD'),
    ('WE', '#WE'),
    ('WSF', '#WSF'),
    ('WSFE', '#WSFE'),
    ('XDF', '#XDF'),
    ('XDFE', '#XDFE'),
    ('Y', '#Y'),
    ('YD', '#YD'),
    ('Z', '#Z'),
    ('ZD', '#ZD')

insert @QueryTables ([Table], Alias)
values
    ('#W', 'w'),
    ('#WD', 'wd'),
    ('#WE', 'we'),
    ('#WSF', 'wsf'),
    ('#WSFE', 'wsfe'),
    ('#XDF', 'xdf'),
    ('#XDFE', 'xdfe'),
    ('#Y', 'y'),
    ('#YD', 'yd'),
    ('#Z', 'z'),
    ('#ZD', 'zd')

-- Allowed joins: Table1.Col1 = Table2.Col2. Only rows where Table1 sorts
-- before Table2 are used by the query below.
insert @QueryJoins (Table1, Col1, Table2, Col2)
values
    ('w', 'WID', 'wd', 'WID'),
    ('w', 'WID', 'we', 'WID'),
    ('w', 'WID', 'wsf', 'WID'),
    ('w', 'WID', 'xdf', 'WID'),
    ('w', 'WID', 'y', 'WID'),
    ('wd', 'WID', 'w', 'WID'),
    ('we', 'WID', 'wd', 'WID'),
    ('wsf', 'WID', 'wd', 'WID'),
    ('wsf', 'WSFID', 'wsfe', 'WSFID'),
    ('wsfe', 'WSFID', 'wsf', 'WSFID'),
    ('wsf', 'WSFID', 'xdf', 'WSFID'),
    ('xdf', 'WID', 'w', 'WID'),
    ('xdf', 'WSFID', 'wsf', 'WSFID'),
    ('xdf', 'XDFID', 'xdfe', 'XDFID'),
    ('xdfe', 'XDFID', 'xdf', 'XDFID'),
    ('y', 'YID', 'yd', 'YID'),
    ('yd', 'YID', 'y', 'YID'),
    ('y', 'YID', 'z', 'YID'),
    ('z', 'ZID', 'zd', 'ZID'),
    ('zd', 'ZID', 'z', 'ZID')
;
-- Builds the query text, e.g. for W, Y, Z:
--   select * from #W w join #Y y on w.WID = y.WID join #Z z on y.YID = z.YID
with a as (
    -- Selected tables, numbered alphabetically; rn = 1 is the root of the FROM clause.
    select
        row_number() over (order by vf.Name) as rn,
        vf.Name,
        vt.Field,
        vt.TempTable,
        vqt.[Table],
        vqt.Alias
    from @Fields vf
    join @Tables vt
        on vt.Field = vf.Name
    join @QueryTables vqt
        on vqt.[Table] = vt.TempTable
),
j as (
    -- Candidate joins from an earlier table (a) to a later one (a2).
    -- pick = 1 keeps a single join per later table so no table is joined twice.
    select
        a2.rn,
        a2.TempTable,
        a2.Alias,
        q.Table1,
        q.Col1,
        q.Table2,
        q.Col2,
        row_number() over (partition by a2.rn order by a.rn, q.ID) as pick
    from a
    join a a2
        on a.Name < a2.Name
    join @QueryJoins q
        on q.Table1 = a.Alias
        and q.Table2 = a2.Alias
)
select
    'select * from '
    -- The root table is emitted once, outside the concatenation, so its
    -- leading '#' is never stripped and it is never repeated.
    + (select concat(a.TempTable, ' ', a.Alias) from a where a.rn = 1)
    + coalesce((
        select
            concat(
                ' join ', j.TempTable, ' ', j.Alias,
                ' on ', j.Table1, '.', j.Col1,
                ' = ', j.Table2, '.', j.Col2)
        from j
        where j.pick = 1
        order by j.rn -- deterministic join order: a table's partner always precedes it
        for xml path(''), type
    ).value('.', 'varchar(max)'), '')

Related

How can I put SELECT result set to SELECT column?

I have two tables: Job(ID,Name, etc.) and Address(ID, Job_ID, Name etc). I want to get result like this:
[
{
"Job_ID": 1,
"JobName": "Test",
"Addresses": [
{
"ID": 1,
"Name": "King street"
},
{
"ID": 2,
"Name": "Queen`s street
}
]
}
]
My current query that gets only one address for a job looks like this:
-- One row per (job, address) pair; a job with several addresses repeats.
SELECT TOP 100
    JO.ID,
    JO.Closed                       AS Deleted,
    JO.Number                       AS JobNumber,
    JO.Name                         AS JobName,
    CONVERT(date, JO.Start_Date)    AS Start_Date,
    JO.Job_Status_ID                AS Status,
    A.ID                            AS Address_ID,
    A.Name,
    A.Number,
    A.Sort_Name,
    A.Address_1,
    A.Address_2,
    A.ZipCode                       AS ZIP,
    A.E_Mail_Address                AS Email,
    A.Web_Site_URL,
    A.TAXRATE                       AS Tax_Rate,
    A.State
FROM Job AS JO
INNER JOIN Address AS A
    ON A.Job_Id = JO.ID
Is it possible without pivot table(Address_ID, Job_ID)?
You can use FOR JSON to convert your results to JSON. This gives the result you are looking for:
-- Demo fixtures: two jobs with two addresses each.
CREATE TABLE #Job (
    ID   INT NOT NULL,
    Name VARCHAR(50)
);
INSERT #Job (ID, Name)
VALUES
    (1, 'Job 1'),
    (2, 'Job 2');

CREATE TABLE #Address (
    ID    INT NOT NULL,
    JobID INT NOT NULL,
    Name  VARCHAR(50)
);
INSERT #Address (ID, JobID, Name)
VALUES
    (1, 1, 'King street'),
    (2, 1, 'Queen''s street'),
    (3, 2, 'Address 3'),
    (4, 2, 'Address 4');

-- One JSON object per job; the correlated subquery nests that job's
-- addresses as a JSON array under "Addresses".
SELECT
    j.ID   AS JobID,
    j.Name AS JobName,
    (
        SELECT a.ID, a.Name
        FROM #Address AS a
        WHERE a.JobID = j.ID
        FOR JSON AUTO
    ) AS Addresses
FROM #Job AS j
FOR JSON AUTO;

What is the function to divide 2 values from the same column with conditions

I would like to return my total return divided by my total sale which are under the same column
-- Percentage of PB returns relative to PB sales in the five listed countries.
-- Both counts are scalar subqueries; the outer FROM yields one output row
-- per row of order_emea.
SELECT
    (
        SELECT COUNT(order_type_id)
        FROM ods_emea_all.order_emea
        WHERE order_type_id IN ('Return', 'RETURN')
          AND brand_cd = 'PB'
          AND iso_country_cd IN ('IT', 'ES', 'GB', 'FR', 'DE')
    ) * 100
    / (
        SELECT COUNT(order_type_id)
        FROM ods_emea_all.order_emea
        WHERE order_type_id IN ('Sale', 'SALE')
          AND brand_cd = 'PB'
          AND iso_country_cd IN ('IT', 'ES', 'GB', 'FR', 'DE')
    ) AS brand_return
FROM ods_emea_all.order_emea
If you have the following table with contents:
-- Sample data: 10 sales and 5 returns for brand PB, with mixed-case order types.
CREATE TABLE order_emea (
    order_type_id  VARCHAR(100),
    brand_cd       VARCHAR(100),
    iso_country_cd VARCHAR(100)
);

INSERT INTO order_emea (order_type_id, brand_cd, iso_country_cd)
VALUES
    ('Sale',   'PB', 'IT'),
    ('SALE',   'PB', 'FR'),
    ('Sale',   'PB', 'IT'),
    ('sale',   'PB', 'ES'),
    ('SALe',   'PB', 'ES'),
    ('sAle',   'PB', 'GB'),
    ('saLe',   'PB', 'FR'),
    ('sale',   'PB', 'DE'),
    ('Sale',   'PB', 'DE'),
    ('sale',   'PB', 'FR'),
    ('Return', 'PB', 'FR'),
    ('RETURN', 'PB', 'FR'),
    ('return', 'PB', 'GB'),
    ('REturn', 'PB', 'IT'),
    ('rEturn', 'PB', 'IT');
The following query will get you what you want:
-- Ratio of returns to sales for brand PB in IT/ES/GB/FR/DE, matching
-- order type, brand and country case-insensitively.
WITH base_table_n_returns_to_n_sales AS (
    SELECT
        -- COALESCE keeps the counts at 0 (not NULL) when no row passes the filter.
        COALESCE(SUM(CASE WHEN LOWER(order_type_id) = 'return' THEN 1 ELSE 0 END), 0) AS n_returns,
        COALESCE(SUM(CASE WHEN LOWER(order_type_id) = 'sale' THEN 1 ELSE 0 END), 0) AS n_sales
    FROM order_emea
    WHERE LOWER(brand_cd) = 'pb'
      -- Normalise to upper case because the literals are upper case;
      -- LOWER(...) IN ('IT', ...) can never match on a case-sensitive engine.
      AND UPPER(iso_country_cd) IN ('IT', 'ES', 'GB', 'FR', 'DE')
)
SELECT
    *,
    -- NULLIF avoids division by zero; the FLOAT cast avoids integer division.
    n_returns / CAST(NULLIF(n_sales, 0) AS FLOAT) AS ratio_n_returns_to_n_sales
FROM base_table_n_returns_to_n_sales
See this fiddle.

Insert does not work with cursor

I have a problem that my Insert statement doesn't work. I created temp tables, then the insert statement is working based on two columns in this temp tables.
The data exists in the temp tables, but the insert is not working with no error appearing. Thanks in advance
-- Inserts one JPCustomer row per (customer, journey plan) pair.
-- @CUST_ID and @JP_ID are expected to be declared earlier in the batch.
DECLARE JPCUSTOMER_CURSOR CURSOR FOR
SELECT
    cu.CustomerNo, jp.ID
FROM
    CUSTOMERNO# cu
    INNER JOIN SalesmanNo# sa
        ON cu.OCCURRENCE = sa.OCCURRENCE
    INNER JOIN JourneyPlan JP
        ON jp.AssignedTO = sa.SalesmanNo
OPEN JPCUSTOMER_CURSOR
FETCH NEXT FROM JPCUSTOMER_CURSOR INTO @CUST_ID, @JP_ID
WHILE (@@FETCH_STATUS = 0)
BEGIN
    INSERT INTO [JPCustomer] ([CustomerID ], [JPID], [Frequency], [StartWeek],
                              [sat], [sun], [mon], [tue], [wed], [thu], [fri],
                              [VisitOrder], [ModifiedOn], [ModifiedBy],
                              [CreatedOn], [Createdby],
                              [RecordSource], [IsPotential])
    VALUES (@CUST_ID, @JP_ID, 4, 1,
            1, 1, 1, 1, 1, 1, 1,
            NULL, NULL, NULL,
            NULL, NULL, 0, 0)
    -- Advance the cursor; without this fetch @@FETCH_STATUS never changes
    -- and the loop re-inserts the first row forever.
    FETCH NEXT FROM JPCUSTOMER_CURSOR INTO @CUST_ID, @JP_ID
END
CLOSE JPCUSTOMER_CURSOR;
DEALLOCATE JPCUSTOMER_CURSOR;
You don't need a cursor for what you are doing. This will be much faster:
-- Set-based replacement for the cursor: one JPCustomer row per
-- (customer, journey plan) pair, with constant schedule defaults.
INSERT INTO [JPCustomer] (
    [CustomerID ], [JPID], [Frequency], [StartWeek],
    [sat], [sun], [mon], [tue], [wed], [thu], [fri],
    [VisitOrder], [ModifiedOn], [ModifiedBy], [CreatedOn], [Createdby],
    [RecordSource], [IsPotential]
)
SELECT
    cu.CustomerNo, jp.ID, 4, 1,       -- CustomerID, JPID, Frequency, StartWeek
    1, 1, 1, 1, 1, 1, 1,              -- sat .. fri
    NULL, NULL, NULL, NULL, NULL,     -- VisitOrder, ModifiedOn/By, CreatedOn/By
    0, 0                              -- RecordSource, IsPotential
FROM CUSTOMERNO# AS cu
INNER JOIN SalesmanNo# AS sa
    ON cu.OCCURRENCE = sa.OCCURRENCE
INNER JOIN JourneyPlan AS JP
    ON jp.AssignedTO = sa.SalesmanNo
You don't need and shouldn't use a cursor for this. Just an INSERT ... SELECT. EG
-- Set-based insert: the CTE pairs each customer with the journey plans of
-- the salesman sharing its OCCURRENCE; the INSERT adds constant defaults.
with q as
(
    -- The aliases must match the names read by the SELECT below.
    -- CustomerNo is the customer key column used by the original cursor query.
    select cu.CustomerNo as CustomerID, jp.ID as JPID
    from CUSTOMERNO# cu
    join SalesmanNo# sa
        on cu.OCCURRENCE = sa.OCCURRENCE
    join JourneyPlan JP
        on jp.AssignedTO = sa.SalesmanNo
)
INSERT INTO [JPCustomer] ([CustomerID],
    [JPID],
    [Frequency],
    [StartWeek],
    [sat],
    [sun],
    [mon],
    [tue],
    [wed],
    [thu],
    [fri],
    [VisitOrder],
    [ModifiedOn],
    [ModifiedBy],
    [CreatedOn],
    [Createdby],
    [RecordSource],
    [IsPotential])
SELECT CustomerID,
    JPID,
    4,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL,
    0,
    0
from q;

SQL Query joining 5 tables

Trying to create a query to select all jobs that are unpaid and who the customer is for that job.
So the query is required to first work out what they owe (the sum of s_partOrders quantity x sellPrice found in s_parts) and then subtract what they have paid from s_payments.
This query joins it all together but I don't know how to group it by jobNumber because there can be multiple payments and multiple part orders.
-- Joins job cards to customer, payments and part orders (with part prices).
-- Each job repeats once per (payment, part order) combination.
SELECT
    a.jobNumber,
    a.customerID,
    a.quoteStatus,
    a.costDelivery,
    a.costCallout,
    a.costLabour,
    b.customerID,
    b.firstName,
    b.lastName,
    c.paymentID,
    c.jobNumber,
    c.amount,
    d.orderID,
    d.jobNumber,
    d.partID,
    d.quantity,
    e.partID,
    e.sellPrice
FROM s_jobcards a
INNER JOIN s_customers b
    ON a.customerID = b.customerID
INNER JOIN s_payments c
    ON a.jobNumber = c.jobNumber
INNER JOIN s_partOrders d
    ON a.jobNumber = d.jobNumber
INNER JOIN s_parts e
    ON d.partID = e.partID
WHERE a.quoteStatus = 0
Sorry it's quite messy and incomplete...
Included table structure and some test data.
-- MySQL schema and sample data for the service-job example.
-- Note: the '0000-00-00' date literals below are rejected when MySQL runs
-- with NO_ZERO_DATE in strict mode.

-- Customers.
CREATE TABLE IF NOT EXISTS `s_customers` (
`customerID` int(20) NOT NULL AUTO_INCREMENT,
`firstName` text NOT NULL,
`lastName` text NOT NULL,
`address` text NOT NULL,
`suburb` text NOT NULL,
`state` text NOT NULL,
`postcode` text NOT NULL,
`phone` text NOT NULL,
`altPhone` text NOT NULL,
`email` text NOT NULL,
`notes` text NOT NULL,
`postAddress` text NOT NULL,
`serviceDueDate` date NOT NULL,
PRIMARY KEY (`customerID`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1 AUTO_INCREMENT=4 ;
INSERT INTO `s_customers` (`customerID`, `firstName`, `lastName`, `address`, `suburb`, `state`, `postcode`, `phone`, `altPhone`, `email`, `notes`, `postAddress`, `serviceDueDate`) VALUES
(1, 'David', 'Davinci', '654 Fake Road', 'Canning Vale', 'WA', '6164', '9546446', '45645646', 'dave@website.com', 'This guy is a butt', 'Cockburn Central', '2014-12-24'),
(2, 'Timmy', 'Trumpet', '69 something Street', 'Cockburn Central', 'WA', '6164', '9456456', '92344643', 'timmy@trumpet.com', 'Timmah?', '45 Timmy Street', '0000-00-00'),
(3, 'Jerry', 'Tester', '', '', '', '', '', '', '', '', '', '0000-00-00');
-- Job cards: one row per job, with its fixed delivery/callout/labour costs.
CREATE TABLE IF NOT EXISTS `s_jobcards` (
`jobNumber` int(6) NOT NULL AUTO_INCREMENT,
`dateReceived` date NOT NULL,
`workRequired` text NOT NULL,
`workCompleted` text NOT NULL,
`dateCompleted` date NOT NULL,
`customerID` int(5) NOT NULL,
`serviceTime` int(5) NOT NULL,
`serviceTech` int(1) NOT NULL,
`workOutstanding` text NOT NULL,
`quoteStatus` tinyint(1) NOT NULL DEFAULT '1',
`costDelivery` double NOT NULL,
`costCallout` double NOT NULL,
`costLabour` double NOT NULL,
PRIMARY KEY (`jobNumber`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1 AUTO_INCREMENT=6 ;
INSERT INTO `s_jobcards` (`jobNumber`, `dateReceived`, `workRequired`, `workCompleted`, `dateCompleted`, `customerID`, `serviceTime`, `serviceTech`, `workOutstanding`, `quoteStatus`, `costDelivery`, `costCallout`, `costLabour`) VALUES
(1, '2013-11-18', 'Create new service software. Yeah! 4534', 'Not a whole lot yet.?', '0000-00-00', 1, 5, 2, 'Complete this software?', 0, 50, 90, 90),
(2, '2013-11-18', 'work required', 'work done!', '0000-00-00', 1, 1, 3, 'work outstanding', 0, 0, 0, 0),
(3, '2014-12-01', 'Work harder.23432432 gdf', 'Go go!', '2014-12-01', 2, 1, 3, '', 1, 0, 0, 0),
(4, '0000-00-00', 'Whack some moles.', '', '0000-00-00', 3, 0, 1, '', 1, 0, 0, 0),
(5, '0000-00-00', '', '', '0000-00-00', 1, 0, 0, '', 1, 0, 0, 0);
-- Part orders: quantity of a part ordered for a job.
CREATE TABLE IF NOT EXISTS `s_partOrders` (
`orderID` int(11) NOT NULL AUTO_INCREMENT,
`jobNumber` int(11) NOT NULL,
`partID` int(11) NOT NULL,
`quantity` int(11) NOT NULL,
PRIMARY KEY (`orderID`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1 AUTO_INCREMENT=5 ;
INSERT INTO `s_partOrders` (`orderID`, `jobNumber`, `partID`, `quantity`) VALUES
(2, 0, 1, 5),
(3, 1, 1, 2),
(4, 1, 1, 6);
-- Parts catalogue with buy and sell prices.
CREATE TABLE IF NOT EXISTS `s_parts` (
`partID` int(10) NOT NULL AUTO_INCREMENT,
`partNumber` varchar(50) NOT NULL,
`partDescription` text NOT NULL,
`modelID` int(5) NOT NULL,
`buyPrice` double NOT NULL,
`sellPrice` double NOT NULL,
`notes` text NOT NULL,
PRIMARY KEY (`partID`),
UNIQUE KEY `partNumber` (`partNumber`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1 AUTO_INCREMENT=2 ;
INSERT INTO `s_parts` (`partID`, `partNumber`, `partDescription`, `modelID`, `buyPrice`, `sellPrice`, `notes`) VALUES
(1, '3453453453', 'Test Part', 1, 10.02, 30.5, 'This is a test part.');
-- Payments received against a job.
CREATE TABLE IF NOT EXISTS `s_payments` (
`paymentID` int(11) NOT NULL AUTO_INCREMENT,
`amount` double NOT NULL,
`type` text NOT NULL,
`jobNumber` int(11) NOT NULL,
`paymentDate` date NOT NULL,
PRIMARY KEY (`paymentID`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1 AUTO_INCREMENT=3 ;
INSERT INTO `s_payments` (`paymentID`, `amount`, `type`, `jobNumber`, `paymentDate`) VALUES
(2, 200, 'Visa', 1, '2014-12-05'),
(3, 20, 'Visa', 1, '2014-12-05');
Use outer joins where there might be no matching records, and aggregating data to jobnumber before joining will assist in ensuring the numbers are accurate:
-- One row per job with its customer, the total sell price of ordered parts
-- and the sum of payments received.
select
  j.jobNumber, j.customerID, j.quoteStatus, j.costDelivery, j.costCallout, j.costLabour
, c.customerID, c.firstName, c.lastName
, p.parts_sellprice
, sum(jp.amount) as paid -- NULL when the job has no payments
from s_jobcards as j
inner join s_customers as c on j.customerID = c.customerID
left outer join s_payments as jp on j.jobNumber = jp.jobNumber
left outer join (
      -- Parts are aggregated per job before joining so that several
      -- payments on a job cannot multiply the parts total.
      select
        d.jobNumber, sum(d.quantity * e.sellPrice) parts_sellprice
      from s_partOrders d
      left outer join s_parts e ON d.partID = e.partID
      group by
        d.jobNumber
      ) as p on j.jobNumber = p.jobNumber
group by
  j.jobNumber, j.customerID, j.quoteStatus, j.costDelivery, j.costCallout, j.costLabour
, c.customerID, c.firstName, c.lastName
, p.parts_sellprice -- single-valued per job; listed so every selected column is grouped
;
nb: I have assumed the sell price is multiplied by quantity
see this sqlfiddle demo: http://sqlfiddle.com/#!2/96f4c/1
-- One row per job with total cost (fixed costs + parts) and the amount
-- still outstanding after payments.
-- COALESCE turns "no parts" / "no payments" (NULL from the left joins)
-- into 0 so Total_Cost and Amount_Outstanding are never NULL.
select
  j.jobNumber, j.customerID, j.quoteStatus, j.costDelivery, j.costCallout, j.costLabour
, c.customerID, c.firstName, c.lastName
, coalesce(p.parts_sellprice, 0) as parts_sellprice
, coalesce(sum(jp.amount), 0) as paid
, (j.costDelivery + j.costCallout + j.costLabour + coalesce(p.parts_sellprice, 0)) as Total_Cost
, (j.costDelivery + j.costCallout + j.costLabour + coalesce(p.parts_sellprice, 0))
    - coalesce(sum(jp.amount), 0) as Amount_Outstanding
from s_jobcards as j
inner join s_customers as c on j.customerID = c.customerID
left outer join s_payments as jp on j.jobNumber = jp.jobNumber
left outer join (
      -- Parts are aggregated per job before joining so that several
      -- payments on a job cannot multiply the parts total.
      select
        d.jobNumber, sum(d.quantity * e.sellPrice) parts_sellprice
      from s_partOrders d
      left outer join s_parts e ON d.partID = e.partID
      group by
        d.jobNumber
      ) as p on j.jobNumber = p.jobNumber
group by
  j.jobNumber, j.customerID, j.quoteStatus, j.costDelivery, j.costCallout, j.costLabour
, c.customerID, c.firstName, c.lastName
, p.parts_sellprice -- single-valued per job; listed so every selected column is grouped
;

SQL Server Fuzzy Search with Percentage of match

I am using SQL Server 2008 R2 SP1.
I have a table with about 36034 records of customers.
I am trying to implement fuzzy search on the Customer Name field.
Here is Function for Fuzzy Search
-- Scores how closely @Target matches @Reference as a percentage (0-100).
--   * 100 when the two strings compare equal.
--   * 0 when either argument is NULL.
--   * Otherwise both strings are treated as WordLength characters long
--     (the longer DATALENGTH; @Reference is padded with '_'). Each position
--     of the reference earns WordLength minus the distance to the nearest
--     position in @Target holding the same character (0 if there is none);
--     the total is divided by WordLength squared.
ALTER FUNCTION [Party].[FuzySearch]
    (
      @Reference VARCHAR(200) ,
      @Target VARCHAR(200)
    )
RETURNS DECIMAL(5, 2)
    WITH SCHEMABINDING
AS
    BEGIN
        DECLARE @score DECIMAL(5, 2);

        -- Numbers 1..200 (the maximum argument length), built once and used
        -- for both the reference and the target positions.
        WITH    Numbers ( n )
                  AS ( SELECT   ones.d + 10 * tens.d + 1
                       FROM     ( VALUES ( 0), ( 1), ( 2), ( 3), ( 4),
                                         ( 5), ( 6), ( 7), ( 8), ( 9) ) ones ( d )
                                CROSS JOIN ( VALUES ( 0), ( 1), ( 2), ( 3), ( 4),
                                                    ( 5), ( 6), ( 7), ( 8), ( 9),
                                                    ( 10), ( 11), ( 12), ( 13), ( 14),
                                                    ( 15), ( 16), ( 17), ( 18), ( 19) ) tens ( d )
                     )
            SELECT  @score = CASE WHEN @Reference = @Target
                                  THEN CAST(100 AS NUMERIC(5, 2))
                                  WHEN @Reference IS NULL
                                       OR @Target IS NULL
                                  THEN CAST(0 AS NUMERIC(5, 2))
                                  ELSE ( SELECT CAST(SUM(LetterScore) * 100.0
                                                / MAX(WordLength * WordLength) AS NUMERIC(5, 2))
                                         FROM   ( -- one row per reference position
                                                  SELECT    seq = t1.n ,
                                                            ref.Letter ,
                                                            v.WordLength ,
                                                            LetterScore = v.WordLength
                                                            - ISNULL(MIN(tgt.n), v.WordLength)
                                                  FROM      ( -- v: padded strings and common length
                                                              SELECT    Reference = LEFT(@Reference
                                                                                  + REPLICATE('_', WordLength),
                                                                                  WordLength) ,
                                                                        Target = LEFT(@Target
                                                                                  + REPLICATE('_', WordLength),
                                                                                  WordLength) ,
                                                                        WordLength = WordLength
                                                              FROM      ( -- di: longer of the two byte lengths
                                                                          SELECT    WordLength = MAX(WordLength)
                                                                          FROM      ( VALUES
                                                                                    ( DATALENGTH(@Reference)),
                                                                                    ( DATALENGTH(@Target)) ) d ( WordLength )
                                                                        ) di
                                                            ) v
                                                            -- t1: positions 1..WordLength; ORDER BY makes
                                                            -- the TOP deterministic.
                                                            CROSS APPLY ( SELECT TOP ( v.WordLength )
                                                                                    n
                                                                          FROM      Numbers
                                                                          ORDER BY  n
                                                                        ) t1
                                                            CROSS APPLY ( SELECT    Letter = SUBSTRING(v.Reference,
                                                                                  t1.n, 1)
                                                                        ) ref
                                                            -- tgt: distance from t1.n to each target position
                                                            -- holding the same character.
                                                            OUTER APPLY ( SELECT TOP ( v.WordLength )
                                                                                    n = ABS(t1.n - t2.n)
                                                                          FROM      Numbers t2
                                                                          WHERE     SUBSTRING(@Target,
                                                                                  t2.n, 1) = ref.Letter
                                                                          ORDER BY  t2.n
                                                                        ) tgt
                                                  GROUP BY  t1.n ,
                                                            ref.Letter ,
                                                            v.WordLength
                                                ) letter_scores
                                       )
                             END;
        RETURN @score
    END
Here is the query to call the function
-- Scores every customer's first name against the reference phrase.
SELECT [Party].[FuzySearch]('First Name Middle Name Last Name', C.FirstName)
FROM dbo.Customer AS C
This is taking about 2 minutes 22 seconds to give me the percentage of fuzzy match for all
How can I fix this to run in less than a second? Any suggestions on how to make my function more robust are also welcome.
Expected ouput is 45.34, 40.00, 100.00, 23.00, 81.23.....
The best I have been able to do is simplify some of the query, and change it to a table valued function. Scalar functions are notoriously poor performers, and the benefit of an inline TVF is that the query definition is expanded out into the main query, much like a view.
This reduces the execution time significantly on the tests I have done.
-- Inline table-valued version of the fuzzy match score.
-- Returns exactly one row with column Score:
--   * 100 when @Reference = @Target
--   * 0 when either argument is NULL
--   * otherwise the percentage score, which is 0 when no character of
--     @Reference occurs anywhere in @Target.
ALTER FUNCTION dbo.FuzySearchTVF (@Reference VARCHAR(200), @Target VARCHAR(200))
RETURNS TABLE
AS
RETURN
(   WITH N (n) AS
    (   -- Numbers 1..WordLength (at most 10 * 10 * 2 = 200 rows).
        SELECT  TOP (ISNULL(CASE WHEN DATALENGTH(@Reference) > DATALENGTH(@Target)
                                THEN DATALENGTH(@Reference)
                                ELSE DATALENGTH(@Target)
                            END, 0))
                ROW_NUMBER() OVER(ORDER BY n1.n)
        FROM    (VALUES (1), (1), (1), (1), (1), (1), (1), (1), (1), (1)) AS N1 (n)
        CROSS JOIN (VALUES (1), (1), (1), (1), (1), (1), (1), (1), (1), (1)) AS N2 (n)
        CROSS JOIN (VALUES (1), (1)) AS N3 (n)
        WHERE   @Reference IS NOT NULL AND @Target IS NOT NULL
    ), Src AS
    (   -- Both strings padded with '_' to the longer length; yields no row
        -- for NULL or equal inputs (those are handled by the branches below).
        SELECT  Reference = CASE WHEN DATALENGTH(@Reference) > DATALENGTH(@Target) THEN @Reference
                                ELSE @Reference + REPLICATE('_', DATALENGTH(@Target) - DATALENGTH(@Reference))
                            END,
                -- Pad by (reference length - target length); the reversed
                -- subtraction gives a negative count and REPLICATE returns NULL.
                Target = CASE WHEN DATALENGTH(@Target) > DATALENGTH(@Reference) THEN @Target
                                ELSE @Target + REPLICATE('_', DATALENGTH(@Reference) - DATALENGTH(@Target))
                            END,
                WordLength = CASE WHEN DATALENGTH(@Reference) > DATALENGTH(@Target) THEN DATALENGTH(@Reference) ELSE DATALENGTH(@Target) END
        WHERE   @Reference IS NOT NULL
        AND     @Target IS NOT NULL
        AND     @Reference != @Target
    ), Scores AS
    (   -- One row per reference position. LEFT JOIN keeps positions whose
        -- character never occurs in @Target; they score 0 via ISNULL.
        SELECT  seq = t1.n ,
                Letter = SUBSTRING(s.Reference, t1.n, 1),
                s.WordLength ,
                LetterScore = s.WordLength - ISNULL(MIN(ABS(t1.n - t2.n)), s.WordLength)
        FROM    Src AS s
                CROSS JOIN N AS t1
                LEFT JOIN N AS t2
                    ON SUBSTRING(@Target, t2.n, 1) = SUBSTRING(s.Reference, t1.n, 1)
        WHERE   @Reference IS NOT NULL
        AND     @Target IS NOT NULL
        AND     @Reference != @Target
        GROUP BY t1.n, SUBSTRING(s.Reference, t1.n, 1), s.WordLength
    )
    SELECT  [Score] = 100
    WHERE   @Reference = @Target
    UNION ALL
    SELECT  0
    WHERE   @Reference IS NULL OR @Target IS NULL
    UNION ALL
    SELECT  CAST(SUM(LetterScore) * 100.0 / MAX(WordLength * WordLength) AS NUMERIC(5, 2))
    FROM    Scores
    WHERE   @Reference IS NOT NULL
    AND     @Target IS NOT NULL
    AND     @Reference != @Target
    GROUP BY WordLength
);
And this would be called as:
-- Apply the inline TVF (defined as dbo.FuzySearchTVF) to every customer.
SELECT  f.Score
FROM    dbo.Customer AS c
        CROSS APPLY [dbo].[FuzySearchTVF]('First Name Middle Name Last Name', c.FirstName) AS f
It is still a fairly complex function though, and, depending on the number of records in your customer table, I think getting it down to 1 second is going to be a bit of a challenge.
This is how I could accomplish this:
Explained further at: SQL Server Fuzzy Search - Levenshtein Algorithm
Create below file using any editor of your choice:
using System;
using System.Data;
using System.Data.SqlClient;
using System.Data.SqlTypes;
using Microsoft.SqlServer.Server;
public partial class StoredFunctions
{
    /// <summary>
    /// SQLCLR scalar function returning the case-insensitive Levenshtein
    /// similarity of two strings as a percentage (0-100, two decimals).
    /// </summary>
    /// <param name="stringOne">First string; SQL NULL is treated as empty.</param>
    /// <param name="stringTwo">Second string; SQL NULL is treated as empty.</param>
    /// <returns>
    /// 100 when both strings are empty, 0 when exactly one is empty, otherwise
    /// (1 - distance / length of the longer string) * 100.
    /// </returns>
    [Microsoft.SqlServer.Server.SqlFunction(IsDeterministic = true, IsPrecise = false)]
    public static SqlDouble Levenshtein(SqlString stringOne, SqlString stringTwo)
    {
        // Upper-case with the invariant culture: the function is declared
        // deterministic, so the result must not depend on the thread culture.
        string first = stringOne.IsNull ? string.Empty : stringOne.Value.ToUpperInvariant();
        string second = stringTwo.IsNull ? string.Empty : stringTwo.Value.ToUpperInvariant();

        int firstLength = first.Length;
        int secondLength = second.Length;

        // Quick answers before any table is allocated.
        if (firstLength == 0 && secondLength == 0)
        {
            return 100.0;
        }
        if (firstLength == 0 || secondLength == 0)
        {
            return 0.0;
        }

        // Classic dynamic programme, keeping only the previous and current row.
        int[] previous = new int[secondLength + 1];
        int[] current = new int[secondLength + 1];
        for (int j = 0; j <= secondLength; j++)
        {
            previous[j] = j;
        }

        for (int i = 1; i <= firstLength; i++)
        {
            current[0] = i;
            for (int j = 1; j <= secondLength; j++)
            {
                int substitutionCost = first[i - 1] == second[j - 1] ? 0 : 1;
                int deletion = previous[j] + 1;
                int insertion = current[j - 1] + 1;
                int substitution = previous[j - 1] + substitutionCost;
                current[j] = System.Math.Min(System.Math.Min(deletion, insertion), substitution);
            }

            int[] swap = previous;
            previous = current;
            current = swap;
        }

        // After the final swap the last computed row is in "previous".
        int distance = previous[secondLength];

        // Calculate percentage of match.
        double percentage = System.Math.Round(
            (1.0 - ((double)distance / (double)System.Math.Max(firstLength, secondLength))) * 100.0, 2);
        return percentage;
    }
}
Name it levenshtein.cs
Open a Command Prompt, change to the directory containing levenshtein.cs, then run csc.exe /t:library /out:UserFunctions.dll levenshtein.cs (you may have to give the full path of csc.exe from the .NET Framework 2.0 directory).
Once your DLL is ready. Add it to the assemblies Database>>Programmability>>Assemblies>> New Assembly.
Create function in your database:
-- T-SQL wrapper for the CLR method StoredFunctions.Levenshtein in the
-- UserFunctions assembly; returns the match percentage as FLOAT.
CREATE FUNCTION dbo.LevenshteinSVF
    (
      @S1 NVARCHAR(200) ,
      @S2 NVARCHAR(200)
    )
RETURNS FLOAT
AS EXTERNAL NAME
    UserFunctions.StoredFunctions.Levenshtein
GO
In my case I had to enable clr:
-- Enable CLR integration for the instance, then apply the setting.
EXEC sp_configure 'clr enabled', 1
GO
RECONFIGURE
GO
Test the function:
-- Smoke test: 'James' vs 'James Bond' differ by 5 edits over 10 characters.
SELECT dbo.LevenshteinSVF('James', 'James Bond')
Result: 50 % match