SQL query to match entire sentence against keywords - sql

I am trying to implement an FAQ section on the website where, after a user asks a question, the whole sentence is matched against a list of keywords, and if any match is found, the corresponding answer is returned to the user.
The database is running on MS SQL 2014,
+----+----------------------------------+----------------------------------------------------------+
| ID | Keywords | Answer |
+----+----------------------------------+----------------------------------------------------------+
| 1 | opening, open, hour, hours, time | We are open from 9AM to 6PM every day, Monday to Sunday. |
+----+----------------------------------+----------------------------------------------------------+
In this case, let's assume user will ask following question:
What time are you open?
This will be matched against the Keywords, as the 'time' is used in question and is among keywords, the first answer will be returned.
I would prefer to avoid using like for every single word in sentence if possible.
I tried using contains, but this doesn't work well with whole sentence:
SELECT * FROM FAQ
WHERE CONTAINS(Keywords,'"What time are you open?"');
http://sqlfiddle.com/#!6/895e5/1
Any help would be appreciated.

I suggest normalize your table and try this one
-- Create table variables for the normalized FAQ data and populate them.
-- Keywords are stored one per row (ID links a keyword to its answer)
-- instead of as a single comma-separated list.
DECLARE @keywordTable AS TABLE (ID INT, Keyword VARCHAR(100));
DECLARE @AnswerTable AS TABLE (ID INT, Answer NVARCHAR(MAX));
DECLARE @question NVARCHAR(MAX) = 'What time are you open?';

-- Remove the question mark so the last word is compared without punctuation.
SET @question = REPLACE(@question, '?', '');

INSERT INTO @keywordTable (ID, Keyword)
VALUES
    (1, 'opening'),
    (1, 'open'),
    (1, 'hours'),
    (1, 'hour'),
    (1, 'time'),
    (2, 'keyword2'),
    (2, 'second');

INSERT INTO @AnswerTable (ID, Answer)
VALUES
    (1, 'We are open from 9AM to 6PM every day, Monday to Sunday.'),
    (2, 'second question');
display data table
SELECT * FROM #keywordTable
ID Keyword 1 opening 1 open 1 hours 1 hour
1 time 2 keyword2 2 second
SELECT * FROM #AnswerTable
ID Answer 1 We are open from 9AM to 6PM every day, Monday to
Sunday. 2 second question
use function to split the question by words
-- Splits @string on a single-character @delimiter and returns one row per token
-- in the column splitdata.
--   @string    : text to split; a NULL input yields no rows (the loop condition is never true).
--   @delimiter : single character that separates tokens.
-- Notes:
--   * Two adjacent delimiters produce an empty-string token.
--   * LEN() ignores trailing spaces, so trailing spaces of @string are not scanned.
ALTER FUNCTION [MDM].[fn_SplitString]
(
    @string NVARCHAR(MAX),
    @delimiter CHAR(1)
)
RETURNS @output TABLE (splitdata NVARCHAR(MAX))
AS
BEGIN
    DECLARE @start INT, @end INT;

    -- @start is the first character of the current token, @end the next delimiter position.
    SELECT @start = 1, @end = CHARINDEX(@delimiter, @string);

    WHILE @start < LEN(@string) + 1
    BEGIN
        -- No further delimiter: the token runs to the end of the string.
        IF @end = 0
            SET @end = LEN(@string) + 1;

        INSERT INTO @output (splitdata)
        VALUES (SUBSTRING(@string, @start, @end - @start));

        SET @start = @end + 1;
        SET @end = CHARINDEX(@delimiter, @string, @start);
    END

    RETURN;
END
This is the result of split function
SELECT * FROM [MDM].[fn_SplitString](#question,' ')
splitdata What time are you open
Final Query
-- Return every answer that has at least one keyword equal to a word of the question.
SELECT a.Answer
FROM @AnswerTable AS a
WHERE a.ID IN (
    SELECT k.ID
    FROM @keywordTable AS k
    WHERE k.Keyword IN (
        -- One row per word of the question.
        SELECT s.splitdata
        FROM [MDM].[fn_SplitString](@question, ' ') AS s
    )
);
Final Result
Answer We are open from 9AM to 6PM every day, Monday to Sunday.

Well, one way could be to break the comma-separated keywords down into separate rows and match each of them against the sentence.
You can use any standard splitter for this. See this link here. I've used an XML-based one because your input will most likely not contain any characters that are invalid in XML.
I've also added multiple matching to the SQL. This way, if more than one answer shares a keyword, the code will pick the answer with the most matching keywords.
See working demo
-- Sample question and FAQ data (keywords kept as a comma-separated list per answer).
DECLARE @question VARCHAR(MAX);
SET @question = 'What time are you open?';

CREATE TABLE t (ID INT, Keywords VARCHAR(MAX), Answer VARCHAR(MAX));

INSERT INTO t (ID, Keywords, Answer)
VALUES
    (1, 'opening, open, hour, hours, time', 'We are open from 9AM to 6PM every day, Monday to Sunday.'),
    (2, 'open,weekends', 'No we don''t!');

-- Split each Keywords list into rows via XML, keep the keywords that occur in the
-- question, and return the answer with the most matching keywords.
SELECT TOP 1
    MAX(Answer) AS Answer
FROM t
OUTER APPLY (
    -- Trim each item: lists written as "a, b" would otherwise yield ' b', which
    -- cannot match a question that starts with that keyword.
    SELECT Item = LTRIM(RTRIM(y.i.value('(./text())[1]', 'nvarchar(4000)')))
    FROM (
        -- Turn 'a,b,c' into <i>a</i><i>b</i><i>c</i>.
        -- NOTE(review): keywords containing XML special characters (&, <) would make this CONVERT fail.
        SELECT x = CONVERT(XML, '<i>' + REPLACE(Keywords, ',', '</i><i>') + '</i>').query('.')
    ) AS a
    CROSS APPLY x.nodes('i') AS y(i)
) T1
WHERE @question LIKE '%' + Item + '%'   -- substring match; a NULL Item never matches
GROUP BY ID
ORDER BY COUNT(*) DESC, ID;             -- ID breaks ties deterministically

Related

How to get all the two characters long substrings separated by dot (.) from an email address in SQL server. I want Scalar function

I have an email column with values like 'claudio.passerini@uni.re.dit.mn.us'. I want to extract the two-character strings between dots (to check for country and state codes).
i want result like this
col1=re,mn,us
Solution
To do exactly what you've asked; i.e. pull back just the 2 char codes from within the email address's domain, you could use a function such as this:
-- Returns a comma-separated list of the two-letter segments found between dots in the
-- domain part of @email (everything after the '@'), or NULL when there are none.
-- NOTE(review): '[A-Z]' matching lowercase letters relies on a case-insensitive
-- collation, as the demo below assumes - confirm for your database.
create function dbo.fn_Get2AlphaCharCodesFromEmail
(
    @email nvarchar(254) --max length of an email is 254: http://stackoverflow.com/questions/386294/what-is-the-maximum-length-of-a-valid-email-address
) returns nvarchar(254)
as
begin
    declare @result nvarchar(254) = null
          , @maxLen int = 254

    -- i         : iteration counter (highest i = last row produced)
    -- remainder : part of the domain not yet examined, always starting with '.'
    -- result    : codes collected so far, comma separated
    ;with cte(i, remainder, result) as
    (
        -- anchor: the domain wrapped in dots so every segment is delimited on both sides
        select cast(0 as int)
             , cast('.' + substring(@email, charindex('@', @email) + 1, @maxLen) + '.' as nvarchar(254))
             , cast(null as nvarchar(254))
        union all
        -- recursive step: drop everything up to the end of the next 2-letter code
        -- (keeping the dot that follows it) and append that code to result
        select cast(i + 1 as int)
             , cast(substring(remainder, patindex('%.[A-Z][A-Z].%', remainder) + 3, @maxLen) as nvarchar(254))
             , cast(coalesce(result + ',', '') + substring(remainder, patindex('%.[A-Z][A-Z].%', remainder) + 1, 2) as nvarchar(254))
        from cte
        where patindex('%.[A-Z][A-Z].%', remainder) > 0 -- stop when no code is left
    )
    -- the last row generated holds the complete list
    select top 1 @result = result from cte order by i desc;

    return @result;
end
go
--demo
select dbo.fn_Get2AlphaCharCodesFromEmail ('claudio.passerini@uni.re.dit.mn.us')
--returns: re,mn,us
select dbo.fn_Get2AlphaCharCodesFromEmail ('claudio.passerini@uni.123.dit.mnx.usx')
--returns: NULL
Explanation
Create a function called fn_Get2AlphaCharCodesFromEmail in the schema dbo, which takes a single parameter, @email (a string of up to 254 characters), and returns a string of up to 254 characters.
create function dbo.fn_Get2AlphaCharCodesFromEmail
(
#email nvarchar(254)
) returns nvarchar(254)
as
begin
--... code that does the work goes here
end
declare the variables we'll be using later on.
#result holds the value we'll be returning from the function
#maxLen records the maximum length of an email; this makes it slightly easier should this length ever need to change; though not entirely simple since we have to specify the 254 length in our column & variable definitions later on anyway.
declare #result nvarchar(254) = null
, #maxLen int = 254
Now comes the interesting bit. We create a common table expression with 3 columns:
i is used to record which iteration each record was produced in; the highest value of i is the last record to be created.
remainder is used to hold the yet-to-be processed characters from the email.
result is used to record the 2 char codes; each new row adds another value to this column's comma separated values.
;with cte(i, remainder,result) as
(
--code to iterate through the email string, breaking it down, goes here
)
this gives us our first row in the cte "table".
The cast statements throughout this part are to ensure we have a consistent data type, as data types in a CTE are implicit, and not always correct
we initialise i (i.e. the first column) with value 0 to say that this is our first row (we could choose pretty much any value here; it doesn't matter
we initialise remainder (i.e. 2nd column) as the part of the email address which follows the @ character; i.e. the email's domain.
we initialise result (i.e. 3rd column) as null; as we've not yet found a result (i.e. a 2 char string within the email's domain)
there is no from component as we're just getting data from the #email variable; no tables/views/etc are required.
select cast(0 as int)
, cast('.' + substring(#email,charindex('#',#email)+1,#maxLen) + '.' as nvarchar(254))
, cast(null as nvarchar(254))
union all is used to combine the first result(s) with the results of the next (recurring) statement. NB: The CTE code before this statement is run once to give initial values; the code after is run once for each new set of rows generated.
union all
The recurring code in the CTE is applied to new rows in the CTE until no new rows are generated.
i takes the value of the previous iteration's row's i incremented by 1.
select cast(i+1 as int)
remainder takes the previous iteration's remainder, and removes everything before (and including) the next 2 character code (result).
patindex('%.[A-Z][A-Z].%',remainder) returns a number giving the location of the a string containing a dot followed by 2 letters followed by a dot, occurring anywhere in the input string
, cast(substring(remainder,patindex('%.[A-Z][A-Z].%',remainder)+3,#maxLen)as nvarchar(254))
result uses the same logic as remainder, only it takes the 2 characters found, rather than everything after them. These characters are added on to the end of the previous iteration's row's result value, separated by a comma.
, cast(coalesce(result + ',','') + substring(remainder,patindex('%.[A-Z][A-Z].%',remainder)+1,2) as nvarchar(254))
the from cte part just says that we're referencing the same "table" we're creating; i.e. this is how the recursion occurs
from cte
the where statement is used to prevent infinite recursion; i.e. once there are no more 2 char codes left in the remainder, stop looking.
where patindex('%.[A-Z][A-Z].%',remainder) > 0
Once we've found all the 2 char codes in the string, we know that the last row's result will contain the complete set; as such we assign this single row's value to the #result variable.
select top 1 #result = result
the from statement shows we're referencing the data we created in our with cte statement
from cte
the order by is used to determine which record comes first (i.e. which record is the top 1 record). We want it to be the last row generated by the CTE. Since we've been incrementing i by 1 each time, this last record will have the highest value of i, so by sorting by i desc (descending) that last generated row will be the row we get.
order by i desc;
Finally, we return the result generated above.
Return #result;
Alternative Approach
However, if you're trying to extract information from your emails, I'd recommend an alternate approach... have a list of values that you're looking for, and compare your email with that, without having to break apart the email address (beyond splitting on the # to ensure you're only checking the email's domain).
-- Lookup of known 2-letter codes; compare e-mail domains against it instead of parsing them.
declare @countryCodes table (code nchar(2), name nvarchar(64)) --you'd use a real table for this; I'm just using a table variable so this demo's throwaway code
insert into @countryCodes (code, name)
values
 ('es','Spain')
,('fr','France')
,('uk','United Kingdom')
,('us','USA')
--etc.

--check a single mail
declare @mail nvarchar(256) = 'claudio.passerini@uni.re.dit.mn.us'
-- domain part wrapped in dots so that '.us.' style patterns match the first and last segment too
declare @domain nvarchar(258) = '.' + substring(@mail, charindex('@', @mail) + 1, 256) + '.'

if exists (select 1 from @countryCodes where @domain like '%.' + code + '.%')
begin
    select name from @countryCodes where @domain like '%.' + code + '.%'
end
else
begin
    select 'no results found'
end

--check a bunch of mails
declare @emailsToCheck table (email nvarchar(256))
insert into @emailsToCheck (email)
values
 ('claudio.passerini@uni.re.dit.mn.us')
,('someone@someplace.co.uk')
,('cant.see.me@never.never.land')
,('some.fr.address.hidden@france.not.in.this.bit')

-- left join keeps mails without any known code (name is NULL for them);
-- only the domain is searched, so '.fr.' in the local part is ignored
select e.email, c.name
from @emailsToCheck e
left outer join @countryCodes c
    on '.' + substring(e.email, charindex('@', e.email) + 1, 256) + '.' like '%.' + c.code + '.%'
order by e.email, c.name
If you want individual columns, you will need to pivot your data after splitting out your strings with a table-valued function, as per Marc's answer. If you are happy having them in rows, you can just use the select statement inside the brackets.
Query to get the data
-- Sample data.
declare @t table (Email nvarchar(50));
insert into @t (Email) values ('claudio.passerini@uni.re.dit.mn.us'),('claudio.passerini@uni.ry.dit.mn.urg'),('claudio.passerini@uni.rn.dit.mn.uk');

-- One output row per e-mail, with up to six 2-character segments spread over columns [1]..[6].
select Email
      ,[1]
      ,[2]
      ,[3]
      ,[4]
      ,[5]
      ,[6]
from (
    select t.Email
          ,s.Item
           -- number the segments by their position in the address (ItemNumber),
           -- so column [1] is the first code found rather than the alphabetically first
          ,row_number() over (partition by t.Email order by s.ItemNumber) as rn
    from @t t
    cross apply dbo.DelimitedSplit8K(t.Email, '.') s
    where len(s.Item) = 2
) a
pivot
(
    max(Item) for rn in ([1],[2],[3],[4],[5],[6])
) pvt
Table valued function to split out the strings, courtesy of Jeff Moden
http://www.sqlservercentral.com/articles/Tally+Table/72993/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
-- Inline table-valued splitter: returns one row per element of @pString, split on the
-- single character @pDelimiter, as (ItemNumber, Item) with ItemNumber in string order.
ALTER FUNCTION [dbo].[DelimitedSplit8K]
--===== Define I/O parameters
        (@pString VARCHAR(8000), @pDelimiter CHAR(1))
--WARNING!!! DO NOT USE MAX DATA-TYPES HERE! IT WILL KILL PERFORMANCE!
RETURNS TABLE WITH SCHEMABINDING AS
 RETURN
--===== "Inline" CTE Driven "Tally Table" produces values from 1 up to 10,000...
     -- enough to cover VARCHAR(8000)
  WITH E1(N) AS (
                 SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
                 SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
                 SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
                ),                          --10E+1 or 10 rows
       E2(N) AS (SELECT 1 FROM E1 a, E1 b), --10E+2 or 100 rows
       E4(N) AS (SELECT 1 FROM E2 a, E2 b), --10E+4 or 10,000 rows max
 cteTally(N) AS (--==== This provides the "base" CTE and limits the number of rows right up front
                     -- for both a performance gain and prevention of accidental "overruns"
                 SELECT TOP (ISNULL(DATALENGTH(@pString),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
                ),
cteStart(N1) AS (--==== This returns N+1 (starting position of each "element" just once for each delimiter)
                 SELECT 1 UNION ALL
                 SELECT t.N+1 FROM cteTally t WHERE SUBSTRING(@pString,t.N,1) = @pDelimiter
                ),
cteLen(N1,L1) AS(--==== Return start and length (for use in substring)
                 SELECT s.N1,
                        ISNULL(NULLIF(CHARINDEX(@pDelimiter,@pString,s.N1),0)-s.N1,8000)
                   FROM cteStart s
                )
--===== Do the actual split. The ISNULL/NULLIF combo handles the length for the final element when no delimiter is found.
 SELECT ItemNumber = ROW_NUMBER() OVER(ORDER BY l.N1),
        Item       = SUBSTRING(@pString, l.N1, l.L1)
   FROM cteLen l
You can create your own function to split strings.
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
-- Splits @string on a single-character @delimiter and returns one row per token
-- in the column splitdata.
--   @string    : text to split; a NULL input yields no rows.
--   @delimiter : separator character; when NULL, the value of dbo.cSeparador() is used.
-- Two adjacent delimiters produce an empty-string token. LEN() ignores trailing
-- spaces, so trailing spaces of @string are not scanned.
CREATE FUNCTION [dbo].[fnSplitString]
(
    @string NVARCHAR(MAX),
    @delimiter CHAR(1)
)
RETURNS @output TABLE (splitdata NVARCHAR(MAX))
AS
BEGIN
    -- Fall back to the default separator defined by dbo.cSeparador().
    SET @delimiter = COALESCE(@delimiter, dbo.cSeparador());

    DECLARE @start INT, @end INT;

    -- @start is the first character of the current token, @end the next delimiter position.
    SELECT @start = 1, @end = CHARINDEX(@delimiter, @string);

    WHILE @start < LEN(@string) + 1
    BEGIN
        -- No further delimiter: the token runs to the end of the string.
        IF @end = 0
            SET @end = LEN(@string) + 1;

        INSERT INTO @output (splitdata)
        VALUES (SUBSTRING(@string, @start, @end - @start));

        SET @start = @end + 1;
        SET @end = CHARINDEX(@delimiter, @string, @start);
    END

    RETURN;
END
Using this function you can get all your country&state codes :
select splitdata from dbo.fnSplitString('claudio.passerini#uni.re.dit.mn.us', '.')
where len(splitdata) = 2
You can modify that query to concatenate the result on a single string :
SELECT
STUFF((SELECT ',' + splitdata
FROM dbo.fnSplitString('claudio.passerini#uni.re.dit.mn.us', '.')
WHERE len(splitdata) = 2
FOR XML PATH('')), 1, 1, '')
Here is how you put it into an scalar function :
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
-- Returns the two-character, dot-separated segments of @email as one comma-separated
-- string, or NULL when there are none.
-- NOTE(review): dbo.fnSplitString exposes no position column, so the order of the
-- codes in the result is not guaranteed by this query - confirm it is acceptable.
CREATE FUNCTION [dbo].[fnCountryCodes](@email nvarchar(max)) returns nvarchar(max)
AS
BEGIN
    RETURN (
        SELECT STUFF(
            (
                SELECT ',' + splitdata
                FROM dbo.fnSplitString(@email, '.')
                WHERE LEN(splitdata) = 2
                -- TYPE + .value() returns the text unescaped; plain FOR XML PATH('')
                -- would turn characters such as & or < into XML entities.
                FOR XML PATH(''), TYPE
            ).value('.', 'nvarchar(max)'),
            1, 1, '')   -- remove the leading comma
    );
END
You call it like this :
select dbo.fnCountryCodes('claudio.passerini#uni.re.dit.mn.us')
Alternatively you can create a table-valued function that returns all the 2 characters long substrings from the domain of a mail address :
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
-- Returns a single row holding the first five two-character, dot-separated segments
-- of @email in the columns subdomain1..subdomain5; missing segments are NULL.
CREATE FUNCTION [dbo].[fnCountryCodes] (@email NVARCHAR(MAX))
RETURNS @output TABLE (subdomain1 nvarchar(2), subdomain2 nvarchar(2), subdomain3 nvarchar(2), subdomain4 nvarchar(2), subdomain5 nvarchar(2))
AS
BEGIN
    DECLARE @subdomain1 nvarchar(2);
    DECLARE @subdomain2 nvarchar(2);
    DECLARE @subdomain3 nvarchar(2);
    DECLARE @subdomain4 nvarchar(2);
    DECLARE @subdomain5 nvarchar(2);

    -- LOCAL: functions may only use local cursors, and a local cursor cannot
    -- collide with another cursor of the same name on the connection.
    DECLARE CURSOR_SUBDOMAINS CURSOR LOCAL FAST_FORWARD FOR
        SELECT splitdata
        FROM dbo.fnSplitString(@email, '.')
        WHERE LEN(splitdata) = 2;

    OPEN CURSOR_SUBDOMAINS;

    -- A FETCH past the last row leaves its target variable untouched (NULL here).
    FETCH NEXT FROM CURSOR_SUBDOMAINS INTO @subdomain1;
    FETCH NEXT FROM CURSOR_SUBDOMAINS INTO @subdomain2;
    FETCH NEXT FROM CURSOR_SUBDOMAINS INTO @subdomain3;
    FETCH NEXT FROM CURSOR_SUBDOMAINS INTO @subdomain4;
    FETCH NEXT FROM CURSOR_SUBDOMAINS INTO @subdomain5;

    CLOSE CURSOR_SUBDOMAINS;
    DEALLOCATE CURSOR_SUBDOMAINS;

    INSERT INTO @output (subdomain1, subdomain2, subdomain3, subdomain4, subdomain5)
    VALUES (@subdomain1, @subdomain2, @subdomain3, @subdomain4, @subdomain5);

    RETURN;
END
You use it like that :
select * from dbo.fnCountryCodes('claudio.passerini#uni.re.dit.mn.us')

Parsing a string in SQL with If statement

I have a table with a string in some columns values that tells me if I should delete the row....however this string needs some parsing to understand whether to delete or not.
What is the string: it tells me the recurrence of meetings eg everyday starting 21st march for 10 meetings.
My table is a single column called recurrence:
Recurrence
-------------------------------
daily;1;21/03/2015;times;10
daily;1;01/02/2016;times;8
monthly;1;01/01/2016;times;2
weekly;1;21/01/2016;times;4
What to do: if the meetings are finished then remove the row.
The string is of the following format
<frequency tag>;<frequency number>;<start date>;times;<no of times>
For example
daily;1;21/03/2016;times;10
everyday starting 21st march, for 10 times
Does anybody know how I would calculate if the string indicates all meetings are in past? I want a select statement that tells me if the recurrence values are in past - true or false
I added one string ('weekly;1;21/05/2016;times;4') that definitely must not be deleted, to show some output. First, try loading all the data from your table into the table variable `@table1` and check that everything is deleted as expected.
-- Demo table holding recurrence strings of the form
--   <frequency tag>;<frequency number>;<start date dd/mm/yyyy>;times;<no of times>
DECLARE @table1 TABLE (
    Recurrence nvarchar(max)
)
DECLARE @xml xml

INSERT INTO @table1 (Recurrence) VALUES
('daily;1;21/03/2016;times;10'),
('daily;1;21/03/2015;times;10'),
('daily;1;01/02/2016;times;8'),
('monthly;1;01/01/2016;times;2'),
('weekly;1;21/01/2016;times;4'),
('weekly;1;21/05/2016;times;4')

-- Build one <s> element per row: five <r> children for the parts of the string,
-- plus a sixth <r> holding the original string so rows can be joined back later.
SELECT @xml = (
    SELECT CAST('<s><r>' + REPLACE(Recurrence, ';', '</r><r>') + '</r><r>' + Recurrence + '</r></s>' AS xml)
    FROM @table1
    FOR XML PATH ('')
)

;WITH cte AS (
    SELECT t.v.value('r[1]', 'nvarchar(10)') AS how,
           t.v.value('r[2]', 'int') AS every,                            -- interval between occurrences
           CONVERT(date, t.v.value('r[3]', 'nvarchar(10)'), 103) AS since, -- style 103 = dd/mm/yyyy
           t.v.value('r[4]', 'nvarchar(10)') AS what,
           t.v.value('r[5]', 'int') AS howmany,
           t.v.value('r[6]', 'nvarchar(max)') AS Recurrence
    FROM @xml.nodes('/s') AS t(v)
)
-- Delete the rows whose whole series lies in the past. The series spans
-- howmany * every units; unknown frequency tags yield NULL and are kept.
DELETE t
FROM @table1 t
INNER JOIN cte c ON c.Recurrence = t.Recurrence
WHERE
    CASE WHEN c.how = 'daily'   THEN DATEADD(day,   c.howmany * c.every, c.since)
         WHEN c.how = 'weekly'  THEN DATEADD(week,  c.howmany * c.every, c.since)
         WHEN c.how = 'monthly' THEN DATEADD(month, c.howmany * c.every, c.since)
         ELSE NULL END < GETDATE()

SELECT Recurrence FROM @table1
Output:
Recurrence
-----------------------------
weekly;1;21/05/2016;times;4
(1 row(s) affected)

How to get query result with stored procedure (convert item quantity from one table into my unit defined in second table)

I have two MSSQL2008 tables like this:
I have problem on the unit conversion logic.
The result I expect like this :
1589 cigar = 1ball, 5slop, 8box, 2pcs
52 pen = 2box, 12pcs
Basically I'm trying to take number (qty) from one table and to convert (split) him into the units which I defined in other table!
Note : Both table are allowed to add new row and new data (dinamic)
How can I get these results through a SQL stored procedure?
I totally misunderstood the question last time, so my previous answer has been removed (you can see it in the edit history, but it's not relevant to this question). However, I came up with a solution that may solve your problem...
NOTE: one little think about this solution, if you enter the value in second table like this
+--------+-------+
| Item | qty |
+--------+-------+
| 'cigar'| 596 |
+--------+-------+
result for this column will be
598cigar = 0ball, 5slop, 8box, 0pcs
note that there is a ball and pcs is there even if their value is 0, that probably can be fix if you don't want to show that value but I let you to play with it...
So let's back to solution and code. Solution have two stored procedures first one is the main and that one is the one you execute. I call it sp_MainProcedureConvertMe. Here is a code for that procedure:
-- Entry point: for every row of t2 (item, qty) builds a line such as
-- "<qty><item> = <unit breakdown>" by calling sp_ConvertMeIntoUnit, and returns
-- all lines as one single-column result set.
CREATE PROCEDURE sp_MainProcedureConvertMe
AS
-- Copy of t2 with a gap-free identity so the loop can step from 1 to MAX(srcId).
DECLARE @srcTable TABLE (srcId INT IDENTITY(1, 1), srcItem VARCHAR(50), srcQty INT)
-- Collected output: xVal1 = "<qty><item>", xVal2 = unit breakdown.
DECLARE @xTable TABLE (xId INT IDENTITY(1, 1), xVal1 VARCHAR(1000), xVal2 VARCHAR(1000))
DECLARE @maxId INT
DECLARE @start INT = 1
DECLARE @sItem VARCHAR(50)
DECLARE @sQty INT
DECLARE @val1 VARCHAR(1000)
DECLARE @val2 VARCHAR(1000)

INSERT INTO @srcTable (srcItem, srcQty)
SELECT item, qty
FROM t2

SELECT @maxId = MAX(srcId) FROM @srcTable

WHILE @start <= @maxId
BEGIN
    SELECT @sItem = srcItem, @sQty = srcQty
    FROM @srcTable
    WHERE srcId = @start

    SELECT @val1 = CAST(@sQty AS VARCHAR(12)) + @sItem

    -- @val2 receives the breakdown, e.g. "1ball, 5slop, 8box, 2pcs".
    EXECUTE sp_ConvertMeIntoUnit @sItem, @sQty, @val2 OUTPUT

    INSERT INTO @xTable (xVal1, xVal2)
    VALUES (@val1, @val2)

    SELECT @start = @start + 1
END

-- Return the lines in the order the items were read.
SELECT xVal1 + ' = ' + xVal2 FROM @xTable ORDER BY xId
GO
This stored procedure has two table variables. @srcTable is basically your second table, but instead of using your table's id it creates a new srcId, which is auto-incremented starting from 1. This is done for the sake of the while loop, to avoid problems when some values have been deleted, etc.; we want to be sure there won't be any skipped numbers.
There is few more variables some of them is used to make while loop work other one is to store data. I think it's not hard to figure out from code what are they used for...
While loop iterate throughout all rows from #srcTable take values processing them and insert them into #xTable which basically hold result.
In while loop we execute second stored procedure which have a task to calculate how many unit of something is there in specific number of item. I call her sp_ConvertMeIntoUnit and here is a code for her:
-- Breaks @inQty down into the units defined for @inItemName in t1, largest unit
-- first, and returns the text (e.g. "1ball, 5slop, 8box, 2pcs") in @myResult.
-- @myResult is NULL when t1 has no units for the item.
-- Requires QUOTED_IDENTIFIER ON at creation time because of the xml value() call.
CREATE PROCEDURE sp_ConvertMeIntoUnit
    @inItemName VARCHAR(50),
    @inQty INT,
    @myResult VARCHAR(5000) OUT
AS
-- Units of the item, numbered 1..n from the largest qty to the smallest.
DECLARE @rTable TABLE (rId INT IDENTITY(1, 1), rUnit VARCHAR(50), rQty INT)
-- Per unit: how many whole units fit (yVal) and what is left over (yRest).
-- yId lines up with rId because rows are inserted in loop order.
DECLARE @yTable TABLE (yId INT IDENTITY(1, 1), yVal INT, yRest INT)
DECLARE @maxId INT
DECLARE @start INT = 1
DECLARE @remaining INT = @inQty
DECLARE @divider INT
DECLARE @quant INT
DECLARE @rest INT

INSERT INTO @rTable (rUnit, rQty)
SELECT unit, qty
FROM t1
WHERE item = @inItemName
ORDER BY qty DESC

SELECT @maxId = MAX(rId) FROM @rTable

WHILE @start <= @maxId
BEGIN
    SELECT @divider = rQty FROM @rTable WHERE rId = @start

    SELECT @quant = @remaining / @divider,
           @rest = @remaining % @divider

    INSERT INTO @yTable (yVal, yRest)
    VALUES (@quant, @rest)

    -- Only the remainder is carried over to the next (smaller) unit.
    SELECT @remaining = @rest
    SELECT @start = @start + 1
END

-- Concatenate "<count><unit>" pieces in unit order. FOR XML PATH with ORDER BY
-- guarantees the order; "SELECT @var = @var + ..." over a join does not.
-- This is the statement to change if zero counts should be left out.
SELECT @myResult = STUFF(
    (
        SELECT ', ' + CAST(y.yVal AS VARCHAR(12)) + r.rUnit
        FROM @rTable AS r
        INNER JOIN @yTable AS y ON r.rId = y.yId
        ORDER BY r.rId
        FOR XML PATH(''), TYPE
    ).value('.', 'varchar(5000)'),
    1, 2, '')   -- remove the leading ', '
GO
This procedure has three parameters: it takes two input parameters from the first procedure, and one is returned as the result (OUTPUT). The input parameters are Item and Quantity.
There are also two table variables. In @rTable we store values with rId, which is auto-incremented and will always go from 1 to some number, no matter what the ids in the first table are. The other two values are inserted there from the first table based on the @inItemName parameter, which is sent from the first procedure... From your first table we use unit and quantity and store them with rId in @rTable, ordered by Qty from the biggest number to the lowest. This is the part of the code for that
INSERT INTO #rTable(rUnit, rQty)
SELECT unit, qty
FROM t1
WHERE item = #inItemName
ORDER BY qty DESC
Then we go into while loop where we do some maths. Basically we store into variable #divider values from #rTable. In the first iteration we take the biggest value calculate how many times it's contain into the number (second parameter we pass from first procedure is qty from the yours second table) and store it into #quant than we also calculate modulo and store it into variable #rest. This line
SELECT #rest = (#quentity % #divider)
After that we insert our values into #yTable. Before we and with iteration in while loop we assign #quentity variable value of #rest value because we need to work just with the remainder not with whole quantity any more. In second iteration we take next (the second greatest number in our #rTable) number and procedure repeat itself...
When while loop finish we create a string. This line here:
SELECT #result = COALESCE(#result + ', ', '') + CAST(y.yVal AS VARCHAR) + r.rUnit FROM #rTable AS r INNER JOIN #yTable AS y ON r.rId = y.yId
This is the line you want to change if you want to exclude result with 0 (i talk about them at the beginning of answer)...
And at the end we store result into output variable #myResult...
Result of this stored procedure will return string like this:
+--------------------------+
| 1ball, 5slop, 8box, 2pcs |
+--------------------------+
Hope I didn't miss anything important. Basically only think you should change here is the name of the table and their columns (if they are different) in first stored procedure instead t2 here
INSERT INTO...
SELECT item, qty
FROM t2
And in second one instead of t1 (and column if needed) here..
INSERT INTO...
SELECT unit, qty
FROM t1
WHERE item = #inItemName
ORDER BY qty DESC
Hope i help a little or give you an idea how this can be solved...
GL!
You seem to want string aggregation – something that does not have a simple instruction in Transact-SQL and is usually implemented using a correlated FOR XML subquery.
You have not provided names for your tables. For the purpose of the following example, the first table is called ItemDetails and the second one, Items:
-- For each item, list all of its unit definitions as one ", "-separated string.
SELECT
    itm.item,
    itm.qty,
    (
        -- Each detail row contributes ", <qty><unit>"; FOR XML PATH('') glues them together.
        SELECT ', ' + CAST(det.qty AS varchar(10)) + det.unit
        FROM dbo.ItemDetails AS det
        WHERE det.item = itm.item
        FOR XML PATH (''), TYPE
    ).value('substring(./text()[1], 3)', 'nvarchar(max)') AS details -- substring(..., 3) drops the leading ", "
FROM dbo.Items AS itm
;
For the input provided in the question, the above query would return the following output:
item qty details
----- ----------- ------------------------------
cigar 1598 1pcs, 1000ball, 12box, 100slop
pen 52 1pcs, 20box
You can further arrange the data into strings as per your requirement. I would recommend you do it in the calling application and use SQL only as your data source. However, if you must, you can do the concatenation in SQL as well.
Note that the above query assumes that the same unit does not appear more than once per item in ItemDetails. If it does and you want to aggregate qty values per unit before producing the detail line, you will need to change the query a little:
-- Same as the ungrouped query, but quantities are summed per unit first, so a
-- unit that occurs several times for an item appears only once in the list.
SELECT
    itm.item,
    itm.qty,
    (
        SELECT ', ' + CAST(SUM(det.qty) AS varchar(10)) + det.unit
        FROM dbo.ItemDetails AS det
        WHERE det.item = itm.item
        GROUP BY det.unit
        FOR XML PATH (''), TYPE
    ).value('substring(./text()[1], 3)', 'nvarchar(max)') AS details -- substring(..., 3) drops the leading ", "
FROM dbo.Items AS itm
;

Remove a sentence from a paragraph that has a specific pattern with T-SQL

I have a large number of descriptions that can be anywhere from 5 to 20 sentences each. I am trying to put a script together that will locate and remove a sentence that contains a word with numbers before or after it.
before example: Hello world. Todays department has 345 employees. Have a good day.
after example: Hello world. Have a good day.
My main problem right now is identifying the violation.
Here "345 employees" is what causes the sentence to be removed. However, each description will have a different number and possibly a different variation of the word employee.
I would like to avoid having to create a table of all the different variations of employee.
JTB
This would make a good SQL Puzzle.
Disclaimer: there are probably TONS of edge cases that would blow this up
This would take a string, split it out into a table with a row for each sentence, then remove the rows that matched a condition, and then finally join them all back into a string.
-- Splits @Val into sentences at each '.', removes every sentence that matches the
-- LIKE pattern @FilterCond, and returns the remaining sentences joined back together.
--   @Val        : text to clean (up to 2000 characters).
--   @FilterCond : LIKE pattern, e.g. '%[0-9] Emp%'.
-- Returns NULL when nothing is left (or @Val is NULL or empty).
CREATE FUNCTION dbo.fn_SplitRemoveJoin(@Val VARCHAR(2000), @FilterCond VARCHAR(100))
RETURNS VARCHAR(2000)
AS
BEGIN
    DECLARE @tbl TABLE (rid INT IDENTITY(1,1), val VARCHAR(2000))
    DECLARE @t VARCHAR(2000)
    DECLARE @dot INT

    -- Split into table @tbl: each row is one sentence including its trailing '.'
    SET @dot = CHARINDEX('.', @Val)
    WHILE @dot > 0
    BEGIN
        SET @t = LEFT(@Val, @dot)
        INSERT INTO @tbl (val) VALUES (@t)
        -- Cut by position, not by LEN(): LEN() ignores trailing spaces, so
        -- RIGHT(@Val, LEN(@Val) - LEN(@t)) drops the wrong characters when the
        -- text ends in spaces.
        SET @Val = SUBSTRING(@Val, @dot + 1, 2000)
        SET @dot = CHARINDEX('.', @Val)
    END

    -- Keep a trailing fragment that has no closing '.'
    IF (LEN(@Val) > 0)
        INSERT INTO @tbl (val) VALUES (@Val)

    -- Filter out condition
    DELETE FROM @tbl WHERE val LIKE @FilterCond

    -- Join back into 1 string, in original order; deleted rids are skipped
    DECLARE @i INT, @last INT, @rv VARCHAR(2000)
    SET @i = 1
    SELECT @last = MAX(rid) FROM @tbl
    WHILE @i <= @last
    BEGIN
        SELECT @rv = ISNULL(@rv, '') + ISNULL(val, '') FROM @tbl WHERE rid = @i
        SET @i = @i + 1
    END

    RETURN @rv
END
go
CREATE TABLE #TMP (rid INT IDENTITY(1,1), sentence VARCHAR(2000))
INSERT #tmp (sentence) VALUES ('Hello world. Todays department has 345 employees. Have a good day.')
INSERT #tmp (sentence) VALUES ('Hello world. Todays department has 15 emps. Have a good day. Oh and by the way there are 12 employees somewhere else')
SELECT
rid, sentence, dbo.fn_SplitRemoveJoin(sentence, '%[0-9] Emp%')
FROM #tmp t
returns
rid | sentence | |
1 | Hello world. Todays department has 345 employees. Have a good day. | Hello world. Have a good day.|
2 | Hello world. Todays department has 15 emps. Have a good day. Oh and by the way there are 12 employees somewhere else | Hello world. Have a good day. |
I've used the split/remove/join technique as well.
The main points are:
This uses a pair of recursive CTEs, rather than a UDF.
This will work with all English sentence endings: . or ! or ?
This removes whitespace to make the comparison for "digit then employee" so you don't have to worry about multiple spaces and such.
Here's the SqlFiddle demo, and the code:
-- Purpose: remove every sentence that contains a digit directly followed by "employ"
-- (ignoring whitespace) from each row of Descriptions, using two recursive CTEs.
-- Split descriptions into sentences (could use period, exclamation point, or question mark)
-- Delete any sentences that, without whitespace, are like '%[0-9]employ%'
-- Join sentences back into descriptions
-- NOTE(review): recursive CTEs stop at 100 levels unless OPTION (MAXRECURSION n) is set;
-- confirm no description has more sentences than that.
;with Splitter as (
-- Anchor: one row per description; Data = trimmed text still to split, no sentence yet (number 0)
select ID
, ltrim(rtrim(Data)) as Data
, cast(null as varchar(max)) as Sentence
, 0 as SentenceNumber
from Descriptions -- Your table here
union all
-- Recursive step: peel the text up to and including the first terminator off Data
select ID
-- new Data: what follows the first terminator, or NULL when none is left (this ends the recursion)
, case when Data like '%[.!?]%' then right(Data, len(Data) - patindex('%[.!?]%', Data)) else null end
-- Sentence: text up to and including the terminator, or all remaining text when there is none
, case when Data like '%[.!?]%' then left(Data, patindex('%[.!?]%', Data)) else Data end
, SentenceNumber + 1
from Splitter
where Data is not null
), Joiner as (
-- Anchor: start every description with an empty string at sentence number 0
select ID
, cast('' as varchar(max)) as Data
, 0 as SentenceNumber
from Splitter
group by ID
union all
-- Recursive step: append the next sentence (number + 1) unless it has to be dropped
select j.ID
, j.Data +
-- Don't want "digit+employ" sentences, remove whitespace to search
-- (char(9) = tab, char(10) = line feed, char(13) = carriage return, char(32) = space)
case when replace(replace(replace(replace(s.Sentence, char(9), ''), char(10), ''), char(13), ''), char(32), '') like '%[0-9]employ%' then '' else s.Sentence end
, s.SentenceNumber
from Joiner j
join Splitter s on j.ID = s.ID and s.SentenceNumber = j.SentenceNumber + 1
)
-- Final Select: per ID keep only the Joiner row with the highest sentence number,
-- i.e. the fully re-assembled description
select a.ID, a.Data
from Joiner a
join (
-- Only get max SentenceNumber
select ID, max(SentenceNumber) as SentenceNumber
from Joiner
group by ID
) b on a.ID = b.ID and a.SentenceNumber = b.SentenceNumber
order by a.ID, a.SentenceNumber
Here is one way to do this. Please note that it only works if the whole text contains exactly one number.
-- Remove the sentence that contains a number from @d and return the rest.
-- T-SQL local variables must start with '@'.
-- Limitation: the search is anchored on the first digit found from each end
-- of the text, so the result is only meaningful when the text contains a
-- single number.
DECLARE @d VARCHAR(1000) = 'Hello world. Todays department has 345 employees. Have a good day.';
DECLARE @dr VARCHAR(1000);

-- Working on the reversed text lets CHARINDEX find the period that
-- PRECEDES the number (it is the first period after the digit in @dr).
SET @dr = REVERSE(@d);

SELECT
    -- Text before the numbered sentence, without its closing period
    REVERSE(RIGHT(@dr, LEN(@dr) - CHARINDEX('.', @dr, PATINDEX('%[0-9]%', @dr))))
    -- Text after the numbered sentence, starting at that sentence's closing period
    + RIGHT(@d, LEN(@d) - CHARINDEX('.', @d, PATINDEX('%[0-9]%', @d)) + 1);

dynamic interval creation in SQL

I have the following problem, that I would like to solve with transact-sql.
I have something like this
Start | End | Item
1 | 5 | A
3 | 8 | B
and I want to create something like
Start | End | Item-Combination
1 | 2 | A
3 | 5 | A-B
6 | 8 | B
For the Item-Combination concatenation I already thought of using the FOR XML statement. But in order to create the different new intervals... I really don't know how to approach it. Any idea?
Thanks.
I had a very similar problem with some computer usage data. I had session data indicating login/logout times. I wanted to find the times (hour of day per day of week) that were the most in demand, that is, the hours where the most users were logged in. I ended up solving the problem client-side using hash tables. For each session, I would increment the bucket for a particular location corresponding to the day of week and hour of day for each day/hour for which the session was active. After examining all sessions the hash table values show the number of logins during each hour for each day of the week.
I think you could do something similar, keeping track of each item seen for each start/end value. You could then reconstruct the table by collapsing adjacent entries that have the same item combination.
And, no, I could not think of a way to solve my problem with SQL either.
This is a fairly typical range-finding problem, with the concatenation thrown in. Not sure if the following fits exactly, but it's a starting point. (Cursors are usually best avoided except in the small set of cases where they are faster than set-based solutions, so before the cursor haters get on me please note I use a cursor here on purpose because this smells to me like a cursor-friendly problem -- I typically avoid them.)
So if I create data like this:
-- Source data for the interval problem: one row per Item with its
-- [Start] and [End] values.
CREATE TABLE dbo.sourceValues
(
    [Start] int NOT NULL,
    [End] int NOT NULL,
    [Item] varchar(100) NOT NULL
) ON [PRIMARY]
GO

-- Every row must have [End] strictly greater than [Start]
ALTER TABLE dbo.sourceValues WITH CHECK
    ADD CONSTRAINT End_after_Start CHECK ([End] > [Start])
GO

-- Enable the constraint
ALTER TABLE dbo.sourceValues
    CHECK CONSTRAINT End_after_Start
GO
-- Fill sourceValues with 1000 random rows to experiment with.
-- T-SQL local variables must start with '@'.
DECLARE @i int = 0;
DECLARE @start int;
DECLARE @end int;
DECLARE @item varchar(100);

WHILE @i < 1000
BEGIN
    -- The modulo is taken BEFORE ABS in all three expressions: ABS() of the
    -- smallest int that CHECKSUM can return raises an arithmetic overflow.
    SET @start = ABS(CHECKSUM(NEWID()) % 100) + 1;        -- "random" int, 1..100
    SET @end = @start + ABS(CHECKSUM(NEWID()) % 10) + 2;  -- bigger random int, @start + 2..11
    SET @item = CHAR(ABS(CHECKSUM(NEWID()) % 5) + 65);    -- random letter A-E

    PRINT @start; PRINT @end; PRINT @item;

    INSERT INTO sourceValues ([Start], [End], Item)
    VALUES (@start, @end, @item);

    SET @i += 1;
END
Then I can treat the problem like this: each "Start" AND each "End" value represents a change in the collection of current Items, either adding one or removing one, at a certain time. In the code below I alias that notion as "event," meaning an Add or Remove. Each start or end is like a time, so I use the term "tick." If I make a collection of all the events, ordered by event time (Start AND End), I can iterate through it while keeping a running tally in an in-memory table of all the Items that are in play. Each time the tick value changes, I take a snapshot of that tally:
-- Walk through every Start ("Add") and End ("Remove") value of sourceValues
-- in tick order, keeping the set of currently active items in a table
-- variable. Each time the tick changes, the active set is written to #result
-- as one interval (previous tick, current tick, item list).
-- Notes:
--   * Local variables and table variables must start with '@'; #result is a
--     real temporary table and keeps its '#'.
--   * Spans where no item is active produce rows with an empty Items value,
--     including a leading span from the initial @lastTick of 1.
--   * The concatenation has no ORDER BY, so the order of items inside Items
--     is not guaranteed.
DECLARE @tick int;
DECLARE @lastTick int;
DECLARE @event varchar(100);
DECLARE @item varchar(100);
DECLARE @concatList varchar(max);
DECLARE @currentItemsList TABLE (Item varchar(100));

CREATE TABLE #result (Start int, [End] int, Items varchar(max));

DECLARE eventsCursor CURSOR FAST_FORWARD FOR
    SELECT tick, [event], item
    FROM (
        SELECT start AS tick, 'Add' AS [event], item FROM sourceValues AS adds
        UNION ALL
        SELECT [end] AS tick, 'Remove' AS [event], item FROM sourceValues AS removes
    ) AS [events]
    ORDER BY tick;

SET @lastTick = 1;

OPEN eventsCursor;
FETCH NEXT FROM eventsCursor INTO @tick, @event, @item;

WHILE @@FETCH_STATUS = 0
BEGIN
    -- Tick changed: snapshot the items that were active since @lastTick
    IF @tick != @lastTick
    BEGIN
        SET @concatList = '';

        SELECT @concatList = @concatList
            + CASE WHEN LEN(@concatList) > 0 THEN '-' ELSE '' END
            + Item
        FROM @currentItemsList;

        INSERT INTO #result (Start, [End], Items)
        VALUES (@lastTick, @tick, @concatList);
    END

    -- Apply the event; TOP (1) removes a single occurrence so that the same
    -- item coming from another, still open range stays in the list
    IF @event = 'Add'
    BEGIN
        INSERT INTO @currentItemsList (Item) VALUES (@item);
    END
    ELSE IF @event = 'Remove'
    BEGIN
        DELETE TOP (1) FROM @currentItemsList WHERE Item = @item;
    END

    SET @lastTick = @tick;
    FETCH NEXT FROM eventsCursor INTO @tick, @event, @item;
END

CLOSE eventsCursor;
DEALLOCATE eventsCursor;

SELECT Start, [End], Items
FROM #result
ORDER BY Start;

DROP TABLE #result;
Using a cursor for this special case allows just one "pass" through the data, like a running totals problem. Itzik Ben-Gan has some great examples of this in his SQL 2005 books.
Thanks a lot for all the answers; for the moment I have found a way of doing it. Since I'm dealing with a data warehouse, and I have a Time dimension, I could do some joins with the Time dimension in the style "inner join DimTime t on t.date between f.start_date and end_date".
It's not very good from the performance point of view, but it seems it's working for me.
I'll give onupdatecascade's implementation a try, to see which one suits me better.
This exactly emulates and solves the problem described above:
-- prepare problem, it can have many rows with overlapping ranges
-- (table variables, like all local variables, must start with '@')
DECLARE @range TABLE
(
    Item char(1) PRIMARY KEY,
    [Start] int,
    [End] int
);

INSERT @range (Item, [Start], [End]) VALUES ('A', 1, 5);
INSERT @range (Item, [Start], [End]) VALUES ('B', 3, 8);

-- unroll the ranges into helper table: one row per item and per number
-- covered by that item's range
DECLARE @usage TABLE
(
    Item char(1),
    Number int
);

-- Set-based unroll: each range starts at [Start] and is extended by one
-- until it reaches [End] (Stop), for all items in a single statement.
WITH Num (Item, Pos, Stop) AS
(
    SELECT Item, [Start], [End]
    FROM @range

    UNION ALL

    SELECT Item, Pos + 1, Stop
    FROM Num
    WHERE Pos < Stop
)
INSERT @usage (Item, Number)
SELECT Item, Pos
FROM Num
OPTION (MAXRECURSION 0); -- a range may be longer than the default limit of 100 levels

-- compile overlaps
WITH overlaps AS
(
    -- For every number, the list of items covering it, e.g. 'A-B-'.
    -- ORDER BY makes the list deterministic, so the same combination
    -- always produces the same string.
    SELECT
        o.Number,
        (
            SELECT i.Item + '-'
            FROM @usage AS i
            WHERE i.Number = o.Number
            ORDER BY i.Item
            FOR XML PATH('')
        ) AS Items
    FROM @usage AS o
    GROUP BY o.Number
),
islands AS
(
    -- Within one combination, consecutive numbers share the same value of
    -- Number - ROW_NUMBER(). Grouping by it keeps separate runs apart:
    -- with A = 1-10 and B = 4-6, 'A' must yield 1-3 and 7-10, not 1-10.
    SELECT
        Number,
        Items,
        Number - ROW_NUMBER() OVER (PARTITION BY Items ORDER BY Number) AS IslandKey
    FROM overlaps
)
SELECT
    MIN(Number) AS [Start],
    MAX(Number) AS [End],
    LEFT(Items, LEN(Items) - 1) AS Items -- beautify: drop the trailing '-'
FROM islands
GROUP BY Items, IslandKey
ORDER BY [Start];