initialize and increment variable inside cte query sqlserver 2008 - sql

I am using sqlserver 2008 ,I want to initialize and increment variable (#NUMTwo) both at the same time, in my second part(Problem Line).
I am creating a cte query.
Is this possible , if yes then please let me know.
following is a sample example.I hope i am clear.
CREATE table #TempTable
(
childProductID INT,parentProductID INT,productModel varchar(50),[Num2] VARCHAR(100)
)
DECLARE #NUMTwo INT = 0
WITH tableR AS
(
-- First Part
SELECT childProductID = null,parentProductID=null,productModel from Products where productid in (#a),[Num2] = convert(varchar(100), '')
UNION ALL
--Second Part
SELECT e.childProductID,e.parentProductID,prd.productModel FROM ProductIncludes AS e
,[Num2] = convert(varchar(100),'1.' + #NUMTwo+=1 ) -- Problem line
INNER JOIN Products AS PRD ON e.childProductID = PRD.productID
WHERE parentProductID in (#a)
)
INSERT INTO #TempTable(childProductID,parentProductID,productModel,[Num2])
SELECT childProductID,parentProductID,productModel,[Num2]
END
SELECT * FROM #TempTable

You need to "Initialize" a column in the acnhor part of the query, and then "oncrement" this column in the recursive parts.
Something like
DECLARE #NUMTwo INT = 0
;WITH Test AS (
SELECT [Num2] = convert(varchar(MAX), ''),
#NUMTwo [N]
UNION ALL
SELECT [Num2] = '1.' + convert(varchar(MAX),[N]+1),
[N]+1
FROM TEst
WHERE [N] < 10
)
SELECT *
FROM Test
SQL Fiddle DEMO

If the parameter #NUMTwo is just for numbering rows you can use the ROW_NUMBER() OVER(...) instead of it like so:
WITH tableR AS
(
SELECT childProductID = NULL, parentProductID = NULL,
productModel, NUMTwo = CAST('0' AS VARCHAR(10))
FROM Products
WHERE
productid in (#a),
[Num2] = convert(varchar(100), '')
UNION ALL
SELECT e.childProductID, e.parentProductID,
prd.productModel,
NUMTwo = '1.' +
CAST( ROW_NUMBER() OVER(ORDER BY (SELECT 0)) AS VARCHAR(10))
FROM ProductIncludes AS e
INNER JOIN Products AS PRD ON e.childProductID = PRD.productID
WHERE parentProductID in (#a)
)

Related

How to complete and fill in gaps between dates in SQL?

I have data in Redshift that I'm aggregating to the Year-Quarter level i.e. number of items by Year-Quarter
I need to show a continuous trend and hence I need to fill-in the gaps in Year-Quarter. The picture below should give a clearer idea of my current data and desired output.
How can I achieve this in Redshift SQL?
A query like this should do the trick:
create table test (yq int, items int);
INSERT INTO test Values (20201,10),(20204, 15),(20213, 25),(20222, 30);
with recursive quarters(q) as (
select min(yq) as q
from test
union all
select decode(right(q::text, 1), 4, q + 7, q + 1) as q
from quarters
where q < (select max(yq) from test)
)
select q as yq, decode(items is null, true,
lag(items ignore nulls) over (order by q), items) as items
from test t
right join quarters q
on t.yq = q.q
order by q;
It uses a recursive CTE to generate the quarters range needed, right joins this with the source data, and then uses a LAG() window function to populate the items if the value is NULL.
This is known as forward filling values:
CREATE TABLE #Temp
(
[YQ] nvarchar(5),
[items] int
)
INSERT INTO #Temp Values ('20201',10),('20204', 15),('20213', 25),('20222', 30)
---------------------------------------------------------------------------------
DECLARE #start int, #end int, #starty int, #endy int
SELECT #start=1, #end=4
SELECT #starty=MIN(Substring(YQ,0,5)), #endy=MIN(Substring(YQ,0,5)) from #Temp
;With cte1(y) as
(
Select #starty as y
union all
Select y + 1
from cte1
where y <= #endy + 1
)
, cte2(n) as
(
Select #start as n
union all
Select n + 1
from cte2
where n < #end
)
SELECT t1.YQ AS 'Year-Quarter',
CASE WHEN t2.items is null then (SELECT TOP 1 MAX(items) from #Temp WHERE items is not null and YQ < t1.YQ) ELSE t2.items END AS '# Items'
FROM
(
SELECT CAST(cte1.y AS nvarchar(4)) + CAST(cte2.n AS nvarchar(1)) AS YQ
FROM cte1, cte2
) t1
LEFT JOIN #Temp t2 ON t2.YQ = t1.YQ
WHERE t1.YQ <= (SELECT MAX(YQ) FROM #Temp)
ORDER BY t1.YQ, t2.items

SQL String join to table

Given few strings as
SET #Codes1 = 3,4
SET #Codes2 = 1
SET #Codes3 = --empty
Table -- TblCode
Id Code
1 A
2 B
3 C
4 D
How to convert the #Codes1, #Codes2, #Codes3 with join to the table TblCode so it returns the following output :
1. #Codes1 = CD
2. #Codes2 = A
3. #Codes3 = --empty
Note that the concatenation for the output is without the comma.
PS - This is a small example to a much larger and complex data set. Kindly ignore any wrongful design pattern here.
You can try this. I added the answer just for #Codes1, but it works with #Codes2 and #Codes3 too.
DECLARE #TblCode TABLE (Id INT, Code VARCHAR(2))
INSERT INTO #TblCode
VALUES(1, 'A'),
(2,'B'),
(3,'C'),
(4,'D')
DECLARE #Codes1 VARCHAR(10) = '3,4'
DECLARE #Codes2 VARCHAR(10) = '1'
DECLARE #Codes3 VARCHAR(10) = NULL
DECLARE #CodesOut VARCHAR(10) = ''
;WITH CTE_1 AS (
SELECT CODE= #Codes1 + ','
)
, CTE_2 AS -- It silit text to rows
(
SELECT RIGHT(CTE_1.CODE, LEN(CTE_1.CODE) - CHARINDEX(',',CTE_1.CODE)) CODE , SUBSTRING(CTE_1.CODE, 0, CHARINDEX(',',CTE_1.CODE)) ID, CHARINDEX(',',CTE_1.CODE) AS CI
FROM CTE_1
UNION ALL
SELECT RIGHT(CTE_2.CODE, LEN(CTE_2.CODE) - CHARINDEX(',',CTE_2.CODE)) CODE , SUBSTRING(CTE_2.CODE, 0, CHARINDEX(',',CTE_2.CODE)) ID, CHARINDEX(',',CTE_2.CODE) AS CI
FROM CTE_2 WHERE LEN(CTE_2.CODE) > 0
)
SELECT #CodesOut = #CodesOut + C.Code FROM CTE_2 INNER JOIN #TblCode C ON CTE_2.ID = C.Id
SELECT #CodesOut
Result:
CD
You can use a recursive CTE. Here is one method:
with c as (
select c.*, row_number() over (partition by id) as seqnum
from c
),
cte as (
select cast(#codes as varchar(max)) as str,
replace(#codes, id, code) as newstr,
1 as lev
from c
where seqnum = 1
union all
select str, replace(newstr, id, code), lev + 1
from cte join
c
on c.seqnum = cte.lev + 1
)
select top (1) newstr
from cte
order by lev desc;
If there is an error in the syntax, set up a SQL Fiddle or Rextester or something similar so it can be fixed.

SQL split and merge string

this is my problem:
I got a string from a column like this:
**0756FJ89045GJD38**.pdf
Now i have to generate a path by this string:
/home/ars/07/56/FJ/89/04/5G/JD/38/0756FJ89045GJD38.pdf
I have to take two characters and build it up to one path level from left to right.
Maybe u can help me, thanks!
This may help:
DECLARE #p nvarchar(100) = '**0756FJ89045GJD38**.pdf',
#n int = 3
;WITH cte AS (
SELECT STUFF(REPLACE(SUBSTRING(#p,1,CHARINDEX('.',#p)-1),'*',''),1,0,'/') as p, 1 [level]
UNION ALL
SELECT STUFF(p,[level]+#n,0,'/'), [level]+#n
FROM CTE
WHERE LEN(STUFF(p,[level]+#n,0,'/')) >= [level]+#n
)
SELECT TOP 1 #p = '/home/ars'+p +'/'+REPLACE(#p,'*','')
FROM cte
ORDER BY [level] DESC
SELECT #p
Output:
/home/ars/07/56/FJ/89/04/5G/JD/38/0756FJ89045GJD38.pdf
EDIT:
If there is a table with PDF file names and all names are equal size, than you can do this way:
DECLARE #n int = 3
;WITH pdf AS (
SELECT *
FROM (VALUES
('**0756FJ89045GJD38**.pdf'),
('**1729DA8CD189700A**.pdf'),
('**A6710936BCD47832**.pdf'),
('**00A764D617B93978**.pdf')
) as t(file_)
)
,cte AS (
SELECT file_, STUFF(REPLACE(SUBSTRING(file_,1,CHARINDEX('.',file_)-1),'*',''),1,0,'/') as p, 1 [level]
FROM pdf
UNION ALL
SELECT file_, STUFF(p,[level]+#n,0,'/'), [level]+#n
FROM CTE
WHERE LEN(STUFF(p,[level]+#n,0,'/')) >= [level]+#n
)
SELECT TOP 1 WITH TIES '/home/ars'+p +'/' + REPLACE(c.file_,'**','')
FROM cte c
ORDER BY ROW_NUMBER() OVER (PARTITION BY file_ ORDER BY [level]) DESC
Output:
/home/ars/00/A7/64/D6/17/B9/39/78/00A764D617B93978.pdf
/home/ars/A6/71/09/36/BC/D4/78/32/A6710936BCD47832.pdf
/home/ars/17/29/DA/8C/D1/89/70/0A/1729DA8CD189700A.pdf
/home/ars/07/56/FJ/89/04/5G/JD/38/0756FJ89045GJD38.pdf

SQL replace from list

I'm trying to figure our how I can replace a string using data from another table
I have a table that looks like this:
Id Translation
1 Peter
2 Sandra
3 Olga
Now I want to select all and replace the translations using a list that looks like this:
Original New
e #
r ?
lg *%
So that the select list looks like this:
Id Translation
1 P#t#?
2 Sand?a
3 O*%a
So, for each translation, I need to have a REPLACE(Translation,Original,New).
Or in other words: I need to go through every "Translation" in my first list and make another loop in my replacement table to see what to replace
Bare in mind that the first list has 25'000 rows and the second has 50'000, so I can't just type it by hand :)
EDIT
Just to clarify:
The Original and New from my look up table can be both letters and words so the table can looks like this:
Original New
one two
three fifty
sun moon
To do this in one query, you need to use a recursive CTE. Something like:
with trans as (
select t.original, t.new, row_number() over (order by t.original) as seqnum,
count(*) over () as cnt
from translations
),
t as (
select tt.id, tt.string, replace(tt.string, trans.original, trans.new) as replaced,
seqnum + 1 as seqnum, cnt
from totranslate tt join
trans
on trans.id = 1
union all
select t.id, t.string, replace(t.string, trans.original, trans.new),
seqnum + 1 as seqnum, cnt
from t join
trans
on t.seqnum = trans.id
where t.seqnum <= t.cnt
)
select t.id, t.string, t.replaced
from t
where seqnum = cnt;
You can use a UDF:
CREATE FUNCTION [dbo].[Translate]
(
-- Add the parameters for the function here
#Str nvarchar(max)
)
RETURNS nvarchar(max)
AS
BEGIN
DECLARE #Result nvarchar(max) = #Str;
SELECT #Result = replace(#Result,Original,New) from dbo.Mappings order BY Pos;
RETURN #Result;
END
Here I assumed the table containing translations is called dbo.Mappings and beside the Original and New columns you need another column Pos int which will be used to determine the order in which the translations are applied (to address the problems mentioned by #Thorsten Kettner in comments)
Also with recursive cte:
DECLARE #translations TABLE
(
Id INT ,
Translation NVARCHAR(20)
)
INSERT INTO #translations
VALUES ( 1, 'Peter' ),
( 2, 'Sandra' ),
( 3, 'Olga' )
DECLARE #replacements TABLE
(
Original VARCHAR(2) ,
New VARCHAR(2)
)
INSERT INTO #replacements
VALUES ( 'e', '#' ),
( 'r', '?' ),
( 'lg', '*%' );
WITH cte1 AS (SELECT *, ROW_NUMBER() OVER (PARTITION BY id ORDER BY (SELECT 1)) rn
FROM #translations CROSS JOIN #replacements),
cte2 AS (SELECT Id, rn, REPLACE(Translation, Original, New) AS NTranslation
FROM cte1
WHERE rn = 1
UNION ALL
SELECT c2.Id, c2.rn + 1, REPLACE(c2.NTranslation, c1.Original, c1.New)
FROM cte1 c1
JOIN cte2 c2 ON c2.Id = c1.Id AND c2.rn + 1 = c1.rn)
SELECT * FROM cte2
WHERE rn = (SELECT COUNT(*) FROM #replacements)
ORDER BY Id
EDIT:
WITH cte1 AS (SELECT t.*, p.Id AS Old, p.Code, ROW_NUMBER() OVER (PARTITION BY t.id ORDER BY (SELECT 1)) rn
FROM translations t CROSS JOIN Property p),
cte2 AS (SELECT Id, rn, REPLACE(Trans, Old, Code) AS NTranslation
FROM cte1
WHERE rn = 1
UNION ALL
SELECT c2.Id, c2.rn + 1, REPLACE(c2.NTranslation, c1.Old, c1.Code)
FROM cte1 c1
JOIN cte2 c2 ON c2.Id = c1.Id AND c2.rn + 1 = c1.rn)
SELECT * FROM cte2
WHERE rn = (SELECT COUNT(*) FROM Property)
ORDER BY Id
Here is something I worked out that will allow you to replace multiple characters with one specified string.
[Split2] is stolen from https://blogs.msdn.microsoft.com/amitjet/2009/12/11/convert-comma-separated-string-to-table-4-different-approaches/
USE <Your Database>
GO
CREATE FUNCTION [dbo].[Split2]
(
#strString varchar(4000)
)
RETURNS #Result TABLE
(
RID INT IDENTITY(0,1) Primary Key
,Value varchar(4000)
)
AS
BEGIN
WITH StrCTE(start, stop) AS
(
SELECT 1, CHARINDEX(',' , #strString )
UNION ALL
SELECT stop + 1, CHARINDEX(',' ,#strString , stop + 1)
FROM StrCTE
WHERE stop > 0
)
INSERT INTO #Result
SELECT SUBSTRING(#strString , start, CASE WHEN stop > 0 THEN stop - start ELSE 4000 END) AS stringValue
FROM StrCTE
RETURN
END
GO
USE <Your Database>
GO
CREATE FUNCTION [dbo].[MultiReplace]
(
#MyString varchar(MAX)
,#RepChars varchar(4000)
,#NewChars varchar(4000)
)
RETURNS varchar(MAX)
AS
BEGIN
DECLARE #CurRow int = 0
DECLARE #MaxRow int
SELECT #MaxRow = MAX(RID)
FROM dbo.split2 ( #RepChars )
WHILE #CurRow <= #MaxRow
BEGIN
SELECT #MyString = REPLACE(#MyString,VALUE,#NewChars)
FROM dbo.split2 ( #RepChars )
WHERE RID = #CurRow
SET #CurRow = #CurRow + 1
END
RETURN (#MyString);
END
GO
In this example I replace each character with no space
SELECT [dbo].[MultiReplace]('6th month 2016-06 (test / requested)',',1st,2nd,3rd,4th,5th,6th,0,1,2,3,4,5,6,7,8,9,(,),/,-,+, ','')
Result:
monthtestrequested
I hope this is useful for you.

Join [one word per row] to rows of phrases with [multiple words per row]

Please excuse the length of the question. I included a test script to demo the situation and my best attempt at a solution.
There are two tables:
test_WORDS = Words extracted in order from several sources. The OBJ_FK column is the ID of the source. WORD_ID is an identifier for the word itself that is unique within the source. Each row contains one word.
test_PHRASE = a list of phrases to be searched for in test_WORDS. The PHRASE_TEXT column is a space separated phrase like 'foo bar' (see below) so that each row contains multiple words.
Requirement:
Return the first word from test_WORDS that is the start of a matching a phrase from test_PHRASE.
I would prefer something set based to avoid RBAR approach below. Also my solution is limited to 5 word phrases. I need to support up to 20 word phrases. Is it possible to match the words from a row in test_PHRASE to contiguous rows in the test_WORD without cursors?
After breaking the phrase words out into a temporary table, the problem boils down to matching portions of two sets together in row order.
-- Create test data
CREATE TABLE [dbo].[test_WORDS](
[OBJ_FK] [bigint] NOT NULL, --FK to the source object
[WORD_ID] [int] NOT NULL, --The word order in the source object
[WORD_TEXT] [nvarchar](50) NOT NULL,
CONSTRAINT [PK_test_WORDS] PRIMARY KEY CLUSTERED
(
[OBJ_FK] ASC,
[WORD_ID] ASC
)
) ON [PRIMARY]
GO
CREATE TABLE [dbo].[test_PHRASE](
[ID] [int], --PHRASE ID
[PHRASE_TEXT] [nvarchar](150) NOT NULL --Space-separated phrase
CONSTRAINT [PK_test_PHRASE] PRIMARY KEY CLUSTERED
(
[ID] ASC
)
)
GO
INSERT INTO dbo.test_WORDS
SELECT 1,1,'aaa' UNION ALL
SELECT 1,2,'bbb' UNION ALL
SELECT 1,3,'ccc' UNION ALL
SELECT 1,4,'ddd' UNION ALL
SELECT 1,5,'eee' UNION ALL
SELECT 1,6,'fff' UNION ALL
SELECT 1,7,'ggg' UNION ALL
SELECT 1,8,'hhh' UNION ALL
SELECT 2,1,'zzz' UNION ALL
SELECT 2,2,'yyy' UNION ALL
SELECT 2,3,'xxx' UNION ALL
SELECT 2,4,'www'
INSERT INTO dbo.test_PHRASE
SELECT 1, 'bbb ccc ddd' UNION ALL --should match
SELECT 2, 'ddd eee fff' UNION ALL --should match
SELECT 3, 'xxx xxx xxx' UNION ALL --should NOT match
SELECT 4, 'zzz yyy xxx' UNION ALL --should match
SELECT 5, 'xxx www ppp' UNION ALL --should NOT match
SELECT 6, 'zzz yyy xxx www' --should match
-- Create variables
DECLARE #maxRow AS INTEGER
DECLARE #currentRow AS INTEGER
DECLARE #phraseSubsetTable AS TABLE(
[ROW] int IDENTITY(1,1) NOT NULL,
[ID] int NOT NULL, --PHRASE ID
[PHRASE_TEXT] nvarchar(150) NOT NULL
)
--used to split the phrase into words
--note: No permissions to sys.dm_fts_parser
DECLARE #WordList table
(
ID int,
WORD nvarchar(50)
)
--Records to be returned to caller
DECLARE #returnTable AS TABLE(
OBJECT_FK INT NOT NULL,
WORD_ID INT NOT NULL,
PHRASE_ID INT NOT NULL
)
DECLARE #phrase AS NVARCHAR(150)
DECLARE #phraseID AS INTEGER
-- Get subset of phrases to simulate a join that would occur in production
INSERT INTO #phraseSubsetTable
SELECT ID, PHRASE_TEXT
FROM dbo.test_PHRASE
--represent subset of phrases caused by join in production
WHERE ID IN (2,3,4)
-- Loop each phrase in the subset, split into rows of words and return matches to the test_WORDS table
SET #maxRow = ##ROWCOUNT
SET #currentRow = 1
WHILE #currentRow <= #maxRow
BEGIN
SELECT #phrase=PHRASE_TEXT, #phraseID=ID FROM #phraseSubsetTable WHERE row = #currentRow
--clear previous phrase that was split into rows
DELETE FROM #WordList
--Recursive Function with CTE to create recordset of words, one per row
;WITH Pieces(pn, start, stop) AS (
SELECT 1, 1, CHARINDEX(' ', #phrase)
UNION ALL
SELECT pn + 1, stop + 1, CHARINDEX(' ', #phrase, stop + 1)
FROM Pieces
WHERE stop > 0)
--Create the List of words with the CTE above
insert into #WordList
SELECT pn,
SUBSTRING(#phrase, start, CASE WHEN stop > 0 THEN stop-start ELSE 1056 END) AS WORD
FROM Pieces
DECLARE #wordCt as int
select #wordCt=count(ID) from #WordList;
-- Do the actual query using a CTE with a rownumber that repeats for every SOURCE OBJECT
;WITH WordOrder_CTE AS (
SELECT OBJ_FK, WORD_ID, WORD_TEXT,
ROW_NUMBER() OVER (Partition BY OBJ_FK ORDER BY WORD_ID) AS rownum
FROM test_WORDS)
--CREATE a flattened record of the first word in the phrase and join it to the rest of the words.
INSERT INTO #returnTable
SELECT r1.OBJ_FK, r1.WORD_ID, #phraseID AS PHRASE_ID
FROM WordOrder_CTE r1
INNER JOIN #WordList w1 ON r1.WORD_TEXT = w1.WORD and w1.ID=1
LEFT JOIN WordOrder_CTE r2
ON r1.rownum = r2.rownum - 1 and r1.OBJ_FK = r2.OBJ_FK
LEFT JOIN #WordList w2 ON r2.WORD_TEXT = w2.WORD and w2.ID=2
LEFT JOIN WordOrder_CTE r3
ON r1.rownum = r3.rownum - 2 and r1.OBJ_FK = r3.OBJ_FK
LEFT JOIN #WordList w3 ON r3.WORD_TEXT = w3.WORD and w3.ID=3
LEFT JOIN WordOrder_CTE r4
ON r1.rownum = r4.rownum - 3 and r1.OBJ_FK = r4.OBJ_FK
LEFT JOIN #WordList w4 ON r4.WORD_TEXT = w4.WORD and w4.ID=4
LEFT JOIN WordOrder_CTE r5
ON r1.rownum = r5.rownum - 4 and r1.OBJ_FK = r5.OBJ_FK
LEFT JOIN #WordList w5 ON r5.WORD_TEXT = w5.WORD and w5.ID=5
WHERE (#wordCt < 2 OR w2.ID is not null) and
(#wordCt < 3 OR w3.ID is not null) and
(#wordCt < 4 OR w4.ID is not null) and
(#wordCt < 5 OR w5.ID is not null)
--loop
SET #currentRow = #currentRow+1
END
--Return the first words of each matching phrase
SELECT OBJECT_FK, WORD_ID, PHRASE_ID FROM #returnTable
GO
--Clean up
DROP TABLE [dbo].[test_WORDS]
DROP TABLE [dbo].[test_PHRASE]
Edited solution:
This is an edit of the correct solution provided below to account for non-contiguous word IDs. Hope this helps someone as much as it did me.
;WITH
numberedwords AS (
SELECT
OBJ_FK,
WORD_ID,
WORD_TEXT,
rowcnt = ROW_NUMBER() OVER
(PARTITION BY OBJ_FK ORDER BY WORD_ID DESC),
totalInSrc = COUNT(WORD_ID) OVER (PARTITION BY OBJ_FK)
FROM dbo.test_WORDS
),
phrasedwords AS (
SELECT
nw1.OBJ_FK,
nw1.WORD_ID,
nw1.WORD_TEXT,
PHRASE_TEXT = RTRIM((
SELECT [text()] = nw2.WORD_TEXT + ' '
FROM numberedwords nw2
WHERE nw1.OBJ_FK = nw2.OBJ_FK
AND nw2.rowcnt BETWEEN nw1.rowcnt AND nw1.totalInSrc
ORDER BY nw2.OBJ_FK, nw2.WORD_ID
FOR XML PATH ('')
))
FROM numberedwords nw1
GROUP BY nw1.OBJ_FK, nw1.WORD_ID, nw1.WORD_TEXT, nw1.rowcnt, nw1.totalInSrc
)
SELECT *
FROM phrasedwords pw
INNER JOIN test_PHRASE tp
ON LEFT(pw.PHRASE_TEXT, LEN(tp.PHRASE_TEXT)) = tp.PHRASE_TEXT
ORDER BY pw.OBJ_FK, pw.WORD_ID
Note: The final query I used in production uses indexed temp tables instead of CTEs. I also limited the length of the PHRASE_TEXT column to my needs. With these improvements, I was able to reduce my query time from over 3 minutes to 3 seconds!
Here's a solution that uses a different approach: instead of splitting the phrases into words it combines the words into phrases.
Edited: changed the rowcnt expression to using COUNT(*) OVER …, as suggested by #ErikE in the comments.
;WITH
numberedwords AS (
SELECT
OBJ_FK,
WORD_ID,
WORD_TEXT,
rowcnt = COUNT(*) OVER (PARTITION BY OBJ_FK)
FROM dbo.test_WORDS
),
phrasedwords AS (
SELECT
nw1.OBJ_FK,
nw1.WORD_ID,
nw1.WORD_TEXT,
PHRASE_TEXT = RTRIM((
SELECT [text()] = nw2.WORD_TEXT + ' '
FROM numberedwords nw2
WHERE nw1.OBJ_FK = nw2.OBJ_FK
AND nw2.WORD_ID BETWEEN nw1.WORD_ID AND nw1.rowcnt
ORDER BY nw2.OBJ_FK, nw2.WORD_ID
FOR XML PATH ('')
))
FROM numberedwords nw1
GROUP BY nw1.OBJ_FK, nw1.WORD_ID, nw1.WORD_TEXT, nw1.rowcnt
)
SELECT *
FROM phrasedwords pw
INNER JOIN test_PHRASE tp
ON LEFT(pw.PHRASE_TEXT, LEN(tp.PHRASE_TEXT)) = tp.PHRASE_TEXT
ORDER BY pw.OBJ_FK, pw.WORD_ID
Using a Split function should work.
Split Function
CREATE FUNCTION dbo.Split
(
#RowData nvarchar(2000),
#SplitOn nvarchar(5)
)
RETURNS #RtnValue table
(
Id int identity(1,1),
Data nvarchar(100)
)
AS
BEGIN
Declare #Cnt int
Set #Cnt = 1
While (Charindex(#SplitOn,#RowData)>0)
Begin
Insert Into #RtnValue (data)
Select
Data = ltrim(rtrim(Substring(#RowData,1,Charindex(#SplitOn,#RowData)-1)))
Set #RowData = Substring(#RowData,Charindex(#SplitOn,#RowData)+1,len(#RowData))
Set #Cnt = #Cnt + 1
End
Insert Into #RtnValue (data)
Select Data = ltrim(rtrim(#RowData))
Return
END
SQL Statement
SELECT DISTINCT p.*
FROM dbo.test_PHRASE p
LEFT OUTER JOIN (
SELECT p.ID
FROM dbo.test_PHRASE p
CROSS APPLY dbo.Split(p.PHRASE_TEXT, ' ') sp
LEFT OUTER JOIN dbo.test_WORDS w ON w.WORD_TEXT = sp.Data
WHERE w.OBJ_FK IS NULL
) ignore ON ignore.ID = p.ID
WHERE ignore.ID IS NULL
This performs a little better than other solutions given. if you don't need WORD_ID, just WORD_TEXT, you can remove a whole column. I know this was over a year ago, but I wonder if you can get 3 seconds down to 30 ms? :)
If this query seems good, then my biggest speed advice is to put the entire phrases into a separate table (using your example data, it would have only 2 rows with phrases of length 8 words and 4 words).
SELECT
W.OBJ_FK,
X.Phrase,
P.*,
Left(P.PHRASE_TEXT,
IsNull(NullIf(CharIndex(' ', P.PHRASE_TEXT), 0) - 1, 2147483647)
) WORD_TEXT,
Len(Left(X.Phrase, PatIndex('%' + P.PHRASE_TEXT + '%', ' ' + X.Phrase) - 1))
- Len(Replace(
Left(X.Phrase, PatIndex('%' + P.PHRASE_TEXT + '%', X.Phrase) - 1), ' ', '')
)
WORD_ID
FROM
(SELECT DISTINCT OBJ_FK FROM dbo.test_WORDS) W
CROSS APPLY (
SELECT RTrim((SELECT WORD_TEXT + ' '
FROM dbo.test_WORDS W2
WHERE W.OBJ_FK = W2.OBJ_FK
ORDER BY W2.WORD_ID
FOR XML PATH (''))) Phrase
) X
INNER JOIN dbo.test_PHRASE P
ON X.Phrase LIKE '%' + P.PHRASE_TEXT + '%';
Here's another version for curiosity's sake. It doesn't perform quite as well.
WITH Calc AS (
SELECT
P.ID,
P.PHRASE_TEXT,
W.OBJ_FK,
W.WORD_ID StartID,
W.WORD_TEXT StartText,
W.WORD_ID,
Len(W.WORD_TEXT) + 2 NextPos,
Convert(varchar(150), W.WORD_TEXT) MatchingPhrase
FROM
dbo.test_PHRASE P
INNER JOIN dbo.test_WORDS W
ON P.PHRASE_TEXT + ' ' LIKE W.WORD_TEXT + ' %'
UNION ALL
SELECT
C.ID,
C.PHRASE_TEXT,
C.OBJ_FK,
C.StartID,
C.StartText,
W.WORD_ID,
C.NextPos + Len(W.WORD_TEXT) + 1,
Convert(varchar(150), C.MatchingPhrase + Coalesce(' ' + W.WORD_TEXT, ''))
FROM
Calc C
INNER JOIN dbo.test_WORDS W
ON C.OBJ_FK = W.OBJ_FK
AND C.WORD_ID + 1 = W.WORD_ID
AND Substring(C.PHRASE_TEXT, C.NextPos, 2147483647) + ' ' LIKE W.WORD_TEXT + ' %'
)
SELECT C.OBJ_FK, C.PHRASE_TEXT, C.StartID, C.StartText, C.ID
FROM Calc C
WHERE C.PHRASE_TEXT = C.MatchingPhrase;