replace value in varchar(max) field with join - sql

I have a table that contains text field with placeholders. Something like this:
Row Notes
1. This is some notes ##placeholder130## this ##myPlaceholder##, #oneMore#. End.
2. Second row...just a ##test#.
(This table contains about 1-5k rows on average. Average number of placeholders in one row is 5-15).
Now, I have a lookup table that looks like this:
Name Value
placeholder130 Dog
myPlaceholder Cat
oneMore Cow
test Horse
(Lookup table will contain anywhere from 10k to 100k records)
I need to find the fastest way to join those placeholders from strings to a lookup table and replace with value. So, my result should look like this (1st row):
This is some notes Dog this Cat, Cow. End.
What I came up with was to split each row into multiple for each placeholder and then join it to lookup table and then concat records back to original row with new values, but it takes around 10-30 seconds on average.

You could try to split the string using a numbers table and rebuild it with for xml path.
select (
select coalesce(L.Value, T.Value)
from Numbers as N
cross apply (select substring(Notes.notes, N.Number, charindex('##', Notes.notes + '##', N.Number) - N.Number)) as T(Value)
left outer join Lookup as L
on L.Name = T.Value
where N.Number <= len(notes) and
substring('##' + notes, Number, 2) = '##'
order by N.Number
for xml path(''), type
).value('text()[1]', 'varchar(max)')
from Notes
SQL Fiddle
I borrowed the string splitting from this blog post by Aaron Bertrand

SQL Server is not very fast with string manipulation, so this is probably best done client-side. Have the client load the entire lookup table, and replace the notes as they arrived.
Having said that, it can of course be done in SQL. Here's a solution with a recursive CTE. It performs one lookup per recursion step:
; with Repl as
(
select row_number() over (order by l.name) rn
, Name
, Value
from Lookup l
)
, Recurse as
(
select Notes
, 0 as rn
from Notes
union all
select replace(Notes, '##' + l.name + '##', l.value)
, r.rn + 1
from Recurse r
join Repl l
on l.rn = r.rn + 1
)
select *
from Recurse
where rn =
(
select count(*)
from Lookup
)
option (maxrecursion 0)
Example at SQL Fiddle.
Another option is a while loop to keep replacing lookups until no more are found:
declare #notes table (notes varchar(max))
insert #notes
select Notes
from Notes
while 1=1
begin
update n
set Notes = replace(n.Notes, '##' + l.name + '##', l.value)
from #notes n
outer apply
(
select top 1 Name
, Value
from Lookup l
where n.Notes like '%##' + l.name + '##%'
) l
where l.name is not null
if ##rowcount = 0
break
end
select *
from #notes
Example at SQL Fiddle.

I second the comment that tsql is just not suited for this operation, but if you must do it in the db here is an example using a function to manage the multiple replace statements.
Since you have a relatively small number of tokens in each note (5-15) and a very large number of tokens (10k-100k) my function first extracts tokens from the input as potential tokens and uses that set to join to your lookup (dbo.Token below). It was far too much work to look for an occurrence of any of your tokens in each note.
I did a bit of perf testing using 50k tokens and 5k notes and this function runs really well, completing in <2 seconds (on my laptop). Please report back how this strategy performs for you.
note: In your example data the token format was not consistent (##_#, ##_##, #_#), I am guessing this was simply a typo and assume all tokens take the form of ##TokenName##.
--setup
if object_id('dbo.[Lookup]') is not null
drop table dbo.[Lookup];
go
if object_id('dbo.fn_ReplaceLookups') is not null
drop function dbo.fn_ReplaceLookups;
go
create table dbo.[Lookup] (LookupName varchar(100) primary key, LookupValue varchar(100));
insert into dbo.[Lookup]
select '##placeholder130##','Dog' union all
select '##myPlaceholder##','Cat' union all
select '##oneMore##','Cow' union all
select '##test##','Horse';
go
create function [dbo].[fn_ReplaceLookups](#input varchar(max))
returns varchar(max)
as
begin
declare #xml xml;
select #xml = cast(('<r><i>'+replace(#input,'##' ,'</i><i>')+'</i></r>') as xml);
--extract the potential tokens
declare #LookupsInString table (LookupName varchar(100) primary key);
insert into #LookupsInString
select distinct '##'+v+'##'
from ( select [v] = r.n.value('(./text())[1]', 'varchar(100)'),
[r] = row_number() over (order by n)
from #xml.nodes('r/i') r(n)
)d(v,r)
where r%2=0;
--tokenize the input
select #input = replace(#input, l.LookupName, l.LookupValue)
from dbo.[Lookup] l
join #LookupsInString lis on
l.LookupName = lis.LookupName;
return #input;
end
go
return
--usage
declare #Notes table ([Id] int primary key, notes varchar(100));
insert into #Notes
select 1, 'This is some notes ##placeholder130## this ##myPlaceholder##, ##oneMore##. End.' union all
select 2, 'Second row...just a ##test##.';
select *,
dbo.fn_ReplaceLookups(notes)
from #Notes;
Returns:
Tokenized
--------------------------------------------------------
This is some notes Dog this Cat, Cow. End.
Second row...just a Horse.

Try this
;WITH CTE (org, calc, [Notes], [level]) AS
(
SELECT [Notes], [Notes], CONVERT(varchar(MAX),[Notes]), 0 FROM PlaceholderTable
UNION ALL
SELECT CTE.org, CTE.[Notes],
CONVERT(varchar(MAX), REPLACE(CTE.[Notes],'##' + T.[Name] + '##', T.[Value])), CTE.[level] + 1
FROM CTE
INNER JOIN LookupTable T ON CTE.[Notes] LIKE '%##' + T.[Name] + '##%'
)
SELECT DISTINCT org, [Notes], level FROM CTE
WHERE [level] = (SELECT MAX(level) FROM CTE c WHERE CTE.org = c.org)
SQL FIDDLE DEMO
Check the below devioblog post for reference
devioblog post

To get speed, you can preprocess the note templates into a more efficient form. This will be a sequence of fragments, with each ending in a substitution. The substitution might be NULL for the last fragment.
Notes
Id FragSeq Text SubsId
1 1 'This is some notes ' 1
1 2 ' this ' 2
1 3 ', ' 3
1 4 '. End.' null
2 1 'Second row...just a ' 4
2 2 '.' null
Subs
Id Name Value
1 'placeholder130' 'Dog'
2 'myPlaceholder' 'Cat'
3 'oneMore' 'Cow'
4 'test' 'Horse'
Now we can do the substitutions with a simple join.
SELECT Notes.Text + COALESCE(Subs.Value, '')
FROM Notes LEFT JOIN Subs
ON SubsId = Subs.Id WHERE Notes.Id = ?
ORDER BY FragSeq
This produces a list of fragments with substitutions complete. I am not an MSQL user, but in most dialects of SQL you can concatenate these fragments in a variable quite easily:
DECLARE #Note VARCHAR(8000)
SELECT #Note = COALESCE(#Note, '') + Notes.Text + COALSCE(Subs.Value, '')
FROM Notes LEFT JOIN Subs
ON SubsId = Subs.Id WHERE Notes.Id = ?
ORDER BY FragSeq
Pre-processing a note template into fragments will be straightforward using the string splitting techniques of other posts.
Unfortunately I'm not at a location where I can test this, but it ought to work fine.

I really don't know how it will perform with 10k+ of lookups.
how does the old dynamic SQL performs?
DECLARE #sqlCommand NVARCHAR(MAX)
SELECT #sqlCommand = N'PlaceholderTable.[Notes]'
SELECT #sqlCommand = 'REPLACE( ' + #sqlCommand +
', ''##' + LookupTable.[Name] + '##'', ''' +
LookupTable.[Value] + ''')'
FROM LookupTable
SELECT #sqlCommand = 'SELECT *, ' + #sqlCommand + ' FROM PlaceholderTable'
EXECUTE sp_executesql #sqlCommand
Fiddle demo

And now for some recursive CTE.
If your indexes are correctly set up, this one should be very fast or very slow. SQL Server always surprises me with performance extremes when it comes to the r-CTE...
;WITH T AS (
SELECT
Row,
StartIdx = 1, -- 1 as first starting index
EndIdx = CAST(patindex('%##%', Notes) as int), -- first ending index
Result = substring(Notes, 1, patindex('%##%', Notes) - 1)
-- (first) temp result bounded by indexes
FROM PlaceholderTable -- **this is your source table**
UNION ALL
SELECT
pt.Row,
StartIdx = newstartidx, -- starting index (calculated in calc1)
EndIdx = EndIdx + CAST(newendidx as int) + 1, -- ending index (calculated in calc4 + total offset)
Result = Result + CAST(ISNULL(newtokensub, newtoken) as nvarchar(max))
-- temp result taken from subquery or original
FROM
T
JOIN PlaceholderTable pt -- **this is your source table**
ON pt.Row = T.Row
CROSS APPLY(
SELECT newstartidx = EndIdx + 2 -- new starting index moved by 2 from last end ('##')
) calc1
CROSS APPLY(
SELECT newtxt = substring(pt.Notes, newstartidx, len(pt.Notes))
-- current piece of txt we work on
) calc2
CROSS APPLY(
SELECT patidx = patindex('%##%', newtxt) -- current index of '##'
) calc3
CROSS APPLY(
SELECT newendidx = CASE
WHEN patidx = 0 THEN len(newtxt) + 1
ELSE patidx END -- if last piece of txt, end with its length
) calc4
CROSS APPLY(
SELECT newtoken = substring(pt.Notes, newstartidx, newendidx - 1)
-- get the new token
) calc5
OUTER APPLY(
SELECT newtokensub = Value
FROM LookupTable
WHERE Name = newtoken -- substitute the token if you can find it in **your lookup table**
) calc6
WHERE newstartidx + len(newtxt) - 1 <= len(pt.Notes)
-- do this while {new starting index} + {length of txt we work on} exceeds total length
)
,lastProcessed AS (
SELECT
Row,
Result,
rn = row_number() over(partition by Row order by StartIdx desc)
FROM T
) -- enumerate all (including intermediate) results
SELECT *
FROM lastProcessed
WHERE rn = 1 -- filter out intermediate results (display only last ones)

Related

Generate a comma-separated list of numbers in a single string

Is there a way to generate a comma-separated string of a series of numbers where the "begin" and "end" numbers are provided?
For example, provide the numbers 1 and 10 and the output would be a single value of: 1,2,3,4,5,6,7,8,9,10
10/10/2019 edit explaining why I'm interested in this:
My workplace writes queries with several columns in the SELECT statement plus aggregate functions. Then a GROUP BY clause using the column numbers. I figured using a macro that creates a comma-separated list to copy/paste in would save some time.
SELECT t.colA
, t.colB
, t.colC
, t.colD
, t.colE
, t.colF
, t.colG
, t.colH
, t.colI
, t.colJ
, sum(t.colK) as sumK
, sum(t.colL) as sumL
, sum(t.colM) as sumM
FROM t
GROUP BY 1, 2, 3, 4, 5, 6, 7, 8, 9, 10
;
You can use a recursive CTE to generate your numbers, and xml_agg to generate your string:
with recursive nums (counter) as
( select * from (select cast(1 as bigint) as counter) t
union all
select
counter + 1
from nums
where counter between 1 and 9
)
select
trim(trailing ',' from cast(xmlagg(cast(counter as varchar(2)) || ',' order by counter) as varchar(100)))
from nums
Check these methods in SQL Server-
IF OBJECT_ID('TEMPDB..#Sample') IS NOT NULL
DROP TABLE #Sample
Create table #Sample
(
NUM int
)
declare #n int
select #n=10
insert into #Sample(NUM)
SELECT NUM FROM (select row_number() over (order by (select null)) AS NUM from sys.columns) A WHERE NUM<=#N
--Method 1 (For SQL SERVER -NEW VERSION Support)
SELECT STRING_AGG(NUM,',') AS EXPECTED_RESULT FROM #Sample
--Method 1 (For SQL SERVER -OLD VERSION Support)
select DISTINCT STUFF(CAST((
SELECT ' ,' +CAST(c.num AS VARCHAR(MAX))
FROM (
SELECT num
FROM #Sample
) c
FOR XML PATH(''), TYPE) AS VARCHAR(MAX)), 1, 2, '') AS EXPECTED_RESULT
from #Sample t
While loop seems appropriate
declare #begin int=1
declare #end int=11
declare #list varchar(500)
if #begin > #end
begin
select 'error, beginning number ' + convert(varchar(500),#begin)
+ ' must not be greater than ending number '
+ convert(varchar(500),#end) + '.' err
return
end
else
set #list = convert(varchar(500),#begin)
;
while #begin < #end
begin
set #begin += 1
set #list = #list + ',' + convert(varchar(500),#begin)
end
select #list
You might want to use varchar(5000) or something depending on how big you want it to get.
disclaimer -- I don't know if this works with teradata
I'm not sure there is a good direct way to generate a series in Teradata. You can fake it a few different ways though. Here's a comma separated list of numbers from 5 to 15, for example:
SELECT TRIM(TRAILING ',' FROM (XMLAGG(TRIM(rn)|| ',' ) (VARCHAR(10000))))
FROM (SELECT 4 + ROW_NUMBER() OVER (ORDER BY Sys_Calendar."CALENDAR".day_of_calendar) as rn FROM Sys_Calendar."CALENDAR" QUALIFY rn <= 15) t
I've only used sys_calendar.calendar here because it's a big table. Any big table would do here though.
Here's one way to do it in Teradata:
SELECT ARRAY_AGG(src.RowNum)
FROM (
SELECT ROW_NUMBER() OVER() AS RowNum
FROM sys_calendar.calendar
QUALIFY RowNum BETWEEN <begin_num> AND <end_num>
) src
This will give you the output as an ARRAY data type, which you can probably cast as a VARCHAR. It also assumes begin_num > 0 and <end_num> is less than the number of rows in the sys_calendar.calendar view. You can always fiddle with this to fit your required range of values.
There are also DelimitedBuild UDFs out there (if you can find one) that can be used to convert row values into delimited strings.
The cheapest way to achieve your goal is this one (no functions, or joins to tables required):
WITH RECURSIVE NumberRanges(TheNumber,TheString) AS
(
SELECT 1 AS TheNumber,casT(1 as VARCHAR(500)) as TheString
FROM
(
SELECT * FROM (SELECT NULL AS X) X
) DUMMYTABLE
UNION ALL
SELECT
TheNumber + 1 AS TheNumber,
TheString ||',' || TRIM(TheNumber+1)
FROM NumberRanges
WHERE
TheNumber < 10
)
SELECT TheString
FROM NumberRanges
QUALIFY ROW_NUMBER() OVER ( ORDER BY TheNumber DESC) = 1;
Result String: 1,2,3,4,5,6,7,8,9,10

How to create a single string in SQL, based on Hierarchical Data - CTE and XML?

I have a database table storing stock locations for various products.
One part number can have several Departments, each department can have several racks and so on (shelves and then rows).
I need to concatenate this information to fit the dataset on label (on a single line).
To retrieve the dataset my query is as follows.
select d.Department_Name, r.Rack_Name, s.Shelf_Name, rw.Row_Name from Part_Locator l
join Part_Numbers p
on l.Part_Id=p.Part_Id
join PL_Departments d
on l.Department_Id=d.Department_Id
join PL_Racks r
on l.Rack_Id=r.Rack_Id
join PL_Shelfs s
on l.Shelf_Id=s.Shelf_Id
join PL_Rows rw
on l.Row_Id=rw.Row_Id
where p.Part_Number='1BODY000997'
Results are as follows:
I can make a nested look to build a string, but I believe a combination of recursive CTE, Stuff and XML could so the same for me. So far I can find many examples, but none that I can understand well enough to make them work together application.
The desired output is as follows:
Assembly - Rack IA-10 - Shelf A - Row 1,2,3,4,5,6 - Shelf B - Row 1,2,3,4,5,6 - Shelf C - Row 1,2,3,4,5,6
I am not opposed to changing that format as long as it reads logically. Also please note there can multiple departments.
Any help to get me going in the right direction would be great.
Thanks in advance!
I was able to get this working using cursor. I also created a view to remove the joins from the query, much easier to read.
DECLARE
#partnumber varchar(50)='1BODY000997', #dept varchar(50), #rack varchar(50), #shelf varchar(50), #row varchar(50), #text varchar(500)='';
DECLARE department_cursor CURSOR FOR
SELECT DISTINCT Department_Name FROM Part_Locations WHERE Part_Number=#partnumber
OPEN department_cursor;
FETCH NEXT FROM department_cursor INTO #dept;
WHILE ##FETCH_STATUS = 0
BEGIN
SET #text=#text+CAST(#dept as varchar(50))+' - ';
DECLARE rack_cursor CURSOR FOR
SELECT DISTINCT Rack_Name FROM Part_Locations WHERE Part_Number=#partnumber AND Department_Name=#dept
OPEN rack_cursor;
FETCH NEXT FROM rack_cursor INTO #rack
WHILE ##FETCH_STATUS = 0
BEGIN
SET #text=#text+'Rack:'+CAST(#rack as varchar(50))+' - ';
DECLARE shelf_cursor CURSOR FOR
SELECT DISTINCT Shelf_Name FROM Part_Locations WHERE Part_Number=#partnumber AND Department_Name=#dept AND Rack_Name=#rack
OPEN shelf_cursor;
FETCH NEXT FROM shelf_cursor INTO #shelf
WHILE ##FETCH_STATUS = 0
BEGIN
SET #text=#text+'Shelf:'+CAST(#shelf as varchar(50))+' (Row ';
SET #text=#text+(SELECT abc=STUFF((SELECT DISTINCT ',' + Row_Name FROM Part_Locations WHERE Part_Number=#partnumber AND Department_Name=#dept AND Rack_Name=#rack AND Shelf_Name=#shelf FOR XML PATH ('')), 1, 1, ''))
SET #text=#text+') ';
FETCH NEXT FROM shelf_cursor INTO #shelf
END;
CLOSE shelf_cursor;
DEALLOCATE shelf_cursor;
FETCH NEXT FROM rack_cursor INTO #rack
END;
CLOSE rack_cursor;
DEALLOCATE rack_cursor;
FETCH NEXT FROM department_cursor INTO #dept;
END
CLOSE department_cursor;
DEALLOCATE department_cursor;
PRINT (#text)
this gives me good output of..
Assembly - Rack:IA-10 - Shelf:A (Row 1,2,3,4,5,6) Shelf:B (Row 1,2,3,4,5,6) Shelf:C (Row 1,2,3,4,5,6)
As mentioned, you could do a cte, or you could do a running field (similar to running total) , or xml path. Actually, forget the running field, too hard. Below are two examples , one with xml, followed by a cte. I used in that link I sent earlier to the the xml path. Due to the complexity, I needed to do two xml paths ( first one into a temp table)
Note that I created a table (ITEMS) and put your results into it ...
SELECT DISTINCT i.Dept, i.rack, i.shelf,
stuff( ( select ',' + i2.rowname
from ITEMS i2
where i2.Dept = i.Dept and
i2.rack = i.rack and
i2.shelf =i.shelf
FOR XML PATH('')),1,1,'') as RownumbersCommaSeparated
INTO #T1
FROM ITEMS i
SELECT
stuff( ( select ' ' + T.Dept + ' Rack ' + T.rack + ' Shelf ' + T.shelf + ' Rows ' + T.RownumbersCommaSeparated
from #T1 T
FOR XML PATH('')),1,1,'' ) as DesiredOutput
The common table expression is a bit longer ! There are TWO recursions ...
;
WITH
CTE0 AS
(
SELECT i.*,
ROW_NUMBER() OVER(ORDER BY i.Dept,i.rack,i.shelf, i.rowname) recordnumber,
RANK() OVER ( ORDER BY i.Dept,i.rack,i.shelf) Parent,
RANK() OVER (PARTITION BY i.Dept,i.rack,i.shelf ORDER BY i.Dept,i.rack,i.shelf,i.rowname) rowidForeachParent
FROM ITEMS i
),
CTE1 AS
( SELECT
i.Parent,
i.Dept + ' Rack ' +
i.rack + ' Shelf ' +
i.shelf + ' Rows ' as CoreData,
MAX(i.rowidforeachparent) maxrowidforeachparent
FROM cte0 i
group by i.Parent, i.Dept, i.rack, i.shelf
),
CTE2 AS
(
SELECT
i1.Parent,
i1.maxrowidforeachparent,
CAST( i1.CoreData + ',' + i0.rowname AS varchar(MAX) ) as outputX,
i0.rowidForeachParent
FROM cte1 i1
join cte0 i0 on
i0.Parent = i1.Parent AND
I0.rowidForeachParent = 1
UNION ALL
SELECT
i1.Parent,
i1.maxrowidforeachparent,
i1.outputX + ',' + i0.rowname,
i0.rowidForeachParent
FROM cte2 i1
join cte0 i0 on
i0.Parent = i1.Parent AND
I0.rowidForeachParent = I1.rowidForeachParent+1
) ,
CTE3 AS
(
SELECT i.outputX ,
ROW_NUMBER() OVER (ORDER BY Parent) ParentId
FROM cte2 i
where i.rowidForeachParent = i.maxrowidforeachparent
),
CTE4 AS
(
SELECT outputX, i3.ParentId FROM CTE3 i3
where i3.ParentId = 1
UNION ALL
SELECT i4.outputX + ' ' + i3.outputX, i3.ParentId FROM CTE3 i3
join CTE4 i4 on i3.ParentId = i4.ParentId + 1
)
select top 1 * from cte4 order by ParentId desc

Join [one word per row] to rows of phrases with [multiple words per row]

Please excuse the length of the question. I included a test script to demo the situation and my best attempt at a solution.
There are two tables:
test_WORDS = Words extracted in order from several sources. The OBJ_FK column is the ID of the source. WORD_ID is an identifier for the word itself that is unique within the source. Each row contains one word.
test_PHRASE = a list of phrases to be searched for in test_WORDS. The PHRASE_TEXT column is a space separated phrase like 'foo bar' (see below) so that each row contains multiple words.
Requirement:
Return the first word from test_WORDS that is the start of a matching a phrase from test_PHRASE.
I would prefer something set based to avoid RBAR approach below. Also my solution is limited to 5 word phrases. I need to support up to 20 word phrases. Is it possible to match the words from a row in test_PHRASE to contiguous rows in the test_WORD without cursors?
After breaking the phrase words out into a temporary table, the problem boils down to matching portions of two sets together in row order.
-- Create test data
CREATE TABLE [dbo].[test_WORDS](
[OBJ_FK] [bigint] NOT NULL, --FK to the source object
[WORD_ID] [int] NOT NULL, --The word order in the source object
[WORD_TEXT] [nvarchar](50) NOT NULL,
CONSTRAINT [PK_test_WORDS] PRIMARY KEY CLUSTERED
(
[OBJ_FK] ASC,
[WORD_ID] ASC
)
) ON [PRIMARY]
GO
CREATE TABLE [dbo].[test_PHRASE](
[ID] [int], --PHRASE ID
[PHRASE_TEXT] [nvarchar](150) NOT NULL --Space-separated phrase
CONSTRAINT [PK_test_PHRASE] PRIMARY KEY CLUSTERED
(
[ID] ASC
)
)
GO
INSERT INTO dbo.test_WORDS
SELECT 1,1,'aaa' UNION ALL
SELECT 1,2,'bbb' UNION ALL
SELECT 1,3,'ccc' UNION ALL
SELECT 1,4,'ddd' UNION ALL
SELECT 1,5,'eee' UNION ALL
SELECT 1,6,'fff' UNION ALL
SELECT 1,7,'ggg' UNION ALL
SELECT 1,8,'hhh' UNION ALL
SELECT 2,1,'zzz' UNION ALL
SELECT 2,2,'yyy' UNION ALL
SELECT 2,3,'xxx' UNION ALL
SELECT 2,4,'www'
INSERT INTO dbo.test_PHRASE
SELECT 1, 'bbb ccc ddd' UNION ALL --should match
SELECT 2, 'ddd eee fff' UNION ALL --should match
SELECT 3, 'xxx xxx xxx' UNION ALL --should NOT match
SELECT 4, 'zzz yyy xxx' UNION ALL --should match
SELECT 5, 'xxx www ppp' UNION ALL --should NOT match
SELECT 6, 'zzz yyy xxx www' --should match
-- Create variables
DECLARE #maxRow AS INTEGER
DECLARE #currentRow AS INTEGER
DECLARE #phraseSubsetTable AS TABLE(
[ROW] int IDENTITY(1,1) NOT NULL,
[ID] int NOT NULL, --PHRASE ID
[PHRASE_TEXT] nvarchar(150) NOT NULL
)
--used to split the phrase into words
--note: No permissions to sys.dm_fts_parser
DECLARE #WordList table
(
ID int,
WORD nvarchar(50)
)
--Records to be returned to caller
DECLARE #returnTable AS TABLE(
OBJECT_FK INT NOT NULL,
WORD_ID INT NOT NULL,
PHRASE_ID INT NOT NULL
)
DECLARE #phrase AS NVARCHAR(150)
DECLARE #phraseID AS INTEGER
-- Get subset of phrases to simulate a join that would occur in production
INSERT INTO #phraseSubsetTable
SELECT ID, PHRASE_TEXT
FROM dbo.test_PHRASE
--represent subset of phrases caused by join in production
WHERE ID IN (2,3,4)
-- Loop each phrase in the subset, split into rows of words and return matches to the test_WORDS table
SET #maxRow = ##ROWCOUNT
SET #currentRow = 1
WHILE #currentRow <= #maxRow
BEGIN
SELECT #phrase=PHRASE_TEXT, #phraseID=ID FROM #phraseSubsetTable WHERE row = #currentRow
--clear previous phrase that was split into rows
DELETE FROM #WordList
--Recursive Function with CTE to create recordset of words, one per row
;WITH Pieces(pn, start, stop) AS (
SELECT 1, 1, CHARINDEX(' ', #phrase)
UNION ALL
SELECT pn + 1, stop + 1, CHARINDEX(' ', #phrase, stop + 1)
FROM Pieces
WHERE stop > 0)
--Create the List of words with the CTE above
insert into #WordList
SELECT pn,
SUBSTRING(#phrase, start, CASE WHEN stop > 0 THEN stop-start ELSE 1056 END) AS WORD
FROM Pieces
DECLARE #wordCt as int
select #wordCt=count(ID) from #WordList;
-- Do the actual query using a CTE with a rownumber that repeats for every SOURCE OBJECT
;WITH WordOrder_CTE AS (
SELECT OBJ_FK, WORD_ID, WORD_TEXT,
ROW_NUMBER() OVER (Partition BY OBJ_FK ORDER BY WORD_ID) AS rownum
FROM test_WORDS)
--CREATE a flattened record of the first word in the phrase and join it to the rest of the words.
INSERT INTO #returnTable
SELECT r1.OBJ_FK, r1.WORD_ID, #phraseID AS PHRASE_ID
FROM WordOrder_CTE r1
INNER JOIN #WordList w1 ON r1.WORD_TEXT = w1.WORD and w1.ID=1
LEFT JOIN WordOrder_CTE r2
ON r1.rownum = r2.rownum - 1 and r1.OBJ_FK = r2.OBJ_FK
LEFT JOIN #WordList w2 ON r2.WORD_TEXT = w2.WORD and w2.ID=2
LEFT JOIN WordOrder_CTE r3
ON r1.rownum = r3.rownum - 2 and r1.OBJ_FK = r3.OBJ_FK
LEFT JOIN #WordList w3 ON r3.WORD_TEXT = w3.WORD and w3.ID=3
LEFT JOIN WordOrder_CTE r4
ON r1.rownum = r4.rownum - 3 and r1.OBJ_FK = r4.OBJ_FK
LEFT JOIN #WordList w4 ON r4.WORD_TEXT = w4.WORD and w4.ID=4
LEFT JOIN WordOrder_CTE r5
ON r1.rownum = r5.rownum - 4 and r1.OBJ_FK = r5.OBJ_FK
LEFT JOIN #WordList w5 ON r5.WORD_TEXT = w5.WORD and w5.ID=5
WHERE (#wordCt < 2 OR w2.ID is not null) and
(#wordCt < 3 OR w3.ID is not null) and
(#wordCt < 4 OR w4.ID is not null) and
(#wordCt < 5 OR w5.ID is not null)
--loop
SET #currentRow = #currentRow+1
END
--Return the first words of each matching phrase
SELECT OBJECT_FK, WORD_ID, PHRASE_ID FROM #returnTable
GO
--Clean up
DROP TABLE [dbo].[test_WORDS]
DROP TABLE [dbo].[test_PHRASE]
Edited solution:
This is an edit of the correct solution provided below to account for non-contiguous word IDs. Hope this helps someone as much as it did me.
;WITH
numberedwords AS (
SELECT
OBJ_FK,
WORD_ID,
WORD_TEXT,
rowcnt = ROW_NUMBER() OVER
(PARTITION BY OBJ_FK ORDER BY WORD_ID DESC),
totalInSrc = COUNT(WORD_ID) OVER (PARTITION BY OBJ_FK)
FROM dbo.test_WORDS
),
phrasedwords AS (
SELECT
nw1.OBJ_FK,
nw1.WORD_ID,
nw1.WORD_TEXT,
PHRASE_TEXT = RTRIM((
SELECT [text()] = nw2.WORD_TEXT + ' '
FROM numberedwords nw2
WHERE nw1.OBJ_FK = nw2.OBJ_FK
AND nw2.rowcnt BETWEEN nw1.rowcnt AND nw1.totalInSrc
ORDER BY nw2.OBJ_FK, nw2.WORD_ID
FOR XML PATH ('')
))
FROM numberedwords nw1
GROUP BY nw1.OBJ_FK, nw1.WORD_ID, nw1.WORD_TEXT, nw1.rowcnt, nw1.totalInSrc
)
SELECT *
FROM phrasedwords pw
INNER JOIN test_PHRASE tp
ON LEFT(pw.PHRASE_TEXT, LEN(tp.PHRASE_TEXT)) = tp.PHRASE_TEXT
ORDER BY pw.OBJ_FK, pw.WORD_ID
Note: The final query I used in production uses indexed temp tables instead of CTEs. I also limited the length of the PHRASE_TEXT column to my needs. With these improvements, I was able to reduce my query time from over 3 minutes to 3 seconds!
Here's a solution that uses a different approach: instead of splitting the phrases into words it combines the words into phrases.
Edited: changed the rowcnt expression to using COUNT(*) OVER …, as suggested by #ErikE in the comments.
;WITH
numberedwords AS (
SELECT
OBJ_FK,
WORD_ID,
WORD_TEXT,
rowcnt = COUNT(*) OVER (PARTITION BY OBJ_FK)
FROM dbo.test_WORDS
),
phrasedwords AS (
SELECT
nw1.OBJ_FK,
nw1.WORD_ID,
nw1.WORD_TEXT,
PHRASE_TEXT = RTRIM((
SELECT [text()] = nw2.WORD_TEXT + ' '
FROM numberedwords nw2
WHERE nw1.OBJ_FK = nw2.OBJ_FK
AND nw2.WORD_ID BETWEEN nw1.WORD_ID AND nw1.rowcnt
ORDER BY nw2.OBJ_FK, nw2.WORD_ID
FOR XML PATH ('')
))
FROM numberedwords nw1
GROUP BY nw1.OBJ_FK, nw1.WORD_ID, nw1.WORD_TEXT, nw1.rowcnt
)
SELECT *
FROM phrasedwords pw
INNER JOIN test_PHRASE tp
ON LEFT(pw.PHRASE_TEXT, LEN(tp.PHRASE_TEXT)) = tp.PHRASE_TEXT
ORDER BY pw.OBJ_FK, pw.WORD_ID
Using a Split function should work.
Split Function
CREATE FUNCTION dbo.Split
(
#RowData nvarchar(2000),
#SplitOn nvarchar(5)
)
RETURNS #RtnValue table
(
Id int identity(1,1),
Data nvarchar(100)
)
AS
BEGIN
Declare #Cnt int
Set #Cnt = 1
While (Charindex(#SplitOn,#RowData)>0)
Begin
Insert Into #RtnValue (data)
Select
Data = ltrim(rtrim(Substring(#RowData,1,Charindex(#SplitOn,#RowData)-1)))
Set #RowData = Substring(#RowData,Charindex(#SplitOn,#RowData)+1,len(#RowData))
Set #Cnt = #Cnt + 1
End
Insert Into #RtnValue (data)
Select Data = ltrim(rtrim(#RowData))
Return
END
SQL Statement
SELECT DISTINCT p.*
FROM dbo.test_PHRASE p
LEFT OUTER JOIN (
SELECT p.ID
FROM dbo.test_PHRASE p
CROSS APPLY dbo.Split(p.PHRASE_TEXT, ' ') sp
LEFT OUTER JOIN dbo.test_WORDS w ON w.WORD_TEXT = sp.Data
WHERE w.OBJ_FK IS NULL
) ignore ON ignore.ID = p.ID
WHERE ignore.ID IS NULL
This performs a little better than other solutions given. if you don't need WORD_ID, just WORD_TEXT, you can remove a whole column. I know this was over a year ago, but I wonder if you can get 3 seconds down to 30 ms? :)
If this query seems good, then my biggest speed advice is to put the entire phrases into a separate table (using your example data, it would have only 2 rows with phrases of length 8 words and 4 words).
SELECT
W.OBJ_FK,
X.Phrase,
P.*,
Left(P.PHRASE_TEXT,
IsNull(NullIf(CharIndex(' ', P.PHRASE_TEXT), 0) - 1, 2147483647)
) WORD_TEXT,
Len(Left(X.Phrase, PatIndex('%' + P.PHRASE_TEXT + '%', ' ' + X.Phrase) - 1))
- Len(Replace(
Left(X.Phrase, PatIndex('%' + P.PHRASE_TEXT + '%', X.Phrase) - 1), ' ', '')
)
WORD_ID
FROM
(SELECT DISTINCT OBJ_FK FROM dbo.test_WORDS) W
CROSS APPLY (
SELECT RTrim((SELECT WORD_TEXT + ' '
FROM dbo.test_WORDS W2
WHERE W.OBJ_FK = W2.OBJ_FK
ORDER BY W2.WORD_ID
FOR XML PATH (''))) Phrase
) X
INNER JOIN dbo.test_PHRASE P
ON X.Phrase LIKE '%' + P.PHRASE_TEXT + '%';
Here's another version for curiosity's sake. It doesn't perform quite as well.
WITH Calc AS (
SELECT
P.ID,
P.PHRASE_TEXT,
W.OBJ_FK,
W.WORD_ID StartID,
W.WORD_TEXT StartText,
W.WORD_ID,
Len(W.WORD_TEXT) + 2 NextPos,
Convert(varchar(150), W.WORD_TEXT) MatchingPhrase
FROM
dbo.test_PHRASE P
INNER JOIN dbo.test_WORDS W
ON P.PHRASE_TEXT + ' ' LIKE W.WORD_TEXT + ' %'
UNION ALL
SELECT
C.ID,
C.PHRASE_TEXT,
C.OBJ_FK,
C.StartID,
C.StartText,
W.WORD_ID,
C.NextPos + Len(W.WORD_TEXT) + 1,
Convert(varchar(150), C.MatchingPhrase + Coalesce(' ' + W.WORD_TEXT, ''))
FROM
Calc C
INNER JOIN dbo.test_WORDS W
ON C.OBJ_FK = W.OBJ_FK
AND C.WORD_ID + 1 = W.WORD_ID
AND Substring(C.PHRASE_TEXT, C.NextPos, 2147483647) + ' ' LIKE W.WORD_TEXT + ' %'
)
SELECT C.OBJ_FK, C.PHRASE_TEXT, C.StartID, C.StartText, C.ID
FROM Calc C
WHERE C.PHRASE_TEXT = C.MatchingPhrase;

SQL: Find rows where Column contains all of the given words

I have some column EntityName, and I want to have users to be able to search names by entering words separated by space. The space is implicitly considered as an 'AND' operator, meaning that the returned rows must have all of the words specified, and not necessarily in the given order.
For example, if we have rows like these:
abba nina pretty balerina
acdc you shook me all night long
sth you are me
dream theater it's all about you
when the user enters: me you, or you me (the results must be equivalent), the result has rows 2 and 3.
I know I can go like:
WHERE Col1 LIKE '%' + word1 + '%'
AND Col1 LIKE '%' + word2 + '%'
but I wanted to know if there's some more optimal solution.
The CONTAINS would require a full text index, which (for various reasons) is not an option.
Maybe Sql2008 has some built-in, semi-hidden solution for these cases?
The only thing I can think of is to write a CLR function that does the LIKE comparisons. This should be many times faster.
Update: Now that I think about it, it makes sense CLR would not help. Two other ideas:
1 - Try indexing Col1 and do this:
WHERE (Col1 LIKE word1 + '%' or Col1 LIKE '%' + word1 + '%')
AND (Col1 LIKE word2 + '%' or Col1 LIKE '%' + word2 + '%')
Depending on the most common searches (starts with vs. substring), this may offer an improvement.
2 - Add your own full text indexing table where each word is a row in the table. Then you can index properly.
Function
CREATE FUNCTION [dbo].[fnSplit] ( #sep CHAR(1), #str VARCHAR(512) )
RETURNS TABLE AS
RETURN (
WITH Pieces(pn, start, stop) AS (
SELECT 1, 1, CHARINDEX(#sep, #str)
UNION ALL
SELECT pn + 1, stop + 1, CHARINDEX(#sep, #str, stop + 1)
FROM Pieces
WHERE stop > 0
)
SELECT
pn AS Id,
SUBSTRING(#str, start, CASE WHEN stop > 0 THEN stop - start ELSE 512 END) AS Data
FROM
Pieces
)
Query
DECLARE #FilterTable TABLE (Data VARCHAR(512))
INSERT INTO #FilterTable (Data)
SELECT DISTINCT S.Data
FROM fnSplit(' ', 'word1 word2 word3') S -- Contains words
SELECT DISTINCT
T.*
FROM
MyTable T
INNER JOIN #FilterTable F1 ON T.Col1 LIKE '%' + F1.Data + '%'
LEFT JOIN #FilterTable F2 ON T.Col1 NOT LIKE '%' + F2.Data + '%'
WHERE
F2.Data IS NULL
Source: SQL SELECT WHERE field contains words
http://msdn.microsoft.com/en-us/magazine/cc163473.aspx
You're going to end up with a full table scan anyway.
The collation can make a big difference apparently. Kalen Delaney in the book "Microsoft SQL Server 2008 Internals" says:
Collation can make a huge difference
when SQL Server has to look at almost
all characters in the strings. For
instance, look at the following:
SELECT COUNT(*) FROM tbl WHERE longcol LIKE '%abc%'
This may execute 10 times faster or more with a binary collation than a nonbinary Windows collation. And with varchar data, this executes up to seven or eight times faster with a SQL collation than with a Windows collation.
WITH Tokens AS(SELECT 'you' AS Token UNION ALL SELECT 'me')
SELECT ...
FROM YourTable AS t
WHERE (SELECT COUNT(*) FROM Tokens WHERE y.Col1 LIKE '%'+Tokens.Token+'%')
=
(SELECT COUNT(*) FROM Tokens) ;
This should ideally be done with the help of Full text search as mentioned above.
BUT,
If you don't have full text configured for your DB, here is a performance intensive solution for doing a prioritized string search.
-- table to search in
drop table if exists dbo.myTable;
go
CREATE TABLE dbo.myTable
(
myTableId int NOT NULL IDENTITY (1, 1),
code varchar(200) NOT NULL,
description varchar(200) NOT NULL -- this column contains the values we are going to search in
) ON [PRIMARY]
GO
-- function to split space separated search string into individual words
drop function if exists [dbo].[fnSplit];
go
CREATE FUNCTION [dbo].[fnSplit] (#StringInput nvarchar(max),
#Delimiter nvarchar(1))
RETURNS #OutputTable TABLE (
id nvarchar(1000)
)
AS
BEGIN
DECLARE #String nvarchar(100);
WHILE LEN(#StringInput) > 0
BEGIN
SET #String = LEFT(#StringInput, ISNULL(NULLIF(CHARINDEX(#Delimiter, #StringInput) - 1, -1),
LEN(#StringInput)));
SET #StringInput = SUBSTRING(#StringInput, ISNULL(NULLIF(CHARINDEX
(
#Delimiter, #StringInput
),
0
), LEN
(
#StringInput)
)
+ 1, LEN(#StringInput));
INSERT INTO #OutputTable (id)
VALUES (#String);
END;
RETURN;
END;
GO
-- this is the search script which can be optionally converted to a stored procedure /function
declare #search varchar(max) = 'infection upper acute genito'; -- enter your search string here
-- the searched string above should give rows containing the following
-- infection in upper side with acute genitointestinal tract
-- acute infection in upper teeth
-- acute genitointestinal pain
if (len(trim(#search)) = 0) -- if search string is empty, just return records ordered alphabetically
begin
select 1 as Priority ,myTableid, code, Description from myTable order by Description
return;
end
declare #splitTable Table(
wordRank int Identity(1,1), -- individual words are assinged priority order (in order of occurence/position)
word varchar(200)
)
declare #nonWordTable Table( -- table to trim out auxiliary verbs, prepositions etc. from the search
id varchar(200)
)
insert into #nonWordTable values
('of'),
('with'),
('at'),
('in'),
('for'),
('on'),
('by'),
('like'),
('up'),
('off'),
('near'),
('is'),
('are'),
(','),
(':'),
(';')
insert into #splitTable
select id from dbo.fnSplit(#search,' '); -- this function gives you a table with rows containing all the space separated words of the search like in this e.g., the output will be -
-- id
-------------
-- infection
-- upper
-- acute
-- genito
delete s from #splitTable s join #nonWordTable n on s.word = n.id; -- trimming out non-words here
declare #countOfSearchStrings int = (select count(word) from #splitTable); -- count of space separated words for search
declare #highestPriority int = POWER(#countOfSearchStrings,3);
with plainMatches as
(
select myTableid, #highestPriority as Priority from myTable where Description like #search -- exact matches have highest priority
union
select myTableid, #highestPriority-1 as Priority from myTable where Description like #search + '%' -- then with something at the end
union
select myTableid, #highestPriority-2 as Priority from myTable where Description like '%' + #search -- then with something at the beginning
union
select myTableid, #highestPriority-3 as Priority from myTable where Description like '%' + #search + '%' -- then if the word falls somewhere in between
),
splitWordMatches as( -- give each searched word a rank based on its position in the searched string
-- and calculate its char index in the field to search
select myTable.myTableid, (#countOfSearchStrings - s.wordRank) as Priority, s.word,
wordIndex = CHARINDEX(s.word, myTable.Description) from myTable join #splitTable s on myTable.Description like '%'+ s.word + '%'
-- and not exists(select myTableid from plainMatches p where p.myTableId = myTable.myTableId) -- need not look into myTables that have already been found in plainmatches as they are highest ranked
-- this one takes a long time though, so commenting it, will have no impact on the result
),
matchingRowsWithAllWords as (
select myTableid, count(myTableid) as myTableCount from splitWordMatches group by(myTableid) having count(myTableid) = #countOfSearchStrings
)
, -- trim off the CTE here if you don't care about the ordering of words to be considered for priority
wordIndexRatings as( -- reverse the char indexes retrived above so that words occuring earlier have higher weightage
-- and then normalize them to sequential values
select s.myTableid, Priority, word, ROW_NUMBER() over (partition by s.myTableid order by wordindex desc) as comparativeWordIndex
from splitWordMatches s join matchingRowsWithAllWords m on s.myTableId = m.myTableId
)
,
wordIndexSequenceRatings as ( -- need to do this to ensure that if the same set of words from search string is found in two rows,
-- their sequence in the field value is taken into account for higher priority
select w.myTableid, w.word, (w.Priority + w.comparativeWordIndex + coalesce(sequncedPriority ,0)) as Priority
from wordIndexRatings w left join
(
select w1.myTableid, w1.priority, w1.word, w1.comparativeWordIndex, count(w1.myTableid) as sequncedPriority
from wordIndexRatings w1 join wordIndexRatings w2 on w1.myTableId = w2.myTableId and w1.Priority > w2.Priority and w1.comparativeWordIndex>w2.comparativeWordIndex
group by w1.myTableid, w1.priority,w1.word, w1.comparativeWordIndex
)
sequencedPriority on w.myTableId = sequencedPriority.myTableId and w.Priority = sequencedPriority.Priority
),
prioritizedSplitWordMatches as ( -- this calculates the cumulative priority for a field value
select w1.myTableId, sum(w1.Priority) as OverallPriority from wordIndexSequenceRatings w1 join wordIndexSequenceRatings w2 on w1.myTableId = w2.myTableId
where w1.word <> w2.word group by w1.myTableid
),
completeSet as (
select myTableid, priority from plainMatches -- get plain matches which should be highest ranked
union
select myTableid, OverallPriority as priority from prioritizedSplitWordMatches -- get ranked split word matches (which are ordered based on word rank in search string and sequence)
),
maximizedCompleteSet as( -- set the priority of a field value = maximum priority for that field value
select myTableid, max(priority) as Priority from completeSet group by myTableId
)
select priority, myTable.myTableid , code, Description from maximizedCompleteSet m join myTable on m.myTableId = myTable.myTableId
order by Priority desc, Description -- order by priority desc to get highest rated items on top
--offset 0 rows fetch next 50 rows only -- optional paging

How can I pivot these key+values rows into a table of complete entries?

Maybe I demand too much from SQL but I feel like this should be possible. I start with a list of key-value pairs, like this:
'0:First, 1:Second, 2:Third, 3:Fourth'
etc. I can split this up pretty easily with a two-step parse that gets me a table like:
EntryNumber PairNumber Item
0 0 0
1 0 First
2 1 1
3 1 Second
etc.
Now, in the simple case of splitting the pairs into a pair of columns, it's fairly easy. I'm interested in the more advanced case where I might have multiple values per entry, like:
'0:First:Fishing, 1:Second:Camping, 2:Third:Hiking'
and such.
In that generic case, I'd like to find a way to take my 3-column result table and somehow pivot it to have one row per entry and one column per value-part.
So I want to turn this:
EntryNumber PairNumber Item
0 0 0
1 0 First
2 0 Fishing
3 1 1
4 1 Second
5 1 Camping
Into this:
Entry [1] [2] [3]
0 0 First Fishing
1 1 Second Camping
Is that just too much for SQL to handle, or is there a way? Pivots (even tricky dynamic pivots) seem like an answer, but I can't figure how to get that to work.
No, in SQL you can't infer columns dynamically based on the data found during the same query.
Even using the PIVOT feature in Microsoft SQL Server, you must know the columns when you write the query, and you have to hard-code them.
You have to do a lot of work to avoid storing the data in a relational normal form.
Alright, I found a way to accomplish what I was after. Strap in, this is going to get bumpy.
So the basic problem is to take a string with two kinds of delimiters: entries and values. Each entry represents a set of values, and I wanted to turn the string into a table with one column for each value per entry. I tried to make this a UDF, but the necessity for a temporary table and dynamic SQL meant it had to be a stored procedure.
CREATE PROCEDURE [dbo].[ParseValueList]
(
#parseString varchar(8000),
#itemDelimiter CHAR(1),
#valueDelimiter CHAR(1)
)
AS
BEGIN
SET NOCOUNT ON;
IF object_id('tempdb..#ParsedValues') IS NOT NULL
BEGIN
DROP TABLE #ParsedValues
END
CREATE TABLE #ParsedValues
(
EntryID int,
[Rank] int,
Pair varchar(200)
)
So that's just basic set up, establishing the temp table to hold my intermediate results.
;WITH
E1(N) AS (SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1),--Brute forces 10 rows
E2(N) AS (SELECT 1 FROM E1 a, E1 b), --Uses a cross join to generate 100 rows (10 * 10)
E4(N) AS (SELECT 1 FROM E2 a, E2 b), --Uses a cross join to generate 10,000 rows (100 * 100)
cteTally(N) AS (SELECT ROW_NUMBER() OVER (ORDER BY N) FROM E4)
That beautiful piece of SQL comes from SQL Server Central's Forums and is credited to "a guru." It's a great little 10,000 line tally table perfect for string splitting.
INSERT INTO #ParsedValues
SELECT ItemNumber AS EntryID, ROW_NUMBER() OVER (PARTITION BY ItemNumber ORDER BY ItemNumber) AS [Rank],
SUBSTRING(Items.Item, T1.N, CHARINDEX(#valueDelimiter, Items.Item + #valueDelimiter, T1.N) - T1.N) AS [Value]
FROM(
SELECT ROW_NUMBER() OVER (ORDER BY T2.N) AS ItemNumber,
SUBSTRING(#parseString, T2.N, CHARINDEX(#itemDelimiter, #parseString + #itemDelimiter, T2.N) - T2.N) AS Item
FROM cteTally T2
WHERE T2.N < LEN(#parseString) + 2 --Ensures we cut out once the entire string is done
AND SUBSTRING(#itemDelimiter + #parseString, T2.N, 1) = #itemDelimiter
) AS Items, cteTally T1
WHERE T1.N < LEN(#parseString) + 2 --Ensures we cut out once the entire string is done
AND SUBSTRING(#valueDelimiter + Items.Item, T1.N, 1) = #valueDelimiter
Ok, this is the first really dense meaty part. The inner select is breaking up my string along the item delimiter (the comma), using the guru's string splitting method. Then that table is passed up to the outer select which does the same thing, but this time using the value delimiter (the colon) to each row. The inner RowNumber (EntryID) and the outer RowNumber over Partition (Rank) are key to the pivot. EntryID show which Item the values belong to, and Rank shows the ordinal of the values.
DECLARE #columns varchar(200)
DECLARE #columnNames varchar(2000)
DECLARE #query varchar(8000)
SELECT #columns = COALESCE(#columns + ',[' + CAST([Rank] AS varchar) + ']', '[' + CAST([Rank] AS varchar)+ ']'),
#columnNames = COALESCE(#columnNames + ',[' + CAST([Rank] AS varchar) + '] AS Value' + CAST([Rank] AS varchar)
, '[' + CAST([Rank] AS varchar)+ '] AS Value' + CAST([Rank] AS varchar))
FROM (SELECT DISTINCT [Rank] FROM #ParsedValues) AS Ranks
SET #query = '
SELECT '+ #columnNames +'
FROM #ParsedValues
PIVOT
(
MAX([Value]) FOR [Rank]
IN (' + #columns + ')
) AS pvt'
EXECUTE(#query)
DROP TABLE #ParsedValues
END
And at last, the dynamic sql that makes it possible. By getting a list of Distinct Ranks, we set up our column list. This is then written into the dynamic pivot which tilts the values over and slots each value into the proper column, each with a generic "Value#" heading.
Thus by calling EXEC ParseValueList with a properly formatted string of values, we can break it up into a table to feed into our purposes! It works (but is probably overkill) for simple key:value pairs, and scales up to a fair number of columns (About 50 at most, I think, but that'd be really silly.)
Anyway, hope that helps anyone having a similar issue.
(Yeah, it probably could have been done in something like SQLCLR as well, but I find a great joy in solving problems with pure SQL.)
Though probably not optimal, here's a more condensed solution.
DECLARE #DATA varchar(max);
SET #DATA = '0:First:Fishing, 1:Second:Camping, 2:Third:Hiking';
SELECT
DENSE_RANK() OVER (ORDER BY [Data].[row]) AS [Entry]
, [Data].[row].value('(./B/text())[1]', 'int') as "[1]"
, [Data].[row].value('(./B/text())[2]', 'varchar(64)') as "[2]"
, [Data].[row].value('(./B/text())[3]', 'varchar(64)') as "[3]"
FROM
(
SELECT
CONVERT(XML, '<A><B>' + REPLACE(REPLACE(#DATA , ',', '</B></A><A><B>'), ':', '</B><B>') + '</B></A>').query('.')
) AS [T]([c])
CROSS APPLY [T].[c].nodes('/A') AS [Data]([row]);
Hope is not too late.
You can use the function RANK to know the position of each Item per PairNumber. And then use Pivot
SELECT PairNumber, [1] ,[2] ,[3]
FROM
(
SELECT PairNumber, Item, RANK() OVER (PARTITION BY PairNumber order by EntryNumber) as RANKing
from tabla) T
PIVOT
(MAX(Item)
FOR RANKing in ([1],[2],[3])
)as PVT