Recursive query from the same table - sql

I have different product serial numbers in one table, ProdHistory, which, as the table name suggests, contains the production history.
For example, I have product serial SER001, which uses parts that each have their own serial number.
We also produce these parts, so the same ProdHistory table is used to track their subparts.
The same goes for the subparts, if they in turn have sub-subparts.
Sample Table
-- Rebuild the sample production-history table so the script can be re-run.
-- 'tempdb..' (lowercase database name, default schema) also resolves on
-- servers with a case-sensitive collation, where 'tempDB' would not.
IF OBJECT_ID('tempdb..#SAMPLETable') IS NOT NULL
    DROP TABLE #SAMPLETable;

CREATE TABLE #SAMPLETable
(
    ITEMSEQ         INT IDENTITY(1,1),                                      -- insertion sequence
    SERIAL          NVARCHAR(10) COLLATE SQL_Latin1_General_CP850_CI_AS,    -- serial of the produced item
    ITEMID          NVARCHAR(10) COLLATE SQL_Latin1_General_CP850_CI_AS,    -- item id of the produced item
    PARTSERIAL      NVARCHAR(10) COLLATE SQL_Latin1_General_CP850_CI_AS,    -- serial of a part used by SERIAL
    PARTID          NVARCHAR(10) COLLATE SQL_Latin1_General_CP850_CI_AS,    -- item id of that part
    CREATEDDATETIME DATETIME
);

-- Each row links one produced serial to one of its direct parts; a part that
-- is itself produced appears again in the SERIAL column with its own parts.
INSERT INTO #SAMPLETable (SERIAL, ITEMID, PARTSERIAL, PARTID, CREATEDDATETIME)
VALUES ('SER0001','ASY-1342','ITM0001','PRT-0808','2017-01-17'),
       ('SER0001','ASY-1342','ITM0002','PRT-0809','2017-01-17'),
       ('SER0001','ASY-1342','ITM0003','PRT-0810','2017-01-17'),
       ('SER0001','ASY-1342','ITM0004','PRT-0811','2017-01-17'),
       ('ITM0001','PRT-0808','UNT0001','PRT-2020','2017-01-16'),
       ('ITM0002','PRT-0809','UNT0002','PRT-2021','2017-01-16'),
       ('ITM0002','PRT-0809','UNT0003','PRT-2022','2017-01-16'),
       ('ITM0003','PRT-0810','UNT0004','PRT-2023','2017-01-16'),
       ('UNT0002','PRT-2021','DTA0000','PRT-1919','2017-01-15'),
       ('UNT0003','PRT-2022','DTA0001','PRT-1818','2017-01-15'),
       ('DTA0001','PRT-1818','LST0001','PRT-1717','2017-01-14');
The question is, if I'm given just the main serial number, how can I return all the parts and subparts serial associated with it?
Sample Result:
MainSerial SubSerial1 SubSerial2 SubSerial3 SubSerial4
-------------------------------------------------------
SER0001 ITM0001 UNT0001
SER0001 ITM0002 UNT0002 DTA0000
SER0001 ITM0002 UNT0003 DTA0001 LST0001
SER0001 ITM0003 UNT0004
SER0001 ITM0004
In the example above, the number of parts and subparts for a serial number is not fixed.
I did not post my code because what I'm doing right now is querying each level one by one.
If the number of subpart levels were known, I could use nested joins; however, it is not.
Another question: if I'm given just one of the subparts above, is it possible to return the same result?

I think a way is to use Dynamic SQL like this:
-- Builds and runs a query with one self-join per hierarchy level, so that every
-- level of sub-part serials becomes its own SubSerialN column.
-- Replace yourTable with the real table name (it is used consistently both in
-- the CTE and inside the generated SQL text).

-- Variables to generate SQL query string dynamically
declare @cols nvarchar(max) = N'', @joins nvarchar(max) = N'', @sql nvarchar(max) = N'';

-- Using CTE to iterate parent-child records
with cte(i, cols, joins, itemId, serial, partId, partSerial) as (
    select
        1, -- Level or depth of hierarchically tree
        N's1.serial MainSerial, s1.partSerial SubSerial'+cast(1 as varchar(max)),
        N'yourTable s'+cast(1 as varchar(max)),
        s.itemId, s.serial, s.partId, s.partSerial
    from yourTable s
    -- A way to filter root-parents is filtering items those are not in parts
    where s.itemId not in (select si.partId from yourTable si)
    union all
    select
        i+1,
        -- Append one more output column and one more self-join per level
        cols + N', s'+cast(i+1 as varchar(max))+N'.partSerial SubSerial'+cast(i+1 as varchar(max)),
        joins + N' left join yourTable s'+cast(i+1 as varchar(max))+N' on s'+cast(i as varchar(max))+N'.partId = s'+cast(i+1 as varchar(max))+N'.itemId',
        st.itemId, st.serial, st.partId, st.partSerial
    from cte
    join yourTable st on cte.partId = st.itemId
)
-- Now we need only strings of deepest level
select top(1)
    @cols = cols, @joins = joins
from cte
order by i desc;

-- Finalize and executing query string.
-- When the table has no root rows the strings stay empty; skip execution
-- instead of running a malformed statement.
if @cols <> N''
begin
    set @sql = N'select ' + @cols + N' from ' + @joins + N' where s1.itemId not in (select s.partId from yourTable s)';
    exec sys.sp_executesql @sql;
end
Additional Note: Generated query is:
-- Shape of the statement produced by the dynamic SQL: one left self-join per
-- hierarchy level, each contributing one SubSerialN column. Only rows whose
-- itemId never appears as a partId (the root items) are used as starting points.
select s1.serial MainSerial
, s1.partSerial SubSerial1
, s2.partSerial SubSerial2
, s3.partSerial SubSerial3
, s4.partSerial SubSerial4
--, ...
from yourTable s1
left join yourTable s2 on s1.partId = s2.itemId
left join yourTable s3 on s2.partId = s3.itemId
left join yourTable s4 on s3.partId = s4.itemId
--left join ...
where s1.itemId not in (select s.partId from yourTable s);

Related

How to Build select Query split Temp Value to two column one Per Number And Another to Text when Flag Allow 1?

I am working on a query for SQL Server 2012 and cannot work out how to build a select list that splits the Tempvalue column into two columns.
When a row in the temp table #nonparametricdata has FlagAllow = 1,
its Tempvalue must be split into two columns.
Suppose Tempvalue holds 50.40kg; it must be split as follows:
The first column holds the number (50.40) and is named after the feature, e.g. parametric.
The second column holds the text (kg) and is named after the feature plus 'Unit',
i.e. parametricUnit.
I need to build a query that performs this split whenever FlagAllow = 1.
-- Source rows: one feature value per part. Tempvalue holds a number followed
-- by a unit; FlagAllow = 1 marks rows whose value must be split into
-- number and unit.
create table #nonparametricdata
(
    PART_ID       nvarchar(50),
    CompanyName   nvarchar(50),
    PartNumber    nvarchar(50),
    DKFeatureName nvarchar(100),
    Tempvalue     nvarchar(50),
    FlagAllow     bit
);

-- Explicit column list so the insert does not depend on column order.
insert into #nonparametricdata (PART_ID, CompanyName, PartNumber, DKFeatureName, Tempvalue, FlagAllow)
values
('1222','Honda','silicon','package','15.50Am',0),
('1900','MERCEIS','GLASS','family','90.00Am',1),--Build select query split data because FlagAllow=1
('5000','TOYOTA','alominia','source','70.20kg',0),
('8000','MACDA','motor','parametric','50.40kg',1),----Build select query split data because FlagAllow=1
('8900','JEB','mirror','noparametric','75.35kg',0);

-- Feature names with the order in which their columns should be displayed.
create table #FinalTable
(
    DKFeatureName nvarchar(50),
    DisplayOrder  int
);

insert into #FinalTable (DKFeatureName, DisplayOrder)
values
('package',3),
('family',4),
('source',5),
('parametric',2),
('noparametric',1);
what I try is below :
-- Builds the comma-separated, bracket-quoted column list in DisplayOrder.
-- Features flagged FlagAllow = 1 contribute two columns: [name] and [nameUnit].
DECLARE @SelectqueryData NVARCHAR(MAX);

SELECT @SelectqueryData = STUFF(
    (
        SELECT ', ' + CASE WHEN B.FlagAllow = 1
                           -- QUOTENAME escapes any ']' inside the feature name
                           THEN QUOTENAME(A.DKFeatureName) + ',' + QUOTENAME(A.DKFeatureName + 'Unit')
                           ELSE QUOTENAME(A.DKFeatureName)
                      END
        FROM #FinalTable A
        JOIN (SELECT DISTINCT DKFeatureName, FlagAllow
              FROM #nonparametricdata
             ) B ON A.DKFeatureName = B.DKFeatureName
        ORDER BY A.DisplayOrder
        -- TYPE + .value() keeps characters such as & and < from being XML-entitized
        FOR XML PATH(''), TYPE
    ).value('.', 'NVARCHAR(MAX)'), 1, 2, '');   -- STUFF strips the leading ', '

SELECT @SelectqueryData;
--select @SelectqueryData from table
Expected Result is :
[noparametric], [parametric]--QueryGetNumber,[parametricUnit]--QueryGetUnitOfMeasure
, [package], [family]--QueryGetNumber,[familyUnit]--QueryGetUnitOfMeasure, [source]
when make query above it must give me result as image(for Explain Only) :
You're looking for a DYNAMIC PIVOT
Example
-- Step 1: build the pivot column list in DisplayOrder; features with
-- FlagAllow = 1 get an extra [<feature>Unit] column.
DECLARE @SelectqueryData NVARCHAR(MAX);

SELECT @SelectqueryData = STUFF( (
        SELECT ', ' + CASE WHEN B.FlagAllow = 1
                           THEN QUOTENAME(A.DKFeatureName) + ',' + QUOTENAME(A.DKFeatureName + 'Unit')
                           ELSE QUOTENAME(A.DKFeatureName)
                      END
        FROM #FinalTable A
        JOIN (SELECT DISTINCT DKFeatureName, FlagAllow
              FROM #nonparametricdata
             ) B ON A.DKFeatureName = B.DKFeatureName
        ORDER BY A.DisplayOrder
        FOR XML PATH(''), TYPE
    ).value('.', 'NVARCHAR(MAX)'), 1, 2, '');

-- Step 2: unpivot every source row into (Item, Value) pairs and pivot them back.
-- For FlagAllow = 1 the value is cut at the first letter: the part before it is
-- the number, the part from it onwards (up to 10 characters) is the unit.
-- Appending 'A' guarantees PATINDEX finds a letter even in purely numeric values.
DECLARE @SQL NVARCHAR(MAX) = N'
Select *
From (
    Select A.Part_ID
          ,A.PartNumber
          ,A.CompanyName
          ,B.*
    From #nonparametricdata A
    Cross Apply ( values ( DKFeatureName         ,case when FlagAllow=1 then left(TempValue,patindex(''%[A-Z]%'',TempValue+''A'')-1) else TempValue end )
                        ,( DKFeatureName+''Unit'',case when FlagAllow=1 then substring(TempValue,patindex(''%[A-Z]%'',TempValue+''A''),10) else null end )
                ) B(Item,Value)
) src
Pivot (max(value) for Item in (' + @SelectqueryData + N') ) pvt
';
--Print @SQL
EXEC sys.sp_executesql @SQL;
Returns

How to get column-level dependencies in a view

I've made some research on the matter but don't have solution yet. What I want to get is column-level dependencies in a view. So, let's say we have a table like this
-- Base table used by the demo view.
CREATE TABLE TEST
(
    first_name VARCHAR(10),
    last_name  VARCHAR(10),
    street     VARCHAR(10),
    number     INT
)
and a view like this:
-- Demo view whose two output columns are each concatenated from two columns of dbo.TEST
create view vTEST
as
select
first_name + ' ' + last_name as [name],
street + ' ' + cast(number as varchar(max)) as [address]
from dbo.TEST
What I'd like is to get result like this:
column_name depends_on_column_name depends_on_table_name
----------- --------------------- --------------------
name first_name dbo.TEST
name last_name dbo.TEST
address street dbo.TEST
address number dbo.TEST
I've tried sys.dm_sql_referenced_entities function, but referencing_minor_id is always 0 there for views.
-- Lists the entities and columns referenced by dbo.vTEST, together with the
-- referencing_minor_id reported for each reference.
SELECT
    ref.referencing_minor_id,
    ref.referenced_schema_name + '.' + ref.referenced_entity_name AS depends_on_table_name,
    ref.referenced_minor_name AS depends_on_column_name
FROM sys.dm_sql_referenced_entities('dbo.vTEST', 'OBJECT') AS ref
referencing_minor_id depends_on_table_name depends_on_column_name
-------------------- --------------------- ----------------------
0 dbo.TEST NULL
0 dbo.TEST first_name
0 dbo.TEST last_name
0 dbo.TEST street
0 dbo.TEST number
The same is true for sys.sql_expression_dependencies and for obsolete sys.sql_dependencies.
So do I miss something or is it impossible to do?
There're some related questions (Find the real column name of an alias used in a view?), but as I said - I haven't found a working solution yet.
EDIT 1: I've tried to use DAC to query if this information is stored somewhere in System Base Tables but haven't find it
This solution answers your question only partially: it won't work for columns that are expressions.
You could use sys.dm_exec_describe_first_result_set to get column information, passing 1 for its third argument:
@include_browse_information
If set to 1, each query is analyzed as if it has a FOR BROWSE option on the query. Additional key columns and source table information are returned.
-- Two demo tables joined by the view below.
CREATE TABLE txu
(
    id         INT,
    first_name VARCHAR(10),
    last_name  VARCHAR(10)
);
CREATE TABLE txd
(
    id      INT,
    id_fk   INT,
    address VARCHAR(100)
);
-- View mixing plain column references (PK_id, name, address) with one
-- expression column (name_full).
CREATE VIEW v_txu
AS
SELECT
    t.id                       AS PK_id,
    t.first_name               AS name,
    d.address,
    t.first_name + t.last_name AS name_full
FROM txu t
JOIN txd d ON t.id = d.id_fk
Main query:
-- Third argument = 1 requests browse information, which fills the source_* columns.
SELECT
    name,
    source_database,
    source_schema,
    source_table,
    source_column
FROM sys.dm_exec_describe_first_result_set(N'SELECT * FROM v_txu', NULL, 1);
Output:
+-----------+--------------------+---------------+--------------+---------------+
| name | source_database | source_schema | source_table | source_column |
+-----------+--------------------+---------------+--------------+---------------+
| PK_id | fiddle_0f9d47226c4 | dbo | txu | id |
| name | fiddle_0f9d47226c4 | dbo | txu | first_name |
| address | fiddle_0f9d47226c4 | dbo | txd | address |
| name_full | null | null | null | null |
+-----------+--------------------+---------------+--------------+---------------+
DBFiddleDemo
This is a solution based on the query plan. It has some advantages:
almost any select query can be processed
no SchemaBinding is required
and some disadvantages:
it has not been tested properly
it can break without warning if Microsoft changes the XML query plan format.
The core idea is that every column expression inside the XML query plan is defined in a "DefinedValue" node. The first subnode of "DefinedValue" is a reference to the output column and the second one is an expression. The expression is computed from input columns and constant values.
As mentioned above, it is based only on empirical observation and needs to be tested properly.
Here is an invocation example:
-- Analyse the column dependencies of the demo view.
EXECUTE dbo.GetColumnDependencies 'select * from dbo.vTEST'
target_column_name | source_column_name | const_value
---------------------------------------------------
address | Expr1007 | NULL
name | Expr1006 | NULL
Expr1006 | NULL | ' '
Expr1006 | [testdb].[dbo].first_name | NULL
Expr1006 | [testdb].[dbo].last_name | NULL
Expr1007 | NULL | ' '
Expr1007 | [testdb].[dbo].number | NULL
Expr1007 | [testdb].[dbo].street | NULL
It's code.
First of all get XML query plan.
-- Runs the query of interest as SELECT ... INTO #foo and then looks up the
-- cached plan of exactly that statement text.
declare @select_query as varchar(4000) = 'select * from dbo.vTEST'; -- IT'S YOUR QUERY HERE.
declare @select_into_query as varchar(4000) = 'select top (1) * into #foo from (' + @select_query + ') as src'
      , @xml_plan as xml = null
      , @xml_generation_tries as tinyint = 10
;
while (@xml_plan is null and @xml_generation_tries > 0) -- There is no guaranty that plan will be cached.
begin
    execute (@select_into_query);

    select @xml_plan = pln.query_plan
    from sys.dm_exec_query_stats as qry
        cross apply sys.dm_exec_sql_text(qry.sql_handle) as txt
        cross apply sys.dm_exec_query_plan(qry.plan_handle) as pln
    where txt.text = @select_into_query
    ;

    -- Count the attempt; without this the loop never ends when the plan is not cached.
    set @xml_generation_tries -= 1;
end

if (@xml_plan is null)
begin
    raiserror(N'Can''t extract XML query plan from cache.' ,15 ,0);
    return;
end
;
Next is a main query. It's biggest part is recursive common table expression for column extraction.
-- Walks the captured showplan XML (@xml_plan) from the columns written into #foo
-- back through every "DefinedValue" node, listing for each target column the
-- columns and constants its expression is computed from.
with xmlnamespaces(default 'http://schemas.microsoft.com/sqlserver/2004/07/showplan'
                  ,'http://schemas.microsoft.com/sqlserver/2004/07/showplan' as shp -- Used in .query() for predictive namespace using.
)
,    cte_column_dependencies as
    (
        -- The seed of the recursion is a query that extracts the columns of the #foo table,
        -- which stores 1 row of the select query of interest.
        select
            (select foo_col.info.query('./ColumnReference') for xml raw('shp:root') ,type) -- Because .value() can't extract attribute from root node.
                as target_column_info
          , (select foo_col.info.query('./ScalarOperator/Identifier/ColumnReference') for xml raw('shp:root') ,type)
                as source_column_info
          , cast(null as xml) as const_info
          , 1 as iteration_no
        from @xml_plan.nodes('//Update/SetPredicate/ScalarOperator/ScalarExpressionList/ScalarOperator/MultipleAssign/Assign')
                as foo_col(info)
        where foo_col.info.exist('./ColumnReference[@Table="[#foo]"]') = 1

        -- The recursive part searches for the "DefinedValue" node of each depended column and
        -- extracts all "ColumnReference" and "Const" subnodes used in the column expression.
        -- It is over-complicated by the XML to SQL conversions.
        union all
        select
            (select internal_col.info.query('.') for xml raw('shp:root') ,type)
          , source_info.column_info
          , source_info.const_info
          , prev_dependencies.iteration_no + 1
        from @xml_plan.nodes('//DefinedValue/ColumnReference') as internal_col(info)
            inner join cte_column_dependencies as prev_dependencies -- Filters by depended columns.
                on prev_dependencies.source_column_info.value('(//ColumnReference/@Column)[1]' ,'nvarchar(4000)') = internal_col.info.value('(./@Column)[1]' ,'nvarchar(4000)')
                -- INTERSECT treats two NULLs as equal, so a missing attribute on both sides still matches.
                and exists (select prev_dependencies.source_column_info.value('(.//@Schema)[1]' ,'nvarchar(4000)') intersect select internal_col.info.value('(./@Schema)[1]' ,'nvarchar(4000)'))
                and exists (select prev_dependencies.source_column_info.value('(.//@Database)[1]' ,'nvarchar(4000)') intersect select internal_col.info.value('(./@Database)[1]' ,'nvarchar(4000)'))
                and exists (select prev_dependencies.source_column_info.value('(.//@Server)[1]' ,'nvarchar(4000)') intersect select internal_col.info.value('(./@Server)[1]' ,'nvarchar(4000)'))
            cross apply ( -- Because only column or only constant can be places in result row.
                select (select source_col.info.query('.') for xml raw('shp:root') ,type) as column_info
                     , null as const_info
                from internal_col.info.nodes('..//ColumnReference') as source_col(info)
                union all
                select null as column_info
                     , (select const.info.query('.') for xml raw('shp:root') ,type) as const_info
                from internal_col.info.nodes('..//Const') as const(info)
            ) as source_info
        where source_info.column_info is null
            or (
                -- Except same node selected by '..//ColumnReference' from its sources. Sorry, I'm not so well to check it with XQuery simple.
                source_info.column_info.value('(//@Column)[1]' ,'nvarchar(4000)') <> internal_col.info.value('(./@Column)[1]' ,'nvarchar(4000)')
                and (select source_info.column_info.value('(//@Schema)[1]' ,'nvarchar(4000)') intersect select internal_col.info.value('(./@Schema)[1]' ,'nvarchar(4000)')) is null
                and (select source_info.column_info.value('(//@Database)[1]' ,'nvarchar(4000)') intersect select internal_col.info.value('(./@Database)[1]' ,'nvarchar(4000)')) is null
                and (select source_info.column_info.value('(//@Server)[1]' ,'nvarchar(4000)') intersect select internal_col.info.value('(./@Server)[1]' ,'nvarchar(4000)')) is null
            )
    )
-- Finally, the select statement converts the XML to human-readable text.
select
    -- col_dep.target_column_info
    --, col_dep.source_column_info
    --, col_dep.const_info
    coalesce(col_dep.target_column_info.value('(.//shp:ColumnReference/@Server)[1]' ,'nvarchar(4000)') + '.' ,'')
        + coalesce(col_dep.target_column_info.value('(.//shp:ColumnReference/@Database)[1]' ,'nvarchar(4000)') + '.' ,'')
        + coalesce(col_dep.target_column_info.value('(.//shp:ColumnReference/@Schema)[1]' ,'nvarchar(4000)') + '.' ,'')
        + col_dep.target_column_info.value('(.//shp:ColumnReference/@Column)[1]' ,'nvarchar(4000)')
        as target_column_name
  , coalesce(col_dep.source_column_info.value('(.//shp:ColumnReference/@Server)[1]' ,'nvarchar(4000)') + '.' ,'')
        + coalesce(col_dep.source_column_info.value('(.//shp:ColumnReference/@Database)[1]' ,'nvarchar(4000)') + '.' ,'')
        + coalesce(col_dep.source_column_info.value('(.//shp:ColumnReference/@Schema)[1]' ,'nvarchar(4000)') + '.' ,'')
        + col_dep.source_column_info.value('(.//shp:ColumnReference/@Column)[1]' ,'nvarchar(4000)')
        as source_column_name
  , col_dep.const_info.value('(/shp:root/shp:Const/@ConstValue)[1]' ,'nvarchar(4000)')
        as const_value
from cte_column_dependencies as col_dep
order by col_dep.iteration_no ,target_column_name ,source_column_name
option (maxrecursion 512) -- It's an assurance from infinite loop.
Everything you need is contained in the definition of the view,
so we can extract this information with the following steps:
Assign the view definition to a string variable.
Split it on the comma (,).
Split each aliased expression on the plus operator (+) using CROSS APPLY with XML.
Use the system views to look up accurate information such as the original table.
Demo:-
-- Derives column-level dependencies of a view by parsing the text of its definition.
--   @sViewName : name of the view to analyse.
-- Returns one row per (view column, source column) pair:
--   column_name, depends_on_column_name, depends_on_table_name.
-- The definition is split on ',' into select-list items and each item on '+'
-- into operands, so it only handles views whose expressions are plain '+'
-- concatenations of columns.
-- NOTE(review): depends_on_table_name is a single table picked with TOP 1, so
-- the result is only meaningful for views over one table - confirm before use.
Create PROC psp_GetLevelDependsView (@sViewName varchar(200))
AS
BEGIN
    Declare @stringToSplit    nvarchar(max),   -- max: a view definition easily exceeds 1000 characters
            @name             nvarchar(max),
            @dependsTableName nvarchar(257),   -- schema + '.' + table
            @pos              INT

    Declare @returnList TABLE ([Name] nvarchar(max))

    -- Source table of THIS view (the filter was missing, so any view's table could be picked).
    SELECT TOP 1 @dependsTableName = table_schema + '.' + TABLE_NAME
    FROM INFORMATION_SCHEMA.VIEW_COLUMN_USAGE
    WHERE VIEW_NAME = @sViewName

    select @stringToSplit = definition
    from sys.objects o
    join sys.sql_modules m on m.object_id = o.object_id
    where o.object_id = object_id(@sViewName)
      and o.type = 'V'

    -- Split the definition on commas: one chunk per select-list item.
    WHILE CHARINDEX(',', @stringToSplit) > 0
    BEGIN
        SELECT @pos  = CHARINDEX(',', @stringToSplit)
        SELECT @name = SUBSTRING(@stringToSplit, 1, @pos-1)
        INSERT INTO @returnList
        SELECT @name
        SELECT @stringToSplit = SUBSTRING(@stringToSplit, @pos+1, LEN(@stringToSplit)-@pos)
    END
    -- Remainder after the last comma.
    INSERT INTO @returnList
    SELECT @stringToSplit

    -- Pair every view column with the chunk(s) whose text mentions its name.
    select COLUMN_NAME , b.Name as Expression
    Into #Temp
    FROM INFORMATION_SCHEMA.COLUMNS a , @returnList b
    WHERE TABLE_NAME = @sViewName
      And (b.Name) like '%' + (COLUMN_NAME) + '%'

    -- Split each expression on '+' by turning it into <M>..</M> XML fragments.
    SELECT A.COLUMN_NAME as column_name,
           Split.a.value('.', 'VARCHAR(100)') AS depends_on_column_name , @dependsTableName as depends_on_table_name
    Into #temp2
    FROM
    (
        SELECT COLUMN_NAME,
               CAST ('<M>' + REPLACE(Expression, '+', '</M><M>') + '</M>' AS XML) AS Data
        FROM #Temp
    ) AS A CROSS APPLY Data.nodes ('/M') AS Split(a);

    -- Keep only operands that mention a column the view is known to use.
    SELECT b.column_name , a.COLUMN_NAME as depends_on_column_name , b.depends_on_table_name
    FROM INFORMATION_SCHEMA.VIEW_COLUMN_USAGE a , #temp2 b
    WHERE VIEW_NAME = @sViewName
      and b.depends_on_column_name like '%' + a.COLUMN_NAME + '%'

    drop table #Temp
    drop table #Temp2
END
Test:-
-- Run the dependency report for the demo view.
EXECUTE psp_GetLevelDependsView 'vTest'
Result:-
column_name depends_on_column_name depends_on_table_name
----------- --------------------- --------------------
name first_name dbo.TEST
name last_name dbo.TEST
address street dbo.TEST
address number dbo.TEST
I was playing around with this but didn't have time to go any further. Maybe this will help:
-- For view vTEST: every column of each object the view references
-- (one row per referenced-object column).
SELECT
    vw.[name]                        AS ViewName,
    dep.[referencing_id]             AS ViewObjectID,
    col.[name]                       AS ColumnNames,
    OBJECT_NAME(dep.referenced_id)   AS ReferencedTableName,
    dep.referenced_id                AS TableObjectIDsReferenced
FROM sys.views AS vw
INNER JOIN sys.sql_expression_dependencies AS dep
    ON dep.referencing_id = vw.[object_id]
INNER JOIN sys.objects AS obj
    ON dep.referencing_id = obj.[object_id]
INNER JOIN sys.columns AS col
    ON dep.referenced_id = col.[object_id]
WHERE vw.[name] = 'vTEST'

-- Output columns exposed by the view itself.
SELECT
    OBJECT_NAME([object_id]) AS ViewName,
    [object_id]              AS ViewObjectID,
    [name]                   AS OutputColumnName
FROM sys.columns
WHERE [object_id] = OBJECT_ID('vTEST')

-- Text of the view definition.
SELECT VIEW_DEFINITION
FROM INFORMATION_SCHEMA.VIEWS
WHERE TABLE_NAME = 'vTEST'
Unfortunately, SQL Server does not explicitly store mapping between source table columns and view columns. I suspect the main reason is simply due to the potential complexity of views (expression columns, functions called on those columns, nested queries etc.).
The only way that I can think of to determine the mapping between view columns and source columns would be to either parse the query associated to the view or parse the execution plan of the view.
The approach I have outlined here focuses on the second option and relies on the fact that SQL Server will avoid generating output lists for columns not required by a query.
The first step is to get the list of dependent tables and their associated columns required for the view. This can be achieved via the standard system tables in SQL Server.
Next, we enumerate all of the view’s columns via a cursor.
For each view column, we create a temporary wrapper stored procedure that only selects the single column in question from view. Because only a single column is requested SQL Server will only retrieve the information needed to output that single view column.
The newly created procedure will run the query in format only mode and will therefore not cause any actual I/O operations on the database, but it will generate an estimated execution plan when executed. After the query plan is generate, we query the output lists from the execution plan. Since we know which view column was selected we can now associate the output list to view column in question. We can further refine the association by only associating columns that form part of our original dependency list, this will eliminate expression outputs from the result set.
Note that with this method, if the view needs to join different tables together to generate the output, then all columns required to generate the output will be returned, even those not directly used in the column expression, since they are still indirectly required.
The following stored procedure demonstrates the above implementation method:
-- Maps each output column of a view to the source-table columns that appear in
-- the output lists of the execution plan when only that view column is selected.
--   @viewName : name of the view to analyse.
-- Returns one row per (source object, source column) the view depends on, with
-- ViewAliasColumn set to the view column whose plan referenced it. A source
-- column referenced by several view columns keeps the last one processed.
-- Side effects: creates, executes and drops a procedure named ___WrapView for
-- every view column.
CREATE PROCEDURE ViewGetColumnDependencies
(
    @viewName NVARCHAR(50)
)
AS
BEGIN
    -- Receives the (empty) result set of the wrapper procedure so it is not shown to the caller.
    CREATE TABLE #_suppress_output
    (
        result NVARCHAR(500) NULL
    );

    DECLARE @viewTableColumnMapping TABLE
    (
        [ViewName] NVARCHAR(50),
        [SourceObject] NVARCHAR(50),
        [SourceObjectColumnName] NVARCHAR(50),
        [ViewAliasColumn] NVARCHAR(50)
    )

    -- Get list of dependent tables and their associated columns required for the view.
    -- SourceObject is wrapped in brackets so it can be compared with the Table attribute of the plan.
    INSERT INTO @viewTableColumnMapping
    (
        [ViewName]
        ,[SourceObject]
        ,[SourceObjectColumnName]
    )
    SELECT v.[name] AS [ViewName]
        ,'[' + OBJECT_NAME(d.referenced_major_id) + ']' AS [SourceObject]
        ,c.[name] AS [SourceObjectColumnName]
    FROM sys.views v
    LEFT OUTER JOIN sys.sql_dependencies d ON d.object_id = v.object_id
    LEFT OUTER JOIN sys.columns c ON c.object_id = d.referenced_major_id AND c.column_id = d.referenced_minor_id
    WHERE v.[name] = @viewName;

    DECLARE @aliasColumn NVARCHAR(50);

    -- Next, we enumerate all of the views columns via a cursor.
    DECLARE ViewColumnNameCursor CURSOR FOR
    SELECT aliases.name AS [AliasName]
    FROM sys.views v
    LEFT OUTER JOIN sys.columns AS aliases on v.object_id = aliases.object_id -- c.column_id=aliases.column_id AND aliases.object_id = object_id('vTEST')
    WHERE v.name = @viewName;

    OPEN ViewColumnNameCursor

    FETCH NEXT FROM ViewColumnNameCursor
    INTO @aliasColumn

    DECLARE @tql_create_proc NVARCHAR(MAX);
    DECLARE @queryPlan XML;

    WHILE @@FETCH_STATUS = 0
    BEGIN
        /*
        For each view column, we create a temporary wrapper stored procedure that
        only selects the single column in question from view. The stored procedure
        will run the query in format only mode and will therefore not cause any
        actual I/O operations on the database, but it will generate an estimated
        execution plan when executed.
        */
        -- QUOTENAME keeps names containing ']' from breaking the generated statement.
        SET @tql_create_proc = 'CREATE PROCEDURE ___WrapView
        AS
        SET FMTONLY ON;
        SELECT CONVERT(NVARCHAR(MAX), ' + QUOTENAME(@aliasColumn) + ') FROM ' + QUOTENAME(@viewName) + ';
        SET FMTONLY OFF;';

        EXEC (@tql_create_proc);

        -- Execute the procedure to generate a query plan. The insert into the temp table is only done to
        -- suppress the empty result set from being displayed as part of the output.
        INSERT INTO #_suppress_output
        EXEC ___WrapView;

        -- Get the query plan for the wrapper procedure that was just executed.
        SELECT @queryPlan = [qp].[query_plan]
        FROM [sys].[dm_exec_procedure_stats] AS [ps]
        JOIN [sys].[dm_exec_query_stats] AS [qs] ON [ps].[plan_handle] = [qs].[plan_handle]
        CROSS APPLY [sys].[dm_exec_query_plan]([qs].[plan_handle]) AS [qp]
        WHERE [ps].[database_id] = DB_ID() AND OBJECT_NAME([ps].[object_id], [ps].[database_id]) = '___WrapView';

        -- Drop the wrapper procedure
        DROP PROCEDURE ___WrapView;

        /*
        After the query plan is generate, we query the output lists from the execution plan.
        Since we know which view column was selected we can now associate the output list to
        view column in question. We can further refine the association by only associating
        columns that form part of our original dependency list, this will eliminate expression
        outputs from the result set.
        */
        -- The recursive CTE flattens the plan XML into one row per element, with
        -- Structure holding the '/'-separated element path.
        ;WITH QueryPlanOutputList AS
        (
            SELECT T.X.value('local-name(.)', 'NVARCHAR(max)') as Structure,
                   T.X.value('./@Table[1]', 'NVARCHAR(50)') as [SourceTable],
                   T.X.value('./@Column[1]', 'NVARCHAR(50)') as [SourceColumnName],
                   T.X.query('*') as SubNodes
            FROM @queryPlan.nodes('*') as T(X)
            UNION ALL
            SELECT QueryPlanOutputList.structure + N'/' + T.X.value('local-name(.)', 'nvarchar(max)'),
                   T.X.value('./@Table[1]', 'NVARCHAR(50)') as [SourceTable],
                   T.X.value('./@Column[1]', 'NVARCHAR(50)') as [SourceColumnName],
                   T.X.query('*')
            FROM QueryPlanOutputList
            CROSS APPLY QueryPlanOutputList.SubNodes.nodes('*') as T(X)
        )
        -- Update through the alias so the target is unambiguously the joined instance.
        UPDATE CM
        SET ViewAliasColumn = @aliasColumn
        FROM @viewTableColumnMapping CM
        INNER JOIN
        (
            SELECT DISTINCT QueryPlanOutputList.Structure
                ,QueryPlanOutputList.[SourceTable]
                ,QueryPlanOutputList.[SourceColumnName]
            FROM QueryPlanOutputList
            WHERE QueryPlanOutputList.Structure like '%/OutputList/ColumnReference'
        ) SourceColumns ON CM.[SourceObject] = SourceColumns.[SourceTable] AND CM.SourceObjectColumnName = SourceColumns.SourceColumnName;

        FETCH NEXT FROM ViewColumnNameCursor
        INTO @aliasColumn
    END

    CLOSE ViewColumnNameCursor;
    DEALLOCATE ViewColumnNameCursor;

    DROP TABLE #_suppress_output

    SELECT *
    FROM @viewTableColumnMapping
    ORDER BY [ViewAliasColumn]
END
The stored procedure can now be executed as follow:
-- Named parameters use the @ sigil.
EXEC dbo.ViewGetColumnDependencies @viewName = 'vTEST'

SQL Pivot Convert Null to 0 [duplicate]

I tried to replace the (null) values in the output of the PIVOT function with 0 (zero), but had no success.
Below are the query I've tried and its output:
-- Sum DATA per CLASS, with one column per state.
SELECT
    PVT.CLASS,
    PVT.[AZ],
    PVT.[CA],
    PVT.[TX]
FROM #TEMP
PIVOT
(
    SUM(DATA)
    FOR STATE IN ([AZ], [CA], [TX])
) AS PVT
ORDER BY PVT.CLASS
CLASS AZ CA TX
RICE 10 4 (null)
COIN 30 3 2
VEGIE (null) (null) 9
I tried to use the ISNULL but did not work.
PIVOT SUM(ISNULL(DATA,0)) AS QTY
What syntax do I need to use?
-- ISNULL is applied to the pivoted columns in the outer select list; PIVOT
-- itself only accepts a bare aggregate. Each expression is aliased so the
-- result keeps the state names as column headers.
SELECT CLASS,
       isnull([AZ],0) AS [AZ],
       isnull([CA],0) AS [CA],
       isnull([TX],0) AS [TX]
FROM #TEMP
PIVOT (SUM(DATA)
       FOR STATE IN ([AZ], [CA], [TX])) AS PVT
ORDER BY CLASS
If you have a situation where you are using dynamic columns in your pivot statement you could use the following:
-- Dynamic pivot whose column list comes from Hospital.Name.
--   @cols            : [name1],[name2],...           -> PIVOT ... IN (...) list
--   @colsWithNoNulls : ISNULL([name1], 'No') [name1] -> outer select list, NULLs shown as 'No'
DECLARE @cols NVARCHAR(MAX)
DECLARE @colsWithNoNulls NVARCHAR(MAX)
DECLARE @query NVARCHAR(MAX)

SET @cols = STUFF((SELECT distinct ',' + QUOTENAME(Name)
                   FROM Hospital
                   WHERE Active = 1 AND StateId IS NOT NULL
                   FOR XML PATH(''), TYPE
                  ).value('.', 'NVARCHAR(MAX)')
                 ,1,1,'')

SET @colsWithNoNulls = STUFF(
    (
        SELECT distinct ',ISNULL(' + QUOTENAME(Name) + ', ''No'') ' + QUOTENAME(Name)
        FROM Hospital
        WHERE Active = 1 AND StateId IS NOT NULL
        FOR XML PATH(''), TYPE
    ).value('.', 'NVARCHAR(MAX)')
    ,1,1,'')

-- Assemble the statement in @query so it can be printed for inspection before running.
SET @query = N'
    SELECT Clinician, ' + @colsWithNoNulls + N'
    FROM
    (
        SELECT DISTINCT p.FullName AS Clinician, h.Name, CASE WHEN phl.personhospitalloginid IS NOT NULL THEN ''Yes'' ELSE ''No'' END AS HasLogin
        FROM Person p
        INNER JOIN personlicense pl ON pl.personid = p.personid
        INNER JOIN LicenseType lt on lt.licensetypeid = pl.licensetypeid
        INNER JOIN licensetypegroup ltg ON ltg.licensetypegroupid = lt.licensetypegroupid
        INNER JOIN Hospital h ON h.StateId = pl.StateId
        LEFT JOIN PersonHospitalLogin phl ON phl.personid = p.personid AND phl.HospitalId = h.hospitalid
        WHERE ltg.Name = ''RN'' AND
              pl.licenseactivestatusid = 2 AND
              h.Active = 1 AND
              h.StateId IS NOT NULL
    ) AS Results
    PIVOT
    (
        MAX(HasLogin)
        FOR Name IN (' + @cols + N')
    ) p
'
EXEC sys.sp_executesql @query
You cannot place the IsNull() until after the data is selected so you will place the IsNull() around the final value in the SELECT:
-- NULLs left by the pivot are replaced with 0 in the outer select list.
SELECT
    PVT.CLASS,
    ISNULL(PVT.[AZ], 0) AS [AZ],
    ISNULL(PVT.[CA], 0) AS [CA],
    ISNULL(PVT.[TX], 0) AS [TX]
FROM #TEMP
PIVOT (SUM(DATA) FOR STATE IN ([AZ], [CA], [TX])) AS PVT
ORDER BY PVT.CLASS
Sometimes it helps to think like the T-SQL parser. When it parses the statement, the PIVOT clause accepts only a bare aggregate over a column, so you cannot place a checking expression such as ISNULL there. Instead, you can simply use this:
-- Replace the NULLs produced by the pivot in the outer select list.
-- Each expression is aliased; without an alias the result columns have no name.
SELECT CLASS
    , IsNull([AZ], 0) AS [AZ]
    , IsNull([CA], 0) AS [CA]
    , IsNull([TX], 0) AS [TX]
FROM #TEMP
PIVOT (
    SUM(DATA)
    FOR STATE IN (
        [AZ]
        , [CA]
        , [TX]
    )
) AS PVT
ORDER BY CLASS
You have to account for all values in the pivot set. you can accomplish this using a cartesian product.
-- Template: make sure every (key1, key2) combination exists before pivoting,
-- so missing combinations yield 'nullvalue' instead of NULL.
select pivoted.*
from (
    select cartesian.key1, cartesian.key2, isnull(relationship.[value], 'nullvalue') as [value]
    from (
        -- all combinations of the distinct key1 and key2 values
        select k1.key1, k2.key2
        from (select distinct key1 from relationship) k1
        cross join (select distinct key2 from relationship) k2
    ) cartesian
    left outer join relationship
        on  relationship.key1 = cartesian.key1
        and relationship.key2 = cartesian.key2
) data
pivot (
    -- list one bracketed entry per distinct key2 value
    max([value]) for key2 in ([key2_v1], [key2_v2], [key2_v3] /* , ... */)
) pivoted
To modify the results of a pivot, you can list the pivoted columns in the select list and then transform them there as needed. For example, you may be able to use DECODE on the columns produced by the pivot function.
Kranti A
I have encountered a similar problem. The root cause is that (use your scenario for my case), in the #temp table, there is no record for:
a. CLASS=RICE and STATE=TX
b. CLASS=VEGIE and (STATE=AZ or STATE=CA)
So, when MSSQL does pivot for no record, MSSQL always shows NULL for MAX, SUM, ... (aggregate functions).
None of above solutions (IsNull([AZ], 0)) works for me, but I do get ideas from these solutions.
Sorry, it really depends on the #TEMP table. I can only provide some suggestions.
Make sure #TEMP table have records for below condition, even Data is null.
a. CLASS=RICE and STATE=TX
b. CLASS=VEGIE and (STATE=AZ or STATE=CA)
You may need to use cartesian product: select A.*, B.* from A, B
In the select query for #temp, if you need to join any table with WHERE, then would better put where inside another sub select query. (Goal is 1.)
Use isnull(DATA, 0) in #TEMP table.
Before pivot, make sure you have achieved Goal 1.
I can't give an answer to the original question, since there is no enough info for #temp table. I have pasted my code as example here.
-- Every enterprise is paired with every indicator first, then left-joined to
-- the data of one date, so a missing measurement becomes 0 before the pivot.
SELECT *
FROM (
    SELECT
        combo.id                                                             AS enterprise_id,
        combo.name                                                           AS enterprise_name,
        combo.indicator_name,
        CONVERT(varchar(12) , isnull(ind_data.[date],'2019-12-01') , 23)     AS data_date,
        isnull(ind_data.value,0)                                             AS indicator_value
    FROM (
        select ei.id as indicator_id, ei.name as indicator_name, e.*
        FROM tbl_enterprise_indicator ei
        CROSS JOIN tbl_enterprise e
    ) combo
    LEFT JOIN (
        select *
        from tbl_enterprise_indicator_data
        WHERE [date]='2020-01-01'
    ) ind_data
        ON  combo.id = ind_data.enterprise_id
        and combo.indicator_id = enterprise_indicator_id
) AS P
PIVOT
(
    SUM(P.indicator_value) FOR P.indicator_name IN(TX,CA)
) AS T

Dynamic SELECT statement, generate columns based on present and future values

I am currently building a SELECT statement in SQL Server 2008 and would like to make it dynamic, so that its columns are defined by the values in a table. I have heard about pivot tables and cursors, but they seem hard to understand at my current level. Here is the code:
-- Counts packages per container type for packages active on @date
-- (today when no date is supplied).
-- NOTE(review): only the syntax errors are fixed here (variable sigil, CAST,
-- missing END in the CASE expressions, reserved word Group). The column names
-- value1..value4, PackageStart/PackageEnd and the join a.Name = b.ID are kept
-- from the original and do not match the table layouts described in the
-- question - confirm against the real schema.
DECLARE @date DATE = null
IF @date is null
    set @date = CAST(GETDATE() as DATE)
SELECT
    Name,
    value1,
    value2,
    value3,
    value4
FROM ref_Table a
FULL OUTER JOIN (
    SELECT
        PK_ID ID,
        -- hard-coded container types: this is the part that needs to become dynamic
        sum(case when FK_ContainerType_ID = 1 then 1 else null end) Box,
        sum(case when FK_ContainerType_ID = 2 then 1 else null end) Pallet,
        sum(case when FK_ContainerType_ID = 3 then 1 else null end) Bag,
        sum(case when FK_ContainerType_ID = 4 then 1 else null end) Drum
    from
        Packages
    WHERE
        @date between PackageStart AND PackageEnd
    group by PK_ID ) b on a.Name = b.ID
where
    [Group] = 0   -- Group is a reserved word and must be bracketed
The code above works great for me, but PK_Type_ID and the column names (PackageNameX, ...) are hard-coded. I need the query to be dynamic, so that it builds itself from the present or future values in the Package table.
Any help or guidance on the right direction would be greatly appreciated...,
As requested
ref_Table (PK_ID, Name)
1, John
2, Mary
3, Albert
4, Jane
Packages (PK_ID, FK_ref_Table_ID, FK_ContainerType_ID, PackageStartDate, PackageEndDate)
1 , 1, 4, 1JAN2014, 30JAN2014
2 , 2, 3, 1JAN2014, 30JAN2014
3 , 3, 2, 1JAN2014, 30JAN2014
4 , 4, 1, 1JAN2014, 30JAN2014
ContainerType (PK_ID, Type)
1, Box
2, Pallet
3, Bag
4, Drum
and the result should look like this;
Name Box Pallet Bag Drum
---------------------------------------
John 1
Mary 1
Albert 1
Jane 1
The following code like I said works great, the issue is the Container table is going to grow and I need to replicated the same report without hard coding the columns.
What you need to build is called a dynamic pivot. There are plenty of good references on Stack if you search out that term.
Here is a solution to your scenario:
-- Self-contained demo of a dynamic pivot.
-- Global temp tables (##) hold the sample data from the question.
IF OBJECT_ID('tempdb..##ref_Table') IS NOT NULL
DROP TABLE ##ref_Table
IF OBJECT_ID('tempdb..##Packages') IS NOT NULL
DROP TABLE ##Packages
IF OBJECT_ID('tempdb..##ContainerType') IS NOT NULL
DROP TABLE ##ContainerType
SET NOCOUNT ON
CREATE TABLE ##ref_Table (PK_ID INT, NAME NVARCHAR(50))
CREATE TABLE ##Packages (PK_ID INT, FK_ref_Table_ID INT, FK_ContainerType_ID INT, PackageStartDate DATE, PackageEndDate DATE)
CREATE TABLE ##ContainerType (PK_ID INT, [Type] NVARCHAR(50))
-- Seed rows are distinct, so UNION ALL is sufficient (no de-duplication needed).
INSERT INTO ##ref_Table (PK_ID,NAME)
SELECT 1,'John' UNION ALL
SELECT 2,'Mary' UNION ALL
SELECT 3,'Albert' UNION ALL
SELECT 4,'Jane'
INSERT INTO ##Packages (PK_ID, FK_ref_Table_ID, FK_ContainerType_ID, PackageStartDate, PackageEndDate)
SELECT 1,1,4,'2014-01-01','2014-01-30' UNION ALL
SELECT 2,2,3,'2014-01-01','2014-01-30' UNION ALL
SELECT 3,3,2,'2014-01-01','2014-01-30' UNION ALL
SELECT 4,4,1,'2014-01-01','2014-01-30'
INSERT INTO ##ContainerType (PK_ID, [Type])
SELECT 1,'Box' UNION ALL
SELECT 2,'Pallet' UNION ALL
SELECT 3,'Bag' UNION ALL
SELECT 4,'Drum'
-- T-SQL local variables must start with @.
DECLARE @DATE DATE, @PARAMDEF NVARCHAR(MAX), @COLS NVARCHAR(MAX), @SQL NVARCHAR(MAX)
SET @DATE = '2014-01-15'
-- Build the pivot column list, e.g. [Bag],[Box],[Drum],[Pallet].
-- QUOTENAME brackets each name; STUFF removes the leading comma.
SET @COLS = STUFF((SELECT DISTINCT ',' + QUOTENAME(T.[Type])
FROM ##ContainerType T
FOR XML PATH, TYPE).value('.', 'NVARCHAR(MAX)'),1,1,'')
-- The column list is spliced into the statement text; the date stays a real
-- parameter (@DATE) and is bound by sp_executesql below.
SET @SQL = 'SELECT [Name], ' + @COLS + '
FROM (SELECT [Name], [Type], 1 AS Value
FROM ##ref_Table R
JOIN ##Packages P ON R.PK_ID = P.FK_ref_Table_ID
JOIN ##ContainerType T ON P.FK_ContainerType_ID = T.PK_ID
WHERE @DATE BETWEEN P.PackageStartDate AND P.PackageEndDate) X
PIVOT (COUNT(Value) FOR [Type] IN (' + @COLS + ')) P
'
-- Show the generated pieces for debugging.
PRINT @COLS
PRINT @SQL
SET @PARAMDEF = '@DATE DATE'
EXEC SP_EXECUTESQL @SQL, @PARAMDEF, @DATE=@DATE
Output:
Name Bag Box Drum Pallet
Albert 0 0 0 1
Jane 0 1 0 0
John 0 0 1 0
Mary 1 0 0 0
Static Query:
-- Static pivot: one row per person, one count column per container type.
-- The table is ContainerType (as defined in the question), not "ContanerType".
SELECT [Name],[Box],[Pallet],[Bag],[Drum] FROM
(
    SELECT *
    FROM
    (
        -- PKID is the value being counted; RFID is kept only for ordering.
        SELECT rf.Name,cnt.[Type], pk.PK_ID AS PKID, rf.PK_ID AS RFID
        FROM ref_Table rf INNER JOIN Packages pk ON rf.PK_ID = pk.FK_ref_Table_ID
        INNER JOIN ContainerType cnt ON cnt.PK_ID = pk.FK_ContainerType_ID
    ) AS SourceTable
    PIVOT
    (
        COUNT(PKID )
        FOR [Type]
        IN ( [Box],[Pallet],[Bag],[Drum])
    ) AS PivotTable
) AS Main
ORDER BY RFID
Dynamic Query:
-- Dynamic pivot: the column list is generated from ContainerType so new
-- container types appear in the report without editing the query.
DECLARE @columnList nvarchar (MAX)
DECLARE @pivotsql nvarchar (MAX)
-- QUOTENAME brackets each type name and escapes any ']' inside it;
-- TYPE/.value() returns the text without XML entity encoding;
-- STUFF drops the leading comma.
SELECT @columnList = STUFF(
    (
        SELECT ',' + QUOTENAME([Type])
        FROM ContainerType
        FOR XML PATH( ''), TYPE
    ).value('.', 'nvarchar(max)')
    ,1, 1,'' )
-- Same shape as the static query, with the column list spliced in twice:
-- once in the outer SELECT and once in the PIVOT ... IN (...) list.
SET @pivotsql =
N'SELECT [Name],' + @columnList + ' FROM
(
SELECT *
FROM
(
SELECT rf.Name,cnt.[Type], pk.PK_ID AS PKID, rf.PK_ID AS RFID
FROM ref_Table rf INNER JOIN Packages pk ON rf.PK_ID = pk.FK_ref_Table_ID
INNER JOIN ContainerType cnt ON cnt.PK_ID = pk.FK_ContainerType_ID
) AS SourceTable
PIVOT
(
COUNT(PKID )
FOR [Type]
IN ( ' + @columnList + ')
) AS PivotTable
) AS Main
ORDER BY RFID;'
EXEC sp_executesql @pivotsql
Following my tutorial below will help you to understand the PIVOT functionality:
We write sql queries in order to get different result sets like full, partial, calculated, grouped, sorted etc from the database tables. However sometimes we have requirements that we have to rotate our tables. Sounds confusing?
Let's keep it simple and consider the following two screen grabs.
SQL Table:
Expected Results:
Wow, that looks like a lot of work! It sounds like a combination of tricky SQL, temporary tables, loops, aggregation... blah blah blah.
Don't worry let's keep it simple, stupid(KISS).
MS SQL Server 2005 and above has an operator called PIVOT. It's very simple to use and powerful. With its help we will be able to rotate SQL tables and result sets.
Simple steps to make it happen:
Identify all the columns those will be part of the desired result set.
Find the column on which we will apply aggregation (sum, avg, max, min, etc.).
Identify the column which values will be the column header.
Specify the column values mentioned in step3 with comma separated and surrounded by square brackets.
So, if we now follow above four steps and extract information from the above sales table, it will be as below:
Year, Month, SalesAmount
SalesAmount
Month
[Jan],[Feb] ,[Mar] .... etc
We are nearly there if all the above steps made sense to you so far.
Now we have all the information we need. All we have to do now is to fill the below template with required information.
Template:
Our SQL query should look like below:
-- Rotates Sales so that each month becomes a column holding the summed Amount;
-- the remaining column (SalesYear) identifies each output row.
;WITH SourceTable AS
(
    -- Step 1: the columns that take part in the result
    SELECT SalesYear, SalesMonth, Amount
    FROM Sales
)
SELECT *
FROM SourceTable
PIVOT
(
    -- Step 2: the aggregated column
    SUM(Amount)
    -- Step 3: the column whose values become headers
    FOR SalesMonth
    -- Step 4: the header values, bracketed and comma separated
    IN ([Jan], [Feb], [Mar],
        [Apr], [May], [Jun], [Jul],
        [Aug], [Sep], [Oct], [Nov], [Dec])
) AS PivotTable;
In the above query we have hard coded the column names. Well it's not fun when you have to specify a number of columns.
However, there is a workaround, as follows:
-- Dynamic version of the Sales pivot: the month column list is generated
-- from the data instead of being typed by hand.
DECLARE @columnList nvarchar (MAX)
DECLARE @pivotsql nvarchar (MAX)
-- One bracketed entry per distinct SalesMonth. QUOTENAME escapes any ']'
-- in the value, TYPE/.value() avoids XML entity encoding, and STUFF removes
-- the leading comma.
SELECT @columnList = STUFF(
    (
        SELECT ',' + QUOTENAME(SalesMonth)
        FROM Sales
        GROUP BY SalesMonth
        FOR XML PATH( ''), TYPE
    ).value('.', 'nvarchar(max)')
    ,1, 1,'' )
-- Splice the generated list into the PIVOT ... IN (...) clause.
SET @pivotsql =
N'SELECT *
FROM
(
SELECT SalesYear, SalesMonth,Amount
FROM Sales
) AS SourceTable
PIVOT
(
SUM(Amount )
FOR SalesMonth
IN ( ' + @columnList +' )
) AS PivotTable;'
EXEC sp_executesql @pivotsql
Hopefully this tutorial will be a help to someone somewhere.
Enjoy coding.

Join [one word per row] to rows of phrases with [multiple words per row]

Please excuse the length of the question. I included a test script to demo the situation and my best attempt at a solution.
There are two tables:
test_WORDS = Words extracted in order from several sources. The OBJ_FK column is the ID of the source. WORD_ID is an identifier for the word itself that is unique within the source. Each row contains one word.
test_PHRASE = a list of phrases to be searched for in test_WORDS. The PHRASE_TEXT column is a space separated phrase like 'foo bar' (see below) so that each row contains multiple words.
Requirement:
Return the first word from test_WORDS wherever a run of contiguous words matches a phrase from test_PHRASE (that is, the word at which the matching phrase starts).
I would prefer something set based to avoid RBAR approach below. Also my solution is limited to 5 word phrases. I need to support up to 20 word phrases. Is it possible to match the words from a row in test_PHRASE to contiguous rows in the test_WORD without cursors?
After breaking the phrase words out into a temporary table, the problem boils down to matching portions of two sets together in row order.
-- Create test data
-- test_WORDS: one word per row, ordered by WORD_ID within each source object.
CREATE TABLE [dbo].[test_WORDS](
[OBJ_FK] [bigint] NOT NULL, --FK to the source object
[WORD_ID] [int] NOT NULL, --The word order in the source object
[WORD_TEXT] [nvarchar](50) NOT NULL,
CONSTRAINT [PK_test_WORDS] PRIMARY KEY CLUSTERED
(
[OBJ_FK] ASC,
[WORD_ID] ASC
)
) ON [PRIMARY]
GO
-- test_PHRASE: one space-separated phrase per row.
CREATE TABLE [dbo].[test_PHRASE](
[ID] [int] NOT NULL, --PHRASE ID (primary key, so declared NOT NULL explicitly)
[PHRASE_TEXT] [nvarchar](150) NOT NULL, --Space-separated phrase
CONSTRAINT [PK_test_PHRASE] PRIMARY KEY CLUSTERED
(
[ID] ASC
)
)
GO
-- Source object 1 has eight words, source object 2 has four.
INSERT INTO dbo.test_WORDS (OBJ_FK, WORD_ID, WORD_TEXT)
SELECT 1,1,'aaa' UNION ALL
SELECT 1,2,'bbb' UNION ALL
SELECT 1,3,'ccc' UNION ALL
SELECT 1,4,'ddd' UNION ALL
SELECT 1,5,'eee' UNION ALL
SELECT 1,6,'fff' UNION ALL
SELECT 1,7,'ggg' UNION ALL
SELECT 1,8,'hhh' UNION ALL
SELECT 2,1,'zzz' UNION ALL
SELECT 2,2,'yyy' UNION ALL
SELECT 2,3,'xxx' UNION ALL
SELECT 2,4,'www'
INSERT INTO dbo.test_PHRASE (ID, PHRASE_TEXT)
SELECT 1, 'bbb ccc ddd' UNION ALL --should match
SELECT 2, 'ddd eee fff' UNION ALL --should match
SELECT 3, 'xxx xxx xxx' UNION ALL --should NOT match
SELECT 4, 'zzz yyy xxx' UNION ALL --should match
SELECT 5, 'xxx www ppp' UNION ALL --should NOT match
SELECT 6, 'zzz yyy xxx www' --should match
-- Create variables (T-SQL variables and table variables must start with @)
DECLARE @maxRow AS INTEGER
DECLARE @currentRow AS INTEGER
DECLARE @phraseSubsetTable AS TABLE(
[ROW] int IDENTITY(1,1) NOT NULL,
[ID] int NOT NULL, --PHRASE ID
[PHRASE_TEXT] nvarchar(150) NOT NULL
)
--used to split the phrase into words
--note: No permissions to sys.dm_fts_parser
DECLARE @WordList table
(
ID int,
WORD nvarchar(50)
)
--Records to be returned to caller
DECLARE @returnTable AS TABLE(
OBJECT_FK INT NOT NULL,
WORD_ID INT NOT NULL,
PHRASE_ID INT NOT NULL
)
DECLARE @phrase AS NVARCHAR(150)
DECLARE @phraseID AS INTEGER
-- Get subset of phrases to simulate a join that would occur in production
INSERT INTO @phraseSubsetTable (ID, PHRASE_TEXT)
SELECT ID, PHRASE_TEXT
FROM dbo.test_PHRASE
--represent subset of phrases caused by join in production
WHERE ID IN (2,3,4)
-- Loop each phrase in the subset, split into rows of words and return matches to the test_WORDS table
-- @@ROWCOUNT must be read immediately after the INSERT above.
SET @maxRow = @@ROWCOUNT
SET @currentRow = 1
WHILE @currentRow <= @maxRow
BEGIN
SELECT @phrase=PHRASE_TEXT, @phraseID=ID FROM @phraseSubsetTable WHERE row = @currentRow
--clear previous phrase that was split into rows
DELETE FROM @WordList
--Recursive CTE to create recordset of words, one per row:
--pn = word number, start/stop = positions of the word and of the space after it
;WITH Pieces(pn, start, stop) AS (
SELECT 1, 1, CHARINDEX(' ', @phrase)
UNION ALL
SELECT pn + 1, stop + 1, CHARINDEX(' ', @phrase, stop + 1)
FROM Pieces
WHERE stop > 0)
--Create the List of words with the CTE above
insert into @WordList (ID, WORD)
SELECT pn,
--stop = 0 marks the last word; 1056 is just a length longer than any
--remaining text (@phrase holds at most 150 characters)
SUBSTRING(@phrase, start, CASE WHEN stop > 0 THEN stop-start ELSE 1056 END) AS WORD
FROM Pieces
DECLARE @wordCt as int
select @wordCt=count(ID) from @WordList;
-- Do the actual query using a CTE with a rownumber that repeats for every SOURCE OBJECT
;WITH WordOrder_CTE AS (
SELECT OBJ_FK, WORD_ID, WORD_TEXT,
ROW_NUMBER() OVER (Partition BY OBJ_FK ORDER BY WORD_ID) AS rownum
FROM test_WORDS)
--CREATE a flattened record of the first word in the phrase and join it to the rest of the words.
--rN is the source word N-1 rows after r1; wN is phrase word N when it equals rN.
INSERT INTO @returnTable (OBJECT_FK, WORD_ID, PHRASE_ID)
SELECT r1.OBJ_FK, r1.WORD_ID, @phraseID AS PHRASE_ID
FROM WordOrder_CTE r1
INNER JOIN @WordList w1 ON r1.WORD_TEXT = w1.WORD and w1.ID=1
LEFT JOIN WordOrder_CTE r2
ON r1.rownum = r2.rownum - 1 and r1.OBJ_FK = r2.OBJ_FK
LEFT JOIN @WordList w2 ON r2.WORD_TEXT = w2.WORD and w2.ID=2
LEFT JOIN WordOrder_CTE r3
ON r1.rownum = r3.rownum - 2 and r1.OBJ_FK = r3.OBJ_FK
LEFT JOIN @WordList w3 ON r3.WORD_TEXT = w3.WORD and w3.ID=3
LEFT JOIN WordOrder_CTE r4
ON r1.rownum = r4.rownum - 3 and r1.OBJ_FK = r4.OBJ_FK
LEFT JOIN @WordList w4 ON r4.WORD_TEXT = w4.WORD and w4.ID=4
LEFT JOIN WordOrder_CTE r5
ON r1.rownum = r5.rownum - 4 and r1.OBJ_FK = r5.OBJ_FK
LEFT JOIN @WordList w5 ON r5.WORD_TEXT = w5.WORD and w5.ID=5
--position N must have matched whenever the phrase has at least N words
WHERE (@wordCt < 2 OR w2.ID is not null) and
(@wordCt < 3 OR w3.ID is not null) and
(@wordCt < 4 OR w4.ID is not null) and
(@wordCt < 5 OR w5.ID is not null)
--loop
SET @currentRow = @currentRow+1
END
--Return the first words of each matching phrase
SELECT OBJECT_FK, WORD_ID, PHRASE_ID FROM @returnTable
GO
--Clean up
DROP TABLE [dbo].[test_WORDS]
DROP TABLE [dbo].[test_PHRASE]
Edited solution:
This is an edit of the correct solution provided below to account for non-contiguous word IDs. Hope this helps someone as much as it did me.
-- Matches phrases against runs of consecutive words without assuming that
-- WORD_ID values are contiguous: words are addressed by their ordinal
-- position (rowcnt) within the source object instead of by WORD_ID.
;WITH
numberedwords AS (
    SELECT
        OBJ_FK,
        WORD_ID,
        WORD_TEXT,
        -- Ordinal position of the word inside its source object. This must be
        -- ascending: the range "rowcnt .. totalInSrc" below then covers this
        -- word and every word after it. With a descending order the range
        -- would cover the words BEFORE the current one instead.
        rowcnt = ROW_NUMBER() OVER
            (PARTITION BY OBJ_FK ORDER BY WORD_ID),
        -- Number of words in the source object (position of the last word)
        totalInSrc = COUNT(WORD_ID) OVER (PARTITION BY OBJ_FK)
    FROM dbo.test_WORDS
),
phrasedwords AS (
    SELECT
        nw1.OBJ_FK,
        nw1.WORD_ID,
        nw1.WORD_TEXT,
        -- Text from the current word to the end of the source object,
        -- words separated by one space, trailing space trimmed
        PHRASE_TEXT = RTRIM((
            SELECT [text()] = nw2.WORD_TEXT + ' '
            FROM numberedwords nw2
            WHERE nw1.OBJ_FK = nw2.OBJ_FK
            AND nw2.rowcnt BETWEEN nw1.rowcnt AND nw1.totalInSrc
            ORDER BY nw2.OBJ_FK, nw2.WORD_ID
            FOR XML PATH ('')
        ))
    FROM numberedwords nw1
    GROUP BY nw1.OBJ_FK, nw1.WORD_ID, nw1.WORD_TEXT, nw1.rowcnt, nw1.totalInSrc
)
-- A word starts a phrase when the text beginning at that word has the
-- phrase as a prefix.
SELECT *
FROM phrasedwords pw
INNER JOIN test_PHRASE tp
ON LEFT(pw.PHRASE_TEXT, LEN(tp.PHRASE_TEXT)) = tp.PHRASE_TEXT
ORDER BY pw.OBJ_FK, pw.WORD_ID
Note: The final query I used in production uses indexed temp tables instead of CTEs. I also limited the length of the PHRASE_TEXT column to my needs. With these improvements, I was able to reduce my query time from over 3 minutes to 3 seconds!
Here's a solution that uses a different approach: instead of splitting the phrases into words it combines the words into phrases.
Edited: changed the rowcnt expression to use COUNT(*) OVER …, as suggested by @ErikE in the comments.
-- Builds, for every word, the text running from that word to the end of its
-- source object, then keeps the words whose text starts with a phrase.
;WITH
numberedwords AS (
    SELECT
        OBJ_FK,
        WORD_ID,
        WORD_TEXT,
        -- number of words in the source object
        COUNT(*) OVER (PARTITION BY OBJ_FK) AS rowcnt
    FROM dbo.test_WORDS
),
phrasedwords AS (
    SELECT
        head.OBJ_FK,
        head.WORD_ID,
        head.WORD_TEXT,
        RTRIM((
            SELECT tail.WORD_TEXT + ' ' AS [text()]
            FROM numberedwords AS tail
            WHERE tail.OBJ_FK = head.OBJ_FK
              -- The upper bound is the per-source row count, so the range
              -- reaches the last word only when WORD_ID values run 1..N
              -- without gaps.
              AND tail.WORD_ID >= head.WORD_ID
              AND tail.WORD_ID <= head.rowcnt
            ORDER BY tail.OBJ_FK, tail.WORD_ID
            FOR XML PATH ('')
        )) AS PHRASE_TEXT
    FROM numberedwords AS head
    GROUP BY head.OBJ_FK, head.WORD_ID, head.WORD_TEXT, head.rowcnt
)
SELECT *
FROM phrasedwords AS pw
JOIN test_PHRASE AS tp
    -- prefix comparison: the phrase must be the beginning of the word's text
    ON LEFT(pw.PHRASE_TEXT, LEN(tp.PHRASE_TEXT)) = tp.PHRASE_TEXT
ORDER BY pw.OBJ_FK, pw.WORD_ID
Using a Split function should work.
Split Function
-- Splits @RowData on the delimiter @SplitOn and returns one row per piece.
--   @RowData : the text to split
--   @SplitOn : the delimiter (may be longer than one character)
-- Returns a table with Id (1-based position of the piece) and Data (the piece
-- with leading and trailing spaces removed). Text without any delimiter
-- yields a single row.
CREATE FUNCTION dbo.Split
(
@RowData nvarchar(2000),
@SplitOn nvarchar(5)
)
RETURNS @RtnValue table
(
Id int identity(1,1),
Data nvarchar(100)
)
AS
BEGIN
    Declare @DelimLen int
    Declare @Pos int
    -- Character length of the delimiter. DATALENGTH/2 is used instead of LEN
    -- because LEN ignores trailing spaces and would report 0 for N' '.
    Set @DelimLen = DATALENGTH(@SplitOn) / 2
    Set @Pos = Charindex(@SplitOn, @RowData)
    While (@Pos > 0)
    Begin
        -- Emit the piece in front of the delimiter ...
        Insert Into @RtnValue (data)
        Select
        Data = ltrim(rtrim(Substring(@RowData, 1, @Pos - 1)))
        -- ... then drop that piece and the whole delimiter from the input.
        Set @RowData = Substring(@RowData, @Pos + @DelimLen, len(@RowData))
        Set @Pos = Charindex(@SplitOn, @RowData)
    End
    -- Whatever is left after the last delimiter is the final piece.
    Insert Into @RtnValue (data)
    Select Data = ltrim(rtrim(@RowData))
    Return
END
SQL Statement
-- Phrases for which every word (as produced by dbo.Split) occurs somewhere in
-- test_WORDS. Only the presence of each word is tested here.
SELECT p.*
FROM dbo.test_PHRASE AS p
WHERE NOT EXISTS (
    -- a word of this phrase ...
    SELECT 1
    FROM dbo.Split(p.PHRASE_TEXT, ' ') AS sp
    WHERE NOT EXISTS (
        -- ... that has no matching row in test_WORDS
        SELECT 1
        FROM dbo.test_WORDS AS w
        WHERE w.WORD_TEXT = sp.Data
    )
)
This performs a little better than other solutions given. if you don't need WORD_ID, just WORD_TEXT, you can remove a whole column. I know this was over a year ago, but I wonder if you can get 3 seconds down to 30 ms? :)
If this query seems good, then my biggest speed advice is to put the entire phrases into a separate table (using your example data, it would have only 2 rows with phrases of length 8 words and 4 words).
-- Concatenates all words of each source object into one string (X.Phrase) and
-- returns every test_PHRASE row whose text occurs inside that string, together
-- with the first word of the phrase and the position at which it starts.
SELECT
W.OBJ_FK,
X.Phrase,
P.*,
-- First word of the phrase: everything before the first space, or the whole
-- phrase when it contains no space (CharIndex = 0 -> NULL -> 2147483647).
Left(P.PHRASE_TEXT,
IsNull(NullIf(CharIndex(' ', P.PHRASE_TEXT), 0) - 1, 2147483647)
) WORD_TEXT,
-- Ordinal position of the word where the match starts: the number of spaces
-- in front of the match, plus one.
-- The first PatIndex searches ' ' + X.Phrase, so its result is one higher and
-- the first Left() ends on the first character of the match rather than on
-- the space before it; this matters because Len() ignores trailing spaces.
-- NOTE(review): this is a computed position; it equals the stored WORD_ID only
-- if WORD_ID values run 1..N without gaps within each OBJ_FK -- confirm.
Len(Left(X.Phrase, PatIndex('%' + P.PHRASE_TEXT + '%', ' ' + X.Phrase) - 1))
- Len(Replace(
Left(X.Phrase, PatIndex('%' + P.PHRASE_TEXT + '%', X.Phrase) - 1), ' ', '')
)
WORD_ID
FROM
-- one row per source object
(SELECT DISTINCT OBJ_FK FROM dbo.test_WORDS) W
CROSS APPLY (
-- all words of the source object in WORD_ID order, separated by a single
-- space, with the trailing space trimmed
SELECT RTrim((SELECT WORD_TEXT + ' '
FROM dbo.test_WORDS W2
WHERE W.OBJ_FK = W2.OBJ_FK
ORDER BY W2.WORD_ID
FOR XML PATH (''))) Phrase
) X
INNER JOIN dbo.test_PHRASE P
-- substring test: the pattern is not anchored to word boundaries
ON X.Phrase LIKE '%' + P.PHRASE_TEXT + '%';
Here's another version for curiosity's sake. It doesn't perform quite as well.
-- Recursive CTE that grows a match one word at a time: it starts at every word
-- that equals the first word of a phrase and keeps appending the next source
-- word for as long as it equals the next word of the phrase.
WITH Calc AS (
-- Anchor: every (phrase, word) pair where the phrase begins with the word.
SELECT
P.ID,
P.PHRASE_TEXT,
W.OBJ_FK,
-- word at which the candidate match starts
W.WORD_ID StartID,
W.WORD_TEXT StartText,
-- last word consumed so far
W.WORD_ID,
-- position in PHRASE_TEXT just after the first word and the space following it
Len(W.WORD_TEXT) + 2 NextPos,
-- words matched so far, space separated
-- NOTE(review): converted to varchar although test_WORDS.WORD_TEXT is declared
-- nvarchar; characters outside the varchar code page may not survive -- confirm.
Convert(varchar(150), W.WORD_TEXT) MatchingPhrase
FROM
dbo.test_PHRASE P
INNER JOIN dbo.test_WORDS W
-- a space is appended to both sides so that only a whole first word matches
ON P.PHRASE_TEXT + ' ' LIKE W.WORD_TEXT + ' %'
UNION ALL
-- Recursive step: extend the match by the word that follows the last one.
SELECT
C.ID,
C.PHRASE_TEXT,
C.OBJ_FK,
C.StartID,
C.StartText,
W.WORD_ID,
-- advance past the appended word and its trailing space
C.NextPos + Len(W.WORD_TEXT) + 1,
Convert(varchar(150), C.MatchingPhrase + Coalesce(' ' + W.WORD_TEXT, ''))
FROM
Calc C
INNER JOIN dbo.test_WORDS W
ON C.OBJ_FK = W.OBJ_FK
-- the next word is the one whose WORD_ID is exactly one greater, so a gap in
-- the WORD_ID sequence ends the chain
AND C.WORD_ID + 1 = W.WORD_ID
-- the rest of the phrase, from NextPos on, must begin with that word
AND Substring(C.PHRASE_TEXT, C.NextPos, 2147483647) + ' ' LIKE W.WORD_TEXT + ' %'
)
-- Keep only the chains that reproduced the complete phrase.
SELECT C.OBJ_FK, C.PHRASE_TEXT, C.StartID, C.StartText, C.ID
FROM Calc C
WHERE C.PHRASE_TEXT = C.MatchingPhrase;