distance between given set and some other sets - sql server 2005 - sql

I have a table which contains the item and category ids:
-- Holds the item ids together with the category ids assigned to them.
CREATE TABLE SomeTable
(
    ItemId     INT,
    CategoryId INT
);
Given some category ids (Set X) I would like to determine all item ids that share at least one category id and some stats for each of these item ids:
A – Number of category ids of item id that are not in set x
B – Number of category ids shared between item id and set x
C – Number of category ids in set x but which are not associated with item id
I have written some tsql code which involves a cross join and several ctes plus left joins. It works but is fairly slow.
I am sure someone must have encountered a similar problem. I would provide the code but the above description is simplified. Thanks.

Here's a couple of ideas. (I don't know how they'll compare performance wise with what you have already. Left for you to benchmark.)
set nocount on;

-- create a sample table: 20 rows, ItemId generated by the identity column.
-- Table variables are declared with the @ sigil; a # prefix names a temp table,
-- which has to be created with CREATE TABLE and cannot appear in a DECLARE.
declare @T table ( ItemId int identity(1,1), CategoryId int );
insert @T ( CategoryId ) values ( 100 );
insert @T ( CategoryId ) values ( 100 );
insert @T ( CategoryId ) values ( 100 );
insert @T ( CategoryId ) values ( 100 );
insert @T ( CategoryId ) values ( 100 );
insert @T ( CategoryId ) values ( 200 );
insert @T ( CategoryId ) values ( 200 );
insert @T ( CategoryId ) values ( 300 );
insert @T ( CategoryId ) values ( 300 );
insert @T ( CategoryId ) values ( 300 );
insert @T ( CategoryId ) values ( 300 );
insert @T ( CategoryId ) values ( 500 );
insert @T ( CategoryId ) values ( 500 );
insert @T ( CategoryId ) values ( 500 );
insert @T ( CategoryId ) values ( 600 );
insert @T ( CategoryId ) values ( 700 );
insert @T ( CategoryId ) values ( 800 );
insert @T ( CategoryId ) values ( 800 );
insert @T ( CategoryId ) values ( 800 );
insert @T ( CategoryId ) values ( 900 );

-- grab some CategoryIds to work with (set X).
-- The literals are distinct, so union all avoids a pointless de-duplication step.
declare @X table ( CategoryId int );
insert @X ( CategoryId )
select 200 union all
select 400 union all
select 600 union all
select 800;

-- A. Category ids of the items that are not in set X
select distinct t.CategoryId
from @T t
where not exists (select 1 from @X x where t.CategoryId = x.CategoryId);

-- or, using the set difference operator (except already removes duplicates)
select CategoryId from @T
except
select CategoryId from @X;

-- B. Category ids shared between the items and set X
select distinct x.CategoryId
from @X x
inner join @T t on t.CategoryId = x.CategoryId;

-- or, using set intersection
select CategoryId from @T
intersect
select CategoryId from @X;

-- C. Category ids in set X that are not associated with any item
select distinct x.CategoryId
from @X x
where not exists (select 1 from @T t where t.CategoryId = x.CategoryId);

-- or, using the set difference operator
select CategoryId from @X
except
select CategoryId from @T;

The problem with CTEs is that they are re-evaluated each time they are referenced and cannot have constraints. Load your Set X into a temporary table with a primary key on the ID. Then run the same joins against the temporary table and you should see a big performance gain. SQL Server does much better when joins are based on primary keys.

Related

Insert a table variable into a temp table with multiple columns (ID, Number, etc.)

I need to insert multiple Table variables into one temp table.
One of the table variables is:
DECLARE ##TempTable_Number TABLE (Number bigint)
insert into ##TempTable_Number (Number) values ('000000000000');
insert into ##TempTable_Number (Number) values ('100000000000');
This works for inserting just one table variable
select * into ##GlobalTempTable_1 from ##TempTable_Number
I have a couple more table variables like
DECLARE ##TempTable_ID TABLE (Number int)
insert into ##TempTable_ID (ID) values ('1');
insert into ##TempTable_ID (ID) values ('12');
etc...
I tried this to insert data from multiple table variables into one TempTable:
Select * into ####GlobalTempTable_1 From ##TempTable_ID, ##TempTable_Number;
The query goes to a continuous loop...
EDIT:
One of the table variables is:
DECLARE ##TempTable_Number TABLE (Number bigint, ID int)
insert into ##gvTempTable (Number) values ('21212321332332');
insert into ##gvTempTable (Number) values ('100000000000');
insert into ##gvTempTable (ID) values ('1');
insert into ##gvTempTable (ID) values ('12');
select * into ##GlobalTempTable from ##gvTempTable;
select * from ##GlobalTempTable;
This returns a kind of a cartesian product
Use UNION ALL:
-- Stack the rows of both sources into one column instead of joining them.
-- The output column takes its name (ID) from the first branch of the UNION ALL.
SELECT stacked.ID
INTO ##GlobalTempTable_1
FROM (
    SELECT ID
    FROM ##TempTable_ID
    UNION ALL
    SELECT Number
    FROM ##TempTable_Number
) AS stacked;
LiveDemo
Select * into ####GlobalTempTable_1 From ##TempTable_ID, ##TempTable_Number;
The query goes to a continuous loop...
It is probably not a loop but a very long-running query. Keep in mind that you are computing a Cartesian product.
So your query is the same as:
-- A comma-separated FROM list is an implicit cross join; this is the explicit form.
SELECT *
INTO ##GlobalTempTable_1
FROM ##TempTable_ID AS ids
CROSS JOIN ##TempTable_Number AS numbers;
And the result is NxM records where N is number of records in first table and M in the second.
Try like this,
-- Table variables are declared with the @ sigil; "#TempTable" would name a
-- temp table, which cannot be DECLAREd.
DECLARE @TempTable TABLE (
    ID INT
    ,Number BIGINT
    );

-- Two rows carry only a Number and two rows carry only an ID.
-- Numeric literals are used so no implicit string-to-number conversion is needed.
INSERT INTO @TempTable (Number)
VALUES (21212321332332);

INSERT INTO @TempTable (Number)
VALUES (100000000000);

INSERT INTO @TempTable (ID)
VALUES (1);

INSERT INTO @TempTable (ID)
VALUES (12);

-- Raw contents: each row has one of the two columns NULL.
SELECT ID
    ,Number
FROM @TempTable;

-- Pair the n-th ID with the n-th Number by position instead of cross joining.
SELECT A.ID
    ,B.Number
FROM (
    SELECT ID
        ,ROW_NUMBER() OVER (
            ORDER BY ID
            ) AS TempId
    FROM @TempTable
    WHERE ID IS NOT NULL
    ) AS A
INNER JOIN (
    SELECT Number
        -- Ordered by Number: ID is NULL on every row selected here, so ordering
        -- by ID would leave the numbering (and therefore the pairing) arbitrary.
        ,ROW_NUMBER() OVER (
            ORDER BY Number
            ) AS TempId
    FROM @TempTable
    WHERE Number IS NOT NULL
    ) AS B ON A.TempId = B.TempId;

Assign multiple values to Table variable in SQL

-- Pick the job request in job group 160 with the greatest LST_UPDT_TS.
-- Scalar variables take the @ sigil; "#ID" would be read as a temp-table name.
DECLARE @ID INT;

SET @ID = (
    SELECT TOP 1 USER_REQ_JOB_ID
    FROM T8504_USER_REQ_JOB
    WHERE JOB_GRP_ID = 160
    ORDER BY LST_UPDT_TS DESC
);

-- Input parameter values stored for that job request (one row per value).
SELECT INPUT_PARM_VAL_TX
FROM TBL_RPT_JOB_INPUT_PARAM
WHERE USER_REQ_JOB_ID = @ID;
This returns these results:
USA
USCC
6
7
2
I want to assign these five records to five different variables for use in a stored procedure.
I was trying with table variable like this :
declare #CID table (
Region Char(3)
,Segment Char(3)
,MasterContractId int
,ctcid int
,templateid int)
insert into #CID (Region,Segment,MasterContractId,ctcid,templateid)
But how to insert that 5 rows here?
-- Turn the parameter rows for @ID into a single row and load it into @CID.
-- @ID and @CID are the question's variables, written with the @ sigil that
-- T-SQL requires for variables and table variables.
-- Each row is labelled Temp1..Temp5 by its row number so that PIVOT can use the
-- label as a column heading.
-- NOTE(review): ORDER BY (SELECT 0) does not define an order, so which value
-- lands in which column is not guaranteed; order by a real sequencing column
-- if TBL_RPT_JOB_INPUT_PARAM has one.
INSERT INTO @CID (Region, Segment, MasterContractId, ctcid, templateid)
SELECT
    piv.Temp1,
    piv.Temp2,
    piv.Temp3,
    piv.Temp4,
    piv.Temp5
FROM
(
    SELECT
        'Temp' + CONVERT(char(1), ROW_NUMBER() OVER (ORDER BY (SELECT 0))) AS columnName,
        INPUT_PARM_VAL_TX AS Value
    FROM TBL_RPT_JOB_INPUT_PARAM
    WHERE USER_REQ_JOB_ID = @ID
) AS d
PIVOT
(
    MAX(Value)
    FOR columnName IN (Temp1, Temp2, Temp3, Temp4, Temp5)
) AS piv;
See if this helps.
Take a look at this fiddle for an example.
Courtesy:
Add row number to this T-SQL query
Efficiently convert rows to columns in sql server
EDIT: The SQL adds an extra column of generated row numbers; that column is then pivoted so its values become the column headings.
it's really gross, but one way you could probably do it is this (though you'll need to apply it to your case):
http://sqlfiddle.com/#!6/d41d8/21507
-- Stand-in for the query result: one value per row.
-- Table variables take the @ sigil; "#table" would name a temp table, which
-- cannot be DECLAREd.
DECLARE @table TABLE (value varchar(50));

INSERT INTO @table (value) VALUES ('first');
INSERT INTO @table (value) VALUES ('second');
INSERT INTO @table (value) VALUES ('3');
INSERT INTO @table (value) VALUES ('4');

-- Copy the rows into a table with an identity column so each one can be
-- addressed by position.
-- NOTE(review): @table has no ordering column, so the id each value receives is
-- not guaranteed; add an ORDER BY to this INSERT ... SELECT if the real source
-- has a column that defines the order.
DECLARE @temp TABLE (id int identity(1,1), value varchar(50));

INSERT INTO @temp (value)
SELECT t.value
FROM @table AS t;

SELECT id, value
FROM @temp;

DECLARE @CID TABLE (Region varchar(50), cont varchar(50), another int, andAnother int);

-- One pass over @temp: each CASE keeps only the value of its own row and MAX
-- collapses the four rows into one. The varchar values for the int columns are
-- converted when they are stored.
INSERT INTO @CID
(
    Region,
    cont,
    another,
    andAnother
)
SELECT
    MAX(CASE WHEN id = 1 THEN value END), -- Region - varchar
    MAX(CASE WHEN id = 2 THEN value END), -- cont - varchar
    MAX(CASE WHEN id = 3 THEN value END), -- another - int
    MAX(CASE WHEN id = 4 THEN value END)  -- andAnother - int
FROM @temp;

SELECT Region, cont, another, andAnother
FROM @CID;
note that i assumed you're using mssql, you did not specify

INSERT multiple rows and OUTPUT original (source) values

I would like to INSERT multiple rows (using INSERT SELECT), and OUTPUT all the new and old IDs into a "mapping" table.
How can I get the original ID (or any source values) in the OUTPUT clause? I don't see a way to get any source values there.
Here is a minimal code example:
-- create some test data
declare #t table (id int identity, name nvarchar(max))
insert #t ([name]) values ('item 1')
insert #t ([name]) values ('another item')
-- duplicate items, storing a mapping from src ID => dest ID
declare #mapping table (srcid int, [newid] int)
insert #t ([name])
output ?????, inserted.id into #mapping-- I want to use source.ID but it's unavailable here.
select [name] from #t as source
-- show results
select * from #t
select * from #mapping
My actual scenario is more complex, so for example I cannot create a temp column on the data table in order to store a "original ID" temporarily, and I cannot uniquely identify items by anything other than the 'ID' column.
Interesting question. For your example, a possible cheat is to depend on the fact that you are doubling the number of rows. Assuming that rows are never deleted and the [id] column remains dense:
-- create some test data
-- (table variables and scalar variables take the @ sigil; "#" names a temp table)
declare @t table (id int identity, name nvarchar(max));
insert @t ([name]) values ('item 1');
insert @t ([name]) values ('another item');

-- duplicate items, storing a mapping from src ID => dest ID
declare @mapping table (srcid int, [newid] int);

-- Row count before the copy. Because the ids are assumed dense (1..@Rows), the
-- copy of row k receives id k + @Rows, so the source id is inserted.id - @Rows.
declare @Rows as int = ( select count(*) from @t );

insert @t ([name])
output inserted.id - @Rows, inserted.id into @mapping (srcid, [newid])
select source.[name]
from @t as source
order by source.id; -- Note 'order by' clause: the arithmetic above relies on ids being handed out in this order.

-- show results
select id, name from @t;
select srcid, [newid] from @mapping;

SQL Group By Modulo of Row Count

I have the following sample data:
Id Name Quantity
1 Red 1
2 Red 3
3 Blue 1
4 Red 1
5 Yellow 3
So for this example, there are a total of 5 Red, 1 Blue, and 3 Yellow. I am looking for a way to group them by Color, but with a maximum of 2 items per group (sorting is not important). Like so:
Name QuantityInPackage
Red 2
Red 2
Red 1
Blue 1
Yellow 2
Yellow 1
Any suggestions on how to accomplish this using T-SQL on MS-SQL 2005?
I would define a table containing sequential numbers, say 1 to 1000 and join that table (unless your database supports generating these numbers in the query like Oracle using CONNECT BY):
Table nums
n
1
2
3
...
I tried the following query using Oracle (should work with TSQL too):
-- Total the quantity per colour, then emit one row per package of at most two.
-- Package n covers units 2n-1 and 2n, so a colour needs every n with 2n-1 <= total;
-- when the total stops exactly at unit 2n-1, that last package holds a single unit.
WITH summed_colors AS (
    SELECT name,
           SUM(quantity) AS quantity
    FROM colors
    GROUP BY name
)
SELECT sc.name,
       CASE
           WHEN sc.quantity = nums.n * 2 - 1 THEN 1
           ELSE 2
       END AS quantityInPackage
FROM summed_colors sc
INNER JOIN nums
    ON nums.n * 2 - 1 <= sc.quantity
ORDER BY sc.name, quantityInPackage DESC
and it returns
Blue 1
Red 2
Red 2
Red 1
Yellow 2
Yellow 1
You need to use a numbers table to unpivot your data to make multiple rows:
-- Maximum number of items per package.
-- (Variables and table variables take the @ sigil; "#" would name a temp table.)
DECLARE @PackageSize AS int;
SET @PackageSize = 2;

-- Numbers table 1..10, used to repeat a row once per complete package.
-- It caps the output at 10 complete packages per name; add rows for larger totals.
DECLARE @numbers AS TABLE (Number int);
INSERT INTO @numbers (Number) VALUES (1);
INSERT INTO @numbers (Number) VALUES (2);
INSERT INTO @numbers (Number) VALUES (3);
INSERT INTO @numbers (Number) VALUES (4);
INSERT INTO @numbers (Number) VALUES (5);
INSERT INTO @numbers (Number) VALUES (6);
INSERT INTO @numbers (Number) VALUES (7);
INSERT INTO @numbers (Number) VALUES (8);
INSERT INTO @numbers (Number) VALUES (9);
INSERT INTO @numbers (Number) VALUES (10);

-- Sample data from the question.
DECLARE @t AS TABLE
    (
     Id int
    ,Nm varchar(6)
    ,Qty int
    );
INSERT INTO @t (Id, Nm, Qty) VALUES (1, 'Red', 1);
INSERT INTO @t (Id, Nm, Qty) VALUES (2, 'Red', 3);
INSERT INTO @t (Id, Nm, Qty) VALUES (3, 'Blue', 1);
INSERT INTO @t (Id, Nm, Qty) VALUES (4, 'Red', 1);
INSERT INTO @t (Id, Nm, Qty) VALUES (5, 'Yellow', 3);

-- Per name: the total, how many full packages it fills (integer division) and
-- what is left over for a final partial package.
WITH    Totals
          AS (
              SELECT    Nm
                       ,SUM(Qty) AS TotalQty
                       ,SUM(Qty) / @PackageSize AS NumCompletePackages
                       ,SUM(Qty) % @PackageSize AS PartialPackage
              FROM      @t
              GROUP BY  Nm
             )
    -- one row per complete package
    SELECT  Totals.Nm
           ,@PackageSize AS QuantityInPackage
    FROM    Totals
    INNER JOIN @numbers AS numbers
            ON numbers.Number <= Totals.NumCompletePackages
    UNION ALL
    -- plus one row for the remainder, when there is one
    SELECT  Totals.Nm
           ,PartialPackage AS QuantityInPackage
    FROM    Totals
    WHERE   PartialPackage <> 0;
It's not grouping or modulo/division that's the hard part here, it's the fact that you need to do an aggregate (sum) and then explode the data again. There aren't actually any "Red 2" rows, you have to create them somehow.
For SQL Server 2005+, I would probably use a function to do the "exploding":
-- Splits @Num units into buckets of at most @MaxPerGroup units.
--   @Num         total number of units to distribute
--   @MaxPerGroup largest number of units a single bucket may hold
-- Returns one row per bucket; column Num is the number of units in that bucket
-- (every bucket is full except possibly the last one).
-- Returns no rows when either argument is NULL or not positive: without that
-- guard a non-positive @MaxPerGroup never advances Total and the recursion
-- only stops at the recursion limit.
-- NOTE: each bucket after the first costs one recursion level, so callers that
-- expect more buckets than the default recursion limit allows must add
-- OPTION (MAXRECURSION n) to the calling statement.
CREATE FUNCTION dbo.CreateBuckets
(
    @Num int,
    @MaxPerGroup int
)
RETURNS TABLE
AS RETURN
WITH First_CTE AS
(
    -- Size of the first bucket: a full one, or everything if it all fits.
    SELECT CASE
        WHEN @MaxPerGroup < @Num THEN @MaxPerGroup
        ELSE @Num
    END AS Seed
    WHERE @Num > 0
      AND @MaxPerGroup > 0
),
Sequence_CTE AS
(
    -- [Current] = units in this bucket, Total = units handed out so far.
    SELECT Seed AS [Current], Seed AS Total
    FROM First_CTE
    UNION ALL
    SELECT
        CASE
            -- last bucket: only the remainder is left
            WHEN (Total + @MaxPerGroup) > @Num THEN (@Num - Total)
            ELSE @MaxPerGroup
        END,
        Total + @MaxPerGroup
    FROM Sequence_CTE
    WHERE Total < @Num
)
SELECT [Current] AS Num
FROM Sequence_CTE
Then, in the main query, group (sum) the data first and then use the bucket function:
-- Sum the quantity per name, then expand each total into buckets of at most 2
-- by applying dbo.CreateBuckets to every summed row.
WITH Totals AS
(
    SELECT Name, SUM(Quantity) AS Total
    FROM [Table] -- placeholder for your table name; TABLE is a reserved word, so it must be bracketed
    GROUP BY Name
)
SELECT t.Name, b.Num AS QuantityInPackage
FROM Totals AS t
CROSS APPLY dbo.CreateBuckets(t.Total, 2) AS b;
This should work for any bucket size, doesn't have to be 2 (just change the parameter).
This is very crude, but it works.
-- Sample data. #Colors, #Summed, #SortOfPivot and #Result are real temp tables
-- and keep the # prefix; the scalar variables further down take the @ sigil.
CREATE TABLE #Colors
(
    Id int,
    Name varchar(50),
    Quantity int
);

INSERT INTO #Colors (Id, Name, Quantity) VALUES (1, 'Red', 1);
INSERT INTO #Colors (Id, Name, Quantity) VALUES (2, 'Red', 3);
INSERT INTO #Colors (Id, Name, Quantity) VALUES (3, 'Blue', 1);
INSERT INTO #Colors (Id, Name, Quantity) VALUES (4, 'Red', 1);
INSERT INTO #Colors (Id, Name, Quantity) VALUES (5, 'Yellow', 3);
INSERT INTO #Colors (Id, Name, Quantity) VALUES (6, 'Green', 2);

-- Total quantity per name.
SELECT
    Name,
    SUM(Quantity) AS TotalQuantity
INTO #Summed
FROM
    #Colors
GROUP BY
    Name;

-- Per name: how many packages of 2 and how many packages of 1 are needed.
-- (No ORDER BY: the order of rows written by SELECT ... INTO is not something
-- later reads can rely on.)
SELECT
    Name,
    TotalQuantity / 2 AS RecordsWithQuantity2,
    TotalQuantity % 2 AS RecordsWithQuantity1
INTO #SortOfPivot
FROM
    #Summed;

DECLARE @RowCount int;
SET @RowCount = (SELECT COUNT(*) FROM #SortOfPivot);

DECLARE @Name varchar(50);
DECLARE @TwosInsertCount int;
DECLARE @OnesInsertCount int;

CREATE TABLE #Result (Name varchar(50), Quantity int);

-- Work through #SortOfPivot one name at a time, deleting each name once handled.
WHILE @RowCount > 0
BEGIN
    -- Read all three values from the SAME row. Separate TOP 1 reads without an
    -- ORDER BY could each return a different row and mix up names and counts.
    SELECT TOP 1
        @Name = Name,
        @TwosInsertCount = RecordsWithQuantity2,
        @OnesInsertCount = RecordsWithQuantity1
    FROM #SortOfPivot
    ORDER BY Name;

    WHILE @TwosInsertCount > 0
    BEGIN
        INSERT INTO #Result (Name, Quantity) VALUES (@Name, 2);
        SET @TwosInsertCount = @TwosInsertCount - 1;
    END

    WHILE @OnesInsertCount > 0
    BEGIN
        INSERT INTO #Result (Name, Quantity) VALUES (@Name, 1);
        SET @OnesInsertCount = @OnesInsertCount - 1;
    END

    DELETE FROM #SortOfPivot WHERE Name = @Name;
    SET @RowCount = (SELECT COUNT(*) FROM #SortOfPivot);
END

SELECT Name, Quantity FROM #Result;

DROP TABLE #Colors;
DROP TABLE #Result;
DROP TABLE #Summed;
DROP TABLE #SortOfPivot;

Find lowest common parent in recursive SQL table

Suppose I have a recursive table (e.g. employees with managers) and a list of size 0..n of ids. How can I find the lowest common parent for these ids?
For example, if my table looks like this:
Id | ParentId
---|---------
1 | NULL
2 | 1
3 | 1
4 | 2
5 | 2
6 | 3
7 | 3
8 | 7
Then the following sets of ids lead to the following results (the first one is a corner case):
[] => 1 (or NULL, doesn't really matter)
[1] => 1
[2] => 2
[1,8] => 1
[4,5] => 2
[4,6] => 1
[6,7,8] => 3
How to do this?
EDIT: Note that parent isn't the correct term in all cases. It's the lowest common node in all paths up the tree. The lowest common node can also be a node itself (for example in the case [1,8] => 1, node 1 is not a parent of node 1 but node 1 itself).
Kind regards,
Ronald
Here's one way of doing it; it uses a recursive CTE to find the ancestry of a node, and uses "CROSS APPLY" over the input values to get the common ancestry; you just change the values in #ids (table variable):
----------------------------------------- SETUP
-- Sample tree: each row points at its parent; the root (Id 1) has ParentId NULL.
CREATE TABLE MyData
(
    Id       int NOT NULL,
    ParentId int NULL
)

INSERT INTO MyData (Id, ParentId) VALUES (1, NULL)
INSERT INTO MyData (Id, ParentId) VALUES (2, 1)
INSERT INTO MyData (Id, ParentId) VALUES (3, 1)
INSERT INTO MyData (Id, ParentId) VALUES (4, 2)
INSERT INTO MyData (Id, ParentId) VALUES (5, 2)
INSERT INTO MyData (Id, ParentId) VALUES (6, 3)
INSERT INTO MyData (Id, ParentId) VALUES (7, 3)
INSERT INTO MyData (Id, ParentId) VALUES (8, 7)
GO
-- Returns the Id of the given node and of every node above it in MyData
-- (the node itself, its parent, its grandparent, ... up to the root).
--   @Id  node to start from (parameters take the @ sigil; "#Id" is not a valid
--        parameter name)
CREATE FUNCTION AncestorsUdf (@Id int)
RETURNS TABLE
AS
RETURN (
    WITH Ancestors (Id, ParentId)
    AS (
        -- anchor: the starting node itself
        SELECT Id, ParentId
        FROM MyData
        WHERE Id = @Id
        UNION ALL
        -- step: the row whose Id is the ParentId of the row found last
        SELECT md.Id, md.ParentId
        FROM MyData md
        INNER JOIN Ancestors a
          ON md.Id = a.ParentId
    )
    SELECT Id FROM Ancestors
);
GO
----------------------------------------- ACTUAL QUERY
-- (table variables and scalar variables take the @ sigil)
DECLARE @ids TABLE (Id int NOT NULL);
DECLARE @Count int;

-- your data (perhaps via a "split" udf)
INSERT @ids (Id) VALUES (6);
INSERT @ids (Id) VALUES (7);
INSERT @ids (Id) VALUES (8);

SELECT @Count = COUNT(*) FROM @ids;

-- Expand every input id into its ancestry, keep the nodes that appear in all
-- of them, and take the one with the highest Id.
-- This relies on ids increasing from parent to child, as in the sample data.
SELECT TOP 1 a.Id
FROM @ids AS i
    CROSS APPLY AncestorsUdf(i.Id) AS a
GROUP BY a.Id
HAVING COUNT(*) = @Count
ORDER BY a.Id DESC;
Update if the nodes aren't strictly ascending:
-- Returns the given node and every node above it in MyData, each with its
-- level in that chain (topmost node found = 0, increasing towards @Id).
--   @Id  node to start from (parameters and variables take the @ sigil)
-- NOTE(review): this reuses the name AncestorsUdf; if the earlier inline
-- version already exists it presumably has to be dropped first.
CREATE FUNCTION AncestorsUdf (@Id int)
RETURNS @result TABLE (Id int, [Level] int)
AS
BEGIN
    -- Walk upwards, counting down from 0 at the starting node.
    WITH Ancestors (Id, ParentId, RelLevel)
    AS (
        SELECT Id, ParentId, 0
        FROM MyData
        WHERE Id = @Id
        UNION ALL
        SELECT md.Id, md.ParentId, a.RelLevel - 1
        FROM MyData md
        INNER JOIN Ancestors a
          ON md.Id = a.ParentId
    )
    INSERT @result (Id, [Level])
    SELECT Id, RelLevel FROM Ancestors;

    -- Shift the relative (negative) levels so that the topmost node becomes 0.
    DECLARE @Min int;
    SELECT @Min = MIN([Level]) FROM @result;
    UPDATE @result SET [Level] = [Level] - @Min;

    RETURN;
END
GO
and
-- Same idea as before, but pick the common node with the greatest level
-- instead of the greatest Id, so the ids no longer need to be ascending.
-- @ids and @Count are the variables declared for the previous query
-- (variables take the @ sigil).
SELECT TOP 1 a.Id
FROM @ids AS i
    CROSS APPLY AncestorsUdf(i.Id) AS a
GROUP BY a.Id, a.[Level]
HAVING COUNT(*) = @Count
ORDER BY a.[Level] DESC;
After doing some thinking and some hints in the right direction from Marc's answer (thanks), I came up with another solution myself:
-- Sample tree (table variables and scalar variables take the @ sigil; "#"
-- would name a temp table, which cannot be DECLAREd).
DECLARE @parentChild TABLE (Id INT NOT NULL, ParentId INT NULL);
INSERT INTO @parentChild (Id, ParentId) VALUES (1, NULL);
INSERT INTO @parentChild (Id, ParentId) VALUES (2, 1);
INSERT INTO @parentChild (Id, ParentId) VALUES (3, 1);
INSERT INTO @parentChild (Id, ParentId) VALUES (4, 2);
INSERT INTO @parentChild (Id, ParentId) VALUES (5, 2);
INSERT INTO @parentChild (Id, ParentId) VALUES (6, 3);
INSERT INTO @parentChild (Id, ParentId) VALUES (7, 3);
INSERT INTO @parentChild (Id, ParentId) VALUES (8, 7);

-- The nodes whose lowest common node is wanted.
DECLARE @ids TABLE (Id INT NOT NULL);
INSERT INTO @ids (Id) VALUES (6);
INSERT INTO @ids (Id) VALUES (7);
INSERT INTO @ids (Id) VALUES (8);

DECLARE @count INT;
SELECT @count = COUNT(*) FROM @ids;

WITH Nodes(Id, ParentId, Depth) AS
(
    -- Start from every node in the @ids collection.
    SELECT pc.Id, pc.ParentId, 0 AS Depth
    FROM @parentChild pc
    INNER JOIN @ids i ON pc.Id = i.Id
    UNION ALL
    -- Recursively find parent nodes for each starting node;
    -- Depth counts down by one per step away from the starting node.
    SELECT pc.Id, pc.ParentId, n.Depth - 1
    FROM @parentChild pc
    INNER JOIN Nodes n ON pc.Id = n.ParentId
)
-- A node reached from every starting node is common to all of them.
-- Ordering by the smallest Depth, descending, lists the common nodes from the
-- lowest one up to the root.
SELECT n.Id
FROM Nodes n
GROUP BY n.Id
HAVING COUNT(n.Id) = @count
ORDER BY MIN(n.Depth) DESC;
It now returns the entire path from the lowest common parent to the root node but that is a matter of adding a TOP 1 to the select.