Existing SQL Server 2008 script improvement - sql

SQL Server 2008
I have two tables with OrderIds and ItemIds. I need a resulting table in which each OrderId from the first table is linked with the OrderId from the second table that shares the maximum number of identical ItemIds.
I wrote a script that does this using two nested loops, but if the number of OrderIds in those tables is big (~1000) the inner loop body has to run 1000x1000 times, which might take too long. Can this be achieved in a better way?
See my already-written script below:
-- Demo fixture for the order-matching question.
--   #OrderRec  : received orders, one row per (order, item); duplicates allowed.
--   #OrderSent : sent orders, one row per (order, item).
--   #Match     : result table, filled by the matching script that follows.
-- The drops are guarded so the script also runs on a fresh session, where an
-- unconditional DROP TABLE would fail because the tables do not exist yet.
IF OBJECT_ID('tempdb..#Match') IS NOT NULL DROP TABLE #Match;
IF OBJECT_ID('tempdb..#OrderRec') IS NOT NULL DROP TABLE #OrderRec;
IF OBJECT_ID('tempdb..#OrderSent') IS NOT NULL DROP TABLE #OrderSent;

CREATE TABLE #Match (
    OrderIdRec  int NULL,
    OrderIdSent int NULL   -- NULL when the received order shares no item with any sent order
);

CREATE TABLE #OrderRec (
    OrderIdRec int NOT NULL,
    ItemId     int NULL
);

CREATE TABLE #OrderSent (
    OrderIdSent int NOT NULL,
    ItemId      int NULL
);

-- Explicit column lists keep the inserts valid if columns are ever added.
INSERT INTO #OrderRec (OrderIdRec, ItemId)
VALUES (1, 1), (1, 5), (1, 7), (1, 4), (1, 15), (1, 10),
       (2, 21), (2, 15), (2, 21), (2, 26),   -- item 21 appears twice on purpose (as in the original data)
       (5, 4), (5, 3), (5, 12), (5, 1);

INSERT INTO #OrderSent (OrderIdSent, ItemId)
VALUES (121, 1), (121, 2), (121, 5), (121, 10), (121, 9),
       (122, 6), (122, 7), (122, 9), (122, 11),
       (142, 1), (142, 12), (142, 4), (142, 11);
-- Nested-loop matcher: for every received order, find the sent order that
-- shares the most ItemIds and record the pair in #Match.
-- Local variables must start with '@' in T-SQL; names starting with '#' are
-- temp-table names and cannot be DECLAREd.
set nocount on

declare @OrderIdRec int,        -- received order currently being matched
        @OrderIdSent int,       -- sent order currently being compared
        @cnt numeric(10),       -- shared-item count for the current pair
        @cnt_max numeric(10),   -- best shared-item count seen for @OrderIdRec
        @OrderIdSentMax int     -- sent order that produced @cnt_max (NULL if none)

select @OrderIdRec = MIN(OrderIdRec)
from #OrderRec

-- Outer loop: walk received orders in ascending id order.
-- MIN() over no rows yields NULL, which ISNULL turns into 0 and ends the loop.
while ISNULL(@OrderIdRec,0) > 0
begin
    select @OrderIdSent = MIN(OrderIdSent)
    from #OrderSent

    set @cnt_max = 0
    set @OrderIdSentMax = NULL

    -- Inner loop: walk sent orders in ascending id order.
    while ISNULL(@OrderIdSent,0) > 0
    begin
        set @cnt = 0

        select @cnt = COUNT(*)
        from #OrderRec r
        inner join #OrderSent t
            on t.ItemId = r.ItemId
        where r.OrderIdRec = @OrderIdRec
          and t.OrderIdSent = @OrderIdSent

        -- Strict '>' means the lowest OrderIdSent wins when counts tie,
        -- and a pair with zero shared items is never recorded.
        if isnull(@cnt, 0) > @cnt_max
        begin
            set @cnt_max = @cnt
            set @OrderIdSentMax = @OrderIdSent
        end

        select @OrderIdSent = MIN(OrderIdSent)
        from #OrderSent
        where OrderIdSent > @OrderIdSent
    end

    insert #Match(
        OrderIdRec,
        OrderIdSent)
    values (@OrderIdRec, @OrderIdSentMax)

    select @OrderIdRec = MIN(OrderIdRec)
    from #OrderRec
    where OrderIdRec > @OrderIdRec
end

select *
from #Match
order by OrderIdRec
The actual script starts at set nocount on; everything before it just creates a set of data to play with.
The result is:
OrderIdRec OrderIdSent
1 121
2 NULL
5 142

-- Set-based replacement for the nested loops: one row per received order,
-- paired with the sent order that shares the most ItemIds (NULL if none).
;WITH pair_counts AS
(
    -- Number of matching item rows for every (received, sent) pair that
    -- shares at least one item. Grouping yields one row per pair, so the
    -- ranking step does not have to sort one row per joined item.
    SELECT r.OrderIdRec,
           s.OrderIdSent,
           COUNT(*) AS c
    FROM #OrderRec AS r
    INNER JOIN #OrderSent AS s
        ON r.ItemId = s.ItemId
    GROUP BY r.OrderIdRec, s.OrderIdSent
),
ranked AS
(
    -- rn = 1 marks the best sent order per received order. The OrderIdSent
    -- tie-breaker makes the choice deterministic and picks the lowest id on
    -- equal counts, which is what the loop version does.
    SELECT OrderIdRec,
           OrderIdSent,
           ROW_NUMBER() OVER (PARTITION BY OrderIdRec
                              ORDER BY c DESC, OrderIdSent ASC) AS rn
    FROM pair_counts
),
d AS
(
    -- Every received order, so that orders without any match still appear.
    SELECT OrderIdRec
    FROM #OrderRec
    GROUP BY OrderIdRec
)
SELECT d.OrderIdRec,
       ranked.OrderIdSent
FROM d
LEFT OUTER JOIN ranked
    ON d.OrderIdRec = ranked.OrderIdRec
   AND ranked.rn = 1   -- kept in ON (not WHERE) so unmatched orders survive the outer join
ORDER BY d.OrderIdRec;

The following query gets the counts for all pairs between the two tables:
-- Shared-item count for every pair of orders that has at least one item in common.
SELECT
    orec.OrderId,
    osent.OrderId,
    COUNT(*) AS cnt
FROM OrderRec AS orec
INNER JOIN OrderSent AS osent
    ON osent.itemId = orec.itemId
GROUP BY
    orec.OrderId,
    osent.OrderId;
The following gets the highest cnt value for each orec.OrderId:
-- Best-matching sent order for each received order.
-- Fixes relative to the posted version:
--   * the window is partitioned by the received order only; partitioning by
--     both order ids put every pair in its own partition, so every row got
--     seqnum = 1 and nothing was filtered;
--   * the two OrderId columns get distinct aliases, because a derived table
--     cannot expose two columns with the same name;
--   * osent.OrderId is added as a tie-breaker so the result is deterministic.
SELECT oo.RecOrderId,
       oo.SentOrderId,
       oo.cnt
FROM (
    SELECT orec.OrderId  AS RecOrderId,
           osent.OrderId AS SentOrderId,
           COUNT(*)      AS cnt,
           ROW_NUMBER() OVER (PARTITION BY orec.OrderId
                              ORDER BY COUNT(*) DESC, osent.OrderId ASC) AS seqnum
    FROM OrderRec AS orec
    INNER JOIN OrderSent AS osent
        ON orec.itemId = osent.itemId
    GROUP BY orec.OrderId, osent.OrderId
) AS oo
WHERE oo.seqnum = 1;

Related

Get all employees that belong to exact same location list as the passed in employee

I have a table called EmployeeLocationAssn:
-- Association table: one row per (employee, location) pairing.
CREATE TABLE EmployeeLocationAssn
(
    EmployeeLocationAssnId INT IDENTITY(1, 1) NOT NULL,  -- row id generated by the IDENTITY property
    EmployeeId             INT NOT NULL,
    LocationId             INT NOT NULL
)
This table contains data for employees and their associated locations.
-- Sample data. Kept as single-row statements so identity values are handed
-- out in exactly this order.

-- Employees 1, 2 and 3: locations 1 and 2
INSERT INTO EmployeeLocationAssn (EmployeeId, LocationId) VALUES (1, 1);
INSERT INTO EmployeeLocationAssn (EmployeeId, LocationId) VALUES (1, 2);
INSERT INTO EmployeeLocationAssn (EmployeeId, LocationId) VALUES (2, 1);
INSERT INTO EmployeeLocationAssn (EmployeeId, LocationId) VALUES (2, 2);
INSERT INTO EmployeeLocationAssn (EmployeeId, LocationId) VALUES (3, 1);
INSERT INTO EmployeeLocationAssn (EmployeeId, LocationId) VALUES (3, 2);

-- Employee 4: locations 1 through 4
INSERT INTO EmployeeLocationAssn (EmployeeId, LocationId) VALUES (4, 1);
INSERT INTO EmployeeLocationAssn (EmployeeId, LocationId) VALUES (4, 2);
INSERT INTO EmployeeLocationAssn (EmployeeId, LocationId) VALUES (4, 3);
INSERT INTO EmployeeLocationAssn (EmployeeId, LocationId) VALUES (4, 4);

-- Employee 5: locations 3 and 4
INSERT INTO EmployeeLocationAssn (EmployeeId, LocationId) VALUES (5, 3);
INSERT INTO EmployeeLocationAssn (EmployeeId, LocationId) VALUES (5, 4);

-- Employee 6: locations 1 through 4
INSERT INTO EmployeeLocationAssn (EmployeeId, LocationId) VALUES (6, 1);
INSERT INTO EmployeeLocationAssn (EmployeeId, LocationId) VALUES (6, 2);
INSERT INTO EmployeeLocationAssn (EmployeeId, LocationId) VALUES (6, 3);
INSERT INTO EmployeeLocationAssn (EmployeeId, LocationId) VALUES (6, 4);
I want to get all employees that have exactly the same location list as the passed-in employee id.
Example:
If the user passes EmployeeId = 1, then the query should return all employees that have the same locations.
Output:
@EmployeeId = 1
1
2
3
Employees 4 and 6 have locations 1, 2, 3 & 4, which doesn't exactly match locations 1 & 2 that Employee 1 has, and Employee 5 has a completely different location list (3, 4).
@EmployeeId = 4
4
6
Employees 1, 2, and 3 have locations 1 & 2, which doesn't exactly match locations 1, 2, 3 & 4 that Employee 4 has, and Employee 5 has only a partial location list (3, 4). Only Employees 4 & 6 have the same location list (1, 2, 3, 4).
@EmployeeId = 5
5
Employees 1, 2, and 3 have locations 1 & 2, which doesn't exactly match locations 3 & 4 that Employee 5 has, and Employees 4 & 6 have a bigger location list (1, 2, 3, 4).
I started writing a query but got all confused, here is what I have which of course is not correct.
-- Asker's first attempt, kept as posted except that the variable now uses the
-- '@' sigil T-SQL requires (a name starting with '#' cannot be DECLAREd).
-- NOTE(review): the query is still logically wrong, as the asker states:
--   * both branches of the EXCEPT select ELA.LocationId (the outer row's
--     column) instead of ELA2.LocationId / ELA3.LocationId, so the set
--     difference never compares the two employees' location lists;
--   * only one direction of the difference is tested, so even with the right
--     columns a superset of the target's locations would still pass;
--   * the passed-in employee is filtered out, although the expected output
--     includes it.
DECLARE @EmployeeId int = 1
SELECT ELA.EmployeeId, ELA.LocationId
FROM EmployeeLocationAssn ELA
WHERE NOT EXISTS
      (SELECT ELA.LocationId FROM EmployeeLocationAssn ELA2 WHERE ELA2.EmployeeId = @EmployeeId
       EXCEPT
       SELECT ELA.LocationId FROM EmployeeLocationAssn ELA3 WHERE ELA3.EmployeeId = ELA.EmployeeId)
  AND ELA.EmployeeId <> @EmployeeId;
You can use string_agg (if using SQL Server 2017+) to compare the Employees:
-- Employees whose location list is exactly the same as @EmployeeId's
-- (the passed-in employee is included in the result).
-- Requires STRING_AGG, i.e. SQL Server 2017 or later.
declare @EmployeeId int = 1;   -- '@' sigil: '#' names are temp tables and cannot be DECLAREd

with cte as (
    -- One row per employee with its locations flattened into an ordered,
    -- comma-separated list; ordering makes equal sets produce equal strings.
    select E.EmployeeId
         , string_agg(E.LocationId, ',') within group (order by E.LocationId asc) as LocationGroup
    from EmployeeLocationAssn E
    group by E.EmployeeId
)
select EmployeeId
from cte
where LocationGroup = (select LocationGroup from cte where EmployeeId = @EmployeeId);
Or for xml path if using a lower version:
-- Same comparison for versions without STRING_AGG: build each employee's
-- ordered, comma-separated location list with FOR XML PATH and compare it
-- to the list of @EmployeeId.
declare @EmployeeId int = 1;   -- '@' sigil: '#' names are temp tables and cannot be DECLAREd

with cte as (
    select E.EmployeeId,
        -- STUFF removes only the leading comma. The previous
        -- substring(..., 2, 1000) silently truncated lists longer than 1000
        -- characters, which could make different lists compare equal.
        stuff(
            (
                select ',' + convert(varchar(12), E1.LocationId) as [text()]
                from EmployeeLocationAssn E1          -- the permanent table defined in the question
                where E1.EmployeeId = E.EmployeeId
                order by E1.LocationId
                for xml path ('')
            ), 1, 1, '') as LocationGroup
    from EmployeeLocationAssn E
    group by E.EmployeeId
)
select EmployeeId
from cte
where LocationGroup = (select LocationGroup from cte where EmployeeId = @EmployeeId);
dbfiddle
Here's an alternative query that works for your criteria, checking the Locations match those of the selected employee, are not in those not used by the selected employee, and the number of locations match.
-- Alternative without string building. An employee matches when
--   1. it shares at least one location with @EmployeeId (the join),
--   2. it has no location outside @EmployeeId's list (NOT IN), and
--   3. it has as many location rows as @EmployeeId has distinct locations.
declare @EmployeeId int = 1   -- '@' sigil: '#' names are temp tables and cannot be DECLAREd

;with x as (
    -- Distinct locations of the target employee. COUNT(*) OVER () runs after
    -- the GROUP BY, so Qty is the number of distinct locations.
    select LocationId, count(*) over () as Qty
    from EmployeeLocationAssn
    where EmployeeId = @EmployeeId
    group by LocationId
)
select distinct e.EmployeeId
from x
join EmployeeLocationAssn e
    on e.LocationId = x.LocationId
where e.EmployeeId not in (
          -- Employees holding any location the target does not have.
          -- NOT IN is safe here because both columns are declared NOT NULL.
          select e2.EmployeeId
          from EmployeeLocationAssn e2
          where e2.LocationId not in (select LocationId from x)
      )
  and x.Qty = (select count(*) from EmployeeLocationAssn e3 where e3.EmployeeId = e.EmployeeId)

SQL Server : SELECT query to get DISTINCT and MAX display order value

I have a product table, a category table, and a mapping table. Categories are saved as a category tree. When a product is mapped to the last category in a three-level hierarchy, all the levels are saved in the mapping table with the same product id.
e.g.: Assume there is a category tree like Electronic > LapTops > DELL; when product id = 1 is assigned to category 'DELL', the mappings are saved as [1,Electronic],[1,LapTops],[1,DELL].
When I get data with a select query, all the category levels appear with the same product id.
My problem is that I need to retrieve data as [ProductId, ProductName, LastCategoryLevel, CategoryName, CategoryId].
Refer to the actual result below. For each product I just need to pick the highlighted row with the last category level, i.e. the highest category display order.
I can't use another stored procedure or function because this is a small part of a large stored procedure.
The actual database tables are very big, but I have tried to reproduce the same scenario with small temp tables; see the queries below.
-- Demo fixture: products, a three-level category tree, and the mapping rows
-- that attach each product to every level of its category path.
-- Table variables need the '@' sigil; DECLARE #name TABLE is not valid T-SQL.
DECLARE @Products TABLE (ProductId INT NOT NULL)

INSERT INTO @Products (ProductId)
SELECT ProductId
FROM (VALUES (1), (2), (3), (4)) AS x (ProductId)

DECLARE @Categories TABLE (CategoId INT NOT NULL,
                           Name VARCHAR(MAX) NOT NULL,
                           ParentCategoryId INT NOT NULL,   -- 0 for the root rows in the data below
                           DisplayOrder INT NOT NULL)       -- 1, 2, 3 going down the tree in the data below

-- 1st category tree
INSERT INTO @Categories (CategoId, Name, ParentCategoryId, DisplayOrder) VALUES (10, 'Electronic', 0, 1)
INSERT INTO @Categories (CategoId, Name, ParentCategoryId, DisplayOrder) VALUES (11, 'LapTops', 10, 2)
INSERT INTO @Categories (CategoId, Name, ParentCategoryId, DisplayOrder) VALUES (12, 'DELL', 11, 3)
INSERT INTO @Categories (CategoId, Name, ParentCategoryId, DisplayOrder) VALUES (13, 'HP', 11, 3)
-- 2nd category tree
INSERT INTO @Categories (CategoId, Name, ParentCategoryId, DisplayOrder) VALUES (14, 'Clothes', 0, 1)
INSERT INTO @Categories (CategoId, Name, ParentCategoryId, DisplayOrder) VALUES (15, 'T-Shirts', 14, 2)
INSERT INTO @Categories (CategoId, Name, ParentCategoryId, DisplayOrder) VALUES (16, 'Red', 15, 3)
INSERT INTO @Categories (CategoId, Name, ParentCategoryId, DisplayOrder) VALUES (17, 'Denim', 14, 2)
INSERT INTO @Categories (CategoId, Name, ParentCategoryId, DisplayOrder) VALUES (18, 'Levise', 17, 3)

DECLARE @Product_Category_Mappings TABLE (MappingId INT NOT NULL,
                                          ProductId INT NOT NULL,
                                          CategoryId INT NOT NULL)

INSERT INTO @Product_Category_Mappings (MappingId, ProductId, CategoryId) VALUES (100, 1, 10)
INSERT INTO @Product_Category_Mappings (MappingId, ProductId, CategoryId) VALUES (101, 1, 11)
INSERT INTO @Product_Category_Mappings (MappingId, ProductId, CategoryId) VALUES (102, 1, 12)
INSERT INTO @Product_Category_Mappings (MappingId, ProductId, CategoryId) VALUES (103, 2, 10)
INSERT INTO @Product_Category_Mappings (MappingId, ProductId, CategoryId) VALUES (104, 2, 11)
INSERT INTO @Product_Category_Mappings (MappingId, ProductId, CategoryId) VALUES (105, 2, 12)
INSERT INTO @Product_Category_Mappings (MappingId, ProductId, CategoryId) VALUES (106, 3, 14)
INSERT INTO @Product_Category_Mappings (MappingId, ProductId, CategoryId) VALUES (107, 3, 15)
INSERT INTO @Product_Category_Mappings (MappingId, ProductId, CategoryId) VALUES (108, 3, 16)
INSERT INTO @Product_Category_Mappings (MappingId, ProductId, CategoryId) VALUES (109, 4, 14)
INSERT INTO @Product_Category_Mappings (MappingId, ProductId, CategoryId) VALUES (110, 4, 17)
INSERT INTO @Product_Category_Mappings (MappingId, ProductId, CategoryId) VALUES (111, 4, 18)

-- Every product joined to every category level it is mapped to.
-- The former WHERE clause repeated the join predicate and has been dropped.
SELECT *
FROM @Products P
INNER JOIN @Product_Category_Mappings M ON M.ProductId = P.ProductId
INNER JOIN @Categories C ON C.CategoId = M.CategoryId
ORDER BY P.ProductId, C.DisplayOrder
Result of the above script. How do I get the highlighted rows?
For each ProductId, you want the row with highest DisplayOrder. You can use window functions:
-- One row per product: the mapped category with the highest DisplayOrder.
-- Fixes relative to the posted version:
--   * the derived table lists its columns explicitly; SELECT * exposed
--     ProductId twice (from P and from M), which a derived table rejects;
--   * the outer ORDER BY uses the derived table's alias, because P and C are
--     not in scope outside it;
--   * table variables use the '@' sigil;
--   * CategoId is a tie-breaker so equal DisplayOrder values rank deterministically.
SELECT t.ProductId,
       t.CategoryId,
       t.CategoryName,
       t.DisplayOrder
FROM (
    SELECT P.ProductId,
           C.CategoId AS CategoryId,
           C.Name     AS CategoryName,
           C.DisplayOrder,
           ROW_NUMBER() OVER (PARTITION BY P.ProductId
                              ORDER BY C.DisplayOrder DESC, C.CategoId DESC) AS rn
    FROM @Products P
    INNER JOIN @Product_Category_Mappings M ON M.ProductId = P.ProductId
    INNER JOIN @Categories C ON C.CategoId = M.CategoryId
) t
WHERE t.rn = 1
ORDER BY t.ProductId

SQL Server: stored procedure using recursive CTE finding values matching a total

I need to find, within a stored procedure, which values add up to a wanted total, following valex's solution to "recursive query in SQL Server".
The following works pretty well, assuming the CTE anchor recordset is very small:
-- Finds a combination of rows (taken in ascending id order) whose num values
-- add up to @wanted, by enumerating partial sums with a recursive CTE.
CREATE TABLE #t ([id] INT, [num] FLOAT);
DECLARE @wanted FLOAT = 100000   -- '@' sigil: '#' names are temp tables and cannot be DECLAREd
INSERT INTO #t ([id], [num])
VALUES (1, 17000), (2, 33000), (3, 53000), (4, 47000), (5, 10000),
(6, 53000), (7, 7000), (8, 10000), (9, 20000), (10, 5000),
(11, 40000), (12, 30000), (13, 10000), (14, 8000), (15, 8000),
(16, 10000), (17, 74000)
/* when you add more records the query becomes too slow, remove this comment
to test*/
/*,(18,10000),(19,78000),(20,10000),(21,10000),(22,80000),(23,19000),
(24,8000),(25,5000),(26,10000),(27,4000),(28,46000),(29,48000),(30,20000),
(31,10000),(32,25000),(33,10000),(34,13000),(35,16000),(36,10000),
(37,5000),(38,5000),(39,30000),(40,15000),(41,10000)*/
;
CREATE NONCLUSTERED INDEX [idx_id] ON #t ([id]);
WITH CTE AS
(
    -- Anchor: every single row that does not already exceed the target.
    SELECT
        id, num AS CSum,
        CAST(id AS VARCHAR(MAX)) AS path
    FROM
        #t
    WHERE num <= @wanted
    UNION ALL
    -- Recursive step: extend each partial combination with a row that has a
    -- higher id (so each combination is generated once) and keeps the running
    -- sum within the target. The table is aliased once, so the mixed #t / #T
    -- spelling cannot fail under a case-sensitive collation.
    SELECT
        t.id, t.num + CTE.CSum AS CSum,
        CTE.path + ',' + CAST(t.id AS VARCHAR(MAX)) AS path
    FROM
        #t AS t
    INNER JOIN
        CTE ON t.num + CTE.CSum <= @wanted AND CTE.id < t.id
)
SELECT TOP 1 Path
FROM CTE
WHERE CTE.CSum = @wanted
ORDER BY id   -- id is the last (highest) id of the combination
DROP TABLE #t
It will return 3,4, which are the first two rows whose [num] values add up to the @wanted total.
This works reasonably fast when there are just a few records in the temp table #t, but when you remove the comment markers and add all the remaining records (from id 18 to id 41) the query just takes forever because the CTE grows exponentially.
Is there a way to speed up the code? I just need the first matching total (the anchor dataset is ordered, so a result like 3,4 is better than 8,20,22).
What if you took an iterative approach? That would make it pretty simple to stop as soon as a solution is found.
This was put together quickly, so you may be able to optimize it further. I tested it with your example (it ran in less than 1 second) and with several other combinations and levels of depth.
Result Depth Total IdList NumList
------ ----------- ----------- ---------- -------------
Found 1 100000 3,4 53000,47000
Full Code:
-- Configuration
DECLARE #wanted FLOAT = 100000
DECLARE #MaxDepth INT = 10 -- Customize how many levels you want to look
SET NOCOUNT ON
IF OBJECT_ID('tempdb..#T') IS NOT NULL DROP TABLE #T
IF OBJECT_ID('tempdb..#T') IS NULL BEGIN
CREATE TABLE #T (Id INT, Num INT)
INSERT INTO #t ([id], [num])
VALUES (1, 17000), (2, 33000), (3, 53000), (4, 47000), (5, 10000),
(6, 53000), (7, 7000), (8, 10000), (9, 20000), (10, 5000),
(11, 40000), (12, 30000), (13, 10000), (14, 8000), (15, 8000),
(16, 10000), (17, 74000)
CREATE NONCLUSTERED INDEX [idx_id] ON #t ([id]);
END
-- Setup processing table
IF OBJECT_ID('tempdb..#U') IS NOT NULL DROP TABLE #U
CREATE TABLE #U (
MaxId INT,
Total INT,
IdList VARCHAR(MAX),
NumList VARCHAR(MAX)
)
-- Initial population from source table
INSERT #U
SELECT Id, Num,
CONVERT(VARCHAR(10), Id),
CONVERT(VARCHAR(10), Num)
FROM #T
-- Iterative approach
DECLARE #Depth INT = 0
WHILE NOT EXISTS (SELECT * FROM #U WHERE Total = #wanted) BEGIN
-- Increment depth
SET #Depth = #Depth + 1
IF #Depth >= #MaxDepth BEGIN
PRINT 'Max depth reached'
RETURN -- Stop processing further
END
-- Calculate sum for this depth
IF OBJECT_ID('tempdb..#V') IS NOT NULL
DROP TABLE #V
SELECT
T.Id AS MaxId,
U.Total + T.Num AS Total,
U.IdList + ',' + CONVERT(VARCHAR(10), T.Id) AS IdList,
U.NumList + ',' + CONVERT(VARCHAR(10), T.Num) AS NumList
INTO #V
FROM #U U
INNER JOIN #T T
ON U.MaxId < T.Id
-- Replace data for next iteration
TRUNCATE TABLE #U
INSERT #U
SELECT * FROM #V
-- Check if no more combinations available
IF ##ROWCOUNT = 0 BEGIN
PRINT 'All combinations tested'
RETURN -- Stop processing further
END
END
-- Return result
SELECT TOP 1 'Found' AS [Result], #Depth AS Depth, Total, IdList, NumList FROM #U WHERE Total = #wanted

Complex SQL query for inventory app

Given the following 2 tables, I need to find the warehouses that have all the parts in the right quantity to build a particular kit or, more appropriately, how many kits each warehouse can build.
Inventory table: Warehouse, Part, and QuantityOnHand
Kit table: Kit, Part, QuantityForKit
For example: Kit1 requires 1 of Part1, 2 of Part2, and 1 of Part3. Warehouse A has 20 Part1, 5 Part2 and 3 Part3. Warehouse B has 5 Part1, 10 Part2, and no Part3.
Warehouse A can only build 2 of Kit1 because it doesn't have enough Part2 to make more than 2 kits. Warehouse B can't build any Kit1 because it doesn't have all the necessary parts.
I've got the following demo that works, but it seems really cumbersome and uses mostly table/index scans. Our inventory table is large and this just runs too slow. I'm looking for a better way to accomplish the same thing. In the demo there's an unbounded cross join, but in the actual app, it's limited to a single kit.
-- Demo fixture: warehouses, their per-part stock, and the bill of materials
-- for each kit.
CREATE TABLE #warehouse
(
    Warehouse CHAR(1) NOT NULL PRIMARY KEY
);

INSERT INTO #warehouse
VALUES ('A'), ('B'), ('C'), ('D');

CREATE TABLE #inventory
(
    Warehouse CHAR(1) NOT NULL,
    Part      INT     NOT NULL,
    OnHand    INT     NOT NULL,
    CONSTRAINT pk_inventory PRIMARY KEY CLUSTERED (Part, Warehouse)
);

-- A part with no row for a warehouse means that warehouse holds none of it.
INSERT INTO #inventory
VALUES ('A', 1, 20), ('A', 2, 5), ('A', 3, 3),
       ('B', 1, 5),  ('B', 2, 10),
       ('C', 1, 1),  ('C', 3, 1),
       ('D', 1, 1),  ('D', 2, 2), ('D', 3, 1);

CREATE TABLE #kit
(
    Kit      INT NOT NULL,
    Part     INT NOT NULL,
    Quantity INT NOT NULL,   -- units of Part needed to build one Kit
    CONSTRAINT pk_kit PRIMARY KEY CLUSTERED (Kit, Part)
);

INSERT INTO #kit
VALUES (1, 1, 1), (1, 2, 2), (1, 3, 1),
       (2, 1, 1), (2, 2, 1);
-- Statement to optimize: how many of each kit every warehouse can build.
-- Each warehouse is paired with every kit part (in the real application the
-- #kit side is restricted to a single kit, i.e. WHERE Kit = @Kit), stock is
-- looked up with an outer join so a missing part counts as 0 on hand, and
-- the scarcest part decides the capacity. OnHand / Quantity is integer
-- division.
SELECT
    need.Warehouse,
    need.Kit,
    MIN(ISNULL(stock.OnHand, 0) / need.Quantity) AS [Capacity]
FROM
    (
        SELECT w.Warehouse, k.Kit, k.Part, k.Quantity
        FROM #warehouse AS w
        CROSS JOIN #kit AS k
    ) AS need
LEFT OUTER JOIN #inventory AS stock
    ON  stock.Warehouse = need.Warehouse
    AND stock.Part      = need.Part
GROUP BY
    need.Warehouse,
    need.Kit
;
Suggestions appreciated.
Try this:
-- Build capacity per warehouse for a single kit.
-- @kit is the kit id supplied by the caller. It must be a variable: the
-- posted "#kit" on the right-hand side named the temp table, not a value.
-- Only warehouses that hold every part of the kit in at least the required
-- quantity are returned, so a warehouse that cannot build the kit is absent
-- rather than reported with capacity 0.
SELECT c.warehouse,
       MIN(c.capacity) AS capacity          -- the scarcest part limits the number of kits
FROM (
    SELECT i.warehouse,
           i.onhand / k.quantity AS capacity   -- integer division: whole kits only
    FROM #kit k
    JOIN #inventory i
        ON k.part = i.part
       AND k.quantity <= i.onhand           -- drop parts the warehouse cannot cover even once
    WHERE k.kit = @kit
) c
GROUP BY c.warehouse
-- Keep a warehouse only if every part of the kit survived the join above.
HAVING COUNT(*) = (SELECT COUNT(*) FROM #kit WHERE kit = @kit)
sqlfiddle here

SQL CTE counting childs recursion

I'd like to use a CTE to count children in a table so that each parent row shows the number of all its children, including their children. Is there a sample available?
-- Adjacency-list sample: each row points at its parent; node 1 is the only
-- row whose parentID (0) matches no id in the table.
CREATE TABLE t_parent
(
    id       INT NOT NULL PRIMARY KEY,
    parentID INT NOT NULL
)

INSERT INTO t_parent
VALUES (1, 0),
       (2, 1),
       (3, 1),
       (4, 2),
       (5, 1),
       (6, 5),
       (7, 5);
-- Walks from every node up through its ancestors and counts how often each
-- id is reached. A node is reached once by itself and once by each of its
-- descendants, so the count is (number of descendants + 1).
WITH climb AS
(
    -- Anchor: every node starts at itself.
    SELECT node.id, node.parentID
    FROM t_parent AS node

    UNION ALL

    -- Step: move from the current row to its parent row.
    SELECT up.id, up.parentID
    FROM climb AS c
    INNER JOIN t_parent AS up
        ON up.id = c.parentID
)
SELECT climb.id, COUNT(*)
FROM climb
GROUP BY climb.id