Related
I'm trying to convert a row-by-row percentage calculation into a single dynamic query by using a window function partitioned by a column. I'm not sure this is the right way; please suggest.
-- Detail table used by the percentage queries; it is joined to qmaster on qcode.
CREATE TABLE qdetails (
    qcode           int,
    qcode_detail_01 int,
    qcode_detail_02 int
);

-- Sample rows: three qcodes under detail value 999 and three under 888.
INSERT INTO qdetails (qcode, qcode_detail_01, qcode_detail_02)
VALUES
    (25, 999, 56),
    (95, 999, 67),
    (96, 999, 68),
    (21, 888, 56),
    (22, 888, 67),
    (26, 888, 68);
-- Master table holding the value of each qcode and the qtype it belongs to.
CREATE TABLE qmaster (
    qcode  int,
    qtype  text,
    qvalue int
);

-- Sample rows. Several qcodes (99, 91, 92, 23, 24) have no row in qdetails,
-- and qcode 24 appears twice.
INSERT INTO qmaster (qcode, qtype, qvalue)
VALUES
    (25, 'XYZ', 25),
    (95, 'XYZ', 34),
    (96, 'XYZ', 17),
    (99, 'XYZ', 6),
    (91, 'XYZ', 4),
    (92, 'XYZ', 14),
    (21, 'ABC', 7),
    (22, 'ABC', 23),
    (23, 'ABC', 11),
    (24, 'ABC', 6),
    (24, 'ABC', 4),
    (26, 'ABC', 14);
For these table structures I have the following code; I'm trying to do this in a single query without repeating this for each row. This is on SQLite but that should not matter.
-- Share of each joined qcode in the total qvalue of qtype 'XYZ'.
-- The denominator sums every qmaster row of that qtype, including rows whose
-- qcode has no match in qdetails.
-- String literals use single quotes: double-quoted "XYZ" is an identifier in
-- standard SQL and is only treated as a string by a SQLite legacy fallback.
SELECT
    a.qcode_detail_01,
    b.qtype,
    a.qcode,
    b.qvalue,  -- not in GROUP BY; relies on SQLite accepting bare columns
    /* calculating % for one qtype at a time manually */
    SUM(CAST(b.qvalue AS float))
        / (
            -- alias t (not b) so the subquery does not shadow the outer qmaster alias
            SELECT SUM(CAST(t.qvalue AS float))
            FROM qmaster AS t
            WHERE t.qtype = 'XYZ'
        ) AS Percentage
FROM qdetails AS a
INNER JOIN qmaster AS b
    ON a.qcode = b.qcode
WHERE b.qtype = 'XYZ'
GROUP BY
    a.qcode,
    a.qcode_detail_01;
/*repeat for ABC*/
-- Share of each joined qcode in the total qvalue of qtype 'ABC'.
-- The denominator sums every qmaster row of that qtype, including rows whose
-- qcode has no match in qdetails.
-- String literals use single quotes: double-quoted "ABC" is an identifier in
-- standard SQL and is only treated as a string by a SQLite legacy fallback.
SELECT
    a.qcode_detail_01,
    b.qtype,
    a.qcode,
    b.qvalue,  -- not in GROUP BY; relies on SQLite accepting bare columns
    /* calculating % for one qtype at a time manually */
    SUM(CAST(b.qvalue AS float))
        / (
            -- alias t (not b) so the subquery does not shadow the outer qmaster alias
            SELECT SUM(CAST(t.qvalue AS float))
            FROM qmaster AS t
            WHERE t.qtype = 'ABC'
        ) AS Percentage
FROM qdetails AS a
INNER JOIN qmaster AS b
    ON a.qcode = b.qcode
WHERE b.qtype = 'ABC'
GROUP BY
    a.qcode,
    a.qcode_detail_01;
The part that is not working is this:
/*Avoid Repetition by doing this dynamically using Window Function */
-- Attempted single-query version. It does not run as written; the review notes
-- below point at the reasons visible in the statement itself.
select
a.qcode_detail_01,
b.qtype,
a.qcode,
b.qvalue,
-- NOTE(review): even with the join repaired, this window SUM only sees rows that
-- survive the join to qdetails, so the per-qtype total would leave out qmaster
-- rows whose qcode has no match there.
cast(b.qvalue as float)/sum(cast(b.qvalue as float)) OVER (PARTITION BY b.qtype) as 'Percentage'
from qdetails a,
qmaster b
-- NOTE(review): the next line starts with `and`, but no WHERE keyword precedes
-- it, so the statement is a syntax error.
and a.qcode = b.qcode
-- NOTE(review): b.qtype and b.qvalue are selected without being grouped or aggregated.
group by a.qcode,a.qcode_detail_01;
Here is a SQLFiddle for the same; please advise.
For the sample data above, I'm looking to get the following result with a single query:
qcode_detail_01 qtype qcode qvalue Percentage
999 XYZ 25 25 0.25
999 XYZ 95 34 0.34
999 XYZ 96 17 0.17
888 ABC 21 7 0.1076923076923077
888 ABC 22 23 0.35384615384615387
888 ABC 26 14 0.2153846153846154
The simplest way to do this is to join a CTE that returns the total values for each qtype:
-- Per-qtype totals are computed once in a CTE and joined back to each detail row,
-- so the percentage needs no per-qtype repetition.
WITH type_totals AS (
    SELECT
        qtype,
        SUM(qvalue) AS total_value
    FROM qmaster
    GROUP BY qtype
)
SELECT
    dt.qcode_detail_01,
    ms.qtype,
    dt.qcode,
    ms.qvalue,
    -- numerator is cast to FLOAT so the division is not integer division
    SUM(CAST(ms.qvalue AS FLOAT)) / tt.total_value AS Percentage
FROM qdetails AS dt
INNER JOIN qmaster AS ms
    ON dt.qcode = ms.qcode
INNER JOIN type_totals AS tt
    ON tt.qtype = ms.qtype
GROUP BY
    dt.qcode,
    dt.qcode_detail_01,
    ms.qtype
ORDER BY dt.qcode_detail_01;
Another way to do it, with a LEFT join and window function SUM():
-- Start from qmaster so the window total covers every row of a qtype, then keep
-- only the rows that found a counterpart in qdetails.
WITH shares AS (
    SELECT DISTINCT
        dt.qcode_detail_01,
        ms.qtype,
        dt.qcode,
        ms.qvalue,
        -- inner SUM: value per group; outer windowed SUM: total of those per qtype
        SUM(CAST(ms.qvalue AS FLOAT))
            / SUM(SUM(CAST(ms.qvalue AS FLOAT))) OVER (PARTITION BY ms.qtype) AS Percentage
    FROM qmaster AS ms
    LEFT JOIN qdetails AS dt
        ON dt.qcode = ms.qcode
    GROUP BY
        ms.qcode,
        dt.qcode_detail_01,
        ms.qtype
)
SELECT *
FROM shares
WHERE qcode_detail_01 IS NOT NULL;
See the demo.
Also, use proper joins with ON clauses.
I have two tables like this:
table 1
id cost
1 200
2 300
3 500
4 700
NULL NULL
NULL NULL
table 2
1 200
2 300
3 500
4 700
5 1000
6 2500
and I ran this:
sum(coalesce(table1.cost,table2.cost))
My aim is to get
200+300+500+700+1000+2500= 5200
If I do
sum(coalesce(table1.cost,table2.cost))
will I get
200+300+500+700+1000+2500= 5200
or will I get this?
200+300+500+700+200+300+500+700+1000+2500= 6900
Thanks for any answers. I'm curious to know how to get only 5200 and not 6900.
I use snowflake SQL
Please share your queries so we can better understand the problem.
With the limited data provided, I think you are looking for a FULL OUTER JOIN, since you need to include data from both tables. I have built a solution in SQL Server using table variables. Both queries give the same result, 5200.
-- Sample data held in table variables (table variables take the @ prefix; a
-- # prefix denotes a temp table and cannot be used with DECLARE ... TABLE).
-- table1 carries two all-NULL rows: NULL ids match nothing in the join and
-- their NULL cost is ignored by SUM.
DECLARE @table1 TABLE (Id INT NULL, Cost INT NULL);
DECLARE @table2 TABLE (Id INT NULL, Cost INT NULL);

INSERT INTO @table1 (Id, Cost)
VALUES (1, 200), (2, 300), (3, 500), (4, 700), (NULL, NULL), (NULL, NULL);

INSERT INTO @table2 (Id, Cost)
VALUES (1, 200), (2, 300), (3, 500), (4, 700), (5, 1000), (6, 2500);

-- Each id is counted once: prefer table1's cost, fall back to table2's.
SELECT
    SUM(COALESCE(t1.Cost, t2.Cost))
FROM @table1 AS t1
FULL OUTER JOIN @table2 AS t2
    ON t1.Id = t2.Id;

-- Same total with the preference reversed.
SELECT
    SUM(COALESCE(t2.Cost, t1.Cost))
FROM @table1 AS t1
FULL OUTER JOIN @table2 AS t2
    ON t1.Id = t2.Id;
Within a stored procedure, I need to find which values add up to a wanted total, following valex's recursive-query solution for SQL Server.
The following works pretty well assuming the CTE anchor recordset is very small
-- Finds the first combination of rows in #t whose [num] values add up to @wanted.
-- #t is a temp table; @wanted is a local variable (variables take the @ prefix).
CREATE TABLE #t ([id] INT, [num] FLOAT);

DECLARE @wanted FLOAT = 100000;

INSERT INTO #t ([id], [num])
VALUES (1, 17000), (2, 33000), (3, 53000), (4, 47000), (5, 10000),
       (6, 53000), (7, 7000), (8, 10000), (9, 20000), (10, 5000),
       (11, 40000), (12, 30000), (13, 10000), (14, 8000), (15, 8000),
       (16, 10000), (17, 74000)
/* when you add more records the query becomes too slow, remove this comment
to test*/
/*,(18,10000),(19,78000),(20,10000),(21,10000),(22,80000),(23,19000),
(24,8000),(25,5000),(26,10000),(27,4000),(28,46000),(29,48000),(30,20000),
(31,10000),(32,25000),(33,10000),(34,13000),(35,16000),(36,10000),
(37,5000),(38,5000),(39,30000),(40,15000),(41,10000)*/
;

CREATE NONCLUSTERED INDEX [idx_id] ON #t ([id]);

WITH CTE AS
(
    -- Anchor: every single row that does not already exceed the target.
    SELECT
        id,
        num AS CSum,
        CAST(id AS VARCHAR(MAX)) AS path
    FROM #t
    WHERE num <= @wanted

    UNION ALL

    -- Recursive step: extend a partial combination with a row of higher id
    -- (so each combination is built once, in id order) and drop any
    -- extension whose running sum would exceed the target.
    SELECT
        #t.id,
        #t.num + CTE.CSum AS CSum,
        CTE.path + ',' + CAST(#t.id AS VARCHAR(MAX)) AS path
    FROM #t
    INNER JOIN CTE
        ON CTE.id < #t.id
    WHERE #t.num + CTE.CSum <= @wanted
)
-- id here is the last (highest) id of the combination.
SELECT TOP 1 Path
FROM CTE
WHERE CTE.CSum = @wanted
ORDER BY id;

DROP TABLE #t;
It will return 3,4, which are the first two rows whose [num] values add up to the @wanted total.
This works reasonably fast when there are just a few records in the temp table #t, but when you remove the comment and add all the remaining records (ids 18 to 41) the query just takes forever because the CTE grows exponentially.
Is there a way to speed up the code? I just need the first matching total (the anchor dataset is ordered, so a result like 3,4 is better than 8,20,22).
What if you took an iterative approach? This would be pretty simple to give the ability to stop as soon as a solution is found.
This was put together quickly, so you may be able to optimize it further. I tested it on your example (it ran in less than 1 second) and on several other combinations and levels of depth.
Result Depth Total IdList NumList
------ ----------- ----------- ---------- -------------
Found 1 100000 3,4 53000,47000
Full Code:
-- Iterative search for a combination of #T rows whose Num values add up to
-- @wanted. Each pass extends every partial combination by one more row and the
-- loop stops as soon as a matching total exists.
-- #T, #U and #V are temp tables; @-prefixed names are local variables.

-- Configuration
DECLARE @wanted FLOAT = 100000
DECLARE @MaxDepth INT = 10 -- Customize how many levels you want to look

SET NOCOUNT ON

-- Recreate the source table from scratch on every run.
IF OBJECT_ID('tempdb..#T') IS NOT NULL DROP TABLE #T
IF OBJECT_ID('tempdb..#T') IS NULL BEGIN
    CREATE TABLE #T (Id INT, Num INT)
    INSERT INTO #T ([id], [num])
    VALUES (1, 17000), (2, 33000), (3, 53000), (4, 47000), (5, 10000),
           (6, 53000), (7, 7000), (8, 10000), (9, 20000), (10, 5000),
           (11, 40000), (12, 30000), (13, 10000), (14, 8000), (15, 8000),
           (16, 10000), (17, 74000)
    CREATE NONCLUSTERED INDEX [idx_id] ON #T ([id]);
END

-- Setup processing table: one row per partial combination.
IF OBJECT_ID('tempdb..#U') IS NOT NULL DROP TABLE #U
CREATE TABLE #U (
    MaxId INT,             -- highest Id used so far in the combination
    Total INT,             -- running sum of Num
    IdList VARCHAR(MAX),   -- comma-separated Ids
    NumList VARCHAR(MAX)   -- comma-separated Nums
)

-- Initial population from source table: every single row is a combination.
INSERT #U
SELECT Id, Num,
       CONVERT(VARCHAR(10), Id),
       CONVERT(VARCHAR(10), Num)
FROM #T

-- Iterative approach
DECLARE @Depth INT = 0
WHILE NOT EXISTS (SELECT * FROM #U WHERE Total = @wanted) BEGIN
    -- Increment depth
    SET @Depth = @Depth + 1
    IF @Depth >= @MaxDepth BEGIN
        PRINT 'Max depth reached'
        RETURN -- Stop processing further
    END

    -- Calculate sum for this depth: extend each combination with a row of
    -- higher Id so that every combination is generated only once.
    IF OBJECT_ID('tempdb..#V') IS NOT NULL
        DROP TABLE #V
    SELECT
        T.Id AS MaxId,
        U.Total + T.Num AS Total,
        U.IdList + ',' + CONVERT(VARCHAR(10), T.Id) AS IdList,
        U.NumList + ',' + CONVERT(VARCHAR(10), T.Num) AS NumList
    INTO #V
    FROM #U U
    INNER JOIN #T T
        ON U.MaxId < T.Id

    -- Replace data for next iteration
    TRUNCATE TABLE #U
    INSERT #U
    SELECT * FROM #V

    -- Check if no more combinations available.
    -- @@ROWCOUNT must be read immediately after the INSERT it refers to.
    IF @@ROWCOUNT = 0 BEGIN
        PRINT 'All combinations tested'
        RETURN -- Stop processing further
    END
END

-- Return result
SELECT TOP 1 'Found' AS [Result], @Depth AS Depth, Total, IdList, NumList FROM #U WHERE Total = @wanted
I hope you can help me.
I need to display the records in the HH_Solution_Audit table where two or more staff members enter the room within 10 minutes of each other. Here are the requirements:
Display only the events that have a timestamp (LAST_UPDATED) interval of less than or equal to 10 minutes. Therefore, I must compare the current row to the next row and previous row to check if their DATEDIFF is less than or equal to 10 minutes. I’m done with this part.
Show only the records for which at least 2 distinct STAFF_GUID values entered the room within a span of 10 minutes or less.
HH_Solution_Audit Table Details:
ID - PK
STAFF_GUID - staff id
LAST_UPDATED - datetime when a staff enters a room
Here's what I got so far. This satisfies requirement # 1 only.
-- Audit table: one row per room entry.
CREATE TABLE HH_Solution_Audit (
    ID INT PRIMARY KEY,
    STAFF_GUID NVARCHAR(1),   -- staff id
    LAST_UPDATED DATETIME     -- datetime when a staff enters a room
)
GO
-- Sample data. The column list is explicit so the insert does not depend on
-- column order, and timestamps use the ISO 8601 'YYYY-MM-DDThh:mm:ss' form,
-- which DATETIME parses the same way under every language/DATEFORMAT setting.
INSERT INTO HH_Solution_Audit (ID, STAFF_GUID, LAST_UPDATED)
VALUES
    (1,  'b', '2013-04-25T09:01:00'),
    (2,  'b', '2013-04-25T09:04:00'),
    (3,  'b', '2013-04-25T09:13:00'),
    (4,  'a', '2013-04-25T10:15:00'),
    (5,  'a', '2013-04-25T10:30:00'),
    (6,  'a', '2013-04-25T10:33:00'),
    (7,  'a', '2013-04-25T10:41:00'),
    (8,  'a', '2013-04-25T11:02:00'),
    (9,  'a', '2013-04-25T11:30:00'),
    (10, 'a', '2013-04-25T11:45:00'),
    (11, 'a', '2013-04-25T11:46:00'),
    (12, 'a', '2013-04-25T11:51:00'),
    (13, 'a', '2013-04-25T12:24:00'),
    (14, 'b', '2013-04-25T12:27:00'),
    (15, 'b', '2013-04-25T13:35:00');
-- Requirement 1 only: return every entry whose previous or next entry (by
-- time) is at most @lengthOfStay minutes away. @numOfPeople is declared for
-- requirement 2 but is not referenced by this query.
-- Date parameters use ISO 8601 literals so they do not depend on the session's
-- language/DATEFORMAT setting.
DECLARE @numOfPeople INT = 2,
        --minimum number of people that must be inside
        --the room for @lengthOfStay minutes
        @lengthOfStay INT = 10,
        --number of minutes of stay
        @dateFrom DATETIME = '2013-04-25T00:00:00',
        @dateTo DATETIME = '2013-04-25T23:59:00';

WITH cteSource AS
(
    -- Number the entries chronologically so neighbours can be joined by row_num.
    SELECT ID, STAFF_GUID, LAST_UPDATED,
           ROW_NUMBER() OVER (ORDER BY LAST_UPDATED) AS row_num
    FROM HH_SOLUTION_AUDIT
    WHERE LAST_UPDATED >= @dateFrom AND LAST_UPDATED <= @dateTo
)
SELECT [current].ID, [current].STAFF_GUID, [current].LAST_UPDATED
FROM cteSource AS [current]
LEFT OUTER JOIN cteSource AS [previous]
    ON [current].row_num = [previous].row_num + 1
LEFT OUTER JOIN cteSource AS [next]
    ON [current].row_num = [next].row_num - 1
WHERE
    -- For the first/last row the missing neighbour is NULL, DATEDIFF yields
    -- NULL and that side of the OR simply does not match.
    DATEDIFF(MINUTE, [previous].LAST_UPDATED, [current].LAST_UPDATED)
        <= @lengthOfStay
    OR
    DATEDIFF(MINUTE, [current].LAST_UPDATED, [next].LAST_UPDATED)
        <= @lengthOfStay
ORDER BY [current].ID, [current].LAST_UPDATED
Running the query returns IDs:
1, 2, 3, 5, 6, 7, 10, 11, 12, 13, 14
That satisfies requirement # 1 of having less than or equal to 10 minutes interval between the previous row, current row and next row.
Can you help me with the 2nd requirement? If it's applied, the returned IDs should only be:
13, 14
Here's an idea. You don't need ROW_NUMBER and previous and next records. You just need two queries unioned - one looking for everyone who has someone checked in X minutes behind, and another looking X minutes ahead. Each uses a correlated sub-query and COUNT(*) to find the number of matching people. If the number is greater than your @numOfPeople - that's it.
EDIT: new version: Instead of doing two queries, one looking 10 minutes ahead and one 10 minutes behind, we only check 10 minutes behind - selecting the rows that match into cteLastOnes. Another part of the query then searches for the rows that actually fall within those 10 minutes. Finally, we again take the union of those rows and the 'last ones'.
-- Requirement 2: keep only entries that share a window of @lengthOfStay minutes
-- with enough other staff. Expects @numOfPeople, @lengthOfStay, @dateFrom and
-- @dateTo to be declared earlier in the same batch.
-- The window comparison is inclusive (>=) so that a gap of exactly
-- @lengthOfStay minutes counts, matching "less than or equal to 10 minutes".
WITH cteSource AS
(
    SELECT ID, STAFF_GUID, LAST_UPDATED
    FROM HH_SOLUTION_AUDIT
    WHERE LAST_UPDATED >= @dateFrom AND LAST_UPDATED <= @dateTo
)
,cteLastOnes AS
(
    -- Entries preceded, within the window, by at least @numOfPeople - 1
    -- distinct other staff.
    SELECT c1.ID, c1.STAFF_GUID, c1.LAST_UPDATED
    FROM cteSource c1
    WHERE @numOfPeople - 1 <= (SELECT COUNT(DISTINCT c2.STAFF_GUID)
                               FROM cteSource c2
                               WHERE DATEADD(MI, @lengthOfStay, c2.LAST_UPDATED) >= c1.LAST_UPDATED
                                 AND c2.LAST_UPDATED <= c1.LAST_UPDATED
                                 AND c1.STAFF_GUID <> c2.STAFF_GUID)
)
SELECT ID, STAFF_GUID, LAST_UPDATED FROM cteLastOnes
UNION
-- Earlier entries by other staff that fall inside the window of a "last one".
SELECT s.ID, s.STAFF_GUID, s.LAST_UPDATED
FROM cteSource s
WHERE EXISTS (SELECT * FROM cteLastOnes l
              WHERE DATEADD(MI, @lengthOfStay, s.LAST_UPDATED) >= l.LAST_UPDATED
                AND s.LAST_UPDATED <= l.LAST_UPDATED
                AND s.STAFF_GUID <> l.STAFF_GUID)
SQLFiddle DEMO - new version
SQLFiddle DEMO - old version
Based on the following table
ID Effort Name
-------------------------
1 1 A
2 1 A
3 8 A
4 10 B
5 4 B
6 1 B
7 10 C
8 3 C
9 30 C
I want to check the total effort for each name: if it is less than 40, add a row with effort = 40 - (Total Effort) for that name. The ID of the new row can be anything. If the total effort is greater than 40, then truncate the effort of one of the rows so that the total becomes 40.
So after applying the logic above table will be
ID Effort Name
-------------------------
1 1 A
2 1 A
3 8 A
10 30 A
4 10 B
5 4 B
6 1 B
11 25 B
7 10 C
8 3 C
9 27 C
I was thinking of opening a cursor, keeping a counter of the total effort, and based on the logic insert existing and new rows in another temporary table.
I am not sure if this is an efficient way to deal with this. I would like to learn if there is a better way.
I think the first part could be done this way:
-- Top up every name whose total effort is below 40 with one row holding the
-- shortfall. The stray ")" after the HAVING clause was removed; it made the
-- statement a syntax error.
-- NOTE(review): assumes tbl.ID is generated automatically - confirm.
INSERT INTO tbl (Effort, Name)
SELECT
    40 - SUM(Effort),
    Name
FROM tbl
GROUP BY Name
HAVING SUM(Effort) < 40;
The second part is harder. Perhaps you could do something like this instead?
-- Add one balancing row per name whose total effort is not exactly 40: the
-- value is positive when the total is below 40 and negative when it is above.
-- The stray ")" after the HAVING clause was removed; it made the statement a
-- syntax error.
-- NOTE(review): assumes tbl.ID is generated automatically - confirm.
INSERT INTO tbl (Effort, Name)
SELECT
    40 - SUM(Effort),
    Name
FROM tbl
GROUP BY Name
HAVING SUM(Effort) <> 40;
What this does is, rather than making changes to your actual data, adds a row with a negative number for the Name if the total effort is > 40 hours, or a positive value if it is < 40 hours. This seems much safer for your data integrity than messing with the original values.
In SQL Server 2008, this may be done with a single MERGE statement:
-- Normalises every name to a total effort of exactly 40 in one MERGE:
--   * total below 40 -> insert a row with the shortfall,
--   * total above 40 -> trim the row that crosses 40 and delete later rows.
-- @efforts is a table variable (table variables take the @ prefix).
DECLARE @efforts TABLE (id INT NOT NULL PRIMARY KEY, effort INT NOT NULL, name CHAR(1))

INSERT INTO @efforts (id, effort, name)
VALUES  (1, 1, 'A'),
        (2, 1, 'A'),
        (3, 8, 'A'),
        (4, 10, 'B'),
        (5, 4, 'B'),
        (6, 1, 'B'),
        (7, 10, 'C'),
        (8, 3, 'C'),
        (9, 30, 'C'),
        (10, 60, 'C')

-- State before the merge.
SELECT  *
FROM    @efforts
ORDER BY
        name, id

;WITH total AS
        (
        -- Existing rows ...
        SELECT  *
        FROM    @efforts e
        UNION ALL
        -- ... plus one synthetic shortfall row per name below 40, with ids
        -- numbered after the current maximum id.
        SELECT  ROW_NUMBER() OVER(ORDER BY name) +
                (
                SELECT  MAX(id)
                FROM    @efforts
                ),
                40 - SUM(effort),
                name
        FROM    @efforts
        GROUP BY
                name
        HAVING  SUM(effort) < 40
        ),
        source AS
        (
        SELECT  *,
                -- ce: cumulative effort for the name up to and including this row
                (
                SELECT  SUM(effort)
                FROM    total ep
                WHERE   ep.name = e.name
                        AND ep.id <= e.id
                ) AS ce,
                -- cp: cumulative effort for the name before this row
                COALESCE(
                (
                SELECT  SUM(effort)
                FROM    total ep
                WHERE   ep.name = e.name
                        AND ep.id < e.id
                ), 0) AS cp
        FROM    total e
        )
MERGE
INTO    @efforts e
USING   source s
ON      e.id = s.id
-- The row on which the running total crosses 40 is trimmed to end exactly at 40.
WHEN MATCHED AND 40 BETWEEN cp AND ce THEN
UPDATE
SET     e.effort = s.effort + 40 - ce
-- Rows that start after 40 has already been exceeded are removed.
WHEN MATCHED AND cp > 40 THEN
DELETE
-- Synthetic shortfall rows do not exist in the target yet.
WHEN NOT MATCHED BY TARGET THEN
INSERT  (id, effort, name)
VALUES  (id, effort, name);

-- State after the merge.
SELECT  *
FROM    @efforts
ORDER BY
        name, id
In SQL Server 2005, you'll need two statements (in one transaction):
-- SQL Server 2005 variant: no MERGE and no multi-row VALUES, so the work is
-- split into a DELETE followed by an INSERT.
-- @efforts is a table variable (table variables take the @ prefix).
DECLARE @efforts TABLE (id INT NOT NULL PRIMARY KEY, effort INT NOT NULL, name CHAR(1))

INSERT INTO @efforts (id, effort, name) VALUES (1, 1, 'A')
INSERT INTO @efforts (id, effort, name) VALUES (2, 1, 'A')
INSERT INTO @efforts (id, effort, name) VALUES (3, 8, 'A')
INSERT INTO @efforts (id, effort, name) VALUES (4, 10, 'B')
INSERT INTO @efforts (id, effort, name) VALUES (5, 4, 'B')
INSERT INTO @efforts (id, effort, name) VALUES (6, 1, 'B')
INSERT INTO @efforts (id, effort, name) VALUES (7, 10, 'C')
INSERT INTO @efforts (id, effort, name) VALUES (8, 3, 'C')
INSERT INTO @efforts (id, effort, name) VALUES (9, 30, 'C')
INSERT INTO @efforts (id, effort, name) VALUES (10, 60, 'C')

-- Step 1: delete every row on which the running total for its name exceeds 40.
-- Here cp is the cumulative effort up to and INCLUDING the current row.
;WITH total AS
        (
        SELECT  *,
                COALESCE(
                (
                SELECT  SUM(effort)
                FROM    @efforts ep
                WHERE   ep.name = e.name
                        AND ep.id <= e.id
                ), 0) AS cp
        FROM    @efforts e
        )
DELETE
FROM    total
WHERE   cp > 40

-- Step 2: every name now totals at most 40; add one row with the shortfall,
-- with ids numbered after the current maximum id.
INSERT
INTO    @efforts (id, effort, name)
SELECT  (
        SELECT  MAX(id)
        FROM    @efforts
        ) +
        ROW_NUMBER() OVER (ORDER BY name),
        40 - SUM(effort),
        name
FROM    @efforts
GROUP BY
        name
HAVING  SUM(effort) < 40

SELECT  *
FROM    @efforts
ORDER BY
        name, id
This will give you the names that need modifying:
-- Names whose total effort is below 40, together with that total.
-- [Table] is a placeholder for the real table name; it is bracket-quoted
-- because TABLE is a reserved word and cannot be used unquoted.
SELECT
    Name,
    SUM(Effort) AS TotalEffort
FROM [Table]
GROUP BY Name
HAVING SUM(Effort) < 40
Select this into a temp table, add a column for 40 - SUM, then create an insert statement from that. Much better than a cursor.
This will do the first part:
-- For each name whose effort totals less than 40, add one row carrying the shortfall.
INSERT INTO dbo.Test (Name, Effort)
SELECT
    src.Name,
    40 - SUM(src.Effort)
FROM dbo.Test AS src
GROUP BY src.Name
HAVING SUM(src.Effort) < 40
And this will do the second part:
-- For each name whose effort totals more than 40, reduce the row with the
-- highest ID by the excess so that the name totals exactly 40.
-- AmountToDeduct is SUM - 40 (a positive excess). Computing it as 40 - SUM
-- yields a negative number, and subtracting that would increase Effort.
-- NOTE(review): if the excess is larger than that row's Effort the result goes
-- negative; spreading the reduction over several rows is not handled here.
UPDATE a
SET a.Effort = a.Effort - b.AmountToDeduct
FROM dbo.Test AS a
INNER JOIN (
    SELECT
        t.Name,
        SUM(t.Effort) - 40 AS AmountToDeduct
    FROM dbo.Test AS t
    GROUP BY t.Name
    HAVING SUM(t.Effort) > 40
) AS b
    ON a.Name = b.Name
WHERE a.ID = (
    SELECT MAX(c.ID)
    FROM dbo.Test AS c
    WHERE c.Name = a.Name
)