Find all intersections of all sets of ranges in PostgreSQL

I'm looking for an efficient way to find all the intersections between sets of timestamp ranges. It needs to work with PostgreSQL 9.2.
Let's say the ranges represent the times when a person is available to meet. Each person may have one or more ranges of times when they are available. I want to find all the time periods when a meeting can take place (i.e. during which all people are available).
This is what I've got so far. It seems to work, but I don't think it's very efficient, since it considers one person's availability at a time.
WITH RECURSIVE td AS
(
-- Test data. Returns:
-- ["2014-01-20 00:00:00","2014-01-31 00:00:00")
-- ["2014-02-01 00:00:00","2014-02-20 00:00:00")
-- ["2014-04-15 00:00:00","2014-04-20 00:00:00")
SELECT 1 AS entity_id, '2014-01-01'::timestamp AS begin_time, '2014-01-31'::timestamp AS end_time
UNION SELECT 1, '2014-02-01', '2014-02-28'
UNION SELECT 1, '2014-04-01', '2014-04-30'
UNION SELECT 2, '2014-01-15', '2014-02-20'
UNION SELECT 2, '2014-04-15', '2014-05-05'
UNION SELECT 3, '2014-01-20', '2014-04-20'
)
, ranges AS
(
-- Convert to tsrange type
SELECT entity_id, tsrange(begin_time, end_time) AS the_range
FROM td
)
, min_max AS
(
SELECT MIN(entity_id), MAX(entity_id)
FROM td
)
, inter AS
(
-- Ranges for the lowest ID
SELECT entity_id AS last_id, the_range
FROM ranges r
WHERE r.entity_id = (SELECT min FROM min_max)
UNION ALL
-- Iteratively intersect with ranges for the next higher ID
SELECT entity_id, r.the_range * i.the_range
FROM ranges r
JOIN inter i ON r.the_range && i.the_range
WHERE r.entity_id > i.last_id
AND NOT EXISTS
(
SELECT *
FROM ranges r2
WHERE r2.entity_id < r.entity_id AND r2.entity_id > i.last_id
)
)
-- Take the final set of intersections
SELECT *
FROM inter
WHERE last_id = (SELECT max FROM min_max)
ORDER BY the_range;

I created the tsrange_interception_agg aggregate:
create function tsrange_interception (
internal_state tsrange, next_data_values tsrange
) returns tsrange as $$
select internal_state * next_data_values;
$$ language sql;
create aggregate tsrange_interception_agg (tsrange) (
sfunc = tsrange_interception,
stype = tsrange,
initcond = $$[-infinity, infinity]$$
);
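As a quick sanity check, the aggregate just folds the * intersection operator over the input rows, starting from the unbounded range, so intersecting two overlapping ranges returns their common part; for example:
SELECT tsrange_interception_agg(r) AS common_part
FROM (VALUES (tsrange('2014-01-01', '2014-01-31')),
             (tsrange('2014-01-15', '2014-02-20'))) AS t(r);
-- ["2014-01-15 00:00:00","2014-01-31 00:00:00")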
Then this query
with td (id, begin_time, end_time) as
(
values
(1, '2014-01-01'::timestamp, '2014-01-31'::timestamp),
(1, '2014-02-01', '2014-02-28'),
(1, '2014-04-01', '2014-04-30'),
(2, '2014-01-15', '2014-02-20'),
(2, '2014-04-15', '2014-05-05'),
(3, '2014-01-20', '2014-04-20')
), ranges as (
select
id,
row_number() over(partition by id) as rn,
tsrange(begin_time, end_time) as tr
from td
), cr as (
select r0.tr tr0, r1.tr as tr1
from ranges r0 cross join ranges r1
where
r0.id < r1.id and
r0.tr && r1.tr and
r0.id = (select min(id) from td)
)
select tr0 * tsrange_interception_agg(tr1) as interseptions
from cr
group by tr0
having count(*) = (select count(distinct id) from td) - 1
;
interseptions
-----------------------------------------------
["2014-02-01 00:00:00","2014-02-20 00:00:00")
["2014-01-20 00:00:00","2014-01-31 00:00:00")
["2014-04-15 00:00:00","2014-04-20 00:00:00")

If you have a fixed number of entities to cross-reference, you can use a cross join for each of them and build the intersection (using the * operator on ranges).
Using a cross join like this is probably less efficient, though; the following example is mainly here to set up the more complex one below.
WITH td AS
(
SELECT 1 AS entity_id, '2014-01-01'::timestamp AS begin_time, '2014-01-31'::timestamp AS end_time
UNION SELECT 1, '2014-02-01', '2014-02-28'
UNION SELECT 1, '2014-04-01', '2014-04-30'
UNION SELECT 2, '2014-01-15', '2014-02-20'
UNION SELECT 2, '2014-04-15', '2014-05-05'
UNION SELECT 4, '2014-01-20', '2014-04-20'
)
,ranges AS
(
-- Convert to tsrange type
SELECT entity_id, tsrange(begin_time, end_time) AS the_range
FROM td
)
SELECT r1.the_range * r2.the_range * r3.the_range AS r
FROM ranges r1
CROSS JOIN ranges r2
CROSS JOIN ranges r3
WHERE r1.entity_id=1 AND r2.entity_id=2 AND r3.entity_id=4
AND NOT isempty(r1.the_range * r2.the_range * r3.the_range)
ORDER BY r
In this case a multiple cross join is probably less efficient, because you don't actually need all the possible combinations of every range: once isempty(r1.the_range * r2.the_range) is true, isempty(r1.the_range * r2.the_range * r3.the_range) is necessarily true as well.
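For example, an empty pairwise intersection stays empty no matter what you intersect it with:
SELECT isempty(tsrange('2014-01-01', '2014-01-10') * tsrange('2014-02-01', '2014-02-10'));  -- true
SELECT isempty(tsrange('2014-01-01', '2014-01-10') * tsrange('2014-02-01', '2014-02-10')
                                                   * tsrange('2014-01-05', '2014-02-05'));  -- still true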
I don't think you can avoid going through each person's availability one at a time, since you need all of them to be able to meet anyway.
What may help is to build the set of intersections incrementally, by cross joining each person's availability against the subset you've calculated so far using another recursive CTE (intersections in the example below). You then build the intersections incrementally and discard the empty ranges, with both stored as arrays:
WITH RECURSIVE td AS
(
SELECT 1 AS entity_id, '2014-01-01'::timestamp AS begin_time, '2014-01-31'::timestamp AS end_time
UNION SELECT 1, '2014-02-01', '2014-02-28'
UNION SELECT 1, '2014-04-01', '2014-04-30'
UNION SELECT 2, '2014-01-15', '2014-02-20'
UNION SELECT 2, '2014-04-15', '2014-05-05'
UNION SELECT 4, '2014-01-20', '2014-04-20'
)
,ranges AS
(
-- Convert to tsrange type
SELECT entity_id, tsrange(begin_time, end_time) AS the_range
FROM td
)
,ranges_arrays AS (
-- Prepare an array of all possible intervals per entity
SELECT entity_id, array_agg(the_range) AS ranges_arr
FROM ranges
GROUP BY entity_id
)
,numbered_ranges_arrays AS (
-- We'll join using pos+1 next, so we want continuous integers
-- I've changed the example entity_id from 3 to 4 to demonstrate this.
SELECT ROW_NUMBER() OVER () AS pos, entity_id, ranges_arr
FROM ranges_arrays
)
,intersections (pos, subranges) AS (
-- We start off with the infinite range.
SELECT 0::bigint, ARRAY['[,)'::tsrange]
UNION ALL
-- Then, we unnest the previous intermediate result,
-- cross join it against the array of ranges from the
-- next row in numbered_ranges_arrays (joined via pos+1).
-- We take the intersections and discard the empty ones.
SELECT r.pos,
ARRAY(SELECT x * y FROM unnest(r.ranges_arr) x CROSS JOIN unnest(i.subranges) y WHERE NOT isempty(x * y))
FROM numbered_ranges_arrays r
INNER JOIN intersections i ON r.pos=i.pos+1
)
,last_intersections AS (
-- We just really want the result from the last operation (with the max pos).
SELECT subranges FROM intersections ORDER BY pos DESC LIMIT 1
)
SELECT unnest(subranges) r FROM last_intersections ORDER BY r
I'm not sure whether this is likely to perform better, unfortunately. You'd probably need a larger dataset to have meaningful benchmarks.
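If it helps, something along these lines can generate a larger random test set to benchmark against (a rough sketch only; the 50 entities, 5000 rows and the date spreads are arbitrary choices, not from the question):
SELECT entity_id,
       begin_time,
       begin_time + (1 + (random() * 20))::int * interval '1 day' AS end_time
FROM (
    SELECT (g % 50) + 1 AS entity_id,
           timestamp '2014-01-01' + (random() * 365)::int * interval '1 day' AS begin_time
    FROM generate_series(1, 5000) AS g
) AS s;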

OK, I wrote and tested this in T-SQL, but it should run, or at least be close enough for you to translate back; it's all fairly vanilla constructs, except maybe the BETWEEN, which can be broken into a < clause and a > clause (thanks @Horse).
WITH cteSched AS ( --Schedule for everyone
-- Test data. Returns:
-- ["2014-01-20 00:00:00","2014-01-31 00:00:00")
-- ["2014-02-01 00:00:00","2014-02-20 00:00:00")
-- ["2014-04-15 00:00:00","2014-04-20 00:00:00")
SELECT 1 AS entity_id, '2014-01-01' AS begin_time, '2014-01-31' AS end_time
UNION SELECT 1, '2014-02-01', '2014-02-28'
UNION SELECT 1, '2014-04-01', '2014-04-30'
UNION SELECT 2, '2014-01-15', '2014-02-20'
UNION SELECT 2, '2014-04-15', '2014-05-05'
UNION SELECT 3, '2014-01-20', '2014-04-20'
), cteReq as ( --List of people to schedule (or is everyone in Sched required? Not clear, doesn't hurt)
SELECT 1 as entity_id UNION SELECT 2 UNION SELECT 3
), cteBegins as (
SELECT distinct begin_time FROM cteSched as T
WHERE NOT EXISTS (SELECT entity_id FROM cteReq as R
WHERE NOT EXISTS (SELECT * FROM cteSched as X
WHERE X.entity_id = R.entity_id
AND T.begin_time BETWEEN X.begin_time AND X.end_time ))
) SELECT B.begin_time, MIN(S.end_time ) as end_time
FROM cteBegins as B cross join cteSched as S
WHERE B.begin_time between S.begin_time and S.end_time
GROUP BY B.begin_time
-- NOTE: This assume users do not have schedules that overlap with themselves! That is, nothing like
-- John is available 2014-01-01 to 2014-01-15 and 2014-01-10 to 2014-01-20.
EDIT: Added the output from the above (when executed on SQL Server 2008 R2):
begin_time end_time
2014-01-20 2014-01-31
2014-02-01 2014-02-20
2014-04-15 2014-04-20

Related

BigQuery recursively join based on links between 2 ID columns

Given a table representing a many-many join between IDs like the following:
WITH t AS (
SELECT 1 AS id_1, 'a' AS id_2,
UNION ALL SELECT 2, 'a'
UNION ALL SELECT 2, 'b'
UNION ALL SELECT 3, 'b'
UNION ALL SELECT 4, 'c'
UNION ALL SELECT 5, 'c'
UNION ALL SELECT 6, 'd'
UNION ALL SELECT 6, 'e'
UNION ALL SELECT 7, 'f'
)
SELECT * FROM t
|id_1|id_2|
|----|----|
|1|a|
|2|a|
|2|b|
|3|b|
|4|c|
|5|c|
|6|d|
|6|e|
|7|f|
I would like to be able to recursively join and then aggregate rows in order to find each disconnected sub-graph represented by these links, that is, each collection of IDs that are linked together.
The desired output for the example above would look something like this:
|id_1_coll|id_2_coll|
|---------|---------|
|1, 2, 3|a, b|
|4, 5|c|
|6|d, e|
|7|f|
where each row contains all the other IDs one could reach following the links in the table.
Note that 1 links to b even though there is no explicit link row, because we can follow the path 1 --> a --> 2 --> b using the links in the first 3 rows.
One potential approach is to remodel the relationships between id_1 and id_2 so that we get all the links from id_1 to itself, then use a recursive common table expression to traverse all the possible paths between id_1 values, and finally aggregate (somewhat arbitrarily) to the lowest such value that can be reached from each id_1.
Explanation
Our steps are:
1. Remodel the relationship into a series of self-joins for id_1
2. Map each id_1 to the lowest id_1 that it is linked to via a recursive CTE
3. Aggregate the recursive CTE using the lowest id_1s as the GROUP BY column, grabbing all the linked id_1 and id_2 values via the ARRAY_AGG() function
We can use something like this to remodel the relationships into a self join (1.):
SELECT
a.id_1, a.id_2, b.id_1 AS linked_id
FROM t as a
INNER JOIN t as b
ON a.id_2 = b.id_2
WHERE a.id_1 != b.id_1
Next - to set up the recursive table expression (2.) we can tweak the query above to also give us the lowest (LEAST) of the values for id_1 at each link then use this as the base iteration:
WITH RECURSIVE base_iter AS (
SELECT
a.id_1, b.id_1 AS linked_id, LEAST(a.id_1, b.id_1) AS lowest_linked_id
FROM t as a
INNER JOIN t as b
ON a.id_2 = b.id_2
WHERE a.id_1 != b.id_1
)
We can also grab the lowest id_1 value at this time:
|id_1|linked_id|lowest_linked_id|
|----|---------|----------------|
|1|2|1|
|2|1|1|
|2|3|2|
|3|2|2|
|4|5|4|
|5|4|4|
For our recursive loop, we want to maintain an ARRAY of linked ids and join each new iteration such that the id_1 value of the n+1th iteration is equal to the linked_id value of the nth iteration AND the nth linked_id value is not in the array of previously linked ids.
We can code this as follows:
recursive_loop AS (
SELECT id_1, linked_id, lowest_linked_id, [linked_id ] AS linked_ids
FROM base_iter
UNION ALL
SELECT
prev_iter.id_1, prev_iter.linked_id,
iter.lowest_linked_id,
ARRAY_CONCAT(iter.linked_ids, [prev_iter.linked_id])
FROM base_iter AS prev_iter
JOIN recursive_loop AS iter
ON iter.id_1 = prev_iter.linked_id
AND iter.lowest_linked_id < prev_iter.lowest_linked_id
AND prev_iter.linked_id NOT IN UNNEST(iter.linked_ids )
)
Giving us the following results:
|id_1|linked_id|lowest_linked_id|linked_ids|
|----|---------|------------|---|
|3|2|1|[1,2]|
|2|3|1|[1,2,3]|
|4|5|4|[5]|
|1|2|1|[2]|
|5|4|4|[4]|
|2|3|2|[3]|
|2|1|1|[1]|
|3|2|2|[2]|
which we can now link back to the original table for the id_2 values then aggregate (3.) as shown in the complete query below
Solution
WITH RECURSIVE t AS (
SELECT 1 AS id_1, 'a' AS id_2,
UNION ALL SELECT 2, 'a'
UNION ALL SELECT 2, 'b'
UNION ALL SELECT 3, 'b'
UNION ALL SELECT 4, 'c'
UNION ALL SELECT 5, 'c'
UNION ALL SELECT 6, 'd'
UNION ALL SELECT 6, 'e'
UNION ALL SELECT 7, 'f'
),
base_iter AS (
SELECT
a.id_1, b.id_1 AS linked_id, LEAST(a.id_1, b.id_1) AS lowest_linked_id
FROM t as a
INNER JOIN t as b
ON a.id_2 = b.id_2
WHERE a.id_1 != b.id_1
),
recursive_loop AS (
SELECT id_1, linked_id, lowest_linked_id, [linked_id ] AS linked_ids
FROM base_iter
UNION ALL
SELECT
prev_iter.id_1, prev_iter.linked_id,
iter.lowest_linked_id,
ARRAY_CONCAT(iter.linked_ids, [prev_iter.linked_id])
FROM base_iter AS prev_iter
JOIN recursive_loop AS iter
ON iter.id_1 = prev_iter.linked_id
AND iter.lowest_linked_id < prev_iter.lowest_linked_id
AND prev_iter.linked_id NOT IN UNNEST(iter.linked_ids )
),
link_back AS (
SELECT
t.id_1, IFNULL(lowest_linked_id, t.id_1) AS lowest_linked_id, t.id_2
FROM t
LEFT JOIN recursive_loop
ON t.id_1 = recursive_loop.id_1
),
by_id_1 AS (
SELECT
id_1,
MIN(lowest_linked_id) AS grp
FROM link_back
GROUP BY 1
),
by_id_2 AS (
SELECT
id_2,
MIN(lowest_linked_id) AS grp
FROM link_back
GROUP BY 1
),
result AS (
SELECT
by_id_1.grp,
ARRAY_AGG(DISTINCT id_1 ORDER BY id_1) AS id1_coll,
ARRAY_AGG(DISTINCT id_2 ORDER BY id_2) AS id2_coll,
FROM
by_id_1
INNER JOIN by_id_2
ON by_id_1.grp = by_id_2.grp
GROUP BY grp
)
SELECT grp, TO_JSON(id1_coll) AS id1_coll, TO_JSON(id2_coll) AS id2_coll
FROM result ORDER BY grp
Giving us the required output:
|grp|id1_coll|id2_coll|
|---|--------|--------|
|1|[1,2,3]|[a,b]|
|4|[4,5]|[c]|
|6|[6]|[d,e]|
|7|[7]|[f]|
Limitations/Issues
Unfortunately this approach is inefficient (we have to traverse every single pathway before aggregating it back together) and fails on the real-world case, where we have several million join rows. When trying to execute on that data, BigQuery runs up a huge "Slot time consumed" and eventually errors out with:
Resources exceeded during query execution: Your project or organization exceeded the maximum disk and memory limit available for shuffle operations. Consider provisioning more slots, reducing query concurrency, or using more efficient logic in this job.
I hope there might be a better way of doing the recursive join such that pathways can be merged/aggregated as we go (if we have an id_1 value AND a linked_id already in the list of linked_ids, we don't need to check it further).
Using ROW_NUMBER(), the query is as follows:
WITH RECURSIVE
t AS (
SELECT 1 AS id_1, 'a' AS id_2,
UNION ALL SELECT 2, 'a'
UNION ALL SELECT 2, 'b'
UNION ALL SELECT 3, 'b'
UNION ALL SELECT 4, 'c'
UNION ALL SELECT 5, 'c'
UNION ALL SELECT 6, 'd'
UNION ALL SELECT 6, 'e'
UNION ALL SELECT 7, 'f'
),
t1 AS (
SELECT ROW_NUMBER() OVER(ORDER BY t.id_1) n, t.id_1, t.id_2 FROM t
),
t2 AS (
SELECT n, [n] n_arr, [id_1] arr_1, [id_2] arr_2, id_1, id_2 FROM t1
WHERE n IN (SELECT MIN(n) FROM t1 GROUP BY id_1)
UNION ALL
SELECT t2.n, ARRAY_CONCAT(t2.n_arr, [t1.n]),
CASE WHEN t1.id_1 NOT IN UNNEST(t2.arr_1)
THEN ARRAY_CONCAT(t2.arr_1, [t1.id_1])
ELSE t2.arr_1 END,
CASE WHEN t1.id_2 NOT IN UNNEST(t2.arr_2)
THEN ARRAY_CONCAT(t2.arr_2, [t1.id_2])
ELSE t2.arr_2 END,
t1.id_1, t1.id_2
FROM t2 JOIN t1 ON
t2.n < t1.n AND
t1.n NOT IN UNNEST(t2.n_arr) AND
(t2.id_1 = t1.id_1 OR t2.id_2 = t1.id_2) AND
(t1.id_1 NOT IN UNNEST(t2.arr_1) OR t1.id_2 NOT IN UNNEST(t2.arr_2))
),
t3 AS (
SELECT
n,
ARRAY_AGG(DISTINCT id_1 ORDER BY id_1) arr_1,
ARRAY_AGG(DISTINCT id_2 ORDER BY id_2) arr_2
FROM t2
WHERE n IN (SELECT MIN(n) FROM t2 GROUP BY id_1)
GROUP BY n
)
SELECT n, TO_JSON(arr_1), TO_JSON(arr_2) FROM t3 ORDER BY n
t1: appends row numbers.
t2: extracts the rows matching either id_1 or id_2 via the recursive query.
t3: makes arrays from id_1 and id_2 with ARRAY_AGG().
However, it may not help with your Limitations/Issues.
The way this question is phrased makes it appear you want "show me distinct groups from a presorted list, unchained to a previous group". For that, something like this should suffice (assuming an auto-incrementing order and that one or both IDs move to the next value):
SELECT GrpNr,
STRING_AGG(DISTINCT CAST(id_1 as STRING), ',') as id_1_coll,
STRING_AGG(DISTINCT CAST(id_2 as STRING), ',') as id_2_coll
FROM
(
SELECT id_1, id_2,
SUM(CASE WHEN a.id_1 <> a.previous_id_1 and a.id_2 <> a.previous_id_2 THEN 1 ELSE 0 END)
OVER (ORDER BY RowNr) as GrpNr
FROM
(
SELECT *,
ROW_NUMBER() OVER () as RowNr,
LAG(t.id_1, 1) OVER (ORDER BY 1) AS previous_id_1,
LAG(t.id_2, 1) OVER (ORDER BY 1) AS previous_id_2
FROM t
) a
ORDER BY RowNr
) a
GROUP BY GrpNr
ORDER BY GrpNr
I don't think this is the question you mean to ask. This seems to be a graph-walking problem, as referenced in the other answers and in the response from @GordonLinoff to the question here, which I tested (and presume works for BigQuery).
This can also be done using sequential updates as done by @RomanPekar here (which I also tested). The main consideration seems to be performance; I'd assume DBMSs have gotten better at recursion since that was posted.
Rolling it up in either case should be fairly easy using STRING_AGG() as given above, or as you have it.
I'd be curious to see a more accurate representation of the data. If there is some consistency in how the data is stored, or limits on the levels of nesting or other group structures, there may be a shortcut approach other than recursion or iterative updates.

Single-column row-set exists in another table or a function returns positive value

I have following table structure: http://sqlfiddle.com/#!4/952e7/1
Now I am looking for a solution for the following problem:
Given an input date-time set (see below), the SQL statement should return all business IDs with a given business name for which every single date-time in the input set is either present in the ORDERS table or an additional function's condition is true (both conditions are to be checked separately for each input date-time).
An example how the input date-time dataset looks like:
WITH DATES_TO_CHECK(DATETIME) AS (
  SELECT DATE '2021-01-03' FROM DUAL
  UNION ALL SELECT DATE '2020-04-08' FROM DUAL
  UNION ALL SELECT DATE '2020-05-07' FROM DUAL
)
To keep it simple, the "additional function" here is just a random number check (if greater than 0.5 then true, otherwise false, so the check is dbms_random.value > 0.5).
For one given date-time it would look like this:
SELECT BN.NAME, BD.ID
FROM BUSINESS_DATA BD, BUSINESS_NAME BN
WHERE BD.NAME_ID=BN.ID AND
BN.NAME='B1' AND
(TO_DATE('2021-01-03', 'YYYY-MM-DD') IN (SELECT OD.ORDERDATE FROM ORDERS OD WHERE OD.BUSINESS_ID=BD.ID)
OR dbms_random.value > 0.5)
ORDER BY BD.ID
Please help me apply this solution to the input date-time rowset above AND the specified name.
I don't see any difference from the question you just deleted.
This is the list of businesses named B1 for which the number of order dates matching the input dates equals the number of input dates, or dbms_random.value > 0.5.
see SQL Fiddle
WITH DATES_TO_CHECK(DATETIME) AS(
SELECT DATE '2021-01-03' FROM DUAL
UNION ALL SELECT DATE '2020-04-08' fROM DUAL
UNION ALL SELECT DATE '2020-05-07' fROM DUAL
),
businesses_that_match as (
select
od.BUSINESS_ID, count(distinct OD.ORDERDATE)
from DATES_TO_CHECK dtc
left join ORDERS od on OD.ORDERDATE = dtc.datetime
group by od.BUSINESS_ID
having count(distinct OD.ORDERDATE) = (select count(distinct DATETIME) from DATES_TO_CHECK)
)
SELECT
BN.NAME, BD.ID
FROM BUSINESS_DATA BD
inner join BUSINESS_NAME BN on BD.NAME_ID=BN.ID
left join businesses_that_match btm on btm.BUSINESS_ID = bd.id
where bn.name = 'B1'
AND (btm.BUSINESS_ID is not null
OR dbms_random.value > 0.5
)

SQL hierarchy count totals report

I'm creating a report with SQL Server 2012 and Report Builder which must show the total number of Risks at High, Medium and Low level for each Parent Element.
Each Element contains a number of Risks which are rated at a certain level. I need the totals for the Parent Elements; each total should include the counts from all of the Child Elements plus any the Element itself may have.
I am using CTEs in my query. The code I have attached isn't working (there are no errors; it just displays incorrect results) and I'm not sure that my logic is correct.
Hopefully someone can help. Thanks in advance.
My table structure is:
ElementTable: ElementTableId (PK), ElementName, ElementParentId
RiskTable: RiskId (PK), RiskName, RiskRating, ElementId (FK)
My query:
WITH cte_Hierarchy(ElementId, ElementName, Generation, ParentElementId)
AS (SELECT ElementId,
NAME,
0,
ParentElementId
FROM Extract.Element AS FirstGeneration
WHERE ParentElementId IS NULL
UNION ALL
SELECT NextGeneration.ElementId,
NextGeneration.NAME,
Parent.Generation + 1,
Parent.ElementId
FROM Extract.Element AS NextGeneration
INNER JOIN cte_Hierarchy AS Parent
ON NextGeneration.ParentElementId = Parent.ElementId),
CTE_HighRisk
AS (SELECT r.ElementId,
Count(r.RiskId) AS HighRisk
FROM Extract.Risk r
WHERE r.RiskRating = 'High'
GROUP BY r.ElementId),
CTE_LowRisk
AS (SELECT r.ElementId,
Count(r.RiskId) AS LowRisk
FROM Extract.Risk r
WHERE r.RiskRating = 'Low'
GROUP BY r.ElementId),
CTE_MedRisk
AS (SELECT r.ElementId,
Count(r.RiskId) AS MedRisk
FROM Extract.Risk r
WHERE r.RiskRating = 'Medium'
GROUP BY r.ElementId)
SELECT rd.ElementId,
rd.ElementName,
rd.ParentElementId,
Generation,
HighRisk,
MedRisk,
LowRisk
FROM cte_Hierarchy rd
LEFT OUTER JOIN CTE_HighRisk h
ON rd.ElementId = h.ElementId
LEFT OUTER JOIN CTE_MedRisk m
ON rd.ElementId = m.ElementId
LEFT OUTER JOIN CTE_LowRisk l
ON rd.ElementId = l.ElementId
WHERE Generation = 1
Edit:
Sample Data
ElementTable:
ElementTableId (PK) | ElementName | ElementParentId
1                   | Main        | 0
2                   | Element1    | 1
3                   | Element2    | 1
4                   | SubElement1 | 2
RiskTable:
RiskId (PK) | RiskName  | RiskRating | ElementId (FK)
a           | Financial | High       | 2
b           | HR        | High       | 3
c           | Marketing | Low        | 2
d           | Safety    | Medium     | 4
Sample Output:
Element Name | High | Medium | Low
Main         | 2    | 1      | 1
Here are your sample tables:
SELECT * INTO #TABLE1
FROM
(
SELECT 1 ElementTableId, 'Main' ElementName ,0 ElementParentId
UNION ALL
SELECT 2,'Element1',1
UNION ALL
SELECT 3, 'Element2',1
UNION ALL
SELECT 4, 'SubElement1',2
)TAB
SELECT * INTO #TABLE2
FROM
(
SELECT 'a' RiskId, 'Fincancial' RiskName,'High' RiskRating ,2 ElementId
UNION ALL
SELECT 'b','HR','High',3
UNION ALL
SELECT 'c', 'Marketing','Low',2
UNION ALL
SELECT 'd', 'Safety','Medium',4
)TAB
We find the children of a parent and the counts of their High, Medium and Low risks, then use a cross join to show the parent with all the combinations of its children's High, Medium and Low counts.
UPDATE
The variable below can be used to select the element to report on dynamically.
DECLARE @ElementTableId INT;
SET @ElementTableId = 1;
And use the above variable inside the query:
;WITH CTE1 AS
(
SELECT *,0 [LEVEL] FROM #TABLE1 WHERE ElementTableId = @ElementTableId
UNION ALL
SELECT E.*,e2.[LEVEL]+1 FROM #TABLE1 e
INNER JOIN CTE1 e2 on e.ElementParentId = e2.ElementTableId
AND E.ElementTableId<>@ElementTableId
)
,CTE2 AS
(
SELECT E1.*,E2.*,COUNT(RiskRating) OVER(PARTITION BY RiskRating) CNT
from CTE1 E1
LEFT JOIN #TABLE2 E2 ON E1.ElementTableId=E2.ElementId
)
,CTE3 AS
(
SELECT DISTINCT T1.ElementName,C2.RiskRating,C2.CNT
FROM #TABLE1 T1
CROSS JOIN CTE2 C2
WHERE T1.ElementTableId = @ElementTableId
)
SELECT *
FROM CTE3
PIVOT(MIN(CNT)
FOR RiskRating IN ([High], [Medium],[Low])) AS PVTTable
SQL FIDDLE
UPDATE 2
I am updating as per your new requirement.
Here are the sample tables, to which I have added extra data for testing:
SELECT * INTO #ElementTable
FROM
(
SELECT 1 ElementTableId, 'Main' ElementName ,0 ElementParentId
UNION ALL
SELECT 2,'Element1',1
UNION ALL
SELECT 3, 'Element2',1
UNION ALL
SELECT 4, 'SubElement1',2
UNION ALL
SELECT 5, 'Main 2',0
UNION ALL
SELECT 6, 'Element21',5
UNION ALL
SELECT 7, 'SubElement21',6
UNION ALL
SELECT 8, 'SubElement22',7
UNION ALL
SELECT 9, 'SubElement23',7
)TAB
SELECT * INTO #RiskTable
FROM
(
SELECT 'a' RiskId, 'Fincancial' RiskName,'High' RiskRating ,2 ElementId
UNION ALL
SELECT 'b','HR','High',3
UNION ALL
SELECT 'c', 'Marketing','Low',2
UNION ALL
SELECT 'd', 'Safety','Medium',4
UNION ALL
SELECT 'e' , 'Fincancial' ,'High' ,5
UNION ALL
SELECT 'f','HR','High',6
UNION ALL
SELECT 'g','HR','High',6
UNION ALL
SELECT 'h', 'Marketing','Low',7
UNION ALL
SELECT 'i', 'Safety','Medium',8
UNION ALL
SELECT 'j', 'Safety','High',8
)TAB
I have written the logic as comments in the query:
;WITH CTE1 AS
(
-- Here you will find the level of every element in the table
SELECT *,0 [LEVEL]
FROM #ElementTable WHERE ElementParentId = 0
UNION ALL
SELECT ET.*,CTE1.[LEVEL]+1
FROM #ElementTable ET
INNER JOIN CTE1 on ET.ElementParentId = CTE1.ElementTableId
)
,CTE2 AS
(
-- Filter on the level and find the top-level (major) parent of each child,
-- i.e. for 100 -> 150 -> 200, the main parent of 200 is 100
SELECT *,CTE1.ElementTableId MajorParentID,CTE1.ElementName MajorParentName
FROM CTE1 WHERE [LEVEL]=1
UNION ALL
SELECT CTE1.*,CTE2.MajorParentID,CTE2.MajorParentName
FROM CTE1
INNER JOIN CTE2 on CTE1.ElementParentId = CTE2.ElementTableId
)
,CTE3 AS
(
-- Since each child has columns for its main parent's id and name,
-- you directly get the counts for each element rolled up to the level you selected
SELECT DISTINCT CTE2.MajorParentName,RT.RiskRating ,
COUNT(RiskRating) OVER(PARTITION BY MajorParentID,RiskRating) CNT
FROM CTE2
JOIN #RiskTable RT ON CTE2.ElementTableId=RT.ElementId
)
SELECT MajorParentName, ISNULL([High],0)[High], ISNULL([Medium],0)[Medium],ISNULL([Low],0)[Low]
FROM CTE3
PIVOT(MIN(CNT)
FOR RiskRating IN ([High], [Medium],[Low])) AS PVTTable
SQL FIDDLE

Sum of working days with date ranges from multiple records (overlapping)

suppose there are records as follows:
Employee_id, work_start_date, work_end_date
1, 01-jan-2014, 07-jan-2014
1, 03-jan-2014, 12-jan-2014
1, 23-jan-2014, 25-jan-2014
2, 15-jan-2014, 25-jan-2014
2, 07-jan-2014, 15-jan-2014
2, 09-jan-2014, 12-jan-2014
The requirement is to write an SQL SELECT statement which summarizes the worked days grouped by employee_id, but excludes the overlapping periods (meaning overlapping days are taken into the calculation only once).
The desired output would be:
Employee_id, worked_days
1, 13
2, 18
The calculation of working days for a date range is done like this:
if work_start_date = 5 and work_end_date = 9 then worked_days = 4 (9 - 5).
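(Checking the rule against employee 1, for example: the merged ranges 01-jan..12-jan and 23-jan..25-jan give 11 + 2 = 13, matching the desired output. In Oracle, subtracting two DATE values returns the day difference directly:)
SELECT DATE '2014-01-12' - DATE '2014-01-01' AS days_first_block,  -- 11
       DATE '2014-01-25' - DATE '2014-01-23' AS days_second_block  -- 2
FROM dual;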
I could write a pl/sql function which solves this (manually iterating over the records and doing the calculation), but I'm sure it can be done using SQL for better performance.
Can someone please point me in the right direction?
Thanks!
This is a slightly modified query from a similar question:
compute sum of values associated with overlapping date ranges
SELECT "Employee_id",
SUM( "work_end_date" - "work_start_date" )
FROM(
SELECT "Employee_id",
"work_start_date" ,
lead( "work_start_date" )
over (Partition by "Employee_id"
Order by "Employee_id", "work_start_date" )
As "work_end_date"
FROM (
SELECT "Employee_id", "work_start_date"
FROM Table1
UNION
SELECT "Employee_id","work_end_date"
FROM Table1
) x
) x
WHERE EXISTS (
SELECT 1 FROM Table1 t
WHERE t."work_start_date" > x."work_end_date"
AND t."work_end_date" > x."work_start_date"
OR t."work_start_date" = x."work_start_date"
AND t."work_end_date" = x."work_end_date"
)
GROUP BY "Employee_id"
;
Demo: http://sqlfiddle.com/#!4/4fcce/2
This is a tricky problem. For instance, you can't use lag(), because the overlapping period may not be the "previous" one, and different periods can start and/or stop on the same day.
The idea is to reconstruct the periods. How to do this? Find the records where a period starts -- that is, where the start is not overlapped by any other record. Then use this as a flag and sum the flag cumulatively to number the groups of overlapping records. Getting the working days is then just aggregation from there:
with ps as (
select e.*,
(case when exists (select 1
from emps e2
where e2.employee_id = e.employee_id and
e2.work_start_date < e.work_start_date and
e2.work_end_date >= e.work_start_date
)
then 0 else 1
end) as IsPeriodStart
from emps e
)
select employee_id, sum(work_end_date - work_start_date) as Days_Worked
from (select employee_id, min(work_start_date) as work_start_date,
max(work_end_date) as work_end_date
from (select ps.*,
sum(IsPeriodStart) over (partition by employee_id
order by work_start_date
) as grp
from ps
) ps
group by employee_id, grp
) ps
group by employee_id;
The date_tbl type:
create or replace package RG_TYPE is
type date_tbl is table of date;
end;
A pipelined function that returns the dates between two parameters as a table:
create or replace function dates
(
p_from date,
p_to date
) return rg_type.date_tbl pipelined
is
l_idx date:=p_from;
begin
loop
if l_idx>nvl(p_to,p_from) then
exit;
end if;
pipe row(l_idx);
l_idx:=l_idx+1;
end loop;
return;
end;
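(A quick usage example, assuming the package and function above have been compiled:)
SELECT column_value AS day
FROM TABLE(dates(DATE '2014-01-01', DATE '2014-01-05'));
-- returns the five dates from 01-jan-2014 to 05-jan-2014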
SQL:
select employee_id,sum(c)
from
(select e.employee_id,d.column_value,count(distinct w.employee_id) as c
from (select distinct employee_id from works) e,
table(dates((select min(work_start_date) as a from works),(select max(work_end_date) as b from works))) d,
works w
where e.employee_id=w.employee_id
and d.column_value>=w.work_start_date
and d.column_value<w.work_end_date
group by e.employee_id,d.column_value) Sub
group by employee_id
order by 1,2

Make a CTE from result of others CTE

I have several joined CTEs. Something like:
;WITH CT1 AS (SELECT ...)
, CT2 AS (SELECT ...)
SELECT T1.*, T2.*
FROM CT1 T1
INNER JOIN CT2 T2 ON (some condition)
GROUP BY F1, F2, etc.
Now I need to join the result of this query to another CTE. What's the best way? Can I make a CTE with the result of this query? Any help would be greatly appreciated.
You can keep creating new CTEs based on previously defined ones. They may be joined or otherwise combined, subject to the usual rules for CTEs.
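For instance, a minimal sketch along the lines of your query (the tables and column names here are just made up for illustration): wrap the joined, grouped result of the first two CTEs in a third, and join that to a fourth.
;WITH CT1 AS (
    SELECT * FROM (VALUES (1, 'x'), (2, 'y')) AS v(Id, Label)
),
CT2 AS (
    SELECT * FROM (VALUES (1, 10), (2, 20), (2, 5)) AS v(Id, Amount)
),
CT3 AS ( -- built from the joined/grouped result of CT1 and CT2
    SELECT T1.Id, T1.Label, SUM(T2.Amount) AS Total
    FROM CT1 T1
    INNER JOIN CT2 T2 ON T2.Id = T1.Id
    GROUP BY T1.Id, T1.Label
),
CT4 AS ( -- the CTE you want to join the result to
    SELECT * FROM (VALUES (1, 'extra')) AS v(Id, Note)
)
SELECT CT3.Id, CT3.Label, CT3.Total, CT4.Note
FROM CT3
LEFT JOIN CT4 ON CT4.Id = CT3.Id;
The longer example below chains several CTEs in the same way: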
; with
ArabicRomanConversions as (
select *
from ( values
( 0, '', '', '', '' ), ( 1, 'I', 'X', 'C', 'M' ), ( 2, 'II', 'XX', 'CC', 'MM' ), ( 3, 'III', 'XXX', 'CCC', 'MMM' ), ( 4, 'IV', 'XL', 'CD', '?' ),
( 5, 'V', 'L', 'D', '?' ), ( 6, 'VI', 'LX', 'DC', '?' ), ( 7, 'VII', 'LXX', 'DCC', '?' ), ( 8, 'VIII', 'LXXX', 'DCCC', '?' ), ( 9, 'IX', 'XC', 'CM', '?' )
) as Placeholder ( Arabic, Ones, Tens, Hundreds, Thousands )
),
Numbers as (
select 1 as Number
union all
select Number + 1
from Numbers
where Number < 3999 ),
ArabicAndRoman as (
select Number as Arabic,
( select Thousands from ArabicRomanConversions where Arabic = Number / 1000 ) +
( select Hundreds from ArabicRomanConversions where Arabic = Number / 100 % 10 ) +
( select Tens from ArabicRomanConversions where Arabic = Number / 10 % 10 ) +
( select Ones from ArabicRomanConversions where Arabic = Number % 10 ) as Roman
from Numbers ),
Squares as (
select L.Arabic, L.Roman, R.Arabic as Square, R.Roman as RomanSquare
from ArabicAndRoman as L inner join
ArabicAndRoman as R on R.Arabic = L.Arabic * L.Arabic
where L.Arabic < 16 ),
Cubes as (
select S.Arabic, S.Roman, S.Square, S.RomanSquare, A.Arabic as Cube, A.Roman as RomanCube
from Squares as S inner join
ArabicAndRoman as A on A.Arabic = S.Square * S.Arabic )
select *
from Cubes
order by Arabic
option ( MaxRecursion 3998 )
This is a format I have used a few times, where a temp table buffers one complex CTE, which is output and then used again from the temp table with a second CTE.
It is useful if you need two result sets, or if the complete CTE as one massive statement causes speed issues (breaking it up can be a huge performance improvement in some cases):
-- I do this "DROP" because in some cases where query is executed over and
-- over sometimes the object is not cleared before next transaction.
BEGIN TRY DROP TABLE #T_A END TRY BEGIN CATCH END CATCH;
WITH A AS (
SELECT 'A' AS Name, 1 as Value
UNION ALL SELECT 'B', 2
)
SELECT *
INTO #T_A
FROM A;
SELECT *
FROM #T_A ; -- Generate First Output Table
WITH B AS (
SELECT 'A' AS Name, 234 as Other
UNION ALL SELECT 'B', 456
)
-- Generate second result set from Temp table.
SELECT B.*, A.Value
FROM B JOIN #T_A AS A ON A.Name=B.Name
This produces a two-table result set, which is also handy for filling a DataSet in .NET.