Recursive/hierarchical query in BigQuery - google-bigquery

I have a recursion/hierarchical problem that I'm trying to figure out in BigQuery.
I have a list of employees and each employee has a manager ID. I need to be able to enter a single Employee_ID and return an array of every person beneath them.
CREATE TABLE p_RLS.testHeirarchy
(
Employee_ID INT64,
Employee_Name STRING,
Position STRING,
Line_Manager_ID INT64
);
INSERT INTO p_RLS.testHeirarchy (Employee_ID, Employee_Name, Position, Line_Manager_ID)
VALUES(1,'Joe','Worker',11),
(2,'James','Worker',11),
(3,'Jack','Worker',11),
(4,'Jill','Worker',12),
(5,'Jan','Worker',12),
(6,'Jacquie','Worker',13),
(7,'Joaquin','Worker',14),
(8,'Jeremy','Worker',14),
(9,'Jade','Worker',15),
(10,'Jocelyn','Worker',15),
(11, 'Bob', 'Store Manager',16),
(12, 'Bill', 'Store Manager',16),
(13, 'Barb', 'Store Manager',16),
(14, 'Ben', 'Store Manager',17),
(15, 'Burt', 'Store Manager',17),
(16, 'Sally','Group Manager',18),
(17, 'Sam','Group Manager',19),
(18, 'Anna', 'Ops Manager',20),
(19, 'Amy', 'Ops Manager',20),
(20, 'Zoe', 'State Manager', NULL);
My desired output would resemble:
SELECT 20 as Employee_ID, [19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1] as Reports;
SELECT 11 as Employee_ID, [3,2,1] as Reports;
SELECT 1 as Employee_ID, [] as Reports;
I have got the following working but it seems very ugly/inconvenient and doesn't support unlimited levels:
WITH test as (
SELECT L0.Employee_ID, L0.Employee_Name, L0.Position, L0.Line_Manager_ID,
ARRAY_AGG(DISTINCT L1.Employee_ID IGNORE NULLS) as Lvl1,
ARRAY_AGG(DISTINCT L2.Employee_ID IGNORE NULLS) as Lvl2,
ARRAY_AGG(DISTINCT L3.Employee_ID IGNORE NULLS) as Lvl3,
ARRAY_AGG(DISTINCT L4.Employee_ID IGNORE NULLS) as Lvl4,
ARRAY_AGG(DISTINCT L5.Employee_ID IGNORE NULLS) as Lvl5,
ARRAY_AGG(DISTINCT L6.Employee_ID IGNORE NULLS) as Lvl6,
ARRAY_AGG(DISTINCT L7.Employee_ID IGNORE NULLS) as Lvl7
FROM p_RLS.testHeirarchy as L0
LEFT OUTER JOIN p_RLS.testHeirarchy L1 ON L0.Employee_ID = L1.Line_Manager_ID
LEFT OUTER JOIN p_RLS.testHeirarchy L2 ON L1.Employee_ID = L2.Line_Manager_ID
LEFT OUTER JOIN p_RLS.testHeirarchy L3 ON L2.Employee_ID = L3.Line_Manager_ID
LEFT OUTER JOIN p_RLS.testHeirarchy L4 ON L3.Employee_ID = L4.Line_Manager_ID
LEFT OUTER JOIN p_RLS.testHeirarchy L5 ON L4.Employee_ID = L5.Line_Manager_ID
LEFT OUTER JOIN p_RLS.testHeirarchy L6 ON L5.Employee_ID = L6.Line_Manager_ID
LEFT OUTER JOIN p_RLS.testHeirarchy L7 ON L6.Employee_ID = L7.Line_Manager_ID
WHERE L0.Employee_ID = 16
GROUP BY 1,2,3,4)
SELECT
Employee_ID, ARRAY_CONCAT(
IFNULL(Lvl1,[]),
IFNULL(Lvl2,[]),
IFNULL(Lvl3,[]),
IFNULL(Lvl4,[]),
IFNULL(Lvl5,[]),
IFNULL(Lvl6,[]),
IFNULL(Lvl7,[])) as All_reports
FROM test
Is there a better way to do this? Is a recursive approach possible in BigQuery?

Recursive CTE was recently introduced !
This makes things so much easier
with recursive iterations as (
select line_manager_id, employee_id, 1 pos from your_table
union all
select b.line_manager_id, a.employee_id, pos + 1
from your_table a join iterations b
on b.employee_id = a.line_manager_id
)
select line_manager_id, string_agg('' || employee_id order by pos, employee_id desc) as reports_as_list
from iterations
where not line_manager_id is null
group by line_manager_id
order by line_manager_id desc
If applied to sample data in question - output is

Below is for BigQuery Standard SQL
DECLARE rows_count, run_away_stop INT64 DEFAULT 0;
CREATE TEMP TABLE initialData AS WITH input AS (
SELECT 1 Employee_ID,'Joe' Employee_Name,'Worker' Position,11 Line_Manager_ID UNION ALL
SELECT 2,'James','Worker',11 UNION ALL
SELECT 3,'Jack','Worker',11 UNION ALL
SELECT 4,'Jill','Worker',12 UNION ALL
SELECT 5,'Jan','Worker',12 UNION ALL
SELECT 6,'Jacquie','Worker',13 UNION ALL
SELECT 7,'Joaquin','Worker',14 UNION ALL
SELECT 8,'Jeremy','Worker',14 UNION ALL
SELECT 9,'Jade','Worker',15 UNION ALL
SELECT 10,'Jocelyn','Worker',15 UNION ALL
SELECT 11, 'Bob', 'Store Manager',16 UNION ALL
SELECT 12, 'Bill', 'Store Manager',16 UNION ALL
SELECT 13, 'Barb', 'Store Manager',16 UNION ALL
SELECT 14, 'Ben', 'Store Manager',17 UNION ALL
SELECT 15, 'Burt', 'Store Manager',17 UNION ALL
SELECT 16, 'Sally','Group Manager',18 UNION ALL
SELECT 17, 'Sam','Group Manager',19 UNION ALL
SELECT 18, 'Anna', 'Ops Manager',20 UNION ALL
SELECT 19, 'Amy', 'Ops Manager',20 UNION ALL
SELECT 20, 'Zoe', 'State Manager', NULL
)
SELECT * FROM input;
CREATE TEMP TABLE ttt AS
SELECT Line_Manager_ID, ARRAY_AGG(Employee_ID) Reports FROM initialData WHERE NOT Line_Manager_ID IS NULL GROUP BY Line_Manager_ID;
LOOP
SET (run_away_stop, rows_count) = (SELECT AS STRUCT run_away_stop + 1, COUNT(1) FROM ttt);
CREATE OR REPLACE TEMP TABLE ttt1 AS
SELECT Line_Manager_ID, ARRAY(SELECT DISTINCT Employee_ID FROM UNNEST(Reports) Employee_ID ORDER BY Employee_ID DESC) Reports
FROM (
SELECT Line_Manager_ID, ARRAY_CONCAT_AGG(Reports) Reports
FROM (
SELECT t2.Line_Manager_ID, ARRAY_CONCAT(t1.Reports, t2.Reports) Reports
FROM ttt t1, ttt t2
WHERE (SELECT COUNTIF(t1.Line_Manager_ID = Employee_ID) FROM UNNEST(t2.Reports) Employee_ID) > 0
) GROUP BY Line_Manager_ID
);
CREATE OR REPLACE TEMP TABLE ttt AS
SELECT * FROM ttt1 UNION ALL
SELECT * FROM ttt WHERE NOT Line_Manager_ID IN (SELECT Line_Manager_ID FROM ttt1);
IF (rows_count = (SELECT COUNT(1) FROM ttt) AND run_away_stop > 1) OR run_away_stop > 10 THEN BREAK; END IF;
END LOOP;
SELECT Employee_ID,
(
SELECT STRING_AGG(CAST(Employee_ID AS STRING), ',' ORDER BY Employee_ID DESC)
FROM ttt.Reports Employee_ID
) Reports_as_list
FROM (SELECT DISTINCT Employee_ID FROM initialData) d
LEFT JOIN ttt ON Employee_ID = Line_Manager_ID
ORDER BY Employee_ID DESC;
with result
Row Employee_ID Reports_as_list
1 20 19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1
2 19 17,15,14,10,9,8,7
3 18 16,13,12,11,6,5,4,3,2,1
4 17 15,14,10,9,8,7
5 16 13,12,11,6,5,4,3,2,1
6 15 10,9
7 14 8,7
8 13 6
9 12 5,4
10 11 3,2,1
11 10 null
12 9 null
13 8 null
14 7 null
15 6 null
16 5 null
17 4 null
18 3 null
19 2 null
20 1 null
In case if you need Reports as array - replace last statement in above script with below
SELECT Employee_ID, Reports Reports_as_array
FROM (SELECT DISTINCT Employee_ID FROM initialData) d
LEFT JOIN ttt ON Employee_ID = Line_Manager_ID
ORDER BY Employee_ID DESC;
Note: depends on level of nesting in your hierarchy - you might need to adjust 10 in OR run_away_stop > 10

To the question: "Is a recursive approach possible in BigQuery?"
Yes!
Now that BigQuery supports scripting and loops I solved some recursive problems from the Advent of Code with BigQuery:
https://towardsdatascience.com/advent-of-code-sql-bigquery-31e6a04964d4
CREATE TEMP TABLE planets AS SELECT 'YOU' planet;
LOOP
SET steps = steps+1
;
CREATE OR REPLACE TEMP TABLE planets AS
SELECT DISTINCT planet
FROM (
SELECT origin planet FROM t1 WHERE dest IN (SELECT planet FROM planets)
UNION ALL
SELECT dest planet FROM t1 WHERE origin IN (SELECT planet FROM planets)
)
;
IF 'SAN' IN (SELECT * FROM planets )
THEN LEAVE;
END IF;
END LOOP
;
SELECT steps-2
I would use a similar approach to navigate the graph and annotate all parent relationships.
Soon: I'll write a blog post on the specifics of tree traversal to get everyone under x. But this code will help you in the meantime.

Related

Value need from range, based on Priority in SQL ,

There is 3 cases: (In all cases value needs to be picked up based on priority)
case 1 : zip exist between range
case 2: zip does not exist between range
case 3 : overlap range
Table
Temp1
state
zip_start
zip_end
Priority
Value
NY
100
200
1
A
NY
150
250
3
c
NY
null
null
2
B
Data
state
zip
NY
201
NY
400
OUTPUT :
state
zip_start
zip_end
Priority
Value
zip
NY
null
null
2
B
201
NY
null
null
2
B
400
I am trying with below code , but It's not picking the data based on priority:
SELECT ZIP,ZIP_START,ZIP_END,VALUE,PRIORITY,STATE,IX FROM
(
SELECT TMP1.*,
ROW_NUMBER () OVER (PARTITION BY STATE,ZIP ORDER BY PRIORITY ) IX
FROM
(
WITH CASE_1 AS
( SELECT
temp1.*
,DATA.ZIP
FROM TEMP1
LEFT JOIN
"DATA" ON DATA.STATE = temp1.STATE
WHERE DATA.ZIP BETWEEN TEMP1.ZIP_START AND TEMP1 .ZIP_END
),
CASE_2 AS
(
SELECT
temp1.*
,DATA.ZIP
FROM "DATA"
LEFT JOIN
TEMP1 ON DATA.STATE = temp1.STATE
WHERE (ZIP_START IS NULL OR ZIP_START = '')
AND (ZIP_END IS NULL OR ZIP_END = '')
AND Not EXISTS
(SELECT 1 FROM CASE_1 WHERE CASE_1.zip=DATA.zip
AND CASE_1.STATE=DATA.STATE)
)
SELECT * FROM CASE_1
UNION
SELECT * FROM CASE_2
)TMP1
) TMP2
WHERE TMP2.IX = 1;
From Oracle 12, you can use a LATERAL join and filter when the zip is within range or when one-or-other end of the range is NULL the ORDER BY priority and FETCH the FIRST matched ROW ONLY:
SELECT t.*, d.zip
FROM data d
CROSS JOIN LATERAL (
SELECT *
FROM temp1 t
WHERE d.state = t.state
AND (t.zip_start <= d.zip OR t.zip_start IS NULL)
AND (t.zip_end >= d.zip OR t.zip_end IS NULL)
ORDER BY priority
FETCH FIRST ROW ONLY
) t
In earlier versions, you can join the two tables and then use the ROW_NUMBER analytic function to find the best match:
SELECT state, zip_start, zip_end, priority, value, zip
FROM (
SELECT t.*,
d.zip,
ROW_NUMBER() OVER (PARTITION BY d.ROWID ORDER BY t.priority) AS rn
FROM data d
INNER JOIN temp1 t
ON ( d.state = t.state
AND (t.zip_start <= d.zip OR t.zip_start IS NULL)
AND (t.zip_end >= d.zip OR t.zip_end IS NULL))
)
WHERE rn = 1;
Which, for the sample data:
CREATE TABLE Temp1 (state, zip_start, zip_end, Priority, Value) AS
SELECT 'NY', 100, 200, 1, 'A' FROM DUAL UNION ALL
SELECT 'NY', 150, 250, 3, 'c' FROM DUAL UNION ALL
SELECT 'NY', null, null, 2, 'B' FROM DUAL;
CREATE TABLE Data (state, zip) AS
SELECT 'NY', 201 FROM DUAL UNION ALL
SELECT 'NY', 400 FROM DUAL;
Both output:
STATE
ZIP_START
ZIP_END
PRIORITY
VALUE
ZIP
NY
null
null
2
B
201
NY
null
null
2
B
400
db<>fiddle here
CREATE TABLE TEMP1 (
STATE VARCHAR(10),
ZIP_START NUMBER,
ZIP_END NUMBER,
PRIORITY NUMBER,
VAL VARCHAR(10));
INSERT INTO TEMP1 VALUES ('NY', 100,200,1,'A');
INSERT INTO TEMP1 VALUES ('NY', 150,250,3,'C');
INSERT INTO TEMP1 VALUES ('NY', null,null,2,'B');
CREATE TABLE DATATABLE (
STATE VARCHAR(10),
ZIP NUMBER
);
INSERT INTO DATATABLE VALUES ('NY', 201);
INSERT INTO DATATABLE VALUES ('NY', 400);
The main idea is to figure out first how many times your condition (zip in range between zip_start and end) is met. This is why we use count_match variable.
Once you get if your data is priority 1,2 or 3, you match your data table with the temp table to get the value associated with that priority.
SELECT
t0.STATE,
t0.ZIP_START,
t0.ZIP_END,
CASE WHEN t0.COUNT_MATCH > 1 THEN 3
WHEN t0.COUNT_MATCH = 1 THEN 1
WHEN t0.COUNT_MATCH = 0 THEN 2 END AS PRIORITY,
t.VAL,
t0.ZIP
FROM
(
SELECT
t1.STATE,
MIN(t2.ZIP_START) AS ZIP_START,
MAX(t2.ZIP_END) AS ZIP_END,
COUNT(t2.STATE) AS COUNT_MATCH,
t1.ZIP
FROM DATATABLE t1
LEFT JOIN TEMP1 t2 ON (t1.STATE = t2.STATE AND t1.ZIP>=t2.ZIP_START AND t1.ZIP <= t2.ZIP_END)
GROUP BY
t1.STATE, t1.ZIP) t0
LEFT JOIN TEMP1 t ON (t0.STATE = t.STATE AND CASE WHEN t0.COUNT_MATCH > 1 THEN 3
WHEN t0.COUNT_MATCH = 1 THEN 1
WHEN t0.COUNT_MATCH = 0 THEN 2 END = t.PRIORITY)
;

prevent query from return duplicate results

I have a database contains information for a telecommunication company with the following table:
SUBSCRIBERS (SUB_ID , F_NAME , L_NANE , DATE_OF_BIRTH , COUNTRY)
LINES (LINE_ID , LINE_NUMBER)
SUBSCRIBERS_LINES (SUB_LINE_ID , SUB_ID "foreign key", LINE_ID "foreign key", ACTIVATION_DATE)
CALLS (CALL_ID , LINE_FROM "foreign key", LINE_TO "foreign key" , START_DATE_CALL, END_DATE_CALL)
I want to retrieve the names of top 3 subscribers who make the highest count number of calls (with duration less than 60 seconds for each call) in specific given day.
So, I write the following query :
with TEMPRESULT AS
(
select * from
(
select CALLS.LINE_FROM , count(*) totalcount
from CALLS
where (((END_DATE_CALL-START_DATE_DATE)*24*60*60)<=60 and to_char(S_DATE,'YYYY-MM-DD')='2015-12-12')
group by CALLS.LINE_FROM
order by totalcount DESC
)
where rownum <= 3
)
select F_NAME,L_NAME
from TEMPRESULT inner join SUBSCRIBERS_LINES on TEMPRESULT.LINE_FROM=SUBSCRIBERS_LINES.line_id inner join SUBSCRIBERS on SUBSCRIBERS_LINES.SUB_ID=SUBSCRIBERS.SUB_ID;
But this query will not work if one of the subscribers has more than one line,
for example:
(X1 has L1 and L2 lines
X2 has L3
X3 has L4)
if X1 talks 20 calls from L1, and 19 calls from L2
X2 talks 15 calls from L3
X3 talks 10 calls from L4
my query will return the following output:
X1
X1
X2
it must return :
X1
X2
X3
how to modify the query to not return duplicate name ?
The subquery must GROUP BY on SUB_ID (not on LINE_FROM). This will provide the total calls of a subscriber and not the top line calls.
In other words move the join in the subquery and group and order by SUB_ID.
DISTINCT in the main query is too late, you will get no duplicates but less results.
Could you try adding the DISTINCT keyword to the SELECT query at the bottom?
Something like this:
with TEMPRESULT AS
(
select * from
(
select CALLS.LINE_FROM , count(*) totalcount
from CALLS
where (((END_DATE_CALL-START_DATE_DATE)*24*60*60)<=60 and to_char(S_DATE,'YYYY-MM-DD')='2015-12-12')
group by CALLS.LINE_FROM
order by totalcount DESC
)
where rownum <= 3
)
select DISTINCT F_NAME,L_NAME
from TEMPRESULT
inner join SUBSCRIBERS_LINES on TEMPRESULT.LINE_FROM = SUBSCRIBERS_LINES.line_id
inner join SUBSCRIBERS on SUBSCRIBERS_LINES.SUB_ID = SUBSCRIBERS.SUB_ID;
In theory (I haven't tested it by creating this database) this should show:
X1
X2
X3
how about something like this
(T represents the result from your query)
WITH t AS
(SELECT 1 id, 'x1' subscriber, 'l1' line FROM dual
UNION ALL
SELECT 2, 'x1', 'l1' FROM dual
UNION ALL
SELECT 3, 'x1', 'l1' FROM dual
UNION ALL
SELECT 4, 'x1', 'l2' FROM dual
UNION ALL
SELECT 5, 'x1', 'l2' FROM dual
UNION ALL
SELECT 6, 'x1', 'l2' FROM dual
UNION ALL
SELECT 6, 'x1', 'l2' FROM dual
UNION ALL
SELECT 7, 'x2', 'l3' FROM dual
UNION ALL
SELECT 8, 'x2', 'l3' FROM dual
UNION ALL
SELECT 9, 'x3', 'l4' FROM dual
),
t1 AS
(SELECT COUNT(subscriber) totalcount,
line,
MAX(subscriber) keep (dense_rank last
ORDER BY line ) subscribers
FROM t
GROUP BY line
ORDER BY 1 DESC
)
SELECT subscribers,
listagg(line
||' had '
|| totalcount
|| ' calls ', ',') within GROUP (
ORDER BY totalcount) AS lines
FROM t1
GROUP BY subscribers
the results
subscribers lines
x1 l1 had 3 calls, l2 had 4 calls
x2 l3 had 2 calls
x3 l4 had 1 calls

SQL hierarchy count totals report

I'm creating a report with SQL server 2012 and Report Builder which must show the total number of Risks at a high, medium and low level for each Parent Element.
Each Element contains a number of Risks which are rated at a certain level. I need the total for the Parent Elements. The total will include the number of all the Child Elements and also the number the Element itself may have.
I am using CTEs in my query- the code I have attached isn't working (there are no errors - it's just displaying the incorrect results) and I'm not sure that my logic is correct??
Hopefully someone can help. Thanks in advance.
My table structure is:
ElementTable
ElementTableId(PK) ElementName ElementParentId
RiskTable
RiskId(PK) RiskName RiskRating ElementId(FK)
My query:
WITH cte_Hierarchy(ElementId, ElementName, Generation, ParentElementId)
AS (SELECT ElementId,
NAME,
0,
ParentElementId
FROM Extract.Element AS FirtGeneration
WHERE ParentElementId IS NULL
UNION ALL
SELECT NextGeneration.ElementId,
NextGeneration.NAME,
Parent.Generation + 1,
Parent.ElementId
FROM Extract.Element AS NextGeneration
INNER JOIN cte_Hierarchy AS Parent
ON NextGeneration.ParentElementId = Parent.ElementId),
CTE_HighRisk
AS (SELECT r.ElementId,
Count(r.RiskId) AS HighRisk
FROM Extract.Risk r
WHERE r.RiskRating = 'High'
GROUP BY r.ElementId),
CTE_LowRisk
AS (SELECT r.ElementId,
Count(r.RiskId) AS LowRisk
FROM Extract.Risk r
WHERE r.RiskRating = 'Low'
GROUP BY r.ElementId),
CTE_MedRisk
AS (SELECT r.ElementId,
Count(r.RiskId) AS MedRisk
FROM Extract.Risk r
WHERE r.RiskRating = 'Medium'
GROUP BY r.ElementId)
SELECT rd.ElementId,
rd.ElementName,
rd.ParentElementId,
Generation,
HighRisk,
MedRisk,
LowRisk
FROM cte_Hierarchy rd
LEFT OUTER JOIN CTE_HighRisk h
ON rd.ElementId = h.ElementId
LEFT OUTER JOIN CTE_MedRisk m
ON rd.ElementId = m.ElementId
LEFT OUTER JOIN CTE_LowRisk l
ON rd.ElementId = l.ElementId
WHERE Generation = 1
Edit:
Sample Data
ElementTableId(PK) -- ElementName -- ElementParentId
1 ------------------- Main --------------0
2 --------------------Element1-----------1
3 --------------------Element2 ----------1
4 --------------------SubElement1 -------2
RiskId(PK) RiskName RiskRating ElementId(FK)
a -------- Financial -- High ----- 2
b -------- HR --------- High ----- 3
c -------- Marketing -- Low ------- 2
d -------- Safety -----Medium ----- 4
Sample Output:
Element Name High Medium Low
Main ---------- 2 ---- 1 -------1
Here is your sample tables
SELECT * INTO #TABLE1
FROM
(
SELECT 1 ElementTableId, 'Main' ElementName ,0 ElementParentId
UNION ALL
SELECT 2,'Element1',1
UNION ALL
SELECT 3, 'Element2',1
UNION ALL
SELECT 4, 'SubElement1',2
)TAB
SELECT * INTO #TABLE2
FROM
(
SELECT 'a' RiskId, 'Fincancial' RiskName,'High' RiskRating ,2 ElementId
UNION ALL
SELECT 'b','HR','High',3
UNION ALL
SELECT 'c', 'Marketing','Low',2
UNION ALL
SELECT 'd', 'Safety','Medium',4
)TAB
We are finding the children of a parent, its count of High,Medium and Low and use cross join to show parent with all the combinations of its children's High,Medium and Low
UPDATE
The below variable can be used to access the records dynamically.
DECLARE #ElementTableId INT;
--SET #ElementTableId = 1
And use the above variable inside the query
;WITH CTE1 AS
(
SELECT *,0 [LEVEL] FROM #TABLE1 WHERE ElementTableId = #ElementTableId
UNION ALL
SELECT E.*,e2.[LEVEL]+1 FROM #TABLE1 e
INNER JOIN CTE1 e2 on e.ElementParentId = e2.ElementTableId
AND E.ElementTableId<>#ElementTableId
)
,CTE2 AS
(
SELECT E1.*,E2.*,COUNT(RiskRating) OVER(PARTITION BY RiskRating) CNT
from CTE1 E1
LEFT JOIN #TABLE2 E2 ON E1.ElementTableId=E2.ElementId
)
,CTE3 AS
(
SELECT DISTINCT T1.ElementName,C2.RiskRating,C2.CNT
FROM #TABLE1 T1
CROSS JOIN CTE2 C2
WHERE T1.ElementTableId = #ElementTableId
)
SELECT *
FROM CTE3
PIVOT(MIN(CNT)
FOR RiskRating IN ([High], [Medium],[Low])) AS PVTTable
SQL FIDDLE
RESULT
UPDATE 2
I am updating as per your new requirement
Here is sample table in which I have added extra data to test
SELECT * INTO #ElementTable
FROM
(
SELECT 1 ElementTableId, 'Main' ElementName ,0 ElementParentId
UNION ALL
SELECT 2,'Element1',1
UNION ALL
SELECT 3, 'Element2',1
UNION ALL
SELECT 4, 'SubElement1',2
UNION ALL
SELECT 5, 'Main 2',0
UNION ALL
SELECT 6, 'Element21',5
UNION ALL
SELECT 7, 'SubElement21',6
UNION ALL
SELECT 8, 'SubElement22',7
UNION ALL
SELECT 9, 'SubElement23',7
)TAB
SELECT * INTO #RiskTable
FROM
(
SELECT 'a' RiskId, 'Fincancial' RiskName,'High' RiskRating ,2 ElementId
UNION ALL
SELECT 'b','HR','High',3
UNION ALL
SELECT 'c', 'Marketing','Low',2
UNION ALL
SELECT 'd', 'Safety','Medium',4
UNION ALL
SELECT 'e' , 'Fincancial' ,'High' ,5
UNION ALL
SELECT 'f','HR','High',6
UNION ALL
SELECT 'g','HR','High',6
UNION ALL
SELECT 'h', 'Marketing','Low',7
UNION ALL
SELECT 'i', 'Safety','Medium',8
UNION ALL
SELECT 'j', 'Safety','High',8
)TAB
I have written the logic in query
;WITH CTE1 AS
(
-- Here you will find the level of every elements in the table
SELECT *,0 [LEVEL]
FROM #ElementTable WHERE ElementParentId = 0
UNION ALL
SELECT ET.*,CTE1.[LEVEL]+1
FROM #ElementTable ET
INNER JOIN CTE1 on ET.ElementParentId = CTE1.ElementTableId
)
,CTE2 AS
(
-- Filters the level and find the major parant of each child
-- ie, 100->150->200, here the main parent of 200 is 100
SELECT *,CTE1.ElementTableId MajorParentID,CTE1.ElementName MajorParentName
FROM CTE1 WHERE [LEVEL]=1
UNION ALL
SELECT CTE1.*,CTE2.MajorParentID,CTE2.MajorParentName
FROM CTE1
INNER JOIN CTE2 on CTE1.ElementParentId = CTE2.ElementTableId
)
,CTE3 AS
(
-- Since each child have columns for main parent id and name,
-- you will get the count of each element corresponding to the level you have selected directly
SELECT DISTINCT CTE2.MajorParentName,RT.RiskRating ,
COUNT(RiskRating) OVER(PARTITION BY MajorParentID,RiskRating) CNT
FROM CTE2
JOIN #RiskTable RT ON CTE2.ElementTableId=RT.ElementId
)
SELECT MajorParentName, ISNULL([High],0)[High], ISNULL([Medium],0)[Medium],ISNULL([Low],0)[Low]
FROM CTE3
PIVOT(MIN(CNT)
FOR RiskRating IN ([High], [Medium],[Low])) AS PVTTable
SQL FIDDLE

Joining a list of values with table rows in SQL

Suppose I have a list of values, such as 1, 2, 3, 4, 5 and a table where some of those values exist in some column. Here is an example:
id name
1 Alice
3 Cindy
5 Elmore
6 Felix
I want to create a SELECT statement that will include all of the values from my list as well as the information from those rows that match the values, i.e., perform a LEFT OUTER JOIN between my list and the table, so the result would be like follows:
id name
1 Alice
2 (null)
3 Cindy
4 (null)
5 Elmore
How do I do that without creating a temp table or using multiple UNION operators?
If in Microsoft SQL Server 2008 or later, then you can use Table Value Constructor
Select v.valueId, m.name
From (values (1), (2), (3), (4), (5)) v(valueId)
left Join otherTable m
on m.id = v.valueId
Postgres also has this construction VALUES Lists:
SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (3, 'three')) AS t (num,letter)
Also note the possible Common Table Expression syntax which can be handy to make joins:
WITH my_values(num, str) AS (
VALUES (1, 'one'), (2, 'two'), (3, 'three')
)
SELECT num, txt FROM my_values
With Oracle it's possible, though heavier From ASK TOM:
with id_list as (
select 10 id from dual union all
select 20 id from dual union all
select 25 id from dual union all
select 70 id from dual union all
select 90 id from dual
)
select * from id_list;
the following solution for oracle is adopted from this source. the basic idea is to exploit oracle's hierarchical queries. you have to specify a maximum length of the list (100 in the sample query below).
select d.lstid
, t.name
from (
select substr(
csv
, instr(csv,',',1,lev) + 1
, instr(csv,',',1,lev+1 )-instr(csv,',',1,lev)-1
) lstid
from (select ','||'1,2,3,4,5'||',' csv from dual)
, (select level lev from dual connect by level <= 100)
where lev <= length(csv)-length(replace(csv,','))-1
) d
left join test t on ( d.lstid = t.id )
;
check out this sql fiddle to see it work.
Bit late on this, but for Oracle you could do something like this to get a table of values:
SELECT rownum + 5 /*start*/ - 1 as myval
FROM dual
CONNECT BY LEVEL <= 100 /*end*/ - 5 /*start*/ + 1
... And then join that to your table:
SELECT *
FROM
(SELECT rownum + 1 /*start*/ - 1 myval
FROM dual
CONNECT BY LEVEL <= 5 /*end*/ - 1 /*start*/ + 1) mypseudotable
left outer join myothertable
on mypseudotable.myval = myothertable.correspondingval
Assuming myTable is the name of your table, following code should work.
;with x as
(
select top (select max(id) from [myTable]) number from [master]..spt_values
),
y as
(select row_number() over (order by x.number) as id
from x)
select y.id, t.name
from y left join myTable as t
on y.id = t.id;
Caution: This is SQL Server implementation.
fiddle
For getting sequential numbers as required for part of output (This method eliminates values to type for n numbers):
declare #site as int
set #site = 1
while #site<=200
begin
insert into ##table
values (#site)
set #site=#site+1
end
Final output[post above step]:
select * from ##table
select v.id,m.name from ##table as v
left outer join [source_table] m
on m.id=v.id
Suppose your table that has values 1,2,3,4,5 is named list_of_values, and suppose the table that contain some values but has the name column as some_values, you can do:
SELECT B.id,A.name
FROM [list_of_values] AS B
LEFT JOIN [some_values] AS A
ON B.ID = A.ID

Concatenate results from a SQL query in Oracle

I have data like this in a table
NAME PRICE
A 2
B 3
C 5
D 9
E 5
I want to display all the values in one row; for instance:
A,2|B,3|C,5|D,9|E,5|
How would I go about making a query that will give me a string like this in Oracle? I don't need it to be programmed into something; I just want a way to get that line to appear in the results so I can copy it over and paste it in a word document.
My Oracle version is 10.2.0.5.
-- Oracle 10g --
SELECT deptno, WM_CONCAT(ename) AS employees
FROM scott.emp
GROUP BY deptno;
Output:
10 CLARK,MILLER,KING
20 SMITH,FORD,ADAMS,SCOTT,JONES
30 ALLEN,JAMES,TURNER,BLAKE,MARTIN,WARD
I know this is a little late but try this:
SELECT LISTAGG(CONCAT(CONCAT(NAME,','),PRICE),'|') WITHIN GROUP (ORDER BY NAME) AS CONCATDATA
FROM your_table
Usually when I need something like that quickly and I want to stay on SQL without using PL/SQL, I use something similar to the hack below:
select sys_connect_by_path(col, ', ') as concat
from
(
select 'E' as col, 1 as seq from dual
union
select 'F', 2 from dual
union
select 'G', 3 from dual
)
where seq = 3
start with seq = 1
connect by prior seq+1 = seq
It's a hierarchical query which uses the "sys_connect_by_path" special function, which is designed to get the "path" from a parent to a child.
What we are doing is simulating that the record with seq=1 is the parent of the record with seq=2 and so fourth, and then getting the full path of the last child (in this case, record with seq = 3), which will effectively be a concatenation of all the "col" columns
Adapted to your case:
select sys_connect_by_path(to_clob(col), '|') as concat
from
(
select name || ',' || price as col, rownum as seq, max(rownum) over (partition by 1) as max_seq
from
(
/* Simulating your table */
select 'A' as name, 2 as price from dual
union
select 'B' as name, 3 as price from dual
union
select 'C' as name, 5 as price from dual
union
select 'D' as name, 9 as price from dual
union
select 'E' as name, 5 as price from dual
)
)
where seq = max_seq
start with seq = 1
connect by prior seq+1 = seq
Result is: |A,2|B,3|C,5|D,9|E,5
As you're in Oracle 10g you can't use the excellent listagg(). However, there are numerous other string aggregation techniques.
There's no particular need for all the complicated stuff. Assuming the following table
create table a ( NAME varchar2(1), PRICE number);
insert all
into a values ('A', 2)
into a values ('B', 3)
into a values ('C', 5)
into a values ('D', 9)
into a values ('E', 5)
select * from dual
The unsupported function wm_concat should be sufficient:
select replace(replace(wm_concat (name || '#' || price), ',', '|'), '#', ',')
from a;
REPLACE(REPLACE(WM_CONCAT(NAME||'#'||PRICE),',','|'),'#',',')
--------------------------------------------------------------------------------
A,2|B,3|C,5|D,9|E,5
But, you could also alter Tom Kyte's stragg, also in the above link, to do it without the replace functions.
Here is another approach, using model clause:
-- sample of data from your question
with t1(NAME1, PRICE) as(
select 'A', 2 from dual union all
select 'B', 3 from dual union all
select 'C', 5 from dual union all
select 'D', 9 from dual union all
select 'E', 5 from dual
) -- the query
select Res
from (select name1
, price
, rn
, res
from t1
model
dimension by (row_number() over(order by name1) rn)
measures (name1, price, cast(null as varchar2(101)) as res)
(res[rn] order by rn desc = name1[cv()] || ',' || price[cv()] || '|' || res[cv() + 1])
)
where rn = 1
Result:
RES
----------------------
A,2|B,3|C,5|D,9|E,5|
SQLFiddle Example
Something like the following, which is grossly inefficient and untested.
create function foo returning varchar2 as
(
declare bar varchar2(8000) --arbitrary number
CURSOR cur IS
SELECT name,price
from my_table
LOOP
FETCH cur INTO r;
EXIT WHEN cur%NOTFOUND;
bar:= r.name|| ',' ||r.price || '|'
END LOOP;
dbms_output.put_line(bar);
return bar
)
Managed to get till here using xmlagg: using oracle 11G from sql fiddle.
Data Table:
COL1 COL2 COL3
1 0 0
1 1 1
2 0 0
3 0 0
3 1 0
SELECT
RTRIM(REPLACE(REPLACE(
XMLAgg(XMLElement("x", col1,',', col2, col3)
ORDER BY col1), '<x>'), '</x>', '|')) AS COLS
FROM ab
;
Results:
COLS
1,00| 3,00| 2,00| 1,11| 3,10|
* SQLFIDDLE DEMO
Reference to read on XMLAGG