Related
I have a recursion/hierarchical problem that I'm trying to figure out in BigQuery.
I have a list of employees and each employee has a manager ID. I need to be able to enter a single Employee_ID and return an array of every person beneath them.
-- Sample employee hierarchy: every row references its line manager through
-- Line_Manager_ID (NULL for the single employee at the top of the sample data).
CREATE TABLE p_RLS.testHeirarchy (
  Employee_ID     INT64,
  Employee_Name   STRING,
  Position        STRING,
  Line_Manager_ID INT64
);

-- Ten workers, five store managers, two group managers, two ops managers
-- and one state manager.
INSERT INTO p_RLS.testHeirarchy
  (Employee_ID, Employee_Name, Position, Line_Manager_ID)
VALUES
  (1,  'Joe',     'Worker',        11),
  (2,  'James',   'Worker',        11),
  (3,  'Jack',    'Worker',        11),
  (4,  'Jill',    'Worker',        12),
  (5,  'Jan',     'Worker',        12),
  (6,  'Jacquie', 'Worker',        13),
  (7,  'Joaquin', 'Worker',        14),
  (8,  'Jeremy',  'Worker',        14),
  (9,  'Jade',    'Worker',        15),
  (10, 'Jocelyn', 'Worker',        15),
  (11, 'Bob',     'Store Manager', 16),
  (12, 'Bill',    'Store Manager', 16),
  (13, 'Barb',    'Store Manager', 16),
  (14, 'Ben',     'Store Manager', 17),
  (15, 'Burt',    'Store Manager', 17),
  (16, 'Sally',   'Group Manager', 18),
  (17, 'Sam',     'Group Manager', 19),
  (18, 'Anna',    'Ops Manager',   20),
  (19, 'Amy',     'Ops Manager',   20),
  (20, 'Zoe',     'State Manager', NULL);
My desired output would resemble:
-- Expected shape: one row per requested employee plus an array of every report.
SELECT
  20 AS Employee_ID,
  [19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1] AS Reports;
SELECT
  11 AS Employee_ID,
  [3, 2, 1] AS Reports;
-- An employee without reports yields an empty array.
SELECT
  1 AS Employee_ID,
  [] AS Reports;
I have got the following working but it seems very ugly/inconvenient and doesn't support unlimited levels:
-- Gathers the reports of employee 16 one level at a time (seven levels at most,
-- one self-join per level) and then flattens the per-level arrays into one list.
WITH reports_by_level AS (
  SELECT
    L0.Employee_ID,
    L0.Employee_Name,
    L0.Position,
    L0.Line_Manager_ID,
    ARRAY_AGG(DISTINCT L1.Employee_ID IGNORE NULLS) AS Lvl1,
    ARRAY_AGG(DISTINCT L2.Employee_ID IGNORE NULLS) AS Lvl2,
    ARRAY_AGG(DISTINCT L3.Employee_ID IGNORE NULLS) AS Lvl3,
    ARRAY_AGG(DISTINCT L4.Employee_ID IGNORE NULLS) AS Lvl4,
    ARRAY_AGG(DISTINCT L5.Employee_ID IGNORE NULLS) AS Lvl5,
    ARRAY_AGG(DISTINCT L6.Employee_ID IGNORE NULLS) AS Lvl6,
    ARRAY_AGG(DISTINCT L7.Employee_ID IGNORE NULLS) AS Lvl7
  FROM p_RLS.testHeirarchy AS L0
  -- Each join steps one level further down from the previous level's employees.
  LEFT JOIN p_RLS.testHeirarchy AS L1 ON L0.Employee_ID = L1.Line_Manager_ID
  LEFT JOIN p_RLS.testHeirarchy AS L2 ON L1.Employee_ID = L2.Line_Manager_ID
  LEFT JOIN p_RLS.testHeirarchy AS L3 ON L2.Employee_ID = L3.Line_Manager_ID
  LEFT JOIN p_RLS.testHeirarchy AS L4 ON L3.Employee_ID = L4.Line_Manager_ID
  LEFT JOIN p_RLS.testHeirarchy AS L5 ON L4.Employee_ID = L5.Line_Manager_ID
  LEFT JOIN p_RLS.testHeirarchy AS L6 ON L5.Employee_ID = L6.Line_Manager_ID
  LEFT JOIN p_RLS.testHeirarchy AS L7 ON L6.Employee_ID = L7.Line_Manager_ID
  WHERE L0.Employee_ID = 16
  GROUP BY
    L0.Employee_ID,
    L0.Employee_Name,
    L0.Position,
    L0.Line_Manager_ID
)
SELECT
  Employee_ID,
  -- A level without employees is replaced by [] before concatenating.
  ARRAY_CONCAT(
    IFNULL(Lvl1, []),
    IFNULL(Lvl2, []),
    IFNULL(Lvl3, []),
    IFNULL(Lvl4, []),
    IFNULL(Lvl5, []),
    IFNULL(Lvl6, []),
    IFNULL(Lvl7, [])
  ) AS All_reports
FROM reports_by_level
Is there a better way to do this? Is a recursive approach possible in BigQuery?
Recursive CTEs were recently introduced in BigQuery!
This makes things so much easier
-- Pairs every manager with all direct and indirect reports.
WITH RECURSIVE iterations AS (
  -- Anchor: each employee paired with the direct manager, at depth 1.
  SELECT
    line_manager_id,
    employee_id,
    1 AS pos
  FROM your_table
  UNION ALL
  -- Step: for a known (manager, employee) pair, attach the employee's own
  -- direct reports to that same manager, one level deeper.
  SELECT
    known.line_manager_id,
    child.employee_id,
    known.pos + 1
  FROM your_table AS child
  INNER JOIN iterations AS known
    ON known.employee_id = child.line_manager_id
)
SELECT
  line_manager_id,
  -- '' || employee_id turns the id into text for STRING_AGG;
  -- shallower levels are listed first, higher ids first within a level.
  STRING_AGG('' || employee_id ORDER BY pos, employee_id DESC) AS reports_as_list
FROM iterations
WHERE line_manager_id IS NOT NULL
GROUP BY line_manager_id
ORDER BY line_manager_id DESC
If applied to sample data in question - output is
Below is for BigQuery Standard SQL
-- Iteratively expands each manager's list of direct reports into the full list
-- of direct and indirect reports, then prints one row per employee.
--   reports_count: total number of (manager, report) entries before the current
--                  pass; a pass that leaves it unchanged added nothing new.
--   run_away_stop: pass counter, used as a safety cap on the number of passes.
DECLARE reports_count, run_away_stop INT64 DEFAULT 0;

-- Sample data from the question.
CREATE TEMP TABLE initialData AS
WITH input AS (
  SELECT 1 Employee_ID, 'Joe' Employee_Name, 'Worker' Position, 11 Line_Manager_ID UNION ALL
  SELECT 2, 'James', 'Worker', 11 UNION ALL
  SELECT 3, 'Jack', 'Worker', 11 UNION ALL
  SELECT 4, 'Jill', 'Worker', 12 UNION ALL
  SELECT 5, 'Jan', 'Worker', 12 UNION ALL
  SELECT 6, 'Jacquie', 'Worker', 13 UNION ALL
  SELECT 7, 'Joaquin', 'Worker', 14 UNION ALL
  SELECT 8, 'Jeremy', 'Worker', 14 UNION ALL
  SELECT 9, 'Jade', 'Worker', 15 UNION ALL
  SELECT 10, 'Jocelyn', 'Worker', 15 UNION ALL
  SELECT 11, 'Bob', 'Store Manager', 16 UNION ALL
  SELECT 12, 'Bill', 'Store Manager', 16 UNION ALL
  SELECT 13, 'Barb', 'Store Manager', 16 UNION ALL
  SELECT 14, 'Ben', 'Store Manager', 17 UNION ALL
  SELECT 15, 'Burt', 'Store Manager', 17 UNION ALL
  SELECT 16, 'Sally', 'Group Manager', 18 UNION ALL
  SELECT 17, 'Sam', 'Group Manager', 19 UNION ALL
  SELECT 18, 'Anna', 'Ops Manager', 20 UNION ALL
  SELECT 19, 'Amy', 'Ops Manager', 20 UNION ALL
  SELECT 20, 'Zoe', 'State Manager', NULL
)
SELECT * FROM input;

-- Seed: one row per manager holding the array of its direct reports.
CREATE TEMP TABLE ttt AS
SELECT Line_Manager_ID, ARRAY_AGG(Employee_ID) Reports
FROM initialData
WHERE Line_Manager_ID IS NOT NULL
GROUP BY Line_Manager_ID;

LOOP
  -- Count the pass and remember how many report entries exist before it.
  -- (ttt always holds exactly one row per manager, so its row count cannot be
  -- used to detect convergence; the number of array elements can.)
  SET (run_away_stop, reports_count) = (
    SELECT AS STRUCT run_away_stop + 1, IFNULL(SUM(ARRAY_LENGTH(Reports)), 0)
    FROM ttt
  );

  -- For every manager t2 that has another manager t1 among its reports, append
  -- t1's reports to t2's; then merge per manager, de-duplicate and sort descending.
  CREATE OR REPLACE TEMP TABLE ttt1 AS
  SELECT
    Line_Manager_ID,
    ARRAY(SELECT DISTINCT Employee_ID FROM UNNEST(Reports) Employee_ID ORDER BY Employee_ID DESC) Reports
  FROM (
    SELECT Line_Manager_ID, ARRAY_CONCAT_AGG(Reports) Reports
    FROM (
      SELECT t2.Line_Manager_ID, ARRAY_CONCAT(t1.Reports, t2.Reports) Reports
      FROM ttt t1
      CROSS JOIN ttt t2
      WHERE t1.Line_Manager_ID IN UNNEST(t2.Reports)
    )
    GROUP BY Line_Manager_ID
  );

  -- Expanded rows replace the old ones; managers that were not expanded in this
  -- pass keep their previous row.
  CREATE OR REPLACE TEMP TABLE ttt AS
  SELECT * FROM ttt1 UNION ALL
  SELECT * FROM ttt WHERE NOT Line_Manager_ID IN (SELECT Line_Manager_ID FROM ttt1);

  -- Stop once a pass adds no new report entries, or when the safety cap is hit.
  IF reports_count = (SELECT IFNULL(SUM(ARRAY_LENGTH(Reports)), 0) FROM ttt)
     OR run_away_stop > 10 THEN
    BREAK;
  END IF;
END LOOP;

-- One row per employee; employees without reports get NULL.
SELECT Employee_ID,
  (
    SELECT STRING_AGG(CAST(Employee_ID AS STRING), ',' ORDER BY Employee_ID DESC)
    FROM UNNEST(ttt.Reports) Employee_ID
  ) Reports_as_list
FROM (SELECT DISTINCT Employee_ID FROM initialData) d
LEFT JOIN ttt ON Employee_ID = Line_Manager_ID
ORDER BY Employee_ID DESC;
with result
Row Employee_ID Reports_as_list
1 20 19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1
2 19 17,15,14,10,9,8,7
3 18 16,13,12,11,6,5,4,3,2,1
4 17 15,14,10,9,8,7
5 16 13,12,11,6,5,4,3,2,1
6 15 10,9
7 14 8,7
8 13 6
9 12 5,4
10 11 3,2,1
11 10 null
12 9 null
13 8 null
14 7 null
15 6 null
16 5 null
17 4 null
18 3 null
19 2 null
20 1 null
If you need Reports as an array, replace the last statement in the above script with the one below:
-- Same final listing, but returning the Reports array itself instead of a
-- comma-separated string.
SELECT
  Employee_ID,
  Reports AS Reports_as_array
FROM (
  SELECT DISTINCT Employee_ID
  FROM initialData
) AS d
LEFT JOIN ttt
  ON Employee_ID = Line_Manager_ID
ORDER BY Employee_ID DESC;
Note: depending on the level of nesting in your hierarchy, you might need to adjust the 10 in OR run_away_stop > 10.
To the question: "Is a recursive approach possible in BigQuery?"
Yes!
Now that BigQuery supports scripting and loops I solved some recursive problems from the Advent of Code with BigQuery:
https://towardsdatascience.com/advent-of-code-sql-bigquery-31e6a04964d4
-- Expands outwards from 'YOU' one hop per pass until 'SAN' is reached, counting
-- the passes in steps.
-- NOTE(review): t1 is not defined in this snippet; it is presumably an edge
-- table with columns origin and dest - confirm against the full script.
-- steps must be declared before use (DECLARE has to precede other statements).
DECLARE steps INT64 DEFAULT 0;

CREATE TEMP TABLE planets AS
SELECT 'YOU' AS planet;

LOOP
  SET steps = steps + 1;

  -- Replace the current set with every planet one hop away, in either direction.
  CREATE OR REPLACE TEMP TABLE planets AS
  SELECT DISTINCT planet
  FROM (
    SELECT origin AS planet FROM t1 WHERE dest IN (SELECT planet FROM planets)
    UNION ALL
    SELECT dest AS planet FROM t1 WHERE origin IN (SELECT planet FROM planets)
  );

  IF 'SAN' IN (SELECT planet FROM planets) THEN
    LEAVE;
  END IF;
END LOOP;

SELECT steps - 2
I would use a similar approach to navigate the graph and annotate all parent relationships.
Soon: I'll write a blog post on the specifics of tree traversal to get everyone under x. But this code will help you in the meantime.
This is my first post to this forum and I am very new to SQL so please bear with me.
I am attempting to modify some existing script to make a small change to a report to make it slightly more fit for purpose (the original was put together by a developer).
This report is looking to see whether two specific files have been read into a database for three entities each month or whether they are missing.
The output looks like the below:
File A
YYYY:MM:DD A MISSING
B MISSING
C MISSING
YYYY:MM:DD A Present
B MISSING
C Present
The script is such that only the current year files are looked for with the exception of the files for the December of the previous year, however I want to also display the October and November results for the previous year.
Below is the relevant part of the script:
-- Builds the expected (filedate, fid) combinations in k and matches them against
-- the loaded files in XXXX (alias r); present = 1 when a matching row exists.
-- @ReportDate is the report's date parameter, declared outside this fragment.
select distinct k.filedate as filedate, k.fid, case when r.fundid is null then 0 else 1 end as present
from XXXX r
right join
(
    -- December of the previous year, one row per fund id
    select convert(date, convert(varchar, year(@ReportDate) - 1) + '-12-01') as filedate, g.fid
    from (
        select 'XXXXFDGBP10' as fid
        union
        select 'XXXXUSD10' as fid
        union
        select 'XXXXUSD10' as fid
        union
        select 'XXXXA10' as fid
        union
        select 'XXXXB10' as fid
        union
        select 'XXXXGBPMGMT10' as fid
        union
        select 'XXXXMGMTSH10' as fid
    ) g
    union
    -- the first day of every month of the report year, one row per fund id
    select convert(date, convert(varchar, year(@ReportDate)) + '-' + convert(varchar, h.m) + '-01') as filedate, s.fid
    from (
        select 'XXXXFDGBP10' as fid
        union
        select 'XXXXUSD10' as fid
        union
        select 'XXXXUSD10' as fid
        union
        select 'XXXXA10' as fid
        union
        select 'XXXXB10' as fid
        union
        select 'XXXXGBPMGMT10' as fid
        union
        select 'XXXXMGMTSH10' as fid
    ) s,
    (
        select 1 as m
        union
        select 2 as m
        union
        select 3 as m
        union
        select 4 as m
        union
        select 5 as m
        union
        select 6 as m
        union
        select 7 as m
        union
        select 8 as m
        union
        select 9 as m
        union
        select 10 as m
        union
        select 11 as m
        union
        select 12 as m
    ) h
) k
    on r.fundid = k.fid and r.filedate = k.filedate
where
    -- reporting window: 1 December of the previous year up to the report date
    k.filedate >= convert(date, convert(varchar, year(@ReportDate) - 1) + '-12-01')
    and k.filedate <= @ReportDate
So I want to add '11-01' and '10-01' to the 2016 return of the report. Does anyone know how I can do this?
Thanks in advance. Please let me know if this isn't clear or if anyone has any questions.
I'd rewrite your query as
-- Expected (filedate, fid) combinations for October of the previous year up to
-- the report date, flagged with whether a matching file row exists in XXXX.
-- @ReportDate is the report's date parameter, declared outside this fragment.
;WITH months AS
(
    SELECT f.[month]
    FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9), (10), (11), (12)) AS f([month])
),
years AS
(
    -- the report year and the year before it
    SELECT f.[year]
    FROM (VALUES (YEAR(@ReportDate)), (YEAR(@ReportDate) - 1)) AS f([year])
),
fids AS
(
    -- each fund id listed once, so no (filedate, fid) pair is produced twice
    SELECT f.fid
    FROM (VALUES ('XXXXFDGBP10'), ('XXXXUSD10'), ('XXXXA10'), ('XXXXB10'), ('XXXXGBPMGMT10'), ('XXXXMGMTSH10')) AS f(fid)
),
k AS
(
    SELECT
        -- first day of the given month and year (day 0 is 1900-01-01)
        filedate = CONVERT(date, DATEADD(month, m.[month] - 1, DATEADD(year, y.[year] - 1900, 0))),
        fi.fid
    FROM fids AS fi
    CROSS JOIN years AS y
    CROSS JOIN months AS m
)
SELECT DISTINCT
    k.filedate,
    k.fid,
    present = CASE WHEN r.fundid IS NULL THEN 0 ELSE 1 END
FROM k
LEFT JOIN XXXX AS r
    ON r.fundid = k.fid
    AND r.filedate = k.filedate
WHERE
    -- reporting window: 1 October of the previous year up to the report date
    k.filedate >= CONVERT(date, CONVERT(varchar, YEAR(@ReportDate) - 1) + '-10-01')
    AND k.filedate <= @ReportDate
I'm creating a NACHA file and if the number of records in the file is not a multiple of 10, we need to insert enough "dummy" records filled with nines (replicate('9',94)) to hit that next tens place.
I know that I could write a loop or perhaps fill a temp table with 10 records full of nines and select the top N. But those options feel clunky.
I was trying to think of a single select statement that could do it for me. Any ideas?
-- Sketch of the goal: the real records followed by filler records of 94 nines.
SELECT nacha_rows
FROM NACHA_TABLE
UNION ALL
SELECT REPLICATE('9', 94) --do this 0 to 9 times
The formula (10-COUNT(*)%10)%10 tells you how many rows to add, so you can just select that many dummy rows from an existing dummy table.
-- Real records, followed by as many filler rows as are needed to reach the
-- next multiple of ten.
SELECT nacha_rows
FROM NACHA_TABLE
UNION ALL
-- spt_values only serves as a source of rows; TOP limits how many are taken.
SELECT TOP (
           SELECT (10 - COUNT(*) % 10) % 10
           FROM NACHA_TABLE
       )
       REPLICATE('9', 94)
FROM master.dbo.spt_values
This should work. Created a temp table with 9 rows of the dummy data. Then used modulo to determine how many extra rows should be returned. Then return the full dataset. If you wanted to make it pretty you could take the modulo piece out and calculate it one time in a variable.
-- Nine numbered filler rows are generated; only the ones numbered from the
-- current remainder (row count modulo 10) upwards are appended.
;WITH dummydata (num, nines) AS
(
    SELECT 1, REPLICATE('9', 94)
    UNION ALL
    SELECT d.num + 1, REPLICATE('9', 94)
    FROM dummydata AS d
    WHERE d.num < 9
)
SELECT *
FROM nacha_table
UNION ALL
SELECT f.nines
FROM dummydata AS f
-- A remainder of 0 is mapped to 10, which no filler row reaches, so nothing is added.
WHERE f.num >= ISNULL(NULLIF((SELECT COUNT(1) % 10 FROM nacha_table), 0), 10)
One idea is to prepare 9 filler rows, then append only the ones needed to reach the next multiple of ten; it is the same idea as JChao's, with a different implementation.
-- Nine numbered filler rows; filler n is kept when n plus the current remainder
-- (row count modulo 10) does not exceed 10.
WITH Filler AS (
    SELECT
        n.n,
        REPLICATE('9', 94) AS nacha_rows
    FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) AS n(n)
)
SELECT nacha_rows
FROM NACHA_TABLE
UNION ALL
SELECT f.nacha_rows
FROM Filler AS f
OUTER APPLY (
    SELECT COUNT(1) % 10 AS remainder
    FROM NACHA_TABLE
) AS r
WHERE r.remainder > 0 -- to prevent filler line when NACHA_TABLE has exactly 10x rows
  AND f.n + r.remainder <= 10
SQLFiddle demo
Looks like you need a dummy select statement:
-- Four literal rows produced with a table value constructor.
SELECT
    v.[col1],
    v.[col 2]
FROM (VALUES
    ('1', 'abcdef'),
    ('2', 'abcdef'),
    ('3', 'abcdef'),
    ('4', 'abcdef')
) AS v ([col1], [col 2])
A way using a set of 10 rows and joining;
-- Ten numbered slots are left-joined to the numbered records; a slot without a
-- record is filled with 94 nines. Only ten slots exist, so at most ten rows are
-- returned.
;WITH Slots (ord) AS
(
    SELECT 1
    UNION ALL
    SELECT s.ord + 1
    FROM Slots AS s
    WHERE s.ord < 10
),
Numbered AS
(
    SELECT
        ROW_NUMBER() OVER (ORDER BY nacha_rows) AS rn,
        nacha_rows
    FROM NACHA_TABLE
)
SELECT ISNULL(nr.nacha_rows, REPLICATE('9', 94))
FROM Slots AS sl
LEFT JOIN Numbered AS nr
    ON nr.rn = sl.ord
Edit: I just realised that the table could of course have more than 10 rows in the first place, in which case this won't work.
Suppose I have a list of values, such as 1, 2, 3, 4, 5 and a table where some of those values exist in some column. Here is an example:
id name
1 Alice
3 Cindy
5 Elmore
6 Felix
I want to create a SELECT statement that will include all of the values from my list as well as the information from those rows that match the values, i.e., perform a LEFT OUTER JOIN between my list and the table, so the result would be like follows:
id name
1 Alice
2 (null)
3 Cindy
4 (null)
5 Elmore
How do I do that without creating a temp table or using multiple UNION operators?
If in Microsoft SQL Server 2008 or later, then you can use Table Value Constructor
-- The value list is the driving side, so every listed value appears in the
-- result even when otherTable has no matching row.
SELECT
    v.valueId,
    m.name
FROM (VALUES (1), (2), (3), (4), (5)) AS v (valueId)
LEFT JOIN otherTable AS m
    ON m.id = v.valueId
Postgres also has this construction VALUES Lists:
-- A VALUES list used as an inline table with named columns.
SELECT
    t.num,
    t.letter
FROM (
    VALUES
        (1, 'one'),
        (2, 'two'),
        (3, 'three')
) AS t (num, letter)
Also note the possible Common Table Expression syntax which can be handy to make joins:
-- A VALUES list wrapped in a CTE; the column names come from the CTE's column list.
WITH my_values (num, str) AS (
    VALUES
        (1, 'one'),
        (2, 'two'),
        (3, 'three')
)
-- The second column is declared as str above, so it must be selected as str.
SELECT
    num,
    str
FROM my_values
With Oracle it's possible, though more verbose (from Ask Tom):
-- Inline list of ids, one SELECT ... FROM dual per value.
WITH id_list AS (
    SELECT 10 AS id FROM dual
    UNION ALL
    SELECT 20 AS id FROM dual
    UNION ALL
    SELECT 25 AS id FROM dual
    UNION ALL
    SELECT 70 AS id FROM dual
    UNION ALL
    SELECT 90 AS id FROM dual
)
SELECT id
FROM id_list;
The following solution for Oracle is adapted from this source. The basic idea is to exploit Oracle's hierarchical queries. You have to specify a maximum length for the list (100 in the sample query below).
-- Splits the literal list '1,2,3,4,5' into one row per element (lstid) and
-- left-joins the elements to table test.
select d.lstid
, t.name
from (
-- lstid: the text between the lev-th and the (lev+1)-th comma of csv
select substr(
csv
, instr(csv,',',1,lev) + 1
, instr(csv,',',1,lev+1 )-instr(csv,',',1,lev)-1
) lstid
-- csv: the list wrapped in a leading and a trailing comma
from (select ','||'1,2,3,4,5'||',' csv from dual)
-- row generator producing lev = 1..100, which caps the supported list length
, (select level lev from dual connect by level <= 100)
-- keep one lev per list element: the number of commas in csv minus one
where lev <= length(csv)-length(replace(csv,','))-1
) d
left join test t on ( d.lstid = t.id )
;
check out this sql fiddle to see it work.
Bit late on this, but for Oracle you could do something like this to get a table of values:
-- Generates the integers from start (5) to end (100), one per row.
SELECT
    ROWNUM + 5 /*start*/ - 1 AS myval
FROM
    dual
CONNECT BY
    LEVEL <= 100 /*end*/ - 5 /*start*/ + 1
... And then join that to your table:
-- Generated values 1..5 on the left, so every value is returned even when
-- myothertable has no matching row.
SELECT *
FROM (
    SELECT
        ROWNUM + 1 /*start*/ - 1 AS myval
    FROM dual
    CONNECT BY LEVEL <= 5 /*end*/ - 1 /*start*/ + 1
) mypseudotable
LEFT JOIN myothertable
    ON mypseudotable.myval = myothertable.correspondingval
Assuming myTable is the name of your table, following code should work.
-- Takes max(id) rows from spt_values, numbers them 1..max(id), and left-joins
-- the numbers to myTable so that missing ids show up with a NULL name.
;WITH numbered AS
(
    SELECT ROW_NUMBER() OVER (ORDER BY src.number) AS id
    FROM (
        SELECT TOP (SELECT MAX(id) FROM [myTable]) number
        FROM [master]..spt_values
    ) AS src
)
SELECT
    n.id,
    t.name
FROM numbered AS n
LEFT JOIN myTable AS t
    ON n.id = t.id;
Caution: This is SQL Server implementation.
fiddle
To generate the sequential numbers needed for part of the output (this method avoids typing out n values by hand):
-- Inserts the numbers 1 to 200 into ##table, one row per pass of the loop.
-- NOTE(review): ##table is not created in this snippet; it presumably already
-- exists with a single integer column - confirm before running.
declare @site as int
set @site = 1
while @site <= 200
begin
    insert into ##table
    values (@site)
    set @site = @site + 1
end
Final output[post above step]:
-- Show the generated numbers.
SELECT *
FROM ##table

-- Every generated number, with the matching name where one exists.
SELECT
    v.id,
    m.name
FROM ##table AS v
LEFT JOIN [source_table] AS m
    ON m.id = v.id
Suppose the table that holds the values 1, 2, 3, 4, 5 is named list_of_values, and the table that contains only some of those values, together with the name column, is named some_values. Then you can do:
-- list_of_values drives the join, so each of its ids is returned once per
-- match, or once with a NULL name when some_values has no match.
SELECT
    B.id,
    A.name
FROM [list_of_values] AS B
LEFT JOIN [some_values] AS A
    ON B.ID = A.ID
I have several joined CTE. Something like:
-- Schematic only: the ".." and parenthesised phrases are placeholders.
;With CT1 AS (SELECT ..)
, CT2 AS (SELECT ..)
SELECT T1.*, T2.* FROM CT1 T1 INNER JOIN CT2 T2 ON (join Condition) WHERE (some Condition) GROUP BY (F1,F2, etc)
Now I need to join the result of this query to another CTE. What’s the best way? Can I make a CTE with the result of this Query? Any help would be greatly appreciated.
You can keep creating new CTEs based on previously defined ones. They may be joined or otherwise combined, subject to the rules for CTEs.
; with
-- Lookup: for each decimal digit, its Roman spelling in the ones, tens, hundreds
-- and thousands position.
ArabicRomanConversions as (
    select Arabic, Ones, Tens, Hundreds, Thousands
    from ( values
        ( 0, '',     '',     '',     ''    ),
        ( 1, 'I',    'X',    'C',    'M'   ),
        ( 2, 'II',   'XX',   'CC',   'MM'  ),
        ( 3, 'III',  'XXX',  'CCC',  'MMM' ),
        ( 4, 'IV',   'XL',   'CD',   '?'   ),
        ( 5, 'V',    'L',    'D',    '?'   ),
        ( 6, 'VI',   'LX',   'DC',   '?'   ),
        ( 7, 'VII',  'LXX',  'DCC',  '?'   ),
        ( 8, 'VIII', 'LXXX', 'DCCC', '?'   ),
        ( 9, 'IX',   'XC',   'CM',   '?'   )
    ) as Placeholder ( Arabic, Ones, Tens, Hundreds, Thousands )
),
-- The integers 1 through 3999, generated recursively.
Numbers as (
    select 1 as Number
    union all
    select Prev.Number + 1
    from Numbers as Prev
    where Prev.Number < 3999
),
-- Each number with its Roman form, assembled digit by digit from the lookup.
ArabicAndRoman as (
    select N.Number as Arabic,
           ( select C.Thousands from ArabicRomanConversions as C where C.Arabic = N.Number / 1000 ) +
           ( select C.Hundreds from ArabicRomanConversions as C where C.Arabic = N.Number / 100 % 10 ) +
           ( select C.Tens from ArabicRomanConversions as C where C.Arabic = N.Number / 10 % 10 ) +
           ( select C.Ones from ArabicRomanConversions as C where C.Arabic = N.Number % 10 ) as Roman
    from Numbers as N
),
-- Built on the previous CTE: numbers below 16 paired with their squares.
Squares as (
    select Base.Arabic, Base.Roman, Sq.Arabic as Square, Sq.Roman as RomanSquare
    from ArabicAndRoman as Base
    inner join ArabicAndRoman as Sq
        on Sq.Arabic = Base.Arabic * Base.Arabic
    where Base.Arabic < 16
),
-- Built on Squares: the cube is added wherever it is present in ArabicAndRoman.
Cubes as (
    select S.Arabic, S.Roman, S.Square, S.RomanSquare, Cu.Arabic as Cube, Cu.Roman as RomanCube
    from Squares as S
    inner join ArabicAndRoman as Cu
        on Cu.Arabic = S.Square * S.Arabic
)
select *
from Cubes
order by Arabic
-- Raises the recursion limit so that Numbers can reach 3999.
option ( MaxRecursion 3998 )
This is a format I have used a few times where a temp table is used to buffer one complex CTE which is output and then used again from temp with a second CTE.
It is useful if you need 2 result sets or if the complete CTE as one massive statement causes speed issues (breaking it up can be a huge performance improvement in some cases)
-- Buffers the output of one CTE in temp table #T_A, returns it as the first
-- result set, then joins it to a second CTE for the second result set.

-- The table is dropped up front because, when the query is executed over and
-- over, the object is sometimes not cleared before the next transaction.
-- An explicit existence check is used so that genuine errors are not swallowed.
IF OBJECT_ID('tempdb..#T_A') IS NOT NULL
    DROP TABLE #T_A;

WITH A AS (
    SELECT 'A' AS Name, 1 AS Value
    UNION ALL
    SELECT 'B', 2
)
SELECT A.Name, A.Value
INTO #T_A
FROM A;

-- Generate First Output Table
SELECT Name, Value
FROM #T_A;

WITH B AS (
    SELECT 'A' AS Name, 234 AS Other
    UNION ALL
    SELECT 'B', 456
)
-- Generate second result set from Temp table.
SELECT B.Name, B.Other, A.Value
FROM B
INNER JOIN #T_A AS A
    ON A.Name = B.Name
This produces a 2 table result set. Which is also handy in .NET filling a DataSet.