Biq Query - Count - google-bigquery

Biq Query - Count - google-bigquery

I have table that displays
ID employeename Supervisorname
I need to display two more columns: CountD, the number of employees who report directly to that supervisor, and CountI, the number of employees who report to them indirectly.
Snapshot sharedenter image description here
I tried COUNT(*) OVER (PARTITION BY Supervisorname), but it did not help.

Below is for BigQuery Standard SQL
Try below script
#standardSQL
-- For every employee, report how many people they supervise directly and how
-- many they supervise in total (directly or through other supervisors).
DECLARE rows_count, run_away_stop INT64 DEFAULT 0;

-- Sample hierarchy: one row per employee together with that employee's supervisor.
CREATE TEMP TABLE input AS (
  SELECT 1 AS id, 'A' AS employee, 'X' AS supervisor
  UNION ALL SELECT 2, 'B', 'X'
  UNION ALL SELECT 3, 'C', 'X'
  UNION ALL SELECT 4, 'X', 'F'
  UNION ALL SELECT 5, 'Y', 'F'
  UNION ALL SELECT 6, 'F', 'G'
);

-- ttt accumulates (supervisor, employee) pairs; it starts with the direct pairs only.
CREATE TEMP TABLE ttt AS
SELECT supervisor, employee
FROM input;

LOOP
  -- Remember the size before this pass and count the pass.
  SET rows_count = (SELECT COUNT(*) FROM ttt);
  SET run_away_stop = run_away_stop + 1;

  -- Extend every known pair one level up: the supervisor of a known supervisor
  -- also (indirectly) supervises that employee.
  CREATE OR REPLACE TEMP TABLE ttt AS
  SELECT supervisor, employee
  FROM ttt
  UNION DISTINCT
  SELECT boss.supervisor, reach.employee
  FROM input AS boss
  INNER JOIN ttt AS reach
    ON boss.employee = reach.supervisor;

  -- Stop when the pass cap is exceeded or when the pass added no new pairs.
  IF run_away_stop > 10 OR rows_count = (SELECT COUNT(*) FROM ttt) THEN
    BREAK;
  END IF;
END LOOP;

SELECT
  emp.*,
  COALESCE(direct_counts.direct_employees, 0) AS direct_employees,
  COALESCE(all_counts.all_employees, 0) AS all_employees
FROM input AS emp
LEFT JOIN (
  -- Direct reports per supervisor.
  SELECT supervisor, COUNT(*) AS direct_employees
  FROM input
  GROUP BY supervisor
) AS direct_counts
  ON emp.employee = direct_counts.supervisor
LEFT JOIN (
  -- Direct plus indirect reports per supervisor.
  SELECT supervisor, COUNT(*) AS all_employees
  FROM ttt
  GROUP BY supervisor
) AS all_counts
  ON emp.employee = all_counts.supervisor
-- Sort by the first output column (id in the sample data).
ORDER BY 1;
it returns desired output
To apply to your real table
Remove CREATE TEMP TABLE input AS ( ... ) statement
Instead of input table use your real table reference as your_project.your_dataset.your_table
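Also, pay attention to the run_away_stop > 10 expression - it is a safety guard that stops the loop after a fixed number of passes (with the condition as written, the loop stops on the 11th pass at the latest) - you can tune this number based on how deep the hierarchy in your data is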
As an option - you can try use Array instead of Temp table as in example below
#standardSQL
-- Same report as a script variable: the (supervisor, employee) pairs are kept
-- in an ARRAY of STRUCTs instead of a temp table.
DECLARE rows_count, run_away_stop INT64 DEFAULT 0;
DECLARE ttt ARRAY<STRUCT<supervisor STRING, employee STRING>> DEFAULT [];

-- Sample hierarchy: one row per employee together with that employee's supervisor.
CREATE TEMP TABLE input AS (
  SELECT 1 AS id, 'A' AS employee, 'X' AS supervisor
  UNION ALL SELECT 2, 'B', 'X'
  UNION ALL SELECT 3, 'C', 'X'
  UNION ALL SELECT 4, 'X', 'F'
  UNION ALL SELECT 5, 'Y', 'F'
  UNION ALL SELECT 6, 'F', 'G'
);

-- Start with the direct pairs only.
SET ttt = ARRAY(
  SELECT AS STRUCT supervisor, employee
  FROM input
);

LOOP
  -- Remember the size before this pass and count the pass.
  SET rows_count = ARRAY_LENGTH(ttt);
  SET run_away_stop = run_away_stop + 1;

  -- Extend every known pair one level up and de-duplicate.
  SET ttt = ARRAY(
    SELECT AS STRUCT pairs.supervisor, pairs.employee
    FROM (
      SELECT known.supervisor, known.employee
      FROM UNNEST(ttt) AS known
      UNION DISTINCT
      SELECT boss.supervisor, reach.employee
      FROM input AS boss
      INNER JOIN UNNEST(ttt) AS reach
        ON boss.employee = reach.supervisor
    ) AS pairs
  );

  -- Stop when the pass cap is exceeded or when the pass added no new pairs.
  IF run_away_stop > 10 OR rows_count = ARRAY_LENGTH(ttt) THEN
    BREAK;
  END IF;
END LOOP;

SELECT
  emp.*,
  COALESCE(direct_counts.direct_employees, 0) AS direct_employees,
  COALESCE(all_counts.all_employees, 0) AS all_employees
FROM input AS emp
LEFT JOIN (
  -- Direct reports per supervisor.
  SELECT supervisor, COUNT(*) AS direct_employees
  FROM input
  GROUP BY supervisor
) AS direct_counts
  ON emp.employee = direct_counts.supervisor
LEFT JOIN (
  -- Direct plus indirect reports per supervisor.
  SELECT reach.supervisor, COUNT(*) AS all_employees
  FROM UNNEST(ttt) AS reach
  GROUP BY reach.supervisor
) AS all_counts
  ON emp.employee = all_counts.supervisor
-- Sort by the first output column (id in the sample data).
ORDER BY 1;

Related

Can "value in list or list is empty" be written shorter?

Given this SQL:
SELECT *
FROM table1
WHERE table1.columnFoo = 123
  AND (
       some_value IS NULL
    OR some_value IN (SELECT column1 FROM table2 WHERE table1.colX = table2.colY)
    OR NOT EXISTS (SELECT column1 FROM table2 WHERE table1.colX = table2.colY)
  );
-- some_value is a constant or an input parameter of a (PL/)SQL procedure.
-- When it is non-null we filter by it, unless the selected list is empty.
Is there a way to write the "in list or list is empty" part shorter?
Preferably in a way that contains the list only once (see the Don't_repeat_yourself principle )
I'm interested for Oracle SQL or PL/SQL, but other information is also welcome.
As requested, a MRE that works in SQL*Plus:
-- Sample rows to be filtered: (id, name, price).
create table table1 as select 1 id, 'one' name , 12 price from dual
union select 2 , 'two' , 22 from dual
union select 3 , 'thr' , 33 from dual;
-- Candidate list values (idX) per table1 id (id1), each with a validity date;
-- the query below only considers rows whose validDate is already in the past.
create table table2 as select 1 id1, 88 idX, sysdate-1 validDate from dual -- valid
union select 1 , 99 , sysdate+2 from dual -- these two are not valid (yet)
union select 2 , 99 , sysdate+3 from dual;
-- SQL*Plus bind variable used as the optional filter value.
var some_value number
--exec :some_value := 3 -- uncomment for non null values
-- Keep a table1 row when no filter value is given, when the value is in the
-- row's valid list, or when the row's valid list is empty.
with cte as (select id1,idX from table2 where validDate<sysdate)
select * from table1
where
table1.price > 10
and
(
:some_value is null
or
:some_value in (select idX from cte where table1.id=cte.id1)
or
not exists (select idX from cte where table1.id=cte.id1)
);

From Oracle 12, you could use a LATERAL join with conditional aggregation:
-- Keep a table1 row when no filter value is supplied, when its list in table2
-- is empty, or when the list contains :some_value. The list is scanned once.
SELECT t1.*
FROM table1 t1
CROSS JOIN LATERAL (
  -- Aggregate query without GROUP BY: yields exactly one row when the HAVING
  -- condition holds and no row otherwise, so the cross join acts as the filter.
  SELECT 1 AS matched
  FROM table2 t2
  WHERE t1.colX = t2.colY
  -- The NULL test has to live here: with a NULL bind value the CASE never
  -- matches, so a non-empty list would otherwise produce no lateral row and
  -- the cross join would drop the table1 row before any outer predicate runs.
  HAVING :some_value IS NULL
      OR COUNT(*) = 0
      OR COUNT(CASE t2.column1 WHEN :some_value THEN 1 END) > 0
) t2
WHERE t1.columnFoo = 123;
Or a similar technique using EXISTS:
-- Same filter expressed with EXISTS: the aggregate subquery returns a row only
-- when the list for this table1 row is empty or contains :some_value.
SELECT *
FROM table1
WHERE columnFoo = 123
  AND (
    :some_value IS NULL
    OR EXISTS (
      SELECT 1
      FROM table2 t2
      WHERE table1.colX = t2.colY
      HAVING COUNT(*) = 0
          OR COUNT(CASE WHEN t2.column1 = :some_value THEN 1 END) > 0
    )
  );
db<>fiddle here

BigQuery - create a table and populate with data

With BigQuery is it possible to set up and populate data into a table defined as part of a 'WITH...' statement?
Like...
> with table1 as (
select id,title from (
> {1,'Fred'),
> {2,'Joe'),
> {3,'Mary'), ) as tt )

-- Inline "table": the first SELECT names the columns, the rest add rows.
WITH table1 AS (
  SELECT 1 AS id, 'Fred' AS title
  UNION ALL SELECT 2, 'Joe'
  UNION ALL SELECT 3, 'Mary'
)
SELECT *
FROM table1

Oracle SQL - Count based on a condition to include distinct rows with zero matches

Is there a "better" way to refactor the query below that returns the number occurrences of a particular value (e.g. 'A') for each distinct id? The challenge seems to be keeping id = 2 in the result set even though the count is zero (id = 2 is never related to 'A'). It has a common table expression, NVL function, in-line view, distinct, and left join. Is all of that really needed to get this job done? (Oracle 19c)
-- Sample data: id 2 never occurs with val = 'A'.
CREATE TABLE T (id, val) AS
  SELECT 1, 'A' FROM dual
  UNION ALL SELECT 1, 'B' FROM dual
  UNION ALL SELECT 1, 'A' FROM dual
  UNION ALL SELECT 2, 'B' FROM dual
  UNION ALL SELECT 2, 'B' FROM dual
  UNION ALL SELECT 3, 'A' FROM dual
;
-- a_counts: number of 'A' rows per id (ids without any 'A' are absent here).
WITH a_counts AS (
  SELECT id, val, COUNT(*) cnt
  FROM T
  WHERE val = 'A'
  GROUP BY id, val
)
-- Left join from the distinct ids so that ids without 'A' still appear, with 0.
SELECT all_ids.id, NVL(a_counts.cnt, 0) cnt_with_zero
FROM (SELECT DISTINCT id FROM T) all_ids
LEFT JOIN a_counts ON all_ids.id = a_counts.id
ORDER BY id
;
ID CNT_WITH_ZERO
---------- -------------
1 2
2 0
3 1

A simple way is conditional aggregation:
-- One row per id; the CASE yields NULL for non-'A' rows, which COUNT skips,
-- so ids without any 'A' report 0.
SELECT
  id,
  COUNT(CASE WHEN val = 'A' THEN 1 END) AS num_As
FROM t
GROUP BY id;
If you have another table with one row per id, then I would recommend:
-- Drive from the id table; the scalar subquery counts the 'A' rows for each id
-- and returns 0 when there are none.
SELECT
  i.id,
  (
    SELECT COUNT(*)
    FROM t
    WHERE t.id = i.id
      AND t.val = 'A'
  ) AS num_As
FROM ids i;

Oracle SQL Replace multiple characters in different positions

I'm using Oracle 11g and I'm having trouble replacing multiple characters based on positions mentioned in a different table. For example:
Table 1
PRSKEY POSITION CHARACTER
123 3 ć
123 9 ć
Table 2
PRSKEY NAME
123 Becirovic
I have to replace the NAME in Table 2 to Bećirović.
I've tried regexp_replace, but this function can't replace characters at more than one position in a single call. Is there an easy way to do this?

Here's another way to do it.
-- Rebuild each name character by character, substituting the replacement
-- character wherever tab1 lists one for that key and position.
with tab1 as (select 123 as prskey, 3 as position, 'ć' as character from dual
              union select 123, 9, 'ć' from dual),
     tab2 as (select 123 as prskey, 'Becirovic' as name from dual),
     -- One row per character of every name. The "prior" conditions keep the
     -- row generator inside a single tab2 row (prskey is assumed to identify
     -- a name); without them the hierarchy explodes as soon as tab2 holds
     -- more than one row.
     splitname as (
       select prskey, substr(name, level, 1) as namechar, level as lvl
       from tab2
       connect by level <= length(name)
              and prior prskey = prskey
              and prior sys_guid() is not null
     )
select listagg(nvl(tab1.character, splitname.namechar)) within group (order by splitname.lvl)
from splitname
left join tab1
  on tab1.position = splitname.lvl
 and tab1.prskey = splitname.prskey
-- One rebuilt name per key instead of all characters concatenated together.
group by splitname.prskey
;

Simple solution using cursor ...
-- Replacement instructions: put "character" at 1-based position "pos" of the
-- name stored for "prskey".
create table t1 (
  prskey int,
  pos int,
  -- Character (not byte) semantics, so a multi-byte character such as 'ć' fits.
  character char(1 char)
);
-- Names to be patched.
create table t2
(
  prskey int,
  name varchar2(100 char)
);
insert into t1 (prskey, pos, character) values (1, 1, 'b');
insert into t1 (prskey, pos, character) values (1, 3, 'e');
insert into t2 (prskey, name) values (1, 'dear');
-- Apply the replacements one at a time; every update keeps the text before
-- and after the position and swaps in the single replacement character.
begin
  for t1rec in (select prskey, pos, character from t1) loop
    update t2
    set name = substr(name, 1, t1rec.pos - 1) || t1rec.character || substr(name, t1rec.pos + 1)
    where t2.prskey = t1rec.prskey;
  end loop;
end;
/

I would prefer a PL/SQL approach, but the question is tagged only 'sql', so I made this monster:
-- t: replacement instructions (id, 1-based position, new character).
with t as (
select 123 as id, 3 as pos, 'q' as new_char from dual
union all
select 123 as id, 6 as pos, 'z' as new_char from dual
union all
select 123 as id, 9 as pos, '1' as new_char from dual
union all
select 456 as id, 1 as pos, 'A' as new_char from dual
union all
select 456 as id, 4 as pos, 'Z' as new_char from dual
),
-- t1: the strings to patch, one per id.
t1 as (
select 123 as id, 'Becirovic' as str from dual
union all
select 456 as id, 'Test' as str from dual
),
-- seg: each replacement joined to its string, plus "prev", the position right
-- after the previous replacement for the same id (1 for the first one).
seg as (
select t.id, t.pos, t.new_char, t1.str,
nvl(lag(t.pos) over (partition by t.id order by t.pos) + 1, 1) as prev
from t
inner join t1 on t.id = t1.id
)
-- Each segment is the untouched text since the previous replacement followed
-- by the new character. The text after the last replacement is appended
-- separately; without it, everything past the last position would be lost.
select listagg(substr(str, prev, pos - prev) || new_char) within group (order by pos)
|| substr(max(str), max(pos) + 1)
from seg
group by id
Result:
Beqirzvi1
AesZ

Exists - Not exists - Exclude records those are having status in 0 ignoring other status associated with that record

Below is my data.
WITH cte AS (
  -- (name, status) sample rows; only A has a status-0 row.
  SELECT 'A' AS name, 0 AS status
  UNION ALL SELECT 'A', 1
  UNION ALL SELECT 'B', 1
  UNION ALL SELECT 'C', 2
  UNION ALL SELECT 'D', 1
)
I want to get only B, C, D as output from the query. Lets say, 0 is status-complete & I want to ignore records associated with it.
This I am able to achieve using the not in clause as below.
-- Keep the rows whose name never appears with status 0.
SELECT *
FROM cte c
WHERE c.name NOT IN (
  SELECT cf.name
  FROM cte cf
  WHERE cf.status = 0
)
But I want to achieve this using exists or not exists clause in where condition.
Could you please share the logic ?
thanks,

Can you please try with this:
-- Keep a row only when no row with the same name has status 0.
SELECT *
FROM cte c
WHERE NOT EXISTS (
  SELECT cf.name
  FROM cte cf
  WHERE cf.status = 0
    AND cf.name = c.name
)
Unlike NOT IN, NOT EXISTS does not take a column in front of it in the outer WHERE clause: the comparison on name is written inside the subquery's own WHERE clause, which correlates it with the outer row.

Please try this
WITH cte AS (
  -- (name, status) sample rows; only A has a status-0 row.
  SELECT 'A' AS name, 0 AS status
  UNION ALL SELECT 'A', 1
  UNION ALL SELECT 'B', 1
  UNION ALL SELECT 'C', 2
  UNION ALL SELECT 'D', 1
)
-- Keep a row only when no row with the same name has status 0.
SELECT *
FROM cte c
WHERE NOT EXISTS (
  SELECT 1
  FROM cte cf
  WHERE c.name = cf.name
    AND cf.status = 0
)

With NOT EXISTS
WITH cte AS (
  -- (name, status) sample rows; only A has a status-0 row.
  SELECT 'A' AS name, 0 AS status
  UNION ALL SELECT 'A', 1
  UNION ALL SELECT 'B', 1
  UNION ALL SELECT 'C', 2
  UNION ALL SELECT 'D', 1
)
-- Keep a row only when no row with the same name has status 0.
SELECT *
FROM cte outer_row
WHERE NOT EXISTS (
  SELECT inner_row.name
  FROM cte inner_row
  WHERE inner_row.status = 0
    AND inner_row.name = outer_row.name
)

-- Table variable holding the sample (Name, Status) rows. Table variables are
-- named with a leading @; a leading # denotes a temp table, which is created
-- with CREATE TABLE and cannot be DECLAREd.
DECLARE @tbl1 TABLE
(
Name VARCHAR(50),
Status INT
)
INSERT INTO @tbl1 (Name, Status) VALUES
('A', 0),
('A', 1),
('B', 1),
('C', 1),
('D', 1),
('E', 0)
With Not EXISTS:
-- Keep a row only when no row with the same name has status 0.
SELECT
*
FROM @tbl1 T1
WHERE NOT EXISTS( SELECT T2.Name FROM @tbl1 T2 WHERE T2.Status=0 AND T1.Name=T2.Name)
With EXISTS:
-- Keep a row when the group of rows sharing its name contains no status-0
-- row. The CASE is NULL for non-zero statuses, so COUNT counts only the
-- status-0 rows; this holds for any status values and any number of rows
-- per name.
SELECT
*
FROM @tbl1 T1
WHERE EXISTS( SELECT T2.Name FROM @tbl1 T2 WHERE T1.Name=T2.Name GROUP BY T2.Name HAVING COUNT(CASE WHEN T2.Status=0 THEN 1 END)=0 )
Output:

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

Biq Query - Count - google-bigquery

I have table that displays ID employeename Supervisorname Need to display another column CountD: count of employee for the direct supervior,CountI which shows the count of employee indirect Snapshot sharedenter image description here Tried Count (*) over partition by Supervisorname but didnot help

Related

Can "value in list or list is empty" be written shorter?

BigQuery - create a table and populate with data

Oracle SQL - Count based on a condition to include distinct rows with zero matches

Oracle SQL Replace multiple characters in different positions

Exists - Not exists - Exclude records those are having status in 0 ignoring other status associated with that record

Categories

Resources