Count of Table1_IDs in Table2_arrays - sql

I'm working with two tables:
-- Table whose ids are referenced from Table2.link.
CREATE TABLE Table1
(
    id int,
    name varchar
);
-- Each row carries an array of ids; per the question, its values correspond to Table1.id.
CREATE TABLE Table2
(
    id int,
    name varchar,
    link array<int>
);
Table2.link contains values that correspond to Table1.id. I'd like to count how many times each Table1.id appears in an instance of Table2.link. This would be trivial using cell references in Excel, but I can't figure out how to do it with a SQL query.

Presto
-- Count how many times each id occurs across all Table2.link arrays,
-- then keep only the ids that exist in Table1.
with link_counts as (
    select
        u.id,
        count(*) as cnt
    from Table2
    cross join unnest(link) as u (id)
    group by u.id
)
select
    lc.id,
    lc.cnt
from link_counts as lc
where lc.id in (select id from Table1)
order by lc.id
presto:default> select *
-> from (select l.id
-> ,count(*) as cnt
-> from Table2 cross join unnest (link) as l(id)
-> group by l.id
-> ) t2
-> where t2.id in (select id from Table1)
-> order by id;
id | cnt
----+-----
1 | 7
2 | 5
3 | 4
(3 rows)
PostgreSQL demo
-- Fixtures: the ids to look up, and the arrays that reference them.
create table Table1 (id int);
create table Table2 (arr int[]);

insert into Table1 (id)
values (1), (2), (3);

insert into Table2 (arr)
values
    (array[1,5]),
    (array[1,3]),
    (array[1,2,3]),
    (array[2,3]),
    (array[1,2,4]),
    (array[1,2]),
    (array[1,3,5]),
    (array[1,2,4]);

-- Expand every array into one row per element, count the occurrences of each
-- element, and report only the elements that are ids of Table1.
with element_counts as (
    select
        u.id,
        count(*) as cnt
    from Table2
    cross join lateral unnest(arr) as u (id)
    group by u.id
)
select
    ec.id,
    ec.cnt
from element_counts as ec
where ec.id in (select id from Table1)
order by ec.id
+----+-----+
| id | cnt |
+----+-----+
| 1 | 7 |
+----+-----+
| 2 | 5 |
+----+-----+
| 3 | 4 |
+----+-----+

Related

BigQuery - Recursive query to get leaf rows

I have a table in BigQuery which contains self referencing data like -
id | name | parent_id
----------------------------
1 | Gross Margin | null
2 | Revenue | 1
3 | Sales A | 2
4 | Sales B | 2
5 | 1001 | 3
6 | 1002 | 4
7 | OPEX | null
8 | Salaries | 7
9 | Payroll | 8
10 | Allowances | 9
11 | Commissions | 9
I want to write a query that returns the leaf rows of any row. For example if I give Gross Margin (or 1) as input, the query should return 1001 and 1002 (or 5 and 6) as output. Similarly if I give OPEX (or 7) as input, the query should return Allowances and Commissions (or 10 and 11) as output.
Updated script with improved convergence logic
-- Iteration counter; the loop below gives up once it exceeds 20.
DECLARE run_away_stop INT64 DEFAULT 0;
-- MD5 fingerprint of ttt taken at the start of each pass, used to detect a pass that changed nothing.
DECLARE flag BYTES;
-- Seed: one row per non-NULL parent_id holding the sorted array of its direct child ids.
CREATE TEMP TABLE ttt AS
SELECT parent_id, ARRAY_AGG(id order by id) children FROM `project.dataset.table` WHERE NOT parent_id IS NULL GROUP BY parent_id;
LOOP
-- Bump the counter and fingerprint the current contents of ttt before this pass rewrites it.
SET (run_away_stop, flag) = (SELECT AS STRUCT run_away_stop + 1, md5(to_json_string(array_agg(t order by parent_id))) FROM ttt t);
-- ttt1: for every row t2 whose children array contains another row's parent_id (t1),
-- append t1's children to t2's, then merge per parent_id, de-duplicate and sort.
CREATE OR REPLACE TEMP TABLE ttt1 AS
SELECT parent_id, ARRAY(SELECT DISTINCT id FROM UNNEST(children) id order by id) children
FROM (
SELECT parent_id, ARRAY_CONCAT_AGG(children) children
FROM (
SELECT t2.parent_id, ARRAY_CONCAT(t1.children, t2.children) children
FROM ttt t1, ttt t2
WHERE (SELECT COUNTIF(t1.parent_id = id) FROM UNNEST(t2.children) id) > 0
) GROUP BY parent_id
);
-- Rebuild ttt from the merged rows plus the rows that were not merged in this pass.
CREATE OR REPLACE TEMP TABLE ttt AS
SELECT * FROM ttt1 UNION ALL
SELECT * FROM ttt WHERE NOT parent_id IN (SELECT parent_id FROM ttt1);
-- Stop when the pass left ttt unchanged (same fingerprint) or after more than 20 passes.
IF (flag = (SELECT md5(to_json_string(array_agg(t order by parent_id))) FROM ttt t)) OR run_away_stop > 20 THEN BREAK; END IF;
END LOOP;
-- Re-shape ttt: one row per distinct id of the source table with its accumulated children
-- aggregated into a single string; ids with no matching parent_id row get NULL (LEFT JOIN).
CREATE OR REPLACE TEMP TABLE ttt AS
SELECT id,
(
SELECT STRING_AGG(CAST(id AS STRING) order by id)
FROM ttt.children id
) children_as_list
FROM (SELECT DISTINCT id FROM `project.dataset.table`) d
LEFT JOIN ttt ON id = parent_id;
-- For each id, split its list again and keep only the entries whose own list is NULL,
-- i.e. entries that have no children themselves.
SELECT t1.id, STRING_AGG(child) leafs_list
FROM ttt t1, UNNEST(SPLIT(children_as_list)) child
JOIN (SELECT id FROM ttt WHERE children_as_list IS NULL) t2
ON t2.id = CAST(child AS INT64)
GROUP BY t1.id
ORDER BY id;
When applied to sample data in your question - it took 3 iterations and output is
Also, when applied to bigger example from your comments - it took 5 iterations and output is
BigQuery Team just introduced Recursive CTE! Hooray!!
With recursive cte you can use below approach
-- Walk upwards from every leaf: each step re-attaches the leaf id to the next ancestor,
-- so the CTE ends up holding one (leaf id, ancestor id) row per ancestor of each leaf.
WITH RECURSIVE iterations AS (
    -- Anchor: leaves are the rows whose id is never used as a parent_id.
    SELECT
        id,
        parent_id
    FROM your_table
    WHERE NOT id IN (
        SELECT parent_id
        FROM your_table
        WHERE parent_id IS NOT NULL
    )

    UNION ALL

    -- Step: replace the current ancestor with that ancestor's own parent.
    SELECT
        leaf.id,
        ancestor.parent_id
    FROM your_table AS ancestor
    INNER JOIN iterations AS leaf
        ON leaf.parent_id = ancestor.id
)
SELECT
    parent_id,
    STRING_AGG('' || id ORDER BY id) AS leafs_list
FROM iterations
WHERE parent_id IS NOT NULL
GROUP BY parent_id
If applied to sample data in your question - output is
Hope you agree it is more manageable and effective than when we were "forced" to use scripts for such logic!

SQL - Left Join many-to-many only once

I have a two tables that are setup like the following examples
tablea
ID | Name
1 | val1
1 | val2
1 | val3
2 | other1
3 | other
tableb
ID | Amount
1 | $100
2 | $50
My desired output would be to left join tableb to tablea but only join tableb once on each value. ID is the only relationship
tablea.ID | tablea.Name | tableb.id | tableb.amount
1 | val1 | 1 | $100
1 | val2
1 | val3
2 | other1 | 2 | $50
3 | other
Microsoft SQL
You can do the following:
-- Number every tablea row. tablea only has ID and Name, so ID is exposed as RowID
-- (the column name used in the result shown below); Name breaks ties so rows that
-- share an ID are numbered in a repeatable order.
SELECT
    ROW_NUMBER() OVER (ORDER BY ID ASC, Name ASC) AS RowNum,
    ID AS RowID,
    Name
FROM tablea
which gives you :
RowNum | RowID | Name
1 | 1 | val1
2 |1 | val2
3 |1 | val3
4 |2 | other1
5 |3 | other
You then get the minimum row number for each RowID:
-- Lowest row number per RowID: identifies the single tablea row of each ID
-- that should receive the tableb match.
SELECT
    numbered.RowID,
    MIN(numbered.RowNum) AS RowNumMin
FROM (
    -- tablea has no RowID column, so ID is aliased; SQL Server also requires
    -- the derived table itself to have an alias.
    SELECT
        ROW_NUMBER() OVER (ORDER BY ID ASC, Name ASC) AS RowNum,
        ID AS RowID,
        Name
    FROM tablea
) AS numbered
GROUP BY numbered.RowID
Once you have this you can then join tableb onto tablea only where the RowId is the minimum
-- Join tableb to only the first tablea row of each ID; the remaining rows of that ID
-- keep their tablea columns and get NULL for Amount.
WITH cteTableA AS (
    -- Number all rows; ID is exposed as RowID so every later reference uses one name.
    SELECT
        ROW_NUMBER() OVER (ORDER BY ID ASC, Name ASC) AS RowNum,
        ID AS RowID,
        Name
    FROM tablea
),
cteTableAMin AS (
    -- First (lowest-numbered) row of each ID.
    SELECT
        RowID,
        MIN(RowNum) AS RowNumMin
    FROM cteTableA
    GROUP BY RowID
)
SELECT
    a.RowID,
    a.Name,
    b.Amount
FROM cteTableA AS a
-- Matches only on the first row of each ID; other rows get NULL amin columns ...
LEFT JOIN cteTableAMin AS amin
    ON a.RowNum = amin.RowNumMin
    AND a.RowID = amin.RowID
-- ... and therefore find no tableb row here.
LEFT JOIN tableb AS b
    ON amin.RowID = b.ID
ORDER BY a.RowNum
This can be tidied up... but helps to show whats going on.
Then you MUST specify which row in tableA you wish to join to. If there is more than one row in the other table, how can the query processor know which one you want?
If you want the one with the lowest value of name, then you might do this:
-- Pair each tableB row with the tableA row of the same id that has the smallest name.
SELECT *
FROM tableB AS b
INNER JOIN tableA AS a
    ON a.id = b.id
    AND a.name = (
        SELECT MIN(candidate.name)
        FROM tableA AS candidate
        WHERE candidate.id = b.id
    )
but even that won't work if there are multiple rows with the same values for both id AND name. What you might really need is a Primary Key on tableA.
Use:
-- Number the rows of each table within their id, then pair row n of tablea with
-- row n of tableb; tablea rows without a same-numbered tableb row keep a NULL amount.
with ranked_a as (
    select
        id,
        name,
        row_number() over (partition by id order by name) as rn
    from tablea
),
ranked_b as (
    select
        id,
        amount,
        row_number() over (partition by id order by amount) as rn
    from tableb
)
select
    ra.id,
    ra.name,
    rb.amount
from ranked_a as ra
left join ranked_b as rb
    on rb.id = ra.id
    and rb.rn = ra.rn
order by ra.id, ra.name

Get groups that are exactly equal to a table

I have a query that groups easily. I need to get the groups that have exactly the same records to another table (relationship).
I'm using ANSI-SQL under SQL Server, but I accept an answer of any implementation.
For example:
Table1:
Id | Value
---+------
1 | 1
1 | 2
1 | 3
2 | 4
3 | 2
4 | 3
Table2:
Value | ...
------+------
1 | ...
2 | ...
3 | ...
In my example, the result is:
Id |
---+
1 |
This is how I imagined the code could look:
SELECT Table1.Id
FROM Table1
GROUP BY Table1.Id
HAVING ...? -- The group that has exactly the same elements of Table2
Thanks in advance!
You can try the following:
-- Ids whose set of values is exactly the set of values in Table2.
-- Starting from Table1 with a LEFT JOIN keeps a group's values that are missing
-- from Table2, so a group that merely contains all Table2 values plus extras is rejected.
select t1.Id
from Table1 t1
left join Table2 t2 on t2.value = t1.value
group by t1.Id
-- the group has as many distinct values as Table2 ...
having count(distinct t1.value) = (select count(distinct value) from Table2)
-- ... and every one of them was found in Table2
   and count(distinct t2.value) = count(distinct t1.value)
SQLFiddle
To get the same sets use an inner join:
-- NOTE(review): the question shows Table2 with a Value column and no id column, so this
-- join presumably should compare the Value columns instead — confirm.
SELECT Table1.Id
FROM Table1
INNER JOIN table2 ON table1.id=table2.id
GROUP BY Table1.Id
HAVING ...? -- condition left unspecified in this answer
-- Fixtures: #T1 holds (ID, value) pairs, #T2 holds the reference set of values.
CREATE TABLE #T1 (ID INT , [Values] INT) INSERT INTO #T1 VALUES (1,1),(1,2),(1,3),(2,4),(2,5),(3,6)
CREATE TABLE #T2 ([Values] INT) INSERT INTO #T2 VALUES (1),(2),(3),(4)
SELECT * FROM #T1
SELECT * FROM #T2
-- IDs whose value set equals the #T2 value set.
-- With the fixtures above no ID qualifies: #T2 has four values and no ID has all four.
SELECT A.ID
FROM
( -- distinct values per ID
SELECT ID , COUNT(DISTINCT [Values]) AS Count FROM #T1
GROUP BY ID
) A
JOIN
( -- distinct values per ID that also exist in #T2
SELECT T1.ID, COUNT(DISTINCT T2.[Values]) Count
FROM #T1 T1
JOIN #t2 T2
ON T1.[Values] = T2.[Values]
GROUP BY T1.ID
) B
-- equal counts: every value of the ID exists in #T2 (the ID's set is a subset)
ON A.ID = B.ID AND A.Count = B.Count
-- same size as #T2: together with the subset check this makes the sets equal
WHERE A.Count = (SELECT COUNT(DISTINCT [Values]) FROM #T2)

Insert data into temp table from 2 source tables

I have 2 SELECT statements that both return 13 rows from different tables.
I would like to create 1 temporary table with 2 columns and insert the 2 result rows into the 2 columns. Is there a way to do this?
So
1 - SELECT INPOS FROM TABLE1 returns
1,2,3,4,5,6,7,18,9,10,11,12,13
2 - SELECT CODE FROM TABLE2 returns
CODEA,CODEB,CODEC,CODED,CODEE,CODEF,CODEG,CODEH,CODEI,CODEJ,CODEK,CODEL,CODEM
I would like my temporary table to be
1 | CODEA
2 | CODEB
3 | CODEC
4 | CODED
5 | CODEE
6 | CODEF
7 | CODEG
8 | CODEH
9 | CODEI
10 | CODEJ
11 | CODEK
12 | CODEL
13 | CODEM
Try this:
-- Pair the n-th INPOS (in INPOS order) with the n-th CODE (in CODE order).
-- A statement takes a single WITH; further CTEs are separated by commas,
-- and there is no comma before the final SELECT.
WITH T1 AS (
    SELECT
        ROW_NUMBER() OVER (ORDER BY INPOS) AS ID,
        INPOS
    FROM TABLE1
),
T2 AS (
    SELECT
        ROW_NUMBER() OVER (ORDER BY CODE) AS ID,
        CODE
    FROM TABLE2
)
SELECT
    T1.INPOS,
    T2.CODE
FROM T1
INNER JOIN T2
    ON T1.ID = T2.ID
Try something like this:
-- Pair the n-th INPOS with the n-th code by position.
-- ROW_NUMBER is used instead of RANK: RANK gives tied values the same number,
-- which would make one row match several rows on the other side.
SELECT
    a.INPOS,
    b.code
FROM (
    SELECT
        INPOS,
        ROW_NUMBER() OVER (ORDER BY INPOS ASC) AS link
    FROM table1
) AS a
INNER JOIN (
    SELECT
        code,
        ROW_NUMBER() OVER (ORDER BY code ASC) AS link
    FROM table2
) AS b
    ON a.link = b.link
sqlfiddle demo

Detect range and count from a table

A table with 2 columns ordered by group, number:
group_id | number
---------+--------
1 | 101
1 | 102
1 | 103
1 | 106
2 | 104
2 | 105
2 | 107
What SQL query should I write to get the following output:
group_id | number_from | number_to | total
---------+-------------+------------+-------
1 | 101 | 103 | 3
1 | 106 | 106 | 1
2 | 104 | 105 | 2
2 | 107 | 107 | 1
Here is the SQL Fiddle demo
Below is the script
-- Fixture: A is the group id, B is the number.
create table Temp(A int,B int);
insert into Temp (A, B) values (1,101);
insert into Temp (A, B) values (1,102);
insert into Temp (A, B) values (1,103);
insert into Temp (A, B) values (1,106);
insert into Temp (A, B) values (2,104);
insert into Temp (A, B) values (2,105);
insert into Temp (A, B) values (2,107);
-- Gaps-and-islands: within a group, consecutive numbers share the same value of
-- (B - row number), so that difference identifies each run.
-- The row number is ordered explicitly by B per group, so the result does not
-- depend on the physical order of the rows in the table.
-- Assumes (A, B) pairs are distinct; a duplicated pair would split its run.
Select T.A as group_id,
       Min(T.B) as number_from,
       Max(T.B) as number_to,
       Count(*) as total
from
(
    select A,
           B,
           B - row_number() over (partition by A order by B) as island_key
    from Temp
) T
group by T.A, T.island_key
order by group_id, number_from
I used this as an example table:
-- Example fixture: id is the group, val is the number; rows are deliberately not sorted.
create table temp (id int, val int);
insert into temp (id, val)
values
    (1,101),
    (1,102),
    (2,102),
    (2,104),
    (2,107),
    (2,103),
    (2,105),
    (2,108),
    (2,110);
this is what you want:
-- One output row per (t1id, cnt) group with the smallest val, largest val and number of vals.
-- cnt is computed per row below and serves as the grouping key for a run of values.
select t1id,cnt, min(t1val) as min, max(t1val), count(t1val)
from (
select tt1.*,
-- cnt: how many rows earlier in (id, val) order have no val+1 successor within their id.
(select count (*) from
(
select t1.id as t1id,
t1.val as t1val,
-- t2val: the value val+1 of the same id if such a row exists, otherwise NULL
(select val from temp t2 where t1.id = t2.id and t2.val = t1.val+1 ) as t2val,
row_number() over (order by t1.id, t1.val ) as rn
from temp t1
) tt2
where tt2.t2val is null and tt2.rn < tt1.rn
) cnt
from (
-- same derived table as tt2 above: each row with its successor value and its position
select t1.id as t1id,
t1.val as t1val,
(select val from temp t2 where t1.id = t2.id and t2.val = t1.val+1 ) as t2val,
row_number() over (order by t1.id, t1.val ) as rn
from temp t1
) tt1
)ttt1
group by t1id, cnt
order by t1id, min
(update: fixed a bug that occurred if the table is unsorted)
-- Builds chains of consecutive ids per group: every chain starts at an id with no
-- left neighbour and is extended one id at a time; the final query keeps only the
-- longest chain for each starting point.
WITH RECURSIVE rope AS (
    SELECT i1.id AS low
         , i1.id AS high
         , i1.grp AS grp
         , 1::integer AS cnt
    FROM islands i1
    -- no left neighbor
    WHERE NOT EXISTS (
        SELECT * FROM islands x
        WHERE x.grp = i1.grp AND x.id = i1.id-1
    )
    UNION ALL
    SELECT ch.low AS low
         , i2.id AS high
         , i2.grp AS grp
         , 1+ch.cnt AS cnt
    FROM islands i2
    -- connect to left neighbor
    JOIN rope ch ON i2.grp = ch.grp AND i2.id = ch.high+1
)
SELECT * FROM rope r
-- suppress subchains: drop a chain when a longer one starts at the same low in the
-- same group. grp must be compared too, otherwise chains of different groups that
-- happen to start at the same id would suppress each other.
WHERE NOT EXISTS (
    SELECT * FROM rope nx
    WHERE nx.grp = r.grp AND nx.low = r.low AND nx.cnt > r.cnt
)
;