How to get previous N rows for multiple joining rows - sql

I am writing a SQL query using the Oracle Client 12 driver. I have two tables, simplified below, and I want to produce a table using the following logic. The "B.TIME_B <= A0.TIME_A" condition seems to create a massive join and makes the query very slow. Please help me find the best solution.
-- Slow baseline from the question: pair every 'a0' row of A with all rows of B
-- at or before it, number those B rows newest-first per 'a0' row, keep the top 3.
WITH A0 AS (
    SELECT *
    FROM A
    -- The sample table names this column EVENT_A (there is no EVENT column).
    WHERE A.EVENT_A = 'a0'
)
SELECT * FROM (
    SELECT
        -- Partition on TIME_A: A0 exposes A's columns unchanged, so TIME_A0 does not exist.
        ROW_NUMBER() OVER (PARTITION BY A0.TIME_A ORDER BY B.TIME_B DESC) RN,
        A0.*,
        B.*
    FROM
        A0
        INNER JOIN B
            ON B.TIME_B <= A0.TIME_A
) B0
WHERE B0.RN <= 3
Find TIME_A, where EVENT_A = 'a0', as TIME_A0,
Find TIME_B = TIME_A0, as EVENT_B0,
Then get that row and the previous 2 rows of table B, where EVENT_B0 is found. N in this example is 3, and M is 2, but in the real case both numbers are over 3000, so an efficient solution would be appreciated.
TableA
TIME_A EVENT_A
1 a1
2 a1
3 a1
4 a0
5 a2
6 a2
7 a3
8 a0
Table B
TIME_B EVENT_B
1 b1
2 b2
3 b3
4 b4
5 b5
6 b5
7 b6
8 b7
JOIN A_B
TIME_A EVENT_A TIME_B EVENT_B
4 a0 2 b2
4 a0 3 b3
4 a0 4 b4
8 a0 6 b5
8 a0 7 b6
8 a0 8 b7

Query 1:
If you are not going to have overlapping ranges then you can use:
-- For every B row, look at that row and the next two B rows (by TIME_B) and pick up
-- the matching 'a0' row of A, if one is joined to any of them.
WITH tagged AS (
    SELECT
        B.TIME_B,
        B.EVENT_B,
        MAX( A.TIME_A ) OVER (
            ORDER BY B.TIME_B ROWS BETWEEN CURRENT ROW AND 2 FOLLOWING
        ) AS TIME_A,
        MAX( A.EVENT_A ) OVER (
            ORDER BY B.TIME_B ROWS BETWEEN CURRENT ROW AND 2 FOLLOWING
        ) AS EVENT_A
    FROM tableB B
    LEFT JOIN tableA A
        ON A.TIME_A = B.TIME_B
       AND A.EVENT_A = 'a0'
)
-- Rows with no 'a0' match inside their 3-row window are dropped.
SELECT
    TIME_B,
    EVENT_B,
    TIME_A,
    EVENT_A
FROM tagged
WHERE TIME_A IS NOT NULL;
Which only uses a single join and then finds the valid rows with analytic functions.
Output:
TIME_B | EVENT_B | TIME_A | EVENT_A
-----: | :------ | -----: | :------
2 | b2 | 4 | a0
3 | b3 | 4 | a0
4 | b4 | 4 | a0
6 | b5 | 8 | a0
7 | b6 | 8 | a0
8 | b7 | 8 | a0
db<>fiddle here
Query 2:
If you could have overlapping ranges then you could use a hierarchical query to generate the rows:
-- Start from each B row that matched an 'a0' row of A (level 1) and attach the
-- two B rows immediately before it (level 2), reporting the root's A columns.
SELECT TIME_B,
       EVENT_B,
       CONNECT_BY_ROOT( TIME_A )  AS TIME_A,
       CONNECT_BY_ROOT( EVENT_A ) AS EVENT_A
FROM (
    SELECT A.*,
           B.*,
           -- Dense position of each B row by time, so "previous 2 rows" is rn - 2 .. rn - 1.
           ROW_NUMBER() OVER ( ORDER BY TIME_B ) AS rn
    FROM tableB B
    LEFT OUTER JOIN tableA A
        ON ( B.TIME_B = A.TIME_A AND A.EVENT_A = 'a0' )
)
START WITH EVENT_A IS NOT NULL
-- The depth bound lives in CONNECT BY rather than WHERE: a WHERE filter is applied only
-- after the hierarchy has been generated, so the walk would keep descending from every
-- level-2 row before the extra levels were thrown away.
CONNECT BY LEVEL <= 2
       AND PRIOR rn - 2 <= rn
       AND rn < PRIOR rn
ORDER BY time_a, time_b
Output:
TIME_B | EVENT_B | TIME_A | EVENT_A
-----: | :------ | -----: | :------
2 | b2 | 4 | a0
3 | b3 | 4 | a0
4 | b4 | 4 | a0
6 | b5 | 8 | a0
7 | b6 | 8 | a0
8 | b7 | 8 | a0
8 | b7 | 10 | a0
9 | b8 | 10 | a0
10 | b9 | 10 | a0
db<>fiddle here

This can be achieved using a simple join. No need to use any functions.
Try the following code, if TIME_A and TIME_B are continuous:
WITH tableA ( TIME_A, EVENT_A ) AS
(SELECT 1, 'a1' FROM DUAL UNION ALL
SELECT 2, 'a1' FROM DUAL UNION ALL
SELECT 3, 'a1' FROM DUAL UNION ALL
SELECT 4, 'a0' FROM DUAL UNION ALL
SELECT 5, 'a2' FROM DUAL UNION ALL
SELECT 6, 'a2' FROM DUAL UNION ALL
SELECT 7, 'a3' FROM DUAL UNION ALL
SELECT 8, 'a0' FROM DUAL),
tableB ( TIME_B, EVENT_B ) AS
(SELECT 1, 'b1' FROM DUAL UNION ALL
SELECT 2, 'b2' FROM DUAL UNION ALL
SELECT 3, 'b3' FROM DUAL UNION ALL
SELECT 4, 'b4' FROM DUAL UNION ALL
SELECT 5, 'b5' FROM DUAL UNION ALL
SELECT 6, 'b5' FROM DUAL UNION ALL
SELECT 7, 'b6' FROM DUAL UNION ALL
SELECT 8, 'b7' FROM DUAL)
SELECT
TIME_A,
EVENT_A,
TIME_B,
EVENT_B
FROM
TABLEA A
JOIN TABLEB B ON ( EVENT_A = 'a0'
AND TIME_B BETWEEN TIME_A - 2 AND TIME_A )
ORDER BY
TIME_A,
TIME_B
Try the following code, if TIME_A and TIME_B are not continuous:
WITH tableA ( TIME_A, EVENT_A ) AS
(SELECT 1, 'a1' FROM DUAL UNION ALL
SELECT 2, 'a1' FROM DUAL UNION ALL
SELECT 3, 'a1' FROM DUAL UNION ALL
SELECT 4, 'a0' FROM DUAL UNION ALL
SELECT 5, 'a2' FROM DUAL UNION ALL
SELECT 6, 'a2' FROM DUAL UNION ALL
SELECT 7, 'a3' FROM DUAL UNION ALL
SELECT 8, 'a0' FROM DUAL),
tableB ( TIME_B, EVENT_B ) AS
(SELECT 1, 'b1' FROM DUAL UNION ALL
SELECT 2, 'b2' FROM DUAL UNION ALL
SELECT 3, 'b3' FROM DUAL UNION ALL
SELECT 4, 'b4' FROM DUAL UNION ALL
SELECT 5, 'b5' FROM DUAL UNION ALL
SELECT 6, 'b5' FROM DUAL UNION ALL
SELECT 7, 'b6' FROM DUAL UNION ALL
SELECT 8, 'b7' FROM DUAL)
SELECT
TIME_A,
EVENT_A,
TIME_B,
EVENT_B FROM
(SELECT
TIME_A,
EVENT_A,
TIME_B,
EVENT_B,
ROW_NUMBER() OVER (PARTITION BY TIME_A ORDER BY TIME_B DESC NULLS LAST) AS RN
FROM
TABLEA A
JOIN TABLEB B ON ( EVENT_A = 'a0'
AND TIME_B <= TIME_A ))
WHERE RN <= 3
ORDER BY
TIME_A,
TIME_B
DB Fiddle demo
Cheers!!

Related

How to select first x records from second table

I would like to get all records from first table and only x records from second table.
The number of records to take from the second table is stored in the first table:
My tables are
table1 :
WITH table1(a,b) AS
(
SELECT 'aa',3 FROM dual UNION ALL
SELECT 'bb',2 FROM dual UNION ALL
SELECT 'cc',4 FROM dual
)
SELECT *
FROM table1;
a | b (number of records from table2 (x))
------
aa | 3
bb | 2
cc | 4
table2 :
WITH table2(a,b) AS
(
SELECT 'aa','1xx' FROM dual UNION ALL
SELECT 'aa','2yy' FROM dual UNION ALL
SELECT 'aa','3ww' FROM dual UNION ALL
SELECT 'aa','4zz' FROM dual UNION ALL
SELECT 'aa','5qq' FROM dual UNION ALL
SELECT 'bb','1aa' FROM dual UNION ALL
SELECT 'bb','2bb' FROM dual UNION ALL
SELECT 'bb','3cc' FROM dual UNION ALL
SELECT 'cc','1oo' FROM dual UNION ALL
SELECT 'cc','2uu' FROM dual UNION ALL
SELECT 'cc','3tt' FROM dual UNION ALL
SELECT 'cc','4zz' FROM dual UNION ALL
SELECT 'cc','5rr' FROM dual
)
SELECT *
FROM table2;
a | b
--------
aa | 1xx
aa | 2yy
aa | 3ww
aa | 4zz
aa | 5qq
bb | 1aa
bb | 2bb
bb | 3cc
bb | 4dd
bb | 5ee
cc | 1oo
cc | 2uu
cc | 3tt
cc | 4zz
cc | 5rr
Expected Result:
a | b
--------
aa | 1xx
aa | 2yy
aa | 3ww
bb | 1aa
bb | 2bb
cc | 1oo
cc | 2uu
cc | 3tt
cc | 4zz
You can use ROW_NUMBER() analytic function with LEFT/RIGHT OUTER JOIN among the tables :
-- Number table2's rows inside each "a" group (ordered by b), then keep only the
-- rows whose number does not exceed the limit stored in table1.b for that group.
WITH ranked AS
(
    SELECT src.a,
           src.b,
           ROW_NUMBER() OVER (PARTITION BY src.a ORDER BY src.b) AS rn
    FROM table2 src
)
SELECT r.a, r.b
FROM ranked r
INNER JOIN table1 lim
    ON lim.a = r.a
   AND r.rn <= lim.b
Demo
You need to write something like:
SELECT a,
b
FROM Table2 T,
( SELECT LEVEL L FROM DUAL
CONNECT BY LEVEL <= (SELECT MAX(b) FROM Table1)
) A
WHERE T.b>= A.L
ORDER BY T.a;
Ideally you should have a ordering column in table2. When you say first X rows it does not make any sense unless you have something like an id or date field to order the records.
Anyway, assuming the number part of the column b in table 2 for ordering and assuming the number would be followed by 2 characters only such as xx,yy etc you can use the logic below
-- Keep the first Table1.b rows of each "a" group in Table2, ordered by the numeric
-- prefix of Table2.b (everything except its last two characters).
Select Tb1.a, Tb1.b
from
    -- to_number() makes the prefix sort numerically; as a string '10' would sort before '2'.
    (Select t.*,
            row_number() over (partition by a
                               order by to_number(substr(b, 1, length(b) - 2))) as seq
     from Table2 t) Tb1
join Table1 Tb2
    on Tb1.a = Tb2.a
Where Tb1.seq <= Tb2.b;
Demo - https://dbfiddle.uk/?rdbms=oracle_11.2&fiddle=3030b2372bcbb007606bbb6481af9884
Again it's just a job for lateral:
WITH prep AS
(
SELECT *
FROM tab1,
LATERAL
(
SELECT LEVEL AS lvl
FROM dual
CONNECT BY LEVEL <= b
)
)
SELECT p.a, t2.b
FROM prep p
JOIN tab2 t2
ON p.lvl = regexp_substr(t2.b,'^\d+')
AND p.a = t2.a
ORDER BY p.a, p.lvl

Selecting rows with exist operator using OR conditions

I want to select rows from one table where Code or DST or Short_text or Long_text are equal (in 2 or more rows) AND the IDs are not equal.
ID Code DST Short_text Long_text
1 B 01 B 1 Bez1 Bezirk1
1 B 01 B 1 Bez1 Bezirk1
2 B 02 B 2 Bez2 Bezirk2
3 B 03 B 3 Bez3 Bezirk3
4 B 04 B 4 Bez4 Bezirk4
4 B 04 B 4 Bez4 Bezirk4
5 B 05 B 5 Bez5 Bezirk5
6 B 06 B 6 Bez6 Bezirk6
7 B 07 B 7 Bez7 Bezirk7
8 B 08 B 8 Bez8 Bezirk8
9 B 09 B 9 Bez9 Bezirk9
97 M 51 M 52 MA 51 Sport
96 M 51 M 51 MA 51 Sport
And I want to get the following result:
97 M 51 M 52 MA 51 Sport
96 M 51 M 51 MA 51 Sport
because they have different IDs, but they have a similar Code OR similar Short_text OR similar Long_text.
Here is what I have tried:
-- Rows from the latest load for which some other row with a different ID shares
-- at least one of CODE, DST, short_text or long_text.
select
    tbl.ID,
    tbl.CODE,
    tbl.DST,
    tbl.Short_text,
    tbl.Long_text   -- no trailing comma before FROM (it was a syntax error)
from Main_table tbl
where tbl.load_date = (select max(load_date) from Main_table)
and exists
(
    select 1 from Main_table other
    where
        tbl.ID != other.ID
        and (tbl.CODE = other.CODE
          or tbl.DST = other.DST
          or tbl.short_text = other.short_text
          or tbl.long_text = other.long_text)
);
But it doesn't give me a desired result.
Do you have ideas how can I improve my query?
That would be
SQL> select * from main_table;
ID CODE DST SHORT LONG_TE
---------- ---- ---- ----- -------
1 B 01 B 1 Bez1 Bezirk1
1 B 01 B 1 Bez1 Bezirk1
2 B 02 B 2 Bez2 Bezirk2
3 B 03 B 3 Bez3 Bezirk3
4 B 04 B 4 Bez4 Bezirk4
4 B 04 B 4 Bez4 Bezirk4
5 B 05 B 5 Bez5 Bezirk5
6 B 06 B 6 Bez6 Bezirk6
7 B 07 B 7 Bez7 Bezirk7
8 B 08 B 8 Bez8 Bezirk8
9 B 09 B 9 Bez9 Bezirk9
97 M 51 M 52 MA 51 Sport
96 M 51 M 51 MA 51 Sport
13 rows selected.
SQL> select a.*
2 from main_table a
3 join main_table b
4 on a.id <> b.id
5 and ( a.code = b.code
6 or a.dst = b.dst
7 or a.short_text = b.short_text
8 or a.long_text = b.long_text);
ID CODE DST SHORT LONG_TE
---------- ---- ---- ----- -------
97 M 51 M 52 MA 51 Sport
96 M 51 M 51 MA 51 Sport
SQL>
You can use analytic functions to avoid a self-join:
Oracle Setup:
CREATE TABLE table_name ( ID, Code, DST, Short_text, Long_text ) as
select 1, 'B 01', 'B 1', 'Bez1', 'Bezirk1' from dual union all
select 1, 'B 01', 'B 1', 'Bez1', 'Bezirk1' from dual union all
select 2, 'B 02', 'B 2', 'Bez2', 'Bezirk2' from dual union all
select 3, 'B 03', 'B 3', 'Bez3', 'Bezirk3' from dual union all
select 4, 'B 04', 'B 4', 'Bez4', 'Bezirk4' from dual union all
select 4, 'B 04', 'B 4', 'Bez4', 'Bezirk4' from dual union all
select 5, 'B 05', 'B 5', 'Bez5', 'Bezirk5' from dual union all
select 6, 'B 06', 'B 6', 'Bez6', 'Bezirk6' from dual union all
select 7, 'B 07', 'B 7', 'Bez7', 'Bezirk7' from dual union all
select 8, 'B 08', 'B 8', 'Bez8', 'Bezirk8' from dual union all
select 9, 'B 09', 'B 9', 'Bez9', 'Bezirk9' from dual union all
select 97, 'M 51', 'M 52', 'MA 51', 'Sport' from dual union all
select 96, 'M 51', 'M 52', 'MA 51', 'Sport' from dual;
Query:
-- For each row, count how many distinct IDs share its code, dst, short_text and
-- long_text; keep the rows where at least one of those values is shared by 2+ IDs.
WITH id_counts AS (
    SELECT
        t.*,
        COUNT( DISTINCT t.id ) OVER ( PARTITION BY t.code )       AS num_code,
        COUNT( DISTINCT t.id ) OVER ( PARTITION BY t.dst )        AS num_dst,
        COUNT( DISTINCT t.id ) OVER ( PARTITION BY t.short_text ) AS num_short_text,
        COUNT( DISTINCT t.id ) OVER ( PARTITION BY t.long_text )  AS num_long_text
    FROM table_name t
)
SELECT
    ID,
    Code,
    DST,
    Short_text,
    Long_text
FROM id_counts
-- "any count > 1" expressed as "the largest count > 1"; COUNT never returns NULL.
WHERE GREATEST( num_code, num_dst, num_short_text, num_long_text ) > 1
Output:
ID | CODE | DST | SHORT_TEXT | LONG_TEXT
-: | :--- | :--- | :--------- | :--------
96 | M 51 | M 52 | MA 51 | Sport
97 | M 51 | M 52 | MA 51 | Sport
db<>fiddle here
You can use count(*) aggregation with two consecutive HAVING clauses:
select ID, Code, DST, Short_text, Long_text
from Main_table
where (Code, DST, Short_text, Long_text) in
(select Code, DST, Short_text, Long_text
from Main_table
group by Code, DST, Short_text, Long_text
having count(*) > 1)
group by ID, Code, DST, Short_text, Long_text
having count(*) = 1
Alternatively, use the count(*) over (partition by ...) analytic function twice, once with a partition clause that excludes the ID column and once with one that includes it:
with m2 as
(
select m.*,
count(*) over ( partition by Code, DST, Short_text, Long_text ) as cnt1,
count(*) over ( partition by ID, Code, DST, Short_text, Long_text ) as cnt2
from Main_table m
)
select ID, Code, DST, Short_text, Long_text
from m2
where cnt1 > 1 and cnt2 = 1
Demo
You can use below query
select mt1.ID, mt1.Code, mt1.DST, mt1.Short_text, mt1.Long_text from main_table as mt1
Cross Apply(
select * from main_table as mt2 where mt1.id!= mt2.id and ( mt1.code=mt2.code or mt1.short_text =mt2.short_text or mt1.long_text = mt2.long_text )
) cv

find the difference between 2 rows

I need to find the difference between the rows c2 in the below table
SEQ_ID Priv_ID Common_ID Source_ID C1 C2
------ -------- --------- --------- -- ---
1 1 C001 S1 abc 32331299300
2 1 C001 S1 def 12656678121
3 1 C001 S1 ghi 8966492700
4 1 C001 S2 abc 32331292233
5 1 C001 S2 ghi 8966492700
6 1 C001 S2 def 12656672000
expected output should be as below,
SEQ_ID Priv_ID Common_ID C1 C2
------ -------- --------- -- ---
1 1 C001 abc 7067
2 1 C001 def 6121
3 1 C001 ghi 0
Please assist.
How about this? I didn't use columns that are the same for all rows (so they don't make any difference).
SQL> with test (seq_id, source_id, c1, c2) as
2 (select 1, 's1', 'abc', 32331299300 from dual union all
3 select 2, 's1', 'def', 12656678121 from dual union all
4 select 3, 's1', 'ghi', 8966492700 from dual union all
5 select 4, 's2', 'abc', 32331292233 from dual union all
6 select 5, 's2', 'ghi', 8966492700 from dual union all
7 select 6, 's2', 'def', 12656672000 from dual
8 )
9 select min(seq_id) seq_id,
10 c1,
11 max(case when source_id = 's1' then c2 end) +
12 max(case when source_id = 's2' then -c2 end) c2
13 from test
14 group by c1
15 order by 1;
SEQ_ID C1 C2
---------- --- ----------
1 abc 7067
2 def 6121
3 ghi 0
SQL>
Hmmm . . . One method would be conditional aggregation. But the key is row_number():
-- Difference of c2 between source S1 and source S2 for each (Priv_ID, Common_ID, c1).
select Priv_ID, Common_ID, c1,
       -- SUM, not MAX: S1 contributes +c2 and S2 contributes -c2, so the total is
       -- S1 - S2. MAX would just return the positive S1 value.
       sum(case when source_ID = 'S1' then c2
                when source_ID = 'S2' then -c2
           end) as diff
from (select t.*,
             -- seqnum is computed here but not referenced by the outer query.
             row_number() over (partition by Priv_ID, Common_ID, c1 order by seq_id) as seqnum
      from t
     ) t
group by Priv_ID, Common_ID, c1

Removing entries with duplicates in specific columns SQL

I have 3 columns: A B C. I only want rows that share the same value in col A but different values for both B and C.
1 | item1 | Jan | Amy
2 | item1 | Feb | Amy
3 | item2 | Mar | Bob
4 | item2 | Mar | Bill
5 | item3 | Apr | Charles
6 | item3 | May | Doug
7 | item4 | Jun | Felix
Out of the example above. I want it to show rows 5, 6 and 7.
Is there any good way of doing this?
If I understand your need correctly, this could be one way, with a single scan of the table:
with test(id, a, b, c) as
(
select 1, 'item1', 'Jan', 'Amy' from dual union all
select 2, 'item1', 'Feb', 'Amy' from dual union all
select 3, 'item2', 'Mar', 'Bob' from dual union all
select 4, 'item2', 'Mar', 'Bill' from dual union all
select 5, 'item3', 'Apr', 'Charles' from dual union all
select 6, 'item3', 'May', 'Doug' from dual union all
select 7, 'item4', 'Jun', 'Felix' from dual
)
select id, a, b, c
from (
select id, a, b, c,
count(distinct b) over (partition by a) count_b,
count(distinct c) over (partition by a) count_c,
count(1) over (partition by a) count_a
from test
)
where count_a = count_b
and count_a = count_c
The result:
ID A B C
---------- ----- --- -------
5 item3 Apr Charles
6 item3 May Doug
7 item4 Jun Felix
Use not exists:
-- Keep a row only if no other row (different id) with the same "a"
-- repeats its "b" value or its "c" value.
select cur.*
from t cur
where not exists (
          select 1
          from t other
          where other.id <> cur.id
            and other.a = cur.a
            and (other.b = cur.b or other.c = cur.c)
      );
This assumes that there is a column (id here) that uniquely identifies each row. If you don't have one and the table doesn't have duplicates, then you can use:
select t.*
from t
where not exists (select 1
from t t2
where t2.a = t.a and
(t2.b = t.b or t2.c = t.c) and
not (t2.b = t.b and t2.c = t.c)
);

BigQuery SQL - a way to pass values from more than one row and more than one column to User Defined Function

I want to create a User Defined Function, (CREATE TEMPORARY FUNCTION) in BigQuery Standard SQL which will accept values aggregated from a bunch of rows.
My schema and table is similar to this:
| c1 | c2 | c3 | c4 |
|=======|=======|=======|=======|
| 1 | 1-1 | 3A | 4A |
| 1 | 1-1 | 3B | 4B |
| 1 | 1-1 | 3C | 4C |
| 1 | 1-2 | 3D | 4D |
| 2 | 2-1 | 3E | 4E |
| 2 | 2-1 | 3F | 4F |
| 2 | 2-2 | 3G | 4G |
| 2 | 2-2 | 3H | 4H |
I can't change the original schema to be made of nested or ARRAY fields.
I want to group by c1 and by c2 and pass values of c3 and c4 to a function, while being able to match between values from c3 and c4 for each row.
One way of doing so is using ARRAY_AGG and pass values as an Array, but ARRAY_AGG is non-deterministic so values from c3 and c4 might come with different orders than the source table.
Example:
CREATE TEMPORARY FUNCTION
tempConcatStrFunction(c3 ARRAY<STRING>, c4 ARRAY<STRING>)
RETURNS STRING
LANGUAGE js AS """
return
c3
.map((item, index) => [ item, c4[index] ].join(','))
.join(',');
""";
WITH T as (
SELECT c1, c2, ARRAY_AGG(c3) as c3, ARRAY_AGG(c4) as c4
GROUP BY c1, c2
)
SELECT c1, c2, tempConcatStrFunction(c3, c4) as str from T
The result should be:
| c1 | c2 | str |
|=======|=======|======================|
| 1 | 1-1 | 3A,4A,3B,4B,3C,4C |
| 1 | 1-2 | 3D,4D |
| 2 | 2-1 | 3E,4E,3F,4F |
| 2 | 2-2 | 3G,4G,3H,4H |
Any ideas how to achieve such results?
Any ideas how to achieve such results?
I understand your question is about how to keep c3 and c4 match each other in final string. How about just keep it super simple as below
SELECT c1, c2, STRING_AGG(CONCAT(c3, ',', c4)) AS str
FROM yourTable
GROUP BY c1, c2
A couple of examples that may help with setting up a query:
WITH T AS (
SELECT 1 AS c1, '1-1' AS c2, '3A' AS c3, '4A' AS c4 UNION ALL
SELECT 1, '1-1', '3B', '4B' UNION ALL
SELECT 1, '1-1', '3C', '4C' UNION ALL
SELECT 1, '1-2', '3D', '4D' UNION ALL
SELECT 2, '2-1', '3E', '4E' UNION ALL
SELECT 2, '2-1', '3F', '4F' UNION ALL
SELECT 2, '2-2', '3G', '4G' UNION ALL
SELECT 2, '2-2', '3H', '4H'
)
SELECT
c1,
c2,
STRING_AGG(CONCAT(c3, ',', c4)) AS str
FROM T
GROUP BY 1, 2;
This takes the unaggregated inputs (as in Mikhail's answer) and does string concatenation.
If the inputs are already aggregated into arrays, ideally they would repeat together, e.g.:
-- Sample rows standing in for the real table.
WITH T AS (
  SELECT 1 AS c1, '1-1' AS c2, '3A' AS c3, '4A' AS c4 UNION ALL
  SELECT 1, '1-1', '3B', '4B' UNION ALL
  SELECT 1, '1-1', '3C', '4C' UNION ALL
  SELECT 1, '1-2', '3D', '4D' UNION ALL
  SELECT 2, '2-1', '3E', '4E' UNION ALL
  SELECT 2, '2-1', '3F', '4F' UNION ALL
  SELECT 2, '2-2', '3G', '4G' UNION ALL
  SELECT 2, '2-2', '3H', '4H'
),
-- One row per (c1, c2); each array element is a struct, so a row's c3 and c4
-- stay paired with each other.
U AS (
  SELECT
    c1,
    c2,
    ARRAY_AGG(STRUCT(c3, c4)) AS arr
  FROM T
  -- The grouping belongs here, next to ARRAY_AGG, not on the outer query.
  GROUP BY c1, c2
)
SELECT
  c1,
  c2,
  (SELECT STRING_AGG(CONCAT(c3, ',', c4)) FROM UNNEST(arr)) AS str
FROM U;
If the arrays are separate, but have a consistent order (and length), you can recombine them after the fact:
-- Sample rows standing in for the real table.
WITH T AS (
  SELECT 1 AS c1, '1-1' AS c2, '3A' AS c3, '4A' AS c4 UNION ALL
  SELECT 1, '1-1', '3B', '4B' UNION ALL
  SELECT 1, '1-1', '3C', '4C' UNION ALL
  SELECT 1, '1-2', '3D', '4D' UNION ALL
  SELECT 2, '2-1', '3E', '4E' UNION ALL
  SELECT 2, '2-1', '3F', '4F' UNION ALL
  SELECT 2, '2-2', '3G', '4G' UNION ALL
  SELECT 2, '2-2', '3H', '4H'
),
-- Two parallel arrays per (c1, c2); both use the same ORDER BY so that
-- position i in arr3 and position i in arr4 come from the same source row.
U AS (
  SELECT
    c1,
    c2,
    ARRAY_AGG(c3 ORDER BY c3, c4) AS arr3,
    ARRAY_AGG(c4 ORDER BY c3, c4) AS arr4
  FROM T
  GROUP BY c1, c2
)
SELECT
  c1,
  c2,
  -- Emit "c3,c4" pairs (c3 first) in array order; the ORDER BY on the offset
  -- makes the concatenation order explicit.
  (SELECT STRING_AGG(CONCAT(c3, ',', arr4[OFFSET(off)]) ORDER BY off)
   FROM UNNEST(arr3) AS c3 WITH OFFSET off) AS str
FROM U;