PrestoSQL: aggregate columns and rows into one column

I would like to aggregate some columns and rows into one column in a PrestoSQL table.
with example_table as (
select * from (
values ('A', 'nh', 7), ('A', 'mn', 4), ('A', 'sv', 3),
('B', 'tb', 6), ('B', 'ty', 5), ('A', 'rw', 2),
('C', 'op', 9), ('C', 'au', 8)
) example_table("id", "time", "value")
)
select id, array_agg(value, time) -- error: Unexpected parameters (integer, VARCHAR(2)) for function array_agg. Expected: array_agg(T) T
from example_table
group by id
I would like to combine the "time" and "value" columns into one column and then aggregate all rows by "id", so that:
id   time_value_agg
A    [['nh', 7], ['mn', 4], ['sv', 3], ['rw', 2]]
B    [['tb', 6], ['ty', 5]]
C    [['op', 9], ['au', 8]]
The column time_value_agg should be an array of strings; if the "time" column is not a string, cast it to one. I am not sure which function can be used for this.
Thanks

array_agg can be applied to a single column only. If times are unique per id, you can turn the data into a map:
select id, map(array_agg(time), array_agg(value)) time_value_agg
from example_table
group by id
Output:
id   time_value_agg
C    {op=9, au=8}
A    {mn=4, sv=3, rw=2, nh=7}
B    {ty=5, tb=6}
Or turn the data into a ROW type (or map) before aggregation:
select id,
array_agg(arr) time_value_agg
from (
select id, cast(row(time, value) as row(time varchar, value integer)) as arr
from example_table
)
group by id
Output:
id   time_value_agg
C    [{time=op, value=9}, {time=au, value=8}]
A    [{time=nh, value=7}, {time=mn, value=4}, {time=sv, value=3}, {time=rw, value=2}]
B    [{time=tb, value=6}, {time=ty, value=5}]
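If you specifically want an array of string pairs, as in the expected output, a minimal sketch (assuming casting value to varchar is acceptable) aggregates two-element varchar arrays:
select id,
array_agg(array[time, cast(value as varchar)]) as time_value_agg
from example_table
group by id
This yields one array(array(varchar)) value per id, roughly [[nh, 7], [mn, 4], [sv, 3], [rw, 2]] for id A (element order within the array is not guaranteed).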

Related

Postgres SQL - How do I aggregate groups of multiple consecutive rows?

I have data representing tagged continuous spans in a single table with <tag, start, stop> columns.
Example below.
I'm trying to combine multiple rows into a single row where the condition is that they create a "continuous span".
In the query below, I would like LEFT_MOST_CONTINUOUS to return the minimum v_start of a continuous span (and likewise RIGHT_MOST_CONTINUOUS the maximum v_stop). Note that there might be more than a single continuous span (each with its own v_start and v_stop values).
Input:
WITH data AS (
SELECT *
FROM (VALUES
('a', 2, 3),
('a', 3, 5),
('a', 5, 7),
('a', 8, 10),
('a', 10, 12),
('a', 12, 14),
('b', 7, 8),
('b', 8, 10),
('b', 12, 15),
('c', 10, 11)
) AS T(tag, v_start, v_stop)
ORDER BY tag, v_start, v_stop
)
SELECT tag,
LEFT_MOST_CONTINUOUS(v_start) OVER (PARTITION BY tag),
RIGHT_MOST_CONTINUOUS(v_stop) OVER (PARTITION BY tag)
FROM data
ORDER BY 1, 2, 3
Where I expect to get the following output:
"a" 2 7
"a" 8 14
"b" 7 10
"b" 12 15
"c" 10 11
I want to merge the first 3 tuples (for tag "a"), which are consecutive, into a single value representing the entire span; same for the next 3 tuples (again for "a").
Then for "b" we can merge the first 2, but leave out the 3rd (whose v_start != the others' v_stop).
And for "c" there is nothing to merge with.
Help appreciated,
Tal
You can use a gaps-and-islands approach, marking the first record of each group when either there is no previous record for the tag or its v_start is greater than the v_stop of the previous record:
select tag, v_start, v_stop,
coalesce(lag(v_stop) over w < v_start, true) as is_end_grp
from data
window w as (partition by tag order by v_start)
Use a windowed sum() of the boolean is_end_grp cast to int (1 if true, 0 if false) to number the groups:
select tag, sum(is_end_grp::int) over (partition by tag
order by v_start) as grp_num,
v_start, v_stop
from mark_gaps
Aggregation over (tag, grp_num) will produce your desired result:
select tag, min(v_start) as v_start, max(v_stop) as v_stop
from numbered_groups
group by tag, grp_num
order by tag, v_start
Working DB<>Fiddle
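For reference, the three steps chain together into one statement; a sketch reusing the snippets above as CTEs, with the data CTE from the question repeated so it runs standalone:
with data as (
select * from (values
('a', 2, 3), ('a', 3, 5), ('a', 5, 7),
('a', 8, 10), ('a', 10, 12), ('a', 12, 14),
('b', 7, 8), ('b', 8, 10), ('b', 12, 15),
('c', 10, 11)
) as t(tag, v_start, v_stop)
),
mark_gaps as (
select tag, v_start, v_stop,
coalesce(lag(v_stop) over w < v_start, true) as is_end_grp
from data
window w as (partition by tag order by v_start)
),
numbered_groups as (
select tag, v_start, v_stop,
sum(is_end_grp::int) over (partition by tag order by v_start) as grp_num
from mark_gaps
)
select tag, min(v_start) as v_start, max(v_stop) as v_stop
from numbered_groups
group by tag, grp_num
order by tag, v_start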
Using the numbered-groups logic from Mike Organek's answer; I just started from a different place.
WITH data AS (
SELECT *
, case when lead(v_start) over(partition by tag order by v_start) = v_stop then 0 else 1 end stopcheck
, case when lag(v_stop) over(partition by tag order by v_stop) = v_start then 0 else 1 end startcheck
FROM (VALUES
('a' , 2 , 3),
('a', 3, 5),
('a', 5, 7),
('a', 8, 10),
('a', 10, 12),
('a', 12, 14),
('b', 7, 8),
('b', 8, 10),
('b', 12, 15),
('c', 10, 11)
) AS T(tag, v_start, v_stop)
ORDER BY tag, v_start, v_stop
)
,cnt as (
select *
, sum(startcheck) over (partition by tag order by v_start) grpn
from data)
select c1.tag, c1.v_start, c2.v_stop
from cnt c1
inner join cnt c2
on c1.tag = c2.tag and c1.grpn = c2.grpn
where c1.startcheck = 1 and c2.stopcheck = 1
This logic is all based on the assumption that your data always starts where the last row left off and that there is no overlap, etc.
Create a startcheck and stopcheck by comparing against the prior row and next row respectively. From there, use another windowed sum() to number the start records (so we don't match the start of the second batch to the stop of the first batch).
Join the table to itself, matching on tag and group and filtering to the start and stop records.
You can use the following query:
WITH data AS (
SELECT *
FROM (VALUES
('a', 2, 3),
('a', 3, 5),
('a', 5, 7),
('a', 8, 10),
('a', 10, 12),
('a', 12, 14),
('b', 7, 8),
('b', 8, 10),
('b', 12, 15),
('c', 10, 11)
) AS T(tag, v_start, v_stop)
ORDER BY tag, v_start, v_stop
),
cte1 as(
select *,
case
when lag(v_stop) over (partition by tag order by v_start) = v_start
then 0
else 1
end as grp
from data
),
cte2 as(
select *,
sum(grp) over (partition by tag order by v_start) as rnk
from cte1
)
select tag, min(v_start) as v_start, max(v_stop) as v_stop
from cte2
group by tag,rnk
order by tag
Demo in db<>fiddle

Select based on group membership

Suppose I have two tables, t and o. I want to select all the rows of t in which t.feature = o.feature (ergo the first two rows).
In addition, I also want to select all rows whose t.grouping equals the t.grouping of the previously selected rows; in this case t.grouping 1 and 2, ergo rows ('E', 1), ('F', 2) and ('G', 1). What is the most elegant way of achieving this?
CREATE TABLE t
( feature VARCHAR,
grouping BIGINT
);
INSERT INTO t (feature, grouping)
VALUES ('A', 1),
('B', 2),
('C', 5),
('D', 4),
('E', 1),
('F', 2),
('G', 1);
CREATE TABLE o
( feature VARCHAR
);
INSERT INTO o (feature)
VALUES ('A'),
('B');
If I understood correctly, try this:
with cte as
(select grouping from t inner join o on t.feature=o.feature)
select t.*
from t
inner join cte on t.grouping=cte.grouping
or
select
*
from t
where grouping in
(select grouping from t inner join o on t.feature=o.feature )
DEMO
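If several features in o can map to the same grouping, the first query would return duplicate rows of t; a variant (a sketch, giving the same output for the sample data) selects distinct groupings in the CTE first:
with cte as
(select distinct grouping from t inner join o on t.feature=o.feature)
select t.*
from t
inner join cte on t.grouping=cte.grouping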

Generate a JSON array of values for each row

Assuming the following CTE:
with mytable as (
select column1 as foo, column2 as bar, column3 as baz
from (values
('a', 'b', 1),
('c', 'd', 2)
) v
)
Using array_agg() outputs an array of values:
select
array_agg(v)
from mytable v;
-- {"(a,b,1)","(c,d,2)"}
but, surprisingly (to me at least), using to_json() on this array restores the field names, producing an object for each row:
select
to_json(array_agg(v))
from mytable v;
-- [{"foo":"a","bar":"b","baz":1},{"foo":"c","bar":"d","baz":2}]
How can we make PostgreSQL output an array of arrays instead, rendering each row as an array of values?
select
something(v)
from mytable v;
-- [["a", "b", 1],["c", "d", 2]]
You can convert a row into JSON, then unnest the key/value pairs, and then aggregate the values back:
with mytable (foo, bar, baz) as (
values
('a', 'b', 1),
('c', 'd', 2)
)
select jsonb_agg(x.vals)
from mytable m
cross join lateral (
select jsonb_agg(value order by idx) as vals
from json_each(row_to_json(m)) with ordinality as t(key,value,idx)
) x
It's important to use json (rather than jsonb) to convert the row if the order of the column values in the array matters to you.
If you need this often, you can put this into a function.
If the order of the column values in the array isn't important, you can use a JSON path function:
select jsonb_path_query_array(to_jsonb(m), '$.keyvalue().value')
from mytable m;
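To collapse the per-row arrays from the path query into a single top-level array, the call can be wrapped in jsonb_agg(); a sketch, noting that the value order follows jsonb key ordering rather than the original column order:
with mytable (foo, bar, baz) as (
values ('a', 'b', 1), ('c', 'd', 2)
)
select jsonb_agg(jsonb_path_query_array(to_jsonb(m), '$.keyvalue().value'))
from mytable m;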
Besides the answer from a_horse_with_no_name, I just found a way to achieve this, assuming column names are known:
with mytable as (
select column1 as foo, column2 as bar, column3 as baz
from (values
('a', 'b', 1),
('c', 'd', 2)
) v
)
select
to_json(array_agg(x.vals))
from (
select
json_build_array(
v.foo,
v.bar,
v.baz
) as vals
from mytable v
) x
;
-- [["a", "b", 1],["c", "d", 2]]

Conditional group by with window function in Snowflake query

I have a table in Snowflake in following format:
create table temp_test(name string, split string, value int)
insert into temp_test values
('A','a', 100), ('A','b', 200), ('A','c', 300), ('A','d', 400), ('A','e', 500),
('B','a', 1000), ('B','b', 2000), ('B','c', 3000), ('B','d', 4000), ('B','e', 5000)
As a first step, I needed only the top 2 values per name (sorted on value), so I used the following query to get that:
select name, split, value,
row_number() over (PARTITION BY (name) order by value desc) as row_num
from temp_test
qualify row_num <= 2
Which gives me the following result set:
NAME SPLIT VALUE ROW_NUM
A e 500 1
A d 400 2
B e 5000 1
B d 4000 2
Now I need to sum the values other than the top 2 and put them in a different split named "Others", like this:
NAME SPLIT VALUE
A e 500
A d 400
A Others 600
B e 5000
B d 4000
B Others 6000
How can I do that in a Snowflake query, or in SQL in general?
with data as (
select name, split, value,
row_number() over (partition by (name) order by value desc) as row_num
from temp_test
)
select
name,
case when row_num <= 2 then split else 'Others' end as split,
sum(value) as value
from data
group by name, case when row_num <= 2 then row_num else 3 end
Shawnt00's answer is good, but for the record, in Snowflake this can be written more simply.
Firstly, the GROUP BY at the end can refer to the result columns by position or by name:
GROUP BY 1,2
or
GROUP BY name, split
Also, as the CASE has only two branches, an IFF can be used, and since you are using a CTE to add the row_number, you can push the IFF into the CTE as well:
WITH data AS (
SELECT name, value,
ROW_NUMBER() OVER (PARTITION BY name ORDER BY value DESC) AS row_num,
IFF(row_num < 3, split, 'Others') as n_split
FROM VALUES ('A','a', 100), ('A','b', 200), ('A','c',300), ('A', 'd', 400),
('A', 'e',500), ('B', 'a', 1000), ('B','b', 2000), ('B','c', 3000),
('B', 'd',4000), ('B','e', 5000)
v(name, split, value)
)
SELECT
name,
n_split,
SUM(value) AS value
FROM data
GROUP BY name, n_split;
And if you are keen on small SQL, push the ROW_NUMBER into the IFF:
WITH data AS (
SELECT name, value,
IFF(ROW_NUMBER() OVER (PARTITION BY name ORDER BY value DESC) < 3, split, 'Others') as n_split
FROM VALUES ('A','a', 100), ('A','b', 200), ('A','c',300), ('A', 'd', 400),
('A', 'e',500), ('B', 'a', 1000), ('B','b', 2000), ('B','c', 3000),
('B', 'd',4000), ('B','e', 5000)
v(name, split, value)
)
SELECT
name,
n_split AS split,
SUM(value) AS value
FROM data
GROUP BY name, n_split;
gives:
NAME SPLIT VALUE
A e 500
A d 400
A Others 600
B e 5000
B d 4000
B Others 6000

Joining two tables and finding the earliest date SQL

I have the following two tables and I need to get the following result:
Table 1
(A, 1, 01/01/2015),
(A, 1, 10/01/2015),
(A, 2, 20/01/2015),
(A, 2, 01/05/2015),
(B, 1, 20/02/2014),
(B, 1, 20/02/2015),
(B, 2, 20/02/2016),
(B, 2, 06/05/2015)
Table 2
(A, 1, 123),
(A, 1, 123),
(A, 2, 234),
(A, 2, 234),
(B, 1, 123),
(B, 2, 123),
I want to return the earliest date of each distinct combo:
(A, 123, 01/01/2015),
(A, 234, 20/01/2015),
(B, 123, 20/02/2014)
Code I have tried:
CREATE TABLE #table1 (letter1 CHAR(1), num1 INT, date1 INT)
CREATE TABLE #table2 (letter1 CHAR(1), num1 INT, num2 INT)
INSERT INTO #table1 VALUES
('A', 1, 01012015),
('A', 1, 10012015),
('A', 2, 20012015),
('A', 2, 01052015),
('B', 1, 20022014),
('B', 1, 20022015),
('B', 2, 20022016),
('B', 2, 06052015)
INSERT INTO #table2 VALUES
('A', 1, 123),
('A', 1, 123),
('A', 2, 234),
('A', 2, 234),
('B', 1, 123),
('B', 2, 123)
SELECT DISTINCT [#table1].letter1, num2, MIN(date1) FROM #table1
INNER JOIN #table2 ON [#table1].letter1 = [#table2].letter1 AND [#table1].num1 = [#table2].num1
GROUP BY [#table1].letter1, [#table1].num1, num2
You can use the row_number() function:
select top (1) with ties t.letter1, t2.num2, t.date1
from table1 t inner join
table2 t2
on t2.letter1 = t.letter1 AND t2.num1 = t.num1
order by row_number() over (partition by t2.letter1, t2.num2 order by t.date1);
Just giving it a try: maybe add date1 to the GROUP BY clause.
SELECT DISTINCT [#table1].letter1, num2, MIN(date1)
FROM #table1
INNER JOIN #table2 ON [#table1].letter1 = [#table2].letter1 AND [#table1].num1 = [#table2].num1
GROUP BY [#table1].letter1, [#table1].num1, num2, date1
;with cte as
(
    select letter1, IIF(c = 1, 0, num1) as num1, num2  -- separate the rows into two groups:
    from (                                             -- group 'B' repeats the same num2 (123),
        select letter1, num1, num2, count(*) as c      -- group 'A' has different num2 values (123, 234)
        from #table2
        group by letter1, num1, num2
    ) ct
)
select c.letter1, c.num2, min(t1.date1) as earliest_date
from cte c
join #table1 t1
  on c.letter1 = t1.letter1
 and c.num1 = t1.num1   -- the num1 join is a must here, because 'A' has two different ids
 and c.num1 <> 0        -- the 'A' group is joined in this branch
group by c.letter1, c.num2
union
select c.letter1, c.num2, min(t1.date1) as earliest_date
from cte c
join #table1 t1
  on c.letter1 = t1.letter1  -- no num1 join is needed, because the 'B' group collapses to a single id
 and c.num1 = 0              -- the 'B' group is joined in this branch
group by c.letter1, c.num2
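For comparison, a plain GROUP BY comes close to the original attempt; the sketch below simply drops num1 from the grouping so duplicate (letter1, num2) combos collapse into one row. Note that MIN(date1) is only chronologically correct if date1 holds real DATE values rather than ddmmyyyy integers.
SELECT t1.letter1, t2.num2, MIN(t1.date1) AS earliest_date
FROM #table1 t1
INNER JOIN #table2 t2
    ON t2.letter1 = t1.letter1 AND t2.num1 = t1.num1
GROUP BY t1.letter1, t2.num2
ORDER BY t1.letter1, t2.num2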