Filtering and calculating over a JSONB object - SQL

I have a table with a JSONB column that holds data like this:
create table car_stats (
    id int primary key,
    city varchar,
    date timestamp,
    info varchar,
    stats jsonb
)
stats example:
[
    {
        "brand": "AUDI",
        "status": "NEW"
    },
    {
        "brand": "BMW",
        "status": "PRODUCTION"
    },
    {
        "brand": "BMW",
        "status": "NEW"
    },
    {
        "brand": "BMW",
        "status": "NEW"
    },
    {
        "brand": "BMW",
        "status": "DELIVERED"
    }
]
I want to compute the percentage of NEW / PRODUCTION / DELIVERED cars per brand, grouped by city and month:

CITY    MONTH  BRAND  NEW   PRODUCTION  DELIVERED
LONDON  3      AUDI   100%  0           0
PARIS   2      BMW    50%   25%         25%
I tried the following, but I have no idea how to count the elements inside the JSON (e.g. all BMW in status NEW):
with cte as (
    select stats ->> 'brand' as brand,
           stats ->> 'status' as status,
           city,
           date
    from car_stats
    group by city
),
grouped as (
    select cte.brand,
           cte.city,
           cte.date,
           ARRAY_TO_JSON(ARRAY_AGG(base)) as statistics
    from cte
    group by cte.brand, cte.city, cte.date
),
stats as (
    -- count % statistics somehow here.. ?
)

You can associate each element in stats with its corresponding city, and then use sum with group by:
with recursive cte(id, c, p, i, d) as (
    select c.id, c.city, (c.stats ->> 0)::jsonb, 1, c.stats from car_stats c
    union all
    select c.id, c.c, (c.d ->> c.i)::jsonb, c.i + 1, c.d from cte c where c.i < jsonb_array_length(c.d)
)
select c.c, extract(month from c1.date), c.p -> 'brand',
       round(sum(case when (c.p -> 'status')::text = '"NEW"' then 1 else 0 end) / count(*)::decimal, 2),
       round(sum(case when (c.p -> 'status')::text = '"PRODUCTION"' then 1 else 0 end) / count(*)::decimal, 2),
       round(sum(case when (c.p -> 'status')::text = '"DELIVERED"' then 1 else 0 end) / count(*)::decimal, 2)
from cte c join car_stats c1 on c.id = c1.id
group by c.c, extract(month from c1.date), c.p -> 'brand'

First expand brand and status into separate rows using cross join lateral, and then use conditional aggregation with count(*) filter:
with t as
(
    select city, date_trunc('month', "date")::date y_month, brand, status
    from car_stats
    cross join lateral
    (
        select j ->> 'brand' brand,
               j ->> 'status' status
        from jsonb_array_elements(stats) j
    ) t
)
select city, y_month, brand,
       count(*) filter (where status = 'NEW')::numeric / count(*) * 100 "NEW",
       count(*) filter (where status = 'PRODUCTION')::numeric / count(*) * 100 "PRODUCTION",
       count(*) filter (where status = 'DELIVERED')::numeric / count(*) * 100 "DELIVERED"
from t
group by city, y_month, brand;
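If you want the percentages rounded to two decimals as in the expected output, you can wrap each expression in round(); e.g. for the NEW column:

-- same conditional aggregation, rounded to 2 decimal places
round(count(*) filter (where status = 'NEW') * 100.0 / count(*), 2) as "NEW"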

Related

Split a column using a comma delimiter and rename based on the same column in Postgres

I have a table with a config column, and I want to split the config column and create new columns based on the values in it.
company  config
A        User:xxx,Key:ABC,Role:admin
B        User:yyy,Key:CTA,Role:Hr,Location:New York
C        User:zzz,Location:London,Status:Active

Expected output:

company  User  Key  Role   Location  Status
A        xxx   ABC  admin
B        yyy   CTA  Hr     New York
C        zzz               London    Active
That's a really poor data design. But Postgres has powerful string functions. You can do:
select t.company,
       max(y.v) filter(where y.k = 'User') as "user",
       max(y.v) filter(where y.k = 'Key') as "key",
       max(y.v) filter(where y.k = 'Role') as "role",
       max(y.v) filter(where y.k = 'Location') as location,
       max(y.v) filter(where y.k = 'Status') as status
from mytable t
cross join lateral regexp_split_to_table(t.config, ',') x(val)
cross join lateral (values (split_part(x.val, ':', 1), split_part(x.val, ':', 2))) y(k, v)
group by t.company
This first turns the CSV lists into rows, and then extracts the key/value pairs. The final step is conditional aggregation to assign values to each column.
I would probably write a function to convert this to a more "parseable" format:
create function convert_to_json(p_config text)
    returns jsonb
as
$$
    select jsonb_object_agg(items[1], items[2])
    from (
        select string_to_array(x.item, ':') items
        from unnest(string_to_array(p_config, ',')) x(item)
    ) t
$$
language sql
immutable;
The function does not correctly deal with embedded commas in the values, e.g. if you had something like User: "Dent, Arthur".
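To illustrate with a hypothetical input (not from the question): the naive split on every comma breaks such a pair apart, e.g.

select convert_to_json('User:"Dent, Arthur",Key:ABC');
-- yields something like {"Key": "ABC", "User": "\"Dent", " Arthur\"": null}
-- instead of the intended {"User": "Dent, Arthur", "Key": "ABC"}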
Then you can use it like this:
select company,
       json_config ->> 'Key' as "Key",
       json_config ->> 'User' as "User",
       json_config ->> 'Role' as "Role",
       json_config ->> 'Location' as "Location",
       json_config ->> 'Status' as "Status"
from (
    select company, convert_to_json(config) as json_config
    from the_table
) t

Recursive tree as a list (array items) in a new attribute "child"

How do I get hierarchical data (a recursive tree) as a new column like below? (If a node has no children, its child array is empty.)
rows: [{
    'id': 1,
    'parent_id': null,
    'name': 'a',
    'child': [{
        'id': 2,
        'parent_id': 1,
        'name': 'a1',
        'child': ...
    }]
}]
WITH RECURSIVE t AS (
    SELECT t.id AS id FROM category AS t WHERE parent_id is null
    UNION ALL
    SELECT child.id FROM category AS child JOIN t ON t.id = child.parent_id
)
SELECT * FROM category WHERE id IN (SELECT * FROM t);
https://www.db-fiddle.com/f/ufnG1WpBX4Z8jsBEg6bsLs/4
UPDATE
Because I am doing this with node-postgres, it returns JSON already, so I made another version using row_to_json to make my question easier to understand.
WITH RECURSIVE t AS (
    SELECT t.id AS id FROM category AS t WHERE parent_id = 1
    UNION ALL
    SELECT child.id FROM category AS child JOIN t ON t.id = child.parent_id
)
SELECT row_to_json(row) FROM (
    SELECT * FROM category WHERE id IN (SELECT * FROM t)
) row;
https://www.db-fiddle.com/f/ufnG1WpBX4Z8jsBEg6bsLs/5
It returns data like below:
[
    {"id": 3, "parent_id": 1, "name": "a1"},
    {"id": 4, "parent_id": 3, "name": "a2"}
]
Expected output:
{"id":3,"parent_id":1,"name":"a1", "child": [{"id":4,"parent_id":3,"name":"a2"}]}
SELECT row_to_json(row) FROM (
SELECT * FROM category WHERE id IN (SELECT * FROM t::json as something)
) row;
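The last query above doesn't actually nest anything. One way to produce the nested structure is a recursive function that aggregates each node's children; a minimal sketch, assuming the category(id, parent_id, name) table from the fiddle:

create or replace function category_tree(p_parent_id int)
returns jsonb
language plpgsql stable
as $$
begin
    -- build an object for every child of p_parent_id and recurse into its own children
    return coalesce(
        (select jsonb_agg(jsonb_build_object(
                    'id', c.id,
                    'parent_id', c.parent_id,
                    'name', c.name,
                    'child', category_tree(c.id)))
         from category c
         where c.parent_id is not distinct from p_parent_id),
        '[]'::jsonb);  -- leaf nodes get an empty child array
end;
$$;

-- children of node 1, each with its nested "child" array:
select category_tree(1);
-- the whole forest (roots have parent_id null):
select category_tree(null);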

Why is Snowflake changing the order of JSON values when converting into a flattened list?

I have JSON objects stored in a table, and I am trying to write a query to get the first element from that JSON.
Replication Script
create table staging.par.test_json (id int, val varchar(2000));
insert into staging.par.test_json values (1, '{"list":[{"element":"Plumber"},{"element":"Craft"},{"element":"Plumbing"},{"element":"Electrics"},{"element":"Electrical"},{"element":"Tradesperson"},{"element":"Home services"},{"element":"Housekeepings"},{"element":"Electrical Goods"}]}');
insert into staging.par.test_json values (2, '
{
    "list": [
        {
            "element": "Wholesale jeweler"
        },
        {
            "element": "Fashion"
        },
        {
            "element": "Industry"
        },
        {
            "element": "Jewelry store"
        },
        {
            "element": "Business service"
        },
        {
            "element": "Corporate office"
        }
    ]
}');
with cte_get_cats AS
(
    select id,
           val as category_list
    from staging.par.test_json
),
cats_parse AS
(
    select id,
           parse_json(category_list) as c
    from cte_get_cats
),
distinct_cats as
(
    select id,
           INDEX,
           UPPER(cast(value:element AS varchar)) As c
    from cats_parse,
         LATERAL flatten(INPUT => c:"list")
    order by 1, 2
),
cat_array AS
(
    SELECT id,
           array_agg(DISTINCT c) AS sds_categories
    FROM distinct_cats
    GROUP BY 1
),
sds_cats AS
(
    select id,
           cast(sds_categories[0] AS varchar) as sds_primary_category
    from cat_array
)
select * from sds_cats;
Example value of the categories column:
{"list":[{"element":"Plumber"},{"element":"Craft"},{"element":"Plumbing"},{"element":"Electrics"},{"element":"Electrical"},{"element":"Tradesperson"},{"element":"Home services"},{"element":"Housekeepings"},{"element":"Electrical Goods"}]}
Flattening it to a list gives me
["Plumber","Craft","Plumbing","Electrics","Electrical","Tradesperson","Home services","Housekeepings","Electrical Goods"]
Issue:
The order of this list is not always the same. Snowflake sometimes changes the order, seemingly sorting it alphabetically.
How can I make this stable? I do not want the order to be changed.
The problem is the way you're using ARRAY_AGG:
array_agg(DISTINCT c) AS sds_categories
Specifying it like that gives Snowflake no guidance on how the contents of the array should be arranged. You should not assume that the arrays will be created in the same order as their input records; they might be, but it's not guaranteed. So you probably want to do:
array_agg(DISTINCT c) within group (order by index) AS sds_categories
But that won't work: if you use DISTINCT c, the value of index for each c is unknown. Perhaps you don't need DISTINCT; then this will work:
array_agg(c) within group (order by index) AS sds_categories
If you do need DISTINCT, you need to somehow associate an index with each distinct c value. One way is to use a MIN function on index in the input. Here's the full query:
with cte_get_cats AS
(
    select id,
           val as category_list
    from staging.par.test_json
),
cats_parse AS
(
    select id,
           parse_json(category_list) as c
    from cte_get_cats
),
distinct_cats as
(
    select id,
           MIN(INDEX) AS index,
           UPPER(cast(value:element AS varchar)) As c
    from cats_parse,
         LATERAL flatten(INPUT => c:"list")
    group by 1, 3
),
cat_array AS
(
    SELECT id,
           array_agg(c) within group (order by index) AS sds_categories
    FROM distinct_cats
    GROUP BY 1
),
sds_cats AS
(
    select id,
           cast(sds_categories[0] AS varchar) as sds_primary_category
    from cat_array
)
select * from sds_cats;
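Side note: if all you really need is the first element in the original JSON order, and you can skip the DISTINCT/UPPER handling, you can avoid flattening entirely and access the path directly; a sketch against the same test table:

-- first element of the list, in document order
select id,
       parse_json(val):list[0].element::varchar as sds_primary_category
from staging.par.test_json;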

Select columns maximum and minimum value for all records

I have a table as below; for every record, I want to get the names of the columns holding the maximum and the minimum value, excluding the population column (which will of course hold the maximum).
State  Population  age_below_18  age_18_to_50  age_50_above
1      1000        250           600           150
2      4200        400           300           3500

Result:

State  Population  Maximum_group  Minimum_group  Max_value  Min_value
1      1000        age_18_to_50   age_50_above   600        150
2      4200        age_50_above   age_18_to_50   3500       300
Assuming none of the values are NULL, you can use greatest() and least():
select state, population,
       (case when age_below_18 = greatest(age_below_18, age_18_to_50, age_50_above)
             then 'age_below_18'
             when age_18_to_50 = greatest(age_below_18, age_18_to_50, age_50_above)
             then 'age_18_to_50'
             when age_50_above = greatest(age_below_18, age_18_to_50, age_50_above)
             then 'age_50_above'
        end) as maximum_group,
       (case when age_below_18 = least(age_below_18, age_18_to_50, age_50_above)
             then 'age_below_18'
             when age_18_to_50 = least(age_below_18, age_18_to_50, age_50_above)
             then 'age_18_to_50'
             when age_50_above = least(age_below_18, age_18_to_50, age_50_above)
             then 'age_50_above'
        end) as minimum_group,
       greatest(age_below_18, age_18_to_50, age_50_above) as maximum_value,
       least(age_below_18, age_18_to_50, age_50_above) as minimum_value
from t;
If your result set is actually being generated by a query, there is likely a better approach.
An alternative method "unpivots" the data and then reaggregates:
select state, population,
       max(which) keep (dense_rank first order by val desc) as maximum_group,
       max(which) keep (dense_rank first order by val asc) as minimum_group,
       max(val) as maximum_value,
       min(val) as minimum_value
from ((select state, population, 'age_below_18' as which, age_below_18 as val
       from t
      ) union all
      (select state, population, 'age_18_to_50' as which, age_18_to_50 as val
       from t
      ) union all
      (select state, population, 'age_50_above' as which, age_50_above as val
       from t
      )
     ) t
group by state, population;
This approach will likely perform worse than the first, although it is perhaps easier to extend as the number of columns increases. However, Oracle 12c supports lateral joins, with which a similar approach would have competitive performance.
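For example, a minimal sketch of the lateral variant (using Oracle 12c's CROSS APPLY; table and column names as above):

select t.state, t.population,
       max(x.which) keep (dense_rank first order by x.val desc) as maximum_group,
       max(x.which) keep (dense_rank first order by x.val asc) as minimum_group,
       max(x.val) as maximum_value,
       min(x.val) as minimum_value
from t
cross apply (
    -- unpivot the three columns per row, without re-reading t three times
    select 'age_below_18' as which, t.age_below_18 as val from dual
    union all
    select 'age_18_to_50', t.age_18_to_50 from dual
    union all
    select 'age_50_above', t.age_50_above from dual
) x
group by t.state, t.population;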
with CTE as (
    select T.*
           -- step 2: rank the values
           , RANK() OVER (PARTITION BY "State", "Population" order by "value") "rk"
    from (
        -- step 1: union to merge the three columns into one
        select "State", "Population",
               'age_below_18' as GroupName,
               "age_below_18" as "value"
        from TestTable
        union all
        select "State", "Population",
               'age_18_to_50' as GroupName,
               "age_18_to_50" as "value"
        from TestTable
        union all
        select "State", "Population",
               'age_50_above' as GroupName,
               "age_50_above" as "value"
        from TestTable
    ) T
)
select T1."State", T1."Population"
       , T3.GroupName Maximum_group
       , T4.GroupName Minimum_group
       , T3."value" Max_value
       , T4."value" Min_value
-- step 3: the max rank gets the max value, the min rank gets the min value
from (select "State", "Population", max("rk") as Max_rank from CTE group by "State", "Population") T1
left join (select "State", "Population", min("rk") as Min_rank from CTE group by "State", "Population") T2
    on T1."State" = T2."State" and T1."Population" = T2."Population"
left join CTE T3 on T3."State" = T1."State" and T3."Population" = T1."Population" and T1.Max_rank = T3."rk"
left join CTE T4 on T4."State" = T2."State" and T4."Population" = T2."Population" and T2.Min_rank = T4."rk"
Hope it helps you :)
Another option: use a combination of UNPIVOT(), which "rotates columns into rows", and analytic functions, which "compute an aggregate value based on a group of rows", e.g.
Test data
select * from T;

STATE  POPULATION  YOUNGERTHAN18  BETWEEN18AND50  OVER50
1      1000        250            600             150
2      4200        400            300             3500
UNPIVOT
select *
from T
unpivot (
    quantity for agegroup in (
        youngerthan18 as 'youngest'
        , between18and50 as 'middleaged'
        , over50 as 'oldest'
    )
);

-- result
STATE  POPULATION  AGEGROUP    QUANTITY
1      1000        youngest    250
1      1000        middleaged  600
1      1000        oldest      150
2      4200        youngest    400
2      4200        middleaged  300
2      4200        oldest      3500
Include Analytic Functions
select distinct
       state
       , population
       , max(quantity) over (partition by state) maxq
       , min(quantity) over (partition by state) minq
       , first_value(agegroup) over (partition by state order by quantity desc) biggest_group
       , first_value(agegroup) over (partition by state order by quantity) smallest_group
from T
unpivot (
    quantity for agegroup in (
        youngerthan18 as 'youngest'
        , between18and50 as 'middleaged'
        , over50 as 'oldest'
    )
);

-- result
STATE  POPULATION  MAXQ  MINQ  BIGGEST_GROUP  SMALLEST_GROUP
1      1000        600   150   middleaged     oldest
2      4200        3500  300   oldest         middleaged
Example tested w/ Oracle 11g (see dbfiddle) and Oracle 12c.
Caution: (1) the column headings need adjusting (according to your requirements). (2) If there are NULLs in your original table, you should adjust the query, e.g. by using NVL().
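For (2), a sketch of one way to do that: UNPIVOT cannot wrap the columns in expressions directly, so apply NVL() in a subquery first (assuming 0 is an acceptable substitute for NULL here):

select *
from (
    select state, population,
           nvl(youngerthan18, 0) youngerthan18,  -- replace NULLs before unpivoting
           nvl(between18and50, 0) between18and50,
           nvl(over50, 0) over50
    from T
)
unpivot (
    quantity for agegroup in (
        youngerthan18 as 'youngest'
        , between18and50 as 'middleaged'
        , over50 as 'oldest'
    )
);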
An advantage of the described approach is that the code remains rather clear, even if more 'categories' are used. E.g. when working with 11 age groups, the query may look something like ...
select distinct
       state
       , population
       , max(quantity) over (partition by state) maxq
       , min(quantity) over (partition by state) minq
       , first_value(agegroup) over (partition by state order by quantity desc) biggest_group
       , first_value(agegroup) over (partition by state order by quantity) smallest_group
from T
unpivot (
    quantity for agegroup in (
        y10 as 'youngerthan10'
        , b10_20 as 'between10and20'
        , b20_30 as 'between20and30'
        , b30_40 as 'between30and40'
        , b40_50 as 'between40and50'
        , b50_60 as 'between50and60'
        , b60_70 as 'between60and70'
        , b70_80 as 'between70and80'
        , b80_90 as 'between80and90'
        , b90_100 as 'between90and100'
        , o100 as 'over100'
    )
)
order by state;
See dbfiddle.

Postgres json alias

I am writing an SQL query in PostgreSQL. This is what I have now:
SELECT T.order,
       (SELECT row_to_json(item) FROM (
           SELECT T.title, T.subtitle, T.text
           FROM table T
           WHERE T.id = 1
       ) AS item)
FROM table T
WHERE T.id = 1;
The result is:

 order |                   row_to_json
-------+------------------------------------------------------
     2 | {"title":"AAA","subtitle":"aaaa","text":"aaaa"}

But I need this result:

 order |                        row_to_json
-------+----------------------------------------------------------
     2 | {"item":{"title":"AAA","subtitle":"aaaa","text":"aaaa"}}
Could you tell me how I can get it?
You don't need a subquery for such a result; using the Postgres function jsonb_build_object you can achieve your goal like this:
-- Test data:
WITH "table"( id, "order", title, subtitle, "text" ) AS (
VALUES ( 1::int, 2::int, 'AAA'::text, 'aaaa'::text, 'aaaa'::text),
( 2::int, 3::int, 'BBB'::text, 'bbbb'::text, 'bbbb'::text)
)
-- The query:
SELECT "order",
jsonb_build_object(
'items',
jsonb_build_object(
'title', title,
'subtitle', subtitle,
'text', "text"
)
) AS myjson
FROM "table"
WHERE id = 1;
-- Result:
order | myjson
-------+-----------------------------------------------------------------
2 | {"items": {"text": "aaaa", "title": "AAA", "subtitle": "aaaa"}}
(1 row)
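Alternatively, keeping the subquery shape from the question, you can wrap the row in json_build_object to add the outer key; a sketch against the question's table and columns (with "table" quoted, since table is a reserved word):

SELECT T."order",
       -- nest the correlated row under the 'item' key
       json_build_object('item',
           (SELECT row_to_json(item) FROM (SELECT T.title, T.subtitle, T.text) AS item)
       ) AS row_to_json
FROM "table" T
WHERE T.id = 1;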