Edit fields of a jsonb array in postgresql

I have the following jsonb in db:
[
    {
        "state": 2,
        "activity": "EJECUCIÓN",
        "final_date": "2020-02-24",
        "activity_id": 1,
        "current_days": -7,
        "initial_date": "2020-02-24"
    },
    {
        "state": 2,
        "activity": "REVISIÓN",
        "final_date": "2020-02-25",
        "activity_id": 2,
        "current_days": 0,
        "initial_date": "2020-02-25"
    },
    {
        "state": 2,
        "activity": "RECEPCIÓN",
        "final_date": "2020-02-27",
        "activity_id": 4,
        "current_days": 0,
        "initial_date": "2020-02-27"
    }
]
I run the following query to update the current_days field:
WITH activity_state AS (
    SELECT taex_id,
           ('{' || index - 1 || ',current_days}')::text[] AS pathe,
           ((task_activity->>'final_date')::date - current_date) AS current_days,
           task_activity->'state' AS state,
           task_activity->>'final_date' AS final_date,
           task_activity->>'current_days' AS curren
    FROM task_executions,
         jsonb_array_elements(taex_activitygraph) WITH ORDINALITY arr(task_activity, index)
    WHERE task_activity->>'state' = '2'
)
UPDATE task_executions
SET taex_activitygraph = jsonb_set(taex_activitygraph, activity_state.pathe, to_jsonb(current_days), true)
FROM activity_state
WHERE task_executions.taex_id = activity_state.taex_id
  AND activity_state.state = '2'
But that query only updates the first element of the JSON array; the other elements are left unchanged, even though the first part of the query:
( SELECT taex_id,
         ('{' || index - 1 || ',current_days}')::text[] AS pathe,
         ((task_activity->>'final_date')::date - current_date) AS current_days,
         task_activity->'state' AS state,
         task_activity->>'final_date' AS final_date,
         task_activity->>'current_days' AS curren
  FROM task_executions,
       jsonb_array_elements(taex_activitygraph) WITH ORDINALITY arr(task_activity, index)
  WHERE task_activity->>'state' = '2' )
returns all the elements of the array that should be updated. The second part, which is supposed to apply the update:
UPDATE task_executions
SET taex_activitygraph = jsonb_set(taex_activitygraph, activity_state.pathe, to_jsonb(current_days), true)
FROM activity_state
WHERE task_executions.taex_id = activity_state.taex_id
  AND activity_state.state = '2'
only updates the first item.

Assuming this structure and data:
postgres=# \d task_executions
              Table "public.task_executions"
       Column       | Type  | Collation | Nullable | Default
--------------------+-------+-----------+----------+---------
 task_activitygraph | jsonb |           |          |
postgres=# SELECT jsonb_pretty(task_activitygraph) FROM task_executions ;
jsonb_pretty
--------------------------------------
[ +
{ +
"state": 2, +
"activity": "EJECUCIÓN", +
"final_date": "2020-02-24", +
"activity_id": 1, +
"current_days": -7, +
"initial_date": "2020-02-24"+
}, +
{ +
"state": 2, +
"activity": "REVISIÓN", +
"final_date": "2020-02-25", +
"activity_id": 2, +
"current_days": 0, +
"initial_date": "2020-02-25"+
} +
]
(1 row)
... this UPDATE should work. The original query fails because UPDATE ... FROM applies at most one joined row per target row: the CTE produces one row per matching array element, all pointing at the same taex_id, so only one of the jsonb_set calls ever takes effect. Rebuilding the whole array in a single expression avoids that:
postgres=# UPDATE task_executions
SET task_activitygraph = (
SELECT jsonb_agg(
CASE WHEN elem->>'state' = '2'
THEN
jsonb_set(
elem,
'{current_days}',
to_jsonb((elem->>'final_date')::date - current_date)
)
ELSE
elem
END
)
FROM jsonb_array_elements(task_activitygraph) AS a(elem)
);
UPDATE 1
Documentation: https://www.postgresql.org/docs/9.5/functions-json.html
Side note: in transactional databases (where you have many concurrent clients, and processing speed and storage efficiency matter), and if your objects have a fixed structure, DO NOT STORE your data as JSON. Use the relational data model instead.
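For illustration, here is a minimal sketch of what that relational alternative could look like (table and column names are hypothetical, derived from the JSON keys above); the whole jsonb_set exercise then collapses into an ordinary UPDATE:
-- Hypothetical relational layout for the activity entries
CREATE TABLE task_activities (
    taex_id      int  NOT NULL,   -- references task_executions
    activity_id  int  NOT NULL,
    activity     text NOT NULL,
    state        int  NOT NULL,
    initial_date date NOT NULL,
    final_date   date NOT NULL,
    current_days int,
    PRIMARY KEY (taex_id, activity_id)
);
-- The per-element update becomes a plain column update
UPDATE task_activities
SET current_days = final_date - current_date
WHERE state = 2;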

Related

Format SQL output to custom JSON

I have this very simple table with this data:
CREATE TABLE #Prices
(
    ProductId int,
    SizeId int,
    Price int,
    Date date
)
INSERT INTO #Prices
VALUES (1, 1, 100, '2020-01-01'),
       (1, 1, 120, '2020-02-01'),
       (1, 1, 130, '2020-03-01'),
       (1, 2, 100, '2020-01-01'),
       (1, 2, 100, '2020-02-01'),
       (2, 1, 100, '2020-01-01'),
       (2, 1, 120, '2020-02-01'),
       (2, 1, 130, '2020-03-01'),
       (2, 2, 100, '2020-01-01'),
       (2, 2, 100, '2020-02-01')
I would like to format the output to be something like this:
{
    "Products": [
        {
            "Product": 2,
            "UnitSizes": [
                {
                    "SizeId": 1,
                    "PerDate": [
                        { "Date": "2020-01-02", "Price": 870.0 },
                        { "Date": "2021-04-29", "Price": 900.0 }
                    ]
                },
                {
                    "SizeId": 2,
                    "PerDate": [
                        { "Date": "2020-01-02", "Price": 435.0 },
                        { "Date": "2021-04-29", "Price": 450.0 }
                    ]
                }
            ]
        },
        {
            "Product": 4,
            "UnitSizes": [
                {
                    "SizeId": 1,
                    "PerDate": [
                        { "Date": "2020-01-02", "Price": 900.0 }
                    ]
                }
            ]
        }
    ]
}
I almost have it, but I don't know how to format it to get the array inside 'PerDate'. This is what I have:
SELECT
    ProductId AS [Product],
    SizeId AS 'Sizes.SizeId',
    date AS 'Sizes.PerDate.Date',
    price AS 'Sizes.PerDate.Price'
FROM
    #Prices
ORDER BY
    ProductId, [Sizes.SizeId], Date
FOR JSON PATH, ROOT('Products')
I have tried FOR JSON AUTO and got nothing useful, and I've tried JSON_QUERY(), but I was not able to achieve the result I want.
Any help will be much appreciated.
Thanks
Unfortunately, SQL Server does not have the JSON_AGG function, which means you would normally need to use a number of correlated subqueries and keep on rescanning the base table.
However, we can simulate it by using STRING_AGG against single JSON objects generated in an APPLY. This means that we only scan the base table once.
Use of JSON_QUERY with no path argument prevents double-escaping (a short demonstration follows the query below).
WITH PerDate AS (
SELECT
p.ProductId,
p.SizeId,
PerDate = '[' + STRING_AGG(j.PerDate, ',') WITHIN GROUP (ORDER BY p.Date) + ']'
FROM #Prices AS p
CROSS APPLY ( -- This produces multiple rows of single JSON objects
SELECT p.Date, p.Price
FOR JSON PATH, WITHOUT_ARRAY_WRAPPER
) j(PerDate)
GROUP BY
p.ProductId,
p.SizeId
),
UnitSizes AS (
SELECT
p.ProductId,
UnitSizes = '[' + STRING_AGG(j.UnitSizes, ',') WITHIN GROUP (ORDER BY p.SizeId) + ']'
FROM PerDate p
CROSS APPLY (
SELECT p.SizeId, PerDate = JSON_QUERY(p.PerDate)
FOR JSON PATH, WITHOUT_ARRAY_WRAPPER
) j(UnitSizes)
GROUP BY
p.ProductId
)
SELECT
Product = p.ProductId,
UnitSizes = JSON_QUERY(p.UnitSizes)
FROM UnitSizes p
ORDER BY p.ProductId
FOR JSON PATH, ROOT('Products');
db<>fiddle
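To see what the JSON_QUERY note above is about, here is a minimal sketch (the variable and values are made up): FOR JSON escapes a plain string, but treats the output of JSON_QUERY as JSON to be embedded verbatim.
DECLARE @fragment nvarchar(MAX) = N'[{"Date":"2020-01-01","Price":100}]';
-- Without JSON_QUERY the fragment is just a string and gets re-escaped:
-- [{"PerDate":"[{\"Date\":\"2020-01-01\",\"Price\":100}]"}]
SELECT PerDate = @fragment FOR JSON PATH;
-- JSON_QUERY with no path marks it as JSON, so it is embedded as-is:
-- [{"PerDate":[{"Date":"2020-01-01","Price":100}]}]
SELECT PerDate = JSON_QUERY(@fragment) FOR JSON PATH;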
This is one way of doing it
DROP TABLE IF EXISTS #Prices
CREATE TABLE #Prices
(
ProductId INT,
SizeId INT,
Price INT,
Date DATE
)
-- SQL Prompt formatting off
INSERT INTO #Prices
VALUES (1, 1, 100, '2020-01-01'),
(1, 1, 120, '2020-02-01'),
(1, 1, 130, '2020-03-01'),
(1, 2, 100, '2020-01-01'),
(1, 2, 100, '2020-02-01'),
(2, 1, 100, '2020-01-01'),
(2, 1, 120, '2020-02-01'),
(2, 1, 130, '2020-03-01'),
(2, 2, 100, '2020-01-01'),
(2, 2, 100, '2020-02-01')
-- SQL Prompt formatting on
SELECT m.ProductId AS Product,
(
SELECT s.SizeId,
(
SELECT p.Date,
p.Price
FROM #Prices AS p
WHERE p.SizeId = s.SizeId
GROUP BY p.Date,
p.Price
ORDER BY p.Date
FOR JSON PATH
) AS PerDate
FROM #Prices AS s
WHERE s.ProductId = m.ProductId
GROUP BY s.SizeId
ORDER BY s.SizeId
FOR JSON PATH
) AS UnitSizes
FROM #Prices AS m
GROUP BY m.ProductId
ORDER BY m.ProductId
FOR JSON PATH, ROOT('Products')
Output:
{
    "Products": [
        {
            "Product": 1,
            "UnitSizes": [
                {
                    "SizeId": 1,
                    "PerDate": [
                        { "Date": "2020-01-01", "Price": 100 },
                        { "Date": "2020-02-01", "Price": 120 },
                        { "Date": "2020-03-01", "Price": 130 }
                    ]
                },
                {
                    "SizeId": 2,
                    "PerDate": [
                        { "Date": "2020-01-01", "Price": 100 },
                        { "Date": "2020-02-01", "Price": 100 }
                    ]
                }
            ]
        },
        {
            "Product": 2,
            "UnitSizes": [
                {
                    "SizeId": 1,
                    "PerDate": [
                        { "Date": "2020-01-01", "Price": 100 },
                        { "Date": "2020-02-01", "Price": 120 },
                        { "Date": "2020-03-01", "Price": 130 }
                    ]
                },
                {
                    "SizeId": 2,
                    "PerDate": [
                        { "Date": "2020-01-01", "Price": 100 },
                        { "Date": "2020-02-01", "Price": 100 }
                    ]
                }
            ]
        }
    ]
}

SQL Server: How to flatten nested arrays by merging values

I have 10000 JSONs with different ids, and each has 10000 names. How can I flatten nested arrays by merging values using SQL Server? The JSONs can be read in any language; I'm looking for any SQL dialect that can transform the data, as I'm using Spark connectors. I use many SQL dialects, including but not limited to Spark SQL, PostgreSQL, MySQL, SQLite and SQL Server...
NOTE: I was asked by Martijn Pieters to create duplicates specific to each SQL dialect, so this one is for SQL Server.
Notes:
The input dataframe has more than 10000 columns (name_1_a, name_1000_xx, ...), so the column (array) names cannot be hardcoded, as that would require writing out 10000 names
id, date, val always follow the same naming convention across all columns and all JSONs
array size can vary, but date and val are always there, so they can be hardcoded
date can be different in each array; for example, name_1_a starts with 2001, but name_10000_xvz for id == 1 starts with 2000 and finishes with 2004, while for id == 2 it starts with 1990 and finishes with 2004
Input df:
root
|-- id: long (nullable = true)
|-- name_10000_xvz: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- date: long (nullable = true)
| | |-- val: long (nullable = true)
|-- name_1_a: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- date: long (nullable = true)
| | |-- val: long (nullable = true)
|-- name_1_b: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- date: long (nullable = true)
| | |-- val: long (nullable = true)
|-- name_2_a: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- date: long (nullable = true)
| | |-- val: long (nullable = true)
+---+------------------------------------------------------------------------+---------------------------------+---------------------------------+------------------------------------+
|id |name_10000_xvz |name_1_a |name_1_b |name_2_a |
+---+------------------------------------------------------------------------+---------------------------------+---------------------------------+------------------------------------+
|2 |[{1990, 39}, {2000, 30}, {2001, 31}, {2002, 32}, {2003, 33}, {2004, 34}]|[{2001, 1}, {2002, 2}, {2003, 3}]|[{2001, 4}, {2002, 5}, {2003, 6}]|[{2001, 21}, {2002, 22}, {2003, 23}]|
|1 |[{2000, 30}, {2001, 31}, {2002, 32}, {2003, 33}] |[{2001, 1}, {2002, 2}, {2003, 3}]|[{2001, 4}, {2002, 5}, {2003, 6}]|[{2001, 21}, {2002, 22}, {2003, 23}]|
+---+------------------------------------------------------------------------+---------------------------------+---------------------------------+------------------------------------+
Required output df:
+---+---------+----------+-----------+---------+----------------+
|id | date | name_1_a | name_1_b |name_2_a | name_10000_xvz |
+---+---------+----------+-----------+---------+----------------+
|1 | 2000 | 0 | 0 | 0 | 30 |
|1 | 2001 | 1 | 4 | 21 | 31 |
|1 | 2002 | 2 | 5 | 22 | 32 |
|1 | 2003 | 3 | 6 | 23 | 33 |
|2 | 1990 | 0 | 0 | 0 | 39 |
|2 | 2000 | 0 | 0 | 0 | 30 |
|2 | 2001 | 1 | 4 | 21 | 31 |
|2 | 2002 | 2 | 5 | 22 | 32 |
|2 | 2003 | 3 | 6 | 23 | 33 |
|2 | 2004 | 0 | 0 | 0 | 34 |
+---+---------+----------+-----------+---------+----------------+
Below are jsons for input df:
1.json
{ "id": 1, "name_1_a": [ { "date": 2001, "val": 1 }, { "date": 2002, "val": 2 }, { "date": 2003, "val": 3 } ], "name_1_b": [ { "date": 2001, "val": 4 }, { "date": 2002, "val": 5 }, { "date": 2003, "val": 6 } ], "name_2_a": [ { "date": 2001, "val": 21 }, { "date": 2002, "val": 22 }, { "date": 2003, "val": 23 } ], "name_10000_xvz": [ { "date": 2000, "val": 30 }, { "date": 2001, "val": 31 }, { "date": 2002, "val": 32 }, { "date": 2003, "val": 33 } ]}
2.json
{ "id": 2, "name_1_a": [ { "date": 2001, "val": 1 }, { "date": 2002, "val": 2 }, { "date": 2003, "val": 3 } ], "name_1_b": [ { "date": 2001, "val": 4 }, { "date": 2002, "val": 5 }, { "date": 2003, "val": 6 } ], "name_2_a": [ { "date": 2001, "val": 21 }, { "date": 2002, "val": 22 }, { "date": 2003, "val": 23 } ], "name_10000_xvz": [ { "date": 1990, "val": 39 }, { "date": 2000, "val": 30 }, { "date": 2001, "val": 31 }, { "date": 2002, "val": 32 }, { "date": 2003, "val": 33 }, { "date": 2004, "val": 34 } ]}}
OK, so we have two "problems" we need to solve here. Firstly, the fact that you need a dynamic number of columns, as you don't know what names are in your data. This means you need dynamic SQL.
Next is the problem that not every name has a value for every year, so we also need a "year" table we can LEFT JOIN from, so that we have a row for every year.
This, as a result, is going to be really messy, but it can be done. I've left comments where I can, but the best thing I can really suggest is taking the time to read the SQL, PRINTing/SELECTing the dynamic statement, and learning what it does.
First let's build a static version, so you can see what it would look like. Here I use a CTE to get all the years, and then another to get the data in a normalised format from the JSON. Finally, unpivot the data using conditional aggregation:
--Sample JSON
DECLARE @JSON nvarchar(MAX) = N'{ "id": 1, "name_1_a": [ { "date": 2001, "val": 1 }, { "date": 2002, "val": 2 }, { "date": 2003, "val": 3 } ], "name_1_b": [ { "date": 2001, "val": 4 }, { "date": 2002, "val": 5 }, { "date": 2003, "val": 6 } ], "name_2_a": [ { "date": 2001, "val": 21 }, { "date": 2002, "val": 22 }, { "date": 2003, "val": 23 } ], "name_10000_xvz": [ { "date": 2000, "val": 30 }, { "date": 2001, "val": 31 }, { "date": 2002, "val": 32 }, { "date": 2003, "val": 33 } ]}';
--Get distinct Years
WITH Years AS(
    SELECT DISTINCT V.date
    FROM OPENJSON(@JSON) J
         CROSS APPLY (SELECT *
                      FROM OPENJSON(J.[value])
                      WITH(date int)
                      WHERE ISJSON(J.[value]) = 1) V),
--Get Data
Data AS(
    SELECT J.[key] AS [name],
           V.date,
           V.val
    FROM OPENJSON(@JSON) J
         CROSS APPLY (SELECT *
                      FROM OPENJSON(J.[value])
                      WITH(date int,
                           val int)
                      WHERE ISJSON(J.[value]) = 1) V)
--Final Select and Unpivot
SELECT JSON_VALUE(@JSON, '$.id') AS ID,
       Y.Date,
       ISNULL(MAX(CASE D.[name] WHEN 'name_1_a' THEN D.val END),0) AS name_1_a,
       ISNULL(MAX(CASE D.[name] WHEN 'name_1_b' THEN D.val END),0) AS name_1_b,
       ISNULL(MAX(CASE D.[name] WHEN 'name_2_a' THEN D.val END),0) AS name_2_a,
       ISNULL(MAX(CASE D.[name] WHEN 'name_10000_xvz' THEN D.val END),0) AS name_10000_xvz
FROM Years Y
     LEFT JOIN Data D ON Y.Date = D.Date
GROUP BY Y.Date;
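Run against the sample JSON, this should return one row per distinct year, matching the required output for id = 1:
ID | Date | name_1_a | name_1_b | name_2_a | name_10000_xvz
---+------+----------+----------+----------+---------------
1  | 2000 |        0 |        0 |        0 |             30
1  | 2001 |        1 |        4 |       21 |             31
1  | 2002 |        2 |        5 |       22 |             32
1  | 2003 |        3 |        6 |       23 |             33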
As I mentioned, however, this isn't dynamic. This, therefore, is where it gets a little more messy. For the below, I am assuming you're using a recent version of SQL Server, and thus have access to STRING_AGG (if not, you'll need to use the old FOR XML PATH and STUFF method):
--Sample JSON
DECLARE @JSON nvarchar(MAX) = N'{ "id": 1, "name_1_a": [ { "date": 2001, "val": 1 }, { "date": 2002, "val": 2 }, { "date": 2003, "val": 3 } ], "name_1_b": [ { "date": 2001, "val": 4 }, { "date": 2002, "val": 5 }, { "date": 2003, "val": 6 } ], "name_2_a": [ { "date": 2001, "val": 21 }, { "date": 2002, "val": 22 }, { "date": 2003, "val": 23 } ], "name_10000_xvz": [ { "date": 2000, "val": 30 }, { "date": 2001, "val": 31 }, { "date": 2002, "val": 32 }, { "date": 2003, "val": 33 } ]}';
--Variables for dynamic SQL
DECLARE @SQL nvarchar(MAX),
        @CRLF nchar(2) = NCHAR(13) + NCHAR(10);
DECLARE @Delimiter varchar(20) = N',' + @CRLF + N'       ';
--You'll note the start is all the same
SET @SQL = N'--Get distinct Years' + @CRLF +
           N'WITH Years AS(' + @CRLF +
           N'    SELECT DISTINCT V.date' + @CRLF +
           N'    FROM OPENJSON(@JSON) J' + @CRLF +
           N'         CROSS APPLY (SELECT *' + @CRLF +
           N'                      FROM OPENJSON(J.[value])' + @CRLF +
           N'                      WITH(date int)' + @CRLF +
           N'                      WHERE ISJSON(J.[value]) = 1) V),' + @CRLF +
           N'--Get Data' + @CRLF +
           N'Data AS(' + @CRLF +
           N'    SELECT J.[key] AS [name],' + @CRLF +
           N'           V.date,' + @CRLF +
           N'           V.val' + @CRLF +
           N'    FROM OPENJSON(@JSON) J' + @CRLF +
           N'         CROSS APPLY (SELECT *' + @CRLF +
           N'                      FROM OPENJSON(J.[value])' + @CRLF +
           N'                      WITH(date int,' + @CRLF +
           N'                           val int)' + @CRLF +
           N'                      WHERE ISJSON(J.[value]) = 1) V)' + @CRLF +
           N'--Final Select and Unpivot' + @CRLF +
           N'SELECT JSON_VALUE(@JSON, ''$.id'') AS ID,' + @CRLF +
           N'       Y.Date,' + @CRLF +
           (SELECT STRING_AGG(N'ISNULL(MAX(CASE D.[name] WHEN ' + QUOTENAME(J.[key],'''') + N' THEN D.val END),0) AS ' + QUOTENAME(J.[key]),@Delimiter)
            FROM OPENJSON(@JSON) J
            WHERE ISJSON(J.[value]) = 1) + @CRLF + --only build columns for the array keys, not scalar keys like id
           N'FROM Years Y' + @CRLF +
           N'     LEFT JOIN Data D ON Y.Date = D.Date' + @CRLF +
           N'GROUP BY Y.Date;';
PRINT @SQL; --Your best friend for debugging
EXEC sys.sp_executesql @SQL, N'@JSON nvarchar(MAX)', @JSON;
db<>fiddle

folding over large BigQuery result

Is there any easy way for me to do something like OCaml's fold_left on the result of a BigQuery query, where each iteration corresponds to one row in the result?
Which product or approach would be the easiest way? It would be great if:
all I need to do is to supply the initial state and the 'folder' function
preferably, I'd like to write the 'folder' function in a functional language
I don't need to install any GCP package
Since I don't know which product or language would work, I cannot be more specific, but pseudocode would be like:
let my_init = []
let my_folder = fun state row ->
// append for now, but it will be complicated. I need to do some set operations here. The point is that I need some way of transferring "state" across rows, when I iterate over rows in a predefined order.
row.col1 :: state
let query = "SELECT col1, col2, col3 FROM table1 ORDER BY timestamp"
query |> List.fold my_folder my_init
The result that I want to get from this simplified example is the final "state".
--- UPDATED ---
There is no bound on the number of rows; if we receive more data, we get more rows. Typically the number is more than a few million, but it can be larger than that.
Here's a simplified example that shows the major problem I'm encountering. We have a table with a few columns:
timestamp
user_id: a string id
operation_json: a stringified JSON object, which is a list of operations, each of which corresponds to either:
add user_id to a set
remove user_id from a set
For example, the followings are valid rows:
----------+---------+----------------------------------------------
timestamp | user_id | operation_json
----------+---------+----------------------------------------------
1 | id1 | [ { "op": "add", "set": "set1" } ]
2 | id2 | [ { "op": "add", "set": "set1" } ]
3 | id1 | [ { "op": "add", "set": "set2" } ]
4 | id3 | [ { "op": "add", "set": "set2" } ]
5 | id1 | [ { "op": "remove", "set": "set1" } ]
----------+---------+----------------------------------------------
As a result, I'd like to get sets of users; i.e.,
set1 |-> { id2 }
set2 |-> { id1, id3 }
I thought a fold_left-like operation would be convenient. The state would be a map from set name to a set of user ids, and the initial state would be an empty map.
Below is a [quick and simple] example for BigQuery Standard SQL:
#standardSQL
CREATE TEMP FUNCTION fold(arr ARRAY<INT64>, init INT64)
RETURNS FLOAT64
LANGUAGE js AS """
const reducer = (accumulator, currentValue) => accumulator + parseInt(currentValue);
return arr.reduce(reducer, init); // seed the reduction with the supplied initial state
""";
WITH `project.dataset.table` AS (
SELECT 1 id, [1, 2, 3, 4] arr, 5 initial_state UNION ALL
SELECT 2, [1, 2, 3, 4, 5, 6, 7], 10
)
SELECT id, fold(arr, initial_state) result
FROM `project.dataset.table`
output is
Row id result
1 1 15.0
2 2 38.0
I think it is self-explanatory enough
See more on JS UDFs in the BigQuery documentation.
folding list of rows
See below an extension of the above.
Here you assemble the array from the result's rows before applying the fold function (of course, keep in mind that UDFs have limits here, including how big your ARRAY of rows can get, etc.).
#standardSQL
CREATE TEMP FUNCTION fold(arr ARRAY<INT64>, init INT64)
RETURNS FLOAT64
LANGUAGE js AS """
const reducer = (accumulator, currentValue) => accumulator + parseInt(currentValue);
return arr.reduce(reducer, init); // seed the reduction with the supplied initial state
""";
WITH `project.dataset.table` AS (
SELECT 1 id, 1 item UNION ALL
SELECT 1, 2 UNION ALL
SELECT 1, 3 UNION ALL
SELECT 1, 4 UNION ALL
SELECT 2, 1 UNION ALL
SELECT 2, 2 UNION ALL
SELECT 2, 3 UNION ALL
SELECT 2, 4 UNION ALL
SELECT 2, 5 UNION ALL
SELECT 2, 6 UNION ALL
SELECT 2, 7
)
SELECT id, fold(ARRAY_AGG(item), 5) result
FROM `project.dataset.table`
GROUP BY id
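With init genuinely used as the seed, passing 5 here should produce (10 + 5 and 28 + 5 respectively):
Row id result
1 1 15.0
2 2 33.0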
Note: if you need to include more than one field from each row, you can use an ARRAY of STRUCT, as in the example below
ARRAY_AGG(STRUCT(id, item) ORDER BY id)
Of course, you will need to adjust the signature of the fold UDF accordingly.
For example:
#standardSQL
CREATE TEMP FUNCTION fold(arr ARRAY<STRUCT<id INT64, item INT64>>, init INT64)
RETURNS FLOAT64
LANGUAGE js AS """
const reducer = (accumulator, currentValue) => accumulator + parseInt(currentValue.item);
return arr.reduce(reducer, init); // seed the reduction with the supplied initial state
""";
WITH `project.dataset.table` AS (
SELECT 1 id, 1 item UNION ALL
SELECT 1, 2 UNION ALL
SELECT 1, 3 UNION ALL
SELECT 1, 4 UNION ALL
SELECT 2, 1 UNION ALL
SELECT 2, 2 UNION ALL
SELECT 2, 3 UNION ALL
SELECT 2, 4 UNION ALL
SELECT 2, 5 UNION ALL
SELECT 2, 6 UNION ALL
SELECT 2, 7
)
SELECT id, fold(ARRAY_AGG(t), 5) result
FROM `project.dataset.table` t
GROUP BY id
The approach below has nothing to do with folding per se; rather, it is an attempt to translate your challenge into a set-based one (which is more natural when you are dealing with SQL) by identifying the latest op action for each user per set: if it is "remove", that user is simply eliminated from further consideration; if it is "add", the latest "add" for that user / set is used. This assumes that there cannot be multiple consecutive "add" actions for the same user / set; rather, it can be add / remove / add and so on. Of course, this can be further adjusted based on the real use case.
So, with the above in mind, below is an example for BigQuery Standard SQL:
#standardSQL
WITH `project.dataset.table` AS (
SELECT 1 ts, 'id1' user_id, '[ { "op": "add", "set": "set1" } ]' operation_json UNION ALL
SELECT 2, 'id2', '[ { "op": "add", "set": "set1" } ]' UNION ALL
SELECT 3, 'id1', '[ { "op": "add", "set": "set2" } ]' UNION ALL
SELECT 4, 'id3', '[ { "op": "add", "set": "set2" } ]' UNION ALL
SELECT 5, 'id1', '[ { "op": "remove", "set": "set1" } ]'
)
SELECT bin, STRING_AGG(user_id, ',' ORDER BY ts) result
FROM (
SELECT user_id, bin, ARRAY_AGG(ts ORDER BY ts DESC LIMIT 1)[OFFSET(0)] ts
FROM (
SELECT ts, user_id, op, bin, LAST_VALUE(op) OVER(win) fin
FROM (
SELECT ts, user_id,
JSON_EXTRACT_SCALAR(REGEXP_REPLACE(operation_json, r'^\[|\]$', ''), '$.op') op,
JSON_EXTRACT_SCALAR(REGEXP_REPLACE(operation_json, r'^\[|\]$', ''), '$.set') bin
FROM `project.dataset.table`
)
WINDOW win AS (
PARTITION BY user_id, bin
ORDER BY ts
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)
)
WHERE fin = 'add'
GROUP BY user_id, bin
)
GROUP BY bin
-- ORDER BY bin
output is
Row bin result
1 set1 id2
2 set2 id1,id3
If applied to the dummy data below:
WITH `project.dataset.table` AS (
SELECT 1 ts, 'id1' user_id, '[ { "op": "add", "set": "set1" } ]' operation_json UNION ALL
SELECT 2, 'id2', '[ { "op": "add", "set": "set1" } ]' UNION ALL
SELECT 3, 'id1', '[ { "op": "add", "set": "set2" } ]' UNION ALL
SELECT 4, 'id3', '[ { "op": "add", "set": "set2" } ]' UNION ALL
SELECT 5, 'id1', '[ { "op": "remove", "set": "set1" } ]' UNION ALL
SELECT 6, 'id1', '[ { "op": "add", "set": "set1" } ]' UNION ALL
SELECT 7, 'id1', '[ { "op": "remove", "set": "set1" } ]' UNION ALL
SELECT 8, 'id1', '[ { "op": "add", "set": "set1" } ]' UNION ALL
SELECT 9, 'id1', '[ { "op": "remove", "set": "set2" } ]' UNION ALL
SELECT 10, 'id1', '[ { "op": "add", "set": "set2" } ]'
)
the result will be:
Row bin result
1 set1 id2,id1
2 set2 id3,id1

Postgres Build Complex JSON Object from Wide Column Like Design to Key Value

I could really use some help here before my mind explodes...
Given the following data structure:
SELECT * FROM (VALUES (1, 1, 1, 1), (2, 2, 2, 2)) AS t(day, apple, banana, orange);
 day | apple | banana | orange
-----+-------+--------+--------
   1 |     1 |      1 |      1
   2 |     2 |      2 |      2
I want to construct a JSON object which looks like the following:
{
    "data": [
        {
            "day": 1,
            "fruits": [
                {
                    "key": "apple",
                    "value": 1
                },
                {
                    "key": "banana",
                    "value": 1
                },
                {
                    "key": "orange",
                    "value": 1
                }
            ]
        }
    ]
}
Maybe I am not so far away from my goal:
SELECT json_build_object(
    'data', json_agg(
        json_build_object(
            'day', t.day,
            'fruits', t)
    )
) FROM (VALUES (1, 1, 1, 1), (2, 2, 2, 2)) AS t(day, apple, banana, orange);
Results in:
{
    "data": [
        {
            "day": 1,
            "fruits": {
                "day": 1,
                "apple": 1,
                "banana": 1,
                "orange": 1
            }
        }
    ]
}
I know that there is json_each which may do the trick. But I am struggling to apply it to the query.
Edit:
This is my updated query which, I guess, is pretty close. I have dropped the idea of solving it with json_each. Now I only have to return an array of fruits instead of appending to the fruits object:
SELECT json_build_object(
    'data', json_agg(
        json_build_object(
            'day', t.day,
            'fruits', json_build_object(
                'key', 'apple',
                'value', t.apple,
                'key', 'banana',
                'value', t.banana,
                'key', 'orange',
                'value', t.orange
            )
        )
    )
) FROM (VALUES (1, 1, 1, 1), (2, 2, 2, 2)) AS t(day, apple, banana, orange);
Would I need to add a subquery to prevent a nested aggregate function?
Use the function jsonb_each() to get pairs (key, value), so you do not have to know the number of columns and their names to get a proper output:
select jsonb_build_object('data', jsonb_agg(to_jsonb(s) order by day))
from (
    select day, jsonb_agg(jsonb_build_object('key', key, 'value', value)) as fruits
    from (
        values (1, 1, 1, 1), (2, 2, 2, 2)
    ) as t(day, apple, banana, orange),
    jsonb_each(to_jsonb(t) - 'day')
    group by 1
) s;
The above query gives this object:
{
    "data": [
        {
            "day": 1,
            "fruits": [
                { "key": "apple", "value": 1 },
                { "key": "banana", "value": 1 },
                { "key": "orange", "value": 1 }
            ]
        },
        {
            "day": 2,
            "fruits": [
                { "key": "apple", "value": 2 },
                { "key": "banana", "value": 2 },
                { "key": "orange", "value": 2 }
            ]
        }
    ]
}
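For intuition, the inner jsonb_each step alone turns each row into (key, value) pairs; a quick sketch for a single row:
select day, key, value
from (values (1, 1, 1, 1)) as t(day, apple, banana, orange),
     jsonb_each(to_jsonb(t) - 'day');

 day |  key   | value
-----+--------+-------
   1 | apple  | 1
   1 | banana | 1
   1 | orange | 1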

multiply a value of each item of a json array with postgres 9.6

I tried many different things that I gathered here and there (official docs, blog posts, SO, …) but didn't succeed, so here's my question to you all:
Given this table:
basik=# select id, jsonb_pretty(range_price_list_values::jsonb) from product;
id | jsonb_pretty
--------------------------------------+--------------------------
cc80c862-c264-4bfe-a929-a52478c8d59e | [ +
| { +
| "to": 10, +
| "from": 5, +
| "price": 1 +
| }, +
| { +
| "to": 20, +
| "from": 15, +
| "price": 1298000+
| }, +
| { +
| "to": 30, +
| "from": 25, +
| "price": 500000 +
| } +
| ]
How can I multiply the price key of each element of each row of the table by 1000?
PS: my failed attempt was to look around the jsonb_* functions and window functions:
WITH prices as (select id, jsonb_array_elements(range_price_list_values::jsonb) from product)
UPDATE product SET range_price_list_values = JSONB_SET(
range_price_list_values::jsonb,
'{' || price.rank || ',price}', jsonb_extract_path('{' || price.rank || ',price}')::int * 1000, false
)::json;
Thanks for taking time to read! :)
You'll need a sub-select, as you want to update multiple fields in your JSON:
update product
set range_price_list_values = (
select jsonb_agg(case
when jsonb_typeof(elem -> 'price') = 'number'
then jsonb_set(elem, array['price'], to_jsonb((elem ->> 'price')::numeric * 1000))
else elem
end)
from jsonb_array_elements(range_price_list_values::jsonb) elem
)::json;
Note: this will only update numeric price keys; without the jsonb_typeof check, an exception would be thrown when a price is not a number.
http://rextester.com/PQN70851
The first thing that came to mind (quite ugly):
t=# create table product (id text, range_price_list_values jsonb);
CREATE TABLE
t=# insert into product select 'cc80c862-c264-4bfe-a929-a52478c8d59e','[
t'# {
t'# "to": 10,
t'# "from": 5,
t'# "price": 1
t'# },
t'# {
t'# "to": 20,
t'# "from": 15,
t'# "price": 1298000
t'# },
t'# {
t'# "to": 30,
t'# "from": 25,
t'# "price": 500000
t'# }
t'# ]';
INSERT 0 1
t=# with b as (with a as (select id, jsonb_array_elements(range_price_list_values::jsonb) j from product) select id,jsonb_set(j,'{price}',((j->>'price')::int * 1000)::text::jsonb) from a) select distinct id, jsonb_pretty(concat('[',string_agg(jsonb_set::text,',') over (partition by id),']')::jsonb) from b;
id | jsonb_pretty
--------------------------------------+-----------------------------
cc80c862-c264-4bfe-a929-a52478c8d59e | [ +
| { +
| "to": 10, +
| "from": 5, +
| "price": 1000 +
| }, +
| { +
| "to": 20, +
| "from": 15, +
| "price": 1298000000+
| }, +
| { +
| "to": 30, +
| "from": 25, +
| "price": 500000000 +
| } +
| ]
(1 row)
Having that in a CTE, you can run an UPDATE against it.
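For example (a sketch against the table from the transcript above; WITH ORDINALITY keeps the array order stable):
with updated as (
  select id,
         jsonb_agg(jsonb_set(j, '{price}', ((j->>'price')::int * 1000)::text::jsonb)
                   order by ord) as new_values
  from product,
       jsonb_array_elements(range_price_list_values) with ordinality a(j, ord)
  group by id
)
update product p
set range_price_list_values = u.new_values
from updated u
where p.id = u.id;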