Query json dictionary data in SQL - sql

A CLOB column in my table contains JSON that looks like the following:
{"a":"value1", "b":"value2", "c":"value3"}
I'm trying to write an SQL query that returns a table with key and value columns, like the following:
key|value
---|------
a |value1
b |value2
c |value3
Any help would be hugely appreciated!

Use JSON_TABLE and then UNPIVOT if you want the values in rows instead of columns:
-- Step 1: project the three known keys of each JSON document as columns a, b, c.
-- Step 2: UNPIVOT turns those columns into (key, value) rows.
WITH parsed AS (
    SELECT j.a,
           j.b,
           j.c
    FROM table_name t
    CROSS JOIN JSON_TABLE(
        t.value,
        '$'
        COLUMNS (
            a PATH '$.a',
            b PATH '$.b',
            c PATH '$.c'
        )
    ) j
)
SELECT key,
       value
FROM parsed
UNPIVOT ( value FOR key IN ( a, b, c ) );
So for some sample data:
-- Sample table: a single CLOB column that only accepts well-formed JSON.
CREATE TABLE table_name (
    value CLOB
        CONSTRAINT ensure_json CHECK ( value IS JSON )
);

-- One document holding three key/value pairs.
INSERT INTO table_name ( value )
VALUES ( '{"a":"value1", "b":"value2", "c":"value3"}' );
This outputs:
KEY | VALUE
:-- | :-----
A | value1
B | value2
C | value3
db<>fiddle here
If you want to do it dynamically then you can parse the JSON in PL/SQL and use GET_KEYS to get a collection of key names and then access the correct one by its position and correlate that to the value using FOR ORDINALITY:
-- Returns the name of the key found at position pos in the JSON object json.
--   pos  : index into the key list returned by GET_KEYS
--          (NOTE(review): used with FOR ORDINALITY values by the caller, so presumably 1-based - confirm)
--   json : JSON text to parse
-- No handler is installed, so parse or subscript errors propagate to the caller.
CREATE FUNCTION get_key(
    pos  IN PLS_INTEGER,
    json IN CLOB
) RETURN VARCHAR2
IS
    -- Key names of the parsed object.
    key_names JSON_KEY_LIST;
BEGIN
    key_names := JSON_OBJECT_T.PARSE( json ).GET_KEYS;
    RETURN key_names( pos );
END get_key;
/
Then:
-- '$.*' yields one row per member value of the top-level object; pos records
-- the position of that member so get_key can look up the matching key name.
SELECT get_key( kv.pos, src.value ) AS key,
       kv.value
FROM table_name src
CROSS APPLY JSON_TABLE(
    src.value,
    '$.*'
    COLUMNS (
        pos   FOR ORDINALITY,
        value PATH '$'
    )
) kv;
Outputs:
KEY | VALUE
:-- | :-----
a | value1
b | value2
c | value3
db<>fiddle here

Related

Oracle Select From JSON Array

I have a table for some 'settings' and in that table I have a record with a json array. It is a simple array, like this:
"['scenario1','scenario2','scenario3']"
I want to use a sub-select statement in a view to pull this information out so I can use it like this:
select * from table where field_scenario in (select ????? from settings_table where this=that)
I have been looking through documentation and googling for this but for the life of me I can't figure out how to 'pivot' the returning array into individual elements in order to use it.
Oracle 12c I believe, thanks in advance.
Do NOT use regular expression to parse JSON. Use a proper JSON parser:
SELECT *
FROM table_name
WHERE field_scenario IN (
    -- Every element of every settings array, one element per row.
    SELECT elems.value
    FROM settings_table cfg
    OUTER APPLY (
        SELECT value
        FROM JSON_TABLE(
            cfg.json,
            '$[*]'
            COLUMNS (
                value VARCHAR2(50) PATH '$'
            )
        )
    ) elems
)
Which, for the sample data:
-- Settings rows: each holds a JSON array of scenario names.
CREATE TABLE settings_table (
    json CLOB CHECK ( json IS JSON )
);

INSERT INTO settings_table ( json )
VALUES ( '["scenario1","scenario2","scenario3"]');

INSERT INTO settings_table ( json )
VALUES ( '["scenario5"]');

-- An element containing escaped double quotes.
INSERT INTO settings_table ( json )
VALUES ( '["scenario \"quoted\""]');

-- A single element that itself contains a comma.
INSERT INTO settings_table ( json )
VALUES ( '["scenario2,scenario4"]');

-- Rows to filter: scenario1 .. scenario6 plus one value with embedded quotes.
CREATE TABLE table_name ( id, field_scenario ) AS
    SELECT LEVEL, 'scenario'||LEVEL
    FROM DUAL
    CONNECT BY LEVEL <= 6
UNION ALL
    SELECT 7, 'scenario "quoted"'
    FROM DUAL;
Outputs:
ID | FIELD_SCENARIO
-: | :----------------
1 | scenario1
2 | scenario2
3 | scenario3
5 | scenario5
7 | scenario "quoted"
db<>fiddle here

Convert an array into a Map

I have a table with a column like
[{"key":"e","value":["253","203","204"]},{"key":"st","value":["mi"]},{"key":"k2","value":["1","2"]}]
Which is of the format array<struct<key:string,value:array<string>>>
I want to convert the column into below format :
{"e":["253","203","204"],"st":["mi"],"k2":["1","2"]}
which is of the type map<string,array<string>>
I have tried exploding the array, but that does not work. Any ideas how I can do this in Hive?
Without use of external libraries it's impossible. Please refer to brickhouse or create your own UDAF.
Note: the code below first reproduces the problem and then shows the part that Hive's built-in functions can solve, i.e. producing a map<string,string> rather than a map<string,array<string>>.
-- Reproduce the problem: one row whose column is an array of (key, value-array) structs.
CREATE TABLE test_table (
    id    INT,
    input ARRAY<STRUCT<key:STRING, value:ARRAY<STRING>>>
);

INSERT INTO TABLE test_table
SELECT
    1 AS id,
    ARRAY(
        named_struct("key", "e",  "value", ARRAY("253", "203", "204")),
        named_struct("key", "st", "value", ARRAY("mi")),
        named_struct("key", "k2", "value", ARRAY("1", "2"))
    ) AS input;

SELECT id, input
FROM test_table;
+-----+-------------------------------------------------------------------------------------------------------+--+
| id | input |
+-----+-------------------------------------------------------------------------------------------------------+--+
| 1 | [{"key":"e","value":["253","203","204"]},{"key":"st","value":["mi"]},{"key":"k2","value":["1","2"]}] |
+-----+-------------------------------------------------------------------------------------------------------+--+
With exploding and using STRUCT features, we can split the keys and values.
-- One output row per struct in the input array, with its key and value split out.
SELECT
    id,
    exploded_input.key,
    exploded_input.value
FROM test_table
LATERAL VIEW explode(input) d AS exploded_input;
+-----+------+----------------------+--+
| id | key | value |
+-----+------+----------------------+--+
| 1 | e | ["253","203","204"] |
| 1 | st | ["mi"] |
| 1 | k2 | ["1","2"] |
+-----+------+----------------------+--+
The idea is to use your UDAF to "collect" a map while aggregating on id.
What Hive can do with built-in functions is generate a map<string,string>: convert each row to a string using one special delimiter, aggregate the rows using another special delimiter, and then apply the built-in str_to_map function with those two delimiters.
-- Builds a map<string,string> per id using only built-in functions.
-- NOTE: keys or values that contain '#' or ':' would be split incorrectly,
-- because those characters are used as the delimiters below.
SELECT
    id,
    str_to_map(
        -- aggregated string per id, e.g. e:253,203,204#st:mi#k2:1,2
        concat_ws('#', collect_list(list_to_string)),
        '#',  -- first delimiter: separates one key/value pair from the next
        ':'   -- second delimiter: separates a key from its value
    ) AS mapped_output
FROM (
    SELECT
        id,
        -- outputs 3 rows: (e:253,203,204), (st:mi), (k2:1,2)
        concat(exploded_input.key, ':', concat_ws(',', exploded_input.value)) AS list_to_string
    FROM test_table
    LATERAL VIEW explode(input) d AS exploded_input
) y
GROUP BY id;
Which outputs a string to string map like:
+-----+-------------------------------------------+--+
| id | mapped_output |
+-----+-------------------------------------------+--+
| 1 | {"e":"253,203,204","st":"mi","k2":"1,2"} |
+-----+-------------------------------------------+--+
-- Inline sample input: an array of (key, value-array) structs.
WITH input_set AS (
    SELECT array(
        named_struct('key', 'e',  'value', array('253', '203', '204')),
        named_struct('key', 'st', 'value', array('mi')),
        named_struct('key', 'k2', 'value', array('1', '2'))
    ) AS input_array
),
-- One row per array element, together with its position in the array.
break_input_set AS (
    SELECT
        y.col_num   AS y_col_num,
        y.col_value AS y_col_value
    FROM input_set
    LATERAL VIEW posexplode(input_set.input_array) y AS col_num, col_value
),
-- One single-entry map per element; the rows are not aggregated into one map.
create_map AS (
    SELECT map(y_col_value.key, y_col_value.value) AS final_map
    FROM break_input_set
)
SELECT final_map
FROM create_map;
// Sample input: a list of { key, value } entries.
// Note: this variable is named Array and therefore shadows the built-in Array constructor.
var Array = [{"key":"e","value":["253","203","204"]},{"key":"st","value":["mi"]},{"key":"k2","value":["1","2"]}];

// Build an object that maps each entry's key to its value list.
var obj = {};
Array.forEach(function (entry) {
  obj[entry.key] = entry.value;
});
obj will be in the required format

Unexpected behavior in Oracle when using Group By with JSON_TABLE

I have a denormalized VIEW we'll call VIEW_X which looks like the following (It's just a regular simple view - not materialized or anything like that):
ID GROUP_ID PART_1_ID PART_2_ID
1 1723189 cd69f0f4-a5ed-4196-916d-401e98ffec75 X1
1 1723189 cd69f0f4-a5ed-4196-916d-401e98ffec75 X2
2 1723185 8d5132cb-1b6e-4e79-9698-fd1962eb808f K1
2 1723188 a191cb01-32ac-4ab4-bd6b-3ef777e395ca K1
It's denormalized in that it actually represents a structure like this:
{
id: 1,
group_id: 1723189,
part_1_id: 'cd69f0f4-a5ed-4196-916d-401e98ffec75'
part_2_ids: ["X1", "X2"]
}
the PART_2_ID in this view is the result of selecting from a JSON_TABLE where the data in the original table is stored in an array like ["X1", "X2"]:
-- FROM-clause fragment: expands the PART_2_IDS JSON array into one row per
-- element, exposing each element as the PART_2_ID column.
JSON_TABLE(a.PART_2_IDS, '$' COLUMNS (
NESTED PATH '$[*]'
COLUMNS (
PART_2_ID VARCHAR2(4000) PATH '$'
)
)) p2
When I run a query like this on this view I get 0 results although the expected result is a single result with the ID of 2:
-- Keep only rows whose PART_2_ID is 'K1', then return the IDs that have
-- exactly one distinct PART_2_ID among those rows.
SELECT ID
FROM VIEW_X
WHERE PART_2_ID IN ('K1')
GROUP BY ID
HAVING COUNT(DISTINCT(PART_2_ID)) = 1
ID
--
(no results)
figure 1
Curiously enough if I run just the following I get the expected two results as there are two rows with ID 2 where there is a match on PART_2_ID as K1:
-- Same filter without grouping: one output row per matching view row.
SELECT ID
FROM VIEW_X
WHERE PART_2_ID IN ('K1')
ID
--
2
2
If, however, I run either of the following queries I get a match on ID 1:
-- Same shape as the K1 query, filtering on 'X1' instead.
SELECT ID
FROM VIEW_X
WHERE PART_2_ID IN ('X1')
GROUP BY ID
HAVING COUNT(DISTINCT(PART_2_ID)) = 1
ID
--
1
-- IDs that have both 'X1' and 'X2': two distinct PART_2_ID values after filtering.
SELECT ID
FROM VIEW_X
WHERE PART_2_ID IN ('X1', 'X2')
GROUP BY ID
HAVING COUNT(DISTINCT(PART_2_ID)) = 2
ID
--
1
I don't understand why figure 1 is not returning the expected result - is there something I'm overlooking? Is this a quirk with how JSON_TABLE works?
I cannot replicate this in:
Oracle Database 12c Enterprise Edition Release 12.1.0.2.0 - 64bit Production; or
Oracle Database 18c Enterprise Edition Release 18.0.0.0.0 - Production on https://livesql.oracle.com
Oracle Setup:
-- Source table: one JSON document per row.
CREATE TABLE table1 (
    document CLOB
        CONSTRAINT ensure_json CHECK ( document IS JSON )
);

INSERT INTO table1 ( document )
VALUES ( '{"id":1,"group_id":1723189,"part_1_id":"cd69f0f4-a5ed-4196-916d-401e98ffec75","part_2_ids":["X1","X2"]}' );

INSERT INTO table1 ( document )
VALUES ( '{"id":2,"group_id":1723185,"part_1_id":"8d5132cb-1b6e-4e79-9698-fd1962eb808f","part_2_ids":["K1"]}' );

INSERT INTO table1 ( document )
VALUES ( '{"id":2,"group_id":1723188,"part_1_id":"a191cb01-32ac-4ab4-bd6b-3ef777e395ca","part_2_ids":["K1"]}' );

-- Denormalized view: the scalar fields are repeated for every element of part_2_ids.
CREATE VIEW VIEW_X AS
SELECT doc.id,
       doc.group_id,
       doc.part_1_id,
       doc.part_2_id
FROM table1 src
CROSS JOIN JSON_TABLE(
    src.document,
    '$'
    COLUMNS (
        id        PATH '$.id',
        group_id  PATH '$.group_id',
        part_1_id PATH '$.part_1_id',
        NESTED PATH '$.part_2_ids[*]'
            COLUMNS (
                PART_2_ID VARCHAR2(4000) PATH '$'
            )
    )
) doc;
Query 1:
-- Show every row the view produces.
SELECT *
FROM VIEW_X;
Results:
ID GROUP_ID PART_1_ID PART_2_ID
---------- ---------- ------------------------------------ ---------
1 1723189 cd69f0f4-a5ed-4196-916d-401e98ffec75 X1
1 1723189 cd69f0f4-a5ed-4196-916d-401e98ffec75 X2
2 1723185 8d5132cb-1b6e-4e79-9698-fd1962eb808f K1
2 1723188 a191cb01-32ac-4ab4-bd6b-3ef777e395ca K1
Query 2:
-- The query from the question that reportedly returned no rows.
SELECT ID
FROM VIEW_X
WHERE PART_2_ID IN ('K1')
GROUP BY ID
HAVING COUNT(DISTINCT(PART_2_ID)) = 1;
Results:
ID
--
2

Hive concat two map object

I have two tables as follow in hive:
Table 1
key1 | value1
int | map(int,array(int))
Table 2
key2 | value2
int | map(int,array(int))
Now I join the tables on the key, and I want to concatenate the two maps that share the same key. In other words, the final result should look like:
Table
key | value
int | map(int,array(int))
I tried to use function collect_set when I am joining as follows:
collect_set(value1,value2)
but it threw an exception saying that only one input is required. Any thoughts or comments?
Thanks
COLLECT_SET() is an aggregate function so it wouldn't really be useful (or valid) if you are trying to combine things. One thing you could try is using COMBINE(). It can be found in this library of UDFs here. Suppose you had some data like:
table0:
idx map_kv
0 {2:[1,2,3,4], 3:[5,6,7,8,9]}
table1:
idx map_kv
0 {2:[5,6,7,8,9], 3:[1,2,3,4]}
Then you could do
Query:
-- Register the Brickhouse UDFs used below.
ADD JAR /path/to/jar/brickhouse-0.7.1.jar;
CREATE TEMPORARY FUNCTION COLLECT AS 'brickhouse.udf.collect.CollectUDAF';
CREATE TEMPORARY FUNCTION COMBINE AS 'brickhouse.udf.collect.CombineUDF';

-- Explode both maps into (idx, map_key, array) rows, join them on idx and
-- map_key, combine the two arrays per key, then collect the pairs into a map.
-- Because this is an inner join, a key present in only one map is dropped.
SELECT
    idx,
    COLLECT(map_key, arr) AS final_map
FROM (
    SELECT
        lhs.idx,
        lhs.map_key,
        COMBINE(map_val_0, map_val_1) AS arr
    FROM (
        SELECT
            idx,
            map_key,
            map_val_0
        FROM database.table0
        LATERAL VIEW EXPLODE(map_kv) exptbl0 AS map_key, map_val_0
    ) lhs
    JOIN (
        SELECT
            idx,
            map_key,
            map_val_1
        FROM database.table1
        LATERAL VIEW EXPLODE(map_kv) exptbl1 AS map_key, map_val_1
    ) rhs
        ON lhs.idx = rhs.idx
        AND lhs.map_key = rhs.map_key
) merged
GROUP BY idx;
This will produce:
Output:
idx final_map
0 {2:[1,2,3,4,5,6,7,8,9], 3:[5,6,7,8,9,1,2,3,4]}

PostgreSQL query on text array value

I have a table where one column has an array - but stored in a text format:
mytable
id ids
-- -------
1 '[3,4]'
2 '[3,5]'
3 '[3]'
etc ...
I want to find all records that have the value 5 as an array element in the ids column.
I was trying to achieve this by using the "string to array" function and removing the [ symbols with the translate function, but couldn't find a way.
You can do this: http://www.sqlfiddle.com/#!1/5c148/12
-- Rewrite the bracketed text as an array literal, cast it to int[],
-- and keep rows whose array overlaps {5}.
SELECT *
FROM tbl
WHERE CAST(translate(ids, '[]', '{}') AS int[]) && ARRAY[5];
Output:
| ID | IDS |
--------------
| 2 | [3,5] |
You can also use bool_or: http://www.sqlfiddle.com/#!1/5c148/11
-- One row per array element, then keep the ids where any element equals 5.
WITH elems AS (
    SELECT id,
           unnest(CAST(translate(ids, '[]', '{}') AS int[])) AS elem
    FROM tbl
)
SELECT id
FROM elems
GROUP BY id
HAVING bool_or(elem = 5);
To see the original elements:
-- As above, but also rebuilds the bracketed text form from the unnested elements.
WITH elems AS (
    SELECT id,
           unnest(CAST(translate(ids, '[]', '{}') AS int[])) AS elem
    FROM tbl
)
SELECT id,
       '[' || array_to_string(array_agg(elem), ',') || ']' AS ids
FROM elems
GROUP BY id
HAVING bool_or(elem = 5);
Output:
| ID | IDS |
--------------
| 2 | [3,5] |
PostgreSQL DDL is atomic; if it's not too late in your project, just convert your stringly-typed array to a real array: http://www.sqlfiddle.com/#!1/6e18c/2
-- Add a real int[] column, fill it from the text column, then drop the text column.
ALTER TABLE tbl
    ADD COLUMN id_array int[];

-- Intentionally no WHERE clause: every row is converted.
UPDATE tbl
SET id_array = CAST(translate(ids, '[]', '{}') AS int[]);

ALTER TABLE tbl
    DROP COLUMN ids;
Query:
-- With a real array column the overlap test needs no text conversion.
SELECT *
FROM tbl
WHERE id_array && ARRAY[5]
Output:
| ID | ID_ARRAY |
-----------------
| 2 | 3,5 |
You can also use contains operator: http://www.sqlfiddle.com/#!1/6e18c/6
-- @> is the array "contains" operator: true when id_array includes every
-- element of the right-hand array (here, the single value 5).
SELECT *
FROM tbl
WHERE id_array @> ARRAY[5];
I prefer the && syntax though, it directly connotes intersection. It reflects that you are detecting if there's an intersection between two sets(array is a set)
http://www.postgresql.org/docs/8.2/static/functions-array.html
If you store the string representation of your arrays slightly differently, you can cast to array of integer directly:
-- Store the text in PostgreSQL's own array-literal form ({...}) ...
INSERT INTO mytable
VALUES
    (1, '{3,4}'),
    (2, '{3,5}'),
    (3, '{3}');

-- ... so that it can be cast to int[] directly.
SELECT id,
       CAST(ids AS int[])
FROM mytable;
Else, you have to put in one more step:
-- Bracketed text needs its [ ] replaced by { } before the cast.
SELECT CAST(translate(ids, '[]', '{}') AS int[])
FROM mytable
I would consider making the column an array type to begin with.
Either way, you can find your row like this:
-- Expand each row into one row per array element, then keep the rows whose
-- element is 5. The direct cast expects ids in the {...} text form.
SELECT id,
       ids
FROM (
    SELECT id,
           ids,
           unnest(CAST(ids AS int[])) AS elem
    FROM mytable
) expanded
WHERE elem = 5