I have a very simple table containing 5 columns and the table will only hold 1 record at a time. I'm to generate a JSON string from the record and send it to an endpoint.
This is how the JSON string is to be formatted. As you can see, it contains 2 'roots', and this is giving me a hard time getting the correct format.
{
"fields": [
{
"fieldName": "Brand",
"values": [
"FORD"
]
},
{
"fieldName": "Engine",
"values": [
"V12"
]
},
{
"fieldName": "Location",
"values": [
"Monaco"
]
}
],
"categories": [
{
"fieldName": "Colour",
"values": [
[
{
"name": "Blue"
}
]
]
},
{
"fieldName": "Interior",
"values": [
[
{
"name": "Red"
}
]
]
}
]
}
This is my table containing the 5 columns
I have managed to create 2 separate SQL queries to get the JSON string, but I can't figure out how to do it in one select.
-- Query 1: produces the {"fields":[...]} document, one {fieldName, value}
-- entry per source column of [dbo].[JSONBODY].
SELECT (
SELECT X.* FROM (
-- The CASE emits the label only when the column is non-NULL; for a NULL column
-- both fieldName and value are NULL, which FOR JSON renders as an empty {}.
SELECT CASE WHEN CarName IS NOT NULL THEN 'Brand' ELSE NULL END AS fieldName,
CarName AS [value]
FROM [dbo].[JSONBODY]
-- UNION (as opposed to UNION ALL) also removes duplicate rows.
UNION
SELECT CASE WHEN Engine IS NOT NULL THEN 'Engine' ELSE NULL END AS fieldName,
Engine AS [value]
FROM [dbo].[JSONBODY]
UNION
SELECT CASE WHEN [location] IS NOT NULL THEN 'Location' ELSE NULL END AS fieldName,
[Location] AS [value]
FROM [dbo].[JSONBODY] ) X
FOR JSON PATH, ROOT('fields'))
-- Query 2: produces the {"categories":[...]} document.
SELECT (
SELECT Y.* FROM (
-- The column text is concatenated into a JSON literal without escaping, and the
-- dotted alias 'value.name' nests the result as {"value":{"name":...}}.
SELECT CASE WHEN Colour IS NOT NULL THEN 'Colour' ELSE NULL END AS fieldName,
JSON_QUERY('[["' + Colour + '"]]') AS 'value.name'
FROM [dbo].[JSONBODY]
UNION
SELECT CASE WHEN Interior IS NOT NULL THEN 'Interior' ELSE NULL END AS fieldName,
JSON_QUERY('[["' + Interior + '"]]') AS 'value.name'
FROM [dbo].[JSONBODY]) Y
FOR JSON PATH, ROOT('categories'))
And here are the 2 JSON strings:
{"fields":[{"fieldName":"Brand","value":"Ford"},{"fieldName":"Engine","value":"V6"},{"fieldName":"Location","value":"Boston"}]}
{"categories":[{"fieldName":"Colour","value":{"name":[["Blue"]]}},{"fieldName":"Interior","value":{"name":[["Black"]]}}]}
Question 1:
Is it possible to create the JSON string through a single SQL Select? And how can I do it?
Question 2:
If a column value is NULL, it is excluded automatically from the JSON string. But I had to add the fieldName to the select, and I had hoped it would also be excluded from the JSON string when the corresponding field is NULL. Instead, it creates an empty {} in the JSON string, which is not accepted when calling the endpoint. So is there another way to do it when a column value is NULL? I can of course delete it from the JSON string afterwards....
Hope the above makes sense
To do it as a single SELECT you can just UNION ALL the two results together
You can unpivot the values, then check them afterwards for nulls.
Unfortunately, SQL Server does not have JSON_AGG, so you have to bodge it with STRING_AGG and STRING_ESCAPE
-- Returns two rows from a single statement: the {"fields":[...]} document and
-- the {"categories":[...]} document.
-- Each FOR JSON query sits in its own scalar subquery so that FOR JSON applies
-- to that branch only; a ';' between the branches would end the statement
-- before UNION ALL.
SELECT (
    SELECT
        v.fieldName,
        -- SQL Server has no JSON_AGG: escape every value, join the quoted values
        -- with STRING_AGG, and wrap in JSON_QUERY so FOR JSON treats the text as
        -- JSON instead of escaping it a second time.
        value = JSON_QUERY('[' + STRING_AGG('"' + STRING_ESCAPE(v.value, 'json') + '"', ',') + ']')
    FROM [dbo].[JSONBODY] jb
    -- Unpivot the columns into (fieldName, value) pairs.
    CROSS APPLY (VALUES
        ('Brand', jb.Brand),
        ('Engine', jb.Engine),
        ('Location', jb.Location)
    ) v(fieldName, value)
    -- Skip NULL columns so no entry consisting of a fieldName alone is emitted.
    WHERE v.value IS NOT NULL
    GROUP BY
        v.fieldName
    FOR JSON PATH, ROOT('fields')
)
UNION ALL
SELECT (
    SELECT
        v.fieldName,
        [value.name] = JSON_QUERY('[[' + STRING_AGG('"' + STRING_ESCAPE(v.value, 'json') + '"', ',') + ']]')
    FROM [dbo].[JSONBODY] jb
    CROSS APPLY (VALUES
        ('Colour', jb.Colour),
        ('Interior', jb.Interior)
    ) v(fieldName, value)
    WHERE v.value IS NOT NULL
    GROUP BY
        v.fieldName
    FOR JSON PATH, ROOT('categories')
);
If you know you will only ever have one row, you can simplify it by removing the GROUP BY
-- Single-row variant: with only one row in the table no aggregation is needed.
-- Row 1 is the {"fields":[...]} document, row 2 the {"categories":[...]} document.
SELECT (
    SELECT
        u.fieldName,
        -- Escape the text, wrap it as a one-element JSON array, and mark it as JSON.
        JSON_QUERY('["' + STRING_ESCAPE(u.value, 'json') + '"]') AS [value]
    FROM [dbo].[JSONBODY] AS src
    CROSS APPLY (
        VALUES
            ('Brand', src.Brand),
            ('Engine', src.Engine),
            ('Location', src.Location)
    ) AS u (fieldName, value)
    -- NULL columns are dropped entirely, so no partial entry appears.
    WHERE u.value IS NOT NULL
    FOR JSON PATH, ROOT('fields')
)
UNION ALL
SELECT (
    SELECT
        u.fieldName,
        JSON_QUERY('[["' + STRING_ESCAPE(u.value, 'json') + '"]]') AS [value.name]
    FROM [dbo].[JSONBODY] AS src
    CROSS APPLY (
        VALUES
            ('Colour', src.Colour),
            ('Interior', src.Interior)
    ) AS u (fieldName, value)
    WHERE u.value IS NOT NULL
    FOR JSON PATH, ROOT('categories')
);
db<>fiddle
Related
I have a query that selects the rows from joined table as an array using ARRAY_AGG() function.
-- Asker's attempt: collects the properties of every entity that has a property
-- with the given file name.
select
entity_number,
-- Each element is a text value assembled with ||, so ARRAY_AGG yields an array
-- of strings that merely look like JSON objects, not real JSON values.
ARRAY_AGG('{"property_id":"'||property_id||'","value":"'||value||'"}') entity_properties from entities
join entity_properties
on entities.id = entity_properties.entity_id
-- Restrict to entities owning at least one property with this value.
where entities.id in (
select entity_id from entity_properties
where value = '6258006d824a25dabdb39a79.pdf'
)
group by entities.id;
what I get is:
[
{
"entity_number":"P1718238009-1",
"entity_properties":"[
\"{\"property_id\":\"006109cd-a100-437c-a683-f13413b448e6\",\"value\":\"Rozilik berildi\"}\",
\"{\"property_id\":\"010f5e23-d66f-4414-b54b-9647afc6762b\",\"value\":\"6258006d824a25dabdb39a79.pdf\"}\",
\"{\"property_id\":\"0a01904e-1ca0-40ef-bbe1-c90eaddea3fc\",\"value\":\"6260c9e9b06e4c2cc492c470_2634467.pdf\"}\"
]"
}
]
As you can see, it is not json parsable
To parse entity_properties as array of objects I need the data in this format
[
{
"entity_number":"P1718238009-1",
"entity_properties":[
{"property_id":"006109cd-a100-437c-a683-f13413b448e6","value":"Rozilik berildi"},
{"property_id":"010f5e23-d66f-4414-b54b-9647afc6762b","value":"6258006d824a25dabdb39a79.pdf"},
{"property_id":"0a01904e-1ca0-40ef-bbe1-c90eaddea3fc","value":"6260c9e9b06e4c2cc492c470_2634467.pdf"}
]
}
]
Can I achieve what I want with ARRAY_AGG()? How?
If not, what approach should I take?
Try using json_agg and json_build_object function
like this:
-- One row per entity: entity_properties is a JSON array holding one
-- {"property_id": ..., "value": ...} object per joined property row.
select
    entity_number,
    json_agg(
        json_build_object('property_id', property_id, 'value', value)
    ) as entity_properties
from entities as e
inner join entity_properties as ep
    on e.id = ep.entity_id
-- Keep only entities that own at least one property with this value.
where e.id in (
    select entity_id
    from entity_properties
    where value = '6258006d824a25dabdb39a79.pdf'
)
group by e.id;
Using a simplified sample data this query provides the first step of the aggregation
-- Inline sample rows stand in for the joined tables.
with tab as (
    select *
    from (
        values
            (1, 'a', 'x'),
            (1, 'b', 'y'),
            (2, 'c', 'z')
    ) tab (entity_number, property_id, value)
)
-- First aggregation step: one JSON array of property objects per entity.
select
    entity_number,
    json_agg(
        json_build_object('property_id', property_id, 'value', value)
    ) as entity_properties
from tab
group by entity_number
;
entity_number|entity_properties |
-------------+----------------------------------------------------------------------------+
1|[{"property_id" : "a", "value" : "x"}, {"property_id" : "b", "value" : "y"}]|
2|[{"property_id" : "c", "value" : "z"}]
Additional aggregation returns the final json array
-- Inline sample rows stand in for the joined tables.
with tab as (
    select *
    from (
        values
            (1, 'a', 'x'),
            (1, 'b', 'y'),
            (2, 'c', 'z')
    ) tab (entity_number, property_id, value)
),
-- Step 1: one JSON array of property objects per entity.
tab2 as (
    select
        entity_number,
        json_agg(
            json_build_object('property_id', property_id, 'value', value)
        ) as entity_properties
    from tab
    group by entity_number
)
-- Step 2: fold the per-entity rows into a single JSON array of entity objects.
select
    json_agg(
        json_build_object(
            'entity_number', entity_number,
            'entity_properties', entity_properties
        )
    )
from tab2
[
{
"entity_number": 1,
"entity_properties": [
{
"value": "x",
"property_id": "a"
},
{
"value": "y",
"property_id": "b"
}
]
},
{
"entity_number": 2,
"entity_properties": [
{
"value": "z",
"property_id": "c"
}
]
}
]
Note that I used jsonb_pretty to format the output.
I have this column text in a table which contains following string
{
"person": {
"id": "b01d9bf1-998f-4fa8-879a-0f8d0de4b626",
"creationDate": [
2022,
1,
22
],
"modificationDate": [
2022,
1,
27
]
}
}
I have the following regexp_matches query:
-- Returns the body of every bracketed array as its own match.
-- [^]] stops at the first closing bracket; a class excluding ')' instead lets a
-- single greedy match run from the first '[' to the last ']'.
select regexp_matches('"creationDate": [2022,1,22], "modificationDate": [2022,1,27],', '\[([^]]+)\]', 'g')
but I need to replace
"creationDate": [2022,1,22], "modificationDate": [2022,1,27],
to
"creationDate": "2022-01-22", "modificationDate": "2022-01-27",
I'm not very good working with regular expressions. Also the difficulty is in adding a leading zero to the month as you can see.
Regex-based
A nested regex replacement does the trick:
-- Rewrites every [y,m,d] integer array as a quoted "y-mm-dd" string.
-- Innermost call : [2022,1,22] -> "2022-1-22"
-- Middle call    : zero-pads a single-digit day   ("2000-1-1"  -> "2000-1-01")
-- Outer call     : zero-pads a single-digit month ("2000-1-01" -> "2000-01-01")
-- Both padding patterns are anchored on the surrounding quotes and digits, so
-- they only touch the date strings produced by the innermost call.
select regexp_replace(
regexp_replace(
regexp_replace(
'"creationDate": [2022,1,22], "modificationDate": [2022,1,27],'
, '\[(\d+),(\d+),(\d+)\]'
, '"\1-\2-\3"'
, 'g'
)
, '("\d+-\d+)-(\d)"'
, '\1-0\2"'
, 'g'
)
, '("\d+)-(\d)-(\d+")'
, '\1-0\2-\3'
, 'g');
The outer replacement only fires if the month is represented by a single digit.
JSON-based
Building on the comment by @a_horse_with_no_name, the following query uses JSON operators:
-- One row per top-level property: the key plus its [y,m,d] array rendered as
-- y-mm-dd, with month and day left-padded to two digits.
select kv.key
     , (kv.value ->> 0)
       || '-' || lpad(kv.value ->> 1, 2, '0')
       || '-' || lpad(kv.value ->> 2, 2, '0') as mydate
from json_each(
         '{"creationDate": [2022,1,22], "modificationDate": [2022,1,27] }'::json
     ) as kv
;
The query builds a set of records from a JSON object consisting of a key (the JSON property name) and a value of the native JSON datatype, which happens to be an array. The array elements are extracted, padded with leading zeros where appropriate and concatenated.
See the Postgresql docs for JSON operators and functions for more info.
Full-fledged example
Query to produce a recordset of persons containing their id plus the creation and modification date based on a json array of objects as given in the question.
-- Sample input: a JSON array of objects, each wrapping one "person".
with orig as (
    select '[
{ "person": { "id": "b01d9bf1-998f-4fa8-879a-0f8d0de4b626", "creationDate": [2022,1,22], "modificationDate": [2022,1,27] } }
, { "person": { "id": "deadcafe-998f-4fa8-879a-0f8d0de4b626", "creationDate": [2000,1,1], "modificationDate": [2000,12,31] } }
]'::jsonb as j
)
-- One row per person: id plus both dates rendered as y-mm-dd.
select x.id
     , (x."creationDate" ->> 0)
       || '-' || lpad(x."creationDate" ->> 1, 2, '0')
       || '-' || lpad(x."creationDate" ->> 2, 2, '0') as creation_date
     , (x."modificationDate" ->> 0)
       || '-' || lpad(x."modificationDate" ->> 1, 2, '0')
       || '-' || lpad(x."modificationDate" ->> 2, 2, '0') as modification_date
from orig
-- Collect every $.person object into one array, then expand it into typed columns.
cross join lateral jsonb_to_recordset(
         jsonb_path_query_array(orig.j, '$.person')
     ) as x (id varchar(50), "creationDate" json, "modificationDate" json)
;
Available live here (dbfiddle.co.uk).
I have a table that looks like:
ID|FIELD1
1|[ { "list": [ {} ] } ]
2|[ { "list": [ { "item": "" } ] } ]
3|[ { "list": [ { "item": "Tag1" }, { "item": "Tag2" } ] } ]
And I want to get all the tags associated to this specific query such that I can just get a list:
Tag1,Tag2
I've tried
-- Asker's attempt: reads "item" from the first entry of the first element's
-- "list" and passes the extracted value to PARSE_JSON; rows whose first list
-- entry equals '{}' are meant to be filtered out. This statement raises the
-- JSON error quoted below it.
SELECT PARSE_JSON(FIELD1[0]['list'][0]['item']) FROM MY_TABLE
WHERE PARSE_JSON(FIELD1[0]['list'][0]) != '{}'
But I get
JSON: garbage in the numeric literal: 65-310 , pos 7
How can I properly unpack these values in SQL?
UPDATE: Clumsy Solution
-- Joins the distinct codes with the separator ',' (quotes included).
SELECT LISTAGG(CODES,'\',\'') AS PROMO_CODES
FROM
-- Only index [0] of "list" is read, so later entries of a list are never seen.
(SELECT DISTINCT FIELD1[0]['list'][0]['item'] AS CODES FROM MY_TABLE
WHERE FIELD1[0]['list'][0] IS NOT NULL
AND FIELD1[0]['list'][0] != '{}'
AND FIELD1[0]['list'][0]['item'] != ''
)
Please have a look into below knowledge article, if this helps in your case:
https://community.snowflake.com/s/article/Dynamically-extracting-JSON-using-LATERAL-FLATTEN
As I see, the Clumsy Solution does not provide the correct result. It shows only Tag1. So here's my solution:
-- Expands every entry of the first element's "list" into its own row and joins
-- the non-empty "item" values with commas.
select LISTAGG( v.VALUE:item, ',' )
from MY_TABLE,
    lateral flatten(input => parse_json(FIELD1[0]):list) v
where v.VALUE:item != '';
I would recommend to add DISTINCT to prevent duplicate tags in the output:
-- Same expansion as the plain version; DISTINCT keeps each tag once in the list.
select LISTAGG( DISTINCT v.VALUE:item, ',' )
from MY_TABLE,
    lateral flatten(input => parse_json(FIELD1[0]):list) v
where v.VALUE:item != '';
If there are more items in the FIELD1 array (ie 0,1,2), you may use this one:
-- Walks every element of FIELD1 (not just index 0), then every entry of that
-- element's "list", and joins the distinct non-empty "item" values.
select LISTAGG( DISTINCT v.VALUE:item, ',' )
from MY_TABLE,
    lateral flatten(input => FIELD1) f,
    lateral flatten(input => parse_json(f.VALUE):list) v
where v.VALUE:item != '';
I have the below JSON object. I need to write a select query to get the index values of Object JSON array. Kind of getting the sequence value.
{
"Model": [
{
"ModelName": "Test Model",
"Object": [
{
"ID": 1,
"Name": "ABC",
},
{
"ID": 11,
"Name": "ABCD",
},
{
"ID": 15,
"Name": "ABCDE",
},
]
}]}
Expected Output:
Index_Value
1
2
3
If I understand the question correctly and you want to get the index of the items in the Object JSON array, you need to use OPENJSON() with default schema. The result is a table with columns key, value and type and in case of JSON array, the key column holds the index of each item in the array (0-based):
JSON:
-- Sample document: one model whose "Object" array holds three items.
-- T-SQL local variables are prefixed with '@'.
DECLARE @json nvarchar(max) = N'{
"Model":[
{
"ModelName":"Test Model",
"Object":[
{
"ID":1,
"Name":"ABC"
},
{
"ID":11,
"Name":"ABCD"
},
{
"ID":15,
"Name":"ABCDE"
}
]
}
]
}'
Statement:
-- 1-based position of every item in each model's "Object" array.
-- OPENJSON with the default schema exposes the 0-based array index in [key].
SELECT CONVERT(int, j2.[key]) + 1 AS item_id
FROM OPENJSON (@json, '$.Model') j1
CROSS APPLY OPENJSON(j1.[value], '$.Object') j2
But if you want to get the values of the ID keys in the Object JSON array, the statement is different:
-- Values of the "ID" key of every item in each model's "Object" array,
-- extracted through an explicit WITH schema.
SELECT j2.ID
FROM OPENJSON (@json, '$.Model') j1
CROSS APPLY OPENJSON(j1.[value], '$.Object') WITH (
ID int '$.ID'
) j2
Note, that you need two OPENJSON() calls, because the input JSON has nested array structure. Of course, if Model JSON array has always one item, you may simplify the statement using an appropriate path:
-- Shortcut for a "Model" array with a single element: address its "Object"
-- array directly and return each item's 1-based position.
SELECT CONVERT(int, [key]) + 1 AS item_id
FROM OPENJSON (@json, '$.Model[0].Object')
Finally, to get the index, ID and Name, you should use the following statement, which assumes that the $.Model JSON array has more than one item and defines the ID and Name columns with the appropriate data types:
-- 1-based position plus typed ID and Name of every item in each model's
-- "Object" array.
SELECT
CONVERT(int, j2.[key]) + 1 AS ItemID,
j3.ID, j3.Name
FROM OPENJSON (@json, '$.Model') j1
-- Default schema: [key] is the 0-based index, [value] the item itself.
CROSS APPLY OPENJSON(j1.[value], '$.Object') j2
-- Explicit schema: project the item's properties as typed columns.
CROSS APPLY OPENJSON(j2.[value], '$') WITH (
ID int '$.ID',
Name varchar(50) '$.Name'
) j3
-- Sample document: one model whose "Object" array holds three items.
DECLARE @json nvarchar(max) = N'{
"Model":[
{
"ModelName":"Test Model",
"Object":[
{
"ID":1,
"Name":"ABC"
},
{
"ID":11,
"Name":"ABCD"
},
{
"ID":15,
"Name":"ABCDE"
}
]
}
]
}'
-- 0-based index of the "Object" item to extract.
declare @i int=0;
-- Returns the ID and Name of the item at position @i in each model's "Object" array.
SELECT
j2.ID, j2.Name
FROM OPENJSON (@json, '$.Model') j1
CROSS APPLY OPENJSON(j1.[value],concat('$.Object[',@i,']')) WITH (
ID int '$.ID', Name varchar(100) '$.Name'
) j2
Results:-
ID
Name
11
ABCD
You can select the key columns in the SELECT clause; there is no need to list them in the WITH clause of the CROSS APPLY.
-- One row per element of $.parentkey: selected properties are read with
-- JSON_VALUE, and the element's array index comes from OPENJSON's [key] column.
SELECT DISTINCT
    t.id,
    JSON_VALUE(AttsData.[value], '$.address') AS address,
    JSON_VALUE(AttsData.[value], '$.name') AS name,
    JSON_VALUE(AttsData.[value], '$.owner_name') AS owner_name,
    JSON_VALUE(AttsData.[value], '$.project') AS project,
    CONVERT(int, AttsData.[key]) AS index_id
FROM mytablewithjsonfeild AS t
CROSS APPLY OPENJSON(t."jsonfeild", N'$.parentkey') AS AttsData
In the query above, I cross-applied OPENJSON to the table's JSON field and picked the specific keys in the SELECT list.
CONVERT(int, AttsData.[key]) returns the index of each element.
I have a JSON document in a column (record) of a table (TABLE) as below. I need to write a SQL query that returns all occurrences of the values of fields "a", "b", "k" within aaagroup.
Result should be:
NAME1 age1 comment1
NAME2 age2
NAME3 comment3
JSON data:
{
"reportfile": {
"aaa": {
"aaagroup": [{
"a": "NAME1",
"b": "age1",
"k": "comment1"
},
{
"a": "NAME2",
"b": "age2"
},
{
"a": "NAME3",
"k": "comment3"
}]
},
"dsa": {
"dsagroup": [{
"j": "Name"
},
{
"j": "Title"
}]
}
}
}
I used the below query for a single occurrence:
Data:
{"reportfile":{"aaa":{"aaagroup":[{"a":"NAME1","k":"age1"}]},"dsa":{"dsagroup":[{"j":"USERNAME"}],"l":"1","m":"1"}}}
Query:
-- Extracts the first "a" and "k" values of aaagroup by string slicing.
-- Every derived table needs its own SELECT keyword.
select
    -- Cut each value at its closing double quote.
    substr(cc.BUS_NME, 1, strpos(cc.BUS_NME, '"') - 1) as BUS_NME,
    substr(cc.AGE, 1, strpos(cc.AGE, '"') - 1) as AGE
from (
    select
        -- Skip past the 5-character markers "a":" and "k":" to the start of each value.
        substr(bb.aaa, strpos(bb.aaa, '"a":"') + 5) as BUS_NME,
        substr(bb.aaa, strpos(bb.aaa, '"k":"') + 5) as AGE
    from (
        -- Slice beginning at "aaagroup; its length is the position of the first '},'.
        select substr(aa.G, strpos(aa.G, '"aaagroup'), strpos(aa.G, '},')) as aaa
        from (
            -- Drop everything before "aaagroup.
            select substr(record, strpos(record, '"aaagroup')) as G
            from TABLE
        ) aa
    ) bb
) cc
ush rani – If I understand your question correctly, you have an external table like the one below, and you can try the query below to get the desired result from it.
sample external table:
-- External table exposing the top-level "reportfile" property of each JSON
-- record as a single string column.
CREATE EXTERNAL TABLE Ext_JSON_data(
reportfile string
)
-- Records are parsed with the OpenX JSON SerDe.
ROW FORMAT SERDE
'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES (
'serialization.format' = '1'
)
-- Data files are read from this S3 prefix.
LOCATION
's3://bucket/folder/'
Query to fetch the desired result:
-- Inline sample document, cast to a map of top-level key -> JSON value.
WITH the_table AS (
SELECT CAST(social AS MAP(VARCHAR, JSON)) AS social_data
FROM (
VALUES
(JSON '{"aaa": {"aaagroup": [{"a": "NAME1","b": "age1","k": "comment1"},{"a": "NAME2","b": "age2"},{"a": "NAME3","k": "comment3"}]},"dsa": {"dsagroup": [{"j": "Name"},{"j": "Title"}]}}')
) AS t (social)
),
-- One row per top-level key ("aaa", "dsa"); its value becomes a map again.
cte_first_level as
(
SELECT
first_level_key
,CAST(first_level_value AS MAP(VARCHAR, JSON))As first_level_value
FROM the_table
CROSS JOIN UNNEST (social_data) AS t (first_level_key, first_level_value)
),
-- One row per second-level key ("aaagroup", "dsagroup"); the value is a JSON array.
cte_second_level as
(
Select
first_level_key
,SECOND_level_key
,SECOND_level_value
from
cte_first_level
CROSS JOIN UNNEST (first_level_value) AS t (SECOND_level_key, SECOND_level_value)
)
-- One row per array element. element_at returns NULL when a key is absent from
-- the element, whereas the [] subscript raises an error for a missing key; not
-- every element carries all of "a", "b" and "k".
SELECT
first_level_key
,SECOND_level_key
,SECOND_level_value
,items
,element_at(items, 'a') value_of_a
,element_at(items, 'b') value_of_b
,element_at(items, 'k') value_of_k
from
cte_second_level
cross join unnest(cast(json_extract(SECOND_level_value, '$') AS ARRAY<MAP<VARCHAR, VARCHAR>>)) t (items)
Query Output :