SQL script to form the nested JSON as a output - sql

Consider I have below data,
create table #Temp(PropertyID nvarchar(255),BuildingID nvarchar(255),UnitID nvarchar(255),TenantName nvarchar(255),FieldName nvarchar(255),CurrentValue nvarchar(255),PreviousValue nvarchar(255))
insert into #Temp Values
('p1','B1','5','Spencer','Lease_EndDate','01/01/2021','03/01/2021'),
('p1','B1','5','Spencer','MonthlyBaseRent','3232','3000'),
('p1','B1','5','BCR','MonthlyBaseRent','1000','1100'),
('p1','B1','6','EA','MonthlyBaseRent','5000','5100'),
('p1','B2','5','VR','MonthlyBaseRent','3232','3000'),*
I need output as below nested JSON format, but I am getting flat JSON like [{},{},{}]
[
{
"PropertyID": "p1",
"Building": [
{
"BuildingID": "B1",
"Unit": [
{
"UnitID": "5",
"Tenant": [
{
"TenantName": "Spencer",
"Lease_EndDate": {
"CurrentValue": "01/01/2021",
"PreviousValue": "03/01/2021"
},
"MonthlyBaseRent": {
"CurrentValue": "3232",
"PreviousValue": "3000"
}
},
{
"TenantName": "BCR",
"MonthlyBaseRent": {
"CurrentValue": "1000",
"PreviousValue": "1100"
}
}
]
},
{
"UnitID": "6",
"Tenant": [
{
"TenantName": "EA",
"MonthlyBaseRent": {
"CurrentValue": "5000",
"PreviousValue": "5100"
}
}
]
}
]
},
{
"BuildingID": "B2",
"Unit": [
{
"UnitID": "5",
"Tenant": [
{
"TenantName": "VR",
"MonthlyBaseRent": {
"CurrentValue": "3232",
"PreviousValue": "3000"
}
}
]
}
]
}
]
}
]

This is an example of nested JSON, 2 levels. Use this template to build as much levels as needed.
select PropertyID ,BuildingID,
(select UnitID ,
(select t1.TenantName, t1.FieldName, t1.CurrentValue,t1.PreviousValue
from #Temp t1
where t1.PropertyID = t2.PropertyID and t1.BuildingID = t2.BuildingID and t1.UnitID = t2.UnitID
for json path) Tenant
from #Temp t2
where t2.PropertyID = t3.PropertyID and t2.BuildingID = t3.BuildingID
group by UnitID, PropertyID ,BuildingID
for json path) Unit
from #Temp t3
group by PropertyID ,BuildingID
for json path

Related

Query item in nested array

Customer appointments with top level locationId sample data set:
[
{
"locationId": 9999,
"customerAppointments": [
{
"customerId": "1",
"appointments": [
{
"appointmentId": "cbbce566-da59-42c2-8845-53976ba63d56",
"locationName": "Sullivan St"
},
{
"appointmentId": "5f09e2af-ddae-47aa-9f7c-fd1001a9c5e6",
"locationName": "Oak St"
}
]
},
{
"customerId": "2",
"appointments": [
{
"appointmentId": "964a3c1c-ccec-4082-99e2-65795352ba79",
"locationName": "Kellet St"
}
]
},
{
"customerId": "3",
"appointments": []
}
]
},
{
...
},
{
...
}
]
I need to pull out appointment by locationId and customerId and only get the appointment for that customerId e.g
Sample response:
[
{
"appointmentId": "964a3c1c-ccec-4082-99e2-65795352ba79",
"locationName": "Kellet St"
}
]
Tried below query, but it just returns all records for all customers ids (which is kind of expected):
db.getCollection("appointments").find(
{
"locationId" : NumberInt(9999),
"customerAppointments" : {
"$elemMatch" : {
"customerId" : "2"
}
}
}
);
But how can I get just the appointment record for a specific customerId?
When asking this question I was unaware of the older version of MongoDB driver (< v5) so we cannot use the $getField operator.
However, this query seems to work well:
db.getCollection("appointments").aggregate([
{
$match: {
"locationId": NumberInt(9999)
}
},
{
$unwind: "$customerAppointments"
},
{
$match: {
"customerAppointments.customerId": "2"
}
},
{
$project: {
appointments: "$customerAppointments.appointments"
}
}
]);
Yields:
{
"_id" : ObjectId("63eebe95c7a0da54804c1db2"),
"appointments" : [
{
"appointmentId" : "964a3c1c-ccec-4082-99e2-65795352ba79",
"locationName" : "Kellet St"
}
]
}

Parse Google API JSON file to rows and columns with OPENJSON in T-SQL

So I am trying to create a query than can handle a json file that we get with a data factory web request from the Google Analytics API 4 and store the result in an Azure sql table. The following query is the closest I got.
The dimension and metric headers seems to be column names and the values in the rows part should be the rows.
DECLARE #jsonexample NVARCHAR(MAX) =
N'{
"dimensionHeaders": [
{
"name": "date"
},
{
"name": "country"
}
],
"metricHeaders": [
{
"name": "totalUsers",
"type": "TYPE_INTEGER"
}
],
"rows": [
{
"dimensionValues": [
{
"value": "20230207"
},
{
"value": "Netherlands"
}
],
"metricValues": [
{
"value": "3"
}
]
},
{
"dimensionValues": [
{
"value": "20230208"
},
{
"value": "Netherlands"
}
],
"metricValues": [
{
"value": "2"
}
]
},
{
"dimensionValues": [
{
"value": "20230208"
},
{
"value": "United States"
}
],
"metricValues": [
{
"value": "1"
}
]
}
]
}'
DECLARE #jsonexample2 NVARCHAR(MAX) = (SELECT [value] FROM OPENJSON(#jsonexample) where [key]= 'rows' )
SELECT *
from OPENJSON(#jsonexample2)
This blog seemed to have a good explanation but I still not got it working.
https://levelup.gitconnected.com/how-to-easily-parse-and-transform-json-in-sql-server-c0b091a964de
You can shred it down to something like this:
DECLARE #jsonexample NVARCHAR(MAX) =
N'{
"dimensionHeaders": [
{
"name": "date"
},
{
"name": "country"
}
],
"metricHeaders": [
{
"name": "totalUsers",
"type": "TYPE_INTEGER"
}
],
"rows": [
{
"dimensionValues": [
{
"value": "20230207"
},
{
"value": "Netherlands"
}
],
"metricValues": [
{
"value": "3"
}
]
},
{
"dimensionValues": [
{
"value": "20230208"
},
{
"value": "Netherlands"
}
],
"metricValues": [
{
"value": "2"
}
]
},
{
"dimensionValues": [
{
"value": "20230208"
},
{
"value": "United States"
}
],
"metricValues": [
{
"value": "1"
}
]
}
]
}'
;with cols as (
select cast([key] as int) AS k, JSON_VALUE(value, '$.name') AS v
from openjson(#jsonexample, '$.dimensionHeaders') x
)
, metrics as (
select cast([key] as int) AS k, JSON_VALUE(value, '$.name') AS v
from openjson(#jsonexample, '$.metricHeaders') x
)
select CAST(x.[key] AS INT) AS id, c.v AS dimName, JSON_VALUE(dim.value, '$.value') AS dimValue
, m.v AS metName, JSON_VALUE(metr.value, '$.value') AS metValue
from openjson(#jsonexample, '$.rows') x
cross apply openjson(x.value, '$.dimensionValues') dim
cross apply openjson(x.value, '$.metricValues') metr
inner join cols c
ON c.k = dim.[key]
inner join metrics m
ON m.k = metr.[key]
Then you can probably figure out the rest.
Here is a bit code that will dynamically parse the metrics and dimension for Google Analytics. It should give you a good starting point :)
SELECT TOP 1 #json = JSON_QUERY(RawJson, '$.reports[0].columnHeader')
FROM TableName
SET #WithClause =
(
SELECT STRING_AGG(Line, ',')
FROM
(
SELECT REPLACE(r.value, 'ga:', '')+' '+CASE
WHEN r.value = 'ga:DATE' THEN 'DATE'
ELSE 'NVARCHAR(255)'
END+' '+'''$.dimensions['+r.[key]+']''' AS Line
FROM OPENJSON(#json, '$.dimensions') AS r
UNION ALL
SELECT REPLACE(JSON_VALUE(r.value, '$.name'), 'ga:', '')+' '+CASE
WHEN JSON_VALUE(r.value, '$.type') = 'TIME' THEN 'FLOAT'
WHEN JSON_VALUE(r.value, '$.type') = 'CURRENCY' THEN 'DECIMAL(9,2)'
ELSE JSON_VALUE(r.value, '$.type')
END+' '+'''$.metrics[0].values['+r.[key]+']'''
FROM OPENJSON(#json, '$.metricHeader.metricHeaderEntries') AS r
) AS a
);
SET #Query = '
SELECT d.*
INTO #temp_table
FROM TableNAme AS cm
CROSS APPLY OPENJSON(RawJson, ''$.reports[0].data.rows'') WITH ( '+#WithClause+ ') AS d';
--PRINT #Query;
EXECUTE (#Query);

AWS IoT rule sql select statement

I am trying to write a SQL select statement for my AWS IoT rule to extract the values 'gateway_id and 'rssi' from the following MQTT message:
{
"end_device_ids": {
"device_id": "imd2",
"application_ids": {
"application_id": "pennal"
},
"dev_eui": "004E3A0DF76DC9E9",
"join_eui": "70B3D57ED003CBE8",
"dev_addr": "260BA9D0"
},
"correlation_ids": [
"as:up:01G30W0J4D65P6D50QH1DN3ZQP",
"gs:conn:01G2ZZ7FT9BH6J93WRYS4ATVDM",
"gs:up:host:01G2ZZ7FTN14103H90QN71Q557",
"gs:uplink:01G30W0HXWMES1Z7X7F2MCFMPF",
"ns:uplink:01G30W0HXXJM5PNGJAD0W01GGH",
"rpc:/ttn.lorawan.v3.GsNs/HandleUplink:01G30W0HXWFR3HNGBZS7XJV15E",
"rpc:/ttn.lorawan.v3.NsAs/HandleUplink:01G30W0J4D18JZW199EM8WERGR"
],
"received_at": "2022-05-14T08:47:25.837680984Z",
"uplink_message": {
"session_key_id": "AYBlRLSz9n83bW3WU3+GfQ==",
"f_port": 1,
"f_cnt": 5013,
"frm_payload": "DiAAAA==",
"decoded_payload": {
"rainmm": 0,
"voltage": 3.616
},
"rx_metadata": [
{
"gateway_ids": {
"gateway_id": "pennal-gw2",
"eui": "AC1F09FFFE057EC6"
},
"time": "2022-05-14T08:47:25.065794944Z",
"timestamp": 114306297,
"rssi": -126,
"channel_rssi": -126,
"snr": -8.25,
"uplink_token": "ChgKFgoKcGVubmFsLWd3MhIIrB8J//4FfsYQ+dnANhoMCJ3Z/ZMGEOPmv6sCIKjZvump7gYqCwid2f2TBhCA568f"
}
],
"settings": {
"data_rate": {
"lora": {
"bandwidth": 125000,
"spreading_factor": 11
}
},
"coding_rate": "4/5",
"frequency": "868100000",
"timestamp": 114306297,
"time": "2022-05-14T08:47:25.065794944Z"
},
"received_at": "2022-05-14T08:47:25.629041670Z",
"confirmed": true,
"consumed_airtime": "0.659456s",
"version_ids": {
"brand_id": "heltec",
"model_id": "cubecell-dev-board-class-a-otaa",
"hardware_version": "_unknown_hw_version_",
"firmware_version": "1.0",
"band_id": "EU_863_870"
},
"network_ids": {
"net_id": "000013",
"tenant_id": "ttn",
"cluster_id": "eu1",
"cluster_address": "eu1.cloud.thethings.network"
}
}
}
I have tried following the documentation here: AWS Documentation but am struggling with the nested part of the message.
my SQL statement at the moment is:
SELECT received_at as datetime, end_device_ids.device_id as device_id,
uplink_message.decoded_payload.rainmm as rainmm, uplink_message.decoded_payload.voltage as
voltage, uplink_message.settings.data_rate.lora.spreading_factor as sprfact,
uplink_message.consumed_airtime as time_on_air ,uplink_message.settings.timestamp as ts,
uplink_message.rx_metadata as rx,(select value gateway_ids from uplink_message.rx_metadata) as gw,
(select value rssi from uplink_message.rx_metadata)as rssi, get((select gateway_id from
uplink_message.rx_metadata),0).gateway_id as gwn FROM 'thethings/lorawan/matt-pennal-ire/uplink'
which returns
{
"datetime": "2022-05-15T12:19:11.947844474Z",
"device_id": "md4",
"rainmm": 5.842001296924288,
"voltage": 3.352,
"sprfact": 8,
"time_on_air": "0.092672s",
"ts": 3262497863,
"rx": [
{
"gateway_ids": {
"gateway_id": "pennal-gw2",
"eui": "AC1F09FFFE057EC6"
},
"time": "2022-05-15T12:19:11.178463935Z",
"timestamp": 3262497863,
"rssi": -125,
"channel_rssi": -125,
"snr": -7.5,
"uplink_token": "ChgKFgoKcGVubmFsLWd3MhIIrB8J//4FfsYQx4jXkwwaDAi/34OUBhCCy9XhAiDY6prg+ckHKgsIv9+DlAYQv8mMVQ=="
}
],
"gw": [
{
"gateway_id": "pennal-gw2",
"eui": "AC1F09FFFE057EC6"
}
],
"rssi": [
-125
]
}
but I would like it to return
{
"datetime": "2022-05-15T12:19:11.947844474Z",
"device_id": "md4",
"rainmm": 5.842001296924288,
"voltage": 3.352,
"sprfact": 8,
"time_on_air": "0.092672s",
"ts": 3262497863,
"gwn":"pennal_gw2"
"rssi":-126
}
Any help to get the values from the nested array would be greatly appreciated!

BigQuery concat nested array json

I have data that looks like
{
"Attributes": [
{
"values": [
{
"value": "20003"
},
{
"value": "30075"
},
{
"value": "40060"
}
],
"name": "price"
}
],
"attr2" : "val"
}
The output I want is concat all the values in the nested json array
price, "20003, 30075, 40060"
I tried some queries but failed to get the correct output.
You can use JSON_EXTRACT_ARRAY and ARRAY_TO_STRING:
WITH test_json AS (
SELECT
'''{
"Attributes": [
{
"values": [
{
"value": "20003"
},
{
"value": "30075"
},
{
"value": "40060"
}
],
"name": "price"
}
],
"attr2" : "val"
}''' AS json_string
),
values_concatenated AS (
SELECT ARRAY_TO_STRING(
ARRAY(
SELECT JSON_VALUE(json_values, '$.value')
FROM UNNEST((SELECT JSON_EXTRACT_ARRAY(json_string, '$.Attributes[0].values') AS json_values FROM test_json)) as json_values
),
', '
) as values
)
SELECT
(select json_value(json_string, '$.Attributes[0].name') from test_json),
(select values from values_concatenated)

How can I define jsonPath for given json?

{
"name": "ninja",
"contry": "India",
"Account": [
{
"id": "123",
"orgId": 223,
"investment": [
{
"invetmentId": "111",
"name": "India tech",
"performance": [
{
"id": "123",
"performanceSet": [
{
"amount": "231",
"currency": "USD"
},
{
"amount": "250",
"currency": "IND"
}
]
}
]
}
]
}
]
}
So I have to select the amount where the currency is USD?
And I tried it as "$.Account..investment.performance..performanceSet.amount[?(#.currency=~/.*USD/)]"
This JsonPath should work:
$..performanceSet[?(#.currency == "USD")].amount
Tested on:
{
"name":"ninja",
"contry":"India",
"Account":[
{
"id":"123",
"orgId":223,
"investment":[
{
"invetmentId":"111",
"name":"India tech",
"performance":[
{
"id":"123",
"performanceSet":[
{
"amount":"231",
"currency":"USD"
},
{
"amount":"250",
"currency":"IND"
}
]
}
]
},
{
"invetmentId":"112",
"name":"India tech 2",
"performance":[
{
"id":"124",
"performanceSet":[
{
"amount":"235",
"currency":"USD"
},
{
"amount":"250",
"currency":"IND"
}
]
}
]
}
]
}
]
}
which returns:
[
"231",
"235"
]
A good way to try it out is this site: https://jsonpath.com/
Read the docs: https://github.com/intuit/karate#jsonpath-filters
* def temp = $..performanceSet[?(#.currency=='USD')]
* match temp[0].amount == '231'
You can try it this way
$.Account..investment.performance..performanceSet.amount[?(#.currency=~/.*USD/)]