SQL to JSON parent/child relationship - sql

I have a table in my Microsoft SQL Server 2017 that looks like this:
+----+-------+----------+-------+-----------+
| ID | Level | ParentID | IsEnd | SomeText |
+----+-------+----------+-------+-----------+
| 1 | 1 | null | 1 | abc |
| 2 | 1 | null | 1 | asd |
| 3 | 2 | 1 | 1 | weqweq |
| 4 | 2 | 1 | 0 | lkjlkje |
| 5 | 3 | 4 | 1 | noonwqe |
| 6 | 3 | 4 | 0 | wet4t4 |
+----+-------+----------+-------+-----------+
And I would like to output a json string:
[{ ID: 1,
SomeText: 'abc',
Child2: [{
ID: 3,
SomeText: 'weqweq'
}, {
ID: 4,
SomeText: 'lkjlkje',
Child3: [{
ID: 5,
SomeText: 'noonwqe'
}, {
ID: 6,
SomeText: 'wet4t4'
}
]}
]
}]
IsEnd is a flag to know where you reached the last level.

You can use a recursive scalar UDF (User Defined Function) that builds the hierarchy starting from the root.
Here is a stub of a UDF you can start from:
-- Builds the JSON representation of the hierarchy stored in dbo.tmp, starting
-- at the node identified by @currentId.
--
-- Parameters:
--   @currentId  ID of the node to expand.
-- Returns:
--   * NULL when @currentId does not exist in dbo.tmp;
--   * for a root node (ParentID is null): a one-element JSON array holding the
--     node itself, with its descendants nested under "Child" (the key is
--     omitted when the root has no children);
--   * for any other node: the JSON array of its direct children (each with its
--     own nested "Child"), or NULL when it has none.
-- Note: the function calls itself once per child. SQL Server limits nested
-- function calls to 32 levels, so deeper hierarchies need another approach.
create function dbo.udf_create_json_tree(@currentId int)
returns varchar(max)
as
begin
    declare @json nvarchar(max);
    declare @id int, @parentId int, @someText varchar(50);

    select @id = [ID], @parentId = ParentID, @someText = SomeText
    from dbo.tmp
    where [ID] = @currentId;

    -- Unknown node: there is nothing to render.
    if (@id is null)
        return null;

    -- Direct children of the current node. json_query keeps the recursive
    -- result as nested JSON instead of an escaped string; FOR JSON omits NULL
    -- columns, so leaf nodes simply carry no "Child" key.
    set @json =
    (
        select [ID], SomeText, json_query(dbo.udf_create_json_tree([ID])) as Child
        from dbo.tmp
        where ParentID = @currentId
        order by [ID]
        for json auto
    );

    -- A root is not part of any parent's child list, so wrap it by hand.
    if (@parentId is null)
        set @json = concat(
            '[{"ID":', cast(@id as nvarchar(50)),
            ',"SomeText":"', string_escape(@someText, 'json'), '"',
            -- concat() ignores NULL, so a childless root gets no "Child" key
            -- instead of the invalid fragment "Child":}
            case when @json is not null then ',"Child":' + @json end,
            '}]'
        );

    return @json;
end
Populate a table with your input values:
-- Sample hierarchy read by dbo.udf_create_json_tree.
-- ParentID is null for root nodes and otherwise holds the parent's ID.
create table dbo.tmp
(
    [ID]     int         not null primary key,
    [Level]  int         null,
    ParentID int         null,
    IsEnd    bit         null,
    SomeText varchar(50) null
);

-- Explicit column list so the insert keeps working if columns are added or reordered.
insert into dbo.tmp ([ID], [Level], ParentID, IsEnd, SomeText)
values
    (1, 1, null, 1, 'abc'),
    (2, 1, null, 1, 'asd'),
    (3, 2, 1,    1, 'weqweq'),
    (4, 2, 1,    0, 'lkjlkje'),
    (5, 3, 4,    1, 'noonwqe'),
    (6, 3, 4,    0, 'wet4t4');
Now you can call the UDF on the first node (with ID=1):
-- Render the hierarchy rooted at the node with ID = 1 as a JSON string.
select dbo.udf_create_json_tree(1)
Json result:
Formatted json result:
[{
"ID": 1,
"SomeText": "abc",
"Child": [{
"ID": 3,
"SomeText": "weqweq"
},
{
"ID": 4,
"SomeText": "lkjlkje",
"Child": [{
"ID": 5,
"SomeText": "noonwqe"
},
{
"ID": 6,
"SomeText": "wet4t4"
}]
}]
}]
If you really need to name each child node with the level number (Child2, Childx and so on) you'll probably want to implement a replace logic on "Child" string.

Related

Flatten JSON Object in Snowflake

I have a question about using the flatten function in Snowflake. I'm having trouble with extracting data from following path data:performance: of the following JSON-object:
{
"data": {
"metadata": {
"id": "001",
"created_at": "2020-01-01"
},
"performance": {
"2020-01-01": {
"ad_performances": [{
"ad": "XoGKkgcy7V3BDm6m",
"ad_impressions": 1,
"clicks": 0,
"device": "-3",
"total_net_amount": 0
}, {
"ad": "XoGKkgmFlHa3V5xj",
"ad_impressions": 17,
"clicks": 0,
"device": "-4",
"total_net_amount": 0
}, {
"ad": "XoGKkgmFlHa3V5xj",
"ad_impressions": 5,
"clicks": 0,
"device": "-5",
"total_net_amount": 0
}, {
"ad": "XoGKkgcy7V3BDm6m",
"ad_impressions": 19,
"clicks": 0,
"device": "-2",
"total_net_amount": 0
}, {
"ad": "XoGKkgcy7V3BDm6m",
"ad_impressions": 5,
"clicks": 0,
"device": "-1",
"total_net_amount": 0
}]
}
}
}
}
Desired result is a table with the "date" (2020-01-01), "ad" and "impressions".
I tried to achieve the desired result with:
-- Asker's attempt (does not return the ad data).
-- NOTE(review): in the sample document the array lives at
-- data -> performance -> <date> -> ad_performances, and the counter key is
-- ad_impressions; the paths below use "performances" and "impressions",
-- neither of which appears in the sample.
-- <table> is a placeholder for the real table name.
select
key::date as date
,f.value:performances:ad as performances_array
,f.value:performances:impressions as performances_array
from <table>, lateral flatten (input => CLMN:performances) f;
but I'm not able to extract data from the "performance" array. Can someone help me out?
Thank you!
Can you try this one?
-- One row per entry of every ad_performances array.
--   f: one row per date key under data.performance
--   s: one row per key of that date's object (here only ad_performances)
--   l: one row per element of the ad_performances array
-- The sample document stores the counter under "ad_impressions".
select f.KEY date,
    l.VALUE:"ad" as performances_array,
    l.VALUE:"ad_impressions" as performances_array
from mydata, lateral flatten (input => CLMN:data.performance ) f,
    lateral flatten (input => f.VALUE ) s,
    lateral flatten (input => s.VALUE ) l
;
+------------+--------------------+--------------------+
| DATE | PERFORMANCES_ARRAY | PERFORMANCES_ARRAY |
+------------+--------------------+--------------------+
| 2020-01-01 | "XoGKkgcy7V3BDm6m" | 1 |
| 2020-01-01 | "XoGKkgmFlHa3V5xj" | 17 |
| 2020-01-01 | "XoGKkgmFlHa3V5xj" | |
| 2020-01-01 | "XoGKkgcy7V3BDm6m" | 19 |
| 2020-01-01 | "XoGKkgcy7V3BDm6m" | 5 |
+------------+--------------------+--------------------+
Only 2 LATERAL FLATTENs are required to extract the rows
-- Flatten twice: first over the date keys under data.performance, then over
-- each date's ad_performances array, yielding one typed row per ad entry.
SELECT
    perf_day.key::date                  AS ad_date,
    perf_row.value:ad::varchar          AS ad,
    perf_row.value:ad_impressions::int  AS impressions
FROM j,
    LATERAL FLATTEN(input => v:data:performance) perf_day,
    LATERAL FLATTEN(input => perf_day.value:ad_performances) perf_row;
AD_DATE
AD
IMPRESSIONS
2020-01-01
XoGKkgcy7V3BDm6m
1
2020-01-01
XoGKkgmFlHa3V5xj
17
2020-01-01
XoGKkgmFlHa3V5xj
5
2020-01-01
XoGKkgcy7V3BDm6m
19
2020-01-01
XoGKkgcy7V3BDm6m
5
If you want to aggregate the data by ad date and ad,
-- Total impressions per (date, ad): flatten the date keys and each date's
-- ad_performances array, then aggregate directly on the extracted values.
SELECT
    day_entry.key::date                      AS ad_date,
    ad_entry.value:ad::varchar               AS ad,
    SUM(ad_entry.value:ad_impressions::int)  AS impressions
FROM j,
    LATERAL FLATTEN(input => v:data:performance) day_entry,
    LATERAL FLATTEN(input => day_entry.value:ad_performances) ad_entry
GROUP BY
    day_entry.key::date,
    ad_entry.value:ad::varchar;
AD_DATE
AD
IMPRESSIONS
2020-01-01
XoGKkgcy7V3BDm6m
25
2020-01-01
XoGKkgmFlHa3V5xj
22

Ability to get the "index" (or ordinal value) for each array entry in BigQuery?

In a data column in BigQuery, I have a JSON object with the structure:
{
"sections": [
{
"secName": "Flintstones",
"fields": [
{ "fldName": "Fred", "age": 55 },
{ "fldName": "Barney", "age": 44 }
]
},
{
"secName": "Jetsons",
"fields": [
{ "fldName": "George", "age": 33 },
{ "fldName": "Elroy", "age": 22 }
]
}
]}
I'm hoping to unnest() and json_extract() to get results that resemble:
id | section_num | section_name | field_num | field_name | field_age
----+--------------+--------------+-----------+------------+-----------
1 | 1 | Flintstones | 1 | Fred | 55
1 | 1 | Flintstones | 2 | Barney | 44
1 | 2 | Jetsons | 1 | George | 33
1 | 2 | Jetsons | 2 | Elroy | 22
So far, I have the query:
-- One row per (section, field) pair found in the JSON document in tbl.data.
-- LEFT JOIN keeps rows whose sections or fields array is missing or empty.
SELECT
  tbl.id,
  JSON_EXTRACT_SCALAR(section_json, '$.secName') AS section_name,
  JSON_EXTRACT_SCALAR(field_json, '$.fldName') AS field_name,
  JSON_EXTRACT_SCALAR(field_json, '$.age') AS field_age
FROM `tick8s.test2` AS tbl
LEFT JOIN UNNEST(JSON_EXTRACT_ARRAY(tbl.data, '$.sections')) AS section_json
LEFT JOIN UNNEST(JSON_EXTRACT_ARRAY(section_json, '$.fields')) AS field_json
that yields:
id | section_name | field_name | field_age
----+--------------+------------+-----------
1 | Flintstones | Fred | 55
1 | Flintstones | Barney | 44
1 | Jetsons | George | 33
1 | Jetsons | Elroy | 22
QUESTION: I'm not sure how, if possible, to get the section_num and field_num ordinal positions from their array index values?
(If you are looking to duplicate my results, I have a table named test2 with 2 columns:
id - INTEGER, REQUIRED
data - STRING, NULLABLE
and I insert the data with:
-- Seed row for tick8s.test2. The column list makes the target columns explicit,
-- so the statement does not depend on the table's column order.
-- The JSON document is assembled from fragments purely for readability.
insert into tick8s.test2 (id, data) values (1,
  '{"sections": [' ||
    '{' ||
      '"secName": "Flintstones",' ||
      '"fields": [' ||
        '{ "fldName": "Fred", "age": 55 },' ||
        '{ "fldName": "Barney", "age": 44 }' ||
      ']' ||
    '},' ||
    '{' ||
      '"secName": "Jetsons",' ||
      '"fields": [' ||
        '{ "fldName": "George", "age": 33 },' ||
        '{ "fldName": "Elroy", "age": 22 }' ||
      ']' ||
  '}]}'
);
)
Do you just want with offset?
-- One row per (section, field) pair, with the 1-based position of the section
-- and of the field inside their arrays.
-- WITH OFFSET is 0-based, hence the "+ 1". When an array is missing or empty
-- the LEFT JOIN yields a NULL offset, so the position stays NULL.
SELECT
  tbl.id,
  n_s + 1 AS section_num,
  json_extract_scalar(curSection, '$.secName') AS section_name,
  n_c + 1 AS field_num,
  json_extract_scalar(curField, '$.fldName') AS field_name,
  json_extract_scalar(curField, '$.age') AS field_age
FROM `tick8s.test2` AS tbl
LEFT JOIN unnest(json_extract_array(tbl.data, '$.sections')) AS curSection
  WITH OFFSET AS n_s
LEFT JOIN unnest(json_extract_array(curSection, '$.fields')) AS curField
  WITH OFFSET AS n_c;

Kusto query how to iterator each row in a table as parameter to query in another table?

I have two tables, 'PlayerName' and 'PlayerSpendMoney'.
How can I iterate over 'PlayerName', take each player name, and get how much money was spent by each player?
Does Kusto query support this?
// Inline sample data from the question: the list of player names.
let PlayerName = datatable(name:string)
[
'player1',
'player2',
'player3',
];
// Individual spend records; a player can appear several times.
// NOTE(review): this snippet only declares the two tables; the second let has
// no terminating ';' and no query expression follows it.
let PlayerSpendMoney = datatable(name:string, spendMoney:int)
[
'player1', 1,
'player2', 3,
'player3', 4,
'player1', 1,
'player2', 5,
'player3', 1,
'player3', 1,
]
You could achieve that using the join operator.
For example:
// Player list (one row per player).
let PlayerName = datatable(name: string)
[
    'player1',
    'player2',
    'player3',
];
// Spend records (several rows per player).
let PlayerSpendMoney = datatable(name: string, spendMoney: int)
[
    'player1', 1,
    'player2', 3,
    'player3', 4,
    'player1', 1,
    'player2', 5,
    'player3', 1,
    'player3', 1,
];
// Total spend per listed player; leftouter keeps players without any spend row.
PlayerName
| join kind=leftouter (
    PlayerSpendMoney
    | summarize sum_spendMoney = sum(spendMoney) by name
) on name
| project name, sum_spendMoney
| name | sum_spendMoney |
|---------|----------------|
| player1 | 2 |
| player2 | 8 |
| player3 | 6 |

How to return a nested JSON array

I have a table with 3 columns as shown on Pic1.
Pic1:
My goal is to return a nested array as a JSON string:
"[
[7157688981.272619,7290098.188727271,null,null,null,null,null],
[9331221970.409422,-187354647.1071058,2949162.807512622,null,null,null,null],
[11015646818.20274,-482410203.4928556,14040634.58841678,-112035.0684939814,null,null,null],
...
]"
(in this case, 1 array wrapping 6 arrays of 7 floats/nulls each)
I managed to get results shown on Pic2 using this query:
Pic2:
-- Pivot the (FF, DO, Value) rows for Id = 100 into one row per FF with one
-- column per DO value (0..6). Combinations that do not exist come out as NULL.
SELECT
    pvt.FF,
    pvt.[0] AS DO0,
    pvt.[1] AS DO1,
    pvt.[2] AS DO2,
    pvt.[3] AS DO3,
    pvt.[4] AS DO4,
    pvt.[5] AS DO5,
    pvt.[6] AS DO6
FROM (
    -- Only the first 36 rows in (FF, DO) order take part in the pivot.
    SELECT TOP 36 FF, DO, Value
    FROM MyTable WITH (NOLOCK)
    WHERE Id = 100
    ORDER BY FF, DO
) AS src
PIVOT (
    MAX(value)
    FOR DO IN ([0], [1], [2], [3], [4], [5], [6])
) AS pvt
Adding FOR JSON AUTO I get this result, which is the closest I have come to the desired result:
"[
{
"FF": 0,
"DO0": 8099996673.580311,
"DO1": -11940671.04204195
},
{
"FF": 1,
"DO0": 10575727464.91492,
"DO1": -233647906.0869318,
"DO2": 3359200.530983179
},
{
"FF": 2,
"DO0": 11693775141.49235,
"DO1": -429492903.0961588,
"DO2": 10721235.27578629,
"DO3": -74363.98732124352
},
...
]"
Could anyone give me some pointers? Thanks a lot.
Edit: Adding sample data:
| FF | D0 | D1 | D2 | D3 | D4 | D5 | D6 |
|:---:|--------:|--------:|-----------:|---------:|--------:|------:|-----:|
| 0 | 809973 | -1191 | NULL | NULL | NULL | NULL | NULL |
| 1 | 1057564 | -23366 | 3359200 | NULL | NULL | NULL | NULL |
| 2 | 1169341 | -42943 | 1079235 | -74363 | NULL | NULL | NULL |
| 3 | 1071256 | -15127 | -7915416 | 362620 | -3310 | NULL | NULL |
| 4 | 1409215 | -153899 | 13408335 | -5336555 | 93451 | -586 | NULL |
| 5 | -328619 | 804878 | -125937545 | 774136 | -226559 | 30247 | -155 |
Edit 2: This is a solution I found:
-- Body fragment of a scalar function: @Id is the enclosing routine's parameter
-- and the result is handed back through RETURN.
--
-- Step 1: pivot the (FF, DO, Value) rows into one JSON object per FF, with
-- keys D0..D6. ORDER BY FF fixes the order of the objects in the output.
-- FOR JSON omits NULL columns by default; append ", INCLUDE_NULL_VALUES" to
-- keep a null entry for every missing position.
DECLARE @json NVARCHAR(MAX) = (
    SELECT [0] AS D0
         , [1] AS D1
         , [2] AS D2
         , [3] AS D3
         , [4] AS D4
         , [5] AS D5
         , [6] AS D6
    FROM (
        SELECT TOP 36 FF, DO, Value
        FROM MyTable WITH (NOLOCK)
        WHERE Id = @Id
        ORDER BY FF, DO
    ) AS SourceTable
    PIVOT
    (
        MAX(value)
        FOR DO IN ([0], [1], [2], [3], [4], [5], [6])
    ) AS PivotTable
    ORDER BY FF
    FOR JSON AUTO
);

-- Step 2: turn every {"D0":x,"D1":y,...} object into a bare [x,y,...] array by
-- swapping the braces and dropping the key prefixes.
-- NOTE(review): this text rewrite assumes Value is numeric, so braces and the
-- key names can never occur inside a value; confirm against the column type.
SET @json = REPLACE(REPLACE(@json, '{', '['), '}', ']');
SET @json = REPLACE(@json, '"D0":', '');
SET @json = REPLACE(@json, '"D1":', '');
SET @json = REPLACE(@json, '"D2":', '');
SET @json = REPLACE(@json, '"D3":', '');
SET @json = REPLACE(@json, '"D4":', '');
SET @json = REPLACE(@json, '"D5":', '');
SET @json = REPLACE(@json, '"D6":', '');

RETURN @json;
Result:
[
[
8099996673.580311,
-11940671.04204195
],
[
10575727464.91492,
-233647906.0869318,
3359200.530983179
],
[
11693775141.49235,
-429492903.0961588,
10721235.27578629,
-74363.98732124352
],
[
10712544156.84927,
-151227127.4954886,
-7915416.297312453,
362620.9659495770,
-3310.492070233489
],
[
14092371615.84298,
-1538992059.857372,
134808335.5258479,
-5336555.181154305,
93451.37357258648,
-586.4355493504229
],
[
-3286355419.227318,
8048710298.354312,
-1251937545.910397,
77054136.25657171,
-2226559.780061883,
30247.00533798033,
-155.7244489259102
]
]
Thanks @JeroenMostert for the tip!

Query both sides of a friend relationship in SQL

I have my Postgres table set up as such:
-- One row per directed "friend request": "fromUserId" added "toUserId".
-- A friendship is mutual when the mirrored row exists as well.
CREATE TABLE "Friendships" (
    -- DEFAULT needs an expression; an identity column supplies ids automatically
    -- while still accepting explicitly inserted values.
    id integer GENERATED BY DEFAULT AS IDENTITY PRIMARY KEY,
    "fromUserId" integer NOT NULL REFERENCES "Users"(id),
    "toUserId" integer NOT NULL REFERENCES "Users"(id)
);
When a user fetches their friendships, I run: SELECT * FROM Friendships WHERE fromUserId=XXXX.
How do I modify the query so that additional data is added to the results (true/false) based on whether toUserId also added that user?
Example result:
[
{ id: 444, fromUserId: 1, toUserId: 22, addedBack: false },
{ id: 445, fromUserId: 1, toUserId: 67, addedBack: true },
{ id: 446, fromUserId: 1, toUserId: 599, addedBack: true },
{ id: 447, fromUserId: 1, toUserId: 733, addedBack: false },
]
With EXISTS:
-- Friendships started by user 1, flagged with whether the other user has a
-- row pointing back at them.
select
    f.*,
    exists (
        select 1
        from "Friendships" as back
        where back."fromUserId" = f."toUserId"
          and back."toUserId" = f."fromUserId"
    ) as addedBack
from "Friendships" as f
where f."fromUserId" = 1
For this sample data:
-- Sample rows: user 1 added four users; only 67 and 599 added user 1 back.
INSERT INTO "Friendships" (id, "fromUserId", "toUserId")
VALUES
    (444, 1, 22),
    (445, 1, 67),
    (446, 1, 599),
    (447, 1, 733),
    (448, 67, 1),
    (449, 599, 1);
Results:
> id | fromUserId | toUserId | addedback
> --: | ---------: | -------: | :--------
> 444 | 1 | 22 | f
> 445 | 1 | 67 | t
> 446 | 1 | 599 | t
> 447 | 1 | 733 | f
See the demo.
You can use a left outer join to check for the reciprocal row:
-- Friendships started by one user, flagged with whether the other side added
-- them back. The table and its camelCase columns were created with quoted
-- identifiers, so they must be quoted here too: unquoted names fold to lower
-- case and would not resolve.
-- XXXX is a placeholder for the requesting user's id.
select
    f.id,
    f."fromUserId",
    f."toUserId",
    r.id is not null as addedback  -- true when a mirrored row exists
from "Friendships" f
left join "Friendships" r
    on r."fromUserId" = f."toUserId"
    and r."toUserId" = f."fromUserId"
where f."fromUserId" = XXXX