How to return a nested JSON array - sql

I have a table with three columns (FF, DO and Value, as used in the queries below).
My goal is to return a nested array as a JSON string:
"[
[7157688981.272619,7290098.188727271,null,null,null,null,null],
[9331221970.409422,-187354647.1071058,2949162.807512622,null,null,null,null],
[11015646818.20274,-482410203.4928556,14040634.58841678,-112035.0684939814,null,null,null],
...
]"
(in this case, 1 array wrapping 6 arrays of 7 floats/nulls each)
I managed to get a pivoted result set (one row per FF, columns DO0..DO6) using this query:
SELECT FF,
[0] AS DO0 ,
[1] AS DO1 ,
[2] AS DO2 ,
[3] AS DO3 ,
[4] AS DO4 ,
[5] AS DO5 ,
[6] AS DO6
FROM (
SELECT TOP 36 FF, DO, Value
FROM MyTable WITH (NOLOCK)
WHERE Id = 100
ORDER BY FF, DO
) AS SourceTable
PIVOT (
MAX(value)
FOR DO IN ([0], [1], [2], [3], [4], [5], [6])
) AS PivotTable
Adding FOR JSON AUTO I get this result, which is the closest I have come to the desired output:
"[
{
"FF": 0,
"DO0": 8099996673.580311,
"DO1": -11940671.04204195
},
{
"FF": 1,
"DO0": 10575727464.91492,
"DO1": -233647906.0869318,
"DO2": 3359200.530983179
},
{
"FF": 2,
"DO0": 11693775141.49235,
"DO1": -429492903.0961588,
"DO2": 10721235.27578629,
"DO3": -74363.98732124352
},
...
]"
Could anyone give me some pointers? Thanks a lot.
Edit: Adding sample data:
| FF | D0 | D1 | D2 | D3 | D4 | D5 | D6 |
|:---:|--------:|--------:|-----------:|---------:|--------:|------:|-----:|
| 0 | 809973 | -1191 | NULL | NULL | NULL | NULL | NULL |
| 1 | 1057564 | -23366 | 3359200 | NULL | NULL | NULL | NULL |
| 2 | 1169341 | -42943 | 1079235 | -74363 | NULL | NULL | NULL |
| 3 | 1071256 | -15127 | -7915416 | 362620 | -3310 | NULL | NULL |
| 4 | 1409215 | -153899 | 13408335 | -5336555 | 93451 | -586 | NULL |
| 5 | -328619 | 804878 | -125937545 | 774136 | -226559 | 30247 | -155 |

Edit 2: This is a solution I found:
DECLARE @json NVARCHAR(MAX) = (
    SELECT [0] AS D0
         , [1] AS D1
         , [2] AS D2
         , [3] AS D3
         , [4] AS D4
         , [5] AS D5
         , [6] AS D6
    FROM (
        SELECT TOP 36 FF, DO, Value
        FROM MyTable WITH (NOLOCK)
        WHERE Id = @Id
        ORDER BY FF, DO
    ) AS SourceTable
    PIVOT (
        MAX(value)
        FOR DO IN ([0], [1], [2], [3], [4], [5], [6])
    ) AS PivotTable
    FOR JSON AUTO
);
SET @json = (SELECT REPLACE((SELECT @json AS json), '{', '['));
SET @json = (SELECT REPLACE((SELECT @json AS json), '}', ']'));
SET @json = (SELECT REPLACE((SELECT @json AS json), '"D0":', ''));
SET @json = (SELECT REPLACE((SELECT @json AS json), '"D1":', ''));
SET @json = (SELECT REPLACE((SELECT @json AS json), '"D2":', ''));
SET @json = (SELECT REPLACE((SELECT @json AS json), '"D3":', ''));
SET @json = (SELECT REPLACE((SELECT @json AS json), '"D4":', ''));
SET @json = (SELECT REPLACE((SELECT @json AS json), '"D5":', ''));
SET @json = (SELECT REPLACE((SELECT @json AS json), '"D6":', ''));
RETURN @json;
Result:
[
[
8099996673.580311,
-11940671.04204195
],
[
10575727464.91492,
-233647906.0869318,
3359200.530983179
],
[
11693775141.49235,
-429492903.0961588,
10721235.27578629,
-74363.98732124352
],
[
10712544156.84927,
-151227127.4954886,
-7915416.297312453,
362620.9659495770,
-3310.492070233489
],
[
14092371615.84298,
-1538992059.857372,
134808335.5258479,
-5336555.181154305,
93451.37357258648,
-586.4355493504229
],
[
-3286355419.227318,
8048710298.354312,
-1251937545.910397,
77054136.25657171,
-2226559.780061883,
30247.00533798033,
-155.7244489259102
]
]
Thanks @JeroenMostert for the tip!
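For reference, the string REPLACE cleanup can be avoided by building the arrays directly with string aggregation. This is only a sketch, assuming SQL Server 2017+ (for STRING_AGG) and the MyTable(Id, FF, DO, Value) layout used above; the CONVERT style may need adjusting if you need full float precision:

-- Build one inner array per FF with exactly seven slots (DO 0..6);
-- missing values become JSON null, and the rows are wrapped in one outer array.
DECLARE @Id int = 100;
DECLARE @json nvarchar(max);

SELECT @json = '[' + STRING_AGG(row_json, ',') WITHIN GROUP (ORDER BY FF) + ']'
FROM (
    SELECT f.FF,
           '[' + STRING_AGG(ISNULL(CONVERT(varchar(50), t.Value), 'null'), ',')
                 WITHIN GROUP (ORDER BY d.DO) + ']' AS row_json
    FROM (SELECT DISTINCT FF FROM MyTable WHERE Id = @Id) AS f
    CROSS JOIN (VALUES (0), (1), (2), (3), (4), (5), (6)) AS d(DO)
    LEFT JOIN MyTable AS t
           ON t.Id = @Id AND t.FF = f.FF AND t.DO = d.DO
    GROUP BY f.FF
) AS rows_per_ff;

SELECT @json AS json;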

Related

Swap values between columns based on third column

I have a table like this:
src_id | src_source | dst_id | dst_source | metadata
--------------------------------------------------------
123 | A | 345 | B | some_string
234 | B | 567 | A | some_other_string
498 | A | 432 | A | another_one # this line should be ignored
765 | B | 890 | B | another_one # this line should be ignored
What I would like is:
A_id | B_id | metadata
-----------------------
123 | 345 | some_string
567 | 234 | some_other_string
Here's the data to replicate:
from pyspark.sql import functions as F  # needed for F.col / F.when below

data = [
("123", "A", "345", "B", "some_string"),
("234", "B", "567", "A", "some_other_string"),
("498", "A", "432", "A", "another_one"),
("765", "B", "890", "B", "another_two"),
]
cols = ["src_id", "src_source", "dst_id", "dst_source", "metadata"]
df = spark.createDataFrame(data).toDF(*cols)
I am a bit confused as to how to do this. I got this far:
output = (
df
.filter(F.col("src_source") != F.col("dst_source"))
.withColumn("A_id",
F.when(F.col("src_source") == "A", F.col("src_id")))
.withColumn("B_id",
F.when(F.col("src_source") == "B", F.col("src_id")))
)
I think I figured it out: I need to split the df and union the parts again.
ab_df = (
df
.filter(F.col("src_source") != F.col("dst_source"))
.filter((F.col("src_source") == "A") & (F.col("dst_source") == "B"))
.select(F.col("src_id").alias("A_id"),
F.col("dst_id").alias("B_id"),
"metadata")
)
ba_df = (
df
.filter(F.col("src_source") != F.col("dst_source"))
.filter((F.col("src_source") == "B") & (F.col("dst_source") == "A"))
.select(F.col("src_id").alias("B_id"),
F.col("dst_id").alias("A_id"),
"metadata")
)
all = ab_df.unionByName(ba_df)
You can do it without the union, in a single select, without needing to write the same filter twice.
output = (
df
.filter(F.col("src_source") != F.col("dst_source"))
.select(
F.when(F.col("src_source") == "A", F.col("src_id")).otherwise(F.col("dst_id")).alias("A_id"),
F.when(F.col("src_source") == "A", F.col("dst_id")).otherwise(F.col("src_id")).alias("B_id"),
"metadata"
)
)
output.show()
# +----+----+-----------------+
# |A_id|B_id| metadata|
# +----+----+-----------------+
# | 123| 345| some_string|
# | 567| 234|some_other_string|
# +----+----+-----------------+

Ability to get the "index" (or ordinal value) for each array entry in BigQuery?

In a data column in BigQuery, I have a JSON object with the structure:
{
"sections": [
{
"secName": "Flintstones",
"fields": [
{ "fldName": "Fred", "age": 55 },
{ "fldName": "Barney", "age": 44 }
]
},
{
"secName": "Jetsons",
"fields": [
{ "fldName": "George", "age": 33 },
{ "fldName": "Elroy", "age": 22 }
]
}
]}
I'm hoping to use unnest() and json_extract() to get results that resemble:
id | section_num | section_name | field_num | field_name | field_age
----+--------------+--------------+-----------+------------+-----------
1 | 1 | Flintstones | 1 | Fred | 55
1 | 1 | Flintstones | 2 | Barney | 44
1 | 2 | Jetsons | 1 | George | 33
1 | 2 | Jetsons | 2 | Elroy | 22
So far, I have the query:
SELECT id,
json_extract_scalar(curSection, '$.secName') as section_name,
json_extract_scalar(curField, '$.fldName') as field_name,
json_extract_scalar(curField, '$.age') as field_age
FROM `tick8s.test2` AS tbl
LEFT JOIN unnest(json_extract_array(tbl.data, '$.sections')) as curSection
LEFT JOIN unnest(json_extract_array(curSection, '$.fields')) as curField
that yields:
id | section_name | field_name | field_age
----+--------------+------------+-----------
1 | Flintstones | Fred | 55
1 | Flintstones | Barney | 44
1 | Jetsons | George | 33
1 | Jetsons | Elroy | 22
QUESTION: How, if possible, can I get the section_num and field_num ordinal positions from their array index values?
(If you are looking to duplicate my results, I have a table named test2 with 2 columns:
id - INTEGER, REQUIRED
data - STRING, NULLABLE
and I insert the data with:
insert into tick8s.test2 values (1,
'{"sections": [' ||
'{' ||
'"secName": "Flintstones",' ||
'"fields": [' ||
'{ "fldName": "Fred", "age": 55 },' ||
'{ "fldName": "Barney", "age": 44 }' ||
']' ||
'},' ||
'{' ||
'"secName": "Jetsons",' ||
'"fields": [' ||
'{ "fldName": "George", "age": 33 },' ||
'{ "fldName": "Elroy", "age": 22 }' ||
']' ||
'}]}'
);
)
Do you just want WITH OFFSET?
SELECT id,
       json_extract_scalar(curSection, '$.secName') as section_name,
       n_s,
       json_extract_scalar(curField, '$.fldName') as field_name,
       json_extract_scalar(curField, '$.age') as field_age,
       n_c
FROM `tick8s.test2` tbl
LEFT JOIN unnest(json_extract_array(tbl.data, '$.sections')) curSection WITH OFFSET n_s
LEFT JOIN unnest(json_extract_array(curSection, '$.fields')) curField WITH OFFSET n_c;
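Note that WITH OFFSET is zero-based; if you want the 1-based section_num and field_num shown in the desired output, add 1 to each offset (an untested variation of the query above):
SELECT id,
       n_s + 1 AS section_num,
       json_extract_scalar(curSection, '$.secName') AS section_name,
       n_c + 1 AS field_num,
       json_extract_scalar(curField, '$.fldName') AS field_name,
       json_extract_scalar(curField, '$.age') AS field_age
FROM `tick8s.test2` tbl
LEFT JOIN unnest(json_extract_array(tbl.data, '$.sections')) curSection WITH OFFSET n_s
LEFT JOIN unnest(json_extract_array(curSection, '$.fields')) curField WITH OFFSET n_c
ORDER BY id, section_num, field_num;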

Query both sides of a friend relationship in SQL

I have my Postgres table set up as such:
CREATE TABLE "Friendships" (
id integer PRIMARY KEY,
"fromUserId" integer NOT NULL REFERENCES "Users"(id),
"toUserId" integer NOT NULL REFERENCES "Users"(id)
);
When a user fetches their friendships, I run: SELECT * FROM "Friendships" WHERE "fromUserId" = XXXX.
How do I modify the query so that additional data is added to the results (true/false) based on whether toUserId also added that user?
Example result:
[
{ id: 444, fromUserId: 1, toUserId: 22, addedBack: false },
{ id: 445, fromUserId: 1, toUserId: 67, addedBack: true },
{ id: 446, fromUserId: 1, toUserId: 599, addedBack: true },
{ id: 447, fromUserId: 1, toUserId: 733, addedBack: false },
]
With EXISTS:
select f.*,
exists (
select 0 from "Friendships"
where "toUserId" = f."fromUserId" and "fromUserId" = f."toUserId"
) addedBack
from "Friendships" f
where f."fromUserId" = 1
For this sample data:
INSERT INTO "Friendships"(id, "fromUserId", "toUserId") VALUES
(444, 1, 22), (445, 1, 67), (446, 1, 599), (447, 1, 733),
(448, 67, 1), (449, 599, 1);
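(The sample inserts assume matching rows already exist in "Users"; a minimal sketch to satisfy the foreign keys, assuming only the id column matters here:)
-- hypothetical minimal Users table so the Friendships rows above insert cleanly
CREATE TABLE "Users" (id integer PRIMARY KEY);
INSERT INTO "Users"(id) VALUES (1), (22), (67), (599), (733);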
Results:
> id | fromUserId | toUserId | addedback
> --: | ---------: | -------: | :--------
> 444 | 1 | 22 | f
> 445 | 1 | 67 | t
> 446 | 1 | 599 | t
> 447 | 1 | 733 | f
You can use a left outer join to check for the reciprocal row:
select
f.id,
f.fromuserid,
f.touserid,
case when r.id is null then false else true end as addedback
from friendships f
left join friendships r on f.touserid = r.fromuserid
and r.touserid = f.fromuserid
where f.fromuserid = XXXX

SQL to JSON parent/child relationship

I have a table in my Microsoft SQL Server 2017 that looks like this:
+----+-------+----------+-------+-----------+
| ID | Level | ParentID | IsEnd | SomeText |
+----+-------+----------+-------+-----------+
| 1 | 1 | null | 1 | abc |
| 2 | 1 | null | 1 | asd |
| 3 | 2 | 1 | 1 | weqweq |
| 4 | 2 | 1 | 0 | lkjlkje |
| 5 | 3 | 4 | 1 | noonwqe |
| 6 | 3 | 4 | 0 | wet4t4 |
+----+-------+----------+-------+-----------+
And I would like to output a json string:
[{ ID: 1,
   SomeText: 'abc',
   Child2: [{
       ID: 3,
       SomeText: 'weqweq'
   }, {
       ID: 4,
       SomeText: 'lkjlkje',
       Child3: [{
           ID: 5,
           SomeText: 'noonwqe'
       }, {
           ID: 6,
           SomeText: 'wet4t4'
       }]
   }]
}]
IsEnd is a flag that marks when the last level has been reached.
You can use a recursive scalar UDF (User Defined Function) that builds the hierarchy starting from the root.
Here is the stub of a UDF you can start from:
create function dbo.udf_create_json_tree(@currentId int)
returns varchar(max)
begin
    declare @json nvarchar(max)
    declare @id int, @parentId int, @someText varchar(50)

    select @id = [ID], @parentId = ParentID, @someText = SomeText
    from dbo.tmp
    where [ID] = @currentId

    set @json =
    (
        select [ID], SomeText, json_query(dbo.udf_create_json_tree([ID])) as Child
        from dbo.tmp
        where ParentID = @currentId
        for json auto
    );

    if (@parentId is null)
        set @json = concat(
            '[{"ID":' + cast(@id as nvarchar(50)),
            ',"SomeText":"', @someText,
            '","Child":', cast(@json as nvarchar(max)),
            '}]'
        )

    return @json
end
Populate a table with your input values:
create table tmp ([ID] int, [Level] int, ParentID int, IsEnd bit, SomeText varchar(50))
insert into tmp values
(1, 1, null,1, 'abc' )
,(2, 1, null,1, 'asd' )
,(3, 2, 1 ,1, 'weqweq' )
,(4, 2, 1 ,0, 'lkjlkje')
,(5, 3, 4 ,1, 'noonwqe')
,(6, 3, 4 ,0, 'wet4t4' )
Now you can call the UDF on the first node (with ID=1):
select dbo.udf_create_json_tree(1)
Formatted JSON result:
[{
"ID": 1,
"SomeText": "abc",
"Child": [{
"ID": 3,
"SomeText": "weqweq"
},
{
"ID": 4,
"SomeText": "lkjlkje",
"Child": [{
"ID": 5,
"SomeText": "noonwqe"
},
{
"ID": 6,
"SomeText": "wet4t4"
}]
}]
}]
If you really need to name each child node with its level number (Child2, Child3 and so on), you'll probably want to implement replace logic on the "Child" key, as sketched below.
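A hedged sketch of that idea: pass the level down through the recursion and rename the generic "Child" key on the way back up. The function name udf_create_json_tree_lvl is hypothetical, the table dbo.tmp is from the answer above, and the body is illustrative rather than tested:
create function dbo.udf_create_json_tree_lvl(@currentId int, @level int)
returns nvarchar(max)
begin
    declare @json nvarchar(max) =
    (
        select [ID], SomeText,
               json_query(dbo.udf_create_json_tree_lvl([ID], @level + 1)) as Child
        from dbo.tmp
        where ParentID = @currentId
        for json auto
    );
    -- only this level's "Child" keys remain to rename; deeper levels were
    -- already renamed by the recursive calls
    return replace(@json, '"Child":', concat('"Child', @level + 1, '":'));
end
Calling dbo.udf_create_json_tree_lvl(1, 1) on the sample data would label the nested arrays Child2 and Child3; the root-level wrapping that the original UDF adds when ParentID is null would still need to be applied around the result.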

split a string in a column and pivot in sql

I am using SQL Server 2014 and I have a table like this (##tt_allresults):
ID | Area | Event |
1 | FB1 | Dev_Chg, Old Value: 0, New Value: 50, Workstation: Blah1, Function: Blah1 func |
1 | FB2 | Dev_Chg, Old Value: 99, New Value: 5, Workstation: Blah2, Function: Blah2 func |
1 | FB1 | Dev_Chg, Old Value: 50, New Value: 55, Workstation: Blah1, Function: Blah1 func |
I would like to form a table like so (expected output):
Area | Old Value | New Value | Function |
FB1 | 0 | 50 | Blah1 func |
FB2 | 99 | 5 | Blah2 func |
FB1 | 50 | 55 | Blah1 func |
This is what I have tried so far:
Declare @id int
WHILE EXISTS (SELECT * FROM ##tt_allresults)
BEGIN
    Select Top 1 @id = Id from ##tt_allresults

    -- Do the work --
    Declare @area nvarchar(100)
    set @area = (Select Area from ##tt_allresults where id = @id)
    Insert into ##tt_changedetails
    select @area, * from fnsplit((Select [event] from ##tt_allresults where id = @id), ',')

    -- Scrap the ID and Move On --
    Delete ##tt_allresults where ID = @id
END
select * from ##tt_changedetails
I get the following result
Area | ChangeDetails |
FB1 | Dev_Chg |
FB1 | Old value :0 |
FB1 | New Value :50 |
FB1 | Workstation :blah1 |
FB1 | Function :blah1 func |
FB2 | Dev_Chg |
FB2 | Old value :99 |
FB2 | New Value :5 |
FB2 | Workstation :blah2 |
FB2 | Function :blah2 func |
FB1 | Dev_Chg |
FB1 | Old value :50 |
FB1 | New Value :55 |
FB1 | Workstation :blah1 |
FB1 | Function :blah1 func |
How do I split my initial table and pivot it based on the split? I want to see the following result:
FB1 | 0 | 50 | Blah1 func |
FB2 | 99 | 5 | Blah2 func |
FB1 | 50 | 55 | Blah1 func |
No need for a UDF; it can all be done with the help of a CROSS APPLY and a little XML.
You can expand or contract as needed. I left 9 positions to illustrate.
1) Without a Function
Declare @YourTable table (ID int, Area varchar(25), Event varchar(500))
Insert Into @YourTable values
(1,'FB1','Dev_Chg, Old Value: 0, New Value: 50, Workstation: Blah1, Function: Blah1 func'),
(1,'FB2','Dev_Chg, Old Value: 99, New Value: 5, Workstation: Blah2, Function: Blah2 func'),
(1,'FB1','Dev_Chg, Old Value: 50, New Value: 55, Workstation: Blah1, Function: Blah1 func')
Select A.Area
,[Old Value] = Substring(Pos2,CharIndex(':',Pos2)+1,Len(Pos2))
,[New Value] = Substring(Pos3,CharIndex(':',Pos3)+1,Len(Pos3))
,[Function] = Substring(Pos5,CharIndex(':',Pos5)+1,Len(Pos5))
From @YourTable A
Cross Apply (
Select Pos1 = ltrim(rtrim(xDim.value('/x[1]','varchar(max)')))
,Pos2 = ltrim(rtrim(xDim.value('/x[2]','varchar(max)')))
,Pos3 = ltrim(rtrim(xDim.value('/x[3]','varchar(max)')))
,Pos4 = ltrim(rtrim(xDim.value('/x[4]','varchar(max)')))
,Pos5 = ltrim(rtrim(xDim.value('/x[5]','varchar(max)')))
,Pos6 = ltrim(rtrim(xDim.value('/x[6]','varchar(max)')))
,Pos7 = ltrim(rtrim(xDim.value('/x[7]','varchar(max)')))
,Pos8 = ltrim(rtrim(xDim.value('/x[8]','varchar(max)')))
,Pos9 = ltrim(rtrim(xDim.value('/x[9]','varchar(max)')))
From (Select Cast('<x>' + replace((Select A.Event as [*] For XML Path('')),',','</x><x>')+'</x>' as xml) as xDim) as A
) B
Returns
Area Old Value New Value Function
FB1 0 50 Blah1 func
FB2 99 5 Blah2 func
FB1 50 55 Blah1 func
2) With a Function
Select A.Area
,[Old Value] = Substring(Pos2,CharIndex(':',Pos2)+1,Len(Pos2))
,[New Value] = Substring(Pos3,CharIndex(':',Pos3)+1,Len(Pos3))
,[Function] = Substring(Pos5,CharIndex(':',Pos5)+1,Len(Pos5))
From @YourTable A
Cross Apply [dbo].[udf-Str-Parse-Row](A.Event,',') B
The UDF if needed
ALTER FUNCTION [dbo].[udf-Str-Parse-Row] (@String varchar(max), @Delimiter varchar(10))
Returns Table
As
Return (
Select Pos1 = ltrim(rtrim(xDim.value('/x[1]','varchar(max)')))
,Pos2 = ltrim(rtrim(xDim.value('/x[2]','varchar(max)')))
,Pos3 = ltrim(rtrim(xDim.value('/x[3]','varchar(max)')))
,Pos4 = ltrim(rtrim(xDim.value('/x[4]','varchar(max)')))
,Pos5 = ltrim(rtrim(xDim.value('/x[5]','varchar(max)')))
,Pos6 = ltrim(rtrim(xDim.value('/x[6]','varchar(max)')))
,Pos7 = ltrim(rtrim(xDim.value('/x[7]','varchar(max)')))
,Pos8 = ltrim(rtrim(xDim.value('/x[8]','varchar(max)')))
,Pos9 = ltrim(rtrim(xDim.value('/x[9]','varchar(max)')))
From (Select Cast('<x>' + replace((Select @String as [*] For XML Path('')), @Delimiter, '</x><x>') + '</x>' as xml) as xDim) as A
)
--Select * from [dbo].[udf-Str-Parse-Row]('Dog,Cat,House,Car',',')
--Select * from [dbo].[udf-Str-Parse-Row]('John <test> Cappelletti',' ')
If it helps with the visualization: the CROSS APPLY (which can easily be a TVF) splits each Event into Pos1..Pos9 (for the first row, Pos1 = 'Dev_Chg', Pos2 = 'Old Value: 0', Pos3 = 'New Value: 50', Pos4 = 'Workstation: Blah1', Pos5 = 'Function: Blah1 func'; the unused positions are NULL).
Here is the correct query; I hope it helps.
declare @str varchar(1000)
declare @temp as table (id int, area varchar(10), [event] varchar(100))

insert into @temp (id, area, [event]) values (1, 'FB1', 'Dev_Chg, Old Value: 0, New Value: 50, Workstation: Blah1, Function: Blah1 func')
insert into @temp (id, area, [event]) values (1, 'FB2', 'Dev_Chg, Old Value: 99, New Value: 5, Workstation: Blah2, Function: Blah2 func')
insert into @temp (id, area, [event]) values (1, 'FB1', 'Dev_Chg, Old Value: 50, New Value: 55, Workstation: Blah1, Function: Blah1 func')

set @str = 'Dev_Chg, Old Value: 0, New Value: 50, Workstation: Blah1, Function: Blah1 func'

select area, [Old Value], [New Value], [Workstation], [Function]
from (
    -- rn keeps the two FB1 events apart when pivoting
    select t.rn,
           t.area,
           RTRIM(LTRIM(SUBSTRING(s.String, 0, CHARINDEX(':', s.String)))) as theader,
           SUBSTRING(s.String, CHARINDEX(':', s.String) + 1, 15) as tvalue
    from (select row_number() over (order by (select null)) as rn, * from @temp) t
    cross apply dbo.ufn_CSVToTable(t.[event]) s
    where s.String != 'Dev_Chg'
) as final
pivot (
    max(tvalue) for theader in ([Old Value], [New Value], [Workstation], [Function])
) as pvt
-- I used a table-valued function to complete it; the source is this:
ALTER FUNCTION [dbo].[ufn_CSVToTable] ( @StringInput VARCHAR(8000) )
RETURNS @OutputTable TABLE ( [String] NVARCHAR(1000) )
AS
BEGIN
    DECLARE @String NVARCHAR(1000)

    WHILE LEN(@StringInput) > 0
    BEGIN
        SET @String = LEFT(@StringInput,
                           ISNULL(NULLIF(CHARINDEX(',', @StringInput) - 1, -1),
                                  LEN(@StringInput)))
        SET @StringInput = SUBSTRING(@StringInput,
                                     ISNULL(NULLIF(CHARINDEX(',', @StringInput), 0),
                                            LEN(@StringInput)) + 1, LEN(@StringInput))
        INSERT INTO @OutputTable ( [String] )
        VALUES ( @String )
    END

    RETURN
END
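A quick way to sanity-check the splitter on one of the Event values (one output row per comma-separated piece):
SELECT [String]
FROM dbo.ufn_CSVToTable('Dev_Chg, Old Value: 0, New Value: 50, Workstation: Blah1, Function: Blah1 func');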