U-SQL Extract data from Json that contain Array - azure-data-lake

I want to extract records from my JSON, each containing the PartnerId (which is always the same) and a Name from the users array. I'm currently trying this code:
// Asker's original attempt, reproduced with U-SQL's "@" rowset/variable sigils.
// Step 1: read partnerId and the raw "users" JSON text from the input file.
@jsonFile =
    EXTRACT partnerId int,
            users string
    FROM @INPUT_FILE
    USING new Microsoft.Analytics.Samples.Formats.Json.JsonExtractor();

// Step 2: parse the "users" JSON text with JsonTuple.
@followingUsersArray =
    SELECT partnerId,
           Microsoft.Analytics.Samples.Formats.Json.JsonFunctions.JsonTuple(users) AS following_array
    FROM @jsonFile;

// Step 3: look up "name" directly on the parsed tuple.
// NOTE(review): "users" is a JSON array, so the tuple presumably has no top-level
// "name" key; this is the step the asker reports as returning no result.
@followingUsers =
    SELECT partnerId AS PartnerId,
           following_array["name"] AS FriendName
    FROM @followingUsersArray;
But I'm not getting any result. Here's my example JSON file:
{
"partnerId": 2,
"users": [{
"name": "Anna ROGOWSKA",
"profile_image_url": "http://pbs.twimg.com/profile_images/884844399338901504/0OYl8JA6_normal.jpg",
"created_at": "2012-09-30T19:52:15+02:00",
"location": "Sopot,Poland",
"id_str": "855093368"
},
{
"name": "Anna BARAŃSKA",
"profile_image_url": "http://pbs.twimg.com/profile_images/884844399338901504/0OYl8JA6_normal.jpg",
"created_at": "2012-09-30T19:52:15+02:00",
"location": "Sopot,Poland",
"id_str": "855093368"
}
]
}
The result I want is:
2,"Anna ROGOWSKA"
2,"Anna BARAŃSKA"

You should leverage the CROSS APPLY EXPLODE functionality of U-SQL.
I tested this with your json file and it worked:
// Flattens the "users" array of a JSON document into one row per user.
REFERENCE ASSEMBLY [Newtonsoft.Json];
REFERENCE ASSEMBLY [Microsoft.Analytics.Samples.Formats];

USING Microsoft.Analytics.Samples.Formats.Json;

// Local paths; the @"..." verbatim strings keep the backslashes literal.
DECLARE @path string = @"C:\Users\testUser\Documents\Visual Studio 2015\Projects\USQL_Json\";
DECLARE @input string = @path + @"sample.json";
DECLARE @to string = @path + @"output.csv";

// Read partnerId and the raw "users" JSON text.
@jsonFile =
    EXTRACT partnerId int,
            users string
    FROM @input
    USING new JsonExtractor();

// Parse "users" and keep only the tuple's values, one per array element.
@followingUsers =
    SELECT partnerId AS PartnerId,
           JsonFunctions.JsonTuple(users).Values AS user_array
    FROM @jsonFile;

// Explode the array into rows, then parse each element to read its "name".
@tabUsers =
    SELECT PartnerId,
           JsonFunctions.JsonTuple(t_user)["name"] AS FriendName
    FROM @followingUsers
         CROSS APPLY
             EXPLODE(user_array) AS A(t_user);

OUTPUT @tabUsers
TO @to
USING Outputters.Csv();
The Output is:
2,"Anna ROGOWSKA"
2,"Anna BARANSKA"

Related

Using JSON_VALUE for parse column in SQL Server table

I have never worked with JSON in SQL Server before that's why need some help.
I have written a simple snippet of code:
-- Reads the first tag id from a JSON document held in a variable.
-- The trailing commas after each "id" value were removed: they make the JSON invalid.
DECLARE @json NVARCHAR(4000);
SET @json =
N'{
"id":"40476",
"tags":[
{
"id":"5f5883"
},
{
"id":"5fc8"
}
],
"type":"student",
"external_id":"40614476"
}';

SELECT
    JSON_VALUE(@json, '$.tags[0].id') AS tags;
In the sample above I show how to get the first "id" from "tags".
But what would the script look like if "tags" contains not 2 "id" values but an unknown number of them, and the result should be a column like this:
1 5f5883
2 5fc8
You may use OPENJSON() with explicit schema to parse the $.tags JSON array:
-- Returns one row per element of $.tags, exposing each element's "id".
DECLARE @json NVARCHAR(4000);
SET @json =
N'{
"id":"40476",
"tags":[
{
"id":"5f5883"
},
{
"id":"5fc8"
}
],
"type":"student",
"external_id":"40614476"
}';

SELECT id
FROM OPENJSON(@json, '$.tags')
     WITH (id varchar(10) '$.id');
Result:
id
------
5f5883
5fc8
If you want to get the index of each id in the $.tags JSON array, then you need a combination of OPENJSON() with default schema and JSON_VALUE():
-- With the default schema, [key] holds the 0-based array index and [value] the element.
SELECT
    CONVERT(int, [key]) AS rn,
    JSON_VALUE([value], '$.id') AS id
FROM OPENJSON(@json, '$.tags');
Result:
rn id
----------
0 5f5883
1 5fc8

JSON_MODIFY all values without looping

See sample below. How would I use JSON_MODIFY or other way to modify all the "disc" values to "100" without having to update each array item in a loop?
-- Sample data for the question: one JSON document per row.
-- The literal was missing its opening "{" and had trailing commas after some
-- "disc" values; both made the JSON invalid.
CREATE TABLE #temp_data (json_text nvarchar(max));

INSERT INTO #temp_data (json_text)
SELECT
'{
"curr":"USD",
"items":[
{
"line":1,
"disc":10
},
{
"line":2,
"disc":11
},
{
"line":3,
"disc":12
}
]
}';

SELECT json_text FROM #temp_data;
We don't actually need to parse the rest of the JSON, we only need the $.items part. So we can APPLY that property with OPENJSON, then reassemble it with JSON_MODIFY:
-- Rebuilds $.items for every row with disc forced to 100, then writes it back.
-- The target is the temp table #temp_data created in the question.
UPDATE t
SET json_text = JSON_MODIFY(
        t.json_text,
        '$.items',
        -- JSON_QUERY makes sure the rebuilt array is inserted as JSON,
        -- not as an escaped string.
        JSON_QUERY(v.items)
    )
FROM #temp_data AS t
CROSS APPLY (
    SELECT line, disc = 100
    FROM OPENJSON(t.json_text, '$.items') WITH (line int) AS items
    FOR JSON PATH
) AS v(items);
In the WITH block, we need to add all properties we are not modifying, then in the inner SELECT we add any columns we want to change.
One way would be to use the traditional SQL DML UPDATE to replace the json_text with a new piece of JSON derived from the existing. Something like this
JSON_MODIFY could be added if that's a requirement.
-- Recreate the sample temp table and load one JSON document into it.
DROP TABLE IF EXISTS #temp_data;
go

CREATE TABLE #temp_data (
    json_text nvarchar(max)
);

INSERT INTO #temp_data (json_text)
SELECT N'{
"curr":"USD",
"items":[
{
"line":1,
"disc":10
},
{
"line":2,
"disc":11
},
{
"line":3,
"disc":12
}
]
}';
-- Replaces each row's JSON with a document rebuilt from that same row:
-- "curr" is copied and every item keeps its "line" but gets disc = 100.
-- Every subquery reads td.json_text, so the update stays correct when the
-- table holds more than one row.
UPDATE td
SET json_text = (
        SELECT
            JSON_VALUE(td.json_text, N'strict $.curr') AS curr,
            (
                SELECT j.line, 100 AS disc
                FROM OPENJSON(td.json_text, N'$.items')
                     WITH (line int) AS j
                FOR JSON PATH
            ) AS items
        FOR JSON PATH, WITHOUT_ARRAY_WRAPPER
    )
FROM #temp_data AS td;

SELECT json_text FROM #temp_data;

/* Result (pretty-printed here for readability):
{
"curr": "USD",
"items": [
{
"line": 1,
"disc": 100
},
{
"line": 2,
"disc": 100
},
{
"line": 3,
"disc": 100
}
]
}
*/
It's not possible to use a wildcard in a JSON_MODIFY() call, so one possible approach is the following sequence of steps:
Parse the $.items array from the stored JSON using OPENJSON() and explicit schema including the line key.
Set the new value for the disc key for each item of this array.
Generate the JSON again using FOR JSON PATH.
Update the table with JSON_MODIFY().
Table:
-- Sample temp table with a single JSON document.
CREATE TABLE #temp_data (
    json_text nvarchar(max)
);

INSERT INTO #temp_data (json_text)
SELECT N'
{
"curr":"USD",
"items":[
{"line":1, "disc":10},
{"line":2, "disc":11},
{"line":3, "disc":12}
]
}';
Statement:
-- Overwrites $.items in every row with the same lines, each carrying disc = 100.
-- The FOR JSON subquery stays directly inside JSON_MODIFY so that its result is
-- inserted as JSON.
UPDATE td
SET td.json_text = JSON_MODIFY(
        td.json_text,
        '$.items',
        (
            SELECT
                item.line,
                disc = 100
            FROM OPENJSON(td.json_text, '$.items')
                 WITH (line int '$.line') AS item
            FOR JSON PATH
        )
    )
FROM #temp_data AS td;
Result:
json_text
{
"curr":"USD",
"items":[{"line":1,"disc":100},{"line":2,"disc":100},{"line":3,"disc":100}]
}

Create json key value from table column name and data

Is it possible to create JSON key value from a table SELECT statement, where column name as key and the column value as value
-- Sample table variable with a single row.
DECLARE @T TABLE (Id int, ItemName varchar(10), CategoryId int, ItemDate date);

-- 'yyyymmdd' is read the same way under every language/DATEFORMAT setting.
INSERT INTO @T (Id, ItemName, CategoryId, ItemDate)
VALUES (1, 'ABC', 100, '20200101');
to return something as below
{
"id": 1,
"table": "tableName",
"data": [{
"key": "ItemName",
"value": "ABC"
},
{
"key": "CategoryId",
"value": "100"
},
{
"key": "ItemDate",
"value": "1/1/2020"
}
]
}
I have looked at selecting as JSON but stuck here
-- Asker's starting point: serialise the whole table variable as JSON.
SELECT *
FROM @T
FOR JSON AUTO;
You may try to use VALUES table value constructor and FOR JSON AUTO. As is mentioned in the documentation, when ... you specify the AUTO option, the format of the JSON output is automatically determined based on the order of columns in the SELECT list and their source tables.
Table:
-- Sample table for the key/value JSON examples.
CREATE TABLE Tbl (
    Id int,
    ItemName varchar(10),
    CategoryId int,
    ItemDate date
);

-- Explicit column list; 'yyyymmdd' literals do not depend on language/DATEFORMAT.
INSERT INTO Tbl (Id, ItemName, CategoryId, ItemDate)
VALUES
    (1, 'ABC', 100, '20200101'),
    (2, 'DEF', 200, '20200202');
Statement:
-- Unpivots each row into (key, value) pairs and lets FOR JSON AUTO nest them
-- under the derived table's alias ("Data").
-- The alias is spelled the same everywhere so the query also compiles under a
-- case-sensitive collation.
SELECT
    t.Id,
    Data.[key],
    Data.[value]
FROM Tbl t
CROSS APPLY (VALUES
    ('ItemName',   CONVERT(varchar(max), ItemName)),
    ('CategoryId', CONVERT(varchar(max), CategoryId)),
    ('ItemDate',   CONVERT(varchar(max), ItemDate))
) Data ([key], [value])
FOR JSON AUTO;
Result:
[
{
"Id":1,
"Data":[
{"key":"ItemName", "value":"ABC"},
{"key":"CategoryId","value":"100"},
{"key":"ItemDate","value":"2020-01-01"}
]
},
{
"Id":2,
"Data":[
{"key":"ItemName", "value":"DEF"},
{"key":"CategoryId", "value":"200"},
{"key":"ItemDate", "value":"2020-02-02"}
]
}
]
As an additional option you may try to build the inner JSON for each row:
-- Alternative: build the key/value array per row with a correlated
-- FOR JSON PATH subquery and expose it as "Data".
SELECT
    Id,
    (
        SELECT
            kv.[key],
            kv.[value]
        FROM (
            VALUES
                ('ItemName',   CONVERT(varchar(max), ItemName)),
                ('CategoryId', CONVERT(varchar(max), CategoryId)),
                ('ItemDate',   CONVERT(varchar(max), ItemDate))
        ) AS kv ([key], [value])
        FOR JSON PATH
    ) AS Data
FROM Tbl
FOR JSON AUTO;

How to write a select query to get the index value from Json object

I have the below JSON object. I need to write a select query to get the index values of Object JSON array. Kind of getting the sequence value.
{
"Model": [
{
"ModelName": "Test Model",
"Object": [
{
"ID": 1,
"Name": "ABC",
},
{
"ID": 11,
"Name": "ABCD",
},
{
"ID": 15,
"Name": "ABCDE",
},
]
}]}
Expected Output:
Index_Value
1
2
3
If I understand the question correctly and you want to get the index of the items in the Object JSON array, you need to use OPENJSON() with default schema. The result is a table with columns key, value and type and in case of JSON array, the key column holds the index of each item in the array (0-based):
JSON:
-- Sample document: $.Model is an array whose items each hold an $.Object array.
DECLARE @json nvarchar(max) = N'{
"Model":[
{
"ModelName":"Test Model",
"Object":[
{
"ID":1,
"Name":"ABC"
},
{
"ID":11,
"Name":"ABCD"
},
{
"ID":15,
"Name":"ABCDE"
}
]
}
]
}';
Statement:
-- 1-based position of every element of each Model's Object array
-- ([key] is the 0-based array index).
SELECT CONVERT(int, j2.[key]) + 1 AS item_id
FROM OPENJSON(@json, '$.Model') j1
CROSS APPLY OPENJSON(j1.[value], '$.Object') j2;
But if you want to get the values of the ID keys in the Object JSON array, the statement is different:
-- Value of the ID key of every element of each Model's Object array.
SELECT j2.ID
FROM OPENJSON(@json, '$.Model') j1
CROSS APPLY OPENJSON(j1.[value], '$.Object') WITH (
    ID int '$.ID'
) j2;
Note, that you need two OPENJSON() calls, because the input JSON has nested array structure. Of course, if Model JSON array has always one item, you may simplify the statement using an appropriate path:
-- Shortcut for when only the first Model item matters: address its Object array directly.
SELECT CONVERT(int, [key]) + 1 AS item_id
FROM OPENJSON(@json, '$.Model[0].Object');
Finally, to get the index, ID and Name, you should use the following statement, which assumes that the $.Model JSON array has more than one item and defines the ID and Name columns with the appropriate data types:
-- Position, ID and Name of every Object element:
--   j1 = Model items, j2 = Object elements (default schema, for the index),
--   j3 = typed columns parsed from each Object element.
SELECT
    CONVERT(int, j2.[key]) + 1 AS ItemID,
    j3.ID,
    j3.Name
FROM OPENJSON(@json, '$.Model') j1
CROSS APPLY OPENJSON(j1.[value], '$.Object') j2
CROSS APPLY OPENJSON(j2.[value], '$') WITH (
    ID int '$.ID',
    Name varchar(50) '$.Name'
) j3;
-- Returns ID and Name of a single element of each Model's Object array,
-- selected by its 0-based index.
DECLARE @json nvarchar(max) = N'{
"Model":[
{
"ModelName":"Test Model",
"Object":[
{
"ID":1,
"Name":"ABC"
},
{
"ID":11,
"Name":"ABCD"
},
{
"ID":15,
"Name":"ABCDE"
}
]
}
]
}';

-- 0-based index; 1 selects the second element, which is the row shown in the results below.
DECLARE @i int = 1;

-- Build the path in a variable first; a variable is accepted as the OPENJSON
-- path on SQL Server 2017+ and Azure SQL Database.
DECLARE @path nvarchar(4000) = CONCAT(N'$.Object[', @i, N']');

SELECT
    j2.ID,
    j2.Name
FROM OPENJSON(@json, '$.Model') j1
CROSS APPLY OPENJSON(j1.[value], @path) WITH (
    ID int '$.ID',
    Name varchar(100) '$.Name'
) j2;
Results:-
ID
Name
11
ABCD
You can select the key column directly in the SELECT clause; there is no need to declare it in the WITH clause of the CROSS APPLY.
-- One row per element of $.parentkey: selected properties plus the element's
-- array index taken from OPENJSON's [key] column.
SELECT DISTINCT
    t.id,
    JSON_VALUE(AttsData.[value], '$.address')    AS address,
    JSON_VALUE(AttsData.[value], '$.name')       AS name,
    JSON_VALUE(AttsData.[value], '$.owner_name') AS owner_name,
    JSON_VALUE(AttsData.[value], '$.project')    AS project,
    CONVERT(int, AttsData.[key])                 AS index_id
FROM mytablewithjsonfeild AS t
CROSS APPLY OPENJSON(t."jsonfeild", N'$.parentkey') AS AttsData
In the query above, I cross-applied the JSON field of the table, and in the SELECT statement I picked the specific keys,
using CONVERT(int, AttsData.[key]) to get the index of each element.

How to make JSON from SQL query in MS SQL 2014

Question: What is best solution to generate JSON from a SQL query in MS SQL 2014? I created a procedure, but it is very slow.
My Example:
-- Serialises dbo.Customers to XML and asks dbo.HTTP_JSON to turn it into JSON.
DECLARE @customers xml;
DECLARE @json NVARCHAR(max);

SET @customers = (SELECT * FROM dbo.Customers FOR XML path, root);

-- OUTPUT is required on the call as well; without it @json is never assigned.
EXEC [dbo].[HTTP_JSON] @customers, @json OUTPUT;

-- NOTE(review): the original snippet also contained "EXEC [dbo].[HTTP_JSON](@Shopping)".
-- EXEC does not take a parenthesised argument list and @Shopping is not declared
-- in this snippet, so that call is not reproduced as executable code.
-- Converts XML shaped as /root/<row>/<column> into a JSON text of
-- comma-separated {"column":"value",...} objects, one per child of /root.
--   @parameters : XML input (for example the result of FOR XML PATH, ROOT)
--   @response   : receives the generated JSON text
-- NOTE(review): values are concatenated as-is; no JSON escaping is applied.
Create PROCEDURE [dbo].[HTTP_JSON]
@parameters xml, @response NVARCHAR(max) OUTPUT
WITH EXEC AS CALLER
AS
set @response = (SELECT Stuff(
(SELECT * from
(SELECT ',
{'+
-- inner Stuff(...,1,1,'') drops the leading comma of the "name":"value" list
Stuff((SELECT ',"'+coalesce(b.c.value('local-name(.)', 'NVARCHAR(MAX)'),'')+'":"'+
b.c.value('text()[1]','NVARCHAR(MAX)') +'"'
from x.a.nodes('*') b(c)
for xml path(''),TYPE).value('(./text())[1]','NVARCHAR(MAX)')
,1,1,'')+'}'
from @parameters.nodes('/root/*') x(a)
) JSON(theLine)
for xml path(''),TYPE).value('.','NVARCHAR(MAX)' )
-- outer Stuff(...,1,1,'') drops the leading comma of the object list
,1,1,''))
GO
Just for fun, I created a scalar function based off of my prior answer.
Aside from the obvious XML parameter, I added two additional: 1) Include Header (illustrated below), and 2) ToLower case (I prefer my JSON field names in lower case which links to my classes and such).
If the query is more than one record, a formatted array will be returned.
-- Sample data, then one JSON string per row via dbo.[udf-Str-JSON]
-- (header off, lower-case field names).
Declare @Table table (ID int,Active bit,First_Name varchar(50),Last_Name varchar(50),EMail varchar(50))
Insert into @Table values
(1,1,'John','Smith','john.smith@email.com'),
(2,0,'Jane','Doe' ,'jane.doe@email.com')

Select A.ID
      ,A.Last_Name
      ,A.First_Name
      ,B.JSON
From @Table A
Cross Apply (Select JSON=[dbo].[udf-Str-JSON](0,1,(Select A.* For XML Raw)) ) B
Returns
ID Last_Name First_Name JSON
1 Smith John {"id":"1","active":"1","first_name":"John","last_name":"Smith","email":"john.smith#email.com"}
2 Doe Jane {"id":"2","active":"0","first_name":"Jane","last_name":"Doe","email":"jane.doe#email.com"}
Or even more simply
-- Whole table in a single call (header off, lower-case field names).
Select JSON=[dbo].[udf-Str-JSON](0,1,(Select * From @Table for XML RAW))
Returns with Header ON
{
"status": {
"successful": "true",
"timestamp": "2016-10-09 06:08:16 GMT",
"rows": "2"
},
"results": [{
"id": "1",
"active": "1",
"first_name": "John",
"last_name": "Smith",
"email": "john.smith#email.com"
}, {
"id": "2",
"active": "0",
"first_name": "Jane",
"last_name": "Doe",
"email": "jane.doe#email.com"
}]
}
Returns with Header Off
[{
"id": "1",
"active": "1",
"first_name": "John",
"last_name": "Smith",
"email": "john.smith#email.com"
}, {
"id": "2",
"active": "0",
"first_name": "Jane",
"last_name": "Doe",
"email": "jane.doe#email.com"
}]
The UDF
-- Builds a JSON string from XML produced by "... FOR XML RAW".
-- Parameter 1: @IncludeHead 1/0 (wrap the rows in a status/results header)
-- Parameter 2: @ToLowerCase 1/0 (converts field names to lowercase)
-- Parameter 3: (Select * From ... for XML RAW)
-- Every value is emitted as a quoted string. The first attribute of each <row>
-- is used as the row's Entity, so it must identify the row uniquely.
ALTER FUNCTION [dbo].[udf-Str-JSON] (@IncludeHead int,@ToLowerCase int,@XML xml)
Returns varchar(max)
AS
Begin
Declare @Head varchar(max) = '',@JSON varchar(max) = ''
-- cteEAV: one record per attribute of every /row element (entity, attribute, value)
; with cteEAV as (Select RowNr=Row_Number() over (Order By (Select NULL))
,Entity = xRow.value('@*[1]','varchar(100)')
,Attribute = xAtt.value('local-name(.)','varchar(100)')
,Value = xAtt.value('.','varchar(max)')
From @XML.nodes('/row') As R(xRow)
Cross Apply R.xRow.nodes('./@*') As A(xAtt) )
-- cteSum: row count plus the output template; [getResults] is replaced at the end.
-- "HH" gives a 24-hour clock ("hh" would be 12-hour with no AM/PM marker).
,cteSum as (Select Records=count(Distinct Entity)
,Head = IIF(@IncludeHead=0,IIF(count(Distinct Entity)<=1,'[getResults]','[[getResults]]'),Concat('{"status":{"successful":"true","timestamp":"',Format(GetUTCDate(),'yyyy-MM-dd HH:mm:ss '),'GMT','","rows":"',count(Distinct Entity),'"},"results":[[getResults]]}') )
From cteEAV)
-- cteBld: marks the first/last attribute of each entity to open/close its JSON object
,cteBld as (Select *
,NewRow=IIF(Lag(Entity,1) over (Partition By Entity Order By (Select NULL))=Entity,'',',{')
,EndRow=IIF(Lead(Entity,1) over (Partition By Entity Order By (Select NULL))=Entity,',','}')
,JSON=Concat('"',IIF(@ToLowerCase=1,Lower(Attribute),Attribute),'":','"',Value,'"')
From cteEAV )
Select @JSON = @JSON+NewRow+JSON+EndRow,@Head = Head From cteBld, cteSum
-- Stuff(...,1,1,'') drops the leading comma before the first object
Return Replace(@Head,'[getResults]',Stuff(@JSON,1,1,''))
End
-- Parameter 1: @IncludeHead 1/0
-- Parameter 2: @ToLowerCase 1/0 (converts field name to lowercase)
-- Parameter 3: (Select * From ... for XML RAW)
**EDIT - Corrected Typo
The following should create the JSON array for just about any data set. However, I have not created a way to convert bit to true/false yet.
Just one point to consider: the FIRST column in the initial SELECT has to be the primary key, which equates to the ENTITY field. In this case, Select * from @User for XML RAW ... ID is the Entity and just so happens to be the first field in the table.
As far as performance, 500 records with 19 fields creates a JSON string 191,987 bytes in 0.694 seconds (50 records in 0.098 seconds)
Consider the following:
-- Stand-alone version of the XML-to-JSON technique.
-- Sample data:
Declare @User table (ID int,Active bit,First_Name varchar(50),Last_Name varchar(50),EMail varchar(50),LastOn DateTime)
Insert into @User values
(1,1,'John','Smith','john.smith@email.com','2016-10-05 17:32:41.903'),
(2,0,'Jane','Doe' ,'jane.doe@email.com','2016-10-05 08:25:18.203')

-- One <row .../> element per record, columns as attributes.
Declare @XML xml = (Select * From @User for XML RAW)
Declare @JSON varchar(max) = ''

-- cteEAV: one record per attribute; Entity is the value of the row's first attribute.
;with cteEAV as (
Select RowNr = Row_Number() over (Order By (Select NULL))
,Entity = xRow.value('@*[1]','varchar(100)')
,Attribute = xAtt.value('local-name(.)','varchar(100)')
,Value = xAtt.value('.','varchar(max)')
From @XML.nodes('/row') As A(xRow)
Cross Apply A.xRow.nodes('./@*') As B(xAtt) )
-- cteBld: flags the first/last attribute of each entity to open/close its JSON object.
,cteBld as (
Select *
,NewRow = IIF(Lag(Entity,1) over (Partition By Entity Order By (Select NULL))=Entity,'',',{')
,EndRow = IIF(Lead(Entity,1) over (Partition By Entity Order By (Select NULL))=Entity,',','}')
,JSON = Concat('"',Attribute,'":','"',Value,'"')
From cteEAV )
Select @JSON = @JSON+NewRow+JSON+EndRow
From cteBld

-- Drop the leading comma and wrap the objects in an array.
Select '['+Stuff(@JSON,1,1,'')+']'
Returns
[{"ID":1, "Active":1, "First_Name":"John", "Last_Name":"Smith", "EMail":"john.smith#email.com", "LastOn":"2016-10-05T17:32:41.903", "TotalSales":25569.0000} ,{"ID":2, "Active":0, "First_Name":"Jane", "Last_Name":"Doe", "EMail":"jane.doe#email.com", "LastOn":"2016-10-05T08:25:18.203", "TotalSales":22888.0000}]
A more readable version
cteEAV will dynamically unpivot the data and generate the following:
cteBLD will extend and add flags New/End Row
The Final Select
This will put it all together and generate one final string which can be wrapped or nested as you please.