How to get data from json column in mssql - sql

I'm struggling to write a query that gets value from json column with some specific conditions. I have a table named Table1 with a column of type nvarchar(max) named Data that contains some json values. The json itself looks like this:
{
"Addresses": [
{
"ApartmentNumber": "1",
"City": "Rome",
"CountryCode": "IT",
"HouseNumber": "2",
"Post": "Rome",
"PostalCode": "11-111",
"Region": "Rome",
"Street": "Italian",
"StreetPrefix": "St.",
"TypeCode": "PERMANENT"
},
{
"ApartmentNumber": "11",
"City": "Madrid",
"CountryCode": "ES",
"HouseNumber": "22",
"Post": "Madrid",
"PostalCode": "11-111",
"Region": "Madrid",
"Street": "Spanish",
"StreetPrefix": "St.",
"TypeCode": "CORRESPONDENCE"
}
],
"Contacts": [
{
"TypeCode": "EMAIL",
"DefaultContact": false,
"Value": "sample#xyz.com"
}
],
"PersonData": {
"BirthDate": "1968-08-03T00:00:00",
"CitizenshipCode": "US",
"DeathDate": "0001-01-01T00:00:00",
"FirstName": "John",
"Gender": "M",
"LastName": "Jones"
}
}
I would like to get a value of CountryCode from the Addresses node where TypeCode is "CORRESPONDENCE". I tried to achieve that with combinations of JSON_VALUE and JSON_QUERY functions but I failed. Below are some examples of my trials:
query:
SELECT JSON_QUERY(t.Data, '$.Addresses') AS Address FROM [Table1] t
result:
[
{
"ApartmentNumber": "1",
"City": "Rome",
"CountryCode": "IT",
"HouseNumber": "2",
"Post": "Rome",
"PostalCode": "11-111",
"Region": "Rome",
"Street": "Italian",
"StreetPrefix": "St.",
"TypeCode": "PERMANENT"
},
{
"ApartmentNumber": "11",
"City": "Madrid",
"CountryCode": "ES",
"HouseNumber": "22",
"Post": "Madrid",
"PostalCode": "11-111",
"Region": "Madrid",
"Street": "Spanish",
"StreetPrefix": "St.",
"TypeCode": "CORRESPONDENCE"
}
]
or this:
query:
select top 1 JSON_VALUE(t.Data, '$.PersonData.LastName') FROM [Table1] t where ISJSON(t.Data) > 0 and JSON_VALUE(pd.BusinessPartner, '$.PersonData.Gender') = 'F'
result:
"Jones"
but when i to write similar query with Addresses as condition:
query:
select top 1 JSON_VALUE(t.Data, '$.Addresses.CountryCode') FROM [Table1] t where ISJSON(t.Data) > 0 and JSON_VALUE(t.Data,'$.Addresses.TypeCode') = 'CORRESPONDENCE'
I get empty string as the result.
Thanks in advance

From SQL Server 2016, you can query on JSON column. See the documentation : Work with JSON data
The interesting part for you it's Analyze JSON data with SQL queries.
This done :
select Id, PostalCode
from Address
CROSS APPLY OPENJSON (Address.Data, N'$.Addresses')
WITH (
TypeCode varchar(50) '$.TypeCode',
PostalCode varchar(50) '$.PostalCode'
) AS AddressesJsonData
WHERE TypeCode = N'PERMANENT'

Related

Select data from Json array MS SQL Server

I have to select data from Json like this:
[
{
"id": 10100,
"externalId": "100000035",
"name": "Test1",
"companyId": 10099,
"phone": "0738003811",
"email": "test#Test.com",
"mainAddress": {
"county": "UK",
"province": "test",
"zipCode": "01234",
"city": "test",
"street": "test",
"gln": "44,37489331;26,21941193",
"country": {
"iso2": "UK",
"iso3": "UK"
}
},
"active": false,
"main": true,
"stores": [
"Test"
],
"attributes": [
{
"attributeId": 1059,
"attributeName": "CH6 name",
"attributeExternalId": null,
"attributeValueId": 74292,
"attributeValueType": "MONO_LINGUAL",
"attributeValueEid": null,
"attributePlainValue": "Unknown"
},
{
"attributeId": 1061,
"attributeName": "BD",
"attributeExternalId": null,
"attributeValueId": 81720,
"attributeValueType": "MONO_LINGUAL",
"attributeValueEid": null,
"attributePlainValue": "Not assigned"
}
],
"daysSinceLastOrder": null
},
{
"id": 62606,
"externalId": "VL_LC_000190",
"name": "Test",
"companyId": 17793,
"phone": "44333424",
"email": "test#email.com",
"mainAddress": {
"firmName": "test",
"county": "test",
"province": "test",
"zipCode": "247555",
"city": "test",
"street": "test",
"gln": "44.8773851;23.9223518",
"country": {
"iso2": "RO",
"iso3": "ROU"
},
"phone": "07547063789"
},
"active": true,
"main": false,
"stores": [
"Valcea"
],
"attributes": [
{
"attributeId": 1042,
"attributeName": "Type of location",
"attributeExternalId": "TYPE_OF_DIVISION",
"attributeValueId": 34506,
"attributeValueType": "MONO_LINGUAL",
"attributeValueEid": "Small OTC (<40mp)",
"attributePlainValue": "Small OTC (<40mp)"
},
{
"attributeId": 17,
"attributeName": "Limit for payment",
"attributeExternalId": "LIMIT_FOR_PAYMENT_IN_DAYS",
"attributeValueId": 59120,
"attributeValueType": "NUMBER",
"attributeValueEid": null,
"attributePlainValue": "28"
}
],
"daysSinceLastOrder": 147
}
]
I know how to select data from simple json object using "FROM OPENJSON",
but now I have to select a
AttributeValueId, AttributeId and AttributeName, attributePlainValue and CompanyId for each Attribute. So I dont know how to select data from attributes array and then how to join to this CompanyId which is one level up.
Maybe someone knows how write this query.
As mentioned by #lptr in the comments:
You need to pass the result of one OPENJSON to another, using CROSS APPLY. You can select a whole JSON object or array as a property, by using the syntax AS JSON
select
t1.companyid,
t2.*
from openjson(#j)
with (
companyId int,
attributes nvarchar(max) as json
) as t1
cross apply openjson(t1.attributes)
with
(
attributeId int,
attributeName nvarchar(100),
attributeValueId nvarchar(100),
attributePlainValue nvarchar(100)
) as t2;
db<>fiddle
For example, you can use code like this.
f1.metaData->"$.identity.customerID" = '.$customerID.'

Get nested data c.<type>.<type> in SQL Query?

I've been searching everywhere for this seemingly simple action: I'd like to select only a certain few data type from a nested source.
The closest that I can get to the solution is this:
SELECT c.receipt_number, c.millitime, c.email, c.phone, c.shipping, c.shipping_note, c.amount_paid, i.description, i.quantity
FROM c
JOIN i IN c.line_items
WHERE c.millitime > 1627813253000
But this will create lots of duplicated data, like the receipt_number, email, etc in the example:
[
{
"receipt_number": null,
"millitime": 1627813377000,
"email": "test#gmail.com",
"phone": "000000000",
"shipping": {
"address": {
"city": "Sydney",
"country": "AU",
"line1": "Test Street",
"line2": null,
"postal_code": "3000",
"state": "VIC"
},
"name": "New Cust"
},
"shipping_note": "Please knock on door.",
"amount_paid": 104,
"description": "Curry Chicken",
"quantity": 1
},
{
"receipt_number": null,
"millitime": 1627813377000,
"email": "test#gmail.com",
"phone": "000000000",
"shipping": {
"address": {
"city": "Sydney",
"country": "AU",
"line1": "Test Street",
"line2": null,
"postal_code": "3000",
"state": "VIC"
},
"name": "New Cust"
},
"shipping_note": "Please knock on door.",
"amount_paid": 104,
"description": "Chicken Noodle",
"quantity": 8
}
]
Is there a way to create a nested result of c.line_items with just the desired data description and quantity? The final result should be similar to:
[
{
"receipt_number": null,
"millitime": 1627813377000,
"email": "test#gmail.com",
"phone": "000000000",
"shipping": {
"address": {
"city": "Sydney",
"country": "AU",
"line1": "Test Street",
"line2": null,
"postal_code": "3000",
"state": "VIC"
},
"name": "New Cust"
},
"shipping_note": "Please knock on door.",
"amount_paid": 104,
"line_items": [
{
"description": "Chicken Noodle",
"quantity": 8
},
{
"description": "Curry Chicken",
"quantity": 1
}
]
}
]
I have created the same in
You can use distinct keyword to remove duplicates from the results.
Add c.line_items to your select list and change i.description, i.quantity to c.description, c.quantity to add view under line_items.
Query:
SELECT distinct c.receipt_number, c.millitime, c.email, c.phone, c.shipping, c.shipping_note, c.amount_paid, c.line_items
FROM c
JOIN i IN c.line_items
WHERE c.millitime > 1627813253000
Result:
Reference: Azure Cosmos DB SQL query - JSON items

Deduplicate table SQL with nested rows (type STRUCT)

I have a SQL table (in BigQuery) with possible duplicated rows. The table has over 20 columns, some of them are nested (data type "STRUCT)". I want to deduplicate the table.
I can't simply query SELECT DISTINCT * because I get an error
Query error: Column options of type STRUCT cannot be used in SELECT DISTINCT
So far, I tried to create a unique ID based on a hash of certain columns.
I have now this unique ID (called sha256), but I can't figure out a way of selecting only rows with unique hash.
I tried to GROUP BY, but it doesn't work with STRUCT type, and I tried also to INNER JOIN with a table containing only unique hashed, but I get duplicates also.
For reference, here are 2 example rows of the dataset:
{
"sha256": "un2k3TUtzwzmQMvxfrjztsh/A/GW3WWzV4U4CezqceA=",
"has_phone": true,
"options": {
"sub_toplist": true,
"gallery": false,
"urgent": false,
"has_option": true,
"photosup": true,
"booster": false
},
"calendar": {
"dates": null
},
"owner": {
"siren": null,
"pro_rates_link": null,
"user_id": "f0d94687-1a24-4ed4-8adb-7faded053ca8",
"type": "private",
"no_salesmen": true,
"name": "marius",
"store_id": "5022456"
},
"location": {
"feature": {
"properties": null,
"geometry": {
"coordinates": [
"9.41733",
"42.54701"
],
"type": "Point"
},
"type": "Feature"
},
"is_shape": true,
"provider": "here",
"lng": "9.41733",
"lat": "42.54701",
"zipcode": "20290",
"city_label": "Lucciana 20290",
"city": "Lucciana",
"region_name": "Corse",
"department_name": null,
"source": "city",
"department_id": "0",
"region_id": "9"
},
"attributes": {
"pro_rates_link": null,
"immo_sell_type": "old",
"ges": "a",
"square": "92",
"rooms": "4",
"energy_rate": "b",
"is_import": false,
"custom_ref": null,
"lease_type": "sell",
"real_estate_type": "1",
"fai_included": null,
"type_real_estate_sale": null
},
"price_calendar": null,
"price": [
"270000"
],
"body": "text",
"url": "https://www.example.fr/ventes_immobilieres/1729537955.htm",
"category_name": "Ventes immobilières",
"category_id": "9",
"images": {
"urls_thumb": [
"https://img3.example.fr/ad-thumb/d63e236ce3546906b3ce661640a7cf858d0a0593.jpg"
],
"urls": [
"https://img3.example.fr/ad-image/ac6bd9ce0cc3aa507727ddece51f437d77ae4cfa.jpg",
],
"nb_images": "7",
"small_url": "https://img3.example.fr/ad-small/ac6bd9ce0cc3aa507727ddece51f437d77ae4cfa.jpg",
"thumb_url": "https://img3.example.fr/ad-thumb/ac6bd9ce0cc3aa507727ddece51f437d77ae4cfa.jpg"
},
"ad_type": "offer",
"first_publication_date": "2020-01-02 15:00:46 UTC",
"status": "active",
"subject": "Villa à Lucciana",
"index_date": "2020-01-16 15:00:45 UTC",
"expiration_date": "2020-03-02 15:00:46 UTC",
"list_id": "1729537955"
},
{
"sha256": "wCMrggkqSJ3PgbkuWAgBpCMtFfkJDRlz6TOeO5Nngsg=",
"has_phone": true,
"options": {
"sub_toplist": false,
"gallery": false,
"urgent": false,
"has_option": false,
"photosup": false,
"booster": false
},
"calendar": {
"dates": null
},
"owner": {
"siren": null,
"pro_rates_link": null,
"user_id": "ae0f432d-0aa2-4828-a20b-3472255588b4",
"type": "private",
"no_salesmen": true,
"name": "M.Milleliri",
"store_id": "12132533"
},
"location": {
"feature": {
"properties": null,
"geometry": {
"coordinates": [
"9.1917",
"41.54506"
],
"type": "Point"
},
"type": "Feature"
},
"is_shape": true,
"provider": "here",
"lng": "9.1917",
"lat": "41.54506",
"zipcode": "20146",
"city_label": "Sotta 20146",
"city": "Sotta",
"region_name": "Corse",
"department_name": null,
"source": "city",
"department_id": "0",
"region_id": "9"
},
"attributes": {
"pro_rates_link": null,
"immo_sell_type": "old",
"ges": "Non renseigné",
"square": null,
"rooms": null,
"energy_rate": "Non renseigné",
"is_import": false,
"custom_ref": null,
"lease_type": "sell",
"real_estate_type": "3",
"fai_included": null,
"type_real_estate_sale": null
},
"price_calendar": null,
"price": [
"100000"
],
"body": "text",
"url": "https://www.example.fr/ventes_immobilieres/1736199673.htm",
"category_name": "Ventes immobilières",
"category_id": "9",
"images": {
"urls_thumb": [
"https://img3.example.fr/ad-thumb/4f3632dc8e5c50075aa6c6e4b559e2042546f009.jpg"
],
"urls": [
"https://img3.example.fr/ad-image/4f3632dc8e5c50075aa6c6e4b559e2042546f009.jpg"
],
"urls_large": [
"https://img3.example.fr/ad-large/4f3632dc8e5c50075aa6c6e4b559e2042546f009.jpg"
],
"nb_images": "1",
"small_url": "https://img3.example.fr/ad-small/4f3632dc8e5c50075aa6c6e4b559e2042546f009.jpg",
"thumb_url": "https://img3.example.fr/ad-thumb/4f3632dc8e5c50075aa6c6e4b559e2042546f009.jpg"
},
"ad_type": "offer",
"first_publication_date": "2020-01-16 14:21:05 UTC",
"status": "active",
"subject": "Terrain 1250 m2 Sotta",
"index_date": "2020-01-16 14:21:05 UTC",
"expiration_date": "2020-03-16 14:21:05 UTC",
"list_id": "1736199673"
}
and the query I'm working on so far:
WITH
table_unique_hash AS (
SELECT
DISTINCT(SHA256(CONCAT(FORMAT_TIMESTAMP('%Y/%m/%d_%H:%M:%S_', index_date), CAST(list_id AS STRING)))) AS sha256
FROM
`test_bucket_data.daily_table`),
table_hashed AS (
SELECT
SHA256(CONCAT(FORMAT_TIMESTAMP('%Y/%m/%d_%H:%M:%S_', index_date), CAST(list_id AS STRING))) AS sha256, *
FROM
`test_bucket_data.daily_table`)
SElECT * FROM table_hashed
limit 10;
A solution would be to find a way of inner join the table_hashed and the table_unique_hash on the sha256 column...
Thanks for your help!
I found a workaround based on this topic. The combination of GROUP BY and ANY function for all the STRUCT columns made it work!
SELECT
has_phone,
ANY_VALUE(options) as options,
ANY_VALUE(calendar) as calendar,
ANY_VALUE(owner) as owner,
ANY_VALUE(location) as location,
ANY_VALUE(attributes) as attributes,
price_calendar,
price,
body,
url,
category_name,
category_id,
ANY_VALUE(images) as images,
ad_type,
first_publication_date,
status,
subject,
index_date,
expiration_date,
list_id,
FROM
`{table_name}`
Group by
has_phone,
price_calendar,
price,
body,
url,
category_name,
category_id,
ad_type,
first_publication_date,
status,
subject,
index_date,
expiration_date,
list_id
Note: my "price" field was previously an array; I transformed it in my source json to an int

How to select column with json without sending column name?

I'm setting up an API with Node and postgreSQL-node (pg), and when I query for the column in my data base that contains JSON it returns an array of objects with the columns name and the json I want to access.
My current query is:
select jsondata
from breweries
The output is:
[
{
"jsondata": {
"id": 2,
"name": "Avondale Brewing Co",
"brewery_type": "micro",
"street": "201 41st St S",
"city": "Birmingham",
"state": "Alabama",
"postal_code": "35222-1932",
"country": "United States",
"longitude": "-86.774322",
"latitude": "33.524521",
"phone": "2057775456",
"website_url": "http://www.avondalebrewing.com",
"updated_at": "2018-08-23T23:19:57.825Z",
"tag_list": []
}
},
{"jsondata": {...}},
{...}
]
My expectation is to get and array with the contents inside "jsondata" without the name of the column "jsondata", but I can't find a way to access it one level in my query.
Edit:
Here is what I expect:
[
{
"id": 2,
"name": "Avondale Brewing Co",
"brewery_type": "micro",
"street": "201 41st St S",
"city": "Birmingham",
"state": "Alabama",
"postal_code": "35222-1932",
"country": "United States",
"longitude": "-86.774322",
"latitude": "33.524521",
"phone": "2057775456",
"website_url": "http://www.avondalebrewing.com",
"updated_at": "2018-08-23T23:19:57.825Z",
"tag_list": []
},
{...},
{...}
]
I'm not sure what exact output you're expecting, but you can convert the array of dictionaries to rows and then return the jsondata values as individual rows using this:
SELECT jsonb_array_elements(jsondata)->'jsondata'
FROM breweries
Is that along the lines of what you want?
SQL Fiddle

How to update a field in a nested array in Bigquery?

I am trying to update a table that has STRUCT(a few fields, ARRAY(STRUCT)).
The field that I need to update is inside the array and I am having trouble with making it work.
Here is the layout of the the two tables:
CREATE TABLE mydatset.orders (
order_id string,
order_time timestamp,
trans STRUCT <
id string,
amount INT64,
accounts ARRAY<STRUCT <
role STRING ,
account_id STRING,
region STRING,
amount INT64> > >
)
CREATE TABLE mydatset.relocations (
account_id string,
region string
)
Trying to update the region of any account in the array accounts if that account exists in the relocations table:
update mydataset.orders a
set trans = (SELECT AS STRUCT trans.* REPLACE(ARRAY(SELECT STRUCT<role STRING, account_id STRING, region STRING, amount INT64>
(cp.role, cp.account_id,
case when cp.account_id = ll.account_id then ll.region else cp.region end ,
cp.amount
)
) as accounts )
from unnest(trans.accounts) cp
left join unnest(relocs.chgs) ll
on cp.account_id = ll.account_id
)
from (select array_agg(struct (account_id, region) ) chgs
from`mydataset.relocations`
) relocs
where true
The syntax works, but the sql doesn't perform the expected update. The account's region in the orders table is not changed after running the above update!
(I have seen BigQuery UPDATE nested array field and this case is slightly different. The array is inside a struct and itself is an array of struct)
Appreciate any help.
Below is for BigQuery Standard SQL
#standardSQL
UPDATE `project.dataset.orders`
SET trans = (SELECT AS STRUCT trans.* REPLACE(
ARRAY(SELECT AS STRUCT x.* REPLACE(IFNULL(y.region, x.region) AS region)
FROM UNNEST(trans.accounts) x
LEFT JOIN UNNEST(relocations) y
USING(account_id)
) AS accounts))
FROM (SELECT ARRAY_AGG(t) relocations FROM `project.dataset.relocations` t)
WHERE TRUE
It is tested with below dummy data
initial dummy data that looks like below
[
{
"order_id": "order_id1",
"order_time": "2019-06-28 01:05:16.346854 UTC",
"trans": {
"id": "id1",
"amount": "1",
"accounts": [
{
"role": "role1",
"account_id": "account_id1",
"region": "region1",
"amount": "11"
},
{
"role": "role2",
"account_id": "account_id2",
"region": "region2",
"amount": "12"
}
]
}
},
{
"order_id": "order_id2",
"order_time": "2019-06-28 01:05:16.346854 UTC",
"trans": {
"id": "id2",
"amount": "1",
"accounts": [
{
"role": "role3",
"account_id": "account_id1",
"region": "region4",
"amount": "13"
},
{
"role": "role4",
"account_id": "account_id3",
"region": "region3",
"amount": "14"
}
]
}
}
]
after applying below adjustments
[
{
"account_id": "account_id1",
"region": "regionA"
},
{
"account_id": "account_id2",
"region": "regionB"
}
]
result is
[
{
"id": "id1",
"amount": "1",
"accounts": [
{
"role": "role1",
"account_id": "account_id1",
"region": "regionA",
"amount": "11"
},
{
"role": "role2",
"account_id": "account_id2",
"region": "regionB",
"amount": "12"
}
]
},
{
"id": "id2",
"amount": "1",
"accounts": [
{
"role": "role3",
"account_id": "account_id1",
"region": "regionA",
"amount": "13"
},
{
"role": "role4",
"account_id": "account_id3",
"region": "region3",
"amount": "14"
}
]
}
]