BigQuery - Add elements from array into array of structs - sql

I have a struct which looks like this:
{"event": {
"timestamp": [
"2019-01-13 17:21:08.570140 UTC",
"2019-01-14 14:10:55.475515 UTC",
"2019-01-09 14:02:51.848917 UTC"
],
"properties": [
{"device_model": "iPhone", "country": "United Kingdom"},
{"device_model": "Android", "country": "United States"},
{"device_model": "iPhone", "country": "Sweden"}
]
}
}
I would like to achieve the following, so that each timestamp goes into its corresponding struct:
{"event": [
{"timestamp": "2019-01-13 17:21:08.570140 UTC","device_model":
"iPhone", "country": "United Kingdom"},
{"timestamp": "2019-01-14 14:10:55.475515 UTC", "device_model":
"Android", "country": "United States"},
{"timestamp": "2019-01-09 14:02:51.848917 UTC", "device_model":
"iPhone", "country": "Sweden"}
]
}
I created the current structure from a query like this:
-- Build three sample events (one for customer_1, two for customer_2), then
-- collapse them to one row per customer whose `event` struct holds two
-- arrays: the aggregated timestamps and the aggregated properties structs.
WITH events AS (
  SELECT
    "customer_1" AS customer_id,
    "timestamp_1" AS timestamp,
    STRUCT("iphone" AS device_model, "uk" AS country) AS properties
  UNION ALL
  SELECT
    "customer_2" AS customer_id,
    "timestamp_2" AS timestamp,
    STRUCT("android" AS device_model, "us" AS country) AS properties
  UNION ALL
  SELECT
    "customer_2" AS customer_id,
    "timestamp_3" AS timestamp,
    STRUCT("iphone" AS device_model, "sweden" AS country) AS properties
)
SELECT
  customer_id,
  STRUCT(
    ARRAY_AGG(timestamp) AS timestamp,
    ARRAY_AGG(properties) AS properties
  ) AS event
FROM events
GROUP BY customer_id
How can I modify the query to achieve the desired structure?
--- Edit
I could do it this way, but it requires knowing the schema of properties at the time I generate the query, which is possible but not very pretty. Is there a simpler way?
-- Same three sample events as before.
WITH events AS (
  SELECT
    "customer_1" AS customer_id,
    "timestamp_1" AS timestamp,
    STRUCT("iphone" AS device_model, "uk" AS country) AS properties
  UNION ALL
  SELECT
    "customer_2" AS customer_id,
    "timestamp_2" AS timestamp,
    STRUCT("android" AS device_model, "us" AS country) AS properties
  UNION ALL
  SELECT
    "customer_2" AS customer_id,
    "timestamp_3" AS timestamp,
    STRUCT("iphone" AS device_model, "sweden" AS country) AS properties
),
-- Merge the timestamp and the (explicitly listed) properties fields into a
-- single struct per event; this is the step that needs the schema up front.
flattened AS (
  SELECT
    customer_id,
    STRUCT(
      timestamp AS timestamp,
      properties.device_model AS device_model,
      properties.country AS country
    ) AS properties
  FROM events
)
-- One row per customer with an array of the merged structs.
SELECT
  customer_id,
  ARRAY_AGG(properties) AS event
FROM flattened
GROUP BY customer_id

You could do something like this leveraging SELECT AS STRUCT and using properties as a selector.
-- Builds, for each customer, an array of structs that combine the event
-- timestamp with every field of `properties`, without naming those fields.
-- Expects the `events` relation (customer_id, timestamp, properties) from the
-- question's WITH clause to be in scope.
SELECT
  customer_id,
  ARRAY_AGG(properties) AS prop
FROM (
  SELECT
    customer_id,
    -- Scalar subquery: SELECT AS STRUCT turns the select list into one struct;
    -- properties.* expands the nested struct's fields next to timestamp.
    (
      SELECT AS STRUCT
        timestamp,
        properties.*
    ) AS properties
  FROM events AS e
)
-- Group by the column name rather than its ordinal position, so the query
-- keeps its meaning if the select list is reordered.
GROUP BY customer_id
This returns:
[
{
"customer_id": "customer_1",
"prop": [
{
"timestamp": "timestamp_1",
"device_model": "iphone",
"country": "uk"
}
]
},
{
"customer_id": "customer_2",
"prop": [
{
"timestamp": "timestamp_2",
"device_model": "android",
"country": "us"
},
{
"timestamp": "timestamp_3",
"device_model": "iphone",
"country": "sweden"
}
]
}
]
You could further write the piece like:
-- Fragment meant to replace the inner SELECT AS STRUCT: takes every column of
-- the row alias `e` except customer_id.
-- NOTE(review): `e.*` would carry `properties` as one nested struct field
-- rather than expanding its fields the way `properties.*` does; confirm that
-- this produces the desired flat shape.
SELECT AS STRUCT e.* except(customer_id)

Related

how can we replicate hive nested structs in snowflake?

I have nested struct data in a Hive database that I need to migrate to Snowflake. How can I replicate the nested structs there? Snowflake has no struct data type, only VARIANT.
so google finds:
https://gist.github.com/irajhedayati/c595e349d68b7a5074da81f1b8c6eec5
which has some code like:
-- Hive: append one row to calls_nested. The struct columns are built with
-- named_struct(key, value, ...) and the array-of-structs column with array().
INSERT INTO calls_nested
SELECT
    '5'    AS call_id,
    'Jack' AS name,
    45     AS age,
    named_struct('first_name', 'Joe', 'last_name', 'Doe')        AS account,
    named_struct('home', '514-111-2222', 'work', '514-333-4444') AS phone_directory,
    array(
        named_struct('street', '1 Guy',    'city', 'Montreal'),
        named_struct('street', '1 McGill', 'city', 'Montreal')
    ) AS addresses;
The Snowflake equivalents are:
named_struct -> object_construct
array -> array_construct
You can nest them as deeply as you need:
-- Snowflake version of the Hive row: object_construct() stands in for
-- named_struct() and array_construct() for array(). The last column nests the
-- earlier select-list aliases into a single object.
SELECT
    '5'    AS call_id,
    'Jack' AS name,
    45     AS age,
    object_construct('first_name', 'Joe', 'last_name', 'Doe')        AS account,
    object_construct('home', '514-111-2222', 'work', '514-333-4444') AS phone_directory,
    array_construct(
        object_construct('street', '1 Guy',    'city', 'Montreal'),
        object_construct('street', '1 McGill', 'city', 'Montreal')
    ) AS addresses,
    object_construct(
        'call_id',         call_id,
        'name',            name,
        'age',             age,
        'account',         account,
        'phone_directory', phone_directory,
        'addresses',       addresses
    ) AS object_of_objects
;
CALL_ID
NAME
AGE
ACCOUNT
PHONE_DIRECTORY
ADDRESSES
OBJECT_OF_OBJECTS
5
Jack
45
{ "first_name": "Joe", "last_name": "Doe" }
{ "home": "514-111-2222", "work": "514-333-4444" }
[ { "city": "Montreal", "street": "1 Guy" }, { "city": "Montreal", "street": "1 McGill" } ]
{ "account": { "first_name": "Joe", "last_name": "Doe" }, "addresses": [ { "city": "Montreal", "street": "1 Guy" }, { "city": "Montreal", "street": "1 McGill" } ], "age": 45, "call_id": "5", "name": "Jack", "phone_directory": { "home": "514-111-2222", "work": "514-333-4444" } }

Count & group records with column A and do a count on column B with where clause

I have a table called arb_visits which has columns country & clicked. I'm trying to create a query where I group the visits by country and also get the count where clicked is 1.
I have managed to create the queries alone, but I have an issue combining them.
I managed to get the countries and their count by:
-- Number of visits per country, highest count first.
SELECT
    country,
    COUNT(*) AS visits
FROM arb_visits
GROUP BY country
ORDER BY visits DESC
Which gives the below output:
[
{
"country": "United States",
"visits": 113,
"id": null
},
{
"country": "Canada",
"visits": 85,
"id": null
},
{
"country": "Germany",
"visits": 84,
"id": null
}
]
And the same with clicks:
-- Number of clicked visits per country, highest count first.
-- Uses the standard `=` comparison, which both PostgreSQL (production) and
-- SQLite (development) accept.
SELECT
    country,
    COUNT(*) AS clicks
FROM arb_visits
WHERE clicked = 1
GROUP BY country
ORDER BY clicks DESC
Which gives the below output:
[
{
"country": "United States",
"clicks": 59,
"id": null
},
{
"country": "Canada",
"clicks": 44,
"id": null
},
{
"country": "Germany",
"clicks": 43,
"id": null
}
]
How am I able to combine these 2 and get an output as:
[
{
"country": "United States",
"visits": 113,
"clicks": 59
},
{
"country": "Canada",
"visits": 85,
"clicks": 44
},
{
"country": "Germany",
"visits": 84,
"clicks": 43
}
]
In production, I'm using PostgreSQL, and in development I'm using SQLite.
I have tried, but with no luck. I'm kind of new to SQL.
Use count with filter.
-- Visits and clicked visits per country in a single pass: the FILTER clause
-- restricts the second COUNT to rows where clicked = 1.
SELECT
    country,
    COUNT(*)                             AS visits,
    COUNT(*) FILTER (WHERE clicked = 1)  AS clicks
FROM arb_visits
GROUP BY country
ORDER BY visits DESC;
If your DBMS does not support the FILTER clause on aggregate functions, use a CASE expression inside the aggregate instead:
-- Visits and clicked visits per country without FILTER: each row contributes
-- 1 to the sum when clicked = 1 and 0 otherwise.
SELECT
    country,
    COUNT(*) AS visits,
    SUM(
        CASE
            WHEN clicked = 1 THEN 1
            ELSE 0
        END
    ) AS clicks
FROM arb_visits
GROUP BY country
ORDER BY visits DESC;

How to get data from json column in mssql

I'm struggling to write a query that gets value from json column with some specific conditions. I have a table named Table1 with a column of type nvarchar(max) named Data that contains some json values. The json itself looks like this:
{
"Addresses": [
{
"ApartmentNumber": "1",
"City": "Rome",
"CountryCode": "IT",
"HouseNumber": "2",
"Post": "Rome",
"PostalCode": "11-111",
"Region": "Rome",
"Street": "Italian",
"StreetPrefix": "St.",
"TypeCode": "PERMANENT"
},
{
"ApartmentNumber": "11",
"City": "Madrid",
"CountryCode": "ES",
"HouseNumber": "22",
"Post": "Madrid",
"PostalCode": "11-111",
"Region": "Madrid",
"Street": "Spanish",
"StreetPrefix": "St.",
"TypeCode": "CORRESPONDENCE"
}
],
"Contacts": [
{
"TypeCode": "EMAIL",
"DefaultContact": false,
"Value": "sample#xyz.com"
}
],
"PersonData": {
"BirthDate": "1968-08-03T00:00:00",
"CitizenshipCode": "US",
"DeathDate": "0001-01-01T00:00:00",
"FirstName": "John",
"Gender": "M",
"LastName": "Jones"
}
}
I would like to get a value of CountryCode from the Addresses node where TypeCode is "CORRESPONDENCE". I tried to achieve that with combinations of JSON_VALUE and JSON_QUERY functions but I failed. Below are some examples of my trials:
query:
-- Return the whole Addresses array (as a JSON fragment) for every row.
SELECT
    JSON_QUERY(t.Data, '$.Addresses') AS Address
FROM [Table1] AS t
result:
[
{
"ApartmentNumber": "1",
"City": "Rome",
"CountryCode": "IT",
"HouseNumber": "2",
"Post": "Rome",
"PostalCode": "11-111",
"Region": "Rome",
"Street": "Italian",
"StreetPrefix": "St.",
"TypeCode": "PERMANENT"
},
{
"ApartmentNumber": "11",
"City": "Madrid",
"CountryCode": "ES",
"HouseNumber": "22",
"Post": "Madrid",
"PostalCode": "11-111",
"Region": "Madrid",
"Street": "Spanish",
"StreetPrefix": "St.",
"TypeCode": "CORRESPONDENCE"
}
]
or this:
query:
-- Last name of one person whose PersonData.Gender is 'F'.
-- Every JSON_VALUE call reads t.Data, the only table alias and JSON column
-- defined in this query.
-- NOTE: TOP 1 has no ORDER BY, so which matching row is returned is not
-- deterministic.
SELECT TOP 1
    JSON_VALUE(t.Data, '$.PersonData.LastName')
FROM [Table1] AS t
WHERE ISJSON(t.Data) > 0
  AND JSON_VALUE(t.Data, '$.PersonData.Gender') = 'F'
result:
"Jones"
But when I try to write a similar query with Addresses as the condition:
query:
-- Attempt to read CountryCode of the CORRESPONDENCE address directly.
-- NOTE(review): Addresses is a JSON array, and these paths carry no array
-- index, so they presumably do not resolve to a scalar value; confirm against
-- the JSON_VALUE path rules. This is the query reported as not working.
SELECT TOP 1
    JSON_VALUE(t.Data, '$.Addresses.CountryCode')
FROM [Table1] AS t
WHERE ISJSON(t.Data) > 0
  AND JSON_VALUE(t.Data, '$.Addresses.TypeCode') = 'CORRESPONDENCE'
I get an empty string as the result.
Thanks in advance
Since SQL Server 2016 you can query JSON stored in a column. See the documentation: "Work with JSON data".
The part that is relevant for you is "Analyze JSON data with SQL queries".
For example:
-- CountryCode of each address whose TypeCode is 'CORRESPONDENCE'.
-- OPENJSON expands the $.Addresses array of Table1.Data into one row per
-- address; the WITH clause maps the two JSON properties needed here onto
-- typed columns so they can be selected and filtered like ordinary columns.
SELECT
    a.CountryCode
FROM [Table1] AS t
CROSS APPLY OPENJSON(t.Data, N'$.Addresses')
    WITH (
        TypeCode    varchar(50) '$.TypeCode',
        CountryCode varchar(50) '$.CountryCode'
    ) AS a
WHERE a.TypeCode = 'CORRESPONDENCE'

Generate nested JSON from stored procedure

I have sample data in a SQL Server table in the following format
-- Scratch table for the example, keyed by an identity column.
CREATE TABLE #tempA (
    HomeId INT IDENTITY PRIMARY KEY,
    City   NVARCHAR(20),
    State  NCHAR(2),
    Email  VARCHAR(50)
);

-- Three sample rows; the first two share the same Email value.
INSERT INTO #tempA (City, State, Email)
VALUES ('Cleveland', 'OH', 'sd#aol.com');

INSERT INTO #tempA (City, State, Email)
VALUES ('Malibu', 'CA', 'sd#aol.com');

INSERT INTO #tempA (City, State, Email)
VALUES ('Atlanta', 'GA', 'ploll#aol.com');

-- Show what was loaded.
SELECT * FROM #tempA;
I need a stored procedure to return JSON in the following format, grouped by the Email field. I tried using FOR JSON AUTO but could not produce this format. Any tips?
[
{
"Email": "sd#aol.com",
"Tasks": [
{
"City": "Cleveland",
"State": "OH"
},
{
"City": "Malibu",
"State": "CA"
}
]
},
{
"Email": "ploll#aol.com",
"Tasks": [
{
"City": "Atlanta",
"State": "GA"
}
]
}
]
One method would be to use a correlated subquery for the City and State values and group on the Email column in the outer query:
-- One JSON object per distinct Email. The correlated subquery renders that
-- email's City/State rows, ordered by City, as the nested "Tasks" array.
SELECT
    A.Email,
    (
        SELECT
            B.City,
            B.State
        FROM #tempA AS B
        WHERE B.Email = A.Email
        ORDER BY B.City ASC
        FOR JSON AUTO
    ) AS Tasks
FROM #tempA AS A
GROUP BY A.Email
ORDER BY A.Email DESC
FOR JSON AUTO;
Which gives:
[
{
"Email": "sd#aol.com",
"Tasks": [
{
"City": "Cleveland",
"State": "OH"
},
{
"City": "Malibu",
"State": "CA"
}
]
},
{
"Email": "ploll#aol.com",
"Tasks": [
{
"City": "Atlanta",
"State": "GA"
}
]
}
]

How to update a field in a nested array in Bigquery?

I am trying to update a table that has STRUCT(a few fields, ARRAY(STRUCT)).
The field that I need to update is inside the array and I am having trouble with making it work.
Here is the layout of the two tables:
-- Orders: each order carries one `trans` struct, which in turn holds an array
-- of account structs (the nested field whose region is to be updated).
CREATE TABLE mydataset.orders (
  order_id   STRING,
  order_time TIMESTAMP,
  trans STRUCT<
    id       STRING,
    amount   INT64,
    accounts ARRAY<STRUCT<
      role       STRING,
      account_id STRING,
      region     STRING,
      amount     INT64
    >>
  >
);

-- Relocations: the new region for each relocated account.
CREATE TABLE mydataset.relocations (
  account_id STRING,
  region     STRING
);
Trying to update the region of any account in the array accounts if that account exists in the relocations table:
-- Attempt to rewrite trans.accounts so that an account's region is taken from
-- the relocations table when its account_id appears there.
update mydataset.orders a
-- Rebuild the whole trans struct, replacing only its accounts field.
set trans = (SELECT AS STRUCT trans.* REPLACE(ARRAY(SELECT STRUCT<role STRING, account_id STRING, region STRING, amount INT64>
(cp.role, cp.account_id,
-- Use the relocation's region on a match, otherwise keep the current region.
case when cp.account_id = ll.account_id then ll.region else cp.region end ,
cp.amount
)
-- NOTE(review): the ARRAY(...) subquery is closed on the next line, before the
-- FROM / LEFT JOIN below, so the join belongs to the outer SELECT AS STRUCT and
-- not to the array subquery; check whether this is why the update does not
-- behave as expected.
) as accounts )
from unnest(trans.accounts) cp
left join unnest(relocs.chgs) ll
on cp.account_id = ll.account_id
)
-- All relocations collected into a single array (chgs) of (account_id, region) structs.
from (select array_agg(struct (account_id, region) ) chgs
from`mydataset.relocations`
) relocs
-- No row filter: the statement targets every order.
where true
The syntax works, but the sql doesn't perform the expected update. The account's region in the orders table is not changed after running the above update!
(I have seen BigQuery UPDATE nested array field and this case is slightly different. The array is inside a struct and itself is an array of struct)
Appreciate any help.
Below is for BigQuery Standard SQL
#standardSQL
-- For every order, rebuild trans.accounts so that each account takes its
-- region from a relocation with the same account_id when one exists (and has
-- a non-NULL region), and keeps its current region otherwise.
UPDATE `project.dataset.orders`
SET trans = (
  SELECT AS STRUCT
    trans.* REPLACE (
      ARRAY(
        SELECT AS STRUCT
          x.* REPLACE (IFNULL(y.region, x.region) AS region)
        FROM UNNEST(trans.accounts) AS x
        LEFT JOIN UNNEST(relocations) AS y
          USING (account_id)
      ) AS accounts
    )
)
FROM (
  -- All relocation rows gathered into one array so they can be joined
  -- against each order's accounts.
  SELECT ARRAY_AGG(t) AS relocations
  FROM `project.dataset.relocations` AS t
)
-- No row filter: every order is rewritten.
WHERE TRUE
It was tested with the dummy data below.
The initial dummy data looks like this:
[
{
"order_id": "order_id1",
"order_time": "2019-06-28 01:05:16.346854 UTC",
"trans": {
"id": "id1",
"amount": "1",
"accounts": [
{
"role": "role1",
"account_id": "account_id1",
"region": "region1",
"amount": "11"
},
{
"role": "role2",
"account_id": "account_id2",
"region": "region2",
"amount": "12"
}
]
}
},
{
"order_id": "order_id2",
"order_time": "2019-06-28 01:05:16.346854 UTC",
"trans": {
"id": "id2",
"amount": "1",
"accounts": [
{
"role": "role3",
"account_id": "account_id1",
"region": "region4",
"amount": "13"
},
{
"role": "role4",
"account_id": "account_id3",
"region": "region3",
"amount": "14"
}
]
}
}
]
After applying the following relocations:
[
{
"account_id": "account_id1",
"region": "regionA"
},
{
"account_id": "account_id2",
"region": "regionB"
}
]
the result is:
[
{
"id": "id1",
"amount": "1",
"accounts": [
{
"role": "role1",
"account_id": "account_id1",
"region": "regionA",
"amount": "11"
},
{
"role": "role2",
"account_id": "account_id2",
"region": "regionB",
"amount": "12"
}
]
},
{
"id": "id2",
"amount": "1",
"accounts": [
{
"role": "role3",
"account_id": "account_id1",
"region": "regionA",
"amount": "13"
},
{
"role": "role4",
"account_id": "account_id3",
"region": "region3",
"amount": "14"
}
]
}
]