Query data from a text file and get a JSON column - sql

Using SQL Server 2019 Express Edition.
I have a text file like this:
/type/author /authors/OL1002354A 2 2008-08-20T18:07:53.62084 {"name": "Don L. Brigham", "personal_name": "Don L. Brigham", "last_modified": {"type": "/type/datetime", "value": "2008-08-20T18:07:53.62084"}, "key": "/authors/OL1002354A", "type": {"key": "/type/author"}, "revision": 2}
/type/author /authors/OL100246A 1 2008-04-01T03:28:50.625462 {"name": "Talib Samat.", "personal_name": "Talib Samat.", "last_modified": {"type": "/type/datetime", "value": "2008-04-01T03:28:50.625462"}, "key": "/authors/OL100246A", "type": {"key": "/type/author"}, "revision": 1}
/type/author /authors/OL1002700A 1 2008-04-01T03:28:50.625462 {"name": "Bengt E. Gustafsson Symposium (5th 1988 Stockholm, Sweden)", "last_modified": {"type": "/type/datetime", "value": "2008-04-01T03:28:50.625462"}, "key": "/authors/OL1002700A", "type": {"key": "/type/author"}, "revision": 1}
/type/author /authors/OL1002807A 2 2008-08-20T18:12:02.683498 {"name": "Ary J. Lamme", "personal_name": "Ary J. Lamme", "last_modified": {"type": "/type/datetime", "value": "2008-08-20T18:12:02.683498"}, "key": "/authors/OL1002807A", "birth_date": "1940", "type": {"key": "/type/author"}, "revision": 2}
/type/author /authors/OL1002994A 5 2012-03-03T06:50:39.836886 {"name": "R. Baxter Miller", "personal_name": "R. Baxter Miller", "created": {"type": "/type/datetime", "value": "2008-04-01T03:28:50.625462"}, "photos": [7075806, 6974916], "last_modified": {"type": "/type/datetime", "value": "2012-03-03T06:50:39.836886"}, "latest_revision": 5, "key": "/authors/OL1002994A", "type": {"key": "/type/author"}, "revision": 5}
/type/author /authors/OL100301A 1 2008-04-01T03:28:50.625462 {"name": "Ghazali Basri.", "personal_name": "Ghazali Basri.", "last_modified": {"type": "/type/datetime", "value": "2008-04-01T03:28:50.625462"}, "key": "/authors/OL100301A", "type": {"key": "/type/author"}, "revision": 1}
/type/author /authors/OL1003201A 2 2008-08-20T18:14:55.775993 {"name": "Robert Smaus", "personal_name": "Robert Smaus", "last_modified": {"type": "/type/datetime", "value": "2008-08-20T18:14:55.775993"}, "key": "/authors/OL1003201A", "type": {"key": "/type/author"}, "revision": 2}
/type/author /authors/OL1003202A 2 2008-08-20T18:14:56.005766 {"name": "Richard Mark Friedhoff", "personal_name": "Richard Mark Friedhoff", "last_modified": {"type": "/type/datetime", "value": "2008-08-20T18:14:56.005766"}, "key": "/authors/OL1003202A", "type": {"key": "/type/author"}, "revision": 2}
/type/author /authors/OL1003235A 1 2008-04-01T03:28:50.625462 {"name": "Hunbatz Men", "personal_name": "Hunbatz Men", "last_modified": {"type": "/type/datetime", "value": "2008-04-01T03:28:50.625462"}, "key": "/authors/OL1003235A", "birth_date": "1941", "type": {"key": "/type/author"}, "revision": 1}
/type/author /authors/OL1003719A 1 2008-04-01T03:28:50.625462 {"name": "NATO Advanced Research Workshop on Ras Oncogenes (1988 Athens, Greece)", "last_modified": {"type": "/type/datetime", "value": "2008-04-01T03:28:50.625462"}, "key": "/authors/OL1003719A", "type": {"key": "/type/author"}, "revision": 1}
/type/author /authors/OL1003744A 2 2008-08-20T18:20:16.351762 {"name": "Jeanne Thieme", "personal_name": "Jeanne Thieme", "last_modified": {"type": "/type/datetime", "value": "2008-08-20T18:20:16.351762"}, "key": "/authors/OL1003744A", "type": {"key": "/type/author"}, "revision": 2}
/type/author /authors/OL1003901A 2 2008-08-20T18:21:31.331678 {"name": "Kiiti Morita", "personal_name": "Kiiti Morita", "last_modified": {"type": "/type/datetime", "value": "2008-08-20T18:21:31.331678"}, "key": "/authors/OL1003901A", "birth_date": "1915", "type": {"key": "/type/author"}, "revision": 2}
/type/author /authors/OL1004047A 1 2008-04-01T03:28:50.625462 {"name": "Murphy, William M.", "personal_name": "Murphy, William M.", "last_modified": {"type": "/type/datetime", "value": "2008-04-01T03:28:50.625462"}, "key": "/authors/OL1004047A", "birth_date": "1942", "type": {"key": "/type/author"}, "revision": 1}
The columns are delimited by tabs and the rows by line feeds.
I need to get the data inside the 5th (last) column, which is a JSON structure. For example, I need the value of every "name" attribute.
I've imported the data into a table using SSIS, and then I can CROSS APPLY OPENJSON(json_column) just fine to get the keys and values. But I was wondering whether this could be done with SQL/T-SQL alone, using OPENROWSET directly and working with just the column that is formatted as JSON. I tried using OPENROWSET with CROSS APPLY OPENJSON(BulkColumn), but that fails because the rest of the columns aren't JSON formatted.
Any idea on how to avoid this error or a different approach?

You can use BULK INSERT to load the file into a temp table, parsing it as a tab-delimited file, and then use OPENJSON to extract the JSON data. The following worked for me:
-- Load a tab-delimited, LF-terminated text file into a staging table, then
-- read the "name" attribute from the JSON document stored in the last column.
DROP TABLE IF EXISTS #Temp;

CREATE TABLE #Temp (
    /* Just some random column names */
    Author     NVARCHAR(100),
    AuthorPath NVARCHAR(100),
    IntValue   INT,
    Created    DATETIME2(6),   -- the sample timestamps carry up to 6 fractional digits
    JsonData   NVARCHAR(MAX)
);

BULK INSERT #Temp
FROM 'C:\Users\andre\Documents\temp\test.txt'
WITH (
    FIELDTERMINATOR = '\t',    -- tab delimited
    -- Rows end with a bare line feed. BULK INSERT interprets '\n' as CR+LF,
    -- so an LF-only terminator has to be given in hexadecimal notation.
    ROWTERMINATOR = '0x0a'
);

SELECT
    Temp.Author,
    Temp.AuthorPath,
    Temp.IntValue,
    Temp.Created,
    Temp.JsonData,
    Parsed.[name]
FROM #Temp AS Temp
-- OPENJSON ... WITH projects the listed JSON paths as typed columns;
-- the alias differs from the JsonData column to avoid shadowing it.
CROSS APPLY OPENJSON(Temp.JsonData, '$')
    WITH (
        [name] NVARCHAR(200) '$.name'
    ) AS Parsed;

Related

Correct way to create "record" field in Avro schema

I am trying to understand Avro schemas and am stuck on complex types (record). The problem is very simple: create a schema which contains one record field with two primitive fields (string and timestamp) nested inside that record. I see two options for the schema:
option 1
{
"type": "record",
"name": "cool_subject",
"namespace": "com.example",
"fields": [
{
"name": "field_1",
"type": "record"
"fields": [
{"name": "operation", "type": "string"},
{"name": "timestamp", "type": "long", "logical_type": "timestamp_millis"}
]
}
]
}
option 2
{
"type": "record",
"name": "cool_subject",
"namespace": "com.example",
"fields": [
{
"name": "field_1",
"type": {
"type": "record",
"name": "field_1_type",
"fields": [
{"name": "operation", "type": "string"},
{"name": "timestamp", "type": {"type": "long", "logical_type": "timestamp_millis"}}
]
}
}
]
}
The difference is in the "type" attribute.
As far as I know opt2 is the correct way. Am I right? Is opt1 valid?
The second one is correct. The first one is not valid.
A record schema is something that looks like this:
{
"type": "record",
"name": <Name of the record>,
"fields": [...],
}
And for fields, it should be like this:
[
{
"name": <name of field>,
"type": <type of field>,
},
...
]
So in the case of a field which contains a record, it should always look like this:
[
{
"name": <name of field>,
"type": {
"type": "record",
"name": <Name of the record>,
"fields": [...],
}
},
...
]
The format in the first example would make it unclear if the name "field_1" was the name of the field or the name of the record.

PostgreSQL (v9.6) query that filters JSON array elements by key/value

We have a jsonb column with data of the type:
"basket": {
"total": 6,
"items": [
{ "type": "A", "name": "A", "price": 1 },
{ "type": "A", "name": "B", "price": 2 },
{ "type": "C", "name": "C", "price": 3 },
]
}
We need to construct a few queries that will filter specific elements of the items[] array for SELECT and SUM.
We have PG v9.6 so using jsonb_path_query_array didn't work.
Using basket->'items' @> '{"type":"A"}' works to find all entries that have type A.
But how do we get subquery to
select only basket items of type-A
sum of prices of items of type-A
Thank you!
This will select the required items:
-- Expand basket.items into one row per array element and keep the type-A ones.
WITH src AS (
    SELECT '{"basket":
{
"total": 6,
"items": [
{ "type": "A", "name": "A", "price": 1 },
{ "type": "A", "name": "B", "price": 2 },
{ "type": "C", "name": "C", "price": 3 }
]
}}'::jsonb AS doc
)
SELECT item.it
FROM src
CROSS JOIN LATERAL jsonb_array_elements(src.doc #> '{basket,items}') AS item(it)
WHERE item.it ->> 'type' = 'A';
and this the sum of prices:
-- Sum the price of every basket item whose type is 'A'.
WITH src AS (
    SELECT '{"basket":
{
"total": 6,
"items": [
{ "type": "A", "name": "A", "price": 1 },
{ "type": "A", "name": "B", "price": 2 },
{ "type": "C", "name": "C", "price": 3 }
]
}}'::jsonb AS doc
)
SELECT SUM((item.it ->> 'price')::numeric)
FROM src
CROSS JOIN LATERAL jsonb_array_elements(src.doc #> '{basket,items}') AS item(it)
WHERE item.it ->> 'type' = 'A';

How to UNNest multiple arrays in BigQuery?

I have this json that is stored in a BigQuery table in 3 fields token, questions, answers
token:STRING, questions:STRING, answers:STRING
Questions and answers are STRING because they are dynamic fields.
token field has single value.
questions field has dictionary object with "fields" being list object and has 3 questions.
answers field is a list object with answers to the 3 questions and id will be used for matching a question to an answer. Below is the JSON download from bigquery
token questions answers
18e6d8e445 {"fields": [{"id": "L39FyvUohKDV", "properties": {}, "ref": "d8834652-3acf-4541-8354-1e3dcd716667", "title": "What did you think about the changes?", "type": "short_text"}, {"id": "krs82KgxHwGb", "properties": {}, "ref": "5b6e6796-635b-4595-9404-e81617d4540b", "title": "How useful is this feature turning out to be for you?", "type": "opinion_scale"}, {"id": "lBzHtCuzHFM4", "properties": {}, "ref": "b76be913-19b9-4b8a-b2ac-3fb645a65a5c", "title": "Your email address", "type": "email"}], "id": "SdzXVn", "title": "Google Shopping 5/4/18"} [{"field": {"id": "L39FyvUohKDV", "type": "short_text"}, "text": "t", "type": "text"}, {"field": {"id": "krs82KgxHwGb", "type": "opinion_scale"}, "number": 10, "type": "number"}, {"email": "t#t.com", "field": {"id": "lBzHtCuzHFM4", "type": "email"}, "type": "email"}]
949b2c57e3 {"fields": [{"id": "krs82KgxHwGb", "properties": {}, "ref": "5b6e6796-635b-4595-9404-e81617d4540b", "title": "How useful is this feature turning out to be for you?", "type": "opinion_scale"}, {"id": "lBzHtCuzHFM4", "properties": {}, "ref": "b76be913-19b9-4b8a-b2ac-3fb645a65a5c", "title": "Your email address", "type": "email"}, {"id": "L39FyvUohKDV", "properties": {}, "ref": "d8834652-3acf-4541-8354-1e3dcd716667", "title": "What did you think about the changes?", "type": "short_text"}], "id": "SdzXVn", "title": "Google Shopping 5/4/18"} [{"field": {"id": "krs82KgxHwGb", "type": "opinion_scale"}, "number": 10, "type": "number"}, {"email": "someone#mail.com", "field": {"id": "lBzHtCuzHFM4", "type": "email"}, "type": "email"}, {"field": {"id": "L39FyvUohKDV", "type": "short_text"}, "text": "they were awesome", "type": "text"}]
146c49cdd6 {"fields": [{"id": "CxhfK22a3XWE", "properties": {}, "ref": "d8834652-3acf-4541-8354-1e3dcd716667", "title": "What did you think about the changes?", "type": "short_text"}, {"id": "oUZxPRaKjmFr", "properties": {}, "ref": "5b6e6796-635b-4595-9404-e81617d4540b", "title": "How useful is this feature turning out to be for you?", "type": "opinion_scale"}, {"id": "zUIP73oXpLD6", "properties": {}, "ref": "b76be913-19b9-4b8a-b2ac-3fb645a65a5c", "title": "Your email address", "type": "email"}], "id": "kaiAsx", "title": "a - b"} [{"field": {"id": "CxhfK22a3XWE", "type": "short_text"}, "text": "nice", "type": "text"}, {"field": {"id": "oUZxPRaKjmFr", "type": "opinion_scale"}, "number": 2, "type": "number"}, {"email": "foo#bar.com", "field": {"id": "zUIP73oXpLD6", "type": "email"}, "type": "email"}]
@mikhail-berlyant provided the query below, which gets me pretty close to what I am expecting. The only problem I am having is that I am unable to get the answer.
-- For each response token, lists every question id and title taken from the
-- "fields" array of the `definition` JSON, together with the type stored in
-- the matching answer's "field" object.
-- The commented-out fragments are an unfinished attempt to also return the
-- answer value itself.
SELECT distinct token, id, title AS question,
JSON_EXTRACT_SCALAR(CONCAT('{',a,'}'), '$.type') answer_type
--REPLACE(REGEXP_EXTRACT(b, r'"type":".+?"\s*,\s*".+?":(.+)'), '"', '') answer
FROM `v1-dev-main.typeform.responses`,
-- Titles and ids are extracted separately; their offsets are compared below.
UNNEST(REGEXP_EXTRACT_ALL(JSON_EXTRACT(definition, '$.fields'), r'"title":"(.+?)"')) title WITH OFFSET pos1,
UNNEST(REGEXP_EXTRACT_ALL(JSON_EXTRACT(definition, '$.fields'), r'"id":"(.+?)"')) id WITH OFFSET pos2,
-- `a` is the inner text of each answer's "field" object (braces stripped).
UNNEST(REGEXP_EXTRACT_ALL(answers, r'"field": {(.+?)}')) a WITH OFFSET pos3
--UNNEST(REGEXP_EXTRACT_ALL(answers, r'{(.+?),\s*"field":{.+?}')) b WITH OFFSET pos4
-- Keep only the n-th title paired with the n-th id.
WHERE pos1 = pos2
--AND pos3 = pos4
-- Match the question to the answer whose field id equals the question id.
AND id = JSON_EXTRACT_SCALAR(CONCAT('{',a,'}'), '$.id')
Here is the result of above query
token id question answer_type
146c43c81cd5780839d3cdd6 zUIP73oXpLD6 Your email address email
146c493c1cd5780839d3cdd6 oUZxPRaKjmFr How useful is this feature turning out to be for you? opinion_scale
146c493c05d5780839d3cdd6 CxhfK22a3XWE What did you think about the changes? short_text
18e6d8e33df44a1aa451b445 lBzHtCuzHFM4 Your email address email
18e6d8e33df44a1aa451b445 L39FyvUohKDV What did you think about the changes? short_text
18e6d0fa014bfa1aa451b445 krs82KgxHwGb How useful is this feature turning out to be for you? opinion_scale
a63b20df691c9a949b2c57e3 krs82KgxHwGb How useful is this feature turning out to be for you? opinion_scale
a63b20df691c9a949b2c57e3 lBzHtCuzHFM4 Your email address email
a63b258ce0339a949b2c57e3 L39FyvUohKDV What did you think about the changes? short_text
Now, I am just missing the answer.
The example below is for BigQuery Standard SQL and makes some assumptions about your data in terms of how those JSON strings are formatted, so it will most likely require some tuning of the regexps. But it works with the dummy data below:
#standardSQL
-- Dummy one-row table standing in for the real responses table.
WITH `project.dataset.table` AS (
  SELECT
    12345 AS token,
    '''{"fields": [
{"id":"1","title":"Question 1?"},
{"id":"2","title":"Questions 2?"},
{"id":"3","title":"Question 3?"}
]}''' AS questions,
    '''[
{"type":"text", "text":"answer 1", "field":{"id":"1", "type":"short_text"}},
{"type":"number", "number":42, "field":{"id":"2", "type":"opinion_scale"}},
{"type":"email", "email":"an_account#example.com", "field":{"id":"3", "type":"email"}}
]''' AS answers
)
-- One row per question, with the type and value of the answer whose
-- field id matches the question id.
SELECT
  token,
  id,
  title AS question,
  JSON_EXTRACT_SCALAR(CONCAT('{',a,'}'), '$.type') AS answer_type,
  REPLACE(REGEXP_EXTRACT(b, r'"type":".+?"\s*,\s*".+?":(.+)'), '"', '') AS answer
FROM `project.dataset.table`
CROSS JOIN UNNEST(REGEXP_EXTRACT_ALL(JSON_EXTRACT(questions, '$.fields'), r'"title":"(.+?)"')) AS title WITH OFFSET AS pos1
CROSS JOIN UNNEST(REGEXP_EXTRACT_ALL(JSON_EXTRACT(questions, '$.fields'), r'"id":"(.+?)"')) AS id WITH OFFSET AS pos2
CROSS JOIN UNNEST(REGEXP_EXTRACT_ALL(answers, r'"field":{(.+?)}')) AS a WITH OFFSET AS pos3
CROSS JOIN UNNEST(REGEXP_EXTRACT_ALL(answers, r'{(.+?),\s*"field":{.+?}')) AS b WITH OFFSET AS pos4
WHERE
  -- n-th title belongs to the n-th id; n-th field object to the n-th answer body
  pos1 = pos2
  AND pos3 = pos4
  AND id = JSON_EXTRACT_SCALAR(CONCAT('{',a,'}'), '$.id')
with results as
Row token id question answer_type answer
1 12345 1 Question 1? short_text answer 1
2 12345 2 Questions 2? opinion_scale 42
3 12345 3 Question 3? email an_account#example.com
Update based on below comments
#standardSQL
-- Sample data: one row per response. `questions` holds a JSON object with a
-- "fields" array; `answers` holds a JSON array of answer objects.
WITH `project.dataset.table` AS (
  SELECT "12345" token, '{"fields": [{"id":"1","title":"Question 1?"},{"id":"2","title":"Questions 2?"},{"id":"3","title":"Question 3?"}]}' questions,'[ {"type":"text", "text":"answer 1", "field":{"id":"1", "type":"short_text"}},{"type":"number", "number":42, "field":{"id":"2", "type":"opinion_scale"}},{"type":"email", "email":"an_account#example.com", "field":{"id":"3", "type":"email"}}]' answers UNION ALL
  SELECT "18e6d8e33df440fa014bfa1aa451b445", '{"fields": [{"id": "L39FyvUohKDV", "properties": {}, "ref": "d8834652-3acf-4541-8354-1e3dcd716667", "title": "What did you think about the changes?", "type": "short_text"}, {"id": "krs82KgxHwGb", "properties": {}, "ref": "5b6e6796-635b-4595-9404-e81617d4540b", "title": "How useful is this feature turning out to be for you?", "type": "opinion_scale"}, {"id": "lBzHtCuzHFM4", "properties": {}, "ref": "b76be913-19b9-4b8a-b2ac-3fb645a65a5c", "title": "Your email address", "type": "email"}], "id": "SdzXVn", "title": "Google Shopping 5/4/18"}', '[{"field": {"id": "L39FyvUohKDV", "type": "short_text"}, "text": "t", "type": "text"}, {"field": {"id": "krs82KgxHwGb", "type": "opinion_scale"}, "number": 10, "type": "number"}, {"email": "t#t.com", "field": {"id": "lBzHtCuzHFM4", "type": "email"}, "type": "email"}]' UNION ALL
  SELECT "a63b258ce03360df691c9a949b2c57e3", '{"fields": [{"id": "krs82KgxHwGb", "properties": {}, "ref": "5b6e6796-635b-4595-9404-e81617d4540b", "title": "How useful is this feature turning out to be for you?", "type": "opinion_scale"}, {"id": "lBzHtCuzHFM4", "properties": {}, "ref": "b76be913-19b9-4b8a-b2ac-3fb645a65a5c", "title": "Your email address", "type": "email"}, {"id": "L39FyvUohKDV", "properties": {}, "ref": "d8834652-3acf-4541-8354-1e3dcd716667", "title": "What did you think about the changes?", "type": "short_text"}], "id": "SdzXVn", "title": "Google Shopping 5/4/18"}', '[{"field": {"id": "krs82KgxHwGb", "type": "opinion_scale"}, "number": 10, "type": "number"}, {"email": "someone#mail.com", "field": {"id": "lBzHtCuzHFM4", "type": "email"}, "type": "email"}, {"field": {"id": "L39FyvUohKDV", "type": "short_text"}, "text": "they were awesome", "type": "text"}]' UNION ALL
  SELECT "146c493c051a0a481cd5780839d3cdd6", '{"fields": [{"id": "CxhfK22a3XWE", "properties": {}, "ref": "d8834652-3acf-4541-8354-1e3dcd716667", "title": "What did you think about the changes?", "type": "short_text"}, {"id": "oUZxPRaKjmFr", "properties": {}, "ref": "5b6e6796-635b-4595-9404-e81617d4540b", "title": "How useful is this feature turning out to be for you?", "type": "opinion_scale"}, {"id": "zUIP73oXpLD6", "properties": {}, "ref": "b76be913-19b9-4b8a-b2ac-3fb645a65a5c", "title": "Your email address", "type": "email"}], "id": "kaiAsx", "title": "a - b"}', '[{"field": {"id": "CxhfK22a3XWE", "type": "short_text"}, "text": "nice", "type": "text"}, {"field": {"id": "oUZxPRaKjmFr", "type": "opinion_scale"}, "number": 2, "type": "number"}, {"email": "foo#bar.com", "field": {"id": "zUIP73oXpLD6", "type": "email"}, "type": "email"}]'
)
-- One row per (response, question) with the matching answer.
-- Both JSON arrays are unnested with JSON_EXTRACT_ARRAY and joined on the
-- question id, so the result does not depend on array order, whitespace,
-- or on quotes/braces appearing inside the values.
SELECT
  t.token,
  JSON_EXTRACT_SCALAR(q, '$.id') AS id,
  JSON_EXTRACT_SCALAR(q, '$.title') AS question,
  JSON_EXTRACT_SCALAR(a, '$.field.type') AS answer_type,
  -- The answer value lives under a key that depends on the answer type.
  COALESCE(
    JSON_EXTRACT_SCALAR(a, '$.text'),
    JSON_EXTRACT_SCALAR(a, '$.number'),
    JSON_EXTRACT_SCALAR(a, '$.email')
  ) AS answer
FROM `project.dataset.table` AS t
CROSS JOIN UNNEST(JSON_EXTRACT_ARRAY(t.questions, '$.fields')) AS q
CROSS JOIN UNNEST(JSON_EXTRACT_ARRAY(t.answers, '$')) AS a
WHERE JSON_EXTRACT_SCALAR(q, '$.id') = JSON_EXTRACT_SCALAR(a, '$.field.id')
Output is
Row token id question answer_type answer
1 12345 1 Question 1? short_text answer 1
2 12345 2 Questions 2? opinion_scale 42
3 12345 3 Question 3? email an_account#example.com
4 18e6d8e33df440fa014bfa1aa451b445 L39FyvUohKDV What did you think about the changes? short_text t
5 18e6d8e33df440fa014bfa1aa451b445 krs82KgxHwGb How useful is this feature turning out to be for you? opinion_scale 10
6 18e6d8e33df440fa014bfa1aa451b445 lBzHtCuzHFM4 Your email address email t#t.com
7 a63b258ce03360df691c9a949b2c57e3 krs82KgxHwGb How useful is this feature turning out to be for you? opinion_scale 10
8 a63b258ce03360df691c9a949b2c57e3 lBzHtCuzHFM4 Your email address email someone#mail.com
9 a63b258ce03360df691c9a949b2c57e3 L39FyvUohKDV What did you think about the changes? short_text they were awesome
10 146c493c051a0a481cd5780839d3cdd6 CxhfK22a3XWE What did you think about the changes? short_text nice
11 146c493c051a0a481cd5780839d3cdd6 oUZxPRaKjmFr How useful is this feature turning out to be for you? opinion_scale 2
12 146c493c051a0a481cd5780839d3cdd6 zUIP73oXpLD6 Your email address email foo#bar.com
If you are sure about length of your arrays it's possible to ARRAY_CONCAT them first and perform UNNEST with concatenated version. It worked for me.

insert jsonb data in postgresql, option array or objects, valid way

I have this update. I've read the PostgreSQL documentation, but found nothing clear about how to insert the data. Some tutorials use these options:
1.with '{}'
2.with {}
3.with '[]' <-- array of objects
and most don't use '::jsonb' as indicated in:
https://www.postgresql.org/docs/9.4/static/datatype-json.html
here my code:
UPDATE customer set phones ='{ {"type": "mobile", "phone": "001001"} ,
{"type": "fix", "phone": "002002"} }'::jsonb
where id ='4ca27243-6a55-4855-b0e6-d6e1d957f289';
-- NOTE: this statement is rejected. The outer '{ ... }' wraps two objects
-- directly, but a JSON object must contain "key": value pairs, hence the
-- parser's 'Expected string or "}", but found "{"'.
I get this error:
ERROR: invalid input syntax for type json
LINE 1: UPDATE customer set phones ='{ {"type": "mobile", "phone": ...
^
DETAIL: Expected string or "}", but found "{".
CONTEXT: JSON data, line 1: { {...
SQL state: 22P02
Character: 29
I just need to record a list of phones. Do I need to enclose it in a big named object, like the one below? I mean, in JavaScript an array of objects is not an object, but I don't know whether that is accepted in PostgreSQL's jsonb:
{ phones:[ {"type": "mobile", "phone": "001001"} ,
{"type": "fix", "phone": "002002"} ] }
Example 1 (object):
-- Example 1: keep the phone list inside a JSON object, under a "phones" key.
CREATE TABLE customer (
    id      UUID PRIMARY KEY,  -- assumed key column: the UPDATE below filters on it
    contact JSONB
);

-- The string literal is converted to jsonb on assignment; it must be valid JSON.
UPDATE customer
SET contact = '{ "phones":[ {"type": "mobile", "phone": "001001"} , {"type": "fix", "phone": "002002"} ] }'
WHERE id = '4ca27243-6a55-4855-b0e6-d6e1d957f289';
Example 2 (array):
-- Example 2: store the phone list directly as a top-level JSON array.
CREATE TABLE customer (
    id     UUID PRIMARY KEY,  -- assumed key column: the UPDATE below filters on it
    phones JSONB
);

-- A JSON array uses square brackets; curly braces denote a JSON object.
UPDATE customer
SET phones = '[ {"type": "mobile", "phone": "001001"} , {"type": "fix", "phone": "002002"} ]'
WHERE id = '4ca27243-6a55-4855-b0e6-d6e1d957f289';
Notes:
My PostgreSQL version
select version();
PostgreSQL 11.2 (Debian 11.2-1.pgdg90+1) on x86_64-pc-linux-gnu, compiled by gcc (Debian 6.3.0-18+deb9u1) 6.3.0 20170516, 64-bit
Be sure to enclose the keys and values with double quotes.
'{}' is array type in postgres. if you use jsonb, use regular '[]' for array:
so=# select jsonb_pretty('{"phones":[ {"type": "mobile", "phone": "001001"} , {"type": "fix", "phone": "002002"} ] }');
jsonb_pretty
{
"phones": [
{
"type": "mobile",
"phone": "001001"
},
{
"type": "fix",
"phone": "002002"
}
]
}
(1 row)
Time: 0.486 ms
or:
so=# select jsonb_pretty('[ {"type": "mobile", "phone": "001001"} , {"type": "fix", "phone": "002002"} ]');
jsonb_pretty
[
{
"type": "mobile",
"phone": "001001"
},
{
"type": "fix",
"phone": "002002"
}
]
(1 row)

Big query DML insert-select as into repeated tables

I have a table structure (table name: Recalled_transaction) as follows:
[{
"name": "STR_NBR",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "RGSTR_NBR",
"type": "INTEGER",
"mode": "NULLABLE"
},
{
"name": "POS_TRANS_ID",
"type": "INTEGER",
"mode": "NULLABLE"
},
{
"name": "SLS_DT",
"type": "DATE",
"mode": "NULLABLE"
},
{
"name": "TRANS_ORIG_SRC",
"type": "RECORD",
"mode": "REPEATED",
"fields": [
{
"name": "POS_APPL_TYP_CD",
"type": "STRING",
"mode": "NULLABLE"
},
{
"name": "USER_ID",
"type": "STRING",
"mode": "NULLABLE"
}
]
},
{
"name": "RECALLED_TXN",
"type": "RECORD",
"mode": "REPEATED",
"fields": [
{
"name": "POS_SEQ_NBR",
"type": "STRING",
"mode": "REPEATED"
},
{
"name": "SUB_SYS_CD",
"type": "STRING",
"mode": "NULLABLE"
}
]
}
]
I would like to insert from a set of regular tables into this structure using INSERT ... SELECT (DML in Standard SQL). Has anyone done this before? Any help is appreciated.
Thanks
I created a table with the same schema and put together a sample query to insert into it. In your particular case, since you have two tables, you will probably need to JOIN them and then use GROUP BY.
-- Fill SampleDmlTable from generated rows: the scalar columns form the
-- grouping key, and the per-row values are collected into the two
-- REPEATED RECORD columns with ARRAY_AGG(STRUCT(...)).
INSERT INTO mydataset.SampleDmlTable (
  STR_NBR,
  RGSTR_NBR,
  POS_TRANS_ID,
  SLS_DT,
  TRANS_ORIG_SRC,
  RECALLED_TXN
)
WITH sample_rows AS (
  -- One flat row per generated x; repeated x values land in the same group.
  SELECT
    CAST(x AS STRING) AS STR_NBR,
    10 - x AS RGSTR_NBR,
    x AS POS_TRANS_ID,
    DATE_SUB(CURRENT_DATE(), INTERVAL x DAY) AS SLS_DT,
    CONCAT('foo_', CAST(x AS STRING)) AS POS_APPL_TYP_CD,
    CAST(x AS STRING) AS USER_ID,
    [CONCAT('bar_', CAST(x AS STRING)), 'baz'] AS POS_SEQ_NBR,
    CAST(10 - x AS STRING) AS SUB_SYS_CD
  FROM UNNEST([1, 1, 0, 3, 2, 2, 2]) AS x
)
SELECT
  STR_NBR,
  RGSTR_NBR,
  POS_TRANS_ID,
  SLS_DT,
  ARRAY_AGG(STRUCT(POS_APPL_TYP_CD, USER_ID)) AS TRANS_ORIG_SRC,
  ARRAY_AGG(STRUCT(POS_SEQ_NBR, SUB_SYS_CD)) AS RECALLED_TXN
FROM sample_rows
GROUP BY
  STR_NBR,
  RGSTR_NBR,
  POS_TRANS_ID,
  SLS_DT;