Postgres find unique values in JSON - SQL

I am using PostgreSQL and have a table with id, sender::jsonb and last login date columns, as follows:
id | sender | last login date
----+-----------------------------------------------------------------------------------+----------------------------------
1 | {"firstName": "Ivan", "lastName": "Ivanov", "middleName": "Ivanovich", } | 2021-04-10 14:49:36.234504 +00:00
2 | {"firstName": "Ivan", "lastName": "Ivanov", "middleName": "Ivanovich", } | 2021-04-09 14:49:36.234504 +00:00
3 | {"firstName": "Ivan", "lastName": "Ivanov", "middleName": "Ivanovich", } | 2021-04-11 14:49:36.234504 +00:00
4 | {"firstName": "Nickolai","lastName": "Nickov", "middleName": "Nikovich", } | 2021-04-30 14:49:36.234504 +00:00
5 | {"firstName": "Nickolai","lastName": "Nickov", "middleName": "Nikovich", } | 2021-04-29 14:49:36.234504 +00:00
6 | {"firstName": "Vladimir","lastName": "Vladimirovich","middleName": "Putout", } | 2021-04-15 14:49:36.234504 +00:00
7 | {"firstName": "Petr", "lastName": "Petrov", "middleName": "Petrovich", } | 2021-04-10 14:49:36.234504 +00:00
8 | {"firstName": "Ivan", "lastName": "Ivanov", "middleName": "Ivanovich", } | 2021-04-01 14:49:36.234504 +00:00
9 | {"firstName": "Ignat", "lastName": "Ignatov", "middleName": "Ignatovich", }| 2021-04-06 14:49:36.234504 +00:00
10| {"firstName": "Vladimir","lastName": "Vladimirovich","middleName": "Putout", } | 2021-04-17 14:49:36.234504 +00:00
11| {"firstName": "Ivan", "lastName": "Ivanov", "middleName": "Ivanovich", } | 2021-04-12 14:49:36.234504 +00:00
P.S. There may be other information in the "sender" column, but uniqueness only needs to be checked by "firstName", "lastName" and "middleName".
I need to return a result consisting of the unique names, each with its latest date. In particular, I want to get this result:
id | sender | last login date
----+-----------------------------------------------------------------------------------+----------------------------------
4 | {"firstName": "Nickolai","lastName": "Nickov", "middleName": "Nikovich", } | 2021-04-30 14:49:36.234504 +00:00
10| {"firstName": "Vladimir","lastName": "Vladimirovich","middleName": "Putout", } | 2021-04-17 14:49:36.234504 +00:00
11| {"firstName": "Ivan", "lastName": "Ivanov", "middleName": "Ivanovich", } | 2021-04-12 14:49:36.234504 +00:00
7 | {"firstName": "Petr", "lastName": "Petrov", "middleName": "Petrovich", } | 2021-04-10 14:49:36.234504 +00:00
9 | {"firstName": "Ignat", "lastName": "Ignatov", "middleName": "Ignatovich", }| 2021-04-06 14:49:36.234504 +00:00
Everything is complicated by the fact that JSON is used. I thought about concatenating the name fields and then doing GROUP BY and sorting, but unfortunately it does not work.

You can use distinct on() to do this:
select distinct on (firstname, lastname) id, sender, last_login_date
from (
    select id, sender, last_login_date,
           sender ->> 'firstName' as firstname,
           sender ->> 'lastName' as lastname
    from the_table
) t
order by firstname, lastname, last_login_date desc
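The question asks for uniqueness across firstName, lastName and middleName, while the query above keys on the first two only. A minimal variant that also includes middleName in the distinct on () key (a sketch, assuming the same table name the_table):

select distinct on (sender ->> 'firstName',
                    sender ->> 'lastName',
                    sender ->> 'middleName')
       id, sender, last_login_date
from the_table
-- the distinct on () expressions must lead the order by
order by sender ->> 'firstName',
         sender ->> 'lastName',
         sender ->> 'middleName',
         last_login_date desc;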

You can do it using a window function:
select *
from (
    select *,
           rank() over (partition by sender ->> 'firstName', sender ->> 'lastName'
                        order by last_login_date desc) rn
    from yourtable
) t
where rn = 1
order by last_login_date desc
db<>fiddle here
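Note that rank() can return more than one row per person if two logins share the same latest timestamp; row_number() guarantees exactly one. A sketch of the same idea using row_number() and all three name keys in the partition (assuming the same table name yourtable):

select *
from (
    select *,
           -- row_number() breaks ties, so exactly one row survives per person
           row_number() over (partition by sender ->> 'firstName',
                                           sender ->> 'lastName',
                                           sender ->> 'middleName'
                              order by last_login_date desc) rn
    from yourtable
) t
where rn = 1
order by last_login_date desc;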

Related

Create tabular View by Spreading Data from JSON in Snowflake

I'm very new to Snowflake and I am working on creating a view from a table that holds JSON data as follows:
"data": {
"baseData": {
"dom_url": "https://www.soccertables.com/european_tables",
"event_id": "01b2722a-d8e6-4f67-95d0-8dd7ba088a4a",
"event_utc_time": "2020-05-11 09:01:14.821",
"ip_address": "125.238.134.96",
"table_1": [
{
"position": "1",
"team_name": "Liverpool",
"games_played": "29",
"games_won": "26",
"games_drawn": "2",
"games_lost": "1",
"goals_for": "75",
"goals_against": "35"
"points": "80"
},
{
"position": "2",
"team_name": "Man. City",
"games_played": "29",
"games_won": "20",
"games_drawn": "5",
"games_lost": "4",
"goals_for": "60",
"goals_against": "45"
"points": "65"
},
{
"position": "...",
"team_name": "...",
"games_played": "...",
"games_won": "...",
"games_drawn": "...",
"games_lost": "...",
"goals_for": "...",
"goals_against": "..."
"points": "..."
}
],
"unitID": "CN 8000",
"ver": "1.0.0"
},
"baseType": "MatchData"
},
"dataName": "CN8000.Prod.MatchData",
"id": "18a89f9e-9620-4453-a546-23412025e7c0",
"tags": {
"itrain.access.level1": "Private",
"itrain.access.level2": "Kumar",
"itrain.internal.deviceID": "",
"itrain.internal.deviceName": "",
"itrain.internal.encodeTime": "2022-03-23T07:41:19.000Z",
"itrain.internal.sender": "Harish",
"itrain.software.name": "",
"itrain.software.partNumber": 0,
"itrain.software.version": ""
},
"timestamp": "2021-02-25T07:32:31.000Z"
}
I want to extract the common values like dom_url, event_id, event_utc_time and ip_address, along with each team_name in a separate column and the associated team details like position, games_played etc., possibly as one row per team name.
I've been trying the LATERAL FLATTEN function but haven't succeeded so far:
create or replace view AWSS3_PM.PUBLIC.PM_POWER_CN8000_V1(
DOM_URL,
EVENT_ID,
EVENT_UTC_TIME,
IP_ADDRESS,
TIMESTAMP,
POSITION,
GAMES_PLAYED,
GAMES_WON,
GAMES_LOST,
GAMES_DRAWN
) as
select c1:data:baseData:dom_url dom_url,
c1:data:baseData:event_id event_id,
c1:data:baseData:event_utc_time event_utc_time,
c1:data:baseData:ip_address ip_address,
c1:timestamp timestamp,
value:position TeamPosition,
value:games_played gamesPlayed,
value:games_won wins ,
value:games_lost defeats,
value:games_drawn draws
from pm_power, lateral flatten(input => c1:data:baseData:table_1);
Any help would be really appreciated.
Thanks,
Harish
For the table portion of the JSON, it needs flattening and a transpose; example below.
Sample table -
select * from test_json;
+--------------------------------+
| TAB_VAL |
|--------------------------------|
| { |
| "table_1": [ |
| { |
| "games_drawn": "2", |
| "games_lost": "1", |
| "games_played": "29", |
| "games_won": "26", |
| "goals_against": "35", |
| "goals_for": "75", |
| "points": "80", |
| "position": "1", |
| "team_name": "Liverpool" |
| }, |
| { |
| "games_drawn": "5", |
| "games_lost": "4", |
| "games_played": "29", |
| "games_won": "20", |
| "goals_against": "45", |
| "goals_for": "60", |
| "points": "65", |
| "position": "2", |
| "team_name": "Man. City" |
| } |
| ] |
| } |
+--------------------------------+
1 Row(s) produced. Time Elapsed: 0.285s
Perform the transpose after flattening the JSON:
select * from (
select figures,stats,team_name
from (
select
f.value:"games_drawn"::number as games_drawn,
f.value:"games_lost"::number as games_lost,
f.value:"games_played"::number as games_played,
f.value:"games_won"::number as games_won,
f.value:"goals_against"::number as goals_against,
f.value:"goals_for"::number as goals_for,
f.value:"points"::number as points,
f.value:"position"::number as position,
f.value:"team_name"::String as team_name
from
TEST_JSON, table(flatten(input=>tab_val:table_1, mode=>'ARRAY')) as f
) flt
unpivot (figures for stats in(games_drawn, games_lost, games_played, games_won, goals_against, goals_for, points,position))
) up
pivot (min(up.figures) for up.team_name in ('Liverpool','Man. City'));
+---------------+-------------+-------------+
| STATS | 'Liverpool' | 'Man. City' |
|---------------+-------------+-------------|
| GAMES_DRAWN | 2 | 5 |
| GAMES_LOST | 1 | 4 |
| GAMES_PLAYED | 29 | 29 |
| GAMES_WON | 26 | 20 |
| GOALS_AGAINST | 35 | 45 |
| GOALS_FOR | 75 | 60 |
| POINTS | 80 | 65 |
| POSITION | 1 | 2 |
+---------------+-------------+-------------+
8 Row(s) produced. Time Elapsed: 0.293s
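If, instead of the pivoted stats-by-team matrix, you want one row per team with the common event fields alongside (as described in the question), a sketch along the lines of the asker's view may work. This assumes a VARIANT column c1 on a table pm_power, as in the question's attempt; the ::string casts strip the surrounding quotes from the VARIANT values:

select c1:data:baseData:dom_url::string        as dom_url,
       c1:data:baseData:event_id::string       as event_id,
       c1:data:baseData:event_utc_time::string as event_utc_time,
       c1:data:baseData:ip_address::string     as ip_address,
       c1:timestamp::timestamp_ntz             as event_time,
       f.value:team_name::string               as team_name,
       f.value:position::number                as position,
       f.value:games_played::number            as games_played,
       f.value:games_won::number               as games_won,
       f.value:games_lost::number              as games_lost,
       f.value:games_drawn::number             as games_drawn,
       f.value:points::number                  as points
from pm_power,
     lateral flatten(input => c1:data:baseData:table_1) f;  -- one output row per element of table_1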

Add Map within a Map in a column

In the Metadata column I have a Map type value:
+-----------+--------+-----------+--------------------------------+
| Noun| Pronoun| Adjective|Metadata |
+-----------+--------+-----------+--------------------------------+
| Homer| Simpson|Engineer |["Age": "50", "Country": "USA"] |
| Elon | Musk |King |["Age": "45", "Country": "RSA"] |
| Bart | Lee |Cricketer |["Age": "35", "Country": "AUS"] |
| Lisa | Jobs |Daughter |["Age": "35", "Country": "IND"] |
| Joe | Root |Player |["Age": "31", "Country": "ENG"] |
+-----------+--------+-----------+--------------------------------+
I want to append another Map type value to the Metadata under a key called tags.
+-----------+--------+-----------+--------------------------------------------------------------------+
| Noun| Pronoun| Adjective|Metadata |
+-----------+--------+-----------+--------------------------------------------------------------------+
| Homer| Simpson|Engineer |["Age": "50", "Country": "USA", "tags": ["Gen": "M", "Fit": "Yes"]] |
| Elon | Musk |King |["Age": "45", "Country": "RSA", "tags": ["Gen": "M", "Fit": "Yes"]] |
| Bart | Lee |Cricketer |["Age": "35", "Country": "AUS", "tags": ["Gen": "M", "Fit": "No"]] |
| Lisa | Jobs |Daughter |["Age": "35", "Country": "IND", "tags": ["Gen": "F", "Fit": "Yes"]] |
| Joe | Root |Player |["Age": "31", "Country": "ENG", "tags": ["Gen": "M", "Fit": "Yes"]] |
+-----------+--------+-----------+--------------------------------------------------------------------+
In the Metadata column, the outer Map is already a typedLit; adding another Map within it is not allowed.
I implemented it using a struct. This is how it looks:
df.withColumn("Metadata", struct(lit("Age").alias("Age"), lit("Country").alias("Country"), typedLit(tags).alias("tags")))
It won't be exactly a key-value pair, but it will still be queryable via the aliases.

Ability to get the "index" (or ordinal value) for each array entry in BigQuery?

In a data column in BigQuery, I have a JSON object with the structure:
{
"sections": [
{
"secName": "Flintstones",
"fields": [
{ "fldName": "Fred", "age": 55 },
{ "fldName": "Barney", "age": 44 }
]
},
{
"secName": "Jetsons",
"fields": [
{ "fldName": "George", "age": 33 },
{ "fldName": "Elroy", "age": 22 }
]
}
]}
I'm hoping to use unnest() and json_extract() to get results that resemble:
id | section_num | section_name | field_num | field_name | field_age
----+--------------+--------------+-----------+------------+-----------
1 | 1 | Flintstones | 1 | Fred | 55
1 | 1 | Flintstones | 2 | Barney | 44
1 | 2 | Jetsons | 1 | George | 33
1 | 2 | Jetsons | 2 | Elroy | 22
So far, I have the query:
SELECT id,
json_extract_scalar(curSection, '$.secName') as section_name,
json_extract_scalar(curField, '$.fldName') as field_name,
json_extract_scalar(curField, '$.age') as field_age
FROM `tick8s.test2` AS tbl
LEFT JOIN unnest(json_extract_array(tbl.data, '$.sections')) as curSection
LEFT JOIN unnest(json_extract_array(curSection, '$.fields')) as curField
that yields:
id | section_name | field_name | field_age
----+--------------+------------+-----------
1 | Flintstones | Fred | 55
1 | Flintstones | Barney | 44
1 | Jetsons | George | 33
1 | Jetsons | Elroy | 22
QUESTION: I'm not sure how, if it's possible at all, to get the section_num and field_num ordinal positions from their array index values.
(If you are looking to duplicate my results, I have a table named test2 with 2 columns:
id - INTEGER, REQUIRED
data - STRING, NULLABLE
and I insert the data with:
insert into tick8s.test2 values (1,
'{"sections": [' ||
'{' ||
'"secName": "Flintstones",' ||
'"fields": [' ||
'{ "fldName": "Fred", "age": 55 },' ||
'{ "fldName": "Barney", "age": 44 }' ||
']' ||
'},' ||
'{' ||
'"secName": "Jetsons",' ||
'"fields": [' ||
'{ "fldName": "George", "age": 33 },' ||
'{ "fldName": "Elroy", "age": 22 }' ||
']' ||
'}]}'
);
)
Do you just want WITH OFFSET?
SELECT id,
       json_extract_scalar(curSection, '$.secName') as section_name,
       n_s,
       json_extract_scalar(curField, '$.fldName') as field_name,
       json_extract_scalar(curField, '$.age') as field_age,
       n_c
FROM `tick8s.test2` tbl
LEFT JOIN unnest(json_extract_array(tbl.data, '$.sections')) curSection WITH OFFSET n_s
LEFT JOIN unnest(json_extract_array(curSection, '$.fields')) curField WITH OFFSET n_c;
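WITH OFFSET is zero-based, so to match the 1-based section_num and field_num shown in the desired output you can simply add 1 — a small variation on the same query:

SELECT id,
       n_s + 1 AS section_num,
       json_extract_scalar(curSection, '$.secName') AS section_name,
       n_c + 1 AS field_num,
       json_extract_scalar(curField, '$.fldName') AS field_name,
       json_extract_scalar(curField, '$.age') AS field_age
FROM `tick8s.test2` tbl
LEFT JOIN unnest(json_extract_array(tbl.data, '$.sections')) curSection WITH OFFSET n_s
LEFT JOIN unnest(json_extract_array(curSection, '$.fields')) curField WITH OFFSET n_c;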

Convert a json which is a list of dictionaries into column/row format in Postgresql

I have a JSON value which is a list of dictionaries with the following syntax:
[
{
"Date_and_Time": "Dec 29, 2017 15:35:37",
"Componente": "Bar",
"IP_Origen": "175.11.13.6",
"IP_Destino": "81.18.119.864",
"Country": "Brazil",
"Age": "3"
},
{
"Date_and_Time": "Dec 31, 2017 17:35:37",
"Componente": "Foo",
"IP_Origen": "176.11.13.6",
"IP_Destino": "80.18.119.864",
"Country": "France",
"Id": "123456",
"Car": "Ferrari"
},
{
"Date_and_Time": "Dec 31, 2017 17:35:37",
"Age": "1",
"Country": "France",
"Id": "123456",
"Car": "Ferrari"
},
{
"Date_and_Time": "Mar 31, 2018 14:35:37",
"Componente": "Foo",
"Country": "Germany",
"Id": "2468",
"Genre": "Male"
}
]
The JSON is really big and each dictionary has a different set of key/value fields. What I want to do is create a table in PostgreSQL where each key becomes a column and each value fills a row. For the example above I would like a table like this:
Date_and_Time | Componente | IP_Origen | IP_Destino | Country| Id | Car | Age| Genre
Dec 29, 2017 15:35:37 | Bar | 175.11.13.6 | 81.18.119.864 | Brazil | - | - | 3 | -
Dec 31, 2017 17:35:37 | Foo | 176.11.13.6 | 80.18.119.864 | France |123456 |Ferrari | - | -
Dec 31, 2017 17:35:37 | - | - | - | France |123456 |Ferrari | 1 | -
Mar 31, 2018 14:35:37 | Foo | - | - | Germany| 2468 | - | - | Male
The only solution I can think of is inserting the values one by one, but that is not efficient at all.
You can use jsonb_to_recordset to create a record set out of your JSON and then use INSERT INTO to insert the records.
insert into table
select * from jsonb_to_recordset('<your json>'::jsonb)
as rec("Date_and_Time" timestamp, "Componente" text, "IP_Origen" text) -- specify all of the table's columns here; quote them so they match the JSON keys exactly
Sample DBFiddle
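To map every key that appears in the sample, the column list can be spelled out in full. A sketch, assuming a target table named your_table with matching columns; the quoted identifiers matter because jsonb_to_recordset matches JSON keys to column names case-sensitively:

insert into your_table
select *
from jsonb_to_recordset('<your json>'::jsonb)
     as rec("Date_and_Time" timestamp,  -- 'Dec 29, 2017 15:35:37' parses with the default DateStyle
            "Componente"    text,
            "IP_Origen"     text,
            "IP_Destino"    text,
            "Country"       text,
            "Id"            text,
            "Car"           text,
            "Age"           text,
            "Genre"         text);
-- keys missing from a given dictionary simply come back as NULL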

How to create nested JSON return with aggregate function and dynamic key values using `jsonb_build_object`

This is what an example of the table looks like:
+---------------------+------------------+------------------+
| country_code | region | num_launches |
+---------------------+------------------+------------------+
| 'CA' | 'Ontario' | 5 |
+---------------------+------------------+------------------+
| 'CA' | 'Quebec' | 9 |
+---------------------+------------------+------------------+
| 'DE' | 'Bavaria' | 15 |
+---------------------+------------------+------------------+
| 'DE' | 'Saarland' | 12 |
+---------------------+------------------+------------------+
| 'DE' | 'Berlin' | 23 |
+---------------------+------------------+------------------+
| 'JP' | 'Tokyo' | 19 |
+---------------------+------------------+------------------+
I am able to write a query that returns each country_code with all regions nested within, but I am unable to get exactly what I am looking for.
My intended return looks like this:
[
{ 'CA': [
{ 'Ontario': 5 },
{ 'Quebec': 9 }
]
},
{ 'DE': [
{ 'Bavaria': 15 },
{ 'Saarland': 12 },
{ 'Berlin': 23 }
]
},
{ 'JP': [
{ 'Tokyo': 19 }
]
}
]
How could this be calculated if the num_launches was not available?
+---------------------+------------------+
| country_code | region |
+---------------------+------------------+
| 'CA' | 'Ontario' |
+---------------------+------------------+
| 'CA' | 'Ontario' |
+---------------------+------------------+
| 'CA' | 'Ontario' |
+---------------------+------------------+
| 'CA' | 'Quebec' |
+---------------------+------------------+
| 'CA' | 'Quebec' |
+---------------------+------------------+
| 'DE' | 'Bavaria' |
+---------------------+------------------+
| 'DE' | 'Bavaria' |
+---------------------+------------------+
| 'DE' | 'Bavaria' |
+---------------------+------------------+
| 'DE' | 'Bavaria' |
+---------------------+------------------+
| 'DE' | 'Saarland' |
+---------------------+------------------+
| 'DE' | 'Berlin' |
+---------------------+------------------+
| 'DE' | 'Berlin' |
+---------------------+------------------+
| 'JP' | 'Tokyo' |
+---------------------+------------------+
Expected Return
[
{ 'CA': [
{ 'Ontario': 3 },
{ 'Quebec': 2 }
]
},
{ 'DE': [
{ 'Bavaria': 4 },
{ 'Saarland': 1 },
{ 'Berlin': 2 }
]
},
{ 'JP': [
{ 'Tokyo': 1 }
]
}
]
Thanks
You can try using json_agg with the json_build_object function in a subquery to get the array, then do it again in the main query.
Schema (PostgreSQL v9.6)
CREATE TABLE T(
country_code varchar(50),
region varchar(50),
num_launches int
);
insert into t values ('CA','Ontario',5);
insert into t values ('CA','Quebec',9);
insert into t values ('DE','Bavaria',15);
insert into t values ('DE','Saarland',12);
insert into t values ('DE','Berlin',23);
insert into t values ('JP','Tokyo',19);
Query #1
select json_agg(json_build_object(country_code,arr)) results
from (
SELECT country_code,
json_agg(json_build_object(region,num_launches)) arr
FROM T
group by country_code
) t1;
results
[{"CA":[{"Ontario":5},{"Quebec":9}]},{"DE":[{"Bavaria":15},{"Saarland":12},{"Berlin":23}]},{"JP":[{"Tokyo":19}]}]
View on DB Fiddle
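For the second part of the question, where num_launches is not available and each row should be counted instead, the same shape can be produced by aggregating in one more step — a sketch against the same table t:

select json_agg(json_build_object(country_code, arr)) results
from (
    select country_code,
           json_agg(json_build_object(region, cnt)) arr
    from (
        -- count the rows per (country_code, region) first
        select country_code, region, count(*) as cnt
        from t
        group by country_code, region
    ) c
    group by country_code
) t1;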