User selected values from JSON - sql

When a user fills out a form in a mobile application, a JSON document is created. I load this JSON into a Postgres database and want to pull it apart and select the inputs that the user has selected.
I find this hard to explain; you really need to see the JSON and the expected results. The JSON looks like this:
{
"iso_created_at":"2019-06-25T14:50:59+10:00",
"form_fields":[
{
"field_type":"DateAndTime",
"mandatory":false,
"form_order":0,
"editable":true,
"visibility":"public",
"label":"Time & Date Select",
"value":"2019-06-25T14:50:00+10:00",
"key":"f_10139_64_14",
"field_visibility":"public",
"data":{
"default_to_current":true
},
"id":89066
},
{
"field_type":"Image",
"mandatory":false,
"form_order":6,
"editable":true,
"visibility":"public",
"label":"Photos",
"value":[
],
"key":"f_10139_1_8",
"field_visibility":"public",
"data":{
},
"id":67682
},
{
"field_type":"DropDown",
"mandatory":true,
"form_order":2,
"editable":true,
"visibility":"public",
"label":"Customer ID",
"value":"f_10139_35_13_35_1",
"key":"f_10139_35_13",
"field_visibility":"public",
"data":{
"options":[
{
"is_default":false,
"display_order":0,
"enabled":true,
"value":"f_10139_35_13_35_1",
"label":"27"
}
],
"multi_select":false
},
"id":86039
},
{
"field_type":"CheckBox",
"mandatory":true,
"form_order":3,
"editable":true,
"visibility":"public",
"label":"Measure",
"value":[
"f_7422_10_7_10_1",
"f_7422_10_7_10_2"
],
"key":"f_10139_1_5",
"field_visibility":"public",
"data":{
"options":[
{
"is_default":true,
"display_order":0,
"enabled":true,
"value":"f_7422_10_7_10_1",
"label":"Kg"
},
{
"is_default":true,
"display_order":0,
"enabled":true,
"value":"f_7422_10_7_10_2",
"label":"Mm"
}
],
"multi_select":true
},
"id":67679
},
{
"field_type":"ShortTextBox",
"mandatory":true,
"form_order":4,
"editable":true,
"visibility":"public",
"label":"Qty",
"value":"1000",
"key":"f_10139_9_9",
"field_visibility":"public",
"data":{
},
"id":85776
}
],
"address":"Latitude: -37.811812 Longitude: 144.971745",
"shape_id":6456,
"category_id":75673,
"id":345,
"account_id":778
}
Can anyone help me?
Expected results:
account_id | report_id | field_label | field_value
------------------------------------------------------------------------
778 | 345 | Time & Date Select | 2019-06-25T14:50:00+10:00
778 | 345 | Photos | []
778 | 345 | Customer ID | 27
778 | 345 | Measure | Kg
778 | 345 | Measure | Mm
778 | 345 | Qty | 1000

As @Amadan says, you might need to handle some fields separately instead of relying on one clever loop, especially the fields "Customer ID" and "Measure", or any other field that requires it.
You can use the JSON set-returning functions (for example json_array_elements_text) for this. Here is an example built from your data that you can adjust to your case:
-- Flatten one report document into (account_id, report_id, field_label, field_value) rows.
-- Fields that carry data.options are reported by the label of every option the user
-- selected (one row per selection); all other fields are reported by their raw value.
with report as (
    -- Sample document inlined for demonstration; swap this CTE for the real table/column.
    select '{"iso_created_at": "2019-06-25T14:50:59+10:00", "form_fields": [
{"field_type": "DateAndTime", "mandatory": false, "form_order": 0, "editable": true, "visibility": "public", "label": "Time & Date Select", "value": "2019-06-25T14:50:00+10:00", "key": "f_10139_64_14", "field_visibility": "public", "data":
{"default_to_current": true}, "id": 89066},
{"field_type": "Image", "mandatory": false, "form_order": 6, "editable": true, "visibility": "public", "label": "Photos", "value": [], "key": "f_10139_1_8", "field_visibility": "public", "data": {}, "id": 67682},
{"field_type": "DropDown", "mandatory": true, "form_order": 2, "editable": true, "visibility": "public", "label": "Customer ID", "value": "f_10139_35_13_35_1", "key": "f_10139_35_13", "field_visibility": "public", "data": {"options": [{"is_default": false, "display_order": 0, "enabled": true, "value": "f_10139_35_13_35_1", "label": "27"}], "multi_select": false}, "id": 86039},
{"field_type": "CheckBox", "mandatory": true, "form_order": 3, "editable": true, "visibility": "public", "label": "Measure", "value": ["f_7422_10_7_10_1","f_7422_10_7_10_2"], "key": "f_10139_1_5", "field_visibility": "public", "data": {"options": [{"is_default": true, "display_order": 0, "enabled": true, "value": "f_7422_10_7_10_1", "label": "Kg"},{"is_default": true, "display_order": 0, "enabled": true, "value": "f_7422_10_7_10_2", "label": "Mm"}], "multi_select": true}, "id": 67679},
{"field_type": "ShortTextBox", "mandatory": true, "form_order": 4, "editable": true, "visibility": "public", "label": "Qty", "value": "1000", "key": "f_10139_9_9", "field_visibility": "public", "data": {}, "id": 85776}
], "address": "Latitude: -37.811812 Longitude: 144.971745", "shape_id": 6456, "category_id": 75673, "id": 345, "account_id": 778}'::jsonb as dato
),
form_field as (
    -- One row per entry of form_fields; the array position is kept for a stable output order.
    select
        r.dato ->> 'account_id' as account_id,
        r.dato ->> 'id' as report_id,
        f.field,
        f.field_pos
    from report as r
    cross join lateral jsonb_array_elements(r.dato -> 'form_fields')
        with ordinality as f (field, field_pos)
)
select
    ff.account_id,
    ff.report_id,
    -- ->> yields plain text, so labels and values come back without JSON quotes.
    ff.field ->> 'label' as field_label,
    coalesce(opt.choice ->> 'label', ff.field ->> 'value') as field_value
from form_field as ff
left join lateral jsonb_array_elements(
    -- Fields without an options array pass NULL here, which produces no option rows;
    -- the LEFT JOIN then keeps the field with its raw value.
    case
        when jsonb_typeof(ff.field -> 'data' -> 'options') = 'array'
            then ff.field -> 'data' -> 'options'
    end
) with ordinality as opt (choice, choice_pos)
    -- Keep only the options that were actually selected: "value" is a single option key
    -- for a DropDown and an array of option keys for a CheckBox.
    on case jsonb_typeof(ff.field -> 'value')
        when 'array' then (ff.field -> 'value') @> jsonb_build_array(opt.choice -> 'value')
        else (ff.field -> 'value') = (opt.choice -> 'value')
    end
order by ff.field_pos, opt.choice_pos;
With this I get a result similar to yours.
Take this example and adjust it to your needs.
Regards

You need to unnest the values in form_fields and then pick the label and the value from that JSON object:
-- One output row per element of data -> 'form_fields', exposing its label and value as text.
SELECT
    fd.account_id,
    fd.report_id,
    ff.field ->> 'label' AS field_label,
    ff.field ->> 'value' AS field_value
FROM form_data AS fd
LEFT JOIN LATERAL jsonb_array_elements(fd.data -> 'form_fields') AS ff (field)
    ON TRUE;
The left join is needed to still see the row from form_data even if no form_fields is available in the main JSON column.
The above assumes a table form_data with the columns account_id, report_id and data (which contains the JSON)
Online example: https://rextester.com/RNIBSB94484

Related

select node value from json column type

I have a table called raw_data with three columns: ID, timestamp and payload. The payload column is of JSON type and holds values such as:
{
"data": {
"author_id": "1461871206425108480",
"created_at": "2022-08-17T23:19:14.000Z",
"geo": {
"coordinates": {
"type": "Point",
"coordinates": [
-0.1094,
51.5141
]
},
"place_id": "3eb2c704fe8a50cb"
},
"id": "1560043605762392066",
"text": " ALWAYS # London, United Kingdom"
},
"matching_rules": [
{
"id": "1560042248007458817",
"tag": "london-paris"
}
]
}
From this I want to select the rows where the coordinates are available, such as [-0.1094, 51.5141] in this case.
-- Attempt to list up to 20 raw_data rows whose payload carries geo data.
-- NOTE(review): assuming SQLite's JSON functions, json_each(payload) yields one row per
-- top-level key of the payload ("data" and "matching_rules" in the sample), so
-- json_each.value is already the nested value and has no "data" member for the path to match.
-- NOTE(review): the path ends in a trailing '.', i.e. it names no final node — confirm the intended target.
SELECT *
FROM raw_data, json_each(payload)
WHERE json_extract(json_each.value, '$.data.geo.') IS NOT NULL
LIMIT 20;
Nothing was returned.
EDIT
NOT ALL json objects have the coordinates node. For example this value:
{
"data": {
"author_id": "1556031969062010881",
"created_at": "2022-08-18T01:42:21.000Z",
"geo": {
"place_id": "006c6743642cb09c"
},
"id": "1560079621017796609",
"text": "Dear Desperate sister say husband no dey oo."
},
"matching_rules": [
{
"id": "1560077018183630848",
"tag": "kaduna-kano-katsina-dutse-zaria"
}
]
}
The correct path is '$.data.geo.coordinates.coordinates' and there is no need for json_each():
-- Rows whose payload has a value at data.geo.coordinates.coordinates;
-- json_extract returns NULL when that node is absent, which filters the row out.
SELECT rd.*
FROM raw_data AS rd
WHERE json_extract(rd.payload, '$.data.geo.coordinates.coordinates') IS NOT NULL;
See the demo.

Search a JSON column in a database

I'm looking to see if it's possible to search multiple database rows for a specific value that's stored in a JSON string. For instance, I have a table called stashitems that contains a JSON column, items, which stores all of a player's items. I would like to search the 1500 rows of data for a specific label or name value. Below is a snippet of one player's stashitems.
How could I accomplish this for the entire table? Thanks for any help!
{
"8": {
"type": "item",
"slot": 8,
"amount": 948,
"weight": 100,
"name": "glass",
"label": "Glass",
"image": "glass.png",
"useable": false,
"unique": false,
"info": ""
},
"23": {
"type": "item",
"slot": 23,
"amount": 1,
"weight": 200,
"name": "crack_baggy",
"label": "Bag of Crack",
"image": "crack_baggy.png",
"useable": true,
"unique": false,
"info": ""
},
"47": {
"type": "item",
"slot": 47,
"amount": 1,
"weight": 20000,
"name": "diving_gear",
"label": "Diving Gear",
"image": "diving_gear.png",
"useable": true,
"unique": true,
"info": []
},
"48": {
"type": "item",
"slot": 48,
"amount": 1,
"weight": 20000,
"name": "diving_gear",
"label": "Diving Gear",
"image": "diving_gear.png",
"useable": true,
"unique": true,
"info": []
}
}
MariaDB 10.4.22
In SQL Server, you can very easily traverse JSON with the following syntax:
-- Read the "type" of the item stored under key "8" for every row of stashitems.
-- The key starts with a digit, so it is double-quoted inside the JSON path to make
-- the path parser treat it as a key name.
SELECT
    JSON_VALUE(items, '$."8".type') AS type
FROM stashitems;
see this documentation.
It's also possible in MySQL, though the syntax is different.
Unfortunately, SQLite stores only text values, so a workaround is needed.
There are some possibilities like
-- @json: user variable expected to hold the JSON document to search.
-- Returns the paths of all string values matching 'glass'.
SELECT JSON_SEARCH(@json, 'all', 'glass');
| JSON_SEARCH(@json, 'all', 'glass') |
| :--------------------------------- |
| ["$.8.name", "$.8.label"] |
-- Plain substring test on the JSON text held in the user variable @json (1 = found).
SELECT @json Like '%glass%';
| @json Like '%glass%' |
| -------------------: |
| 1 |
db<>fiddle here

Select data from Json array MS SQL Server

I have to select data from Json like this:
[
{
"id": 10100,
"externalId": "100000035",
"name": "Test1",
"companyId": 10099,
"phone": "0738003811",
"email": "test@Test.com",
"mainAddress": {
"county": "UK",
"province": "test",
"zipCode": "01234",
"city": "test",
"street": "test",
"gln": "44,37489331;26,21941193",
"country": {
"iso2": "UK",
"iso3": "UK"
}
},
"active": false,
"main": true,
"stores": [
"Test"
],
"attributes": [
{
"attributeId": 1059,
"attributeName": "CH6 name",
"attributeExternalId": null,
"attributeValueId": 74292,
"attributeValueType": "MONO_LINGUAL",
"attributeValueEid": null,
"attributePlainValue": "Unknown"
},
{
"attributeId": 1061,
"attributeName": "BD",
"attributeExternalId": null,
"attributeValueId": 81720,
"attributeValueType": "MONO_LINGUAL",
"attributeValueEid": null,
"attributePlainValue": "Not assigned"
}
],
"daysSinceLastOrder": null
},
{
"id": 62606,
"externalId": "VL_LC_000190",
"name": "Test",
"companyId": 17793,
"phone": "44333424",
"email": "test@email.com",
"mainAddress": {
"firmName": "test",
"county": "test",
"province": "test",
"zipCode": "247555",
"city": "test",
"street": "test",
"gln": "44.8773851;23.9223518",
"country": {
"iso2": "RO",
"iso3": "ROU"
},
"phone": "07547063789"
},
"active": true,
"main": false,
"stores": [
"Valcea"
],
"attributes": [
{
"attributeId": 1042,
"attributeName": "Type of location",
"attributeExternalId": "TYPE_OF_DIVISION",
"attributeValueId": 34506,
"attributeValueType": "MONO_LINGUAL",
"attributeValueEid": "Small OTC (<40mp)",
"attributePlainValue": "Small OTC (<40mp)"
},
{
"attributeId": 17,
"attributeName": "Limit for payment",
"attributeExternalId": "LIMIT_FOR_PAYMENT_IN_DAYS",
"attributeValueId": 59120,
"attributeValueType": "NUMBER",
"attributeValueEid": null,
"attributePlainValue": "28"
}
],
"daysSinceLastOrder": 147
}
]
I know how to select data from a simple JSON object using "FROM OPENJSON",
but now I have to select
attributeValueId, attributeId, attributeName, attributePlainValue and companyId for each attribute. I don't know how to select data from the attributes array and then how to join it to the companyId, which is one level up.
Maybe someone knows how to write this query.
As mentioned by @lptr in the comments:
You need to pass the result of one OPENJSON to another, using CROSS APPLY. You can select a whole JSON object or array as a property, by using the syntax AS JSON
-- One row per attribute of every element in the JSON array held in @j,
-- together with the companyId of the element that owns the attribute.
select
    t1.companyId,
    t2.attributeId,
    t2.attributeName,
    t2.attributeValueId,
    t2.attributePlainValue
from openjson(@j)
    with (
        companyId int,
        -- AS JSON returns the nested array as JSON text so it can be shredded again below.
        attributes nvarchar(max) as json
    ) as t1
cross apply openjson(t1.attributes)
    with (
        attributeId int,
        attributeName nvarchar(100),
        attributeValueId nvarchar(100),
        attributePlainValue nvarchar(100)
    ) as t2;
db<>fiddle
For example, you can use code like this.
f1.metaData->"$.identity.customerID" = '.$customerID.'

azure search exact match of file name not returning exact results

I am indexing all the file names into the index. But when I search with the exact file name in the search query, it returns all the other file names as well. Below is my index definition.
{
"fields": [
{
"name": "id",
"type": "Edm.String",
"facetable": true,
"filterable": true,
"key": true,
"retrievable": true,
"searchable": false,
"sortable": false,
"analyzer": null,
"indexAnalyzer": null,
"searchAnalyzer": null,
"synonymMaps": [],
"fields": []
},
{
"name": "FileName",
"type": "Edm.String",
"facetable": false,
"filterable": false,
"key": false,
"retrievable": true,
"searchable": true,
"sortable": false,
"analyzer": "keyword-analyzer",
"indexAnalyzer": null,
"searchAnalyzer": null,
"synonymMaps": [],
"fields": []
}
],
"scoringProfiles": [],
"defaultScoringProfile": null,
"corsOptions": null,
"analyzers": [
{
"name": "keyword-analyzer",
"@odata.type": "#Microsoft.Azure.Search.CustomAnalyzer",
"charFilters": [],
"tokenizer": "keyword_v2",
"tokenFilters": ["lowercase", "my_asciifolding", "my_word_delimiter"]
}
],
"tokenFilters": [
{
"@odata.type": "#Microsoft.Azure.Search.AsciiFoldingTokenFilter",
"name": "my_asciifolding",
"preserveOriginal": true
},
{
"@odata.type": "#Microsoft.Azure.Search.WordDelimiterTokenFilter",
"name": "my_word_delimiter",
"generateWordParts": true,
"generateNumberParts": false,
"catenateWords": false,
"catenateNumbers": false,
"catenateAll": false,
"splitOnCaseChange": true,
"preserveOriginal": true,
"splitOnNumerics": true,
"stemEnglishPossessive": false,
"protectedWords": []
}
],
"@odata.etag": "\"0x8D6FB2F498F9AD2\""
}
Below is my sample data
{
"value": [
{
"id": "1",
"FileName": "SamplePSDFile_1psd2680.psd"
},
{
"id": "2",
"FileName": "SamplePSDFile-1psd260.psd"
},
{
"id": "3",
"FileName": "SamplePSDFile_1psd2689.psd"
},
{
"id": "4",
"FileName": "SamplePSDFile-1psdxx2680.psd"
}
]
}
Below is the Analyze API results
{
"tokens": [
{
"token": "samplepsdfile_1psd2689.psd",
"startOffset": 0,
"endOffset": 26,
"position": 0
},
{
"token": "samplepsdfile",
"startOffset": 0,
"endOffset": 13,
"position": 0
},
{
"token": "psd",
"startOffset": 15,
"endOffset": 18,
"position": 1
},
{
"token": "psd",
"startOffset": 23,
"endOffset": 26,
"position": 2
}
]
}
When I search with the keyword "SamplePSDFile_1psd2689.psd", Azure Search returns three records in the results instead of only document 3. Below are my search query and the results.
?search="SamplePSDFile_1psd2689.psd"&api-version=2019-05-06&$count=true&queryType=full&searchMode=All
{
"@odata.count": 3,
"value": [
{
"@search.score": 2.3387241,
"id": "2",
"FileName": "SamplePSDFile-1psd260.psd"
},
{
"@search.score": 2.2493405,
"id": "3",
"FileName": "SamplePSDFile_1psd2689.psd"
},
{
"@search.score": 2.2493405,
"id": "1",
"FileName": "SamplePSDFile_1psd2680.psd"
}
]
}
How can I achieve my expected results? I tried with and without double quotes around the keyword, and all the other options, but no luck. What am I doing wrong in this case?
Somebody suggested using $filter, but that field isn't filterable in our case.
Please help me with this.
If you are looking for exact match then you probably don't want any analyzer involved. Give it a try with this line
"analyzer": "keyword-analyzer"
changed to
"analyzer": null
If you need to be able to do exact match on the field and also support partial keyword searches then you need to index the field twice with different names. Maybe append “Exact” to the exact match field name and don’t use an analyzer for that one. The name without exact can have an analyzer. Then search on the field using the right field name index depending on the type of search.

Deduplicate table SQL with nested rows (type STRUCT)

I have a SQL table (in BigQuery) with possibly duplicated rows. The table has over 20 columns, some of them nested (data type "STRUCT"). I want to deduplicate the table.
I can't simply query SELECT DISTINCT * because I get an error
Query error: Column options of type STRUCT cannot be used in SELECT DISTINCT
So far, I tried to create a unique ID based on a hash of certain columns.
I have now this unique ID (called sha256), but I can't figure out a way of selecting only rows with unique hash.
I tried to GROUP BY, but it doesn't work with the STRUCT type. I also tried an INNER JOIN with a table containing only the unique hashes, but I still get duplicates.
For reference, here are 2 example rows of the dataset:
{
"sha256": "un2k3TUtzwzmQMvxfrjztsh/A/GW3WWzV4U4CezqceA=",
"has_phone": true,
"options": {
"sub_toplist": true,
"gallery": false,
"urgent": false,
"has_option": true,
"photosup": true,
"booster": false
},
"calendar": {
"dates": null
},
"owner": {
"siren": null,
"pro_rates_link": null,
"user_id": "f0d94687-1a24-4ed4-8adb-7faded053ca8",
"type": "private",
"no_salesmen": true,
"name": "marius",
"store_id": "5022456"
},
"location": {
"feature": {
"properties": null,
"geometry": {
"coordinates": [
"9.41733",
"42.54701"
],
"type": "Point"
},
"type": "Feature"
},
"is_shape": true,
"provider": "here",
"lng": "9.41733",
"lat": "42.54701",
"zipcode": "20290",
"city_label": "Lucciana 20290",
"city": "Lucciana",
"region_name": "Corse",
"department_name": null,
"source": "city",
"department_id": "0",
"region_id": "9"
},
"attributes": {
"pro_rates_link": null,
"immo_sell_type": "old",
"ges": "a",
"square": "92",
"rooms": "4",
"energy_rate": "b",
"is_import": false,
"custom_ref": null,
"lease_type": "sell",
"real_estate_type": "1",
"fai_included": null,
"type_real_estate_sale": null
},
"price_calendar": null,
"price": [
"270000"
],
"body": "text",
"url": "https://www.example.fr/ventes_immobilieres/1729537955.htm",
"category_name": "Ventes immobilières",
"category_id": "9",
"images": {
"urls_thumb": [
"https://img3.example.fr/ad-thumb/d63e236ce3546906b3ce661640a7cf858d0a0593.jpg"
],
"urls": [
"https://img3.example.fr/ad-image/ac6bd9ce0cc3aa507727ddece51f437d77ae4cfa.jpg",
],
"nb_images": "7",
"small_url": "https://img3.example.fr/ad-small/ac6bd9ce0cc3aa507727ddece51f437d77ae4cfa.jpg",
"thumb_url": "https://img3.example.fr/ad-thumb/ac6bd9ce0cc3aa507727ddece51f437d77ae4cfa.jpg"
},
"ad_type": "offer",
"first_publication_date": "2020-01-02 15:00:46 UTC",
"status": "active",
"subject": "Villa à Lucciana",
"index_date": "2020-01-16 15:00:45 UTC",
"expiration_date": "2020-03-02 15:00:46 UTC",
"list_id": "1729537955"
},
{
"sha256": "wCMrggkqSJ3PgbkuWAgBpCMtFfkJDRlz6TOeO5Nngsg=",
"has_phone": true,
"options": {
"sub_toplist": false,
"gallery": false,
"urgent": false,
"has_option": false,
"photosup": false,
"booster": false
},
"calendar": {
"dates": null
},
"owner": {
"siren": null,
"pro_rates_link": null,
"user_id": "ae0f432d-0aa2-4828-a20b-3472255588b4",
"type": "private",
"no_salesmen": true,
"name": "M.Milleliri",
"store_id": "12132533"
},
"location": {
"feature": {
"properties": null,
"geometry": {
"coordinates": [
"9.1917",
"41.54506"
],
"type": "Point"
},
"type": "Feature"
},
"is_shape": true,
"provider": "here",
"lng": "9.1917",
"lat": "41.54506",
"zipcode": "20146",
"city_label": "Sotta 20146",
"city": "Sotta",
"region_name": "Corse",
"department_name": null,
"source": "city",
"department_id": "0",
"region_id": "9"
},
"attributes": {
"pro_rates_link": null,
"immo_sell_type": "old",
"ges": "Non renseigné",
"square": null,
"rooms": null,
"energy_rate": "Non renseigné",
"is_import": false,
"custom_ref": null,
"lease_type": "sell",
"real_estate_type": "3",
"fai_included": null,
"type_real_estate_sale": null
},
"price_calendar": null,
"price": [
"100000"
],
"body": "text",
"url": "https://www.example.fr/ventes_immobilieres/1736199673.htm",
"category_name": "Ventes immobilières",
"category_id": "9",
"images": {
"urls_thumb": [
"https://img3.example.fr/ad-thumb/4f3632dc8e5c50075aa6c6e4b559e2042546f009.jpg"
],
"urls": [
"https://img3.example.fr/ad-image/4f3632dc8e5c50075aa6c6e4b559e2042546f009.jpg"
],
"urls_large": [
"https://img3.example.fr/ad-large/4f3632dc8e5c50075aa6c6e4b559e2042546f009.jpg"
],
"nb_images": "1",
"small_url": "https://img3.example.fr/ad-small/4f3632dc8e5c50075aa6c6e4b559e2042546f009.jpg",
"thumb_url": "https://img3.example.fr/ad-thumb/4f3632dc8e5c50075aa6c6e4b559e2042546f009.jpg"
},
"ad_type": "offer",
"first_publication_date": "2020-01-16 14:21:05 UTC",
"status": "active",
"subject": "Terrain 1250 m2 Sotta",
"index_date": "2020-01-16 14:21:05 UTC",
"expiration_date": "2020-03-16 14:21:05 UTC",
"list_id": "1736199673"
}
and the query I'm working on so far:
-- Work in progress: derive a hash key per row from index_date and list_id.
WITH
    -- Distinct hash keys; not referenced by the final SELECT yet.
    table_unique_hash AS (
        SELECT DISTINCT
            SHA256(
                CONCAT(
                    FORMAT_TIMESTAMP('%Y/%m/%d_%H:%M:%S_', index_date),
                    CAST(list_id AS STRING)
                )
            ) AS sha256
        FROM `test_bucket_data.daily_table`
    ),
    -- Every source row, prefixed with its hash key.
    table_hashed AS (
        SELECT
            SHA256(
                CONCAT(
                    FORMAT_TIMESTAMP('%Y/%m/%d_%H:%M:%S_', index_date),
                    CAST(list_id AS STRING)
                )
            ) AS sha256,
            daily.*
        FROM `test_bucket_data.daily_table` AS daily
    )
SELECT hashed.*
FROM table_hashed AS hashed
LIMIT 10;
A solution would be to find a way of inner join the table_hashed and the table_unique_hash on the sha256 column...
Thanks for your help!
I found a workaround based on this topic. The combination of GROUP BY and the ANY_VALUE function made it work for the STRUCT columns!
-- Deduplicate `{table_name}` on the full content of the selected columns.
-- Each projected row is serialised with TO_JSON_STRING and that string is the grouping
-- key, so nested (STRUCT) and repeated columns take part in the comparison without
-- having to be listed in GROUP BY.
-- ANY_VALUE is applied to the whole row at once, so every output row is an intact copy
-- of one source row rather than a mix of values picked column by column.
-- Note: rows that differ only inside a nested column are kept as distinct rows.
WITH selected AS (
    SELECT
        has_phone,
        options,
        calendar,
        owner,
        location,
        attributes,
        price_calendar,
        price,
        body,
        url,
        category_name,
        category_id,
        images,
        ad_type,
        first_publication_date,
        status,
        subject,
        index_date,
        expiration_date,
        list_id
    FROM `{table_name}`
)
SELECT AS VALUE
    ANY_VALUE(s)
FROM selected AS s
GROUP BY TO_JSON_STRING(s)
Note: my "price" field was previously an array; I transformed it in my source json to an int