How to store JSON data in a meaningful way in Oracle - sql

Using Twitter API, I can get tweets like this :
{
"coordinates": null,
"created_at": "Mon Sep 24 03:35:21 +0000 2012",
"id_str": "250075927172759552",
"entities": {
"urls": [
],
"hashtags": [
{
"text": "freebandnames",
"indices": [
20,
34
]
}
],
"user_mentions": [
]
},
"in_reply_to_user_id_str": null,
"contributors": null,
"text": "Aggressive Ponytail #freebandnames",
"metadata": {
"iso_language_code": "en",
"result_type": "recent"
},
"retweet_count": 0,
"profile_background_color": "C0DEED",
"verified": false,
"geo_enabled": true,
"time_zone": "Pacific Time (US & Canada)",
"description": "Born 330 Live 310",
"default_profile_image": false,
"profile_background_image_url": "http://a0.twimg.com/images/themes/theme1/bg.png",
"statuses_count": 579,
"friends_count": 110,
"following": null,
"show_all_inline_media": false,
"screen_name": "sean_cummings"
},
"in_reply_to_screen_name": null,
"source": "Twitter for Mac",
"in_reply_to_status_id": null
}
As you can see, this data is a natural fit for MongoDB, where it can easily be written as-is. However, I want to store this data in a SQL database such as Oracle, and I don't know how to store nested parts like:
"entities": {
"urls": [
],
"hashtags": [
{
"text": "freebandnames",
"indices": [
20,
34
]
}
],
"user_mentions": [
]
Can you tell me how I should store such properties in Oracle? Should I create a new table for each nested property (which I am unwilling to do), or is there another way? Is there some magical way to store all of the tweet data in one place, as is done in NoSQL? Thanks.

Related

JSON SQL column in azure data factory

I have a JSON-typed column in a SQL table, as in the example below. I want the JSON below to be converted into separate columns, with drugs as the table name and the other attributes as column names. How can I do this using ADF or any other means? Please guide me. The JSON below is a single column in a table called report, which I need to convert into separate columns.
{
"drugs": {
"Codeine": {
"bin": "Y",
"name": "Codeine",
"icons": [
93,
103
],
"drug_id": 36,
"pathway": {
"code": "prodrug",
"text": "is **inactive**, its metabolites are active."
},
"targets": [],
"rxnorm_id": "2670",
"priclasses": [
"Analgesic/Anesthesiology"
],
"references": [
1,
16,
17,
100
],
"subclasses": [
"Analgesic agent",
"Antitussive agent",
"Opioid agonist",
"Phenanthrene "
],
"metabolizers": [
"CYP2D6"
],
"phenotype_ids": {
"metabolic": "5"
},
"relevant_genes": [
"CYP2D6"
],
"dosing_guidelines": [
{
"text": "Reduced morphine formation. Use label recommended age- or weight-specific dosing. If no response, consider alternative analgesics such as morphine or a non-opioid.",
"source": "CPIC",
"guidelines_id": 1
},
{
"text": "Analgesia: select alternative drug (e.g., acetaminophen, NSAID, morphine-not tramadol or oxycodone) or be alert to symptoms of insufficient pain relief.",
"source": "DPWG",
"guidelines_id": 362
}
],
"drug_report_notes": [
{
"text": "Predicted codeine metabolism is reduced.",
"icons_id": 58,
"sort_key": 58,
"references_id": null
},
{
"text": "Genotype suggests a possible decrease in exposure to the active metabolite(s) of codeine.",
"icons_id": 93,
"sort_key": 56,
"references_id": null
},
{
"text": "Professional guidelines exist for the use of codeine in patients with this genotype and/or phenotype.",
"icons_id": 103,
"sort_key": 50,
"references_id": null
}
]
}
}
}
Since this JSON is already in a SQL column, you don't need ADF to break it down into parts. You can use the JSON functions in SQL Server to do that.
Example for the first few columns:
-- Demonstrates pulling scalar fields out of a nested JSON document with
-- SQL Server's built-in JSON functions (JSON_VALUE requires SQL Server 2016+).
-- The sample document is held in a local variable so the script is
-- self-contained.
DECLARE @json VARCHAR(MAX) = '{
"drugs": {
"Codeine": {
"bin": "Y",
"name": "Codeine",
"icons": [
93,
103
],
"drug_id": 36,
"pathway": {
"code": "prodrug",
"text": "is **inactive**, its metabolites are active."
},
"targets": [],
"rxnorm_id": "2670",
"priclasses": [
"Analgesic/Anesthesiology"
],
"references": [
1,
16,
17,
100
],
"subclasses": [
"Analgesic agent",
"Antitussive agent",
"Opioid agonist",
"Phenanthrene "
],
"metabolizers": [
"CYP2D6"
],
"phenotype_ids": {
"metabolic": "5"
},
"relevant_genes": [
"CYP2D6"
],
"dosing_guidelines": [
{
"text": "Reduced morphine formation. Use label recommended age- or weight-specific dosing. If no response, consider alternative analgesics such as morphine or a non-opioid.",
"source": "CPIC",
"guidelines_id": 1
},
{
"text": "Analgesia: select alternative drug (e.g., acetaminophen, NSAID, morphine-not tramadol or oxycodone) or be alert to symptoms of insufficient pain relief.",
"source": "DPWG",
"guidelines_id": 362
}
],
"drug_report_notes": [
{
"text": "Predicted codeine metabolism is reduced.",
"icons_id": 58,
"sort_key": 58,
"references_id": null
},
{
"text": "Genotype suggests a possible decrease in exposure to the active metabolite(s) of codeine.",
"icons_id": 93,
"sort_key": 56,
"references_id": null
},
{
"text": "Professional guidelines exist for the use of codeine in patients with this genotype and/or phenotype.",
"icons_id": 103,
"sort_key": 50,
"references_id": null
}
]
}
}
}';

-- JSON_VALUE accepts a full path, so each scalar is addressed directly from
-- the document root; no intermediate JSON_QUERY call is needed.
-- Every value comes back as text (NVARCHAR), including the numeric drug_id.
SELECT
    JSON_VALUE(@json, '$.drugs.Codeine.bin') AS bin,
    JSON_VALUE(@json, '$.drugs.Codeine.name') AS name,
    JSON_VALUE(@json, '$.drugs.Codeine.drug_id') AS drug_id,
    -- Array elements need an explicit index; only the first icon is read here.
    JSON_VALUE(@json, '$.drugs.Codeine.icons[0]') AS icon_1;
You need to decide how to handle arrays, such as icons, where there are multiple values inside the same element.
References:
JSON_QUERY function
JSON_VALUE function

Weird Spring Boot initialization queries

So I started to log all queries of my Spring Boot application via a proxy data source and came across some queries I couldn't explain to myself.
This is the json log of said queries:
[
{
"name": "TOXI",
"connection": 3,
"isolation": "READ_COMMITTED",
"time": 2,
"success": true,
"type": "Prepared",
"batch": false,
"querySize": 1,
"batchSize": 0,
"query": [
"select * from information_schema.sequences"
],
"params": [
[]
]
},
{
"name": "TOXI",
"connection": 3,
"isolation": "READ_COMMITTED",
"time": 2,
"success": true,
"type": "Prepared",
"batch": false,
"querySize": 1,
"batchSize": 0,
"query": [
"select * from \"public\".\"toxi_image\" where 1=0"
],
"params": [
[]
]
},
{
"name": "TOXI",
"connection": 3,
"isolation": "READ_COMMITTED",
"time": 0,
"success": true,
"type": "Prepared",
"batch": false,
"querySize": 1,
"batchSize": 0,
"query": [
"select * from \"public\".\"toxi_tag\" where 1=0"
],
"params": [
[]
]
}
]
The first one still makes sense to me, but the second and third ones are where my questions start.
Why are they needed? Shouldn't the information schema hold all the table information that's needed? And why is this statement issued for only 2 tables and not for the rest of the application?
One last thing to mention: the two tables/entities have a many-to-many relationship, in case that has something to do with it.
Thank you in advance

azure search exact match of file name not returning exact results

I am indexing all the file names into the index. But when I search with the exact file name in the search query, it also returns other file names. Below is my index definition.
{
"fields": [
{
"name": "id",
"type": "Edm.String",
"facetable": true,
"filterable": true,
"key": true,
"retrievable": true,
"searchable": false,
"sortable": false,
"analyzer": null,
"indexAnalyzer": null,
"searchAnalyzer": null,
"synonymMaps": [],
"fields": []
},
{
"name": "FileName",
"type": "Edm.String",
"facetable": false,
"filterable": false,
"key": false,
"retrievable": true,
"searchable": true,
"sortable": false,
"analyzer": "keyword-analyzer",
"indexAnalyzer": null,
"searchAnalyzer": null,
"synonymMaps": [],
"fields": []
}
],
"scoringProfiles": [],
"defaultScoringProfile": null,
"corsOptions": null,
"analyzers": [
{
"name": "keyword-analyzer",
"@odata.type": "#Microsoft.Azure.Search.CustomAnalyzer",
"charFilters": [],
"tokenizer": "keyword_v2",
"tokenFilters": ["lowercase", "my_asciifolding", "my_word_delimiter"]
}
],
"tokenFilters": [
{
"@odata.type": "#Microsoft.Azure.Search.AsciiFoldingTokenFilter",
"name": "my_asciifolding",
"preserveOriginal": true
},
{
"@odata.type": "#Microsoft.Azure.Search.WordDelimiterTokenFilter",
"name": "my_word_delimiter",
"generateWordParts": true,
"generateNumberParts": false,
"catenateWords": false,
"catenateNumbers": false,
"catenateAll": false,
"splitOnCaseChange": true,
"preserveOriginal": true,
"splitOnNumerics": true,
"stemEnglishPossessive": false,
"protectedWords": []
}
],
"@odata.etag": "\"0x8D6FB2F498F9AD2\""
}
Below is my sample data
{
"value": [
{
"id": "1",
"FileName": "SamplePSDFile_1psd2680.psd"
},
{
"id": "2",
"FileName": "SamplePSDFile-1psd260.psd"
},
{
"id": "3",
"FileName": "SamplePSDFile_1psd2689.psd"
},
{
"id": "4",
"FileName": "SamplePSDFile-1psdxx2680.psd"
}
]
}
Below is the Analyze API results
{
"tokens": [
{
"token": "samplepsdfile_1psd2689.psd",
"startOffset": 0,
"endOffset": 26,
"position": 0
},
{
"token": "samplepsdfile",
"startOffset": 0,
"endOffset": 13,
"position": 0
},
{
"token": "psd",
"startOffset": 15,
"endOffset": 18,
"position": 1
},
{
"token": "psd",
"startOffset": 23,
"endOffset": 26,
"position": 2
}
]
}
When I search with the keyword "SamplePSDFile_1psd2689.psd", Azure Search returns three records in the results instead of only document 3. Below are my search query and the results.
?search="SamplePSDFile_1psd2689.psd"&api-version=2019-05-06&$count=true&queryType=full&searchMode=All
{
"@odata.count": 3,
"value": [
{
"@search.score": 2.3387241,
"id": "2",
"FileName": "SamplePSDFile-1psd260.psd"
},
{
"@search.score": 2.2493405,
"id": "3",
"FileName": "SamplePSDFile_1psd2689.psd"
},
{
"@search.score": 2.2493405,
"id": "1",
"FileName": "SamplePSDFile_1psd2680.psd"
}
]
}
How can I achieve my expected results? I tried with and without double quotes around the keyword, along with all the other options, but no luck. What am I doing wrong in this case?
Somebody suggested using $filter, but that field isn't filterable in our case.
Please help me with this.
If you are looking for exact match then you probably don't want any analyzer involved. Give it a try with this line
"analyzer": "keyword-analyzer"
changed to
"analyzer": null
If you need to be able to do exact match on the field and also support partial keyword searches then you need to index the field twice with different names. Maybe append “Exact” to the exact match field name and don’t use an analyzer for that one. The name without exact can have an analyzer. Then search on the field using the right field name index depending on the type of search.

eBay API issues - cannot publish an offer

All of the following is being performed in eBay's API sandbox.
I am attempting to list an item by using the inventory API. Specifically, I have created an inventory item and a relevant offer for that item. When I make a POST request to the publish offer endpoint, I get the following error:
{
"errors": [
{
"errorId": 25016,
"domain": "API_INVENTORY",
"subdomain": "Selling",
"category": "REQUEST",
"message": "The title value is invalid. Seller Provided Title Value is missing."
},
{
"errorId": 25002,
"domain": "API_INVENTORY",
"subdomain": "Selling",
"category": "REQUEST",
"message": "A user error has occurred. The duration \"GTC\" day(s) is not available for this listing type, or invalid for category \"49996\".",
"parameters": [
{
"name": "0",
"value": "GTC"
},
{
"name": "1",
"value": "49996"
}
]
}
]
}
I can't see any reference in any of the API documentation to a "Seller Provided Title". The duration error is also confusing as the API says it only supports "GTC" listings. The product has a title so it must be in reference to something else.
My inventory item is as follows:
{
"sku": "13725",
"product": {
"title": "Harley Davidson bike",
"aspects": {
"Year": [
"2016"
],
"Model": [
"Road Glide Special"
],
"Manufacurer": [
"Harley-Davidson®"
],
"Type": [
"Touring"
],
"For Sale By": [
"Dealer"
],
"Vehicle Title": [
"Clear"
],
"Mileage": [
"13393"
],
"VIN (Vehicle Identification Number)": [
"1HD1KTM10GB627264"
],
"Color": [
"Black Quartz"
]
},
"description": "Item description goes here",
"imageUrls": [
"https://dw4i9za0jmiyk.cloudfront.net/2018/01/12/pre_ic60e5df584b870c3d2a55c86800eede_70618b24eb08.jpg"
]
},
"condition": "USED_EXCELLENT",
"availability": {
"pickupAtLocationAvailability": [
{
"quantity": 1,
"merchantLocationKey": "425",
"availabilityType": "IN_STOCK",
"fulfillmentTime": {
"value": 1,
"unit": "DAY"
}
}
]
}
}
And my offer object is as follows:
{
"offerId": "5852159010",
"sku": "13725",
"marketplaceId": "EBAY_MOTORS",
"format": "FIXED_PRICE",
"availableQuantity": 0,
"pricingSummary": {
"price": {
"value": "18294.0",
"currency": "USD"
}
},
"listingPolicies": {
"paymentPolicyId": "5807565000",
"fulfillmentPolicyId": "5806186000"
},
"categoryId": "49996",
"merchantLocationKey": "425",
"tax": {
"applyTax": false
},
"status": "UNPUBLISHED",
"eBayPlusEligible": false
}
I had similar issues on the sandbox, and came to the conclusion that it was broken.
They also have some limits, with only certain categories working.
Have you tried it against the live API? I have found this to be far more reliable, ignoring the fact that doing development work live is dangerous!
For your info, here is my working offer code:
# Inventory-item payload built from a `product` record and its `sku`.
# NOTE(review): `product` and `sku` are not defined in this snippet; they must
# already be in scope where this runs.
inventory_template = dict(
    availability=dict(
        shipToLocationAvailability=dict(quantity=product.quantity_available),
    ),
    condition="NEW",
    product=dict(
        # Each item specific becomes a single-element list keyed by its name.
        aspects=dict((spec.name, [spec.value]) for spec in product.specifics),
        brand=product.product_brand,
        description=product.product_description,
        imageUrls=[
            "https://ebay.mydomain.co.uk/{}".format(img.image_link)
            for img in product.images
        ],
        mpn=product.product_mpn,
        title=product.product_title,
        upc=[product.product_upc],
        ean=[product.product_ean],
        # "epid": "string"  (left disabled, as in the original snippet)
    ),
    sku=sku,
)
# Offer payload built from an `offer` record.
# NOTE(review): `offer` and `html` are not defined in this snippet; `html` is
# presumably the rendered listing description -- confirm against the caller.
offer_body = dict(
    availableQuantity=offer.available_quantity,
    categoryId=offer.category_id,
    listingDescription=html,
    listingPolicies=dict(
        paymentPolicyId=offer.payment_policy_id,
        returnPolicyId=offer.return_policy_id,
        fulfillmentPolicyId=offer.fulfillment_policy_id,
    ),
    merchantLocationKey=offer.merchant_location_key,
    pricingSummary=dict(
        price=dict(
            value=offer.summary_price_value,
            currency=offer.summary_price_currency,
        ),
    ),
    sku=offer.sku,
    marketplaceId=offer.marketplace_id,
    format=offer.format,
)
The offer.available_quantity etc. are items from my database; it's the structure I'm showing.

Improve search result based on field boost in elasticsearch

I am using ElasticSearch 1.7 for the first time and I have set up weights based on fields. These may change as requirements change. I am getting results from my query, but the issue is that if I change a field weight dramatically, I don't see much effect on the records. Please check my query below and let me know if I am doing anything wrong.
ElasticSearch Query :
{
"from": 0,
"size": 10,
"highlight": {
"pre_tags": [
"<b>"
],
"post_tags": [
"</b>"
],
"fields": {
"title": {},
"description": {}
}
},
"query": {
"function_score": {
"query": {
"query_string": {
"query": "any keyword",
"fields": [
"fullText",
"title^100",
"authors^4",
"pubYear^4",
"publisher^4",
"abstract^2",
"documentTypeName^2",
"topic^6",
"topicSynonym^6",
"quality_value^6",
"domain^2"
],
"default_operator": "AND",
"analyze_wildcard": true
}
},
"score_mode": "sum",
"boost_mode": "sum",
"max_boost": 100
}
}
}
Sample Data:
{
"took": 44,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1465,
"max_score": 14.961364,
"hits": [
{
"_index": "snData",
"_type": "report",
"_id": "159",
"_score": 14.961364,
"_source": {
"str_ID": "159",
"topic": [
"strategy",
"Consumer-targeted strategy"
],
"topicSynonym": [
"assistance",
"coping",
"coping strategies",
"encouragement",
"support"
],
"fullText": "Background: As the incidence and prevalence of prostate cancer continue to rise, the number of men needing help and support to assist them in coping with disease and treatment-related symptoms and their psychosocial effects is likely to increase.",
"quality_value": 1,
"ID": 24034,
"title": "Psychosocial interventions for men with prostate cancer",
"authors": "Parahoo K E Noyes",
"pubYear": "2013",
"publisher": "",
"abstractEN": "Background: As the incidence and prevalence of prostate cancer continue to rise, the number of men needing help and support to assist them in coping with disease and treatment-related symptoms and their psychosocial effects is likely to increase.",
"uniqueID": "",
"documentTypeName": "Review of effects",
"viewCount": 28,
},
"highlight": {
"title": [
"Interventions for men with prostate <b>cancer</b>"
],
"abstract": [
"Background: As the incidence and prevalence of prostate <b>cancer</b> continue to rise, the number of men"
]
}
}
]
}
}
Use case: if I change the weight of quality_value or of any other field, the results should change based on the field weight. I am not sure whether my query is correct or whether I am missing anything. I am using ElasticSearch 1.7.