rivers with elastic search with relational data - sql

Consider the following sql schema:
Where Table_2 and Table_1 have a many to many relation
Now I'm trying to create an Elasticsearch river that will pull in all the data from Table_2, but I want the related rows from Table_1 as well, not just their IDs.
Here is what I believe will be my sql:
select t2.*, t1.Name from [Table_2] t2
join [Table_3] t3 on t2.ID = t3.table_2
join [Table_1] t1 on t1.ID = t3.table_1
After doing this I noticed that I get duplicate rows, i.e. one row for each relationship in Table_3. I understand why this happens, but what I want is a single entry per Table_2 row that contains a nested list of its related Table_1 rows.
This is what I'm getting in elastic now
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 1,
"hits": [
{
"_index": "test_relation",
"_type": "relation",
"_id": "AUpUGlvaRCP4Gzd2p3K4",
"_score": 1,
"_source": {
"Name": [
"table_2test",
"Test1"
],
"ID": 1
}
},
{
"_index": "test_relation",
"_type": "relation",
"_id": "AUpUGlvaRCP4Gzd2p3K5",
"_score": 1,
"_source": {
"Name": [
"table_2test",
"Test2"
],
"ID": 1
}
}
]
}
}
But instead I want it to look like:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "test_relation",
"_type": "relation",
"_id": "AUpUGlvaRCP4Gzd2p3K4",
"_score": 1,
"_source": {
"Name": [
"table_2test"
],
"Table_1": [
{"Name": "Test1", "ID": "1"},
{"Name": "Test2", "ID": "2"}
],
"ID": 1
}
}
]
}
}
I was hoping to get away with using an elasticsearch river for sql but I'm not sure if it allows for this kind of query.

Related

Getting response in desired format with postgreSQL

There are a bunch of tables in my database named courses, topics, subtopics, assessments.
One course can have multiple topics. One topic can have multiple subtopics. One topic can have multiple assessments.
There is a query I'm trying to implement in which I need a response in a specific format. For which aggregation has been used. I'm getting the desired result except for assessments.
The format I want is an array of courses. For each course, there is an array of topics. For each topic, there is an array of subtopics. This much I'm able to achieve. Now, I want for each topic an array of assessments just like subtopics which I'm unable to achieve.
The Code:
select courses.course_id as id, courses.course_name as name, courses.course_duration as
duration, courses.course_description as description,
jsonb_agg(
jsonb_build_object(
'id', topics.topic_id,
'name', topics.topic_name,
'duration', topics.topic_duration,
'sequence_no', topics.sequence_no,
'description', topics.topic_description,
'subtopics', subtopic.subtopics,
'assessment_id', assessments.id
)
order by topics.topic_id
) topics
from courses
left join topics on courses.course_id = topics.course_id
cross join lateral (
select jsonb_agg(
jsonb_build_object(
'id', subtopics.subtopic_id,
'name', subtopics.subtopic_name,
'assignment_id', subtopics.assignment_id,
'homework_id', subtopics.homework_id,
'on_free_trial', subtopics.on_free_trial
)
order by subtopics.subtopic_id
) subtopics
from subtopics
where subtopics.topic_id = topics.topic_id
) subtopic
left join assessments on assessments.topic_id = topics.topic_id
cross join lateral (
select jsonb_agg(
jsonb_build_object(
'id', assessments.id
)
order by assessments.id
) assessments
from assessments
where assessments.topic_id = topics.topic_id
) assessment
group by courses.course_id
order by courses.course_id
The output is as follows:
{
"Response": [
{
"id": 1,
"name": "Data Structures and Algorithms",
"duration": 32,
"description": "A data structure is a named location that can be used to store and organize data. And, an algorithm is a collection of steps to solve a particular problem. Learning data structures",
"topics": [
{
"id": 1,
"name": "ARRAYS AND VECTORS",
"duration": 7,
"subtopics": [
{
"id": 1,
"name": "Pre-Computations in Array",
"homework_id": 1,
"assignment_id": 1,
"on_free_trial": null
},
{
"id": 2,
"name": "Pre-Computations",
"homework_id": 1,
"assignment_id": 2,
"on_free_trial": null
}
],
"description": "Static and Dynamic Arrays, Pre-Computations, Array Rotations, Sliding Window Technique, Bucketization Technique etc.",
"sequence_no": 1,
"assessment_id": 2
},
{
"id": 1,
"name": "ARRAYS AND VECTORS",
"duration": 7,
"subtopics": [
{
"id": 1,
"name": "Pre-Computations in Array",
"homework_id": 1,
"assignment_id": 1,
"on_free_trial": null
},
{
"id": 2,
"name": "Pre-Computations",
"homework_id": 1,
"assignment_id": 2,
"on_free_trial": null
}
],
"description": "Static and Dynamic Arrays, Pre-Computations, Array Rotations, Sliding Window Technique, Bucketization Technique etc.",
"sequence_no": 1,
"assessment_id": 1
},
{
"id": 2,
"name": "MATHS",
"duration": 7,
"subtopics": null,
"description": "Modular arithmetic, Combinatorics, Euclid’s Algorithm for GCD, Primality Testing : Sieve of Eratosthenes, Fast Factorization, Catalan Numbers",
"sequence_no": 2,
"assessment_id": null
}
]
},
{
"id": 2,
"name": "OPERATING SYSTEMS",
"duration": 30,
"description": "An exhaustive collection of all the must-know concepts of Operating Systems",
"topics": [
{
"id": 100,
"name": "Threads and Processes",
"duration": 4,
"subtopics": [
{
"id": 5,
"name": "Basics of Threads and Processes",
"homework_id": 100,
"assignment_id": 100,
"on_free_trial": null
},
{
"id": 6,
"name": "Inter Process Communication",
"homework_id": null,
"assignment_id": null,
"on_free_trial": null
}
],
"description": "Difference b/w Threads and Processes, PCB",
"sequence_no": 1,
"assessment_id": null
},
{
"id": 101,
"name": "Memory Management",
"duration": 3,
"subtopics": [
{
"id": 7,
"name": "Logical And Physical Memory",
"homework_id": 101,
"assignment_id": 101,
"on_free_trial": null
}
],
"description": "Logical and Physical Memory, Paging, Fragmentation",
"sequence_no": 2,
"assessment_id": null
}
]
},
{
"id": 3,
"name": "Python",
"duration": 4,
"description": "Basics of python language.",
"topics": [
{
"id": 3,
"name": "Python list",
"duration": 1,
"subtopics": null,
"description": "Basics of python.",
"sequence_no": 1,
"assessment_id": null
}
]
}
]
}
As one can see, the topic is repeated once per assessment_id. I want the assessments to be structured as an array (or an array of objects), just as the subtopics are.
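You forgot to use the aggregated assessments from the lateral subquery in jsonb_build_object, as you did with the subtopics. The plain left join on assessments should also be dropped, because it is what produces one topic row per assessment: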
-- One row per course. Each course carries a JSON array of its topics, and each
-- topic carries its own "subtopics" and "assessments" arrays.
select
    courses.course_id as id,
    courses.course_name as name,
    courses.course_duration as duration,
    courses.course_description as description,
    jsonb_agg(
        jsonb_build_object(
            'id', topics.topic_id,
            'name', topics.topic_name,
            'duration', topics.topic_duration,
            'sequence_no', topics.sequence_no,
            'description', topics.topic_description,
            'subtopics', subtopic.subtopics,
            -- Reference the lateral subquery alias ("assessment"), not the
            -- base table "assessments", which has no "assessments" column.
            'assessments', assessment.assessments
        )
        order by topics.topic_id
    ) as topics
from courses
left join topics on courses.course_id = topics.course_id
-- Aggregate subtopics per topic; an aggregate without GROUP BY always yields
-- exactly one row, so this never multiplies or drops topic rows.
cross join lateral (
    select jsonb_agg(
        jsonb_build_object(
            'id', subtopics.subtopic_id,
            'name', subtopics.subtopic_name,
            'assignment_id', subtopics.assignment_id,
            'homework_id', subtopics.homework_id,
            'on_free_trial', subtopics.on_free_trial
        )
        order by subtopics.subtopic_id
    ) as subtopics
    from subtopics
    where subtopics.topic_id = topics.topic_id
) as subtopic
-- Deliberately NO plain "left join assessments": that join emits one topic row
-- per assessment and is what duplicated the topics in the output.
cross join lateral (
    select jsonb_agg(
        jsonb_build_object(
            'id', assessments.id
        )
        order by assessments.id
    ) as assessments
    from assessments
    where assessments.topic_id = topics.topic_id
) as assessment
group by courses.course_id
order by courses.course_id

PostgreSQL json_build_object nested

First things first:
I'm using PostgreSQL 11.6, compiled by Visual C++ build 1800, 64-bit. :)
I'm trying to create a JSON object directly from the database.
My desired result is
{
"1": [],
"2": [],
"3": []
}
Imagine my tables like:
MyIdTable
_id_|__key__
1 test1
2 test2
3 test3
MyKeyValueTable
__id__|__fkidmyidtable__|__value__
1 1 test
2 1 test1
3 2 test2
4 2 test3
Now I create a query
select
json_build_object(
a.id,
json_agg(
b.*
)
)
from "MyIdTable" a
inner join "MyKeyValueTable" b on a.id = b.fkidmyidtable group by a.id
This will get me as result, multiple rows with the desired result:
row 1: {
"1": [{ "id": 1, "fkidmyidtable": 1, "value": "test" }, { "id": 2, "fkidmyidtable": 1, "value": "test1" }]
}
row 2: {
"2": [{ "id": 3, "fkidmyidtable": 2, "value": "test2" }, { "id": 4, "fkidmyidtable": 2, "value": "test3" }]
}
After this I can use json_agg() to create almost my desired result. The issue is that it will create
[ { "json_build_object": {"1": [{ "id": 1, "fkidmyidtable": 1, "value": "test" }, { "id": 2, "fkidmyidtable": 1, "value": "test1" }]}, "json_build_object": { "2": [{ "id": 3, "fkidmyidtable": 2, "value": "test2" }, { "id": 4, "fkidmyidtable": 2, "value": "test3" }] }]
I would like to know if it's possible to write a query to merge my created objects into one JSON object like:
{
"1": [{ "id": 1, "fkidmyidtable": 1, "value": "test" }, { "id": 2, "fkidmyidtable": 1, "value": "test1" }],
"2": [{ "id": 3, "fkidmyidtable": 2, "value": "test2" }, { "id": 4, "fkidmyidtable": 2, "value": "test3" }]
}
Thank you very much in advance for taking the time to read :)!
If I followed you correctly, you can add another level of aggregation and use json_object_agg():
-- Step 1: collapse the key/value rows into one JSON array per parent id.
with rows_per_id as (
    select
        a.id,
        json_agg(b.*) as js
    from "MyIdTable" as a
    inner join "MyKeyValueTable" as b
        on a.id = b.fkidmyidtable
    group by a.id
)
-- Step 2: fold every (id, array) pair into a single JSON object keyed by id.
select json_object_agg(id, js) as res
from rows_per_id

How to limit results of preload of gorm

type Item struct {
TopicId int `json:"topic_id"`
Topic *Topic `json:"topic,omitempty"`
BotId int `json:"bot_id"`
URL string `gorm:"varchar(250);unique" json:"url"`
Title string `gorm:"varchar(250)" json:"title"`
}
type Topic struct {
Title string `gorm:"varchar(250)" json:"title"`
Items []*Item `json:"items,omitempty"`
}
Here are the two models. I want to query the topics, each with its 5 latest items.
Without the limit for items, I could do this with db.Model(&Topic{}).Preload("Items").
When I try to add some limit conditions to items:
db.Model(&Topic{}).Preload("Items", func(db *gorm.DB) *gorm.DB {
return db.Order("title DESC").Limit(5)
})
It will return 5 items in total, not 5 items for each topic.
Actual result:
"records": [
{
"id": 4,
"created_on": "2019-08-11T10:28:54.910022Z",
"title": "Topic 1",
},
{
"id": 5,
"created_on": "2019-08-11T10:29:26.952614Z",
"title": "Programming",
},
{
"id": 6,
"created_on": "2019-08-11T10:34:16.040229Z",
"title": "Topic 3",
"items": [
{
"id": 1,
"created_on": "2019-08-27T14:23:17.766055Z",
"topic_id": 6,
"title": "Title One",
},
......
{
"id": 5,
"created_on": "2019-08-27T14:23:17.766055Z",
"topic_id": 6,
"title": "Title five",
}
]
Expected results:
"records": [
{
"id": 4,
"created_on": "2019-08-11T10:28:54.910022Z",
"title": "Topic 1",
},
{
"id": 5,
"created_on": "2019-08-11T10:29:26.952614Z",
"title": "Programming",
"items": [
{
"id": 6,
"created_on": "2019-08-27T14:23:17.766055Z",
"topic_id": 5,
"title": "Title six",
},
......
{
"id": 10,
"created_on": "2019-08-27T14:23:17.766055Z",
"topic_id": 5,
"title": "Title ten",
}]
},
{
"id": 6,
"created_on": "2019-08-11T10:34:16.040229Z",
"title": "Topic 3",
"items": [
{
"id": 1,
"created_on": "2019-08-27T14:23:17.766055Z",
"topic_id": 6,
"title": "Title One",
},
......
{
"id": 5,
"created_on": "2019-08-27T14:23:17.766055Z",
"topic_id": 6,
"title": "Title five",
}
]
The actual SQL it generated is SELECT * FROM "item" WHERE "topic_id" IN (6,4,5) ORDER BY title DESC LIMIT 5
It's obviously not the result I want, so how should I get the expected result with gorm?
For postgresql only.
type Topic struct {
Title string `gorm:"varchar(250);PRIMARY KEY" json:"title"`
// assume the foreign key between two tables are both Title.
Items []*Item `gorm:"foreignkey:Title;association_foreignkey:Title" json:"items,omitempty"`
}
var topics []Topic
db.Model(&Topic{}).Preload("Items", func(tx *gorm.DB) *gorm.DB {
return tx.Joins(`JOIN LATERAL (
SELECT i.url FROM items i WHERE i.title = items.title ORDER BY i.topic_id DESC LIMIT 5
) AS foo ON foo.url = items.url`)
}).Find(&topics)
You could use a lateral join to limit the rows for each distinct value. After retrieving the rows of topics, gorm then sends the following query to get the related rows from items:
SELECT "items".*
FROM "items"
JOIN LATERAL
(SELECT i.url
FROM items i
WHERE i.title = items.title
ORDER BY i.topic_id DESC
LIMIT 5) AS foo ON foo.url = items.url
WHERE ("title" IN (?))

Nested Join with aggregation in Postgres

In my DB, there are two tables
EventType
ID (Primary key)
Name
ActivityType
ID (Primary key)
Name
EventTypeID (foreign key)
ParentActivityTypeID (Relation with self ID)
I have tried with the following query to aggregate the json
SELECT coalesce(json_build_object(
'EventTypeID', ev."ID",
'EventTypeName', ev."Name",
'ActivityType', json_agg(json_build_object('ID',ac."ID",'Name',ac."Name",'ParentActivityType',json_agg(select * from "Activity" where ))
), '{}'::json) AS item
FROM "EventType" as ev
JOIN "ActivityType" as ac ON ev."ID" = ac."EventTypeID"
GROUP BY ev."ID"
expected JSON output
[{
"EventTypeID": 2,
"EventTypeName": "On-Site Care",
"ActivityType": [
{
"ID": 1,
"Name": "Measurement",
"EventTypeID": 2,
"ParentActivityTypeID": null,
"SubActivityType": [
{
"ID": 17,
"Name": "abc",
"EventTypeID": 2,
"ParentActivityTypeID": 1
}
]
},
{
"ID": 2,
"Name": "Medication",
"EventTypeID": 2,
"ParentActivityTypeID": null
},
{
"ID": 3,
"Name": "Wellness check",
"EventTypeID": 2,
"ParentActivityTypeID": null
},
{
"ID": 4,
"Name": "Other",
"EventTypeID": 2,
"ParentActivityTypeID": null
}
]
},
{
"EventTypeID": 3,
"EventTypeName": "Care Call",
"ActivityType": [
{
"ID": 1,
"Name": "Measurement",
"EventTypeID": 3,
"ParentActivityTypeID": null,
"SubActivityType": [
{
"ID": 17,
"Name": "abc",
"EventTypeID": 3,
"ParentActivityTypeID": 1
}
]
},
{
"ID": 2,
"Name": "Medication",
"EventTypeID": 3,
"ParentActivityTypeID": null
},
{
"ID": 3,
"Name": "Wellness check",
"EventTypeID": 3,
"ParentActivityTypeID": null
},
{
"ID": 4,
"Name": "Other",
"EventTypeID": 3,
"ParentActivityTypeID": null
}
]
}
]
You can join the table to itself as the parent, as shown below.
-- One JSON object per event type, listing its top-level activity types, each
-- with its direct children nested under "SubActivityType".
--
-- Two defects of a naive version are avoided here:
--   * the self-join must be on "ActivityType" (the table that owns
--     "ParentActivityTypeID"), not on "EventType";
--   * PostgreSQL does not allow nested aggregate calls
--     (json_agg(... json_agg(...))), so the children are aggregated in a
--     LATERAL subquery and the outer json_agg only sees a plain value.
SELECT json_build_object(
    'EventTypeID', ev."ID",
    'EventTypeName', ev."Name",
    'ActivityType', json_agg(
        json_build_object(
            'ID', parent."ID",
            'Name', parent."Name",
            'EventTypeID', parent."EventTypeID",
            'ParentActivityTypeID', parent."ParentActivityTypeID",
            -- JSON null when the activity type has no children
            'SubActivityType', sub."SubActivityType"
        )
        ORDER BY parent."ID"
    )
) AS item
FROM "EventType" AS ev
-- Only root activity types become direct members of "ActivityType".
INNER JOIN "ActivityType" AS parent
    ON parent."EventTypeID" = ev."ID"
    AND parent."ParentActivityTypeID" IS NULL
-- Aggregate the children of each root; an aggregate without GROUP BY always
-- returns exactly one row, so this join never multiplies or drops parents.
CROSS JOIN LATERAL (
    SELECT json_agg(
        json_build_object(
            'ID', child."ID",
            'Name', child."Name",
            'EventTypeID', child."EventTypeID",
            'ParentActivityTypeID', child."ParentActivityTypeID"
        )
        ORDER BY child."ID"
    ) AS "SubActivityType"
    FROM "ActivityType" AS child
    WHERE child."ParentActivityTypeID" = parent."ID"
) AS sub
GROUP BY ev."ID", ev."Name"
ORDER BY ev."ID"

ElasticSearch - return the complete value of a facet for a query

I've recently started using ElasticSearch. I try to complete some use cases. I have a problem for one of them.
I have indexed some users with their full name (e.g. "Jean-Paul Gautier", "Jean De La Fontaine").
I try to get all the full names responding to some query.
For example, I want the 100 most frequent full names beginning with "J"
{
"query": {
"query_string" : { "query": "full_name:J*" }
},
"facets":{
"name":{
"terms":{
"field": "full_name",
"size":100
}
}
}
}
The result I get is all the words of the full names : "Jean", "Paul", "Gautier", "De", "La", "Fontaine".
How can I get "Jean-Paul Gautier" and "Jean De La Fontaine" (all the full_name values beginning with 'J')? The "post_filter" option does not do this; it only restricts the above subset.
I have to configure "how works" this full_name facet
I have to add some options to this current query
I have to do some "mapping" (very obscure for the moment)
Thanks
You just need to set "index": "not_analyzed" on the field, and you will be able to get back the full, unmodified field values in your facet.
Typically, it's nice to have one version of the field that isn't analyzed (for faceting) and another that is (for searching). The "multi_field" field type is useful for this.
So in this case, I can define a mapping as follows:
curl -XPUT "http://localhost:9200/test_index/" -d'
{
"mappings": {
"people": {
"properties": {
"full_name": {
"type": "multi_field",
"fields": {
"untouched": {
"type": "string",
"index": "not_analyzed"
},
"full_name": {
"type": "string"
}
}
}
}
}
}
}'
Here we have two sub-fields. The one with the same name as the parent will be the default, so if you search against the "full_name" field, Elasticsearch will actually use "full_name.full_name". "full_name.untouched" will give you the facet results you want.
So next I add two documents:
curl -XPUT "http://localhost:9200/test_index/people/1" -d'
{
"full_name": "Jean-Paul Gautier"
}'
curl -XPUT "http://localhost:9200/test_index/people/2" -d'
{
"full_name": "Jean De La Fontaine"
}'
And then I can facet on each field to see what is returned:
curl -XPOST "http://localhost:9200/test_index/_search" -d'
{
"size": 0,
"facets": {
"name_terms": {
"terms": {
"field": "full_name"
}
},
"name_untouched": {
"terms": {
"field": "full_name.untouched",
"size": 100
}
}
}
}'
and I get back the following:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 0,
"hits": []
},
"facets": {
"name_terms": {
"_type": "terms",
"missing": 0,
"total": 7,
"other": 0,
"terms": [
{
"term": "jean",
"count": 2
},
{
"term": "paul",
"count": 1
},
{
"term": "la",
"count": 1
},
{
"term": "gautier",
"count": 1
},
{
"term": "fontaine",
"count": 1
},
{
"term": "de",
"count": 1
}
]
},
"name_untouched": {
"_type": "terms",
"missing": 0,
"total": 2,
"other": 0,
"terms": [
{
"term": "Jean-Paul Gautier",
"count": 1
},
{
"term": "Jean De La Fontaine",
"count": 1
}
]
}
}
}
As you can see, the analyzed field returns single-word, lower-cased tokens (when you don't specify an analyzer, the standard analyzer is used), and the un-analyzed sub-field returns the unmodified original text.
Here is a runnable example you can play with:
http://sense.qbox.io/gist/7abc063e2611846011dd874648fd1b77450b19a5
Try altering the mapping for "full_name":
"properties": {
"full_name": {
"type": "string",
"index": "not_analyzed"
}
...
}
not_analyzed means that it will be kept as is, capitals, spaces, dashes etc, so that "Jean De La Fontaine" will stay findable and not be tokenized into "Jean" "De" "La" "Fontaine"
You can experiment with different analyzers using the api
Notice what the standard one does to a multi-part name:
GET /_analyze?analyzer=standard
{'Jean Claude Van Dame'}
{
"tokens": [
{
"token": "jean",
"start_offset": 2,
"end_offset": 6,
"type": "<ALPHANUM>",
"position": 1
},
{
"token": "claude",
"start_offset": 7,
"end_offset": 13,
"type": "<ALPHANUM>",
"position": 2
},
{
"token": "van",
"start_offset": 14,
"end_offset": 17,
"type": "<ALPHANUM>",
"position": 3
},
{
"token": "dame",
"start_offset": 18,
"end_offset": 22,
"type": "<ALPHANUM>",
"position": 4
}
]
}