SQL to Elasticsearch equivalent: get the first and last documents with a filter

I have an issue with Elasticsearch documents. The goal is to get the most recent close value within a specified date range and also the earliest close value from the stocks index. Here's the SQL equivalent:
SELECT (
SELECT `close` FROM `stocks` WHERE `date` >= '2022-06-01' ORDER BY `date` DESC LIMIT 1
) AS last_close,
(
SELECT `close` FROM `stocks` ORDER BY `date` ASC LIMIT 1
) AS first_close FROM `stocks`
That's the goal I need to achieve; I don't think I need to share the rest, because this is where the bottleneck is.
Edit: these are the mappings for my stocks index in Elasticsearch:
{
"stocks": { -
"mappings": { -
"properties": { -
"avg": { -
"type": "double"
},
"board": { -
"type": "text"
},
"book": { -
"type": "double"
},
"change": { -
"type": "double"
},
"chg": { -
"type": "double"
},
"close": { -
"type": "double"
},
"date": { -
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss"
},
"der": { -
"type": "double"
},
"eps": { -
"type": "double"
},
"fve": { -
"type": "double"
},
"fvei": { -
"type": "double"
},
"group": { -
"type": "keyword"
},
"high": { -
"type": "double"
},
"low": { -
"type": "double"
},
"open": { -
"type": "double"
},
"paid_up_cap_shares": { -
"type": "text",
"fields": { -
"keyword": { -
"type": "keyword",
"ignore_above": 256
}
}
},
"pbv": { -
"type": "double"
},
"peg_analysis": { -
"type": "text"
},
"peg_ratio": { -
"type": "double"
},
"per": { -
"type": "double"
},
"prev": { -
"type": "double"
},
"roe": { -
"type": "double"
},
"stock": { -
"type": "keyword"
},
"trade_freq": { -
"type": "double"
},
"trade_val": { -
"type": "double"
},
"trade_vol": { -
"type": "double"
}
}
}
}
}
And here are some example documents from the index:
{
"_index": "stocks",
"_id": "6odITIEBRQt2Zq4UUGu3",
"_score": 1.0,
"_source": {
"date": "2022-06-10 13:23:36",
"fvei": "112833.74",
"pbv": "0.0",
"prev": "97",
"book": "-1.185716459E8",
"roe": "-7.87",
"der": "-6.87",
"high": "91",
"avg": "91.0",
"fve": "91.0",
"low": "91",
"stock": "WINR",
"per": "0.0",
"close": "91",
"trade_vol": "46283600",
"group": "IDXPROPERT",
"paid_up_cap_shares": "0.1",
"trade_val": "4211807600",
"chg": "-6.59",
"change": "-6.0",
"peg_ratio": "0.0",
"eps": "9327072.3",
"trade_freq": "1433",
"peg_analysis": "negative growth",
"board": "RG",
"open": "91"
}
},
{
"_index": "stocks",
"_id": "7IdITIEBRQt2Zq4UUGu3",
"_score": 1.0,
"_source": {
"date": "2022-06-10 13:23:36",
"fvei": "66215.12",
"pbv": "0.0",
"prev": "685",
"book": "1946574.58",
"roe": "22.08",
"der": "3.62",
"high": "685",
"avg": "677.5",
"fve": "680.0",
"low": "670",
"stock": "TLDN",
"per": "0.0",
"close": "680",
"trade_vol": "577600",
"group": "IDXNONCYC",
"paid_up_cap_shares": "540.65",
"trade_val": "393001500",
"chg": "-0.73",
"change": "-5.0",
"peg_ratio": "0.0",
"eps": "429847.27",
"trade_freq": "117",
"peg_analysis": "negative growth",
"board": "RG",
"open": "685"
}
},

I have found the answer by using top_hits:
{
"query": {
"bool": {
"must": [
{
"range": {
"date": {
"gte": "now-89d",
"lte": "now+1d"
}
}
}
]
}
},
"aggs": {
"group": {
"terms": {
"field": "stock"
},
"aggs": {
"last_doc": {
"top_hits": {
"size": 1,
"sort": [
{
"date": {
"order": "desc"
}
}
],
"_source": {
"includes": ["close"]
}
}
},
"first_doc": {
"top_hits": {
"size": 1,
"sort": [
{
"date": {
"order": "asc"
}
}
],
"_source": {
"includes": ["close"]
}
}
}
}
}
}
}
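
For reference, here is a minimal sketch of running this aggregation with the Python elasticsearch client and reading the per-stock first and last close values out of the buckets. The client setup, the host URL, and the added "size": 0 are assumptions on top of the original query, and depending on the client version you may pass query=/aggs= keyword arguments instead of body=:

from elasticsearch import Elasticsearch

# Assumed local cluster; adjust the URL/auth for your environment.
es = Elasticsearch("http://localhost:9200")

body = {
    "size": 0,  # we only need the aggregation buckets, not the hits themselves
    "query": {
        "bool": {
            "must": [
                {"range": {"date": {"gte": "now-89d", "lte": "now+1d"}}}
            ]
        }
    },
    "aggs": {
        "group": {
            "terms": {"field": "stock"},
            "aggs": {
                "last_doc": {
                    "top_hits": {
                        "size": 1,
                        "sort": [{"date": {"order": "desc"}}],
                        "_source": {"includes": ["close"]},
                    }
                },
                "first_doc": {
                    "top_hits": {
                        "size": 1,
                        "sort": [{"date": {"order": "asc"}}],
                        "_source": {"includes": ["close"]},
                    }
                },
            },
        }
    },
}

resp = es.search(index="stocks", body=body)

# Each terms bucket carries one newest and one oldest hit per stock.
for bucket in resp["aggregations"]["group"]["buckets"]:
    first_close = bucket["first_doc"]["hits"]["hits"][0]["_source"]["close"]
    last_close = bucket["last_doc"]["hits"]["hits"][0]["_source"]["close"]
    print(bucket["key"], first_close, last_close)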

Related

How to set the data type for an Additional Column in ADF

I need to set the data type for an Additional Column with dynamic content in the Sink in ADF.
By default it takes nvarchar(max) from the JSON object, but I need bigint.
Below is the JSON object which creates the table with the additional column:
{
"source": {
"type": "SqlServerSource",
"additionalColumns": [
{
"name": "ApplicationId",
"value": 3604509277250831000
}
],
"sqlReaderQuery": "SELECT * from Table A",
"queryTimeout": "02:00:00",
"isolationLevel": "ReadUncommitted",
"partitionOption": "None"
},
"sink": {
"type": "AzureSqlSink",
"writeBehavior": "insert",
"sqlWriterUseTableLock": false,
"tableOption": "autoCreate",
"disableMetricsCollection": false
},
"enableStaging": false,
"translator": {
"type": "TabularTranslator",
"typeConversion": true,
"typeConversionSettings": {
"allowDataTruncation": true,
"treatBooleanAsNumber": false
}
}
}
ADF configuration: (screenshot omitted)
After the table is auto-created, the database column has data type nvarchar(max). (screenshot omitted)
If I convert the dynamic content into an int:
#int(pipeline().parameters.application.applicationId)
then I get a warning. (screenshot omitted)
Please let me know how I can set the data type in ADF.
I also tried the same and got the same result.
By default it takes nvarchar(max) from the JSON object, but I need bigint.
To resolve this, when you add the additional column in your source dataset, go to the Mapping tab and click Import schemas. This imports the source schema and also includes the additional column; change that column's type to Int64 (in the original screenshot, the word "additional" after the column name marks it as an additional column).
After this, run your pipeline; it will create the additional column with data type bigint.
{
"name": "pipeline2",
"properties": {
"activities": [
{
"name": "Copy data1",
"type": "Copy",
"dependsOn": [],
"policy": {
"timeout": "0.12:00:00",
"retry": 0,
"retryIntervalInSeconds": 30,
"secureOutput": false,
"secureInput": false
},
"userProperties": [],
"typeProperties": {
"source": {
"type": "JsonSource",
"additionalColumns": [
{
"name": "name",
"value": {
"value": "#pipeline().parameters.demo.age",
"type": "Expression"
}
}
],
"storeSettings": {
"type": "AzureBlobFSReadSettings",
"recursive": true,
"enablePartitionDiscovery": false
},
"formatSettings": {
"type": "JsonReadSettings"
}
},
"sink": {
"type": "AzureSqlSink",
"writeBehavior": "insert",
"sqlWriterUseTableLock": false,
"tableOption": "autoCreate",
"disableMetricsCollection": false
},
"enableStaging": false,
"translator": {
"type": "TabularTranslator",
"mappings": [
{
"source": {
"path": "$['taskId']"
},
"sink": {
"name": "taskId",
"type": "String"
}
},
{
"source": {
"path": "$['taskObtainedScore']"
},
"sink": {
"name": "taskObtainedScore",
"type": "String"
}
},
{
"source": {
"path": "$['multiInstance']"
},
"sink": {
"name": "multiInstance",
"type": "String"
}
},
{
"source": {
"path": "$['name']"
},
"sink": {
"name": "name",
"type": "Int64"
}
}
],
"collectionReference": ""
}
},
"inputs": [
{
"referenceName": "Json1",
"type": "DatasetReference"
}
],
"outputs": [
{
"referenceName": "AzureSqlTable1",
"type": "DatasetReference"
}
]
}
],
"parameters": {
"demo": {
"type": "object",
"defaultValue": {
"name": "John",
"age": 30,
"isStudent": true
}
}
},
"annotations": []
}
}
OUTPUT: (screenshot of the resulting table omitted)

Equivalent to select distinct order by on Elasticsearch

I have an index called datalake, which contains records of stock data. My goal is to select the distinct stocks and order them by date descending, equivalent to this SQL:
SELECT DISTINCT ON (stock) * FROM datalake ORDER BY stock, date DESC
Here are the settings of my index
{
"settings": {
"index": {
"number_of_shards": 3,
"number_of_replicas": 2
}
},
"mappings": {
"properties": {
"stock": {
"type": "keyword"
},
"group": {
"type": "text"
},
"paid_up_cap_shares": {
"type": "double"
},
"market_cap_share": {
"type": "double"
},
"book": {
"type": "double"
},
"eps": {
"type": "double"
},
"der": {
"type": "double"
},
"roe": {
"type": "double"
},
"per": {
"type": "double"
},
"pbv": {
"type": "double"
},
"avg": {
"type": "double"
},
"chg": {
"type": "double"
},
"fve": {
"type": "double"
},
"fvei": {
"type": "double"
},
"peg_ratio": {
"type": "double"
},
"npmpct": {
"type": "double"
},
"peg_analys": {
"type": "text"
},
"date": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis"
},
"roa": {
"type": "double"
},
"total_equity": {
"type": "double"
},
"bvps": {
"type": "double"
},
"prev": {
"type": "double"
},
"trade_val": {
"type": "double"
},
"trade_vol": {
"type": "double"
},
"trade_freq": {
"type": "double"
},
"open": {
"type": "double"
},
"close": {
"type": "double"
},
"high": {
"type": "double"
},
"low": {
"type": "double"
},
"change": {
"type": "double"
}
}
}
}
And here's the query I tried:
{
"aggs": {
"unique_names": {
"terms": {
"field": "stock"
}
},
"aggs": {
"bucket_sort": {
"sort": [
{
"date": {
"order": "desc"
}
}
]
}
}
}
}
but it always gave me errors like this:
{
"error": {
"root_cause": [
{
"type": "action_request_validation_exception",
"reason": "Validation Failed: 1: No aggregation found for path [date];"
}
],
"type": "action_request_validation_exception",
"reason": "Validation Failed: 1: No aggregation found for path [date];"
},
"status": 400
}
I'm trying to get results from the datalake index with distinct stock values, ordered by date descending. How can I sort on the date field inside the aggregation without using a metric aggregation such as sum or avg?
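
One likely direction, following the same terms + top_hits pattern as the accepted answer at the top of this page, is to nest a top_hits sub-aggregation (size 1, sorted by date descending) under the terms aggregation on stock. A minimal Python sketch under that assumption (the field and index names come from the question; the client setup and aggregation names are not part of it):

from elasticsearch import Elasticsearch

es = Elasticsearch("http://localhost:9200")  # assumed local cluster

body = {
    "size": 0,
    "aggs": {
        "unique_stocks": {
            "terms": {"field": "stock"},
            "aggs": {
                "latest": {
                    "top_hits": {
                        "size": 1,  # keep only the newest document per stock
                        "sort": [{"date": {"order": "desc"}}],
                    }
                }
            },
        }
    },
}

resp = es.search(index="datalake", body=body)
for bucket in resp["aggregations"]["unique_stocks"]["buckets"]:
    print(bucket["latest"]["hits"]["hits"][0]["_source"])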

Compare two structs in BigQuery recursively ignoring key order

Let's say I have two complex structs, perhaps with ten levels of nesting and repeated fields. Is there a built-in way to compare these two objects to see if they are the same, ignoring the ordering of keys? This may be related to: Compare two json values for equality. An example might be:
{
"id": "0001",
"type": "donut",
"topping":
[
{ "id": "5001", "type": "None" },
{ "id": "5002", "type": "Glazed" },
{ "id": "5005", "type": "Sugar" },
{ "type": "Powdered Sugar" , "id": "5007"},
{ "id": "5006", "type": "Chocolate with Sprinkles" },
{ "id": "5003", "type": "Chocolate" },
{ "id": "5004", "type": "Maple" }
],
"name": "Cake",
"ppu": 0.55,
"batters":
{
"batter":
[
{ "id": "1001", "type": "Regular" },
{ "id": "1002", "type": "Chocolate" },
{ "id": "1003", "type": "Blueberry" },
{ "id": "1004", "type": "Devil's Food" }
]
}
}
Versus:
{
"id": "0001",
"type": "donut",
"name": "Cake",
"ppu": 0.55,
"batters":
{
"batter":
[
{ "id": "1001", "type": "Regular" },
{ "id": "1002", "type": "Chocolate" },
{ "id": "1003", "type": "Blueberry" },
{ "id": "1004", "type": "Devil's Food" }
]
},
"topping":
[
{ "type": "None", "id": "5001" },
{ "id": "5002", "type": "Glazed" },
{ "id": "5005", "type": "Sugar" },
{ "id": "5007", "type": "Powdered Sugar" },
{ "id": "5006", "type": "Chocolate with Sprinkles" },
{ "id": "5003", "type": "Chocolate" },
{ "id": "5004", "type": "Maple" }
]
}
From the previous question, we learnt that the JSON type allows such a comparison; it is then just a matter of using the JSON type as a proxy to compare the two structs:
with data as (
select struct<a string, b struct<x string, y string>>('a', ('x', 'y')) col1,
struct<b struct<y string, x string>, a string>(('y', 'x'), 'a') col2
)
select col1,
col2,
TO_JSON_STRING(PARSE_JSON(TO_JSON_STRING(col1))) = TO_JSON_STRING(PARSE_JSON(TO_JSON_STRING(col2))) as structs_match
from data;
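
As a side note, the normalize-then-compare idea behind that query can be illustrated outside BigQuery; a small, purely illustrative Python sketch (not part of the BigQuery answer) that treats two nested objects as equal when they differ only in key order, while array element order still matters:

import json

# Two nested objects with the same content but different key order.
a = {"id": "0001", "topping": [{"id": "5001", "type": "None"}]}
b = {"topping": [{"type": "None", "id": "5001"}], "id": "0001"}

# Serialising with sort_keys=True yields a canonical string: object keys are
# sorted recursively, while array element order is preserved.
print(json.dumps(a, sort_keys=True) == json.dumps(b, sort_keys=True))  # True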

Generate JSON Schema with nested dependencies

I'm trying to generate a JSON schema with nested dependencies via https://rjsf-team.github.io/react-jsonschema-form/; here's what I came up with:
{
"type": "object",
"title": "Jira schema",
"properties": {
"summary": {
"type": "string"
},
"description": {
"type": "string"
},
"project": {
"type": "string",
"enum": [
"BE",
"FE"
],
"enumNames": [
"Backend Sprint",
"Frontend Sprint"
],
"default": "BE"
}
},
"required": ["project"],
"dependencies": {
"project": {
"oneOf": [
{
"properties": {
"project": {
"enum": ["BE"]
},
"issuetype": {
"enum": ["10001", "10002"],
"enumNames": ["Task", "Story"],
"default": "10001"
}
},
"required": ["issuetype"]
},
{
"properties": {
"project": {
"enum": ["FE"]
},
"issuetype": {
"enum": ["10003", "10004"],
"enumNames": ["Epic", "Bug"],
"default": "10003"
}
},
"required": ["issuetype"]
}
]
},
"issuetype": {
"oneOf": [
{
"properties":
{
"issuetype": {
"enum": ["10001"],
"enumNames": ["Task"]
},
"priority": {
"enum": ["1", "2", "3"],
"enumNames": ["High", "Medium", "Low"],
"default": "2"
}
}
},
{
"properties":
{
"issuetype": {
"enum": ["10002"],
"enumNames": ["Story"]
},
"priority": {
"enum": ["2", "3"],
"enumNames": ["Medium", "Low"],
"default": "2"
}
}
},
{
"properties":
{
"issuetype": {
"enum": ["10003"],
"enumNames": ["Epic"]
},
"priority": {
"enum": ["3"],
"enumNames": ["Low"],
"default": "3"
}
}
},
{
"properties":
{
"issuetype": {
"enum": ["10004"],
"enumNames": ["Bug"]
},
"priority": {
"enum": ["2", "3"],
"enumNames": ["Medium", "Low"],
"default": "2"
}
}
}
]
}
}
}
Ideally, when I select a project, both issuetype and priority should be updated; the same applies to issuetype: when an issuetype is selected, priority should be updated.
Currently, I'm able to update priority by updating issuetype, but not by updating project.
Any thoughts/ideas are highly appreciated!

How to insert a many-to-many object

I have two structures, very similar to pets-owners; this is my structure:
{
"name": "Contratti",
"fields": {
"rischicontratti": {
"collection": "RischiContratti",
"via": "Contratti"
},
"intervento": {
"collection": "Interventi",
"via": "Contratto"
},
"Numero": {
"type": "string"
},
"Impresa": {
"object": "Imprese"
},
"Oggetto": {
"type": "string"
}
}
},
{
"name": "RischiContratti",
"fields": {
"Contratti": {
"object": "Contratti"
},
"Rischio": {
"object": "Rischi"
}
}
},
{
"name": "Rischi",
"fields": {
"rischicontratti": {
"collection": "RischiContratti",
"via": "Rischio"
},
"rischi_interventi": {
"collection": "rischi_interventi",
"via": "Rischio"
},
"Rischio": {
"type": "string"
}
}
}
When I insert a Contratto, I also want to insert all the related Rischi. Is there a way to post everything in one call, or do I have to post the Contratto first and then loop over each Rischio?