How do i change MongoDB JSON data to array - mongodb-query

I need to update the MongoDB field with the array of objects where JSON object to be updated with as an array
if I have something like this in MongoDB
"designSectionContents" : [
{
"_id" : "5bae17ecbd7595540145ec98",
"type" : "subSection",
"columns" : [
{
"0" : {
"itemId" : "5b7465980783d9a37058f160",
"type" : "field"
}
},
{
"0" : {
"itemId" : "5b7465630783d9a37058f15c",
"type" : "field"
}
},
{
"0" : {
"itemId" : "5b7465810783d9a37058f15e",
"type" : "field"
}
}
],
"subSectionContentLayout" : {
"labelPlacement" : "Top",
"columns" : 3
}
}
]
I want to change the above snippet to below in MongoDB
"designSectionContents" : [
{
"_id" : ObjectId("5bae17ecbd7595540145ec98"),
"type" : "subSection",
"columns" : [
[
{
"itemId" : "5b7465980783d9a37058f160",
"type" : "field"
}
],
[
{
"itemId" : "5b7465630783d9a37058f15c",
"type" : "field"
}
],
[
{
"itemId" : "5b7465810783d9a37058f15e",
"type" : "field"
}
]
]
}
]
curly braces opening and closing tag has to be changed to array

This should work:
db.collection.aggregate([
{
"$project": {
"designSectionContents": {
"$map": {
"input": "$designSectionContents",
"as": "designSectionContent",
"in": {
"_id": "$$designSectionContent._id",
"type": "$$designSectionContent.type",
"columns": {
"$map": {
"input": "$$designSectionContent.columns",
"as": "inp",
"in": [
"$$inp.0"
]
}
}
}
}
}
}
}
]);
Here's the working link.

Related

mapper_parsing_exception in Elasticsearch(Reason: No type specified for field [X])

I wanted to provide explicit mapping to the fields in my document, So I defined a mapping for my index demo and It looks like this below:
PUT /demo
{
"mappings": {
"properties": {
"X" : {
"X" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"Sub_X" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
}
}
}
After running the query , I am getting error as :
{
"error" : {
"root_cause" : [
{
"type" : "mapper_parsing_exception",
"reason" : "No type specified for field [X]"
}
],
"type" : "mapper_parsing_exception",
"reason" : "Failed to parse mapping [_doc]: No type specified for field [X]",
"caused_by" : {
"type" : "mapper_parsing_exception",
"reason" : "No type specified for field [X]"
}
},
"status" : 400
}
The field X in json document looks like :
"X" : {
"X" : [
"a"
],
"Sub_X" : [
[
"b"
]
]
},
Please help me out with this elastic search mapper_parse_exception error.
What you have is called nested data type
You have X which in turn contains X and Sub_X.
Mapping:
{
"properties": {
"X": {
"type": "nested"
}
}
}
Data:
{
"X": {
"X": [
"a"
],
"Sub_X": [
[
"b"
]
]
}
}
Query:
{
"query": {
"nested": {
"path": "X",
"query": {
"bool": {
"must": [
{ "match": { "X.X": "a" }},
{ "match": { "X.Sub_X": "b" }}
]
}
}
}
}
}
It outputs the document.

How to load Nested json data into a single column in druid

I am trying to load nested json data in Apache druid:
Data-->
{
"a": "a_data",
"b": "b_data",
"c_blob_Column": {"aaaa"{"k":"sample"{"c":"sample2"}}}}
Spec -->
{ "type" : "kafka", "dataSchema" : { "dataSource" : "blob", "parser" : { "type" : "string", "parseSpec" : { "format" : "json", "dimensionsSpec" : { "dimensions" : [ "a", "b", "c_blob_Column"
]
},
"timestampSpec": {
"column": "timestamp",
"format": "iso"
}
}
},
"metricsSpec" : [],
"granularitySpec" : {
"type" : "uniform",
"segmentGranularity" : "DAY",
"queryGranularity" : "none",
"rollup" : false
}
},
"ioConfig" : {
"topic":"blob_topic",
"consumerProperties":{
"bootstrap.servers":"<local server>"
},
"appendToExisting" : false,
"useEarliestOffset": true,
"taskDuration": "PT15M"
},
"tuningConfig" : {
"type" : "kafka",
"maxRowsPerSegment" : 5000000,
"maxRowsInMemory" : 25000
}
}
Output columns-->
a,b,c_blob_Column,__time
I am able to load the data but the issue is in the column c_blob_Column the data is not coming as in json form data Could someone please help me to find how to load the json blob data?
you can use jq expression:
"flattenSpec": {
"fields": [
{
"type": "jq",
"name": "c_blob_Column",
"expr": ".c_blob_Column | tojson"
}
]
}

Morphline config file not indexing avro nexted data

I am generating index for my avro data in solr. Index are only getting generated for data elements which are at root level and not which are nested.
Below is the sample schema (not including all of it)
My Avro Schema is as below.
{
"type" : "record",
"name" : "abcd",
"namespace" : "xyz",
"doc" : "Schema Definition for Low Fare Search Shopping Request/Response Data",
"fields" : [ {
"name" : "ShopID",
"type" : "string"
}, {
"name" : "RqSysTimestamp",
"type" : [ "null", "string" ],
"default" : null
}, {
"name" : "RqTimestamp",
"type" : [ "null", "string" ],
"default" : null
}, {
"name" : "RsSysTimestamp",
"type" : [ "null", "string" ],
"default" : null
}, {
"name" : "RsTimestamp",
"type" : [ "null", "string" ],
"default" : null
}, {
"name" : "Request",
"type" : {
"type" : "record",
"name" : "RequestStruct",
"fields" : [ {
"name" : "TransactionID",
"type" : [ "string", "null" ]
}, {
"name" : "AgentSine",
"type" : [ "string", "null" ]
}, {
"name" : "CabinPref",
"type" : [ {
"type" : "array",
"items" : {
"type" : "record",
"name" : "CabinStruct",
"fields" : [ {
"name" : "Cabin",
"type" : [ "string", "null" ]
}, {
"name" : "PrefLevel",
"type" : [ "string", "null" ]
} ]
}
}, "null" ]
}, {
"name" : "CountryCode",
"type" : [ "string", "null" ]
},
"name" : "PassengerStatus",
"type" : [ "string", "null" ]
}, {
}
How do i refer "TransactionID" in my morphline config file. I tried all options but it does not generate index for data elements which are nested.
Below is the sample of my morphline config file.
extractAvroPaths {
flatten : true
paths : {
ShopID : /ShopID
RqSysTimestamp : /RqSysTimestamp
RqTimestamp : /RqTimestamp
RsSysTimestamp :/RsSysTimestamp
RsTimestamp : /RsTimestamp
TransactionID : "/Request/RequestStruct/TransactionID"
AgentSine : "/Request/RequestStruct/AgentSine"
Cabin :/Cabin
PrefLevel :/PrefLevel
CountryCode :/CountryCode
FrequentFlyerStatus :/FrequentFlyerStatus
The toAvro command expects a java.util.Map as input on conversion to a nested Avro record. So this is my solution.
morphlines: [
{
id: convertJsonToAvro
importCommands: [ "org.kitesdk.**" ]
commands: [
# read the JSON blob
{ readJson: {} }
# java code
{
java {
imports : """
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.kitesdk.morphline.base.Fields;
import java.io.IOException;
import java.util.Set;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
"""
code : """
String jsonStr = record.getFirstValue(Fields.ATTACHMENT_BODY).toString();
ObjectMapper mapper = new ObjectMapper();
Map<String, Object> map = null;
try {
map = (Map<String, Object>)mapper.readValue(jsonStr, Map.class);
} catch (IOException e) {
e.printStackTrace();
}
Set<String> keySet = map.keySet();
for (String o : keySet) {
record.put(o, map.get(o));
}
return child.process(record);
"""
}
}
# convert the extracted fields to an avro object
# described by the schema in this field
{ toAvro {
schemaFile: /etc/flume/conf/a1/like_user_event_realtime.avsc
} }
#{ logInfo { format : "loginfo: {}", args : ["#{}"] } }
# serialize the object as avro
{ writeAvroToByteArray: {
format: containerlessBinary
} }
]
}
]

MongoDB - How to make a query to get the average usage?

Here is how the documents in db look like.
/* 1 */
{
"_id" : 1,
"feat" : {
"processName": [
{
"value" : {
"value": "Process1"
}
}
],
"processUsage": [
{
"value" : {
"value": 23.21
}
}
]
}
}
/* 2 */
{
"_id" : 2,
"feat" : {
"processName": [
{
"value" : {
"value": "Process2"
}
}
],
"memoryUsage": [
{
"value" : {
"value": 2.411502e+05
}
}
]
}
}
/* 3 */
{
"_id" : 3,
"feat" : {
"processName": [
{
"value" : {
"value": "Process1"
}
}
],
"processUsage": [
{
"value" : {
"value": 67.42
}
}
]
}
}
/* 4 */
{
"_id" : 4,
"feat" : {
"processName": [
{
"value" : {
"value": "Process3"
}
}
],
"processUsage": [
{
"value" : {
"value": 39.97
}
}
]
}
}
/* 5 */
{
"_id" : 5,
"feat" : {
"processName": [
{
"value" : {
"value": "Process2"
}
}
],
"processUsage": [
{
"value" : {
"value": 21.05
}
}
]
}
}
Each process has entries with processUsage and memoryUsage. What I am interest in is the average processUsage. So, I'd like to ignore the entries with memoryUsage.
I tried $match + $group in an aggregate with $avg, but for each process I just got back as average 0.00000000.
Then I tried my luck with mapReduce using javascript, unfortunately it did not work out either.
Could someone just show me how to do that? By the way, I am using Robomongo 0.8.5
Edit:
The query looks like this:
db.database.aggregate([
{ $match : {"$feat.processUsage.value.value": {$gt : -1}
},
{
$group: {_id: "$feats.processName.value.value", average: {$avg:
"$feats.processUsage.value.value"}
}
])
You can use the following aggregate query:
db.test.aggregate(
[
{
$unwind : "$feat.processUsage"
},
{
$group: {
_id: "$feat.processName.value.value",
average: {$avg:"$feat.processUsage.value.value"}
}
}
]
)
Unwinding in the initial phase will let you filter documents that has processUsage key in document.
Result:
{ "_id" : [ "Process2" ], "average" : 21.05 }
{ "_id" : [ "Process3" ], "average" : 39.97 }
{ "_id" : [ "Process1" ], "average" : 45.315 }

SQL Where clause equivalent for Elastic Search

I am trying to create a aggregate results in elastic search but filter option is not working for me.
I can aggregate data without filter e.g.
select name , material ,sum(price)
from products group by name , material
curl -XGET 'http://localhost:9200/products/_search?pretty=true' -d'
{
"aggs" : {
"product" : {
"terms" : {
"field" : "name"
},
"aggs" : {
"material" : {
"terms" : {
"field" : "material"
},
"aggs" : {
"sum_price" : {
"sum" : {
"field" : "price"
}
}
}
}
}
}
},
"size" : 0
}'
but I am facing problems to write equivalent DSL query of :
select name , material ,sum(price)
from products
where material = "wood"
group by name , material
Should be something like this:
{
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"term": {
"material": "wood"
}
}
}
},
"aggs" : {
"product" : {
"terms" : {
"field" : "name"
},
"aggs" : {
"material" : {
"terms" : {
"field" : "material"
},
"aggs" : {
"sum_price" : {
"sum" : {
"field" : "price"
}
}
}
}
}
}
},
"size" : 0
}
Use a filter if you know the exact value and do not need a match, else use a match query instead of the filtered query.
You can use match
{
"query": {
"bool": {
"must": [
{
"match": {
"material": "wood"
}
}
],
"filter": [
{
"match_all": {}
},
]
}
},
"aggs" : {
"product" : {
"terms" : {
"field" : "name"
},
"aggs" : {
"material" : {
"terms" : {
"field" : "material"
},
"aggs" : {
"sum_price" : {
"sum" : {
"field" : "price"
}
}
}
}
}
}
},
"size" : 0
}