How can I define Avro Schema to catch keys which may come or may not?

How can I define Avro Schema to catch keys which may come or may not? - schema

I'm trying to automate a task in NiFi, where I have n possible records. For example:
{
"id":"foo",
"date":"2020-06-24",
"key_1":
{
"bar":"value1"
}
}
Other example:
{
"id":"foo",
"date":"2020-06-24",
"key_2":
{
"bar":"value2"
}
}
And so on. What I'm trying to do is use MergeRecord to combine these records by id and date, like this:
[
{
"id":"foo",
"date":"2020-06-24",
"key_1":
{
"bar":"value1"
}
},
{
"id":"foo",
"date":"2020-06-24",
"key_2":
{
"bar":"value2"
}
},
...
]
But this is what I have:
[
{
"id":"foo",
"date":"2020-06-24",
"key_1":
{
"bar":"value1"
},
"key_2":null,
"key_3":null,...
},
{
"id":"foo",
"date":"2020-06-24",
"key_1":null,
"key_2":
{
"bar":"value2"
},
"key_3":null,...
},
...
]
This is my avro schema:
{"name":"foo",
"type":"record",
"fields":[
{"name":"id","type":"string"},
{"name":"date","type":"string"},
{"name":"key_1",
"type":["null",{
"name":"key_1", "type":"record",
"fields":[
{"name":"bar","type":"double"}
]
}]},
{"name":"key_2",
"type":["null",{
"name":"key_2", "type":"record",
"fields":[
{"name":"bar","type":"double"}
]
}]},
{"name":"key_3",
"type":["null",{
"name":"key_3", "type":"record",
"fields":[
{"name":"bar","type":"double"}
]
}]},
...
],
}
I don't even know if there is any way to tell NiFi that some keys may or may not be present. Defining each one as a nullable type (a union with "null") is my best shot.

Nullable fields will be your best bet. It's really Avro that's doing most of the heavy lifting here. Having a sparse record is fine.

Related

Mongodb aggregation to find outliers

In my mongodb collection documents are stored in the following format:
{ "_id" : ObjectId("62XXXXXX"), "res" : 12, ... }
{ "_id" : ObjectId("63XXXXXX"), "res" : 23, ... }
{ "_id" : ObjectId("64XXXXXX"), "res" : 78, ... }
...
I need to extract the ids of the documents for which the value of "res" is an outlier (i.e. value < Q1 - 1.5 * IQR or value > Q3 + 1.5 * IQR, where Q1 and Q3 are the 25th and 75th percentiles and IQR = Q3 - Q1). I have done this with pandas by retrieving all documents from the collection, which may become slow if the number of documents in the collection becomes too big.
Is there a way to do this using mongodb aggregation pipeline (or just calculating percentiles)?

If I understand how you want to retrieve outliers, here's one way you might be able to do it.
// Finds outliers of the "res" field: estimates Q1 and Q3 from $bucketAuto
// bucket boundaries, derives the thresholds Q1 - 1.5*IQR and Q3 + 1.5*IQR,
// then looks up the _id of every document whose "res" lies outside them.
// The pipeline ends with a single document holding outlierThresholdLower,
// outlierThresholdUpper and an "outliers" array of {_id} documents.
// NOTE(review): the quartiles are approximated by bucket boundaries, not
// interpolated percentiles, so they may differ slightly from what pandas
// computes - confirm this is acceptable.
db.collection.aggregate([
{ // partition res into quartiles
// Each output document's _id carries the bucket's min/max boundaries;
// the next stage reads $_id.max.
"$bucketAuto": {
"groupBy": "$res",
"buckets": 4
}
},
{ // get the max of each quartile
"$group": {
"_id": "$_id.max"
}
},
{ // sort the quartile maxs
"$sort": {
"_id": 1
}
},
{ // put sorted quartile maxs into array
"$group": {
"_id": null,
"maxs": {"$push": "$_id"}
}
},
{ // assign Q1 and Q3
// Q1 is the upper boundary of the 1st bucket, Q3 that of the 3rd bucket.
// NOTE(review): if fewer than three buckets are produced (e.g. very few
// distinct "res" values), index 2 does not exist and q3 will be missing -
// confirm how that case should be handled.
"$project": {
"_id": 0,
"q1": {"$arrayElemAt": ["$maxs", 0]},
"q3": {"$arrayElemAt": ["$maxs", 2]}
}
},
{ // set IQR
"$set": {
"iqr": {
"$subtract": ["$q3", "$q1"]
}
}
},
{ // assign upper/lower outlier thresholds
"$project": {
"outlierThresholdLower": {
"$subtract": [
"$q1",
{"$multiply": ["$iqr", 1.5]}
]
},
"outlierThresholdUpper": {
"$add": [
"$q3",
{"$multiply": ["$iqr", 1.5]}
]
}
}
},
{ // get outlier _id's
// "from" names the collection to scan for outliers; here it is the literal
// name "collection", i.e. the same collection the aggregate runs on.
// Replace it together with db.collection when adapting this pipeline.
"$lookup": {
"from": "collection",
"as": "outliers",
"let": {
"oTL": "$outlierThresholdLower",
"oTU": "$outlierThresholdUpper"
},
"pipeline": [
{
// keep documents strictly below the lower or strictly above the upper threshold
"$match": {
"$expr": {
"$or": [
{"$lt": ["$res", "$$oTL"]},
{"$gt": ["$res", "$$oTU"]}
]
}
}
},
{
// return only the _id of each outlier
"$project": {
"_id": 1
}
}
]
}
}
])
Try it on mongoplayground.net.

One more option, based on @rickhg12hs's answer, is to use $setWindowFields:
// Variant of the outlier search that locates Q1 and Q3 by rank instead of by
// buckets: every document gets its position in ascending "res" order, the
// documents sitting near the quarter and three-quarter positions are kept,
// and their "res" values are used as Q1 and Q3.
// Ends with one document: outlierThresholdLower, outlierThresholdUpper and
// an "outliers" array of {_id} documents.
// NOTE(review): $setWindowFields needs a server version that supports it
// (MongoDB 5.0+ per the official docs) - confirm for your deployment.
db.collection.aggregate([
// Sort by res ascending and annotate each document with:
//   totalCount - number of documents in the (single, unpartitioned) window
//   index      - running count from the first document up to the current
//                one, i.e. the document's 1-based position in sort order
{$setWindowFields: {
sortBy: {res: 1},
output: {
totalCount: {$count: {}},
index: {$sum: 1, window: {documents: ["unbounded", "current"]}}
}
}
},
// Keep documents for which
//   ((index + round(totalCount / 4)) * 2) mod totalCount
// is at most 1. That expression is ~0 when index is about totalCount/4 or
// about 3*totalCount/4, so this selects the documents around the 25th and
// 75th percentile positions. The "$subtract [..., 0]" is a no-op.
// NOTE(review): more than two documents can pass this filter, and for very
// small collections the selection may be degenerate - verify on real data.
{$match: {
$expr: {$lte: [
{$abs: {$subtract: [
{$mod: [
{$multiply: [
{$add: ["$index", {$round: {$divide: ["$totalCount", 4]}}]}, 2]},
"$totalCount"
]}, 0]}
}, 1]}
}},
// Collect the surviving res values into one array.
// NOTE(review): the next stage assumes this array is still in ascending
// res order (first = Q1, last = Q3) - confirm ordering is preserved here.
{$group: {_id: null, res: {$push: "$res"}}},
// Q1 = first collected value, Q3 = last collected value, IQR = Q3 - Q1.
{$project: {_id: 0, q1: {$first: "$res"}, q3: {$last: "$res"},
iqr: {"$subtract": [{$last: "$res"}, {$first: "$res"}]}
}},
// Outlier thresholds: Q1 - 1.5*IQR and Q3 + 1.5*IQR.
{$project: {
outlierThresholdLower: {$subtract: ["$q1", {$multiply: ["$iqr", 1.5]}]},
outlierThresholdUpper: {$add: ["$q3", {$multiply: ["$iqr", 1.5]}]}
}
},
// Re-scan the collection named "collection" (the same one the aggregate
// runs on; rename both together) and return the _id of every document whose
// res is strictly below the lower or strictly above the upper threshold.
{$lookup: {
from: "collection",
as: "outliers",
let: {oTL: "$outlierThresholdLower", oTU: "$outlierThresholdUpper"},
pipeline: [
{$match: {$expr: {$or: [{$lt: ["$res", "$$oTL"]}, {$gt: ["$res", "$$oTU"]}]}}},
{$project: {_id: 1}}
]
}
}
])
See how it works on the playground example

How to issue ticket in Amadeus after flight-order request?

{
"data":{
"type":"flight-order",
"id":"eJzTd9cPijL1Cg8FAAuUAn0%3D",
"associatedRecords":[
{
"reference":"RZ5JWU",
"creationDate":"2022-01-13T05:40:00.000",
"originSystemCode":"GDS",
"flightOfferId":"1"
}
],
"flightOffers":[
{
"type":"flight-offer",
"id":"1",
"source":"GDS",
"nonHomogeneous":false,
"lastTicketingDate":"2022-03-31",
"itineraries":[
{
"segments":[
{
"departure":{
"iataCode":"ISB",
"at":"2022-03-30T01:40:00"
},
"arrival":{
"iataCode":"DXB",
"terminal":"1",
"at":"2022-03-31T03:55:00"
},
"carrierCode":"PK",
"number":"233",
"aircraft":{
"code":"320"
},
"operating":{
},
"id":"1",
"numberOfStops":0,
"co2Emissions":[
{
"weight":141,
"weightUnit":"KG",
"cabin":"ECONOMY"
}
]
}
]
}
],
"price":{
"currency":"PKR",
"total":"25235.00",
"base":"15190.00",
"fees":[
{
"amount":"0.00",
"type":"TICKETING"
},
{
"amount":"0.00",
"type":"SUPPLIER"
},
{
"amount":"0.00",
"type":"FORM_OF_PAYMENT"
}
],
"grandTotal":"25234.00",
"billingCurrency":"PKR"
},
"pricingOptions":{
"fareType":[
"PUBLISHED"
],
"includedCheckedBagsOnly":true
},
"validatingAirlineCodes":[
"PK"
],
"travelerPricings":[
{
"travelerId":"1",
"fareOption":"STANDARD",
"travelerType":"ADULT",
"price":{
"currency":"PKR",
"total":"25234.00",
"base":"15190.00",
"taxes":[
{
"amount":"5000.00",
"code":"RG"
},
{
"amount":"2000.00",
"code":"SP"
},
{
"amount":"2800.00",
"code":"YD"
},
{
"amount":"244.00",
"code":"ZR"
}
],
"refundableTaxes":"10044.00"
},
"fareDetailsBySegment":[
{
"segmentId":"1",
"cabin":"ECONOMY",
"fareBasis":"VLOWPK",
"class":"V",
"includedCheckedBags":{
"weight":30,
"weightUnit":"KG"
}
}
]
}
]
}
],
"travelers":[
{
"id":"1",
"dateOfBirth":"2003-01-03",
"gender":"FEMALE",
"name":{
"firstName":"Fakhar",
"lastName":"Khan"
},
"documents":[
{
"number":"AG324234234",
"issuanceDate":"2015-01-17",
"expiryDate":"2025-01-17",
"issuanceCountry":"PK",
"issuanceLocation":"Pakistan",
"nationality":"PK",
"documentType":"PASSPORT",
"holder":true
}
],
"contact":{
"purpose":"STANDARD",
"phones":[
{
"deviceType":"MOBILE",
"countryCallingCode":"92",
"number":"3452345678"
}
],
"emailAddress":"hamidafridi.droidor#gmail.com"
}
}
],
"remarks":{
"general":[
{
"subType":"GENERAL_MISCELLANEOUS",
"text":"ONLINE BOOKING FROM INCREIBLE VIAJES"
}
]
},
"ticketingAgreement":{
"option":"DELAY_TO_CANCEL",
"delay":"6D"
},
"contacts":[
{
"addresseeName":{
"firstName":"PABLO RODRIGUEZ"
},
"address":{
"lines":[
"Calle Prado, 16"
],
"postalCode":"28014",
"countryCode":"ES",
"cityName":"Madrid"
},
"purpose":"STANDARD",
"phones":[
{
"deviceType":"LANDLINE",
"countryCallingCode":"34",
"number":"480080071"
},
{
"deviceType":"MOBILE",
"countryCallingCode":"33",
"number":"480080072"
}
],
"companyName":"INCREIBLE VIAJES",
"emailAddress":"support#increibleviajes.es"
}
]
},
"dictionaries":{
"locations":{
"ISB":{
"cityCode":"ISB",
"countryCode":"PK"
},
"DXB":{
"cityCode":"DXB",
"countryCode":"AE"
}
}
}
}
I sent a create-order request, which returned the #PNR shown above, and now I want to issue the ticket. #Amadeus

As of now, this Flight Create Orders API allows you to book a flight and generate a PNR, but it does not allow for ticketing. Therefore, one of the requirements in order to use the API in production is to sign a contract with an airline consolidator to issue tickets.
Please check the requirements on the API page. If you want help to find a consolidator get in touch with us via the support channel and we can recommend you one.

Counting $lookup and $unwind documents filtered with $match without getting rid of parent document when all results match

I have a collection "Owners" and I want to return a list of "Owner" matching a filter (any filter), plus the count of "Pet" from the "Pets" collection for that owner, except I don't want the dead pets. (made up example)
I need the returned documents to look exactly like an "Owner" document with the addition of the "petCount" field because I'm using Java Pojos with the Mongo Java driver.
I'm using AWS DocumentDB that does not support $lookup with filters yet. If it did I would use this and I'd be done:
db.Owners.aggregate( [
{ $match: {_id: UUID("b13e733d-2686-4266-a686-d3dae6501887")} },
{ $lookup: { from: 'Pets', as: 'pets', 'let': { ownerId: '$_id' }, pipeline: [ { $match: { $expr: { $ne: ['$state', 'DEAD'] } } } ] } },
{ $addFields: { petCount: { $size: '$pets' } } },
{ $project: { pets: 0 } }
]).pretty()
But since it doesn't this is what I got so far:
db.Owners.aggregate( [
{ $match: {_id: { $in: [ UUID("cbb921f6-50f8-4b0c-833f-934998e5fbff") ] } } },
{ $lookup: { from: 'Pets', localField: '_id', foreignField: 'ownerId', as: 'pets' } },
{ $unwind: { path: '$pets', preserveNullAndEmptyArrays: true } },
{ $match: { 'pets.state': { $ne: 'DEAD' } } },
{ "$group": {
"_id": "$_id",
"doc": { "$first": "$$ROOT" },
"pets": { "$push": "$pets" }
}
},
{ $addFields: { "doc.petCount": { $size: '$pets' } } },
{ $replaceRoot: { "newRoot": "$doc" } },
{ $project: { pets: 0 } }
]).pretty()
This works perfectly, except if an Owner only has "DEAD" pets, then the owner doesn't get returned because all the "document copies" got filtered out by the $match. I'd need the parent document to be returned with petCount = 0 when ALL of them are "DEAD". I cannot figure out how to do this.
Any ideas?
These are the supported operations for DocDB 4.0 https://docs.amazonaws.cn/en_us/documentdb/latest/developerguide/mongo-apis.html

EDIT: update to use $filter as $reduce not supported by aws document DB
You can use $filter to keep only not DEAD pets in the lookup array, then count the size of the remaining array.
Here is the Mongo playground for your reference.
$reduce version
You can use $reduce in your aggregation pipeline to do a conditional sum for the state.
Here is Mongo playground for your reference.

As of January 2022, Amazon DocumentDB added support for $reduce, the solution posted above should work for you.
Reference.

How to implement group by in Dataweave based on first column in CSV

I have an incoming CSV file that looks like this (notice that the first field is common - this is the order number)
36319602,100,12458,HARVEY NORMAN,
36319602,101,12459,HARVEY NORMAN,
36319602,102,12457,HARVEY NORMAN,
36319601,110,12458,HARVEY NORMAN,
36319601,111,12459,HARVEY NORMAN,
36319601,112,12457,HARVEY NORMAN,
36319603,110,12458,HARVEY NORMAN,
36319603,121,12459,HARVEY NORMAN,
36319603,132,12457,HARVEY NORMAN,
This is my current Dataweave code
list_of_orders: {
order: payload map ((payload01 , indexOfPayload01) -> {
order_dtl:
[{
seq_nbr: payload01[1],
route_nbr: payload01[2]
}],
order_hdr: {
ord_nbr: payload01[0],
company: payload01[3],
city: payload01[4],
}
})
}
An example of the desired output would be something like this ... (this is just mocked up). Notice how I would like a single header grouped by the first column which is the order number - but with multiple detail lines
"list_of_orders": {
"order": [
{
"order_dtl": [
{
seq_nbr: 100,
route_nbr: 12458
},
{
seq_nbr: 101,
route_nbr: 12459
},
{
seq_nbr: 102,
route_nbr: 12457
}
],
"order_hdr":
{
ord_nbr: 36319602,
company: HARVEY NORMAN
}
}
]
}
It works fine except that it is repeating the order_hdr key.
What they would like is a single header key with multiple details beneath.
The grouping is to be based on "ord_nbr: payload01[0]"
Any help appreciated
Thanks

I think you're using DataWeave 1. In DW1, this groupBy produces the desired output. (Note: you can change the positional field references [0], [1], etc. to field-name mappings if you have them set up as metadata.)
%dw 1.0
%output application/json
---
// Builds one "order" entry per distinct order number (CSV column 0), each
// with a single header and all of that order's detail lines.
// Columns are addressed by position: [0] order number, [1] sequence number,
// [2] route number, [3] company.
list_of_orders: {
// groupBy ($[0]) groups the rows by their first column; the outer map then
// runs once per group, with $ bound to that group's list of rows.
// NOTE(review): the order in which groups are emitted is decided by groupBy,
// not by the input order - confirm if ordering matters downstream.
order: (payload groupBy ($[0])) map {
// inner map: here $ is a single row of the current group
order_dtl: $ map {
seq_nbr: $[1],
route_nbr: $[2]
},
// header values are taken from the first row of the group ($[0]), since
// the order number is the same for every row in the group
// NOTE(review): company is presumably also constant within an order - verify
order_hdr:
{
ord_nbr: $[0][0],
company: $[0][3]
}
}}
UPDATE
Here is the output for the new input sample with multiple orders:
{
"list_of_orders": {
"order": [
{
"order_dtl": [
{
"seq_nbr": "110",
"route_nbr": "12458"
},
{
"seq_nbr": "121",
"route_nbr": "12459"
},
{
"seq_nbr": "132",
"route_nbr": "12457"
}
],
"order_hdr": {
"ord_nbr": "36319603",
"company": "HARVEY NORMAN"
}
},
{
"order_dtl": [
{
"seq_nbr": "100",
"route_nbr": "12458"
},
{
"seq_nbr": "101",
"route_nbr": "12459"
},
{
"seq_nbr": "102",
"route_nbr": "12457"
}
],
"order_hdr": {
"ord_nbr": "36319602",
"company": "HARVEY NORMAN"
}
},
{
"order_dtl": [
{
"seq_nbr": "110",
"route_nbr": "12458"
},
{
"seq_nbr": "111",
"route_nbr": "12459"
},
{
"seq_nbr": "112",
"route_nbr": "12457"
}
],
"order_hdr": {
"ord_nbr": "36319601",
"company": "HARVEY NORMAN"
}
}
]
}
}

opendaylight bgp-linkstate not making "loc-rib"

ODL version: Carbon
I'm having a problem with getting BGP-LS into the Network Topology. As you can see from below REST output, I set up "bgp-example" and homed to an external eBGP linkstate peer. "effective-rib-in", "adj-rib-in", and "adj-rib-out" all populate - but "loc-rib" does not. For some reason, it is not inheriting the linkstate afi/safi.
I tried debugs for bgp & karaf but saw nothing out of the ordinary (that I could see) - any help would be much appreciated.
thanks
Erik
*bgp configuration
http://192.168.3.42:8181/restconf/config/openconfig-network-instance:network-instances/network-instance/global-bgp/protocols/protocol/openconfig-policy-types:BGP/bgp-example
{
"protocol": [
{
"name": "bgp-example",
"identifier": "openconfig-policy-types:BGP",
"bgp-openconfig-extensions:bgp": {
"global": {
"config": {
"router-id": "192.168.3.42",
"as": 65000
}
},
"neighbors": {
"neighbor": [
{
"neighbor-address": "192.168.3.41",
"config": {
"peer-type": "EXTERNAL",
"peer-as": 65111
},
"afi-safis": {
"afi-safi": [
{
"afi-safi-name": "bgp-openconfig-extensions:LINKSTATE"
}
]
}
}
]
}
}
}
]
}
*loc-rib empty
http://192.168.3.42:8181/restconf/operational/bgp-rib:bgp-rib/rib/bgp-example/loc-rib
{
"loc-rib": {
"tables": [
{
"afi": "bgp-types:ipv4-address-family",
"safi": "bgp-types:unicast-subsequent-address-family",
"bgp-inet:ipv4-routes": {}
}
]
}
}
as you can see, linkstate is making it into every rib, except loc-rib
http://192.168.3.42:8181/restconf/operational/bgp-rib:bgp-rib/rib/bgp-example
{
"rib": [
{
"id": "bgp-example",
"peer": [
{
"peer-id": "bgp://x.x.x.x",
"supported-tables": [
{
"afi": "bgp-types:ipv4-address-family",
"safi": "bgp-types:unicast-subsequent-address-family"
},
{
"afi": "bgp-linkstate:linkstate-address-family",
"safi": "bgp-linkstate:linkstate-subsequent-address-family"
}
],
"effective-rib-in": {
"tables": [
{
"afi": "bgp-linkstate:linkstate-address-family",
"safi": "bgp-linkstate:linkstate-subsequent-address-family",
"bgp-linkstate:linkstate-routes": {
"linkstate-route": [
{
"route-key": "AAMAMAIAAAAAAAAFMgEAABoCAAAEAAD+VwIBAAQAAAAAAgMABgEAFQmQAAEJAAUgCv0YAQ==",
"identifier": 1330,
"advertising-node-descriptors": {
"as-number": 65111,
"domain-id": 0,
"isis-node": {
"iso-system-id": "AQAVCZAA"
}
},
"prefix-descriptors": {
"ip-reachability-information": "x.x.x.x/32"
},
"attributes": {
"origin": {
"value": "igp"
},
"ipv4-next-hop": {
"global": "x.x.x.x"
},
"as-path": {
"segments": [
{
"as-sequence": [
65111
]
}
]
}
},
"protocol-id": "isis-level2"
}
}
rest of output truncated for brevity/readability

OK, figured this out.... turns out I had not enabled LINKSTATE afi/safi in the global config for ODL BGP. I had to DELETE my existing global config, then POST, add neighbors, peers, etc. Now I have the linkstate DB in the loc-rib, AND it's made it to the network topology - BUT - no idea how to view this topology via DLUX....

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

How can I define Avro Schema to catch keys which may come or may not? - schema

Nullable fields will be your best bet. It's really Avro that's doing most of the heavy lifting here. Having a sparse record is fine.

Related

Mongodb aggregation to find outliers

How to issue ticket in Amadeus after flight-order request?

Counting $lookup and $unwind documents filtered with $match without getting rid of parent document when all results match

How to implement group by in Dataweave based on first column in CSV

opendaylight bgp-linkstate not making "loc-rib"

Categories

Resources