MongoDB Aggregation Pipeline Optimization - mongodb

I have an aggregation pipeline as mentioned below. Considering that there can be hundreds of thousands of documents on which I want to apply this pipeline, I have questions on how it can be optimized. Below are my questions:
1.) What is the optimal index that can be applied to the match query? Should I have a single compound index on myArray.tags.tagKey, myArray.tags.tagValue, myArray.someRules.count, myArray.entryTime, or should I create separate indexes for each of these fields? What is the best way to do this?
2.) As you can see in my pipeline, I am applying the $match operator twice to filter the documents: first at the beginning of the pipeline, to get the benefit of my index, and second after the $unwind operation, to filter the specific elements of the array "myArray". Is there any way I can combine them? In other words, can array elements be filtered using the $match aggregation operator?
Here is an example of my document:
{
"_id" : "8001409457639",
"myArray" : [
{
"entryID" : ObjectId("5101ab1116b12614fb083022"),
"requestID" : "adb0d2asdfbe10",
"owner" : "ABC",
"version" : 1,
"requestType" : "1",
"someRules" : {
"count" : 0,
"somethingElse" : 11
},
"entryTime" : ISODate("2015-09-22T19:25:19.014Z"),
"tags" : [
{
"tagKey" : "Owner",
"tagValue" : "ABC"
},
{
"tagKey" : "Request Type",
"tagValue" : "1"
}
]
},
{
"entryID" : ObjectId("5101a111c6b12614fb083022"),
"requestID" : "fc057asdf16480",
"owner" : "ABC",
"version" : 1,
"requestType" : "1",
"someRules" : {
"count" : 10,
"somethingElse" : 0
},
"entryTime" : ISODate("2015-09-22T19:44:26.558Z"),
"tags" : [
{
"tagKey" : "Owner",
"tagValue" : "ABC"
},
{
"tagKey" : "Request Type",
"tagValue" : "1"
}
]
},
{
"entryID" : ObjectId("5101b111c6b12614fb083011"),
"requestID" : "40c7a0ads2dd8c2",
"owner" : "ABC",
"version" : 1,
"requestType" : "1",
"someRules" : {
"count" : 10,
"somethingElse" : 0
},
"entryTime" : ISODate("2015-09-22T20:24:15.347Z"),
"tags" : [
{
"tagKey" : "Owner",
"tagValue" : "ABC"
},
{
"tagKey" : "Request Type",
"tagValue" : "1"
}
]
}
],
"lockAcquiredBy" : "james",
"lockStartTime" : ISODate("2015-11-18T22:36:05.266Z")
}
This is my aggregation pipeline:
[{
"$match": {
"$and": [{
"$or": [{
"myArray.tags": {
"$elemMatch": {
"tagKey": "Owner",
"tagValue": "ABC"
}
}
}, {
"myArray.tags": {
"$elemMatch": {
"tagKey": "Owner",
"tagValue": "DEF"
}
}
}]
}, {
"$or": [{
"myArray.tags": {
"$elemMatch": {
"tagKey": "Request Type",
"tagValue": "4"
}
}
}, {
"myArray.tags": {
"$elemMatch": {
"tagKey": "Request Type",
"tagValue": "Retry"
}
}
}]
}],
"myArray.someRules.count": 0,
"myArray.entryTime": {
"$gte": {
"$date": "2016-01-05T01:59:07.763Z"
}
}
}
}, {
"$unwind": "$myArray"
}, {
"$match": {
"$and": [{
"$or": [{
"myArray.tags": {
"$elemMatch": {
"tagKey": "Owner",
"tagValue": "ABC"
}
}
}, {
"myArray.tags": {
"$elemMatch": {
"tagKey": "Owner",
"tagValue": "DEF"
}
}
}]
}, {
"$or": [{
"myArray.tags": {
"$elemMatch": {
"tagKey": "Request Type",
"tagValue": "4"
}
}
}, {
"myArray.tags": {
"$elemMatch": {
"tagKey": "Request Type",
"tagValue": "Retry"
}
}
}]
}],
"myArray.someRules.count": 0,
"myArray.entryTime": {
"$gte": {
"$date": "2016-01-05T01:59:07.763Z"
}
}
}
}
// More steps in the pipeline.
]

Related

match element in the array with aggregation

I have a MongoDB collection with the following structure:
{
{
"_id" : ObjectId("63e37afe7a3453d5014c011b"),
"schemaVersion" : NumberInt(1),
"Id" : "ObjectId("63e37afe7a3453d5014c0112")",
"Id1" : "ObjectId("63e37afe7a3453d5014c0113")",
"Id2" : "ObjectId("63e37afe7a3453d5014c0114")",
"collectionName" : "Country",
"List" : [
{
"countryId" : NumberInt(1),
"name" : "Afghanistan",
},{
"countryId" : NumberInt(1),
"name" : "India",
},
{
"countryId" : NumberInt(1),
"name" : "USA",
}
}
I need to match on Id, Id1, Id2, collectionName, and the name inside List in order to get the countryId. For example, if I match the values below:
"Id" : "ObjectId("63e37afe7a3453d5014c0112")",
"Id1" : "ObjectId("63e37afe7a3453d5014c0113")",
"Id2" : "ObjectId("63e37afe7a3453d5014c0114")",
"collectionName" : "Country",
"name" : "Afghanistan",
i need result
{
"countryId" : 1,
"name" : "Afghanistan",
}
i tried like below
db.country_admin.aggregate([
{ $match: { collectionName: "Country" } },
{ $unwind : '$countryList' },
{ $project : { _id : 0, 'countryList.name' : 1, 'countryList.countryId' : 1 } }
]).pretty()
and i have following output
[
{
"List" : {
"countryId" : 1.0,
"name" : "Afghanistan"
}
},
{
"List" : {
"countryId" : 2.0,
"name" : "india"
}
},
{
"List" : {
"countryId" : 3.0,
"name" : "USA"
}
}]
You can try using $filter to avoid $unwind like this example:
First $match by your desired condition(s).
Then $filter the List array and take the first element (because "List.name": "Afghanistan" is used in the $match stage, there will be at least one result).
And output only values you want using $project.
// Return the countryId and name of the List entry named "Afghanistan"
// without unwinding the array.
db.collection.aggregate([
  // Stage 1: keep only documents whose identifiers and collection name match
  // and that contain at least one List entry with the requested name.
  // NOTE(review): the sample document shows Id/Id1/Id2 wrapped in quotes; if
  // they are stored as strings, compare against strings here - confirm.
  {
    $match: {
      Id: ObjectId("63e37afe7a3453d5014c0112"),
      Id1: ObjectId("63e37afe7a3453d5014c0113"),
      Id2: ObjectId("63e37afe7a3453d5014c0114"),
      collectionName: "Country",
      "List.name": "Afghanistan"
    }
  },
  // Stage 2: reduce List to the entries named "Afghanistan" and keep the
  // first of them as `country`.
  {
    $project: {
      country: {
        $arrayElemAt: [
          {
            $filter: {
              input: "$List",
              cond: { $eq: ["$$this.name", "Afghanistan"] }
            }
          },
          0
        ]
      }
    }
  },
  // Stage 3: flatten `country` into the two requested fields and drop _id.
  {
    $project: {
      _id: 0,
      countryId: "$country.countryId",
      name: "$country.name"
    }
  }
])
Example here
By the way, using $unwind is also possible and you can check this example

how to sort an array in a nested array which is located under an object in mongodb

I have a collection data like below.
{
"name": "Devices",
"exten": {
"parameters": [{
"name": "Date",
"value": ["5","2"]
}, {
"name": "Time",
"value": ["2"]
}, {
"name": "Season",
"value": ["6"]
}
]
}
}
I want to fetch all documents whose name is "Devices" and sort them by the first element of the "value" array of the parameter whose name is "Date".
ex: mongo will get
name = "devices"
exten.parameters.name = "Date"
will sort it by
exten.parameters.value[0]
in this example it will be sorted by "5".
The query below returns 0 records.
db.brand.aggregate(
{ $match: {
"name" : "Devices"
}},
{ $unwind: "$exten.parameters" },
{ $match: {
'exten.parameters.name': 'Date'
}},
{ $sort: {
'exten.parameters.value': -1
}}
)
The following query can get us the expected output:
// List the "Date" parameters of the "Devices" documents, ordered by the first
// element of each parameter's value array.
db.collection.aggregate([
  { "$match": { "name": "Devices" } },
  // Emit one document per entry of exten.parameters.
  { "$unwind": "$exten.parameters" },
  // Keep only the entries that describe a "Date" parameter.
  { "$match": { "exten.parameters.name": "Date" } },
  // Expose value[0] as a temporary top-level field so it can be sorted on.
  {
    "$project": {
      "name": 1,
      "exten": 1,
      "firstParam": { "$arrayElemAt": ["$exten.parameters.value", 0] }
    }
  },
  // Ascending order. The sample values are strings ("5", "2"), so they are
  // compared as strings rather than as numbers.
  { "$sort": { "firstParam": 1 } },
  // Remove the temporary sort key from the result.
  { "$project": { "firstParam": 0 } }
]).pretty()
Data set:
{
"_id" : ObjectId("5da02fb86472ba670fd8c159"),
"name" : "Devices",
"exten" : {
"parameters" : [
{
"name" : "Date",
"value" : [
"5",
"2"
]
},
{
"name" : "Date",
"value" : [
"2",
"7"
]
},
{
"name" : "Time",
"value" : [
"2"
]
},
{
"name" : "Season",
"value" : [
"6"
]
}
]
}
}
Output:
{
"_id" : ObjectId("5da02fb86472ba670fd8c159"),
"name" : "Devices",
"exten" : {
"parameters" : {
"name" : "Date",
"value" : [
"2",
"7"
]
}
}
}
{
"_id" : ObjectId("5da02fb86472ba670fd8c159"),
"name" : "Devices",
"exten" : {
"parameters" : {
"name" : "Date",
"value" : [
"5",
"2"
]
}
}
}

mongodb aggregation with array

I have data like this:
{
"_id" : ObjectId("..."),
"name" : "Entry 1",
"time" : ISODate("2013-12-28T06:00:00.000Z"),
"value" : 100
},
{
"_id" : ObjectId("..."),
"name" : "Entry 2",
"time" : ISODate("2013-12-28T06:00:00.000Z"),
"value" : 200
},
{
"_id" : ObjectId("..."),
"name" : "Entry 1",
"time" : ISODate("2013-12-28T11:00:00.000Z"),
"value" : 110
},
{
"_id" : ObjectId("..."),
"name" : "Entry 2",
"time" : ISODate("2013-12-28T11:00:00.000Z"),
"value" : 230
},
{
"_id" : ObjectId("..."),
"name" : "Entry 3",
"time" : ISODate("2013-12-28T11:00:00.000Z"),
"value" : 25
},
{
"_id" : ObjectId("..."),
"name" : "Entry 4",
"time" : ISODate("2013-12-28T11:00:00.000Z"),
"value" : 15
}
I need the result grouped by time with percentage for each entry like this (group entries by volume "others" when entries for time period more than two, but it's not necessary):
{
"_id": ISODate("2013-12-28T11:00:00.000Z"),
"entries": [
{
"name": "Entry 1",
"percentage": 33.3
},
{
"name": "Entry 2",
"percentage": 66.6
},
]
},
{
"_id": ISODate("2013-12-28T06:00:00.000Z"),
"entries": [
{
"name": "Entry 1",
"percentage": 28.9
},
{
"name": "Entry 2",
"percentage": 60.5
},
{
"name": "Others",
"percentage": 10.5
}
]
}
This is the request I tried:
db.collection.aggregate([
{
"$addFields": {
"full_datetime": {"$substr": ["$time", 0, 19]}
}
},
{
"$group": {
"_id": "$full_datetime",
"value_sum": {"$sum": "$value"},
"entries": {
"$push": {
"name": "$name",
"percentage": {
"$multiply": [{
"$divide": ["$value", {"$literal": "$value_sum" }]
}, 100 ]
}
}
}
}
}
])
This request does not work because $value_sum does not exist inside $push.
Please help me pass $value_sum into the $push expression.
You can use one more stage to calculate percentage using $map as,
// Group entries by timestamp and compute each entry's share (in percent) of
// the total value recorded for that timestamp.
db.collection.aggregate([
  {
    // Truncate the timestamp to its first 19 characters
    // ("YYYY-MM-DDTHH:MM:SS") so it can serve as the grouping key.
    "$addFields": {
      "full_datetime": {
        "$substr": ["$time", 0, 19]
      }
    }
  },
  {
    // The group total is not available inside $push, so collect the raw
    // values here and compute the percentages in the next stage.
    "$group": {
      "_id": "$full_datetime",
      "value_sum": {
        "$sum": "$value"
      },
      "entries": {
        "$push": {
          "name": "$name",
          "value": "$value"
        }
      }
    }
  },
  {
    // Rewrite every collected entry as { name, percentage } using the
    // now-known group total. The field is named "entries" so the result
    // matches the output shown below.
    "$project": {
      "entries": {
        "$map": {
          "input": "$entries",
          "as": "entry",
          "in": {
            "name": "$$entry.name",
            "percentage": {
              "$multiply": [
                { "$divide": ["$$entry.value", "$value_sum"] },
                100
              ]
            }
          }
        }
      }
    }
  }
])
Output:
/* 1 */
{
"_id" : "2013-12-28T11:00:00",
"entries" : [
{
"name" : "Entry 1",
"percentage" : 28.9473684210526
},
{
"name" : "Entry 2",
"percentage" : 60.5263157894737
},
{
"name" : "Entry 3",
"percentage" : 6.57894736842105
},
{
"name" : "Entry 4",
"percentage" : 3.94736842105263
}
]
}
/* 2 */
{
"_id" : "2013-12-28T06:00:00",
"entries" : [
{
"name" : "Entry 1",
"percentage" : 33.3333333333333
},
{
"name" : "Entry 2",
"percentage" : 66.6666666666667
}
]
}

Issue retrieving subdocuments from MongoDB

I have the following dataset:
{
"_id" : ObjectId("59668a22734d1d48cf34de08"),
"name" : "Nobody Cares",
"menus" : [
{
"_id" : "menu_123",
"name" : "Weekend Menu",
"description" : "A menu for the weekend",
"groups" : [
{
"name" : "Spirits",
"has_mixers" : true,
"sizes" : [
"Single",
"Double"
],
"categories" : [
{
"name" : "Vodka",
"description" : "Maybe not necessary?",
"drinks" : [
{
"_id" : "drink_123",
"name" : "Absolut",
"description" : "Fancy ass vodka",
"sizes" : [
{
"_id" : "size_123",
"size" : "Single",
"price" : 300
}
]
}
]
}
]
}
],
"mixers" : [
{
"_id" : "mixer_1",
"name" : "Coca Cola",
"price" : 150
},
{
"_id" : "mixer_2",
"name" : "Lemonade",
"price" : 120
}
]
}
]
}
And I'm attempting to retrieve a single drink from that dataset, I'm using the following aggregate query:
db.getCollection('places').aggregate([
{ $match : {"menus.groups.categories.drinks._id" : "drink_123"} },
{ $unwind: "$menus" },
{ $project: { "_id": 1, "menus": { "groups": { "categories": { "drinks": { "name": 1 } } } } } }
])
However, it's returning the full structure of the dataset along with the correct data.
So instead of:
{
"_id": "drink_123",
"name": "Absolut"
}
I get:
{
"_id": ObjectId("59668a22734d1d48cf34de08"),
"menus": {
"groups": {
"categories": {
"drinks": { "name": "Absolut" }
}
}
}
}
For example. Any ideas how to just retrieve the subdocument?
If you need to retain the deeply nested model then this call will produce the desired output:
// Return a single drink as { _id, name } from the deeply nested menu model.
db.getCollection('places').aggregate([
  // Select only the places that contain the requested drink (index-friendly).
  { $match: { "menus.groups.categories.drinks._id": "drink_123" } },
  // Flatten the four nested arrays on the path to the drinks, one level per
  // $unwind, so that every document carries exactly one drink.
  { $unwind: "$menus" },
  { $unwind: "$menus.groups" },
  { $unwind: "$menus.groups.categories" },
  { $unwind: "$menus.groups.categories.drinks" },
  // Re-apply the filter: after unwinding, sibling drinks of the same place
  // are separate documents and must be discarded.
  { $match: { "menus.groups.categories.drinks._id": "drink_123" } },
  // Lift the drink's own _id and name to the top level. Both come from the
  // same unwound element, so they always belong to the same drink.
  {
    $project: {
      "_id": "$menus.groups.categories.drinks._id",
      name: "$menus.groups.categories.drinks.name"
    }
  }
])
The numerous unwinds are the result of the deep nesting of the drinks subdocuments.
Though, FWIW, this sort of query does perhaps suggest that the model isn't 'read friendly'.

mongodb aggregation match multiple $and on the same field

i have a document like this :
{
"ExtraFields" : [
{
"value" : "print",
"fieldID" : ObjectId("5535627631efa0843554b0ea")
},
{
"value" : "14",
"fieldID" : ObjectId("5535627631efa0843554b0eb")
},
{
"value" : "POLYE",
"fieldID" : ObjectId("5535627631efa0843554b0ec")
},
{
"value" : "30",
"fieldID" : ObjectId("5535627631efa0843554b0ed")
},
{
"value" : "0",
"fieldID" : ObjectId("5535627631efa0843554b0ee")
},
{
"value" : "0",
"fieldID" : ObjectId("5535627731efa0843554b0ef")
},
{
"value" : "0",
"fieldID" : ObjectId("5535627831efa0843554b0f0")
},
{
"value" : "42",
"fieldID" : ObjectId("5535627831efa0843554b0f1")
},
{
"value" : "30",
"fieldID" : ObjectId("5535627831efa0843554b0f2")
},
{
"value" : "14",
"fieldID" : ObjectId("5535627831efa0843554b0f3")
},
{
"value" : "19",
"fieldID" : ObjectId("5535627831efa0843554b0f4")
}
],
"id" : ObjectId("55369e60733e4914550832d0"), "title" : "A product"
}
what i want is to match one or more sets from the ExtraFields array. For example, all the products that contain the values print and 30. Since a value may be found in more than one fieldID (like 0 or true) we need to create a set like
WHERE (fieldID : ObjectId("5535627631efa0843554b0ea"), value : "print")
Where i'm having problems is when querying more than one fields. The pipeline i came up with is :
db.products.aggregate([
{'$unwind': '$ExtraFields'},
{
'$match': {
'$and': [{
'$and': [{'ExtraFields.value': {'$in': ["A52A2A"]}}, {
'ExtraFields.fieldID': ObjectId("5535627631efa0843554b0ea")
}]
}
,
{
'$and': [{'ExtraFields.value': '14'}, {'ExtraFields.fieldID': ObjectId("5535627631efa0843554b0eb")}]
}
]
}
},
]);
This returns zero results, but this is what i want to do in theory. Match all items that contain set 1 AND all that contain set 2.
The end result should look like a faceted search output :
[
{
"_id" : {
"values" : "18",
"fieldID" : ObjectId("5535627831efa0843554b0f3")
},
"count" : 2
},
{
"_id" : {
"values" : "33",
"fieldID" : ObjectId("5535627831efa0843554b0f2")
},
"count" : 1
}
]
Any ideas?
You could try the following aggregation pipeline
// Count how many ExtraFields entries match each requested (fieldID, value)
// pair, faceted-search style.
db.products.aggregate([
  {
    // Pre-filter whole documents before unwinding. $elemMatch requires the
    // fieldID and the value to match on the SAME array element, so a value
    // found under a different fieldID does not qualify.
    "$match": {
      "$or": [
        {
          "ExtraFields": {
            "$elemMatch": {
              "fieldID": ObjectId("5535627631efa0843554b0ea"),
              "value": "A52A2A"
            }
          }
        },
        {
          "ExtraFields": {
            "$elemMatch": {
              "fieldID": ObjectId("5535627631efa0843554b0eb"),
              "value": "14"
            }
          }
        }
      ]
    }
  },
  {
    // Emit one document per ExtraFields entry.
    "$unwind": "$ExtraFields"
  },
  {
    // Keep only the unwound entries that are one of the requested pairs.
    "$match": {
      "$or": [
        {
          "ExtraFields.fieldID": ObjectId("5535627631efa0843554b0ea"),
          "ExtraFields.value": "A52A2A"
        },
        {
          "ExtraFields.fieldID": ObjectId("5535627631efa0843554b0eb"),
          "ExtraFields.value": "14"
        }
      ]
    }
  },
  {
    // Count the occurrences of each surviving (value, fieldID) pair.
    "$group": {
      "_id": {
        "value": "$ExtraFields.value",
        "fieldID": "$ExtraFields.fieldID"
      },
      "count": {
        "$sum": 1
      }
    }
  }
])
With the sample document provided, this gives the output:
/* 1 */
{
"result" : [
{
"_id" : {
"value" : "14",
"fieldID" : ObjectId("5535627631efa0843554b0eb")
},
"count" : 1
}
],
"ok" : 1
}