Indexing not utilized during the MongoDB aggregation query - mongodb

I am stuck on a MongoDB aggregation query. I am trying to generate a summary report from a collection that contains 110M records. During report generation, I ran into the following issues:
1) Even though the collection is indexed, the indexes are not used for the search.
2) Once query execution has finished, the memory usage of the DB server does not decrease.
3) The query takes a considerable amount of time to return the result.
I'm using MongoDB Atlas v4.2.8.
sample document
{
"_id": {
"$oid": "5eb122f714d0510011e3a184"
},
"from": "Star_friends",
"to": "94713414047",
"accountName": "ZM",
"accountId": "ZM",
"campaignName": "test 1",
"campaignId": "5eb122f1e921c3001922f73c",
"campaignType": "BULK",
"status": {
"$numberInt": "3"
},
"reason": "No Routing",
"channel": "sms",
"messageType": {
"$numberInt": "1"
},
"event": "MT",
"content": "test 132",
"credit": {
"$numberInt": "1"
},
"msgId": "",
"createdDateTime": "2020-05-05T13:55:27.743Z",
"updatedTime": "2020-05-05T13:55:27.745Z",
"uDate": "2020-05-05",
"operator": "mobitel"
}
My query is as follows:
db.getCollection('report').aggregate([{
"$match": {
"createdDateTime": {
"$gt": "2020-09-14T00:00:01.000Z",
"$lt": "2020-09-15T23:59:99.999Z"
},
"messageType": {
"$in": [1, 2]
},
"channel": {
"$in": ["sms", "viber", "whatsapp"]
},
"accountId": {
"$in": ["ZM", "KEELLS"]
}
}
}, {
"$project": {
"_id": 0,
"channel": 1,
"messageType": 1,
"accountName": 1,
"accountId": 1,
"createdDateTime": 1,
"uDate": 1,
"credit": 1,
"status": 1
}
}, {
"$group": {
"_id": {
"channel": "$channel",
"messageType": "$messageType",
"accountName": "$accountName",
"accountId": "$accountId",
"filteredDate": {
"$substr": ["$createdDateTime", 0, 7]
},
"sortDate": "$uDate"
},
"total": {
"$sum": "$credit"
},
"send": {
"$sum": {
"$cond": [{
"$in": ["$status", [2, 15, 1, 14, 6, 17, 4, 5]]
}, "$credit", 0]
}
},
"delivered": {
"$sum": {
"$cond": [{
"$in": ["$status", [6, 17, 4]]
},
"$credit",
0
]
}
},
"deliveryFailed": {
"$sum": {
"$cond": [{
"$in": ["$status", [12, 5]]
}, "$credit", 0]
}
},
"failed": {
"$sum": {
"$cond": [{
"$in": ["$status", [3]]
}, "$credit", 0]
}
},
"datass": {
"$addToSet": {
"channel": "$channel",
"messageType": "$messageType",
"accountName": "$accountName",
"accountId": "$accountId",
"filteredDate": {
"$substr": ["$createdDateTime", 0, 7]
},
"sortDate": "$uDate"
}
}
}
}, {
"$unwind": "$datass"
}, {
"$project": {
"_id": 0
}
}, {
"$sort": {
"datass.sortDate": -1
}
}])
The indexes are as follows:
accountId_1 / accountId_1_createdDateTime_-1 / campaignId_-1 / channel_1 / createdDateTime_-1 / messageType_1 / msgId_-1 / msgId_-1_status_1
I would appreciate it if someone could help me with this.
Thanks

You gave us little information.
How many documents should average query like such return?
How long does it take to execute the said query?
What I can see here is that your match pipeline is good, because you are trying to filter out documents by fields that are indexed.
What is a "performance smell" here, however, is your $sort stage, which sorts on a non-indexed field. Try to do the sorting immediately after $match.
Play with it a little more and try to figure out which stage of the pipeline is a performance bottle-neck.

I have resolved my issue by changing my indexes
accountId_1_createdDateTime_-1 /
msgId_-1_status_1 /
accountId_1_messageType_1_channel_1_createdDateTime_1_accountName_1_uDate_1_credit_1_status_1

Related

MongoDB multiple counts, single document, arrays

I have been searching on stackoverflow and cannot find exactly what I am looking for and hope someone can help. I want to submit a single query, get multiple counts back, for a single document, based on array of that document.
My data:
// Seed the two sample documents used in the question.
// The shell method is insertOne (lower-case "i"); "InsertOne" is not defined on a collection.
db.myCollection.insertOne({
  "_id": "1",
  "age": 30,
  "items": [
    {
      "id": "1",
      "isSuccessful": true,
      "name": null
    }, {
      "id": "2",
      "isSuccessful": true,
      "name": null
    }, {
      "id": "3",
      "isSuccessful": true,
      "name": "Bob"
    }, {
      "id": "4",
      "isSuccessful": null,
      "name": "Todd"
    }
  ]
});
db.myCollection.insertOne({
  "_id": "2",
  "age": 22,
  "items": [
    {
      "id": "6",
      "isSuccessful": true,
      "name": "Jeff"
    }
  ]
});
What I need back is the document and the counts associated to the items array for said document. In this example where the document _id = "1":
{
"_id": "1",
"age": 30,
{
"totalIsSuccessful" : 2,
"totalNotIsSuccessful": 1,
"totalSuccessfulNull": 1,
"totalNameNull": 2
}
}
I have found that I can get this in 4 queries using something like this below, but I would really like it to be one query.
db.test1.aggregate([
{ $match : { _id : "1" } },
{ "$project": {
"total": {
"$size": {
"$filter": {
"input": "$items",
"cond": { "$eq": [ "$$this.isSuccessful", true ] }
}
}
}
}}
])
Thanks in advance.
I am assuming your expected result is invalid since you have an object literal in the middle of another object and also you have totalIsSuccessful for id:1 as 2 where it seems they should be 3. With that said ...
you can get similar output via $unwind and then grouping with $sum and $cond:
db.collection.aggregate([
{ $match: { _id: "1" } },
{ $unwind: "$items" },
{ $group: {
_id: "_id",
age: { $first: "$age" },
totalIsSuccessful: { $sum: { $cond: [{ "$eq": [ "$items.isSuccessful", true ] }, 1, 0 ] } },
totalNotIsSuccessful: { $sum: { $cond: [{ "$ne": [ "$items.isSuccessful", true ] }, 1, 0 ] } },
totalSuccessfulNull: { $sum: { $cond: [{ "$eq": [ "$items.isSuccessful", null ] }, 1, 0 ] } },
totalNameNull: { $sum: { $cond: [ { "$eq": [ "$items.name", null ]}, 1, 0] } } }
}
])
The output would be this:
[
{
"_id": "_id",
"age": 30,
"totalIsSuccessful": 3,
"totalNameNull": 2,
"totalNotIsSuccessful": 1,
"totalSuccessfulNull": 1
}
]
You can see it working here

MongoDB - Aggregate by distinct field then count per day

I have a mongodb database that collects device data.
Example document is
{
"_id" : ObjectId("5c125a185dea1b0252c5352"),
"time" : ISODate("2018-12-13T15:09:42.536Z"),
"mac" : "10:06:21:3e:0a:ff",
}
The goal would be to count the unique mac values per day, from the first document in the db to the last document in the db.
I've been playing around and came to the conclusion that I would need to have multiple groups as well as projects during my aggregations.
This is what I tried - not sure if it's in the right direction or not or just completely messed up.
# Pipeline from the question: build a date string per document, then try to
# count distinct "mac" values per day.
pipeline = [
# Stage 1: split the "time" field into separate day / month / year values.
{"$project": {
"_id": 1,
"mac": 1,
"day": {
"$dayOfMonth":"$time"
},
"month": {
"$month":"$time"
},
"year": {
"$year":"$time"
}
}
},
# Stage 2: concatenate year-month-day into a single string stored in "time".
{
"$project": {
"_id": 1,
"mac": 1,
"time": {
"$concat": [{
"$substr":["$year", 0, 4]
},
"-", {
"$substr": ["$month", 0, 2]
},
"-",
{
"$substr":["$day", 0, 2]
}]
}
}
},
# Stage 3 (intended): one group per (time, mac) pair, i.e. de-duplicate macs per day.
{
"$group": {
"_id": {
"time": "$time",
"mac": "$mac"
}
},
# NOTE(review): this second "$group" key is inside the SAME dict as the one above.
# A Python dict literal keeps only the last value for a duplicated key, so the
# (time, mac) grouping above is discarded and only this "$group" is sent.
# That presumably explains the single {"_id": null, ...} result; it looks like
# this was meant to be its own {...} element of the list.
"$group": {
"_id": "$_id.time",
"count":{"$sum": 1},
}
}
]
data = list(collection.aggregate(pipeline, allowDiskUse=True))
The output now doesn't look like it did any aggregation,
[{"_id": null, "count": 751050}]
I'm using Pymongo as my driver and using Mongodb 4.
Ideally it should just show the date and count (eg { "_id" : "2018-12-13", "count" : 2 }.
I would love some feedback and advice.
Thanks in advance.
I prefer to minimize the number of stages, and especially to avoid unnecessary $group stages. So I would do it with the following pipeline:
# Collect the distinct macs seen on each calendar day, then replace the set by its size.
pipeline = [
    {'$group': {
        '_id': {'$dateToString': {'format': "%Y-%m-%d", 'date': "$time"}},
        'macs': {'$addToSet': '$mac'},
    }},
    # The stage name has to be a quoted string: a bare $addFields is not valid Python.
    {'$addFields': {'macs': {'$size': '$macs'}}},
]
There's an operator called "$dateToString", which would solve most of your problems.
Edit: I didn't read the question carefully; thank you, @Asya Kamsky, for pointing that out. Here's the new answer.
# Two-step distinct count: first collapse to one row per (day, mac),
# then count the rows per day.
pipeline = [
    {
        "$group": {
            "_id": {
                "date": {
                    # Operator and option names must be quoted strings in Python;
                    # the bare $dateToString / format / date form is a syntax error.
                    "$dateToString": {
                        "format": "%Y-%m-%d",
                        "date": "$time"
                    }
                },
                "mac": "$mac"
            }
        }
    },
    {
        "$group": {
            "_id": "$_id.date",
            "count": {
                "$sum": 1
            }
        }
    }
]
[
{
"$project": {
"_id": 1,
"mac": 1,
"time": { "$dateToString": { "format": "%Y-%m-%d", "date": "$time", "timezone": "Africa/Johannesburg"}}
},
},
{
"$group": {
"_id":{
"time": "$time",
"mac": "$mac",
}}},{
"$group": {
"_id": "$_id.time",
"count":{"$sum": 1}
}},
{"$sort": SON([("_id", -1)])}
]
Does exactly what it should do.
Thanks. :)

mongo query: add a new field with ranking number based on another field

I am new to mongo queries. Currently I have a collection like this, which is used to create a d3 force-directed graph.
{
"_id": "allesgute3",
"nodes": [{
"id": "bmw#gmail.com",
"count": 15,
"nodeUpdatetime": 1525341732
}, {
"id": "abc#gmail.com",
"count": 10,
"nodeUpdatetime": null
}, {
"id": "xyz#gmail.com",
"count": 8,
"nodeUpdatetime": 1525408742
}, {
"id": "wilson#gmail.com",
"count": 4,
"nodeUpdatetime": 1525423847
}, {
"id": "niv#gmail.com",
"count": 6,
"nodeUpdatetime": 1525447758
}, {
"id": "car#gmail.com",
"count": 9,
"nodeUpdatetime": 1525447763
},
{
"id": "jason#gmail.com",
"count": 1,
"nodeUpdatetime": 1525447783
}
],
"links": [{
"source": "bmw#gmail.com",
"target": "jason#gmail.com",
"timestamp": 1525312111
}, {
"source": "car#gmail.com",
"target": "jason#gmail.com",
"timestamp": 1525334013
}, {
"source": "bmw#gmail.com",
"target": "car#gmail.com",
"timestamp": 1525334118
}]
}
Using a mongo query, I would like to generate the output to something like this. Basically for the nested data under "nodes", add a new field called "topn" and rank them by count from 1 to 5. The remainder values are null. Can anyone help? Thank you!
{
"_id": "allesgute3",
"nodes": [{
"id": "bmw#gmail.com",
"count": 15,
"nodeUpdatetime": 1525341732,
"topn": 1
}, {
"id": "abc#gmail.com",
"count": 10,
"nodeUpdatetime": null,
"topn": 2
}, {
"id": "xyz#gmail.com",
"count": 8,
"nodeUpdatetime": 1525408742,
"topn": 4
}, {
"id": "wilson#gmail.com",
"count": 4,
"nodeUpdatetime": 1525423847,
"topn": null
}, {
"id": "niv#gmail.com",
"count": 6,
"nodeUpdatetime": 1525447758,
"topn": 5
}, {
"id": "car#gmail.com",
"count": 9,
"nodeUpdatetime": 1525447763,
"topn": 3
},
..............
The following should get you what you want:
// Rank the elements of "nodes" by "count" (descending) and tag the first five with topn = 1..5.
// The stages are passed as a single pipeline array, which is the documented call form.
db.collection.aggregate([{
  $unwind: "$nodes" // flatten the "nodes" array
}, {
  $sort: { "nodes.count": -1 } // sort descending by "count"
}, {
  $group: { // create the original structure again - just with sorted array elements
    _id: "$_id",
    nodes: { "$push": "$nodes" }
  }
}, {
  $addFields: {
    "nodes": {
      $zip: { // zip two arrays together
        inputs: [
          "$nodes", // the first one being the existing and now sorted "nodes" array
          { $range: [ 1, 6 ] } // and the second one being [ 1, 2, 3, 4, 5 ]
        ],
        useLongestLength: true // do not stop after five elements but instead continue using a "null" value
      }
    }
  }
}, {
  $addFields: {
    "nodes": {
      $map: { // transform the "nodes" array
        input: "$nodes",
        as: "this",
        in: {
          $mergeObjects: [ // by merging two objects
            { $arrayElemAt: [ "$$this", 0 ] }, // the first sits at array position 0
            {
              topn: { $arrayElemAt: [ "$$this", 1 ] } // the second is a new object with a "topn" field holding the second element of the pair
            }
          ]
        }
      }
    }
  }
}])

Mongo request slow

I'm trying out Mongo with a million entries. My request is really slow.
Do you have any ideas to optimize it ?
db.financial_transaction.runCommand({
"aggregate": "financial_transaction",
"pipeline": [ {
"$match": {
"transaction_type": { "$in": [ 1, 2 ] },
"created_at": { "$gte": new ISODate("2016-03-13T00:00:00+01:00"), "$lte": new ISODate("2017-12-13T23:59:00+01:00") },
"type": { "$in": [ "A", "C", "E" ] },
"sid": { "$in": [ 1, 3, 7, 9, 11, 13 ] },
"context": { "$in": [ "CL", "RE" ] } }
}, {
"$group": { "_id": { "paymentType": "$payment_type",
"paymentMethod": "$payment_method",
"responseCode": "$response_code",
"reasonCode": "$reason_code"
},
"count": { "$sum": 1 }, "total_amount": { "$sum": "$requested_amount" } } }, { "$sort": { "count": -1 } } ]
});
Indexes:
_id_
idx_context
idx_payment_method
idx_response_code
idx_reason_code
idx_created_at
idx_transaction_type
idc_payment_method_created_at_transaction_type_origin_auth_system
idx_created_at_context_transaction_type
idx_updated_at
I made a gist with the explain result: https://gist.github.com/sanchobouillant/bd59403242ebb1ec45582dff74b457a2

Projecting specific fields present inside an array, based on the value of some other field

Overview :
The documents, that I'm working upon, have two nested arrays in them - contentMetaData & text_content.
Within contentMetaData, we have the text_content and content_flag. Based on the value of the content_flag, I need to hide specific field within the text_content.
Requirement :
If the content_flag is true, text_content should have a single child - the text_note.
If the content_flag is false, text_content should have a single child - the text_description.
The structure and other details need to be preserved.
Documents SHOULD NOT be updated; the values need to be only hidden during projection.
Version Used : Mongo 2.6
Sample Document :
{
"_id": ObjectId("56f8dd19e4b0365115927b0f"),
"contentId": "cbc91805-2faa-4eff-8f84-02547173c152",
"contentMetaData": [
{
"_id": "1574b58f-b7fa-4cd5-b34f-98beeb657c97",
"name": "text_content",
"attributes": [],
"children": [
{
"_id": "97340ecf-fdbd-41e5-a6b2-01cc542f16ee",
"name": "text_note",
"value": "abc",
"type": "java.lang.String",
"attributes": [],
"children": [],
"noOfChildren": 0,
"positionIndex": 1
},
{
"_id": "19c5a3fb-54a2-4368-a89d-ea1d2554402d",
"name": "text_description",
"value": "def",
"type": "java.lang.String",
"attributes": [],
"children": [],
"noOfChildren": 0,
"positionIndex": 2
}
],
"noOfChildren": 2,
"positionIndex": 1
},
{
"_id": "4e8ef7c9-cffd-4b36-9109-89b263dff3c8",
"name": "content_flag",
"value": "true",
"type": "java.lang.String",
"attributes": [],
"children": [],
"noOfChildren": 0,
"positionIndex": 2
}
]
}
Sample Output :
{
"_id": ObjectId("56f8dd19e4b0365115927b0f"),
"contentId": "cbc91805-2faa-4eff-8f84-02547173c152",
"contentMetaData": [
{
"_id": "1574b58f-b7fa-4cd5-b34f-98beeb657c97",
"name": "text_content",
"attributes": [],
"children": [
{
"_id": "97340ecf-fdbd-41e5-a6b2-01cc542f16ee",
"name": "text_note",
"value": "abc",
"type": "java.lang.String",
"attributes": [],
"children": [],
"noOfChildren": 0,
"positionIndex": 1
}
],
"noOfChildren": 2,
"positionIndex": 1
},
{
"_id": "4e8ef7c9-cffd-4b36-9109-89b263dff3c8",
"name": "content_flag",
"value": "true",
"type": "java.lang.String",
"attributes": [],
"children": [],
"noOfChildren": 0,
"positionIndex": 2
}
]
}
I attempted using $map but it didn't work. I tried using $unwind, but was unable to $push the data back, in the desired format.
Sample Mongo Code :
db.content.aggregate([
{
$project: {
_id: 1,
contentId: 1,
contentMetaData: 1
tempMetaData: "$contentMetaData"
}
},
{
$unwind: "$contentMetaData"
},
{
$match: {
"contentMetaData.name": "content_flag"
}
},
{
$project: {
_id: 1,
contentId: 1,
contentMetaData: "$tempMetaData",
content_flag_value: "$contentMetaData.value"
}
},
{
$project: {
_id: 1,
contentId: 1,
contentMetaData: 1,
tempMetaData: "$contentMetaData",
content_flag_value: 1
}
},
{
$unwind: "$contentMetaData"
},
{
$match: {
"contentMetaData.name": "text_content"
}
},
{
$project: {
_id: 1,
contentId: 1,
contentMetaData: 1,
tempMetaData: "$contentMetaData",
content_flag_value: 1,
text_content : "$contentMetaData.children",
temp_text_content: "$text_content"
}
},
{
$unwind: "$text_content"
},
{
$group:{
_id:"$_id",
contentId:{$first:"$contentId"},
text_content:
{$max:
{$cond:
[
{$eq: ["$content_flag_value", "true"]},
{$cond:
[{$or:[
{$eq: ["$text_content.name","wk_link_url"]},
{$eq: ["$text_content.name","wk_link_description"]}
]},
"$text_content",
null]
},
null
]
}
},
contentMetaData:{$first:"$contentMetaData"}
}
},
{
$group:{
_id:"$_id",
contentId:{$first:"$contentId"},
contentMetaData:{$push:{"text_content":"$text_content"}}
}
},
{
$project: {
_id: 0,
contentId: 1,
contentMetaData: 1
}
}]).pretty()
I'm new to Mongo. Can somebody help me out with this?
You can try the below aggregation.
$map in combination with $setDifference to extract text_content and content_flag array.
$unwind to content_flag document.
$map to keep the current values in text_content and $map in combination with $setDifference to filter the children on the criteria.
$setUnion to join back the text_content and content_flag array into contentMetaData
db.collection.aggregate({
$project: {
_id: 1,
contentId: 1,
text_content: {
"$setDifference": [{
"$map": {
"input": "$contentMetaData",
"as": "text",
"in": {
"$cond": [{
$eq: ['$$text.name', "text_content"]
},
"$$text",
false
]
}
}
},
[false]
]
},
content_flag: {
"$setDifference": [{
"$map": {
"input": "$contentMetaData",
"as": "content",
"in": {
"$cond": [{
$eq: ['$$content.name', "content_flag"]
},
"$$content",
false
]
}
}
},
[false]
]
}
}
}, {
$unwind: "$content_flag"
}, {
$project: {
"_id": 1,
contentId: 1,
"contentMetaData": {
$setUnion: [{
$map: {
input: "$text_content",
as: "text",
in: {
"_id": "$$text._id",
"name": "$$text.name",
"attributes": "$$text.attributes",
"noOfChildren": "$$text.noOfChildren",
"positionIndex": "$$text.positionIndex",
"children": {
"$setDifference": [{
"$map": {
"input": "$$text.children",
"as": "child",
"in": {
"$cond": [{
"$cond": [{
$eq: ["$content_flag.value", "true"]
}, {
$eq: ["$$child.name", "text_note"]
}, {
$eq: ["$$child.name", "text_description"]
}]
},
"$$child",
false
]
}
}
},
[false]
]
}
}
}
},
["$content_flag"]
]
}
}
})
Update:
$map in combination with $setDifference to extract content_flag array.
$unwind to content_flag document.
$redact to go through a document level at a time and look for name field recursively and perform $$DESCEND and $$PRUNE on the criteria.
$project to format the final response.
db.collection.aggregate({
$project: {
_id: 1,
contentId: 1,
contentMetaData: 1,
content_flag: {
"$setDifference": [{
"$map": {
"input": "$contentMetaData",
"as": "content",
"in": {
"$cond": [{
$eq: ['$$content.name', "content_flag"]
},
"$$content",
false
]
}
}
},
[false]
]
}
}
}, {
$unwind: "$content_flag"
}, {
$redact: {
$cond: [{
$or: [{
$eq: ["$name", "text_content"]
}, {
$not: "$name"
}, {
$eq: ["$name", "content_flag"]
}, {
$and: [{
$eq: ["$name", "text_note"]
}, {
$eq: ["$$ROOT.content_flag.value", "true"]
}]
}, {
$and: [{
$eq: ["$name", "text_description"]
}, {
$eq: ["$$ROOT.content_flag.value", "false"]
}]
}]
},
"$$DESCEND",
"$$PRUNE"
]
}
}, {
$project: {
_id: 1,
contentId: 1,
contentMetaData: 1
}
});