Mongodb count all array elements in all objects matching by criteria - mongodb

I have a collection that is log of activity on objects like this:
{
"_id" : ObjectId("55e3fd1d7cb5ac9a458b4567"),
"object_id" : "1",
"activity" : [
{
"action" : "test_action",
"time" : ISODate("2015-08-31T00:00:00.000Z")
},
{
"action" : "test_action",
"time" : ISODate("2015-08-31T00:00:22.000Z")
}
]
}
{
"_id" : ObjectId("55e3fd127cb5ac77478b4567"),
"object_id" : "2",
"activity" : [
{
"action" : "test_action",
"time" : ISODate("2015-08-31T00:00:00.000Z")
}
]
}
{
"_id" : ObjectId("55e3fd0f7cb5ac9f458b4567"),
"object_id" : "1",
"activity" : [
{
"action" : "test_action",
"time" : ISODate("2015-08-30T00:00:00.000Z")
}
]
}
If i do followoing query:
db.objects.find({
"createddate": {$gte : ISODate("2015-08-30T00:00:00.000Z")},
"activity.action" : "test_action"}
}).count()
it returns count of documents containing "test_action" (3 in this set), but i need to get count of all test_actions (4 on this set). How do i do that?

The most "performant" way to do this is to skip the $unwind altogther and simply $group to count. Essentially "filter" arrays get the $size of the results to $sum:
db.objects.aggregate([
{ "$match": {
"createddate": {
"$gte": ISODate("2015-08-30T00:00:00.000Z")
},
"activity.action": "test_action"
}},
{ "$group": {
"_id": null,
"count": {
"$sum": {
"$size": {
"$setDifference": [
{ "$map": {
"input": "$activity",
"as": "el",
"in": {
"$cond": [
{ "$eq": [ "$$el.action", "test_action" ] },
"$$el",
false
]
}
}},
[false]
]
}
}
}
}}
])
Since MongoDB version 3.2 we can use $filter, which makes this much more simple:
db.objects.aggregate([
{ "$match": {
"createddate": {
"$gte": ISODate("2015-08-30T00:00:00.000Z")
},
"activity.action": "test_action"
}},
{ "$group": {
"_id": null,
"count": {
"$sum": {
"$size": {
"$filter": {
"input": "$activity",
"as": "el",
"cond": {
"$eq": [ "$$el.action", "test_action" ]
}
}
}
}
}
}}
])
Using $unwind causes the documents to de-normalize and effectively creates a copy per array entry. Where possible you should avoid this due the the often extreme cost. Filtering and counting array entries per document is much faster by comparison. As is a simple $match and $group pipeline compared to many stages.

You can do so by using aggregation:
db.objects.aggregate([
{$match: {"createddate": {$gte : ISODate("2015-08-30T00:00:00.000Z")}, {"activity.action" : "test_action"}}},
{$unwind: "$activity"},
{$match: {"activity.action" : "test_action"}}},
{$group: {_id: null, count: {$sum: 1}}}
])
This will produce a result like:
{
count: 4
}

Related

Filter Array Content to a Query containing $concatArrays

Given this function, I have a data set that I am querying. The data looks like this:
db.activity.insert(
{
"_id" : ObjectId("5908e64e3b03ca372dc945d5"),
"startDate" : ISODate("2017-05-06T00:00:00Z"),
"details" : [
{
"code" : "2",
"_id" : ObjectId("5908ebf96ae5003a4471c9b2"),
"walkDistance" : "03",
"jogDistance" : "01",
"runDistance" : "08",
"sprintDistance" : "01"
}
]
}
)
db.activity.insert(
{
"_id" : ObjectId("58f79163bebac50d5b2ae760"),
"startDate" : ISODate("2017-05-07T00:00:00Z"),
"details" : [
{
"code" : "2",
"_id" : ObjectId("58f7948fbebac50d5b2ae7f2"),
"walkDistance" : "01",
"jogDistance" : "02",
"runDistance" : "09",
"sprintDistance" : ""
}
]
}
)
Using this function, thanks to Neil Lunn, I am able to get my desired output:
db.activity.aggregate([
{ "$project": {
"_id": 0,
"unique": {
"$filter": {
"input": {
"$setDifference": [
{ "$concatArrays": [
"$details.walkDistance",
"$details.jogDistance",
"$details.runDistance",
"$details.sprintDistance"
]},
[]
]
},
"cond": { "$ne": [ "$$this", "" ] }
}
}
}},
{ "$unwind": "$unique" },
{ "$group": {
"_id": null,
"uniqueArray": { "$addToSet": "$unique" }
}}
])
However, I cannot add a match statement to the beginning.
db.activity.aggregate([
{$match: {"startDate" : ISODate("2017-05-06T00:00:00Z"), "details.code": "2" },
{$unwind: '$details'},
{$match: {"startDate" : ISODate("2017-05-06T00:00:00Z"), "details.code": "2" },
{ "$project": {
"_id": 0,
"unique": {
"$filter": {
"input": {
"$setDifference": [
{ "$concatArrays": [
"$details.walkDistance",
"$details.jogDistance",
"$details.runDistance",
"$details.sprintDistance"
]},
[]
]
},
"cond": { "$ne": [ "$$this", "" ] }
}
}
}},
{ "$unwind": "$unique" },
{ "$group": {
"_id": null,
"uniqueArray": { "$addToSet": "$unique" }
}}
])
Because it gives an error message of:
> $concatArrays only supports arrays, not string
How can I modify this query so that a $match statement can be added?
Don't $unwind the array you are feeding to $concatArrays. Instead apply $filter to only extract the matching values. And as stated, we can just use $setUnion for the 'unique concatenation' instead:
db.activity.aggregate([
{ "$match": { "startDate" : ISODate("2017-05-06T00:00:00Z"), "details.code": "2" } },
{ "$project": {
"_id": 0,
"unique": {
"$let": {
"vars": {
"filtered": {
"$filter": {
"input": "$details",
"cond": { "$eq": [ "$$this.code", "2" ] }
}
}
},
"in": {
"$setDifference": [
{ "$setUnion": [
"$$filtered.walkDistance",
"$$filtered.jogDistance",
"$$filtered.runDistance",
"$$filtered.sprintDistance"
]},
[""]
]
}
}
}
}},
{ "$unwind": "$unique" },
{ "$group": {
"_id": null,
"uniqueArray": { "$addToSet": "$unique" }
}}
])
Using $let makes things a bit cleaner syntax wise since you don't need to specify multiple $map and $filter statements "inline" as the source for $setUnion

Combining Unique Items From Arrays

I have a data set that I am querying. The data looks like this:
db.activity.insert(
{
"_id" : ObjectId("5908e64e3b03ca372dc945d5"),
"startDate" : ISODate("2017-05-06T00:00:00Z"),
"details" : [
{
"code" : "2",
"_id" : ObjectId("5908ebf96ae5003a4471c9b2"),
"walkDistance" : "03",
"jogDistance" : "01",
"runDistance" : "08",
"sprintDistance" : "01"
}
]
}
)
db.activity.insert(
{
"_id" : ObjectId("58f79163bebac50d5b2ae760"),
"startDate" : ISODate("2017-05-07T00:00:00Z"),
"details" : [
{
"code" : "2",
"_id" : ObjectId("58f7948fbebac50d5b2ae7f2"),
"walkDistance" : "01",
"jogDistance" : "02",
"runDistance" : "09",
"sprintDistance" : ""
}
]
}
)
My desired output looks as such:
[
{
"_id": null,
"uniqueValues": [
"03",
"01",
"08",
"02",
"09"
]
}
]
In order to do that, I've developed the following code:
db.activity.aggregate([
{
$facet: {
"walk": [
{$unwind: '$details'},
{$group: {_id: null, uniqueValues: {$addToSet: "$details.walkDistance"}}}
], "jog": [
{$unwind: '$details'},
{$group: {_id: null, uniqueValues: {$addToSet: "$details.jogDistance"}}}
], "run": [
{$unwind: '$details'},
{$group: {_id: null, uniqueValues: {$addToSet: "$details.runDistance"}}}
], "sprint": [
{$unwind: '$details'},
{$group: {_id: null, uniqueValues: {$addToSet: "$details.sprintDistance"}}}
]
}
}])
However, I am still getting 4 different facets with their own _id: null and uniqueValues array. How do I change the query so that they all included in a single array, and the "" is also excluded.
$facet really is not the best thing to use here. You should really just be applying $concatArrays and filtering down the result with $setDifference and $filter:
db.activity.aggregate([
{ "$project": {
"_id": 0,
"unique": {
"$filter": {
"input": {
"$setDifference": [
{ "$concatArrays": [
"$details.walkDistance",
"$details.jogDistance",
"$details.runDistance",
"$details.sprintDistance"
]},
[]
]
},
"cond": { "$ne": [ "$$this", "" ] }
}
}
}},
{ "$unwind": "$unique" },
{ "$group": {
"_id": null,
"uniqueArray": { "$addToSet": "$unique" }
}}
])
Returns the result:
/* 1 */
{
"_id" : null,
"uniqueArray" : [
"09",
"03",
"01",
"02",
"08"
]
}
So after bringing all the array values into a single array using $concatArrays, you apply $setDifference to reduce the list to the "unique" values. The $filter removes the "" values you don't want.
Then it's just a matter of applying $unwind on the singular and reduced list and bringing it back together in the $group with $addToSet to only keep unique values across documents.
You could also just $concatArrays only and then $unwind and $match, but the other operators don't really cost much and reduce some of the load by already narrowing down to "unique" within the document before you get to the $unwind. So it's better to do it that way.
Really this can even be broken down futher, to simply $setUnion and $setDifference since we are talking about "sets" afterall:
db.activity.aggregate([
{ "$project": {
"_id": 0,
"unique": {
"$setDifference": [
{ "$setUnion": [
"$details.walkDistance",
"$details.jogDistance",
"$details.runDistance",
"$details.sprintDistance"
]},
[""]
]
}
}},
{ "$unwind": "$unique" },
{ "$group": {
"_id": null,
"uniqueArray": { "$addToSet": "$unique" }
}}
])
And that means that the overall statement becomes compatible back to MongoDB 2.6, or would be if all the forms such as $details.walkDistance were written out in their longer form using $map:
"$setDifference": [
{ "$setUnion": [
{ "$map": { "input": "$details", "as": "d", "in": "$$d.walkDistance" } },
{ "$map": { "input": "$details", "as": "d", "in": "$$d.jogDistance" } },
{ "$map": { "input": "$details", "as": "d", "in": "$$d.runDistance" } },
{ "$map": { "input": "$details", "as": "d", "in": "$$d.sprintDistance" } }
]},
[""]
]
On the other hand running $facet causes a "brute force" parse through the whole collection for every property from within the array, and $unwind being processed on each of those passes. So it's a really inefficient way to obtain the result. So don't do it that way.

How to match and group array elements with the max value in aggregation

I need help to get the array element having maximum value of a field(level) from a document. Then count the total occurences grouped by array element field "bssid".
Consider the following data
/* 1 */
{
"_id" : "18:59:36:0c:94:a3",
"timestamp" : "1460012567",
"apdata" : [{
"bssid" : "f4:b7:e2:56:e4:20",
"ssid" : "Test Network2",
"level" : -55
}, {
"bssid" : "b8:a3:86:67:03:56",
"ssid" : "Test Network1",
"level" : -76
}]
}
/* 2 */
{
"_id" : "d0:b3:3f:b9:42:38",
"timestamp" : "1460013345",
"apdata" : [{
"bssid" : "f4:b7:e2:56:e4:20",
"ssid" : "Test Network2",
"level" : -65
}, {
"bssid" : "b8:a3:86:67:03:56",
"ssid" : "Test Network1",
"level" : -46
}]
}
/* 3 */
{
"_id" : "d0:b3:3f:b9:42:41",
"timestamp" : "1460013145",
"apdata" : [{
"bssid" : "f4:b7:e2:56:e4:20",
"ssid" : "Test Network2",
"level" : -65
}, {
"bssid" : "b8:a3:86:67:03:56",
"ssid" : "Test Network1",
"level" : -46
}]
}
The output required is
{
"bssid" : "f4:b7:e2:56:e4:20",
"ssid" : "Test Network2",
"count" : 1
}, {
"bssid" : "b8:a3:86:67:03:56",
"ssid" : "Test Network1",
"count" : 2
}
Which is the count of times each bssid had the maximum value within the array of each document over the whole collection.
If you have MongoDB 3.2 available then you can do something like this:
db.sample.aggregate([
{ "$project": {
"apdata": {
"$arrayElemAt": [
{ "$filter": {
"input": "$apdata",
"as": "el",
"cond": {
"$eq": [
"$$el.level",
{ "$max": {
"$map": {
"input": "$apdata",
"as": "data",
"in": "$$data.level"
}
}}
]
}
}},
0
]
}
}},
{ "$group": {
"_id": "$apdata.bssid",
"ssid": { "$first": "$apdata.ssid" },
"count": { "$sum": 1 }
}}
])
For at least MongoDB 2.6 you need to do this:
db.sample.aggregate([
{ "$unwind": "$apdata" },
{ "$group": {
"_id": "$_id",
"apdata": { "$push": "$apdata" },
"max": { "$max": "$apdata.level" }
}},
{ "$unwind": "$apdata" },
{ "$redact": {
"$cond": {
"if": { "$eq": [ "$apdata.level", "$max" ] },
"then": "$$KEEP",
"else": "$$PRUNE"
}
}},
{ "$group": {
"_id": "$apdata.bssid",
"ssid": { "$first": "$apdata.ssid" },
"count": { "$sum": 1 }
}}
])
And for MongoDB 2.4 or 2.2 like this:
db.sample.aggregate([
{ "$unwind": "$apdata" },
{ "$group": {
"_id": "$_id",
"apdata": { "$push": "$apdata" },
"max": { "$max": "$apdata.level" }
}},
{ "$unwind": "$apdata" },
{ "$project": {
"apdata": 1,
"isMax": { "$eq": [ "$apdata.level", "$max" ] }
}},
{ "$match": { "isMax": true } },
{ "$group": {
"_id": "$apdata.bssid",
"ssid": { "$first": "$apdata.ssid" },
"count": { "$sum": 1 }
}}
])
In all cases $max is used to get the "maximum" value of of the array in each document "first", then you can use that to "filter" the array content prior to using it in a $group. The approaches to this only vary with version
MongoDB 3.2: Allows the $max to work directly on an "array" of values. So the $map is used to just get the "level" values and find out what that "max" actually is.
Then the $filter can be used to just return the array element which matches that "max" value, and finally $arrayElemAt is used to return that "only" ( out of two possible and "zero" index ) element as a plain document.
The whole process can be done in $group "only" if you basically repeat that whole statement for both the _id and in order to get the $first "ssid" value, but it's a bit easier to write in a $project separately to demonstrate.
MongoDB 2.6: This lacks the fancier operators and most notably the ability of $max to work "directly" on an array. The notable thing is the need to $unwind the array first and then actually $group just on the original document, solely in order to get that "max" value.
Then the process really needs you to $unwind again since you will be grouping on the element from the array later, and then use $redact to filter the content. This is a "logical" form of $match where you can directly compare the "level" against the computed "max" from the earlier stage. So the element that is not the "max" is removed.
MongoDB 2.4: Is again basically the same logic, except instead of $redact you actually need the physical $project in order to put a field in the document to use in filtering with $match.
All versions have the same final $group, where you supply the path to "apdata.bssid" for the grouping key and the $first result on that grouping boundary for the "ssid" and a simple $sum to count the occurrences of the grouping key in the results.
Everything returns just as follows:
{ "_id" : "f4:b7:e2:56:e4:20", "ssid" : "Test Network2", "count" : 1 }
{ "_id" : "b8:a3:86:67:03:56", "ssid" : "Test Network1", "count" : 2 }
Actually the most "efficient" form for MongoDB 3.2 would be as follows:
db.sample.aggregate([
{ "$group": {
"_id": {
"$arrayElemAt": [
{ "$map": {
"input": {
"$filter": {
"input": "$apdata",
"as": "el",
"cond": {
"$eq": [
"$$el.level",
{ "$max": {
"$map": {
"input": "$apdata",
"as": "data",
"in": "$$data.level"
}
}}
]
}
}
},
"as": "apdata",
"in": {
"bssid": "$$apdata.bssid",
"ssid": "$$apdata.ssid"
}
}},
0
]
},
"count": { "$sum": 1 }
}}
])
With a slightly different form due to the compound _id, but it is a single $group stage only, without repetition of the whole process to find the array element data for the "max" value:
{
"_id" : {
"bssid" : "b8:a3:86:67:03:56",
"ssid" : "Test Network1"
},
"count" : 2
}
{
"_id" : {
"bssid" : "f4:b7:e2:56:e4:20",
"ssid" : "Test Network2"
},
"count" : 1
}

How to find document and single subdocument matching given criterias in MongoDB collection

I have collection of products. Each product contains array of items.
> db.products.find().pretty()
{
"_id" : ObjectId("54023e8bcef998273f36041d"),
"shop" : "shop1",
"name" : "product1",
"items" : [
{
"date" : "01.02.2100",
"purchasePrice" : 1,
"sellingPrice" : 10,
"count" : 15
},
{
"date" : "31.08.2014",
"purchasePrice" : 10,
"sellingPrice" : 1,
"count" : 5
}
]
}
So, can you please give me an advice, how I can query MongoDB to retrieve all products with only single item which date is equals to the date I pass to query as parameter.
The result for "31.08.2014" must be:
{
"_id" : ObjectId("54023e8bcef998273f36041d"),
"shop" : "shop1",
"name" : "product1",
"items" : [
{
"date" : "31.08.2014",
"purchasePrice" : 10,
"sellingPrice" : 1,
"count" : 5
}
]
}
What you are looking for is the positional $ operator and "projection". For a single field you need to match the required array element using "dot notation", for more than one field use $elemMatch:
db.products.find(
{ "items.date": "31.08.2014" },
{ "shop": 1, "name":1, "items.$": 1 }
)
Or the $elemMatch for more than one matching field:
db.products.find(
{ "items": {
"$elemMatch": { "date": "31.08.2014", "purchasePrice": 1 }
}},
{ "shop": 1, "name":1, "items.$": 1 }
)
These work for a single array element only though and only one will be returned. If you want more than one array element to be returned from your conditions then you need more advanced handling with the aggregation framework.
db.products.aggregate([
{ "$match": { "items.date": "31.08.2014" } },
{ "$unwind": "$items" },
{ "$match": { "items.date": "31.08.2014" } },
{ "$group": {
"_id": "$_id",
"shop": { "$first": "$shop" },
"name": { "$first": "$name" },
"items": { "$push": "$items" }
}}
])
Or possibly in shorter/faster form since MongoDB 2.6 where your array of items contains unique entries:
db.products.aggregate([
{ "$match": { "items.date": "31.08.2014" } },
{ "$project": {
"shop": 1,
"name": 1,
"items": {
"$setDifference": [
{ "$map": {
"input": "$items",
"as": "el",
"in": {
"$cond": [
{ "$eq": [ "$$el.date", "31.08.2014" ] },
"$$el",
false
]
}
}},
[false]
]
}
}}
])
Or possibly with $redact, but a little contrived:
db.products.aggregate([
{ "$match": { "items.date": "31.08.2014" } },
{ "$redact": {
"$cond": [
{ "$eq": [ { "$ifNull": [ "$date", "31.08.2014" ] }, "31.08.2014" ] },
"$$DESCEND",
"$$PRUNE"
]
}}
])
More modern, you would use $filter:
db.products.aggregate([
{ "$match": { "items.date": "31.08.2014" } },
{ "$addFields": {
"items": {
"input": "$items",
"cond": { "$eq": [ "$$this.date", "31.08.2014" ] }
}
}}
])
And with multiple conditions, the $elemMatch and $and within the $filter:
db.products.aggregate([
{ "$match": {
"$elemMatch": { "date": "31.08.2014", "purchasePrice": 1 }
}},
{ "$addFields": {
"items": {
"input": "$items",
"cond": {
"$and": [
{ "$eq": [ "$$this.date", "31.08.2014" ] },
{ "$eq": [ "$$this.purchasePrice", 1 ] }
]
}
}
}}
])
So it just depends on whether you always expect a single element to match or multiple elements, and then which approach is better. But where possible the .find() method will generally be faster since it lacks the overhead of the other operations, which in those last to forms does not lag that far behind at all.
As a side note, your "dates" are represented as strings which is not a very good idea going forward. Consider changing these to proper Date object types, which will greatly help you in the future.
Based on Neil Lunn's code I work with this solution, it includes automatically all first level keys (but you could also exclude keys if you want):
db.products.find(
{ "items.date": "31.08.2014" },
{ "shop": 1, "name":1, "items.$": 1 }
{ items: { $elemMatch: { date: "31.08.2014" } } },
)
With multiple requirements:
db.products.find(
{ "items": {
"$elemMatch": { "date": "31.08.2014", "purchasePrice": 1 }
}},
{ items: { $elemMatch: { "date": "31.08.2014", "purchasePrice": 1 } } },
)
Mongo supports dot notation for sub-queries.
See: http://docs.mongodb.org/manual/reference/glossary/#term-dot-notation
Depending on your driver, you want something like:
db.products.find({"items.date":"31.08.2014"});
Note that the attribute is in quotes for dot notation, even if usually your driver doesn't require this.

How to retrieve a sub document array in MongoDB

I have a collection in mongodb like this:
db.country_list.find().pretty()
{
"_id" : ObjectId("53917321ccbc96175d7a808b"),
"countries" : [
{
"countryName" : "Afghanistan",
"iso3" : "AFG",
"callingCode" : "93"
},
{
"countryName" : "Aland Islands",
"iso3" : "ALA",
"callingCode" : "358"
},
{
"countryName" : "Albania",
"iso3" : "ALB",
"callingCode" : "355"
}
]
}
like that i have 100 country details
i want to retrieve a country name where the calling code is 355.
I have tried like this
db.country_list.find({countries: {$elemMatch :{ 'callingCode':'355'} } } )
and like this
db.country_list.find({'countries.callingCode':'355'}).pretty()
but i am getting all records.How to get a specific record .Thanks in advance
What you want is the positional $ operator:
db.country_list.find(
{ "countries": { "$elemMatch" :{ "callingCode":"355"} } }
{ "countries.$": 1 }
)
Or even with the other syntax you tried:
db.country_list.find(
{ "countries.callingCode": "355"}
{ "countries.$": 1 }
)
This is because a "query" matches documents and is not a filter for the array contained in those documents. So the second argument there projects the field with the "position" that was matched on the query side.
If you need to match more than one array element, then you use the aggregation framework which has more flexibility:
db.country_list.aggregate([
// Matches the documents that "contain" the match
{ "$match": {
"countries.callingCode": "355"
}},
// Unwind the array to de-normalize as documents
{ "$unwind": "$countries" },
// Match to "filter" the array content
{ "$match": {
"countries.callingCode": "355"
}},
// Group back if you want an array
{ "$group": {
"_id": "$_id",
"countries": { "$push": "$countries" }
}}
])
Or with MongoDB 2.6 or greater you can do this without the $unwind and $group:
db.country_list.aggregate([
// Matches the documents that "contain" the match
{ "$match": {
"countries.callingCode": "355"
}},
// Project with "$map" to filter
{ "$project": {
"countries": {
"$setDifference": [
{ "$map": {
"input": "$countries",
"as": "el",
"in": {
"$cond": [
{ "$eq": [ "$$el.callingCode", "355" ] }
"$$el",
false
]
}
}},
[false]
]
}
}}
])