MongoDB aggregate/grouping by key-value pairs - mongodb

My data looks something like this:
{
"_id" : "9aa072e4-b706-47e6-9607-1a39e904a05a",
"customerId" : "2164289-4",
"channelStatuses" : {
"FOO" : {
"status" : "done"
},
"BAR" : {
"status" : "error"
}
},
"channel" : "BAR",
}
My aggregate/group looks like this:
{
"_id" : {
"customerId" : "$customerId",
"channel" : "$channel",
"status" : "$channelStatuses[$channel].status"
},
"count" : {
"$sum" : 1
}
}
So basically with the example data the group should give me a group grouped by:
{"customerId": "2164289-4", "channel": "BAR", "status": "error"}
But I cannot use []-indexing in a aggregate/group. What should I do instead?

You cannot get the result you want with the current structure using .aggregate(). You "could" change the structure to use an array rather than named keys, and the operation is actually quite simple.
So with a document like:
{
"_id" : "9aa072e4-b706-47e6-9607-1a39e904a05a",
"customerId" : "2164289-4",
"channelStatuses" : [
{
"channel": "FOO",
"status" : "done"
},
{
"channel": "BAR",
"status" : "error"
}
],
"channel" : "BAR",
}
You can then do in modern releases with $filter, $map and $arrayElemAt:
{ "$group": {
"_id": {
"customerId" : "$customerId",
"channel" : "$channel",
"status": {
"$arrayElemAt": [
{ "$map": {
"input": { "$filter": {
"input": "$chanelStatuses",
"as": "el",
"cond": { "$eq": [ "$$el.channel", "$channel" ] }
}},
"as": "el",
"in": "$$el.status"
}},
0
]
}
},
"count": { "$sum": 1 }
}}
Older versions of MongoDB are going to going to require $unwind to access the matched array element.
In MongoDB 2.6 then you can still "pre-filter" the array before unwind:
[
{ "$project": {
"customerId": 1,
"channel": 1,
"status": {
"$setDifference": [
{ "$map": {
"input": "$channelStatuses",
"as": "el",
"in": {
"$cond": [
{ "$eq": [ "$$el.channel", "$channel" ] },
"$$el.status",
false
]
}
}},
[false]
]
}
}},
{ "$unwind": "$status" },
{ "$group": {
"_id": {
"customerId": "$customerId",
"channel": "$channel",
"status": "$status"
},
"count": { "$sum": 1 }
}}
]
And anything prior to that you "filter" after $unwind instead:
[
{ "$unwind": "$channelStatuses" },
{ "$project": {
"customerId": 1,
"channel": 1,
"status": "$channelStatuses.status",
"same": { "$eq": [ "$channelStatuses.status", "$channel" ] }
}},
{ "$match": { "same": true } },
{ "$group": {
"_id": "$_id",
"customerId": { "$first": "$customerId" },
"channel": { "$first": "$channel" },
"status": { "$first": "$status" }
}},
{ "$group": {
"_id": {
"customerId": "$customerId",
"channel": "$channel",
"status": "$status"
},
"count": { "$sum": 1 }
}}
]
In a lesser version than MongoDB 2.6 you also need to $project the result of the equality test between the two fields and then $match on the result in a seperate stage. You might also note the "two" $group stages, since the first one removes any possible duplicates of the "channel" values after the filter via the $first accumulators. The following $group is exactly the same as in the previous listing.
But if you cannot change the structure and need "flexible" matching of keys where you cannot supply every name, then you must use mapReduce:
db.collection.mapReduce(
function() {
emit({
"customerId": this.customerId,
"channel": this.channel,
"status": this.channelStatuses[this.channel].status
},1);
},
function(key,values) {
return Array.sum(values);
},
{ "out": { "inline": 1 } }
)
Where of course you can use that sort of notation

Related

MongoDB: Get all $matched elements individually from an array

I'm trying to get all matched elements individually, here is the sample data and the query.
// json
[
{
"name": "Mr Cool",
"ican": [
{
"subcategory": [
{
"id": "5bffdba824488b182ec86f8d", "name": "Cricket"
},
{
"id": "5bffdba824488b182ec86f8c", "name": "Footbal"
}
],
"category": "5bffdba824488b182ec86f88",
"name": "Sports"
}
]
}
]
// query
db.collection.aggregate([
{
"$match": {
"ican.subcategory.name": { $in: ["Cricket","Football"] }
}
},
{
"$project": { "_id": 1, "name": 1, }
}
])
I'm getting the combined result, I need the individual match record. I tried $all and $elementMatch but getting the same response. how can I get the results as below. I'm using $aggregate because I will be using $geoNear pipeline for getting the nearby users.
// current result
[
{
"_id": ObjectId("5a934e000102030405000000"),
"name": "Mr Cool"
}
]
// expected result
[
{
"_id": ObjectId("5a934e000102030405000000"),
"name": "Mr Cool",
"subcategory: "Cricket"
},
{
"_id": ObjectId("5a934e000102030405000000"),
"name": "Mr Cool",
"subcategory: "Footbal"
}
]
Thank you
Try this Mongo Playground
db.col.aggregate([
{"$unwind" : "$ican"},
{"$unwind" : "$ican.subcategory"},
{"$match" : {"ican.subcategory.name": { "$in": ["Cricket","Football"] }}},
{"$group" : {"_id" : null,"data" : {"$push" : {"_id" : "$_id","name" : "$name","subcategory" : "$ican.subcategory.name"}}}},
{"$unwind" : "$data"},
{"$replaceRoot" : {"newRoot" : "$data"}}
])
You can use below aggregation without the $unwind and for better performance
db.collection.aggregate([
{ "$match": { "ican.subcategory.name": { "$in": ["Cricket","Football"] }}},
{ "$project": {
"ican": {
"$reduce": {
"input": "$ican",
"initialValue": [],
"in": {
"$concatArrays": [
{ "$filter": {
"input": {
"$map": {
"input": "$$this.subcategory",
"as": "s",
"in": { "name": "$name", "subcategory": "$$s.name" }
}
},
"as": "fil",
"cond": { "$in": ["$$fil.subcategory", ["Football"]] }
}},
"$$value"
]
}
}
}
}},
{ "$unwind": "$ican" },
{ "$replaceRoot": { "newRoot": "$ican" }}
])

Aggregate and Reduce Nested Documents and Arrays

EDIT:
Our use case:
We get continues reports from servers about visitors. We pre-aggregate the data on the servers for a few seconds aber after that insert these "reports" into MongoDB.
In our dashboard we would like to query the different browsers, OSes, geolocation (country etc.) based on time ranges.
So like: Within the last 7 days, there were 1000 visitors using Chrome, 500 from Germany, 200 from England and so on.
I'm pretty stuck with a MongoDB query we need for our dashboard.
We have following report entries:
{
"_id" : ObjectId("59b9d08e402025326e1a0f30"),
"channel_perm_id" : "c361049fb4144b0e81b71c0b6cfdc296",
"source_id" : "insomnia",
"start_timestamp" : ISODate("2017-09-14T00:42:54.510Z"),
"end_timestamp" : ISODate("2017-09-14T00:42:54.510Z"),
"timestamp" : ISODate("2017-09-14T00:42:54.510Z"),
"resource_uri" : "b755d62a-8c0a-4e8a-945f-41782c13535b",
"sources_info" : {
"browsers" : [
{
"name" : "Chrome",
"count" : NumberLong(2)
}
],
"operating_systems" : [
{
"name" : "Mac OS X",
"count" : NumberLong(2)
}
],
"continent_ids" : [
{
"name" : "EU",
"count" : NumberLong(1)
}
],
"country_ids" : [
{
"name" : "DE",
"count" : NumberLong(1)
}
],
"city_ids" : [
{
"name" : "Solingen",
"count" : NumberLong(1)
}
]
},
"unique_sources" : NumberLong(1),
"requests" : NumberLong(1),
"cache_hits" : NumberLong(0),
"cache_misses" : NumberLong(1),
"cache_hit_size" : NumberLong(0),
"cache_refill_size" : NumberLong("170000000000")
}
Now, we need to aggregate these reports based on timestamp.
So far, so easy:
db.channel_report.aggregate([{
$group: {
_id: {
$dateToString: {
format: "%Y",
date: "$timestamp"
}
},
sources_info: {
$push: "$sources_info"
}
},
}];
But now it gets difficult for me. As you might already noticed, the sources_info object is the problem.
Instead of just "pushing" all sources info into array per group, we need to actually accumulate it.
So, if we have something like this:
{
sources_info: [
{
browsers: [
{
name: "Chrome,
count: 1
}
]
},
{
browsers: [
{
name: "Chrome,
count: 1
}
]
}
]
}
The array should be reduced to this:
{
sources_info:
{
browsers: [
{
name: "Chrome,
count: 2
}
]
}
}
We migrated from MySQL to MongoDB for analytics, but I have no clue how to model this behaviour in Mongo. Regarding the docs I almost think it is not possible, at least not with the current data structure.
Is there a nice solution for this? Or maybe even a different kind of data structure?
Cheers,
Chris from StriveCDN
The basic problem you have is that you are using "named keys" where you probably really should be instead using values to a consistent attribute path. This means instead of keys like "browsers", this probably should simply be "type": "browser" and so on on each entry.
The reasoning for this should become apparent on the general approaches to aggregating the data. It also really helps in querying in general. But the approaches basically involve coercing your initial data format into this kind of structure in order to aggregate it first.
With most recent releases ( MongoDB 3.4.4 and greater ), we can work with your named keys via $objectToArray and manipulate as follows:
db.channel_report.aggregate([
{ "$project": {
"timestamp": 1,
"sources": {
"$reduce": {
"input": {
"$map": {
"input": { "$objectToArray": "$sources_info" },
"as": "s",
"in": {
"$map": {
"input": "$$s.v",
"as": "v",
"in": {
"type": "$$s.k",
"name": "$$v.name",
"count": "$$v.count"
}
}
}
}
},
"initialValue": [],
"in": { "$concatArrays": ["$$value", "$$this"] }
}
}
}},
{ "$unwind": "$sources" },
{ "$group": {
"_id": {
"year": { "$year": "$timestamp" },
"type": "$sources.type",
"name": "$sources.name"
},
"count": { "$sum": "$sources.count" }
}},
{ "$group": {
"_id": { "year": "$_id.year", "type": "$_id.type" },
"v": { "$push": { "name": "$_id.name", "count": "$count" } }
}},
{ "$group": {
"_id": "$_id.year",
"sources_info": {
"$push": { "k": "$_id.type", "v": "$v" }
}
}},
{ "$addFields": {
"sources_info": { "$arrayToObject": "$sources_info" }
}}
])
Taking that back a notch to MongoDB 3.4 ( which should be default on most hosted services by now ) you could alternately declare each key name manually:
db.channel_report.aggregate([
{ "$project": {
"timestamp": 1,
"sources": {
"$concatArrays": [
{ "$map": {
"input": "$sources_info.browsers",
"in": {
"type": "browsers",
"name": "$$this.name",
"count": "$$this.count"
}
}},
{ "$map": {
"input": "$sources_info.operating_systems",
"in": {
"type": "operating_systems",
"name": "$$this.name",
"count": "$$this.count"
}
}},
{ "$map": {
"input": "$sources_info.continent_ids",
"in": {
"type": "continent_ids",
"name": "$$this.name",
"count": "$$this.count"
}
}},
{ "$map": {
"input": "$sources_info.country_ids",
"in": {
"type": "country_ids",
"name": "$$this.name",
"count": "$$this.count"
}
}},
{ "$map": {
"input": "$sources_info.city_ids",
"in": {
"type": "city_ids",
"name": "$$this.name",
"count": "$$this.count"
}
}}
]
}
}},
{ "$unwind": "$sources" },
{ "$group": {
"_id": {
"year": { "$year": "$timestamp" },
"type": "$sources.type",
"name": "$sources.name"
},
"count": { "$sum": "$sources.count" }
}},
{ "$group": {
"_id": { "year": "$_id.year", "type": "$_id.type" },
"v": { "$push": { "name": "$_id.name", "count": "$count" } }
}},
{ "$group": {
"_id": "$_id.year",
"sources": {
"$push": { "k": "$_id.type", "v": "$v" }
}
}},
{ "$project": {
"sources_info": {
"browsers": {
"$arrayElemAt": [
"$sources.v",
{ "$indexOfArray": [ "$sources.k", "browsers" ] }
]
},
"operating_systems": {
"$arrayElemAt": [
"$sources.v",
{ "$indexOfArray": [ "$sources.k", "operating_systems" ] }
]
},
"continent_ids": {
"$arrayElemAt": [
"$sources.v",
{ "$indexOfArray": [ "$sources.k", "continent_ids" ] }
]
},
"country_ids": {
"$arrayElemAt": [
"$sources.v",
{ "$indexOfArray": [ "$sources.k", "country_ids" ] }
]
},
"city_ids": {
"$arrayElemAt": [
"$sources.v",
{ "$indexOfArray": [ "$sources.k", "city_ids" ] }
]
}
}
}}
])
We can even wind that back to MongoDB 3.2 by using $map and $filter in place of $indexOfArray, but the general approach is the main thing to explain.
Concatenate arrays
The main thing that needs to happen is to take the data from the many different arrays with named keys and make a "single array" with a "type" property representing each key name. This is arguably how the data should be stored in the first place, and the first aggregation stage of either approach comes out like this:
/* 1 */
{
"_id" : ObjectId("59b9d08e402025326e1a0f30"),
"timestamp" : ISODate("2017-09-14T00:42:54.510Z"),
"sources" : [
{
"type" : "browsers",
"name" : "Chrome",
"count" : NumberLong(2)
},
{
"type" : "operating_systems",
"name" : "Mac OS X",
"count" : NumberLong(2)
},
{
"type" : "continent_ids",
"name" : "EU",
"count" : NumberLong(1)
},
{
"type" : "country_ids",
"name" : "DE",
"count" : NumberLong(1)
},
{
"type" : "city_ids",
"name" : "Solingen",
"count" : NumberLong(1)
}
]
}
Unwind and Group
Part of the data you want to accumulate on actually includes those "type" and "name" properties from "within" the array. Whenever you need to accumulate across documents from "within an array", the process you use is $unwind in order to be able to access those values as part of the grouping key.
What this means is that after using $unwind on the combined array, you then want to $group on both of those keys and the reduced "timestamp" detail in order to $sum the "count" values.
Since you then have "sub-levels" of detail ( i.e each name of browser within browsers ) then you use additional $group pipeline stages, gradually decreasing the granularity of the grouping keys and using $push to accumulate the details into arrays.
In either case, omitting the very last stage of output the accumulated structure comes out as:
/* 1 */
{
"_id" : 2017,
"sources_info" : [
{
"k" : "continent_ids",
"v" : [
{
"name" : "EU",
"count" : NumberLong(1)
}
]
},
{
"k" : "city_ids",
"v" : [
{
"name" : "Solingen",
"count" : NumberLong(1)
}
]
},
{
"k" : "country_ids",
"v" : [
{
"name" : "DE",
"count" : NumberLong(1)
}
]
},
{
"k" : "browsers",
"v" : [
{
"name" : "Chrome",
"count" : NumberLong(2)
}
]
},
{
"k" : "operating_systems",
"v" : [
{
"name" : "Mac OS X",
"count" : NumberLong(2)
}
]
}
]
}
This really is the final state of the data, though not represented in the same form as it was originally found. It is arguably complete at this point as any further processing is merely cosmetic to output as named keys again.
Output to named keys
As shown the varied approaches are either looking up the array entries by the matching key name, or by using $arrayToObject to transform the array content back into an object with named keys.
An alternate is also to simply do that very last manipulation in code, as shown by this .map() example of manipulating the cursor result in the shell:
db.channel_report.aggregate([
{ "$project": {
"timestamp": 1,
"sources": {
"$reduce": {
"input": {
"$map": {
"input": { "$objectToArray": "$sources_info" },
"as": "s",
"in": {
"$map": {
"input": "$$s.v",
"as": "v",
"in": {
"type": "$$s.k",
"name": "$$v.name",
"count": "$$v.count"
}
}
}
}
},
"initialValue": [],
"in": { "$concatArrays": ["$$value", "$$this"] }
}
}
}},
{ "$unwind": "$sources" },
{ "$group": {
"_id": {
"year": { "$year": "$timestamp" },
"type": "$sources.type",
"name": "$sources.name"
},
"count": { "$sum": "$sources.count" }
}},
{ "$group": {
"_id": { "year": "$_id.year", "type": "$_id.type" },
"v": { "$push": { "name": "$_id.name", "count": "$count" } }
}},
{ "$group": {
"_id": "$_id.year",
"sources_info": {
"$push": { "k": "$_id.type", "v": "$v" }
}
}},
/*
{ "$addFields": {
"sources_info": { "$arrayToObject": "$sources_info" }
}}
*/
]).map( d => Object.assign(d,{
"sources_info": d.sources_info.reduce((acc,curr) =>
Object.assign(acc,{ [curr.k]: curr.v }),{})
}))
Which of course applies to either aggregation pipeline approach.
And of course even $concatArrays can be replaced with $setUnion as long as all the entries have a unique identifying combination of "name" and "type" ( as they appear to be ), and that means with application of modifying the final output by processing the cursor instead you can apply the technique even as far back as MongoDB 2.6.
Final Output
And the final output ( actually aggregated of course, but the question only samples one document ) accumulates for all the sub-keys and reconstructs from the last sample output as shown as:
{
"_id" : 2017,
"sources_info" : {
"continent_ids" : [
{
"name" : "EU",
"count" : NumberLong(1)
}
],
"city_ids" : [
{
"name" : "Solingen",
"count" : NumberLong(1)
}
],
"country_ids" : [
{
"name" : "DE",
"count" : NumberLong(1)
}
],
"browsers" : [
{
"name" : "Chrome",
"count" : NumberLong(2)
}
],
"operating_systems" : [
{
"name" : "Mac OS X",
"count" : NumberLong(2)
}
]
}
}
Where every array entry under each key of sources_info is reduced down to it's cumulative count for every other entry sharing the same "name".

Filter Array Content to a Query containing $concatArrays

Given this function, I have a data set that I am querying. The data looks like this:
db.activity.insert(
{
"_id" : ObjectId("5908e64e3b03ca372dc945d5"),
"startDate" : ISODate("2017-05-06T00:00:00Z"),
"details" : [
{
"code" : "2",
"_id" : ObjectId("5908ebf96ae5003a4471c9b2"),
"walkDistance" : "03",
"jogDistance" : "01",
"runDistance" : "08",
"sprintDistance" : "01"
}
]
}
)
db.activity.insert(
{
"_id" : ObjectId("58f79163bebac50d5b2ae760"),
"startDate" : ISODate("2017-05-07T00:00:00Z"),
"details" : [
{
"code" : "2",
"_id" : ObjectId("58f7948fbebac50d5b2ae7f2"),
"walkDistance" : "01",
"jogDistance" : "02",
"runDistance" : "09",
"sprintDistance" : ""
}
]
}
)
Using this function, thanks to Neil Lunn, I am able to get my desired output:
db.activity.aggregate([
{ "$project": {
"_id": 0,
"unique": {
"$filter": {
"input": {
"$setDifference": [
{ "$concatArrays": [
"$details.walkDistance",
"$details.jogDistance",
"$details.runDistance",
"$details.sprintDistance"
]},
[]
]
},
"cond": { "$ne": [ "$$this", "" ] }
}
}
}},
{ "$unwind": "$unique" },
{ "$group": {
"_id": null,
"uniqueArray": { "$addToSet": "$unique" }
}}
])
However, I cannot add a match statement to the beginning.
db.activity.aggregate([
{$match: {"startDate" : ISODate("2017-05-06T00:00:00Z"), "details.code": "2" },
{$unwind: '$details'},
{$match: {"startDate" : ISODate("2017-05-06T00:00:00Z"), "details.code": "2" },
{ "$project": {
"_id": 0,
"unique": {
"$filter": {
"input": {
"$setDifference": [
{ "$concatArrays": [
"$details.walkDistance",
"$details.jogDistance",
"$details.runDistance",
"$details.sprintDistance"
]},
[]
]
},
"cond": { "$ne": [ "$$this", "" ] }
}
}
}},
{ "$unwind": "$unique" },
{ "$group": {
"_id": null,
"uniqueArray": { "$addToSet": "$unique" }
}}
])
Because it gives an error message of:
> $concatArrays only supports arrays, not string
How can I modify this query so that a $match statement can be added?
Don't $unwind the array you are feeding to $concatArrays. Instead apply $filter to only extract the matching values. And as stated, we can just use $setUnion for the 'unique concatenation' instead:
db.activity.aggregate([
{ "$match": { "startDate" : ISODate("2017-05-06T00:00:00Z"), "details.code": "2" } },
{ "$project": {
"_id": 0,
"unique": {
"$let": {
"vars": {
"filtered": {
"$filter": {
"input": "$details",
"cond": { "$eq": [ "$$this.code", "2" ] }
}
}
},
"in": {
"$setDifference": [
{ "$setUnion": [
"$$filtered.walkDistance",
"$$filtered.jogDistance",
"$$filtered.runDistance",
"$$filtered.sprintDistance"
]},
[""]
]
}
}
}
}},
{ "$unwind": "$unique" },
{ "$group": {
"_id": null,
"uniqueArray": { "$addToSet": "$unique" }
}}
])
Using $let makes things a bit cleaner syntax wise since you don't need to specify multiple $map and $filter statements "inline" as the source for $setUnion

Mongodb group by and sum and get media with Map

I have these collections in my database:
Items:
{ "IdUser" : "1", "IdItem" : "1" },
{ "IdUser" : "1", "IdItem" : "2" },
{ "IdUser" : "1", "IdItem" : "3" },
{ "IdUser" : "2", "IdItem" : "4" },
{ "IdUser" : "2", "IdItem" : "5" },
{ "IdUser" : "4", "IdItem" : "6" },
{ "IdUser" : "5", "IdItem" : "7" }
Users
{ "_id" : "1", "DateRegister" : ISODate("2016-03-29T22:00:38.764+0000") },
{ "_id" : "2", "DateRegister" : ISODate("2014-03-29T22:00:38.764+0000") },
{ "_id" : "2", "DateRegister" : ISODate("2015-02-29T22:00:38.764+0000") },
{ "_id" : "4", "DateRegister" : ISODate("2013-01-29T22:00:38.764+0000") },
{ "_id" : "5", "DateRegister" : ISODate("2016-04-29T22:00:38.764+0000") }
How can I obtain this result but FILTERED with users registered after 2015:
Users with one item: 2
Users with two items: 1
Users with three items: 1
I have tried with that, but I don't know how to filter... Thanks!
db.collection.aggregate([
{
"$group": {
"_id": "$IdUser",
"count": {
"$sum": { "$cond": [{ "$gt": [ "$IdItem", null ] }, 1, 0 ] }
}
}
},
{
"$group": {
"_id": "$count",
"users": { "$push": "$_id" }
}
},
{
"$project": {
"_id": 0,
"number_of_items": "$_id",
"number_of_users": { "$size": "$users" }
}
}
])
You may want to utilize the $lookup operator to perform a join of the items collection with the users collection and then do a $match filter on the DateRegistered field before piping the main grouping operations.
Following this example + the links herein to the documentation will give you an idea:
db.items.aggregate([
{
"$lookup": {
"from": "users",
"localField": "IdUser",
"foreignField": "_id",
"as": "user"
}
},
{ "$match": { "user.DateRegister": { "$gt": new Date(2015, 11, 31) } } },
{
"$group": {
"_id": "$IdUser",
"count": {
"$sum": { "$cond": [{ "$gt": [ "$IdItem", null ] }, 1, 0 ] }
}
}
},
{
"$group": {
"_id": "$count",
"users": { "$push": "$_id" }
}
},
{
"$project": {
"_id": 0,
"number_of_items": "$_id",
"number_of_users": { "$size": "$users" }
}
}
])
In the event that your MongoDB server does not support the $lookup operator, you will then need a workaround where you split the operations on the different collections i.e.
get a list of user id's that match the given date range criteria, this could be done with the distinct() method on the users collection with the date query option.
use that list in the items collection aggregation pipeline within the $match operator initial step.
The following demonstrates this:
// use distinct to get the user id's list
var userIds = db.users.distinct("_id", { "DateRegister": { "$gt": new Date(2015, 11, 31) } })
// perform your aggregation with a filtered collection using the list from the above operations
db.items.aggregate([
{ "$match": { "IdUser": { "$in": userIds } } },
{
"$group": {
"_id": "$IdUser",
"count": {
"$sum": { "$cond": [{ "$gt": [ "$IdItem", null ] }, 1, 0 ] }
}
}
},
{
"$group": {
"_id": "$count",
"users": { "$push": "$_id" }
}
},
{
"$project": {
"_id": 0,
"number_of_items": "$_id",
"number_of_users": { "$size": "$users" }
}
}
])

How to find document and single subdocument matching given criterias in MongoDB collection

I have collection of products. Each product contains array of items.
> db.products.find().pretty()
{
"_id" : ObjectId("54023e8bcef998273f36041d"),
"shop" : "shop1",
"name" : "product1",
"items" : [
{
"date" : "01.02.2100",
"purchasePrice" : 1,
"sellingPrice" : 10,
"count" : 15
},
{
"date" : "31.08.2014",
"purchasePrice" : 10,
"sellingPrice" : 1,
"count" : 5
}
]
}
So, can you please give me an advice, how I can query MongoDB to retrieve all products with only single item which date is equals to the date I pass to query as parameter.
The result for "31.08.2014" must be:
{
"_id" : ObjectId("54023e8bcef998273f36041d"),
"shop" : "shop1",
"name" : "product1",
"items" : [
{
"date" : "31.08.2014",
"purchasePrice" : 10,
"sellingPrice" : 1,
"count" : 5
}
]
}
What you are looking for is the positional $ operator and "projection". For a single field you need to match the required array element using "dot notation", for more than one field use $elemMatch:
db.products.find(
{ "items.date": "31.08.2014" },
{ "shop": 1, "name":1, "items.$": 1 }
)
Or the $elemMatch for more than one matching field:
db.products.find(
{ "items": {
"$elemMatch": { "date": "31.08.2014", "purchasePrice": 1 }
}},
{ "shop": 1, "name":1, "items.$": 1 }
)
These work for a single array element only though and only one will be returned. If you want more than one array element to be returned from your conditions then you need more advanced handling with the aggregation framework.
db.products.aggregate([
{ "$match": { "items.date": "31.08.2014" } },
{ "$unwind": "$items" },
{ "$match": { "items.date": "31.08.2014" } },
{ "$group": {
"_id": "$_id",
"shop": { "$first": "$shop" },
"name": { "$first": "$name" },
"items": { "$push": "$items" }
}}
])
Or possibly in shorter/faster form since MongoDB 2.6 where your array of items contains unique entries:
db.products.aggregate([
{ "$match": { "items.date": "31.08.2014" } },
{ "$project": {
"shop": 1,
"name": 1,
"items": {
"$setDifference": [
{ "$map": {
"input": "$items",
"as": "el",
"in": {
"$cond": [
{ "$eq": [ "$$el.date", "31.08.2014" ] },
"$$el",
false
]
}
}},
[false]
]
}
}}
])
Or possibly with $redact, but a little contrived:
db.products.aggregate([
{ "$match": { "items.date": "31.08.2014" } },
{ "$redact": {
"$cond": [
{ "$eq": [ { "$ifNull": [ "$date", "31.08.2014" ] }, "31.08.2014" ] },
"$$DESCEND",
"$$PRUNE"
]
}}
])
More modern, you would use $filter:
db.products.aggregate([
{ "$match": { "items.date": "31.08.2014" } },
{ "$addFields": {
"items": {
"input": "$items",
"cond": { "$eq": [ "$$this.date", "31.08.2014" ] }
}
}}
])
And with multiple conditions, the $elemMatch and $and within the $filter:
db.products.aggregate([
{ "$match": {
"$elemMatch": { "date": "31.08.2014", "purchasePrice": 1 }
}},
{ "$addFields": {
"items": {
"input": "$items",
"cond": {
"$and": [
{ "$eq": [ "$$this.date", "31.08.2014" ] },
{ "$eq": [ "$$this.purchasePrice", 1 ] }
]
}
}
}}
])
So it just depends on whether you always expect a single element to match or multiple elements, and then which approach is better. But where possible the .find() method will generally be faster since it lacks the overhead of the other operations, which in those last to forms does not lag that far behind at all.
As a side note, your "dates" are represented as strings which is not a very good idea going forward. Consider changing these to proper Date object types, which will greatly help you in the future.
Based on Neil Lunn's code I work with this solution, it includes automatically all first level keys (but you could also exclude keys if you want):
db.products.find(
{ "items.date": "31.08.2014" },
{ "shop": 1, "name":1, "items.$": 1 }
{ items: { $elemMatch: { date: "31.08.2014" } } },
)
With multiple requirements:
db.products.find(
{ "items": {
"$elemMatch": { "date": "31.08.2014", "purchasePrice": 1 }
}},
{ items: { $elemMatch: { "date": "31.08.2014", "purchasePrice": 1 } } },
)
Mongo supports dot notation for sub-queries.
See: http://docs.mongodb.org/manual/reference/glossary/#term-dot-notation
Depending on your driver, you want something like:
db.products.find({"items.date":"31.08.2014"});
Note that the attribute is in quotes for dot notation, even if usually your driver doesn't require this.