MongoDB Aggregation group and count strings - mongodb

I have a problem counting the different LogStatus values in my collection. I'd like the following result from a query:
Month | ImporterName | NrOfError | NrOfDebug | NrOfInfo | NrOfWarning
So this includes grouping by Month and ImporterName and counting the number of documents with the different statuses.
My MongoDB Collection:
{
"_id" : "8ec84cb7-5099-4a9d-be00-a40200a67c5a",
"Messages" : [
{
"LogStatus" : "Error",
"Message" : "My test message"
},
{
"LogStatus" : "Error",
"Message" : "My test message"
},
{
"LogStatus" : "Error",
"Message" : "My test message"
},
{
"LogStatus" : "Error",
"Message" : "My test message"
},
{
"LogStatus" : "Error",
"Message" : "My test message"
}
],
"StartTime" : new Date("2014-12-15T10:06:09.00Z"),
"EndTime" : new Date("2014-12-15T13:06:09.00Z"),
"HasErrors" : true,
"HasWarnings" : false,
"ImporterName" : "MyImporter"
}
I already have the following queries:
db.SessionLogItems.aggregate
([
{
$project:
{
month :{$month : "$StartTime"},
name: "$ImporterName",
status: "$Messages.LogStatus",
_id: 0
}
}
])
result:
month: 12, "name" : "importername", status: ["Error", "Error", "Info"]
and
db.SessionLogItems.aggregate
([
{
$unwind: "$Messages"
},
{
$group: { _id: "$Messages", Number : {$sum : 1 }}
},
{
$sort: {Number : -1 }
}
])
result:
"_id" : { "LogStatus" : "Warning", "Message" : "My test warning" }, "Number" :5
"_id" : { "LogStatus" : "Error", "Message" : "My test message" }, "Number" : 5
But I can't seem to figure out the correct query. Any help is appreciated!
EDIT:
My example above is just one out of many documents. I have several importers which have a startTime and EndTime. The importers have several logmessages and four possible LogStatusses: "Error", "Info", "Debug", "Warning". I'd like to have an overview per month and per importer how many errors, infos, debugs and warnings they produced.

Assuming there is no overlap in your "month" between StartTime and EndTime values then you can simply use the StartTime value as the basis for a grouping key. Most of the magic for your other "fields" comes from the $cond operator which decides whether to count the value or not:
db.SessionLogItems.aggregate([
  // Emit one document per entry of the Messages array so that every
  // log message can be inspected on its own
  { $unwind: "$Messages" },

  // Bucket by (month of StartTime, importer) and keep one running tally
  // per status: each message contributes 1 to the tally whose status it
  // carries and 0 to all the others
  { $group: {
    _id: {
      month: { $month: "$StartTime" },
      ImporterName: "$ImporterName"
    },
    NrOfError: {
      $sum: { $cond: { if: { $eq: [ "$Messages.LogStatus", "Error" ] }, then: 1, else: 0 } }
    },
    NrOfDebug: {
      $sum: { $cond: { if: { $eq: [ "$Messages.LogStatus", "Debug" ] }, then: 1, else: 0 } }
    },
    NrOfInfo: {
      $sum: { $cond: { if: { $eq: [ "$Messages.LogStatus", "Info" ] }, then: 1, else: 0 } }
    },
    NrOfWarning: {
      $sum: { $cond: { if: { $eq: [ "$Messages.LogStatus", "Warning" ] }, then: 1, else: 0 } }
    }
  }}
])
So basically the "LogStatus" value of each message is tested, and depending on whether it matches, either 1 or 0 is added to the corresponding count field.

Related

Change Values to Keys in MongoDB aggregation query

I have analytics documents in MongoDB that look like this:
{
"_id" : ObjectId("id1"),
"userObjectId" : "abc",
"eventType" : "First Signup",
"date" : ISODate("2017-09-10T20:46:42.144Z")
}
{
"_id" : ObjectId("id2"),
"userObjectId" : "abc",
"eventType" : "First Launch",
"date" : ISODate("2017-09-10T20:46:31.291Z")
}
Now I have constructed a query to group the results by date and event type:
{
"collection": "Analytics",
"aggregate": [
{
"$project": {
"yearMonthDay": {
"$dateToString": {
"format": "%Y-%m-%d",
"date": "$date"
}
},
"date": 1,
"userObjectId": 1,
"_id": 1,
"eventType": 1
}
},
{
"$group": {
"_id": { "ymd": "$yearMonthDay", "event": "$eventType" },
"num_in_group_count": {
"$sum": 1
},
"date": { "$last": "$yearMonthDay" },
"event": { "$last": "$eventType" }
}
}
]
}
This does generate data by date and type like so:
{
"_id" : {
"ymd" : "2017-09-10",
"event" : "First Signup"
},
"num_in_group_count" : 2.0,
"date" : "2017-09-10",
"event" : "First Signup"
}
{
"_id" : {
"ymd" : "2017-09-10",
"event" : "First Launch"
},
"num_in_group_count" : 1.0,
"date" : "2017-09-10",
"event" : "First Launch"
}
However I would like to graph this in Redash, so would really like the data structured with the events changed to keys like so:
{
"_id" : "2017-09-10",
"First Signup" : 2.0,
"First Launch" : 1.0,
"date" : "2017-09-10"
}
How can I achieve this query?
You should group by date first.
Then use the following query to get the result. Please check for null values in the First Signup and First Launch counts.
// Pivot event types into per-day counter fields: one output document per
// calendar day, with one numeric field per event type.
db.getCollection('heya').aggregate([
  {
    "$group": {
      // Group on the formatted day string rather than the raw timestamp.
      // Grouping on "$date" itself would put every event in its own group,
      // because the stored values differ down to the millisecond.
      "_id": {
        "$dateToString": { "format": "%Y-%m-%d", "date": "$date" }
      },
      // Each event adds 1 to the counter named after its type, 0 otherwise.
      "First Launch": {
        "$sum": {
          "$cond": {
            "if": { "$eq": [ "$eventType", "First Launch" ] },
            "then": 1,
            "else": 0
          }
        }
      },
      "First Signup": {
        "$sum": {
          "$cond": {
            "if": { "$eq": [ "$eventType", "First Signup" ] },
            "then": 1,
            "else": 0
          }
        }
      },
      // Expose the day as a regular field too (same value as _id), so that
      // charting tools can pick it up without reading _id.
      "date": {
        "$first": {
          "$dateToString": { "format": "%Y-%m-%d", "date": "$date" }
        }
      }
    }
  }
]);

mongodb aggregation conditional adding field based on value in array

Please excuse the title; I couldn't find a better description for what I am trying to do.
I have a collection of messages which stores the following information
code: a unique identification code of the message
from: phone number the message was sent from
to: phone number the message was sent to
message: the message text
readings: an array of ObjectIds. The ids reference documents in another collection named "users". If an ObjectId is present here, it means that this message has been read by that particular user.
Example Data
{
"_id" : ObjectId("59ba30c95869d32a803e4c4d"),
"code" : "SM54c9366e9b8544e89bdcf2ee841adea7",
"from" : "+49157xxxxxxx",
"to" : "+49160xxxxxxxx",
"message" : "xxxxxxxx",
"createdAt" : ISODate("2017-09-14T07:33:39.000Z"),
"lastModifiedAt" : ISODate("2017-09-14T07:33:32.324Z"),
"status" : "delivered",
"room" : ObjectId("59bfa293bd7717251cecfae7"),
"readings" : [
ObjectId("59c25751dcfdaf2944ee2fae"),
ObjectId("59c25751dcfdaf2944e32fae")
],
}
/* 2 */
{
"_id" : ObjectId("59ba3270f53b7f2fb4fa807f"),
"code" : "SM04585672d02644018e3ff466d73c571d",
"from" : "+49xxxxxxx",
"to" : "+49xxxxxxxx",
"message" : "xxxxxxx",
"createdAt" : ISODate("2017-09-14T07:40:42.000Z"),
"lastModifiedAt" : ISODate("2017-09-14T07:40:34.338Z"),
"status" : "delivered",
"room" : ObjectId("59bfa293bd7717251cecfae7"),
"readings" : [
ObjectId("59c25751dcfdaf2944ee2fae")
],
}
What I want to achieve is that a message gets an additional field "hasRead" if a specific user has read the message.
Here is the result i want to achieve
{
"_id" : ObjectId("59ba30c95869d32a803e4c4d"),
"code" : "SM54c9366e9b8544e89bdcf2ee841adea7",
"to" : "+491606983534",
"message" : "Schau mer mal",
"createdAt" : ISODate("2017-09-14T07:33:39.000Z"),
"lastModifiedAt" : ISODate("2017-09-14T07:33:32.324Z"),
"status" : "delivered",
"room" : ObjectId("59bfa293bd7717251cecfae7"),
"hasRead" : true
}
/* 2 */
{
"_id" : ObjectId("59ba3270f53b7f2fb4fa807f"),
"code" : "SM04585672d02644018e3ff466d73c571d",
"to" : "+491606983534",
"message" : "Schau mer mal",
"createdAt" : ISODate("2017-09-14T07:40:42.000Z"),
"lastModifiedAt" : ISODate("2017-09-14T07:40:34.338Z"),
"status" : "delivered",
"room" : ObjectId("59bfa293bd7717251cecfae7"),
"hasRead" : true
}
I constructed an aggregation with the following stages but it looks so BIG for such a simple task and i wonder if there is a more elegant, lighter way to do so ?
The stages are:
$addFields: Checks if the size of the readings array is 0. If it is, it adds a dummy ObjectId; otherwise it keeps the readings array
$unwind: Unwind the readings array
$addFields: adds a field "hasRead" upon checking if a specific ObjectId matches the "readings" field. True if equal else false
$group: Group by all fields except the "hasRead" field; "hasRead" is computed as the $max of hasRead
$project: Constructing the result to make it a flat object.
And here is my code:
db.getCollection('sms').aggregate([
{ $addFields: {
"readings": {
"$cond": {
if: { $or: [ { "$gt": [ {"$size": "$readings"},0] } ]} ,
then: "$readings",
else: [ObjectId("000000000000000000000000")]
}
}
}},
{ $unwind: "$readings" },
{ $addFields: {
"hasRead": {
$cond: {
if: {
$eq: ["$readings", ObjectId("59c25751dcfdaf2944ee2fae")]
},
then: true,
else: false
}
}
}
},
{ $group: {
_id: {
_id: "$_id",
code: "$code",
from: "$from",
to: "$to",
message: "$message",
createdAt: "$createdAt",
lastModifiedAt: "$lastModifiedAt",
room: "$room"
},
hasRead: { $max: "$hasRead" }
}},
{ $project: {
"_id": "$_id._id",
"code": "$_id.code",
"from": "$_id.from",
"to": "$_id.to",
"message": "$_id.message",
"createdAt": "$_id.createdAt",
"lastModifiedAt": "$_id.lastModifiedAt",
"room": "$_id.room",
"hasRead": "$hasRead"
}}
])
After browsing through answers Neil (see comment) gave to another question, I could simplify the query to this:
// Flag every message with whether one specific user has read it.
db.getCollection('sms').aggregate([
  // Reduce "readings" to the entries that equal the user's ObjectId.
  // $ifNull substitutes an empty array when "readings" is missing or null,
  // so that $filter here and $size below always receive an array.
  { "$addFields": {
    "hasRead": {
      "$filter": {
        "input": { "$ifNull": [ "$readings", [] ] },
        "as": "o",
        "cond": {
          "$eq": [ "$$o", ObjectId("59c25751dcfdaf2944ee2fae") ]
        }
      }
    }
  }},
  { "$project": {
    "_id": 1,
    "code": 1,
    "to": 1,
    "message": 1,
    "createdAt": 1,
    "lastModifiedAt": 1,
    "status": 1,
    "room": 1,
    // True when the FILTERED array is non-empty. This must measure
    // "$hasRead" (the matches for this user); measuring "$readings" would
    // report true for any message that anybody has read.
    "hasRead": { "$gt": [ { "$size": "$hasRead" }, 0 ] }
  }}
])
Way too late for this, but you can simply write:
// Project each message together with a boolean "hasRead" telling whether
// the given user's ObjectId appears in the message's "readings" array.
db.getCollection("sms").aggregate([
  {
    $project: {
      _id: 1,
      code: 1,
      to: 1,
      message: 1,
      createdAt: 1,
      lastModifiedAt: 1,
      status: 1,
      room: 1,
      hasRead: {
        // The aggregation $in operator raises an error when its second
        // argument is not an array, so fall back to an empty array for
        // messages whose "readings" field is missing or null.
        $in: [
          ObjectId("59c25751dcfdaf2944ee2fae"),
          { $ifNull: ["$readings", []] },
        ],
      },
    },
  },
]);
often the simplest solution is the correct one :)

MongoDB Aggregation, group by subobject keys

I have a mongo collection whose schema looks like this:
_id: ObjectId(),
segments: {
activity: 'value1',
activation: 'value2',
plan: 'value3'
}
I'm trying to use the aggregation framework to find out how many of my documents have the value1 for the segment activity for instance.
The problem is that I want to do that for every segment in the same request if possible, and that I don't know how many segments I'll have or even their name.
Basically here's what I'd like to do:
If I have these two documents:
{ _id: 1, segments: { activity: 'active', activation: 'inactive', plan: 'free' } }
{ _id: 2, segments: { activity: 'inactive', activation: 'inactive', plan: 'free' } }
I want to be able to see that two of them have the activation segment to inactive and the free plan, and that activity have 1 inactive and 1 active values. Here is what I want to get:
{
activity: {
active: 1,
inactive: 1
},
activation: {
inactive: 2
},
plan: {
free: 2
}
}
So basically, if you could just $group by key it would be great! Something like this:
{
$group: {
_id: { $concat: [ '$segments.$key', '-', '$segments.$key.$value' ],
count: { $sum: 1 }
}
}
Or if I could unwind on each key...
To get the counts, take advantage of the $cond operator in the $group pipeline step to evaluate the counts based on the subdocuments value, something like the following:
// Count, across the whole collection, how many documents carry each known
// value of each segment, then reshape the flat counters into nested objects.
db.collection.aggregate([
  {
    "$group": {
      // A constant _id folds every document into one group. Grouping on
      // "$_id" would yield one group per document and every counter would
      // be 0 or 1.
      "_id": null,
      // The sub-document is named "segments" (plural) in the schema; a path
      // through a non-existent field never matches, so all counts stay 0.
      "activity_active": {
        "$sum": {
          "$cond": [ { "$eq": [ "$segments.activity", "active" ] }, 1, 0 ]
        }
      },
      "activity_inactive": {
        "$sum": {
          "$cond": [ { "$eq": [ "$segments.activity", "inactive" ] }, 1, 0 ]
        }
      },
      "activation_active": {
        "$sum": {
          "$cond": [ { "$eq": [ "$segments.activation", "active" ] }, 1, 0 ]
        }
      },
      // Tests the activation segment (not activity) for the inactive value.
      "activation_inactive": {
        "$sum": {
          "$cond": [ { "$eq": [ "$segments.activation", "inactive" ] }, 1, 0 ]
        }
      },
      "plan_free": {
        "$sum": {
          "$cond": [ { "$eq": [ "$segments.plan", "free" ] }, 1, 0 ]
        }
      }
    }
  },
  {
    // Nest the flat counters under their segment name.
    "$project": {
      "_id": 0,
      "activity": {
        "active": "$activity_active",
        "inactive": "$activity_inactive"
      },
      "activation": {
        "active": "$activation_active",
        "inactive": "$activation_inactive"
      },
      "plan": {
        "free": "$plan_free"
      }
    }
  }
])
There could be a generic solution to this problem, but it might need a bit of post-processing.
To get output similar to this:
{
"_id" : {
"activity" : "active",
"activation" : "inactive"
},
"plan" : [{
"type" : "free",
"total" : 1
}, {
"type" : "paid",
"total" : 1
}
]
}, {
"_id" : {
"activity" : "inactive",
"activation" : "inactive"
},
"plan" : [{
"type" : "free",
"total" : 1
}
]
}, {
"_id" : {
"activity" : "inactive",
"activation" : "active"
},
"plan" : [{
"type" : "paid",
"total" : 3
}, {
"type" : "free",
"total" : 6
}
]
}
use a query like this:
db.collection.aggregate([
  // Stage 1: count the documents for every distinct
  // (activity, activation, plan) combination
  { "$group": {
    "_id": {
      "activity": "$segments.activity",
      "activation": "$segments.activation",
      "plan": "$segments.plan"
    },
    "total": { "$sum": 1 }
  }},
  // Stage 2: regroup on (activity, activation) only and collect the
  // per-plan totals from stage 1 into an array
  { "$group": {
    "_id": {
      "activity": "$_id.activity",
      "activation": "$_id.activation"
    },
    "plan": {
      "$push": { "type": "$_id.plan", "total": "$total" }
    }
  }}
])

MongoDB aggregate count based on multiple query fields - (Multiple field count)

My collection looks like this:
{
"_id" : ObjectId("55c8bd1d85b83e06dc54c0eb"),
"name" : "xxx",
"salary" : 10000,
"type" : "type1"
}
{
"_id" : ObjectId("55c8bd1d85b83e06dc54c0eb"),
"name" : "aaa",
"salary" : 10000,
"type" : "type2"
}
{
"_id" : ObjectId("55c8bd1d85b83e06dc54c0eb"),
"name" : "ccc",
"salary" : 10000,
"type" : "type2"
}
My query params will be coming as,
{salary=10000, type=type2}
so based on the query I need to fetch the count of above query params
The result should be something like this,
{ category: 'type1', count: 500 } { category: 'type2', count: 200 } { category: 'name', count: 100 }
Now I am getting count by hitting three different queries and constructing the result (or) server side iteration I can get the result.
Can anyone suggest or provide me good way to get above result
Your question is not very clearly presented, but it seems that what you want to do here is count the occurrences of the data in the fields, optionally filtering those fields by the values that match the criteria.
Here the $cond operator allows you to transform a logical condition into a value:
db.collection.aggregate([
  // A single group (_id: null) spanning the whole collection, with one
  // counter per field of interest
  { $group: {
    _id: null,
    // every document counts towards "name"
    name: { $sum: 1 },
    // counts only documents whose salary is at least 1000
    salary: {
      $sum: { $cond: { if: { $gte: [ "$salary", 1000 ] }, then: 1, else: 0 } }
    },
    // counts only documents whose type is exactly "type2"
    type: {
      $sum: { $cond: { if: { $eq: [ "$type", "type2" ] }, then: 1, else: 0 } }
    }
  }}
])
All values are in the same document, and it does not really make any sense to split them up here as this is additional work in the pipeline.
{ "_id" : null, "name" : 3, "salary" : 3, "type" : 2 }
Otherwise in the long form, which is not very performant due to needing to make a copy of each document for every key looks like this:
db.collection.aggregate([
// Attach the literal list of category names to every document
{ "$project": {
"name": 1,
"salary": 1,
"type": 1,
"category": { "$literal": ["name","salary","type"] }
}},
// One copy of each document per category name
{ "$unwind": "$category" },
// Group by category; each copy adds 1 only when the condition that belongs
// to its own category holds, otherwise it falls through to the next $cond
{ "$group": {
"_id": "$category",
"count": {
"$sum": {
"$cond": [
// category "name": counted when the document has a non-null "name"
{ "$and": [
{ "$eq": [ "$category", "name"] },
{ "$ifNull": [ "$name", false ] }
]},
1,
{ "$cond": [
// category "salary": counted when salary >= 1000
{ "$and": [
{ "$eq": [ "$category", "salary" ] },
{ "$gte": [ "$salary", 1000 ] }
]},
1,
{ "$cond": [
// category "type": counted when type equals "type2"
{ "$and": [
{ "$eq": [ "$category", "type" ] },
{ "$eq": [ "$type", "type2" ] }
]},
1,
// no branch matched: this copy contributes nothing
0
]}
]}
]
}
}
}}
])
And its output:
{ "_id" : "type", "count" : 2 }
{ "_id" : "salary", "count" : 3 }
{ "_id" : "name", "count" : 3 }
If your documents do not have uniform key names or otherwise cannot specify each key in your pipeline condition, then apply with mapReduce instead:
db.collection.mapReduce(
  // map: emit (key, 0 or 1) for every key of the document except _id
  function() {
    var doc = this;
    delete doc._id;
    Object.keys(this).forEach(function(key) {
      // A key counts by default; only the two filtered fields can
      // veto it by failing their condition.
      var value = 1;
      if ( ( key == "salary" ) && ( doc[key] < 1000 ) ) {
        value = 0;
      } else if ( ( key == "type" ) && ( doc[key] != "type2" ) ) {
        value = 0;
      }
      emit(key,value);
    });
  },
  // reduce: add up the emitted flags for each key
  function(key,values) {
    return Array.sum(values);
  },
  // return the result inline instead of writing it to a collection
  {
    "out": { "inline": 1 }
  }
);
And its output:
"results" : [
{
"_id" : "name",
"value" : 3
},
{
"_id" : "salary",
"value" : 3
},
{
"_id" : "type",
"value" : 2
}
]
Which is basically the same thing with a conditional count, except that you only specify the "reverse" of the conditions you want and only for the fields you want to filter conditions on. And of course this output format is simple to emit as separate documents.
The same approach applies: test the condition on the fields you want to filter on, and return 1 where the condition is met or 0 where it is not, so that summing those values gives the count.
You can use aggregation as following query:
// Count the documents per "type" among those matching the query parameters,
// and present each result as { category, count }.
db.collection.aggregate([
  // Keep only the documents that satisfy the incoming query parameters
  {
    $match: {
      salary: 10000
      // add any other condition here
    }
  },
  // One group per distinct type, counting its documents
  {
    $group: {
      _id: "$type",
      count: { $sum: 1 }
    }
  },
  // Rename _id to "category" and drop _id from the output
  {
    $project: {
      category: "$_id",
      count: 1,
      _id: 0
    }
  }
])

Order by date in sub-document and then by document

I have a simple "Event" mongo schema. Two sample documents are below :
Event Document #1
{
"event_name": "Some nice event",
"venues": [
{
"venue_name": "venue #1",
"shows": [
{
"show_time": "2014-06-18T07:46:02.415Z",
"capacity": 20
},
{
"show_time": "2014-06-20T07:46:02.415Z",
"capacity": 40
}
]
},
{
"venue_name": "venue #2",
"shows": [
{
"show_time": "2014-06-17T07:46:02.415Z",
"capacity": 20
},
{
"show_time": "2014-06-24T07:46:02.415Z",
"capacity": 40
}
]
}
]
}
Event Document #2
{
"event_name": "Another nice event",
"venues": [
{
"venue_name": "venue #1",
"shows": [
{
"show_time": "2014-06-19T07:46:02.415Z",
"capacity": 20
},
{
"show_time": "2014-06-16T07:46:02.415Z",
"capacity": 40
}
]
}
]
}
I need to query this collection of event documents and fetch the events with the closest shows, with respect to a particular time.
So for e.g., if I had to find events happening on or after 16 Jun, I should get document #2 followed by document #1, with the venue sub-document order as [venue #2, venue #1].
On the other hand, if I wanted events happening on or after 18 Jun, I should get document #1, with [venue #1, venue #2], followed by document #2.
Essentially, I need to be able to sort by the show_time of the nested sub-document. And this sorting should work on multiple venue sub-documents.
According to mongo's documentation, this doesn't seem to be supported, so is there a way of using aggregation to achieve this?
Or is there a way to rejig the schema to support such queries?
Or is mongoDB the wrong use-case for such scenarios altogether?
Really good question. Hoping that your dates are real date but the lexical form should not really matter here. The following form should do it, as long as you take the dates into consideration:
db.event.aggregate([
// Keep only the documents that have at least one show on or after the
// requested date
{ "$match": {
"venues.shows.show_time": { "$gte": new Date("2014-06-16") }
}},
// Unwind both array levels: one document per (event, venue, show)
{ "$unwind": "$venues" },
{ "$unwind": "$venues.shows" },
// Sort by show time so the shows are pushed back in chronological order
{ "$sort": { "venues.shows.show_time": 1 } },
// Regroup per (event, venue): rebuild the "shows" array and record the
// earliest qualifying show time of that venue
{ "$group": {
"_id": {
"_id": "$_id",
"event_name": "$event_name",
"venue_name": "$venues.venue_name"
},
"shows": {
"$push": "$venues.shows"
},
"earliest": {
"$min": {
// Shows before the requested date yield null instead of their time.
// NOTE(review): this relies on $min disregarding null values - confirm
// for the server version in use.
"$cond": [
{ "$gte": [
"$venues.shows.show_time",
new Date("2014-06-16")
]},
"$venues.shows.show_time",
null
]
}
}
}},
// Sort the venues by their earliest qualifying show, which fixes the order
// in which they are pushed in the next stage
{ "$sort": { "earliest": 1 } },
// Regroup per event: rebuild the "venues" array and record the earliest
// qualifying show time across all venues of the event
{ "$group": {
"_id": "$_id._id",
"event_name": { "$first": "$_id.event_name" },
"venues": {
"$push": {
"venue_name": "$_id.venue_name",
"shows": "$shows"
}
},
"earliest": {
"$min": {
"$cond": [
{ "$gte": [
"$earliest",
new Date("2014-06-16")
]},
"$earliest",
null
]
}
}
}},
// Sort the events by their earliest qualifying show
{ "$sort": { "earliest": 1 } },
// Drop the helper "earliest" field from the output
{ "$project": {
"event_name": 1,
"venues": 1
}}
])
So most of this looks reasonably straightforward if you have some experience with the aggregation framework. If not then there is some general explaining, plus there are some "funky" things happening as we evaluate further.
The first steps in aggregation are to $match just like any normal query and then to $unwind the arrays you want to process. The "unwind" statement effectively "de-normalizes" the documents contained in the array to be standard documents by themselves.
The next $sort ends up as a "prettying up" function as the "earliest" event in each "set" will be at the top as a result.
As there are "two" levels of arrays, you do the grouping in two stages via the $group pipeline stage.
The first $group "groups" by "document", "event_name" and "venue". All of the shows are put back into their original array form, but at this time we extract the $min value for the "show_time".
The value taken is not just the ordinary "minimal" value. Here we use the $cond operator to make sure that the value returned must be "greater than or equal to" the date that you were requesting in the query initially. This makes sure that any "earlier" values are not taken into consideration when "sorting".
The next thing to do is to $sort on that "earliest" date, to keep the entries for the "venues" in order. The following stages then do the same as above, but "grouping" back to the original documents this time, then finally "sorting" in the order of which "show_time" would be the "earliest".
The result from the dates shown as input would be your desired result for the 16th:
{
"_id" : ObjectId("53a95263a1923f45a6c2d3dd"),
"event_name" : "Another nice event",
"venues" : [
{
"venue_name" : "venue #1",
"shows" : [
{
"show_time" : ISODate("2014-06-16T07:46:02.415Z"),
"capacity" : 40
},
{
"show_time" : ISODate("2014-06-19T07:46:02.415Z"),
"capacity" : 20
}
]
}
]
}
{
"_id" : ObjectId("53a952b5a1923f45a6c2d3de"),
"event_name" : "Some nice event",
"venues" : [
{
"venue_name" : "venue #2",
"shows" : [
{
"show_time" : ISODate("2014-06-17T07:46:02.415Z"),
"capacity" : 20
},
{
"show_time" : ISODate("2014-06-24T07:46:02.415Z"),
"capacity" : 40
}
]
},
{
"venue_name" : "venue #1",
"shows" : [
{
"show_time" : ISODate("2014-06-18T07:46:02.415Z"),
"capacity" : 20
},
{
"show_time" : ISODate("2014-06-20T07:46:02.415Z"),
"capacity" : 40
}
]
}
]
}
And by changing the input to the 18th you also get the desired result:
{
"_id" : ObjectId("53a952b5a1923f45a6c2d3de"),
"event_name" : "Some nice event",
"venues" : [
{
"venue_name" : "venue #1",
"shows" : [
{
"show_time" : ISODate("2014-06-18T07:46:02.415Z"),
"capacity" : 20
},
{
"show_time" : ISODate("2014-06-20T07:46:02.415Z"),
"capacity" : 40
}
]
},
{
"venue_name" : "venue #2",
"shows" : [
{
"show_time" : ISODate("2014-06-17T07:46:02.415Z"),
"capacity" : 20
},
{
"show_time" : ISODate("2014-06-24T07:46:02.415Z"),
"capacity" : 40
}
]
}
]
}
{
"_id" : ObjectId("53a95263a1923f45a6c2d3dd"),
"event_name" : "Another nice event",
"venues" : [
{
"venue_name" : "venue #1",
"shows" : [
{
"show_time" : ISODate("2014-06-16T07:46:02.415Z"),
"capacity" : 40
},
{
"show_time" : ISODate("2014-06-19T07:46:02.415Z"),
"capacity" : 20
}
]
}
]
}
Also if you want to go further with this, just add an additional $match stage, and that can filter out "events" that occur before the date that is requested in the query:
db.event.aggregate([
// Keep only the documents that have at least one show on or after the
// requested date
{ "$match": {
"venues.shows.show_time": { "$gte": new Date("2014-06-18") }
}},
// Unwind both array levels: one document per (event, venue, show)
{ "$unwind": "$venues" },
{ "$unwind": "$venues.shows" },
// Additional filter: now that each show is its own document, discard the
// individual shows that take place before the requested date
{ "$match": {
"venues.shows.show_time": { "$gte": new Date("2014-06-18") }
}},
// Sort by show time so the shows are pushed back in chronological order
{ "$sort": { "venues.shows.show_time": 1 } },
// Regroup per (event, venue): rebuild the "shows" array and record the
// earliest remaining show time of that venue
{ "$group": {
"_id": {
"_id": "$_id",
"event_name": "$event_name",
"venue_name": "$venues.venue_name"
},
"shows": {
"$push": "$venues.shows"
},
"earliest": {
"$min": {
"$cond": [
{ "$gte": [
"$venues.shows.show_time",
new Date("2014-06-18")
]},
"$venues.shows.show_time",
null
]
}
}
}},
// Sort the venues by their earliest show, which fixes the order in which
// they are pushed in the next stage
{ "$sort": { "earliest": 1 } },
// Regroup per event: rebuild the "venues" array and record the earliest
// show time across all venues of the event
{ "$group": {
"_id": "$_id._id",
"event_name": { "$first": "$_id.event_name" },
"venues": {
"$push": {
"venue_name": "$_id.venue_name",
"shows": "$shows"
}
},
"earliest": {
"$min": {
"$cond": [
{ "$gte": [
"$earliest",
new Date("2014-06-18")
]},
"$earliest",
null
]
}
}
}},
// Sort the events by their earliest show
{ "$sort": { "earliest": 1 } },
// Drop the helper "earliest" field from the output
{ "$project": {
"event_name": 1,
"venues": 1
}}
])
With the result:
{
"_id" : ObjectId("53a952b5a1923f45a6c2d3de"),
"event_name" : "Some nice event",
"venues" : [
{
"venue_name" : "venue #1",
"shows" : [
{
"show_time" : ISODate("2014-06-18T07:46:02.415Z"),
"capacity" : 20
},
{
"show_time" : ISODate("2014-06-20T07:46:02.415Z"),
"capacity" : 40
}
]
},
{
"venue_name" : "venue #2",
"shows" : [
{
"show_time" : ISODate("2014-06-24T07:46:02.415Z"),
"capacity" : 40
}
]
}
]
}
{
"_id" : ObjectId("53a95263a1923f45a6c2d3dd"),
"event_name" : "Another nice event",
"venues" : [
{
"venue_name" : "venue #1",
"shows" : [
{
"show_time" : ISODate("2014-06-19T07:46:02.415Z"),
"capacity" : 20
}
]
}
]
}