need to convert the data in another format - mongodb

We have Data:
[
{
"_id": ObjectId("5f87e152219aaf1f9404ef3f"),
"parameterId": "5f914ca2679bae721d38410b",
"average": 574998.153846154,
"count": 26.0,
"date": ISODate("2020-09-08T18:30:00.000Z"),
"_class": "org.nec.iotplatform.entities.RawData"
},
{
"_id": ObjectId("5f87e1e2219aaf1f9404eff5"),
"parameterId": "5f914ca2679bae721d38410b",
"average": 494217.606225681,
"count": 1285.0,
"date": ISODate("2020-09-09T18:30:00.000Z"),
"_class": "org.nec.iotplatform.entities.RawData"
}
]
I have query which I am executing on above data and then getting the result as below the query
db.collection.aggregate([
{
"$project": {
"year": {
"$year": "$date"
},
"month": {
"$month": "$date"
},
"dayOfMonth": {
"$dayOfMonth": "$date"
},
"average": "$average",
"count": "$count",
"Symbol": 1
}
},
{
"$group": {
"_id": {
year: "$year",
month: "$month",
dayOfMonth: "$dayOfMonth"
},
"data": {
"$push": "$$ROOT"
}
}
},
{
"$project": {
"average": {
"$divide": [
{
"$reduce": {
"input": "$data",
"initialValue": 0,
"in": {
"$add": [
"$$value",
{
"$multiply": [
"$$this.count",
"$$this.average"
]
}
]
}
}
},
{
$reduce: {
input: "$data",
initialValue: 0,
in: {
"$add": [
"$$value",
"$$this.count"
]
}
}
}
]
}
}
}
])
I am getting output :
[{
"_id" : {
"year" : 2020,
"month" : 9,
"dayOfMonth" : 8
},
"average" : 574998.153846154
},
{
"_id" : {
"year" : 2020,
"month" : 9,
"dayOfMonth" : 9
},
"average" : 494217.606225681
}]
But I need to format the result data like this. by adding the date like this:
{
2020-09-08T18:30:00.000Z : 574998.153846154,
2020-09-09T18:30:00.000Z : 494217.606225681
}
Thanks in advance.

You can use $dateFromString to create the date you want.
Also, you need $concat and $toString to parse the numbers to string and concat into a single string.
After that, using $group you can get the all values you need in the same array. And how you want set the date as KEY, is neccesary create fields k and v and parse again to string.
With the values together, using $arrayToObject you can cerate the schema you want date: average and use $replaceRoot to get only the values at top level.
To do this you need to add this query at the end of your aggregation.
{
"$set": {
"date": { "$dateFromString": { "dateString": {
"$concat": [
{ "$toString": "$_id.dayOfMonth" }, "-",
{ "$toString": "$_id.month" }, "-",
{ "$toString": "$_id.year" }
] },
"format": "%d-%m-%Y", "timezone": "Europe/Madrid"
} } }
},
{
"$group": {
"_id": null,
"date": { "$push": { "k": { "$toString": "$date" }, "v": "$average" } }
}
},
{
"$replaceRoot": { "newRoot": { "$arrayToObject": "$date" } }
}
This query add a new field called date like this:
"date": ISODate("2020-09-08T04:00:00Z")
I've used Europe/Madrid as timezone but you can choose you want to get your desired date.
Example here.
The output is:
{
"2020-09-07T22:00:00.000Z": 574998.153846154,
"2020-09-08T22:00:00.000Z": 494217.606225681
}
Using America/New_York as timezone:
{
"2020-09-08T04:00:00.000Z": 574998.153846154,
"2020-09-09T04:00:00.000Z": 494217.606225681
}

Related

Group and count in Mongo DB

I have many tweets object like this:
{
"_id" : ObjectId("5a2f4a381cb29b482553e2c9"),
"user_id" : 21898942,
"created_at" : ISODate("2009-03-09T19:48:50Z"),
"id" : 1301923516,
"place" : "",
"retweet_count" : 0,
"tweet" : "Save the Date! March 28th Vietnamese Cooking Class! Call to Reserve 312.255.0088",
"favorite_count" : 0
"type": A
}
I'm using this code to qroup the tweets by date and by type:
pipeline = [
{
"$group": {
"_id": {
"date": {
"$dateToString": {
"format": "%Y-%m-%d",
"date": "$created_at"
}
},
"type": "$type"
},
"count": {
"$sum": 1
}
}
}
]
results = mongo.db.tweets.aggregate(pipeline)
Here is the result I get:
{
"_id": {
"date": "2009-03-17",
"type": A
},
"count": 4
,
{
"_id": {
"date": "2009-03-17",
"type": B
},
"count": 6
}
But now I want to have the result in this format:
{date: "2009-03-17", A: 4, B: 6, C: 9}
Is there anyway I can achieve this through aggregate directly?
Note: I'm using MongoDB and PyMongo
You can try the below aggregation query in 3.6 version.
Added the second group to create array of type and count value pairs followed by $mergeObjects to merge date key value with $arrayToObject, which produces create a type value key and count value pairs, to generate the expected response.
$replaceRoot to promote the document to the top level.
pipeline = [
{
"$group": {
"_id": {
"date": {
"$dateToString": {
"format": "%Y-%m-%d",
"date": "$created_at"
}
},
"type": "$type"
},
"count": {
"$sum": 1
}
}
},
{
"$group": {
"_id": "$_id.date",
"typeandcount": {
"$push": {
"k": "$_id.type",
"v": "$count"
}
}
}
},
{
"$replaceRoot": {
"newRoot": {
"$mergeObjects": [
{
"date": "$_id"
},
{
"$arrayToObject": "$typeandcount"
}
]
}
}
}
]
Mongo 3.4 version:
Replace the last stage with below
{
"$replaceRoot": {
"newRoot": {
"$arrayToObject": {
"$concatArrays": [
[
{
"k": "date",
"v": "$_id"
}
],
"$typeandcount"
]
}
}
}
}

Aggregate Pipeline groups by day but projects a null date

I'm attempting to group the items in a collection by year/month/day. The grouping should be based on the pubDate and pubTimezoneOffset.
I've got an aggregate pipeline that:
- $project - adds the timezoneOffset to the pubDate
- $group - groups by the modified pubDate
- $project - removes the timezoneOffset
- $sort - sorts by pubDate
I tested each stage on it's own and it seems to be some issue with the second $project. In the final output the pubDate is null.
I've been going over it for a few hours now and can't see where I've gone wrong. What am I missing?
The aggregate pipeline:
db.messages.aggregate([
{
$project: {
_id: 1,
pubTimezoneOffset: 1,
pubDate: {
$add: [
'$pubDate', {
$add: [
{ $multiply: [ '$pubTimezoneOffset.hours', 60, 60, 1000 ] },
{ $multiply: [ '$pubTimezoneOffset.minutes', 60, 1000 ] }
]
}
]
}
}
},
{
$group: {
_id: {
year: { $year: '$pubDate' },
month: { $month: '$pubDate' },
day: { $dayOfMonth: '$pubDate' }
},
count: { $sum: 1 },
messages: {
$push: {
_id: '$_id',
pubTimezoneOffset: '$pubTimezoneOffset',
pubDate: '$pubDate'
}
}
}
},
{
$project: {
_id: 1,
messages: {
_id: 1,
pubTimezoneOffset: 1,
pubDate: {
$subtract: [
'$pubDate', {
$add: [
{ $multiply: [ '$pubTimezoneOffset.hours', 60, 60, 1000 ] },
{ $multiply: [ '$pubTimezoneOffset.minutes', 60, 1000 ] }
]
}
]
}
},
count: 1
}
},
{
$sort: {
'_id.year': -1,
'_id.month': -1,
'_id.day': -1
}
}
]).pretty();
To recreate the source data:
db.messages.insertOne({
pubDate: ISODate('2017-10-25T10:00:00:000Z'),
pubTimezoneOffset: {
hours: -7,
minutes: 0
}
});
db.messages.insertOne({
pubDate: ISODate('2017-10-25T11:00:00:000Z'),
pubTimezoneOffset: {
hours: -7,
minutes: 0
}
});
db.messages.insertOne({
pubDate: ISODate('2017-10-24: 10:00:00:000Z'),
pubTimezoneOffset: {
hours: -7,
minutes: 0
}
});
db.messages.insertOne({
pubDate: ISODate('2017-10-24: 11:00:00:000Z'),
pubTimezoneOffset: {
hours: -7,
minutes: 0
}
});
Running it in mongo shell outputs:
{
"_id" : {
"year" : 2017,
"month" : 10,
"day" : 25
},
"count" : 2,
"messages" : [
{
"_id" : ObjectId("59f0e8b47d0a206bdfde87b3"),
"pubTimezoneOffset" : {
"hours" : -7,
"minutes" : 0
},
"pubDate" : null
},
{
"_id" : ObjectId("59f0e8b47d0a206bdfde87b4"),
"pubTimezoneOffset" : {
"hours" : -7,
"minutes" : 0
},
"pubDate" : null
}
]
}
{
"_id" : {
"year" : 2017,
"month" : 10,
"day" : 23
},
"count" : 2,
"messages" : [
{
"_id" : ObjectId("59f0e8b47d0a206bdfde87b5"),
"pubTimezoneOffset" : {
"hours" : -7,
"minutes" : 0
},
"pubDate" : null
},
{
"_id" : ObjectId("59f0e8b47d0a206bdfde87b6"),
"pubTimezoneOffset" : {
"hours" : -7,
"minutes" : 0
},
"pubDate" : null
}
]
}
Kudos for the attempt but, you actually have quite a few things conceptually incorrect here, with the basic error you are seeing is because your premise of "array projection" is incorrect. You are trying to refer to variables "inside the array" by simply notating the "property name".
What you actually need to do here is apply $map in order to apply the functions to "transform" each element:
db.messages.aggregate([
{ "$project": {
"pubTimezoneOffset": 1,
"pubDate": {
"$add": [
"$pubDate",
{ "$add": [
{ "$multiply": [ '$pubTimezoneOffset.hours', 60 * 60 * 1000 ] },
{ "$multiply": [ '$pubTimezoneOffset.minutes', 60 * 1000 ] }
]}
]
}
}},
{ "$group": {
"_id": {
"year": { "$year": "$pubDate" },
"month": { "$month": "$pubDate" },
"day": { "$dayOfMonth": "$pubDate" }
},
"count": { "$sum": 1 },
"messages": {
"$push": {
"_id": "$_id",
"pubTimezoneOffset": "$pubTimezoneOffset",
"pubDate": "$pubDate"
}
}
}},
{ "$project": {
"messages": {
"$map": {
"input": "$messages",
"as": "m",
"in": {
"_id": "$$m._id",
"pubTimezoneOffset": "$$m.pubTimezoneOffset",
"pubDate": {
"$subtract": [
"$$m.pubDate",
{ "$add": [
{ "$multiply": [ "$$m.pubTimezoneOffset.hours", 60 * 60 * 1000 ] },
{ "$multiply": [ "$$m.pubTimezoneOffset.minutes", 60 * 1000 ] }
]}
]
}
}
}
},
"count": 1
}},
{ "$sort": { "_id": -1 } }
]).pretty();
Noting here that you are doing a lot of unnecessary work in "tranforming" the dates kept in the array, and then trying to "tranform" them back to the original state. Instead, you should have simply supplied a "variable" with $let to the _id of $group and left the original document state "as is" using $$ROOT instead of naming all the fields:
db.messages.aggregate([
{ "$group": {
"_id": {
"$let": {
"vars": {
"pubDate": {
"$add": [
"$pubDate",
{ "$add": [
{ "$multiply": [ '$pubTimezoneOffset.hours', 60 * 60 * 1000 ] },
{ "$multiply": [ '$pubTimezoneOffset.minutes', 60 * 1000 ] }
]}
]
}
},
"in": {
"year": { "$year": "$$pubDate" },
"month": { "$month": "$$pubDate" },
"day": { "$dayOfMonth": "$$pubDate" }
}
}
},
"docs": { "$push": "$$ROOT" }
}},
{ "$sort": { "_id": -1 } }
])
Also note that $sort simply does actually consider all the "sub-keys" anyway, so there is no need to name them explicitly.
Back to your error, the point of $map is essentially because whilst you can notate array "field inclusion" with MongoDB 3.2 and above like this:
"messages": {
"_id": 1,
"pubTimeZoneOffset": 1
}
The thing you cannot do is actually "calculate values" on the elements themselves. You tried "$pubDate" which actually looks in the "ROOT" space for a property of that name, which does not exist and is null. If you then tried:
"messages": {
"_id": 1,
"pubTimeZoneOffset": 1,
"pubDate": "$messages.pubDate"
}
Then you would get "a result", but not the result you might think. Because what would actually be included in "every element" is the value of that property in each array element as a "new array" itself.
So the short and sweet is use $map instead, which iterates the array elements with a local variable referring to the current element for you to notate values for in expressions.
MongoDB 3.6
MongoDB date operators are all timezone aware. So instead of all the juggling then all you need do is supply the additional "timezone" parameter to any option and the conversion will be done for you.
As a sample:
db.messages.aggregate([
{ "$group": {
"_id": {
"$dateToString": {
"date": "$pubDate",
"format": "%Y-%m-%d",
"timezone": {
"$concat": [
{ "$cond": {
"if": { "$gt": [ "$pubTimezoneOffset", 0 ] },
"then": "+",
"else": "-"
}},
{ "$let": {
"vars": {
"hours": { "$substr": [{ "$abs": "$pubTimezoneOffset.hours" },0,2] },
"minutes": { "$substr": [{ "$abs": "$pubTimezoneOffset.minutes" },0,2] }
},
"in": {
"$concat": [
{ "$cond": {
"if": { "$eq": [{ "$strLenCP": "$$hours" }, 1 ] },
"then": { "$concat": [ "0", "$$hours" ] },
"else": "$$hours"
}},
":",
{ "$cond": {
"if": { "$eq": [{ "$strLenCP": "$$minutes" }, 1 ] },
"then": { "$concat": [ "0", "$$minutes" ] },
"else": "$$minutes"
}}
]
}
}}
]
}
}
},
"docs": { "$push": "$$ROOT" }
}},
{ "$sort": { "_id": -1 } }
])
Note that most of the "juggling" in there is to convert your own "offset" to the "string" format required by the new operators. If you simply stored this as "offset": "-07:00" then you can instead simply write:
db.messages.aggregate([
{ "$group": {
"_id": {
"$dateToString": {
"date": "$pubDate",
"format": "%Y-%m-%d",
"timezone": "$offset"
}
},
"docs": { "$push": "$$ROOT" }
}},
{ "$sort": { "_id": -1 } }
])
Please Reconsider
I can't let this pass without making a note that your general approach here is conceptually incorrect. Storing "offset" or "local time string" within the database is just intrinsically wrong.
The date information should be stored as UTC and should be returned as UTC. Sure you can and "should" covert when aggregating, but the general premise is that you always convert back to UTC. And "conversion" comes from the "locale of the observer" and not a "stored" adjustment. Because dates are always relative to the "observer" point of view, and are not from the "point of origin" as you seem to have interpreted it.
I put some lengthy detail on this on Group by Date with Local Time Zone in MongoDB about why you store this way and why "locale" conversion from the "observer" is necessary. That also details "Daylight savings considerations" from the observer point of view.
The basic premise there still remains the same when MongoDB becomes "timezone aware" in that you :
Store in UTC
Query with local time converted to UTC
Aggregate converted from the "observer" offset
Convert the "offset" back to UTC
Because at the end of the day it's the "clients" job to supply that "locale" conversion, since that's the part that "knows where it is".

Match the field in mongodb

I have mongodb sample data result like this:
{
"_id" : {
"month" : 3,
"day" : 24,
"year" : 2017
},
"commodity" : [
{
"commodityId" : ObjectId("58d434c30da1364f1e2d682d"),
"commodityStock" : "88889s"
}
],
"totalStock" : 0,
"count" : 1.0 }
my question is, How can i get the result where month = 3 with $match?
below is my query:
db.orders.aggregate(
[
{ $match : {_id.month : 3}},
{
$group : {
_id : { month: { $month: "$createdAt" }, day: { $dayOfMonth: "$createdAt" }, year: { $year: "$createdAt" } },
commodity : {$push : {
'commodityId' : "$commodity",
'commodityStock' : "$stock",
}
},
totalStock: { $sum: "$stock" },
count: { $sum: 1 }
}
}
]
)
You could use a $redact pipeline which incorporates the functionality of $project and $match so that you can filter the documents in the collection by using a logical condition with the $cond operator and uses the special operations $$KEEP to "keep" the document where the logical condition is true or $$PRUNE to "remove" the document where the condition was false.
db.orders.aggregate([
{
"$redact": {
"$cond": [
{ "$eq": [{ "$month": "$createdAt" }, 3]},
"$$KEEP",
"$$PRUNE"
]
}
},
{
"$group": {
"_id": {
"month": { "$month": "$createdAt" },
"day": { "$dayOfMonth": "$createdAt" },
"year": { "$year": "$createdAt" }
},
"commodity": {
"$push": {
"commodityId": "$commodity",
"commodityStock": "$stock",
}
},
"totalStock": { "$sum": "$stock" },
"count": { "$sum": 1 }
}
}
])
Keep in mind that $redact does not use indexes, it performs a collection scan, but if you need to take advantage of indexes use the $project and $match pipeline stages as:
db.orders.aggregate([
{
"$project": {
"createdAt": 1,
"month": { "$month": "$createdAt" },
"day": { "$dayOfMonth": "$createdAt" },
"year": { "$year": "$createdAt" },
"commodity": 1,
"stock": 1
}
},
{ "$match": { "month": 3 } },
{
"$group": {
"_id": { "month": "$month", "day": "$day", "year": "$year" },
"commodity": {
"$push": {
"commodityId": "$commodity",
"commodityStock": "$stock",
}
},
"totalStock": { "$sum": "$stock" },
"count": { "$sum": 1 }
}
}
])

Mongodb aggregation by day based on unix timestamp

I have googled alot, but not found any helpful solution... I want to find total number of daily users.
I have a collection named session_log having documents like following
{
"_id" : ObjectId("52c690955d3cdd831504ce30"),
"SORTID" : NumberLong(1388744853),
"PLAYERID" : 3,
"LASTLOGIN" : NumberLong(1388744461),
"ISLOGIN" : 1,
"LOGOUT" : NumberLong(1388744853)
}
I want to aggregate from LASTLOGIN...
This is my query:
db.session_log.aggregate(
{ $group : {
_id: {
LASTLOGIN : "$LASTLOGIN"
},
count: { $sum: 1 }
}}
);
But it is aggregating by each login time, not by each day. Any help would be appreciated
MongoDB 4.0 and newer
Use $toDate
db.session_log.aggregate([
{ "$group": {
"_id": {
"$dateToString": {
"format": "%Y-%m-%d",
"date": {
"$toDate": {
"$multiply": [1000, "$LASTLOGIN"]
}
}
}
},
"count": { "$sum": 1 }
} }
])
or $convert
db.session_log.aggregate([
{ "$group": {
"_id": {
"$dateToString": {
"format": "%Y-%m-%d",
"date": {
"$convert": {
"input": {
"$multiply": [1000, "$LASTLOGIN"]
},
"to": "date"
}
}
}
},
"count": { "$sum": 1 }
} }
])
MongoDB >= 3.0 and < 4.0:
db.session_log.aggregate([
{ "$group": {
"_id": {
"$dateToString": {
"format": "%Y-%m-%d",
"date": {
"$add": [
new Date(0),
{ "$multiply": [1000, "$LASTLOGIN"] }
]
}
}
},
"count": { "$sum": 1 }
} }
])
You would need to convert the LASTLOGIN field to a millisecond timestamp through multiplying the value by 1000
{ "$multiply": [1000, "$LASTLOGIN"] }
, then convert to a date
"$add": [
new Date(0),
{ "$multiply": [1000, "$LASTLOGIN"] }
]
and this can be done in the $project pipeline by adding your milliseconds time to a zero-milliseconds Date(0) object, then extract $year, $month, $dayOfMonth parts from the converted date which you can then use in your $group pipeline to group the documents by the day.
You should thus change your aggregation pipeline to this:
var project = {
"$project":{
"_id": 0,
"y": {
"$year": {
"$add": [
new Date(0),
{ "$multiply": [1000, "$LASTLOGIN"] }
]
}
},
"m": {
"$month": {
"$add": [
new Date(0),
{ "$multiply": [1000, "$LASTLOGIN"] }
]
}
},
"d": {
"$dayOfMonth": {
"$add": [
new Date(0),
{ "$multiply": [1000, "$LASTLOGIN"] }
]
}
}
}
},
group = {
"$group": {
"_id": {
"year": "$y",
"month": "$m",
"day": "$d"
},
"count" : { "$sum" : 1 }
}
};
Running the aggregation pipeline:
db.session_log.aggregate([ project, group ])
would give the following results (based on the sample document):
{ "_id" : { "year" : 2014, "month" : 1, "day" : 3 }, "count" : 1 }
An improvement would be to run the above in a single pipeline as
var group = {
"$group": {
"_id": {
"year": {
"$year": {
"$add": [
new Date(0),
{ "$multiply": [1000, "$LASTLOGIN"] }
]
}
},
"mmonth": {
"$month": {
"$add": [
new Date(0),
{ "$multiply": [1000, "$LASTLOGIN"] }
]
}
},
"day": {
"$dayOfMonth": {
"$add": [
new Date(0),
{ "$multiply": [1000, "$LASTLOGIN"] }
]
}
}
},
"count" : { "$sum" : 1 }
}
};
Running the aggregation pipeline:
db.session_log.aggregate([ group ])
First thing is your date is stored in timestamp so you need to first convert timestamp to ISODate using adding new Date(0) and multiply timestamp to 1000 then you will get the ISODate like this :
{"$add":[new Date(0),{"$multiply":[1000,"$LASTLOGIN"]}]} this convert to timestamp to ISODate.
Now using date aggregation you need to convert ISODate in required format using $concat and then group by final formatting date so aggregation query will be :
db.session_log.aggregate({
$project: {
date: {
$concat: [{
$substr: [{
$year: {
"$add": [new Date(0), {
"$multiply": [1000, "$LASTLOGIN"]
}]
}
}, 0, 4]
}, "/", {
$substr: [{
$month: {
"$add": [new Date(0), {
"$multiply": [1000, "$LASTLOGIN"]
}]
}
}, 0, 4]
}, "/", {
$substr: [{
$dayOfMonth: {
"$add": [new Date(0), {
"$multiply": [1000, "$LASTLOGIN"]
}]
}
}, 0, 4]
}]
}
}
}, {
"$group": {
"_id": "$date",
"count": {
"$sum": 1
}
}
})
If you will used mongo version 3.0 and above then use dateToString operator to convert ISODate to predefined format, and aggregation query is :
db.session_log.aggregate({
"$project": {
"ISODate": {
"$add": [new Date(0), {
"$multiply": [1000, "$LASTLOGIN"]
}]
}
}
}, {
"$project": {
"yearMonthDay": {
"$dateToString": {
"format": "%Y-%m-%d",
"date": "$ISODate"
}
}
}
}, {
"$group": {
"_id": "$yearMonthDay",
"count": {
"$sum": 1
}
}
})

how to aggregate in mongoDB

I have a document called user.monthly, in that I have we used store 'day' : no. of clicks .
Here I have given 2 samples for different date
For month January
{
name : "devid",
date : ISODate("2014-01-21T11:32:42.392Z"),
daily: {'1':12,'9':13,'30':13}
}
For month February
{
name : "devid",
date : ISODate("2014-02-21T11:32:42.392Z"),
daily: {'3':12,'12':13,'25':13}
}
How can I aggregate this and get total clicks for January and February ?
Please help me to resolve my problem.
Your current schema is not helping you here as the "daily" field ( which we presume is your clicks per type or something like that ) is represented as a sub-document, which means that you need to explicitly name the path to each field in order to do something with it.
A better approach would be to put this information in an array:
{
"name" : "devid",
"date" : ISODate("2014-02-21T11:32:42.392Z"),
"daily": [
{ "type": "3", "clicks": 12 },
{ "type": "12", "clicks": 13 },
{ "type": "25", "clicks": 13 }
]
}
Then you have an aggregation statement that goes like this:
db.collection.aggregate([
// Just match the dates in January and February
{ "$match": {
"date": {
"$gte": new Date("2014-01-01"), "$lt": new Date("2014-03-01")
}
}},
// Unwind the "daily" array
{ "$unwind": "$daily" },
// Group the values together by "type" on "January" and "February"
{ "$group": {
"_id": {
"year": { "$year": "$date" },
"month": { "$month": "$date" },
"type": "$daily.type"
},
"clicks": { "$sum": "$daily.clicks" }
}},
// Sort the result nicely
{ "$sort": {
"_id.year": 1,
"_id.month": 1,
"_id.type": 1
}}
])
That form is pretty simple. Or even if you do not care about the type as a grouping and just want the month totals:
db.collection.aggregate([
{ "$match": {
"date": {
"$gte": new Date("2014-01-01"), "$lt": new Date("2014-03-01")
}
}},
{ "$unwind": "$daily" },
{ "$group": {
"_id": {
"year": { "$year": "$date" },
"month": { "$month": "$date" },
},
"clicks": { "$sum": "$daily.clicks" }
}},
{ "$sort": { "_id.year": 1, "_id.month": 1 }}
])
But with the current sub-document form you currently have this becomes ugly:
db.collection.aggregate([
{ "$match": {
"date": {
"$gte": new Date("2014-01-01"), "$lt": new Date("2014-03-01")
}
}},
{ "$group": {
"_id": {
"year": { "$year": "$date" },
"month": { "$month": "$date" },
},
"clicks": {
"$sum": {
"$add": [
{ "$ifNull": ["$daily.1", 0] },
{ "$ifNull": ["$daily.3", 0] },
{ "$ifNull": ["$daily.9", 0] },
{ "$ifNull": ["$daily.12", 0] },
{ "$ifNull": ["$daily.25", 0] },
{ "$ifNull": ["$daily.30", 0] },
]
}
}
}}
])
That shows that you have no other option here other than to specify what is essentially every possible field under daily ( so probably much larger ). Then we have to evaluate as that key may possibly not exist for a given document to return a default value.
For example, your first document has no key "daily.3" so without the $ifNull check the returned value would be null and invalidate the whole $sum process so that the total would be "0".
Grouping on those keys as in the first aggregate example gets even worse:
db.collection.aggregate([
// Just match the dates in January and February
{ "$match": {
"date": {
"$gte": new Date("2014-01-01"), "$lt": new Date("2014-03-01")
}
}},
// Project with an array to match all possible values
{ "$project": {
"date": 1,
"daily": 1,
"type": { "$literal": ["1", "3", "9", "12", "25", "30" ] }
}},
// Unwind the "type" array
{ "$unwind": "$type" },
// Project values onto the "type" while grouping
{ "$group" : {
"_id": {
"year": { "$year": "$date" },
"month": { "$month": "$date" },
"type": "$type"
},
"clicks": { "$sum": { "$cond": [
{ "$eq": [ "$type", "1" ] },
"$daily.1",
{ "$cond": [
{ "$eq": [ "$type", "3" ] },
"$daily.3",
{ "$cond": [
{ "$eq": [ "$type", "9" ] },
"$daily.9",
{ "$cond": [
{ "$eq": [ "$type", "12" ] },
"$daily.12",
{ "$cond": [
{ "$eq": [ "$type", "25" ] },
"$daily.25",
"$daily.30"
]}
]}
]}
]}
]}}
}},
{ "$sort": {
"_id.year": 1,
"_id.month": 1,
"_id.type": 1
}}
])
Which is creating one big conditional evaluation using $cond to match out the values to the "type" which we projected all possible values in an array using the $literal operator.
If you do not have MongoDB 2.6 or greater you can always do this in place of the $literal operator statement:
"type": { "$cond": [1, ["1", "3", "9", "12", "25", "30" ], 0] }
Where essentially the true evaluation from $cond returns a "literal" declared value, which is how you specify an array. There is also the hidden $const operator that is not documented, but now exposed as $literal.
As you can see the structure here is doing you no favors, so the best option is to change it. But if you cannot and otherwise find the aggregation concept for this too hard to handle, then mapReduce offers an approach, but the processing will be much slower:
db.collection.mapReduce(
function () {
for ( var k in this.daily ) {
emit(
{
year: this.date.getFullYear(),
month: this.date.getMonth() + 1,
type: k
},
this.daily[k]
);
}
},
function(key,values) {
return Array.sum( values );
},
{
"query": {
"date": {
"$gte": new Date("2014-01-01"), "$lt": new Date("2014-03-01")
}
},
"out": { "inline": 1 }
}
)
The general lesson here is that you will get the cleanest and fastest results by altering the document format and using the aggregation framework. But all the ways to do this are listed here.