Aggregate by timestamp and Sum by float - mongodb

I have a set of data in mongoDB that I have to sum up grouped by $timestamp. I succeeded in grouping them day by day, but now I need to sum them by another field.
Example data:
[
{
_id: "1442",
timestamp: "1458080642000",
iden: "15",
scores_today: "0.000000",
scores_total: "52337.000000"
}
]
My code
var project = {
"$project":{
"_id" : 0,
"y": {
"$year": {
"$add": [
new Date(0), "$timestamp"
]
}
},
"m": {
"$month": {
"$add": [
new Date(0), "$timestamp"
]
}
},
"d": {
"$dayOfMonth": {
"$add": [
new Date(0), "$timestamp"
]
}
},
"iden" : "$iden",
"totalTd" : "$scores_today"
"total" : "$scores_today_total"
}
},
group = {
"$group": {
"_id": {
"mac" : "$mac",
"year": "$y",
"month": "$m",
"day": "$d"
},
count : { "$sum" : "$total"}
countOther : { "$sum" : "$totalTd" }
}
};
mongoDB.collection('raw').aggregate([ project, group ]).toArray....
I'm not able to sum them. What I need to change?
I need to group them day by day (and this works ) and by iden ( works ) then sum up differents scores.

Related

need to convert the data in another format

We have Data:
[
{
"_id": ObjectId("5f87e152219aaf1f9404ef3f"),
"parameterId": "5f914ca2679bae721d38410b",
"average": 574998.153846154,
"count": 26.0,
"date": ISODate("2020-09-08T18:30:00.000Z"),
"_class": "org.nec.iotplatform.entities.RawData"
},
{
"_id": ObjectId("5f87e1e2219aaf1f9404eff5"),
"parameterId": "5f914ca2679bae721d38410b",
"average": 494217.606225681,
"count": 1285.0,
"date": ISODate("2020-09-09T18:30:00.000Z"),
"_class": "org.nec.iotplatform.entities.RawData"
}
]
I have query which I am executing on above data and then getting the result as below the query
db.collection.aggregate([
{
"$project": {
"year": {
"$year": "$date"
},
"month": {
"$month": "$date"
},
"dayOfMonth": {
"$dayOfMonth": "$date"
},
"average": "$average",
"count": "$count",
"Symbol": 1
}
},
{
"$group": {
"_id": {
year: "$year",
month: "$month",
dayOfMonth: "$dayOfMonth"
},
"data": {
"$push": "$$ROOT"
}
}
},
{
"$project": {
"average": {
"$divide": [
{
"$reduce": {
"input": "$data",
"initialValue": 0,
"in": {
"$add": [
"$$value",
{
"$multiply": [
"$$this.count",
"$$this.average"
]
}
]
}
}
},
{
$reduce: {
input: "$data",
initialValue: 0,
in: {
"$add": [
"$$value",
"$$this.count"
]
}
}
}
]
}
}
}
])
I am getting output :
[{
"_id" : {
"year" : 2020,
"month" : 9,
"dayOfMonth" : 8
},
"average" : 574998.153846154
},
{
"_id" : {
"year" : 2020,
"month" : 9,
"dayOfMonth" : 9
},
"average" : 494217.606225681
}]
But I need to format the result data like this. by adding the date like this:
{
2020-09-08T18:30:00.000Z : 574998.153846154,
2020-09-09T18:30:00.000Z : 494217.606225681
}
Thanks in advance.
You can use $dateFromString to create the date you want.
Also, you need $concat and $toString to parse the numbers to string and concat into a single string.
After that, using $group you can get the all values you need in the same array. And how you want set the date as KEY, is neccesary create fields k and v and parse again to string.
With the values together, using $arrayToObject you can cerate the schema you want date: average and use $replaceRoot to get only the values at top level.
To do this you need to add this query at the end of your aggregation.
{
"$set": {
"date": { "$dateFromString": { "dateString": {
"$concat": [
{ "$toString": "$_id.dayOfMonth" }, "-",
{ "$toString": "$_id.month" }, "-",
{ "$toString": "$_id.year" }
] },
"format": "%d-%m-%Y", "timezone": "Europe/Madrid"
} } }
},
{
"$group": {
"_id": null,
"date": { "$push": { "k": { "$toString": "$date" }, "v": "$average" } }
}
},
{
"$replaceRoot": { "newRoot": { "$arrayToObject": "$date" } }
}
This query add a new field called date like this:
"date": ISODate("2020-09-08T04:00:00Z")
I've used Europe/Madrid as timezone but you can choose you want to get your desired date.
Example here.
The output is:
{
"2020-09-07T22:00:00.000Z": 574998.153846154,
"2020-09-08T22:00:00.000Z": 494217.606225681
}
Using America/New_York as timezone:
{
"2020-09-08T04:00:00.000Z": 574998.153846154,
"2020-09-09T04:00:00.000Z": 494217.606225681
}

Calculate date difference in year, month, day

I have the following query:
db.getCollection('user').aggregate([
{$unwind: "$education"},
{$project: {
duration: {"$divide":[{$subtract: ['$education.to', '$education.from'] }, 1000 * 60 * 60 * 24 * 365]}
}},
{$group: {
_id: '$_id',
"duration": {$sum: '$duration'}
}}]
])
Above query result is:
{
"_id" : ObjectId("59fabb20d7905ef056f55ac1"),
"duration" : 2.34794520547945
}
/* 2 */
{
"_id" : ObjectId("59fab630203f02f035301fc3"),
"duration" : 2.51232876712329
}
But what I want to do is get its duration in year+ month + day format, something like: 2 y, 3 m, 20 d.
One another point, if a course is going on the to field is null, and another field isGoingOn: true, so here I should calculate the duration by using current date instead of to field.
And user has array of course subdocuments
education: [
{
"courseName": "Java",
"from" : ISODate("2010-12-08T00:00:00.000Z"),
"to" : ISODate("2011-05-31T00:00:00.000Z"),
"isGoingOn": false
},
{
"courseName": "PHP",
"from" : ISODate("2013-12-08T00:00:00.000Z"),
"to" : ISODate("2015-05-31T00:00:00.000Z"),
"isGoingOn": false
},
{
"courseName": "Mysql",
"from" : ISODate("2017-02-08T00:00:00.000Z"),
"to" : null,
"isGoingOn": true
}
]
One another point is this: that date may be not continuous in one subdocument to the other subdocument. A user may have a course for 1 year, and then after two years, he/she started his/her next course for 1 year, and 3 months (it means this user has a total of 2 years and 3-month course duration).
What I want is get date difference of each subdocument in educations array, and sum those. Suppose in my sample data Java course duration is 6 month, and 22 days, PHP course duration is 1 year, and 6 months, and 22 days, and the last one is from 8 Feb 2017 till now, and it's going on, so my education duration is the sum of these intervals.
Please try this aggregation to get date difference in days,months and years, added multiple $addFields stage compute and reduce differences to date, month range without underflow, and the assumption here is 1 month = 30 days
pipeline
db.edu.aggregate(
[
{
$addFields : {
trainingPeriod : {
$map : {
input : "$education",
as : "t",
in : {
year: {$subtract: [{$year : {$ifNull : ["$$t.to", new Date()]}}, {$year : "$$t.from"}]},
month: {$subtract: [{$month : {$ifNull : ["$$t.to", new Date()]}}, {$month : "$$t.from"}]},
dayOfMonth: {$subtract: [{$dayOfMonth : {$ifNull : ["$$t.to", new Date()]}}, {$dayOfMonth : "$$t.from"}]}
}
}
}
}
},
{
$addFields : {
trainingPeriod : {
$map : {
input : "$trainingPeriod",
as : "d",
in : {
year: "$$d.year",
month: {$cond : [{$lt : ["$$d.dayOfMonth", 0]}, {$subtract : ["$$d.month", 1]}, "$$d.month" ]},
day: {$cond : [{$lt : ["$$d.dayOfMonth", 0]}, {$add : [30, "$$d.dayOfMonth"]}, "$$d.dayOfMonth" ]}
}
}
}
}
},
{
$addFields : {
trainingPeriod : {
$map : {
input : "$trainingPeriod",
as : "d",
in : {
year: {$cond : [{$lt : ["$$d.month", 0]}, {$subtract : ["$$d.year", 1]}, "$$d.year" ]},
month: {$cond : [{$lt : ["$$d.month", 0]}, {$add : [12, "$$d.month"]}, "$$d.month" ]},
day: "$$d.day"
}
}
}
}
},
{
$addFields : {
total : {
$reduce : {
input : "$trainingPeriod",
initialValue : {year : 0, month : 0, day : 0},
in : {
year: {$add : ["$$this.year", "$$value.year"]},
month: {$add : ["$$this.month", "$$value.month"]},
day: {$add : ["$$this.day", "$$value.day"]}
}
}
}
}
},
{
$addFields : {
total : {
year : "$total.year",
month : {$add : ["$total.month", {$floor : {$divide : ["$total.day", 30]}}]},
day : {$mod : ["$total.day", 30]}
}
}
},
{
$addFields : {
total : {
year : {$add : ["$total.year", {$floor : {$divide : ["$total.month", 12]}}]},
month : {$mod : ["$total.month", 12]},
day : "$total.day"
}
}
}
]
).pretty()
result
{
"_id" : ObjectId("5a895d4721cbd77dfe857f95"),
"education" : [
{
"courseName" : "Java",
"from" : ISODate("2010-12-08T00:00:00Z"),
"to" : ISODate("2011-05-31T00:00:00Z"),
"isGoingOn" : false
},
{
"courseName" : "PHP",
"from" : ISODate("2013-12-08T00:00:00Z"),
"to" : ISODate("2015-05-31T00:00:00Z"),
"isGoingOn" : false
},
{
"courseName" : "Mysql",
"from" : ISODate("2017-02-08T00:00:00Z"),
"to" : null,
"isGoingOn" : true
}
],
"trainingPeriod" : [
{
"year" : 0,
"month" : 5,
"day" : 23
},
{
"year" : 1,
"month" : 5,
"day" : 23
},
{
"year" : 1,
"month" : 0,
"day" : 10
}
],
"total" : {
"year" : 2,
"month" : 11,
"day" : 26
}
}
>
Well you could just simply use the existing date aggregation operators as opposed to using math to convert to "days" as you presently have:
db.getCollection('user').aggregate([
{ "$unwind": "$education" },
{ "$group": {
"_id": "$_id",
"years": {
"$sum": {
"$subtract": [
{ "$subtract": [
{ "$year": { "$ifNull": [ "$education.to", new Date() ] } },
{ "$year": "$education.from" }
]},
{ "$cond": {
"if": {
"$gt": [
{ "$month": { "$ifNull": [ "$education.to", new Date() ] } },
{ "$month": "$education.from" }
]
},
"then": 0,
"else": 1
}}
]
}
},
"months": {
"$sum": {
"$add": [
{ "$subtract": [
{ "$month": { "$ifNull": [ "$education.to", new Date() ] } },
{ "$month": "$education.from" }
]},
{ "$cond": {
"if": {
"$gt": [
{ "$month": { "$ifNull": ["$education.to", new Date() ] } },
{ "$month": "$education.from" }
]
},
"then": 0,
"else": 12
}}
]
}
},
"days": {
"$sum": {
"$add": [
{ "$subtract": [
{ "$dayOfYear": { "$ifNull": [ "$education.to", new Date() ] } },
{ "$dayOfYear": "$education.from" }
]},
{ "$cond": {
"if": {
"$gt": [
{ "$month": { "$ifNull": [ "$education.to", new Date() ] } },
{ "$month": "$education.from" }
]
},
"then": 0,
"else": 365
}}
]
}
}
}},
{ "$project": {
"years": {
"$add": [
"$years",
{ "$add": [
{ "$floor": { "$divide": [ "$months", 12 ] } },
{ "$floor": { "$divide": [ "$days", 365 ] } }
]}
]
},
"months": {
"$mod": [
{ "$add": [
"$months",
{ "$floor": {
"$multiply": [
{ "$divide": [ "$days", 365 ] },
12
]
}}
]},
12
]
},
"days": { "$mod": [ "$days", 365 ] }
}}
])
It is "sort of" an approximation on the "days" and "months" without the necessary operations to be "certain" of leap years, but it would get you the result which should be "near enough" for most purposes.
You can even do this without $unwind as long as your MongoDB version is 3.2 or greater:
db.getCollection('user').aggregate([
{ "$addFields": {
"duration": {
"$let": {
"vars": {
"edu": {
"$map": {
"input": "$education",
"as": "e",
"in": {
"$let": {
"vars": { "toDate": { "$ifNull": ["$$e.to", new Date()] } },
"in": {
"years": {
"$subtract": [
{ "$subtract": [
{ "$year": "$$toDate" },
{ "$year": "$$e.from" }
]},
{ "$cond": {
"if": { "$gt": [{ "$month": "$$toDate" },{ "$month": "$$e.from" }] },
"then": 0,
"else": 1
}}
]
},
"months": {
"$add": [
{ "$subtract": [
{ "$ifNull": [{ "$month": "$$toDate" }, new Date() ] },
{ "$month": "$$e.from" }
]},
{ "$cond": {
"if": { "$gt": [{ "$month": "$$toDate" },{ "$month": "$$e.from" }] },
"then": 0,
"else": 12
}}
]
},
"days": {
"$add": [
{ "$subtract": [
{ "$ifNull": [{ "$dayOfYear": "$$toDate" }, new Date() ] },
{ "$dayOfYear": "$$e.from" }
]},
{ "$cond": {
"if": { "$gt": [{ "$month": "$$toDate" },{ "$month": "$$e.from" }] },
"then": 0,
"else": 365
}}
]
}
}
}
}
}
}
},
"in": {
"$let": {
"vars": {
"years": { "$sum": "$$edu.years" },
"months": { "$sum": "$$edu.months" },
"days": { "$sum": "$$edu.days" }
},
"in": {
"years": {
"$add": [
"$$years",
{ "$add": [
{ "$floor": { "$divide": [ "$$months", 12 ] } },
{ "$floor": { "$divide": [ "$$days", 365 ] } }
]}
]
},
"months": {
"$mod": [
{ "$add": [
"$$months",
{ "$floor": {
"$multiply": [
{ "$divide": [ "$$days", 365 ] },
12
]
}}
]},
12
]
},
"days": { "$mod": [ "$$days", 365 ] }
}
}
}
}
}
}}
])
This is because from MongoDB 3.4 you can use $sum directly with an array of or any list of expressions in stages like $addFields or $project, and the $map can apply those same "date aggregation operator" expressions against each array element in place of doing $unwind first.
So the main math can really be done in one part of "reducing" the array, and then each total can be adjusted by the general "divisors" for the years, and the "modulo" or "remainder" from any overruns in the months and days.
Essentially returns:
{
"_id" : ObjectId("5a07688e98e4471d8aa87940"),
"education" : [
{
"courseName" : "Java",
"from" : ISODate("2010-12-08T00:00:00.000Z"),
"to" : ISODate("2011-05-31T00:00:00.000Z"),
"isGoingOn" : false
},
{
"courseName" : "PHP",
"from" : ISODate("2013-12-08T00:00:00.000Z"),
"to" : ISODate("2015-05-31T00:00:00.000Z"),
"isGoingOn" : false
},
{
"courseName" : "Mysql",
"from" : ISODate("2017-02-08T00:00:00.000Z"),
"to" : null,
"isGoingOn" : true
}
],
"duration" : {
"years" : 3.0,
"months" : 3.0,
"days" : 259.0
}
}
Given the 11th of November 2017
You can simplify your code by using client side processing with moment js library.
All the date time math is handled by moment js library. Use duration to calculate the reduced time diff
Use reduce to add the time diff across all the array elements followed by moment duration to output the time in years/months/days.
It solves two issues :
Gives you accurate difference in years month and days between two dates.
Gives you expected format.
For example:
var education = [
{
"courseName": "Java",
"from" : new Date("2010-12-08T00:00:00.000Z"),
"to" : new Date("2011-05-31T00:00:00.000Z"),
"isGoingOn": false
},
{
"courseName": "PHP",
"from" : new Date("2013-12-08T00:00:00.000Z"),
"to" : new Date("2015-05-31T00:00:00.000Z"),
"isGoingOn": false
},
{
"courseName": "Mysql",
"from" : new Date("2017-02-08T00:00:00.000Z"),
"to" : null,
"isGoingOn": true
}
];
var reducedDiff = education.reduce(function(prevVal, elem) {
if(elem.isGoingOn) elem.to = new Date();
var diffDuration = moment(elem.to).diff(moment(elem.from));
return prevVal + diffDuration;
}, 0);
var duration = moment.duration(reducedDiff);
alert(duration.years() +" y, " + duration.months() + " m, " + duration.days() + " d " );
var durationstr = duration.years() +" y, " + duration.months() + " m, " + duration.days() + " d ";
MongoDb integration:
var reducedDiff = db.getCollection('user').find({},{education:1}).reduce(function(...

Aggregate Pipeline groups by day but projects a null date

I'm attempting to group the items in a collection by year/month/day. The grouping should be based on the pubDate and pubTimezoneOffset.
I've got an aggregate pipeline that:
- $project - adds the timezoneOffset to the pubDate
- $group - groups by the modified pubDate
- $project - removes the timezoneOffset
- $sort - sorts by pubDate
I tested each stage on it's own and it seems to be some issue with the second $project. In the final output the pubDate is null.
I've been going over it for a few hours now and can't see where I've gone wrong. What am I missing?
The aggregate pipeline:
db.messages.aggregate([
{
$project: {
_id: 1,
pubTimezoneOffset: 1,
pubDate: {
$add: [
'$pubDate', {
$add: [
{ $multiply: [ '$pubTimezoneOffset.hours', 60, 60, 1000 ] },
{ $multiply: [ '$pubTimezoneOffset.minutes', 60, 1000 ] }
]
}
]
}
}
},
{
$group: {
_id: {
year: { $year: '$pubDate' },
month: { $month: '$pubDate' },
day: { $dayOfMonth: '$pubDate' }
},
count: { $sum: 1 },
messages: {
$push: {
_id: '$_id',
pubTimezoneOffset: '$pubTimezoneOffset',
pubDate: '$pubDate'
}
}
}
},
{
$project: {
_id: 1,
messages: {
_id: 1,
pubTimezoneOffset: 1,
pubDate: {
$subtract: [
'$pubDate', {
$add: [
{ $multiply: [ '$pubTimezoneOffset.hours', 60, 60, 1000 ] },
{ $multiply: [ '$pubTimezoneOffset.minutes', 60, 1000 ] }
]
}
]
}
},
count: 1
}
},
{
$sort: {
'_id.year': -1,
'_id.month': -1,
'_id.day': -1
}
}
]).pretty();
To recreate the source data:
db.messages.insertOne({
pubDate: ISODate('2017-10-25T10:00:00:000Z'),
pubTimezoneOffset: {
hours: -7,
minutes: 0
}
});
db.messages.insertOne({
pubDate: ISODate('2017-10-25T11:00:00:000Z'),
pubTimezoneOffset: {
hours: -7,
minutes: 0
}
});
db.messages.insertOne({
pubDate: ISODate('2017-10-24: 10:00:00:000Z'),
pubTimezoneOffset: {
hours: -7,
minutes: 0
}
});
db.messages.insertOne({
pubDate: ISODate('2017-10-24: 11:00:00:000Z'),
pubTimezoneOffset: {
hours: -7,
minutes: 0
}
});
Running it in mongo shell outputs:
{
"_id" : {
"year" : 2017,
"month" : 10,
"day" : 25
},
"count" : 2,
"messages" : [
{
"_id" : ObjectId("59f0e8b47d0a206bdfde87b3"),
"pubTimezoneOffset" : {
"hours" : -7,
"minutes" : 0
},
"pubDate" : null
},
{
"_id" : ObjectId("59f0e8b47d0a206bdfde87b4"),
"pubTimezoneOffset" : {
"hours" : -7,
"minutes" : 0
},
"pubDate" : null
}
]
}
{
"_id" : {
"year" : 2017,
"month" : 10,
"day" : 23
},
"count" : 2,
"messages" : [
{
"_id" : ObjectId("59f0e8b47d0a206bdfde87b5"),
"pubTimezoneOffset" : {
"hours" : -7,
"minutes" : 0
},
"pubDate" : null
},
{
"_id" : ObjectId("59f0e8b47d0a206bdfde87b6"),
"pubTimezoneOffset" : {
"hours" : -7,
"minutes" : 0
},
"pubDate" : null
}
]
}
Kudos for the attempt but, you actually have quite a few things conceptually incorrect here, with the basic error you are seeing is because your premise of "array projection" is incorrect. You are trying to refer to variables "inside the array" by simply notating the "property name".
What you actually need to do here is apply $map in order to apply the functions to "transform" each element:
db.messages.aggregate([
{ "$project": {
"pubTimezoneOffset": 1,
"pubDate": {
"$add": [
"$pubDate",
{ "$add": [
{ "$multiply": [ '$pubTimezoneOffset.hours', 60 * 60 * 1000 ] },
{ "$multiply": [ '$pubTimezoneOffset.minutes', 60 * 1000 ] }
]}
]
}
}},
{ "$group": {
"_id": {
"year": { "$year": "$pubDate" },
"month": { "$month": "$pubDate" },
"day": { "$dayOfMonth": "$pubDate" }
},
"count": { "$sum": 1 },
"messages": {
"$push": {
"_id": "$_id",
"pubTimezoneOffset": "$pubTimezoneOffset",
"pubDate": "$pubDate"
}
}
}},
{ "$project": {
"messages": {
"$map": {
"input": "$messages",
"as": "m",
"in": {
"_id": "$$m._id",
"pubTimezoneOffset": "$$m.pubTimezoneOffset",
"pubDate": {
"$subtract": [
"$$m.pubDate",
{ "$add": [
{ "$multiply": [ "$$m.pubTimezoneOffset.hours", 60 * 60 * 1000 ] },
{ "$multiply": [ "$$m.pubTimezoneOffset.minutes", 60 * 1000 ] }
]}
]
}
}
}
},
"count": 1
}},
{ "$sort": { "_id": -1 } }
]).pretty();
Noting here that you are doing a lot of unnecessary work in "tranforming" the dates kept in the array, and then trying to "tranform" them back to the original state. Instead, you should have simply supplied a "variable" with $let to the _id of $group and left the original document state "as is" using $$ROOT instead of naming all the fields:
db.messages.aggregate([
{ "$group": {
"_id": {
"$let": {
"vars": {
"pubDate": {
"$add": [
"$pubDate",
{ "$add": [
{ "$multiply": [ '$pubTimezoneOffset.hours', 60 * 60 * 1000 ] },
{ "$multiply": [ '$pubTimezoneOffset.minutes', 60 * 1000 ] }
]}
]
}
},
"in": {
"year": { "$year": "$$pubDate" },
"month": { "$month": "$$pubDate" },
"day": { "$dayOfMonth": "$$pubDate" }
}
}
},
"docs": { "$push": "$$ROOT" }
}},
{ "$sort": { "_id": -1 } }
])
Also note that $sort simply does actually consider all the "sub-keys" anyway, so there is no need to name them explicitly.
Back to your error, the point of $map is essentially because whilst you can notate array "field inclusion" with MongoDB 3.2 and above like this:
"messages": {
"_id": 1,
"pubTimeZoneOffset": 1
}
The thing you cannot do is actually "calculate values" on the elements themselves. You tried "$pubDate" which actually looks in the "ROOT" space for a property of that name, which does not exist and is null. If you then tried:
"messages": {
"_id": 1,
"pubTimeZoneOffset": 1,
"pubDate": "$messages.pubDate"
}
Then you would get "a result", but not the result you might think. Because what would actually be included in "every element" is the value of that property in each array element as a "new array" itself.
So the short and sweet is use $map instead, which iterates the array elements with a local variable referring to the current element for you to notate values for in expressions.
MongoDB 3.6
MongoDB date operators are all timezone aware. So instead of all the juggling then all you need do is supply the additional "timezone" parameter to any option and the conversion will be done for you.
As a sample:
db.messages.aggregate([
{ "$group": {
"_id": {
"$dateToString": {
"date": "$pubDate",
"format": "%Y-%m-%d",
"timezone": {
"$concat": [
{ "$cond": {
"if": { "$gt": [ "$pubTimezoneOffset", 0 ] },
"then": "+",
"else": "-"
}},
{ "$let": {
"vars": {
"hours": { "$substr": [{ "$abs": "$pubTimezoneOffset.hours" },0,2] },
"minutes": { "$substr": [{ "$abs": "$pubTimezoneOffset.minutes" },0,2] }
},
"in": {
"$concat": [
{ "$cond": {
"if": { "$eq": [{ "$strLenCP": "$$hours" }, 1 ] },
"then": { "$concat": [ "0", "$$hours" ] },
"else": "$$hours"
}},
":",
{ "$cond": {
"if": { "$eq": [{ "$strLenCP": "$$minutes" }, 1 ] },
"then": { "$concat": [ "0", "$$minutes" ] },
"else": "$$minutes"
}}
]
}
}}
]
}
}
},
"docs": { "$push": "$$ROOT" }
}},
{ "$sort": { "_id": -1 } }
])
Note that most of the "juggling" in there is to convert your own "offset" to the "string" format required by the new operators. If you simply stored this as "offset": "-07:00" then you can instead simply write:
db.messages.aggregate([
{ "$group": {
"_id": {
"$dateToString": {
"date": "$pubDate",
"format": "%Y-%m-%d",
"timezone": "$offset"
}
},
"docs": { "$push": "$$ROOT" }
}},
{ "$sort": { "_id": -1 } }
])
Please Reconsider
I can't let this pass without making a note that your general approach here is conceptually incorrect. Storing "offset" or "local time string" within the database is just intrinsically wrong.
The date information should be stored as UTC and should be returned as UTC. Sure you can and "should" covert when aggregating, but the general premise is that you always convert back to UTC. And "conversion" comes from the "locale of the observer" and not a "stored" adjustment. Because dates are always relative to the "observer" point of view, and are not from the "point of origin" as you seem to have interpreted it.
I put some lengthy detail on this on Group by Date with Local Time Zone in MongoDB about why you store this way and why "locale" conversion from the "observer" is necessary. That also details "Daylight savings considerations" from the observer point of view.
The basic premise there still remains the same when MongoDB becomes "timezone aware" in that you :
Store in UTC
Query with local time converted to UTC
Aggregate converted from the "observer" offset
Convert the "offset" back to UTC
Because at the end of the day it's the "clients" job to supply that "locale" conversion, since that's the part that "knows where it is".

Group and count over a start and end range

If I have data in the following format:
[
{
_id: 1,
startDate: ISODate("2017-01-1T00:00:00.000Z"),
endDate: ISODate("2017-02-25T00:00:00.000Z"),
type: 'CAR'
},
{
_id: 2,
startDate: ISODate("2017-02-17T00:00:00.000Z"),
endDate: ISODate("2017-03-22T00:00:00.000Z"),
type: 'HGV'
}
]
Is it possible to retrieve data grouped by 'type', but also with a count of the type for each of month in a given date range e.g. between 2017/1/1 to 2017/4/1 would return:
[
{
_id: 'CAR',
monthCounts: [
/*January*/
{
from: ISODate("2017-01-1T00:00:00.000Z"),
to: ISODate("2017-01-31T23:59:59.999Z"),
count: 1
},
/*February*/
{
from: ISODate("2017-02-1T00:00:00.000Z"),
to: ISODate("2017-02-28T23:59:59.999Z"),
count: 1
},
/*March*/
{
from: ISODate("2017-03-1T00:00:00.000Z"),
to: ISODate("2017-03-31T23:59:59.999Z"),
count: 0
},
]
},
{
_id: 'HGV',
monthCounts: [
{
from: ISODate("2017-01-1T00:00:00.000Z"),
to: ISODate("2017-01-31T23:59:59.999Z"),
count: 0
},
{
from: ISODate("2017-02-1T00:00:00.000Z"),
to: ISODate("2017-02-28T23:59:59.999Z"),
count: 1
},
{
from: ISODate("2017-03-1T00:00:00.000Z"),
to: ISODate("2017-03-31T23:59:59.999Z"),
count: 1
},
]
}
]
The returned format is not really important, but what I am trying to achieve is in a single query to retrieve a number of counts for the same grouping (one per month). The input could be simply a start and end date to report from or more likely it could be an array of the date ranges to group by.
The algorithm for this is to basically "iterate" values between the interval of the two values. MongoDB has a couple of ways to deal with this, being what has always been present with mapReduce() and with new features available to the aggregate() method.
I'm going expand on your selection to deliberately show an overlapping month since your examples did not have one. This will result in the "HGV" values appearing in "three" months of output.
{
"_id" : 1,
"startDate" : ISODate("2017-01-01T00:00:00Z"),
"endDate" : ISODate("2017-02-25T00:00:00Z"),
"type" : "CAR"
}
{
"_id" : 2,
"startDate" : ISODate("2017-02-17T00:00:00Z"),
"endDate" : ISODate("2017-03-22T00:00:00Z"),
"type" : "HGV"
}
{
"_id" : 3,
"startDate" : ISODate("2017-02-17T00:00:00Z"),
"endDate" : ISODate("2017-04-22T00:00:00Z"),
"type" : "HGV"
}
Aggregate - Requires MongoDB 3.4
db.cars.aggregate([
{ "$addFields": {
"range": {
"$reduce": {
"input": { "$map": {
"input": { "$range": [
{ "$trunc": {
"$divide": [
{ "$subtract": [ "$startDate", new Date(0) ] },
1000
]
}},
{ "$trunc": {
"$divide": [
{ "$subtract": [ "$endDate", new Date(0) ] },
1000
]
}},
60 * 60 * 24
]},
"as": "el",
"in": {
"$let": {
"vars": {
"date": {
"$add": [
{ "$multiply": [ "$$el", 1000 ] },
new Date(0)
]
},
"month": {
}
},
"in": {
"$add": [
{ "$multiply": [ { "$year": "$$date" }, 100 ] },
{ "$month": "$$date" }
]
}
}
}
}},
"initialValue": [],
"in": {
"$cond": {
"if": { "$in": [ "$$this", "$$value" ] },
"then": "$$value",
"else": { "$concatArrays": [ "$$value", ["$$this"] ] }
}
}
}
}
}},
{ "$unwind": "$range" },
{ "$group": {
"_id": {
"type": "$type",
"month": "$range"
},
"count": { "$sum": 1 }
}},
{ "$sort": { "_id": 1 } },
{ "$group": {
"_id": "$_id.type",
"monthCounts": {
"$push": { "month": "$_id.month", "count": "$count" }
}
}}
])
The key to making this work is the $range operator which takes values for a "start" and and "end" as well as an "interval" to apply. The result is an array of values taken from the "start" and incremented until the "end" is reached.
We use this with startDate and endDate to generate the possible dates in between those values. You will note that we need to do some math here since the $range only takes a 32-bit integer, but we can take the milliseconds away from the timestamp values so that is okay.
Because we want "months", the operations applied extract the month and year values from the generated range. We actually generate the range as the "days" in between since "months" are difficult to deal with in math. The subsequent $reduce operation takes only the "distinct months" from the date range.
The result therefore of the first aggregation pipeline stage is a new field in the document which is an "array" of all the distinct months covered between startDate and endDate. This gives an "iterator" for the rest of the operation.
By "iterator" I mean than when we apply $unwind we get a copy of the original document for every distinct month covered in the interval. This then allows the following two $group stages to first apply a grouping to the common key of "month" and "type" in order to "total" the counts via $sum, and next $group makes the key just the "type" and puts the results in an array via $push.
This gives the result on the above data:
{
"_id" : "HGV",
"monthCounts" : [
{
"month" : 201702,
"count" : 2
},
{
"month" : 201703,
"count" : 2
},
{
"month" : 201704,
"count" : 1
}
]
}
{
"_id" : "CAR",
"monthCounts" : [
{
"month" : 201701,
"count" : 1
},
{
"month" : 201702,
"count" : 1
}
]
}
Note that the coverage of "months" is only present where there is actual data. Whilst possible to produce zero values over a range, it requires quite a bit of wrangling to do so and is not very practical. If you want zero values then it is better to add that in post processing in the client once the results have been retrieved.
If you really have your heart set on the zero values, then you should separately query for $min and $max values, and pass these in to "brute force" the pipeline into generating the copies for each supplied possible range value.
So this time the "range" is made externally to all documents, and you then use a $cond statement into the accumulator to see if the current data is within the grouped range produced. Also since the generation is "external", we really don't need the MongoDB 3.4 operator of $range, so this can be applied to earlier versions as well:
// Get min and max separately
var ranges = db.cars.aggregate(
{ "$group": {
"_id": null,
"startRange": { "$min": "$startDate" },
"endRange": { "$max": "$endDate" }
}}
).toArray()[0]
// Make the range array externally from all possible values
var range = [];
for ( var d = new Date(ranges.startRange.valueOf()); d <= ranges.endRange; d.setUTCMonth(d.getUTCMonth()+1)) {
var v = ( d.getUTCFullYear() * 100 ) + d.getUTCMonth()+1;
range.push(v);
}
// Run conditional aggregation
db.cars.aggregate([
{ "$addFields": { "range": range } },
{ "$unwind": "$range" },
{ "$group": {
"_id": {
"type": "$type",
"month": "$range"
},
"count": {
"$sum": {
"$cond": {
"if": {
"$and": [
{ "$gte": [
"$range",
{ "$add": [
{ "$multiply": [ { "$year": "$startDate" }, 100 ] },
{ "$month": "$startDate" }
]}
]},
{ "$lte": [
"$range",
{ "$add": [
{ "$multiply": [ { "$year": "$endDate" }, 100 ] },
{ "$month": "$endDate" }
]}
]}
]
},
"then": 1,
"else": 0
}
}
}
}},
{ "$sort": { "_id": 1 } },
{ "$group": {
"_id": "$_id.type",
"monthCounts": {
"$push": { "month": "$_id.month", "count": "$count" }
}
}}
])
Which produces the consistent zero fills for all possible months on all groupings:
{
"_id" : "HGV",
"monthCounts" : [
{
"month" : 201701,
"count" : 0
},
{
"month" : 201702,
"count" : 2
},
{
"month" : 201703,
"count" : 2
},
{
"month" : 201704,
"count" : 1
}
]
}
{
"_id" : "CAR",
"monthCounts" : [
{
"month" : 201701,
"count" : 1
},
{
"month" : 201702,
"count" : 1
},
{
"month" : 201703,
"count" : 0
},
{
"month" : 201704,
"count" : 0
}
]
}
MapReduce
All versions of MongoDB support mapReduce, and the simple case of the "iterator" as mentioned above is handled by a for loop in the mapper. We can get output as generated up to the first $group from above by simply doing:
db.cars.mapReduce(
function () {
for ( var d = this.startDate; d <= this.endDate;
d.setUTCMonth(d.getUTCMonth()+1) )
{
var m = new Date(0);
m.setUTCFullYear(d.getUTCFullYear());
m.setUTCMonth(d.getUTCMonth());
emit({ id: this.type, date: m},1);
}
},
function(key,values) {
return Array.sum(values);
},
{ "out": { "inline": 1 } }
)
Which produces:
{
"_id" : {
"id" : "CAR",
"date" : ISODate("2017-01-01T00:00:00Z")
},
"value" : 1
},
{
"_id" : {
"id" : "CAR",
"date" : ISODate("2017-02-01T00:00:00Z")
},
"value" : 1
},
{
"_id" : {
"id" : "HGV",
"date" : ISODate("2017-02-01T00:00:00Z")
},
"value" : 2
},
{
"_id" : {
"id" : "HGV",
"date" : ISODate("2017-03-01T00:00:00Z")
},
"value" : 2
},
{
"_id" : {
"id" : "HGV",
"date" : ISODate("2017-04-01T00:00:00Z")
},
"value" : 1
}
So it does not have the second grouping to compound to arrays, but we did produce the same basic aggregated output.

Mongodb aggregation by day based on unix timestamp

I have googled alot, but not found any helpful solution... I want to find total number of daily users.
I have a collection named session_log having documents like following
{
"_id" : ObjectId("52c690955d3cdd831504ce30"),
"SORTID" : NumberLong(1388744853),
"PLAYERID" : 3,
"LASTLOGIN" : NumberLong(1388744461),
"ISLOGIN" : 1,
"LOGOUT" : NumberLong(1388744853)
}
I want to aggregate from LASTLOGIN...
This is my query:
db.session_log.aggregate(
{ $group : {
_id: {
LASTLOGIN : "$LASTLOGIN"
},
count: { $sum: 1 }
}}
);
But it is aggregating by each login time, not by each day. Any help would be appreciated
MongoDB 4.0 and newer
Use $toDate
db.session_log.aggregate([
{ "$group": {
"_id": {
"$dateToString": {
"format": "%Y-%m-%d",
"date": {
"$toDate": {
"$multiply": [1000, "$LASTLOGIN"]
}
}
}
},
"count": { "$sum": 1 }
} }
])
or $convert
db.session_log.aggregate([
{ "$group": {
"_id": {
"$dateToString": {
"format": "%Y-%m-%d",
"date": {
"$convert": {
"input": {
"$multiply": [1000, "$LASTLOGIN"]
},
"to": "date"
}
}
}
},
"count": { "$sum": 1 }
} }
])
MongoDB >= 3.0 and < 4.0:
db.session_log.aggregate([
{ "$group": {
"_id": {
"$dateToString": {
"format": "%Y-%m-%d",
"date": {
"$add": [
new Date(0),
{ "$multiply": [1000, "$LASTLOGIN"] }
]
}
}
},
"count": { "$sum": 1 }
} }
])
You would need to convert the LASTLOGIN field to a millisecond timestamp through multiplying the value by 1000
{ "$multiply": [1000, "$LASTLOGIN"] }
, then convert to a date
"$add": [
new Date(0),
{ "$multiply": [1000, "$LASTLOGIN"] }
]
and this can be done in the $project pipeline by adding your milliseconds time to a zero-milliseconds Date(0) object, then extract $year, $month, $dayOfMonth parts from the converted date which you can then use in your $group pipeline to group the documents by the day.
You should thus change your aggregation pipeline to this:
var project = {
"$project":{
"_id": 0,
"y": {
"$year": {
"$add": [
new Date(0),
{ "$multiply": [1000, "$LASTLOGIN"] }
]
}
},
"m": {
"$month": {
"$add": [
new Date(0),
{ "$multiply": [1000, "$LASTLOGIN"] }
]
}
},
"d": {
"$dayOfMonth": {
"$add": [
new Date(0),
{ "$multiply": [1000, "$LASTLOGIN"] }
]
}
}
}
},
group = {
"$group": {
"_id": {
"year": "$y",
"month": "$m",
"day": "$d"
},
"count" : { "$sum" : 1 }
}
};
Running the aggregation pipeline:
db.session_log.aggregate([ project, group ])
would give the following results (based on the sample document):
{ "_id" : { "year" : 2014, "month" : 1, "day" : 3 }, "count" : 1 }
An improvement would be to run the above in a single pipeline as
var group = {
"$group": {
"_id": {
"year": {
"$year": {
"$add": [
new Date(0),
{ "$multiply": [1000, "$LASTLOGIN"] }
]
}
},
"mmonth": {
"$month": {
"$add": [
new Date(0),
{ "$multiply": [1000, "$LASTLOGIN"] }
]
}
},
"day": {
"$dayOfMonth": {
"$add": [
new Date(0),
{ "$multiply": [1000, "$LASTLOGIN"] }
]
}
}
},
"count" : { "$sum" : 1 }
}
};
Running the aggregation pipeline:
db.session_log.aggregate([ group ])
First thing is your date is stored in timestamp so you need to first convert timestamp to ISODate using adding new Date(0) and multiply timestamp to 1000 then you will get the ISODate like this :
{"$add":[new Date(0),{"$multiply":[1000,"$LASTLOGIN"]}]} this convert to timestamp to ISODate.
Now using date aggregation you need to convert ISODate in required format using $concat and then group by final formatting date so aggregation query will be :
db.session_log.aggregate({
$project: {
date: {
$concat: [{
$substr: [{
$year: {
"$add": [new Date(0), {
"$multiply": [1000, "$LASTLOGIN"]
}]
}
}, 0, 4]
}, "/", {
$substr: [{
$month: {
"$add": [new Date(0), {
"$multiply": [1000, "$LASTLOGIN"]
}]
}
}, 0, 4]
}, "/", {
$substr: [{
$dayOfMonth: {
"$add": [new Date(0), {
"$multiply": [1000, "$LASTLOGIN"]
}]
}
}, 0, 4]
}]
}
}
}, {
"$group": {
"_id": "$date",
"count": {
"$sum": 1
}
}
})
If you will used mongo version 3.0 and above then use dateToString operator to convert ISODate to predefined format, and aggregation query is :
db.session_log.aggregate({
"$project": {
"ISODate": {
"$add": [new Date(0), {
"$multiply": [1000, "$LASTLOGIN"]
}]
}
}
}, {
"$project": {
"yearMonthDay": {
"$dateToString": {
"format": "%Y-%m-%d",
"date": "$ISODate"
}
}
}
}, {
"$group": {
"_id": "$yearMonthDay",
"count": {
"$sum": 1
}
}
})