MongoDB - aggregate by date, right-aligned boundaries - mongodb

I have some data in MongoDB that contains 10-minutely period-to-date sums:
db.test.insert({perEnd: ISODate('2013-06-05T18:00:00'), val: 7.3})
db.test.insert({perEnd: ISODate('2013-06-05T18:10:00'), val: 6.23})
db.test.insert({perEnd: ISODate('2013-06-05T18:20:00'), val: 4.1})
db.test.insert({perEnd: ISODate('2013-06-05T18:30:00'), val: 0.21})
db.test.insert({perEnd: ISODate('2013-06-05T18:40:00'), val: 12.1})
db.test.insert({perEnd: ISODate('2013-06-05T18:50:00'), val: 6.0})
db.test.insert({perEnd: ISODate('2013-06-05T19:00:00'), val: 8.9})
db.test.insert({perEnd: ISODate('2013-06-05T19:10:00'), val: .98})
db.test.insert({perEnd: ISODate('2013-06-05T19:20:00'), val: 14.7})
I would like to aggregate to find sums for each hour-ending period, so I should get the following values:
ending 2013-06-05 18:00:00 - 7.3
ending 2013-06-05 19:00:00 - 37.54
ending 2013-06-05 20:00:00 - 15.68
Using the built-in date operators doesn't work, because they round (truncate) all dates down to the nearest boundary, and I need to round up:
> db.test.aggregate({$group: {_id: {Year: {$year: "$perEnd"},
Day: {$dayOfYear: "$perEnd"},
Hour: {$hour: "$perEnd"}},
sum: {$sum: "$val"}}})
{
"result" : [
{ "_id" : { "Year" : 2013,
"Day" : 156,
"Hour" : 19 },
"sum" : 24.58 },
{ "_id" : { "Year" : 2013,
"Day" : 156,
"Hour" : 18 },
"sum" : 35.940000000000005 }
],
"ok" : 1
}
Anyone see a way to achieve this with decent performance?

You can do it using mongodb map-reduce:
// Map step: assign each reading to the hour boundary that ends its period.
// A reading stamped exactly on the hour stays in that hour; anything later,
// even by a single millisecond, rolls forward to the next hour boundary.
var map = function(){
  // Round up on the raw epoch value instead of getMinutes()/setHours():
  // this also catches non-zero seconds and milliseconds at minute 0, and the
  // result does not depend on the local time zone of the JS environment
  // (whole epoch hours coincide with whole UTC hours).
  var HOUR_MS = 60 * 60 * 1000;
  var hourEnd = new Date(Math.ceil(this.perEnd.getTime() / HOUR_MS) * HOUR_MS);
  emit(hourEnd, this.val);
};
// Reduce step: collapse all values emitted for one key into a single total.
// NOTE(review): Array.sum is not part of standard JavaScript; presumably it is
// the helper supplied by MongoDB's JavaScript environment - confirm.
var reduce = function(key, values){
return Array.sum(values)
};
db.collection.mapReduce(map, reduce, {out : {inline : 1}}, callback);
For your data I got the following result:
[ { _id: Wed Jun 05 2013 21:00:00 GMT+0300 (EEST), value: 7.3 },
{ _id: Wed Jun 05 2013 22:00:00 GMT+0300 (EEST), value: 37.54 },
{ _id: Wed Jun 05 2013 23:00:00 GMT+0300 (EEST), value: 15.68 } ]

Related

Mongodb $gte date query not working as expected

I have collection with Below data. While I am using
db.collection.find({ endDate: { $gte: new Date() } })
it's not showing result of current date which is ISODate("2018-07-06T14:59:08.794+0000").
{
"_id" : "GMDJcQMfs8j8EP9EE",
"endDate" : ISODate("2018-07-06T14:59:08.794+0000")
}
{
"_id" : "GMDJcQMfs12233",
"endDate" : ISODate("2020-02-21T00:00:00.000+0000")
}
{
"_id" : "GMDJerrr8j8EP9EE",
"endDate" : ISODate("2020-02-21T00:00:00.000+0000")
}
{
"_id" : "rrrJcQMfs8j8EP9EE",
"endDate" : ISODate("2020-02-21T00:00:00.000+0000")
}
You have to consider the time of the day. You can set the hours, minutes, seconds and milliseconds of the requested date back to zero.
var date = new Date()
date.setHours(0, 0, 0, 0)
db.collection.find({ endDate: { $gte: date } })
Now all records ending today and later will be returned.
The condition you have quoted is $gte, and the date you are passing to the condition is new Date().
print(new Date()) - execute this command in the Mongo shell; it should print the current date and time.
My Console output
Fri Jul 06 2018 14:28:54 GMT+0530 (India Standard Time)
If your date is also Jul 06, 2018 then you also wont get the Jul 05, 2018 in your results
Ok, i get back on my answer to your previous question and explain it :
// Compares each document's endDate with today's date at day granularity by
// formatting both as "%Y-%m-%d" strings and comparing the strings.
db.test1.aggregate(
[
{
// Stage 1: render endDate and the current date with the same "%Y-%m-%d" format.
$project: {
endDate:1,
endDateFormatted:{$dateToString: {date:"$endDate",format:"%Y-%m-%d"}},
current:{$dateToString: {date:new Date(),format:"%Y-%m-%d"}}
}
},
{
// Stage 2: ab holds the $cmp result of the two formatted strings.
$project: { ab: {$cmp: ['$endDateFormatted','$current']},endDate:1}
},
{
// Stage 3: keep only documents whose comparison result is 1.
$match: {ab:{$eq:1}} // <= note the difference with your previous question
},
{
// Stage 4: drop the helper field ab from the output.
$project: {
endDate:1
}
},
]
);
Explanations :
$dateToString transforms provided ISOdate in provided format, ie
{$dateToString: {date:ISODate("2020-02-21T00:00:00.000+0000"),format:"%Y-%m-%d"}} outputs "2020-02-21"
$cmp compare first value A with second B, with following result :
A<B => -1
A=B => 0
A>B => 1
just adapt ab criteria to what you need ({$eq:1} for future dates, {$eq:-1} for past dates, {$eq:0} for current date, {$ne:0} for not today dates, etc... )

Mongodb $count aggregation pipeline not found [duplicate]

This question already has answers here:
MongoDB SELECT COUNT GROUP BY
(9 answers)
Closed 5 years ago.
I'm trying to build an aggregation pipeline in Mongo to count the number of documents generated every 10 minutes in a fairly large dataset. Each document contains an ISODate in a field called requestDtsCal. I'm trying the following code (thanks to https://stackoverflow.com/users/3943271/wizard for the base code):
// Date range and bucket size for the aggregation below.
// The month argument of new Date(y, m, d, ...) is 0-based (1 = February,
// 9 = October) and the dates are built in the local time zone.
// Plain decimal literals are used on purpose: zero-prefixed literals such as
// 01, 00 or 09 are legacy octal-style literals and a SyntaxError in strict mode.
var baseDate = new Date(2017, 1, 11, 0, 0, 0);
var startDate = new Date(2017, 1, 11, 0, 0, 0);
var endDate = new Date(2018, 9, 20, 14, 25, 0);
var divisor = 10 * 60 * 1000; // 10 minutes in milliseconds
db.AUDIT.aggregate([
{
$match : {
requestDtsCal : {
$gte : startDate,
$lt : endDate
}
}
}, {
$group : {
_id : {
$subtract : [ "$requestDtsCal", {
$mod : [ {
$subtract : [ "$requestDtsCal", baseDate ]
}, divisor ]
} ]
},
dates : {
$push : "$requestDtsCal"
}
}
}, {
$count: "$requestDtsCal"
}
]).pretty();
If I run it without the last pipeline stage it returns an array of arrays of all the dates from each document within each range. As soon as I try and count the number of documents in each range with the last pipeline stage it fails with:
assert: command failed: {
"ok" : 0,
"errmsg" : "Unrecognized pipeline stage name: '$count'",
"code" : 16436
} : aggregate failed
_getErrorWithCode#src/mongo/shell/utils.js:25:13
doassert#src/mongo/shell/assert.js:16:14
assert.commandWorked#src/mongo/shell/assert.js:403:5
DB.prototype._runAggregate#src/mongo/shell/db.js:260:9
DBCollection.prototype.aggregate#src/mongo/shell/collection.js:1212:12
#(shell):1:1
2018-01-18T19:54:40.669-0800 E QUERY [thread1] Error: command failed: {
"ok" : 0,
"errmsg" : "Unrecognized pipeline stage name: '$count'",
"code" : 16436
} : aggregate failed :
_getErrorWithCode#src/mongo/shell/utils.js:25:13
doassert#src/mongo/shell/assert.js:16:14
assert.commandWorked#src/mongo/shell/assert.js:403:5
DB.prototype._runAggregate#src/mongo/shell/db.js:260:9
DBCollection.prototype.aggregate#src/mongo/shell/collection.js:1212:12
#(shell):1:1
Any ideas what I'm doing wrong? This is running against Mongo 3.2.11 FWIW.
Thanks,
Ian
If version is 3.2 then you can use $sum in $group pipeline to get count
instead of
{ $count: "$requestDtsCal" }
use
{$group : {_id : null, count : {$sum : 1}}} // _id your ids
I figured out an easy way to do this. I wasn't understanding groups properly.
// Date range and bucket size for the aggregation below.
// The month argument of new Date(y, m, d, ...) is 0-based (1 = February,
// 9 = October) and the dates are built in the local time zone.
// Plain decimal literals are used on purpose: zero-prefixed literals such as
// 01, 00 or 09 are legacy octal-style literals and a SyntaxError in strict mode.
var baseDate = new Date(2017, 1, 11, 0, 0, 0);
var startDate = new Date(2017, 1, 11, 0, 0, 0);
var endDate = new Date(2018, 9, 20, 14, 25, 0);
var divisor = 10 * 60 * 1000; // 10 minutes in milliseconds
db.AUDIT.aggregate([
{
$match : {
requestDtsCal : {
$gte : startDate,
$lt : endDate
}
}
}, {
$group : {
_id : {
$subtract : [ "$requestDtsCal", {
$mod : [ {
$subtract : [ "$requestDtsCal", baseDate ]
}, divisor ]
} ]
},
count: {$sum: 1}
}
}
]).pretty();
Works properly.
Ian

Aggregate trunc date function?

I worked with a bunch of SQL databases before; like Postgres and BigQuery and they have date truncation function (for instance: date_trunc or TIMESTAMP_TRUNC ).
I wonder if mongodb has a DATE_TRUNC function?
I have found the $trunc operator but it works for numbers only.
I want a DATE_TRUNC function to truncate a given Date (the timestamp type in other SQL databases) to a particular boundary, like beginning of year, beginning of month, beginning of hour, may be ok to compose a new Date by getting its year, month, date, hour.
Does someone have some kinds of workaround? Especially for beginning moment of WEEK, and beginning of ISOWEEK, does anyone have a good workaround?
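It's possible to get the start of the week by doing arithmetic on the date or timestamp field. Note that $dayOfWeek numbers the days from 1 (Sunday) to 7 (Saturday), so the pipeline below moves the date back to the preceding Sunday and leaves the time of day unchanged. For a Monday-based ISO week, where the start of week is Monday (1) and the end of week is Sunday (7), use $isoDayOfWeek in place of $dayOfWeek.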
// Adds helper fields that move "$date" back to the first day of its week.
// $dayOfWeek is 1-based; the sample output below shows 7 for a Saturday,
// so day 1 is Sunday.
db.dd.aggregate(
[
{
$addFields : {
startOfWeek : 1, // Sunday in $dayOfWeek numbering (1 = Sunday .. 7 = Saturday)
currentDayOfWeek : {$dayOfWeek : "$date"},
// number of whole days elapsed since day 1 of the week
daysToMinus : { $subtract : [{$dayOfWeek : "$date"} , 1] },
// subtract that many days, expressed in milliseconds; the time of day is not reset
startOfThisWeek : { $subtract : [ "$date", {$multiply : [{ $subtract : [{$dayOfWeek : "$date"} , 1 ] }, 24, 60, 60, 1000 ] } ] }
}
}
]
).pretty()
document
> db.dd.find()
{ "_id" : ObjectId("5a62e2697702c6be61d672f4"), "date" : ISODate("2018-01-20T06:32:09.157Z") }
start of week
{
"_id" : ObjectId("5a62e2697702c6be61d672f4"),
"date" : ISODate("2018-01-20T06:32:09.157Z"),
"startOfWeek" : 1,
"currentDayOfWeek" : 7,
"daysToMinus" : 6,
"startOfThisWeek" : ISODate("2018-01-14T06:32:09.157Z")
}
>
It's possible to truncate date to iso week with $dateFromParts function:
For example
// Truncate "$date" to the start of its ISO week.
// $dateFromParts is an expression operator, not a pipeline stage, so it must be
// evaluated inside a stage such as $project; passing it directly to aggregate()
// is rejected as an unrecognized pipeline stage.
db.dd.aggregate([
  {
    $project: {
      startOfIsoWeek: {
        $dateFromParts: {
          isoWeekYear: { $isoWeekYear: "$date" },
          isoWeek: { $isoWeek: "$date" }
        }
      }
    }
  }
])
For Fri, 22 Jun 2018 20:46:50 UTC +00:00 it returns Mon, 18 Jun 2018 00:00:00 UTC +00:00.
To truncate to hour, day, month, etc. it's easier to use $dateFromString and $dateToString. The following example truncated date to hour:
// Truncate "$date" to the hour by formatting it with fixed minutes/seconds and
// parsing the string back into a date.
// $dateFromString is an expression operator, not a pipeline stage, so it must be
// evaluated inside a stage such as $project.
db.dd.aggregate([
  {
    $project: {
      truncatedToHour: {
        $dateFromString: {
          dateString: {
            $dateToString: {
              format: '%Y-%m-%dT%H:00:00+00:00',
              date: '$date'
            }
          }
        }
      }
    }
  }
])
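$dateToParts and $dateFromParts can be combined.
For year, month, day, hour, minute (the example below passes only year, month and day, i.e. it truncates to the day; also pass 'hour' and 'minute' to keep those units):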
db.getCollection("data").aggregate([
{"$addFields": {
"dateVarFull": {"$dateToParts": {date: {"$toDate" : "2020-08-27T13:00:00Z"}}}
}},
{"$addFields": {
"dateVarTrunc": { "$dateFromParts": {
'year': "$dateVarFull.year",
'month': "$dateVarFull.month",
'day': "$dateVarFull.day"
}}
}}
])
Result:
{
"dateVarFull" : {
"year" : NumberInt(2020),
"month" : NumberInt(8),
"day" : NumberInt(27),
"hour" : NumberInt(13),
"minute" : NumberInt(0),
"second" : NumberInt(0),
"millisecond" : NumberInt(0)
},
"dateVarTrunc" : ISODate("2020-08-27T00:00:00.000+0000")
}
For week trunc use iso8601: true parameter:
db.getCollection("data").aggregate([
{"$addFields": {
"dateVarFull": {
"$dateToParts": {
date: {"$toDate" : "2020-08-27T13:00:00Z"},
iso8601: true
}
}
}},
{"$addFields": {
"dateVarTrunc": { "$dateFromParts": {
'isoWeekYear': "$dateVarFull.isoWeekYear",
'isoWeek': "$dateVarFull.isoWeek",
'isoDayOfWeek': 1
}}
}}
])
Result:
{
"dateVarFull" : {
"isoWeekYear" : NumberInt(2020),
"isoWeek" : NumberInt(35),
"isoDayOfWeek" : NumberInt(4),
"hour" : NumberInt(13),
"minute" : NumberInt(0),
"second" : NumberInt(0),
"millisecond" : NumberInt(0)
},
"dateVarTrunc" : ISODate("2020-08-24T00:00:00.000+0000")
}
Starting in Mongo 5, your wish has been granted with the $dateTrunc operator.
For instance, to truncate dates to their year:
// { date: ISODate("2021-12-05T13:20:56Z") }
// { date: ISODate("2019-04-27T05:00:32Z") }
db.collection.aggregate([
{ $project: { year: { $dateTrunc: { date: "$date", unit: "year" } } } }
])
// { year: ISODate("2021-01-01T00:00:00Z") }
// { year: ISODate("2019-01-01T00:00:00Z") }
You can truncate at different levels of units (year, months, day, hours, ... even quarters) using the unit parameter. And for a given unit at different multiples of units (for instance 3 years, 6 months, ...) using the binSize parameter.
And you can also specify the day at which weeks start:
// { date: ISODate("2021-12-05T13:20:56Z") } <= Sunday
// { date: ISODate("2021-12-06T05:00:32Z") } <= Monday
db.collection.aggregate([
{ $project: {
week: { $dateTrunc: { date: "$date", unit: "week", startOfWeek: "monday" } }
}}
])
// { week: ISODate("2021-11-29T00:00:00Z") }
// { week: ISODate("2021-12-06T00:00:00Z") }

MongoDB calculate count on date grouping

I want to calculate the allotment count of each day, by using day grouping on assigned_on I am able to get assignment count of each day, but those are unique count of each day.
In that count I also want that bed to be included which was assigned yesterday or few days before but not yet released.
For example, I have following records
{
"assigned_on":ISODate("2015-12-01T00:00:00Z"),
"released_on":ISODate("2015-12-01T14:01:23Z"),
"bed_id":1
},
{
"assigned_on":ISODate("2015-12-01T00:00:00Z"),
"released_on":ISODate("2015-12-04T14:01:23Z"),
"bed_id":2
},
{
"assigned_on":ISODate("2015-12-01T00:00:00Z"),
"released_on":ISODate("2015-12-01T14:01:23Z"),
"bed_id":3
},
{
"assigned_on":ISODate("2015-12-02T00:00:00Z"),
"released_on":ISODate("2015-12-02T14:01:23Z"),
"bed_id":1
},
{
"assigned_on":ISODate("2015-12-02T00:00:00Z"),
"released_on":ISODate("2015-12-02T14:01:23Z"),
"bed_id":3
},
{
"assigned_on":ISODate("2015-12-03T00:00:00Z"),
"released_on":ISODate("2015-12-03T14:01:23Z"),
"bed_id":1
},
{
"assigned_on":ISODate("2015-12-03T00:00:00Z"),
"released_on":ISODate("2015-12-03T14:01:23Z"),
"bed_id":3
}
Current query
db.test.aggregate([
{
"$match": {
"assigned_on": {
"$gte": ISODate("2015-12-01T00:00:00Z"),
"$lt": ISODate("2015-12-03T23:59:59Z")
}
}
},
{
"$group": {
"_id": {
"$dayOfMonth": "$assigned_on"
},
"Count": {
"$sum": 1
}
}
}
])
As by day grouping on assigned_on I get above result for day 1, 2 and 3, but I want to the Count for day 1, 2 and 3 as 3 for each in result because in second record the released_on date is 4th december which means that bed 2 was occupied on day 1, 2, 3 and 4.
Current output :
{ "_id" : 3, "Count" : 2 }
{ "_id" : 2, "Count" : 2 }
{ "_id" : 1, "Count" : 3 }
Expected output :
{ "_id" : 3, "Count" : 3 }
{ "_id" : 2, "Count" : 3 }
{ "_id" : 1, "Count" : 3 }
Edit : The _id are the dates that is 1st December, 2nd December and 3rd December and count are the number of beds allotted on respective days
An help or pointer will be very helpful
You can do it with the mongodb mapReduce:
// Map step: emit one (day, 1) pair for every calendar day, in UTC, from the day
// of assigned_on up to (but not including) released_on.
var map = function(){
  // Documents without a truthy bed_id contribute nothing.
  if (!this.bed_id) {
    return;
  }
  var day = new Date(this.assigned_on.getTime());
  // Truncate to midnight UTC so the day buckets do not depend on the local
  // time zone of the JS environment running the map function.
  day.setUTCHours(0, 0, 0, 0);
  // For each day in the range [assigned_on, released_on) emit the day with count 1.
  while (day < this.released_on) {
    // Emit a copy: `day` is mutated on the next line, and a key must not
    // change after it has been handed to emit().
    emit(new Date(day.getTime()), 1);
    day.setUTCDate(day.getUTCDate() + 1);
  }
};
// Reduce step: total the counts emitted for each date (the key).
// NOTE(review): Array.sum is not part of standard JavaScript; presumably it is
// the helper supplied by MongoDB's JavaScript environment - confirm.
var reduce = function(key, values){
return Array.sum(values)
};
db.collection.mapReduce(map, reduce, {out : {inline : 1}}, callback);
For your data I got such result:
[ { _id: Tue Dec 01 2015 02:00:00 GMT+0200 (EET), value: 3 },
{ _id: Wed Dec 02 2015 02:00:00 GMT+0200 (EET), value: 3 },
{ _id: Thu Dec 03 2015 02:00:00 GMT+0200 (EET), value: 3 },
{ _id: Fri Dec 04 2015 02:00:00 GMT+0200 (EET), value: 1 } ]

Return objects from mongo inserted at similar time

I'm trying to group together documents that have been inserted into a collection by a user with a similar timeframe. I'm looking for documentation or a snippet to push me in the right direction on this pseudo code;
User submits a number of documents (over the course of five minutes) into a collection.
Documents are grouped together with similar timeframe.
User is returned a list of objects that were inserted within a timeframe (eg. 1hr).
Edit for clarity:
The publish function to the database with a timestamp is working and I can see documents stored in my collection correctly below;
{ "text" : "beef", "createdBy" : "X9Px6qKRjiB53ANST", "createdAt" : ISODate("2015-02-03T00:47:21.976Z"), "_id" : "sJhwcLCRS4CG6yfTe" }
{ "text" : "beef", "createdBy" : "X9Px6qKRjiB53ANST", "createdAt" : ISODate("2015-02-03T00:47:41.265Z"), "_id" : "NGBwiWZRsDBbNerSy" }
{ "text" : "Chicken", "createdBy" : "X9Px6qKRjiB53ANST", "createdAt" : ISODate("2015-02-03T02:47:21.163Z"), "_id" : "R2FYAjZamTWTy9RTW" }
{ "text" : "Chicken", "createdBy" : "X9Px6qKRjiB53ANST", "createdAt" : ISODate("2015-02-03T04:42:02.895Z"), "_id" : "F7u2EfBEmYLBaFgze" }
How do I group together the documents submitted within the same hour (eg both 'beef' documents and return that to the user? I have a basic return function that returns all entries but I can't find much information on grouping the data by the hour.
Meteor.publish('theFoods', function(){
var currentUser = this.userId;
return Foods.find({
createdBy: currentUser
})
});
You could add a helper like:
// Registers a foodsByHour helper on myTemplate that returns the fetched Foods
// documents as an array of {hour, foods} pairs.
Template.myTemplate.helpers({
foodsByHour: function() {
var foods = Foods.find().fetch();
return _.chain(foods)
// Group key is createdAt.getHours(), i.e. the local hour of day only.
// NOTE(review): documents created at the same hour on different days end up
// in the same group; include the date in the key if that is not wanted.
.groupBy(function(food) {
// Without a createdAt the callback returns undefined.
// NOTE(review): confirm how groupBy keys such documents.
if (food.createdAt)
return food.createdAt.getHours();
})
// Turn the {hour: [docs]} grouping into [{hour, foods}] entries for the template.
.map(function(v, k) {return {hour: k, foods: v};})
.value();
}
});
Which will return an array of hour and foods pairs like this:
[ { hour: '16',
foods:
[ { text: 'beef',
createdBy: 'X9Px6qKRjiB53ANST',
createdAt: Mon Feb 02 2015 16:47:21 GMT-0800 (PST),
_id: 'sJhwcLCRS4CG6yfTe' },
{ text: 'beef',
createdBy: 'X9Px6qKRjiB53ANST',
createdAt: Mon Feb 02 2015 16:47:41 GMT-0800 (PST),
_id: 'NGBwiWZRsDBbNerSy' } ] },
{ hour: '18',
foods:
[ { text: 'Chicken',
createdBy: 'X9Px6qKRjiB53ANST',
createdAt: Mon Feb 02 2015 18:47:21 GMT-0800 (PST),
_id: 'R2FYAjZamTWTy9RTW' } ] },
{ hour: '20',
foods:
[ { text: 'Chicken',
createdBy: 'X9Px6qKRjiB53ANST',
createdAt: Mon Feb 02 2015 20:42:02 GMT-0800 (PST),
_id: 'F7u2EfBEmYLBaFgze' } ] } ]
Here's an example template:
<template name='myTemplate'>
{{#each foodsByHour}}
<h2>{{hour}}</h2>
{{#each foods}}
<p>{{text}}</p>
{{/each}}
{{/each}}
</template>