MongoDB aggreagte fill missing days [duplicate] - mongodb

This question already has answers here:
Fill missing dates in records
(5 answers)
Closed 3 years ago.
I have a product collection with the following documents:
{ "_id" : 1, "item" : "abc", created: ISODate("2014-10-01T08:12:00Z") }
{ "_id" : 2, "item" : "jkl", created: ISODate("2014-10-02T09:13:00Z") }
{ "_id" : 3, "item" : "hjk", created: ISODate("2014-10-02T09:18:00Z") }
{ "_id" : 4, "item" : "sdf", created: ISODate("2014-10-07T09:14:00Z") }
{ "_id" : 5, "item" : "xyz", created: ISODate("2014-10-15T09:15:00Z") }
{ "_id" : 6, "item" : "iop", created: ISODate("2014-10-16T09:15:00Z") }
I want to draw a chart describing product count by day, so I use mongodb aggregation framework to count product group by day:
var proj1 = {
"$project": {
"created": 1,
"_id": 0,
"h": {"$hour": "$created"},
"m": {"$minute": "$created"},
"s": {"$second": "$created"},
"ml": {"$millisecond": "$created"}
}
};
var proj2 = {
"$project": {
"created": {
"$subtract": [
"$created", {
"$add": [
"$ml",
{"$multiply": ["$s", 1000]},
{"$multiply": ["$m", 60, 1000]},
{"$multiply": ["$h", 60, 60, 1000]}
]
}]
}
}
};
db.product.aggregate([
proj1,
proj2,
{$group: {
_id: "$created",
count: {$sum: 1}
}},
{$sort: {_id: 1}}
])
The result in mongo shell is:
{
"result" : [
{
"_id" : ISODate("2014-10-01T00:00:00.000Z"),
"count" : 1
},
{
"_id" : ISODate("2014-10-02T00:00:00.000Z"),
"count" : 2
},
{
"_id" : ISODate("2014-10-07T00:00:00.000Z"),
"count" : 1
},
{
"_id" : ISODate("2014-10-15T00:00:00.000Z"),
"count" : 1
},
{
"_id" : ISODate("2014-10-16T00:00:00.000Z"),
"count" : 1
}
],
"ok" : 1
}
Of course, there is no product some days and the chart using the result set above looks like this:
But the desired chart should look like this:
So the question is: How can I add missing days (of the last 30 days, for example) to the result set with count = 0? That means, the desired result set should looks like this:
{
"result" : [
{
"_id" : ISODate("2014-09-16T00:00:00.000Z"),
"count" : 0
},
{
"_id" : ISODate("2014-09-17T00:00:00.000Z"),
"count" : 0
},
...
{
"_id" : ISODate("2014-10-01T00:00:00.000Z"),
"count" : 1
},
{
"_id" : ISODate("2014-10-02T00:00:00.000Z"),
"count" : 2
},
{
"_id" : ISODate("2014-10-03T00:00:00.000Z"),
"count" : 0
},
...
{
"_id" : ISODate("2014-10-07T00:00:00.000Z"),
"count" : 1
},
{
"_id" : ISODate("2014-09-08T00:00:00.000Z"),
"count" : 0
},
...
{
"_id" : ISODate("2014-10-15T00:00:00.000Z"),
"count" : 1
},
{
"_id" : ISODate("2014-10-16T00:00:00.000Z"),
"count" : 1
},
// also, add some extra days
{
"_id" : ISODate("2014-10-17T00:00:00.000Z"),
"count" : 0
},
{
"_id" : ISODate("2014-10-10T00:00:00.000Z"),
"count" : 0
}
],
"ok" : 1
}

Using aggregate to handle this question completely is a pain.
But it can be reached.
(MongoDB V2.6+ required)
var proj1 = {
"$project" : {
"created" : 1,
"_id" : 0,
"h" : {
"$hour" : "$created"
},
"m" : {
"$minute" : "$created"
},
"s" : {
"$second" : "$created"
},
"ml" : {
"$millisecond" : "$created"
}
}
};
var proj2 = {
"$project" : {
"created" : {
"$subtract" : [ "$created", {
"$add" : [ "$ml", {
"$multiply" : [ "$s", 1000 ]
}, {
"$multiply" : [ "$m", 60, 1000 ]
}, {
"$multiply" : [ "$h", 60, 60, 1000 ]
} ]
} ]
}
}
};
var group1 = {
$group : {
_id : "$created",
count : {
$sum : 1
}
}
};
var group2 = {
$group : {
_id : 0,
origin : {
$push : "$$ROOT"
},
maxDate : {
$max : "$_id"
}
}
};
var step = 24 * 60 * 60 * 1000; // milliseconds of one day
var project3 = {
$project : {
origin : 1,
extents : {
$map : {
"input" : [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29],
"as" : "e",
"in" : {
_id : {
$subtract : [ "$maxDate", {
$multiply : [ step, "$$e"]
}]
},
count : {
$add : [0]
}
}
}
}
}
};
var project4 = {
$project : {
_id : 0,
values : {
$setUnion : [ "$origin", "$extents"]
}
}
};
var unwind1 = {
$unwind : "$values"
};
var group3 = {
$group : {
_id : "$values._id",
count : {
$max : "$values.count"
}
}
};
db.product.aggregate([ proj1, proj2, group1, group2, project3, project4,
unwind1, group3, {
$sort : {
_id : 1
}
} ]);
I would like to fill the missing part at application end something like this for your reference:
function sortResult(x, y) {
var t1 = x._id.getTime();
var t2 = y._id.getTime();
if (t1 < t2) {
return -1;
} else if (t1 == t2) {
return 0;
} else {
return 1;
}
}
var result = db.product.aggregate();
var endDateMilliseconds = result[result.length - 1]._id.getTime();
var step = 24 * 60 * 60 * 1000; // milliseconds of one day
var map = {};
for (var i in result) {
map[ result[i]._id.getTime() ] = result[i];
}
for (var ms = endDateMilliseconds, x = 1; x < 30; x++) {
ms -= step;
if ( ! ( ms in map ) ) {
map[ms] = {_id : new Date(ms), count : 0};
}
}
var finalResult = [];
for (var x in map) {
finalResult.push(map[x]);
}
finalResult.sort(sortResult);
printjson(finalResult);

Ok, first of all: Non-existing values are evaluated to null (roughly translates to "nada", "nothing", "not there"), which isn't equal to 0, which is a well defined value.
MongoDB has no semantical understanding of the difference between 0 and 42, for example. So how should MongoDB decide which value to assume for a day in the time (of which mongo has no semantical understanding, too)?
Basically, you have two choices: save a 0 for each day when no value is to record or you iterate in your app over the days in the time you want to create a chart for and issue 0 for each day no value exists as a substitute. Id' suggest doing the former, since that would make it possible using the aggregation framework.

Related

MongoDB count distinct items

I have following query on a list with this fields : key,time,p,email
use app_db;
db.getCollection("app_log").aggregate(
[
{
"$match" : {
"key" : "login"
}
},
{
"$group" : {
"_id" : {
"$substr" : [
"$time",
0.0,
10.0
]
},
"total" : {
"$sum" : "$p"
},
"count" : {
"$sum" : 1.0
}
}
}
]
);
and the output is something like this :
{
"_id" : "2019-08-25",
"total" : NumberInt(623),
"count" : 400.0
}
{
"_id" : "2019-08-24",
"total" : NumberInt(2195),
"count" : 1963.0
}
{
"_id" : "2019-08-23",
"total" : NumberInt(1294),
"count" : 1706.0
}
{
"_id" : "2019-08-22",
"total" : NumberInt(53),
"count" : 1302.0
}
But I need the count to be distinctive on email field, which is count number of distinct email addresses who logged in per day and their p value is greater 0
You need $addToSet to get an array of unique email values per day and then you can use $size to count the number of items in that array:
db.getCollection("app_log").aggregate(
[
{
"$match" : {
"key" : "login"
}
},
{
"$group" : {
"_id" : {
"$substr" : [
"$time",
0.0,
10.0
]
},
"total" : {
"$sum" : "$p"
},
"emails" : {
"$addToSet": "$email"
}
}
},
{
$project: {
_id: 1,
total: 1,
countDistinct: { $size: "$emails" }
}
}
]
);

How to Group mongodb - mapReduce output?

i have a query regarding the mapReduce framework in mongodb, so i have a result of key value pair from mapReduce function , now i want to run the query on this output of mapReduce.
So i am using mapReduce to find out the stats of user like this
db.order.mapReduce(function() { emit (this.customer,{count:1,orderDate:this.orderDate.interval_start}) },
function(key,values){
var sum =0 ; var lastOrderDate;
values.forEach(function(value) {
if(value['orderDate']){
lastOrderDate=value['orderDate'];
}
sum+=value['count'];
});
return {count:sum,lastOrderDate:lastOrderDate};
},
{ query:{status:"DELIVERED"},out:"order_total"}).find()
which give me output like this
{ "_id" : ObjectId("5443765ae4b05294c8944d5b"), "value" : { "count" : 1, "orderDate" : ISODate("2014-10-18T18:30:00Z") } }
{ "_id" : ObjectId("54561911e4b07a0a501276af"), "value" : { "count" : 2, "lastOrderDate" : ISODate("2015-03-14T18:30:00Z") } }
{ "_id" : ObjectId("54561b9ce4b07a0a501276b1"), "value" : { "count" : 1, "orderDate" : ISODate("2014-11-01T18:30:00Z") } }
{ "_id" : ObjectId("5458712ee4b07a0a501276c2"), "value" : { "count" : 2, "lastOrderDate" : ISODate("2014-11-03T18:30:00Z") } }
{ "_id" : ObjectId("545f64e7e4b07a0a501276db"), "value" : { "count" : 15, "lastOrderDate" : ISODate("2015-06-04T18:30:00Z") } }
{ "_id" : ObjectId("54690771e4b0070527c657ed"), "value" : { "count" : 6, "lastOrderDate" : ISODate("2015-06-03T18:30:00Z") } }
{ "_id" : ObjectId("54696c64e4b07f3c07010b4a"), "value" : { "count" : 1, "orderDate" : ISODate("2014-11-18T18:30:00Z") } }
{ "_id" : ObjectId("546980d1e4b07f3c07010b4d"), "value" : { "count" : 4, "lastOrderDate" : ISODate("2015-03-24T18:30:00Z") } }
{ "_id" : ObjectId("54699ac4e4b07f3c07010b51"), "value" : { "count" : 30, "lastOrderDate" : ISODate("2015-05-23T18:30:00Z") } }
{ "_id" : ObjectId("54699d0be4b07f3c07010b55"), "value" : { "count" : 1, "orderDate" : ISODate("2014-11-16T18:30:00Z") } }
{ "_id" : ObjectId("5469a1dce4b07f3c07010b59"), "value" : { "count" : 2, "lastOrderDate" : ISODate("2015-04-29T18:30:00Z") } }
{ "_id" : ObjectId("5469a96ce4b07f3c07010b5e"), "value" : { "count" : 1, "orderDate" : ISODate("2014-11-16T18:30:00Z") } }
{ "_id" : ObjectId("5469c1ece4b07f3c07010b64"), "value" : { "count" : 9, "lastOrderDate" : ISODate("2015-04-15T18:30:00Z") } }
{ "_id" : ObjectId("5469f422e4b0ce7d5ee021ad"), "value" : { "count" : 5, "lastOrderDate" : ISODate("2015-06-01T18:30:00Z") } }
......
Now i want to run query and group the users on the basis of count in different categories like for user with count less than 5 in one group , 5-10 in another, etc
and want output something like this
{userLessThan5: 9 }
{user5to10: 2 }
{user10to15: 1 }
{user15to20: 0 }
....
Try this,
db.order.mapReduce(function() { emit (this.customer,{count:1,orderDate:this.orderDate.interval_start}) },
function(key,values){
var category; // add this new field
var sum =0 ; var lastOrderDate;
values.forEach(function(value) {
if(value['orderDate']){
lastOrderDate=value['orderDate'];
}
sum+=value['count'];
});
// at this point you are already aware in which category your records lies , just add a new field to mark it
if(sum < 5){ category: userLessThan5};
if(sum >= 5 && sum <=10){ category: user5to10};
if(sum <= 10 && sum >= 15){ category: user10to15};
if(sum <= 15 && sum >=20){ category: user15to20};
....
return {count:sum,lastOrderDate:lastOrderDate,category:category};
},
{ query:{status:"DELIVERED"},out:"order_total"}).find()
db.order_total.aggregate([{ $group: { "_id": "$value.category", "users": { $sum: 1 } } }]);
you will get you desired result
{userLessThan5: 9 }
{user5to10: 2 }
{user10to15: 1 }
{user15to20: 0 }
....
I wrote a query using your data in aggregation as per my knowledge, there may be better way to solve this problem.
var a=db.test.aggregate([{$match:{"value.count":{$lt:5}}},
{ $group: { _id:"$value.count",total:{"$sum":1}}},
{$group:{_id:"less than 5",total:{$sum:"$total"}}}])
var b=db.test.aggregate([{$match:{"value.count":{$lt:10,$gt:5}}},
{ $group: { _id:"$value.count",total:{"$sum":1}}},
{$group:{_id:"between 5 and 10",total:{$sum:"$total"}}}])
var c=db.test.aggregate([{$match:{"value.count":{$lt:15,$gt:10}}},
{ $group: { _id:"$value.count",total:{"$sum":1}}},
{$group:{_id:"between 10 and 15",total:{$sum:"$total"}}}])
insert a, b, c into another collection
You could try to group the output data after mapreduce to every 5 interval count through aggregate like below
db.data.aggregate([
{ "$group": {
"_id": {
"$subtract": [
{ "$subtract": [ "$value.count", 0 ] },
{ "$mod": [
{ "$subtract": [ "$value.count", 0 ] },
5
]}
]
},
"count": { "$sum": 1 }
}}
])
Also maybe here is one related question here.

MondoDB Aggregate - Absolute count for each day

I have the following (already aggregated)collection
{ "_id" : { "day" : "2015-02-01" }, "total" : 2 }
{ "_id" : { "day" : "2015-02-02" }, "total" : 3 }
{ "_id" : { "day" : "2015-02-03" }, "total" : 10 }
{ "_id" : { "day" : "2015-02-04" }, "total" : 10 }
{ "_id" : { "day" : "2015-02-05" }, "total" : 5 }
What i need is calculating an absolute value for each day, summing the previous days. So expected result would be in the case above
{ "_id" : { "day" : "2015-02-01" }, "absolutetotalforday" : 2 }
{ "_id" : { "day" : "2015-02-02" }, "absolutetotalforday" : 5 }
{ "_id" : { "day" : "2015-02-03" }, "absolutetotalforday" : 15 }
{ "_id" : { "day" : "2015-02-04" }, "absolutetotalforday" : 25 }
{ "_id" : { "day" : "2015-02-05" }, "absolutetotalforday" : 30 }
Currently no clue how to achieve this with 1 query. Of course i could do a sum for each day I'm interested in, but this might be a long time range
Any help appreciated
Because aggregation framework has no mechanism of knowing the value of a previous document, or the previous "grouped" value of a document, your best bet would be to use Map-Reduce in this case.
Map-Reduce will give you the "running total" for the current total at end of day you require although this won't be in the desired key, absolutetotalforday but in a key called value since the reduced values are always invalue key.
The following mapReduce() operation will give you the desired result, assuming the results from the previous aggregation operation were output to a separate collection named agg_results:
db.agg_results.mapReduce(
function() { emit( this._id, this.total ); },
function(key, values) { return Array.sum(values); },
{
"scope": { "total": 0 },
"finalize": function(key, value) {
total += value;
return total;
},
"out": { "inline": 1 }
}
);
Sample Output
{
"results" : [
{
"_id" : {
"day" : "2015-02-01"
},
"value" : 2
},
{
"_id" : {
"day" : "2015-02-02"
},
"value" : 5
},
{
"_id" : {
"day" : "2015-02-03"
},
"value" : 15
},
{
"_id" : {
"day" : "2015-02-04"
},
"value" : 25
},
{
"_id" : {
"day" : "2015-02-05"
},
"value" : 30
}
],
"timeMillis" : 0,
"counts" : {
"input" : 5,
"emit" : 5,
"reduce" : 0,
"output" : 5
},
"ok" : 1
}
Sorting the results will not work with inline results and with dates of String type. Instead, try converting the date strings to a JavaScript date object, write the results to a collection and then run a sort on that collection:
db.agg_results.mapReduce(
function() { emit( new Date(this._id.day), this.total ); },
function(key, values) { return Array.sum(values); },
{
"scope": { "total": 0 },
"finalize": function(key, value) {
total += value;
return total;
},
"out": "tmpResults"
}
);
Sample Output (with sort)
> db.tmpResults.find().sort({_id: 1})
{ "_id" : ISODate("2015-02-01T00:00:00Z"), "value" : 2 }
{ "_id" : ISODate("2015-02-02T00:00:00Z"), "value" : 5 }
{ "_id" : ISODate("2015-02-03T00:00:00Z"), "value" : 15 }
{ "_id" : ISODate("2015-02-04T00:00:00Z"), "value" : 25 }
{ "_id" : ISODate("2015-02-05T00:00:00Z"), "value" : 30 }
>

$avg in mongodb aggregation

Document looks like this:
{
"_id" : ObjectId("361de42f1938e89b179dda42"),
"user_id" : "u1",
"evaluator_id" : "e1",
"candidate_id" : ObjectId("54f65356294160421ead3ca1"),
"OVERALL_SCORE" : 150,
"SCORES" : [
{ "NAME" : "asd", "OBTAINED_SCORE" : 30}, { "NAME" : "acd", "OBTAINED_SCORE" : 36}
]
}
Aggregation function:
db.coll.aggregate([ {$unwind:"$SCORES"}, {$group : { _id : { user_id : "$user_id", evaluator_id : "$evaluator_id"}, AVG_SCORE : { $avg : "$SCORES.OBTAINED_SCORE" }}} ])
Suppose if there are two documents with same "user_id" (say u1) and different "evaluator_id" (say e1 and e2).
For example:
1) Average will work like this ((30 + 20) / 2 = 25). This is working for me.
2) But for { evaluator_id : "e1" } document, score is 30 for { "NAME" : "asd" } and { evaluator_id : "e2" } document, score is 0 for { "NAME" : "asd" }. In this case, I want the AVG_SCORE to be 30 only (not (30 + 0) / 2 = 15).
Is it possible through aggregation??
Could any one help me out.
It's possible by placing a $match between the $unwind and $group aggregation pipelines to first filter the arrays which match the specified condition to include in the average computation and that is, score array where the obtained score is not equal to 0 "SCORES.OBTAINED_SCORE" : { $ne : 0 }
db.coll.aggregate([
{
$unwind: "$SCORES"
},
{
$match : {
"SCORES.OBTAINED_SCORE" : { $ne : 0 }
}
},
{
$group : {
_id : {
user_id : "$user_id",
evaluator_id : "$evaluator_id"
},
AVG_SCORE : {
$avg : "$SCORES.OBTAINED_SCORE"
}
}
}
])
For example, the aggregation result for this document:
{
"_id" : ObjectId("5500aaeaa7ef65c7460fa3d9"),
"user_id" : "u1",
"evaluator_id" : "e1",
"candidate_id" : ObjectId("54f65356294160421ead3ca1"),
"OVERALL_SCORE" : 150,
"SCORES" : [
{
"NAME" : "asd",
"OBTAINED_SCORE" : 0
},
{
"NAME" : "acd",
"OBTAINED_SCORE" : 36
}
]
}
will yield:
{
"result" : [
{
"_id" : {
"user_id" : "u1",
"evaluator_id" : "e1"
},
"AVG_SCORE" : 36
}
],
"ok" : 1
}

How can I get a running total with mongodb aggregate framework?

I am fairly new to MongoDB and I am playing with the aggregate framework. One of the examples from the documentation shows the following, which returns total number of new user joins per month and lists the month joined:
db.users.aggregate(
[
{ $project : { month_joined : { $month : "$joined" } } } ,
{ $group : { _id : {month_joined:"$month_joined"} , number : { $sum : 1 } } },
{ $sort : { "_id.month_joined" : 1 } }
]
)
The code outputs the following:
{
"_id" : {
"month_joined" : 1
},
"number" : 3
},
{
"_id" : {
"month_joined" : 2
},
"number" : 9
},
{
"_id" : {
"month_joined" : 3
},
"number" : 5
}
Is it possible to also have each object contain the sum of all users that have joined since the start, so I don't have to run over the objects programmatically and calculate it myself?
Example desired output:
{
"_id" : {
"month_joined" : 1
},
"number" : 3,
"total": 3
},
{
"_id" : {
"month_joined" : 2
},
"number" : 9,
"total": 12
},
{
"_id" : {
"month_joined" : 3
},
"number" : 5,
"total": 17
}