Group By Hour using UNIX time stamp in mongodb - mongodb

I required records with the output of gender, count, and updated hour for two days.
db.FaceData.aggregate([ {$match: { 'Timestamp' : { $gte : 1448121600000, $lt : 1448294399000 }, 'DID' : "ABFR001" }}, {$group: { _id: {'Gen': '$Gen'}, count : { $sum : 1 } }} ]);
output:
------
{ "_id" : { "Gen" : 1 }, "count" : 3055 }
{ "_id" : { "Gen" : 0 }, "count" : 2866 }
In the above output I have to group by hour for two days, For Example, Every hour I need Gender, Count for 2days.
Timestamp is in millisecond.

You would need a mechanism to get the actual date object from the unix timestamp, one way is to add the timestamp to a zero-milliseconds Date() object, using the $add operator in the $project stage before the actual grouping aggregation pipeline.
Once you get the date, extract the hour part by using the $hour operator, something like the following:
db.FaceData.aggregate([
{
"$match": {
"Timestamp" : { $gte : 1448121600000, $lt : 1448294399000 },
"DID" : "ABFR001"
}
},
{
$project : {
"hourPart" : {
"$hour": { "$add": [ new Date(0), "$Timestamp" ] }
},
"Gen": 1
}
},
{
"$group": {
"_id": "$hourPart",
"Gen_0_count" : {
"$sum": {
"$cond": [ { "$eq": [ "$Gen", 0 ] }, 1, 0 ]
}
},
"Gen_1_count" : {
"$sum": {
"$cond": [ { "$eq": [ "$Gen", 1 ] }, 1, 0 ]
}
}
}
}
]);

{"$match": {
"Timestamp" : { $gte : 1448121600000, $lt : 1448294399000 },
"DID" : "ABFR001"
}} ,
{ "$group" : {
"_id" : {
"$divide" : [{ "$subtract" : [{"$divide" : ["$Timestamp", 1000]}, { "$mod" : [{"$divide" : ["$Tstmp", 1000]}, 3600] }] }, 3600 ]
},
"Male" : {
"$sum": {
"$cond": [ { "$eq": [ "$Gen", 0 ] }, 1, 0 ]
}
},
"Female" : {
"$sum": {
"$cond": [ { "$eq": [ "$Gen", 1 ] }, 1, 0 ]
}
}
} }

Related

Calculate date difference in year, month, day

I have the following query:
db.getCollection('user').aggregate([
{$unwind: "$education"},
{$project: {
duration: {"$divide":[{$subtract: ['$education.to', '$education.from'] }, 1000 * 60 * 60 * 24 * 365]}
}},
{$group: {
_id: '$_id',
"duration": {$sum: '$duration'}
}}]
])
Above query result is:
{
"_id" : ObjectId("59fabb20d7905ef056f55ac1"),
"duration" : 2.34794520547945
}
/* 2 */
{
"_id" : ObjectId("59fab630203f02f035301fc3"),
"duration" : 2.51232876712329
}
But what I want to do is get its duration in year+ month + day format, something like: 2 y, 3 m, 20 d.
One another point, if a course is going on the to field is null, and another field isGoingOn: true, so here I should calculate the duration by using current date instead of to field.
And user has array of course subdocuments
education: [
{
"courseName": "Java",
"from" : ISODate("2010-12-08T00:00:00.000Z"),
"to" : ISODate("2011-05-31T00:00:00.000Z"),
"isGoingOn": false
},
{
"courseName": "PHP",
"from" : ISODate("2013-12-08T00:00:00.000Z"),
"to" : ISODate("2015-05-31T00:00:00.000Z"),
"isGoingOn": false
},
{
"courseName": "Mysql",
"from" : ISODate("2017-02-08T00:00:00.000Z"),
"to" : null,
"isGoingOn": true
}
]
One another point is this: that date may be not continuous in one subdocument to the other subdocument. A user may have a course for 1 year, and then after two years, he/she started his/her next course for 1 year, and 3 months (it means this user has a total of 2 years and 3-month course duration).
What I want is get date difference of each subdocument in educations array, and sum those. Suppose in my sample data Java course duration is 6 month, and 22 days, PHP course duration is 1 year, and 6 months, and 22 days, and the last one is from 8 Feb 2017 till now, and it's going on, so my education duration is the sum of these intervals.
Please try this aggregation to get date difference in days,months and years, added multiple $addFields stage compute and reduce differences to date, month range without underflow, and the assumption here is 1 month = 30 days
pipeline
db.edu.aggregate(
[
{
$addFields : {
trainingPeriod : {
$map : {
input : "$education",
as : "t",
in : {
year: {$subtract: [{$year : {$ifNull : ["$$t.to", new Date()]}}, {$year : "$$t.from"}]},
month: {$subtract: [{$month : {$ifNull : ["$$t.to", new Date()]}}, {$month : "$$t.from"}]},
dayOfMonth: {$subtract: [{$dayOfMonth : {$ifNull : ["$$t.to", new Date()]}}, {$dayOfMonth : "$$t.from"}]}
}
}
}
}
},
{
$addFields : {
trainingPeriod : {
$map : {
input : "$trainingPeriod",
as : "d",
in : {
year: "$$d.year",
month: {$cond : [{$lt : ["$$d.dayOfMonth", 0]}, {$subtract : ["$$d.month", 1]}, "$$d.month" ]},
day: {$cond : [{$lt : ["$$d.dayOfMonth", 0]}, {$add : [30, "$$d.dayOfMonth"]}, "$$d.dayOfMonth" ]}
}
}
}
}
},
{
$addFields : {
trainingPeriod : {
$map : {
input : "$trainingPeriod",
as : "d",
in : {
year: {$cond : [{$lt : ["$$d.month", 0]}, {$subtract : ["$$d.year", 1]}, "$$d.year" ]},
month: {$cond : [{$lt : ["$$d.month", 0]}, {$add : [12, "$$d.month"]}, "$$d.month" ]},
day: "$$d.day"
}
}
}
}
},
{
$addFields : {
total : {
$reduce : {
input : "$trainingPeriod",
initialValue : {year : 0, month : 0, day : 0},
in : {
year: {$add : ["$$this.year", "$$value.year"]},
month: {$add : ["$$this.month", "$$value.month"]},
day: {$add : ["$$this.day", "$$value.day"]}
}
}
}
}
},
{
$addFields : {
total : {
year : "$total.year",
month : {$add : ["$total.month", {$floor : {$divide : ["$total.day", 30]}}]},
day : {$mod : ["$total.day", 30]}
}
}
},
{
$addFields : {
total : {
year : {$add : ["$total.year", {$floor : {$divide : ["$total.month", 12]}}]},
month : {$mod : ["$total.month", 12]},
day : "$total.day"
}
}
}
]
).pretty()
result
{
"_id" : ObjectId("5a895d4721cbd77dfe857f95"),
"education" : [
{
"courseName" : "Java",
"from" : ISODate("2010-12-08T00:00:00Z"),
"to" : ISODate("2011-05-31T00:00:00Z"),
"isGoingOn" : false
},
{
"courseName" : "PHP",
"from" : ISODate("2013-12-08T00:00:00Z"),
"to" : ISODate("2015-05-31T00:00:00Z"),
"isGoingOn" : false
},
{
"courseName" : "Mysql",
"from" : ISODate("2017-02-08T00:00:00Z"),
"to" : null,
"isGoingOn" : true
}
],
"trainingPeriod" : [
{
"year" : 0,
"month" : 5,
"day" : 23
},
{
"year" : 1,
"month" : 5,
"day" : 23
},
{
"year" : 1,
"month" : 0,
"day" : 10
}
],
"total" : {
"year" : 2,
"month" : 11,
"day" : 26
}
}
>
Well you could just simply use the existing date aggregation operators as opposed to using math to convert to "days" as you presently have:
db.getCollection('user').aggregate([
{ "$unwind": "$education" },
{ "$group": {
"_id": "$_id",
"years": {
"$sum": {
"$subtract": [
{ "$subtract": [
{ "$year": { "$ifNull": [ "$education.to", new Date() ] } },
{ "$year": "$education.from" }
]},
{ "$cond": {
"if": {
"$gt": [
{ "$month": { "$ifNull": [ "$education.to", new Date() ] } },
{ "$month": "$education.from" }
]
},
"then": 0,
"else": 1
}}
]
}
},
"months": {
"$sum": {
"$add": [
{ "$subtract": [
{ "$month": { "$ifNull": [ "$education.to", new Date() ] } },
{ "$month": "$education.from" }
]},
{ "$cond": {
"if": {
"$gt": [
{ "$month": { "$ifNull": ["$education.to", new Date() ] } },
{ "$month": "$education.from" }
]
},
"then": 0,
"else": 12
}}
]
}
},
"days": {
"$sum": {
"$add": [
{ "$subtract": [
{ "$dayOfYear": { "$ifNull": [ "$education.to", new Date() ] } },
{ "$dayOfYear": "$education.from" }
]},
{ "$cond": {
"if": {
"$gt": [
{ "$month": { "$ifNull": [ "$education.to", new Date() ] } },
{ "$month": "$education.from" }
]
},
"then": 0,
"else": 365
}}
]
}
}
}},
{ "$project": {
"years": {
"$add": [
"$years",
{ "$add": [
{ "$floor": { "$divide": [ "$months", 12 ] } },
{ "$floor": { "$divide": [ "$days", 365 ] } }
]}
]
},
"months": {
"$mod": [
{ "$add": [
"$months",
{ "$floor": {
"$multiply": [
{ "$divide": [ "$days", 365 ] },
12
]
}}
]},
12
]
},
"days": { "$mod": [ "$days", 365 ] }
}}
])
It is "sort of" an approximation on the "days" and "months" without the necessary operations to be "certain" of leap years, but it would get you the result which should be "near enough" for most purposes.
You can even do this without $unwind as long as your MongoDB version is 3.2 or greater:
db.getCollection('user').aggregate([
{ "$addFields": {
"duration": {
"$let": {
"vars": {
"edu": {
"$map": {
"input": "$education",
"as": "e",
"in": {
"$let": {
"vars": { "toDate": { "$ifNull": ["$$e.to", new Date()] } },
"in": {
"years": {
"$subtract": [
{ "$subtract": [
{ "$year": "$$toDate" },
{ "$year": "$$e.from" }
]},
{ "$cond": {
"if": { "$gt": [{ "$month": "$$toDate" },{ "$month": "$$e.from" }] },
"then": 0,
"else": 1
}}
]
},
"months": {
"$add": [
{ "$subtract": [
{ "$ifNull": [{ "$month": "$$toDate" }, new Date() ] },
{ "$month": "$$e.from" }
]},
{ "$cond": {
"if": { "$gt": [{ "$month": "$$toDate" },{ "$month": "$$e.from" }] },
"then": 0,
"else": 12
}}
]
},
"days": {
"$add": [
{ "$subtract": [
{ "$ifNull": [{ "$dayOfYear": "$$toDate" }, new Date() ] },
{ "$dayOfYear": "$$e.from" }
]},
{ "$cond": {
"if": { "$gt": [{ "$month": "$$toDate" },{ "$month": "$$e.from" }] },
"then": 0,
"else": 365
}}
]
}
}
}
}
}
}
},
"in": {
"$let": {
"vars": {
"years": { "$sum": "$$edu.years" },
"months": { "$sum": "$$edu.months" },
"days": { "$sum": "$$edu.days" }
},
"in": {
"years": {
"$add": [
"$$years",
{ "$add": [
{ "$floor": { "$divide": [ "$$months", 12 ] } },
{ "$floor": { "$divide": [ "$$days", 365 ] } }
]}
]
},
"months": {
"$mod": [
{ "$add": [
"$$months",
{ "$floor": {
"$multiply": [
{ "$divide": [ "$$days", 365 ] },
12
]
}}
]},
12
]
},
"days": { "$mod": [ "$$days", 365 ] }
}
}
}
}
}
}}
])
This is because from MongoDB 3.4 you can use $sum directly with an array of or any list of expressions in stages like $addFields or $project, and the $map can apply those same "date aggregation operator" expressions against each array element in place of doing $unwind first.
So the main math can really be done in one part of "reducing" the array, and then each total can be adjusted by the general "divisors" for the years, and the "modulo" or "remainder" from any overruns in the months and days.
Essentially returns:
{
"_id" : ObjectId("5a07688e98e4471d8aa87940"),
"education" : [
{
"courseName" : "Java",
"from" : ISODate("2010-12-08T00:00:00.000Z"),
"to" : ISODate("2011-05-31T00:00:00.000Z"),
"isGoingOn" : false
},
{
"courseName" : "PHP",
"from" : ISODate("2013-12-08T00:00:00.000Z"),
"to" : ISODate("2015-05-31T00:00:00.000Z"),
"isGoingOn" : false
},
{
"courseName" : "Mysql",
"from" : ISODate("2017-02-08T00:00:00.000Z"),
"to" : null,
"isGoingOn" : true
}
],
"duration" : {
"years" : 3.0,
"months" : 3.0,
"days" : 259.0
}
}
Given the 11th of November 2017
You can simplify your code by using client side processing with moment js library.
All the date time math is handled by moment js library. Use duration to calculate the reduced time diff
Use reduce to add the time diff across all the array elements followed by moment duration to output the time in years/months/days.
It solves two issues :
Gives you accurate difference in years month and days between two dates.
Gives you expected format.
For example:
var education = [
{
"courseName": "Java",
"from" : new Date("2010-12-08T00:00:00.000Z"),
"to" : new Date("2011-05-31T00:00:00.000Z"),
"isGoingOn": false
},
{
"courseName": "PHP",
"from" : new Date("2013-12-08T00:00:00.000Z"),
"to" : new Date("2015-05-31T00:00:00.000Z"),
"isGoingOn": false
},
{
"courseName": "Mysql",
"from" : new Date("2017-02-08T00:00:00.000Z"),
"to" : null,
"isGoingOn": true
}
];
var reducedDiff = education.reduce(function(prevVal, elem) {
if(elem.isGoingOn) elem.to = new Date();
var diffDuration = moment(elem.to).diff(moment(elem.from));
return prevVal + diffDuration;
}, 0);
var duration = moment.duration(reducedDiff);
alert(duration.years() +" y, " + duration.months() + " m, " + duration.days() + " d " );
var durationstr = duration.years() +" y, " + duration.months() + " m, " + duration.days() + " d ";
MongoDb integration:
var reducedDiff = db.getCollection('user').find({},{education:1}).reduce(function(...

Group and count over a start and end range

If I have data in the following format:
[
{
_id: 1,
startDate: ISODate("2017-01-1T00:00:00.000Z"),
endDate: ISODate("2017-02-25T00:00:00.000Z"),
type: 'CAR'
},
{
_id: 2,
startDate: ISODate("2017-02-17T00:00:00.000Z"),
endDate: ISODate("2017-03-22T00:00:00.000Z"),
type: 'HGV'
}
]
Is it possible to retrieve data grouped by 'type', but also with a count of the type for each of month in a given date range e.g. between 2017/1/1 to 2017/4/1 would return:
[
{
_id: 'CAR',
monthCounts: [
/*January*/
{
from: ISODate("2017-01-1T00:00:00.000Z"),
to: ISODate("2017-01-31T23:59:59.999Z"),
count: 1
},
/*February*/
{
from: ISODate("2017-02-1T00:00:00.000Z"),
to: ISODate("2017-02-28T23:59:59.999Z"),
count: 1
},
/*March*/
{
from: ISODate("2017-03-1T00:00:00.000Z"),
to: ISODate("2017-03-31T23:59:59.999Z"),
count: 0
},
]
},
{
_id: 'HGV',
monthCounts: [
{
from: ISODate("2017-01-1T00:00:00.000Z"),
to: ISODate("2017-01-31T23:59:59.999Z"),
count: 0
},
{
from: ISODate("2017-02-1T00:00:00.000Z"),
to: ISODate("2017-02-28T23:59:59.999Z"),
count: 1
},
{
from: ISODate("2017-03-1T00:00:00.000Z"),
to: ISODate("2017-03-31T23:59:59.999Z"),
count: 1
},
]
}
]
The returned format is not really important, but what I am trying to achieve is in a single query to retrieve a number of counts for the same grouping (one per month). The input could be simply a start and end date to report from or more likely it could be an array of the date ranges to group by.
The algorithm for this is to basically "iterate" values between the interval of the two values. MongoDB has a couple of ways to deal with this, being what has always been present with mapReduce() and with new features available to the aggregate() method.
I'm going expand on your selection to deliberately show an overlapping month since your examples did not have one. This will result in the "HGV" values appearing in "three" months of output.
{
"_id" : 1,
"startDate" : ISODate("2017-01-01T00:00:00Z"),
"endDate" : ISODate("2017-02-25T00:00:00Z"),
"type" : "CAR"
}
{
"_id" : 2,
"startDate" : ISODate("2017-02-17T00:00:00Z"),
"endDate" : ISODate("2017-03-22T00:00:00Z"),
"type" : "HGV"
}
{
"_id" : 3,
"startDate" : ISODate("2017-02-17T00:00:00Z"),
"endDate" : ISODate("2017-04-22T00:00:00Z"),
"type" : "HGV"
}
Aggregate - Requires MongoDB 3.4
db.cars.aggregate([
{ "$addFields": {
"range": {
"$reduce": {
"input": { "$map": {
"input": { "$range": [
{ "$trunc": {
"$divide": [
{ "$subtract": [ "$startDate", new Date(0) ] },
1000
]
}},
{ "$trunc": {
"$divide": [
{ "$subtract": [ "$endDate", new Date(0) ] },
1000
]
}},
60 * 60 * 24
]},
"as": "el",
"in": {
"$let": {
"vars": {
"date": {
"$add": [
{ "$multiply": [ "$$el", 1000 ] },
new Date(0)
]
},
"month": {
}
},
"in": {
"$add": [
{ "$multiply": [ { "$year": "$$date" }, 100 ] },
{ "$month": "$$date" }
]
}
}
}
}},
"initialValue": [],
"in": {
"$cond": {
"if": { "$in": [ "$$this", "$$value" ] },
"then": "$$value",
"else": { "$concatArrays": [ "$$value", ["$$this"] ] }
}
}
}
}
}},
{ "$unwind": "$range" },
{ "$group": {
"_id": {
"type": "$type",
"month": "$range"
},
"count": { "$sum": 1 }
}},
{ "$sort": { "_id": 1 } },
{ "$group": {
"_id": "$_id.type",
"monthCounts": {
"$push": { "month": "$_id.month", "count": "$count" }
}
}}
])
The key to making this work is the $range operator which takes values for a "start" and and "end" as well as an "interval" to apply. The result is an array of values taken from the "start" and incremented until the "end" is reached.
We use this with startDate and endDate to generate the possible dates in between those values. You will note that we need to do some math here since the $range only takes a 32-bit integer, but we can take the milliseconds away from the timestamp values so that is okay.
Because we want "months", the operations applied extract the month and year values from the generated range. We actually generate the range as the "days" in between since "months" are difficult to deal with in math. The subsequent $reduce operation takes only the "distinct months" from the date range.
The result therefore of the first aggregation pipeline stage is a new field in the document which is an "array" of all the distinct months covered between startDate and endDate. This gives an "iterator" for the rest of the operation.
By "iterator" I mean than when we apply $unwind we get a copy of the original document for every distinct month covered in the interval. This then allows the following two $group stages to first apply a grouping to the common key of "month" and "type" in order to "total" the counts via $sum, and next $group makes the key just the "type" and puts the results in an array via $push.
This gives the result on the above data:
{
"_id" : "HGV",
"monthCounts" : [
{
"month" : 201702,
"count" : 2
},
{
"month" : 201703,
"count" : 2
},
{
"month" : 201704,
"count" : 1
}
]
}
{
"_id" : "CAR",
"monthCounts" : [
{
"month" : 201701,
"count" : 1
},
{
"month" : 201702,
"count" : 1
}
]
}
Note that the coverage of "months" is only present where there is actual data. Whilst possible to produce zero values over a range, it requires quite a bit of wrangling to do so and is not very practical. If you want zero values then it is better to add that in post processing in the client once the results have been retrieved.
If you really have your heart set on the zero values, then you should separately query for $min and $max values, and pass these in to "brute force" the pipeline into generating the copies for each supplied possible range value.
So this time the "range" is made externally to all documents, and you then use a $cond statement into the accumulator to see if the current data is within the grouped range produced. Also since the generation is "external", we really don't need the MongoDB 3.4 operator of $range, so this can be applied to earlier versions as well:
// Get min and max separately
var ranges = db.cars.aggregate(
{ "$group": {
"_id": null,
"startRange": { "$min": "$startDate" },
"endRange": { "$max": "$endDate" }
}}
).toArray()[0]
// Make the range array externally from all possible values
var range = [];
for ( var d = new Date(ranges.startRange.valueOf()); d <= ranges.endRange; d.setUTCMonth(d.getUTCMonth()+1)) {
var v = ( d.getUTCFullYear() * 100 ) + d.getUTCMonth()+1;
range.push(v);
}
// Run conditional aggregation
db.cars.aggregate([
{ "$addFields": { "range": range } },
{ "$unwind": "$range" },
{ "$group": {
"_id": {
"type": "$type",
"month": "$range"
},
"count": {
"$sum": {
"$cond": {
"if": {
"$and": [
{ "$gte": [
"$range",
{ "$add": [
{ "$multiply": [ { "$year": "$startDate" }, 100 ] },
{ "$month": "$startDate" }
]}
]},
{ "$lte": [
"$range",
{ "$add": [
{ "$multiply": [ { "$year": "$endDate" }, 100 ] },
{ "$month": "$endDate" }
]}
]}
]
},
"then": 1,
"else": 0
}
}
}
}},
{ "$sort": { "_id": 1 } },
{ "$group": {
"_id": "$_id.type",
"monthCounts": {
"$push": { "month": "$_id.month", "count": "$count" }
}
}}
])
Which produces the consistent zero fills for all possible months on all groupings:
{
"_id" : "HGV",
"monthCounts" : [
{
"month" : 201701,
"count" : 0
},
{
"month" : 201702,
"count" : 2
},
{
"month" : 201703,
"count" : 2
},
{
"month" : 201704,
"count" : 1
}
]
}
{
"_id" : "CAR",
"monthCounts" : [
{
"month" : 201701,
"count" : 1
},
{
"month" : 201702,
"count" : 1
},
{
"month" : 201703,
"count" : 0
},
{
"month" : 201704,
"count" : 0
}
]
}
MapReduce
All versions of MongoDB support mapReduce, and the simple case of the "iterator" as mentioned above is handled by a for loop in the mapper. We can get output as generated up to the first $group from above by simply doing:
db.cars.mapReduce(
function () {
for ( var d = this.startDate; d <= this.endDate;
d.setUTCMonth(d.getUTCMonth()+1) )
{
var m = new Date(0);
m.setUTCFullYear(d.getUTCFullYear());
m.setUTCMonth(d.getUTCMonth());
emit({ id: this.type, date: m},1);
}
},
function(key,values) {
return Array.sum(values);
},
{ "out": { "inline": 1 } }
)
Which produces:
{
"_id" : {
"id" : "CAR",
"date" : ISODate("2017-01-01T00:00:00Z")
},
"value" : 1
},
{
"_id" : {
"id" : "CAR",
"date" : ISODate("2017-02-01T00:00:00Z")
},
"value" : 1
},
{
"_id" : {
"id" : "HGV",
"date" : ISODate("2017-02-01T00:00:00Z")
},
"value" : 2
},
{
"_id" : {
"id" : "HGV",
"date" : ISODate("2017-03-01T00:00:00Z")
},
"value" : 2
},
{
"_id" : {
"id" : "HGV",
"date" : ISODate("2017-04-01T00:00:00Z")
},
"value" : 1
}
So it does not have the second grouping to compound to arrays, but we did produce the same basic aggregated output.

MongoDB Aggregation, group by subobject keys

I have a mongo collection whose schema looks like this:
_id: ObjectId(),
segments: {
activity: 'value1',
activation: 'value2',
plan: 'value3'
}
I'm trying to use the aggregation framework to find out how many of my documents have the value1 for the segment activity for instance.
The problem is that I want to do that for every segment in the same request if possible, and that I don't know how many segments I'll have or even their name.
Basically here's what I'd like to do:
If I have these two documents:
{ _id: 1, segments: { activity: 'active', activation: 'inactive', plan: 'free' }
{ _id: 2, segments: { activity: 'inactive', activation: 'inactive', plan: 'free' }
I want to be able to see that two of them have the activation segment to inactive and the free plan, and that activity have 1 inactive and 1 active values. Here is what I want to get:
{
activity: {
active: 1,
inactive: 1
},
activation: {
inactive: 2
},
plan: {
free: 2
}
}
So basically, if you could just $group by key it would be great! Something like this:
{
$group: {
_id: { $concat: [ '$segments.$key', '-', '$segments.$key.$value' ],
count: { $sum: 1 }
}
}
Or if I could unwind on each key...
To get the counts, take advantage of the $cond operator in the $group pipeline step to evaluate the counts based on the subdocuments value, something like the following:
db.collection.aggregate([
{
"$group": {
"_id": "$_id",
"activity_active": {
"$sum": {
"$cond": [ { "$eq": [ "$segment.activity", "active" ] }, 1, 0 ]
}
},
"activity_inactive": {
"$sum": {
"$cond": [ { "$eq": [ "$segment.activity", "inactive" ] }, 1, 0 ]
}
},
"activation_active": {
"$sum": {
"$cond": [ { "$eq": [ "$segment.activation", "active" ] }, 1, 0 ]
}
},
"activation_inactive": {
"$sum": {
"$cond": [ { "$eq": [ "$segment.activity", "inactive" ] }, 1, 0 ]
}
},
"plan_free": {
"$sum": {
"$cond": [ { "$eq": [ "$segment.plan", "free" ] }, 1, 0 ]
}
}
}
},
{
"$project": {
"_id": 0,
"activity": {
"active": "$activity_active",
"inactive": "$activity_inactive"
},
"activation": {
"active": "$activation_active",
"inactive": "$activation_inactive"
},
"plan": {
"free": "$plan_free"
}
}
}
])
there could be a generic solution to this problem, but might need a bit post processing:
to get output similat to this:
{
"_id" : {
"activity" : "active",
"activation" : "inactive"
},
"plan" : [{
"type" : "free",
"total" : 1
}, {
"type" : "paid",
"total" : 1
}
]
}, {
"_id" : {
"activity" : "inactive",
"activation" : "inactive"
},
"plan" : [{
"type" : "free",
"total" : 1
}
]
}, {
"_id" : {
"activity" : "inactive",
"activation" : "active"
},
"plan" : [{
"type" : "paid",
"total" : 3
}, {
"type" : "free",
"total" : 6
}
]
}
use query like that:
db.collection.aggregate([{
$group : {
_id : {
activity : "$segments.activity",
activation : "$segments.activation",
plan : "$segments.plan"
},
total : {
$sum : 1
}
}
}, {
$group : {
_id : {
activity : "$_id.activity",
activation : "$_id.activation"
},
plan : {
$push : {
type : "$_id.plan",
total : "$total"
}
}
}
},
])

MongoDB dateDiff between multiple documents

I have collection in my mongoDB which stores service given to customer along with their email address something like below
{
"_id" : ObjectId("56a84627f8fd4a136c0e944a"),
"Vehicle" : "Honda",
"ServiceSelected" : "FULL SERVICE",
"FullName" : "xyz",
"Email" : "xyz#xyz.com",
"BookingTime" : ISODate("2015-12-27T06:00:00.000Z")
},
{
"_id" : ObjectId("56a84627f8fd4a136c0e944b"),
"Vehicle" : "AUDI",
"ServiceSelected" : "FLAT TYRE",
"FullName" : "abc",
"Email" : "abc#abc.com",
"BookingTime" : ISODate("2015-12-26T06:00:00.000Z")
},
{
"_id" : ObjectId("56a84627f8fd4a136c0e944c"),
"Vehicle" : "BMW",
"ServiceSelected" : "OTHERS",
"FullName" : "def",
"Email" : "def#def.com",
"BookingTime" : ISODate("2015-12-25T06:00:00.000Z")
},
{
"_id" : ObjectId("56a84627f8fd4a136c0e944d"),
"Vehicle" : "BMW",
"ServiceSelected" : "OTHERS",
"FullName" : "def",
"Email" : "def#def.com",
"BookingTime" : ISODate("2015-12-30T06:00:00.000Z")
},
{
"_id" : ObjectId("56a84627f8fd4a136c0e944a"),
"Vehicle" : "Honda",
"ServiceSelected" : "FULL SERVICE",
"FullName" : "xyz",
"Email" : "xyz#xyz.com",
"BookingTime" : ISODate("2016-01-27T06:00:00.000Z")
}
From the above collection I want to fetch all the documents that have taken our service with a gap of at-least 30 days i.e. from the above collection "Email" : "xyz#xyz.com" should be returned but not "Email" : "def#def.com" as the second service was taken with in 5 days.
I know there is flaw in the design and an additional flag can be set while inserting the record from the application but I need to fetch the data for the existing records.
You need to use the $min and $max operators which respectively return the minimum and maximum value for "BookingTime" in your $group stage. The last stage in the pipeline is the $redact stage where you use a simple "date" math using the $divide and $subtract arithmetic operators.to return those documents where the number of days between first "service" and last "service" is greater than 30
db.collection.aggregate( [
{ "$group": {
"_id": "$Email",
"date1": { "$min": "$BookingTime" },
"date2": { "$max": "$BookingTime" }
}},
{ "$redact": {
"$cond": [
{ "$gte": [
{ "$divide": [
{ "$subtract": [ "$date2", "$date1" ] },
1000 * 60 * 60 * 24
]},
30
]},
"$$KEEP",
"$$PRUNE"
]
}}
])
Which returns:
{
"_id" : "xyz#xyz.com",
"date1" : ISODate("2015-12-27T06:00:00Z"),
"date2" : ISODate("2016-01-27T06:00:00Z")
}
Another way to do this is by using the $cond operator in a $project stage to avoid a collection scan.
db.collection.aggregate( [
{ "$group": {
"_id": "$Email",
"date1": { "$min": "$BookingTime" },
"date2": { "$max": "$BookingTime" },
"count": { "$sum": 1 }
}},
{ "$match": { "count": { "$gte": 2 } } },
{ "$project": {
"emails": {
"$cond": [
{ "$gte": [
{ "$divide": [
{ "$subtract": [ "$date2", "$date1" ] },
1000 * 60 * 60 * 24
]},
30
] },
"$_id",
false
]
}
}},
{ "$match": { "emails": { "$ne": false } } }
])
You can get first sales date and last sales date by $min and $max:
db.services.aggregate({
$group: {
"_id" :"$Email",
lastSalesDate: { $max: "$BookingTime" },
firstSalesDate: { $min: "$BookingTime" }
}
}
)
After that you can add filter based on lastSalesDate. You can calculate ISO date which 30 days before. ex. ISODate("2015-12-28T00:00:00.000Z"). By $lt , you will get customers of 30 days before.
db.services.aggregate(
{
$group: {
"_id" :"$Email",
lastSalesDate: { $max: "$BookingTime" },
firstSalesDate: { $min: "$BookingTime" }
}
},
{
$match : {
"lastSalesDate" : { $lt: ISODate("2015-12-28T00:00:00.000Z") }
}
}
)
Results like:
{
"_id" : "abc#abc.com",
"lastSalesDate" : ISODate("2015-12-26T06:00:00.000+0000"),
"firstSalesDate" : ISODate("2015-12-26T06:00:00.000+0000")
}
This is what I used finally
db.services.aggregate(
{$group: {
"_id" :"$Email",
count:{$sum:1},
lastSalesDate: { $max: "$BookingTime" },
firstSalesDate: { $min: "$BookingTime" }
},
{$project:{
_id:1,count:1,dateDifference: { $divide:[ {$subtract: [ "$lastSalesDate", "$firstSalesDate" ]},86400000] }
}
},
{$match:{
count:{$gt:1},dateDifference:{$gt:20}
}
}
}
)
Count > 1 helped to filter the records which never repeated and datedifferentce > 20 is for days as I already converted milliseconds to days using division operation.

MongoDB aggregate count based on multiple query fields - (Multiple field count)

My collection will look this,
{
"_id" : ObjectId("55c8bd1d85b83e06dc54c0eb"),
"name" : "xxx",
"salary" : 10000,
"type" : "type1"
}
{
"_id" : ObjectId("55c8bd1d85b83e06dc54c0eb"),
"name" : "aaa",
"salary" : 10000,
"type" : "type2"
}
{
"_id" : ObjectId("55c8bd1d85b83e06dc54c0eb"),
"name" : "ccc",
"salary" : 10000,
"type" : "type2"
}
My query params will be coming as,
{salary=10000, type=type2}
so based on the query I need to fetch the count of above query params
The result should be something like this,
{ category: 'type1', count: 500 } { category: 'type2', count: 200 } { category: 'name', count: 100 }
Now I am getting count by hitting three different queries and constructing the result (or) server side iteration I can get the result.
Can anyone suggest or provide me good way to get above result
Your quesstion is not very clearly presented, but what it seems you wanted to do here was count the occurances of the data in the fields, optionally filtering those fields by the values that matches the criteria.
Here the $cond operator allows you to tranform a logical condition into a value:
db.collection.aggregate([
{ "$group": {
"_id": null,
"name": { "$sum": 1 },
"salary": {
"$sum": {
"$cond": [
{ "$gte": [ "$salary", 1000 ] },
1,
0
]
}
},
"type": {
"$sum": {
"$cond": [
{ "$eq": [ "$type", "type2" ] },
1,
0
]
}
}
}}
])
All values are in the same document, and it does not really make any sense to split them up here as this is additional work in the pipeline.
{ "_id" : null, "name" : 3, "salary" : 3, "type" : 2 }
Otherwise in the long form, which is not very performant due to needing to make a copy of each document for every key looks like this:
db.collection.aggregate([
{ "$project": {
"name": 1,
"salary": 1,
"type": 1,
"category": { "$literal": ["name","salary","type"] }
}},
{ "$unwind": "$category" },
{ "$group": {
"_id": "$category",
"count": {
"$sum": {
"$cond": [
{ "$and": [
{ "$eq": [ "$category", "name"] },
{ "$ifNull": [ "$name", false ] }
]},
1,
{ "$cond": [
{ "$and": [
{ "$eq": [ "$category", "salary" ] },
{ "$gte": [ "$salary", 1000 ] }
]},
1,
{ "$cond": [
{ "$and": [
{ "$eq": [ "$category", "type" ] },
{ "$eq": [ "$type", "type2" ] }
]},
1,
0
]}
]}
]
}
}
}}
])
And it's output:
{ "_id" : "type", "count" : 2 }
{ "_id" : "salary", "count" : 3 }
{ "_id" : "name", "count" : 3 }
If your documents do not have uniform key names or otherwise cannot specify each key in your pipeline condition, then apply with mapReduce instead:
db.collection.mapReduce(
function() {
var doc = this;
delete doc._id;
Object.keys(this).forEach(function(key) {
var value = (( key == "salary") && ( doc[key] < 1000 ))
? 0
: (( key == "type" ) && ( doc[key] != "type2" ))
? 0
: 1;
emit(key,value);
});
},
function(key,values) {
return Array.sum(values);
},
{
"out": { "inline": 1 }
}
);
And it's output:
"results" : [
{
"_id" : "name",
"value" : 3
},
{
"_id" : "salary",
"value" : 3
},
{
"_id" : "type",
"value" : 2
}
]
Which is basically the same thing with a conditional count, except that you only specify the "reverse" of the conditions you want and only for the fields you want to filter conditions on. And of course this output format is simple to emit as separate documents.
The same approach applies where to test the condition is met on the fields you want conditions for and return 1 where the condition is met or 0 where it is not for the summing the count.
You can use aggregation as following query:
db.collection.aggregate({
$match: {
salary: 10000,
//add any other condition here
}
}, {
$group: {
_id: "$type",
"count": {
$sum: 1
}
}
}, {
$project: {
"category": "$_id",
"count": 1,
_id: 0
}
}