Mongodb aggregate every two hours - mongodb

I have an aggregate query of the following form
db.mycollection.aggregate([
{
"$match":
{
"Time": { $gte: ISODate("2016-01-30T00:00:00.000+0000") }
}
},
{
"$group":
{
"_id":
{
"day": { "$dayOfYear": "$Time" },
"hour": { "$hour": "$Time" }
},
"Dishes": { "$addToSet": "$Dish" }
}
},
{
"$group":
{
"_id": "$_id.hour",
"Food":
{
"$push":
{
"Day": "$_id.day",
"NumberOfDishes": { "$size":"$Dishes" }
}
}
}
},
{
"$project":
{
"Hour": "$_id",
"Food": "$Food",
"_id" : 0
}
},
{
"$sort": { "Hour": 1 }
}
]);
Instead of doing this as above in one hour durations e.g. 0-1,1-2,2-3,3-4,4-5,...,23-24, I want to be able to do this in two hour durations. e.g. 0-2,2-4,4-6,...,22-24. Is there a way to do that?

Hint: use arithmetic aggregation operators in $project
Lets say, H=floor(hour/2), where hour is actual hour from document date. Then you can get H by applying $floor and $divide operators to this date
"H": { $floor: { $divide: [ { "$hour": "$Time" }, 2 ] } }
Here H corresponds to the pair of hours (Hours=[0,2) => H=0, Hours=[2,4) => H=1, Hours=[22,24) => H=11, etc.) and you can pass it to the $group stage with
$group: { "_id": { "day": { $dayOfYear: "$Time" }, "H": "$H" } }
Then you can output the pair of hours for specific H with
"Hours": [ { $multiply: [ "$H", 2 ] }, { $sum: [ { $multiply: [ "$H", 2 ] }, 2 ] } ]
Given collection of documents
{ "Time" : ISODate("2016-01-30T01:00:00Z"), "Dish" : "dish1" }
{ "Time" : ISODate("2016-01-30T02:00:00Z"), "Dish" : "dish2" }
{ "Time" : ISODate("2016-01-30T03:00:00Z"), "Dish" : "dish3" }
{ "Time" : ISODate("2016-01-30T04:00:00Z"), "Dish" : "dish4" }
{ "Time" : ISODate("2016-01-30T05:00:00Z"), "Dish" : "dish5" }
{ "Time" : ISODate("2016-01-30T06:00:00Z"), "Dish" : "dish6" }
{ "Time" : ISODate("2016-01-30T07:00:00Z"), "Dish" : "dish7" }
{ "Time" : ISODate("2016-01-30T08:00:00Z"), "Dish" : "dish8" }
{ "Time" : ISODate("2016-01-30T09:00:00Z"), "Dish" : "dish9" }
and using the next aggregate on it
db.mycollection.aggregate([
{
"$match":
{
"Time": { $gte: ISODate("2016-01-30T00:00:00.000+0000") }
}
},
{
"$project":
{
"Dish": 1,
"Time": 1,
"H": { $floor: { $divide: [ { $hour: "$Time" }, 2 ] } }
}
},
{
"$group":
{
"_id":
{
"day": { $dayOfYear: "$Time" },
"H": "$H"
},
"Dishes": { $addToSet: "$Dish" }
}
},
{
"$group":
{
"_id": "$_id.H",
"Food":
{
"$push":
{
"Day": "$_id.day",
"NumberOfDishes": { $size: "$Dishes" }
}
}
}
},
{
"$sort": { "_id": 1 }
},
{
"$project":
{
"Hours": [ { $multiply: [ "$_id", 2 ] }, { $sum: [ { $multiply: [ "$_id", 2 ] }, 2 ] } ],
"Food": "$Food",
"_id": 0
}
}
]);
provides the result
{ "Food" : [ { "Day" : 30, "NumberOfDishes" : 1 } ], "Hours" : [ 0, 2 ] }
{ "Food" : [ { "Day" : 30, "NumberOfDishes" : 2 } ], "Hours" : [ 2, 4 ] }
{ "Food" : [ { "Day" : 30, "NumberOfDishes" : 2 } ], "Hours" : [ 4, 6 ] }
{ "Food" : [ { "Day" : 30, "NumberOfDishes" : 2 } ], "Hours" : [ 6, 8 ] }
{ "Food" : [ { "Day" : 30, "NumberOfDishes" : 2 } ], "Hours" : [ 8, 10 ] }

Related

Fill day gaps of two-dimensional timeseries data in MongoDB with aggregate

I have a collection of two-dimensional timeseries data as follows:
[
{
"value" : 9,
"timestamp" : "2020-12-30T02:06:33.000+0000",
"recipeId" : 15
},
{
"value" : 2,
"timestamp" : "2020-12-30T12:04:23.000+0000",
"recipeId" : 102
},
{
"value" : 5,
"timestamp" : "2020-12-30T15:09:23.000+0000",
"recipeId" : 102
},
...
]
The records have a recipeId which is the first level of grouping I'm looking for. All values for a day of a recipe should be summed up. I want an array of timeseries per recipeId. I need the missing days to be filled with a 0. I want this construct to be created for a provided start and end date range.
Some like this for date range of 2020-12-29 to 2020-12-31:
[
[
{
"sum" : 0,
"timestamp" : "2020-12-29",
"recipeId" : 15
},
{
"sum" : 9,
"timestamp" : "2020-12-30",
"recipeId" : 15
},
{
"sum" : 0,
"timestamp" : "2020-12-31",
"recipeId" : 15
},
...
],
[
{
"sum" : 0,
"timestamp" : "2020-12-29",
"recipeId" : 0
},
{
"sum" : 7,
"timestamp" : "2020-12-30",
"recipeId" : 102
},
{
"sum" : 0,
"timestamp" : "2020-12-31",
"recipeId" : 102
},
...
]
]
This is what I currently have and it's only partially solving my requirements. I can't manage to get the last few stages right:
[
{
"$match": {
"timestamp": {
"$gte": "2020-12-29T00:00:00.000Z",
"$lte": "2020-12-31T00:00:00.000Z"
}
}
},
{
"$addFields": {
"timestamp": {
"$dateFromParts": {
"year": { "$year": "$timestamp" },
"month": { "$month": "$timestamp" },
"day": { "$dayOfMonth": "$timestamp" }
}
},
"dateRange": {
"$map": {
"input": {
"$range": [
0,
{
"$trunc": {
"$divide": [
{
"$subtract": [
"2020-12-31T00:00:00.000Z",
"2020-12-29T00:00:00.000Z"
]
},
1000
]
}
},
86400
]
},
"in": {
"$add": [
"2020-12-29T00:00:00.000Z",
{ "$multiply": ["$$this", 1000] }
]
}
}
}
}
},
{ "$unwind": "$dateRange" },
{
"$group": {
"_id": { "date": "$dateRange", "recipeId": "$recipeId" },
"count": {
"$sum": { "$cond": [{ "$eq": ["$dateRange", "$timestamp"] }, 1, 0] }
}
}
},
{
"$group": {
"_id": "$_id.date",
"total": { "$sum": "$count" },
"byRecipeId": {
"$push": {
"k": { "$toString": "$_id.recipeId" },
"v": { "$sum": "$count" }
}
}
}
},
{ "$sort": { "_id": 1 } },
{
"$project": {
"_id": 0,
"timestamp": "$_id",
"total": "$total",
"byRecipeId": {
"$arrayToObject": {
"$filter": { "input": "$byRecipeId", "cond": "$$this.v" }
}
}
}
}
]
which results in:
[
{
"timestamp": "2020-12-29T00:00:00.000Z",
"total": 21,
"byRecipeId": {}
},
{
"timestamp": "2020-12-30T00:00:00.000Z",
"total": 0,
"byRecipeId": {
"15": 9,
"102": 7
}
},
{
"timestamp": "2020-12-31T00:00:00.000Z",
"total": 0,
"byRecipeId": {}
}
]
I'm open to alternative solution of course. For examples I came across this post: https://medium.com/#alexandro.ramr777/fill-missing-values-using-mongodb-aggregation-framework-f011114e83e0 but it doesn't deal with multi-dimensions.
You could use the $redcue function. This code fills the gabs of Minutes for current day. Should be easy to adapt it to give missing Days.
{
$addFields: {
data: {
$reduce: {
input: { $range: [0, 24 * 60] },
initialValue: [],
in: {
$let: {
vars: {
ts: {
$add: [
moment().startOf('day').toDate(),
{ $multiply: ["$$this", 1000 * 60] }
]
}
},
in: {
$concatArrays: [
"$$value",
[{
$cond: {
if: { $in: ["$$ts", "$data.timestamp"] },
then: {
$first: {
$filter: {
input: "$data",
cond: { $eq: ["$$this.timestamp", "$$ts"] }
}
}
},
else: { timestamp: "$$ts", total: 0 }
}
}]
]
}
}
}
}
}
}
}
In my opinion, $reduce is more elegant than $map, however based on my experience the performance is much worse with $reduce.

Sorting according to time in string in mongodb

How can i sort if my time(time_required) is saved in this format ?
quiz_customer_record
{
"_id" : ObjectId("5f16eb4a5007bd5395c76ed9"),
"quiz_id" : "5f05bbd10cf3166085be68fc",
"user_id" : "5f06e0ddf718c04de30ea47f",
"name" : "ABC",
"time_required" : "0:6 Mins",
"questions_attempted" : 0,
"total_quiz_questions" : 1,
"attempt_date" : "2020-07-21T13:19:08.025Z"
},
{
"_id" : ObjectId("5f16eb5f5007bd5395c76edb"),
"quiz_id" : "5f05bbd10cf3166085be68fc",
"user_id" : "5f06e0ddf718c04de30ea47f",
"name" : "ABC",
"time_required" : "0:8 Mins",
"questions_attempted" : 0,
"total_quiz_questions" : 1,
"attempt_date" : "2020-07-21T13:19:29.377Z"
}
I want to sort it according to time_required but its in string and is in format of Mins:Seconds. Yes its a pretty messed up. But do we have a solution? I want to use mongo query for that as there are so many records and i sort of need to use limit(for pagination). That is why it is necessary for using mongo query.
Expected Result- Sort type- descending()
{
"_id" : ObjectId("5f16eb5f5007bd5395c76edb"),
"quiz_id" : "5f05bbd10cf3166085be68fc",
"user_id" : "5f06e0ddf718c04de30ea47f",
"name" : "ABC",
"time_required" : "0:8 Mins",
"questions_attempted" : 0,
"total_quiz_questions" : 1,
"attempt_date" : "2020-07-21T13:19:29.377Z"
},
{
"_id" : ObjectId("5f16eb4a5007bd5395c76ed9"),
"quiz_id" : "5f05bbd10cf3166085be68fc",
"user_id" : "5f06e0ddf718c04de30ea47f",
"name" : "ABC",
"time_required" : "0:6 Mins",
"questions_attempted" : 0,
"total_quiz_questions" : 1,
"attempt_date" : "2020-07-21T13:19:08.025Z"
}
The query i'm using is
db.quiz_customer_record.aggregate([{ $match: { quiz_id:quiz_id}},
{
$sort: { attempt_date: -1 }
},
{
$group: {
_id: "$user_id",
result1: { $first: "$attempt_date" },
quiz_id: { $first: "$quiz_id" },
time_required: { $first: "$time_required" },
o_id: { $first: "$_id" }
}
},
{
$project: {
_id: "$o_id",
user_id: "$_id",
quiz_id:"$quiz_id",
time_required:"$time_required",
result1: 1
}
}
]).sort({time_required:-1})
Answer for mongo version less than 4.2
$set was added in 4.2 version. For earlier version $addFields can be used.
db.collection.aggregate([
{
"$addFields": {
"time_required_split": {
$substr: [
"$time_required",
0,
3
]
}
}
},
{
"$addFields": {
"time_required_split": {
$split: [
"$time_required_split",
":"
]
}
}
},
{
"$addFields": {
"time_seconds": {
$sum: [
{
"$multiply": [
{
$toInt: {
$arrayElemAt: [
"$time_required_split",
0
]
}
},
60
]
},
{
$toInt: {
$arrayElemAt: [
"$time_required_split",
1
]
}
}
]
}
}
},
{
"$sort": {
time_seconds: -1
}
},
{
"$project": {
"time_required_split": 0,
"time_seconds": 0
}
}
])
Mongo Playground
Try this query -
db.collection.aggregate([
{
"$set": {
"time_required_split": {
$substr: [
"$time_required",
0,
3
]
}
}
},
{
"$set": {
"time_required_split": {
$split: [
"$time_required_split",
":"
]
}
}
},
{
"$set": {
"time_seconds": {
$sum: [
{
"$multiply": [
{
$toInt: {
$arrayElemAt: [
"$time_required_split",
0
]
}
},
60
]
},
{
$toInt: {
$arrayElemAt: [
"$time_required_split",
1
]
}
}
]
}
}
},
{
"$sort": {
time_seconds: -1
}
},
{
"$project": {
"time_required_split": 0,
"time_seconds": 0
}
}
])
Mongo Playground
Let me know if don't understand any stage.

Get invoice total from a collection with arrays

I have a collection with this structure:
db.shops.insert({
"customer": "21c3",
"shopDate": new Date("2019-06-03T23:00:00Z"),
"shopId" : "Supermarket",
"items" : [
{
"productName": "Water",
"price": 3,
"quantity": 2
},
{
"productName": "Candies",
"price": 1,
"quantity": 5
}
]
});
I need a query to get the total amount of each shopId.
This is what I have done, but it is not working:
db.shops.aggregate(
[
{
$group : {
_id : $shopName,
totalSold: { $sum: { $multiply: [ "$price", "$quantity" ] } },
}
}
]
);
Here is how to do it:
db.shops.aggregate(
[
{
"$unwind": "$items"
},
{
"$group" : {
_id : "$shopId",
totalSold: { $sum: { $multiply: [ "$items.price", "$items.quantity" ] } },
}
}
]
);

How to Implement $bucket to group by multiple fields

At first bucket by age and boundaries is [0,20,30,40,50,200]
db.user.aggregate(
{$project: {_id:0, age:{$subtract:[{$year:new Date()}, {$year:"$birthDay"}]} } },
{$bucket:{
groupBy:"$age",
boundaries:[0,20,30,40,50,200]
}},
{ $project:{ _id:0,age:"$_id",count:1 } }
)
got below result
{ "count" : 5, "age" : 20 }
{ "count" : 1, "age" : 30 }
then further I want to stat every age range count of each city
{ city : "SH", age: 20, count: 2 }
{ city : "BJ", age: 20, count: 3 }
{ city : "BJ", age: 30, count: 1 }
So in this case how to implement it ?
In addition
db.user.aggregate(
{ $project: {_id:0, city:1, age:{$subtract:[{$year:new Date()}, {$year:"$birthDay"}]} } },
{ $group: { _id:"$city",ages:{$push:"$age"} } },
{ $project: {_id:0, city:"$_id",ages:1} }
)
{ "city" : "SH", "ages" : [ 26, 26 ] }
{ "city" : "BJ", "ages" : [ 27, 26, 26, 36 ] }
What you are talking about is actually implemented with $switch, within a regular $group stage:
db.user.aggregate([
{ "$group": {
"_id": {
"city": "$city",
"age": {
"$let": {
"vars": {
"age": { "$subtract" :[{ "$year": new Date() },{ "$year": "$birthDay" }] }
},
"in": {
"$switch": {
"branches": [
{ "case": { "$lt": [ "$$age", 20 ] }, "then": 0 },
{ "case": { "$lt": [ "$$age", 30 ] }, "then": 20 },
{ "case": { "$lt": [ "$$age", 40 ] }, "then": 30 },
{ "case": { "$lt": [ "$$age", 50 ] }, "then": 40 },
{ "case": { "$lt": [ "$$age", 200 ] }, "then": 50 }
]
}
}
}
}
},
"count": { "$sum": 1 }
}}
])
With the results:
{ "_id" : { "city" : "BJ", "age" : 30 }, "count" : 1 }
{ "_id" : { "city" : "BJ", "age" : 20 }, "count" : 3 }
{ "_id" : { "city" : "SH", "age" : 20 }, "count" : 2 }
The $bucket pipeline stage only takes a single field path. You can have multiple accumulators via the "output" option, but the "groupBy" is a single expression.
Note you can also use $let here in preference to a separate $project pipeline stage to calculate the "age".
N.B If you actually throw some erroneous expressions to $bucket you will get errors about $switch, which should hint to you that this is how it is implemented internally.
If you are worried about coding in the $switch then just generate it:
var ranges = [0,20,30,40,50,200];
var branches = [];
for ( var i=1; i < ranges.length; i++) {
branches.push({ "case": { "$lt": [ "$$age", ranges[i] ] }, "then": ranges[i-1] });
}
db.user.aggregate([
{ "$group": {
"_id": {
"city": "$city",
"age": {
"$let": {
"vars": {
"age": {
"$subtract": [{ "$year": new Date() },{ "$year": "$birthDay" }]
}
},
"in": {
"$switch": { "branches": branches }
}
}
}
},
"count": { "$sum": 1 }
}}
])
Supply another implementation by using Map-Reduce
db.user.mapReduce(
function(){
var age = new Date().getFullYear() - this.birthDay.getFullYear();
var ages = [0,20,30,40,50,200]
for(var i=1; i<ages.length; i++){
if(age < ages[i]){
emit({city:this.city,age:ages[i-1]},1);
break;
}
}
},
function(key, counts){
return Array.sum(counts);
},
{ out: "user_city_age_count" }
)

$push and $sum with the aggregation framework on sub-documents

I've a data as follows:
{
"_id" : ObjectId("55d4410544c96d6f6578f893"),
"executionProject" : "Project1",
"suiteList" : [
{
"suiteName": "Suite1",
"suiteStatus" : "PASS",
},
{
"suiteName": "Suite2",
"suiteStatus" : "FAIL",
},
{
"suiteName": "Suite3",
"suiteStatus" : "PASS",
}
],
"runEndTime" : ISODate("2015-08-19T08:40:47.049Z")
}
{
"_id" : ObjectId("55d4410544c96d6f6578f894"),
"executionProject" : "Project1",
"suiteList" : [
{
"suiteName": "Suite1",
"suiteStatus" : "PASS",
},
{
"suiteName": "Suite2",
"suiteStatus" : "FAIL",
},
{
"suiteName": "Suite3",
"suiteStatus" : "FAIL",
}
],
"runEndTime" : ISODate("2015-08-19T08:50:47.049Z")
}
And I was trying to get the result like this:
{
"executionProject": "Project1",
"data": [
{
"date": "2015-08-19 08:40:47",
"suitePass": 2,
"suiteFail": 1
},
{
"date": "2015-08-19 08:50:47",
"suitePass": 1,
"suiteFail": 2
}
]
}
Here I'm trying to group by executionProject and push the runEndTime and the pass and fail counts of suites to the result.
I tried this, but giving me wrong way of projection:
db.testruns.aggregate([
{
$project: {
executionProject: "$executionProject",
runEndTime: "$runEndTime",
suiteList: "$suiteList"
}
},
{
$unwind: "$suiteList"
},
{
$group: {
_id: "$executionProject",
runEndTime: {
$addToSet: "$runEndTime"
},
suite_pass: {
$sum: {
$cond: {
"if": {
$eq: ["$suiteList.suiteStatus", "PASS"]
},
"then": 1,
"else": 0
}
}
}
}
},
{
$group: {
_id: "$_id",
runEndTime: { $push: {runTime: "$runEndTime", suite_pass: "$suite_pass"} }
}
},
{
$project: {
executionProject: "$_id",
runEndTime: "$runEndTime",
_id: 0
}
}
]);
First you need to group by the document to get the suite totals, then you add to the array as you group on the project. Also don't forget to "sort" if you want things in order:
[
{ "$unwind": "$suiteList" },
{ "$group": {
"_id": "$_id",
"executionProject": { "$first": "$executionProject" },
"suite-pass": {
"$sum": {
"$cond": [
{ "$eq": [ "$suiteList.suiteStatus", "PASS" ] },
1,
0
]
}
},
"suite-fail": {
"$sum": {
"$cond": [
{ "$eq": [ "$suiteList.suiteStatus", "FAIL" ] },
1,
0
]
}
},
"date": { "$first": "$runEndTime" }
}},
{ "$sort": { "executionProject": 1, "date": 1 } },
{ "$group": {
"_id": "$executionProject",
"data": {
"$push": {
"suite-pass": "$suite-pass",
"suite-fail": "$suite-fail",
"date": "$date"
}
}
}}
]
Produces:
{
"_id" : "Project1",
"data" : [
{
"suite-pass" : 2,
"suite-fail" : 1,
"date" : ISODate("2015-08-19T08:40:47.049Z")
},
{
"suite-pass" : 1,
"suite-fail" : 2,
"date" : ISODate("2015-08-19T08:50:47.049Z")
}
]
}