I have documents which contain a date and I'm wondering how to group them according to quarterly basis?
My schema is:
var ekgsanswermodel = new mongoose.Schema({
userId: {type: Schema.Types.ObjectId},
topicId : {type: Schema.Types.ObjectId},
ekgId : {type: Schema.Types.ObjectId},
answerSubmitted :{type: Number},
dateAttempted : { type: Date},
title : {type: String},
submissionSessionId : {type: String}
});
1st quarter contains months 1, 2, 3. 2nd quarter contains months 4, 5, 6 and so on up-to 4th quarter.
My final result should be:
"result" : [
{
_id: {
quater:
},
_id: {
quater:
},
_id: {
quater:
},
_id: {
quater:
}
}
You could make use of the $cond operator to check if:
The $month is <= 3, project a field named quarter with
value as "one".
The $month is <= 6, project a field named quarter with
value as "two".
The $month is <= 9, project a field named quarter with
value as "three".
else the value of the field quarter would be "fourth".
Then $group by the quarter field.
Code:
db.collection.aggregate([
{
$project: {
date: 1,
quarter: {
$cond: [
{ $lte: [{ $month: "$date" }, 3] },
"first",
{
$cond: [
{ $lte: [{ $month: "$date" }, 6] },
"second",
{
$cond: [{ $lte: [{ $month: "$date" }, 9] }, "third", "fourth"],
},
],
},
],
},
},
},
{ $group: { _id: { quarter: "$quarter" }, results: { $push: "$date" } } },
]);
Specific to your schema:
db.collection.aggregate([
{
$project: {
dateAttempted: 1,
userId: 1,
topicId: 1,
ekgId: 1,
title: 1,
quarter: {
$cond: [
{ $lte: [{ $month: "$dateAttempted" }, 3] },
"first",
{
$cond: [
{ $lte: [{ $month: "$dateAttempted" }, 6] },
"second",
{
$cond: [
{ $lte: [{ $month: "$dateAttempted" }, 9] },
"third",
"fourth",
],
},
],
},
],
},
},
},
{ $group: { _id: { quarter: "$quarter" }, results: { $push: "$$ROOT" } } },
]);
You could use following to group documents quarterly.
{
$project : {
dateAttempted : 1,
dateQuarter: {
$trunc : {$add: [{$divide: [{$subtract: [{$month:
"$dateAttempted"}, 1]}, 3]}, 1]}
}
}
}
Starting in Mongo 5, it's a perfect use case for the new $dateTrunc aggregation operator:
// { date: ISODate("2012-10-11") }
// { date: ISODate("2013-02-27") }
// { date: ISODate("2013-01-12") }
// { date: ISODate("2013-03-11") }
// { date: ISODate("2013-07-14") }
db.collection.aggregate([
{ $group: {
_id: { $dateTrunc: { date: "$date", unit: "quarter" } },
total: { $count: {} }
}}
])
// { _id: ISODate("2012-10-01"), total: 1 }
// { _id: ISODate("2013-01-01"), total: 3 }
// { _id: ISODate("2013-07-01"), total: 1 }
$dateTrunc truncates your dates at the beginning of their quarter (the truncation unit). It's kind of a modulo on dates per quarter.
Quarters in the output will be defined by their first day (Q3 2013 will be 2013-07-01). And you can always adapt it using $dateToString projection for instance.
Related
I am facing a problem in MongoDB. Suppose, I have the following collection.
{ id: 1, issueDate: "07/05/2021", code: "31" },
{ id: 2, issueDate: "12/11/2020", code: "14" },
{ id: 3, issueDate: "02/11/2021", code: "98" },
{ id: 4, issueDate: "01/02/2021", code: "14" },
{ id: 5, issueDate: "06/23/2020", code: "14" },
{ id: 6, issueDate: "07/01/2020", code: "31" },
{ id: 7, issueDate: "07/05/2022", code: "14" },
{ id: 8, issueDate: "07/02/2022", code: "20" },
{ id: 9, issueDate: "07/02/2022", code: "14" }
The date field is in the format MM/DD/YYYY. My goal is to get the count of items with each season (spring (March-May), summer (June-August), autumn (September-November) and winter (December-February).
The result I'm expecting is:
count of fields for each season:
{ "_id" : "Summer", "count" : 6 }
{ "_id" : "Winter", "count" : 3 }
top 2 codes (first and second most recurring) per season:
{ "_id" : "Summer", "codes" : {14, 31} }
{ "_id" : "Winter", "codes" : {14, 98} }
How can this be done?
You should never store date/time values as string, store always proper Date objects.
You can use $setWindowFields opedrator for that:
db.collection.aggregate([
// Convert string into Date
{ $set: { issueDate: { $dateFromString: { dateString: "$issueDate", format: "%m/%d/%Y" } } } },
// Determine the season (0..3)
{
$set: {
season: { $mod: [{ $toInt: { $divide: [{ $add: [{ $subtract: [{ $month: "$issueDate" }, 1] }, 1] }, 3] } }, 4] }
}
},
// Count codes per season
{
$group: {
_id: { season: "$season", code: "$code" },
count: { $count: {} },
}
},
// Rank occurrence of codes per season
{
$setWindowFields: {
partitionBy: "$_id.season",
sortBy: { count: -1 },
output: {
rank: { $denseRank: {} },
count: { $sum: "$count" }
}
}
},
// Get only top 2 ranks
{ $match: { rank: { $lte: 2 } } },
// Final grouping
{
$group: {
_id: "$_id.season",
count: { $first: "$count" },
codes: { $push: "$_id.code" }
}
},
// Some cosmetic for output
{
$set: {
season: {
$switch: {
branches: [
{ case: { $eq: ["$_id", 0] }, then: 'Winter' },
{ case: { $eq: ["$_id", 1] }, then: 'Spring' },
{ case: { $eq: ["$_id", 2] }, then: 'Summer' },
{ case: { $eq: ["$_id", 3] }, then: 'Autumn' },
]
}
}
}
}
])
Mongo Playground
I will give you clues,
You need to use $group with _id as $month on issueDate, use accumulator $sum to get month wise count.
You can divide month by 3, to get modulo, using $toInt, $divide, then put them into category using $cond.
Another option:
db.collection.aggregate([
{
$addFields: {
"season": {
$switch: {
branches: [
{
case: {
$in: [
{
$substr: [
"$issueDate",
0,
2
]
},
[
"06",
"07",
"08"
]
]
},
then: "Summer"
},
{
case: {
$in: [
{
$substr: [
"$issueDate",
0,
2
]
},
[
"03",
"04",
"05"
]
]
},
then: "Spring"
},
{
case: {
$in: [
{
$substr: [
"$issueDate",
0,
2
]
},
[
"12",
"01",
"02"
]
]
},
then: "Winter"
}
],
default: "No date found."
}
}
}
},
{
$group: {
_id: {
s: "$season",
c: "$code"
},
cnt1: {
$sum: 1
}
}
},
{
$sort: {
cnt1: -1
}
},
{
$group: {
_id: "$_id.s",
codes: {
$push: "$_id.c"
},
cnt: {
$sum: "$cnt1"
}
}
},
{
$project: {
_id: 0,
season: "$_id",
count: "$cnt",
codes: {
"$slice": [
"$codes",
2
]
}
}
}
])
Explained:
Add one more field for season based on $switch per month(extracted from issueDate string)
Group to collect per season/code.
$sort per code DESCENDING
group per season to form an array with most recurring codes in descending order.
Project the fields to the desired output and $slice the codes to limit only to the fist two most recurring.
Comment:
Indeed keeping dates in string is not a good idea in general ...
Playground
I have a pipeline that gives me the result according to the players belonging to a certain company in a daily, weekly, and monthly manner. I have a date filter in the dashboard which gives an iso date range. I want to limit or range my results according to the date filter? is there any way to do it in the pipeline?
if (chartType === 'Daily') {
statsPipelineCondition = {
_id: { day: { $dayOfMonth: '$createdAt' }, month: { $month: '$createdAt' }, year: { $year: '$createdAt' } },
};
sortCondition = { '_id.year': 1, '_id.month': 1, '_id.day': 1 };
} else if (chartType === 'Monthly') {
statsPipelineCondition = {
_id: { month: { $month: '$createdAt' }, year: { $year: '$createdAt' } },
};
sortCondition = { '_id.year': 1, '_id.month': 1 };
} else {
statsPipelineCondition = {
_id: { week: { $week: '$createdAt' }, year: { $year: '$createdAt' } },
};
sortCondition = { '_id.year': 1, '_id.week': 1 };
}
const statsPipeline = [
{
$group: {
...statsPipelineCondition,
TOTAL: { $sum: 1 },
XR: { $sum: { $cond: [{ $in: ['$company', ['XR', 'CR', 'DX']] }, 1, 0] } },
CT: { $sum: { $cond: [{ $eq: ['$company', 'CT'] }, 1, 0] } },
MR: { $sum: { $cond: [{ $eq: ['$company', 'MR'] }, 1, 0] } },
MG: { $sum: { $cond: [{ $in: ['$company', ['NM', 'MM', 'MG']] }, 1, 0] } },
},
},
{
$sort: {
...sortCondition,
},
},
];
Date filter:
datefilter - { '$gte': '2020-09-01T04:49:50.899Z',
'$lte': '2020-11-03T04:49:50.899Z' }
You need to add a $match stage prior to the $group stage to filter our based on the range:
let datefilter = {
'$gte': new Date('2020-09-01T04:49:50.899Z'),
'$lte': new Date('2020-11-03T04:49:50.899Z')
};
const statsPipeline = [
{
$match: {
createdAt: datefilter
}
},
{
$group: {
...statsPipelineCondition,
TOTAL: { $sum: 1 },
XR: { $sum: { $cond: [{ $in: ['$company', ['XR', 'CR', 'DX']] }, 1, 0] } },
CT: { $sum: { $cond: [{ $eq: ['$company', 'CT'] }, 1, 0] } },
MR: { $sum: { $cond: [{ $eq: ['$company', 'MR'] }, 1, 0] } },
MG: { $sum: { $cond: [{ $in: ['$company', ['NM', 'MM', 'MG']] }, 1, 0] } },
},
},
{
$sort: {
...sortCondition,
},
},
];
Okay, So for some reason. MongoDB doesn't like dates in strings. It would be much better if we just convert the strings by an operator provided by the aggregation framework and this will make the things work.
{
$match: {
$expr: {
$and: [
{
$gte: [
'$createdAt',
{
$dateFromString: {
dateString: dateFilter.$gte,
},
},
],
},
{
$lte: [
'$createdAt',
{
$dateFromString: {
dateString: dateFilter.$lte,
},
},
],
},
],
},
},
},
Who knows a better solution to group Orders by date and sum total and count by source. Of course I can group by Source and then I get only totals for this source only, I can alter the result thereafter to get the desired result. But I would like to know if it is possible in one simple $group statement.
Eg. ordersByApp = 1, ordersByWEB = 2
Orders collection
{
_id: 'XCUZO0',
date: "2020-02-01T00:00:03.243Z"
total: 9.99,
source: 'APP'
},
{
_id: 'XCUZO1',
date: "2020-01-05T00:00:03.243Z"
total: 9.99,
source: 'WEB'
},
{
_id: 'XCUZO2',
date: "2020-01-02T00:00:03.243Z"
total: 9.99,
source: 'WEB'
}
My current aggregation
Order.aggregate([
{
$group: {
_id: {
month: { $month: "$date",
year: { $year: "$date" }
},
total: {
$sum: "$total"
}
}
}
])
Current result
[
{
_id: { month: 01, year: 2020 },
total: 19.98
},
{
_id: { month: 02, year: 2020 },
total: 9.99
}
]
Desired result, How can I achieve the below?
[
{
_id: { month: 01, year: 2020 },
total: 19.98,
countByApp: 1, <---
countByWEB: 0, <---
},
{
_id: { month: 02, year: 2020 },
total: 9.99,
countByWEB: 2, <---
countByAPP: 0 <---
}
]
You can use $cond like below:
Order.aggregate([
{
$group: {
_id: {
month: { $month: "$date" },
year: { $year: "$date" }
},
total: { $sum: "$total" },
countByApp: { $sum: { $cond: [ {$eq: [ "$source", "APP" ]} , 1, 0] } },
countByWeb: { $sum: { $cond: [ {$eq: [ "$source", "WEB" ]} , 1, 0] } },
}
}
])
Mongo Playground
I am trying to perform a MongoDB 3.6 aggregation and I can't figure out the right way.
The problem is following. After performing several aggregation steps I end up with result set like this:
[
{ _id: { month: 1, type: 'estimate' }, value: 50 },
{ _id: { month: 2, type: 'estimate' }, value: 40 },
{ _id: { month: 3, type: 'estimate' }, value: 35 },
{ _id: { month: 3, type: 'exact' }, value: 33.532 },
{ _id: { month: 4, type: 'estimate' }, value: 10 },
{ _id: { month: 4, type: 'exact' }, value: 11.244 },
]
It contains values grouped by month. Value for every month can be 'estimated' or 'exact'. Now I would like to reduce this result to achieve this:
[
{ _id: { month: 1 }, value: 50 },
{ _id: { month: 2 }, value: 40 },
{ _id: { month: 3 }, value: 33.532 },
{ _id: { month: 4 }, value: 11.244 },
]
Basically I want to use the value of type 'exact' whenever it's possible and only fallback to 'estimate' value in months where the 'exact' is not known.
Any help or tip will be greatly appreciated. I would like to perform that aggregation in the DB not on server.
You can simply $sort by type and then take use $first in next $group stage which will give you exact if exists and estimate otherwise. Try:
db.col.aggregate([
{
$sort: { "_id.type": -1 }
},
{
$group:{
_id: "$_id.month",
value: { $first: "$value" }
}
},
{
$sort: { _id: 1 }
}
])
Prints:
{ "_id" : 1, "value" : 50 }
{ "_id" : 2, "value" : 40 }
{ "_id" : 3, "value" : 33.532 }
{ "_id" : 4, "value" : 11.244 }
So sorting by type is considered as prioritizing here since we know that lexically exact will be before estimate. You can also be more explicit and add extra field called weight (evaluated using $cond) operator and then sort by that weight:
db.col.aggregate([
{
$addFields: {
weight: { $cond: [ { $eq: [ "$_id.type", "exact" ] }, 2, 1 ] }
}
},
{
$sort: { "weight": -1 }
},
{
$group:{
_id: "$_id.month",
value: { $first: "$value" }
}
},
{
$sort: { _id: 1 }
}
])
I'm trying to get my head around an aggregate pipeline in MongoDb with multiple groups.
I have the following data: https://gist.github.com/bomortensen/36e6b3fbc987a096be36a66bbfe30d82
Expected data would be: https://gist.github.com/bomortensen/7b220df1f1da83be838acfb2ed79a2ee (total quantity sum based on highest version, hourly)
I need to write a query which does the following:
Group the data by the field MeterId to get unique meter groups.
In each group I then need to group by the StartDate's year, month, day and hour since all objects StartDate is stored as quarters, but I need to aggregate them into whole hours.
Finally, I need to only get the highest version from the Versions array by VersionNumber
I've tried the following query, but must admit I'm stuck:
mycollection.aggregate([
{ $group: {
_id : { ediel: "$_id.MeterId", start: "$_id.StartDate" },
versions: { $push: "$Versions" }
}
},
{ $unwind: { path: "$versions" } },
{ $group: {
_id: {
hour: { $hour: "$_id.start.DateTime" },
key: "$_id"
},
quantitySum: { $sum: "$Versions.Quantity" }
}
},
{ $sort: { "_id.hour": -1 } }
]);
Does anyone know how I should do this? :-)
This would give :
1 $project : get $hour from date, create a maxVersion field per record
1 $unwind to remove the Versions array
1 $project to add a keep field that will contain a boolean to check if the record should be kept or not
1 $match that match only higher version number eg keep == true
1 $group that group by id/hour and sum the quantity
1 $project to set up your required format
Query is :
db.mycollection.aggregate([{
$project: {
_id: 1,
Versions: 1,
hour: {
"$hour": "$_id.StartDate"
},
maxVersion: { $max: "$Versions.VersionNumber" }
}
}, {
$unwind: "$Versions"
}, {
$project: {
_id: 1,
Versions: 1,
hour: 1,
maxVersion: 1,
keep: { $eq: ["$Versions.VersionNumber", "$maxVersion"] }
}
}, {
$match: { "keep": true }
}, {
$group: {
_id: { _id: "$_id.MeterId", hour: "$hour" },
StartDate: { $first: "$_id.StartDate" },
QuantitySum: { $sum: "$Versions.Quantity" }
}
}, {
$project: {
_id: { _id: "$_id._id", StartDate: "$StartDate" },
hour: "$_id.hour",
QuantitySum: 1
}
}])
In your example output you take into account only the first higher versionNumber, You have { "VersionNumber" : 2, "Quantity" : 7.5 } and { "VersionNumber" : 2, "Quantity" : 8.4 } for hour 2 and id 1234 but you only take { "VersionNumber" : 2, "Quantity" : 7.5 }
I dont know if this is intended or not but in this case you want to take only the first MaxVersion number. After the $match, I added :
1 $group that push versions previously filter in an array
1 $project that $slice this array to take only the first element
1 $unwind to remove this array (which contains only one elemement)
The query that match your output is :
db.mycollection.aggregate([{
$project: {
_id: 1,
Versions: 1,
hour: {
"$hour": "$_id.StartDate"
},
maxVersion: { $max: "$Versions.VersionNumber" }
}
}, {
$unwind: "$Versions"
}, {
$project: {
_id: 1,
Versions: 1,
hour: 1,
maxVersion: 1,
keep: { $eq: ["$Versions.VersionNumber", "$maxVersion"] }
}
}, {
$match: { "keep": true }
}, {
$group: {
_id: { _id: "$_id.MeterId", StartDate: "$_id.StartDate" },
Versions: { $push: "$Versions" },
hour: { "$first": "$hour" }
}
}, {
$project: {
_id: 1,
hour: 1,
Versions: { $slice: ["$Versions", 1] }
}
}, {
$unwind: "$Versions"
}, {
$sort: {
_id: 1
}
}, {
$group: {
_id: { _id: "$_id._id", hour: "$hour" },
StartDate: { $first: "$_id.StartDate" },
QuantitySum: { $sum: "$Versions.Quantity" }
}
}, {
$project: {
_id: { _id: "$MeterId._id", StartDate: "$StartDate" },
Hour: "$_id.hour",
QuantitySum: 1
}
}])
Output is :
{ "_id" : { "MeterId" : "4567", "StartDate" : ISODate("2016-09-20T03:00:00Z") }, "QuantitySum" : 25.9, "Hour" : 3 }
{ "_id" : { "MeterId" : "4567", "StartDate" : ISODate("2016-09-20T02:00:00Z") }, "QuantitySum" : 25.9, "Hour" : 2 }
{ "_id" : { "MeterId" : "1234", "StartDate" : ISODate("2016-09-20T03:00:00Z") }, "QuantitySum" : 25.9, "Hour" : 3 }
{ "_id" : { "MeterId" : "1234", "StartDate" : ISODate("2016-09-20T02:00:00Z") }, "QuantitySum" : 25.9, "Hour" : 2 }
Sorry, I just dont find a straight forward way to round the hour. You can try the following. You will unwind the versions, so you can apply the grouping to collect the max version, push the versions for the next step, which is to project to filter the matching records with max version and final project to sum the max versions quantity. Right now start dt is the min from the group. You should be fine as long as you have versions at the top of the hour.
db.collection.aggregate([{
$unwind: {
path: "$Versions"
}
}, {
$group: {
_id: {
MeterId: "$_id.MeterId",
start: {
$hour: "$_id.StartDate"
}
},
startDate: {
$min: "$_id.StartDate"
},
maxVersion: {
$max: "$Versions.VersionNumber"
},
Versions: {
$push: "$Versions"
}
}
}, {
$sort: {
"_id.start": -1
}
}, {
$project: {
_id: {
MeterId: "$_id.MeterId",
StartDate: "$startDate"
},
hour: "$_id.start",
Versions: {
$filter: {
input: "$Versions",
as: "version",
cond: {
$eq: ["$maxVersion", "$$version.VersionNumber"]
}
}
}
}
}, {
$project: {
_id: 1,
hour: 1,
QuantitySum: {
$sum: "$Versions.Quantity"
}
}
}]);
Sample Output
{
"_id": {
"MeterId": "1234",
"StartDate": ISODate("2016-09-20T02:00:00Z")
},
"QuantitySum": 15,
"hour": 2
}