How to group MongoDB aggregation [duplicate] - mongodb

Who knows a better solution to group Orders by date and sum total and count by source. Of course I can group by Source and then I get only totals for this source only, I can alter the result thereafter to get the desired result. But I would like to know if it is possible in one simple $group statement.
Eg. ordersByApp = 1, ordersByWEB = 2
Orders collection
{
_id: 'XCUZO0',
date: "2020-02-01T00:00:03.243Z"
total: 9.99,
source: 'APP'
},
{
_id: 'XCUZO1',
date: "2020-01-05T00:00:03.243Z"
total: 9.99,
source: 'WEB'
},
{
_id: 'XCUZO2',
date: "2020-01-02T00:00:03.243Z"
total: 9.99,
source: 'WEB'
}
My current aggregation
Order.aggregate([
{
$group: {
_id: {
month: { $month: "$date",
year: { $year: "$date" }
},
total: {
$sum: "$total"
}
}
}
])
Current result
[
{
_id: { month: 01, year: 2020 },
total: 19.98
},
{
_id: { month: 02, year: 2020 },
total: 9.99
}
]
Desired result, How can I achieve the below?
[
{
_id: { month: 01, year: 2020 },
total: 19.98,
countByApp: 1, <---
countByWEB: 0, <---
},
{
_id: { month: 02, year: 2020 },
total: 9.99,
countByWEB: 2, <---
countByAPP: 0 <---
}
]

You can use $cond like below:
Order.aggregate([
{
$group: {
_id: {
month: { $month: "$date" },
year: { $year: "$date" }
},
total: { $sum: "$total" },
countByApp: { $sum: { $cond: [ {$eq: [ "$source", "APP" ]} , 1, 0] } },
countByWeb: { $sum: { $cond: [ {$eq: [ "$source", "WEB" ]} , 1, 0] } },
}
}
])
Mongo Playground

Related

How to calculate average records per month?

My records like this [{ createdAt }, {createdAt}, {createdAt} ]
I need average records per month.
january => 3 records
february => 2 records etc..
You can try to $group by month and year when counting and by month when averaging:
db.collection.aggregate([
{
$group: {
_id: {
month: {
$month: "$createdAt"
},
year: {
$year: "$createdAt"
},
},
count: {
$sum: 1
}
}
},
{
$group: {
_id: {
month: "$_id.month"
},
average: {
$avg: "$count"
}
}
},
{
$project: {
_id: 0,
month: "$_id.month",
average: 1
}
}
])
Link to playground
Not fully clear what you mean by "average records per month" but I think it would be this:
db.collection.aggregate([
{
$group: {
_id: {
$dateTrunc: {
date: "$createdAt",
unit: "month"
}
},
count: { $count: {} }
}
},
{
$group: {
_id: null,
data: { $push: { k: { $toString: { $month: "$_id" } }, v: "$count" } }
}
},
{ $replaceWith: { $arrayToObject: "$data" } }
])
Getting the month name is not so easy, either you use a external library or build your own with $switch

Mongo Aggregate: Group and Sort Token error

I'm struggling with something which is most likely very stupid on my behalf.
I have a data set in a mongo collection and I wanted to get all the sum of documents grouped by year and month (which I can do fine). However I then want to have those results ordered by year and month.
Here is my query for getting the results with just the sum:
db.xxxx.aggregate([
{
$group: {
_id: { year:
{ $year: "$createdDate" },
month: { $month: "$createdDate" }
},
total_users: { $sum: 1 }
},
}
])
Which results in this:
[
{ _id: { year: 2021, month: 12 }, total_users: 191 },
{ _id: { year: 2021, month: 6 }, total_users: 51 },
{ _id: { year: 2022, month: 3 }, total_users: 165 },
{ _id: { year: 2022, month: 8 }, total_users: 61 },
{ _id: { year: 2021, month: 8 }, total_users: 182 },
{ _id: { year: 2021, month: 11 }, total_users: 81 },
{ _id: { year: 2022, month: 4 }, total_users: 155 },
{ _id: { year: 2022, month: 7 }, total_users: 10 },
{ _id: { year: 2022, month: 5 }, total_users: 31 },
{ _id: { year: 2022, month: 9 }, total_users: 23 },
{ _id: { year: 2021, month: 7 }, total_users: 48 },
{ _id: { year: 2021, month: 10 }, total_users: 75 },
{ _id: { year: 2021, month: 5 }, total_users: 11 },
{ _id: { year: 2022, month: 2 }, total_users: 300 },
{ _id: { year: 2021, month: 9 }, total_users: 131 },
{ _id: { year: 2022, month: 1 }, total_users: 172 },
{ _id: { year: 2022, month: 6 }, total_users: 65 }
]
Now once I try and sort by month and year I get a token error:
db.xxxx.aggregate([
{
$group: {
_id: { year:
{ $year: "$createdDate" },
month: { $month: "$createdDate" }
},
total_users: { $sum: 1 }
},
{$sort: {year:1}},
{$sort: {month:1}}
}
])
Resulting error:
Uncaught:
SyntaxError: Unexpected token (10:4)
8 | total_users: { $sum: 1 }
9 | },
> 10 | {$sort: {year:1}},
| ^
11 |
Atlas [primary] collection> {$sort: {month:1}}
1
Atlas [primary] collection> }
Uncaught:
SyntaxError: Unexpected token (1:0)
> 1 | }
| ^
2 |
Any help would be very welcome!!
Thanks
EDIT
Here's the query now the suggested sort query:
db.xxx.aggregate([
{
$group: {
_id: { year:
{ $year: "$createdDate" },
month: { $month: "$createdDate" }
},
total_users: { $sum: 1 }
},
{
$sort: {
"_id.year": 1,
"_id.month": 1
}
}
}
])
Note I'm still getting the same error
You want to use:
{$sort: {"_id.year": 1, "_id.month": 1}}
See how it works on the playground example
EDIT:
You have misplayed the }. The $group is missing one at the end and the $sort have extra one. It should be:
db.collection.aggregate([
{$group: {
_id: {year: {$year: "$createdDate"}, month: {$month: "$createdDate"}},
total_users: {$sum: 1}
}
},
{$sort: {"_id.year": 1, "_id.month": 1}}
])

MongoDB - get datewise/houlty aggregate count of column

I have set of documents in my mongoDB collection. I am looking to get datewise aggregate count of document if date range is more than a day and hourly aggregate count for same column if date query is for single day. The data may have documents with same conversationId, hence it is necessary to group with conversationId as well.Below is sample of data for reference
[
{
"_id":"c438a671-2391-4b85-815c-ecfcb3d2bb54",
"status":"INTERNAL_UPDATE",
"conversationId":"ac44781d-caab-4410-a708-9d6db8480fc3",
"messageIds":[],
"messageId":"4dc02026-ac06-4eb1-aa59-e385fcce4a36",
"responseId":"0c00c83d-61c5-4937-846c-2e6a46aae857",
"conversation":{},
"message":{},
"params":{},
"timestamp":"2021-05-04T11:40:06.552Z",
"source":{}
},
{
"_id":"98370ddf-9ff8-4347-bab7-1f7777ab9e9d",
"status":"NEW",
"conversationId":"b5dc39d2-56a1-4eb6-a728-cdbe33dca580",
"messageIds":[],
"messageId":"ba94b839-f795-44f2-aea0-173d26006f14",
"responseId":"a2b75364-447b-4345-8008-2beccd6cbb34",
"conversation":{},
"message":{},
"params":{},
"timestamp":"2021-05-05T11:40:30.897Z",
"source":{}
},
{
"_id":"db1eae2b-62d9-455c-ab46-dbfc5baf8b67",
"status":"INTERNAL_UPDATE",
"conversationId":"b5dc39d2-56a1-4eb6-a728-cdbe33dcb584",
"messageIds":[],
"messageId":"b83c743b-d36e-4fdd-9c03-21988af47263",
"responseId":"97198c09-0130-48dc-a225-6d0faeff3116",
"conversation":{},
"message":{},
"params":{},
"timestamp":"2021-05-05T11:40:31.418Z",
"source":{}
},
{
"_id":"12a21495-f857-4f18-a06e-f8ba0b951ade",
"status":"NEW",
"conversationId":"8e37c704-add8-4f9f-8e70-d630c24f653b",
"messageIds":[],
"messageId":"51a48362-545c-4f9f-930b-42e4841fc974",
"responseId":"4691468b-a43b-41d1-83df-1349fb554bfa",
"conversation":{},
"message":{},
"params":{},
"timestamp":"2021-05-06T11:43:58.174Z",
"source":{}
},
{
"_id":"4afaa735-4618-40cf-8b4f-00ee83b2c3c5",
"status":"INTERNAL_UPDATE",
"conversationId":"8e37c704-add8-4f9f-8e70-d630c24f653b",
"messageIds":[],
"messageId":"7c860126-bf1e-41b2-a7d3-6bcec3e8d5fb",
"responseId":"09cec9a1-2621-481d-b527-d98b007ef5be",
"conversation":{},
"message":{},
"params":{},
"timestamp":"2021-05-06T11:43:58.736Z",
"source":{}
},
{
"_id":"cf8deeca-2cfd-497e-b92b-03204c84217a",
"status":"NEW",
"conversationId":"3c6870b5-88d6-4e21-8629-28137dea3fee",
"messageIds":[],
"messageId":"da84e414-2269-4812-8ddd-e2cd6c9be4fd",
"responseId":"ae1014b2-0cc1-41f0-9990-cf724ed67ab7",
"conversation":{},
"message":{},
"params":{},
"timestamp":"2021-05-06T13:37:55.060Z",
"source":{}
}
]
Presently I am able to group by conversationId, but unable to get data aggregated datewise or on hourly basis if date range is on single date.
Below is the query for same
db.documentName.aggregate([
{
'$match': {
'$and': [
{
timestamp: {
'$gte': ISODate('2021-05-01T00:00:00.000Z'),
'$lte': ISODate('2021-05-10T23:59:59.999Z')
}
},
{ 'source.author': { '$regex': 'user', '$options': 'i' } },
{},
{}
]
}
},
{ '$group': {
_id: {'conversationId': '$conversationId'} },
{ '$count': 'document_count' }
])
I have tried adding something like, $hour: '$timestamp' with comma separation beside conversationId in $group, but its of no use and is giving error.
The desired result I am trying to get for above data is, something like this
[{"date": "2021-05-04", "doc_count": 1},
{"date": "2021-05-05", "doc_count": 2},
{"date": "2021-05-06", "doc_count": 2}]
As for 2021-05-05 there are 2 docs with different conversationId, and for 2021-05-06 there are 3 docs in total but 2 documents have same conversationId hence aggregate count for 2021-05-06 is also 2. Hope this clarifies my quesiton.
The question is not entirely clear to me, but it sounds like you want something like this:
The groupId is a field to rebuild the date including the hour, or not, according to your condition:
EDIT:
db.collection.aggregate([
{$match: {
timestamp: {
$gte: ISODate("2021-05-01T00:00:00.000Z"),
$lte: ISODate("2021-05-07T23:59:59.999Z")
}
}
},
{$project: {
conversationId: 1,
groupId: {
$dateFromParts: {
year: {$year: "$timestamp"},
month: {$month: "$timestamp"},
day: {$dayOfMonth: "$timestamp"},
hour: {$cond: [
{$gte: [
{$dateDiff: {
startDate: ISODate("2021-05-01T00:00:00.000Z"),
endDate: ISODate("2021-05-07T23:59:59.999Z"),
unit: "day"}}, 1]},
0,
{$hour: "$timestamp"}]}
}
}
}
},
{$group: {_id: {conversationId: "$conversationId", groupId: "$groupId"}}},
{$group: {_id: "$_id.groupId", doc_count: {$sum: 1}}},
{$project: {date: {$toString: "$_id"}, doc_count: 1, _id: 0}}
])
See how it works on the playground example
As suggested by #nimrodserok, for mongo version 4.2.9 the query would be
db.collection.aggregate([
{
$match: {
timestamp: {
$gte: ISODate("2021-05-01T00:00:00.000Z"),
$lte: ISODate("2021-05-07T23:59:59.999Z")
}
}
},
{
$project: {
conversationId: 1,
groupId: {
$dateFromParts: {
year: {
$year: "$timestamp"
},
month: {
$month: "$timestamp"
},
day: {
$dayOfMonth: "$timestamp"
},
hour: {
$cond: [
{
$gte: [
{
$subtract: [
{
$toLong: ISODate("2021-05-07T23:59:59.999Z")
},
{
$toLong: ISODate("2021-05-01T00:00:00.000Z")
}
]
},
86400000
]
},
0,
{
$hour: "$timestamp"
}
]
}
}
}
}
},
{
$group: {
_id: {
conversationId: "$conversationId",
groupId: "$groupId"
}
}
},
{
$group: {
_id: "$_id.groupId",
doc_count: {
$sum: 1
}
}
},
{
$project: {
date: {
$toString: "$_id"
},
doc_count: 1,
_id: 0
}
}
])

Mongodb aggregate $group and count for date ranges

I have documents like these:
{
"_id" : ObjectId("5cc80389c723e046f504b5a9"),
"adddress" : "string",
"checkIn" : "2019-04-30T08:12:57.909Z"
},
{
"_id" : ObjectId("5cc995f5a6f3eb7c483b019f"),
"adddress" : "string",
"checkIn" : "2019-05-01T12:49:57.561Z"
}
I have tried aggrgation like this:
var start = new Date("2019-04-30T08:12:57.909Z");
var end = new Date("2019-05-01T12:49:57.561Z");
var pipeline = [
{
$match: {
checkIn: {
$gte: start,
$lte: end
}
}
},
{
$group: {
_id: {
year: {
$year: "$checkIn"
},
month: {
$month: "$checkIn"
},
day: {
$dayOfYear: "$checkIn"
}
},
count: {
$sum: 1
}
}
}];
db.collections.aggregate(pipeline).toArray()
Is it possible to count them by checkIn date and get result like this:
"_id": [{
"checkIn": "2019-03-15T00:00:00Z",
"count": 4
}, {
"checkIn": "2019-04-30T00:00:00Z",
"count": 1
}, {
"checkIn": "2019-05-10T00:00:00Z",
"count": 1
}],
The result is shown the total number of the day.
{$project: {
checkIn: { $dateToString: { format: '%Y-%m-%d', date: '$checkIn' } }
}},
{$group: {
_id: '$checkIn',
checkIn: {$first: '$checkIn'},
count: {$sum: 1}
}},
{$sort: {checkIn: 1}}
Try this: I have tested this query and its working.
db.sample.aggregate([{
$addFields: {
date: {
$dateFromString: {
dateString: "$checkIn"
}
}
}
},{
$match: {
date: {
$gte: start,
$lte: end
}
}
},
{
$addFields: {
dateString: {
$dateToString: {
format: "%Y-%m-%d",
date: "$date"
}
}
}
},
{
$group: {
_id: "$dateString",
count: {
$sum: 1
}
}
}
]);

How to group date quarterly wise?

I have documents which contain a date and I'm wondering how to group them according to quarterly basis?
My schema is:
var ekgsanswermodel = new mongoose.Schema({
userId: {type: Schema.Types.ObjectId},
topicId : {type: Schema.Types.ObjectId},
ekgId : {type: Schema.Types.ObjectId},
answerSubmitted :{type: Number},
dateAttempted : { type: Date},
title : {type: String},
submissionSessionId : {type: String}
});
1st quarter contains months 1, 2, 3. 2nd quarter contains months 4, 5, 6 and so on up-to 4th quarter.
My final result should be:
"result" : [
{
_id: {
quater:
},
_id: {
quater:
},
_id: {
quater:
},
_id: {
quater:
}
}
You could make use of the $cond operator to check if:
The $month is <= 3, project a field named quarter with
value as "one".
The $month is <= 6, project a field named quarter with
value as "two".
The $month is <= 9, project a field named quarter with
value as "three".
else the value of the field quarter would be "fourth".
Then $group by the quarter field.
Code:
db.collection.aggregate([
{
$project: {
date: 1,
quarter: {
$cond: [
{ $lte: [{ $month: "$date" }, 3] },
"first",
{
$cond: [
{ $lte: [{ $month: "$date" }, 6] },
"second",
{
$cond: [{ $lte: [{ $month: "$date" }, 9] }, "third", "fourth"],
},
],
},
],
},
},
},
{ $group: { _id: { quarter: "$quarter" }, results: { $push: "$date" } } },
]);
Specific to your schema:
db.collection.aggregate([
{
$project: {
dateAttempted: 1,
userId: 1,
topicId: 1,
ekgId: 1,
title: 1,
quarter: {
$cond: [
{ $lte: [{ $month: "$dateAttempted" }, 3] },
"first",
{
$cond: [
{ $lte: [{ $month: "$dateAttempted" }, 6] },
"second",
{
$cond: [
{ $lte: [{ $month: "$dateAttempted" }, 9] },
"third",
"fourth",
],
},
],
},
],
},
},
},
{ $group: { _id: { quarter: "$quarter" }, results: { $push: "$$ROOT" } } },
]);
You could use following to group documents quarterly.
{
$project : {
dateAttempted : 1,
dateQuarter: {
$trunc : {$add: [{$divide: [{$subtract: [{$month:
"$dateAttempted"}, 1]}, 3]}, 1]}
}
}
}
Starting in Mongo 5, it's a perfect use case for the new $dateTrunc aggregation operator:
// { date: ISODate("2012-10-11") }
// { date: ISODate("2013-02-27") }
// { date: ISODate("2013-01-12") }
// { date: ISODate("2013-03-11") }
// { date: ISODate("2013-07-14") }
db.collection.aggregate([
{ $group: {
_id: { $dateTrunc: { date: "$date", unit: "quarter" } },
total: { $count: {} }
}}
])
// { _id: ISODate("2012-10-01"), total: 1 }
// { _id: ISODate("2013-01-01"), total: 3 }
// { _id: ISODate("2013-07-01"), total: 1 }
$dateTrunc truncates your dates at the beginning of their quarter (the truncation unit). It's kind of a modulo on dates per quarter.
Quarters in the output will be defined by their first day (Q3 2013 will be 2013-07-01). And you can always adapt it using $dateToString projection for instance.