Aggregating Mongo collection by year and then by month - mongodb

I have a Mongo collection that looks like this with a bunch of months, days, years:
[
{
"Date": ISODate("2021-08-05T04:59:54.000Z"),
"Amount": 999,
"Business": "Business 1",
},
{
"Date": ISODate("2021-08-05T04:59:54.000Z"),
"Amount": 5.99,
"Business": "Business 2",
},
{
"Date": ISODate("2021-07-17T21:41:56.000Z"),
"Amount": 20000,
"Business": "Business 2",
},
{
"Date": ISODate("2021-06-17T21:41:56.000Z"),
"Amount": 200,
"Business": "Business 5",
}
]
I have done an aggregation like this
db.collection.aggregate({
$group: {
_id: {
year: {
$year: "$Date"
},
month: {
$month: "$Date"
}
},
sum: {
$sum: "$Amount"
}
}
})
...which partially gives me what I want which is a sum of amounts per year and month.
[
{
"_id": {
"month": 6,
"year": 2021
},
"sum": 200
},
{
"_id": {
"month": 7,
"year": 2021
},
"sum": 20000
},
{
"_id": {
"month": 8,
"year": 2021
},
"sum": 1004.99
}
]
What I would like however is to have something like the below where the year is at the top and the months are aggregated in a sum so that it's easier to iterate in the frontend but I have not been able to get it no matter what I have tried:
[
{
"year": 2021,
"sumAmount": 21204.99,
"months": [
{
"month": 7,
"amount": 20000
},
{
"month": 6,
"amount": 200
},
{
"month": 8,
"amount": 1004.99
}
]
},
{ "year" : 2020,
....
}
]
I have been pretty close in using another $group and $push but I have not been able to get what in my mind is a second group by month. Any help will be appreciated!

You just need one more $group stage to get your expected result. If you want the months in a particular order, put a $sort stage between the two $group stages: $push builds the final array in the order in which the documents reach it.
// Sum "Amount" per (year, month), then fold the monthly totals into one
// document per year shaped as { year, sumAmount, months: [{ month, amount }] }.
db.collection.aggregate([
  // Stage 1: one document per (year, month) holding that month's total.
  {
    $group: {
      _id: {
        year: { $year: "$Date" },
        month: { $month: "$Date" }
      },
      sum: { $sum: "$Amount" }
    }
  },
  // Stage 2: order the monthly totals so that $push below receives each
  // year's months in ascending order.
  {
    $sort: {
      "_id.year": 1,
      "_id.month": 1
    }
  },
  // Stage 3: collapse to one document per year; sumAmount is the yearly
  // total and months keeps each month's own total.
  {
    $group: {
      _id: "$_id.year",
      sumAmount: { $sum: "$sum" },
      months: {
        $push: {
          month: "$_id.month",
          amount: "$sum"
        }
      }
    }
  },
  // Stage 4: $group does not order its output, so sort explicitly
  // (newest year first, as in the requested result).
  {
    $sort: { _id: -1 }
  },
  // Stage 5: expose the group key as `year` instead of `_id`.
  {
    $project: {
      _id: 0,
      year: "$_id",
      sumAmount: 1,
      months: 1
    }
  }
])
Here is the Mongo playground for your reference.

Related

Mongodb pipeline on parse server document add pointer field with $lookup

To be honest I really know sql but I'm kind of new to mongodb noSql so I'm a bit lost.
I have made a pipeline that's just working fine.
The goal was to group by day and mindmapId, count the number of distinct users who viewed each mind map, sum the watching time, and save the result into a collection so that I can query it afterwards.
here's sample of data
MindMap
{
"_id": "Yg5uGI3Iy0",
"data": {
"id": "root",
"topic": "Main topic",
"expanded": true
},
"theme": "orange",
"_p_author": "_User$zqPzSKD7EM",
"_created_at": {
"$date": {
"$numberLong": "1658497264836"
}
},
"_updated_at": {
"$date": {
"$numberLong": "1661334292749"
}
}
}
MindmapView
{
"_id": "qWR6HVIcvT",
"startViewDate": {
"$date": {
"$numberLong": "1658669095261"
}
},
"_p_user": "_User$VnrxG9gABO",
"_p_mindmap": "MindMap$Yg5uGI3Iy0",
"_created_at": {
"$date": {
"$numberLong": "1658669095274"
}
},
"_updated_at": {
"$date": {
"$numberLong": "1658669095274"
}
}
}
Pipeline
[{
$group: {
_id: {
day: {
$dateToString: {
format: '%Y-%m-%d',
date: '$startViewDate'
}
},
mindmapId: {
$substr: [
'$_p_mindmap',
8,
-1
]
}
},
watchTime: {
$sum: {
$dateDiff: {
startDate: '$_created_at',
endDate: '$_updated_at',
unit: 'second'
}
}
},
uniqueCount: {
$addToSet: '$_p_user'
}
}
}, {
$project: {
_id: 1,
total: {
$size: '$uniqueCount'
},
watchTime: {
$sum: '$watchTime'
}
}
}]
pipeline results
[{
"_id": {
"day": "2022-08-01",
"mindmapId": "oGCQDQmaNK"
},
"total": 1,
"watchTime": 7
},{
"_id": {
"day": "2022-08-11",
"mindmapId": "7YlZ6FPwiD"
},
"total": 1,
"watchTime": 21
},{
"_id": {
"day": "2022-08-15",
"mindmapId": "7YlZ6FPwiD"
},
"total": 1,
"watchTime": 13
},{
"_id": {
"day": "2022-07-25",
"mindmapId": "7YlZ6FPwiD"
},
"total": 1,
"watchTime": 3
},{
"_id": {
"day": "2022-08-01",
"mindmapId": "YXa8omyChc"
},
"total": 2,
"watchTime": 1306837
},{
"_id": {
"day": "2022-07-25",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 7
},{
"_id": {
"day": "2022-08-17",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 60
},{
"_id": {
"day": "2022-08-06",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 0
},{
"_id": {
"day": "2022-08-11",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 69
},{
"_id": {
"day": "2022-08-10",
"mindmapId": "oGCQDQmaNK"
},
"total": 1,
"watchTime": 4
},{
"_id": {
"day": "2022-08-15",
"mindmapId": "Yg5uGI3Iy0"
},
"total": 1,
"watchTime": 9
},
...
]
However to exploit this data faster I need to include the mindmap author inside the result collection.
The point is to group by day and mindmapId to count number of user viewed it and sum watching time and get the mindmap author and save it into a collection.
To do that I need to use $lookup, but the result is kind of messy: the lookup acts like a full join in SQL. I tried many combinations before posting this.
Here's what I have mainly tried
[{
$group: {
_id: {
day: {
$dateToString: {
format: '%Y-%m-%d',
date: '$startViewDate'
}
},
mindmapId: {
$substr: [
'$_p_mindmap',
8,
-1
]
}
},
watchTime: {
$sum: {
$dateDiff: {
startDate: '$_created_at',
endDate: '$_updated_at',
unit: 'second'
}
}
},
uniqueCount: {
$addToSet: '$_p_user'
}
}
}, {
$lookup: {
from: 'MindMap',
localField: '_objectId',
foreignField: '_id.mindmapId',
as: 'tempMindmapPointer'
}
}, {
$unwind: '$tempMindmapPointer'
}, {
$match: {
'tempMindmapPointer._id': '_id.mindmapId'
}
}, {
$project: {
_id: 1,
total: {
$size: '$uniqueCount'
},
watchTime: {
$sum: '$watchTime'
},
author: {
$substr: [
'$tempMindmapPointer._p_author',
6,
-1
]
}
}
}]
The $match doesn't work here: with it I get no results at all.
If I remove the $match, the lookup acts like a full join between the user list and the mindmap id list, which I don't want:
[{
"_id": {
"day": "2022-08-17",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 60,
"author": "zqPzSKD7EM"
},{
"_id": {
"day": "2022-08-17",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 60,
"author": "zqPzSKD7EM"
},{
"_id": {
"day": "2022-08-17",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 60,
"author": "zqPzSKD7EM"
},{
"_id": {
"day": "2022-08-17",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 60,
"author": "VnrxG9gABO"
},{
"_id": {
"day": "2022-08-17",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 60,
"author": "zqPzSKD7EM"
},{
"_id": {
"day": "2022-08-17",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 60,
"author": "x6kNvG2O0X"
},...
]
I have tried to switch localField: '_objectId' foreignField:'_id.mindmapId' values.
I have also tried to make the lookup first and group by id{day,mindmapId,authorId} but I have never been able to make this compiling.
What could I do to make this request working ? I'm sure there is something to do with $match and $lookup
If I understand you correctly (since you didn't add the requested result), the simple option is:
// Per (day, mindmapId): total watch time in seconds, number of distinct
// viewers, and the id of the mind map's author.
db.MindmapView.aggregate([
  // Group the views by calendar day and by mind map. The pointer string
  // "MindMap$<id>" is reduced to "<id>" by skipping its first 8 characters.
  {
    $group: {
      _id: {
        day: { $dateToString: { format: "%Y-%m-%d", date: "$startViewDate" } },
        mindmapId: { $substr: ["$_p_mindmap", 8, -1] }
      },
      watchTime: {
        $sum: {
          $dateDiff: { startDate: "$_created_at", endDate: "$_updated_at", unit: "second" }
        }
      },
      // Set of distinct user pointers; only its size is kept below.
      uniqueCount: { $addToSet: "$_p_user" }
    }
  },
  { $project: { _id: 1, total: { $size: "$uniqueCount" }, watchTime: 1 } },
  // Join on the extracted id: the group key's mindmapId (local) against
  // MindMap._id (foreign). `author` temporarily holds the matched documents.
  {
    $lookup: {
      from: "MindMap",
      localField: "_id.mindmapId",
      foreignField: "_id",
      as: "author"
    }
  },
  // Take the matched mind map's author pointer and drop its 6-character
  // "_User$" prefix, so `author` is the bare user id as in the question's
  // own projection. With no matching mind map, $substr yields "".
  { $set: { author: { $substr: [{ $first: "$author._p_author" }, 6, -1] } } }
])
See how it works on the playground example.
There is another option that may be a little more efficient, which is using the '$lookup' with a pipeline, to bring only the author from the MindMap collection instead of bringing the entire document and then filter it.
In this case the $lookup stage will be:
// Alternative $lookup stage: joins MindMap through a sub-pipeline so that only
// the _p_author field of the matching document is returned in `author`.
{
$lookup: {
from: "MindMap",
// Expose the current document's _id.mindmapId to the sub-pipeline as $$id.
let: {id: "$_id.mindmapId"},
pipeline: [
// Keep only the MindMap document whose _id equals $$id.
{$match: {$expr: {$eq: ["$$id", "$_id"]}}},
// Return just _p_author; the MindMap _id is excluded.
{$project: {_p_author: 1, _id: 0}}
],
as: "author"
}
}

Aggregation: Grouping based on array element in MongoDB

I am new to MongoDB, trying to write an aggregation function such that my output for the input should be same as below
[
{
"_id": {
"month": 1,
"year": 2022
},
"childServices": [
{"service":"MCT Latency", "sli":99.9},
{"service":"MCT Packet Loss", "sli":99.9}
],
"service": "Network"
},
{
"_id": {
"month": 2,
"year": 2022
},
"childServices": [
{"service":"MCT Latency", "sli":98.9},
{"service":"MCT Packet Loss", "sli":99.9}
],
"service": "Network"
}
]
Tried with below, but it's not grouping each childService by date.
[{
$unwind: {
path: '$childServices'
}
}, {
$group: {
_id: {
month: {
$month: '$date'
},
year: {
$year: '$date'
}
},
service: {
$first: '$service'
},
childServices: {
$first: '$childServices.service'
},
sli: {
$avg: '$childServices.availability'
}
}
}, {
$sort: {
'_id.month': 1,
'_id.year': 1
}
}]
SAMPLE DATA
[{
"_id": {
"$oid": "62fc99c00f5b1cb61d5f1072"
},
"service": "Network",
"date": "01/02/2022 00:32:51",
"childServices": [
{
"service": "MCT Latency",
"availability": 99.9
},
{
"service": "MCT Packet Loss",
"availability": 99.9
}
]
},
{
"_id": {
"$oid": "62fc99df0f5b1cb61d5f1073"
},
"service": "Network",
"date": "02/02/2022 00:32:51",
"childServices": [
{
"service": "MCT Latency",
"availability": 98.3
},
{
"service": "MCT Packet Loss",
"availability": 99.9
}
]
}
]
Basically, I want to get into the childService > pick each service > group them by month+year and get their monthly avg.
Convert the date from a string to a date type, before grouping, like this:
// Monthly average availability ("sli") of every child service, returned as
// one document per (month, year) with the parent service name and a
// childServices array of { service, sli }.
db.collection.aggregate([
  // One document per child service entry.
  {
    $unwind: {
      path: "$childServices"
    }
  },
  // `date` is stored as a string; convert it so $month / $year can read it.
  // NOTE(review): confirm $toDate parses "01/02/2022 00:32:51" as intended
  // (month/day vs day/month); otherwise use $dateFromString with an explicit
  // format.
  {
    $addFields: {
      date: { $toDate: "$date" }
    }
  },
  // First group: one bucket per distinct (month, year, child service).
  // The child service must be part of the key; otherwise $first would keep
  // only one service per month and $avg would mix all services together.
  {
    $group: {
      _id: {
        month: { $month: "$date" },
        year: { $year: "$date" },
        service: "$childServices.service"
      },
      service: { $first: "$service" },
      childServices: { $first: "$childServices.service" },
      sli: { $avg: "$childServices.availability" }
    }
  },
  // Second group: regroup by (month, year) only and push each child
  // service's average into an array. The parent service name is carried
  // over so it appears in the result as requested.
  {
    $group: {
      _id: {
        month: "$_id.month",
        year: "$_id.year"
      },
      service: { $first: "$service" },
      childServices: {
        $push: {
          service: "$childServices",
          sli: "$sli"
        }
      }
    }
  },
  // Chronological order: year first, then month, so that data spanning
  // several years is not interleaved by month number.
  {
    $sort: {
      "_id.year": 1,
      "_id.month": 1
    }
  }
])
Playground link.

How to get the last value of every month in MongoDB?

I have three fields, id, date and qty in my DB. I want to group them by the id and find the qty which is the latest quantity of every month. So for every month, the date with the latest day of every month, the qty will be returned for it.
If the input is
[
{
"id": "ABC",
"date": "2020-10-02 15:03:00.00",
"qty": 500,
},
{
"id": "ABC",
"date": "2020-10-31 20:22:00.00",
"qty": 100,
},
{
"id": "ABC",
"date": "2020-11-03 04:22:00.00",
"qty": 200,
},
{
"id": "ABC",
"date": "2020-11-18 04:22:00.00",
"qty": 50,
},
{
"id": "ABC1",
"date": "2020-11-05 04:22:00.00",
"qty": 5000,
},
{
"id": "ABC1",
"date": "2020-11-15 04:22:00.00",
"qty": 4580,
},
]
then the output should be
[
{
"id": "ABC",
"qtys": [
{
"date": "2020-10-31 20:22:00.00",
"qty": 100
},
{
"date": "2020-11-18 04:22:00.00",
"qty": 50
}
]
},
{
"id": "ABC1",
"qtys": [
{
"date": "2020-11-15 04:22:00.00",
"qty": 4580
}
]
},
]
$addFields to convert the date field from a string to a date type (skip this stage if the field is already a date)
$sort by date in descending order
$group by id, month and year (extracted from the date field with $month and $year), using $first to keep the latest document of each month
$group by id only and build the array of quantities in qtys with $push
// For every id, return the latest { date, qty } entry of each month,
// collected chronologically in a `qtys` array.
db.collection.aggregate([
  // Convert the string date to a real date so it can be sorted and split
  // into month / year.
  { $addFields: { date: { $toDate: "$date" } } },
  // Newest first, so $first below picks the latest entry of each month.
  { $sort: { date: -1 } },
  {
    $group: {
      _id: {
        id: "$id",
        month: { $month: "$date" },
        year: { $year: "$date" }
      },
      qtys: { $first: { date: "$date", qty: "$qty" } }
    }
  },
  // $group does not order its output; sort the monthly results oldest first
  // so that $push builds each qtys array in chronological order.
  { $sort: { "qtys.date": 1 } },
  {
    $group: {
      _id: "$_id.id",
      qtys: { $push: "$qtys" }
    }
  },
  // Expose the group key as `id`, matching the requested output shape.
  { $project: { _id: 0, id: "$_id", qtys: 1 } }
])
Playground

Mongo groupby date inside array

I have collection in my db as,
[
{
"groupName" : "testName",
"participants" : [
{
"participantEmail" : "test#test.com",
"lastClearedDate" : 12223213123
},
{
"participantEmail" : "test2#test.com",
"lastClearedDate" : 1234343243423
}
],
"messages" : [
{
"message":"sdasdasdasdasdasd",
"time":22312312312,
"sender":"test#test.com"
},
{
"message":"gfdfvd dssdfdsfs",
"time":2231231237789,
"sender":"test#test.com"
}
]
}
]
This is a collection of group which contains all the participants and messages in that group.
The time field inside the message is Timestamp.
I want to get all the messages in a group that were posted after a given date, grouped by date.
I wrote the following code,
ChatGroup.aggregate([
{ $match: { group_name: groupName } },
{ $unwind: "$messages" },
{ $match: { "messages.time": { $gte: messagesFrom } } },
{
$project: {
_id: 0,
y: {
$year: {
$add: [new Date(0), { $multiply: [1000, "$messages.time"] }]
}
},
m: {
$month: {
$add: [new Date(0), { $multiply: [1000, "$messages.time"] }]
}
},
d: {
$dayOfMonth: {
$add: [new Date(0), { $multiply: [1000, "$messages.time"] }]
}
}
}
},
{
$group: {
_id: {
year: "$y",
month: "$m",
day: "$d"
},
messages: { $push: "$messages" },
count: { $sum: 1 }
}
}
]).then(
group => {
console.log("length of messages", group);
resolve(group);
},
err => {
console.log(err);
}
);
});
and I getting the following output,
[
{
"_id": {
"year": 50694,
"month": 9,
"day": 5
},
"messages": [],
"count": 3
},
{
"_id": {
"year": 50694,
"month": 8,
"day": 27
},
"messages": [],
"count": 1
},
{
"_id": {
"year": 50694,
"month": 8,
"day": 26
},
"messages": [],
"count": 10
}
]
I am not getting the messages but the count is correct.
Also the time which is displayed in the result is incorrect e.g. year, date and month.
Mongo version is 3.2.
I referred the groupby and push documentation from mongodb along with other stackoverflow questions on mongo group by.
What am I doing wrong?
Your timestamp is already in milliseconds, so you don't need to multiply it by 1000: adding it directly to new Date(0) gives the correct date (the extra multiplication is what produced years like 50694).
So your final query should be something like this
// Returns the messages of one chat group posted at or after `messagesFrom`,
// grouped by calendar day, with a per-day message count.
ChatGroup.aggregate([
// Pre-filter whole documents: the named group, and only if at least one of
// its messages is recent enough. This reduces what $unwind has to expand.
{ "$match": {
"group_name": groupName,
"messages.time": { "$gte": messagesFrom }
}},
// One document per message.
{ "$unwind": "$messages" },
// Filter again: after $unwind the older messages of a matching group are
// still present and must be dropped individually.
{ "$match": { "messages.time": { "$gte": messagesFrom }}},
{ "$group": {
// new Date(0) + number yields a Date; $add treats the number as
// milliseconds, so messages.time is used as-is (assumed to be epoch ms).
"_id": {
"year": { "$year": { "$add": [new Date(0), "$messages.time"] }},
"month": { "$month": { "$add": [new Date(0), "$messages.time"] }},
"day": { "$dayOfMonth": { "$add": [new Date(0), "$messages.time"] }}
},
// $messages is still available here because no $project removed it.
"messages": { "$push": "$messages" },
"count": { "$sum": 1 }
}}
])
Add messages in $project
{
$project: {
_id: 0,
messages : 1,
.........
},
}

Group by day/month/week basis on the date range

This is in reference to this question.
This is my data set:
[
{
"rating": 4,
"createdAt": ISODate("2016-08-08T15:32:41.262+0000")
},
{
"rating": 3,
"createdAt": ISODate("2016-08-08T15:32:41.262+0000")
},
{
"rating": 3,
"createdAt": ISODate("2016-07-01T15:32:41.262+0000")
},
{
"rating": 5,
"createdAt": ISODate("2016-07-01T15:32:41.262+0000")
}
]
I want to be able to group the ratings by week or by month over a date range.
How would I do that in mongo?
This was the answer given for grouping by days.
// Average rating per calendar day.
db.collection.aggregate([
  // Derive the grouping key (YYYY-MM-DD) and the month from the createdAt
  // field, the same field the weekly and monthly pipelines read.
  {
    "$project": {
      "formattedDate": {
        "$dateToString": { "format": "%Y-%m-%d", "date": "$createdAt" }
      },
      "createdAtMonth": { "$month": "$createdAt" },
      "rating": 1
    }
  },
  // One bucket per day; every document of a day shares the same month, so
  // $first is enough to carry it over.
  {
    "$group": {
      "_id": "$formattedDate",
      "average": { "$avg": "$rating" },
      "month": { "$first": "$createdAtMonth" }
    }
  }
])
For grouping on weekly basis, run the following pipeline which mainly uses the Date Aggregation Operators to extract the date parts:
// Average rating per week.
db.collection.aggregate([
  // Extract the date parts used for grouping. $week is the week of the year
  // (0-53), so the year is extracted too.
  {
    "$project": {
      "createdAtYear": { "$year": "$createdAt" },
      "createdAtWeek": { "$week": "$createdAt" },
      "createdAtMonth": { "$month": "$createdAt" },
      "rating": 1
    }
  },
  // Group by (year, week): the week number alone would merge the same week
  // of different years whenever the date range spans more than one year.
  {
    "$group": {
      "_id": { "year": "$createdAtYear", "week": "$createdAtWeek" },
      "average": { "$avg": "$rating" },
      // A week can straddle two months; this keeps the month of the first
      // document seen in the bucket.
      "month": { "$first": "$createdAtMonth" }
    }
  }
])
and for monthly aggregates, interchange the $group key to use the created month field:
// Average rating per month.
db.collection.aggregate([
  // Extract the date parts used for grouping. $month is 1-12, so the year
  // is extracted too.
  {
    "$project": {
      "createdAtYear": { "$year": "$createdAt" },
      "createdAtWeek": { "$week": "$createdAt" },
      "createdAtMonth": { "$month": "$createdAt" },
      "rating": 1
    }
  },
  // Group by (year, month): the month number alone would merge the same
  // month of different years whenever the date range spans more than one year.
  {
    "$group": {
      "_id": { "year": "$createdAtYear", "month": "$createdAtMonth" },
      "average": { "$avg": "$rating" },
      // A month contains several weeks; this keeps the week of the first
      // document seen in the bucket.
      "week": { "$first": "$createdAtWeek" }
    }
  }
])