Group by day/month/week basis on the date range - mongodb

This is in reference to this question.
This is my data set:
[
{
"rating": 4,
"ceatedAt": ISODate("2016-08-08T15:32:41.262+0000")
},
{
"rating": 3,
"createdAt": ISODate("2016-08-08T15:32:41.262+0000")
},
{
"rating": 3,
"ceatedAt": ISODate("2016-07-01T15:32:41.262+0000")
},
{
"rating": 5,
"createdAt": ISODate("2016-07-01T15:32:41.262+0000")
}
]
I want to be able to filter basis on week or month basis on the date range.
How would I do that in mongo?
This was the answer given for grouping by days.
db.collection.aggregate([
{
"$project": {
"formattedDate": {
"$dateToString": { "format": "%Y-%m-%d", "date": "$ceatedAt" }
},
"createdAtMonth": { "$month": "$ceatedAt" },
"rating": 1
}
},
{
"$group": {
"_id": "$formattedDate",
"average": { "$avg": "$rating" },
"month": { "$first": "$createdAtMonth" },
}
}
])

For grouping on weekly basis, run the following pipeline which mainly uses the Date Aggregation Operators to extract the date parts:
db.collection.aggregate([
{
"$project": {
"createdAtWeek": { "$week": "$createdAt" },
"createdAtMonth": { "$month": "$createdAt" },
"rating": 1
}
},
{
"$group": {
"_id": "$createdAtWeek",
"average": { "$avg": "$rating" },
"month": { "$first": "$createdAtMonth" }
}
}
])
and for monthly aggregates, interchange the $group key to use the created month field:
db.collection.aggregate([
{
"$project": {
"createdAtWeek": { "$week": "$createdAt" },
"createdAtMonth": { "$month": "$createdAt" },
"rating": 1
}
},
{
"$group": {
"_id": "$createdAtMonth",
"average": { "$avg": "$rating" },
"week": { "$first": "$createdAtWeek" }
}
}
])

Related

Mongodb pipeline on parse server document add pointer field with $lookup

To be honest I really know sql but I'm kind of new to mongodb noSql so I'm a bit lost.
I have made a pipeline that's just working fine.
The point was to group by day and mindmapId to count number of user viewed it and sum watching time and save it into a collection in order to make request on it after.
here's sample of data
MindMap
{
"_id": "Yg5uGI3Iy0",
"data": {
"id": "root",
"topic": "Main topic",
"expanded": true
},
"theme": "orange",
"_p_author": "_User$zqPzSKD7EM",
"_created_at": {
"$date": {
"$numberLong": "1658497264836"
}
},
"_updated_at": {
"$date": {
"$numberLong": "1661334292749"
}
}
}
MindmapView
{
"_id": "qWR6HVIcvT",
"startViewDate": {
"$date": {
"$numberLong": "1658669095261"
}
},
"_p_user": "_User$VnrxG9gABO",
"_p_mindmap": "MindMap$Yg5uGI3Iy0",
"_created_at": {
"$date": {
"$numberLong": "1658669095274"
}
},
"_updated_at": {
"$date": {
"$numberLong": "1658669095274"
}
}
}
Pipeline
[{
$group: {
_id: {
day: {
$dateToString: {
format: '%Y-%m-%d',
date: '$startViewDate'
}
},
mindmapId: {
$substr: [
'$_p_mindmap',
8,
-1
]
}
},
watchTime: {
$sum: {
$dateDiff: {
startDate: '$_created_at',
endDate: '$_updated_at',
unit: 'second'
}
}
},
uniqueCount: {
$addToSet: '$_p_user'
}
}
}, {
$project: {
_id: 1,
total: {
$size: '$uniqueCount'
},
watchTime: {
$sum: '$watchTime'
}
}
}]
pipeline results
[{
"_id": {
"day": "2022-08-01",
"mindmapId": "oGCQDQmaNK"
},
"total": 1,
"watchTime": 7
},{
"_id": {
"day": "2022-08-11",
"mindmapId": "7YlZ6FPwiD"
},
"total": 1,
"watchTime": 21
},{
"_id": {
"day": "2022-08-15",
"mindmapId": "7YlZ6FPwiD"
},
"total": 1,
"watchTime": 13
},{
"_id": {
"day": "2022-07-25",
"mindmapId": "7YlZ6FPwiD"
},
"total": 1,
"watchTime": 3
},{
"_id": {
"day": "2022-08-01",
"mindmapId": "YXa8omyChc"
},
"total": 2,
"watchTime": 1306837
},{
"_id": {
"day": "2022-07-25",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 7
},{
"_id": {
"day": "2022-08-17",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 60
},{
"_id": {
"day": "2022-08-06",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 0
},{
"_id": {
"day": "2022-08-11",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 69
},{
"_id": {
"day": "2022-08-10",
"mindmapId": "oGCQDQmaNK"
},
"total": 1,
"watchTime": 4
},{
"_id": {
"day": "2022-08-15",
"mindmapId": "Yg5uGI3Iy0"
},
"total": 1,
"watchTime": 9
},
...
]
However to exploit this data faster I need to include the mindmap author inside the result collection.
The point is to group by day and mindmapId to count number of user viewed it and sum watching time and get the mindmap author and save it into a collection.
To do that I need to use $lookup but the result is kind of messy and the lookup act like a full join in sql. I've tried so much combination before this post.
Here's what I have mainly tried
[{
$group: {
_id: {
day: {
$dateToString: {
format: '%Y-%m-%d',
date: '$startViewDate'
}
},
mindmapId: {
$substr: [
'$_p_mindmap',
8,
-1
]
}
},
watchTime: {
$sum: {
$dateDiff: {
startDate: '$_created_at',
endDate: '$_updated_at',
unit: 'second'
}
}
},
uniqueCount: {
$addToSet: '$_p_user'
}
}
}, {
$lookup: {
from: 'MindMap',
localField: '_objectId',
foreignField: '_id.mindmapId',
as: 'tempMindmapPointer'
}
}, {
$unwind: '$tempMindmapPointer'
}, {
$match: {
'tempMindmapPointer._id': '_id.mindmapId'
}
}, {
$project: {
_id: 1,
total: {
$size: '$uniqueCount'
},
watchTime: {
$sum: '$watchTime'
},
author: {
$substr: [
'$tempMindmapPointer._p_author',
6,
-1
]
}
}
}]
the $match doesn't work here it make me have no results
If I remove $match it act like a full join user list with mindmap id list which I don't want
[{
"_id": {
"day": "2022-08-17",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 60,
"author": "zqPzSKD7EM"
},{
"_id": {
"day": "2022-08-17",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 60,
"author": "zqPzSKD7EM"
},{
"_id": {
"day": "2022-08-17",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 60,
"author": "zqPzSKD7EM"
},{
"_id": {
"day": "2022-08-17",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 60,
"author": "VnrxG9gABO"
},{
"_id": {
"day": "2022-08-17",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 60,
"author": "zqPzSKD7EM"
},{
"_id": {
"day": "2022-08-17",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 60,
"author": "x6kNvG2O0X"
},...
]
I have tried to switch localField: '_objectId' foreignField:'_id.mindmapId' values.
I have also tried to make the lookup first and group by id{day,mindmapId,authorId} but I have never been able to make this compiling.
What could I do to make this request working ? I'm sure there is something to do with $match and $lookup
If I understand you correctly (since you didn't add the requested result), the simple option is:
db.MindmapView.aggregate([
{$group: {
_id: {
day: {$dateToString: {format: "%Y-%m-%d", date: "$startViewDate"}},
mindmapId: {$substr: ["$_p_mindmap", 8, -1]}
},
watchTime: {
$sum: {
$dateDiff: {startDate: "$_created_at", endDate: "$_updated_at", unit: "second"}
}
},
uniqueCount: {$addToSet: "$_p_user"}
}
},
{$project: {_id: 1, total: {$size: "$uniqueCount"}, watchTime: 1}},
{$lookup: {
from: "MindMap",
localField: "_id.mindmapId",
foreignField: "_id",
as: "author"
}
},
{$set: {author: {$first: "$author._p_author"}}}
])
See how it works on the playground example.
There is another option that may be a little more efficient, which is using the '$lookup' with a pipeline, to bring only the author from the MindMap collection instead of bringing the entire document and then filter it.
In this case the $lookup stage will be:
{
$lookup: {
from: "MindMap",
let: {id: "$_id.mindmapId"},
pipeline: [
{$match: {$expr: {$eq: ["$$id", "$_id"]}}},
{$project: {_p_author: 1, _id: 0}}
],
as: "author"
}
}

Aggregating Mongo collection by year and then by month

I have a Mongo collection that looks like this with a bunch of months, days, years:
[
{
"Date": ISODate("2021-08-05T04:59:54.000Z"),
"Amount": 999,
"Business": "Business 1",
},
{
"Date": ISODate("2021-08-05T04:59:54.000Z"),
"Amount": 5.99,
"Business": "Business 2",
},
{
"Date": ISODate("2021-07-17T21:41:56.000Z"),
"Amount": 20000,
"Business": "Business 2",
},
{
"Date": ISODate("2021-06-17T21:41:56.000Z"),
"Amount": 200,
"Business": "Business 5",
}
]
I have done an aggregation like this
db.collection.aggregate({
$group: {
_id: {
year: {
$year: "$Date"
},
month: {
$month: "$Date"
}
},
sum: {
$sum: "$Amount"
}
}
})
...which partially gives me what I want which is a sum of amounts per year and month.
[
{
"_id": {
"month": 6,
"year": 2021
},
"sum": 200
},
{
"_id": {
"month": 7,
"year": 2021
},
"sum": 20000
},
{
"_id": {
"month": 8,
"year": 2021
},
"sum": 1004.99
}
]
What I would like however is to have something like the below where the year is at the top and the months are aggregated in a sum so that it's easier to iterate in the frontend but I have not been able to get it no matter what I have tried:
[
{
"year": 2021,
"sumAmount": 21204.99,
"months": [
{
"month": 7,
"amount": 20000
},
{
"month": 6,
"amount": 200
},
{
"month": 8,
"amount": 1004.99
}
]
},
{ "year" : 2020,
....
}
]
I have been pretty close in using another $group and $push but I have not been able to get what in my mind is a second group by month. Any help will be appreciated!
You just need one more $group to get your expected result. For another sorting, you can put an $sort after the $group stage. You will need to use $push to keep the ordering in the final array.
db.collection.aggregate([
{
$group: {
_id: {
year: {
$year: "$Date"
},
month: {
$month: "$Date"
}
},
sum: {
$sum: "$Amount"
}
}
},
{
"$sort": {
"_id.year": 1,
"_id.month": 1
}
},
{
"$group": {
"_id": "$_id.year",
"sumAmount": {
$sum: "$sum"
},
"months": {
"$push": {
"month": "$_id.month",
"amount": "$sum"
}
}
}
}
])
Here is the Mongo playground for your reference.

need to convert the data in another format

We have Data:
[
{
"_id": ObjectId("5f87e152219aaf1f9404ef3f"),
"parameterId": "5f914ca2679bae721d38410b",
"average": 574998.153846154,
"count": 26.0,
"date": ISODate("2020-09-08T18:30:00.000Z"),
"_class": "org.nec.iotplatform.entities.RawData"
},
{
"_id": ObjectId("5f87e1e2219aaf1f9404eff5"),
"parameterId": "5f914ca2679bae721d38410b",
"average": 494217.606225681,
"count": 1285.0,
"date": ISODate("2020-09-09T18:30:00.000Z"),
"_class": "org.nec.iotplatform.entities.RawData"
}
]
I have query which I am executing on above data and then getting the result as below the query
db.collection.aggregate([
{
"$project": {
"year": {
"$year": "$date"
},
"month": {
"$month": "$date"
},
"dayOfMonth": {
"$dayOfMonth": "$date"
},
"average": "$average",
"count": "$count",
"Symbol": 1
}
},
{
"$group": {
"_id": {
year: "$year",
month: "$month",
dayOfMonth: "$dayOfMonth"
},
"data": {
"$push": "$$ROOT"
}
}
},
{
"$project": {
"average": {
"$divide": [
{
"$reduce": {
"input": "$data",
"initialValue": 0,
"in": {
"$add": [
"$$value",
{
"$multiply": [
"$$this.count",
"$$this.average"
]
}
]
}
}
},
{
$reduce: {
input: "$data",
initialValue: 0,
in: {
"$add": [
"$$value",
"$$this.count"
]
}
}
}
]
}
}
}
])
I am getting output :
[{
"_id" : {
"year" : 2020,
"month" : 9,
"dayOfMonth" : 8
},
"average" : 574998.153846154
},
{
"_id" : {
"year" : 2020,
"month" : 9,
"dayOfMonth" : 9
},
"average" : 494217.606225681
}]
But I need to format the result data like this. by adding the date like this:
{
2020-09-08T18:30:00.000Z : 574998.153846154,
2020-09-09T18:30:00.000Z : 494217.606225681
}
Thanks in advance.
You can use $dateFromString to create the date you want.
Also, you need $concat and $toString to parse the numbers to string and concat into a single string.
After that, using $group you can get the all values you need in the same array. And how you want set the date as KEY, is neccesary create fields k and v and parse again to string.
With the values together, using $arrayToObject you can cerate the schema you want date: average and use $replaceRoot to get only the values at top level.
To do this you need to add this query at the end of your aggregation.
{
"$set": {
"date": { "$dateFromString": { "dateString": {
"$concat": [
{ "$toString": "$_id.dayOfMonth" }, "-",
{ "$toString": "$_id.month" }, "-",
{ "$toString": "$_id.year" }
] },
"format": "%d-%m-%Y", "timezone": "Europe/Madrid"
} } }
},
{
"$group": {
"_id": null,
"date": { "$push": { "k": { "$toString": "$date" }, "v": "$average" } }
}
},
{
"$replaceRoot": { "newRoot": { "$arrayToObject": "$date" } }
}
This query add a new field called date like this:
"date": ISODate("2020-09-08T04:00:00Z")
I've used Europe/Madrid as timezone but you can choose you want to get your desired date.
Example here.
The output is:
{
"2020-09-07T22:00:00.000Z": 574998.153846154,
"2020-09-08T22:00:00.000Z": 494217.606225681
}
Using America/New_York as timezone:
{
"2020-09-08T04:00:00.000Z": 574998.153846154,
"2020-09-09T04:00:00.000Z": 494217.606225681
}

mongodb aggregation with multiple sub groups

I have a collection with documents that look similar to this:
[
{
"_id": ObjectId("..."),
"date": ISODate("..."),
"type": "TypeA",
"color": "ColorA",
"soldFor": 12.15
},
{
"_id": ObjectId("..."),
"date": ISODate("..."),
"type": "TypeA",
"color": "ColorB",
"soldFor": 13.15
},
{
"_id": ObjectId("..."),
"date": ISODate("..."),
"type": "TypeB",
"color": "ColorA",
"soldFor": 12.15
},
{
"_id": ObjectId("..."),
"date": ISODate("..."),
"type": "TypeB",
"color": "ColorB",
"soldFor": 12.15
}
]
I know that this is not a good way to store such information, but unfortunately I have no influence in that.
What I need to get out of the collection is something like this:
[
2017: {
typeA: {
colorA: {
sum: 125.00
},
colorB: {
sum: 110.00
}
},
typeB: {
colorA: {
sum: 125.000
}
}
},
2016: {
typeA: {
colorB: {
sum: 125.000
}
}
}
]
At the moment I have two group stages that give me everything grouped by year, but I have no clue how to get the two other sub-groups. Building the sum would be a nice to have, but I am certain that I can figure out how that would be done in a group.
So far my pipeline looks like this:
[
{
$group: {
_id: { type: '$type', color: '$color', year: { $year: '$date' } },
docs: {
$push: '$$ROOT'
}
}
},
{
$group: {
_id: { year: '$_id.year' },
docs: {
$push: '$$ROOT'
}
}
}
]
which results in something like this:
[
{
"_id": {
"year": 2006
},
"docs": {
"_id": {
"type": "typeA",
"color": "colorA",
"year": 2006
},
"docs": [
{
... root document
}
]
}
},
{
"_id": {
"year": 2016
},
"docs": [
{
"_id": {
"type": "typeA",
"color": "colorB",
"year": 2016
},
"docs": [
{
... root document
}
]
}
... more docs with three keys in id
]
}
]
Help is much appreciated!
Using a cohort of operators found in MongoDB 3.4.4 and newer, i.e. $addFields, $arrayToObject and $replaceRoot, you can compose a pipeline like the following to get the desired result:
[
{ "$group": {
"_id": {
"year": { "$year": "$date" },
"type": "$type",
"color": "$color"
},
"count": { "$sum": "$soldFor" }
} },
{ "$group": {
"_id": {
"year": "$_id.year",
"type": "$_id.type"
},
"counts": {
"$push": {
"k": "$_id.color",
"v": { "sum": "$count" }
}
}
} },
{ "$addFields": {
"counts": { "$arrayToObject": "$counts" }
} },
{ "$group": {
"_id": "$_id.year",
"counts": {
"$push": {
"k": "$_id.type",
"v": "$counts"
}
}
} },
{ "$addFields": {
"counts": { "$arrayToObject": "$counts" }
} },
{ "$group": {
"_id": null,
"counts": {
"$push": {
"k": { "$substr": ["$_id", 0, -1 ]},
"v": "$counts"
}
}
} },
{ "$replaceRoot": {
"newRoot": {
"$mergeObjects": [
{ "$arrayToObject": "$counts" },
"$$ROOT"
]
}
} },
{ "$project": { "counts": 0 } }
]

Active and Total User Count Mongodb

I want to write a group by query to written active user and total count(both active and inactive) grouped by a date column in mongodb. I am able to run them as two separate scripts but how to retrieve the same information in one script
db.user.aggregate(
{
"$match": { 'phoneInfo.verifiedFlag': true}
},
{
"$project": {
yearMonthDayUTC: { $dateToString: { format: "%Y-%m-%d", date: "$createdOn" } }
}
},
{
"$group": {
"_id": {day: "$yearMonthDayUTC"},
count: {
"$sum": 1
}
}
},
{
$sort: {
"_id.day": 1,
}
})
You can use the $cond operator in your group to create a conditional count as follows (assuming the inactive/active values are in a field called status):
db.user.aggregate([
{ "$match": { 'phoneInfo.verifiedFlag': true} },
{
"$group": {
"_id": { "$dateToString": { "format": "%Y-%m-%d", "date": "$createdOn" } },
"total": { "$sum": 1 },
"active_count": {
"$sum": {
"$cond": [ { "$eq": [ "$status", "active" ] }, 1, 0 ]
}
},
"inactive_count": {
"$sum": {
"$cond": [ { "$eq": [ "$status", "inactive" ] }, 1, 0 ]
}
}
}
},
{ "$sort": { "_id": 1 } }
])
For different values you can adapt the following pipeline:
db.user.aggregate([
{ "$match": { 'phoneInfo.verifiedFlag': true} },
{
"$group": {
"_id": {
"day": {
"$dateToString": {
"format": "%Y-%m-%d",
"date": "$createdOn"
}
},
"status": { "$toLower": "$status" }
},
"count": { "$sum": 1 }
}
},
{
"$group": {
"_id": "$_id.day",
"counts": {
"$push": {
"status": "$_id.status",
"count": "$count"
}
}
}
},
{ "$sort": { "_id": 1 } }
])