To be honest, I know SQL well, but I'm fairly new to MongoDB (NoSQL), so I'm a bit lost.
I have built a pipeline that works fine.
The goal was to group by day and mindmapId, count the number of distinct users who viewed each mind map, sum the watching time, and save the result into a collection so that I can query it later.
here's sample of data
MindMap
{
"_id": "Yg5uGI3Iy0",
"data": {
"id": "root",
"topic": "Main topic",
"expanded": true
},
"theme": "orange",
"_p_author": "_User$zqPzSKD7EM",
"_created_at": {
"$date": {
"$numberLong": "1658497264836"
}
},
"_updated_at": {
"$date": {
"$numberLong": "1661334292749"
}
}
}
MindmapView
{
"_id": "qWR6HVIcvT",
"startViewDate": {
"$date": {
"$numberLong": "1658669095261"
}
},
"_p_user": "_User$VnrxG9gABO",
"_p_mindmap": "MindMap$Yg5uGI3Iy0",
"_created_at": {
"$date": {
"$numberLong": "1658669095274"
}
},
"_updated_at": {
"$date": {
"$numberLong": "1658669095274"
}
}
}
Pipeline
[{
$group: {
_id: {
day: {
$dateToString: {
format: '%Y-%m-%d',
date: '$startViewDate'
}
},
mindmapId: {
$substr: [
'$_p_mindmap',
8,
-1
]
}
},
watchTime: {
$sum: {
$dateDiff: {
startDate: '$_created_at',
endDate: '$_updated_at',
unit: 'second'
}
}
},
uniqueCount: {
$addToSet: '$_p_user'
}
}
}, {
$project: {
_id: 1,
total: {
$size: '$uniqueCount'
},
watchTime: {
$sum: '$watchTime'
}
}
}]
pipeline results
[{
"_id": {
"day": "2022-08-01",
"mindmapId": "oGCQDQmaNK"
},
"total": 1,
"watchTime": 7
},{
"_id": {
"day": "2022-08-11",
"mindmapId": "7YlZ6FPwiD"
},
"total": 1,
"watchTime": 21
},{
"_id": {
"day": "2022-08-15",
"mindmapId": "7YlZ6FPwiD"
},
"total": 1,
"watchTime": 13
},{
"_id": {
"day": "2022-07-25",
"mindmapId": "7YlZ6FPwiD"
},
"total": 1,
"watchTime": 3
},{
"_id": {
"day": "2022-08-01",
"mindmapId": "YXa8omyChc"
},
"total": 2,
"watchTime": 1306837
},{
"_id": {
"day": "2022-07-25",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 7
},{
"_id": {
"day": "2022-08-17",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 60
},{
"_id": {
"day": "2022-08-06",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 0
},{
"_id": {
"day": "2022-08-11",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 69
},{
"_id": {
"day": "2022-08-10",
"mindmapId": "oGCQDQmaNK"
},
"total": 1,
"watchTime": 4
},{
"_id": {
"day": "2022-08-15",
"mindmapId": "Yg5uGI3Iy0"
},
"total": 1,
"watchTime": 9
},
...
]
However, to exploit this data faster, I need to include the mind map's author in the result collection.
The goal is to group by day and mindmapId, count the number of users who viewed it, sum the watching time, get the mind map's author, and save everything into a collection.
To do that I need to use $lookup, but the result is messy: the lookup behaves like a full join in SQL. I tried many combinations before posting this.
Here is the main thing I tried:
[{
$group: {
_id: {
day: {
$dateToString: {
format: '%Y-%m-%d',
date: '$startViewDate'
}
},
mindmapId: {
$substr: [
'$_p_mindmap',
8,
-1
]
}
},
watchTime: {
$sum: {
$dateDiff: {
startDate: '$_created_at',
endDate: '$_updated_at',
unit: 'second'
}
}
},
uniqueCount: {
$addToSet: '$_p_user'
}
}
}, {
$lookup: {
from: 'MindMap',
localField: '_objectId',
foreignField: '_id.mindmapId',
as: 'tempMindmapPointer'
}
}, {
$unwind: '$tempMindmapPointer'
}, {
$match: {
'tempMindmapPointer._id': '_id.mindmapId'
}
}, {
$project: {
_id: 1,
total: {
$size: '$uniqueCount'
},
watchTime: {
$sum: '$watchTime'
},
author: {
$substr: [
'$tempMindmapPointer._p_author',
6,
-1
]
}
}
}]
The $match doesn't work here: it leaves me with no results.
If I remove the $match, it behaves like a full join between the user list and the mindmap id list, which I don't want:
[{
"_id": {
"day": "2022-08-17",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 60,
"author": "zqPzSKD7EM"
},{
"_id": {
"day": "2022-08-17",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 60,
"author": "zqPzSKD7EM"
},{
"_id": {
"day": "2022-08-17",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 60,
"author": "zqPzSKD7EM"
},{
"_id": {
"day": "2022-08-17",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 60,
"author": "VnrxG9gABO"
},{
"_id": {
"day": "2022-08-17",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 60,
"author": "zqPzSKD7EM"
},{
"_id": {
"day": "2022-08-17",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 60,
"author": "x6kNvG2O0X"
},...
]
I have tried swapping the localField: '_objectId' and foreignField: '_id.mindmapId' values.
I have also tried doing the lookup first and then grouping by _id {day, mindmapId, authorId}, but I was never able to get that to run.
What can I do to make this query work? I'm sure it has something to do with $match and $lookup.
If I understand you correctly (since you didn't add the requested result), the simple option is:
db.MindmapView.aggregate([
{$group: {
_id: {
day: {$dateToString: {format: "%Y-%m-%d", date: "$startViewDate"}},
mindmapId: {$substr: ["$_p_mindmap", 8, -1]}
},
watchTime: {
$sum: {
$dateDiff: {startDate: "$_created_at", endDate: "$_updated_at", unit: "second"}
}
},
uniqueCount: {$addToSet: "$_p_user"}
}
},
{$project: {_id: 1, total: {$size: "$uniqueCount"}, watchTime: 1}},
{$lookup: {
from: "MindMap",
localField: "_id.mindmapId",
foreignField: "_id",
as: "author"
}
},
{$set: {author: {$first: "$author._p_author"}}}
])
See how it works on the playground example.
There is another option that may be a little more efficient, which is using the '$lookup' with a pipeline, to bring only the author from the MindMap collection instead of bringing the entire document and then filter it.
In this case the $lookup stage will be:
{
$lookup: {
from: "MindMap",
let: {id: "$_id.mindmapId"},
pipeline: [
{$match: {$expr: {$eq: ["$$id", "$_id"]}}},
{$project: {_p_author: 1, _id: 0}}
],
as: "author"
}
}
Related
I have a series of documents gathered by aggregation grouping. This is the result for one document:
{
"_id": {
"ip": "79.xxx.xxx.117",
"myDate": "2022-10-19"
},
"date": "2022-10-19",
"allVisitedPages": [
{
"page": "/",
"time": {
"time": "2022-10-19T11:35:44.655Z",
"tz": "-120",
"_id": "634fe1100a011986b7137da0"
}
},
{
"page": "/2",
"time": {
"time": "2022-10-19T12:14:29.536Z",
"tz": "-120",
"_id": "634fea257acb264f23d421f1"
}
},
{
"page": "/",
"time": {
"time": "2022-10-19T15:37:30.002Z",
"tz": "-120",
"_id": "634fea266001ea364eeb38ea"
}
},
],
"visitedPages": 3,
"createdAt": "2022-10-19T11:35:44.920Z"
},
I want to get this (in this case 2 documents as the time difference between array position 2 and 3 is greater than 2 hours):
{
"_id": {
"ip": "79.xxx.xxx.117",
"myDate": "2022-10-19"
},
"date": "2022-10-19",
"allVisitedPages": [
{
"page": "/",
"durationInMinutes": "39",
"time": {
"time": "2022-10-19T11:35:44.655Z",
"tz": "-120",
"_id": "634fe1100a011986b7137da0"
}
},
{
"page": "/2",
"durationInMinutes": "2",
"time": {
"time": "2022-10-19T12:14:29.536Z",
"tz": "-120",
"_id": "634fea257acb264f23d421f1"
}
}
],
"visitedPages": 2,
},
{
"_id": {
"ip": "79.xxx.xxx.117",
"myDate": "2022-10-19"
},
"date": "2022-10-19",
"allVisitedPages": [
{
"page": "/",
"durationInMinutes": "2",
"time": {
"time": "2022-10-19T15:37:30.002Z",
"tz": "-120",
"_id": "634fea266001ea364eeb38ea"
}
},
],
"visitedPages": 1,
},
I want to get a new grouping document whenever the time between one array position and the following array position is greater than 2 hours. For the last array position, the duration should always be "2".
I tried $divide and $dateDiff, but this is not possible in the $group stage, as it's a unary operator. Another approach I tried is to calculate the sum of the start and end times by dividing. But how can this be done at the array level in the $group stage? Could someone point me in the right direction, if this is possible at all?
You can group and then reduce, but another option is to use $setWindowFields to calculate your grouping index before grouping:
db.collection.aggregate([
{$setWindowFields: {
partitionBy: {$concat: ["$ip", "$date"]},
sortBy: {"time.time": 1},
output: {prevtime: {
$push: "$time.time",
window: {documents: [-1, "current"]}
}}
}},
{$addFields: {
minutesDiff: {
$toInt: {
$dateDiff: {
startDate: {$first: "$prevtime"},
endDate: {$last: "$prevtime"},
unit: "minute"
}
}
}
}},
{$addFields: {deltaIndex: {$cond: [{$gt: ["$minutesDiff", 120]}, 1, 0]}}},
{$setWindowFields: {
partitionBy: {$concat: ["$ip", "$date"]},
sortBy: {"time.time": 1},
output: {
groupIndex: {
$sum: "$deltaIndex",
window: {documents: ["unbounded", "current"]}
},
duration: {
$push: "$minutesDiff",
window: {documents: ["current", 1]}
}
}
}
},
{$set: {
duration: {
$cond: [
{$and: [
{$eq: [{$size: "$duration"}, 2]},
{$lte: [{$last: "$duration"}, 120]}
]},
{$last: "$duration"},
2
]
}
}},
{$group: {
_id: {ip: "$ip", myDate: "$date", groupIndex: "$groupIndex"},
date: {$first: "$date"},
allVisitedPages: {$push: {page: "$page", time: "$time", duration: "$duration"}},
visitedPages: {$sum: 1}
}},
{$unset: "_id.groupIndex"}
])
See how it works on the playground example
I have documents with below schema
id :
currencyCode : "USD"
businessDayStartDate : ""
hourZoneNumber : 1
customerCount : 0
itemQuantity : 4
nodeId : "STORE_DEV"
endpointId : "998"
amount : 4
I am trying to find documents that match nodeId and trying to aggregate customerCount, itemQuantity and amount for each hourZoneNumber.
Below is the query
db.getCollection("xxx").aggregate([
{ "$match": { "nodeId": { "$in":["STORE_DEV_1","STORE_DEV_2"] }, "businessDayStartDate" : { "$gte": "2022-03-04" , "$lte": "2022-03-07" } }},
{ "$group": {
"_id": {
"nodeId": "$nodeId",
"endpointId": "$endpointId",
"hourZoneNumber": "$hourZoneNumber"
},
"customerCount": { "$sum": "$customerCount" },
"itemQuantity" : { "$sum": "$itemQuantity" },
"amount" : { "$sum": "$amount" }
}
},
{ "$group": {
"_id": {
"nodeId": "$_id.nodeId",
"endpointId": "$_id.endpointId"
},
"hourZones": {
"$addToSet": {
"hourZoneNumber": "$_id.hourZoneNumber",
"customerCount": { "$sum": "$customerCount" },
"itemQuantity" : { "$sum": "$itemQuantity" },
"amount" : { "$sum": "$amount" }
}
}
}
},
{ "$group": {
"_id": "$_id.nodeId",
"endpoints": {
"$addToSet": {
"endpointId": "$_id.endpointId",
"hourZones": "$hourZones"
}
},
"total": {
"$addToSet": {
"customerCount": { "$sum": "$hourZones.customerCount" },
"itemQuantity" : { "$sum": "$hourZones.itemQuantity" },
"amount" : { "$sum": "$hourZones.amount" }
}
}
}
},
{
$project: {
_id: 0,
nodeId: "$_id",
endpoints: 1,
hourZones: 1,
total: 1
}
}
])
Output is as below:
{
nodeId: 'STORE_DEV_2',
endpoints: [
{ endpointId: '998',
hourZones:
[
{ hourZoneNumber: 1,
customerCount: 0,
itemQuantity: 4,
amount: Decimal128("4") }
] } ],
total: [ { customerCount: 0, itemQuantity: 4, amount: Decimal128("4") } ],
}
{
nodeId: 'STORE_DEV_1',
endpoints:
[ { endpointId: '999',
hourZones:
[ { hourZoneNumber: 2,
customerCount: 2,
itemQuantity: 4,
amount: Decimal128("4") },
{ hourZoneNumber: 1,
customerCount: 4,
itemQuantity: 8,
amount: Decimal128("247.56") } ] } ],
total:
[ { customerCount: 6,
itemQuantity: 12,
amount: Decimal128("251.56") } ]
}
I want the output to be sorted as follows: first by nodeId, then by endpointId within endpoints, and lastly by hourZoneNumber within hourZones.
How do I do this? I tried using sort() with all three fields, but it did not work. Also, can someone please confirm whether there is a better way than the code above, as I am new to MongoDB?
Edit:
Please find sample input data at https://mongoplayground.net/p/FYm3QMMgrNI
Since you already have the separated data at the beginning, it is simply a matter of saving these values through the grouping and then sorting by them in the end.
Edit: In order to sort each inner array, we use $push instead of $addToSet inside the $group and $sort before each $group:
// Builds one document per nodeId containing its endpoints (ordered by endpointId), each
// endpoint's hour zones (ordered by hourZoneNumber) and the totals; the resulting
// documents are returned ordered by nodeId.
db.collection.aggregate([
  // Restrict to the requested stores and business days (the dates are compared as strings).
  {
    "$match": {
      "nodeId": {"$in": ["STORE_DEV_TTEC", "STORE_DEV_TTEZ"]},
      "businessDayStartDate": {"$gte": "2022-03-04", "$lte": "2022-03-07"}
    }
  },
  {"$sort": {"nodeId": 1, "endpointId": 1, "hourZoneNumber": 1}},
  // Level 1: sum the measures per (nodeId, endpointId, hourZoneNumber).
  {
    "$group": {
      "_id": {
        "nodeId": "$nodeId",
        "endpointId": "$endpointId",
        "hourZoneNumber": "$hourZoneNumber"
      },
      "customerCount": {"$sum": "$customerCount"},
      "itemQuantity": {"$sum": "$itemQuantity"},
      "amount": {"$sum": "$amount"}
    }
  },
  // $group does not order its output, so sort right before each $push so that the
  // pushed array is built in the wanted order.
  {"$sort": {"_id.hourZoneNumber": 1}},
  // Level 2: collect the hour zones of every (nodeId, endpointId) pair.
  {
    "$group": {
      "_id": {
        "nodeId": "$_id.nodeId",
        "endpointId": "$_id.endpointId"
      },
      "hourZones": {
        "$push": {
          "hourZoneNumber": "$_id.hourZoneNumber",
          "customerCount": {"$sum": "$customerCount"},
          "itemQuantity": {"$sum": "$itemQuantity"},
          "amount": {"$sum": "$amount"}
        }
      }
    }
  },
  {"$sort": {"_id.endpointId": 1}},
  // Level 3: collect the endpoints of every node.
  {
    "$group": {
      "_id": "$_id.nodeId",
      "endpoints": {
        "$push": {
          "endpointId": "$_id.endpointId",
          "hourZones": "$hourZones"
        }
      },
      // Same `total` accumulator as in the question's pipeline: for each endpoint, the
      // measures summed over that endpoint's hour zones, collected as a set.
      "total": {
        "$addToSet": {
          "customerCount": {"$sum": "$hourZones.customerCount"},
          "itemQuantity": {"$sum": "$hourZones.itemQuantity"},
          "amount": {"$sum": "$hourZones.amount"}
        }
      }
    }
  },
  // At this point the node id is stored in `_id` (it is only renamed to `nodeId` by the
  // $project below), so `_id` is the key to sort on.
  {"$sort": {"_id": 1}},
  {
    "$project": {"_id": 0, "nodeId": "$_id", "endpoints": 1, "total": 1}
  }
])
You can see it here
I have a Mongo collection that looks like this with a bunch of months, days, years:
[
{
"Date": ISODate("2021-08-05T04:59:54.000Z"),
"Amount": 999,
"Business": "Business 1",
},
{
"Date": ISODate("2021-08-05T04:59:54.000Z"),
"Amount": 5.99,
"Business": "Business 2",
},
{
"Date": ISODate("2021-07-17T21:41:56.000Z"),
"Amount": 20000,
"Business": "Business 2",
},
{
"Date": ISODate("2021-06-17T21:41:56.000Z"),
"Amount": 200,
"Business": "Business 5",
}
]
I have done an aggregation like this
db.collection.aggregate({
$group: {
_id: {
year: {
$year: "$Date"
},
month: {
$month: "$Date"
}
},
sum: {
$sum: "$Amount"
}
}
})
...which partially gives me what I want which is a sum of amounts per year and month.
[
{
"_id": {
"month": 6,
"year": 2021
},
"sum": 200
},
{
"_id": {
"month": 7,
"year": 2021
},
"sum": 20000
},
{
"_id": {
"month": 8,
"year": 2021
},
"sum": 1004.99
}
]
What I would like however is to have something like the below where the year is at the top and the months are aggregated in a sum so that it's easier to iterate in the frontend but I have not been able to get it no matter what I have tried:
[
{
"year": 2021,
"sumAmount": 21204.99,
"months": [
{
"month": 7,
"amount": 20000
},
{
"month": 6,
"amount": 200
},
{
"month": 8,
"amount": 1004.99
}
]
},
{ "year" : 2020,
....
}
]
I have been pretty close in using another $group and $push but I have not been able to get what in my mind is a second group by month. Any help will be appreciated!
You just need one more $group to get your expected result. For another sorting, you can put an $sort after the $group stage. You will need to use $push to keep the ordering in the final array.
db.collection.aggregate([
{
$group: {
_id: {
year: {
$year: "$Date"
},
month: {
$month: "$Date"
}
},
sum: {
$sum: "$Amount"
}
}
},
{
"$sort": {
"_id.year": 1,
"_id.month": 1
}
},
{
"$group": {
"_id": "$_id.year",
"sumAmount": {
$sum: "$sum"
},
"months": {
"$push": {
"month": "$_id.month",
"amount": "$sum"
}
}
}
}
])
Here is the Mongo playground for your reference.
I have a mongodb database that collects device data.
Example document is
{
"_id" : ObjectId("5c125a185dea1b0252c5352"),
"time" : ISODate("2018-12-13T15:09:42.536Z"),
"mac" : "10:06:21:3e:0a:ff",
}
The goal would be to count the unique mac values per day, from the first document in the db to the last document in the db.
I've been playing around and came to the conclusion that I would need to have multiple groups as well as projects during my aggregations.
This is what I tried - not sure if it's in the right direction or not or just completely messed up.
pipeline = [
{"$project": {
"_id": 1,
"mac": 1,
"day": {
"$dayOfMonth":"$time"
},
"month": {
"$month":"$time"
},
"year": {
"$year":"$time"
}
}
},
{
"$project": {
"_id": 1,
"mac": 1,
"time": {
"$concat": [{
"$substr":["$year", 0, 4]
},
"-", {
"$substr": ["$month", 0, 2]
},
"-",
{
"$substr":["$day", 0, 2]
}]
}
}
},
{
"$group": {
"_id": {
"time": "$time",
"mac": "$mac"
}
},
"$group": {
"_id": "$_id.time",
"count":{"$sum": 1},
}
}
]
data = list(collection.aggregate(pipeline, allowDiskUse=True))
The output now doesn't look like it did any aggregation,
[{"_id": null, "count": 751050}]
I'm using Pymongo as my driver and using Mongodb 4.
Ideally it should just show the date and count (eg { "_id" : "2018-12-13", "count" : 2 }.
I would love some feedback and advice.
Thanks in advance.
I prefer to minimize the number of stages, and especially to avoid unnecessary $group stages. So I would do it with the following pipeline:
# Counts the distinct MAC addresses seen on each calendar day.
pipeline = [
    # One group per day: `time` is formatted as YYYY-MM-DD and the distinct
    # `mac` values of that day are collected into a set.
    {'$group': {
        '_id': {'$dateToString': {'format': "%Y-%m-%d", 'date': "$time"}},
        'macs': {'$addToSet': '$mac'}
    }},
    # Replace the set by its size, i.e. the number of unique MACs for the day.
    # Stage names must be quoted strings: a bare $addFields is not valid Python.
    {'$addFields': {'macs': {'$size': '$macs'}}}
]
There's an operator called "$dateToString", which would solve most of your problems.
Edit: I didn't read the question carefully. @Asya Kamsky, thank you for pointing that out. Here's the new answer.
# Counts the unique MAC addresses per day using two consecutive $group stages.
pipeline = [
    # Stage 1: one document per distinct (day, mac) pair.
    # Every key is a quoted string: bare $dateToString / format / date are not
    # valid Python.
    {
        "$group": {
            "_id": {
                "date": {
                    "$dateToString": {
                        "format": "%Y-%m-%d",
                        "date": "$time"
                    }
                },
                "mac": "$mac"
            }
        }
    },
    # Stage 2: count the (day, mac) pairs of each day, i.e. its unique MACs.
    {
        "$group": {
            "_id": "$_id.date",
            "count": {
                "$sum": 1
            }
        }
    }
]
[
{
"$project": {
"_id": 1,
"mac": 1,
"time": { "$dateToString": { "format": "%Y-%m-%d", "date": "$time", "timezone": "Africa/Johannesburg"}}
},
},
{
"$group": {
"_id":{
"time": "$time",
"mac": "$mac",
}}},{
"$group": {
"_id": "$_id.time",
"count":{"$sum": 1}
}},
{"$sort": SON([("_id", -1)])}
]
Does exactly what it should do.
Thanks. :)
This is in reference to this question.
This is my data set:
[
{
"rating": 4,
"ceatedAt": ISODate("2016-08-08T15:32:41.262+0000")
},
{
"rating": 3,
"createdAt": ISODate("2016-08-08T15:32:41.262+0000")
},
{
"rating": 3,
"ceatedAt": ISODate("2016-07-01T15:32:41.262+0000")
},
{
"rating": 5,
"createdAt": ISODate("2016-07-01T15:32:41.262+0000")
}
]
I want to be able to group the results on a weekly or monthly basis, based on the date.
How would I do that in mongo?
This was the answer given for grouping by days.
db.collection.aggregate([
{
"$project": {
"formattedDate": {
"$dateToString": { "format": "%Y-%m-%d", "date": "$ceatedAt" }
},
"createdAtMonth": { "$month": "$ceatedAt" },
"rating": 1
}
},
{
"$group": {
"_id": "$formattedDate",
"average": { "$avg": "$rating" },
"month": { "$first": "$createdAtMonth" },
}
}
])
For grouping on weekly basis, run the following pipeline which mainly uses the Date Aggregation Operators to extract the date parts:
db.collection.aggregate([
{
"$project": {
"createdAtWeek": { "$week": "$createdAt" },
"createdAtMonth": { "$month": "$createdAt" },
"rating": 1
}
},
{
"$group": {
"_id": "$createdAtWeek",
"average": { "$avg": "$rating" },
"month": { "$first": "$createdAtMonth" }
}
}
])
and for monthly aggregates, interchange the $group key to use the created month field:
db.collection.aggregate([
{
"$project": {
"createdAtWeek": { "$week": "$createdAt" },
"createdAtMonth": { "$month": "$createdAt" },
"rating": 1
}
},
{
"$group": {
"_id": "$createdAtMonth",
"average": { "$avg": "$rating" },
"week": { "$first": "$createdAtWeek" }
}
}
])