Calculate distinct count on fields in mongodb - mongodb

I have following collection:
[{
"id": 1,
"activity_type": "view",
"user_id": 1
},
{
"id": 2,
"activity_type": "save",
"user_id": 1
},
{
"id": 3,
"activity_type": "save",
"user_id": 1
},
{
"id": 4,
"activity_type": "save",
"user_id": 2
}]
I need to get a result like this:
[{
"activity_type": "view",
"count": 1,
"user_count": 1
},{
"activity_type": "save",
"count": 3,
"user_count": 2
}]
So far I have reached this:
db.getCollection('activities').aggregate([
{
$group:{_id:"$activity_type", count: {$sum: 1}}
},
{
$project:
{
_id: 0,
activity_type: "$_id",
count: 1
}
}
])
It gives me:
[{
"activity_type": "view",
"count": 1
},
{
"activity_type": "save",
"count": 3
}]
How can I add distinct user_id count as well?

What you need to do is use $addToSet in the group stage to gather the unique ids, after that in the $project stage you can use $size to show the proper user count.
// Count activities and distinct users per activity type.
db.collection.aggregate([
  // One bucket per activity_type: tally the rows and collect the distinct user ids.
  { $group: {
      _id: "$activity_type",
      count: { $sum: 1 },
      user_ids: { $addToSet: "$user_id" }
  } },
  // Expose _id as activity_type and reduce the id set to its size.
  { $project: {
      _id: 0,
      activity_type: "$_id",
      user_count: { $size: "$user_ids" },
      count: 1
  } }
])
Mongo Playground

Related

get rank in mongodb with date range

I have following stat data stored daily for users.
{
"_id": {
"$oid": "638df4e42332386e0e06d322"
},
"appointment_count": 1,
"item_id": 2,
"item_type": "user",
"company_id": 5,
"created_date": "2022-12-05",
"customer_count": 1,
"lead_count": 1,
"door_knocks": 10
}
{
"_id": {
"$oid": "638f59a9bf33442a57c3aa99"
},
"lead_count": 2,
"item_id": 2,
"item_type": "user",
"company_id": 5,
"created_date": "2022-12-06",
"video_viewed": 2,
"door_knocks": 9
}
And I'm using the following query to get the items by rank
user_stats_2022_12.aggregate([{"$match":{"company_id":5,"created_date":{"$gte":"2022-12-04","$lte":"2022-12-06"}}},{"$setWindowFields":{"partitionBy":"$company_id","sortBy":{"door_knocks":-1},"output":{"item_rank":{"$denseRank":{}},"stat_sum":{"$sum":"$door_knocks"}}}},{"$facet":{"metadata":[{"$count":"total"}],"data":[{"$skip":0},{"$limit":100},{"$sort":{"item_rank":1}}]}}])
It gives me a rank, but with the above data the records with item_id: 2 get different ranks even though they share the same item_id. So I want to group them by item_id first and then apply the rank.
It's a little messy, but here's a playground - https://mongoplayground.net/p/JrJOo4cl9X1.
If you're going to sort by knocks after grouping, I'm assuming that you'll want the sum of door_knocks for a given item_id for this sort.
// Rank items by their summed door_knocks inside a company and date window,
// then return the original daily rows annotated with that rank.
db.collection.aggregate([
  // Keep only the requested company and date range.
  {
    $match: {
      company_id: 5,
      created_date: { $gte: "2022-12-04", $lte: "2022-12-06" }
    }
  },
  // Collapse the daily rows into one document per item, keeping the source
  // rows in `docs` so they can be restored after ranking.
  {
    $group: {
      _id: { item_id: "$item_id", company_id: "$company_id" },
      docs: { $push: "$$ROOT" },
      total_door_knocks: { $sum: "$door_knocks" }
    }
  },
  // Rank the per-item totals within each company.
  // After $group the company id lives under _id, so the partition key must
  // be "$_id.company_id"; a top-level "$company_id" no longer exists here.
  {
    $setWindowFields: {
      partitionBy: "$_id.company_id",
      sortBy: { total_door_knocks: -1 },
      output: {
        item_rank: { $denseRank: {} },
        stat_sum: { $sum: "$total_door_knocks" }
      }
    }
  },
  // Back to one document per original daily row.
  { $unwind: "$docs" },
  // Flatten the source row next to the computed rank fields.
  {
    $project: {
      _id: "$docs._id",
      appointment_count: "$docs.appointment_count",
      company_id: "$docs.company_id",
      created_date: "$docs.created_date",
      customer_count: "$docs.customer_count",
      door_knocks: "$docs.door_knocks",
      item_id: "$docs.item_id",
      item_type: "$docs.item_type",
      lead_count: "$docs.lead_count",
      // Present in the sample data; previously dropped by this projection.
      video_viewed: "$docs.video_viewed",
      item_rank: 1,
      stat_sum: 1,
      total_door_knocks: 1
    }
  },
  {
    $facet: {
      metadata: [
        { $count: "total" }
      ],
      // Sort before paginating; sorting after $skip/$limit would only
      // reorder an arbitrary page instead of selecting the top-ranked rows.
      data: [
        { $sort: { item_rank: 1 } },
        { $skip: 0 },
        { $limit: 100 }
      ]
    }
  }
])

Mongodb pipeline on parse server document add pointer field with $lookup

To be honest I really know sql but I'm kind of new to mongodb noSql so I'm a bit lost.
I have made a pipeline that's just working fine.
The point was to group by day and mindmapId to count the number of users who viewed it and sum the watching time, and to save the result into a collection in order to query it afterwards.
here's sample of data
MindMap
{
"_id": "Yg5uGI3Iy0",
"data": {
"id": "root",
"topic": "Main topic",
"expanded": true
},
"theme": "orange",
"_p_author": "_User$zqPzSKD7EM",
"_created_at": {
"$date": {
"$numberLong": "1658497264836"
}
},
"_updated_at": {
"$date": {
"$numberLong": "1661334292749"
}
}
}
MindmapView
{
"_id": "qWR6HVIcvT",
"startViewDate": {
"$date": {
"$numberLong": "1658669095261"
}
},
"_p_user": "_User$VnrxG9gABO",
"_p_mindmap": "MindMap$Yg5uGI3Iy0",
"_created_at": {
"$date": {
"$numberLong": "1658669095274"
}
},
"_updated_at": {
"$date": {
"$numberLong": "1658669095274"
}
}
}
Pipeline
[{
$group: {
_id: {
day: {
$dateToString: {
format: '%Y-%m-%d',
date: '$startViewDate'
}
},
mindmapId: {
$substr: [
'$_p_mindmap',
8,
-1
]
}
},
watchTime: {
$sum: {
$dateDiff: {
startDate: '$_created_at',
endDate: '$_updated_at',
unit: 'second'
}
}
},
uniqueCount: {
$addToSet: '$_p_user'
}
}
}, {
$project: {
_id: 1,
total: {
$size: '$uniqueCount'
},
watchTime: {
$sum: '$watchTime'
}
}
}]
pipeline results
[{
"_id": {
"day": "2022-08-01",
"mindmapId": "oGCQDQmaNK"
},
"total": 1,
"watchTime": 7
},{
"_id": {
"day": "2022-08-11",
"mindmapId": "7YlZ6FPwiD"
},
"total": 1,
"watchTime": 21
},{
"_id": {
"day": "2022-08-15",
"mindmapId": "7YlZ6FPwiD"
},
"total": 1,
"watchTime": 13
},{
"_id": {
"day": "2022-07-25",
"mindmapId": "7YlZ6FPwiD"
},
"total": 1,
"watchTime": 3
},{
"_id": {
"day": "2022-08-01",
"mindmapId": "YXa8omyChc"
},
"total": 2,
"watchTime": 1306837
},{
"_id": {
"day": "2022-07-25",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 7
},{
"_id": {
"day": "2022-08-17",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 60
},{
"_id": {
"day": "2022-08-06",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 0
},{
"_id": {
"day": "2022-08-11",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 69
},{
"_id": {
"day": "2022-08-10",
"mindmapId": "oGCQDQmaNK"
},
"total": 1,
"watchTime": 4
},{
"_id": {
"day": "2022-08-15",
"mindmapId": "Yg5uGI3Iy0"
},
"total": 1,
"watchTime": 9
},
...
]
However to exploit this data faster I need to include the mindmap author inside the result collection.
The point is to group by day and mindmapId to count number of user viewed it and sum watching time and get the mindmap author and save it into a collection.
To do that I need to use $lookup, but the result is kind of messy and the lookup acts like a full join in SQL. I've tried many combinations before posting this.
Here's what I have mainly tried
[{
$group: {
_id: {
day: {
$dateToString: {
format: '%Y-%m-%d',
date: '$startViewDate'
}
},
mindmapId: {
$substr: [
'$_p_mindmap',
8,
-1
]
}
},
watchTime: {
$sum: {
$dateDiff: {
startDate: '$_created_at',
endDate: '$_updated_at',
unit: 'second'
}
}
},
uniqueCount: {
$addToSet: '$_p_user'
}
}
}, {
$lookup: {
from: 'MindMap',
localField: '_objectId',
foreignField: '_id.mindmapId',
as: 'tempMindmapPointer'
}
}, {
$unwind: '$tempMindmapPointer'
}, {
$match: {
'tempMindmapPointer._id': '_id.mindmapId'
}
}, {
$project: {
_id: 1,
total: {
$size: '$uniqueCount'
},
watchTime: {
$sum: '$watchTime'
},
author: {
$substr: [
'$tempMindmapPointer._p_author',
6,
-1
]
}
}
}]
The $match doesn't work here: it leaves me with no results.
If I remove the $match, it acts like a full join of the user list with the mindmap id list, which I don't want.
[{
"_id": {
"day": "2022-08-17",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 60,
"author": "zqPzSKD7EM"
},{
"_id": {
"day": "2022-08-17",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 60,
"author": "zqPzSKD7EM"
},{
"_id": {
"day": "2022-08-17",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 60,
"author": "zqPzSKD7EM"
},{
"_id": {
"day": "2022-08-17",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 60,
"author": "VnrxG9gABO"
},{
"_id": {
"day": "2022-08-17",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 60,
"author": "zqPzSKD7EM"
},{
"_id": {
"day": "2022-08-17",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 60,
"author": "x6kNvG2O0X"
},...
]
I have tried to switch localField: '_objectId' foreignField:'_id.mindmapId' values.
I have also tried to make the lookup first and group by id{day,mindmapId,authorId} but I have never been able to make this compiling.
What could I do to make this request working ? I'm sure there is something to do with $match and $lookup
If I understand you correctly (since you didn't add the requested result), the simple option is:
db.MindmapView.aggregate([
  // Bucket the views per calendar day and per mindmap.
  {
    $group: {
      _id: {
        day: {
          $dateToString: { format: "%Y-%m-%d", date: "$startViewDate" }
        },
        // Skip the first 8 characters of the pointer string (the "MindMap$"
        // prefix in the sample data) to keep the bare id.
        mindmapId: { $substr: ["$_p_mindmap", 8, -1] }
      },
      // Seconds between _created_at and _updated_at, summed over the bucket.
      watchTime: {
        $sum: {
          $dateDiff: {
            startDate: "$_created_at",
            endDate: "$_updated_at",
            unit: "second"
          }
        }
      },
      // Distinct viewers; only the size of this set is kept below.
      uniqueCount: { $addToSet: "$_p_user" }
    }
  },
  {
    $project: {
      _id: 1,
      total: { $size: "$uniqueCount" },
      watchTime: 1
    }
  },
  // Join each bucket to its MindMap document through the extracted id.
  {
    $lookup: {
      from: "MindMap",
      localField: "_id.mindmapId",
      foreignField: "_id",
      as: "author"
    }
  },
  // Replace the joined array with the _p_author of its first element.
  { $set: { author: { $first: "$author._p_author" } } }
])
See how it works on the playground example.
There is another option that may be a little more efficient, which is using the '$lookup' with a pipeline, to bring only the author from the MindMap collection instead of bringing the entire document and then filter it.
In this case the $lookup stage will be:
{
$lookup: {
from: "MindMap",
let: {id: "$_id.mindmapId"},
pipeline: [
{$match: {$expr: {$eq: ["$$id", "$_id"]}}},
{$project: {_p_author: 1, _id: 0}}
],
as: "author"
}
}

MongoDB aggregation - group by multiple keys with count for every level of grouping

I have a flat document structure I'm trying to group together based on multiple keys.
The structure of documents is like this
[{
"_id": ObjectId("5f47eee763f55a095048f542"),
"channel": "Z_C",
"city": "A",
"status": 0
},
.
.
Where city, channel can have different values and status can only either be 0 or 1.
I'm trying to get a grouping result like this
[{
"city": "A",
"channels": [
{
"name": "Z_C",
"counts":[
{"status": 0,"count": 7},
{"status": 1,"count": 2},
],
"count": 9
},
{
"name": "S_C",
"counts":[
{"status": 0, "count": 2},
{"status": 1,"count": 9},
],
"count": 11
}
],
"count": 20
},
.
.
From the above result we can conclude that
City A has two channels Z_C and S_C
Z_C has 7 values with status of 0 and 2 values with status of 1, and total Z_C is 9
S_C has 2 values with status of 0 and 9 values with status of 1, and total S_C is 11
total count of values in city A are 20.
I don't need the result structure exactly as mentioned above as long as I can derive the 4 points from the result.
I've achieved this much, and here's my code on mongo playground for what I have done so far.
{
"_id": {
"city": "A"
},
"channels": [
{
"count": 7,
"name": "Z_C",
"status": 0
},
{
"count": 2,
"name": "S_C",
"status": 0
},
{
"count": 9,
"name": "S_C",
"status": 1
},
{
"count": 2,
"name": "Z_C",
"status": 1
}
],
"count": 20
}
How can I group the inner array of channels based on channel to get the expected result?
Thanks.
You can do it with three $group stages.
Your first group is almost right:
// Nested counts: city -> channels -> per-status counts, with a total at each level.
db.collection.aggregate([
  // 1. Count documents per (city, channel, status) combination.
  {
    $group: {
      _id: {
        city: "$city",
        name: "$channel",
        status: "$status"
      },
      count: { $sum: 1 }
    }
  },
  // Put the rows in a defined order so the $push below fills each counts
  // array by ascending status.
  { $sort: { "_id.city": 1, "_id.name": 1, "_id.status": 1 } },
  // 2. Just need to group with city and channel name, because we need to
  //    prepare (construct) the counts array for status.
  {
    $group: {
      _id: {
        city: "$_id.city",
        name: "$_id.name"
      },
      counts: {
        $push: {
          count: "$count",
          status: "$_id.status"
        }
      },
      count: { $sum: "$count" }
    }
  },
  // Sort by city (and channel) here so channels are pushed in a defined order.
  { $sort: { "_id.city": 1, "_id.name": 1 } },
  // 3. Finally construct the channels array and the per-city total.
  {
    $group: {
      _id: "$_id.city",
      channels: {
        $push: {
          name: "$_id.name",
          counts: "$counts",
          count: "$count"
        }
      },
      count: { $sum: "$count" }
    }
  },
  // $group does not order its output documents, so the cities are sorted last.
  { $sort: { _id: 1 } }
])
Playground

How to group data again in a single object with new keys after a mongodb $group?

I have a mongodb database with a collection of companies that look like this (it's just a sample, the actual collection is much larger):
[
{
"_id": 100,
"name": "Test Name 1",
"level": "1"
},
{
"_id": 101,
"name": "Test Name 2",
"level": "1"
},
{
"_id": 102,
"name": "Test Name 3",
"level": "2"
}
]
Where "level" can only range from 0 to 5
I'm trying to make an aggregate query with $group and $project that counts how many companies there are in each level, but according to the API specification I need follow, it needs to be formatted like this, in a single object:
{
"metrics": {
"companies": {
"total": <integer>,
"level1": <integer>,
"level2": <integer>,
"level3": <integer>,
"level4": <integer>,
"level5": <integer>
}
}
}
The closest I could get to this was using $group and $project like this:
Companies.aggregate([{
$group: {
_id: {
level: "$level"
},
count: {
$sum: 1
}
}
},
{
$project: {
_id: 0,
level: "$_id.level",
total: "$count"
}
}
])
Which gives the following result:
[
{
"level": 3,
"total": 108
},
{
"level": 5,
"total": 172
},
{
"level": 2,
"total": 624
},
{
"level": 4,
"total": 98
},
{
"level": 1,
"total": 137
},
{
"level": 0,
"total": 94
}
]
However, this result is an array and I need to put the data for each level in a single object with new keys "level1", "level2", etc, according to the specification.
I believe I need to make another $group operation but I couldn't find out how to do it.
Any ideas?
I'm not sure if I understand, but I suppose you just need to map it, like here:
> var aux = new Object;
> db.Companies.aggregate([
{
$group: {
_id: {
level: "$level"
},
count: {
$sum: 1
}
}
},
{
$project: {
_id: 0,
level: "$_id.level",
total: "$count"
}
}
]).forEach(function(a){aux["level"+a.level] = a.total;});
> printjson(aux);
{ "level2" : 1, "level1" : 2 }
I believe there could be better solution, but this one is working:
// Builds the accumulator that counts the companies whose level equals n.
// The sample documents store level as a string ("1") while the aggregated
// output shows numbers, so both representations are matched.
var countLevel = function (n) {
  return {
    $sum: {
      $cond: [
        { $or: [ { $eq: ["$level", n] }, { $eq: ["$level", String(n)] } ] },
        1,
        0
      ]
    }
  };
};

// Produces the single { metrics: { companies: {...} } } object.
// Each levelN is counted by its actual level value rather than by its
// position in a sorted array, so a level-0 bucket or a missing level can no
// longer shift the counts onto the wrong key.
db.companies.aggregate([
  {
    $group: {
      _id: null,
      // Every company counts towards the total, including level 0.
      total: { $sum: 1 },
      level1: countLevel(1),
      level2: countLevel(2),
      level3: countLevel(3),
      level4: countLevel(4),
      level5: countLevel(5)
    }
  },
  {
    $project: {
      _id: 0,
      metrics: {
        companies: {
          total: "$total",
          level1: "$level1",
          level2: "$level2",
          level3: "$level3",
          level4: "$level4",
          level5: "$level5"
        }
      }
    }
  }
])
Returned result:
{ "metrics" :
{ "companies" :
{ "total" : 7,
"level1" : 1,
"level2" : 2,
"level3" : 2,
"level4" : 1,
"level5" : 1
} } }

Aggregate Field Values to Separate Key Names

I have a collection in MongoDB with sample data something like this (simplified):
{
_id: 1,
username: "ted",
content: "4125151",
status: "complete"
}
{
_id: 2,
username: "sam",
content: "4151",
status: "new"
}
{
_id: 3,
username: "ted",
content: "511",
status: "new"
}
{
_id: 4,
username: "ted",
content: "411",
status: "in_progress"
}
{
_id: 5,
username: "pat",
content: "1sds51",
status: "complete"
}
{
_id: 6,
username: "ted",
content: "4151",
status: "in_progress"
}
{
_id: 7,
username: "ted",
content: "4125",
status: "in_progress"
}
I need to aggregate the data such that for each user, I get a count for each status value as well as a total number of records. The result should look like this:
[
{
username: "pat",
new: 0,
in_progress: 0,
complete: 1,
total: 1
},
{
username: "sam",
new: 1,
in_progress: 0,
complete: 0,
total: 1
},
{
username: "ted",
new: 1,
in_progress: 3,
complete: 1,
total: 5
}
]
Or any format that will effectively serve the same purpose which is, I want to be able to use with ngRepeat to display on the front end in this format:
User New In Progress Complete Total
pat 0 0 1 1
sam 1 0 0 1
ted 1 3 1 5
I can perform this aggregation:
{
"$group": {
"_id": {
"username": "$username",
"status": "$status"
},
"count": {
"$sum": 1
}
}
}
This gives me the individual count for each user/status combination that has at least one record. But then I have to piece it together to get it in the format that I can use on the front end. This is not at all ideal.
Is there a way to perform the aggregation to get the data in the format that I need?
What you want is a "conditional" aggregation of the values to produce a distinct field property for each status.
This is pretty simple to do using the $cond operator:
[
    // One output document per username; each status column adds 1 for the
    // rows whose status matches and 0 otherwise.
    { "$group": {
        "_id": "$username",
        "new": {
            "$sum": { "$cond": { "if": { "$eq": [ "$status", "new" ] }, "then": 1, "else": 0 } }
        },
        "complete": {
            "$sum": { "$cond": { "if": { "$eq": [ "$status", "complete" ] }, "then": 1, "else": 0 } }
        },
        "in_progress": {
            "$sum": { "$cond": { "if": { "$eq": [ "$status", "in_progress" ] }, "then": 1, "else": 0 } }
        },
        // Every row of the user counts towards the total.
        "total": { "$sum": 1 }
    }}
]
Presuming of course those are the only "status" values, but if they are not then just add an additional $project to sum the fields you want:
[
    // Drop rows whose status is not one of the three reported values.
    { "$match": { "status": { "$in": [ "new", "complete", "in_progress" ] } } },
    // Per-user conditional counts, one column per status.
    { "$group": {
        "_id": "$username",
        "new": { "$sum": { "$cond": [{ "$eq": [ "$status", "new" ] },1,0 ] } },
        "complete": { "$sum": { "$cond": [{ "$eq": [ "$status", "complete" ] },1,0 ] } },
        "in_progress": { "$sum": { "$cond": [{ "$eq": [ "$status", "in_progress" ] },1,0 ] } }
    }},
    // total is the sum of the three status columns. The closing braces of
    // this stage were missing, which made the pipeline a syntax error.
    { "$project": {
        "new": 1,
        "complete": 1,
        "in_progress": 1,
        "total": { "$add": [ "$new", "$complete", "$in_progress" ] }
    }}
]
Or just include that $add within the $group with the same calculations for the separate fields. But the $match is probably just the best idea if there are indeed other status values you don't want.
Another answer uses $group twice with a $push. With the query below you need to compute the final total on the UI side.
db.collection.aggregate([
  // First pass: one row per (username, status) pair with its occurrence count.
  { "$group": {
      "_id": { "username": "$username", "status": "$status" },
      "statuscount": { "$sum": 1 }
  } },
  // Second pass: gather each user's status rows into a single array.
  { "$group": {
      "_id": "$_id.username",
      "finalstatus": {
        "$push": { "Status": "$_id.status", "statuscount": "$statuscount" }
      }
  } }
])