Grouping by array elements and find sum of common array fields - mongodb

Each document has an array field, and each element of that array has a unique Id field. After doing $unwind on the array, I need to add a count field to each array element in place of Id. The count should be the number of array elements that share the same values for the fields other than Id (i.e. grouped by those fields).
Sample document - https://mongoplayground.net/p/LUPqVw07unP
Expected result:
{
"originId": 123,
"taskMetric": {
"count": 2,
"status": "OPEN",
"assignee": "ABC"
}
}, {
"originId": 1,
"taskMetric": {
"count": 1,
"status": "COMPLETED",
"assignee": "CDE"
}
}, {
"originId": 1,
"taskMetric": {
"count": 1,
"status": "COMPLETED",
"assignee": "EFG"
}
}

Play
Starting from the following query, you have to add one more $project stage to remove _id, and add the id to the $group stage in the same way as the other fields.
db.collection.aggregate([
{
$unwind: "$tasks"
},
{
$group: {
"_id": {
"status": "$tasks.status",
"assignee": "$tasks.assignee"
},
"count": {
$sum: 1
},
"status": {
$first: "$tasks.status"
},
"assignee": {
$first: "$tasks.assignee"
}
}
}
])

Related

Including additional fields in a Mongodb aggregate query

I have a data structure like this. Each student will have multiple entries based on when they enter the classrooms. The query needs to get the latest record of each student based on a list of student ids and department name. It should also show the teacher id and the last timestamp.
[
{
"studentid": "stu-1234",
"dept": "geog",
"teacher_id": 1,
"LastSwipeTimestamp": "2021-11-25T10:50:00.5230694Z"
},
{
"studentid": "stu-1234",
"dept": "geog",
"teacher_id": 2,
"LastSwipeTimestamp": "2021-11-25T11:50:00.5230694Z"
},
{
"studentid": "stu-abc",
"dept": "geog",
"teacher_id": 11,
"LastSwipeTimestamp": "2021-11-25T09:15:00.5230694Z"
},
{
"studentid": "stu-abc",
"dept": "geog",
"teacher_id": 21,
"LastSwipeTimestamp": "2021-11-25T11:30:00.5230694Z"
}
]
Here is what I have, but it doesn't show teacher id or the last swipe timestamp. What do I need to change or add?
Maybe you need something like this
db.collection.aggregate([
{
$match: {
"studentid": {
"$in": [
"stu-abc",
"stu-1234"
]
},
"dept": "geog"
}
},
{
$sort: {
"LastSwipeTimestamp": -1
}
},
{
$group: {
"_id": {
"studentid": "$studentid",
"dept": "$dept"
},
"teacher_id": {
$first: "$teacher_id"
},
"LastSwipeTimestamp": {
$first: "$LastSwipeTimestamp"
}
}
},
{
$project: {
_id: 0,
"studentid": "$_id.studentid",
"dept": "$_id.dept",
"teacher_id": "$teacher_id",
"LastSwipeTimestamp": "$LastSwipeTimestamp"
}
}
])
explained:
You need to include the non-grouped fields in the $group stage (here via $first) so that they are also available to the following $project stage...

Count the number of duplicate elements in MongoDB

I have the collection below in MongoDB:
{
"Id": "5",
"Group": [
{
"Name": "frank",
"Roll": "123"
}
]
},
{
"Id": "6",
"Group": [
{
"Name": "John",
"Roll": "124"
}
]
},
{
"Id": "7",
"Group": [
{
"Name": "John",
"Roll": "125"
}
]
}
The name "John" appears twice. I would like to display the number of each name that appears more than once:
{"Name": "John", "Count":2 }
You can use this aggregation query:
First $unwind to deconstruct the array and get all values as an object.
Then group by the name and $sum 1 for each name.
And then $match to keep only those values that appear more than once (i.e. are repeated).
And last stage is to output values you want, in this case Name and Count.
db.collection.aggregate([
{
"$unwind": "$Group"
},
{
"$group": {
"_id": "$Group.Name",
"Count": {
"$sum": 1
},
}
},
{
"$match": {
"Count": {
"$gt": 1
}
}
},
{
"$project": {
"_id": 0,
"Name": "$_id",
"Count": 1
}
}
])
Example here

how to get the sum of a particular element in all the objects of an array using pymongo

Below is my collection
[{'_id': ObjectId('603e9cc2784fa0d80d8672cd'),
'name': 'balaji',
'items': [{'price': 1, 'price_range': 'A'},
{'price': 6, 'price_range': 'B'},
{'price': 4, 'price_range': 'C'}]}]
The collection above contains only one record, which has an array named items; this array contains objects with price and price_range attributes. How can I get the sum of all the prices in this array? I tried the query below, and it did not work:
aggregation_string = [{"$match":{"name": "balaji"
}},{ "$group": {
"_id": None,
"count": { "$sum": "$items.price" }
}}]
db.sample_collection1.aggregate(aggregation_string)
and I am getting count as 0. Can someone please help me here.
In your example since you don't need to group the objects you can simply project the sum this way :
db.collection.aggregate([
{
"$match": {
"name": "balaji"
}
},
{
"$project": {
"name": 1,
"priceTotal": {
"$sum": "$items.price"
}
}
},
])
It should work from MongoDB 3.2 onward, and I think it's the best way.
But if you absolutely need to use the $group, you have to do it this way:
db.collection.aggregate([
{
"$match": {
"name": "balaji"
}
},
{
"$group": {
"_id": null,
"count": {
"$sum": {
"$sum": "$items.price"
}
}
}
}
])
It was your $sum query that was incomplete.
Or use the $unwind operator to avoid applying $sum twice:
db.collection.aggregate([
{
"$match": {
"name": "balaji"
}
},
{
"$unwind": "$items",
},
{
"$group": {
"_id": null,
"count": {
"$sum": "$items.price"
}
}
}
])

MongoDB find only the newest combination of fields

I have 3 documents in a collection:
1:
{
"_id": {
"$oid": "5f37dad305c1b9403bfe808b"
},
"nextBilling": {
"isAutomatic": true,
"isRedeemedForFutureBilling": false,
"time": 1000,
"duration": 1000
},
"userToken": "aaaa",
"__v": 0
}
2:
{
"_id": {
"$oid": "5f37dad305c1b9403bfe323b"
},
"nextBilling": {
"isAutomatic": true,
"isRedeemedForFutureBilling": false,
"time": 2000,
"duration": 1000
},
"userToken": "aaaa",
"__v": 0
}
3:
{
"_id": {
"$oid": "5f37dfj1b9403bfe323b"
},
"nextBilling": {
"isAutomatic": true,
"isRedeemedForFutureBilling": false,
"time": 1000,
"duration": 1000
},
"userToken": "bbbb",
"__v": 0
}
These are payments from users, and I want to get every user's payments, but only their last payment (which can be determined by nextBilling.time). So what I want to get in the end is the entries numbered 2 and 3 above. Users are identified by the userToken, so what I actually want is all combinations of (userToken, nextBilling.time) where nextBilling.time is the largest, and only one result should be returned for each userToken.
What I'd do in MySQL to achieve this would be something like:
SELECT * FROM payments WHERE (user_token, next_billing_time) IN (SELECT user_token, MAX(ts_in_sec) FROM payments group by user_token)
I'm a bit lost about what I should do in mongoDB for this. I'm using mongoose.
You can try using aggregate(),
$sort by nextBilling.time in descending order
db.collection.aggregate([
{ $sort: { "nextBilling.time": -1 } },
// $group by userToken and set first nextBilling and id
{
$group: {
_id: "$userToken",
id: { $first: "$_id" },
nextBilling: { $first: "$nextBilling" }
}
},
// $project to show required fields
{
$project: {
_id: "$id",
userToken: "$_id",
nextBilling: 1
}
}
])
Playground
You can do :
const result = await ModelName.aggregate([
{$group: {_id: '$userToken', "time": {$max: "$nextBilling.time"}}}
])

How to $push a field depending on a condition?

I'm trying to conditionally push a field into an array during the $group stage of the MongoDB aggregation pipeline.
Essentially I have documents with the name of the user, and an array of the actions they performed.
If I group the user actions like this:
{ $group: { _id: { "name": "$user.name" }, "actions": { $push: $action"} } }
I get the following:
[{
"_id": {
"name": "Bob"
},
"actions": ["add", "wait", "subtract"]
}, {
"_id": {
"name": "Susan"
},
"actions": ["add"]
}, {
"_id": {
"name": "Susan"
},
"actions": ["add, subtract"]
}]
So far so good. The idea would be to now group together the actions array to see which set of user actions are the most popular. The problem is that I need to remove the "wait" action before taking into account the group. Therefore the result should be something like this, taking into account that the "wait" element should not be considered in the grouping:
[{
"_id": ["add"],
"total": 1
}, {
"_id": ["add", "subtract"],
"total": 2
}]
Test #1
If I add this $group stage:
{ $group : { _id : "$actions", total: { $sum: 1} }}
I get the count that I want, but it takes into account the unwanted "wait" array element.
[{
"_id": ["add"],
"total": 1
}, {
"_id": ["add", "subtract"],
"total": 1
}, {
"_id": ["add", "wait", "subtract"],
"total": 1
}]
Test #2
{ $group: { _id: { "name": "$user.name" }, "actions": { $push: { $cond: { if:
{ $ne: [ "$action", 'wait']}, then: "$action", else: null } }}} }
{ $group : { _id : "$actions", total: { $sum: 1} }}
This is as close as I've gotten, but this pushes null values where the wait would be, and I can't figure out how to remove them.
[{
"_id": ["add"],
"total": 1
}, {
"_id": ["add", "subtract"],
"total": 1
}, {
"_id": ["add", null, "subtract"],
"total": 1
}]
UPDATE:
My simplified documents look like this:
{
"_id": ObjectID("573e0c6155e2a8f9362fb8ff"),
"user": {
"name": "Bob",
},
"action": "add",
}
You need a preliminary $match stage in your pipeline to select only those documents where "action" is not equal to "wait".
db.collection.aggregate([
{ "$match": { "action": { "$ne": "wait" } } },
{ "$group": {
"_id": "$user.name",
"actions": { "$push": "$action" },
"total": { "$sum": 1 }
}}
])