MongoDB add grand total to sortByCount() in an aggregation pipeline - mongodb

I have grouped all the users by country, but I would also like to have a row showing the grand total (users are tagged to a single country in our use case).
Data Model / Sample Input
The collection is filled with objects representing a country (name) and each contains a list of user objects in an array under users.
{ _id: ObjectId("..."),
name: 'SG',
type: 'COUNTRY',
increment: 200,
users:
[ ObjectId("..."),
ObjectId("..."),
...
Query
db.collection.aggregate([{$match:{type:"COUNTRY"}},{$unwind:"$users"},{$sortByCount:"$name"}])
Current Results
{ _id: 'SG', count: 76 }
{ _id: 'IN', count: 6 }
{ _id: 'US', count: 4 }
{ _id: 'FR', count: 3 }
{ _id: 'UK', count: 2 }
{ _id: 'RU', count: 1 }
{ _id: 'CO', count: 1 }
{ _id: 'DK', count: 1 }
{ _id: 'ID', count: 1 }
{ _id: 'PH', count: 1 }
Expected Results
{ _id: 'SG', count: 76 }
{ _id: 'IN', count: 6 }
{ _id: 'US', count: 4 }
{ _id: 'FR', count: 3 }
{ _id: 'UK', count: 2 }
{ _id: 'RU', count: 1 }
{ _id: 'CO', count: 1 }
{ _id: 'DK', count: 1 }
{ _id: 'ID', count: 1 }
{ _id: 'PH', count: 1 }
{ _id: null, count: 96 } <<< TOTAL COUNT ADDED
Any tips to achieve this without resorting to complex or dirty tricks?

You can also try using $facet to calculate counts by country name and total count, and then combine them together. Something like this:
db.collection.aggregate([
{
$match: {
type: "COUNTRY"
}
},
{
"$unwind": "$users"
},
{
"$facet": {
"groupCountByCountry": [
{
"$sortByCount": "$name"
}
],
"totalCount": [
{
"$group": {
"_id": null,
"count": {
"$sum": 1
}
}
}
]
}
},
{
"$project": {
array: {
"$concatArrays": [
"$groupCountByCountry",
"$totalCount"
]
}
}
},
{
"$unwind": "$array"
},
{
"$replaceRoot": {
"newRoot": "$$ROOT.array"
}
}
])
Here's the playground link.

I recommend just doing this in memory as the alternative is "hacky" but in order to achieve this in Mongo you just need to group all documents, add a new documents and unwind again, like so:
db.collection.aggregate([
{
$group: {
_id: null,
roots: {
$push: "$$ROOT"
},
sum: {
$sum: "$count"
}
}
},
{
$addFields: {
roots: {
"$concatArrays": [
"$roots",
[
{
_id: null,
count: "$sum"
}
]
]
}
}
},
{
$unwind: "$roots"
},
{
$replaceRoot: {
newRoot: "$roots"
}
}
])
Mongo Playground

Related

Mongo query on last sorted array item

Would like to query the following to obtain all item documents such that the last sale (ordered by soldDate) has a status of 2.
db.items.insertMany([
{ item: 1,
sales: [
{ soldDate: ISODate("2021-10-04"), status: 1 },
{ soldDate: ISODate("2021-10-05"), status: 2 }
]
},
{ item: 2,
sales: [
{ soldDate: ISODate("2021-09-29"), status: 3 },
{ soldDate: ISODate("2021-09-24"), status: 1 }
]
},
{ item: 3,
sales: [
{ soldDate: ISODate("2021-06-01"), status: 3 },
{ soldDate: ISODate("2021-06-12"), status: 2 },
{ soldDate: ISODate("2021-06-07"), status: 1 }
]
}
]);
So in this example, the query would return the following two documents:
{ item: 1,
sales: [
{ soldDate: ISODate("2021-10-04"), status: 1 },
{ soldDate: ISODate("2021-10-05"), status: 2 } // triggered by this
]
},
{ item: 3,
sales: [
{ soldDate: ISODate("2021-06-01"), status: 3 },
{ soldDate: ISODate("2021-06-12"), status: 2 }, // triggered by this
{ soldDate: ISODate("2021-06-07"), status: 1 }
]
}
Thanks for any help.
You stated: ordered by soldDate which can actually mean two things. Perhaps you want the documents sorted by the array, or perhaps you mean the array is sorted. I assumed the later.
Solution (Array sorted)
db.items.aggregate([
{ $match: { "sales.status": 2} },
{ $unwind: "$sales" },
{ $sort: { "item": 1, "sales.soldDate": 1} },
{ $group: { "_id": "$_id", "item": { $first: "$item" }, "sales": { $push: "$sales" } } }
])
Results
Enterprise replSet [primary] barrydb> db.items.aggregate([
... { $match: { "sales.status": 2} },
... { $unwind: "$sales" },
... { $sort: { "item": 1, "sales.soldDate": 1} },
... { $group: { "_id": "$_id", "item": { $first: "$item" }, "sales": { $push: "$sales" } } }
... ])
[
{
_id: ObjectId("617064519be05d9f1cbab346"),
item: 1,
sales: [
{ soldDate: ISODate("2021-10-04T00:00:00.000Z"), status: 1 },
{ soldDate: ISODate("2021-10-05T00:00:00.000Z"), status: 2 }
]
},
{
_id: ObjectId("617064519be05d9f1cbab348"),
item: 3,
sales: [
{ soldDate: ISODate("2021-06-01T00:00:00.000Z"), status: 3 },
{ soldDate: ISODate("2021-06-07T00:00:00.000Z"), status: 1 },
{ soldDate: ISODate("2021-06-12T00:00:00.000Z"), status: 2 }
]
}
]
But, to be complete here is a solution if you want the documents sorted (and the array not necessarily sorted).
Solution (Documents sorted)
db.items.aggregate([
{ $match: { "sales.status": 2} },
{ $sort: { "sales.soldDate": 1} }
])
Results
Enterprise replSet [primary] barrydb> db.items.aggregate([
... { $match: { "sales.status": 2} },
... { $sort: { "sales.soldDate": 1} }
... ])
[
{
_id: ObjectId("617064519be05d9f1cbab348"),
item: 3,
sales: [
{ soldDate: ISODate("2021-06-01T00:00:00.000Z"), status: 3 },
{ soldDate: ISODate("2021-06-12T00:00:00.000Z"), status: 2 },
{ soldDate: ISODate("2021-06-07T00:00:00.000Z"), status: 1 }
]
},
{
_id: ObjectId("617064519be05d9f1cbab346"),
item: 1,
sales: [
{ soldDate: ISODate("2021-10-04T00:00:00.000Z"), status: 1 },
{ soldDate: ISODate("2021-10-05T00:00:00.000Z"), status: 2 }
]
}
]
EDIT - After re-reading I believe you want only where the record having a status of 2 is also has the greatest date in the array
Solution (Only last having status of value 2 - docs and array unsorted)
db.items.aggregate([
{ $unwind: "$sales" },
{ $sort: { "item": 1, "sales.soldDate": -1} },
{ $group: { "_id": "$_id", "item": { $first: "$item" }, "sales": { $push: "$sales" } } },
{ $match : { "sales.0.status" : 2 } }
])
Results
Enterprise replSet [primary] barrydb> db.items.aggregate([
... { $unwind: "$sales" },
... { $sort: { "item": 1, "sales.soldDate": -1} },
... { $group: { "_id": "$_id", "item": { $first: "$item" }, "sales": { $push: "$sales" } } },
... { $match : { "sales.0.status" : 2 } }
... ])
[
{
_id: ObjectId("617064519be05d9f1cbab346"),
item: 1,
sales: [
{ soldDate: ISODate("2021-10-05T00:00:00.000Z"), status: 2 },
{ soldDate: ISODate("2021-10-04T00:00:00.000Z"), status: 1 }
]
},
{
_id: ObjectId("617064519be05d9f1cbab348"),
item: 3,
sales: [
{ soldDate: ISODate("2021-06-12T00:00:00.000Z"), status: 2 },
{ soldDate: ISODate("2021-06-07T00:00:00.000Z"), status: 1 },
{ soldDate: ISODate("2021-06-01T00:00:00.000Z"), status: 3 }
]
}
]
EDIT - Add Self Referencing Lookup
db.items.aggregate([
{ $unwind: "$sales" },
{ $sort: { "item": 1, "sales.soldDate": -1} },
{ $group: { "_id": "$_id", "item": { $first: "$item" }, "sales": { $push: "$sales" } } },
{ $match : { "sales.0.status" : 2 } },
{ $lookup : {
from: "items",
localField: "_id",
foreignField: "_id",
as: "results"
}
},
{ $unwind: "$results" },
{ $replaceRoot: { "newRoot": "$results" } }
])
With the self-referencing lookup we are treating MongoDB as a relational database. We find the documents that meet our requirements, but in doing so we have destroyed the original shape and content. By performing a lookup on the same records we can restore the shape but at a performance penalty.
Retain Copy
Rather than performing a lookup, which has a performance concern, a different approach is to leverage memory on the server. Keep a copy of the original while moving through the pipeline and manipulating the original to identify desired records...
db.items.aggregate([
{ $addFields: { "_original": "$$ROOT" } },
{ $unwind: "$sales" },
{ $sort: { "item": 1, "sales.soldDate": -1} },
{ $group: { "_id": "$_id", "_original": { $first: "$_original" }, "sales_status": { $push: "$sales.status" } } },
{ $match : { "sales_status.0" : 2 } },
{ $replaceRoot: { "newRoot": "$_original" } }
])
In this example we keep a copy of the original in the field _original then once we have identified the records we want we pivot the root back to _original. This may put pressure on the WiredTiger cache as we are keeping a duplicate of all selected records in memory during the execution of the pipeline. A $lookup approach also has this memory concern. Two queries would eliminate the cache pressure issues, but behaves like a $lookup and would not perform as well.

MongoDB get size of unwinded array

I'm trying to return size of 'orders' and sum of 'item' values for each 'order' for each order from documents like the example document:
orders: [
{
order_id: 1,
items: [
{
item_id: 1,
value:100
},
{
item_id: 2,
value:200
}
]
},
{
order_id: 2,
items: [
{
item_id: 3,
value:300
},
{
item_id: 4,
value:400
}
]
}
]
I'm using following aggregation to return them, everything works fine except I can't get size of 'orders' array because after unwind, 'orders' array is turned into an object and I can't call $size on it since it is an object now.
db.users.aggregate([
{
$unwind: "$orders"
},
{
$project: {
_id: 0,
total_values: {
$reduce: {
input: "$orders.items",
initialValue: 0,
in: { $add: ["$$value", "$$this.value"] }
}
},
order_count: {$size: '$orders'}, //I get 'The argument to $size must be an array, but was of type: object' error
}
},
])
the result I expected is:
{order_count:2, total_values:1000} //For example document
{order_count:3, total_values:1500}
{order_count:5, total_values:2500}
I found a way to get the results that I wanted. Here is the code
db.users.aggregate([
{
$project: {
_id: 1, orders: 1, order_count: { $size: '$orders' }
}
},
{ $unwind: '$orders' },
{
$project: {
_id: '$_id', items: '$orders.items', order_count: '$order_count'
}
},
{ $unwind: '$items' },
{
$project: {
_id: '$_id', sum: { $sum: '$items.value' }, order_count: '$order_count'
}
},
{
$group: {
_id: { _id: '$_id', order_count: '$order_count' }, total_values: { $sum: '$sum' }
}
},
])
output:
{ _id: { _id: ObjectId("5dffc33002ef525620ef09f1"), order_count: 2 }, total_values: 1000 }
{ _id: { _id: ObjectId("5dffc33002ef525620ef09f2"), order_count: 3 }, total_values: 1500 }

How to $count and $group within MongoDB aggregation?

I would like to count the status and group them by country.
Data:
[
{ id: 100, status: 'ordered', country: 'US', items: [] },
{ id: 101, status: 'ordered', country: 'UK', items: [] },
{ id: 102, status: 'shipped', country: 'UK', items: [] },
]
Desired aggregation outcome:
[
{ _id: 'US', status: { ordered: 1} },
{ _id: 'UK', status: { ordered: 1, shipped: 1 } }
]
I can $count and $group, but I am not sure how to put this together. Any hint is appreciated.
Thanks,
bluepuama
$group by country and status, and count total
$group by only country and construct array of status and count in key-value format
$set to update status field to object using $arrayToObject
db.collection.aggregate([
{
$group: {
_id: { country: "$country", status: "$status" },
count: { $sum: 1 }
}
},
{
$group: {
_id: "$_id.country",
status: { $push: { k: "$_id.status", v: "$count" } }
}
},
{ $set: { status: { $arrayToObject: "$status" } } }
])
Playground
You can do it with a single $group stage like so:
db.collection.aggregate([
{
$group: {
_id: "$country",
"shipped": {
$sum: {
$cond: [
{
$eq: [
"$status",
"ordered"
]
},
0,
1
]
}
},
"ordered": {
$sum: {
$cond: [
{
$eq: [
"$status",
"shipped"
]
},
0,
1
]
}
}
}
},
{
$project: {
_id: 1,
status: {
shipped: "$shipped",
ordered: "$ordered"
}
}
}
])
Mongo Playground

Mongodb Group stage, and query last two document

In mongodb, if we want to take first or last document of the group stage, then the FIRST and LAST operator will helpful. I want to group the collection based on _id:"$department" and also take the LAST document and the LAST-1 document. Is there any way to achieve this.
Assume you have this collection:
[
{ department: "HR", employees: 20 },
{ department: "Finance", employees: 30 },
{ department: "Sales", employees: 5 },
{ department: "IT", employees: 50 }
]
Then you can run this aggregation:
db.collection.aggregate([
{ $sort: { department: 1 } },
{
$group: {
_id: null,
employees: { $sum: "$employees" },
all_departments: { $push: "$department" }
}
},
{
$set: {
last_departments: {
$concatArrays: [
[{ $arrayElemAt: ["$all_departments", -1] }],
[{ $arrayElemAt: ["$all_departments", -2] }]
]
}
}
}
])
Mongo Playground
Update
With $slice it is even shorter:
db.collection.aggregate([
{ $sort: { department: 1 } },
{
$group: {
_id: null,
employees: { $sum: "$employees" },
all_departments: { $push: "$department" }
}
},
{ $set: { last_departments: { $slice: ["$all_departments", -2] } } }
])

MongoDB multiple nested groups

I have documents in mongodb like this
{
_id: "5cfed55974c7c52ecc33ada8",
name: "Garona",
realm: "Blackrock",
faction: "Horde",
race: "Orc",
class: "Rogue",
guild: "",
level: 33,
lastSeen: "2019-06-10T00:00:00.000Z",
__v: 0
},
{
_id: "5cfed55974c7c52ecc33ade8",
name: "Muradin",
realm: "Alleria",
faction: "Alliance",
race: "Dwarf",
class: "Warrior",
guild: "Stormstout Brewing Co",
level: 42,
lastSeen: "2019-06-11T00:00:00.000Z",
__v: 0
}
What I'm trying to do, is to group by a fields and get a sum of it. So far I figured it out to do it for one field at once like so
{
$group: {
_id: {
classes: '1',
class: '$class'
},
total: { $sum: 1 }
}
},
{
$group: {
_id: '$_id.classes',
total: { $sum: '$total' },
classes: {
$push: {
class: '$_id.class',
total: '$total'
}
}
}
}
Which produces something like this
{
_id: "1",
total: 40,
classes: [
{
class: "Warrior",
total: 17
},
{
class: "Rogue",
total: 23
}
}
But I want to do it for more than one field at once, so that I can get an output like this.
{
_id: "1",
total: 40,
classes: [
{
class: "Warrior",
total: 17
},
{
class: "Rogue",
total: 23
},
factions: [
{
faction: "Alliance",
total: 27
},
{
faction: "Horde",
total: 13
}
}
No I'm wondering if it is even possible to do it in one query in an easy way or if I would be better to do a seperate query for each field.
You can do this by using the $facet aggregation stage
Processes multiple aggregation pipelines within a single stage on the same set of input documents. Each sub-pipeline has its own field in the output document where its results are stored as an array of documents.
I only slightly modified your original pipeline, and then just copied it for the 'factions' field.
The last 3 stages in my solution aren't really necessary, they just clean up the output a little bit.
You can probably take it from here, good luck.
db.collection.aggregate([
{
"$facet": {
"classes": [
{
$group: {
_id: "$class",
total: {
$sum: 1
}
}
},
{
$group: {
_id: null,
total: {
$sum: "$total"
},
"classes": {
$push: {
class: "$_id",
total: "$total"
}
}
}
}
],
"factions": [
{
$group: {
_id: "$faction",
total: {
$sum: 1
}
}
},
{
$group: {
_id: null,
total: {
$sum: "$total"
},
"factions": {
$push: {
faction: "$_id",
total: "$total"
}
}
}
}
]
}
},
{
$unwind: "$classes"
},
{
$unwind: "$factions"
},
{
$project: {
"classes._id": 0,
"factions._id": 0
}
}
])
Output
[
{
"classes": {
"classes": [
{
"class": "Warrior",
"total": 1
},
{
"class": "Rogue",
"total": 1
}
],
"total": 2
},
"factions": {
"factions": [
{
"faction": "Alliance",
"total": 1
},
{
"faction": "Horde",
"total": 1
}
],
"total": 2
}
}
]