Would like to query the following to obtain all item documents such that the last sale (ordered by soldDate) has a status of 2.
db.items.insertMany([
{ item: 1,
sales: [
{ soldDate: ISODate("2021-10-04"), status: 1 },
{ soldDate: ISODate("2021-10-05"), status: 2 }
]
},
{ item: 2,
sales: [
{ soldDate: ISODate("2021-09-29"), status: 3 },
{ soldDate: ISODate("2021-09-24"), status: 1 }
]
},
{ item: 3,
sales: [
{ soldDate: ISODate("2021-06-01"), status: 3 },
{ soldDate: ISODate("2021-06-12"), status: 2 },
{ soldDate: ISODate("2021-06-07"), status: 1 }
]
}
]);
So in this example, the query would return the following two documents:
{ item: 1,
sales: [
{ soldDate: ISODate("2021-10-04"), status: 1 },
{ soldDate: ISODate("2021-10-05"), status: 2 } // triggered by this
]
},
{ item: 3,
sales: [
{ soldDate: ISODate("2021-06-01"), status: 3 },
{ soldDate: ISODate("2021-06-12"), status: 2 }, // triggered by this
{ soldDate: ISODate("2021-06-07"), status: 1 }
]
}
Thanks for any help.
You stated: ordered by soldDate which can actually mean two things. Perhaps you want the documents sorted by the array, or perhaps you mean the array is sorted. I assumed the later.
Solution (Array sorted)
db.items.aggregate([
{ $match: { "sales.status": 2} },
{ $unwind: "$sales" },
{ $sort: { "item": 1, "sales.soldDate": 1} },
{ $group: { "_id": "$_id", "item": { $first: "$item" }, "sales": { $push: "$sales" } } }
])
Results
Enterprise replSet [primary] barrydb> db.items.aggregate([
... { $match: { "sales.status": 2} },
... { $unwind: "$sales" },
... { $sort: { "item": 1, "sales.soldDate": 1} },
... { $group: { "_id": "$_id", "item": { $first: "$item" }, "sales": { $push: "$sales" } } }
... ])
[
{
_id: ObjectId("617064519be05d9f1cbab346"),
item: 1,
sales: [
{ soldDate: ISODate("2021-10-04T00:00:00.000Z"), status: 1 },
{ soldDate: ISODate("2021-10-05T00:00:00.000Z"), status: 2 }
]
},
{
_id: ObjectId("617064519be05d9f1cbab348"),
item: 3,
sales: [
{ soldDate: ISODate("2021-06-01T00:00:00.000Z"), status: 3 },
{ soldDate: ISODate("2021-06-07T00:00:00.000Z"), status: 1 },
{ soldDate: ISODate("2021-06-12T00:00:00.000Z"), status: 2 }
]
}
]
But, to be complete here is a solution if you want the documents sorted (and the array not necessarily sorted).
Solution (Documents sorted)
db.items.aggregate([
{ $match: { "sales.status": 2} },
{ $sort: { "sales.soldDate": 1} }
])
Results
Enterprise replSet [primary] barrydb> db.items.aggregate([
... { $match: { "sales.status": 2} },
... { $sort: { "sales.soldDate": 1} }
... ])
[
{
_id: ObjectId("617064519be05d9f1cbab348"),
item: 3,
sales: [
{ soldDate: ISODate("2021-06-01T00:00:00.000Z"), status: 3 },
{ soldDate: ISODate("2021-06-12T00:00:00.000Z"), status: 2 },
{ soldDate: ISODate("2021-06-07T00:00:00.000Z"), status: 1 }
]
},
{
_id: ObjectId("617064519be05d9f1cbab346"),
item: 1,
sales: [
{ soldDate: ISODate("2021-10-04T00:00:00.000Z"), status: 1 },
{ soldDate: ISODate("2021-10-05T00:00:00.000Z"), status: 2 }
]
}
]
EDIT - After re-reading I believe you want only where the record having a status of 2 is also has the greatest date in the array
Solution (Only last having status of value 2 - docs and array unsorted)
db.items.aggregate([
{ $unwind: "$sales" },
{ $sort: { "item": 1, "sales.soldDate": -1} },
{ $group: { "_id": "$_id", "item": { $first: "$item" }, "sales": { $push: "$sales" } } },
{ $match : { "sales.0.status" : 2 } }
])
Results
Enterprise replSet [primary] barrydb> db.items.aggregate([
... { $unwind: "$sales" },
... { $sort: { "item": 1, "sales.soldDate": -1} },
... { $group: { "_id": "$_id", "item": { $first: "$item" }, "sales": { $push: "$sales" } } },
... { $match : { "sales.0.status" : 2 } }
... ])
[
{
_id: ObjectId("617064519be05d9f1cbab346"),
item: 1,
sales: [
{ soldDate: ISODate("2021-10-05T00:00:00.000Z"), status: 2 },
{ soldDate: ISODate("2021-10-04T00:00:00.000Z"), status: 1 }
]
},
{
_id: ObjectId("617064519be05d9f1cbab348"),
item: 3,
sales: [
{ soldDate: ISODate("2021-06-12T00:00:00.000Z"), status: 2 },
{ soldDate: ISODate("2021-06-07T00:00:00.000Z"), status: 1 },
{ soldDate: ISODate("2021-06-01T00:00:00.000Z"), status: 3 }
]
}
]
EDIT - Add Self Referencing Lookup
db.items.aggregate([
{ $unwind: "$sales" },
{ $sort: { "item": 1, "sales.soldDate": -1} },
{ $group: { "_id": "$_id", "item": { $first: "$item" }, "sales": { $push: "$sales" } } },
{ $match : { "sales.0.status" : 2 } },
{ $lookup : {
from: "items",
localField: "_id",
foreignField: "_id",
as: "results"
}
},
{ $unwind: "$results" },
{ $replaceRoot: { "newRoot": "$results" } }
])
With the self-referencing lookup we are treating MongoDB as a relational database. We find the documents that meet our requirements, but in doing so we have destroyed the original shape and content. By performing a lookup on the same records we can restore the shape but at a performance penalty.
Retain Copy
Rather than performing a lookup, which has a performance concern, a different approach is to leverage memory on the server. Keep a copy of the original while moving through the pipeline and manipulating the original to identify desired records...
db.items.aggregate([
{ $addFields: { "_original": "$$ROOT" } },
{ $unwind: "$sales" },
{ $sort: { "item": 1, "sales.soldDate": -1} },
{ $group: { "_id": "$_id", "_original": { $first: "$_original" }, "sales_status": { $push: "$sales.status" } } },
{ $match : { "sales_status.0" : 2 } },
{ $replaceRoot: { "newRoot": "$_original" } }
])
In this example we keep a copy of the original in the field _original then once we have identified the records we want we pivot the root back to _original. This may put pressure on the WiredTiger cache as we are keeping a duplicate of all selected records in memory during the execution of the pipeline. A $lookup approach also has this memory concern. Two queries would eliminate the cache pressure issues, but behaves like a $lookup and would not perform as well.
Related
I have grouped all the users by country, but I would also like to have a row showing the grand total (users are tagged to a single country in our use case).
Data Model / Sample Input
The collection is filled with objects representing a country (name) and each contains a list of user objects in an array under users.
{ _id: ObjectId("..."),
name: 'SG',
type: 'COUNTRY',
increment: 200,
users:
[ ObjectId("..."),
ObjectId("..."),
...
Query
db.collection.aggregate([{$match:{type:"COUNTRY"}},{$unwind:"$users"},{$sortByCount:"$name"}])
Current Results
{ _id: 'SG', count: 76 }
{ _id: 'IN', count: 6 }
{ _id: 'US', count: 4 }
{ _id: 'FR', count: 3 }
{ _id: 'UK', count: 2 }
{ _id: 'RU', count: 1 }
{ _id: 'CO', count: 1 }
{ _id: 'DK', count: 1 }
{ _id: 'ID', count: 1 }
{ _id: 'PH', count: 1 }
Expected Results
{ _id: 'SG', count: 76 }
{ _id: 'IN', count: 6 }
{ _id: 'US', count: 4 }
{ _id: 'FR', count: 3 }
{ _id: 'UK', count: 2 }
{ _id: 'RU', count: 1 }
{ _id: 'CO', count: 1 }
{ _id: 'DK', count: 1 }
{ _id: 'ID', count: 1 }
{ _id: 'PH', count: 1 }
{ _id: null, count: 96 } <<< TOTAL COUNT ADDED
Any tips to achieve this without resorting to complex or dirty tricks?
You can also try using $facet to calculate counts by country name and total count, and then combine them together. Something like this:
db.collection.aggregate([
{
$match: {
type: "COUNTRY"
}
},
{
"$unwind": "$users"
},
{
"$facet": {
"groupCountByCountry": [
{
"$sortByCount": "$name"
}
],
"totalCount": [
{
"$group": {
"_id": null,
"count": {
"$sum": 1
}
}
}
]
}
},
{
"$project": {
array: {
"$concatArrays": [
"$groupCountByCountry",
"$totalCount"
]
}
}
},
{
"$unwind": "$array"
},
{
"$replaceRoot": {
"newRoot": "$$ROOT.array"
}
}
])
Here's the playground link.
I recommend just doing this in memory as the alternative is "hacky" but in order to achieve this in Mongo you just need to group all documents, add a new documents and unwind again, like so:
db.collection.aggregate([
{
$group: {
_id: null,
roots: {
$push: "$$ROOT"
},
sum: {
$sum: "$count"
}
}
},
{
$addFields: {
roots: {
"$concatArrays": [
"$roots",
[
{
_id: null,
count: "$sum"
}
]
]
}
}
},
{
$unwind: "$roots"
},
{
$replaceRoot: {
newRoot: "$roots"
}
}
])
Mongo Playground
I'm trying to return size of 'orders' and sum of 'item' values for each 'order' for each order from documents like the example document:
orders: [
{
order_id: 1,
items: [
{
item_id: 1,
value:100
},
{
item_id: 2,
value:200
}
]
},
{
order_id: 2,
items: [
{
item_id: 3,
value:300
},
{
item_id: 4,
value:400
}
]
}
]
I'm using following aggregation to return them, everything works fine except I can't get size of 'orders' array because after unwind, 'orders' array is turned into an object and I can't call $size on it since it is an object now.
db.users.aggregate([
{
$unwind: "$orders"
},
{
$project: {
_id: 0,
total_values: {
$reduce: {
input: "$orders.items",
initialValue: 0,
in: { $add: ["$$value", "$$this.value"] }
}
},
order_count: {$size: '$orders'}, //I get 'The argument to $size must be an array, but was of type: object' error
}
},
])
the result I expected is:
{order_count:2, total_values:1000} //For example document
{order_count:3, total_values:1500}
{order_count:5, total_values:2500}
I found a way to get the results that I wanted. Here is the code
db.users.aggregate([
{
$project: {
_id: 1, orders: 1, order_count: { $size: '$orders' }
}
},
{ $unwind: '$orders' },
{
$project: {
_id: '$_id', items: '$orders.items', order_count: '$order_count'
}
},
{ $unwind: '$items' },
{
$project: {
_id: '$_id', sum: { $sum: '$items.value' }, order_count: '$order_count'
}
},
{
$group: {
_id: { _id: '$_id', order_count: '$order_count' }, total_values: { $sum: '$sum' }
}
},
])
output:
{ _id: { _id: ObjectId("5dffc33002ef525620ef09f1"), order_count: 2 }, total_values: 1000 }
{ _id: { _id: ObjectId("5dffc33002ef525620ef09f2"), order_count: 3 }, total_values: 1500 }
I would like to count the status and group them by country.
Data:
[
{ id: 100, status: 'ordered', country: 'US', items: [] },
{ id: 101, status: 'ordered', country: 'UK', items: [] },
{ id: 102, status: 'shipped', country: 'UK', items: [] },
]
Desired aggregation outcome:
[
{ _id: 'US', status: { ordered: 1} },
{ _id: 'UK', status: { ordered: 1, shipped: 1 } }
]
I can $count and $group, but I am not sure how to put this together. Any hint is appreciated.
Thanks,
bluepuama
$group by country and status, and count total
$group by only country and construct array of status and count in key-value format
$set to update status field to object using $arrayToObject
db.collection.aggregate([
{
$group: {
_id: { country: "$country", status: "$status" },
count: { $sum: 1 }
}
},
{
$group: {
_id: "$_id.country",
status: { $push: { k: "$_id.status", v: "$count" } }
}
},
{ $set: { status: { $arrayToObject: "$status" } } }
])
Playground
You can do it with a single $group stage like so:
db.collection.aggregate([
{
$group: {
_id: "$country",
"shipped": {
$sum: {
$cond: [
{
$eq: [
"$status",
"ordered"
]
},
0,
1
]
}
},
"ordered": {
$sum: {
$cond: [
{
$eq: [
"$status",
"shipped"
]
},
0,
1
]
}
}
}
},
{
$project: {
_id: 1,
status: {
shipped: "$shipped",
ordered: "$ordered"
}
}
}
])
Mongo Playground
I have the following stage in my MongoDB aggregation pipeline that returns the qty and sum of sales, which works fine:
{
$lookup: {
from: 'sales',
let: { part: '$_id' },
pipeline: [
{ $match: { $and: [{ $expr: { $eq: ['$partner', '$$part'] } }] } },
{ $group: { _id: null, qty: { $sum: 1 }, soldFor: { $sum: '$soldFor' } } },
{ $project: { _id: 0, qty: 1, soldFor: 1 } }],
as: 'sales'}},
{ $unwind: { path: '$sales', preserveNullAndEmptyArrays: true } },
{ $project: { _id: 1, sales: 1 }
}
However, if there are no sales, then the $project projection returns an empty sales object, but what I'd really like is it to return a completed object, but with 0 - like this:
{
sales: {
qty: 0,
soldFor: 0
}
}
You can use $cond operator here
{
"$project": {
"_id": 1,
"sales": {
"$cond": [
{ "$eq": [{ "$size": "$sales" }, 0] },
{
"sales": {
"qty": 0,
"soldFor": 0
}
},
"$sales"
]
}
}
}
I have documents in mongodb like this
{
_id: "5cfed55974c7c52ecc33ada8",
name: "Garona",
realm: "Blackrock",
faction: "Horde",
race: "Orc",
class: "Rogue",
guild: "",
level: 33,
lastSeen: "2019-06-10T00:00:00.000Z",
__v: 0
},
{
_id: "5cfed55974c7c52ecc33ade8",
name: "Muradin",
realm: "Alleria",
faction: "Alliance",
race: "Dwarf",
class: "Warrior",
guild: "Stormstout Brewing Co",
level: 42,
lastSeen: "2019-06-11T00:00:00.000Z",
__v: 0
}
What I'm trying to do, is to group by a fields and get a sum of it. So far I figured it out to do it for one field at once like so
{
$group: {
_id: {
classes: '1',
class: '$class'
},
total: { $sum: 1 }
}
},
{
$group: {
_id: '$_id.classes',
total: { $sum: '$total' },
classes: {
$push: {
class: '$_id.class',
total: '$total'
}
}
}
}
Which produces something like this
{
_id: "1",
total: 40,
classes: [
{
class: "Warrior",
total: 17
},
{
class: "Rogue",
total: 23
}
}
But I want to do it for more than one field at once, so that I can get an output like this.
{
_id: "1",
total: 40,
classes: [
{
class: "Warrior",
total: 17
},
{
class: "Rogue",
total: 23
},
factions: [
{
faction: "Alliance",
total: 27
},
{
faction: "Horde",
total: 13
}
}
No I'm wondering if it is even possible to do it in one query in an easy way or if I would be better to do a seperate query for each field.
You can do this by using the $facet aggregation stage
Processes multiple aggregation pipelines within a single stage on the same set of input documents. Each sub-pipeline has its own field in the output document where its results are stored as an array of documents.
I only slightly modified your original pipeline, and then just copied it for the 'factions' field.
The last 3 stages in my solution aren't really necessary, they just clean up the output a little bit.
You can probably take it from here, good luck.
db.collection.aggregate([
{
"$facet": {
"classes": [
{
$group: {
_id: "$class",
total: {
$sum: 1
}
}
},
{
$group: {
_id: null,
total: {
$sum: "$total"
},
"classes": {
$push: {
class: "$_id",
total: "$total"
}
}
}
}
],
"factions": [
{
$group: {
_id: "$faction",
total: {
$sum: 1
}
}
},
{
$group: {
_id: null,
total: {
$sum: "$total"
},
"factions": {
$push: {
faction: "$_id",
total: "$total"
}
}
}
}
]
}
},
{
$unwind: "$classes"
},
{
$unwind: "$factions"
},
{
$project: {
"classes._id": 0,
"factions._id": 0
}
}
])
Output
[
{
"classes": {
"classes": [
{
"class": "Warrior",
"total": 1
},
{
"class": "Rogue",
"total": 1
}
],
"total": 2
},
"factions": {
"factions": [
{
"faction": "Alliance",
"total": 1
},
{
"faction": "Horde",
"total": 1
}
],
"total": 2
}
}
]