I'm new to mongodb and I've been tasked with building an API capable of returning a total count of objects returned by a previously made aggregation pipeline. The pipeline contains multiple groupings and counts, and I'm wondering how I can tweak the current pipeline to return a total count of objects returned.
function projectionPipeline(islifecycle, pipeline) {
if (islifecycle) {
pipeline.push(
{
$project: {
state: '$state',
_id: 0,
lifecycle: {
$filter: {
input: '$classification',
as: 'lifecycle',
cond: {
$or: [
{
$eq: [
'$$lifecycle.name',
'Creation Lifecycle Status',
],
},
],
},
},
},
},
},
{
$project: {
state: '$state',
lifecycle: '$lifecycle.value',
},
},
{
$group: {
_id: {
state: '$state',
lifecycle: '$lifecycle',
},
counts: { $sum: 1 },
},
},
{
$group: {
_id: '$_id.state',
lifecycles: {
$push: {
lifecycle: '$_id.lifecycle',
lifecycleCount: '$counts',
},
},
count: { $sum: '$counts' },
},
},
{
$project: {
count: 1,
state: '$_id',
lifecycles: '$lifecycles',
_id: 0,
},
},
);
} else {
pipeline.push(
{
$group: {
_id: '$state',
counts: {
$sum: 1,
},
},
},
{
$project: {
counts: 1,
value: '$_id',
_id: 0,
},
},
);
}
}
Related
I've created an aggregate query but for some reason it doesn't seem to work for custom fields created in the aggregation pipeline.
return this.repository.mongo().aggregate([
{
$match: { q1_avg: { $regex: baseQuery['value'], $options: 'i' } }, // NOT WORKING
},
{
$group: {
_id: '$product_sku',
id: { $first: "$_id" },
product_name: { $first: '$product_name' },
product_category: { $first: '$product_category' },
product_sku: { $first: '$product_sku' },
q1_cnt: { $sum: 1 },
q1_votes: { $push: "$final_rating" }
},
},
{
$facet: {
pagination: [ { $count: 'total' } ],
data: [
{
$project: {
_id: 1,
id: 1,
product_name: 1,
product_category: 1,
product_sku: 1,
q1_cnt: 1,
q1_votes: {
$filter: {
input: '$q1_votes',
as: 'item',
cond: { $ne: ['$$item', null] }
}
},
},
},
{
$set: {
q1_avg: { $round: [ { $avg: '$q1_votes' }, 2 ] },
}
},
{ $unset: ['q1_votes'] },
{ $skip: skip },
{ $limit: limit },
{ $sort: sortList }
]
}
},
{ $unwind : "$pagination" },
]).next();
q1_avg value is an integer and as far as I know, regex only works with strings. Could that be the reason
Would like to query the following to obtain all item documents such that the last sale (ordered by soldDate) has a status of 2.
db.items.insertMany([
{ item: 1,
sales: [
{ soldDate: ISODate("2021-10-04"), status: 1 },
{ soldDate: ISODate("2021-10-05"), status: 2 }
]
},
{ item: 2,
sales: [
{ soldDate: ISODate("2021-09-29"), status: 3 },
{ soldDate: ISODate("2021-09-24"), status: 1 }
]
},
{ item: 3,
sales: [
{ soldDate: ISODate("2021-06-01"), status: 3 },
{ soldDate: ISODate("2021-06-12"), status: 2 },
{ soldDate: ISODate("2021-06-07"), status: 1 }
]
}
]);
So in this example, the query would return the following two documents:
{ item: 1,
sales: [
{ soldDate: ISODate("2021-10-04"), status: 1 },
{ soldDate: ISODate("2021-10-05"), status: 2 } // triggered by this
]
},
{ item: 3,
sales: [
{ soldDate: ISODate("2021-06-01"), status: 3 },
{ soldDate: ISODate("2021-06-12"), status: 2 }, // triggered by this
{ soldDate: ISODate("2021-06-07"), status: 1 }
]
}
Thanks for any help.
You stated: ordered by soldDate which can actually mean two things. Perhaps you want the documents sorted by the array, or perhaps you mean the array is sorted. I assumed the later.
Solution (Array sorted)
db.items.aggregate([
{ $match: { "sales.status": 2} },
{ $unwind: "$sales" },
{ $sort: { "item": 1, "sales.soldDate": 1} },
{ $group: { "_id": "$_id", "item": { $first: "$item" }, "sales": { $push: "$sales" } } }
])
Results
Enterprise replSet [primary] barrydb> db.items.aggregate([
... { $match: { "sales.status": 2} },
... { $unwind: "$sales" },
... { $sort: { "item": 1, "sales.soldDate": 1} },
... { $group: { "_id": "$_id", "item": { $first: "$item" }, "sales": { $push: "$sales" } } }
... ])
[
{
_id: ObjectId("617064519be05d9f1cbab346"),
item: 1,
sales: [
{ soldDate: ISODate("2021-10-04T00:00:00.000Z"), status: 1 },
{ soldDate: ISODate("2021-10-05T00:00:00.000Z"), status: 2 }
]
},
{
_id: ObjectId("617064519be05d9f1cbab348"),
item: 3,
sales: [
{ soldDate: ISODate("2021-06-01T00:00:00.000Z"), status: 3 },
{ soldDate: ISODate("2021-06-07T00:00:00.000Z"), status: 1 },
{ soldDate: ISODate("2021-06-12T00:00:00.000Z"), status: 2 }
]
}
]
But, to be complete here is a solution if you want the documents sorted (and the array not necessarily sorted).
Solution (Documents sorted)
db.items.aggregate([
{ $match: { "sales.status": 2} },
{ $sort: { "sales.soldDate": 1} }
])
Results
Enterprise replSet [primary] barrydb> db.items.aggregate([
... { $match: { "sales.status": 2} },
... { $sort: { "sales.soldDate": 1} }
... ])
[
{
_id: ObjectId("617064519be05d9f1cbab348"),
item: 3,
sales: [
{ soldDate: ISODate("2021-06-01T00:00:00.000Z"), status: 3 },
{ soldDate: ISODate("2021-06-12T00:00:00.000Z"), status: 2 },
{ soldDate: ISODate("2021-06-07T00:00:00.000Z"), status: 1 }
]
},
{
_id: ObjectId("617064519be05d9f1cbab346"),
item: 1,
sales: [
{ soldDate: ISODate("2021-10-04T00:00:00.000Z"), status: 1 },
{ soldDate: ISODate("2021-10-05T00:00:00.000Z"), status: 2 }
]
}
]
EDIT - After re-reading I believe you want only where the record having a status of 2 is also has the greatest date in the array
Solution (Only last having status of value 2 - docs and array unsorted)
db.items.aggregate([
{ $unwind: "$sales" },
{ $sort: { "item": 1, "sales.soldDate": -1} },
{ $group: { "_id": "$_id", "item": { $first: "$item" }, "sales": { $push: "$sales" } } },
{ $match : { "sales.0.status" : 2 } }
])
Results
Enterprise replSet [primary] barrydb> db.items.aggregate([
... { $unwind: "$sales" },
... { $sort: { "item": 1, "sales.soldDate": -1} },
... { $group: { "_id": "$_id", "item": { $first: "$item" }, "sales": { $push: "$sales" } } },
... { $match : { "sales.0.status" : 2 } }
... ])
[
{
_id: ObjectId("617064519be05d9f1cbab346"),
item: 1,
sales: [
{ soldDate: ISODate("2021-10-05T00:00:00.000Z"), status: 2 },
{ soldDate: ISODate("2021-10-04T00:00:00.000Z"), status: 1 }
]
},
{
_id: ObjectId("617064519be05d9f1cbab348"),
item: 3,
sales: [
{ soldDate: ISODate("2021-06-12T00:00:00.000Z"), status: 2 },
{ soldDate: ISODate("2021-06-07T00:00:00.000Z"), status: 1 },
{ soldDate: ISODate("2021-06-01T00:00:00.000Z"), status: 3 }
]
}
]
EDIT - Add Self Referencing Lookup
db.items.aggregate([
{ $unwind: "$sales" },
{ $sort: { "item": 1, "sales.soldDate": -1} },
{ $group: { "_id": "$_id", "item": { $first: "$item" }, "sales": { $push: "$sales" } } },
{ $match : { "sales.0.status" : 2 } },
{ $lookup : {
from: "items",
localField: "_id",
foreignField: "_id",
as: "results"
}
},
{ $unwind: "$results" },
{ $replaceRoot: { "newRoot": "$results" } }
])
With the self-referencing lookup we are treating MongoDB as a relational database. We find the documents that meet our requirements, but in doing so we have destroyed the original shape and content. By performing a lookup on the same records we can restore the shape but at a performance penalty.
Retain Copy
Rather than performing a lookup, which has a performance concern, a different approach is to leverage memory on the server. Keep a copy of the original while moving through the pipeline and manipulating the original to identify desired records...
db.items.aggregate([
{ $addFields: { "_original": "$$ROOT" } },
{ $unwind: "$sales" },
{ $sort: { "item": 1, "sales.soldDate": -1} },
{ $group: { "_id": "$_id", "_original": { $first: "$_original" }, "sales_status": { $push: "$sales.status" } } },
{ $match : { "sales_status.0" : 2 } },
{ $replaceRoot: { "newRoot": "$_original" } }
])
In this example we keep a copy of the original in the field _original then once we have identified the records we want we pivot the root back to _original. This may put pressure on the WiredTiger cache as we are keeping a duplicate of all selected records in memory during the execution of the pipeline. A $lookup approach also has this memory concern. Two queries would eliminate the cache pressure issues, but behaves like a $lookup and would not perform as well.
I have the following resolver:
const result = await UserPassage.aggregate([
{ $sort: { createdAt: -1 } },
{
$group: {
_id: '$level',
level: { $first: '$level' },
passageId: { $first: '$passageId' },
userId: { $first: '$userId' },
type: { $first: '$type' },
category: { $first: '$category' },
score: { $first: '$score' },
completedStage: { $first: '$completedStage' },
userPassageStatsId: {
_id: { $first: '$_id' },
stats: {
readingTime: { $first: '$readingTime' },
qtdVocab: { $first: '$qtdVocab' },
qtdTestDone: { $first: '$qtdTestDone' },
totalQuiz: { $first: '$totalQuiz' },
progress: { $first: '$progress' },
},
},
},
},
{ $sort: { level: 1 } },
]);
await UserPassageStats.populate(result, { path: 'userPassageStatsId' });
The problem is that I need to populate 'userPassageStatsId' and return it but it's not working well returning the following error:
MongoError: The field 'userPassageStatsId' must be an accumulator object
does anyone knows what I am doing wrong?
$group can only contain _id or accumulator objects like $first, $last, $sum etc. In your case your building nested object and that syntax is not allowed - accumulator has to be on a top level. You can try two approaches, either return flat structure from $group and then reshape using $project:
{
$group: {
_id: '$level',
level: { $first: '$level' },
passageId: { $first: '$passageId' },
userId: { $first: '$userId' },
type: { $first: '$type' },
category: { $first: '$category' },
score: { $first: '$score' },
completedStage: { $first: '$completedStage' },
userPassageStatsId_id: { $first: '$_id' },
readingTime: { $first: '$readingTime' },
qtdVocab: { $first: '$qtdVocab' },
qtdTestDone: { $first: '$qtdTestDone' },
totalQuiz: { $first: '$totalQuiz' },
progress: { $first: '$progress' }
}
},
{
$project: {
_id: 1,
level: 1,
...,
userPassageStatsId: {
_id: "$userPassageStatsId_id",
stats: {
readingTime: "$readingTime",
...
}
}
}
}
or use $$ROOT to capture first object for every group and reshape it using $project:
{
$group: {
_id: '$level',
d: { $first: "$$ROOT" }
}
},
{
$project: {
_id: 1,
level: "$d.level",
...,
userPassageStatsId: {
_id: "$d._id",
stats: {
readingTime: "$d.readingTime",
...
}
}
}
}
I have documents in mongodb like this
{
_id: "5cfed55974c7c52ecc33ada8",
name: "Garona",
realm: "Blackrock",
faction: "Horde",
race: "Orc",
class: "Rogue",
guild: "",
level: 33,
lastSeen: "2019-06-10T00:00:00.000Z",
__v: 0
},
{
_id: "5cfed55974c7c52ecc33ade8",
name: "Muradin",
realm: "Alleria",
faction: "Alliance",
race: "Dwarf",
class: "Warrior",
guild: "Stormstout Brewing Co",
level: 42,
lastSeen: "2019-06-11T00:00:00.000Z",
__v: 0
}
What I'm trying to do, is to group by a fields and get a sum of it. So far I figured it out to do it for one field at once like so
{
$group: {
_id: {
classes: '1',
class: '$class'
},
total: { $sum: 1 }
}
},
{
$group: {
_id: '$_id.classes',
total: { $sum: '$total' },
classes: {
$push: {
class: '$_id.class',
total: '$total'
}
}
}
}
Which produces something like this
{
_id: "1",
total: 40,
classes: [
{
class: "Warrior",
total: 17
},
{
class: "Rogue",
total: 23
}
}
But I want to do it for more than one field at once, so that I can get an output like this.
{
_id: "1",
total: 40,
classes: [
{
class: "Warrior",
total: 17
},
{
class: "Rogue",
total: 23
},
factions: [
{
faction: "Alliance",
total: 27
},
{
faction: "Horde",
total: 13
}
}
No I'm wondering if it is even possible to do it in one query in an easy way or if I would be better to do a seperate query for each field.
You can do this by using the $facet aggregation stage
Processes multiple aggregation pipelines within a single stage on the same set of input documents. Each sub-pipeline has its own field in the output document where its results are stored as an array of documents.
I only slightly modified your original pipeline, and then just copied it for the 'factions' field.
The last 3 stages in my solution aren't really necessary, they just clean up the output a little bit.
You can probably take it from here, good luck.
db.collection.aggregate([
{
"$facet": {
"classes": [
{
$group: {
_id: "$class",
total: {
$sum: 1
}
}
},
{
$group: {
_id: null,
total: {
$sum: "$total"
},
"classes": {
$push: {
class: "$_id",
total: "$total"
}
}
}
}
],
"factions": [
{
$group: {
_id: "$faction",
total: {
$sum: 1
}
}
},
{
$group: {
_id: null,
total: {
$sum: "$total"
},
"factions": {
$push: {
faction: "$_id",
total: "$total"
}
}
}
}
]
}
},
{
$unwind: "$classes"
},
{
$unwind: "$factions"
},
{
$project: {
"classes._id": 0,
"factions._id": 0
}
}
])
Output
[
{
"classes": {
"classes": [
{
"class": "Warrior",
"total": 1
},
{
"class": "Rogue",
"total": 1
}
],
"total": 2
},
"factions": {
"factions": [
{
"faction": "Alliance",
"total": 1
},
{
"faction": "Horde",
"total": 1
}
],
"total": 2
}
}
]
I have a collection with documents similar to the following format:
{
departure:{name: "abe"},
arrival:{name: "tom"}
},
{
departure:{name: "bob"},
arrival:{name: "abe"}
}
And to get output like so:
{
name: "abe",
departureCount: 1,
arrivalCount: 1
},
{
name: "bob",
departureCount: 1,
arrivalCount: 0
},
{
name: "tom",
departureCount: 0,
arrivalCount: 1
}
I'm able to get the counts individually by doing a query for the specific data like so:
db.sched.aggregate([
{
"$group":{
_id: "$departure.name",
departureCount: {$sum: 1}
}
}
])
But I haven't figured out how to merge the arrival and departure name into one document along with counts for both. Any suggestions on how to accomplish this?
You should use a $map to split your doc into 2, then $unwind and $group..
[
{
$project: {
dep: '$departure.name',
arr: '$arrival.name'
}
},
{
$project: {
f: {
$map: {
input: {
$literal: ['dep', 'arr']
},
as: 'el',
in : {
type: '$$el',
name: {
$cond: [{
$eq: ['$$el', 'dep']
}, '$dep', '$arr']
}
}
}
}
}
},
{
$unwind: '$f'
}, {
$group: {
_id: {
'name': '$f.name'
},
departureCount: {
$sum: {
$cond: [{
$eq: ['$f.type', 'dep']
}, 1, 0]
}
},
arrivalCount: {
$sum: {
$cond: [{
$eq: ['$f.type', 'arr']
}, 1, 0]
}
}
}
}, {
$project: {
_id: 0,
name: '$_id.name',
departureCount: 1,
arrivalCount: 1
}
}
]