MongoDB Aggregate keep results

Is there a way to keep results from a match under a new field, and under another new field some computed value?
I'm trying to extract a set of genres from a collection of movies, and also keep original results...
Document example:
{
"_id": "62e97ba6ec445b864fc3bc39",
"id": 19913,
"genres": [
"Comedy",
"Drama",
"Romance"
],
"imdb_id": "tt1022603",
"overview": "Tom, greeting-card writer and hopeless romantic...",
"title": "(500) Days of Summer",
"release_date": "2009-07-17",
}
Desired output:
{
result: [
... movies
],
categories: [
"Comedy",
"Drama",
"Romance"
]
}
What I have so far:
use('the_base');
function matchGenre(genre) {
return {
"$match": {
"genres": genre,
}
};
}
function limit(num) {
return {
"$limit": num
};
}
db.movie.aggregate([
matchGenre("Drama"),
limit(5),
{"$unwind": "$genres"},
{"$group": {
"_id": 0,
"gens": { "$addToSet": "$genres" }
}}
]);
My current result:
{
"_id": 0,
"gens": [
"Romance",
"Comedy",
"Thriller",
"Science Fiction",
"Fantasy",
"Drama",
"Crime",
"Action",
"Mystery",
"Adventure",
"Horror"
]
}

I would generally use facets.
Here's an example: https://mongoplayground.net/p/PbORyp4JaF5
db.collection.aggregate([
{
$facet: {
results: [
{
$match: {}
}
],
categories: [
{
$unwind: "$genres"
},
{
$sortByCount: "$genres"
}
],
release_date: [
{
$unwind: "$release_date"
},
{
$sortByCount: "$release_date"
}
]
}
}
])
I have taken the liberty of adding an additional release_date facet, and of ensuring that there is a count in each facet (via $sortByCount), as this is often helpful and sometimes required.
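If you want output in exactly the shape asked for (the matched movies under result plus a flat categories array), a variant along the following lines should also work; this is just a sketch reusing the movie collection and the Drama filter from the question:
db.movie.aggregate([
    { $match: { genres: "Drama" } },
    { $limit: 5 },
    {
        $facet: {
            result: [ { $match: {} } ],          // the matched movies, unchanged
            categories: [
                { $unwind: "$genres" },
                { $group: { _id: null, genres: { $addToSet: "$genres" } } }
            ]
        }
    },
    // categories is currently [ { _id: null, genres: [...] } ]; flatten it
    { $addFields: { categories: { $arrayElemAt: [ "$categories.genres", 0 ] } } }
])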

Nested arrays $unwind and $group back together in mongoDB

We have three nested arrays:
principalCredits with 2 objects
credits with 2 objects each
awardNominations.edges with variable totals from 0 to 3
The task is to add a field to the third array of objects awardNominations.edges based on a lookup from eventsCollection.
Here's the data I have (simplified, can copy and paste into MongoDB Compass):
[{
"principalCredits": [
{
"category": {
"id": "director",
"text": "Directors"
},
"totalCredits": 2,
"credits": [
{
"name": {
"id": "nm11813828",
"nameText": {
"text": "Pippa Ehrlich"
},
"awardNominations": {
"total": 2,
"edges": [
{
"node": {
"id": "an1393007",
"isWinner": true,
"award": {
"id": "an1393007",
"year": 2020,
"text": "Green Warsaw Award",
"event": {
"id": "ev0003786",
"text": "Millennium Docs Against Gravity"
},
"category": {
"text": null
}
}
}
},
{
"node": {
"id": "an1428940",
"isWinner": false,
"award": {
"id": "an1428940",
"year": 2021,
"text": "IDA Award",
"event": {
"id": "ev0000351",
"text": "International Documentary Association"
},
"category": {
"text": "Best Writing"
}
}
}
},
]
}
},
"category": {
"id": "director",
"text": "Director"
}
},
{
"name": {
"id": "nm1624755",
"nameText": {
"text": "James Reed"
},
"awardNominations": {
"total": 3,
"edges": [
{
"node": {
"id": "an0694012",
"isWinner": true,
"award": {
"id": "an0694012",
"year": 2015,
"text": "Best of Festival",
"event": {
"id": "ev0001486",
"text": "Jackson Wild Media Awards"
},
"category": {
"text": "Best of Festival"
}
}
}
},
{
"node": {
"id": "an0975779",
"isWinner": true,
"award": {
"id": "an0975779",
"year": 2017,
"text": "RTS West Television Award",
"event": {
"id": "ev0000571",
"text": "Royal Television Society, UK"
},
"category": {
"text": "Documentary"
}
}
}
},
{
"node": {
"id": "an0975781",
"isWinner": true,
"award": {
"id": "an0975781",
"year": 2015,
"text": "Grand Teton Prize",
"event": {
"id": "ev0001356",
"text": "Jackson Hole Film Festival"
},
"category": {
"text": "Best in Festival"
}
}
}
}
]
}
},
"category": {
"id": "director",
"text": "Director"
}
}
]
},
{
"category": {
"id": "writer",
"text": "Writers"
},
"totalCredits": 2,
"credits": [
{
"name": {
"id": "nm11813828",
"nameText": {
"text": "Pippa Ehrlich"
},
"awardNominations": {
"total": 2,
"edges": [
{
"node": {
"id": "an1393007",
"isWinner": true,
"award": {
"id": "an1393007",
"year": 2020,
"text": "Green Warsaw Award",
"event": {
"id": "ev0003786",
"text": "Millennium Docs Against Gravity"
},
"category": {
"text": null
}
}
}
},
{
"node": {
"id": "an1428940",
"isWinner": false,
"award": {
"id": "an1428940",
"year": 2021,
"text": "IDA Award",
"event": {
"id": "ev0000351",
"text": "International Documentary Association"
},
"category": {
"text": "Best Writing"
}
}
}
}
]
}
},
"category": {
"id": "writer",
"text": "Writer"
},
},
{
"name": {
"id": "nm1624755",
"nameText": {
"text": "James Reed"
},
"awardNominations": {
"total": 0,
"edges": []
}
},
"category": {
"id": "writer",
"text": "Writer"
},
}
]
}
]
}]
An example scored award should look like this:
{
"id": "an0975781",
"isWinner": true,
"award": { ... },
"score": 1.5
}
Once all the manipulation is done, the data needs to be in exactly the same shape as it was initially, with no null values. So in the case of the last array awardNominations.edges it should be [] as it was, and not { node: { score: null } } or anything else.
To achieve this I have created an aggregation pipeline:
[
{
'$unwind': {
'path': '$principalCredits',
'preserveNullAndEmptyArrays': true
}
}, {
'$unwind': {
'path': '$principalCredits.credits',
'preserveNullAndEmptyArrays': true
}
}, {
'$unwind': {
'path': '$principalCredits.credits.name.awardNominations.edges',
'preserveNullAndEmptyArrays': true
}
}, {
'$lookup': {
'from': 'eventsCollection',
'localField': 'principalCredits.credits.name.awardNominations.edges.node.award.event.id',
'foreignField': 'id',
'as': 'matchingEvent'
}
}, {
'$unwind': {
'path': '$matchingEvent',
'preserveNullAndEmptyArrays': true
}
}, {
'$addFields': {
'principalCredits.credits.name.awardNominations.edges.node.score': {
'$multiply': [
'$matchingEvent.importance', {
'$cond': {
'if': '$principalCredits.credits.name.awardNominations.edges.node.isWinner',
'then': 1.5,
'else': 1.2
}
}
]
}
}
}
]
The above pipeline assigns the score to each award. However, the null values are still there and I have absolutely no idea how to group it back together. I have tried to group with:
{
'$group': {
'_id': '$id',
'titleDoc': {
'$first': '$$ROOT'
},
'allPrincipalCredits': {
'$push': '$principalCredits'
}
}
}
The idea was to keep the root and then somehow sort all the records back into shape, but I could not get back to the original object structure.
Any help in putting it all together will be much appreciated!
I'm fairly good with simple aggregations, but this seems to be too much for me currently and would love to learn how to $group things back properly.
I've tried and put together all the knowledge I have so far from different sources and similar answers but can't seem to get it to work.
Lookup collection eventsCollection contains objects like this:
{
"_id": { "$oid": "62c57125d6943d92f83f6fff" },
"id": "ev0030197",
"text": "#AmLatino Film Festival",
"importance": 1
}
So the "rule" in restoring to original structure is that for each $unwind you did to "deconstruct" the document you now have to do a $group to restore it.
As you can imagine in such a pipeline this could be VERY cumbersome. but definitely doable.
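For reference, this is roughly what that pattern looks like for a single top-level array (a minimal sketch, with arr as a hypothetical field name; each level of nesting needs its own $unwind/$group pair, applied from the innermost level outwards):
db.collection.aggregate([
    { $unwind: "$arr" },
    // ... per-element modifications go here ...
    { $group: {
        _id: "$_id",
        doc: { $first: "$$ROOT" },   // keep the rest of the document
        arr: { $push: "$arr" }       // rebuild the array from the unwound elements
    } },
    { $replaceRoot: { newRoot: { $mergeObjects: [ "$doc", { arr: "$arr" } ] } } }
])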
However, let me propose a different approach that is still fairly messy but much easier than the alternative; it is also more efficient from a performance perspective.
(Just a minor side note: the reason your score is still null is that you have a syntax error in your $multiply expression.)
Anyway, the idea is to first gather all the unique event ids that exist in the nested documents.
Then execute one lookup to fetch all the relevant events.
And finally add the score field using $map and $mergeObjects instead of $unwinding and $grouping, like so:
db.collection.aggregate([
{
$addFields: {
allEvents: {
$reduce: {
input: {
$map: {
input: "$principalCredits",
in: {
$map: {
input: "$$this.credits",
as: "credit",
in: {
$map: {
input: "$$credit.name.awardNominations.edges",
as: "edge",
in: "$$edge.node.award.event.id"
}
}
}
}
}
},
initialValue: [],
in: {
"$concatArrays": [
{
"$reduce": {
input: "$$this",
initialValue: [],
in: {
"$concatArrays": [
"$$this",
"$$value"
]
}
}
},
"$$value"
]
}
}
}
}
},
{
"$lookup": {
"from": "eventsCollection",
"localField": "allEvents",
"foreignField": "id",
"as": "matchingEvents"
}
},
{
$addFields: {
principalCredits: {
$map: {
input: "$principalCredits",
in: {
$mergeObjects: [
"$$this",
{
credits: {
$map: {
input: "$$this.credits",
as: "credit",
in: {
$mergeObjects: [
"$$credit",
{
name: {
"$mergeObjects": [
"$$credit.name",
{
"awardNominations": {
"$mergeObjects": [
"$$credit.name.awardNominations",
{
edges: {
$map: {
input: "$$credit.name.awardNominations.edges",
as: "edge",
in: {
node: {
$mergeObjects: [
"$$edge.node",
{
score: {
"$multiply": [
{
$cond: [
"$$edge.node.isWinner",
1.5,
1.2
]
},
{
$first: {
$map: {
input: {
$filter: {
input: "$matchingEvents",
as: "matchedEvent",
cond: {
$eq: [
"$$matchedEvent.id",
"$$edge.node.award.event.id"
]
}
}
},
as: "matched",
in: "$$matched.importance"
}
}
}
]
}
}
]
}
}
}
}
}
]
}
}
]
}
}
]
}
}
}
}
]
}
}
}
}
},
{
$unset: [
"allEvents",
"matchingEvents"
]
}
])
Mongo Playground
I will just mention that you can make this much, much cleaner by involving some application code while keeping the same approach: first get the unique event ids with distinct, then fetch the matching importance for each event, and finally execute an update using arrayFilters constructed from this information.
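A rough mongosh sketch of that idea (collection and field names are taken from the documents above; in practice it works out to one update per event and winner/non-winner combination, and it assumes every credit has an awardNominations.edges array, possibly empty):
// 1. Unique event ids referenced anywhere in the nested arrays
var eventIds = db.collection.distinct(
    "principalCredits.credits.name.awardNominations.edges.node.award.event.id");

// 2. Fetch the matching importance for each event
var events = db.eventsCollection.find({ id: { $in: eventIds } }).toArray();

// 3. One update per (event, isWinner) combination, using arrayFilters
events.forEach(function(ev) {
    [[true, 1.5], [false, 1.2]].forEach(function(pair) {
        db.collection.updateMany(
            { "principalCredits.credits.name.awardNominations.edges.node.award.event.id": ev.id },
            { $set: {
                "principalCredits.$[].credits.$[].name.awardNominations.edges.$[e].node.score":
                    ev.importance * pair[1]
            } },
            { arrayFilters: [ { "e.node.award.event.id": ev.id, "e.node.isWinner": pair[0] } ] }
        );
    });
});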
Final side note: the provided pipeline does not deal with null or missing values, so if an array is missing an error will be thrown, as $map expects its input to be a valid array.
This can easily be solved by just wrapping each of these expressions with $ifNull, like so:
{
$map: {
input: {$ifNull: ["$$this.credits",[]]}
}
}
This will also replace null values with an empty array ([]).
The deeply buried keys (...award.event.id) in arrays confound an easy approach without 1) messing up the structure, as the OP has noted, or 2) incurring potentially very expensive multiple $unwind calls.
Recommendation: a two-pass approach. Get the necessary importance values for the principalCredits objects in question, then go back and manually iterate over the collection, diving into the structure and applying the logic score = importance * (isWinner ? 1.5 : 1.2).
PASS 1: Get the ev data
c=db.foo.aggregate([
{$project: {
XX: {$reduce: {
// Rapidly get to things we need to lookup:
input: '$principalCredits.credits.name.awardNominations.edges.node.award.event.id',
// We end up with a mess incl. empty arrays...
// [ [[ev1,ev2], [ev3,ev4]], [], [[ev1,...], [] ... ] ]
// Need to collapse all those arrays of arrays of arrays into
// a single list of ev values, hence a reduce within a reduce:
initialValue: [],
in: {$concatArrays: [
'$$value',
{$reduce: {
input: '$$this',
initialValue: [],
in: {$concatArrays: [ '$$value', '$$this' ] }
}} ]}
}}
}}
// XX is now [ ev1,ev2,ev3,ev4,ev1 ... ]
// The empty arrays are ignored. Don't worry about dupes.
,{$lookup: {
from: "Xev",
let: { evids: "$XX" },
pipeline: [
{$match: {$expr: {$in: ["$id","$$evids"]} } }
],
as: 'XX' // overwrite XX...
}}
]);
evdict = {}
c.forEach(function(d) {
d['XX'].forEach(function(ww) {
evdict[ww['id']] = ww;
});
});
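The resulting evdict maps each event id to its event document: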
{
"ev0003786" : {
"_id" : ObjectId("62cd7f8138d0fbc0eacfb17f"),
"id" : "ev0003786",
"text" : "Millennium Docs Against Gravity",
"importance" : 1
},
"ev0000351" : {
"_id" : ObjectId("62cd7f8138d0fbc0eacfb180"),
"id" : "ev0000351",
"text" : "International Documentary Association",
"importance" : 2
},
"ev0000571" : {
"_id" : ObjectId("62cd7f8138d0fbc0eacfb181"),
"id" : "ev0000571",
"text" : "Royal Television Society, UK",
"importance" : 3
}
}
PASS 2: Iterate main collection
Left as an exercise for the reader; a sketch follows below.
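A rough sketch of that pass in the shell, assuming the collection is db.foo as in pass 1 and reusing the evdict built above:
db.foo.find().forEach(function(doc) {
    if (!doc.principalCredits) return;   // nothing to score
    doc.principalCredits.forEach(function(pc) {
        (pc.credits || []).forEach(function(credit) {
            var edges = (credit.name.awardNominations && credit.name.awardNominations.edges) || [];
            edges.forEach(function(edge) {
                var ev = evdict[edge.node.award.event.id];
                if (ev != null) {
                    // score = importance * (isWinner ? 1.5 : 1.2)
                    edge.node.score = ev.importance * (edge.node.isWinner ? 1.5 : 1.2);
                }
            });
        });
    });
    // Write the modified structure back; empty edges arrays stay [] untouched.
    db.foo.updateOne(
        { _id: doc._id },
        { $set: { principalCredits: doc.principalCredits } }
    );
});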
Note that if:
- the number of events is small, and
- there is no need or value in performing a $match on the initial principalCredits collection (i.e. before the fancy $project/$reduce) to significantly reduce the lookup set into events,
then this whole thing is unnecessary. Simply slurp all events into evdict with a quick find and proceed to pass 2.
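For example, reusing the Xev collection name from pass 1:
evdict = {};
db.Xev.find().forEach(function(ev) {
    evdict[ev.id] = ev;   // key every event document by its id
});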
There is potentially a very cool solution that can do this in one pass.
UPDATED: See Tom's answer below.
Note to MongoDB 5.0 users: the new $getField operator allows you to pluck out fields by name instead of having to use the standard trick of dot notation in the $map's in clause to access the field. This might be clearer to some:
{$getField: {
"field": "importance",
"input": {
$first: {
$filter: {
input: "$matchingEvents",
as: "matchedEvent",
cond: {
$eq: [
"$$matchedEvent.id",
"$$edge.node.award.event.id"
]
}
}
}
}
}
}

MongoDB merge $facet results with corresponding items

Hello, I have a collection like this:
"_id" :"601bd0f4be72d839303adcd3",
"title":"Payment-1",
"initialBalance": {"$numberDecimal": "75"},
"paymentHistory":[
{"_id": "601bd1542df40f2ca8a769df","payment": {"$numberDecimal": "10"}},
{"_id": "601bd1542df40f2ca8a769de","payment": {"$numberDecimal": "20"}},
]
I want to calculate the active balance (initialBalance - total of paymentHistory) for each document.
I have calculated the total payments from paymentHistory for each document in the collection:
this.aggregate([
{$match:{...}},
{
$facet:{
info:[
{$project:{_id:1,title:1,initialBalance:1}}
],
subPayments:[
{$unwind:"$paymentHistory"},
{$group:{_id:"$_id",total:{$sum:"$paymentHistory.payment"}}},
]
}
}
])
I get this result for the above query:
"info": [
{
"_id": "601bd0f4be72d839303adcd3",
"title": "Payment-1",
"initialBalance": {"$numberDecimal": "580"},
},
...
],
"subPayments": [
{
"_id": "601bd0f4be72d839303adcd3",
"total": {"$numberDecimal": "80.75"}
},
...
]
}
I added the following stages to the aggregation:
{$facet:{...}},
{$project: {payments:{$setUnion:['$info','$subPayments']}}},
{$unwind: '$payments'},
{$replaceRoot: { newRoot: "$payments" }},
Now, I get this result
{
"_id": "601bd0f4be72d839303adcd3",
"total": {"$numberDecimal": "80.75"}
},
{
"_id": "601bd0f4be72d839303adcd3",
"title": "Payment-1",
"initialBalance": {"$numberDecimal": "580"},
},
{...}
I think that if I group them via _id, then I can calculate activeBalance in a later stage:
...
{
$group:{
_id:"$_id",
title:{$first:"$title"},
initialBalance:{$first:"$initialBalance"},
totalPayment:{$first: "$total"},
}
},
{
$set:{activeBalance:{$subtract:[{$ifNull:["$initialBalance",0]}, {$ifNull:["$totalPayment",0]}]}}
}
The problem is that after the $group stage the fields come back null:
{
"_id": "601bd0f4be72d839303adcd3",
"title": null,
"initialBalance": null,
"totalPayment": {
"$numberDecimal": "80.75"
},
"activeBalance": {
"$numberDecimal": "-80.75"
}
}
How can I solve this problem?
This is what #prasad_ suggested:
db.accounts.aggregate([
{
$addFields: {
activeBalance: {
$subtract: [
"$initialBalance",
{
$reduce: {
input: "$paymentHistory",
initialValue: 0,
in: { $sum: ["$$value", "$$this.payment"] }
}
}
]
}
}
}
]);
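With the sample document above (initialBalance 75, payments 10 and 20), this should yield something like:
{
"_id": "601bd0f4be72d839303adcd3",
"title": "Payment-1",
"initialBalance": {"$numberDecimal": "75"},
"paymentHistory": [ ... ],
"activeBalance": {"$numberDecimal": "45"}
}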

MongoDB: group, and then counting different values

I have a list of books. I would like to get the number of genres each author wrote in, and also to include in the result which genres those are. My database looks like this:
{"_id": ObjectID("1), "title": "Harry Potter", "year": NumberInt(2000), "author": "JK. Rowling",
"genres": "Fantasy"},
"_id": ObjectID("2"), "title": "Harry Potter 99", "year": NumberInt(2020), "author": "JK. Rowling",
"genres": "Drama"}, "_id": ObjectID("2"), "title": "Harry Potter", "year": NumberInt(2000), "author": "JK. Rowling",
"genres": "Drama"}, {...}
So, my code so far looks like this:
phase1 = {$group : {"_id" : "$author"}, "countgenres" : {$sum : 1}}
phase2 = {$addFields : "genres"}}
phase3 = {$sort : {"numgenres" : -1}}
steps = [phase1, phase2, phase3]
db.database.aggregate(steps)
This is not working for me, so I would appreciate help writing a correct query for this. The result should look like this:
{
"_id" : "JK. Rowling",
"countgenres" : 4,
"genres" : [
"War",
"Fantasy",
"Drama",
"Crime"
]
}
Thanks.
You cannot do that directly in the $group stage. Instead, you have to use an $addFields stage with $reduce and $setUnion to concatenate the arrays without duplicates.
Then you can just add a field with the new array's size and do the $sort (see the note after the pipeline below).
db.collection.aggregate([
{
$group: {
_id: "$author",
genres: {
$push: "$genres"
}
}
},
{
$addFields: {
genres: {
"$reduce": {
"input": "$genres",
"initialValue": [],
"in": {
$setUnion: [
"$$value",
"$$this"
]
}
}
}
}
},
{
$addFields: {
countGenres: {
$size: "$genres"
}
}
}
])
You can test here
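If you also want the $sort mentioned above, a stage along these lines (descending by count, using the countGenres field from the pipeline) can be appended:
{
    $sort: { countGenres: -1 }
}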
Try this query:
db.collection.aggregate([
{
"$match": {
"author": "JK. Rowling"
}
},
{
"$group": {
"_id": "$author",
"genres": {
"$addToSet": "$genres"
}
}
},
{
$addFields: {
genres: {
"$reduce": {
"input": "$genres",
"initialValue": [],
"in": {
$setUnion: [
"$$value",
"$$this"
]
}
}
}
}
},
{
"$project": {
"countgenres": {
"$size": "$genres"
},
"genres": 1
}
}
])
The first stage is a $match by the author.
Then I group, and I've used $addToSet to avoid repeated values.
After that, the values are merged with $addFields.
After the group, the genres field is output like this:
"genres": [
[
"War",
"Fantasy",
"Drama",
"Crime"
],
[
"War",
"Fantasy",
"Drama"
]
]
And to merge them it is necessary to use $setUnion.
The last step is to count the array size and output the values you want.
Example here

How to aggregate list in MongoDB?

I have a set of containers, each with a set of items. The MongoDB document for a container looks like the one below. How do I list all the items in all the containers, with the container name included in each item?
{
"Container": 28392
...
"Items": [
{
"ItemName": "Foo",
...
},
{
"ItemName": "Bar",
...
}
]
}
Expected output:
[
{
"ItemName": "Foo",
"Container": 28392
...
},
{
"ItemName": "Bar",
"Container": 28392
...
},
{
"ItemName": "Baz",
"Container": 52892
...
}
]
Would this be possible with some kind of an $unwind? If so, how would I aggregate this?
After $unwind you can use a $project stage to do what you want:
db.collection.aggregate([
{
$unwind: "$Items"
},
{
$project: {
Container: "$Container",
ItemName: "$Items.ItemName"
}
}
])
Playground:
https://mongoplayground.net/p/H-aWx07pCDt
Or, if your MongoDB version is 4.2 or newer, you can use $replaceWith with $mergeObjects to keep all the fields from the Items array:
db.collection.aggregate([
{
$unwind: "$Items"
},
{
$replaceWith: {
$mergeObjects: [
{
Container: "$Container"
},
"$Items"
]
}
}
])
Playground:
https://mongoplayground.net/p/E5KqFlNq1CL
Input:
[
{
"Container": 28392,
"Items": [
{
"ItemName": "Foo",
"ItemDesc": "Foo desc"
},
{
"ItemName": "Bar",
"ItemDesc": "Bar desc"
}
]
}
]
Output:
[
{
"Container": 28392,
"ItemDesc": "Foo desc",
"ItemName": "Foo"
},
{
"Container": 28392,
"ItemDesc": "Bar desc",
"ItemName": "Bar"
}
]
Are you looking for a query like this?
db.collection.aggregate([
{
"$unwind": "$Items"
},
{
"$group": {
"_id": "$Container",
"All Items": {
"$push": {
"ItemName": "$Items.ItemName",
"Container": "$Container"
}
}
}
},
{
"$project": {
"_id": 0
}
}
])
Here's a link to a demo of the query - https://mongoplayground.net/p/QDxr-2VUa_f

Aggregation on complex objects

I have a collection with documents like the following:
{
"towers": [
{
"name": "foo",
"towers": [
{
"name": "A",
"buildType": "Apartament"
},
{
"name": "B",
"buildType": "Apartament"
}
]
},
{
"name": "xpto",
"towers": [
{
"name": "C",
"buildType": "House"
},
{
"name": "D",
"buildType": "Office"
}
]
}
]
}
All I need to know is what are all the possible values for "buildType", like:
Apartment
House
Office
It's a complex object and the data to aggregate is deep inside it. Is there any way to achieve the results I want?
You need to $unwind the two nested arrays, "towers" and "towers.towers", and then use $group on the "towers.towers.buildType" field to get the distinct values:
db.collection.aggregate([
{ "$unwind": "$towers" },
{ "$unwind": "$towers.towers" },
{ "$group": {
"_id": "$towers.towers.buildType"
}}
])
Output
[
{
"_id": "Office"
},
{
"_id": "House"
},
{
"_id": "Apartament"
}
]
db.collection.aggregate(
// Pipeline
[
// Stage 1
{
$unwind: {
path: "$towers",
}
},
// Stage 2
{
$unwind: {
path: "$towers.towers",
}
},
// Stage 3
{
$group: {
_id: '$_id',
buildType: {
$addToSet: '$towers.towers.buildType'
}
}
},
]
);