mongo aggregation - number of documents where field in one array is also in another one - mongodb

I have a Movies collection
...
{
...
"cast":[ "First Actor", "Second Actor" ],
"directors":[ "First Director", "Second Director" ]
},
{
...
"cast": [ "Actor Director", "First Actor" ],
"directors": [ "Actor Director", "First Director" ]
}
...
Using aggregation framework I need to get number of documents where at least one value from directors array is also in a cast array. How could I achieve it?

You can use $setIntersection to find the entries common to both arrays, then filter the documents whose result has a $size greater than 0 (meaning that at least one element is common to both arrays), and finally use a $count stage to count the documents that match this condition.
-- EDIT : Add $addFields stage in case of no array present for cast or directors
If any document doesn't contain a cast or directors array, you will get an error, because $size expects an array and receives a null value instead.
To avoid this, you need to add an $addFields stage that substitutes an empty array for null, for both cast and directors.
Here's the query :
// Count the documents in which cast and directors share at least one value.
db.collection.aggregate([
{
// Stage 1: make sure both fields are arrays, so that $size in the next
// stage never receives a non-array value.
$addFields: {
directors: {
$cond: {
if: {
$isArray: "$directors"
},
then: "$directors",
// Anything that is not an array is replaced by an empty array.
else: []
}
},
cast: {
$cond: {
if: {
$isArray: "$cast"
},
then: "$cast",
// Anything that is not an array is replaced by an empty array.
else: []
}
}
}
},
{
// Stage 2: keep only the documents whose intersection of cast and
// directors contains more than zero elements.
$match: {
$expr: {
$gt: [
{
$size: {
$setIntersection: [
"$cast",
"$directors"
]
}
},
0
]
}
}
},
{
// Stage 3: count the remaining documents under the key have_common_value.
$count: "have_common_value"
}
])
You can test it here

Related

Mongoose custom $match function

I have objects that look like this
{
name: 'Object 1',
fruitList: ['apple','pear','orange','grape']
},
{
name: 'Object 2',
fruitList: ['melon','pear','apple','kiwi']
}
I need to retrieve all the objects that have apple before pear in their fruitList; in this example, that would mean Object 1 only. Can I write a custom match function that iterates over that list and checks if it matches my criteria?
You need a mechanism to compare the indexes of the fruits in question and use the comparison as a match condition with the $expr operator. Leverage the aggregation pipeline operators:
$indexOfArray - Searches an array for an occurrence of a specified value and returns the array index (zero-based) of the first occurrence.
$subtract - return the difference between the two indexes. If the value is negative then apple appears before pear in the list.
$lt - the comparison operator to use in $expr query that compares two values and returns true when the first value is less than the second value.
To get a rough idea of these operators at play in an aggregation pipeline, check out the following Mongo Playground.
The actual query you need is as follows:
// Find the documents whose fruitList has 'apple' at a lower index than 'pear'.
// $indexOfArray gives the zero-based index of the first occurrence of each
// fruit; a negative difference means 'apple' comes before 'pear'.
// NOTE(review): per the MongoDB documentation $indexOfArray yields -1 when the
// value is absent, so a list containing 'pear' but no 'apple' would also
// produce a negative difference — confirm whether that case needs a guard.
db.collection.find({
  $expr: {
    // The operator must carry the `$` prefix: a bare `lt` is not an
    // expression operator that $expr recognises.
    $lt: [
      {
        $subtract: [
          { $indexOfArray: [ '$fruitList', 'apple' ] },
          { $indexOfArray: [ '$fruitList', 'pear' ] }
        ]
      },
      0
    ]
  }
})
Mongo Playground
For a generic regex based solution where the fruitList array may contain a basket of assorted fruits (in different cases) for instance:
"fruitList" : [
"mango",
"Apples",
"Banana",
"strawberry",
"peach",
"Pears"
]
The following query can address this challenge:
/**
 * Builds a `$map` expression over `$fruitList` that normalises matching
 * entries to the given fruit name.
 *
 * Each element that matches `fruit` as a regex (with the `i` option) is
 * replaced by the literal `fruit`; every other element is passed through
 * unchanged.
 *
 * @param {string} fruit - Pattern to look for, and the replacement value.
 * @returns {object} A `$map` aggregation expression.
 */
const getMapExpression = (fruit) => {
  // Condition: does the current element match the requested fruit?
  const isRequestedFruit = {
    $regexMatch: { input: '$$fruit', regex: fruit, options: 'i' },
  };
  // Replacement used when the condition holds.
  const canonicalName = { $literal: fruit };
  // [if, then, else] form of $cond; non-matching elements stay as they are.
  const perElement = { $cond: [isRequestedFruit, canonicalName, '$$fruit'] };

  return {
    $map: {
      input: '$fruitList',
      as: 'fruit',
      in: perElement,
    },
  };
};
// Find the documents where the first entry matching 'apple' comes before the
// first entry matching 'pear'. Each index is looked up in the array produced
// by getMapExpression, in which matching entries have been replaced by the
// plain fruit name.
db.collection.find({
$expr: {
$lt: [
{
// Negative when the 'apple' index is lower than the 'pear' index.
$subtract: [
{ $indexOfArray: [ getMapExpression('apple'), 'apple' ] },
{ $indexOfArray: [ getMapExpression('pear'), 'pear' ] }
]
},
0
]
}
})

How do I find the total number of subjects that have no prerequisites using aggregation?

I have tried several codes but it didn't work.
Example from the database,
one has a prerequisite and one does not have prerequisites and I would like to find the total number of the subject with no prerequisites :
// Sample subject whose embedded "subject" document has no "prerequisite" field.
db.Subject.insert(
{
"_id":ObjectId(),
"subject":{
"subCode":"CSCI321",
"subTitle":"Final Year Project",
"credit":6,
"type":"Core",
"assessments": [
{ "assessNum": 1,
"weight":30,
"assessType":"Presentation",
"description":"Prototype demonstration" },
{ "assignNum": 2,
"weight":70,
"assessType":"Implementation and Presentation",
"description":"Final product Presentation and assessment of product implementation by panel of project supervisors" }
]
}
}
)
// Sample subject whose embedded "subject" document has a one-element "prerequisite" array.
db.Subject.insert(
{
"_id":ObjectId(),
"subject":{
"subCode":"CSCI203",
"subTitle":"Algorithm and Data Structures",
"credit":3,
"type":"Core",
"prerequisite": ["csci103"]
}})
one of the few codes that I tried using :
// Attempt from the question: the $group stage has no _id key and uses $exists
// inside it. Running it produces the shell error trace listed under "Results".
db.Subject.aggregate({$group:{"prerequisite":{"$exists": null}, count:{$sum:1}}});
Results :
_getErrorWithCode#src/mongo/shell/utils.js:25:13
doassert#src/mongo/shell/assert.js:18:14
_assertCommandWorked#src/mongo/shell/assert.js:534:17
assert.commandWorked#src/mongo/shell/assert.js:618:16
DB.prototype._runAggregate#src/mongo/shell/db.js:260:9
DBCollection.prototype.aggregate#src/mongo/shell/collection.js:1062:12
#(shell):1:1
You can use $match to eliminate unwanted documents and $group to calculate sum
// Count the documents that have no subject.prerequisite field.
db.collection.aggregate([
{
// Keep only the documents in which the embedded field does not exist.
$match: {
"subject.prerequisite": {
"$exists": false
}
}
},
{
// Put every remaining document into one group (_id: null) and add 1 per
// document to obtain the total.
$group: {
_id: null,
total: {
$sum: 1
}
}
}
])
Working Mongo playground
This can be achieved within a single aggregation pipeline stage, i.e. the $group step, where you can use the BSON type comparison order to separate the
documents where the 'subject.prerequisite' field exists and has at least one element from those where it does not. The condition can be used as the group-by key, i.e. the _id field
in $group.
Consider running the following aggregation pipeline to get the desired results:
// Count subjects in two buckets, 'noPrerequisite' and 'havePrerequisite',
// using a single $group stage whose key is computed per document.
db.Subject.aggregate([
{ $group: {
_id: {
// $cond in array form: [condition, value when true, value when false].
$cond: [
{
$or: [
// First test: the field compares less than or equal to null.
{ $lte: ['$subject.prerequisite', null] },
{
// Second test: the array is empty; $ifNull substitutes [] so that
// $size always receives an array.
$eq: [
{ $size: { $ifNull: ['$subject.prerequisite', [] ] } },
0
]
}
]
},
'noPrerequisite',
'havePrerequisite'
]
},
// Add 1 for every document that falls into the bucket.
count: { $sum: 1 }
} }
])
The first condition in the $or simply returns true if a document does not have the embedded prerequisite field; the second condition is true when:
the length of ( prerequisite || [] ) is zero
In the above, $cond takes a logical condition as its first argument (if) and then returns the second argument where the evaluation is true (then) or the third argument where false (else). When used as an expression in the _id field for $group, it groups all the documents into either true/false which is conditionally projected as "noPrerequisite" (true) OR "havePrerequisite" (false) in the group key.
The results will contain both counts for documents where the prerequisite field exists and for those without the field OR it has an empty array.

Finding documents where an array in a subdocument has duplicates or is of a certain size

I have the following set of documents:
[
{
"_id":1,
"a":2,
"b":{
"q":[1,2,3],
"c":[{"d":1,"e":2,"f":3},{"d":1,"e":2,"f":3},{"d":2,"e":4,"f":5}]
}
},
{
"_id":2,
"a":4,
"b":{
"c":[{"d":2,"e":4,"f":5},{"d":2,"e":4,"f":5}],
"qq":5
}
},
{
"_id":3,
"b":{
"c":[{"d":2,"e":4,"f":5}],
"ff":5
}
},
{
"_id":4,
"b":{
"c":[{"d":2,"e":4,"f":5},{"d":1,"e":4,"f":5},{"d":2,"e":3,"f":5}],
"fre":5,
"qaacs":[]
}
},
{
"_id":5,
"b":{
"c":[{"d":2,"e":4,"f":5},{"d":1,"e":4,"f":5},{"d":2,"e":3,"f":5}],
"fre":5,
"qaacs":[]
}
}
]
My goal is to find the documents where the array "c" contains either n duplicate elements or m total elements.
For the sake of this example let's let n=2 and m=3.
In this case, the following should be the result
[
{
"_id":1,
"a":2,
"b":{
"q":[1,2,3],
"c":[{"d":1,"e":2,"f":3},{"d":1,"e":2,"f":3},{"d":2,"e":4,"f":5}]
}
},
{
"_id":2,
"a":4,
"b":{
"c":[{"d":2,"e":4,"f":5},{"d":2,"e":4,"f":5}],
"qq":5
}
},
{
"_id":4,
"b":{
"c":[{"d":2,"e":4,"f":5},{"d":1,"e":4,"f":5},{"d":2,"e":3,"f":5}],
"fre":5,
"qaacs":[]
}
},
{
"_id":5,
"b":{
"c":[{"d":2,"e":4,"f":5},{"d":1,"e":4,"f":5},{"d":2,"e":3,"f":5}],
"fre":5,
"qaacs":[]
}
}
]
Finding arrays that have a total of m elements can be obtained with this query
coll.aggregate([{"$match":{"b":{"$exists":True},"b.c":{"$size":m}}}]) but I don't know how to get the count of duplicates.
My first thought was to use $group on "c" and get counts that way but found that that isn't possible.
You need to make use of $setIntersection operator. When used with just an array, it will return unique values from the array.
Approach: To detect n duplicates in an array, first we calculate the actual size of the array and the size of the set of unique elements of the array. Then we calculate the difference between these two numbers; if it's greater than or equal to n-1, this means the array has n duplicates (in this case the threshold is 1, as n=2). Finally, using the $or operator, we get the required results.
Query:
// Return the documents whose b.c array either contains duplicates or has
// exactly m elements.
db.coll.aggregate([
{
$match: {
$expr: {
$or: [
{
// Duplicate test: total size minus the number of unique elements.
$gte: [
{
$subtract: [
{
$size: "$b.c"
},
{
// $setIntersection on the single array yields its unique values.
$size: {
$setIntersection: "$b.c"
}
}
]
},
1 // Replace this with n-1
]
},
{
// Size test: the array has exactly m elements.
$eq: [
{
$size: "$b.c"
},
3 // Replace this with m
]
}
]
}
}
}
])

Mongodb comparing using a field with another field in an array of Objects

I have the below structure for my collection:
{
"price":123,
"totalPrices": [
{
"totPrice":123
}
]
}
I am trying to query for all the documents in my collection where price is not equal to totalPrices.totPrice (so the above should not be returned).
But it keeps returning the documents which have equal prices as well (such as above sample).
This is the query I'm using:
// Query from the question.
{
// NOTE(review): totalPrices is an array in the sample document, so
// this.totalPrices.totPrice reads a property of the array itself rather than
// of its element — presumably the cause of the unexpected matches.
$where : "this.price!== this.totalPrices.totPrice",
// NOTE(review): the sample document names this field "totalPrices".
totalPrice:{$size:1}
}
What am I doing wrong :(
First, you need to match the documents where the size of the totalPrices array is equal to 1. Second, you need to unwind totalPrices, since it's an array field. Last, you should match the documents where price is not equal to totalPrices.totPrice. Try the code below:
// Return the documents whose single totalPrices entry has a totPrice that
// differs from price.
db.collection.aggregate([
{
// Stage 1: keep the documents whose totalPrices array has exactly one element.
$match: {
$expr: {
$eq: [
{
$size: "$totalPrices"
},
1
]
}
}
},
{
// Stage 2: unwind the array so that totalPrices becomes the element itself.
$unwind: "$totalPrices"
},
{
// Stage 3: keep the documents where the two prices are not equal.
$match: {
$expr: {
$ne: [
"$price",
"$totalPrices.totPrice"
]
}
}
}
])
MongoPlayGroundLink

mongodb adding up values in array based on some condition

I can't get my head around a mongodb aggregation framework construction that adds up some values for each "_id" field documents... IF those values exist for the field "Wert".
E.g. I have a document with an _id field and a conditional "ProduktTeilsummeDemonstrator":[] or "ProduktTeilsummeDemonstrator":[{Wert:342},{Wert:142}]. That array can be empty or not: if it is empty, I want to add a new field "ProduktTeilsumme":0; otherwise, I want to add up all the values in that array into the new field...
The data that I have looks like this:
[{"_id":230,"ProduktSummeDemonstrator":713,"ProduktTeilsummeDemonstrator":[],"ProduktTeilsumme":null},{"_id":855,"ProduktSummeDemonstrator":1744,"ProduktTeilsummeDemonstrator":[],"ProduktTeilsumme":null},{"_id":767,"ProduktSummeDemonstrator":1010,"ProduktTeilsummeDemonstrator":[{"Zeitstempel":"2018-07-09T15:07:32.472Z","Wert":24},{"Zeitstempel":"2018-07-09T15:07:32.472Z","Wert":102},{"Zeitstempel":"2018-07-09T14:52:32.473Z","Wert":15},{"Zeitstempel":"2018-07-09T14:52:32.472Z","Wert":20},{"Zeitstempel":"2018-07-09T15:07:32.472Z","Wert":90},{"Zeitstempel":"2018-07-09T14:52:32.472Z","Wert":104},{"Zeitstempel":"2018-07-09T15:07:32.473Z","Wert":29},{"Zeitstempel":"2018-07-09T14:52:32.472Z","Wert":94},{"Zeitstempel":"2018-07-09T14:52:32.473Z","Wert":33},{"Zeitstempel":"2018-07-09T15:07:32.473Z","Wert":245},{"Zeitstempel":"2018-07-09T14:52:32.473Z","Wert":243},{"Zeitstempel":"2018-07-09T15:07:32.473Z","Wert":11}],"ProduktTeilsumme":null},{"_id":9,"ProduktSummeDemonstrator":94,"ProduktTeilsummeDemonstrator":[],"ProduktTeilsumme":null}]
I tried out different things with $reduce or $cond expressions, but somehow it won't add up: (Previously before that calculation stage I am grouping by ID and also filtering based on some time field condition..)
// $project stage from the question: filters entries by timestamp and tries to
// sum their Wert values within the same stage.
{
$project: {
ProduktSummeDemonstrator: "$ProduktSummeDemonstrator",
// Keep only the entries of $res whose Zeitstempel lies between
// req.params.start and req.params.end (both inclusive).
ProduktTeilsummeDemonstrator: {
$filter: {
input: "$res",
as: "res",
cond: { $and: [
{ $gte: ["$$res.Zeitstempel", new Date(req.params.start) ] },
{ $lte: ["$$res.Zeitstempel", new Date(req.params.end) ] }
] }
}
},
ProduktTeilsumme: {/*
$reduce: {
input: "$ProduktTeilsummeDemonstrator",
initialValue:0,
in: {
$add: ["$$value","$$this.Wert"]
}
} */
// NOTE(review): the condition compares the field with an empty string, not
// with an empty array; and "$ProduktTeilsummeDemonstrator" here presumably
// refers to the incoming document's field rather than the filtered array
// defined above in this same stage — confirm.
$cond: {
if: { $eq: [ "", "$ProduktTeilsummeDemonstrator" ] },
then: 0,
else: {
// Add up the Wert of every entry, starting from 0.
$reduce: {
input: "$ProduktTeilsummeDemonstrator",
initialValue: 0,
in: {
$add: ["$$value","$$this.Wert"]
}
}
}
}
}
}
}
at least for "_id":767 I should get some values back, but I am getting "null" always.
You have to use multiple project stages if you want to keep both the array and added value. One for $filtering ProduktTeilsummeDemonstrator followed by adding up array values.
Something like
// Two $project stages: the first filters the array, the second sums the Wert
// values of the filtered array while keeping the array itself.
[
// Stage 1: keep ProduktSummeDemonstrator and reduce
// ProduktTeilsummeDemonstrator to the entries whose Zeitstempel lies between
// req.params.start and req.params.end (both inclusive).
{"$project":{
"ProduktSummeDemonstrator":1,
"ProduktTeilsummeDemonstrator":{
"$filter":{
"input":"$ProduktTeilsummeDemonstrator",
"as":"res",
"cond":{
"$and":[
{"$gte":["$$res.Zeitstempel", new Date(req.params.start)]},
{"$lte":["$$res.Zeitstempel", new Date(req.params.end)]}
]
}
}
}
}},
// Stage 2: pass both fields through and add ProduktTeilsumme as the sum of
// the Wert values in the filtered array.
{"$project":{
"ProduktSummeDemonstrator":1,
"ProduktTeilsummeDemonstrator":1,
"ProduktTeilsumme":{"$sum":"$ProduktTeilsummeDemonstrator.Wert"}
}}
]