Mongo db not in query by having two subset of documents from same collection - mongodb

I am new to mongodb. Assume the following. There are 3 types of documents in one collection x, y and z.
docs = [{
"item_id": 1
"type": "x"
},
{
"item_id": 2
"type": "x"
},{
"item_id": 3
"type": "y",
"relavent_item_ids": [1, 2]
},
{
"item_id": 3
"type": "y",
"relavent_item_ids": [1, 2, 3]
},{
"item_id": 4
"type": "z",
}]
I want to get the following.
Ignore the documents with type z
Get all the documents of type x where it's item_id is not in relavent_item_ids of type y documents.
The result should have item_id field.
I tried doing match $in but this returns me all the records, I am unable to figure out how to have in condition with subset of documents of type y.

You can use below query
const item_ids = (await db.collection.find({ "type": "y" })).map(({ relavent_item_ids }) => relavent_item_ids)
const result = db.collection.find({
"item_id": { "$exists": true },
"type": { "$ne": "z", "$eq": "x" },
"relavent_item_ids": { "$nin": item_ids }
})
console.log({ result })
Ignore the documents with type z --> Use $ne not equal to query operator to filter out z types.
Get all the documents of type x where it's item_id is not in relavent_item_ids of type y documents --> Use $expr to match the same documents fields.
The result should have item_id field --> Use $exists query operator.

The solution:
db.test.aggregate( [
{
$facet: {
firstQuery: [
{
$match: { type: { $eq: "x", $ne: "z" } }
},
{
$project: {
item_id : 1, _id: 0
}
}
],
secondQuery: [
{
$match: { type: "y" }
},
{
$group: {
_id: null,
relavent: { $push: "$relavent_item_ids" }
}
},
{
$project: {
relavent: {
$reduce: {
input: "$relavent",
initialValue: [ ],
in: { $setUnion: [ "$$value", "$$this" ] }
}
}
}
}
]
}
},
{
$addFields: { secondQuery: { $arrayElemAt: [ "$secondQuery", 0 ] } }
},
{
$project: {
result: {
$filter: {
input: "$firstQuery" ,
as: "e",
cond: { $not: [ { $in: [ "$$e.item_id", "$secondQuery.relavent" ] } ] }
}
}
}
},
] )
Using the input documents in the question post and adding one more following document to the collection:
{
"item_id": 11,
"type": "x",
}
: only this document's item_id (value 11) will show in the output.
The aggregation uses a $facet to make two individual queries with a single pass. The first query gets all the "x" types (and ignores type "z") as an array. The second query gets an array of relavent_item_ids with unique values (from the documents of type "y"). The final, $project stage filters the first query result array with the condition:
Get all the documents of type x where it's item_id is not in
relavent_item_ids of type y documents

I am not sure if its an elegant solution.
db.getCollection('test').aggregate([
{
"$unwind": {
"path": "$relavent_item_ids",
"preserveNullAndEmptyArrays": true
}
},
{
"$group": {
"_id":null,
"relavent_item_ids": {"$addToSet":"$relavent_item_ids"},
"other_ids": {
"$addToSet":{
"$cond":[
{"$eq":["$type", "x"]},
"$item_id",
null
]
}
}
}
},
{
"$project":{
"includeIds": {"$setDifference":["$other_ids", "$relavent_item_ids"]}
}
},
{
"$unwind": "$includeIds"
},
{
"$match": {"includeIds":{"$ne":null}}
},
{
"$lookup":{
"from": "test",
"let": { "includeIds": "$includeIds"},
"pipeline": [
{ "$match":
{ "$expr":
{ "$and":
[
{ "$eq": [ "$item_id", "$$includeIds" ] },
{ "$eq": [ "$type", "x" ] }
]
}
}
}
],
"as": "result"
}
},
{
"$unwind": "$result"
},
])

Related

count based on nested key mongodb

How can i count based on xTag key is on doc
I tried this but it does not provide me actual count
db.collection.find({
"products.xTag": {
$exists: false
}
}).count();
when you run with $exist:true i would expect result 1
When you run with $exist:false i would expect result 3
Playground: https://mongoplayground.net/p/_gf7RzGc8oB
Structure:
[
{
"item": 1,
"products": [
{
"name": "xyz",
"xTag": 32423
},
{
"name": "abc"
}
]
},
{
"item": 2,
"products": [
{
"name": "bob",
},
{
"name": "foo"
}
]
}
]
It is not possible with find(), You can use aggregate(),
$unwind deconstruct products array
$match your condition
$count total documents
db.collection.aggregate([
{ $unwind: "$products" },
{ $match: { "products.xTag": { $exists: false } } },
{ $count: "count" }
])
Playground

MongoDB Aggregation: How to check if an object containing multiple properties exists in an array

I have an array of objects and I want to check if there is an object that matches multiple properties. I have tried using $in and $and but it does not work the way I want it to.
Here is my current implementation.
I have an array like
"choices": [
{
"name": "choiceA",
"id": 0,
"l": "k"
},
{
"name": "choiceB",
"id": 1,
"l": "j"
},
{
"name": "choiceC",
"id": 2,
"l": "l"
}
]
I am trying to write aggregation code that can check if there is an object that contains both "id":2 and "l":"j" properties. My current implementation checks if there is an object containing the first property then checks if there is an object containing the second one.
How can I get my desired results?
Below, see my aggregation query. The full code is here
db.poll.aggregate([
{
"$match": {
"_id": 100
}
},
{
$project: {
numberOfVotes: {
$and: [
{
$in: [
2,
"$choices.id"
]
},
{
$in: [
"j",
"$choices.l"
]
}
]
},
}
}
])
The above query returns true yet there is no object in the array both of the properties id:2 and "l":"J". I know the code works as expected. How can I get my desired results?
You want to use something like $elemMatch
db.collection.find({
choices: {
$elemMatch: {
id: 2,
l: "j"
}
}
})
MongoPlayground
EDIT
In an aggregation $project stage I would use $filter
db.poll.aggregate([
{
"$match": {
"_id": 100
}
},
{
$project: {
numberOfVotes: {
$gt: [
{
$size: {
$filter: {
input: "$choices",
as: "choice",
cond: {
$and: [
{
$eq: [
"$$choice.id",
2
]
},
{
$eq: [
"$$choice.l",
"j"
]
}
]
}
}
}
},
0
]
}
}
}
])
MongoPlayground

MongoDB - Conditionally match two arrays [duplicate]

This question already has answers here:
Retrieve only the queried element in an object array in MongoDB collection
(18 answers)
Closed 2 years ago.
There is the following array of objects:
let externalArray =
[
{ name: "xxx",
max: 100,
unit: "myUnit"
},
{ name: "yyy",
max: 90,
unit: "myUnit"
}
]
And the following mongodb structure:
[
{
"myList": [
{
"name": "xxx",
"amount": 66.3,
"unit": "myUnit"
},
{
"name": "yyy",
"amount": 11.6,
"unit": "myUnit"
},
{
"name": "zzz",
"amount": 6.9,
"unit": "myUnit"
}
]
}
]
How can I use the $match query inside an aggregation to only output the objects for which myList.amount <= externalArray.max and myList.unit equals externalArray.unit for the same name? Thank you so much for your help.
Since you're comparing against input array to an array in documents you need to do few more things along with $filter :
db.collection.aggregate([
/** Unwind (Split array into objects :: total docs = each doc * no.of objects in array of respective doc) */
{
$unwind: "$myList"
},
/** Iterate on input `externalArray` & check for all conditions. Creates an array field `matches` */
{
$addFields: {
matches: {
$filter: {
input: externalArray,
cond: {
$and: [ { $eq: [ "$$this.name", "$myList.name" ] },
{ $eq: [ "$$this.unit", "$myList.unit" ] },
{ $lte: [ "$myList.amount", "$$this.max" ] }
]
}
}
}
}
},
/** In filter step if it matches with any condition then `matches` will have 1 or more objects from `externalArray`
* Excluding all docs where there is no match */
{
$match: { matches: { $ne: [] } }
},
/** Remove unnecessary field */
{
$project: { matches: 0 }
},
/** Since we unwind the array - group back all docs based on `_id` */
{
$group: { _id: "$_id", myList: { $push: "$myList" } }
}
])
Test : mongoplayground
Note :
In the response :
myList array will contain only matched objects from externalArray.
You don't see documents where myList is not present or if no matching object exists between myList & externalArray (So there should be at-least one matching object between two arrays in order to get the document out).
You can transform your external array before passing it to aggregation pipeline to be the following structures and use as conditions.
We can use this in a $match expression to match the documents that has at least one match.
[{
"name": "xxx",
"amount": {
"$lte": 100
},
"unit": "myUnit"
},
{
"name": "yyy",
"amount": {
"$lte": 90
},
"unit": "myUnit"
}]
/* example js implementation */
const matchConditions = externalArray.map(({ name, max, unit })=> ({
name,
amount: { $lte: max },
unit
}))
And we can use the following in the $filter condition
[{
"$and": [
{ "$eq": ["name", "xxx"] },
{ "$lte" ["amount", 100 },
{ "$eq": ["unit", "myUnit"] }
]
},
{
"$and": [
{ "$eq": ["$$this.name", "yyy"] },
{ "$lte": ["$$this.amount", 90 },
{ "$eq": ["$$this.unit", "myUnit"] }
]
}]
/* example js implementation */
const filterConditions = externalArray.map(({ name, max, unit }) => ({
$and: [
{ $eq: ['$$this.name', name] },
{ $lte: ['$$this.amount', max },
{ $eq: ['$$this.unit', unit] }
]
}))
Then finally we can use the following aggregation
db.collection.aggregate([
{
$match: {
myList: {
$elemMatch: {
$or: matchConditions
}
}
}
},
{
$addFields: {
myList: {
$filter: {
input: '$myList',
cond: {
$or: filterConditions
}
}
}
}
}
])

Return Only the Keys from Document Where the Query condition was True

I'm having group of elements in MongoDB as given below:
{
"_id": ObjectId("5942643ea2042e12245de00c"),
"user": NumberInt(1),
"name": {
"value": "roy",
"time": NumberInt(121)
},
"lname": {
"value": "roy s",
"time": NumberInt(122)
},
"sname": {
"value": "roy 9",
"time": NumberInt(123)
}
}
but when I execute the query below
db.temp.find({
$or: [{
'name.time': {
$gte: 123
}
}, {
'lname.time': {
$gte: 123
}
}, {
'sname.time': {
$gte: 123
}
}]
})
it is returning the whole document which is correct.
Is there any way to fetch only specified object in which condition matched.Like in my document let condition within lname.time equl to 122 then only lname object will return rest will ignored.
The type of thing you are asking for is only really "practical" with MongoDB 3.4 in order to return this from the server.
Summary
The general case here is that the "projection" of fields by logical conditions is not straightforward. Whilst it would be nice if MongoDB had such a DSL for projection, this is basically delegated either to:
Do your manipulation "after" the results are returned from the server
Use the aggregation pipeline in order to manipulate the documents.
Therefore, in "CASE B" being "aggregation pipeline", this is really only a practical excercise if the steps involved "mimic" the standard .find() behavior of "query" and "project". Introducing other pipeline stages beyond that will only introduce performance problems greatly outweighing any gain from "trimming" the documents to return.
Thus the summary here is $match then $newRoot to "project", following the pattern. It is also I think a good "rule of thumb" to consider here that the aggregation approach "should only" be applied where there is a significant reduction in the size of data returned. I would expand by example saying that "if" the size of the keys to "trim" was actually in the Megabytes range on the returned result, then it is a worthwhile exercise to remove them "on the server".
In the case where such a saving would really only constitute "bytes" in comparison, then the most logical course is to simply allow the documents to return in the cursor "un-altered", and only then in "post processing" would you bother removing unwanted keys that did not meet the logical condition.
That said, On with the actual methods.
Aggregation Case
db.temp.aggregate([
{ "$match": {
"$or": [
{ "name.time": { "$gte": 123 } },
{ "lname.time": { "$gte": 123 } },
{ "sname.time": { "$gte": 123 } }
]
}},
{ "$replaceRoot": {
"newRoot": {
"$arrayToObject": {
"$concatArrays": [
[
{ "k": "_id", "v": "$_id" },
{ "k": "user", "v": "$user" },
],
{ "$filter": {
"input": [
{ "$cond": [
{ "$gte": [ "$name.time", 123 ] },
{ "k": "name", "v": "$name" },
false
]},
{ "$cond": [
{ "$gte": [ "$lname.time", 123 ] },
{ "k": "lname", "v": "$lname" },
false
]},
{ "$cond": [
{ "$gte": [ "$sname.time", 123 ] },
{ "k": "sname", "v": "$sname" },
false
]}
],
"as": "el",
"cond": "$$el"
}}
]
}
}
}}
])
It's a pretty fancy statement that relies on $arrayToObject and $replaceRoot to achieve the dynamic structure. At its core the "keys" are all represented in array form, where the "array" only contains those keys that actually pass the conditions.
Fully constructed after the conditions are filtered we turn the array into a document and return the projection to the new Root.
Cursor Processing Case
You can actually do this in the client code with ease though. For example in JavaScript:
db.temp.find({
"$or": [
{ "name.time": { "$gte": 123 } },
{ "lname.time": { "$gte": 123 } },
{ "sname.time": { "$gte": 123 } }
]
}).map(doc => {
if ( doc.name.time < 123 )
delete doc.name;
if ( doc.lname.time < 123 )
delete doc.lname;
if ( doc.sname.time < 123 )
delete doc.sname;
return doc;
})
In both cases you get the same desired result:
{
"_id" : ObjectId("5942643ea2042e12245de00c"),
"user" : 1,
"sname" : {
"value" : "roy 9",
"time" : 123
}
}
Where sname was the only field to meet the condition in the document and therefore the only one returned.
Dynamic Generation and DSL Re-use
Addressing Sergio's question then I suppose you can actually re-use the DSL from the $or condition to generate in both cases:
Considering the variable defined
var orlogic = [
{
"name.time" : {
"$gte" : 123
}
},
{
"lname.time" : {
"$gte" : 123
}
},
{
"sname.time" : {
"$gte" : 123
}
}
];
Then with cursor iteration:
db.temp.find({
"$or": orlogic
}).map(doc => {
orlogic.forEach(cond => {
Object.keys(cond).forEach(k => {
var split = k.split(".");
var op = Object.keys(cond[k])[0];
if ( op === "$gte" && doc[split[0]][split[1]] < cond[k][op] )
delete doc[split[0]];
else if ( op === "$lte" && doc[split[0]][split[1]] > cond[k][op] )
delete doc[split[0]];
})
});
return doc;
})
Which evaluates against the DSL to actually perform the operations without "hardcoded" ( somewhat ) if statements;
Then the aggregation approach would also be:
var pipeline = [
{ "$match": { "$or": orlogic } },
{ "$replaceRoot": {
"newRoot": {
"$arrayToObject": {
"$concatArrays": [
[
{ "k": "_id", "v": "$_id" },
{ "k": "user", "v": "$user" }
],
{ "$filter": {
"input": orlogic.map(cond => {
var obj = {
"$cond": {
"if": { },
"then": { },
"else": false
}
};
Object.keys(cond).forEach(k => {
var split = k.split(".");
var op = Object.keys(cond[k])[0];
obj.$cond.if[op] = [ `$${k}`, cond[k][op] ];
obj.$cond.then = { "k": split[0], "v": `$${split[0]}` };
});
return obj;
}),
"as": "el",
"cond": "$$el"
}}
]
}
}
}}
];
db.test.aggregate(pipeline);
So the same basic conditions where we re-use existing $or DSL to generate the required pipeline parts as opposed to hard coding them in.
The second argument to find specifies the fields to return (projection)
db.collection.find(query, projection)
https://docs.mongodb.com/manual/reference/method/db.collection.find/
as in example
db.bios.find( { }, { name: 1, contribs: 1 } )
db.temp.find({
"$elemMatch": "$or"[
{
'name.time': {
$gte: 123
}
},
{
'lname.time': {
$gte: 123
}
},
{
'sname.time': {
$gte: 123
}
}
]
},
{
{
"name.time": 1,
"lname.time": 1,
"sname.time": 1
}
}
})
My approach using aggregation pipeline
$project - Project is used to create an key for the documents name, sname and lname
Initial project Query
db.collection.aggregate([{$project: {_id:1, "tempname.name": "$name", "templname.lname":"$lname", "tempsname.sname":"$sname"}}]);
Result of this query is
{"_id":ObjectId("5942643ea2042e12245de00c"),"tempname":{"name":{"value":"roy","time":121}},"templname":{"lname":{"value":"roy s","time":122}},"tempsname":{"sname":{"value":"roy 9","time":123}}}
Use $project one more time to add the documents into an array
db.collection.aggregate([{$project: {_id:1, "tempname.name": "$name", "templname.lname":"$lname", "tempsname.sname":"$sname"}},
{$project: {names: ["$tempname", "$templname", "$tempsname"]}}])
Our document will be like this after the execution of second project
{"_id":ObjectId("5942643ea2042e12245de00c"),"names":[{"name":{"value":"roy","time":121}},{"lname":{"value":"roy s","time":122}},{"sname":{"value":"roy 9","time":123}}]}
Then use $unwind to break the array into separate documents
after breaking the documents use $match with $or to get the desired result
**
Final Query
**
db.collection.aggregate([
{
$project: {
_id: 1,
"tempname.name": "$name",
"templname.lname": "$lname",
"tempsname.sname": "$sname"
}
},
{
$project: {
names: [
"$tempname",
"$templname",
"$tempsname"
]
}
},
{
$unwind: "$names"
},
{
$match: {
$or: [
{
"names.name.time": {
$gte: 123
}
},
{
"names.lname.time": {
$gte: 123
}
},
{
"names.sname.time": {
$gte: 123
}
}
]
}
}
])
Final result of the query closer to your expected result(with an additional key)
{
"_id" : ObjectId("5942643ea2042e12245de00c"),
"names" : {
"sname" : {
"value" : "roy 9",
"time" : 123
}
}
}

Mongo Query to Return only a subset of SubDocuments

Using the example from the Mongo docs:
{ _id: 1, results: [ { product: "abc", score: 10 }, { product: "xyz", score: 5 } ] }
{ _id: 2, results: [ { product: "abc", score: 8 }, { product: "xyz", score: 7 } ] }
{ _id: 3, results: [ { product: "abc", score: 7 }, { product: "xyz", score: 8 } ] }
db.survey.find(
{ id: 12345, results: { $elemMatch: { product: "xyz", score: { $gte: 6 } } } }
)
How do I return survey 12345 (regardless of even if it HAS surveys or not) but only return surveys with a score greater than 6? In other words I don't want the document disqualified from the results based on the subdocument, I want the document but only a subset of subdocuments.
What you are asking for is not so much a "query" but is basically just a filtering of content from the array in each document.
You do this with .aggregate() and $project:
db.survey.aggregate([
{ "$project": {
"results": {
"$setDifference": [
{ "$map": {
"input": "$results",
"as": "el",
"in": {
"$cond": [
{ "$and": [
{ "$eq": [ "$$el.product", "xyz" ] },
{ "$gte": [ "$$el.score", 6 ] }
]}
]
}
}},
[false]
]
}
}}
])
So rather than "contrain" results to documents that have an array member matching the condition, all this is doing is "filtering" the array members out that do not match the condition, but returns the document with an empty array if need be.
The fastest present way to do this is with $map to inspect all elements and $setDifference to filter out any values of false returned from that inspection. The possible downside is a "set" must contain unique elements, so this is fine as long as the elements themselves are unique.
Future releases will have a $filter method, which is similar to $map in structure, but directly removes non-matching results where as $map just returns them ( via the $cond and either the matching element or false ) and is then better suited.
Otherwise if not unique or the MongoDB server version is less than 2.6, you are doing this using $unwind, in a non performant way:
db.survey.aggregate([
{ "$unwind": "$results" },
{ "$group": {
"_id": "$_id",
"results": { "$push": "$results" },
"matched": {
"$sum": {
"$cond": [
{ "$and": [
{ "$eq": [ "$results.product", "xyz" ] },
{ "$gte": [ "$results.score", 6 ] }
]},
1,
0
]
}
}
}},
{ "$unwind": "$results" },
{ "$match": {
"$or": [
{
"results.product": "xyz",
"results.score": { "$gte": 6 }
},
{ "matched": 0 }
}},
{ "$group": {
"_id": "$_id",
"results": { "$push": "$results" },
"matched": { "$first": "$matched" }
}},
{ "$project": {
"results": {
"$cond": [
{ "$ne": [ "$matched", 0 ] },
"$results",
[]
]
}
}}
])
Which is pretty horrible in both design and perfomance. As such you are probably better off doing the filtering per document in client code instead.
You can use $filter in mongoDB 3.2
db.survey.aggregate([{
$match: {
{ id: 12345}
}
}, {
$project: {
results: {
$filter: {
input: "$results",
as: "results",
cond:{$gt: ['$$results.score', 6]}
}
}
}
}]);
It will return all the sub document that have score greater than 6. If you want to return only first matched document than you can use '$' operator.
You can use $redact in this way:
db.survey.aggregate( [
{ $match : { _id : 12345 }},
{ $redact: {
$cond: {
if: {
$or: [
{ $eq: [ "$_id", 12345 ] },
{ $and: [
{ $eq: [ "$product", "xyz" ] },
{ $gte: [ "$score", 6 ] }
]}
]
},
then: "$$DESCEND",
else: "$$PRUNE"
}
}
}
] );
It will $match by _id: 12345 first and then it will "$$PRUNE" all the subdocuments that don't have "product":"xyz" and don't have score greater or equal 6. I added the condition ($cond) { $eq: [ "$_id", 12345 ] } so that it wouldn't prune the whole document before it reaches the subdocuments.