mongodb - Find count of values in other collection - mongodb

I want to find how many of the "taskId" values in collectionOne (shown below) are present in collectionTwo.
Here, "taskId" in collectionOne is embedded inside an array in a document.
Also, please let me know the different approaches to do this (if possible).
db.collectionOne.find({"sId":"DCNrnPeKFrBv" }).pretty()
{
"_id" : "sX8o7mJhebs",
"sId" : "DCNrnPeKFrBv",
"data" : [
{
"oId" : "7SycYQ",
"taskId" : 146108906
},
{
"oId" : "7SycYQ",
"taskId" : 14623846
},
{
"oId" : "fANQ",
"taskId" : 1461982
},
{
"oId" : "fAeNQ",
"taskId" : 131732
},
{
"oId" : "t6AF5yn",
"taskId" : 197681
}
]
}
> db.collectionTwo.find().pretty().limit(2)
{
"_id" : 146108906,
"oId" : "7SycYQ",
"name" : "ABC"
}
{
"_id" : 1461982,
"oId" : "fANQ",
"name" : "XYZ"
}
In collectionTwo "_id" is equivalent to "taskId" of collectionOne.

Using the $lookup operator to do a left join on collectionTwo, you can get the counts as follows:
// Count how many of a document's data[].taskId values have a matching _id in collectionTwo.
db.collectionOne.aggregate([
// Restrict the pipeline to the document(s) with the given sId.
{ $match: { sId: "DCNrnPeKFrBv" }},
// Left join: "data.taskId" resolves to the array of taskIds; the collectionTwo documents
// whose _id equals any of them are collected into "tasksCount".
{ $lookup: {
from: "collectionTwo",
localField: "data.taskId",
foreignField: "_id",
as: "tasksCount"
} },
// Overwrite the joined array with its length, i.e. the number of matched tasks.
{ $addFields: {
tasksCount: { $size: "$tasksCount" }
} }
])
or if using older MongoDB server versions (below 3.4), where $addFields and $lookup on an array field are not available:
// Same count without $addFields: unwind first so $lookup joins one taskId at a time.
// NOTE(review): there is no $match stage here, so every document in collectionOne is
// included in the count — add one if only a single sId is wanted.
db.collectionOne.aggregate([
// One output document per element of the "data" array.
{ $unwind: "$data" },
{ $lookup: {
from: "collectionTwo",
localField: "data.taskId",
foreignField: "_id",
as: "tasks"
} },
// Drops documents whose "tasks" array is empty, i.e. taskIds with no match in collectionTwo.
{ $unwind: "$tasks" },
// Number of matches per collectionTwo _id.
{ $group: {
_id: "$tasks._id",
count: { "$sum": 1 }
} },
// Sum the per-task counts into a single total.
{ $group: {
_id: null,
tasksCount: { "$sum": "$count" }
} }
])
--EDIT--
An alternative is to get a list of all the distinct taskIds in collectionOne, and use that list as count query on collectionTwo e.g.
var taskIds = db.collectionOne.distinct("data.taskId");
var tasksCount = db.collectionTwo.count({ "_id": { "$in": taskIds } });
printjson(taskIds);
printjson(tasksCount);

Related

How to $lookup by avoiding null values in mongodb aggregate

Here I'm using $lookup to do a left join from other collections. The query works fine, but when some records are missing values it returns
errmsg : $in requires an array as a second argument, found: null
Heres the querying document structure :
{
"no" : "2020921008981",
"sale" : {
"soldItems" : [
{
"itemId" : "5b55ac7f0550de00210a3b24",
},
{
"itemId" : "5b55ac7f0550de00215584re",
}
],
"bills" : [
{
"billNo" : "2020921053467",
"insurancePlanId" : "160",
},
{
"billNo" : "2020921053467",
"insurancePlanId" : "170",
}
],
"visitIds" : [
5b55ac7f0550de00210a3b24, 5b55ac7f0550de00210a3b24
]
}
}
the query :
db.case.aggregate([
{
$lookup: {
from: "insurance",
let: { ipids: "$sale.bill.insurancePlanId" },
pipeline: [
{
$unwind: "$coveragePlans"
},
{
$match: { $expr: { $in: ["$coveragePlans._id", "$$ipids"] } }
},
{
$project: { _id: 0, name: 1 }
}
],
as: "insurances"
}
},
{
$lookup: {
from: "item",
let: { iid: "$salesOrder.purchaseItems.itemRefId" },
pipeline: [
{
$match: {
$expr: {
$in: ["$_id", {
$map: {
input: "$$iid",
in: { $toObjectId: "$$this" }
}
}
]
}
}
}
],
as: "items"
}
}
])
insurance collection :
{
"_id" : ObjectId("5b55aca20550de00210a6d25"),
"name" : "HIJKL"
"coveragePlans" : [
{
"_id" : "160",
"name" : "UVWZ",
},
{
"_id" : "161",
"name" : "LMNO",
}
]
},
{
"_id" : ObjectId("5b55aca20550de00210a6d25"),
"name" : "WXYZ"
"coveragePlans" : [
{
"_id" : "169",
"name" : "5ABC",
},
{
"_id" : "170",
"name" : "4XYZ",
}
]
}
item collection :
{
"_id" : ObjectId("5b55ac7f0550de00210a3b24"),
"code" : "ABCDE"
},
{
"_id" : ObjectId("5b55ac7f0550de00215584re"),
"code" : "PQRST"
}
How can I avoid this and do null checks effectively before pipelining into the next stages? I tried { $match: { "fieldName": { $exists: true, $ne: null } } } but it returns a Mongo error regarding the format. If that is the way to go, please mention the stage where I should put it. Thanks in advance.
You can use $ifNull operator
let: { ipids: {$ifNull:["$sale.bill.insurancePlanId", [] ]} },
EDIT: To skip empty "$salesOrder.purchaseItems.itemRefId" values
let: { iid: {$filter: {input:"$salesOrder.purchaseItems.itemRefId", cond:{$ne:["$$this", ""]}}} },
You can get around that by not using $in.
It looks like this $map is executed separately for every document in the items collection. If you were to run the map in an $addFields stage, you could use the simple form of lookup to match the added field to _id, which would automagically handle missing, null, and array.
Remove the added field with a $project stage if necessary.
// Resolve the insurances and items of each case without failing when the id arrays
// are missing or null.
db.case.aggregate([
  // The insurance join still uses $in, which rejects a null second argument, so a
  // missing/null id list is defaulted to an empty array.
  {$lookup: {
    from: "insurance",
    let: { ipids: { $ifNull: ["$sale.bill.insurancePlanId", []] } },
    pipeline: [
      {$unwind: "$coveragePlans"},
      {$match: { $expr: { $in: ["$coveragePlans._id", "$$ipids"] } }},
      {$project: { _id: 0, name: 1 }}
    ],
    as: "insurances"
  }},
  // Convert the string ids to ObjectIds once per case document. The field path is used
  // directly because "$$iid" only exists inside a $lookup "let" scope.
  {$addFields: {
    matchArray: {$map: {
      input: "$salesOrder.purchaseItems.itemRefId",
      in: { $toObjectId: "$$this" }
    }}
  }},
  // The localField/foreignField form of $lookup accepts a missing, null, or array localField.
  {$lookup: {
    from: "item",
    localField: "matchArray",
    foreignField: "_id",
    as: "items"
  }},
  // Drop the helper field added by the $addFields stage.
  {$project: {
    matchArray: 0
  }}
])

Lookup and aggregate multiple levels of subdocument in Mongodb

I've tried many answers to similar problems using $lookup, $unwind, and $match, but I can't get this to work for my sub-sub-subdocument situation.
I have this collection, Things:
{
"_id" : ObjectId("5a7241f7912cfc256468cb27"),
"name" : "Fortress of Solitude",
"alias" : "fortress_of_solitude",
},
{
"_id" : ObjectId("5a7247ec548c9ad042f579e2"),
"name" : "Batcave",
"alias" : "batcave",
},
{
"_id" : ObjectId("6a7247bc548c9ad042f579e8"),
"name" : "Oz",
"alias" : "oz",
},
and this one-document collection, Venues:
{
"_id" : ObjectId("5b9acabbbf71f39223f8de6e"),
"name" : "The Office",
"floors" : [
{
"name" : "1st Floor",
"places" : [
{
"name" : "Front Entrance",
"alias" : "front_entrance"
}
]
},
{
"name" : "2nd Floor",
"places" : [
{
"name" : "Batcave",
"alias" : "batcave"
},
{
"name" : "Oz",
"alias" : "oz"
}
]
}
]
}
I want to return all the Things, but with the Venue's floors.places.name aggregated with each Thing if it exists if the aliases match between Things and Venues. So, I want to return:
{
"_id" : ObjectId("5a7241f7912cfc256468cb27"),
"name" : "Fortress of Solitude",
"alias" : "fortress_of_solitude",
<-- nothing added here because
<-- it's not found in Venues
},
{
"_id" : ObjectId("5a7247ec548c9ad042f579e2"),
"name" : "Batcave",
"alias" : "batcave",
"floors" : [ <-- this should be
{ <-- returned
"places" : [ <-- because
{ <-- the alias
name" : "Batcave" <-- matches
} <-- in Venues
] <--
} <--
] <--
},
{
"_id" : ObjectId("6a7247bc548c9ad042f579e8"),
"name" : "Oz",
"alias" : "oz",
"floors" : [ <-- this should be
{ <-- returned
"places" : [ <-- because
{ <-- the alias
name" : "Oz" <-- matches
} <-- in Venues
] <--
} <--
] <--
}
I've gotten as far as the following query, but it only returns the entire Venues.floors array as an aggregate onto each Thing, which is way too much extraneous data aggregated. I just want to merge each relevant floor.place sub-subsubdocument from Venues into its corresponding Thing if it exists in Venues.
db.getCollection('things').aggregate([
{$lookup: {from: "venues",localField: "alias",foreignField: "floors.places.alias",as: "matches"}},
{
$replaceRoot: { newRoot: { $mergeObjects: [ { $arrayElemAt: [ "$matches", 0 ] }, "$$ROOT" ] } }
},
{ $project: { matches: 0 } }
])
I'm struggling with existing answers, which seem to change at MongoDB version 3.2, 3.4, 3.6, or 4.2 to include or not include $unwind, $pipeline, and other terms. Can someone explain how to get a sub-sub-subdocument aggregated like this? Thanks!
You can try this :
db.things.aggregate([
{
$lookup:
{
from: "venues",
let: { alias: "$alias" },
pipeline: [
{ $unwind: { path: "$floors", preserveNullAndEmptyArrays: true } },
{ $match: { $expr: { $in: ['$$alias', '$floors.places.alias'] } } },
/** Below stages are only if you've docs like doc 2 in Venues */
{ $addFields: { 'floors.places': { $filter: { input: '$floors.places', cond: { $eq: ['$$this.alias', '$$alias'] } } } } },
{ $group: { _id: '$_id', name: { $first: '$name' }, floors: { $push: '$floors' } } },
{$project : {'floors.places.alias': 1, _id :0}} // Optional
],
as: "matches"
}
}
])
Test : MongoDB-Playground
Since MongoDB v3.6, we may perform uncorrelated sub-queries which gives us more flexibility to join two collections.
Try this:
db.things.aggregate([
{
$lookup: {
from: "venues",
let: {
"alias": "$alias"
},
pipeline: [
{
$unwind: "$floors"
},
{
$project: {
_id: 0,
places: {
$filter: {
input: "$floors.places",
cond: {
$eq: [
"$$alias",
"$$this.alias"
]
}
}
}
}
},
{
$match: {
"places.0": {
$exists: true
}
}
},
{
$unset: "places.name"
}
],
as: "floors"
}
}
])
MongoPlayground

MongoDB aggregate: apply sort to lookup results, and add an index number field

The aggregate was executed.
I got the results using lookup, but I need a sort.
In addition, I want to assign an index to the result value.
CollectionA :
{
"_id" : ObjectId("5a6cf47415621604942386cd"),
"contents" : [
ObjectId("AAAAAAAAAAAAAAAAAAAAAAAA"),
ObjectId("BBBBBBBBBBBBBBBBBBBBBBBB")
],
"name" : "jason"
}
CollectionB :
{
"_id" : ObjectId("AAAAAAAAAAAAAAAAAAAAAAAA")
"title" : "a title",
"date" : 2018-01-02
},
{
"_id" : ObjectId("BBBBBBBBBBBBBBBBBBBBBBBB")
"title" : "a title",
"date" : 2018-01-01
}
Query:
db.getCollection('A').aggregate([
{
$match : { "_id" : ObjectId("5a6cf47415621604942386cd") }
},
{
$lookup : {
from: "B",
localField: "contents",
foreignField: "_id",
as: "item"
}
},
{ $sort: { "item.date" : -1 } }
]);
Want Result:
{
"_id" : ObjectId("5a6cf47415621604942386cd"),
"contents" : [
{
"_id" : ObjectId("BBBBBBBBBBBBBBBBBBBBBBBB")
"title" : "a title",
"date" : 2018-01-01,
"index" : 0
},
{
"_id" : ObjectId("AAAAAAAAAAAAAAAAAAAAAAAA")
"title" : "a title",
"date" : 2018-01-02,
"index" : 1
}],
"name" : "jason"
}
The current problem is that the sort is not applied.
And I don't know how to assign an index.
The aggregation below may help you get your desired result.
// Replace "contents" with the referenced CollectionB documents, sorted by date and
// tagged with their position in that order.
db.CollectionA.aggregate([
  {
    $match: { "_id": ObjectId("5a6cf47415621604942386cd") }
  },
  {
    $lookup: {
      from: "CollectionB",
      let: { contents: "$contents" },
      pipeline: [
        {
          $match: { $expr: { $in: ["$_id", "$$contents"] } }
        },
        // Sort inside the sub-pipeline so the joined array arrives already ordered.
        { $sort: { date: 1 } }
      ],
      as: "contents"
    }
  },
  {
    // $addFields (rather than $project) overwrites "contents" while keeping the
    // document's other fields, such as "name", which the desired result includes.
    $addFields: {
      contents: {
        $map: {
          // Iterate over positions 0..size-1 so each element can be paired with its index.
          input: { $range: [0, { $size: "$contents" }, 1 ] },
          as: "element",
          in: {
            $mergeObjects: [
              { index: "$$element" },
              { $arrayElemAt: [ "$contents", "$$element" ]}
            ]
          }
        }
      }
    }
  }
])
One way to go about it would be to unwind the array, sort it and then group it back
db.A.aggregate([
{
$match: {
"_id": ObjectId("5a6cf47415621604942386cd")
}
},
{
$lookup: {
from: "B",
localField: "contents",
foreignField: "_id",
as: "item"
}
},
{
$unwind: "$item"
},
{
$sort: {
"item.date": -1
}
},
{
$group: {
_id: "$_id",
contents: {
$push: "$item"
}
}
}
])
Another method is, (this is applicable only if the date field corresponds to the document creation date),
db.A.aggregate([
{
$match: {
"_id": ObjectId("5a6cf47415621604942386cd")
}
},
{
$lookup: {
from: "B",
localField: "contents",
foreignField: "_id",
as: "item"
}
},
{
$sort: {
"item": -1
}
}
])
Basically, this sorts on the basis of _id, and since _id is created using the creation date, it should sort accordingly.

How to check $setDifference in two array using mongo-query

UserDetails
{
"_id" : "5c23536f807caa1bec00e79b",
"UID" : "1",
"name" : "A",
},
{
"_id" : "5c23536f807caa1bec00e78b",
"UID" : "2",
"name" : "B",
}
UserProducts
{
"_id" : "5c23536f807caa1bec00e79c",
"UPID" : "100",
"UID" : "1"
},
{
"_id" : "5c23536f807caa1bec00e79c",
"UPID" : "200",
"UID" : "2"
}
Groups
{
"_id" : "5bb20d7556db6915846da55f",
"members" : {
"regularStudent" : [
"200" // UPID
],
}
}
Step 1
I have to take UID from UserDetails check with UserProducts then take UPID from UserProducts
Step 2
we have to check this UPID mapped to Groups collection or not ?.
members.regularStudent we are mapped UPID
Step 3
If the UPID is not mapped, I want to print that UPID from UserProducts.
I have tried but couldn't complete this, kindly help me out on this.
Expected Output:
["100"]
Note: Expected Output is ["100"] , because UserProducts having UPID 100 & 200 but Groups collection mapped only 200.
My Code
db.UserDetails.aggregate(
{
$lookup: {
from: "UserProducts",
localField: "UID",
foreignField: "UID",
as: "userProduct"
}
},
{ $unwind: "$userProduct" },
{
"$project": { "_id" : 0, "userProduct.UPID" : 1 }
},
{
$group: {
_id: null,
userProductUPIDs: { $addToSet: "$userProduct.UPID" }
}
}
) // returns [ "100", "200" ]
db.Groups.aggregate([
{
$unwind: "$members.regularStudent"
},
{
$group: {
_id: null,
UPIDs: { $addToSet: "$members.regularStudent" }
}
}
]) // returns ["200"]
Now i want to check $setDifference of both array, so i had added below code but returning error like $userProductUPIDs is not defined
db.Groups.aggregate([
{
$unwind: "$members.regularStudent"
},
{
$group: {
_id: null,
UPIDs: { $addToSet: "$members.regularStudent" }
}
},
{
$project: {
members: {
$setDifference: [ $userProductUPIDs , "$members" ]
},
_id : 0
}
}
])
As this is a follow up to one of my previous answers I will try to fix your code. The bottom line is that you need two queries as you can't upgrade your database so the code should look like below:
var queryResult = db.UserDetails.aggregate(
{
$lookup: {
from: "UserProducts",
localField: "UID",
foreignField: "UID",
as: "userProduct"
}
},
{ $unwind: "$userProduct" },
{
"$project": { "_id" : 0, "userProduct.UPID" : 1 }
},
{
$group: {
_id: null,
userProductUPIDs: { $addToSet: "$userProduct.UPID" }
}
});
let userProductUPIDs = queryResult.toArray()[0].userProductUPIDs;
db.Groups.aggregate([
{
$unwind: "$members.regularStudent"
},
{
$group: {
_id: null,
UPIDs: { $addToSet: "$members.regularStudent" }
}
},
{
$project: {
members: {
$setDifference: [ userProductUPIDs , "$UPIDs" ]
},
_id : 0
}
}
]) // should return 100

Mongo how to $lookup with DBRef

I have a problem (/(ㄒoㄒ)/~~). Suppose that collection A is
{
"_id" : ObjectId("582abcd85d2dfa67f44127e1"),
"bid" : [
DBRef("B", ObjectId("582abcd85d2dfa67f44127e0")),
DBRef("B", ObjectId("582abcd85d2dfa67f44127e1"))
]
}
and Collection B:
{
"_id" : ObjectId("582abcd85d2dfa67f44127e0"),
"status" : NumberInt(1),
"seq" : NumberInt(0)
},
{
"_id" : ObjectId("582abcd85d2dfa67f44127e1"),
"status" : NumberInt(1),
"seq" : NumberInt(0)
}
I don't know how to $lookup the 'bid'. I tried
db.A.aggregate(
[
{$unwind: {path: "$bid"}},
{$lookup: {from: "B", localField: "bid", foreignField: "_id", as: "bs"}},
]
)
and
db.A.aggregate(
[
{$unwind: {path: "$bid"}},
{$lookup: {from: "B", localField: "bid.$id", foreignField: "_id", as: "bs"}},
]
)
but it doesn't work. Anybody can help? Thanks.
Actually, the other answer is wrong. It is possible to do a lookup on a DBref field within your aggregator, and you don't need mapreduce for that.
Solution
db.A.aggregate([
{
$project: {
B_fk: {
$map: {
input: {
$map: {
input:"$bid",
in: {
$arrayElemAt: [{$objectToArray: "$$this"}, 1]
},
}
},
in: "$$this.v"}},
}
},
{
$lookup: {
from:"B",
localField:"B_fk",
foreignField:"_id",
as:"B"
}
}
])
result
{
"_id" : ObjectId("59bb79df1e9c00162566f581"),
"B_fk" : null,
"B" : [ ]
},
{
"_id" : ObjectId("582abcd85d2dfa67f44127e1"),
"B_fk" : [
ObjectId("582abcd85d2dfa67f44127e0"),
ObjectId("582abcd85d2dfa67f44127e1")
],
"B" : [
{
"_id" : ObjectId("582abcd85d2dfa67f44127e0"),
"status" : NumberInt("1"),
"seq" : NumberInt("0")
}
]
}
Short Explanation
Loop through the DBRefs with $map, break each DBref into an array, keep only the $id field, then get rid of the k:v format with $$this.v, keeping only the ObjectId and removing all the rest. You can now lookup on the ObjectId.
Step-by-step Explanation
Within the aggregator, a DBRef BSON type can be handled like an object, with two or three fields ($ref, $id, and optionally $db).
If you do:
db.A.aggregate([
{
$project: {
First_DBref_as_array: {$objectToArray:{$arrayElemAt:["$bid",0]}},
Second_DBref_as_array: {$objectToArray:{$arrayElemAt:["$bid",1]}},
}
},
])
This is the result:
{
"_id" : ObjectId("582abcd85d2dfa67f44127e1"),
"First_DBref_as_array" : [
{
"k" : "$ref",
"v" : "B"
},
{
"k" : "$id",
"v" : ObjectId("582abcd85d2dfa67f44127e0")
}
],
"Second_DBref_as_array" : [
{
"k" : "$ref",
"v" : "B"
},
{
"k" : "$id",
"v" : ObjectId("582abcd85d2dfa67f44127e1")
}
]
}
Once you have transformed a dbref into an array, you can get rid of the useless fields by querying only the value at index 1, like this:
db.A.aggregate([
{
$project: {
First_DBref_as_array: {$arrayElemAt: [{$objectToArray:{$arrayElemAt:["$bid",0]}},1]},
Second_DBref_as_array: {$arrayElemAt: [{$objectToArray:{$arrayElemAt:["$bid",0]}},1]},
}
},
])
result:
{
"_id" : ObjectId("582abcd85d2dfa67f44127e1"),
"First_DBref_as_array" : {
"k" : "$id",
"v" : ObjectId("582abcd85d2dfa67f44127e0")
},
"Second_DBref_as_array" : {
"k" : "$id",
"v" : ObjectId("582abcd85d2dfa67f44127e0")
}
}
Then you can get finally to the value you want by pointing to "$myvalue.v", just like this
db.A.aggregate([
{
$project: {
first_DBref_as_array: {$arrayElemAt: [{$objectToArray:{$arrayElemAt:["$bid",0]}},1]},
second_DBref_as_array: {$arrayElemAt: [{$objectToArray:{$arrayElemAt:["$bid",0]}},1]},
}
},
{
$project: {
first_DBref_as_ObjectId: "$first_DBref_as_array.v",
second_DBref_as_ObjectId: "$second_DBref_as_array.v"
}
}
])
result:
{
"_id" : ObjectId("582abcd85d2dfa67f44127e1"),
"first_DBref_as_ObjectId" : ObjectId("582abcd85d2dfa67f44127e0"),
"second_DBref_as_ObjectId" : ObjectId("582abcd85d2dfa67f44127e0")
}
Obviously, in a normal pipeline, you don't need all these redundant steps, using a nested $map, you can get to the same result in one go :
db.A.aggregate([
{
$project: {
B_fk: { $map : {input: { $map: { input:"$bid",
in: { $arrayElemAt: [{$objectToArray: "$$this"}, 1 ]}, } },
in: "$$this.v"}},
}
},
])
result:
{
"_id" : ObjectId("582abcd85d2dfa67f44127e1"),
"B_fk" : [
ObjectId("582abcd85d2dfa67f44127e0"),
ObjectId("582abcd85d2dfa67f44127e1")
]
}
I hope the explanation is clear enough, if not feel free to ask.
As of mongoDB 3.4, this is not possible. You can't use DBRef in the aggregation pipeline, except in the $match stage.
I strongly recommend you to get rid of DBRef and switch to manual references. However, if you really need to keep DBRef, here is an (ugly) solution:
first, create a new collection named "C", where DBRefs are replaced by their Ids using mapReduce:
db.A.mapReduce(
function() {
var key = this._id;
var value = [];
for ( var index = 0; index < this.bid.length; index++){
value.push(this.bid[index].$id);
}
emit(key, value);
},
function(key,values) {
return values;
},
{
"query": {},
"out": "C"
}
)
then, run your aggregation query on the new "C" collection:
db.C.aggregate([
{
$unwind:"$value"
},
{
$lookup:{
from:"B",
localField:"value",
foreignField:"_id",
as:"bs"
}
}
]);
output:
{
"_id":ObjectId("582abcd85d2dfa67f44127e1"),
"value":ObjectId("582abcd85d2dfa67f44127e0"),
"bs":[
{
"_id":ObjectId("582abcd85d2dfa67f44127e0"),
"status":1,
"seq":0
}
]
}{
"_id":ObjectId("582abcd85d2dfa67f44127e1"),
"value":ObjectId("582abcd85d2dfa67f44127e1"),
"bs":[
{
"_id":ObjectId("582abcd85d2dfa67f44127e1"),
"status":1,
"seq":0
}
]
}
Just in case someone is coming here in 2021:
Starting with MongoDB 4.3.3 the second query of the OP does work:
db.A.aggregate(
[
{$unwind: {path: "$bid"}},
{$lookup: {from: "B", localField: "bid.$id", foreignField: "_id", as: "bs"}},
]
)
The result is:
{
"_id":ObjectId("582abcd85d2dfa67f44127e1"),
"bid":DBRef("B", ObjectId("582abcd85d2dfa67f44127e0")),
"bs":[
{
"_id":ObjectId("582abcd85d2dfa67f44127e0"),
"status":1,
"seq":0
}
]
}{
"_id":ObjectId("582abcd85d2dfa67f44127e1"),
"bid":DBRef("B", ObjectId("582abcd85d2dfa67f44127e1")),
"bs":[
{
"_id":ObjectId("582abcd85d2dfa67f44127e1"),
"status":1,
"seq":0
}
]
}
See SERVER-14466 for more information.