Use fields that start with $ in MongoDB aggregation lookup - mongodb

I have a MongoDB database that is populated by a Spring application using Spring Data. I want to perform a manual query to join two collections and extract some statistics from this data.
The first collection is named emailCampaign and contains this information (simplified):
{
"_id" : ObjectId("5db85687307b0a0d184448db"),
"name" : "Welcome email",
"subject" : "¡Welcome {{ user.name }}!",
"status" : "Sent",
"_class" : "com.mycompany.EmailCampaign"
}
The second collection is named campaignDelivery and contains this information (simplified):
/* 1 */
{
"_id" : ObjectId("5db183fb307b0aef3113361f"),
"campaign" : {
"$ref" : "emailCampaign",
"$id" : ObjectId("5db85687307b0a0d184448db")
},
"deliveries" : 3,
"_class" : "com.mycompany.CampaignDelivery"
}
/* 2 */
{
"_id" : ObjectId("5db85f2c307b0a0d184448e1"),
"campaign" : {
"$ref" : "emailCampaign",
"$id" : ObjectId("5db85687307b0a0d184448db")
},
"deliveries" : 5,
"_class" : "com.mycompany.CampaignDelivery"
}
Ultimately I want to obtain the sum of both deliveries field, but by now I'm stuck with the basic JOIN:
db.emailCampaign.aggregate([
{
$lookup: {
from: 'campaignDelivery',
localField: '_id',
foreignField: 'campaign.$id',
as: 'deliveries'
}
}
])
Throws the following error:
FieldPath field names may not start with '$'.
Escaping the dollar had no impact whatsoever, and I can't any examples of fields that start with dollars.

You can workaround it by using uncorrelated $lookup with $objectToArray in the sub-query to access campaign.$id:
db.emailCampaign.aggregate([
{ $lookup: {
from: "campaignDelivery",
let: { id: "$_id" },
pipeline: [
{ $addFields: {
refId: { $arrayElemAt: [
{ $filter: {
input: { $objectToArray: "$campaign" },
cond: { $eq: [ "$$this.k", { $literal: "$id" } ] }
} }
, 0
] }
} },
{ $match: {
$expr: { $eq: [
"$refId.v",
"$$id"
] }
} },
{ $project: {
refId: 0
} }
],
as: "deliveries"
} }
])

Related

How to $lookup by avoiding null values in mongodb aggregate

In here i'm using $lookup to to a left join from other collections, the query works fine but when some records missing values it returns
errmsg : $in requires an array as a second argument, found: null
Heres the querying document structure :
{
"no" : "2020921008981",
"sale" : {
"soldItems" : [
{
"itemId" : "5b55ac7f0550de00210a3b24",
},
{
"itemId" : "5b55ac7f0550de00215584re",
}
],
"bills" : [
{
"billNo" : "2020921053467",
"insurancePlanId" : "160",
},
{
"billNo" : "2020921053467",
"insurancePlanId" : "170",
}
],
"visitIds" : [
5b55ac7f0550de00210a3b24, 5b55ac7f0550de00210a3b24
]
}
}
the query :
db.case.aggregate([
{
$lookup: {
from: "insurance",
let: { ipids: "$sale.bill.insurancePlanId" },
pipeline: [
{
$unwind: "$coveragePlans"
},
{
$match: { $expr: { $in: ["$coveragePlans._id", "$$ipids"] } }
},
{
$project: { _id: 0, name: 1 }
}
],
as: "insurances"
}
},
{
$lookup: {
from: "item",
let: { iid: "$salesOrder.purchaseItems.itemRefId" },
pipeline: [
{
$match: {
$expr: {
$in: ["$_id", {
$map: {
input: "$$iid",
in: { $toObjectId: "$$this" }
}
}
]
}
}
}
],
as: "items"
}
}
])
insurance collection :
{
"_id" : ObjectId("5b55aca20550de00210a6d25"),
"name" : "HIJKL"
"coveragePlans" : [
{
"_id" : "160",
"name" : "UVWZ",
},
{
"_id" : "161",
"name" : "LMNO",
}
]
},
{
"_id" : ObjectId("5b55aca20550de00210a6d25"),
"name" : "WXYZ"
"coveragePlans" : [
{
"_id" : "169",
"name" : "5ABC",
},
{
"_id" : "170",
"name" : "4XYZ",
}
]
}
item collection :
{
"_id" : ObjectId("5b55ac7f0550de00210a3b24"),
"code" : "ABCDE"
},
{
"_id" : ObjectId("5b55ac7f0550de00215584re"),
"code" : "PQRST"
}
How to avoid this and do null checks effectively before pipe-lining into the next stages? Tried with { $match: { "fieldName": { $exists: true, $ne: null } } } but it returns mongo error regarding the format. If its the way to go please mention the stage i should put that.. Thanks in advance
You can use $ifNull operator
let: { ipids: {$ifNull:["$sale.bill.insurancePlanId", [] ]} },
EDIT: To skip empty "$salesOrder.purchaseItems.itemRefId" values
let: { iid: {$filter: {input:"$salesOrder.purchaseItems.itemRefId", cond:{$ne:["$$this", ""]}}} },
You can get around that by not using $in.
It looks like this $map is executed separately for every document in the items collection. If you were to run the map in an $addFields stage, you could used the simple form of lookup to match the added field to _id, which would automagically handle missing, null, and array.
Remove the added field with a $project stage if necessary.
db.case.aggregate([
{$lookup: {
from: "insurance",
let: { ipids: "$sale.bill.insurancePlanId" },
pipeline: [
{$unwind: "$coveragePlans"},
{$match: { $expr: { $in: ["$coveragePlans._id", "$$ipids"] } }},
{$project: { _id: 0, name: 1 }}
],
as: "insurances"
}}
{$addFields:{
matchArray:{$map: {
input: "$$iid",
in: { $toObjectId: "$$this" }
}}
}},
{$lookup: {
from: "item",
localField: "matchArray",
foreignField:"_id",
as: "items"
}},
{$project:{
arrayField: 0
}}
])

Lookup and aggregate multiple levels of subdocument in Mongodb

I've tried many answers to similar problems using $lookup, $unwind, and $match, but I can't get this to work for my sub-sub-subdocument situation.
I have this collection, Things:
{
"_id" : ObjectId("5a7241f7912cfc256468cb27"),
"name" : "Fortress of Solitude",
"alias" : "fortress_of_solitude",
},
{
"_id" : ObjectId("5a7247ec548c9ad042f579e2"),
"name" : "Batcave",
"alias" : "batcave",
},
{
"_id" : ObjectId("6a7247bc548c9ad042f579e8"),
"name" : "Oz",
"alias" : "oz",
},
and this one-document collection, Venues:
{
"_id" : ObjectId("5b9acabbbf71f39223f8de6e"),
"name" : "The Office",
"floors" : [
{
"name" : "1st Floor",
"places" : [
{
"name" : "Front Entrance",
"alias" : "front_entrance"
}
]
},
{
"name" : "2nd Floor",
"places" : [
{
"name" : "Batcave",
"alias" : "batcave"
},
{
"name" : "Oz",
"alias" : "oz"
}
]
}
]
}
I want to return all the Things, but with the Venue's floors.places.name aggregated with each Thing if it exists if the aliases match between Things and Venues. So, I want to return:
{
"_id" : ObjectId("5a7241f7912cfc256468cb27"),
"name" : "Fortress of Solitude",
"alias" : "fortress_of_solitude",
<-- nothing added here because
<-- it's not found in Venues
},
{
"_id" : ObjectId("5a7247ec548c9ad042f579e2"),
"name" : "Batcave",
"alias" : "batcave",
"floors" : [ <-- this should be
{ <-- returned
"places" : [ <-- because
{ <-- the alias
name" : "Batcave" <-- matches
} <-- in Venues
] <--
} <--
] <--
},
{
"_id" : ObjectId("6a7247bc548c9ad042f579e8"),
"name" : "Oz",
"alias" : "oz",
"floors" : [ <-- this should be
{ <-- returned
"places" : [ <-- because
{ <-- the alias
name" : "Oz" <-- matches
} <-- in Venues
] <--
} <--
] <--
}
I've gotten as far as the following query, but it only returns the entire Venues.floors array as an aggregate onto each Thing, which is way too much extraneous data aggregated. I just want to merge each relevant floor.place sub-subsubdocument from Venues into its corresponding Thing if it exists in Venues.
db.getCollection('things').aggregate([
{$lookup: {from: "venues",localField: "alias",foreignField: "floors.places.alias",as: "matches"}},
{
$replaceRoot: { newRoot: { $mergeObjects: [ { $arrayElemAt: [ "$matches", 0 ] }, "$$ROOT" ] } }
},
{ $project: { matches: 0 } }
])
I'm struggling with existing answers, which seem to change at MongoDB version 3.2, 3.4, 3.6, or 4.2 to include or not include $unwind, $pipeline, and other terms. Can someone explain how to get a sub-sub-subdocument aggregated like this? Thanks!
You can try this :
db.things.aggregate([
{
$lookup:
{
from: "venues",
let: { alias: "$alias" },
pipeline: [
{ $unwind: { path: "$floors", preserveNullAndEmptyArrays: true } },
{ $match: { $expr: { $in: ['$$alias', '$floors.places.alias'] } } },
/** Below stages are only if you've docs like doc 2 in Venues */
{ $addFields: { 'floors.places': { $filter: { input: '$floors.places', cond: { $eq: ['$$this.alias', '$$alias'] } } } } },
{ $group: { _id: '$_id', name: { $first: '$name' }, floors: { $push: '$floors' } } },
{$project : {'floors.places.alias': 1, _id :0}} // Optional
],
as: "matches"
}
}
])
Test : MongoDB-Playground
Since MongoDB v3.6, we may perform uncorrelated sub-queries which gives us more flexibility to join two collections.
Try this:
db.things.aggregate([
{
$lookup: {
from: "venues",
let: {
"alias": "$alias"
},
pipeline: [
{
$unwind: "$floors"
},
{
$project: {
_id: 0,
places: {
$filter: {
input: "$floors.places",
cond: {
$eq: [
"$$alias",
"$$this.alias"
]
}
}
}
}
},
{
$match: {
"places.0": {
$exists: true
}
}
},
{
$unset: "places.name"
}
],
as: "floors"
}
}
])
MongoPlayground

Select Key from MongoDB key value pair

I am trying to perform lookup on MongoDb between two collections where one of the fields as attribute as an key-value pair where i have to choose only key as my local field parameter.
Sample Json:
key of the below json
{ "distributions" : {
"5cf88" : "5d023d4aa",
"5cfaca42e" : "5d0093a",
"5d023d490d" : "5d22abc69093a"
}
}
_id field of the below json:
{
"_id" : "5d22abc69093a",
"activatedBy" : {
"id" : "5bc53813055aec",
"name" : "Test1",
"roles" : [
"root"
]
}
}
Lookup query:
$lookup: {
from: 'collecection1',
localField: 'distributions.key',
foreignField: '_id',
as: 'Join'
}
How to get they key form distributions to use for lookup as i need only key of as my join parameter?
How to get they key form distributions to use for lookup as i need
only key of as my join parameter?
This aggregation query can do that using the $objectToArray aggregation operator:
db.collection1.aggregate( [
{ $addFields: { fieldNameValues: { $objectToArray: "$$ROOT" } } },
{ $unwind: "$fieldNameValues" },
{ $match: { $expr: { $eq: [ { $type: "$fieldNameValues.v" } , "object" ] } } },
{ $addFields: { objs: { $objectToArray: "$fieldNameValues.v" } } },
{ $unwind: "$objs" },
{ $project: { distributions: "$objs" } },
{ $lookup: {
from: 'collection2',
localField: 'distributions.v',
foreignField: '_id',
as: 'Join'
}
}
] )
where:
collection1:
{ "distributions" : {
"5cf88" : "5d023d4aa",
"5cfaca42e" : "5d0093a",
"5d023d490d" : "5d22abc69093a"
}
}
collection2:
{
"_id" : "5d22abc69093a",
"activatedBy" : {
"id" : "5bc53813055aec",
"name" : "Test1",
"roles" : [
"root"
]
}
}

Mongodb aggretate apply sort to lookup results, and add field index number

The aggregate was executed.
I got the results using lookup, but I need a sort.
In addition, I want to assign an index to the result value.
CollectionA :
{
"_id" : ObjectId("5a6cf47415621604942386cd"),
"contents" : [
ObjectId("AAAAAAAAAAAAAAAAAAAAAAAA"),
ObjectId("BBBBBBBBBBBBBBBBBBBBBBBB")
],
"name" : "jason"
}
CollectionB :
{
"_id" : ObjectId("AAAAAAAAAAAAAAAAAAAAAAAA")
"title" : "a title",
"date" : 2018-01-02
},
{
"_id" : ObjectId("BBBBBBBBBBBBBBBBBBBBBBBB")
"title" : "a title",
"date" : 2018-01-01
}
Query:
db.getCollection('A').aggregate([
{
$match : { "_id" : ObjectId("5a6cf47415621604942386cd") }
},
{
$lookup : {
from: "B",
localField: "contents",
foreignField: "_id",
as: "item"
}
},
{ $sort: { "item.date" : -1 } }
]);
Want Result:
{
"_id" : ObjectId("5a6cf47415621604942386cd"),
"contents" : [
{
"_id" : ObjectId("BBBBBBBBBBBBBBBBBBBBBBBB")
"title" : "a title",
"date" : 2018-01-01,
"index" : 0
},
{
"_id" : ObjectId("AAAAAAAAAAAAAAAAAAAAAAAA")
"title" : "a title",
"date" : 2018-01-02,
"index" : 1
}],
"name" : "jason"
}
The current problem does not apply to the sort.
And I don't know how to designate an index.
Below Aggregation may you. For your desire result.
db.CollectionA.aggregate([
{
$match: { "_id": ObjectId("5a6cf47415621604942386cd") }
},
{
$lookup: {
from: "CollectionB",
let: { contents: "$contents" },
pipeline: [
{
$match: { $expr: { $in: ["$_id", "$$contents"] } }
},
{ $sort: { date: 1 } }
],
as: "contents"
}
},
{
$project: {
contents: {
$map: {
input: { $range: [0, { $size: "$contents" }, 1 ] },
as: "element",
in: {
$mergeObjects: [
{ index: "$$element" },
{ $arrayElemAt: [ "$contents", "$$element" ]}
]
}
}
}
}
}
])
One way to go about it would be to unwind the array, sort it and then group it back
db.A.aggregate([
{
$match: {
"_id": ObjectId("5a6cf47415621604942386cd")
}
},
{
$lookup: {
from: "B",
localField: "contents",
foreignField: "_id",
as: "item"
}
},
{
$unwind: "$item"
},
{
$sort: {
"item.date": -1
}
},
{
$group: {
_id: "$_id",
contents: {
$push: "$item"
}
}
}
])
Another method is, (this is applicable only if the date field corresponds to the document creation date),
db.A.aggregate([
{
$match: {
"_id": ObjectId("5a6cf47415621604942386cd")
}
},
{
$lookup: {
from: "B",
localField: "contents",
foreignField: "_id",
as: "item"
}
},
{
$sort: {
"item": -1
}
}
])
Basically, this sorts on the basis of _id, and since _id is created using the creation date, it should sort accordingly.

Mongo how to $lookup with DBRef

I have a trouble(/(ㄒoㄒ)/~~). Suppose that collection A is
{
"_id" : ObjectId("582abcd85d2dfa67f44127e1"),
"bid" : [
DBRef("B", ObjectId("582abcd85d2dfa67f44127e0")),
DBRef("B", ObjectId("582abcd85d2dfa67f44127e1"))
]
}
and Collection B:
{
"_id" : ObjectId("582abcd85d2dfa67f44127e0"),
"status" : NumberInt(1),
"seq" : NumberInt(0)
},
{
"_id" : ObjectId("582abcd85d2dfa67f44127e1"),
"status" : NumberInt(1),
"seq" : NumberInt(0)
}
I don't know how to $lookup the 'bid'. I tried
db.A.aggregate(
[
{$unwind: {path: "$bid"}},
{$lookup: {from: "B", localField: "bid", foreignField: "_id", as: "bs"}},
]
)
and
db.A.aggregate(
[
{$unwind: {path: "$bid"}},
{$lookup: {from: "B", localField: "bid.$id", foreignField: "_id", as: "bs"}},
]
)
but it doesn't work. Anybody can help? Thanks.
Actually, the other answer is wrong. It is possible to do a lookup on a DBref field within your aggregator, and you don't need mapreduce for that.
Solution
db.A.aggregate([
{
$project: {
B_fk: {
$map: {
input: {
$map: {
input:"$bid",
in: {
$arrayElemAt: [{$objectToArray: "$$this"}, 1]
},
}
},
in: "$$this.v"}},
}
},
{
$lookup: {
from:"B",
localField:"B_fk",
foreignField:"_id",
as:"B"
}
}
])
result
{
"_id" : ObjectId("59bb79df1e9c00162566f581"),
"B_fk" : null,
"B" : [ ]
},
{
"_id" : ObjectId("582abcd85d2dfa67f44127e1"),
"B_fk" : [
ObjectId("582abcd85d2dfa67f44127e0"),
ObjectId("582abcd85d2dfa67f44127e1")
],
"B" : [
{
"_id" : ObjectId("582abcd85d2dfa67f44127e0"),
"status" : NumberInt("1"),
"seq" : NumberInt("0")
}
]
}
Short Explanation
Loop through the DBRefs with $map, break each DBref into an array, keep only the $id field, then get rid of the k:v format with $$this.v, keeping only the ObjectId and removing all the rest. You can now lookup on the ObjectId.
Step-by-step Explanation
Within the aggregator, a DBRef BSON type can be handled like an object, with two or three fields (ref, id, and db).
If you do:
db.A.aggregate([
{
$project: {
First_DBref_as_array: {$objectToArray:{$arrayElemAt:["$bid",0]}},
Second_DBref_as_array: {$objectToArray:{$arrayElemAt:["$bid",1]}},
}
},
])
This is the result:
{
"_id" : ObjectId("582abcd85d2dfa67f44127e1"),
"First_DBref_as_array : [
{
"k" : "$ref",
"v" : "B"
},
{
"k" : "$id",
"v" : ObjectId("582abcd85d2dfa67f44127e0")
}
],
"Second_DBref_as_array" : [
{
"k" : "$ref",
"v" : "B"
},
{
"k" : "$id",
"v" : ObjectId("582abcd85d2dfa67f44127e0")
}
]
}
Once you have transformed a dbref into an array, you can get rid of the useless fields by querying only the value at index 1, like this:
db.A.aggregate([
{
$project: {
First_DBref_as_array: {$arrayElemAt: [{$objectToArray:{$arrayElemAt:["$bid",0]}},1]},
Second_DBref_as_array: {$arrayElemAt: [{$objectToArray:{$arrayElemAt:["$bid",0]}},1]},
}
},
])
result:
{
"_id" : ObjectId("582abcd85d2dfa67f44127e1"),
"First_DBref_as_array" : {
"k" : "$id",
"v" : ObjectId("582abcd85d2dfa67f44127e0")
},
"Second_DBref_as_array" : {
"k" : "$id",
"v" : ObjectId("582abcd85d2dfa67f44127e0")
}
}
Then you can get finally to the value you want by pointing to "$myvalue.v", just like this
db.A.aggregate([
{
$project: {
first_DBref_as_array: {$arrayElemAt: [{$objectToArray:{$arrayElemAt:["$bid",0]}},1]},
second_DBref_as_array: {$arrayElemAt: [{$objectToArray:{$arrayElemAt:["$bid",0]}},1]},
}
},
{
$project: {
first_DBref_as_ObjectId: "$first_DBref_as_array.v",
second_DBref_as_ObjectId: "$second_DBref_as_array.v"
}
}
])
result:
{
"_id" : ObjectId("582abcd85d2dfa67f44127e1"),
"first_DBref_as_ObjectId" : ObjectId("582abcd85d2dfa67f44127e0"),
"second_DBref_as_ObjectId" : ObjectId("582abcd85d2dfa67f44127e0")
}
Obviously, in a normal pipeline, you don't need all these redundant steps, using a nested $map, you can get to the same result in one go :
db.A.aggregate([
{
$project: {
B_fk: { $map : {input: { $map: { input:"$bid",
in: { $arrayElemAt: [{$objectToArray: "$$this"}, 1 ]}, } },
in: "$$this.v"}},
}
},
])
result:
{
"_id" : ObjectId("582abcd85d2dfa67f44127e1"),
"B_fk" : [
ObjectId("582abcd85d2dfa67f44127e0"),
ObjectId("582abcd85d2dfa67f44127e1")
]
}
I hope the explanation is clear enough, if not feel free to ask.
As of mongoDB 3.4, this is not possible. You can't use DBRef in the aggregation pipeline, except in the $match stage.
I strongly recommend you to get rid of DBRef and switch to manual references. However, if you really need to keep DBRef, here is an (ugly) solution:
first, create a new collection named "C", where DBRefs are replaced by their Ids using mapReduce:
db.A.mapReduce(
function() {
var key = this._id;
var value = [];
for ( var index = 0; index < this.bid.length; index++){
value.push(this.bid[index].$id);
}
emit(key, value);
},
function(key,values) {
return values;
},
{
"query": {},
"out": "C"
}
)
then, run your aggregation query on the new "C" collection:
db.C.aggregate([
{
$unwind:"$value"
},
{
$lookup:{
from:"B",
localField:"value",
foreignField:"_id",
as:"bs"
}
}
]);
output:
{
"_id":ObjectId("582abcd85d2dfa67f44127e1"),
"value":ObjectId("582abcd85d2dfa67f44127e0"),
"bs":[
{
"_id":ObjectId("582abcd85d2dfa67f44127e0"),
"status":1,
"seq":0
}
]
}{
"_id":ObjectId("582abcd85d2dfa67f44127e1"),
"value":ObjectId("582abcd85d2dfa67f44127e1"),
"bs":[
{
"_id":ObjectId("582abcd85d2dfa67f44127e1"),
"status":1,
"seq":0
}
]
}
Just in case someone is coming here in 2021:
Starting with MongoDB 4.3.3 the second query of the OP does work:
db.A.aggregate(
[
{$unwind: {path: "$bid"}},
{$lookup: {from: "B", localField: "bid.$id", foreignField: "_id", as: "bs"}},
]
)
The result is:
{
"_id":ObjectId("582abcd85d2dfa67f44127e1"),
"bid":DBRef("B", "ObjectId("582abcd85d2dfa67f44127e0")),
"bs":[
{
"_id":ObjectId("582abcd85d2dfa67f44127e0")",
"status":1,
"seq":0
}
]
}{
"_id":ObjectId("582abcd85d2dfa67f44127e1"),
"bid":DBRef("B", "ObjectId("582abcd85d2dfa67f44127e1")),
"bs":[
{
"_id":ObjectId("582abcd85d2dfa67f44127e1"),
"status":1,
"seq":0
}
]
}
See SERVER-14466 for more information.