How to get the matched fields from two collections in mongodb? - mongodb

I have two collections called "orders" and "items" as specified below:
db.orders.insert([
{ "_id" : 1, "item" : "almonds", "price" : 12, "quantity" : 2 },
{ "_id" : 2, "item" : "pecans", "price" : 20, "quantity" : 1 }
])
db.items.insert([
{ "_id" : 1, "item" : "almonds", description: "almond clusters", "instock" : 120 },
{ "_id" : 2, "item" : "bread", description: "raisin and nut bread", "instock" : 80 },
{ "_id" : 3, "item" : "pecans", description: "candied pecans", "instock" : 60 }
])
Now, I'm trying to fetch the documents whose "item" field matches in both collections.
I have tried using the $lookup operator for this scenario, but the query fails when I run it in the mongo shell.
My $lookup query:
db.orders.aggregate([
{
$lookup:
{
from: "items",
localField: “item”,
foreignField: “item”,
as: "ite"
}
},
{ $unwind: "$ite" },
{
$project: {
“item”: 1
}
}
])
Can anyone please help me out regarding the above mentioned scenario ...

Related

Does $lookup use indexes on the foreignField key?

In the example below, if the collection inventory has an index on the sku field, will it be used in this $lookup operation?
db.orders.insertMany( [
{ "_id" : 1, "item" : "almonds", "price" : 12, "quantity" : 2 },
{ "_id" : 2, "item" : "pecans", "price" : 20, "quantity" : 1 },
{ "_id" : 3 }
] )
db.inventory.insertMany( [
{ "_id" : 1, "sku" : "almonds", "description": "product 1", "instock" : 120 },
{ "_id" : 2, "sku" : "bread", "description": "product 2", "instock" : 80 },
{ "_id" : 3, "sku" : "cashews", "description": "product 3", "instock" : 60 },
{ "_id" : 4, "sku" : "pecans", "description": "product 4", "instock" : 70 },
{ "_id" : 5, "sku": null, "description": "Incomplete" },
{ "_id" : 6 }
] )
db.orders.aggregate( [
{
$lookup:
{
from: "inventory",
localField: "item",
foreignField: "sku",
as: "inventory_docs"
}
}
] )
EDITED:
It does not. Why not?
{
"explainVersion" : "1",
"stages" : [
{
"$cursor" : {
"queryPlanner" : {
"namespace" : "6303c64faf8ef53d8ba2062f_y22_test2.orders",
"indexFilterSet" : false,
"parsedQuery" : {
},
"queryHash" : "8B3D4AB8",
"planCacheKey" : "D542626C",
"maxIndexedOrSolutionsReached" : false,
"maxIndexedAndSolutionsReached" : false,
"maxScansToExplodeReached" : false,
"winningPlan" : {
"stage" : "COLLSCAN",
"direction" : "forward"
},
"rejectedPlans" : [
]
}
}
},
{
"$lookup" : {
"from" : "inventory",
"as" : "inventory_docs",
"localField" : "item",
"foreignField" : "sku"
}
}
],
In the case of simple lookups (e.g., when specifying a localField + foreignField), an index will be used.
Things are sadly more complicated when using a $lookup with a pipeline; the following limitations apply:
Multikey indexes are not used.
Indexes are not used for comparisons where the operand is an array or the
operand type is undefined.
Indexes are not used for comparisons with more than one field path operand.
https://www.mongodb.com/docs/manual/reference/operator/aggregation/lookup/
It is really annoying that the explain() call doesn't provide any information on index usage of lookup stages. The best way I've found to determine whether an index was used is to (separately) run the $indexStats aggregation on the collection being looked up; in the above case:
db.inventory.aggregate([{$indexStats: {}}])
Then find the index you think is being used and watch the accesses.ops field.

MongoDB aggregation project the specific fields from lookup

This example is following https://docs.mongodb.com/manual/reference/operator/aggregation/lookup/#use-lookup-with-mergeobjects
db.orders.insert([
{ "_id" : 1, "item" : "almonds", "price" : 12, "quantity" : 2 },
{ "_id" : 2, "item" : "pecans", "price" : 20, "quantity" : 1 }
])
db.items.insert([
{ "_id" : 1, "item" : "almonds", description: "almond clusters", "instock" : 120 },
{ "_id" : 2, "item" : "bread", description: "raisin and nut bread", "instock" : 80 },
{ "_id" : 3, "item" : "pecans", description: "candied pecans", "instock" : 60 }
])
Aggregation:
db.orders.aggregate([
{
$lookup: {
from: "items",
localField: "item", // field in the orders collection
foreignField: "item", // field in the items collection
as: "fromItems"
}
},
{
$replaceRoot: { newRoot: { $mergeObjects: [ { $arrayElemAt: [ "$fromItems", 0 ] }, "$$ROOT" ] } }
},
{ $project: { fromItems: 0 } }
])
Result:
{ "_id" : 1, "item" : "almonds", "description" : "almond clusters", "instock" : 120, "price" : 12, "quantity" : 2 }
{ "_id" : 2, "item" : "pecans", "description" : "candied pecans", "instock" : 60, "price" : 20, "quantity" : 1 }
Question: How can I modify the aggregation to project only specific fields? E.g., project "_id", "item" and "description" only:
{ "_id" : 1, "item" : "almonds", "description" : "almond clusters" }
{ "_id" : 2, "item" : "pecans", "description" : "candied pecans" }
You're getting an empty array because the $lookup isn't matching anything.
match the types
$addFields to convert
PLAYGROUND
This should be the first stage:
{
$addFields: {
itemId: {
$convert: {
input: "$itemId",
to: "int"
}
}
}
},
If you prefer, there is no need to add a stage.
You could instead remove $addFields and use $lookup with let.
Modify the lookup this way:
{
$lookup: {
from: "items",
let: {
itemId: {
$convert: {
input: "$itemId",
to: "int"
}
}
},
pipeline: [
{
$match: {
$expr: {
$eq: [
"$_id",
"$$itemId"
]
}
}
}
],
/** field in the items collection*/
as: "fromItems"
}
}
PLAYGROUND2

MongoDB: Remove field in array with $lookup localField

I am a beginner with MongoDB. I use $lookup in an aggregation and use localField to get the referenced document.
db.orders.insert([
{ "_id" : 1, "item" : ['almonds','pecans','bread'], "price" : 12, "quantity" : 2 },
{ "_id" : 2, "item" : ['cashews','catty'], "price" : 20, "quantity" : 1 }
])
I tried to use $lookup and localField in an aggregation, but I can't find a way to remove the _id and description fields.
db.inventory.insert([
{ "_id" : 1, "sku" : "almonds", description: "product 1", "instock" : 120 },
{ "_id" : 2, "sku" : "bread", description: "product 2", "instock" : 80 },
{ "_id" : 3, "sku" : "cashews", description: "product 3", "instock" : 60 },
{ "_id" : 4, "sku" : "pecans", description: "product 4", "instock" : 70 },
{ "_id" : 5, "sku": "catty", description: "Incomplete", "instock" : 100 },
{ "_id" : 6 }
])
Expected results:
[
{
"_id" : 1,
"item" : [
{ "sku" : "almonds", "instock" : 120 },
{ "sku" : "pecans", "instock" : 70 },
{ "sku" : "bread", "instock" : 80 }
],
"price" : 12,
"quantity" : 2
},
{
"_id" : 2,
"item" : [
{ "sku" : "cashews", "instock" : 60 },
{ "sku" : "catty", "instock" : 100 }
],
"price" : 20,
"quantity" : 1
}
]
You can try $lookup with an aggregation pipeline:
$lookup to join with the inventory collection
$match to check whether the inventory sku is in the item array
$project to display the required fields
db.orders.aggregate([
{
$lookup: {
from: "inventory",
as: "item",
let: { i: "$item" },
pipeline: [
{ $match: { $expr: { $in: ["$sku", "$$i"] } } },
{
$project: {
_id: 0,
sku: 1,
instock: 1
}
}
]
}
}
])
Playground

How to join two collections based on the _id field in mongodb

I have two collections called orders and items as shown below. Now, I'm trying to join these collections based on the _id field. Can we use the "$lookup" operator for this scenario? Or is there any other method to resolve this problem?
db.orders.insert([
{ "_id" : 1, "item" : "almonds", "price" : 12, "quantity" : 2 },
{ "_id" : 2, "item" : "pecans", "price" : 20, "quantity" : 1 }
])
db.items.insert([
{ "_id" : 1, "item" : "almonds", description: "almond clusters", "instock" : 120 },
{ "_id" : 2, "item" : "bread", description: "raisin and nut bread", "instock" : 80 },
{ "_id" : 3, "item" : "pecans", description: "candied pecans", "instock" : 60 }
])
Can anyone please help me out regarding this issue ...
Try following code:
db.orders.aggregate([
{
$lookup:
{
from: "items",
localField: "_id",
foreignField: "_id",
as: "item"
}
},
{ $unwind: "$item" },
{
$project: {
"_id": 1,
"price": 1,
"quantity": 1,
"description": "$item.description",
"instock": "$item.instock"
}
}
])
Since you know that there will be a 1-to-1 relationship, you can unwind the $lookup results to have just one embedded item for each order. Then you can project your results to get a flat JSON structure. This will give you results in the following shape:
{
"_id" : 1,
"price" : 12,
"quantity" : 2,
"description" : "almond clusters",
"instock" : 120
}
Try This.
db.orders.aggregate([
{ $lookup:
{
from: 'items',
localField: '_id',
foreignField: '_id',
as: 'get_data'
}
}
]).exec(function(err, res) {
if (err) throw err;
console.log(res);
});

How to ensure grouping via two separate criteria

Expanded from How to average the summed up values in mongodb?
Using MongoDB 2.4.8,
I have the following records
{
"category" : "TOYS",
"price" : 12,
"status" : "online",
"_id" : "35043"
}
{
"category" : "TOYS",
"price" : 13,
"status" : "offline",
"_id" : "35044"
}
{
"category" : "TOYS",
"price" : 22,
"status" : "online",
"_id" : "35045"
}
{
"category" : "BOOKS",
"price" : 13,
"status" : "offline",
"_id" : "35046"
}
{
"category" : "BOOKS",
"price" : 17,
"status" : "online",
"_id" : "35047"
}
{
"category" : "TOYS",
"price" : 19,
"status" : "unavailable",
"_id" : "35048"
}
{
"category" : "BOOKS",
"price" : 10,
"status" : "unavailable",
"_id" : "35049"
}
{
"category" : "BOOKS",
"price" : 17,
"status" : "unavailable",
"_id" : "35050"
}
I want to find the average price of all categories whose status is online OR offline and whose total price within the category is more than 50.
Toys offline and Toys online are considered two separate categories.
I adapted the answer given.
db.items.aggregate([
{$match:
{
$or: [
{status:"online"},
{status:"offline"}
]
}
},
{$group :
{
_id: "$category",
total_price: {$sum:"$price"},
}
},
{$match:
{
total_price:{$gt:50}
}
},
{$group :
{
_id: "1",
avg_price: {$avg:"$total_price"},
}
},
]);
But I believe the query I adapted groups categories of the same name together, which is not what I am looking for.
If online and offline are the only values for status, you can remove the initial $match step. If it is needed, it would be more appropriate to use the $in operator as these values could be found in the same index (if one existed).
I think the only step you are missing is that you can $group by multiple fields (i.e. category and status):
db.items.aggregate(
// If 'online' and 'offline' are the only possible status values, this may be unnecessary
{ $match: {
'status' : { $in: [ 'online', 'offline' ] }
}},
// Group by category & status
{ $group: {
_id: { category: "$category", status: "$status" },
total_price: { $sum: "$price" },
}},
// Only find groups where total_price is > 50
{ $match: {
total_price: { $gt:50 }
}},
// Find the average price for the group
{ $group : {
_id: null,
avg_price: {$avg:"$total_price"},
}}
)