join two tables based on $match and $lookup - mongodb

I have two collections and I'm trying to join them based on a condition.
Below are my two collections
cfg: [{"dpi": 1, "mid": "xyz"},{"dpi": 0, "mid": "abc"},........]
sts: [{"sts": 1, "mid": "xyz"},{"sts": 0, "mid": "abc"}........]
I want the result if dpi is 1 and sts is 1 by joining both collections by mid.
and below is the query I'm trying.
// Join cfg with sts on `mid`, considering only cfg documents with dpi = 1
// and only sts documents with sts = 1.
db.cfg.aggregate([
// Keep only the cfg documents whose dpi is 1.
{ $match: { dpi: 1 } },
{
// NOTE(review): $lookup is documented as a left outer join, so a cfg document
// with no matching sts document is presumably still returned, with `Sts: []`.
$lookup: {
from: 'sts',
as: 'Sts',
// Expose the cfg document's `mid` to the sub-pipeline as the variable $$mid.
let: { mid: '$mid' },
pipeline: [
{
$match: {
// $expr is required to reference `let` variables inside $match.
$expr: {
$and: [
// '$mid' is the sts document's field; '$$mid' is the cfg value from `let`.
{ $eq: ['$mid', '$$mid'] },
{ $eq: ['$sts', 1] },
]
}
}
}
]
}
},
])
but I'm not getting any results. what should be the correct query?
mongo version: 4.4

Related

Relate and Count Between Two Collections in MongoDB

How can I count the number of completed houses designed by a specific architect in MongoDB?
I have the next two collections, "plans" and "houses".
Where the only relationship between houses and plans is that houses have the id of a given plan.
Is there a way to do this in MongoDB with just one query?
plans
{
_id: ObjectId("6388024d0dfd27246fb47a5f")
"hight": 10,
"arquitec": "Aneesa Wade",
},
{
_id: ObjectId("1188024d0dfd27246fb4711f")
"hight": 50,
"arquitec": "Smith Stone",
}
houses
{
_id: ObjectId
"plansId": "6388024d0dfd27246fb47a5f" -> string,
"status": "under construction",
},
{
_id: ObjectId
"plansId": "6388024d0dfd27246fb47a5f" -> string,
"status": "completed",
}
What I tried was to use mongo aggregations while using $match and $lookup.
The "idea" with clear errors would be something like this.
db.houses.aggregate([
{"$match": {"status": "completed"}},
{
"$lookup": {
"from": "plans",
"pipeline": [
{
"$match": {
"$expr": {
"$and": [
{ "$eq": [ "houses.plansId", { "$toString": "$plans._id" }]},
{ "plans.arquitec" : "Smith Stone" },
]
}
}
},
],
}
}
If there is only a single join condition, simply add a $project stage that converts plansId to an ObjectId; a plain localField/foreignField $lookup then works and you avoid any complicated lookup pipeline.
Example playground - https://mongoplayground.net/p/gaqxZ7SzDTg
// Find completed houses and attach the plan each one references, then keep
// only the houses whose plan was designed by a given architect.
db.houses.aggregate([
{
// Only completed houses are of interest.
$match: {
status: "completed"
}
},
{
// plansId is stored as a string; add an ObjectId copy (plans_id) so it can
// be compared with plans._id in a simple equality $lookup.
$project: {
_id: 1,
plansId: 1,
status: 1,
plans_id: {
$toObjectId: "$plansId"
}
}
},
{
// Equality join: houses.plans_id == plans._id. The matches land in the `plan` array.
$lookup: {
from: "plans",
localField: "plans_id",
foreignField: "_id",
as: "plan"
}
},
{
// Replace the `plan` array with its first element and drop the helper plans_id field.
$project: {
_id: 1,
plansId: 1,
status: 1,
plan: {
$first: "$plan"
}
}
},
{
// Filter on the joined plan's architect.
// NOTE(review): "Some One" looks like a placeholder name — substitute the architect you are querying for.
$match: {
"plan.arquitec": "Some One"
}
}
])
Update: As per OP comment, added additional match stage for filtering the final result based on the lookup response.

Optimise MongoDB aggregate query performance

I have next DB structure:
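Workspaces:
| Key | Field | Index |
|-----|-------|-------|
| PK | id | id |
| | content | |

Projects:
| Key | Field | Index |
|-----|-------|-------|
| PK | id | id |
| FK | workspace | workspace_1 |
| | deleted | deleted_1 |
| | content | |

Items:
| Key | Field | Index |
|-----|-------|-------|
| PK | id | id |
| FK | project | project_1 |
| | type | _type_1 |
| | deleted | deleted_1 |
| | content | |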
I need to calculate the number of items of each type for each project in a workspace, e.g. expected output:
[
{ _id: 'projectId1', itemType1Count: 100, itemType2Count: 50, itemType3Count: 200 },
{ _id: 'projectId2', itemType1Count: 40, itemType2Count: 100, itemType3Count: 300 },
....
]
After few attempts and some debugging I've created a query which provides output I needed:
// Per-project item counts by type for one workspace.
// Runs against the projects collection (see Project.aggregate below).
const pipeline = [
// Restrict to the projects of a single workspace.
{ $match: { workspace: 'workspaceId1' } },
{
// For each project, fetch its items (items.project == projects._id).
$lookup: {
from: 'items',
let: { id: '$_id' },
pipeline: [
{
$match: {
$expr: {
$eq: ['$project', '$$id'],
},
},
},
// project only fields necessary for later pipelines to not overload
// memory and to not get `exceeded memory limit for $group` error
{ $project: { _id: 1, type: 1, deleted: 1 } },
],
as: 'items',
},
},
// Use $unwind here to optimize aggregation pipeline, see:
// https://stackoverflow.com/questions/45724785/aggregate-lookup-total-size-of-documents-in-matching-pipeline-exceeds-maximum-d
// Without $unwind we may get an `matching pipeline exceeds maximum document size` error.
// Error appears not in all requests and it's really strange and hard to debug.
{ $unwind: '$items' },
// Deleted items are filtered only here, after they have already been looked up and unwound.
{ $match: { 'items.deleted': { $eq: false } } },
{
// Re-assemble the surviving items into one array per project.
$group: {
_id: '$_id',
items: { $push: '$items' },
},
},
{
// Count the items of each type by filtering the array and taking its size.
$project: {
_id: 1,
// Note: I have only 3 possible item types, so it's OK that it's names hardcoded.
itemType1Count: {
$size: {
$filter: {
input: '$items',
cond: { $eq: ['$$this.type', 'type1'] },
},
},
},
itemType2Count: {
$size: {
$filter: {
input: '$items',
cond: { $eq: ['$$this.type', 'type2'] },
},
},
},
itemType3Count: {
$size: {
$filter: {
input: '$items',
cond: { $eq: ['$$this.type', 'type3'] },
},
},
},
},
},
]
const counts = await Project.aggregate(pipeline)
The query works as expected, but it is very slow: with only about 1000 items in one workspace it takes about 8 seconds to complete. Any ideas on how to make it faster are appreciated.
Thanks.
Assuming your indexes are set up properly (i.e. they contain the "correct" fields), we can still make some tweaks to the query itself.
Approach 1: keeping existing collection schema
// Per-project item counts by type for one workspace, keeping the existing schema.
// Output shape: { _id: <projectId>, itemType1Count, itemType2Count, itemType3Count }.
db.projects.aggregate([
  {
    // Restrict to the projects of a single workspace.
    $match: {
      workspace: "workspaceId1"
    }
  },
  {
    $lookup: {
      from: "items",
      let: {id: "$_id"},
      pipeline: [
        {
          // Join condition plus the `deleted` filter, so deleted items are
          // never pulled into the outer pipeline in the first place.
          $match: {
            $expr: {
              $and: [
                {$eq: ["$project", "$$id"]},
                {$eq: ["$deleted", false]}
              ]
            }
          }
        },
        // project only fields necessary for later pipelines to not overload
        // memory and to not get `exceeded memory limit for $group` error
        {
          $project: {
            _id: 1,
            type: 1,
            deleted: 1
          }
        }
      ],
      as: "items"
    }
  },
  // Use $unwind here to optimize aggregation pipeline, see:
  // https://stackoverflow.com/questions/45724785/aggregate-lookup-total-size-of-documents-in-matching-pipeline-exceeds-maximum-d
  // Without $unwind we may get an `matching pipeline exceeds maximum document size` error.
  // Error appears not in all requests and it's really strange and hard to debug.
  {
    $unwind: "$items"
  },
  {
    // One output document per project; each counter adds 1 for every unwound
    // item of the corresponding type and 0 otherwise.
    $group: {
      _id: "$_id",
      itemType1Count: {
        $sum: {
          "$cond": {
            "if": {$eq: ["$items.type", "type1"]},
            "then": 1,
            "else": 0
          }
        }
      },
      itemType2Count: {
        $sum: {
          "$cond": {
            "if": {$eq: ["$items.type", "type2"]},
            "then": 1,
            "else": 0
          }
        }
      },
      itemType3Count: {
        $sum: {
          "$cond": {
            // Must compare against "type3"; comparing against "type1" would
            // make this counter a duplicate of itemType1Count.
            "if": {$eq: ["$items.type", "type3"]},
            "then": 1,
            "else": 0
          }
        }
      }
    }
  }
])
There are 2 major changes:
moving the items.deleted : false condition into the $lookup subpipeline to lookup less items documents
skipped items: { $push: '$items' }. Instead, do a conditional sum in later $group stage
Here is the Mongo playground for your reference. (at least for the correctness of the new query)
Approach 2: If the collection schema can be modified. We can denormalize projects.workspace into the items collection like this:
{
"_id": "i1",
"project": "p1",
"workspace": "workspaceId1",
"type": "type1",
"deleted": false
}
In this way, you can skip the $lookup. A simple $match and $group will suffice.
db.items.aggregate([
{
$match: {
"deleted": false,
"workspace": "workspaceId1"
}
},
{
$group: {
_id: "$project",
itemType1Count: {
$sum: {
"$cond": {
"if": {$eq: ["$type","type1"]},
"then": 1,
"else": 0
}
}
},
...
Here is the Mongo playground with denormalized schema for your reference.

"iterate" through all document fields in mongodb

I have a collection with documents in this form:
{
"fields_names": ["field1", "field2", "field3"]
"field1": 1,
"field2": [1, 2, 3]
"field3": "12345"
}
where field1, field2, field3 are "dynamic" for each document (I have for each document the fields names in the "fields_names" array)
I would like to test whether 2 documents are equals using the aggregation framework.
I used $lookup stage for getting another documents.
My issue is: how can I "iterate" through the whole fields for my collection?
db.collection.aggregate([
{
{$match: "my_id": "test_id"},
{$lookup:
from: "collection"
let: my_id: "$my_id", prev_id: "$_id"
pipeline: [
{$match: "my_id": "$$my_id", "_id": {$ne: "$$prev_id"}}
]
as: "lookup_test"
}
}])
and in the pipeline of the lookup, I would like to iterate the "fields_names" array for getting the names of the fields, and then access their value and compare between the "orig document" (not the $lookup) and the other documents ($lookup documents).
OR: just to iterate all fields (not include the "fields_names" array)
I would like to fill the "lookup_test" array with all documents which have the same field values.
You will have to compare the two "partial" parts of the document meaning you'll have to ( for each document ) do this in the $lookup, needless to say this is going to be a -very- expensive pipeline. With that said here's how I would do it:
// For each document with my_id = "test_id", collect into `x` the other
// documents of the same collection that carry the same key/value pairs for
// the fields listed in the source document's fields_names.
db.collection.aggregate([
{
$match: {
"my_id": "test_id"
}
},
{
"$lookup": {
"from": "collection",
"let": {
// _id of the source document, used to exclude it from its own result.
id: "$_id",
// The source document as [{k, v}, ...] pairs, keeping only the keys
// that appear in its fields_names array.
partialRoot: {
$filter: {
input: {
"$objectToArray": "$$ROOT"
},
as: "fieldObj",
cond: {
"$setIsSubset": [
[
"$$fieldObj.k"
],
"$fields_names"
]
}
}
}
},
pipeline: [
{
$match: {
$expr: {
$and: [
{
// Skip the source document itself.
$ne: [
"$$id",
"$_id"
]
},
{
// The candidate matches when intersecting the source pairs with the
// candidate's pairs loses nothing, i.e. every source {k, v} pair
// is also present in the candidate.
$eq: [
{
$size: "$$partialRoot"
},
{
$size: {
"$setIntersection": [
"$$partialRoot",
{
// Same reduction applied to the candidate; here $$ROOT and
// "$fields_names" refer to the candidate (looked-up) document.
$filter: {
input: {
"$objectToArray": "$$ROOT"
},
as: "fieldObj",
cond: {
"$setIsSubset": [
[
"$$fieldObj.k"
],
"$fields_names"
]
}
}
}
]
}
}
]
}
]
}
}
},
],
"as": "x"
}
}
])
Mongo Playground
If you could dynamically build the query through code you could make this much more efficient by using the same match query in the $lookup stage like so:
const query = { my_id: "test_id" };
db.collection.aggregate([
{
$match: query
},
{
$lookup: {
...
pipeline: [
{ $match: query },
... rest of pipeline ...
]
}
}
])
This way you're only matching documents that at least match the initial query, which should drastically improve query performance (obviously dependent on the entropy of field x's values).
One more caveat to note is that if x documents match, you will get the same result x times, meaning you probably want to add a $limit: 1 stage to your pipeline.

mongodb aggregate apply a function to a field

As part of an aggregate I need to run this transformation:
// Attempt: for every active inheritance, join the `user` document whose _id is
// the SHA-256 of "secretkey" + creatorId and whose last_access is older than
// time_trigger days.
let inheritances = await db.collection('inheritance').aggregate([
{ $match: { status: 1 }}, // inheritance active
{ $project: { "_id":1, "name": 1, "time_trigger": 1, "signers": 1, "tree": 1, "creatorId": 1, "redeem": 1, "p2sh": 1 } },
{ $lookup:
{
from: "user",
// $$creatorId = "secretkey" + string form of creatorId; $$time_trigger is passed through unchanged.
let: { creatorId: { $concat: [ "secretkey", { $toString: "$creatorId" } ] }, time_trigger: "$time_trigger"},
pipeline: [
{ $match:
{ $expr:
{ $and:
[
// NOTE(review): sha256(...) is an ordinary JavaScript call, so it runs in the
// application while this array literal is built and receives the operator
// object { $toString: "$$creatorId" } itself, not the per-document value.
{ $eq: [ "$_id", sha256( { $toString: "$$creatorId" } ) ] },
// new Date() is likewise evaluated once in the application; the right-hand side
// is last_access + time_trigger days expressed in milliseconds.
{ $gt: [ new Date(), { $add: [ { $multiply: [ "$$time_trigger", 24*60*60*1000 ] }, "$last_access" ] } ] },
]
}
}
},
],
as: "user"
},
},
// One output document per joined user; inheritances with no joined user are dropped.
{ $unwind: "$user" }
]).toArray()
creatorId comes from a lookup, and in order to compare it to _id I first need to do a sha256.
How can I do it?
Thanks.
External functions will not work with the aggregation framework. Everything is parsed to BSON by default, and the BSON operators are then processed by a native C++ implementation. This is by design, for performance.
Basically in short, you can't do this. I recommend just storing the hashed value on every document as a new field, otherwise you'll have to do it in code just before the pipeline.

Different Fields Multiplication in MongoDB

Can we multiply two different fields from different collections in MongoDB?
any help will be highly appreciated...
Yes, you can using the Aggregation Pipeline $multiply operator. https://docs.mongodb.com/manual/reference/operator/aggregation/multiply/
What you want to do is join two collections together using $lookup https://docs.mongodb.com/manual/reference/operator/aggregation/lookup/. In this case, I'll join the accounts and transactions collections on the account_id field.
Then we can project the fields we want to multiply. In this case, I'm getting the first element in the account array, which represents the account document I'm joining from the accounts collection.
Finally, I can multiply the two fields together.
// Pipeline to run on the transactions collection: join each transaction
// document to its account and multiply transaction_count by the account's limit.
[{
// Equality join on account_id; matching accounts documents land in the `account` array.
$lookup: {
from: 'accounts',
localField: 'account_id',
foreignField: 'account_id',
as: 'account'
}
}, {
// Keep only the first joined account (as a plain sub-document) and the count.
$project: {
account: {
$arrayElemAt: ["$account", 0]
},
transaction_count: "$transaction_count",
}
}, {
// Multiply a field of the source document by a field of the joined document.
$project: {
product: {
$multiply: ["$transaction_count", "$account.limit"]
}
}
}]
To reproduce my solution above, create a free cluster in Atlas (https://www.mongodb.com/cloud/atlas) and then load the sample data. Navigate to the Cluster's Collections. Then navigate to the sample_analytics database and the transactions collection. Then navigate to the Aggregation tab. Here you can create an Aggregation Pipeline stage by stage. It's incredibly helpful so you can see the output of each stage as you build the next. Below is a screenshot of the Aggregation Pipeline I described in my solution above.
If you don't have experience with the Aggregation Pipeline, I highly recommend MongoDB University's free course: https://university.mongodb.com/courses/M121/about
MongoDB aggregation operations allow us to join two collections with the $lookup stage and compute field operations (e.g. $multiply).
Given
"collection": [
{
id: 1,
"total": 5
},
{
id: 2,
"total": 2
}
],
"collection2": [
{
collId: 1,
"total": 3
},
{
collId: 2,
"total": 4
}
]
// For each document in `collection`, find the collection2 document with
// collId == id and store the product of both `total` fields in `result`.
db.collection.aggregate([
{
$lookup: {
from: "collection2",
// Pass the outer document's id and total into the sub-pipeline.
let: {
col_id: "$id",
col_total: "$total",
},
pipeline: [
{
// Join condition: collection2.collId == collection.id.
$match: {
$expr: {
$eq: [
"$collId",
"$$col_id"
]
}
}
},
{
// "$total" is collection2's field; "$$col_total" is the outer document's total.
$project: {
summary: {
$multiply: [
"$total",
"$$col_total"
]
}
}
}
],
as: "result"
}
},
{
// Collapse the `result` array to the `summary` value of its first element.
$addFields: {
result: {
$let: {
vars: {
tmp: {
$arrayElemAt: [
"$result",
0
]
}
},
in: "$$tmp.summary"
}
}
}
}
])
MongoPlayground
Result
[
{
"_id": ObjectId("5a934e000102030405000000"),
"id": 1,
"result": 15,
"total": 5
},
{
"_id": ObjectId("5a934e000102030405000001"),
"id": 2,
"result": 8,
"total": 2
}
]