I have next data:
table1
[
{
"_id" : ObjectId("5ef3611fc3e39a4891c479d9"),
"bid" : ObjectId("db08c671b5174f49886ca8de"),
},
{
"_id" : ObjectId("5ef30da4c3e39a4891c479d8"),
"bid" : ObjectId("db08c671b5174f49886ca8de"),
}
]
table2:
[
{
"_id" : ObjectId("5ef3626fc3e39a4891c479da"),
"t1_id" : ObjectId("5ef30da4c3e39a4891c479d8"),
"bid" : ObjectId("db08c671b5174f49886ca8de")
}
]
I have next SQL query with two conditions on join
SELECT table1.* FROM table1
LEFT JOIN table2 t2 ON(t2.bid=table1.bid AND t2.t1_id=table1._id)
and try to adapt it to mongoDB.
I came with next query
db.getCollection("table1").aggregate(
[
{
"$project" : {
"_id" : NumberInt(0),
"leads" : "$$ROOT"
}
},
{
"$lookup" : {
"from" : "table2",
"as" : "t2",
"let" : {
"bid" : "$bid",
"t1_id" : "$t1_id"
},
"pipeline" : [
{
"$match" : {
"$expr" : {
"$and" : [
{ "$eq" : [ "$table1._id","$$t1_id"]},
{ "$eq" : [ "$table1.bid","$$bid"]}
]
}
}
}
]
}
},
{
"$unwind" : {
"path" : "$ps",
"preserveNullAndEmptyArrays" : true
}
}
],
);
I got next result
{
"table1" : {
"_id" : ObjectId("5ef30da4c3e39a4891c479d8"),
"bid" : ObjectId("db08c671b5174f49886ca8de"),
},
"t2" : [
{
"_id" : ObjectId("5ef3626fc3e39a4891c479da"),
"t1_id" : ObjectId("5ef30da4c3e39a4891c479d8"),
"bid" : ObjectId("db08c671b5174f49886ca8de")
}
]
},
{
"table1" : {
"_id" : ObjectId("5ef3611fc3e39a4891c479d9"),
"bid" : ObjectId("db08c671b5174f49886ca8de"),
},
"t2" : [
{
"_id" : ObjectId("5ef3626fc3e39a4891c479da"),
"t1_id" : ObjectId("5ef30da4c3e39a4891c479d8"),
"bid" : ObjectId("db08c671b5174f49886ca8de")
}
]
}
And I can`t understand, why second record from table1 matched with t2, though t2.t1_id != table1._id.
Can you help me find a reason of such results and fix mongo query?
let parameter of $lookup takes an expression from the "local" table (t1), which I assume $t1_id doesn't exist
as parameter corresponds to the output result field of the lookup, not an alias.
In the pipeline, expressions starting with $$ are variables declared in the let parameter. expressions starting with $ are expressions from the "foreign" table (t2)
So, to directly translate the $lookup pipeline stage from your SQL statement, it should be
{
"$lookup" : {
"from" : "table2",
"as" : "t2",
"let" : {
"bid" : "$bid",
"t1_id" : "$_id" // should be $_id, as from the SQL you want table1._id, not table1.t1_id
},
"pipeline" : [
{
"$match" : {
"$expr" : {
"$and" : [
{ "$eq" : [ "$t1_id", "$$t1_id"] }, // $t1_id is from t2, $$t1_id is from variable in let parameter
{ "$eq" : [ "$bid", "$$bid"] } // $bid is from t2, $$bid is from variable in let parameter
]
}
}
}
]
}
}
Related
I'm struggling to identified duplicated elements in my MongoDB records, here is my problem :
I have a Mongo collection named "elements".
Example of a record in this collection :
{
"_id" : ObjectId("5d1b2204e851271e80c824b6"),
"name" : "A",
"items" : [
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d7"),
"_id" : ObjectId("5d1b2205e851271e80c82534")
},
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d6"),
"_id" : ObjectId("5d1b2205e851271e80c82533")
},
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d8"),
"_id" : ObjectId("5d1b2205e851271e80c82532")
},
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d5"),
"_id" : ObjectId("5d1b3048e851271e80c826a5")
},
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d5"),
"_id" : ObjectId("5d1b3048e851271e80c826ad")
}
]
}
I would like to identify records where the array "items" contains objects with the same "ref_id".
In my example we can see that the last two objects of the "items" array have the same "ref_id" : ObjectId("5d1b2204e851271e80c823d5").
I tried a bunch of aggregate function but unfortunately couldn't came out with a solution.
The following query can get us the expected output:
db.elements.aggregate([
{
$unwind:"$items"
},
{
$group:{
"_id":"$_id",
"root":{
$first:"$$ROOT"
},
"items":{
$push:"$items"
},
"distinctItems":{
$addToSet: "$items.ref_id"
}
}
},
{
$match:{
$expr:{
$ne:[
{
$size:"$items"
},
{
$size:"$distinctItems"
}
]
}
}
},
{
$addFields:{
"root.items":"$items"
}
},
{
$replaceRoot:{
"newRoot":"$root"
}
}
]).pretty()
Data set:
{
"_id" : ObjectId("5d1b2204e851271e80c824b6"),
"name" : "A",
"items" : [
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d7"),
"_id" : ObjectId("5d1b2205e851271e80c82534")
},
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d6"),
"_id" : ObjectId("5d1b2205e851271e80c82533")
},
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d8"),
"_id" : ObjectId("5d1b2205e851271e80c82532")
},
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d5"),
"_id" : ObjectId("5d1b3048e851271e80c826a5")
},
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d5"),
"_id" : ObjectId("5d1b3048e851271e80c826ad")
}
]
}
{
"_id" : ObjectId("5d654b9d7d0ab652c42315f2"),
"name" : "B",
"items" : [
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d7"),
"_id" : ObjectId("5d1b2205e851271e80c82534")
},
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d6"),
"_id" : ObjectId("5d1b2205e851271e80c82533")
},
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d8"),
"_id" : ObjectId("5d1b2205e851271e80c82532")
}
]
}
Output:
{
"_id" : ObjectId("5d1b2204e851271e80c824b6"),
"name" : "A",
"items" : [
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d7"),
"_id" : ObjectId("5d1b2205e851271e80c82534")
},
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d6"),
"_id" : ObjectId("5d1b2205e851271e80c82533")
},
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d8"),
"_id" : ObjectId("5d1b2205e851271e80c82532")
},
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d5"),
"_id" : ObjectId("5d1b3048e851271e80c826a5")
},
{
"ref_id" : ObjectId("5d1b2204e851271e80c823d5"),
"_id" : ObjectId("5d1b3048e851271e80c826ad")
}
]
}
Explanation: We are populating an array of distinct ref_id from each document and matching if the size of the populated array is equal to the size of actual items array.
Below Query, we are using for fetching the data, how to convert this to as left join Query.
so if id column does not exist in a user_content table the value of count would be 0.
db.contents.aggregate([
{ "$lookup" : {
"from" : "user_content" ,
"localField" : "_id" ,
"foreignField" : "contentId" ,
"as" : "user_content"}
} ,
{ "$unwind" : {
"path" : "$user_content" ,
"preserveNullAndEmptyArrays" : true}
} ,
{ "$match" : { "user_content.liked" : true}} ,
{ "$group" : {
"_id" : "$_id" ,
"popularity" : {
"$first" : "$popularity"} ,
"user_content" : { "$push" : "$user_content"}
}
} ,
{ "$project" : {
"popularity" : 1 ,
"count" : { "$size" : [ "$user_content"]}}
} ,
{ "$skip" : 0} ,
{ "$limit" : 1000000}
]);
I have around 60 thousand document in users collection, and have the following query:
db.getCollection('users').aggregate([
{"$match":{"userType":"employer"}},
{"$lookup":{"from":"companies","localField":"_id","foreignField":"owner.id","as":"company"}},
{"$unwind":"$company"},
{"$lookup":{"from":"companytypes","localField":"company.type.id","foreignField":"_id","as":"companyType"}},
{"$unwind":"$companyType"},
{ $group: { _id: null, count: { $sum: 1 } } }
])
It takes around 12 seconds to count, even I call count function before list function, but my list function with limit: 10 response faster than count.
And following is explain result:
{
"stages" : [
{
"$cursor" : {
"query" : {
"userType" : "employer"
},
"fields" : {
"company" : 1,
"_id" : 1
},
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "jobs.users",
"indexFilterSet" : false,
"parsedQuery" : {
"userType" : {
"$eq" : "employer"
}
},
"winningPlan" : {
"stage" : "COLLSCAN",
"filter" : {
"userType" : {
"$eq" : "employer"
}
},
"direction" : "forward"
},
"rejectedPlans" : []
}
}
},
{
"$lookup" : {
"from" : "companies",
"as" : "company",
"localField" : "_id",
"foreignField" : "owner.id",
"unwinding" : {
"preserveNullAndEmptyArrays" : false
}
}
},
{
"$match" : {
"$nor" : [
{
"company" : {
"$eq" : []
}
}
]
}
},
{
"$group" : {
"_id" : {
"$const" : null
},
"total" : {
"$sum" : {
"$const" : 1
}
}
}
},
{
"$project" : {
"_id" : false,
"total" : true
}
}
],
"ok" : 1.0
}
$lookup operations are slow since they mimic the left join behavior, from the DOCS:
$lookup performs an equality match on the localField to the
foreignField from the documents of the from collection
Hence if there are no indexes in the fields used for joining the collections Mongodb is force to do a collection scan.
Adding an index for the foreignField attributes should prevent a collection scan and increase the performance even of a magnitude
I have a MongoDB collection that is looking like this:
{
players: [
{uuid: "A"},
{uuid: "B"}
]
},
{
players: [
{uuid: "A"},
{uuid: "C"}
]
},
{
players: [
{uuid: "D"},
{uuid: "E"}
]
}
I want to use results of a previous aggregation stage and now find all documents where a player shows up, using a $lookup stage:
from: "collection",
pipeline: [
{
$match: {
players: {
$elemMatch: {
uuid: "$playerId"
}
}
//using "players.uuid": "$playerId" doesn't work either
}
}
],
as: "field"
The input to my $lookup stage looks like this:
{
"playerId" : "A"
}
{
"playerId" : "B"
}
{
"playerId" : "C"
}
This query returns an empty array in field. It seems like $uuid is not getting evaluated correctly, because if I exchange $uuid with a hardcoded value (e.g. A), this query returns results.
I have also tried using the let property, this gave me the same result.
What am I doing wrong?
Using the documents you've provided. I believe this might work for you:
I've used $lookup to do a join onto the collection which holds the playerId, which creates an array called field. I then use $unwind to extract all the array elements from both field and player. Finally I use $cond to crosscheck if both values match.
db.getCollection('foo').aggregate([
{ $lookup : {
from: "bar",
localField: "players.uuid",
foreignField: "playerId",
as: "field"
} },
{ $unwind : "$players" },
{ $unwind : "$field" },
{ $project : {
"players": 1,
"field" : 1,
"isMatch": {
"$cond": [ { "$eq": ["$players.uuid", "$field.playerId"] }, 1, 0 ]
} } }
])
I've purposely left the output verbose..
/* 1 */
{
"_id" : ObjectId("5a7f534b337e8d2b97ff2ffb"),
"players" : {
"uuid" : "A"
},
"field" : {
"_id" : ObjectId("5a7f5374337e8d2b97ff2ffe"),
"playerId" : "A"
},
"isMatch" : 1.0
}
/* 2 */
{
"_id" : ObjectId("5a7f534b337e8d2b97ff2ffb"),
"players" : {
"uuid" : "A"
},
"field" : {
"_id" : ObjectId("5a7f539b337e8d2b97ff2fff"),
"playerId" : "B"
},
"isMatch" : 0.0
}
/* 3 */
{
"_id" : ObjectId("5a7f534b337e8d2b97ff2ffb"),
"players" : {
"uuid" : "B"
},
"field" : {
"_id" : ObjectId("5a7f5374337e8d2b97ff2ffe"),
"playerId" : "A"
},
"isMatch" : 0.0
}
/* 4 */
{
"_id" : ObjectId("5a7f534b337e8d2b97ff2ffb"),
"players" : {
"uuid" : "B"
},
"field" : {
"_id" : ObjectId("5a7f539b337e8d2b97ff2fff"),
"playerId" : "B"
},
"isMatch" : 1.0
}
/* 5 */
{
"_id" : ObjectId("5a7f5356337e8d2b97ff2ffc"),
"players" : {
"uuid" : "A"
},
"field" : {
"_id" : ObjectId("5a7f5374337e8d2b97ff2ffe"),
"playerId" : "A"
},
"isMatch" : 1.0
}
/* 6 */
{
"_id" : ObjectId("5a7f5356337e8d2b97ff2ffc"),
"players" : {
"uuid" : "A"
},
"field" : {
"_id" : ObjectId("5a7f53a8337e8d2b97ff3000"),
"playerId" : "C"
},
"isMatch" : 0.0
}
/* 7 */
{
"_id" : ObjectId("5a7f5356337e8d2b97ff2ffc"),
"players" : {
"uuid" : "C"
},
"field" : {
"_id" : ObjectId("5a7f5374337e8d2b97ff2ffe"),
"playerId" : "A"
},
"isMatch" : 0.0
}
/* 8 */
{
"_id" : ObjectId("5a7f5356337e8d2b97ff2ffc"),
"players" : {
"uuid" : "C"
},
"field" : {
"_id" : ObjectId("5a7f53a8337e8d2b97ff3000"),
"playerId" : "C"
},
"isMatch" : 1.0
}
abstract document in collection md given:
{
vals : [{
uid : string,
val : string|array
}]
}
the following, partially correct aggregation is given:
db.md.aggregate(
{ $unwind : "$vals" },
{ $match : { "vals.uid" : { $in : ["x", "y"] } } },
{
$group : {
_id : { uid : "$vals.uid" },
vals : { $addToSet : "$vals.val" }
}
}
);
that may lead to the following result:
"result" : [
{
"_id" : {
"uid" : "x"
},
"vals" : [
[
"24ad52bc-c414-4349-8f3a-24fd5520428e",
"e29dec2f-57d2-43dc-818a-1a6a9ec1cc64"
],
[
"5879b7a4-b564-433e-9a3e-49998dd60b67",
"24ad52bc-c414-4349-8f3a-24fd5520428e"
]
]
},
{
"_id" : {
"uid" : "y"
},
"vals" : [
"0da5fcaa-8d7e-428b-8a84-77c375acea2b",
"1721cc92-c4ee-4a19-9b2f-8247aa53cfe1",
"5ac71a9e-70bd-49d7-a596-d317b17e4491"
]
}
]
as x is the result aggregated on documents containing an array rather than a string, the vals in the result is an array of arrays. what i look for in this case is to have a flattened array (like the result for y).
for me it seems like that what i want to achieve by one aggegration call only, is currently not supported by any given operation as e.g. a type conversion cannot be done or unwind expectes in every case an array as input type.
is map reduce the only option i have? if not ... any hints?
thanks!
You can use the aggregation to do the computation you want without changing your schema (though you might consider changing your schema simply to make queries and aggregations of this field easier to write).
I broke up the pipeline into multiple steps for readability. I also simplified your document slightly, again for readability.
Sample input:
> db.md.find().pretty()
{
"_id" : ObjectId("512f65c6a31a92aae2a214a3"),
"uid" : "x",
"val" : "string"
}
{
"_id" : ObjectId("512f65c6a31a92aae2a214a4"),
"uid" : "x",
"val" : "string"
}
{
"_id" : ObjectId("512f65c6a31a92aae2a214a5"),
"uid" : "y",
"val" : "string2"
}
{
"_id" : ObjectId("512f65e8a31a92aae2a214a6"),
"uid" : "y",
"val" : [
"string3",
"string4"
]
}
{
"_id" : ObjectId("512f65e8a31a92aae2a214a7"),
"uid" : "z",
"val" : [
"string"
]
}
{
"_id" : ObjectId("512f65e8a31a92aae2a214a8"),
"uid" : "y",
"val" : [
"string1",
"string2"
]
}
Pipeline stages:
> project1 = {
"$project" : {
"uid" : 1,
"val" : 1,
"isArray" : {
"$cond" : [
{
"$eq" : [
"$val.0",
[ ]
]
},
true,
false
]
}
}
}
> project2 = {
"$project" : {
"uid" : 1,
"valA" : {
"$cond" : [
"$isArray",
"$val",
[
null
]
]
},
"valS" : {
"$cond" : [
"$isArray",
null,
"$val"
]
},
"isArray" : 1
}
}
> unwind = { "$unwind" : "$valA" }
> project3 = {
"$project" : {
"_id" : 0,
"uid" : 1,
"val" : {
"$cond" : [
"$isArray",
"$valA",
"$valS"
]
}
}
}
Final aggregation:
> db.md.aggregate(project1, project2, unwind, project3, group)
{
"result" : [
{
"_id" : "z",
"vals" : [
"string"
]
},
{
"_id" : "y",
"vals" : [
"string1",
"string4",
"string3",
"string2"
]
},
{
"_id" : "x",
"vals" : [
"string"
]
}
],
"ok" : 1
}
If you modify your schema using always "vals.val" field as an array field (even when the record contains only one element) you can do it easily as follows:
db.test_col.insert({
vals : [
{
uid : "uuid1",
val : ["value1"]
},
{
uid : "uuid2",
val : ["value2", "value3"]
}]
});
db.test_col.insert(
{
vals : [{
uid : "uuid2",
val : ["value4", "value5"]
}]
});
Using this approach you only need to use two $unwind operations: one unwinds the "parent" array and the second unwinds every "vals.val" value. So, querying like
db.test_col.aggregate(
{ $unwind : "$vals" },
{ $unwind : "$vals.val" },
{
$group : {
_id : { uid : "$vals.uid" },
vals : { $addToSet : "$vals.val" }
}
}
);
You can obtain your expected value:
{
"result" : [
{
"_id" : {
"uid" : "uuid2"
},
"vals" : [
"value5",
"value4",
"value3",
"value2"
]
},
{
"_id" : {
"uid" : "uuid1"
},
"vals" : [
"value1"
]
}
],
"ok" : 1
}
And no, you can't execute this query using your current schema, since $unwind fails when the field isn't an array field.