Mongodb recursive query - mongodb

I have the following schema in my taxon collection :
{
"_id": 1,
"na": [ "root_1",
"root_2",
"root_3" ],
"pa": 1
},{
"_id": 2,
"na": [ "name_1",
"name_2",
"name_3"],
"pa": 1
},{
"_id": 4,
"na": [ "otherName_1",
"otherName_2",
"otherName_3"],
"pa": 2
}
Each document is related to another by the parent field (pa), which corresponds to the _id of its parent.
I would like to perform a recursive search to get the following result:
{ "_id": 4,
"nameList": [ "otherName_1",
"name_1",
"root_1"]
}
From document with a certain _id, get the first item of na array of each parent until document with _id: 1 is reached
I currently get this result by performing X queries (one by parent document, here 3 for example), but I'm pretty sure that this can be achieved using a single query. I already looked at the new $graphLookup operator, but couldn't manage to get my way with it...
Is it possible to achieve this in a single query using MongoDB 3.4.1?
Edit
I would run this for 50 documents each time, so the optimal solution would be to combine everything in a single query
For example, it would look like:
var listId = [ 4, 128, 553, 2728, ...];
var cursor = db.taxon.aggregate([
{$match:
{ _id: {$in: listId}}
}, ...
)];
and would output :
[{ "_id": 4,
"nameList": [ "otherName_1",
"name_1",
"root_1"]
}, { "_id": 128,
"nameList": [ "some_other_ame_1",
"some_name_1",
"root_1"]
}, { "_id": 553,
"nameList": [ "last_other_ame_1",
"last_name_1",
"root_1"]
} ... ]
try it online: mongoplayground.net/p/Gfp-L03Ub0Y

You can try below aggregation.
Stages $match - $graphLookup - $project.
$reduce picks the first element of the na array from each document in the $graphLookup result (nameList).
db.taxon.aggregate([{
$match: {
_id: {
$in: listId
}
}
}, {
$graphLookup: {
from: "taxon",
startWith: "$_id",
connectFromField: "pa",
connectToField: "_id",
as: "nameList"
}
}, {
$project: {
nameList: {
$reduce: {
input: "$nameList",
initialValue: [],
in: {
"$concatArrays": ["$$value", {
$slice: ["$$this.na", 1]
}]
}
}
}
}
}])

Related

Get unique array elements in MongoDB by "master" element

I have mongodb rows with array element which looks like this:
{"data" : [1, 111]}
{"data" : [222, 1]}
{"data" : [1, 333]}
{"data" : [2, 444]}
How to get unique array elements by "master" element. So for example "master" element is 1 I should get result: [111, 222, 333] and not 444, because that array does not contain 1. If master element would be 2, the result should be: [444]
I tried something like this aggregation. Is it correct? Are there any performance issues? What indexes should be on the collection to make it fast?
[
{$match: {"data": 1}},
{$project : {a : '$data'}},
{$unwind: '$a'},
{$group: {_id: 'a', items: {$addToSet: '$a'}}}
]
You can use Aggregation framework:
$match to filter all documents that have "master" key in the "data" array.
$group to collect the "data" arrays of all documents into one property called "result", with $filter to filter out the "master" element from each "data" array. ("result" will be an array whose elements are the filtered "data" arrays of all documents.)
$reduce with $concatArrays to concatenate all "data" arrays inside "result" property.
db.collection.aggregate([
{
"$match": {
data: 1
}
},
{
"$group": {
"_id": null,
result: {
$addToSet: {
"$filter": {
"input": "$data",
"cond": {
"$ne": [
"$$this",
1
]
}
}
}
}
}
},
{
"$project": {
result: {
$reduce: {
input: "$result",
initialValue: [],
in: {
$concatArrays: [
"$$value",
"$$this"
]
}
}
}
}
}
])
Be aware that the "master" element has to be dynamically populated in first stage for $match pipeline, as well as in the second stage when performing filtering with $filter operator.
Here is the working example: https://mongoplayground.net/p/EtYwOqAE-PE
I think this works also
Test code here
keeps only the arrays that contain the master key
unwind them
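grouping by the constant {"_id": 1} behaves like grouping by null (every document falls into the same group); the constant is used only so that the master key appears as _id. Within the $group, the $$REMOVE system variable is used so that the master key itself is not added to the set.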
Query (where you see 1 put your master key, or a variable)
db.collection.aggregate([
{
"$match": {
"data": 1
}
},
{
"$unwind": {
"path": "$data"
}
},
{
"$group": {
"_id": 1,
"members": {
"$addToSet": {
"$cond": [
{
"$ne": [
"$data",
1
]
},
"$data",
"$$REMOVE"
]
}
}
}
}
])

Pretty $lookup on collection - mongoDB

I have two collection:
Competition
{
"_id": "326",
"signed_up": [
{"_id": "00001","category": ["First"], "status": true}]
}
and Playing
{
"_id": "6076e504db319b11c077d473",
"competition_id": "326",
"player": {"player_id": "00001","handicap": 6},
"totalScore": 6
}
I want to add playing --> totalScore on competition.signed_up array, based on player_id field:
{
"_id": "326",
"signed_up": [
{"_id": "00001","category": ["First"], "status": true, "totalScore": 6}]
}
I do not know how to do...
I'm not telling you this is the optimal way, but it seems to work...
Let's start out with the data. I've added one more player to the competition, just to make it a little easier to see that things work as expected:
db.competition.insertOne({
"_id": "326",
"signed_up": [{
"_id": "00001",
"category": ["First"],
"status": true
}, {
"_id": "00002",
"category": ["First"],
"status": true
}]
})
db.playing.insertMany([
{
"competition_id": "326",
"player": {
"playing_id": "00001"
},
"totalScore": 6
},
{
"competition_id": "326",
"player": {
"playing_id": "00002"
},
"totalScore": 2
}
]);
Now for the aggregation...
db.competition.aggregate([
// Even though the [documentation](https://docs.mongodb.com/manual/reference/operator/aggregation/lookup/#use--lookup-with-an-array) states that unwinding is no longer necessary,
// I'm not sure if that includes arrays of subdocuments or only arrays of primitives. So I've chosen to unwind anyway...
{
$unwind: "$signed_up"
},
// => { "_id": "326", "signed_up": { "_id": "00001", ....} }
// now we have each player in it's own document and can easily lookup the score from playing collection
{
$lookup: {
from: 'playing',
localField: 'signed_up._id',
foreignField: 'player.playing_id',
as: 'player'
}
},
// => { "_id": "326", "signed_up": {...}, "player": [{ competition_id": "326"...}, ..]}
// now we have the matching competition documents as an array on each document.
// But we know there will only be one match and don't really care for the array,
// so we have to do some gymnastics to get the data we want where we want it
{
$project: {
"signed_up": {
$let: {
vars: {
player: { $arrayElemAt: [ "$player", 0 ] }
},
in: {
$mergeObjects: [
"$signed_up",
{ "totalScore": "$$player.totalScore" }
]
}
}
}
}
},
// => { "_id": "326", "signed_up": { "_id": "00001", .... , "totalScore": 6 } }
// Now we're pretty much done, except that we need to group the documents back
// into the original competition documents
{
$group: {
_id: "$_id",
signed_up: {
$push: "$signed_up"
}
}
}
// => { "_id": "326", "signed_up": [ { "_id": "00001", ....}, {"_id": "00002", ...} ] }
// And that completes the pipeline.
]);
I see that you have the id from the competition document also on the playing document, so I suspect that you need an additional check on the lookup to make sure you get the correct match. The way the code I have works, is that if you have more than one competition, you will get all the competitions for a player added to the playing array after the lookup.
If you take a look at the example Specify Multiple Join Conditions with $lookup in the documentation, you see how you can change the $lookup stage to do a more precise match on the target documents by using a pipeline on the target collection. It also shows how you can include a projection in that pipeline to only return the data that you really want.
Edit
Take a look at the following alternative lookup step:
{
$lookup: {
from: 'playing',
let: { playerid: "$signed_up._id", compid: "$_id" },
pipeline: [
{ $match: {
$expr: {
$and: [
{ $eq: ["$player.playing_id","$$playerid" ] },
{ $eq: ["$competition_id", "$$compid" ] }
]
}
}
},
{ $project: {
_id: 0,
"totalScore": 1
}
}
],
as: 'player'
}
}
This stores the player's id and the competition id from the current document into two variables. Then it uses those two variables in a pipeline run against the other collection. In addition to the $match to select the right player/competition document, it also includes a $project to get rid of the other fields on the playing documents. It will still return an array of one object, but it might save some bytes of memory usage...

Getting the $Max value within a nested array

I'm attempting to extract the highest value from a child array within an object, which is itself within a parent array - all in a single MongoDB document.
The child array is called data and is contained within the list parent array; I'm trying to extract the highest number compared with the rest of those values.
I've tried using $Group and $max (example below) among other things - however not getting much success. - I am getting an array returned with all the number values: [2,3]
How do I search through the list Array and data Array to return the highest number?
Expected Output for the below example: {output: 3}
Example in MongoPlayground: https://mongoplayground.net/p/qw9Kz_WVYiS
Mongo DB Setup and Documents
db={
"groups": [
{
"_id": ObjectId("602ed22af42c404096407dda"),
"groupName": "Name"
}
],
"inventory": [
{
"_id": ObjectId("602ed22af42c404096407ddc"),
"linkedGroup": ObjectId("602ed22af42c404096407dda"),
"list": [
{
"_id": ObjectId("602eeb0621a11045638b7082"),
"data": {
"number": 2
},
},
{
"_id": ObjectId("602eec75c37147459ed7b12c"),
"data": {
"number": 3
}
}
]
}
]
}
Query
db.groups.aggregate([
{
"$lookup": {
"from": "inventory",
"localField": "_id",
"foreignField": "linkedGroup",
"as": "inventory_links"
}
},
{
$group: {
_id: 1,
output: {
$max: "$inventory_links.list.data.number"
},
},
}
])
Use $reduce to find the maximum. You can append the following stages to your query:
{
$addFields: {
_id: 1,
inventory_links: {"$arrayElemAt": ["$inventory_links",0]}
}
},
{
$project: {
output: {
$reduce: {
input: "$inventory_links.list",
initialValue: 0,
in: {
$cond: [
{$gte: [ "$$this.data.number","$$value"]},
"$$this.data.number",
"$$value"
]
}
}
}
}
}
Working Mongo playground

Different Fields Multiplication in MongoDB

Can we multiply two different fields from different collections in MongoDB?
any help will be highly appreciated...
Yes, you can using the Aggregation Pipeline $multiply operator. https://docs.mongodb.com/manual/reference/operator/aggregation/multiply/
What you want to do is join two collections together using $lookup https://docs.mongodb.com/manual/reference/operator/aggregation/lookup/. In this case, I'll join the accounts and transactions collections on the account_id field.
Then we can project the fields we want to multiply. In this case, I'm getting the first element in the account array, which represents the account document I'm joining from the accounts collection.
Finally, I can multiply the two fields together.
[{
$lookup: {
from: 'accounts',
localField: 'account_id',
foreignField: 'account_id',
as: 'account'
}
}, {
$project: {
account: {
$arrayElemAt: ["$account", 0]
},
transaction_count: "$transaction_count",
}
}, {
$project: {
product: {
$multiply: ["$transaction_count", "$account.limit"]
}
}
}]
To reproduce my solution above, create a free cluster in Atlas (https://www.mongodb.com/cloud/atlas) and then load the sample data. Navigate to the Cluster's Collections. Then navigate to the sample_analytics database and the transactions collection. Then navigate to the Aggregation tab. Here you can create an Aggregation Pipeline stage by stage. It's incredibly helpful so you can see the output of each stage as you build the next. Below is a screenshot of the Aggregation Pipeline I described in my solution above.
If you don't have experience with the Aggregation Pipeline, I highly recommend MongoDB University's free course: https://university.mongodb.com/courses/M121/about
MongoDB aggregation operations allow us to join two collections with the $lookup stage and compute a field operation (e.g. $multiply).
Given
"collection": [
{
id: 1,
"total": 5
},
{
id: 2,
"total": 2
}
],
"collection2": [
{
collId: 1,
"total": 3
},
{
collId: 2,
"total": 4
}
]
db.collection.aggregate([
{
$lookup: {
from: "collection2",
let: {
col_id: "$id",
col_total: "$total",
},
pipeline: [
{
$match: {
$expr: {
$eq: [
"$collId",
"$$col_id"
]
}
}
},
{
$project: {
summary: {
$multiply: [
"$total",
"$$col_total"
]
}
}
}
],
as: "result"
}
},
{
$addFields: {
result: {
$let: {
vars: {
tmp: {
$arrayElemAt: [
"$result",
0
]
}
},
in: "$$tmp.summary"
}
}
}
}
])
MongoPlayground
Result
[
{
"_id": ObjectId("5a934e000102030405000000"),
"id": 1,
"result": 15,
"total": 5
},
{
"_id": ObjectId("5a934e000102030405000001"),
"id": 2,
"result": 8,
"total": 2
}
]

MongoDB multidimensional array projection

I just started learning MongoDB and can't find a solution for my problem.
Got that document:
> db.test.insert({"name" : "Anika", "arr" : [ [11, 22],[33,44] ] })
Please note the "arr" field which is a multidimensional array.
Now I'm looking for a query that returns only the value of arr[0][1] which is 22. I tried to achieve that by using $slice, however I don't know how to address the second dimension with that.
> db.test.find({},{_id:0,"arr":{$slice: [0,1]}})
{ "name" : "ha", "arr" : [ [ 11, 22 ] ] }
I also tried
> db.test.find({},{_id:0,"arr":{$slice: [0,1][1,1]}})
{ "name" : "ha", "arr" : [ [ 11, 22 ] ] }
The desired output would be either
22
or
{"arr":[[22]]}
Thank you
EDIT:
After reading the comments I think that I've simplified the example data too much and I have to provide more information:
There are many more documents in the collection like that one that
I've provided. But they all have the same structure.
There are more array elements than just two
In the real world the array contains really long texts (500kb-1mb),
so it is very expensive to transmit the whole data to the client.
Before the aggregation I will do a query by the 'name' field. Just
skipped that in the example for the sake of simplicity.
The target indexes are variable, so sometimes I need to know the
value of arr[0][1], the next time it is arr[1][4]
example data:
> db.test.insert({"name" : "Olivia", "arr" : [ [11, 22, 33, 44],[55,66,77,88],[99] ] })
> db.test.insert({"name" : "Walter", "arr" : [ [11], [22, 33, 44],[55,66,77,88],[99] ] })
> db.test.insert({"name" : "Astrid", "arr" : [ [11, 22, 33, 44],[55,66],[77,88],[99] ] })
> db.test.insert({"name" : "Peter", "arr" : [ [11, 22, 33, 44],[55,66,77,88],[99] ] })
example query:
> db.test.find({name:"Olivia"},{"arr:"...})
You can use the aggregation framework:
db.test.aggregate([
{ $unwind: '$arr' },
{ $limit: 1 },
{ $project: { _id: 0, arr: 1 } },
{ $unwind: '$arr' },
{ $skip: 1 },
{ $limit: 1 }
])
Returns:
{ "arr": 22 }
Edit: The original poster has modified my solution to suit his needs and came up with the following:
db.test.aggregate([
{ $match: { name:"Olivia" } },
{ $project: { _id: 0,arr: 1 } },
{ $unwind: '$arr' },
{ $skip: 1 },
{ $limit:1 },
{ $unwind: "$arr" },
{ $skip: 2 },
{ $limit: 1 }
])
This query will result in { arr: 77 } given the extended data provided by the OP. Note that $skip and $limit are needed to select the right elements in the array hierarchy.
The $slice form you ask for does not handle multi-dimensional arrays. Each array is considered individually, so addressing a nested dimension is not supported by the current $slice.
As such, when the wanted positions are the "first" and "last" elements, it can presently be done a lot more briefly with .aggregate() than has been suggested:
db.test.aggregate([
{ "$unwind": "$arr" },
{ "$group": {
"_id": "$_id",
"arr": { "$first": "$arr" }
}},
{ "$unwind": "$arr" },
{ "$group": {
"_id": "$_id",
"arr": { "$last": "$arr" }
}}
])
But in future releases of MongoDB (it currently works in development branch 3.1.8 as of writing) you have $arrayElemAt as an operator for the aggregation framework, which works like this:
db.test.aggregate([
{ "$project": {
"arr": {
"$arrayElemAt": [
{ "$arrayElemAt": [ "$arr", 0 ] },
1
]
}
}}
])
Both basically come to the same { "arr": 22 } result, though the future available form works quite flexibly on array index values, rather than first and last.