Create granularity with flat docs in MongoDB - mongodb

First, I don't know if my method of storing is the good one but I tried like this.
I want to create a form (in front) from dynamical data and create a select form from theses.
I have a collection in mongo where each field refer to an enum.
{
{
_id: "xxx",
"Parent":"ParentEnum1",
"firstChild": "firstChildEnum2",
"secondChild": "secondChildEnum3",
"thirdChild":"thirdChildEnum1",
},
{
_id: "xxx",
"Parent":"ParentEnum1",
"firstChild": "firstChildEnum1",
"secondChild": "secondChildEnum3",
"thirdChild":"thirdChildEnum1",
},
{
_id: "xxx",
"Parent":"ParentEnum2",
"firstChild": "firstChildEnum2",
"secondChild": "secondChildEnum5",
"thirdChild":"thirdChildEnum8,
},
{
_id: "xxx",
"Parent":"ParentEnum2",
"firstChild": "firstChildEnum2",
"secondChild": "secondChildEnum1",
"thirdChild": null,
},
{
_id: "xxx",
"Parent":"ParentEnum4",
"firstChild": "firstChildEnum4",
"secondChild": "secondChildEnum1",
"thirdChild":"thirdChildEnum5,
},
{
_id: "xxx",
"Parent":"ParentEnum1",
"firstChild": "firstChildEnum4",
"secondChild": "secondChildEnum5",
"thirdChild":"thirdChildEnum7,
},
{
...
}
}
from that documents, I want to obtain a hierarchical structure like this:
{
"ParentEnum1":{
firstChildEnum4:{
secondChildEnum5:[
thirdChildEnum7,
]
},
firstChildEnum1:{
secondChildEnum3:[
thirdChildEnum1,
]
},
firstChildEnum2:{
secondChildEnum3:[
thirdChildEnum1,
]
}
},
"ParentEnum2":{
"firstChildEnum2":{
"secondChildEnum5":[
"thirdChildEnum8"
],
"secondChildEnum1":[]
}
},
"ParentEnum4":{
"firstChildEnum4":{
"secondChildEnum5":[
"thirdChildEnum7"
]
}
}
}
For that I tried to use aggregation group, but I can only get the first granularity with a $group and a $addToSet one the firstChild field.
I think I can do better with $bucket but I tried with no success (Maybe I didn't use it properly)
I don't know if I what I want to do is possible, then I am open to any other suggestions.
Do you have an Idea of a pipeline operator to use if it is possible?

Might be there will be some more efficient ways, but this is the limited for 4 levels as per your requirement,
$gorup by first top 3 levels and make array of third level field
$group by first top 2 levels and make array of second level as k(key) and v(value) format
$group by second 1 top level and make array of first level and convert second level from array to object using $objectToArray
convert second level from array to object using $arrayToObject, and make array of first level and convert to object using $arrayToObject,
$replaceRoot to replace first level object to root
db.collection.aggregate([
{
$group: {
_id: {
Parent: "$Parent",
firstChild: "$firstChild",
secondChild: "$secondChild"
},
thirdChild: { $push: "$thirdChild" }
}
},
{
$group: {
_id: {
Parent: "$_id.Parent",
firstChild: "$_id.firstChild"
},
secondChild: {
$push: { k: "$_id.secondChild", v: "$thirdChild" }
}
}
},
{
$group: {
_id: "$_id.Parent",
firstChild: {
$push: { k: "$_id.firstChild", v: { $arrayToObject: "$secondChild" } }
}
}
},
{
$replaceRoot: {
newRoot: {
$arrayToObject: [[
{ k: "$_id", v: { $arrayToObject: "$firstChild" } }
]]
}
}
}
])
Playground

Related

MongoDB sorting does not work with inner array

I'm trying to query specific fields in my document and sort them by one of the fields, however, the engine seems to completely ignore the sort.
I use the query:
db.symbols.find({_id:'AAPL'}, {'income_statement.annual.totalRevenue':1,'income_statement.annual.fiscalDateEnding':1}).sort({'income_statement.annual.totalRevenue': 1})
This is the output:
[
{
_id: 'AAPL',
income_statement: {
annual: [
{
fiscalDateEnding: '2021-09-30',
totalRevenue: '363172000000'
},
{
fiscalDateEnding: '2020-09-30',
totalRevenue: '271642000000'
},
{
fiscalDateEnding: '2019-09-30',
totalRevenue: '256598000000'
},
{
fiscalDateEnding: '2018-09-30',
totalRevenue: '265595000000'
},
{
fiscalDateEnding: '2017-09-30',
totalRevenue: '229234000000'
}
]
}
}
]
I would expect to have the entries sorted by fiscalDateEnding, starting with 2017-09-30 ascending.
However, the order is fixed, even if I use -1 for sorting.
Any ideas?
The sort you are using is for the ordering of documents in the result set. This is different from the ordering of array elements inside the document.
For your case, if you are using a newer version of MongoDB (5.2+), you can use the $sortArray.
db.symbols.aggregate([
{
$project: {
_id: 1,
annual: {
$sortArray: {
input: "$income_statement.annual",
sortBy: {
fiscalDateEnding: 1
}
}
}
}
}
])
If you are using older version of MongoDB, you can do the followings to perform the sorting.
db.collection.aggregate([
{
"$unwind": "$income_statement.annual"
},
{
$sort: {
"income_statement.annual.fiscalDateEnding": 1
}
},
{
$group: {
_id: "$_id",
annual: {
$push: "$income_statement.annual"
}
}
},
{
"$project": {
_id: 1,
income_statement: {
annual: "$annual"
}
}
}
])
Here is the Mongo Playground for your reference.

MongoDB - How to write a nested group aggregation query

I have a collection in this format:
{
"place":"land",
"animal":"Tiger",
"name":"xxx"
},
{
"place":"land",
"animal":"Lion",
"name":"yyy"
}
I want to result to be something like this:
{
"place":"land".
"animals":{"Lion":"yyy", "Tiger":"xxx"}
}
I wrote the below query. I think there needs to be another group stage but not able to write it.
db.collection.aggregate({
'$group': {
'_id':{'place':'$place', 'animal':'$animal'},
'animalNames': {'$addToSet':'$name'}
}
})
What changes need to be made to get the required result?
$group - Group by animals. Push objects with { k: "animal", v: "name" } type into animals array.
$project - Decorate output document. Convert animals array to key-value pair via $arrayToObject.
db.collection.aggregate([
{
"$group": {
"_id": "$place",
"animals": {
"$push": {
k: "$animal",
v: "$name"
}
}
}
},
{
$project: {
_id: 0,
place: "$_id",
animals: {
"$arrayToObject": "$animals"
}
}
}
])
Sample Mongo Playground
If you are on version >=4.4, a reasonable alternative is to use the $function operator:
db.foo.aggregate([
{$project: {
'income_statement.annual': {
$function: {
body: function(arr) {
return arr.sort().reverse();
},
args: [ "$income_statement.annual" ],
lang: "js"
}}
}}
]);

Add number field in $project mongodb

I have an issue that need to insert index number when get data. First i have this data for example:
[
{
_id : 616efd7e56c9530018e318ac
student : {
name: "Alpha"
email: null
nisn: "0408210001"
gender : "female"
}
},
{
_id : 616efd7e56c9530018e318af
student : {
name: "Beta"
email: null
nisn: "0408210001"
gender : "male"
}
}
]
and then i need the output like this one:
[
{
no:1,
id:616efd7e56c9530018e318ac,
name: "Alpha",
nisn: "0408210001"
},
{
no:2,
id:616efd7e56c9530018e318ac,
name: "Beta",
nisn: "0408210002"
}
]
i have tried this code but almost get what i expected.
{
'$project': {
'_id': 0,
'id': '$_id',
'name': '$student.name',
'nisn': '$student.nisn'
}
}
but still confuse how to add the number of index. Is it available to do it in $project or i have to do it other way? Thank you for the effort to answer.
You can use $unwind which can return an index, like this:
db.collection.aggregate([
{
$group: {
_id: 0,
data: {
$push: {
_id: "$_id",
student: "$student"
}
}
}
},
{
$unwind: {path: "$data", includeArrayIndex: "no"}
},
{
"$project": {
"_id": 0,
"id": "$data._id",
"name": "$data.student.name",
"nisn": "$data.student.nisn",
"no": {"$add": ["$no", 1] }
}
}
])
You can see it works here .
I strongly suggest to use a $match step before these steps, otherwise you will group your entire collection into one document.
You need to run a pipeline with a $setWindowFields stage that allows you to add a new field which returns the position of a document (known as the document number) within a partition. The position number creation is made possible by the $documentNumber operator only available in the $setWindowFields stage.
The partition could be an extra field (which is constant) that can act as the window partition.
The final stage in the pipeline is the $replaceWith step which will promote the student embedded document to the top-level as well as replacing all input documents with the specified document.
Running the following aggregation will yield the desired results:
db.collection.aggregate([
{ $addFields: { _partition: 'students' }},
{ $setWindowFields: {
partitionBy: '$_partition',
sortBy: { _id: -1 },
output: { no: { $documentNumber: {} } }
} },
{ $replaceWith: {
$mergeObjects: [
{ id: '$_id', no: '$no' },
'$student'
]
} }
])

mongodb : find documents that contains the most a specific kind of property

I have a collection "MyCollection" where each document could have 0,1 or many properties named "propertyX" where X is the nth time the property appears in the document.
For example, my collection could looks like this :
{
"_id":ObjectId("XXXX")
"property1":xxxxxx
"property2":xxxxxx
"property3":xxxxxx
}
{
"_id":ObjectId("XXXX")
"property1":xxxxxx
"property2":xxxxxx
"property3":xxxxxx
"property4":xxxxxx
"property5":xxxxxx
}
{
"_id":ObjectId("XXXX")
"property1":xxxxxx
"property2":xxxxxx
"property3":xxxxxx
"property4":xxxxxx
"property5":xxxxxx
"property6":xxxxxx
"property7":xxxxxx
}
I'm trying to sort documents with a mongo query by the number of propertyX they had, so in my example, the third object must be at the top of my result (7 properties), then the second one (5 properties), then the first one (only 3 properties). What I've done for the moment, is implementing it in javascript (using mongo driver) and do a count of keys with the Object.Keys(myObject).filter(key=>key.startsWith('property')) but this is taking way too much time. Is there a way to do it directly with a mongo query, thanks
You can achieve this by using the aggregation tool.
First create a new property with the number of properties in you document, so you can use it to sort on a later stage:
db.getCollection("MyCollection").aggregate([
{
$addFields: {
properties: { $objectToArray: '$$ROOT' },
}
}
]);
At this stage your documents on the pipeline would look something like this:
{
"_id":ObjectId("XXXX")
"property1":xxxxxx
"property2":xxxxxx
"property3":xxxxxx
"properties: [
{ k: "_id", v: ObjectId("XXXX")},
{ k: "property1", v: xxxxxx },
{ k: "property2", v: xxxxxx },
{ k: "property3", v: xxxxxx },
]
}
Using the $size operator you can get the length of this array:
...
{
$addFields: {
properties: { $size: { $objectToArray: '$$ROOT' }},
}
}
...
So now you would have a property that shows the number of keys in your document:
{
"_id":ObjectId("XXXX"),
"property1":xxxxxx,
"property2":xxxxxx,
"property3":xxxxxx,
"properties: 4,
}
Which would make it possible to sort the documents by properties number:
...
{
$addFields: {
properties: { $size: { $objectToArray: '$$ROOT' }},
}
},
{
$sort: { properties: -1 }
}
...
To finish you can get rid of the property properties with $unset:
...
{
$addFields: {
properties: { $size: { $objectToArray: '$$ROOT' }},
}
},
{
$sort: { properties: -1 }
},
{
$unset: 'properties'
},
You would end up with the following:
[
{
"_id":ObjectId("XXXX"),
"property1":xxxxxx,
"property2":xxxxxx,
"property3":xxxxxx,
"property4":xxxxxx,
"property5":xxxxxx,
"property6":xxxxxx,
"property7":xxxxxx,
},
{
"_id":ObjectId("XXXX"),
"property1":xxxxxx,
"property2":xxxxxx,
"property3":xxxxxx,
"property4":xxxxxx,
"property5":xxxxxx,
},
{
"_id":ObjectId("XXXX"),
"property1":xxxxxx,
"property2":xxxxxx,
"property3":xxxxxx,
}
]

MongoDB aggregation: How to get the index of a document in a collection depending sorted by a document property

Assume I have a collection with millions of documents. Below is a sample of how the documents look like
[
{ _id:"1a1", points:[2,3,5,6] },
{ _id:"1a2", points:[2,6] },
{ _id:"1a3", points:[3,5,6] },
{ _id:"1b1", points:[1,5,6] },
{ _id:"1c1", points:[5,6] },
// ... more documents
]
I want to query a document by _id and return a document that looks like below:
{
_id:"1a1",
totalPoints: 16,
rank: 29
}
I know I can query the whole document, sort by descending order then get the index of the document I want by _id and add one to get its rank. But I have worries about this method.
If the documents are in millions won't this be 'overdoing' it. Querying a whole collection just to get one document? Is there a way to achieve what I want to achieve without querying the whole collection? Or the whole collection has to be involved because of the ranking?
I cannot save them ranked because the points keep on changing. The actual code is more complex but the take away is that I cannot save them ranked.
Total points is the sum of the points in the points array. The rank is calculated by sorting all documents in descending order. The first document becomes rank 1 and so on.
an aggregation pipeline like the following can get the result you want. but how it operates on a collection of millions of documents remains to be seen.
db.collection.aggregate(
[
{
$group: {
_id: null,
docs: {
$push: { _id: '$_id', totalPoints: { $sum: '$points' } }
}
}
},
{
$unwind: '$docs'
},
{
$replaceWith: '$docs'
},
{
$sort: { totalPoints: -1 }
},
{
$group: {
_id: null,
docs: { $push: '$$ROOT' }
}
},
{
$set: {
docs: {
$map: {
input: {
$filter: {
input: '$docs',
as: 'x',
cond: { $eq: ['$$x._id', '1a3'] }
}
},
as: 'xx',
in: {
_id: '$$xx._id',
totalPoints: '$$xx.totalPoints',
rank: {
$add: [{ $indexOfArray: ['$docs._id', '1a3'] }, 1]
}
}
}
}
}
},
{
$unwind: '$docs'
},
{
$replaceWith: '$docs'
}
])