Efficiency of indexed embedded array - mongodb

I am currently evaluating the efficiency of different databases for a use case. In Mongodb, would like to store around 1 million objects with the following structure. Each object will have between 5 and 10 objects in the foo array.
{
name:"my name",
foos:[
{
foo:"...",
bar:"..."
},
{
foo:"...",
bar:"..."
},
{
foo:"...",
bar:"..."
}
]
}
I often need to search for objects which where the foos collection contains an object with a specific property, e.g.:
// mongo collection
[
{
name:'my name',
foos:[
{
foo:'one_foo',
bar:'a_bar'
},
{
foo:'two_foo',
bar:'b_bar'
}
]
},
{
name:'another name',
foos:[
{
foo:'another foo',
bar:'a_bar'
},
{
foo:'just another foo',
bar:'c_bar'
}
]
}
]
// search (pseudo code)
{ foos: {$elemMatch: {bar: 'c_bar'}} }
// returns
{
name:'another name',
foos:[
{
foo:'another foo',
bar:'a_bar'
},
{
foo:'just another foo',
bar:'c_bar'
}
]
}
Can this efficiently be done with mongo and how should the indexes be set?
I don't want you to evaluate performance for me, just an idea how mongo performs for my use case or how optimization could look like.

MongoDB has documentation explaining how to create indexes on embedded documents, through dot notation:
Dot Notation (Reaching into Objects)
> db.blogposts.findOne()
{ title : "My First Post", author: "Jane",
comments : [{ by: "Abe", text: "First" },
{ by : "Ada", text : "Good post" } ]
}
> db.blogposts.find( { "comments.by" : "Ada" } )
> db.blogposts.ensureIndex( { "comments.by" : 1 } );
As for the performance characteristic... just test it with your dataset.

Related

Index not picked with nested field hierarchy but gets picked in the flatten mode

so i am struggling for 2 weeks on why does not my indexes get picked when i “explain” my queries.
i have this query:
{ “$and”: [
{ "extraProperties.class": "Residential" }, { "extraProperties.type": "Sale" }, { "extraProperties.propertyType": "Condo Apartment" }, { "extraProperties.propertyTypeStyle": "Apartment" } ] }
the above query wont pick this index :
{ “extraProperties.class”:1 , “extraProperties.type” : 1, “extraProperties.propertyType”:1,“extraProperties.propertyTypeStyle”:1}
i have been testing everything these days and finally i decided to flatten the hierarchy and now my query looks like this:
{ “$and”: [
{ “class”: “Residential” }, { “type”: “Sale” }, { “propertyType”: “Condo Apartment” }, { “propertyTypeStyle”: “Apartment” }
] }
now the above query will pick this index :
{ “class”:1 , “type” : 1, “propertyType”:1,“propertyTypeStyle”:1}
could someone explain what the hell is going on there?!?!
explain result:
https://drive.google.com/file/d/1bs_mqO-1FEBHQ_FsBWgP2TQ2jeiQz4q_/view?usp=sharing

Query and Update Child Documents without knowing keys

I have a collection with documents having the following format
{
name: "A",
details : {
matchA: {
comment: "Hello",
score: 5
},
matchI: {
score: 10
},
lastMatch:{
score: 5
}
}
},
{
name: "B",
details : {
match2: {
score: 5
},
match7: {
score: 10
},
firstMatch:{
score: 5
}
}
}
I don't immediatly know the name of the keys that are children of details, they don't follow a known format, there can be different amounts etc.
I would like to write a query which will update the children in such a manner that any subdocument with a score less than 5, gets a new field added (say lowScore: true).
I've looked around a bit and I found $ and $elemMatch, but those only work on arrays. Is there an equivalent for subdocuments? Is there some way of doing it using the aggregation pipeline?
I don't think you can do that using a normal update(). There is a way through the aggregation framework which itself, however, cannot alter any persisted data. So you will need to loop through the results and update your documents individually like e.g. here: Aggregation with update in mongoDB
This is the required query to transform your data into what you need for the subsequent update:
collection.aggregate({
$addFields: {
"details": {
$objectToArray: "$details" // transform "details" into uniform array of key-value pairs
}
}
}, {
$unwind: "$details" // flatten the array created above
}, {
$match: {
"details.v.score": {
$lt: 10 // filter out anything that's not relevant to us
// (please note that I used some other filter than the one you wanted "score less than 5" to get some results using your sample data
},
"details.v.lowScore": { // this filter is not really required but it seems to make sense to check for the presence of the field that you want to create in case you run the query repeatedly
$exists: false
}
}
}, {
$project: {
"fieldsToUpdate": "$details.k" // ...by populating the "details" array again
}
})
Running this query returns:
/* 1 */
{
"_id" : ObjectId("59cc0b6afab2f8c9e1404641"),
"fieldsToUpdate" : "matchA"
}
/* 2 */
{
"_id" : ObjectId("59cc0b6afab2f8c9e1404641"),
"fieldsToUpdate" : "lastMatch"
}
/* 3 */
{
"_id" : ObjectId("59cc0b6afab2f8c9e1404643"),
"fieldsToUpdate" : "match2"
}
/* 4 */
{
"_id" : ObjectId("59cc0b6afab2f8c9e1404643"),
"fieldsToUpdate" : "firstMatch"
}
You could then $set your new field "lowScore" using a cursor as described in the linked answer above.

Tricky MongoDB search challenge

I have a tricky mongoDB problem that I have never encountered.
The Documents:
The documents in my collection have a search object containing named keys and array values. The keys are named after one of eight categorys and the corresponding value is an array containing items from that category.
{
_id: "bRtjhGNQ3eNqTiKWa",
/* */
search :{
usage: ["accounting"],
test: ["knowledgetest", "feedback"]
},
test: {
type:"list",
vals: [
{name:'knowledgetest', showName: 'Wissenstest'},
{name:'feedback', showName: '360 Feedback'},
]
},
usage: {
type:"list",
vals: [
{name:'accounting', showName: 'Accounting'},
]
}
},
{
_id: "7bgvegeKZNXkKzuXs",
/* */
search :{
usage: ["recruiting"],
test: ["intelligence", "feedback"]
},
test: {
type:"list",
vals: [
{name:'intelligence', showName: 'Intelligenztest'},
{name:'feedback', showName: '360 Feedback'},
]
},
usage: {
type:"list",
vals: [
{name:'recruiting', showName: 'Recruiting'},
]
}
},
The Query
The query is an object containing the same category - keys and array - values.
{
usage: ["accounting", "assessment"],
test : ["feedback"]
}
The desired outcome
If the query is empty, I want all documents.
If the query has one category and any number of items, I want all the documents that have all of the items in the specified category.
If the query has more then one category, I want all the documents that have all of the items in all of the specified categorys.
My tries
I tried all kinds of variations of:
XX.find({
'search': {
"$elemMatch": {
'tool': {
"$in" : ['feedback']
}
}
}
No success.
EDIT
Tried: 'search.test': {$all: (query.test ? query.test : [])} which gives me no results if I have nothing selected; the right documents when I am only looking inside the test category; and nothing when I additionally look inside the usage category.
This is at the heart of my app, thus I historically put up a bounty.
let tools = []
const search = {}
for (var q in query) {
if (query.hasOwnProperty(q)) {
if (query[q]) {
search['search.'+q] = {$all: query[q] }
}
}
}
if (Object.keys(query).length > 0) {
tools = ToolsCollection.find(search).fetch()
} else {
tools = ToolsCollection.find({}).fetch()
}
Works like a charm
What I already hinted at in the comment: your document structure does not support efficient and simple searching. I can only guess the reason, but I suspect that you stick to some relational ideas like "schemas" or "normalization" which just don't make sense for a document database.
Without digging deeper into the problem of modeling, I could imagine something like this for your case:
{
_id: "bRtjhGNQ3eNqTiKWa",
/* */
search :{
usage: ["accounting"],
test: ["knowledgetest", "feedback"]
},
test: {
"knowledgetest" : {
"showName": "Wissenstest"
},
"feedback" : {
"showName": "360 Feedback"
}
},
usage: {
"accounting" : {
"values" : [ "knowledgetest", "feedback" ],
"showName" : "Accounting"
}
}
},
{
_id: "7bgvegeKZNXkKzuXs",
/* */
search : {
usage: ["recruiting"],
test: ["intelligence", "feedback"]
},
test: {
"intelligence" : {
showName: 'Intelligenztest'
},
"feedback" : {
showName: '360 Feedback'
}
},
usage: {
"recruiting" : {
"values" : [ "intelligence", "feedback" ],
"showName" : "Recruiting"
}
}
}
Then, a search for "knowledgetest" and "feedback" in "accounting" would be a simple
{ "usage.accounting.values" : { $all : [ "knowledgetest", "feedback"] } }
which can easily be used multiple times in an and condition:
{
{ "usage.accounting.values" : { $all : [ "knowledgetest", "feedback"] } },
{ "usage.anothercategory.values" : { $all [ "knowledgetest", "assessment" ] } }
}
Even the zero-times-case matches your search requirements, because an and-filter with none of these criteria yields {} which is the find-everything filter expression.
Once more, to make it absolutely clear: when using mongo, forget everything you know as "best practice" from the relational world. What you need to consider is: what are your queries, and how can my document model support these queries in an ideal way.

Exclude nested subdocuments in Mongodb without arrays

Here is how data are inserted in a "Products" MongoDB collection (using Meteor):
Products.insert(
{
productOne:
{
publicData:
{
pricePerUnit : 1,
label : "The first product"
},
privateData:
{
test1: "xxxxx",
test2: "xxxxx"
}
},
productTwo:
{
publicData:
{
pricePerUnit : 2,
label : "The second product"
},
privateData:
{
test1: "yyyyy",
test2: "yyyyy"
}
}
}
);
I would like to retrieve all the products, but without the "privateData" subdocuments, to get this:
{
productOne:
{
publicData:
{
pricePerUnit : 1,
label : "The first product"
}
},
productTwo:
{
publicData:
{
pricePerUnit : 2,
label : "The second product"
}
}
}
I tried several things with "$elemMatch" but honnestly I didn't succeed in anything, I have trouble understanding how I am even supposed to do that.
Would anyone have a suggestion? Any help would be greatly appreciated.
Thanks!
Your Query would be something Similar to this
Products.find({},{
fields: {
privateData: 0
}
}
privateData:0 will make sure that the field is omitted.
please Refer https://docs.mongodb.org/manual/tutorial/project-fields-from-query-results/ for more info
if you can use the aggregation framework you can use the $project operator:
db.<colletion_name>.aggregate( { $project: { publicData: 1} } );
And you will get back all of your documents with only the publicData field

MongoDB conditionally $addToSet sub-document in array by specific field

Is there a way to conditionally $addToSet based on a specific key field in a subdocument on an array?
Here's an example of what I mean - given the collection produced by the following sample bootstrap;
cls
db.so.remove();
db.so.insert({
"Name": "fruitBowl",
"pfms" : [
{
"n" : "apples"
}
]
});
n defines a unique document key. I only want one entry with the same n value in the array at any one time. So I want to be able to update the pfms array using n so that I end up with just this;
{
"Name": "fruitBowl",
"pfms" : [
{
"n" : "apples",
"mState": 1111234
}
]
}
Here's where I am at the moment;
db.so.update({
"Name": "fruitBowl",
},{
// not allowed to do this of course
// "$pull": {
// "pfms": { n: "apples" },
// },
"$addToSet": {
"pfms": {
"$each": [
{
"n": "apples",
"mState": 1111234
}
]
}
}
}
)
Unfortunately, this adds another array element;
db.so.find().toArray();
[
{
"Name" : "fruitBowl",
"_id" : ObjectId("53ecfef5baca2b1079b0f97c"),
"pfms" : [
{
"n" : "apples"
},
{
"n" : "apples",
"mState" : 1111234
}
]
}
]
I need to effectively upsert the apples document matching on n as the unique identifier and just set mState whether or not an entry already exists. It's a shame I can't do a $pull and $addToSet in the same document (I tried).
What I really need here is dictionary semantics, but that's not an option right now, nor is breaking out the document - can anyone come up with another way?
FWIW - the existing format is a result of language/driver serialization, I didn't choose it exactly.
further
I've gotten a little further in the case where I know the array element already exists I can do this;
db.so.update({
"Name": "fruitBowl",
"pfms.n": "apples",
},{
$set: {
"pfms.$.mState": 1111234,
},
}
)
But of course that only works;
for a single array element
as long as I know it exists
The first limitation isn't a disaster, but if I can't effectively upsert or combine $addToSet with the previous $set (which of course I can't) then it the only workarounds I can think of for now mean two DB round-trips.
The $addToSet operator of course requires that the "whole" document being "added to the set" is in fact unique, so you cannot change "part" of the document or otherwise consider it to be a "partial match".
You stumbled on to your best approach using $pull to remove any element with the "key" field that would result in "duplicates", but of course you cannot modify the same path in different update operators like that.
So the closest thing you will get is issuing separate operations but also doing that with the "Bulk Operations API" which is introduced with MongoDB 2.6. This allows both to be sent to the server at the same time for the closest thing to a "contiguous" operations list you will get:
var bulk = db.so.initializeOrderedBulkOp();
bulk.find({ "Name": "fruitBowl", "pfms.n": "apples": }).updateOne({
"$pull": { "pfms": { "n": "apples" } }
});
bulk.find({ "Name": "fruitBowl" }).updateOne({
"$push": { "pfms": { "n": "apples", "state": 1111234 } }
})
bulk.execute();
That pretty much is your best approach if it is not possible or practical to move the elements to another collection and rely on "upserts" and $set in order to have the same functionality but on a collection rather than array.
I have faced the exact same scenario. I was inserting and removing likes from a post.
What I did is, using mongoose findOneAndUpdate function (which is similar to update or findAndModify function in mongodb).
The key concept is
Insert when the field is not present
Delete when the field is present
The insert is
findOneAndUpdate({ _id: theId, 'likes.userId': { $ne: theUserId }},
{ $push: { likes: { userId: theUserId, createdAt: new Date() }}},
{ 'new': true }, function(err, post) { // do the needful });
The delete is
findOneAndUpdate({ _id: theId, 'likes.userId': theUserId},
{ $pull: { likes: { userId: theUserId }}},
{ 'new': true }, function(err, post) { // do the needful });
This makes the whole operation atomic and there are no duplicates with respect to the userId field.
I hope this helpes. If you have any query, feel free to ask.
As far as I know MongoDB now (from v 4.2) allows to use aggregation pipelines for updates.
More or less elegant way to make it work (according to the question) looks like the following:
db.runCommand({
update: "your-collection-name",
updates: [
{
q: {},
u: {
$set: {
"pfms.$[elem]": {
"n":"apples",
"mState": NumberInt(1111234)
}
}
},
arrayFilters: [
{
"elem.n": {
$eq: "apples"
}
}
],
multi: true
}
]
})
In my scenario, The data need to be init when not existed, and update the field If existed, and the data will not be deleted. If the datas have these states, you might want to try the following method.
// Mongoose, but mostly same as mongodb
// Update the tag to user, If there existed one.
const user = await UserModel.findOneAndUpdate(
{
user: userId,
'tags.name': tag_name,
},
{
$set: {
'tags.$.description': tag_description,
},
}
)
.lean()
.exec();
// Add a default tag to user
if (user == null) {
await UserModel.findOneAndUpdate(
{
user: userId,
},
{
$push: {
tags: new Tag({
name: tag_name,
description: tag_description,
}),
},
}
);
}
This is the most clean and fast method in the scenario.
As a business analyst , I had the same problem and hopefully I have a solution to this after hours of investigation.
// The customer document:
{
"id" : "1212",
"customerCodes" : [
{
"code" : "I"
},
{
"code" : "YK"
}
]
}
// The problem : I want to insert dateField "01.01.2016" to customer documents where customerCodes subdocument has a document with code "YK" but does not have dateField. The final document must be as follows :
{
"id" : "1212",
"customerCodes" : [
{
"code" : "I"
},
{
"code" : "YK" ,
"dateField" : "01.01.2016"
}
]
}
// The solution : the solution code is in three steps :
// PART 1 - Find the customers with customerCodes "YK" but without dateField
// PART 2 - Find the index of the subdocument with "YK" in customerCodes list.
// PART 3 - Insert the value into the document
// Here is the code
// PART 1
var myCursor = db.customers.find({ customerCodes:{$elemMatch:{code:"YK", dateField:{ $exists:false} }}});
// PART 2
myCursor.forEach(function(customer){
if(customer.customerCodes != null )
{
var size = customer.customerCodes.length;
if( size > 0 )
{
var iFoundTheIndexOfSubDocument= -1;
var index = 0;
customer.customerCodes.forEach( function(clazz)
{
if( clazz.code == "YK" && clazz.changeDate == null )
{
iFoundTheIndexOfSubDocument = index;
}
index++;
})
// PART 3
// What happens here is : If i found the indice of the
// "YK" subdocument, I create "updates" document which
// corresponds to the new data to be inserted`
//
if( iFoundTheIndexOfSubDocument != -1 )
{
var toSet = "customerCodes."+ iFoundTheIndexOfSubDocument +".dateField";
var updates = {};
updates[toSet] = "01.01.2016";
db.customers.update({ "id" : customer.id } , { $set: updates });
// This statement is actually interpreted like this :
// db.customers.update({ "id" : "1212" } ,{ $set: customerCodes.0.dateField : "01.01.2016" });
}
}
}
});
Have a nice day !