MongoDB: Update a field of an array item by matching another field of that item

I have a data structure like this:
We have some centers. A center has some switches. A switch has some ports.
{
    "_id" : ObjectId("561ad881755a021904c00fb5"),
    "Name" : "center1",
    "Switches" : [
        {
            "Ports" : [
                {
                    "PortNumber" : 2,
                    "Status" : "Empty"
                },
                {
                    "PortNumber" : 5,
                    "Status" : "Used"
                },
                {
                    "PortNumber" : 7,
                    "Status" : "Used"
                }
            ]
        }
    ]
}
All I want is to write an update query that changes the Status of the port whose PortNumber is 5 to "Empty".
I can update it when I know the array index of the port (here the array index is 1) with this query:
db.collection.update(
    // query
    {
        _id: ObjectId("561ad881755a021904c00fb5")
    },
    // update
    {
        $set : { "Switches.0.Ports.1.Status" : "Empty" }
    }
);
But I don't know the array index of that Port.
Thanks for any help.

You would normally do this using the positional operator $, as described in the answer to this question:
Update field in exact element array in MongoDB
Unfortunately, right now the positional operator only supports matching one array level deep.
There is a JIRA ticket for the sort of behavior that you want: https://jira.mongodb.org/browse/SERVER-831
If you can make Switches an object instead of an array, you could do something like this:
db.collection.update(
    {
        _id: ObjectId("561ad881755a021904c00fb5"),
        "Switch.Ports.PortNumber": 5
    },
    {
        $set: {
            "Switch.Ports.$.Status": "Empty"
        }
    }
)
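As a side note for readers on newer versions: the behavior requested in SERVER-831 was eventually delivered, and MongoDB 3.6+ supports filtered positional operators via arrayFilters, which can reach into nested arrays without knowing any index. A sketch against the original structure, assuming MongoDB 3.6 or later:
db.collection.update(
    { "_id": ObjectId("561ad881755a021904c00fb5") },
    // $[] walks every switch; $[port] matches only the ports selected by arrayFilters
    { "$set": { "Switches.$[].Ports.$[port].Status": "Empty" } },
    { "arrayFilters": [ { "port.PortNumber": 5 } ] }
)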

Since you don't know the array index of the Port, I would suggest dynamically creating the $set conditions on the fly, i.e. something that finds the indexes of the relevant objects so you can modify them accordingly; for that, consider using MapReduce.
Currently this does not seem to be possible using the aggregation framework; there is an open, unresolved JIRA issue for it. However, a workaround is possible with MapReduce. The basic idea of MapReduce is that it uses JavaScript as its query language, which tends to be slower than the aggregation framework and should not be used for real-time data analysis.
In your MapReduce operation, you need to define two steps: the map step (which applies an operation to every document in the collection, and the operation can either do nothing or emit some object with keys and projected values) and the reduce step (which takes the list of emitted values and reduces it to a single element).
For the map step, you ideally want to emit, for every document in the collection, the index of each element in the Switches and Ports array fields, plus another key that contains the $set paths.
Your reduce step would be a function which does nothing, simply defined as var reduce = function() {};
The final step in your MapReduce operation will then create a separate collection switches that contains the emitted Switches array objects along with a field holding the $set conditions. This collection can be refreshed periodically by re-running the MapReduce operation on the original collection.
Altogether, this MapReduce method would look like:
var map = function() {
    for (var i = 0; i < this.Switches.length; i++) {
        for (var j = 0; j < this.Switches[i].Ports.length; j++) {
            emit(
                {
                    "_id": this._id,
                    "switch_index": i,
                    "port_index": j
                },
                {
                    "index": j,
                    "Switches": this.Switches[i],
                    "Port": this.Switches[i].Ports[j],
                    "update": {
                        "PortNumber": "Switches." + i.toString() + ".Ports." + j.toString() + ".PortNumber",
                        "Status": "Switches." + i.toString() + ".Ports." + j.toString() + ".Status"
                    }
                }
            );
        }
    }
};

var reduce = function() {};

db.centers.mapReduce(
    map,
    reduce,
    {
        "out": {
            "replace": "switches"
        }
    }
);
Querying the output collection switches from the MapReduce operation will typically give you a result like this:
db.switches.findOne()
Sample Output:
{
    "_id" : {
        "_id" : ObjectId("561ad881755a021904c00fb5"),
        "switch_index" : 0,
        "port_index" : 1
    },
    "value" : {
        "index" : 1,
        "Switches" : {
            "Ports" : [
                {
                    "PortNumber" : 2,
                    "Status" : "Empty"
                },
                {
                    "PortNumber" : 5,
                    "Status" : "Used"
                },
                {
                    "PortNumber" : 7,
                    "Status" : "Used"
                }
            ]
        },
        "Port" : {
            "PortNumber" : 5,
            "Status" : "Used"
        },
        "update" : {
            "PortNumber" : "Switches.0.Ports.1.PortNumber",
            "Status" : "Switches.0.Ports.1.Status"
        }
    }
}
You can then use the cursor from the db.switches.find() method to iterate over and update your collection accordingly:
var newStatus = "Empty";
var cur = db.switches.find({ "value.Port.PortNumber": 5 });
// Iterate through the results and update using the update query object,
// set dynamically by using the array-index syntax.
while (cur.hasNext()) {
    var doc = cur.next();
    var update = { "$set": {} };
    // set the update query object
    update["$set"][doc.value.update.Status] = newStatus;
    db.centers.update(
        {
            "_id": doc._id._id,
            "Switches.Ports.PortNumber": 5
        },
        update
    );
}

Related

MongoDB Calculate Values from Two Arrays, Sort and Limit

I have a MongoDB database storing float arrays. Assume a collection of documents in the following format:
{
    "id" : 0,
    "vals" : [ 0.8, 0.2, 0.5 ]
}
Having a query array, e.g., with values [ 0.1, 0.3, 0.4 ], I would like to compute for all elements in the collection a distance (e.g., sum of differences; for the given document and query it would be computed by abs(0.8 - 0.1) + abs(0.2 - 0.3) + abs(0.5 - 0.4) = 0.9).
I tried to use the aggregation function of MongoDB to achieve this, but I can't work out how to iterate over the array. (I am not using the built-in geo operations of MongoDB, as the arrays can be rather long)
I also need to sort the results and limit to the top 100, so calculation after reading the data is not desired.
Current Processing is mapReduce
If you need to execute this on the server and sort the top results and just keep the top 100, then you could use mapReduce for this like so:
db.test.mapReduce(
    function() {
        var input = [0.1, 0.3, 0.4];
        var value = Array.sum(this.vals.map(function(el, idx) {
            return Math.abs(el - input[idx]);
        }));
        emit(null, { "output": [{ "_id": this._id, "value": value }] });
    },
    function(key, values) {
        var output = [];
        values.forEach(function(value) {
            value.output.forEach(function(item) {
                output.push(item);
            });
        });
        // Sort descending; a comparator must return a number, not a boolean
        output.sort(function(a, b) {
            return b.value - a.value;
        });
        return { "output": output.slice(0, 100) };
    },
    { "out": { "inline": 1 } }
)
So the mapper function does the calculation and outputs everything under the same key, so that all results are sent to the reducer. The end output is going to be contained in an array in a single output document, so it is important both that all results are emitted with the same key value and that the output of each emit is itself an array, so mapReduce can work properly.
The sorting and reduction are done in the reducer itself: as each emitted document is inspected, its elements are put into a single temporary array which is sorted, and the top results are returned.
That is important, and is exactly why the emitter produces this as an array, even if it holds a single element at first. MapReduce works by processing results in "chunks", so even if all emitted documents have the same key, they are not all processed at once. Rather, the reducer puts its results back into the queue of emitted results to be reduced until there is only a single document left for that particular key.
I'm restricting the "slice" output here to 10 for brevity of listing, and including the stats to make a point, as the 100 reduce cycles invoked on this 10000-document sample can be seen:
{
    "results" : [
        {
            "_id" : null,
            "value" : {
                "output" : [
                    {
                        "_id" : ObjectId("56558d93138303848b496cd4"),
                        "value" : 2.2
                    },
                    {
                        "_id" : ObjectId("56558d96138303848b49906e"),
                        "value" : 2.2
                    },
                    {
                        "_id" : ObjectId("56558d93138303848b496d9a"),
                        "value" : 2.1
                    },
                    {
                        "_id" : ObjectId("56558d93138303848b496ef2"),
                        "value" : 2.1
                    },
                    {
                        "_id" : ObjectId("56558d94138303848b497861"),
                        "value" : 2.1
                    },
                    {
                        "_id" : ObjectId("56558d94138303848b497b58"),
                        "value" : 2.1
                    },
                    {
                        "_id" : ObjectId("56558d94138303848b497ba5"),
                        "value" : 2.1
                    },
                    {
                        "_id" : ObjectId("56558d94138303848b497c43"),
                        "value" : 2.1
                    },
                    {
                        "_id" : ObjectId("56558d95138303848b49842b"),
                        "value" : 2.1
                    },
                    {
                        "_id" : ObjectId("56558d96138303848b498db4"),
                        "value" : 2.1
                    }
                ]
            }
        }
    ],
    "timeMillis" : 1758,
    "counts" : {
        "input" : 10000,
        "emit" : 10000,
        "reduce" : 100,
        "output" : 1
    },
    "ok" : 1
}
So this is a single document output, in the specific mapReduce format, where the "value" contains an element which is an array of the sorted and limited results.
Future Processing is Aggregate
As of writing, the latest stable release of MongoDB is 3.0, and this lacks the functionality to make your operation possible. But the upcoming 3.2 release introduces new operators that make it possible:
db.test.aggregate([
    { "$unwind": { "path": "$vals", "includeArrayIndex": "index" } },
    { "$group": {
        "_id": "$_id",
        "result": {
            "$sum": {
                "$abs": {
                    "$subtract": [
                        "$vals",
                        { "$arrayElemAt": [ { "$literal": [0.1, 0.3, 0.4] }, "$index" ] }
                    ]
                }
            }
        }
    }},
    { "$sort": { "result": -1 } },
    { "$limit": 100 }
])
Also limiting to the same 10 results for brevity, you get output like this:
{ "_id" : ObjectId("56558d96138303848b49906e"), "result" : 2.2 }
{ "_id" : ObjectId("56558d93138303848b496cd4"), "result" : 2.2 }
{ "_id" : ObjectId("56558d96138303848b498e31"), "result" : 2.1 }
{ "_id" : ObjectId("56558d94138303848b497c43"), "result" : 2.1 }
{ "_id" : ObjectId("56558d94138303848b497861"), "result" : 2.1 }
{ "_id" : ObjectId("56558d96138303848b499037"), "result" : 2.1 }
{ "_id" : ObjectId("56558d96138303848b498db4"), "result" : 2.1 }
{ "_id" : ObjectId("56558d93138303848b496ef2"), "result" : 2.1 }
{ "_id" : ObjectId("56558d93138303848b496d9a"), "result" : 2.1 }
{ "_id" : ObjectId("56558d96138303848b499182"), "result" : 2.1 }
This is made possible largely due to $unwind being modified to project a field in results that contains the array index, and also due to $arrayElemAt which is a new operator that can extract an array element as a singular value from a provided index.
This allows the "look-up" of values by index position from your input array in order to apply the math to each element. The input array is wrapped in the existing $literal operator so $arrayElemAt does not complain and recognizes it as an array (this seems to be a small bug at present, as other array functions don't have the problem with direct input), and the appropriate matching value is picked out using the "index" field produced by $unwind for comparison.
The math is done by $subtract and of course another new operator in $abs to meet your functionality. Also since it was necessary to unwind the array in the first place, all of this is done inside a $group stage accumulating all array members per document and applying the addition of entries via the $sum accumulator.
Finally all result documents are processed with $sort and then the $limit is applied to just return the top results.
Summary
Even with the new functionality about to be available to the aggregation framework for MongoDB, it is debatable which approach is actually more efficient for results. This is largely due to there still being a need to $unwind the array content, which effectively produces a copy of each document per array member in the pipeline to be processed, and that generally causes an overhead.
So whilst mapReduce is the only present way to do this until a new release, it may actually outperform the aggregation statement depending on the amount of data to be processed, despite the fact that the aggregation framework works on natively coded operators rather than translated JavaScript operations.
As with all things, testing is always recommended to see which case suits your purposes better and which gives the best performance for your expected processing.
Sample
Of course the expected result for the sample document provided in the question is 0.9 by the math applied. But purely for my testing purposes, here is a short listing used to generate some sample data, to at least verify the mapReduce code was working as it should:
var bulk = db.test.initializeUnorderedBulkOp();
var x = 10000;
while (x--) {
    var vals = [0, 0, 0];
    vals = vals.map(function(val) {
        // Math.round() takes a single argument; this yields one-decimal values
        return Math.round(Math.random() * 10) / 10;
    });
    bulk.insert({ "vals": vals });
    if (x % 1000 == 0) {
        bulk.execute();
        bulk = db.test.initializeUnorderedBulkOp();
    }
}
The arrays are totally random single decimal point values, so there is not a lot of distribution in the listed results I gave as sample output.

MongoDB: Delete item inside multiple objects

I'm trying to delete an item inside an object categorized under multiple keys.
For example, deleting ObjectId("c") from every items section.
This is the structure:
{
    "somefield" : "value",
    "somefield2" : "value",
    "objects" : {
        "/" : {
            "color" : "#112233",
            "items" : [
                ObjectId("c"),
                ObjectId("b")
            ]
        },
        "/folder1" : {
            "color" : "#112233",
            "items" : [
                ObjectId("c"),
                ObjectId("d")
            ]
        },
        "/folder2" : {
            "color" : "112233",
            "items" : []
        },
        "/testing" : {
            "color" : "112233",
            "items" : [
                ObjectId("c"),
                ObjectId("f")
            ]
        }
    }
}
I tried with $pull and $unset, like:
db.getCollection('col').update(
    {},
    { $unset: { 'objects.$.items': ObjectId("c") } },
    { multi: true }
)
and
db.getCollection('col').update(
    {},
    { "objects": { "items": { $pull: [ObjectId("c")] } } },
    { multi: true }
)
Any ideas? Thanks!
The problem here is largely with the current structure of your document. MongoDB cannot "traverse paths" in an efficient way, and your structure currently has an "Object" ( 'objects' ) which has named "keys". What this means is that accessing "items" within each "key" needs the explicit path to each key to be able to see that element. There are no wildcards here:
db.getCollection("coll").find({ "objects./.items": ObjectId("c") })
And that is the basic principle of "matching" something here: you cannot do it "across all keys" without resorting to JavaScript code, which is really bad.
Change the structure. Rather than "object keys", use "arrays" instead, like this:
{
    "somefield" : "value",
    "somefield2" : "value",
    "objects" : [
        {
            "path": "/",
            "color" : "#112233",
            "items" : [
                "c",
                "b"
            ]
        },
        {
            "path": "/folder1",
            "color" : "#112233",
            "items" : [
                "c",
                "d"
            ]
        },
        {
            "path": "/folder2",
            "color" : "112233",
            "items" : []
        },
        {
            "path": "/testing",
            "color" : "112233",
            "items" : [
                "c",
                "f"
            ]
        }
    ]
}
It's much more flexible in the long run, and also allows you to "index" fields like "path" for use in query matching.
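For instance, a minimal sketch of such an index on the restructured documents (field names as above):
db.getCollection("coll").createIndex({ "objects.path": 1 })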
However, it's not going to help you much here, as even with a consistent query path, i.e.:
db.getCollection("coll").find({ "objects.items": ObjectId("c") })
That is better, but the problem still persists that it is not possible to $pull from multiple sources (whether object or array) in the same singular operation, and certainly never across multiple documents.
So the best you will ever get here is basically "trying" the "multi-update" concept until the options are exhausted and there is nothing left to "update". With the "modified" structure presented, you can do this:
var bulk = db.getCollection("coll").initializeOrderedBulkOp(),
    count = 0,
    modified = 1;

while (modified != 0) {
    bulk.find({ "objects.items": "c" }).update({
        "$pull": { "objects.$.items": "c" }
    });
    count++;
    var result = bulk.execute();
    bulk = db.getCollection("coll").initializeOrderedBulkOp();
    modified = result.nModified;
}

print("iterated: " + count);
That uses the "Bulk" operations API (actually all shell methods now use it anyway) to basically get a "better write response" that gives you useful information about what actually happened on the "update" attempt.
The point is that this basically "loops" and tries to match a document based on the "query" portion of the update, and then tries to $pull from the matched array index an item from the "inner array" that matches the conditions given to $pull (which acts as a "query" in itself, just upon the array items).
On each iteration you basically get the "nModified" value from the response, and when this is finally 0, then the operation is complete.
On the sample (restructured) given, this will take 4 iterations, one for each "outer" array member. The updates are already "multi" as implied by bulk .update() (as opposed to .updateOne()), and therefore the "maximum" number of iterations is determined by the "maximum" number of array elements present in the "outer" array across the whole collection. So if there is "one" document out of "one thousand" that has 20 entries, then there will be 20 iterations, just because that document still has something that can be matched and modified.
The alternate case under your current structure does not bear mentioning. It is just plain "impossible" without:
Retrieving the document individually
Extracting the present keys
Running an individual $pull for the array under that key
Get next document, rinse and repeat
So "multi" is "right out" as an option and cannot be done, without some possible "foreknowledge" of the possible "keys" under the "object" key in the document.
So please "change your structure" and be aware of the general limitations available.
You cannot possibly do this in "one" update, but at least if the maximum "array entries" your document has was "4", then it is better to do "four" updates over a "thousand" documents than the "four thousand" that would be required otherwise.
Also. Please do not "obfuscate" the ObjectId value in posts. People like to "copy/paste" code and data to test for themselves. Using something like ObjectId("c") which is not a valid ObjectId value would clearly cause errors, and therefore is not practical for people to use.
Do what "I did" in the listing, and if you want to abstract/obfuscate, then do it with "plain values" just as I have shown.
One approach that you could take is using JavaScript native methods like reduce to create the documents that will be used in the update.
You essentially need an operation like the following:
var itemId = ObjectId("55ba3a983857192828978fec");
db.col.find().forEach(function(doc) {
    // note the "objects." prefix matching the document structure
    var update = {
        "objects./.items": itemId,
        "objects./folder1.items": itemId,
        "objects./folder2.items": itemId,
        "objects./testing.items": itemId
    };
    db.col.update(
        { "_id": doc._id },
        {
            "$pull": update
        }
    );
})
Thus, creating the update object dynamically requires the reduce method, which converts the array of key names into an object:
var update = Object.getOwnPropertyNames(doc.objects).reduce(function(o, v, i) {
    o["objects." + v + ".items"] = itemId;
    return o;
}, {});
Overall, you would need to use the Bulk operations to achieve the above update:
var bulk = db.col.initializeUnorderedBulkOp(),
    itemId = ObjectId("55ba3a983857192828978fec"),
    count = 0;

db.col.find().forEach(function(doc) {
    var update = Object.getOwnPropertyNames(doc.objects).reduce(function(o, v, i) {
        o["objects." + v + ".items"] = itemId;
        return o;
    }, {});
    bulk.find({ "_id": doc._id }).updateOne({
        "$pull": update
    });
    count++;
    if (count % 1000 == 0) {
        bulk.execute();
        bulk = db.col.initializeUnorderedBulkOp();
    }
})

if (count % 1000 != 0) { bulk.execute(); }

MongoDB: convert array elements to fields

I am on MongoDB 2.6.0, and I have a collection with documents like this:
{
    "mid" : 1021,
    "day" : 298,
    "data" : [
        { "ts" : 1, "kwh" : 0.017 },
        { "ts" : 2, "kwh" : 0.018 },
        { "ts" : 3, "kwh" : 0.019 },
        ...
    ]
}
I would like to flatten the array elements into individual fields like this:
{
    "mid" : 1021,
    "day" : 298,
    "ts1" : 0.017,
    "ts2" : 0.018,
    "ts3" : 0.019,
    ...
}
This looks like it should be possible with the Aggregation framework, but I really can't figure out how to re-project the data array's "kwh" elements based on the value of "ts".
Anybody know how to do this?
Thanks for any help!
More of a mapReduce task really:
db.collection.mapReduce(
    function() {
        var copy = this;
        var tdata = copy.data;
        delete copy.data;
        tdata.forEach(function(data) {
            var key = "ts" + data.ts;
            copy[key] = data.kwh;
        });
        var id = copy._id;
        delete copy["_id"];
        emit(id, copy);
    },
    function() {},
    { "out": { "inline": 1 } }
)
At present you cannot flexibly specify the value of a "key" using the aggregation framework. You would have to explicitly name each key through projection in order to get the output you want.
MapReduce does not give "exactly" the same output as you want, due to how it works, but it is the closest you will get without explicitly naming the keys.
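For what it's worth, much later MongoDB releases do make dynamic keys possible in aggregation. A sketch assuming MongoDB 4.0+ (for $arrayToObject, $mergeObjects and $toString), well beyond the 2.6.0 in the question:
db.collection.aggregate([
    { "$replaceRoot": {
        "newRoot": {
            "$mergeObjects": [
                { "mid": "$mid", "day": "$day" },
                // build "ts<N>" keys from the array and merge with the scalar fields
                { "$arrayToObject": {
                    "$map": {
                        "input": "$data",
                        "in": {
                            "k": { "$concat": [ "ts", { "$toString": "$$this.ts" } ] },
                            "v": "$$this.kwh"
                        }
                    }
                }}
            ]
        }
    }}
])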

Struggling with a map-reduce in MongoDb

Problem
I have a document with an _id and a collection of Answers. I am trying to write a map-reduce function to sum the total score of answers for each id.
Document
{
    "_id" : ObjectId("527b6ba88d251d58a18f3f0a"),
    "Answers" : [
        { "Score" : 2 },
        { "Score" : 0 },
        { "Score" : 2 },
        { "Score" : 2 }
    ]
}
Here is the map-reduce I thought would be correct from reading the documentation.
Map
function() {
    this.Answers.forEach(function(val) {
        emit(this._id, val.Score);
    });
}
also tried this
function() {
    for (var i = 0; i < this.Answers; i++) {
        emit(this._id, this.Answers[i].Score);
    }
}
Reduce
function(key, values) {
    return Array.sum(values);
}
I am getting no information back with this, but it does appear to be processing, as it takes 2-5 seconds to return. I guess I am not understanding something about map-reduce.
Also I am using MongoVUE to access MongoDB.
EDIT
I just ran my map reduce through the console and got this output
{
    "results" : [ ],
    "timeMillis" : 2506,
    "counts" : {
        "input" : 1655,
        "emit" : 0,
        "reduce" : 0,
        "output" : 0
    },
    "ok" : 1
}
So it's my map function that's incorrect, I guess, as nothing was emitted.
EDIT 2
Updated document with output from mongovue
In JavaScript loops, adding the length property allows you to iterate by the count of the items in the array, so you can change your second attempt to:
function() {
    for (var i = 0; i < this.Answers.length; i++) {
        emit(this._id, this.Answers[i].Score);
    }
}
It should also be noted that your reduce can run multiple times per key; specifically, it can repeat every 101 rows. Technically this shouldn't matter, since you are summing up the array values and the previous reduce value will be passed as an array element into the new reduce, so it should work just fine; however, it is good to keep in mind.
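To see why the summing reducer tolerates such re-runs, consider this sketch (made-up values):
var reduce = function(key, values) {
    return Array.sum(values);
};

// A partial result fed back in as a plain value changes nothing,
// because addition is associative:
reduce("id", [2, 0, 2, 2]);                   // 6
reduce("id", [reduce("id", [2, 0]), 2, 2]);   // 2 + 2 + 2 = 6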
I think the 'this' variable is not what you expect inside the .forEach() callback in your map method. Try this instead:
function() {
    var row = this;
    this.Answers.forEach(function(val) {
        emit(row._id, val.Score);
    });
}
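As an aside, the same per-document total can also be had without map-reduce at all, via the aggregation framework (a sketch using $unwind and $group, which have been available since MongoDB 2.2):
db.collection.aggregate([
    { "$unwind": "$Answers" },
    { "$group": {
        "_id": "$_id",
        "TotalScore": { "$sum": "$Answers.Score" }
    }}
])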

Ordering for a todo list with MongoDB

I'm attempting to create my own todo list using Javascript, Python and MongoDB. I'm getting stuck on how to handle the task ordering.
My current idea is to have an order field in each task document, and when the order changes on the client I would grab the task list from the db and reorder each task individually/sequentially. This seems awkward because large todo lists would mean a large number of queries. Is there a way to update a field in multiple documents sequentially?
I'm also looking for advice as to whether this is the best way to do this. I want to be able to maintain the todo list order but maybe I'm going about it the wrong way.
{
    "_id" : ObjectId("50a658f2cace55034c68ce95"),
    "order" : 1,
    "title" : "task1",
    "complete" : 0
}
{
    "_id" : ObjectId("50a658fecace55034c68ce96"),
    "order" : 2,
    "title" : "task2",
    "complete" : 1
}
{
    "_id" : ObjectId("50a65907cace55034c68ce97"),
    "order" : 3,
    "title" : "task3",
    "complete" : 1
}
{
    "_id" : ObjectId("50a65911cace55034c68ce98"),
    "order" : 4,
    "title" : "task4",
    "complete" : 0
}
{
    "_id" : ObjectId("50a65919cace55034c68ce99"),
    "order" : 5,
    "title" : "task5",
    "complete" : 0
}
Mongo is very fast with queries; you should not be as concerned with performance as if you were using a full-featured relational database. If you want to be prudent, just create a todo list of 1k items and try it out; it should be pretty much instant.
for (var i = 0; i < orderedListOfIds.length; i++) {
    db.collection.update({ '_id': orderedListOfIds[i] }, { $set: { order: i } });
}
then
db.collection.find( { } ).sort( { order: 1 } )
Yes, mongo allows for updating multiple documents. Just use a modifier operation and multi=True. For example, this increments order by one for all documents with order greater than five:
todos.update({'order':{'$gt':5}}, {'$inc':{'order':1}}, multi=True)
As to the best way, usually it's better to use a "natural" ordering (by name, date, priority etc) rather than create a fake field just for that.
I'm doing something similar. I added a field ind to my list items. Here's how I move a list item to a new location:
moveItem: function (sourceIndex, targetIndex) {
    var id = Items.findOne({ ind: sourceIndex })._id;
    var movinUp = targetIndex > sourceIndex;
    var shift = movinUp ? -1 : 1;
    var lowerIndex = Math.min(sourceIndex, targetIndex);
    lowerIndex += movinUp ? 1 : 0;
    var upperIndex = Math.max(sourceIndex, targetIndex);
    upperIndex -= movinUp ? 0 : 1;
    console.log("Shifting items from " + lowerIndex + " to " + upperIndex + " by " + shift + ".");
    Items.update({ ind: { $gte: lowerIndex, $lte: upperIndex } }, { $inc: { ind: shift } }, { multi: true });
    Items.update(id, { $set: { ind: targetIndex } });
}
If you're using native promises (ES6) in Mongoose (mongoose.Promise = global.Promise), you can do the following to batch the updates:
function batchUpdate(req, res, next) {
    let ids = req.body.ids;
    let items = [];
    for (let i = 0; i < ids.length; i++)
        items.push(db.collection.findOneAndUpdate({ _id: ids[i] }, { $set: { order: i } }));
    Promise.all(items)
        .then(() => res.status(200).send())
        .catch(next);
}