I have a collection of documents like
doc: {
    "_id": 6,
    item1: "something",
    item2: [
        {
            subitem1: value1,
            subitem2: value2
        },
        {
            subitem1: value3,
            subitem2: value4
        }
    ]
}
And I want to insert a field with the data of the other two, and then delete them, to end up with this:
doc: {
    "_id": 6,
    item1: "something",
    item2: [
        {
            subitem: { field: value1, field2: value2 }
        },
        {
            subitem: { field: value3, field2: value4 }
        }
    ]
}
I have to update all the documents of the collection with one script.
I have tried several things like $set and $push, but nothing works (and no error is raised when executed).
My last script is:
db.docs.find({}).update.forEach(
    function(doc) {
        doc.item2.forEach(
            function(item) {
                { $set : { item.subitem = { field: item.subitem1, field2: item.subitem2 } } }
            }
        )
        db.docs.save(doc);
    }
, false, true)
This doesn't generate an error but does nothing.
And I didn't even find out how to delete a field.
Please help me!
You should be looping with .bulkWrite() to commit the updates. The main thing to note here is what you are actually iterating: the collection documents as well as the members of the target array to transform.
You can either blow away the entire array with its replacement:
var ops = [];

db.docs.find({
    "item2": {
        "$elemMatch": {
            "subitem1": { "$exists": true },
            "subitem2": { "$exists": true }
        }
    }
}).forEach(function(doc) {
    doc.item2 = doc.item2.map(function(el) {
        return { "subitem": { "field1": el.subitem1, "field2": el.subitem2 } };
    });
    ops.push({
        "updateOne": {
            "filter": { "_id": doc._id },
            "update": { "$set": { "item2": doc.item2 } }
        }
    });
    // Test outside the array looping
    if ( ops.length == 1000 ) {
        db.docs.bulkWrite(ops);
        ops = [];
    }
});

if ( ops.length > 0 )
    db.docs.bulkWrite(ops);
Or preferably use positional matches as updates:
var ops = [];

db.docs.find({
    "item2": {
        "$elemMatch": {
            "subitem1": { "$exists": true },
            "subitem2": { "$exists": true }
        }
    }
}).forEach(function(doc) {
    doc.item2.forEach(function(item) {
        var updoc = { "subitem": { "field1": item.subitem1, "field2": item.subitem2 } };
        ops.push({
            "updateOne": {
                "filter": {
                    "_id": doc._id,
                    "item2": {
                        "$elemMatch": {
                            "subitem1": item.subitem1,
                            "subitem2": item.subitem2
                        }
                    }
                },
                "update": { "$set": { "item2.$": updoc } }
            }
        });
        // Test inside the array looping
        if ( ops.length == 1000 ) {
            db.docs.bulkWrite(ops);
            ops = [];
        }
    });
});

if ( ops.length > 0 )
    db.docs.bulkWrite(ops);
The reason why the latter case is better is that the writes are actually atomic for each element, so in high-volume environments you would not get conflicting writes from other processes.
That's the speedy and safe way to transform your current array content. The first way will run a bit faster, but I really would not recommend it on a live system. The second will still be very quick, but since it updates one array element at a time there is a bit more to do.
In both cases the actual "wire communication" with the server happens only once in every one thousand operations, which removes the overhead of sending a request and waiting for a response for every single update.
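As for how to delete a field in general: that is the job of the $unset update operator. A minimal sketch against your original field names, shown only for illustration, since the approaches above replace each array element wholesale and never need it:

db.docs.updateOne(
    { "_id": 6 },
    // Remove the old keys from the first element of "item2" by position.
    { "$unset": { "item2.0.subitem1": "", "item2.0.subitem2": "" } }
)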
Related
This code splits the nicknames field in the cities collection into an array, but it's way too slow:
db.cities
    .find()
    .snapshot()
    .forEach(function(el) {
        el.nicknames = el.nicknames.split('->')
        db.cities.save(el)
    })
This code also splits the nicknames field in the cities collection into an array, and it's much faster, but it temporarily causes the database size to double, which crashes my database.
db.cities.aggregate([
    { "$addFields": {
        "nicknames": { "$split": [ "$nicknames", "->" ] }
    }},
    { "$out": "cities" }
])
This seems like a trivial database task. There has to be a better way... right?
Yes, take advantage of the bulkWrite() method for efficient bulk updates. You can split the update operation into batches for large collections.
Using the cursor from the aggregate operation (minus the final $out stage), you can compose the bulk update operations as:
let bulkUpdateOps = [];

const cursor = db.cities.aggregate([
    { "$project": { "nicknames": { "$split": [ "$nicknames", "->" ] } } }
]);

cursor.forEach(doc => {
    const { _id, nicknames } = doc;
    bulkUpdateOps.push({
        "updateOne": {
            "filter": { _id },
            "update": { "$set": { nicknames } },
            "upsert": true
        }
    });
    if (bulkUpdateOps.length === 1000) {
        db.cities.bulkWrite(bulkUpdateOps);
        bulkUpdateOps = [];
    }
});

if (bulkUpdateOps.length > 0) {
    db.cities.bulkWrite(bulkUpdateOps);
}
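As an aside: if your server is MongoDB 4.2 or newer, you can skip the client-side loop entirely, because updateMany() accepts an aggregation pipeline as its update document. A sketch, assuming nicknames is still a string in the documents left to convert:

// Updates in place on the server; the $type filter skips documents
// whose nicknames field was already converted to an array.
db.cities.updateMany(
    { "nicknames": { "$type": "string" } },
    [ { "$set": { "nicknames": { "$split": [ "$nicknames", "->" ] } } } ]
)

This avoids both the per-document round trips and the temporary size doubling that $out causes.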
This query does the job fine:
db.collection.update(
    { "_id": oneIdProvided },
    { $inc: { "field": 5 } },
    { upsert: true }
)
Now I would like to do the same operation multiple times with different IDs. I thought the right way was to use $in, and therefore I tried:
db.collection.update(
    { "_id": { $in: oneArrayOfIds } },
    { $inc: { "field": 5 } },
    { upsert: true }
)
Problem is: if one of the provided IDs in the array does not exist in the collection, a new document is created (which is what I want), but it is assigned an automatically generated ID instead of the ID I provided and was looking for.
One solution I see could be to first run an insert query with my array of IDs (those already existing would not be modified) and then run my update query with upsert: false.
Do you see a way of doing that in only one query?
We can do this by performing multiple write operations using the bulkWrite() method.
function* range(start, end, step) {
    for (let val = start; val < end; val += step)
        yield val;
}

let oneArrayOfIds; // For example [1, 2, 3, 4]

let bulkOp = oneArrayOfIds.map(id => {
    return {
        "updateOne": {
            "filter": { "_id": id },
            // $inc (not $set) so the behavior matches the single-document query
            "update": { "$inc": { "field": 5 } },
            "upsert": true
        }
    };
});

const limit = 1000;
const len = bulkOp.length;

if (len > limit) {
    // Send the operations in batches of 1000
    for (let index of range(0, len, limit)) {
        db.collection.bulkWrite(bulkOp.slice(index, index + limit));
    }
} else {
    db.collection.bulkWrite(bulkOp);
}
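Note that with "$inc" and "upsert": true, an _id that is missing from the collection gets a new document created with the _id you provided and field: 5, while existing documents simply have field incremented by 5, matching the behavior of the original single-document query.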
{
    "_id" : 1,
    "users" : 2329255
},
{
    "_id" : 2,
    "users" : 2638831
}
How do I update all documents so that the users field is divided by 100?
The result should be:
{
    "_id" : 1,
    "users" : 23292.55
},
{
    "_id" : 2,
    "users" : 26388.31
}
db.coll.update({}, {$set: {'users': {'$divide': ['$users', 100]}}})
but it's not working.
Try the query below:
db.coll.find().snapshot().forEach(
    function (e) {
        e.users = e.users / 100;
        // save the updated document
        db.coll.save(e);
    }
)
The query above will change/update the data in the DB. If you want to fetch records with the divided value instead, use $project:
db.coll.aggregate([
    { $project: { users: { $divide: [ "$users", 100 ] } } }
])
This will not update the data but will return the desired value.
Use whichever fits your requirement.
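One caveat: cursor.snapshot() was removed in MongoDB 4.0, and db.collection.save() is a legacy shell helper (deprecated, and as far as I know not available in the newer mongosh shell), so this pattern only applies to older servers and shells.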
The $divide operator is only valid in the aggregate() method, not in update(). What you want to do is use aggregate() to create the computed field, then iterate the aggregation cursor to build bulk update operations that you can send to the server in one request, rather than sending a separate update request for each item in the result.
The following example demonstrates this:
var bulkUpdateOps = [];

db.coll.aggregate([
    { "$match": { "users": { "$exists": true } } },
    { "$project": {
        "computed_field": {
            "$divide": [ "$users", 100 ]
        }
    }}
]).forEach(function(doc) {
    bulkUpdateOps.push({
        "updateOne": {
            "filter": { "_id": doc._id },
            "update": { "$set": { "users": doc.computed_field } }
        }
    });
    if (bulkUpdateOps.length === 500) {
        db.coll.bulkWrite(bulkUpdateOps);
        bulkUpdateOps = [];
    }
});

if (bulkUpdateOps.length > 0) db.coll.bulkWrite(bulkUpdateOps);
Or for MongoDB 2.6.x and 3.0.x releases, use this version of Bulk operations:
var bulk = db.coll.initializeUnorderedBulkOp(),
    counter = 0;

db.coll.aggregate([
    { "$match": { "users": { "$exists": true } } },
    { "$project": {
        "computed_field": {
            "$divide": [ "$users", 100 ]
        }
    }}
]).forEach(function(doc) {
    bulk.find({ "_id": doc._id })
        .updateOne({ "$set": { "users": doc.computed_field } });
    counter++; // count the queued operation
    if (counter % 500 === 0) {
        bulk.execute();
        bulk = db.coll.initializeUnorderedBulkOp();
    }
});

if (counter % 500 !== 0) bulk.execute();
The Bulk operations API in both cases helps reduce the IO load on the server by sending the requests only once for every 500 documents in the collection to process.
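For completeness: on MongoDB 4.2 or newer you would not need the read-then-write loop at all, since update operations can take an aggregation pipeline and therefore reference the existing field value. A sketch, assuming users is numeric wherever it exists:

// One server-side statement: divide each existing value by 100.
db.coll.updateMany(
    { "users": { "$exists": true } },
    [ { "$set": { "users": { "$divide": [ "$users", 100 ] } } } ]
)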
I want to sum up all the EMP_SALARY values (2000 + 3000 + 4000 = 9000) and update the TOTAL_EMPLOYEES_SALARY attribute with that total of 9000. How can I do it in the mongo shell? Can anyone please help me out with this?
{
    "_id" : ObjectId("571898dbc000041fe0b921eb"),
    "ORGANIZATION" : "abc",
    "TOTAL_EMPLOYEES" : 10,
    "TOTAL_EMPLOYEES_SALARY" : 0,
    "employees" : [
        {
            "EMP_NAME" : "vijay",
            "EMP_SALARY" : 2000
        },
        {
            "EMP_NAME" : "vishnu",
            "EMP_SALARY" : 3000
        },
        {
            "EMP_NAME" : "vishal",
            "EMP_SALARY" : 4000
        }
    ]
}
If you are doing this in bulk for your collection, then the best way to do it is to iterate with .bulkWrite() to write back:
var ops = [];

db.collection.find().forEach(function(doc) {
    ops.push({
        "updateOne": {
            "filter": { "_id": doc._id },
            "update": {
                "$set": {
                    "TOTAL_EMPLOYEES_SALARY": Array.sum(doc.employees.map(function(emp) {
                        return emp.EMP_SALARY;
                    }))
                }
            }
        }
    });
    if ( ops.length == 1000 ) {
        db.collection.bulkWrite(ops);
        ops = [];
    }
});

if ( ops.length > 0 ) {
    db.collection.bulkWrite(ops);
}
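Note that Array.sum() is a helper provided by the legacy mongo shell; in mongosh or in a driver you would compute the same total inside the loop with a plain reduce. A sketch:

// Equivalent total without the legacy Array.sum() shell helper.
var total = doc.employees.reduce(function(sum, emp) {
    return sum + emp.EMP_SALARY;
}, 0);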
For "super safe" code though, you probably should be using $inc on iteration of each array element instead:
var ops = [];

db.collection.find().forEach(function(doc) {
    doc.employees.forEach(function(emp) {
        ops.push({
            "updateOne": {
                "filter": { "_id": doc._id },
                "update": {
                    "$inc": {
                        "TOTAL_EMPLOYEES_SALARY": emp.EMP_SALARY
                    }
                }
            }
        });
        if ( ops.length == 1000 ) {
            db.collection.bulkWrite(ops);
            ops = [];
        }
    });
});

if ( ops.length > 0 ) {
    db.collection.bulkWrite(ops);
}
In earlier shell releases you do it using the "bulk" operations builder directly:
var bulk = db.collection.initializeOrderedBulkOp(),
    count = 0;

db.collection.find().forEach(function(doc) {
    bulk.find({ "_id": doc._id }).updateOne({
        "$set": {
            "TOTAL_EMPLOYEES_SALARY": Array.sum(doc.employees.map(function(emp) {
                return emp.EMP_SALARY;
            }))
        }
    });
    count++;
    if ( count % 1000 == 0 ) {
        bulk.execute();
        bulk = db.collection.initializeOrderedBulkOp();
    }
});

if ( count % 1000 != 0 ) {
    bulk.execute();
}
But what you really should be doing in all instances is updating the MongoDB shell on your system, regardless of the server version used. A modern shell should be kept up to date just like a modern driver for your programming language of choice.
You need to iterate the documents anyway in order to update each one, so you might as well just sum the array content while reading each document.
Just for trivia's sake: in modern MongoDB releases the $sum operator works both as an accumulator and, since MongoDB 3.2, as an expression that adds up the items of an array. So now you can do this:
db.collection.aggregate([
    { "$project": {
        "TOTAL_EMPLOYEES_SALARY": {
            "$sum": "$employees.EMP_SALARY"
        }
    }}
])
And that will give the total of the array in each document.
In versions earlier than MongoDB 3.2, though, you need to $unwind the array and $group instead:
db.collection.aggregate([
    { "$unwind": "$employees" },
    { "$group": {
        "_id": "$_id",
        "TOTAL_EMPLOYEES_SALARY": { "$sum": "$employees.EMP_SALARY" }
    }}
])
Alternatively, to get one grand total across all documents, group on a null key. Note that in a $group stage the accumulator form of $sum ignores array values, so the array expression form of $sum is nested inside it to total each document's array first:
db.collection.aggregate([
    {
        $group: {
            _id: null,
            "sum": {
                $sum: { $sum: "$employees.EMP_SALARY" }
            }
        }
    }
])
I have documents in MongoDB containing an array. Now I need a field containing the quantity of items in this array, so I need to update the documents, adding this field.
I simply thought this would work:
db.myDocument.update({
    "itemsTotal": { $exists: false },
    "items": { $exists: true }
}, {
    $set: {
        itemsTotal: { $size: "$items" }
    }
}, {
    multi: true
})
But it fails with "not okForStorage".
I also tried an aggregation, but it throws an exception:
"errmsg" : "exception: invalid operator '$size'",
"code" : 15999,
"ok" : 0
What is the best solution, and what am I doing wrong? I'm starting to think about writing a Java tool to calculate the totals and update the documents with it.
You can use the .aggregate() method to $project your documents and return the $size of the items array. After that you will need to loop through the aggregation result using .forEach and $set the itemsTotal field for each document, using "Bulk" operations for maximum efficiency.
var bulkOp = db.myDocument.initializeUnorderedBulkOp();
var count = 0;

db.myDocument.aggregate([
    { "$match": {
        "itemsTotal": { "$exists": false },
        "items": { "$exists": true }
    }},
    { "$project": { "itemsTotal": { "$size": "$items" } } }
]).forEach(function(doc) {
    bulkOp.find({ "_id": doc._id }).updateOne({
        "$set": { "itemsTotal": doc.itemsTotal }
    });
    count++;
    if (count % 200 === 0) {
        // Execute per 200 operations and re-init
        bulkOp.execute();
        bulkOp = db.myDocument.initializeUnorderedBulkOp();
    }
});

// Clean up any operations remaining in the queue
if (count % 200 !== 0) {
    bulkOp.execute();
}
You could initialise a Bulk() operations builder to update the documents in a loop as follows:
var bulk = db.collection.initializeOrderedBulkOp(),
    count = 0;

db.collection.find({
    "itemsTotal": { "$exists": false },
    "items": { "$exists": true }
}).forEach(function(doc) {
    var items_size = doc.items.length;
    bulk.find({ "_id": doc._id }).updateOne({
        "$set": { "itemsTotal": items_size }
    });
    count++;
    if (count % 100 == 0) {
        bulk.execute();
        bulk = db.collection.initializeOrderedBulkOp();
    }
});

if (count % 100 != 0) { bulk.execute(); }
This is much easier starting with MongoDB v3.4, which introduced the $addFields aggregation pipeline stage. We'll also use the $out stage to output the result of the aggregation to the same collection (replacing the existing collection is atomic).
db.myDocuments.aggregate([
    {
        $addFields: {
            itemsTotal: { $size: "$items" }
        }
    },
    {
        $out: "myDocuments"
    }
])
WARNING: this solution requires all documents to have the items field. If some documents don't have it, aggregate will fail with:
"The argument to $size must be an array, but was of type: missing"
You might think you could add a $match to the aggregation to filter only documents containing items, but that means all documents not containing items would not be written back to the myDocuments collection, so you would lose those permanently.
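One way to keep every document while still guarding the $size is a conditional $addFields; a sketch for MongoDB 3.6+, using $$REMOVE so that documents without an items array simply don't get the field:

db.myDocuments.aggregate([
    {
        $addFields: {
            itemsTotal: {
                // Only compute $size when "items" really is an array;
                // "$$REMOVE" omits the field entirely otherwise.
                $cond: [ { $isArray: "$items" }, { $size: "$items" }, "$$REMOVE" ]
            }
        }
    },
    { $out: "myDocuments" }
])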