MongoDB: update all document on one field - mongodb

{
"_id" : 1,
"users" : 2329255
},
{
"_id" :2,
"users" : 2638831
}
How can I update the users field of every document to its current value divided by 100?
The result should be:
{
"_id" : 1,
"users" : 23292.55
},
{
"_id" : 2,
"users" : 26388.31
}
db.coll.update({}, {$set: {'users': {'$divide': ['$users', 100]}}})
----its not working

Try below query:
// Walk every document in the collection, scale `users` down by 100 on the
// client side, and write the whole document back.
db.coll.find().snapshot().forEach(function (doc) {
    doc.users /= 100;
    // save the updated document
    db.coll.save(doc);
});
The above query will change/update the data in the DB. If you only want to fetch records with the divided value, use $project instead:
// Read-only: returns each document with `users` divided by 100.
// Nothing is written back to the collection.
db.coll.aggregate([
    {
        "$project": {
            "users": { "$divide": ["$users", 100] }
        }
    }
]);
this will not update the data but will return you desired value.
Use as per your requirement.

The $divide operator is only valid for the aggregate() function, not the update() function. What you want to do is use the aggregate() method to create a computed field, then iterate the results from
the aggregate() cursor to build bulk update operations that you can send to the server in batches, rather than sending a separate update request for each item in the result.
The following example demonstrates this:
// Compute users / 100 on the server with aggregate(), then write the computed
// values back with bulkWrite() in batches of 500 operations.
var bulkUpdateOps = [];

db.coll.aggregate([
    // Pipeline stages are array elements, so they must be comma-separated.
    { "$match": { "users": { "$exists": true } } },
    {
        "$project": {
            "computed_field": {
                "$divide": ["$users", 100]
            }
        }
    }
]).forEach(function (doc) {
    bulkUpdateOps.push({
        "updateOne": {
            "filter": { "_id": doc._id },
            "update": { "$set": { "users": doc.computed_field } }
        }
    });

    // Send a full batch and start a fresh one.
    if (bulkUpdateOps.length === 500) {
        db.coll.bulkWrite(bulkUpdateOps);
        bulkUpdateOps = [];
    }
});

// Send whatever is left from the last, partially filled batch.
if (bulkUpdateOps.length > 0) {
    db.coll.bulkWrite(bulkUpdateOps);
}
Or for MongoDB 2.6.x and 3.0.x releases, use this version of Bulk operations:
// Same update using the Bulk() builder: queue one updateOne per aggregated
// document and execute the queue once per 500 queued operations.
var bulk = db.coll.initializeUnorderedBulkOp(),
    counter = 0;

db.coll.aggregate([
    // Pipeline stages are array elements, so they must be comma-separated.
    { "$match": { "users": { "$exists": true } } },
    {
        "$project": {
            "computed_field": {
                "$divide": ["$users", 100]
            }
        }
    }
]).forEach(function (doc) {
    bulk.find({ "_id": doc._id })
        .updateOne({ "$set": { "users": doc.computed_field } });

    // Count queued operations; without this increment the modulo test below
    // is true for every document and the batching never takes effect.
    counter++;
    if (counter % 500 === 0) {
        bulk.execute();
        bulk = db.coll.initializeUnorderedBulkOp();
    }
});

// Flush the remainder only when the last batch is partially filled.
if (counter % 500 !== 0) {
    bulk.execute();
}
The Bulk operations API in both cases will help reduce the IO load on the server by sending the requests only once in every 500 documents in the collection to process.

Related

Efficient MongoDB query to split a field into an array

This code splits the nicknames field in the cities collection into an array, but it's way too slow:
db.cities
.find()
.snapshot()
.forEach(function(el) {
el.nicknames = el.nicknames.split('->')
db.cities.save(el)
})
This code also splits the nicknames field in the cities collection into an array and it's much faster, but it temporarily causes the database size to double which crashes my database.
db.cities.aggregate(
[
{ "$addFields": {
"nicknames": { "$split": [ "$nicknames", "->" ] }
}},
{ "$out": "cities" }
]
)
This seems like a trivial database task. There has to be a better way... right?
Yes, take advantage of the bulkWrite method for efficient bulk updates. You can split up the update operation into batches for large collections.
Using the cursor from the aggregate operation (minus the last $out pipeline), you can compose the bulk update operations as:
// Split the "->"-delimited nicknames string into an array on the server, then
// write the arrays back with bulkWrite() in batches of 1000 operations.
let bulkUpdateOps = [];

const cursor = db.cities.aggregate([
    // Only convert values that are still plain strings. The $type query also
    // matches arrays that contain strings, so "nicknames.0" must be absent
    // as well. This skips documents without the field and keeps the script
    // re-runnable after a partial run, since $split rejects array input.
    {
        "$match": {
            "nicknames": { "$type": "string" },
            "nicknames.0": { "$exists": false }
        }
    },
    { "$project": { "nicknames": { "$split": ["$nicknames", "->"] } } }
]);

cursor.forEach(doc => {
    const { _id, nicknames } = doc;
    bulkUpdateOps.push({
        // No upsert: every _id comes from an existing document, and an upsert
        // would insert a stub { _id, nicknames } document if the original was
        // deleted between the read and the write.
        "updateOne": {
            "filter": { _id },
            "update": { "$set": { nicknames } }
        }
    });

    if (bulkUpdateOps.length === 1000) {
        db.cities.bulkWrite(bulkUpdateOps);
        bulkUpdateOps = [];
    }
});

// Send the final, partially filled batch.
if (bulkUpdateOps.length > 0) {
    db.cities.bulkWrite(bulkUpdateOps);
}

How to sum up all the employee salaries and update it to one attribute in the array embedded doc in mongodb

I want to sum up all the EMP_SALARY values (2000 + 3000 + 4000 = 9000) and update the total_employee_salary attribute with the result, 9000. How can I do this in the mongo shell? Can anyone please help me out with this?
{
"_id" : ObjectId("571898dbc000041fe0b921eb"),
"ORGANIZATION" : "abc",
"TOTAL_EMPLOYEES" : 10,
"TOTAL_EMPLOYEES_SALARY" : 0,
"employees" : [
{
"EMP_NAME" : "vijay",
"EMP_SALARY" : 2000,
},
{
"EMP_NAME" : "vishnu",
"EMP_SALARY" : 3000,
},
{
"EMP_NAME" : "vishal",
"EMP_SALARY" : 4000,
}
]
}
If you are doing this in bulk for your collection, then the best way to do this is iterate with .bulkWrite() to write back:
// For every document, sum EMP_SALARY over the employees array on the client
// and store the total, sending the updates in batches of 1000.
var ops = [];

db.collection.find().forEach(function (doc) {
    // A plain reduce avoids relying on the legacy shell's Array.sum helper
    // and yields 0 for a missing or empty employees array instead of throwing.
    var total = (doc.employees || []).reduce(function (sum, emp) {
        return sum + emp.EMP_SALARY;
    }, 0);

    ops.push({
        "updateOne": {
            "filter": { "_id": doc._id },
            "update": {
                // Field name as it appears in the sample document
                // (TOTAL_EMPLOYEES_SALARY), so the existing field is updated
                // rather than a second, differently spelled one being added.
                "$set": { "TOTAL_EMPLOYEES_SALARY": total }
            }
        }
    });

    if (ops.length === 1000) {
        db.collection.bulkWrite(ops);
        ops = [];
    }
});

// Send the final, partially filled batch.
if (ops.length > 0) {
    db.collection.bulkWrite(ops);
}
For "super safe" code though, you probably should be using $inc on iteration of each array element instead:
// Variant that issues one $inc per employee instead of a single $set.
// $inc adds to the stored value, so this assumes TOTAL_EMPLOYEES_SALARY starts
// at 0 (as in the sample document) and that the script is run only once.
var ops = [];

db.collection.find().forEach(function (doc) {
    // Skip documents without an employees array instead of throwing.
    (doc.employees || []).forEach(function (emp) {
        ops.push({
            "updateOne": {
                "filter": { "_id": doc._id },
                "update": {
                    // Field name as it appears in the sample document.
                    "$inc": { "TOTAL_EMPLOYEES_SALARY": emp.EMP_SALARY }
                }
            }
        });

        // Checked inside the inner loop because each employee adds an operation.
        if (ops.length === 1000) {
            db.collection.bulkWrite(ops);
            ops = [];
        }
    });
});

// Send the final, partially filled batch.
if (ops.length > 0) {
    db.collection.bulkWrite(ops);
}
In earlier shell releases you do it using the "bulk" operations builder directly:
// Same salary total using the Bulk() builder, executed once per 1000 queued
// operations.
var bulk = db.collection.initializeOrderedBulkOp(),
    count = 0;

db.collection.find().forEach(function (doc) {
    // A plain reduce avoids relying on the legacy shell's Array.sum helper
    // and yields 0 for a missing or empty employees array instead of throwing.
    var total = (doc.employees || []).reduce(function (sum, emp) {
        return sum + emp.EMP_SALARY;
    }, 0);

    bulk.find({ "_id": doc._id }).updateOne({
        // Field name as it appears in the sample document.
        "$set": { "TOTAL_EMPLOYEES_SALARY": total }
    });

    count++;
    if (count % 1000 === 0) {
        bulk.execute();
        bulk = db.collection.initializeOrderedBulkOp();
    }
});

// Flush the remainder only when the last batch is partially filled.
if (count % 1000 !== 0) {
    bulk.execute();
}
But what you really should be doing in all instances is updating the mongodb-shell package on your system, regardless of the server version used. A modern shell should really be updated just as with a modern API version with your programming language of choice.
You need to iterate documents anyway in order to update each one, so you might as well just sum the content from the array by reading each document.
Just for trivia sake, in modern MongoDB releases the $sum operator works both as an accumulator as well as now adding items in an array. So now you can do this:
// Per-document total: $sum applied to the array of EMP_SALARY values that
// "$employees.EMP_SALARY" resolves to.
db.collection.aggregate([
    {
        "$project": {
            "TOTAL_EMPLOYEE_SALARY": { "$sum": "$employees.EMP_SALARY" }
        }
    }
]);
And that will give the total of the array in each document.
In earlier versions than MongoDB 3.2 though, you need to $unwind the array and $group instead:
// Per-document total: emit one document per employee, then regroup by the
// original _id and accumulate the salaries.
db.collection.aggregate([
    { "$unwind": "$employees" },
    {
        "$group": {
            "_id": "$_id",
            "TOTAL_EMPLOYEE_SALARY": {
                "$sum": "$employees.EMP_SALARY"
            }
        }
    }
]);
// Grand total of EMP_SALARY across every employee in every document.
db.collection.aggregate([
    // Flatten the array first: as a $group accumulator, $sum treats an array
    // operand as non-numeric, so without this stage the result is always 0.
    { $unwind: "$employees" },
    {
        $group: {
            _id: null,
            "sum": {
                $sum: "$employees.EMP_SALARY"
            }
        }
    }
])

Transform Multiple Array Elements with update

I have a collection of documents like
doc:{
"_id":6,
item1:"something"
item2:[
{
subitem1:value1,
subitem2:value2
},
{
subitem1:value3,
subitem2:value4
}
]
}
And i want to insert a field with the data of the two other and then delete them to have this
doc:{
"_id":6,
item1:"something"
item2:[
{
subitem:{field:value1,field2:value2}
},
{
subitem:{field:value3,field2:value4}
}
]
}
I have to update all the document of the collection with 1 script.
I have tried several things like $set, $push but nothing works (with no error when executed)
My last script is
db.docs.find({}).update.forEach(
function(doc){
doc.item2.forEach(
function(item){
{ $set : {item.subitem = {field:item.subitem1,field2:item.subitem2}}}
}
)
db.docs.save(doc);
}
,false,true)
This doesn't generate error but do nothing.
And I couldn't even find out how to delete a field.
Please help me !
You should be looping with .bulkWrite() to commit the updates. The main thing to note here is what you are actually iterating, which is the collection items as well as the members of the target array to transform.
Either blow away the entire array and replace it with its transformed version:
// Rebuild item2 on the client and overwrite the whole array with a single $set
// per document, sending the updates in batches of 1000.
var ops = [];

db.docs.find({
    "item2": {
        "$elemMatch": {
            "subitem1": { "$exists": true },
            "subitem2": { "$exists": true }
        }
    }
}).forEach(function (doc) {
    // Wrap each element's two fields in a single "subitem" sub-document.
    doc.item2 = doc.item2.map(function (entry) {
        var wrapped = { "field1": entry.subitem1, "field2": entry.subitem2 };
        return { "subitem": wrapped };
    });

    var updateOp = {
        "updateOne": {
            "filter": { "_id": doc._id },
            "update": { "$set": { "item2": doc.item2 } }
        }
    };
    ops.push(updateOp);

    // Test outside array looping
    if (ops.length == 1000) {
        db.docs.bulkWrite(ops);
        ops = [];
    }
});

if (ops.length > 0) {
    db.docs.bulkWrite(ops);
}
Or preferably use positional matches as updates:
// Queue one positional ($) update per array element: each operation matches
// the element by its current subitem1/subitem2 values and replaces only it.
var ops = [];

db.docs.find({
    "item2": {
        "$elemMatch": {
            "subitem1": { "$exists": true },
            "subitem2": { "$exists": true }
        }
    }
}).forEach(function (doc) {
    doc.item2.forEach(function (entry) {
        // Locates this exact element inside this exact document.
        var elementFilter = {
            "_id": doc._id,
            "item2": {
                "$elemMatch": {
                    "subitem1": entry.subitem1,
                    "subitem2": entry.subitem2
                }
            }
        };
        var replacement = {
            "subitem": { "field1": entry.subitem1, "field2": entry.subitem2 }
        };

        ops.push({
            "updateOne": {
                "filter": elementFilter,
                "update": { "$set": { "item2.$": replacement } }
            }
        });

        // Test inside the array looping
        if (ops.length == 1000) {
            db.docs.bulkWrite(ops);
            ops = [];
        }
    });
});

if (ops.length > 0) {
    db.docs.bulkWrite(ops);
}
The reason why the latter case is better is the writes are actually atomic for each element so in high volume environments you would not get conflicting writes from other processes.
That's the speedy and safe way to transform your current array content. The first way will run a bit faster but I really would not recommend it on a live system. The second will still be very quick, but since it's updating one array element at a time in operations then there is a bit more to do.
In both cases the actual "wire communication" with the server happens only once in every one thousand operations, so this removes the overhead of sending the request and waiting for the response of every single update.

Insert field with array size in mongo

I have documents in MongoDB, each containing an array. Now I need a field that holds the number of items in this array, so I need to update the documents to add this field.
Simply I thought this will work:
db.myDocument.update({
"itemsTotal": {
$exists: false
},
"items": {
$exists: true
}
}, {
$set: {
itemsTotal: {
$size: "$items"
}
}
}, {
multi: true
})
But it completes with "not okForStorage".
Also I tried to make an aggregation, but it throws exception:
"errmsg" : "exception: invalid operator '$size'",
"code" : 15999,
"ok" : 0
What is a best solution and what I do wrong? I'm starting to think about writing java tool for calculation totals and updating documents with it.
You can use the .aggregate() method to $project your documents and return the $size of the items array. After that you will need to loop through your aggregation result using the .forEach loop and $set the itemTotal field for your document using "Bulk" operation for maximum efficiency.
// Compute the size of `items` on the server for documents that do not have
// `itemsTotal` yet, then store it with the Bulk() builder, executing once per
// 200 queued operations.
var bulkOp = db.myDocument.initializeUnorderedBulkOp();
var count = 0;

db.myDocument.aggregate([
    { "$match": {
        "itemsTotal": { "$exists": false },
        "items": { "$exists": true }
    }},
    { "$project": { "itemsTotal": { "$size": "$items" } } }
]).forEach(function (doc) {
    bulkOp.find({ "_id": doc._id }).updateOne({
        "$set": { "itemsTotal": doc.itemsTotal }
    });
    count++;
    if (count % 200 === 0) {
        // Execute per 200 operations and re-init
        bulkOp.execute();
        bulkOp = db.myDocument.initializeUnorderedBulkOp();
    }
});

// Clean up queues: flush only when the last batch is partially filled. When
// the count is an exact multiple of 200 the builder was just re-created and
// holds no operations, so executing it would submit an empty bulk.
if (count % 200 !== 0) {
    bulkOp.execute();
}
You could initialise a Bulk() operations builder to update the document in a loop as follows:
// Read each matching document, take the length of `items` on the client, and
// store it as `itemsTotal`, executing once per 100 queued operations.
// The updates are independent of each other, so the unordered builder is used
// for every batch, including the first.
var bulk = db.collection.initializeUnorderedBulkOp(),
    count = 0;

// The query filter must be a single object literal passed to find().
db.collection.find({
    "itemsTotal": { "$exists": false },
    "items": { "$exists": true }
}).forEach(function (doc) {
    var items_size = doc.items.length;
    bulk.find({ "_id": doc._id }).updateOne({
        "$set": { "itemsTotal": items_size }
    });
    count++;
    if (count % 100 === 0) {
        bulk.execute();
        bulk = db.collection.initializeUnorderedBulkOp();
    }
});

// Flush the remainder only when the last batch is partially filled.
if (count % 100 !== 0) {
    bulk.execute();
}
This is much easier starting with MongoDB v3.4, which introduced the $addFields aggregation pipeline operator. We'll also use the $out operator to output the result of the aggregation to the same collection (replacing the existing collection is atomic).
// Add itemsTotal (the size of `items`) to every document and write the
// pipeline result back over the myDocuments collection via $out.
db.myDocuments.aggregate([
    { "$addFields": { "itemsTotal": { "$size": "$items" } } },
    { "$out": "myDocuments" }
]);
WARNING: this solution requires all documents to have the items field. If some documents don't have it, aggregate will fail with
"The argument to $size must be an array, but was of type: missing"
You might think you could add a $match to the aggregation to filter only documents containing items, but that means all documents not containing items will not be output back to the myDocuments collection, so you'll lose those permanently.

Sum of Substrings in mongodb

We have field(s) in mongodb which has numbers in string form, values such as "$123,00,89.00" or "1234$" etc
Is it possible to customize $sum accumulators in mongodb, so that, certain processing can be done at each field value while the sum is performed. Such as substring or reg-ex processing etc.
The .mapReduce() method is what you need here. You cannot "cast" values in the aggregation framework from one "type" to another ( with the exception of "to string" or from Date to numeric ).
The JavaScript processing means that you can convert a string into a value for "summing". Something like this (with a bit more work needed on a "safe" regex for the required "currency" values):
// Sum `amount` across the collection: the map step strips "$", "," and "."
// from the string and divides by 100; the reduce step adds the emitted values.
db.collection.mapReduce(
    // map: emit every converted amount under one shared key
    function () {
        var digitsOnly = this.amount.replace(/\$|,|\./g,"");
        emit(null, digitsOnly / 100);
    },
    // reduce: total of everything emitted for the key
    function (unusedKey, emitted) {
        return Array.sum(emitted);
    },
    { "out": { "inline": 1 } }
);
Or with .group(), which also uses JavaScript processing but is a bit more restrictive in its requirements:
// Same total using .group(): start from { total: 0 } and add each document's
// converted amount to the running result.
db.collection.group({
    "key": null,
    "initial": { "total": 0 },
    "reduce": function (doc, acc) {
        var digitsOnly = doc.amount.replace(/\$|,|\./g,"");
        acc.total += digitsOnly / 100;
    }
});
So JavaScript processing is your only option, as these sorts of operations are not supported in the aggregation framework.
A number can be a string:
db.junk.aggregate([{ "$project": { "a": { "$substr": [ 1,0,1 ] } } }])
{ "_id" : ObjectId("55a458c567446a4351c804e5"), "a" : "1" }
And a Date can become a number:
db.junk.aggregate([{ "$project": { "a": { "$subtract": [ new Date(), new Date(0) ] } } }])
{ "_id" : ObjectId("55a458c567446a4351c804e5"), "a" : NumberLong("1436835669446") }
But there are no other operators to "cast" a "string" to "numeric", or even anything to do a regex replace as shown above.
If you want to use .aggregate() then you need to fix your data into a format that will support it, thus "numeric":
// Convert string amounts such as "$1,234.00" to numbers in place, executing
// the Bulk() queue once per 1000 queued operations.
var bulk = db.collection.initializeOrderedBulkOp(),
    count = 0;

// Select amounts containing "$", "," or "." - the same three characters the
// replace() below strips. The original pattern /\$|,\./ lacked the second "|"
// and so only matched "$" or the two-character sequence ",.". The global flag
// is dropped because it has no meaning for a match-only query filter.
db.collection.find({ "amount": /\$|,|\./ }).forEach(function (doc) {
    // Strip the separators, then divide by 100; this treats the last two
    // digits of the string as the decimal part.
    doc.amount = doc.amount.replace(/\$|,|\./g, "") / 100;
    bulk.find({ "_id": doc._id }).updateOne({
        "$set": { "amount": doc.amount }
    });
    count++;

    // execute once in 1000 operations
    if (count % 1000 === 0) {
        bulk.execute();
        bulk = db.collection.initializeOrderedBulkOp();
    }
});

// clean up queued operations
if (count % 1000 !== 0) {
    bulk.execute();
}
Then you can use .aggregate() on your "numeric" data:
// Collection-wide total of the (now numeric) amount field.
db.collection.aggregate([
    {
        "$group": {
            "_id": null,
            "total": { "$sum": "$amount" }
        }
    }
]);