Set operation with condition - mongodb

My object looks like:
{
    _id: ObjectId(),
    unpaid: 500,
    paid: false
}
and the statement:
db.items.update({
    unpaid: {
        $gte: 500
    }
}, {
    $inc: {
        unpaid: -500
    }
});
When unpaid becomes equal to zero, the paid field also needs to change to true.
Is it possible to do this with one query?

Unfortunately, MongoDB queries are not flexible enough to trigger a post-query event. You could do it in two ways, but both involve firing more than a single query:
The first approach involves:
Finding and updating all the records with unpaid >= 500 in a single update statement.
Following up with another update statement which sets the paid field to true for all records with unpaid = 0.
Code:
db.items.update({
    "unpaid": {
        $gte: 500
    }
}, {
    $inc: {
        "unpaid": -500
    }
});
db.items.update({
    "unpaid": 0, "paid": false
}, {
    $set: {
        "paid": true
    }
});
This approach may not be suitable if atomic updates matter in your application, and it could force you to add extra app logic to handle reads that happen before the paid field has been updated.
The second approach involves:
Finding all the records with unpaid >= 500.
Performing a bulk update in which each update sets both the unpaid and the paid fields at the same time.
Code:
var bulk = db.items.initializeUnorderedBulkOp();
db.items.find({ "unpaid": { $gte: 500 } }).forEach(function(doc) {
    var unpaid = doc.unpaid - 500;
    var update = { $set: { "unpaid": unpaid } };
    if (unpaid <= 0) {
        update.$set["paid"] = true;
    }
    bulk.find({ "_id": doc._id }).update(update);
});
bulk.execute();
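As a side note, MongoDB 4.2 and newer support updates with an aggregation pipeline, which should make a single-query solution possible because later $set stages see the document as already modified by earlier stages. A minimal sketch, assuming MongoDB 4.2+ and the document shape from the question:
// Sketch (MongoDB 4.2+): decrement unpaid and flip paid in one update.
db.items.updateMany(
    { unpaid: { $gte: 500 } },
    [
        // Stage 1: subtract the payment from unpaid.
        { $set: { unpaid: { $subtract: ["$unpaid", 500] } } },
        // Stage 2: "$unpaid" here is the value produced by stage 1.
        { $set: { paid: { $cond: [{ $lte: ["$unpaid", 0] }, true, "$paid"] } } }
    ]
);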

Unordered bulk update records in MongoDB shell

I've got a collection consisting of millions of documents that resemble the following:
{
    _id: ObjectId('...'),
    value: "0.53",
    combo: [
        {
            h: 0,
            v: "0.42"
        },
        {
            h: 1,
            v: "1.32"
        }
    ]
}
The problem is that the values are stored as strings and I need to convert them to float/double.
I'm trying this and it's working but this'll take days to complete, given the volume of data:
db.collection.find({}).forEach(function(obj) {
    if (typeof(obj.value) === "string") {
        obj.value = parseFloat(obj.value);
        db.collection.save(obj);
    }
    obj.combo.forEach(function(hv) {
        if (typeof(hv.value) === "string") {
            hv.value = parseFloat(hv.value);
            db.collection.save(obj);
        }
    });
});
I came across bulk update reading the Mongo docs and I'm trying this:
var bulk = db.collection.initializeUnorderedBulkOp();
bulk.find({}).update({
    $set: {
        "value": parseFloat("value")
    }
});
bulk.execute();
This runs... but I get NaN as the value, which is because it is literally trying to convert the string "value" to a float. I've tried different variations like this.value and "$value" but to no avail. Plus this approach only attempts to correct the value in the outer object, not the ones in the array.
I'd appreciate any help. Thanks in advance!
Figured it out the following way:
1) To convert at the document level, I came across this post and the reply by Markus paved the way to my solution:
var bulk = db.collection.initializeUnorderedBulkOp();
var myDocs = db.collection.find();
var ops = 0;
myDocs.forEach(
    function(myDoc) {
        bulk.find({ _id: myDoc._id }).updateOne(
            {
                $set: {
                    "value": parseFloat(myDoc.value)
                }
            }
        );
        if ((++ops % 1000) === 0) {
            bulk.execute();
            bulk = db.collection.initializeUnorderedBulkOp();
        }
    }
);
bulk.execute();
2) The second part involved updating the array object values, and I discovered the syntax to do so in the accepted answer on this post. In my case, I knew that there were 24 values in the array, so I ran this separately from the first query and the result looked like:
var bulk = db.collection.initializeUnorderedBulkOp();
var myDocs = db.collection.find();
var ops = 0;
myDocs.forEach(
    function(myDoc) {
        bulk.find({ _id: myDoc._id }).update(
            {
                $set: {
                    "combo.0.v": parseFloat(myDoc.combo[0].v),
                    "combo.1.v": parseFloat(myDoc.combo[1].v),
                    "combo.2.v": parseFloat(myDoc.combo[2].v),
                    "combo.3.v": parseFloat(myDoc.combo[3].v),
                    "combo.4.v": parseFloat(myDoc.combo[4].v),
                    "combo.5.v": parseFloat(myDoc.combo[5].v),
                    "combo.6.v": parseFloat(myDoc.combo[6].v),
                    "combo.7.v": parseFloat(myDoc.combo[7].v),
                    "combo.8.v": parseFloat(myDoc.combo[8].v),
                    "combo.9.v": parseFloat(myDoc.combo[9].v),
                    "combo.10.v": parseFloat(myDoc.combo[10].v),
                    "combo.11.v": parseFloat(myDoc.combo[11].v),
                    "combo.12.v": parseFloat(myDoc.combo[12].v),
                    "combo.13.v": parseFloat(myDoc.combo[13].v),
                    "combo.14.v": parseFloat(myDoc.combo[14].v),
                    "combo.15.v": parseFloat(myDoc.combo[15].v),
                    "combo.16.v": parseFloat(myDoc.combo[16].v),
                    "combo.17.v": parseFloat(myDoc.combo[17].v),
                    "combo.18.v": parseFloat(myDoc.combo[18].v),
                    "combo.19.v": parseFloat(myDoc.combo[19].v),
                    "combo.20.v": parseFloat(myDoc.combo[20].v),
                    "combo.21.v": parseFloat(myDoc.combo[21].v),
                    "combo.22.v": parseFloat(myDoc.combo[22].v),
                    "combo.23.v": parseFloat(myDoc.combo[23].v)
                }
            }
        );
        if ((++ops % 1000) === 0) {
            bulk.execute();
            bulk = db.collection.initializeUnorderedBulkOp();
        }
    }
);
bulk.execute();
Just to give an idea of the performance: the forEach was going through around 900 documents a minute, which for 15 million records would literally have taken days! Not only that, but it was only converting the types at the document level, not the array level. For that, I would have had to loop through each document and loop through each array (15 million x 24 iterations)! With this approach (running both queries side by side), it completed both in under 6 hours.
I hope this helps someone else.
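For what it's worth, on MongoDB 4.2 or newer the whole conversion can also be pushed to the server with an aggregation-pipeline update, avoiding the client-side loop entirely. A minimal sketch, assuming every document has a string value field and a combo array of {h, v} sub-documents:
// Sketch (MongoDB 4.2+): convert value and every combo.v to double on the server.
db.collection.updateMany(
    {},
    [
        {
            $set: {
                value: { $toDouble: "$value" },
                combo: {
                    $map: {
                        input: "$combo",
                        as: "elem",
                        in: { h: "$$elem.h", v: { $toDouble: "$$elem.v" } }
                    }
                }
            }
        }
    ]
);
Since $toDouble leaves values that are already doubles unchanged, re-running this update should be safe.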

Update with upsert, but only update if date field of document in db is less than updated document

I am having a bit of an issue trying to come up with the logic for this. So, what I want to do is:
Bulk update a bunch of posts to my remote MongoDB instance BUT
When updating, only update if the lastModified field on the remote document is less than the lastModified field in the document I am about to update/insert
Basically, I want to update my list of documents if they have been modified since the last time I updated them.
I can think of two brute force ways to do it...
First, querying my entire collection, manually removing and replacing the documents that match the criteria, adding the new ones, and then mass-inserting everything back into the remote collection after deleting everything in remote.
Second, querying each item and then deciding, if there is one in remote, whether I want to update it or not. This seems like it would be very taxing when dealing with remote collections.
If relevant, I am working in a NodeJS environment, using the mongodb npm package for database operations.
You can use the bulkWrite API to carry out the updates based on the logic you specified, as it handles this kind of conditional batch update well.
For example, the following snippet shows how to go about this, assuming you already have the data from the web service you need to update the remote collection with:
mongodb.connect(mongo_url, function(err, db) {
    if (err) console.log(err);
    else {
        var mongo_remote_collection = db.collection("remote_collection_name");
        /* data is from an http call to an external service, or ideally
           place this within the service callback
        */
        mongoUpsert(mongo_remote_collection, data, function() {
            db.close();
        });
    }
});
function mongoUpsert(collection, data_array, cb) {
    var ops = data_array.map(function(data) {
        return {
            "updateOne": {
                "filter": {
                    "_id": data._id, // or any other filtering mechanism to identify a doc
                    "lastModified": { "$lt": data.lastModified }
                },
                "update": { "$set": data },
                "upsert": true
            }
        };
    });
    collection.bulkWrite(ops, function(err, r) {
        // do something with the result, then signal the caller
        return cb(err);
    });
}
If the data from the external service is huge, then consider sending the writes to the server in batches of 500, which gives you better performance as you are not sending every request to the server individually, just one request per 500 documents.
For bulk operations MongoDB imposes a default internal limit of 1000 operations per batch, so choosing 500 documents gives you some control over the batch size rather than letting MongoDB impose the default; this mainly matters for larger operations in the magnitude of more than 1000 documents. In the case above you could just write the whole array at once since it is small, but batching in 500s is useful for larger arrays:
var ops = [],
    counter = 0;
data_array.forEach(function(data) {
    ops.push({
        "updateOne": {
            "filter": {
                "_id": data._id,
                "lastModified": { "$lt": data.lastModified }
            },
            "update": { "$set": data },
            "upsert": true
        }
    });
    counter++;
    if (counter % 500 === 0) {
        collection.bulkWrite(ops, function(err, r) {
            // do something with result
        });
        ops = [];
    }
});
if (counter % 500 !== 0) {
    collection.bulkWrite(ops, function(err, r) {
        // do something with result
    });
}
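With newer versions of the Node.js driver, bulkWrite() returns a promise when no callback is passed, so the same batching idea can be written with async/await. A rough sketch under that assumption (the function name and batchSize parameter are just illustrative):
// Sketch: batched conditional upserts using the promise-based driver API.
async function mongoUpsertBatched(collection, data_array, batchSize) {
    batchSize = batchSize || 500;
    var ops = [];
    for (var i = 0; i < data_array.length; i++) {
        var data = data_array[i];
        ops.push({
            updateOne: {
                filter: {
                    _id: data._id,
                    lastModified: { $lt: data.lastModified }
                },
                update: { $set: data },
                upsert: true
            }
        });
        if (ops.length === batchSize) {
            // ordered: false lets the server keep going if one write fails
            await collection.bulkWrite(ops, { ordered: false });
            ops = [];
        }
    }
    if (ops.length > 0) {
        await collection.bulkWrite(ops, { ordered: false });
    }
}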

Using TTL in MongoDB [duplicate]

I have a very specific thing I want to accomplish, and I wanted to make sure it is not possible in mongoose/mongoDB before I go and code the whole thing myself.
I checked mongoose-ttl for nodejs and several forums and didn't find quite what I need.
Here it is:
I have a schema with a date field createDate. Now I wish to place a TTL on that field. So far so good, I can do it like so (expiration in 5000 seconds):
createDate: {type: Date, default: Date.now, expires: 5000}
but I would like my users to be able to "up vote" documents they like, so those documents get a longer time to live, without changing the other documents in my collection.
So, can I change the TTL of a SINGLE document somehow, once a user tells me he likes that document, using mongoose or other existing npm related modules?
thank you
It has been more than a year, but this may be useful for others, so here is my answer:
I was trying to accomplish this same thing, in order to allow a grace period after an entry deletion, so the user can cancel the operation afterwards.
As stated by Mike Bennett, you can use a TTL index making documents expire at a specific clock time.
You have to create an index, setting expireAfterSeconds to zero:
db.yourCollection.createIndex({ "expireAt": 1 }, { expireAfterSeconds: 0 });
This will not affect any of the documents in your collection unless you set the expireAt field on a particular document, like so:
db.log_events.insert({
    "expireAt": new Date('July 22, 2013 14:00:00'),
    "logEvent": 2,
    "logMessage": "Success!"
})
Example in mongoose
Model
var BeerSchema = new Schema({
    name: {
        type: String,
        unique: true,
        required: true
    },
    description: String,
    alcohol: Number,
    price: Number,
    createdAt: { type: Date, default: Date.now },
    expireAt: { type: Date, default: undefined } // you don't need to set this default, but I like it there for semantic clearness
});
BeerSchema.index({ "expireAt": 1 }, { expireAfterSeconds: 0 });
Deletion with grace period
Uses moment for date manipulation
exports.deleteBeer = function(id) {
    var deferred = q.defer();
    Beer.update({ _id: id }, { expireAt: moment().add(10, 'seconds') }, function(err, data) {
        if (err) {
            deferred.reject(err);
        } else {
            deferred.resolve(data);
        }
    });
    return deferred.promise;
};
Revert deletion
Uses moment for date manipulation
exports.undeleteBeer = function(id) {
    var deferred = q.defer();
    // Unset expireAt so the document no longer expires
    Beer.update({ _id: id }, { $unset: { expireAt: 1 } }, function(err, data) {
        if (err) {
            deferred.reject(err);
        } else {
            deferred.resolve(data);
        }
    });
    return deferred.promise;
};
You could use the expire-at-a-specific-clock-time feature in MongoDB. You will have to update the expire time each time you want to extend the expiration of a document.
http://docs.mongodb.org/manual/tutorial/expire-data/#expire-documents-at-a-certain-clock-time
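To make that concrete for the up-vote case in the question, extending a single document's lifetime is just an ordinary update of its expireAt field. A minimal shell sketch, assuming a TTL index on expireAt with expireAfterSeconds: 0 and a hypothetical docId variable:
// Sketch: push the expiration out by another hour each time a document is up-voted.
db.yourCollection.updateOne(
    { _id: docId },
    { $set: { expireAt: new Date(Date.now() + 60 * 60 * 1000) } }
);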

MongoDB - Update text to Proper / title Case

We have a large collection of documents whose description text was entered with varying case,
e.g.
Desc =
"THE CAT"
or
"The Dog"
or
"the cow"
We want to make them all consistent in title (or proper) case, where the first letter of each word is upper case and the rest lower case:
"The Cat", "The Dog", "The Cow"
Looking for assistance in creating an update query to do that en masse, rather than manually as the data team is doing at present.
thanks
The algorithm for changing the title case below uses the Array.prototype.map() method and the String.prototype.replace() method, which returns a new string with some or all matches of a pattern replaced by a replacement.
In your case, the pattern for the replace() method will be a String, which is replaced by the new replacement and treated as a verbatim string.
First off, you need to lowercase and split the string before applying the map() method. Once you have defined a function that implements the conversion, you then need to iterate over your collection to apply an update with this function. Use the cursor.forEach() method on the cursor returned by find() to do the loop, and within the loop run an update on each document using the updateOne() method.
For relatively small datasets, the whole operation can be described by the following:
function titleCase(str) {
    return str.toLowerCase().split(' ').map(function(word) {
        return word.replace(word[0], word[0].toUpperCase());
    }).join(' ');
}
db.collection.find({}).forEach(function(doc) {
    db.collection.updateOne(
        { "_id": doc._id },
        { "$set": { "desc": titleCase(doc.desc) } }
    );
});
For improved performance, especially when dealing with huge datasets, take advantage of the Bulk() API for updating the collection efficiently in bulk, as you will be sending the operations to the server in batches (for example, a batch size of 500). This gives you much better performance since you won't be sending every request to the server individually, just one request per 500 operations, making your updates more efficient and quicker.
The following demonstrates this approach. The first example uses the Bulk() API available in MongoDB versions >= 2.6 and < 3.2. It updates all the documents in the collection by converting the desc field to title case using the above function.
MongoDB versions >= 2.6 and < 3.2:
function titleCase(str) {
    return str.toLowerCase().split(' ').map(function(word) {
        return word.replace(word[0], word[0].toUpperCase());
    }).join(' ');
}
var bulk = db.collection.initializeUnorderedBulkOp(),
    counter = 0;
db.collection.find().forEach(function(doc) {
    bulk.find({ "_id": doc._id }).updateOne({
        "$set": { "desc": titleCase(doc.desc) }
    });
    counter++;
    if (counter % 500 === 0) {
        // Execute per 500 operations
        bulk.execute();
        // re-initialize every 500 update statements
        bulk = db.collection.initializeUnorderedBulkOp();
    }
});
// Clean up remaining queue
if (counter % 500 !== 0) { bulk.execute(); }
The next example applies to MongoDB version 3.2, which has since deprecated the Bulk() API and provided a newer set of APIs using bulkWrite().
MongoDB version 3.2 and greater:
var ops = [],
    titleCase = function(str) {
        return str.toLowerCase().split(' ').map(function(word) {
            return word.replace(word[0], word[0].toUpperCase());
        }).join(' ');
    };
db.collection.find({
    "desc": {
        "$exists": true,
        "$type": 2
    }
}).forEach(function(doc) {
    ops.push({
        "updateOne": {
            "filter": { "_id": doc._id },
            "update": {
                "$set": { "desc": titleCase(doc.desc) }
            }
        }
    });
    if (ops.length === 500) {
        db.collection.bulkWrite(ops);
        ops = [];
    }
});
if (ops.length > 0)
    db.collection.bulkWrite(ops);

Calculate amount of value changes in MongoDB

I have a collection, for example:
{ "_id": ObjectId("54575132a8269c77675ace49"),"power": false, "time": 1415008560000}
{ "_id": ObjectId("54575132a8269c77675ace50"),"power": true, "time": 1415008570000}
{ "_id": ObjectId("54575132a8269c77675ace51"),"power": false, "time": 1415008580000}
{ "_id": ObjectId("54575132a8269c77675ace52"),"power": false, "time": 1415008590000}
{ "_id": ObjectId("54575132a8269c77675ace53"),"power": true, "time": 1415008600000}
{ "_id": ObjectId("54575132a8269c77675ace54"),"power": false, "time": 1415008610000}
How can I calculate the number of power changes from true to false and vice versa?
I could iterate through all the entries and increase some variable whenever a value differs from the previous one, but how can I do this in mongo?
For this example the result should be 4.
You could use the aggregation framework to do this:
db.yourCollection.aggregate({ $group:{ _id:"$power", count:{$sum:1} } })
which should give you the following result:
{_id:true,count:2}
{_id:false, count:4}
By subtracting the difference of those two values from the total document count (db.yourCollection.count()), you should have the number of state changes:
var results = db.yourCollection.aggregate({ $group: { _id: "$power", count: { $sum: 1 } } }).toArray();
var count = db.yourCollection.count();
var changes = count - Math.abs(results[0].count - results[1].count);
EDIT: Revised approach
As per #JohhnyHK's sharp eye, he found a problem with the above. All kudos, upvotes and the like to him.
Calculating the number of changes
In order to calculate the changes for large collections, with the given constraints, one could use map/reduce to count the changes, which should be pretty efficient even for very large collections.
var numberOfStateChanges = db.yourCollection.mapReduce(
    // Mapping function
    function() {
        // Since in the sample data there is no reasonable
        // field for a key, we use an artificial one: 0
        emit(0, this.power);
    },
    // Reduce function
    function(key, values) {
        // The initial number of changes is 0
        var changes = 0;
        // Our initial state, which does not count towards the changes,...
        var state = values[0];
        // ... hence we start comparing with the second item in the values array
        for (var idx = 1; idx < values.length; idx++) {
            // In case the current value is different from
            // the state we are comparing it with, we have a state change
            if (values[idx] != state) {
                // ... which we count...
                changes += 1;
                // ... and save.
                state = values[idx];
            }
        }
        return changes;
    },
    {
        // We make sure the values are fed into the map function in the correct order
        sort: { time: 1 },
        // and return the result inline instead of putting it into a collection, so we can process it
        out: { inline: 1 }
    }
).results[0].value;
Now numberOfStateChanges holds the correct number of state changes.
Note
In order to have this map/reduce processed efficiently, we need an index on the field we are sorting by, time:
db.yourCollection.ensureIndex({time:1})
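As an aside, on MongoDB 5.0 or newer the same count can be obtained with a plain aggregation using $setWindowFields to look at the previous document's power value. A minimal sketch, assuming documents shaped like the sample above:
// Sketch (MongoDB 5.0+): count documents whose power differs from the previous one (ordered by time).
db.yourCollection.aggregate([
    {
        $setWindowFields: {
            sortBy: { time: 1 },
            output: {
                // power of the previous document; null for the very first one
                prevPower: { $shift: { output: "$power", by: -1 } }
            }
        }
    },
    {
        $match: {
            $expr: {
                $and: [
                    { $ne: ["$prevPower", null] },
                    { $ne: ["$power", "$prevPower"] }
                ]
            }
        }
    },
    { $count: "changes" }
]);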