Remove all fields that are null - mongodb

How can I remove all fields that are null from all documents of a given collection?
I have a collection of documents such as:
{
'property1': 'value1',
'property2': 'value2',
...
}
but each document may have a null entry instead of a value entry.
I would like to save disk space by removing all null entries. The existence of the null entries does not contain any information in my case because I know the format of the JSON document a priori.

Starting Mongo 4.2, db.collection.update() can accept an aggregation pipeline, finally allowing the removal of a field based on its value:
// { _id: ObjectId("5d0e8...d2"), property1: "value1", property2: "value2" }
// { _id: ObjectId("5d0e8...d3"), property1: "value1", property2: null, property3: "value3" }
db.collection.update(
{},
[{ $replaceWith: {
$arrayToObject: {
$filter: {
input: { $objectToArray: "$$ROOT" },
as: "item",
cond: { $ne: ["$$item.v", null] }
}
}
}}],
{ multi: true }
)
// { _id: ObjectId("5d0e8...d2"), property1: "value1", property2: "value2" }
// { _id: ObjectId("5d0e8...d3"), property1: "value1", property3: "value3" }
In details:
The first part {} is the match query, filtering which documents to update (in our case all documents).
The second part [{ $replaceWith: { ... }] is the update aggregation pipeline (note the squared brackets signifying the use of an aggregation pipeline):
With $objectToArray, we first transform the document to an array of key/values such as [{ k: "property1", v: "value1" }, { k: "property2", v: null }, ...].
With $filter, we filter this array of key/values by removing items for which v is null.
We then transform back the filtered array of key/values to an object using $arrayToObject.
Finally, we replace the whole document by the modified one with $replaceWith.
Don't forget { multi: true }, otherwise only the first matching document will be updated.

// run in mongo shell
var coll = db.getCollection("collectionName");
var cursor = coll.find();
while (cursor.hasNext()) {
var doc = cursor.next();
var keys = {};
var hasNull = false;
for ( var x in doc) {
if (x != "_id" && doc[x] == null) {
keys[x] = 1;
hasNull = true;
}
}
if (hasNull) {
coll.update({_id: doc._id}, {$unset:keys});
}
}

This is an important question since mongodb cannot index null values (i.e. do not query for nulls or you will be waiting for a long time), so it is best to entirely avoid nulls and set default values using setOnInsert.
Here is a recursive solution to removing nulls:
/**
* RETRIEVES A LIST OF ALL THE KEYS IN A DOCUMENT, WHERE THE VALUE IS 'NULL' OR 'UNDEFINED'
*
* #param doc
* #param keyName
* #param nullKeys
*/
function getNullKeysRecursively(doc, keyName, nullKeys)
{
for (var item_property in doc)
{
// SKIP BASE-CLASS STUFF
if (!doc.hasOwnProperty(item_property))
continue;
// SKIP ID FIELD
if (item_property === "_id")
continue;
// FULL KEY NAME (FOR SUB-DOCUMENTS)
var fullKeyName;
if (keyName)
fullKeyName = keyName + "." + item_property;
else
fullKeyName = item_property;
// DEBUGGING
// print("fullKeyName: " + fullKeyName);
// NULL FIELDS - MODIFY THIS BLOCK TO ADD CONSTRAINTS
if (doc[item_property] === null || doc[item_property] === undefined)
nullKeys[fullKeyName] = 1;
// RECURSE OBJECTS / ARRAYS
else if (doc[item_property] instanceof Object || doc[item_property] instanceof Array)
getNullKeysRecursively(doc[item_property], fullKeyName, nullKeys);
}
}
/**
* REMOVES ALL PROPERTIES WITH A VALUE OF 'NULL' OR 'UNDEFINED'.
* TUNE THE 'LIMIT' VARIABLE TO YOUR MEMORY AVAILABILITY.
* ONLY CLEANS DOCUMENTS THAT REQUIRE CLEANING, FOR EFFICIENCY.
* USES bulkWrite FOR EFFICIENCY.
*
* #param collectionName
*/
function removeNulls(collectionName)
{
var coll = db.getCollection(collectionName);
var lastId = ObjectId("000000000000000000000000");
var LIMIT = 10000;
while (true)
{
// GET THE NEXT PAGE OF DOCUMENTS
var page = coll.find({ _id: { $gt: lastId } }).limit(LIMIT);
if (! page.hasNext())
break;
// BUILD BULK OPERATION
var arrBulkOps = [];
page.forEach(function(item_doc)
{
lastId = item_doc._id;
var nullKeys = {};
getNullKeysRecursively(item_doc, null, nullKeys);
// ONLY UPDATE MODIFIED DOCUMENTS
if (Object.keys(nullKeys).length > 0)
// UNSET INDIVIDUAL FIELDS, RATHER THAN REWRITE THE ENTIRE DOC
arrBulkOps.push(
{ updateOne: {
"filter": { _id: item_doc._id },
"update": { $unset: nullKeys }
} }
);
});
coll.bulkWrite(arrBulkOps, { ordered: false } );
}
}
// GO GO GO
removeNulls('my_collection');
document before:
{
"_id": ObjectId("5a53ed8f6f7c4d95579cb87c"),
"first_name": null,
"last_name": "smith",
"features": {
"first": {
"a": 1,
"b": 2,
"c": null
},
"second": null,
"third" : {},
"fourth" : []
},
"other": [
null,
123,
{
"a": 1,
"b": "hey",
"c": null
}
]
}
document after:
{
"_id" : ObjectId("5a53ed8f6f7c4d95579cb87c"),
"last_name" : "smith",
"features" : {
"first" : {
"a" : 1,
"b" : 2
}
},
"other" : [
null,
123,
{
"a" : 1,
"b" : "hey"
}
]
}
As you can see, it removes null, undefined, empty objects and empty arrays. If you need it to be more/less aggressive, it is a matter of modifying the block "NULL FIELDS - MODIFY THIS BLOCK TO ADD CONSTRAINTS".
edits welcome, especially #stennie

You can use the mongo updateMany functionality, but you must do this by specifying the parameter you are going to update, such as the year parameter:
db.collection.updateMany({year: null}, { $unset : { year : 1 }})

Like this question mentioned (mongodb query without field name):
Unfortunately, MongoDB does not support any method of querying all fields with a particular value.
So, you can either iterate the document (like Wizard's example) or do it in non-mongodb way.
If this is a JSON file, remove all the lines with null in sed might works:
sed '/null/d' ./mydata.json

Update for 2022:
If you delete keys with values Null, [], "", {} from the DB, that won't reduce it's size on disk.
You need to do that before you upload data into the collection.
Tested it myself. I had 6.000.000 documents in collection. Ran script of Xavier Guihot. Before script it was 7.8GB, after the script it became 7.9GB.
I confirm, that script do the job and remove keys, it's just that it doesn't reduce the size of the DB space allocation.
Then I deleted completely the collection, and imported .json dumps, that had already been formatted (removed all keys with values Null, [], "", {}). After collection size was 6.1GB That's minus 22% of the original size.
Here is python script I used to remove all empty keys from json dumps:
import fileinput
import json
for line in fileinput.input(inplace=1):
j = {k:v for k, v in json.loads(line).items() if v}
print(line.replace(line, json.dumps(j)))
Just run the script with file name as argument, for example: python3 main.py dump-00001
ps: take into account, that you need to wait ~200 seconds after changes to DB, because WiredTiger keep history backup of data for consistency after you make changes. That's mean, that only after 200 sec you will see real storage allocation of DB. 200 sec is default value for that action.

Related

update specific element from nested document array mongodb where has two matches

I need to update or create if not exist, specific obj,set score.b1 =50 and total=100 where object match curse=5 block=2
{ "_id":"sad445"
"year":2020,
"grade":4,
"seccion":"A",
"id": 100,
"name": "pedro",
"notes":[{"curse":5,
"block":1,
"score":{ "a1": 5,"a2": 10, "a3": 15},
"total" : 50
},{
"curse":5,
"block":2,
"score":{ "b1": 10,"b2": 20, "b3": 30},
"total" : 20
}
]
}
I can update all obj but I need to update or create specific elem from the score and not all. and/or create objs "notes":[{curse, block and score}] if notes is empty notes:[]
notas.UpdateMany(
{"$and":[{"_id":"sad445"},{"notes":{"$elemMatch":{"curse":5,"block":3}}}]},
{"$set":{"updated_at":{"$date":{"$numberLong":"1620322881360"}},
"notes.$.score":{"vvkzo":15,"i2z4i":2,"i2z4i|pm":5},
"notes.$.total":100}},
{"multiple":false})
Demo - https://mongoplayground.net/p/VaE28ujeOPx
Use $ (update)
The positional $ operator identifies an element in an array to update without explicitly specifying the position of the element in the array.
the positional $ operator acts as a placeholder for the first element that matches the query document, and
the array field must appear as part of the query document.
db.collection.update({
"notes": {
"$elemMatch": { "block": 2, "curse": 5 }
}
},
{
$set: { "notes.$.score.b4": 40 }
})
Read upsert: true
Optional. When true, update() either:
Creates a new document if no documents match the query. For more
details see upsert behavior. Updates a single document that matches
the query. If both upsert and multi are true and no documents match
the query, the update operation inserts only a single document.
To avoid multiple upserts, ensure that the query field(s) are uniquely
indexed. See Upsert with Unique Index for an example.
Defaults to false, which does not insert a new document when no match
is found.
Update
Demo - https://mongoplayground.net/p/iQQDyjG2a_B
Use $function
db.collection.update(
{ "_id": "sad445" },
[
{
$set: {
notes: {
$function: {
body: function(notes) {
var record = { curse:5, block:2, score:{ b4:40 } };
if(!notes || !notes.length) { return [record]; } // create new record and return in case of no notes
var updated = false;
for (var i=0; i < notes.length; i++) {
if (notes[i].block == 2 && notes[i].curse == 5) { // check condition for update
updated = true;
notes[i].score.b4=40; break; // update here
}
}
if (!updated) notes.push(record); // if no update push the record in notes array
return notes;
},
args: [
"$notes"
],
lang: "js"
}
}
}
}
]
)
Try to add upsert: true.
Creates a new document if no documents match the query. Updates a single document that matches
the query.
notas.UpdateMany(
{"$and":[{"_id":"sad445"},{"notes":{"$elemMatch":{"curse":5,"block":3}}}]},
{"$set":{"updated_at":{"$date":{"$numberLong":"1620322881360"}},
"notes.$.score":{"vvkzo":15,"i2z4i":2,"i2z4i|pm":5},
"notes.$.total":100}},
{"multiple":false, "upsert":true})

Try to match conditional column in mongodb

I am new to mongodb and now using aggregate.
I am in a problem that I have 2 column let this column1 and column2 I want to match either by column1 or column2 inside $match Is it possible. I am getting stuck please help.
db Structure:
{
"_id" : ObjectId("55794aa1be1f8fe822da139d"),
"transactionType" : "1",
"_store" : {
"storeLocation" : "Pitampura",
"storeName" : "Godown",
"_id" : "5576b5c5e414d90c03d1e330"
}
}
I am try to filter according to transactionType and storeName, I am sending these 2 params to api but when storeName sended as empty string then only filter according to transactionType else by both paramater. Not wanted to use if-elseif.
Well of course it can suit your query. You just handle as follows:
// Initial data
var request = { "storeName": "", "transactionType": "1" };
// Transform to array
var conditions = Object.keys(request).map(function(key) {
var obj = {},
newKey = "";
if ( key == "storeName" ) {
newKey = "_store." + key;
} else {
newKey = key;
}
obj[newKey] = request[key];
return obj;
});
db.collection.find({ "$or": conditions });
Where the whole structure after transformation breaks down to :
db.collection.find({
"$or": [
{ "_store.storeName": "" },
{ "transactionType": "1" }
]
})
Which of course matches the document on the condition that "transactionType" is met.
So that is what $or does, considers that at least one of the conditions in the query arguments matches data in the document.
The other thing here is that since the data presented in the request is not a "direct match" for the data in the document, manipulation is done on the "key name" to use the correct "dot notation" form for acessing that element.
These are just basic queries, so the same rules apply to aggregation $match, which is just a query element itself:
db.collection.aggregate([
// Possibly other pipeline before
// Your match phase, which probably should be first
{ "$match": {
"$or": [
{ "_store.storeName": "" },
{ "transactionType": "1" }
]
}},
// Other aggregagtion pipeline
])

MongoDB conditionally $addToSet sub-document in array by specific field

Is there a way to conditionally $addToSet based on a specific key field in a subdocument on an array?
Here's an example of what I mean - given the collection produced by the following sample bootstrap;
cls
db.so.remove();
db.so.insert({
"Name": "fruitBowl",
"pfms" : [
{
"n" : "apples"
}
]
});
n defines a unique document key. I only want one entry with the same n value in the array at any one time. So I want to be able to update the pfms array using n so that I end up with just this;
{
"Name": "fruitBowl",
"pfms" : [
{
"n" : "apples",
"mState": 1111234
}
]
}
Here's where I am at the moment;
db.so.update({
"Name": "fruitBowl",
},{
// not allowed to do this of course
// "$pull": {
// "pfms": { n: "apples" },
// },
"$addToSet": {
"pfms": {
"$each": [
{
"n": "apples",
"mState": 1111234
}
]
}
}
}
)
Unfortunately, this adds another array element;
db.so.find().toArray();
[
{
"Name" : "fruitBowl",
"_id" : ObjectId("53ecfef5baca2b1079b0f97c"),
"pfms" : [
{
"n" : "apples"
},
{
"n" : "apples",
"mState" : 1111234
}
]
}
]
I need to effectively upsert the apples document matching on n as the unique identifier and just set mState whether or not an entry already exists. It's a shame I can't do a $pull and $addToSet in the same document (I tried).
What I really need here is dictionary semantics, but that's not an option right now, nor is breaking out the document - can anyone come up with another way?
FWIW - the existing format is a result of language/driver serialization, I didn't choose it exactly.
further
I've gotten a little further in the case where I know the array element already exists I can do this;
db.so.update({
"Name": "fruitBowl",
"pfms.n": "apples",
},{
$set: {
"pfms.$.mState": 1111234,
},
}
)
But of course that only works;
for a single array element
as long as I know it exists
The first limitation isn't a disaster, but if I can't effectively upsert or combine $addToSet with the previous $set (which of course I can't) then it the only workarounds I can think of for now mean two DB round-trips.
The $addToSet operator of course requires that the "whole" document being "added to the set" is in fact unique, so you cannot change "part" of the document or otherwise consider it to be a "partial match".
You stumbled on to your best approach using $pull to remove any element with the "key" field that would result in "duplicates", but of course you cannot modify the same path in different update operators like that.
So the closest thing you will get is issuing separate operations but also doing that with the "Bulk Operations API" which is introduced with MongoDB 2.6. This allows both to be sent to the server at the same time for the closest thing to a "contiguous" operations list you will get:
var bulk = db.so.initializeOrderedBulkOp();
bulk.find({ "Name": "fruitBowl", "pfms.n": "apples": }).updateOne({
"$pull": { "pfms": { "n": "apples" } }
});
bulk.find({ "Name": "fruitBowl" }).updateOne({
"$push": { "pfms": { "n": "apples", "state": 1111234 } }
})
bulk.execute();
That pretty much is your best approach if it is not possible or practical to move the elements to another collection and rely on "upserts" and $set in order to have the same functionality but on a collection rather than array.
I have faced the exact same scenario. I was inserting and removing likes from a post.
What I did is, using mongoose findOneAndUpdate function (which is similar to update or findAndModify function in mongodb).
The key concept is
Insert when the field is not present
Delete when the field is present
The insert is
findOneAndUpdate({ _id: theId, 'likes.userId': { $ne: theUserId }},
{ $push: { likes: { userId: theUserId, createdAt: new Date() }}},
{ 'new': true }, function(err, post) { // do the needful });
The delete is
findOneAndUpdate({ _id: theId, 'likes.userId': theUserId},
{ $pull: { likes: { userId: theUserId }}},
{ 'new': true }, function(err, post) { // do the needful });
This makes the whole operation atomic and there are no duplicates with respect to the userId field.
I hope this helpes. If you have any query, feel free to ask.
As far as I know MongoDB now (from v 4.2) allows to use aggregation pipelines for updates.
More or less elegant way to make it work (according to the question) looks like the following:
db.runCommand({
update: "your-collection-name",
updates: [
{
q: {},
u: {
$set: {
"pfms.$[elem]": {
"n":"apples",
"mState": NumberInt(1111234)
}
}
},
arrayFilters: [
{
"elem.n": {
$eq: "apples"
}
}
],
multi: true
}
]
})
In my scenario, The data need to be init when not existed, and update the field If existed, and the data will not be deleted. If the datas have these states, you might want to try the following method.
// Mongoose, but mostly same as mongodb
// Update the tag to user, If there existed one.
const user = await UserModel.findOneAndUpdate(
{
user: userId,
'tags.name': tag_name,
},
{
$set: {
'tags.$.description': tag_description,
},
}
)
.lean()
.exec();
// Add a default tag to user
if (user == null) {
await UserModel.findOneAndUpdate(
{
user: userId,
},
{
$push: {
tags: new Tag({
name: tag_name,
description: tag_description,
}),
},
}
);
}
This is the most clean and fast method in the scenario.
As a business analyst , I had the same problem and hopefully I have a solution to this after hours of investigation.
// The customer document:
{
"id" : "1212",
"customerCodes" : [
{
"code" : "I"
},
{
"code" : "YK"
}
]
}
// The problem : I want to insert dateField "01.01.2016" to customer documents where customerCodes subdocument has a document with code "YK" but does not have dateField. The final document must be as follows :
{
"id" : "1212",
"customerCodes" : [
{
"code" : "I"
},
{
"code" : "YK" ,
"dateField" : "01.01.2016"
}
]
}
// The solution : the solution code is in three steps :
// PART 1 - Find the customers with customerCodes "YK" but without dateField
// PART 2 - Find the index of the subdocument with "YK" in customerCodes list.
// PART 3 - Insert the value into the document
// Here is the code
// PART 1
var myCursor = db.customers.find({ customerCodes:{$elemMatch:{code:"YK", dateField:{ $exists:false} }}});
// PART 2
myCursor.forEach(function(customer){
if(customer.customerCodes != null )
{
var size = customer.customerCodes.length;
if( size > 0 )
{
var iFoundTheIndexOfSubDocument= -1;
var index = 0;
customer.customerCodes.forEach( function(clazz)
{
if( clazz.code == "YK" && clazz.changeDate == null )
{
iFoundTheIndexOfSubDocument = index;
}
index++;
})
// PART 3
// What happens here is : If i found the indice of the
// "YK" subdocument, I create "updates" document which
// corresponds to the new data to be inserted`
//
if( iFoundTheIndexOfSubDocument != -1 )
{
var toSet = "customerCodes."+ iFoundTheIndexOfSubDocument +".dateField";
var updates = {};
updates[toSet] = "01.01.2016";
db.customers.update({ "id" : customer.id } , { $set: updates });
// This statement is actually interpreted like this :
// db.customers.update({ "id" : "1212" } ,{ $set: customerCodes.0.dateField : "01.01.2016" });
}
}
}
});
Have a nice day !

Merge changeset documents in a query

I have recorded changes from an information system in a mongo database. Every time a set of values are set or changed, a record is saved in the mongo database.
The change collection is in the following form:
{ "user_id": 1, "timestamp": { "date" : "2010-09-22 09:28:02", "timezone_type" : 3, "timezone" : "Europe/Paris" } }, "changes: { "fieldA": "valueA", "fieldB": "valueB", "fieldC": "valueC" } }
{ "user_id": 1, "timestamp": { "date" : "2010-09-24 19:01:52", "timezone_type" : 3, "timezone" : "Europe/Paris" } }, "changes: { "fieldA": "new_valueA", "fieldB": null, "fieldD": "valueD" } }
{ "user_id": 1, "timestamp": { "date" : "2010-10-01 11:11:02", "timezone_type" : 3, "timezone" : "Europe/Paris" } }, "changes: { "fieldD": "new_valueD" } }
Of course there are thousands of records per user with different attributes which represent millions of records. What I want to do is to see a user status at a given time. By example, the user_id 1 at 2010-09-30 would be
fieldA: new_valueA
fieldC: valueC
fieldD: valueD
This means I need to flatten all the changes prior to a given date for a given user into a single record. Can I do that directly in mongo ?
Edit: I am using the 2.0 version of mongodb hence cannot benefit from the aggregation framework.
Edit: It sounds I have found the answer to my question.
var mapTimeAndChangesByUserId = function() {
var key = this.user_id;
var value = { timestamp: this.timestamp.date, changes: this.changes };
emit(key, value);
}
var reduceMergeChanges = function(user_id, changeset) {
var mergeFunction = function(a, b) { for (var attr in b) a[attr] = b[attr]; };
var result = {};
changeset.forEach(function(e) { mergeFunction(result, e.changes); });
return { timestamp: changeset.pop().timestamp, changes: result };
}
The reduce function merges the changes in the order they come and returns the result.
db.user_change.mapReduce(
mapTimeAndChangesByUserId,
reduceMergeChanges,
{
out: { inline: 1 },
query: { user_id: 1, "timestamp.date": { $lt: "2010-09-30" } },
sort: { "timestamp.date": 1 }
});
'results' : [
"_id": 1,
"value": {
"timestamp": "2010-09-24 19:01:52",
"changes": {
"fieldA": "new_valueA",
"fieldB": null,
"fieldC": "valueC",
"fieldD": "valueD"
}
}
]
Which is fine to me.
You could write a MR to do this.
Since the fields are a lot like tags you can modify a nice cookbook example of counting tags here: http://cookbook.mongodb.org/patterns/count_tags/ of course instead of counting you want the latest value applied (assumption since this is not clear in your question) for that field.
So lets get our map function:
map = function() {
if (!this.changes) {
// If there were not changes for some reason lets bail this record
return;
}
// We iterate the changes
for (index in this.changes) {
emit(index /* We emit the field name */, this.changes[index] /* We emit the field value */);
}
}
And now for our reduce:
reduce = function(values){
// This part is dependant upon your input query. If you add a sort of
// date (ts) DESC then you will prolly want the first index (0) not the last as
// gathered here by values.length
return values[values.length];
}
And this will output a single document per field change of the type:
{
_id: your_field_ie_fieldA,
value: whoop
}
You can then iterate the end of the (most likely) in line output and, bam, you have your changes.
This is of course one way of dong it and is not designed to be run completely in line to your app, however that all depends on the size of the data your working on; it could be run very close.
I am unsure whether the group and distinct can run on this but it looks like it might: http://docs.mongodb.org/manual/reference/method/db.collection.group/#db-collection-group however I should note that group is basically a MR wrapper but you could do something like (untested just like the MR above):
db.col.group( {
key: { 'changes.fieldA': 1, // the rest of the fields },
cond: { 'timestamp.date': { $gt: new Date( '01/01/2012' ) } },
reduce: function ( curr, result ) { },
initial: { }
} )
But it does require you to define the keys instead of just iterating them programmably (maybe a better way).

How to change the type of a field?

I am trying to change the type of a field from within the mongo shell.
I am doing this...
db.meta.update(
{'fields.properties.default': { $type : 1 }},
{'fields.properties.default': { $type : 2 }}
)
But it's not working!
The only way to change the $type of the data is to perform an update on the data where the data has the correct type.
In this case, it looks like you're trying to change the $type from 1 (double) to 2 (string).
So simply load the document from the DB, perform the cast (new String(x)) and then save the document again.
If you need to do this programmatically and entirely from the shell, you can use the find(...).forEach(function(x) {}) syntax.
In response to the second comment below. Change the field bad from a number to a string in collection foo.
db.foo.find( { 'bad' : { $type : 1 } } ).forEach( function (x) {
x.bad = new String(x.bad); // convert field to string
db.foo.save(x);
});
Convert String field to Integer:
db.db-name.find({field-name: {$exists: true}}).forEach(function(obj) {
obj.field-name = new NumberInt(obj.field-name);
db.db-name.save(obj);
});
Convert Integer field to String:
db.db-name.find({field-name: {$exists: true}}).forEach(function(obj) {
obj.field-name = "" + obj.field-name;
db.db-name.save(obj);
});
Starting Mongo 4.2, db.collection.update() can accept an aggregation pipeline, finally allowing the update of a field based on its own value:
// { a: "45", b: "x" }
// { a: 53, b: "y" }
db.collection.updateMany(
{ a : { $type: 1 } },
[{ $set: { a: { $toString: "$a" } } }]
)
// { a: "45", b: "x" }
// { a: "53", b: "y" }
The first part { a : { $type: 1 } } is the match query:
It filters which documents to update.
In this case, since we want to convert "a" to string when its value is a double, this matches elements for which "a" is of type 1 (double)).
This table provides the code representing the different possible types.
The second part [{ $set: { a: { $toString: "$a" } } }] is the update aggregation pipeline:
Note the squared brackets signifying that this update query uses an aggregation pipeline.
$set is a new aggregation operator (Mongo 4.2) which in this case modifies a field.
This can be simply read as "$set" the value of "a" to "$a" converted "$toString".
What's really new here, is being able in Mongo 4.2 to reference the document itself when updating it: the new value for "a" is based on the existing value of "$a".
Also note "$toString" which is a new aggregation operator introduced in Mongo 4.0.
In case your cast isn't from double to string, you have the choice between different conversion operators introduced in Mongo 4.0 such as $toBool, $toInt, ...
And if there isn't a dedicated converter for your targeted type, you can replace { $toString: "$a" } with a $convert operation: { $convert: { input: "$a", to: 2 } } where the value for to can be found in this table:
db.collection.updateMany(
{ a : { $type: 1 } },
[{ $set: { a: { $convert: { input: "$a", to: 2 } } } }]
)
For string to int conversion.
db.my_collection.find().forEach( function(obj) {
obj.my_value= new NumberInt(obj.my_value);
db.my_collection.save(obj);
});
For string to double conversion.
obj.my_value= parseInt(obj.my_value, 10);
For float:
obj.my_value= parseFloat(obj.my_value);
db.coll.find().forEach(function(data) {
db.coll.update({_id:data._id},{$set:{myfield:parseInt(data.myfield)}});
})
all answers so far use some version of forEach, iterating over all collection elements client-side.
However, you could use MongoDB's server-side processing by using aggregate pipeline and $out stage as :
the $out stage atomically replaces the existing collection with the
new results collection.
example:
db.documents.aggregate([
{
$project: {
_id: 1,
numberField: { $substr: ['$numberField', 0, -1] },
otherField: 1,
differentField: 1,
anotherfield: 1,
needolistAllFieldsHere: 1
},
},
{
$out: 'documents',
},
]);
To convert a field of string type to date field, you would need to iterate the cursor returned by the find() method using the forEach() method, within the loop convert the field to a Date object and then update the field using the $set operator.
Take advantage of using the Bulk API for bulk updates which offer better performance as you will be sending the operations to the server in batches of say 1000 which gives you a better performance as you are not sending every request to the server, just once in every 1000 requests.
The following demonstrates this approach, the first example uses the Bulk API available in MongoDB versions >= 2.6 and < 3.2. It updates all
the documents in the collection by changing all the created_at fields to date fields:
var bulk = db.collection.initializeUnorderedBulkOp(),
counter = 0;
db.collection.find({"created_at": {"$exists": true, "$type": 2 }}).forEach(function (doc) {
var newDate = new Date(doc.created_at);
bulk.find({ "_id": doc._id }).updateOne({
"$set": { "created_at": newDate}
});
counter++;
if (counter % 1000 == 0) {
bulk.execute(); // Execute per 1000 operations and re-initialize every 1000 update statements
bulk = db.collection.initializeUnorderedBulkOp();
}
})
// Clean up remaining operations in queue
if (counter % 1000 != 0) { bulk.execute(); }
The next example applies to the new MongoDB version 3.2 which has since deprecated the Bulk API and provided a newer set of apis using bulkWrite():
var bulkOps = [];
db.collection.find({"created_at": {"$exists": true, "$type": 2 }}).forEach(function (doc) {
var newDate = new Date(doc.created_at);
bulkOps.push(
{
"updateOne": {
"filter": { "_id": doc._id } ,
"update": { "$set": { "created_at": newDate } }
}
}
);
})
db.collection.bulkWrite(bulkOps, { "ordered": true });
To convert int32 to string in mongo without creating an array just add "" to your number :-)
db.foo.find( { 'mynum' : { $type : 16 } } ).forEach( function (x) {
x.mynum = x.mynum + ""; // convert int32 to string
db.foo.save(x);
});
What really helped me to change the type of the object in MondoDB was just this simple line, perhaps mentioned before here...:
db.Users.find({age: {$exists: true}}).forEach(function(obj) {
obj.age = new NumberInt(obj.age);
db.Users.save(obj);
});
Users are my collection and age is the object which had a string instead of an integer (int32).
You can easily convert the string data type to numerical data type.
Don't forget to change collectionName & FieldName.
for ex : CollectionNmae : Users & FieldName : Contactno.
Try this query..
db.collectionName.find().forEach( function (x) {
x.FieldName = parseInt(x.FieldName);
db.collectionName.save(x);
});
I need to change datatype of multiple fields in the collection, so I used the following to make multiple data type changes in the collection of documents. Answer to an old question but may be helpful for others.
db.mycoll.find().forEach(function(obj) {
if (obj.hasOwnProperty('phone')) {
obj.phone = "" + obj.phone; // int or longint to string
}
if (obj.hasOwnProperty('field-name')) {
obj.field-name = new NumberInt(obj.field-name); //string to integer
}
if (obj.hasOwnProperty('cdate')) {
obj.cdate = new ISODate(obj.cdate); //string to Date
}
db.mycoll.save(obj);
});
demo change type of field mid from string to mongo objectId using mongoose
Post.find({}, {mid: 1,_id:1}).exec(function (err, doc) {
doc.map((item, key) => {
Post.findByIdAndUpdate({_id:item._id},{$set:{mid: mongoose.Types.ObjectId(item.mid)}}).exec((err,res)=>{
if(err) throw err;
reply(res);
});
});
});
Mongo ObjectId is just another example of such styles as
Number, string, boolean that hope the answer will help someone else.
I use this script in mongodb console for string to float conversions...
db.documents.find({ 'fwtweaeeba' : {$exists : true}}).forEach( function(obj) {
obj.fwtweaeeba = parseFloat( obj.fwtweaeeba );
db.documents.save(obj); } );
db.documents.find({ 'versions.0.content.fwtweaeeba' : {$exists : true}}).forEach( function(obj) {
obj.versions[0].content.fwtweaeeba = parseFloat( obj.versions[0].content.fwtweaeeba );
db.documents.save(obj); } );
db.documents.find({ 'versions.1.content.fwtweaeeba' : {$exists : true}}).forEach( function(obj) {
obj.versions[1].content.fwtweaeeba = parseFloat( obj.versions[1].content.fwtweaeeba );
db.documents.save(obj); } );
db.documents.find({ 'versions.2.content.fwtweaeeba' : {$exists : true}}).forEach( function(obj) {
obj.versions[2].content.fwtweaeeba = parseFloat( obj.versions[2].content.fwtweaeeba );
db.documents.save(obj); } );
And this one in php)))
foreach($db->documents->find(array("type" => "chair")) as $document){
$db->documents->update(
array('_id' => $document[_id]),
array(
'$set' => array(
'versions.0.content.axdducvoxb' => (float)$document['versions'][0]['content']['axdducvoxb'],
'versions.1.content.axdducvoxb' => (float)$document['versions'][1]['content']['axdducvoxb'],
'versions.2.content.axdducvoxb' => (float)$document['versions'][2]['content']['axdducvoxb'],
'axdducvoxb' => (float)$document['axdducvoxb']
)
),
array('$multi' => true)
);
}
The above answers almost worked but had a few challenges-
Problem 1: db.collection.save no longer works in MongoDB 5.x
For this, I used replaceOne().
Problem 2: new String(x.bad) was giving exponential number
I used "" + x.bad as suggested above.
My version:
let count = 0;
db.user
.find({
custID: {$type: 1},
})
.forEach(function (record) {
count++;
const actualValue = record.custID;
record.custID = "" + record.custID;
console.log(`${count}. Updating User(id:${record._id}) from old id [${actualValue}](${typeof actualValue}) to [${record.custID}](${typeof record.custID})`)
db.user.replaceOne({_id: record._id}, record);
});
And for millions of records, here are the output (for future investigation/reference)-