I have a mongodb instance with a lot of data, now I need to start up a new instance with the same structure without data.
how to get it done?
You can do that with the "query" option, with a query that does not return any document. Something like:
mongodump -q '{ "foo" : "bar" }'
This will dump all the dbs and indexes, you can then do a mongorestore to recreate them into another mongod instance
See documentation:
http://docs.mongodb.org/manual/reference/program/mongodump/#cmdoption--query
You can login into mongo shell and execute the following code statements to generate creating indexes statements. After that, use the statements to recreate indexes.
var collectionList = db.getCollectionNames();
for(var index in collectionList){
var collection = collectionList[index];
var cur = db.getCollection(collection).getIndexes();
if(cur.length == 1){
continue;
}
for(var index1 in cur){
var next = cur[index1];
if(next["name"] == '_id_'){
continue;
}
var unique=next["unique"]?true:false;
print("try{ db.getCollection(\""+collection+"\").createIndex("+JSON.stringify(next["key"])+",{unique:"+unique+"},{background:1})}catch(e){print(e)}");}}
There is really short and briliant script for create backup of indexes queries:
print(`// Backup indexes of : ${db.getName()} : database`);
print(`use ${db.getName()};`);
db.getCollectionNames().forEach(function (collection) {
indexes = db.getCollection(collection).getIndexes().forEach(function (index) {
if (index.name === '_id_') return; // skip defalut _id indexes
const keys = tojsononeline(index.key);
delete index.id; delete index.key; delete index.v; delete index.ns;
print(`db.${collection}.createIndex(${keys}, ${tojsononeline(index)});`);
});
});
You can run it directly from mongo shell like this:
mongo --quiet mongodb://localhost:27017/mydatabase indexes-backup.js
Output looks like:
db.user.createIndex({"user.email":1}, {"name":"userEmail", "background":true});
Based on Ivan's answer, I improved the script by adding more options like expireAfterSeconds (which was crucial for me) and an flag variable to drop indexes before creating them. dropFirst variable at the top of the script can be set to true to drop every index before creating it. Also, this script keeps existing names of the indexes.
var dropFirst = false;
for(var collection of db.getCollectionNames()) {
var indexes = db.getCollection(collection).getIndexes().filter(i => i.name !== '_id_');
if(indexes.length === 0) continue;
print(`\n// Collection: ${collection}`);
for(var index of indexes) {
var key = JSON.stringify(index.key);
var opts = [`name: "${index.name}"`, 'background: true'];
if(index['unique']) opts.push('unique: true');
if(index['hidden']) opts.push('hidden: true');
if(index['sparse']) opts.push('sparse: true');
if(index['expireAfterSeconds'] !== undefined) opts.push(`expireAfterSeconds: ${index['expireAfterSeconds']}`);
if(dropFirst) {
print(`try { db.getCollection("${collection}").dropIndex(${key}); } catch(e) { print('failed to drop ${key}:', e); }`);
}
print(`try { db.getCollection("${collection}").createIndex(${key}, {${opts.join(', ')}}) } catch(e) { print('failed to create ${key}:', e) }`);
}
}
I have a collection of Users in my database with corresponding unique ids. I am writing a function that takes an array of ids as an argument, e.g:
["user_id1", "user_id2", "user_id3", "user_id4"].
I want my query to return the first and only the first match. i.e. Using the example above, if user_id2 and user_id4 were the only two matching users in the database, my result would only return user_id2. User ids that are not in the database are ignored.
My current approach is to use a while loop, but I wanted to see if there was a better solution provided by Mongoose.
Current Pseudo Code:
function findOneUser(userIdArr) {
let user = 0;
let returnedUser;
while(!returnedUser || user < userIdArr.length) {
let id = userIdArr[user];
user = await User.findByID(id);
user++;
}
}
try this.
Use promises and mongoose findOne method:
let argumentArr = ["user_id1", "user_id2", "user_id3", "user_id4"];
let getUser = new Promise(function(resolve, reject) {
for (let i = 0; i < argumentArr.length; i++) {
User.findOne({_id:argumentArr[i]}).then(user => {
if(Object.keys(user).length > 0){
resolve(user)
}
}).catch(err => {reject(err)})
}
});
getUser.then(
(user) => {console.log(user);},//expected results: user(:object)
(err) => {console.log(err);}
);
}
Here is my helpers to diplay data from two collections
Template.Lirescategorie.helpers({
scategories: function () {
var cursor = Scategories.find();
var data = [];
cursor.forEach(function(somewhat) {
var categories = Categories.findOne({_id : somewhat.categorieID}, {categorie:1});
data.push({cat : categories.categorie, scat : somewhat.scategorie });
});
return data;
}
});
Here are my collections
categorie :
{
"_id": "LBKZQfZZSf4DRdeXo",
"categorie": "Citoyenneté"
}
scategorie
{
"_id": "cNHYpAEvC9ffjWkf5",
"categorieID": "LBKZQfZZSf4DRdeXo",
"scategorie": "Etat-Civil"
}
I'm pretty sure my helpers' code is not optimal. And i think by using _.map or something like that i can reduce the code.
Since i'm not really familiar to it, i'm looking for help about this.
Welcome to client-side joins. You can use .map() to get the list of categorieIDs and do the second find in one go with $in::
var cursor = Scategories.find();
var arrayOfCategorieIDs = cursor.map(function(s){return s.categorieId});
var categories = Categories.find({_id : {$in: arrayOfCategorieIDs}});
Then:
var sCategories = cursor.map(function(s){return s.scategorie}); // array of scategorie names
var categoryNames = categories.map(function(c){return s.categorie}); // array of categorie names
Then assemble these two arrays of strings into the array of objects you're looking for.
But there's a far simpler pattern for a client-side join: just iterate over one cursor and do the lookup into the related collection in a helper. For example:
html:
{{#each sCategories}}
{{sCategorie}}
{{categorie}}
{{/each}}
js:
Lirescategorie.helpers({
sCategories:(){
return Scategories.find();
},
categorie:(){
return Categories.findOne(this.categorieID).categorie;
}
});
So what I want to do is make findOne work more like it does in Meteor, but through the Mongo shell. In short I want to be able to do something like this db.collection.findOne("thisIsAnId") and have it lookup that id in that collection.
I tried loading a file that has this in it...
db.collection.findOne = function(query, fields, options){
if(typeof query === "string") {
return db.collection.originalFindOne({_id : query}, fields, options);
}
return db.collection.originalFindOne(query, fields, options);
}
Where originalFindOne would just chain to the default findOne, this didn't work at all. So after having no luck finding a way to override a default function I thought maybe I could create a new function like db.collection.simpleFindOne() or something, but I can't find a way to attach it to the mongo shell so that it will be available to any collection.
Anyone have some insight on how mongo internals work that could give me some help?
Try adding this snippet to one of your Mongo config files:
(function() {
// Duck-punch Mongo's `findOne` to work like Meteor's `findOne`
if (
typeof DBCollection !== "undefined" && DBCollection &&
DBCollection.prototype && typeof DBCollection.prototype.findOne === "function" &&
typeof ObjectId !== "undefined" && ObjectId
) {
var _findOne = DBCollection.prototype.findOne,
_slice = Array.prototype.slice;
DBCollection.prototype.findOne = function() {
var args = _slice.call(arguments);
if (args.length > 0 && (typeof args[0] === "string" || args[0] instanceof ObjectId)) {
args[0] = { _id: args[0] };
}
return _findOne.apply(this, args);
};
}
})();
I originally found the DBCollection object/class by typing db.myCollection.constructor into the Mongo shell. I then confirmed that findOne was defined on its prototype by verifying that (a) DBCollection was globally accessible, (b) that DBCollection.prototype existed, and (c) that typeof DBCollection.prototype.findOne === "function" (much like the snippet does).
Edit: Added a logic branch to also cover ObjectId-based IDs.
How can documents be moved from one collection to another collection in MongoDB?? For example: I have lot of documents in collection A and I want to move all 1 month older documents to collection B (these 1 month older documents should not be in collection A).
Using aggregation we can do copy. But what I am trying to do is moving of documents.
What method can be used to move documents?
The bulk operations #markus-w-mahlberg showed (and #mark-mullin refined) are efficient but unsafe as written. If the bulkInsert fails, the bulkRemove will still continue. To make sure you don't lose any records when moving, use this instead:
function insertBatch(collection, documents) {
var bulkInsert = collection.initializeUnorderedBulkOp();
var insertedIds = [];
var id;
documents.forEach(function(doc) {
id = doc._id;
// Insert without raising an error for duplicates
bulkInsert.find({_id: id}).upsert().replaceOne(doc);
insertedIds.push(id);
});
bulkInsert.execute();
return insertedIds;
}
function deleteBatch(collection, documents) {
var bulkRemove = collection.initializeUnorderedBulkOp();
documents.forEach(function(doc) {
bulkRemove.find({_id: doc._id}).removeOne();
});
bulkRemove.execute();
}
function moveDocuments(sourceCollection, targetCollection, filter, batchSize) {
print("Moving " + sourceCollection.find(filter).count() + " documents from " + sourceCollection + " to " + targetCollection);
var count;
while ((count = sourceCollection.find(filter).count()) > 0) {
print(count + " documents remaining");
sourceDocs = sourceCollection.find(filter).limit(batchSize);
idsOfCopiedDocs = insertBatch(targetCollection, sourceDocs);
targetDocs = targetCollection.find({_id: {$in: idsOfCopiedDocs}});
deleteBatch(sourceCollection, targetDocs);
}
print("Done!")
}
Update 2
Please do NOT upvote this answer any more. As written #jasongarber's answer is better in any aspect.
Update
This answer by #jasongarber is a safer approach and should be used instead of mine.
Provided I got you right and you want to move all documents older than 1 month, and you use mongoDB 2.6, there is no reason not to use bulk operations, which are the most efficient way of doing multiple operations I am aware of:
> var bulkInsert = db.target.initializeUnorderedBulkOp()
> var bulkRemove = db.source.initializeUnorderedBulkOp()
> var date = new Date()
> date.setMonth(date.getMonth() -1)
> db.source.find({"yourDateField":{$lt: date}}).forEach(
function(doc){
bulkInsert.insert(doc);
bulkRemove.find({_id:doc._id}).removeOne();
}
)
> bulkInsert.execute()
> bulkRemove.execute()
This should be pretty fast and it has the advantage that in case something goes wrong during the bulk insert, the original data still exists.
Edit
In order to prevent too much memory to be utilized, you can execute the bulk operation on every x docs processed:
> var bulkInsert = db.target.initializeUnorderedBulkOp()
> var bulkRemove = db.source.initializeUnorderedBulkOp()
> var x = 10000
> var counter = 0
> var date = new Date()
> date.setMonth(date.getMonth() -1)
> db.source.find({"yourDateField":{$lt: date}}).forEach(
function(doc){
bulkInsert.insert(doc);
bulkRemove.find({_id:doc._id}).removeOne();
counter ++
if( counter % x == 0){
bulkInsert.execute()
bulkRemove.execute()
bulkInsert = db.target.initializeUnorderedBulkOp()
bulkRemove = db.source.initializeUnorderedBulkOp()
}
}
)
> bulkInsert.execute()
> bulkRemove.execute()
Insert and remove:
var documentsToMove = db.collectionA.find({});
documentsToMove.forEach(function(doc) {
db.collectionB.insert(doc);
db.collectionA.remove(doc);
});
note: this method might be quite slow for large collections or collections holding large documents.
$out is use to create the new collection with data , so use $out
db.oldCollection.aggregate([{$out : "newCollection"}])
then use drop
db.oldCollection.drop()
you can use range query to get data from sourceCollection and keep the cursor data in variable and loop on it and insert to target collection:
var doc = db.sourceCollection.find({
"Timestamp":{
$gte:ISODate("2014-09-01T00:00:00Z"),
$lt:ISODate("2014-10-01T00:00:00Z")
}
});
doc.forEach(function(doc){
db.targetCollection.insert(doc);
})
Hope so it helps!!
First option (Using mongo dump)
1.Get a dump from collection
mongodump -d db -c source_collection
2.Restore from collection
mongorestore -d db -c target_collection dir=dump/db_name/source_collection.bson
Second Option
Running aggregate
db.getCollection('source_collection').aggregate([ { $match: {"emailAddress" : "apitester#mailinator.com"} }, { $out: "target_collection" } ])
Third Option (Slowest)
Running a through for loop
db.getCollection('source_collection').find().forEach(function(docs){ db.getCollection('target_collection').insert(docs); }) print("Rolleback Completed!");
May be from the performance point of view it's better to remove a lot of documents using one command(especially if you have indexes for query part) rather than deleting them one-by-one.
For example:
db.source.find({$gte: start, $lt: end}).forEach(function(doc){
db.target.insert(doc);
});
db.source.remove({$gte: start, $lt: end});
This is a restatement of #Markus W Mahlberg
Returning the favor - as a function
function moveDocuments(sourceCollection,targetCollection,filter) {
var bulkInsert = targetCollection.initializeUnorderedBulkOp();
var bulkRemove = sourceCollection.initializeUnorderedBulkOp();
sourceCollection.find(filter)
.forEach(function(doc) {
bulkInsert.insert(doc);
bulkRemove.find({_id:doc._id}).removeOne();
}
)
bulkInsert.execute();
bulkRemove.execute();
}
An example use
var x = {dsid:{$exists: true}};
moveDocuments(db.pictures,db.artifacts,x)
to move all documents that have top level element dsid from the pictures to the artifacts collection
Here's an update to #jasongarber's answer which uses the more recent mongo 'bulkWrite' operation (Read docs here), and also keeps the whole process asynchronous so you can run it as part of a wider script which depends on its' completion.
async function moveDocuments (sourceCollection, targetCollection, filter) {
const sourceDocs = await sourceCollection.find(filter)
console.log(`Moving ${await sourceDocs.count()} documents from ${sourceCollection.collectionName} to ${targetCollection.collectionName}`)
const idsOfCopiedDocs = await insertDocuments(targetCollection, sourceDocs)
const targetDocs = await targetCollection.find({_id: {$in: idsOfCopiedDocs}})
await deleteDocuments(sourceCollection, targetDocs)
console.log('Done!')
}
async function insertDocuments (collection, documents) {
const insertedIds = []
const bulkWrites = []
await documents.forEach(doc => {
const {_id} = doc
insertedIds.push(_id)
bulkWrites.push({
replaceOne: {
filter: {_id},
replacement: doc,
upsert: true,
},
})
})
if (bulkWrites.length) await collection.bulkWrite(bulkWrites, {ordered: false})
return insertedIds
}
async function deleteDocuments (collection, documents) {
const bulkWrites = []
await documents.forEach(({_id}) => {
bulkWrites.push({
deleteOne: {
filter: {_id},
},
})
})
if (bulkWrites.length) await collection.bulkWrite(bulkWrites, {ordered: false})
}
From MongoDB 3.0 up, you can use the copyTo command with the following syntax:
db.source_collection.copyTo("target_collection")
Then you can use the drop command to remove the old collection:
db.source_collection.drop()
I do like the response from #markus-w-mahlberg, however at times, I have seen the need to keep it a bit simpler for people. As such I have a couple of functions that are below. You could naturally wrap thing here with bulk operators as he did, but this code works with new and old Mongo systems equally.
function parseNS(ns){
//Expects we are forcing people to not violate the rules and not doing "foodb.foocollection.month.day.year" if they do they need to use an array.
if (ns instanceof Array){
database = ns[0];
collection = ns[1];
}
else{
tNS = ns.split(".");
if (tNS.length > 2){
print('ERROR: NS had more than 1 period in it, please pass as an [ "dbname","coll.name.with.dots"] !');
return false;
}
database = tNS[0];
collection = tNS[1];
}
return {database: database,collection: collection};
}
function insertFromCollection( sourceNS, destNS, query, batchSize, pauseMS){
//Parse and check namespaces
srcNS = parseNS(sourceNS);
destNS = parseNS(destNS);
if ( srcNS == false || destNS == false){return false;}
batchBucket = new Array();
totalToProcess = db.getDB(srcNS.database).getCollection(srcNS.collection).find(query,{_id:1}).count();
currentCount = 0;
print("Processed "+currentCount+"/"+totalToProcess+"...");
db.getDB(srcNS.database).getCollection(srcNS.collection).find(query).addOption(DBQuery.Option.noTimeout).forEach(function(doc){
batchBucket.push(doc);
if ( batchBucket.length > batchSize){
db.getDB(destNS.database).getCollection(destNS.collection)insert(batchBucket);
currentCount += batchBucket.length;
batchBucket = [];
sleep (pauseMS);
print("Processed "+currentCount+"/"+totalToProcess+"...");
}
}
print("Completed");
}
/** Example Usage:
insertFromCollection("foo.bar","foo2.bar",{"type":"archive"},1000,20);
You could obviously add a db.getSiblingDB(srcNS.database).getCollection(srcNS.collection).remove(query,true)
If you wanted to also remove the records after they are copied to the new location. The code can easily be built like that to make it restartable.
I had 2297 collection for 15 million of documents but some collection was empty.
Using only copyTo the script failed, but with this script optimization:
db.getCollectionNames().forEach(function(collname) {
var c = db.getCollection(collname).count();
if(c!==0){
db.getCollection(collname).copyTo('master-collection');
print('Copied collection ' + collname);
}
});
all works fine for me.
NB: copyTo is deprecated because it block the read/write operation: so I think is fine if you know that the database is not usable during this operation.
In my case for each didn't work. So I had to make some changes.
var kittySchema = new mongoose.Schema({
name: String
});
var Kitten = mongoose.model('Kitten', kittySchema);
var catSchema = new mongoose.Schema({
name: String
});
var Cat = mongoose.model('Cat', catSchema);
This is Model for both the collection
`function Recursion(){
Kitten.findOne().lean().exec(function(error, results){
if(!error){
var objectResponse = results;
var RequiredId = objectResponse._id;
delete objectResponse._id;
var swap = new Cat(objectResponse);
swap.save(function (err) {
if (err) {
return err;
}
else {
console.log("SUCCESSFULL");
Kitten.deleteOne({ _id: RequiredId }, function(err) {
if (!err) {
console.log('notification!');
}
else {
return err;
}
});
Recursion();
}
});
}
if (err) {
console.log("No object found");
// return err;
}
})
}`
I planned to arhieve 1000 records at a time using bulkinsert and bulkdelete methods of pymongo.
For both source and target
create mongodb objects to connect to the database.
instantiate the bulk objects. Note: I created a backup of bulk objects too. This will help me to rollback the insertion or removal when an error occurs.
example:
For source
// replace this with mongodb object creation logic
source_db_obj = db_help.create_db_obj(source_db, source_col)
source_bulk = source_db_obj.initialize_ordered_bulk_op()
source_bulk_bak = source_db_obj.initialize_ordered_bulk_op()
For target
// replace this with mogodb object creation logic
target_db_obj = db_help.create_db_obj(target_db, target_col)
target_bulk = target_db_obj.initialize_ordered_bulk_op()
target_bulk_bak = target_db_obj.initialize_ordered_bulk_op()
Obtain the source records that matches the filter criteria
source_find_results = source_db_obj.find(filter)
Loop through the source records
create target and source bulk operations
Append archived_at field with the current datetime to the target collection
//replace this with the logic to obtain the UTCtime.
doc['archived_at'] = db_help.getUTCTime()
target_bulk.insert(document)
source_bulk.remove(document)
for rollback in case of any errors or exceptions, create target_bulk_bak and source_bulk_bak operations.
target_bulk_bak.find({'_id':doc['_id']}).remove_one()
source_bulk_bak.insert(doc)
//remove the extra column
doc.pop('archieved_at', None)
When the record count to 1000, execute the target - bulk insertion and source - bulk removal. Note: this method takes target_bulk and source_bulk objects for execution.
execute_bulk_insert_remove(source_bulk, target_bulk)
When exception occurs, execute the target_bulk_bak removal and source_bulk_bak inesertions. This would rollback the changes. Since mongodb doesn't have rollback, I came up with this hack
execute_bulk_insert_remove(source_bulk_bak, target_bulk_bak)
Finally re-initialize the source and target bulk and bulk_bak objects. This is necessary because you can use them only once.
Complete code
def execute_bulk_insert_remove(source_bulk, target_bulk):
try:
target_bulk.execute()
source_bulk.execute()
except BulkWriteError as bwe:
raise Exception(
"could not archive document, reason: {}".format(bwe.details))
def archive_bulk_immediate(filter, source_db, source_col, target_db, target_col):
"""
filter: filter criteria for backup
source_db: source database name
source_col: source collection name
target_db: target database name
target_col: target collection name
"""
count = 0
bulk_count = 1000
source_db_obj = db_help.create_db_obj(source_db, source_col)
source_bulk = source_db_obj.initialize_ordered_bulk_op()
source_bulk_bak = source_db_obj.initialize_ordered_bulk_op()
target_db_obj = db_help.create_db_obj(target_db, target_col)
target_bulk = target_db_obj.initialize_ordered_bulk_op()
target_bulk_bak = target_db_obj.initialize_ordered_bulk_op()
source_find_results = source_db_obj.find(filter)
start = datetime.now()
for doc in source_find_results:
doc['archived_at'] = db_help.getUTCTime()
target_bulk.insert(doc)
source_bulk.find({'_id': doc['_id']}).remove_one()
target_bulk_bak.find({'_id': doc['_id']}).remove_one()
doc.pop('archieved_at', None)
source_bulk_bak.insert(doc)
count += 1
if count % 1000 == 0:
logger.info("count: {}".format(count))
try:
execute_bulk_insert_remove(source_bulk, target_bulk)
except BulkWriteError as bwe:
execute_bulk_insert_remove(source_bulk_bak, target_bulk_bak)
logger.info("Bulk Write Error: {}".format(bwe.details))
raise
source_bulk = source_db_obj.initialize_ordered_bulk_op()
source_bulk_bak = source_db_obj.initialize_ordered_bulk_op()
target_bulk = target_db_obj.initialize_ordered_bulk_op()
target_bulk_bak = target_db_obj.initialize_ordered_bulk_op()
end = datetime.now()
logger.info("archived {} documents to {} in ms.".format(
count, target_col, (end - start)))