mondgo db does not save with large documents - mongodb

I'm iterating over a an object with 10000 key/val pairs and creating 'gameItem' docs and adding them to my 'items' field as a hash object and then saving them to my mongo collection like so.
Here's my main collection
var mongoose = require('mongoose');
var Schema = mongoose.Schema;
var myItemDbSchema = new Schema({
db_num: Number,
last_updated: Number,
items: {type: {}, default: {}}
}, { minimize: false });
module.exports = mongoose.model('myItemDb', myItemDbSchema);
and then my logic
PartnerAPI.getItems(function(err, partnerItems){
myItemDb.findOne({"db_num": 1}, function(myItemDB){
for(var partnerItem in partnerItems){
if(!myItemDB.items[partnerItem]){
var newItem = new gameItem({
name: partnerItems[partnerItem].name
});
myItemDB.items[partnerItem] = newItem;
}
myItemDB.markModified('items');
myItemDB.save(function(err){
logger.info("Saved");
});
}
});
});
partner items looks like this
{
'ballistic-weapon-1':{name:'bl1', value:'rare'}
},
{
'ballistic-weapon-2':{name:'bl2', value:'rare'}
}
This works fine if I only add no more than 300 to the items field and then mark items as modified and then save... but if I try to add all 10,000 and try to save - they don't. Is there a size limit for how many docs I can add to a field with a type {}?
Is there a better way to save these docs without having to write some logic to limit how many saves I do each iteration?

There is 16mb document limit. So if your document (BSON) is larger than 16 mb, mongodb throws exception.
docs: https://docs.mongodb.com/manual/reference/limits/

Related

How to avoid duplicates while looping through a sorted mongodb cursor

I try to avoid having duplicates returned by a mongodb cursor when write operations occurs while looping through documents.
The following code will loop for ever as we modifiy the updatedAt date while reading the collection.
const mongoose = require('mongoose');
mongoose.Promise = require('bluebird');
const Model = mongoose.model(
'Cat',
mongoose.Schema({ name: String }, { timestamps: true }).index({ updatedAt: 1 })
);
mongoose
.connect('mongodb://localhost/test', { useMongoClient: true })
.then(() => {
const docs = [];
for (var i = 0; i < 2000; i++) {
docs.push({ name: i });
}
return Model.insertMany(docs)
})
.then(() => {
const c = Model.find({}).sort({ updatedAt: 1 }).cursor();
return c.eachAsync((doc) => {
doc.markModified('name');
console.log(doc.name);
return doc.save();
});
});
I tried to use cursor.snapshot() and it works well but it cannot be used on a sorted query.
The only alternative I found is to send a count request first then apply a limit to the cursor without using snapshot at all. This seems to work as mongo seems to stack up modified document FIFO style.
The problem is that document can be inserted between the count and the cursor queries which leads to incomplete data returned by the cursor.
How can I loop through a cursor, using a sorted query without ending up with duplicates documents?

Performance on sorting by populated field using mongoose

I have learned that it is not possible to sort by populated field in mongodb during querying. Suppose I have a schema like below, and I have 1 million data in record. And i only need to return 10 records for each query, depending of the column sorting (asc/desc) and page defined. What are the effective solution to this problem?
Simplify problem:
In the front end, I will have a data table with column firstname, lastname, test.columnA and test.columnB. Each of this column is sortable by user.
My initial solution was to query everything out in mongoose, flattening it to json and using javascript to reorder and finally response the final 10 data only. But this will have bad performance impact with increasing data set.
var testSchema = {
columnA: { type: String },
columnB: { type: String },
}
var UserSchema = {
firstname: { type: string },
lastname: { type: string },
test: {
type: ObjectId,
ref: 'Test'
}
}

Mongoose: Populate a populated field

I'm using MongoDB as a log keeper for my app to then sync mobile clients. I have this models set up in NodeJS:
var UserArticle = new Schema({
date: { type: Number, default: Math.round((new Date()).getTime() / 1000) }, //Timestamp!
user: [{type: Schema.ObjectId, ref: "User"}],
article: [{type: Schema.ObjectId, ref: "Article"}],
place: Number,
read: Number,
starred: Number,
source: String
});
mongoose.model("UserArticle",UserArticle);
var Log = new Schema({
user: [{type: Schema.ObjectId, ref: "User"}],
action: Number, // O => Insert, 1 => Update, 2 => Delete
uarticle: [{type: Schema.ObjectId, ref: "UserArticle"}],
timestamp: { type: Number, default: Math.round((new Date()).getTime() / 1000) }
});
mongoose.model("Log",Log);
When I want to retrive the log I use the follwing code:
var log = mongoose.model('Log');
log
.where("user", req.session.user)
.desc("timestamp")
.populate("uarticle")
.populate("uarticle.article")
.run(function (err, articles) {
if (err) {
console.log(err);
res.send(500);
return;
}
res.json(articles);
As you can see, I want mongoose to populate the "uarticle" field from the Log collection and, then, I want to populate the "article" field of the UserArticle ("uarticle").
But, using this code, Mongoose only populates "uarticle" using the UserArticle Model, but not the article field inside of uarticle.
Is it possible to accomplish it using Mongoose and populate() or I should do something else?
Thank you,
From what I've checked in the documentation and from what I hear from you, this cannot be achieved, but you can populate the "uarticle.article" documents yourself in the callback function.
However I want to point out another aspect which I consider more important. You have documents in collection A which reference collection B, and in collection B's documents you have another reference to documents in collection C.
You are either doing this wrong (I'm referring to the database structure), or you should be using a relational database such as MySQL here. MongoDB's power relies in the fact you can embed more information in documents, thus having to make lesser queries (having your data in a single collection). While referencing something is ok, having a reference and then another reference doesn't seem like you're taking the full advantage of MongoDB here.
Perhaps you would like to share your situation and the database structure so we could help you out more.
You can use the mongoose-deep-populate plugin to do this. Usage:
User.find({}, function (err, users) {
User.deepPopulate(users, 'uarticle.article', function (err, users) {
// now each user document includes uarticle and each uarticle includes article
})
})
Disclaimer: I'm the author of the plugin.
I faced the same problem,but after hours of efforts i find the solution.It can be without using any external plugin:)
applicantListToExport: function (query, callback) {
this
.find(query).select({'advtId': 0})
.populate({
path: 'influId',
model: 'influencer',
select: { '_id': 1,'user':1},
populate: {
path: 'userid',
model: 'User'
}
})
.populate('campaignId',{'campaignTitle':1})
.exec(callback);
}
Mongoose v5.5.5 seems to allow populate on a populated document.
You can even provide an array of multiple fields to populate on the populated document
var batch = await mstsBatchModel.findOne({_id: req.body.batchId})
.populate({path: 'loggedInUser', select: 'fname lname', model: 'userModel'})
.populate({path: 'invoiceIdArray', model: 'invoiceModel',
populate: [
{path: 'updatedBy', select: 'fname lname', model: 'userModel'},
{path: 'createdBy', select: 'fname lname', model: 'userModel'},
{path: 'aircraftId', select: 'tailNum', model: 'aircraftModel'}
]});
how about something like:
populate_deep = function(type, instance, complete, seen)
{
if (!seen)
seen = {};
if (seen[instance._id])
{
complete();
return;
}
seen[instance._id] = true;
// use meta util to get all "references" from the schema
var refs = meta.get_references(meta.schema(type));
if (!refs)
{
complete();
return;
}
var opts = [];
for (var i=0; i<refs.length; i++)
opts.push({path: refs[i].name, model: refs[i].ref});
mongoose.model(type).populate(instance, opts, function(err,o){
utils.forEach(refs, function (ref, next) {
if (ref.is_array)
utils.forEach(o[ref.name], function (v, lnext) {
populate_deep(ref.ref_type, v, lnext, seen);
}, next);
else
populate_deep(ref.ref_type, o[ref.name], next, seen);
}, complete);
});
}
meta utils is rough... want the src?
or you can simply pass an obj to the populate as:
const myFilterObj = {};
const populateObj = {
path: "parentFileds",
populate: {
path: "childFileds",
select: "childFiledsToSelect"
},
select: "parentFiledsToSelect"
};
Model.find(myFilterObj)
.populate(populateObj).exec((err, data) => console.log(data) );

Is it possible to extract only embedded documents from a Model in Mongoose?

I'd like to run a query on a Model, but only return embedded documents where the query matches. Consider the following...
var EventSchema = new mongoose.Schema({
typ : { type: String },
meta : { type: String }
});
var DaySchema = new mongoose.Schema({
uid: mongoose.Schema.ObjectId,
events: [EventSchema],
dateR: { type: Date, 'default': Date.now }
});
function getem() {
DayModel.find({events.typ : 'magic'}, function(err, days) {
// magic. ideally this would return a list of events rather then days
});
}
That find operation will return a list of DayModel documents. But what I'd really like is a list of EventSchemas alone. Is this possible?
It's not possible to fetch the Event objects directly, but you can restrict which fields your query returns like this:
DayModel.find({events.typ : 'magic'}, ['events'], function(err, days) {
...
});
You will still need to loop through the results to extract the actual embedded fields from the documents returned by the query, however.

How to access a preexisting collection with Mongoose?

I have a large collection of 300 question objects in a database test. I can interact with this collection easily through MongoDB's interactive shell; however, when I try to get the collection through Mongoose in an express.js application I get an empty array.
My question is, how can I access this already existing dataset instead of recreating it in express? Here's some code:
var mongoose = require('mongoose');
var Schema = mongoose.Schema;
mongoose.connect('mongodb://localhost/test');
mongoose.model('question', new Schema({ url: String, text: String, id: Number }));
var questions = mongoose.model('question');
questions.find({}, function(err, data) { console.log(err, data, data.length); });
This outputs:
null [] 0
Mongoose added the ability to specify the collection name under the schema, or as the third argument when declaring the model. Otherwise it will use the pluralized version given by the name you map to the model.
Try something like the following, either schema-mapped:
new Schema({ url: String, text: String, id: Number},
{ collection : 'question' }); // collection name
or model mapped:
mongoose.model('Question',
new Schema({ url: String, text: String, id: Number}),
'question'); // collection name
Here's an abstraction of Will Nathan's answer if anyone just wants an easy copy-paste add-in function:
function find (name, query, cb) {
mongoose.connection.db.collection(name, function (err, collection) {
collection.find(query).toArray(cb);
});
}
simply do find(collection_name, query, callback); to be given the result.
for example, if I have a document { a : 1 } in a collection 'foo' and I want to list its properties, I do this:
find('foo', {a : 1}, function (err, docs) {
console.dir(docs);
});
//output: [ { _id: 4e22118fb83406f66a159da5, a: 1 } ]
You can do something like this, than you you'll access the native mongodb functions inside mongoose:
var mongoose = require("mongoose");
mongoose.connect('mongodb://localhost/local');
var connection = mongoose.connection;
connection.on('error', console.error.bind(console, 'connection error:'));
connection.once('open', function () {
connection.db.collection("YourCollectionName", function(err, collection){
collection.find({}).toArray(function(err, data){
console.log(data); // it will print your collection data
})
});
});
Update 2022
If you get an MongoInvalidArgumentError: The callback form of this helper has been removed. error message, here's the new syntax using async/await:
const mongoose = require("mongoose");
mongoose.connect('mongodb://localhost/productsDB');
const connection = mongoose.connection;
connection.on('error', console.error.bind(console, 'connection error:'));
connection.once('open', async function () {
const collection = connection.db.collection("Products");
collection.find({}).toArray(function(err, data){
console.log(data); // it will print your collection data
});
});
I had the same problem and was able to run a schema-less query using an existing Mongoose connection with the code below. I've added a simple constraint 'a=b' to show where you would add such a constraint:
var action = function (err, collection) {
// Locate all the entries using find
collection.find({'a':'b'}).toArray(function(err, results) {
/* whatever you want to do with the results in node such as the following
res.render('home', {
'title': 'MyTitle',
'data': results
});
*/
});
};
mongoose.connection.db.collection('question', action);
Are you sure you've connected to the db? (I ask because I don't see a port specified)
try:
mongoose.connection.on("open", function(){
console.log("mongodb is connected!!");
});
Also, you can do a "show collections" in mongo shell to see the collections within your db - maybe try adding a record via mongoose and see where it ends up?
From the look of your connection string, you should see the record in the "test" db.
Hope it helps!
Something else that was not obvious, to me at least, was that the when using Mongoose's third parameter to avoid replacing the actual collection with a new one with the same name, the new Schema(...) is actually only a placeholder, and doesn't interfere with the exisitng schema so
var User = mongoose.model('User', new Schema({ url: String, text: String, id: Number}, { collection : 'users' })); // collection name;
User.find({}, function(err, data) { console.log(err, data, data.length);});
works fine and returns all fields - even if the actual (remote) Schema contains none of these fields. Mongoose will still want it as new Schema(...), and a variable almost certainly won't hack it.
Go to MongoDB website, Login > Connect > Connect Application > Copy > Paste in 'database_url' > Collections > Copy/Paste in 'collection' .
var mongoose = require("mongoose");
mongoose.connect(' database_url ');
var conn = mongoose.connection;
conn.on('error', console.error.bind(console, 'connection error:'));
conn.once('open', function () {
conn.db.collection(" collection ", function(err, collection){
collection.find({}).toArray(function(err, data){
console.log(data); // data printed in console
})
});
});
I tried all the answers but nothing worked out, finally got the answer hoe to do it.
var mongoose = require('mongoose');
mongoose.connect('mongodb://0.0.0.0:27017/local');
// let model = require('./test1');
setTimeout(async () => {
let coll = mongoose.connection.db.collection(<Your collection name in plural form>);
// let data = await coll.find({}, {limit:2}).toArray();
// let data = await coll.find({name:"Vishal"}, {limit:2}).toArray();
// let data = await coll.find({name:"Vishal"}, {projection:{player:1, _id:0}}).toArray();
let data = await coll.find({}, {limit:3, sort:{name:-1}}).toArray();
console.log(data);
}, 2000);
I have also mentioned some of the criteria to filter out. Delete and update can also be done by this.
Thanks.
Make sure you're connecting to the right database as well as the right collection within the database.
You can include the name of the database in the connection string.
notice databasename in the following connection string:
var mongoose = require('mongoose');
const connectionString = 'mongodb+srv://username:password#hosturl.net/databasename';
mongoose.connect(connectionString);