Looking for help with reading from MongoDB in Node.JS - mongodb

I have a number of records stored in MongoDB, and I'm trying to output them to the browser window by way of a Node.JS http server. I think I'm a good portion of the way along, but I'm missing a few little things that are keeping it from actually working.
The code below uses node-mongo-native to connect to the database.
If there is anyone around who can help me make those last few connections with working in node I'd really appreciate it. To be fair, I'm sure this is just the start.
var sys = require("sys");
var test = require("assert");
var http = require('http');
var Db = require('../lib/mongodb').Db,
Connection = require('../lib/mongodb').Connection,
Server = require('../lib/mongodb').Server,
//BSON = require('../lib/mongodb').BSONPure;
BSON = require('../lib/mongodb').BSONNative;
var host = process.env['MONGO_NODE_DRIVER_HOST'] != null ? process.env['MONGO_NODE_DRIVER_HOST'] : 'localhost';
var port = process.env['MONGO_NODE_DRIVER_PORT'] != null ? process.env['MONGO_NODE_DRIVER_PORT'] : Connection.DEFAULT_PORT;
sys.puts("Connecting to " + host + ":" + port);
/**
 * Serialises one document into a flat "key=value" string.
 *
 * @param {*} err  Error handed over by the cursor callback; not inspected here.
 * @param {Object|null} item  Document to serialise; null/undefined yields "".
 * @returns {string} Every enumerable key of `item` rendered as key=value,
 *                   concatenated with no separator between the pairs.
 */
function PutItem(err, item){
    var result = "";
    if (item != null) {
        // The loop variable is declared locally so it never becomes an implicit
        // global (which is a ReferenceError in strict mode / ES modules).
        for (var key in item) {
            result += key + '=' + item[key];
        }
    }
    // sys.puts(sys.inspect(item)) // debug output
    return result;
}
// Opens 'mydb', walks the 'test' collection and appends PutItem's output for
// every item the cursor yields to `result`, then returns `result`.
// NOTE(review): `return result` executes immediately after db.open() has been
// called; presumably the nested callbacks fire later, so the returned string
// would not yet contain anything they append — confirm against the driver.
function ReadTest(){
var db = new Db('mydb', new Server(host, port, {}), {native_parser:true});
var result = "";
// None of the `err` arguments in the callbacks below are checked.
db.open(function (err, db) {
db.collection('test', function(err, collection) {
collection.find(function (err, cursor){
cursor.each( function (err, item) {
// Appends to the `result` variable of the enclosing ReadTest call.
result += PutItem(err, item);
});
});
});
});
return result;
}
// HTTP server on port 8124: every request gets a 200 text/plain response whose
// body is "foo" followed by whatever ReadTest() returns.
http.createServer(function (req, res) {
res.writeHead(200, {'Content-Type': 'text/plain'});
// The response is ended in the same statement that calls ReadTest().
res.end("foo"+ReadTest());
}).listen(8124);
console.log('Server running on 8124');
Sources:
- mongo connectivity code:
https://github.com/christkv/node-mongodb-native/blob/master/examples/simple.js
- node. http code: nodejs.org
EDIT CORRECTED CODE
Thanks to Mic below who got me rolling in the right direction. For anyone interested, the corrected solution is here:
/**
 * Streams every document of the 'test' collection in 'mydb' to an HTTP response.
 *
 * A progress marker is written as each driver callback fires. The response is
 * ended with 'foo' once the cursor hands back a null item, or with an error
 * line as soon as any driver callback reports a failure.
 *
 * @param {Object} res  HTTP response; the caller has already written the headers.
 */
function ReadTest(res){
    var db = new Db('mydb', new Server(host, port, {}), {native_parser:true});

    // Reports a failed step to the console and the client, then releases the
    // connection if one had been opened. Without this, a failed step would be
    // followed by a dereference of an undefined db/collection/cursor.
    function fail(err, openDb) {
        sys.puts("ReadTest error: " + err);
        res.end("error: " + err + "\n");
        if (openDb) {
            openDb.close();
        }
    }

    res.write("in readtest\n");
    db.open(function (err, db) {
        if (err) { return fail(err, null); }
        res.write("now open\n");
        db.collection('test', function(err, collection) {
            if (err) { return fail(err, db); }
            res.write("in collection\n");
            collection.find(function (err, cursor){
                if (err) { return fail(err, db); }
                res.write("found\n");
                cursor.each( function (err, item) {
                    if (err) { return fail(err, db); }
                    res.write("now open\n");
                    var x = PutItem(err, item);
                    sys.puts(x);
                    res.write(x);
                    if (item == null) {
                        // A null item is treated as the end of the cursor:
                        // finish the response and release the connection that
                        // was opened for this request.
                        res.end('foo');
                        db.close();
                    }
                });
            });
        });
    });
}
// Serve on port 8124. The handler sends the plain-text header and a first
// line, then hands the response over to ReadTest, which is expected to end it.
http.createServer(function handleRequest(req, res) {
    var headers = {'Content-Type': 'text/plain'};
    res.writeHead(200, headers);
    res.write("start\n");
    ReadTest(res);
}).listen(8124);
console.log('Server running on 8124');

My guess is that you are returning result, writing the response, and closing the connection before anything is fetched from the db.
One solution would be to pass the response object to where you actually need it, something like:
function readTest(res) {
db.open(function (err, db) {
db.collection('test', function(err, collection) {
collection.find(function (err, cursor) {
res.writeHead(200, {'Content-type' : 'text/plain'});
cursor.each( function (err, item) { res.write(item); });
res.end();
...
Of course, you should also handle errors and try to avoid nesting too many levels, but that's a different discussion.

Instead of writing all the low-level Mongodb access code, you might want to try a simple library like mongous so that you can focus on your data, not on MongoDB quirks.

You might want to try mongoskin too.

Reading documents
To apply specific value filters, we can pass specific values to the find() command. Here is a SQL query:
SELECT * FROM Table1 WHERE name = 'ABC'
which is equivalent to the following in MongoDB (notice Collection1 for Table1):
db.Collection1.find({name: 'ABC'})
We can chain count() to get the number of results, pretty() to get a readable result. The results can be further narrowed by adding additional parameters:
db.Collection1.find({name: 'ABC', rollNo: 5})
It's important to notice that these filters are ANDed together, by default. To apply an OR filter, we need to use $or. These filters will be specified depending upon the structure of the document. Ex: for object attribute name for an object school, we need to specify filter like "school.name" = 'AUHS'
We're using here the DOT notation, by trying to access a nested field name of a field school. Also notice that the filters are quoted, without which we'll get syntax errors.
Equality matches on arrays can be performed:
on the entire arrays
based on any element
based on a specific element
more complex matches using operators
In the below query:
db.Collection1.find({name: ['ABC','XYZ']})
MongoDB is going to identify documents by an exact match to an array of one or more values. Now for these types of queries, the order of elements matters, meaning that we will only match documents that have ABC followed by XYZ and those are the only 2 elements of the array name
{name:["ABC","GHI","XYZ"]},
{name:["DEF","ABC","XYZ"]}
Given the documents above, let's say that we need to get all the documents where ABC is the first element of the name array. We'll use the filter below:
db.Schools.find({'name.0': 'ABC' })

Related

Unable to enter data in mongo database in express

// GET /wiki/:topicname — fetches the text for the requested topic through
// summary.wikitext and inserts it into the 'try1' collection.
router.get('/wiki/:topicname', function(req, res, next) {
var topicname = req.params.topicname;
console.log(topicname);
summary.wikitext(topicname, function(err, result) {
if (err) {
return res.send(err);
}
if (!result) {
return res.send('No article found');
}
// Assigned without var/let/const; `$` is not used again within this snippet.
$ = cheerio.load(result);
// The database handle is read off the request; where req.db is attached is not shown here.
var db = req.db;
var collection = db.get('try1');
// Success is decided purely by the absence of `err`; `doc` is never inspected.
collection.insert({ "topicname" : topicname, "content": result }, function (err, doc){
if (err) {
// If it failed, return error
res.send("There was a problem adding the information to the database.");
}
else {
// And forward to success page
res.send("Added succesfully");
}
});
});
// NOTE(review): the closing `});` of router.get(...) is not part of this snippet.
Using this code, I am trying to add the fetched content from Wikipedia in to the collection try1. The message "Added succesfully" is displayed. But the collection seems to be empty. The data is not inserted in the database
The data must be there: MongoDB uses the { w: 1, j: true } write concern options by default, so it only returns without an error if the document was truly inserted (assuming there was a document to insert).
Things you should consider:
-Do NOT use the insert function; it's deprecated. Use insertOne, insertMany or bulkWrite instead. ref.: http://mongodb.github.io/node-mongodb-native/2.1/api/Collection.html#insert
-The insert methods' callback has two parameters: an error (if there was one) and a result. The result object has several properties which can be used to check the outcome after the insert; for example, result.insertedCount returns the number of inserted documents.
So according to these in your code you only test for error but you can insert zero documents without an error.
Also its not clear to me where do you get your database name from. Is the following correct in your code? Are you sure you are connected to the database you want to use?
var db = req.db;
Also you don't have to enclose your property names with " in your insert method. The insert should look something like this:
// Insert one document and report either the error or the number of documents
// the driver says it inserted.
col.insertOne({topicname: topicname, content: result}, function (err, r) {
    if (!err) {
        console.log(r.insertedCount);
        return;
    }
    console.log(err);
});
Start your mongod server in a correct path,i.e, same path as that of what you are using to check the contents of collection.
sudo mongod --dbpath <actual-path>

Concurrent writes in mongodb

When I am inserting/updating a document in a collection, is the lock applied on the database or on the collection? Suppose I have two collections in the same database that are independent of each other, and I want to do write operations on them concurrently. Is this possible?
Here is the code I am using to test this:
var assert = require('assert'),
MongoClient = require('mongodb').MongoClient,
async = require('async');
var station_list = require('./station_list.json'),
trains_list = require('./trains_list.json');
var MONGO_URL = 'mongodb://localhost:27017/test';
// Flatten the two JSON fixtures into plain arrays of codes.
var stationList = station_list.stations.map(function (station) {
    return station.station_code;
});
var trainsList = trains_list.trains.map(function (train) {
    return train.code;
});
console.log('trains : ' + trainsList.length + ' stations : ' + stationList.length);
// Start both populate runs back to back, without waiting for either to finish.
populateTrains();
populateStations();
// Feeds every train code to populateTrainDb through async.eachSeries and
// asserts that the run finished without an error.
function populateTrains() {
    var onFinished = function (err) {
        assert.equal(null, err);
    };
    async.eachSeries(trainsList, populateTrainDb, onFinished);
}
// Upserts a single { code } document into 'trainsCon', then invokes `callback`.
// Every call opens its own connection via MongoClient.connect and calls
// db.close() once the write's callback has run.
function populateTrainDb(code, callback) {
MongoClient.connect(MONGO_URL, function (err, db) {
// A connection error fails this assertion.
assert.equal(null, err);
var jsonData = {};
jsonData.code = code;
// Replace the document matching this code; `upsert: true` asks for an insert when none matches.
db.collection('trainsCon').replaceOne(
{'code' : code}, jsonData, {upsert: true, w:1}, function (err, res) {
assert.equal(null, err);
db.close();
// Completion is signalled only after db.close() has been called.
callback();
});
});
}
// Feeds every station code to populateStationDb through async.eachSeries and
// asserts that the run finished without an error.
function populateStations() {
    var onFinished = function (err) {
        assert.equal(null, err);
    };
    async.eachSeries(stationList, populateStationDb, onFinished);
}
// Upserts a single { code } document into 'stationsCon', then invokes `callback`.
// Every call opens its own connection via MongoClient.connect and calls
// db.close() once the write's callback has run.
function populateStationDb(code, callback) {
MongoClient.connect(MONGO_URL, function (err, db) {
// A connection error fails this assertion.
assert.equal(null, err);
var jsonData = {};
jsonData.code = code;
// Replace the document matching this code; `upsert:true` asks for an insert when none matches.
db.collection('stationsCon').replaceOne(
{'code' : code}, jsonData, {upsert:true, w:1}, function (err, res) {
assert.equal(null, err);
db.close();
// Completion is signalled only after db.close() has been called.
callback();
});
});
}
The two json files : station_list.json and trains_list.json have around 5000 entries. So after running the given program I get this error after a while :
C:\Users\Adnaan\Desktop\hopSmart\node_modules\mongodb\lib\server.js:242
process.nextTick(function() { throw err; })
^
AssertionError: null == { [MongoError: connect EADDRINUSE 127.0.0.1:27017]
name: 'MongoError',
message: 'connect EADDRINUSE 127.0.0.1:27017' }
at C:\Users\Adnaan\Desktop\hopSmart\testing.js:52:10
at C:\Users\Adnaan\Desktop\hopSmart\node_modules\mongodb\lib\mongo_client.js:276:20
at C:\Users\Adnaan\Desktop\hopSmart\node_modules\mongodb\lib\db.js:224:14
at null.<anonymous> (C:\Users\Adnaan\Desktop\hopSmart\node_modules\mongodb\lib\server.js:240:9)
at g (events.js:273:16)
at emitTwo (events.js:100:13)
at emit (events.js:185:7)
at null.<anonymous> (C:\Users\Adnaan\Desktop\hopSmart\node_modules\mongodb-core\lib\topologies\server.js:301:68)
at emitTwo (events.js:100:13)
at emit (events.js:185:7)
When I checked the number of entries in the database, around 4000 entries had already been written to both collections. So what I took from the above experiment was that an error might have occurred when one write was attempted while a document was being written to the other collection.
So how should I proceed to have this concurrency without conflicting locks.
The answer to this question can be quite long and depends on various factors (MongoDB version, storage engine, type of operations you are doing, sharding, etc.). I can only recommend you to read carefully the Concurrency section of the MongoDB documentation, and in particular the lock granularity part.
Make sure to choose the right version of MongoDB first, as the behaviour varies greatly from one version to another (e.g. database locking in pre-3.0 vs. collection locking for most operations in post-3.0 using MMAPv1).
I don't think it's a concurrency issue with MongoDB; it could be the driver or even the test itself.
I created a sample application a couple of weeks ago to stress test MongoDB while working on a nasty bug. I used C# and MongoDB 3.0 on Windows 10. I inserted millions of documents in a multithreaded environment but couldn't crash MongoDB.
// Stress loop: Parallel.For runs the body for indices 0..9999, and each run
// inserts one batch of 100 documents with a single InsertMany call.
Parallel.For(0, 10000, (x =>
{
var lstDocs = new List<BsonDocument>();
for (var i = 0; i < 100; i++)
{
// Each list entry is a freshly constructed BsonDocument built from `doc`.
lstDocs.Add(new BsonDocument(doc));
}
collection.InsertMany(lstDocs);
lstDocs.Clear();
}));
You can find code in gist here.
You should not be calling MongoClient.connect every time. That's causing a ton of connections to open and close all the time which is overloading mongo. You should let the MongoClient manage the connection pool. Change it so that you store the db object from MongoClient.connect. Something like this:
// Connect once and keep the handle: later operations reuse this `db` (and the
// driver-managed connection pool behind it) instead of reconnecting per write.
var db;
MongoClient.connect(url, function (err, database) {
    if (err) {
        // Without a connection nothing else can work; fail loudly.
        throw err;
    }
    db = database;
});

Compare two fields in Waterline/Sails.js query

I want to compare two fields in my Waterline query in Sails.js application, e.g.: SELECT * FROM entity E WHERE E.current < E.max.
I've tried the following code, but it's expecting integer value to be passed to it instead of column name:
// Attempted column-to-column comparison. The right-hand side is the string
// literal 'max', which (per the question) is taken as a value rather than as
// the name of the `max` column.
Entity.find({
where: {
current: {'<': 'max'}
}
});
So, how do I compare two columns?
I ran some tests and at the same time read the Waterline documentation. There is no indication of anything that could do a comparison of two fields/columns via the .find() or .where() methods. Reference: http://sailsjs.org/documentation/concepts/models-and-orm/query-language
Instead, I have used .query() method to compare two fields via SQL string such as :
// Raw SQL lets the database compare the two columns directly.
Entity.query("SELECT * FROM `Entity` E WHERE E.current < E.max", function (err, res) {
    if (!err) {
        console.log('response',res);
        return;
    }
    //exception
});
The other way would be to use one query to get the max before putting it in the criteria.
// Two-step alternative: read the largest `column` value first, then pass it as
// a plain value to the second query's criteria.
EntityOne.find({
    sort: 'column DESC'
}).limit(1)
.exec(function (err, found) {
    if (err) {
        //exception
        return;
    }
    if (!found || found.length === 0) {
        // No records, so there is no maximum to compare against.
        return;
    }
    // With a descending sort limited to one row, the first record holds the
    // maximum; the criteria needs that value, not the record or the result array.
    var max = found[0].column;
    EntityTwo.find({
        where: {
            current: {'<': max}
        }
    }).exec(function (err, matches) {
        // Do stuff here
    });
});
The query method is ultimately going to be faster however

Average Aggregation Queries in Meteor

Ok, still in my toy app, I want to find out the average mileage on a group of car owners' odometers. This is pretty easy on the client but doesn't scale. Right? But on the server, I don't exactly see how to accomplish it.
Questions:
How do you implement something on the server then use it on the client?
How do you use the $avg aggregation function of mongo to leverage its optimized aggregation function?
Or alternatively to (2) how do you do a map/reduce on the server and make it available to the client?
The suggestion by #HubertOG was to use Meteor.call, which makes sense and I did this:
# Client side
# Template helper intended to display the average mileage computed on the server.
Template.mileage.average_miles = ->
answer = null
# The callback assigns `answer` once the server has replied. Per the question,
# the helper has already returned by then, so it yields the initial null.
Meteor.call "average_mileage", (error, result) ->
console.log "got average mileage result #{result}"
answer = result
console.log "but wait, answer = #{answer}"
answer
# Server side
# Method that sums `mileage` over every Mileage document and returns total / count.
Meteor.methods average_mileage: ->
console.log "server mileage called"
total = count = 0
r = Mileage.find({}).forEach (mileage) ->
total += mileage.mileage
count += 1
# With no documents this is 0 / 0.
console.log "server about to return #{total / count}"
total / count
That would seem to work fine, but it doesn't because as near as I can tell Meteor.call is an asynchronous call and answer will always be a null return. Handling stuff on the server seems like a common enough use case that I must have just overlooked something. What would that be?
Thanks!
As of Meteor 0.6.5, the collection API doesn't support aggregation queries yet because there's no (straightforward) way to do live updates on them. However, you can still write them yourself, and make them available in a Meteor.publish, although the result will be static. In my opinion, doing it this way is still preferable because you can merge multiple aggregations and use the client-side collection API.
// Publishes the result of a one-off Mongo aggregation over
// "server_collection_name" as documents of "client_collection_name".
// The result is static: it is computed once per subscription.
Meteor.publish("someAggregation", function (args) {
    var sub = this;
    // This works for Meteor 0.6.5
    var db = MongoInternals.defaultRemoteCollectionDriver().mongo.db;

    // Your arguments to Mongo's aggregation. Make these however you want.
    var pipeline = [
        { $match: doSomethingWith(args) },
        { $group: {
            _id: whatWeAreGroupingWith(args),
            count: { $sum: 1 }
        }}
    ];

    db.collection("server_collection_name").aggregate(
        pipeline,
        // Need to wrap the callback so it gets called in a Fiber.
        Meteor.bindEnvironment(
            function (err, result) {
                if (err) {
                    // Report the failure to the subscriber instead of marking
                    // an empty result set as ready.
                    sub.error(err);
                    return;
                }
                // Add each of the results to the subscription.
                _.each(result, function (e) {
                    // Generate a random disposable id for aggregated documents
                    sub.added("client_collection_name", Random.id(), {
                        key: e._id.somethingOfInterest,
                        count: e.count
                    });
                });
                sub.ready();
            },
            // Called when the wrapped callback itself throws.
            function (error) {
                Meteor._debug( "Error doing aggregation: " + error);
            }
        )
    );
});
The above is an example grouping/count aggregation. Some things of note:
When you do this, you'll naturally be doing an aggregation on server_collection_name and pushing the results to a different collection called client_collection_name.
This subscription isn't going to be live, and will probably be updated whenever the arguments change, so we use a really simple loop that just pushes all the results out.
The results of the aggregation don't have Mongo ObjectIDs, so we generate some arbitrary ones of our own.
The callback to the aggregation needs to be wrapped in a Fiber. I use Meteor.bindEnvironment here but one can also use a Future for more low-level control.
If you start combining the results of publications like these, you'll need to carefully consider how the randomly generated ids impact the merge box. However, a straightforward implementation of this is just a standard database query, except it is more convenient to use with Meteor APIs client-side.
TL;DR version: Almost anytime you are pushing data out from the server, a publish is preferable to a method.
For more information about different ways to do aggregation, check out this post.
I did this with the 'aggregate' method. (ver 0.7.x)
if (Meteor.isServer) {
    Future = Npm.require('fibers/future');
    Meteor.methods({
        // Runs param.pipe as an aggregation pipeline against the collection
        // named by param.collection and returns the result, blocking the
        // current fiber (not the process) until the driver calls back.
        'aggregate': function (param) {
            var fut = new Future();
            MongoInternals.defaultRemoteCollectionDriver().mongo._getCollection(param.collection).aggregate(param.pipe, function (err, result) {
                if (err) {
                    // Propagate driver errors to the caller of the method
                    // instead of resolving with an undefined result.
                    fut.throw(err);
                    return;
                }
                fut.return(result);
            });
            return fut.wait();
        },
        // Example invocation of 'aggregate' with a fixed pipeline.
        'test': function (param) {
            var _param = {
                pipe: [
                    { $unwind: '$data' },
                    { $match: {
                        'data.y': "2031",
                        'data.m': '01',
                        'data.d': '01'
                    }},
                    { $project: {
                        '_id': 0,
                        'project_id': "$project_id",
                        'idx': "$data.idx",
                        'y': '$data.y',
                        'm': '$data.m',
                        'd': '$data.d'
                    }}
                ],
                collection: "yourCollection"
            };
            // Hand the aggregation result back to whoever called 'test'.
            return Meteor.call('aggregate', _param);
        }
    });
}
If you want reactivity, use Meteor.publish instead of Meteor.call. There's an example in the docs where they publish the number of messages in a given room (just above the documentation for this.userId), you should be able to do something similar.
You can use Meteor.methods for that.
// server
Meteor.methods({
average: function() {
...
return something;
},
});
// client
var _avg = { /* Create an object to store value and dependency */
    dep: new Deps.Dependency()
};

// Arms the one-time fetch performed by the averageMiles helper.
Template.mileage.rendered = function() {
    _avg.init = true;
};

// Helper: returns the last value received from the 'average' method, fetching
// it from the server the first time it runs with `_avg.init` set.
Template.mileage.averageMiles = function() {
    _avg.dep.depend(); /* Make the function rerun when _avg.dep is touched */
    if(_avg.init) { /* Fetch the value from the server if not yet done */
        _avg.init = false;
        Meteor.call('average', function(error, result) {
            _avg.val = result;
            _avg.dep.changed(); /* Rerun the helper */
        });
    }
    return _avg.val;
};

Is there a way to perform a "dry run" of an update operation?

I am in the process of changing the schema for one of my MongoDB collections. (I had been storing dates as strings, and now my application stores them as ISODates; I need to go back and change all of the old records to use ISODates as well.) I think I know how to do this using an update, but since this operation will affect tens of thousands of records I'm hesitant to issue an operation that I'm not 100% sure will work. Is there any way to do a "dry run" of an update that will show me, for a small number of records, the original record and how it would be changed?
Edit: I ended up using the approach of adding a new field to each record, and then (after verifying that the data was right) renaming that field to match the original. It looked like this:
// Step 1: for every event matched by the {$type: 2} filter on `timestamp`,
// store the converted value under a new field and leave the original intact.
db.events.find({timestamp: {$type: 2}}).forEach(function (event) {
    event.newTimestamp = new ISODate(event.timestamp);
    db.events.save(event);
});
// Step 2 (after verifying the new field): rename it over the original field
// on all documents.
db.events.update(
    {},
    {$rename: {'newTimestamp': 'timestamp'}},
    {multi: true}
);
By the way, that method for converting the string times to ISODates was what ended up working. (I got the idea from this SO answer.)
My advice would be to add the ISODate as a new field. Once you have confirmed that everything looks good, you can then unset the string date.
Create a test environment with your database structure. Copy a handful of records to it. Problem solved. Not the solution you were looking for, I'm sure. But, I believe, this is the exact circumstances that a 'test environment' should be used for.
Select ID of particular records that you would like to monitor. place in the update {_id:{$in:[<your monitored id>]}}
Another option which depends of the amount of overhead it will cause you -
You can consider writing a script, that performs the find operation, add printouts or run in debug while the save operation is commented out. Once you've gained confidence you can apply the save operation.
// Dry-run migration: converts each matched event's timestamp in memory and
// records which documents would change. The save is left commented out until
// the output has been reviewed.
var changesLog = [];
var errorsLog = [];
// The callback's result parameter is named `docs` so it does not shadow the
// `events` collection being queried.
events.find({timestamp: {$type: 2}}, function (err, docs) {
    if (err) {
        debugger;
        throw err;
    }
    for (var i = 0; i < docs.length; i++) {
        // Progress indicator: current index / last index.
        console.log('events' + i + "/" + (docs.length - 1));
        var currentEvent = docs[i];
        // NOTE(review): ISODate is assumed to be in scope here — confirm for the runtime used.
        currentEvent.timestamp = new ISODate(currentEvent.timestamp);
        changesLog.push(currentEvent._id);
        // // ** Dry Run **
        // currentEvent.save(function (err) {
        //     if (err) {
        //         debugger;
        //         errorsLog.push(currentEvent._id + ", " + currentEvent.timestamp + ', ' + err);
        //         throw err;
        //     }
        // });
    }
    console.log('Done');
    console.log('Changes:');
    console.log(changesLog);
    console.log('Errors:');
    console.log(errorsLog);
});
// Dry run: for every document with a non-null `_manager`, build the new shape
// in memory and print it. Nothing is written while the save stays commented out.
db.collection.find({"_manager": { $exists: true, $ne: null }}).forEach(
function(doc){
doc['_managers']=[doc._manager]; // Wrap the single value in a one-element list
delete doc['_manager']; // Remove the old "_manager" key-value pair
printjson(doc); // Print the resulting document for inspection
// NOTE(review): the save below targets db.teams while the find above uses db.collection — confirm the intended collection.
//db.teams.save(doc); // Uncomment to persist the modified document
}
)
In my case the documents in the collection contain a _manager field and I would like to change it to a _managers list. I have tested it locally and it works as expected.
In the several latest versions of MongoDB (at least starting with 4.2), you could do that using a transaction.
const { MongoClient } = require('mongodb')
/**
 * Runs an update inside a transaction and either commits it or rolls it back.
 *
 * The update and the follow-up read both go through the same session, so the
 * read sees the uncommitted changes. With `dryRun` set, the transaction is
 * aborted afterwards and nothing is persisted.
 *
 * @param {{dryRun: boolean}} options  `dryRun: true` aborts, otherwise commits.
 * @returns {Promise<void>}
 */
async function main({ dryRun }) {
  const client = new MongoClient('mongodb://127.0.0.1:27017', {
    maxPoolSize: 1
  })
  const pool = await client.connect()
  const db = pool.db('someDB')
  const session = pool.startSession()
  session.startTransaction()
  try {
    const filter = { id: 'some-id' }
    const update = { $rename: { 'newTimestamp': 'timestamp' } }
    // This is the important bit
    const options = { session: session }
    await db.collection('someCollection').updateMany(
      filter,
      update,
      options // using session
    )
    // toArray() returns a promise; await it so the documents themselves are
    // logged rather than a pending Promise.
    const afterUpdate = await db.collection('someCollection')
      .find(
        filter,
        options // using session
      )
      .toArray()
    console.debug('updated documents', afterUpdate)
    if (dryRun) {
      // This will roll back any changes made within the session
      await session.abortTransaction()
    } else {
      await session.commitTransaction()
    }
  } finally {
    await session.endSession()
    await pool.close()
  }
}
const _ = main({ dryRun: true })