Better Alternative to MongoDB db.collection.group() - mongodb

I'm looking to group a collection of documents, then format them into something I can query dynamically, then query on the resulting collection. I've been able to group the collection and format it successfully using db.collection.group() but there are limitations when it comes to sharding that would prevent me from using it in production.
Here's that code:
/**
 * Reducer for db.collection.group(): files a document's attributes under
 * result[provider][collection].
 *
 * @param {Object} current - document being folded into the group
 * @param {Object} result  - accumulator for the group (mutated in place)
 */
var reduceFunction = function(current, result){
    var providerName = current.provider;
    // Documents without a collection are filed under the provider's own name.
    var collectionName = current.collection ? current.collection : providerName;
    if (!result[providerName]) {
        result[providerName] = {};
    }
    // A later document for the same provider/collection replaces the earlier one.
    var providerBucket = result[providerName];
    providerBucket[collectionName] = current.attributes;
};
// Group the `identity` documents by their `cookie` value, folding every
// document of a group into one accumulator with reduceFunction.
// NOTE(review): `organization` is not defined in this snippet — presumably
// supplied by the surrounding code; confirm.
var result = db.identity.group( {
key: { cookie: 1 },
// Only documents whose organizationId equals `organization` are grouped.
cond: { organizationId: organization },
reduce: reduceFunction,
// Each group's accumulator starts out as an empty object.
initial: {}
} );
Here's an example of the input documents:
{
"_id" : ObjectId("566c92712ae892aeb5278467"),
"provider" : "myProvider",
"collection": "myCollection",
"ip" : "111.111.111.111",
"cookie" : "12381902348asdf",
"updated" : ISODate("2015-12-12T21:32:33.824Z"),
"attributes" : {
company: "My Company"
}
}
Here's what my desired output would be (multiple input documents grouped by cookie)
{
"cookie" : "12381902348asdf",
"myProvider": {
"myCollection": {
"company": "myCompany"
}
},
"myOtherProvider": {
"myOtherCollection": {
"company": "myCompany"
}
}
}
so that way I could query using something like
collection.identity.find({"myProvider.myCollection.company": {$regex: /my/}})
This allows me to have conditions for multiple provider/collection/field groupings without having conflicts in the field names.

Related

Find records with field in a nested document when parent fields are not known

With a collection with documents like below, I need to find the documents where a particular field - eg. lev3_field2 (in document below) is present.
I tried the following, but this doesn't return any results, though the field lev3_field2 is present in some documents.
db.getCollection('some_collection').find({"lev3_field2": { $exists: true, $ne: null } })
{
"_id" : ObjectId("5884de15bebf420cf8bb2857"),
"lev1_field1" : "139521721",
"lev1_field2" : "276183",
"lev1_field3" : {
"lev2_field1" : "4",
"lev2_field2" : {
"lev3_field1" : "1",
"lev3_field2" : {
"lev4_field1" : "1",
"lev4_field2" : "1"
},
"lev3_field3" : "5"
},
"lev2_field3" : {
"lev3_field3" : "0",
"lev3_field4" : "0"
}
}
}
update1: This is only an example; in the real documents it is not known what the parent fields of the target field are. So instead of lev3_field2, I would be looking for `levM_fieldN`.
update2: Speed is not a primary concern for me; I can work with somewhat slower options as well. The primary goal is to find the documents matching the criteria discussed. Once a document is found and the schema is understood, the query can be rewritten for performance by including the parent keys.
To search for a key in a nested document you need to iterate over the document's fields recursively. You can do this in JavaScript with the help of the $where operator in MongoDB.
The query below checks whether a key name exists in a document or in any of its subdocuments.
I have checked this with the example you have given, and it is working perfectly fine.
// Find every document that contains the key `search_key` at any nesting
// depth. $where runs the function once per document, with `this` bound to
// the document being tested.
db.getCollection('test').find({ $where: function () {
    var search_key = "lev3_field2";

    // Returns true when `document` or any nested object/array inside it has
    // an own key named `search_key`.
    function check_key(document) {
        return Object.keys(document).some(function (key) {
            if (key === search_key) {
                return true;
            }
            var value = document[key];
            // typeof null is "object"; without the null guard the recursion
            // would call Object.keys(null) and throw on null field values.
            return value !== null && typeof value === "object" && check_key(value);
        });
    }

    return check_key(this);
}});
There is no built-in function to iterate over document keys in MongoDB, but you can achieve this with MapReduce. The main advantage is that all the code is executed directly in the MongoDB database, and not in the js client, so there is no network overhead, hence it should be faster than client side js
here is the script :
// Save a helper in MongoDB's server-side JavaScript store so that the map
// function below can call it by name. This only needs to be done once.
db.system.js.save({
    _id: 'findObjectByLabel',
    /**
     * Reports whether `obj`, or any object nested inside it, has an own key
     * named `prop`.
     *
     * @param {Object} obj  - document or sub-document to inspect
     * @param {string} prop - key name to look for
     * @returns {boolean} true as soon as a matching key is found
     */
    value: function (obj, prop) {
        // some() stops at the first match instead of visiting every key.
        return Object.keys(obj).some(function (key) {
            if (key === prop) {
                return true;
            }
            var child = obj[key];
            // typeof null is 'object'; skip nulls so Object.keys(null) is never called.
            return child !== null && typeof child === 'object' && findObjectByLabel(child, prop);
        });
    }
});

// Run the map/reduce job: emit { _id: <document _id>, value: true } for every
// document that contains the key somewhere in its structure.
db.ex.mapReduce(
    function () {
        // The result is taken from the helper's return value, so no state is
        // shared through global variables.
        if (findObjectByLabel(this, 'lev3_field2')) {
            emit(this._id, true);
        }
    },
    function (key, values) {
        // reduce must return the same type that map emits (a boolean here),
        // not the raw array of values.
        return values.some(Boolean);
    },
    {
        query: {},
        out: { inline: 1 }
    }
);
This outputs the following (run on 4 sample documents, only one of which contains 'lev3_field2'):
{
"results" : [
{
"_id" : ObjectId("5884de15bebf420cf8bb2857"),
"value" : true
}
],
"timeMillis" : 18,
"counts" : {
"input" : 4,
"emit" : 1,
"reduce" : 0,
"output" : 1
},
"ok" : 1
}
to run the script, copy it to a file name "script.js" for example, and then run from your shell
mongo databaseName < script.js
It's because you're trying to see if a nested field exists. This is the query you want:
db.some_collection.find({"lev1_field3.lev2_field2.lev3_field2": { $exists: true, $ne: null } })

How to query for an exact match in an unknown number of subfields in MongoDB

I have a collection where documents can have an unknown number of sub documents:
"agent_id": {
"0":"1234",
"1":"2234",...etc
How do I search for an exact match in all the agent_id sub-fields?
You need to dynamically create an object with properties that are a concatenation of the embedded document name agent_id with the dot (.) and the field name, enclosed in quotes, something like this:
var query = {
"agent_id.0": "78343",
"agent_id.1": "78343",
"agent_id.2": "78343",
"agent_id.3": "78343",
...
"agent_id.n": "78343"
}
One way to create the object is to generate the sub-document keys with mapReduce. The following demonstrates this approach. In the map-reduce operation, the set of keys found in the agent_id subdocument is written to an output collection "collection_keys" and then used to build the find() query expression:
Suppose you populate a sample collection
// Sample data: two documents whose agent_id sub-document uses different,
// non-contiguous numeric keys. Only the first one contains the value "1234".
db.collection.insert([
{
"agent_id": {
"0":"1234",
"1":"2234",
"56":"8451",
"74":"1475",
"10":"1234"
}
},
{
"agent_id": {
"5":"5874",
"18":"2351"
}
}
])
Running the following mapReduce operation
// Step 1: collect every key that appears in any agent_id sub-document.
// The output collection ends up with one document per distinct key (as _id).
var mr = db.runCommand({
    mapreduce: "collection",
    map: function () {
        for (var field in this.agent_id) {
            emit(field, null);
        }
    },
    // Only the keys matter, so the reduced value is always null.
    reduce: function (field, ignored) {
        return null;
    },
    out: "collection" + "_keys"
});

// Step 2: build one { "agent_id.<key>": value } clause per collected key and
// combine the clauses with $or.
var value = "1234";
var query = {
    "$or": db[mr.result].distinct("_id").map(function (key) {
        var clause = {};
        clause["agent_id." + key] = value;
        return clause;
    })
};
printjson(query);
will produce:
{
"$or" : [
{
"agent_id.0" : "1234"
},
{
"agent_id.1" : "1234"
},
{
"agent_id.10" : "1234"
},
{
"agent_id.18" : "1234"
},
{
"agent_id.5" : "1234"
},
{
"agent_id.56" : "1234"
},
{
"agent_id.74" : "1234"
}
]
}
You can then use the query document in your find() query:
db.collection.find(query)
which will produce the result:
/* 0 */
{
"_id" : ObjectId("561d5312cd05efc95a1ea1f4"),
"agent_id" : {
"0" : "1234",
"1" : "2234",
"56" : "8451",
"74" : "1475",
"10" : "1234"
}
}

Upserting on embedded document

I have the following document structure
{
"_id" : "NmBYYasdsa",
"objectId" : "asdsd",
"text" : "test",
....
"publishedAt" : ISODate("2015-05-28T15:31:51Z"),
"updatedAt" : ISODate("2015-05-28T15:31:51Z"),
"data" : {
.....
"likeCount" : 0,
"replyCount" : 0
}
}
This is used to synchronise my database with an external API. To do this, I poll the API once every minute and do a bulk upsert, matching on the object id to keep my database up to date.
Problem is that the data subdocument doesn't get updated when upserting, any ideas as to why?
My bulkwrite method
/**
 * Bulk-upserts `documents` into this collection, matching each one against
 * existing documents on the field named by `matcher`.
 *
 * @param {string} matcher   - name of the field used to build each filter
 * @param {Object[]} documents - documents to upsert (not modified)
 * @param {Object} [options] - options passed through to bulkWrite
 * @throws {Error} when `documents` is empty
 */
Mongo.Collection.prototype.upsertBulk = function(matcher, documents, options) {
    if (_.isEmpty(documents)) { throw new Error('Empty list of documents provided'); }
    options = options || {};
    var operations = documents.map(function(_document) {
        // Copy every field except _id, so the caller's object is left untouched.
        var fields = {};
        Object.keys(_document).forEach(function(key) {
            if (key !== '_id') { fields[key] = _document[key]; }
        });
        var filter = {};
        filter[matcher] = _document[matcher];
        return {
            updateOne: {
                filter: filter,
                // _id is immutable: a fresh _id inside $set makes the update of
                // an already-existing document fail. Generate it only when the
                // upsert actually inserts a new document.
                update: { $set: fields, $setOnInsert: { _id: Random.id() } },
                upsert: true
            }
        };
    });
    this.__mongoCollection(function(collection) {
        collection.bulkWrite(operations, options, function(error, result) {
            if (error) { throw error; }
            return result;
        });
    });
};

How to do a find iterating in a array field

Hi, I have an Express.js app using MongoDB.
First I find a TV by id in my "tvs" collection. I get it, but now I want to find the info for all its users in another collection, "users".
This is my JSON for each collection:
tvs
{
"_id" : ObjectId("5203af83396d285ea2ecff8f"),
"brand" : "LG",
"comments" : [{
"user" : ObjectId("521dc636eda03d0f9cab3568"),
"text" : "Sold!"
}, {
"user" : ObjectId("521b2785eda03d0f9cab3566"),
"text" : "Nice TV"
}],
"model" : "47LS5600",
"price" : 499.0
}
users
{
"_id" : ObjectId("521b2785eda03d0f9cab3566"),
"name" : {
"first" : "Ruben",
"last" : "Montes"
}
}
And this is my code
var tvs = db.collection("tvs");
var users = db.collection("users");
exports.findById = function (req, res) {
var id = req.params.id;
tvs.findOne({'_id': new BSON.ObjectID(id)}, function (err, tv) {
users.find( { _id : tv.comments.user_id }).toArray(function (err, items) {
res.send( { tv: tv, users: items } );
});
})
}
I need to know how to iterate over the comments array of the tvs document to get the info of each user who posted a comment.
users.find( { _id : tv.comments.user_id })
You can do a bit more logic to efficiently grab the users as a batch using the $in operator.
var mongodb = require('mongodb')
, MongoClient = require('mongodb').MongoClient
, Server = require('mongodb').Server;
// Demo: load one TV, fetch the names of all users who commented on it with a
// single $in query, and print each comment with the user id replaced by the
// user's name. The connection is closed on every path.
MongoClient.connect('mongodb://127.0.0.1:27017/test', function (err, db) {
    if (err) throw err;
    var tvs = db.collection('tvs');
    var users = db.collection('users');
    var userNames = {};
    var tvId = new mongodb.ObjectID("5203af83396d285ea2ecff8f"); // hard-code
    // find a TV
    tvs.findOne({ _id : tvId }, function (err, tv) {
        if (err) {
            db.close();
            throw err;
        }
        if (!tv || !tv.comments || tv.comments.length === 0) {
            // Nothing to look up; still release the connection so the
            // process can exit.
            db.close();
            return;
        }
        // build a list of all user IDs used in comments
        // (duplicates are not filtered; $in tolerates them)
        var allUserIds = tv.comments.map(function (comment) {
            return comment.user;
        });
        // using the list of user IDs, grab all of them in one query
        // and just return the name
        users.find({_id: { $in: allUserIds }}, { name: 1 })
            .toArray(function (err, users_list) {
                db.close(); // done with the database for this demo
                if (err) throw err;
                // if we got some
                if (users_list && users_list.length > 0) {
                    // index the names by user id
                    users_list.forEach(function (user) {
                        userNames[user._id] = user.name;
                    });
                    console.log("All comments ========");
                    tv.comments.forEach(function (comment) {
                        // swap id for name
                        comment.user = userNames[comment.user];
                        console.log(comment);
                    });
                }
            });
    });
});
I've used find and $in with an array of all userIds found in the comments for a single "tv". By using $in, it significantly reduces the number of calls needed to MongoDB to fetch single User documents. Also, using the second parameter of find, I've reduced the returned fields to just be name.
FYI -- I did simplify your structure to just be 'name' rather than 'first' and 'last'. You certainly can change it to match your exact needs.

Search in an array of an embedded object in MongoDB

Given this structure for a school object:
{
"grade_spans" :
{
"0": {
"grade_span_key" : "K_5",
"name": "Elementary School"
},
"1": {
"grade_span_key" : "6_8",
"name": "Junior High-School"
}
}
}
How do I find a school for a given grade_span_key?
db.schools.find({ "grade_span_key": "K_5" })
returns empty.
Update: Sorry, I copied the structure incorrectly. It's actually an Embedded Object not a collection.
Update #2: There was a Doctrine2 annotation I was using incorrectly: @MongoDB\EmbedMany(strategy="set"). I changed the strategy to pushAll (which is the default).
If this field is just embedded in the main document, @sergio's answer will work just fine, and it is not clear why his query wouldn't work, since you only provide an example of the embedded structure and not of the whole document.
Also, as @JohnnyHK says, rebuild that object as an array, since dynamic keys would be harder to query in this case.
If you are looking to pick out only the matching rows from the embedded document and not the full document, this is a little harder but is possible:
// Return, per school, only the grade spans whose key is "K_5".
// The pipeline must be an array of stages; wrapping the stages in { ... }
// is a syntax error.
// Assumes grade_spans is stored as an array — TODO confirm.
db.schools.aggregate([
    // one output document per grade span
    {$unwind: "$grade_spans"},
    // keep only the requested span
    {$match: {"grade_spans.grade_span_key": "K_5"}},
    // put the surviving spans back together per school
    {$group: {_id: "$_id", grade_spans: {$push: "$grade_spans"}}}
])
Something like the above should return a document of the structure:
{
_id: {},
grade_spans:[{
"grade_span_key" : "K_5",
"name" : "Elementary School"
}]
}
You should use full path to the property, in dotted notation.
> db.schools.find({"grade_spans.grade_span_key": "K_5"})
{
"_id" : ObjectId("50801cc5ab582e310adc0e41"),
"grade_spans" : [
{
"grade_span_key" : "K_5",
"name" : "Elementary School"
},
{
"grade_span_key" : "6_8",
"name" : "Junior High-School"
}
]
}
Given this structure :
{
"grade_spans" : {
"0": { "grade_span_key" : "K_5",
"name": "Elementary School" },
"1": { "grade_span_key" : "6_8",
"name": "Junior High-School" }
}
}
You can try with map/reduce function :
/**
 * Map step: for every entry of this document's grade_spans, emits the
 * entry's grade_span_key (e.g. "K_5" or "6_8") together with a
 * { schools: [name] } value holding that entry's name.
 */
var mapFunction = function() {
    var spans = this.grade_spans;
    for (var index in spans) {
        var span = spans[index];
        // The value is shaped like the reducer's output: a one-element list.
        emit(span.grade_span_key, { schools: [span.name] });
    }
};
/**
 * Reduce step: concatenates the `schools` lists of all values emitted for
 * one key into a single { schools: [...] } object.
 *
 * @param {string} key      - the grade span key being reduced
 * @param {Object[]} values - objects of the form { schools: [...] }
 * @returns {Object} { schools: [...] } with every name, in input order
 */
var reduceFunction = function(key, values) {
    var merged = [];
    values.forEach(function (entry) {
        Array.prototype.push.apply(merged, entry.schools);
    });
    return { schools: merged };
};
// Run the map/reduce job over the `schools` collection using mapFunction and
// reduceFunction, writing the results to the "map_reduce_grade_spans"
// collection, where each grade span key becomes the _id of one result document.
db.schools.mapReduce(
mapFunction,
reduceFunction,
{ out: "map_reduce_grade_spans", sort: {_id:1} }
)
And then :
db.map_reduce_grade_spans.find({_id:"K_5"});