Get all objects for each tag - mongodb

I'm new in mongodbs mapreduce and for sure I have not completely understood it for now. And I have a problem, which I try to solve for few days without success.
I have a collection of let's say posts with a tags field. Now I want to mapreduce a new collection of tags. Where every tag have an array of all posts ids that have this one particular tag assigned.
one of my attempts to do this (which doesn't do this right)
m = function() {
for (var i in this.tags) {
emit(this.tags[i], {"ids" : [this._id]});
};
}
r = function(key, emits) {
var total = {ids : []}
for (var i in emits) {
emits[i].ids.forEach(function(id) {
total.ids.push(id);
}
}
return total;
};
I know, that I have to pivot the date some how around, but I just cant get my head wrapped around it.

I think you're missing a ")" in your reduce function to close the emits[i].ids.forEach(). Is this what you're trying to do?
r = function (key, values) {
var total = {ids:[]};
for (var i in values) {
values[i].ids.forEach(
function (id){
total.ids.push(id);
}
);
}
return total;
}
input
{_id:2, tags: ["dog", "Jenna"]}
{_id:1, tags: ["cat", "Jenna"]}
result:
{"results" : [
{"_id" : "Jenna",
"value" : {"ids" : [2,1]}
},
{"_id" : "cat",
"value" : {"ids" : [1]}
},
{"_id" : "dog",
"value" : {"ids" : [2]}
}
],
"timeMillis" : 1,
"counts" : {
"input" : 2,
"emit" : 4,
"reduce" : 1,
"output" : 3
},
"ok" : 1,
}

Related

MongoDB query to return documents that only have keys amongst a predefined set

The MongoDB query language allows filtering documents based on the existence or absence of a given field with the $exists operator.
Is there a way, with the MongoDB syntax, and given a set K of allowed fields, to exclude documents that have fields not in K from the results, but:
not knowing in advance which extra fields (outside K) can be encountered
not using JavaScript, that is, the $where operator?
Example:
{
"Some field" : "foo"
}
{
"Some field" : "bar",
"Some other field" : "foobar"
}
With the set K = [ "Some field" ], only the first document is to be returned.
Note how this is not to be confused with a projection, which would return both documents but removing the extra field.
I'm not sure if MongoDB do support such kind of operations out of box but you can achieve so with help of mapReduce.
Assuming your sample data set;
// Variable for map
var map = function () {
var isAcceptable = true;
Object.keys(this).forEach(function (key) {
if (key != "_id" && white_list.indexOf(key) == -1) {
isAcceptable = false;
}
});
if (isAcceptable == true) {
emit(1, this);
}
};
// Variable for reduce
var reduce = function (key, values) {
return values;
};
db.collection.mapReduce(
map,
reduce,
{
scope: {"white_list": ["Some field"]},
out: {"inline": 1}
}
);
Will return:
{
"results" : [
{
"_id" : 1,
"value" : {
"_id" : ObjectId("57cd7503e55de957c62fb9c8"),
"Some field" : "foo"
}
}
],
"timeMillis" : 13,
"counts" : {
"input" : 2,
"emit" : 1,
"reduce" : 0,
"output" : 1
},
"ok" : 1
}
Desired result will be in results.values of returned document. However, keep in mind limitation of MongoDB mapReduce and maximum size of BSON document.
Given a set of known fields K, you can construct a query that takes the set as input and gives a query with the $exists operator along with the corresponding fields projection. Using an example, suppose you have the following documents in a test collection
db.test.insert({ "fieldX": "foo", "fieldY": "bar", "fieldZ": 1 })
db.test.insert({ "fieldX": "123", "fieldY": "bar", "fieldZ": 2 })
db.test.insert({ "fieldY": "abc", "fieldZ": 3 })
db.test.insert({ "fieldX": "xyz", "fieldZ": 4 })
db.test.insert({ "fieldZ": 5 })
Then you can construct a query Q and a projection P from an input set K as follows:
var K = [ "fieldX", "fieldZ" ];
var or = K.map(function(field) {
var obj = {};
obj[field] = { "$exists": true };
return obj;
});
var P = K.reduce(function(doc, field) {
doc[field] = 1;
return doc;
}, {} );
var Q = { "$or": or };
db.test.find(Q, P);
Sample Output:
/* 1 */
{
"_id" : ObjectId("57cd78322c241f5870c82b7d"),
"fieldX" : "foo",
"fieldZ" : 1
}
/* 2 */
{
"_id" : ObjectId("57cd78332c241f5870c82b7e"),
"fieldX" : "123",
"fieldZ" : 2
}
/* 3 */
{
"_id" : ObjectId("57cd78332c241f5870c82b7f"),
"fieldZ" : 3
}
/* 4 */
{
"_id" : ObjectId("57cd78332c241f5870c82b80"),
"fieldX" : "xyz",
"fieldZ" : 4
}
/* 5 */
{
"_id" : ObjectId("57cd78332c241f5870c82b81"),
"fieldZ" : 5
}

Querying with array of parameters in mongodb

I have below collection in the DB, I want to retrieve data where birth month equal to given 2 months. lets say [1,2], or [4,5]
{
"_id" : ObjectId("55aa1e526fea82e9a4188f38"),
"name" : "Nilmini",
"birthDate" : 6,
"birthMonth" : 1
},
{
"_id" : ObjectId("55aa1e526fea82e9a4188f39"),
"name" : "Ruwan",
"birthDate" : 6,
"birthMonth" : 1
},{
"_id" : ObjectId("55aa1e526fea82e9a4188f40"),
"name" : "Malith",
"birthDate" : 6,
"birthMonth" : 1
},
{
"_id" : ObjectId("55aa1e526fea82e9a4188f7569"),
"name" : "Pradeep",
"birthDate" : 6,
"birthMonth" : 7
}
I use below query to get the result set, I could get the result for give one month,now I want to get results for multiple months.
var currentDay = moment().date();
var currentMonths = [];
var currentMonth = moment().month();
if(currentDay > 20){
currentMonths.push(moment().month());
currentMonths.push(moment().month()+1);
}else{
currentMonths.push(currentMonth);
}
// In blow query I am trying to pass the array to the 'birthMonth',
I'm getting nothing when I pass array to the query, I think there should be another way to do this,
Employee.find(
{
"birthDate": {$gte:currentDay}, "birthMonth": currentMonths
}, function(err, birthDays) {
res.json(birthDays);
});
I would really appreciate if you could help me to figure this out
You can use the $in operator to match against multiple values in an array like currentMonths.
So your query would be:
Employee.find(
{
"birthDate": {$gte:currentDay}, "birthMonth": {$in: currentMonths}
}, function(err, birthDays) {
res.json(birthDays);
});

Is it possible to retrieve a 'time span' from a MongoDB query, using the timestamp within an ObjectId?

We have a basic enquiry management tool that we're using to track some website enquiries in our administration suite, and we're using the ObjectId of each document in our enquiries collection to sort the enquiries by the date they were added.
{
"_id" : ObjectId("53a007db144ff47be1000003"),
"comments" : "This is a test enquiry. Please ignore. We'll delete it shortly.",
"customer" : {
"name" : "Test Enquiry",
"email" : "test#test.com",
"telephone" : "07890123456",
"mobile" : "07890123456",
"quote" : false,
"valuation" : false
},
"site" : [],
"test" : true,
"updates" : [
{
"_id" : ObjectId("53a007db144ff47be1000001"),
"status" : "New",
"status_id" : ObjectId("537de7c3a5e6e668ffc2335c"),
"status_index" : 100,
"substatus" : "New Web Enquiry",
"substatus_id" : ObjectId("5396bb9fa5e6e668ffc23388"),
"notes" : "New enquiry received from website.",
},
{
"_id" : ObjectId("53a80c977d299cfe91bacf81"),
"status" : "New",
"status_id" : ObjectId("537de7c3a5e6e668ffc2335c"),
"status_index" : 100,
"substatus" : "Attempted Contact",
"substatus_id" : ObjectId("53a80e06a5e6e668ffc2339e"),
"notes" : "In this test, we pretend that we've not managed to get hold of the customer on the first attempt.",
},
{
"_id" : ObjectId("53a80e539b966b8da5c40c36"),
"status" : "Approved",
"status_id" : ObjectId("52e77a49d85e95f00ebf6c72"),
"status_index" : 200,
"substatus" : "Enquiry Confirmed",
"substatus_id" : ObjectId("53901f1ba5e6e668ffc23372"),
"notes" : "In this test, we pretend that we've got hold of the customer after failing to contact them on the first attempt.",
}
]
}
Within each enquiry is an updates array of objects which also have an ObjectId as their main identity field. We're using an $unwind and $group aggregation to pull the first and latest updates, as well as the count of updates, making sure we only take enquiries where there have been more than one update (as one is automatically inserted when the enquiry is made):
db.enquiries.aggregate([
{
$match: {
"test": true
}
},
{
$unwind: "$updates"
},
{
$group: {
"_id": "$_id",
"latest_update_id": {
$last: "$updates._id"
},
"first_update_id": {
$first: "$updates._id"
},
"update_count": {
$sum: 1
}
}
},
{
$match: {
"update_count": {
$gt: 1
}
}
}
])
This results in the following output:
{
"result" : [
{
"_id" : ObjectId("53a295ad122ea80200000005"),
"latest_update_id" : ObjectId("53a80bdc7d299cfe91bacf7e"),
"first_update_id" : ObjectId("53a295ad122ea80200000003"),
"update_count" : 2
},
{
"_id" : ObjectId("53a007db144ff47be1000003"),
"latest_update_id" : ObjectId("53a80e539b966b8da5c40c36"),
"first_update_id" : ObjectId("53a007db144ff47be1000001"),
"update_count" : 3
}
],
"ok" : 1
}
This is then passed through to our code (node.js, in this case) where we perform a few operations on it and then present some information on our dashboard.
Ideally, I'd like to add another $group pipeline aggregation to the query which would subtract the timestamp of first_update_id from the timestamp of latest_update_id to give us a timespan, which we could then use $avg on.
Can anyone tell me if this is possible? (Thank you!)
As Neil already pointed out, you can't get to the timestamp from the ObjectId in the aggregation framework.
You said that speed is not important, so using MapReduce you can get what you want:
var map = function() {
if (this.updates.length > 1) {
var first = this.updates[0];
var last = this.updates[this.updates.length - 1];
var diff = last._id.getTimestamp() - first._id.getTimestamp();
var val = {
latest_update_id : last._id,
first_update_id : first._id,
update_count : this.updates.length,
diff: diff
}
emit(this._id, val);
}
};
var reduce = function() { };
db.runCommand(
{
mapReduce: "enquiries",
map: map,
reduce: reduce,
out: "mrresults",
query: { test : true}
}
);
This are the results:
{
"_id" : ObjectId("53a007db144ff47be1000003"),
"value" : {
"latest_update_id" : ObjectId("53a80e539b966b8da5c40c36"),
"first_update_id" : ObjectId("53a007db144ff47be1000001"),
"update_count" : 3,
"diff" : 525944000
}
}
Edit:
If you want to get the average diff for all documents you can do it like this:
var map = function() {
if (this.updates.length > 1) {
var first = this.updates[0];
var last = this.updates[this.updates.length - 1];
var diff = last._id.getTimestamp() - first._id.getTimestamp();
emit("1", {diff : diff});
}
};
var reduce = function(key, values) {
var reducedVal = { count: 0, sum: 0 };
for (var idx = 0; idx < values.length; idx++) {
reducedVal.count += 1;
reducedVal.sum += values[idx].diff;
}
return reducedVal;
};
var finalize = function (key, reducedVal) {
reducedVal.avg = reducedVal.sum/reducedVal.count;
return reducedVal;
};
db.runCommand(
{
mapReduce: "y",
map: map,
reduce: reduce,
finalize : finalize,
out: "mrtest",
query: { test : true}
}
);
And the example output:
> db.mrtest.find().pretty()
{
"_id" : "1",
"value" : {
"count" : 2,
"sum" : 1051888000,
"avg" : 525944000
}
}

MongoDB Map-Reduce combine document with dynamic schema

I'm trying what I think should be a simple map reduce, but am having trouble because I can't find a reference of how to write the server side javascript.
Given two documents:
{
"_id" : ObjectId("530c8b58d95cd926144055d9"),
"atomic" : "p",
"doc" : {
"d1" : "t"
},
"array" : ["e"]
},
{
"_id" : ObjectId("530c8b71d95cd926144055da"),
"atomic" : "p",
"doc" : {
"d2" : "r"
},
"array" : ["f"]
}
I would like the result to be
{
"_id" : "p",
"value" : {
"doc" : {
"d1" : "t",
"d2" : "r"
},
"array" : ["e", "f"]
}
}
The map function is:
function () {
emit(
this.atomic,
{doc: this.doc, array: this.array}
);
}
The incorrect reduce function is:
function (key, values) {
var reduced = {doc:{}, array:[]};
values.forEach(function(val){
for(var i = 0; i < val.array.length; i++)
reduced.array.push(val.array[i]);
val.doc.forEach(function(kvp){reduced.doc.add(kvp.key, kvp.value);});
});
return reduced;
}
The part with the array is fine, it is trying to combine the documents that is messing up (i.e. not executing due to missing function). I've tried all permutations I can think off -- if I add the val.doc to an array then they all show up, it's just that I can't figure out how to merge it into a single document.
The fields in the doc will be dynamic so there is no way to reference it by name.
Any help would be appreciated.
Not sure the reduced.doc.add bit will work.
Maybe try:
function (key, values) {
var reduced = {doc:{}, array:[]};
values.forEach(function(val){
for(var i = 0; i < val.array.length; i++)
reduced.array.push(val.array[i]);
for (kvp in val.doc){
reduced.doc[kvp]=val.doc[kvp];
}
});
return reduced;
}

MongoDB find where key equals string from array

I am trying to find in a collection all of the documents that have the given key equal to one of the strings in an array.
Heres an example of the collection.
{
roomId = 'room1',
name = 'first'
},
{
roomId = 'room2',
name = 'second'
},
{
roomId = 'room3',
name = 'third'
}
And heres an example of the array to look through.
[ 'room2', 'room3' ]
What i thought would work is...
collection.find({ roomId : { $in : [ 'room2', 'room3' ]}}, function( e, r )
{
// r should return the second and third room
});
How can i achieve this?
One way this could be solve would be to do a for loop...
var roomIds = [ 'room2', 'room3' ];
for ( var i=0; i < roomIds.length; i++ )
{
collection.find({ id : roomIds[ i ]})
}
But this is not ideal....
What you posted should work - no looping required. The $in operator does the job:
> db.Room.insert({ "_id" : 1, name: 'first'});
> db.Room.insert({ "_id" : 2, name: 'second'});
> db.Room.insert({ "_id" : 3, name: 'third'});
> // test w/ int
> db.Room.find({ "_id" : { $in : [1, 2] }});
{ "_id" : 1, "name" : "first" }
{ "_id" : 2, "name" : "second" }
> // test w/ strings
> db.Room.find({ "name" : { $in : ['first', 'third'] }});
{ "_id" : 1, "name" : "first" }
{ "_id" : 3, "name" : "third" }
Isn't that what you expect?
Tested w/ MongoDB 2.1.1