Weird MongoDB MapReduce ObjectId.toString() behavior? - mongodb

I've run into some strange differences between the mongodb running on MongoHQ and the version running on my own development machine. Specifically, when calling .toString() on an object id inside a MapReduce map function, the results vary:
On my own machine:
ObjectId('foo').toString() // => 'foo'
On MongoHQ:
ObjectId('foo').toString() // => 'ObjectId(\'foo\')'
Note: The id's I use are actual mongodb id's - not just 'foo' etc. as in these examples
I would expect .toString() to behave like on my own machine - not how it's behaving on MongoHQ. How come it's not?
My local OSX version of MongoDB is installed using Homebrew and is version 2.0.1-x86_64
To show what's actually going on, I've build a little test case. Let's assume that we have a users collection with a friends attribute, being an array of user ids:
> db.users.find()
{ _id: ObjectId('a'), friends: [ObjectId('b'), ObjectId('c')] },
{ _id: ObjectId('b'), friends: [] },
{ _id: ObjectId('c'), friends: [] }
As you can see a is friends with b and c where as b and c isn't friends with anybody.
Now let's look at a working test-algorithm:
var map = function() {
this.friends.forEach(function(f) {
emit(f, { friends: 1, user: user, friend: f.toString() });
});
};
var reduce = function(k, vals) {
var result = { friends: 0, user: [], friend: [] };
vals.forEach(function(val) {
result.friends += val.friends;
result.user.push(val.user);
result.friend.push(val.friend);
});
return result;
};
var id = ObjectId('50237c6d5849260996000002');
var query = {
query : { friends: id },
out : { inline: 1 },
scope : { user: id.toString() },
jsMode : true,
verbose : true
};
db.users.mapReduce(map, reduce, query);
Assuming id is set to an id of a user who is a friend of someone in the users collection, then the output returned by the mapReduce method on MongoHQ will look like this:
{
"results" : [
{
"_id" : ObjectId("50237c555849260996000001"),
"value" : {
"friends" : 1,
"user" : "50237c6d5849260996000002",
"friend" : "ObjectId(\"50237c555849260996000001\")"
}
},
{
"_id" : ObjectId("50237c74c271be07f6000002"),
"value" : {
"friends" : 1,
"user" : "50237c6d5849260996000002",
"friend" : "ObjectId(\"50237c74c271be07f6000002\")"
}
}
],
"timeMillis" : 0,
"timing" : {
"mapTime" : 0,
"emitLoop" : 0,
"reduceTime" : 0,
"mode" : "mixed",
"total" : 0
},
"counts" : {
"input" : 1,
"emit" : 2,
"reduce" : 0,
"output" : 2
},
"ok" : 1,
}
As you can see, the friend attribute in each result is not just a string containing the id, but a string containing the actual method call.
Did I run this on my own machine, the results array would have been:
{
"_id" : ObjectId("50237c555849260996000001"),
"value" : {
"friends" : 1,
"user" : "50237c6d5849260996000002",
"friend" : "50237c555849260996000001"
}
},
{
"_id" : ObjectId("50237c74c271be07f6000002"),
"value" : {
"friends" : 1,
"user" : "50237c6d5849260996000002",
"friend" : "50237c74c271be07f6000002"
}
}

MongoHQ is running a different version of MongoDB than you are.
To get the behavior of your homebrew version, try changing your map function:
var map = function() {
this.friends.forEach(function(f) {
emit(f, { friends: 1, user: user.str, friend: f.str });
});
};

Related

MongoDB update all subelements from subarray [duplicate]

This question already has answers here:
How to Update Multiple Array Elements in mongodb
(16 answers)
Closed 6 years ago.
I have a collection with a following schema:
{
"_id" : ObjectId("52dfba46daf02aa4630cf529"),
"hotelVenue" : {
"rooms" : [
{
"clientId" : "ROOM_1",
"roomName" : "Executive"
},
{
"clientId" : "ROOM_2",
"roomName" : "Premium"
}
]
}
},
{
"_id" : ObjectId("52dfc2f9daf02aa2632bc8af"),
"hotelVenue" : {
"rooms" : [
{
"clientId" : "ROOM_1",
"roomName" : "Studio Room"
},
{
"clientId" : "ROOM_2",
"roomName" : "Soho Suite"
},
{
"clientId" : "ROOM_3",
"roomName" : "Luxury Suite"
}
]
}
}
I need to genearate unique id for all the records -> subarray.
i.e., in the example there is rooms so for each and every room type, I need give a unique id basically using ObjectId().
Output should look something like the below, where roomId being generated.
{
"_id" : ObjectId("52dfba46daf02aa4630cf529"),
"hotelVenue" : {
"rooms" : [
{
"clientId" : "ROOM_1",
"roomName" : "Executive",
"roomId" : "56f8cb3f0c658b4bc26172342"
},
{
"clientId" : "ROOM_2",
"roomName" : "Premium",
"roomId" : "56f8cb3f0c658b4bc26176d4"
}
]
}
}
I have written this script where it gives the following error: Error: Line 9: Unexpected token +
db.venues.find().forEach(function(data)
{
data.hotelVenue.rooms.forEach(function(roomItem)
{
db.venues.update({_id:data._id,'data.hotelVenue.rooms.clientId' : roomItem.clientId},
{
$set:
{
'hotelVenue.rooms.'+roomItem.clientId+'.roomId' : ObjectId()
}
});
});
})
EDIT: This should do it in a more efficient manner by only saving each document once;
db.venues.
find({"hotelVenue.rooms": { $elemMatch: {roomId: {$exists: 0}}}}).
forEach(function(doc) {
doc.hotelVenue.rooms.forEach(function(room) {
if(!room.roomId) {
room.roomId = ObjectId();
}
});
db.venues.save(doc);
});
The find filters out only documents that are in need of updating. After that, it's just a matter of updating the document as needed and calling save.
Of course, backups are in order before running potentially destructive queries from random people on the Internet against your production data set.

MongoDB fields limitation in array [duplicate]

This question already has answers here:
Retrieve only the queried element in an object array in MongoDB collection
(18 answers)
Closed 8 years ago.
I am looking for a way - and dont even now if this is possible - just to return a part of a list saved in mongodb.
Lets have a look in my currently document:
{
_id : 'MyId',
name : 'a string',
conversations : [
{
user : 'Mike',
input : 'Some input'
},
{
user : 'Stephano',
input : 'some other input'
}
]
}
What I now want to do is smth like this:
var myOutput;
myOutput = db.my_collection.find(
{
_id : 'MyId',
'conversations.user' : 'Mike'
}, {
_id : 1,
name : 1,
conversations : {
$where : {
user : 'Mike'
}
}
});
Goal is it just to get back the conversation array item where user has the value Mike.
Is this still possible in MongoDB ? didn't found any reference in the documentation for the field limitations in mongoDB.
Use the $ positional operator in a projection:
> db.my_collection.find({ "_id" : "MyId", "conversations.user" : "Mike" },
{ "_id" : 1, "name" : 1, "conversations.$" : 1 })
{
"_id" : 'MyId',
"name" : 'a string',
"conversations" : [
{ "user" : 'Mike', "input" : 'Some input' }
]
}
This projects only first matching array element.
Are you aware of the aggregation pipeline?
db.my_collection.aggregate([
{ "$match": { "_id": "MyId"}}, { "$unwind": "$conversations"},
{ "$match": {"conversations.user": "Mike"}}
])
Output
{
"_id" : "MyId",
"name" : "a string",
"conversations" :
{
"user" : "Mike",
"input" : "Some input"
}
}

Is it possible to retrieve a 'time span' from a MongoDB query, using the timestamp within an ObjectId?

We have a basic enquiry management tool that we're using to track some website enquiries in our administration suite, and we're using the ObjectId of each document in our enquiries collection to sort the enquiries by the date they were added.
{
"_id" : ObjectId("53a007db144ff47be1000003"),
"comments" : "This is a test enquiry. Please ignore. We'll delete it shortly.",
"customer" : {
"name" : "Test Enquiry",
"email" : "test#test.com",
"telephone" : "07890123456",
"mobile" : "07890123456",
"quote" : false,
"valuation" : false
},
"site" : [],
"test" : true,
"updates" : [
{
"_id" : ObjectId("53a007db144ff47be1000001"),
"status" : "New",
"status_id" : ObjectId("537de7c3a5e6e668ffc2335c"),
"status_index" : 100,
"substatus" : "New Web Enquiry",
"substatus_id" : ObjectId("5396bb9fa5e6e668ffc23388"),
"notes" : "New enquiry received from website.",
},
{
"_id" : ObjectId("53a80c977d299cfe91bacf81"),
"status" : "New",
"status_id" : ObjectId("537de7c3a5e6e668ffc2335c"),
"status_index" : 100,
"substatus" : "Attempted Contact",
"substatus_id" : ObjectId("53a80e06a5e6e668ffc2339e"),
"notes" : "In this test, we pretend that we've not managed to get hold of the customer on the first attempt.",
},
{
"_id" : ObjectId("53a80e539b966b8da5c40c36"),
"status" : "Approved",
"status_id" : ObjectId("52e77a49d85e95f00ebf6c72"),
"status_index" : 200,
"substatus" : "Enquiry Confirmed",
"substatus_id" : ObjectId("53901f1ba5e6e668ffc23372"),
"notes" : "In this test, we pretend that we've got hold of the customer after failing to contact them on the first attempt.",
}
]
}
Within each enquiry is an updates array of objects which also have an ObjectId as their main identity field. We're using an $unwind and $group aggregation to pull the first and latest updates, as well as the count of updates, making sure we only take enquiries where there have been more than one update (as one is automatically inserted when the enquiry is made):
db.enquiries.aggregate([
{
$match: {
"test": true
}
},
{
$unwind: "$updates"
},
{
$group: {
"_id": "$_id",
"latest_update_id": {
$last: "$updates._id"
},
"first_update_id": {
$first: "$updates._id"
},
"update_count": {
$sum: 1
}
}
},
{
$match: {
"update_count": {
$gt: 1
}
}
}
])
This results in the following output:
{
"result" : [
{
"_id" : ObjectId("53a295ad122ea80200000005"),
"latest_update_id" : ObjectId("53a80bdc7d299cfe91bacf7e"),
"first_update_id" : ObjectId("53a295ad122ea80200000003"),
"update_count" : 2
},
{
"_id" : ObjectId("53a007db144ff47be1000003"),
"latest_update_id" : ObjectId("53a80e539b966b8da5c40c36"),
"first_update_id" : ObjectId("53a007db144ff47be1000001"),
"update_count" : 3
}
],
"ok" : 1
}
This is then passed through to our code (node.js, in this case) where we perform a few operations on it and then present some information on our dashboard.
Ideally, I'd like to add another $group pipeline aggregation to the query which would subtract the timestamp of first_update_id from the timestamp of latest_update_id to give us a timespan, which we could then use $avg on.
Can anyone tell me if this is possible? (Thank you!)
As Neil already pointed out, you can't get to the timestamp from the ObjectId in the aggregation framework.
You said that speed is not important, so using MapReduce you can get what you want:
var map = function() {
if (this.updates.length > 1) {
var first = this.updates[0];
var last = this.updates[this.updates.length - 1];
var diff = last._id.getTimestamp() - first._id.getTimestamp();
var val = {
latest_update_id : last._id,
first_update_id : first._id,
update_count : this.updates.length,
diff: diff
}
emit(this._id, val);
}
};
var reduce = function() { };
db.runCommand(
{
mapReduce: "enquiries",
map: map,
reduce: reduce,
out: "mrresults",
query: { test : true}
}
);
This are the results:
{
"_id" : ObjectId("53a007db144ff47be1000003"),
"value" : {
"latest_update_id" : ObjectId("53a80e539b966b8da5c40c36"),
"first_update_id" : ObjectId("53a007db144ff47be1000001"),
"update_count" : 3,
"diff" : 525944000
}
}
Edit:
If you want to get the average diff for all documents you can do it like this:
var map = function() {
if (this.updates.length > 1) {
var first = this.updates[0];
var last = this.updates[this.updates.length - 1];
var diff = last._id.getTimestamp() - first._id.getTimestamp();
emit("1", {diff : diff});
}
};
var reduce = function(key, values) {
var reducedVal = { count: 0, sum: 0 };
for (var idx = 0; idx < values.length; idx++) {
reducedVal.count += 1;
reducedVal.sum += values[idx].diff;
}
return reducedVal;
};
var finalize = function (key, reducedVal) {
reducedVal.avg = reducedVal.sum/reducedVal.count;
return reducedVal;
};
db.runCommand(
{
mapReduce: "y",
map: map,
reduce: reduce,
finalize : finalize,
out: "mrtest",
query: { test : true}
}
);
And the example output:
> db.mrtest.find().pretty()
{
"_id" : "1",
"value" : {
"count" : 2,
"sum" : 1051888000,
"avg" : 525944000
}
}

Get all objects for each tag

I'm new in mongodbs mapreduce and for sure I have not completely understood it for now. And I have a problem, which I try to solve for few days without success.
I have a collection of let's say posts with a tags field. Now I want to mapreduce a new collection of tags. Where every tag have an array of all posts ids that have this one particular tag assigned.
one of my attempts to do this (which doesn't do this right)
m = function() {
for (var i in this.tags) {
emit(this.tags[i], {"ids" : [this._id]});
};
}
r = function(key, emits) {
var total = {ids : []}
for (var i in emits) {
emits[i].ids.forEach(function(id) {
total.ids.push(id);
}
}
return total;
};
I know, that I have to pivot the date some how around, but I just cant get my head wrapped around it.
I think you're missing a ")" in your reduce function to close the emits[i].ids.forEach(). Is this what you're trying to do?
r = function (key, values) {
var total = {ids:[]};
for (var i in values) {
values[i].ids.forEach(
function (id){
total.ids.push(id);
}
);
}
return total;
}
input
{_id:2, tags: ["dog", "Jenna"]}
{_id:1, tags: ["cat", "Jenna"]}
result:
{"results" : [
{"_id" : "Jenna",
"value" : {"ids" : [2,1]}
},
{"_id" : "cat",
"value" : {"ids" : [1]}
},
{"_id" : "dog",
"value" : {"ids" : [2]}
}
],
"timeMillis" : 1,
"counts" : {
"input" : 2,
"emit" : 4,
"reduce" : 1,
"output" : 3
},
"ok" : 1,
}

MongoDB find where key equals string from array

I am trying to find in a collection all of the documents that have the given key equal to one of the strings in an array.
Heres an example of the collection.
{
roomId = 'room1',
name = 'first'
},
{
roomId = 'room2',
name = 'second'
},
{
roomId = 'room3',
name = 'third'
}
And heres an example of the array to look through.
[ 'room2', 'room3' ]
What i thought would work is...
collection.find({ roomId : { $in : [ 'room2', 'room3' ]}}, function( e, r )
{
// r should return the second and third room
});
How can i achieve this?
One way this could be solve would be to do a for loop...
var roomIds = [ 'room2', 'room3' ];
for ( var i=0; i < roomIds.length; i++ )
{
collection.find({ id : roomIds[ i ]})
}
But this is not ideal....
What you posted should work - no looping required. The $in operator does the job:
> db.Room.insert({ "_id" : 1, name: 'first'});
> db.Room.insert({ "_id" : 2, name: 'second'});
> db.Room.insert({ "_id" : 3, name: 'third'});
> // test w/ int
> db.Room.find({ "_id" : { $in : [1, 2] }});
{ "_id" : 1, "name" : "first" }
{ "_id" : 2, "name" : "second" }
> // test w/ strings
> db.Room.find({ "name" : { $in : ['first', 'third'] }});
{ "_id" : 1, "name" : "first" }
{ "_id" : 3, "name" : "third" }
Isn't that what you expect?
Tested w/ MongoDB 2.1.1