mongodb find element within a hash within a hash - mongodb

I am attempting to build a query to run from Mongo client that will allow access to the following element of a hash within a hash within a hash.
Here is the structure of the data:
"_id" : ObjectId("BSONID"),
"e1" : "value",
"e2" : "value",
"e3" : "value"),
"updated_at" : ISODate("2015-08-31T21:04:37.669Z"),
"created_at" : ISODate("2015-01-05T07:20:17.833Z"),
"e4" : 62,
"e5" : {
"sube1" : {
"26444745" : {
"subsube1" : "value",
"subsube2" : "value",
"subsube3" : "value I am looking for",
"subsube4" : "value",
"subsube5" : "value"
},
"40937803" : {
"subsube1" : "value",
"subsube2" : "value",
"subsube3" : "value I am looking for",
"subsube4" : "value",
"subsube5" : "value"
},
"YCPGF5SRTJV2TVVF" : {
"subsube1" : "value",
"subsube2" : "value",
"subsube3" : "value I am looking for",
"subsube4" : "value",
"subsube5" : "value"
}
}
}
So I have tried dotted notation based on a suggestion for "diving" into an wildcard named hash using db.my_collection.find({"e5.sube1.subsube4": "value I am looking for"}) which keeps coming back with an empty result set. I have also tried the find using a match instead of an exact value using /value I am lo/ and still an empty result set. I know there is at least 1 document which has the "value I am looking for".
Any ideas - note I am restricted to using the Mongo shell client.
Thanks.

So since this is not capable of being made into a javascript/mongo shell array I will go to plan B which is write some code be it Perl or Ruby and pull the result set into an array of hashes and walk each document/sub-document.
Thanks Mario for the help.

You have two issues:
You're missing one level.
You are checking subsube4 instead of subsube3
Depending on what subdocument of sube1 you want to check, you should do
db.my_collection.find({"e5.sube1.26444745.subsube4": "value I am looking for"})
or
db.my_collection.find({"e5.sube1.40937803.subsube4": "value I am looking for"})
or
db.my_collection.find({"e5.sube1.YCPGF5SRTJV2TVVF.subsube4": "value I am looking for"})
You could use the $or operator if you want to look in any one of the three.
If you don't know the keys of your documents, that's an issue with your schema design: you should use arrays instead of objects. Similar case: How to query a dynamic key - mongodb schema design
EDIT
Since you explain that you have a special request to know the count of "value I am looking for" only one time, we can run a map reduce. You can run those commands in the shell.
Define map function
var iurMapFunction = function() {
for (var key in this.e5.sube1) {
if (this.e5.sube1[key].subsube3 == "value I am looking for") {
var value = {
count: 1,
subkey: key
}
emit(key, value);
}
}
};
Define reduce function
var iurReduceFunction = function(keys, countObjVals) {
reducedVal = {
count: 0
};
for (var idx = 0; idx < countObjVals.length; idx++) {
reducedVal.count += countObjVals[idx].count;
}
return reducedVal;
};
Run mapreduce command
db.my_collection.mapReduce(iurMapFunction,
iurReduceFunction, {
out: {
replace: "map_reduce_result"
},
}
);
Find your counts
db.map_reduce_result.find()
This should give you, for each dynamic key in your object, the number of times it had an embedded field subsube3 with value value I am looking for.

Related

Why is mongo dot notation replacing an entire subdocument?

I've got the following doc in my db:
{
"_id": ObjectId("ABCDEFG12345"),
"options" : {
"foo": "bar",
"another": "something"
},
"date" : {
"created": 1234567890,
"updated": 0
}
}
And I want to update options.foo and date.updated at the same time using dot notation, like so:
var mongojs = require('mongojs');
var optionName = 'foo';
var optionValue = 'baz';
var updates = {};
updates['options.' + optionName] = optionValue;
updates['date.updated'] = new Date().getTime();
db.myCollection.findAndModify({
query : {
_id : ObjectId('ABCDEFG12345')
},
update : {
$set : updates
},
upsert : false,
new : true
}, function(error, doc, result) {
console.log(doc.options);
console.log(doc.date);
});
And this results in:
{
foo : 'baz',
another : 'something'
}
{
updated : 1234567890
}
Specifically, my pre-existing date.created field is getting clobbered even though I'm using dot notation.
Why is this only partially working? The options sub-document retains its pre-existing data (options.another), why doesn't the date sub-document retain its pre-existing data?
The behavior described typically happens when the object passed in the $set operator is of the form { "data" : { "updated" : 1234567890 } } rather than { "data.updated" : 1234567890 }, but I'm not familiar with dots in JavaScript enough to tell if that could be the cause on JS's side.
Also, it wouldn't explain why it happens with data and not options.
If you could print the object stored in the variable updates and that is sent to MongoDB in the update field, that would allow to tell on which side the issue is (JS or MongoDB).
i pass your code to a test environment and use the same library you are using. The mongojs library, for query by native ObjectId is like this mongojs.ObjectId("####") Can look the official documentation.
for the callback function in the findAndModify function, the docs parameter is an array so i navigate like an array
Note: [to concatenate the string i use template literals] (https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Template_literals)
All work fine...

Searching with dynamic field name in MongoDB

I have a situation where records in Mongo DB are like :
{
"_id" : "xxxx",
"_class" : "xxxx",
"orgId" : xxx,
"targetKeyToOrgIdMap" : {
"46784_56139542ecaa34c13ba9e314" : 46784,
"47530_562f1bc5fc1c1831d38d1900" : 47530,
"700004280_56c18369fc1cde1e2a017afc" : 700004280
},
}
I have to find out the records where child nodes of targetKeyToOrgIdMap has a particular set of values. That means, I know what the value is going to be there in the record in "46784_56139542ecaa34c13ba9e314" : 46784 part. And the field name is variable, its combination of the value and some random string.
In above example, I have 46784, and I need to find all the records which have 46784 in that respective field.
Is there any way I can fire some regex or something like that or by using any other mean where I would get the records which has the value I need in the child nodes of the field targetKeyToOrgIdMap.
Thanks in advance
You could use MongoDB's $where like this:
db.myCollection.find( { $where: function() {
for (var key in obj.targetKeyToOrgIdMap) {
if (obj.targetKeyToOrgIdMap[key] == 46784){
return true;
}
}
}}).each { obj ->
println obj
}
But be aware that this will require a full table scan where the function is executed for each document. See documentation.

How to query a relative element using MongoDB

I have a document like this:
{
"whoKnows" : {
"name" : "Jeff",
"phone" : "123-123-1234"
},
"anotherElement" : {
"name" : "Jeff",
"phone" : "321-321-3211"
}
}
How can any instance of "name" by queried? For example, using a wildcard may look something like,
db.collection.find( { "*.name" : "Jeff" } )
Or if regex was support in the element place, it might look like,
db.collection.find( { /.*\.name/ : "Jeff" } )
Is it possible to accomplish this using MongoDB?
Side note: I'm not looking for a solution like,
db.collection.find({
"$or": [
{ "whoKnows.name" : "Jeff" },
{ "anotherElement.name" : "Jeff" }
]
})
I need a truly relative path solution as I do not know what the parent element will be (unless there is a way to generate the name of every element - then I could dynamically generate the $or clause at runtime).
Everything about this is fairly horrible, you cannot possibly index on something like the "name" values and your "path" to each attribute is going to vary everywhere. So this is really bad for queries.
I notice you mention "nested" structures, and you still could accommodate this with a similar proposal and some additional tagging, but I want you to consider this "phone book" type example:
{
"phones": [
{
"type": "Home",
"name" : "Jeff",
"phone" : "123-123-1234"
},
{
"type": "Work",
"name" : "Jeff",
"phone" : "123-123-1234"
},
]
}
Since this is actually sub-documents within an array, fields like "name" always share the same path, so not only can you index these (which is going to be good for performance) but the query is very basic:
db.collection({ "phones.name": "Jeff" })
That does exactly what you need by finding "Jeff" in any "name" entry. If you need a hierachy, then add some fields in those sub-documents to indicate the parent/child relationship that you can use in post processing. Or even as a materialized path which could aid your queries.
It really is the better approach.
If you really must keep this kind of structure then at least do something like this with the JavaScript that will bail out on the first match at depth:
db.collection.find(
function () {
var found = false;
var finder = function( obj, field, value ) {
if ( obj.hasOwnProperty(field) && obj[field] == value )
found = true;
if (found) return true;
for( var n in obj ) {
if ( Object.prototype.toString.call(obj[n]) === "[object Object]" ) {
finder( obj[n], field, value );
if (found) return true;
}
}
};
finder( this, "name", "Jeff" );
return found;
}
)
The format there is shorthand notation for the $where operator, which is pretty bad news for performance, but your structure isn't offering much other choice. At any rate, the function should recurse into each nested document until the "field" with the "value" is found.
For anything of production scale, really look at changing the structure to something that can be indexed and accessed quickly. The first example should give you a starting point. Relying on arbitrary JavaScript for queries as your present structure constrains you to is bad news.
If these are similar instance, what stops you in putting these in an array? That would be easier to query.
In it's current form this looks as good as writing your own $where condition to parse all document structure and is not an efficient operation!
Although highly inefficient and I wouldn't suggest using this in a production environment, following is one of the simplest way (with its own various catches) you can query:
db.query.find({$where: function() { x = tojsononeline(this); return x.indexOf('"name" : "Jeff",') >= 0; } })
Please note that this will cause a tablescan and if you have a pre-condition you may want to specify that before the where clause in the query.

Ordering fields from find query with projection

I have a Mongo find query that works well to extract specific fields from a large document like...
db.profiles.find(
{ "profile.ModelID" : 'LZ241M4' },
{
_id : 0,
"profile.ModelID" : 1,
"profile.AVersion" : 2,
"profile.SVersion" : 3
}
);
...this produces the following output. Note how the SVersion comes before the AVersion in the document even though my projection asked for AVersion before SVersion.
{ "profile" : { "ModelID" : "LZ241M4", "SVersion" : "3.5", "AVersion" : "4.0.3" } }
{ "profile" : { "ModelID" : "LZ241M4", "SVersion" : "4.0", "AVersion" : "4.0.3" } }
...the problem is that I want the output to be...
{ "profile" : { "ModelID" : "LZ241M4", "AVersion" : "4.0.3", "SVersion" : "3.5" } }
{ "profile" : { "ModelID" : "LZ241M4", "AVersion" : "4.0.3", "SVersion" : "4.0" } }
What do I have to do get the Mongo JavaScript shell to present the results of my query in the field order that I specify?
I have achieved it by projecting the fields using aliases, instead of including and excluding by 0 and 1s.
Try this:
{
_id : 0,
"profile.ModelID" :"$profile.ModelID",
"profile.AVersion":"$profile.AVersion",
"profile.SVersion":"$profile.SVersion"
}
I get it now. You want to return results ordered by "fields" rather the value of a fields.
Simple answer is that you can't do this. Maybe its possible with the new aggregation framework. But this seems overkill just to order fields.
The second object in a find query is for including or excluding returned fields not for ordering them.
{
_id : 0, // 0 means exclude this field from results
"profile.ModelID" : 1, // 1 means include this field in the results
"profile.AVersion" :2, // 2 means nothing
"profile.SVersion" :3, // 3 means nothing
}
Last point, you shouldn't need to do this, who cares what order the fields come-back in.
You application should be able to make use of the fields it needs regardless of the order the fields are in.
Another solution I applied to achieve this is the following:
db.profiles
.find({ "profile.ModelID" : 'LZ241M4' })
.toArray()
.map(doc => ({
profile: {
ModelID: doc.profile.ModelID,
AVersion: doc.profile.AVersion,
SVersion: doc.profile.SVersion
}
}))
Since version 2.6 (that came out in 2014) MongoDB preserves the order of the document fields following the write operation (source).
P.S. If you are using Python you might find this interesting.

Most efficient way to generate a list of Unigrams from a text field in MongoDB

I need to generate a vector of unigrams, i.e. a vector of all the unique words which appear in a specific text field that I have stored as part of a broader JSON object in MongoDB.
I'm not really sure what's the easiest and most efficient way to generate this vector. I was thinking of writing a simple Java app which could handle the tokenization (using something like OpenNLP), however I think that a better approach may be to try to tackle this using Mongo's Map-Reduce feature... However I'm not really sure how I could go about this.
Another option would be to use Apache Lucene indexing, but it would mean I'd still need to export this data in one by one. Which is really the same issue I would have with the custom Java or Ruby approach...
Map reduce sounds good however the Mongo data is growing by the day as more document are inserted. This isn't really a one off task as there are new documents being added all the time. Updates are very rare. I really don't want to run a Map-Reduce over the millions of documents every time I want to update my Unigram vector as I fear this will be very inefficient use of resources...
What would be the most efficient way to generate the unigram vector and then keep it updated?
Thanks!
Since you have not provided a sample document (object) format take this as a sample collection called 'stories'.
{ "_id" : ObjectId("4eafd693627b738f69f8f1e3"), "body" : "There was a king", "author" : "tom" }
{ "_id" : ObjectId("4eafd69c627b738f69f8f1e4"), "body" : "There was a queen", "author" : "tom" }
{ "_id" : ObjectId("4eafd72c627b738f69f8f1e5"), "body" : "There was a queen", "author" : "tom" }
{ "_id" : ObjectId("4eafd74e627b738f69f8f1e6"), "body" : "There was a jack", "author" : "tom" }
{ "_id" : ObjectId("4eafd785627b738f69f8f1e7"), "body" : "There was a humpty and dumpty . Humtpy was tall . Dumpty was short .", "author" : "jane" }
{ "_id" : ObjectId("4eafd7cc627b738f69f8f1e8"), "body" : "There was a cat called Mini . Mini was clever cat . ", "author" : "jane" }
For the given dataset, you can use the following javascript code to get to your solution. The collection "authors_unigrams" contains the result. All the code is supposed to be run using mongo console (http://www.mongodb.org/display/DOCS/mongo+-+The+Interactive+Shell).
First, we need to mark of all the new documents that have come afresh into the 'stories' collection. We do it using following command. It will add a new attribute called "mr_status" into each document and assign value "inprocess". Later, we will see that map-reduce operation will only take those documents in account which are having the value "inprocess" for the field "mr_status". This way, we can avoid reconsidering all the documents for map-reduce operation that have been already considered in any of the previous attempt, making the operation efficient as asked.
db.stories.update({mr_status:{$exists:false}},{$set:{mr_status:"inprocess"}},false,true);
Second, we define both map() and reduce() function.
var map = function() {
uniqueWords = function (words){
var arrWords = words.split(" ");
var arrNewWords = [];
var seenWords = {};
for(var i=0;i<arrWords.length;i++) {
if (!seenWords[arrWords[i]]) {
seenWords[arrWords[i]]=true;
arrNewWords.push(arrWords[i]);
}
}
return arrNewWords;
}
var unigrams = uniqueWords(this.body) ;
emit(this.author, {unigrams:unigrams});
};
var reduce = function(key,values){
Array.prototype.uniqueMerge = function( a ) {
for ( var nonDuplicates = [], i = 0, l = a.length; i<l; ++i ) {
if ( this.indexOf( a[i] ) === -1 ) {
nonDuplicates.push( a[i] );
}
}
return this.concat( nonDuplicates )
};
unigrams = [];
values.forEach(function(i){
unigrams = unigrams.uniqueMerge(i.unigrams);
});
return { unigrams:unigrams};
};
Third, we actually run the map-reduce function.
var result = db.stories.mapReduce( map,
reduce,
{query:{author:{$exists:true},mr_status:"inprocess"},
out: {reduce:"authors_unigrams"}
});
Fourth, we mark all the records that have been considered for map-reduce in last run as processed by setting "mr_status" as "processed".
db.stories.update({mr_status:"inprocess"},{$set:{mr_status:"processed"}},false,true);
Optionally, you can see the result collection "authors_unigrams" by firing following command.
db.authors_unigrams.find();