Find records with field in a nested document when parent fields are not known - mongodb

With a collection with documents like below, I need to find the documents where a particular field - eg. lev3_field2 (in document below) is present.
I tried the following, but this doesn't return any results, though the field lev3_field2 is present in some documents.
db.getCollection('some_collection').find({"lev3_field2": { $exists: true, $ne: null } })
{
"_id" : ObjectId("5884de15bebf420cf8bb2857"),
"lev1_field1" : "139521721",
"lev1_field2" : "276183",
"lev1_field3" : {
"lev2_field1" : "4",
"lev2_field2" : {
"lev3_field1" : "1",
"lev3_field2" : {
"lev4_field1" : "1",
"lev4_field2" : "1"
},
"lev3_field3" : "5"
},
"lev2_field3" : {
"lev3_field3" : "0",
"lev3_field4" : "0"
}
}
}
update1: this is an example, however in the real document it is not known what the parent fields are for the field to look for. So instead of lev3_field2 , I would be looking for `levM_fieldN'.
update2: Speed is not a primary concern for me, I can work with relatively a bit slower options as well, as the primary function is to find documents with the criteria discussed and once the document is found and the schema is understood, the query can be re-written for performance by including the parent keys.

To search a key in nested document you need to iterate the documents fields recursively, you can do this in JavaScript by the help of $where method in MongoDB
The below query will search if a key name exists in a documents and its subdocuments.
I have checked this with the example you have given, and it is working perfectly fine.
db.getCollection('test').find({ $where: function () {
var search_key = "lev3_field2";
function check_key(document) {
return Object.keys(document).some(function(key) {
if ( typeof(document[key]) == "object" ) {
if ( key == search_key ) {
return true;
} else {
return check_key(document[key]);
}
} else {
return ( key == search_key );
}
});
}
return check_key(this);
}}
);

There is no built-in function to iterate over document keys in MongoDB, but you can achieve this with MapReduce. The main advantage is that all the code is executed directly in the MongoDB database, and not in the js client, so there is no network overhead, hence it should be faster than client side js
here is the script :
var found;
// save a function in MongoDB to iterate over documents key and check for
// key name. Need to be done only once
db.system.js.save({
_id: 'findObjectByLabel',
value: function(obj, prop) {
Object.keys(obj).forEach(function(key) {
if (key === prop) {
found = true
}
if (!found && typeof obj[key] === 'object') {
findObjectByLabel(obj[key], prop)
}
})
}
})
// run the map reduce fonction
db.ex.mapReduce(
function() {
found = false;
var key = this._id
findObjectByLabel(this, 'lev3_field2')
value = found;
if (found) {
// if the document contains the key we are looking for,
// emit {_id: ..., value: true }
emit(key, value)
}
},
function(key, values) {
return values
}, {
'query': {},
'out': {inline:1}
}
)
this output ( run on 4 sample doc, with only one containing 'lev3_field2' )
{
"results" : [
{
"_id" : ObjectId("5884de15bebf420cf8bb2857"),
"value" : true
}
],
"timeMillis" : 18,
"counts" : {
"input" : 4,
"emit" : 1,
"reduce" : 0,
"output" : 1
},
"ok" : 1
}
to run the script, copy it to a file name "script.js" for example, and then run from your shell
mongo databaseName < script.js

It's because you're trying to see if a nested field exists. This is the query you want:
db.some_collection.find({"lev1_field3.lev2_field2.lev3_field2": { $exists: true, $ne: null } })

Related

How to retrieve Mongodb child value without parents object/field name

Is there a way to query one type of child objects (child object/field with same name under different parents object/field ) directly without invoking the parents name in the find() command
For example I have
MondoDB
{
eatable.fruits.tomato
}
{
eatable.vegetables.tomato
}
Here each tomato is a parameter which have some value assigned in it, And I have tomato under two different objects/fields,
Is there a way to query and retrieve all values of tomato without using the field/object names "fruits" or "vegetables" in the find () command.
As of now, there is no direct way to search for child fields without knowing the parent fields but we do have the workaround. The idea is to first filter the documents by adding the constraints in find() method and then iterate over the response and print out the values of 'tomato' key where ever its present as leaf element.
Following is the example:
db.collection.find().forEach(
function processDoc(document){
let keys = Object.keys(document);
keys.forEach(
function(key){
let value = document[key];
if(typeof value === typeof Object()){
processDoc(value); // Its an embedded document, thus process it again
}else if(key == 'tomato'){
print(value);
}
}
);
}
);
Data set:
{
"_id" : ObjectId("5d63ac599d32d1c15cf5ea5e"),
"eatable" : {
"vegetables" : {
"tomato" : "veg1"
},
"fruits" : {
"tomato" : "fru1",
"banana": "fru1"
},
"misc" : {
"item" : {
"tomato" : "misc1"
}
}
}
}
{
"_id" : ObjectId("5d63ac599d32d1c15cf5ea5f"),
"eatable" : {
"fruits" : {
"tomato" : "fru2"
}
}
}
Output:
veg1
fru1
misc1
fru2

MongoDB - Aggregate fields inside and object

I have the following dataset in MongoDB:
{
_id: 574718ec2bc91f565db33897,
topic: {
T69: 0.9566255761668587
}
},
{
_id: 574718ec2bc91f565db33899,
topic: {
T257: 0.046038051058499445,
T2: 1.8206715756325407,
T31: 0.08838710118945285
}
},
{
_id: 574718ec2bc91f565db33889,
topic: {
T95: 0.37718859499517865,
T40: 0.2620479937270479,
T2: 0.3594989449758472,
T1: 1.9161288780994465
}
}
I've been trying to create an aggregation query which returns the sum of all topics, Tn, over the set of all such documents. Can anyone give me a pointer in the right direction? Since I'm new to MongoDB I couldn't find an answer to this problem (though this seemed related $unwind an object in aggregation framework).
Our best bet here is mapReduce. In our map function all we need is to iterate over the "topic" property and emit the value. To get the total sum in the collection we need to "emit" with null as key value.
In the reduce function we simply use the Array.sum method to return the sum.
db.coll.mapReduce(function() {
for (var key in this.topic) {
if (Object.prototype.hasOwnProperty.call(this.topic, key)) {
emit(null, this.topic[key])
}
}},
function(key, value) {
return Array.sum(value);
},
{ "out": { "inline": 1 } }
)
which produces:
{
"results" : [
{
"_id" : null,
"value" : 5.826586715844872
}
],
"timeMillis" : 26,
"counts" : {
"input" : 3,
"emit" : 8,
"reduce" : 1,
"output" : 1
},
"ok" : 1
}
If you want the "sum" for each document, simply call emit(this._id, this.topic[key]) in your map function instead of emit(null, this.topic[key])
I think you can't do it with the mongoDB aggregation framework (that works better with collections/array of subdocs), but is pretty simple with a map/reduce. For example you can try with:
db.YOURCOLLECTION.mapReduce(
function () {
var topic = this.topic;
Object.keys(topic).forEach(function(k) {
emit(k, topic[k]);
});
},
function (key, values) {
return Array.sum(values);
}
);

MongoDB: Update a field of an item in array with matching another field of that item

I have a data structure like this:
We have some centers. A center has some switches. A switch has some ports.
{
"_id" : ObjectId("561ad881755a021904c00fb5"),
"Name" : "center1",
"Switches" : [
{
"Ports" : [
{
"PortNumber" : 2,
"Status" : "Empty"
},
{
"PortNumber" : 5,
"Status" : "Used"
},
{
"PortNumber" : 7,
"Status" : "Used"
}
]
}
]
}
All I want is to write an Update query to change the Status of the port that it's PortNumber is 5 to "Empty".
I can update it when I know the array index of the port (here array index is 1) with this query:
db.colection.update(
// query
{
_id: ObjectId("561ad881755a021904c00fb5")
},
// update
{
$set : { "Switches.0.Ports.1.Status" : "Empty" }
}
);
But I don't know the array index of that Port.
Thanks for help.
You would normally do this using the positional operator $, as described in the answer to this question:
Update field in exact element array in MongoDB
Unfortunately, right now the positional operator only supports one array level deep of matching.
There is a JIRA ticket for the sort of behavior that you want: https://jira.mongodb.org/browse/SERVER-831
In case you can make Switches into an object instead, you could do something like this:
db.colection.update(
{
_id: ObjectId("561ad881755a021904c00fb5"),
"Switch.Ports.PortNumber": 5
},
{
$set: {
"Switch.Ports.$.Status": "Empty"
}
}
)
Since you don't know the array index of the Port, I would suggest you dynamically create the $set conditions on the fly i.e. something which would help you get the indexes for the objects and then modify accordingly, then consider using MapReduce.
Currently this seems to be not possible using the aggregation framework. There is an unresolved open JIRA issue linked to it. However, a workaround is possible with MapReduce. The basic idea with MapReduce is that it uses JavaScript as its query language but this tends to be fairly slower than the aggregation framework and should not be used for real-time data analysis.
In your MapReduce operation, you need to define a couple of steps i.e. the mapping step (which maps an operation into every document in the collection, and the operation can either do nothing or emit some object with keys and projected values) and reducing step (which takes the list of emitted values and reduces it to a single element).
For the map step, you ideally would want to get for every document in the collection, the index for each Switches and Ports array fields and another key that contains the $set keys.
Your reduce step would be a function (which does nothing) simply defined as var reduce = function() {};
The final step in your MapReduce operation will then create a separate collection Switches that contains the emitted Switches array object along with a field with the $set conditions. This collection can be updated periodically when you run the MapReduce operation on the original collection.
Altogether, this MapReduce method would look like:
var map = function(){
for(var i = 0; i < this.Switches.length; i++){
for(var j = 0; j < this.Switches[i].Ports.length; j++){
emit(
{
"_id": this._id,
"switch_index": i,
"port_index": j
},
{
"index": j,
"Switches": this.Switches[i],
"Port": this.Switches[i].Ports[j],
"update": {
"PortNumber": "Switches." + i.toString() + ".Ports." + j.toString() + ".PortNumber",
"Status": "Switches." + i.toString() + ".Ports." + j.toString() + ".Status"
}
}
);
}
}
};
var reduce = function(){};
db.centers.mapReduce(
map,
reduce,
{
"out": {
"replace": "switches"
}
}
);
Querying the output collection Switches from the MapReduce operation will typically give you the result:
db.switches.findOne()
Sample Output:
{
"_id" : {
"_id" : ObjectId("561ad881755a021904c00fb5"),
"switch_index" : 0,
"port_index" : 1
},
"value" : {
"index" : 1,
"Switches" : {
"Ports" : [
{
"PortNumber" : 2,
"Status" : "Empty"
},
{
"PortNumber" : 5,
"Status" : "Used"
},
{
"PortNumber" : 7,
"Status" : "Used"
}
]
},
"Port" : {
"PortNumber" : 5,
"Status" : "Used"
},
"update" : {
"PortNumber" : "Switches.0.Ports.1.PortNumber",
"Status" : "Switches.0.Ports.1.Status"
}
}
}
You can then use the cursor from the db.switches.find() method to iterate over and update your collection accordingly:
var newStatus = "Empty";
var cur = db.switches.find({ "value.Port.PortNumber": 5 });
// Iterate through results and update using the update query object set dynamically by using the array-index syntax.
while (cur.hasNext()) {
var doc = cur.next();
var update = { "$set": {} };
// set the update query object
update["$set"][doc.value.update.Status] = newStatus;
db.centers.update(
{
"_id": doc._id._id,
"Switches.Ports.PortNumber": 5
},
update
);
};

Mongodb update subdocument inside array field of a collection

I have a mongodb collection like
{
"_id" : ObjectId("5375ef2153bb790b20d8a660"),
"association" : [
{
"count" : 3,
"name" : "hayatdediğin"
},
{
"count" : 2,
"name" : "sadecesenolsan"
},
{
"count" : 2,
"name" : "üslupnamustur"
}
],
"tag_count" : 4,
"tag_name" : "vazgeçilmezolan",
"variation" : [
{
"count" : 4,
"name" : "VazgeçilmezOlan"
}
]
}
Each collection consists of tag_name, tag_count, array field association and array field variation. For each name inside association, there exists a different document same as this document. I need to add new field "total_count" inside each association dictionary whose value equals the tag_count of the name by querying the database.
I tried this code but its not working
db.hashtag.find().forEach(function (doc) {
if (doc.association.length != 0 ) {
doc.association.forEach(function (assoc) {
db.hashtag.find({'tag_name': assoc.name}).forEach(function(tag){
assoc.total_count=tag.tag_count;
})
});
}
});
After modifying each doc you need to call save on the collection to commit the change.
Assuming you're doing this in the shell:
db.hashtag.find().forEach(function (doc) {
if (doc.association.length != 0 ) {
doc.association.forEach(function (assoc) {
db.hashtag.find({'tag_name': assoc.name}).forEach(function(tag){
assoc.total_count=tag.tag_count;
});
});
// Save the changed doc back to the collection
db.hashtag.save(doc);
}
});
To update doc in database you have to use db.hashtag.update, not db.hashtag.find. Find only retrieves document from db.
I changed the previous method of looping using forEach and then saved the doc at last and the code worked.
db.hashtag.find().forEach(function (doc) {
var array = doc.association;
if (array != undefined){
for(var i=0;i<array.length;i++)
{
var obj = db.hashtag.findOne({'name':array[i].name});
var count = obj.count;
doc.association[i].total_count = count;
db.hashtag.save(doc);
}
}
});

Mongodb MapReduce for grouping up to n per category using Mongoid

I have a weird problem with MongoDB (2.0.2) map reduce.
So, the story goes like this:
I have Ad model (look for model source extract below) and I need to group up to n ads per category in order to have a nice ordered listing I can later use to do more interesting things.
# encoding: utf-8
class Ad
include Mongoid::Document
cache
include Mongoid::Timestamps
field :title
field :slug, :unique => true
def self.aggregate_latest_active_per_category
map = "function () {
emit( this.category, { id: this._id });
}"
reduce = "function ( key, value ) {
return { ads:v };
}"
self.collection.map_reduce(map, reduce, { :out => "categories"} )
end
All fun and games up until now.
What I expect is to get a result in a form which resembles (mongo shell for db.categories.findOne() ):
{
"_id" : "category_name",
"value" : {
"ads" : [
{
"id" : ObjectId("4f2970e9e815f825a30014ab")
},
{
"id" : ObjectId("4f2970e9e815f825a30014b0")
},
{
"id" : ObjectId("4f2970e9e815f825a30014b6")
},
{
"id" : ObjectId("4f2970e9e815f825a30014b8")
},
{
"id" : ObjectId("4f2970e9e815f825a30014bd")
},
{
"id" : ObjectId("4f2970e9e815f825a30014c1")
},
{
"id" : ObjectId("4f2970e9e815f825a30014ca")
},
// ... and it goes on and on
]
}
}
Actually, it would be even better if I could get value to contain only array but MongoDB complains about not supporting that yet, but, with later use of finalize function, that is not a big problem I want to ask about.
Now, back to problem. What actually happens when I do map reduce is that it spits out something like :
{
"_id" : "category_name",
"value" : {
"ads" : [
{
"ads" : [
{
"ads" : [
{
"ads" : [
{
"ads" : [
{
"id" : ObjectId("4f2970d8e815f825a3000011")
},
{
"id" : ObjectId("4f2970d8e815f825a3000017")
},
{
"id" : ObjectId("4f2970d8e815f825a3000019")
},
{
"id" : ObjectId("4f2970d8e815f825a3000022")
},
// ... on and on and on
... and while I could probably work out a way to use this it just doesn't look like something I should get.
So, my questions (finally) are:
Am I doing something wrong and what is it?
I there something wrong with MongoDB map reduce (I mean besides all the usual things when compared to hadoop)?
Yes, you're doing it wrong. Inputs and outputs of map and reduce should be uniform. Because they are meant to be executed in parallel, and reduce might be run over partially reduced results. Try these functions:
var map = function() {
emit(this.category, {ads: [this._id]});
};
var reduce = function(key, values) {
var result = {ads: []};
values.forEach(function(v) {
v.ads.forEach(function(a) {
result.ads.push(a)
});
});
return result;
}
This should produce documents like:
{_id: category, value: {ads: [ObjectId("4f2970d8e815f825a3000011"),
ObjectId("4f2970d8e815f825a3000019"),
...]}}