MongoDB - Aggregate fields inside and object - mongodb

I have the following dataset in MongoDB:
{
_id: 574718ec2bc91f565db33897,
topic: {
T69: 0.9566255761668587
}
},
{
_id: 574718ec2bc91f565db33899,
topic: {
T257: 0.046038051058499445,
T2: 1.8206715756325407,
T31: 0.08838710118945285
}
},
{
_id: 574718ec2bc91f565db33889,
topic: {
T95: 0.37718859499517865,
T40: 0.2620479937270479,
T2: 0.3594989449758472,
T1: 1.9161288780994465
}
}
I've been trying to create an aggregation query which returns the sum of all topics, Tn, over the set of all such documents. Can anyone give me a pointer in the right direction? Since I'm new to MongoDB I couldn't find an answer to this problem (though this seemed related $unwind an object in aggregation framework).

Our best bet here is mapReduce. In our map function all we need is to iterate over the "topic" property and emit the value. To get the total sum in the collection we need to "emit" with null as key value.
In the reduce function we simply use the Array.sum method to return the sum.
db.coll.mapReduce(function() {
for (var key in this.topic) {
if (Object.prototype.hasOwnProperty.call(this.topic, key)) {
emit(null, this.topic[key])
}
}},
function(key, value) {
return Array.sum(value);
},
{ "out": { "inline": 1 } }
)
which produces:
{
"results" : [
{
"_id" : null,
"value" : 5.826586715844872
}
],
"timeMillis" : 26,
"counts" : {
"input" : 3,
"emit" : 8,
"reduce" : 1,
"output" : 1
},
"ok" : 1
}
If you want the "sum" for each document, simply call emit(this._id, this.topic[key]) in your map function instead of emit(null, this.topic[key])

I think you can't do it with the mongoDB aggregation framework (that works better with collections/array of subdocs), but is pretty simple with a map/reduce. For example you can try with:
db.YOURCOLLECTION.mapReduce(
function () {
var topic = this.topic;
Object.keys(topic).forEach(function(k) {
emit(k, topic[k]);
});
},
function (key, values) {
return Array.sum(values);
}
);

Related

How to use aggregation function mongo db-query

I am new in MongoDB and I would like to use the aggregation function where I want to check type == topic and get the following output
Expected output
[
{
conceptName : 59d98cfd1c5edc24e4024d00
totalCount : 2
},
{
conceptName : 59d98cfd1c5edc24e4024d03
totalCount : 1
}
]
Sample input db.GroupContents
{
"_id" : "5a0948bb1c5edc7a5000521a",
"type" : "topic",
"groupID" : "5a0948bb1c5edc7a5000521a",
"pedagogyID" : "59d98cfa1c5edc24e40249a3",
}
Sample input db.PedagogyNodes
{
"_id" : "59d98cfa1c5edc24e40249a3",
"latestVersion" : "59d98cfa1c5edc24e402497f_1",
"createdAt" : "2017-10-08 04:27:06",
"updatedAt" : "2017-10-08 04:27:06"
}
Sample input db.PedagogyVersions
{
"_id" : "59d98cfa1c5edc24e402497f_1",
"type" : "topic",
"contentNodes" : {
"LearningNodes" : [
"59d98cfd1c5edc24e4024d00",
"59d98cfd1c5edc24e4024d03",
"59d98cfd1c5edc24e4024d00",
]
},
"createdAt" : "2017-10-08 04:27:06",
"updatedAt" : "2017-10-08 04:27:06"
}
What I have tried so far
var groupID = "5a0948bb1c5edc7a5000521a"; // Step 1
var records;
var pnDoc;
var pvDoc;
db.GroupContents.find({groupID : groupID}).forEach(function (doc){ // Step 2
var pedagogyID = doc.pedagogyID;
var records = db.getSiblingDB('PedagogyService');
records.PedagogyNodes.find({_id : pedagogyID}).forEach(function (pnDoc) { // Step 3
var latestVersion = pnDoc.latestVersion;
// addded aggregate function here
records.PedagogyVersions.aggregate([
{
$match:{_id:latestVersion} // Step 4
},
{
$unwind:"$contentNodes.LearningNodes"
},
{
$group:
{
_id:"$contentNodes.LearningNodes",
count:{$sum:1}
}
}
])
})
});
I am unable to write db query based on my expected answer, please help.
Understand my requirement
Step : 1 => I am passing `groupID = 5a0948bb1c5edc7a5000521a`
Step : 2 => we have to check from GroupContents where groupID = groupID then we have to take `pedagogyID`
Step : 3 => we have to check from PedagogyNodes where _id = pedagogyID then we have to take `latestVersion`
Step : 4 => we have to check from PedagogyVersions where _id = latestVersion then we have to take `contentNodes->LearningNodes`
Step : 5 => Finally we have to do the aggregation then we have display the result
Try to unwind the LearningNodes array and then count them by grouping them together
db.PedagogyNodes.aggregate([
{
$unwind:"$contentNodes.LearningNodes"
},
{
$group:
{
_id:"$contentNodes.LearningNodes",
count:{$sum:1}
}
}
])
In case you need to do any matches you can use the $match stage
db.PedagogyNodes.aggregate([
{
$match:{type:"topic"}
},
{
$unwind:"$contentNodes.LearningNodes"
},
{
$group:
{
_id:"$contentNodes.LearningNodes",
count:{$sum:1}
}
}
])
Answering the edited question =>
You were not able to view the output on the console since mongoshell does not print script output on the screen. To do this, do the following:
var result = records.PedagogyVersions.aggregate([......]);
result.forEach(function(resultDoc){
print(tojson(resultDoc))
})
To see the result of your aggregation you have to pass the callback to be executed as parameter.
records.PedagogyVersions.aggregate([
{
$match:{_id:latestVersion} // Step 4
},
{
$unwind:"$contentNodes.LearningNodes"
},
{
$group:
{
_id:"$contentNodes.LearningNodes",
count:{$sum:1}
}
}
], function(err, results) {
console.log(results);
});

Find records with field in a nested document when parent fields are not known

With a collection with documents like below, I need to find the documents where a particular field - eg. lev3_field2 (in document below) is present.
I tried the following, but this doesn't return any results, though the field lev3_field2 is present in some documents.
db.getCollection('some_collection').find({"lev3_field2": { $exists: true, $ne: null } })
{
"_id" : ObjectId("5884de15bebf420cf8bb2857"),
"lev1_field1" : "139521721",
"lev1_field2" : "276183",
"lev1_field3" : {
"lev2_field1" : "4",
"lev2_field2" : {
"lev3_field1" : "1",
"lev3_field2" : {
"lev4_field1" : "1",
"lev4_field2" : "1"
},
"lev3_field3" : "5"
},
"lev2_field3" : {
"lev3_field3" : "0",
"lev3_field4" : "0"
}
}
}
update1: this is an example, however in the real document it is not known what the parent fields are for the field to look for. So instead of lev3_field2 , I would be looking for `levM_fieldN'.
update2: Speed is not a primary concern for me, I can work with relatively a bit slower options as well, as the primary function is to find documents with the criteria discussed and once the document is found and the schema is understood, the query can be re-written for performance by including the parent keys.
To search a key in nested document you need to iterate the documents fields recursively, you can do this in JavaScript by the help of $where method in MongoDB
The below query will search if a key name exists in a documents and its subdocuments.
I have checked this with the example you have given, and it is working perfectly fine.
db.getCollection('test').find({ $where: function () {
var search_key = "lev3_field2";
function check_key(document) {
return Object.keys(document).some(function(key) {
if ( typeof(document[key]) == "object" ) {
if ( key == search_key ) {
return true;
} else {
return check_key(document[key]);
}
} else {
return ( key == search_key );
}
});
}
return check_key(this);
}}
);
There is no built-in function to iterate over document keys in MongoDB, but you can achieve this with MapReduce. The main advantage is that all the code is executed directly in the MongoDB database, and not in the js client, so there is no network overhead, hence it should be faster than client side js
here is the script :
var found;
// save a function in MongoDB to iterate over documents key and check for
// key name. Need to be done only once
db.system.js.save({
_id: 'findObjectByLabel',
value: function(obj, prop) {
Object.keys(obj).forEach(function(key) {
if (key === prop) {
found = true
}
if (!found && typeof obj[key] === 'object') {
findObjectByLabel(obj[key], prop)
}
})
}
})
// run the map reduce fonction
db.ex.mapReduce(
function() {
found = false;
var key = this._id
findObjectByLabel(this, 'lev3_field2')
value = found;
if (found) {
// if the document contains the key we are looking for,
// emit {_id: ..., value: true }
emit(key, value)
}
},
function(key, values) {
return values
}, {
'query': {},
'out': {inline:1}
}
)
this output ( run on 4 sample doc, with only one containing 'lev3_field2' )
{
"results" : [
{
"_id" : ObjectId("5884de15bebf420cf8bb2857"),
"value" : true
}
],
"timeMillis" : 18,
"counts" : {
"input" : 4,
"emit" : 1,
"reduce" : 0,
"output" : 1
},
"ok" : 1
}
to run the script, copy it to a file name "script.js" for example, and then run from your shell
mongo databaseName < script.js
It's because you're trying to see if a nested field exists. This is the query you want:
db.some_collection.find({"lev1_field3.lev2_field2.lev3_field2": { $exists: true, $ne: null } })

How to add key-value pair to object in MongoDB

If I have a document with the following basic structure:
{
...
Monday: { a:1, b:2 },
Tuesday: { c:3, d:4 }
...
}
Am I able to 'push' an additional key:value pair to Monday's value? Result would be:
{
Monday: { a:1, b:2, z:8 },
Tuesday: { c:3, d:4 }
...
}
The $push operator seems to only work for arrays.
Just do something like that
db.foo.update({"_id" :ObjectId("...") },{$set : {"Monday.z":8}})
How to add a new key:value pair to all existing objects of a mongoDB documents
Old Key and Value Pairs
> db.students.find().pretty();
{ "_id" : ObjectId("601594f5a22527655335415c"), "name" : "Doddanna" }
{ "_id" : ObjectId("601594f5a22527655335415d"), "name" : "Chawan" }
Update New Key and Value Pairs Using updateMany() and $set
> db.students.updateMany({},{$set:{newKey1:"newValue1", newKey2:"newValue2", newKeyN:"newValueN"}});
{ "acknowledged" : true, "matchedCount" : 2, "modifiedCount" : 2 }
Have a look on Updated pretty result
> db.students.find().pretty();
{
"_id" : ObjectId("601594f5a22527655335415c"),
"name" : "Doddanna",
"newKey1" : "newValue1",
"newKey2" : "newValue2",
"newKeyN" : "newValueN"
}
{
"_id" : ObjectId("601594f5a22527655335415d"),
"name" : "Chawan",
"newKey1" : "newValue1",
"newKey2" : "newValue2",
"newKeyN" : "newValueN"
}
I know this might be irrelevant to the question but as a matter of fact, I opened this page because I was looking for an exact query with mongoose. here is my answer with mongoose.
If we have an abstract model (mongoose schema) named week in our javascript application then the code will be:
// javascript with mongoose
...
const key = "z";
const KeyValue = 8;
await week.updateOne({
_id, // mongoDb document id
},
{
$set:{
[`Monday.${key}`]: KeyValue,
},
},
{
upsert: true // options
},
);
...
var json = {
Monday: { a:1, b:2 },
Tuesday: { c:3, d:4 } }
json['Monday']['z'] = 8;
console.log(json);

Mongodb Is it possible to aggregate an object?

I am trying to aggregate the total sum of packets in this document.
{
"_id" : ObjectId("51a6cd102769c63e65061bda"),
"capture" : "1369885967",
"packets" : {
"0" : "595",
"1" : "596",
"2" : "595",
"3" : "595",
...
}
}
The closest I can get is about
db.collection.aggregate({ $match: { capture : "1369885967" } }, {$group: { _id:null, sum: {$sum:"$packets"}}});
However it returns sum 0, which is obviously wrong.
{ "result" : [ { "_id" : null, "sum" : 0 } ], "ok" : 1 }
How do I get the sum of all the packets?
Since you have the values in an object instead of an array, you'll need to use mapReduce.
// Emit the values as integers
var mapFunction =
function() {
for (key in this.packets) {
emit(null, parseInt(this.packets[key]));
}
}
// Reduce to a simple sum
var reduceFunction =
function(key, values) {
return Array.sum(values);
}
> db.collection.mapReduce(mapFunction, reduceFunction, {out: {inline:1}})
{
"results" : [
{
"_id" : null,
"value" : 2381
}
],
"ok" : 1,
}
If at all possible, you should emit the values as an array of a numeric type instead since that gives you more options (ie aggregation) and (unless the data set is large) probably performance benefits.
If you don't know how many keys are in the packet subdocument and since you also seem to be storing counts as strings (why???) you will have to use mapReduce.
Something like:
m=function() {
for (f in "this.packets") {
emit(null, +this.packets[f]);
};
r=function(k, vals) {
int sum=0;
vals.forEach(function(v) { sum+=v; } );
return sum;
}
db.collection.mapreduce(m, r, {out:{inline:1}, query:{your query condition here}});

Mongo: count the number of word occurrences in a set of documents

I have a set of documents in Mongo. Say:
[
{ summary:"This is good" },
{ summary:"This is bad" },
{ summary:"Something that is neither good nor bad" }
]
I'd like to count the number of occurrences of each word (case insensitive), then sort in descending order. The result should be something like:
[
"is": 3,
"bad": 2,
"good": 2,
"this": 2,
"neither": 1,
"nor": 1,
"something": 1,
"that": 1
]
Any idea how to do this? Aggregation framework would be preferred, as I understand it to some degree already :)
MapReduce might be a good fit that can process the documents on the server without doing manipulation on the client (as there isn't a feature to split a string on the DB server (open issue).
Start with the map function. In the example below (which likely needs to be more robust), each document is passed to the map function (as this). The code looks for the summary field and if it's there, lowercases it, splits on a space, and then emits a 1 for each word found.
var map = function() {
var summary = this.summary;
if (summary) {
// quick lowercase to normalize per your requirements
summary = summary.toLowerCase().split(" ");
for (var i = summary.length - 1; i >= 0; i--) {
// might want to remove punctuation, etc. here
if (summary[i]) { // make sure there's something
emit(summary[i], 1); // store a 1 for each word
}
}
}
};
Then, in the reduce function, it sums all of the results found by the map function and returns a discrete total for each word that was emitted above.
var reduce = function( key, values ) {
var count = 0;
values.forEach(function(v) {
count +=v;
});
return count;
}
Finally, execute the mapReduce:
> db.so.mapReduce(map, reduce, {out: "word_count"})
The results with your sample data:
> db.word_count.find().sort({value:-1})
{ "_id" : "is", "value" : 3 }
{ "_id" : "bad", "value" : 2 }
{ "_id" : "good", "value" : 2 }
{ "_id" : "this", "value" : 2 }
{ "_id" : "neither", "value" : 1 }
{ "_id" : "or", "value" : 1 }
{ "_id" : "something", "value" : 1 }
{ "_id" : "that", "value" : 1 }
A basic MapReduce example
var m = function() {
var words = this.summary.split(" ");
if (words) {
for(var i=0; i<words.length; i++) {
emit(words[i].toLowerCase(), 1);
}
}
}
var r = function(k, v) {
return v.length;
};
db.collection.mapReduce(
m, r, { out: { merge: "words_count" } }
)
This will insert word counts into a collection name words_count which you can sort (and index)
Note that it doesn't use stemming, omit punctuation, handles stop words etc.
Also note you can optimize the map function by accumulating repeating word(s) occurrences and emitting the count, not just 1
You can use #split.
Try Below query
db.summary.aggregate([
{ $project : { summary : { $split: ["$summary", " "] } } },
{ $unwind : "$summary" },
{ $group : { _id: "$summary" , total : { "$sum" : 1 } } },
{ $sort : { total : -1 } }
]);
Old question but since 4.2 this can be done with $regexFindAll now.
db.summaries.aggregate([
{$project: {
occurences: {
$regexFindAll: {
input: '$summary',
regex: /\b\w+\b/, // match words
}
}
}},
{$unwind: '$occurences'},
{$group: {
_id: '$occurences.match', // group by each word
totalOccurences: {
$sum: 1 // add up total occurences
}
}},
{$sort: {
totalOccurences: -1
}}
]);
This will output docs in the following format:
{
_id: "matchedwordstring",
totalOccurences: number
}