Group and count in MongoDB - mongodb

I'm trying to group and count the amount of elements for each group in mongodb 2.0.1, but with no success so far.
My DB schema looks like :
{
"_id" : ObjectId("4ece7544853b4b0941000000"),
"ResultSet" : {
"Results" : [
{
"quality" : 87,
"state" : "Franche-Comté"
}
]
}
}
I've been trying all sort of methods, following different tutorials, but it is each time the same result : an only null group... which I don't understand why.
The best query I have written so far is the following :
db.extract_2000.group( {
cond: { "ResultSet.Results.quality": {$exists: true} },
key: {"ResultSet.Results.state": true},
reduce: function(obj, glob) { glob.total++; glob.quality += obj.ResultSet.Results.quality },
initial: { total: 0, quality: 0 },
finalize: function(glob) {glob.avgquality = glob.quality / glob.total}
})
Which returns (once again) :
[
{
"ResultSet.Results.state" : null,
"total" : 2000,
"quality" : NaN,
"avgquality" : NaN
}
]
What am I doing wrong ?

This simply won't work as written. The key problem is here: key: {"ResultSet.Results.state": true}. ResultSet.Results is an array. When you ask for ResultSet.Results.state you are implying some type of for loop be done here. The group command is simply not capable of this.
Instead try the following M/R:
map = function() {
// Note that we emit once per result
foreach(var i in ResultSet.Results) {
key = this.ResultSet.Results[i];
value = { count: 1,
quality: this.ResultSet.Results[i].quality,
avg_quality: 0
};
emit(key, value);
}
}
reduce = function(key, values) {
// note that results has same fields as emitted value
var results = { count: 0, quality: 0, avg_quality: 0 };
foreach(var i in values){
results.count += values[i].count;
results.quality += values[i].quality;
// ignore avg_quality, we don't use it
}
return results;
}
You will also have to write a finalize for the average.
finalize = function(key, value) {
if (value.count > 0)
value.avg_quality = value.quality / value.count;
return value;
}

The map function
map = function() {
for(var i in this.Results) {
emit(this.Results[i].state,
{quality: this.Results[i].quality, total: 1, avgquality: 0}
);
}
}
The reduce function
reduce = function(key, values) {
var data = {quality: 0, total: 0, avgquality: 0};
for(var i=0; i<values.length; i++) {
data.quality += values[i].quality;
data.total += values[i].total;
}
return data;
}
In finalize function only calculate the avg.

Related

MongoDb query to update the string array to object array

Hi I am posting the question here first time. also I am new to mongodb. I have searched on the google but i did not find the answer. Currently in my collection i have list of string as
"keepOuts" : [
"198,168,282,115,292,180",
"356,120,466,192,330,252,346,186"
]
I need a mongo db query which which will update all the documents in my collection such as
"keepOuts" : [
{
"coordinates" : "126,200,166,185,158,236",
"height" : NumberInt(0)
},
{
"coordinates" : "146,141,169,107,192,154",
"height" : NumberInt(0)
}
],
Here for each entry in old keepOuts each object is created and it will go in key coordinates also additional field is set into the the same object with key height. height will be by default 0.
I got the query from this portal it gives me the split array but it does not update the collection. query is
db.quoteDetails1.aggregate([
{"$match":{keepOuts:{ $exists: true}}},
{"$unwind": {path : "$keepOuts"}},
{
$group: {"_id": "$_id",
result:{
$push:{
coordinates:"$keepOuts"}}}}])
Thank you all.
Try this!
db.quoteDetails1.find({}).toArray().forEach(
function (elem) {
for(var i = 0; i < elem.keepOuts.length; ++i) {
var obj = {"coordinates" : elem.keepOuts[i], "height": NumberInt(0)}
elem.keepOuts[i] = obj
}
db.quoteDetails1.update({_id: elem._id}, elem);
});
I got the answer by below javascript
db.getCollection('quoteDetails').find({keepOuts:{"$exists":true}}).forEach(
function(doc) {
if(doc.keepOuts.length > 0){
var array = doc.keepOuts;
if(typeof array[0] === "string"){
db.quoteDetails.update({"_id":doc._id},{"$set":{keepOuts :
arrayToObj(doc)
}});
}
}
}
);
function arrayToObj(doc){
print("processing for" + doc._id);
var array = doc.keepOuts;
var len = array.length;
var outerArray =[];
for(var i = 0; i<len; i++){
var obj = {};
var curVal = array[i];
obj["coordinates"]=curVal;
obj["height"]=0;
outerArray.push(obj);
}
return outerArray;
}

How can i remove empty string from a mongodb collection?

I have a "mongodb colllenctions" and I'd like to remove the "empty strings"with keys from it.
From this:
{
"_id" : ObjectId("56323d975134a77adac312c5"),
"year" : "15",
"year_comment" : "",
}
{
"_id" : ObjectId("56323d975134a77adac312c5"),
"year" : "",
"year_comment" : "asd",
}
I'd like to gain this result:
{
"_id" : ObjectId("56323d975134a77adac312c5"),
"year" : "15",
}
{
"_id" : ObjectId("56323d975134a77adac312c5"),
"year_comment" : "asd",
}
How could I solve it?
Please try executing following code snippet in Mongo shell which strips fields with empty or null values
var result=new Array();
db.getCollection('test').find({}).forEach(function(data)
{
for(var i in data)
{
if(data[i]==null || data[i]=='')
{
delete data[i]
}
}
result.push(data)
})
print(tojson(result))
Would start with getting a distinct list of all the keys in the collection, use those keys as your query basis and do an ordered bulk update using the Bulk API operations. The update statement uses the $unset operator to remove the fields.
The mechanism to get distinct keys list that you need to assemble the query is possible through Map-Reduce. The following mapreduce operation will populate a separate collection with all the keys as the _id values:
mr = db.runCommand({
"mapreduce": "my_collection",
"map" : function() {
for (var key in this) { emit(key, null); }
},
"reduce" : function(key, stuff) { return null; },
"out": "my_collection" + "_keys"
})
To get a list of all the dynamic keys, run distinct on the resulting collection:
db[mr.result].distinct("_id")
// prints ["_id", "year", "year_comment", ...]
Now given the list above, you can assemble your query by creating an object that will have its properties set within a loop. Normally your query will have this structure:
var keysList = ["_id", "year", "year_comment"];
var query = keysList.reduce(function(obj, k) {
var q = {};
q[k] = "";
obj["$or"].push(q);
return obj;
}, { "$or": [] });
printjson(query); // prints {"$or":[{"_id":""},{"year":""},{"year_comment":""}]}
You can then use the Bulk API (available with MongoDB 2.6 and above) as a way of streamlining your updates for better performance with the query above. Overall, you should be able to have something working as:
var bulk = db.collection.initializeOrderedBulkOp(),
counter = 0,
query = {"$or":[{"_id":""},{"year":""},{"year_comment":""}]},
keysList = ["_id", "year", "year_comment"];
db.collection.find(query).forEach(function(doc){
var emptyKeys = keysList.filter(function(k) { // use filter to return an array of keys which have empty strings
return doc[k]==="";
}),
update = emptyKeys.reduce(function(obj, k) { // set the update object
obj[k] = "";
return obj;
}, { });
bulk.find({ "_id": doc._id }).updateOne({
"$unset": update // use the $unset operator to remove the fields
});
counter++;
if (counter % 1000 == 0) {
// Execute per 1000 operations and re-initialize every 1000 update statements
bulk.execute();
bulk = db.collection.initializeOrderedBulkOp();
}
})
If you need to update a single blank parameter or you prefer to do parameter by parameter, you can use the mongo updateMany functionality:
db.comments.updateMany({year: ""}, { $unset : { year : 1 }})

How to get average value from a hashmap in MongoDB?

I have a time data in my Mongo database. Each document equal a minute and contain 60 seconds as objects with value for each. How to get average value of all seconds in one minute?
A document looking like that:
{
"_id" : ObjectId("55575e4062771c26ec5f2287"),
"timestamp" : "2015-05-16T18:12:00.000Z",
"values" : {
"0" : "26.17",
"1" : "26.17",
"2" : "26.17",
...
"58" : "24.71",
"59" : "25.20"
}
}
You could take two approaches here:
Changing the schema and use the aggregation framework to get the average by using the $avg operator OR
Apply Map-Reduce.
Let's look at the first option. Currently as it is, the schema will not make it possible to use the aggregation framework because of the dynamic keys in the values subdocument. The ideal schema that would favour the aggregation framework would have the values field be an array which contains embedded key/value documents like this:
/* 0 */
{
"_id" : ObjectId("5559d66c9bbec0dd0344e4b0"),
"timestamp" : "2015-05-16T18:12:00.000Z",
"values" : [
{
"k" : "0",
"v" : 26.17
},
{
"k" : "1",
"v" : 26.17
},
{
"k" : "2",
"v" : 26.17
},
...
{
"k" : "58",
"v" : 24.71
},
{
"k" : "59",
"v" : 25.20
}
]
}
With MongoDB 3.6 and newer, use the aggregation framework to tranform the hashmaps to an array by using the $objectToArray operator then use $avg to calculate the average.
Consider running the following aggregate pipeline:
db.test.aggregate([
{
"$addFields": {
"values": { "$objectToArray": "$values" }
}
}
])
Armed with this new schema, you would then need to update your collection to change the string values to int by iterating the cursor returned from the aggregate method and using bulkWrite as follows:
var bulkUpdateOps = [],
cursor = db.test.aggregate([
{
"$addFields": {
"values": { "$objectToArray": "$values" }
}
}
]);
cursor.forEach(doc => {
const { _id, values } = doc;
let temp = values.map(item => {
item.key = item.k;
item.value = parseFloat(item.v) || 0;
delete item.k;
delete item.v;
return item;
});
bulkUpdateOps.push({
"updateOne": {
"filter": { _id },
"update": { "$set": { values: temp } },
"upsert": true
}
});
if (bulkUpdateOps.length === 1000) {
db.test.bulkWrite(bulkUpdateOps);
bulkUpdateOps = [];
}
});
if (bulkUpdateOps.length > 0) {
db.test.bulkWrite(bulkUpdateOps);
}
If your MongoDB version does not support the $objectToArray operator in the aggregation framework, then to convert the current schema into the one above takes a bit of native JavaScript functions with the MongoDB find() cursor's forEach() function as follows (assuming you have a test collection):
var bulkUpdateOps = [],
cursor = db.test.find();
cursor.forEach(doc => {
const { _id, values } = doc;
let temp = Object.keys(values).map(k => {
let obj = {};
obj.key = k;
obj.value = parseFloat(doc.values[k]) || 0;
return obj;
});
bulkUpdateOps.push({
"updateOne": {
"filter": { _id },
"update": { "$set": { values: temp } },
"upsert": true
}
});
if (bulkUpdateOps.length === 1000) {
db.test.bulkWrite(bulkUpdateOps);
bulkUpdateOps = [];
}
});
if (bulkUpdateOps.length > 0) {
db.test.bulkWrite(bulkUpdateOps);
}
or
db.test.find().forEach(function (doc){
var keys = Object.keys(doc.values),
values = keys.map(function(k){
var obj = {};
obj.key = k;
obj.value = parseFloat(doc.values[k]) || 0;
return obj;
});
doc.values = values;
db.test.save(doc);
});
The collection will now have the above schema and thus follows the aggregation pipeline that will give you the average time in one minute:
db.test.aggregate([
{
"$fields": {
"average": { "$avg": "$values.value" }
}
}
])
Or for MongoDB 3.0 and lower
db.test.aggregate([
{ "$unwind": "$values" },
{
"$group": {
"_id": "$timestamp",
"average": {
"$avg": "$values.value"
}
}
}
])
For the above document, the output would be:
/* 0 */
{
"result" : [
{
"_id" : "2015-05-16T18:12:00.000Z",
"average" : 25.684
}
],
"ok" : 1
}
As for the other Map-Reduce option, the intuition behind the operation is you would use JavaScript to make the necessary transformations and calculate the final average. You would need to define three functions:
Map
When you tell Mongo to MapReduce, the function you provide as the map function will receive each document as the this parameter. The purpose of the map is to exercise whatever logic you need in JavaScript and then call emit 0 or more times to produce a reducible value.
var map = function(){
var obj = this.values;
var keys = Object.keys(obj);
var values = [];
keys.forEach(function(key){
var val = parseFloat(obj[key]);
var value = { count: 1, qty: val };
emit(this.timestamp, value);
});
};
For each document you need to emit a key and a value. The key is the first parameter to the emit function and represents how you want to group the values (in this case you will be grouping by the timestamp). The second parameter to emit is the value, which in this case is a little object containing the count of documents (always 1) and total value of each individual value object key i.e. for each second within the minute.
Reduce
Next you need to define the reduce function where Mongo will group the items you emit and pass them as an array to this reduce function It's inside the reduce function where you want to do the aggregation calculations and reduce all the objects to a single object.
var reduce = function(key, values) {
var result = {count: 0, total: 0 };
values.forEach(function(value){
result.count += value.count;
result.total += value.qty;
});
return result;
};
This reduce function returns a single result. It's important for the return value to have the same shape as the emitted values. It's also possible for MongoDB to call the reduce function multiple times for a given key and ask you to process a partial set of values, so if you need to perform some final calculation, you can also give MapReduce a finalize function.
Finalize
The finalize function is optional, but if you need to calculate something based on a fully reduced set of data, you'll want to use a finalize function. Mongo will call the finalize function after all the reduce calls for a set are complete. This would be the place to calculate the average of all the second values in a document/timestamp:
var finalize = function (key, value) {
value.average = value.total / value.count;
return value;
};
Putting It Together
With the JavaScript in place, all that is left is to tell MongoDB to execute a MapReduce:
var map = function(){
var obj = this.values;
var keys = Object.keys(obj);
var values = [];
keys.forEach(function(key){
var val = parseFloat(obj[key]);
var value = { count: 1, qty: val };
emit(this.timestamp, value);
});
};
var reduce = function(key, values) {
var result = {count: 0, total: 0 };
values.forEach(function(value){
result.count += value.count;
result.total += value.qty;
});
return result;
};
var finalize = function (key, value) {
value.average = value.total / value.count;
return value;
};
db.collection.mapReduce(
map,
reduce,
{
out: { merge: "map_reduce_example" },
finalize: finalize
}
)
And when you query the output collection map_reduce_example, db.map_reduce_example.find(), you get the result:
/* 0 */
{
"_id" : null,
"value" : {
"count" : 5,
"total" : 128.42,
"average" : 25.684
}
}
References:
A Simple MapReduce with MongoDB and C#
MongoDB docuumentation on mapReduce
This kind of data structure creates lots of conflicts and difficult to handled mongo operations. This case either you changed your schema design. But, if you not able to changed this schema then follow this :
In your schema having two major problem 1> keys dynamic and 2> values of given keys in string so you should use some programming code to calculating avg check below scripts
From ref this first calculated size of values
Object.size = function(obj) {
var size = 0,
key;
for (key in obj) {
if (obj.hasOwnProperty(key)) size++;
}
return size;
};
db.collectionName.find().forEach(function(myDoc) {
var objects = myDoc.values;
var value = 0;
// Get the size of an object
var size = Object.size(objects);
for (var key in objects) {
value = value + parseFloat(objects[key]); // parse string values to float
}
var avg = value / size
print(value);
print(size);
print(avg);
});

mongodb - Finding the Sum of a field (if it exists) in a collection

I have the following mongodb structure
{
"_id": ObjectId("507c80a143188f9610000003"),
"date": ISODate("2012-10-15T21: 31: 13.0Z"),
"uid": NumberInt(35920),
"comp": NumberInt(770),
"fields": {
"rating": {
"parent": "rating",
"weight": NumberInt(2),
"rel_weight": 0.11,
},
"capacity": {
"parent": "capacity",
"weight": NumberInt(4),
"rel_weight": 0.89,
},
}
}
The "fields" attribute has 2 fields "rating" and "capacity" in it. But, each entry might have a different set of fields. eg. dimension, price etc.
I would like to find all entries that have "rating" under "fields" and get a sum of "weight" attribute of all such entries.
I am a newbie to mongodb and I tried using the mapReduce function, but with no avail.
Given below is the code I used. Kindly let me know where I went wrong or if there is a better solution instead of this code.
function map(){
emit(this._id,{weight:this.fields.rating.weight});
}
function reduce(key,value){
var sum = 0;
for ( var i=0; i<value.length; i++ ) {
sum += value[i].amount;
}
return sum;
}
res = db.collection_name.mapReduce(map, reduce, { query: {"fields.rating" : { $exists: true } });
I was finally able to figure it out and get it working. I have shared my code below.
var map = function(){
if(this.fields.rating){
emit(this.fields.rating.parent,this.fields.rating.weight);
}
}
var reduce = function (k, vals) {
var sum = 0;
var count = 0;
for (var i in vals) {
sum += vals[i];
count++;
}
var avg = (sum/count);
return avg;
}
var res = db.myCollection.mapReduce(map, reduce, {out:"myoutput"});

mongoDB Map/Reduce

I am trying to figure out this map/reduce system in mongoDB. I have the following basic schema/layout in my collection.
{
_id: 1,
name: n1,
website: w1,
tags: [
myTag1,
myTag3
]
}
{
_id: 2,
name: n2,
website: w2,
tags: [
myTag2,
myTag3
]
}
{
_id: 3,
name: n3,
website: w3,
tags: [
myTag2,
myTag4
]
}
How can I retrieve an array of unique tags? I would like this to be returned to me for further use.
{
tags: [
myTag1,
myTag2,
myTag3,
myTag4
]
}
By the way this is what I have come up with, but it just returns the _id and tags of each item instead of combining the tags into a single object.
var map = function() {emit( this._id,{tags: this.tags});};
var reduce = function(key, values) {
var t = [];
values.forEach(function(doc) {
var tags = doc.tags;
tags.forEach(function(tag) {
if (!(tag in t)) {
t.push(tag);
}
});
});
return {tags: t};
};
var op = db.businesses.mapReduce(map, reduce, {out: "mr_results"});
db[op.result].find();
There's no need to use map-reduce in your case. Just use the distinct function:
db.businesses.distinct('tags')
You can try it in the mongo shell:
> use test
switched to db test
> db.businesses.insert({tags: ['tag1', 'tag2']})
> db.businesses.insert({tags: ['tag3', 'tag4']})
> db.businesses.find()
{ "_id" : ObjectId("4fa05b2b036495bf4ac9c0cc"), "tags" : [ "tag1", "tag2" ] }
{ "_id" : ObjectId("4fa05b33036495bf4ac9c0cd"), "tags" : [ "tag3", "tag4" ] }
> db.businesses.distinct('tags')
[ "tag1", "tag2", "tag3", "tag4" ]
Also, you should keep in mind that map/reduce in MongoDB is not suitable for real-time querying.
Using MongoDB MapReduce you could do it as follows:
function m() {
this.tags.forEach(function(x) { emit('tag', x); });
}
function r(k, v) {
var res = {};
v.forEach(function(x) { res[x] = true; });
return res;
}
db.businesses.mapReduce(m, r, {out:'out'});
// Now the 'out' collection has a record whose "value" property
// has a key for each of the tags in the source collection.
function getTags(coll) {
var tags=[], o=db[coll].find()[0].value;
for (var i in o) { tags.push(i) }
return tags; // Same as Object.keys(db[coll].find()[0].value)
}
listTags('out'); // => ['myTag1', 'myTag2', 'myTag3', 'myTag4']