how to iterate an object in mongodb documents? - mongodb

The documents in my Mongo collection look like this:
{
"_id" : ObjectId("568f7e67676b4ddf133999e8"),
"auth_dic" : {
"2406" : [
"44735"
],
"6410" : [
"223423"
]
...
...
},
"user_id" : "fdasd23423"
}
This is one user; there are many IDs like '2406' and '6410' in 'auth_dic'.
I want to use the map_reduce method to count how many users have '2406', how many users have '6410', and so on for the other IDs.
my mapper is:
# Map function, passed to MongoDB as a JavaScript source string.
# It calls forEach on this.auth_dic and emits (app_id, 1) from the callback.
# NOTE(review): in the sample document auth_dic is an embedded object, not an
# array, so forEach is presumably unavailable on it -- confirm.
mapper = Code('''
function(){
this.auth_dic.forEach(function(app_id){
emit(app_id, 1);
});
}
''')
my reducer is:
# Reduce function, passed to MongoDB as a JavaScript source string.
# It returns the sum of all entries in `values` for a key.
reducer = Code('''
function(key, values){
var total = 0;
for (var i = 0; i < values.length; i++) {
total += values[i];
}
return total;
}
''')
But the mapper does not work, because 'forEach' cannot be used to iterate over a dictionary (an embedded object). What can I do to solve this problem?

Run the following MapReduce operation:
// Count, for every dynamic key found under auth_dic, how many documents carry it.
// The counts are written to the collection named by the "out" option.
mr = db.runCommand({
    "mapreduce": "collectionName",
    // Emit (key, 1) once for each own property of this document's auth_dic.
    "map": function() {
        var authDic = this.auth_dic;
        for (var appId in authDic) {
            if (!authDic.hasOwnProperty(appId)) { continue; }
            emit(appId, 1);
        }
    },
    // Add up the 1s emitted for a key.
    "reduce": function(key, counts) {
        return Array.sum(counts);
    },
    "out": "collectionName" + "_keys"
})
Then query the resulting collection so as to find all the counts for the dynamic keys:
db[mr.result].find()

Related

How to detect the re-reduce stage in MongoDB map/reduce?

I use the following map/reduce setup to collect some data into array:
// Emit each document's item wrapped in a one-element array, keyed by this.key.
map: function() { emit(this.key, [this.item]); },
// Intended to merge the arrays emitted for a key into a single array.
reduce: function(key, values) {
var items = [];
// NOTE(review): the result of concat() is discarded here, and each value is an
// array (see map), so value.item looks undefined -- items is returned empty as written.
values.forEach( function(value) {items.concat(value.item);} );
return items;
},
// Use the "reduce" output action against "result_collection".
out: {reduce: "result_collection"}
I want to improve the code and detect if the resulting collection has been changed during the re-reduce stage (when mongo invokes reduce with the current content of the "result_collection").
In other words, how can I tell whether any document emitted by the map function contains an "item" that does not yet exist in the "result_collection" (under the same key, of course)?
This information can help at some further processing stages e.g. query "result_collection" to get the documents that have been updated during the map/reduce stage.
If you must do this, use a finalize function to adjust the value after all reduction is finished. You'll have to add more logic to the reduce function to handle the modified output.
I'll show you an example with the simple map-reduce defined by the following map and reduce functions:
// Map: emit the document's v field under its k field.
var map = function() {
    var groupKey = this.k;
    var amount = this.v;
    emit(groupKey, amount);
};
// Reduce: total of everything emitted for a key.
var reduce = function(key, values) {
    var total = Array.sum(values);
    return total;
};
On documents that look like { "k" : 0, "v" : 1 }, the map-reduce defined by the above functions produces result documents that look like { "_id" : 0, "value" : 17 }. Define a finalize function to modify the final document:
// Finalize: wrap the reduced value in an object flagged with m: true.
var finalize = function (key, reducedValue) {
    var wrapped = { "m" : true, "v" : reducedValue };
    return wrapped;
};
Now modify reduce to handle an element of values that might be an object of the above form:
// Reduce that accepts both plain numbers and objects carrying the number in a
// v field, adding the numeric part of each entry to the running total.
var reduce2 = function(key, values) {
    return values.reduce(function(total, entry) {
        var amount = (typeof entry == "object") ? entry.v : entry;
        return total + amount;
    }, 0);
};
Output looks like
{ "_id" : 0, "value" : { "m" : true, "v" : 14 } }
{ "_id" : 1, "value" : { "m" : true, "v" : 34 } }
{ "_id" : 2, "value" : { "m" : true, "v" : 8 } }
so you can tell what's been modified by checking value.m. Your further processing can set value.m to false, so after each map-reduce you can see which documents haven't been processed yet.

mongodb: reduce function not action when only one result is there

I need to do the weighted average.
Did the coding as below
// Map-reduce that computes a quantity-weighted average price per nkey and
// writes the results to the 'res2' collection.
db.runCommand(
{ mapreduce : "<collecton>" ,
// Emit {price, qty} taken from tags["31"] and tags["32"], keyed by {nkey}.
map: function ()
{
emit ({nkey: this.nkey}, {price: this.tags["31"], qty: this.tags["32"]});
},
// Weighted average: sum(price * qty) / sum(qty) over the values for a key.
// NOTE(review): the returned shape {wavg} differs from the emitted shape {price, qty}.
reduce: function(key, vals)
{
var ret = {wavg:0};
var mul1 = 0.0;
var sum1 = 0.0;
for (var i=0;i<vals.length;i++)
{ mul1 += vals[i].price * vals[i].qty;
sum1 += vals[i].qty;
}
ret.wavg = mul1/sum1;
return ret;
},
out: 'res2', verbose: true
}
);
> db.res2.find()
{ "_id" : { "nkey" : "key1" }, "value" : { "wavg" : 311.7647058823529 } }
{ "_id" : { "nkey" : "ke2" }, "value" : { "wavg" : 585.7142857142857 } }
{ "_id" : { "nkey" : "key3" }, "value" : { "price" : 1000, "qty" : 110 } }
{ "_id" : { "nkey" : "key4" }, "value" : { "wavg" : 825 } }
If you look at the final output (third row), you will notice that it didn't actually go through the reduce function. The key occurs only once, hence only one result is emitted. But I still want the reduce function to act on it to get the weighted average. I can't just keep price and qty wherever a key occurs only once; I need the weighted average for that key as well.
Is there any way to achieve this ?
This is essentially how mapReduce works in that the reducer is never called when you only have one result. But you can always alter such results with a finalize stage:
// Quantity-weighted average price per nkey, written to the 'res2' collection.
// Keys that occur only once never pass through reduce, so finalize converts
// their raw {price, qty} value into the same {wavg} shape.
db.runCommand({
    "mapreduce" : "<collecton>" ,
    // Emit {price, qty} taken from tags["31"] and tags["32"], keyed by {nkey}.
    "map": function () {
        emit (
            { "nkey": this.nkey},
            { "price": this.tags["31"], qty: this.tags["32"]}
        );
    },
    // Weighted average: sum(price * qty) / sum(qty) over the values for a key.
    "reduce": function(key, vals) {
        var ret = { "wavg": 0 };
        var mul1 = 0.0;
        var sum1 = 0.0;
        for ( var i=0; i<vals.length; i++ ) {
            mul1 += vals[i].price * vals[i].qty;
            sum1 += vals[i].qty;
        }
        ret.wavg = mul1/sum1;
        return ret;
    },
    // A value that still has "price" was never reduced (single emit for the key).
    // Its weighted average is simply its price: (price * qty) / qty.
    "finalize": function(key,value) {
        if (value.hasOwnProperty("price")) {
            value.wavg = value.price;
            delete value["price"];
            // Drop qty as well so every result document has the shape { wavg }.
            delete value["qty"];
        }
        return value;
    },
    "out": 'res2',
    "verbose": true
});
Alternatively, just sum your values in the reduce stage and do all the division in the finalize stage, if that suits your thinking better. But then you would need to do the "multiplication" part in the mapper for that to work.

map reduce function to count the documents- mongodb

I have a Collection with the following fields:
TestTable
{
"ID"
"Name"
"Ver"
"Serial"
"DateTime"
"FeatureID"
"FeatureName"
}
I want to have a map reduce function to get the count of the records in a particular Year.
The map reduce function i wrote is:
// Map: emit a count of 1 per document, keyed by a value derived from the year of DateTime.
// NOTE(review): Date.UTC is called with a single argument here.
map= function(){
year= Date.UTC(this.DateTime.getFullYear());
emit({year: year}, {count: 1});
}
// Reduce: intended to add up the emitted counts for a key.
reduce= function(key, values){
var count=0;
// NOTE(review): for...in yields property names (here: array indexes), so v is
// not an emitted {count} object; the "});" closing this loop also looks unbalanced.
for(v in values){
count+= v['count'];
});
return {count: count};
}
The output should give the count of documents in each year. Is the map reduce function correct?
The result i got is:
> db.Years.find()
{ "_id" : { "year" : NaN }, "value" : { "count" : NaN } }
> db.Years.find().count()
1
Which is not what i expected.
edited
one of the TestTable document:
> db.TestTable.findOne()
{
"_id" : ObjectId("527c48e99000cf10bc2a1d82"),
"ID" : "LogID16587",
"Name" : "LogName15247",
"Ver" : "VersionID11",
"Serial" : "ProductID727",
"DateTime" : ISODate("1998-12-15T18:30:00Z"),
"FeatureID" : "FeatureID465",
"FeatureName" : "FeatureName 1460"
}
Thanks in advance.
Date.UTC requires both year and month parameters. So your map function should look like this instead:
// Map: emit {count: 1} for each document, keyed by the UTC timestamp of
// January 1st of the year read from the document's DateTime.
var map = function(){
    // Declared with var so the value stays local to the function instead of
    // leaking as an implicit global. The second argument (month 0) is passed
    // because Date.UTC is given both year and month here.
    var year = Date.UTC(this.DateTime.getFullYear(), 0);
    emit({year: year}, {count: 1});
};
Also, don't use in to iterate over the elements of an array in your reduce method as it doesn't work in the way you're using it. Use a traditional for loop instead:
// Reduce: add up the count fields of all values emitted for a key and return
// the total in the same {count} shape that map emits.
var reduce = function(key, values){
    var count = 0;
    // Index-based loop: values is an array of {count} objects.
    for (var i = 0; i < values.length; i++) {
        count += values[i]['count'];
    }
    return {count: count};
};

How to do a find iterating in a array field

Hi i have a expressjs app using mongodb.
At first i find a tv by id on my "tvs" collection, i get it but now i want to find all user info from other collection "users".
This is my JSON for each collection:
tvs
{
"_id" : ObjectId("5203af83396d285ea2ecff8f"),
"brand" : "LG",
"comments" : [{
"user" : ObjectId("521dc636eda03d0f9cab3568"),
"text" : "Sold!"
}, {
"user" : ObjectId("521b2785eda03d0f9cab3566"),
"text" : "Nice TV"
}],
"model" : "47LS5600",
"price" : 499.0
}
users
{
"_id" : ObjectId("521b2785eda03d0f9cab3566"),
"name" : {
"first" : "Ruben",
"last" : "Montes"
}
}
And this is my code
// Handles for the two collections used by the route handler below.
var tvs = db.collection("tvs");
var users = db.collection("users");
// Route handler: looks up one TV by the id route parameter, then queries users
// and sends both back in a single response object.
exports.findById = function (req, res) {
var id = req.params.id;
tvs.findOne({'_id': new BSON.ObjectID(id)}, function (err, tv) {
// NOTE(review): in the sample document, comments is an array whose entries carry
// a "user" field, so tv.comments.user_id looks undefined here; err is not checked either.
users.find( { _id : tv.comments.user_id }).toArray(function (err, items) {
res.send( { tv: tv, users: items } );
});
})
}
I need to know how to iterate over the comments array from the tvs collection to get the info of each user who posted a comment.
users.find( { _id : tv.comments.user_id })
You can do a bit more logic to efficiently grab the users as a batch using the $in operator.
var mongodb = require('mongodb')
, MongoClient = require('mongodb').MongoClient
, Server = require('mongodb').Server;
// Demo: load one TV, fetch all users referenced by its comments with a single
// $in query, then print every comment with the user id replaced by the name.
// The connection is closed on every path, including errors and empty results.
MongoClient.connect('mongodb://127.0.0.1:27017/test', function (err, db) {
    if (err) throw err;

    var tvs = db.collection('tvs');
    var users = db.collection('users');
    var userNames = {};
    var tvId = new mongodb.ObjectID("5203af83396d285ea2ecff8f"); // hard-code

    // find a TV
    tvs.findOne({ _id : tvId }, function (err, tv) {
        if (err) {
            db.close();
            throw err;
        }

        var allUserIds = [];
        if (tv && tv.comments) {
            // build a list of all user IDs used in comments
            // this doesn't filter duplicates right now
            allUserIds = tv.comments.map(function (comment) {
                return comment.user;
            });
        }

        if (allUserIds.length === 0) {
            // no TV or no comments: nothing to look up, just release the connection
            db.close();
            return;
        }

        // using the list of UserIds, grab all of them ...,
        // and just return the name
        users.find({_id: { $in: allUserIds }}, { name: 1 })
            .toArray(function (err, users_list) {
                if (err) {
                    db.close();
                    throw err;
                }

                var i, len;
                // if we got some
                if (users_list && users_list.length > 0) {
                    for (i = 0, len = users_list.length; i < len; i++) {
                        userNames[users_list[i]._id] = users_list[i].name;
                    }
                    console.log("All comments ========");
                    // now all the usernames are indexed in userNames by Id
                    for (i = 0, len = tv.comments.length; i < len; i++) {
                        // swap id for name
                        tv.comments[i].user = userNames[tv.comments[i].user];
                        console.log(tv.comments[i]);
                    }
                }
                db.close(); // done with everything for this demo
            });
    });
});
I've used find and $in with an array of all userIds found in the comments for a single "tv". By using $in, it significantly reduces the number of calls needed to MongoDB to fetch single User documents. Also, using the second parameter of find, I've reduced the returned fields to just be name.
FYI -- I did simplify your structure to just be 'name' rather than 'first' and 'last'. You certainly can change it to match your exact needs.

SQL to MapReduce: how sql avg function could be done in MongoDB?

How could write the sql avg function in MapReduce MongoDB?
I've tried the following, sum the values and divide for a count. But the problem is where i do it, in reduce function or finalize function?
For example:
i have the following document
{
"_id" : ObjectId("511b7d1b3daee1b1446ecdfe"),
"l_linenumber" : 1,
"l_quantity" : 17,
"l_extendedprice" : 21168.23,
"l_discount" : 0.04,
"l_tax" : 0.02,
"l_returnflag" : "N",
"l_linestatus" : "O",
"l_shipdate" : ISODate("1996-03-13T03:00:00Z"),
"l_commitdate" : ISODate("1996-02-12T03:00:00Z"),
"l_receiptdate" : ISODate("1996-03-22T03:00:00Z"),
"l_shipinstruct" : "DELIVER IN PERSON",
"l_shipmode" : "TRUCK",
"l_comment" : "blithely regular ideas caj",
}
And the SQL query is:
select
l_returnflag,
l_linestatus,
sum(l_quantity) as sum_qty,
sum(l_extendedprice) as sum_base_price,
sum(l_extendedprice*(1-l_discount)) as sum_disc_price,
sum(l_extendedprice*(1-l_discount)*(1+l_tax)) as sum_charge,
avg(l_quantity) as avg_qty,
avg(l_extendedprice) as avg_price,
avg(l_discount) as avg_disc,
count(*) as count_order
from
lineitem
where
l_shipdate <= DATE_SUB('1998-12-01',INTERVAL 90 DAY)
group by
l_returnflag,
l_linestatus
order by
l_returnflag,
l_linestatus;
I done this mapreduce function:
// Map-reduce over "lineitem": groups by (returnflag, linestatus), accumulates
// sums and a count per group, then divides in finalize. Output goes to 'query001'.
db.runCommand({
mapreduce: "lineitem",
// Emit one value per document whose l_shipdate is on or before dataFinal.
map: function Map() {
var dataInicial = new Date("Dec 1, 1998");
// NOTE(review): dataFinal starts as the current date (new Date()) and only its
// day-of-month is set, to dataInicial's day-of-month minus 90, so the cut-off
// looks relative to today rather than to Dec 1, 1998 -- confirm.
var dataFinal = new Date();
dataFinal.setDate(dataInicial.getDate()-90);
if( this.l_shipdate<=dataFinal) {
emit(
{returnflag: this.l_returnflag, linestatus: this.l_linestatus},
{
sum_qty: this.l_quantity,
sum_base_price: this.l_extendedprice,
sum_disc_price: this.l_extendedprice*(1-this.l_discount),
sum_charge: this.l_extendedprice*(1-this.l_discount)*(1+this.l_tax),
avg_qty: this.l_quantity,
avg_price: this.l_extendedprice,
avg_disc: this.l_discount,
count_order: 1
}
);
}
},
// Field-by-field sum of all values for a key; the avg_* fields hold running
// sums at this stage.
reduce: function(key, values) {
var ret = {sum_qty: 0, sum_base_price: 0, sum_disc_price: 0, sum_charge: 0,
avg_qty: 0, avg_price: 0, avg_disc: 0, count_order: 0};
for (var i = 0; i < values.length; i++) {
ret.sum_qty += values[i].sum_qty;
ret.sum_base_price += values[i].sum_base_price;
ret.sum_disc_price += values[i].sum_disc_price;
ret.sum_charge += values[i].sum_charge;
ret.avg_qty += values[i].avg_qty;
ret.avg_price += values[i].avg_price;
ret.avg_disc += values[i].avg_disc;
ret.count_order += values[i].count_order;
}
return ret;
},
// Turn the accumulated sums into averages by dividing by count_order.
finalize: function(key, value) {
value.avg_qty = value.avg_qty/value.count_order;
// NOTE(review): the next two lines divide value.avg_qty (already divided on the
// line above) instead of the accumulated avg_price / avg_disc sums.
value.avg_price = value.avg_qty/value.count_order;
value.avg_disc = value.avg_qty/value.count_order;
return value;
},
out: 'query001'
});
The results for avg_qty, avg_price and avg_disc are incorrect. What is going on? Or should the sum and the division by count be done inside the reduce function?
Here is how you do it with MapReduce:
// Map: key each line item by (returnflag, linestatus) and emit its extended
// price together with a count of 1.
var m = function (){
    emit( {returnflag: this.l_returnflag, linestatus: this.l_linestatus} ,
          { sum_base_price: this.l_extendedprice, count : 1 } );
};
// Reduce: add up prices and counts for a key. The result has the same shape as
// the emitted values.
var r = function (name, values){
    var res = {sum_base_price : 0, count : 0};
    // forEach hands over each value directly, so no index is needed.
    values.forEach(function (v) {
        res.sum_base_price += v.sum_base_price;
        res.count += v.count;
    });
    return res;
};
// Finalize: derive the average from the accumulated sum and count.
var f = function(key, res){
    res.avg = res.sum_base_price / res.count;
    return res;
};
Then, call mapReduce:
// Cut-off date: 90 days before 1998-12-01 (UTC), mirroring the SQL
// DATE_SUB('1998-12-01', INTERVAL 90 DAY).
var dataFinal = new Date(Date.UTC(1998, 11, 1));
dataFinal.setUTCDate(dataFinal.getUTCDate() - 90);

// Run the map-reduce with the date filter applied as a query, so only matching
// documents reach the map function.
db.lineitem.mapReduce( m, r,
    { finalize : f,
      out : {inline : 1},
      // $lte mirrors the SQL predicate l_shipdate <= cut-off
      query: {l_shipdate:{$lte:dataFinal}}
    }
);
So you are not filtering in the map function, you are doing it in a query before calling map (that's more efficient).
In aggregation framework it would be:
// Cut-off date: 90 days before 1998-12-01 (UTC), mirroring the SQL
// DATE_SUB('1998-12-01', INTERVAL 90 DAY).
var dataFinal = new Date(Date.UTC(1998, 11, 1));
dataFinal.setUTCDate(dataFinal.getUTCDate() - 90);

// Same grouping with the aggregation framework: filter by ship date, then
// compute sum, average and count per (returnflag, linestatus).
db.lineitem.aggregate( [
    // $lte mirrors the SQL predicate l_shipdate <= cut-off
    {$match: {l_shipdate:{$lte:dataFinal}}},
    {$group: { _id: {returnflag: "$l_returnflag", linestatus: "$l_linestatus"},
               // field name matches the document's l_extendedprice
               sum_base_price: {$sum:"$l_extendedprice"},
               avg_base_price: {$avg:"$l_extendedprice"},
               count: {$sum: 1}
             }
    }
])
Add your other fields as needed...