mongodb map emit keys from subdocument - mongodb

I have a document which includes a subdocument:
{
"_id" : ObjectId("XXXXX"),
"SearchKey" : "1234",
"SearchTerms" : {
"STA" : ["1"],
"STB" : ["asdfasdf"],
"STC" : ["another"]
}
}
The SearchTerm elements are not fixed - sometimes we'll have STA without STC, for example.
I can do this:
var map = function() {
for (key in this.SearchTerms)
{
emit(key, 1);
}
}
but I can't do this:
var map = function() {
for (var i=0; i< this.SearchTerms.length; i++)
{
emit(this.SearchTerms[i], 1)
}
}
because the latter doesn't produce any results after the reduce. Why not?
As an aside - what I need to do is count the cross-product of the search terms over all documents, that is, find the incidence of (STA and STB) and (STA and STC) and (STB and STC) in the case above. If someone knows how to do that right away, that works even better.
As always, thanks for the help

The key that you emit should be a composite of both keys.
var map = function() {
if(!this.SearchTerms) return;
for(var i = 0 ; i < this.SearchTerms.length; i++){
var outerKey = this.SearchTerms[i];
for(var j = i + 1; j < this.SearchTerms.length; j++){
var innerKey = this.SearchTerms[j];
// assuming you don't care about counting (STA and STC) separately from (STC and STA),
// then order doesn't matter, lets make sure both occurences have the same key.
var compositeKey = (outerKey < innerKey) ? outerKey+"_"+innerKey : innerKey+"_"+outerKey;
emit(compositeKey, 1);
}
}
}

This is because this.SearchTerms is a dictionary/subdocument and not an array. this.SearchTerms[0] doesn't exist.
For the second question: something like this should work:
for (key1 in this.SearchTerms)
{
for (key2 in this.SearchTerms)
{
if (key1 < key2)
{
key = [key1, key2];
emit(key, 1);
}
}
}

Related

Getting a total count of key occurrences

I have a nested List that I've converted from a map:
final currentWeek = userProgram.weekMap[userProgram.currentWeek]?.values.toList();
Which gives me a dataset of more Lists, like this:
Each of these Lists has a completed key. I want to get a total count of this specific key so that I can do calculations based off of it.
I've tried doing nested for loops to get to this value, but I'm a little lost on getting the count of all the completed keys.
currentWeek?.forEach((e) {
e.forEach((d) {
print(d.completed);
});
});
Just use a simple loop and increment a variable:
var completedCount = 0;
for (var e in currentWeek ?? []) {
for (var d in e) {
if (d.completed) {
completedCount += 1;
}
}
}
print(completedCount);
Alternatively, if you prefer a functional approach:
var completedCount = currentWeek?.fold<int>(
0,
(countSoFar, step) => countSoFar + (step.completed ? 1 : 0),
) ??
0;

MongoDB - String to ObjectID and create new collection

db.firmalar.find().forEach(function(obj) {
for( var i = 0; i < obj.osgbIdleri.length; i++){ //ObjectId ARRAY
obj.osgbIdleri[i] = ObjectId(obj.osgbIdleri[i]);
}
//out:result (firmaId.id & firmaId.osgbIdleri.id(ObjectId))
});
I want to save the string field "obj.osgbIdleri" in each document in the "Firmalar" collection as the ObjectId field. I think I can do this using "aggregation". But when using "aggregation", I can not return every object in foreach. I want to create "firmaId.id" and "firmaId.osgbIdleri.id (ObjectID)" while creating a new collection.
(Posted on behalf of the OP).
db.firmalar.mapReduce(
function() {
for( var i = 0; i < this.osgbIdleri.length; i++){
this.osgbIdleri[i] = ObjectId(this.osgbIdleri[i]);
}
emit(this._id, this.osgbIdleri); },
function(key, values) {return value}, {
query:{},
out:"firmalarId"
}
).find()

Sort by best matches mongodb

I have collection of data in Mongodb, i want to give best matches suggestion while user input query in our suggestion box,
when user start typing com suggestion should be:
Computer
Computer Science
something more alike
I am sorting in Node by getting all matched data from mongo first and then give a rank to each data
function rank(name, q) {
var len = name.length,
lastIndex = -1;
for(var i = 0; i < q.length; i++) {
var n = name.indexOf(q[i], (lastIndex + 1));
if(n !== -1) {
len--;
lastIndex = n;
}
}
return len;
}
var query = 'com';
// giving rank to data
data = data.map(function(v) {
v.rank = rank(v.value, query);
return v;
});
// sorting by rank
data = data.sort(function(a, b) {
return a.rank - b.rank
});
It is giving me satisfied result, but it will be too slow while dealing with large data.
I want let mongodb engine to deal with sorting and give me just limited best matches result.
Maybe you could do it through mapreduce. Map-reduce is a data processing paradigm for condensing large volumes of data into useful aggregated results.
var mapFn = function(){
var len = this.name.length,
lastIndex = -1;
var q = 'com';
for(var i = 0; i < q.length; i++) {
var n = this.name.indexOf(q[i], (lastIndex + 1));
if(n !== -1) {
len--;
lastIndex = n;
}
}
emit(len, this);
};
var reduceFn = function(key, values){
return values.sort(function(a,b){
return a.name - b.name;
});
};
db.collection.mapReduce(mapFn, reduceFn, { out: { reduce: 'result_collection'}});

How to add or inc to an array in a update query?

If I have a doc which has an array which contains a items which represents counts for a day, perhaps like :-
{
data : [ {'20141102' : 2 },{'20141103' : 4 } ]
}
when I do an update, and I have a string '20141103' and then later a '20141104' I want to either inc the array entry or add a new array entry. Is this possible with an update?
Yes, it's feasible. I tried like this:
(run on mongo shell; both client and server are V2.6.4)
function tryAndFine(coll, key, value) {
var entry = {};
entry[key] = value;
var parent = 'data';
var prefix = parent + '.';
function incrementOnly() {
var criteria = {};
criteria[prefix + key] = {$exists : true};
var update = {};
update[prefix + "$." + key] = value;
var result = coll.update(criteria, {$inc : update});
// if increment fails, try to add a new one
if (result.nModified == 0) {
addNewElement();
}
}
function addNewElement() {
var criteria = {};
criteria[prefix + key] = {$exists : false};
var update = {};
update[parent] = entry;
var result = coll.update(criteria, {$push : update}, {upsert : true});
// if exists, try to increment the count
if (result.upserted == 0 && result.nModified == 0) {
incrementOnly();
}
}
// run entry
incrementOnly();
}
// test
var c = db.c;
c.drop();
tryAndFine(c, '20141103', 1);
tryAndFine(c, '20141103', 1);
tryAndFine(c, '20141104', 1);
tryAndFine(c, '20141105', 1);
tryAndFine(c, '20141104', 1);
// output
{
"_id" : ObjectId("54577e1a3502852bd4ad2395"),
"data" : [ {
"20141103" : 2
}, {
"20141104" : 2
}, {
"20141105" : 1
} ]
}

MongoDB map reduce count giving more results than a query

I have a collection users in Mongo and I execute this map reduce which I believe is the equivalent of a COUNT(*) GROUP BY origin:
> m = function() { for (i in this.membership) {
... emit( this.membership[i].platform_profile.origin, 1 );
... } }
function () {
for (i in this.membership) {
emit(this.membership[i].platform_profile.origin, 1);
}
}
> r = function( id, values ) { var result = 0;
... for ( var i = 0; i < values.length; i ++ ) { result += values[i]; }
... return result; }
function (id, values) {
var result = 0;
for (var i = 0; i < values.length; i++) {
result += values[i];
}
return result;
}
> db.users.mapReduce(m, r, {out : { inline: 1}});
{
"results" : [
{
"_id" : 0,
"value" : 15
},
{
"_id" : 1,
"value" : 449
},
...
}
But if I try to count how many documents have this field set to a specific value like 1, I get fewer results:
db.users.count({"membership.platform_profile.origin": 1});
424
What am I missing?
Are your count queries using a sparse index by any chance? My only guess there would be if some other query criteria resulted in documents absent from from index to be ignored from the count.
I recreated your schema with some fixture data and the results between map/reduce and simple count queries are in agreement:
db.users.drop();
var map = function() {
for (i in this.membership) {
emit(this.membership[i].platform_profile.origin, 1);
}
};
var reduce = function(id, values ) {
var result = 0;
for (var i = 0; i < values.length; i++) {
result += values[i];
}
return result;
}
var origins = {1: "a", 2: "b", 3: "c", 4: "d"};
for (var i = 0; i < 1000; ++i) {
var membership = [];
for (var o in origins) {
if (0 == i % o) {
membership.push({ platform_profile: { origin: origins[o] }});
}
}
db.users.save({ membership: membership });
}
db.users.mapReduce(map, reduce, {out: {inline: 1}}).results.forEach(function(result){
print(result["_id"] + ": " + result["value"]);
});
for (var o in origins) {
print(origins[o] + ": " + db.users.count({"membership.platform_profile.origin": origins[o]}));
}
Here's the output:
$ mongo --quiet mr_count.js
a: 1000
b: 500
c: 334
d: 250
a: 1000
b: 500
c: 334
d: 250
You can use the following map/reduce for the equivalent of COUNT(*) GROUP BY origin
Map/Reduce Functions :
map = function() {
if(!this.membership) return;
for (i in this.membership) {
if(!this.membership[i].platform_profile || !this.membership[i].platform_profile.origin) return;
emit(this.membership[i].platform_profile.origin, 1);
}
}
reduce = function(key, values) {
var count = 0;
for (v in values) {
count += values[v];
}
return count;
}
result = db.runCommand({
"mapreduce" : "users",
"map" : map,
"reduce" : reduce,
"out" : "users_count"
});
I had the same issue. I replaced x.length by Array.sum(x) in the reduce function (assuming you emit 1 in the map function) and it works. I agree x.length should work too, but I cannot explain why it does not.