Sum operation on documents with different structures - mongodb

I'm trying to use the aggregation pipeline to build a report table (with sorting, paging, etc.) using MongoDB as my back end. I've run into a problem: my documents do not all have the same fields, but I need to add up all the values. I was trying to use the $unwind stage, but it only works on arrays, so I've run out of alternatives.
Sample Documents
{
"_id": "first",
"apple": 6,
"pears": 7,
"total_fruits": "13"
},
{
"_id": "second",
"apple": 6,
"bananas": 2,
"total_fruits": "8"
}
Desired Result
{
"_id": "result",
"apple": 12,
"pears": 7,
"bananas": 2,
"total_fruits": "21"
}

Instead of aggregating, why not use mapReduce()?
function sumKeyFromCollection(collectionName, keyName) {
    // keyName is only visible to the server-side map/reduce functions via the "scope" option below
    var map = function() {
        if (this[keyName] !== undefined) {
            emit(keyName, this[keyName]);
        }
    };
    var red = function(k, v) {
        var i, sum = 0;
        for (i = 0; i < v.length; i++) {
            sum += v[i];
        }
        return sum;
    };
    return db[collectionName].mapReduce(map, red, {
        out: { inline: 1 },
        scope: { keyName: keyName }
    });
}
For your example documents:
{
"_id": "first",
"apple": 6,
"pears": 7,
"total_fruits": "13"
},
{
"_id": "second",
"apple": 6,
"bananas": 2,
"total_fruits": "8"
}
You could call the function on each field name:
console.log(sumKeyFromCollection("collectionName", "apple"));
> { "_id": "...", "apple": 12 }
Then use each of them to produce your resultant document:
var fruits = [ "apple", "bananas", "total_fruits" ];
var results = [];
for (var i = 0; i < fruits.length; i++) {
results.push(sumKeyFromCollection("collectionName", fruits[i]));
}
EDIT Also, to get every field name in a collection programmatically:
function getAllCollectionFieldNames(collectionName) {
    var mr = db.runCommand({
        "mapreduce": collectionName,
        "map": function() {
            for (var key in this) { emit(key, null); }
        },
        "reduce": function(key, stuff) { return null; },
        "out": collectionName + "_keys"
    });
    return db[mr.result].distinct("_id");
}
Which returns an array of unique field names (even if they only appear in one document).
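Putting the two helpers together, a rough sketch of how the merged result document could be built (this assumes the helpers above, that the inline mapReduce output exposes a results array as it does in the shell, and that every summed field is numeric; total_fruits is a string in the sample, so it would need converting first):
var merged = { "_id": "result" };
getAllCollectionFieldNames("collectionName").forEach(function(field) {
    if (field === "_id") return;                          // skip the id field
    var mr = sumKeyFromCollection("collectionName", field);
    if (mr.results && mr.results.length > 0) {            // inline output: { results: [ { _id, value } ], ... }
        merged[field] = mr.results[0].value;
    }
});
printjson(merged);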

How to save deletion in a deeply nested MongoDB document

I am new to MongoDB and I am using the MongoDB shell to perform the operations.
I am trying to remove the array named Process from all the Items, but it seems that I do not grasp the remove concept correctly.
The documents we use are deeply nested - we do not know how many items there are, or how deep the nesting goes.
What I tried so far is to use recursion to iterate through the items:
function removeAllProcessFields(docItems)
{
    if (Array.isArray(docItems))
    {
        docItems.forEach(function(item)
        {
            print("idItem: " + item._id);
            if (item.Process == null)
            {
                print("Process null");
            }
            else
            {
                $unset: { Process: "" }
            }
            removeAllProcessFields(item.Items);
        })
    }
}
var docs = db.getCollection('MyCollection').find({})
docs.forEach(function(doc)
{
    print("idDoc: " + doc._id);
    removeAllProcessFields(doc.Items);
})
But I am having difficulties using $unset properly to persist the deletion.
An example document would be:
{
    "_id": "622226d319517e83e8ed6151",
    "Name": "test1",
    "Description": "",
    "Items": [{
        "_id": "622226d319517e83e8ed614e",
        "Name": "test-item",
        "Description": "",
        "Process": [{
            "Name": "Step1"
        }, {
            "Name": "Step2"
        }],
        "Items": [{
            "_id": "622226d319517e83e8ed614f",
            "Name": "test-subItem1",
            "Description": "",
            "Process": [{
                "Name": "StepSub1"
            }, {
                "Name": "StepSub2"
            }, {
                "Name": "StepSub3"
            }],
            "Items": []
        },
        {
            "_id": "622226d319517e83e8ed6150",
            "Name": "test-subItem2",
            "Description": "",
            "Process": [{
                "Name": "StepSub4"
            }, {
                "Name": "StepSub5"
            }, {
                "Name": "StepSub6"
            }],
            "Items": []
        }]
    }]
}
What I hope to achieve would be:
{
    "_id": "622226d319517e83e8ed6151",
    "Name": "test1",
    "Description": "",
    "Items": [{
        "_id": "622226d319517e83e8ed614e",
        "Name": "test-item",
        "Description": "",
        "Items": [{
            "_id": "622226d319517e83e8ed614f",
            "Name": "test-subItem1",
            "Description": "",
            "Items": []
        },
        {
            "_id": "622226d319517e83e8ed6150",
            "Name": "test-subItem2",
            "Description": "",
            "Items": []
        }]
    }]
}
Something like this maybe using the $[] positional operator:
db.collection.update({},
{
    $unset: {
        "Items.$[].Items.$[].Process": 1,
        "Items.$[].Process": 1
    }
})
You just need to construct it in the recursion ... (playground)
JavaScript recursive function example:
mongos> db.rec.find()
{ "_id" : ObjectId("622a6c46ae295edb276df8e2"), "Items" : [ { "a" : 1 }, { "Items" : [ { "Items" : [ { "Items" : [ ], "Process" : [ 1, 2, 3 ] } ], "Process" : [ 4, 5, 6 ] } ], "Process" : [ ] } ] }
mongos> db.rec.find().forEach(function(obj) {
    var id = obj._id, ar = [], z = "";
    function x(obj) {
        if (typeof obj.Items != "undefined") {
            obj.Items.forEach(function(k) {
                if (typeof k.Process != "undefined") {
                    z = z + ".Items.$[]";
                    ar.push(z.substring(1) + ".Process");
                }
                if (typeof k.Items != "undefined") { x(k); }
            });
        }
    }
    x(obj);
    ar.forEach(function(del) {
        print("db.collection.update({_id:ObjectId('" + id + "')},{$unset:{'" + del + "':1}})");
    });
})
db.collection.update({_id:ObjectId('622a6c46ae295edb276df8e2')},{$unset:{'Items.$[].Process':1}})
db.collection.update({_id:ObjectId('622a6c46ae295edb276df8e2')},{$unset:{'Items.$[].Items.$[].Process':1}})
db.collection.update({_id:ObjectId('622a6c46ae295edb276df8e2')},{$unset:{'Items.$[].Items.$[].Items.$[].Process':1}})
mongos>
Explained:
Loop over all documents in the collection with forEach.
Define a recursive function x that walks any number of nested Items, detects a Process field at each level, and pushes the corresponding path to the array ar.
Finally, loop over ar and construct the $unset update queries. In the example they are only printed, for safety, but you can improve this by generating a single query per document and executing it (see the sketch below).
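A sketch of that improvement, building one combined $unset per document and executing it directly instead of printing (assumes MongoDB 3.6+ for the $[] operator, the hypothetical collection name rec, and that every item carries an Items array as in the example, since $[] errors when the array path is missing):
db.rec.find().forEach(function(obj) {
    var paths = {};
    // walk the nested Items, recording one "Items.$[]...Process" path per depth that has a Process field
    function walk(node, prefix) {
        if (!Array.isArray(node.Items)) return;
        node.Items.forEach(function(child) {
            var p = prefix + "Items.$[].";
            if (typeof child.Process != "undefined") {
                paths[p + "Process"] = 1;
            }
            walk(child, p);
        });
    }
    walk(obj, "");
    if (Object.keys(paths).length > 0) {
        // one update per document, unsetting every collected path at once
        db.rec.update({ _id: obj._id }, { $unset: paths });
    }
});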
Assuming you are on v>=4.4, you can use the "merge onto self" feature of $merge plus a recursive function to sweep through the collection and surgically remove one field, or a list of fields, at any level of the hierarchy. The same sort of need arises when processing json-schema data, which is also arbitrarily hierarchical.
The solution below has extra logic to "mark" documents that had any modifications so the unmodified ones can be removed from the update set passed to $merge. It can also be further refined to reduce some variables; it was edited down from a more general solution that had to examine keys and values.
db.foo.aggregate([
    {$replaceRoot: {newRoot: {$function: {
        body: function(obj, target) {
            var didSomething = false;
            var process = function(holder, spot, value) {
                // test FIRST since [] instanceof Object is true!
                if(Array.isArray(value)) {
                    for(var jj = value.length - 1; jj >= 0; jj--) {
                        process(value, jj, value[jj]);
                    }
                } else if(value instanceof Object) {
                    walkObj(value);
                }
            };
            var walkObj = function(obj) {
                Object.keys(obj).forEach(function(k) {
                    if(target.indexOf(k) > -1) {
                        delete obj[k];
                        didSomething = true;
                    } else {
                        process(obj, k, obj[k]);
                    }
                });
            }
            // ENTRY POINT:
            if(!Array.isArray(target)) {
                target = [ target ]; // if not array, make it an array
            }
            walkObj(obj);
            if(!didSomething) {
                obj['__didNothing'] = true;
            }
            return obj;
        },
        // Invoke!
        // You can delete multiple fields with an array, e.g.:
        //   ..., ['Process','Description']
        args: [ "$$ROOT", 'Process' ],
        lang: "js"
    }}}}

    // Only let thru docs WITHOUT the marker:
    ,{$match: {'__didNothing': {$exists: false}} }

    ,{$merge: {
        into: "foo",
        on: [ "_id" ],
        whenMatched: "merge",
        whenNotMatched: "fail"
    }}
]);

wrong result in MongoDB mapreduce function?

I have a collection "cars" from which I want to get the count of certified cars as trueCount and falseCount, where certified is a boolean.
I am issuing the following mapReduce query:
map:-
function() {
    for (var idx = 0; idx < this.cars.length; idx++) {
        var key = this.cars[idx].carName;
        var value = {
            count: 1,
            certifiedCheck: this.cars[idx].certified
        };
        emit(key, value);
    }
}
reduce:-
function(key, values) {
    certifiedCount = { trueCount: 0, falseCount: 0 };
    values.forEach(function(value) {
        if (value.certifiedCheck)
            certifiedCount.trueCount += value.count;
        else
            certifiedCount.falseCount += value.count;
    });
    return certifiedCount;
}
query:
{ "type": "cars" }
I am getting the following result:
{ "id" : "carName" , "value" : { "true" : 277.0 , "false" : NaN}};
even though I have 457 documents in the collection.
Please someone help me here to fix this issue.
Thanks in advance
You mixed up your map-reduce: to reduce to two keys "true" and "false" you need to emit these as keys. Then, the reducer will run per key.
As pseudo code:
map:
for each car
evaluate whether it should be true or false
key = (true/false)
emit(key, { count : 1 })
reduce:
(input is true/false as key, array of count-documents as value)
for each value-document
sum up the count
return key, sum
This should yield two documents with true/false as the key and the respective sum as the value.
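A concrete sketch of that pseudo code, assuming the same cars array and type field as in the question (the collection name is a placeholder):
var map = function() {
    for (var idx = 0; idx < this.cars.length; idx++) {
        // the certified flag itself becomes the key
        emit(this.cars[idx].certified ? "true" : "false", { count: 1 });
    }
};

var reduce = function(key, values) {
    var sum = 0;
    values.forEach(function(v) { sum += v.count; });
    // return the same shape that map emits so re-reduce stays correct
    return { count: sum };
};

db.collection.mapReduce(map, reduce, { query: { "type": "cars" }, out: { inline: 1 } });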
You should consider using the aggregation framework instead, since it achieves the same result but faster than MapReduce: aggregation runs natively in the server (C++), whereas MapReduce spawns separate JavaScript thread(s) to run JS code.
That said, if you run the following aggregation pipeline, which uses the $cond operator to evaluate the counts based on the logic in the field expression, you will get a similar result:
Because you haven't shown your collection schema, I've assumed the following sample documents with a cars array field, since your mapReduce loops over the cars property:
Populate test collection
db.collection.insert([
{ _id: 1, cars: [ { model: "A", certified: true }, { model: "B", certified: true } ] },
{ _id: 2, cars: [ { model: "A", certified: false }, { model: "B", certified: true } ] },
{ _id: 3, cars: [ { model: "A", certified: true }, { model: "B", certified: false } ] },
{ _id: 4, cars: [ { model: "A", certified: true }, { model: "B", certified: false } ] },
{ _id: 5, cars: [ { model: "A", certified: true }, { model: "B", certified: true } ] }
])
Run aggregation operation
db.collection.aggregate([
    { "$unwind": "$cars" },
    {
        "$group": {
            "_id": "$cars.model",
            "trueCount": {
                "$sum": {
                    "$cond": [ "$cars.certified", 1, 0 ]
                }
            },
            "falseCount": {
                "$sum": {
                    "$cond": [ "$cars.certified", 0, 1 ]
                }
            }
        }
    }
])
Result:
/* 1 */
{
"_id" : "A",
"trueCount" : 4,
"falseCount" : 1
}
/* 2 */
{
"_id" : "B",
"trueCount" : 3,
"falseCount" : 2
}
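If what you are after is a single collection-wide pair of counts rather than counts per model, the same $cond trick works with a constant group key; a sketch on the sample data above:
db.collection.aggregate([
    { "$unwind": "$cars" },
    {
        "$group": {
            "_id": null,
            "trueCount": { "$sum": { "$cond": [ "$cars.certified", 1, 0 ] } },
            "falseCount": { "$sum": { "$cond": [ "$cars.certified", 0, 1 ] } }
        }
    }
])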

Add existing fields to nested schema

I have documents in my db with schema:
var MySchema = new Schema({
    Street: { type: String },
    Age: { type: Number, default: null },
    Date: { type: Date },
    Stuff: [
        {
            _id: false,
            ThisDate: { type: Date },
            ThisStreet: { type: String }
        }]
});
Right now it is (Stuff is empty):
db.person.findOne()
{
Street: 'TheStreet',
Age: 23,
Date: ISODate("2016-02-19T00:00:00.000Z"),
Stuff: []
}
I then want to update all documents. What I want to do is to move Street and Date fields into Stuff array and delete Street and Date fields from schema.
Like this:
db.person.findOne()
{
Age: 23,
Stuff : [
{
ThisDate : ISODate("2016-02-19T00:00:00.000Z"),
ThisStreet : "TheStreet"
}
]
}
How could I achieve this?
Best Regards
Since this is a "one off" operation I would do it in the shell rather than use any other framework.
For MongoDB 3.2.x releases and greater, use bulkWrite():
var ops = [];

db.person.find({
    "Street": { "$exists": true },
    "Date": { "$exists": true }
}).forEach(function(doc) {
    ops.push({
        "updateOne": {
            "filter": { "_id": doc._id },
            "update": {
                "$unset": {
                    "Street": "",
                    "Date": ""
                },
                "$set": {
                    "Stuff": [{
                        "ThisDate": doc.Date,
                        "ThisStreet": doc.Street
                    }]
                }
            }
        }
    });

    if ( ops.length == 1000 ) {
        db.person.bulkWrite(ops);
        ops = [];
    }
})

if ( ops.length > 0 )
    db.person.bulkWrite(ops);
Or for MongoDB 2.6.x and 3.0.x releases use this version of Bulk operations:
var bulk = db.person.initializeUnorderedBulkOp(),
    count = 0;

db.person.find({
    "Street": { "$exists": true },
    "Date": { "$exists": true }
}).forEach(function(doc) {
    bulk.find({ "_id": doc._id }).updateOne({
        "$unset": {
            "Street": "",
            "Date": ""
        },
        "$set": {
            "Stuff": [{
                "ThisDate": doc.Date,
                "ThisStreet": doc.Street
            }]
        }
    });
    count++;   // count each queued operation so the batch flushes every 1000 documents

    if ( count % 1000 == 0 ) {
        bulk.execute();
        bulk = db.person.initializeUnorderedBulkOp();
    }
});

if ( count % 1000 != 0 )
    bulk.execute();
Bottom line is that you need to iterate the documents in the collection and write them back with the re-arranged content "one by one". At least the Bulk operations API used in both cases reduces the back-and-forth with the server to one round trip per 1000 documents processed.
Also, rather than rewriting the whole document, you use $unset to remove the unwanted fields and $set to write "just" the data you want.
Working example
db.person.insert(
{
"Street": 'TheStreet',
"Age": 23,
"Date": ISODate("2016-02-19T00:00:00.000Z"),
"Stuff": []
}
)
Then after running either update above, the result is:
{
"_id" : ObjectId("56e607c1ca8e7e3519b4ce93"),
"Age" : 23,
"Stuff" : [
{
"ThisDate" : ISODate("2016-02-19T00:00:00Z"),
"ThisStreet" : "TheStreet"
}
]
}
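As a side note, on MongoDB 4.2 or newer (well after this answer was written) the same reshaping can be done server-side in one statement with an aggregation-pipeline update; a sketch under that assumption:
db.person.updateMany(
    { "Street": { "$exists": true }, "Date": { "$exists": true } },
    [
        { "$set": { "Stuff": [ { "ThisDate": "$Date", "ThisStreet": "$Street" } ] } },
        { "$unset": [ "Street", "Date" ] }
    ]
)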
I'd suggest you transform the documents using the aggregation framework and update as described in the code snippet below:
db.person.aggregate([
    { $project: { Age: 1, Stuff: [{ Date: "$Date", Street: "$Street" }] } }
]).forEach(function(o) {
    var id = o._id;
    delete o._id;
    db.person.update({ _id: id, Street: { $exists: true } }, o);
});
After successful execution, your document or documents should look like:
{
"_id" : ObjectId("56e2cd45792861e14df1f0a9"),
"Age" : 23.0,
"Stuff" : [
{
"Date" : ISODate("2016-02-19T00:00:00.000+0000"),
"Street" : "TheStreet"
}
]
}

MongoDB - Match multiple values in array

I want to be able to find multiple documents that have three or more matching values in an array. Let's say we have the following documents:
[{
name: 'John',
cars: [1, 2, 3, 4]
},
{
name: 'Jane',
cars: [1, 2, 3, 8]
},
{
name: 'Smith',
cars: [1, 8, 10]
}]
And we want to find documents that have at least three of the values (in cars) in the following array:
[1, 2, 3, 4, 5, 6, 7]
The results would then be:
[{
name: 'John',
cars: [1, 2, 3, 4]
},
{
name: 'Jane',
cars: [1, 2, 3, 8]
}]
Anyone know how to achieve this?
You can issue a $in query and then filter in code for records having 3 or more entries in the desired array. (Here is some sample Python code.)
def dennisQuestion():
    permissibleCars = [1, 2, 3, 4, 5, 6, 7]
    cursor = db.collection.find({"cars": {"$in": permissibleCars}})
    for record in cursor:
        if len(set(permissibleCars) & set(record["cars"])) >= 3:
            yield record
This is a good question, and I don't think there's a simple way to do it with the usual operators that MongoDB gives you. However I can think of the following methods to achieve this:
1. New Field
Calculate this in app code and maintain the result in a new field on the document.
2. Brute Force
db.Collection.find( { $or: [
    { cars: { $all: [ 1, 2, 3 ] } },
    { cars: { $all: [ 2, 3, 4 ] } },
    ... list out all 35 combinations
] } )
3. Use $where
db.Collection.find( { cars: { $in: [1,2,3,4,5,6,7] }, $where: function() {
    var numMatches = 0;
    for (var i = 1; i <= 7; i++)
        if (this.cars.indexOf(i) > -1) numMatches++;
    return numMatches >= 3;
} } );
I had to slightly modify @Zaid Masud's option 3 when the values were strings in Mongo 4.0.3:
db.Collection.find( { cars: { $in: ["s1", "s2", "s3" , "s4", "s5" , "s6" , "s7"] },
$where: function() {
var options = ["s1", "s2", "s3" , "s4", "s5" , "s6" , "s7"];
var numMatches = 0;
for (var i = 0; i < 7; i++)
if (this.cars.indexOf(options[i]) > -1)
numMatches++;
return numMatches >= 3;
}
} );
(This seemed a bit large for a comment)
For Mongo v4.4.1 this query works
[
    {
        $project: {
            name: 1,
            cars: 1,
            show: {
                $let: {
                    vars: {
                        "b": {
                            $gte: [ { $size: { $setIntersection: [ [1,2,3,4,5,6,7], "$cars" ] } }, 3 ]
                        }
                    },
                    in: "$$b"
                }
            }
        }
    },
    {
        $match: {
            show: true
        }
    },
    {
        $project: {
            show: 0
        }
    }
]
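On MongoDB 3.6+ the same intersection test can also be collapsed into a single $expr filter, which avoids the extra $project/$match stages; a sketch:
db.collection.find({
    "$expr": {
        "$gte": [
            { "$size": { "$setIntersection": [ [ 1, 2, 3, 4, 5, 6, 7 ], "$cars" ] } },
            3
        ]
    }
})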

MongoDB Map/Reduce only working in inline-mode

I wrote map/reduce/finalize functions which work fine during my tests with the out: {inline: true} option.
But if I try to save the result in a collection using out: {replace: 'test'} (or merge, ..) it doesn't give me the same result.
Does anyone have a clue what I am doing wrong?
Thx
Inline:
db.runCommand({mapreduce: 'source', map: map_deliverstat, reduce: reduce_deliverstat, finalize: finalize_deliverstat, out: {inline:1}})
{
"_id": {
"date": ISODate("2012-03-13T00:00:00Z"),
"customerid": 469
},
"value": {
"date": ISODate("2012-03-13T00:00:00Z"),
"customerid": NumberLong(469),
"sum": 294,
"nomarker": 42,
"marker": 252,
"product1": 34,
"product2": 22,
"product3": 20,
"product4": 19,
"product5": 16
}
}
Replace:
db.runCommand({mapreduce: 'source', map: map_deliverstat, reduce: reduce_deliverstat, out: {replace: 'test'}, finalize: finalize_deliverstat})
{
"_id": {
"date": ISODate("2012-03-13T00:00:00Z"),
"customerid": 469
},
"value": {
"date": ISODate("2012-03-13T00:00:00Z"),
"customerid": NumberLong(469),
"sum": 2,
"nomarker": 0,
"marker": 2,
"product1": 0,
"product2": 0,
"product3": 0,
"product4": 0,
"product5": 0
}
}
Another way of running map-reduce; modify it as per your need and see if this works for you.
The following query counts the number of customers grouped by state:
map = function() {
    emit({state: this.CustomerState}, {count: 1})
}

reduce = function(key, values) {
    var count = 0;
    values.forEach(function(v) {
        count += v['count'];
    });
    // return the same shape as the emitted values so re-reduce stays correct
    return {count: count};
}

db.createCollection('map_temp')
db.customer_info.mapReduce(map, reduce, {out: 'map_temp'})
You will have the result saved in the map_temp collection.
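For comparison, an aggregation sketch that writes the same per-state counts into map_temp in one pass and side-steps mapReduce's re-reduce subtleties (assumes the CustomerState field used above; $out requires MongoDB 2.6+):
db.customer_info.aggregate([
    { "$group": { "_id": { "state": "$CustomerState" }, "count": { "$sum": 1 } } },
    { "$out": "map_temp" }
])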