Update a mongo field based on another collections data - mongodb

I'm looking for a way to update a field based on the sum of the data of another collection.
I tried to bring all the meals and use forEach to call the Products collection for each meal, tested if it was working, but I got a time out.
meals.find().forEach(meal => {
var products = db.Products.find(
{ sku: { $in: meal.products } },
{ _id: 1, name: 1, sku: 1, nutritional_facts: 1 }
)
printjson(products)
})
My goal was to execute something like this below to get the desired result, but I got "SyntaxError: invalid for/in left-hand side". Is not possible to use for in inside a mongo query?
db.Meals.find({}).forEach(meal => {
const nutri_facts = {};
db.Products.find({ sku: { $in: meal.products } },
{ _id: 1, name: 1, sku: 1, nutri_facts: 1 }).forEach(product => {
for (let nutriFact in product.nutri_facts) {
nutri_facts[nutriFact] =
parseFloat(nutri_facts[nutriFact]) +
parseFloat(product.nutri_facts[nutriFact]);
}
}
});
for (let nutriFact in nutri_facts) {
meal.nutri_facts[nutriFact] =
nutri_facts[nutriFact];
}
}
db.Meals.updateOne({ _id: meal._id }, meal)
});
I also had a hard time trying to figure out how to use aggregate and lookup in this case but was not successful.
Is it possible to do that?
Example - Meals Document
{
_id: ObjectId("..."),
products : ["P068","L021","L026"], //these SKUs are part of this meal
nutri_facts: {
total_fat: 5g,
calories: 100kcal
(...other properties)
}
}
For each meal I need to look for its products on 'Products' collections using 'sku' field.
Then I will sum the nutritional facts of all products to get the meal nutritional facts.
Example Products Document
{
_id: ObjectId("..."),
sku: 'A010'
nutri_facts: {
total_fat: 2g,
calories: 40kcal
(...other properties)
}
}
I know that mongo might not be the best option in this case, but the entire application is already built using it.

For each meal I need to look for its products on 'Products'
collections using 'sku' field. Then I will sum the nutritional facts
of all products to get the meal nutritional facts.
db.Meals.find( { } ).forEach( meal => {
// print(meal._id);
const nutri_facts_var = { };
db.Products.find( { sku: { $in: meal.products } }, { nutri_facts: 1 }.forEach( product => {
// printjson(product.nutri_facts);
for ( let nutriFact in product.nutri_facts ) {
let units = (product.nutri_facts[nutriFact].split(/\d+/)).pop();
// print(units)
// Checks for the existence of the field and then adds or assigns
if ( nutri_facts_var[nutriFact] ) {
nutri_facts_var[nutriFact] = parseFloat( nutri_facts_var[nutriFact] ) + parseFloat( product.nutri_facts[nutriFact] );
}
else {
nutri_facts_var[nutriFact] = parseFloat( product.nutri_facts[nutriFact] );
}
nutri_facts_var[nutriFact] = nutri_facts_var[nutriFact] + units;
}
} );
// printjson(nutri_facts_var);
db.Meals.updateOne( { _id: meal._id }, { $set: { nutri_facts: nutri_facts_var } } );
} );
NOTES:
I use the variable nutri_facts_var name ( the var suffixed) so
that we can distinguish the user defined variable names easily from
the document fields names.
{ _id: 1, name: 1, sku: 1, nutri_facts: 1 } changed to {
nutri_facts: 1 }. The _id is included by default in a
projection. The fields name and sku are not needed.
db.Meals.updateOne({ _id: meal._id }, meal) is not a correct
syntax. The update operations use Update
Operators.
The corrected code: db.Meals.updateOne( { _id: meal._id }, { $set:
{ nutri_facts: nutri_facts_v } } ). Note we are updating the
nutifacts only, not all details.
Since the individual nutrifacts are stored as strings (e.g.,
"100kcal"), during arithmetic the string parts are stripped. So, we
capture the string units (e.g., "kcal") for each nutrifact and
append it later after the arithmetic. The following code strips and
stores the units part: let units =
(product.nutri_facts[nutriFact].split(/\d+/)).pop().
Use the mongo shell methods print and printjson to print the
contents of a variable or an object respectively - for debugging
purposes.
Note the query updates the Meal collection even if the nutri_facts field is not defined in it; the $set update operator creates new fields and sets the values in case the fields do not exist.

Related

Mongoose Update Instance from Nested Array

In Mongoose, lets say I have a User object pulled from MongoDB and that user has an array of Interests. Now I get an instance of one of that user's Interests.
var user = ...
var interest = ...
... //Make some changes to interest.
How do I update that Interest object (after making some changes to it) within the User array in the DB?
Edit
Here is my current code. It doesn't work and doesn't give an error.
User.update(
{
'_id': user._id,
'interests._id': interest._id
},
{
'$set': {
'interests.$.xyzProperty': interest.xyzProperty
}
},
function(err,obj){//some error checking}
);
If you set an if for each interest, you can access the interest by the $ operator.
user document
{
_id: ObectId('54b568531ef35a7c348f21f2'),
interests: [
{
_id: 12345,
title: 'Tacos',
description: 'I Love tacos'
},
{...},
{...},
]
}
If I know which interest sub document I want to update, I simply query it like so:
UserModel.find({_id: ObectId('54b568531ef35a7c348f21f2'), 'interests.i_d': 12345}).lean().exec(function (err, user) {
var interest = ... //find specific interest
interest.description = 'I love tacos... Like, a lot'.
UserModel.update(
{
_id: user._id,
'interests._id': interest._id
},
{
$set: {
'interests.$.description': interest.description
}
},
function (err, update) {
console.log(err, update);
}
);
});
This uses the $ positional operator and updates the specific sub document(or item in an array).

Resolving a ref inside of a MongoDB aggregate

I have a Product model object that has the following field in its schema:
category : { type: ObjectId, turnOn: false, ref: "category" }
It references a category model that has a title field in it:
var categorySchema = Schema({
title : { type: String }
});
I'm using the product.category property (which is of type ObjectId as shown above) in a MongoDB aggregate but really want the category.title property from the category model rather than _id in the final resultset.
The following code gets the job done, but you'll see that I'm having to do some looping at the end to "resolve" the title field for the given product.category (ObjectId). Is there anyway to do all of that within the aggregate? In other words, is there a way to get the category model object's title field in the groups that are returned rather than having to do the extra looping work? Based on posts I've researched I don't see a built-in way but wanted to double-check.
getProductsGroupedByCategory = function(callback) {
Category.find(function(err, cats) {
var aggregate = [
{
$group: {
_id: "$category",
products: {
$push: {
title: "$title",
authors: "$authors",
publishDate: "$publishDate",
description: "$description"
}
}
}
},
{
$sort: {
"_id": 1
}
}
];
Product.aggregate(aggregate, function(err, catProducts) {
//Grab name of category and associate with each group
//since we only have the category_id at this point
for (var i = 0; i<catProducts.length;i++) {
var catProduct = catProducts[i];
for (var j=0;j<cats.length;j++) {
if (catProduct._id.toString() === cats[j]._id.toString()) {
catProduct.category = cats[j].title;
}
}
};
callback(err, catProducts);
});
});
}, //more code follows
An example datum would be helpful along with what you need out of it. From What I understand you are looking to get the title in to the grouping criteria and that should be doing by having a compound grouping criteria i.e.
_id: {category: "$category", title: "$title"}
If the title is within an array, you should do unwind, group and then wind again to achieve the result.

Merge changeset documents in a query

I have recorded changes from an information system in a mongo database. Every time a set of values are set or changed, a record is saved in the mongo database.
The change collection is in the following form:
{ "user_id": 1, "timestamp": { "date" : "2010-09-22 09:28:02", "timezone_type" : 3, "timezone" : "Europe/Paris" } }, "changes: { "fieldA": "valueA", "fieldB": "valueB", "fieldC": "valueC" } }
{ "user_id": 1, "timestamp": { "date" : "2010-09-24 19:01:52", "timezone_type" : 3, "timezone" : "Europe/Paris" } }, "changes: { "fieldA": "new_valueA", "fieldB": null, "fieldD": "valueD" } }
{ "user_id": 1, "timestamp": { "date" : "2010-10-01 11:11:02", "timezone_type" : 3, "timezone" : "Europe/Paris" } }, "changes: { "fieldD": "new_valueD" } }
Of course there are thousands of records per user with different attributes which represent millions of records. What I want to do is to see a user status at a given time. By example, the user_id 1 at 2010-09-30 would be
fieldA: new_valueA
fieldC: valueC
fieldD: valueD
This means I need to flatten all the changes prior to a given date for a given user into a single record. Can I do that directly in mongo ?
Edit: I am using the 2.0 version of mongodb hence cannot benefit from the aggregation framework.
Edit: It sounds I have found the answer to my question.
var mapTimeAndChangesByUserId = function() {
var key = this.user_id;
var value = { timestamp: this.timestamp.date, changes: this.changes };
emit(key, value);
}
var reduceMergeChanges = function(user_id, changeset) {
var mergeFunction = function(a, b) { for (var attr in b) a[attr] = b[attr]; };
var result = {};
changeset.forEach(function(e) { mergeFunction(result, e.changes); });
return { timestamp: changeset.pop().timestamp, changes: result };
}
The reduce function merges the changes in the order they come and returns the result.
db.user_change.mapReduce(
mapTimeAndChangesByUserId,
reduceMergeChanges,
{
out: { inline: 1 },
query: { user_id: 1, "timestamp.date": { $lt: "2010-09-30" } },
sort: { "timestamp.date": 1 }
});
'results' : [
"_id": 1,
"value": {
"timestamp": "2010-09-24 19:01:52",
"changes": {
"fieldA": "new_valueA",
"fieldB": null,
"fieldC": "valueC",
"fieldD": "valueD"
}
}
]
Which is fine to me.
You could write a MR to do this.
Since the fields are a lot like tags you can modify a nice cookbook example of counting tags here: http://cookbook.mongodb.org/patterns/count_tags/ of course instead of counting you want the latest value applied (assumption since this is not clear in your question) for that field.
So lets get our map function:
map = function() {
if (!this.changes) {
// If there were not changes for some reason lets bail this record
return;
}
// We iterate the changes
for (index in this.changes) {
emit(index /* We emit the field name */, this.changes[index] /* We emit the field value */);
}
}
And now for our reduce:
reduce = function(values){
// This part is dependant upon your input query. If you add a sort of
// date (ts) DESC then you will prolly want the first index (0) not the last as
// gathered here by values.length
return values[values.length];
}
And this will output a single document per field change of the type:
{
_id: your_field_ie_fieldA,
value: whoop
}
You can then iterate the end of the (most likely) in line output and, bam, you have your changes.
This is of course one way of dong it and is not designed to be run completely in line to your app, however that all depends on the size of the data your working on; it could be run very close.
I am unsure whether the group and distinct can run on this but it looks like it might: http://docs.mongodb.org/manual/reference/method/db.collection.group/#db-collection-group however I should note that group is basically a MR wrapper but you could do something like (untested just like the MR above):
db.col.group( {
key: { 'changes.fieldA': 1, // the rest of the fields },
cond: { 'timestamp.date': { $gt: new Date( '01/01/2012' ) } },
reduce: function ( curr, result ) { },
initial: { }
} )
But it does require you to define the keys instead of just iterating them programmably (maybe a better way).

Select top N rows from each group

I use mongodb for my blog platform, where users can create their own blogs. All entries from all blogs are in an entries collection. The document of an entry looks like:
{
'blog_id':xxx,
'timestamp':xxx,
'title':xxx,
'content':xxx
}
As the question says, is there any way to select, say, last 3 entries for each blog?
You need to first sort the documents in the collection by the blog_id and timestamp fields, then do an initial group which creates an array of the original documents in descending order. After that you can slice the array with the documents to return the first 3 elements.
The intuition can be followed in this example:
db.entries.aggregate([
{ '$sort': { 'blog_id': 1, 'timestamp': -1 } },
{
'$group': {
'_id': '$blog_id',
'docs': { '$push': '$$ROOT' },
}
},
{
'$project': {
'top_three': {
'$slice': ['$docs', 3]
}
}
}
])
The only way to do this in basic mongo if you can live with two things :
An additional field in your entry document, let's call it "age"
A new blog entry taking an additional update
If so, here's how you do it :
Upon creating a new intro do your normal insert and then execute this update to increase the age of all posts (including the one you just inserted for this blog) :
db.entries.update({blog_id: BLOG_ID}, {age:{$inc:1}}, false, true)
When querying, use the following query which will return the most recent 3 entries for each blog :
db.entries.find({age:{$lte:3}, timestamp:{$gte:STARTOFMONTH, $lt:ENDOFMONTH}}).sort({blog_id:1, age:1})
Note that this solution is actually concurrency safe (no entries with duplicate ages).
Starting in Mongo 5.2, it's a perfect use case for the new $topN aggregation accumulator:
// { blog_id: "a", title: "plop", content: "smthg" }
// { blog_id: "b", title: "hum", content: "meh" }
// { blog_id: "a", title: "hello", content: "world" }
// { blog_id: "a", title: "what", content: "ever" }
db.collection.aggregate([
{ $group: {
_id: "$blog_id",
messages: { $topN: { n: 2, sortBy: { _id: -1 }, output: "$$ROOT" } }
}}
])
// {
// _id: "a",
// messages: [
// { blog_id: "a", title: "what", content: "ever" },
// { blog_id: "a", title: "hello", content: "world" }
// ]
// }
// {
// _id: "b",
// messages: [
// { blog_id: "b", title: "hum", content: "meh" }
// ]
// }
This applies a $topN group accumulation that:
takes for each group the top 2 (n: 2) elements
top 2, as defined by sortBy: { _id: -1 }, which in this case means by reversed order of insertion
and for each record pushes the whole record in the group's list (output: "$$ROOT") since $$ROOT represents the whole document being processed.
It's possible with group (aggregation), but this will create a full-table scan.
Do you really need exactly 3 or can you set a limit...e.g.: max 3 posts from the last week/month?
This answer using map reduce by drcosta from another question did the trick
In mongo, how do I use map reduce to get a group by ordered by most recent
mapper = function () {
emit(this.category, {top:[this.score]});
}
reducer = function (key, values) {
var scores = [];
values.forEach(
function (obj) {
obj.top.forEach(
function (score) {
scores[scores.length] = score;
});
});
scores.sort();
scores.reverse();
return {top:scores.slice(0, 3)};
}
function find_top_scores(categories) {
var query = [];
db.top_foos.find({_id:{$in:categories}}).forEach(
function (topscores) {
query[query.length] = {
category:topscores._id,
score:{$in:topscores.value.top}
};
});
return db.foo.find({$or:query});

How to change the type of a field?

I am trying to change the type of a field from within the mongo shell.
I am doing this...
db.meta.update(
{'fields.properties.default': { $type : 1 }},
{'fields.properties.default': { $type : 2 }}
)
But it's not working!
The only way to change the $type of the data is to perform an update on the data where the data has the correct type.
In this case, it looks like you're trying to change the $type from 1 (double) to 2 (string).
So simply load the document from the DB, perform the cast (new String(x)) and then save the document again.
If you need to do this programmatically and entirely from the shell, you can use the find(...).forEach(function(x) {}) syntax.
In response to the second comment below. Change the field bad from a number to a string in collection foo.
db.foo.find( { 'bad' : { $type : 1 } } ).forEach( function (x) {
x.bad = new String(x.bad); // convert field to string
db.foo.save(x);
});
Convert String field to Integer:
db.db-name.find({field-name: {$exists: true}}).forEach(function(obj) {
obj.field-name = new NumberInt(obj.field-name);
db.db-name.save(obj);
});
Convert Integer field to String:
db.db-name.find({field-name: {$exists: true}}).forEach(function(obj) {
obj.field-name = "" + obj.field-name;
db.db-name.save(obj);
});
Starting Mongo 4.2, db.collection.update() can accept an aggregation pipeline, finally allowing the update of a field based on its own value:
// { a: "45", b: "x" }
// { a: 53, b: "y" }
db.collection.updateMany(
{ a : { $type: 1 } },
[{ $set: { a: { $toString: "$a" } } }]
)
// { a: "45", b: "x" }
// { a: "53", b: "y" }
The first part { a : { $type: 1 } } is the match query:
It filters which documents to update.
In this case, since we want to convert "a" to string when its value is a double, this matches elements for which "a" is of type 1 (double)).
This table provides the code representing the different possible types.
The second part [{ $set: { a: { $toString: "$a" } } }] is the update aggregation pipeline:
Note the squared brackets signifying that this update query uses an aggregation pipeline.
$set is a new aggregation operator (Mongo 4.2) which in this case modifies a field.
This can be simply read as "$set" the value of "a" to "$a" converted "$toString".
What's really new here, is being able in Mongo 4.2 to reference the document itself when updating it: the new value for "a" is based on the existing value of "$a".
Also note "$toString" which is a new aggregation operator introduced in Mongo 4.0.
In case your cast isn't from double to string, you have the choice between different conversion operators introduced in Mongo 4.0 such as $toBool, $toInt, ...
And if there isn't a dedicated converter for your targeted type, you can replace { $toString: "$a" } with a $convert operation: { $convert: { input: "$a", to: 2 } } where the value for to can be found in this table:
db.collection.updateMany(
{ a : { $type: 1 } },
[{ $set: { a: { $convert: { input: "$a", to: 2 } } } }]
)
For string to int conversion.
db.my_collection.find().forEach( function(obj) {
obj.my_value= new NumberInt(obj.my_value);
db.my_collection.save(obj);
});
For string to double conversion.
obj.my_value= parseInt(obj.my_value, 10);
For float:
obj.my_value= parseFloat(obj.my_value);
db.coll.find().forEach(function(data) {
db.coll.update({_id:data._id},{$set:{myfield:parseInt(data.myfield)}});
})
all answers so far use some version of forEach, iterating over all collection elements client-side.
However, you could use MongoDB's server-side processing by using aggregate pipeline and $out stage as :
the $out stage atomically replaces the existing collection with the
new results collection.
example:
db.documents.aggregate([
{
$project: {
_id: 1,
numberField: { $substr: ['$numberField', 0, -1] },
otherField: 1,
differentField: 1,
anotherfield: 1,
needolistAllFieldsHere: 1
},
},
{
$out: 'documents',
},
]);
To convert a field of string type to date field, you would need to iterate the cursor returned by the find() method using the forEach() method, within the loop convert the field to a Date object and then update the field using the $set operator.
Take advantage of using the Bulk API for bulk updates which offer better performance as you will be sending the operations to the server in batches of say 1000 which gives you a better performance as you are not sending every request to the server, just once in every 1000 requests.
The following demonstrates this approach, the first example uses the Bulk API available in MongoDB versions >= 2.6 and < 3.2. It updates all
the documents in the collection by changing all the created_at fields to date fields:
var bulk = db.collection.initializeUnorderedBulkOp(),
counter = 0;
db.collection.find({"created_at": {"$exists": true, "$type": 2 }}).forEach(function (doc) {
var newDate = new Date(doc.created_at);
bulk.find({ "_id": doc._id }).updateOne({
"$set": { "created_at": newDate}
});
counter++;
if (counter % 1000 == 0) {
bulk.execute(); // Execute per 1000 operations and re-initialize every 1000 update statements
bulk = db.collection.initializeUnorderedBulkOp();
}
})
// Clean up remaining operations in queue
if (counter % 1000 != 0) { bulk.execute(); }
The next example applies to the new MongoDB version 3.2 which has since deprecated the Bulk API and provided a newer set of apis using bulkWrite():
var bulkOps = [];
db.collection.find({"created_at": {"$exists": true, "$type": 2 }}).forEach(function (doc) {
var newDate = new Date(doc.created_at);
bulkOps.push(
{
"updateOne": {
"filter": { "_id": doc._id } ,
"update": { "$set": { "created_at": newDate } }
}
}
);
})
db.collection.bulkWrite(bulkOps, { "ordered": true });
To convert int32 to string in mongo without creating an array just add "" to your number :-)
db.foo.find( { 'mynum' : { $type : 16 } } ).forEach( function (x) {
x.mynum = x.mynum + ""; // convert int32 to string
db.foo.save(x);
});
What really helped me to change the type of the object in MondoDB was just this simple line, perhaps mentioned before here...:
db.Users.find({age: {$exists: true}}).forEach(function(obj) {
obj.age = new NumberInt(obj.age);
db.Users.save(obj);
});
Users are my collection and age is the object which had a string instead of an integer (int32).
You can easily convert the string data type to numerical data type.
Don't forget to change collectionName & FieldName.
for ex : CollectionNmae : Users & FieldName : Contactno.
Try this query..
db.collectionName.find().forEach( function (x) {
x.FieldName = parseInt(x.FieldName);
db.collectionName.save(x);
});
I need to change datatype of multiple fields in the collection, so I used the following to make multiple data type changes in the collection of documents. Answer to an old question but may be helpful for others.
db.mycoll.find().forEach(function(obj) {
if (obj.hasOwnProperty('phone')) {
obj.phone = "" + obj.phone; // int or longint to string
}
if (obj.hasOwnProperty('field-name')) {
obj.field-name = new NumberInt(obj.field-name); //string to integer
}
if (obj.hasOwnProperty('cdate')) {
obj.cdate = new ISODate(obj.cdate); //string to Date
}
db.mycoll.save(obj);
});
demo change type of field mid from string to mongo objectId using mongoose
Post.find({}, {mid: 1,_id:1}).exec(function (err, doc) {
doc.map((item, key) => {
Post.findByIdAndUpdate({_id:item._id},{$set:{mid: mongoose.Types.ObjectId(item.mid)}}).exec((err,res)=>{
if(err) throw err;
reply(res);
});
});
});
Mongo ObjectId is just another example of such styles as
Number, string, boolean that hope the answer will help someone else.
I use this script in mongodb console for string to float conversions...
db.documents.find({ 'fwtweaeeba' : {$exists : true}}).forEach( function(obj) {
obj.fwtweaeeba = parseFloat( obj.fwtweaeeba );
db.documents.save(obj); } );
db.documents.find({ 'versions.0.content.fwtweaeeba' : {$exists : true}}).forEach( function(obj) {
obj.versions[0].content.fwtweaeeba = parseFloat( obj.versions[0].content.fwtweaeeba );
db.documents.save(obj); } );
db.documents.find({ 'versions.1.content.fwtweaeeba' : {$exists : true}}).forEach( function(obj) {
obj.versions[1].content.fwtweaeeba = parseFloat( obj.versions[1].content.fwtweaeeba );
db.documents.save(obj); } );
db.documents.find({ 'versions.2.content.fwtweaeeba' : {$exists : true}}).forEach( function(obj) {
obj.versions[2].content.fwtweaeeba = parseFloat( obj.versions[2].content.fwtweaeeba );
db.documents.save(obj); } );
And this one in php)))
foreach($db->documents->find(array("type" => "chair")) as $document){
$db->documents->update(
array('_id' => $document[_id]),
array(
'$set' => array(
'versions.0.content.axdducvoxb' => (float)$document['versions'][0]['content']['axdducvoxb'],
'versions.1.content.axdducvoxb' => (float)$document['versions'][1]['content']['axdducvoxb'],
'versions.2.content.axdducvoxb' => (float)$document['versions'][2]['content']['axdducvoxb'],
'axdducvoxb' => (float)$document['axdducvoxb']
)
),
array('$multi' => true)
);
}
The above answers almost worked but had a few challenges-
Problem 1: db.collection.save no longer works in MongoDB 5.x
For this, I used replaceOne().
Problem 2: new String(x.bad) was giving exponential number
I used "" + x.bad as suggested above.
My version:
let count = 0;
db.user
.find({
custID: {$type: 1},
})
.forEach(function (record) {
count++;
const actualValue = record.custID;
record.custID = "" + record.custID;
console.log(`${count}. Updating User(id:${record._id}) from old id [${actualValue}](${typeof actualValue}) to [${record.custID}](${typeof record.custID})`)
db.user.replaceOne({_id: record._id}, record);
});
And for millions of records, here are the output (for future investigation/reference)-