Merging two collections of time-stamped data and showing real-time results - mongodb

The Incomes and Expenses collections are used completely separately in many places across the whole app; there is only one page with the requirement below. I can't believe there is no workaround for this in MongoDB, which really has millions of users :(
I am using React with Meteor in a project that has two collections named Incomes and Expenses. An income doc looks like this:
{
"_id" : "euAeJYArsAyFWLJs6",
"account" : "3m5Zxsije9b6ZaNpu",
"amount" : 3,
"receivedAt" : ISODate("2017-07-07T06:21:00.000Z"),
"type" : "project",
"project" : {
"_id" : "ec2WLt3GzHNwhK7oK",
"name" : "test"
},
"owner" : "nM4ToQEbYBsC3NoQB",
"createdAt" : ISODate("2017-07-07T06:21:37.293Z")
}
and this is how an expense doc looks:
{
"_id" : "snWDusDbkLHHY2Yry",
"account" : "3m5Zxsije9b6ZaNpu",
"amount" : 4,
"spentAt" : ISODate("2017-07-07T06:21:00.000Z"),
"description" : "4",
"category" : {
"_id" : "vh593tw9dZgNdNwtr",
"name" : "test",
"icon" : "icon-icons_tution-fee"
},
"owner" : "nM4ToQEbYBsC3NoQB",
"createdAt" : ISODate("2017-07-07T06:22:04.215Z")
}
Now I have a page called transactions where I have to show all transactions (incomes and expenses) ordered by time, so my publication code for transactions looks like this:
import { Meteor } from 'meteor/meteor';
import { Incomes } from '../../incomes/incomes.js';
import { Expenses } from '../../expences/expenses.js';
import { Counter } from 'meteor/natestrauser:publish-performant-counts';

let datefilter = (options, query) => {
    let dateQuery = {$gte: new Date(options.dateFilter.start), $lte: new Date(options.dateFilter.end)};
    let temp = {$or: [{receivedAt: dateQuery}, {spentAt: dateQuery}]};
    query.$and.push(temp);
};

Meteor.publish('transactions', function(options) {
    let query = {
        owner: this.userId,
        $and: []
    };
    if(options.accounts.length)
        query['account'] = {$in: options.accounts};
    options.dateFilter && datefilter(options, query);
    //here I also apply other filters based on category and project, which do not matter here, so I removed them
    if(!query.$and.length) delete query.$and;
    //computing 'Transactions' below
    return [
        Incomes.find(query, {
            sort: {
                receivedAt: -1
            },
            limit: options.limit,
            skip: options.skip
        }),
        Expenses.find(query, {
            sort: {
                spentAt: -1
            },
            limit: options.limit,
            skip: options.skip
        })
    ];
});
Up to here everything works fine, until I have to implement pagination on the transactions page, where the data has to be sorted by date; that's the real problem. Assume both my collections have 10 records each and my template page has to show the first 10 results, so I send skip 0 and limit 10. In return I get 10 income and 10 expense records, and on the second page there are no records at all, because a skip and limit of 10 were sent for both collections. So how do I deal with this? I also tried the counter technique, but that didn't work. Remember, my data is real-time too.
Any help will be greatly appreciated :)

This is a temporary solution for a quick join issue. You should consider the size of your data set and pass all checks before applying it. In the meantime I will change my schema as @NeilLunn suggested and plan migrations shortly.
For the ad-hoc fix that meets the display requirements, I applied the aggregate in combination with $out. Now the code looks like this:
Meteor.publish('transaction', function(options){
    //here I perform many filters based on the query and options,
    //which I deleted to shorten the code on SO
    //call an asynchronous method just to delay the result :)
    Meteor.call('copyTransactions', (err, res) => {
        //something done here
    });
    //note that we return results from **Transactions**, which comes in as a third collection
    return [
        Transactions.find(query, {
            sort: sortbyDate,
            skip: options.skip,
            limit: options.limit,
        }),
        new Counter('transactionsCount', Transactions.find(query, {
            sort: sortbyDate
        }))
    ];
});
Now publish Transactions (a separate collection), which is the merged collection I wanted. Note that this publication name ends with an s (transactions), so don't confuse it with the one above (transaction).
Publish the merged collection separately as "transactions":
Meteor.publish('transactions', function(limit){
return Transactions.find(
{
owner: this.userId
});
});
And here is the most important part: the method called in the publication to merge the two collections into a third. I first aggregate all results with $out and then append the second collection via a batch insert.
import { ValidatedMethod } from 'meteor/mdg:validated-method';
import { Expenses } from '../../../api/expences/expenses'
import { Incomes } from '../../../api/incomes/incomes'
import { Transactions } from '../transactions'
import Future from 'fibers/future';

export const copyTransactions = new ValidatedMethod({
    name: 'copyTransactions',
    validate: null,
    run() {
        //we are using a future here to make it asynchronous
        let fu = new Future();
        //here Expenses is copied directly into the new collection named Transactions
        // TODO: use $rename or $addFields in the aggregate instead of $project
        Expenses.aggregate([{
            $project: {
                account : "$account",
                amount : "$amount",
                transactionAt : "$spentAt",
                description : "$description",
                category : "$category",
                type: {
                    $literal: 'expense'
                },
                owner : "$owner",
                createdAt : "$createdAt"
            }
        }, {
            $out: "transactions"
        }]);
        //now append the Transactions collection with incomes using a batch insert
        Incomes.aggregate([{
            $project: {
                account : "$account",
                amount : "$amount",
                transactionAt : "$receivedAt",
                type: {
                    $literal: 'income'
                },
                project : "$project",
                owner : "$owner",
                createdAt : "$createdAt"
            }
        }], function (err, result) {
            //if no docs are found then just return
            if(!result.length){
                fu.return('completed')
            }
            else{
                Transactions.batchInsert(result, function(err, res){
                    fu.return('completed')
                })
            }
        });
        return fu.wait();
    }
});
If the second collection were also aggregated with $out, it would overwrite the first result :(
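If you are on MongoDB 4.2 or newer, the $merge stage can write into the target collection without replacing it the way $out does, so both aggregations could write to transactions directly. A rough sketch of the idea, keeping the same field mapping as above (not what I actually ran):
Incomes.aggregate([
    { $project: {
        account: "$account",
        amount: "$amount",
        transactionAt: "$receivedAt",
        type: { $literal: 'income' },
        project: "$project",
        owner: "$owner",
        createdAt: "$createdAt"
    } },
    // unlike $out, $merge upserts into the existing collection instead of overwriting it
    { $merge: { into: "transactions", whenMatched: "replace", whenNotMatched: "insert" } }
]);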
On the client I just have to subscribe to 'transaction' with my options and query, and I get the real-time merged results from Transactions.
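For reference, the client side is just a normal subscription plus a local find; a minimal sketch, assuming a Blaze template named transactions and import paths like the ones in my project (the names here are illustrative):
// client/transactions.js
import { Template } from 'meteor/templating';
import { Transactions } from '../../../api/transactions/transactions';

Template.transactions.onCreated(function () {
    this.autorun(() => {
        // skip/limit/filters would normally come from reactive state (pagination controls)
        const options = { skip: 0, limit: 10, accounts: [], dateFilter: null };
        this.subscribe('transaction', options);
    });
});

Template.transactions.helpers({
    transactions() {
        // the server already merged and sorted, so a plain local find is enough
        return Transactions.find({}, { sort: { transactionAt: -1 } });
    }
});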


$push is not supported in mongoose bulk operation

I am using a bulk operation in Mongoose to update multiple documents concurrently.
The update data looks like this:
let docUpdates = [
    {
        id : docId1,
        status : "created",
        $push : {
            updates : {
                status: "created",
                referenceId : referenceId,
                createdTimestamp : "2022-06-14T00:00:00.000Z"
            }
        }
    },
    {
        id : docId2,
        status : "completed",
        $push : {
            updates : {
                status: "completed",
                referenceId : referenceId,
                createdTimestamp : "2022-06-14T00:00:00.000Z"
            }
        }
    },
];

// Update documents using bulk operation
let bulkUpdate = Model.collection.initializeOrderedBulkOp();
for (let i = 0; i < docUpdates.length; i++) {
    bulkUpdate.find({_id : docUpdates[i].docId}).updateOne({ $set : docUpdates[i]});
}
When I execute the above snippet, I get the below error.
(node:468) UnhandledPromiseRejectionWarning: MongoBulkWriteError: The dollar ($) prefixed field '$push' in '$push' is not allowed in the context of an update's replacement document. Consider using an aggregation pipeline with $replaceWith.
at OrderedBulkOperation.handleWriteError (/usr/src/app/node_modules/mongoose/node_modules/mongodb/lib/bulk/common.js:884:22)
Could you please help me find a workaround for this error.
Thank you,
KK
The problem isn't that $push isn't supported; it's that, because of the way you pass the arguments in the update document, $push is interpreted as a top-level field name. See the documentation about field name considerations, specifically the section on document modifying updates.
This section of your code updateOne({ $set : docUpdates[i]}) is equivalent to the following:
updateOne({
    $set: {
        id: docId1,
        status: "created",
        $push: {
            updates: {
                status: "created",
                referenceId: referenceId,
                createdTimestamp: "2022-06-14T00:00:00.000Z"
            }
        }
    }
})
Because the $push is contained in the $set, you're telling MongoDB to set the value of a field named $push to a document with a field called updates, which has status, referenceId, and createdTimestamp fields.
What you want to do instead is specify an update document with two different update operators. In other words, $push needs to be at the same level as the $set.
Something like:
updateOne({
    $set: {
        id: docId1,
        status: "created"
    },
    $push: {
        updates: {
            status: "created",
            referenceId: referenceId,
            createdTimestamp: "2022-06-14T00:00:00.000Z"
        }
    }
})
How you represent that in the docUpdates array is up to you. Maybe you can do something like:
let docUpdates = [
    {
        id : docId1,
        update : {
            $set : {
                status : "created"
            },
            $push : {
                updates : {
                    status: "created",
                    referenceId : referenceId,
                    createdTimestamp : "2022-06-14T00:00:00.000Z"
                }
            }
        }
    }
    // ... Other updates
]
And use it in the loop like:
bulkUpdate.find({ _id : docUpdates[i].id }).updateOne(docUpdates[i].update)
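Putting it together, a minimal sketch of the whole flow with that shape (the model name and logging are illustrative, not from the original code):
// assumes a Mongoose model named Model and the docUpdates array shown above
const bulkUpdate = Model.collection.initializeOrderedBulkOp();

for (let i = 0; i < docUpdates.length; i++) {
    // the filter targets the document by _id; the update carries both operators
    bulkUpdate.find({ _id: docUpdates[i].id }).updateOne(docUpdates[i].update);
}

// execute() returns a promise when no callback is given
bulkUpdate.execute()
    .then((result) => console.log('bulk update done', result))
    .catch((err) => console.error(err));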

MongoDB Aggregation with DBRef

Is it possible to aggregate on data that is stored via DBRef?
Mongo 2.6
Let's say I have transaction data like:
{
_id : ObjectId(...),
user : DBRef("user", ObjectId(...)),
product : DBRef("product", ObjectId(...)),
source : DBRef("website", ObjectId(...)),
quantity : 3,
price : 40.95,
total_price : 122.85,
sold_at : ISODate("2015-07-08T09:09:40.262-0700")
}
The trick is "source" is polymorphic in nature - it could be different $ref values such as "webpage", "call_center", etc that also have different ObjectIds. For example DBRef("webpage", ObjectId("1")) and DBRef("webpage",ObjectId("2")) would be two different webpages where a transaction originated.
I would like to ultimately aggregate by source over a period of time (like a month):
db.coll.aggregate(
    { $match : { sold_at : { $gte : start, $lt : end } } },
    { $project : { source : 1, total_price : 1 } },
    { $group : {
        _id : { "source.$ref" : "$source.$ref" },
        count : { $sum : "$total_price" }
    } } );
The trick is that you get a path error when trying to use a field whose name starts with $, either by grouping on it or by transforming it with expressions via $project.
Any way to do this? I'm actually trying to push this data via aggregation into a sub-collection to operate on it there, and trying to avoid a large cursor operation over millions of records just to transform the data so I can aggregate it.
MongoDB 4: I solved this issue in the following way.
Having this structure:
{
"_id" : LUUID("144e690f-9613-897c-9eab-913933bed9a7"),
"owner" : {
"$ref" : "person",
"$id" : NumberLong(10)
},
...
...
}
I needed to use the "owner.$id" field, but because of the "$" in the field name I was unable to use it in aggregation.
I transformed "owner.$id" -> "owner" using the following snippet:
db.activities.aggregate([
    {
        $addFields: {
            "owner": {
                $arrayElemAt: [{ $objectToArray: "$owner" }, 1]
            }
        }
    },
    {
        $addFields: {
            "owner": "$owner.v"
        }
    },
    { "$group" : { _id: "$owner", count: { $sum: 1 } } },
    { $sort: { "count": -1 } }
])
Detailed explanations here - https://dev.to/saurabh73/mongodb-using-aggregation-pipeline-to-extract-dbref-using-lookup-operator-4ekl
You cannot use DBRef values with the aggregation framework. Instead you need to use the JavaScript processing of mapReduce in order to access the property naming that they use:
db.coll.mapReduce(
function() {
emit( this.source.$ref, this["total_price"] )
},
function(key,values) {
return Array.sum( values );
},
{
"query": { "sold_at": { "$gte": start, "$lt": end } },
"out": { "inline": 1 }
}
)
You really should not be using DBRef at all. Its usage is basically deprecated now, and if you feel you need some external referencing then you should be "manually referencing" this with your own code, or have it implemented by some other library, with which you can do so in a much more supported way.
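To illustrate what that could look like, a small sketch with plain fields instead of DBRef (the source.kind field name is made up for the example); with this shape the aggregation from the question needs no special handling:
// store the reference as ordinary fields rather than a DBRef
{
    _id : ObjectId("..."),
    source : { kind : "webpage", _id : ObjectId("...") },
    total_price : 122.85,
    sold_at : ISODate("2015-07-08T09:09:40.262-0700")
}

// then grouping by source type is a plain aggregation
db.coll.aggregate([
    { $match : { sold_at : { $gte : start, $lt : end } } },
    { $group : { _id : "$source.kind", count : { $sum : "$total_price" } } }
]);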

How can I write a Mongoose find query that uses another field as its conditional?

Consider the following:
I have a Mongoose model called 'Person'. In the schema for the Person model, each Person has two fields: 'children' and 'maximum_children'. Both fields are of type Number.
I would like to write a find query that returns Persons whose 'children' value is less than their 'maximum_children' value.
I have tried:
person_model.find({
children: {
$lt: maximum_children
}
}, function (error, persons) {
// DO SOMETHING ELSE
});
and
person_model.find({
children: {
$lt: 'maximum_children'
}
}, function (error, persons) {
// DO SOMETHING ELSE
});
I'm doing something wrong in trying to specify the field name that I want to compare 'children' against.
OK.
I found a solution, just after I posted this question.
The answer seems to be:
person_model.find({
    $where: "children < maximum_children"
}, function (error, persons) {
    // DO SOMETHING ELSE
});
Seems to work OK, although it seems messy.
$where must execute its JavaScript conditional against every doc, so its performance can be quite poor. Instead, you can use aggregate to include a new field in a $project stage that indicates whether the doc matches or not, and then filter on that:
person_model.aggregate([
{$project: {
isMatch: {$lt: ['$children', '$maximum_children']},
doc: '$$ROOT'
}},
{$match: {isMatch: true}},
{$project: {_id: 0, doc: 1}}
], function(err, results) {...});
This uses $$ROOT to include the original doc as the doc field of the projection, with a final $project used to remove the isMatch field that was added.
results looks like:
{
"doc" : {
"_id" : ObjectId("54d04591257efd80c6965ada"),
"children" : 5,
"maximum_children" : 10
}
},
{
"doc" : {
"_id" : ObjectId("54d04591257efd80c6965add"),
"children" : 5,
"maximum_children" : 6
}
}
If you want to remove the added doc level of the objects you can use Array#map on results like so:
results = results.map(function(item) { return item.doc; });
Which reshapes results to put them back into their original form:
{
"_id" : ObjectId("54d04591257efd80c6965ada"),
"children" : 5,
"maximum_children" : 10
},
{
"_id" : ObjectId("54d04591257efd80c6965add"),
"children" : 5,
"maximum_children" : 6
}
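As a side note, on MongoDB 3.6 and newer the same field-to-field comparison can go directly in the find filter with $expr, avoiding both $where and the extra projection; a minimal sketch using the same model:
person_model.find({
    $expr: { $lt: ['$children', '$maximum_children'] }
}, function (error, persons) {
    // persons whose children count is below maximum_children
});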

MongoDB conditionally $addToSet sub-document in array by specific field

Is there a way to conditionally $addToSet based on a specific key field in a subdocument on an array?
Here's an example of what I mean - given the collection produced by the following sample bootstrap;
cls
db.so.remove();
db.so.insert({
"Name": "fruitBowl",
"pfms" : [
{
"n" : "apples"
}
]
});
n defines a unique document key. I only want one entry with the same n value in the array at any one time. So I want to be able to update the pfms array using n so that I end up with just this;
{
"Name": "fruitBowl",
"pfms" : [
{
"n" : "apples",
"mState": 1111234
}
]
}
Here's where I am at the moment;
db.so.update({
"Name": "fruitBowl",
},{
// not allowed to do this of course
// "$pull": {
// "pfms": { n: "apples" },
// },
"$addToSet": {
"pfms": {
"$each": [
{
"n": "apples",
"mState": 1111234
}
]
}
}
}
)
Unfortunately, this adds another array element;
db.so.find().toArray();
[
{
"Name" : "fruitBowl",
"_id" : ObjectId("53ecfef5baca2b1079b0f97c"),
"pfms" : [
{
"n" : "apples"
},
{
"n" : "apples",
"mState" : 1111234
}
]
}
]
I need to effectively upsert the apples document matching on n as the unique identifier and just set mState whether or not an entry already exists. It's a shame I can't do a $pull and $addToSet in the same document (I tried).
What I really need here is dictionary semantics, but that's not an option right now, nor is breaking out the document - can anyone come up with another way?
FWIW - the existing format is a result of language/driver serialization, I didn't choose it exactly.
further
I've gotten a little further in the case where I know the array element already exists I can do this;
db.so.update({
"Name": "fruitBowl",
"pfms.n": "apples",
},{
$set: {
"pfms.$.mState": 1111234,
},
}
)
But of course that only works:
for a single array element
as long as I know it exists
The first limitation isn't a disaster, but if I can't effectively upsert or combine $addToSet with the previous $set (which of course I can't), then the only workarounds I can think of for now mean two DB round-trips.
The $addToSet operator of course requires that the "whole" document being "added to the set" is in fact unique, so you cannot change "part" of the document or otherwise consider it to be a "partial match".
You stumbled on to your best approach using $pull to remove any element with the "key" field that would result in "duplicates", but of course you cannot modify the same path in different update operators like that.
So the closest thing you will get is issuing separate operations but also doing that with the "Bulk Operations API" which is introduced with MongoDB 2.6. This allows both to be sent to the server at the same time for the closest thing to a "contiguous" operations list you will get:
var bulk = db.so.initializeOrderedBulkOp();
bulk.find({ "Name": "fruitBowl", "pfms.n": "apples" }).updateOne({
    "$pull": { "pfms": { "n": "apples" } }
});
bulk.find({ "Name": "fruitBowl" }).updateOne({
    "$push": { "pfms": { "n": "apples", "mState": 1111234 } }
});
bulk.execute();
That pretty much is your best approach if it is not possible or practical to move the elements to another collection and rely on "upserts" and $set in order to have the same functionality but on a collection rather than array.
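To make that alternative concrete, a rough sketch of what the "separate collection plus upsert" shape could look like (the soPfms collection name is invented for the example):
// one document per (Name, n) pair instead of an embedded array
db.soPfms.update(
    { "Name": "fruitBowl", "n": "apples" },
    { "$set": { "mState": 1111234 } },
    { "upsert": true }
);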
I have faced the exact same scenario. I was inserting and removing likes from a post.
What I did is use the mongoose findOneAndUpdate function (which is similar to the update or findAndModify functions in mongodb).
The key concept is
Insert when the field is not present
Delete when the field is present
The insert is
findOneAndUpdate({ _id: theId, 'likes.userId': { $ne: theUserId }},
{ $push: { likes: { userId: theUserId, createdAt: new Date() }}},
{ 'new': true }, function(err, post) { // do the needful });
The delete is
findOneAndUpdate({ _id: theId, 'likes.userId': theUserId},
{ $pull: { likes: { userId: theUserId }}},
{ 'new': true }, function(err, post) { // do the needful });
This makes the whole operation atomic and there are no duplicates with respect to the userId field.
I hope this helps. If you have any questions, feel free to ask.
As far as I know, MongoDB (from v4.2) now allows the use of aggregation pipelines for updates.
A more or less elegant way to make it work (according to the question) looks like the following:
db.runCommand({
    update: "your-collection-name",
    updates: [
        {
            q: {},
            u: {
                $set: {
                    "pfms.$[elem]": {
                        "n": "apples",
                        "mState": NumberInt(1111234)
                    }
                }
            },
            arrayFilters: [
                {
                    "elem.n": {
                        $eq: "apples"
                    }
                }
            ],
            multi: true
        }
    ]
})
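The same thing can be written with the regular update helpers instead of runCommand; a minimal sketch, again assuming MongoDB 3.6+ for arrayFilters:
db.so.updateMany(
    { "Name": "fruitBowl" },
    { $set: { "pfms.$[elem]": { "n": "apples", "mState": NumberInt(1111234) } } },
    { arrayFilters: [ { "elem.n": "apples" } ] }
);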
In my scenario, the data needs to be initialized when it does not exist, updated if it does exist, and it is never deleted. If your data has these states, you might want to try the following method.
// Mongoose, but mostly same as mongodb
// Update the tag to user, If there existed one.
const user = await UserModel.findOneAndUpdate(
{
user: userId,
'tags.name': tag_name,
},
{
$set: {
'tags.$.description': tag_description,
},
}
)
.lean()
.exec();
// Add a default tag to user
if (user == null) {
await UserModel.findOneAndUpdate(
{
user: userId,
},
{
$push: {
tags: new Tag({
name: tag_name,
description: tag_description,
}),
},
}
);
}
This is the cleanest and fastest method in this scenario.
As a business analyst, I had the same problem, and hopefully I have a solution to it after hours of investigation.
// The customer document:
{
"id" : "1212",
"customerCodes" : [
{
"code" : "I"
},
{
"code" : "YK"
}
]
}
// The problem : I want to insert dateField "01.01.2016" to customer documents where customerCodes subdocument has a document with code "YK" but does not have dateField. The final document must be as follows :
{
"id" : "1212",
"customerCodes" : [
{
"code" : "I"
},
{
"code" : "YK" ,
"dateField" : "01.01.2016"
}
]
}
// The solution : the solution code is in three steps :
// PART 1 - Find the customers with customerCodes "YK" but without dateField
// PART 2 - Find the index of the subdocument with "YK" in customerCodes list.
// PART 3 - Insert the value into the document
// Here is the code
// PART 1
var myCursor = db.customers.find({ customerCodes:{$elemMatch:{code:"YK", dateField:{ $exists:false} }}});
// PART 2
myCursor.forEach(function(customer){
if(customer.customerCodes != null )
{
var size = customer.customerCodes.length;
if( size > 0 )
{
var iFoundTheIndexOfSubDocument= -1;
var index = 0;
customer.customerCodes.forEach( function(clazz)
{
if( clazz.code == "YK" && clazz.dateField == null )
{
iFoundTheIndexOfSubDocument = index;
}
index++;
})
// PART 3
// What happens here is : If i found the indice of the
// "YK" subdocument, I create "updates" document which
// corresponds to the new data to be inserted
//
if( iFoundTheIndexOfSubDocument != -1 )
{
var toSet = "customerCodes."+ iFoundTheIndexOfSubDocument +".dateField";
var updates = {};
updates[toSet] = "01.01.2016";
db.customers.update({ "id" : customer.id } , { $set: updates });
// This statement is actually interpreted like this :
// db.customers.update({ "id" : "1212" }, { $set: { "customerCodes.0.dateField" : "01.01.2016" } });
}
}
}
});
Have a nice day!

Merge changeset documents in a query

I have recorded changes from an information system in a mongo database. Every time a set of values are set or changed, a record is saved in the mongo database.
The change collection is in the following form:
{ "user_id": 1, "timestamp": { "date" : "2010-09-22 09:28:02", "timezone_type" : 3, "timezone" : "Europe/Paris" } }, "changes: { "fieldA": "valueA", "fieldB": "valueB", "fieldC": "valueC" } }
{ "user_id": 1, "timestamp": { "date" : "2010-09-24 19:01:52", "timezone_type" : 3, "timezone" : "Europe/Paris" } }, "changes: { "fieldA": "new_valueA", "fieldB": null, "fieldD": "valueD" } }
{ "user_id": 1, "timestamp": { "date" : "2010-10-01 11:11:02", "timezone_type" : 3, "timezone" : "Europe/Paris" } }, "changes: { "fieldD": "new_valueD" } }
Of course there are thousands of records per user with different attributes, which represents millions of records. What I want to do is see a user's status at a given time. For example, user_id 1 at 2010-09-30 would be:
fieldA: new_valueA
fieldC: valueC
fieldD: valueD
This means I need to flatten all the changes prior to a given date for a given user into a single record. Can I do that directly in mongo?
Edit: I am using version 2.0 of mongodb and hence cannot benefit from the aggregation framework.
Edit: It sounds like I have found the answer to my question.
var mapTimeAndChangesByUserId = function() {
var key = this.user_id;
var value = { timestamp: this.timestamp.date, changes: this.changes };
emit(key, value);
}
var reduceMergeChanges = function(user_id, changeset) {
var mergeFunction = function(a, b) { for (var attr in b) a[attr] = b[attr]; };
var result = {};
changeset.forEach(function(e) { mergeFunction(result, e.changes); });
return { timestamp: changeset.pop().timestamp, changes: result };
}
The reduce function merges the changes in the order they come and returns the result.
db.user_change.mapReduce(
mapTimeAndChangesByUserId,
reduceMergeChanges,
{
out: { inline: 1 },
query: { user_id: 1, "timestamp.date": { $lt: "2010-09-30" } },
sort: { "timestamp.date": 1 }
});
'results' : [
    {
        "_id": 1,
        "value": {
            "timestamp": "2010-09-24 19:01:52",
            "changes": {
                "fieldA": "new_valueA",
                "fieldB": null,
                "fieldC": "valueC",
                "fieldD": "valueD"
            }
        }
    }
]
Which is fine to me.
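For readers on newer servers (the question is on 2.0, where this was not an option), the same flattening can be done with the aggregation framework; a sketch assuming MongoDB 3.6+ for $mergeObjects:
db.user_change.aggregate([
    { $match: { user_id: 1, "timestamp.date": { $lt: "2010-09-30" } } },
    { $sort: { "timestamp.date": 1 } },
    { $group: { _id: "$user_id", changes: { $push: "$changes" } } },
    { $project: {
        // fold the change documents together in timestamp order
        changes: {
            $reduce: {
                input: "$changes",
                initialValue: {},
                in: { $mergeObjects: ["$$value", "$$this"] }
            }
        }
    } }
]);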
You could write an MR (mapReduce) to do this.
Since the fields are a lot like tags, you can modify a nice cookbook example of counting tags here: http://cookbook.mongodb.org/patterns/count_tags/ - of course, instead of counting, you want the latest value applied for that field (an assumption, since this is not clear in your question).
So lets get our map function:
map = function() {
if (!this.changes) {
// If there were not changes for some reason lets bail this record
return;
}
// We iterate the changes
for (index in this.changes) {
emit(index /* We emit the field name */, this.changes[index] /* We emit the field value */);
}
}
And now for our reduce:
reduce = function(values){
    // This part is dependent upon your input query. If you add a sort of
    // date (ts) DESC then you will probably want the first element (index 0), not the last
    // one gathered here via values.length - 1
    return values[values.length - 1];
}
And this will output a single document per field change of the type:
{
_id: your_field_ie_fieldA,
value: whoop
}
You can then iterate the (most likely inline) output and, bam, you have your changes.
This is of course only one way of doing it, and it is not designed to be run completely inline with your app; however, that all depends on the size of the data you're working on. It could be run very close.
I am unsure whether group and distinct can run on this, but it looks like group might: http://docs.mongodb.org/manual/reference/method/db.collection.group/#db-collection-group - however, I should note that group is basically an MR wrapper, but you could do something like (untested, just like the MR above):
db.col.group({
    key: { 'changes.fieldA': 1 /* ... the rest of the fields */ },
    cond: { 'timestamp.date': { $gt: new Date( '01/01/2012' ) } },
    reduce: function ( curr, result ) { },
    initial: { }
})
But it does require you to define the keys instead of just iterating them programmatically (maybe there is a better way).