MongoDB calculating score from existing fields and putting it into a new field in the same collection - mongodb

I'm working on Mongodb and I have one collection, let's say Collection1.
I have to calculate a score from existing fields in Collection1, and put the result into a new field Field8 in Collection1.
Collection1 :
db.Collection1.find().pretty().limit(2) {
"_id": ObjectId("5717a5d4578f3f2556f300f2"),
"Field1": "XXXX",
"Field2": 0,
"Field3": 169,
"Field4": 230,
"Field5": "...4.67", // This field refer to days in a week
"Field6": "ZZ",
"Field7": "LO"
}, {
"_id": ObjectId("17a5d4575f300f278f3f2556"),
"Field1": "YYYY",
"Field2": 1,
"Field3": 260,
"Field4": 80,
"Field5": "1.3....", // This field refer to days in a week
"Field6": "YY",
"Field7": "PK"
}
So, I have to do some calculations to my first collection's fields with the following formula, but I don't know how to proceed ? :
Score = C1*C2*C3*C4
C1 = 10 + 0.03*field3
C2 = 1 or 0.03 it depends on field2 if it equals 1 or 0
C3 = 1 or 2 .... or 7, it depends on field5 for example C3 for this document "Field5": "...4.67" should return 3, it means three days per week
C4 = 1 or field4^-0.6 if field2 equals 0 or 1
After calculating this score I should put it in new field Field8 in my Collection1 and get something just like this :
db.Collection1.find().pretty().limit(2) {
"_id": ObjectId("5717a5d4578f3f2556f300f2"),
"Field1": "XXXX",
"Field2": 0,
"Field3": 169,
"Field4": 230,
"Field5": "...4.67", // This field refer to days in a week
"Field6": "ZZ",
"Field7": "LO",
"Field8": Score // My calculated score
}, {
"_id": ObjectId("17a5d4575f300f278f3f2556"),
"Field1": "YYYY",
"Field2": 1,
"Field3": 260,
"Field4": 80,
"Field5": "1.3....", // This field refer to days in a week
"Field6": "YY",
"Field7": "PK",
"Field8": Score // My calculated score
}
How can I achieve the above?

Depending on your application needs, you can use the aggregation framework for calculating the score and use the bulkWrite() to update your collection. Consider the following example which uses the $project pipeline step as leeway for the score calculations with the arithmetic operators.
Since logic for calculating C3 in your question is getting a number from 1 to 7 which equals exactly 7 - number of points (.), the only feasible approach I can think of is to store an extra field that holds this value first before doing the aggregation. So your first step would be to create that extra field and you can go about it using the bulkWrite() as follows:
Step 1: Modify schema to accommodate an extra daysInWeek field
// Step 1: persist a daysInWeek field, derived from the number of "."
// placeholders in Field5 (7 day slots; each dot marks an unused day,
// so "...4.67" has 4 dots -> 3 days per week, as the question expects).
var counter = 0, bulkUpdateOps = [];
db.collection1.find({
    "Field5": { "$exists": true }
}).forEach(function(doc) {
    // calculations for getting the number of points in Field5
    // FIX: the dot must be escaped -- new RegExp(".", "g") matches EVERY
    // character, so "points" was always Field5.length, never the dot count.
    var points, daysInWeek;
    points = (doc.Field5.match(/\./g) || []).length;
    daysInWeek = 7 - points;
    bulkUpdateOps.push({
        "updateOne": {
            "filter": { "_id": doc._id },
            "update": {
                "$set": { "daysInWeek": daysInWeek }
            }
        }
    });
    counter++;
    // Send the queued updates to the server in batches of 500.
    if (counter % 500 == 0) {
        db.collection1.bulkWrite(bulkUpdateOps);
        bulkUpdateOps = [];
    }
});
// Flush the final partial batch, if any.
if (counter % 500 != 0) { db.collection1.bulkWrite(bulkUpdateOps); }
Ideally the above operation could also accommodate calculating the other constants in your question and therefore create Field8 as a result. However, I believe computations like this should be done on the client, letting MongoDB do what it does best on the server.
Step 2: Use aggregate to add Field8 field
Having created that extra field daysInWeek you can then construct an aggregation pipeline that projects the new variables using a cohort of arithmetic operators to do the computation (again, would recommend doing such computations on the application layer). The final projection will be the product of the computed fields which you can then use the aggregate result cursor to iterate and add Field8 to the collection with each document:
// Aggregation that computes the per-document score constants:
//   C1 = 10 + 0.03 * Field3
//   C2 = 1 when Field2 == 1, otherwise 0.03
//   C3 = the pre-computed daysInWeek value
//   C4 = Field4 ^ -0.6 when Field2 == 1, otherwise 1
// and then multiplies them together into Field8.
var c1Expr = { "$add": [ 10, { "$multiply": [ "$Field3", 0.03 ] } ] };
var c2Expr = { "$cond": [ { "$eq": [ "$Field2", 1 ] }, 1, 0.03 ] };
var c4Expr = {
    "$cond": [
        { "$eq": [ "$Field2", 1 ] },
        { "$pow": [ "$Field4", -0.6 ] },
        1
    ]
};
var pipeline = [
    {
        "$project": {
            "C1": c1Expr,
            "C2": c2Expr,
            "C3": "$daysInWeek",
            "C4": c4Expr
        }
    },
    {
        "$project": {
            "Field8": { "$multiply": [ "$C1", "$C2", "$C3", "$C4" ] }
        }
    }
];
var count = 0;
var ops = [];
db.collection1.aggregate(pipeline).forEach(function(doc) {
    ops.push({
        "updateOne": {
            "filter": { "_id": doc._id },
            "update": {
                "$set": { "Field8": doc.Field8 }
            }
        }
    });
    count++;
    // Ship a batch of updates to the server every 500 documents.
    if (count % 500 === 0) {
        db.collection1.bulkWrite(ops);
        ops = [];
    }
});
// Write out whatever is left in the final partial batch.
if (count % 500 !== 0) { db.collection1.bulkWrite(ops); }
For MongoDB >= 2.6 and <= 3.0, use the Bulk Operations API, where you need to iterate the collection using the cursor's forEach() method and update each document in the collection.
Some of the arithmetic operators from the above aggregation pipeline are not available in MongoDB >= 2.6 and <= 3.0 so you will need to do the computations within the forEach() iteration.
Use the bulk API to reduce server write requests by bundling each update in bulk and sending to the server only once in every 500 documents in the collection for processing:
// Pre-3.2 alternative: iterate the collection with a cursor, do the score
// computation in JavaScript, and queue the updates through the Bulk API.
var bulkUpdateOps = db.collection1.initializeUnorderedBulkOp(),
    cursor = db.collection1.find(), // cursor
    counter = 0;
cursor.forEach(function(doc) {
    // computations
    var c1, c2, c3, c4, Field8;
    c1 = 10 + (0.03 * doc.Field3);
    c2 = (doc.Field2 == 1) ? 1 : 0.03;
    // FIX: escape the dot -- /./ matches ANY character, /\./ a literal ".".
    c3 = 7 - (doc.Field5.match(/\./g) || []).length;
    // FIX: original read doc.Field (undefined); the formula uses Field4.
    c4 = (doc.Field2 == 1) ? Math.pow(doc.Field4, -0.6) : 1;
    Field8 = c1 * c2 * c3 * c4;
    bulkUpdateOps.find({ "_id": doc._id }).updateOne({
        "$set": { "Field8": Field8 }
    });
    // FIX: counter was never incremented, so the modulus check fired on
    // every document and the final flush below could never run.
    counter++;
    if (counter % 500 == 0) {
        bulkUpdateOps.execute();
        bulkUpdateOps = db.collection1.initializeUnorderedBulkOp();
    }
});
if (counter % 500 != 0) { bulkUpdateOps.execute(); }

just make a function which returns your calculated value and call in your update mongodb query.
like
var cal = function(row){ return row.Field1 + row.Field2 * row.Field3; // use your formula according to your requirements};
// Walk every matching document and store its computed score in Field8.
var rows = db.collection1.find(); // apply a filter here if you need one
rows.forEach(function(row) {
    db.collection1.update(
        { "_id": row._id },
        { $set: { "Field8": cal(row) } }
    );
});

Related

How to calculate simple moving average using mongodb mapreduce?

I have time series data as the following format in a mongodb collection:
{
"Name" : "AKBNK",
"Date" : ISODate("2009-01-02T00:00:00Z"),
"Close" : 3.256746559,
}
I want to calculate simple moving average using mongodb mapreduce. I tried it as the following to perform window sliding, but it works slowly when the period is big.
// Map phase of the sliding-window SMA. Each document (visited in the
// mapReduce sort order) contributes its Close value to up to "period"
// consecutive window keys. "counter", "period" and "limit" are injected
// via the mapReduce "scope" option; "counter" tracks how many documents
// have been mapped so far.
var mapper = function() {
// j is the first window key this document belongs to.
var i = 0, j = counter;
if (j < period) {
// The first (period - 1) documents can only partially fill the earliest
// complete window, so clamp the starting key and shorten the emit run.
j = period;
i = period - counter;
}
// Emit this Close value once for every window that includes it.
for (; i < period && j <= limit; i++, j++) {
emit (j, this.Close);
}
counter++;
}
// Reduce phase: total all Close values emitted for one window key.
var reducer = function(key, values) {
    var total = 0;
    values.forEach(function(value) {
        total += value;
    });
    return total;
};
// Finalize phase: convert the window total into an average over "period"
// (supplied through the mapReduce "scope" option).
var finalizer = function(key, reducedValue) {
    var average = reducedValue / period;
    return average;
};
// Run the 730-sample SMA map-reduce over the AKBNK series, newest first.
var period = 730;
var windowLimit = period * 2 - 1;
db.data.mapReduce(mapper, reducer, {
    finalize: finalizer,
    out: "smaOut",
    query: { Name: "AKBNK" },
    sort: { Date: -1 },
    limit: windowLimit,
    scope: { counter: 1, period: period, limit: windowLimit }
});
Any advice how can I do this faster? How can I map the data?
You could try using the below aggregation pipeline which seems to produce the correct results at a quick glance but at a much higher speed:
// Build the SMA pipeline stage by stage for readability.
var matchStage = {
    $match: { "Name": "AKBNK" } // uses an index on "Name" if one exists
};
var sortStage = {
    $sort: { "Date": -1 } // uses an index on "Date" if one exists
};
var groupStage = {
    $group: {
        "_id": null,                          // collapse into one document
        "allCloseValues": { $push: "$Close" } // holding every "Close" value
    }
};
var duplicateStage = {
    $addFields: { "copyOfAllCloseValues": "$allCloseValues" } // duplicate the array
};
var unwindStage = {
    $unwind: {
        "path": "$copyOfAllCloseValues", // one output document per element
        "includeArrayIndex": "_id"       // "_id" carries the array index
    }
};
var averageStage = {
    $project: {
        avg: {
            // average a window of the "Close" array starting at index
            // "_id" and spanning up to 730 values
            $avg: { $slice: [ "$allCloseValues", "$_id", 730 ] }
        }
    }
};
var outStage = { $out: "smaOut" }; // write results to the "smaOut" collection
db.data.aggregate(matchStage, sortStage, groupStage, duplicateStage, unwindStage, averageStage, outStage);

In Mongo, How to write search query to search document based on time, on Date object.?

We have Collection named Incident. In which we have one field StartTime(Date object type).
Every day, whenever incident condition is met then new Document entry will be created and inserted into the collection.
We have to get all the incident which, fall between 10PM to 6AM. (i.e from midnight to early morning).
But i face problem on how to write query for this use case.
Since we have date object, I can able to write query to search document between two Dates.
How to write search query for search based on time, on Date object.
Sample Data:
"StartTime" : ISODate("2015-10-16T18:15:14.211Z")
It's just not a good idea. But basically you apply the date aggregation operators:
// Keep only documents whose StartTime hour falls in 22-23 or 0-5.
var nightHoursCond = {
    "$or": [
        { "$gte": [ { "$hour": "$StartTime" }, 22 ] },
        { "$lt": [ { "$hour": "$StartTime" }, 6 ] }
    ]
};
db.collection.aggregate([
    {
        "$redact": {
            "$cond": {
                "if": nightHoursCond,
                "then": "$$KEEP",
                "else": "$$PRUNE"
            }
        }
    }
])
Using $redact that will only return or $$KEEP the documents that meet both conditions for the $hour extracted from the Date, and $$PRUNE or "remove" from results those that do not.
A bit shorter with MongoDB 3.6 and onwards, but really no different:
// MongoDB 3.6+: the same 10PM-6AM hour filter via $expr in a plain find().
var nightHoursExpr = {
    "$or": [
        { "$gte": [ { "$hour": "$StartTime" }, 22 ] },
        { "$lt": [ { "$hour": "$StartTime" }, 6 ] }
    ]
};
db.collection.find({ "$expr": nightHoursExpr })
Overall, not a good idea because the statement needs to scan the whole collection and calculate that logical condition.
A better way is to actually "store" the "time" as a separate field:
// Materialise the time-of-day (milliseconds since midnight) into its own
// field so it can be indexed and queried directly.
var MS_PER_DAY = 1000 * 60 * 60 * 24;
var pendingOps = [];
db.collection.find().forEach(doc => {
    // Milliseconds elapsed since the start of the document's day.
    const timeMillis = doc.StartTime.valueOf() % MS_PER_DAY;
    pendingOps.push({
        "updateOne": {
            "filter": { "_id": doc._id },
            "update": { "$set": { timeMillis } }
        }
    });
    // Ship a batch once more than 1000 updates are queued.
    if ( pendingOps.length > 1000 ) {
        db.collection.bulkWrite(pendingOps);
        pendingOps = [];
    }
})
// Flush whatever remains after the cursor is exhausted.
if ( pendingOps.length > 0 ) {
    db.collection.bulkWrite(pendingOps);
    pendingOps = [];
}
Then you can simply query with something like:
var start = 22 * ( 1000 * 60 * 60 ), // 10PM as ms from start of day
    end = 6 * ( 1000 * 60 * 60 );    // 6AM as ms from start of day
// FIX: the original call was missing the closing "}" of the query
// document before ")", which is a syntax error.
db.collection.find({
    "$or": [
        { "timeMillis": { "$gte": start } },
        { "timeMillis": { "$lt": end } }
    ]
});
And that field can actually be indexed and so quickly and efficiently return results.

Mongodb get lastHour for each day

I have the following schema in mongodb, where the timestamp is the timestamp at an hourly level
{
"day_chan1" : 54.464,
"day_chan2" : 44.141,
"day_chan3" : 44.89,
"gatewayId" : "443719005AA3",
"timestamp" : ISODate("2016-02-15T23:00:00.000Z"),
"total_curr_chan" : 5.408,
"total_day_chan" : 143.495,
"type" : 1
}
I want to be able to query the last timestamp for the day for the last 7 days and 30 days. In order to do this, I am thinking of doing something like
// NOTE(review): the asker's draft -- one query per day, with callbacks.
var d = new Date(); // today!
for(var i =1; i <= 7; i++) {
var n = i; // go back n days!
// WARNING: setDate() mutates the same "d" on every pass, so the offsets
// accumulate (-1, then -2 more, then -3 more, ...) instead of landing on
// "n days before today" each iteration.
d.setDate(d.getDate() - n);
d.setHours(23,0,0);
var query = {
gatewayId: req.params.deviceId,
// NOTE(review): $lt alone matches every document before the cutoff,
// not just that single day -- presumably a lower bound is also intended.
timestamp: { $lt: new Date(d) }
};
db
.find(query,function(resp) {
//return the data here
});
}
But this creates a problem of multiple callbacks and I want to know if there is an easier way of doing so using aggregates or some other method
Use the $hour operator within the $project operator to extract the hour part of the timestamp, then query with $match to filter documents that do not satisfy the given hour criteria:
// Project every needed field plus the extracted hour of the timestamp,
// then keep only the documents stamped at hour 23 (11PM).
var projectStage = {
    "$project": {
        "day_chan1": 1,
        "day_chan2": 1,
        "day_chan3": 1,
        "gatewayId": 1,
        "timestamp": 1,
        "total_curr_chan": 1,
        "total_day_chan": 1,
        "type": 1,
        "hour": { "$hour": "$timestamp" }
    }
};
var matchStage = { "$match": { "hour": 23 } };
collection.aggregate([projectStage, matchStage], function(err, result) {
    //return the data here
    console.log(result);
});
For arbitrary last hour it must be a bit more complex:
// Generic "latest document per calendar day" pipeline.
db.collection.aggregate([
// Stage 1: only consider documents whose timestamp is a real BSON date.
{$match:{
timestamp:{$type: "date"}}
// add date constraints here
},
// Stage 2: key each document by its (year, day-of-year) pair and carry
// the full document along as "doc" via $$CURRENT.
{$project:{
_id:1,
date:{"y":{$year:"$timestamp"}, "d":{$dayOfYear:"$timestamp"}},
doc:"$$CURRENT"}
},
// Stage 3: per day, remember the maximum timestamp and collect all docs.
{$group:{
_id:"$date",
maxtime: {$max:"$doc.timestamp"},
doc:{$push:"$doc"}}
},
// Stage 4: flatten the per-day arrays back to one document per input doc.
{$unwind:"$doc"},
// Stage 5: $cmp yields 0 exactly when the doc's timestamp equals the
// day's maximum, i.e. when this is the latest document of its day.
{$project:{
latest: {$cmp: ["$maxtime", "$doc.timestamp"]},
doc:"$doc"}
},
// Stage 6: keep only those latest-of-day documents.
{$match:{"latest":0}}
])
With map-reduce it should be simpler, but may be slower.

mongo equivalent of sql query

i need to build a mongo query to get results from a collection which has the same structure as the following sql.
click for picture of table structure
my sql query:
SELECT * FROM (
SELECT
db.date,
db.points,
db.type,
db.name,
db.rank,
YEARWEEK( db.date ) AS year_week
FROM _MyDatabase db
WHERE
db.personId = 100 AND
db.date BETWEEN '2012-10-01' AND '2015-09-30'
ORDER BY
YEARWEEK( db.date ),
db.type,
db.points DESC
) x
GROUP BY
x.year_week DESC,
x.type;
the result looks like this
date points type name rank year_week
-------------------------------------------------
23.10.2014 2000 1 Fish 2 201442
12.10.2014 2500 1 Fish 2 201441
16.10.2014 800 2 Fish 2 201441
i have tried different group / aggregate queries so far, but i couldn't get a similar result. hopefully one of you has more mongo experience than i and can give me a hint on how to solve this.
You would want something like this:
// Emulates the MySQL query: filter by person and date range, compute a
// YEARWEEK-style key, sort, then keep one row per (year_week, type).
var start = new Date(2012, 9, 1),  // months are 0-indexed: 9 -> October
    end = new Date(2015, 8, 30),
    pipeline = [
        {
            "$match": {
                "personId": 100,
                "date": { "$gte": start, "$lte": end }
            }
        },
        {
            "$project": {
                "date": 1, "points": 1, "type": 1, "name": 1, "rank": 1,
                // FIX: $week alone yields only 0-53; combine year and week
                // into a single sortable number like MySQL's YEARWEEK()
                // (e.g. 201442). NB: $week's week-numbering mode may still
                // differ from YEARWEEK's default mode -- verify boundaries.
                "year_week": {
                    "$add": [
                        { "$multiply": [ { "$year": "$date" }, 100 ] },
                        { "$week": "$date" }
                    ]
                }
            }
        },
        {
            "$sort": {
                "year_week": 1,
                "type": 1,
                "points": -1
            }
        },
        {
            "$group": {
                "_id": {
                    "year_week": "$year_week",
                    "type": "$type"
                },
                // FIX: without accumulators the $group emitted only _id,
                // dropping every column shown in the expected result. After
                // the sort, $first picks the top-points row per group, the
                // way MySQL's loose GROUP BY does in the original query.
                "date": { "$first": "$date" },
                "points": { "$first": "$points" },
                "name": { "$first": "$name" },
                "rank": { "$first": "$rank" }
            }
        }
    ];
db.getCollection("_MyDatabase").aggregate(pipeline);

Sum of Substrings in mongodb

We have field(s) in mongodb which has numbers in string form, values such as "$123,00,89.00" or "1234$" etc
Is it possible to customize $sum accumulators in mongodb, so that, certain processing can be done at each field value while the sum is performed. Such as substring or reg-ex processing etc.
The .mapReduce() method is what you need here. You cannot "cast" values in the aggregation framework from one "type" to another ( with the exception of "to string" or from Date to numeric ).
The JavaScript processing means that you can convert a string into a value for "summing". Something like this (with a bit more work on a "safe" regex for the required "currency" values):
db.collection.mapReduce(
    // Strip "$", "," and "." from the amount string, then rescale by 100
    // to recover the numeric value, emitting everything under one key.
    function() {
        var cents = this.amount.replace(/\$|,|\./g, "");
        emit(null, cents / 100);
    },
    // Total the per-document values for the single key.
    function(key, values) {
        return Array.sum(values);
    },
    { "out": { "inline": 1 } }
)
Or with .group(), which also uses JavaScript processing but is a bit more restrictive in its requirements:
// Same idea with .group(): accumulate a running total over all documents.
db.collection.group({
    "key": null,
    "initial": { "total": 0 },
    "reduce": function( curr,result ) {
        // Strip currency punctuation, coerce to a number, rescale by 100.
        var numeric = curr.amount.replace(/\$|,|\./g,"") / 100;
        result.total += numeric;
    }
});
So JavaScript processing is your only option, as these sorts of operations are not supported in the aggregation framework.
A number can be a string:
db.junk.aggregate([{ "$project": { "a": { "$substr": [ 1,0,1 ] } } }])
{ "_id" : ObjectId("55a458c567446a4351c804e5"), "a" : "1" }
And a Date can become a number:
db.junk.aggregate([{ "$project": { "a": { "$subtract": [ new Date(), new Date(0) ] } } }])
{ "_id" : ObjectId("55a458c567446a4351c804e5"), "a" : NumberLong("1436835669446") }
But there are no other operators to "cast" a "string" to "numeric", or even anything to do a regex replace as shown above.
If you want to use .aggregate() then you need to fix your data into a format that will support it, thus "numeric":
// One-off migration: convert string "amount" values into numbers so the
// aggregation framework can $sum them.
var bulk = db.collection.initializeOrderedBulkOp(),
    count = 0;
// FIX: the filter regex was /\$|,\./ -- missing the "|" before "\.", so it
// only matched "$" or the literal sequence ",." instead of "$", "," or ".".
// (The "g" flag is also dropped: it has no meaning in a query regex.)
db.collection.find({ "amount": /\$|,|\./ }).forEach(function(doc) {
    doc.amount = doc.amount.replace(/\$|,|\./g,"") /100;
    bulk.find({ "_id": doc._id }).updateOne({
        "$set": { "amount": doc.amount }
    });
    count++;
    // execute once in 1000 operations
    if ( count % 1000 == 0 ) {
        bulk.execute();
        bulk = db.collection.initializeOrderedBulkOp();
    }
});
// clean up queued operations
if ( count % 1000 != 0 )
    bulk.execute();
Then you can use .aggregate() on your "numeric" data:
// Total the (now numeric) amount field across the whole collection.
var totalPipeline = [
    {
        "$group": {
            "_id": null,
            "total": { "$sum": "$amount" }
        }
    }
];
db.collection.aggregate(totalPipeline)