Map Reducing with timezones - mongodb

I'm trying to map reduce a bunch of data in order to generate a daily graph, the catch is that the application has users from all over the world and they want the data in their own timezone.
The current map reduction I have is pretty simple
var map = function(){
// One login document per invocation: classify it as a success or a failure
// and emit a single counter pair keyed by client / calendar day / admin flag.
var userLogin = this;
var d = this.StartTime;
// Build a "YYYY-M-D" bucket key. getMonth() is 0-based, so add 1 — the
// parentheses are required; the original `+ 1, + '-'` comma typo made this
// line a syntax error instead of appending the month.
var start = d.getFullYear() + '-' + (d.getMonth() + 1) + '-' + d.getDate();
var reduceValue = {
SuccessSession: 0,
FailSession: 0
}
// Severity below 2 counts as a successful session; anything else is a failure.
if(userLogin.ExitReason.Severity <2)
{
reduceValue.SuccessSession += 1;
} else {
reduceValue.FailSession += 1;
}
emit({ClientId: this.ClientId, StartDate:start, IsAdmin: this.IsAdmin},
{SuccessSession: reduceValue.SuccessSession, FailSession: reduceValue.FailSession })
}
and the Reduce
var reduce = function(key, values) {
// Fold the per-document counter pairs emitted by map into one total pair
// for this key. Must be associative/idempotent per map-reduce contract.
var totals = {
SuccessSession: 0,
FailSession: 0
};
values.forEach(function(v) {
totals.SuccessSession += v.SuccessSession;
totals.FailSession += v.FailSession;
});
return totals;
};
The problem with this approach is that it calculates from midnight UTC to midnight UTC, however I'd prefer to be able to query the Map Reduced and still be able to query depending upon which timezone a user is in.
Is there any simple way to accomplish being able to mapreduce and yet keep the ability to use timezones?

No easy way. The map function is JavaScript, so you would need at least the IANA TZ database available there — something like https://momentjs.com/timezone/ accessible within the map function.
Would you consider aggregation framework instead of map-reduce? It has support of timezones and is much faster. The pipeline could be as simple as this:
// Aggregation alternative to the map-reduce above. $dateToString's
// `timezone` option (MongoDB 3.6+) buckets StartTime by the *local* day.
db.collection.aggregate([
{ $group: {
// Group key mirrors the map-reduce emit key: client + local day + admin flag.
_id: {
ClientId: "$ClientId",
start: { $dateToString: {
format: "%Y-%m-%d",
date: "$StartTime",
timezone: "Europe/Kiev"
} },
IsAdmin: "$IsAdmin"
},
// Severity < 2 counts as a success (adds 1/0) ...
"SuccessSession": { $sum: { $cond: {
if: { $lt: [ "$ExitReason.Severity", 2 ] },
then: 1,
else: 0
} } },
// ... otherwise as a failure (adds 0/1). Same test, branches swapped.
"FailSession":{ $sum: { $cond: {
if: { $lt: [ "$ExitReason.Severity", 2 ] },
then: 0,
else: 1
} } },
} }
])

Related

How to duplicate a document in MongoDB and increment a value in another field?

I need to duplicate a document from my collection orders by filtering the field cd_order with value "7650614875".
Collection orders look like this:
{
...
"cd_order": "7650614875"
...
}
I will copy this document more than 50 times (I will change this value in the query), so I also need to change the value of this field cd_order in the copied document to another value. My idea was to convert it to an int, use inc to increment it by 1, and then convert it back to a string.
I tried the query below, but only the copy occurred; the rest didn't work:
// NOTE(review): only the findOne and insert below ever execute. The three
// object literals that follow the insert use aggregation-operator syntax
// ($convert / $inc) as bare shell statements — they are not valid statements
// and are never sent to the server, which is why only unchanged copies appear.
var copy = db.orders.findOne({ cd_order: "7650614875" }, { _id: 0 });
for (var i = 0; i< 3; i++){
db.orders.insert(copy);
{ $convert: { input: $string, to: "int" } }
{ $inc: { "cd_order" : 1 } }
{ $convert: { input: $int, to: "string" } }
}
How can I duplicate this document, increment 1 into field cd_order to not be the same as the previous one, and also to print all new cd_order at the end?
Print example:
cd_order: 7650614875, 7650614876, 7650614877, 7650614878, ...
You can use $range to generate an array of increment. $unwind the array to add to cd_order and $merge to insert/update into the collection
// Duplicate the matched order 51 times server-side: generate offsets with
// $range, $unwind them into one document per offset, add each offset to the
// numeric value of cd_order, then $merge the results back into the collection.
db.collection.aggregate([
{
// Select the order to duplicate.
"$match": {
"cd_order": "7650614875"
}
},
{
// Attach an array of increments [0, 1, ..., 50].
$set: {
inc: {
"$range": [
0,
51,
1
]
}
}
},
{
// One output document per increment value.
"$unwind": "$inc"
},
{
$set: {
// New cd_order = string(long(cd_order) + inc).
cd_order: {
$toString: {
$add: [
{
"$toLong": "$cd_order"
},
"$inc"
]
}
},
// Drop the helper field and _id so $merge can insert fresh documents.
"inc": "$$REMOVE",
_id: "$$REMOVE"
}
},
{
// Upsert by cd_order: existing orders are merged, new ones inserted.
"$merge": {
"into": "collection",
"on": "cd_order",
"whenMatched": "merge",
"whenNotMatched": "insert"
}
}
])
Mongo Playground
I was able to convert the value and increment it before duplicating the document, and it worked well:
// Duplicate the matched order 100 times, bumping the numeric value stored in
// the string field cd_order by 1 before each insert.
var copy = db.orders.findOne({ cd_order: "7650614877" }, { _id: 0 });
for (var i = 0; i < 100; i++) {
// Always pass an explicit radix to parseInt so the digits are never
// re-interpreted (e.g. a leading 0 treated as octal in old engines).
copy.cd_order = (parseInt(copy.cd_order, 10) + 1).toString();
// NOTE(review): some shells assign _id to `copy` on the first insert; if
// later inserts fail with a duplicate key, delete copy._id inside the loop.
db.orders.insert(copy);
}
I was also able to print all values using this query:
// Collect every cd_order for the given ancestor and print them as one
// comma-separated line.
var orders = db.orders.find({ "nm_ancestor": {$eq: "Luigi D'arpino"} }, { "cd_order": 1 });
var codes = [];
orders.forEach(function(order) {
codes.push(order.cd_order);
});
print("cd_order: " + codes.join(", "));

How to calculate simple moving average using mongodb mapreduce?

I have time series data as the following format in a mongodb collection:
{
"Name" : "AKBNK",
"Date" : ISODate("2009-01-02T00:00:00Z"),
"Close" : 3.256746559,
}
I want to calculate simple moving average using mongodb mapreduce. I tried it as the following to perform window sliding, but it works slowly when the period is big.
// Map phase of a sliding-window simple moving average. `counter`, `period`
// and `limit` are injected through mapReduce's `scope` option and shared
// across invocations; each document's Close is emitted under every window
// index j that the document falls into.
var mapper = function() {
var i = 0, j = counter;
// The first `period` documents only contribute to the windows they can
// fully populate, so clamp the starting window index.
if (j < period) {
j = period;
i = period - counter;
}
for (; i < period && j <= limit; i++, j++) {
emit (j, this.Close);
}
counter++;
}
// Reduce phase: Array.sum is a mongo-shell helper that totals the emitted
// Close values for one window key.
var reducer = function(key, values) {
return Array.sum(values);
}
// Finalize: divide each window total by the window length to get the average.
var finalizer = function(key, reducedValue) {
return reducedValue / period;
}
var period = 730;
// Sort descending by Date and cap the scan at 2*period-1 documents so only
// fully-populated windows are computed; results land in the "smaOut"
// collection. NOTE(review): the shared `counter` in scope makes this slow
// for large periods — see the aggregation alternative below.
db.data.mapReduce(mapper, reducer, {finalize: finalizer, out: "smaOut", query: {Name: "AKBNK"}, sort: {Date: -1}, limit: period * 2 - 1, scope: {counter: 1, period: period, limit: period * 2 - 1}});
Any advice how can I do this faster? How can I map the data?
You could try using the below aggregation pipeline which seems to produce the correct results at a quick glance but at a much higher speed:
// SMA via aggregation: gather all Close values into one array, then for each
// index average a $slice of up to 730 values starting there.
db.data.aggregate({
$match: {
"Name": "AKBNK" // this stage will use an index if you have one on the "Name" field
}
}, {
$sort: { "Date": -1 }, // this stage will also use an index if you have one on "Date"
}, {
$group: {
"_id": null, // create one single document
"allCloseValues": { $push: "$Close" } // that shall contain an array with all "Close" values
}
}, {
$addFields: {
"copyOfAllCloseValues": "$allCloseValues" // duplicate the array
}
}, {
$unwind: {
"path": "$copyOfAllCloseValues", // flatten the created single document
"includeArrayIndex": "_id" // use the "_id" field to hold the array index
}
}, {
$project: {
avg: {
$avg: { // calculate the average of some part of the array "Close"
$slice: [ "$allCloseValues", "$_id", 730 ] // which shall start at index "_id" and take up to 730 values
}
}
}
}, {
$out: "smaOut" // write the resulting documents out to the "smaOut" collection
});

How do I replace string type with date type in MongoDB for an object array?

I know the method for converting dates for the first level
// Convert a top-level string field to a Date in place: ISODate() (a mongo
// shell helper) parses the string and save() writes the document back.
db.collection.find().forEach(function(x){
x.DateField = ISODate(x.DateField);
db.collection.save(x);})
This works with first level fields, but what should I do if i want to change fields of objects in arrays such as CreatedTime in the below json sample:
{
"Pharmacy": "b",
"Medicine": [
{
"MedName": "MedB",
"Quantity": 60,
"CreatedTime" : "2006-05-05T11:44:47.86Z"
},
{
"MedName": "MedC",
"Quantity": 34,
"CreatedTime" : "2006-11-23T12:28:44.86Z"
}
]
}
You can use the cursor from the aggregate query and bulk updates to update documents in 4.0 version.
Use $map with $toDate to keep the existing values and convert the string date to date type.
Here is the shell sample.
// Rewrite each document's Medicine array with CreatedTime converted from
// string to Date ($toDate, MongoDB 4.0+), then push the converted arrays
// back with batched unordered bulk updates.
var bulk = db.collection.initializeUnorderedBulkOp();
var count = 0;
var batch = 50; // Change batch size as you need
db.collection.aggregate([
{"$project":{
// $map rebuilds each array element, keeping MedName/Quantity as-is and
// converting only CreatedTime.
"Medicine":{
"$map":{
"input":"$Medicine",
"in":{
"MedName":"$$this.MedName",
"Quantity":"$$this.Quantity",
"CreatedTime":{"$toDate":"$$this.CreatedTime"}
}
}
}
}}
]).forEach(function(doc){
// Queue one replacement of the Medicine array per document.
bulk.find( {"_id" : doc._id}).updateOne(
{ "$set": {"Medicine" : doc.Medicine}}
);
count++;
// Flush a full batch and start a new bulk op.
if (count == batch) {
bulk.execute();
bulk = db.collection.initializeUnorderedBulkOp();
count = 0;
}
});
// Flush the final partial batch, if any.
if (count > 0) {
bulk.execute();
}
// Client-side alternative: walk every document, convert each array element's
// CreatedTime string to a Date in the shell, and save the document back.
db.getCollection('test').find({}).forEach(function(doc) {
doc.Medicine.forEach(function(med){
med.CreatedTime = new Date(med.CreatedTime);
});
db.test.save(doc);
});
EDITED: ======= Second approach: Source
// $dateFromString (MongoDB 3.6+) parses a string into a Date, optionally
// interpreting it in a named timezone during the conversion.
db.logmessages.aggregate( [ {
$project: {
date: {
$dateFromString: {
dateString: '$date',
timezone: 'America/New_York'
}
}
}
} ] )

Mongodb get lastHour for each day

I have the following schema in mongodb, where the timestamp is the timestamp at an hourly level
{
"day_chan1" : 54.464,
"day_chan2" : 44.141,
"day_chan3" : 44.89,
"gatewayId" : "443719005AA3",
"timestamp" : ISODate("2016-02-15T23:00:00.000Z"),
"total_curr_chan" : 5.408,
"total_day_chan" : 143.495,
"type" : 1
}
I want to be able to query the last timestamp for the day for the last 7 days and 30 days. In order to do this, I am thinking of doing something like
var d = new Date(); // today!
// NOTE(review): `d` is mutated on every iteration while `n` also grows, so
// the offsets compound (-1, then -3, then -6 days, ...) — almost certainly
// not the intended "one query per calendar day".
for(var i =1; i <= 7; i++) {
var n = i; // go back n days!
d.setDate(d.getDate() - n);
d.setHours(23,0,0);
var query = {
gatewayId: req.params.deviceId,
timestamp: { $lt: new Date(d) }
};
// Each iteration issues an independent async find(), which causes the
// multiple-callback problem described below.
db
.find(query,function(resp) {
//return the data here
});
}
But this creates a problem of multiple callbacks and I want to know if there is an easier way of doing so using aggregates or some other method
Use the $hour operator within the $project operator to extract the hour part of the timestamp, then query with $match to filter documents that do not satisfy the given hour criteria:
// Pipeline: pass the original fields through, add a computed "hour" field
// extracted from the timestamp, then keep only documents whose hour is 23
// (the last hourly reading of each day).
var pipeline = [
    {
        $project: {
            day_chan1: 1,
            day_chan2: 1,
            day_chan3: 1,
            gatewayId: 1,
            timestamp: 1,
            total_curr_chan: 1,
            total_day_chan: 1,
            type: 1,
            hour: { $hour: "$timestamp" }
        }
    },
    { $match: { hour: 23 } }
];
// Run the pipeline; `result` holds the filtered documents (hour === 23).
collection.aggregate(pipeline, function(err, result) {
//return the data here
console.log(result);
});
For arbitrary last hour it must be a bit more complex:
// Arbitrary "last document per day": group by (year, dayOfYear), remember
// each day's max timestamp, then keep only the documents matching it.
db.collection.aggregate([
{$match:{
timestamp:{$type: "date"}}
// add date constraints here
},
{$project:{
_id:1,
// Composite day key plus the full document for later re-expansion.
date:{"y":{$year:"$timestamp"}, "d":{$dayOfYear:"$timestamp"}},
doc:"$$CURRENT"}
},
{$group:{
_id:"$date",
maxtime: {$max:"$doc.timestamp"},
doc:{$push:"$doc"}}
},
{$unwind:"$doc"},
{$project:{
// $cmp is 0 only for the document whose timestamp equals the day's max.
latest: {$cmp: ["$maxtime", "$doc.timestamp"]},
doc:"$doc"}
},
{$match:{"latest":0}}
])
With map-reduce it should be simpler, but may be slower.

MongoDB Count() vs. Aggregation

I've used aggregation in mongo a lot, I know performance benefits on the grouped counts and etc. But, do mongo have any difference in performance on those two ways to count all documents in a collection?:
// Count all documents by streaming every one through a null-keyed $group.
collection.aggregate([
{
$match: {}
},{
$group: {
_id: null,
count: {$sum: 1}
}
}]);
and
// Cursor count — per the answer below, resolved from collection/index
// metadata rather than scanning documents.
collection.find({}).count()
Update: Second case:
Let's say we have this sample data:
{_id: 1, type: 'one', value: true}
{_id: 2, type: 'two', value: false}
{_id: 4, type: 'five', value: false}
With aggregate():
// One round trip: match the candidate _ids with value:false, then count the
// survivors grouped by their type.
var _ids = ['id1', 'id2', 'id3'];
var counted = Collections.mail.aggregate([
{
'$match': {
_id: {
'$in': _ids
},
value: false
}
}, {
'$group': {
_id: "$type",
count: {
'$sum': 1
}
}
}
]);
With count():
// Per-id alternative: one count() query per _id (N round trips vs. the
// single aggregation above).
var counted = {};
var type = 'two';
// Declare the loop variables with var — the original `for (i = 0, ...)`
// leaked `i` as an implicit global.
for (var i = 0, len = _ids.length; i < len; i++) {
counted[_ids[i]] = Collections.mail.find({
_id: _ids[i], value: false, type: type
}).count();
}
.count() is by far faster. You can see the implementation by calling
// Note the missing parentheses at the end
db.collection.count
which returns the length of the cursor of the default query (if count() is called with no query document), which in turn is implemented as returning the length of the _id_ index, iirc.
An aggregation, however, reads each and every document and processes it. It can only stay within the same order of magnitude as .count() when run over only some 100k documents (give or take, depending on your RAM).
Below function was applied to a collection with some 12M entries:
function checkSpeed(col, iterations) {
    // Benchmark count() against a $group aggregation count over `iterations`
    // passes, writing per-pass timings into <col>STATS and returning the
    // average duration per method.
    // Get the collection
    var collectionUnderTest = db[col];
    // The collection we are writing our stats to
    var stats = db[col + 'STATS'];
    // remove old stats
    stats.remove({});
    // Prevent allocation in loop
    var start = new Date().getTime();
    var duration = new Date().getTime();
    print("Counting with count()");
    for (var i = 1; i <= iterations; i++) {
        start = new Date().getTime();
        var result = collectionUnderTest.count();
        duration = new Date().getTime() - start;
        stats.insert({"type": "count", "pass": i, "duration": duration, "count": result});
    }
    print("Counting with aggregation");
    for (var j = 1; j <= iterations; j++) {
        start = new Date().getTime();
        // aggregate() returns a cursor in modern shells, so materialize the
        // single result document; the original `doc.count` read a property
        // off the cursor and recorded nothing useful.
        var doc = collectionUnderTest.aggregate([{ $group: { _id: null, count: { $sum: 1 } } }]).toArray()[0];
        duration = new Date().getTime() - start;
        stats.insert({"type": "aggregation", "pass": j, "duration": duration, "count": doc ? doc.count : 0});
    }
    var averages = stats.aggregate([
        {$group: {_id: "$type", "average": {"$avg": "$duration"}}}
    ]);
    return averages;
}
And returned:
{ "_id" : "aggregation", "average" : 43828.8 }
{ "_id" : "count", "average" : 0.6 }
The unit is milliseconds.
hth