Fill day gaps of two-dimensional timeseries data in MongoDB with aggregate

Fill day gaps of two-dimensional timeseries data in MongoDB with aggregate - mongodb

I have a collection of two-dimensional timeseries data as follows:
[
{
"value" : 9,
"timestamp" : "2020-12-30T02:06:33.000+0000",
"recipeId" : 15
},
{
"value" : 2,
"timestamp" : "2020-12-30T12:04:23.000+0000",
"recipeId" : 102
},
{
"value" : 5,
"timestamp" : "2020-12-30T15:09:23.000+0000",
"recipeId" : 102
},
...
]
The records have a recipeId which is the first level of grouping I'm looking for. All values for a day of a recipe should be summed up. I want an array of timeseries per recipeId. I need the missing days to be filled with a 0. I want this construct to be created for a provided start and end date range.
Some like this for date range of 2020-12-29 to 2020-12-31:
[
[
{
"sum" : 0,
"timestamp" : "2020-12-29",
"recipeId" : 15
},
{
"sum" : 9,
"timestamp" : "2020-12-30",
"recipeId" : 15
},
{
"sum" : 0,
"timestamp" : "2020-12-31",
"recipeId" : 15
},
...
],
[
{
"sum" : 0,
"timestamp" : "2020-12-29",
"recipeId" : 0
},
{
"sum" : 7,
"timestamp" : "2020-12-30",
"recipeId" : 102
},
{
"sum" : 0,
"timestamp" : "2020-12-31",
"recipeId" : 102
},
...
]
]
This is what I currently have and it's only partially solving my requirements. I can't manage to get the last few stages right:
[
{
"$match": {
"timestamp": {
"$gte": "2020-12-29T00:00:00.000Z",
"$lte": "2020-12-31T00:00:00.000Z"
}
}
},
{
"$addFields": {
"timestamp": {
"$dateFromParts": {
"year": { "$year": "$timestamp" },
"month": { "$month": "$timestamp" },
"day": { "$dayOfMonth": "$timestamp" }
}
},
"dateRange": {
"$map": {
"input": {
"$range": [
0,
{
"$trunc": {
"$divide": [
{
"$subtract": [
"2020-12-31T00:00:00.000Z",
"2020-12-29T00:00:00.000Z"
]
},
1000
]
}
},
86400
]
},
"in": {
"$add": [
"2020-12-29T00:00:00.000Z",
{ "$multiply": ["$$this", 1000] }
]
}
}
}
}
},
{ "$unwind": "$dateRange" },
{
"$group": {
"_id": { "date": "$dateRange", "recipeId": "$recipeId" },
"count": {
"$sum": { "$cond": [{ "$eq": ["$dateRange", "$timestamp"] }, 1, 0] }
}
}
},
{
"$group": {
"_id": "$_id.date",
"total": { "$sum": "$count" },
"byRecipeId": {
"$push": {
"k": { "$toString": "$_id.recipeId" },
"v": { "$sum": "$count" }
}
}
}
},
{ "$sort": { "_id": 1 } },
{
"$project": {
"_id": 0,
"timestamp": "$_id",
"total": "$total",
"byRecipeId": {
"$arrayToObject": {
"$filter": { "input": "$byRecipeId", "cond": "$$this.v" }
}
}
}
}
]
which results in:
[
{
"timestamp": "2020-12-29T00:00:00.000Z",
"total": 21,
"byRecipeId": {}
},
{
"timestamp": "2020-12-30T00:00:00.000Z",
"total": 0,
"byRecipeId": {
"15": 9,
"102": 7
}
},
{
"timestamp": "2020-12-31T00:00:00.000Z",
"total": 0,
"byRecipeId": {}
}
]
I'm open to alternative solution of course. For examples I came across this post: https://medium.com/#alexandro.ramr777/fill-missing-values-using-mongodb-aggregation-framework-f011114e83e0 but it doesn't deal with multi-dimensions.

You could use the $redcue function. This code fills the gabs of Minutes for current day. Should be easy to adapt it to give missing Days.
{
$addFields: {
data: {
$reduce: {
input: { $range: [0, 24 * 60] },
initialValue: [],
in: {
$let: {
vars: {
ts: {
$add: [
moment().startOf('day').toDate(),
{ $multiply: ["$$this", 1000 * 60] }
]
}
},
in: {
$concatArrays: [
"$$value",
[{
$cond: {
if: { $in: ["$$ts", "$data.timestamp"] },
then: {
$first: {
$filter: {
input: "$data",
cond: { $eq: ["$$this.timestamp", "$$ts"] }
}
}
},
else: { timestamp: "$$ts", total: 0 }
}
}]
]
}
}
}
}
}
}
}
In my opinion, $reduce is more elegant than $map, however based on my experience the performance is much worse with $reduce.

Related

Mongodb query to get count of field based on the value for a matching string

I have the following Mongodb document.
{
"_id" : ObjectId("62406bfaa1d66f8d99c6e97d"),
"skill": "Programming Language"
"supply" : [
{
"employeeName" : "A1",
"skillRating" : 3
},
{
"employeeName" : "A2",
"skillRating" : 4
},
{
"employeeName" : "A3",
"skillRating" : 4
},
{
"employeeName" : "A4",
"skillRating" : 4
},
{
"employeeName" : "A5",
"skillRating" : 3
},
{
"employeeName" : "A6",
"skillRating" : 4
},
{
"employeeName" : "A7",
"skillRating" : 2
},
{
"employeeName" : "A8",
"skillRating" : 2
},
{
"employeeName" : "A9",
"skillRating" : 4
},
{
"employeeName" : "A10",
"skillRating" : 3
},
{
"employeeName" : "A11",
"skillRating" : 3
},
{
"employeeName" : "A12",
"skillRating" : 3
},
{
"employeeName" : "A13",
"skillRating" : 2
},
{
"employeeName" : "A14",
"skillRating" : 4
},
{
"employeeName" : "A15",
"skillRating" : 4
}
]
}
How can I write a Mongodb query to produce the following output (i.e.: Get the count of occurrence of each value for a matching skill)
{
skillName : "Programming Language",
skillRating1: 0, <-- Count of skillRating with value 1
skillRating2: 3, <-- Count of skillRating with value 2
skillRating3: 5, <-- Count of skillRating with value 3
skillRating4: 7, <-- Count of skillRating with value 4
skillRating5: 0 <-- Count of skillRating with value 5
}
[Note: I am learning to write Mongodb queries]

You can go with aggregation,
$unwind to deconstruct the array
$group to get the sum of avg by _id and the avg
$arrayToObject to make the field to object with the help of $concat. Because we need the skillRating1,skillRating2...
$replaceRoot to get the object to root document
$project to decide whether to show or not
Here is the code,
db.collection.aggregate([
{ "$unwind": "$supply" },
{
"$group": {
"_id": { _id: "$_id", avg: "$supply.avgSkillRating" },
"count": { "$sum": 1 },
"skill": { "$first": "$skill" }
}
},
{
"$group": {
"_id": "$_id._id",
"skill": { "$first": "$skill" },
"data": {
$push: {
k: {
$concat: [ "avgSkillRating", { $toString: "$_id.avg" } ]
},
v: "$count"
}
}
}
},
{ "$addFields": { "data": { "$arrayToObject": "$data" } } },
{
"$replaceRoot": {
"newRoot": { "$mergeObjects": [ "$$ROOT", "$data" ] }
}
},
{ "$project": { data: 0 } }
])
Working Mongo playground

Maybe something like this:
db.collection.aggregate([
{
$unwind: "$supply"
},
{
$group: {
_id: "$supply.avgSkillRating",
cnt: {
$push: "$supply.avgSkillRating"
},
skill: {
$first: "$skill"
}
}
},
{
$project: {
z: [
{
"k": {
"$concat": [
"avgSkillRating",
{
$toString: "$_id"
}
]
},
"v": {
$size: "$cnt"
}
}
],
skill: 1
}
},
{
$replaceRoot: {
newRoot: {
"$mergeObjects": [
{
"$arrayToObject": "$z"
},
{
skillName: "$skill"
}
]
}
}
},
{
$group: {
_id: "$skillName",
x: {
$push: "$$ROOT"
}
}
},
{
"$replaceRoot": {
"newRoot": {"$mergeObjects": "$x"}
}
}
])
Explained:
Unwind the supply array
group avgSkillRating to array cnt ( to be possible to count )
form z array with k,v suitable for arrayToObject
mergeObjects to form the keys and values
group to join the objects and leave only single skillName
replace the root document with the newly formed document with the necesary details.
playground

Here's another version that also reports skillRatings with a zero count. This aggregation pipeline is essentially identical to #varman's answer and adds a complex (to me anyway) "$set"/"$map" to create the extra fields.
db.collection.aggregate([
{
"$unwind": "$supply"
},
{
"$group": {
"_id": { "_id": "$_id", "avg": "$supply.avgSkillRating" },
"count": { "$count": {} },
"skillName": { "$first": "$skill" }
}
},
{
"$group": {
"_id": "$_id._id",
"skillName": { "$first": "$skillName" },
"data": {
"$push": {
"_r": "$_id.avg",
"k": { $concat: [ "skillRating", { $toString: "$_id.avg" } ] },
v: "$count"
}
}
}
},
{
"$set": {
"data": {
"$map": {
"input": { "$range": [ 1, 6 ] },
"as": "rate",
"in": {
"$let": {
"vars": {
"idx": { "$indexOfArray": [ "$data._r", "$$rate" ] }
},
"in": {
"$cond": [
{ "$gte": [ "$$idx", 0 ] },
{
"k": {
"$getField": {
"field": "k",
"input": { "$arrayElemAt": [ "$data", "$$idx" ] }
}
},
"v": {
"$getField": {
"field": "v",
"input": { "$arrayElemAt": [ "$data", "$$idx" ] }
}
}
},
{
"k": { $concat: [ "skillRating", { $toString: "$$rate" } ] },
"v": 0
}
]
}
}
}
}
}
}
},
{ "$set": { "data": { "$arrayToObject": "$data" } } },
{ "$replaceWith": { "$mergeObjects": [ "$$ROOT", "$data" ] } },
{ "$unset": [ "data", "_id" ] }
])
Try it mongoplayground.net.

Is it possible to group (aggregate) objects with dates into incremental intervals in MongoDB?

I am currently trying to create an aggregation pipeline in MongoDB to group the items into incremental time intervals, but I only succeeded in grouping them in disjoint time intervals so far.
Sample data:
{
"eventID": "abc",
"date": ISODate("2020-11-05T12:05:11.790Z"),
...........
},
{
"eventID": "xyz",
"date": ISODate("2020-11-05T12:12:11.790Z"),
...........
},
{
"eventID": "klm",
"date": ISODate("2020-11-05T12:28:11.790Z"),
...........
}
Current solution:
$group: {
"_id": {
"year": { $year: "$date" },
"dayOfYear": { $dayOfYear: "$date" },
"hour": { $hour: "$date" },
"interval": {
"$subtract": [
{ "$minute": "$date" },
{ "$mod": [{ "$minute": "$date"}, 10 ] }
]
}
},
"grouped_data": { "$push": { "eventID": "$eventID", "date": "$date" },
"count": { $sum: 1 } }
}
Which returns the data grouped in 10 minutes intervals but those are disjoint intervals (time windows of 10minutes that do not intersect).
Eg:
{
"_id": {
"year": 2020,
"dayOfYear": "314",
"hour": 12,
"interval": 0, // = interval beginning at minute 0 of 12th hour of the day
},
"grouped_data": [{ "eventID": "abc", "date": ISODate("2020-11-05T12:05:11.790Z" }],
"count": 1
},
{
"_id": {
"year": 2020,
"dayOfYear": "314",
"hour": 12,
"interval": 10, // = beginning at minute 10
},
"grouped_data": [{ "eventID": "xyz", "date": ISODate("2020-11-05T12:12:11.790Z") }],
"count": 1
},
{
"_id": {
"year": 2020,
"dayOfYear": "314",
"hour": 12,
"interval": 20, // = beginning at minute 20
},
"grouped_data": [{ "eventID": "klm", "date": ISODate("2020-11-05T12:28:11.790Z") }],
"count": 1
}
What I am actually looking for is grouping them in 10 minutes(or whatever is needed) incremental intervals. Eg: 0-9, 1-10, 2-11, etc. instead of 0-9, 10-19, 20-29 etc.
Edit:
The end goal here is to check if a count threshold is surpassed on a interval length defined by the user.
If user asks "Are there more than 2 events on a 10minute time window?", based on the sample data above and my current solution, the condition is not met. (1 event in 0-9 interval, and 1 event in 10-19). With incremental intervals I should be able to find that there are indeed 2 events in 10 minutes, but in the time interval 5-14. Eg:
{
"_id": {
*whatever logic for grouping in 10minutes window*
},
"grouped_data": [
{ "eventID": "abc", "date": ISODate("2020-11-05T12:05:11.790Z") },
{ "eventID": "xyz", "date": ISODate("2020-11-05T12:12:11.790Z") }],
"count": 2
},
{
"_id": {
*whatever logic for grouping in 10minutes window*
},
"grouped_data": [
{ "eventID": "klm", "date": ISODate("2020-11-05T12:28:11.790Z") }]
"count": 1
},

For me it is not clear which output you like to get, but this aggregation pipeline makes the sliding-window group:
db.collection.aggregate([
{
$group: {
_id: null,
data: { $push: "$$ROOT" },
min_date: { $min: "$date" },
max_date: { $max: "$date" }
}
},
{
$addFields: {
interval: {
$range: [
{ $toInt: { $divide: [{ $toLong: "$min_date" }, 1000] } },
{ $toInt: { $divide: [{ $toLong: "$max_date" }, 1000] } },
10 * 60]
}
}
},
{
$set: {
interval: {
$map: {
input: "$interval",
in: { $toDate: { $multiply: ["$$this", 1000] } }
}
}
}
},
{ $unwind: "$interval" },
{
$project: {
grouped_data: {
$filter: {
input: "$data",
cond: {
$and: [
{ $gte: ["$$this.date", "$interval"] },
{ $lt: ["$$this.date", { $add: ["$interval", 1000 * 60 * 10] }] },
]
}
}
},
interval: 1
}
}
])
Boundaries are given by input data, however can also use fixes dates:
db.collection.aggregate([
{ $group: { _id: null, data: { $push: "$$ROOT" } } },
{
$addFields: {
interval: {
$range: [
{ $toInt: { $divide: [{ $toLong: ISODate("2020-01-01T00:00:00Z") }, 1000] } },
{ $toInt: { $divide: [{ $toLong: ISODate("2020-12-31T23:59:59Z") }, 1000] } },
10 * 60]
}
}
},
{
$set: {
interval: {
$map: {
input: "$interval",
in: { $toDate: { $multiply: ["$$this", 1000] } }
}
}
}
},
{ $unwind: "$interval" },
{
$project: {
grouped_data: {
$filter: {
input: "$data",
cond: {
$and: [
{ $gte: ["$$this.date", "$interval"] },
{ $lt: ["$$this.date", { $add: ["$interval", 1000 * 60 * 10] }] },
]
}
}
},
interval: 1
}
}
])

I will try to answer my own question, maybe it will help other people on the internet. The solution I came up with is based on the answer of #Wernfried (thanks!).
db.getCollection("events_en").aggregate([
{
$match: { eventID: "XYZ" }
},
{
$group: {
_id: null,
events: { $push: "$$ROOT" },
limit: { $push: { $toDate: { $add: [{ $toLong: "$date" }, 1000 * 60 * 10] } } }
}
},
{ $unwind: "$limit" },
{
$project: {
events: {
$filter: {
input: "$events",
cond: {
$and: [
{ $lt: ["$$this.date", "$limit"] },
{ $gte: ["$$this.date", { $subtract: ["$limit", 1000 * 60 * 10] }] },
]
}
}
},
limit: 1,
}
},
{
$addFields: {
count: {
$size: "$events"
}
}
}
])
This will create a limit for each event, based on its date + 10 minutes (or whatever). And afterwards it filters the events (which are now duplicated for each of the limit using $unwind: "$limit"), based on that limit. The result is something like this:
{
"_id" : null,
"limit" : ISODate("2020-11-05T12:28:27.000+0000"),
"events" : [
{
"_id" : 13,
"eventID" : "XYZ",
"date" : ISODate("2020-11-05T12:18:27.000+0000")
},
{
"_id" : 63,
"eventID" : "XYZ",
"date" : ISODate("2020-11-05T12:19:55.000+0000")
},
............................
{
"_id" : 90,
"eventID" : "XYZ",
"date" : ISODate("2020-11-05T12:27:57.000+0000")
}
],
"count" : 5
}
{
"_id" : null,
"limit" : ISODate("2020-11-05T12:29:55.000+0000"),
"events" : [
{
"_id" : 63,
"eventID" : "XYZ",
"date" : ISODate("2020-11-05T12:19:55.000+0000")
},
{
"_id" : 90,
"eventID" : "XYZ",
"date" : ISODate("2020-11-05T12:27:57.000+0000")
},
{
"_id" : 97,
"eventID" : "XYZ",
"date" : ISODate("2020-11-05T12:29:36.000+0000")
}
],
"count" : 3
}
As you can see, looking at the limit of each group and at the dates of the events in each group, these intervals are now incremental, not disjoint. (event X is found in multiple groups, as long as it doesnt exceeds the time interval of 10minutes)

Aggregate Pipeline groups by day but projects a null date

I'm attempting to group the items in a collection by year/month/day. The grouping should be based on the pubDate and pubTimezoneOffset.
I've got an aggregate pipeline that:
- $project - adds the timezoneOffset to the pubDate
- $group - groups by the modified pubDate
- $project - removes the timezoneOffset
- $sort - sorts by pubDate
I tested each stage on it's own and it seems to be some issue with the second $project. In the final output the pubDate is null.
I've been going over it for a few hours now and can't see where I've gone wrong. What am I missing?
The aggregate pipeline:
db.messages.aggregate([
{
$project: {
_id: 1,
pubTimezoneOffset: 1,
pubDate: {
$add: [
'$pubDate', {
$add: [
{ $multiply: [ '$pubTimezoneOffset.hours', 60, 60, 1000 ] },
{ $multiply: [ '$pubTimezoneOffset.minutes', 60, 1000 ] }
]
}
]
}
}
},
{
$group: {
_id: {
year: { $year: '$pubDate' },
month: { $month: '$pubDate' },
day: { $dayOfMonth: '$pubDate' }
},
count: { $sum: 1 },
messages: {
$push: {
_id: '$_id',
pubTimezoneOffset: '$pubTimezoneOffset',
pubDate: '$pubDate'
}
}
}
},
{
$project: {
_id: 1,
messages: {
_id: 1,
pubTimezoneOffset: 1,
pubDate: {
$subtract: [
'$pubDate', {
$add: [
{ $multiply: [ '$pubTimezoneOffset.hours', 60, 60, 1000 ] },
{ $multiply: [ '$pubTimezoneOffset.minutes', 60, 1000 ] }
]
}
]
}
},
count: 1
}
},
{
$sort: {
'_id.year': -1,
'_id.month': -1,
'_id.day': -1
}
}
]).pretty();
To recreate the source data:
db.messages.insertOne({
pubDate: ISODate('2017-10-25T10:00:00:000Z'),
pubTimezoneOffset: {
hours: -7,
minutes: 0
}
});
db.messages.insertOne({
pubDate: ISODate('2017-10-25T11:00:00:000Z'),
pubTimezoneOffset: {
hours: -7,
minutes: 0
}
});
db.messages.insertOne({
pubDate: ISODate('2017-10-24: 10:00:00:000Z'),
pubTimezoneOffset: {
hours: -7,
minutes: 0
}
});
db.messages.insertOne({
pubDate: ISODate('2017-10-24: 11:00:00:000Z'),
pubTimezoneOffset: {
hours: -7,
minutes: 0
}
});
Running it in mongo shell outputs:
{
"_id" : {
"year" : 2017,
"month" : 10,
"day" : 25
},
"count" : 2,
"messages" : [
{
"_id" : ObjectId("59f0e8b47d0a206bdfde87b3"),
"pubTimezoneOffset" : {
"hours" : -7,
"minutes" : 0
},
"pubDate" : null
},
{
"_id" : ObjectId("59f0e8b47d0a206bdfde87b4"),
"pubTimezoneOffset" : {
"hours" : -7,
"minutes" : 0
},
"pubDate" : null
}
]
}
{
"_id" : {
"year" : 2017,
"month" : 10,
"day" : 23
},
"count" : 2,
"messages" : [
{
"_id" : ObjectId("59f0e8b47d0a206bdfde87b5"),
"pubTimezoneOffset" : {
"hours" : -7,
"minutes" : 0
},
"pubDate" : null
},
{
"_id" : ObjectId("59f0e8b47d0a206bdfde87b6"),
"pubTimezoneOffset" : {
"hours" : -7,
"minutes" : 0
},
"pubDate" : null
}
]
}

Kudos for the attempt but, you actually have quite a few things conceptually incorrect here, with the basic error you are seeing is because your premise of "array projection" is incorrect. You are trying to refer to variables "inside the array" by simply notating the "property name".
What you actually need to do here is apply $map in order to apply the functions to "transform" each element:
db.messages.aggregate([
{ "$project": {
"pubTimezoneOffset": 1,
"pubDate": {
"$add": [
"$pubDate",
{ "$add": [
{ "$multiply": [ '$pubTimezoneOffset.hours', 60 * 60 * 1000 ] },
{ "$multiply": [ '$pubTimezoneOffset.minutes', 60 * 1000 ] }
]}
]
}
}},
{ "$group": {
"_id": {
"year": { "$year": "$pubDate" },
"month": { "$month": "$pubDate" },
"day": { "$dayOfMonth": "$pubDate" }
},
"count": { "$sum": 1 },
"messages": {
"$push": {
"_id": "$_id",
"pubTimezoneOffset": "$pubTimezoneOffset",
"pubDate": "$pubDate"
}
}
}},
{ "$project": {
"messages": {
"$map": {
"input": "$messages",
"as": "m",
"in": {
"_id": "$$m._id",
"pubTimezoneOffset": "$$m.pubTimezoneOffset",
"pubDate": {
"$subtract": [
"$$m.pubDate",
{ "$add": [
{ "$multiply": [ "$$m.pubTimezoneOffset.hours", 60 * 60 * 1000 ] },
{ "$multiply": [ "$$m.pubTimezoneOffset.minutes", 60 * 1000 ] }
]}
]
}
}
}
},
"count": 1
}},
{ "$sort": { "_id": -1 } }
]).pretty();
Noting here that you are doing a lot of unnecessary work in "tranforming" the dates kept in the array, and then trying to "tranform" them back to the original state. Instead, you should have simply supplied a "variable" with $let to the _id of $group and left the original document state "as is" using $$ROOT instead of naming all the fields:
db.messages.aggregate([
{ "$group": {
"_id": {
"$let": {
"vars": {
"pubDate": {
"$add": [
"$pubDate",
{ "$add": [
{ "$multiply": [ '$pubTimezoneOffset.hours', 60 * 60 * 1000 ] },
{ "$multiply": [ '$pubTimezoneOffset.minutes', 60 * 1000 ] }
]}
]
}
},
"in": {
"year": { "$year": "$$pubDate" },
"month": { "$month": "$$pubDate" },
"day": { "$dayOfMonth": "$$pubDate" }
}
}
},
"docs": { "$push": "$$ROOT" }
}},
{ "$sort": { "_id": -1 } }
])
Also note that $sort simply does actually consider all the "sub-keys" anyway, so there is no need to name them explicitly.
Back to your error, the point of $map is essentially because whilst you can notate array "field inclusion" with MongoDB 3.2 and above like this:
"messages": {
"_id": 1,
"pubTimeZoneOffset": 1
}
The thing you cannot do is actually "calculate values" on the elements themselves. You tried "$pubDate" which actually looks in the "ROOT" space for a property of that name, which does not exist and is null. If you then tried:
"messages": {
"_id": 1,
"pubTimeZoneOffset": 1,
"pubDate": "$messages.pubDate"
}
Then you would get "a result", but not the result you might think. Because what would actually be included in "every element" is the value of that property in each array element as a "new array" itself.
So the short and sweet is use $map instead, which iterates the array elements with a local variable referring to the current element for you to notate values for in expressions.
MongoDB 3.6
MongoDB date operators are all timezone aware. So instead of all the juggling then all you need do is supply the additional "timezone" parameter to any option and the conversion will be done for you.
As a sample:
db.messages.aggregate([
{ "$group": {
"_id": {
"$dateToString": {
"date": "$pubDate",
"format": "%Y-%m-%d",
"timezone": {
"$concat": [
{ "$cond": {
"if": { "$gt": [ "$pubTimezoneOffset", 0 ] },
"then": "+",
"else": "-"
}},
{ "$let": {
"vars": {
"hours": { "$substr": [{ "$abs": "$pubTimezoneOffset.hours" },0,2] },
"minutes": { "$substr": [{ "$abs": "$pubTimezoneOffset.minutes" },0,2] }
},
"in": {
"$concat": [
{ "$cond": {
"if": { "$eq": [{ "$strLenCP": "$$hours" }, 1 ] },
"then": { "$concat": [ "0", "$$hours" ] },
"else": "$$hours"
}},
":",
{ "$cond": {
"if": { "$eq": [{ "$strLenCP": "$$minutes" }, 1 ] },
"then": { "$concat": [ "0", "$$minutes" ] },
"else": "$$minutes"
}}
]
}
}}
]
}
}
},
"docs": { "$push": "$$ROOT" }
}},
{ "$sort": { "_id": -1 } }
])
Note that most of the "juggling" in there is to convert your own "offset" to the "string" format required by the new operators. If you simply stored this as "offset": "-07:00" then you can instead simply write:
db.messages.aggregate([
{ "$group": {
"_id": {
"$dateToString": {
"date": "$pubDate",
"format": "%Y-%m-%d",
"timezone": "$offset"
}
},
"docs": { "$push": "$$ROOT" }
}},
{ "$sort": { "_id": -1 } }
])
Please Reconsider
I can't let this pass without making a note that your general approach here is conceptually incorrect. Storing "offset" or "local time string" within the database is just intrinsically wrong.
The date information should be stored as UTC and should be returned as UTC. Sure you can and "should" covert when aggregating, but the general premise is that you always convert back to UTC. And "conversion" comes from the "locale of the observer" and not a "stored" adjustment. Because dates are always relative to the "observer" point of view, and are not from the "point of origin" as you seem to have interpreted it.
I put some lengthy detail on this on Group by Date with Local Time Zone in MongoDB about why you store this way and why "locale" conversion from the "observer" is necessary. That also details "Daylight savings considerations" from the observer point of view.
The basic premise there still remains the same when MongoDB becomes "timezone aware" in that you :
Store in UTC
Query with local time converted to UTC
Aggregate converted from the "observer" offset
Convert the "offset" back to UTC
Because at the end of the day it's the "clients" job to supply that "locale" conversion, since that's the part that "knows where it is".

MongoDB: Get the previous and next element in the embedded array

I have a database which has the following structure:
{
"_id" : ObjectId("59b8d72ab515211f3c161c4b"),
"Transport_event_id" : 1,
"Carrier_id" : 23,
"Payload_id" : 0,
"StartTime" : 214392.0,
"EndTime" : 362707.0,
"Move_events" : [
{
"Timestamp" : 214398,
"x_pos" : 13,
"y_pos" : 202
},{
"Timestamp" : 214845,
"x_pos" : 12,
"y_pos" : 202
},{
"Timestamp" : 216399,
"x_pos" : 12,
"y_pos" : 216
},{
"Timestamp" : 216842,
"x_pos" : 11,
"y_pos" : 216
},{
"Timestamp" : 219586,
"x_pos" : 10,
"y_pos" : 216
}
]
}
I've made the following query which will return the next 2 Elements form a Array after a specific TimeStamp.
var cursor = db.Transport_eventBeta.aggregate([
{ "$match": { "StartTime": { "$lte": query_time } } },
{ "$match": { "EndTime": { "$gte": query_time } } },
{
"$project": {
"Move_events": {
"$let": {
"vars": {
"filtered": {
"$filter": {
"input": "$Move_events",
"as": "event",
"cond": { "$lte": ["$$event.Timestamp" , query_time] }
}
}
},
"in": {
"$slice": [
"$Move_events",
{"$size": "$$filtered"},
2
]
}
}
},
"Carrier_id": 1
}
}
])
while (cursor.hasNext()) {
print(cursor.next());
}
What I need are the documents befor and after this specific TimeStamp.
Some kind of this:
"$slice": [
"$Move_events",
{"$size": "$$filtered"} - 1,
2
]
But this doesn't work. How can I solve this problem? 2 separate queries are no option because of the duration.

You can try below aggregation query in 3.4.
The query will filter Move_events to keep events with timestamp less than input timestamp followed by $arrayElemAt to get the Move_events after and before event.
db.Transport_eventBeta.aggregatee([
{
"$match": {
"StartTime": {
"$lte": query_time
},
"EndTime": {
"$gte": query_time
}
}
},
{
"$project": {
"Move_events": {
"$let": {
"vars": {
"filtered": {
"$filter": {
"input": "$Move_events",
"as": "event",
"cond": {
"$lte": [
"$$event.Timestamp",
query_time
]
}
}
}
},
"in": [
{
"$arrayElemAt": [
"$Move_events",
{
"$subtract": [
{
"$size": "$$filtered"
},
1
]
}
]
},
{
"$arrayElemAt": [
"$Move_events",
{
"$size": "$$filtered"
}
]
}
]
}
}
}
}
])

Mongodb aggregate every two hours

I have an aggregate query of the following form
db.mycollection.aggregate([
{
"$match":
{
"Time": { $gte: ISODate("2016-01-30T00:00:00.000+0000") }
}
},
{
"$group":
{
"_id":
{
"day": { "$dayOfYear": "$Time" },
"hour": { "$hour": "$Time" }
},
"Dishes": { "$addToSet": "$Dish" }
}
},
{
"$group":
{
"_id": "$_id.hour",
"Food":
{
"$push":
{
"Day": "$_id.day",
"NumberOfDishes": { "$size":"$Dishes" }
}
}
}
},
{
"$project":
{
"Hour": "$_id",
"Food": "$Food",
"_id" : 0
}
},
{
"$sort": { "Hour": 1 }
}
]);
Instead of doing this as above in one hour durations e.g. 0-1,1-2,2-3,3-4,4-5,...,23-24, I want to be able to do this in two hour durations. e.g. 0-2,2-4,4-6,...,22-24. Is there a way to do that?

Hint: use arithmetic aggregation operators in $project
Lets say, H=floor(hour/2), where hour is actual hour from document date. Then you can get H by applying $floor and $divide operators to this date
"H": { $floor: { $divide: [ { "$hour": "$Time" }, 2 ] } }
Here H corresponds to the pair of hours (Hours=[0,2) => H=0, Hours=[2,4) => H=1, Hours=[22,24) => H=11, etc.) and you can pass it to the $group stage with
$group: { "_id": { "day": { $dayOfYear: "$Time" }, "H": "$H" } }
Then you can output the pair of hours for specific H with
"Hours": [ { $multiply: [ "$H", 2 ] }, { $sum: [ { $multiply: [ "$H", 2 ] }, 2 ] } ]
Given collection of documents
{ "Time" : ISODate("2016-01-30T01:00:00Z"), "Dish" : "dish1" }
{ "Time" : ISODate("2016-01-30T02:00:00Z"), "Dish" : "dish2" }
{ "Time" : ISODate("2016-01-30T03:00:00Z"), "Dish" : "dish3" }
{ "Time" : ISODate("2016-01-30T04:00:00Z"), "Dish" : "dish4" }
{ "Time" : ISODate("2016-01-30T05:00:00Z"), "Dish" : "dish5" }
{ "Time" : ISODate("2016-01-30T06:00:00Z"), "Dish" : "dish6" }
{ "Time" : ISODate("2016-01-30T07:00:00Z"), "Dish" : "dish7" }
{ "Time" : ISODate("2016-01-30T08:00:00Z"), "Dish" : "dish8" }
{ "Time" : ISODate("2016-01-30T09:00:00Z"), "Dish" : "dish9" }
and using the next aggregate on it
db.mycollection.aggregate([
{
"$match":
{
"Time": { $gte: ISODate("2016-01-30T00:00:00.000+0000") }
}
},
{
"$project":
{
"Dish": 1,
"Time": 1,
"H": { $floor: { $divide: [ { $hour: "$Time" }, 2 ] } }
}
},
{
"$group":
{
"_id":
{
"day": { $dayOfYear: "$Time" },
"H": "$H"
},
"Dishes": { $addToSet: "$Dish" }
}
},
{
"$group":
{
"_id": "$_id.H",
"Food":
{
"$push":
{
"Day": "$_id.day",
"NumberOfDishes": { $size: "$Dishes" }
}
}
}
},
{
"$sort": { "_id": 1 }
},
{
"$project":
{
"Hours": [ { $multiply: [ "$_id", 2 ] }, { $sum: [ { $multiply: [ "$_id", 2 ] }, 2 ] } ],
"Food": "$Food",
"_id": 0
}
}
]);
provides the result
{ "Food" : [ { "Day" : 30, "NumberOfDishes" : 1 } ], "Hours" : [ 0, 2 ] }
{ "Food" : [ { "Day" : 30, "NumberOfDishes" : 2 } ], "Hours" : [ 2, 4 ] }
{ "Food" : [ { "Day" : 30, "NumberOfDishes" : 2 } ], "Hours" : [ 4, 6 ] }
{ "Food" : [ { "Day" : 30, "NumberOfDishes" : 2 } ], "Hours" : [ 6, 8 ] }
{ "Food" : [ { "Day" : 30, "NumberOfDishes" : 2 } ], "Hours" : [ 8, 10 ] }

We Keep Coding

iphone swift flutter scala powershell matlab mongodb postgresql perl eclipse

Fill day gaps of two-dimensional timeseries data in MongoDB with aggregate - mongodb

Related

Mongodb query to get count of field based on the value for a matching string

Is it possible to group (aggregate) objects with dates into incremental intervals in MongoDB?

Aggregate Pipeline groups by day but projects a null date

MongoDB: Get the previous and next element in the embedded array

Mongodb aggregate every two hours

Categories

Resources