How to group data by every hour

How to group data by every hour - mongodb

How do I get counts data grouped by every hour in 24 hours even if data is not present i.e. IF 0 will select 0
MonogDB 3.6
Input
[
{
"_id": ObjectId("5ccbb96706d1d47a4b2ced4b"),
"date": "2019-05-03T10:39:53.108Z",
"id": 166,
"update_at": "2019-05-03T02:45:36.208Z",
"type": "image"
},
{
"_id": ObjectId("5ccbb96706d1d47a4b2ced4c"),
"date": "2019-05-03T10:39:53.133Z",
"id": 166,
"update_at": "2019-05-03T02:45:36.208Z",
"type": "image"
},
{
"_id": ObjectId("5ccbb96706d1d47a4b2ced4d"),
"date": "2019-05-03T10:39:53.180Z",
"id": 166,
"update_at": "2019-05-03T20:45:36.208Z",
"type": "image"
},
{
"_id": ObjectId("5ccbb96706d1d47a4b2ced7a"),
"date": "2019-05-10T10:39:53.218Z",
"id": 166,
"update_at": "2019-12-04T10:45:36.208Z",
"type": "image"
},
{
"_id": ObjectId("5ccbb96706d1d47a4b2ced7b"),
"date": "2019-05-03T10:39:53.108Z",
"id": 166,
"update_at": "2019-05-05T10:45:36.208Z",
"type": "image"
},
{
"_id": ObjectId("5ccbb96706d1d47a4b2cedae"),
"date": "2019-05-03T10:39:53.133Z",
"id": 166,
"update_at": "2019-05-05T10:45:36.208Z",
"type": "image"
},
{
"_id": ObjectId("5ccbb96706d1d47a4b2cedad"),
"date": "2019-05-03T10:39:53.180Z",
"id": 166,
"update_at": "2019-05-06T10:45:36.208Z",
"type": "image"
},
{
"_id": ObjectId("5ccbb96706d1d47a4b2cedab"),
"date": "2019-05-10T10:39:53.218Z",
"id": 166,
"update_at": "2019-12-06T10:45:36.208Z",
"type": "image"
}
]
Implementation
db.collection.aggregate({
$match: {
update_at: {
"$gte": "2019-05-03T00:00:00.0Z",
"$lt": "2019-05-05T00:00:00.0Z"
},
id: {
"$in": [
166
]
}
}
},
{
$group: {
_id: {
$substr: [
"$update_at",
11,
2
]
},
count: {
"$sum": 1
}
},
},
{
$project: {
_id: 0,
hour: "$_id",
count: "$count"
}
},
{
$sort: {
hour: 1
}
})
Actual Output:
{
"count": 2,
"hour": "02"
},
{
"count": 1,
"hour": "20"
}
My expectation code show 24 hours event data is 0 or null and convert from example "02" as "02 AM" , "13" as "01 PM":
Expected Output
{
"count": 0,
"hour": "01" // 01 AM
},
{
"count": 2,
"hour": "02"
},
{
"count": 0,
"hour": "03"
},
{
"count": 0,
"hour": "04"
},
{
"count": 0,
"hour": "05"
},
{
"count": 1,
"hour": "20" // to 08 pm
}

Try this solution:
Explanation
We group by hour to count how many images are uploaded.
Then, we add extra field hour to create time interval (if you had v4.x, there is a better solution).
We flattern hour field (will create new documents) and split first 2 digits to match count and split last 2 digits to put AM / PM periods.
db.collection.aggregate([
{
$match: {
update_at: {
"$gte": "2019-05-03T00:00:00.0Z",
"$lt": "2019-05-05T00:00:00.0Z"
},
id: {
"$in": [
166
]
}
}
},
{
$group: {
_id: {
$substr: [
"$update_at",
11,
2
]
},
count: {
"$sum": 1
}
}
},
{
$addFields: {
hour: [
"0000",
"0101",
"0202",
"0303",
"0404",
"0505",
"0606",
"0707",
"0808",
"0909",
"1010",
"1111",
"1212",
"1301",
"1402",
"1503",
"1604",
"1705",
"1806",
"1907",
"2008",
"2109",
"2210",
"2311"
]
}
},
{
$unwind: "$hour"
},
{
$project: {
_id: 0,
hour: 1,
count: {
$cond: [
{
$eq: [
{
$substr: [
"$hour",
0,
2
]
},
"$_id"
]
},
"$count",
0
]
}
}
},
{
$group: {
_id: "$hour",
count: {
"$sum": "$count"
}
}
},
{
$sort: {
_id: 1
}
},
{
$project: {
_id: 0,
hour: {
$concat: [
{
$substr: [
"$_id",
2,
2
]
},
{
$cond: [
{
$gt: [
{
$substr: [
"$_id",
0,
2
]
},
"12"
]
},
" PM",
" AM"
]
}
]
},
count: "$count"
}
}
])
MongoPlayground

There's no "magic" solution, you'll have to hardcode it into your aggregation:
Heres an example using Mongo v3.2+ syntax with some $map and $filter magic:
db.collection.aggregate([
{
$match: {
update_at: {
"$gte": "2019-05-03T00:00:00.0Z",
"$lt": "2019-05-05T00:00:00.0Z"
},
id: {"$in": [166]}
}
},
{
$group: {
_id: {$substr: ["$update_at", 11, 2]},
count: {"$sum": 1}
}
},
{
$group: {
_id: null,
hours: {$push: {hour: "$_id", count: "$count"}}
}
},
{
$addFields: {
hours: {
$map: {
input: {
$concatArrays: [
"$hours",
{
$map: {
input: {
$filter: {
input: ["00", "01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23"],
as: "missingHour",
cond: {
$not: {
$in: [
"$$missingHour",
{
$map: {
input: "$hours",
as: "hourObj",
in: "$$hourObj.hour"
}
}
]
}
}
}
},
as: "missingHour",
in: {hour: "$$missingHour", count: 0}
}
}
]
},
as: "hourObject",
in: {
count: "$$hourObject.count",
hour: {
$cond: [
{$eq: [{$substr: ["$$hourObject.hour", 0, 1]}, "0"]},
{$concat: ["$$hourObject.hour", " AM"]},
{
$concat: [{
$switch: {
branches: [
{case: {$eq: ["$$hourObject.hour", "13"]}, then: "1"},
{case: {$eq: ["$$hourObject.hour", "14"]}, then: "2"},
{case: {$eq: ["$$hourObject.hour", "15"]}, then: "3"},
{case: {$eq: ["$$hourObject.hour", "16"]}, then: "4"},
{case: {$eq: ["$$hourObject.hour", "17"]}, then: "5"},
{case: {$eq: ["$$hourObject.hour", "18"]}, then: "6"},
{case: {$eq: ["$$hourObject.hour", "19"]}, then: "7"},
{case: {$eq: ["$$hourObject.hour", "20"]}, then: "8"},
{case: {$eq: ["$$hourObject.hour", "21"]}, then: "9"},
{case: {$eq: ["$$hourObject.hour", "22"]}, then: "10"},
{case: {$eq: ["$$hourObject.hour", "23"]}, then: "11"},
],
default: "None"
}
}, " PM"]
}
]
}
}
}
}
}
},
{
$unwind: "$hours"
},
{
$project: {
_id: 0,
hour: "$hours.hour",
count: "$hours.count"
}
},
{
$sort: {
hour: 1
}
}
]);
A short explanation of the $addFields stage: we first add hours that we're missing, we then merge the two arrays (of the original found hours and the "new" missing hours), finally we convert to the required output ("01" to "01 AM").
If you're using Mongo v4+ I recommend you change the $group _id stage to use $dateFromString as its more consistent.
_id: {$hour: {$dateFromString: {dateString: "$update_at"}}}
If you do do that, you'll have to update the $filter and $map section to use numbers and not strings and eventually using $toString to cast into the format you want, hence the v4+ requirement.

You should store date values as Date objects instead of strings. I would do the formatting like this:
db.collection.aggregate(
[
{ $match: { ... } },
{
$group: {
_id: { h: { $hour: "$update_at" } },
count: { $sum: 1 }
}
},
{
$project: {
_id: 0,
hour: {
$switch: {
branches: [
{ case: { $lt: ["$_id.h", 10] }, then: { $concat: ["0", { $toString: "$_id.h" }, " AM"] } },
{ case: { $lt: ["$_id.h", 13] }, then: { $concat: [{ $toString: "$_id.h" }, " AM"] } },
{ case: { $lt: ["$_id.h", 22] }, then: { $concat: ["0", { $toString: { $subtract: ["$_id.h", 12] } }, " PM"] } },
{ case: { $lt: ["$_id.h", 24] }, then: { $concat: [{ $toString: { $subtract: ["$_id.h", 12] } }, " PM"] } }
]
}
},
hour24: "$_id.h",
count: 1
}
},
{ $sort: { hour24: 1 } }
])
As non-American I am not familiar with AM/PM rules, esp. for midnight and midday but I guess you get the principle.

Here is the query you can test it out, for MongoDB 4.0+
i will be improving query and update
const query = [{
$match: {
update_at: {
"$gte": ISODate("2019-05-03T00:00:00.0Z"),
"$lt": ISODate("2019-05-05T00:00:00.0Z")
},
id: {
"$in": [
166
]
}
}
},
{
$group: {
_id: { $hour: "$update_at" },
count: {
"$sum": 1
}
},
},
{
$addFields: {
hourStr: { $toString: { $cond: { if: { $gte: ["$_id", 12] }, then: { $subtract: [12, { $mod: [24, '$_id'] }] }, else: "$_id" } } },
}
},
{
$project: {
formated: { $concat: ["$hourStr", { $cond: { if: { $gt: ["$_id", 12] }, then: " PM", else: " AM" } }] },
count: "$count",
hour: 1,
}
}]

If you want to output in Indian Time formate. then below code work!
const query = [
{
$match: {
update_at: {
"$gte": ISODate("2019-05-03T00:00:00.0Z"),
"$lt": ISODate("2019-05-05T00:00:00.0Z")
},
id: {
"$in": [
166
]
}
}
},
{
$project: {
"h": { "$hour": { date: "$update_at", timezone: "+0530" } },
}
},
{
$group:
{
_id: { $hour: "$h" },
count: { $sum: 1 }
}
}
];

Related

Get current state from snapshot documents - mongoDB

I'm trying to get a list of current holders at specific times from a collection. My collection looks like this:
[
{
"time": 1,
"holdings": [
{ "owner": "A", "tokens": 2 },
{ "owner": "B", "tokens": 1 }
]
},
{
"time": 2,
"holdings": [
{ "owner": "B", "tokens": 2 }
]
},
{
"time": 3,
"holdings": [
{ "owner": "A", "tokens": 3 },
{ "owner": "B", "tokens": 1 },
{ "owner": "C", "tokens": 1 }
]
},
{
"time": 4,
"holdings": [
{ "owner": "C", "tokens": 0 }
]
}
]
tokens show the current holdings of an owner if the holdings have changed to the last document. I would like to change the collection so that holdings always includes the full current holdings for any point in time.
At time: 1, the holdings are: A: 2, B: 1.
At time: 2, the holdings are: A: 2, B: 2. The collections does not include A's holdings however, because they haven't changed. So what I'd like to get is:
[
{
"time": 1,
"holdings": [
{ "owner": "A", "tokens": 2 },
{ "owner": "B", "tokens": 1 }
]
},
{
"time": 2,
"holdings": [
{ "owner": "A", "tokens": 2 }, // merged from prev doc.
{ "owner": "B", "tokens": 2 }
]
},
{
"time": 3,
"holdings": [
{ "owner": "A", "tokens": 3 },
{ "owner": "B", "tokens": 1 },
{ "owner": "C", "tokens": 1 }
]
},
{
"time": 4,
"holdings": [
{ "owner": "A", "tokens": 3 }, // merged from prev
{ "owner": "B", "tokens": 1 }, // merged from prev
{ "owner": "C", "tokens": 0 }
]
}
]
From what I understand $mergeObjects does that, but I don't understand how I can merge all previous docs in order up to the current doc for each doc. So I'm looking for a way to combine setWindowFields with mergeObjects I think.

This is a nice challenge.
So far, I got this complicated solution:
Get all of our timestamps in all of our documents. This is the purpose of the first 4 steps. $setWindowFields is used to accumulate this data.
$group by owner and calculate the empty timestamps as wantedTimes- next 5 steps.
$set empty timestamps with tokens: null to be filled with actual data and $unwind to separate - next 3 steps
Use $setWindowFields to find the last known token for each owner at each timestamp.
Fill this last known state for documents with unknown token - 2 steps
$group and format answer:
db.collection.aggregate([
{
$setWindowFields: {
sortBy: {time: 1},
output: {
allTimes: {$addToSet: "$time", window: {documents: ["unbounded", "current"]}
}
}
}
},
{
$setWindowFields: {
sortBy: {time: -1},
output: {
allTimes: {$addToSet: "$allTimes", window: {documents: ["unbounded", "current"]}
}
}
}
},
{
$set: {
allTimes: {
$reduce: {
input: "$allTimes",
initialValue: [],
in: {"$concatArrays": ["$$value", "$$this"]}
}
}
}
},
{$set: {allTimes: {$setIntersection: "$allTimes"}}},
{$unwind: "$holdings"},
{$sort: {time: 1}},
{$group: { _id: "$holdings.owner",
tokens: {$push: {tokens: "$holdings.tokens", time: "$time"}},
times: {$push: "$time"}, firstTime: {$first: "$time"},
allTimes: {$first: "$allTimes"}}
},
{
$addFields: {
wantedTimes: {
$filter: {
input: "$allTimes",
as: "item",
cond: {$gte: ["$$item", "$firstTime"]}
}
}
}
},
{
$project: {
tokens: 1,
wantedTimes: {$setDifference: ["$wantedTimes", "$times"]}
}
},
{
$set: {
data: {
$map: {
input: "$wantedTimes",
as: "item",
in: {time: "$$item", tokens: null}
}
}
}
},
{$project: {tokens: {"$concatArrays": ["$tokens", "$data"]}}},
{$unwind: "$tokens"},
{
$setWindowFields: {
partitionBy: "$_id",
sortBy: {"tokens.time": 1},
output: {
lastTokens: {
$push: "$tokens.tokens",
window: {documents: ["unbounded", "current"]}
}
}
}
},
{
$set: {
lastTokens: {
$filter: {
input: "$lastTokens",
as: "item",
cond: {$ne: ["$$item", null]}
}
}
}
},
{
$set: {
"tokens.tokens": {$ifNull: ["$tokens.tokens", {$last: "$lastTokens"}]}
}
},
{
$group: {
_id: "$tokens.time",
holdings: {$push: {owner: "$_id", tokens: "$tokens.tokens" }}
}
},
{$project: {time: "$_id", holdings: 1, _id: 0}},
{$sort: {time: 1}}
])
Playground example

From a performance perspective I recommend you split it into 2 calls, the first will be a quick findOne just to get the maximum time value in the collection.
Once you have that value the pipeline can be much leaner:
const maxItem = await db.collection.findOne({}).sort({ time: -1 });
db.collection.aggregate([
{
$unwind: "$holdings"
},
{
$group: {
_id: "$holdings.owner",
times: {
$push: {
time: "$time",
tokens: "$holdings.tokens"
}
},
minTime: {
$min: "$time"
}
}
},
{
$addFields: {
times: {
$reduce: {
input: {
$range: [
"$minTime",
maxItem.time + 1 // this is max time
]
},
initialValue: {
values: [],
lastIndex: 0
},
in: {
values: {
"$concatArrays": [
"$$value.values",
[
{
$cond: [
{
$in: [
"$$this",
"$times.time"
]
},
{
"$arrayElemAt": [
"$times",
"$$value.lastIndex"
]
},
{
"$mergeObjects": [
{
tokens: 0
},
{
"$arrayElemAt": [
"$times",
{
$subtract: [
"$$value.lastIndex",
1
]
}
]
},
{
time: "$$this"
}
]
}
]
}
]
]
},
lastIndex: {
$cond: [
{
$in: [
"$$this",
"$times.time"
]
},
{
$sum: [
"$$value.lastIndex",
1
]
},
"$$value.lastIndex"
]
}
}
}
}
}
},
{
$unwind: "$times.values"
},
{
$group: {
_id: "$times.values.time",
holdings: {
$push: {
owner: "$_id",
tokens: "$times.values.tokens"
}
}
}
},
{
$project: {
_id: 0,
time: "$_id",
holdings: 1
}
},
{
$sort: {
time: 1
}
}
])
This is still quite a heavy query as it requires to $unwind and $group the entire collection, however there is no workaround this due to the requirements. if the collection is too big for this approach I recommend iteration owner by owner, or time by time and doing separate updates accordingly.
Mongo Playground
If you don't care about performance at all and want it in a single query you can still use the same pipeline, you will have to first extract the max time in the collection, this will require you to add an initial $group stage, like so:
db.collection.aggregate([
{
$group: {
_id: null,
maxTime: {
$max: "$time"
},
roots: {
$push: "$$ROOT"
}
}
},
{
$unwind: "$roots"
},
{
$replaceRoot: {
newRoot: {
"$mergeObjects": [
"$roots",
{
maxTime: "$maxTime"
}
]
}
}
},
... same pipeline ...
])

MongoDB - How to bring age group data

How to bring age group base data from a collection in MongoDB i.e how many people are 0-18, 19-24, 25-34, 35+
[
{
"_id": ObjectId("608be7c608c7de2367c89638"),
"status": true,
"gender": "Male",
"first_name": "Vinter",
"last_name": "R",
"dob": "1-2-1999"
},
{
"_id": ObjectId("608be7c608c7de2267c89639"),
"status": true,
"gender": "Male",
"first_name": "Ray",
"last_name": "Morgan",
"dob": "1-2-2015"
}
....
]
See the Mongo Playground:
https://mongoplayground.net/p/4ydNg9Plh6P

Interesting question!
Would like to credit to #Takis and #YuTing.
Good hint from #Takis's comment on $bucket.
#YuTing's answer is good.
Think this answer is shorter by utilizing the feature provided by MongoDB.
$toDate - Convert date string to Date (supported for version 4.0 above).
$dateDiff - Date subtraction and get the unit (Supported in version 5).
$$CURRENT - Variable to get the current iterated document. For adding into persons array field (via $push).
$switch - To display group value based on conditions (Optional).
db.collection.aggregate([
{
"$addFields": {
"age": {
$dateDiff: {
startDate: {
$toDate: "$dob"
},
endDate: "$$NOW",
unit: "year"
}
}
}
},
{
$bucket: {
groupBy: "$age",
// Field to group by
boundaries: [
0,
19,
25,
35
],
// Boundaries for the buckets
default: "Other",
// Bucket id for documents which do not fall into a bucket
output: {
// Output for each bucket
"count": {
$sum: 1
},
"persons": {
$push: "$$CURRENT"
}
}
}
},
{
$project: {
_id: 0,
group: {
$switch: {
branches: [
{
case: {
$lt: [
"$_id",
19
]
},
then: "0-18"
},
{
case: {
$lt: [
"$_id",
25
]
},
then: "19-24"
},
{
case: {
$lt: [
"$_id",
35
]
},
then: "25-34"
}
],
default: "35+"
}
},
count: 1,
persons: 1
}
}
])
Sample Mongo Playground

use $bucket
db.collection.aggregate([
{
$bucket: {
groupBy: {
"$subtract": [
{
$year: new Date()
},
{
$toInt: {
$substr: [
"$dob",
{
$subtract: [
{
$strLenCP: "$dob"
},
4
]
},
4
]
}
}
]
},
// Field to group by
boundaries: [
0,
19,
25,
35,
100
],
// Boundaries for the buckets
default: "Other",
// Bucket id for documents which do not fall into a bucket
output: {
// Output for each bucket
"count": {
$sum: 1
},
"artists": {
$push: {
"name": {
$concat: [
"$first_name",
" ",
"$last_name"
]
},
"age": {
"$subtract": [
{
$year: new Date()
},
{
$toInt: {
$substr: [
"$dob",
{
$subtract: [
{
$strLenCP: "$dob"
},
4
]
},
4
]
}
}
]
}
}
}
}
}
}
])
mongoplayground

Check if current date between two dates in french MongoDB aggregation

I have a collection of restaurant documents in my MongoDB database with an hours field having the format below.
How can I check if a restaurant is open now using MongoDB aggregation?
My hours field has data like this (with french days):
{
"Lundi": [
"08:00",
"23:00"
],
"Mardi": [
"08:00",
"23:00"
],
"Mercredi": [
"08:00",
"23:00"
],
"Jeudi": [
"08:00",
"23:00"
],
"Vendredi": [
"08:00",
"23:00"
],
"Samedi": [
"08:00",
"23:00"
],
"Dimanche": [
"08:00",
"23:00"
]
}

Query
uses the system variable "$$NOW" to get the current time of server
converts each day open hours into a minutes range(to work on minutes also)
open = 01:00 close = 02:30 limits=[60,150]
if current hour is 01:30 then min-now=90
and then filters day=dayNow min-now inside the limits
(for example in our example case 90 is in the limit [60,150])
if at least 1 passed the filter store is open, else closed
Test code here
Query
aggregate(
[{"$project":{"_id":0}},
{"$project":
{"open":
{"$filter":
{"input":
{"$map":
{"input":{"$objectToArray":"$$ROOT"},
"in":["$$this.k", "$$this.v"]}},
"cond":
{"$let":
{"vars":
{"info":
{"day":{"$arrayElemAt":["$$r", 0]},
"limits":
[{"$add":
[{"$multiply":
[{"$toInt":
{"$arrayElemAt":
[{"$split":
[{"$arrayElemAt":[{"$arrayElemAt":["$$r", 1]}, 0]},
":"]},
0]}},
60]},
{"$toInt":
{"$arrayElemAt":
[{"$split":
[{"$arrayElemAt":[{"$arrayElemAt":["$$r", 1]}, 0]},
":"]},
1]}}]},
{"$add":
[{"$multiply":
[{"$toInt":
{"$arrayElemAt":
[{"$split":
[{"$arrayElemAt":[{"$arrayElemAt":["$$r", 1]}, 1]},
":"]},
0]}},
60]},
{"$toInt":
{"$arrayElemAt":
[{"$split":
[{"$arrayElemAt":[{"$arrayElemAt":["$$r", 1]}, 1]},
":"]},
1]}}]}],
"day-now":
{"$arrayElemAt":
[["Lundi", "Mardi", "Mercredi", "Jeudi", "Vendredi",
"Samedi", "Dimanche"],
{"$subtract":[{"$dayOfWeek":"$$NOW"}, 1]}]},
"min-now":
{"$add":
[{"$multiply":[{"$hour":"$$NOW"}, 60]},
{"$minute":"$$NOW"}]}}},
"in":
{"$and":
[{"$eq":["$$info.day", "$$info.day-now"]},
{"$gte":
["$$info.min-now", {"$arrayElemAt":["$$info.limits", 0]}]},
{"$lte":
["$$info.min-now",
{"$arrayElemAt":["$$info.limits", 1]}]}]}}},
"as":"r"}}}},
{"$project":{"open":{"$ne":["$open", []]}, "date-now":"$$NOW"}}])

Really an ugly data model. You have to translate french day names into number and the time values into Date objects. Then you can filter by day and times:
db.collection.aggregate([
{ $unset: "_id" },
{
$project: {
opening_times: {
$map: {
input: { $objectToArray: "$$ROOT" },
in: {
day: {
$switch: {
branches: [
{ case: { $eq: ["Lundi", "$$this.k"] }, then: 1 },
{ case: { $eq: ["Mardi", "$$this.k"] }, then: 2 },
{ case: { $eq: ["Mercredi", "$$this.k"] }, then: 3 },
{ case: { $eq: ["Jeudi", "$$this.k"] }, then: 4 },
{ case: { $eq: ["Vendredi", "$$this.k"] }, then: 5 },
{ case: { $eq: ["Samedi", "$$this.k"] }, then: 6 },
{ case: { $eq: ["Dimanche", "$$this.k"] }, then: 7 }
]
}
},
open: {
$dateFromParts: {
year: { $year: "$$NOW" }, month: { $month: "$$NOW" }, day: { $dayOfMonth: "$$NOW" },
hour: { $toInt: { $first: { $split: [{ $first: "$$this.v" }, ":"] } } },
minute: { $toInt: { $last: { $split: [{ $first: "$$this.v" }, ":"] } } },
timezone: "Europe/Paris"
}
},
close: {
$dateFromParts: {
year: { $year: "$$NOW" }, month: { $month: "$$NOW" }, day: { $dayOfMonth: "$$NOW" },
hour: { $toInt: { $first: { $split: [{ $last: "$$this.v" }, ":"] } } },
minute: { $toInt: { $last: { $split: [{ $last: "$$this.v" }, ":"] } } },
timezone: "Europe/Paris"
}
}
}
}
}
}
},
{
$project: {
open_today: {
$first: {
$filter: {
input: "$opening_times",
cond: { $eq: ["$$this.day", { $isoDayOfWeek: "$$NOW" }] }
}
}
}
}
},
{
$project: {
restaurant: {
$cond: {
if: {
$and: [
{ $gte: ["$$NOW", "$open_today.open"] },
{ $lt: ["$$NOW", "$open_today.close"] },
]
},
then: "open",
else: "close"
}
}
}
}
])
See Mongo playground

Is it possible to group (aggregate) objects with dates into incremental intervals in MongoDB?

I am currently trying to create an aggregation pipeline in MongoDB to group the items into incremental time intervals, but I only succeeded in grouping them in disjoint time intervals so far.
Sample data:
{
"eventID": "abc",
"date": ISODate("2020-11-05T12:05:11.790Z"),
...........
},
{
"eventID": "xyz",
"date": ISODate("2020-11-05T12:12:11.790Z"),
...........
},
{
"eventID": "klm",
"date": ISODate("2020-11-05T12:28:11.790Z"),
...........
}
Current solution:
$group: {
"_id": {
"year": { $year: "$date" },
"dayOfYear": { $dayOfYear: "$date" },
"hour": { $hour: "$date" },
"interval": {
"$subtract": [
{ "$minute": "$date" },
{ "$mod": [{ "$minute": "$date"}, 10 ] }
]
}
},
"grouped_data": { "$push": { "eventID": "$eventID", "date": "$date" },
"count": { $sum: 1 } }
}
Which returns the data grouped in 10 minutes intervals but those are disjoint intervals (time windows of 10minutes that do not intersect).
Eg:
{
"_id": {
"year": 2020,
"dayOfYear": "314",
"hour": 12,
"interval": 0, // = interval beginning at minute 0 of 12th hour of the day
},
"grouped_data": [{ "eventID": "abc", "date": ISODate("2020-11-05T12:05:11.790Z" }],
"count": 1
},
{
"_id": {
"year": 2020,
"dayOfYear": "314",
"hour": 12,
"interval": 10, // = beginning at minute 10
},
"grouped_data": [{ "eventID": "xyz", "date": ISODate("2020-11-05T12:12:11.790Z") }],
"count": 1
},
{
"_id": {
"year": 2020,
"dayOfYear": "314",
"hour": 12,
"interval": 20, // = beginning at minute 20
},
"grouped_data": [{ "eventID": "klm", "date": ISODate("2020-11-05T12:28:11.790Z") }],
"count": 1
}
What I am actually looking for is grouping them in 10 minutes(or whatever is needed) incremental intervals. Eg: 0-9, 1-10, 2-11, etc. instead of 0-9, 10-19, 20-29 etc.
Edit:
The end goal here is to check if a count threshold is surpassed on a interval length defined by the user.
If user asks "Are there more than 2 events on a 10minute time window?", based on the sample data above and my current solution, the condition is not met. (1 event in 0-9 interval, and 1 event in 10-19). With incremental intervals I should be able to find that there are indeed 2 events in 10 minutes, but in the time interval 5-14. Eg:
{
"_id": {
*whatever logic for grouping in 10minutes window*
},
"grouped_data": [
{ "eventID": "abc", "date": ISODate("2020-11-05T12:05:11.790Z") },
{ "eventID": "xyz", "date": ISODate("2020-11-05T12:12:11.790Z") }],
"count": 2
},
{
"_id": {
*whatever logic for grouping in 10minutes window*
},
"grouped_data": [
{ "eventID": "klm", "date": ISODate("2020-11-05T12:28:11.790Z") }]
"count": 1
},

For me it is not clear which output you like to get, but this aggregation pipeline makes the sliding-window group:
db.collection.aggregate([
{
$group: {
_id: null,
data: { $push: "$$ROOT" },
min_date: { $min: "$date" },
max_date: { $max: "$date" }
}
},
{
$addFields: {
interval: {
$range: [
{ $toInt: { $divide: [{ $toLong: "$min_date" }, 1000] } },
{ $toInt: { $divide: [{ $toLong: "$max_date" }, 1000] } },
10 * 60]
}
}
},
{
$set: {
interval: {
$map: {
input: "$interval",
in: { $toDate: { $multiply: ["$$this", 1000] } }
}
}
}
},
{ $unwind: "$interval" },
{
$project: {
grouped_data: {
$filter: {
input: "$data",
cond: {
$and: [
{ $gte: ["$$this.date", "$interval"] },
{ $lt: ["$$this.date", { $add: ["$interval", 1000 * 60 * 10] }] },
]
}
}
},
interval: 1
}
}
])
Boundaries are given by input data, however can also use fixes dates:
db.collection.aggregate([
{ $group: { _id: null, data: { $push: "$$ROOT" } } },
{
$addFields: {
interval: {
$range: [
{ $toInt: { $divide: [{ $toLong: ISODate("2020-01-01T00:00:00Z") }, 1000] } },
{ $toInt: { $divide: [{ $toLong: ISODate("2020-12-31T23:59:59Z") }, 1000] } },
10 * 60]
}
}
},
{
$set: {
interval: {
$map: {
input: "$interval",
in: { $toDate: { $multiply: ["$$this", 1000] } }
}
}
}
},
{ $unwind: "$interval" },
{
$project: {
grouped_data: {
$filter: {
input: "$data",
cond: {
$and: [
{ $gte: ["$$this.date", "$interval"] },
{ $lt: ["$$this.date", { $add: ["$interval", 1000 * 60 * 10] }] },
]
}
}
},
interval: 1
}
}
])

I will try to answer my own question, maybe it will help other people on the internet. The solution I came up with is based on the answer of #Wernfried (thanks!).
db.getCollection("events_en").aggregate([
{
$match: { eventID: "XYZ" }
},
{
$group: {
_id: null,
events: { $push: "$$ROOT" },
limit: { $push: { $toDate: { $add: [{ $toLong: "$date" }, 1000 * 60 * 10] } } }
}
},
{ $unwind: "$limit" },
{
$project: {
events: {
$filter: {
input: "$events",
cond: {
$and: [
{ $lt: ["$$this.date", "$limit"] },
{ $gte: ["$$this.date", { $subtract: ["$limit", 1000 * 60 * 10] }] },
]
}
}
},
limit: 1,
}
},
{
$addFields: {
count: {
$size: "$events"
}
}
}
])
This will create a limit for each event, based on its date + 10 minutes (or whatever). And afterwards it filters the events (which are now duplicated for each of the limit using $unwind: "$limit"), based on that limit. The result is something like this:
{
"_id" : null,
"limit" : ISODate("2020-11-05T12:28:27.000+0000"),
"events" : [
{
"_id" : 13,
"eventID" : "XYZ",
"date" : ISODate("2020-11-05T12:18:27.000+0000")
},
{
"_id" : 63,
"eventID" : "XYZ",
"date" : ISODate("2020-11-05T12:19:55.000+0000")
},
............................
{
"_id" : 90,
"eventID" : "XYZ",
"date" : ISODate("2020-11-05T12:27:57.000+0000")
}
],
"count" : 5
}
{
"_id" : null,
"limit" : ISODate("2020-11-05T12:29:55.000+0000"),
"events" : [
{
"_id" : 63,
"eventID" : "XYZ",
"date" : ISODate("2020-11-05T12:19:55.000+0000")
},
{
"_id" : 90,
"eventID" : "XYZ",
"date" : ISODate("2020-11-05T12:27:57.000+0000")
},
{
"_id" : 97,
"eventID" : "XYZ",
"date" : ISODate("2020-11-05T12:29:36.000+0000")
}
],
"count" : 3
}
As you can see, looking at the limit of each group and at the dates of the events in each group, these intervals are now incremental, not disjoint. (event X is found in multiple groups, as long as it doesnt exceeds the time interval of 10minutes)

Get objects containing max values for multiple fields using aggregation in mongodb

I want to fetch the documents having highest value for a list of specifics fields. I don't know if it's possible in only one request.
Consider below data:
_id:1, kills:12, deaths:6, assists:1
_id:2, kills:2, deaths:2, assists:22
_id:3, kills:1, deaths:2, assists:3
_id:4, kills:0, deaths:23, assists:4
_id:5, kills:6, deaths:3, assists:5
_id:6, kills:7, deaths:1, assists:6
Answer should be something like
maxKills: { _id:1, kills:12, deaths:6, assists:1 },
maxDeaths: { _id:4, kills:0, deaths:23, assists:4 },
maxAssists: { _id:2, kills:2, deaths:2, assists:22 },
I have tried several queries, but I can't get the whole objects containing the max values.
db.coll.aggregate([{
$group: {
_id: null,
kills: { $max: "$stats.kills" },
deaths: { $max: "$stats.deaths" },
assists: { $max: "$stats.assists" },
}
}])
For example here I have all the max values I want but I don't get the whole matches Objects.
---- UPDATE ----
With this answer https://stackoverflow.com/a/33361913/9188650, I've made it works but I receive data in a not really user friendly way.
{
"$group": {
"_id": null,
"maxKills": { "$max": "$stats.kills" },
"maxDeaths": { "$max": "$stats.deaths" },
"maxAssists": { "$max": "$stats.assists" },
"matches": {
"$push": {
"champion": "$champion",
"gameId": "$gameId",
"kills": "$stats.kills",
"deaths": "$stats.deaths",
"assists": "$stats.assists",
}
}
}
},
{
"$project": {
"_id": 0,
"maxKills": 1,
"maxDeaths": 1,
"maxAssists": 1,
"matches": {
"$setDifference": [
{
"$map": {
"input": "$matches",
"as": "match",
"in": {
$switch: {
branches: [
{ case: { $eq: ["$maxKills", "$$match.kills"] }, then: "$$match" },
{ case: { $eq: ["$maxDeaths", "$$match.deaths"] }, then: "$$match" },
{ case: { $eq: ["$maxAssists", "$$match.assists"] }, then: "$$match" },
],
default: false
}
}
}
},
[false]
]
}
}
}
It will returns:
{
"maxKills": 25,
"maxDeaths": 20,
"maxAssists": 39,
"matches": [
{
"champion": {
"id": 145,
"name": "Kai'Sa",
},
"gameId": 4263819967,
"kills": 25,
"deaths": 3,
"assists": 16
},
{
"champion": {
"id": 8,
"name": "Vladimir",
},
"gameId": 4262731529,
"kills": 8,
"deaths": 20,
"assists": 3
},
{
"champion": {
"id": 22,
"name": "Ashe",
},
"gameId": 4340383097,
"kills": 9,
"deaths": 7,
"assists": 39
},
{
"champion": {
"id": 23,
"name": "Tryndamere",
},
"gameId": 4352236936,
"kills": 25,
"deaths": 6,
"assists": 22
}
]
}
My last issue are cases when multiple objects have the same max value (as the example above, 2 matches have 25 kills). I only want the oldest one in these cases.

You can do it easier by using $filter and $arrayElemAt after $group stage:
db.collection.aggregate([
{
$group: {
_id: null,
maxKills: { $max: "$kills" },
maxDeaths: { $max: "$deaths" },
maxAssists: { $max: "$assists" },
docs: { $push: "$$ROOT" }
}
},
{
$project: {
_id: 0,
maxKills: { $arrayElemAt: [ { $filter: { input: "$docs", cond: { $eq: [ "$$this.kills", "$maxKills" ] } } }, 0 ] },
maxDeaths: { $arrayElemAt: [ { $filter: { input: "$docs", cond: { $eq: [ "$$this.deaths", "$maxDeaths" ] } } }, 0 ] },
maxAssists: { $arrayElemAt: [ { $filter: { input: "$docs", cond: { $eq: [ "$$this.assists", "$maxAssists" ] } } }, 0 ] }
}
}
])
Mongo Playground

We Keep Coding

iphone swift flutter scala powershell matlab mongodb postgresql perl eclipse

How to group data by every hour - mongodb

Related

Get current state from snapshot documents - mongoDB

MongoDB - How to bring age group data

Check if current date between two dates in french MongoDB aggregation

Is it possible to group (aggregate) objects with dates into incremental intervals in MongoDB?

Get objects containing max values for multiple fields using aggregation in mongodb

Categories

Resources