Mongodb Aggregations - Group by date including condition - mongodb

I have a series of documents gathered by aggregation grouping. This is the result for one document:
{
"_id": {
"ip": "79.xxx.xxx.117",
"myDate": "2022-10-19"
},
"date": "2022-10-19",
"allVisitedPages": [
{
"page": "/",
"time": {
"time": "2022-10-19T11:35:44.655Z",
"tz": "-120",
"_id": "634fe1100a011986b7137da0"
}
},
{
"page": "/2",
"time": {
"time": "2022-10-19T12:14:29.536Z",
"tz": "-120",
"_id": "634fea257acb264f23d421f1"
}
},
{
"page": "/",
"time": {
"time": "2022-10-19T15:37:30.002Z",
"tz": "-120",
"_id": "634fea266001ea364eeb38ea"
}
},
],
"visitedPages": 3,
"createdAt": "2022-10-19T11:35:44.920Z"
},
I want to get this (in this case 2 documents as the time difference between array position 2 and 3 is greater than 2 hours):
{
"_id": {
"ip": "79.xxx.xxx.117",
"myDate": "2022-10-19"
},
"date": "2022-10-19",
"allVisitedPages": [
{
"page": "/",
"durationInMinutes": "39",
"time": {
"time": "2022-10-19T11:35:44.655Z",
"tz": "-120",
"_id": "634fe1100a011986b7137da0"
}
},
{
"page": "/2",
"durationInMinutes": "2",
"time": {
"time": "2022-10-19T12:14:29.536Z",
"tz": "-120",
"_id": "634fea257acb264f23d421f1"
}
}
],
"visitedPages": 2,
},
{
"_id": {
"ip": "79.xxx.xxx.117",
"myDate": "2022-10-19"
},
"date": "2022-10-19",
"allVisitedPages": [
{
"page": "/",
"durationInMinutes": "2",
"time": {
"time": "2022-10-19T15:37:30.002Z",
"tz": "-120",
"_id": "634fea266001ea364eeb38ea"
}
},
],
"visitedPages": 1,
},
I want to get a new grouping document if the time between an array position and the following array position is greater than 2 hours. On the last array position it show always show "2".
I tried $divide and $datediff. But this is not possible on the group stage as it's an unary operator. An approach I tried is to calculate the sum of start and end time by dividing. But how to execute this on an array level on the group stage? Maybe someone could point me in the right direction if possible at all?

You can group and then reduce, but another option is to use $setWindowFields to calculate your grouping index before grouping:
db.collection.aggregate([
{$setWindowFields: {
partitionBy: {$concat: ["$ip", "$date"]},
sortBy: {"time.time": 1},
output: {prevtime: {
$push: "$time.time",
window: {documents: [-1, "current"]}
}}
}},
{$addFields: {
minutesDiff: {
$toInt: {
$dateDiff: {
startDate: {$first: "$prevtime"},
endDate: {$last: "$prevtime"},
unit: "minute"
}
}
}
}},
{$addFields: {deltaIndex: {$cond: [{$gt: ["$minutesDiff", 120]}, 1, 0]}}},
{$setWindowFields: {
partitionBy: {$concat: ["$ip", "$date"]},
sortBy: {"time.time": 1},
output: {
groupIndex: {
$sum: "$deltaIndex",
window: {documents: ["unbounded", "current"]}
},
duration: {
$push: "$minutesDiff",
window: {documents: ["current", 1]}
}
}
}
},
{$set: {
duration: {
$cond: [
{$and: [
{$eq: [{$size: "$duration"}, 2]},
{$lte: [{$last: "$duration"}, 120]}
]},
{$last: "$duration"},
2
]
}
}},
{$group: {
_id: {ip: "$ip", myDate: "$date", groupIndex: "$groupIndex"},
date: {$first: "$date"},
allVisitedPages: {$push: {page: "$page", time: "$time", duration: "$duration"}},
visitedPages: {$sum: 1}
}},
{$unset: "_id.groupIndex"}
])
See how it works on the playground example

Related

mongodb set/update aggregated unwound fields

I have a document from mongo database looking like this:
{
"_id": "00000001",
"category": "Weather",
"city": "Salt Lake City",
"date": {
"$date": {
"$numberLong": "1663236000000"
}
},
"logs": {
"2022-09-14 12:00:00": {
"temp": 55,
"humidity": 25
},
"2022-09-14 14:00:00": {
"temp": 65,
"humidity": 35
}
}
}
I am trying to query it and have it look like this:
{
"_id": "00000001",
"category": "Weather",
"city": "Salt Lake City",
"date": {
"$date": {
"$numberLong": "1663236000000"
}
},
"2022-09-14 12:00:00": "55, 25",
"2022-09-14 14:00:00": "65, 35"
}
Currently my application query looks like:
collection.aggregate(
[{
$match: {
_id: {
$exists: true
}
}
},
{
$unwind: "$logs"
},
{
$addFields: {
"series._id": "$_id",
"series.category": "$category",
"series.city": "$city",
"series.date": "$date",
}
},
{
$replaceRoot: {
newRoot: "$logs"
},
}
])
which results in:
{
"_id": "00000001",
"category": "Weather",
"city": "Salt Lake City",
"date": {
"$date": {
"$numberLong": "1663236000000"
}
},
"2022-09-14 12:00:00": {
"temp": 55,
"humidity": 25
},
"2022-09-14 14:00:00": {
"temp": 65,
"humidity": 35
}
}
My problem is that the logs will add a new field every n hours, so the field names will be dynamic. I need to set/update the values for the unwound fields from objects to a string representation. How can I set/update field values for fields generated through $unwind aggregation like the example?
When field names are dynamic, one option is to use $objectToArray:
db.collection.aggregate([
{$match: {_id: {$exists: true}}},
{$set: {logs: {$objectToArray: "$logs"}}},
{$set: {logs: {
$map: {
input: "$logs",
in: {
k: "$$this.k",
v: {$concat: [
{$toString: "$$this.v.temp"},
", ",
{$toString: "$$this.v.humidity"}
]
}
}
}
}
}
},
{$set: {logs: {$arrayToObject: "$logs"}}}
])
See how it works on the playground example
BTW, $unwind is for arrays, not for objects, hence the comment by #CharchitKapoor.
Building off of #nimrod serok's answer, still needed to flatten the logs field in my case. Used mergeObjects to flatten the field into the root document, and then used unset to remove the original field. This probably isn't the best way to do this but it is working for me. Thanks
[{$match: {_id: {$exists: true}}},
{$set: {logs: {$objectToArray: "$logs"}}},
{$set: {logs: {
$map: {
input: "$logs",
in: {
k: "$$this.k",
v: {$concat: [
{$toString: "$$this.v.temp"},
", ",
{$toString: "$$this.v.humidity"}
]
}
}
}
}
}
},
{$replaceRoot: { newRoot: { $mergeObjects: ["$$ROOT", {$arrayToObject: "$logs"}] } } },
{$unset: "logs"}
]

Mongodb pipeline on parse server document add pointer field with $lookup

To be honest I really know sql but I'm kind of new to mongodb noSql so I'm a bit lost.
I have made a pipeline that's just working fine.
The point was to group by day and mindmapId to count number of user viewed it and sum watching time and save it into a collection in order to make request on it after.
here's sample of data
MindMap
{
"_id": "Yg5uGI3Iy0",
"data": {
"id": "root",
"topic": "Main topic",
"expanded": true
},
"theme": "orange",
"_p_author": "_User$zqPzSKD7EM",
"_created_at": {
"$date": {
"$numberLong": "1658497264836"
}
},
"_updated_at": {
"$date": {
"$numberLong": "1661334292749"
}
}
}
MindmapView
{
"_id": "qWR6HVIcvT",
"startViewDate": {
"$date": {
"$numberLong": "1658669095261"
}
},
"_p_user": "_User$VnrxG9gABO",
"_p_mindmap": "MindMap$Yg5uGI3Iy0",
"_created_at": {
"$date": {
"$numberLong": "1658669095274"
}
},
"_updated_at": {
"$date": {
"$numberLong": "1658669095274"
}
}
}
Pipeline
[{
$group: {
_id: {
day: {
$dateToString: {
format: '%Y-%m-%d',
date: '$startViewDate'
}
},
mindmapId: {
$substr: [
'$_p_mindmap',
8,
-1
]
}
},
watchTime: {
$sum: {
$dateDiff: {
startDate: '$_created_at',
endDate: '$_updated_at',
unit: 'second'
}
}
},
uniqueCount: {
$addToSet: '$_p_user'
}
}
}, {
$project: {
_id: 1,
total: {
$size: '$uniqueCount'
},
watchTime: {
$sum: '$watchTime'
}
}
}]
pipeline results
[{
"_id": {
"day": "2022-08-01",
"mindmapId": "oGCQDQmaNK"
},
"total": 1,
"watchTime": 7
},{
"_id": {
"day": "2022-08-11",
"mindmapId": "7YlZ6FPwiD"
},
"total": 1,
"watchTime": 21
},{
"_id": {
"day": "2022-08-15",
"mindmapId": "7YlZ6FPwiD"
},
"total": 1,
"watchTime": 13
},{
"_id": {
"day": "2022-07-25",
"mindmapId": "7YlZ6FPwiD"
},
"total": 1,
"watchTime": 3
},{
"_id": {
"day": "2022-08-01",
"mindmapId": "YXa8omyChc"
},
"total": 2,
"watchTime": 1306837
},{
"_id": {
"day": "2022-07-25",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 7
},{
"_id": {
"day": "2022-08-17",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 60
},{
"_id": {
"day": "2022-08-06",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 0
},{
"_id": {
"day": "2022-08-11",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 69
},{
"_id": {
"day": "2022-08-10",
"mindmapId": "oGCQDQmaNK"
},
"total": 1,
"watchTime": 4
},{
"_id": {
"day": "2022-08-15",
"mindmapId": "Yg5uGI3Iy0"
},
"total": 1,
"watchTime": 9
},
...
]
However to exploit this data faster I need to include the mindmap author inside the result collection.
The point is to group by day and mindmapId to count number of user viewed it and sum watching time and get the mindmap author and save it into a collection.
To do that I need to use $lookup but the result is kind of messy and the lookup act like a full join in sql. I've tried so much combination before this post.
Here's what I have mainly tried
[{
$group: {
_id: {
day: {
$dateToString: {
format: '%Y-%m-%d',
date: '$startViewDate'
}
},
mindmapId: {
$substr: [
'$_p_mindmap',
8,
-1
]
}
},
watchTime: {
$sum: {
$dateDiff: {
startDate: '$_created_at',
endDate: '$_updated_at',
unit: 'second'
}
}
},
uniqueCount: {
$addToSet: '$_p_user'
}
}
}, {
$lookup: {
from: 'MindMap',
localField: '_objectId',
foreignField: '_id.mindmapId',
as: 'tempMindmapPointer'
}
}, {
$unwind: '$tempMindmapPointer'
}, {
$match: {
'tempMindmapPointer._id': '_id.mindmapId'
}
}, {
$project: {
_id: 1,
total: {
$size: '$uniqueCount'
},
watchTime: {
$sum: '$watchTime'
},
author: {
$substr: [
'$tempMindmapPointer._p_author',
6,
-1
]
}
}
}]
the $match doesn't work here it make me have no results
If I remove $match it act like a full join user list with mindmap id list which I don't want
[{
"_id": {
"day": "2022-08-17",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 60,
"author": "zqPzSKD7EM"
},{
"_id": {
"day": "2022-08-17",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 60,
"author": "zqPzSKD7EM"
},{
"_id": {
"day": "2022-08-17",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 60,
"author": "zqPzSKD7EM"
},{
"_id": {
"day": "2022-08-17",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 60,
"author": "VnrxG9gABO"
},{
"_id": {
"day": "2022-08-17",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 60,
"author": "zqPzSKD7EM"
},{
"_id": {
"day": "2022-08-17",
"mindmapId": "YXa8omyChc"
},
"total": 1,
"watchTime": 60,
"author": "x6kNvG2O0X"
},...
]
I have tried to switch localField: '_objectId' foreignField:'_id.mindmapId' values.
I have also tried to make the lookup first and group by id{day,mindmapId,authorId} but I have never been able to make this compiling.
What could I do to make this request working ? I'm sure there is something to do with $match and $lookup
If I understand you correctly (since you didn't add the requested result), the simple option is:
db.MindmapView.aggregate([
{$group: {
_id: {
day: {$dateToString: {format: "%Y-%m-%d", date: "$startViewDate"}},
mindmapId: {$substr: ["$_p_mindmap", 8, -1]}
},
watchTime: {
$sum: {
$dateDiff: {startDate: "$_created_at", endDate: "$_updated_at", unit: "second"}
}
},
uniqueCount: {$addToSet: "$_p_user"}
}
},
{$project: {_id: 1, total: {$size: "$uniqueCount"}, watchTime: 1}},
{$lookup: {
from: "MindMap",
localField: "_id.mindmapId",
foreignField: "_id",
as: "author"
}
},
{$set: {author: {$first: "$author._p_author"}}}
])
See how it works on the playground example.
There is another option that may be a little more efficient, which is using the '$lookup' with a pipeline, to bring only the author from the MindMap collection instead of bringing the entire document and then filter it.
In this case the $lookup stage will be:
{
$lookup: {
from: "MindMap",
let: {id: "$_id.mindmapId"},
pipeline: [
{$match: {$expr: {$eq: ["$$id", "$_id"]}}},
{$project: {_p_author: 1, _id: 0}}
],
as: "author"
}
}

Get current state from snapshot documents - mongoDB

I'm trying to get a list of current holders at specific times from a collection. My collection looks like this:
[
{
"time": 1,
"holdings": [
{ "owner": "A", "tokens": 2 },
{ "owner": "B", "tokens": 1 }
]
},
{
"time": 2,
"holdings": [
{ "owner": "B", "tokens": 2 }
]
},
{
"time": 3,
"holdings": [
{ "owner": "A", "tokens": 3 },
{ "owner": "B", "tokens": 1 },
{ "owner": "C", "tokens": 1 }
]
},
{
"time": 4,
"holdings": [
{ "owner": "C", "tokens": 0 }
]
}
]
tokens show the current holdings of an owner if the holdings have changed to the last document. I would like to change the collection so that holdings always includes the full current holdings for any point in time.
At time: 1, the holdings are: A: 2, B: 1.
At time: 2, the holdings are: A: 2, B: 2. The collections does not include A's holdings however, because they haven't changed. So what I'd like to get is:
[
{
"time": 1,
"holdings": [
{ "owner": "A", "tokens": 2 },
{ "owner": "B", "tokens": 1 }
]
},
{
"time": 2,
"holdings": [
{ "owner": "A", "tokens": 2 }, // merged from prev doc.
{ "owner": "B", "tokens": 2 }
]
},
{
"time": 3,
"holdings": [
{ "owner": "A", "tokens": 3 },
{ "owner": "B", "tokens": 1 },
{ "owner": "C", "tokens": 1 }
]
},
{
"time": 4,
"holdings": [
{ "owner": "A", "tokens": 3 }, // merged from prev
{ "owner": "B", "tokens": 1 }, // merged from prev
{ "owner": "C", "tokens": 0 }
]
}
]
From what I understand $mergeObjects does that, but I don't understand how I can merge all previous docs in order up to the current doc for each doc. So I'm looking for a way to combine setWindowFields with mergeObjects I think.
This is a nice challenge.
So far, I got this complicated solution:
Get all of our timestamps in all of our documents. This is the purpose of the first 4 steps. $setWindowFields is used to accumulate this data.
$group by owner and calculate the empty timestamps as wantedTimes- next 5 steps.
$set empty timestamps with tokens: null to be filled with actual data and $unwind to separate - next 3 steps
Use $setWindowFields to find the last known token for each owner at each timestamp.
Fill this last known state for documents with unknown token - 2 steps
$group and format answer:
db.collection.aggregate([
{
$setWindowFields: {
sortBy: {time: 1},
output: {
allTimes: {$addToSet: "$time", window: {documents: ["unbounded", "current"]}
}
}
}
},
{
$setWindowFields: {
sortBy: {time: -1},
output: {
allTimes: {$addToSet: "$allTimes", window: {documents: ["unbounded", "current"]}
}
}
}
},
{
$set: {
allTimes: {
$reduce: {
input: "$allTimes",
initialValue: [],
in: {"$concatArrays": ["$$value", "$$this"]}
}
}
}
},
{$set: {allTimes: {$setIntersection: "$allTimes"}}},
{$unwind: "$holdings"},
{$sort: {time: 1}},
{$group: { _id: "$holdings.owner",
tokens: {$push: {tokens: "$holdings.tokens", time: "$time"}},
times: {$push: "$time"}, firstTime: {$first: "$time"},
allTimes: {$first: "$allTimes"}}
},
{
$addFields: {
wantedTimes: {
$filter: {
input: "$allTimes",
as: "item",
cond: {$gte: ["$$item", "$firstTime"]}
}
}
}
},
{
$project: {
tokens: 1,
wantedTimes: {$setDifference: ["$wantedTimes", "$times"]}
}
},
{
$set: {
data: {
$map: {
input: "$wantedTimes",
as: "item",
in: {time: "$$item", tokens: null}
}
}
}
},
{$project: {tokens: {"$concatArrays": ["$tokens", "$data"]}}},
{$unwind: "$tokens"},
{
$setWindowFields: {
partitionBy: "$_id",
sortBy: {"tokens.time": 1},
output: {
lastTokens: {
$push: "$tokens.tokens",
window: {documents: ["unbounded", "current"]}
}
}
}
},
{
$set: {
lastTokens: {
$filter: {
input: "$lastTokens",
as: "item",
cond: {$ne: ["$$item", null]}
}
}
}
},
{
$set: {
"tokens.tokens": {$ifNull: ["$tokens.tokens", {$last: "$lastTokens"}]}
}
},
{
$group: {
_id: "$tokens.time",
holdings: {$push: {owner: "$_id", tokens: "$tokens.tokens" }}
}
},
{$project: {time: "$_id", holdings: 1, _id: 0}},
{$sort: {time: 1}}
])
Playground example
From a performance perspective I recommend you split it into 2 calls, the first will be a quick findOne just to get the maximum time value in the collection.
Once you have that value the pipeline can be much leaner:
const maxItem = await db.collection.findOne({}).sort({ time: -1 });
db.collection.aggregate([
{
$unwind: "$holdings"
},
{
$group: {
_id: "$holdings.owner",
times: {
$push: {
time: "$time",
tokens: "$holdings.tokens"
}
},
minTime: {
$min: "$time"
}
}
},
{
$addFields: {
times: {
$reduce: {
input: {
$range: [
"$minTime",
maxItem.time + 1 // this is max time
]
},
initialValue: {
values: [],
lastIndex: 0
},
in: {
values: {
"$concatArrays": [
"$$value.values",
[
{
$cond: [
{
$in: [
"$$this",
"$times.time"
]
},
{
"$arrayElemAt": [
"$times",
"$$value.lastIndex"
]
},
{
"$mergeObjects": [
{
tokens: 0
},
{
"$arrayElemAt": [
"$times",
{
$subtract: [
"$$value.lastIndex",
1
]
}
]
},
{
time: "$$this"
}
]
}
]
}
]
]
},
lastIndex: {
$cond: [
{
$in: [
"$$this",
"$times.time"
]
},
{
$sum: [
"$$value.lastIndex",
1
]
},
"$$value.lastIndex"
]
}
}
}
}
}
},
{
$unwind: "$times.values"
},
{
$group: {
_id: "$times.values.time",
holdings: {
$push: {
owner: "$_id",
tokens: "$times.values.tokens"
}
}
}
},
{
$project: {
_id: 0,
time: "$_id",
holdings: 1
}
},
{
$sort: {
time: 1
}
}
])
This is still quite a heavy query as it requires to $unwind and $group the entire collection, however there is no workaround this due to the requirements. if the collection is too big for this approach I recommend iteration owner by owner, or time by time and doing separate updates accordingly.
Mongo Playground
If you don't care about performance at all and want it in a single query you can still use the same pipeline, you will have to first extract the max time in the collection, this will require you to add an initial $group stage, like so:
db.collection.aggregate([
{
$group: {
_id: null,
maxTime: {
$max: "$time"
},
roots: {
$push: "$$ROOT"
}
}
},
{
$unwind: "$roots"
},
{
$replaceRoot: {
newRoot: {
"$mergeObjects": [
"$roots",
{
maxTime: "$maxTime"
}
]
}
}
},
... same pipeline ...
])

How to group data by every hour

How do I get counts data grouped by every hour in 24 hours even if data is not present i.e. IF 0 will select 0
MonogDB 3.6
Input
[
{
"_id": ObjectId("5ccbb96706d1d47a4b2ced4b"),
"date": "2019-05-03T10:39:53.108Z",
"id": 166,
"update_at": "2019-05-03T02:45:36.208Z",
"type": "image"
},
{
"_id": ObjectId("5ccbb96706d1d47a4b2ced4c"),
"date": "2019-05-03T10:39:53.133Z",
"id": 166,
"update_at": "2019-05-03T02:45:36.208Z",
"type": "image"
},
{
"_id": ObjectId("5ccbb96706d1d47a4b2ced4d"),
"date": "2019-05-03T10:39:53.180Z",
"id": 166,
"update_at": "2019-05-03T20:45:36.208Z",
"type": "image"
},
{
"_id": ObjectId("5ccbb96706d1d47a4b2ced7a"),
"date": "2019-05-10T10:39:53.218Z",
"id": 166,
"update_at": "2019-12-04T10:45:36.208Z",
"type": "image"
},
{
"_id": ObjectId("5ccbb96706d1d47a4b2ced7b"),
"date": "2019-05-03T10:39:53.108Z",
"id": 166,
"update_at": "2019-05-05T10:45:36.208Z",
"type": "image"
},
{
"_id": ObjectId("5ccbb96706d1d47a4b2cedae"),
"date": "2019-05-03T10:39:53.133Z",
"id": 166,
"update_at": "2019-05-05T10:45:36.208Z",
"type": "image"
},
{
"_id": ObjectId("5ccbb96706d1d47a4b2cedad"),
"date": "2019-05-03T10:39:53.180Z",
"id": 166,
"update_at": "2019-05-06T10:45:36.208Z",
"type": "image"
},
{
"_id": ObjectId("5ccbb96706d1d47a4b2cedab"),
"date": "2019-05-10T10:39:53.218Z",
"id": 166,
"update_at": "2019-12-06T10:45:36.208Z",
"type": "image"
}
]
Implementation
db.collection.aggregate({
$match: {
update_at: {
"$gte": "2019-05-03T00:00:00.0Z",
"$lt": "2019-05-05T00:00:00.0Z"
},
id: {
"$in": [
166
]
}
}
},
{
$group: {
_id: {
$substr: [
"$update_at",
11,
2
]
},
count: {
"$sum": 1
}
},
},
{
$project: {
_id: 0,
hour: "$_id",
count: "$count"
}
},
{
$sort: {
hour: 1
}
})
Actual Output:
{
"count": 2,
"hour": "02"
},
{
"count": 1,
"hour": "20"
}
My expectation code show 24 hours event data is 0 or null and convert from example "02" as "02 AM" , "13" as "01 PM":
Expected Output
{
"count": 0,
"hour": "01" // 01 AM
},
{
"count": 2,
"hour": "02"
},
{
"count": 0,
"hour": "03"
},
{
"count": 0,
"hour": "04"
},
{
"count": 0,
"hour": "05"
},
{
"count": 1,
"hour": "20" // to 08 pm
}
Try this solution:
Explanation
We group by hour to count how many images are uploaded.
Then, we add extra field hour to create time interval (if you had v4.x, there is a better solution).
We flattern hour field (will create new documents) and split first 2 digits to match count and split last 2 digits to put AM / PM periods.
db.collection.aggregate([
{
$match: {
update_at: {
"$gte": "2019-05-03T00:00:00.0Z",
"$lt": "2019-05-05T00:00:00.0Z"
},
id: {
"$in": [
166
]
}
}
},
{
$group: {
_id: {
$substr: [
"$update_at",
11,
2
]
},
count: {
"$sum": 1
}
}
},
{
$addFields: {
hour: [
"0000",
"0101",
"0202",
"0303",
"0404",
"0505",
"0606",
"0707",
"0808",
"0909",
"1010",
"1111",
"1212",
"1301",
"1402",
"1503",
"1604",
"1705",
"1806",
"1907",
"2008",
"2109",
"2210",
"2311"
]
}
},
{
$unwind: "$hour"
},
{
$project: {
_id: 0,
hour: 1,
count: {
$cond: [
{
$eq: [
{
$substr: [
"$hour",
0,
2
]
},
"$_id"
]
},
"$count",
0
]
}
}
},
{
$group: {
_id: "$hour",
count: {
"$sum": "$count"
}
}
},
{
$sort: {
_id: 1
}
},
{
$project: {
_id: 0,
hour: {
$concat: [
{
$substr: [
"$_id",
2,
2
]
},
{
$cond: [
{
$gt: [
{
$substr: [
"$_id",
0,
2
]
},
"12"
]
},
" PM",
" AM"
]
}
]
},
count: "$count"
}
}
])
MongoPlayground
There's no "magic" solution, you'll have to hardcode it into your aggregation:
Heres an example using Mongo v3.2+ syntax with some $map and $filter magic:
db.collection.aggregate([
{
$match: {
update_at: {
"$gte": "2019-05-03T00:00:00.0Z",
"$lt": "2019-05-05T00:00:00.0Z"
},
id: {"$in": [166]}
}
},
{
$group: {
_id: {$substr: ["$update_at", 11, 2]},
count: {"$sum": 1}
}
},
{
$group: {
_id: null,
hours: {$push: {hour: "$_id", count: "$count"}}
}
},
{
$addFields: {
hours: {
$map: {
input: {
$concatArrays: [
"$hours",
{
$map: {
input: {
$filter: {
input: ["00", "01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23"],
as: "missingHour",
cond: {
$not: {
$in: [
"$$missingHour",
{
$map: {
input: "$hours",
as: "hourObj",
in: "$$hourObj.hour"
}
}
]
}
}
}
},
as: "missingHour",
in: {hour: "$$missingHour", count: 0}
}
}
]
},
as: "hourObject",
in: {
count: "$$hourObject.count",
hour: {
$cond: [
{$eq: [{$substr: ["$$hourObject.hour", 0, 1]}, "0"]},
{$concat: ["$$hourObject.hour", " AM"]},
{
$concat: [{
$switch: {
branches: [
{case: {$eq: ["$$hourObject.hour", "13"]}, then: "1"},
{case: {$eq: ["$$hourObject.hour", "14"]}, then: "2"},
{case: {$eq: ["$$hourObject.hour", "15"]}, then: "3"},
{case: {$eq: ["$$hourObject.hour", "16"]}, then: "4"},
{case: {$eq: ["$$hourObject.hour", "17"]}, then: "5"},
{case: {$eq: ["$$hourObject.hour", "18"]}, then: "6"},
{case: {$eq: ["$$hourObject.hour", "19"]}, then: "7"},
{case: {$eq: ["$$hourObject.hour", "20"]}, then: "8"},
{case: {$eq: ["$$hourObject.hour", "21"]}, then: "9"},
{case: {$eq: ["$$hourObject.hour", "22"]}, then: "10"},
{case: {$eq: ["$$hourObject.hour", "23"]}, then: "11"},
],
default: "None"
}
}, " PM"]
}
]
}
}
}
}
}
},
{
$unwind: "$hours"
},
{
$project: {
_id: 0,
hour: "$hours.hour",
count: "$hours.count"
}
},
{
$sort: {
hour: 1
}
}
]);
A short explanation of the $addFields stage: we first add hours that we're missing, we then merge the two arrays (of the original found hours and the "new" missing hours), finally we convert to the required output ("01" to "01 AM").
If you're using Mongo v4+ I recommend you change the $group _id stage to use $dateFromString as its more consistent.
_id: {$hour: {$dateFromString: {dateString: "$update_at"}}}
If you do do that, you'll have to update the $filter and $map section to use numbers and not strings and eventually using $toString to cast into the format you want, hence the v4+ requirement.
You should store date values as Date objects instead of strings. I would do the formatting like this:
db.collection.aggregate(
[
{ $match: { ... } },
{
$group: {
_id: { h: { $hour: "$update_at" } },
count: { $sum: 1 }
}
},
{
$project: {
_id: 0,
hour: {
$switch: {
branches: [
{ case: { $lt: ["$_id.h", 10] }, then: { $concat: ["0", { $toString: "$_id.h" }, " AM"] } },
{ case: { $lt: ["$_id.h", 13] }, then: { $concat: [{ $toString: "$_id.h" }, " AM"] } },
{ case: { $lt: ["$_id.h", 22] }, then: { $concat: ["0", { $toString: { $subtract: ["$_id.h", 12] } }, " PM"] } },
{ case: { $lt: ["$_id.h", 24] }, then: { $concat: [{ $toString: { $subtract: ["$_id.h", 12] } }, " PM"] } }
]
}
},
hour24: "$_id.h",
count: 1
}
},
{ $sort: { hour24: 1 } }
])
As non-American I am not familiar with AM/PM rules, esp. for midnight and midday but I guess you get the principle.
Here is the query you can test it out, for MongoDB 4.0+
i will be improving query and update
const query = [{
$match: {
update_at: {
"$gte": ISODate("2019-05-03T00:00:00.0Z"),
"$lt": ISODate("2019-05-05T00:00:00.0Z")
},
id: {
"$in": [
166
]
}
}
},
{
$group: {
_id: { $hour: "$update_at" },
count: {
"$sum": 1
}
},
},
{
$addFields: {
hourStr: { $toString: { $cond: { if: { $gte: ["$_id", 12] }, then: { $subtract: [12, { $mod: [24, '$_id'] }] }, else: "$_id" } } },
}
},
{
$project: {
formated: { $concat: ["$hourStr", { $cond: { if: { $gt: ["$_id", 12] }, then: " PM", else: " AM" } }] },
count: "$count",
hour: 1,
}
}]
If you want to output in Indian Time formate. then below code work!
const query = [
{
$match: {
update_at: {
"$gte": ISODate("2019-05-03T00:00:00.0Z"),
"$lt": ISODate("2019-05-05T00:00:00.0Z")
},
id: {
"$in": [
166
]
}
}
},
{
$project: {
"h": { "$hour": { date: "$update_at", timezone: "+0530" } },
}
},
{
$group:
{
_id: { $hour: "$h" },
count: { $sum: 1 }
}
}
];

How to get count of multiple fields based on value in mongodb?

Collection exists as below:
[
{"currentLocation": "Chennai", "baseLocation": "Bengaluru"},
{"currentLocation": "Chennai", "baseLocation": "Bengaluru"},
{"currentLocation": "Delhi", "baseLocation": "Bengaluru"},
{"currentLocation": "Chennai", "baseLocation": "Chennai"}
]
Expected Output:
[
{"city": "Chennai", "currentLocationCount": 3, "baseLocationCount": 1},
{"city": "Bengaluru", "currentLocationCount": 0, "baseLocationCount": 3},
{"city": "Delhi", "currentLocationCount": 1, "baseLocationCount": 0}
]
What I have tried is:
db.getCollection('users').aggregate([{
$group: {
"_id": "$baselocation",
baseLocationCount: {
$sum: 1
}
},
}, {
$project: {
"_id": 0,
"city": "$_id",
"baseLocationCount": 1
}
}])
Got result as:
[
{"city": "Chennai", "baseLocationCount": 1},
{"city": "Bengaluru", "baseLocationCount": "3"}
]
I'm not familiar with mongo, so any help?
MongoDB Version - 3.4
Neil Lunn and myself had a lovely argument over this topic the other day which you can read all about here: Group by day with Multiple Date Fields.
Here are two solutions to your precise problem.
The first one uses the $facet stage. Bear in mind, though, that it may not be suitable for large collections because $facet produces a single (potentially huge) document that might be bigger than the current MongoDB document size limit of 16MB (which only applies to the result document and wouldn't be a problem during pipeline processing anyway):
collection.aggregate(
{
$facet:
{
"current":
[
{
$group:
{
"_id": "$currentLocation",
"currentLocationCount": { $sum: 1 }
}
}
],
"base":
[
{
$group:
{
"_id": "$baseLocation",
"baseLocationCount": { $sum: 1 }
}
}
]
}
},
{ $project: { "result": { $setUnion: [ "$current", "$base" ] } } }, // merge results into new array
{ $unwind: "$result" }, // unwind array into individual documents
{ $replaceRoot: { newRoot: "$result" } }, // get rid of the additional field level
{ $group: { "_id": "$_id", "currentLocationCount": { $sum: "$currentLocationCount" }, "baseLocationCount": { $sum: "$baseLocationCount" } } }, // group into final result)
{ $project: { "_id": 0, "city": "$_id", "currentLocationCount": 1, "baseLocationCount": 1 } } // group into final result
)
The second one works based on the $map stage instead:
collection.aggregate(
{
"$project": {
"city": {
"$map": {
"input": [ "current", "base" ],
"as": "type",
"in": {
"type": "$$type",
"name": {
"$cond": {
"if": { "$eq": [ "$$type", "current" ] },
"then": "$currentLocation",
"else": "$baseLocation"
}
}
}
}
}
}
},
{ "$unwind": "$city" },
{
"$group": {
"_id": "$city.name",
"currentLocationCount": {
"$sum": {
"$cond": {
"if": { "$eq": [ "$city.type", "current" ] },
"then": 1,
"else": 0
}
}
},
"baseLocationCount": {
"$sum": {
"$cond": {
"if": { "$eq": [ "$city.type", "base" ] },
"then": 1,
"else": 0
}
}
}
}
}
)