Mongodb aggregate $group and count for date ranges - mongodb

I have documents like these:
{
"_id" : ObjectId("5cc80389c723e046f504b5a9"),
"adddress" : "string",
"checkIn" : "2019-04-30T08:12:57.909Z"
},
{
"_id" : ObjectId("5cc995f5a6f3eb7c483b019f"),
"adddress" : "string",
"checkIn" : "2019-05-01T12:49:57.561Z"
}
I have tried aggrgation like this:
var start = new Date("2019-04-30T08:12:57.909Z");
var end = new Date("2019-05-01T12:49:57.561Z");
var pipeline = [
{
$match: {
checkIn: {
$gte: start,
$lte: end
}
}
},
{
$group: {
_id: {
year: {
$year: "$checkIn"
},
month: {
$month: "$checkIn"
},
day: {
$dayOfYear: "$checkIn"
}
},
count: {
$sum: 1
}
}
}];
db.collections.aggregate(pipeline).toArray()
Is it possible to count them by checkIn date and get result like this:
"_id": [{
"checkIn": "2019-03-15T00:00:00Z",
"count": 4
}, {
"checkIn": "2019-04-30T00:00:00Z",
"count": 1
}, {
"checkIn": "2019-05-10T00:00:00Z",
"count": 1
}],

The result is shown the total number of the day.
{$project: {
checkIn: { $dateToString: { format: '%Y-%m-%d', date: '$checkIn' } }
}},
{$group: {
_id: '$checkIn',
checkIn: {$first: '$checkIn'},
count: {$sum: 1}
}},
{$sort: {checkIn: 1}}

Try this: I have tested this query and its working.
db.sample.aggregate([{
$addFields: {
date: {
$dateFromString: {
dateString: "$checkIn"
}
}
}
},{
$match: {
date: {
$gte: start,
$lte: end
}
}
},
{
$addFields: {
dateString: {
$dateToString: {
format: "%Y-%m-%d",
date: "$date"
}
}
}
},
{
$group: {
_id: "$dateString",
count: {
$sum: 1
}
}
}
]);

Related

How to calculate average records per month?

My records like this [{ createdAt }, {createdAt}, {createdAt} ]
I need average records per month.
january => 3 records
february => 2 records etc..
You can try to $group by month and year when counting and by month when averaging:
db.collection.aggregate([
{
$group: {
_id: {
month: {
$month: "$createdAt"
},
year: {
$year: "$createdAt"
},
},
count: {
$sum: 1
}
}
},
{
$group: {
_id: {
month: "$_id.month"
},
average: {
$avg: "$count"
}
}
},
{
$project: {
_id: 0,
month: "$_id.month",
average: 1
}
}
])
Link to playground
Not fully clear what you mean by "average records per month" but I think it would be this:
db.collection.aggregate([
{
$group: {
_id: {
$dateTrunc: {
date: "$createdAt",
unit: "month"
}
},
count: { $count: {} }
}
},
{
$group: {
_id: null,
data: { $push: { k: { $toString: { $month: "$_id" } }, v: "$count" } }
}
},
{ $replaceWith: { $arrayToObject: "$data" } }
])
Getting the month name is not so easy, either you use a external library or build your own with $switch

How to set mongodb aggregate default value?

I need to assign a default value of zero on days with zero repair, but this is the result.
[
{ day: 21, month: 10, year: 2022, count: 2 },
{ day: 28, month: 10, year: 2022, count: 1 },
{ day: 24, month: 10, year: 2022, count: 2 }
]
I just need to access the weekly repair data, 0 should be the default on non-repair days
const result = await Repair.aggregate([
{
$match: {
createdDate: {
$gte: new Date(fromDate),
$lte: new Date(toDate),
},
},
},
{
$group: {
_id: {
day: "$day",
year: "$year",
month: "$month",
},
count: {
$sum: 1,
},
},
},
{
$project: {
_id: 0,
day: "$_id.day",
month: "$_id.month",
year: "$_id.year",
count: "$count",
},
},
]);
Without valid sample input data, it is difficult to give exact solution, but would be like this one:
db.collection.aggregate([
{
$match: {
createdDate: {
$gte: new Date(fromDate),
$lte: new Date(toDate),
},
},
},
{
$group: {
_id: { $dateTrunc: { date: "$createdDate", unit: "day" } },
count: { $sum: 1 },
},
},
{ $set: { createdDate: "$_id" } },
{
$densify: {
field: "createdDate",
range: {
step: 1,
unit: "day",
bounds: "full"
}
}
},
{
$fill: {
sortBy: { createdDate: 1 },
output: { count: { value: 0 } }
}
}
]);
Mongo Playground
Update
With MongoDB version 5 the code is a bit more complex. Would be this one:
db.collection.aggregate([
{
$match: {
createdDate: {
$gt: new Date("2022-10-23T00:00:00.000Z"),
$lt: new Date("2022-10-30T00:00:00.000Z")
}
}
},
{
$facet: {
repairs: [
{
$group: {
_id: { $dateTrunc: { date: "$createdDate", unit: "day" } },
count: { $count: {} }
}
},
{
$project: {
date: "$_id",
count: "$count",
_id: 0
}
}
]
}
},
{
$set: {
allDays: {
$range: [
0,
{
$add: [
{
$dateDiff: {
startDate: { $min: "$repairs.date" },
endDate: { $max: "$repairs.date" },
/*
or
startDate: new Date("2022-10-23T00:00:00.000Z"),
endDate: new Date("2022-10-30T00:00:00.000Z"),
*/
unit: "day",
}
},
1
]
}
]
}
}
},
{
$set: {
allDays: {
$map: {
input: "$allDays",
in: {
$dateAdd: {
startDate: { $min: "$repairs.date" },
unit: "day",
amount: "$$this"
}
}
}
}
}
},
{
$project: {
repairs: {
$map: {
input: "$allDays",
as: "day",
in: {
$mergeObjects: [
{ date: "$$day", count: 0 },
{
$first: {
$filter: {
input: "$repairs",
cond: {
$eq: [
"$$day",
"$$repairs.date"
]
},
as: "repairs"
}
}
}
]
}
}
}
}
},
{
$project: {
repairs: {
$map: {
input: "$repairs",
in: "$$this.count"
}
}
}
}
])
Mongo Playground
The result cannot be simple [ 2, 0, 0, 0, 1, 2 ], the result is always a JSON document, i.e. field and values. But you can do
db.collection.aggregate([...]).toArray().shift().repairs

MongoDB - get datewise/houlty aggregate count of column

I have set of documents in my mongoDB collection. I am looking to get datewise aggregate count of document if date range is more than a day and hourly aggregate count for same column if date query is for single day. The data may have documents with same conversationId, hence it is necessary to group with conversationId as well.Below is sample of data for reference
[
{
"_id":"c438a671-2391-4b85-815c-ecfcb3d2bb54",
"status":"INTERNAL_UPDATE",
"conversationId":"ac44781d-caab-4410-a708-9d6db8480fc3",
"messageIds":[],
"messageId":"4dc02026-ac06-4eb1-aa59-e385fcce4a36",
"responseId":"0c00c83d-61c5-4937-846c-2e6a46aae857",
"conversation":{},
"message":{},
"params":{},
"timestamp":"2021-05-04T11:40:06.552Z",
"source":{}
},
{
"_id":"98370ddf-9ff8-4347-bab7-1f7777ab9e9d",
"status":"NEW",
"conversationId":"b5dc39d2-56a1-4eb6-a728-cdbe33dca580",
"messageIds":[],
"messageId":"ba94b839-f795-44f2-aea0-173d26006f14",
"responseId":"a2b75364-447b-4345-8008-2beccd6cbb34",
"conversation":{},
"message":{},
"params":{},
"timestamp":"2021-05-05T11:40:30.897Z",
"source":{}
},
{
"_id":"db1eae2b-62d9-455c-ab46-dbfc5baf8b67",
"status":"INTERNAL_UPDATE",
"conversationId":"b5dc39d2-56a1-4eb6-a728-cdbe33dcb584",
"messageIds":[],
"messageId":"b83c743b-d36e-4fdd-9c03-21988af47263",
"responseId":"97198c09-0130-48dc-a225-6d0faeff3116",
"conversation":{},
"message":{},
"params":{},
"timestamp":"2021-05-05T11:40:31.418Z",
"source":{}
},
{
"_id":"12a21495-f857-4f18-a06e-f8ba0b951ade",
"status":"NEW",
"conversationId":"8e37c704-add8-4f9f-8e70-d630c24f653b",
"messageIds":[],
"messageId":"51a48362-545c-4f9f-930b-42e4841fc974",
"responseId":"4691468b-a43b-41d1-83df-1349fb554bfa",
"conversation":{},
"message":{},
"params":{},
"timestamp":"2021-05-06T11:43:58.174Z",
"source":{}
},
{
"_id":"4afaa735-4618-40cf-8b4f-00ee83b2c3c5",
"status":"INTERNAL_UPDATE",
"conversationId":"8e37c704-add8-4f9f-8e70-d630c24f653b",
"messageIds":[],
"messageId":"7c860126-bf1e-41b2-a7d3-6bcec3e8d5fb",
"responseId":"09cec9a1-2621-481d-b527-d98b007ef5be",
"conversation":{},
"message":{},
"params":{},
"timestamp":"2021-05-06T11:43:58.736Z",
"source":{}
},
{
"_id":"cf8deeca-2cfd-497e-b92b-03204c84217a",
"status":"NEW",
"conversationId":"3c6870b5-88d6-4e21-8629-28137dea3fee",
"messageIds":[],
"messageId":"da84e414-2269-4812-8ddd-e2cd6c9be4fd",
"responseId":"ae1014b2-0cc1-41f0-9990-cf724ed67ab7",
"conversation":{},
"message":{},
"params":{},
"timestamp":"2021-05-06T13:37:55.060Z",
"source":{}
}
]
Presently I am able to group by conversationId, but unable to get data aggregated datewise or on hourly basis if date range is on single date.
Below is the query for same
db.documentName.aggregate([
{
'$match': {
'$and': [
{
timestamp: {
'$gte': ISODate('2021-05-01T00:00:00.000Z'),
'$lte': ISODate('2021-05-10T23:59:59.999Z')
}
},
{ 'source.author': { '$regex': 'user', '$options': 'i' } },
{},
{}
]
}
},
{ '$group': {
_id: {'conversationId': '$conversationId'} },
{ '$count': 'document_count' }
])
I have tried adding something like, $hour: '$timestamp' with comma separation beside conversationId in $group, but its of no use and is giving error.
The desired result I am trying to get for above data is, something like this
[{"date": "2021-05-04", "doc_count": 1},
{"date": "2021-05-05", "doc_count": 2},
{"date": "2021-05-06", "doc_count": 2}]
As for 2021-05-05 there are 2 docs with different conversationId, and for 2021-05-06 there are 3 docs in total but 2 documents have same conversationId hence aggregate count for 2021-05-06 is also 2. Hope this clarifies my quesiton.
The question is not entirely clear to me, but it sounds like you want something like this:
The groupId is a field to rebuild the date including the hour, or not, according to your condition:
EDIT:
db.collection.aggregate([
{$match: {
timestamp: {
$gte: ISODate("2021-05-01T00:00:00.000Z"),
$lte: ISODate("2021-05-07T23:59:59.999Z")
}
}
},
{$project: {
conversationId: 1,
groupId: {
$dateFromParts: {
year: {$year: "$timestamp"},
month: {$month: "$timestamp"},
day: {$dayOfMonth: "$timestamp"},
hour: {$cond: [
{$gte: [
{$dateDiff: {
startDate: ISODate("2021-05-01T00:00:00.000Z"),
endDate: ISODate("2021-05-07T23:59:59.999Z"),
unit: "day"}}, 1]},
0,
{$hour: "$timestamp"}]}
}
}
}
},
{$group: {_id: {conversationId: "$conversationId", groupId: "$groupId"}}},
{$group: {_id: "$_id.groupId", doc_count: {$sum: 1}}},
{$project: {date: {$toString: "$_id"}, doc_count: 1, _id: 0}}
])
See how it works on the playground example
As suggested by #nimrodserok, for mongo version 4.2.9 the query would be
db.collection.aggregate([
{
$match: {
timestamp: {
$gte: ISODate("2021-05-01T00:00:00.000Z"),
$lte: ISODate("2021-05-07T23:59:59.999Z")
}
}
},
{
$project: {
conversationId: 1,
groupId: {
$dateFromParts: {
year: {
$year: "$timestamp"
},
month: {
$month: "$timestamp"
},
day: {
$dayOfMonth: "$timestamp"
},
hour: {
$cond: [
{
$gte: [
{
$subtract: [
{
$toLong: ISODate("2021-05-07T23:59:59.999Z")
},
{
$toLong: ISODate("2021-05-01T00:00:00.000Z")
}
]
},
86400000
]
},
0,
{
$hour: "$timestamp"
}
]
}
}
}
}
},
{
$group: {
_id: {
conversationId: "$conversationId",
groupId: "$groupId"
}
}
},
{
$group: {
_id: "$_id.groupId",
doc_count: {
$sum: 1
}
}
},
{
$project: {
date: {
$toString: "$_id"
},
doc_count: 1,
_id: 0
}
}
])

MongoDB Aggregate Query, Logins Averages

let pipeline = [{
$match: {
time: { $gt: 980985600 },
user_id: mongoose.Types.ObjectId("60316a2e7641bd0017ced7b1")
}
},
{
$project: {
newDate: { '$toDate': "$time" },
user_id: '$user_id'
}
},
{
$group: {
_id: { week: { $week: "$newDate" }, year: { $year: "$newDate" }},
count: { $sum: 1 }
}
}]
I am currently trying to perform an aggregate through mongoose to find the average logins per week for a specific user. So far I have been able to get to the total number of logins each week, but was curious if there was a way to find the average of these final groupings within the same function. How would I go about doing this?
Just add one last stage to your query:
{
$group: {
_id: null,
avg: { $avg: "$count" }
}
}
So try this:
let pipeline = [
{
$match: {
time: { $gt: 980985600 },
user_id: mongoose.Types.ObjectId("60316a2e7641bd0017ced7b1")
}
},
{
$project: {
newDate: { '$toDate': "$time" },
user_id: '$user_id'
}
},
{
$group: {
_id: { week: { $week: "$newDate" }, year: { $year: "$newDate" } },
count: { $sum: 1 }
}
},
{
$group: {
_id: null,
avg: { $avg: "$count" }
}
}
];

How to get sum of counted records using group by in mongodb?

I am trying to get the sum of count which I get from group, match. How can I get the same.
I have this code...
VisitorCompany.aggregate(
[
{
$match: {
$and:[
{ entry_date: { $gt: start, $lt: end } }
]
}
},
{
$group:
{
_id:
{
day: { $dayOfMonth: "$entry_date" },
month: { $month: "$entry_date" },
year: { $year: "$entry_date" }
},
count: { $sum:1 },
entry_date: { $first: "$entry_date" }
}
},
{
$project:
{
entry_date:
{
$dateToString: { format: "%Y-%m-%d", date: "$entry_date" }
},
count: 1,
_id: 0
}
},
{ $sort : { entry_date : -1 } },
])
and the output is ...
{
"count": 2,
"entry_date": "2018-12-12"
},
{
"count": 1
"entry_date": "2018-12-11"
}
Is anyone have idea that how to get sum of count i.e. 3 (2+1), means total number of records before group. thanks in advance.
Below modified query of yours will be giving you the sum of count, I have just added the
$group:{_id:"", sum:{$sum: "$count"}}}
to the existing aggregation pipeline query
Modified query
VisitorCompany.aggregate(
[
{
$match: {
$and:[
{ entry_date: { $gt: start, $lt: end } }
]
}
},
{
$group:
{
_id:
{
day: { $dayOfMonth: "$entry_date" },
month: { $month: "$entry_date" },
year: { $year: "$entry_date" }
},
count: { $sum:1 },
entry_date: { $first: "$entry_date" }
}
},
{
$project:
{
entry_date:
{
$dateToString: { format: "%Y-%m-%d", date: "$entry_date" }
},
count: 1,
_id: 0
}
},
{ $sort : { entry_date : -1 } },
{$group:{_id:"", sum:{$sum: "$count"}}}
])
The result
{ "_id" : "", "sum" : 3 }