Get sum of a column from mongodb along with the list of another column - mongodb

I have a collection "employees" with sample entries as below-
{
"_id" : ObjectId("62ccaa238a322322211"),
"employeeId" : "1234",
"date" : ISODate("2022-07-11T12:00:00.000+0000"),
"hours" : 15.0,
"createdBy" : "user1",
"createdDate" : ISODate("2023-02-19T21:54:27.213+0000"),
"updatedBy" : "user1",
"updatedDate" : ISODate("2023-02-19T21:54:27.213+0000"),
},
{
"_id" : ObjectId("62ccaa238a322388821"),
"employeeId" : "1234",
"date" : ISODate("2022-07-10T12:00:00.000+0000"),
"hours" : 25.0,
"createdBy" : "user1",
"createdDate" : ISODate("2023-02-19T22:54:27.213+0000"),
"updatedBy" : "user1",
"updatedDate" : ISODate("2023-02-19T22:54:27.213+0000"),
}
I am trying to get sum of hours for each employee along with the list of dates for those entries
{
employeeId :"1234"
hours : 40 // sum of the hours from both entries
dates : [2022-07-11, 2022-07-10] // list of `date` column
}
I tried below one but dont know how to adapt to get employeeId and sum
db.getCollection("employees").aggregate( [
{ $match : {
$and :[
{"employeeId" : "1234"},
{"date" : { $lte: new ISODate("2023-02-19") }}
]}},
{
$group:
{
"_id": {
"$dateToString": {
"format": "%Y-%m-%d",
"date": "$date"
}
},
totalAmount: { $sum: "$hours" }
}
}
] )

You should group by employeeId instead of date.
db.collection.aggregate([
{
$group: {
_id: "$employeeId",
hours: {
$sum: "$hours"
},
dates: {
$push: {
$dateTrunc: {
date: "$date",
unit: "day"
}
}
}
}
}
])
Mongo Playground

Related

How to get the recent values of grouped result?

Below is the document which has an array name datum and I want to filter the records based on StatusCode, group by Year and sum the amount value from the recent record of distinct Types.
{
"_id" : ObjectId("5fce46ca6ac9808276dfeb8c"),
"year" : 2018,
"datum" : [
{
"StatusCode" : "A",
"Type" : "1",
"Amount" : NumberDecimal("100"),
"Date" : ISODate("2018-05-30T00:46:12.784Z")
},
{
"StatusCode" : "A",
"Type" : "1",
"Amount" : NumberDecimal("300"),
"Date" : ISODate("2023-05-30T00:46:12.784Z")
},
{
"StatusCode" : "A",
"Type" : "2",
"Amount" : NumberDecimal("420"),
"Date" : ISODate("2032-05-30T00:46:12.784Z")
},
{
"StatusCode" : "B",
"Type" : "2",
"Amount" : NumberDecimal("420"),
"Date" : ISODate("2032-05-30T00:46:12.784Z")
}
]
}
In my case following is the expected result :
{
Total : 720
}
I want to achieve the result in the following aggregate Query pattern
db.collection.aggregate([
{
$addFields: {
datum: {
$reduce: {
input: "$datum",
initialValue: {},
"in": {
$cond: [
{
$and: [
{ $in: ["$$this.StatusCode", ["A"]] }
]
},
"$$this",
"$$value"
]
}
}
}
}
},
{
$group: {
_id: "$year",
RecentValue: { $sum: "$datum.Amount" }
}
}
])
You can first $unwind the datum array. Do the filtering and sort by the date. Then get the record with latest datum by a $group. Finally do another $group to calculate the sum.
Here is a mongo playground for your reference.

Filter by nested arrays/objects values (on different levels) and $push by multiple level - MongoDB Aggregate

I have a document with multiple level of embedded subdocument each has some nested array. Using $unwind and sort, do sorting based on day in descending and using push to combine each row records into single array. This Push is working only at one level means it allows only one push. If want to do the same things on the nested level and retains the top level data, got "errmsg" : "Unrecognized expression '$push'".
{
"_id" : ObjectId("5f5638d0ff25e01482432803"),
"name" : "XXXX",
"mobileNo" : 323232323,
"payroll" : [
{
"_id" : ObjectId("5f5638d0ff25e01482432801"),
"month" : "Jan",
"salary" : 18200,
"payrollDetails" : [
{
"day" : "1",
"salary" : 200,
},
{
"day" : "2",
"salary" : 201,
}
]
},
{
"_id" : ObjectId("5f5638d0ff25e01482432802"),
"month" : "Feb",
"salary" : 8300,
"payrollDetails" : [
{
"day" : "1",
"salary" : 300,
},
{
"day" : "2",
"salary" : 400,
}
]
}
],
}
Expected Result:
{
"_id" : ObjectId("5f5638d0ff25e01482432803"),
"name" : "XXXX",
"mobileNo" : 323232323,
"payroll" : [
{
"_id" : ObjectId("5f5638d0ff25e01482432801"),
"month" : "Jan",
"salary" : 18200,
"payrollDetails" : [
{
"day" : "2",
"salary" : 201
},
{
"day" : "1",
"salary" : 200
}
]
},
{
"_id" : ObjectId("5f5638d0ff25e01482432802"),
"month" : "Feb",
"salary" : 8300,
"payrollDetails" : [
{
"day" : "2",
"salary" : 400
},
{
"day" : "1",
"salary" : 300
}
]
}
],
}
Just day will be sorted and remaining things are same
I have tried but it got unrecognized expression '$push'
db.employee.aggregate([
{$unwind: '$payroll'},
{$unwind: '$payroll.payrollDetails'},
{$sort: {'payroll.payrollDetails.day': -1}},
{$group: {_id: '$_id', payroll: {$push: {payrollDetails:{$push:
'$payroll.payrollDetails'} }}}}])
It requires two time $group, you can't use $push operator two times in a field,
$group by main id and payroll id, construct payrollDetails array
$sort by payroll id (you can skip if not required)
$group by main id and construct payroll array
db.employee.aggregate([
{ $unwind: "$payroll" },
{ $unwind: "$payroll.payrollDetails" },
{ $sort: { "payroll.payrollDetails.day": -1 } },
{
$group: {
_id: {
_id: "$_id",
pid: "$payroll._id"
},
name: { $first: "$name" },
mobileNo: { $first: "$mobileNo" },
payrollDetails: { $push: "$payroll.payrollDetails" },
month: { $first: "$payroll.month" },
salary: { $first: "$payroll.salary" }
}
},
{ $sort: { "payroll._id": -1 } },
{
$group: {
_id: "$_id._id",
name: { $first: "$name" },
mobileNo: { $first: "$mobileNo" },
payroll: {
$push: {
_id: "$_id.pid",
month: "$month",
salary: "$salary",
payrollDetails: "$payrollDetails"
}
}
}
}
])
Playground

Mongodb aggregate by day and delete duplicate value

I'm trying to clean a huge database.
Sample DB :
{
"_id" : ObjectId("59fc5249d5ab401d99f3de7f"),
"addedAt" : ISODate("2017-11-03T11:26:01.744Z"),
"__v" : 0,
"check" : 17602,
"lastCheck" : ISODate("2018-04-05T11:47:00.609Z"),
"tracking" : [
{
"timeCheck" : ISODate("2017-11-06T13:17:20.861Z"),
"_id" : ObjectId("5a0060e00f3c330012bafe39"),
"rank" : 2395,
},
{
"timeCheck" : ISODate("2017-11-06T13:22:31.254Z"),
"_id" : ObjectId("5a0062170f3c330012bafe77"),
"rank" : 2395,
},
{
"timeCheck" : ISODate("2017-11-06T13:27:40.551Z"),
"_id" : ObjectId("5a00634c0f3c330012bafebe"),
"rank" : 2379,
},
{
"timeCheck" : ISODate("2017-11-06T13:32:41.084Z"),
"_id" : ObjectId("5a0064790f3c330012baff03"),
"rank" : 2395,
},
{
"timeCheck" : ISODate("2017-11-06T13:37:51.012Z"),
"_id" : ObjectId("5a0065af0f3c330012baff32"),
"rank" : 2379,
},
{
"timeCheck" : ISODate("2017-11-07T13:37:51.012Z"),
"_id" : ObjectId("5a0065af0f3c330012baff34"),
"rank" : 2379,
}]
}
I have a lot of duplicate value but I need to clean only by day.
To obtain this for example :
{
"_id" : ObjectId("59fc5249d5ab401d99f3de7f"),
"addedAt" : ISODate("2017-11-03T11:26:01.744Z"),
"__v" : 0,
"check" : 17602,
"lastCheck" : ISODate("2018-04-05T11:47:00.609Z"),
"tracking" : [
{
"timeCheck" : ISODate("2017-11-06T13:17:20.861Z"),
"_id" : ObjectId("5a0060e00f3c330012bafe39"),
"rank" : 2395,
},
{
"timeCheck" : ISODate("2017-11-06T13:27:40.551Z"),
"_id" : ObjectId("5a00634c0f3c330012bafebe"),
"rank" : 2379,
},
{
"timeCheck" : ISODate("2017-11-07T13:37:51.012Z"),
"_id" : ObjectId("5a0065af0f3c330012baff34"),
"rank" : 2379,
}]
}
How can I aggregate by day and after delete last value duplicate?
I need to keep the values per day even if they are identical with another day.
The aggregation framework cannot update data at this stage. However, you can use the following aggregation pipeline in order to get the desired output and then use e.g. a bulk replace to update all your documents:
db.collection.aggregate({
$unwind: "$tracking" // flatten the "tracking" array into separate documents
}, {
$sort: {
"tracking.timeCheck": 1 // sort by timeCheck to allow us to use the $first operator in the next stage reliably
}
}, {
$group: {
_id: { // group by
"_id": "$_id", // "_id" and
"rank": "$tracking.rank", // "rank" and
"date": { // the "date" part of the "timeCheck" field
$dateFromParts : {
year: { $year: "$tracking.timeCheck" },
month: { $month: "$tracking.timeCheck" },
day: { $dayOfWeek: "$tracking.timeCheck" }
}
}
},
"doc": { $first: "$$ROOT" } // only keep the first document per group
}
}, {
$sort: {
"doc.tracking.timeCheck": 1 // restore ascending sort order - may or may not be needed...
}
}, {
$group: {
_id: "$_id._id", // merge everything again per "_id"
"addedAt": { $first: "$doc.addedAt" },
"__v": { $first: "$doc.__v" },
"check": { $first: "$doc.check" },
"lastCheck": { $first: "$doc.lastCheck" },
"tracking": { $push: "$doc.tracking" } // in order to join the tracking values into an array again
}
})

Aggregation query which returns other fields

I have data which looks like this.
{
"badgeId" : "ventura",
"date" : ISODate("2016-12-22T21:26:40.382+0000"),
"mistakes" : NumberInt(10)
}
{
"_id" : "a4usNGibIu",
"badgeId" : "dog",
"date" : ISODate("2016-12-21T21:26:40.382+0000"),
"mistakes" : NumberInt(10)
}
{
"_id" : ObjectId("580c77801d7723f3f7fe0e77"),
"badgeId" : "dog",
"date" : ISODate("2016-11-24T21:26:41.382+0000"),
"mistakes" : NumberInt(5)
}
I need documents grouped by badgeId where the mistakes is smallest and the corresponding date
I cannot use $min, $max, $first, $last on for the date in $group because I need the date from the row where mistakes is lowest.
I tried the following query where I am using $min, but it won't give the intended result as it will pick $min of the date
db.Badges.aggregate([
{
$match: otherMatchConditions
},
{
$group: {
_id: '$badgeId',
date: {
$min: '$date'
},
mistakes: {
$min: '$mistakes'
}
}
}
])
You may sort the results by mistakes and then take the corresponding $first of mistakes and date
[
{$sort: {mistakes: 1}},
{
$group: {
_id: '$badgeId',
date: {
$first: '$date'
},
mistakes: {
$first: '$mistakes'
}
}
}
]
I think your query is right but as you see dates are invalid here.
ISODate("2016-14-22T21:26:41.382+0000") and ISODate("2016-13-22T21:26:40.382+0000"),
in both dates you can see months is 13 and 14 , which is not valid month. Below are right data after putting in mongodb.
{
"_id" : ObjectId("580ca86c9a43fad551ba801f"),
"badgeId" : "ventura",
"date" : ISODate("2016-12-22T21:26:40.382Z"),
"mistakes" : 10
}
{
"_id" : "a4usNGibIu",
"badgeId" : "dog",
"date" : ISODate("2016-12-23T21:26:40.382Z"),
"mistakes" : 10
}
{
"_id" : ObjectId("580c77801d7723f3f7fe0e77"),
"badgeId" : "dog",
"date" : ISODate("2016-12-24T21:26:40.382Z"),
"mistakes" : 5
}
When applied your query
db.getCollection('COLLECTION_NAME').aggregate([
{
$group: {
_id: '$badgeId',
date: {
$min: '$date'
},
mistakes: {
$min: '$mistakes'
}
}
}
])
I got the below result.
{
"_id" : "dog",
"date" : ISODate("2016-12-23T21:26:40.382Z"),
"mistakes" : 5
}
{
"_id" : "ventura",
"date" : ISODate("2016-12-22T21:26:40.382Z"),
"mistakes" : 10
}

MongoDB aggregate Timezone for date add

I have a problem of MongoDB's aggregate of timezone is UTC. I have looked for solutions from many other existing issues, but it is still not working. My code as follows:
MongoDB version : 2.2
Data
{ "_id" : ObjectId("52a3c9df46c6a9627eeb0337"), "Counting" : { "id" : "b1a93dfda46c47848f9862031300d24c", "group" : "Salary", "user_id" : "4d4ad2d37a464ad09d9aca2fee4c760c", "subGroup" : "e–ae3?", "bank_id" : "97e0fecc322b49b48c4eb3c8425fea77", "fee" : 646, "isIncome" : "true", "payment" : "", "consumeDate" : ISODate("2013-08-15T16:00:00Z"), "createDate" : ISODate("2013-12-08T01:22:39.008Z"), "bank_name" : "9edb6897-cdb8-4ce4-8f08-f5792cfa83d9" } }
{ "_id" : ObjectId("52a3c9df46c6a9627eeb0338"), "Counting" : { "id" : "33b341fc71314daebe851397c5cbaa40", "group" : "Salary", "user_id" : "cb9e06649cf943e5b368f6b05fc126c6", "subGroup" : "e–ae3?", "bank_id" : "e8da8cdae3ae495ca76f873fb3460b6d", "fee" : 647, "isIncome" : "true", "payment" : "", "consumeDate" : ISODate("2013-02-28T16:00:00Z"), "createDate" : ISODate("2013-12-08T01:22:39.016Z"), "bank_name" : "6913b48a-1a95-48c5-81f5-6920031358d7"} }
{ "_id" : ObjectId("52a3c9df46c6a9627eeb033a"), "Counting" : { "id" : "f0d41ed9f29f47e7b68a05c378cf939d", "group" : "Salary", "user_id" : "847cadbf55f84615af3ee63922446b54", "subGroup" : "e–ae3?", "bank_id" : "f45d62b5e62f4b7fa8172870cd992f19", "fee" : 623, "isIncome" : "true", "payment" : "", "consumeDate" : ISODate("2013-04-18T16:00:00Z"), "createDate" : ISODate("2013-12-08T01:22:39.152Z"), "bank_name" : "30dd169e-723e-4748-93cd-2d7a45b4a3b7"} }
db.Product.aggregate([{
"$group": {
"_id": {
"tyear": {
"$year": [{
"$add": ["$Counting.consumeDate", 28800000]
}]
},
"tMonth": {
"$month": [{
"$add": ["$Counting.consumeDate", 28800000]
}]
},
"tDate": {
"$dayOfMonth": [{
"$add": ["$Counting.consumeDate", 28800000]
}]
},
},
"count": {
"$sum": "$Counting.fee"
}
} }])
Error Message :
"errmsg" : "exception: $add does not support dates"
Reference
How to agregate by year-month-day on a different timezone
I'd recommend doing this in two-steps as a project then a group.
var millisecondsFromUTC = 8 * 60 * 60 * 1000; //PST is -8 hours from UTC
db.Product.aggregate([
{ $project : {
consumeDateLocal: {
$subtract : [ "$Counting.consumeDate", millisecondsFromUTC ]
},
fee: '$Counting.fee" } },
{ $group: {
_id: {
"tyear": { $year: "$consumeDateLocal" },
"tMonth": { "$month": "$consumeDateLocal" },
"tDate": { "$dayOfMonth": "consumeDateLocal" }
},
count: {
$sum: "$fee"
}
} } ], ...);
I do it like this.
millisecondsFromUTC = 8 * 3600 * 1000
Db.collection.aggreagte([
{$match: query},
{
$group: {
_id: {
$dateToString: {
format: "%Y-%m-%d",
date: {$add: ["$date", millisecondsFromUTC]}
},
click: {$sum: '$click'},
money: {$sum: {$divide: ['$money', 10000]}},
pv: {$sum: '$pv'},
req: {$sum: '$req'},
date: {$last: '$date'}
}
}]