How to count fields in Mongodb Aggregation - mongodb

I have a document with entries like this
{
"_id": ObjectId("5644c495d0807a1750043237"),
"siteid": "123456"
"amount": 1.32
}
Some documents have other amounts eg."cashbackAmount"
I want a sum and a count for each amount fields. Not every document contains all the amount fields.
I hjave tried the following
{
$group: {
"_id": "$siteid",
item2: { "$sum": "$amount" },
item3: { "$sum": "$totalAmount" },
item4: { "$sum": "$cashbackAmount" },
item5: { "$sum": "$unitPrice" },
}
}
It gives me the sum, but I cannot work out how to get the number times each amount field is present.
{ "$sum": 1 } does not work because that gives me all the documents that have any one of the totals fields.

I guess you probably want something like that
db.getCollection('amounts').aggregate([
{
$project: {
siteid: 1,
amount: 1,
totalAmount: 1,
unitPrice: 1,
cashbackAmount: 1,
amountPresent: {
$cond: {
if: "$amount",
then: 1,
else: 0
}
},
totalAmountPresent: {
$cond: {
if: "$totalAmount",
then: 1,
else: 0
}
},
cashbackAmountPresent: {
$cond: {
if: "$cashbackAmount",
then: 1,
else: 0
}
},
unitPricePresent: {
$cond: {
if: "$unitPrice",
then: 1,
else: 0
}
}
}
},
{
$group: {
"_id": "$siteid",
amountSum: { "$sum": "$amount" },
amountCount: { "$sum": "$amountPresent" },
totalAmountSum: { "$sum": "$totalAmount" },
totalAmountCount: { "$sum": "$totalAmountPresent" },
cashbackAmountSum: { "$sum": "$cashbackAmount" },
cashbackAmountCount: { "$sum": "$cashbackAmountPresent" },
unitPriceSum: { "$sum": "$unitPrice" },
unitPriceCount: { "$sum": "$unitPricePresent" }
}
}
])

If you know the amount fields in advance then you could do this in a single aggregation operation where you create the pipeline dynamically.
Check out the following demonstration:
var amountFields = ["amount", "totalAmount", "cashbackAmount", "unitPrice"],
groupOperator = { "$group": { "_id": "$siteid" } };
amountFields.forEach(function (field){
groupOperator["$group"][field+"Total"] = { "$sum": "$"+field };
groupOperator["$group"][field+"Count"] = {
"$sum": {
"$cond": [ { "$gt": [ "$"+field, null ] }, 1, 0 ]
}
};
});
db.test.aggregate([groupOperator]);
Populate Test Documents
db.test.insert([
{
"siteid": "123456",
"amount": 1.32
},
{
"siteid": "123456",
"cashbackAmount": 8.32
},
{
"siteid": "123456",
"cashbackAmount": 9.74
},
{
"siteid": "123456",
"unitPrice": 0.19
},
{
"siteid": "123456",
"amount": 27.8,
"totalAmount": 15.22,
"unitPrice": 5.10,
"cashbackAmount": 43.62
},
{
"siteid": "123456",
"unitPrice": 5.07
},
{
"siteid": "123456",
"amount": 12.98,
"totalAmount": 32.82
},
{
"siteid": "123456",
"amount": 6.65,
"unitPrice": 5.10
}
])
Sample Aggregation Output
{
"_id" : "123456",
"amountTotal" : 48.75,
"amountCount" : 4,
"totalAmountTotal" : 48.04,
"totalAmountCount" : 2,
"cashbackAmountTotal" : 61.68,
"cashbackAmountCount" : 3,
"unitPriceTotal" : 15.46,
"unitPriceCount" : 4
}

Related

MongoDB : group and count users by gender, civilStatus and professionalCategory

I have a collection of users, each user has a profile. I want to implement a query to make statistics on users.
This is my collection.
[
{
"_id": ObjectId("61d2db0d273a9076d630697b"),
"state": "VALIDATED",
"phone": "xxx",
"civilStatus": "SINGLE",
"gender": "MALE",
"professionalCategory": "STUDENT"
}
]
I want the result to contain an array of all genders of users in the database, and the number of users with each gender. same for civilStatus and professionalCategories
This is the result i am looking for :
{
"total": 2000
"validated": 1800,
"genders": [
{
"value": "MALE",
"count": 1200
},
{
"value": "FEMALE",
"count": 600
}
],
"civilStatus": [
{
"value": "SINGLE",
"count": "300"
}
...
],
"professionalCategories": [
{
"value": "STUDENT",
"count": "250"
}
...
]
}
I implemented the query, but I still have a few things that I don't know how to do.
db.getCollection("users").aggregate([
{
$group: {
_id: null,
validated: {
$sum: {
$cond: {
if: { $eq: ["$state", "VALIDATED"] },
then: 1,
else: 0
}
}
},
genders: {
$push: "$gender"
},
civilStatus: {
$push: "$civilStatus"
},
professionalCategories: {
$push: "$professionalCategory"
}
}
}
])
This is the result of this query :
{
"total": 2000
"validated": 1800,
"genders": [
"MALE",
"MALE",
"FEMALE",
"MALE",
"FEMALE",
"FEMALE"
...
],
"civilStatus": [
"SINGLE",
"MARIED",
"SINGLE",
...
],
"professionalCategories": [
"STUDENT",
"WORKER",
"RETIRED"
...
]
}
I miss how to group each gender, civil Status and professional Category and calculate the number of users for each one.
I also tried this query, but I don't know how to complete the "count" field for each item of the array :
db.getCollection("users").aggregate([
{
$group: {
_id: null,
validated: {
$sum: {
$cond: {
if: { $eq: ["$state", "VALIDATED"] },
then: 1,
else: 0
}
}
},
genders: {
$addToSet: {
value: "$gender",
count: {
//
}
}
},
civilStatus: {
$addToSet: {
value: "$civilStatus",
count: {
//
}
}
},
professionalCategories: {
$addToSet: {
value: "$professionalCategory",
count: {
//
}
}
},
}
}
])
if the query was to treat only one field, for example gender. it would have been easier with "unwind". but here I have 3 fields.
can someone help me please?
You can use following aggregation
Here is the code
db.collection.aggregate([
{
"$facet": {
"genders": [
{
"$group": {
"_id": "$gender",
"total": { $sum: 1 }
}
}
],
"civilStatus": [
{
"$group": {
"_id": "$civilStatus",
"total": { $sum: 1 }
}
}
],
"professionalCategory": [
{
"$group": {
"_id": "$professionalCategory",
"total": { $sum: 1 }
}
}
],
"validated": [
{
"$group": {
"_id": "$state",
"total": { "$sum": 1 }
}
}
]
}
},
{
$set: {
validated: {
"$filter": {
"input": "$validated",
"cond": {
"$eq": [ "$$this._id", "VALIDATED" ]
}
}
}
}
},
{
$set: {
validated: {
"$ifNull": [
{
"$arrayElemAt": [ "$validated", 0 ]
},
0
]
}
}
},
{
$set: { validated: "$validated.total" }
}
])
Working Mongo playground

Sort multiple levels of array after group in Mongo Java

I have documents with below schema
id :
currencyCode : "USD"
businessDayStartDate : ""
hourZoneNumber : 1
customerCount : 0
itemQuantity : 4
nodeId : "STORE_DEV"
endpointId : "998"
amount : 4
I am trying to find documents that match nodeId and trying to aggregate customerCount, itemQuantity and amount for each hourZoneNumber.
Below is the query
db.getCollection("xxx").aggregate([
{ "$match": { "nodeId": { "$in":["STORE_DEV_1","STORE_DEV_2"] }, "businessDayStartDate" : { "$gte": "2022-03-04" , "$lte": "2022-03-07" } }},
{ "$group": {
"_id": {
"nodeId": "$nodeId",
"endpointId": "$endpointId",
"hourZoneNumber": "$hourZoneNumber"
},
"customerCount": { "$sum": "$customerCount" },
"itemQuantity" : { "$sum": "$itemQuantity" },
"amount" : { "$sum": "$amount" }
}
},
{ "$group": {
"_id": {
"nodeId": "$_id.nodeId",
"endpointId": "$_id.endpointId"
},
"hourZones": {
"$addToSet": {
"hourZoneNumber": "$_id.hourZoneNumber",
"customerCount": { "$sum": "$customerCount" },
"itemQuantity" : { "$sum": "$itemQuantity" },
"amount" : { "$sum": "$amount" }
}
}
}
},
{ "$group": {
"_id": "$_id.nodeId",
"endpoints": {
"$addToSet": {
"endpointId": "$_id.endpointId",
"hourZones": "$hourZones"
}
},
"total": {
"$addToSet": {
"customerCount": { "$sum": "$hourZones.customerCount" },
"itemQuantity" : { "$sum": "$hourZones.itemQuantity" },
"amount" : { "$sum": "$hourZones.amount" }
}
}
}
},
{
$project: {
_id: 0,
nodeId: "$_id",
endpoints: 1,
hourZones: 1,
total: 1
}
}
])
Output is as below:
{
nodeId: 'STORE_DEV_2',
endpoints: [
{ endpointId: '998',
hourZones:
[
{ hourZoneNumber: 1,
customerCount: 0,
itemQuantity: 4,
amount: Decimal128("4") }
] } ],
total: [ { customerCount: 0, itemQuantity: 4, amount: Decimal128("4") } ],
}
{
nodeId: 'STORE_DEV_1',
endpoints:
[ { endpointId: '999',
hourZones:
[ { hourZoneNumber: 2,
customerCount: 2,
itemQuantity: 4,
amount: Decimal128("4") },
{ hourZoneNumber: 1,
customerCount: 4,
itemQuantity: 8,
amount: Decimal128("247.56") } ] } ],
total:
[ { customerCount: 6,
itemQuantity: 12,
amount: Decimal128("251.56") } ]
}
I want the output to be sorted as : First sort by nodeId, then by endpointId within the endpoints and lastly by hourZoneNumber within hourZones.
How do I do this ? I tried using sort() with all the three fields. But it did not work. Also, can someone please confirm if there is any better way than the above code, as I am new to Mongo DB.
Edit:
Please find sample input data at https://mongoplayground.net/p/FYm3QMMgrNI
Since you already have the separated data at the beginning, it is simply a matter of saving these values through the grouping and then sorting by them in the end.
Edit: In order to sort each inner array, we use $push instead of $addToSet inside the $group and $sort before each $group:
db.collection.aggregate([
{
"$match": {
"nodeId": {"$in": ["STORE_DEV_TTEC", "STORE_DEV_TTEZ"]
},
"businessDayStartDate": {"$gte": "2022-03-04", "$lte": "2022-03-07"}
}
},
{
"$sort": {"nodeId": 1, "endpointId": 1, "hourZoneNumber": 1}
},
{
"$group": {
"_id": {
"nodeId": "$nodeId",
"endpointId": "$endpointId",
"hourZoneNumber": "$hourZoneNumber"
},
"customerCount": {"$sum": "$customerCount"},
"itemQuantity": {"$sum": "$itemQuantity"},
"amount": {"$sum": "$amount"}
}
},
{"$sort": {"_id.hourZoneNumber": 1}
},
{
"$group": {
"_id": {
"nodeId": "$_id.nodeId",
"endpointId": "$_id.endpointId"
},
"hourZones": {
"$push": {
"hourZoneNumber": "$_id.hourZoneNumber",
"customerCount": {"$sum": "$customerCount"},
"itemQuantity": {"$sum": "$itemQuantity"},
"amount": {"$sum": "$amount"}
}
},
hourZoneKey: {$first: "$_id.hourZoneNumber"}
}
},
{"$sort": {"_id.endpointId": 1}
},
{
"$group": {
"_id": "$_id.nodeId",
"endpoints": {
"$push": {
"endpointId": "$_id.endpointId",
"hourZones": "$hourZones"
}
},
endpointKey: {$first: "$_id.endpointId"},
hourZoneKey: {$first: "$hourZoneKey"}
}
},
{"$sort": {"nodeId": 1, "endpointKey": 1, "hourZoneKey": 1}
},
{
$project: {_id: 0, nodeId: "$_id", endpoints: 1, hourZones: 1, total: 1}
}
])
You can see it here

Fill day gaps of two-dimensional timeseries data in MongoDB with aggregate

I have a collection of two-dimensional timeseries data as follows:
[
{
"value" : 9,
"timestamp" : "2020-12-30T02:06:33.000+0000",
"recipeId" : 15
},
{
"value" : 2,
"timestamp" : "2020-12-30T12:04:23.000+0000",
"recipeId" : 102
},
{
"value" : 5,
"timestamp" : "2020-12-30T15:09:23.000+0000",
"recipeId" : 102
},
...
]
The records have a recipeId which is the first level of grouping I'm looking for. All values for a day of a recipe should be summed up. I want an array of timeseries per recipeId. I need the missing days to be filled with a 0. I want this construct to be created for a provided start and end date range.
Some like this for date range of 2020-12-29 to 2020-12-31:
[
[
{
"sum" : 0,
"timestamp" : "2020-12-29",
"recipeId" : 15
},
{
"sum" : 9,
"timestamp" : "2020-12-30",
"recipeId" : 15
},
{
"sum" : 0,
"timestamp" : "2020-12-31",
"recipeId" : 15
},
...
],
[
{
"sum" : 0,
"timestamp" : "2020-12-29",
"recipeId" : 0
},
{
"sum" : 7,
"timestamp" : "2020-12-30",
"recipeId" : 102
},
{
"sum" : 0,
"timestamp" : "2020-12-31",
"recipeId" : 102
},
...
]
]
This is what I currently have and it's only partially solving my requirements. I can't manage to get the last few stages right:
[
{
"$match": {
"timestamp": {
"$gte": "2020-12-29T00:00:00.000Z",
"$lte": "2020-12-31T00:00:00.000Z"
}
}
},
{
"$addFields": {
"timestamp": {
"$dateFromParts": {
"year": { "$year": "$timestamp" },
"month": { "$month": "$timestamp" },
"day": { "$dayOfMonth": "$timestamp" }
}
},
"dateRange": {
"$map": {
"input": {
"$range": [
0,
{
"$trunc": {
"$divide": [
{
"$subtract": [
"2020-12-31T00:00:00.000Z",
"2020-12-29T00:00:00.000Z"
]
},
1000
]
}
},
86400
]
},
"in": {
"$add": [
"2020-12-29T00:00:00.000Z",
{ "$multiply": ["$$this", 1000] }
]
}
}
}
}
},
{ "$unwind": "$dateRange" },
{
"$group": {
"_id": { "date": "$dateRange", "recipeId": "$recipeId" },
"count": {
"$sum": { "$cond": [{ "$eq": ["$dateRange", "$timestamp"] }, 1, 0] }
}
}
},
{
"$group": {
"_id": "$_id.date",
"total": { "$sum": "$count" },
"byRecipeId": {
"$push": {
"k": { "$toString": "$_id.recipeId" },
"v": { "$sum": "$count" }
}
}
}
},
{ "$sort": { "_id": 1 } },
{
"$project": {
"_id": 0,
"timestamp": "$_id",
"total": "$total",
"byRecipeId": {
"$arrayToObject": {
"$filter": { "input": "$byRecipeId", "cond": "$$this.v" }
}
}
}
}
]
which results in:
[
{
"timestamp": "2020-12-29T00:00:00.000Z",
"total": 21,
"byRecipeId": {}
},
{
"timestamp": "2020-12-30T00:00:00.000Z",
"total": 0,
"byRecipeId": {
"15": 9,
"102": 7
}
},
{
"timestamp": "2020-12-31T00:00:00.000Z",
"total": 0,
"byRecipeId": {}
}
]
I'm open to alternative solution of course. For examples I came across this post: https://medium.com/#alexandro.ramr777/fill-missing-values-using-mongodb-aggregation-framework-f011114e83e0 but it doesn't deal with multi-dimensions.
You could use the $redcue function. This code fills the gabs of Minutes for current day. Should be easy to adapt it to give missing Days.
{
$addFields: {
data: {
$reduce: {
input: { $range: [0, 24 * 60] },
initialValue: [],
in: {
$let: {
vars: {
ts: {
$add: [
moment().startOf('day').toDate(),
{ $multiply: ["$$this", 1000 * 60] }
]
}
},
in: {
$concatArrays: [
"$$value",
[{
$cond: {
if: { $in: ["$$ts", "$data.timestamp"] },
then: {
$first: {
$filter: {
input: "$data",
cond: { $eq: ["$$this.timestamp", "$$ts"] }
}
}
},
else: { timestamp: "$$ts", total: 0 }
}
}]
]
}
}
}
}
}
}
}
In my opinion, $reduce is more elegant than $map, however based on my experience the performance is much worse with $reduce.

Mongo Aggregation : $group and $project array to object for counts

I have documents like:
{
"platform":"android",
"install_date":20151029
}
platform - can have one value from [android|ios|kindle|facebook ] .
install_date - there are many install_dates
There are also many fields.
Aim : I am calculating installs per platform on particular date.
So I am using group by in aggregation framework and make counts by platform. Document should look like like:
{
"install_date":20151029,
"platform" : {
"android":1000,
"ios": 2000,
"facebook":1500
}
}
I have done like:
db.collection.aggregate([
{
$group: {
_id: { platform: "$platform",install_date:"$install_date"},
count: { "$sum": 1 }
}
},
{
$group: {
_id: { install_date:"$_id.install_date"},
platform: { $push : {platform :"$_id.platform", count:"$count" } }
}
},
{
$project : { _id: 0, install_date: "$_id.install_date", platform: 1 }
}
])
which Gives document like:
{
"platform": [
{
"platform": "facebook",
"count": 1500
},
{
"platform": "ios",
"count": 2000
},
{
"platform": "android",
"count": 1000
}
],
"install_date": 20151027
}
Problem:
Projecting array to single object as "platform"
With MongoDb 3.4 and newer, you can leverage the use of $arrayToObject operator to get the desired result. You would need to run the following aggregate pipeline:
db.collection.aggregate([
{ "$group": {
"_id": {
"date": "$install_date",
"platform": { "$toLower": "$platform" }
},
"count": { "$sum": 1 }
} },
{ "$group": {
"_id": "$_id.date",
"counts": {
"$push": {
"k": "$_id.platform",
"v": "$count"
}
}
} },
{ "$addFields": {
"install_date": "$_id",
"platform": { "$arrayToObject": "$counts" }
} },
{ "$project": { "counts": 0, "_id": 0 } }
])
For older versions, take advantage of the $cond operator in the $group pipeline step to evaluate the counts based on the platform field value, something like the following:
db.collection.aggregate([
{ "$group": {
"_id": "$install_date",
"android_count": {
"$sum": {
"$cond": [ { "$eq": [ "$platform", "android" ] }, 1, 0 ]
}
},
"ios_count": {
"$sum": {
"$cond": [ { "$eq": [ "$platform", "ios" ] }, 1, 0 ]
}
},
"facebook_count": {
"$sum": {
"$cond": [ { "$eq": [ "$platform", "facebook" ] }, 1, 0 ]
}
},
"kindle_count": {
"$sum": {
"$cond": [ { "$eq": [ "$platform", "kindle" ] }, 1, 0 ]
}
}
} },
{ "$project": {
"_id": 0, "install_date": "$_id",
"platform": {
"android": "$android_count",
"ios": "$ios_count",
"facebook": "$facebook_count",
"kindle": "$kindle_count"
}
} }
])
In the above, $cond takes a logical condition as it's first argument (if) and then returns the second argument where the evaluation is true (then) or the third argument where false (else). This makes true/false returns into 1 and 0 to feed to $sum respectively.
So for example, if { "$eq": [ "$platform", "facebook" ] }, is true then the expression will evaluate to { $sum: 1 } else it will be { $sum: 0 }

Aggregate Field Values to Separate Key Names

I have a collection in MongoDB with sample data something like this (simplified):
{
_id: 1,
username: "ted",
content: "4125151",
status: "complete"
}
{
_id: 2,
username: "sam",
content: "4151",
status: "new"
}
{
_id: 3,
username: "ted",
content: "511",
status: "new"
}
{
_id: 4,
username: "ted",
content: "411",
status: "in_progress"
}
{
_id: 5,
username: "pat",
content: "1sds51",
status: "complete"
}
{
_id: 6,
username: "ted",
content: "4151",
status: "in_progress"
}
{
_id: 7,
username: "ted",
content: "4125",
status: "in_progress"
}
I need to aggregate the data such that for each user, I get a count for each status value as well as a total number of records. The result should look like this:
[
{
username: “pat”,
new: 0,
in_progress: 0,
complete: 1,
total: 1
},
{
username: “sam”,
new: 1,
in_progress: 0,
complete: 0,
total: 1
},
{
username: “ted”,
new: 1,
in_progress: 3,
complete: 1,
total: 5
}
]
Or any format that will effectively serve the same purpose which is, I want to be able to use with ngRepeat to display on the front end in this format:
User New In Progress Complete Total
pat 0 0 1 1
sam 1 0 0 1
ted 1 3 1 5
I can perform this aggregation:
{
"$group": {
"_id": {
"username": "$username",
"status": "$status"
},
"count": {
"$sum": 1
}
}
}
This gives me the individual count for each user/status combination that has at least one record. But then I have to piece it together to get it in the format that I can use on the front end. This is not at all ideal.
Is there a way to perform the aggregation to get the data in the format that I need?
What you want is a "conditional" aggregation of the values to produce a distinct field property for each status.
This is pretty simple to do using the $cond operator:
[
{ "$group": {
"_id": "$username",
"new": { "$sum": { "$cond": [{ "$eq": [ "$status", "new" ] },1,0 ] } },
"complete": { "$sum": { "$cond": [{ "$eq": [ "$status", "complete" ] },1,0 ] } },
"in_progress": { "$sum": { "$cond": [{ "$eq": [ "$status", "in_progress" ] },1,0 ] } },
"total": { "$sum": 1 }
}}
]
Presuming of course those are the only "status" values, but if they are not then just add an additional $project to sum the fields you want:
[
{ "$match": { "status": { "$in": [ "new", "complete", "in_progress" ] } } },
{ "$group": {
"_id": "$username",
"new": { "$sum": { "$cond": [{ "$eq": [ "$status", "new" ] },1,0 ] } },
"complete": { "$sum": { "$cond": [{ "$eq": [ "$status", "complete" ] },1,0 ] } },
"in_progress": { "$sum": { "$cond": [{ "$eq": [ "$status", "in_progress" ] },1,0 ] } }
}},
{ "$project": {
"new": 1,
"complete": 1,
"in_progress": 1,
"total": { "$add": [ "$new", "$complete", "$in_progress" ] }
]
Or just include that $add within the $group with the same calculations for the separate fields. But the $match is probably just the best idea if there are indeed other status values you don't want.
Another answer using $group twice and a $push, In this below query you need to compute the final total on UI side.
db.collection.aggregate([
{
"$group": {
"_id": {
"username": "$username",
"status": "$status"
},
"statuscount": {
"$sum": 1
}
}
},
{
"$group": {
"_id": "$_id.username",
"finalstatus": {
"$push": {
"Status": "$_id.status",
"statuscount": "$statuscount"
}
}
}
}
])