Mongo Aggregation : $group and $project array to object for counts - mongodb

I have documents like:
{
"platform":"android",
"install_date":20151029
}
platform - can have one value from [android|ios|kindle|facebook ] .
install_date - there are many install_dates
There are also many fields.
Aim : I am calculating installs per platform on particular date.
So I am using group by in aggregation framework and make counts by platform. Document should look like like:
{
"install_date":20151029,
"platform" : {
"android":1000,
"ios": 2000,
"facebook":1500
}
}
I have done like:
db.collection.aggregate([
{
$group: {
_id: { platform: "$platform",install_date:"$install_date"},
count: { "$sum": 1 }
}
},
{
$group: {
_id: { install_date:"$_id.install_date"},
platform: { $push : {platform :"$_id.platform", count:"$count" } }
}
},
{
$project : { _id: 0, install_date: "$_id.install_date", platform: 1 }
}
])
which Gives document like:
{
"platform": [
{
"platform": "facebook",
"count": 1500
},
{
"platform": "ios",
"count": 2000
},
{
"platform": "android",
"count": 1000
}
],
"install_date": 20151027
}
Problem:
Projecting array to single object as "platform"

With MongoDb 3.4 and newer, you can leverage the use of $arrayToObject operator to get the desired result. You would need to run the following aggregate pipeline:
db.collection.aggregate([
{ "$group": {
"_id": {
"date": "$install_date",
"platform": { "$toLower": "$platform" }
},
"count": { "$sum": 1 }
} },
{ "$group": {
"_id": "$_id.date",
"counts": {
"$push": {
"k": "$_id.platform",
"v": "$count"
}
}
} },
{ "$addFields": {
"install_date": "$_id",
"platform": { "$arrayToObject": "$counts" }
} },
{ "$project": { "counts": 0, "_id": 0 } }
])
For older versions, take advantage of the $cond operator in the $group pipeline step to evaluate the counts based on the platform field value, something like the following:
db.collection.aggregate([
{ "$group": {
"_id": "$install_date",
"android_count": {
"$sum": {
"$cond": [ { "$eq": [ "$platform", "android" ] }, 1, 0 ]
}
},
"ios_count": {
"$sum": {
"$cond": [ { "$eq": [ "$platform", "ios" ] }, 1, 0 ]
}
},
"facebook_count": {
"$sum": {
"$cond": [ { "$eq": [ "$platform", "facebook" ] }, 1, 0 ]
}
},
"kindle_count": {
"$sum": {
"$cond": [ { "$eq": [ "$platform", "kindle" ] }, 1, 0 ]
}
}
} },
{ "$project": {
"_id": 0, "install_date": "$_id",
"platform": {
"android": "$android_count",
"ios": "$ios_count",
"facebook": "$facebook_count",
"kindle": "$kindle_count"
}
} }
])
In the above, $cond takes a logical condition as it's first argument (if) and then returns the second argument where the evaluation is true (then) or the third argument where false (else). This makes true/false returns into 1 and 0 to feed to $sum respectively.
So for example, if { "$eq": [ "$platform", "facebook" ] }, is true then the expression will evaluate to { $sum: 1 } else it will be { $sum: 0 }

Related

MongoDB group by and SUM by array

I'm new in mongoDB.
This is one example of record from collection:
{
supplier: 1,
type: "sale",
items: [
{
"_id": ObjectId("60ee82dd2131c5032342070f"),
"itemBuySum": 10
},
{
"_id": ObjectId("60ee82dd2131c50323420710"),
"itemBuySum": 10,
},
{
"_id": ObjectId("60ee82dd2131c50323420713"),
"itemBuySum": 10
},
{
"_id": ObjectId("60ee82dd2131c50323420714"),
"itemBuySum": 20
}
]
}
I need to group by TYPE field and get the SUM. This is output I need:
{
supplier: 1,
sales: 90,
returns: 170
}
please check Mongo playground for better understand. Thank you!
$match - Filter documents.
$group - Group by type and add item into data array which leads to the result like:
[
[/* data 1 */],
[/* data 2 */]
]
$project - Decorate output document.
3.1. First $reduce is used to flatten the nested array to a single array (from Result (2)) via $concatArrays.
3.2. Second $reduce is used to aggregate $sum the itemBuySum.
db.collection.aggregate({
$match: {
supplier: 1
},
},
{
"$group": {
"_id": "$type",
"supplier": {
$first: "$supplier"
},
"data": {
"$push": "$items"
}
}
},
{
"$project": {
_id: 0,
"supplier": "$supplier",
"type": "$_id",
"returns": {
"$reduce": {
"input": {
"$reduce": {
input: "$data",
initialValue: [],
in: {
"$concatArrays": [
"$$value",
"$$this"
]
}
}
},
"initialValue": 0,
"in": {
$sum: [
"$$value",
"$$this.itemBuySum"
]
}
}
}
}
})
Sample Mongo Playground
db.collection.aggregate([
{
$match: {
supplier: 1
},
},
{
"$group": {
"_id": "$ID",
"supplier": {
"$first": "$supplier"
},
"sale": {
"$sum": {
"$cond": {
"if": {
"$eq": [
"$type",
"sale"
]
},
"then": {
"$sum": "$items.itemBuySum"
},
"else": {
"$sum": 0
}
}
}
},
"returns": {
"$sum": {
"$sum": {
"$cond": {
"if": {
"$eq": [
"$type",
"return"
]
},
"then": {
"$sum": "$items.itemBuySum"
},
"else": {
"$sum": 0
}
}
}
}
}
}
},
{
"$project": {
_id: 0,
supplier: 1,
sale: 1,
returns: 1
}
}
])

How can I group boolean fields in mongodb?

Here's a very simple data example.
[
{
"aaa": true,
"bbb": 111,
},
{
"aaa": false,
"bbb": 111,
}
]
Then, what query should be executed so that I can get the result like this?
[
{
"_id": "0",
"bbb_sum": 222,
"aaa_and": false,
"aaa_or": true
}
]
Actually, I've tried with a query like this
db.collection.aggregate([
{
"$group": {
"_id": "0",
"bbb_sum": {
"$sum": "$bbb"
},
"aaa_and": {
"$and": ["$aaa", true]
},
"aaa_or": {
"$or": ["$aaa", false]
}
}
}
])
But the Mongo Playground complains query failed: (Location40237) The $and accumulator is a unary operator, that's quite confusing.
You can also find this simple test case here https://mongoplayground.net/p/8dqtXJ93vIx
Also, I've searched for similar questions on both Google and Stackoverflow, but I can't find one.
Thanks in advance!
Not like "$sum","$and" and "$or" are not aggregation operators that can be used in "$group". You can temporary save all the "aaa" field into an array and then use "$project" operator to process the data.
db.collection.aggregate([
{
"$group": {
"_id": "0",
"sum": {
"$sum": "$bbb"
},
"aaa_all": {
"$push": "$aaa"
}
}
},
{
"$project": {
"sum": 1,
"aaa_and": {
"$allElementsTrue": "$aaa_all"
},
"aaa_or": {
"$anyElementTrue": "$aaa_all"
}
}
}
])
Here is the case: https://mongoplayground.net/p/Y-Fs_Ch9lwk
$min and $max operators actually work with booleans too, false being considered smaller than true
thinking of them as 0 and 1 might be easier to understand :
$min: [a,…,n] will return 1/true only if all elements are 1/true => this is a AND
$max: [a,…,n] will return 0/false only if all elements are 0/false => this is a OR
(the operators will return booleans if input booleans, the analogy with numbers is only for the sake of comprehension)
So your request can simply become :
db.collection.aggregate([
{
"$group": {
"_id": "0",
"bbb_sum": {
"$sum": "$bbb"
},
"aaa_and": {
"$min": "$aaa"
},
"aaa_or": {
"$max": "$aaa"
}
}
}
])
You can do some logics as below
db.collection.aggregate([
{
"$group": {//Group by desired id
"_id": null,
"sum": {//Sum the value
"$sum": "$bbb"
},
"aaa_and": {
"$sum": {
"$cond": {
"if": {
"$eq": [
"$aaa",
true
]
},
"then": 1, //If true returns 1
"else": 0 // else 0
}
}
},
"total": { //helper to do the logic
$sum: 1
}
}
},
{
$project: {
aaa_and: {
"$eq": [//If total matches with number of true, all are true
"$total",
"$aaa_and"
]
},
aaa_or: {
"$ne": [//if value greater than 0, then there is at least one true
"$aaa_and",
"0"
]
},
sum: 1
}
}
])
playground

MongoDB: Assign document objects to field in '$project' stage

I have a user collection:
[
{"_id": 1,"name": "John", "age": 25, "valid_user": true}
{"_id": 2, "name": "Bob", "age": 40, "valid_user": false}
{"_id": 3, "name": "Jacob","age": 27,"valid_user": null}
{"_id": 4, "name": "Amelia","age": 29,"valid_user": true}
]
I run a '$facet' stage on this collection. Checkout this MongoPlayground.
I want to talk about the first output from the facet stage. The following is the response currently:
{
"user_by_valid_status": [
{
"_id": false,
"count": 1
},
{
"_id": true,
"count": 2
},
{
"_id": null,
"count": 1
}
]
}
However, I want to restructure the output in this way:
"analytics": {
"invalid_user": {
"_id": false
"count": 1
},
"valid_user": {
"_id": true
"count": 2
},
"user_with_unknown_status": {
"_id": null
"count": 1
}
}
The problem with using a '$project' stage along with 'arrayElemAt' is that the order may not be definite for me to associate an index with an attribute like 'valid_users' or others. Also, it gets further complicated because unlike the sample documents that I have shared, my collection may not always contain all the three categories of users.
Is there some way I can do this?
You can use $switch conditional operator,
$project to show value part in v with _id and count field as object, k to put $switch condition
db.collection.aggregate([
{
"$facet": {
"user_by_valid_status": [
{
"$group": {
"_id": "$valid_user",
"count": { "$sum": 1 }
}
},
{
$project: {
_id: 0,
v: { _id: "$_id", count: "$count" },
k: {
$switch: {
branches: [
{ case: { $eq: ["$_id", null] }, then: "user_with_unknown_status" },
{ case: { $eq: ["$_id", false] }, then: "invalid_user" },
{ case: { $eq: ["$_id", true] }, then: "valid_user" }
]
}
}
}
}
],
"users_above_30": [{ "$match": { "age": { "$gt": 30 } } }]
}
},
$project stage in root, convert user_by_valid_status array to object using $arrayToObject
{
$project: {
analytics: { $arrayToObject: "$user_by_valid_status" },
users_above_30: 1
}
}
])
Playground

Active and Total User Count Mongodb

I want to write a group by query to written active user and total count(both active and inactive) grouped by a date column in mongodb. I am able to run them as two separate scripts but how to retrieve the same information in one script
db.user.aggregate(
{
"$match": { 'phoneInfo.verifiedFlag': true}
},
{
"$project": {
yearMonthDayUTC: { $dateToString: { format: "%Y-%m-%d", date: "$createdOn" } }
}
},
{
"$group": {
"_id": {day: "$yearMonthDayUTC"},
count: {
"$sum": 1
}
}
},
{
$sort: {
"_id.day": 1,
}
})
You can use the $cond operator in your group to create a conditional count as follows (assuming the inactive/active values are in a field called status):
db.user.aggregate([
{ "$match": { 'phoneInfo.verifiedFlag': true} },
{
"$group": {
"_id": { "$dateToString": { "format": "%Y-%m-%d", "date": "$createdOn" } },
"total": { "$sum": 1 },
"active_count": {
"$sum": {
"$cond": [ { "$eq": [ "$status", "active" ] }, 1, 0 ]
}
},
"inactive_count": {
"$sum": {
"$cond": [ { "$eq": [ "$status", "inactive" ] }, 1, 0 ]
}
}
}
},
{ "$sort": { "_id": 1 } }
])
For different values you can adapt the following pipeline:
db.user.aggregate([
{ "$match": { 'phoneInfo.verifiedFlag': true} },
{
"$group": {
"_id": {
"day": {
"$dateToString": {
"format": "%Y-%m-%d",
"date": "$createdOn"
}
},
"status": { "$toLower": "$status" }
},
"count": { "$sum": 1 }
}
},
{
"$group": {
"_id": "$_id.day",
"counts": {
"$push": {
"status": "$_id.status",
"count": "$count"
}
}
}
},
{ "$sort": { "_id": 1 } }
])

Combine three different Aggregate into a single one

I have three different Aggregations that I would like to combine to one.
objsCount = self.db.bidding.aggregate([
{ "$group": {
"_id": "$bid_item",
"number_of_objects": { "$sum": 1 }
}}
])
objsPrice = self.db.bidding.aggregate([
{"$match": {"$and" : [{"price": {"$ne":"null"}},{"users":{"$ne":"null"}}]}},
{ "$group": {
"_id": "$bid_item",
"total_price": { "$sum": '$price' },
"total_users": { "$sum": '$users' }
}}
])
objsHigestBid = self.db.bidding.aggregate([
{"$match": {"$and" : [{"highestBid": {"$ne":"null"}},{"users":{"$ne":"null"}}]}},
{ "$group": {
"_id": "$bid_item",
"total_number_of_users_after_bid": { "$sum": '$highestBid' }
}}
])
The output I'm looking for is:
[{ _id : '',
number_of_objects : '',
total_price : '',
total_users : '',
total_number_of_users_after_bid : ''
},...]
How could I change my "aggregates" to one single "aggregate" that make this possible?
With a single $group pipeline, you can take advantage of the $cond operator in the pipeline step to evaluate the counts based on the logic specified, something like the following:
db.bidding.aggregate([
{
"$group": {
"_id": "$bid_item",
"number_of_objects": { "$sum": 1 },
"total_price": {
"$sum": {
"$cond": [ { "$ne": [ "$price", "null" ] }, "$price", 0 ]
}
},
"total_users": {
"$sum": {
"$cond": [ { "$ne": [ "$users", "null" ] }, "$users", 0 ]
}
},
"total_number_of_users_after_bid": {
"$sum": {
"$cond": [
{
"$and": [
{ "$ne": [ "$highestBid", "null" ] },
{ "$ne": [ "$users", "null" ] }
]
},
"$highestBid", 0
]
}
}
}
}
])