How can I group boolean fields in mongodb? - mongodb

Here's a very simple data example.
[
  {
    "aaa": true,
    "bbb": 111
  },
  {
    "aaa": false,
    "bbb": 111
  }
]
Then, what query should be executed so that I can get the result like this?
[
{
"_id": "0",
"bbb_sum": 222,
"aaa_and": false,
"aaa_or": true
}
]
Actually, I've tried with a query like this
// Asker's original attempt, kept as-is: it fails with Location40237 because
// $and / $or are not accumulators that $group accepts.
db.collection.aggregate([
{
"$group": {
"_id": "0",
"bbb_sum": {
"$sum": "$bbb"
},
// Rejected by the server: $and cannot be used as a $group accumulator.
"aaa_and": {
"$and": ["$aaa", true]
},
// Rejected by the server: $or cannot be used as a $group accumulator.
"aaa_or": {
"$or": ["$aaa", false]
}
}
}
])
But the Mongo Playground complains query failed: (Location40237) The $and accumulator is a unary operator, that's quite confusing.
You can also find this simple test case here https://mongoplayground.net/p/8dqtXJ93vIx
Also, I've searched for similar questions on both Google and Stackoverflow, but I can't find one.
Thanks in advance!

Unlike "$sum", "$and" and "$or" are not accumulator operators that can be used in "$group". You can temporarily save all the "aaa" values into an array and then use the "$project" stage to process the data.
// Collect every "aaa" value per group, then reduce the array to AND / OR flags.
db.collection.aggregate([
  {
    "$group": {
      "_id": "0",
      // Named bbb_sum so the result matches the shape requested in the question.
      "bbb_sum": { "$sum": "$bbb" },
      // $and / $or cannot accumulate, so keep the raw booleans for the next stage.
      "aaa_all": { "$push": "$aaa" }
    }
  },
  {
    "$project": {
      "bbb_sum": 1,
      // true only when every collected value is truthy.
      "aaa_and": { "$allElementsTrue": "$aaa_all" },
      // true when at least one collected value is truthy.
      "aaa_or": { "$anyElementTrue": "$aaa_all" }
    }
  }
])
Here is the case: https://mongoplayground.net/p/Y-Fs_Ch9lwk

$min and $max operators actually work with booleans too, false being considered smaller than true
thinking of them as 0 and 1 might be easier to understand :
$min: [a,…,n] will return 1/true only if all elements are 1/true => this is an AND
$max: [a,…,n] will return 0/false only if all elements are 0/false => this is an OR
(the operators will return booleans if input booleans, the analogy with numbers is only for the sake of comprehension)
So your request can simply become :
// Booleans compare as false < true, so within a group $min behaves like AND
// and $max behaves like OR.
db.collection.aggregate([
  {
    $group: {
      _id: "0",
      bbb_sum: { $sum: "$bbb" },
      aaa_and: { $min: "$aaa" },
      aaa_or: { $max: "$aaa" }
    }
  }
])

You can do some logics as below
// Emulate AND / OR over the boolean "aaa" field by counting its true values.
db.collection.aggregate([
  {
    "$group": {
      "_id": null, // single group; replace with the desired grouping key
      "sum": { "$sum": "$bbb" },
      // Number of documents in the group whose "aaa" is exactly true.
      "true_count": {
        "$sum": {
          "$cond": {
            "if": { "$eq": ["$aaa", true] },
            "then": 1,
            "else": 0
          }
        }
      },
      // Number of documents in the group (helper for the AND test).
      "total": { "$sum": 1 }
    }
  },
  {
    "$project": {
      // AND: every document was true when the true-count equals the group size.
      "aaa_and": { "$eq": ["$total", "$true_count"] },
      // OR: at least one document was true. Compare against the NUMBER 0;
      // comparing a number with the string "0" never matches, which made the
      // previous $ne test always true.
      "aaa_or": { "$gt": ["$true_count", 0] },
      "sum": 1
    }
  }
])
playground

Related

MongoDB group by and SUM by array

I'm new in mongoDB.
This is one example of record from collection:
{
supplier: 1,
type: "sale",
items: [
{
"_id": ObjectId("60ee82dd2131c5032342070f"),
"itemBuySum": 10
},
{
"_id": ObjectId("60ee82dd2131c50323420710"),
"itemBuySum": 10,
},
{
"_id": ObjectId("60ee82dd2131c50323420713"),
"itemBuySum": 10
},
{
"_id": ObjectId("60ee82dd2131c50323420714"),
"itemBuySum": 20
}
]
}
I need to group by TYPE field and get the SUM. This is output I need:
{
supplier: 1,
sales: 90,
returns: 170
}
please check Mongo playground for better understand. Thank you!
$match - Filter documents.
$group - Group by type and add item into data array which leads to the result like:
[
[/* data 1 */],
[/* data 2 */]
]
$project - Decorate output document.
3.1. First $reduce is used to flatten the nested array to a single array (from Result (2)) via $concatArrays.
3.2. Second $reduce is used to aggregate $sum the itemBuySum.
// Sum itemBuySum per "type" for supplier 1; emits one document per type.
// The stages are wrapped in a pipeline array, which is the documented
// aggregate(pipeline, options) form and the one drivers expect.
db.collection.aggregate([
  // 1. Keep only the supplier of interest.
  { $match: { supplier: 1 } },
  // 2. One group per type; "data" becomes an array of "items" arrays.
  {
    $group: {
      _id: "$type",
      supplier: { $first: "$supplier" },
      data: { $push: "$items" }
    }
  },
  // 3. Flatten the nested arrays, then add up itemBuySum.
  {
    $project: {
      _id: 0,
      supplier: "$supplier",
      type: "$_id",
      // Called "total" because this value is produced for every type,
      // not only for type "return".
      total: {
        $reduce: {
          // 3.1 inner $reduce: concatenate the per-document arrays into one.
          input: {
            $reduce: {
              input: "$data",
              initialValue: [],
              in: { $concatArrays: ["$$value", "$$this"] }
            }
          },
          // 3.2 outer $reduce: running sum over the flattened items.
          initialValue: 0,
          in: { $sum: ["$$value", "$$this.itemBuySum"] }
        }
      }
    }
  }
])
Sample Mongo Playground
// One document per supplier with separate totals for "sale" and "return" records.
db.collection.aggregate([
  { $match: { supplier: 1 } },
  {
    $group: {
      // Group on the real supplier field. The sample documents have no "ID"
      // field, so grouping on "$ID" put every document into a single null group.
      _id: "$supplier",
      supplier: { $first: "$supplier" },
      // Add the document's item total only when it is a sale.
      sale: {
        $sum: {
          $cond: {
            if: { $eq: ["$type", "sale"] },
            // Expression form of $sum: totals the itemBuySum values of the items array.
            then: { $sum: "$items.itemBuySum" },
            else: 0
          }
        }
      },
      // Add the document's item total only when it is a return.
      returns: {
        $sum: {
          $cond: {
            if: { $eq: ["$type", "return"] },
            then: { $sum: "$items.itemBuySum" },
            else: 0
          }
        }
      }
    }
  },
  { $project: { _id: 0, supplier: 1, sale: 1, returns: 1 } }
])

MongoDB: Assign document objects to field in '$project' stage

I have a user collection:
[
  {"_id": 1, "name": "John", "age": 25, "valid_user": true},
  {"_id": 2, "name": "Bob", "age": 40, "valid_user": false},
  {"_id": 3, "name": "Jacob", "age": 27, "valid_user": null},
  {"_id": 4, "name": "Amelia", "age": 29, "valid_user": true}
]
I run a '$facet' stage on this collection. Checkout this MongoPlayground.
I want to talk about the first output from the facet stage. The following is the response currently:
{
"user_by_valid_status": [
{
"_id": false,
"count": 1
},
{
"_id": true,
"count": 2
},
{
"_id": null,
"count": 1
}
]
}
However, I want to restructure the output in this way:
"analytics": {
  "invalid_user": {
    "_id": false,
    "count": 1
  },
  "valid_user": {
    "_id": true,
    "count": 2
  },
  "user_with_unknown_status": {
    "_id": null,
    "count": 1
  }
}
The problem with using a '$project' stage along with 'arrayElemAt' is that the order may not be definite for me to associate an index with an attribute like 'valid_users' or others. Also, it gets further complicated because unlike the sample documents that I have shared, my collection may not always contain all the three categories of users.
Is there some way I can do this?
You can use $switch conditional operator,
$project to show value part in v with _id and count field as object, k to put $switch condition
// Count users per valid_user value and expose the counts under named keys.
db.collection.aggregate([
  {
    "$facet": {
      "user_by_valid_status": [
        {
          "$group": {
            "_id": "$valid_user",
            "count": { "$sum": 1 }
          }
        },
        // Shape each group as a { k, v } pair so $arrayToObject can consume it.
        // NOTE: there is no "default" branch, so $switch raises an error if
        // _id is anything other than null, false or true.
        {
          $project: {
            _id: 0,
            v: { _id: "$_id", count: "$count" },
            k: {
              $switch: {
                branches: [
                  { case: { $eq: ["$_id", null] }, then: "user_with_unknown_status" },
                  { case: { $eq: ["$_id", false] }, then: "invalid_user" },
                  { case: { $eq: ["$_id", true] }, then: "valid_user" }
                ]
              }
            }
          }
        }
      ],
      "users_above_30": [{ "$match": { "age": { "$gt": 30 } } }]
    }
  },
  // $project stage in root: convert the user_by_valid_status array to an
  // object using $arrayToObject.
  {
    $project: {
      analytics: { $arrayToObject: "$user_by_valid_status" },
      users_above_30: 1
    }
  }
])
Playground

Accumulate Documents with Dynamic Keys

I have a collection of documents that look like this
{
_id: 1,
weight: 2,
height: 3,
fruit: "Orange",
bald: "Yes"
},
{
_id: 2,
weight: 4,
height: 5,
fruit: "Apple",
bald: "No"
}
I need to get a result that aggregates the entire collection into this.
{
avgWeight: 3,
avgHeight: 4,
orangeCount: 1,
appleCount: 1,
baldCount: 1
}
I think I could map/reduce this, or I could query the averages and counts separately. The only values fruit could ever have are Apple and Orange. What other ways would you go about doing this? I've been away from MongoDB for a while now and maybe there are new amazing ways to do this I'm not aware of?
Aggregation Framework
The aggregation framework will do far better for you than what mapReduce can do, and the basic method is compatible with every release back to 2.2 when the aggregation framework was released.
If you have MongoDB 3.6 you can do
// Per-fruit totals, rolled up into one document, then flattened with $replaceRoot.
db.fruit.aggregate([
  // Stage 1: one document per fruit.
  {
    $group: {
      _id: "$fruit",
      avgWeight: { $avg: "$weight" },
      avgHeight: { $avg: "$height" },
      baldCount: { $sum: { $cond: [{ $eq: ["$bald", "Yes"] }, 1, 0] } },
      count: { $sum: 1 }
    }
  },
  // Stage 2: a single document; "data" holds one { k, v } pair per fruit.
  {
    $group: {
      _id: null,
      data: {
        $push: {
          k: { $concat: [{ $toLower: "$_id" }, "Count"] },
          v: "$count"
        }
      },
      avgWeight: { $avg: "$avgWeight" },
      avgHeight: { $avg: "$avgHeight" },
      baldCount: { $sum: "$baldCount" }
    }
  },
  // Stage 3: turn the pairs into named keys and merge them with the totals.
  {
    $replaceRoot: {
      newRoot: {
        $mergeObjects: [
          { $arrayToObject: "$data" },
          { avgWeight: "$avgWeight", avgHeight: "$avgHeight", baldCount: "$baldCount" }
        ]
      }
    }
  }
])
As a slight alternate, you can apply the $mergeObjects in the $group here instead:
// Variant: build the "<fruit>Count" keys with $mergeObjects inside the second $group.
db.fruit.aggregate([
  // Stage 1: one document per fruit.
  {
    $group: {
      _id: "$fruit",
      avgWeight: { $avg: "$weight" },
      avgHeight: { $avg: "$height" },
      baldCount: { $sum: { $cond: [{ $eq: ["$bald", "Yes"] }, 1, 0] } },
      count: { $sum: 1 }
    }
  },
  // Stage 2: "data" accumulates directly into an object of counts.
  {
    $group: {
      _id: null,
      data: {
        $mergeObjects: {
          // The outer array is the operator's argument list; the inner array
          // is the one-element list of { k, v } pairs to convert.
          $arrayToObject: [[
            {
              k: { $concat: [{ $toLower: "$_id" }, "Count"] },
              v: "$count"
            }
          ]]
        }
      },
      avgWeight: { $avg: "$avgWeight" },
      avgHeight: { $avg: "$avgHeight" },
      baldCount: { $sum: "$baldCount" }
    }
  },
  // Stage 3: merge the counts object with the totals at the root.
  {
    $replaceRoot: {
      newRoot: {
        $mergeObjects: [
          "$data",
          { avgWeight: "$avgWeight", avgHeight: "$avgHeight", baldCount: "$baldCount" }
        ]
      }
    }
  }
])
But there are reasons why I personally don't think that is the better approach, and that mostly leads to the next concept.
So even if you don't have a "latest" MongoDB release, you can simply reshape the output since that is all the last pipeline stage actually using the MongoDB 3.6 features is doing:
// Runs only the two $group stages on the server; the reshaping that the
// commented-out $replaceRoot stage would do is performed in the .map() below.
db.fruit.aggregate([
// Per-fruit averages, bald count and document count.
{ "$group": {
"_id": "$fruit",
"avgWeight": { "$avg": "$weight" },
"avgHeight": { "$avg": "$height" },
"baldCount": {
"$sum": { "$cond": [{ "$eq": ["$bald", "Yes"] }, 1, 0] }
},
"count": { "$sum": 1 }
}},
// Roll every fruit up into one document; "data" collects { k, v } pairs.
{ "$group": {
"_id": null,
"data": {
"$push": {
"k": {
"$concat": [
{ "$toLower": "$_id" },
"Count"
]
},
"v": "$count"
}
},
"avgWeight": { "$avg": "$avgWeight" },
"avgHeight": { "$avg": "$avgHeight" },
"baldCount": { "$sum": "$baldCount" }
}},
/*
{ "$replaceRoot": {
"newRoot": {
"$mergeObjects": [
{ "$arrayToObject": "$data" },
{
"avgWeight": "$avgWeight",
"avgHeight": "$avgHeight",
"baldCount": "$baldCount"
}
]
}
}}
*/
]).map( d =>
// Fold the { k, v } pairs into named keys, then copy over the remaining totals.
Object.assign(
d.data.reduce((acc,curr) => Object.assign(acc,{ [curr.k]: curr.v }), {}),
{ avgWeight: d.avgWeight, avgHeight: d.avgHeight, baldCount: d.baldCount }
)
)
And of course you can even just "hardcode" the keys:
// Single-stage variant with one hard-coded conditional counter per known value.
db.fruit.aggregate([
  {
    $group: {
      _id: null,
      appleCount: { $sum: { $cond: [{ $eq: ["$fruit", "Apple"] }, 1, 0] } },
      orangeCount: { $sum: { $cond: [{ $eq: ["$fruit", "Orange"] }, 1, 0] } },
      avgWeight: { $avg: "$weight" },
      avgHeight: { $avg: "$height" },
      baldCount: { $sum: { $cond: [{ $eq: ["$bald", "Yes"] }, 1, 0] } }
    }
  }
])
But it would not be recommended as your data might just change some day, and if there is a value to "group on" then it's better to actually use it than coercing with conditions.
In any form you return the same result:
{
"appleCount" : 1,
"orangeCount" : 1,
"avgWeight" : 3,
"avgHeight" : 4,
"baldCount" : 1
}
We do this with "two" $group stages, being once for accumulating "per fruit" and then secondly to compact all fruit to an array using $push under "k" and "v" values to keep their "key" and their "count". We do a little transformation on the "key" here using $toLower and $concat to join the strings. This is optional at this stage but easier in general.
The "alternate" for 3.6 is simply applying $mergeObjects within this earlier stage instead of $push since we already accumulated these keys. It's just really moving the $arrayToObject to a different stage in the pipeline. It's not really necessary and does not really have any specific advantage. If anything it just removes the flexible option as demonstrated with the "client transform" discussed later.
The "average" accumulations are done via $avg and the "bald" is counted using $cond to test the strings and feed a number to $sum. As the array is "rolled up" we can do all those accumulations again to total for everything.
As mentioned, the only part that actually relies on "new features" is all within the $replaceRoot stage which re-writes the "root" document. That's why this is optional as you can simply do these transformations after the same "already aggregated" data is returned from the database.
All we really do here is take that array with the "k" and "v" entries and turn it into an "object" with named keys via $arrayToObject and apply $mergeObjects on that result with the other keys we already produced at the "root". This transforms that array to be part of the main document returned in result.
The exact same transformation is applied using the JavaScript Array.reduce() and Object.assign() methods in the mongo shell compatible code. It's a very simple thing to apply and the Cursor.map() is generally a feature of most language implementations, so you can do these transforms before you start using the cursor results.
With ES6 compatible JavaScript environments ( not the shell ), we can shorten that syntax a little more:
.map(({ data, ...d }) => ({ ...data.reduce((o,[k,v]) => ({ ...o, [k]: v }), {}), ...d }) )
So it truly is a "one line" function, and that's a general reason why transformations like these are often better in the client code than the server anyway.
As a note on the usage of $cond, it is noted that using it for "hardcoded" evaluation is not really a good idea for several reasons. So it really does not make much sense to "force" that evaluation. Even with the data you present the "bald" would be better expressed as a Boolean value than a "string". If you change "Yes/No" to be true/false then even that "one" valid usage becomes:
"baldCount": { "$sum": { "$cond": ["$bald", 1, 0 ] } }
Which removes the need to "test" a condition on a string match since it's already true/false. MongoDB 4.0 adds another enhancement using $toInt to "coerce" the Boolean to an integer:
"baldCount": { "$sum": { "$toInt": "$bald" } }
That removes the need for $cond altogether, as would simply recording 1 or 0 but that change might cause a loss of clarity in the data, so it is still probably reasonable to have that sort of "coercion" there, but not really optimal anywhere else.
Even with the "dynamic" form using "two" $group stages for accumulation, the main work is still done in the first stage. It simply leaves the remaining accumulation on n result documents for the number of possible unique values of the grouping key. In this case "two", so even though it's an additional instruction there is no real overhead for the gain of having flexible code.
MapReduce
If you really have your heart set on at least "trying" a mapReduce, then it's really a single pass with a finalize function just to make the averages:
// Single-pass mapReduce: every document is emitted under the same null key,
// the reducer adds the partial results together, and finalize turns the
// totals into averages.
db.fruit.mapReduce(
// map: emit one partial result per document.
function() {
emit(null,{
// Dynamic "<fruit>Count" key, e.g. "orangeCount", seeded with 1.
"key": { [`${this.fruit.toLowerCase()}Count`]: 1 },
"totalWeight": this.weight,
"totalHeight": this.height,
"totalCount": 1,
"baldCount": (this.bald === "Yes") ? 1 : 0
});
},
// reduce: sum the per-fruit counters and the numeric totals. The output has
// the same shape as an emitted value.
function(key,values) {
var output = {
key: { },
totalWeight: 0,
totalHeight: 0,
totalCount: 0,
baldCount: 0
};
for ( let value of values ) {
// Merge the dynamic "<fruit>Count" counters, creating a counter on first sight.
for ( let key in value.key ) {
if ( !output.key.hasOwnProperty(key) )
output.key[key] = 0;
output.key[key] += value.key[key];
}
// Every property other than "key" is a plain number and is simply added.
Object.keys(value).filter(k => k != 'key').forEach(k =>
output[k] += value[k]
)
}
return output;
},
{
"out": { "inline": 1 },
// finalize: flatten the counters and derive the averages from the totals.
"finalize": function(key,value) {
return Object.assign(
value.key,
{
avgWeight: value.totalWeight / value.totalCount,
avgHeight: value.totalHeight / value.totalCount,
baldCount: value.baldCount
}
)
}
}
)
Since we already ran through the process for the aggregate() method then the general points should be pretty familiar since we are basically doing much the same thing here.
The main differences are for an "average" you actually need the full totals and counts and of course you get a bit more control over accumulating via an "Object" with JavaScript code.
The results are basically the same, just with the standard mapReduce "bent" on how it presents them:
{
"_id" : null,
"value" : {
"orangeCount" : 1,
"appleCount" : 1,
"avgWeight" : 3,
"avgHeight" : 4,
"baldCount" : 1
}
}
Summary
The general catch being of course that MapReduce using interpreted JavaScript in order to execute has a much higher cost and slower execution than the native coded operations of the aggregation framework. There once may have been an option to use MapReduce for this kind of output on "larger" result sets, but since MongoDB 2.6 introduced "cursor" output for the aggregation framework then the scales have been firmly tipped in favor of the newer option.
Fact is that most "legacy" reasons for employing MapReduce are basically superseded by its younger sibling as the aggregation framework gains new operations which remove the need for the JavaScript execution environment. It would be a fair comment to say that support for JavaScript execution is generally "dwindling", and legacy options which used this from the beginning are gradually being removed from the product.
// Flag each category per document, then average and count over the whole collection.
db.demo.aggregate(
  // Pipeline
  [
    // Stage 1: turn each category test into a 0/1 flag. A plain literal 1 is
    // used for the "then" value; wrapping it in { $sum: 1 } only evaluates to 1.
    {
      $project: {
        weight: 1,
        height: 1,
        Orange: {
          $cond: { if: { $eq: ["$fruit", "Orange"] }, then: 1, else: 0 }
        },
        Apple: {
          $cond: { if: { $eq: ["$fruit", "Apple"] }, then: 1, else: 0 }
        },
        bald: {
          $cond: { if: { $eq: ["$bald", "Yes"] }, then: 1, else: 0 }
        }
      }
    },
    // Stage 2: a single group over all documents; summing the flags gives the counts.
    {
      $group: {
        _id: null,
        avgWeight: { $avg: "$weight" },
        avgHeight: { $avg: "$height" },
        orangeCount: { $sum: "$Orange" },
        appleCount: { $sum: "$Apple" },
        baldCount: { $sum: "$bald" }
      }
    }
  ]
);

group collection data by multiple fields mongodb

Collection Structure
Order = new Schema
index: { type: Number, unique: true }
number: Date
status: { type: String, enum: ['success', 'failure'] }
created_at: { type: Date, default: Date.now }
updated_at: { type: Date, default: Date.now }
I need help with a query that returns me an array of objects having data as success count and failure count grouped by date.
Ex-
orders = {
28-10-2016:{
success_count: 10,
failure_count: 10
},
29-10-2016: {
success_count: 10,
failure_count: 10
}
}
With the aggregation framework, the result will be slightly different from your "desired" output: instead of having hash keys, you get an array of objects with the _id key having a value that represents your group-by field. For instance, instead of
{
"28-10-2016":{
"success_count": 10,
"failure_count": 10
},
"29-10-2016": {
"success_count": 10,
"failure_count": 10
}
}
you'd have a better structure like
[
  {
    "_id": "28-10-2016",
    "success_count": 10,
    "failure_count": 10
  },
  {
    "_id": "29-10-2016",
    "success_count": 10,
    "failure_count": 10
  }
]
Accomplishing the above result would require using the $cond operator in the $sum accumulator operator. The $cond operator will evaluate a logical condition based on its first argument (if) and then returns the second argument where the evaluation is true (then) or the third argument where false (else). This converts the true/false logic into 1 and 0 numerical values that feed into $sum respectively:
"success_count": {
"$sum": {
"$cond": [ { "$eq": [ "$status", "success" ] }, 1, 0 ]
}
}
As a resulting pipeline, one needs to run the aggregation operation which uses the $dateToString operator in the _id key expression for the $group pipeline:
// Count successful and failed orders per calendar day of created_at.
Orders.aggregate([
{
"$group": {
// Group key is the day rendered as a string.
// NOTE(review): "%Y-%m-%d" yields year-first keys such as "2016-10-28";
// use "%d-%m-%Y" if the "28-10-2016" form shown in the samples is required.
"_id": {
"$dateToString": {
"format": "%Y-%m-%d",
"date": "$created_at"
}
},
// Adds 1 for each order whose status is "success", 0 otherwise.
"success_count": {
"$sum": {
"$cond": [ { "$eq": [ "$status", "success" ] }, 1, 0 ]
}
},
// Adds 1 for each order whose status is "failure", 0 otherwise.
"failure_count": {
"$sum": {
"$cond": [ { "$eq": [ "$status", "failure" ] }, 1, 0 ]
}
}
}
}
], function (err, orders){
// Rethrow any aggregation error; otherwise print the grouped result.
if (err) throw err;
console.log(orders);
})
However, there is a more flexible and better performant approach which executes much faster than the above, where the most efficient data structure for your aggregation result follows the schema for example:
orders = [
{
"_id": "28-10-2016",
"counts": [
{ "status": "success", "count": 10 },
{ "status": "failure", "count": 10 }
]
},
{
"_id": "29-10-2016",
"counts": [
{ "status": "success", "count": 10 },
{ "status": "failure", "count": 10 }
]
}
]
Then consider running an alternative pipeline as follows
// Two-step grouping: count per (day, status) first, then collect the
// per-status counts into a "counts" array for each day.
Orders.aggregate([
{
"$group": {
// Compound key: day string plus lower-cased status.
// NOTE(review): "%Y-%m-%d" yields year-first keys such as "2016-10-28";
// use "%d-%m-%Y" if the "28-10-2016" form shown in the samples is required.
"_id": {
"date": {
"$dateToString": {
"format": "%Y-%m-%d",
"date": "$created_at"
}
},
"status": { "$toLower": "$status" }
},
"count": { "$sum": 1 }
}
},
{
"$group": {
// Regroup by day only and push one { status, count } entry per status.
"_id": "$_id.date",
"counts": {
"$push": {
"status": "$_id.status",
"count": "$count"
}
}
}
}
], function (err, orders){
// Rethrow any aggregation error; otherwise print the grouped result.
if (err) throw err;
console.log(orders);
})

Mongo Aggregation : $group and $project array to object for counts

I have documents like:
{
"platform":"android",
"install_date":20151029
}
platform - can have one value from [android|ios|kindle|facebook ] .
install_date - there are many install_dates
There are also many fields.
Aim : I am calculating installs per platform on particular date.
So I am using group by in the aggregation framework and making counts by platform. The document should look like:
{
"install_date":20151029,
"platform" : {
"android":1000,
"ios": 2000,
"facebook":1500
}
}
I have done like:
// Asker's pipeline: installs per platform and date, collected into an array per date.
db.collection.aggregate([
  // Count documents for every (platform, install_date) pair.
  {
    $group: {
      _id: { platform: "$platform", install_date: "$install_date" },
      count: { "$sum": 1 }
    }
  },
  // Regroup by date and push one { platform, count } entry per platform.
  {
    $group: {
      _id: { install_date: "$_id.install_date" },
      platform: {
        $push: { platform: "$_id.platform", count: "$count" }
      }
    }
  },
  // Lift install_date out of _id and drop _id.
  {
    $project: {
      _id: 0,
      install_date: "$_id.install_date",
      platform: 1
    }
  }
])
which Gives document like:
{
"platform": [
{
"platform": "facebook",
"count": 1500
},
{
"platform": "ios",
"count": 2000
},
{
"platform": "android",
"count": 1000
}
],
"install_date": 20151027
}
Problem:
Projecting array to single object as "platform"
With MongoDB 3.4.4 and newer, you can use the $arrayToObject operator to get the desired result. You would need to run the following aggregate pipeline:
// Installs per platform for each install_date, with the platform counts
// returned as a single object keyed by platform name.
db.collection.aggregate([
// Count documents per (date, lower-cased platform) pair.
{ "$group": {
"_id": {
"date": "$install_date",
"platform": { "$toLower": "$platform" }
},
"count": { "$sum": 1 }
} },
// Regroup by date; collect { k, v } pairs, the input shape $arrayToObject accepts.
{ "$group": {
"_id": "$_id.date",
"counts": {
"$push": {
"k": "$_id.platform",
"v": "$count"
}
}
} },
// Expose the date as install_date and convert the pairs into an object.
{ "$addFields": {
"install_date": "$_id",
"platform": { "$arrayToObject": "$counts" }
} },
// Remove the helper fields from the output.
{ "$project": { "counts": 0, "_id": 0 } }
])
For older versions, take advantage of the $cond operator in the $group pipeline step to evaluate the counts based on the platform field value, something like the following:
// Variant without $arrayToObject: one hard-coded conditional counter per
// known platform value, grouped by install_date.
db.collection.aggregate([
{ "$group": {
"_id": "$install_date",
// Each counter adds 1 when the platform matches and 0 otherwise.
"android_count": {
"$sum": {
"$cond": [ { "$eq": [ "$platform", "android" ] }, 1, 0 ]
}
},
"ios_count": {
"$sum": {
"$cond": [ { "$eq": [ "$platform", "ios" ] }, 1, 0 ]
}
},
"facebook_count": {
"$sum": {
"$cond": [ { "$eq": [ "$platform", "facebook" ] }, 1, 0 ]
}
},
"kindle_count": {
"$sum": {
"$cond": [ { "$eq": [ "$platform", "kindle" ] }, 1, 0 ]
}
}
} },
// Reshape: expose the date as install_date and nest the counters under "platform".
{ "$project": {
"_id": 0, "install_date": "$_id",
"platform": {
"android": "$android_count",
"ios": "$ios_count",
"facebook": "$facebook_count",
"kindle": "$kindle_count"
}
} }
])
In the above, $cond takes a logical condition as its first argument (if) and then returns the second argument where the evaluation is true (then) or the third argument where false (else). This makes true/false returns into 1 and 0 to feed to $sum respectively.
So for example, if { "$eq": [ "$platform", "facebook" ] }, is true then the expression will evaluate to { $sum: 1 } else it will be { $sum: 0 }