MongoDB aggregation: How can I get a count of columns based on a criteria - mongodb

I have the following documents:
{ "col1": "camera", "fps": 1, "lat": 3 },
{ "col1": "camera", "fps": 3, "lat": 2 }
{ "col1": "camera", "foo": 9, "bar": 7 }
{ "col1": "camera", "bar": 8, "bar": 1 }
{ "col1": "camera", "check": 4, "lat": 3 }
How can I get the following:
{ "fps": 2, "lat": 3, "bar": 3, "check": 1, "foo": 1 }
where each value is the number of occurrences (count) of that key across the documents (fps appears twice, foo appears once, etc.)?

Aggregate
Here is a solution (I named my collection cameras) for when the keys are known in advance:
// Count, for each known key, how many documents in `cameras` contain that key.
//
// A single $group stage folds the whole collection (_id: null) into one result
// document. Each counter adds 1 when the field is present and 0 otherwise.
// Presence is tested with { $gt: [ "$field", null ] }: a missing or null field
// does not compare greater than null, while any number does. This means a field
// whose stored value is 0 is still counted, which a "replace missing with 0,
// then compare with 0" approach would get wrong.
db.cameras.aggregate([
    {
        $group: {
            _id: null,
            fps:   { $sum: { $cond: [ { $gt: [ "$fps", null ] }, 1, 0 ] } },
            lat:   { $sum: { $cond: [ { $gt: [ "$lat", null ] }, 1, 0 ] } },
            bar:   { $sum: { $cond: [ { $gt: [ "$bar", null ] }, 1, 0 ] } },
            foo:   { $sum: { $cond: [ { $gt: [ "$foo", null ] }, 1, 0 ] } },
            check: { $sum: { $cond: [ { $gt: [ "$check", null ] }, 1, 0 ] } }
        }
    }
])
Results :
{
"result" : [
{
"_id" : null,
"fps" : 2,
"lat" : 3,
"bar" : 2,
"foo" : 1,
"check" : 1
}
],
"ok" : 1
}
MapReduce
Another solution, for when the keys are not known in advance, is mapReduce:
// Count key occurrences when the field names are not known up front.
// Results are returned inline rather than written to a collection.

// map: emit (fieldName, 1) once for every top-level field of the current document.
var emitEachKey = function () {
    var fieldNames = Object.keys(this);
    for (var i = 0; i < fieldNames.length; i++) {
        emit(fieldNames[i], 1);
    }
};

// reduce: add up all the 1s that were emitted for one field name.
var sumCounts = function (fieldName, ones) {
    return Array.sum(ones);
};

db.cameras.mapReduce(
    emitEachKey,
    sumCounts,
    {
        query: {},            // no filter: every document is mapped
        out: { inline: 1 }
    }
)
Results :
{
"results" : [
{
"_id" : "_id",
"value" : 5
},
{
"_id" : "bar",
"value" : 2
},
{
"_id" : "check",
"value" : 1
},
{
"_id" : "col1",
"value" : 5
},
{
"_id" : "foo",
"value" : 1
},
{
"_id" : "fps",
"value" : 2
},
{
"_id" : "lat",
"value" : 3
}
],
"timeMillis" : 1,
"counts" : {
"input" : 5,
"emit" : 19,
"reduce" : 5,
"output" : 7
},
"ok" : 1,
}
But the result is not exactly in the same format.

Related

Sorting according to time in string in mongodb

How can i sort if my time(time_required) is saved in this format ?
quiz_customer_record
{
"_id" : ObjectId("5f16eb4a5007bd5395c76ed9"),
"quiz_id" : "5f05bbd10cf3166085be68fc",
"user_id" : "5f06e0ddf718c04de30ea47f",
"name" : "ABC",
"time_required" : "0:6 Mins",
"questions_attempted" : 0,
"total_quiz_questions" : 1,
"attempt_date" : "2020-07-21T13:19:08.025Z"
},
{
"_id" : ObjectId("5f16eb5f5007bd5395c76edb"),
"quiz_id" : "5f05bbd10cf3166085be68fc",
"user_id" : "5f06e0ddf718c04de30ea47f",
"name" : "ABC",
"time_required" : "0:8 Mins",
"questions_attempted" : 0,
"total_quiz_questions" : 1,
"attempt_date" : "2020-07-21T13:19:29.377Z"
}
I want to sort by time_required, but it is stored as a string in the format Mins:Seconds. Yes, it's pretty messed up, but is there a solution? I want to do this in a Mongo query because there are a lot of records and I need to use limit (for pagination).
Expected Result- Sort type- descending()
{
"_id" : ObjectId("5f16eb5f5007bd5395c76edb"),
"quiz_id" : "5f05bbd10cf3166085be68fc",
"user_id" : "5f06e0ddf718c04de30ea47f",
"name" : "ABC",
"time_required" : "0:8 Mins",
"questions_attempted" : 0,
"total_quiz_questions" : 1,
"attempt_date" : "2020-07-21T13:19:29.377Z"
},
{
"_id" : ObjectId("5f16eb4a5007bd5395c76ed9"),
"quiz_id" : "5f05bbd10cf3166085be68fc",
"user_id" : "5f06e0ddf718c04de30ea47f",
"name" : "ABC",
"time_required" : "0:6 Mins",
"questions_attempted" : 0,
"total_quiz_questions" : 1,
"attempt_date" : "2020-07-21T13:19:08.025Z"
}
The query i'm using is
db.quiz_customer_record.aggregate([{ $match: { quiz_id:quiz_id}},
{
$sort: { attempt_date: -1 }
},
{
$group: {
_id: "$user_id",
result1: { $first: "$attempt_date" },
quiz_id: { $first: "$quiz_id" },
time_required: { $first: "$time_required" },
o_id: { $first: "$_id" }
}
},
{
$project: {
_id: "$o_id",
user_id: "$_id",
quiz_id:"$quiz_id",
time_required:"$time_required",
result1: 1
}
}
]).sort({time_required:-1})
Answer for mongo version less than 4.2
$set was added in 4.2 version. For earlier version $addFields can be used.
// Sort documents by the duration encoded in `time_required` ("M:S Mins"), longest first.
// Uses $addFields so it also runs on servers that predate the $set stage.
db.collection.aggregate([
    {
        // Keep the text before the first space ("M:S") and split it on ":" into
        // ["M", "S"]. Splitting on the space, rather than taking a fixed
        // 3-character prefix, keeps multi-digit minutes or seconds
        // (e.g. "0:45 Mins", "12:07 Mins") intact.
        "$addFields": {
            "time_required_split": {
                $split: [
                    {
                        $arrayElemAt: [
                            { $split: [ "$time_required", " " ] },
                            0
                        ]
                    },
                    ":"
                ]
            }
        }
    },
    {
        // Total duration in seconds = minutes * 60 + seconds.
        "$addFields": {
            "time_seconds": {
                $sum: [
                    {
                        "$multiply": [
                            { $toInt: { $arrayElemAt: [ "$time_required_split", 0 ] } },
                            60
                        ]
                    },
                    { $toInt: { $arrayElemAt: [ "$time_required_split", 1 ] } }
                ]
            }
        }
    },
    {
        "$sort": {
            time_seconds: -1
        }
    },
    {
        // Drop the helper fields so the output documents keep their original shape.
        "$project": {
            "time_required_split": 0,
            "time_seconds": 0
        }
    }
])
Mongo Playground
Try this query -
// Sort documents by the duration encoded in `time_required` ("M:S Mins"), longest first.
db.collection.aggregate([
    {
        // Keep the text before the first space ("M:S") and split it on ":" into
        // ["M", "S"]. Splitting on the space, rather than taking a fixed
        // 3-character prefix, keeps multi-digit minutes or seconds
        // (e.g. "0:45 Mins", "12:07 Mins") intact.
        "$set": {
            "time_required_split": {
                $split: [
                    {
                        $arrayElemAt: [
                            { $split: [ "$time_required", " " ] },
                            0
                        ]
                    },
                    ":"
                ]
            }
        }
    },
    {
        // Total duration in seconds = minutes * 60 + seconds.
        "$set": {
            "time_seconds": {
                $sum: [
                    {
                        "$multiply": [
                            { $toInt: { $arrayElemAt: [ "$time_required_split", 0 ] } },
                            60
                        ]
                    },
                    { $toInt: { $arrayElemAt: [ "$time_required_split", 1 ] } }
                ]
            }
        }
    },
    {
        "$sort": {
            time_seconds: -1
        }
    },
    {
        // Drop the helper fields so the output documents keep their original shape.
        "$project": {
            "time_required_split": 0,
            "time_seconds": 0
        }
    }
])
Mongo Playground
Let me know if you don't understand any stage.

Mongo query for an array is a subarray

I'm looking for a query that acts as $setIsSubset, except accounting for duplicate values.
For example, [1,1,2,3] is a subset of [1,2,3,4], because sets don't have duplicate values.
How can I write a query such that [1,1,2,3] is not a subset of [1,2,3,4]?
An example of expected outputs:
INPUT | TARGET | RESULT
[1] [1,2,3,4] TRUE
[1,2,3] [1,2,3,4] TRUE
[1,1,2,3] [1,2,3,4] FALSE
[1,2,3,4] [1,2,3,4] TRUE
[1,3] [1,2,3,4] TRUE
[1,11,5] [1,2,3,4] FALSE
[1,2,2,3] [1,2,3,4] FALSE
I would suggest not to do such heavy processing in mongo query as you can do the same task easily in any programming language. But, if you still need it in mongo, the following query can get you the expected output, provided both input and target arrays are sorted.
db.collection.aggregate([
{
$project:{
"modifiedInput":{
$reduce:{
"input":"$input",
"initialValue":{
"data":[],
"postfix":0,
"index":0,
"nextElem":{
$arrayElemAt:["$input",1]
}
},
"in":{
"data":{
$concatArrays:[
"$$value.data",
[
{
$concat:[
{
$toString:"$$this"
},
"-",
{
$toString:"$$value.postfix"
}
]
}
]
]
},
"postfix":{
$cond:[
{
$eq:["$$this","$$value.nextElem"]
},
{
$sum:["$$value.postfix",1]
},
0
]
},
"nextElem": {
$arrayElemAt:["$input", { $sum : [ "$$value.index", 2] }]
},
"index":{
$sum:["$$value.index",1]
}
}
}
},
"modifiedTarget":{
$reduce:{
"input":"$target",
"initialValue":{
"data":[],
"postfix":0,
"index":0,
"nextElem":{
$arrayElemAt:["$target",1]
}
},
"in":{
"data":{
$concatArrays:[
"$$value.data",
[
{
$concat:[
{
$toString:"$$this"
},
"-",
{
$toString:"$$value.postfix"
}
]
}
]
]
},
"postfix":{
$cond:[
{
$eq:["$$this","$$value.nextElem"]
},
{
$sum:["$$value.postfix",1]
},
0
]
},
"nextElem": {
$arrayElemAt:["$target", { $sum : [ "$$value.index", 2] }]
},
"index":{
$sum:["$$value.index",1]
}
}
}
}
}
},
{
$project:{
"_id":0,
"matched":{
$eq:[
{
$size:{
$setDifference:["$modifiedInput.data","$modifiedTarget.data"]
}
},
0
]
}
}
}
]).pretty()
Data set:
{
"_id" : ObjectId("5d6e005db674d5c90f46d355"),
"input" : [
1
],
"target" : [
1,
2,
3,
4
]
}
{
"_id" : ObjectId("5d6e005db674d5c90f46d356"),
"input" : [
1,
2,
3
],
"target" : [
1,
2,
3,
4
]
}
{
"_id" : ObjectId("5d6e005db674d5c90f46d357"),
"input" : [
1,
1,
2,
3
],
"target" : [
1,
2,
3,
4
]
}
{
"_id" : ObjectId("5d6e005db674d5c90f46d358"),
"input" : [
1,
2,
3,
4
],
"target" : [
1,
2,
3,
4
]
}
{
"_id" : ObjectId("5d6e005db674d5c90f46d359"),
"input" : [
1,
3
],
"target" : [
1,
2,
3,
4
]
}
{
"_id" : ObjectId("5d6e005db674d5c90f46d35a"),
"input" : [
1,
5,
11
],
"target" : [
1,
2,
3,
4
]
}
{
"_id" : ObjectId("5d6e005db674d5c90f46d35b"),
"input" : [
1,
2,
2,
3
],
"target" : [
1,
2,
3,
4
]
}
Output:
{ "matched" : true }
{ "matched" : true }
{ "matched" : false }
{ "matched" : true }
{ "matched" : true }
{ "matched" : false }
{ "matched" : false }
Explanation: To keep duplicate values from being collapsed, a postfix counter is appended to each element. For example, [1,1,1,2,3,3,4,4] becomes ["1-0","1-1","1-2","2-0","3-0","3-1","4-0","4-1"]. After both the input and target arrays are converted, their set difference is calculated. It is a match if the size of the set difference is zero.
You can try below aggregation:
// Candidate array to test against each document's `target` array.
let input = [1,2,3];
// Derived from `input` (duplicates included) so the two can never get out of sync.
let inputSize = input.length;
db.collection.aggregate([
    {
        // De-duplicate the target so each value can be removed at most once.
        $project: {
            uniqueTarget: { $setUnion: [ "$target" ] }
        }
    },
    {
        // Walk through `input`, removing the current element from what is left of the target.
        $addFields: {
            filtered: {
                $reduce: {
                    input: input,
                    initialValue: "$uniqueTarget",
                    in: {
                        $filter: {
                            input: "$$value",
                            as: "current",
                            cond: { $ne: [ "$$this", "$$current" ] }
                        }
                    }
                }
            }
        }
    },
    {
        // Every input element must have removed exactly one target element. A duplicate
        // or unknown input value removes nothing, so the sizes no longer add up.
        $project: {
            result: {
                $eq: [
                    { $size: "$filtered" },
                    { $subtract: [ { $size: "$uniqueTarget" }, inputSize ] }
                ]
            }
        }
    }
])
It starts with $setUnion to ensure there are no duplicates in target array. Then you can run $reduce to iterate through input and remove currently processed element from target. Every iteration should remove single element so expected $size of filtered is equal $size of uniqueTarget - inputSize
Mongo Playground (1)
Mongo Playground (2)

Aggregate data grouped by date but from different date fields

I'm trying to get a query where the output is grouped by date, but the next fields will be based on different date fields.
So, for the date 2018-11 (year-month): how many registrations, how many activations, how many new customers and how many cancellations there were. Each register/activation/customer/cancel has to be counted in the month when it happened.
My data is stored as follow:
{
"track" : {
"hasBeenCustomer" : true,
"hasActivated" : true,
"hasActivatedAt" : ISODate("2018-08-21T14:32:53.929Z"),
"hasBeenCustomerAt" : ISODate("2019-02-26T07:21:06Z"),
"hasRegisteredAt" : ISODate("2018-08-09T10:17:38.329Z"),
"hasCanceled" : true,
"hasCanceledAt" : ISODate("2019-04-29T13:56:04Z")
}
}
{
"track" : {
"hasBeenCustomer" : true,
"hasActivated" : true,
"hasActivatedAt" : ISODate("2018-08-26T15:04:58.854Z"),
"hasBeenCustomerAt" : ISODate("2018-11-24T10:37:14Z"),
"hasRegisteredAt" : ISODate("2018-08-25T11:12:36.309Z"),
"hasCanceled" : true,
"hasCanceledAt" : ISODate("2019-05-30T18:11:04Z")
}
}
{
"track" : {
"hasBeenCustomer" : true,
"hasActivated" : true,
"hasActivatedAt" : ISODate("2018-09-24T23:21:55.733Z"),
"hasBeenCustomerAt" : ISODate("2019-03-12T10:26:01Z"),
"hasRegisteredAt" : ISODate("2018-09-22T17:56:57.256Z"),
"hasCanceled" : true,
"hasCanceledAt" : ISODate("2019-04-12T10:22:03Z")
}
}
{
"track" : {
"hasBeenCustomer" : true,
"hasActivated" : true,
"hasActivatedAt" : ISODate("2018-10-18T15:08:15.351Z"),
"hasBeenCustomerAt" : ISODate("2018-12-22T21:37:01Z"),
"hasRegisteredAt" : ISODate("2018-10-16T03:54:16.056Z"),
"hasCanceled" : true,
"hasCanceledAt" : ISODate("2019-01-22T21:39:03Z")
}
}
I have tried this:
db.user.aggregate(
[
{
$match:
{
projectId : "00001"
}
},
{
"$project": {
"createDate": {
"$dateToString": {
"format": "%Y-%m",
"date": "$track.hasRegisteredAt"
}
},
activationAt: {
"$dateToString": {
"format": "%Y-%m",
"date": "$track.hasActivatedAt"
}
},
customerAt: {
"$dateToString": {
"format": "%Y-%m",
"date": "$track.hasBeenCustomerAt"
}
},
cancelAt: {
"$dateToString": {
"format": "%Y-%m",
"date": "$track.hasCanceledAt"
}
},
activations: {
"$sum": {
"$cond": [
{ "$eq": [ "$track.hasActivated", true ] },
1,
0
]
}
},
customers: {
"$sum": {
"$cond": [
{ "$eq": [ "$track.hasBeenCustomer", true ] },
1,
0
]
}
},
cancels: {
"$sum": {
"$cond": [{
"$and": [
{ "$eq": [ "$status", 3 ] },
{ "$eq": [ "$track.hasCanceled", true ] }
]},
1,
0
]
}
}
}
},
{
$group:
{
_id: "$createDate",
users: {$sum: 1},
activations: {$sum: "$activations"},
activationsM: {
"$sum": {
"$cond": [
{ "$eq": [ "$activationAt", "$createDate" ] },
1,
0
]
}
},
customers: {$sum: "$customers"},
customersM: {
"$sum": {
"$cond": [
{ "$eq": [ "$customerAt", "$createDate" ] },
1,
0
]
}
},
cancels: {$sum: "$cancels"},
cancelsM: {
"$sum": {
"$cond": [
{ "$eq": [ "$cancelAt", "$createDate" ] },
1,
0
]
}
},
}
},
{
$sort:
{
_id: 1
}
}
]
)
activationsM, customersM and cancelsM are supposed to be the counts per month, independent of the _id field, but I have realized that this query first retrieves the results matching the _id and only then checks the condition. I need each event to be summed in the month it happened, even if the _id doesn't match the hasActivatedAt, hasBeenCustomerAt or hasCanceledAt fields.
Hope I have explained it properly.
The desired output would be:
{ "_id" : "2018-06", "users" : 18, "activations" : 5, "activationsM" : 2, "customers" : 4, "customersM" : 0, "cancels" : 1, "cancelsM" : 0 }
{ "_id" : "2018-07", "users" : 78, "activations" : 39, "activationsM" : 31, "customers" : 11, "customersM" : 0, "cancels" : 7, "cancelsM" : 0 }
{ "_id" : "2018-08", "users" : 115, "activations" : 49, "activationsM" : 38, "customers" : 18, "customersM" : 0, "cancels" : 8, "cancelsM" : 0 }
Here, fieldM is the total count of documents whose corresponding date field falls in the same month as the _id date.
Thanks.
Try as below:
db.collection.aggregate([
{
$facet: {
"TOTAL_ACTIVATION": [
{
$group: {
_id: "$track.hasActivated",
total: { "$sum": 1 },
"totalActiveCustomer": {
"$sum": { "$cond": [
{ "$eq": [ "$track.hasActivated", true ] },
1,
0
]}
},
"totalNonActiveCustomer": {
"$sum": { "$cond": [
{ "$eq": [ "$track.hasActivated", false ] },
1,
0
]}
},
}
}
],
"TOTAL_CUSTOMERS": [
{
$group: {
_id: "$track.hasBeenCustomer",
total: { "$sum": 1 },
"totalCustomer": {
"$sum": { "$cond": [
{ "$eq": [ "$track.hasBeenCustomer", true ] },
1,
0
]}
},
"totalNonCustomer": {
"$sum": { "$cond": [
{ "$eq": [ "$track.hasBeenCustomer", false ] },
1,
0
]}
},
}
}
],
"TOTAL_CANCELLED": [
{
$group: {
_id: "$track.hasCanceled",
total: { "$sum": 1 },
"totalCancelledCustomer": {
"$sum": { "$cond": [
{ "$eq": [ "$track.hasCanceled", true ] },
1,
0
]}
},
"totalNonCancelledCustomer": {
"$sum": { "$cond": [
{ "$eq": [ "$track.hasCanceled", false ] },
1,
0
]}
},
}
}
],
"MONTHLY_ACTIVATION" : [
{
$group: {
_id: {
year: { $year: "$track.hasActivatedAt" },
month: { $month: "$track.hasActivatedAt" }
},
totalThisMonth: { $sum : 1},
"totalActiveCustomer": {
"$sum": { "$cond": [
{ "$eq": [ "$track.hasActivated", true ] },
1,
0
]}
},
"totalNonActiveCustomer": {
"$sum": { "$cond": [
{ "$eq": [ "$track.hasActivated", false ] },
1,
0
]}
},
}
}
],
"MONTHLY_CUSTOMER" : [
{
$group: {
_id: {
year: { $year: "$track.hasBeenCustomerAt" },
month: { $month: "$track.hasBeenCustomerAt" }
},
totalThisMonth: { $sum : 1},
"totalCustomer": {
"$sum": { "$cond": [
{ "$eq": [ "$track.hasBeenCustomer", true ] },
1,
0
]}
},
"totalNonCustomer": {
"$sum": { "$cond": [
{ "$eq": [ "$track.hasBeenCustomer", false ] },
1,
0
]}
},
}
}
],
"MONTHLY_CANCELLED" : [
{
$group: {
_id: {
year: { $year: "$track.hasCanceledAt" },
month: { $month: "$track.hasCanceledAt" }
},
totalThisMonth: { $sum : 1},
"totalCancelledCustomer": {
"$sum": { "$cond": [
{ "$eq": [ "$track.hasCanceled", true ] },
1,
0
]}
},
"totalNonCancelledCustomer": {
"$sum": { "$cond": [
{ "$eq": [ "$track.hasCanceled", false ] },
1,
0
]}
},
}
}
]
}
}
])
Result of this will be as below:
{
"TOTAL_ACTIVATION" : [
{
"_id" : true,
"total" : 4,
"totalActiveCustomer" : 4,
"totalNonActiveCustomer" : 0
}
],
"TOTAL_CUSTOMERS" : [
{
"_id" : true,
"total" : 4,
"totalCustomer" : 4,
"totalNonCustomer" : 0
}
],
"TOTAL_CANCELLED" : [
{
"_id" : true,
"total" : 4,
"totalCancelledCustomer" : 4,
"totalNonCancelledCustomer" : 0
}
],
"MONTHLY_ACTIVATION" : [
{
"_id" : {
"year" : NumberInt(2018),
"month" : NumberInt(10)
},
"totalThisMonth" : 1,
"totalActiveCustomer" : 1,
"totalNonActiveCustomer" : 0
},
{
"_id" : {
"year" : NumberInt(2018),
"month" : NumberInt(9)
},
"totalThisMonth" : 1,
"totalActiveCustomer" : 1,
"totalNonActiveCustomer" : 0
},
{
"_id" : {
"year" : NumberInt(2018),
"month" : NumberInt(8)
},
"totalThisMonth" : 2,
"totalActiveCustomer" : 2,
"totalNonActiveCustomer" : 0
}
],
"MONTHLY_CUSTOMER" : [
{
"_id" : {
"year" : NumberInt(2018),
"month" : NumberInt(12)
},
"totalThisMonth" : 1,
"totalCustomer" : 1,
"totalNonCustomer" : 0
},
{
"_id" : {
"year" : NumberInt(2019),
"month" : NumberInt(3)
},
"totalThisMonth" : 1,
"totalCustomer" : 1,
"totalNonCustomer" : 0
},
{
"_id" : {
"year" : NumberInt(2018),
"month" : NumberInt(11)
},
"totalThisMonth" : 1,
"totalCustomer" : 1,
"totalNonCustomer" : 0
},
{
"_id" : {
"year" : NumberInt(2019),
"month" : NumberInt(2)
},
"totalThisMonth" : 1,
"totalCustomer" : 1,
"totalNonCustomer" : 0
}
],
"MONTHLY_CANCELLED" : [
{
"_id" : {
"year" : NumberInt(2019),
"month" : NumberInt(1)
},
"totalThisMonth" : 1,
"totalCancelledCustomer" : 1,
"totalNonCancelledCustomer" : 0
},
{
"_id" : {
"year" : NumberInt(2019),
"month" : NumberInt(5)
},
"totalThisMonth" : 1,
"totalCancelledCustomer" : 1,
"totalNonCancelledCustomer" : 0
},
{
"_id" : {
"year" : NumberInt(2019),
"month" : NumberInt(4)
},
"totalThisMonth" : 2,
"totalCancelledCustomer" : 2,
"totalNonCancelledCustomer" : 0
}
]
}

Mongodb use multiple group operator in single aggregation

I am using mongodb aggregation for getting counts of different fields. Here are some documents from the mobile collection:-
{
"title": "Moto G",
"manufacturer": "Motorola",
"releasing": ISODate("2011-03-00T10:26:48.424Z"),
"rating": "high"
}
{
"title": "Asus Zenfone 2",
"manufacturer": "Asus",
"releasing": ISODate("2014-10-00T10:26:48.424Z"),
"rating": "high"
}
{
"title": "Moto Z",
"manufacturer": "Motorola",
"releasing": ISODate("2016-10-12T10:26:48.424Z"),
"rating": "none"
}
{
"title": "Asus Zenfone 3",
"manufacturer": "Asus",
"releasing": ISODate("2016-08-00T10:26:48.424Z"),
"rating": "medium"
}
I can find manufacturer and rating counts but this fails:
db.mobile.aggregate([
{
$group: { _id: "$manufacturer", count: { $sum: 1 } }
}, {
$group: { _id: "$rating", count: { $sum: 1 } }
}
])
Output:-
{
"_id" : null,
"count" : 2.0
}
Expected Output something like:-
{
"_id":"Motorola",
"count" : 2.0
}
{
"_id":"Asus",
"count" : 2.0
}
{
"_id":"high",
"count" : 2.0
}
{
"_id":"none",
"count" : 1.0
}
{
"_id":"medium",
"count" : 1.0
}
I believe you are after an aggregation operation that groups the documents by the manufacturer and rating keys, then do a further group on the manufacturer while aggregating the ratings per manufacturer, something like the following pipeline:
// Per manufacturer: the total number of documents plus a breakdown of counts per rating.
db.mobile.aggregate([
    {
        // Stage 1: one bucket per (manufacturer, rating) pair with its document count.
        "$group": {
            "_id": {
                "manufacturer": "$manufacturer",
                "rating": "$rating"
            },
            "count": { "$sum": 1 }
        }
    },
    {
        // Stage 2: fold the per-rating buckets into one document per manufacturer.
        "$group": {
            "_id": "$_id.manufacturer",
            // Sum the bucket counts. { "$sum": 1 } here would only count the number
            // of distinct ratings, not the number of documents for the manufacturer.
            "total": { "$sum": "$count" },
            "counts": {
                "$push": {
                    "rating": "$_id.rating",
                    "count": "$count"
                }
            }
        }
    }
])
Sample Output
/* 1 */
{
"_id" : "Motorola",
"total" : 2,
"counts" : [
{
"rating" : "high",
"count" : 1
},
{
"rating" : "none",
"count" : 1
}
]
}
/* 2 */
{
"_id" : "Asus",
"total" : 2,
"counts" : [
{
"rating" : "high",
"count" : 1
},
{
"rating" : "medium",
"count" : 1
}
]
}
or if you are after a more "flat" or "denormalised" result, run this aggregate operation:
db.mobile.aggregate([
{
"$group": {
"_id": "$manufacturer",
"total": { "$sum": 1 },
"high_ratings": {
"$sum": {
"$cond": [ { "$eq": [ "$rating", "high" ] }, 1, 0 ]
}
},
"medium_ratings": {
"$sum": {
"$cond": [ { "$eq": [ "$rating", "medium" ] }, 1, 0 ]
}
},
"low_ratings": {
"$sum": {
"$cond": [ { "$eq": [ "$rating", "low" ] }, 1, 0 ]
}
},
"none_ratings": {
"$sum": {
"$cond": [ { "$eq": [ "$rating", "none" ] }, 1, 0 ]
}
}
}
}
])
Sample Output
/* 1 */
{
"_id" : "Motorola",
"total" : 2,
"high_ratings" : 1,
"medium_ratings" : 0,
"low_ratings" : 0,
"none_ratings" : 1
}
/* 2 */
{
"_id" : "Asus",
"total" : 2,
"high_ratings" : 1,
"medium_ratings" : 1,
"low_ratings" : 0,
"none_ratings" : 0
}

MongoDb - Query collection by aggregating the product of a sub-set of values

Let's say I have a collection:
[
{
_id: product_a,
values: [
{ id: 1, value: 0 },
{ id: 2, value: 1 },
{ id: 3, value: 2 },
]
},
{
_id: product_b,
values: [
{ id: 1, value: 1 },
{ id: 2, value: 2 },
{ id: 3, value: 2 },
]
},
// etc ...
];
Is there any way to query this collection by aggregating the product of a sub-set of "values"?
If I query { values: [ 1, 3 ] }, I would get something like:
[
{
_id: product_a,
result: 0 // since 0 * 2 = 0
},
{
_id: product_b,
result: 2 // since 1 * 2 = 2
},
// etc ...
];
Here is how you can do it with the schema the way you have it using 2.2 aggregation framework. Note that this would be a lot simpler if the id/value pairs were stored with id as key.
Your aggregation pipeline:
[
{
"$unwind" : "$values"
},
{
"$match" : {
"values.id" : {
"$in" : [
1,
3
]
}
}
},
{
"$project" : {
"_id" : 1,
"val1" : {
"$cond" : [
{
"$eq" : [
"$values.id",
1
]
},
"$values.value",
-1
]
},
"val3" : {
"$cond" : [
{
"$eq" : [
"$values.id",
3
]
},
"$values.value",
-1
]
}
}
},
{
"$group" : {
"_id" : "$_id",
"val1" : {
"$max" : "$val1"
},
"val3" : {
"$max" : "$val3"
}
}
},
{
"$project" : {
"_id" : 1,
"result" : {
"$multiply" : [
"$val1",
"$val3"
]
}
}
}
]
Note that the last step would do the job if the structure of your document was:
{_id: "product_x", values: [ {id1: value}, {id2: value} etc. ]}
Now run the query from the shell with:
> db.collection.aggregate(pipeline)
Or from your code via db.runCommand({"aggregate":"collection","pipeline":[...]})