MongoDB group by values and count the number of occurrences - mongodb

I am trying to count how many times a particular value occurs in a collection.
{
_id:1,
field1: value,
field2: A,
}
{
_id:2,
field1: value,
field2: A,
}
{
_id:3,
field1: value,
field2: C,
}
{
_id:4,
field1: value,
field2: B,
}
what I want is to count how many times A occurs, B occurs and C occurs and return the count.
The output I want
{
A: 2,
B: 1,
C: 1,
}

You can use $facet in an aggregate pipeline like this:
$facet creates three sub-pipelines ("three ways"), each of which filters the documents by the desired value (A, B or C).
Then in a $project stage you can get the $size of the matched values.
// Count how often each of the known field2 values occurs, in one aggregation.
db.collection.aggregate([
  {
    // One sub-pipeline per value; each keeps only the documents carrying that value.
    $facet: {
      first: [{ $match: { field2: "A" } }],
      second: [{ $match: { field2: "B" } }],
      third: [{ $match: { field2: "C" } }]
    }
  },
  {
    // The length of each facet array is the number of matching documents.
    $project: {
      A: { $size: "$first" },
      B: { $size: "$second" },
      C: { $size: "$third" }
    }
  }
])
Example here

This is typical use case for $group stage in Aggregation Pipeline. You can do it like this:
$group - to group all the documents by field2
$sum - to count the number of documents for each value of field2
// Group the documents by field2 and count the members of each group.
db.collection.aggregate([
  {
    $group: {
      // One output document per distinct field2 value.
      _id: "$field2",
      // Adds 1 for every document that falls into the group.
      count: { $sum: 1 }
    }
  }
])
Working example

Leverage the $arrayToObject operator and a final $replaceWith pipeline to get the desired result. You would need to run the following aggregate pipeline:
// Produce a single { <value>: <count> } document from the per-value counts.
db.collection.aggregate([
  // Count documents per upper-cased field2 value.
  {
    $group: {
      _id: { $toUpper: "$field2" },
      count: { $sum: 1 }
    }
  },
  // Collapse all groups into one array of { k, v } pairs.
  {
    $group: {
      _id: null,
      counts: { $push: { k: "$_id", v: "$count" } }
    }
  },
  // Convert the pairs into an object and make it the output document.
  { $replaceWith: { $arrayToObject: "$counts" } }
])
Mongo Playground

Related

Find the distinct values in array of field, count them and write to another collection as array of string

Is it possible to get the distinct values of a field that is an array of strings and write the output of distinct to another collection, as shown below from src_coll to dst_coll?
src_coll
{"_id": ObjectId("61968a26c05149a23ad391f4"),"letters": ["aa", "ab", "ac", "ad", "aa", "af"] , "numbers":[11,12,13,14] }
{"_id": ObjectId("61968a26c05149a23ad391f5"),"letters": ["ab", "af", "ag", "ah", "ai", "aj"] , "numbers":[15,16,17,18] }
{"_id": ObjectId("61968a26c05149a23ad391f6"),"letters": ["ac", "ad", "ae", "af", "ag", "ah"] , "numbers":[16,17,18,19] }
{"_id": ObjectId("61968a26c05149a23ad391f7"),"letters": ["ae", "af", "ag", "ah", "ai", "aj"] , "numbers":[17,18,19,20] }
dst_coll
{"_id": ObjectId("61968a26c05149a23ad391f8"),"all_letters": ["aa", "ab", "ac", "ad", "ae", "af", "ag", "ah", "ai", "aj"] }
I have seen the answer using distinct:
db.src_coll.distinct('letters') and using aggregate (if the collection is huge, because I was getting the error "Executor error during distinct command :: caused by :: distinct too big, 16mb cap"). I used:
db.src_coll.aggregate([ { $group: { _id: "$letters" } }, { $count: "letters_count" }], { allowDiskUse: true })
I do not know how to write the output of distinct or aggregate as shown in dst_coll.
My collection contains 522 documents, Total Size = 314 MB, but the field letters contains thousands of string values in array for each document.
I appreciate your time to reply.
Thanks
Method I
I am assuming you are trying to create a single document containing all the distinct values in the letters field across all documents in src_coll. You can create a collection based on aggregation output using either $out or $merge. But $out would replace your collection if it already exists.
Unwinding the array here could run out of memory, in which case you will have to use the { allowDiskUse: true } option.
// Collect every distinct element of the letters arrays into one document
// and write that document to dst_coll.
db.collection.aggregate([
  // Emit one document per array element.
  { $unwind: "$letters" },
  // A single group (_id: null) whose set holds each element once.
  {
    $group: {
      _id: null,
      all_letters: { $addToSet: "$letters" }
    }
  },
  // Write the grouped document into dst_coll.
  { $merge: { into: "dst_coll" } }
])
Demo
Method II
Another way to do this without $unwind is to use the $reduce operator, which is more efficient.
// Build the distinct set without $unwind: gather the arrays, then union them.
db.collection.aggregate([
  // Gather the distinct letters arrays (whole arrays, not elements) in one group.
  {
    $group: {
      _id: null,
      all_letters: { $addToSet: "$letters" }
    }
  },
  // Fold the array of arrays into a single set of elements.
  {
    $project: {
      all_letters: {
        $reduce: {
          input: "$all_letters",
          initialValue: [],
          in: { $setUnion: ["$$value", "$$this"] }
        }
      }
    }
  },
  // Write the resulting document into dst_coll.
  { $merge: { into: "dst_coll" } }
])
Demo
Method III
Since we are going to create a single document from a collection using $group, large collections are likely to run into memory issues. A way to avoid this is to break the grouping down into multiple stages, so that no single stage has to keep a lot of documents in memory.
// Grouping is done in several steps: two $bucketAuto passes, then a final $group.
db.collection.aggregate([
  { $unwind: "$letters" },
  {
    $bucketAuto: {
      groupBy: "$_id",
      // Adjust the bucket count so that each output document covers a range of input documents.
      buckets: 10000,
      output: { all_letters: { $addToSet: "$letters" } }
    }
  },
  {
    // Bucket the buckets again; all_letters becomes an array of arrays here.
    $bucketAuto: {
      groupBy: "$_id",
      buckets: 1000,
      output: { all_letters: { $addToSet: "$all_letters" } }
    }
  },
  {
    // Fold the array of arrays into one set per bucket.
    $project: {
      all_letters: {
        $reduce: {
          input: "$all_letters",
          initialValue: [],
          in: { $setUnion: ["$$value", "$$this"] }
        }
      }
    }
  },
  {
    // Bring the per-bucket sets together in a single group.
    $group: {
      _id: null,
      all_letters: { $addToSet: "$all_letters" }
    }
  },
  {
    // Fold once more to obtain the final set of elements.
    $project: {
      all_letters: {
        $reduce: {
          input: "$all_letters",
          initialValue: [],
          in: { $setUnion: ["$$value", "$$this"] }
        }
      }
    }
  },
  // Write the resulting document into dst_coll.
  { $merge: { into: "dst_coll" } }
])
Refer to $bucketAuto and Aggregation Pipeline Limits.
Demo
Here's my solution for it but I'm not sure if it's the optimal way.
Algorithm:
Unwind all the arrays
Group by letters which will give only unique results
Group them again to get a single result
Use the $out stage to write the result to another collection:
Aggregation pipeline:
// Write all distinct elements of the letters arrays to another collection as one document.
db.collection.aggregate([
  // Keep only the letters array.
  { $project: { letters: 1, _id: 0 } },
  // Emit one document per array element.
  { $unwind: "$letters" },
  // One group per distinct element.
  { $group: { _id: "$letters" } },
  // Collect the distinct elements into a single array. The field is named
  // all_letters to match the target document shape asked for in the question.
  {
    $group: {
      _id: null,
      all_letters: { $addToSet: "$_id" }
    }
  },
  // Replace "your-collection-name" with the name of the target collection.
  { $out: "your-collection-name" }
])
Kindly see the docs for $out stage yourself.
See the solution on mongodb playground: Query

How to compute frequency for multiple fields using a single pipeline in MongoDB?

Is it possible to calculate the frequency of multiple fields with a single query in MongoDB? I can do that with separate $group stages for each field. How can I optimize it and build one pipeline that can do the job for all items?
I have the following pipeline in MongoDB 4.5
{
$match: {
field1: { $in: ['value1', 'value2'] },
field2: { $in: ['v1', 'v2'] },
}
},
{
$group: {
_id: {
field1: '$field1',
field2: '$field2'
},
frequency: { $sum: 1.0 }
}
}
From this, I obtain data like the following:
{
"_id": {
"field1": "value1",
"field2": "v1"
},
"count": 7.0
},
{
"_id": {
"field1": "value1",
"field2": "v2"
},
"count": 3.0
},
{
"_id": {
"field1": "value2",
"field2": "v1"
},
"count": 4.0
}
The result that I am trying to get is:
{
"field1": [
"value1": 10.0,
"value2": 4.0
],
"field2": [
"v1": 11.0,
"v2": 3.0
]
}
convert your required fields into array key-value format using $objectToArray
$unwind to deconstruct the above converted array
$group by key and value and count sum
$group by key and construct the array of value and count
$group by null and construct the array of field and above array after converting from $arrayToObject
$replaceRoot to replace the root document with the above array after converting it to an object with $arrayToObject
// Compute, in one pipeline, how often each value of field1 and field2 occurs.
db.collection.aggregate([
  {
    $match: {
      field1: { $in: ["value1", "value2"] },
      field2: { $in: ["v1", "v2"] }
    }
  },
  {
    // Turn the fields of interest into [{ k, v }, ...] pairs. The keys used here
    // become the keys of the final result, so they must be field1 / field2.
    $project: {
      arr: {
        $objectToArray: {
          field1: "$field1",
          field2: "$field2"
        }
      }
    }
  },
  // One document per (field name, value) pair.
  { $unwind: "$arr" },
  {
    // Count the occurrences of every (field name, value) pair.
    $group: {
      _id: {
        k: "$arr.k",
        v: "$arr.v"
      },
      count: { $sum: 1 }
    }
  },
  {
    // Per field name, collect its { value, count } pairs.
    $group: {
      _id: "$_id.k",
      arr: {
        $push: {
          k: "$_id.v",
          v: "$count"
        }
      }
    }
  },
  {
    // Single group: one { field name, { value: count, ... } } pair per field.
    $group: {
      _id: null,
      arr: {
        $push: {
          k: "$_id",
          v: { $arrayToObject: "$arr" }
        }
      }
    }
  },
  // Convert the pairs into an object and make it the output document.
  { $replaceRoot: { newRoot: { $arrayToObject: "$arr" } } }
])
Playground

All fields containing given subfield in MongoDB

In MongoDB I know I can check if a subfield exist with $exists and dot notation, like this:
...{field.subfield: {$exists: 1}}...
but this obviously only works if I know which field to look in for my subfield.
I have documents in this format:
{
field1: {
subfield1: asd,
subfield2: asd
},
field2: {
subfield2: asd,
subfield3: asd
},
field3: {
subfield1: asd,
subfield3: asd,
}
}
In this example, say that my subfield of interest is subfield2, I want to project (or match, or whatever, I'm looking for a general answer) only field1 and field2, excluding field3 (that doesn't contain subfield2).
My documents could contain tens or hundreds of fields, so manually checking one by one isn't viable.
Is this what you want?
// For every document, keep only the top-level fields that contain subfield2.
// Works per document and does not need the field names to be known in advance.
// NOTE(review): assumes the top-level fields are embedded documents, as in the
// question; array-valued fields would need extra handling.
db.collection.aggregate([
  {
    $project: {
      _id: 0,
      matched: {
        $filter: {
          // [{ k: <field name>, v: <field value> }, ...] for the whole document.
          input: { $objectToArray: "$$ROOT" },
          as: "entry",
          // $type yields "missing" when the path does not exist; scalar entries
          // such as _id have no subfield2 and are dropped here as well.
          cond: { $ne: [{ $type: "$$entry.v.subfield2" }, "missing"] }
        }
      }
    }
  },
  // Skip documents in which no field contains subfield2.
  { $match: { "matched.0": { $exists: true } } },
  // Rebuild a document from the surviving fields.
  { $replaceRoot: { newRoot: { $arrayToObject: "$matched" } } }
])
Try it here

How to sort multiple array independently in one mongo document using aggregation

After some aggregations I have only one document like this:
{
a:[3,5,8,1,...],
b:[5,2,6,5,...],
c:[2,6,3,1,...]
}
What is the best way to get:
{
a:[1,2,3,4,...],
b:[1,2,3,4,...],
c:[1,2,3,4,...]
}
I need it to be sorted to do other aggregations so I can't use find or update.
The closest working solution I've got is the following aggregation:
[
  {
    // Sort each array in its own sub-pipeline: explode, order, reassemble.
    $facet: {
      a: [
        { $unwind: "$a" },
        { $sort: { a: 1 } },
        { $group: { _id: "$_id", a: { $push: "$a" } } }
      ],
      b: [
        { $unwind: "$b" },
        { $sort: { b: 1 } },
        { $group: { _id: "$_id", b: { $push: "$b" } } }
      ],
      c: [
        { $unwind: "$c" },
        { $sort: { c: 1 } },
        { $group: { _id: "$_id", c: { $push: "$c" } } }
      ]
    }
  },
  {
    // Each facet is an array of grouped documents; take the sorted array
    // out of the first entry of each.
    $set: {
      a: { $arrayElemAt: ["$a.a", 0] },
      b: { $arrayElemAt: ["$b.b", 0] },
      c: { $arrayElemAt: ["$c.c", 0] }
    }
  }
]
In short, what this does is: $facet performs identical operations on each a, b, c field in parallel; each field is split with $unwind, sorted in ascending order and then grouped on itself.
At this point the fields are already sorted, but each field now contains an object with the appropriate name (for example, { _id: '', a: [...] }). So, to get back to the arrays we had earlier, another stage simply extracts the needed field from each object. This is done with $set and $arrayElemAt (which takes the first array element).
So, in the end we have each field sorted in ascending order while having the same amount of documents.
Note: this only works if you have a single result document. If you have multiple documents, you will need to change the stages slightly:
$facet stage remains the same
The second ($set) stage is replaced by the following:
Use an $unwind stage plus an updated $set stage for each separate field (in this case a, b and c):
{ $unwind: '$a' },
{ $unwind: '$b' },
{ $unwind: '$c' },
{ $set: { a: '$a.a' } },
{ $set: { b: '$b.b' } },
{ $set: { c: '$c.c' } }
This might not be the prettiest approach but I could not find shorter version.
Starting at version 4.4, you can use $function and sort your arrays directly with javascript.
// Sort the a, b and c arrays in ascending numeric order with server-side JavaScript.
db.collection.aggregate([
  {
    $set: {
      a: {
        $function: {
          lang: "js",
          args: ["$a"],
          body: "function(a){a.sort((x, y)=>x-y);return a;}"
        }
      },
      b: {
        $function: {
          lang: "js",
          args: ["$b"],
          body: "function(b){b.sort((x, y)=>x-y);return b;}"
        }
      },
      c: {
        $function: {
          lang: "js",
          args: ["$c"],
          body: "function(c){c.sort((x, y)=>x-y);return c;}"
        }
      }
    }
  }
])
You can test it here.

Get count of records with field existing in MongoDB

I have a MongoDB collection with records in the following format:
[
{ "item1": { "a": 1 }, "item2": { "a": 2 } },
{ "item1": { "a": 3 }, "item3": { "a": 4 } },
{ "item1": { "a": 5 }, "item2": { "a": 6 } },
]
I want to get a count of records having the fields item1, item2, and item3 (They don't need to be dynamic. I have only a finite set of items). What I need is a count of records with field existing in the following fashion:
{ "item1": 3, "item2": 2, "item3": 1 }
For getting the count for item1, I do this:
db.collection.find({ "item1": { $exists: true }}).count()
Is there an easy way to aggregate the count of all three items in a single query?
You can use $objectToArray and $arrayToObject to count your keys dynamically:
// Count, per top-level key, how many documents contain that key.
db.collection.aggregate([
  // Represent each document as an array of { k, v } pairs.
  {
    $project: {
      root: { $objectToArray: "$$ROOT" }
    }
  },
  // One document per key/value pair.
  { $unwind: "$root" },
  // Count how many times each key name appears.
  {
    $group: {
      _id: "$root.k",
      total: { $sum: 1 }
    }
  },
  // Collapse the per-key totals into one array of { k, v } pairs.
  {
    $group: {
      _id: null,
      obj: { $push: { k: "$_id", v: "$total" } }
    }
  },
  // Convert the pairs into an object and make it the output document.
  {
    $replaceRoot: {
      newRoot: { $arrayToObject: "$obj" }
    }
  },
  // Drop the _id entry from the result.
  {
    $project: { _id: 0 }
  }
])
Mongo Playground