MongoDB: Total count of distinct values in array field - mongodb

I've got simple documents like
{
...,
things: ["a", "b", "c"]
},
{
...,
things: ["b", "d", "e"]
},
...
and I want a total count of distinct things, in this case: ["a","b","c","d","e"], the result should be 5.
How to do this using an aggregate query?
My current attempt is
[
{
"$group": {
"_id": "things",
"things": { "$push": "$things" }
}
},
{
"$addFields": {
"allThings": {
"$reduce": {
"input": "$things",
"initialValue": [],
"in": {
"$concatArrays": ["$$value", "$$this"]
}
}
}
}
}
]
which yields all fields in a single array, but I have no idea how to count distinct values in there now.
Happy for any help!

This is literally the first MongoDB query I ever wrote. It works, but happy to hear about improvements:
db.collection.aggregate([
{
$group: {
_id: null,
things: { $push: "$things"}
}
},
{
$project: {
_id: 0,
things: {
$reduce: {
input: "$things",
initialValue: [],
in: {
$setUnion: [ "$$this", "$$value"]
}
}
}
}
},
{
$project: {
item: 1,
numberOfUniqueThings: { $size: "$things" }
}
}
])

Related

MongoDB get $sum of fields created via $addFields

I'm trying to get sum of fields that were created with $addFields operator.
I'd like to get sum of fields for the first month among all documents.
Please see link to the MongoDB sandbox.
Data:
[
{
"key": 1,
"account": "a",
"cases_total_date": {
"20220101": 1,
"20220102": 2,
"20220103": 3,
"20220501": 4,
"20221201": 5,
"20221202": 6,
}
},
{
"key": 2,
"account": "b",
"cases_total_date": {
"20220101": 11,
"20220102": 12,
"20220103": 13,
"20220501": 14,
"20221201": 15,
"20221202": 16,
}
}
]
Query I've tried:
db.collection.aggregate([
{
"$match": {
"account": {
"$in": [
"a",
"b"
]
}
}
},
{
"$addFields": {
"cases_total_months|202201": {
"$sum": [
"$cases_total_date.20220101",
"$cases_total_date.20220102",
"$cases_total_date.20220103"
]
}
}
},
{
"$group": {
"_id": "",
"cases_total_months|202201_all": {
"$sum": "$cases_total_months|20220101"
}
}
}
])
The response I've got vs expected:
[
{
"_id": "",
"cases_total_months|202201_all": 0 # EXPECTED sum of fields from 2 docs 6+36=42
}
]
Would appreciate any feedback. Thank you!
Using dynamic values as field names is considered an anti-pattern and introduces unnecessary complexity to the queries. With a proper schema, you can do something simple as this:
db.collection.aggregate([
{
"$set": {
"cases_total_months|202201_all": {
"$filter": {
"input": "$cases_total_date",
"as": "ctd",
"cond": {
$and: [
{
$eq: [
2022,
{
$year: "$$ctd.date"
}
]
},
{
$eq: [
1,
{
$month: "$$ctd.date"
}
]
}
]
}
}
}
}
},
{
$group: {
_id: null,
"cases_total_months|202201_all": {
$sum: {
$sum: "$cases_total_months|202201_all.value"
}
}
}
}
])
Mongo Playground
For your current schema, you can still rely on $objectToArray and iterate through the resulting k-v tuples to get what you need.
db.collection.aggregate([
{
$set: {
cases_total_date: {
"$objectToArray": "$cases_total_date"
}
}
},
{
$set: {
"cases_total_months|202201_all": {
"$filter": {
"input": "$cases_total_date",
"as": "ctd",
"cond": {
$eq: [
0,
{
"$indexOfCP": [
"$$ctd.k",
"202201"
]
}
]
}
}
}
}
},
{
$set: {
"cases_total_months|202201_all": {
$sum: "$cases_total_months|202201_all.v"
}
}
},
{
$group: {
_id: null,
"cases_total_months|202201_all": {
$sum: "$cases_total_months|202201_all"
}
}
}
])
Mongo Playground

Sum all the fields in documents in MongoDB

I have documents with a number of tokens in text, one document for a text. I need to calculate a total token count across documents. I don't know the exact number and keys of these fields, so I cannot use $sum for each field individually.
For example, I have two documents:
{
"count": {
"a": 1,
"b": 5,
"c": 7
}
}
{
"count": {
"a": 4,
"c": 2,
"d": 6
}
}
I want to aggregate them and get
{
"count": {
"a": 5,
"b": 5,
"c": 9,
"d": 6
}
}
As I understand, it is not possible, but I just wanted to make sure
Maybe something like this:
db.collection.aggregate([
{
$set: {
count: {
"$objectToArray": "$count"
}
}
},
{
$unwind: "$count"
},
{
$group: {
_id: "$count.k",
v: {
$sum: "$count.v"
}
}
},
{
$project: {
v: 1,
k: "$_id",
_id: 0
}
},
{
$group: {
_id: "total",
count: {
$push: {
k: "$k",
v: "$v"
}
}
}
},
{
$project: {
_id: 0,
count: {
"$arrayToObject": "$count"
}
}
}
])
Explained:
Convert the objects to array(to be easy to manipulate unknown number of fields )
Unwind the array
group by the keys , sum the values.
project the necessary key / values.
group all in single array
Project to convert the array to object as per the expectations
Playground1
Version 2 ( Without unwind ):
db.collection.aggregate([
{
$set: {
count: {
"$objectToArray": "$count"
}
}
},
{
"$group": {
"_id": "test",
count: {
"$push": "$count"
}
}
},
{
$set: {
count: {
$reduce: {
input: {
$concatArrays: "$count"
},
initialValue: [],
in: {
$setUnion: [
"$$this",
"$$value"
]
}
}
}
}
},
{
"$addFields": {
"count": {
"$arrayToObject": {
"$map": {
"input": "$count",
"as": "m",
"in": {
"k": "$$m.k",
"v": {
"$sum": {
"$map": {
"input": "$count",
"as": "d",
"in": {
"$cond": [
{
"$eq": [
"$$d.k",
"$$m.k"
]
},
"$$d.v",
0
]
}
}
}
}
}
}
}
}
}
}
])
Explained:
Convert object to array to be easier later
Group in single document with push
Concat the arrays to single array
Using two nested $map's group & $sum inside the k/v array and convert back to object.
Playground 2

Mongoose Summing Up subdocument array elements having same alias

I have a document like this:
_id:'someId',
sales:
[
{
_id:'111',
alias:'xxx',
amount:500,
name: Apple, //items with same alias always have same name and quantity
quantity:2
},
{
_id:'222',
alias:'abc',
amount:100,
name: Orange,
quantity:14
},
{
_id:'333',
alias:'xxx',
amount:300,
name: Apple, //items with same alias always have same name and quantity
quantity:2
}
]
The alias field is here to 'group' items/documents whenever they appear to have same alias i.e to be 'embeded' as one with the amount summed up.
I need to display some sort of a report in such a way that those elements which have same alias they should be displayed as ONE and the others which doesn't share same alias to remain as they are.
Example, For the sample document above, I need an output like this
[
{
alias:'xxx',
amount:800
},
{
alias:'abc',
amount:100
}
]
WHAT I HAVE TRIED
MyShop.aggregate([
{$group:{
_id: "$_id",
sales:{$last :"$sales"}
},
{$project:{
"sales.amount":1
}}
}
])
This just displays as a 'list' regardless of the alias. How do I achieve summing up amount based on the alias?
You can achieve this using $group
db.collection.aggregate([
{
$unwind: "$sales"
},
{
$group: {
_id: {
_id: "$_id",
alias: "$sales.alias"
},
sales: {
$first: "$sales"
},
_idsInvolved: {
$push: "$sales._id"
},
amount: {
$sum: "$sales.amount"
}
}
},
{
$group: {
_id: "$_id._id",
sales: {
$push: {
$mergeObjects: [
"$sales",
{
alias: "$_id.alias",
amount: "$amount",
_idsInvolved: "$_idsInvolved"
}
]
}
}
}
}
])
Mongo Playground
You can use below aggregation
db.collection.aggregate([
{
"$addFields": {
"sales": {
"$map": {
"input": {
"$setUnion": [
"$sales.alias"
]
},
"as": "m",
"in": {
"$let": {
"vars": {
"a": {
"$filter": {
"input": "$sales",
"as": "d",
"cond": {
"$eq": [
"$$d.alias",
"$$m"
]
}
}
}
},
"in": {
"amount": {
"$sum": "$$a.amount"
},
"alias": "$$m",
"_idsInvolved": "$$a._id"
}
}
}
}
}
}
}
])
MongoPlayground

$group inner array values without $unwind

I want to group objects in the array by same value for specified field and produce a count.
I have the following mongodb document (non-relevant fields are not present).
{
arrayField: [
{ fieldA: value1, ...otherFields },
{ fieldA: value2, ...otherFields },
{ fieldA: value2, ...otherFields }
],
...otherFields
}
The following is what I want.
{
arrayField: [
{ fieldA: value1, ...otherFields },
{ fieldA: value2, ...otherFields },
{ fieldA: value2, ...otherFields }
],
newArrayField: [
{ fieldA: value1, count: 1 },
{ fieldA: value2, count: 2 },
],
...otherFields
}
Here I grouped embedded documents by fieldA.
I know how to do it with unwind and 2 group stages the following way. (irrelevant stages are ommited)
Concrete example
// document structure
{
_id: ObjectId(...),
type: "test",
results: [
{ choice: "a" },
{ choice: "b" },
{ choice: "a" }
]
}
db.test.aggregate([
{ $match: {} },
{
$unwind: {
path: "$results",
preserveNullAndEmptyArrays: true
}
},
{
$group: {
_id: {
_id: "$_id",
type: "$type",
choice: "$results.choice",
},
count: { $sum: 1 }
}
},
{
$group: {
_id: {
_id: "$_id._id",
type: "$_id.type",
result: "$results.choice",
},
groupedResults: { $push: { count: "$count", choice: "$_id.choice" } }
}
}
])
You can use below aggregation
db.test.aggregate([
{ "$addFields": {
"newArrayField": {
"$map": {
"input": { "$setUnion": ["$arrayField.fieldA"] },
"as": "m",
"in": {
"fieldA": "$$m",
"count": {
"$size": {
"$filter": {
"input": "$arrayField",
"as": "d",
"cond": { "$eq": ["$$d.fieldA", "$$m"] }
}
}
}
}
}
}
}}
])
The below adds a new array field, which is generated by:
Using $setUnion to get unique set of array items, with inner $map to
extract only the choice field
Using $map on the unique set of items,
with inner $reduce on the original array, to sum all items where
choice matches
Pipeline:
db.test.aggregate([{
$addFields: {
newArrayField: {
$map: {
input: {
$setUnion: [{
$map: {
input: "$results",
in: { choice: "$$this.choice" }
}
}
]
},
as: "i",
in: {
choice: '$$i.choice',
count: {
$reduce: {
input: "$results",
initialValue: 0,
in: {
$sum: ["$$value", { $cond: [ { $eq: [ "$$this.choice", "$$i.choice" ] }, 1, 0 ] }]
}
}
}
}
}
}
}
}])
The $reduce will iterate over the results array n times, where n is the number of unique values of choice, so the performance will depend on that.

MongoDB > extract collection from nested array

I've been trying every method I found on SO with no success. Trying
to accomplish a seemingly simple task (very easy with json/lodash for example) in MongoDB..
I have a collection:
db.users >
[
{
_id: 'userid',
profile: {
username: 'abc',
tests: [
{
_id: 'testid',
meta: {
category: 'math',
date: '9/2/2017',
...
}
questions: [
{
type: 'add',
correct: true,
},
{
type: 'subtract',
correct: true,
},
{
type: 'add',
correct: false,
},
{
type: 'multiply',
correct: false,
},
]
},
...
]
}
},
...
]
I want to end up with an array grouped by question type:
[
{
type: 'add',
correct: 5,
wrong: 3,
},
{
type: 'subtract',
correct: 4,
wrong: 9
}
...
]
I've tried different variations of aggregate, last one is:
db.users.aggregate([
{ $match: { 'profile.tests.meta.category': 'math' }},
{
$project: {
tests: {
$filter: {
input: "$profile.tests",
as: "test",
cond: { $eq: ['$$test.meta.category', 'math'] }
}
}
}
},
{
$project: {
question: "$tests.questions"
}
},
{ $unwind: "$questions"},
])
Also tried adding $group at the end of the pipeline:
{
$group:
{
_id: '$questions.type',
res: {
$addToSet: { correct: {$eq:['$questions.chosenAnswer', '$questions.answers.correct'] }
}
}
}
No variation gave me what I'm looking for, I'm sure I'm missing a core concept, I've looked over the documentation and couldn't figure it out.. what I'm basically looking for is a flatMap to extract away all the questions of all users and group them by type.
If anyone can lead me in the right direction, I'll greatly appreciate it :) thx. (Also, I'm using Meteor, so any query has to work in Meteor mongo)
You can try below aggregation in 3.4.
$filter to filter math categories with $map to project questions array in each matching category followed by $reduce and $concatArrays to get all questions into single array for all matching categories.
$unwind questions array and $group by type and $sum to compute correct and wrong count.
db.users.aggregate([
{
"$match": {
"profile.tests.meta.category": "math"
}
},
{
"$project": {
"questions": {
"$reduce": {
"input": {
"$map": {
"input": {
"$filter": {
"input": "$profile.tests",
"as": "testf",
"cond": {
"$eq": [
"$$testf.meta.category",
"math"
]
}
}
},
"as": "testm",
"in": "$$testm.questions"
}
},
"initialValue": [],
"in": {
"$concatArrays": [
"$$value",
"$$this"
]
}
}
}
}
},
{
"$unwind": "$questions"
},
{
"$group": {
"_id": "$questions.type",
"correct": {
"$sum": {
"$cond": [
{
"$eq": [
"$questions.correct",
true
]
},
1,
0
]
}
},
"wrong": {
"$sum": {
"$cond": [
{
"$eq": [
"$questions.correct",
false
]
},
1,
0
]
}
}
}
}
])