2-level group by for objects in an array - mongodb

Good day SO Community,
I would like to ask for your help in creating the correct aggregation pipeline for sample data:
[
{
"group": "A",
"subgroup": "A1",
"name": "Abby"
},
{
"group": "A",
"subgroup": "A2",
"name": "Andy"
},
{
"group": "A",
"subgroup": "A2",
"name": "Amber"
},
{
"group": "B",
"subgroup": "B1",
"name": "Bart"
}
]
I want to group by group first, then for each group, group by subgroup.
The names will also go to their respective subgroup and the count is showing the actual count.
My expected output is as follows:
[
{
"_id": "B",
"count": 1,
"subgroup": [
{
"_id": "B1",
"count": 1,
"names": ["Bart"]
}
]
},
{
"_id": "A",
"count": 3,
"subgroup": [
{
"_id": "A1",
"count": 1,
"names":[ "Abby"]
},
{
"_id": "A2",
"count": 2,
"names": ["Amber", "Andy"]
}
]
}
]
I have tried this pipeline but it's not grouping the subgroups.
{
"$group": {
"_id": "$group",
"subgroup": {
"$addToSet": {
"_id": "$subgroup",
"name": "$name",
count: {
$sum: 1
}
}
},
count: {
$sum: 1
}
}
}
The aggregation pipeline and actual output can be seen in the playground:
https://mongoplayground.net/p/MO1fCf21Rez
Thank you!

$group - Group by group and subgroup. Perform count and add name into names array.
$group - Group by group. Perform total count and add the object for subgroup into subgroup array.
db.students.aggregate([
{
$group: {
_id: {
group: "$group",
subgroup: "$subgroup"
},
names: {
$push: "$name"
},
count: {
$sum: 1
}
}
},
{
"$group": {
"_id": "$_id.group",
"subgroup": {
$addToSet: {
"_id": "$_id.subgroup",
"names": "$names",
count: "$count"
}
},
count: {
$sum: "$count"
}
}
}
])
Demo # Mongo Playground

Related

MongoDB - For each group select the records with the max value

In MongoDB, I'm trying to filter a collection down to only those documents that contain the most recent date by their respective group.
In traditional SQL I'd do something like:
Select *
From table a
Join (Select my_group, max(date) as max_date
From table group by my_group) b
ON a.my_group = b.my_group AND
a.date = b.max_date
With the following sample collection:
[
{
"_id": "123",
"item1": "group 1",
"item2": "abc",
"item3": "abc",
"date": "2022-01-01"
},
{
"_id": "234",
"item1": "group 1",
"item2": "abc",
"item3": "abc",
"date": "2022-01-02"
},
{
"_id": "345",
"item1": "group 1",
"item2": "abc",
"item3": "abc",
"date": "2022-01-02"
},
{
"_id": "789",
"item1": "group 2",
"item2": "abc",
"item3": "abc",
"date": "2022-01-01"
},
{
"_id": "678",
"item1": "group 2",
"item2": "abc",
"item3": "abc",
"date": "2022-01-02"
},
{
"_id": "456",
"item1": "group 2",
"item2": "abc",
"item3": "abc",
"date": "2022-01-02"
}
]
The expected output is:
[
{
"_id": "234",
"date": "2022-01-02",
"item1": "group 1",
"item2": "abc",
"item3": "abc"
},
{
"_id": "345",
"date": "2022-01-02",
"item1": "group 1",
"item2": "abc",
"item3": "abc"
},
{
"_id": "678",
"date": "2022-01-02",
"item1": "group 2",
"item2": "abc",
"item3": "abc"
},
{
"_id": "456",
"date": "2022-01-02",
"item1": "group 2",
"item2": "abc",
"item3": "abc"
}
]
My current best attempt is:
db.collection.aggregate([
{
$group: {
"_id": "$item1",
"max_date": {
$max: "$date"
},
"records": {
$push: "$$ROOT"
}
}
},
{
"$project": {
items: {
"$filter": {
"input": "$records",
"as": "records",
"cond": {
$eq: [
"$$records.date",
"$max_date"
]
}
}
}
}
},
{
$replaceRoot: {
newRoot: {
results: "$items"
}
}
}
])
Unfortunately, this returns the results partitioned by group. I've tried a few alternatives suggested by other posts & get a similar problem, eg:
How to group and select document corresponding to max within each group in MongoDB?
MongoDB get rows where max value grouped
Get all rows, groupped and with max value
Here's a playground example with the query & sample data.
You're close to the answer.
For the last 2 stages:
$unwind - Deconstruct the items array field to multiple documents.
$replaceWith - Replace the output document with items document.
db.collection.aggregate([
{
$group: {
"_id": "$item1",
"max_date": {
$max: "$date"
},
"records": {
$push: "$$ROOT"
}
}
},
{
"$project": {
items: {
"$filter": {
"input": "$records",
"as": "records",
"cond": {
$eq: [
"$$records.date",
"$max_date"
]
}
}
}
}
},
{
$unwind: "$items"
},
{
$replaceWith: "$items"
}
])
Sample Mongo Playground
Bonus
Although the query above is better, also would like to share the MongoDB query that is similar to SQL implementation.
$group - Group by item1 and get the max value of date.
$lookup - Self join the collection with item1 and date. And returns items array field.
$match - Filter the document with items not an empty array.
$unwind - Deconstruct the items array into multiple documents.
$replaceWith - Replace the output document with items document.
db.collection.aggregate([
{
$group: {
"_id": "$item1",
"max_date": {
$max: "$date"
}
}
},
{
$lookup: {
from: "collection",
let: {
item1: "$_id",
max_date: "$max_date"
},
pipeline: [
{
$match: {
$expr: {
$and: [
{
$eq: [
"$item1",
"$$item1"
]
},
{
$eq: [
"$date",
"$$max_date"
]
}
]
}
}
}
],
as: "items"
}
},
{
$match: {
items: {
$ne: []
}
}
},
{
$unwind: "$items"
},
{
$replaceWith: "$items"
}
])
Sample Mongo Playground (Bonus)

Adding Group counters and Items counter in MongoDB?

I have this Json :
[
{
"key": "guitar",
"name": "john"
},
{
"key": "guitar",
"name": "paul"
},
{
"key": "guitar",
"name": "george"
},
{
"key": "drums",
"name": "ringo"
}
]
Let's group by key and add items to each group :
db.collection.aggregate([
{
$sort: {
name: -1
}
},
{
$group: {
_id: "$key",
"projects": {
"$push": "$$ROOT"
},
count: {
$sum: 1
}
}
},
{
$sort: {
"_id": 1
}
}
])
Result:
[
{
"_id": "drums",
"count": 1,
"projects": [
{
"_id": ObjectId("5a934e000102030405000003"),
"key": "drums",
"name": "ringo"
}
]
},
{
"_id": "guitar",
"count": 3,
"projects": [
{
"_id": ObjectId("5a934e000102030405000001"),
"key": "guitar",
"name": "paul"
},
{
"_id": ObjectId("5a934e000102030405000000"),
"key": "guitar",
"name": "john"
},
{
"_id": ObjectId("5a934e000102030405000002"),
"key": "guitar",
"name": "george"
}
]
}
]
Question:
How can I add counter such as this :
[
{
"_id": "drums",
"Counter":0, // <--------------------------------
"count": 1,
"projects": [
{
"_id": ObjectId("5a934e000102030405000003"),
"key": "drums",
"name": "ringo",
"InnerCounter":0 // <--------------------------------
}
]
},
{
"_id": "guitar",
"Counter":1, // <--------------------------------
"count": 3,
"projects": [
{
"_id": ObjectId("5a934e000102030405000001"),
"key": "guitar",
"name": "paul",
"InnerCounter":0 // <--------------------------------
},
{
"_id": ObjectId("5a934e000102030405000000"),
"key": "guitar",
"name": "john",
"InnerCounter":1 // <--------------------------------
},
{
"_id": ObjectId("5a934e000102030405000002"),
"key": "guitar",
"name": "george",
"InnerCounter":2 // <--------------------------------
}
]
}
]
Live Demo
I don't think is there any other option to do this, You can try using $unwind to create index field in array using includeArrayIndex property,
db.collection.aggregate([
{ $sort: { name: -1 } },
{
$group: {
_id: "$key",
projects: { $push: "$$ROOT" }
}
},
// $unwind "projects" array and store index in field "projects.InnerCounter"
{
$unwind: {
path: "$projects",
includeArrayIndex: "projects.InnerCounter"
}
},
// again group by _id and reconstruct "projects" array
{
$group: {
_id: "$_id",
projects: { $push: "$projects" },
count: { $sum: 1 }
}
},
// sort by "_id"
{ $sort: { "_id": 1 } },
// group by $$ROOT and construct root array
{
$group: {
_id: null,
root: { $push: "$$ROOT" }
}
},
// deconstruct root array and add field "Counter" ofr index counter
{
$unwind: {
path: "$root",
includeArrayIndex: "root.Counter"
}
},
// replace to root
{ $replaceRoot: { newRoot: "$root" } }
])
Playground
Another option for projects array without $unwind,
Playground

Group by date in mongoDB while counting other fields

I've been using MongoDB for just a week and I have problems achieving this result: I want to group my documents by date while also keeping track of the number of entries that have a certain field set to a certain value.
So, my documents look like this:
{
"_id" : ObjectId("5f3f79fc266a891167ca8f65"),
"recipe" : "A",
"timestamp" : ISODate("2020-08-22T09:38:36.306Z")
}
where recipe is either "A", "B" or "C". Right now I'm grouping the documents by date using this pymongo query:
mongo.db.aggregate(
# Pipeline
[
# Stage 1
{
"$project": {
"createdAt": {
"$dateToString": {
"format": "%Y-%m-%d",
"date": "$timestamp"
}
},
"progressivo": 1,
"temperatura_fusione": 1
}
},
# Stage 2
{
"$group": {
"_id": {
"createdAt": "$createdAt"
},
"products": {
"$sum": 1
}
}
},
# Stage 3
{
"$project": {
"label": "$_id.createdAt",
"value": "$products",
"_id": 0
}
}])
Which gives me results like this:
[{"label": "2020-08-22", "value": 1}, {"label": "2020-08-15", "value": 2}, {"label": "2020-08-11", "value": 1}, {"label": "2020-08-21", "value": 5}]
What I'd like to have is also the counting of how many times each recipe appears on every date. So, if for example on August 21 I have 2 entries with the "A" recipe, 3 with the "B" recipe and 0 with the "C" recipe, the desired output would be
{"label": "2020-08-21", "value": 5, "A": 2, "B":3, "C":0}
Do you have any tips?
Thank you!
You can do like following, what have you done is excellent. After that,
In second grouping, We just get total value and value of each recipe.
$map is used to go through/modify each objects
$arrayToObject is used to covert the array what we have done via map (key : value pair) to object
$ifNull is used for, sometimes your data might not have "A" or "B" or "C". But you need the value should be 0 if there is no name as expected output.
Here is the code
[
{
"$project": {
"createdAt": {
"$dateToString": {
"format": "%Y-%m-%d",
"date": "$timestamp"
}
},
recipe: 1,
"progressivo": 1,
"temperatura_fusione": 1
}
},
{
"$group": {
"_id": {
"createdAt": "$createdAt",
"recipeName": "$recipe",
},
"products": {
$sum: 1
}
}
},
{
"$group": {
"_id": "$_id.createdAt",
value: {
$sum: "$products"
},
recipes: {
$push: {
name: "$_id.recipeName",
val: "$products"
}
}
}
},
{
$project: {
"content": {
"$arrayToObject": {
"$map": {
"input": "$recipes",
"as": "el",
"in": {
"k": "$$el.name",
"v": "$$el.val"
}
}
}
},
value: 1
}
},
{
$project: {
_id: 1,
value: 1,
A: {
$ifNull: [
"$content.A",
0
]
},
B: {
$ifNull: [
"$content.B",
0
]
},
C: {
$ifNull: [
"$content.C",
0
]
}
}
}
]
Working Mongo playground

how to create an array on the output of a response using aggregate in Mongodb

I have in my collection a list of objects with this structure:
[
{
"country": "colombia",
"city":"medellin",
"calification": [
{
"_id": 1,
"stars": 5
},
{
"_id": 2,
"stars": 3
}
]
},
{
"country": "colombia",
"city":"manizales",
"calification": [
{
"_id": 1,
"stars": 5
},
{
"_id": 2,
"stars": 5
}
]
},
{
"country": "argentina",
"city":"buenos aires",
"calification": [
{
"_id": 1,
"stars": 5
},
]
},
{
"country": "perĂº",
"city":"cusco",
"calification": [
{
"_id": 3,
"stars": 3
},
]
}
]
I am trying to make a filter so that the output is an amount of arrays for each country. this is the example of the output i want.
avg would be result sum 'stars'/ calification.length
{
"colombia": [
{
"city": "medellin",
"avg": 4,
"calification": [
{
"_id": 1,
"stars": 5
},
{
"_id": 2,
"stars": 3
}
]
},
{
"city": "manizales",
"avg": 5,
"calification": [
{
"_id": 1,
"stars": 5
},
{
"_id": 2,
"stars": 3
}
]
}
],
"argentina": {
"city": "buenos aires",
"avg": 5,
"calification": [
{
"_id": 1,
"stars": 5
}
]
},
"peru": {
"city": "cusco",
"avg": 4,
"calification": [
{
"_id": 1,
"stars": 4
}
]
}
}
I am trying to do this:
Alcalde.aggregate([
{
$addFields: {
colombia: {
"$push": {
"$cond": [{ $eq: ["$country", "'Colombia'"] }, true, null]
}
}
}
},
{
$project: { colombia: "$colombia" }
}
]
how can i do it
We can make it more elegant.
MongoDB has $avg operator, let's use it. Also, we can use $group operator to group cities for the same country.
At the end, applying $replaceRoot + $arrayToObject** we transform into desired result.
** it's because we cannot use such expression: {"$country":"$city"}
$replaceRoot $arrayToObject
data : { { [ {
"key" : "val", --> "key" : "val", {k:"key", v: "val"}, --> "key" : "val",
"key2" : "val2" "key2" : "val2" {k:"key2", v: "val2"} "key2" : "val2"
} } ] }
Try this one:
Alcalde.aggregate([
{
$group: {
_id: "$country",
city: {
$push: {
"city": "$city",
"avg": { $avg: "$calification.stars"},
"calification": "$calification"
}
}
}
},
{
$replaceRoot: {
newRoot: {
$arrayToObject: [ [{ "k": "$_id", "v": "$city"}] ]
}
}
}
])
MongoPlayground
EDIT: Generic way to populate city inner object
$$ROOT is variable which stores root document
$mergeObjects adds / override fields to final object
Alcalde.aggregate([
{
$group: {
_id: "$country",
city: {
$push: {
$mergeObjects: [
"$$ROOT",
{
"avg": { "$avg": "$calification.stars" }
}
]
}
}
}
},
{
$project: {
"city.country": 0
}
},
{
$replaceRoot: {
newRoot: {
$arrayToObject: [
[ { "k": "$_id", "v": "$city" } ]
]
}
}
}
])
MongoPlayground

MongoDB multiple counts, single document, arrays

I have been searching on stackoverflow and cannot find exactly what I am looking for and hope someone can help. I want to submit a single query, get multiple counts back, for a single document, based on array of that document.
My data:
db.myCollection.InsertOne({
"_id": "1",
"age": 30,
"items": [
{
"id": "1",
"isSuccessful": true,
"name": null
},{
"id": "2",
"isSuccessful": true,
"name": null
},{
"id": "3",
"isSuccessful": true,
"name": "Bob"
},{
"id": "4",
"isSuccessful": null,
"name": "Todd"
}
]
});
db.myCollection.InsertOne({
"_id": "2",
"age": 22,
"items": [
{
"id": "6",
"isSuccessful": true,
"name": "Jeff"
}
]
});
What I need back is the document and the counts associated to the items array for said document. In this example where the document _id = "1":
{
"_id": "1",
"age": 30,
{
"totalIsSuccessful" : 2,
"totalNotIsSuccessful": 1,
"totalSuccessfulNull": 1,
"totalNameNull": 2
}
}
I have found that I can get this in 4 queries using something like this below, but I would really like it to be one query.
db.test1.aggregate([
{ $match : { _id : "1" } },
{ "$project": {
"total": {
"$size": {
"$filter": {
"input": "$items",
"cond": { "$eq": [ "$$this.isSuccessful", true ] }
}
}
}
}}
])
Thanks in advance.
I am assuming your expected result is invalid since you have an object literal in the middle of another object and also you have totalIsSuccessful for id:1 as 2 where it seems they should be 3. With that said ...
you can get similar output via $unwind and then grouping with $sum and $cond:
db.collection.aggregate([
{ $match: { _id: "1" } },
{ $unwind: "$items" },
{ $group: {
_id: "_id",
age: { $first: "$age" },
totalIsSuccessful: { $sum: { $cond: [{ "$eq": [ "$items.isSuccessful", true ] }, 1, 0 ] } },
totalNotIsSuccessful: { $sum: { $cond: [{ "$ne": [ "$items.isSuccessful", true ] }, 1, 0 ] } },
totalSuccessfulNull: { $sum: { $cond: [{ "$eq": [ "$items.isSuccessful", null ] }, 1, 0 ] } },
totalNameNull: { $sum: { $cond: [ { "$eq": [ "$items.name", null ]}, 1, 0] } } }
}
])
The output would be this:
[
{
"_id": "_id",
"age": 30,
"totalIsSuccessful": 3,
"totalNameNull": 2,
"totalNotIsSuccessful": 1,
"totalSuccessfulNull": 1
}
]
You can see it working here