Group based on multiple fields in a single query - mongodb

I have the following document structure
[
{
"tag": "abc",
"created_at": 2020-02-05T14:20:52.907+00:00
"likes": 12,
"comments": 3
},
{
"tag": "abc",
"created_at": 2020-02-04T14:20:52.907+00:00
"likes": 10,
"comments": 1
}
{
"tag": "abc",
"created_at": 2020-01-04T14:21:52.907+00:00
"likes": 12,
"comments": 3
},
{
"tag": "abc",
"created_at": 2020-01-04T14:22:52.907+00:00
"likes": 2,
"comments": 1
}
]
First, I want to group my documents based on tag value and then on created_at field to get something like this.
[
{
"tag_name": "abc",
"day_wise": [
{
"dayMonthYear": "2020-01-04",
"comments": 4,
"likes": 14
},
{
"dayMonthYear": "2020-02-04",
"comments": 1,
"likes": 10
},
{
"dayMonthYear": "2020-02-05",
"comments": 3,
"likes": 12
},
],
"month_wise": [
{
"monthYear": "2020-04",
"comments": 5,
"likes": 24
},
{
"monthYear": "2020-05",
"comments": 3,
"likes": 12
},
],
}
]
There can be multiple tags so need to group them accordingly. Can anyone please suggest what aggregation query can be applied to achieve this result.

You can use below aggregation
// Totals likes/comments per tag, broken down per calendar day and per month.
db.collection.aggregate([
  // Stage 1: one document per tag. Every source row is pushed together with
  // its pre-formatted day and month keys.
  { "$group": {
    "_id": "$tag",
    "data": {
      "$push": {
        "comments": "$comments",
        "likes": "$likes",
        // Year-first formats match the requested "2020-02" / "2020-02-05"
        // output and compare chronologically as plain strings.
        "monthWise": { "$dateToString": { "date": "$created_at", "format": "%Y-%m" } },
        "dateWise": { "$dateToString": { "date": "$created_at", "format": "%Y-%m-%d" } }
      }
    }
  }},
  // Stage 2: for every distinct day/month key, collect the rows carrying it.
  { "$project": {
    "dateWise": {
      "$map": {
        // $setUnion over a single array is used to obtain the distinct keys.
        "input": { "$setUnion": ["$data.dateWise"] },
        "as": "m",
        "in": {
          "fieldA": "$$m",
          "count": {
            "$filter": {
              "input": "$data",
              "as": "d",
              "cond": { "$eq": ["$$d.dateWise", "$$m"] }
            }
          }
        }
      }
    },
    "monthWise": {
      "$map": {
        "input": { "$setUnion": ["$data.monthWise"] },
        "as": "m",
        "in": {
          "fieldA": "$$m",
          "count": {
            "$filter": {
              "input": "$data",
              "as": "d",
              "cond": { "$eq": ["$$d.monthWise", "$$m"] }
            }
          }
        }
      }
    }
  }},
  // Stage 3: collapse each bucket's rows into summed likes/comments.
  { "$project": {
    "dateWise": {
      "$map": {
        "input": "$dateWise",
        "in": {
          "dateWise": "$$this.fieldA",
          "likes": { "$sum": "$$this.count.likes" },
          "comments": { "$sum": "$$this.count.comments" }
        }
      }
    },
    "monthWise": {
      "$map": {
        "input": "$monthWise",
        "in": {
          "monthWise": "$$this.fieldA",
          "likes": { "$sum": "$$this.count.likes" },
          "comments": { "$sum": "$$this.count.comments" }
        }
      }
    }
  }}
])
MongoPlayground

Try this one:
// Builds, for every tag, the per-day and per-month totals of likes/comments.
db.col.aggregate([
  {
    // Three independent sub-pipelines run over the same input documents.
    $facet: {
      day_wise: [
        {
          $group: {
            _id: {
              tag: "$tag",
              // Rebuild the date from year/month/day only, dropping the time part.
              day: { $dateFromParts: { year: { $year: "$created_at" }, month: { $month: "$created_at" }, day: { $dayOfMonth: "$created_at" } } }
            },
            likes: { $sum: "$likes" },
            comments: { $sum: "$comments" }
          }
        },
        {
          $project: {
            _id: "$_id.tag",
            likes: 1,
            comments: 1,
            dayMonthYear: { $dateToString: { date: "$_id.day", format: "%Y-%m-%d" } },
            day: "$_id.day"
          }
        },
        // $group does not order its output; sort so each tag's days come out
        // chronologically (the later $filter keeps array order).
        { $sort: { day: 1 } }
      ],
      month_wise: [
        {
          $group: {
            _id: {
              tag: "$tag",
              // Only year and month are supplied, so every date in a month
              // collapses to the same value.
              month: { $dateFromParts: { year: { $year: "$created_at" }, month: { $month: "$created_at" } } }
            },
            likes: { $sum: "$likes" },
            comments: { $sum: "$comments" }
          }
        },
        {
          $project: {
            _id: "$_id.tag",
            likes: 1,
            comments: 1,
            monthYear: { $dateToString: { date: "$_id.month", format: "%Y-%m" } },
            month: "$_id.month"
          }
        },
        { $sort: { month: 1 } }
      ],
      // The distinct tag values; one output document is produced per tag.
      tags: [{ $group: { _id: "$tag" } }]
    }
  },
  { $unwind: "$tags" },
  {
    // Keep only the day/month buckets that belong to the current tag.
    $project: {
      _id: "$tags._id",
      day_wise: {
        $filter: {
          input: "$day_wise",
          as: "item",
          cond: { $eq: ["$tags._id", "$$item._id"] }
          // Additional conditions can be combined with $and, e.g.
          // cond: { $and: [
          //   { $eq: ["$tags._id", "$$item._id"] },
          //   { $gt: ["$$item.day", { $subtract: ["$$NOW", 1000 * 60 * 60 * 5] }] }
          // ] }
        }
      },
      month_wise: {
        $filter: {
          input: "$month_wise",
          as: "item",
          cond: { $eq: ["$tags._id", "$$item._id"] }
        }
      }
    }
  },
  {
    // Drop the helper fields (_id, day, month) from the nested entries.
    $project: {
      _id: 1,
      day_wise: { likes: 1, comments: 1, dayMonthYear: 1 },
      month_wise: { likes: 1, comments: 1, monthYear: 1 }
    }
  }
])
Mongo playground

Related

Large data (over 1million rows) aggregate

I am currently using the following query but hitting a small issue - It's really slow.
Is there any way that mongooseJS can either:
a) split this into smaller queries and run it in segments
b) a better way to query this data.
My query
[
  // Filter on the stored timestamp directly instead of on a computed "year"
  // field: a range predicate on the raw field placed first in the pipeline
  // can be served by an index on `timestamp`, if one exists.
  {
    $match: {
      timestamp: {
        $gte: new Date("2020-01-01T00:00:00Z"),
        $lt: new Date("2021-01-01T00:00:00Z")
      }
    }
  },
  // Count documents per distinct coordinate pair.
  {
    $group: {
      _id: "$location.coordinates",
      count: { $sum: 1 }
    }
  },
  // Expose the coordinates as strings under location.coordinates.
  {
    $project: {
      _id: 0,
      count: 1,
      "location.coordinates": {
        $map: {
          input: "$_id",
          in: { $toString: "$$this" }
        }
      }
    }
  },
  // Most frequent locations first.
  { $sort: { count: -1 } }
]
In Node.js I run it as shown below. I have an M10 Atlas cluster, and the query itself runs without issue in MongoDB Compass.
// GET /pastlocation
// Responds with the 2020 listener locations grouped by coordinates, most
// frequent first. On an aggregation error a 500 response is sent instead of
// an empty success body.
router.get('/pastlocation', (req, res) => {
  const pastlocation = require('mongoose').model('pastlistenerslocation');
  pastlocation
    .aggregate([
      // A range predicate on the stored field, placed first, can be served by
      // an index on `timestamp` (if one exists); a filter on a computed
      // "year" field cannot.
      {
        $match: {
          timestamp: {
            $gte: new Date('2020-01-01T00:00:00Z'),
            $lt: new Date('2021-01-01T00:00:00Z'),
          },
        },
      },
      {
        $group: {
          _id: '$location.coordinates',
          count: { $sum: 1 },
        },
      },
      {
        $project: {
          _id: 0,
          count: 1,
          'location.coordinates': {
            $map: {
              input: '$_id',
              in: { $toString: '$$this' },
            },
          },
        },
      },
      { $sort: { count: -1 } },
    ])
    .allowDiskUse(true)
    .exec((err, result) => {
      if (err) {
        console.log(err);
        // Do not fall through to res.json(result): result is undefined here.
        res.status(500).json({ error: 'Failed to aggregate past locations' });
        return;
      }
      res.json(result);
    });
});
Data looks like this:
{
"UUID": "9B3640AC-3AA9-4313-94DB-9DFFEC7362E1",
"location": {
"coordinates": [115.8802288492358, -31.92553401927643],
"type": "Point"
},
"timestamp": {
"$date": "2020-11-04T13:20:58.224Z"
},
"__v": 0
}

MongoDb add $avg element of an array in an array by $map mapping

The problem is that I want to descend into the utwor array and compute the average of dlugosc_trwania.
Here is my code:
// Per-artist song statistics: total number of songs and average song
// duration across all albums.
db.artysci.aggregate([
  {
    $project: {
      _id: 0,
      // NOTE(review): the sample document stores the first name as `imie`;
      // confirm that `nazwa` is the intended field.
      nazwa: 1,
      nazwisko: 1,
      // Sum of the sizes of every album's `utwor` array.
      numberOfSongs: {
        $sum: {
          $map: {
            input: "$album",
            in: { $size: { $ifNull: ["$$this.utwor", []] } }
          }
        }
      },
      // The songs live in album[].utwor[], so first flatten every album's
      // durations into a single array, then average that array.
      avgSongTime: {
        $avg: {
          $reduce: {
            input: { $ifNull: ["$album", []] },
            initialValue: [],
            in: {
              $concatArrays: [
                "$$value",
                {
                  $map: {
                    // `$$this` is the current album of the enclosing $reduce.
                    input: { $ifNull: ["$$this.utwor", []] },
                    as: "song",
                    in: "$$song.dlugosc_trwania"
                  }
                }
              ]
            }
          }
        }
      }
    }
  }
])
I want to compute the average of "dlugosc_trwania", which is located in the utwor array inside the album array.
Grid:
// Sample artist document: one album holding four songs (`utwor`), each with
// a `dlugosc_trwania` duration value.
db.artysci.insert({
  imie: 'Nik',
  nazwisko: 'Kershaw',
  rok_debiutu: 1983,
  kraj_pochodzenia: ['Wielka Brytania'],
  gatunek: 'pop',
  album: [
    {
      tytul: "Human Racing",
      rok_edycji: 1990,
      gatunek: 'trash metal',
      typ_nosnika: 'CD',
      utwor: [
        {
          numer: 1,
          tytul_utworu: 'Dancing Girls',
          dlugosc_trwania: 3.46
        },
        {
          numer: 2,
          tytul_utworu: 'Wouldn’t It Be Good',
          dlugosc_trwania: 4.32
        },
        {
          numer: 3,
          tytul_utworu: 'Drum Talk',
          dlugosc_trwania: 3.10
        },
        {
          numer: 4,
          tytul_utworu: 'Bogart',
          dlugosc_trwania: 4.38
        }
      ]
    }
  ] // closes the `album` array, which the original snippet left open
})
Thanks to Faizul Hassan for your help and Yours, if you help me <3
Copy pasting the solution here from our conversation in another post..
Here is the solution for your second question.
Lets see an example using unwind:
Without divide/second:
// Average song duration per document: flatten albums, then songs, then
// average the raw `dlugosc_trwania` values grouped by the original _id.
db.notifications.aggregate([
  { $unwind: { path: "$album" } },
  { $unwind: { path: "$album.utwor" } },
  {
    $group: {
      _id: "$_id",
      avgDuration: { $avg: "$album.utwor.dlugosc_trwania" }
    }
  }
]);
With divide/second:
// Same as the plain average, but every duration is divided by 60 before it
// is averaged.
db.notifications.aggregate([
  { $unwind: { path: "$album" } },
  { $unwind: { path: "$album.utwor" } },
  {
    $group: {
      _id: "$_id",
      avgDuration: {
        $avg: {
          $divide: ["$album.utwor.dlugosc_trwania", 60]
        }
      }
    }
  }
]);

How to group data by every hour

How do I get counts grouped by each of the 24 hours, even when no data is present for an hour (i.e. that hour should be returned with a count of 0)?
MongoDB 3.6
Input
[
{
"_id": ObjectId("5ccbb96706d1d47a4b2ced4b"),
"date": "2019-05-03T10:39:53.108Z",
"id": 166,
"update_at": "2019-05-03T02:45:36.208Z",
"type": "image"
},
{
"_id": ObjectId("5ccbb96706d1d47a4b2ced4c"),
"date": "2019-05-03T10:39:53.133Z",
"id": 166,
"update_at": "2019-05-03T02:45:36.208Z",
"type": "image"
},
{
"_id": ObjectId("5ccbb96706d1d47a4b2ced4d"),
"date": "2019-05-03T10:39:53.180Z",
"id": 166,
"update_at": "2019-05-03T20:45:36.208Z",
"type": "image"
},
{
"_id": ObjectId("5ccbb96706d1d47a4b2ced7a"),
"date": "2019-05-10T10:39:53.218Z",
"id": 166,
"update_at": "2019-12-04T10:45:36.208Z",
"type": "image"
},
{
"_id": ObjectId("5ccbb96706d1d47a4b2ced7b"),
"date": "2019-05-03T10:39:53.108Z",
"id": 166,
"update_at": "2019-05-05T10:45:36.208Z",
"type": "image"
},
{
"_id": ObjectId("5ccbb96706d1d47a4b2cedae"),
"date": "2019-05-03T10:39:53.133Z",
"id": 166,
"update_at": "2019-05-05T10:45:36.208Z",
"type": "image"
},
{
"_id": ObjectId("5ccbb96706d1d47a4b2cedad"),
"date": "2019-05-03T10:39:53.180Z",
"id": 166,
"update_at": "2019-05-06T10:45:36.208Z",
"type": "image"
},
{
"_id": ObjectId("5ccbb96706d1d47a4b2cedab"),
"date": "2019-05-10T10:39:53.218Z",
"id": 166,
"update_at": "2019-12-06T10:45:36.208Z",
"type": "image"
}
]
Implementation
// Counts the documents of id 166 whose `update_at` string falls in
// [2019-05-03, 2019-05-05), bucketed by the two characters at offset 11 of
// that string. Stages are passed as separate arguments.
db.collection.aggregate(
  {
    $match: {
      update_at: { $gte: "2019-05-03T00:00:00.0Z", $lt: "2019-05-05T00:00:00.0Z" },
      id: { $in: [166] }
    }
  },
  {
    $group: {
      _id: { $substr: ["$update_at", 11, 2] },
      count: { $sum: 1 }
    }
  },
  // Rename the group key to `hour`.
  { $project: { _id: 0, hour: "$_id", count: "$count" } },
  { $sort: { hour: 1 } }
)
Actual Output:
{
"count": 2,
"hour": "02"
},
{
"count": 1,
"hour": "20"
}
I expect the output to list all 24 hours, even when an hour's count is 0 or null, and to convert the hour labels, for example "02" to "02 AM" and "13" to "01 PM":
Expected Output
{
"count": 0,
"hour": "01" // 01 AM
},
{
"count": 2,
"hour": "02"
},
{
"count": 0,
"hour": "03"
},
{
"count": 0,
"hour": "04"
},
{
"count": 0,
"hour": "05"
},
{
"count": 1,
"hour": "20" // to 08 pm
}
Try this solution:
Explanation
We group by hour to count how many images are uploaded.
Then, we add extra field hour to create time interval (if you had v4.x, there is a better solution).
We flatten the hour field (which creates new documents), use its first 2 digits to match the count, and use its last 2 digits as the label that gets the AM / PM period.
// Hourly counts for id 166 with every one of the 24 hours present (count 0
// when nothing matched), labelled on a 12-hour clock.
db.collection.aggregate([
  {
    $match: {
      update_at: { $gte: "2019-05-03T00:00:00.0Z", $lt: "2019-05-05T00:00:00.0Z" },
      id: { $in: [166] }
    }
  },
  // Count per hour, taken from the two characters at offset 11 of `update_at`.
  {
    $group: {
      _id: { $substr: ["$update_at", 11, 2] },
      count: { $sum: 1 }
    }
  },
  // Attach all 24 slots to every counted hour. Each entry is "HHhh":
  // HH = 24-hour value (used for matching and sorting), hh = 12-hour label.
  // Midnight is "0012" so that it is labelled "12 AM" rather than "00 AM".
  {
    $addFields: {
      hour: [
        "0012", "0101", "0202", "0303", "0404", "0505",
        "0606", "0707", "0808", "0909", "1010", "1111",
        "1212", "1301", "1402", "1503", "1604", "1705",
        "1806", "1907", "2008", "2109", "2210", "2311"
      ]
    }
  },
  { $unwind: "$hour" },
  // Keep the real count only on the slot that matches the counted hour.
  {
    $project: {
      _id: 0,
      hour: 1,
      count: {
        $cond: [
          { $eq: [{ $substr: ["$hour", 0, 2] }, "$_id"] },
          "$count",
          0
        ]
      }
    }
  },
  // Collapse the duplicated slots back into one document per hour.
  {
    $group: {
      _id: "$hour",
      count: { $sum: "$count" }
    }
  },
  // The zero-padded 24-hour prefix makes the string sort chronological.
  { $sort: { _id: 1 } },
  {
    $project: {
      _id: 0,
      hour: {
        $concat: [
          { $substr: ["$_id", 2, 2] },
          {
            // $gte (not $gt) so that hour "12" is labelled PM.
            $cond: [
              { $gte: [{ $substr: ["$_id", 0, 2] }, "12"] },
              " PM",
              " AM"
            ]
          }
        ]
      },
      count: "$count"
    }
  }
])
MongoPlayground
There's no "magic" solution, you'll have to hardcode it into your aggregation:
Here's an example using Mongo v3.2+ syntax with some $map and $filter magic:
// Hourly counts for id 166 with the missing hours filled in as 0 and the
// hour rendered on a 12-hour clock ("01 AM" ... "12 PM" ... "11 PM").
db.collection.aggregate([
  {
    $match: {
      update_at: { $gte: "2019-05-03T00:00:00.0Z", $lt: "2019-05-05T00:00:00.0Z" },
      id: { $in: [166] }
    }
  },
  // Count per hour, taken from the two characters at offset 11 of `update_at`.
  {
    $group: {
      _id: { $substr: ["$update_at", 11, 2] },
      count: { $sum: 1 }
    }
  },
  // Gather all counted hours into a single document.
  {
    $group: {
      _id: null,
      hours: { $push: { hour: "$_id", count: "$count" } }
    }
  },
  {
    $addFields: {
      hours: {
        $map: {
          // Counted hours followed by a zero-count entry for every hour that
          // was not counted.
          input: {
            $concatArrays: [
              "$hours",
              {
                $map: {
                  input: {
                    $filter: {
                      input: ["00", "01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23"],
                      as: "missingHour",
                      cond: {
                        $not: {
                          $in: [
                            "$$missingHour",
                            {
                              $map: {
                                input: "$hours",
                                as: "hourObj",
                                in: "$$hourObj.hour"
                              }
                            }
                          ]
                        }
                      }
                    }
                  },
                  as: "missingHour",
                  in: { hour: "$$missingHour", count: 0 }
                }
              }
            ]
          },
          as: "hourObject",
          in: {
            count: "$$hourObject.count",
            // Keep the zero-padded 24-hour value so the result can be sorted
            // chronologically; the 12-hour labels do not sort correctly.
            hour24: "$$hourObject.hour",
            hour: {
              // The first matching branch wins, so "00" must precede the
              // generic "< 12" AM branch.
              $switch: {
                branches: [
                  { case: { $eq: ["$$hourObject.hour", "00"] }, then: "12 AM" },
                  { case: { $lt: ["$$hourObject.hour", "12"] }, then: { $concat: ["$$hourObject.hour", " AM"] } },
                  { case: { $eq: ["$$hourObject.hour", "12"] }, then: "12 PM" },
                  { case: { $eq: ["$$hourObject.hour", "13"] }, then: "01 PM" },
                  { case: { $eq: ["$$hourObject.hour", "14"] }, then: "02 PM" },
                  { case: { $eq: ["$$hourObject.hour", "15"] }, then: "03 PM" },
                  { case: { $eq: ["$$hourObject.hour", "16"] }, then: "04 PM" },
                  { case: { $eq: ["$$hourObject.hour", "17"] }, then: "05 PM" },
                  { case: { $eq: ["$$hourObject.hour", "18"] }, then: "06 PM" },
                  { case: { $eq: ["$$hourObject.hour", "19"] }, then: "07 PM" },
                  { case: { $eq: ["$$hourObject.hour", "20"] }, then: "08 PM" },
                  { case: { $eq: ["$$hourObject.hour", "21"] }, then: "09 PM" },
                  { case: { $eq: ["$$hourObject.hour", "22"] }, then: "10 PM" },
                  { case: { $eq: ["$$hourObject.hour", "23"] }, then: "11 PM" }
                ],
                default: "None"
              }
            }
          }
        }
      }
    }
  },
  { $unwind: "$hours" },
  // Sort on the 24-hour value before it is dropped by the final projection.
  { $sort: { "hours.hour24": 1 } },
  {
    $project: {
      _id: 0,
      hour: "$hours.hour",
      count: "$hours.count"
    }
  }
]);
A short explanation of the $addFields stage: we first add hours that we're missing, we then merge the two arrays (of the original found hours and the "new" missing hours), finally we convert to the required output ("01" to "01 AM").
If you're using Mongo v4+, I recommend changing the $group stage's _id to use $dateFromString, as it's more consistent.
_id: {$hour: {$dateFromString: {dateString: "$update_at"}}}
If you do that, you'll have to update the $filter and $map sections to use numbers instead of strings, and eventually use $toString to cast into the format you want, hence the v4+ requirement.
You should store date values as Date objects instead of strings. I would do the formatting like this:
db.collection.aggregate(
[
{ $match: { ... } },
{
$group: {
_id: { h: { $hour: "$update_at" } },
count: { $sum: 1 }
}
},
{
$project: {
_id: 0,
hour: {
$switch: {
branches: [
{ case: { $lt: ["$_id.h", 10] }, then: { $concat: ["0", { $toString: "$_id.h" }, " AM"] } },
{ case: { $lt: ["$_id.h", 13] }, then: { $concat: [{ $toString: "$_id.h" }, " AM"] } },
{ case: { $lt: ["$_id.h", 22] }, then: { $concat: ["0", { $toString: { $subtract: ["$_id.h", 12] } }, " PM"] } },
{ case: { $lt: ["$_id.h", 24] }, then: { $concat: [{ $toString: { $subtract: ["$_id.h", 12] } }, " PM"] } }
]
}
},
hour24: "$_id.h",
count: 1
}
},
{ $sort: { hour24: 1 } }
])
As non-American I am not familiar with AM/PM rules, esp. for midnight and midday but I guess you get the principle.
Here is the query you can test it out, for MongoDB 4.0+
I will keep improving the query and update this answer.
// Pipeline (MongoDB 4.0+): per-hour counts for id 166 with a 12-hour label
// in `formated` and the numeric 24-hour value in `hour`.
const query = [
  {
    $match: {
      update_at: {
        $gte: ISODate("2019-05-03T00:00:00.0Z"),
        $lt: ISODate("2019-05-05T00:00:00.0Z")
      },
      id: { $in: [166] }
    }
  },
  {
    $group: {
      _id: { $hour: "$update_at" },
      count: { $sum: 1 }
    }
  },
  {
    $addFields: {
      // 24-hour to 12-hour number: 0 -> 12, 13..23 -> 1..11, otherwise unchanged.
      hourStr: {
        $toString: {
          $switch: {
            branches: [
              { case: { $eq: ["$_id", 0] }, then: 12 },
              { case: { $gt: ["$_id", 12] }, then: { $subtract: ["$_id", 12] } }
            ],
            default: "$_id"
          }
        }
      }
    }
  },
  {
    $project: {
      // $gte (not $gt) so that hour 12 is labelled PM.
      formated: { $concat: ["$hourStr", { $cond: { if: { $gte: ["$_id", 12] }, then: " PM", else: " AM" } }] },
      count: "$count",
      // The documents have no `hour` field at this point; expose the group key.
      hour: "$_id"
    }
  }
];
If you want the output in Indian time format, the code below works:
// Pipeline: per-hour counts for id 166, with the hour taken in the +05:30
// (Indian Standard Time) offset.
const query = [
  {
    $match: {
      update_at: {
        $gte: ISODate("2019-05-03T00:00:00.0Z"),
        $lt: ISODate("2019-05-05T00:00:00.0Z")
      },
      id: { $in: [166] }
    }
  },
  {
    $project: {
      // Hour of day (a number) after applying the +05:30 offset.
      h: { $hour: { date: "$update_at", timezone: "+0530" } }
    }
  },
  {
    $group: {
      // `h` is already the numeric hour; applying $hour to it again would
      // fail because it is not a date.
      _id: "$h",
      count: { $sum: 1 }
    }
  }
];

Get objects containing max values for multiple fields using aggregation in mongodb

I want to fetch the documents having highest value for a list of specifics fields. I don't know if it's possible in only one request.
Consider below data:
_id:1, kills:12, deaths:6, assists:1
_id:2, kills:2, deaths:2, assists:22
_id:3, kills:1, deaths:2, assists:3
_id:4, kills:0, deaths:23, assists:4
_id:5, kills:6, deaths:3, assists:5
_id:6, kills:7, deaths:1, assists:6
Answer should be something like
maxKills: { _id:1, kills:12, deaths:6, assists:1 },
maxDeaths: { _id:4, kills:0, deaths:23, assists:4 },
maxAssists: { _id:2, kills:2, deaths:2, assists:22 },
I have tried several queries, but I can't get the whole objects containing the max values.
// Collapses the whole collection into a single group holding the highest
// kills, deaths and assists found under `stats`.
db.coll.aggregate([
  {
    "$group": {
      "_id": null,
      "kills": { "$max": "$stats.kills" },
      "deaths": { "$max": "$stats.deaths" },
      "assists": { "$max": "$stats.assists" }
    }
  }
])
For example here I have all the max values I want but I don't get the whole matches Objects.
---- UPDATE ----
With this answer https://stackoverflow.com/a/33361913/9188650, I've made it work, but I receive the data in a way that is not really user-friendly.
// Two consecutive pipeline stages (shown without the enclosing aggregate call).
// Stage 1: a single group over all documents that records the maximum
// kills/deaths/assists and pushes a trimmed copy of every match.
{
"$group": {
"_id": null,
"maxKills": { "$max": "$stats.kills" },
"maxDeaths": { "$max": "$stats.deaths" },
"maxAssists": { "$max": "$stats.assists" },
"matches": {
"$push": {
"champion": "$champion",
"gameId": "$gameId",
"kills": "$stats.kills",
"deaths": "$stats.deaths",
"assists": "$stats.assists",
}
}
}
},
// Stage 2: keep the three maxima and reduce `matches` to the entries that
// hold at least one of them.
{
"$project": {
"_id": 0,
"maxKills": 1,
"maxDeaths": 1,
"maxAssists": 1,
"matches": {
"$setDifference": [
{
// Map every match to itself when it equals one of the maxima, otherwise
// to the placeholder `false`.
"$map": {
"input": "$matches",
"as": "match",
"in": {
$switch: {
branches: [
{ case: { $eq: ["$maxKills", "$$match.kills"] }, then: "$$match" },
{ case: { $eq: ["$maxDeaths", "$$match.deaths"] }, then: "$$match" },
{ case: { $eq: ["$maxAssists", "$$match.assists"] }, then: "$$match" },
],
default: false
}
}
}
},
// Subtracting [false] removes the placeholders produced above.
[false]
]
}
}
}
It will return:
{
"maxKills": 25,
"maxDeaths": 20,
"maxAssists": 39,
"matches": [
{
"champion": {
"id": 145,
"name": "Kai'Sa",
},
"gameId": 4263819967,
"kills": 25,
"deaths": 3,
"assists": 16
},
{
"champion": {
"id": 8,
"name": "Vladimir",
},
"gameId": 4262731529,
"kills": 8,
"deaths": 20,
"assists": 3
},
{
"champion": {
"id": 22,
"name": "Ashe",
},
"gameId": 4340383097,
"kills": 9,
"deaths": 7,
"assists": 39
},
{
"champion": {
"id": 23,
"name": "Tryndamere",
},
"gameId": 4352236936,
"kills": 25,
"deaths": 6,
"assists": 22
}
]
}
My last issue is the case where multiple objects share the same max value (as in the example above, where 2 matches have 25 kills). I only want the oldest one in these cases.
You can do it easier by using $filter and $arrayElemAt after $group stage:
// Builds the expression that selects, from the pushed `docs` array, the
// first document whose `field` equals the group-wide maximum in `maxField`.
const firstDocWithMax = (field, maxField) => ({
  $arrayElemAt: [
    {
      $filter: {
        input: "$docs",
        cond: { $eq: ["$$this." + field, "$" + maxField] }
      }
    },
    0
  ]
});

// One group over the whole collection: compute the three maxima, keep every
// document, then replace each maximum with the first document that holds it.
db.collection.aggregate([
  {
    $group: {
      _id: null,
      maxKills: { $max: "$kills" },
      maxDeaths: { $max: "$deaths" },
      maxAssists: { $max: "$assists" },
      docs: { $push: "$$ROOT" }
    }
  },
  {
    $project: {
      _id: 0,
      maxKills: firstDocWithMax("kills", "maxKills"),
      maxDeaths: firstDocWithMax("deaths", "maxDeaths"),
      maxAssists: firstDocWithMax("assists", "maxAssists")
    }
  }
])
Mongo Playground

mongodb aggregation with multiple sub groups

I have a collection with documents that look similar to this:
[
{
"_id": ObjectId("..."),
"date": ISODate("..."),
"type": "TypeA",
"color": "ColorA",
"soldFor": 12.15
},
{
"_id": ObjectId("..."),
"date": ISODate("..."),
"type": "TypeA",
"color": "ColorB",
"soldFor": 13.15
},
{
"_id": ObjectId("..."),
"date": ISODate("..."),
"type": "TypeB",
"color": "ColorA",
"soldFor": 12.15
},
{
"_id": ObjectId("..."),
"date": ISODate("..."),
"type": "TypeB",
"color": "ColorB",
"soldFor": 12.15
}
]
I know that this is not a good way to store such information, but unfortunately I have no influence in that.
What I need to get out of the collection is something like this:
[
2017: {
typeA: {
colorA: {
sum: 125.00
},
colorB: {
sum: 110.00
}
},
typeB: {
colorA: {
sum: 125.000
}
}
},
2016: {
typeA: {
colorB: {
sum: 125.000
}
}
}
]
At the moment I have two group stages that give me everything grouped by year, but I have no clue how to get the two other sub-groups. Building the sum would be a nice to have, but I am certain that I can figure out how that would be done in a group.
So far my pipeline looks like this:
[
  // First pass: bucket the raw documents by (type, color, year of `date`).
  {
    "$group": {
      "_id": {
        "type": "$type",
        "color": "$color",
        "year": { "$year": "$date" }
      },
      "docs": { "$push": "$$ROOT" }
    }
  },
  // Second pass: regroup those buckets by year only, keeping each bucket whole.
  {
    "$group": {
      "_id": { "year": "$_id.year" },
      "docs": { "$push": "$$ROOT" }
    }
  }
]
which results in something like this:
[
{
"_id": {
"year": 2006
},
"docs": {
"_id": {
"type": "typeA",
"color": "colorA",
"year": 2006
},
"docs": [
{
... root document
}
]
}
},
{
"_id": {
"year": 2016
},
"docs": [
{
"_id": {
"type": "typeA",
"color": "colorB",
"year": 2016
},
"docs": [
{
... root document
}
]
}
... more docs with three keys in id
]
}
]
Help is much appreciated!
Using a cohort of operators found in MongoDB 3.4.4 and newer, i.e. $addFields, $arrayToObject and $replaceRoot, you can compose a pipeline like the following to get the desired result:
[
  // 1. Sum `soldFor` per (year, type, color).
  { "$group": {
    "_id": {
      "year": { "$year": "$date" },
      "type": "$type",
      "color": "$color"
    },
    "count": { "$sum": "$soldFor" }
  } },
  // 2. Per (year, type): collect the colors as k/v pairs ...
  { "$group": {
    "_id": {
      "year": "$_id.year",
      "type": "$_id.type"
    },
    "counts": {
      "$push": {
        "k": "$_id.color",
        "v": { "sum": "$count" }
      }
    }
  } },
  // ... and turn them into an object keyed by color.
  { "$addFields": {
    "counts": { "$arrayToObject": "$counts" }
  } },
  // 3. Per year: collect the types as k/v pairs and turn them into an object
  //    keyed by type.
  { "$group": {
    "_id": "$_id.year",
    "counts": {
      "$push": {
        "k": "$_id.type",
        "v": "$counts"
      }
    }
  } },
  { "$addFields": {
    "counts": { "$arrayToObject": "$counts" }
  } },
  // 4. Gather all years into one document. $arrayToObject needs string keys,
  //    so the numeric year is converted with $substr.
  { "$group": {
    "_id": null,
    "counts": {
      "$push": {
        "k": { "$substr": ["$_id", 0, -1 ]},
        "v": "$counts"
      }
    }
  } },
  // 5. Promote the year-keyed object to the root. Building the new root from
  //    `counts` alone leaves no `_id: null` or `counts` field in the result.
  { "$replaceRoot": {
    "newRoot": { "$arrayToObject": "$counts" }
  } }
]