Mongo db aggregation - $push and $slice top results - mongodb

I have the following documents in my db:
{uid: 1, score: 10}
{uid: 2, score: 11}
{uid: 3, score: 1}
{uid: 4, score: 6}
{uid: 5, score: 2}
{uid: 6, score: 3}
{uid: 7, score: 8}
{uid: 8, score: 10}
I want to split them into buckets by score - i.e.:
score
uids
(bucket name in aggregation)
[0,4)
3,5,6
0
[4,7)
4
4
[7,inf)
1,2,7,8
7
For this, I created the following aggregation which works just fine:
// Split the documents into score buckets [0,4) and [4,7), plus a catch-all
// bucket (_id: 7) for every score that falls outside those boundaries.
// Note: the shell method is aggregate(), not aggregation().
db.scores.aggregate([
  {
    $bucket: {
      groupBy: "$score",
      boundaries: [0, 4, 7],
      // Scores outside [0, 7) (for this data: 7 and above) land in the bucket with _id 7.
      default: 7,
      output: {
        // Number of documents that fell into the bucket.
        total: { $sum: 1 },
        // Collects every document of the bucket; not yet limited to the top 3.
        top_frustrated: {
          $push: { uid: "$uid", score: "$score" }
        }
      }
    }
  }
])
However, I would like to return only the top 3 of every bucket - i.e, buckets 0, 4 should be the same, but bucket 7 should have only uids 1,2,8 returned (as uid 7 has the lowest score) - but to include the total count of documents as well, i.e. output of bucket "7" should look like:
{ "total" : 4, "top_scores" :
[
{"uid" : 2, "score" : 11},
{"uid" : 1, "score" : 10},
{"uid" : 8, "score" : 10},
]
}
I tried using $addFields with $sortArray and $slice, but it either doesn't work or returns errors.
I can of course use $project but I was wondering if there is a more efficient way.
I am using Amazon DocumentDB.

You can use the $topN accumulator, instead of $push, like this:
db.collection.aggregate([
{
"$bucket": {
"groupBy": "$score",
"boundaries": [
0,
4,
7
],
"default": 7,
"output": {
"total": {
"$sum": 1
},
"top_frustrated": {
"$topN": {
"n": 3,
"sortBy": {
"score": -1
},
"output": {
"uid": "$uid",
"score": "$score"
}
}
}
},
}
},
])
Playground link.
The only catch here is this operator is present in MongoDB 5.2 and above.
For older versions, this will work:
db.collection.aggregate([
{
"$sort": {
score: -1
}
},
{
$bucket: {
groupBy: "$score",
boundaries: [
0,
4,
7
],
default: 7,
output: {
"total": {
$sum: 1
},
"top_frustrated": {
$push: {
"uid": "$uid",
"score": "$score"
}
},
},
}
},
{
"$project": {
total: 1,
top_frustrated: {
"$slice": [
"$top_frustrated",
3
]
}
}
}
])
Playground link.

Related

MongoDB find minimum and maximum between grouped data

Will try to keep this concise with the input, result and desired/expected result. Need to find the minimum, maximum number of rows/records between the same "winCode" and the last time it occurred in the ordered data. So it makes me want to first group them by "winCode" which works perfectly, but I am not able to come up with something that would display how many records it took for the same "winCode" to appear last time, the minimum and maximum. Check desired output for more details. Below is the paste from: https://mongoplayground.net/p/bCzTO8ZLxNi
Input/collection
[
{
code: "1",
results: {
winCode: 3
}
},
{
code: "10",
results: {
winCode: 3
}
},
{
code: "8",
results: {
winCode: 2
}
},
{
code: "5",
results: {
winCode: 5
}
},
{
code: "5",
results: {
winCode: 4
}
},
{
code: "6",
results: {
winCode: 4
}
},
{
code: "7",
results: {
winCode: 5
}
},
{
code: "3",
results: {
winCode: 3
}
},
{
code: "9",
results: {
winCode: 2
}
},
{
code: "2",
results: {
winCode: 2
}
}
]
Current query
db.collection.aggregate([
{
$sort: {
code: -1
}
},
{
$group: {
_id: "$results.winCode",
count: {
$sum: 1
},
lastTimeOccurredCode: {
$first: "$code" // Any way to get it to display a count from the start to this point on how many records it went through to get the $first result?
},
}
},
{
$sort: {
_id: -1
}
},
])
Current output
[
{
"_id": 5,
"count": 2,
"lastTimeOccurredCode": "5"
},
{
"_id": 4,
"count": 2,
"lastTimeOccurredCode": "5"
},
{
"_id": 3,
"count": 3,
"lastTimeOccurredCode": "1"
},
{
"_id": 2,
"count": 3,
"lastTimeOccurredCode": "2"
}
]
Desired output
[
{
"_id": 5,
"count": 2,
"lastTimeOccurredRecordsCount": 4,
"minRecordsBetween": 3,
"maxRecordsBetween": 3
},
{
"_id": 4,
"count": 2,
"lastTimeOccurredRecordsCount": 5,
"minRecordsBetween": 1,
"maxRecordsBetween": 1
},
{
"_id": 3,
"count": 3,
"lastTimeOccurredRecordsCount": 1,
"minRecordsBetween": 1,
"maxRecordsBetween": 6
},
{
"_id": 2,
"count": 3,
"lastTimeOccurredRecordsCount": 3,
"minRecordsBetween": 1,
"maxRecordsBetween": 6
}
]
I have tried to add an $accumulator function, but I would need the $first functions result in it, but it's not available at the same $group stage. Feel like I am missing something here.
You can use $setWindowFields to define an index and $reduce to find the differences between consecutive indexes. If you want the index to follow {$sort: {code: -1}}, then keep the $setWindowFields sortBy as in this example and remove the redundant {$sort: {code: -1}} step. If you want the index to follow a different sorting logic, then only update the $setWindowFields sortBy.
Use $setWindowFields to define index
$sort according to what you need (if it is different from the previous sort)
$group according to the $results.winCode and keep all index data.
Calculate the diff
Format
db.collection.aggregate([
{$setWindowFields: {
sortBy: {code: -1},
output: {index: {$sum: 1, window: {documents: ["unbounded", "current"]}}}
}},
{$sort: {code: -1}},
{$group: {
_id: "$results.winCode",
count: {$sum: 1},
lastTimeOccurredCode: {$first: "$code"},
index: {$push: "$index"}
}},
{$project: {
count: 1,
lastTimeOccurredCode: 1,
diff: {
$reduce: {
input: {$range: [1, {$size: "$index"}]},
initialValue: [],
in: {$concatArrays: [
"$$value",
[{$subtract: [
{$arrayElemAt: ["$index", "$$this"]},
{$arrayElemAt: ["$index", {$subtract: ["$$this", 1]}]}
]}]
]
}
}
}
}},
{$set: {
minRecordsBetween: {$min: "$diff"},
maxRecordsBetween: {$max: "$diff"},
diff: "$$REMOVE"
}},
{$sort: {_id: -1}}
])
See how it works on the playground example

MONGODB Aggregate query to give $min value of array field by element

Hi, I am converting my existing website from PHP/MySQL to Node/MongoDB. It is a golf society site which I use to log scores for each member's rounds and to provide results and statistics on their games. The main collection contains an array of 18 scores, which is great for my results queries, but I am having a problem with the statistical side, i.e. average score by hole by course, and lowest score ever by hole by course (eclectic). I have come up with this aggregate query, which works and gives me the result I require, but it is ugly! I am sure there must be a more elegant solution out there and feel I am missing a trick somewhere. I have looked at $map as I thought that might help, but I don't think it will. I would appreciate it if someone could offer any suggestions for tidying this code up. Thanks.
{ _id:
{ date_played: 2019-06-21T00:00:00.000Z,
course_played: 1,
player_id: 1 },
score: [ 8, 4, 7, 4, 7, 1, 7, 5, 6, 4, 5, 7, 6, 4, 7, 5, 6, 7 ],
handicap: 23,
cash_won: 0,
sort_order: 2,
gross_score: 100,
gross_sfpts: 31,
skins_group: 1,
score_differential: 26.2,
pcc_adjustment: 0 }
{ _id:
{ date_played: 2016-08-14T00:00:00.000Z,
course_played: 1,
player_id: 1},
score: [ 5, 4, 5, 6, 5, 4, 8, 6, 1, 3, 3, 4, 3, 6, 3, 6, 4, 5 ],
handicap: 18,
cash_won: 14,
sort_order: 4,
gross_score: 81,
gross_sfpts: 44,
skins_group: 1,
score_differential: 12.1,
pcc_adjustment: 0 }
[
{
'$match': {
'_id.course_played': 1
}
}, {
'$project': {
'player_name': 1,
'hole01': {
'$arrayElemAt': [
'$score', 0
]
},
'hole02': {
'$arrayElemAt': [
'$score', 1
]
},
'hole03': {
'$arrayElemAt': [
'$score', 2
]
},
'hole04': {
'$arrayElemAt': [
'$score', 3
]
},
'hole05': {
'$arrayElemAt': [
'$score', 4
]
},
'hole06': {
'$arrayElemAt': [
'$score', 5
]
},
'hole07': {
'$arrayElemAt': [
'$score', 6
]
},
'hole08': {
'$arrayElemAt': [
'$score', 7
]
},
'hole09': {
'$arrayElemAt': [
'$score', 8
]
},
'hole10': {
'$arrayElemAt': [
'$score', 9
]
},
'hole11': {
'$arrayElemAt': [
'$score', 10
]
},
'hole12': {
'$arrayElemAt': [
'$score', 11
]
},
'hole13': {
'$arrayElemAt': [
'$score', 12
]
},
'hole14': {
'$arrayElemAt': [
'$score', 13
]
},
'hole15': {
'$arrayElemAt': [
'$score', 14
]
},
'hole16': {
'$arrayElemAt': [
'$score', 15
]
},
'hole17': {
'$arrayElemAt': [
'$score', 16
]
},
'hole18': {
'$arrayElemAt': [
'$score', 17
]
}
}
}, {
'$sort': {
'_id.player_id': 1
}
}, {
'$group': {
'_id': '$_id.player_id',
'name': {
'$first': '$player_name'
},
'hole1': {
'$min': '$hole01'
},
'hole2': {
'$min': '$hole02'
},
'hole3': {
'$min': '$hole03'
},
'hole4': {
'$min': '$hole04'
},
'hole5': {
'$min': '$hole05'
},
'hole6': {
'$min': '$hole06'
},
'hole7': {
'$min': '$hole07'
},
'hole8': {
'$min': '$hole08'
},
'hole9': {
'$min': '$hole09'
},
'hole10': {
'$min': '$hole10'
},
'hole11': {
'$min': '$hole11'
},
'hole12': {
'$min': '$hole12'
},
'hole13': {
'$min': '$hole13'
},
'hole14': {
'$min': '$hole14'
},
'hole15': {
'$min': '$hole15'
},
'hole16': {
'$min': '$hole16'
},
'hole17': {
'$min': '$hole17'
},
'hole18': {
'$min': '$hole18'
},
'rounds': {
'$sum': 1
}
}
}, {
'$addFields': {
'total': {
'$add': [
'$hole1', '$hole2', '$hole3', '$hole4', '$hole5', '$hole6', '$hole7', '$hole8', '$hole9', '$hole10', '$hole11', '$hole12', '$hole13', '$hole14', '$hole15', '$hole16', '$hole17', '$hole18'
]
}
}
}, {
'$sort': {
'total': 1
}
}, {
'$limit': 10
}
]
Which gives this as an example when run against the total database, which is the result I want but I would like all the "hole" fields to be returned in an Array as per the original score field.
{ _id: 1,
hole1: 5,
hole2: 4,
hole3: 5,
hole4: 4,
hole5: 5,
hole6: 2,
hole7: 3,
hole8: 3,
hole9: 3,
hole10: 3,
hole11: 2,
hole12: 3,
hole13: 4,
hole14: 2,
hole15: 3,
hole16: 3,
hole17: 3,
hole18: 3,
rounds: 562,
total: 53 }
You might $unwind the scores array, keeping the index as the hole number, then $group by player, course, and hole to get the score for each hole, $sort by hole number to make sure of the order, and then $group by player and course, pushing the scores back into an array.
db.collection.aggregate([
{$match: {"_id.player_id": 1}},
{$unwind: {
path: "$score",
includeArrayIndex: "hole"
}},
{$group: {
_id: {
course_played: "$_id.course_played",
player_id: "$_id.player_id",
hole: "$hole"
},
minScore: {$min: "$score"},
rounds: {$sum: 1}
}},
{$sort: {"_id.hole": 1}},
{$group: {
_id: {
course_played: "$_id.course_played",
player_id: "$_id.player_id"
},
score: {$push: "$minScore"},
total: {$sum: "$minScore"},
rounds: {$first: "$rounds"}
}}
])
Playground

Mongodb aggregate $group for non-existing items

I have documents like this:
Documents :
{score: 1, value: 10}
{score: 3, value: 10}
{score: 1, value: 10}
{score: 4, value: 10}
{score: 1, value: 10}
{score: 5, value: 10}
{score: 5, value: 10}
{score: 10, value: 10}
In this collection, there is no score for 2,6,7,8,9 but I need output like below.
Output :
{score: 1, avg: 10}
{score: 2, avg: 0}
{score: 3, avg: 10}
{score: 4, avg: 10}
{score: 5, avg: 10}
{score: 6, avg: 0}
{score: 7, avg: 0}
{score: 8, avg: 0}
{score: 9, avg: 0}
{score: 10, avg: 10}
Is there any option in the MongoDB aggregation framework that will generate this? Please assist.
You can try that using aggregation :
db.collection.aggregate([
{ $group: { _id: '$score', avg: { $avg: '$value' } } },
{ $group: { _id: '', min: { $min: '$_id' }, max: { $max: '$_id' }, data: { $push: '$$ROOT' } } },
{ $project: { _id: 0, data: 1, nums: { $range: ['$min', "$max", 1] } } },
{ $project: { data: { $concatArrays: ["$data", { $map: { input: { $setDifference: ["$nums", "$data._id"] }, in: { _id: '$$this', avg: 0 } } }] } } },
{ $unwind: '$data' }, { $replaceRoot: { newRoot: "$data" } }
])
Test : MongoDB-Playground
Assuming you know the range of scores, there's a trick to achieve exactly what you want :
1 - Insert in your collection a document for each score, with value field not set or set to null :
db.collection.insertMany([
{
score: 1,
},
{
score: 2,
},
{
score: 3,
},
{
score: 4,
},
{
score: 5,
},
{
score: 6,
},
{
score: 7,
},
{
score: 8,
},
{
score: 9,
},
{
score: 10,
}
]);
It's important for value field not to be set, because a value set at 0 will affect average calculation
Of course this operation must be performed only once.
Then you can apply the following aggregation, which will output exactly what you need :
db.collection.aggregate([
{
$bucket: {
groupBy: "$score",
boundaries: [
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11
],
output: {
avg: {
$avg: "$value"
}
}
}
},
{
$project: {
score: "$_id",
avg: {
$ifNull: [
"$avg",
0
]
},
_id: 0
}
}
])
Will output :
[
{
"avg": 10,
"score": 1
},
{
"avg": 0,
"score": 2
},
{
"avg": 10,
"score": 3
},
{
"avg": 10,
"score": 4
},
{
"avg": 10,
"score": 5
},
{
"avg": 0,
"score": 6
},
{
"avg": 0,
"score": 7
},
{
"avg": 0,
"score": 8
},
{
"avg": 0,
"score": 9
},
{
"avg": 10,
"score": 10
}
]
You can test it here.

How to group data again in a single object with new keys after a mongodb $group?

I have a mongodb database with a collection of companies that look like this (it's just a sample, the actual collection is much larger):
[
{
"_id": 100,
"name": "Test Name 1",
"level": "1"
},
{
"_id": 101,
"name": "Test Name 2",
"level": "1"
},
{
"_id": 102,
"name": "Test Name 3",
"level": "2"
}
]
Where "level" can only range from 0 to 5
I'm trying to make an aggregate query with $group and $project that counts how many companies there are in each level, but according to the API specification I need to follow, it needs to be formatted like this, in a single object:
{
"metrics": {
"companies": {
"total": <integer>,
"level1": <integer>,
"level2": <integer>,
"level3": <integer>,
"level4": <integer>,
"level5": <integer>
}
}
}
The closest I could get to this was using $group and $project like this:
Companies.aggregate([{
$group: {
_id: {
level: "$level"
},
count: {
$sum: 1
}
}
},
{
$project: {
_id: 0,
level: "$_id.level",
total: "$count"
}
}
])
Which gives the following result:
[
{
"level": 3,
"total": 108
},
{
"level": 5,
"total": 172
},
{
"level": 2,
"total": 624
},
{
"level": 4,
"total": 98
},
{
"level": 1,
"total": 137
},
{
"level": 0,
"total": 94
}
]
However, this result is an array and I need to put the data for each level in a single object with new keys "level1", "level2", etc, according to the specification.
I believe I need to make another $group operation but I couldn't find out how to do it.
Any ideas?
I'm not sure if I understand, but I suppose you just need to map it, like here:
> var aux = new Object;
> db.Companies.aggregate([
{
$group: {
_id: {
level: "$level"
},
count: {
$sum: 1
}
}
},
{
$project: {
_id: 0,
level: "$_id.level",
total: "$count"
}
}
]).forEach(function(a){aux["level"+a.level] = a.total;});
> printjson(aux);
{ "level2" : 1, "level1" : 2 }
I believe there could be a better solution, but this one is working:
// Counts companies per level, then reshapes the counts into a single
// {metrics: {companies: {total, level1..level5}}} document.
db.companies.aggregate([{
// Stage 1: one document per distinct level with its company count.
$group:{_id:{level: "$level"}, count: {$sum: 1}}},
// Stage 2: collapse everything into one document, keeping the per-level counts and summing the overall total.
{$group:{"_id": 0, levels: {$push: {_id:"$_id.level", count: "$count"}}, total: {$sum: "$count"}}},
// Stages 3-5: unwind, sort by level and re-push so the counts array is ordered by level.
{$unwind: "$levels"},
{$sort: {"levels._id": 1}},
// total is identical on every unwound document, so $avg simply carries it through unchanged.
{$group:{_id: 0, levels: {$push: {levels:"$levels.count"}}, "total": {$avg:"$total"}}},
// NOTE(review): levelN is picked by array position, not by level value - index 0 is the lowest level present.
// If a level 0 exists, or some level has no companies, the labels shift; confirm the data only contains levels 1-5.
{$project: {total: "$total", level1: {$arrayElemAt: ["$levels",0]}, level2: {$arrayElemAt: ["$levels", 1]}, level3: {$arrayElemAt: ["$levels",2]}, level4: {$arrayElemAt: ["$levels",3]},level5: {$arrayElemAt: ["$levels",4]} }},
// Final shape: unwrap the {levels: <count>} wrappers and nest everything under metrics.companies.
{$project: {_id: 0, metrics: {companies: {total: "$total", level1: "$level1.levels", level2: "$level2.levels", level3: "$level3.levels",level4: "$level4.levels", level5: "$level5.levels"}}}}
])
Returned result:
{ "metrics" :
{ "companies" :
{ "total" : 7,
"level1" : 1,
"level2" : 2,
"level3" : 2,
"level4" : 1,
"level5" : 1
} } }

MongoChef Aggregation: In one query find and show average score for max 3, 2 and 1 'project month' grouped data

Using MongoChef aggregation, if you have data such as:
{_id: 1, Mnt: 2016-05-01, Score: 85}
{_id: 2, Mnt: 2016-05-01, Score: 85}
{_id: 3, Mnt: 2016-03-01, Score: 80}
{_id: 4, Mnt: 2016-03-01, Score: 80}
{_id: 5, Mnt: 2016-03-01, Score: 80}
{_id: 6, Mnt: 2016-01-01, Score: 75}
and want to:
Calculate max month in the collection (i.e. M1 : May 2016),
Group by "Mnt" - which might not be sequential latest months, e.g. collection above latest/largest 3 months being: 2016-May, 2016-March, 2016-January,
Find the latest X month totals,
Calculate the Average of each,
e.g.
{M1 : 85, M2 : 82, M3 : 80.8}
I.e.
M1 is average of max month in collection,
M2 is average of max 2 project months in collection
M3 is average of max 3 project months in collection etc.
This is a dirty solution, but it will give you an overview of how to start:
// Prints the cumulative average score over the latest 1, 2 and 3 months
// present in the collection, labelled "M1", "M2", "M3".
var monthRank = 1;     // 1-based label counter for the printed lines
var runningSum = 0;    // sum of all scores seen so far
var runningCount = 0;  // number of documents seen so far

db.a.aggregate([
  // One document per (year, month) carrying that month's score sum and document count.
  {
    $group: {
      _id: {
        year: { $year: "$Mnt" },
        month: { $month: "$Mnt" }
      },
      avg: { $avg: "$Score" },
      elemCount: { $sum: 1 },
      elemSum: { $sum: "$Score" }
    }
  },
  // Newest month first.
  { $sort: { "_id.year": -1, "_id.month": -1 } },
  // Keep only the first 3 records.
  { $limit: 3 }
]).forEach(function (monthDoc) {
  // Fold this month into the running totals, then report the average so far.
  runningSum += monthDoc.elemSum;
  runningCount += monthDoc.elemCount;
  var cumulativeAvg = runningSum / runningCount;
  print("M" + monthRank.toString() + ": " + cumulativeAvg.toString());
  monthRank++;
})
and I converted month field to iso time
db.a.insert([
{_id: 1, Mnt: new ISODate("2016-05-01T15:44:00.255Z"), Score: 85},
{_id: 2, Mnt: new ISODate("2016-05-01T15:44:00.255Z"), Score: 85},
{_id: 3, Mnt: new ISODate("2016-03-01T15:44:00.255Z"), Score: 80},
{_id: 4, Mnt: new ISODate("2016-03-01T15:44:00.255Z"), Score: 80},
{_id: 5, Mnt: new ISODate("2016-03-01T15:44:00.255Z"), Score: 80},
{_id: 6, Mnt: new ISODate("2016-01-01T15:44:00.255Z"), Score: 75}
])
Code that works - calculate a running 12-month and current month Net Promoter Scores:
db.Collection.aggregate(
// Pipeline
// Stage 1
{
$project: {
ID: "$ID",
Mnt: "$Mnt",
CntryReg: "$CntryReg",
Prom: "$Prom",
}
},
// Stage 2
{
$group: {
_id: '$Mnt',
docs: {
$push: {
Mnt: "$Mnt",
CntryReg: "$CntryReg",
Prom: "$Prom"
}}
}
},
// Stage 3
{
$sort: {
_id: -1
}
},
// Stage 4
{
$limit: 12
},
// Stage 5
{
$group: {
"_id": null,
"values": { "$push": "$docs" }
}
},
// Stage 6
{
$unwind: {
"path": "$values", "includeArrayIndex": "rank"
}
},
// Stage 7
{
$unwind: "$values"
},
// Stage 8
{
$project: {
_id: 0,
Mnt: "$values.Mnt",
CntryReg: "$values.CntryReg",
Prom: "$values.Prom",
rank: "$rank"
}
},
// Stage 9
{
$group: {
_id: {CntryReg:"$CntryReg"} ,
AR12: { $sum: { $cond : [{ $eq : ["$Prom", "D"]}, 1, 0]} },
Ind12: { $sum: { $cond : [{ $eq : ["$Prom", "I"]}, 1, 0]} },
Loy12: { $sum: { $cond : [{ $eq : ["$Prom", "P"]}, 1, 0]} },
Sum12: {$sum: 1 },
AR1: { $sum: { $cond : [{ $and : [{ $eq : ["$Prom", "D"]} , {$eq : ["$rank", 0]} ]}, 1, 0]} },
Loy1: { $sum: { $cond : [{ $and : [{ $eq : ["$Prom", "P"]} , {$eq : ["$rank", 0]} ]}, 1, 0]} },
Ind1: { $sum: { $cond : [{ $and : [{ $eq : ["$Prom", "I"]} , {$eq : ["$rank", 0]} ]}, 1, 0]} },
Sum1: { $sum: { $cond : [ { $eq : ["$rank", 0]}, 1, 0]} },