I have documents like this:
Documents :
{score: 1, value: 10}
{score: 3, value: 10}
{score: 1, value: 10}
{score: 4, value: 10}
{score: 1, value: 10}
{score: 5, value: 10}
{score: 5, value: 10}
{score: 10, value: 10}
In this collection, there is no score for 2,6,7,8,9 but I need output like below.
Output :
{score: 1, avg: 10}
{score: 2, avg: 0}
{score: 3, avg: 10}
{score: 4, avg: 10}
{score: 5, avg: 10}
{score: 6, avg: 0}
{score: 7, avg: 0}
{score: 8, avg: 0}
{score: 9, avg: 0}
{score: 10, avg: 10}
Is there any option in the Mongo aggregation framework that will generate this? Please assist.
You can try that using aggregation :
// Average `value` per score and emit a zero-average entry for every integer
// score between the lowest and highest score present that has no documents.
// Output documents have the shape { score, avg } and are sorted by score.
db.collection.aggregate([
  // 1. Average `value` for each score that actually occurs.
  { $group: { _id: "$score", avg: { $avg: "$value" } } },
  // 2. Collapse everything into one document: lowest/highest score seen plus
  //    the per-score averages collected in `data`.
  { $group: { _id: null, min: { $min: "$_id" }, max: { $max: "$_id" }, data: { $push: "$$ROOT" } } },
  // 3. Candidate scores. $range excludes its end value, which is fine here
  //    because the maximum score is already present in `data`.
  { $project: { _id: 0, data: 1, nums: { $range: ["$min", "$max", 1] } } },
  // 4. Append { _id: <score>, avg: 0 } for each candidate score missing from `data`.
  {
    $project: {
      data: {
        $concatArrays: [
          "$data",
          { $map: { input: { $setDifference: ["$nums", "$data._id"] }, in: { _id: "$$this", avg: 0 } } }
        ]
      }
    }
  },
  // 5. Back to one document per score.
  { $unwind: "$data" },
  { $replaceRoot: { newRoot: "$data" } },
  // 6. Rename `_id` to `score` and order the result; without this the
  //    zero-filled scores come after all the real ones.
  { $project: { _id: 0, score: "$_id", avg: 1 } },
  { $sort: { score: 1 } }
])
Test : MongoDB-Playground
Assuming you know the range of scores, there's a trick to achieve exactly what you want :
1 - Insert in your collection a document for each score, with value field not set or set to null :
// Insert one placeholder document per score 1..10. Each placeholder carries
// only `score` (no `value` field).
db.collection.insertMany(
  Array.from({ length: 10 }, (_, i) => ({ score: i + 1 }))
);
It's important that the value field is not set on these documents, because a value set to 0 would be included in the average calculation and skew it.
Of course this operation must be performed only once.
Then you can apply the following aggregation, which will output exactly what you need :
// One bucket per integer score; the bucket _id is the lower boundary,
// i.e. the score itself.
db.collection.aggregate([
  {
    $bucket: {
      groupBy: "$score",
      boundaries: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11],
      output: { avg: { $avg: "$value" } }
    }
  },
  // Expose the bucket _id as `score`; a bucket whose `avg` is null
  // is reported as 0.
  {
    $project: {
      score: "$_id",
      avg: { $ifNull: ["$avg", 0] },
      _id: 0
    }
  }
])
Will output :
[
{
"avg": 10,
"score": 1
},
{
"avg": 0,
"score": 2
},
{
"avg": 10,
"score": 3
},
{
"avg": 10,
"score": 4
},
{
"avg": 10,
"score": 5
},
{
"avg": 0,
"score": 6
},
{
"avg": 0,
"score": 7
},
{
"avg": 0,
"score": 8
},
{
"avg": 0,
"score": 9
},
{
"avg": 10,
"score": 10
}
]
You can test it here.
Related
I have the following documents in my db:
{uid: 1, score: 10}
{uid: 2, score: 11}
{uid: 3, score: 1}
{uid: 4, score: 6}
{uid: 5, score: 2}
{uid: 6, score: 3}
{uid: 7, score: 8}
{uid: 8, score: 10}
I want to split them into buckets by score - i.e.:
score
uids
(bucket name in aggregation)
[0,4)
3,5,6
0
[4,7)
4
4
[7,inf)
1,2,7,8
7
For this, I created the following aggregation which works just fine:
// Split documents into score buckets [0,4), [4,7) and a catch-all bucket
// (id 7) for everything else, collecting per-bucket totals and entries.
// The collection method is `aggregate`; `aggregation` does not exist.
db.scores.aggregate(
  [
    {
      $bucket:
      {
        groupBy: "$score",
        boundaries: [0, 4, 7],
        // Scores outside [0, 7) land in the bucket whose _id is 7.
        default: 7,
        output:
        {
          "total": {$sum: 1},
          "top_frustrated":
          {
            $push: {
              "uid": "$uid", "score": "$score"
            }
          },
        },
      }
    },
  ]
)
However, I would like to return only the top 3 of every bucket - i.e, buckets 0, 4 should be the same, but bucket 7 should have only uids 1,2,8 returned (as uid 7 has the lowest score) - but to include the total count of documents as well, i.e. output of bucket "7" should look like:
{ "total" : 4, "top_scores" :
[
{"uid" : 2, "score" : 11},
{"uid" : 1, "score" : 10},
{"uid" : 8, "score" : 10},
]
}
I tried using $addFields with $sortArray and $slice, but it either doesn't work or returns errors.
I can of course use $project but I was wondering if there is a more efficient way.
I am using Amazon DocumentDB.
You can use the $topN accumulator, instead of $push, like this:
// Same bucketing as before, but each bucket keeps only its three
// highest-scoring entries alongside the full document count.
db.collection.aggregate([
  {
    $bucket: {
      groupBy: "$score",
      boundaries: [0, 4, 7],
      default: 7,
      output: {
        total: { $sum: 1 },
        top_frustrated: {
          $topN: {
            n: 3,
            sortBy: { score: -1 },
            output: { uid: "$uid", score: "$score" }
          }
        }
      }
    }
  }
])
Playground link.
The only catch here is this operator is present in MongoDB 5.2 and above.
For older versions, this will work:
// Variant without $topN: sort, collect everything per bucket, then trim.
db.collection.aggregate([
  // Highest scores first; the later $slice relies on $push having collected
  // the entries in this order.
  { $sort: { score: -1 } },
  {
    $bucket: {
      groupBy: "$score",
      boundaries: [0, 4, 7],
      default: 7,
      output: {
        total: { $sum: 1 },
        top_frustrated: { $push: { uid: "$uid", score: "$score" } }
      }
    }
  },
  // Keep the bucket total, but only the first three collected entries.
  {
    $project: {
      total: 1,
      top_frustrated: { $slice: ["$top_frustrated", 3] }
    }
  }
])
Playground link.
Hi I am converting my existing website from php / mysql to node / mongodb , it is a golf society site which I use to log scores for each members rounds and provide results and statistics into their games. The main collection contains an array of 18 scores which is great for my results queries but I am having a problem with the statistical side, ie Avg score by Hole by Course, Lowest score ever by Hole by Course (eclectic) . I have come up with this aggregate query which works and gives me the result I require but it is ugly !! I am sure there must be a more elegant solution out there and feel I am missing a trick somewhere, I have looked at $map as I thought that might help but don't think it will. I would appreciate it if someone could offer any suggestions in tidying this code up, Thx.
{ _id:
{ date_played: 2019-06-21T00:00:00.000Z,
course_played: 1,
player_id: 1 },
score: [ 8, 4, 7, 4, 7, 1, 7, 5, 6, 4, 5, 7, 6, 4, 7, 5, 6, 7 ],
handicap: 23,
cash_won: 0,
sort_order: 2,
gross_score: 100,
gross_sfpts: 31,
skins_group: 1,
score_differential: 26.2,
pcc_adjustment: 0 }
{ _id:
{ date_played: 2016-08-14T00:00:00.000Z,
course_played: 1,
player_id: 1},
score: [ 5, 4, 5, 6, 5, 4, 8, 6, 1, 3, 3, 4, 3, 6, 3, 6, 4, 5 ],
handicap: 18,
cash_won: 14,
sort_order: 4,
gross_score: 81,
gross_sfpts: 44,
skins_group: 1,
score_differential: 12.1,
pcc_adjustment: 0 }
[
  // Only rounds played on course 1.
  { '$match': { '_id.course_played': 1 } },
  // Copy each element of the `score` array into its own hole field.
  {
    '$project': {
      'player_name': 1,
      'hole01': { '$arrayElemAt': ['$score', 0] },
      'hole02': { '$arrayElemAt': ['$score', 1] },
      'hole03': { '$arrayElemAt': ['$score', 2] },
      'hole04': { '$arrayElemAt': ['$score', 3] },
      'hole05': { '$arrayElemAt': ['$score', 4] },
      'hole06': { '$arrayElemAt': ['$score', 5] },
      'hole07': { '$arrayElemAt': ['$score', 6] },
      'hole08': { '$arrayElemAt': ['$score', 7] },
      'hole09': { '$arrayElemAt': ['$score', 8] },
      'hole10': { '$arrayElemAt': ['$score', 9] },
      'hole11': { '$arrayElemAt': ['$score', 10] },
      'hole12': { '$arrayElemAt': ['$score', 11] },
      'hole13': { '$arrayElemAt': ['$score', 12] },
      'hole14': { '$arrayElemAt': ['$score', 13] },
      'hole15': { '$arrayElemAt': ['$score', 14] },
      'hole16': { '$arrayElemAt': ['$score', 15] },
      'hole17': { '$arrayElemAt': ['$score', 16] },
      'hole18': { '$arrayElemAt': ['$score', 17] }
    }
  },
  { '$sort': { '_id.player_id': 1 } },
  // Per player: lowest score ever recorded on each hole, plus the round count.
  {
    '$group': {
      '_id': '$_id.player_id',
      'name': { '$first': '$player_name' },
      'hole1': { '$min': '$hole01' },
      'hole2': { '$min': '$hole02' },
      'hole3': { '$min': '$hole03' },
      'hole4': { '$min': '$hole04' },
      'hole5': { '$min': '$hole05' },
      'hole6': { '$min': '$hole06' },
      'hole7': { '$min': '$hole07' },
      'hole8': { '$min': '$hole08' },
      'hole9': { '$min': '$hole09' },
      'hole10': { '$min': '$hole10' },
      'hole11': { '$min': '$hole11' },
      'hole12': { '$min': '$hole12' },
      'hole13': { '$min': '$hole13' },
      'hole14': { '$min': '$hole14' },
      'hole15': { '$min': '$hole15' },
      'hole16': { '$min': '$hole16' },
      'hole17': { '$min': '$hole17' },
      'hole18': { '$min': '$hole18' },
      'rounds': { '$sum': 1 }
    }
  },
  // Sum of the eighteen per-hole minimums.
  {
    '$addFields': {
      'total': {
        '$add': [
          '$hole1', '$hole2', '$hole3', '$hole4', '$hole5', '$hole6',
          '$hole7', '$hole8', '$hole9', '$hole10', '$hole11', '$hole12',
          '$hole13', '$hole14', '$hole15', '$hole16', '$hole17', '$hole18'
        ]
      }
    }
  },
  // Ten lowest totals.
  { '$sort': { 'total': 1 } },
  { '$limit': 10 }
]
Which gives this as an example when run against the total database, which is the result I want but I would like all the "hole" fields to be returned in an Array as per the original score field.
{ _id: 1,
hole1: 5,
hole2: 4,
hole3: 5,
hole4: 4,
hole5: 5,
hole6: 2,
hole7: 3,
hole8: 3,
hole9: 3,
hole10: 3,
hole11: 2,
hole12: 3,
hole13: 4,
hole14: 2,
hole15: 3,
hole16: 3,
hole17: 3,
hole18: 3,
rounds: 562,
total: 53 }
You might $unwind the scores array, keeping the index as the hole number, then $group by player, course, and hole to get the score for each hole, $sort by hole number to make sure of the order, and then $group by player and course, pushing the scores back into an array.
db.collection.aggregate([
  // Rounds of player 1 only.
  {
    $match: { "_id.player_id": 1 }
  },
  // One document per hole; `hole` receives the array index of the score.
  {
    $unwind: {
      path: "$score",
      includeArrayIndex: "hole"
    }
  },
  // Lowest score and number of rounds per (course, player, hole).
  {
    $group: {
      _id: {
        course_played: "$_id.course_played",
        player_id: "$_id.player_id",
        hole: "$hole"
      },
      minScore: { $min: "$score" },
      rounds: { $sum: 1 }
    }
  },
  // Order by hole index so the scores are pushed back in hole order.
  {
    $sort: { "_id.hole": 1 }
  },
  // Reassemble the per-hole minimums into an array per (course, player).
  {
    $group: {
      _id: {
        course_played: "$_id.course_played",
        player_id: "$_id.player_id"
      },
      score: { $push: "$minScore" },
      total: { $sum: "$minScore" },
      rounds: { $first: "$rounds" }
    }
  }
])
Playground
I have the following aggregation:
// Group statistics by `ranking`; no `output` is specified for the buckets.
const rankingBoundaries = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11]
const buckets = await StatisticModel.aggregate([
  { $bucket: { groupBy: '$ranking', boundaries: rankingBoundaries } },
])
Which returns the following object:
[
{ _id: 3, count: 6 },
{ _id: 4, count: 98 },
{ _id: 5, count: 81 },
{ _id: 6, count: 25 },
{ _id: 7, count: 4 }
]
How can I add the missing (empty) buckets?
This is a simple example, but I have more complex ones where I generate the boundaries, and I want to return all the buckets to the front-end, not only the filled ones.
You can use below aggregation
// Return every bucket, including empty ones (count 0), in boundary order.
db.collection.aggregate([
  // $facet gathers the $bucket output (non-empty buckets only) into a single
  // document under `data`, so the next stage can look buckets up by _id.
  { "$facet": {
    "data": [
      { "$bucket": {
        "groupBy": "$ranking",
        "boundaries": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11]
      }}
    ]
  }},
  { "$addFields": {
    "data": {
      "$map": {
        // Lower bounds only. The final boundary (11) is the exclusive upper
        // bound of the last bucket and is never a bucket _id, so it is left
        // out to avoid emitting a bucket that cannot exist.
        "input": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        "as": "i",
        "in": {
          // Look the bucket up once and reuse the position.
          "$let": {
            "vars": { "idx": { "$indexOfArray": ["$data._id", "$$i"] } },
            "in": {
              "_id": "$$i",
              "count": {
                "$cond": [
                  { "$eq": ["$$idx", -1] },
                  0,
                  { "$arrayElemAt": ["$data.count", "$$idx"] }
                ]
              }
            }
          }
        }
      }
    }
  }},
  // Back to one document per bucket.
  { "$unwind": "$data" },
  { "$replaceRoot": { "newRoot": "$data" }}
])
But it is better to do this in JavaScript:
const array = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11]
const array2 = [
  { "_id": 3, "count": 6 },
  { "_id": 4, "count": 98 },
  { "_id": 5, "count": 81 },
  { "_id": 6, "count": 25 },
  { "_id": 7, "count": 4 }
]

/**
 * Build the complete bucket list for a set of $bucket boundaries.
 *
 * @param {number[]} boundaries - The boundaries passed to $bucket. Every entry
 *   except the last is the `_id` (inclusive lower bound) of one bucket; the
 *   last entry is only an exclusive upper bound.
 * @param {{_id: number, count: number}[]} buckets - The non-empty buckets
 *   returned by the aggregation.
 * @returns {{_id: number, count: number}[]} One entry per bucket, in boundary
 *   order, with `count: 0` for buckets missing from `buckets`. The inputs are
 *   not modified.
 */
function fillMissingBuckets(boundaries, buckets) {
  // Index the counts once instead of rescanning `buckets` for every boundary.
  const countById = new Map(buckets.map((bucket) => [bucket._id, bucket.count]))
  return boundaries
    .slice(0, -1) // drop the final upper bound: it is not a bucket id
    .map((id) => ({ _id: id, count: countById.has(id) ? countById.get(id) : 0 }))
}

console.log(fillMissingBuckets(array, array2))
Starting in Mongo 5.1, it's a perfect use case for the new $densify aggregation operator:
// { ranking: 3, count: 6 }
// { ranking: 4, count: 98 }
// { ranking: 6, count: 25 }
// { ranking: 7, count: 4 }
db.collection.aggregate([
  // Create a document for every missing `ranking` value, stepping by 1
  // across the given bounds.
  {
    $densify: {
      field: "ranking",
      range: {
        step: 1,
        bounds: [0, 12]
      }
    }
  },
  // Documents without a truthy `count` get count 0; all others keep theirs.
  {
    $set: {
      count: {
        $cond: [{ $not: ["$count"] }, 0, "$count"]
      }
    }
  }
])
// { ranking: 0, count: 0 } <=
// { ranking: 1, count: 0 } <=
// { ranking: 2, count: 0 } <=
// { ranking: 3, count: 6 }
// { ranking: 4, count: 98 }
// { ranking: 5, count: 0 } <=
// { ranking: 6, count: 25 }
// { ranking: 7, count: 4 }
// { ranking: 8, count: 0 } <=
// { ranking: 9, count: 0 } <=
// { ranking: 10, count: 0 } <=
// { ranking: 11, count: 0 } <=
This:
densifies documents ($densify) by creating new documents in a sequence of documents where certain values for a field (in our case field: "ranking") are missing:
the step for our densification is 1: range: { step: 1, ... } since our buckets are following each other with a size of 1.
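and we densify within the half-open range [0, 12): bounds: [0, 12] (the lower bound is inclusive and the upper bound is exclusive, which is why the last generated ranking is 11)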
finally sets ($set) count to 0 only for new documents included during the densify stage ({ count: { $cond: [ { $not: ["$count"] }, 0, "$count" ] } })
Note that I'm assuming your buckets are of equal size (assuming the missing 10 in your list is an oversight).
I want to retrieve data from MongoDB, grouping and summing custom fields derived from a DB field that may or may not exist, but I don't get the result I expect because the data is not being grouped (see the attached image). The Mongo statement is:
// Count "warm" and "cold" documents per day offset for one owner.
// Each stage must be its own single-key object inside the pipeline array;
// putting $match, $project and $group in one object is not a valid stage.
aggregate([
  {
    // NOTE(review): the sample schema shows creation_date as a string; these
    // numeric bounds assume it is stored as a number - confirm.
    $match: {
      owner: 'W99999',
      creation_date: {
        $gte: 1530748800,
        $lte: 1531292133
      }
    }
  },
  {
    $project: {
      // 1 when `referral` is missing/falsy, otherwise 0.
      isWarm: {
        $cond: [{ $not: ["$referral"] }, 1, 0]
      },
      // Exact complement of isWarm.
      isCold: {
        $cond: [{ $not: ["$referral"] }, 0, 1]
      },
      // 6 minus the number of whole days (86400 s) since 1530748800.
      daysBefore: {
        $subtract: [6, {
          $trunc: {
            $divide: [{
              $subtract: ['$creation_date', 1530748800]
            }, 86400]
          }
        }]
      }
    }
  },
  {
    $group: {
      // Only the grouping key belongs in _id; accumulators such as $sum are
      // sibling fields of _id, not members of it.
      _id: {
        daysBefore: '$daysBefore'
      },
      isWarm: { $sum: "$isWarm" },
      isCold: { $sum: "$isCold" }
    }
  }
])
I think the problem is the "isWarm" and "isCold" condition for creating them. Thank you in advance.
UPDATE 05/07/2018.
Schema (truncated for security reasons):
{
"_id": "1",
"creation_date":"1515780901",
"referral": //This field is optional.
{
some_data: { }
},
more_data: { }
}
Result expected:
{ [
{ isCold: 3, isWarm: 2, daysBefore: 0 },
{ isCold: 2, isWarm: 5, daysBefore: 1 },
{ isCold: 5, isWarm: 0, daysBefore: 2 },
{ isCold: 1, isWarm: 2, daysBefore: 3 },
{ isCold: 1, isWarm: 1, daysBefore: 4 },
{ isCold: 1, isWarm: 0, daysBefore: 5 },
{ isCold: 0, isWarm: 0, daysBefore: 6 }
] }
I would like to have the object even if there are no documents to count (e.g. the last line of the result).
RESOLVED: I need to test with real data.
I think I have a solution:
[
  {
    $project: {
      _id: 0,
      // 6 minus the number of whole days (86400 s) since 1530748800.
      daysBefore: {
        $subtract: [
          6,
          { $trunc: { $divide: [{ $subtract: ['$creation_date', 1530748800] }, 86400] } }
        ]
      },
      // Flags driven by comparing `referral` against null; presumably this
      // distinguishes documents that carry a referral - verify with real data.
      isWarm: { $cond: [{ $gte: ['$referral', null] }, 1, 0] },
      isCold: { $cond: [{ $gte: ['$referral', null] }, 0, 1] }
    }
  },
  // Total both flags per day offset.
  {
    $group: {
      _id: { creation_date: '$daysBefore' },
      isWarm: { $sum: '$isWarm' },
      isCold: { $sum: '$isCold' }
    }
  }
]
The data in the database is like this, there are decimals, I need to sum.
the data:
[ { _id: 5ad16497f52e0e1160fb70ae,
state: 0,
percentage: 32,
serviceCharge: 1 },
{ _id: 5ad16600f52e0e1160fb70b1,
state: 0,
percentage: 0,
serviceCharge: 10.115384615384642 },
{ _id: 5ad167f6c782521402300b4a,
state: 0,
percentage: 0,
serviceCharge: 16.11538461538464 },
{ _id: 5ad167f9c782521402300b4c,
state: 0,
percentage: 0,
serviceCharge: 23.769230769230717 },
{ _id: 5ad49154eb7bc9401e0c469b,
state: 0,
percentage: 6,
serviceCharge: 6 },
{ _id: 5ad49154eb7bc9401e0c469c,
state: 2,
percentage: 0,
serviceCharge: 6 } ]
the code :
// Sum serviceCharge, percentage and (serviceCharge - percentage) per state.
// The stages are passed as a single pipeline array: the array form is accepted
// by old and new Mongoose versions, while the spread form is not.
bet.aggregate(
  [
    {
      $match: {
        state: { $in: [0, 1, 2, 3] }
      }
    },
    {
      $group: {
        _id: { state: "$state" },
        // With the sample data shown above, the state-0 serviceCharge values
        // add up to 57 (their fractional parts cancel), so a whole number
        // here does not mean the decimals were dropped.
        serviceCharge: { $sum: "$serviceCharge" },
        percentage: { $sum: "$percentage" },
        income: { $sum: { $subtract: ["$serviceCharge", "$percentage"] } }
      }
    },
    // Flatten the grouping key into a top-level `state` field.
    { $project: { "_id": 0, "state": "$_id.state", "serviceCharge": 1, "percentage": 1, "income": 1 } }
  ],
  (err, ret) => {
    // Report failures instead of silently logging an undefined result.
    if (err) {
      console.error(err)
      return
    }
    console.log(ret)
  })
Run, and the result is :
[
{ serviceCharge: 6, percentage: 0, income: 6, state: 2 },
{ serviceCharge: 57, percentage: 38, income: 19, state: 0 }
]
Where are my decimals, and why is the result of the calculation an integer?