MONGODB Aggregate query to give $min value of array field by element - mongodb

Hi I am converting my existing website from php / mysql to node / mongodb , it is a golf society site which I use to log scores for each members rounds and provide results and statistics into their games. The main collection contains an array of 18 scores which is great for my results queries but I am having a problem with the statistical side, ie Avg score by Hole by Course, Lowest score ever by Hole by Course (eclectic) . I have come up with this aggregate query which works and gives me the result I require but it is ugly !! I am sure there must be a more elegant solution out there and feel I am missing a trick somewhere, I have looked at $map as I thought that might help but don't think it will. I would appreciate it if someone could offer any suggestions in tidying this code up, Thx.
{ _id:
{ date_played: 2019-06-21T00:00:00.000Z,
course_played: 1,
player_id: 1 },
score: [ 8, 4, 7, 4, 7, 1, 7, 5, 6, 4, 5, 7, 6, 4, 7, 5, 6, 7 ],
handicap: 23,
cash_won: 0,
sort_order: 2,
gross_score: 100,
gross_sfpts: 31,
skins_group: 1,
score_differential: 26.2,
pcc_adjustment: 0 }
{ _id:
{ date_played: 2016-08-14T00:00:00.000Z,
course_played: 1,
player_id: 1},
score: [ 5, 4, 5, 6, 5, 4, 8, 6, 1, 3, 3, 4, 3, 6, 3, 6, 4, 5 ],
handicap: 18,
cash_won: 14,
sort_order: 4,
gross_score: 81,
gross_sfpts: 44,
skins_group: 1,
score_differential: 12.1,
pcc_adjustment: 0 }
[
{
'$match': {
'_id.course_played': 1
}
}, {
'$project': {
'player_name': 1,
'hole01': {
'$arrayElemAt': [
'$score', 0
]
},
'hole02': {
'$arrayElemAt': [
'$score', 1
]
},
'hole03': {
'$arrayElemAt': [
'$score', 2
]
},
'hole04': {
'$arrayElemAt': [
'$score', 3
]
},
'hole05': {
'$arrayElemAt': [
'$score', 4
]
},
'hole06': {
'$arrayElemAt': [
'$score', 5
]
},
'hole07': {
'$arrayElemAt': [
'$score', 6
]
},
'hole08': {
'$arrayElemAt': [
'$score', 7
]
},
'hole09': {
'$arrayElemAt': [
'$score', 8
]
},
'hole10': {
'$arrayElemAt': [
'$score', 9
]
},
'hole11': {
'$arrayElemAt': [
'$score', 10
]
},
'hole12': {
'$arrayElemAt': [
'$score', 11
]
},
'hole13': {
'$arrayElemAt': [
'$score', 12
]
},
'hole14': {
'$arrayElemAt': [
'$score', 13
]
},
'hole15': {
'$arrayElemAt': [
'$score', 14
]
},
'hole16': {
'$arrayElemAt': [
'$score', 15
]
},
'hole17': {
'$arrayElemAt': [
'$score', 16
]
},
'hole18': {
'$arrayElemAt': [
'$score', 17
]
}
}
}, {
'$sort': {
'_id.player_id': 1
}
}, {
'$group': {
'_id': '$_id.player_id',
'name': {
'$first': '$player_name'
},
'hole1': {
'$min': '$hole01'
},
'hole2': {
'$min': '$hole02'
},
'hole3': {
'$min': '$hole03'
},
'hole4': {
'$min': '$hole04'
},
'hole5': {
'$min': '$hole05'
},
'hole6': {
'$min': '$hole06'
},
'hole7': {
'$min': '$hole07'
},
'hole8': {
'$min': '$hole08'
},
'hole9': {
'$min': '$hole09'
},
'hole10': {
'$min': '$hole10'
},
'hole11': {
'$min': '$hole11'
},
'hole12': {
'$min': '$hole12'
},
'hole13': {
'$min': '$hole13'
},
'hole14': {
'$min': '$hole14'
},
'hole15': {
'$min': '$hole15'
},
'hole16': {
'$min': '$hole16'
},
'hole17': {
'$min': '$hole17'
},
'hole18': {
'$min': '$hole18'
},
'rounds': {
'$sum': 1
}
}
}, {
'$addFields': {
'total': {
'$add': [
'$hole1', '$hole2', '$hole3', '$hole4', '$hole5', '$hole6', '$hole7', '$hole8', '$hole9', '$hole10', '$hole11', '$hole12', '$hole13', '$hole14', '$hole15', '$hole16', '$hole17', '$hole18'
]
}
}
}, {
'$sort': {
'total': 1
}
}, {
'$limit': 10
}
]
Which gives this as an example when run against the total database, which is the result I want but I would like all the "hole" fields to be returned in an Array as per the original score field.
{ _id: 1,
hole1: 5,
hole2: 4,
hole3: 5,
hole4: 4,
hole5: 5,
hole6: 2,
hole7: 3,
hole8: 3,
hole9: 3,
hole10: 3,
hole11: 2,
hole12: 3,
hole13: 4,
hole14: 2,
hole15: 3,
hole16: 3,
hole17: 3,
hole18: 3,
rounds: 562,
total: 53 }

You might $unwind the scores array, keeping the index as the hole number, then $group by player, course, and hole to get the score for each hole, $sort by hole number to make sure of the order, and then $group by player and course, pushing the scores back into an array.
db.collection.aggregate([
{$match: {"_id.player_id": 1}},
{$unwind: {
path: "$score",
includeArrayIndex: "hole"
}},
{$group: {
_id: {
course_played: "$_id.course_played",
player_id: "$_id.player_id",
hole: "$hole"
},
minScore: {$min: "$score"},
rounds: {$sum: 1}
}},
{$sort: {"_id.hole": 1}},
{$group: {
_id: {
course_played: "$_id.course_played",
player_id: "$_id.player_id"
},
score: {$push: "$minScore"},
total: {$sum: "$minScore"},
rounds: {$first: "$rounds"}
}}
])
Playground

Related

Mongo db aggregation - $push and $slice top results

I have the following documents in my db:
{uid: 1, score: 10}
{uid: 2, score: 11}
{uid: 3, score: 1}
{uid: 4, score: 6}
{uid: 5, score: 2}
{uid: 6, score: 3}
{uid: 7, score: 8}
{uid: 8, score: 10}
I want to split them into buckets by score - i.e.:
score
uids
(bucket name in aggregation)
[0,4)
3,5,6
0
[4,7)
4
4
[7,inf
1,2,7,8
7
For this, I created the following aggregation which works just fine:
db.scores.aggregation(
[
{
$bucket:
{
groupBy: "$score",
boundaries: [0, 4, 7],
default: 7,
output:
{
"total": {$sum: 1},
"top_frustrated":
{
$push: {
"uid": "$uid", "score": "$score"
}
},
},
}
},
]
)
However, I would like to return only the top 3 of every bucket - i.e, buckets 0, 4 should be the same, but bucket 7 should have only uids 1,2,8 returned (as uid 7 has the lowest score) - but to include the total count of documents as well, i.e. output of bucket "7" should look like:
{ "total" : 4, "top_scores" :
[
{"uid" : 2, "score" : 11},
{"uid" : 1, "score" : 10},
{"uid" : 8, "score" : 10},
]
}
I tried using $addFields with $sortArray and $slice, but it either won't work or return errors.
I can of course use $project but I was wondering if there is a more efficient way.
I am using Amazon DocumentDB.
You can use the $topN accumulator, instead of $push, like this:
db.collection.aggregate([
{
"$bucket": {
"groupBy": "$score",
"boundaries": [
0,
4,
7
],
"default": 7,
"output": {
"total": {
"$sum": 1
},
"top_frustrated": {
"$topN": {
"n": 3,
"sortBy": {
"score": -1
},
"output": {
"uid": "$uid",
"score": "$score"
}
}
}
},
}
},
])
Playground link.
The only catch here is this operator is present in MongoDB 5.2 and above.
For older versions, this will work:
db.collection.aggregate([
{
"$sort": {
score: -1
}
},
{
$bucket: {
groupBy: "$score",
boundaries: [
0,
4,
7
],
default: 7,
output: {
"total": {
$sum: 1
},
"top_frustrated": {
$push: {
"uid": "$uid",
"score": "$score"
}
},
},
}
},
{
"$project": {
total: 1,
top_frustrated: {
"$slice": [
"$top_frustrated",
3
]
}
}
}
])
Playground link.

mongoDb groupby with array object field

I have searched a lot to use groupby based on the array field value, but I didn't get proper results in google, so I'm posting here.
I have tried my best, it works 50% need to correct my query can anyone help me with this
I have a database value like
{"_id": "62b0bec8922dc767f8b933b4",
"seatSeletion": [{
"rowNo": 0,
"columnNo": 0,
"seatNo": 3
}, {
"rowNo": 0,
"columnNo": 1,
"seatNo": 4
}],
"movieId": "62af1ff6cb38656a4ffe36aa",
"movieDate": "2022-06-20T18:14:38.133+00:00",
"movieTiming": "1:30 p.m",
},
{"_id": "62b0b91560f57e0cb220db02","seatSeletion": [{
"rowNo": 0,
"columnNo": 0,
"seatNo": 1
}, {
"rowNo": 0,
"columnNo": 1,
"seatNo": 2
}],
"movieId": "62af1ff6cb38656a4ffe36aa",
"movieDate": "2022-06-20T18:14:38.133+00:00",
"movieTiming": "1:30 p.m",
}
expected output
{
"seatSeletion": [
{
"rowNo": 0,
"columnNo": 0,
"seatNo": 1
},
{
"rowNo": 0,
"columnNo": 1,
"seatNo": 2
},
{
"_id": "62b0b90e60f57e0cb220db00",
"rowNo": 0,
"columnNo": 0,
"seatNo": 3
},
{
"_id": "62b0b90e60f57e0cb220db01",
"rowNo": 0,
"columnNo": 1,
"seatNo": 4
}
],
"movieTiming": "1:30 p.m",
"movieId": "62af1ff6cb38656a4ffe36aa",
"movieDate": "2022-06-20T11:03:37.000Z"
},
this is how I tried in my query
Bookings.aggregate([
{
$match: {
$and: [{ movieId: ObjectId(bookingParam.movieId) },
{ movieTiming: bookingParam.movieTiming },
{ movieDate: dateQuery },
]
}
},
{
$group: {
_id: {
seatSeletion: '$seatSeletion', movieTiming: '$movieTiming',
movieId: '$movieId', movieDate: '$movieDate', createdBy: "$createdBy", updatedBy: "$updatedBy", movies: "$movies"
}
}
},
{
$project: {
seatSeletion: '$_id.seatSeletion', movieTiming: '$_id.movieTiming',
movieId: '$_id.movieId', movieDate: '$_id.movieDate', movies: "$_id.movies",
_id: 0
}
}
])
but i got it like this
{
"seatSeletion": [
{
"_id": "62b0b91560f57e0cb220db03",
"rowNo": 0,
"columnNo": 0,
"seatNo": 1
},
{
"_id": "62b0b91560f57e0cb220db04",
"rowNo": 0,
"columnNo": 1,
"seatNo": 2
}
],
"movieTiming": "1:30 p.m",
"movieId": "62af1ff6cb38656a4ffe36aa",
"movieDate": "2022-06-20T11:03:37.000Z"
},
{
"seatSeletion": [
{
"_id": "62b0b90e60f57e0cb220db00",
"rowNo": 0,
"columnNo": 0,
"seatNo": 3
},
{
"_id": "62b0b90e60f57e0cb220db01",
"rowNo": 0,
"columnNo": 1,
"seatNo": 4
}
],
"movieTiming": "1:30 p.m",
"movieId": "62af1ff6cb38656a4ffe36aa",
"movieDate": "2022-06-20T11:03:37.000Z"
}
can anyone help me to fix this issue.
One option is using $reduce after the $group. It is important NOT to group by the seatSeletion as the value of this field is not common to these movies:
db.collection.aggregate([
{
$match: {
$and: [
{movieId: "62af1ff6cb38656a4ffe36aa"},
{movieTiming: "1:30 p.m"},
{movieDate: "2022-06-20T18:14:38.133+00:00"},
]
}
},
{
$group: {
_id: {movieTiming: "$movieTiming", movieId: "$movieId", movieDate: "$movieDate"},
seatSeletion: {$push: "$seatSeletion"}
}
},
{
$project: {
seatSeletion: {
$reduce: {
input: "$seatSeletion",
initialValue: [],
in: {$concatArrays: ["$$value", "$$this"]}
}
},
movieTiming: "$_id.movieTiming",
movieId: "$_id.movieId",
movieDate: "$_id.movieDate",
_id: 0
}
}
])
See how it works on the playground example
Another option is using $unwind instead of $reduce, but it is generally considered slower:
db.collection.aggregate([
{
$match: {
$and: [
{movieId: "62af1ff6cb38656a4ffe36aa"},
{movieTiming: "1:30 p.m"},
{movieDate: "2022-06-20T18:14:38.133+00:00"},
]
}
},
{$unwind: "$seatSeletion"},
{
$group: {
_id: {movieTiming: "$movieTiming", movieId: "$movieId", movieDate: "$movieDate"},
seatSeletion: {$push: "$seatSeletion"}
}
},
{
$project: {
seatSeletion: 1,
movieTiming: "$_id.movieTiming",
movieId: "$_id.movieId",
movieDate: "$_id.movieDate",
_id: 0
}
}
])
See how it works on the playground example - unwind
more output nearly you expect
{
"_id": {
"movieId": "62af1ff6cb38656a4ffe36aa",
"movieDate": "2022-06-20T18:14:38.133+00:00",
"movieTiming": "1:30 p.m"
},
"seatSeletion": [
{ "rowNo": 0,"columnNo": 0,"seatNo": 3
},
{ "rowNo": 0,"columnNo": 1,"seatNo": 4
},
{ "rowNo": 0,"columnNo": 0,"seatNo": 1
},
{ "rowNo": 0,"columnNo": 1,"seatNo": 2
}
]
}
query
db.collection.aggregate(
{
$match: {}
},
{
$unwind: {
path: '$seatSeletion'
}
},
{
$group: {
_id:
{
movieId: '$movieId',
movieDate: '$movieDate',
movieTiming: '$movieTiming'
},
seatSeletion:
{ $push: '$seatSeletion' }
}
}
)

Mongodb aggregate $group for non-existing items

I have document like this :
Documents :
{score: 1, value: 10}
{score: 3, value: 10}
{score: 1, value: 10}
{score: 4, value: 10}
{score: 1, value: 10}
{score: 5, value: 10}
{score: 5, value: 10}
{score: 10, value: 10}
In this collection, there is no score for 2,6,7,8,9 but I need output like below.
Output :
{score: 1, avg: 10}
{score: 2, avg: 0}
{score: 3, avg: 10}
{score: 4, avg: 10}
{score: 5, avg: 10}
{score: 6, avg: 0}
{score: 7, avg: 0}
{score: 8, avg: 0}
{score: 9, avg: 0}
{score: 10, avg: 10}
Any option in Mongo aggregate which will generate this. Please assist
You can try that using aggregation :
db.collection.aggregate([
{ $group: { _id: '$score', avg: { $avg: '$value' } } },
{ $group: { _id: '', min: { $min: '$_id' }, max: { $max: '$_id' }, data: { $push: '$$ROOT' } } },
{ $project: { _id: 0, data: 1, nums: { $range: ['$min', "$max", 1] } } },
{ $project: { data: { $concatArrays: ["$data", { $map: { input: { $setDifference: ["$nums", "$data._id"] }, in: { _id: '$$this', avg: 0 } } }] } } },
{ $unwind: '$data' }, { $replaceRoot: { newRoot: "$data" } }
])
Test : MongoDB-Playground
Assuming you know the range of scores, there's a trick to achieve exactly what you want :
1 - Insert in your collection a document for each score, with value field not set or set to null :
db.collection.insertMany([
{
score: 1,
},
{
score: 2,
},
{
score: 3,
},
{
score: 4,
},
{
score: 5,
},
{
score: 6,
},
{
score: 7,
},
{
score: 8,
},
{
score: 9,
},
{
score: 10,
}
]);
It's important for value field not to be set, because a value set at 0 will affect average calculation
Of course this operation must be performed only once.
Then you can apply the following aggregation, which will output exactly what you need :
db.collection.aggregate([
{
$bucket: {
groupBy: "$score",
boundaries: [
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11
],
output: {
avg: {
$avg: "$value"
}
}
}
},
{
$project: {
score: "$_id",
avg: {
$ifNull: [
"$avg",
0
]
},
_id: 0
}
}
])
Will output :
[
{
"avg": 10,
"score": 1
},
{
"avg": 0,
"score": 2
},
{
"avg": 10,
"score": 3
},
{
"avg": 10,
"score": 4
},
{
"avg": 10,
"score": 5
},
{
"avg": 0,
"score": 6
},
{
"avg": 0,
"score": 7
},
{
"avg": 0,
"score": 8
},
{
"avg": 0,
"score": 9
},
{
"avg": 10,
"score": 10
}
]
You can test it here.

Mongodb grouping statement by custom file

I want to retrieve data from mongodb, grouping and summing for a custom field based in a db field which can exist or not but I don't have the result I expect because I there is no data ggrupation (see attached file)enter image description here. The mongo statement is:
aggregate({
$match: {
owner: 'W99999',
creation_date: {
$gte: 1530748800,
$lte: 1531292133
}
},
$project: {
isWarm: {
$cond: [{
$not: ["$referral"] }, 1, 0 ]
},
isCold: {
$cond: [{
$not: ["$referral"] }, 0, 1 ]
},
daysBefore: {
$subtract: [6, {
$trunc: {
$divide: [{
$subtract: ['$creation_date', 1530748800]
}, 86400]
}
}]
}
},
$group: {
_id: {
isWarm: { $sum: "$isWarm" },
isCold: { $sum: "$isCold" },
daysBefore: '$daysBefore'
}
})
I think the problem is the "isWarm" and "isCold" condition for creating them. Thank you in advance.
UPDATE 05/07/2018.
Schema (trunked for security reasons):
{
"_id": "1",
"creation_date":"1515780901",
"referral: //This field is optional.
{
some_data: { }
},
more_data: { }
}
Result expected:
{ [
{ isCold: 3, isWarm: 2, daysBefore: 0 },
{ isCold: 2, isWarm: 5, daysBefore: 1 },
{ isCold: 5, isWarm: 0, daysBefore: 2 },
{ isCold: 1, isWarm: 2, daysBefore: 3 },
{ isCold: 1, isWarm: 1, daysBefore: 4 },
{ isCold: 1, isWarm: 0, daysBefore: 5 },
{ isCold: 0, isWarm: 0, daysBefore: 6 }
] }
I would like to have the object even if there is no documents to count (e.g. last line of the result).
RESOLVED: I need to test with real data.
I think I have a solution:
[{ $project: {
_id: 0,
daysBefore: {
$subtract: [6, {
$trunc: {
$divide: [{
$subtract: ['$creation_date', 1530748800] }, 86400]
}
}]
},
isWarm: {$cond: [{ $gte: ['$referral', null]}, 1, 0]},
isCold: {$cond: [{ $gte: ['$referral', null]}, 0, 1]}} },
{
$group:
{
_id: { creation_date: '$daysBefore' },
isWarm: { $sum: '$isWarm' },
isCold: { $sum: '$isCold' }
}
}]

MongoDB. Aggregate the sum of two arrays sizes

With MongoDB 3.4.10 and mongoose 4.13.6 I'm able to count sizes of two arrays on the User model:
User.aggregate()
.project({
'_id': 1,
'leftVotesCount': { '$size': '$leftVoted' },
'rightVotesCount': { '$size': '$rightVoted' }
})
where my Users are (per db.users.find())
{ "_id" : ObjectId("5a2b21e63023c6117085c240"), "rightVoted" : [ 2 ],
"leftVoted" : [ 1, 6 ] }
{ "_id" : ObjectId("5a2c0d68efde3416bc8b7020"), "rightVoted" : [ 2 ],
"leftVoted" : [ 1 ] }
Here I'm getting expected result:
[ { _id: '5a2b21e63023c6117085c240', leftVotesCount: 2, rightVotesCount: 1 },
{ _id: '5a2c0d68efde3416bc8b7020', leftVotesCount: 1, rightVotesCount: 1 } ]
Question. How can I get a cumulative value of leftVotesCount and rightVotesCount data? I tried folowing:
User.aggregate()
.project({
'_id': 1,
'leftVotesCount': { '$size': '$leftVoted' },
'rightVotesCount': { '$size': '$rightVoted' },
'votesCount': { '$add': [ '$leftVotesCount', '$rightVotesCount' ] },
'votesCount2': { '$sum': [ '$leftVotesCount', '$rightVotesCount' ] }
})
But votesCount is null and votesCount2 is 0 for both users. I'm expecting votesCount = 3 for User 1 and votesCount = 2 for User 2.
$leftVotesCount, $rightVotesCount become available only on the next stage. Try something like:
User.aggregate()
.project({
'_id': 1,
'leftVotesCount': { '$size': '$leftVoted' },
'rightVotesCount': { '$size': '$rightVoted' }
})
.project({
'_id': 1,
'leftVotesCount': 1,
'rightVotesCount': 1
'votesCount': { '$add': [ '$leftVotesCount', '$rightVotesCount' ] },
'votesCount2': { '$sum': [ '$leftVotesCount', '$rightVotesCount' ] }
})
You can't reference the project variables created in the same project stage.
You can wrap the variables in a $let expression.
User.aggregate().project({
"$let": {
"vars": {
"leftVotesCount": {
"$size": "$leftVoted"
},
"rightVotesCount": {
"$size": "$rightVoted"
}
},
"in": {
"votesCount": {
"$add": [
"$$leftVotesCount",
"$$rightVotesCount"
]
},
"leftVotesCount": "$$leftVotesCount",
"rightVotesCount": "$$rightVotesCount"
}
}
})
It turned out that $add supports nested expressions, so I was able to solve the issue by excluding intermediate variables:
User.aggregate().project({
'_id': 1,
'votesCount': { '$add': [ { '$size': '$leftVoted' }, { '$size': '$rightVoted' } ] }
});
// [ {_id: '...', votesCount: 3}, {_id: '...', votesCount: 2} ]