Get last timestamp from an array in pymongo - mongodb

I've got the following list of documents:
[
    # Each document carries a `data` list of readings stamped with `datetime`.
    {'id': 1,
     'name': 'Coco',
     'data': [{'X': 10, 'datetime': datetime.datetime(2020, 1, 1, 1, 0)},
              {'X': 20, 'datetime': datetime.datetime(2019, 1, 1, 2, 0)}]
     },
    {'id': 2,
     'name': 'Kiki',
     'data': [{'X': 30, 'datetime': datetime.datetime(2015, 7, 10, 1, 0)},
              {'X': 40, 'datetime': datetime.datetime(2017, 10, 4, 2, 0)}]
     }
]
How do I get the last timestamp from each list in the data field? I would like to get something like:
[
    # Desired result: `data` reduced to the single entry with the latest `datetime`.
    {'id': 1,
     'name': 'Coco',
     'data': [{'X': 10, 'datetime': datetime.datetime(2020, 1, 1, 1, 0)}]
     },
    {'id': 2,
     'name': 'Kiki',
     'data': [{'X': 40, 'datetime': datetime.datetime(2017, 10, 4, 2, 0)}]
     }
]

Try this:
// Replace each document's `data` array with a one-element array that holds
// the entry whose `datetime` is the greatest.
db.collectionName.aggregate([
  {
    $addFields: {
      data: [
        {
          // Fold over the array, carrying the latest entry seen so far.
          $reduce: {
            input: "$data",
            // Seed the fold with the first entry of the array.
            initialValue: { $arrayElemAt: ["$data", 0] },
            in: {
              $cond: {
                if: { $gt: ["$$value.datetime", "$$this.datetime"] },
                then: "$$value",
                else: "$$this"
              }
            }
          }
        }
      ]
    }
  }
]);
Output:
/* 1 createdAt:3/3/2021, 9:15:27 PM*/
{
"_id" : ObjectId("603faf17bcece4372062bcf5"),
"id" : 1,
"name" : "Coco",
"data" : [
{
"X" : 10,
"datetime" : ISODate("2020-02-01T01:00:00.000+05:30")
}
]
},
/* 2 createdAt:3/3/2021, 9:15:27 PM*/
{
"_id" : ObjectId("603faf17bcece4372062bcf6"),
"id" : 2,
"name" : "Kiki",
"data" : [
{
"X" : 40,
"datetime" : ISODate("2017-11-04T02:00:00.000+05:30")
}
]
}

Related

Mongo db aggregation - $push and $slice top results

I have the following documents in my db:
{uid: 1, score: 10}
{uid: 2, score: 11}
{uid: 3, score: 1}
{uid: 4, score: 6}
{uid: 5, score: 2}
{uid: 6, score: 3}
{uid: 7, score: 8}
{uid: 8, score: 10}
I want to split them into buckets by score - i.e.:
| score   | uids    | bucket name in aggregation |
|---------|---------|----------------------------|
| [0,4)   | 3,5,6   | 0                          |
| [4,7)   | 4       | 4                          |
| [7,inf) | 1,2,7,8 | 7                          |
For this, I created the following aggregation which works just fine:
// Split the score documents into the buckets [0,4), [4,7) and "everything
// else" (named 7), returning per bucket the document count and every
// uid/score pair that fell into it.
// The shell method is `aggregate`; `aggregation` does not exist on a collection.
db.scores.aggregate(
  [
    {
      $bucket:
      {
        groupBy: "$score",
        boundaries: [0, 4, 7],
        // Scores outside [0,7) land in the bucket whose _id is 7.
        default: 7,
        output:
        {
          "total": {$sum: 1},
          "top_frustrated":
          {
            $push: {
              "uid": "$uid", "score": "$score"
            }
          }
        }
      }
    }
  ]
)
However, I would like to return only the top 3 of every bucket - i.e, buckets 0, 4 should be the same, but bucket 7 should have only uids 1,2,8 returned (as uid 7 has the lowest score) - but to include the total count of documents as well, i.e. output of bucket "7" should look like:
{ "total" : 4, "top_scores" :
[
{"uid" : 2, "score" : 11},
{"uid" : 1, "score" : 10},
{"uid" : 8, "score" : 10},
]
}
I tried using $addFields with $sortArray and $slice, but it either doesn't work or returns errors.
I can of course use $project but I was wondering if there is a more efficient way.
I am using Amazon DocumentDB.
You can use the $topN accumulator, instead of $push, like this:
// Bucket the documents by score; for every bucket report how many documents
// it holds plus its three highest-scoring uid/score pairs.
db.collection.aggregate([
  {
    $bucket: {
      groupBy: "$score",
      boundaries: [0, 4, 7],
      default: 7,
      output: {
        total: { $sum: 1 },
        top_frustrated: {
          // Accumulator that keeps only the top n entries by the sort order.
          $topN: {
            n: 3,
            sortBy: { score: -1 },
            output: { uid: "$uid", score: "$score" }
          }
        }
      }
    }
  }
])
Playground link.
The only catch here is that this operator is only available in MongoDB 5.2 and above.
For older versions, this will work:
// Alternative without $topN: order the documents by descending score,
// collect every uid/score pair per bucket, then keep the first three.
db.collection.aggregate([
  { $sort: { score: -1 } },
  {
    $bucket: {
      groupBy: "$score",
      boundaries: [0, 4, 7],
      default: 7,
      output: {
        total: { $sum: 1 },
        top_frustrated: { $push: { uid: "$uid", score: "$score" } }
      }
    }
  },
  {
    // Trim each bucket's list to its first three entries; total is kept as is.
    $project: {
      total: 1,
      top_frustrated: { $slice: ["$top_frustrated", 3] }
    }
  }
])
Playground link.

MONGODB Aggregate query to give $min value of array field by element

Hi, I am converting my existing website from PHP/MySQL to Node/MongoDB. It is a golf society site which I use to log scores for each member's rounds and provide results and statistics on their games. The main collection contains an array of 18 scores, which is great for my results queries, but I am having a problem with the statistical side, i.e. average score by hole by course and lowest score ever by hole by course (eclectic). I have come up with this aggregate query, which works and gives me the result I require, but it is ugly! I am sure there must be a more elegant solution out there and feel I am missing a trick somewhere. I have looked at $map as I thought that might help, but I don't think it will. I would appreciate it if someone could offer any suggestions for tidying this code up. Thanks.
{ _id:
{ date_played: 2019-06-21T00:00:00.000Z,
course_played: 1,
player_id: 1 },
score: [ 8, 4, 7, 4, 7, 1, 7, 5, 6, 4, 5, 7, 6, 4, 7, 5, 6, 7 ],
handicap: 23,
cash_won: 0,
sort_order: 2,
gross_score: 100,
gross_sfpts: 31,
skins_group: 1,
score_differential: 26.2,
pcc_adjustment: 0 }
{ _id:
{ date_played: 2016-08-14T00:00:00.000Z,
course_played: 1,
player_id: 1},
score: [ 5, 4, 5, 6, 5, 4, 8, 6, 1, 3, 3, 4, 3, 6, 3, 6, 4, 5 ],
handicap: 18,
cash_won: 14,
sort_order: 4,
gross_score: 81,
gross_sfpts: 44,
skins_group: 1,
score_differential: 12.1,
pcc_adjustment: 0 }
[
  // Keep only rounds played on course 1.
  { '$match': { '_id.course_played': 1 } },

  // Spread the score array into one named field per hole (index 0 = hole 1).
  {
    '$project': {
      'player_name': 1,
      'hole01': { '$arrayElemAt': ['$score', 0] },
      'hole02': { '$arrayElemAt': ['$score', 1] },
      'hole03': { '$arrayElemAt': ['$score', 2] },
      'hole04': { '$arrayElemAt': ['$score', 3] },
      'hole05': { '$arrayElemAt': ['$score', 4] },
      'hole06': { '$arrayElemAt': ['$score', 5] },
      'hole07': { '$arrayElemAt': ['$score', 6] },
      'hole08': { '$arrayElemAt': ['$score', 7] },
      'hole09': { '$arrayElemAt': ['$score', 8] },
      'hole10': { '$arrayElemAt': ['$score', 9] },
      'hole11': { '$arrayElemAt': ['$score', 10] },
      'hole12': { '$arrayElemAt': ['$score', 11] },
      'hole13': { '$arrayElemAt': ['$score', 12] },
      'hole14': { '$arrayElemAt': ['$score', 13] },
      'hole15': { '$arrayElemAt': ['$score', 14] },
      'hole16': { '$arrayElemAt': ['$score', 15] },
      'hole17': { '$arrayElemAt': ['$score', 16] },
      'hole18': { '$arrayElemAt': ['$score', 17] }
    }
  },

  { '$sort': { '_id.player_id': 1 } },

  // Per player: the lowest score recorded on each hole and the round count.
  {
    '$group': {
      '_id': '$_id.player_id',
      'name': { '$first': '$player_name' },
      'hole1': { '$min': '$hole01' },
      'hole2': { '$min': '$hole02' },
      'hole3': { '$min': '$hole03' },
      'hole4': { '$min': '$hole04' },
      'hole5': { '$min': '$hole05' },
      'hole6': { '$min': '$hole06' },
      'hole7': { '$min': '$hole07' },
      'hole8': { '$min': '$hole08' },
      'hole9': { '$min': '$hole09' },
      'hole10': { '$min': '$hole10' },
      'hole11': { '$min': '$hole11' },
      'hole12': { '$min': '$hole12' },
      'hole13': { '$min': '$hole13' },
      'hole14': { '$min': '$hole14' },
      'hole15': { '$min': '$hole15' },
      'hole16': { '$min': '$hole16' },
      'hole17': { '$min': '$hole17' },
      'hole18': { '$min': '$hole18' },
      'rounds': { '$sum': 1 }
    }
  },

  // Sum of the eighteen per-hole minimums.
  {
    '$addFields': {
      'total': {
        '$add': [
          '$hole1', '$hole2', '$hole3', '$hole4', '$hole5', '$hole6',
          '$hole7', '$hole8', '$hole9', '$hole10', '$hole11', '$hole12',
          '$hole13', '$hole14', '$hole15', '$hole16', '$hole17', '$hole18'
        ]
      }
    }
  },

  // Ten lowest totals first.
  { '$sort': { 'total': 1 } },
  { '$limit': 10 }
]
Which gives this as an example when run against the total database, which is the result I want but I would like all the "hole" fields to be returned in an Array as per the original score field.
{ _id: 1,
hole1: 5,
hole2: 4,
hole3: 5,
hole4: 4,
hole5: 5,
hole6: 2,
hole7: 3,
hole8: 3,
hole9: 3,
hole10: 3,
hole11: 2,
hole12: 3,
hole13: 4,
hole14: 2,
hole15: 3,
hole16: 3,
hole17: 3,
hole18: 3,
rounds: 562,
total: 53 }
You might $unwind the scores array, keeping the index as the hole number, then $group by player, course, and hole to get the score for each hole, $sort by hole number to make sure of the order, and then $group by player and course, pushing the scores back into an array.
// Lowest score per hole for a player on each course, returned as an array
// ordered by hole, together with the sum of those minimums.
db.collection.aggregate([
  { $match: { "_id.player_id": 1 } },

  // One document per score entry; `hole` receives the entry's array index.
  { $unwind: { path: "$score", includeArrayIndex: "hole" } },

  // Minimum score and number of entries per course / player / hole.
  {
    $group: {
      _id: {
        course_played: "$_id.course_played",
        player_id: "$_id.player_id",
        hole: "$hole"
      },
      minScore: { $min: "$score" },
      rounds: { $sum: 1 }
    }
  },

  // Order by hole so the $push below rebuilds the array in hole order.
  { $sort: { "_id.hole": 1 } },

  // Collapse the per-hole rows back into one document per course / player.
  {
    $group: {
      _id: {
        course_played: "$_id.course_played",
        player_id: "$_id.player_id"
      },
      score: { $push: "$minScore" },
      total: { $sum: "$minScore" },
      rounds: { $first: "$rounds" }
    }
  }
])
Playground

Java MongoDB Projection

I am referring to the official MongoDB page on projection, where I came across the following example in which the elements of an array in a subdocument are filtered:
https://docs.mongodb.com/manual/reference/operator/aggregation/filter/#exp._S_filter
// Project `items` down to the elements whose price is at least 100.
db.sales.aggregate([
  {
    $project: {
      items: {
        $filter: {
          input: "$items",
          as: "item",
          cond: { $gte: ["$$item.price", 100] }
        }
      }
    }
  }
])
I am trying to implement this in Java but I am not doing it correctly and elements in subdocument array are not filtered.
Input Collection:
{
_id: 0,
items: [
{ item_id: 43, quantity: 2, price: 10 },
{ item_id: 2, quantity: 1, price: 240 }
]
}
{
_id: 1,
items: [
{ item_id: 23, quantity: 3, price: 110 },
{ item_id: 103, quantity: 4, price: 5 },
{ item_id: 38, quantity: 1, price: 300 }
]
}
{
_id: 2,
items: [
{ item_id: 4, quantity: 1, price: 23 }
]
}
Expected Output Collection:
{
"_id" : 0,
"items" : [
{ "item_id" : 2, "quantity" : 1, "price" : 240 }
]
}
{
"_id" : 1,
"items" : [
{ "item_id" : 23, "quantity" : 3, "price" : 110 },
{ "item_id" : 38, "quantity" : 1, "price" : 300 }
]
}
{ "_id" : 2, "items" : [ ] }
In Java(mongo Driver 3.9.1), this is what I am doing:
// Attempt from the question: a Filters.gte(...) condition is handed to
// Projections.fields(...) and no $filter expression is built anywhere.
// NOTE(review): presumably this is why the array elements come back
// unfiltered; confirm against the driver documentation.
Bson priceFilter = Filters.gte("items.price", 100);
mongoCollection.aggregate(
Aggregates.project(Projections.fields(priceFilter))
);
How do I project with aggregate function for the subdocument arrays where I need to filter out elements from subdocument array based on some condition?
In MongoDB Java Driver 3.9.1, collection.aggregate() takes a java.util.List as parameter. So you need to replace your Java code with the below.
mongoCollection.aggregate(
Arrays.asList(
Aggregates.project(Projections.computed("items",
new Document().append("$filter",
new Document().append("input", "$items").append("as", "item").append("cond",
new Document().append("$gte", Arrays.asList("$$item.price",100))))))
)
);

Mongodb sum after unwinding array of nested objects

I am new to mongodb aggregations. Given the following collection:
[
{
"ft": 1,
"pippo": 10,
"pluto": 5,
"detail": [
{
"ft": 1,
"pippo": 1,
"pluto": 2
},
{
"ft": 2,
"pippo": 1,
"pluto": 2
},
{
"ft": 1,
"pippo": 1,
"pluto": 2
},
{
"ft": 3,
"pippo": 1,
"pluto": 2
}
]
},
{
"ft": 2,
"pippo": 4,
"pluto": 3
}, {
"ft": 1,
"pippo": 1,
"pluto": 1
}
]
I would like to calculate the sum of all items having "ft":1.
I have tried this aggregation after unwinding the items contained in the array:
// Pipeline from the question; it produces the wrong totals shown below it.
[
{
// Emits one document per `detail` element. Each emitted document still
// carries the parent's top-level ft/pippo/pluto values.
// NOTE(review): documents without a `detail` array are presumably dropped by
// this stage; confirm against the $unwind documentation.
'$unwind': '$detail'
}, {
'$group': {
// Groups on the top-level `ft`, not on `detail.ft`.
'_id': '$ft',
'count': {
'$sum': 1
},
'ft': {
'$first': '$ft'
},
// Sums the parent's top-level value once per unwound element, which is how
// the sample's 10 and 5 become 40 and 20 (four detail elements).
'pippo': {
'$sum': '$pippo'
},
'pluto': {
'$sum': '$pluto'
}
}
}
]
And getting the following wrong result:
_id: 1,
count: 4,
pippo: 40,
pluto: 20
The correct result should be:
_id: 1,
count: 4, // all items having ft === 1
pippo: 13, // the sum of all pippo in all items where ft === 1
pluto: 10 // the sum of all pluto items where ft === 1

Possible to use the slice operator with the aggregation framework?

I have a list of employees, each of whom belongs to a department and a company.
An employee also has a salary history. The last value is their current salary.
Example:
{
name: "Programmer 1"
employee_id: 1,
dept_id: 1,
company_id: 1,
salary: [50000,50100,50200]
},
{
name: "Programmer 2"
employee_id: 2,
dept_id: 1,
company_id: 1,
salary: [50000,50200,50300]
},
{
name: "Manager"
employee_id: 3,
dept_id: 2,
company_id: 1,
salary: [60000,60500,61000]
},
{
name: "Contractor (different company)"
employee_id: 4,
dept_id: 1,
company_id: 2,
salary: [60000,60500,75000]
}
I want to find the current average salary for employees, grouped by dept_id and company_id.
Something like:
// Sketch from the question ("something like"), not a working pipeline.
db.employees.aggregate(
// NOTE(review): the sample documents name the array field `salary`, while
// this sketch refers to `salaries`.
{ $project : { employee_id: 1, dept_id: 1, company_id: 1, salaries: 1}},
{ $unwind : "$salaries" },
{
"$group" : {
"_id" : {
"dept_id" : "$dept_id",
"company_id" : "$company_id",
},
// ".last()" is a placeholder for "the last array element"; it is the part
// the question asks how to express.
current_salary_avg : { $avg : "$salaries.last()" }
}
}
);
In this case it would be
Company 1, Group 1: 50250
Company 1, Group 2: 61000
Company 2, Group 1: 75000
I've seen examples doing something similar with $unwind, but I'm struggling with getting the last value of salary. Is $slice the correct operator in this case, and if so how do I use it with project?
In this case you need to set up your pipeline as follows:
1. unwind the salary list to get all the salaries for each employee
2. group by employee, dept and company and get the last salary
3. group by dept and company and get the average salary
The code for this aggregation pipeline is :
use test;
db.employees.aggregate( [
{$unwind : "$salary"},
{
"$group" : {
"_id" : {
"dept_id" : "$dept_id",
"company_id" : "$company_id",
"employee_id" : "$employee_id",
},
"salary" : {$last: "$salary"}
}
},
{
"$group" : {
"_id" : {
"company_id" : "$_id.company_id",
"dept_id" : "$_id.dept_id",
},
"current_salary_avg" : {$avg: "$salary"}
}
},
{$sort :
{
"_id.company_id" : 1,
"_id.dept_id" : 1,
}
},
]);
Assuming that you have imported the data with:
# Feed the four sample employee documents to mongoimport on stdin, targeting
# database `test` (-d) and collection `employees` (-c). --drop is passed so
# the collection is presumably emptied first; confirm in the mongoimport docs.
mongoimport --drop -d test -c employees <<EOF
{ name: "Programmer 1", employee_id: 1, dept_id: 1, company_id: 1, salary: [50000,50100,50200]}
{ name: "Programmer 2", employee_id: 2, dept_id: 1, company_id: 1, salary: [50000,50200,50300]}
{ name: "Manager", employee_id: 3, dept_id: 2, company_id: 1, salary: [60000,60500,61000]}
{ name: "Contractor (different company)", employee_id: 4, dept_id: 1, company_id: 2, salary: [60000,60500,75000]}
EOF
Now you can use $slice in aggregation. To return elements from either the start or end of the array: { $slice: [ <array>, <n> ] }
To return elements from the specified position in the array: { $slice: [ <array>, <position>, <n> ] }.
And a couple of examples from the mongo page:
{ $slice: [ [ 1, 2, 3 ], 1, 1 ] } // [ 2 ]
{ $slice: [ [ 1, 2, 3 ], -2 ] } // [ 2, 3 ]
{ $slice: [ [ 1, 2, 3 ], 15, 2 ] } // [ ]
{ $slice: [ [ 1, 2, 3 ], -15, 2 ] } // [ 1, 2 ]