Can I use "$first" operator on two fields in a "$group" operation in mongo db? - mongodb

Consider the dataset
{ "_id" : { "$oid" : "aaa" }, "student_id" : 0, "type" : "exam", "score" : 54.6535436362647 }
{ "_id" : { "$oid" : "bbb" }, "student_id" : 0, "type" : "quiz", "score" : 31.95004496742112 }
{ "_id" : { "$oid" : "ccc" }, "student_id" : 0, "type" : "homework", "score" : 14.8504576811645 }
{ "_id" : { "$oid" : "ddd" }, "student_id" : 0, "type" : "homework", "score" : 63.98402553675503 }
{ "_id" : { "$oid" : "eee" }, "student_id" : 1, "type" : "exam", "score" : 74.20010837299897 }
{ "_id" : { "$oid" : "fff" }, "student_id" : 1, "type" : "quiz", "score" : 96.76851542258362 }
{ "_id" : { "$oid" : "ggg" }, "student_id" : 1, "type" : "homework", "score" : 21.33260810416115 }
{ "_id" : { "$oid" : "hhh" }, "student_id" : 1, "type" : "homework", "score" : 44.31667452616328 }
Say, for each student, I need to find minimum score and the corresponding document_id(_id).
Here is my pipeline
pipeline = [
{"$sort":{"student_id":1,"score":1 } },
{"$group": {"_id":"$student_id","mscore":{"$first":"$score"},"docid":{"$first":"$_id"} } },
{"$sort":{"_id":1}},
{"$project":{"docid":1,"_id":0}}
]
While this is working fine, I am not sure whether it is because I have the right query or whether it is because of way data is stored.
Here is my stragery
Sort by student_id, score
Group by student_id and do first on score, it will give student_id, min_score
Now, I need the doc_id(_id) also for this min_score, so I am using first on that field also. Is that correct?
Let's say after the sort, I need the entire first document, so should I apply first on each and every field or is there other way to do this?

To get the entire first document after sorting, apply the $first operator on the system variable $$ROOT which references the root document, i.e. the top-level document, currently being processed in the $group operator pipeline stage. Your pipeline would look like this:
var pipeline = [
{
"$sort": { "score": 1 }
},
{
"$group": {
"_id": "$student_id",
"data": { "$first": "$$ROOT" }
}
},
{
"$project": {
"_id": "$data._id",
"student_id": "$data.student_id",
"type": "$data.type",
"lowest_score": "$data.score"
}
}
]

Related

MongoDB - Having difficulty further sorting the query results

This is an example of the collection I am working with
> db.grades.find().limit(5).forEach(printjson)
{
"_id" : ObjectId("50b59cd75bed76f46522c353"),
"student_id" : 0,
"class_id" : 30,
"scores" : [
{
"type" : "exam",
"score" : 14.34345947841966
},
{
"type" : "quiz",
"score" : 47.65945482174327
},
{
"type" : "homework",
"score" : 83.42772189120254
},
{
"type" : "homework",
"score" : 49.86812935368258
},
{
"type" : "homework",
"score" : 39.85525554437086
}
]
}
{
"_id" : ObjectId("50b59cd75bed76f46522c356"),
"student_id" : 0,
"class_id" : 27,
"scores" : [
{
"type" : "exam",
"score" : 60.19473636151568
},
{
"type" : "quiz",
"score" : 64.15966210014162
},
{
"type" : "homework",
"score" : 82.80835343023551
}
]
}
{
"_id" : ObjectId("50b59cd75bed76f46522c350"),
"student_id" : 0,
"class_id" : 5,
"scores" : [
{
"type" : "exam",
"score" : 88.22950674232497
},
{
"type" : "quiz",
"score" : 79.28962650427184
},
{
"type" : "homework",
"score" : 18.66254946562674
},
{
"type" : "homework",
"score" : 40.28154176513361
},
{
"type" : "homework",
"score" : 1.23735944117882
},
{
"type" : "homework",
"score" : 88.96101200683958
}
]
}
{
"_id" : ObjectId("50b59cd75bed76f46522c357"),
"student_id" : 0,
"class_id" : 11,
"scores" : [
{
"type" : "exam",
"score" : 58.83297411100884
},
{
"type" : "quiz",
"score" : 49.66835710930263
},
{
"type" : "homework",
"score" : 18.05861540807023
},
{
"type" : "homework",
"score" : 80.04086698967356
}
]
}
{
"_id" : ObjectId("50b59cd75bed76f46522c358"),
"student_id" : 0,
"class_id" : 10,
"scores" : [
{
"type" : "exam",
"score" : 30.93065784731665
},
{
"type" : "quiz",
"score" : 55.98003281528393
},
{
"type" : "homework",
"score" : 55.6752702814148
},
{
"type" : "homework",
"score" : 63.15391302252755
}
]
}
What I'm trying to achieve, is to get the highest score of the exam, where the student id is 5. I've been stuck on this for quite a while, and the furthest I've managed to come is to retrieve all of the student's exam scores, however I haven't managed to make it so only the highest scoring one displays. This is another aspect I'm stuck on.
This is the code for this output:
{
"student_id" : 5,
"class_id" : 18,
"scores" : [
{
"type" : "exam",
"score" : 73.04238861317688
}
]
}
{
"student_id" : 5,
"class_id" : 8,
"scores" : [
{
"type" : "exam",
"score" : 22.38732080941065
}
]
}
{
"student_id" : 5,
"class_id" : 0,
"scores" : [
{
"type" : "exam",
"score" : 43.64758440439862
}
]
}
{
"student_id" : 5,
"class_id" : 16,
"scores" : [
{
"type" : "exam",
"score" : 33.39752665396672
}
]
}
{
"student_id" : 5,
"class_id" : 30,
"scores" : [
{
"type" : "exam",
"score" : 73.48459944869943
}
]
}
{
"student_id" : 5,
"class_id" : 19,
"scores" : [
{
"type" : "exam",
"score" : 15.36563152024366
}
]
}
{
"student_id" : 5,
"class_id" : 23,
"scores" : [
{
"type" : "exam",
"score" : 21.58296008740177
}
]
}
The code that gets me this is as follows:
var pipeline = [
{ $match: {student_id: 5} },
{ $unwind: "$scores" },
{ $group: {
_id: "$_id",
"student_id": { "$first": "$student_id" },
"class_id": { "$first": "$class_id" },
scores: { $push: "$scores" } } },
{ $project: { _id: 0,
student_id: 1,
class_id: 1,
scores: { $slice: [ "$scores", 1] } } } ];
var results = db.grades.aggregate ( pipeline );
results.forEach(printjson)
(Sorry for the poor structuring, I did my best but I'm not used too it on stackoverflow)
I've been coding with MongoDB for about 2 days now, and I'm knowledgeable in it. Even less so with aggregates, but looking at posts and other code this seemed like the way to do it. From my point of view, because I'm already slicing through it, and attempting to sort the score will only result in getting the highest score out of exams, quiz and homeworks, so it's not a guarantee to give me the exam. Unless there's a different way to sort through these individually
Ideally, I'd want the end result to return only one document, where the exam score is the highest:
{
"student_id" : 5,
"class_id" : 30,
"scores" : [
{
"type" : "exam",
"score" : 73.48459944869943
}
]
}
$match student_id condition
$unwind deconstruct scores array
$match type: exam condition
$sort documents by score in descending order
$group by student_id and get first root document
$replaceRoot to replace doc to root
var pipeline = [
{ $match: { student_id: 5 } },
{ $unwind: "$scores" },
{ $match: { "scores.type": "exam" } },
{ $sort: { "scores.score": -1 } },
{
$group: {
_id: "$student_id",
doc: { $first: "$$ROOT" }
}
},
{ $replaceRoot: { newRoot: "$doc" } }
]
Playground

Find sum of fields inside array in MongoDB

I have a data as follows:
> db.PQRCorp.find().pretty()
{
"_id" : 0,
"name" : "Ancy",
"results" : [
{
"evaluation" : "term1",
"score" : 1.463179736705023
},
{
"evaluation" : "term2",
"score" : 11.78273309957772
},
{
"evaluation" : "term3",
"score" : 6.676176060654615
}
]
}
{
"_id" : 1,
"name" : "Mark",
"results" : [
{
"evaluation" : "term1",
"score" : 5.89772766299929
},
{
"evaluation" : "term2",
"score" : 12.7726680028769
},
{
"evaluation" : "term3",
"score" : 2.78092882672992
}
]
}
{
"_id" : 2,
"name" : "Jeff",
"results" : [
{
"evaluation" : "term1",
"score" : 36.78917882992872
},
{
"evaluation" : "term2",
"score" : 2.883687879200287
},
{
"evaluation" : "term3",
"score" : 9.882668212003763
}
]
}
What I want to achieve is ::Find employees who failed in aggregate (term1 + term2 + term3)
What I am doing and eventually getting is:
db.PQRCorp.aggregate([
{$unwind:"$results"},
{ $group: {_id: "$id",
'totalTermScore':{ $sum:"$results.score" }
}
}])
OUTPUT:{ "_id" : null, "totalTermScore" : 90.92894831067625 }
Simply I am getting a output of a flat sum of all scores. What I want is, to sum terms 1 , 2 and 3 separately for separate employees.
Please can someone help me. I am new to MongoDB (quite evident though).
You do not need to use $unwind and $group here... A simple $project query can $sum your entire score...
db.PQRCorp.aggregate([
{ "$project": {
"name": 1,
"totalTermScore": {
"$sum": "$results.score"
}
}}
])

Mongodb query to group by multiple fields and filter

I want to be able to group each "Place" to show over time, how many "PatientIds" they are seeing on a given day and then be able to filter this by what the action is.
Basically Total Patients on y-axis, Date on x-axis and then a filter or stacked chart to show the action. I also thought about a mapreduce, but have never done that in mongo
I can't figure out the correct mongo query. Right now I have:
db.collection.aggregate({"$group":{_id:{place:"$place",date:"$date",action:"$action",count:{$sum:1}}},{$sort:{"_id.date":1,"_id.place":1}})
However, this is just listing out the data. I tried to do a match on all places, but that didn't give me the results I was looking for either. Any ideas?
Example json:
{
"_id" : ObjectId(""),
"patientId" : "100",
"place" : "1",
"action" : "DIAGNOSED",
"date" : ISODate("2017-01-20")
}
{
"_id" : ObjectId(""),
"patientId" : "101",
"place" : "1",
"action" : "PATIENT IN",
"date" : ISODate("2017-01-20)
}
{
"_id" : ObjectId(""),
"patientId" : "200",
"place" : "2",
"action" : "MEDICINE",
"date" : ISODate("2017-01-05")
}
{
"_id" : ObjectId(""),
"patientId" : "300",
"place" : "2",
"action" : "DIAGNOSED",
"date" : ISODate("2017-01-31")
}
EDIT - mapreduce
> var map = function(){emit(this.place,1)}
> var reduce = function(key,values){var res = 0;values.forEach(function(v){res+=1});return{count:res};}
> db.new.mapReduce(map,reduce,{out:"mapped_places"});
{
"result" : "mapped_places",
"timeMillis" : 88,
"counts" : {
"input" : 4,
"emit" : 4,
"reduce" : 2,
"output" : 2
},
"ok" : 1
}
> db.mapped_offices.find({})
{ "_id" : "1", "value" : { "count" : 2 } }
{ "_id" : "2", "value" : { "count" : 2 } }
>
You can try below aggregation query.
db.collection.aggregate([
{
"$group": {
"_id": {
"date": "$date",
"place": "$place"
},
"actions": {
"$push": "$action"
},
"count": {
"$sum": 1
}
}
},
{
"$unwind": "$actions"
},
{
"$sort": {
"_id.date": 1,
"_id.place": 1
}
}
]);
This should output something like
{ "_id" : { "date" : ISODate("2017-01-20T00:00:00Z"), "place" : "1"}, "count" : 2, "actions" : "PATIENT IN" }
{ "_id" : { "date" : ISODate("2017-01-20T00:00:00Z"), "place" : "1"}, "count" : 2, "actions" : "DIAGNOSED" }

MongoDB: Sort in combination with Aggregation group

I have a collection called transaction with below documents,
/* 0 */
{
"_id" : ObjectId("5603fad216e90d53d6795131"),
"statusId" : "65c719e6727d",
"relatedWith" : "65c719e67267",
"status" : "A",
"userId" : "100",
"createdTs" : ISODate("2015-09-24T13:15:36.609Z")
}
/* 1 */
{
"_id" : ObjectId("5603fad216e90d53d6795134"),
"statusId" : "65c719e6727d",
"relatedWith" : "65c719e6726d",
"status" : "B",
"userId" : "100",
"createdTs" : ISODate("2015-09-24T13:14:31.609Z")
}
/* 2 */
{
"_id" : ObjectId("5603fad216e90d53d679512e"),
"statusId" : "65c719e6727d",
"relatedWith" : "65c719e6726d",
"status" : "C",
"userId" : "100",
"createdTs" : ISODate("2015-09-24T13:13:36.609Z")
}
/* 3 */
{
"_id" : ObjectId("5603fad216e90d53d6795132"),
"statusId" : "65c719e6727d",
"relatedWith" : "65c719e6726d",
"status" : "D",
"userId" : "100",
"createdTs" : ISODate("2015-09-24T13:16:36.609Z")
}
When I run the below Aggregation query without $group,
db.transaction.aggregate([
{
"$match": {
"userId": "100",
"statusId": "65c719e6727d"
}
},
{
"$sort": {
"createdTs": -1
}
}
])
I get the result in expected sorting order. i.e Sort createdTs in descending order (Minimal result)
/* 0 */
{
"result" : [
{
"_id" : ObjectId("5603fad216e90d53d6795132"),
"createdTs" : ISODate("2015-09-24T13:16:36.609Z")
},
{
"_id" : ObjectId("5603fad216e90d53d6795131"),
"createdTs" : ISODate("2015-09-24T13:15:36.609Z")
},
{
"_id" : ObjectId("5603fad216e90d53d6795134"),
"createdTs" : ISODate("2015-09-24T13:14:31.609Z")
},
{
"_id" : ObjectId("5603fad216e90d53d679512e"),
"createdTs" : ISODate("2015-09-24T13:13:36.609Z")
}
],
"ok" : 1
}
If I apply the below aggregation with $group, the resultant is inversely sorted(i.e Ascending sort)
db.transaction.aggregate([
{
"$match": {
"userId": "100",
"statusId": "65c719e6727d"
}
},
{
"$sort": {
"createdTs": -1
}
},
{
$group: {
"_id": {
"statusId": "$statusId",
"relatedWith": "$relatedWith",
"status": "$status"
},
"status": {$first: "$status"},
"statusId": {$first: "$statusId"},
"relatedWith": {$first: "$relatedWith"},
"createdTs": {$first: "$createdTs"}
}
}
]);
I get the result in inverse Order i.e. ** Sort createdTs in Ascending order**
/* 0 */
{
"result" : [
{
"_id" : ObjectId("5603fad216e90d53d679512e"),
"createdTs" : ISODate("2015-09-24T13:13:36.609Z")
},
{
"_id" : ObjectId("5603fad216e90d53d6795134"),
"createdTs" : ISODate("2015-09-24T13:14:31.609Z")
},
{
"_id" : ObjectId("5603fad216e90d53d6795131"),
"createdTs" : ISODate("2015-09-24T13:15:36.609Z")
},
{
"_id" : ObjectId("5603fad216e90d53d6795132"),
"createdTs" : ISODate("2015-09-24T13:16:36.609Z")
}
],
"ok" : 1
}
Where am I wrong ?
The $group stage doesn't insure the ordering of the results. See here the first paragraph.
If you want the results to be sorted after a $group, you need to add a $sort after the $group stage.
In your case, you should move the $sort after the $group and before you ask the question : No, the $sort won't be able to use an index after the $group like it does before the $group :-).
The internal algorithm of $group seems to keep some sort of ordering (reversed apparently), but I would not count on that and add a $sort.
You are not doing anything wrong here, Its a $group behavior in Mongodb
Lets have a look in this example
Suppose you have following doc in collection
{ "_id" : 1, "item" : "abc", "price" : 10, "quantity" : 2, "date" : ISODate("2014-01-01T08:00:00Z") }
{ "_id" : 2, "item" : "jkl", "price" : 20, "quantity" : 1, "date" : ISODate("2014-02-03T09:00:00Z") }
{ "_id" : 3, "item" : "xyz", "price" : 5, "quantity" : 5, "date" : ISODate("2014-02-03T09:05:00Z") }
{ "_id" : 4, "item" : "abc", "price" : 10, "quantity" : 10, "date" : ISODate("2014-02-15T08:00:00Z") }
{ "_id" : 5, "item" : "xyz", "price" : 5, "quantity" : 10, "date" : ISODate("2014-02-15T09:05:00Z") }
{ "_id" : 6, "item" : "xyz", "price" : 5, "quantity" : 5, "date" : ISODate("2014-02-15T12:05:10Z") }
{ "_id" : 7, "item" : "xyz", "price" : 5, "quantity" : 10, "date" : ISODate("2014-02-15T14:12:12Z") }
Now if you run this
db.collection.aggregate([{ $sort: { item: 1,date:1}} ] )
the output will be in ascending order of item and date.
Now if you add group stage in aggregation pipeline it will reverse the order.
db.collection.aggregate([{ $sort: { item: 1,date:1}},{$group:{_id:"$item"}} ] )
Output will be
{ "_id" : "xyz" }
{ "_id" : "jkl" }
{ "_id" : "abc" }
Now the solution for your problem
change "createdTs": -1 to "createdTs": 1 for group

Finding element in embedded array in MongoDB

I have a collection students look like this:
{
"_id" : 10,
"name" : "Christiano Ronaldo",
"scores" : [
{
"type" : "exam",
"score" : 40.58945534169687
},
{
"type" : "quiz",
"score" : 4.30461571152303
},
{
"type" : "homework",
"score" : 62.36309025722009
},
{
"type" : "homework",
"score" : 32.1707802903173
}
]
}
How do I find out the lowest homework? Using javadriver
Note : I can not change the Data model.
Try this below -
x = db.students.aggregate([{
"$unwind": "$scores"
}, {
"$match": {
"scores.type": "homework"
}
}, {
"$group": {
"_id": "$_id",
"minscore": {
"$min": "$scores.score"
}
}
}])
Your result document "x" contains an array field - lowest scores in the document