Aggregating and comparing scores of teams using mongodb - mongodb

I'm just starting out with mongodb and have been reading through the documentation on aggregation but still struggling to relate equivalent knowledge of sql statements to the methods used in mongo.
I have this data:
{
"_id" : ObjectId("53ac7bce4eaf6de4d5601c19"),
"uid" : ObjectId("53ac7bb84eaf6de4d5601c15"),
"mid" : ObjectId("53ab27504eaf6de4d5601be4"),
"score" : 1
},{
"_id" : ObjectId("53ac7bce4eaf6de4d5601c1a"),
"uid" : ObjectId("53ac7bb84eaf6de4d5601c16"),
"mid" : ObjectId("53ab27504eaf6de4d5601be4"),
"score" : 5
}
...
And I'm trying to get to this result:
{
"uid" : ObjectId("53ac7bb84eaf6de4d5601c15"),
"uid_2" : ObjectId("53ac7bb84eaf6de4d5601c16"),
"mid" : ObjectId("53ab27504eaf6de4d5601be4"),
"score" : 1,
"score_2" : 5,
"difference" : 4
}
...
Where I am comparing every uid against every other uid around a single mid and calculating the difference in their scores (can't be a negative difference, only positive).
Most of the examples I'm running into don't quite fit my requirements and hoping some mongo guru can help me out. Thanks!

As stated, I think your data modelling is a little off here as you need something to "pair" the "matches" as it were. I have a "simplified" case here:
{
"_id" : ObjectId("53ae9da2e24682cac4215e0c"),
"match" : ObjectId("53ae9d78e24682cac4215e0b"),
"score" : 1
}
{
"_id" : ObjectId("53ae9da5e24682cac4215e0d"),
"match" : ObjectId("53ae9d78e24682cac4215e0b"),
"score" : 5
}
{
"_id" : ObjectId("53aea6cde24682cac4215e15"),
"match" : ObjectId("53aea6c1e24682cac4215e14"),
"score" : 2
}
{
"_id" : ObjectId("53aea6e4e24682cac4215e16"),
"match" : ObjectId("53aea6c1e24682cac4215e14"),
"score" : 1
}
{
"_id" : ObjectId("53aea6eae24682cac4215e18"),
"match" : ObjectId("53aea6e6e24682cac4215e17"),
"score" : 2
}
{
"_id" : ObjectId("53aea6ece24682cac4215e19"),
"match" : ObjectId("53aea6e6e24682cac4215e17"),
"score" : 2
}
What that basically represents is the scores for "six" teams in "three" distinct matches.
Given that, my take on getting to results would be this:
db.matches.aggregate([
// Group on matches and find the "min" and "max" score
{ "$group": {
"_id": "$match",
"teams": {
"$push": {
"_id": "$_id",
"score": "$score"
}
},
"minScore": { "$min": "$score" },
"maxScore": { "$max": "$score" }
}},
// Unwind the "teams" array created
{ "$unwind": "$teams" },
// Compare scores for "win", "loss" or "draw"
{ "$group": {
"_id": "$_id",
"win": {
"$min": { "$cond": [
{ "$and": [
{ "$eq": [ "$teams.score", "$maxScore" ] },
{ "$gt": [ "$teams.score", "$minScore" ] }
]},
"$teams",
false
]}
},
"loss": {
"$min": { "$cond": [
{ "$and": [
{ "$eq": [ "$teams.score", "$minScore" ] },
{ "$lt": [ "$teams.score", "$maxScore" ] }
]},
"$teams",
false
]}
},
"draw": {
"$push": { "$cond": [
{ "$eq": [ "$minScore", "$maxScore" ] },
"$teams",
false
]}
},
"difference": {
"$max": { "$subtract": [ "$maxScore", "$minScore" ] }
}
}},
// Just fix up those "draw" results with a [false,false] array
{ "$project": {
"win": 1,
"loss": 1,
"draw": { "$cond": [
{ "$gt": [
{ "$size": { "$setDifference": [ "$draw", [false] ] } },
0
]},
"$draw",
false
]},
"difference": 1
}}
])
And this gives you a quite nice result:
{
"_id" : ObjectId("53ae9d78e24682cac4215e0b"),
"win" : {
"_id" : ObjectId("53ae9da5e24682cac4215e0d"),
"score" : 5
},
"loss" : {
"_id" : ObjectId("53ae9da2e24682cac4215e0c"),
"score" : 1
},
"draw" : false,
"difference" : 4
}
{
"_id" : ObjectId("53aea6c1e24682cac4215e14"),
"win" : {
"_id" : ObjectId("53aea6cde24682cac4215e15"),
"score" : 2
},
"loss" : {
"_id" : ObjectId("53aea6e4e24682cac4215e16"),
"score" : 1
},
"draw" : false,
"difference" : 1
}
{
"_id" : ObjectId("53aea6e6e24682cac4215e17"),
"win" : false,
"loss" : false,
"draw" : [
{
"_id" : ObjectId("53aea6eae24682cac4215e18"),
"score" : 2
},
{
"_id" : ObjectId("53aea6ece24682cac4215e19"),
"score" : 2
}
],
"difference" : 0
}
That is essentially the results per "match" and determines the "difference" between winner and looser while identifying which team "won" or "lost". The final stage there uses some operators only introduced in MongoDB 2.6, but that really is not necessary if you do not have that version available. Or you could actually still do the same thing if you wanted to by using $unwind and some other processing.

Related

How to find records whose some field value are all zero in mongo

I have lots of sensors, every sensor report a data every few seconds.
I need to find out the sensors whose data are all zero.
Furthurmore, I need to caculate the zero data ratio for every sensor.
Can any query can do this?
Any help will be highly appreciated.
The records are like
{
"_id" : ObjectId("61353065746e5e18a1d7c4ca"),
"sensor" : "SN54",
"category" : "w",
"data" : "7065",
"time" : ISODate("2021-09-06T05:02:29.308+08:00")
},
{
"_id" : ObjectId("61353065746e5e18a1d7c4c9"),
"sensor" : "SN68",
"category" : "w",
"data" : "0",
"time" : ISODate("2021-09-06T05:02:29.308+08:00")
},
Query (if data was in array (we dont need it here after the question update))
filter to keep the zero only, divides with all array size, and multiply with 100
if you want to get all zero, add a match where percentage=100
Test code here
db.collection.aggregate([
{
"$set": {
"percentage": {
"$multiply": [
{
"$cond": [
{
"$eq": [
"$data",
[]
]
},
0,
{
"$divide": [
{
"$size": {
"$filter": {
"input": "$data",
"as": "d",
"cond": {
"$eq": [
"$$d",
0
]
}
}
}
},
{
"$size": "$data"
}
]
}
]
},
100
]
}
}
}
])
Edit1 (for data that are not inside array)
Test code here
aggregate(
[ {
"$group" : {
"_id" : "$sensor",
"nzero" : {
"$sum" : {
"$cond" : [ {
"$eq" : [ "$data", "0" ]
}, 1, 0 ]
}
},
"count" : {
"$sum" : 1
}
}
}, {
"$set" : {
"sensor" : "$_id"
}
}, {
"$project" : {
"_id" : 0
}
}, {
"$project" : {
"sensor" : 1,
"percentage" : {
"$multiply" : [ {
"$divide" : [ "$nzero", "$count" ]
}, 100 ]
}
}
} ]
)

Mongodb find maximum scored student embeddded array

I am new in mongodb ,Please help me out
I have more than 500 students details like this..
{
"_id" : 7,
"name" : "Salena Olmos",
"scores" : [
{
"score" : 90.37826509157176,
"type" : "exam"
},
{
"score" : 42.48780666956811,
"type" : "quiz"
},
{
"score" : 96.52986171633331,
"type" : "homework"
}
]
},
/* 2 */
{
"_id" : 8,
"name" : "Daphne Zheng",
"scores" : [
{
"score" : 22.13583712862635,
"type" : "exam"
},
{
"score" : 14.63969941335069,
"type" : "quiz"
},
{
"score" : 75.94123677556644,
"type" : "homework"
}
]
}
Need to find one student details who got highest marks in "type" exam
Output as follows...
{
"_id" : 7,
"name" : "Salena Olmos",
"scores" : [
{
"score" : 90.37826509157176,
"type" : "exam"
},
{
"score" : 42.48780666956811,
"type" : "quiz"
},
{
"score" : 96.52986171633331,
"type" : "homework"
}
]
}
I need one student details from whole collection. The problem I am facing that need to search in embedded array "score" as well as "type".
Someone please help me
Try this
db.collection.aggregate([
{
$group: {
_id: "$_id",
scores: {
$first: "$scores"
},
data: {
$push: "$$ROOT"
}
}
},
{
$unwind: "$data"
},
{
$match: {
"data.scores.type": "exam"
}
},
{
$sort: {
"data.scores.score": -1
}
},
{
$project: {
_id: 1,
name: "$data.name",
scores: "$scores"
}
},
{
$limit: 1
}
])
Sample Playground
While this doesn't answer the question, it is related. This one filters out all the subdocuments which match the conditions "greater or equal 90" and type "exam"
db.collection.aggregate([
{
$match: {
"scores.score": {
$gte: 90
},
"scores.type": "exam"
}
},
{
$project: {
name: true,
list: {
$filter: {
input: "$scores",
as: "list",
cond: {
$and: [
{
$gt: [
"$$list.score",
90
]
},
{
$eq: [
"$$list.type",
"exam"
]
}
]
}
}
}
}
}
])
which returns
[
{
"_id": 7,
"list": [
{
"score": 90.37826509157176,
"type": "exam"
}
],
"name": "Salena Olmos"
}
]
https://mongoplayground.net/p/hYnVzZbuNFI
If you want the entire document, then add doc: "$$ROOT", to the projection.

multiple group in mongodb

My collection look likes this.
{
"_id" : ObjectId("572c4ed33c1b5f51215219a8"),
"name" : "This is an angular course, and integeration with php",
"description" : "After we connected we can query or update the database just how we would using the mongo API with the exception that we use a callback. The format for callbacks is always callback(error, value) where error is null if no exception has occured. The update methods save, remove, update and findAndModify also pass the lastErrorObject as the last argument to the callback function.",
"difficulty_level" : "Beginner",
"type" : "Fast Track",
"tagged_skills" : [
{
"_id" : "5714e894e09a0f7d804b2254",
"name" : "PHP"
},
{
"_id" : "5717355806313b1f1715fa50",
"name" : "c++"
},
{
"_id" : "5715025bc2c5dbb4675180da",
"name" : "java"
},
{
"_id" : "5714f188ec325f5359979e33",
"name" : "symphony"
}
]}
I want to group by the collection on the basis of type,difficulty level and tagged skills and also get the count in a single query.
I am not been able to add skills count.
My query is as follows:-
db.course.aggregate([
{$unwind:"$tagged_skills"},
{$group:{
_id:null,
skills: { $addToSet: "$tagged_skills.name" },
Normal_df:{$sum:{
"$cond": [
{ "$eq":[ "$difficulty_level","Normal"] },
1,
0
]
}},
Beginner_df:{$sum:{
"$cond": [
{ "$eq":[ "$difficulty_level","Beginner"] },
1,
0
]
}},
Intermediate_df:{$sum:{
"$cond": [
{ "$eq":[ "$difficulty_level","Intermediate"] },
1,
0
]
}},
Advanced_df:{$sum:{
"$cond": [
{ "$eq":[ "$difficulty_level","Advanced"] },
1,
0
]
}},
Fast_Track_type:{$sum:{
"$cond": [
{ "$eq":[ "$type","Fast Track"] },
1,
0
]
}},
Normal_type:{$sum:{
"$cond": [
{ "$eq":[ "$type","Normal"] },
1,
0
]
}},
Beginner_type:{$sum:{
"$cond": [
{ "$eq":[ "$type","Beginner"] },
1,
0
]
}},
Normal_Track_type:{$sum:{
"$cond": [
{ "$eq":[ "$type","Normal Track"] },
1,
0
]
}},
}}
])
The result is as follows:-
{
"_id" : null,
"skills" : [
"SQL",
"PHP",
"java",
"Angular Js",
"Laravel 23",
"c++",
"Node Js",
"symphony",
"Mysql",
"Express Js",
"JAVA"
],
"Normal_df" : 1,
"Beginner_df" : 14,
"Intermediate_df" : 7,
"Advanced_df" : 2,
"Fast_Track_type" : 8,
"Normal_type" : 6,
"Beginner_type" : 1,
"Normal_Track_type" : 9
}
I also want to get all skills with their count.
To get all the skills with their count, you need to first get a list of all the skills. You can obtain this list with running a distinct command on the approapriate fields. With this list you can then construct the appropriate $group pipeline document that will use the $sum and $cond operators.
Consider the following use case:
var difficultyLevels = db.course.distinct("difficulty_level"),
types = db.course.distinct("type"),
skills = db.course.distinct("tagged_skills.name"),
unwindOperator = { "$unwind": "$tagged_skills" },
groupOperator = {
"$group": {
"_id": null,
"skills": { "$addToSet": "$tagged_skills.name" }
}
};
difficultyLevels.forEach(function (df){
groupOperator["$group"][df+"_df"] = {
"$sum": {
"$cond": [ { "$eq": ["$difficulty_level", df] }, 1, 0]
}
}
});
types.forEach(function (type){
groupOperator["$group"][type.replace(" ", "_")+"_type"] = {
"$sum": {
"$cond": [ { "$eq": ["$type", type] }, 1, 0]
}
}
});
skills.forEach(function (skill){
groupOperator["$group"][skill] = {
"$sum": {
"$cond": [ { "$eq": ["$tagged_skills.name", skill] }, 1, 0]
}
}
});
//printjson(groupOperator);
db.course.aggregate([unwindOperator, groupOperator]);
In the first line, we obtain an array with the difficulty levels by running the distinct command on the difficulty_level field
db.course.distinct("difficulty_level")
This will produce the array
var difficultyLevels = ["Normal", "Beginner", "Intermediate", "Advanced"]
Likewise, the preceding distinct operations will return the list of possible unique values for that key.
After getting these lists, you can then create the pipeline objects using the forEach() method to populate the document keys for each given item in the list. You can then use the resulting document, which will look like this
printjson(groupOperator);
{
"$group" : {
"_id" : null,
"skills" : {
"$addToSet" : "$tagged_skills.name"
},
"Beginner_df" : {
"$sum" : {
"$cond" : [
{
"$eq" : [
"$difficulty_level",
"Beginner"
]
},
1,
0
]
}
},
"Intermediate_df" : {
"$sum" : {
"$cond" : [
{
"$eq" : [
"$difficulty_level",
"Intermediate"
]
},
1,
0
]
}
},
"Fast_Track_type" : {
"$sum" : {
"$cond" : [
{
"$eq" : [
"$type",
"Fast Track"
]
},
1,
0
]
}
},
"PHP" : {
"$sum" : {
"$cond" : [
{
"$eq" : [
"$tagged_skills.name",
"PHP"
]
},
1,
0
]
}
},
"c++" : {
"$sum" : {
"$cond" : [
{
"$eq" : [
"$tagged_skills.name",
"c++"
]
},
1,
0
]
}
},
"java" : {
"$sum" : {
"$cond" : [
{
"$eq" : [
"$tagged_skills.name",
"java"
]
},
1,
0
]
}
},
"symphony" : {
"$sum" : {
"$cond" : [
{
"$eq" : [
"$tagged_skills.name",
"symphony"
]
},
1,
0
]
}
},
"C#" : {
"$sum" : {
"$cond" : [
{
"$eq" : [
"$tagged_skills.name",
"C#"
]
},
1,
0
]
}
},
"Scala" : {
"$sum" : {
"$cond" : [
{
"$eq" : [
"$tagged_skills.name",
"Scala"
]
},
1,
0
]
}
},
"javascript" : {
"$sum" : {
"$cond" : [
{
"$eq" : [
"$tagged_skills.name",
"javascript"
]
},
1,
0
]
}
}
}
}

MongoDB aggregate group by sum of distinct column

I have analytics collection with the below sample data.
{ "_id" : ObjectId("55f996a4e4b0cc9c0a392594"), "action" : "apiUploadFile", "assetId" : "55f996a4e4b0cc9c0a392593" },
{ "_id" : ObjectId("5603d384e4b0cf75af10be88"), "action" : "agAsset", "assetId" : "55f996a4e4b0cc9c0a392593"},
{ "_id" : ObjectId("5603d395e4b0cf75af10becc"), "action" : "aAD", "assetId" : "55f996a4e4b0cc9c0a392593" },
{ "_id" : ObjectId("5603d395e4b0cf75af10becd"), "action" : "mobCmd", "assetId" : "55f996a4e4b0cc9c0a392593", sessionId : "123"},
{ "_id" : ObjectId("5603d395e4b0cf75af10bece"), "action" : "mobCmd", "assetId" : "55f996a4e4b0cc9c0a392593", sessionId : "1234" },
{ "_id" : ObjectId("5603d395e4b0cf75af10becf"), "action" : "mobCmd", "assetId" : "55f996a4e4b0cc9c0a392593", sessionId : "1234" }
I need find sum of analytics group by 'assetId' and then for each 'action' type. I have come up with the below query
db.analytics.aggregate(
[
{
$match : {
'assetId' : { "$ne": null }
}
},
{$group :{
_id:
{
assId:'$assetId'
},
viewCount:{
$sum:{
$cond: [ { $eq: [ '$action', 'agAsset' ] }, 1, 0 ]
}
},
sessionCount:{
$sum:{
$cond: [ { $eq: [ '$action', 'mobCmd' ] }, 1, 0 ]
}
}
}
}]
)
This works great except for the fact that I can not find the 'sessionCount' using distinct 'sessionId'. For example here is the current output
{ "_id" : { "assId" : "55f996a4e4b0cc9c0a392593" }, "viewCount" : 1, "sessionCount" : 3 }
The expected output is
{ "_id" : { "assId" : "55f996a4e4b0cc9c0a392593" }, "viewCount" : 1, "sessionCount" : 2 }
I need find the sessionCount for action='mobCmd' and has distinct values for sessionId. How can use distinct inside $sum operation of the 'sessionCount' section?
You will need to group your documents on a compound _id field.
db.collection.aggregate([
{ "$match": { "assetId": { "$ne": null }}},
{ "$group": {
"_id": { "assId": "$assetId", "sessionId": "$sessionId" },
"viewCount": {
"$sum": {
"$cond": [
{ "$eq": [ "$action", "agAsset" ] },
1,
0
]
}
},
"sessionCount": {
"$sum": {
"$cond": [
{ "$eq": [ "$action", "mobCmd" ] },
1,
0
]
}
}
}}
])
Which yields:
{ "_id" : { "assId" : "55f996a4e4b0cc9c0a392593", "sessionId" : "1234" }, "viewCount" : 0, "sessionCount" : 2 }
{ "_id" : { "assId" : "55f996a4e4b0cc9c0a392593", "sessionId" : "123" }, "viewCount" : 0, "sessionCount" : 1 }
{ "_id" : { "assId" : "55f996a4e4b0cc9c0a392593" }, "viewCount" : 1, "sessionCount" : 0 }
Or use the $addToSet operator to return an array of unique sessionId and $unwind the array then regroup your documents.
db.collection.aggregate([
{ "$match": { "assetId": { "$ne": null }}},
{ "$group": {
"_id": "$assetId",
"sessionId": { "$addToSet": "$sessionId" },
"viewCount": {
"$sum": {
"$cond": [
{ "$eq": [ "$action", "agAsset" ] },
1,
0
]
}
}
}},
{ "$unwind": "$sessionId" },
{ "$group": {
"_id": "$_id",
"viewCount": { "$first": "$viewCount" },
"sessionCount": { "$sum": 1 }
}}
])
Which returns:
{ "_id" : "55f996a4e4b0cc9c0a392593", "viewCount" : 1, "sessionCount" : 2 }

MongoDB aggregate group array to key : sum value

Hello I am new to mongodb and trying to convert objects with different types (int) into key value pairs.
I have collection like this:
{
"_id" : ObjectId("5372a9fc0079285635db14d8"),
"type" : 1,
"stat" : "foobar"
},
{
"_id" : ObjectId("5372aa000079285635db14d9"),
"type" : 1,
"stat" : "foobar"
},
{
"_id" : ObjectId("5372aa010079285635db14da"),
"type" : 2,
"stat" : "foobar"
},{
"_id" : ObjectId("5372aa030079285635db14db"),
"type" : 3,
"stat" : "foobar"
}
I want to get result like this:
{
"type1" : 2, "type2" : 1, "type3" : 1,
"stat" : "foobar"
}
Currently trying aggregation group and then push type values to array
db.types.aggregate(
{$group : {
_id : "$stat",
types : {$push : "$type"}
}}
)
But don't know how to sum different types and to convert it into key values
/* 0 */
{
"result" : [
{
"_id" : "foobar",
"types" : [
1,
2,
2,
3
]
}
],
"ok" : 1
}
For your actual form, and therefore presuming that you actually know the possible values for "type" then you can do this with two $group stages and some use of the $cond operator:
db.types.aggregate([
{ "$group": {
"_id": {
"stat": "$stat",
"type": "$type"
},
"count": { "$sum": 1 }
}},
{ "$group": {
"_id": "$_id.stat",
"type1": { "$sum": { "$cond": [
{ "$eq": [ "$_id.type", 1 ] },
"$count",
0
]}},
"type2": { "$sum": { "$cond": [
{ "$eq": [ "$_id.type", 2 ] },
"$count",
0
]}},
"type3": { "$sum": { "$cond": [
{ "$eq": [ "$_id.type", 3 ] },
"$count",
0
]}}
}}
])
Which gives exactly:
{ "_id" : "foobar", "type1" : 2, "type2" : 1, "type3" : 1 }
I actually prefer the more dynamic form with two $group stages though:
db.types.aggregate([
{ "$group": {
"_id": {
"stat": "$stat",
"type": "$type"
},
"count": { "$sum": 1 }
}},
{ "$group": {
"_id": "$_id.stat",
"types": { "$push": {
"type": "$_id.type",
"count": "$count"
}}
}}
])
Not the same output but functional and flexible to the values:
{
"_id" : "foobar",
"types" : [
{
"type" : 3,
"count" : 1
},
{
"type" : 2,
"count" : 1
},
{
"type" : 1,
"count" : 2
}
]
}
Otherwise if you need the same output format but need the flexible fields then you can always use mapReduce, but it's not exactly the same output.
db.types.mapReduce(
function () {
var obj = { };
var key = "type" + this.type;
obj[key] = 1;
emit( this.stat, obj );
},
function (key,values) {
var obj = {};
values.forEach(function(value) {
for ( var k in value ) {
if ( !obj.hasOwnProperty(k) )
obj[k] = 0;
obj[k]++;
}
});
return obj;
},
{ "out": { "inline": 1 } }
)
And in typical mapReduce style:
"results" : [
{
"_id" : "foobar",
"value" : {
"type1" : 2,
"type2" : 1,
"type3" : 1
}
}
],
But those are your options
Is this close enough for you?
{ "_id" : "foobar", "types" : [ { "type" : "type3", "total" : 1 }, { "type" : "type2", "total" : 1 }, { "type" : "type1", "total" : 2 } ] }
The types are in an array, but it seems to get you the data you are looking for. Code is:
db.types.aggregate(
[{$group : {
_id : "$stat",
types : {$push : "$type"}
}},
{$unwind:"$types"},
{$group: {
_id:{stat:"$_id",
types: {$substr: ["$types", 0, 1]}},
total:{$sum:1}}},
{$project: {
_id:0,
stat:"$_id.stat",
type: { $concat: [ "type", "$_id.types" ] },
total:"$total" }},
{$group: {
_id: "$stat",
types: { $push: { type: "$type", total: "$total" } } }}
]
)