Related
Here is my some sample data in collection sale
[
{group:2, item:a, qty:3 },
{group:2, item:b, qty:3 },
{group:2, item:b, qty:2 },
{group:1, item:a, qty:3 },
{group:1, item:a, qty:5 },
{group:1, item:b, qty:5 }
]
and I want to query data like below and sort the popular group to the top
[
{ group:1, items:[{name:'a',total_qty:8},{name:'b',total_qty:5} ],total_qty:13 },
{ group:2, items:[{name:'a',total_qty:3},{name:'b',total_qty:5} ],total_qty:8 },
]
Actually we can loop in server script( php, nodejs ...) but the problem is pagination. I cannot use skip to get the right result.
The following query can get us the expected output:
db.collection.aggregate([
{
$group:{
"_id":{
"group":"$group",
"item":"$item"
},
"group":{
$first:"$group"
},
"item":{
$first:"$item"
},
"total_qty":{
$sum:"$qty"
}
}
},
{
$group:{
"_id":"$group",
"group":{
$first:"$group"
},
"items":{
$push:{
"name":"$item",
"total_qty":"$total_qty"
}
},
"total_qty":{
$sum:"$total_qty"
}
}
},
{
$project:{
"_id":0
}
}
]).pretty()
Data set:
{
"_id" : ObjectId("5d84a37febcbd560107c54a7"),
"group" : 2,
"item" : "a",
"qty" : 3
}
{
"_id" : ObjectId("5d84a37febcbd560107c54a8"),
"group" : 2,
"item" : "b",
"qty" : 3
}
{
"_id" : ObjectId("5d84a37febcbd560107c54a9"),
"group" : 2,
"item" : "b",
"qty" : 2
}
{
"_id" : ObjectId("5d84a37febcbd560107c54aa"),
"group" : 1,
"item" : "a",
"qty" : 3
}
{
"_id" : ObjectId("5d84a37febcbd560107c54ab"),
"group" : 1,
"item" : "a",
"qty" : 5
}
{
"_id" : ObjectId("5d84a37febcbd560107c54ac"),
"group" : 1,
"item" : "b",
"qty" : 5
}
Output:
{
"group" : 2,
"items" : [
{
"name" : "b",
"total_qty" : 5
},
{
"name" : "a",
"total_qty" : 3
}
],
"total_qty" : 8
}
{
"group" : 1,
"items" : [
{
"name" : "b",
"total_qty" : 5
},
{
"name" : "a",
"total_qty" : 8
}
],
"total_qty" : 13
}
You need to use $group aggregation with $sum and $push accumulator
db.collection.aggregate([
{ "$group": {
"_id": "$group",
"items": { "$push": "$$ROOT" },
"total_qty": { "$sum": "$qty" }
}},
{ "$sort": { "total_qty": -1 }}
])
My primary goal is to print titles are having number of grades greater than four, i can achieve it with below query,
db.students.aggregate({$project : { title:1 ,_id : 0, count: {$size : "$grades"}}},{$match: {"count": {$gt:4}}})
But if grades array have empty values how can i remove them, tried this but not giving correct output.
db.students.aggregate({$project : { title:1 ,_id : 0, count: {$size : "$grades"}}},{$match: {"count": {$gt:4},grades : {$ne:''}}})
You can use $filter to remove empty grades before you run $size:
db.students.aggregate([
{$project : { title:1 ,_id : 0, count: { $size : { $filter: { input: "$grades", cond: { $ne: [ "$$this", '' ] } } }}}},
{$match: {"count": {$gt:4}}}
])
Let's explain this with step by step of different different queries:
All possible values in the collection grades:
> db.grades.find()
{ "_id" : ObjectId("5cb2ff50d33f6ed856afe577"), "title" : "abc", "grades" : [ 12, 23, 1 ] }
{ "_id" : ObjectId("5cb2ff55d33f6ed856afe578"), "title" : "abc", "grades" : [ 12, 23 ] }
{ "_id" : ObjectId("5cb2ff5cd33f6ed856afe579"), "title" : "abc", "grades" : [ 12, 23, 10, 100, 34 ] }
{ "_id" : ObjectId("5cb2ff63d33f6ed856afe57a"), "title" : "abc", "grades" : "" }
{ "_id" : ObjectId("5cb2ff66d33f6ed856afe57b"), "title" : "abc", "grades" : [ ] }
{ "_id" : ObjectId("5cb2ff6bd33f6ed856afe57c"), "title" : "abc", "grades" : [ 1, 2, 3, 4, 5 ] }
Just filtered empty grades records as:
> db.grades.aggregate([{$match: {grades: {$ne:''}} }])
{ "_id" : ObjectId("5cb2ff50d33f6ed856afe577"), "title" : "abc", "grades" : [ 12, 23, 1 ] }
{ "_id" : ObjectId("5cb2ff55d33f6ed856afe578"), "title" : "abc", "grades" : [ 12, 23 ] }
{ "_id" : ObjectId("5cb2ff5cd33f6ed856afe579"), "title" : "abc", "grades" : [ 12, 23, 10, 100, 34 ] }
{ "_id" : ObjectId("5cb2ff66d33f6ed856afe57b"), "title" : "abc", "grades" : [ ] }
{ "_id" : ObjectId("5cb2ff6bd33f6ed856afe57c"), "title" : "abc", "grades" : [ 1, 2, 3, 4, 5 ] }
Now project the grades count values in a variable along with required other columns.
> db.grades.aggregate([{$match: {grades: {$ne:''}} }, {$project: {_id:0, title:1, count: {$size: "$grades"} } }])
{ "title" : "abc", "count" : 3 }
{ "title" : "abc", "count" : 2 }
{ "title" : "abc", "count" : 5 }
{ "title" : "abc", "count" : 0 }
{ "title" : "abc", "count" : 5 }
Now match required condition of grades array count greater than 4 as below:
> db.grades.aggregate([{$match: {grades: {$ne:''}} }, {$project: {_id:0, title:1, count: {$size: "$grades"} } }, {$match: {count: {$gte: 4}}} ])
{ "title" : "abc", "count" : 5 }
{ "title" : "abc", "count" : 5 }
>
Suppose in collection I have following documents:
[
{"title": "t1", "fingerprint":[1, 2, 3]},
{"title": "t2", "fingerprint":[4, 5, 6]}
]
I want to query documents in which at least one element in fingerprint at given position is equal to my querying array.
For example:
query([1, 7, 9]) should return [{"title": "t1", "fingerprint":[1, 2, 3]}]
query([1, 5, 9]) should return [{"title": "t1", "fingerprint":[1, 2, 3]}, {"title": "t2", "fingerprint":[4, 5, 6]}]
but query([5,1,9]) should return none records, because neither of records have same value at any of the positions in fingerprint array.
How to write given query?
When you are trying to match only documents with arrays where the sequence [ 1 2, 3 ] appears in values field and only in that exact order, you can do it this way:
db.testcol.find()
{ "_id" : "first", "value" : [ 1, 2, 3 ] }
{ "_id" : "second", "value" : [ 4, 5, 6 ] }
{ "_id" : "third", "value" : [ 1, 12, 13 ] }
{ "_id" : "fourth", "value" : [ 3, 2, 1 ] }
{ "_id" : "fifth", "value" : [ 1, 12, 13, 2, 3 ] }
{ "_id" : "sixth", "value" : [ 3, 2, 1, 2, 3 ] }
> db.testcol.aggregate([{$addFields:{
cmp: {$in:[
{$literal:[1,2,3]},
{$map: {
input:{$range:[0, {$subtract:[{$size:"$value"},2]}]},
as:"l",
in: {$slice: [ "$value", "$$l", 3] }
}}
]}
}}])
{ "_id" : "first", "value" : [ 1, 2, 3 ], "cmp" : true }
{ "_id" : "second", "value" : [ 4, 5, 6 ], "cmp" : false }
{ "_id" : "third", "value" : [ 1, 12, 13 ], "cmp" : false }
{ "_id" : "fourth", "value" : [ 3, 2, 1 ], "cmp" : false }
{ "_id" : "fifth", "value" : [ 1, 12, 13, 2, 3 ], "cmp" : false }
{ "_id" : "sixth", "value" : [ 3, 2, 1, 2, 3 ], "cmp" : true }
What the $addFields stage does is checks if [1,2,3] appears in a list of three element arrays starting at position 0 of value array and moving forward till two positions before the end.
As you can see, it's now trivial to add a $match stage to filter out documents where cmp is not true.
You can use the .$index notation to perform such a search.
Example for your query([1, 7, 9])
db.coll.find({$or: [{"fingerprint.0": 1}, {"fingerprint.1": 7 }, {"fingerprint.2": 9}]})
{ "_id" : ObjectId("59170da907e34e73c0c93a9b"), "title" : "t1", "fingerprint" : [ 1, 2, 3 ] }
And query([1, 5, 9])
db.coll.find({$or: [{"fingerprint.0": 1}, {"fingerprint.1": 5 }, {"fingerprint.2": 9}]})
{ "_id" : ObjectId("59170da907e34e73c0c93a9b"), "title" : "t1", "fingerprint" : [ 1, 2, 3 ] }
{ "_id" : ObjectId("59170da907e34e73c0c93a9c"), "title" : "t2", "fingerprint" : [ 4, 5, 6 ] }
$in operator is used to match a value against list of values.
According to above mentioned description please try executing following query in MongoDB shell
db.collection.find({fingerprint:{$in:[1,7,9]}})
Considering the following document in my mongo DB instance :
{
"_id": 1,
"people": [
{"id": 1, "name": "foo"},
{"id": 2, "name": "bar"},
/.../
],
"stats": [
{"peopleId": 1, "workHours": 24},
{"peopleId": 2, "workHours": 36},
/.../
}
Each element in my collection represent the work of every employee in my company, each weeks. As an important note, peopleId may change from one week to another !
I would like to get all weeks where foo worked more than 24 hours. As you can see, the format is kinda annoying since the people name and the work hours are separated in my database. A simple $and is not enough.
I wonder if, using some $ and $elemMatch I can achieve doing this query.
Can I use this to group the "people" entities with "stats" entities ?
Query to get foo worked more than 24 hours.
db.collection.aggregate([
{$unwind: { path : "$people"}},
{$unwind: { path : "$stats"}},
{$match: { "people.name" : "foo"}},
{$group: {
_id: "$_id",
peopleIdMoreThan24: { $addToSet: {
$cond : { if : { $and : [ {"$eq" : ["$people.id", "$stats.peopleId" ] },
{"$gt" : ["$stats.workHours", 24] }]} , then : "$people.id", else: "Not satisfying the condition"}}}
}
},
{$unwind: { path : "$peopleIdMoreThan24" }},
{$match: { "peopleIdMoreThan24" : {$nin : [ "Not satisfying the condition"]}}},
]);
Data in collection:-
/* 1 */
{
"_id" : 1,
"people" : [
{
"id" : 1,
"name" : "foo"
},
{
"id" : 2,
"name" : "bar"
}
],
"stats" : [
{
"peopleId" : 1,
"workHours" : 24
},
{
"peopleId" : 2,
"workHours" : 36
}
]
}
/* 2 */
{
"_id" : 2,
"people" : [
{
"id" : 1,
"name" : "foo"
},
{
"id" : 2,
"name" : "bar"
}
],
"stats" : [
{
"peopleId" : 1,
"workHours" : 25
},
{
"peopleId" : 2,
"workHours" : 36
}
]
}
/* 3 */
{
"_id" : 3,
"people" : [
{
"id" : 1,
"name" : "foo"
},
{
"id" : 2,
"name" : "bar"
}
],
"stats" : [
{
"peopleId" : 1,
"workHours" : 25
},
{
"peopleId" : 2,
"workHours" : 36
}
]
}
Output:-
The output has document id and people id of foo worked more than 24 hours.
/* 1 */
{
"_id" : 3,
"peopleIdMoreThan24" : 1
}
/* 2 */
{
"_id" : 2,
"peopleIdMoreThan24" : 1
}
Let's say I have the next 5 docs:
{ "_id" : "1", "student" : "Oscar", "courses" : [ "A", "B" ] }
{ "_id" : "2", "student" : "Alan", "courses" : [ "A", "B", "C" ] }
{ "_id" : "3", "student" : "Kate", "courses" : [ "A", "B", "D" ] }
{ "_id" : "4", "student" : "John", "courses" : [ "A", "B", "C" ] }
{ "_id" : "5", "student" : "Bema", "courses" : [ "A", "B" ] }
I want to manipulate the collection so that it will return a group of students (with their _id) by set (combination) of courses they take and calculate how many students in each set.
In the example above I have 3 set (combination) of courses and number of students as below:
1 - [ "A", "B" ] <- 2 students take this combination
2 - [ "A", "B", "C" ] <- 2 students
3 - [ "A", "B", "D" ] <- 1 student
I feel like this is more like MapReduce task rather than Aggregation...not sure...
UPDATE 1
Thanks a lot to #ExplosionPills
So the following aggregation command:
db.students.aggregate([{
$group: {
_id: "$courses",
count: {$sum: 1},
students: {$push: "$_id"}
}
}])
gives me the following output:
{ "_id" : [ "A", "B", "D" ], "count" : 1, "students" : [ "3" ] }
{ "_id" : [ "A", "B", "C" ], "count" : 2, "students" : [ "2", "4" ] }
{ "_id" : [ "A", "B" ], "count" : 2, "students" : [ "1", "5" ] }
It groups by set of courses, counts number of students belong to it and their _ids.
UPDATE 2
I found out, the aggregation above treats combination [ "C", "A", "B" ] as different from [ "A", "B", "C" ]. But I need these 2 count as same.
So let's look at the following documents:
{ "_id" : "1", "student" : "Oscar", "courses" : [ "A", "B" ] }
{ "_id" : "2", "student" : "Alan", "courses" : [ "A", "B", "C" ] }
{ "_id" : "3", "student" : "Kate", "courses" : [ "A", "B", "D" ] }
{ "_id" : "4", "student" : "John", "courses" : [ "A", "B", "C" ] }
{ "_id" : "5", "student" : "Bema", "courses" : [ "A", "B" ] }
{ "_id" : "6", "student" : "Alex", "courses" : [ "C", "A", "B" ] }
Let's see this in output:
{ "_id" : [ "C", "A", "B" ], "count" : 1, "students" : [ "6" ] }
{ "_id" : [ "A", "B", "D" ], "count" : 1, "students" : [ "3" ] }
{ "_id" : [ "A", "B", "C" ], "count" : 2, "students" : [ "2", "4" ] }
{ "_id" : [ "A", "B" ], "count" : 2, "students" : [ "1", "5" ] }
See the lines 1 and 3 - this is not what I wanted.
So, to treat [ "C", "A", "B" ] and [ "A", "B", "C" ] as same combination I changed the aggregation as follows:
db.students.aggregate([
{$unwind: "$courses" },
{$sort : {"courses": 1}},
{$group: {_id: "$_id", courses: {$push: "$courses"}}},
{$group: {_id: "$courses", count: {$sum:1}, students: {$push: "$_id"}}}
])
Output:
{ "_id" : [ "A", "B", "D" ], "count" : 1, "students" : [ "3" ] }
{ "_id" : [ "A", "B" ], "count" : 2, "students" : [ "5", "1" ] }
{ "_id" : [ "A", "B", "C" ], "count" : 3, "students" : [ "6", "4", "2" ] }
This is an aggregate operation using grouping.
db.students.aggregate([{
$group: {
// Uniquely identify the document.
// The $ syntax queries on this field
_id: "$courses",
// Add 1 for each field found (effectively a counter)
count: {$sum: 1}
}
}]);
EDIT:
If the courses can be in any order, you can $unwind, $sort, and $group again as suggested in the edited question. It's also possible to do this via mapReduce, but I'm not sure which is faster.
db.students.mapReduce(
function () {
// Use the sorted courses as the key
emit(this.courses.sort(), this._id);
},
function (key, values) {
return {"students": values, count: values.length};
},
{out: {inline: 1}}
)