Mongodb group query for embeded docs

Mongodb group query for embeded docs - mongodb

Is it possible to query value counts for a particular key inside embeded documents.
Here is my document:
{ "_id" : 1, "drives" : [ {"fw": "A"}, {"fw": "B"} ] }
{ "_id" : 2, "drives" : [ {"fw": "B"}, {"fw": "C"} ] }
{ "_id" : 3, "drives" : [ {"fw": "A"}, {"fw": "C"} ] }
{ "_id" : 4, "drives" : [ {"fw": "A"}, {"fw": "D"} ] }
And i would like to get the count of "fw":
Output:
counts : {"A": 3, "B": 2, "C": 2, "D": 1 }

Aggregation will do it. First $unwind the array to get a row per fw value, then $group by the value of fw along with a $sum of 1 to get the count per fw value;
db.test.aggregate( { $unwind: "$drives" },
{ $group: { _id: "$drives.fw", cnt: {$sum:1} } } )
# { "_id" : "D", "cnt" : 1 }
# { "_id" : "C", "cnt" : 2 }
# { "_id" : "B", "cnt" : 2 }
# { "_id" : "A", "cnt" : 3 }

Related

mongodb aggregate sum item as nested data

Here is my some sample data in collection sale
[
{group:2, item:a, qty:3 },
{group:2, item:b, qty:3 },
{group:2, item:b, qty:2 },
{group:1, item:a, qty:3 },
{group:1, item:a, qty:5 },
{group:1, item:b, qty:5 }
]
and I want to query data like below and sort the popular group to the top
[
{ group:1, items:[{name:'a',total_qty:8},{name:'b',total_qty:5} ],total_qty:13 },
{ group:2, items:[{name:'a',total_qty:3},{name:'b',total_qty:5} ],total_qty:8 },
]
Actually we can loop in server script( php, nodejs ...) but the problem is pagination. I cannot use skip to get the right result.

The following query can get us the expected output:
db.collection.aggregate([
{
$group:{
"_id":{
"group":"$group",
"item":"$item"
},
"group":{
$first:"$group"
},
"item":{
$first:"$item"
},
"total_qty":{
$sum:"$qty"
}
}
},
{
$group:{
"_id":"$group",
"group":{
$first:"$group"
},
"items":{
$push:{
"name":"$item",
"total_qty":"$total_qty"
}
},
"total_qty":{
$sum:"$total_qty"
}
}
},
{
$project:{
"_id":0
}
}
]).pretty()
Data set:
{
"_id" : ObjectId("5d84a37febcbd560107c54a7"),
"group" : 2,
"item" : "a",
"qty" : 3
}
{
"_id" : ObjectId("5d84a37febcbd560107c54a8"),
"group" : 2,
"item" : "b",
"qty" : 3
}
{
"_id" : ObjectId("5d84a37febcbd560107c54a9"),
"group" : 2,
"item" : "b",
"qty" : 2
}
{
"_id" : ObjectId("5d84a37febcbd560107c54aa"),
"group" : 1,
"item" : "a",
"qty" : 3
}
{
"_id" : ObjectId("5d84a37febcbd560107c54ab"),
"group" : 1,
"item" : "a",
"qty" : 5
}
{
"_id" : ObjectId("5d84a37febcbd560107c54ac"),
"group" : 1,
"item" : "b",
"qty" : 5
}
Output:
{
"group" : 2,
"items" : [
{
"name" : "b",
"total_qty" : 5
},
{
"name" : "a",
"total_qty" : 3
}
],
"total_qty" : 8
}
{
"group" : 1,
"items" : [
{
"name" : "b",
"total_qty" : 5
},
{
"name" : "a",
"total_qty" : 8
}
],
"total_qty" : 13
}

You need to use $group aggregation with $sum and $push accumulator
db.collection.aggregate([
{ "$group": {
"_id": "$group",
"items": { "$push": "$$ROOT" },
"total_qty": { "$sum": "$qty" }
}},
{ "$sort": { "total_qty": -1 }}
])

aggregate function to project array size after removing empty value in array

My primary goal is to print titles are having number of grades greater than four, i can achieve it with below query,
db.students.aggregate({$project : { title:1 ,_id : 0, count: {$size : "$grades"}}},{$match: {"count": {$gt:4}}})
But if grades array have empty values how can i remove them, tried this but not giving correct output.
db.students.aggregate({$project : { title:1 ,_id : 0, count: {$size : "$grades"}}},{$match: {"count": {$gt:4},grades : {$ne:''}}})

You can use $filter to remove empty grades before you run $size:
db.students.aggregate([
{$project : { title:1 ,_id : 0, count: { $size : { $filter: { input: "$grades", cond: { $ne: [ "$$this", '' ] } } }}}},
{$match: {"count": {$gt:4}}}
])

Let's explain this with step by step of different different queries:
All possible values in the collection grades:
> db.grades.find()
{ "_id" : ObjectId("5cb2ff50d33f6ed856afe577"), "title" : "abc", "grades" : [ 12, 23, 1 ] }
{ "_id" : ObjectId("5cb2ff55d33f6ed856afe578"), "title" : "abc", "grades" : [ 12, 23 ] }
{ "_id" : ObjectId("5cb2ff5cd33f6ed856afe579"), "title" : "abc", "grades" : [ 12, 23, 10, 100, 34 ] }
{ "_id" : ObjectId("5cb2ff63d33f6ed856afe57a"), "title" : "abc", "grades" : "" }
{ "_id" : ObjectId("5cb2ff66d33f6ed856afe57b"), "title" : "abc", "grades" : [ ] }
{ "_id" : ObjectId("5cb2ff6bd33f6ed856afe57c"), "title" : "abc", "grades" : [ 1, 2, 3, 4, 5 ] }
Just filtered empty grades records as:
> db.grades.aggregate([{$match: {grades: {$ne:''}} }])
{ "_id" : ObjectId("5cb2ff50d33f6ed856afe577"), "title" : "abc", "grades" : [ 12, 23, 1 ] }
{ "_id" : ObjectId("5cb2ff55d33f6ed856afe578"), "title" : "abc", "grades" : [ 12, 23 ] }
{ "_id" : ObjectId("5cb2ff5cd33f6ed856afe579"), "title" : "abc", "grades" : [ 12, 23, 10, 100, 34 ] }
{ "_id" : ObjectId("5cb2ff66d33f6ed856afe57b"), "title" : "abc", "grades" : [ ] }
{ "_id" : ObjectId("5cb2ff6bd33f6ed856afe57c"), "title" : "abc", "grades" : [ 1, 2, 3, 4, 5 ] }
Now project the grades count values in a variable along with required other columns.
> db.grades.aggregate([{$match: {grades: {$ne:''}} }, {$project: {_id:0, title:1, count: {$size: "$grades"} } }])
{ "title" : "abc", "count" : 3 }
{ "title" : "abc", "count" : 2 }
{ "title" : "abc", "count" : 5 }
{ "title" : "abc", "count" : 0 }
{ "title" : "abc", "count" : 5 }
Now match required condition of grades array count greater than 4 as below:
> db.grades.aggregate([{$match: {grades: {$ne:''}} }, {$project: {_id:0, title:1, count: {$size: "$grades"} } }, {$match: {count: {$gte: 4}}} ])
{ "title" : "abc", "count" : 5 }
{ "title" : "abc", "count" : 5 }
>

Querying for arrays in MongoDB

Suppose in collection I have following documents:
[
{"title": "t1", "fingerprint":[1, 2, 3]},
{"title": "t2", "fingerprint":[4, 5, 6]}
]
I want to query documents in which at least one element in fingerprint at given position is equal to my querying array.
For example:
query([1, 7, 9]) should return [{"title": "t1", "fingerprint":[1, 2, 3]}]
query([1, 5, 9]) should return [{"title": "t1", "fingerprint":[1, 2, 3]}, {"title": "t2", "fingerprint":[4, 5, 6]}]
but query([5,1,9]) should return none records, because neither of records have same value at any of the positions in fingerprint array.
How to write given query?

When you are trying to match only documents with arrays where the sequence [ 1 2, 3 ] appears in values field and only in that exact order, you can do it this way:
db.testcol.find()
{ "_id" : "first", "value" : [ 1, 2, 3 ] }
{ "_id" : "second", "value" : [ 4, 5, 6 ] }
{ "_id" : "third", "value" : [ 1, 12, 13 ] }
{ "_id" : "fourth", "value" : [ 3, 2, 1 ] }
{ "_id" : "fifth", "value" : [ 1, 12, 13, 2, 3 ] }
{ "_id" : "sixth", "value" : [ 3, 2, 1, 2, 3 ] }
> db.testcol.aggregate([{$addFields:{
cmp: {$in:[
{$literal:[1,2,3]},
{$map: {
input:{$range:[0, {$subtract:[{$size:"$value"},2]}]},
as:"l",
in: {$slice: [ "$value", "$$l", 3] }
}}
]}
}}])
{ "_id" : "first", "value" : [ 1, 2, 3 ], "cmp" : true }
{ "_id" : "second", "value" : [ 4, 5, 6 ], "cmp" : false }
{ "_id" : "third", "value" : [ 1, 12, 13 ], "cmp" : false }
{ "_id" : "fourth", "value" : [ 3, 2, 1 ], "cmp" : false }
{ "_id" : "fifth", "value" : [ 1, 12, 13, 2, 3 ], "cmp" : false }
{ "_id" : "sixth", "value" : [ 3, 2, 1, 2, 3 ], "cmp" : true }
What the $addFields stage does is checks if [1,2,3] appears in a list of three element arrays starting at position 0 of value array and moving forward till two positions before the end.
As you can see, it's now trivial to add a $match stage to filter out documents where cmp is not true.

You can use the .$index notation to perform such a search.
Example for your query([1, 7, 9])
db.coll.find({$or: [{"fingerprint.0": 1}, {"fingerprint.1": 7 }, {"fingerprint.2": 9}]})
{ "_id" : ObjectId("59170da907e34e73c0c93a9b"), "title" : "t1", "fingerprint" : [ 1, 2, 3 ] }
And query([1, 5, 9])
db.coll.find({$or: [{"fingerprint.0": 1}, {"fingerprint.1": 5 }, {"fingerprint.2": 9}]})
{ "_id" : ObjectId("59170da907e34e73c0c93a9b"), "title" : "t1", "fingerprint" : [ 1, 2, 3 ] }
{ "_id" : ObjectId("59170da907e34e73c0c93a9c"), "title" : "t2", "fingerprint" : [ 4, 5, 6 ] }

$in operator is used to match a value against list of values.
According to above mentioned description please try executing following query in MongoDB shell
db.collection.find({fingerprint:{$in:[1,7,9]}})

Use $ and $elemMatch to group entities

Considering the following document in my mongo DB instance :
{
"_id": 1,
"people": [
{"id": 1, "name": "foo"},
{"id": 2, "name": "bar"},
/.../
],
"stats": [
{"peopleId": 1, "workHours": 24},
{"peopleId": 2, "workHours": 36},
/.../
}
Each element in my collection represent the work of every employee in my company, each weeks. As an important note, peopleId may change from one week to another !
I would like to get all weeks where foo worked more than 24 hours. As you can see, the format is kinda annoying since the people name and the work hours are separated in my database. A simple $and is not enough.
I wonder if, using some $ and $elemMatch I can achieve doing this query.
Can I use this to group the "people" entities with "stats" entities ?

Query to get foo worked more than 24 hours.
db.collection.aggregate([
{$unwind: { path : "$people"}},
{$unwind: { path : "$stats"}},
{$match: { "people.name" : "foo"}},
{$group: {
_id: "$_id",
peopleIdMoreThan24: { $addToSet: {
$cond : { if : { $and : [ {"$eq" : ["$people.id", "$stats.peopleId" ] },
{"$gt" : ["$stats.workHours", 24] }]} , then : "$people.id", else: "Not satisfying the condition"}}}
}
},
{$unwind: { path : "$peopleIdMoreThan24" }},
{$match: { "peopleIdMoreThan24" : {$nin : [ "Not satisfying the condition"]}}},
]);
Data in collection:-
/* 1 */
{
"_id" : 1,
"people" : [
{
"id" : 1,
"name" : "foo"
},
{
"id" : 2,
"name" : "bar"
}
],
"stats" : [
{
"peopleId" : 1,
"workHours" : 24
},
{
"peopleId" : 2,
"workHours" : 36
}
]
}
/* 2 */
{
"_id" : 2,
"people" : [
{
"id" : 1,
"name" : "foo"
},
{
"id" : 2,
"name" : "bar"
}
],
"stats" : [
{
"peopleId" : 1,
"workHours" : 25
},
{
"peopleId" : 2,
"workHours" : 36
}
]
}
/* 3 */
{
"_id" : 3,
"people" : [
{
"id" : 1,
"name" : "foo"
},
{
"id" : 2,
"name" : "bar"
}
],
"stats" : [
{
"peopleId" : 1,
"workHours" : 25
},
{
"peopleId" : 2,
"workHours" : 36
}
]
}
Output:-
The output has document id and people id of foo worked more than 24 hours.
/* 1 */
{
"_id" : 3,
"peopleIdMoreThan24" : 1
}
/* 2 */
{
"_id" : 2,
"peopleIdMoreThan24" : 1
}

Aggregation on the basis of the set of nested docs

Let's say I have the next 5 docs:
{ "_id" : "1", "student" : "Oscar", "courses" : [ "A", "B" ] }
{ "_id" : "2", "student" : "Alan", "courses" : [ "A", "B", "C" ] }
{ "_id" : "3", "student" : "Kate", "courses" : [ "A", "B", "D" ] }
{ "_id" : "4", "student" : "John", "courses" : [ "A", "B", "C" ] }
{ "_id" : "5", "student" : "Bema", "courses" : [ "A", "B" ] }
I want to manipulate the collection so that it will return a group of students (with their _id) by set (combination) of courses they take and calculate how many students in each set.
In the example above I have 3 set (combination) of courses and number of students as below:
1 - [ "A", "B" ] <- 2 students take this combination
2 - [ "A", "B", "C" ] <- 2 students
3 - [ "A", "B", "D" ] <- 1 student
I feel like this is more like MapReduce task rather than Aggregation...not sure...
UPDATE 1
Thanks a lot to #ExplosionPills
So the following aggregation command:
db.students.aggregate([{
$group: {
_id: "$courses",
count: {$sum: 1},
students: {$push: "$_id"}
}
}])
gives me the following output:
{ "_id" : [ "A", "B", "D" ], "count" : 1, "students" : [ "3" ] }
{ "_id" : [ "A", "B", "C" ], "count" : 2, "students" : [ "2", "4" ] }
{ "_id" : [ "A", "B" ], "count" : 2, "students" : [ "1", "5" ] }
It groups by set of courses, counts number of students belong to it and their _ids.
UPDATE 2
I found out, the aggregation above treats combination [ "C", "A", "B" ] as different from [ "A", "B", "C" ]. But I need these 2 count as same.
So let's look at the following documents:
{ "_id" : "1", "student" : "Oscar", "courses" : [ "A", "B" ] }
{ "_id" : "2", "student" : "Alan", "courses" : [ "A", "B", "C" ] }
{ "_id" : "3", "student" : "Kate", "courses" : [ "A", "B", "D" ] }
{ "_id" : "4", "student" : "John", "courses" : [ "A", "B", "C" ] }
{ "_id" : "5", "student" : "Bema", "courses" : [ "A", "B" ] }
{ "_id" : "6", "student" : "Alex", "courses" : [ "C", "A", "B" ] }
Let's see this in output:
{ "_id" : [ "C", "A", "B" ], "count" : 1, "students" : [ "6" ] }
{ "_id" : [ "A", "B", "D" ], "count" : 1, "students" : [ "3" ] }
{ "_id" : [ "A", "B", "C" ], "count" : 2, "students" : [ "2", "4" ] }
{ "_id" : [ "A", "B" ], "count" : 2, "students" : [ "1", "5" ] }
See the lines 1 and 3 - this is not what I wanted.
So, to treat [ "C", "A", "B" ] and [ "A", "B", "C" ] as same combination I changed the aggregation as follows:
db.students.aggregate([
{$unwind: "$courses" },
{$sort : {"courses": 1}},
{$group: {_id: "$_id", courses: {$push: "$courses"}}},
{$group: {_id: "$courses", count: {$sum:1}, students: {$push: "$_id"}}}
])
Output:
{ "_id" : [ "A", "B", "D" ], "count" : 1, "students" : [ "3" ] }
{ "_id" : [ "A", "B" ], "count" : 2, "students" : [ "5", "1" ] }
{ "_id" : [ "A", "B", "C" ], "count" : 3, "students" : [ "6", "4", "2" ] }

This is an aggregate operation using grouping.
db.students.aggregate([{
$group: {
// Uniquely identify the document.
// The $ syntax queries on this field
_id: "$courses",
// Add 1 for each field found (effectively a counter)
count: {$sum: 1}
}
}]);
EDIT:
If the courses can be in any order, you can $unwind, $sort, and $group again as suggested in the edited question. It's also possible to do this via mapReduce, but I'm not sure which is faster.
db.students.mapReduce(
function () {
// Use the sorted courses as the key
emit(this.courses.sort(), this._id);
},
function (key, values) {
return {"students": values, count: values.length};
},
{out: {inline: 1}}
)

We Keep Coding

iphone swift flutter scala powershell matlab mongodb postgresql perl eclipse

Mongodb group query for embeded docs - mongodb

Related

mongodb aggregate sum item as nested data

aggregate function to project array size after removing empty value in array

Querying for arrays in MongoDB

Use $ and $elemMatch to group entities

Aggregation on the basis of the set of nested docs

Categories

Resources