Group using the value from two possible fields - mongodb

Let's say I have a match collection in the following format
{user1: "a", user2: "b"},
{user1: "a", user2: "c"},
{user1: "b", user2: "d"},
{user1: "b", user2: "c"},
{user1: "b", user2: "e"},
{user1: "c", user2: "f"}
I would like to know which user has the most appearances (either in user1 or user2). The result should be in this format, ordered by the number of occurrences.
{"user": "b", count:4},
{"user": "c", count:3},
{"user": "a", count:2},
{"user": "d", count:1},
{"user": "f", count:1},
{"user": "e", count:1}
Is there a way I can group on the value of two fields?
Something like match.aggregate({$group: {_id: {$or:["user1","user2"]}}, count:{$sum:1}})

// Count how often each user appears in either user1 or user2,
// returning the most frequent users first.
db.match.aggregate([
    // Put both user fields of a match into a single array
    {$project: {user: ["$user1", "$user2"]}},
    // Emit one document per array element
    {$unwind: "$user"},
    // Tally the appearances of each user
    {$group: {_id: "$user", count: {$sum: 1}}},
    // The question asks for the result ordered by number of occurrences
    {$sort: {count: -1}}
])
First stage projects each document into array of users
{user: ["a", "b"]},
{user: ["a", "c"]},
{user: ["b", "d"]},
...
Next we unwind arrays
{user:"a"},
{user:"b"},
{user:"a"},
{user:"c"},
{user:"b"},
...
And simple grouping at the end

Basically the concept is to $map onto an array and work from there:
// Same idea using $map: build a two-element array holding user1 and user2,
// then unwind and count, most frequent users first.
db.collection.aggregate([
    { "$project": {
        "_id": 0,
        // "A" and "B" are only placeholders to iterate over:
        // the "A" slot yields user1, the other slot yields user2
        "user": { "$map": {
            "input": ["A", "B"],
            "as": "el",
            "in": {
                "$cond": {
                    "if": { "$eq": [ "$$el", "A" ] },
                    "then": "$user1",
                    "else": "$user2"
                }
            }
        }}
    }},
    // One document per user occurrence
    { "$unwind": "$user" },
    { "$group": {
        "_id": "$user",
        "count": { "$sum": 1 }
    }},
    // Order by number of occurrences, as requested in the question
    { "$sort": { "count": -1 } }
])

Let us take an example and go through
db.users_data.find();
{
"_id" : 1,
"user1" : "a",
"user2" : "aa",
"status" : "NEW",
"createdDate" : ISODate("2016-05-03T08:52:32.434Z")
},
{
"_id" : 2,
"user1" : "a",
"user2" : "ab",
"status" : "NEW",
"createdDate" : ISODate("2016-05-03T09:52:32.434Z")
},
{
"_id" : 3,
"user1" : "b",
"user2" : "aa",
"status" : "NEW",
"createdDate" : ISODate("2016-05-03T10:52:32.434Z")
},
{
"_id" : 4,
"user1" : "b",
"user2" : "ab",
"status" : "NEW",
"createdDate" : ISODate("2016-05-03T10:52:32.434Z")
},
{
"_id" : 5,
"user1" : "a",
"user2" : "aa",
"status" : "OLD",
"createdDate" : ISODate("2015-05-03T08:52:32.434Z")
},
{
"_id" : 6,
"user1" : "a",
"user2" : "ab",
"status" : "OLD",
"createdDate" : ISODate("2015-05-03T08:52:32.434Z")
},
Then
// Group on the (user1, user2) pair and count the documents in each pair.
db.users_data.aggregate([
    {"$group": {_id: {user1: "$user1", user2: "$user2"}, count: {$sum: 1}}}
])
will give the results as
{ "_id" : { "user1" : "a", "user2" : "aa" }, "count" : 2}
{ "_id" : { "user1" : "a", "user2" : "ab" }, "count" : 2}
{ "_id" : { "user1" : "b", "user2" : "aa" }, "count" : 1}
{ "_id" : { "user1" : "b", "user2" : "ab" }, "count" : 1}
Thus grouping by multiple fields is possible
Now one more variation
// Group on the (user1, user2, status) triple and count the documents in each.
db.users_data.aggregate([
    {"$group": {_id: {user1: "$user1", user2: "$user2", status: "$status"}, count: {$sum: 1}}}
])
will give the results as
{ "_id" : { "user1" : "a", "user2" : "aa","status":"NEW" }, "count" : 1}
{ "_id" : { "user1" : "a", "user2" : "ab","status":"NEW" }, "count" : 1}
{ "_id" : { "user1" : "b", "user2" : "aa","status":"NEW" }, "count" : 1}
{ "_id" : { "user1" : "b", "user2" : "ab","status":"NEW" }, "count" : 1}
{ "_id" : { "user1" : "a", "user2" : "aa","status":"OLD" }, "count" : 1}
{ "_id" : { "user1" : "a", "user2" : "ab","status":"OLD" }, "count" : 1}

Related

Mongo ranking results

I have a collection like
db.books.insertMany([
{"products" : [{"name": "name1", "ids": [4, 5, 6]}], "author" : "Dante", "shelf": "a" },
{ "products" : [{"name": "name1", "ids": [4, 5]}], "author" : "Homer", "shelf": "a" },
{ "products" : [{"name": "name1", "ids": [2]}], "author" : "Dante", "shelf": "b" },
])
and I want to retrieve all documents where "shelf" is 'a'
and sort by 2 conditions:
1 - by Author
2 - documents where products.ids not contains 6 should be the first.
Could anyone help?
You can try this query:
First $match the shelf value with "a".
Then create an auxiliary value that is true if 6 does not exist in products.ids, and false otherwise.
Then $sort by the values you want.
And use $project to remove the auxiliary value.
db.collection.aggregate([
    // Keep only the documents on shelf "a"
    {$match: {shelf: "a"}},
    // rank is true when no product's ids array contains 6:
    // the $filter keeps the products that DO contain 6, and rank
    // compares that list against the empty array
    {$set: {
        rank: {
            $eq: [
                {$filter: {
                    input: "$products",
                    cond: {$in: [6, "$$this.ids"]}
                }},
                []
            ]
        }
    }},
    // rank descending puts true (no 6) before false; ties are ordered by author
    {$sort: {rank: -1, author: 1}},
    // Drop the helper field from the output
    {$project: {rank: 0}}
])
Example here
Here is a variation that sorts more granularly on author+"not containing 6".
db.foo.aggregate([
    // Only shelf "a"
    {$match: {shelf: 'a'}},
    // One document per product so each product can be marked on its own
    {$unwind: '$products'},
    // Products whose ids contain 6 get "Z" (sorts last), all others get "A" (sorts first)
    {$addFields: {
        sortMarker: {
            $cond: {
                if: {$in: [6, '$products.ids']},
                then: "Z",
                else: "A"
            }
        }
    }},
    // Author first, then "not containing 6" before "containing 6" within each author
    {$sort: {'author': 1, 'sortMarker': 1}}
]);
which given this input set:
{"products" : [
{"name": "name3", "ids": [6, 7]},
{"name": "name2", "ids": [4, 5]}
],
"author" : "Homer",
"shelf": "a" },
{"products" : [
{"name": "name1", "ids": [4, 5, 6]},
{"name": "name4", "ids": [9]},
{"name": "name7", "ids": [9,6]},
{"name": "name7", "ids": [10]}
],
"author" : "Dante",
"shelf": "a"},
{ "products" : [
{"name": "name1", "ids": [2]}
], "author" : "Dante",
"shelf": "b"}
yields this result:
{
"_id" : 1,
"products" : {
"name" : "name4",
"ids" : [
9
]
},
"author" : "Dante",
"shelf" : "a",
"sortMarker" : "A"
}
{
"_id" : 1,
"products" : {
"name" : "name7",
"ids" : [
10
]
},
"author" : "Dante",
"shelf" : "a",
"sortMarker" : "A"
}
{
"_id" : 1,
"products" : {
"name" : "name1",
"ids" : [
4,
5,
6
]
},
"author" : "Dante",
"shelf" : "a",
"sortMarker" : "Z"
}
{
"_id" : 1,
"products" : {
"name" : "name7",
"ids" : [
9,
6
]
},
"author" : "Dante",
"shelf" : "a",
"sortMarker" : "Z"
}
{
"_id" : 0,
"products" : {
"name" : "name2",
"ids" : [
4,
5
]
},
"author" : "Homer",
"shelf" : "a",
"sortMarker" : "A"
}
{
"_id" : 0,
"products" : {
"name" : "name3",
"ids" : [
6,
7
]
},
"author" : "Homer",
"shelf" : "a",
"sortMarker" : "Z"
}
Optionally, this stage can be added after the $sort:
{$group: {_id: '$author', products: {$push: '$products'}}}
And this will bring the sorted "not containing 6 then containing 6" items together again as an array packaged by author; the $push retains the order. Note that we only need author in _id because the match was for one shelf. If more than one shelf is in the match, then we would need:
{$group: {_id: {author:'$author',shelf:'$shelf'}, products: {$push: '$products'}}}

mongodb aggregate sum item as nested data

Here is my some sample data in collection sale
[
{group:2, item:a, qty:3 },
{group:2, item:b, qty:3 },
{group:2, item:b, qty:2 },
{group:1, item:a, qty:3 },
{group:1, item:a, qty:5 },
{group:1, item:b, qty:5 }
]
and I want to query data like below and sort the popular group to the top
[
{ group:1, items:[{name:'a',total_qty:8},{name:'b',total_qty:5} ],total_qty:13 },
{ group:2, items:[{name:'a',total_qty:3},{name:'b',total_qty:5} ],total_qty:8 },
]
Actually we can loop in server script( php, nodejs ...) but the problem is pagination. I cannot use skip to get the right result.
The following query can get us the expected output:
// Per-group item totals, nested under each group, most popular group first.
db.collection.aggregate([
    // Stage 1: total quantity for every (group, item) pair
    {
        $group: {
            "_id": {
                "group": "$group",
                "item": "$item"
            },
            "group": {
                $first: "$group"
            },
            "item": {
                $first: "$item"
            },
            "total_qty": {
                $sum: "$qty"
            }
        }
    },
    // Stage 2: fold the per-item totals into one document per group
    {
        $group: {
            "_id": "$group",
            "group": {
                $first: "$group"
            },
            "items": {
                $push: {
                    "name": "$item",
                    "total_qty": "$total_qty"
                }
            },
            "total_qty": {
                $sum: "$total_qty"
            }
        }
    },
    // Stage 3: the question asks for the most popular group on top.
    // With the sample data group 1 (13) now comes before group 2 (8),
    // i.e. the reverse of the unsorted listing shown under "Output:".
    // Sorting inside the pipeline also makes $skip/$limit pagination
    // well-defined when those stages are appended after this one.
    {
        $sort: {
            "total_qty": -1
        }
    },
    // Stage 4: hide the grouping key; "group" already carries the same value
    {
        $project: {
            "_id": 0
        }
    }
]).pretty()
Data set:
{
"_id" : ObjectId("5d84a37febcbd560107c54a7"),
"group" : 2,
"item" : "a",
"qty" : 3
}
{
"_id" : ObjectId("5d84a37febcbd560107c54a8"),
"group" : 2,
"item" : "b",
"qty" : 3
}
{
"_id" : ObjectId("5d84a37febcbd560107c54a9"),
"group" : 2,
"item" : "b",
"qty" : 2
}
{
"_id" : ObjectId("5d84a37febcbd560107c54aa"),
"group" : 1,
"item" : "a",
"qty" : 3
}
{
"_id" : ObjectId("5d84a37febcbd560107c54ab"),
"group" : 1,
"item" : "a",
"qty" : 5
}
{
"_id" : ObjectId("5d84a37febcbd560107c54ac"),
"group" : 1,
"item" : "b",
"qty" : 5
}
Output:
{
"group" : 2,
"items" : [
{
"name" : "b",
"total_qty" : 5
},
{
"name" : "a",
"total_qty" : 3
}
],
"total_qty" : 8
}
{
"group" : 1,
"items" : [
{
"name" : "b",
"total_qty" : 5
},
{
"name" : "a",
"total_qty" : 8
}
],
"total_qty" : 13
}
You need to use $group aggregation with $sum and $push accumulator
// Group the sale documents by "group", total their quantities,
// and list the groups with the largest total first.
db.collection.aggregate([
{ "$group": {
"_id": "$group",
// $$ROOT pushes each whole source document, so "items" holds the raw
// sale rows of the group rather than per-item totals
"items": { "$push": "$$ROOT" },
"total_qty": { "$sum": "$qty" }
}},
// Largest total_qty (most popular group) first
{ "$sort": { "total_qty": -1 }}
])

MongoDB query to get count of array values for specific groups

I am trying to achieve a group which tells me the count of occurrence of color and the entity count corresponding to each color.
I have written a query, but it is giving me twice the count, because of the $unwind I guess.
My Query :
// The asker's query: counts entities per color, then rolls them up per color.
db.message.aggregate([
// After this stage there is one row per (message, entity) pair
{$unwind: '$entities'},
// Number of rows for each (color, entity) combination
{ "$group": {
"_id": {
"color": "$color",
"entities" :"$entities"
},
"count": { "$sum": 1 }
}},
{ "$group": {
"_id": "$_id.color",
// Summing the per-entity counts yields the total number of entity
// occurrences for the color (4 for Red, 5 for Blue in the sample data),
// not the number of messages - each message is counted once per entity
"count": { "$sum": "$count" },
"entities": {
"$push": {
"entity": "$_id.entities",
"count": "$count"
},
}
}}
])
Database Message Data :
{
"_id" : ObjectId("5c55f99abf6dc481ccfa7f67"),
"color" : "Red",
"channel" : "google",
"content" : "red color",
"entities" : [
"a",
"b"
]
},
{
"_id" : ObjectId("5c57f0a0bf6dc44424e8d371"),
"color" : "Red",
"channel" : "google",
"content" : "red color",
"entities" : [
"a",
"b"
]
},{
"_id" : ObjectId("5c5947a4bf6dc462603d87da"),
"color" : "Blue",
"channel" : "google",
"content" : "yo yo",
"entities" : [
"a",
"b"
]
},
{
"_id" : ObjectId("5c6151b9bf6dc4bee8dac8c9"),
"color" : "Blue",
"handle" : "IBM",
"channel" : "twitter",
"content" : "yo yo",
"entities" : [
"a",
"b",
"c"
]
}
(EDITED) I am Expecting the following Output :
{
"color" : "Red",
"count" : 2,
"entities" : [
{
"entity" : "a",
"count" : 2
},
{
"entity" : "b",
"count" : 2
}
]
},
{
"color" : "Blue",
"count" : 2,
"entities" : [
{
"entity" : "c",
"count" : 1
},
{
"entity" : "b",
"count" : 2
},
{
"entity" : "a",
"count" : 2
}
]
}
Please Help!.

MongoDB Need to $push array in nested foreach loops

I have 3 collections: parents, children and links with subset of data like:
Parents:
{ "_id": 1, "PID" : 1, "Pname" : "Joe", "Sal" : 20000 },
{ "_id": 2, "PID" : 2, "Pname" : "Jim", "Sal" : 14100 },
{ "_id": 3, "PID" : 3, "Pname" : "Bob", "Sal" : 13500 },
{ "_id": 4, "PID" : 4, "Pname" : "Amy", "Sal" : 12000 },
{ "_id": 5, "PID" : 5, "Pname" : "George", "Sal" : 10000 }
Children:
{ "_id" : 1, "CID" : 1, "Cname" : "Ronney", "Age" : 10 },
{ "_id" : 2, "CID" : 2, "Cname" : "Mo", "Age" : 11 },
{ "_id" : 3, "CID" : 3, "Cname" : "Adam", "Age" : 13 },
{ "_id" : 4, "CID" : 4, "Cname" : "Eve", "Age" : 21 },
{ "_id" : 5, "CID" : 5, "Cname" : "Johny", "Age" : 19 },
{ "_id" : 6, "CID" : 6, "Cname" : "Sammy", "Age" : 25 },
{ "_id" : 7, "CID" : 7, "Cname" : "Sammy", "Age" : 23 }
Links:
{ "_id" : 1, "PID" : 1, "CID" : 1 },
{ "_id" : 2, "PID" : 1, "CID" : 3 },
{ "_id" : 3, "PID" : 2, "CID" : 5 },
{ "_id" : 4, "PID" : 2, "CID" : 7 },
{ "_id" : 5, "PID" : 2, "CID" : 2 },
{ "_id" : 6, "PID" : 4, "CID" : 4 },
{ "_id" : 7, "PID" : 5, "CID" : 6 }
I need to $push an array of children names into the parents collection using the links collection, which ties parent id to child id. So, for example, Parent 1 will have:
{ "_id" : 1, "PID" : 1, "Pname" : "Joe", "Sal" : 20000, "Children" : ["Ronney", "Adam"] }
I think I can use a nested foreach loops to achieve this, but I am confused about how.
Any help would be greatly appreciated!
You can forego the usage of "nested loops" with any MongoDB version supporting the $lookup aggregation pipeline operation. This will allow the "joining" of the data from multiple sources into one result set.
Given the collections "parents", "children" and "links", you want to perform two $lookup operations followed by $unwind statements to get the related data from "links" first and then join that to the "children".
Finally for convenience you can $group to get the child names into an array for each parent:
db.parents.aggregate([
    // Attach the link rows that share this parent's PID
    {$lookup: {
        from: "links",
        localField: "PID",
        foreignField: "PID",
        as: "children"
    }},
    // One document per (parent, link)
    {$unwind: "$children"},
    // Replace each link with the matching child document(s), found via CID
    {$lookup: {
        from: "children",
        localField: "children.CID",
        foreignField: "CID",
        as: "children"
    }},
    // One document per (parent, child)
    {$unwind: "$children"},
    // Collect the child names back into one array per parent
    {$group: {
        _id: "$_id",
        children: {$push: "$children.Cname"}
    }}
])
Which on your data sample gives output like:
{ "_id" : 4, "children" : [ "Eve" ] }
{ "_id" : 5, "children" : [ "Sammy" ] }
{ "_id" : 2, "children" : [ "Johny", "Sammy", "Mo" ] }
{ "_id" : 1, "children" : [ "Ronney", "Adam" ] }
Now you want to use that data output as a basis to loop and update the appropriate "parent" documents. Taking into consideration that the actual data can and will likely be much larger:
// Build the child-name array for every parent, then write the arrays back to
// the "parents" collection in batches of 500 bulk operations.
let ops = [];
db.parents.aggregate([
    // Link rows for this parent
    { "$lookup": {
        "from": "links",
        "localField": "PID",
        "foreignField": "PID",
        "as": "children"
    }},
    { "$unwind": "$children" },
    // Child documents referenced by each link
    { "$lookup": {
        "from": "children",
        "localField": "children.CID",
        "foreignField": "CID",
        "as": "children"
    }},
    { "$unwind": "$children" },
    // One result per parent carrying the array of child names
    { "$group": {
        "_id": "$_id",
        "children": { "$push": "$children.Cname" }
    }}
]).forEach(doc => {
    // Append in place; rebuilding the array with a spread on every
    // iteration would copy the whole pending batch each time
    ops.push({
        "updateOne": {
            "filter": { "_id": doc._id },
            "update": {
                "$push": { "Children": { "$each": doc.children } }
            }
        }
    });
    // Flush a full batch so the pending list stays bounded
    if ( ops.length >= 500 ) {
        db.parents.bulkWrite(ops);
        ops = [];
    }
});
// Flush whatever is left over from the last, partial batch
if ( ops.length > 0 ) {
    db.parents.bulkWrite(ops);
    ops = [];
}
Which then amends your data as:
{ "_id" : 1, "PID" : 1, "Pname" : "Joe", "Sal" : 20000, "Children" : [ "Ronney", "Adam" ] }
{ "_id" : 2, "PID" : 2, "Pname" : "Jim", "Sal" : 14100, "Children" : [ "Johny", "Sammy", "Mo" ] }
{ "_id" : 3, "PID" : 3, "Pname" : "Bob", "Sal" : 13500 }
{ "_id" : 4, "PID" : 4, "Pname" : "Amy", "Sal" : 12000, "Children" : [ "Eve" ] }
{ "_id" : 5, "PID" : 5, "Pname" : "George", "Sal" : 10000, "Children" : [ "Sammy" ] }
The alternative to using the $push operator would be to use $set instead to write the whole array at once. But it is generally safer to $push or $addToSet, which accounts for the "possibility" that an array may already exist at the location, and the intent is to "add" rather than "overwrite".

Aggregation on the basis of the set of nested docs

Let's say I have the next 5 docs:
{ "_id" : "1", "student" : "Oscar", "courses" : [ "A", "B" ] }
{ "_id" : "2", "student" : "Alan", "courses" : [ "A", "B", "C" ] }
{ "_id" : "3", "student" : "Kate", "courses" : [ "A", "B", "D" ] }
{ "_id" : "4", "student" : "John", "courses" : [ "A", "B", "C" ] }
{ "_id" : "5", "student" : "Bema", "courses" : [ "A", "B" ] }
I want to manipulate the collection so that it will return a group of students (with their _id) by set (combination) of courses they take and calculate how many students in each set.
In the example above I have 3 set (combination) of courses and number of students as below:
1 - [ "A", "B" ] <- 2 students take this combination
2 - [ "A", "B", "C" ] <- 2 students
3 - [ "A", "B", "D" ] <- 1 student
I feel like this is more like MapReduce task rather than Aggregation...not sure...
UPDATE 1
Thanks a lot to #ExplosionPills
So the following aggregation command:
db.students.aggregate([
    {
        $group: {
            // Students sharing exactly the same courses array land in one group
            _id: "$courses",
            // Number of students in the group
            count: {$sum: 1},
            // The _id of every student in the group
            students: {$push: "$_id"}
        }
    }
])
gives me the following output:
{ "_id" : [ "A", "B", "D" ], "count" : 1, "students" : [ "3" ] }
{ "_id" : [ "A", "B", "C" ], "count" : 2, "students" : [ "2", "4" ] }
{ "_id" : [ "A", "B" ], "count" : 2, "students" : [ "1", "5" ] }
It groups by set of courses, counts number of students belong to it and their _ids.
UPDATE 2
I found out, the aggregation above treats combination [ "C", "A", "B" ] as different from [ "A", "B", "C" ]. But I need these 2 count as same.
So let's look at the following documents:
{ "_id" : "1", "student" : "Oscar", "courses" : [ "A", "B" ] }
{ "_id" : "2", "student" : "Alan", "courses" : [ "A", "B", "C" ] }
{ "_id" : "3", "student" : "Kate", "courses" : [ "A", "B", "D" ] }
{ "_id" : "4", "student" : "John", "courses" : [ "A", "B", "C" ] }
{ "_id" : "5", "student" : "Bema", "courses" : [ "A", "B" ] }
{ "_id" : "6", "student" : "Alex", "courses" : [ "C", "A", "B" ] }
Let's see this in output:
{ "_id" : [ "C", "A", "B" ], "count" : 1, "students" : [ "6" ] }
{ "_id" : [ "A", "B", "D" ], "count" : 1, "students" : [ "3" ] }
{ "_id" : [ "A", "B", "C" ], "count" : 2, "students" : [ "2", "4" ] }
{ "_id" : [ "A", "B" ], "count" : 2, "students" : [ "1", "5" ] }
See the lines 1 and 3 - this is not what I wanted.
So, to treat [ "C", "A", "B" ] and [ "A", "B", "C" ] as same combination I changed the aggregation as follows:
db.students.aggregate([
    // One document per (student, course)
    {$unwind: "$courses"},
    // Order by course so every student's list is rebuilt in the same order
    {$sort: {"courses": 1}},
    // Rebuild each student's course list from the sorted rows
    {$group: {_id: "$_id", courses: {$push: "$courses"}}},
    // Group students by their normalised course list
    {$group: {_id: "$courses", count: {$sum: 1}, students: {$push: "$_id"}}}
])
Output:
{ "_id" : [ "A", "B", "D" ], "count" : 1, "students" : [ "3" ] }
{ "_id" : [ "A", "B" ], "count" : 2, "students" : [ "5", "1" ] }
{ "_id" : [ "A", "B", "C" ], "count" : 3, "students" : [ "6", "4", "2" ] }
This is an aggregate operation using grouping.
db.students.aggregate([
    {
        $group: {
            // Group key: the value of each document's "courses" field
            // (the "$" prefix refers to a field of the input document)
            _id: "$courses",
            // Adds 1 per document in the group, i.e. counts the students
            count: {$sum: 1}
        }
    }
]);
EDIT:
If the courses can be in any order, you can $unwind, $sort, and $group again as suggested in the edited question. It's also possible to do this via mapReduce, but I'm not sure which is faster.
// Group students by their (order-independent) set of courses via mapReduce.
// NOTE: mapReduce is deprecated as of MongoDB 5.0; the aggregation pipeline
// with $unwind / $sort / $group is the preferred way to do this.
db.students.mapReduce(
    function () {
        // Use the sorted courses as the key; sort a copy so the document's
        // own array is left untouched.
        // NOTE(review): an array key is only usable with inline output - when
        // writing to a collection the key becomes _id, which cannot be an array
        // (join the courses into a string in that case).
        var key = this.courses.slice().sort();
        // Emit a value with the same shape the reduce function returns:
        // reduce is skipped for keys with a single value and may be re-run
        // on its own output, so both shapes must match.
        emit(key, {students: [this._id], count: 1});
    },
    function (key, values) {
        // Merge partial results; works for raw emits and for earlier reduce output
        var merged = {students: [], count: 0};
        values.forEach(function (value) {
            merged.students = merged.students.concat(value.students);
            merged.count += value.count;
        });
        return merged;
    },
    {out: {inline: 1}}
)