Query multiple fields from a nested document in MongoDB - mongodb

I am using MongoDB 3.0.5. I have a collection like
db.getCollection('mytest').insert([
{_id: 1, "records": [{"Name": "Joe", "Salary": 70000, "Department": "IT"}]},
{_id: 2, "records": [{"Name": "Henry", "Salary": 80000, "Department": "Sales"}, {"Name": "Jake", "Salary": 40000, "Department": "Sales"}]},
{_id: 3, "records": [{"Name": "Sam", "Salary": 90000, "Department": "IT"}, {"Name": "Tom", "Salary": 50000, "Department": "Sales"}]},
{_id: 4, "records":[{"Name": "Janice", "Salary": 80000, "Department": "Finance"}, {"Name": "Kale", "Salary": 95000, "Department": "IT"}]}
])
I can query on a single value, for example the people in the IT department -
> db.getCollection('mytest').find({"records.Department": {"$in": ["IT"]}}, {"records.$": 1})
{ "_id" : 1, "records" : [ { "Name" : "Joe", "Salary" : 70000, "Department" : "IT" } ] }
{ "_id" : 3, "records" : [ { "Name" : "Sam", "Salary" : 90000, "Department" : "IT" } ] }
{ "_id" : 4, "records" : [ { "Name" : "Kale", "Salary" : 95000, "Department" : "IT" } ] }
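What I really want is to find the salaries in both the Finance and IT departments.
But a query with more than one value returns only part of the desired results (the positional $ projection returns just the first matching array element per document, so Kale in document 4 is missing) -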
> db.getCollection('mytest').find({"records.Department": {"$in": ["IT", "Finance"]}}, {"records.$": 1})
{ "_id" : 1, "records" : [ { "Name" : "Joe", "Salary" : 70000, "Department" : "IT" } ] }
{ "_id" : 3, "records" : [ { "Name" : "Sam", "Salary" : 90000, "Department" : "IT" } ] }
{ "_id" : 4, "records" : [ { "Name" : "Janice", "Salary" : 80000, "Department" : "Finance" } ] }
I wonder if somebody could help with it. Thanks.

You can do this with MongoDB's $filter aggregation operator (note that $filter requires MongoDB 3.2 or later). Try the following code:
db.getCollection('mytest').aggregate([
{ $match: {'records.Department': {$in: ['IT', 'Finance']}}},
{ $project: {
departments: {
$filter: {
input: '$records',
as: 'record',
cond: {
$or: [
{$eq: ['$$record.Department', "IT"]},
{$eq: ['$$record.Department', "Finance"]}
]
}
}
}
}
}
])
For your data I got the following result:
{ "_id" : 1, "departments" : [ { "Name" : "Joe", "Salary" : 70000, "Department": "IT" } ] }
{ "_id" : 3, "departments" : [ { "Name" : "Sam", "Salary" : 90000, "Department": "IT" } ] }
{ "_id" : 4, "departments" : [ { "Name" : "Janice", "Salary" : 80000, "Department" : "Finance" }, { "Name" : "Kale", "Salary" : 95000, "Department" : "IT" } ] }

Related

Mongo ranking results

I have a collection like
db.books.insertMany([
{"products" : [{"name": "name1", "ids": [4, 5, 6]}], "author" : "Dante", "shelf": "a" },
{ "products" : [{"name": "name1", "ids": [4, 5]}], "author" : "Homer", "shelf": "a" },
{ "products" : [{"name": "name1", "ids": [2]}], "author" : "Dante", "shelf": "b" },
])
and I want to retrieve all documents where "shelf" is 'a'
and sort them by 2 conditions:
1 - by author
2 - documents where products.ids does not contain 6 should come first.
Could anyone help?
You can try this query:
First, $match documents whose shelf value is "a".
Then create an auxiliary field that is true if 6 does not exist in any products.ids, and false otherwise.
Then $sort by the values you want.
Finally, use $project to remove the auxiliary field.
db.collection.aggregate([
{
"$match": {"shelf": "a"}
},
{
"$set": {
"rank": {
"$eq": [
{
"$filter": {
"input": "$products",
"cond": {"$in": [6,"$$this.ids"]}
}
},[]
]
}
}
},
{
"$sort": {
"rank": -1,
"author": 1
}
},
{
"$project": {"rank": 0}
}
])
Example here
Here is a variation that sorts more granularly on author+"not containing 6".
db.foo.aggregate([
{$match: {shelf:'a'}}
,{$unwind: '$products'}
,{$addFields: {sortMarker: {$cond: [
{$in: [6, '$products.ids']},
"Z", // THEN make sortMarker at the end
"A" // ELSE make sortMarker at the start
]}
}}
,{$sort: {'author':1, 'sortMarker':1}}
]);
which given this input set:
{"products" : [
{"name": "name3", "ids": [6, 7]},
{"name": "name2", "ids": [4, 5]}
],
"author" : "Homer",
"shelf": "a" },
{"products" : [
{"name": "name1", "ids": [4, 5, 6]},
{"name": "name4", "ids": [9]},
{"name": "name7", "ids": [9,6]},
{"name": "name7", "ids": [10]}
],
"author" : "Dante",
"shelf": "a"},
{ "products" : [
{"name": "name1", "ids": [2]}
], "author" : "Dante",
"shelf": "b"}
yields this result:
{
"_id" : 1,
"products" : {
"name" : "name4",
"ids" : [
9
]
},
"author" : "Dante",
"shelf" : "a",
"sortMarker" : "A"
}
{
"_id" : 1,
"products" : {
"name" : "name7",
"ids" : [
10
]
},
"author" : "Dante",
"shelf" : "a",
"sortMarker" : "A"
}
{
"_id" : 1,
"products" : {
"name" : "name1",
"ids" : [
4,
5,
6
]
},
"author" : "Dante",
"shelf" : "a",
"sortMarker" : "Z"
}
{
"_id" : 1,
"products" : {
"name" : "name7",
"ids" : [
9,
6
]
},
"author" : "Dante",
"shelf" : "a",
"sortMarker" : "Z"
}
{
"_id" : 0,
"products" : {
"name" : "name2",
"ids" : [
4,
5
]
},
"author" : "Homer",
"shelf" : "a",
"sortMarker" : "A"
}
{
"_id" : 0,
"products" : {
"name" : "name3",
"ids" : [
6,
7
]
},
"author" : "Homer",
"shelf" : "a",
"sortMarker" : "Z"
}
Optionally, this stage can be added after the $sort:
{$group: {_id: '$author', products: {$push: '$products'}}}
This brings the sorted "not containing 6, then containing 6" items back together as an array grouped by author; the $push retains the order. Note that we only need author in _id because the match was for a single shelf. If more than one shelf is in the match, then we would need:
{$group: {_id: {author:'$author',shelf:'$shelf'}, products: {$push: '$products'}}}

Limit the output of each bucket in `$bucketAuto` aggregation stage of MongoDB

Consider a collection user with the following documents:
{"id": 1, "name": "John", "designation": "customer"}
{"id": 2, "name": "Alison", "designation": "manager"}
{"id": 3, "name": "Sam", "designation": "customer"}
{"id": 4, "name": "George", "designation": "salesperson"}
{"id": 5, "name": "Will", "designation": "salesperson"}
{"id": 6, "name": "Daffney", "designation": "customer"}
{"id": 7, "name": "Julie", "designation": "salesperson"}
{"id": 8, "name": "Elliot", "designation": "customer"}
{"id": 9, "name": "Bruno", "designation": "customer"}
{"id": 10, "name": "Omar", "designation": "customer"}
{"id": 11, "name": "Sid", "designation": "customer"}
{"id": 12, "name": "Nelson", "designation": "manager"}
In the following operation, input documents are grouped into three buckets according to the values in the designation field:
db.users.aggregate([
{
"$bucketAuto": {
groupBy: "$designation",
buckets: 5,
output: {
"count": { $sum: 1 },
"users" : {
$push: {
"name": "$name"
},
}
}
}
}
])
Following are the results of this operation:
/* 1 */
{
"_id" : {"min" : "customer", "max" : "manager"},
"count" : 7.0,
"users" : [
{"name" : "John"},
{"name" : "Sam"},
{"name" : "Daffney"},
{"name" : "Elliot"},
{"name" : "Bruno"},
{"name" : "Omar"},
{"name" : "Sid"}
]
}
/* 2 */
{
"_id" : {"min" : "manager", "max" : "salesperson"},
"count" : 2.0,
"users" : [
{"name" : "Nelson"},
{"name" : "Alison"}
]
}
/* 3 */
{
"_id" : {"min" : "salesperson", "max" : "salesperson"},
"count" : 3.0,
"users" : [
{"name" : "George"},
{"name" : "Will"},
{"name" : "Julie"}
]
}
What I wanted to do was limit the number of results in the "users" attribute of the resulting documents to 2, something like this:
/* 1 */
{
"_id" : {"min" : "customer", "max" : "manager"},
"count" : 2.0,
"users" : [
{"name" : "John"},
{"name" : "Sam"}
]
}
/* 2 */
{
"_id" : {"min" : "manager", "max" : "salesperson"},
"count" : 2.0,
"users" : [
{"name" : "Nelson"},
{"name" : "Alison"}
]
}
/* 3 */
{
"_id" : {"min" : "salesperson", "max" : "salesperson"},
"count" : 2.0,
"users" : [
{"name" : "George"},
{"name" : "Will"}
]
}
Is there some way I can do that?
I am not sure this is possible within $bucketAuto itself, but you can use $slice to take a limited number of elements from the array and $size to get the number of elements in the array.
Add these stages after the $bucketAuto stage:
{
$addFields: {
users: { $slice: ["$users", 2] }
}
},
{
$addFields: {
count: { $size: "$users" }
}
}
Playground

Querying, projecting and sorting multiple fields of documents inside a mongoDB collection

I have a simple MongoDB collection, named usersCollection, whose general structure is this:
[
{
"username" : "john",
"sex" : "m",
"email" : "john#gmail.com",
"courses" : [
{
"title" : "medicine",
"grade": 25,
},
{
"title" : "art",
"grade": 29,
},
{
"title" : "history",
"grade": 21,
}
]
},
{
"username" : "jane",
"sex" : "f",
"email" : "jane#gmail.com",
"courses" : [
{
"title" : "math",
"grade": 20,
},
{
"title" : "medicine",
"grade": 30,
}
]
},
{
"username" : "sarah",
"sex" : "f",
"email" : "sarah#gmail.com",
"courses" : [ ]
},
{
"username" : "josh",
"sex" : "f",
"email" : "josh#gmail.com",
"courses" : [
{
"title" : "english",
"grade": 28,
}
]
},
{
"username" : "mark",
"sex" : "m",
"email" : "mark#gmail.com",
"courses" : [
{
"title" : "history",
"grade": 30,
},
{
"title" : "medicine",
"grade": 19,
},
{
"title" : "math",
"grade": 22,
}
]
}
]
Every user is a member of usersCollection and has some general information and a list of courses that he/she has already completed, with the corresponding grades.
Can anyone tell me how I can query usersCollection to get an array, sorted by grade in descending order, that contains one object for each user who has already completed a specific course? Every object in this array should contain the "username", the "email" and the "grade" of the corresponding user.
For example, running the query on usersCollection for the course titled "medicine", I would obtain this array:
[
{
"username": "jane",
"email": "jane#gmail.com",
"grade": 30
},
{
"username": "john",
"email": "john#gmail.com",
"grade": 25
},
{
"username": "mark",
"email": "mark#gmail.com",
"grade": 19
}
]
You can do this using the $unwind, $sort and $group stages of the MongoDB aggregation pipeline.
If you want only a specific course, apply a $match stage before the $sort stage.
db.users.aggregate([
{
$unwind: "$courses"
},
{
$match: {
'courses.title': 'medicine'
}
},
{
$sort: {
'courses.title': -1,
'courses.grade': -1,
}
},
{
$group: {
_id: "$courses.title",
records: {
$push: {
"_id" : "$_id",
"username" : "$username",
"sex" : "$sex",
"email" : "$email",
"grade":"$courses.grade"
}
}
}
}
])

MongoDB Use $group for the subset after $group

I have just started learning MongoDB, and I am trying to find repeat-customer information in my customer database.
The sample collection:
{
"_id" : ObjectId("5b7617e48146d8bae"),
"amazon_id" : "112",
"date" : "2018-01-25T18:40:55-08:00",
"email" : "xxxxx#marketplace.amazon.com",
"buy_name" : "xxxxx",
"sku" : "NPC-50",
"qty" : 8,
"price" : 215.92,
"reci_name" : "XXXXX",
"street1" : "XXXXX",
"street2" : "",
"street3" : "",
"city" : "XXXXX",
"state" : "XXXXX",
"zip_code" : "XXXXXX"
}
{
"_id" : ObjectId("5b761712e48146d8bae"),
"amazon_id" : "114",
"date" : "2018-01-27T18:40:55-08:00",
"email" : "xxxxx#marketplace.amazon.com",
"buy_name" : "xxxxx",
"sku" : "ABC",
"qty" : 1,
"price" : 19.99,
"reci_name" : "XXXXX",
"street1" : "XXXXX",
"street2" : "",
"street3" : "",
"city" : "XXXXX",
"state" : "XXXXX",
"zip_code" : "XXXXXX"
}
I group all customer info by their email id, and here is my code:
db.getCollection('order').aggregate([
{ $group: { _id: "$email",
OrderInfo: {$push: {orderId: "$amazon_id", sku: "$sku", qty: "$qty", price:"$price"
}},
CustomerInfo: {$addToSet: {buyName: "$buy_name",reName: "$reci_name", email: "$email", street1: "$street1",
street2: "$street2", city: "$city", state: "$state", zipCode: "$zip_code"} }
}},
{ $project: {_id: 1, OrderInfo: 1, CustomerInfo:1, total_price:{$sum: "$OrderInfo.price"} }},
{ $match: {total_price: {$gt:100} } },
{ $sort: {total_price:-1}},
], { allowDiskUse: true } );
It shows me the result:
{
"_id" : "xxxxxxx#marketplace.amazon.com",
"OrderInfo" : [
{
"orderId" : "112",
"sku" : "NPC-50",
"qty" : 8,
"price" : 215.92
},
{
"orderId" : "112",
"sku" : "NPC-50",
"qty" : 1,
"price" : 26.99
},
{
"orderId" : "114",
"sku" : "NPC-50",
"qty" : 1,
"price" : 26.99
},
{
"orderId" : "114",
"sku" : "ABC",
"qty" : 1,
"price" : 19.99
},
{
"orderId" : "116",
"sku" : "ABC",
"qty" : 1,
"price" : 19.99
},
],
"CustomerInfo" : [
{
"buyName" : "xxxxxxxxx",
"reName" : "xxxxxxxxxxxx",
"email" : "xxxxxxxxxxxx#marketplace.amazon.com",
"street1" : "xxxxxxxxxxx",
"street2" : "",
"city" : "xxxxxxxxxx",
"state" : "xxxxxxxxxxxx",
"zipCode" : "xxxxxxxxxx"
},
{
"buyName" : "xxxxxxxxxx",
"reName" : "xxxxxx",
"email" : "xxxxxxxx#marketplace.amazon.com",
"street1" : "xxxxxxxxxxx",
"street2" : "",
"city" : "xxxxx",
"state" : "xxxx",
"zipCode" : "xxxxxxxx"
}
],
"total_price" : 309.88
}
However, I want to group by sku and sum up the qty and price within the OrderInfo array. My expected output is something like:
{
"OrderInfo" : [
{
"sku": "NPC-50",
"qty": 10,
"price": 269.9
},
{
"sku": "ABC",
"qty": 2,
"price": 39.98
},
],
"CustomerInfo" : [
{
"buyName" : "xxxxxxxxx",
"reName" : "xxxxxxxxxxxx",
"email" : "xxxxxxxxxxxx#marketplace.amazon.com",
"street1" : "xxxxxxxxxxx",
"street2" : "",
"city" : "xxxxxxxxxx",
"state" : "xxxxxxxxxxxx",
"zipCode" : "xxxxxxxxxx"
},
{
"buyName" : "xxxxxxxxxx",
"reName" : "xxxxxx",
"email" : "xxxxxxxx#marketplace.amazon.com",
"street1" : "xxxxxxxxxxx",
"street2" : "",
"city" : "xxxxx",
"state" : "xxxx",
"zipCode" : "xxxxxxxx"
}
],
"total_price" : 309.88
}
Any Help will be appreciated.
You can use below aggregation.
db.order.aggregate([
{"$group":{
"_id":{"email":"$email","sku":"$sku"},
"qty":{"$sum":"$qty"},
"price":{"$sum":"$price"},
"CustomerInfo":{
"$addToSet":{
"buyName":"$buy_name",
"reName":"$reci_name",
"email":"$email",
"street1":"$street1",
"street2":"$street2",
"city":"$city",
"state":"$state",
"zipCode":"$zip_code"
}
}
}},
{"$group":{
"_id":"$_id.email",
"OrderInfo":{"$push":{"sku":"$_id.sku","qty":"$qty","price":"$price"}},
"total_price":{"$sum":"$price"},
"CustomerInfo":{"$first":"$CustomerInfo"}
}},
{"$match":{"total_price":{"$gt":100}}},
{"$sort":{"total_price":-1}}
])
You can try below aggregation
db.collection.aggregate([
{ "$group": {
"_id": {
"email": "$email",
"sku": "$sku"
},
"CustomerInfo": {
"$addToSet": {
"buyName": "$buy_name",
"otherFields": "$otherFields",
}
},
"price": { "$sum": "$price" },
"qty": { "$sum": "$qty" }
}},
{ "$group": {
"_id": "$_id.email",
"CustomerInfo": { "$first": "$CustomerInfo" },
"OrderInfo": {
"$push": {
"sku": "$_id.sku",
"qty": "$qty",
"price": "$price"
}
}
}}
])

Group using the value from two possible fields

Let's say I have a match collection in the following format
{user1: "a", user2: "b"},
{user1: "a", user2: "c"},
{user1: "b", user2: "d"},
{user1: "b", user2: "c"},
{user1: "b", user2: "e"},
{user1: "c", user2: "f"}
I would like to know which user appears most often (either in user1 or user2). The result should be in this format, ordered by the number of occurrences.
{"user": "b", count:4},
{"user": "c", count:3},
{"user": "a", count:2},
{"user": "d", count:1},
{"user": "f", count:1},
{"user": "e", count:1}
Is there a way I can group on the value of two fields?
Something like match.aggregate({$group: {_id: {$or:["user1","user2"]}}, count:{$sum:1}})
db.match.aggregate([
{$project: { user: [ "$user1", "$user2" ]}},
{$unwind: "$user"},
{$group: {_id: "$user", count: {$sum:1}}}
])
The first stage projects each document into an array of users
{user: ["a", "b"]},
{user: ["a", "c"]},
{user: ["b", "d"]},
...
Next we unwind arrays
{user:"a"},
{user:"b"},
{user:"a"},
{user:"c"},
{user:"b"},
...
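And a simple grouping at the end counts the occurrences of each user. To get the result ordered by count, as asked in the question, append a {$sort: {count: -1}} stage.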
Basically the concept is to $map onto an array and work from there:
db.collection.aggregate([
{ "$project": {
"_id": 0,
"user": { "$map": {
"input": ["A","B"],
"as": "el",
"in": {
"$cond": {
"if": { "$eq": [ "$$el", "A" ] },
"then": "$user1",
"else": "$user2"
}
}
}}
}},
{ "$unwind": "$user" },
{ "$group": {
"_id": "$user",
"count": { "$sum": 1 }
}}
])
Let us walk through an example.
db.users_data.find();
{
"_id" : 1,
"user1" : "a",
"user2" : "aa",
"status" : "NEW",
"createdDate" : ISODate("2016-05-03T08:52:32.434Z")
},
{
"_id" : 2,
"user1" : "a",
"user2" : "ab",
"status" : "NEW",
"createdDate" : ISODate("2016-05-03T09:52:32.434Z")
},
{
"_id" : 3,
"user1" : "b",
"user2" : "aa",
"status" : "NEW",
"createdDate" : ISODate("2016-05-03T10:52:32.434Z")
},
{
"_id" : 4,
"user1" : "b",
"user2" : "ab",
"status" : "NEW",
"createdDate" : ISODate("2016-05-03T10:52:32.434Z")
},
{
"_id" : 5,
"user1" : "a",
"user2" : "aa",
"status" : "OLD",
"createdDate" : ISODate("2015-05-03T08:52:32.434Z")
},
{
"_id" : 6,
"user1" : "a",
"user2" : "ab",
"status" : "OLD",
"createdDate" : ISODate("2015-05-03T08:52:32.434Z")
},
Then
db.users_data.aggregate([
{"$group" : {_id:{user1:"$user1",user2:"$user2"}, count:{$sum:1}}} ])
])
will give the results as
{ "_id" : { "user1" : "a", "user2" : "aa" }, "count" : 2}
{ "_id" : { "user1" : "a", "user2" : "ab" }, "count" : 2}
{ "_id" : { "user1" : "b", "user2" : "aa" }, "count" : 1}
{ "_id" : { "user1" : "b", "user2" : "ab" }, "count" : 1}
Thus grouping by multiple fields is possible.
Now one more variation:
db.users_data.aggregate([
{"$group" : {_id:{user1:"$user1",user2:"$user2",status:"$status"}, count:{$sum:1}}} ])
])
will give the results as
{ "_id" : { "user1" : "a", "user2" : "aa","status":"NEW" }, "count" : 1}
{ "_id" : { "user1" : "a", "user2" : "ab","status":"NEW" }, "count" : 1}
{ "_id" : { "user1" : "b", "user2" : "aa","status":"NEW" }, "count" : 1}
{ "_id" : { "user1" : "b", "user2" : "ab","status":"NEW" }, "count" : 1}
{ "_id" : { "user1" : "a", "user2" : "aa","status":"OLD" }, "count" : 1}
{ "_id" : { "user1" : "a", "user2" : "ab","status":"OLD" }, "count" : 1}