regroup after unwind of subdocument of subdocument - mongodb

This is my Document.
{
"_id" : ObjectId("589b6132fafb5a09549b46cb"),
"name" : "foo",
"users" : [
{
"_id" : ObjectId("589b6132fafb5a09549b46cc"),
"name" : "Peter",
"emails" : [
{
"address" : "peter#email.com"
},
{
"address" : "test2#email.com"
}
]
},
{
"_id" : ObjectId("589b6132fafb5a09549b46cd"),
"name" : "Joe",
"emails" : []
}
]
}
I'm unwinding users and users.emails.
When I try to regroup, I get a duplicate of the user named Peter because he has 2 emails.
Query:
// Asker's attempt: unwind both array levels, then regroup by the document _id.
db.test.aggregate([
// Emit one document per element of "users".
{ "$unwind": {
"path": "$users",
"preserveNullAndEmptyArrays": true
} },
// Emit one document per element of each user's "emails"; a user with two emails
// now appears twice, each copy carrying a different single "emails" object.
{ "$unwind": {
"path": "$users.emails",
"preserveNullAndEmptyArrays": true
} },
{
"$group": {
"_id": "$_id",
"name": { "$first": "$name" },
// $addToSet only collapses identical values; the per-email copies of a user
// differ in "emails", so both copies are kept (the duplicate in the result).
"users": { "$addToSet": "$users"},
"allEmails": { "$push": "$users.emails.address" }
}
}
])
Result:
{
"_id" : ObjectId("589b6132fafb5a09549b46cb"),
"name" : "foo",
"users" : [
{
"_id" : ObjectId("589b6132fafb5a09549b46cd"),
"name" : "Joe"
},
{
"_id" : ObjectId("589b6132fafb5a09549b46cc"),
"name" : "Peter",
"emails" : {
"address" : "test2#email.com"
}
},
{
"_id" : ObjectId("589b6132fafb5a09549b46cc"),
"name" : "Peter",
"emails" : {
"address" : "peter#email.com"
}
}
],
"allEmails" : [
"peter#email.com",
"test2#email.com"
]
}
I need the users array to be exactly the same as before the unwind, with allEmails added on the parent document, as shown in the following example.
{
"_id" : ObjectId("589b6132fafb5a09549b46cb"),
"name" : "foo",
"users" : [
{
"_id" : ObjectId("589b6132fafb5a09549b46cc"),
"name" : "Peter",
"emails" : [
{ "address" : "test2#email.com" },
{ "address" : "peter#email.com" }
]
},
{
"_id" : ObjectId("589b6132fafb5a09549b46cd"),
"name" : "Joe",
"emails" : []
}
],
"allEmails" : [
"peter#email.com",
"test2#email.com"
]
}

Running the following aggregate pipeline should give you the desired result:
// Build "allEmails" without $unwind/$group, so "users" is left untouched.
// Step 1: one array of email sub-documents per user (an array of arrays).
var emailsPerUser = {
    "$map": {
        "input": "$users",
        "as": "user",
        "in": "$$user.emails"
    }
};
// Step 2: fold the per-user arrays into one flat array of address strings.
// "$$this.address" picks the "address" of every email in the current inner array.
var flattenedAddresses = {
    "$reduce": {
        "input": emailsPerUser,
        "initialValue": [],
        "in": { "$concatArrays": ["$$value", "$$this.address"] }
    }
};
db.test.aggregate([
    { "$addFields": { "allEmails": flattenedAddresses } }
])
The above pipeline works by first creating a two-dimensional array of email address objects using $map. To see an example of the result produced by applying the expression
{
"$map": {
"input": "$users",
"as": "user",
"in": "$$user.emails"
}
}
run a test pipeline with just a single field that holds the results:
// Test pipeline: expose the intermediate $map result in a single field,
// giving one inner array of email sub-documents per user.
db.test.aggregate([
    {
        "$project": {
            "twoDarray": {
                "$map": {
                    "input": "$users",
                    "as": "user",
                    "in": "$$user.emails"
                }
            }
        }
    }
])
which will produce the 2D array
{
"_id" : ObjectId("589b6132fafb5a09549b46cb"),
"twoDarray" : [
[
{ "address" : "peter#email.com" },
{ "address" : "test2#email.com" }
],
[]
]
}
Now, flatten this 2-D array
[
[
{ "address" : "peter#email.com" },
{ "address" : "test2#email.com" }
],
[]
]
by using the $reduce operator which applies an expression to each element in an array and combines them into a single value. With the help of the $concatArrays operator, you can concatenate each element within the $reduce expression to form the final desired array
[
"peter#email.com",
"test2#email.com"
]

Related

Filter and count the number of element in an array [duplicate]

I have a mongoDB collection called "conference" with an array of participants as below :
[
{
"_id" : 5b894357a0c84d5a5d221f25,
"conferenceName" : "myFirstConference",
"startDate" : 1535722327,
"endDate" : 1535722420,
"participants" : [
{
"name" : "user1",
"origin" : "internal",
"ip" : "192.168.0.2"
},
{
"name" : "user2",
"origin" : "external",
"ip" : "172.20.0.3"
},
]
},
...
]
I would like to get the following result :
[
{
"conferenceName" : "myFirstConference",
"startDate" : 1535722327,
"endDate" : 1535722420,
"internalUsersCount" : 1
"externalUsersCount" : 1,
},
...
]
I tried the request below but it's not working :
// Asker's attempt (reported as not working): tries to count the participants per origin.
db.getCollection("conference").aggregate([
{
$addFields: {
internalUsersCount : {
// $size is given a query-style { field: { $elemMatch: ... } } document here
// instead of an expression that evaluates to an array.
$size : { "$participants" : {$elemMatch : { origin : "internal" }}}
},
externalUsersCount : {
// Same construct for the "external" participants.
$size : { "$participants" : {$elemMatch : { origin : "external" }}}
}
}
}
])
How is it possible to count "participant" array elements that match {"origin" : "internal"} and {"origin" : "external"} ?
You need to use the $filter aggregation operator to select the participants with an internal or an external origin respectively, together with the $size operator to calculate the length of each filtered array.
Something like this
// Count the participants per origin and return only the fields shown in the output:
// $project (rather than $addFields) drops "_id" and the "participants" array.
// $ifNull substitutes an empty array when "participants" is missing, because
// $size raises an error for a non-array argument.
db.collection.aggregate([
    { "$project": {
        "_id": 0,
        "conferenceName": 1,
        "startDate": 1,
        "endDate": 1,
        "internalUsersCount": {
            "$size": {
                "$filter": {
                    "input": { "$ifNull": ["$participants", []] },
                    "as": "part",
                    "cond": { "$eq": ["$$part.origin", "internal"] }
                }
            }
        },
        "externalUsersCount": {
            "$size": {
                "$filter": {
                    "input": { "$ifNull": ["$participants", []] },
                    "as": "part",
                    "cond": { "$eq": ["$$part.origin", "external"] }
                }
            }
        }
    }}
])
Output
[
{
"conferenceName": "myFirstConference",
"endDate": 1535722420,
"externalUsersCount": 1,
"internalUsersCount": 1,
"startDate": 1535722327
}
]

How to use $lookup and $in mongodb aggregate

Colleges
{
"_id" : ObjectId("5cd42b5c65b41027845938ae"),
"clgID" : "100",
"name" : "Vivekananda"
},
{
"_id" : ObjectId("5cd42b5c65b41027845938ad"),
"clgID" : "200",
"name" : "National"
}
Point : 1 => Take all clgID From Colleges collection.
Subjects:
{
"_id" : ObjectId("5cd42c2465b41027845938b0"),
"name" : "Hindi",
"members" : {
"student" : [
"123"
]
},
"college" : {
"collegeID" : "100"
}
},
{
"_id" : ObjectId("5cd42c2465b41027845938af"),
"name" : "English",
"members" : {
"student" : [
"456",
"789"
]
},
"college" : {
"collegeID" : "100"
}
}
Point : 2 => Subjects collection we are mapped clgID under college.collegeID, Subjects collection we need to take the values of members.student based on clgID.
CollegeProducts
{
"_id" : "123",
"StudentProdcutID" : "123",
"StudentID" : "FF80",
"CID" : "Facebook"
},
{
"_id" : "456",
"StudentProdcutID" : "456",
"StudentID" : "FF81",
"CID" : "Facebook"
},
{
"_id" : "789",
"StudentProdcutID" : "789",
"StudentID" : "FF82",
"CID" : "Facebook"
}
Point : 3 => CollegeProducts collection we are mapped members.student values under StudentProdcutID, CollegeProducts collection we need to take the values in StudentID. CollegeProducts collection we need to check condition CID should be Facebook and take the values of StudentID based on members.student.
UserDetails
{
"name" : "A",
"StudentID" : "FF80"
},
{
"name" : "B",
"StudentID" : "FF81"
},
{
"name" : "C",
"StudentID" : "FF82"
}
Point : 4 => In the UserDetails collection, the StudentID values taken from CollegeProducts are matched against StudentID; from UserDetails we need to take the values of name.
Expected Output:
{
"collegeName" : "Vivekananda",
"StudentName" : "A"
},
{
"collegeName" : "Vivekananda",
"StudentName" : "B"
},
{
"collegeName" : "Vivekananda",
"StudentName" : "C"
}
My Code
// Asker's attempt: join each college to its subjects and collect the student IDs per college.
db.Colleges.aggregate([
{ "$match": { "clgID": { "$in": ["100", "200"] }}},
// Attach the Subjects documents whose college.collegeID equals this college's clgID.
{ "$lookup": {
"from": "Subjects",
"localField": "clgID",
"foreignField": "college.collegeID",
"as": "clg"
}},
// One document per (college, subject) pair; colleges without subjects are kept.
{ "$unwind": { "path": "$clg", "preserveNullAndEmptyArrays": true }},
// Regroup per college; groupDetails becomes an array of per-subject student arrays.
{ "$group": {
"_id": { "clgId": "$clg.college.collegeID", "_id": "$_id" },
"groupDetails": { "$push": "$clg.members.student" },
"clgName": { "$first": "$name" }
}},
{ "$project": {
"_id": "$_id._id",
"clgName": 1,
// Flatten the array of arrays into a single array of student IDs.
"groupDetails": {
"$reduce": {
"input": "$groupDetails",
"initialValue": [],
"in": { "$concatArrays": ["$$this", "$$value"] }
}
}
}}
// NOTE(review): the pipeline stops here; CollegeProducts and UserDetails are never
// joined, so the student names in the expected output cannot be produced yet.
])
I am not getting my expected output; could anyone kindly help me? I am using MongoDB version 3.4.
Don't bother grouping if you want each output document to be one user; you're just doing double the work.
Change your query to this:
// Two $unwind stages flatten Colleges -> Subjects -> student IDs, so no $group is needed.
// Written as a complete call: the original stage list had no opening and ended in a stray "],".
db.Colleges.aggregate([
    {
        "$match" : {
            "clgID" : { "$in" : [ "100", "200" ] }
        }
    },
    {
        "$lookup" : {
            "from" : "Subjects",
            "localField" : "clgID",
            "foreignField" : "college.collegeID",
            "as" : "clg"
        }
    },
    // One document per (college, subject) pair.
    {
        "$unwind" : {
            "path" : "$clg",
            "preserveNullAndEmptyArrays" : true
        }
    },
    // One document per (college, student ID) pair.
    {
        "$unwind" : {
            "path" : "$clg.members.student",
            "preserveNullAndEmptyArrays" : true
        }
    },
    {
        "$project" : {
            "collegeName" : "$name",
            "student" : "$clg.members.student"
        }
    }
])
Now with the second unwind each object contains the college name and -ONE- student so all we need to do now is project in the required form.
EDIT: full query according to request
// Full chain: Colleges -> Subjects -> CollegeProducts -> UserDetails, one output document per student.
// Written as a complete call: the original stage list had no opening and ended in a stray "],".
db.Colleges.aggregate([
    {
        "$match" : {
            "clgID" : { "$in" : [ "100", "200" ] }
        }
    },
    {
        "$lookup" : {
            "from" : "Subjects",
            "localField" : "clgID",
            "foreignField" : "college.collegeID",
            "as" : "clg"
        }
    },
    {
        "$unwind" : {
            "path" : "$clg",
            "preserveNullAndEmptyArrays" : true
        }
    },
    {
        "$unwind" : {
            "path" : "$clg.members.student",
            "preserveNullAndEmptyArrays" : true
        }
    },
    // Resolve each student ID to its CollegeProducts document.
    {
        "$lookup" : {
            "from" : "CollegeProducts",
            "localField" : "clg.members.student",
            "foreignField" : "StudentProdcutID",
            "as" : "clgproduct"
        }
    },
    { // can skip this unwind if there's always only one match.
        "$unwind" : {
            "path" : "$clgproduct",
            "preserveNullAndEmptyArrays" : true
        }
    },
    // Keep only Facebook products; this also drops rows that had no product match.
    {
        "$match" : {
            "clgproduct.CID" : "Facebook"
        }
    },
    {
        "$lookup" : {
            "from" : "UserDetails",
            "localField" : "clgproduct.StudentID",
            "foreignField" : "StudentID",
            "as" : "student"
        }
    },
    { // can skip this unwind if there's always only one user matched.
        "$unwind" : {
            "path" : "$student",
            "preserveNullAndEmptyArrays" : true
        }
    },
    // Shape the result like the expected output: collegeName + StudentName, no _id.
    {
        "$project" : {
            "_id" : 0,
            "collegeName" : "$name",
            "StudentName" : "$student.name"
        }
    }
])
You can use below aggregation
// Collect every student ID per college, then resolve the IDs to names through two lookups.
db.Colleges.aggregate([
    { "$match": { "clgID": { "$in": ["100", "200"] }}},
    { "$lookup": {
        "from": "Subjects",
        "localField": "clgID",
        "foreignField": "college.collegeID",
        "as": "clg"
    }},
    { "$unwind": { "path": "$clg", "preserveNullAndEmptyArrays": true }},
    { "$group": {
        "_id": { "clgId": "$clg.college.collegeID", "_id": "$_id" },
        "groupDetails": { "$push": "$clg.members.student" },
        "clgName": { "$first": "$name" }
    }},
    // Flatten the per-subject student arrays into a single array of IDs.
    { "$project": {
        "_id": "$_id._id",
        "clgName": 1,
        "groupDetails": {
            "$reduce": {
                "input": "$groupDetails",
                "initialValue": [],
                "in": { "$concatArrays": ["$$this", "$$value"] }
            }
        }
    }},
    // The collection is named "CollegeProducts" in the question.
    { "$lookup": {
        "from": "CollegeProducts",
        "localField": "groupDetails",
        "foreignField": "StudentProdcutID",
        "as": "CollegeProduct"
    }},
    { "$unwind": "$CollegeProduct" },
    // Only Facebook products are wanted, as stated in the question.
    { "$match": { "CollegeProduct.CID": "Facebook" }},
    { "$lookup": {
        "from": "UserDetails",
        "localField": "CollegeProduct.StudentID",
        "foreignField": "StudentID",
        "as": "Student"
    }},
    { "$unwind": "$Student" },
    // "$clgName" is a field reference; without the "$" the literal string "clgName" is emitted.
    { "$project": { "collegeName": "$clgName", "StudentName": "$Student.name" }}
])
MongoPlayground
Output
[
{
"StudentName": "A",
"_id": ObjectId("5cd42b5c65b41027845938ae"),
"collegeName": "Vivekananda"
},
{
"StudentName": "B",
"_id": ObjectId("5cd42b5c65b41027845938ae"),
"collegeName": "Vivekananda"
},
{
"StudentName": "C",
"_id": ObjectId("5cd42b5c65b41027845938ae"),
"collegeName": "Vivekananda"
}
]

Finding intersection between two object arrays based on field

In mongo collection I have documents of following structure.
{
"_id" : "Suzuki",
"qty" : 10,
"plates" : [
{
"rego" : "1QX-WA-123",
"date" : 1516374000000.0
},
{
"rego" : "1QX-WA-456",
"date" : 1513369800000.0
}
],
"accounts" : [
{
"_id" : "23kpi9MD4KnTvnaW7",
"createdAt" : 1513810712802.0,
"date" : 1503446400000.0,
"type" : "Suzuki",
"rego" : "1QX-WA-123",
},
{
"_id" : "2Wqrd4yofvLmqLm5H",
"createdAt" : 1513810712802.0,
"date" : 1501632000000.0,
"type" : "Suzuki",
"rego" : "1QX-WA-111",
}
]
}
I am trying to filter objects in accounts array so that it contains only those objects whose rego exists in plates array.
I tried following query, however, it throws an error: all operands of $setIntersection must be arrays. One argument if of type object.
// Asker's attempt: fails with "all operands of $setIntersection must be arrays".
db.getCollection('dummy').aggregate([{
$project: {
plates: 1,
accounts: 1,
intersect: {
$setIntersection: [
// Each $arrayElemAt picks a single array element (an object), not an array,
// which is what the reported error complains about.
{ $arrayElemAt: [ "$plates", 0 ] },
{ $arrayElemAt: [ "$accounts", 4 ] }
]
}
}
}])
The expected output I am looking for is:
{
"_id" : "Suzuki",
"qty" : 10,
"plates" : [
{
"rego" : "1QX-WA-123",
"date" : 1516374000000.0
},
{
"rego" : "1QX-WA-456",
"date" : 1513369800000.0
}
],
"accounts" : [
{
"_id" : "23kpi9MD4KnTvnaW7",
"createdAt" : 1513810712802.0,
"date" : 1503446400000.0,
"type" : "Suzuki",
"rego" : "1QX-WA-123",
}
]
}
So there are a couple of ways, but what you really are after is simply to $filter instead.
Using $in would likely be the first choice:
// Keep only the accounts whose "rego" also appears among the plates.
// "$plates.rego" resolves to the array of every plate's rego value.
var regoIsOnAPlate = { "$in": [ "$$account.rego", "$plates.rego" ] };
db.getCollection('dummy').aggregate([
    { "$addFields": {
        "accounts": {
            "$filter": { "input": "$accounts", "as": "account", "cond": regoIsOnAPlate }
        }
    }}
])
Or if you don't have MongoDB 3.4 at least, then using $anyElementTrue:
// Alternative without $in: an account is kept when at least one plate rego equals its rego.
var matchesAnyPlate = {
    "$anyElementTrue": {
        "$map": {
            "input": "$plates.rego",
            "as": "plateRego",
            "in": { "$eq": [ "$$plateRego", "$$account.rego" ] }
        }
    }
};
db.getCollection('dummy').aggregate([
    { "$project": {
        "qty": 1,
        "plates": 1,
        "accounts": {
            "$filter": { "input": "$accounts", "as": "account", "cond": matchesAnyPlate }
        }
    }}
])
Or even $setIsSubset:
// Set-based variant: the one-element set holding the account's rego must be a subset of the plate regos.
var regoWithinPlates = { "$setIsSubset": [ ["$$account.rego"], "$plates.rego" ] };
db.getCollection('dummy').aggregate([
    { "$project": {
        "qty": 1,
        "plates": 1,
        "accounts": {
            "$filter": { "input": "$accounts", "as": "account", "cond": regoWithinPlates }
        }
    }}
])
It's really not a $setIntersection for this type of operation, since that would need a comparison on "just the field values" as a "set", and the output is really just "that" and not the "objects".
You could do something silly with matching array indexes to the produced "set" positions:
// Intersect the rego values first, then map each shared rego back to the
// account found at the position where that rego occurs in "$accounts.rego".
var sharedRegos = { "$setIntersection": ["$plates.rego", "$accounts.rego"] };
db.getCollection('dummy').aggregate([
    { "$addFields": {
        "accounts": {
            "$map": {
                "input": sharedRegos,
                "as": "rego",
                "in": {
                    "$arrayElemAt": [
                        "$accounts",
                        { "$indexOfArray": [ "$accounts.rego", "$$rego" ] }
                    ]
                }
            }
        }
    }}
])
But in reality you probably really just want the $filter result as being far more practical. And if you want that output as a "set" then you can simply wrap the $filter output with a $setDifference or like operator to make the entries "unique".
In all variations these return:
{
"_id" : "Suzuki",
"qty" : 10.0,
"plates" : [
{
"rego" : "1QX-WA-123",
"date" : 1516374000000.0
},
{
"rego" : "1QX-WA-456",
"date" : 1513369800000.0
}
],
"accounts" : [
{
"_id" : "23kpi9MD4KnTvnaW7",
"createdAt" : 1513810712802.0,
"date" : 1503446400000.0,
"type" : "Suzuki",
"rego" : "1QX-WA-123"
}
]
}
Showing the items in the "accounts" array "filtered" down to those matching the respective "rego" values from the "plates" array.

Mongodb Search with embedded document.

Structure of mongodb collection is like this.
collection User
{
"name":"sufaid",
"age":"22",
"address":"zzzz",
"product":[{"id":1,"name":"A"},
{"id":6,"name":"N"},
{"id":3,"name":"D"},
{"id":7,"name":"q"},
]
}
I need to find the users who have product id 3.
The output should be like this
{
"name":"sufaid",
"age":"22",
"address":"zzzz",
"product":{"id":3,"name":"D"}
}
Note: without using $unwind or a projection like "product.$".
"product.$" throws an error while using pymongo.
Is there any other option?
use $elemMatch. https://docs.mongodb.com/manual/reference/operator/projection/elemMatch/
for your query:
db.User.find({"product.id":3},{name:1,age:1,address:1,product:{$elemMatch:{id:3}}}) // the filter restricts the result to users who own product id 3
or
db.User.find({"product.id":3},{product:{$elemMatch:{id:3}}}) // the filter restricts the result to users who own product id 3
o/p: {
"name" : "sufaid",
"age" : "22",
"address" : "zzzz",
"product" : [
{
"id" : 3.0,
"name" : "D"
}
]
}
As you require it for aggregation:
// One document per product, keep the product with id 3, then shape the output.
db.User.aggregate([
    {$unwind:'$product'},
    {$match:{'product.id':3}},
    // "address" (previously misspelled "aaddress") so the field appears in the output
    {$project:{_id:0,name:1,age:1,address:1,product:1}}
])
o/p:
{
"name" : "sufaid",
"age" : "22",
"address" : "zzzz",
"product" : {
"id" : 3.0,
"name" : "D"
}
}
This will give exactly what you indicated in the question.
You could use the aggregation framework which has a plethora of operators that you can use, in particular you'd need the $filter and $arrayElemAt operators in a $project pipeline.
For instance, you could return just the product field as an embedded document by running the following pipeline:
// Return the matching product as a single embedded document.
// The collection is "User" as in the question; collection names are case-sensitive.
db.User.aggregate([
    // Only users that own product id 3.
    { "$match": { "product.id": 3 } },
    {
        "$project": {
            "name": 1,
            "age": 1,
            "address": 1,
            // $filter keeps the products with id 3; $arrayElemAt takes the first of them.
            "product": {
                "$arrayElemAt": [
                    {
                        "$filter": {
                            "input": "$product",
                            "as": "item",
                            "cond": { "$eq": [ "$$item.id", 3 ] }
                        }
                    },
                    0
                ]
            }
        }
    }
])
Sample Output
{
"_id" : ObjectId("5829ac89628123dcf8a64b7a"),
"name" : "sufaid",
"age" : "22",
"address" : "zzzz",
"product" : {
"id" : 3,
"name" : "D"
}
}
If you just need an output with the array filtered, skip the $arrayElemAt expression and use the $filter only:
// Return the product array filtered down to the entries with id 3.
// The collection is "User" as in the question; collection names are case-sensitive.
db.User.aggregate([
    // Only users that own product id 3.
    { "$match": { "product.id": 3 } },
    {
        "$project": {
            "name": 1,
            "age": 1,
            "address": 1,
            "product": {
                "$filter": {
                    "input": "$product",
                    "as": "item",
                    "cond": { "$eq": [ "$$item.id", 3 ] }
                }
            }
        }
    }
])
Sample Output
{
"_id" : ObjectId("5829ac89628123dcf8a64b7a"),
"name" : "sufaid",
"age" : "22",
"address" : "zzzz",
"product" : [
{ "id" : 3, "name" : "D" }
]
}
db.User.find({"product.id":3},{product:{$elemMatch:{id:3}}}) // the filter restricts the result to users who own product id 3
it's enough

MongoDB Projection of Nested Arrays

I've got a collection "accounts" which contains documents similar to this structure:
{
"email" : "john.doe#acme.com",
"groups" : [
{
"name" : "group1",
"contacts" : [
{ "localId" : "c1", "address" : "some address 1" },
{ "localId" : "c2", "address" : "some address 2" },
{ "localId" : "c3", "address" : "some address 3" }
]
},
{
"name" : "group2",
"contacts" : [
{ "localId" : "c1", "address" : "some address 1" },
{ "localId" : "c3", "address" : "some address 3" }
]
}
]
}
Via
// q: select the account by email, requiring a group named "group1".
q = { "email" : "john.doe#acme.com", "groups" : { $elemMatch: { "name" : "group1" } } }
// p: projection that limits "groups" to the element named "group1".
p = { "groups.name" : 0, "groups" : { $elemMatch: { "name" : "group1" } } }
db.accounts.find( q, p ).pretty()
I'll successfully get just the group of a specified account I'm interested in.
Question: How can I get a limited list of "contacts" within a certain "group" of a specified "account"? Let's suppose I've got the following arguments:
account: email - "john.doe#acme.com"
group: name - "group1"
contact: array of localIds - [ "c1", "c3", "Not existing id" ]
Given these arguments I'd like to have the following result:
{
"groups" : [
{
"name" : "group1", (might be omitted)
"contacts" : [
{ "localId" : "c1", "address" : "some address 1" },
{ "localId" : "c3", "address" : "some address 3" }
]
}
]
}
I don't need anything else apart from the resulting contacts.
Approaches
All queries try to fetch just one matching contact instead of a list of matching contacts, for the sake of simplicity.
I've tried the following queries without any success:
p = { "groups.name" : 0, "groups" : { $elemMatch: { "name" : "group1", "contacts" : { $elemMatch: { "localId" : "c1" } } } } }
p = { "groups.name" : 0, "groups" : { $elemMatch: { "name" : "group1", "contacts.localId" : "c1" } } }
not working: returns whole array or nothing depending on localId
p = { "groups.$" : { $elemMatch: { "localId" : "c1" } } }
error: {
"$err" : "Can't canonicalize query: BadValue Cannot use $elemMatch projection on a nested field.",
"code" : 17287
}
p = { "groups.contacts" : { $elemMatch: { "localId" : "c1" } } }
error: {
"$err" : "Can't canonicalize query: BadValue Cannot use $elemMatch projection on a nested field.",
"code" : 17287
}
Any help is appreciated!
2017 Update
Such a well put question deserves a modern response. The sort of array filtering requested can actually be done in modern MongoDB releases post 3.2 via simply $match and $project pipeline stages, much like the original plain query operation intends.
// $match selects the account; $project then rebuilds "groups" with filtered contacts.
// $project (rather than $addFields) matches the accompanying description and
// returns nothing apart from the resulting groups/contacts.
db.accounts.aggregate([
    { "$match": {
        "email" : "john.doe#acme.com",
        "groups": {
            "$elemMatch": {
                "name": "group1",
                "contacts.localId": { "$in": [ "c1","c3", null ] }
            }
        }
    }},
    { "$project": {
        "_id": 0,
        "groups": {
            // Outer $filter: keep only "group1", and only if it still has contacts.
            "$filter": {
                "input": {
                    // $map: rebuild each group with its contacts reduced to the wanted localIds.
                    "$map": {
                        "input": "$groups",
                        "as": "g",
                        "in": {
                            "name": "$$g.name",
                            "contacts": {
                                "$filter": {
                                    "input": "$$g.contacts",
                                    "as": "c",
                                    "cond": {
                                        "$or": [
                                            { "$eq": [ "$$c.localId", "c1" ] },
                                            { "$eq": [ "$$c.localId", "c3" ] }
                                        ]
                                    }
                                }
                            }
                        }
                    }
                },
                "as": "g",
                "cond": {
                    "$and": [
                        { "$eq": [ "$$g.name", "group1" ] },
                        { "$gt": [ { "$size": "$$g.contacts" }, 0 ] }
                    ]
                }
            }
        }
    }}
])
This makes use of the $filter and $map operators to only return the elements from the arrays that meet the conditions, and is far better for performance than using $unwind. Since the pipeline stages effectively mirror the structure of "query" and "project" from a .find() operation, the performance here is basically on par with such an operation.
Note that where the intention is to actually work "across documents" to bring details together out of "multiple" documents rather than "one", then this would usually require some type of $unwind operation in order to do so, as such enabling the array items to be accessible for "grouping".
This is basically the approach:
// localId values to keep, shared by the document-level and the element-level match.
var wantedLocalIds = [ "c1","c3", null ];
db.accounts.aggregate([
    // 1. Select the candidate documents.
    { "$match": {
        "email" : "john.doe#acme.com",
        "groups.name": "group1",
        "groups.contacts.localId": { "$in": wantedLocalIds }
    }},
    // 2. Flatten both array levels: one document per (group, contact) pair.
    { "$unwind": "$groups" },
    { "$unwind": "$groups.contacts" },
    // 3. Apply the same conditions again, now to the individual array elements.
    { "$match": {
        "groups.name": "group1",
        "groups.contacts.localId": { "$in": wantedLocalIds }
    }},
    // 4. Rebuild the contacts array per (email, group name).
    { "$group": {
        "_id": { "email": "$email", "name": "$groups.name" },
        "contacts": { "$push": "$groups.contacts" }
    }},
    // 5. Rebuild the groups array per email.
    { "$group": {
        "_id": "$_id.email",
        "groups": { "$push": { "name": "$_id.name", "contacts": "$contacts" } }
    }}
])
This is "array filtering" on more than a single match which the basic projection capabilities of .find() cannot do.
You have "nested" arrays therefore you need to process $unwind twice. Along with the other operations.
You could use the $unwind operator of the aggregation framework.
For example:
// The collection in the question is "accounts"; the stages are passed as a pipeline array.
db.accounts.aggregate([
    {$unwind:'$groups'},
    {$unwind:'$groups.contacts'},
    {$match:{email:'john.doe#acme.com', 'groups.name':'group1', 'groups.contacts.localId':{$in:['c1', 'c3', 'whatever']}}}
]);
Should give the following result:
{ "_id" : ObjectId("5500103e706342bc096e2e14"), "email" : "john.doe#acme.com", "groups" : { "name" : "group1", "contacts" : { "localId" : "c1", "address" : "some address 1" } } }
{ "_id" : ObjectId("5500103e706342bc096e2e14"), "email" : "john.doe#acme.com", "groups" : { "name" : "group1", "contacts" : { "localId" : "c3", "address" : "some address 3" } } }
If you want only one object, you can then use the $group operator.