Return specific array value field in aggregate - mongodb

I have an issue in MongoDB. I'm trying to build a fairly complex aggregate query, and it works almost as I want, but I still have trouble: I need to carry a specific field through the pipeline so I can use it later.
My aggregate look like this right now.
db.getCollection('travel_sights').aggregate([{
'$match': {
'preview.photo' : {
'$exists':true
},
'_id': {
'$in' : [ObjectId("5b7af9701fbad410e10f32f7")]
}
}
},{
'$unwind' : '$preview.photo'
}, {
'$lookup':{
'from' : 'media_data',
'localField' : '_id',
'foreignField':'bind',
'as':'media'
}
}])
and it will return data like this.
{
"_id" : ObjectId("5b7af9701fbad410e10f32f7"),
"preview" : {
"photo" : {
"id" : ObjectId("5b7affea1fbad441494a663b"),
"sort" : 0
}
},
"media" : [
{
"_id" : ObjectId("5b7affea1fbad441494a663b")
},
{
"_id" : ObjectId("5b7b002d1fbad441494a663c")
},
{
"_id" : ObjectId("5b7b00351fbad441494a663d")
},
{
"_id" : ObjectId("5b7d9baa1fbad410de638bbb")
},
{
"_id" : ObjectId("5b7d9bae1fbad410e10f32f9")
},
{
"_id" : ObjectId("5b7d9bb11fbad441494a663e")
},
{
"_id" : ObjectId("5b7d9bb41fbad4ff97273402")
},
{
"_id" : ObjectId("5b7d9bb71fbad4ff99527e82")
},
{
"_id" : ObjectId("5b7d9bbb1fbad410de638bbc")
},
{
"_id" : ObjectId("5b7d9bbe1fbad410e10f32fa")
},
{
"_id" : ObjectId("5b7d9bc11fbad441494a663f")
},
{
"_id" : ObjectId("5b7d9bc41fbad4ff97273403")
},
{
"_id" : ObjectId("5b7d9bc71fbad4ff99527e83")
},
{
"_id" : ObjectId("5b7d9bca1fbad410de638bbd")
},
{
"_id" : ObjectId("5b7d9bcd1fbad441494a6640")
},
{
"_id" : ObjectId("5b7d9bd01fbad4ff97273404")
}
]
}
{
"_id" : ObjectId("5b7af9701fbad410e10f32f7"),
"preview" : {
"photo" : {
"id" : ObjectId("5b7b002d1fbad441494a663c"),
"sort" : 0
}
},
"media" : [
{
"_id" : ObjectId("5b7affea1fbad441494a663b")
},
{
"_id" : ObjectId("5b7b002d1fbad441494a663c")
},
{
"_id" : ObjectId("5b7b00351fbad441494a663d")
},
{
"_id" : ObjectId("5b7d9baa1fbad410de638bbb")
},
{
"_id" : ObjectId("5b7d9bae1fbad410e10f32f9")
},
{
"_id" : ObjectId("5b7d9bb11fbad441494a663e")
},
{
"_id" : ObjectId("5b7d9bb41fbad4ff97273402")
},
{
"_id" : ObjectId("5b7d9bb71fbad4ff99527e82")
},
{
"_id" : ObjectId("5b7d9bbb1fbad410de638bbc")
},
{
"_id" : ObjectId("5b7d9bbe1fbad410e10f32fa")
},
{
"_id" : ObjectId("5b7d9bc11fbad441494a663f")
},
{
"_id" : ObjectId("5b7d9bc41fbad4ff97273403")
},
{
"_id" : ObjectId("5b7d9bc71fbad4ff99527e83")
},
{
"_id" : ObjectId("5b7d9bca1fbad410de638bbd")
},
{
"_id" : ObjectId("5b7d9bcd1fbad441494a6640")
},
{
"_id" : ObjectId("5b7d9bd01fbad4ff97273404")
}
]
}
{
"_id" : ObjectId("5b7af9701fbad410e10f32f7"),
"preview" : {
"photo" : {
"id" : ObjectId("5b7b00351fbad441494a663d"),
"sort" : 0,
"primary" : false
}
},
"media" : [
{
"_id" : ObjectId("5b7affea1fbad441494a663b")
},
{
"_id" : ObjectId("5b7b002d1fbad441494a663c")
},
{
"_id" : ObjectId("5b7b00351fbad441494a663d")
},
{
"_id" : ObjectId("5b7d9baa1fbad410de638bbb")
},
{
"_id" : ObjectId("5b7d9bae1fbad410e10f32f9")
},
{
"_id" : ObjectId("5b7d9bb11fbad441494a663e")
},
{
"_id" : ObjectId("5b7d9bb41fbad4ff97273402")
},
{
"_id" : ObjectId("5b7d9bb71fbad4ff99527e82")
},
{
"_id" : ObjectId("5b7d9bbb1fbad410de638bbc")
},
{
"_id" : ObjectId("5b7d9bbe1fbad410e10f32fa")
},
{
"_id" : ObjectId("5b7d9bc11fbad441494a663f")
},
{
"_id" : ObjectId("5b7d9bc41fbad4ff97273403")
},
{
"_id" : ObjectId("5b7d9bc71fbad4ff99527e83")
},
{
"_id" : ObjectId("5b7d9bca1fbad410de638bbd")
},
{
"_id" : ObjectId("5b7d9bcd1fbad441494a6640")
},
{
"_id" : ObjectId("5b7d9bd01fbad4ff97273404")
}
]
}
As you can see, the last document has preview.photo.primary on it, and this is the field I want to return when I'm done with my aggregate query.
My final query look like this:
db.getCollection('travel_sights').aggregate([{
'$match': {
'preview.photo' : {
'$exists':true
},
'_id': {
'$in' : [ObjectId("5b7af9701fbad410e10f32f7")]
}
}
},{
'$unwind' : '$preview.photo'
}, {
'$lookup':{
'from' : 'media_data',
'localField' : '_id',
'foreignField':'bind',
'as':'media'
}
},{
'$unwind':'$media'
},{
'$project' : {
'preview' : 1,
'media': 1,
}
}, {
'$group': {
'_id':'$media._id',
'primary': {
'$first':'$preview'
}
}
}])
The problem is that when I return $preview so that I can find the primary flag on it, $first always returns only the first element, where the value does not exist; if I use $push instead, the problem is that I get everything.
Is there a way to pick the right primary value in my result? I have tried $addFields too, but without any luck.
Travel_sights data:
{
"_id" : ObjectId("5b7af9701fbad410e10f32f7"),
"city_id" : ObjectId("5b6d0cb6222d4c70b803eaeb"),
"activated" : true,
"deleted" : false,
"url" : "url is here",
"name" : "title of it here",
"updated_at" : ISODate("2018-08-22T17:22:27.000Z"),
"content" : "content here",
"preview" : {
"photo" : [
{
"id" : ObjectId("5b7affea1fbad441494a663b"),
"sort" : 0
},
{
"id" : ObjectId("5b7b002d1fbad441494a663c"),
"sort" : 0
},
{
"id" : ObjectId("5b7b00351fbad441494a663d"),
"sort" : 0,
"primary" : true
},
{
"id" : ObjectId("5b7d9baa1fbad410de638bbb"),
"sort" : 0
},
{
"id" : ObjectId("5b7d9bae1fbad410e10f32f9"),
"sort" : 0
},
{
"id" : ObjectId("5b7d9bb11fbad441494a663e"),
"sort" : 0
},
{
"id" : ObjectId("5b7d9bb41fbad4ff97273402"),
"sort" : 0,
"primary" : false
},
{
"id" : ObjectId("5b7d9bb71fbad4ff99527e82"),
"sort" : 0,
"primary" : false
},
{
"id" : ObjectId("5b7d9bbb1fbad410de638bbc"),
"sort" : 0
},
{
"id" : ObjectId("5b7d9bbe1fbad410e10f32fa"),
"sort" : 0
},
{
"id" : ObjectId("5b7d9bc11fbad441494a663f"),
"sort" : 0
},
{
"id" : ObjectId("5b7d9bc41fbad4ff97273403"),
"sort" : 0,
"primary" : false
},
{
"id" : ObjectId("5b7d9bc71fbad4ff99527e83"),
"sort" : 0,
"primary" : false
},
{
"id" : ObjectId("5b7d9bca1fbad410de638bbd"),
"sort" : 0,
"primary" : false
},
{
"id" : ObjectId("5b7d9bcd1fbad441494a6640"),
"sort" : 0,
"primary" : false
},
{
"id" : ObjectId("5b7d9bd01fbad4ff97273404"),
"sort" : 0
}
]
}
}
Three sample photo documents (with their bind data) are here:
{
"_id" : ObjectId("5b7affea1fbad441494a663b"),
"file-name" : "55575110311__0F115282-B5A0-4654-AA44-B7DC2C682992.jpeg",
"options" : [
ObjectId("5b6fb855222d4c70b8041093")
],
"type" : "images",
"files" : [
{
"width" : 70,
"height" : 53
},
{
"width" : 400,
"height" : 300
},
{
"width" : 800,
"height" : 600
},
{
"width" : 1600,
"height" : 1200
}
],
"bind" : [
ObjectId("5b7af9701fbad410e10f32f7")
]
}
{
"_id" : ObjectId("5b7b002d1fbad441494a663c"),
"file-name" : "55575110748__E7B07EFD-9F7E-40D6-8B57-38F708E4C0C0.jpeg",
"options" : [
ObjectId("5b6fb855222d4c70b8041093")
],
"type" : "images",
"files" : [
{
"width" : 70,
"height" : 53
},
{
"width" : 400,
"height" : 300
},
{
"width" : 800,
"height" : 600
},
{
"width" : 1600,
"height" : 1200
}
],
"bind" : [
ObjectId("5b7af9701fbad410e10f32f7")
],
"description" : "this is secoudn demo!",
"title" : "demo 3"
}
{
"_id" : ObjectId("5b7b00351fbad441494a663d"),
"file-name" : "paris2.jpg",
"options" : [
ObjectId("5b6fb855222d4c70b8041093")
],
"type" : "images",
"files" : [
{
"width" : 70,
"height" : 53
},
{
"width" : 400,
"height" : 300
},
{
"width" : 800,
"height" : 600
},
{
"width" : 1600,
"height" : 1200
}
],
"bind" : [
ObjectId("5b7af9701fbad410e10f32f7")
],
"description" : "this is a demo1 :)",
"title" : "demo"
}

You can filter out the element from the array where the primary field exists using $filter aggregation and then easily $group with the media._id field and get the $first document value.
Finally your query will be
// Reduce preview.photo to the single element that carries a "primary" field,
// then join the media documents and group by media _id.
db.getCollection("travel_sights").aggregate([
// Keep only the requested sight(s) that actually have preview.photo.
{ "$match": {
"preview.photo" : { "$exists":true },
"_id": { "$in" : [ ObjectId("5b7af9701fbad410e10f32f7") ] }
}},
// Overwrite the preview.photo array with its first element whose "primary"
// is not undefined ($filter keeps the matching elements, $arrayElemAt takes index 0).
{ "$addFields": {
"preview.photo": {
"$arrayElemAt": [
{ "$filter": {
"input": "$preview.photo",
"as": "photo",
"cond": { "$ne": [ "$$photo.primary", undefined ] }
}}, 0
]
}
}},
// Attach every media_data document whose "bind" matches this sight's _id.
{ "$lookup":{
"from" : "media_data",
"localField" : "_id",
"foreignField": "bind",
"as": "media"
}},
// One output document per joined media document.
{ "$unwind":"$media" },
// Drop everything except the two fields used by the $group below.
{ "$project" : { "preview" : 1, "media": 1, }},
// One result per media _id; "primary" holds the (already filtered) preview.
{ "$group": {
"_id": "$media._id",
"primary": { "$first": "$preview" }
}}
])

Related

How to sort by multiple fields with conditions in MongoDB

Need help to sort these documents:
const docs = Docs.find(
{
'publishedOn.profileId': groupProfile._id,
},
{ sort: { ??? }}
);
I need to find documents which have 'publishedOn.profileId' defined and
sort them by the 'awards.score' of the award whose 'awards.type' = 'challengeWinner'.
Not all documents have awards.type = 'challengeWinner'. I need to
put 'awards.score' = 1 on top, then 2, then 3, and then the rest by 'writtenDate'.
I have no idea how to do it. Is it possible?
[
{
"_id" : "5FW9EDW8gi3M8R7XK",
"createdAt" : ISODate("2021-06-13T00:11:48.638Z"),
"title" : "My solution",
"writtenDateType" : 4,
"writtenDate" : ISODate("2021-06-13T00:00:00.000Z"),
"userId" : "dC35hwe6XMRhvqWBv",
"publishedOn" : [
{
"profileId" : "36oPw2zxYCpKxfiu2",
"publishedDate" : ISODate("2021-06-13T00:11:48.787Z"),
"userId" : "dC35hwe6XMRhvqWBv"
},
{
"profileId" : "9y2RwJpzzyk29ApiC",
"userId" : "dC35hwe6XMRhvqWBv",
"publishedDate" : ISODate("2021-06-13T00:16:01.529Z")
}
],
"awards" : [
{
"type" : "topPoem",
"score" : 5,
"addedAt" : ISODate("2021-06-24T23:04:10.454Z"),
"updatedAt" : ISODate("2021-06-25T23:30:00.069Z")
},
{
"type" : "challengeWinner",
"score" : 2,
"challengeId" : "9y2RwJpzzyk29ApiC",
"addedAt" : ISODate("2021-06-24T23:04:10.454Z"),
"updatedAt" : ISODate("2021-06-25T23:30:00.069Z")
}
]
},
{
"_id" : "upzvo8BeHyQ9r9Yfv",
"createdAt" : ISODate("2021-06-19T15:35:13.716Z"),
"title" : "Briches",
"writtenDateType" : 2,
"writtenDate" : ISODate("2003-01-01T00:00:00.000Z"),
"userId" : "A32228XMuZqxFe4Kz",
"publishedOn" : [
{
"profileId" : "MLGkCtNyZ64bGKedG",
"publishedDate" : ISODate("2021-06-19T15:35:13.861Z"),
"userId" : "A32228XMuZqxFe4Kz"
},
{
"profileId" : "9y2RwJpzzyk29ApiC",
"userId" : "A32228XMuZqxFe4Kz",
"publishedDate" : ISODate("2021-06-19T15:35:36.280Z")
}
],
"awards" : [
{
"type" : "challengeWinner",
"score" : 1,
"challengeId" : "9y2RwJpzzyk29ApiC",
"addedAt" : ISODate("2021-06-24T22:59:00.948Z"),
"updatedAt" : ISODate("2021-06-25T23:30:00.067Z"),
"claps" : 19,
"clapsUsers" : 4
},
{
"type" : "suggestedHomepage",
"score" : 1,
"addedAt" : ISODate("2021-06-24T22:59:59.981Z"),
"updatedAt" : ISODate("2021-06-24T22:59:59.981Z")
}
]
}
]
I just learned this and tried to solve your problem. I used aggregate to filter your data.
First, I selected all the items which $match the `publishedOn.profileId`.
Then, I $project(ed) the fields that are needed. In this case, I took the writtenDate and the matching awards.
In order to choose the needed value from awards, I $filter(ed) on the award type.
Last, I did a $sort by the award score first and then by writtenDate:
// Sort documents published on one profile by their challengeWinner score,
// then by writtenDate.
db.collection.aggregate([
// Keep documents that have a publishedOn entry for the given profile.
{
"$match": {
"publishedOn.profileId": "9y2RwJpzzyk29ApiC"
}
},
// Keep only writtenDate and the awards of type "challengeWinner".
{
"$project": {
"writtenDate": 1,
"awards": {
"$filter": {
"input": "$awards",
"as": "award",
"cond": {
"$eq": [
"$$award.type",
"challengeWinner"
]
}
}
}
}
},
// Ascending by score, ties broken by writtenDate.
// NOTE(review): documents without a challengeWinner award end up with an empty
// "awards" array here; presumably they sort BEFORE the scored ones in an
// ascending sort rather than after them - confirm against the requirement.
{
"$sort": {
"awards.score": 1,
"writtenDate": 1,
}
}
])
Working of above query: https://mongoplayground.net/p/MzWQCR2Gshg
Happy Coding !!!

How to join deeply nested array?

Here is my actual database schema.
company_id is reference object of companies collection and booking_days.consultants.consultant_id is reference object of users collection.
I want to join embedded document with company_id and booking_days.consultants.consultant_id.
{
"_id" : ObjectId("5a7040d664544e1bb877deae"),
"company_id" : ObjectId("5a6eb43f437e6a0d9e00c92f"),
"booking_days" : [
{
"booking_date" : ISODate("2018-01-31T00:00:00.000Z"),
"_id" : ObjectId("5a7040d664544e1bb877deca"),
"consultants" : [
{
"consultant_id" : ObjectId("5a6f2854ce7d6938de1dd52c"),
"_id" : ObjectId("5a7040d664544e1bb877decc")
},
{
"consultant_id" : ObjectId("5a6f2854ce7d6938de1dd52f"),
"_id" : ObjectId("5a7040d664544e1bb877decb")
}
]
},
{
"booking_date" : ISODate("2018-02-01T00:00:00.000Z"),
"_id" : ObjectId("5a7040d664544e1bb877dec6"),
"consultants" : [
{
"consultant_id" : ObjectId("5a6f2854ce7d6938de1dd52f"),
"_id" : ObjectId("5a7040d664544e1bb877dec9")
},
{
"consultant_id" : ObjectId("5a6f2854ce7d6938de1dd52c"),
"_id" : ObjectId("5a7040d664544e1bb877dec8")
},
{
"consultant_id" : ObjectId("5a6f2854ce7d6938de1dd52c"),
"_id" : ObjectId("5a7040d664544e1bb877dec7")
}
]
},
{
"booking_date" : ISODate("2018-02-02T00:00:00.000Z"),
"_id" : ObjectId("5a7040d664544e1bb877dec4"),
"consultants" : [
{
"consultant_id" : ObjectId("5a6f2854ce7d6938de1dd52c"),
"_id" : ObjectId("5a7040d664544e1bb877dec5")
}
]
},
],
"__v" : 0
}
I am using below query.
db.getCollection('booking_days').aggregate(
[
{ $match: { company_id:ObjectId("5a6eb43f437e6a0d9e00c92f") } },
{
$lookup: {
localField: "company_id",
from: "companies",
foreignField: "_id",
as: "companies"
},
},
{
$lookup: {
localField: "booking_days.consultants.consultant_id",
from: "users",
foreignField: "_id",
as: "userssss"
},
},
{
$unwind:"$companies"
},
]
)
Actual Output
{
"_id" : ObjectId("5a7040d664544e1bb877deae"),
"company_id" : ObjectId("5a6eb43f437e6a0d9e00c92f"),
"booking_days" : [
{
"booking_date" : ISODate("2018-01-31T00:00:00.000Z"),
"_id" : ObjectId("5a7040d664544e1bb877deca"),
"consultants" : [
{
"consultant_id" : ObjectId("5a6f2854ce7d6938de1dd52c"),
"_id" : ObjectId("5a7040d664544e1bb877decc")
},
{
"consultant_id" : ObjectId("5a6f2854ce7d6938de1dd52f"),
"_id" : ObjectId("5a7040d664544e1bb877decb")
}
]
},
{
"booking_date" : ISODate("2018-02-01T00:00:00.000Z"),
"_id" : ObjectId("5a7040d664544e1bb877dec6"),
"consultants" : [
{
"consultant_id" : ObjectId("5a6f2854ce7d6938de1dd52f"),
"_id" : ObjectId("5a7040d664544e1bb877dec9")
},
]
},
],
"__v" : 0,
"companies" : {
"_id" : ObjectId("5a6eb43f437e6a0d9e00c92f"),
"first_name" : "Adrienne Runolfsson",
},
"users" : [
{
"_id" : ObjectId("5a6f2854ce7d6938de1dd52c"),
"first_name" : "Christ Hamill",
},
{
"_id" : ObjectId("5a6f2854ce7d6938de1dd52e"),
"first_name" : "Miss Dina Kovacek",
},
]
}
Expected output: the consultant data should appear inside the booking_days.consultants array.
{
"_id" : ObjectId("5a7040d664544e1bb877deae"),
"company_id" : ObjectId("5a6eb43f437e6a0d9e00c92f"),
"booking_days" : [
{
"booking_date" : ISODate("2018-01-31T00:00:00.000Z"),
"_id" : ObjectId("5a7040d664544e1bb877deca"),
"consultants" : [
{
"consultant_id" : {
"_id" : ObjectId("5a6f2854ce7d6938de1dd52c"),
"first_name" : "Christ Hamill",
},
"_id" : ObjectId("5a7040d664544e1bb877decc")
},
{
"consultant_id" : {
"_id" : ObjectId("5a6f2854ce7d6938de1dd52e"),
"first_name" : "Miss Dina Kovacek",
},
"_id" : ObjectId("5a7040d664544e1bb877decb")
}
]
},
{
"booking_date" : ISODate("2018-02-01T00:00:00.000Z"),
"_id" : ObjectId("5a7040d664544e1bb877dec6"),
"consultants" : [
{
"consultant_id" : {
"_id" : ObjectId("5a6f2854ce7d6938de1dd52e"),
"first_name" : "Miss Dina Kovacek",
},
"_id" : ObjectId("5a7040d664544e1bb877dec9")
},
]
},
],
"__v" : 0,
"companies" : {
"_id" : ObjectId("5a6eb43f437e6a0d9e00c92f"),
"first_name" : "Adrienne Runolfsson",
},
}
You have to $unwind the localField when it is an array of embedded documents, except in some cases where localField is an array of scalar ids.
$unwind twice, as the consultants array is two levels deep, followed by $lookup to get the name and $group to get back the expected output.
// Join the company and each consultant's user document into a booking_days
// document, then rebuild the original nested booking_days/consultants shape.
db.getCollection('booking_days').aggregate([
  {"$match":{"company_id":ObjectId("5a6eb43f437e6a0d9e00c92f")}},
  // Attach the referenced company and flatten the one-element result array.
  {"$lookup":{"localField":"company_id","from":"companies","foreignField":"_id","as":"companies"}},
  {"$unwind":"$companies"},
  // The consultants array is nested two levels deep, so flatten both levels.
  {"$unwind":"$booking_days"},
  // The path must be given from the document root: after the previous stage the
  // array lives at booking_days.consultants. Unwinding a non-existent top-level
  // "consultants" field would discard every document.
  {"$unwind":"$booking_days.consultants"},
  // Replace consultant_id with the matching user document(s); $lookup always
  // writes an array into the "as" field.
  {"$lookup":{
    "localField":"booking_days.consultants.consultant_id",
    "from":"users",
    "foreignField":"_id",
    "as":"booking_days.consultants.consultant_id"
  }},
  // First regroup: collect the consultants of each booking day.
  {"$group":{
    "_id":{"_id":"$_id","booking_days_id":"$booking_days._id"},
    "company_id":{"$first":"$company_id"},
    "booking_date":{"$first":"$booking_days.booking_date"},
    "companies":{"$first":"$companies"},
    "consultants":{"$push":"$booking_days.consultants"}
  }},
  // Second regroup: collect the booking days of each root document.
  {"$group":{
    "_id":"$_id._id",
    "company_id":{"$first":"$company_id"},
    "companies":{"$first":"$companies"},
    "booking_days":{
      "$push":{
        "_id":"$_id.booking_days_id",
        "booking_date":"$booking_date",
        "consultants":"$consultants"
      }
    }
  }}
])
{"Id": "5b87a4c79a9c3feac943fc6c",
"comments" : [
{
"likes" : [],
"_id" : ObjectId("5b87a4c79a9c3feac943fc6c"),
"comment" : "string",
"accountId" : "a426d0da-ac72-4932-828e-3af99a998bc7",
"commentId" : "7d2a05d1-2026-4a13-a5c1-318ed80d1b38",
"reply" : [
{
"_id" : ObjectId("5b87b61e97585ef1d0d22108"),
"comment" : "string",
"accountId" : "a426d0da-ac72-4932-828e-3af99a998bc7",
"replyId" : "ec220fd7-3440-44dc-9178-7a1183879463"
},
{
"_id" : ObjectId("5b87b61e97585ef1d0d22108"),
"comment" : "string klllll",
"accountId" : "a426d0da-ac72-4932-828e-3af99a998bc7",
"replyId" : "ec220fd7-3440-44dc-9178-7a1183879463"
}
]
},
{
"likes" : [],
"_id" : ObjectId("5b87c301c8a07efa2599c29e"),
"comment" : "testing",
"accountId" : "cfd29f53-d73e-480c-9cfa-ea42b4119266",
"commentId" : "0676047b-1712-4f70-89d5-29c1abe03eaf",
"reply" : [
{
"_id" : ObjectId("5b87b61e97585ef1d0d22108"),
"comment" : "string",
"accountId" : "a426d0da-ac72-4932-828e-3af99a998bc7",
"replyId" : "ec220fd7-3440-44dc-9178-7a1183879463"
},
{
"_id" : ObjectId("5b87b61e97585ef1d0d22108"),
"comment" : "string klllll",
"accountId" : "a426d0da-ac72-4932-828e-3af99a998bc7",
"replyId" : "ec220fd7-3440-44dc-9178-7a1183879463"
}
]
}
]
}
accountId refers to a document in a different collection.
// Expected Out Put
{"Id": "5b87a4c79a9c3feac943fc6c",
"comments" : [
{
"likes" : [],
"_id" : ObjectId("5b87a4c79a9c3feac943fc6c"),
"comment" : "string",
"name" : "apple",
"reply" : [
{
"_id" : ObjectId("5b87b61e97585ef1d0d22108"),
"comment" : "string",
"name" : "apple",
},
{
"_id" : ObjectId("5b87b61e97585ef1d0d22108"),
"comment" : "string klllll",
"name" : "apple",
}
]
},
{
"likes" : [],
"_id" : ObjectId("5b87c301c8a07efa2599c29e"),
"comment" : "testing",
"name" : "ball",
"reply" : [
{
"_id" : ObjectId("5b87b61e97585ef1d0d22108"),
"comment" : "string",
"name" : "apple",
},
{
"_id" : ObjectId("5b87b61e97585ef1d0d22108"),
"comment" : "string klllll",
"name" : "apple", }
]
}
]
}

How to return all project employees?

I have data in the following format in a collection (projects) inside my database:
{ "_id" : ObjectId("5981a80f223e491a58230e5d"), "id" : 2, "name" : "gbqplhlqxzwl", "managerId" : 65151, "startDate" : "03.11.1999", "finishDate" : "02.01.2003", "projectStatus" : "POSTPONED", "participants" : [ ], "estimatedBudget" : 6017891.811079914 }
{ "_id" : ObjectId("5981a80f223e491a58230e5e"), "id" : 3, "name" : "erfekfsdgryu", "managerId" : 83749, "startDate" : "07.07.2007", "finishDate" : "26.12.2027", "projectStatus" : "POSTPONED", "participants" : [ 19229, 81856, 79270, 5509, 70344, 39424 ], "estimatedBudget" : 3086213.8981674756 }
{ "_id" : ObjectId("5981a80f223e491a58230e5f"), "id" : 1, "name" : "jvbzobhppntd", "managerId" : 18925, "startDate" : "29.04.1999", "finishDate" : "13.10.2008", "projectStatus" : "OPEN", "participants" : [ 46100, 96968, 6676, 56121, 4716, 68901, 43990, 48587, 62547, 30292, 65153, 17551, 27083, 20261, 27097, 50036, 86585, 69890, 18790, 22592, 60774, 93709, 78471, 27157, 4328, 36501, 47296, 16831 ], "estimatedBudget" : 3581496.7068344904 }
{ "_id" : ObjectId("5981a80f223e491a58230e60"), "id" : 4, "name" : "cdspkkqwvwld", "managerId" : 62042, "startDate" : "13.03.1998", "finishDate" : "20.06.2007", "projectStatus" : "OPEN", "participants" : [ 53480, 60897, 23677, 22064, 60807, 66637, 84609, 28378, 87143, 27675, 79283, 94992, 20429, 48769, 91671, 41747, 21651, 91134, 41684, 57228, 51949, 18756, 45679, 87781, 67287, 6902, 27526 ], "estimatedBudget" : 2126283.953787842 }
....
I need to find the busiest employee and list all his projects.
participants array contains employee ids who participate in the project.
I use the following query to find the busiest employee:
db.projects.aggregate(
{
$unwind: '$participants'
},
{
$addFields: {
count: 1
}
},
{
$group: {
_id : '$participants',
participation_count : {
'$sum':'$count'
}
}
},
{
$sort:{participation_count:-1}
},
{
$limit:1
}
)
This works correctly, but I have no idea how to list all of his projects.
Any ideas?
// Find the participant that appears in the most projects and list those projects.
db.projects.aggregate(
[
// One document per (project, participant) pair.
{
$unwind: '$participants'
},
// Constant 1 on every pair so it can be summed per participant below.
{
$addFields: {
count: 1
}
},
// Per participant: number of projects and the "id" of each of those projects.
{
$group: {
_id : '$participants',
participation_count : {'$sum':'$count'},
projectId : {$push: '$id'}
}
},
// Highest participation first, then keep only that one participant.
{
$sort:{participation_count:-1}
},
{
$limit:1
}
],
// Aggregation option passed alongside the pipeline.
{
allowDiskUse:true
}
)

Mongodb : get whether a document is the latest with a field value and filter on the result

I am trying to port an existing SQL schema into Mongo.
We have document tables that sometimes contain the same document several times, with a different revision but the same reference. I want to get only the latest revision of each document.
A sample input data:
{
"Uid" : "xxx",
"status" : "ACCEPTED",
"reference" : "DOC305",
"code" : "305-D",
"title" : "Document 305",
"creationdate" : ISODate("2011-11-24T15:13:28.887Z"),
"creator" : "X"
},
{
"Uid" : "xxx",
"status" : "COMMENTED",
"reference" : "DOC306",
"code" : "306-A",
"title" : "Document 306",
"creationdate" : ISODate("2011-11-28T07:23:18.807Z"),
"creator" : "X"
},
{
"Uid" : "xxx",
"status" : "COMMENTED",
"reference" : "DOC306",
"code" : "306-B",
"title" : "Document 306",
"creationdate" : ISODate("2011-11-28T07:26:49.447Z"),
"creator" : "X"
},
{
"Uid" : "xxx",
"status" : "ACCEPTED",
"reference" : "DOC501",
"code" : "501-A",
"title" : "Document 501",
"creationdate" : ISODate("2011-11-19T06:30:35.757Z"),
"creator" : "X"
},
{
"Uid" : "xxx",
"status" : "ACCEPTED",
"reference" : "DOC501",
"code" : "501-B",
"title" : "Document 501",
"creationdate" : ISODate("2011-11-19T06:40:32.957Z"),
"creator" : "X"
}
Given this data, I want this result set (sometimes I want only the last revision, sometimes I want all revisions with an attribute telling me whether it's the latest):
{
"Uid" : "xxx",
"status" : "ACCEPTED",
"reference" : "DOC305",
"code" : "305-D",
"title" : "Document 305",
"creationdate" : ISODate("2011-11-24T15:13:28.887Z"),
"creator" : "X",
"lastrev" : true
},
{
"Uid" : "xxx",
"status" : "COMMENTED",
"reference" : "DOC306",
"code" : "306-B",
"title" : "Document 306",
"creationdate" : ISODate("2011-11-28T07:26:49.447Z"),
"creator" : "X",
"lastrev" : true
},
{
"Uid" : "xxx",
"status" : "ACCEPTED",
"reference" : "DOC501",
"code" : "501-B",
"title" : "Document 501",
"creationdate" : ISODate("2011-11-19T06:40:32.957Z"),
"creator" : "X",
"lastrev" : true
}
I already have a bunch of filters, sorting, and skip/limit (for pagination of data), so the final result set should be mindful of these constraints.
The current "find" query (built with the .Net driver), which filters fine but gives me all revisions of each document:
coll.find(
{ "$and" : [
{ "$or" : [
{ "deletedid" : { "$exists" : false } },
{ "deletedid" : null }
] },
{ "$or" : [
{ "taskid" : { "$exists" : false } },
{ "taskid" : null }
] },
{ "objecttypeuid" : { "$in" : ["xxxxx"] } }
] },
{ "_id" : 0, "Uid" : 1, "lastrev" : 1, "title" : 1, "code" : 1, "creator" : 1, "owner" : 1, "modificator" : 1, "status" : 1, "reference": 1, "creationdate": 1 }
).sort({ "creationdate" : 1 }).skip(0).limit(10);
Using another question, I have been able to build this aggregation, which gives me the latest revision of each document, but with not enough attributes in the result:
coll.aggregate([
{ $sort: { "creationdate": 1 } },
{
$group: {
"_id": "$reference",
result: { $last: "$creationdate" },
creationdate: { $last: "$creationdate" }
}
}
]);
I would like to integrate the aggregate with the find query.
I have found the way to mix aggregation and filtering:
coll.aggregate(
[
{ $match: {
"$and" : [
{ "$or" : [
{ "deletedid" : { "$exists" : false } },
{ "deletedid" : null }
] },
{ "$or" : [
{ "taskid" : { "$exists" : false } },
{ "taskid" : null }
] },
{ "objecttypeuid" : { "$in" : ["xxx"] } }
]
}
},
{ $sort: { "creationdate": 1 } },
{ $group: {
"_id": "$reference",
"doc": { "$last": "$$ROOT" }
}
},
{ $sort: { "doc.creationdate": 1 } },
{ $skip: skip },
{ $limit: limit }
],
{ allowDiskUse: true }
);
For each result node, this gives me a "doc" node with the document data. It has too much data still (it's missing projections), but it's a start.
Translated in .Net:
// Build the $match filter: "deletedid" and "taskid" must each be missing or null,
// and every entry of fieldFilters adds an $in condition on its key.
FilterDefinitionBuilder<BsonDocument> filterBuilder = Builders<BsonDocument>.Filter;
FilterDefinition<BsonDocument> filters = filterBuilder.Empty;
filters = filters & (filterBuilder.Not(filterBuilder.Exists("deletedid")) | filterBuilder.Eq("deletedid", BsonNull.Value));
filters = filters & (filterBuilder.Not(filterBuilder.Exists("taskid")) | filterBuilder.Eq("taskid", BsonNull.Value));
foreach (var f in fieldFilters) {
    filters = filters & filterBuilder.In(f.Key, f.Value);
}
// Pre-group sort: orders the documents so that $last picks the latest one per reference.
var sort = Builders<BsonDocument>.Sort.Ascending(orderby);
// Post-group sort: after $group the original fields live under "doc", so the
// sort key must be prefixed (the shell pipeline uses { $sort: { "doc.creationdate": 1 } }).
// Reusing the pre-group definition would sort on a field that no longer exists.
var groupedSort = Builders<BsonDocument>.Sort.Ascending("doc." + orderby);
// One result per reference, keeping the whole last document under "doc".
var group = new BsonDocument {
    { "_id", "$reference" },
    { "doc", new BsonDocument("$last", "$$ROOT") }
};
var aggregate = coll.Aggregate(new AggregateOptions { AllowDiskUse = true })
    .Match(filters)
    .Sort(sort)
    .Group(group)
    .Sort(groupedSort)
    .Skip(skip)
    .Limit(rows);
return aggregate.ToList();
I'm pretty sure there are better ways to do this, though.
Your answer is pretty close. Instead of $last, $max is better.
About $last operator:
Returns the value that results from applying an expression to the last document in a group of documents that share the same group by a field. Only meaningful when documents are in a defined order.
Get the last revision in each group, see code below in mongo shell:
// Return the latest revision of each document, one result per "reference".
db.collection.aggregate([
// Per reference, keep the maximum of a sub-document built from the fields we need.
// NOTE(review): this relies on embedded documents being compared field by field
// in key order, which is why "creationdate" is listed first - confirm against
// the MongoDB comparison/sort-order documentation.
{
$group: {
_id: '$reference',
doc: {
$max: {
"creationdate" : "$creationdate",
"code" : "$code",
"Uid" : "$Uid",
"status" : "$status",
"title" : "$title",
"creator" : "$creator"
}
}
}
},
// Flatten "doc" back into top-level fields and expose the group key as "reference".
{
$project: {
_id: 0,
Uid: "$doc.Uid",
status: "$doc.status",
reference: "$_id",
code: "$doc.code",
title: "$doc.title",
creationdate: "$doc.creationdate",
creator: "$doc.creator"
}
}
]).pretty()
The output is as you expect:
{
"Uid" : "xxx",
"status" : "ACCEPTED",
"reference" : "DOC501",
"code" : "501-B",
"title" : "Document 501",
"creationdate" : ISODate("2011-11-19T06:40:32.957Z"),
"creator" : "X"
}
{
"Uid" : "xxx",
"status" : "COMMENTED",
"reference" : "DOC306",
"code" : "306-B",
"title" : "Document 306",
"creationdate" : ISODate("2011-11-28T07:26:49.447Z"),
"creator" : "X"
}
{
"Uid" : "xxx",
"status" : "ACCEPTED",
"reference" : "DOC305",
"code" : "305-D",
"title" : "Document 305",
"creationdate" : ISODate("2011-11-24T15:13:28.887Z"),
"creator" : "X"
}

mongodb aggregation $group and then $push a object

this is my data :
> db.bookmarks.find({"userId" : "56b9b74bf976ab70ff6b9999"}).pretty()
{
"_id" : ObjectId("56c2210fee4a33579f4202dd"),
"userId" : "56b9b74bf976ab70ff6b9999",
"items" : [
{
"itemId" : "28",
"timestamp" : "2016-02-12T18:07:28Z"
},
{
"itemId" : "29",
"timestamp" : "2016-02-12T18:07:29Z"
},
{
"itemId" : "30",
"timestamp" : "2016-02-12T18:07:30Z"
},
{
"itemId" : "31",
"timestamp" : "2016-02-12T18:07:31Z"
},
{
"itemId" : "32",
"timestamp" : "2016-02-12T18:07:32Z"
},
{
"itemId" : "33",
"timestamp" : "2016-02-12T18:07:33Z"
},
{
"itemId" : "34",
"timestamp" : "2016-02-12T18:07:34Z"
}
]
}
I want to have something like (actually i hope the _id can become userId too) :
{
"_id" : "56b9b74bf976ab70ff6b9999",
"items" : [
{ "itemId": "32", "timestamp": "2016-02-12T18:07:32Z" },
{ "itemId": "31", "timestamp": "2016-02-12T18:07:31Z" },
{ "itemId": "30", "timestamp": "2016-02-12T18:07:30Z" }
]
}
What I have now :
> db.bookmarks.aggregate(
... { $match: { "userId" : "56b9b74bf976ab70ff6b9999" } },
... { $unwind: '$items' },
... { $sort: { 'items.timestamp': -1} },
... { $skip: 2 },
... { $limit: 3},
... { $group: { '_id': '$userId' , items: { $push: '$items.itemId' } } }
... ).pretty()
{ "_id" : "56b9b74bf976ab70ff6b9999", "items" : [ "32", "31", "30" ] }
I tried to read the MongoDB documentation and found out that I can $push, but somehow I cannot find a way to push such an object, which is not defined anywhere in the whole document. I want to have the timestamp as well, but I don't know how I should modify the $group (or other stages?) to do so. Thanks for helping!
This code, which I tested in the MongoDB 3.2.1 shell, should give you the output format that you want:
> db.bookmarks.aggregate(
{ "$match" : { "userId" : "Ursula" } },
{ "$unwind" : "$items" },
{ "$sort" : { "items.timestamp" : -1 } },
{ "$skip" : 2 },
{ "$limit" : 3 },
{ "$group" : { "_id" : "$userId", items: { "$push" : { "myPlace" : "$items.itemId", "myStamp" : "$items.timestamp" } } } } ).pretty()
Running the above will produce this output:
{
"_id" : "Ursula",
"items" : [
{
"myPlace" : "52",
"myStamp" : ISODate("2016-02-13T18:07:32Z")
},
{
"myPlace" : "51",
"myStamp" : ISODate("2016-02-13T18:07:31Z")
},
{
"myPlace" : "50",
"myStamp" : ISODate("2016-02-13T18:07:30Z")
}
]
}
In MongoDB version 3.2.x, you can also use the $out operator in the very last stage of the aggregation pipeline, and have the output of the aggregation query written to a collection. Here is the code I used:
> db.bookmarks.aggregate(
{ "$match" : { "userId" : "Ursula" } },
{ "$unwind" : "$items" },
{ "$sort" : { "items.timestamp" : -1 } },
{ "$skip" : 2 },
{ "$limit" : 3 },
{ "$group" : { "_id" : "$userId", items: { "$push" : { "myPlace" : "$items.itemId", "myStamp" : "$items.timestamp" } } } },
{ "$out" : "ursula" } )
This gives me a collection named "ursula":
> show collections
ursula
and I can query that collection:
> db.ursula.find().pretty()
{
"_id" : "Ursula",
"items" : [
{
"myPlace" : "52",
"myStamp" : ISODate("2016-02-13T18:07:32Z")
},
{
"myPlace" : "51",
"myStamp" : ISODate("2016-02-13T18:07:31Z")
},
{
"myPlace" : "50",
"myStamp" : ISODate("2016-02-13T18:07:30Z")
}
]
}
>
Last of all, this is the input document I used in the aggregation query. You can compare this document to how I coded the aggregation query to see how I built the new items array.
> db.bookmarks.find( { "userId" : "Ursula" } ).pretty()
{
"_id" : ObjectId("56c240ed55f2f6004dc3b25c"),
"userId" : "Ursula",
"items" : [
{
"itemId" : "48",
"timestamp" : ISODate("2016-02-13T18:07:28Z")
},
{
"itemId" : "49",
"timestamp" : ISODate("2016-02-13T18:07:29Z")
},
{
"itemId" : "50",
"timestamp" : ISODate("2016-02-13T18:07:30Z")
},
{
"itemId" : "51",
"timestamp" : ISODate("2016-02-13T18:07:31Z")
},
{
"itemId" : "52",
"timestamp" : ISODate("2016-02-13T18:07:32Z")
},
{
"itemId" : "53",
"timestamp" : ISODate("2016-02-13T18:07:33Z")
},
{
"itemId" : "54",
"timestamp" : ISODate("2016-02-13T18:07:34Z")
}
]
}