MongoDB query to get last record of each id - mongodb

I want to get last record of each sender.id based on createdAt using mongodb query.
Sample json:
{
"code" : "34242342",
"name" : "name1",
"amount" : 200,
"sender" : {
"id" : "fsrfsr3242",
"name" : "name2",
"phone" : "12345678",
"category": "cat1"
},
"receiver" : {
"id" : "42342rewr",
"name" : "naem3",
"phone" : "5653679755"
},
"message" : "",
"status" : "done",
"createdAt" : ISODate("2019-09-27T09:17:32.597Z")
}
Query i tried:
[{
$match: {
'sender.category': "cat1"
}
}, {
$group: {
_id: "$sender.id",
lastrecord: {
$last: "$createdAt"
}
}
}]
I want to return entire json as above with only last record of each sender.id. Above query is only giving me only last date , How do i return entire json using aggregation pipeline?
I am using groupby because each sender.id can have multiple records of which i only want to retrieve the last one.

You can use $$ROOT variable to get the whole last document
[
{ "$match": { "sender.category": "cat1" }},
{ "$sort": { "$createdAt": 1 }},
{ "$group": {
"_id": "$sender.id",
"lastrecord": {
"$last": "$$ROOT"
}
}}
]

Related

MongoDB - sum specific array element under conditions exclude duplicate

I have a bunch of docs that look like below:
{
"_id" : ObjectId("8f30b453c2ece001364dc04d"),
"SessionId" : "awkuTQjj53kgqAZ4J",
"StartDate" : ISODate("2020-02-24T11:51:36.918+0000"),
"EndDate" : ISODate("2020-02-24T11:51:36.918+0000"),
"List1" : "X",
"List2" : "Y",
"rating" : [
{
"ObjectId" : "5d09e98380c5d5eb89ac5069",
"List" : "List 2",
"Rate" : NumberInt(5),
"RatedDate" : ISODate("2020-02-24T11:55:47.774+0000")
},
{
"ObjectId" : "5d09e98380c5d5eb89ac5069",
"List" : "List 2",
"Rate" : NumberInt(4),
"RatedDate" : ISODate("2020-02-24T11:55:48.408+0000")
},
{
"ObjectId" : "5d09e98380c5d5eb89ac505b",
"List" : "List 2",
"Rate" : NumberInt(3),
"RatedDate" : ISODate("2020-02-24T11:55:49.520+0000")
},
{
"ObjectId" : "5d09e98380c5d5eb89ac505c",
"List" : "List 2",
"Rate" : NumberInt(3),
"RatedDate" : ISODate("2020-02-24T11:55:51.787+0000")
},
{
"ObjectId" : "5d09e98380c5d5eb89ac5057",
"List" : "List 1",
"Rate" : NumberInt(4),
"RatedDate" : ISODate("2020-02-24T11:55:53.865+0000")
},
{
"ObjectId" : "5d09e98380c5d5eb89ac5058",
"List" : "List 1",
"Rate" : NumberInt(4),
"RatedDate" : ISODate("2020-02-24T11:55:53.865+0000")
},
],
"Answers" : {
"SelectedList" : "1",
},
}
I need to sum up all the rating.Rate where rating.List:'List 1' and respectively sum up all rating.Rate where rating.List:'List 2', also exclude duplicate records (by rating.ObjectId) and count only the ones with latest rating.RatedDate. I suppose this is a group aggregation.
Also they should match the criteria
List1:'X' ,
Answers.selectedList:1
What I have written looks like below so far:
[
{
"$match" : {
"List1" : "X",
"Answers.SelectedList" : "1"
}
},
{
"$unwind" : {
"path" : "$rating"
}
},
{
"$group" : {
"_id" : null,
"sum" : {
"$sum" : "$Rate"
}
}
}
]
can you please help me?
I was a little confused around the List1/List2 however I think this will get you most of the way to your required aggregation query.
db.test.aggregate([
{
$match: {
"List1": "X",
"Answers.SelectedList": "1"
}
},
{
"$unwind" : "$rating"
},
{
$group:{
_id: {
id: "$rating.ObjectId",
list: "$rating.List"
},
maxRatedDate: { $max: "$rating.RatedDate" },
ratings: { $push: "$rating" }
}
},{
$addFields: {
ratings: {
$filter: {
input: "$ratings",
as: "item",
cond: { $eq: [ "$$item.RatedDate", "$maxRatedDate" ] }
}
}
}
},
{
$unwind: "$ratings"
},
{
$group:{
_id: "$ratings.List",
sum : {
$sum : "$ratings.Rate"
}
}
}
])
This will output the following
{ "_id" : "List 1", "sum" : 8 }
{ "_id" : "List 2", "sum" : 10 }
However, let's try to break it down.
To start with we've got a simple match, the same as yours in your question. this just limits the number of documents we pass back
$match: {
"List1": "X",
"Answers.SelectedList": "1"
}
Then we unwind all the array items so we get a document for each rating, this allows us to do some extra querying on the data.
{
"$unwind" : "$rating"
}
Next, we've got a group by, here we're a group on the ObjectId of the rating so we can later remove duplicates, we're also finding out in the group which rating we've group has the highest date so we can take that one later in a projection. we're then pushing all the rating back in the array for later.
$group:{
_id: {
id: "$rating.ObjectId",
list: "$rating.List"
},
maxRatedDate: { $max: "$rating.RatedDate" },
ratings: { $push: "$rating" }
}
Next we want to project the ratings array in to a single element in which it only contains the latest rating, for this we use a $filter on the array and filter them all out that don't match our max date we calculated in our previous step.
$addFields: {
ratings: {
$filter: {
input: "$ratings",
as: "item",
cond: { $eq: [ "$$item.RatedDate", "$maxRatedDate" ] }
}
}
}
The next two steps are fairly simple and are just unwinding the array again (we've only got one element, then grouping them to get the total sum for the lists.
{
$unwind: "$ratings"
},
{
$group:{
_id: "$ratings.List",
sum : {
$sum : "$ratings.Rate"
}
}
}
At this point you only need to provide the $group stage with the field that you're actually grouping on as the _id field and reference the fields properly as they are still inside of the rating array:
"$group" : {
"_id" : "$rating.List",
"sum" : {
"$sum" : "$rating.Rate"
}
}

Mongodb aggregate by day and delete duplicate value

I'm trying to clean a huge database.
Sample DB :
{
"_id" : ObjectId("59fc5249d5ab401d99f3de7f"),
"addedAt" : ISODate("2017-11-03T11:26:01.744Z"),
"__v" : 0,
"check" : 17602,
"lastCheck" : ISODate("2018-04-05T11:47:00.609Z"),
"tracking" : [
{
"timeCheck" : ISODate("2017-11-06T13:17:20.861Z"),
"_id" : ObjectId("5a0060e00f3c330012bafe39"),
"rank" : 2395,
},
{
"timeCheck" : ISODate("2017-11-06T13:22:31.254Z"),
"_id" : ObjectId("5a0062170f3c330012bafe77"),
"rank" : 2395,
},
{
"timeCheck" : ISODate("2017-11-06T13:27:40.551Z"),
"_id" : ObjectId("5a00634c0f3c330012bafebe"),
"rank" : 2379,
},
{
"timeCheck" : ISODate("2017-11-06T13:32:41.084Z"),
"_id" : ObjectId("5a0064790f3c330012baff03"),
"rank" : 2395,
},
{
"timeCheck" : ISODate("2017-11-06T13:37:51.012Z"),
"_id" : ObjectId("5a0065af0f3c330012baff32"),
"rank" : 2379,
},
{
"timeCheck" : ISODate("2017-11-07T13:37:51.012Z"),
"_id" : ObjectId("5a0065af0f3c330012baff34"),
"rank" : 2379,
}]
}
I have a lot of duplicate value but I need to clean only by day.
To obtain this for example :
{
"_id" : ObjectId("59fc5249d5ab401d99f3de7f"),
"addedAt" : ISODate("2017-11-03T11:26:01.744Z"),
"__v" : 0,
"check" : 17602,
"lastCheck" : ISODate("2018-04-05T11:47:00.609Z"),
"tracking" : [
{
"timeCheck" : ISODate("2017-11-06T13:17:20.861Z"),
"_id" : ObjectId("5a0060e00f3c330012bafe39"),
"rank" : 2395,
},
{
"timeCheck" : ISODate("2017-11-06T13:27:40.551Z"),
"_id" : ObjectId("5a00634c0f3c330012bafebe"),
"rank" : 2379,
},
{
"timeCheck" : ISODate("2017-11-07T13:37:51.012Z"),
"_id" : ObjectId("5a0065af0f3c330012baff34"),
"rank" : 2379,
}]
}
How can I aggregate by day and after delete last value duplicate?
I need to keep the values per day even if they are identical with another day.
The aggregation framework cannot update data at this stage. However, you can use the following aggregation pipeline in order to get the desired output and then use e.g. a bulk replace to update all your documents:
db.collection.aggregate({
$unwind: "$tracking" // flatten the "tracking" array into separate documents
}, {
$sort: {
"tracking.timeCheck": 1 // sort by timeCheck to allow us to use the $first operator in the next stage reliably
}
}, {
$group: {
_id: { // group by
"_id": "$_id", // "_id" and
"rank": "$tracking.rank", // "rank" and
"date": { // the "date" part of the "timeCheck" field
$dateFromParts : {
year: { $year: "$tracking.timeCheck" },
month: { $month: "$tracking.timeCheck" },
day: { $dayOfWeek: "$tracking.timeCheck" }
}
}
},
"doc": { $first: "$$ROOT" } // only keep the first document per group
}
}, {
$sort: {
"doc.tracking.timeCheck": 1 // restore ascending sort order - may or may not be needed...
}
}, {
$group: {
_id: "$_id._id", // merge everything again per "_id"
"addedAt": { $first: "$doc.addedAt" },
"__v": { $first: "$doc.__v" },
"check": { $first: "$doc.check" },
"lastCheck": { $first: "$doc.lastCheck" },
"tracking": { $push: "$doc.tracking" } // in order to join the tracking values into an array again
}
})

Multiple Nested Group Within Array

I'm having group of elements in MongoDB as given below:
/* 1 */
{
"_id" : ObjectId("58736c7f7d43c305461cdb9b"),
"Name" : "Kevin",
"pb_event" : [
{
"event_type" : "Birthday",
"event_date" : "2014-08-31"
},
{
"event_type" : "Anniversary",
"event_date" : "2014-08-31"
}
]
}
/* 2 */
{
"_id" : ObjectId("58736cfc7d43c305461cdba8"),
"Name" : "Peter",
"pb_event" : [
{
"event_type" : "Birthday",
"event_date" : "2014-08-31"
},
{
"event_type" : "Anniversary",
"event_date" : "2015-03-24"
}
]
}
/* 3 */
{
"_id" : ObjectId("58736cfc7d43c305461cdba9"),
"Name" : "Pole",
"pb_event" : [
{
"event_type" : "Birthday",
"event_date" : "2015-03-24"
},
{
"event_type" : "Work Anniversary",
"event_date" : "2015-03-24"
}
]
}
Now I want the result that has group on event_date then after group on event_type. event_type contain all names of the related user, then count of records in the respective array.
Expected Output
/* 1 */
{
"event_date" : "2014-08-31",
"data" : [
{
"event_type" : "Birthday",
"details" : [
{
"_id" : ObjectId("58736c7f7d43c305461cdb9b"),
"name" : "Kevin"
},
{
"_id" : ObjectId("58736cfc7d43c305461cdba8"),
"name" : "Peter"
}
],
"count" : 2
},
{
"event_type" : "Anniversary",
"details" : [
{
"_id" : ObjectId("58736c7f7d43c305461cdb9b"),
"name" : "Kevin"
}
],
"count" : 1
}
]
}
/* 2 */
{
"event_date" : "2015-03-24",
"data" : [
{
"event_type" : "Anniversary",
"details" : [
{
"_id" : ObjectId("58736cfc7d43c305461cdba8"),
"name" : "Peter"
}
],
"count" : 1
},
{
"event_type" : "Birthday",
"details" : [
{
"_id" : ObjectId("58736cfc7d43c305461cdba9"),
"name" : "Pole"
}
],
"count" : 1
},
{
"event_type" : "Work Anniversary",
"details" : [
{
"_id" : ObjectId("58736cfc7d43c305461cdba9"),
"name" : "Pole"
}
],
"count" : 1
}
]
}
Using the aggregation framework, you would need to run a pipeline that has the following stages so that you get the desired result:
db.collection.aggregate([
{ "$unwind": "$pb_event" },
{
"$group": {
"_id": {
"event_date": "$pb_event.event_date",
"event_type": "$pb_event.event_type"
},
"details": {
"$push": {
"_id": "$_id",
"name": "$Name"
}
},
"count": { "$sum": 1 }
}
},
{
"$group": {
"_id": "$_id.event_date",
"data": {
"$push": {
"event_type": "$_id.event_type",
"details": "$details",
"count": "$count"
}
}
}
},
{
"$project": {
"_id": 0,
"event_date": "$_id",
"data": 1
}
}
])
In the above pipeline, the first step is the $unwind operator
{ "$unwind": "$pb_event" }
which comes in quite handy when the data is stored as an array. When the unwind operator is applied on a list data field, it will generate a new record for each and every element of the list data field on which unwind is applied. It basically flattens the data.
This is a necessary operation for the next pipeline stage, the $group step where you group the flattened documents by the deconstructed pb_event array fields event_date and event_type:
{
"$group": {
"_id": {
"event_date": "$pb_event.event_date",
"event_type": "$pb_event.event_type"
},
"details": {
"$push": {
"_id": "$_id",
"name": "$Name"
}
},
"count": { "$sum": 1 }
}
},
The $group pipeline operator is similar to the SQL's GROUP BY clause. In SQL, you can't use GROUP BY unless you use any of the aggregation functions. The same way, you have to use an aggregation function in MongoDB (called an accumulator operator) as well. You can read more about the aggregation functions here.
In this $group operation, the logic to calculate the count aggregate i.e. the total number of documents in the group using the $sum accumulator operator. Within the same pipeline, you can aggregate a list of the name and _id subdocuments by using the $push operator which returns an array of expression values for each group.
The preceding $group pipeline
{
"$group": {
"_id": "$_id.event_date",
"data": {
"$push": {
"event_type": "$_id.event_type",
"details": "$details",
"count": "$count"
}
}
}
}
will further aggregate the results from the last pipeline by grouping on the event_date, which forms basis of the desired output by creating a new data list using $push and then the final $project pipeline stage
{
"$project": {
"_id": 0,
"event_date": "$_id",
"data": 1
}
}
reshapes the documents fields by renaming the _id field to event_date and retaining the other field.

MongoDb aggregation framework value of a field where max another field

I have a collection that has records looking like this:
"_id" : ObjectId("550424ef2f44472856286d56"), "accountId" : "123",
"contactOperations" :
[
{ "contactId" : "1", "operation" : 1, "date" : 500 },
{ "contactId" : "1", "operation" : 2, "date" : 501 },
{ "contactId" : "2", "operation" : 1, "date" : 502 }
]
}
I want to know the latest operation number that has been applied on a certain contact.
I'm using the aggregation framework to first unwind the contactOperations and then grouping by accountId and contactOperations.contactId and max contactOperations.date.
aggregate([{$unwind : "$contactOperations"}, {$group : {"_id":{"accountId":"$accountId", "contactId":"$contactOperations.contactId"}, "date":{$max:"$contactOperations.date"} }}])
The result I get is:
"_id" : { "accountId" : "123", "contactId" : "2" }, "time" : 502 }
"_id" : { "accountId" : "123", "contactId" : "1" }, "time" : 501 }
Which seems correct so far, but I also need the contactOperations.operation field that was recorded with $max date. How can I select that?
You have to sort the unwind values then apply $last operator to get operation for max date. Hope this query will solve your problem.
aggregate([
{
$unwind: "$contactOperations"
},
{
$sort: {
"date": 1
}
},
{
$group: {
"_id": {
"accountId": "$accountId",
"contactId": "$contactOperations.contactId"
},
"date": {
$max: "$contactOperations.date"
},
"operationId": {
$last: "$contactOperations.operation"
}
}
}
])

add where condition in aggregate and group function in mongodb

I have mongo model lets say MYLIST containing data like:-
{
"_id" : ObjectId("542139f31284ad1461dbc15f"),
"Category" : "CENTER",
"Name" : "STAND",
"Url" : "center/stand",
"Img" : [ {
"url" : "www.google.com/images",
"main" : "1",
"home" : "1",
"id" : "34faf230-43cf-11e4-8743-311ea2261289"
},
{
"url" : "www.google.com/images1",
"main" : "1",
"home" : "0",
"id" : "34faf230-43cf-11e4-8743-311e66441289"
} ]
}
I execute the following query to the MYLIST collection:
db.MYLIST.aggregate([
{ "$group": {
"_id": "$Category",
"Name": { "$addToSet": {
"name": "$Name",
"url": "$Url",
"img": "$Img"
}}
}},
{ "$sort": { "_id" : 1 } }
]);
And I got the following result -
[
{ _id: 'CENTER',
Name:
[ { "name" : "Stand",
"url" : "center/stand",
"img": { "url" : "www.google.com/images" , "main" : "1", "home" : "1", "id" : "350356a0-43cf-11e4-8743-311ea2261289" }
}]
},
{ _id: 'CENTER',
Name:
[ { "name" : "Stand",
"url" : "center/stand",
"img": { "url" : "www.google.com/images1" , "main" : "1", "home" : "0", "id" : "34faf230-43cf-11e4-8743-311ea2261289" }
}]
}
]
As you can see my img key itself is an array of objects, Hence I am getting multiple entries for the same category of each entry in img array.
What I actually need is to get only those images that have some value for home key.
expected result:-
[
{ _id: 'CENTER',
Name:
[ { "name" : "Stand",
"url" : "center/stand",
"img": { "url" : "www.google.com/images" , "main" : "1", "home" : "1", "id" : "350356a0-43cf-11e4-8743-311ea2261289" }
}]
},
]
Hence I would like to add where the condition for img.home > 0 on the above-mentioned query, Could anybody help me to resolve this issue as my relatively new to MongoDB.
Still really not sure if this is what you want or even why you would be using $addToSet on this grouping. But if all you want to do is "filter" the content of the array returned in your result, then what you want to do is $match the array elements to your condition after processing an $unwind pipeline in order to "de-normalize" the content:
db.MYLIST.aggregate([
// If you only want those matching array members it makes sense to match the
// documents that contain them first
{ "$match": { "Img.home": 1 } },
// Unwind to de-normalize or "un-join" the documents
{ "$unwind": "$Img" },
// Match again to "filter" out those elements that do not match
{ "$match": { "Img.home": 1 } },
// Then do your grouping
{ "$group": {
"_id": "$Category",
"Name": {
"$addToSet": {
"name": "$Name",
"url": "$Url",
"img": "$Img"
}
}
}},
// Finally sort
{ "$sort": { "_id" : 1 } }
]);
So the $match pipeline is the equivalent of a general query or "where clause" in SQL terms, and can be used at any stage. It is usually best to have this as a first stage when there is some type of filtering that results from this. It reduces the overall load by reducing documents to be processed even if "all" of the end results are not removed as would be the case of working with an array.
The $unwind stage allows the array elements to be processed just like another document. And of course you can just use another $match pipeline stage after this in order to just match the documents to your query condition.