How to get last document of each day in MongoDB collection? - mongodb

I have a model Entry to which includes details of a hospital at a particular time. The data looks like this:
{
"_id": "5ef9c7337874820008c1a026",
"date": 1593427763640,
//... some data
"hospital": {
"_id": "5ef8d06630c364000840bb6d",
"name": "City Hospital",
//... some data
},
}
I want to get the last query of each day grouped by the hospital ID. In MySQL, it can be achieved using INNER JOIN. How can I do it using MongoDB?

Given a day, calculate start and end of a day.
This is to be used for filtering records, $match
start_of_day_ephocs=
end_of_day_ephocs=
Aggregate Query
sort by date, Group by hospital id,and select first document
db.Entry.aggregate(
[
{ "$match": { "date": {"$gte":start_of_day_ephocs,"$lte":end_of_day_ephocs }} },
{ "$sort": { "date": -1 } },
{
$group:
{
"_id": "$hospital._id",
"last_document": { "$first": "$$ROOT" }
}
}
]
)

Consider a sales collection with the following documents:
{ "_id" : 1, "item" : "abc", "date" : ISODate("2014-01-01T08:00:00Z"), "price" : 10, "quantity" : 2 }
{ "_id" : 2, "item" : "jkl", "date" : ISODate("2014-02-03T09:00:00Z"), "price" : 20, "quantity" : 1 }
{ "_id" : 3, "item" : "xyz", "date" : ISODate("2014-02-03T09:05:00Z"), "price" : 5, "quantity" : 5 }
{ "_id" : 4, "item" : "abc", "date" : ISODate("2014-02-15T08:00:00Z"), "price" : 10, "quantity" : 10 }
{ "_id" : 5, "item" : "xyz", "date" : ISODate("2014-02-15T09:05:00Z"), "price" : 5, "quantity" : 10 }
{ "_id" : 6, "item" : "xyz", "date" : ISODate("2014-02-15T12:05:10Z"), "price" : 5, "quantity" : 5 }
{ "_id" : 7, "item" : "xyz", "date" : ISODate("2014-02-15T14:12:12Z"), "price" : 5, "quantity" : 10 }
The following operation first sorts the documents by item and date, and then in the following $group stage, groups the now sorted documents by the item field and uses the $last accumulator to compute the last sales date for each item:
db.sales.aggregate(
[
{ $sort: { item: 1, date: 1 } },
{
$group:
{
_id: "$item",
lastSalesDate: { $last: "$date" }
}
}
]
)
The operation returns the following results:
{ "_id" : "xyz", "lastSalesDate" : ISODate("2014-02-15T14:12:12Z") }
{ "_id" : "jkl", "lastSalesDate" : ISODate("2014-02-03T09:00:00Z") }
{ "_id" : "abc", "lastSalesDate" : ISODate("2014-02-15T08:00:00Z") }
Resource

Related

What is $$ROOT in MongoDB aggregate and how it works?

I am watching a tutorial I can understand how this aggregate works, What is the use of pings, $$ROOT in it.
client = pymongo.MongoClient(MY_URL)
pings = client['mflix']['watching_pings']
cursor = pings.aggregate([
{
"$sample": { "size": 50000 }
},
{
"$addFields": {
"dayOfWeek": { "$dayOfWeek": "$ts" },
"hourOfDay": { "$hour": "$ts" }
}
},
{
"$group": { "_id": "$dayOfWeek", "pings": { "$push": "$$ROOT" } }
},
{
"$sort": { "_id": 1 }
}
]);
Let's assume that our collection looks like below:
{
"_id" : ObjectId("b9"),
"key" : 1,
"value" : 20,
"history" : ISODate("2020-05-16T00:00:00Z")
},
{
"_id" : ObjectId("ba"),
"key" : 1,
"value" : 10,
"history" : ISODate("2020-05-13T00:00:00Z")
},
{
"_id" : ObjectId("bb"),
"key" : 3,
"value" : 50,
"history" : ISODate("2020-05-12T00:00:00Z")
},
{
"_id" : ObjectId("bc"),
"key" : 2,
"value" : 0,
"history" : ISODate("2020-05-13T00:00:00Z")
},
{
"_id" : ObjectId("bd"),
"key" : 2,
"value" : 10,
"history" : ISODate("2020-05-16T00:00:00Z")
}
Now based on the history field you want to group and insert the whole documents in to an array field 'items'. Here $$ROOT variable will be helpful.
So, the aggregation query to achieve the above will be:
db.collection.aggregate([{
$group: {
_id: '$history',
items: {$push: '$$ROOT'}
}
}])
It will result in following output:
{
"_id" : ISODate("2020-05-12T00:00:00Z"),
"items" : [
{
"_id" : ObjectId("bb"),
"key" : 3,
"value" : 50,
"history" : ISODate("2020-05-12T00:00:00Z")
}
]
},
{
"_id" : ISODate("2020-05-13T00:00:00Z"),
"items" : [
{
"_id" : ObjectId("ba"),
"key" : 1,
"value" : 10,
"history" : ISODate("2020-05-13T00:00:00Z")
},
{
"_id" : ObjectId("bc"),
"key" : 2,
"value" : 0,
"history" : ISODate("2020-05-13T00:00:00Z")
}
]
},
{
"_id" : ISODate("2020-05-16T00:00:00Z"),
"items" : [
{
"_id" : ObjectId("b9"),
"key" : 1,
"value" : 20,
"history" : ISODate("2020-05-16T00:00:00Z")
},
{
"_id" : ObjectId("bd"),
"key" : 2,
"value" : 10,
"history" : ISODate("2020-05-16T00:00:00Z")
}
]
}
I hope it helps.

MongoDB: Latest record for Each category by Timestamp

I'm new to the MongoDb world. I have the following scenario:
A sales collection with following data:
db.sales.insert({ "_id" : 1, "item" : "abc", "date" : ISODate("2014-01-01T08:00:00.0Z"), "price" : 10, "quantity" : 20 })
db.sales.insert({ "_id" : 2, "item" : "jkl", "date" : ISODate("2014-02-03T09:00:00.0Z"), "price" : 20, "quantity" : 1 })
db.sales.insert({ "_id" : 3, "item" : "xyz", "date" : ISODate("2014-02-03T09:05:00.0Z"), "price" : 5, "quantity" : 5 })
db.sales.insert({ "_id" : 4, "item" : "abc", "date" : ISODate("2014-02-15T08:00:00.0Z"), "price" : 10, "quantity" : 10 })
db.sales.insert({ "_id" : 5, "item" : "xyz", "date" : ISODate("2014-02-15T09:05:00.0Z"), "price" : 5, "quantity" : 10 })
db.sales.insert({ "_id" : 6, "item" : "xyz", "date" : ISODate("2014-02-15T12:05:10.0Z"), "price" : 5, "quantity" : 5 })
db.sales.insert({ "_id" : 7, "item" : "xyz", "date" : ISODate("2014-02-15T14:12:12.0Z"), "price" : 5, "quantity" : 10 })
db.sales.insert({ "_id" : 8, "item" : "abc", "date" : ISODate("2014-02-10T08:00:00.0Z"), "price" : 10, "quantity" : 30 })
I want to select most recent sale (Whole Document) for each item.
Expected Result:
[
{ "_id" : 2, "item" : "jkl", "date" : "2014-02-03T09:00:00Z", "price" : 20, "quantity" : 1 },
{ "_id" : 4, "item" : "abc", "date" : "2014-02-15T08:00:00Z", "price" : 10, "quantity" : 10 },
{ "_id" : 7, "item" : "xyz", "date" : "2014-02-15T14:12:12Z", "price" : 5, "quantity" : 10 }
]
I've looked up for this but could't find a solution that can retrieve the most recent whole document for each category.
I've tried the following but I only get _id and lastSalesDate.
db.sales.aggregate(
[
{ $sort: { item: 1, date: 1 } },
{
$group:
{
_id: "$item",
lastSalesDate: { $last: "$date" }
}
}
]
)
How can I retrieve all the fields?
I'm using MongoDB driver and scala application where I need to retrieve the expected result. However, any solution in MongoDB query format is also welcome.
Thank You!
Try this Query,
db.sales.aggregate([
// Sort by date (Use -1 to sort in descending order to get most recent date)
{ $sort: { date: -1 } },
// Group by Item and Store First Sorted Whole Document in itemObj
{ $group: { _id: "$item", itemObj : { $first: "$$ROOT" }} },
// Replace the Document with itemObj Object
{ $replaceRoot: { newRoot: "$itemObj"} }
])

How can I extract whole documents based on how they compare with their whole collection?

I'm trying to extract the latest available daily measurements from a "sparse" collection that might not have a measurement for every day. I'm interested in getting the whole original document as output. The collection contains several series of measurements identified by a unique id.
For example, given the following collection:
{ "date" : "2019-04-10", "id" : 1, "measurement" : 50 }
{ "date" : "2019-04-10", "id" : 2, "measurement" : 1 }
{ "date" : "2019-04-10", "id" : 3, "measurement" : 33 }
{ "date" : "2019-04-11", "id" : 1, "measurement" : 52 }
{ "date" : "2019-04-11", "id" : 3, "measurement" : 3 }
{ "date" : "2019-04-12", "id" : 1, "measurement" : 55 }
{ "date" : "2019-04-12", "id" : 2, "measurement" : 12 }
The above collection contains measurements for 3 ids. I'd like to retrieve the latest measurements for each id.
For example, the above collection should yield the following result:
{ "date" : "2019-04-12", "id" : 1, "measurement" : 55 }
{ "date" : "2019-04-12", "id" : 2, "measurement" : 12 }
{ "date" : "2019-04-11", "id" : 3, "measurement" : 3 }
So far, I'm able to extract the latest date for every ids with this:
db.control_subs.aggregate([ { $group : { _id : "$id", "last_date" : { $max : "$date" } } }, { $sort:{ "_id": 1 }} ])
But this, unfortunately, strips the actual measurement field from the output.
How could I obtain the desired output with a single MongoDB query?
You can try below aggregation query with $$ROOT operator:
db.control_subs.aggregate([
{
"$project":
{
"id": "$id",
"date": "$date",
"document": "$$ROOT" // save all fields for future usage
}
},
{
"$sort":
{ "date": -1
}
},
{
"$group":
{
"_id":{"id":"$id"},
"original_doc":{"$first":"$document"}
}
},
{
$project:
{
"original_doc.date":1, "original_doc.id":1, "original_doc.measurement":1, _id:0}
}
])
Output of above aggregation is
{ "original_doc" : { "date" : "2019-04-11", "id" : 3, "measurement" : 3 } }
{ "original_doc" : { "date" : "2019-04-12", "id" : 2, "measurement" : 12 } }
{ "original_doc" : { "date" : "2019-04-12", "id" : 1, "measurement" : 55 } }
Even you can also replace the original_doc with the help of $replaceRoot

MongoDB: Sort in combination with Aggregation group

I have a collection called transaction with below documents,
/* 0 */
{
"_id" : ObjectId("5603fad216e90d53d6795131"),
"statusId" : "65c719e6727d",
"relatedWith" : "65c719e67267",
"status" : "A",
"userId" : "100",
"createdTs" : ISODate("2015-09-24T13:15:36.609Z")
}
/* 1 */
{
"_id" : ObjectId("5603fad216e90d53d6795134"),
"statusId" : "65c719e6727d",
"relatedWith" : "65c719e6726d",
"status" : "B",
"userId" : "100",
"createdTs" : ISODate("2015-09-24T13:14:31.609Z")
}
/* 2 */
{
"_id" : ObjectId("5603fad216e90d53d679512e"),
"statusId" : "65c719e6727d",
"relatedWith" : "65c719e6726d",
"status" : "C",
"userId" : "100",
"createdTs" : ISODate("2015-09-24T13:13:36.609Z")
}
/* 3 */
{
"_id" : ObjectId("5603fad216e90d53d6795132"),
"statusId" : "65c719e6727d",
"relatedWith" : "65c719e6726d",
"status" : "D",
"userId" : "100",
"createdTs" : ISODate("2015-09-24T13:16:36.609Z")
}
When I run the below Aggregation query without $group,
db.transaction.aggregate([
{
"$match": {
"userId": "100",
"statusId": "65c719e6727d"
}
},
{
"$sort": {
"createdTs": -1
}
}
])
I get the result in expected sorting order. i.e Sort createdTs in descending order (Minimal result)
/* 0 */
{
"result" : [
{
"_id" : ObjectId("5603fad216e90d53d6795132"),
"createdTs" : ISODate("2015-09-24T13:16:36.609Z")
},
{
"_id" : ObjectId("5603fad216e90d53d6795131"),
"createdTs" : ISODate("2015-09-24T13:15:36.609Z")
},
{
"_id" : ObjectId("5603fad216e90d53d6795134"),
"createdTs" : ISODate("2015-09-24T13:14:31.609Z")
},
{
"_id" : ObjectId("5603fad216e90d53d679512e"),
"createdTs" : ISODate("2015-09-24T13:13:36.609Z")
}
],
"ok" : 1
}
If I apply the below aggregation with $group, the resultant is inversely sorted(i.e Ascending sort)
db.transaction.aggregate([
{
"$match": {
"userId": "100",
"statusId": "65c719e6727d"
}
},
{
"$sort": {
"createdTs": -1
}
},
{
$group: {
"_id": {
"statusId": "$statusId",
"relatedWith": "$relatedWith",
"status": "$status"
},
"status": {$first: "$status"},
"statusId": {$first: "$statusId"},
"relatedWith": {$first: "$relatedWith"},
"createdTs": {$first: "$createdTs"}
}
}
]);
I get the result in inverse Order i.e. ** Sort createdTs in Ascending order**
/* 0 */
{
"result" : [
{
"_id" : ObjectId("5603fad216e90d53d679512e"),
"createdTs" : ISODate("2015-09-24T13:13:36.609Z")
},
{
"_id" : ObjectId("5603fad216e90d53d6795134"),
"createdTs" : ISODate("2015-09-24T13:14:31.609Z")
},
{
"_id" : ObjectId("5603fad216e90d53d6795131"),
"createdTs" : ISODate("2015-09-24T13:15:36.609Z")
},
{
"_id" : ObjectId("5603fad216e90d53d6795132"),
"createdTs" : ISODate("2015-09-24T13:16:36.609Z")
}
],
"ok" : 1
}
Where am I wrong ?
The $group stage doesn't insure the ordering of the results. See here the first paragraph.
If you want the results to be sorted after a $group, you need to add a $sort after the $group stage.
In your case, you should move the $sort after the $group and before you ask the question : No, the $sort won't be able to use an index after the $group like it does before the $group :-).
The internal algorithm of $group seems to keep some sort of ordering (reversed apparently), but I would not count on that and add a $sort.
You are not doing anything wrong here, Its a $group behavior in Mongodb
Lets have a look in this example
Suppose you have following doc in collection
{ "_id" : 1, "item" : "abc", "price" : 10, "quantity" : 2, "date" : ISODate("2014-01-01T08:00:00Z") }
{ "_id" : 2, "item" : "jkl", "price" : 20, "quantity" : 1, "date" : ISODate("2014-02-03T09:00:00Z") }
{ "_id" : 3, "item" : "xyz", "price" : 5, "quantity" : 5, "date" : ISODate("2014-02-03T09:05:00Z") }
{ "_id" : 4, "item" : "abc", "price" : 10, "quantity" : 10, "date" : ISODate("2014-02-15T08:00:00Z") }
{ "_id" : 5, "item" : "xyz", "price" : 5, "quantity" : 10, "date" : ISODate("2014-02-15T09:05:00Z") }
{ "_id" : 6, "item" : "xyz", "price" : 5, "quantity" : 5, "date" : ISODate("2014-02-15T12:05:10Z") }
{ "_id" : 7, "item" : "xyz", "price" : 5, "quantity" : 10, "date" : ISODate("2014-02-15T14:12:12Z") }
Now if you run this
db.collection.aggregate([{ $sort: { item: 1,date:1}} ] )
the output will be in ascending order of item and date.
Now if you add group stage in aggregation pipeline it will reverse the order.
db.collection.aggregate([{ $sort: { item: 1,date:1}},{$group:{_id:"$item"}} ] )
Output will be
{ "_id" : "xyz" }
{ "_id" : "jkl" }
{ "_id" : "abc" }
Now the solution for your problem
change "createdTs": -1 to "createdTs": 1 for group

How to ensure grouping via two separate criteria

Expanded from How to average the summed up values in mongodb?
Using MongoDB 2.4.8,
I have the following records
{
"category" : "TOYS",
"price" : 12,
"status" : "online",
"_id" : "35043"
}
{
"category" : "TOYS",
"price" : 13,
"status" : "offline",
"_id" : "35044"
}
{
"category" : "TOYS",
"price" : 22,
"status" : "online",
"_id" : "35045"
}
{
"category" : "BOOKS",
"price" : 13,
"status" : "offline",
"_id" : "35046"
}
{
"category" : "BOOKS",
"price" : 17,
"status" : "online",
"_id" : "35047"
}
{
"category" : "TOYS",
"price" : 19,
"status" : "unavailable",
"_id" : "35048"
}
{
"category" : "BOOKS",
"price" : 10,
"status" : "unavailable",
"_id" : "35049"
}
{
"category" : "BOOKS",
"price" : 17,
"status" : "unavailable",
"_id" : "35050"
}
I want to find the average price of all categories whose status is online OR offline and total price within a category is more than 50.
Toys offline and Toys online are considered two separate categories.
I adapted the answer given.
db.items.aggregate([
{$match:
{
$or: [
{status:"online"},
{status:"offline"}
]
}
},
{$group :
{
_id: "$category",
total_price: {$sum:"$price"},
}
},
{$match:
{
total_price:{$gt:50}
}
},
{$group :
{
_id: "1",
avg_price: {$avg:"$total_price"},
}
},
]);
But I believe this query I adapted grouped categories of the same name together which is not what I am looking for.
If online and offline are the only values for status, you can remove the initial $match step. If it is needed, it would be more appropriate to use the $in operator as these values could be found in the same index (if one existed).
I think the only step you are missing is that you can $group by multiple fields (i.e. category and status):
db.items.aggregate(
// If 'online' and 'offline' are the only possible status values, this may be unnecessary
{ $match: {
'status' : { $in: [ 'online', 'offline' ] }
}},
// Group by category & status
{ $group: {
_id: { category: "$category", status: "$status" },
total_price: { $sum: "$price" },
}},
// Only find groups where total_price is > 50
{ $match: {
total_price: { $gt:50 }
}},
// Find the average price for the group
{ $group : {
_id: null,
avg_price: {$avg:"$total_price"},
}}
)