Get documents given a range (up and down, not between) - mongodb

Given a date, is it possible to get the documents closest to that date on both sides, i.e. the nearest documents whose timestamp is greater than that date and the nearest ones whose timestamp is lower?
Current code:
// Question's attempt: a $match stage followed by a $limit stage.
// Note: the two stages are passed as separate arguments, not as a single pipeline array.
db.collection.aggregate({
$match: {
// The two $or branches ($gte and $lt against the same instant) are complementary,
// so together they accept every date-valued timestamp rather than only the nearest ones.
$or: [
{
"timestamp": {
$gte: new Date("2021-05-27T14:40:46Z")
}
},
{
"timestamp": {
$lt: new Date("2021-05-27T14:40:46Z")
}
}
]
}
},
{
// Caps the output at 5 documents; no $sort stage precedes it.
$limit: 5
})
Playground

I don't think there is a straightforward way to do this. As a workaround, you can try the query below if it's really required:
$facet to separate the result for old date and new date as per conditions
old,
$match to check $lt condition
$sort by timestamp in descending order
$limit 2 documents
$sort by timestamp in ascending order
new,
$match to check $gte condition
$sort by timestamp in ascending order
$limit 3 documents
$project, $concatArrays to concat both arrays in single
The below process is optional if you want to format exactly what you need then use,
$unwind to deconstruct the above array
$replaceRoot to replace docs object to root
// Returns the 2 documents just before and the 3 documents at/after the given
// instant, as a flat list ordered by ascending timestamp.
db.collection.aggregate([
  {
    $facet: {
      // Nearest earlier documents: take the 2 latest ones below the instant,
      // then flip them back to ascending order.
      old: [
        { $match: { timestamp: { $lt: new Date("2021-05-27T14:40:46Z") } } },
        { $sort: { timestamp: -1 } },
        { $limit: 2 },
        { $sort: { timestamp: 1 } },
      ],
      // Nearest later documents: the 3 earliest ones at or above the instant.
      new: [
        { $match: { timestamp: { $gte: new Date("2021-05-27T14:40:46Z") } } },
        { $sort: { timestamp: 1 } },
        { $limit: 3 },
      ],
    },
  },
  // Join both facet arrays into one array, "old" first.
  {
    $project: {
      docs: { $concatArrays: ["$old", "$new"] },
    },
  },
  // Emit one output document per array element ...
  { $unwind: "$docs" },
  // ... and promote that element to be the root document.
  { $replaceRoot: { newRoot: "$docs" } },
])
Playground

Related

How to remove duplicate objects with different parameters in an aggregation in mongo db

[
{ id:1,month:5,year:2020,text:"Completed" },
{ id:2,month:2,year:2021,text:"Pending" },
{ id:3,month:3,year:2020,text:"Completed" },
{ id:4,month:5,year:2020,text:"Pending" },
{ id:5,month:4,year:2022,text:"Pending" },
]
These are the documents in my collection. I need to remove the duplicate objects with the same year & month using aggregation in MongoDB, so that I get
[
{ id:1,month:5,year:2020,text:"Completed" },
{ id:2,month:2,year:2021,text:"Pending" },
{ id:3,month:3,year:2020,text:"Completed" },
{ id:5,month:4,year:2022,text:"Pending" },
]
Maybe something like this:
// Finds documents sharing the same (month, year) pair and, for each such group,
// moves every document except the first collected one from `collection` into `backup`.
db.collection.aggregate([
  // Group by month/year, counting members and collecting the full original documents.
  {
    $group: {
      _id: { month: "$month", year: "$year" },
      cnt: { $sum: 1 },
      doc: { $push: "$$ROOT" },
    },
  },
  // Only groups with more than one member contain duplicates.
  { $match: { cnt: { $gt: 1 } } },
  // Skip the first collected document (the one that stays); the rest are to be deleted.
  {
    $project: {
      docsTodelete: { $slice: ["$doc", 1, { $size: "$doc" }] },
    },
  },
  // One output document per duplicate to remove.
  { $unwind: "$docsTodelete" },
]).forEach(function (doc) {
  // The projected field is spelled `docsTodelete`; use the same spelling everywhere.
  const duplicate = doc.docsTodelete;

  // Back up first. An upserting replace keeps the insert-or-overwrite behaviour
  // of the legacy save() helper, so re-running the script does not fail.
  db.backup.replaceOne({ _id: duplicate._id }, duplicate, { upsert: true });

  // The filter must be a document; _id is unique, so exactly one document is removed.
  db.collection.deleteOne({ _id: duplicate._id });
});
explained:
Group the documents by month-year and push the originals to array doc
Match only the documents that have duplicates
Slice the documents array to leave 1x document in the collection
Unwind the array with documents to be removed
Do forEach loop to remove the duplicated documents from the collection and store the removed in backup collection just in case you have doubts later.

How to find duplicate records based on an id and a datetime field in MongoDB?

I have a MongoDB collection with millions of record. Sample records are shown below:
[
{
_id: ObjectId("609977b0e8e1c615cb551bf5"),
activityId: "123456789",
updateDateTime: "2021-03-24T20:12:02Z"
},
{
_id: ObjectId("739177b0e8e1c615cb551bf5"),
activityId: "123456789",
updateDateTime: "2021-03-24T20:15:02Z"
},
{
_id: ObjectId("805577b0e8e1c615cb551bf5"),
activityId: "123456789",
updateDateTime: "2021-03-24T20:18:02Z"
}
]
Multiple records could have the same activityId; in this case I want just the record that has the largest updateDateTime.
I have tried doing this and it works fine on a smaller collection but times out on a large collection.
// Self-join on MY_TABLE: for every record, collect the records with the same
// activityId and a greater updateDateTime, then keep only the _id of records
// for which at least one such record exists.
[
  {
    $lookup: {
      from: "MY_TABLE",
      // Expose the current record's fields to the sub-pipeline.
      let: {
        existing_date: "$updateDateTime",
        existing_sensorActivityId: "$activityId",
      },
      pipeline: [
        {
          $match: {
            $expr: {
              $and: [
                // Same activity ...
                {
                  $eq: ["$activityId", "$$existing_sensorActivityId"],
                },
                // ... with a greater updateDateTime than the current record.
                {
                  $gt: ["$updateDateTime", "$$existing_date"],
                },
              ],
            },
          },
        },
      ],
      as: "matched_records",
    },
  },
  // Keep records that have at least one joined element.
  {
    $match: {
      "matched_records.0": { $exists: true },
    },
  },
  // Only the identifiers are returned.
  {
    $project: { _id: 1 },
  },
]
This gives me _ids for all the records which have the same activity id but smaller updateDateTime.
The slowness occurs at this step -> "matched_records.0": {$exists:true}
Is there a way to speed up this step or are there any other approach to this problem?
You can find unique documents and write result in new collection using $out instead of finding duplicate documents and deleting them,
How to find unique documents?
$sort by updateDateTime in descending order
$group by activityId and get first root record
$replaceRoot to replace record in root
$out to write query result in new collection
// Keeps one record per activityId (the first one after sorting by
// updateDateTime descending) and writes the result to another collection.
[
  // Descending order, so the first record seen per group has the largest updateDateTime.
  {
    $sort: { updateDateTime: -1 },
  },
  // One group per activityId, holding the whole first record.
  {
    $group: {
      _id: "$activityId",
      record: { $first: "$$ROOT" },
    },
  },
  // Restore the original document shape.
  {
    $replaceRoot: { newRoot: "$record" },
  },
  // Target collection for the result; replace the name as needed.
  {
    $out: "newCollectionName",
  },
]
Playground

In MongoDB, how to ignore documents that was inserted less than five minutes of time from the time of fetching

// Groups results by testId (most recently inserted first, by _id) and returns
// at most 30 { result, testName } entries per test as JSON.
testResults.aggregate([
  // Only documents that carry a testId.
  { $match: { testId: { $exists: true } } },
  // Descending _id order before grouping.
  { $sort: { _id: -1 } },
  {
    $group: {
      _id: "$testId",
      testDetails: {
        $push: {
          result: "$result",
          testName: "$testName",
        },
      },
    },
  },
  // Trim each group's list to its first 30 entries.
  {
    $addFields: {
      testDetails: { $slice: ["$testDetails", 30] },
    },
  },
])
  .exec(function (err, results) {
    if (err) {
      // Return here: falling through would try to send a second response
      // for the same request after the error has already been sent.
      return res.send(err);
    }
    res.json(results);
  });
Using this aggregate method, I am fetching recent 30 documents.
Now, I need to ignore documents that were inserted less than five minutes before the time of fetching.
In, $match aggregate how to achieve this or is there any other way?
A $match stage like this should do:
{
$match: {
$expr: {
$lt: [
{$toDate: '$_id'},
{$subtract: [new Date(), 300000]}
]
}
}
}
The 300000 is 5 minutes in milliseconds.

Aggregate and $or operator in MongoDB

This is my mongodb query:
// Counts bookings per fullDate for the given month and year, filtered by room.
Booking.aggregate([
  {
    $match: {
      $and: [
        // Matches when EITHER condition holds; $or does not pick one filter
        // over the other, which is the behaviour the question is about.
        { $or: [{ isDoubleRoom }, { chosenRoom }] },
        { month },
        { year },
      ],
    },
  },
  // One output document per fullDate with the number of matching bookings.
  { $group: { _id: "$fullDate", count: { $sum: 1 } } },
]); // closing parenthesis of aggregate( was missing
In a first stage I would like to filter out by month, year and conditionally: if isDoubleRoom then filter only by double rooms, if it is not then filter by chosenRoom property. The thing is that $or does not switch between filters. Query returns not filtered (by chosen isDoubleRoom $or chosenRoom) results. The same worked when I used it with find instead of aggregate. But here I need aggregate in order to count filtered results.
You should use $cond
{
$match: {
$and: [
{
$cond: [{isDoubleRoomBool}, {isDoubleRoom} , {chosenRoom}] //
},
{ month },
{ year },
]
}
}

How to know that aggregated group has previous/next values?

Suppose I have the following aggregation pipeline:
// Question's sketch: filter posts by tag and by _id greater than a cursor value,
// keep at most 5 of them, then collapse them into a single group.
db.getCollection('posts').aggregate([
{ $match: { _id: { $gt: "some id" }, tag: 'some tag' } },
{ $limit: 5 },
// {??} marks the expressions the question asks how to write; this line is a placeholder, not runnable code.
{ $group: { _id: null, hasNextPage: {??}, hasPreviousPage: {??} } }
])
As a result $match and $limit stages would result in a subset of all the posts with a tag some tag. How can I know that there're posts before and after my subSet?
One of the possible ways, I guess, is to have an expression (with $let) inside hasPreviousPage and hasNextPage that would search for one post with _id less than "some id" and greater than $last: "$_id" respectively. But I'm not sure how I can reference my group as a variable in $let. Also, maybe there are some other, more effective ways.
You can use below aggregation:
// Returns one document: { data: [...up to 5 posts after the cursor...],
// hasPreviousPage: <bool>, hasNextPage: <bool> } for posts with the given tag.
db.posts.aggregate([
  { $match: { tag: 'some tag' } },
  // Sort so that paging is applied over a deterministic order.
  { $sort: { _id: 1 } },
  {
    $facet: {
      // The requested page: up to 5 posts after the cursor.
      data: [
        { $match: { _id: { $gt: 'some id' } } },
        { $limit: 5 },
      ],
      hasPreviousPage: [
        { $match: { _id: { $lte: 'some id' } } },
        // Only existence matters, so stop at the first match (same as hasNextPage).
        { $limit: 1 },
        { $count: "totalPrev" },
      ],
      hasNextPage: [
        { $match: { _id: { $gt: 'some id' } } },
        // Skip the current page ...
        { $skip: 5 },
        // ... and check whether anything is left after it.
        { $limit: 1 },
        { $count: "totalNext" },
      ],
    },
  },
  // Each facet result is an array; unwind it to a nested document. Empty arrays
  // are preserved so the single result document is not dropped.
  {
    $unwind: { path: "$hasPreviousPage", preserveNullAndEmptyArrays: true },
  },
  {
    $unwind: { path: "$hasNextPage", preserveNullAndEmptyArrays: true },
  },
  // Turn the counts into booleans.
  {
    $project: {
      data: 1,
      hasPreviousPage: { $gt: ["$hasPreviousPage.totalPrev", 0] },
      hasNextPage: { $gt: ["$hasNextPage.totalNext", 0] },
    },
  },
])
To apply any paging you have to $sort your collection to get results in a deterministic order. On a set that's sorted and filtered by tag you can run $facet, which allows you to apply multiple sub-aggregations. The pipelines that represent the previous and next pages can end with $count. Every sub-aggregation in $facet returns an array, so we can run $unwind to get a nested document instead of an array for hasPreviousPage and hasNextPage. The preserveNullAndEmptyArrays option is required here because otherwise MongoDB would remove the whole document from the aggregation pipeline if there are no prev / next documents. In the last step we can just convert the sub-aggregation results to boolean values.