MongoDB compare endTime with startTime of next document - mongodb

I have a similar collection where I have sort them by their startTime:
{"name": 'A', "startTime": '1634626355', "endTime": '1634631405'}
{"name": 'A', "startTime": '1634631406', "endTime": '1634631864'}
{"name": 'A', "startTime": '1634631865', "endTime": '1634656048'}
{"name": 'A', "startTime": '1634712642', "endTime": '1634718856'}
How can I compare the documents such that if the document endTime and the next document startTime duration is less than 5 minutes, merge it.
This is the result I'm trying to achieve (The 1st 3 documents are merged into 1 where it uses the startTime of the 1st document and the endTime of the 3rd document):
{"name": 'A', "startTime": '1634626355', "endTime": '1634656048'}
{"name": 'A', "startTime": '1634712642', "endTime": '1634718856'}
Thanks

First of all, you should never store date/time values as string, it's a design flaw. Store always proper Date object.
This solution works without self-lookup, so it may perform better:
db.collection.aggregate([
{
$set: {
startDateTime: { $toDate: { $multiply: ["$startTime", 1000] } },
endDateTime: { $toDate: { $multiply: ["$endTime", 1000] } }
},
},
{ $sort: { startDateTime: 1 } },
{ $group: { _id: null, data: { $push: "$$ROOT" } } },
{
$set: {
data: {
$reduce: {
input: "$data",
initialValue: [],
in: {
$cond: {
if: {
$or: [
{ $eq: [{ $size: "$$value" }, 0] }, // for the initail element
{
$gt: [
{
$dateDiff: { // calculate difference
endDate: "$$this.startDateTime",
startDate: { $last: "$$value.endDateTime" },
unit: "minute"
}
},
5 // more than 5 Minutes
]
}
]
},
then: { $concatArrays: ["$$value", ["$$this"]] }, // append new element
else: {
$map: {
input: "$$value",
as: "data",
in: {
$cond: {
if: { $eq: ["$$data._id", { $last: "$$value._id" }] }, // find last element
then: { // update last element
$mergeObjects: [
"$$data",
{ endDateTime: "$$this.endDateTime" },
{ endTime: "$$this.endTime" }
]
},
else: "$$data"
}
}
}
}
}
}
}
}
}
},
// some cosmetic
{ $unwind: "$data" },
{ $replaceRoot: { newRoot: "$data" } }
])
Mongo Playground

You can use $lookup in an aggregation pipeline to find out the documents that you need to remove. Then, perform a forEach to remove them.
db.collection.aggregate([
{
$addFields: {
endDateTime: {
"$toDate": {
"$multiply": [
{
$toLong: "$endTime"
},
1000
]
}
}
},
},
{
"$lookup": {
"from": "collection",
let: {
end: "$endDateTime"
},
pipeline: [
{
"$addFields": {
startDateTime: {
"$toDate": {
"$multiply": [
{
$toLong: "$startTime"
},
1000
]
}
}
}
},
{
$match: {
$expr: {
$and: [
{
$lte: [
{
$subtract: [
"$startDateTime",
"$$end"
]
},
300000
]
},
{
$lte: [
"$$end",
"$startDateTime"
]
}
]
}
}
}
],
"as": "lessThan5min"
}
},
{
"$unwind": "$lessThan5min"
},
{
"$replaceRoot": {
"newRoot": "$lessThan5min"
}
}
]).forEach(function(doc){
db.collection.remove({ "_id": doc._id });
});
Here is the Mongo playground to find out the documents that you need to remove for your reference.

Related

MongoDB - Lookup match with condition array of object with string

I have two collections "datasets" and "users".
I tried to lookup datasets.assignedTo = users.id that's working fine. Also, I want to match the field of datasets.firstBillable >= users.prices.beginDate date field are matched to get the current index price value. And also check users.prices.endDate is less than or equal to users.prices.beginDate.
For example:
cgPrices: 45
https://mongoplayground.net/p/YQps9EozlAL
Collections:
db={
users: [
{
id: 1,
name: "Aravinth",
prices: [
{
beginDate: "2022-08-24T07:29:01.639Z",
endDate: "2022-08-31T07:29:01.639Z",
price: 45
}
]
},
{
id: 2,
name: "Raja",
prices: [
{
beginDate: "2022-07-25T07:29:01.639Z",
endDate: "2022-07-30T07:29:01.639Z",
price: 55
}
]
}
],
datasets: [
{
color: "braun, rose gold",
firstBillable: "2022-08-24T07:29:01.639Z",
assignedTo: 1
},
{
color: "beige, silber",
firstBillable: "2022-07-25T07:29:01.639Z",
assignedTo: 2
}
]
}
My current implementation:
db.datasets.aggregate([
{
"$lookup": {
"from": "users",
"as": "details",
let: {
assigned_to: "$assignedTo",
first_billable: "$firstBillable"
},
pipeline: [
{
"$match": {
$expr: {
"$and": [
{
"$eq": [
"$id",
"$$assigned_to"
]
},
{
"$gte": [
"$first_billable",
"$details.prices.beginDate"
]
},
{
"$lte": [
"$first_billable",
"$details.prices.endDate"
]
}
]
}
}
}
]
}
},
{
"$addFields": {
"details": 0,
"cg": {
$first: {
"$first": "$details.prices.price"
}
}
}
}
])
Output i needed:
[
{
"_id": ObjectId("5a934e000102030405000000"),
"assignedTo": 1,
"cg": 45,
"color": "braun, rose gold",
"details": 0,
"firstBillable": "2022-08-24T07:29:01.639Z"
},
{
"_id": ObjectId("5a934e000102030405000001"),
"assignedTo": 2,
"cg": 55,
"color": "beige, silber",
"details": 0,
"firstBillable": "2022-07-25T07:29:01.639Z"
}
]
https://mongoplayground.net/p/YQps9EozlAL
Concerns:
You should compare the date as Date instead of string, hence you are required to convert the date strings to Date before comparing.
In users collection, prices is an array. You need to deconstruct the array to multiple documents first before compare the date fields in price.
The query should be:
db.datasets.aggregate([
{
"$lookup": {
"from": "users",
"as": "details",
let: {
assigned_to: "$assignedTo",
first_billable: {
$toDate: "$firstBillable"
}
},
pipeline: [
{
$match: {
$expr: {
$eq: [
"$id",
"$$assigned_to"
]
}
}
},
{
$unwind: "$prices"
},
{
"$match": {
$expr: {
"$and": [
{
"$gte": [
"$$first_billable",
{
$toDate: "$prices.beginDate"
}
]
},
{
"$lte": [
"$$first_billable",
{
$toDate: "$prices.endDate"
}
]
}
]
}
}
}
]
}
},
{
"$addFields": {
"details": 0,
"cg": {
$first: "$details.prices.price"
}
}
}
])
Demo # Mongo Playground

Get current state from snapshot documents - mongoDB

I'm trying to get a list of current holders at specific times from a collection. My collection looks like this:
[
{
"time": 1,
"holdings": [
{ "owner": "A", "tokens": 2 },
{ "owner": "B", "tokens": 1 }
]
},
{
"time": 2,
"holdings": [
{ "owner": "B", "tokens": 2 }
]
},
{
"time": 3,
"holdings": [
{ "owner": "A", "tokens": 3 },
{ "owner": "B", "tokens": 1 },
{ "owner": "C", "tokens": 1 }
]
},
{
"time": 4,
"holdings": [
{ "owner": "C", "tokens": 0 }
]
}
]
tokens show the current holdings of an owner if the holdings have changed to the last document. I would like to change the collection so that holdings always includes the full current holdings for any point in time.
At time: 1, the holdings are: A: 2, B: 1.
At time: 2, the holdings are: A: 2, B: 2. The collections does not include A's holdings however, because they haven't changed. So what I'd like to get is:
[
{
"time": 1,
"holdings": [
{ "owner": "A", "tokens": 2 },
{ "owner": "B", "tokens": 1 }
]
},
{
"time": 2,
"holdings": [
{ "owner": "A", "tokens": 2 }, // merged from prev doc.
{ "owner": "B", "tokens": 2 }
]
},
{
"time": 3,
"holdings": [
{ "owner": "A", "tokens": 3 },
{ "owner": "B", "tokens": 1 },
{ "owner": "C", "tokens": 1 }
]
},
{
"time": 4,
"holdings": [
{ "owner": "A", "tokens": 3 }, // merged from prev
{ "owner": "B", "tokens": 1 }, // merged from prev
{ "owner": "C", "tokens": 0 }
]
}
]
From what I understand $mergeObjects does that, but I don't understand how I can merge all previous docs in order up to the current doc for each doc. So I'm looking for a way to combine setWindowFields with mergeObjects I think.
This is a nice challenge.
So far, I got this complicated solution:
Get all of our timestamps in all of our documents. This is the purpose of the first 4 steps. $setWindowFields is used to accumulate this data.
$group by owner and calculate the empty timestamps as wantedTimes- next 5 steps.
$set empty timestamps with tokens: null to be filled with actual data and $unwind to separate - next 3 steps
Use $setWindowFields to find the last known token for each owner at each timestamp.
Fill this last known state for documents with unknown token - 2 steps
$group and format answer:
db.collection.aggregate([
{
$setWindowFields: {
sortBy: {time: 1},
output: {
allTimes: {$addToSet: "$time", window: {documents: ["unbounded", "current"]}
}
}
}
},
{
$setWindowFields: {
sortBy: {time: -1},
output: {
allTimes: {$addToSet: "$allTimes", window: {documents: ["unbounded", "current"]}
}
}
}
},
{
$set: {
allTimes: {
$reduce: {
input: "$allTimes",
initialValue: [],
in: {"$concatArrays": ["$$value", "$$this"]}
}
}
}
},
{$set: {allTimes: {$setIntersection: "$allTimes"}}},
{$unwind: "$holdings"},
{$sort: {time: 1}},
{$group: { _id: "$holdings.owner",
tokens: {$push: {tokens: "$holdings.tokens", time: "$time"}},
times: {$push: "$time"}, firstTime: {$first: "$time"},
allTimes: {$first: "$allTimes"}}
},
{
$addFields: {
wantedTimes: {
$filter: {
input: "$allTimes",
as: "item",
cond: {$gte: ["$$item", "$firstTime"]}
}
}
}
},
{
$project: {
tokens: 1,
wantedTimes: {$setDifference: ["$wantedTimes", "$times"]}
}
},
{
$set: {
data: {
$map: {
input: "$wantedTimes",
as: "item",
in: {time: "$$item", tokens: null}
}
}
}
},
{$project: {tokens: {"$concatArrays": ["$tokens", "$data"]}}},
{$unwind: "$tokens"},
{
$setWindowFields: {
partitionBy: "$_id",
sortBy: {"tokens.time": 1},
output: {
lastTokens: {
$push: "$tokens.tokens",
window: {documents: ["unbounded", "current"]}
}
}
}
},
{
$set: {
lastTokens: {
$filter: {
input: "$lastTokens",
as: "item",
cond: {$ne: ["$$item", null]}
}
}
}
},
{
$set: {
"tokens.tokens": {$ifNull: ["$tokens.tokens", {$last: "$lastTokens"}]}
}
},
{
$group: {
_id: "$tokens.time",
holdings: {$push: {owner: "$_id", tokens: "$tokens.tokens" }}
}
},
{$project: {time: "$_id", holdings: 1, _id: 0}},
{$sort: {time: 1}}
])
Playground example
From a performance perspective I recommend you split it into 2 calls, the first will be a quick findOne just to get the maximum time value in the collection.
Once you have that value the pipeline can be much leaner:
const maxItem = await db.collection.findOne({}).sort({ time: -1 });
db.collection.aggregate([
{
$unwind: "$holdings"
},
{
$group: {
_id: "$holdings.owner",
times: {
$push: {
time: "$time",
tokens: "$holdings.tokens"
}
},
minTime: {
$min: "$time"
}
}
},
{
$addFields: {
times: {
$reduce: {
input: {
$range: [
"$minTime",
maxItem.time + 1 // this is max time
]
},
initialValue: {
values: [],
lastIndex: 0
},
in: {
values: {
"$concatArrays": [
"$$value.values",
[
{
$cond: [
{
$in: [
"$$this",
"$times.time"
]
},
{
"$arrayElemAt": [
"$times",
"$$value.lastIndex"
]
},
{
"$mergeObjects": [
{
tokens: 0
},
{
"$arrayElemAt": [
"$times",
{
$subtract: [
"$$value.lastIndex",
1
]
}
]
},
{
time: "$$this"
}
]
}
]
}
]
]
},
lastIndex: {
$cond: [
{
$in: [
"$$this",
"$times.time"
]
},
{
$sum: [
"$$value.lastIndex",
1
]
},
"$$value.lastIndex"
]
}
}
}
}
}
},
{
$unwind: "$times.values"
},
{
$group: {
_id: "$times.values.time",
holdings: {
$push: {
owner: "$_id",
tokens: "$times.values.tokens"
}
}
}
},
{
$project: {
_id: 0,
time: "$_id",
holdings: 1
}
},
{
$sort: {
time: 1
}
}
])
This is still quite a heavy query as it requires to $unwind and $group the entire collection, however there is no workaround this due to the requirements. if the collection is too big for this approach I recommend iteration owner by owner, or time by time and doing separate updates accordingly.
Mongo Playground
If you don't care about performance at all and want it in a single query you can still use the same pipeline, you will have to first extract the max time in the collection, this will require you to add an initial $group stage, like so:
db.collection.aggregate([
{
$group: {
_id: null,
maxTime: {
$max: "$time"
},
roots: {
$push: "$$ROOT"
}
}
},
{
$unwind: "$roots"
},
{
$replaceRoot: {
newRoot: {
"$mergeObjects": [
"$roots",
{
maxTime: "$maxTime"
}
]
}
}
},
... same pipeline ...
])

MongoDB - Aggregate get specific objects in an array

How can I get only objects in the sales array matching with 2021-10-14 date ?
My aggregate query currently returns all objects of the sales array if at least one is matching.
Dataset Documents
{
"name": "#0",
"sales": [{
"date": "2021-10-14",
"price": 3.69,
},{
"date": "2021-10-15",
"price": 2.79,
}]
},
{
"name": "#1",
"sales": [{
"date": "2021-10-14",
"price": 1.5,
}]
}
Aggregate
{
$match: {
sales: {
$elemMatch: {
date: '2021-10-14',
},
},
},
},
{
$group: {
_id: 0,
data: {
$push: '$sales',
},
},
},
{
$project: {
data: {
$reduce: {
input: '$data',
initialValue: [],
in: {
$setUnion: ['$$value', '$$this'],
},
},
},
},
}
Result
{"date": "2021-10-14","price": 3.69},
{"date": "2021-10-15","price": 2.79},
{"date": "2021-10-14","price": 1.5}
Result Expected
{"date": "2021-10-14","price": 3.69},
{"date": "2021-10-14","price": 1.5}
You actually need to use a $replaceRoot or $replaceWith pipeline which takes in an expression that gives you the resulting document filtered using $arrayElemAt (or $first) and $filter from the sales array:
[
{ $match: { 'sales.date': '2021-10-14' } },
{ $replaceWith: {
$arrayElemAt: [
{
$filter: {
input: '$sales',
cond: { $eq: ['$$this.date', '2021-10-14'] }
}
},
0
]
} }
]
OR
[
{ $match: { 'sales.date': '2021-10-14' } },
{ $replaceRoot: {
newRoot: {
$arrayElemAt: [
{
$filter: {
input: '$sales',
cond: { $eq: ['$$this.date', '2021-10-14'] }
}
},
0
]
}
} }
]
Mongo Playground
In $project stage, you need $filter operator with input as $reduce operator to filter the documents.
{
$project: {
data: {
$filter: {
input: {
$reduce: {
input: "$data",
initialValue: [],
in: {
$setUnion: [
"$$value",
"$$this"
],
}
}
},
cond: {
$eq: [
"$$this.date",
"2021-10-14"
]
}
}
}
}
}
Sample Mongo Playground
How about using $unwind:
.aggregate([
{$match: { sales: {$elemMatch: {date: '2021-10-14'} } }},
{$unwind: '$sales'},
{$match: {'sales.date': '2021-10-14'}},
{$project: {date: '$sales.date', price: '$sales.price', _id: 0}}
])
This will separate the sales into different documents, each containing only one sale, and allow you to match conditions easily.
See: https://docs.mongodb.com/manual/reference/operator/aggregation/unwind/

MongoDb Create Aggregate Create query

I have 3 table users,shifts,temporaryShifts,
shifts:[{_id:ObjectId(2222),name:"Morning"},{_id:ObjectId(454),name:"Night"}]
users:[{_id:ObjectId(123),name:"Albert",shift_id:ObjectId(2222)}]
temporaryShifts:[
{_id:2,userId:ObjectId(123),shiftId:ObjectId(454),type:"temporary",date:"2020-02-01"},
{_id:987,userId:ObjectId(123),shiftId:ObjectId(454),type:"temporary",date:"2020-02-03"},
{_id:945,userId:ObjectId(123),shiftId:ObjectId(454),type:"temporary",date:"2020-02-08"},
{_id:23,userId:ObjectId(123),shiftId:ObjectId(454),date:"2020-02-09"}]
i want to make a mongoose aggregate query then give me result :
get result between two dates for example :2020-02-01 2020-02-05,
resullts is :
[
{_id:ObjectId(123),name:"Albert",shift:[
{_id:2,shiftId:ObjectId(454),type:"temporary",date:"2020-02-01"},
{_id:2,shiftId:ObjectId(2222),type:"permanent",date:"2020-02-02"},
{_id:2,shiftId:ObjectId(454),type:"temporary",date:"2020-02-03"},
{_id:2,shiftId:ObjectId(2222),type:"permanent",date:"2020-02-04"},
{_id:2,shiftId:ObjectId(2222),type:"permanent",date:"2020-02-05"},
]}
]
in result type temporary mean selected date in table temporaryShift document available else type permanent
MongoPlayGround You Can edit
You can first project a date range array using $range, in your example it will be like [2020-02-01, 2020-02-02, 2020-02-03, 2020-02-04, 2020-02-05], then you can use the array to perform $lookup
db.users.aggregate([
{
$limit: 1
},
{
"$addFields": {
"startDate": ISODate("2020-02-01"),
"endDate": ISODate("2020-02-05")
}
},
{
"$addFields": {
"dateRange": {
"$range": [
0,
{
$add: [
{
$divide: [
{
$subtract: [
"$endDate",
"$startDate"
]
},
86400000
]
},
1
]
}
]
}
}
},
{
"$addFields": {
"dateRange": {
$map: {
input: "$dateRange",
as: "increment",
in: {
"$add": [
"$startDate",
{
"$multiply": [
"$$increment",
86400000
]
}
]
}
}
}
}
},
{
"$unwind": "$dateRange"
},
{
"$project": {
"name": 1,
"shiftId": 1,
"dateCursor": "$dateRange"
}
},
{
"$lookup": {
"from": "temporaryShifts",
"let": {
dateCursor: "$dateCursor",
shiftId: "$shiftId"
},
"pipeline": [
{
"$addFields": {
"parsedDate": {
"$dateFromString": {
"dateString": "$date",
"format": "%Y-%m-%d"
}
}
}
},
{
$match: {
$expr: {
$and: [
{
$eq: [
"$$dateCursor",
"$parsedDate"
]
}
]
}
}
}
],
"as": "temporaryShiftsLookup"
}
},
{
"$unwind": {
path: "$temporaryShiftsLookup",
preserveNullAndEmptyArrays: true
}
},
{
$project: {
shiftId: 1,
type: {
"$ifNull": [
"$temporaryShiftsLookup.type",
"permanent"
]
},
date: "$dateCursor"
}
}
])
Here is the Mongo Playground for your reference.

How to use $mergeObjects to merge arrays corresponding to existing groups?

I'm trying to merge two arrays in my aggregation pipeline. After performing $facet, my MongoDB document has this format:
{
"final": [
{
"key": "TP-1",
"status_map": [
{ "status": "Closed", "final": [ "a", "b"]},
{ "status": "Done", "final": ["c","d" ] }
]
},
{
"key": "TP-2",
"status_map": [
{ "status": "Closed", "final": [ "x","y"] }
]
}
],
"start": [
{
"key": "TP-1",
"status_map": [
{ "status": "Closed", "start": [ "h"]},
{ "status": "Done", "start": ["a"]}
]
},
{
"key": "TP-2",
"status_map": [{ "status": "Done", "start": ["l","m"]}
]
}
]
}
Expected Output:
I need to merge final and start array corresponding to two groups:
Based on key and then
Based on status
{
"data": [
{
"key": "TP-1",
"status_map": [
{ "status": "Closed","final": ["a","b"],"start":["h"]},
{ "status": "Done","final": ["c","d"],"start":["a"]}
]
},
{
"key": "TP-2",
"status_map": [
{ "status": "Closed", "final":[ "x","y"],"start": []},
{ "status": "Done", "final": [ ],"start": [ "l","m"]}
]
}
]
}
How to achieve this use case?
There are several ways to approach this, not necessarily with $mergeObjects. But since you mentioned $mergeObjects this is one that uses it:
Note that, with this approach, we are merging objects of the same key and status, the values in the arrays will not get concatenated if the same key exists for multiple documents, The arrays will get replaced instead.
db.collection.aggregate([
{
$project: {
all: { $concatArrays: ["$final","$start"] }
}
},
{
$unwind: "$all"
},
{
$unwind: "$all.status_map"
},
{
$group: {
_id: {
_id: "$_id", // keep _id in $group to apply the group for each document, otherwise if you want to apply group on all documents, omit this
key: "$all.key",
status: "$all.status_map.status"
},
status_map: { $mergeObjects: "$$ROOT.all.status_map" }
}
},
{ // some data don't have start or end at all, we have to set a default empty array
$addFields: { // you can skip this stage if you allow data without start and final keys
"status_map.start": { $ifNull: ["$status_map.start", []] },
"status_map.final": { $ifNull: ["$status_map.final", []] }
}
},
{
$group: {
_id: { _id: "$_id._id", key: "$_id.key" },
key: { $first: "$_id.key" },
status_map: { $push: "$status_map" }
}
}
])
Mongo Playground
With no assumptions (for example for both keys to always appear) my strategy was to concat both arrays, unwind and finally group by the key.
db.collection.aggregate([
{
$project: {
concat: {
$concatArrays: [
"$final",
"$start"
]
}
}
},
{
$unwind: "$concat"
},
{
$unwind: "$concat.status_map"
},
{
$group: {
_id: {
k: "$concat.key",
status: "$concat.status_map.status"
},
final: {
$push: "$concat.status_map.final"
},
start: {
$push: "$concat.status_map.start"
}
}
},
{
$group: {
_id: "$_id.k",
status_map: {
$push: {
status: "$_id.status",
final: "$final",
start: "$start"
}
}
}
},
{
$project: {
key: "$_id",
status_map: 1,
_id: 0
}
}
])
Mongo Playground
Adding to #Tom Slabbaert's answer,
Mongo Playground
Here, final and start array is of format array of array. But It has to be simply an array.
It can be achieved by using $unwind on status_map and $reduce on status_map.final and status_map.start arrays.
Final query:
db.collection.aggregate([
{
$project: {
concat: {
$concatArrays: [
"$final",
"$start"
]
}
}
},
{
$unwind: "$concat"
},
{
$unwind: "$concat.status_map"
},
{
$group: {
_id: {
k: "$concat.key",
status: "$concat.status_map.status"
},
final: {
$push: "$concat.status_map.final"
},
start: {
$push: "$concat.status_map.start"
}
}
},
{
$group: {
_id: "$_id.k",
status_map: {
$push: {
status: "$_id.status",
final: "$final",
start: "$start"
}
}
}
},
{
$project: {
key: "$_id",
status_map: 1,
_id: 0
}
},
{
$unwind: "$status_map"
},
{
$project: {
key: 1,
"status_map.status": 1,
final: {
$reduce: {
input: "$status_map.final",
initialValue: [],
in: {
$concatArrays: [
"$$value",
"$$this"
]
}
}
},
start: {
$reduce: {
input: "$status_map.start",
initialValue: [],
in: {
$concatArrays: [
"$$value",
"$$this"
]
}
}
}
}
},
{
$group: {
_id: "$key",
status_map: {
$push: {
status: "$status_map.status",
final: "$final",
start: "$start"
}
}
}
}
])
Mongo Playground