How can I select all data based on date range mongoDB? - mongodb

I have a lists of records like below
[
{
"product": "p1",
"salesdate": "2020-02-01",
"amount": 100
},
{
"product": "p2",
"salesdate": "2020-02-04",
"amount": 200
},
]
On 2nd feb and 3rd feb i don't have data. But I need to add this in my result. My expected result is
[
{
"amount": 100,
"salesdate": "2020-02-01"
},
{
"amount": 0,
"salesdate": "2020-02-02"
},
{
"amount": 0,
"salesdate": "2020-02-03"
}
{
"amount": 200,
"salesdate": "2020-02-04"
}
]
Can I achieve this using mongoDB?
https://mongoplayground.net/p/EiAJdY9jRHn

You can use $reduce for it. Whenever one has to work with data/time values, then I recommend the moment.js library. You don't have to use it, but it makes your life easier.
db.collection.aggregate([
// Put all data into one document
{ $group: { _id: null, data: { $push: "$$ROOT" } } },
// Add missing days
{
$addFields: {
data: {
$reduce: {
// Define the range of date
input: { $range: [0, moment().get('day')] },
initialValue: [],
in: {
$let: {
vars: {
ts: {
$add: [moment().startOf('month').toDate(), { $multiply: ["$$this", 1000 * 60 * 60 * 24] }]
}
},
in: {
$concatArrays: [
"$$value",
[{
$ifNull: [
{ $first: { $filter: { input: "$data", cond: { $eq: ["$$this.salesdate", "$$ts"] } } } },
// Default value for missing days
{ salesdate: "$$ts", amount: 0 }
]
}]
]
}
}
}
}
}
}
},
{ $unwind: "$data" },
{ $replaceRoot: { newRoot: "$data" } }
// If requried add further $group stages
])
Note, this code returns values from first day of current months to current day (not 2020 as in your sample data). You may adapt the ranges - your requirements are not clear from the question.
Mongo Playground

Related

MongoDB aggregate $group $sum that matches date inside array of objects

I'll explain my problem here and i'll put a tldr at the bottom summarizing the question.
We have a collection called apple_receipt, since we have some apple purchases in our application. That document has some fields that we will be using on this aggregation. Those are: price, currency, startedAt and history. Price, currency and startedAt are self-explanatory. History is a field that is an array of objects containing a price and startedAt. So, what we are trying to accomplish is a query that gets every document between a date of our choice, for example: 06-06-2020 through 10-10-2022 and get the total price combined of all those receipts that have a startedAt between that. We have a document like this:
{
price: 12.9,
currency: 'BRL',
startedAt: 2022-08-10T16:23:42.000+00:00
history: [
{
price: 12.9,
startedAt: 2022-05-10T16:23:42.000+00:00
},
{
price: 12.9,
startedAt: 2022-06-10T16:23:42.000+00:00
},
{
price: 12.9,
startedAt: 2022-07-10T16:23:42.000+00:00
}
]
}
If we query between dates 06-06-2022 to 10-10-2022, we would have a return like this: totalPrice: 38,7.
-total price of the 3 objects that have matched the date inside that value range-
I have tried this so far:
AppleReceipt.aggregate([
{
$project: {
price: 1,
startedAt: 1,
currency: 1,
history: 1,
}
},
{
$unwind: {
path: "$history",
preserveNullAndEmptyArrays: true,
}
},
{
$match: {
$or: [
{ startedAt: {$gte: new Date(filters.begin), $lt: new Date(filters.end)} },
]
}
},
{
$group: {
_id: "$_id",
data: { $push: '$$ROOT' },
totalAmountHelper: { $sum: '$history.price' }
}
},
{
$unwind: "$data"
},
{
$addFields: {
totalAmount: { $add: ['$totalAmountHelper', '$data.price'] }
}
}
])
It does bring me the total value but I couldn't know how to take into consideration the date to make the match stage to only get the sum of the documents that are between that date.
tl;dr: Want to make a query that gets the total sum of the prices of all documents that have startedAt between the dates we choose. Needs to match the ones inside history field - which is an array of objects, and also the startedAt outside of the history field.
https://mongoplayground.net/p/lOvRbX24QI9
db.collection.aggregate([
{
$set: {
"history_total": {
"$reduce": {
"input": "$history",
"initialValue": 0,
"in": {
$sum: [
{
"$cond": {
"if": {
$and: [
{
$gte: [
new Date("2022-06-06"),
{
$dateFromString: {
dateString: "$$this.startedAt"
}
}
]
},
{
$lt: [
{
$dateFromString: {
dateString: "$$this.startedAt"
}
},
new Date("2022-10-10")
]
},
]
},
"then": "$$this.price",
"else": 0
}
},
"$$value",
]
}
}
}
}
},
{
$set: {
"history_total": {
"$sum": [
"$price",
"$history_total"
]
}
}
}
])
Result:
[
{
"_id": ObjectId("5a934e000102030405000000"),
"currency": "BRL",
"history": [
{
"price": 12.9,
"startedAt": "2022-05-10T16:23:42.000+00:00"
},
{
"price": 12.9,
"startedAt": "2022-06-10T16:23:42.000+00:00"
},
{
"price": 12.9,
"startedAt": "2022-07-10T16:23:42.000+00:00"
}
],
"history_total": 325.79999999999995,
"price": 312.9,
"startedAt": "2022-08-10T16:23:42.000+00:00"
}
]
Kudos goes to #user20042973

Get current state from snapshot documents - mongoDB

I'm trying to get a list of current holders at specific times from a collection. My collection looks like this:
[
{
"time": 1,
"holdings": [
{ "owner": "A", "tokens": 2 },
{ "owner": "B", "tokens": 1 }
]
},
{
"time": 2,
"holdings": [
{ "owner": "B", "tokens": 2 }
]
},
{
"time": 3,
"holdings": [
{ "owner": "A", "tokens": 3 },
{ "owner": "B", "tokens": 1 },
{ "owner": "C", "tokens": 1 }
]
},
{
"time": 4,
"holdings": [
{ "owner": "C", "tokens": 0 }
]
}
]
tokens show the current holdings of an owner if the holdings have changed to the last document. I would like to change the collection so that holdings always includes the full current holdings for any point in time.
At time: 1, the holdings are: A: 2, B: 1.
At time: 2, the holdings are: A: 2, B: 2. The collections does not include A's holdings however, because they haven't changed. So what I'd like to get is:
[
{
"time": 1,
"holdings": [
{ "owner": "A", "tokens": 2 },
{ "owner": "B", "tokens": 1 }
]
},
{
"time": 2,
"holdings": [
{ "owner": "A", "tokens": 2 }, // merged from prev doc.
{ "owner": "B", "tokens": 2 }
]
},
{
"time": 3,
"holdings": [
{ "owner": "A", "tokens": 3 },
{ "owner": "B", "tokens": 1 },
{ "owner": "C", "tokens": 1 }
]
},
{
"time": 4,
"holdings": [
{ "owner": "A", "tokens": 3 }, // merged from prev
{ "owner": "B", "tokens": 1 }, // merged from prev
{ "owner": "C", "tokens": 0 }
]
}
]
From what I understand $mergeObjects does that, but I don't understand how I can merge all previous docs in order up to the current doc for each doc. So I'm looking for a way to combine setWindowFields with mergeObjects I think.
This is a nice challenge.
So far, I got this complicated solution:
Get all of our timestamps in all of our documents. This is the purpose of the first 4 steps. $setWindowFields is used to accumulate this data.
$group by owner and calculate the empty timestamps as wantedTimes- next 5 steps.
$set empty timestamps with tokens: null to be filled with actual data and $unwind to separate - next 3 steps
Use $setWindowFields to find the last known token for each owner at each timestamp.
Fill this last known state for documents with unknown token - 2 steps
$group and format answer:
db.collection.aggregate([
{
$setWindowFields: {
sortBy: {time: 1},
output: {
allTimes: {$addToSet: "$time", window: {documents: ["unbounded", "current"]}
}
}
}
},
{
$setWindowFields: {
sortBy: {time: -1},
output: {
allTimes: {$addToSet: "$allTimes", window: {documents: ["unbounded", "current"]}
}
}
}
},
{
$set: {
allTimes: {
$reduce: {
input: "$allTimes",
initialValue: [],
in: {"$concatArrays": ["$$value", "$$this"]}
}
}
}
},
{$set: {allTimes: {$setIntersection: "$allTimes"}}},
{$unwind: "$holdings"},
{$sort: {time: 1}},
{$group: { _id: "$holdings.owner",
tokens: {$push: {tokens: "$holdings.tokens", time: "$time"}},
times: {$push: "$time"}, firstTime: {$first: "$time"},
allTimes: {$first: "$allTimes"}}
},
{
$addFields: {
wantedTimes: {
$filter: {
input: "$allTimes",
as: "item",
cond: {$gte: ["$$item", "$firstTime"]}
}
}
}
},
{
$project: {
tokens: 1,
wantedTimes: {$setDifference: ["$wantedTimes", "$times"]}
}
},
{
$set: {
data: {
$map: {
input: "$wantedTimes",
as: "item",
in: {time: "$$item", tokens: null}
}
}
}
},
{$project: {tokens: {"$concatArrays": ["$tokens", "$data"]}}},
{$unwind: "$tokens"},
{
$setWindowFields: {
partitionBy: "$_id",
sortBy: {"tokens.time": 1},
output: {
lastTokens: {
$push: "$tokens.tokens",
window: {documents: ["unbounded", "current"]}
}
}
}
},
{
$set: {
lastTokens: {
$filter: {
input: "$lastTokens",
as: "item",
cond: {$ne: ["$$item", null]}
}
}
}
},
{
$set: {
"tokens.tokens": {$ifNull: ["$tokens.tokens", {$last: "$lastTokens"}]}
}
},
{
$group: {
_id: "$tokens.time",
holdings: {$push: {owner: "$_id", tokens: "$tokens.tokens" }}
}
},
{$project: {time: "$_id", holdings: 1, _id: 0}},
{$sort: {time: 1}}
])
Playground example
From a performance perspective I recommend you split it into 2 calls, the first will be a quick findOne just to get the maximum time value in the collection.
Once you have that value the pipeline can be much leaner:
const maxItem = await db.collection.findOne({}).sort({ time: -1 });
db.collection.aggregate([
{
$unwind: "$holdings"
},
{
$group: {
_id: "$holdings.owner",
times: {
$push: {
time: "$time",
tokens: "$holdings.tokens"
}
},
minTime: {
$min: "$time"
}
}
},
{
$addFields: {
times: {
$reduce: {
input: {
$range: [
"$minTime",
maxItem.time + 1 // this is max time
]
},
initialValue: {
values: [],
lastIndex: 0
},
in: {
values: {
"$concatArrays": [
"$$value.values",
[
{
$cond: [
{
$in: [
"$$this",
"$times.time"
]
},
{
"$arrayElemAt": [
"$times",
"$$value.lastIndex"
]
},
{
"$mergeObjects": [
{
tokens: 0
},
{
"$arrayElemAt": [
"$times",
{
$subtract: [
"$$value.lastIndex",
1
]
}
]
},
{
time: "$$this"
}
]
}
]
}
]
]
},
lastIndex: {
$cond: [
{
$in: [
"$$this",
"$times.time"
]
},
{
$sum: [
"$$value.lastIndex",
1
]
},
"$$value.lastIndex"
]
}
}
}
}
}
},
{
$unwind: "$times.values"
},
{
$group: {
_id: "$times.values.time",
holdings: {
$push: {
owner: "$_id",
tokens: "$times.values.tokens"
}
}
}
},
{
$project: {
_id: 0,
time: "$_id",
holdings: 1
}
},
{
$sort: {
time: 1
}
}
])
This is still quite a heavy query as it requires to $unwind and $group the entire collection, however there is no workaround this due to the requirements. if the collection is too big for this approach I recommend iteration owner by owner, or time by time and doing separate updates accordingly.
Mongo Playground
If you don't care about performance at all and want it in a single query you can still use the same pipeline, you will have to first extract the max time in the collection, this will require you to add an initial $group stage, like so:
db.collection.aggregate([
{
$group: {
_id: null,
maxTime: {
$max: "$time"
},
roots: {
$push: "$$ROOT"
}
}
},
{
$unwind: "$roots"
},
{
$replaceRoot: {
newRoot: {
"$mergeObjects": [
"$roots",
{
maxTime: "$maxTime"
}
]
}
}
},
... same pipeline ...
])

MongoDB - Aggregate get specific objects in an array

How can I get only objects in the sales array matching with 2021-10-14 date ?
My aggregate query currently returns all objects of the sales array if at least one is matching.
Dataset Documents
{
"name": "#0",
"sales": [{
"date": "2021-10-14",
"price": 3.69,
},{
"date": "2021-10-15",
"price": 2.79,
}]
},
{
"name": "#1",
"sales": [{
"date": "2021-10-14",
"price": 1.5,
}]
}
Aggregate
{
$match: {
sales: {
$elemMatch: {
date: '2021-10-14',
},
},
},
},
{
$group: {
_id: 0,
data: {
$push: '$sales',
},
},
},
{
$project: {
data: {
$reduce: {
input: '$data',
initialValue: [],
in: {
$setUnion: ['$$value', '$$this'],
},
},
},
},
}
Result
{"date": "2021-10-14","price": 3.69},
{"date": "2021-10-15","price": 2.79},
{"date": "2021-10-14","price": 1.5}
Result Expected
{"date": "2021-10-14","price": 3.69},
{"date": "2021-10-14","price": 1.5}
You actually need to use a $replaceRoot or $replaceWith pipeline which takes in an expression that gives you the resulting document filtered using $arrayElemAt (or $first) and $filter from the sales array:
[
{ $match: { 'sales.date': '2021-10-14' } },
{ $replaceWith: {
$arrayElemAt: [
{
$filter: {
input: '$sales',
cond: { $eq: ['$$this.date', '2021-10-14'] }
}
},
0
]
} }
]
OR
[
{ $match: { 'sales.date': '2021-10-14' } },
{ $replaceRoot: {
newRoot: {
$arrayElemAt: [
{
$filter: {
input: '$sales',
cond: { $eq: ['$$this.date', '2021-10-14'] }
}
},
0
]
}
} }
]
Mongo Playground
In $project stage, you need $filter operator with input as $reduce operator to filter the documents.
{
$project: {
data: {
$filter: {
input: {
$reduce: {
input: "$data",
initialValue: [],
in: {
$setUnion: [
"$$value",
"$$this"
],
}
}
},
cond: {
$eq: [
"$$this.date",
"2021-10-14"
]
}
}
}
}
}
Sample Mongo Playground
How about using $unwind:
.aggregate([
{$match: { sales: {$elemMatch: {date: '2021-10-14'} } }},
{$unwind: '$sales'},
{$match: {'sales.date': '2021-10-14'}},
{$project: {date: '$sales.date', price: '$sales.price', _id: 0}}
])
This will separate the sales into different documents, each containing only one sale, and allow you to match conditions easily.
See: https://docs.mongodb.com/manual/reference/operator/aggregation/unwind/

MongoDB compare endTime with startTime of next document

I have a similar collection where I have sort them by their startTime:
{"name": 'A', "startTime": '1634626355', "endTime": '1634631405'}
{"name": 'A', "startTime": '1634631406', "endTime": '1634631864'}
{"name": 'A', "startTime": '1634631865', "endTime": '1634656048'}
{"name": 'A', "startTime": '1634712642', "endTime": '1634718856'}
How can I compare the documents such that if the document endTime and the next document startTime duration is less than 5 minutes, merge it.
This is the result I'm trying to achieve (The 1st 3 documents are merged into 1 where it uses the startTime of the 1st document and the endTime of the 3rd document):
{"name": 'A', "startTime": '1634626355', "endTime": '1634656048'}
{"name": 'A', "startTime": '1634712642', "endTime": '1634718856'}
Thanks
First of all, you should never store date/time values as string, it's a design flaw. Store always proper Date object.
This solution works without self-lookup, so it may perform better:
db.collection.aggregate([
{
$set: {
startDateTime: { $toDate: { $multiply: ["$startTime", 1000] } },
endDateTime: { $toDate: { $multiply: ["$endTime", 1000] } }
},
},
{ $sort: { startDateTime: 1 } },
{ $group: { _id: null, data: { $push: "$$ROOT" } } },
{
$set: {
data: {
$reduce: {
input: "$data",
initialValue: [],
in: {
$cond: {
if: {
$or: [
{ $eq: [{ $size: "$$value" }, 0] }, // for the initail element
{
$gt: [
{
$dateDiff: { // calculate difference
endDate: "$$this.startDateTime",
startDate: { $last: "$$value.endDateTime" },
unit: "minute"
}
},
5 // more than 5 Minutes
]
}
]
},
then: { $concatArrays: ["$$value", ["$$this"]] }, // append new element
else: {
$map: {
input: "$$value",
as: "data",
in: {
$cond: {
if: { $eq: ["$$data._id", { $last: "$$value._id" }] }, // find last element
then: { // update last element
$mergeObjects: [
"$$data",
{ endDateTime: "$$this.endDateTime" },
{ endTime: "$$this.endTime" }
]
},
else: "$$data"
}
}
}
}
}
}
}
}
}
},
// some cosmetic
{ $unwind: "$data" },
{ $replaceRoot: { newRoot: "$data" } }
])
Mongo Playground
You can use $lookup in an aggregation pipeline to find out the documents that you need to remove. Then, perform a forEach to remove them.
db.collection.aggregate([
{
$addFields: {
endDateTime: {
"$toDate": {
"$multiply": [
{
$toLong: "$endTime"
},
1000
]
}
}
},
},
{
"$lookup": {
"from": "collection",
let: {
end: "$endDateTime"
},
pipeline: [
{
"$addFields": {
startDateTime: {
"$toDate": {
"$multiply": [
{
$toLong: "$startTime"
},
1000
]
}
}
}
},
{
$match: {
$expr: {
$and: [
{
$lte: [
{
$subtract: [
"$startDateTime",
"$$end"
]
},
300000
]
},
{
$lte: [
"$$end",
"$startDateTime"
]
}
]
}
}
}
],
"as": "lessThan5min"
}
},
{
"$unwind": "$lessThan5min"
},
{
"$replaceRoot": {
"newRoot": "$lessThan5min"
}
}
]).forEach(function(doc){
db.collection.remove({ "_id": doc._id });
});
Here is the Mongo playground to find out the documents that you need to remove for your reference.

Sum time series with different time stamps MongoDB

I have a mongoDB database with multiple time series data and each time stamp is a separate document with some additional meta data from sensors. I want to sum the two time series in an aggregation but I am heavily struggling with that and can't find any examples.
Assume we have sensor A and B and the time stamps from the different sensors don't align. See an example of the data below. Next I want to sum the "volume" metric of the two time series. So for the example below sensor A has two time stamps en sensor B 3. So the sum of A and B should have 5 time stamps such that the sum reflects all the changes in the total volume (see also the schematic example below).
Anyone knows how to solve this in a mongoDB aggregation query? I can only use the mongoDB query language and not use NodeJS.
Sensor A
{
"_id":5d67d9ee074e99274eef30d5
"sensor": A
"volume":12.4
"temperatue": 20
"timestamp":2019-08-29 15:58:06.093
"__v":0
}
{
"_id":5d67da66074e99274eef30ea
"sensor": A
"volume":12.3
"temperatue": 21
"timestamp":2019-08-29 16:48:06.078
"__v":0
}
..etc
Sensor B
{
"_id":5d67d9ee074e99274eef30d5
"sensor": B
"volume":32.4
"temperatue": 20
"timestamp":2019-08-29 15:55:06.093
"__v":0
}
{
"_id":5d67da66074e99274eef30ea
"sensor": B
"volume":21.2
"temperatue": 21
"timestamp":2019-08-29 16:49:06.178
"__v":0
}
{
"_id":5d67da66074e99274eef30ea
"sensor": B
"volume":22.3
"temperatue": 22
"timestamp":2019-08-29 17:04:06.078
"__v":0
}
..etc
Here also a sketch of the result I would like to have.
Try this one:
db.collection.aggregate([
// Determine start and end-time
{ $sort: { timestamp: -1 } },
{ $group: { _id: "$sensor", data: { $push: "$$ROOT" } } },
{
$set: {
data: {
$reduce: {
input: "$data",
initialValue: [],
in: {
$concatArrays: [
"$$value",
[
{
$mergeObjects: [
"$$this",
{
timestamp_end: {
$ifNull: [ { $last: "$$value.timestamp" }, "$$NOW" ]
}
}
]
}
]
]
}
}
}
}
},
{ $unwind: "$data" },
// find data per interval
{ $sort: { "data.timestamp": 1 } },
{
$group: {
_id: null,
data: { $push: "$data" },
timestamp: { $addToSet: "$data.timestamp" }
}
},
{
$set: {
sum_data: {
$map: {
input: "$timestamp",
as: "t",
in: {
$filter: {
input: "$data",
cond: {
$and: [
{ $lte: [ "$$this.timestamp", "$$t" ] },
{ $gt: [ "$$this.timestamp_end", "$$t" ] }
]
}
}
}
}
}
}
},
// sum up temperatures
{
$set: {
volume: {
$map: {
input: "$sum_data",
in: { $sum: "$$this.volume" }
}
},
result: { $range: [ 0, { $size: "$timestamp" } ] }
}
},
// Join arrays to final result
{
$project: {
result: {
$map: {
input: "$result",
as: "i",
in: {
timestamp: { $arrayElemAt: [ "$timestamp", "$$i" ] },
volume: { $arrayElemAt: [ "$volume", "$$i" ] }
}
}
}
}
}
])
Mongo Playground