Get Last Date within Range for Each Id Group - mongodb

Let say I have a collection with the following item:
[{myId:0,date:01.01.17,data:1000},
{myId:1,date:01.02.17,data:2000},
{myId:0,date:01.03.17,data:3000},
{myId:1,date:01.04.17,data:4000},
{myId:0,date:01.05.17,data:5000}]
I want to create a query that takes a date as a parameter and returns an array with a single object for every myId: the one that has the maximum date below the requested one.
For example calling the query with 15.03.17 date return:
[{myId:1,date:01.02.17,data:2000},
{myId:0,date:01.03.17,data:3000}]
And calling query with 15.01.17 date return
[{myId:0,date:01.01.17,data:1000}]
I'm looking for an answer that doesn't use db.eval

Fixing your data to make it valid:
// Seed the sample collection. Dates are created with new Date(...) from ISO
// strings instead of the ambiguous dd.mm.yy notation used in the question.
db.junk.insertMany([
{myId:0,date: new Date("2017-01-01"),data:1000},
{myId:1,date: new Date("2017-02-01"),data:2000},
{myId:0,date: new Date("2017-03-01"),data:3000},
{myId:1,date: new Date("2017-04-01"),data:4000},
{myId:0,date: new Date("2017-05-01"),data:5000}
])
You run an aggregate statement, filtering the entries via $match, then applying $sort to ensure the order and using $last for the "max" on each grouping boundary:
// For each myId, return the newest document dated on or before 2017-03-15.
db.junk.aggregate([
// Keep only documents whose date is <= the cutoff.
{ "$match": { "date": { "$lte": new Date("2017-03-15") } } },
// Ascending by date, so the newest remaining document of each group comes last.
{ "$sort": { "date": 1 } },
// One result per myId; $last picks the values from the final (newest) document.
{ "$group": {
"_id": "$myId",
"date": { "$last": "$date" },
"data": { "$last": "$data" }
}}
])
Returns:
/* 1 */
{
"_id" : 1.0,
"date" : ISODate("2017-02-01T00:00:00.000Z"),
"data" : 2000.0
}
/* 2 */
{
"_id" : 0.0,
"date" : ISODate("2017-03-01T00:00:00.000Z"),
"data" : 3000.0
}
And for the other date:
// Same pipeline with an earlier cutoff (2017-01-15).
db.junk.aggregate([
// Keep only documents whose date is <= the cutoff.
{ "$match": { "date": { "$lte": new Date("2017-01-15") } } },
// Ascending by date, so the newest remaining document of each group comes last.
{ "$sort": { "date": 1 } },
// One result per myId; $last picks the values from the final (newest) document.
{ "$group": {
"_id": "$myId",
"date": { "$last": "$date" },
"data": { "$last": "$data" }
}}
])
Returns:
/* 1 */
{
"_id" : 0.0,
"date" : ISODate("2017-01-01T00:00:00.000Z"),
"data" : 1000.0
}
If you really must you can add a $sort as the final pipeline stage in order to ensure the order of _id ( myId value ) returned:
// Same "latest document per myId" pipeline, with the results ordered by myId.
db.junk.aggregate([
// Keep only documents whose date is <= the cutoff.
{ "$match": { "date": { "$lte": new Date("2017-03-15") } } },
// Ascending by date, so the newest remaining document of each group comes last.
{ "$sort": { "date": 1 } },
// One result per myId; $last picks the values from the final (newest) document.
{ "$group": {
"_id": "$myId",
"date": { "$last": "$date" },
"data": { "$last": "$data" }
}},
// Optional: order the grouped results by _id (the myId value), ascending.
{ "$sort": { "_id": 1 } }
])

Related

Aggregation error: $arrayElemAt's first argument must be an array, but is object

I'm trying to aggregate a collection in mongo using the following pipeline:
// Asker's pipeline: intended to compute high/low/open/close over the points in [start, now].
// `today`, `start` and `now` are defined outside this snippet.
const results = await Price.aggregate([
{ $match: { date: today } },
// Emits one document per element of `points`; from here on `points` is a
// single embedded document, not an array.
{ $unwind: '$points' },
{ $match: { 'points.time': { $gte: start, $lte: now } } },
{ $sort: { 'points.time': 1 } },
{ $project: {
'high': { $max: '$points.price' },
'low': { $min: '$points.price' },
// These two fail with the reported error because '$points' is no longer an array here.
'open': { $arrayElemAt: ['$points', 0] },
'close': { $arrayElemAt: ['$points', -1] }
} }
])
However, the $arrayElemAt operator isn't working, presumably because one of the preceding stages ($unwind, I believe) converts the array of points I have in my documents to an object. How can I fix this?
Example document:
{
"_id" : ObjectId("5c93ac3ab89045027259a23f"),
"date" : ISODate("2019-03-21T00:00:00Z"),
"symbol" : "CC6P",
"points" : [
{
"_id" : ObjectId("5c93ac3ab89045027259a244"),
"volume" : 553,
"time" : ISODate("2019-03-21T09:35:34.239Z"),
"price" : 71
},
{
"_id" : ObjectId("5c93ac3ab89045027259a243"),
"volume" : 1736,
"time" : ISODate("2019-03-21T09:57:34.239Z"),
"price" : 49
},
....
],
My expected result is an array of objects where the points that should be passed to the project stage should be points in the specified range in the second $match. I tried combining the two $match stages and removing the $unwind stage and the error is gone however the time range isn't being applied
I believe you are missing a $group stage to rollback your points array
// Answer: filter the unwound points by time, then rebuild the array with
// $group so that $arrayElemAt receives an array again.
// `today`, `start` and `now` are defined outside this snippet.
const results = await Price.aggregate([
{ "$match": { "date": today } },
// One document per point.
{ "$unwind": "$points" },
// Keep only the points inside the requested time range.
{ "$match": { "points.time": { "$gte": start, "$lte": now } } },
// Ascending by time, so the first/last pushed points are the earliest/latest.
{ "$sort": { "points.time": 1 } },
// Roll the filtered points back up into one array per original document.
{ "$group": {
"_id": "$_id",
"points": { "$push": "$points" },
"date": { "$first": "$date" },
"symbol": { "$first": "$symbol" }
}},
// open/close are the first/last elements of the rebuilt array (whole point
// sub-documents, not just their price).
{ "$project": {
"high": { "$max": "$points.price" },
"low": { "$min": "$points.price" },
"open": { "$arrayElemAt": ["$points", 0] },
"close": { "$arrayElemAt": ["$points", -1] }
}}
])

MongoDB aggregate nested grouping

I have Asset collection which has data like
{
"_id" : ObjectId("5bfb962ee2a301554915"),
"users" : [
"abc.abc#abc.com",
"abc.xyz#xyz.com"
],
"remote" : {
"source" : "dropbox",
"bytes" : 1234
}
{
"_id" : ObjectId("5bfb962ee2a301554915"),
"users" : [
"pqr.pqr#pqr.com",
],
"remote" : {
"source" : "google_drive",
"bytes" : 785
}
{
"_id" : ObjectId("5bfb962ee2a301554915"),
"users" : [
"abc.abc#abc.com",
"abc.xyz#xyz.com"
],
"remote" : {
"source" : "gmail",
"bytes" : 5647
}
What I am looking for is group by users and get the total of bytes according to its source like
{
"_id" : "abc.abc#abc.com",
"bytes" : {
"google_drive": 1458,
"dropbox" : 1254
}
}
I am not getting how to get the nested output using grouping.
I have tried with the query
// Asker's attempt. Note that 'total' is written inside the _id document, so
// it is part of the grouping key rather than a separate accumulator field.
db.asset.aggregate(
[
// One document per (asset, user) pair.
{$unwind : '$users'},
{$group:{
_id:
{'username': "$users",
'source': "$remote.source",
'total': {$sum: "$remote.bytes"}} }
}
]
)
This way I am getting the result with the repeated username.
With MongoDb 3.6 and newer, you can leverage the use of $arrayToObject operator within a $mergeObjects expression and a $replaceRoot pipeline to get the desired result.
You would need to run the following aggregate pipeline though:
// Per user, total the bytes per remote source and reshape the result into
// { _id: <user>, bytes: { <source>: <total>, ... } }.
db.asset.aggregate([
// One document per (asset, user) pair.
{ "$unwind": "$users" },
// Sum bytes for every (user, source) combination.
{ "$group": {
"_id": {
"users": "$users",
"source": "$remote.source"
},
"totalBytes": { "$sum": "$remote.bytes" }
} },
// Regroup by user, collecting { k, v } pairs: the shape $arrayToObject consumes.
{ "$group": {
"_id": "$_id.users",
"counts": {
"$push": {
"k": "$_id.source",
"v": "$totalBytes"
}
}
} },
// Replace each document with the new "bytes" object merged with its existing fields.
{ "$replaceRoot": {
"newRoot": {
"$mergeObjects": [
{ "bytes": { "$arrayToObject": "$counts" } },
"$$ROOT"
]
}
} },
// Drop the intermediate k/v array.
{ "$project": { "counts": 0 } }
])
which yields
/* 1 */
{
"bytes" : {
"gmail" : 5647.0,
"dropbox" : 1234.0
},
"_id" : "abc.abc#abc.com"
}
/* 2 */
{
"bytes" : {
"google_drive" : 785.0
},
"_id" : "pqr.pqr#pqr.com"
}
/* 3 */
{
"bytes" : {
"gmail" : 5647.0,
"dropbox" : 1234.0
},
"_id" : "abc.xyz#xyz.com"
}
using the above sample documents.
You have to use $group a couple of times here. First group by the users and the source, and compute the total number of bytes using $sum.
And second with the users and $push the source and the bytes into an array
// Per user, produce an array of { source, bytes } totals.
db.collection.aggregate([
// One document per (asset, user) pair.
{ "$unwind": "$users" },
// First grouping: sum bytes per (user, source).
{ "$group": {
"_id": {
"users": "$users",
"source": "$remote.source"
},
"bytes": { "$sum": "$remote.bytes" }
}},
// Second grouping: one document per user with its per-source totals.
{ "$group": {
"_id": "$_id.users",
"data": {
"$push": {
"source": "$_id.source",
"bytes": "$bytes"
}
}
}}
])
And even if you want to convert the source and the bytes into key value format then replace the last $group stage with the below two stages.
// Fragment: these two stages replace the final $group of the pipeline above.
// Collect { k, v } pairs per user: the shape $arrayToObject consumes.
{ "$group": {
"_id": "$_id.users",
"data": {
"$push": {
"k": "$_id.source",
"v": "$bytes"
}
}
}},
// Expose the user as "username" and turn the k/v pairs into a "bytes" object.
{ "$project": {
"_id": 0,
"username": "$_id",
"bytes": { "$arrayToObject": "$data" }
}}

Using match on array field before unwind

I have the following document structure:
{
"_id" : ObjectId("5a16b7cf930a1e000465d1c5"),
"trackerId" : ObjectId("5a16b7b8930a1e000465d1c1"),
"trackingEvents" : [
{
"type" : "checkin",
"timestamp" : ISODate("2017-11-23T11:57:43.710Z"),
},
{
"type" : "connectivity",
"timestamp" : ISODate("2017-11-23T11:57:47.011Z"),
},
{
"type" : "power",
"timestamp" : ISODate("2017-11-23T11:57:47.036Z"),
}
]
}
I would like to setup a query to count number of trackingEvents with "type":"power" grouped by day for all trackingEvents happened < 1 month ago. And have the following query which works fine, except for the fact that it is not fast enough:
// Count "power" tracking events from the last month, grouped by calendar day.
// This is the asker's original (slow) form: every array element is unwound
// before any filtering happens.
db.getCollection('trackerEvents').aggregate([
  // One document per tracking event.
  { $unwind: "$trackingEvents" },
  // Keep only recent "power" events.
  {
    $match: {
      "trackingEvents.type": "power",
      "trackingEvents.timestamp": {
        "$gt": {
          "$humanTime": "1 month ago" //redash operator
        }
      }
    }
  },
  // Bucket by day (YYYY-MM-DD) and count the events in each bucket.
  {
    "$group": {
      "_id": {
        "$dateToString": {
          "format": "%Y-%m-%d",
          "date": "$trackingEvents.timestamp"
        }
      },
      count: { $sum: 1 }
    }
  },
  // A stray "," used to sit here; it left an empty slot in the pipeline array
  // and has been removed.
  // Chronological order (the day strings sort lexicographically).
  { $sort: { "_id": 1 } }
])
However, this query did not pass the code review, as my colleague suggested swapping the $match and $unwind operators so that $match goes before $unwind in order to increase the performance of the query. If I swap these two operators, I get different results. Could someone please suggest how it is possible to $match array elements of the document before $unwind?
Thanks!
You could use the $match operator as a way to filter early the documents then $filter at array level before $unwind:
// Count "power" events from the last month per day, filtering before $unwind.
// Cutoff: the current date/time moved back one calendar month.
var oneMonthAgo = new Date();
oneMonthAgo.setMonth(oneMonthAgo.getMonth()-1);
// var oneMonthAgo = moment().subtract(1, "months").unix();
db.getCollection('trackerEvents').aggregate([
// Document-level pre-filter so fewer documents reach the later stages.
// NOTE(review): without $elemMatch the two conditions may be satisfied by
// different array elements, which is why the $filter below is still needed.
{
"$match": {
"trackingEvents.type": "power",
"trackingEvents.timestamp": { "$gt": oneMonthAgo }
}
},
// Element-level filter: keep only array entries that are both "power" and recent.
{
"$project": {
"trackingEvents": {
"$filter": {
"input": "$trackingEvents",
"as": "event",
"cond": {
"$and": [
{ "$eq": ["$$event.type", "power"] },
{ "$gt": ["$$event.timestamp", oneMonthAgo] }
]
}
}
}
}
},
// One document per remaining event.
{ "$unwind": "$trackingEvents" },
// Bucket by day (YYYY-MM-DD) and count.
{
"$group": {
"_id": {
"$dateToString": {
"format": "%Y-%m-%d",
"date": "$trackingEvents.timestamp"
}
},
"count": { "$sum": 1}
}
},
// Chronological order.
{ "$sort": { "_id": 1 } }
]);

How do I aggregate and avg the time between two dates?

Assuming the below is my element structure. How can I SHELL query the mongodb and get the avg difference (average length) each trip took for every trip in the db? I am guessing subtracting dates? But then how to subtract and then avg?
"_id": {
"$oid": "5445ab058767000062"
},
"comment": null,
"scheduled_request": false,
"status": "blah",
"timestamp_started": {
"$date": "2014-10-21T00:38:28.990Z"
},
"timestamp_transaction_complete": {
"$date": "2014-10-21T00:49:12.990Z"
},
"user_id": "5445a9000057"
UPDATE ========
Here is my query
// Asker's query: average of (timestamp_transaction_complete - timestamp_started)
// over the whole collection (_id: null puts every document in a single group).
db.ambulance_requests.aggregate([
{ "$group": {
"_id": null,
"avg_time": {
"$avg": {
"$subtract": [
"$timestamp_transaction_complete",
"$timestamp_started"
]
}
}
}}
])
AND MY RESULT (from a Mac Terminal Shell):
{ "_id" : null, "avg_time" : 0 }
You $subtract and $avg by applying them in a $group pipeline stage. For "everything", use null for the grouping key:
// Average trip duration over the whole collection (_id: null = one group).
// NOTE(review): this snippet reads "timestamp_completed" from "db.trips",
// whereas the question uses "timestamp_transaction_complete" in
// "db.ambulance_requests" - adjust the names to match your own schema.
db.trips.aggregate([
{ "$group": {
"_id": null,
"avg_time": {
"$avg": {
"$subtract": [
// $ifNull substitutes 0 when the field is missing or null.
{ "$ifNull": [ "$timestamp_completed", 0 ] },
{ "$ifNull": [ "$timestamp_started", 0 ] }
]
}
}
}}
])
When you $subtract one BSON Date object from another, the difference is returned as the interval between them in milliseconds. This is also a generally handy technique for extracting the milliseconds value for other purposes.
Your single document as supplied:
{
"comment" : null,
"scheduled_request" : false,
"status" : "blah",
"timestamp_started" : ISODate("2014-10-21T00:38:28.990Z"),
"timestamp_completed" : ISODate("2014-10-21T00:49:12.990Z"),
"user_id" : "5445a9000057"
}
The result from your single document in the question:
/* 1 */
{
"_id" : null,
"avg_time" : 644000.0
}
https://mongoplayground.net/p/nFO54i5GIXU
If finishedAt does not exist in a document, then skip that document in the average calculation:
// Average (finishedAt - createdAt) over documents tagged "Canada" or "ABC",
// ignoring documents that have no finishedAt field.
db.collection.aggregate([
// Skip documents without finishedAt so they do not take part in the average.
{
"$match": {
"finishedAt": {
"$exists": true
}
}
},
// One document per tag.
{
"$unwind": "$tags"
},
// Keep only the wanted tags.
// NOTE(review): a document carrying both tags passes twice and is therefore
// counted twice in the average - confirm this is intended.
{
"$match": {
"$or": [
{
"tags.name": "Canada"
},
{
"tags.name": "ABC"
},
]
}
},
// Single group (_id: null) holding the overall average.
{
"$group": {
"_id": null,
"avg_time": {
"$avg": {
"$subtract": [
"$finishedAt",
"$createdAt"
]
}
}
}
}
])

Using the aggregation framework to compare array element overlap

I have a collections with documents structured like below:
{
carrier: "abc",
flightNumber: 123,
dates: [
ISODate("2015-01-01T00:00:00Z"),
ISODate("2015-01-02T00:00:00Z"),
ISODate("2015-01-03T00:00:00Z")
]
}
I would like to search the collection to see if there are any documents with the same carrier and flightNumber that also have dates in the dates array that overlap. For example:
{
carrier: "abc",
flightNumber: 123,
dates: [
ISODate("2015-01-01T00:00:00Z"),
ISODate("2015-01-02T00:00:00Z"),
ISODate("2015-01-03T00:00:00Z")
]
},
{
carrier: "abc",
flightNumber: 123,
dates: [
ISODate("2015-01-03T00:00:00Z"),
ISODate("2015-01-04T00:00:00Z"),
ISODate("2015-01-05T00:00:00Z")
]
}
If the above records were present in the collection I would like to return them because they both have carrier: abc, flightNumber: 123 and they also have the date ISODate("2015-01-03T00:00:00Z") in the dates array. If this date were not present in the second document then neither should be returned.
Typically I would do this by grouping and counting like below:
// Asker's baseline: find carrier/flightNumber combinations that occur in more
// than one document (the dates array is not looked at).
db.flights.aggregate([
// Group on carrier + flightNumber, collecting the document ids and a count.
{
$group: {
_id: { carrier: "$carrier", flightNumber: "$flightNumber" },
uniqueIds: { $addToSet: "$_id" },
count: { $sum: 1 }
}
},
// Keep only combinations seen more than once.
{
$match: {
count: { $gt: 1 }
}
}
])
But I'm not sure how I could modify this to look for array overlap. Can anyone suggest how to achieve this?
You $unwind the array if you want to look at the contents as "grouped" within them:
// Return the _id of every document that shares carrier, flightNumber and at
// least one date with another document.
db.flights.aggregate([
  // One document per (flight document, date) pair.
  { "$unwind": "$dates" },
  // Group on carrier + flightNumber + date. Field paths are case-sensitive, so
  // the path must be "$flightNumber" exactly as stored; "$flightnumber" would
  // resolve to a missing field and drop the flight number from the key.
  { "$group": {
    "_id": { "carrier": "$carrier", "flightNumber": "$flightNumber", "date": "$dates" },
    "count": { "$sum": 1 },
    "_ids": { "$addToSet": "$_id" }
  }},
  // A key seen more than once means that date overlaps across documents.
  { "$match": { "count": { "$gt": 1 } } },
  // Presentation only: flatten and de-duplicate the overlapping document ids.
  { "$unwind": "$_ids" },
  { "$group": { "_id": "$_ids" } }
])
That does in fact tell you the documents in which the "overlap" resides, because the "same dates", along with the other grouping key values that you are concerned about, have a "count" greater than one, which indicates the overlap.
Anything after the $match is really just for "presentation" as there is no point reporting the same _id value for multiple overlaps if you just want to see the overlaps. In fact if you want to see them together it would probably be best to leave the "grouped set" alone.
Now you could add a $lookup to that if retrieving the actual documents was important to you:
// Same overlap detection, followed by a self-$lookup that returns the full
// overlapping documents instead of only their ids.
db.flights.aggregate([
  // One document per (flight document, date) pair.
  { "$unwind": "$dates" },
  // Group on carrier + flightNumber + date. Field paths are case-sensitive, so
  // the path must be "$flightNumber" exactly as stored in the documents.
  { "$group": {
    "_id": { "carrier": "$carrier", "flightNumber": "$flightNumber", "date": "$dates" },
    "count": { "$sum": 1 },
    "_ids": { "$addToSet": "$_id" }
  }},
  // A key seen more than once means that date overlaps across documents.
  { "$match": { "count": { "$gt": 1 } } },
  // Flatten and de-duplicate the overlapping document ids.
  { "$unwind": "$_ids" },
  { "$group": { "_id": "$_ids" } },
  // Fetch the original document for each id from the same collection.
  { "$lookup": {
    "from": "flights",
    "localField": "_id",
    "foreignField": "_id",
    "as": "_ids"
  }},
  // $lookup yields an array; unwind it and promote the document to the root.
  { "$unwind": "$_ids" },
  { "$replaceRoot": {
    "newRoot": "$_ids"
  }}
])
And even do a $replaceRoot or $project to make it return the whole document. Or you could have even done $addToSet with $$ROOT if it was not a problem for size.
But the overall point is covered in the first three pipeline stages, or mostly in just the "first". If you want to work with arrays "across documents", then the primary operator is still $unwind.
Alternately for a more "reporting" like format:
// Report, per carrier/flightNumber, each overlapping date together with the
// full documents that contain it.
db.flights.aggregate([
// Keep a copy of the whole document before $unwind splits the dates array.
{ "$addFields": { "copy": "$$ROOT" } },
// One document per (flight document, date) pair.
{ "$unwind": "$dates" },
// Group on carrier + flightNumber + date, collecting the document copies.
{ "$group": {
"_id": {
"carrier": "$carrier",
"flightNumber": "$flightNumber",
"dates": "$dates"
},
"count": { "$sum": 1 },
"_docs": { "$addToSet": "$copy" }
}},
// A key seen more than once means that date overlaps across documents.
{ "$match": { "count": { "$gt": 1 } } },
// Regroup by carrier + flightNumber, listing every overlapping date and its documents.
{ "$group": {
"_id": {
"carrier": "$_id.carrier",
"flightNumber": "$_id.flightNumber",
},
"overlaps": {
"$push": {
"date": "$_id.dates",
"_docs": "$_docs"
}
}
}}
])
Which would report the overlapped dates within each group and tell you which documents contained the overlap:
{
"_id" : {
"carrier" : "abc",
"flightNumber" : 123.0
},
"overlaps" : [
{
"date" : ISODate("2015-01-03T00:00:00.000Z"),
"_docs" : [
{
"_id" : ObjectId("5977f9187dcd6a5f6a9b4b97"),
"carrier" : "abc",
"flightNumber" : 123.0,
"dates" : [
ISODate("2015-01-03T00:00:00.000Z"),
ISODate("2015-01-04T00:00:00.000Z"),
ISODate("2015-01-05T00:00:00.000Z")
]
},
{
"_id" : ObjectId("5977f9187dcd6a5f6a9b4b96"),
"carrier" : "abc",
"flightNumber" : 123.0,
"dates" : [
ISODate("2015-01-01T00:00:00.000Z"),
ISODate("2015-01-02T00:00:00.000Z"),
ISODate("2015-01-03T00:00:00.000Z")
]
}
]
}
]
}