Using match on array field before unwind - mongodb

I have the following document structure:
{
"_id" : ObjectId("5a16b7cf930a1e000465d1c5"),
"trackerId" : ObjectId("5a16b7b8930a1e000465d1c1"),
"trackingEvents" : [
{
"type" : "checkin",
"timestamp" : ISODate("2017-11-23T11:57:43.710Z"),
},
{
"type" : "connectivity",
"timestamp" : ISODate("2017-11-23T11:57:47.011Z"),
},
{
"type" : "power",
"timestamp" : ISODate("2017-11-23T11:57:47.036Z"),
}
]
}
I would like to set up a query that counts the number of trackingEvents with "type":"power", grouped by day, for all trackingEvents that happened less than 1 month ago. I have the following query, which works fine except that it is not fast enough:
// Counts "power" tracking events per calendar day over the last month.
db.getCollection('trackerEvents').aggregate([
  // Flatten the array: every tracking event becomes its own document.
  { $unwind: "$trackingEvents" },
  // Keep only "power" events newer than one month.
  {
    $match: {
      "trackingEvents.type": "power",
      "trackingEvents.timestamp": {
        "$gt": {
          "$humanTime": "1 month ago" //redash operator
        }
      }
    }
  },
  // Bucket the remaining events by day (YYYY-MM-DD) and count them.
  {
    "$group": {
      "_id": {
        "$dateToString": {
          "format": "%Y-%m-%d",
          "date": "$trackingEvents.timestamp"
        }
      },
      count: { $sum: 1 }
    }
  },
  // Return the days in ascending order. (A stray comma before this stage used
  // to leave an empty slot in the pipeline array.)
  { $sort: { "_id": 1 } }
])
However, this query did not pass code review: my colleague suggested swapping the $match and $unwind operators so that $match goes before $unwind, in order to increase the performance of the query. If I swap these two operators, I get different results. Could someone please suggest how it is possible to $match array elements of the document before $unwind?
Thanks!

You could use the $match operator to filter the documents early, then $filter at the array level before $unwind:
// Cut-off date: events at or before this instant are ignored.
// Note: setMonth() rolls over when the target month is shorter than the
// current day of month (e.g. 31 March minus one month lands in early March).
var oneMonthAgo = new Date();
oneMonthAgo.setMonth(oneMonthAgo.getMonth() - 1);

db.getCollection('trackerEvents').aggregate([
  // Early filter: keep only documents holding at least one element that is
  // BOTH a "power" event AND recent. $elemMatch applies both conditions to the
  // same array element; plain dotted-path conditions could be satisfied by two
  // different elements and would let more documents through to later stages.
  {
    "$match": {
      "trackingEvents": {
        "$elemMatch": {
          "type": "power",
          "timestamp": { "$gt": oneMonthAgo }
        }
      }
    }
  },
  // Trim each array down to the matching elements before unwinding, so that
  // $unwind only produces documents that will actually be counted.
  {
    "$project": {
      "trackingEvents": {
        "$filter": {
          "input": "$trackingEvents",
          "as": "event",
          "cond": {
            "$and": [
              { "$eq": ["$$event.type", "power"] },
              { "$gt": ["$$event.timestamp", oneMonthAgo] }
            ]
          }
        }
      }
    }
  },
  { "$unwind": "$trackingEvents" },
  // Bucket by day (YYYY-MM-DD) and count.
  {
    "$group": {
      "_id": {
        "$dateToString": {
          "format": "%Y-%m-%d",
          "date": "$trackingEvents.timestamp"
        }
      },
      "count": { "$sum": 1 }
    }
  },
  { "$sort": { "_id": 1 } }
]);

Related

Aggregation error: $arrayElemAt's first argument must be an array, but is object

I'm trying to aggregate a collection in mongo using the following pipeline:
const results = await Price.aggregate([
{ $match: { date: today } },
{ $unwind: '$points' },
{ $match: { 'points.time': { $gte: start, $lte: now } } },
{ $sort: { 'points.time': 1 } },
{ $project: {
'high': { $max: '$points.price' },
'low': { $min: '$points.price' },
'open': { $arrayElemAt: ['$points', 0] },
'close': { $arrayElemAt: ['$points', -1] }
} }
])
However, the $arrayElemAt operator isn't working, presumably because one of the preceding stages ($unwind, I believe) converts the array of points I have in my documents to an object. How can I fix this?
Example document:
{
"_id" : ObjectId("5c93ac3ab89045027259a23f"),
"date" : ISODate("2019-03-21T00:00:00Z"),
"symbol" : "CC6P",
"points" : [
{
"_id" : ObjectId("5c93ac3ab89045027259a244"),
"volume" : 553,
"time" : ISODate("2019-03-21T09:35:34.239Z"),
"price" : 71
},
{
"_id" : ObjectId("5c93ac3ab89045027259a243"),
"volume" : 1736,
"time" : ISODate("2019-03-21T09:57:34.239Z"),
"price" : 49
},
....
],
My expected result is an array of objects, where the points passed to the $project stage are the points in the range specified in the second $match. I tried combining the two $match stages and removing the $unwind stage; the error is gone, but the time range isn't being applied.
I believe you are missing a $group stage to roll your points array back up:
// Filters the points of today's documents to the [start, now] window, then
// rebuilds the "points" array so that $arrayElemAt receives an array again.
const results = await Price.aggregate([
{ "$match": { "date": today } },
// After this stage "points" is a single object per document, not an array.
{ "$unwind": "$points" },
{ "$match": { "points.time": { "$gte": start, "$lte": now } } },
// Sort by time before regrouping so the first/last elements picked below
// correspond to the earliest/latest point in the window.
{ "$sort": { "points.time": 1 } },
// Regroup by the original document _id, pushing the surviving points back
// into an array and carrying over the other top-level fields.
{ "$group": {
"_id": "$_id",
"points": { "$push": "$points" },
"date": { "$first": "$date" },
"symbol": { "$first": "$symbol" }
}},
// "points" is an array again, so the array operators work here.
{ "$project": {
"high": { "$max": "$points.price" },
"low": { "$min": "$points.price" },
"open": { "$arrayElemAt": ["$points", 0] },
"close": { "$arrayElemAt": ["$points", -1] }
}}
])

$match in date after $project and $subtract returns empty result in MongoDB

The documents follow this structure:
{
"_id" : ObjectId("5a01b474d88dc4001e684c97"),
"created_time" : ISODate("2017-11-07T11:26:12.563+0000"),
"posts" : [
{
"story" : "Test",
"created_time" : ISODate("2017-11-06T17:38:02.000+0000"),
"id" : "769055806629274_768721009996087",
"_id" : ObjectId("5a01b498d88dc4001e68553c")
},
{
"story" : "Test",
"created_time" : ISODate("2017-11-05T12:00:00.000+0000"),
"id" : "1637086293239159_2011737915773993",
"_id" : ObjectId("5a01b498d88dc4001e68553d")
}
[...]
]
}
I want to filter the posts collection by created_time. Each post needs to have created_time greater than created_time of the document. In other words, I want to get posts only for the last month based on the document.
I'm trying this aggregation:
db.collection.aggregate([
{
$project: {
"past_month": {
$subtract: ["$created_time", 2629746000] //a month
},
"created_time": "$created_time",
"posts": "$posts"
}
}, {
$unwind: '$posts'
}, {
$match: {
"posts.created_time": {
$gte: "$past_month"
}
}
}, {
"$group": {
"_id": "$_id",
"posts": {
"$push": "$posts"
}
}
}
])
But the result is always empty. If I change $gte: "$past_month" to $gte: ISODate("2017-10-08T00:57:06.563+0000") to test, the result is not empty.
For the requirement:
Each post needs to have created_time greater than created_time of the
document
to be satisfied, with MongoDB Server version 3.4 and above, use the $addFields pipeline in conjunction with the $filter operator to filter the posts as:
// Replaces each document's "posts" array with only those posts created
// later than (document created_time - 2629746000 ms).
db.collection.aggregate([
{
"$addFields": {
"posts": {
"$filter": {
"input": "$posts",
"as": "post",
"cond": {
"$gt": [
"$$post.created_time",
// Subtracting a number from a date shifts it by that many milliseconds;
// the question uses 2629746000 as "a month".
{ "$subtract": ["$created_time", 2629746000] }
]
}
}
}
}
}
])
The $addFields will replace the posts array with the filtered one in the expression above.
For MongoDB 3.2, $addFields is not supported, but you can still use $filter within a $project stage instead.
For MongoDB 3.0 use a combination of $setDifference and $map operators to filter the posts array as
db.collection.aggregate([
{
"$project": {
"created_time": 1,
"posts": {
"$setDifference": [
{
"$map": {
"input": "$posts",
"as": "post",
"in": {
"$cond": [
{
"$gt": [
"$$post.created_time",
{ "$subtract": ["$created_time", 2629746000] }
]
},
{
"story" : "$$post.story",
"created_time" : "$$post.created_time",
"id" : "$$post.id",
"_id" : "$$post._id",
},
false
]
}
}
},
[false]
]
}
}
}
])
You can do this in a simple and clean way with the help of the moment.js library:
// Bounds of the previous calendar month. They are converted to native Date
// objects with toDate() because the query must compare against BSON dates;
// a moment object is not a Date.
var checkForDate = moment().subtract(1, 'months');
var startDate = moment(checkForDate).startOf('month').toDate();
var endDate = moment(checkForDate).endOf('month').toDate();

// $elemMatch makes both bounds apply to the SAME post; with a dotted path the
// two bounds could be satisfied by two different posts of the array.
db.collection.find({
  "posts": {
    "$elemMatch": {
      "created_time": {
        $gt: startDate,
        $lt: endDate
      }
    }
  }
})
You can achieve your desired result with this, without using the aggregation pipeline.

Get Last Date within Range for Each Id Group

Let's say I have a collection with the following items:
[{myId:0,date:01.01.17,data:1000},
{myId:1,date:01.02.17,data:2000},
{myId:0,date:01.03.17,data:3000},
{myId:1,date:01.04.17,data:4000},
{myId:0,date:01.05.17,data:5000}]
I want to create a query that takes a date as a parameter and returns an array with a single object for every myId: the one that has the maximum date below the requested one.
For example, calling the query with the date 15.03.17 returns:
[{myId:1,date:01.02.17,data:2000},
{myId:0,date:01.03.17,data:3000}]
And calling query with 15.01.17 date return
[{myId:0,date:01.01.17,data:1000}]
I'm looking for an answer that doesn't use db.eval
Fixing your data to make it valid:
db.junk.insertMany([
{myId:0,date: new Date("2017-01-01"),data:1000},
{myId:1,date: new Date("2017-02-01"),data:2000},
{myId:0,date: new Date("2017-03-01"),data:3000},
{myId:1,date: new Date("2017-04-01"),data:4000},
{myId:0,date: new Date("2017-05-01"),data:5000}
])
You run an aggregate statement, filtering the entries via $match, then applying $sort to ensure the order and using $last for the "max" on each grouping boundary:
db.junk.aggregate([
{ "$match": { "date": { "$lte": new Date("2017-03-15") } } },
{ "$sort": { "date": 1 } },
{ "$group": {
"_id": "$myId",
"date": { "$last": "$date" },
"data": { "$last": "$data" }
}}
])
Returns:
/* 1 */
{
"_id" : 1.0,
"date" : ISODate("2017-02-01T00:00:00.000Z"),
"data" : 2000.0
}
/* 2 */
{
"_id" : 0.0,
"date" : ISODate("2017-03-01T00:00:00.000Z"),
"data" : 3000.0
}
And for the other date:
db.junk.aggregate([
{ "$match": { "date": { "$lte": new Date("2017-01-15") } } },
{ "$sort": { "date": 1 } },
{ "$group": {
"_id": "$myId",
"date": { "$last": "$date" },
"data": { "$last": "$data" }
}}
])
Returns:
/* 1 */
{
"_id" : 0.0,
"date" : ISODate("2017-01-01T00:00:00.000Z"),
"data" : 1000.0
}
If you really must you can add a $sort as the final pipeline stage in order to ensure the order of _id ( myId value ) returned:
db.junk.aggregate([
{ "$match": { "date": { "$lte": new Date("2017-03-15") } } },
{ "$sort": { "date": 1 } },
{ "$group": {
"_id": "$myId",
"date": { "$last": "$date" },
"data": { "$last": "$data" }
}},
{ "$sort": { "_id": 1 } }
])

Using the aggregation framework to compare array element overlap

I have a collections with documents structured like below:
{
carrier: "abc",
flightNumber: 123,
dates: [
ISODate("2015-01-01T00:00:00Z"),
ISODate("2015-01-02T00:00:00Z"),
ISODate("2015-01-03T00:00:00Z")
]
}
I would like to search the collection to see if there are any documents with the same carrier and flightNumber that also have dates in the dates array that overlap. For example:
{
carrier: "abc",
flightNumber: 123,
dates: [
ISODate("2015-01-01T00:00:00Z"),
ISODate("2015-01-02T00:00:00Z"),
ISODate("2015-01-03T00:00:00Z")
]
},
{
carrier: "abc",
flightNumber: 123,
dates: [
ISODate("2015-01-03T00:00:00Z"),
ISODate("2015-01-04T00:00:00Z"),
ISODate("2015-01-05T00:00:00Z")
]
}
If the above records were present in the collection I would like to return them because they both have carrier: abc, flightNumber: 123 and they also have the date ISODate("2015-01-03T00:00:00Z") in the dates array. If this date were not present in the second document then neither should be returned.
Typically I would do this by grouping and counting like below:
db.flights.aggregate([
{
$group: {
_id: { carrier: "$carrier", flightNumber: "$flightNumber" },
uniqueIds: { $addToSet: "$_id" },
count: { $sum: 1 }
}
},
{
$match: {
count: { $gt: 1 }
}
}
])
But I'm not sure how I could modify this to look for array overlap. Can anyone suggest how to achieve this?
You $unwind the array if you want to look at the contents as "grouped" within them:
// Finds the _id of every flight document that shares at least one date with
// another document having the same carrier and flightNumber.
db.flights.aggregate([
  // One document per (flight, date) pair.
  { "$unwind": "$dates" },
  // Group identical carrier / flightNumber / date combinations. Field paths
  // are case-sensitive: the documents use "flightNumber", so "$flightnumber"
  // would resolve to a missing field and drop the flight number from the key.
  { "$group": {
    "_id": { "carrier": "$carrier", "flightNumber": "$flightNumber", "date": "$dates" },
    "count": { "$sum": 1 },
    "_ids": { "$addToSet": "$_id" }
  }},
  // A count above 1 means the same date occurs more than once for that flight.
  { "$match": { "count": { "$gt": 1 } } },
  // Presentation only: collapse to one result per overlapping document.
  { "$unwind": "$_ids" },
  { "$group": { "_id": "$_ids" } }
])
That does in fact tell you in which documents the "overlap" resides, because the "same dates", along with the other grouping key values that you are concerned about, have a "count" greater than one, indicating the overlap.
Anything after the $match is really just for "presentation" as there is no point reporting the same _id value for multiple overlaps if you just want to see the overlaps. In fact if you want to see them together it would probably be best to leave the "grouped set" alone.
Now you could add a $lookup to that if retrieving the actual documents was important to you:
// Same overlap detection as before, followed by a self-$lookup that returns
// the complete overlapping documents instead of only their _id values.
db.flights.aggregate([
  // One document per (flight, date) pair.
  { "$unwind": "$dates" },
  // Field paths are case-sensitive: the documents use "flightNumber".
  { "$group": {
    "_id": { "carrier": "$carrier", "flightNumber": "$flightNumber", "date": "$dates" },
    "count": { "$sum": 1 },
    "_ids": { "$addToSet": "$_id" }
  }},
  { "$match": { "count": { "$gt": 1 } } },
  // Collapse to one result per overlapping document _id.
  { "$unwind": "$_ids" },
  { "$group": { "_id": "$_ids" } },
  // Fetch the full document for each _id from the same collection.
  { "$lookup": {
    "from": "flights",
    "localField": "_id",
    "foreignField": "_id",
    "as": "_ids"
  }},
  // $lookup yields an array; unwind it and promote the document to the root.
  { "$unwind": "$_ids" },
  { "$replaceRoot": {
    "newRoot": "$_ids"
  }}
])
And even do a $replaceRoot or $project to make it return the whole document. Or you could have even done $addToSet with $$ROOT if it was not a problem for size.
But the overall point is covered in the first three pipeline stages, or mostly in just the "first". If you want to work with arrays "across documents", then the primary operator is still $unwind.
Alternately for a more "reporting" like format:
db.flights.aggregate([
{ "$addFields": { "copy": "$$ROOT" } },
{ "$unwind": "$dates" },
{ "$group": {
"_id": {
"carrier": "$carrier",
"flightNumber": "$flightNumber",
"dates": "$dates"
},
"count": { "$sum": 1 },
"_docs": { "$addToSet": "$copy" }
}},
{ "$match": { "count": { "$gt": 1 } } },
{ "$group": {
"_id": {
"carrier": "$_id.carrier",
"flightNumber": "$_id.flightNumber",
},
"overlaps": {
"$push": {
"date": "$_id.dates",
"_docs": "$_docs"
}
}
}}
])
Which would report the overlapped dates within each group and tell you which documents contained the overlap:
{
"_id" : {
"carrier" : "abc",
"flightNumber" : 123.0
},
"overlaps" : [
{
"date" : ISODate("2015-01-03T00:00:00.000Z"),
"_docs" : [
{
"_id" : ObjectId("5977f9187dcd6a5f6a9b4b97"),
"carrier" : "abc",
"flightNumber" : 123.0,
"dates" : [
ISODate("2015-01-03T00:00:00.000Z"),
ISODate("2015-01-04T00:00:00.000Z"),
ISODate("2015-01-05T00:00:00.000Z")
]
},
{
"_id" : ObjectId("5977f9187dcd6a5f6a9b4b96"),
"carrier" : "abc",
"flightNumber" : 123.0,
"dates" : [
ISODate("2015-01-01T00:00:00.000Z"),
ISODate("2015-01-02T00:00:00.000Z"),
ISODate("2015-01-03T00:00:00.000Z")
]
}
]
}
]
}

Compare 2 dates in mongo find method

I have mongo documents containing a last_active date and a created date. I would like to search for all documents where the day of last_active is not equal to the day of created, but I have no clue how to write the query.
In MySQL I would write it like that:
WHERE DATE_FORMAT(created, '%Y-%m-%d') != DATE_FORMAT(last_active, '%Y-%m-%d')
For MongoDB 3.6 and newer:
The $expr operator allows the use of aggregation expressions within the query language, thus you can leverage the use of $dateToString operator to transform the date field:
db.test.find({
"$expr": {
"$ne": [
{ "$dateToString": { "format": "%Y-%m-%d", "date": "$created" } },
{ "$dateToString": { "format": "%Y-%m-%d", "date": "$last_active" } }
]
}
})
or using aggregation framework with $match pipeline
db.test.aggregate([
{ "$match": {
"$expr": {
"$ne": [
{ "$dateToString": { "format": "%Y-%m-%d", "date": "$created" } },
{ "$dateToString": { "format": "%Y-%m-%d", "date": "$last_active" } }
]
}
} }
])
For MongoDB 3.0+:
You can also use the aggregation framework with the $redact pipeline operator that allows you to process the logical condition with the $cond operator and uses the special operations $$KEEP to "keep" the document where the logical condition is true or $$PRUNE to "remove" the document where the condition was false.
Consider running the following aggregate operation which demonstrates the above concept:
db.test.aggregate([
{
"$redact": {
"$cond": [
{
"$ne": [
{ "$dateToString": { "format": "%Y-%m-%d", "date": "$created" } },
{ "$dateToString": { "format": "%Y-%m-%d", "date": "$last_active" } }
]
},
"$$KEEP",
"$$PRUNE"
]
}
}
])
This operation is similar to having a $project pipeline that selects the fields in the collection and creates a new field that holds the result from the logical condition query and then a subsequent $match, except that $redact uses a single pipeline stage which is more efficient:
db.test.aggregate([
{
"$project": {
"created": 1,
"last_active": 1,
"sameDay": {
"$cond": [
{
"$eq": [
{"$substr" : ["$last_active",0, 10]},
{"$substr" : ["$created",0, 10]}
]
}, true, false
]
}
}
},
{ "$match": { "sameDay": false } }
])
Or
db.test.aggregate([
{
"$project": {
"created": 1,
"last_active": 1,
"sameDay": {
"$cond": [
{
"$eq": [
{ "$dateToString": { "format": "%Y-%m-%d", "date": "$created" } },
{ "$dateToString": { "format": "%Y-%m-%d", "date": "$last_active" } }
]
}, true, false
]
}
}
},
{ "$match": { "sameDay": false } }
])
Another approach would be to use the $where operator in your find() method but note that the query will be fairly slow since using $where alone requires a table scan and the database executes the JavaScript expression or function for each document in the collection, so combine with indexed queries if you can as query performance also improves when you express it using the standard MongoDB operators (e.g., $gt, $in):
// Selects documents whose "created" and "last_active" fall on different
// calendar days (UTC).
db.test.find({
  "$where": function() {
    // Compare the full YYYY-MM-DD prefix of the ISO string. getDate() alone
    // returns only the day of the month, so e.g. 2013-12-19 and 2014-01-19
    // would wrongly be treated as the same day.
    return this.created.toISOString().slice(0, 10) !==
      this.last_active.toISOString().slice(0, 10);
  }
});
or more compact:
// Compare the full UTC YYYY-MM-DD prefix; getDate() alone is only the day of the month.
db.test.find({ "$where": "this.created.toISOString().slice(0, 10) !== this.last_active.toISOString().slice(0, 10)" });
With the input:
/* 0 */
{
"_id" : 1,
"created" : ISODate("2014-12-19T06:01:17.171Z"),
"last_active" : ISODate("2014-12-21T15:38:13.842Z")
}
/* 1 */
{
"_id" : 2,
"created" : ISODate("2015-07-06T12:17:32.084Z"),
"last_active" : ISODate("2015-07-06T18:07:08.145Z")
}
/* 2 */
{
"_id" : 3,
"created" : ISODate("2015-07-06T06:01:17.171Z"),
"last_active" : ISODate("2015-07-07T10:04:30.921Z")
}
/* 3 */
{
"_id" : 4,
"created" : ISODate("2015-07-06T06:01:17.171Z"),
"last_active" : ISODate("2015-07-06T09:47:44.186Z")
}
/* 4 */
{
"_id" : 5,
"created" : ISODate("2013-12-19T06:01:17.171Z"),
"last_active" : ISODate("2014-01-20T13:21:37.427Z")
}
The aggregation returns:
/* 0 */
{
"result" : [
{
"_id" : 1,
"created" : ISODate("2014-12-19T06:01:17.171Z"),
"last_active" : ISODate("2014-12-21T15:38:13.842Z"),
"sameDay" : false
},
{
"_id" : 3,
"created" : ISODate("2015-07-06T06:01:17.171Z"),
"last_active" : ISODate("2015-07-07T10:04:30.921Z"),
"sameDay" : false
},
{
"_id" : 5,
"created" : ISODate("2013-12-19T06:01:17.171Z"),
"last_active" : ISODate("2014-01-20T13:21:37.427Z"),
"sameDay" : false
}
],
"ok" : 1
}
Use $cond in a MongoDB aggregation to flag the documents where created and last_active differ, as in the following aggregation:
// Returns the documents whose "created" and "last_active" fall on different
// calendar days, each wrapped in an "all" field.
db.collectionName.aggregate([
  {
    "$project": {
      // 1 when the two dates are on different days, 0 otherwise. The dates are
      // formatted as YYYY-MM-DD first: comparing the raw values would also
      // flag documents that differ only in their time of day.
      "check": {
        "$cond": {
          "if": {
            "$ne": [
              { "$dateToString": { "format": "%Y-%m-%d", "date": "$created" } },
              { "$dateToString": { "format": "%Y-%m-%d", "date": "$last_active" } }
            ]
          },
          "then": 1,
          "else": 0
        }
      },
      // Keep the whole original document for the final output.
      "all": "$$ROOT"
    }
  },
  {
    "$match": {
      "check": 1
    }
  },
  // Drop the helper flag and the top-level _id; only the document remains.
  {
    "$project": {
      "_id": 0,
      "all": 1
    }
  }
]).pretty()
One way to do this would be with $where http://docs.mongodb.org/manual/reference/operator/query/where/
Or you can use aggregate query. Something like this (improve to add fields that you need to retrieve):
// Keeps the documents whose "created" and "last_active" fall on different
// calendar days. The dates are formatted as YYYY-MM-DD before comparing;
// comparing the raw values would also match same-day documents that differ
// only in their time of day.
db.dates.aggregate([
  {
    $project: {
      created: 1,
      last_active: 1,
      are_diff: {
        $ne: [
          { $dateToString: { format: "%Y-%m-%d", date: "$created" } },
          { $dateToString: { format: "%Y-%m-%d", date: "$last_active" } }
        ]
      }
    }
  },
  { $match: { are_diff: true } }
])
You can use a function within your query:
// Selects documents whose "created" and "last_active" fall on different
// calendar days (UTC).
db.mycollection.find({
  $where: function() {
    // getDate() returns only the day of the month, so dates in different
    // months or years could compare equal; compare the YYYY-MM-DD prefix.
    return this.created.toISOString().slice(0, 10) !==
      this.last_active.toISOString().slice(0, 10);
  }
})