How do I aggregate and avg the time between two dates? - mongodb

Assuming the below is my document structure, how can I query MongoDB from the shell and get the average difference (average length) each trip took, across every trip in the db? I am guessing it involves subtracting dates? But then how do I subtract and then average?
"_id": {
"$oid": "5445ab058767000062"
},
"comment": null,
"scheduled_request": false,
"status": "blah",
"timestamp_started": {
"$date": "2014-10-21T00:38:28.990Z"
},
"timestamp_transaction_complete": {
"$date": "2014-10-21T00:49:12.990Z"
},
"user_id": "5445a9000057"
UPDATE ========
Here is my query:
db.ambulance_requests.aggregate([
{ "$group": {
"_id": null,
"avg_time": {
"$avg": {
"$subtract": [
"$timestamp_transaction_complete",
"$timestamp_started"
]
}
}
}}
])
AND MY RESULT (from a Mac Terminal Shell):
{ "_id" : null, "avg_time" : 0 }

You $subtract and $avg by applying them in a $group pipeline stage. For "everything", use null for the grouping key:
db.trips.aggregate([
{ "$group": {
"_id": null,
"avg_time": {
"$avg": {
"$subtract": [
{ "$ifNull": [ "$timestamp_completed", 0 ] },
{ "$ifNull": [ "$timestamp_started", 0 ] }
]
}
}
}}
])
When you $subtract one BSON Date object from another, the difference is returned as the milliseconds interval between them. This is also a generally handy technique for extracting the milliseconds value for other purposes.
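Since the interval comes back in milliseconds, you can also reshape it into a friendlier unit right in the pipeline. A small sketch of that idea (my own addition, reusing the field names from the pipeline above and dividing with $divide):
db.trips.aggregate([
    { "$group": {
        "_id": null,
        // average of the raw millisecond differences
        "avg_ms": {
            "$avg": { "$subtract": [ "$timestamp_completed", "$timestamp_started" ] }
        }
    }},
    // 60 * 1000 ms per minute: convert the averaged milliseconds into minutes
    { "$project": {
        "avg_minutes": { "$divide": [ "$avg_ms", 60 * 1000 ] }
    }}
])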
Your single document as supplied:
{
"comment" : null,
"scheduled_request" : false,
"status" : "blah",
"timestamp_started" : ISODate("2014-10-21T00:38:28.990Z"),
"timestamp_completed" : ISODate("2014-10-21T00:49:12.990Z"),
"user_id" : "5445a9000057"
}
The result from your single document in the question:
/* 1 */
{
"_id" : null,
"avg_time" : 644000.0
}

https://mongoplayground.net/p/nFO54i5GIXU
If finishedAt does not exist in a document, then skip that document in the average calculation:
db.collection.aggregate([
{
"$match": {
"finishedAt": {
"$exists": true
}
}
},
{
"$unwind": "$tags"
},
{
"$match": {
"$or": [
{
"tags.name": "Canada"
},
{
"tags.name": "ABC"
}
]
}
},
{
"$group": {
"_id": null,
"avg_time": {
"$avg": {
"$subtract": [
"$finishedAt",
"$createdAt"
]
}
}
}
}
])
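One thing to be aware of with the $unwind approach above: a document that has both the "Canada" and the "ABC" tag will appear twice after $unwind and therefore be counted twice in the average. If that matters, a variant without $unwind (my own adjustment, not part of the playground link) keeps each document counted once:
db.collection.aggregate([
    {
        // $in on the array field matches documents containing either tag,
        // without duplicating documents the way $unwind does
        "$match": {
            "finishedAt": { "$exists": true },
            "tags.name": { "$in": [ "Canada", "ABC" ] }
        }
    },
    {
        "$group": {
            "_id": null,
            "avg_time": {
                "$avg": { "$subtract": [ "$finishedAt", "$createdAt" ] }
            }
        }
    }
])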

Related

Aggregation error: $arrayElemAt's first argument must be an array, but is object

I'm trying to aggregate a collection in mongo using the following pipeline:
const results = await Price.aggregate([
{ $match: { date: today } },
{ $unwind: '$points' },
{ $match: { 'points.time': { $gte: start, $lte: now } } },
{ $sort: { 'points.time': 1 } },
{ $project: {
'high': { $max: '$points.price' },
'low': { $min: '$points.price' },
'open': { $arrayElemAt: ['$points', 0] },
'close': { $arrayElemAt: ['$points', -1] }
} }
])
However, the $arrayElemAt operator isn't working, presumably because one of the preceding stages ($unwind, I believe) converts the array of points in my documents into an object. How can I fix this?
Example document:
{
"_id" : ObjectId("5c93ac3ab89045027259a23f"),
"date" : ISODate("2019-03-21T00:00:00Z"),
"symbol" : "CC6P",
"points" : [
{
"_id" : ObjectId("5c93ac3ab89045027259a244"),
"volume" : 553,
"time" : ISODate("2019-03-21T09:35:34.239Z"),
"price" : 71
},
{
"_id" : ObjectId("5c93ac3ab89045027259a243"),
"volume" : 1736,
"time" : ISODate("2019-03-21T09:57:34.239Z"),
"price" : 49
},
....
],
My expected result is an array of objects, where the points passed to the $project stage are only the points in the range specified by the second $match. I tried combining the two $match stages and removing the $unwind stage; the error is gone, but the time range isn't being applied.
I believe you are missing a $group stage to roll your points array back up:
const results = await Price.aggregate([
{ "$match": { "date": today } },
{ "$unwind": "$points" },
{ "$match": { "points.time": { "$gte": start, "$lte": now } } },
{ "$sort": { "points.time": 1 } },
{ "$group": {
"_id": "$_id",
"points": { "$push": "$points" },
"date": { "$first": "$date" },
"symbol": { "$first": "$symbol" }
}},
{ "$project": {
"high": { "$max": "$points.price" },
"low": { "$min": "$points.price" },
"open": { "$arrayElemAt": ["$points", 0] },
"close": { "$arrayElemAt": ["$points", -1] }
}}
])
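If you would rather not unwind and re-group at all, you can also trim the array in place with $filter and keep the same $project. This is an alternative sketch of mine, not part of the answer above, and it assumes the points are already stored in time order (otherwise the $unwind/$sort/$group version is still the way to go):
const results = await Price.aggregate([
    { "$match": { "date": today } },
    // keep only the points whose time falls inside [start, now]
    { "$addFields": {
        "points": {
            "$filter": {
                "input": "$points",
                "as": "p",
                "cond": { "$and": [
                    { "$gte": [ "$$p.time", start ] },
                    { "$lte": [ "$$p.time", now ] }
                ]}
            }
        }
    }},
    { "$project": {
        "high": { "$max": "$points.price" },
        "low": { "$min": "$points.price" },
        "open": { "$arrayElemAt": [ "$points", 0 ] },
        "close": { "$arrayElemAt": [ "$points", -1 ] }
    }}
])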

Aggregate with $sum in mongodb

I have data in a worksheets collection like below:
/* 1 */
{
"_id" : ObjectId("5c21d780f82aa31334ab6506"),
"isBilling" : true,
"hours" : 6,
"userId" : ObjectId("5c1f38a1d7537d1444738467"),
"projectId": ObjectId("5c1f38a1d7537d1444731234");
}
/* 2 */
{
"_id" : ObjectId("5c21d780f82aa31334ab6507"),
"isBilling" : true,
"hours" : 4,
"userId" : ObjectId("5c1f38a1d7537d1444738493"),
"projectId": ObjectId("5c1f38a1d7537d1444734567");
}
/* 3 */
{
"_id" : ObjectId("5c21e10fae07cc1204a5b647"),
"isBilling" : false,
"hours" : 8,
"userId" : ObjectId("5c1f388fd7537d1444738492"),
"projectId": ObjectId("5c1f38a1d7537d1444731234");
}
I am using the aggregate query below to get the totals for these fields:
Worksheet.aggregate([
{
$match: conditions
},
{
"$group": {
"_id": null,
"billingHours": {
"$sum": {
"$cond": [{ "$eq": ["$isBilling", true] }, "$hours", 0]
}
},
"fixContract": {
"$sum": {
"$cond": [{ "$eq": ["$isBilling", true] }, 0, "$hours"]
}
}
}
}
])
Now I want the count of unique projectId values. In the above case it is 2. I tried applying two $group stages in the query above, but it is not working. I want to get a result like below:
[
{
"_id": null,
"billingHours": 0,
"fixContract": 8,
"totalProjects": 2
}
]
Use the $addToSet accumulator and then the $size operator to count the number of unique projectId values:
Worksheet.aggregate([
{ $match: conditions },
{ "$group": {
"_id": null,
"billingHours": {
"$sum": {
"$cond": [{ "$eq": ["$isBilling", true] }, "$hours", 0]
}
},
"fixContract": {
"$sum": {
"$cond": [{ "$eq": ["$isBilling", true] }, 0, "$hours"]
}
},
"projectIds": { "$addToSet": "$projectId" }
}},
{ "$addFields": { "projectIds": { "$size": "$projectIds" }}}
])
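For what it's worth, the two-$group approach the question mentions can also be made to work: group by projectId first, then roll the per-project documents up into one. A sketch of that idea (an alternative, not the answer above):
Worksheet.aggregate([
    { $match: conditions },
    // one document per project, with that project's hours split by isBilling
    { "$group": {
        "_id": "$projectId",
        "billingHours": {
            "$sum": { "$cond": [{ "$eq": ["$isBilling", true] }, "$hours", 0] }
        },
        "fixContract": {
            "$sum": { "$cond": [{ "$eq": ["$isBilling", true] }, 0, "$hours"] }
        }
    }},
    // roll the per-project documents up; counting them counts unique projects
    { "$group": {
        "_id": null,
        "billingHours": { "$sum": "$billingHours" },
        "fixContract": { "$sum": "$fixContract" },
        "totalProjects": { "$sum": 1 }
    }}
])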

Get Last Date within Range for Each Id Group

Let's say I have a collection with the following items:
[{myId:0,date:01.01.17,data:1000},
{myId:1,date:01.02.17,data:2000},
{myId:0,date:01.03.17,data:3000},
{myId:1,date:01.04.17,data:4000},
{myId:0,date:01.05.17,data:5000}]
I want to create a query that gets a date as a parameter and returns an array with a single object for every myId that has the maximum date below the requested one.
For example, calling the query with the date 15.03.17 returns:
[{myId:1,date:01.02.17,data:2000},
{myId:0,date:01.03.17,data:3000}]
And calling the query with the date 15.01.17 returns:
[{myId:0,date:01.01.17,data:1000}]
I'm looking for an answer that doesn't use db.eval
Fixing your data to make it valid:
db.junk.insertMany([
{myId:0,date: new Date("2017-01-01"),data:1000},
{myId:1,date: new Date("2017-02-01"),data:2000},
{myId:0,date: new Date("2017-03-01"),data:3000},
{myId:1,date: new Date("2017-04-01"),data:4000},
{myId:0,date: new Date("2017-05-01"),data:5000}
])
You run an aggregate statement, filtering the entries via $match, then applying $sort to ensure the order and using $last for the "max" on each grouping boundary:
db.junk.aggregate([
{ "$match": { "date": { "$lte": new Date("2017-03-15") } } },
{ "$sort": { "date": 1 } },
{ "$group": {
"_id": "$myId",
"date": { "$last": "$date" },
"data": { "$last": "$data" }
}}
])
Returns:
/* 1 */
{
"_id" : 1.0,
"date" : ISODate("2017-02-01T00:00:00.000Z"),
"data" : 2000.0
}
/* 2 */
{
"_id" : 0.0,
"date" : ISODate("2017-03-01T00:00:00.000Z"),
"data" : 3000.0
}
And for the other date:
db.junk.aggregate([
{ "$match": { "date": { "$lte": new Date("2017-01-15") } } },
{ "$sort": { "date": 1 } },
{ "$group": {
"_id": "$myId",
"date": { "$last": "$date" },
"data": { "$last": "$data" }
}}
])
Returns:
/* 1 */
{
"_id" : 0.0,
"date" : ISODate("2017-01-01T00:00:00.000Z"),
"data" : 1000.0
}
If you really must, you can add a $sort as the final pipeline stage to ensure the order of _id (the myId value) in the returned results:
db.junk.aggregate([
{ "$match": { "date": { "$lte": new Date("2017-03-15") } } },
{ "$sort": { "date": 1 } },
{ "$group": {
"_id": "$myId",
"date": { "$last": "$date" },
"data": { "$last": "$data" }
}},
{ "$sort": { "_id": 1 } }
])
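Since the question asks for the date to come in as a parameter, you can also wrap the same pipeline in a small shell helper and pass the cutoff in. Just a convenience sketch; the function name is my own:
// hypothetical helper: last entry per myId on or before the supplied cutoff
function lastEntriesBefore(cutoff) {
    return db.junk.aggregate([
        { "$match": { "date": { "$lte": cutoff } } },
        { "$sort": { "date": 1 } },
        { "$group": {
            "_id": "$myId",
            "date": { "$last": "$date" },
            "data": { "$last": "$data" }
        }},
        { "$sort": { "_id": 1 } }
    ]).toArray();
}

lastEntriesBefore(new Date("2017-03-15"))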

$match and $cond query doesn't give the same result in MongoDB aggregate

Considering these two different MongoDB queries:
startDate query in $match
db.myCollection.aggregate([{
"$match": {
"code": "2",
"startDate": {
"$lt": ISODate("2017-01-31T23:59:59.999Z")
},
}
},
{
"$group": {
"_id": {
"code": "$code"
},
"count": {
"$sum": 1
}
}
}
]);
Result:
{
"_id" : {
"code" : "2"
},
"count" : 4844.0
}
startDate query in $cond
db.myCollection.aggregate([{
"$match": {
"code": "2"
}
},
{
"$project": {
"code": "$code",
"count": {
"$cond": [{
"$lt": ["$startDate", ISODate('2017-01-31T23:59:59.999Z')]
}, 1, 0]
}
}
}, {
"$group": {
"_id": {
"code": "$code"
},
"count": {
"$sum": "$count"
}
}
}])
Result:
{
"_id" : {
"code" : "2"
},
"count" : 4935.0
}
I don't understand why the second query gives me more documents. It seems to me that these queries should give me identical results... Am I using "$cond" in the wrong way? What may cause this difference?

MongoDB Get average of group considering rank of document

I have documents, in order, like:
{
"_id": "abcde1",
"value" : 300
},
{
"_id": "abcde2",
"value" : 200
},
{
"_id": "abcde3",
"value" : 400
},
{
"_id": "abcde4",
"value" : 500
},
{
"_id": "abcde5",
"value" : 600
}
i.e., I want the average value of the first 2, the first 4 and all 5 matching documents, like this, in a single query:
{
"value_2" : 250, // Average of first 2 documents
"value_4" : 350, // Average of first four documents
"value_5" : 400 // Average of all 5 documents
}
Is it possible to group documents based on the rank (position) of the document?
I can get the 3 results in 3 separate queries. Is it possible in a single query?
You could try running the following pipeline:
db.collection.aggregate([
// previous pipeline here
{
"$group": {
"_id": null,
"values": { "$push": "$value" }
}
},
{ "$unwind": { "path": "$values", "includeArrayIndex": "rank" } },
{
"$group": {
"_id": null,
"value_2_sum": {
"$sum": {
"$cond": [
{ "$lt": ["$rank", 2] },
"$values",
0
]
}
},
"value_2_count": {
"$sum": {
"$cond": [
{ "$lt": ["$rank", 2] },
1,
0
]
}
},
"value_4_sum": {
"$sum": {
"$cond": [
{ "$lt": ["$rank", 4] },
"$values",
0
]
}
},
"value_4_count": {
"$sum": {
"$cond": [
{ "$lt": ["$rank", 4] },
1,
0
]
}
},
"value_5": { "$avg": "$values" }
}
},
{
"$project": {
"value_2" : { "$divide": ["$value_2_sum", "$value_2_count"] }, // Average of first 2 documents
"value_4" : { "$divide": ["$value_4_sum", "$value_4_count"] }, // Average of first four documents
"value_5" : 1
}
}
])
You could use a $facet aggregation stage:
// { _id: "abcde1", value: 300 }
// { _id: "abcde2", value: 200 }
// { _id: "abcde3", value: 400 }
// { _id: "abcde4", value: 500 }
// { _id: "abcde5", value: 600 }
db.collection.aggregate([
{ $facet: {
value_2: [ { $limit: 2 }, { $group: { _id: null, value_2: { $avg: "$value" } } } ],
value_4: [ { $limit: 4 }, { $group: { _id: null, value_4: { $avg: "$value" } } } ],
value_5: [ { $limit: 5 }, { $group: { _id: null, value_5: { $avg: "$value" } } } ]
}},
// {
// value_2: [ { _id: null, value_2: 250 } ],
// value_4: [ { _id: null, value_4: 350 } ],
// value_5: [ { _id: null, value_5: 400 } ]
// }
{ $set: {
value_2: { $first: "$value_2.value_2" },
value_4: { $first: "$value_4.value_4" },
value_5: { $first: "$value_5.value_5" }
}}
])
// { "value_2" : 250, "value_4" : 350, "value_5" : 400 }
The $facet stage allows us to run multiple aggregation pipelines within a single stage on the same set of input documents. Each sub-pipeline has its own field in the output document where its results are stored as an array of documents.
Each field is thus produced by its own aggregation pipeline whose first stage is a simple $limit, followed by a $group stage that'll produce the $avg (average) of all considered documents.
The second part of the pipeline (the $set stage) is just there to clean up the $facet output into the format you wished for.
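One compatibility note: $first as an array expression outside of $group is a fairly recent addition (MongoDB 4.4, as far as I recall). On older servers the same unwrapping can be done with $arrayElemAt, roughly like this for the final stage:
// drop-in replacement for the $set stage above on pre-4.4 servers
// ($addFields is the older spelling of $set, if $set itself is unavailable)
{ $addFields: {
    value_2: { $arrayElemAt: [ "$value_2.value_2", 0 ] },
    value_4: { $arrayElemAt: [ "$value_4.value_4", 0 ] },
    value_5: { $arrayElemAt: [ "$value_5.value_5", 0 ] }
}}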