Mongodb array values subtract then sum - mongodb

I have a collection contains 2 statuses of orders "Shipped" and "Delivered". I want to calculate the average in hours
Formula
(Delivered 1 - Shipped 1) + (Delivered 2 - Shipped 2) + (Delivered N - Shipped N)/N
here is my collection
{
trackingHistory: [
{
status: 'Shipped',
time: ISODate("2022-11-22T06:30:49.000Z")
},
{
status: 'Delivered',
time: ISODate("2022-11-25T15:30:00.000Z")
}
]
},
{
trackingHistory: [
{
status: 'Shipped',
time: ISODate("2022-11-22T09:29:45.000Z")
},
{
status: 'Delivered',
time: ISODate("2022-11-23T19:26:00.000Z")
}
]
}
here is my code
db.client_order_news.aggregate([
{ $match : {
receiverCity : 'New York',
created_at:{$gte:ISODate("2022-11-01T00:00:00.398Z"),$lt:ISODate("2022-11-30T23:59:59.398Z")},
"trackingHistory. status":"Shipped",
"trackingHistory.status":"Delivered"
} },
{ $project : { _id : 0, trackingHistory : {$filter: {
input: '$trackingHistory',
as: 'tracking',
cond: {$or: [{ $eq: ['$$tracking.status', "Shipped"] }, { $eq: ['$$tracking.status',"Delivered"] }]}
}}, } },
{$project: { "$sum": ["$price", { "$subtract": ["$deposits.amount"] } ] }}
]).pretty()

If we can assume Delivered has always newer timestamp than Shipped, one option is to use a simple $dateDiff with a $group step:
db.collection.aggregate([
{$project: {trackingHistory: "$trackingHistory.time", _id: 0}},
{$group: {
_id: 0,
timeDiff: {
$push: {
$abs: {
$dateDiff: {
startDate: {$first: "$trackingHistory"},
endDate: {$last: "$trackingHistory"},
unit: "hour"
}
}
}
}
}
},
{$project: {averageHour: {$avg: "$timeDiff"}, _id: 0}}
])
See how it works on the playground example

Related

Fill data with NULL value if it is not present in the timeperiod using mongodb aggregation pipeline

I have to write an aggreagtion pipeline in which I will pass:
Timestamps of start date and end data for a day
I have to divide the data into 30min buckets and find data in between that buckets like:
2023-01-16T00:30:00.000+00:00 , 2023-01-16T01:00:00.000+00:00, 2023-01-16T01:30:00.000+00:00 and so on.
If data is not present in any particular bucket fill the values of that bucketa with zero but give the timestamp like:
2023-01-16T01:00:00.000+00:00 ther is no data give {timestamp:2023-01-16T01:00:00.000+00:00,a:0,b:0,c:0}
I have done the following:
[{
$match: {
$and: [
{
timestamp: {
$gte: ISODate('2023-01-16T00:00:00.000Z'),
$lt: ISODate('2023-01-16T23:59:59.000Z')
}
}
]
}
}, {
$group: {
_id: {
$toDate: {
$subtract: [
{
$toLong: '$timestamp'
},
{
$mod: [
{
$toLong: '$timestamp'
},
1800000
]
}
]
}
},
in: {
$sum: '$a'
},
out: {
$sum: '$b'
},
Count: {
$sum: 1
}
}
}, {
$addFields: {
totalIn: {
$add: [
'$in',
'$out'
]
},{
$sort: {
_id: 1
}
}]
Result is:
[{
"_id": {
"2023-01-16T12:00:00.000+00:00"
}
},
"totalIn": 397,
"count":22
},
{
"_id": {
"2023-01-16T01:30:00.000+00:00"
}
},
"totalIn": 222,
"count":2
}
...]
expected result:
[{
"_id": {
"2023-01-16T12:00:00.000+00:00"
}
},
"totalIn": 397,
"count":22
},
{
"_id": {
"2023-01-16T12:30:00.000+00:00"
}
},
"totalIn": 0,
"count":0
},
{
"_id": {
"2023-01-16T01:00:00.000+00:00"
}
},
"totalIn": 0,
"count":0
},
{
"_id": {
"2023-01-16T12:00:00.000+00:00"
}
},
"totalIn": 222,
"count":2
}
...]
One option is to use $range with $dateAdd:
db.collection.aggregate([
{$match: {timestamp: {
$gte: startDate,
$lt: endDate
}}},
{$group: {
_id: {$dateTrunc: {date: "$timestamp", unit: "minute", binSize: 30}},
in: {$sum: "$a"},
out: {$sum: "$b"},
count: {$sum: 1}
}},
{$group: {
_id: 0,
data: {$push: {
timestamp: "$_id",
totalIn: {$add: ["$in", "$out"]},
count: "$count"
}}
}},
{$project: {
_id: 0, data: 1,
bins: {$map: {
input: {$range: [
0,
{$multiply: [
{$dateDiff: {
startDate: startDate,
endDate: endDate,
unit: "hour"
}},
2
]}
]},
in: {$dateAdd: {
startDate: startDate,
unit: "minute",
amount: {$multiply: ["$$this", 30]}
}}
}}
}},
{$unwind: "$bins"},
{$set: {data: {$filter: {
input: "$data",
cond: {$eq: ["$bins", "$$this.timestamp"]}
}}}},
{$project: {
_id: "$bins",
count: {$ifNull: [{$first: "$data.count"}, 0]},
totalIn: {$ifNull: [{$first: "$data.totalIn"}, 0]}
}}
])
See how it works on the playground example

MongoDB - get datewise/houlty aggregate count of column

I have set of documents in my mongoDB collection. I am looking to get datewise aggregate count of document if date range is more than a day and hourly aggregate count for same column if date query is for single day. The data may have documents with same conversationId, hence it is necessary to group with conversationId as well.Below is sample of data for reference
[
{
"_id":"c438a671-2391-4b85-815c-ecfcb3d2bb54",
"status":"INTERNAL_UPDATE",
"conversationId":"ac44781d-caab-4410-a708-9d6db8480fc3",
"messageIds":[],
"messageId":"4dc02026-ac06-4eb1-aa59-e385fcce4a36",
"responseId":"0c00c83d-61c5-4937-846c-2e6a46aae857",
"conversation":{},
"message":{},
"params":{},
"timestamp":"2021-05-04T11:40:06.552Z",
"source":{}
},
{
"_id":"98370ddf-9ff8-4347-bab7-1f7777ab9e9d",
"status":"NEW",
"conversationId":"b5dc39d2-56a1-4eb6-a728-cdbe33dca580",
"messageIds":[],
"messageId":"ba94b839-f795-44f2-aea0-173d26006f14",
"responseId":"a2b75364-447b-4345-8008-2beccd6cbb34",
"conversation":{},
"message":{},
"params":{},
"timestamp":"2021-05-05T11:40:30.897Z",
"source":{}
},
{
"_id":"db1eae2b-62d9-455c-ab46-dbfc5baf8b67",
"status":"INTERNAL_UPDATE",
"conversationId":"b5dc39d2-56a1-4eb6-a728-cdbe33dcb584",
"messageIds":[],
"messageId":"b83c743b-d36e-4fdd-9c03-21988af47263",
"responseId":"97198c09-0130-48dc-a225-6d0faeff3116",
"conversation":{},
"message":{},
"params":{},
"timestamp":"2021-05-05T11:40:31.418Z",
"source":{}
},
{
"_id":"12a21495-f857-4f18-a06e-f8ba0b951ade",
"status":"NEW",
"conversationId":"8e37c704-add8-4f9f-8e70-d630c24f653b",
"messageIds":[],
"messageId":"51a48362-545c-4f9f-930b-42e4841fc974",
"responseId":"4691468b-a43b-41d1-83df-1349fb554bfa",
"conversation":{},
"message":{},
"params":{},
"timestamp":"2021-05-06T11:43:58.174Z",
"source":{}
},
{
"_id":"4afaa735-4618-40cf-8b4f-00ee83b2c3c5",
"status":"INTERNAL_UPDATE",
"conversationId":"8e37c704-add8-4f9f-8e70-d630c24f653b",
"messageIds":[],
"messageId":"7c860126-bf1e-41b2-a7d3-6bcec3e8d5fb",
"responseId":"09cec9a1-2621-481d-b527-d98b007ef5be",
"conversation":{},
"message":{},
"params":{},
"timestamp":"2021-05-06T11:43:58.736Z",
"source":{}
},
{
"_id":"cf8deeca-2cfd-497e-b92b-03204c84217a",
"status":"NEW",
"conversationId":"3c6870b5-88d6-4e21-8629-28137dea3fee",
"messageIds":[],
"messageId":"da84e414-2269-4812-8ddd-e2cd6c9be4fd",
"responseId":"ae1014b2-0cc1-41f0-9990-cf724ed67ab7",
"conversation":{},
"message":{},
"params":{},
"timestamp":"2021-05-06T13:37:55.060Z",
"source":{}
}
]
Presently I am able to group by conversationId, but unable to get data aggregated datewise or on hourly basis if date range is on single date.
Below is the query for same
db.documentName.aggregate([
{
'$match': {
'$and': [
{
timestamp: {
'$gte': ISODate('2021-05-01T00:00:00.000Z'),
'$lte': ISODate('2021-05-10T23:59:59.999Z')
}
},
{ 'source.author': { '$regex': 'user', '$options': 'i' } },
{},
{}
]
}
},
{ '$group': {
_id: {'conversationId': '$conversationId'} },
{ '$count': 'document_count' }
])
I have tried adding something like, $hour: '$timestamp' with comma separation beside conversationId in $group, but its of no use and is giving error.
The desired result I am trying to get for above data is, something like this
[{"date": "2021-05-04", "doc_count": 1},
{"date": "2021-05-05", "doc_count": 2},
{"date": "2021-05-06", "doc_count": 2}]
As for 2021-05-05 there are 2 docs with different conversationId, and for 2021-05-06 there are 3 docs in total but 2 documents have same conversationId hence aggregate count for 2021-05-06 is also 2. Hope this clarifies my quesiton.
The question is not entirely clear to me, but it sounds like you want something like this:
The groupId is a field to rebuild the date including the hour, or not, according to your condition:
EDIT:
db.collection.aggregate([
{$match: {
timestamp: {
$gte: ISODate("2021-05-01T00:00:00.000Z"),
$lte: ISODate("2021-05-07T23:59:59.999Z")
}
}
},
{$project: {
conversationId: 1,
groupId: {
$dateFromParts: {
year: {$year: "$timestamp"},
month: {$month: "$timestamp"},
day: {$dayOfMonth: "$timestamp"},
hour: {$cond: [
{$gte: [
{$dateDiff: {
startDate: ISODate("2021-05-01T00:00:00.000Z"),
endDate: ISODate("2021-05-07T23:59:59.999Z"),
unit: "day"}}, 1]},
0,
{$hour: "$timestamp"}]}
}
}
}
},
{$group: {_id: {conversationId: "$conversationId", groupId: "$groupId"}}},
{$group: {_id: "$_id.groupId", doc_count: {$sum: 1}}},
{$project: {date: {$toString: "$_id"}, doc_count: 1, _id: 0}}
])
See how it works on the playground example
As suggested by #nimrodserok, for mongo version 4.2.9 the query would be
db.collection.aggregate([
{
$match: {
timestamp: {
$gte: ISODate("2021-05-01T00:00:00.000Z"),
$lte: ISODate("2021-05-07T23:59:59.999Z")
}
}
},
{
$project: {
conversationId: 1,
groupId: {
$dateFromParts: {
year: {
$year: "$timestamp"
},
month: {
$month: "$timestamp"
},
day: {
$dayOfMonth: "$timestamp"
},
hour: {
$cond: [
{
$gte: [
{
$subtract: [
{
$toLong: ISODate("2021-05-07T23:59:59.999Z")
},
{
$toLong: ISODate("2021-05-01T00:00:00.000Z")
}
]
},
86400000
]
},
0,
{
$hour: "$timestamp"
}
]
}
}
}
}
},
{
$group: {
_id: {
conversationId: "$conversationId",
groupId: "$groupId"
}
}
},
{
$group: {
_id: "$_id.groupId",
doc_count: {
$sum: 1
}
}
},
{
$project: {
date: {
$toString: "$_id"
},
doc_count: 1,
_id: 0
}
}
])

MongoDB - Query calculation and group multiple items

Let's say I have this data:
{"Plane":"5546","Time":"55.0", City:"LA"}
{"Plane":"5548","Time":"25.0", City:"CA"}
{"Plane":"5546","Time":"6.0", City:"LA"}
{"Plane":"5548","Time":"5.0", City:"CA"}
{"Plane":"5555","Time":"15.0", City:"XA"}
{"Plane":"5555","Time":"8.0", City:"XA"}
and more but I just visualize the data
I want to calculate and group all the time and plane, this is expected output:
{"_id:":["5546","LA"],"Sum":2,"LateRate":1,"Prob"0.5}
The sum is sum all the time, Late is sum all the time with time > "15" and Prob is Late/Sum
The code I have tried but it still is missing something:
db.Collection.aggregate([
{
$project: {
Sum: 1,
Late: {
$cond: [{ $gt: ["$Time", 15.0] }, 1, 0]
},
prob:1
}
},
{
$group:{
_id:{Plane:"$Plane", City:"$City"},
Sum: {$sum:1},
Late: {$sum: "$Late"}
}
},
{
$addFields: {
prob: {
"$divide": [
"$Late",
"$Sum"
]
}
}
},
])
db.collection.aggregate([
{
$project: {
Time: 1,
Late: {
$cond: [
{
$gt: [
{
$toDouble: "$Time"
},
15.0
]
},
"$Time",
0
]
},
prob: 1,
Plane: 1,
City: 1
}
},
{
$group: {
_id: {
Plane: "$Plane",
City: "$City"
},
Sum: {
$sum: {
"$toDouble": "$Time"
}
},
Late: {
$sum: {
$toDouble: "$Late"
}
}
}
},
{
$addFields: {
prob: {
"$divide": [
"$Late",
"$Sum"
]
}
}
}
])
Project limits the fields passed to the next stage
On string, you cannot perform all relational/arithmetic operations
Playground

MongoDB Aggregation based on userID and time period

I would like to achieve something like
{ _id: "A", count: 2 }
{ _id: "B", count: 1 }
from
{ userId: "A", timeStamp: "12:30PM" } <- start of 5 min interval A: 1
{ userId: "B", timeStamp: "12:30PM" } <- start of 5 min interval B: 1
{ userId: "B", timeStamp: "12:31PM" } <- ignored
{ userId: "A", timeStamp: "12:32PM" } <- ignored
{ userId: "B", timeStamp: "12:33PM" } <- ignored
{ userId: "A", timeStamp: "12:37PM" } <- start of next 5 min A : 2
where it groups based on userId and then after userId is group, the count is triggered every 5 mins.
For example: Within any 5 min period, starting at say midnight, an unlimited number of collections can have a timeStamp from 00:00 to 00:05 but would only be counted as 1 hit.
Hopefully I am explaining this clearly.
I'm able to group by userId and get the count in general but setting a condition of the count seems to be tricky.
You can try $bucket and $addToSet - the drawback is that you have to specify all the ranges manually:
db.col.aggregate([
{
$bucket: {
groupBy: "$timeStamp",
boundaries: [ "12:30PM", "12:35PM", "12:40PM", "12:45PM", "12:50PM", "12:55PM", "13:00PM" ],
output: {
"users" : { $addToSet: "$userId" }
}
}
},
{
$unwind: "$users"
},
{
$group: { _id: "$users", count: { $sum: 1 } }
}
])
Micki's solution is better if you have mongo 3.6.
If you have mongo 3.4 you can use $switch.
Obviously you would need to add all the cases in the day.
db.getCollection('user_timestamps').aggregate(
{
$group: {
_id: '$userId',
timeStamp: {$push: '$timeStamp'}
}
},
{
$project: {
timeStamps: {
$map: {
input: '$timeStamp',
as: 'timeStamp',
in: {
$switch: {
branches: [
{
case: {
$and: [
{$gte: ['$$timeStamp', '12:30PM']},
{$lt: ['$$timeStamp', '12:35PM']}
]
},
then: 1
},
{
case: {
$and: [
{$gte: ['$$timeStamp', '12:35PM']},
{$lt: ['$$timeStamp', '12:40PM']}
]
},
then: 2
}
],
default: 0
}
}
}
}
}
},
{
$unwind: '$timeStamps'
},
{
$group: {
_id: '$_id',
count: {
$addToSet: '$timeStamps'
}
}
},
{
$project: {
_id: true,
count: {$size: '$count'}
}
}
)
If you don't have mongo 3.4 you can replace the $switch with
cond: [
{
$and: [
{$gte: ['$$timeStamp', '12:30PM']},
{$lt: ['$$timeStamp', '12:35PM']}
]
},
1,
{
cond: [
{
$and: [
{$gte: ['$$timeStamp', '12:35PM']},
{$lt: ['$$timeStamp', '12:40PM']}
]
},
2,
0
]
}
]

Mongodb Aggregation count array/set size

Here's my problem:
Model:
{ application: "abc", date: Time.now, status: "1" user_id: [ id1, id2,
id4] }
{ application: "abc", date: Time.yesterday, status: "1", user_id: [
id1, id3, id5] }
{ application: "abc", date: Time.yesterday-1, status: "1", user_id: [
id1, id3, id5] }
I need to count the unique number of user_ids in a period of time.
Expected result:
{ application: "abc", status: "1", unique_id_count: 5 }
I'm currently using the aggregation framework and counting the ids outside mongodb.
{ $match: { application: "abc" } }, { $unwind: "$users" }, { $group:
{ _id: { status: "$status"},
users: { $addToSet: "$users" } } }
My arrays of users ids are very large, so I have to iterate the dates or I'll get the maximum document limit (16mb).
I could also $group by
{ year: { $year: "$date" }, month: { $month: "$date" }, day: {
$dayOfMonth: "$date" }
but I also get the document size limitation.
Is it possible to count the set size in mongodb?
thanks
The following will return number of uniqueUsers per application. This will apply an group operation to a result of a group operation by using pipeline feature of mongodb.
{ $match: { application: "abc" } },
{ $unwind: "$users" },
{ $group: { _id: "$status", users: { $addToSet: "$users" } } },
{ $unwind:"$users" },
{ $group : {_id : "$_id", count : {$sum : 1} } }
Hopefully this will be done in an easier way in the following releases of mongo by a command which gives the size of an array under a projection. {$project: {id: "$_id", count: {$size: "$uniqueUsers"}}}
https://jira.mongodb.org/browse/SERVER-4899
Cheers
Sorry I'm a little late to the party. Simply grouping on the 'user_id' and counting the result with a trivial group works just fine and doesn't run into doc size limits.
[
{$match: {application: 'abc', date: {$gte: startDate, $lte: endDate}}},
{$unwind: '$user_id'},
{$group: {_id: '$user_id'}},
{$group: {_id: 'singleton', count: {$sum: 1}}}
];
Use $size to get the size of set.
[
{
$match: {"application": "abc"}
},
{
$unwind: "$user_id"
},
{
$group: {
"_id": "$status",
"application": "$application",
"unique_user_id": {$addToSet: "$user_id"}
}
},
{
$project:{
"_id": "$_id",
"application": "$application",
"count": {$size: "$unique_user_id"}
}
}
]