MongoDB Aggregation based on userID and time period - mongodb

I would like to achieve something like
{ _id: "A", count: 2 }
{ _id: "B", count: 1 }
from
{ userId: "A", timeStamp: "12:30PM" } <- start of 5 min interval A: 1
{ userId: "B", timeStamp: "12:30PM" } <- start of 5 min interval B: 1
{ userId: "B", timeStamp: "12:31PM" } <- ignored
{ userId: "A", timeStamp: "12:32PM" } <- ignored
{ userId: "B", timeStamp: "12:33PM" } <- ignored
{ userId: "A", timeStamp: "12:37PM" } <- start of next 5 min A : 2
where documents are first grouped by userId and then, within each user's group, the count increases by at most one per 5-minute interval.
For example: within any 5-minute period, starting at say midnight, an unlimited number of documents can have a timeStamp from 00:00 to 00:05, but together they would only be counted as 1 hit.
Hopefully I am explaining this clearly.
I'm able to group by userId and get the count in general but setting a condition of the count seems to be tricky.

You can try $bucket and $addToSet - the drawback is that you have to specify all the ranges manually:
// Put documents into 5-minute windows, collect the distinct users seen in each
// window, then count how many windows each user appears in.
db.col.aggregate([
{
$bucket: {
groupBy: "$timeStamp",
// Window edges have to be listed by hand.
// NOTE(review): the edges are strings, so they are compared as strings — confirm this matches how timeStamp is stored.
boundaries: [ "12:30PM", "12:35PM", "12:40PM", "12:45PM", "12:50PM", "12:55PM", "13:00PM" ],
output: {
// $addToSet keeps each userId at most once per window.
"users" : { $addToSet: "$userId" }
}
}
},
{
// One document per (window, user) pair.
$unwind: "$users"
},
{
// Number of windows in which each user was seen.
$group: { _id: "$users", count: { $sum: 1 } }
}
])

Micki's solution is better if you have mongo 3.6.
If you have mongo 3.4 you can use $switch.
Obviously you would need to add all the cases in the day.
// Collect every user's timestamps, translate each timestamp into the number of
// the 5-minute interval it falls in, then count the distinct interval numbers
// per user. Timestamps that match none of the listed intervals map to 0.
db.getCollection('user_timestamps').aggregate([
  // One document per user holding all of that user's timestamps.
  { $group: { _id: '$userId', timeStamp: { $push: '$timeStamp' } } },
  {
    $project: {
      timeStamps: {
        $map: {
          input: '$timeStamp',
          as: 'timeStamp',
          in: {
            $switch: {
              branches: [
                {
                  case: { $and: [ { $gte: ['$$timeStamp', '12:30PM'] }, { $lt: ['$$timeStamp', '12:35PM'] } ] },
                  then: 1
                },
                {
                  case: { $and: [ { $gte: ['$$timeStamp', '12:35PM'] }, { $lt: ['$$timeStamp', '12:40PM'] } ] },
                  then: 2
                }
              ],
              default: 0
            }
          }
        }
      }
    }
  },
  // One document per (user, interval number) occurrence.
  { $unwind: '$timeStamps' },
  // $addToSet drops repeated interval numbers for the same user.
  { $group: { _id: '$_id', count: { $addToSet: '$timeStamps' } } },
  // The size of the set is the number of distinct intervals hit.
  { $project: { _id: true, count: { $size: '$count' } } }
])
If you don't have mongo 3.4 you can replace the $switch with
// Replacement for the $switch expression on servers without $switch: nested
// $cond expressions in array form [ <if>, <then>, <else> ].
// The operator name must carry the leading "$"; a bare "cond" key is not an
// aggregation operator.
$cond: [
  { $and: [ { $gte: ['$$timeStamp', '12:30PM'] }, { $lt: ['$$timeStamp', '12:35PM'] } ] },
  1,
  {
    $cond: [
      { $and: [ { $gte: ['$$timeStamp', '12:35PM'] }, { $lt: ['$$timeStamp', '12:40PM'] } ] },
      2,
      // No listed interval matched.
      0
    ]
  }
]

Related

Mongodb array values subtract then sum

I have a collection of orders whose tracking history contains two statuses, "Shipped" and "Delivered". I want to calculate the average time between them, in hours.
Formula
((Delivered 1 - Shipped 1) + (Delivered 2 - Shipped 2) + ... + (Delivered N - Shipped N)) / N
here is my collection
{
trackingHistory: [
{
status: 'Shipped',
time: ISODate("2022-11-22T06:30:49.000Z")
},
{
status: 'Delivered',
time: ISODate("2022-11-25T15:30:00.000Z")
}
]
},
{
trackingHistory: [
{
status: 'Shipped',
time: ISODate("2022-11-22T09:29:45.000Z")
},
{
status: 'Delivered',
time: ISODate("2022-11-23T19:26:00.000Z")
}
]
}
here is my code
// Asker's attempt: select New York orders created in November 2022 and keep
// only the Shipped/Delivered tracking entries.
db.client_order_news.aggregate([
{ $match : {
receiverCity : 'New York',
created_at:{$gte:ISODate("2022-11-01T00:00:00.398Z"),$lt:ISODate("2022-11-30T23:59:59.398Z")},
// NOTE(review): this key has a space after the dot, so it is a different path from "trackingHistory.status" on the next line — looks like a typo; confirm.
"trackingHistory. status":"Shipped",
"trackingHistory.status":"Delivered"
} },
// Keep only the tracking entries whose status is Shipped or Delivered.
{ $project : { _id : 0, trackingHistory : {$filter: {
input: '$trackingHistory',
as: 'tracking',
cond: {$or: [{ $eq: ['$$tracking.status', "Shipped"] }, { $eq: ['$$tracking.status',"Delivered"] }]}
}}, } },
// NOTE(review): this stage reads price and deposits.amount, which the previous $project did not keep, and uses "$sum" as an output field name — presumably left over from another query; it does not compute a time difference.
{$project: { "$sum": ["$price", { "$subtract": ["$deposits.amount"] } ] }}
]).pretty()
If we can assume Delivered has always newer timestamp than Shipped, one option is to use a simple $dateDiff with a $group step:
// Average time between the first and last tracking entry, in hours, over all
// orders. Relies on the Shipped and Delivered entries being the first and last
// elements of trackingHistory (in either order, because of $abs).
db.collection.aggregate([
  // Reduce each order to the array of its tracking timestamps.
  { $project: { trackingHistory: "$trackingHistory.time", _id: 0 } },
  {
    $group: {
      // Constant key: every order goes into the same single group.
      _id: 0,
      // Average the per-order differences with the $avg accumulator instead of
      // $push-ing each one into an array first, so the group document does not
      // grow with the number of orders.
      averageHour: {
        $avg: {
          $abs: {
            $dateDiff: {
              startDate: { $first: "$trackingHistory" },
              endDate: { $last: "$trackingHistory" },
              unit: "hour"
            }
          }
        }
      }
    }
  },
  // Return only the average.
  { $project: { averageHour: 1, _id: 0 } }
])
See how it works on the playground example

MongoDB - Query calculation and group multiple items

Let's say I have this data:
{"Plane":"5546","Time":"55.0", City:"LA"}
{"Plane":"5548","Time":"25.0", City:"CA"}
{"Plane":"5546","Time":"6.0", City:"LA"}
{"Plane":"5548","Time":"5.0", City:"CA"}
{"Plane":"5555","Time":"15.0", City:"XA"}
{"Plane":"5555","Time":"8.0", City:"XA"}
and more but I just visualize the data
I want to calculate and group all the time and plane, this is expected output:
{"_id":["5546","LA"],"Sum":2,"LateRate":1,"Prob":0.5}
The sum is sum all the time, Late is sum all the time with time > "15" and Prob is Late/Sum
The code I have tried but it still is missing something:
// Asker's attempt: flag each record as late (Time > 15), then count records
// and late records per Plane/City and divide the two.
db.Collection.aggregate([
{
// NOTE(review): only Sum, Late and prob are listed here, so Plane, City and Time are not passed on to the $group stage below.
$project: {
Sum: 1,
Late: {
// NOTE(review): the sample data stores Time as a string ("55.0") while this compares it with the number 15.0 — confirm the intended type.
$cond: [{ $gt: ["$Time", 15.0] }, 1, 0]
},
prob:1
}
},
{
$group:{
_id:{Plane:"$Plane", City:"$City"},
// Number of records in the group.
Sum: {$sum:1},
// Number of records flagged as late.
Late: {$sum: "$Late"}
}
},
{
$addFields: {
// Share of late records in the group.
prob: {
"$divide": [
"$Late",
"$Sum"
]
}
}
},
])
// Per (Plane, City): the total of all Time values, the total of the Time
// values above 15, and the ratio of the two. Time is converted with $toDouble
// before it is compared or summed.
// NOTE(review): this sums Time values rather than counting records, whereas the
// expected output in the question shows counts — confirm which is wanted.
db.collection.aggregate([
  {
    // Plane, City and Time are listed explicitly so they reach the $group stage.
    $project: {
      Time: 1,
      // Late carries the record's Time when it exceeds 15, otherwise 0.
      Late: { $cond: [ { $gt: [ { $toDouble: "$Time" }, 15.0 ] }, "$Time", 0 ] },
      prob: 1,
      Plane: 1,
      City: 1
    }
  },
  {
    $group: {
      _id: { Plane: "$Plane", City: "$City" },
      Sum: { $sum: { $toDouble: "$Time" } },
      Late: { $sum: { $toDouble: "$Late" } }
    }
  },
  // Ratio of the late total to the overall total.
  { $addFields: { prob: { $divide: [ "$Late", "$Sum" ] } } }
])
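$project limits the fields passed to the next stage, so every field needed later (Plane, City, Time) must be listed in it.
Time is stored as a string, and relational/arithmetic operators do not work as intended on strings, so convert it with $toDouble first.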
Playground

MongoDB: Combining query results of two collections as one

There are two collections (view and click) like following:
# View collection
_id publisher_id created_at
617f8ea98e0f54f05e10e796 1 2021-11-01T00:00:00.000Z
617f8eab8e0f54f05e10e798 1 2021-11-01T00:00:00.000Z
617f8eac8e0f54f05e10e79a 1 2021-11-01T00:00:00.000Z
617f90cea187d30ebbecdee9 2 2021-11-01T00:00:00.000Z
# Click collection
_id publisher_id created_at
617f8ea98e0f54f05e10e796 1 2021-11-01T00:00:00.000Z
617f8eab8e0f54f05e10e798 2 2021-11-01T00:00:00.000Z
How can I get the following expected results with one query?
(or)
What is the best way for the following expected results?
# Expected For Publisher ID(1)
_id view_count click_count
2021/11/1 3 1
# Expected For Publisher ID(2)
_id view_count click_count
2021/11/1 1 1
Currently, I am using 2 queries for both collections and combining results as one in code.
For View
// Daily view counts for publisher 1, for created_at from 2021-11-01
// (inclusive) up to 2021-11-28 (exclusive).
db.view.aggregate([
  // Stage 1: both conditions must hold (top-level fields are ANDed).
  {
    $match: {
      publisher_id: 1,
      created_at: { $gte: ISODate("2021-11-01"), $lt: ISODate("2021-11-28") }
    }
  },
  // Stage 2: one group per calendar day, counting its documents.
  {
    $group: {
      _id: { $dateToString: { format: "%Y/%m/%d", date: "$created_at" } },
      count: { $sum: 1 }
    }
  }
])
For Click
// Daily click counts for publisher 1, for created_at from 2021-11-01
// (inclusive) up to 2021-11-28 (exclusive).
db.click.aggregate([
  // Stage 1: both conditions must hold (top-level fields are ANDed).
  {
    $match: {
      publisher_id: 1,
      created_at: { $gte: ISODate("2021-11-01"), $lt: ISODate("2021-11-28") }
    }
  },
  // Stage 2: one group per calendar day, counting its documents.
  {
    $group: {
      _id: { $dateToString: { format: "%Y/%m/%d", date: "$created_at" } },
      count: { $sum: 1 }
    }
  }
])
Because you are querying two different collections there is no "good" way to merge this into one query, the only way I can think of is using $facet, where the first stage is the "normal" one, and the other stage starts with a $lookup from the other collection.
This approach does add overhead, which is why I recommend to just keep doing the merge in code, however for the sake of answering here is a sample:
// Per-day view_count and click_count for publisher 1 in one aggregation.
// The "views" facet filters the view collection itself. The "clicks" facet
// keeps a single view document only as a seed from which to $lookup the
// matching click documents.
// NOTE: because of that seed, no clicks are returned when the view collection
// is empty.
db.view.aggregate([
  {
    $facet: {
      views: [
        {
          $match: {
            publisher_id: 1,
            created_at: { $gte: ISODate("2021-11-01"), $lt: ISODate("2021-11-28") }
          }
        },
        // Tag the origin so views and clicks can be counted separately after
        // the two lists are merged.
        { $addFields: { source: "view" } }
      ],
      clicks: [
        // Any one document will do; it only drives the $lookup once.
        { $limit: 1 },
        {
          $lookup: {
            from: "click",
            let: {},
            pipeline: [
              {
                $match: {
                  publisher_id: 1,
                  created_at: { $gte: ISODate("2021-11-01"), $lt: ISODate("2021-11-28") }
                }
              },
              { $addFields: { source: "click" } }
            ],
            as: "clicks"
          }
        },
        // Turn the looked-up array back into one root document per click.
        { $unwind: "$clicks" },
        { $replaceRoot: { newRoot: "$clicks" } }
      ]
    }
  },
  // Combine both lists and emit one document per view/click.
  { $project: { merged: { $concatArrays: [ "$views", "$clicks" ] } } },
  { $unwind: "$merged" },
  {
    $group: {
      _id: { $dateToString: { format: "%Y/%m/%d", date: "$merged.created_at" } },
      // Count each kind on its own instead of a single combined total.
      view_count: { $sum: { $cond: [ { $eq: [ "$merged.source", "view" ] }, 1, 0 ] } },
      click_count: { $sum: { $cond: [ { $eq: [ "$merged.source", "click" ] }, 1, 0 ] } }
    }
  }
])
Mongo Playground

MongoDB aggregation: count appearances of each value of a field per id

Data example:
{ id: 1, field: a, .. }
{ id: 1, field: a, .. }
{ id: 1, field: b, .. }
{ id: 2, field: b, .. }
Desired result:
{ id: 1, countA: 2, countB: 1 }
{ id :2, countA:0, countB: 1 }
'field' is an enum, so I know all the values in advance and can give names to the counters.
I have a solution but it seems that there is a better one. My solution:
// Count documents per (id, field) pair, spread each count into the column for
// its field value, then collapse to one document per id.
db.collection.aggregate([
  { $group: { _id: { id: "$id", field: "$field" }, count: { $sum: 1 } } },
  {
    $project: {
      _id: 1,
      // Each row carries its count in exactly one column and 0 in the other.
      countA: { $cond: [ { $eq: [ "$_id.field", "a" ] }, "$count", 0 ] },
      countB: { $cond: [ { $eq: [ "$_id.field", "b" ] }, "$count", 0 ] }
    }
  },
  // $max picks the populated column value over the 0 placeholders.
  { $group: { _id: "$_id.id", countA: { $max: "$countA" }, countB: { $max: "$countB" } } }
])
Update: I have a better solution that places the $project before the grouping, so only one $group is needed, but it uses the same principle. It still seems that there should be something more built-in for this purpose.
Thanks!
I believe you just need one $group stage to achieve what you want.
Use $sum together with $cond to count the documents whose field is "a" and those whose field is "b".
Try this:
// One $group per id: add 1 to countA for every document whose field is "a" and
// 1 to countB for every document whose field is "b".
db.collection.aggregate([
  {
    $group: {
      _id: "$id",
      // Also expose the grouping key under the name "id".
      id: { $first: "$id" },
      countA: { $sum: { $cond: [ { $eq: [ "$field", "a" ] }, 1, 0 ] } },
      countB: { $sum: { $cond: [ { $eq: [ "$field", "b" ] }, 1, 0 ] } }
    }
  }
])
Have a look at this Mongo Playground for working demo of the query.
I hope this is what you are looking for!

Mongo aggregation query to calculate multiple computed values

Given the below documents.
{
_id: 1,
ExpirationDate: ISODate("2017-05-02T09:29:46.006+0000")
}
{
_id: 2,
ExpirationDate: ISODate("2017-05-12T09:29:46.006+0000")
}
{
_id: 3,
ExpirationDate: ISODate("2017-05-23T09:29:46.006+0000")
}
How can I use the aggregation pipeline to compute the following output?
{
"NumberOfSubscriptionExpiringToday": 12,
"NumberOfSubscriptionExpiringWithInAWeek": 4
}
I am looking to accomplish this with just one query instead of two. Here is what I have so far...
// Query 1: count the documents whose ExpirationDate lies between
// 2017-05-02T00:00 and 2017-05-03T00:00 (both bounds inclusive).
// NOTE(review): the collection the call is made on is not shown in this snippet.
.aggregate([
{
$match: {
"ExpirationDate": {
$gte: ISODate("2017-05-02T00:00:00.000+0000"),
$lte: ISODate("2017-05-03T00:00:00.000+0000")
}
}
},
{
// Only _id is kept; the documents are just counted afterwards.
$project: {
_id: 1
}
},
{
// Emits a single document with the count in the field "ExpiringToday".
$count: "ExpiringToday"
}
]);
// Query 2: same shape, with the upper bound moved to 2017-05-08T00:00
// (inclusive).
.aggregate([
{
$match: {
"ExpirationDate": {
$gte: ISODate("2017-05-02T00:00:00.000+0000"),
$lte: ISODate("2017-05-08T00:00:00.000+0000")
}
}
},
{
$project: {
_id: 1
}
},
{
// Emits a single document with the count in the field "ExpiringInSevenDays".
$count: "ExpiringInSevenDays"
}
]);
You can do it in single aggregation query with $cond operator to check if each document expiration date falls into [today, tomorrow) range, or in [tomorrow, weekAfterToday) range:
// Window boundaries: [today, tomorrow) and [tomorrow, weekAfterToday).
var today = ISODate("2017-05-04T00:00:00.000");
var tomorrow = ISODate("2017-05-05T00:00:00.000");
var weekAfterToday = ISODate("2017-05-11T00:00:00.000");

// Count, in one pass, the subscriptions expiring today and those expiring in
// the rest of the week.
db.collection.aggregate([
  // Only documents inside [today, weekAfterToday) take part.
  { $match: { ExpirationDate: { $gte: today, $lt: weekAfterToday } } },
  {
    $group: {
      // Constant key: all matched documents form a single group.
      _id: 1,
      // Adds 1 for each document inside [today, tomorrow).
      NumberOfSubscriptionExpiringToday: {
        $sum: {
          $cond: [
            { $and: [ { $gte: [ "$ExpirationDate", today ] }, { $lt: [ "$ExpirationDate", tomorrow ] } ] },
            1,
            0
          ]
        }
      },
      // Adds 1 for each document at or after tomorrow; the upper bound is
      // already enforced by $match.
      NumberOfSubscriptionExpiringWithInAWeek: {
        $sum: { $cond: [ { $gte: [ "$ExpirationDate", tomorrow ] }, 1, 0 ] }
      }
    }
  },
  // Drop the constant grouping key from the result.
  { $project: { _id: 0 } }
]);
Also consider simply making two separate count requests:
// Two separate counts over the same windows: [today, tomorrow) and
// [tomorrow, weekAfterToday). Uses countDocuments() because count() is
// deprecated; on shells that predate countDocuments(), count() takes the same
// filter.
var numberOfSubscriptionExpiringToday = db.collection.countDocuments(
  { "ExpirationDate": { $gte: today, $lt: tomorrow } }
);
var numberOfSubscriptionExpiringWithInAWeek = db.collection.countDocuments(
  { "ExpirationDate": { $gte: tomorrow, $lt: weekAfterToday } }
);