How can I generate a report from a collection on a daily, weekly and monthly basis in MongoDB? - mongodb

This is the structure of my collection
{"_id":{
"$oid":"61a5f45e7556f5670e50bd25"
},
"agent_id":"05046630001",
"c_id":null,
"agentName":"Testing",
"agent_intercom_id":"4554",
"campaign":[
"Campaig227"
],
"first_login_time":"28-12-2021 10":"55":42 AM,
"last_logout_time":"21-01-2022 2":"20":10 PM,
"parent_id":4663,
"total_call":2,
"outbound_call":1,
"iinbound_call":1,
"average_call_handling_time":56,
"logged_in_duration":2,
"total_in_call_time":30,
"total_break_duration":10,
"total_ring_time":2,
"available_time":40,
"ideal_time":0,
"occupancy":0,
"inbound_calls_missed":0,
"created_at":{
"$date":"2021-11-29T18:30:00.000Z"
}
}
I want to generate monthly result like this:
Agent
Campaign
Total call
Outgoing
Incoming
Average Call
Total Time
Idle Time
Agent 1
Campaig227
148
38
62
12:00:18
12:46:45
0:23:57
Agent 2
Campaig227
120
58
62
16:00:18
16:46:45
0:23:57
and daily report like:
Agent
Date
Campaign
Total call
Outgoing
Incoming
Average Call
Total Time
Idle Time
Agent 1
1/1/22
Campaig2
14
10
4
4:00:18
4:46:45
0:46:26
Agent 1
2/1/22
Campaig2
24
15
9
10:00:18
9:46:45
0:15:26
Agent 2
1/1/22
Campaig1
16
10
6
4:00:18
4:46:45
0:46:26
Agent 2
2/1/22
Campaig1
30
15
15
10:00:18
9:46:45
0:15:26
Please note that this is only sample data; the actual figure is different.
I tried to do this using an aggregation pipeline, but as I am new to MongoDB I find it difficult to generate the query.

One proposal would be this one:
// Group per agent / campaign / period and aggregate the call statistics.
// $dateTrunc's "unit" controls the report granularity: use "day" for the
// daily report and "month" for the monthly one ("week" is shown here as an
// example).
db.collection.aggregate([
  {
    $group: {
      _id: {
        agent_id: "$agent_id",
        campaign: "$campaign",
        date: {
          $dateTrunc: {
            date: "$created_at",
            unit: "week",
            timezone: "Europe/Zurich",
            startOfWeek: "monday"
          }
        }
      },
      // Number of calls in the period. (The original summed
      // $total_in_call_time here — the two fields were swapped.)
      "Total call": { $sum: "$total_call" },
      Outgoing: { $sum: "$outbound_call" },
      Incoming: { $sum: "$iinbound_call" },
      "Average Call": { $avg: "$total_in_call_time" },
      // Seconds spent in calls; formatted as HH:MM:SS below.
      "Total Time": { $sum: "$total_in_call_time" },
      "Idle Time": { $sum: "$ideal_time" }
    }
  },
  {
    // Convert the second-based totals into "HH:MM:SS" strings by projecting
    // the seconds onto a Date anchored at the Unix epoch.
    $set: {
      "Average Call": { $dateToString: { date: { $toDate: { $multiply: ["$Average Call", 1000] } }, format: "%H:%M:%S" } },
      "Total Time": { $dateToString: { date: { $toDate: { $multiply: ["$Total Time", 1000] } }, format: "%H:%M:%S" } },
      "Idle Time": { $dateToString: { date: { $toDate: { $multiply: ["$Idle Time", 1000] } }, format: "%H:%M:%S" } }
    }
  },
  // Flatten the compound _id into top-level fields and drop it.
  { $replaceWith: { $mergeObjects: ["$_id", "$$ROOT"] } },
  { $unset: "_id" }
])
Note, $dateToString: {format: "%H:%M:%S"} works for periods up to 24 hours.
Mongo Playground

Related

Grouping MongoDB Data by Ignition Field Values: A Solution for Large Datasets

I want to group the data based on the values of the "ignition" field. If the "ignition" value is 1, all records with the value 1 should be grouped together until the next value of 0 is encountered, and so on.
I have 86400 records in MongoDB, and I want to query the data to achieve the desired output.
The data looks like this:
[
{
ignition: 1,
time: 112
},
{
ignition: 1,
time: 193
},
{
ignition: 0,
time: 115
},
{
ignition: 1,
time: 116
},
{
ignition: 1,
time: 117
},
{
ignition: 1,
time: 118
},
{
ignition: 0,
time: 119
},
{
ignition: 1,
time: 120
},
{
ignition: 1,
time: 121
},
{
ignition: 1,
time: 122
},
{
ignition: 0,
time: 123
},
]
I want the output like this:
{
time: [112,193],
time: [116,117,118],
time: [120,121,122]
}
// Group consecutive documents that share ignition:1 into runs, using a
// cumulative window-function sum as the run identifier.
db.collection.aggregate([
{
$setWindowFields: { //6. after this stage, each run of adjacent documents with the same $ignition shares one unique groupNum
partitionBy: null,
sortBy: {time: 1}, //4. over all documents sorted by $time
output: {
"groupNum": { //1. create a new field groupNum
$sum: { //2. by cumulatively adding
$cond: [
{$eq: ["$ignition",1]}, 0, 1 //3. 0 while ignition is 1, 1 at each ignition:0 boundary
]
},
window: {
documents: ["unbounded","current"] //5. running sum from the first document up to the current one
}
}
}
}
},
{
$match: {"ignition": 1} //7. keep only the ignition:1 documents
},
{
$group: {
_id: "$groupNum", //8. one group per run of consecutive ignition:1 documents
time: {$push: "$time"} //9. collect the run's time values into an array
}
},
{
$sort: {_id: 1} //10. sort the runs back into chronological order
}
])
Demo

Performance of mongo request for rain/sunshine/raindays on weekends

I want to know:
sum of rain (mm)
sum of sunshine (hours)
Probability (%) of a rainday with more than 0.5mm rain on weekends
On the weekends (sa+so) between week 20 to 40 for the last 17years.
I have 820k documents in 10min periods.
The request took sometimes 38sec but sometimes more than 1min.
Do you have an Idea how to improve performance?
data-Model:
[
'datum',
'regen',
'tempAussen',
'sonnenSchein',
and more...
]
schema:
// Weekend rain/sunshine summary per calendar week.
// NOTE(review): the $project below computes year/week/day from $datum, so the
// subsequent $match cannot use an index on $datum — this forces a full
// collection scan (see the discussion that follows this snippet).
[
{
// Stage 1: derive calendar fields (Europe/Berlin local time) from $datum
// and carry the measurement fields through.
$project: {
jahr: {
$year: {
date: '$datum',
timezone: 'Europe/Berlin',
},
},
woche: {
$week: {
date: '$datum',
timezone: 'Europe/Berlin',
},
},
day: {
$isoDayOfWeek: {
date: '$datum',
timezone: 'Europe/Berlin',
},
},
stunde: {
$hour: {
date: '$datum',
timezone: 'Europe/Berlin',
},
},
tagjahr: {
$dayOfYear: {
date: '$datum',
timezone: 'Europe/Berlin',
},
},
tempAussen: 1,
regen: 1,
sonnenSchein: 1,
},
},
{
// Stage 2: keep weeks 20..40 and weekend days only
// ($isoDayOfWeek >= 6 means Saturday or Sunday).
$match: {
$and: [
{
woche: {
$gte: 20,
},
},
{
woche: {
$lte: 40,
},
},
{
day: {
$gte: 6,
},
},
],
},
},
{
// Stage 3: first grouping — collapse the 10-minute samples into one
// document per (day-of-year, year), summing rain and sunshine.
$group: {
_id: ['$tagjahr', '$jahr'],
woche: {
$first: '$woche',
},
regen_sum: {
$sum: '$regen',
},
sonnenSchein_sum: {
$sum: '$sonnenSchein',
},
},
},
{
// Stage 4: flag a day as a "rain day" when it accumulated >= 0.5 mm.
$project: {
_id: '$_id',
regenTage: {
$sum: {
$cond: {
if: {
$gte: ['$regen_sum', 0.5],
},
then: 1,
else: 0,
},
},
},
woche: 1,
regen_sum: 1,
sonnenSchein_sum: 1,
},
},
{
// Stage 5: second grouping — aggregate the daily documents per week
// number across all years.
$group: {
_id: '$woche',
regen_sum: {
$sum: '$regen_sum',
},
sonnenSchein_sum: {
$sum: '$sonnenSchein_sum',
},
regenTage: {
$sum: '$regenTage',
},
},
},
{
// Stage 6: normalize to per-weekend averages.
// NOTE(review): 34 = 17 years * 2 weekend days; 2040 = 34 * 60 converts
// sunshine minutes to hours; 0.34 scales rain-day counts to a percent —
// these constants assume exactly 17 years of complete data.
$project: {
regenTage: 1,
regen_sum: {
$divide: ['$regen_sum', 34],
},
sonnenSchein_sum: {
$divide: ['$sonnenSchein_sum', 2040],
},
probability: {
$divide: ['$regenTage', 0.34],
},
},
},
{
// Stage 7: cosmetic rounding of the final figures.
$project: {
regen_sum: {
$round: ['$regen_sum', 1],
},
sonnenSchein_sum: {
$round: ['$sonnenSchein_sum', 1],
},
wahrscheinlich: {
$round: ['$probability', 0],
},
},
},
{
// Stage 8: order results by week number.
$sort: {
_id: 1,
},
},
]
this result is an example for week 20:
on the weekend of calendar week 20 I have on average 2.3 mm rain, 11.9 h sunshine and a probability of 35% that it will rain on at least one day of the weekend
_id:20
regen_sum:2.3
sonnenSchein_sum:11.9
probability:35
Without having the verbose explain output (.explain("allPlansExecution")), it is hard to say anything for sure. Here are some observations from just taking a look at the aggregation pipeline that was provided (underneath "schema:").
Before going into observations, I must ask what your specific goals are. Are operations like these something you will be running frequently? Is anything faster than 38 seconds acceptable, or is there a specific runtime that you are looking for? As outlined below, there probably isn't much opportunity for direct improvement. Therefore it might be beneficial to look into other approaches to the problem, and I'll outline one at the end.
The first observation is that this aggregation is going to perform a full collection scan. Even if an index existed on the datum field, it could not be used since the filtering in the $match is done on new fields that are calculated from datum. We could make some changes to allow an index to be used, but it probably wouldn't help. You are processing ~38% of your data (20 of the 52 weeks per year) so the overhead of doing the index scan and randomly fetching a significant portion of the data is probably more than just scanning the entire collection directly.
Secondly, you are currently $grouping twice. The only reason for this seems to be so that you can determine if a day is considered 'rainy' first (more than 0.5mm of rain). But the 'rainy day' indicator then effectively gets combined to become a 'rainy weekend' indicator in the second grouping. Although it could technically change the results a little due to the rounding done on the 24 hour basis, perhaps that small change would be worthwhile to eliminate one of the $group stages entirely?
If this were my system, I would consider pre-aggregating some of this data. Specifically having daily summaries as opposed to the 10 minute intervals of raw data would really go a long way here in reducing the amount of processing that is required to generate summaries like this. Details for each day (which won't change) would then be contained in a single document rather than in 144 individual ones. That would certainly allow an aggregation logically equivalent to the one above to process much faster than what you are currently observing.

MongoDB Aggregation for every 10 weeks between week 1 2021 (including) and week 45 2021 (excluding)

Let us say for example I have a collection which contains car sales information for a manufacturer worldwide.
Timeline Country Sales
2021-W01 A 10
2021-W02 B 20
2021-W03 C 30
…
2022-W33 Z 50
Now I would like the aggregation to compute total sales for every 10 weeks between week 1 2021 (including) and week 45 2021 (excluding).
Desired Output:
{
"result": [
{
"start": "2021-W01",
"end": "2021-W10",
"totalSales": 100
},
{
"start": "2021-W10",
"end": "2021-W20",
"totalSales": 20
},
…
{
"start": "2021-W40",
"end": "2021-W45",
"totalSales": 1
}
]
}
For this so far, I have come up with this solution.
db.collection.aggregate([
{"$match": {"$and":[{"Country": "A"}, {"Timeline": {"$in": [‘2021-W01’, ‘2021-W11’, … ‘2021-W45’]}}]}},
{"$group": {"_id": {Timeline: "$Timeline", totalSales: {"$sum": "$Sales"}}}},
{"$project": {"_id": 0, result: "$_id"}}
])
But this is producing output like this
[
{
"result": {
"Timeline": "2021-W01",
"totalSales": 10
}
},
{
"result": {
"Timeline": "2021-W02",
"totalSales": 20
}
},
…
{
"result": {
"Timeline": "2021-W45",
"totalSales": 23
}
}
]
I am unable to get aggregated results for every 10 weeks as this is only doing it for every week.
If possible, I kindly request everyone to help me understand this. Thanks.
Additional Information: (Requested by #WernfriedDomscheit)
Is the timeline according to ISO-8601? (Note, actual year might be different to ISO-Week)
Yes, it is. Only weeks are allowed(For example "2022-W34").
(yyyy-Www format)
Sample Input
{
"records": [{
"Timeline": "2021-W01",
"Sales": 10,
"Country": "A"
}, {
"Timeline": "2021-W02",
"Sales": 11,
"Country": "A"
}, {
"Timeline": "2021-W03",
"Sales": 12,
"Country": "A"
}, {
"Timeline": "2021-W04",
"Sales": 13,
"Country": "A"
}]
}
Expected Output
{
"result": [{
"start": "2021-W01",
"end": "2021-W03",
"totalSales": 21
}, {
"start": "2021-W03",
"end": "2021-W04",
"totalSales": 12
}]
}
Explanation for desired output:
Let's assume we want the aggregate the total sales for 2 weeks between 2021-W01(including) and 2021-W04 (excluding).
Add week1 sales and week2 sales => 10 + 11 => 21
( startweek: 2021-W01 (including) and endweek 2021-W03 (excluding) ).
Take week3 sales only => 12
( startweek: 2021-W03 (including) and endweek 2021-W04 (excluding) ).
Where does 2021-W01 and 2021-W45 come from? Static input values or based on collection data. Why 1-10, 10-20, 20-30, 30-40, 40-45 (and not 40-50) ?.
It is a collection data. For the last result, it is 40-45 and not 40-50 because I mentioned that we need to compute total sales for every 10 weeks between week 1 2021 (including) and week 45 2021 (excluding). So even if the collection has data till 2022-W04 we stop at week 45 2021 (excluding). This means the last result will only aggregate for the 2021-W40 - 2021-W44 range.
Do you have always at least one entry per week?
No. Certain countries may have missed feeding the data for a week.
Why do you like to exclude 2021-W04? It is not consistent with the previous intervals.
Try this one. First convert string 2021-W04 into a Date. Then convert it to a number, I would suggest YYYYWW format. Then you can define the intervals and group accordingly:
// 1) Parse the ISO "yyyy-Www" string into a Date, then re-format it as an
//    integer in %G%V (ISO year + ISO week, e.g. "2021-W04" -> 202104) so
//    weeks can be compared and bucketed numerically.
db.collection.aggregate([
{
$set: {
Time: {
$toInt: {
$dateToString: {
date: { $dateFromString: { dateString: "$Timeline", format: "%G-W%V" } },
format: "%G%V"
}
}
}
}
},
{
// 2) Bucket by the numeric week: each adjacent boundary pair forms a
//    half-open interval [lower, upper); documents outside every interval
//    fall into the "not found" default bucket. Extend `boundaries` to
//    cover the real 10-week intervals (202101, 202111, ..., 202145).
$bucket: {
groupBy: "$Time",
boundaries: [ 202101, 202103, 202105 ],
default: "not found",
output: {
totalSales: { $sum: "$Sales" },
start: { $min: "$Time" },
end: { $max: "$Time" }
}
}
}
])
You may need some final cosmetic, check $dateAdd, $dateToString, $dateFromString
Mongo Playground

How to aggregate data which an array field sum is between two values?

I have two values which are minCount and maxCount.
In my model I have field which is called counts.Something like this.
{
createdAt: date
counts: [ 0,200,100] ==> Sum 300
},
{
createdAt: date
counts: [ 200,500,0] ==> Sum 700
},
{
createdAt: date
counts: [ 0,1100,100] ==> Sum 1200
},
I need to return sum of counts which sum of counts array elements are between minCount and MaxCount.
Exm:
minCount= 400
maxCount= 1300
Return
{
createdAt: date
total: 700
},
{
createdAt: date
total: 1200
},
I already filter createdAt between two dates like this in the first stage of the pipeline.
Record.aggregate ([
{
$match: {
createdAt: {
$gte: new Date (req.body.startDate),
$lte: new Date (req.body.endDate),
},
},
},
{}, // ==> here I need to get the total counts with the condition, which I could not work out.
])
I am almost new to aggreagate pipeline so please help.
Working example - https://mongoplayground.net/p/I6LOLhTA-yA
// Stage 1 computes the element-wise sum of each document's `counts` array
// into `totalCounts`; stage 2 keeps only documents whose sum lies in the
// inclusive range [400, 1300].
db.collection.aggregate([
  {
    $project: {
      counts: 1,
      createdAt: 1,
      totalCounts: { $sum: "$counts" }
    }
  },
  {
    $match: {
      totalCounts: { $gte: 400, $lte: 1300 }
    }
  }
])

MongoDB Aggregate for a sum on a per week basis for all prior weeks

I've got a series of docs in MongoDB. An example doc would be
{
createdAt: Mon Oct 12 2015 09:45:20 GMT-0700 (PDT),
year: 2015,
week: 41
}
Imagine these span all weeks of the year and there can be many in the same week. I want to aggregate them in such a way that the resulting values are a sum of each week and all its prior weeks counting the total docs.
So if there were something like 10 in the first week of the year and 20 in the second, the result could be something like
[{ week: 1, total: 10, weekTotal: 10},
{ week: 2, total: 30, weekTotal: 20}]
Creating an aggregation to find the weekTotal is easy enough. Including a projection to show the first part
// Per-week document counts: derive (year, week) from createdAt, then count
// documents per week. The cumulative running total must still be added in a
// later pass — $group cannot carry state across grouping boundaries.
db.collection.aggregate([
{
$project: {
"createdAt": 1,
year: {$year: "$createdAt"},
week: {$week: "$createdAt"},
_id: 0
}
},
{
$group: {
_id: {year: "$year", week: "$week"},
weekTotal : { $sum : 1 }
}
},
]);
But getting past this to sum based on that week and those weeks preceding is proving tricky.
The aggregation framework is not able to do this as all operations can only effectively look at one document or grouping boundary at a time. In order to do this on the "server" you need something with access to a global variable to keep the "running total", and that means mapReduce instead:
// Server-side running total per ISO week via mapReduce. The aggregation
// pipeline cannot carry state across groups, hence the mapReduce `scope`
// global. NOTE(review): the running total assumes finalize is applied in
// key order — verify against the server's output ordering.
db.collection.mapReduce(
  function() {
    // ISO-8601 week number of a Date. Local helper instead of the original
    // Date.prototype extension (extending native prototypes is an
    // anti-pattern and is re-applied on every map invocation).
    var getWeekNumber = function(date) {
      var d = new Date(+date);
      d.setHours(0,0,0);
      d.setDate(d.getDate()+4-(d.getDay()||7));
      return Math.ceil((((d-new Date(d.getFullYear(),0,1))/8.64e7)+1)/7);
    };
    emit({ year: this.createdAt.getFullYear(), week: getWeekNumber(this.createdAt) }, 1);
  },
  // reduce receives (key, values) — the original declared only one
  // parameter, so `values` was bound to the key instead of the counts.
  function(key, values) {
    return Array.sum(values);
  },
  {
    out: { inline: 1 },
    scope: { total: 0 },
    // finalize also receives (key, reducedValue); the original one-argument
    // form added the key object to `total`, producing garbage.
    finalize: function(key, value) {
      total += value;
      return { total: total, weekTotal: value };
    }
  }
)
If you can live with the operation occuring on the "client" then you need to loop through the aggregation result and similarly sum up the totals:
// Client-side variant: compute the per-week counts on the server, then walk
// the sorted results in the shell accumulating the running total in a local
// variable.
var total = 0;
db.collection.aggregate([
{ "$group": {
"_id": {
"year": { "$year": "$createdAt" },
"week": { "$week": "$createdAt" }
},
"weekTotal": { "$sum": 1 }
}},
{ "$sort": { "_id": 1 } } // ascending (year, week) order is what makes the cumulative sum meaningful
]).map(function(doc) {
// attach the cumulative count so far to each result document
total += doc.weekTotal;
doc.total = total;
return doc;
});
It's all a matter of whether it makes the most sense to you of whether this needs to happen on the server or on the client. But since the aggregation pipline has no such "globals", then you probably should not be looking at this for any further processing without outputting to another collection anyway.