Let's say I have this data:
{"Plane":"5546","Time":"55.0", City:"LA"}
{"Plane":"5548","Time":"25.0", City:"CA"}
{"Plane":"5546","Time":"6.0", City:"LA"}
{"Plane":"5548","Time":"5.0", City:"CA"}
{"Plane":"5555","Time":"15.0", City:"XA"}
{"Plane":"5555","Time":"8.0", City:"XA"}
and more but I just visualize the data
I want to calculate and group all the time and plane, this is expected output:
{"_id:":["5546","LA"],"Sum":2,"LateRate":1,"Prob"0.5}
The sum is sum all the time, Late is sum all the time with time > "15" and Prob is Late/Sum
The code I have tried but it still is missing something:
db.Collection.aggregate([
{
$project: {
Sum: 1,
Late: {
$cond: [{ $gt: ["$Time", 15.0] }, 1, 0]
},
prob:1
}
},
{
$group:{
_id:{Plane:"$Plane", City:"$City"},
Sum: {$sum:1},
Late: {$sum: "$Late"}
}
},
{
$addFields: {
prob: {
"$divide": [
"$Late",
"$Sum"
]
}
}
},
])
db.collection.aggregate([
{
$project: {
Time: 1,
Late: {
$cond: [
{
$gt: [
{
$toDouble: "$Time"
},
15.0
]
},
"$Time",
0
]
},
prob: 1,
Plane: 1,
City: 1
}
},
{
$group: {
_id: {
Plane: "$Plane",
City: "$City"
},
Sum: {
$sum: {
"$toDouble": "$Time"
}
},
Late: {
$sum: {
$toDouble: "$Late"
}
}
}
},
{
$addFields: {
prob: {
"$divide": [
"$Late",
"$Sum"
]
}
}
}
])
Project limits the fields passed to the next stage
On string, you cannot perform all relational/arithmetic operations
Playground
Related
Q1. I need to filter data by created date and driverId then need to sum up the total by Hourly, Weekly, Monthly, and Yearly. I already checked with other solutions but it doesn't help much.
Sample Data:
[
{
id: "1",
created : "2022-01-04T03:22:18.739Z",
completed: "2022-01-06T03:53:28.463Z",
driverId: "B-72653",
total: 15,
},
{
id: "2",
created : "2022-01-01T03:22:18.739Z",
completed: "2022-01-02T03:53:28.463Z",
driverId: "B-72653",
total: 33
},
{
id: "3",
created : "2021-08-26T01:22:18.739Z",
completed: "2021-08-26T09:53:28.463Z",
driverId: "B-72653",
total: 43
},
{
id: "4",
created : "2021-03-26T02:22:18.739Z",
completed: "2021-03-26T07:53:28.463Z",
driverId: "B-73123",
total: 35
},
]
Response needed:
{
Hourly:[10,5,5,6,7,8,4,5,6,3,44,2,1,2,3,44,5,6,75,4,3,2,1], // 24 Hours (Each Hour Total)
Weekly:[10,30,34,45,56,67,78], // 7 days (Each Day Total)
Monthly:[10,30,34,45,56,67,78,55,44,33,22,12], // 12 Months (Each Month Total)
Yearly: [10,30] // Year Total (Each Year Total)
}
Q2. How can we filter nested array by-products > brand id and get the sum of product price by its id and filter by Hourly, Weekly, Monthly, Yearly?.
You can use $group with _id being $hour / $week / $month / $year to aggregate the sum. $push them into an array to get your expected result.
Use $facet to repeat the process for all 4 cases.
db.collection.aggregate([
{
"$facet": {
"Hourly": [
{
$group: {
_id: {
$hour: "$created"
},
total: {
$sum: "$total"
}
}
},
{
$sort: {
_id: 1
}
},
{
$group: {
_id: null,
result: {
$push: {
hour: "$_id",
total: "$total"
}
}
}
}
],
Weekly: [
{
$group: {
_id: {
"$week": "$created"
},
total: {
$sum: "$total"
}
}
},
{
$sort: {
_id: 1
}
},
{
$group: {
_id: null,
result: {
$push: {
week: "$_id",
total: "$total"
}
}
}
}
],
Monthly: [
{
$group: {
_id: {
$month: "$created"
},
total: {
$sum: "$total"
}
}
},
{
$sort: {
_id: 1
}
},
{
$group: {
_id: null,
result: {
$push: {
month: "$_id",
total: "$total"
}
}
}
}
],
Yearly: [
{
$group: {
_id: {
$year: "$created"
},
total: {
$sum: "$total"
}
}
},
{
$sort: {
_id: 1
}
},
{
$group: {
_id: null,
result: {
$push: {
year: "$_id",
total: "$total"
}
}
}
}
]
}
},
{
"$addFields": {
"Hourly": {
"$arrayElemAt": [
"$Hourly",
0
]
},
"Weekly": {
"$arrayElemAt": [
"$Weekly",
0
]
},
"Monthly": {
"$arrayElemAt": [
"$Monthly",
0
]
},
"Yearly": {
"$arrayElemAt": [
"$Yearly",
0
]
}
}
},
{
"$addFields": {
"Hourly": "$Hourly.result",
"Weekly": "$Weekly.result",
"Monthly": "$Monthly.result",
"Yearly": "$Yearly.result"
}
}
])
Here is the Mongo playground for your reference.
Considering my documents are structured like the one below, I'm trying to calculate the percentage of people with a certain age over the total number of people (each document is a single person).
{
"codf": "002",
"nome": "Debora",
"cognome": "Palermo",
"datanascita": "1953-01-17"
"age": 41
}
What I tried so far is in the linked Mongo playground below: https://mongoplayground.net/p/8KF9M7f3PIT
Basically, I successfully calculated the total number of people with a certain age, but I'm failing to calculate the total amount of people in the collection to actually calculate the percentage, that because if I use the $count operator, it aggregates the documents and I don't want that.
The output should look like this:
{
"age": 41,
"percentage_with_age": <actual_percentage>
}
In order to calculate the age properly I would suggest this way:
age: {
$subtract: [
{ $subtract: [{ $year: "$$NOW" }, { $year: { $toDate: "$datanascita" } }] },
{ $cond: [{ $lt: [{ $dayOfYear: { $toDate: "$datanascita" } }, { $dayOfYear: "$$NOW" }] }, 0, 1] }
]
}
Then to get the percentage you can use this one:
db.pazienti.aggregate([
{
$project: {
datanascita: { $toDate: "$datanascita" },
age: {
$subtract: [
{ $subtract: [{ $year: "$$NOW" }, { $year: { $toDate: "$datanascita" } }] },
{ $cond: [{ $lt: [{ $dayOfYear: { $toDate: "$datanascita" } }, { $dayOfYear: "$$NOW" }] }, 0, 1] }
]
}
}
},
{ $group: { _id: "$age", count: { $sum: 1 } } },
{
$group: {
_id: null,
total: { $sum: "$count" },
age: { $push: { count: "$count", age: "$_id" } }
}
},
{
$set: {
age: {
$map: {
input: "$age",
in: {
age: "$$this.age",
count: "$$this.count",
percentage_with_this_age: { $round: [{ $multiply: [{ $divide: ["$$this.count", "$total"] }, 100] }, 2] }
}
}
}
}
},
{ $unwind: "$age" },
{ $replaceRoot: { newRoot: "$age" } },
{ $sort: { age: 1 } }
])
Or you can also first run $unwind:
db.pazienti.aggregate([
{
$project: {
datanascita: { $toDate: "$datanascita" },
age: {
$subtract: [
{ $subtract: [{ $year: "$$NOW" }, { $year: { $toDate: "$datanascita" } }] },
{ $cond: [{ $lt: [{ $dayOfYear: { $toDate: "$datanascita" } }, { $dayOfYear: "$$NOW" }] }, 0, 1] }
]
}
}
},
{ $group: { _id: "$age", count: { $sum: 1 } } },
{
$group: {
_id: null,
total: { $sum: "$count" },
age: { $push: { count: "$count", age: "$_id" } }
}
},
{ $unwind: "$age" },
{
$set: {
age: "$age.age",
count: "$age.count",
percentage_with_this_age: { $round: [{ $multiply: [{ $divide: ["$age.count", "$total"] }, 100] }, 2] },
total: "$$REMOVE"
}
},
{ $sort: { age: 1 } }
])
If you prefer $facet:
db.pazienti.aggregate([
{
$project: {
datanascita: { $toDate: "$datanascita" },
age: {
$subtract: [
{ $subtract: [{ $year: "$$NOW" }, { $year: { $toDate: "$datanascita" } }] },
{ $cond: [{ $lt: [{ $dayOfYear: { $toDate: "$datanascita" } }, { $dayOfYear: "$$NOW" }] }, 0, 1] }
]
}
}
},
{
$facet:
{
age: [{ $group: { _id: "$age", count: { $sum: 1 } } }],
total: [{ $group: { _id: null, count: { $sum: 1 } } }]
}
},
{ $unwind: "$age" },
{
$set: {
age: "$age._id",
count: "$age.count",
percentage_with_this_age: { $round: [{ $multiply: [{ $divide: ["$age.count", { $first: "$total.count" }] }, 100] }, 2] },
total: "$$REMOVE"
}
},
{ $sort: { age: 1 } }
])
My objective is to write an efficient query, that with the given input, gives me the expected output. I have some working solution, but all "types" are "manually" written, so I guess I'm looking for help to get the same output but in a different way.
input
reportId
type
weight
A
"fish"
4
A
"fish"
2
A
"cow"
0
B
"fish"
2
B
"tuna"
1
B
"bird"
Expected output
[
{
reportId: "A",
totalCount: 3,
totalWeight: 6,
fishCount: 2,
tunaCount: 0,
cowCount: 1,
birdCount: 0
},
{
reportId: "A",
totalCount: 3,
totalWeight: 2,
fishCount: 1,
tunaCount: 1,
cowCount: 0,
birdCount: 1
},
]
Partial "hard-coded" solution
What I have been doing so far is to create 2 group-by steps: It kind of get's the job done, but in my real use-case there are a lot of types, and therefore the group-stages are very long.
[
{
$group: {
_id: { reportId: "$reportId", type: $type },
count: { $sum: 1 },
totalWeight: { $sum: "$weight" }
}
},
{
$group: {
_id: "$_id.reportId",
totalCount: { $sum: "$totalCount" },
totalWeight: { $sum: "$totalWeight" },
fishCount: {
$sum: {
$cond: {
"if": { $eq: ["$_id.type", "fish"] },
then: "$count",
else: 0
}
}
},
tunaCount: {
$sum: {
$cond: {
"if": { $eq: ["$_id.type", "tuna"] },
then: "$count",
else: 0
}
}
},
// <== And here I have a count blog for each type. Can I get the same result in a better way?
}
}
]
I will focus to the second part, which is the difficult one. I don't know whether there is a shorter and better solution, but this one should work:
db.collection.aggregate([
{
$unset: "_id"
},
{
$set: {
data: {
"$objectToArray": "$$ROOT"
}
}
},
{
$group: {
_id: "$reportId",
data: {
$push: "$data"
}
}
},
{
$set: {
data: {
$reduce: {
input: "$data",
initialValue: [],
in: {
$concatArrays: [
"$$value",
"$$this"
]
}
}
}
}
},
{
$set: {
data: {
$filter: {
input: "$data",
cond: {
$not: {
$in: [
"$$this.k",
[
"totalCount",
"totalWeight"
]
]
}
}
}
}
}
},
{
$unwind: "$data"
},
{
$group: {
_id: "$_id",
data: {
$push: "$data"
}
}
},
{
$replaceRoot: {
newRoot: {
$arrayToObject: "$data"
}
}
}
])
See Mongo playground
I have a pipeline that gives me the result according to the players belonging to a certain company in a daily, weekly, and monthly manner. I have a date filter in the dashboard which gives an iso date range. I want to limit or range my results according to the date filter? is there any way to do it in the pipeline?
if (chartType === 'Daily') {
statsPipelineCondition = {
_id: { day: { $dayOfMonth: '$createdAt' }, month: { $month: '$createdAt' }, year: { $year: '$createdAt' } },
};
sortCondition = { '_id.year': 1, '_id.month': 1, '_id.day': 1 };
} else if (chartType === 'Monthly') {
statsPipelineCondition = {
_id: { month: { $month: '$createdAt' }, year: { $year: '$createdAt' } },
};
sortCondition = { '_id.year': 1, '_id.month': 1 };
} else {
statsPipelineCondition = {
_id: { week: { $week: '$createdAt' }, year: { $year: '$createdAt' } },
};
sortCondition = { '_id.year': 1, '_id.week': 1 };
}
const statsPipeline = [
{
$group: {
...statsPipelineCondition,
TOTAL: { $sum: 1 },
XR: { $sum: { $cond: [{ $in: ['$company', ['XR', 'CR', 'DX']] }, 1, 0] } },
CT: { $sum: { $cond: [{ $eq: ['$company', 'CT'] }, 1, 0] } },
MR: { $sum: { $cond: [{ $eq: ['$company', 'MR'] }, 1, 0] } },
MG: { $sum: { $cond: [{ $in: ['$company', ['NM', 'MM', 'MG']] }, 1, 0] } },
},
},
{
$sort: {
...sortCondition,
},
},
];
Date filter:
datefilter - { '$gte': '2020-09-01T04:49:50.899Z',
'$lte': '2020-11-03T04:49:50.899Z' }
You need to add a $match stage prior to the $group stage to filter our based on the range:
let datefilter = {
'$gte': new Date('2020-09-01T04:49:50.899Z'),
'$lte': new Date('2020-11-03T04:49:50.899Z')
};
const statsPipeline = [
{
$match: {
createdAt: datefilter
}
},
{
$group: {
...statsPipelineCondition,
TOTAL: { $sum: 1 },
XR: { $sum: { $cond: [{ $in: ['$company', ['XR', 'CR', 'DX']] }, 1, 0] } },
CT: { $sum: { $cond: [{ $eq: ['$company', 'CT'] }, 1, 0] } },
MR: { $sum: { $cond: [{ $eq: ['$company', 'MR'] }, 1, 0] } },
MG: { $sum: { $cond: [{ $in: ['$company', ['NM', 'MM', 'MG']] }, 1, 0] } },
},
},
{
$sort: {
...sortCondition,
},
},
];
Okay, So for some reason. MongoDB doesn't like dates in strings. It would be much better if we just convert the strings by an operator provided by the aggregation framework and this will make the things work.
{
$match: {
$expr: {
$and: [
{
$gte: [
'$createdAt',
{
$dateFromString: {
dateString: dateFilter.$gte,
},
},
],
},
{
$lte: [
'$createdAt',
{
$dateFromString: {
dateString: dateFilter.$lte,
},
},
],
},
],
},
},
},
I need to match one of two fields that must not be equal to zero. How to implement it?
I try these solutions but no luck:
Solution 1:
Model.aggregate[
{
$project: {
accountID: "$_id.accountID",
locationID: "$_id.locationID",
time: "$_id.time",
value: "$value",
actualValue: "$actualValue",
total: { $add: ["$value", "$actualValue"] },
},
},
{
$match: {
total: { $ne: 0 },
},
},
]
With this solution, it will wrong when a negative plus with the opposite version. Example -1500 + 1500 will become zero.
Solution 2
Model.aggregate([
{
$group: {
_id: {
accountID: "$accountID",
locationID: "$locationID",
time: "$time",
},
value: { $sum: "$values.val" },
actualValue: { $sum: "$values.actualVal" },
},
},
{
$addFields: {
absVal: { $abs: "$value" },
absActualVal: { $abs: "$actualValue" },
},
},
{
$project: {
accountID: "$_id.accountID",
locationID: "$_id.locationID",
time: "$_id.time",
value: "$value",
actualValue: "$actualValue",
total: { $add: ["$absVal", "$absActualVal"] },
},
},
{
$match: {
total: { $ne: 0 },
},
},
])
It works, but I lost 1 second from 3.5s to 4.5s when searching in 1m document.
Any suggestion? Thank you first
Some basic boolean logic should suffice, use something like:
Model.aggregate([
{
$match: {
$or: [
{
value: {$ne: 0}
},
{
actualValue: {$ne: 0}
}
]
}
}
{
$project: {
accountID: "$_id.accountID",
locationID: "$_id.locationID",
time: "$_id.time",
value: "$value",
actualValue: "$actualValue",
total: {$add: ["$value", "$actualValue"]},
},
}
])
If you care about efficiency make sure you have a compound index that covers both value and actualValue.