how to calculate avg, median, min, max in mongodb query? - mongodb

I have a ListPrice field in my collection. On that price I have to calculate the min, max, median and avg for all data, for the active StandardStatus, and for the sold StandardStatus.
I have tried to calculate this using aggregation and a for loop, but it doesn't work:
// Per-user list-price statistics for the checked properties of one
// presentation: min / max / avg for active ("A") and sold ("S") listings and
// for all listings. The two $push accumulators only collect the prices (in
// the order produced by the preceding $sort); this pipeline does not compute
// the median from them.
db.collection('selected_properties').aggregate([
  {
    // The filter must be wrapped in a $match stage; a bare filter document
    // is not a valid pipeline stage.
    $match: {
      presentation_id: ObjectId(req.body.presentation_id),
      checked_status: true
    }
  },
  {
    $lookup: { from: 'properties', localField: 'property_id', foreignField: '_id', as: 'property_info' }
  },
  {
    $unwind: { path: '$property_info', preserveNullAndEmptyArrays: true }
  },
  {
    $sort: { 'property_info.ListPrice': 1 }
  },
  {
    $group: {
      _id: "$user_id",
      // Listings with another status contribute the fallback value ('' or 0).
      minActiveListPrice: { $min: { $cond: [ { $eq: [ "$property_info.StandardStatus", "A" ] }, '$property_info.ListPrice', '' ] } },
      maxActiveListPrice: { $max: { $cond: [ { $eq: [ "$property_info.StandardStatus", "A" ] }, '$property_info.ListPrice', 0 ] } },
      avgActiveListPrice: { $avg: { $cond: [ { $eq: [ "$property_info.StandardStatus", "A" ] }, '$property_info.ListPrice', '' ] } },
      medianActiveListprice: { $push: { $cond: [ { $eq: [ "$property_info.StandardStatus", "A" ] }, '$property_info.ListPrice', '' ] } },
      minsoldListPrice: { $min: { $cond: [ { $eq: [ "$property_info.StandardStatus", "S" ] }, '$property_info.ListPrice', '' ] } },
      maxsoldListPrice: { $max: { $cond: [ { $eq: [ "$property_info.StandardStatus", "S" ] }, '$property_info.ListPrice', 0 ] } },
      avgsoldListPrice: { $avg: { $cond: [ { $eq: [ "$property_info.StandardStatus", "S" ] }, '$property_info.ListPrice', '' ] } },
      avgPrice: { $avg: "$property_info.ListPrice" },
      maxPrice: { $max: "$property_info.ListPrice" },
      minPrice: { $min: "$property_info.ListPrice" },
      // Kept inside $group: an accumulator placed after the stage's closing
      // brace is a syntax error.
      median: { $push: "$property_info.ListPrice" }
    }
  }
])

// Per-user list-price statistics (min / max / avg / median) for the checked
// properties of one presentation, over all listings and over active ("A")
// listings only.
db.collection('selected_properties').aggregate([
  {
    $match: {
      presentation_id: ObjectId(req.body.presentation_id),
      checked_status: true
    }
  },
  {
    $lookup: {
      from: 'properties',
      localField: 'property_id',
      foreignField: '_id',
      as: 'property_info'
    }
  },
  {
    $unwind: { path: '$property_info', preserveNullAndEmptyArrays: true }
  },
  // Sort ascending before grouping: the medians below are picked by position
  // from the arrays built with $push, so those arrays must be in price order.
  {
    $sort: { 'property_info.ListPrice': 1 }
  },
  {
    $group: {
      _id: "$user_id",
      minActiveListPrice: { $min: { $cond: [ { $eq: [ "$property_info.StandardStatus", "A" ] }, '$property_info.ListPrice', '' ] } },
      maxActiveListPrice: { $max: { $cond: [ { $eq: [ "$property_info.StandardStatus", "A" ] }, '$property_info.ListPrice', 0 ] } },
      avgActiveListPrice: { $avg: { $cond: [ { $eq: [ "$property_info.StandardStatus", "A" ] }, '$property_info.ListPrice', '' ] } },
      // Non-active listings are pushed as null and filtered out in $project.
      medianActiveListprice: { $push: { $cond: [ { $eq: [ "$property_info.StandardStatus", "A" ] }, '$property_info.ListPrice', null ] } },
      avgPrice: { $avg: "$property_info.ListPrice" },
      maxPrice: { $max: "$property_info.ListPrice" },
      minPrice: { $min: "$property_info.ListPrice" },
      median: { $push: "$property_info.ListPrice" }
    }
  },
  {
    "$project": {
      "minActiveListPrice": 1,
      "maxActiveListPrice": 1,
      "avgActiveListPrice": 1,
      "avgPrice": 1,
      "maxPrice": 1,
      "minPrice": 1,
      "medianActiveListpricevalue": {
        $let: {
          vars: {
            // Active prices only: drop the null placeholders.
            arr: {
              $filter: {
                input: "$medianActiveListprice",
                as: "aa",
                cond: { $ne: ["$$aa", null] }
              }
            }
          },
          in: {
            "$cond": {
              "if": {
                "$eq": [ { $mod: [ { $size: "$$arr" }, 2 ] }, 0 ]
              },
              // Even count: average the two middle elements.
              "then": {
                $avg: [
                  { $arrayElemAt: [ "$$arr", { $subtract: [ { $divide: [ { $size: "$$arr" }, 2 ] }, 1 ] } ] },
                  { $arrayElemAt: [ "$$arr", { $divide: [ { $size: "$$arr" }, 2 ] } ] }
                ]
              },
              // Odd count: take the single middle element.
              "else": {
                $arrayElemAt: [ "$$arr", { $floor: { $divide: [ { $size: "$$arr" }, 2 ] } } ]
              }
            }
          }
        }
      },
      "medianvalue": {
        "$cond": {
          "if": {
            "$eq": [ { $mod: [ { $size: "$median" }, 2 ] }, 0 ]
          },
          // The comma after the "if" clause is required; without it the
          // whole pipeline fails to parse.
          "then": {
            $avg: [
              { $arrayElemAt: [ "$median", { $subtract: [ { $divide: [ { $size: "$median" }, 2 ] }, 1 ] } ] },
              { $arrayElemAt: [ "$median", { $divide: [ { $size: "$median" }, 2 ] } ] }
            ]
          },
          "else": {
            $arrayElemAt: [ "$median", { $floor: { $divide: [ { $size: "$median" }, 2 ] } } ]
          }
        }
      }
    }
  }
])

Related

mongodb - Subtracts two numbers total to return the difference

Consider I have the following collection:
[
{
"total": 48.0,
"status": "CO"
},
{
"total": 11.0,
"status": "CA"
},
{
"total": 15916.0,
"status": "PE"
}
]
I need to compute the difference between the PE total and the sum of the CO and CA totals, i.e. PE - (CO + CA).
The expected result is:
{
"_id" : null,
"total" : 15857.0
}
Use $switch to cater for different cases for your sum. Use $subtract to flip the sign for the partial sum.
// Net total over the whole collection: "PE" totals are added, "CO" and "CA"
// totals are subtracted (0 - total), and any other status contributes 0.
db.collection.aggregate([
  {
    $group: {
      _id: null,
      total: {
        "$sum": {
          "$switch": {
            "branches": [
              { "case": { $eq: ["$status", "PE"] }, "then": "$total" },
              { "case": { $eq: ["$status", "CO"] }, "then": { $subtract: [0, "$total"] } },
              { "case": { $eq: ["$status", "CA"] }, "then": { $subtract: [0, "$total"] } }
            ],
            default: 0
          }
        }
      }
    }
  }
])
Mongo Playground
Assuming these are the only status options, one way is to $group using $cond:
// Net total in a single group: "PE" totals are added as-is, every other
// status is added with its sign flipped.
db.collection.aggregate([
  {
    $group: {
      _id: 0,
      total: {
        $sum: {
          $cond: {
            if: { $eq: ["$status", "PE"] },
            then: "$total",
            else: { $multiply: ["$total", -1] }
          }
        }
      }
    }
  }
])
See how it works on the playground example

Aggregate and calculate with mongoDB

Hello I heard that mongoDB was very good at aggregating data compared to, for example SQL server.
I tried to translate an SQL query to a mongoDB query but it's a complete failure :
With SQL Server the query took about 4 minutes, which is manageable, but the MongoDB version below takes 34 minutes for a 128-day period:
function(thresholdObs, days)
{
var name = "period-" + days + "j"
var startDate = new Date('2020', '00', '01');
var endDate = new Date(startDate.getTime() + 1000 * 60 * 60 * 24 *[days]);
db.getCollection('measures').aggregate([
{
$match: {
$and: [
{
measureDate: {
$gte: startDate
}
},
{
measureDate: {
$lte: endDate
}
}
]
}
},
{
$addFields: {
multiplyIahobs: {
$cond: [
{$or: [
{$eq: ["$averageIAH", null]},
{$eq: ["$obs", null]}
]},
0,
{ $multiply: ["$averageIAH", "$obs"] }
]
},
multiplyPressionobs: {
$cond: [
{$or: [
{$eq: ["$averagePressure", null]},
{$eq: ["$obs", null]}
]},
0,
{ $multiply: ["$averagePressure", "$obs"] }
]
},
multiplyFuitesobs: {
$cond: [
{$or: [
{$eq: ["$averageLeakage", null]},
{$eq: ["$obs", null]}
]},
0,
{ $multiply: ["$averageLeakage", "$obs"] }
]
},
multiplyinspiratoryPressureobs: {
$cond: [
{$or: [
{$eq: ["$inspiratoryPressure", null]},
{$eq: ["$obs", null]}
]},
0,
{ $multiply: ["$inspiratoryPressure", "$obs"] }
]
},
multiplyexpiratoryPressureobs: {
$cond: [
{$or: [
{$eq: ["$expiratoryPressure", null]},
{$eq: ["$obs", null]}
]},
0,
{ $multiply: ["$expiratoryPressure", "$obs"] }
]
},
enoughDays: {
$cond: [ { $gte: ["$obs", thresholdObs ] }, 1, 0]
},
missingDays: {
$cond: [ { $lt: ["$obs", thresholdObs ] }, 1, 0]
},
daysWithoutUsage: {
$cond: [ { $eq: ["$obs", 0 ] }, 1, 0]
},
daysWithData: { $sum: 1 }
}
},
{
$group: {
_id: "$deviceID",
obsUsage: { $avg: "$obs" },
enoughDays: { $sum: "$enoughDays" },
missingDays: { $sum: "$missingDays" },
daysWithoutUsage: { $sum: "$daysWithoutUsage" },
daysWithData: { $sum: "$daysWithData" },
sumobs: { $sum: "$obs" },
sumMultiplyIahobs: { $sum: "$multiplyIahobs" },
sumMultiplyPressureObs: { $sum: "$multiplyPressionobs" },
sumMultiplyLeakageObs: { $sum: "$multiplyFuitesobs" },
sumMultiplyinspiratoryPressureobs: { $sum: "$multiplyinspiratoryPressureobs" },
sumMultiplyexpiratoryPressureobs: { $sum: "$multiplyexpiratoryPressureobs" },
}
},
{
$addFields: {
fullObs: { $divide: [ "$sumobs", days ] },
daysWithoutData: { $subtract: [ days, "$daysWithData" ]},
averageIAH: { $cond: [ { $eq: [ "$sumobs", 0 ] }, 0, { $divide: ["$sumMultiplyIahobs", "$sumobs"] } ] },
averagePressure: { $cond: [ { $eq: [ "$sumobs", 0 ] }, 0, { $divide: ["$sumMultiplyPressureObs", "$sumobs"] } ] },
averageLeakage: { $cond: [ { $eq: [ "$sumobs", 0 ] }, 0, { $divide: ["$sumMultiplyLeakageObs", "$sumobs"] } ] },
inspiratoryPressure: { $cond: [ { $eq: [ "$sumobs", 0 ] }, 0, { $divide: ["$sumMultiplyinspiratoryPressureobs", "$sumobs"] } ] },
expiratoryPressure: { $cond: [ { $eq: [ "$sumobs", 0 ] }, 0, { $divide: ["$sumMultiplyexpiratoryPressureobs", "$sumobs"] } ] },
}
},
{
$project: {
_id: 1,
[name] : {
obsUsage: "$obsUsage",
enoughDays: "$enoughDays",
missingDays: "$missingDays",
daysWithoutUsage: "$daysWithoutUsage",
daysWithData: "$daysWithData",
sumobs: "$sumobs",
fullObs: "$fullObs",
daysWithoutData: "$daysWithoutData",
averageIAH: "$averageIAH",
averagePressure: "$averagePressure",
averageLeakage: "$averageLeakage",
inspiratoryPressure: "$inspiratoryPressure",
expiratoryPressure: "$expiratoryPressure"
}
}
},
{
$merge: {
into: "periods",
on: "_id",
whenMatched: "merge",
whenNotMatched: "insert"
}
}
],{allowDiskUse: true})
}
Before throwing out the baby (MongoDB) with the bathwater (the query), I came here to ask whether my understanding of how to write a good query is off. Should I try to alter this query to make it run in under 4 minutes? Is it possible? How?
NB : measures collection contains 374.670.449 documents.
Sample documents :
{
_id: ObjectId('6127a15fef44a9ed52a5bf62'),
deviceId: 5,
measureDateAdded: ISODate('2013-03-15T10:30:35.753Z'),
measureDate: ISODate('2012-03-20T06:00:00.000Z'),
obs: 20,
averageIAH: NumberDecimal('5.50'),
averagePressure: NumberDecimal('6.30'),
averageLeakage: NumberDecimal('13.00'),
inspiratoryPressure: null,
expiratoryPressure: null
},
{
_id: ObjectId('6127a15fef44a9ed52a5bfc6'),
deviceId: 5,
measureDateAdded: ISODate('2013-03-15T10:30:40.063Z'),
measureDate: ISODate('2012-06-28T05:00:00.000Z'),
obs: 197,
averageIAH: NumberDecimal('5.30'),
averagePressure: NumberDecimal('6.90'),
averageLeakage: NumberDecimal('15.00'),
inspiratoryPressure: null,
expiratoryPressure: null
},
{
_id: ObjectId('6127aa2bef44a9ed52a0922a'),
deviceId: 367959,
measureDateAdded: ISODate('2019-01-19T14:13:19.620Z'),
measureDate: ISODate('2019-01-16T11:00:00.000Z'),
obs: 375,
averageIAH: NumberDecimal('2.00'),
averagePressure: NumberDecimal('9.60'),
averageLeakage: NumberDecimal('6.00'),
inspiratoryPressure: null,
expiratoryPressure: null
}
The most expensive part seems to be the $group on deviceId, which takes 4+ minutes.

Optimize multiple "and" statements in mongo aggregate

Is there a simpler way to write this MongoDB query that would also improve its performance? I know I am supposed to group by either one or the other, but I can't find any docs or examples to help me out.
// $facet stage with one sub-pipeline per (queueType, queueStatus) pair.
// Each facet is named "<type><status>" (e.g. "xCreated"), matches documents
// with that type and status, and counts them into a field named after the status.
const facetQuery = {
  $facet: Object.fromEntries(
    ['x', 'y', 'z'].flatMap((queueType) =>
      ['Created', 'Approved', 'Disapproved'].map((queueStatus) => [
        `${queueType}${queueStatus}`,
        [
          { $match: { $and: [{ queueStatus }, { queueType }] } },
          { $count: queueStatus },
        ],
      ])
    )
  ),
};
Oh wow, instead of doing all these separate matches and count you can just dynamically $group on both status and type and then construct the object you need from that:
// Counts documents per (queueType, queueStatus) pair and emits one document
// per pair of the form { "<type><status>": <count> }, e.g. { xApproved: 3 }.
db.collection.aggregate([
  {
    $group: {
      _id: {
        type: "$queueType",
        status: "$queueStatus"
      },
      // The status is part of the group key, so every document in a group
      // has the same status: a plain count replaces the three per-status
      // conditional sums.
      count: { $sum: 1 }
    }
  },
  {
    $replaceRoot: {
      newRoot: {
        // Using the count directly also removes the $switch without a
        // default, which raised an error for any status other than
        // Approved / Created / Disapproved.
        $arrayToObject: [
          [
            {
              k: { $concat: ["$_id.type", "$_id.status"] },
              v: "$count"
            }
          ]
        ]
      }
    }
  }
])
Mongo Playground

MongoDB aggregate multiple group by top fields and array fields

My collection will look like this,
{
"_id" : ObjectId("591c5971240033283736860a"),
"status" : "Done",
"createdDate" : ISODate("2017-05-17T14:09:20.653Z")
"communications" : [
{
"communicationUUID" : "df07948e-4a14-468e-beb1-db55ff72b215",
"communicationType" : "CALL",
"recipientId" : 12345,
"createdDate" : ISODate("2017-05-18T14:09:20.653Z")
"callResponse" : {
"Status" : "completed",
"id" : "dsd45554545ds92a9bd2c12e0e6436d",
}
}
]}
{
"_id" : ObjectId("45sdsd59124003345121450a"),
"status" : "ToDo",
"createdDate" : ISODate("2017-05-17T14:09:20.653Z")
"communications" : [
{
"communicationUUID" : "45sds55-4a14-468e-beb1-db55ff72b215",
"communicationType" : "CALL",
"recipientId" : 1234,
"createdDate" : ISODate("2017-05-18T14:09:20.653Z")
"callResponse" : {
"Status" : "completed",
"id" : "84fe862f1924455dsds5556436d",
}
}
]}
Currently I am writing two aggregate query to achieve my requirement and my query will be below
// Counts documents per creation day (dd/mm/yyyy), broken down by status.
// The stages are passed as a pipeline array and the call is closed; the
// original snippet ended without closing aggregate(.
db.collection.aggregate([
  { $project: {
    dayMonthYear: { $dateToString: { format: "%d/%m/%Y", date: "$createdDate" } },
    status: 1,
  }},
  { $group: {
    _id: "$dayMonthYear",
    Pending: { $sum: { $cond : [{ $eq : ["$status", "ToDo"]}, 1, 0]} },
    InProgress: { $sum: { $cond : [{ $eq : ["$status", "InProgress"]}, 1, 0]} },
    Done: { $sum: { $cond : [{ $eq : ["$status", "Done"]}, 1, 0]} },
    Total: { $sum: 1 }
  }}
])
My output will be,
{"_id" : "17/05/2017", "Pending" : 1.0, "InProgress" : 0.0, "Done" : 1.0, "Total" : 2.0 }
Using the above query I am able to get the counts, but I also need counts based on the communication status, so I am writing one more query to achieve that:
// Per day (dd/mm/yyyy of the document's createdDate): number of calls with a
// known response status, number of distinct recipients with a completed call
// ("engaged"), and number of distinct recipients with only failed / busy /
// no-answer calls ("not_engaged").
db.collection.aggregate([
  { "$unwind": "$communications" },
  { $project: {
    dayMonthYear: { $dateToString: { format: "%d/%m/%Y", date: "$createdDate" } },
    communications: 1
  }},
  { "$group": {
    _id: "$dayMonthYear",
    "total_call": { $sum: { $cond : [{ $or : [
      { $eq: [ "$communications.callResponse.Status", "failed"] },
      { $eq: [ "$communications.callResponse.Status", "busy"] },
      { $eq: [ "$communications.callResponse.Status", "completed"] },
      { $eq: [ "$communications.callResponse.Status", "no-answer"] }
    ]}, 1, 0 ] }},
    // Non-matching communications add the placeholder string "null", which
    // is stripped from the sets in the next stage.
    "engaged": { $addToSet: { $cond : [
      { $eq : ["$communications.callResponse.Status", "completed"]},
      "$communications.recipientId", "null" ]} },
    "not_engaged": { $addToSet: { $cond: [{ $or : [
      { $eq: [ "$communications.callResponse.Status", "failed"] },
      { $eq: [ "$communications.callResponse.Status", "busy"] },
      { $eq: [ "$communications.callResponse.Status", "no-answer"] }
    ]}, "$communications.recipientId", "null" ] }}
  }},
  { "$project": {
    "_id": 1,
    "total_call": 1,
    // Field reference fixed from "$ngaged", which does not exist; with the
    // typo "engaged" became null and the $size below failed.
    "engaged": { "$setDifference": [ "$engaged", ["null"] ] },
    "not_engaged": { "$setDifference": [ "$not_engaged", ["null"] ] },
  }},
  { "$project": {
    "total_call": 1,
    "engaged": { "$size": "$engaged" },
    // A recipient who was engaged at least once is not counted as not engaged.
    "not_engaged": { "$size": { "$setDifference": [ "$not_engaged", "$engaged" ] }},
  }}
])
My output will be,
{"_id" : "18/05/2017", "total_call" : 2.0, "engaged" : 2, "not_engaged" : 0}
Using the above query I am able to get the counts, but I want to achieve it in a single query.
I am looking for output like
{"_id":"17/05/2017", "Pending" : 1.0, "InProgress" : 0.0, "Done" : 1.0, "total_call" : 0, "engaged" : 0, "not_engaged" : 0}
{"_id":"18/05/2017", "Pending" : 0.0, "InProgress" : 0.0, "Done" : 0.0, "total_call" : 2, "engaged" : 2, "not_engaged" : 0}
Can anyone suggest or provide me good way to get above result.
You can use $concatArrays to merge the status and createdDate of the document with those of its communications, followed by $group to count the occurrences.
// Single-pass per-day counts of task statuses and call statuses.
// Each document is turned into a list of { status, createdDate } entries: one
// for the document itself and one per communication. The entries are unwound
// and grouped by day (dd/mm/yyyy).
// Note: "engaged" / "not_engaged" here count communications, not distinct
// recipientId values.
db.collection.aggregate([
  {
    "$project": {
      "statusandcreateddate": {
        "$concatArrays": [
          [
            { "status": "$status", "createdDate": "$createdDate" }
          ],
          {
            "$map": {
              // Fall back to an empty array: $concatArrays returns null when
              // an argument is null or missing, and the $unwind below would
              // then drop the document together with its task status.
              "input": { "$ifNull": ["$communications", []] },
              "as": "l",
              "in": {
                "status": "$$l.callResponse.Status",
                "createdDate": "$$l.createdDate"
              }
            }
          }
        ]
      }
    }
  },
  {
    "$unwind": "$statusandcreateddate"
  },
  {
    "$group": {
      "_id": {
        "$dateToString": {
          "format": "%d/%m/%Y",
          "date": "$statusandcreateddate.createdDate"
        }
      },
      "total_call": {
        "$sum": {
          "$cond": [
            {
              "$or": [
                { "$eq": ["$statusandcreateddate.status", "failed"] },
                { "$eq": ["$statusandcreateddate.status", "busy"] },
                { "$eq": ["$statusandcreateddate.status", "completed"] },
                { "$eq": ["$statusandcreateddate.status", "no-answer"] }
              ]
            },
            1,
            0
          ]
        }
      },
      "engaged": {
        "$sum": {
          "$cond": [ { "$eq": ["$statusandcreateddate.status", "completed"] }, 1, 0 ]
        }
      },
      "not_engaged": {
        "$sum": {
          "$cond": [
            {
              "$or": [
                { "$eq": ["$statusandcreateddate.status", "failed"] },
                { "$eq": ["$statusandcreateddate.status", "busy"] },
                { "$eq": ["$statusandcreateddate.status", "no-answer"] }
              ]
            },
            1,
            0
          ]
        }
      },
      "Pending": {
        "$sum": {
          "$cond": [ { "$eq": ["$statusandcreateddate.status", "ToDo"] }, 1, 0 ]
        }
      },
      "InProgress": {
        "$sum": {
          "$cond": [ { "$eq": ["$statusandcreateddate.status", "InProgress"] }, 1, 0 ]
        }
      },
      "Done": {
        "$sum": {
          "$cond": [ { "$eq": ["$statusandcreateddate.status", "Done"] }, 1, 0 ]
        }
      }
    }
  }
])

How to group query with multiple $cond?

I want to query like below, but this contains only one $cond.
How to query with two $cond?
// Question's starting point (pseudo-code: the "..." lines are placeholders).
// Restricts to the given ids, groups by someField and counts the documents
// whose otherField is false.
collection.aggregate(
{
$match : {
'_id' : {$in:ids}
}
},
{
$group: {
_id: '$someField',
...
// Adds 1 for each document where otherField equals false, 0 otherwise.
count: {$sum: { $cond: [ { $eq: [ "$otherField", false] } , 1, 0 ] }}
}
},
function(err, result){
...
}
);
You want to use a compound expression inside {$cond:[]} - something like:
// Pseudo-code (the "..." lines are placeholders): same pipeline shape, but
// the count uses a compound condition inside a single $cond.
collection.aggregate(
{
$match : {
'_id' : {$in:ids}
}
},
{
$group: {
_id: '$someField',
...
// Adds 1 only when BOTH comparisons hold: otherField is false and
// anotherField equals "value"; otherwise adds 0.
count: {$sum: { $cond: [ {$and : [ { $eq: [ "$otherField", false] },
{ $eq: [ "$anotherField","value"] }
] },
1,
0 ] }}
}
},
function(err, result){
...
}
);
The $and operator is documented here: http://docs.mongodb.org/manual/reference/operator/aggregation/#boolean-operators
You can add multiple $cond expressions, and multiple criteria inside a $cond, like this:
`
// Example of chaining $cond expressions: each "else" holds the next $cond,
// and the innermost "else" falls back to the value of $foo itself.
collection.aggregate(
[
{
"$match": {
//matching criteria
}
},
{
"$project": {
"service": {
// First test: $foo == "bar".
"$cond": {
"if": {
"$eq": [
"$foo",
"bar"
]
},
"then": "return string1",
"else": {
// NOTE(review): this repeats the same test ($foo == "bar") as the outer
// $cond, so as written this branch can never be taken; a different value
// was presumably intended.
"$cond": {
"if": {
"$eq": [
"$foo",
"bar"
]
},
"then": "return string2",
"else": {
// Third test combines two criteria with $or: $foo is "bar1" or "bar2".
"$cond": {
"if": {
"$or": [
{
"$eq": [
"$foo",
"bar1"
]
},
{
"$eq": [
"$foo",
"bar2"
]
}
]
},
"then": "return string3",
"else": "$foo"
}
}
}
}
}
},
"_id": 0
}
}
]
)
`