MongoDB aggregation, Group by value interval, - mongodb

MongoDB documents:
[{
_id: '123213',
elevation: 2300,
area: 25
},
{
_id: '343221',
elevation: 1600,
area: 35,
},
{
_id: '545322',
elevation: 500,
area: 12,
},
{
_id: '234234',
elevation: null,
area: 5
}]
I want to group these on a given interval on elevation and summarize the area property.
Group 1: < 0
Group 2: 0 - 1500
Group 3: 1501 - 3000,
Group 4: > 3000
So the expected output would be:
[{
interval: '1501-3000',
count: 2,
summarizedArea: 60
},
{
interval: '0-1500',
count: 1,
summarizedArea: 12,
},
{
interval: 'N/A',
count: 1,
summarizedArea: 5
}]
If possible, I want to use the aggregation pipeline.
Maybe something with $range? Or a combination of $gte and $lte?

As Feliix suggested $bucket should do the job, but boundaries should be slightly different to play well with negative and N/A values:
// Buckets documents by elevation and reports, per bucket, the number of
// documents and the total of their area field.
db.collection.aggregate([
{
$bucket: {
groupBy: "$elevation",
// The inner values 0, 1501 and 3001 separate the requested intervals
// (<0, 0-1500, 1501-3000, >3000); the two outer values bound the numeric range.
boundaries: [ -Number.MAX_VALUE, 0, 1501, 3001, Number.POSITIVE_INFINITY ],
// Bucket id for documents that fit none of the boundaries above
// (presumably the null-elevation "N/A" case — confirm against the $bucket docs).
default: Number.NEGATIVE_INFINITY,
output: {
"count": { $sum: 1 },
"summarizedArea" : { $sum: "$area" }
}
}
}
])
The formatting stage below can be added to the pipeline to adjust shape of the response:
// Formatting stage: gathers every bucket into the documents array of a single
// result document and replaces each numeric bucket id with a text label.
{ $group: {
_id: null,
documents: { $push: {
interval: { $let: {
vars: {
// idx is the position of this bucket's label in the label array used below.
idx: { $switch: {
branches: [
{ case: { $eq: [ "$_id", -Number.MAX_VALUE ] }, then: 3 },
{ case: { $eq: [ "$_id", 0 ] }, then: 2 },
{ case: { $eq: [ "$_id", 1501 ] }, then: 1 },
{ case: { $eq: [ "$_id", 3001 ] }, then: 0 }
],
// Any other _id falls through to index 4, the "N/A" label.
default: 4
} }
},
in: { $arrayElemAt: [ [ ">3000", "1501-3000", "0-1500", "<0", "N/A" ], "$$idx" ] }
} },
// Per-bucket totals are copied through unchanged.
count: "$count",
summarizedArea: "$summarizedArea"
} }
} }
$group with _id: null $pushes all groups into an array inside a single document.
$let maps $_id from previous stage to text labels of interval defined in the array [ ">3000", "1501-3000", "0-1500", "<0", "N/A" ]. For that it calculates idx index of the label using $switch.
It must be way simpler to implement the logic on application level unless you absolutely need to do it in the pipeline.

You can use $bucket, introduced in MongoDB 3.4, to achieve this:
// Buckets documents by elevation using the boundary values 0, 1500, 3000 and
// 5000, and reports the document count and total area for each bucket.
// NOTE(review): assuming each boundary is an inclusive lower bound (see the
// $bucket docs), an elevation of exactly 1500 would land in the 1500 bucket
// rather than a "0-1500" interval — confirm against the intended intervals.
db.collection.aggregate([
{
$bucket: {
groupBy: "$elevation",
boundaries: [
0,
1500,
3000,
5000
],
// Bucket id for documents that match no boundary range; in the sample output
// the null-elevation document (area 5) is reported under _id 10000.
default: 10000,
output: {
"count": {
$sum: 1
},
"summarizedArea": {
$sum: "$area"
}
}
}
}
])
output:
[
{
"_id": 0,
"count": 1,
"summarizedArea": 12
},
{
"_id": 1500,
"count": 2,
"summarizedArea": 60
},
{
"_id": 10000,
"count": 1,
"summarizedArea": 5
}
]
you can try it here: mongoplayground.net/p/xFe7ZygMqaY

Related

MongoDB - Query calculation and group multiple items

Let's say I have this data:
{"Plane":"5546","Time":"55.0", City:"LA"}
{"Plane":"5548","Time":"25.0", City:"CA"}
{"Plane":"5546","Time":"6.0", City:"LA"}
{"Plane":"5548","Time":"5.0", City:"CA"}
{"Plane":"5555","Time":"15.0", City:"XA"}
{"Plane":"5555","Time":"8.0", City:"XA"}
and more but I just visualize the data
I want to calculate and group all the time and plane, this is expected output:
{"_id":["5546","LA"],"Sum":2,"LateRate":1,"Prob":0.5}
Sum is the sum over all the times, Late is the sum over all the times with Time > "15", and Prob is Late/Sum.
The code I have tried but it still is missing something:
db.Collection.aggregate([
{
$project: {
Sum: 1,
Late: {
$cond: [{ $gt: ["$Time", 15.0] }, 1, 0]
},
prob:1
}
},
{
$group:{
_id:{Plane:"$Plane", City:"$City"},
Sum: {$sum:1},
Late: {$sum: "$Late"}
}
},
{
$addFields: {
prob: {
"$divide": [
"$Late",
"$Sum"
]
}
}
},
])
// Per (Plane, City) pair: totals Time, totals the Time values above 15, and
// divides the second total by the first.
db.collection.aggregate([
{
// $project limits the fields passed on, so Time, Plane and City are listed
// explicitly because the $group stage below reads them.
$project: {
Time: 1,
// Late carries the document's Time when it exceeds 15, otherwise 0.
Late: {
$cond: [
{
$gt: [
{
// Time is stored as a string in the sample data, so it is converted
// before the numeric comparison.
$toDouble: "$Time"
},
15.0
]
},
"$Time",
0
]
},
// NOTE(review): the sample documents show no prob field, so this inclusion
// looks like a no-op — confirm before relying on it.
prob: 1,
Plane: 1,
City: 1
}
},
{
$group: {
_id: {
Plane: "$Plane",
City: "$City"
},
// Total of all Time values in the group, converted from string.
Sum: {
$sum: {
"$toDouble": "$Time"
}
},
// Total of the Late values (Time above 15, or 0) in the group.
Late: {
$sum: {
$toDouble: "$Late"
}
}
}
},
{
// prob = Late / Sum for each group.
$addFields: {
prob: {
"$divide": [
"$Late",
"$Sum"
]
}
}
}
])
Project limits the fields passed to the next stage
On string, you cannot perform all relational/arithmetic operations
Playground

Mongo DB aggregate grouping multiple values that belong to the same document

I have documents that look like this
{
"_id": "5e3334cede31d9555e38dbee",
"time": 400,
"datetime": "2020-01-05T16:35:42.315Z",
"version": "2.0.30",
"hostname": "bvasilchik-lt.extron.com",
"testfile": "cards.txt",
"tests": 5,
"failures": 3,
"skips": 0,
"status": "Failed",
"__v": 0
}
I want to create a result that includes the documents that have the highest number of time per testfile name, so if the top 10 were all the same testfile name I'd only want to show the top one that had the same testfile name.
I have done this but I also wanted to include another field that also shows the number of tests matching that grouping, but the only ways I found were to add the $first or the $last or the $max or the $min for the tests field, but that wouldn't be correct b/c the highest time might have a different number of tests.
I am also matching results from a specific date range
const times = await Suite.aggregate([
{
"$match": {
datetime: { "$gte": dateRange.startDate, "$lt": dateRange.endDate, }
}
},
{
"$group": {
_id: "$testfile",
time: { "$max" : "$time" },
}
},
{
"$sort": {
time: order
}
},
{
"$project": {
_id: 0,
testfile: "$_id",
time: "$time"
}
}
])
this produces these results
[
{
"testfile": "lists.txt",
"time": 900
},
{
"testfile": "buttons.txt",
"time": 800
},
{
"testfile": "cards.txt",
"time": 400
},
{
"testfile": "popover.txt",
"time": 300
},
{
"testfile": "about-pages.neb",
"time": 76
}
]
but what I want it to return is
[
{
"testfile": "lists.txt",
"tests": 5,
"time": 900
},
{
"testfile": "buttons.txt",
"tests": 4,
"time": 800
},
{
"testfile": "cards.txt",
"tests": 8,
"time": 400
},
{
"testfile": "popover.txt",
"tests": 1,
"time": 300
},
{
"testfile": "about-pages.neb",
"tests": 2,
"time": 76
}
]
You need to add extra field into $group and $project stages.
You need to use the $max operator for the time field and accumulate the time/tests pairs into the tests field. In the last stage, we $reduce the tests field, taking the highest tests value among the entries whose time equals the maximum time.
// Stages that, per testfile, keep the maximum time and the tests value that
// belongs to an entry with that maximum time.
{
"$group": {
_id: "$testfile",
time: {
$max: "$time"
},
// Collect every (time, tests) pair of the group so the matching tests value
// can be picked out in the $project stage below.
tests: {
"$push": {
time: "$time",
tests: "$tests"
}
}
}
},
{
"$sort": {
time: 1
}
},
{
"$project": {
_id: 0,
testfile: "$_id",
time: "$time",
// Fold over the collected pairs starting from 0: whenever a pair's time equals
// the group's max time and its tests exceeds the running value, add the
// difference so the running value becomes that pair's tests. The result is
// the largest tests among the pairs that have the maximum time.
tests: {
$reduce: {
input: "$tests",
initialValue: 0,
in: {
$add: [
"$$value",
{
$cond: [
{
$and: [
{
$eq: [
"$time",
"$$this.time"
]
},
{
$gt: [
"$$this.tests",
"$$value"
]
}
]
},
{
$subtract: [
"$$this.tests",
"$$value"
]
},
0
]
}
]
}
}
}
}
}
MongoPlayground

Trying to use $cond to $sum and $subtract

My documents:
_id:"DwNMQtHYopXKK3rXt"
client_id:"ZrqKavXX8ieGpx5ae"
client_name:"luana"
companyId:"z3ern2Q7rdvviYCGv"
is_active:true
client_searchable_name:"luana"
status:"paid"
items:Object
id:912602
gross_amount:1000
type:"service"
description:"Pedicure com Zé (pacote)"
item_id:"bjmmPPjqKdWfwJqtC"
user_id:"gWjCskpHF2a3xHYy9"
user_id_commission:50
user_id_amount:0
use_package:true
quantity:1
item_costs:Array
discount_cost:Object
type:"package"
value:100
charge_from:"company_only"
entity_id:"LLRirWu5DabkRna7X"
created_at:2019-10-29T10:35:39.493+00:00
updated_at:2019-10-29T10:36:42.983+00:00
version:"2"
created_by:"2QBRDN9MACQagSkJr"
amount:0
multiple_payment_methods:Array
closed_at:2019-10-29T10:36:52.781+00:00
So i made a $project:
{
_id: 0,
closed_at: 1,
serviceId: "$items.item_id",
serviceAmount: "$items.gross_amount",
discounts:"$items.discount_cost"
}
And then $group
_id: {
month: { $month: "$closed_at" },
serviceId: "$serviceId",
discountType: "$discounts.type",
discountValue: "$discounts.value"
},
totalServiceAmount: {
$sum: "$serviceAmount"
}
}
I'm trying to make a $sum of values of the categories in my DB, actually i filtered all the data, so i have exactly what i need, like that;
_id:Object
"month":10
"serviceId":"MWBqhMyW8ataGxjBT"
"discountType":"courtesy"
"discountValue":100
"totalServiceAmount":5000
So, i have 5 types of discounts on my DB, they are: Percentage (discount in percentage), courtesy (make the service amount 0), package (make the service amount 0), gross (gross discount of value) and null if there's no discount or value.
so, if the type of discount is;
Percentage: I need to subtract the discountValue for the totalServiceAmount (discountValue will be in percentage, how i do that subtract if total serviceAmount is on gross value)
Courtesy and package: I need to transform the totalServiceAmount in 0 value.
Gross: i need to subtract the discountValue for the totalServiceAmount.
Null: just let totalServiceAmount.
I tried it like this as a test, but I really don't know if I'm going down the right path; the result was null for every amountWithDiscount.
{
$project: {
{
amountWithDiscount: {
$cond: {
if: {
$eq: ["$_id.discountType", "null"]
},
then: "$serviceAmount", else: {
$cond: {
if: {
$eq: ["$_id.discountType", "gross"]
},
then: {
$subtract: ["$serviceAmount", "$_id.discountValue"]
},
else: "$serviceAmount"
}
}
}
}
}
Make sense?
I create a collection with your grouping result:
01) Example of Documents:
[
{
"_id": "5db9ca609a17899b8ba6650d",
"month": 10,
"serviceId": "MWBqhMyW8ataGxjBT",
"discountType": "courtesy",
"discountValue": 0,
"totalServiceAmount": 5000
},
{
"_id": "5db9d0859a17899b8ba66856",
"month": 10,
"serviceId": "MWBqhMyW8ataGxjBT",
"discountType": "gross",
"discountValue": 100,
"totalServiceAmount": 5000
},
{
"_id": "5db9d0ac9a17899b8ba66863",
"month": 10,
"serviceId": "MWBqhMyW8ataGxjBT",
"discountType": "percentage",
"discountValue": 10,
"totalServiceAmount": 5000
},
{
"_id": "5db9d0d89a17899b8ba6687f",
"month": 10,
"serviceId": "MWBqhMyW8ataGxjBT",
"discountType": null,
"discountValue": 10,
"totalServiceAmount": 6000
}
]
02) Query:
// Computes amountWithDiscount for each grouped document from its discountType:
//   gross                -> totalServiceAmount - discountValue
//   percentage           -> totalServiceAmount * (1 - discountValue / 100)
//   courtesy / package   -> 0 (the service amount is waived entirely)
//   null / anything else -> totalServiceAmount unchanged
// A flat $switch replaces the nested $cond chain so that every discount type
// listed in the question has its own branch; the previous version let
// courtesy and package fall through to the undiscounted amount.
db.collection.aggregate([
  {
    $project: {
      discountType: "$discountType",
      amountWithDiscount: {
        $switch: {
          branches: [
            {
              case: { $eq: ["$discountType", "gross"] },
              then: { $subtract: ["$totalServiceAmount", "$discountValue"] }
            },
            {
              // discountValue is a percentage, so scale the amount by (1 - value/100).
              case: { $eq: ["$discountType", "percentage"] },
              then: {
                $multiply: [
                  "$totalServiceAmount",
                  { $subtract: [1, { $divide: ["$discountValue", 100] }] }
                ]
              }
            },
            {
              case: { $in: ["$discountType", ["courtesy", "package"]] },
              then: 0
            }
          ],
          // No discount (null) or an unrecognised type: keep the amount as is.
          default: "$totalServiceAmount"
        }
      }
    }
  }
])
A working example at https://mongoplayground.net/p/nU7vhGN-uSp.
I don't know if I fully understand your problem, but
take a look and see if it solves your problem.

How to group by multiple keys and values

I have a collection of documents where each document has a nested field named outside with two values:
_id: 9287645ztiu234jgk2j3g5jh,
outside: {
temperature: 'low', // 'low' or 'high'
humidity: 'high', // 'low' or 'high'
},
... some more fields
temperature and humidity can have value low or high
I want to count how many times temperature: low, temperature: high, humidity: low, humidity: high is present in each document of the collection, so the query result for e.g. 14 documents should look like this:
{
temperatureLow: 2,
temperatureHigh: 12,
humidityLow: 8,
humidityHigh: 6,
}
I tried a $group (as the only stage in the aggregation pipeline) like this:
$group: {
_id: { temperature: '$outside.temperature', humidity: '$outside.humidity' },
count: { $sum: 1 },
},
And this gives me these documents (EDITED, first post had wrong data):
{
"_id": {
"temperature": "high",
"humidity": "high"
},
"count": 6
},
{
"_id": {
"temperature": "high",
"humidity": "low"
},
"count": 6
},
{
"_id": {
"temperature": "low",
"humidity": "low"
},
"count": 2
}
How can it be combined into one document?
It's possible. You need to add a $project stage that uses the $cond operator before the $group stage:
// Stage 1: turn each document into four 0/1 flags, one per
// (field, value) combination of outside.temperature / outside.humidity.
{
$project: {
"temperatureLow": { $cond: { if: { $eq: ["$outside.temperature", "low"] }, then: 1, else: 0 }},
"temperatureHigh": { $cond: { if: { $eq: ["$outside.temperature", "high"] }, then: 1, else: 0 }},
"humidityLow": { $cond: { if: { $eq: ["$outside.humidity", "low"] }, then: 1, else: 0 }},
"humidityHigh": { $cond: { if: { $eq: ["$outside.humidity", "high"] }, then: 1, else: 0 }}
}
},
// Stage 2: put every document in one group (constant _id "result") and sum
// each flag, which yields the four counts in a single output document.
{
$group: {
_id: "result",
"temperatureLow": {$sum: "$temperatureLow"},
"temperatureHigh": {$sum: "$temperatureHigh"},
"humidityLow": {$sum: "$humidityLow"},
"humidityHigh": {$sum: "$humidityHigh"},
}
},
Update
Or, as Neil Lunn notes, you can use $cond inside the $sum operator without a $project stage:
// Single-stage variant: all documents go into one group (constant _id
// "result"), and each accumulator adds 1 when the document's field has the
// given value and 0 otherwise.
{
$group: {
_id: "result",
"temperatureLow": {$sum: { $cond: { if: { $eq: ["$outside.temperature", "low"] }, then: 1, else: 0 }}},
"temperatureHigh": {$sum: { $cond: { if: { $eq: ["$outside.temperature", "high"] }, then: 1, else: 0 }}},
"humidityLow": {$sum:{ $cond: { if: { $eq: ["$outside.humidity", "low"] }, then: 1, else: 0 }}},
"humidityHigh": {$sum:{ $cond: { if: { $eq: ["$outside.humidity", "high"] }, then: 1, else: 0 }}}
}
},

MongoDB Get average of group considering rank of document

I have documents getting in order like:
{
"_id": "abcde1",
"value" : 300
},
{
"_id": "abcde2",
"value" : 200
},
{
"_id": "abcde3",
"value" : 400
},
{
"_id": "abcde4",
"value" : 500
},
{
"_id": "abcde5",
"value" : 600
}
i.e,
I want the average of "value" for the first 2, the first 4 and all 5 documents, returned by a single query like this:
{
"value_2" : 250, // Average of first 2 documents
"value_4" : 350, // Average of first four documents
"value_5" : 400 // Average of all 5 documents
}
Is it possible to group documents based on the rank of the document?
I can do 3 results in 3 separate queries. Is it possible in single query?
You could try running the following pipeline:
// Computes the average of value over the first 2 documents, the first 4
// documents and all documents in one pipeline, using each value's position
// in a collected array as its rank.
db.collection.aggregate([
// previous pipeline here
{
// Collect every value into one array on a single document.
"$group": {
"_id": null,
"values": { "$push": "$value" }
}
},
// Unwind the array again, storing each element's array index in rank.
{ "$unwind": { "path": "$values", "includeArrayIndex": "rank" } },
{
"$group": {
"_id": null,
// Sum and count restricted to entries with rank < 2.
"value_2_sum": {
"$sum": {
"$cond": [
{ "$lt": ["$rank", 2] },
"$values",
0
]
}
},
"value_2_count": {
"$sum": {
"$cond": [
{ "$lt": ["$rank", 2] },
1,
0
]
}
},
// Sum and count restricted to entries with rank < 4.
"value_4_sum": {
"$sum": {
"$cond": [
{ "$lt": ["$rank", 4] },
"$values",
0
]
}
},
"value_4_count": {
"$sum": {
"$cond": [
{ "$lt": ["$rank", 4] },
1,
0
]
}
},
// No rank filter here: the average runs over every entry.
"value_5": { "$avg": "$values" }
}
},
{
"$project": {
"value_2" : { "$divide": ["$value_2_sum", "$value_2_count"] }, // Average of first 2 documents
"value_4" : { "$divide": ["$value_4_sum", "$value_4_count"] }, // Average of first four documents
"value_5" : 1
}
}
])
You could use a $facet aggregation stage:
// { _id: "abcde1", value: 300 }
// { _id: "abcde2", value: 200 }
// { _id: "abcde3", value: 400 }
// { _id: "abcde4", value: 500 }
// { _id: "abcde5", value: 600 }
// Runs three sub-pipelines over the same input: each limits the input to the
// first N documents and averages value over them, then the results are
// flattened into plain fields.
db.collection.aggregate([
{ $facet: {
value_2: [ { $limit: 2 }, { $group: { _id: null, value_2: { $avg: "$value" } } } ],
value_4: [ { $limit: 4 }, { $group: { _id: null, value_4: { $avg: "$value" } } } ],
value_5: [ { $limit: 5 }, { $group: { _id: null, value_5: { $avg: "$value" } } } ]
}},
// {
// value_2: [ { _id: null, value_2: 250 } ],
// value_4: [ { _id: null, value_4: 350 } ],
// value_5: [ { _id: null, value_5: 400 } ]
// }
// Each facet result is a one-element array; take the element's average so the
// output document holds plain numbers.
// NOTE(review): $first used as an array expression here presumably needs a
// recent server version — confirm against the deployment.
{ $set: {
value_2: { $first: "$value_2.value_2" },
value_4: { $first: "$value_4.value_4" },
value_5: { $first: "$value_5.value_5" }
}}
])
// { "value_2" : 250, "value_4" : 350, "value_5" : 400 }
The $facet stage allows us to run multiple aggregation pipelines within a single stage on the same set of input documents. Each sub-pipeline has its own field in the output document where its results are stored as an array of documents.
Each field is thus produced by its own aggregation pipeline whose first stage is a simple $limit, followed by a $group stage that'll produce the $avg (average) of all considered documents.
The second part of the pipeline (the $set stage) is just there to clean-up the $facet output to the format you wished for.