How to aggregate percentages within arrays? - mongodb

I'm trying to work out exactly how to achieve an aggregation, I could manually unwind and group back together at the end, but I'm sure I should be able to achieve this in a more concise way so I wanted to throw it out as I'm getting stuck.
My document structure (skipping out the un-interesting bits) looks like:
{
_id: ObjectId,
panels: [
{
visConfig: {
dataConfig: {
columns: [
{ element: "DX" },
{ element: "SE" },
]
}
}
},
{
visConfig: {
dataConfig: {
columns: [
{ element: "AB" },
{ element: "XY" },
]
}
}
}
]
}
What I want to do is calculate a percentage of the element overlaps with a given set to be provided. So for example for the document shown it would produce 25% for the set ["DX"] or 50% for the set ["DX", "AB"].
So I've tried a few things, I think I've settled on the nearest so far as:
$project: {
_id: 1,
total: { $sum: { $size: "$panels.visConfig.dataConfig.columns" } }
}
But I'm getting an error here which I don't understand:
The argument to $size must be an array, but was of type: missing
Then I'm also having issues with my conditional aggregation which seems to be returning 0 for all of the element values.
{
_id: 1,
"panels.visConfig.dataConfig.columns.element": {
$sum: {
$cond: [{
$setIsSubset: [
["DX"], ["$panels.visConfig.dataConfig.columns.element"]
]
}, 1, 0 ],
}
},
}

You can try below aggregation in 3.4 version.
db.colname.aggregate([
{"$project":{
"_id":1,
"total":{
"$reduce":{
"input":"$panels.visConfig.dataConfig.columns.element",
"initialValue":0,
"in":{"$add":["$$value",{"$size":"$$this"}]}
}},
"match":{
"$sum":{
"$map":{
"input":"$panels.visConfig.dataConfig.columns.element",
"in":{
"$size":{
"$setIntersection":[["DX","AB"],"$$this"]
}
}
}
}
}
}},
{"$project":{
"_id":1,
"percent":{"$multiply":[{"$divide":["$match","$total"]}, 100]}
}}])
Update - You can perform both match and total calculations in $reduce pipeline.
db.colname.aggregate([
{"$project":{
"_id":1,
"stats":{
"$reduce":{
"input":"$panels.visConfig.dataConfig.columns.element",
"initialValue":{"total":0,"match":0},
"in":{
"total":{"$add":["$$value.total",{"$size":"$$this"}]},
"match":{"$add":["$$value.match",{"$sum":{"$map":{"input":"$$this","in":{"$cond":[{"$in":["$$this", ["DX","AB"]] }, 1, 0]}}}}]}
}
}}
}},
{"$project":{
"_id":1,
"percent":{"$multiply":[{"$divide":["$stats.match","$stats.total"]}, 100]}
}}])

You can use $map + $reduce to get an array of all element values and then using $divide you can divide $filter-ed $size by total $size:
db.col.aggregate([
{
$project: {
elements: {
$reduce: {
input: {
$map: {
input: "$panels",
as: "panel",
in: "$$panel.visConfig.dataConfig.columns.element"
}
},
initialValue: [],
in: { $concatArrays: [ "$$this", "$$value" ] }
}
}
}
},
{
$project: {
percentage: {
$divide: [
{
$size: {
$filter: {
input: "$elements",
as: "element",
cond: {
$in: [
"$$element",
[ "AB", "XY" ] // your input here
]
}
}
}
},
{ $size: "$elements" }
]
}
}
}
])

Well, there are couple of ways to do this, but I these two pipelines show how I would do it.
var values = ["DX", "KL"]
First approach
[
{
"$project": {
"percent": {
"$let": {
"vars": {
"allsets": {
"$reduce": {
"input": "$panels.visConfig.dataConfig.columns",
"initialValue": [],
"in": {
"$concatArrays": [ "$$this.element", "$$value" ]
}
}
}
},
"in": {
"$multiply": [
{
"$divide": [
{
"$size": {
"$setIntersection": [ "$$allsets", values ]
}
},
{ "$size": "$$allsets" }
]
},
100
]
}
}
}
}
}
]
Second approach same idea here but, using one pipeline stage.
[
{
"$project": {
"percent": {
"$multiply": [
{
"$divide": [
{
"$sum": {
"$map": {
"input": "$panels.visConfig.dataConfig.columns.element",
"in": {
"$size": {
"$setIntersection": [ values, "$$this" ]
}
}
}
}
},
{
"$reduce": {
"input": "$panels.visConfig.dataConfig.columns.element",
"initialValue": 0,
"in": {
"$add": [ "$$value", { "$size": "$$this" } ]
}
}
}
]
},
100
]
}
}
}
]

Related

How to transform an array field into a value equaling its maximum?

{
name: "use_name",
grades: [
{class: "math": grade: 100},
{class: "english": grade: 90}
]
}
How do I write an aggregation pipeline to output:
{
name: "use_name",
grades: {class: "math": grade: 100},
}
The grades field has been reduced to the element where its grade property is the maximum of all elements.
The requirements, the aggregation pipeline cannot have $unwind or $group because it cannot have a stage where the stage needs to receive all incoming documents before outputting to the next stage, potentially exceeding the 100mb limit. And it must be fast.
I think this one is faster:
db.collection.aggregate([
{
$set: {
grades: {
$first: {
$sortArray: {
input: "$grades",
sortBy: { grade: -1 }
}
}
}
}
}
])
Mongo Playground
or this one:
db.collection.aggregate([
{
$set: {
grades: {
$filter: {
input: "$grades",
cond: { $eq: [ "$$this.grade", { $max: "$grades.grade" } ] }
}
}
}
}
])
Replace $$value in $reduce until you find the max.
db.collection.aggregate([
{
$set: {
grades: {
"$reduce": {
"input": "$grades",
"initialValue": null,
"in": {
"$cond": {
"if": {
$or: [
{
$eq: [
null,
"$$value"
]
},
{
$gt: [
"$$this.grade",
"$$value.grade"
]
}
]
},
"then": "$$this",
"else": "$$value"
}
}
}
}
}
}
])
Mongo Playground

MongoDb Create Aggregate Create query

I have 3 table users,shifts,temporaryShifts,
shifts:[{_id:ObjectId(2222),name:"Morning"},{_id:ObjectId(454),name:"Night"}]
users:[{_id:ObjectId(123),name:"Albert",shift_id:ObjectId(2222)}]
temporaryShifts:[
{_id:2,userId:ObjectId(123),shiftId:ObjectId(454),type:"temporary",date:"2020-02-01"},
{_id:987,userId:ObjectId(123),shiftId:ObjectId(454),type:"temporary",date:"2020-02-03"},
{_id:945,userId:ObjectId(123),shiftId:ObjectId(454),type:"temporary",date:"2020-02-08"},
{_id:23,userId:ObjectId(123),shiftId:ObjectId(454),date:"2020-02-09"}]
i want to make a mongoose aggregate query then give me result :
get result between two dates for example :2020-02-01 2020-02-05,
resullts is :
[
{_id:ObjectId(123),name:"Albert",shift:[
{_id:2,shiftId:ObjectId(454),type:"temporary",date:"2020-02-01"},
{_id:2,shiftId:ObjectId(2222),type:"permanent",date:"2020-02-02"},
{_id:2,shiftId:ObjectId(454),type:"temporary",date:"2020-02-03"},
{_id:2,shiftId:ObjectId(2222),type:"permanent",date:"2020-02-04"},
{_id:2,shiftId:ObjectId(2222),type:"permanent",date:"2020-02-05"},
]}
]
in result type temporary mean selected date in table temporaryShift document available else type permanent
MongoPlayGround You Can edit
You can first project a date range array using $range, in your example it will be like [2020-02-01, 2020-02-02, 2020-02-03, 2020-02-04, 2020-02-05], then you can use the array to perform $lookup
db.users.aggregate([
{
$limit: 1
},
{
"$addFields": {
"startDate": ISODate("2020-02-01"),
"endDate": ISODate("2020-02-05")
}
},
{
"$addFields": {
"dateRange": {
"$range": [
0,
{
$add: [
{
$divide: [
{
$subtract: [
"$endDate",
"$startDate"
]
},
86400000
]
},
1
]
}
]
}
}
},
{
"$addFields": {
"dateRange": {
$map: {
input: "$dateRange",
as: "increment",
in: {
"$add": [
"$startDate",
{
"$multiply": [
"$$increment",
86400000
]
}
]
}
}
}
}
},
{
"$unwind": "$dateRange"
},
{
"$project": {
"name": 1,
"shiftId": 1,
"dateCursor": "$dateRange"
}
},
{
"$lookup": {
"from": "temporaryShifts",
"let": {
dateCursor: "$dateCursor",
shiftId: "$shiftId"
},
"pipeline": [
{
"$addFields": {
"parsedDate": {
"$dateFromString": {
"dateString": "$date",
"format": "%Y-%m-%d"
}
}
}
},
{
$match: {
$expr: {
$and: [
{
$eq: [
"$$dateCursor",
"$parsedDate"
]
}
]
}
}
}
],
"as": "temporaryShiftsLookup"
}
},
{
"$unwind": {
path: "$temporaryShiftsLookup",
preserveNullAndEmptyArrays: true
}
},
{
$project: {
shiftId: 1,
type: {
"$ifNull": [
"$temporaryShiftsLookup.type",
"permanent"
]
},
date: "$dateCursor"
}
}
])
Here is the Mongo Playground for your reference.

want to convert "00:10:00" to a single integer in mongodb

There are many documents in the collection which contains this field timeTaken: "00:10:00",
I want to sum up from all the documents and have to give a single integer in mongodb robo3T.
That is for the following documents:
[
{ timeTaken: "00:10:00" },
{ timeTaken: "01:10:00" },
{ timeTaken: "02:20:50" }
]
I want the result to be:
{ timeTaken: "03:40:50" }
Our strategy will be to split the string into minutes, seconds and hours, convert them to numbers, sum them up and then reconstruct the structure.
For this you will need access to operators like $toString and $toInt which means you can only do this for version 4.0+, for older Mongo versions you will have to read the documents and do this in code.
I've split the following query into multiple stages so it's clearer what I'm doing but this could be re-written into just 2 stages, the $group stage and a final $project stage to restructure the data.
db.collection.aggregate([
{
"$addFields": {
dataParts: {
$map: {
input: {
$split: [
"$data",
":"
]
},
as: "num",
in: {
"$toInt": "$$num"
}
}
},
}
},
{
$group: {
_id: null,
seconds: {
$sum: {
"$arrayElemAt": [
"$dataParts",
2
]
}
},
minutes: {
$sum: {
"$arrayElemAt": [
"$dataParts",
1
]
}
},
hours: {
$sum: {
"$arrayElemAt": [
"$dataParts",
0
]
}
},
}
},
{
"$addFields": {
finalSeconds: {
$mod: [
"$seconds",
60
]
},
}
},
{
$addFields: {
minutes: {
$sum: [
"$minutes",
{
"$divide": [
{
"$subtract": [
"$seconds",
"$finalSeconds"
]
},
60
]
}
]
},
}
},
{
$addFields: {
finalMinutes: {
$mod: [
"$minutes",
60
]
},
finalHours: {
$sum: [
"$hours",
{
$mod: [
{
$max: [
{
"$subtract": [
"$minutes",
60
]
},
0
]
},
60
]
}
]
}
}
},
{
$project: {
final: {
$concat: [
{
"$toString": "$finalHours"
},
":",
{
"$toString": "$finalMinutes"
},
":",
{
"$toString": "$finalSeconds"
},
]
}
}
}
])
Mongo Playground

MongoDB: add field with conditional number of elements

I have next simplified collection
[
{
"key": 1,
"array": [
{ "check": true },
{ "check": false },
{ "check": true }
]
},
{
"key": 2
}
]
I want to add field "count" with number of elements of array with "check"=true, so I expect next result
{
"key": 1,
"array": [
{ "check": true },
{ "check": false },
{ "check": true }
],
"count":2,
},
{
"key": 2,
"count": 0
}
]
I have next query ( it is aggregation, because actually it one of stages of pipeline)
db.collection.aggregate([
{
"$addFields": {
"count": {
"$sum": {
"$cond": {
"if": {
"$eq": ["$array.check",true],
},
"then": 1,
"else": 0,
}
}
},
}
}
])
But I always get count=0.
Can you help me to find error in my query?
Here mongo playground
Instead of using $sum, you can use $filter to filter only the array with "check"=true, then check the size of the resulting array using $size.
db.collection.aggregate([
{
"$addFields": {
"count": {
"$size": {
"$filter": {
"input": { "$ifNull": ["$array", []] }, // default empty array if array is does not exist
"cond": "$$this.check" // only keep the truthy check value
}
}
}
}
}
])
Mongo Playground
Alternatively, if you want to use $sum, you could also map the array to an array of 0 and 1 according to the check value, using $map
db.collection.aggregate([
{
"$addFields": {
"count": {
"$sum": {
"$map": {
"input": { "$ifNull": ["$array", []] },
"in": {
"$cond": ["$$this.check", 1, 0]
}
}
}
}
}
}
])
Mongo Playground
Here is how to achieve this using $reduce
db.collection.aggregate([
{
"$addFields": {
"count": {
"$sum": {
$reduce: {
input: "$array",
initialValue: 0,
in: {
$sum: [
"$$value",
{
$cond: [
"$$this.check",
1,
0
]
}
]
}
}
}
}
}
}
])
Mongo Playground
Using $reduce aggregation array expression operator,
db.test.aggregate( [
{
$addFields: {
count: {
$reduce: {
input: { $ifNull: [ "$array", [] ] },
initialValue: 0,
in: {
$cond: [ { $eq: [ "$$this.check", true ] },
{ $add: [ "$$value", 1 ] },
"$$value"
]
}
}
}
}
}
] )
gets a result as follows:
{
"_id" : ObjectId("5f0c7b691c7c98bb49fd2b50"),
"key" : 1,
"array" : [
{
"check" : true
},
{
"check" : false
},
{
"check" : true
}
],
"count" : 2
}
{ "_id" : ObjectId("5f0c7b691c7c98bb49fd2b51"), "key" : 2, "count" : 0 }

Mongoose Summing Up subdocument array elements having same alias

I have a document like this:
_id:'someId',
sales:
[
{
_id:'111',
alias:'xxx',
amount:500,
name: Apple, //items with same alias always have same name and quantity
quantity:2
},
{
_id:'222',
alias:'abc',
amount:100,
name: Orange,
quantity:14
},
{
_id:'333',
alias:'xxx',
amount:300,
name: Apple, //items with same alias always have same name and quantity
quantity:2
}
]
The alias field is here to 'group' items/documents whenever they appear to have same alias i.e to be 'embeded' as one with the amount summed up.
I need to display some sort of a report in such a way that those elements which have same alias they should be displayed as ONE and the others which doesn't share same alias to remain as they are.
Example, For the sample document above, I need an output like this
[
{
alias:'xxx',
amount:800
},
{
alias:'abc',
amount:100
}
]
WHAT I HAVE TRIED
MyShop.aggregate([
{$group:{
_id: "$_id",
sales:{$last :"$sales"}
},
{$project:{
"sales.amount":1
}}
}
])
This just displays as a 'list' regardless of the alias. How do I achieve summing up amount based on the alias?
You can achieve this using $group
db.collection.aggregate([
{
$unwind: "$sales"
},
{
$group: {
_id: {
_id: "$_id",
alias: "$sales.alias"
},
sales: {
$first: "$sales"
},
_idsInvolved: {
$push: "$sales._id"
},
amount: {
$sum: "$sales.amount"
}
}
},
{
$group: {
_id: "$_id._id",
sales: {
$push: {
$mergeObjects: [
"$sales",
{
alias: "$_id.alias",
amount: "$amount",
_idsInvolved: "$_idsInvolved"
}
]
}
}
}
}
])
Mongo Playground
You can use below aggregation
db.collection.aggregate([
{
"$addFields": {
"sales": {
"$map": {
"input": {
"$setUnion": [
"$sales.alias"
]
},
"as": "m",
"in": {
"$let": {
"vars": {
"a": {
"$filter": {
"input": "$sales",
"as": "d",
"cond": {
"$eq": [
"$$d.alias",
"$$m"
]
}
}
}
},
"in": {
"amount": {
"$sum": "$$a.amount"
},
"alias": "$$m",
"_idsInvolved": "$$a._id"
}
}
}
}
}
}
}
])
MongoPlayground