I need to get the max count of negative sequence from array via aggregation , example documents:
{
"id": 1,
x: [ 1,1,-1,-1,1,1,1,-1,-1,-1,-1]
},
{
"id": 2,
x: [ 1,-1,-1,1,1,1,-1 ]
}
expected result:
{"id": 1,x:4},
{"id": 2,x:2}
Please, advice?
You can use $reduce to iterate the array and $cond to apply your logic (consecutive negatives)
The carrier is in format
{
previous: // previous value to compare for continuity
acc: // number of consecutive negatives in the current sequence
max: // length of the longest sequence
}
$let is to memoise current accumulator to reuse in the max calculation. It's optional yet convenient:
db.collection.aggregate([
{
"$set": {
"x": {
"$reduce": {
"input": "$x",
"initialValue": {
previous: 0,
acc: 0,
max: 0
},
"in": {
$let: {
vars: {
result: {
"$cond": {
"if": {
"$and": [
{
"$lt": [
"$$this",
0
]
},
{
"$lt": [
"$$value.previous",
0
]
}
]
},
"then": {
"$add": [
"$$value.acc",
1
]
},
"else": {
"$cond": {
"if": {
"$lt": [
"$$this",
0
]
},
"then": 1,
"else": 0
}
}
}
}
},
in: {
previous: "$$this",
acc: "$$result",
max: {
"$cond": {
"if": {
$gt: [
"$$value.max",
"$$result"
]
},
"then": "$$value.max",
"else": "$$result"
}
}
}
}
}
}
}
}
},
{
"$set": {
x: "$x.max"
}
}
])
Try it on mongoplayground.net.
Here's another way to do it. The general idea is to $reduce the sequence to a string and then $split to make an array filled with strings of each run. Then map the array of strings to an array of string lengths and then take the max.
db.collection.aggregate({
"$project": {
"_id": 0,
"id": 1,
"x": {
"$max": {
"$map": {
"input": {
$split: [
{
"$reduce": {
"input": "$x",
"initialValue": "",
"in": {
$concat: [
"$$value",
{
"$cond": [
{
"$gt": [
"$$this",
0
]
},
"p",
"n"
]
}
]
}
}
},
"p"
]
},
"in": {
"$strLenBytes": "$$this"
}
}
}
}
}
})
Try it on mongoplayground.net.
Related
I'm new in mongoDB.
This is one example of record from collection:
{
supplier: 1,
type: "sale",
items: [
{
"_id": ObjectId("60ee82dd2131c5032342070f"),
"itemBuySum": 10
},
{
"_id": ObjectId("60ee82dd2131c50323420710"),
"itemBuySum": 10,
},
{
"_id": ObjectId("60ee82dd2131c50323420713"),
"itemBuySum": 10
},
{
"_id": ObjectId("60ee82dd2131c50323420714"),
"itemBuySum": 20
}
]
}
I need to group by TYPE field and get the SUM. This is output I need:
{
supplier: 1,
sales: 90,
returns: 170
}
please check Mongo playground for better understand. Thank you!
$match - Filter documents.
$group - Group by type and add item into data array which leads to the result like:
[
[/* data 1 */],
[/* data 2 */]
]
$project - Decorate output document.
3.1. First $reduce is used to flatten the nested array to a single array (from Result (2)) via $concatArrays.
3.2. Second $reduce is used to aggregate $sum the itemBuySum.
db.collection.aggregate({
$match: {
supplier: 1
},
},
{
"$group": {
"_id": "$type",
"supplier": {
$first: "$supplier"
},
"data": {
"$push": "$items"
}
}
},
{
"$project": {
_id: 0,
"supplier": "$supplier",
"type": "$_id",
"returns": {
"$reduce": {
"input": {
"$reduce": {
input: "$data",
initialValue: [],
in: {
"$concatArrays": [
"$$value",
"$$this"
]
}
}
},
"initialValue": 0,
"in": {
$sum: [
"$$value",
"$$this.itemBuySum"
]
}
}
}
}
})
Sample Mongo Playground
db.collection.aggregate([
{
$match: {
supplier: 1
},
},
{
"$group": {
"_id": "$ID",
"supplier": {
"$first": "$supplier"
},
"sale": {
"$sum": {
"$cond": {
"if": {
"$eq": [
"$type",
"sale"
]
},
"then": {
"$sum": "$items.itemBuySum"
},
"else": {
"$sum": 0
}
}
}
},
"returns": {
"$sum": {
"$sum": {
"$cond": {
"if": {
"$eq": [
"$type",
"return"
]
},
"then": {
"$sum": "$items.itemBuySum"
},
"else": {
"$sum": 0
}
}
}
}
}
}
},
{
"$project": {
_id: 0,
supplier: 1,
sale: 1,
returns: 1
}
}
])
I have next simplified collection
[
{
"key": 1,
"array": [
{ "check": true },
{ "check": false },
{ "check": true }
]
},
{
"key": 2
}
]
I want to add field "count" with number of elements of array with "check"=true, so I expect next result
{
"key": 1,
"array": [
{ "check": true },
{ "check": false },
{ "check": true }
],
"count":2,
},
{
"key": 2,
"count": 0
}
]
I have next query ( it is aggregation, because actually it one of stages of pipeline)
db.collection.aggregate([
{
"$addFields": {
"count": {
"$sum": {
"$cond": {
"if": {
"$eq": ["$array.check",true],
},
"then": 1,
"else": 0,
}
}
},
}
}
])
But I always get count=0.
Can you help me to find error in my query?
Here mongo playground
Instead of using $sum, you can use $filter to filter only the array with "check"=true, then check the size of the resulting array using $size.
db.collection.aggregate([
{
"$addFields": {
"count": {
"$size": {
"$filter": {
"input": { "$ifNull": ["$array", []] }, // default empty array if array is does not exist
"cond": "$$this.check" // only keep the truthy check value
}
}
}
}
}
])
Mongo Playground
Alternatively, if you want to use $sum, you could also map the array to an array of 0 and 1 according to the check value, using $map
db.collection.aggregate([
{
"$addFields": {
"count": {
"$sum": {
"$map": {
"input": { "$ifNull": ["$array", []] },
"in": {
"$cond": ["$$this.check", 1, 0]
}
}
}
}
}
}
])
Mongo Playground
Here is how to achieve this using $reduce
db.collection.aggregate([
{
"$addFields": {
"count": {
"$sum": {
$reduce: {
input: "$array",
initialValue: 0,
in: {
$sum: [
"$$value",
{
$cond: [
"$$this.check",
1,
0
]
}
]
}
}
}
}
}
}
])
Mongo Playground
Using $reduce aggregation array expression operator,
db.test.aggregate( [
{
$addFields: {
count: {
$reduce: {
input: { $ifNull: [ "$array", [] ] },
initialValue: 0,
in: {
$cond: [ { $eq: [ "$$this.check", true ] },
{ $add: [ "$$value", 1 ] },
"$$value"
]
}
}
}
}
}
] )
gets a result as follows:
{
"_id" : ObjectId("5f0c7b691c7c98bb49fd2b50"),
"key" : 1,
"array" : [
{
"check" : true
},
{
"check" : false
},
{
"check" : true
}
],
"count" : 2
}
{ "_id" : ObjectId("5f0c7b691c7c98bb49fd2b51"), "key" : 2, "count" : 0 }
I want to modify a field through a projection stage in the aggregation pipeline, this field is combination of other fields values separated by (-)
if the field is null or empty of missing it will not be added to the cocatenated string
{$project:{
//trial-1:
finalField:{
$concat["$field1",'-','$field2','-','$field3',...]
//problem1: $concat will return null if any of it's arguments is null or missing
//problem2: if all the fields are exist with non-null values, the delimiter will exists even if the field dosen't
}
//trial-2:
finalField:{
$concat:[
{$cond:[{field1:null},'',{$concat:['$field1','-']},..]
//the problem: {field1:null} fails if the field dosen't exixt (i.e the expression gives true)
//trial-3
finalField:{
$concat:[
{$cond:[{$or:[{field1:null},{field:{$exists:true}},'',
{$concat:['$field1','-']}
]}]}
]
}
]
}
//trial-4 -> using $reduce instead of $concate (same issues)
}
You basically want $ifNull. It's "sort of" like $exists but for aggregation statements, where it returns a default value when the field expression returns null, meaning "not there":
{ "$project": {
"finalField": {
"$concat": [
{ "$ifNull": [ "$field1", "" ] },
"-",
{ "$ifNull": [ "$field2", "" ] },
"-",
{ "$ifNull": [ "$field3", "" ] }
]
}
}}
For example with data like:
{ "field1": "a", "field2": "b", "field3": "c" },
{ "field1": "a", "field2": "b" },
{ "field1": "a", "field3": "c" }
You get, without any error producing of course:
{ "finalField" : "a-b-c" }
{ "finalField" : "a-b-" }
{ "finalField" : "a--c" }
If you want something fancier, then you would instead dynamically work with the names, as in:
{ "$project": {
"finalField": {
"$reduce": {
"input": {
"$filter": {
"input": { "$objectToArray": "$$ROOT" },
"cond": { "$ne": [ "$$this.k", "_id" ] }
}
},
"initialValue": "",
"in": {
"$cond": {
"if": { "$eq": [ "$$value", "" ] },
"then": { "$concat": [ "$$value", "$$this.v" ] },
"else": { "$concat": [ "$$value", "-", "$$this.v" ] }
}
}
}
}
}}
Which can be aware of what fields were actually present and only attempt to join those:
{ "finalField" : "a-b-c" }
{ "finalField" : "a-b" }
{ "finalField" : "a-c" }
You can even manually specify the list of fields if you don't want the $objectToArray over the document or sub-document:
{ "$project": {
"finalField": {
"$reduce": {
"input": {
"$filter": {
"input": ["$field1", "$field2", "$field3"],
"cond": { "$ne": [ "$$this", null ] }
}
},
"initialValue": "",
"in": {
"$cond": {
"if": { "$eq": [ "$$value", "" ] },
"then": { "$concat": [ "$$value", "$$this" ] },
"else": { "$concat": [ "$$value", "-", "$$this" ] }
}
}
}
}
}}
I'm trying to work out exactly how to achieve an aggregation, I could manually unwind and group back together at the end, but I'm sure I should be able to achieve this in a more concise way so I wanted to throw it out as I'm getting stuck.
My document structure (skipping out the un-interesting bits) looks like:
{
_id: ObjectId,
panels: [
{
visConfig: {
dataConfig: {
columns: [
{ element: "DX" },
{ element: "SE" },
]
}
}
},
{
visConfig: {
dataConfig: {
columns: [
{ element: "AB" },
{ element: "XY" },
]
}
}
}
]
}
What I want to do is calculate a percentage of the element overlaps with a given set to be provided. So for example for the document shown it would produce 25% for the set ["DX"] or 50% for the set ["DX", "AB"].
So I've tried a few things, I think I've settled on the nearest so far as:
$project: {
_id: 1,
total: { $sum: { $size: "$panels.visConfig.dataConfig.columns" } }
}
But I'm getting an error here which I don't understand:
The argument to $size must be an array, but was of type: missing
Then I'm also having issues with my conditional aggregation which seems to be returning 0 for all of the element values.
{
_id: 1,
"panels.visConfig.dataConfig.columns.element": {
$sum: {
$cond: [{
$setIsSubset: [
["DX"], ["$panels.visConfig.dataConfig.columns.element"]
]
}, 1, 0 ],
}
},
}
You can try below aggregation in 3.4 version.
db.colname.aggregate([
{"$project":{
"_id":1,
"total":{
"$reduce":{
"input":"$panels.visConfig.dataConfig.columns.element",
"initialValue":0,
"in":{"$add":["$$value",{"$size":"$$this"}]}
}},
"match":{
"$sum":{
"$map":{
"input":"$panels.visConfig.dataConfig.columns.element",
"in":{
"$size":{
"$setIntersection":[["DX","AB"],"$$this"]
}
}
}
}
}
}},
{"$project":{
"_id":1,
"percent":{"$multiply":[{"$divide":["$match","$total"]}, 100]}
}}])
Update - You can perform both match and total calculations in $reduce pipeline.
db.colname.aggregate([
{"$project":{
"_id":1,
"stats":{
"$reduce":{
"input":"$panels.visConfig.dataConfig.columns.element",
"initialValue":{"total":0,"match":0},
"in":{
"total":{"$add":["$$value.total",{"$size":"$$this"}]},
"match":{"$add":["$$value.match",{"$sum":{"$map":{"input":"$$this","in":{"$cond":[{"$in":["$$this", ["DX","AB"]] }, 1, 0]}}}}]}
}
}}
}},
{"$project":{
"_id":1,
"percent":{"$multiply":[{"$divide":["$stats.match","$stats.total"]}, 100]}
}}])
You can use $map + $reduce to get an array of all element values and then using $divide you can divide $filter-ed $size by total $size:
db.col.aggregate([
{
$project: {
elements: {
$reduce: {
input: {
$map: {
input: "$panels",
as: "panel",
in: "$$panel.visConfig.dataConfig.columns.element"
}
},
initialValue: [],
in: { $concatArrays: [ "$$this", "$$value" ] }
}
}
}
},
{
$project: {
percentage: {
$divide: [
{
$size: {
$filter: {
input: "$elements",
as: "element",
cond: {
$in: [
"$$element",
[ "AB", "XY" ] // your input here
]
}
}
}
},
{ $size: "$elements" }
]
}
}
}
])
Well, there are couple of ways to do this, but I these two pipelines show how I would do it.
var values = ["DX", "KL"]
First approach
[
{
"$project": {
"percent": {
"$let": {
"vars": {
"allsets": {
"$reduce": {
"input": "$panels.visConfig.dataConfig.columns",
"initialValue": [],
"in": {
"$concatArrays": [ "$$this.element", "$$value" ]
}
}
}
},
"in": {
"$multiply": [
{
"$divide": [
{
"$size": {
"$setIntersection": [ "$$allsets", values ]
}
},
{ "$size": "$$allsets" }
]
},
100
]
}
}
}
}
}
]
Second approach same idea here but, using one pipeline stage.
[
{
"$project": {
"percent": {
"$multiply": [
{
"$divide": [
{
"$sum": {
"$map": {
"input": "$panels.visConfig.dataConfig.columns.element",
"in": {
"$size": {
"$setIntersection": [ values, "$$this" ]
}
}
}
}
},
{
"$reduce": {
"input": "$panels.visConfig.dataConfig.columns.element",
"initialValue": 0,
"in": {
"$add": [ "$$value", { "$size": "$$this" } ]
}
}
}
]
},
100
]
}
}
}
]
In MongoDB, I have documents with a structure like this:
{
_id: "123456...", // an ObjectId
name: "foobar",
classification: {
class_1: 0.45,
class_2: 0.11,
class_3: 0.44
}
}
Using the aggregation pipeline, is it possible to give me an object that contains the highest classification? So, given the above, I would like something like this as result:
{
_id: "123456...", // an ObjectId
name: "foobar",
classification: "class_1"
}
I thought I could use $unwind but the classification property is not an array.
For what it's worth: I know there will always be three properties in classification, so it's ok to hard-code the keys in the query.
You should probably note here that every technique applied is essentially based on "coercion" of the "key/value" pairs into an "array" format for comparison and extraction. So the real lesson to learn is is that your document "should" in fact store this as an "array" instead. But onto the techniques.
If you have MongoDB 3.4 then you can use $objectToArray to turn the "keys" into an array so you can get the value:
Dynamic
db.collection.aggregate([
{ "$addFields": {
"classification": {
"$arrayElemAt": [
{ "$map": {
"input": {
"$filter": {
"input": { "$objectToArray": "$classification" },
"as": "c",
"cond": {
"$eq": [
"$$c.v",
{ "$max": {
"$map": {
"input": { "$objectToArray": "$classification" },
"as": "c",
"in": "$$c.v"
}
}}
]
}
}
},
"as": "c",
"in": "$$c.k",
}},
0
]
}
}}
])
Otherwise just to the transformation as you iterate the cursor if you do not really need it for further aggregation. As a basic JavaScript example:
db.collection.find().map(d => Object.assign(
d,
{ classification: Object.keys(d.classification)
.filter(k => d.classification[k] === Math.max.apply(null,
Object.keys(d.classification).map(k => d.classification[k])
))[0]
}
));
And that's also the same basic logic that you apply using mapReduce if you were actually aggregating something.
Both produce:
/* 1 */
{
"_id" : "123456...",
"name" : "foobar",
"classification" : "class_1"
}
HardCoding
On the "hardcoding" case which you say is okay. Then you can construct like this with $switch by supplying $max with each of the values:
db.collection.aggregate([
{ "$addFields": {
"classification": {
"$let": {
"vars": {
"max": {
"$max": [
"$classification.class_1",
"$classification.class_2",
"$classification.class_3"
]
}
},
"in": {
"$switch": {
"branches": [
{ "case": { "$eq": [ "$classification.class_1", "$$max" ] }, "then": "class_1" },
{ "case": { "$eq": [ "$classification.class_2", "$$max" ] }, "then": "class_2" },
{ "case": { "$eq": [ "$classification.class_3", "$$max" ] }, "then": "class_3" },
]
}
}
}
}
}}
])
Which gives rise to then actually being able to write that out longer using $cond, and then the only real constraint is the change in $max for MongoDB 3.2, which allowed an array of arguments as opposed to it's previous role as an "accumulator only":
db.collection.aggregate([
{ "$addFields": {
"classification": {
"$let": {
"vars": {
"max": {
"$max": [
"$classification.class_1",
"$classification.class_2",
"$classification.class_3"
]
}
},
"in": {
"$cond": {
"if": { "$eq": [ "$classification.class_1", "$$max" ] },
"then": "class_1",
"else": {
"$cond": {
"if": { "$eq": [ "$classification.class_2", "$$max" ] },
"then": "class_2",
"else": "class_3"
}
}
}
}
}
}
}}
])
If you were "really" constrained then you could "force" the "max" through a separate pipeline stage using $map and $unwind on the array then $group again. This would make the operations compatible with MongoDB 2.6:
db.collection.aggregate([
{ "$project": {
"name": 1,
"classification": 1,
"max": {
"$map": {
"input": [1,2,3],
"as": "e",
"in": {
"$cond": {
"if": { "$eq": [ "$$e", 1 ] },
"then": "$classification.class_1",
"else": {
"$cond": {
"if": { "$eq": [ "$$e", 2 ] },
"then": "$classification.class_2",
"else": "$classification.class_3"
}
}
}
}
}
}
}},
{ "$unwind": "$max" },
{ "$group": {
"_id": "$_id",
"name": { "$first": "$name" },
"classification": { "$first": "$classification" },
"max": { "$max": "$max" }
}},
{ "$project": {
"name": 1,
"classification": {
"$cond": {
"if": { "$eq": [ "$classification.class_1", "$max" ] },
"then": "class_1",
"else": {
"$cond": {
"if": { "$eq": [ "$classification.class_2", "$max" ] },
"then": "class_2",
"else": "class_3"
}
}
}
}
}}
])
And going really ancient, then we can instead $unwind from $const, which was (and still is) a "hidden" and undocumented operator equal in function to $literal (which is technically aliased to it) in modern versions, but also with the alternate syntax to $cond as an "array" ternary operation this then becomes compatible with all versions since the aggregation framework existed:
db.collection.aggregate([
{ "$project": {
"name": 1,
"classification": 1,
"temp": { "$const": [1,2,3] }
}},
{ "$unwind": "$temp" },
{ "$group": {
"_id": "$_id",
"name": { "$first": "$name" },
"classification": { "$first": "$classification" },
"max": {
"$max": {
"$cond": [
{ "$eq": [ "$temp", 1 ] },
"$classification.class_1",
{ "$cond": [
{ "$eq": [ "$temp", 2 ] },
"$classification.class_2",
"$classification.class_3"
]}
]
}
}
}},
{ "$project": {
"name": 1,
"classification": {
"$cond": [
{ "$eq": [ "$max", "$classification.class_1" ] },
"class_1",
{ "$cond": [
{ "$eq": [ "$max", "$classification.class_2" ] },
"class_2",
"class_3"
]}
]
}
}}
])
But it is of course possible, even if extremely messy.
You can use $indexOfArray operator to find the $max value in classification followed by projecting the key. $objectToArray to convert classification embedded doc into array of key value pairs in 3.4.4 version.
db.collection.aggregate([
{
"$addFields": {
"classification": {
"$let": {
"vars": {
"classificationkv": {
"$objectToArray": "$classification"
}
},
"in": {
"$let": {
"vars": {
"classificationmax": {
"$arrayElemAt": [
"$$classificationkv",
{
"$indexOfArray": [
"$$classificationkv.v",
{
"$max": "$$classificationkv.v"
}
]
}
]
}
},
"in": "$$classificationmax.k"
}
}
}
}
}
}
])
In the end, I went with a more simple solution, but not as generic as the other ones posted here. I used this a switch case statement:
{'$project': {'_id': 1, 'name': 1,
'classification': {'$switch': {
'branches': [
{'case': {'$and': [{'$gt': ['$classification.class_1', '$classification.class_2']},
{'$gt': ['$classification.class_1', '$classification.class_3']}]},
'then': "class1"},
{'case': {'$and': [{'$gt': ['$classification.class_2', '$classification.class_1']},
{'$gt': ['$classification.class_2', '$classification.class_3']}]},
'then': "class_2"},
{'case': {'$and': [{'$gt': ['$classification.class_3', '$classification.class_1']},
{'$gt': ['$classification.class_3', '$classification.class_2']}]},
'then': "class_3"}],
'default': ''}}
}}
This works for me, but the other answers might be a better option, YMMV.