How to determine count of distinct values of a field in mongoDB

How to determine count of distinct values of a field in mongoDB - mongodb

I have a collection called INFODOCS which has the fields PROCESSOR_ID and STATUS (True/False/Null).
I want to determine two things
Working_Processor = Count of Distinct PROCESSOR_ID where STATUS is not Null
Total_Processor = Count of Distinct PROCESSOR_ID
INFODOCS
[
{
"_id": "1",
"PROCESSOR_ID" : "11",
"STATUS" : "True"
},
{
"_id": "2",
"PROCESSOR_ID" : "11",
"STATUS" : ""
},
{
"_id": "3",
"PROCESSOR_ID" : "22",
"STATUS" : "False"
},
{
"_id": "4",
"PROCESSOR_ID" : "33",
"STATUS" : ""
}
]
Here expected answer is:
Working_Processor = 2 (_id:1 and _id:3)
Total_Processor = 3
I tried using $addToSet with $cond, but want to know if there is a better way of achieving the same.
[
{
'$group': {
'_id': None,
'WORKING_PROCESSOR': {
'$addToSet': {
'$cond': [
{
'$ne': [
'$STATUS', ''
]
}, '$PROCESSOR_ID', None
]
}
},
'TOTAL_PROCESSOR': {
'$addToSet': '$PROCESSOR_ID'
}
}
}, {
'$project': {
'_id': 0,
'WORKING_PROCESSOR': {
'$subtract': [
{
'$size': '$WORKING_PROCESSOR'
}, 1
]
},
'TOTAL_PROCESSOR': {
'$size': '$TOTAL_PROCESSOR'
}
}
}
]

One simple option is:
db.collection.aggregate([
{$group: {
_id: "$PROCESSOR_ID",
WORKING_PROCESSOR: {$max: {$cond: [{$ne: ["$STATUS", ""]}, 1, 0]}}
}},
{$group: {
_id: 0,
WORKING_PROCESSOR: {$sum: "$WORKING_PROCESSOR"},
TOTAL_PROCESSOR: {$sum: 1}
}},
{$unset: "_id"}
])
See how it works on the playground example

Related

How to custom sort a field in MongoDB

I have a collection called INFODOCS which has the fields ML_PRIORITY (HIGH/MEDIUM/LOW) and STATUS (True/False/Null). I want to determine the count of each STATUS for each ML_PRIORITY and then sort the ML_PRIORITY in the order High, Medium and Low.
[
{
"_id": "1",
"ML_PRIORITY" : "HIGH",
"STATUS" : "True"
},
{
"_id": "2",
"ML_PRIORITY" : "HIGH",
"STATUS" : ""
},
{
"_id": "3",
"ML_PRIORITY" : "HIGH",
"STATUS" : "False"
},
{
"_id": "4",
"ML_PRIORITY" : "MEDIUM",
"STATUS" : ""
},
{
"_id": "5",
"ML_PRIORITY" : "Low",
"STATUS" : ""
}
]
I was able to determine the count of STATUS for each ML_PRIORITY using below aggregation pipeline
but I am not sure how I can custom sort the ML_PRIORITY, as $sort has only two options (1 and -1)
db.collection.aggregate([
{
'$group': {
'_id': '$ML_PRIORITY',
'QUALITYCHECKDONE': {
'$sum': {
'$cond': [
{
'$eq': [
'$STATUS', 'TRUE'
]
}, 1, 0
]
}
},
'QUALITYCHECKNOTDONE': {
'$sum': {
'$cond': [
{
'$eq': [
'$STATUS', ''
]
}, 1, 0
]
}
},
'QUALITYCHECKNOTREQ': {
'$sum': {
'$cond': [
{
'$eq': [
'$STATUS', 'FALSE'
]
}, 1, 0
]
}
}
}
}, {
'$project': {
'_id': 0,
'ML_PRIORITY': '$_id',
'QUALITYCHECKDONE': 1,
'QUALITYCHECKNOTDONE': 1,
'QUALITYCHECKNOTREQ': 1
}
}
])
Example - https://mongoplayground.net/p/anAwoqZk2Ys

One option is to replace your last step with 3 steps, in order to $set an order field, $sort, and $unset it:
[
{$set: {
order: {$indexOfArray: [["HIGH", "MEDIUM", "Low"], "$_id"]},
"ML_PRIORITY": "$_id"
}},
{$sort: {order: 1}},
{$unset: ["_id", "order"]}
]
See how it works on the playground example

MongoDB : not able to get the field 'name' which has the max value in the two similar sub-documents

I have a test collection:
{
"_id" : ObjectId("5exxxxxx03"),
"username" : "abc",
"col1" : [
{
"colId" : 1,
"col2" : [
{
"name" : "a",
"value" : 10
},
{
"name" : "b",
"value" : 20
},
{
"name" : "c",
"value" : 30
}
],
"col3" : [
{
"name" : "d",
"value" : 15
},
{
"name" : "e",
"value" : 25
},
{
"name" : "f",
"value" : 35
}
]
}
]
}
col1 holds the sub-document lists col2 and col3, which are similar but convey different meanings. Both have name and value as fields.
Now, I need to find the max value from col2 or col3 and its corresponding name.
I tried the below query:
db.test.aggregate([
{$unwind: '$col1'},
{$unwind: '$col1.col2'},
{$unwind: '$col1.col3'},
{$group:
{_id: '$col1.colId',
maxCol2: {$max: '$col1.col2.value'},
maxCol3: {$max: '$col1.col3.value'}}},
{$project:
{maxValue: {$max: ['$maxCol2', '$maxCol3']},
name: {$cond: [
{$eq: ['$maxValue', '$maxCol2']},
'$col1.col2.name',
'$col1.col3.name']}}}]).pretty()
But, it resulted in the following, without name field in it:
{ "_id" : 1, "maxValue" : 35 }
So, just to check whether my condition is correct or not, I tried the following query ($col1.col2.name and $col1.col3.name replaced with the strings 111 and 222):
db.test.aggregate([
{$unwind: '$col1'},
{$unwind: '$col1.col2'},
{$unwind: '$col1.col3'},
{$group:
{_id: '$col1.colId',
maxCol2: {$max: '$col1.col2.value'},
maxCol3: {$max: '$col1.col3.value'}}},
{$project:
{maxValue: {$max: ['$maxCol2', '$maxCol3']},
name: {$cond: [
{$eq: ['$maxValue', '$maxCol2']},
'111',
'222']}}}]).pretty()
Which gives me the expected output:
{ "_id" : 1, "maxValue" : 35, "name" : "222" }
Could anyone guide me as to why I am not getting the correct answer and how I should query this to get the correct output?
The correct output should be:
{ "_id" : 1, "maxValue" : 35, "name" : "f" }
P.S. - I'm a beginner.

You can use below aggregation
db.collection.aggregate([
{ "$project": {
"col1": {
"$max": {
"$reduce": {
"input": "$col1",
"initialValue": [],
"in": {
"$concatArrays": [
"$$this.col2",
"$$value",
"$$this.col3"
]
}
}
}
}
}}
])
MongoPlayground

Try this one:
Explanation
We need to add extra fields with col2 and col3 values. Once we calculate max value, we retrieve name based on max value.
db.collection.aggregate([
{
$unwind: "$col1"
},
{
$unwind: "$col1.col2"
},
{
$unwind: "$col1.col3"
},
{
$group: {
_id: "$col1.colId",
maxCol2: {
$max: "$col1.col2.value"
},
maxCol3: {
$max: "$col1.col3.value"
},
col2: {
$addToSet: "$col1.col2"
},
col3: {
$addToSet: "$col1.col3"
}
}
},
{
$project: {
maxValue: {
$filter: {
input: {
$cond: [
{
$gt: [
"$maxCol2",
"$maxCol3"
]
},
"$col2",
"$col3"
]
},
cond: {
$eq: [
"$$this.value",
{
$cond: [
{
$gt: [
"$maxCol2",
"$maxCol3"
]
},
"$maxCol2",
"$maxCol3"
]
}
]
}
}
}
}
},
{
$unwind: "$maxValue"
},
{
$project: {
_id: 1,
maxValue: "$maxValue.value",
name: "$maxValue.name"
}
}
])
MongoPlayground | Merging col2 / col3 | Per document

In MongoDB aggregation pipeline, how to project indices of embedded array that matched?

In a mongodb aggregation pipeline, I want to $project the indices of an embedded array (a sub-document) that matches a previous $match stage.
Say, I have the following docs.
{_id: '1', tags: ['aaa', 'bbb', 'ccc']},
{_id: '2', tags: ['baa', 'aaa', 'aaa']},
{_id: '3', tags: ['aac', 'cbb', 'aca']},
Now, if I query with {tags: 'aaa'}, I want to output something similar to
{_id: '1', tags: [0]},
{_id: '2', tags: [1, 2]}

db.inventory.aggregate([
{ $match : {tags : 'aaa' }},
{ $unwind : { path: "$tags", includeArrayIndex: "arrayIndex"}},
{ $match : {tags : 'aaa' }},
{ $group : {
_id : '$_id',
tags : { $push : '$arrayIndex'}
}
}
])
Output :
{ "_id" : "2", "tags" : [ NumberLong(1), NumberLong(2) ] }
{ "_id" : "1", "tags" : [ NumberLong(0) ] }
Another way :
db.inventory.aggregate([
{ $match : {tags : 'aaa' }},
{ $project : {
tags: {
$filter: {
input: {
$zip: {
inputs: [ "$tags", { $range: [0, { $size: "$tags" }] } ]
}
},
as: "tagWithIndex",
cond: {
$let: {
vars: {
tag : { $arrayElemAt: [ "$$tagWithIndex", 0 ] }
},
in: { $eq: [ "$$tag", 'aaa' ] }
}
}
}
}
}},
{ $unwind : '$tags'},
{ $group : {
_id : '$_id',
tags : {
$push : { $arrayElemAt: [ "$tags", 1]}
}
}
}
])
Output :
{ "_id" : "2", "tags" : [ 1, 2 ] }
{ "_id" : "1", "tags" : [ 0 ] }
hope this helps.

You need to $map over a $range built from the $size of the tags array, so that each element is paired with its index, and then you can easily use the $filter aggregation to keep only the elements that are equal to the string aaa
db.collection.aggregate([
{ "$match": { "tags": "aaa" }},
{ "$project": {
"tags": {
"$filter": {
"input": {
"$map": {
"input": { "$range": [0, { "$size": "$tags" }] },
"in": {
"string": { "$arrayElemAt": ["$tags", "$$this"] },
"index": "$$this"
}
}
},
"cond": { "$eq": ["$$this.string", "aaa"] }
}
}
}},
{ "$project": { "tags": "$tags.index" }}
])
Output
[
{
"_id": "1",
"tags": [0]
},
{
"_id": "2",
"tags": [1, 2]
}
]

If you're searching for an array, you should use $in.
db.inventory.find( { tags: { $in: [ 'aaa' ] } } )
You can also write the same condition in a $match stage; the syntax is the same.
The documentation linked below has more detail. That's what you're looking for.
Source : https://docs.mongodb.com/manual/reference/operator/query/in/
db.inventory.find( { "tags": { $in: 'aaa' } },
{ "tags.$": 1 } )
This is probably what you want.

Aggregate Fields together

I have the following structure as an input from which data needs to be aggregated:
I need to aggregate the data such that I end up with the following structure:
start: A {
tripdetails: [{
destination: B [{
duration: 10,
type: male
},
duration: 12,
type: female
},
duration: 9,
type: female
}]
]}
}
Basically I need to group "type" and "duration" together under the same destination.
I came up with the following query, but this results in a single field for "type" for each "destination", rather than one for every "duration".
db.test.aggregate(
{
$group: {
_id: {"StationID": "$start", "EndStationID": "$destination"},
durations: {$addToSet: "$duration" },
usertypes: {$addToSet: "$type" }
}
},
{
$group: {
_id: "$_id.StationID",
Tripcount_out: {$sum: "durations"},
Trips_out: { $addToSet: { EndStationID: "$_id.EndStationID", Tripduration: "$durations", Usertype: "$usertypes"} }
}
}
)
My question is how I can achieve the structure described above.

You could try running the following aggregate pipeline:
db.test.aggregate([
{
"$group": {
"_id": { "StationID": "$start", "EndStationID": "$destination" },
"details": {
"$push": {
"duration": "$duration",
"type": "$type"
}
}
}
},
{
"$group": {
"_id": "$_id.StationID",
"tripdetails": {
"$push": {
"destination": "$_id.EndStationID",
"trips": "$details"
}
}
}
}
])
which yields:
{
"_id" : "A",
"tripdetails" : [
{
"destination" : "B",
"trips" : [
{
"duration" : 10,
"type" : "male"
},
{
"duration" : 9,
"type" : "female"
},
{
"duration" : 12,
"type" : "female"
}
]
}
]
}

MongoDB aggregate count based on multiple query fields - (Multiple field count)

My collection will look like this,
{
"_id" : ObjectId("55c8bd1d85b83e06dc54c0eb"),
"name" : "xxx",
"salary" : 10000,
"type" : "type1"
}
{
"_id" : ObjectId("55c8bd1d85b83e06dc54c0eb"),
"name" : "aaa",
"salary" : 10000,
"type" : "type2"
}
{
"_id" : ObjectId("55c8bd1d85b83e06dc54c0eb"),
"name" : "ccc",
"salary" : 10000,
"type" : "type2"
}
My query params will be coming as,
{salary=10000, type=type2}
so based on the query I need to fetch the count of above query params
The result should be something like this,
{ category: 'type1', count: 500 } { category: 'type2', count: 200 } { category: 'name', count: 100 }
Right now I get the counts either by running three different queries and constructing the result, or by iterating over the documents on the server side.
Can anyone suggest a good way to get the above result?

Your question is not very clearly presented, but it seems that what you wanted to do here was count the occurrences of the data in the fields, optionally filtering those fields by the values that match the criteria.
Here the $cond operator allows you to transform a logical condition into a value:
db.collection.aggregate([
{ "$group": {
"_id": null,
"name": { "$sum": 1 },
"salary": {
"$sum": {
"$cond": [
{ "$gte": [ "$salary", 1000 ] },
1,
0
]
}
},
"type": {
"$sum": {
"$cond": [
{ "$eq": [ "$type", "type2" ] },
1,
0
]
}
}
}}
])
All values are in the same document, and it does not really make any sense to split them up here as this is additional work in the pipeline.
{ "_id" : null, "name" : 3, "salary" : 3, "type" : 2 }
Otherwise, the long form, which is not very performant because it needs to make a copy of each document for every key, looks like this:
db.collection.aggregate([
{ "$project": {
"name": 1,
"salary": 1,
"type": 1,
"category": { "$literal": ["name","salary","type"] }
}},
{ "$unwind": "$category" },
{ "$group": {
"_id": "$category",
"count": {
"$sum": {
"$cond": [
{ "$and": [
{ "$eq": [ "$category", "name"] },
{ "$ifNull": [ "$name", false ] }
]},
1,
{ "$cond": [
{ "$and": [
{ "$eq": [ "$category", "salary" ] },
{ "$gte": [ "$salary", 1000 ] }
]},
1,
{ "$cond": [
{ "$and": [
{ "$eq": [ "$category", "type" ] },
{ "$eq": [ "$type", "type2" ] }
]},
1,
0
]}
]}
]
}
}
}}
])
And its output:
{ "_id" : "type", "count" : 2 }
{ "_id" : "salary", "count" : 3 }
{ "_id" : "name", "count" : 3 }
If your documents do not have uniform key names or otherwise cannot specify each key in your pipeline condition, then apply with mapReduce instead:
db.collection.mapReduce(
function() {
var doc = this;
delete doc._id;
Object.keys(this).forEach(function(key) {
var value = (( key == "salary") && ( doc[key] < 1000 ))
? 0
: (( key == "type" ) && ( doc[key] != "type2" ))
? 0
: 1;
emit(key,value);
});
},
function(key,values) {
return Array.sum(values);
},
{
"out": { "inline": 1 }
}
);
And its output:
"results" : [
{
"_id" : "name",
"value" : 3
},
{
"_id" : "salary",
"value" : 3
},
{
"_id" : "type",
"value" : 2
}
]
Which is basically the same thing with a conditional count, except that you only specify the "reverse" of the conditions you want and only for the fields you want to filter conditions on. And of course this output format is simple to emit as separate documents.
The same approach applies in every case: test whether the condition is met on the fields you want conditions for, and return 1 where the condition is met or 0 where it is not, so that summing those values gives the count.

You can use aggregation as following query:
db.collection.aggregate({
$match: {
salary: 10000,
//add any other condition here
}
}, {
$group: {
_id: "$type",
"count": {
$sum: 1
}
}
}, {
$project: {
"category": "$_id",
"count": 1,
_id: 0
}
}