How to custom sort a field in MongoDB - mongodb

I have a collection called INFODOCS which has a field called ML_PRIORITY(HIGH/MEDIUM/LOW) and STATUS(True/False/Null). I want to determine count of STATUS for each ML_PRIORITY and then sort the ML_PRIORITY in order High, Medium and Low.
[
{
"_id": "1",
"ML_PRIORITY" : "HIGH",
"STATUS" : "True"
},
{
"_id": "2",
"ML_PRIORITY" : "HIGH",
"STATUS" : ""
},
{
"_id": "3",
"ML_PRIORITY" : "HIGH",
"STATUS" : "False"
},
{
"_id": "4",
"ML_PRIORITY" : "MEDIUM",
"STATUS" : ""
},
{
"_id": "5",
"ML_PRIORITY" : "Low",
"STATUS" : ""
}
]
I was able to determine the count of STATUS for each ML_PRIORITY using below aggregation pipeline
but I am not sure how can I custom sort the ML_PRIORITY as $sort has only two option (1 and -1)
db.collection.aggregate([
{
'$group': {
'_id': '$ML_PRIORITY',
'QUALITYCHECKDONE': {
'$sum': {
'$cond': [
{
'$eq': [
'$STATUS', 'TRUE'
]
}, 1, 0
]
}
},
'QUALITYCHECKNOTDONE': {
'$sum': {
'$cond': [
{
'$eq': [
'$STATUS', ''
]
}, 1, 0
]
}
},
'QUALITYCHECKNOTREQ': {
'$sum': {
'$cond': [
{
'$eq': [
'$STATUS', 'FALSE'
]
}, 1, 0
]
}
}
}
}, {
'$project': {
'_id': 0,
'ML_PRIORITY': '$_id',
'QUALITYCHECKDONE': 1,
'QUALITYCHECKNOTDONE': 1,
'QUALITYCHECKNOTREQ': 1
}
}
])
Example - https://mongoplayground.net/p/anAwoqZk2Ys

One option is to replace your last step with 3 steps, in order to $set an order field, $sort, and $unset it:
[
{$set: {
order: {$indexOfArray: [["HIGH", "MEDIUM", "Low"], "$_id"]},
"ML_PRIORITY": "$_id"
}},
{$sort: {order: 1}},
{$unset: ["_id", "order"]}
]
See how it works on the playground example

Related

How to determine count of distinct values of a field in mongoDB

I have a collection called INFODOCS which has a field called PROCESSOR_ID and STATUS(True/False/Null).
I want to determine two things
Working_Processor = Count of Distinct PROCESSOR_ID where STATUS in not Null
Total_Processor = Count of Distinct PROCESSOR_ID
INFODOCS
[
{
"_id": "1",,
"PROCESSOR_ID" : "11",
"STATUS" : "True"
},
{
"_id": "2",
"PROCESSOR_ID" : "11",
"STATUS" : ""
},
{
"_id": "3",
"PROCESSOR_ID" : "22",
"STATUS" : "False"
},
{
"_id": "4",
"PROCESSOR_ID" : "33",
"STATUS" : ""
}
]
Here expected answer is:
Working_Processor = 2 (_id:1 and _id:3)
Total_Processor = 3
I tried using $addToSet with $cond, but want to know if there is better way of achieving the same.
[
{
'$group': {
'_id': None,
'WORKING_PROCESSOR': {
'$addToSet': {
'$cond': [
{
'$ne': [
'$STATUS', ''
]
}, '$PROCESSOR_ID', None
]
}
},
'TOTAL_PROCESSOR': {
'$addToSet': '$PROCESSOR_ID'
}
}
}, {
'$project': {
'_id': 0,
'WORKING_PROCESSOR': {
'$subtract': [
{
'$size': '$WORKING_PROCESSOR'
}, 1
]
},
'TOTAL_PROCESSOR': {
'$size': '$TOTAL_PROCESSOR'
}
}
}
]
One simple option is:
db.collection.aggregate([
{$group: {
_id: "$PROCESSOR_ID",
WORKING_PROCESSOR: {$max: {$cond: [{$ne: ["$STATUS", ""]}, 1, 0]}}
}},
{$group: {
_id: 0,
WORKING_PROCESSOR: {$sum: "$WORKING_PROCESSOR"},
TOTAL_PROCESSOR: {$sum: 1}
}},
{$unset: "_id"}
])
See how it works on the playground example

Get invoice total from a collection with arrays

I have a collection with this structure:
db.shops.insert({
"customer": "21c3",
"shopDate": new Date("2019-06-03T23:00:00Z"),
"shopId" : "Supermarket",
"items" : [
{
"productName": "Water",
"price": 3,
"quantity": 2
},
{
"productName": "Candies",
"price": 1,
"quantity": 5
}
]
});
I need a query to get the total amount of each shopId.
This is what I have done, but it is not working:
db.shops.aggregate(
[
{
$group : {
_id : $shopName,
totalSold: { $sum: { $multiply: [ "$price", "$quantity" ] } },
}
}
]
);
Here is how to do it:
db.shops.aggregate(
[
{
"$unwind": "$items"
},
{
"$group" : {
_id : "$shopId",
totalSold: { $sum: { $multiply: [ "$items.price", "$items.quantity" ] } },
}
}
]
);

Project embedded document key value, based on condition in mongoDB aggregation

I have a mongo collection called tickets and we are storing ticket details in similar structure documents like this:
[
{
"status": "PAUSED",
"lifecycle_dates": {
"OPEN": "d1",
"CLOSED": "d2",
"PAUSED": "d3"
}
},
{
"status": "OPEN",
"lifecycle_dates": {
"OPEN": "d1",
"PAUSED": "d3"
}
},
{
"status": "CLOSED",
"lifecycle_dates": {
"OPEN": "d1",
"CLOSED": "d2"
}
}
]
I need to fetch the data which says current status of ticket and status date on.
and I want to project data like :
[
{
"status": "PAUSED",
"lifecycle_date": "d3"
},
{
"status": "OPEN",
"lifecycle_date": "d1"
},
{
"status": "CLOSED",
"lifecycle_date": "d2"
}
]
How can I project single lifecycle date based on current status in mongo aggregation pipeline?
something like this:
{
$project : {
"status" : 1,
"lifecycle_date" : $lifecycle_dates[$status]
}
}
couldn't find any reference or similar problem in mongo reference document here
current mongo version : 3.2
Updated Answer :
Since you need to fetch the date as per the status, you can use this aggregate query :
db.test.aggregate([
{
$project : {
_id : 0,
status : 1,
lifecycle_date : { $cond: [ {$eq : ["$status","OPEN"]}, "$lifecycle_dates.OPEN", { $cond: [ {$eq : ["$status","CLOSED"]}, "$lifecycle_dates.CLOSED", { $cond: [ {$eq : ["$status","PAUSED"]}, "$lifecycle_dates.PAUSED", "-1" ]} ]} ]}
}
}])
This is compatible with Mongo 3.2 as well.
Output :
{ "status" : "PAUSED", "lifecycle_date" : "d3" }
{ "status" : "OPEN", "lifecycle_date" : "d1" }
{ "status" : "CLOSED", "lifecycle_date" : "d2" }
=========================================================================
This answer was for the previous question -
Use this aggregate :
db.test.aggregate([
{
$project : {
_id : 0,
status : 1,
lifecycle_date : "$lifecycle_dates.PAUSED"
}
}
])
Output :
{ "status" : "PAUSED", "lifecycle_date" : "d3" }
You can try below aggregation
db.collection.aggregate([
{ "$project": {
"status": 1,
"lifecycle_date": {
"$arrayElemAt": [
{ "$filter": {
"input": { "$objectToArray": "$lifecycle_dates" },
"as": "life",
"cond": { "$eq": ["$$life.k", "$status"] }
}},
0
]
}
}},
{ "$project": {
"status": 1,
"lifecycle_date": "$lifecycle_date.v"
}}
])
db.tickets.aggregate(
// Pipeline
[
// Stage 1
{
$project: {
"status": 1,
_id: 0,
"lifecycle_dates": {
$switch: {
branches: [{
case: {
$eq: ["$status", "PAUSED"]
},
then: "$lifecycle_dates.PAUSED"
},
{
case: {
$eq: ["$status", "OPEN"]
},
then: "$lifecycle_dates.OPEN"
},
{
case: {
$eq: ["$status", "CLOSED"]
},
then: "$lifecycle_dates.OPEN"
}
],
}
}
}
},
])

MongoDB aggregate multiple group by top fields and array fields

My collection will look like this,
{
"_id" : ObjectId("591c5971240033283736860a"),
"status" : "Done",
"createdDate" : ISODate("2017-05-17T14:09:20.653Z")
"communications" : [
{
"communicationUUID" : "df07948e-4a14-468e-beb1-db55ff72b215",
"communicationType" : "CALL",
"recipientId" : 12345,
"createdDate" : ISODate("2017-05-18T14:09:20.653Z")
"callResponse" : {
"Status" : "completed",
"id" : "dsd45554545ds92a9bd2c12e0e6436d",
}
}
]}
{
"_id" : ObjectId("45sdsd59124003345121450a"),
"status" : "ToDo",
"createdDate" : ISODate("2017-05-17T14:09:20.653Z")
"communications" : [
{
"communicationUUID" : "45sds55-4a14-468e-beb1-db55ff72b215",
"communicationType" : "CALL",
"recipientId" : 1234,
"createdDate" : ISODate("2017-05-18T14:09:20.653Z")
"callResponse" : {
"Status" : "completed",
"id" : "84fe862f1924455dsds5556436d",
}
}
]}
Currently I am writing two aggregate query to achieve my requirement and my query will be below
db.collection.aggregate(
{ $project: {
dayMonthYear: { $dateToString: { format: "%d/%m/%Y", date: "$createdDate" } },
status: 1,
}},
{ $group: {
_id: "$dayMonthYear",
Pending: { $sum: { $cond : [{ $eq : ["$status", "ToDo"]}, 1, 0]} },
InProgress: { $sum: { $cond : [{ $eq : ["$status", "InProgress"]}, 1, 0]} },
Done: { $sum: { $cond : [{ $eq : ["$status", "Done"]}, 1, 0]} },
Total: { $sum: 1 }
}}
My output will be,
{"_id" : "17/05/2017", "Pending" : 1.0, "InProgress" : 0.0, "Done" : 1.0, "Total" : 2.0 }
Using above query I can able to get count but I need to find the count based on communication Status too so I am writing one more query to achieve,
db.collection.aggregate(
{"$unwind":"$communications"},
{ $project: {
dayMonthYear: { $dateToString: { format: "%d/%m/%Y", date: "$createdDate" } },
communications: 1
}},
{ "$group": {
_id: "$dayMonthYear",
"total_call": { $sum: { $cond : [{ $or : [ { $eq: [ "$communications.callResponse.Status", "failed"] },
{ $eq: [ "$communications.callResponse.Status", "busy"] },
{ $eq: [ "$communications.callResponse.Status", "completed"] },
{ $eq: [ "$communications.callResponse.Status", "no-answer"] }
]}, 1, 0 ] }},
"engaged": { $addToSet: { $cond : [{ $eq : ["$communications.callResponse.Status", "completed"]},
"$communications.recipientId", "null" ]} },
"not_engaged": { $addToSet: { $cond: [{ $or : [ { $eq: [ "$communications.callResponse.Status", "failed"] },
{ $eq: [ "$communications.callResponse.Status", "busy"] },
{ $eq: [ "$communications.callResponse.Status", "no-answer"] } ]},
"$communications.recipientId", "null" ] }}
}},
{ "$project": {
"_id": 1,
"total_call": 1,
"engaged": { "$setDifference": [ "$ngaged", ["null"] ] },
"not_engaged": { "$setDifference": [ "$not_engaged", ["null"] ] },
}},
{ "$project": {
"total_call": 1,
"engaged": { "$size": "$engaged" },
"not_engaged": { "$size": { "$setDifference": [ "$not_engaged", "$engaged" ] }},
}})
My output will be,
{"_id" : "18/05/2017", "total_call" : 2.0, "engaged" : 2, "not_engaged" : 0}
Using above query I can able to get count but I want to achieve it in single query
I am looking for output like
{"_id":"17/05/2017", "Pending" : 1.0, "InProgress" : 0.0, "Done" : 1.0, "total_call" : 0, "engaged" : 0, "not_engaged" : 0}
{"_id":"18/05/2017", "Pending" : 0.0, "InProgress" : 0.0, "Done" : 0.0, "total_call" : 2, "engaged" : 2, "not_engaged" : 0}
Can anyone suggest or provide me good way to get above result.
You can use $concatArrays to merge the status& createdDate documents followed by $group to count the occurrences.
db.collection.aggregate([
{
"$project": {
"statusandcreateddate": {
"$concatArrays": [
[
{
"status": "$status",
"createdDate": "$createdDate"
}
],
{
"$map": {
"input": "$communications",
"as": "l",
"in": {
"status": "$$l.callResponse.Status",
"createdDate": "$$l.createdDate"
}
}
}
]
}
}
},
{
"$unwind": "$statusandcreateddate"
},
{
"$group": {
"_id": {
"$dateToString": {
"format": "%d/%m/%Y",
"date": "$statusandcreateddate.createdDate"
}
},
"total_call": {
"$sum": {
"$cond": [
{
"$or": [
{
"$eq": [
"$statusandcreateddate.status",
"failed"
]
},
{
"$eq": [
"$statusandcreateddate.status",
"busy"
]
},
{
"$eq": [
"$statusandcreateddate.status",
"completed"
]
},
{
"$eq": [
"$statusandcreateddate.status",
"no-answer"
]
}
]
},
1,
0
]
}
},
"engaged": {
"$sum": {
"$cond": [
{
"$eq": [
"$statusandcreateddate.status",
"completed"
]
},
1,
0
]
}
},
"not_engaged": {
"$sum": {
"$cond": [
{
"$or": [
{
"$eq": [
"$statusandcreateddate.status",
"failed"
]
},
{
"$eq": [
"$statusandcreateddate.status",
"busy"
]
},
{
"$eq": [
"$statusandcreateddate.status",
"no-answer"
]
}
]
},
1,
0
]
}
},
"Pending": {
"$sum": {
"$cond": [
{
"$eq": [
"$statusandcreateddate.status",
"ToDo"
]
},
1,
0
]
}
},
"InProgress": {
"$sum": {
"$cond": [
{
"$eq": [
"$statusandcreateddate.status",
"InProgress"
]
},
1,
0
]
}
},
"Done": {
"$sum": {
"$cond": [
{
"$eq": [
"$statusandcreateddate.status",
"Done"
]
},
1,
0
]
}
}
}
}
])

MongoDB aggregate count based on multiple query fields - (Multiple field count)

My collection will look this,
{
"_id" : ObjectId("55c8bd1d85b83e06dc54c0eb"),
"name" : "xxx",
"salary" : 10000,
"type" : "type1"
}
{
"_id" : ObjectId("55c8bd1d85b83e06dc54c0eb"),
"name" : "aaa",
"salary" : 10000,
"type" : "type2"
}
{
"_id" : ObjectId("55c8bd1d85b83e06dc54c0eb"),
"name" : "ccc",
"salary" : 10000,
"type" : "type2"
}
My query params will be coming as,
{salary=10000, type=type2}
so based on the query I need to fetch the count of above query params
The result should be something like this,
{ category: 'type1', count: 500 } { category: 'type2', count: 200 } { category: 'name', count: 100 }
Now I am getting count by hitting three different queries and constructing the result (or) server side iteration I can get the result.
Can anyone suggest or provide me good way to get above result
Your quesstion is not very clearly presented, but what it seems you wanted to do here was count the occurances of the data in the fields, optionally filtering those fields by the values that matches the criteria.
Here the $cond operator allows you to tranform a logical condition into a value:
db.collection.aggregate([
{ "$group": {
"_id": null,
"name": { "$sum": 1 },
"salary": {
"$sum": {
"$cond": [
{ "$gte": [ "$salary", 1000 ] },
1,
0
]
}
},
"type": {
"$sum": {
"$cond": [
{ "$eq": [ "$type", "type2" ] },
1,
0
]
}
}
}}
])
All values are in the same document, and it does not really make any sense to split them up here as this is additional work in the pipeline.
{ "_id" : null, "name" : 3, "salary" : 3, "type" : 2 }
Otherwise in the long form, which is not very performant due to needing to make a copy of each document for every key looks like this:
db.collection.aggregate([
{ "$project": {
"name": 1,
"salary": 1,
"type": 1,
"category": { "$literal": ["name","salary","type"] }
}},
{ "$unwind": "$category" },
{ "$group": {
"_id": "$category",
"count": {
"$sum": {
"$cond": [
{ "$and": [
{ "$eq": [ "$category", "name"] },
{ "$ifNull": [ "$name", false ] }
]},
1,
{ "$cond": [
{ "$and": [
{ "$eq": [ "$category", "salary" ] },
{ "$gte": [ "$salary", 1000 ] }
]},
1,
{ "$cond": [
{ "$and": [
{ "$eq": [ "$category", "type" ] },
{ "$eq": [ "$type", "type2" ] }
]},
1,
0
]}
]}
]
}
}
}}
])
And it's output:
{ "_id" : "type", "count" : 2 }
{ "_id" : "salary", "count" : 3 }
{ "_id" : "name", "count" : 3 }
If your documents do not have uniform key names or otherwise cannot specify each key in your pipeline condition, then apply with mapReduce instead:
db.collection.mapReduce(
function() {
var doc = this;
delete doc._id;
Object.keys(this).forEach(function(key) {
var value = (( key == "salary") && ( doc[key] < 1000 ))
? 0
: (( key == "type" ) && ( doc[key] != "type2" ))
? 0
: 1;
emit(key,value);
});
},
function(key,values) {
return Array.sum(values);
},
{
"out": { "inline": 1 }
}
);
And it's output:
"results" : [
{
"_id" : "name",
"value" : 3
},
{
"_id" : "salary",
"value" : 3
},
{
"_id" : "type",
"value" : 2
}
]
Which is basically the same thing with a conditional count, except that you only specify the "reverse" of the conditions you want and only for the fields you want to filter conditions on. And of course this output format is simple to emit as separate documents.
The same approach applies where to test the condition is met on the fields you want conditions for and return 1 where the condition is met or 0 where it is not for the summing the count.
You can use aggregation as following query:
db.collection.aggregate({
$match: {
salary: 10000,
//add any other condition here
}
}, {
$group: {
_id: "$type",
"count": {
$sum: 1
}
}
}, {
$project: {
"category": "$_id",
"count": 1,
_id: 0
}
}