Project embedded document key value, based on condition in mongoDB aggregation - mongodb

I have a mongo collection called tickets and we are storing ticket details in similar structure documents like this:
[
{
"status": "PAUSED",
"lifecycle_dates": {
"OPEN": "d1",
"CLOSED": "d2",
"PAUSED": "d3"
}
},
{
"status": "OPEN",
"lifecycle_dates": {
"OPEN": "d1",
"PAUSED": "d3"
}
},
{
"status": "CLOSED",
"lifecycle_dates": {
"OPEN": "d1",
"CLOSED": "d2"
}
}
]
I need to fetch the data which says current status of ticket and status date on.
and I want to project data like :
[
{
"status": "PAUSED",
"lifecycle_date": "d3"
},
{
"status": "OPEN",
"lifecycle_date": "d1"
},
{
"status": "CLOSED",
"lifecycle_date": "d2"
}
]
How can I project single lifecycle date based on current status in mongo aggregation pipeline?
something like this:
{
$project : {
"status" : 1,
"lifecycle_date" : $lifecycle_dates[$status]
}
}
couldn't find any reference or similar problem in mongo reference document here
current mongo version : 3.2

Updated Answer :
Since you need to fetch the date as per the status, you can use this aggregate query :
db.test.aggregate([
{
$project : {
_id : 0,
status : 1,
lifecycle_date : { $cond: [ {$eq : ["$status","OPEN"]}, "$lifecycle_dates.OPEN", { $cond: [ {$eq : ["$status","CLOSED"]}, "$lifecycle_dates.CLOSED", { $cond: [ {$eq : ["$status","PAUSED"]}, "$lifecycle_dates.PAUSED", "-1" ]} ]} ]}
}
}])
This is compatible with Mongo 3.2 as well.
Output :
{ "status" : "PAUSED", "lifecycle_date" : "d3" }
{ "status" : "OPEN", "lifecycle_date" : "d1" }
{ "status" : "CLOSED", "lifecycle_date" : "d2" }
=========================================================================
This answer was for the previous question -
Use this aggregate :
db.test.aggregate([
{
$project : {
_id : 0,
status : 1,
lifecycle_date : "$lifecycle_dates.PAUSED"
}
}
])
Output :
{ "status" : "PAUSED", "lifecycle_date" : "d3" }

You can try below aggregation
db.collection.aggregate([
{ "$project": {
"status": 1,
"lifecycle_date": {
"$arrayElemAt": [
{ "$filter": {
"input": { "$objectToArray": "$lifecycle_dates" },
"as": "life",
"cond": { "$eq": ["$$life.k", "$status"] }
}},
0
]
}
}},
{ "$project": {
"status": 1,
"lifecycle_date": "$lifecycle_date.v"
}}
])

db.tickets.aggregate(
// Pipeline
[
// Stage 1
{
$project: {
"status": 1,
_id: 0,
"lifecycle_dates": {
$switch: {
branches: [{
case: {
$eq: ["$status", "PAUSED"]
},
then: "$lifecycle_dates.PAUSED"
},
{
case: {
$eq: ["$status", "OPEN"]
},
then: "$lifecycle_dates.OPEN"
},
{
case: {
$eq: ["$status", "CLOSED"]
},
then: "$lifecycle_dates.OPEN"
}
],
}
}
}
},
])

Related

Combine results based on condition during group by

Mongo query generated out of java code:
{
"pipeline": [{
"$match": {
"Id": "09cd9a5a-85c5-4948-808b-20a52d92381a"
}
},
{
"$group": {
"_id": "$result",
"id": {
"$first": "$result"
},
"labelKey": {
"$first": {
"$ifNull": ["$result",
"$result"]
}
},
"value": {
"$sum": 1
}
}
}]
}
Field 'result' can have values like Approved, Rejected, null and "" (empty string). What I am trying to achieve is combining the count of both null and empty together.
So that the empty string Id will have the count of both null and "", which is equal to 4
I'm sure theres a more "proper" way but this is what i could quickly come up with:
[
{
"$group" : {
"_id" : "$result",
"id" : {
"$first" : "$result"
},
"labelKey" : {
"$first" : {
"$ifNull" : [
"$result",
"$result"
]
}
},
"value" : {
"$sum" : 1.0
}
}
},
{
"$group" : {
"_id" : {
"$cond" : [{
$or: [
{"$eq": ["$_id", "Approved"]},
{"$eq": ["$_id", "Rejected"]},
]}},
"$_id",
""
]
},
"temp" : {
"$push" : {
"_id" : "$_id",
"labelKey" : "$labelKey"
}
},
"count" : {
"$sum" : "$value"
}
}
},
{
"$unwind" : "$temp"
},
{
"$project" : {
"_id" : "$temp._id",
"labelKey": "$temp.labelKey",
"count" : "$count"
}
}
],
);
Due to the fact the second group is only on 4 documents tops i don't feel too bad about doing this.
I have used $facet.
The MongoDB stage $facet lets you run several independent pipelines within the stage of a pipeline, all using the same data. This means that you can run several aggregations with the same preliminary stages, and successive stages.
var queries = [{
"$match": {
"Id": "09cd9a5a-85c5-4948-808b-20a52d92381a"
}
},{
$facet: {//
"empty": [
{
$match : {
result : { $in : ['',null]}
}
},{
"$group" : {
"_id" : null,
value : { $sum : 1}
}
}
],
"non_empty": [
{
$match : {
result : { $nin : ['',null]}
}
},{
"$group" : {
"_id" : '$result',
value : { $sum : 1}
}
}
]
}
},
{
$project: {
results: {
$concatArrays: [ "$empty", "$non_empty" ]
}
}
}];
Output :
{
"results": [{
"_id": null,
"value": 52 // count of both '' and null.
}, {
"_id": "Approved",
"value": 83
}, {
"_id": "Rejected",
"value": 3661
}]
}
Changing the group by like below solved the problem
{
"$group": {
"_id": {
"$ifNull": ["$result", ""]
},
"id": {
"$first": "$result"
},
"labelKey": {
"$first": {
"$ifNull": ["$result",
"$result"]
}
},
"value": {
"$sum": 1
}
}
}

MongoDB Group By count occurences of values and output as new field

I have a 3 Collections Assignments, Status, Assignee.
Assignments Fields : [_id, status, Assignee]
Assignee and Status Fields : [_id, name].
There can be many assignments associated with various Status and Assignee collections(linked via _id field), There is no nesting or complex data.
I need a query for all assignments ids where Assignees are the row, Status are the Columns, there combined cell is the count with Total counts at the end.
To help you visualize, I am attaching below image. I am new to complex Mongo DB Aggregate framework, kindly guide me to achieve query.
Note: Data in Status and Assignee collection will be dynamic. Nothing is predetermined in the Query. So, the Rows and Columns are going to grow dynamically in future, If the query is given pagination, then it would be of great help. I cannot write a query with hard coded status names like 'pending', 'completed' etc. As data shall grow and existing data may change like 'pending task', 'completed work'.
Below is my query
db.getCollection('Assignments').aggregate([
{
"$group": {
"_id": {
"assignee": "$assignee",
"statusId": "$statusId"
},
"statusCount": { "$sum": 1 }
}
},
{
"$group": {
"_id": "$_id.assignee",
"statuses": {
"$push": {
"statusId": "$_id.statusId",
"count": "$statusCount"
},
},
"count": { "$sum": "$statusCount" }
}
},
]);
Below is the output format:
{
"_id" : "John",
"statuses" : {
"statusId" : "Pending",
"count" : 3.0
},
"count" : 3.0
}
{
"_id" : "Katrina",
"statuses" : [{
"statusId" : "Pending",
"count" : 1.0
},
{
"statusId" : "Completed",
"count" : 1.0
},
{
"statusId" : "Assigned",
"count" : 1.0
}],
"count" : 3.0
}
{
"_id" : "Collins",
"statuses" : {
"statusId" : "Pending",
"count" : 4.0
},
"count" : 4.0
}
Expected Output is:
{
"_id" : "Katrina",
"Pending" : 1.0,
"Completed" : 1.0,
"Assigned" : 1.0,
"totalCount" : 3.0
}
Any Idea on how to many various statusId for different assignee as keys and not values in single document.
You need another $group stage after $unwind to count number of status based on statusId string value:
{
"$group": {
"_id": "$_id",
"Pending" : {
"$sum": {
"$cond": [
{ "$eq": [
"$statuses.statusId",
"Pending"
]},
"$statuses.count",
0
]
}
},
"Completed" : {
"$sum": {
"$cond": [
{ "$eq": [
"$statuses.statusId",
"Completed"
]},
"$statuses.count",
0
]
}
},
"Assigned" : {
"$sum": {
"$cond": [
{ "$eq": [
"$statuses.statusId",
"Assigned"
]},
"$statuses.count",
0
]
}
},
"totalCount": { "$sum": 1 }
}
}
The final aggregate command:
db.getCollection('Assignments').aggregate([
{
"$group": {
"_id": {
"assignee": "$assignee",
"statusId": "$statusId"
},
"statusCount": { "$sum": 1 }
}
},
{
"$group": {
"_id": "$_id.assignee",
"statuses": {
"$push": {
"statusId": "$_id.statusId",
"count": "$statusCount"
},
},
"count": { "$sum": "$statusCount" }
}
},
{ "$unwind": "$statuses" },
{
"$group": {
"_id": "$_id",
"Pending" : {
"$sum": {
"$cond": [
{ "$eq": [
"$statuses.statusId",
"Pending"
]},
"$statuses.count",
0
]
}
},
"Completed" : {
"$sum": {
"$cond": [
{ "$eq": [
"$statuses.statusId",
"Completed"
]},
"$statuses.count",
0
]
}
},
"Assigned" : {
"$sum": {
"$cond": [
{ "$eq": [
"$statuses.statusId",
"Assigned"
]},
"$statuses.count",
0
]
}
},
"totalCount": { "$sum": 1 }
}
}
]);
Why not just keep statuses as an object so each status is a key/val pair. If that works you do the following
db.getCollection('Assignments').aggregate([
[
{
"$group": {
"_id": {
"assignee": "$assignee",
"statusId": "$statusId"
},
"statusCount": { "$sum": 1 }
},
},
{
"$group" : {
"_id" : "$_id.assignee",
"statuses" : {
"$push" : {
"k" : "$_id.statusId", // <- "k" as key value important for $arrayToObject Function
"v" : "$statusCount" // <- "v" as key value important for $arrayToObject Function
}
},
"count" : {
"$sum" : "$statusCount"
}
}
},
{
"$project" : {
"_id" : 1.0,
"statuses" : {
"$arrayToObject" : "$statuses"
},
"totalCount" : "$count"
}
}
],
{
"allowDiskUse" : false
}
);
This gives you:
{
"_id" : "Katrina",
"statuses": {
"Pending" : 1.0,
"Completed" : 1.0,
"Assigned" : 1.0,
},
"totalCount" : 3.0
}
A compromise having it one layer deeper but still the shape of statuses you wanted and dynamic with each new statusId added.

MongoDB Aggregation, group by subobject keys

I have a mongo collection whose schema looks like this:
_id: ObjectId(),
segments: {
activity: 'value1',
activation: 'value2',
plan: 'value3'
}
I'm trying to use the aggregation framework to find out how many of my documents have the value1 for the segment activity for instance.
The problem is that I want to do that for every segment in the same request if possible, and that I don't know how many segments I'll have or even their name.
Basically here's what I'd like to do:
If I have these two documents:
{ _id: 1, segments: { activity: 'active', activation: 'inactive', plan: 'free' }
{ _id: 2, segments: { activity: 'inactive', activation: 'inactive', plan: 'free' }
I want to be able to see that two of them have the activation segment to inactive and the free plan, and that activity have 1 inactive and 1 active values. Here is what I want to get:
{
activity: {
active: 1,
inactive: 1
},
activation: {
inactive: 2
},
plan: {
free: 2
}
}
So basically, if you could just $group by key it would be great! Something like this:
{
$group: {
_id: { $concat: [ '$segments.$key', '-', '$segments.$key.$value' ],
count: { $sum: 1 }
}
}
Or if I could unwind on each key...
To get the counts, take advantage of the $cond operator in the $group pipeline step to evaluate the counts based on the subdocuments value, something like the following:
db.collection.aggregate([
{
"$group": {
"_id": "$_id",
"activity_active": {
"$sum": {
"$cond": [ { "$eq": [ "$segment.activity", "active" ] }, 1, 0 ]
}
},
"activity_inactive": {
"$sum": {
"$cond": [ { "$eq": [ "$segment.activity", "inactive" ] }, 1, 0 ]
}
},
"activation_active": {
"$sum": {
"$cond": [ { "$eq": [ "$segment.activation", "active" ] }, 1, 0 ]
}
},
"activation_inactive": {
"$sum": {
"$cond": [ { "$eq": [ "$segment.activity", "inactive" ] }, 1, 0 ]
}
},
"plan_free": {
"$sum": {
"$cond": [ { "$eq": [ "$segment.plan", "free" ] }, 1, 0 ]
}
}
}
},
{
"$project": {
"_id": 0,
"activity": {
"active": "$activity_active",
"inactive": "$activity_inactive"
},
"activation": {
"active": "$activation_active",
"inactive": "$activation_inactive"
},
"plan": {
"free": "$plan_free"
}
}
}
])
there could be a generic solution to this problem, but might need a bit post processing:
to get output similat to this:
{
"_id" : {
"activity" : "active",
"activation" : "inactive"
},
"plan" : [{
"type" : "free",
"total" : 1
}, {
"type" : "paid",
"total" : 1
}
]
}, {
"_id" : {
"activity" : "inactive",
"activation" : "inactive"
},
"plan" : [{
"type" : "free",
"total" : 1
}
]
}, {
"_id" : {
"activity" : "inactive",
"activation" : "active"
},
"plan" : [{
"type" : "paid",
"total" : 3
}, {
"type" : "free",
"total" : 6
}
]
}
use query like that:
db.collection.aggregate([{
$group : {
_id : {
activity : "$segments.activity",
activation : "$segments.activation",
plan : "$segments.plan"
},
total : {
$sum : 1
}
}
}, {
$group : {
_id : {
activity : "$_id.activity",
activation : "$_id.activation"
},
plan : {
$push : {
type : "$_id.plan",
total : "$total"
}
}
}
},
])

Check if an element appears in an array during the projection stage of a mongo aggregation pipeline

I've got a collection of mongo documents like -
{
"_id" : "c959e4d6-961d-4043-ade6-2f93aa055e11",
"events" : [
"clickOut"
"showHoverAd",
"closeHoverAd"
]
}
{
"_id" : "d0dcb2be-f8bc-45cd-8337-d89a16063b08",
"events" : [
"zoom",
"pan"
]
}
{
"_id" : "9179b26e-e45c-48ab-93f6-e73b8ebe559b",
"events" : [
"clickOut"
]
}
{
"_id" : "db0b82ad-7a33-4ce8-9117-f6ecf041d0d9",
"events" : [
"adjustStars",
"adjustPrice",
"closeHoverAd",
"showHoverAd"
]
}
I'm trying to use a projection stage in an aggregation pipeline to identify if a particular string appears in the events field.
db.events.aggreate([
{$project: {
session: '$_id',
clickedOut: {
$cond: [{$elemMatch: {'$events':'clickOut'}},true,false]
}
}}
])
I'm getting an error - exception: invalid operator '$elemMatch'.
I want my output documents to look like -
{
"session" : "c959e4d6-961d-4043-ade6-2f93aa055e11",
"clickedOut" : false
}
{
"session" : "d0dcb2be-f8bc-45cd-8337-d89a16063b08",
"clickedOut" : true
}
But I can't seem to find a way of doing it. I've tried using $in and $all or simply
$cond: {'$events':'clickOut'}
but I'm not getting anywhere.
Use the following aggregation:
db.events.aggregate([
{
"$unwind": "$events"
},
{
"$project": {
"_id": 0,
"session": "$_id",
"clickedOut": {
"$cond": [ { "$eq": [ "$events", "clickOut" ] }, 1, 0 ]
}
}
},
{
"$group": {
"_id": "$session",
"count": {
"$sum": "$clickedOut"
}
}
},
{
"$project": {
"_id": 0,
"session": "$_id",
"clickedOut": {
"$cond": [ { "$eq": [ "$count", 1 ] }, true, false ]
}
}
},
]);
Output:
/* 1 */
{
"result" : [
{
"session" : "db0b82ad-7a33-4ce8-9117-f6ecf041d0d9",
"clickedOut" : false
},
{
"session" : "9179b26e-e45c-48ab-93f6-e73b8ebe559b",
"clickedOut" : true
},
{
"session" : "d0dcb2be-f8bc-45cd-8337-d89a16063b08",
"clickedOut" : false
},
{
"session" : "c959e4d6-961d-4043-ade6-2f93aa055e11",
"clickedOut" : true
}
],
"ok" : 1
}

MongoDB: aggregating fields from arrays of subdocuments

I have a mongodb collection called Events, containing baseball games. Here is an example of one record in the table:
{
"name" : "Game# 814",
"dateStart" : ISODate("2012-09-28T14:47:53.695Z"),
"_id" : ObjectId("53a1b24de3f25f4443d9747e"),
"stats" : [
{
"team" : ObjectId("53a11a43a8de6dd8375c940b"),
"teamName" : "Reds",
"_id" : ObjectId("53a1b24de3f25f4443d97480"),
"score" : 17
},
{
"team" : ObjectId("53a11a43a8de6dd8375c938d"),
"teamName" : "Yankees",
"_id" : ObjectId("53a1b24de3f25f4443d9747f"),
"score" : 12
}
]
"__v" : 0
}
I need help writing the query that returns standings for all teams. The result set should look like:
{
"team" : ObjectId("53a11a43a8de6dd8375c938d"),
"teamName" : "Yankees",
"wins" : <<number of Yankees wins>>
"losses" : <<number of Yankees losses>>
"draws" : <<number of Yankees draws>>
}
{
"team" : ObjectId("53a11a43a8de6dd8375c940b"),
"teamName" : "Reds",
"wins" : <<number of Reds wins>>
"losses" : <<number of Reds losses>>
"draws" : <<number of Reds draws>>
}
...
Here's the query I've started with...
db.events.aggregate(
{"$unwind": "$stats" },
{ $group : {
_id : "$stats.team",
gamesPlayed : { $sum : 1},
totalScore : { $sum : "$stats.score" }
}}
);
... which returns results:
{
"result" : [
{
"_id" : ObjectId("53a11a43a8de6dd8375c93cb"),
"gamesPlayed" : 125, // not a requirement... just trying to get $sum working
"totalScore" : 1213 // ...same here
},
{
"_id" : ObjectId("53a11a44a8de6dd8375c955f"),
"gamesPlayed" : 128,
"totalScore" : 1276
},
{
"_id" : ObjectId("53a11a44a8de6dd8375c9661"),
"gamesPlayed" : 152,
"totalScore" : 1509
},
....
It would seem advisable for you to keep your "wins", "losses", "draws" within your documents as you create or update them. But it is possible to do with aggregate if a little long winded
db.events.aggregate([
// Unwind the "stats" array
{ "$unwind": "$stats" },
// Combine the document with new fields
{ "$group": {
"_id": "$_id",
"firstTeam": { "$first": "$stats.team" },
"firstTeamName": { "$first": "$stats.teamName" },
"firstScore": { "$first": "$stats.score" },
"lastTeam": { "$last": "$stats.team" },
"lastTeamName": { "$last": "$stats.teamName" },
"lastScore": { "$last": "$stats.score" },
"minScore": { "$min": "$stats.score" },
"maxScore": { "$max": "$stats.score" }
}},
// Calculate by comparing scores
{ "$project": {
"firstTeam": 1,
"firstTeamName": 1,
"firstScore": 1,
"lastTeam": 1,
"lastTeamName": 1,
"lastScore": 1,
"firstWins": {
"$cond": [
{ "$gt": [ "$firstScore", "$lastScore" ] },
1,
0
]
},
"firstLosses": {
"$cond": [
{ "$lt": [ "$firstScore", "$lastScore" ] },
1,
0
]
},
"firstDraws": {
"$cond": [
{ "$eq": [ "$firstScore", "$lastScore" ] },
1,
0
]
},
"lastWins": {
"$cond": [
{ "$gt": [ "$lastScore", "$firstScore" ] },
1,
0
]
},
"lastLosses": {
"$cond": [
{ "$lt": [ "$lastScore", "$firstScore" ] },
1,
0
]
},
"lastDraws": {
"$cond": [
{ "$eq": [ "$lastScore", "$firstScore" ] },
1,
0
]
},
"type": { "$literal": [ true, false ] }
}},
// Unwind the "type"
{ "$unwind": "$type" },
// Group teams conditionally on "type"
{ "$group": {
"_id": {
"team": {
"$cond": [
"$type",
"$firstTeam",
"$lastTeam"
]
},
"teamName": {
"$cond": [
"$type",
"$firstTeamName",
"$lastTeamName"
]
}
},
"owins": {
"$sum": {
"$cond": [
"$type",
"$firstWins",
"$lastWins"
]
}
},
"olosses": {
"$sum": {
"$cond": [
"$type",
"$firstLosses",
"$lastLosses"
]
}
},
"odraws": {
"$sum": {
"$cond": [
"$type",
"$firstDraws",
"$lastDraws"
]
}
}
}},
// Project your final form
{ "$project": {
"_id": 0,
"team": "$_id.team",
"teamName": "$_id.teamName",
"wins": "$owins",
"losses": "$olosses",
"draws": "$odraws"
}}
])
The first part is to "re-shape" the document by unwinding the array and then grouping with "first" and "last" for defining fields for your two teams.
Then you want to $project through those documents and calculate your "wins", "losses" and "draws" for each team in the pairing. The additional thing is adding an array field for the two values true/false is convenient here. If you are on a pre 2.6 version of mongodb the $literal can be replaced with $const which is not documented but does the same thing.
Once you $unwind that "type" array, the documents can be split apart in the $group stage by evaluating whether to choose the "first" or "last" team field values via the use of $cond. This is a ternary operator that evaluates a true/false condition and returns the appropriate value according to that condition.
With a final $project your documents are formed exactly how you want.