How to use conditional count while doing grouping in mongodb? - mongodb

I need help in getting counts in the conditional grouping.
I have following JSON
[
{
"id": "103303dd56a731e377d01f6a37badae3",
"project_id": "10006",
"project_name": "Project_6",
"status": "TERM"
},
{
"id": "b63826f7edd2fc3ad8449add0c04fceb",
"project_id": "10004",
"project_name": "Project_4",
"status": "CMP"
},
{
"id": "d46e1fcf4c07ce4a69ee07e4134bcef1",
"project_id": "10008",
"project_name": "Project_8",
"status": "TERM"
},
{
"id": "a9fb9e6ef40426e9add520623d521ab8",
"project_id": "10001",
"project_name": "Project_1",
"status": "TERM"
},
{
"id": "b63826f7edd2fc3ad8449add0c04fceb",
"project_id": "10004",
"project_name": "Project_4",
"status": "QF"
}]
So you can see I have Duplicate Projects Records.
I want to get Data like this.
[
{
"project_id": "10007",
"starts": 2, //Count of records where project grouped
"Completes":3 //Where status="CMP"
"TERMS":6 //Where status="TERM"
"QFull":2 //Where status="QF",
"Abandons":3 //Where status=""
},
{
"project_id": "10004",
"starts": 3, //Count of records where project grouped
"Completes":2 //Where status="CMP"
"TERMS":4 //Where status="TERM"
"QFull":2 //Where status="QF",
"Abandons":1 //Where status=""
},
{
"project_id": "10001",
"starts": 3, //Count of records where project grouped
"Completes":2 //Where status="CMP"
"TERMS":4 //Where status="TERM"
"QFull":2 //Where status="QF",
"Abandons":1 //Where status=""
}
]
Here is the Fiddle for same: https://mongoplayground.net/p/yNerdPRjbxc
What I've tried so far:
db.collection.aggregate([
{
$group: {
_id: {
project_id: "$project_id"
},
project_id: {
$first: "$project_id"
},
starts: {
$sum: 1
}
}
}
])
I am not sure how can I add extra fields here based on conditions.

There is a $cond operator which can be used within $sum. So you simply add 1 if status matches your condition or 0 if it doesn't. You can try below aggregation:
db.col.aggregate([
{
$group: {
_id: "$project_id",
starts: { $sum: 1 },
Completes: { $sum: { $cond: [ { $eq: [ "$status", "CMP" ] }, 1, 0 ] } },
TERMS: { $sum: { $cond: [ { $eq: [ "$status", "TERM" ] }, 1, 0 ] } },
QFull: { $sum: { $cond: [ { $eq: [ "$status", "QF" ] }, 1, 0 ] } },
Abandons: { $sum: { $cond: [ { $eq: [ "$status", "" ] }, 1, 0 ] } },
}
},
{
$project: {
_id: 0,
project_id: "$_id",
starts: 1,
Completes: 1,
TERMS: 1,
QFull: 1,
Abandons: 1
}
}
])
Here is a Fiddle for the same: https://mongoplayground.net/p/JOZJOhyrnRL
this fiddle contains $match as well if you want to retrieve records for specific projects

Related

Get current state from snapshot documents - mongoDB

I'm trying to get a list of current holders at specific times from a collection. My collection looks like this:
[
{
"time": 1,
"holdings": [
{ "owner": "A", "tokens": 2 },
{ "owner": "B", "tokens": 1 }
]
},
{
"time": 2,
"holdings": [
{ "owner": "B", "tokens": 2 }
]
},
{
"time": 3,
"holdings": [
{ "owner": "A", "tokens": 3 },
{ "owner": "B", "tokens": 1 },
{ "owner": "C", "tokens": 1 }
]
},
{
"time": 4,
"holdings": [
{ "owner": "C", "tokens": 0 }
]
}
]
tokens show the current holdings of an owner if the holdings have changed to the last document. I would like to change the collection so that holdings always includes the full current holdings for any point in time.
At time: 1, the holdings are: A: 2, B: 1.
At time: 2, the holdings are: A: 2, B: 2. The collections does not include A's holdings however, because they haven't changed. So what I'd like to get is:
[
{
"time": 1,
"holdings": [
{ "owner": "A", "tokens": 2 },
{ "owner": "B", "tokens": 1 }
]
},
{
"time": 2,
"holdings": [
{ "owner": "A", "tokens": 2 }, // merged from prev doc.
{ "owner": "B", "tokens": 2 }
]
},
{
"time": 3,
"holdings": [
{ "owner": "A", "tokens": 3 },
{ "owner": "B", "tokens": 1 },
{ "owner": "C", "tokens": 1 }
]
},
{
"time": 4,
"holdings": [
{ "owner": "A", "tokens": 3 }, // merged from prev
{ "owner": "B", "tokens": 1 }, // merged from prev
{ "owner": "C", "tokens": 0 }
]
}
]
From what I understand $mergeObjects does that, but I don't understand how I can merge all previous docs in order up to the current doc for each doc. So I'm looking for a way to combine setWindowFields with mergeObjects I think.
This is a nice challenge.
So far, I got this complicated solution:
Get all of our timestamps in all of our documents. This is the purpose of the first 4 steps. $setWindowFields is used to accumulate this data.
$group by owner and calculate the empty timestamps as wantedTimes- next 5 steps.
$set empty timestamps with tokens: null to be filled with actual data and $unwind to separate - next 3 steps
Use $setWindowFields to find the last known token for each owner at each timestamp.
Fill this last known state for documents with unknown token - 2 steps
$group and format answer:
db.collection.aggregate([
{
$setWindowFields: {
sortBy: {time: 1},
output: {
allTimes: {$addToSet: "$time", window: {documents: ["unbounded", "current"]}
}
}
}
},
{
$setWindowFields: {
sortBy: {time: -1},
output: {
allTimes: {$addToSet: "$allTimes", window: {documents: ["unbounded", "current"]}
}
}
}
},
{
$set: {
allTimes: {
$reduce: {
input: "$allTimes",
initialValue: [],
in: {"$concatArrays": ["$$value", "$$this"]}
}
}
}
},
{$set: {allTimes: {$setIntersection: "$allTimes"}}},
{$unwind: "$holdings"},
{$sort: {time: 1}},
{$group: { _id: "$holdings.owner",
tokens: {$push: {tokens: "$holdings.tokens", time: "$time"}},
times: {$push: "$time"}, firstTime: {$first: "$time"},
allTimes: {$first: "$allTimes"}}
},
{
$addFields: {
wantedTimes: {
$filter: {
input: "$allTimes",
as: "item",
cond: {$gte: ["$$item", "$firstTime"]}
}
}
}
},
{
$project: {
tokens: 1,
wantedTimes: {$setDifference: ["$wantedTimes", "$times"]}
}
},
{
$set: {
data: {
$map: {
input: "$wantedTimes",
as: "item",
in: {time: "$$item", tokens: null}
}
}
}
},
{$project: {tokens: {"$concatArrays": ["$tokens", "$data"]}}},
{$unwind: "$tokens"},
{
$setWindowFields: {
partitionBy: "$_id",
sortBy: {"tokens.time": 1},
output: {
lastTokens: {
$push: "$tokens.tokens",
window: {documents: ["unbounded", "current"]}
}
}
}
},
{
$set: {
lastTokens: {
$filter: {
input: "$lastTokens",
as: "item",
cond: {$ne: ["$$item", null]}
}
}
}
},
{
$set: {
"tokens.tokens": {$ifNull: ["$tokens.tokens", {$last: "$lastTokens"}]}
}
},
{
$group: {
_id: "$tokens.time",
holdings: {$push: {owner: "$_id", tokens: "$tokens.tokens" }}
}
},
{$project: {time: "$_id", holdings: 1, _id: 0}},
{$sort: {time: 1}}
])
Playground example
From a performance perspective I recommend you split it into 2 calls, the first will be a quick findOne just to get the maximum time value in the collection.
Once you have that value the pipeline can be much leaner:
const maxItem = await db.collection.findOne({}).sort({ time: -1 });
db.collection.aggregate([
{
$unwind: "$holdings"
},
{
$group: {
_id: "$holdings.owner",
times: {
$push: {
time: "$time",
tokens: "$holdings.tokens"
}
},
minTime: {
$min: "$time"
}
}
},
{
$addFields: {
times: {
$reduce: {
input: {
$range: [
"$minTime",
maxItem.time + 1 // this is max time
]
},
initialValue: {
values: [],
lastIndex: 0
},
in: {
values: {
"$concatArrays": [
"$$value.values",
[
{
$cond: [
{
$in: [
"$$this",
"$times.time"
]
},
{
"$arrayElemAt": [
"$times",
"$$value.lastIndex"
]
},
{
"$mergeObjects": [
{
tokens: 0
},
{
"$arrayElemAt": [
"$times",
{
$subtract: [
"$$value.lastIndex",
1
]
}
]
},
{
time: "$$this"
}
]
}
]
}
]
]
},
lastIndex: {
$cond: [
{
$in: [
"$$this",
"$times.time"
]
},
{
$sum: [
"$$value.lastIndex",
1
]
},
"$$value.lastIndex"
]
}
}
}
}
}
},
{
$unwind: "$times.values"
},
{
$group: {
_id: "$times.values.time",
holdings: {
$push: {
owner: "$_id",
tokens: "$times.values.tokens"
}
}
}
},
{
$project: {
_id: 0,
time: "$_id",
holdings: 1
}
},
{
$sort: {
time: 1
}
}
])
This is still quite a heavy query as it requires to $unwind and $group the entire collection, however there is no workaround this due to the requirements. if the collection is too big for this approach I recommend iteration owner by owner, or time by time and doing separate updates accordingly.
Mongo Playground
If you don't care about performance at all and want it in a single query you can still use the same pipeline, you will have to first extract the max time in the collection, this will require you to add an initial $group stage, like so:
db.collection.aggregate([
{
$group: {
_id: null,
maxTime: {
$max: "$time"
},
roots: {
$push: "$$ROOT"
}
}
},
{
$unwind: "$roots"
},
{
$replaceRoot: {
newRoot: {
"$mergeObjects": [
"$roots",
{
maxTime: "$maxTime"
}
]
}
}
},
... same pipeline ...
])

Query maximum N records of each group base on a condition in MongoDB?

I have a question regarding querying data in MongoDB. Here is my sample data:
{
"_id": 1,
"category": "fruit",
"userId": 1,
"name": "Banana"
},
{
"_id": 2,
"category": "fruit",
"userId": 2,
"name": "Apple"
},
{
"_id": 3,
"category": "fresh-food",
"userId": 1,
"name": "Fish"
},
{
"_id": 4,
"category": "fresh-food",
"userId": 2,
"name": "Shrimp"
},
{
"_id": 5,
"category": "vegetable",
"userId": 1,
"name": "Salad"
},
{
"_id": 6,
"category": "vegetable",
"userId": 2,
"name": "carrot"
}
The requirements:
If the category is fruit, returns all the records match
If the category is NOT fruit, returns maximum 10 records of each category grouped by user
The category is known and stable, so we can hard-coded in our query.
I want to get it done in a single query. So the result expected should be:
{
"fruit": [
... // All records of
],
"fresh-food": [
{
"userId": 1,
"data": [
// Top 10 records of user 1 with category = "fresh-food"
]
},
{
"userId": 2,
"data": [
// Top 10 records of user 2 with category = "fresh-food"
]
},
...
],
"vegetable": [
{
"userId": 1,
"data": [
// Top 10 records of user 1 with category = "vegetable"
]
},
{
"userId": 2,
"data": [
// Top 10 records of user 2 with category = "vegetable"
]
},
]
}
I've found the guideline to group by each group using $group and $slice, but I can't apply the requirement number #1.
Any help would be appreciated.
You need to use aggregation for this
$facet to categorize incoming data, we categorized into two. 1. Fruit and 2. non_fruit
$match to match the condition
$group first group to group the data based on category and user. Second group to group by its category only
$objectToArray to make the object into key value pair
$replaceRoot to make the non_fruit to root with fruit
Here is the code
db.collection.aggregate([
{
"$facet": {
"fruit": [
{ $match: { "category": "fruit" } }
],
"non_fruit": [
{
$match: {
$expr: {
$ne: [ "$category", "fruit" ]
}
}
},
{
$group: {
_id: { c: "$category", u: "$userId" },
data: { $push: "$$ROOT" }
}
},
{
$group: {
_id: "$_id.c",
v: {
$push: {
uerId: "$_id.u",
data: { "$slice": [ "$data", 3 ] }
}
}
}
},
{ $addFields: { "k": "$_id", _id: "$$REMOVE" } }
]
}
},
{ $addFields: { non_fruit: { "$arrayToObject": "$non_fruit" } }},
{
"$replaceRoot": {
"newRoot": {
"$mergeObjects": [ "$$ROOT", "$non_fruit" ]
}
}
},
{ $project: { non_fruit: 0 } }
])
Working Mongo playground

MongoDb aggregation with arrays inside an array possible

I am struggling to find some examples of using the mongo aggregation framework to process documents which has an array of items where each item also has an array of other obejects (array containing an array)
In the example document below what I would really like is an example that sums the itemValue in the results array of all cases in the document and accross the collection where the result.decision was 'accepted'and group by the document locationCode
However, even an example that found all documents where the result.decision was 'accepted' to show or that summmed the itemValue for the same would help
Many thanks
{
"_id": "333212",
"data": {
"locationCode": "UK-555-5566",
"mode": "retail",
"caseHandler": "A N Other",
"cases": [{
"caseId": "CSE525666",
"items": [{
"id": "333212-CSE525666-1",
"type": "hardware",
"subType": "print cartridge",
"targetDate": "2020-06-15",
"itemDetail": {
"description": "acme print cartridge",
"quantity": 2,
"weight": "1.5"
},
"result": {
"decision": "rejected",
"decisionDate": "2019-02-02"
},
"isPriority": true
},
{
"id": "333212-CSE525666-2",
"type": "Stationery",
"subType": "other",
"targetDate": "2020-06-15",
"itemDetail": {
"description": "staples box",
"quantity": 3,
"weight": "1.66"
},
"result": {
"decision": "accepted",
"decisionDate": "2020-03-03",
"itemValue": "23.01"
},
"isPriority": true
}
]
},
{
"caseId": "CSE885655",
"items": [{
"id": "333212-CSE885655-1",
"type": "marine goods",
"subType": "fish food",
"targetDate": "2020-06-04",
"itemDetail": {
"description": "fish bait",
"quantity": 5,
"weight": "0.65"
},
"result": {
"decision": "accepted",
"decisionDate": "2020-03-02"
},
"isPriority": false
},
{
"id": "333212-CSE885655-4",
"type": "tobacco products",
"subType": "cigarettes",
"deadlineDate": "2020-06-15",
"itemDetail": {
"description": "rolling tobbaco",
"quantity": 42,
"weight": "2.25"
},
"result": {
"decision": "accepted",
"decisionDate": "2020-02-02",
"itemValue": "48.15"
},
"isPriority": true
}
]
}
]
},
"state": "open"
}
You're probably looking for $unwind. It takes an array within a document and creates a separate document for each array member.
{ foos: [1, 2] } -> { foos: 1 }, { foos: 2}
With that you can create a flat document structure and match & group as normal.
db.collection.aggregate([
{
$unwind: "$data.cases"
},
{
$unwind: "$data.cases.items"
},
{
$match: {
"data.cases.items.result.decision": "accepted"
}
},
{
$group: {
_id: "$data.locationCode",
value: {
$sum: {
$toDecimal: "$data.cases.items.result.itemValue"
}
}
}
},
{
$project: {
_id: 0,
locationCode: "$_id",
value: "$value"
}
}
])
https://mongoplayground.net/p/Xr2WfFyPZS3
Alternative solution...
We group by data.locationCode and sum all items with this condition:
cases[*].items[*].result.decision" == "accepted"
db.collection.aggregate([
{
$group: {
_id: "$data.locationCode",
itemValue: {
$sum: {
$reduce: {
input: "$data.cases",
initialValue: 0,
in: {
$sum: {
$concatArrays: [
[ "$$value" ],
{
$map: {
input: {
$filter: {
input: "$$this.items",
as: "f",
cond: {
$eq: [ "$$f.result.decision", "accepted" ]
}
}
},
as: "item",
in: {
$toDouble: {
$ifNull: [ "$$item.result.itemValue", 0 ]
}
}
}
}
]
}
}
}
}
}
}
}
])
MongoPlayground

MongoDB multiple counts, single document, arrays

I have been searching on stackoverflow and cannot find exactly what I am looking for and hope someone can help. I want to submit a single query, get multiple counts back, for a single document, based on array of that document.
My data:
db.myCollection.InsertOne({
"_id": "1",
"age": 30,
"items": [
{
"id": "1",
"isSuccessful": true,
"name": null
},{
"id": "2",
"isSuccessful": true,
"name": null
},{
"id": "3",
"isSuccessful": true,
"name": "Bob"
},{
"id": "4",
"isSuccessful": null,
"name": "Todd"
}
]
});
db.myCollection.InsertOne({
"_id": "2",
"age": 22,
"items": [
{
"id": "6",
"isSuccessful": true,
"name": "Jeff"
}
]
});
What I need back is the document and the counts associated to the items array for said document. In this example where the document _id = "1":
{
"_id": "1",
"age": 30,
{
"totalIsSuccessful" : 2,
"totalNotIsSuccessful": 1,
"totalSuccessfulNull": 1,
"totalNameNull": 2
}
}
I have found that I can get this in 4 queries using something like this below, but I would really like it to be one query.
db.test1.aggregate([
{ $match : { _id : "1" } },
{ "$project": {
"total": {
"$size": {
"$filter": {
"input": "$items",
"cond": { "$eq": [ "$$this.isSuccessful", true ] }
}
}
}
}}
])
Thanks in advance.
I am assuming your expected result is invalid since you have an object literal in the middle of another object and also you have totalIsSuccessful for id:1 as 2 where it seems they should be 3. With that said ...
you can get similar output via $unwind and then grouping with $sum and $cond:
db.collection.aggregate([
{ $match: { _id: "1" } },
{ $unwind: "$items" },
{ $group: {
_id: "_id",
age: { $first: "$age" },
totalIsSuccessful: { $sum: { $cond: [{ "$eq": [ "$items.isSuccessful", true ] }, 1, 0 ] } },
totalNotIsSuccessful: { $sum: { $cond: [{ "$ne": [ "$items.isSuccessful", true ] }, 1, 0 ] } },
totalSuccessfulNull: { $sum: { $cond: [{ "$eq": [ "$items.isSuccessful", null ] }, 1, 0 ] } },
totalNameNull: { $sum: { $cond: [ { "$eq": [ "$items.name", null ]}, 1, 0] } } }
}
])
The output would be this:
[
{
"_id": "_id",
"age": 30,
"totalIsSuccessful": 3,
"totalNameNull": 2,
"totalNotIsSuccessful": 1,
"totalSuccessfulNull": 1
}
]
You can see it working here

Aggregate Field Values to Separate Key Names

I have a collection in MongoDB with sample data something like this (simplified):
{
_id: 1,
username: "ted",
content: "4125151",
status: "complete"
}
{
_id: 2,
username: "sam",
content: "4151",
status: "new"
}
{
_id: 3,
username: "ted",
content: "511",
status: "new"
}
{
_id: 4,
username: "ted",
content: "411",
status: "in_progress"
}
{
_id: 5,
username: "pat",
content: "1sds51",
status: "complete"
}
{
_id: 6,
username: "ted",
content: "4151",
status: "in_progress"
}
{
_id: 7,
username: "ted",
content: "4125",
status: "in_progress"
}
I need to aggregate the data such that for each user, I get a count for each status value as well as a total number of records. The result should look like this:
[
{
username: “pat”,
new: 0,
in_progress: 0,
complete: 1,
total: 1
},
{
username: “sam”,
new: 1,
in_progress: 0,
complete: 0,
total: 1
},
{
username: “ted”,
new: 1,
in_progress: 3,
complete: 1,
total: 5
}
]
Or any format that will effectively serve the same purpose which is, I want to be able to use with ngRepeat to display on the front end in this format:
User New In Progress Complete Total
pat 0 0 1 1
sam 1 0 0 1
ted 1 3 1 5
I can perform this aggregation:
{
"$group": {
"_id": {
"username": "$username",
"status": "$status"
},
"count": {
"$sum": 1
}
}
}
This gives me the individual count for each user/status combination that has at least one record. But then I have to piece it together to get it in the format that I can use on the front end. This is not at all ideal.
Is there a way to perform the aggregation to get the data in the format that I need?
What you want is a "conditional" aggregation of the values to produce a distinct field property for each status.
This is pretty simple to do using the $cond operator:
[
{ "$group": {
"_id": "$username",
"new": { "$sum": { "$cond": [{ "$eq": [ "$status", "new" ] },1,0 ] } },
"complete": { "$sum": { "$cond": [{ "$eq": [ "$status", "complete" ] },1,0 ] } },
"in_progress": { "$sum": { "$cond": [{ "$eq": [ "$status", "in_progress" ] },1,0 ] } },
"total": { "$sum": 1 }
}}
]
Presuming of course those are the only "status" values, but if they are not then just add an additional $project to sum the fields you want:
[
{ "$match": { "status": { "$in": [ "new", "complete", "in_progress" ] } } },
{ "$group": {
"_id": "$username",
"new": { "$sum": { "$cond": [{ "$eq": [ "$status", "new" ] },1,0 ] } },
"complete": { "$sum": { "$cond": [{ "$eq": [ "$status", "complete" ] },1,0 ] } },
"in_progress": { "$sum": { "$cond": [{ "$eq": [ "$status", "in_progress" ] },1,0 ] } }
}},
{ "$project": {
"new": 1,
"complete": 1,
"in_progress": 1,
"total": { "$add": [ "$new", "$complete", "$in_progress" ] }
]
Or just include that $add within the $group with the same calculations for the separate fields. But the $match is probably just the best idea if there are indeed other status values you don't want.
Another answer using $group twice and a $push, In this below query you need to compute the final total on UI side.
db.collection.aggregate([
{
"$group": {
"_id": {
"username": "$username",
"status": "$status"
},
"statuscount": {
"$sum": 1
}
}
},
{
"$group": {
"_id": "$_id.username",
"finalstatus": {
"$push": {
"Status": "$_id.status",
"statuscount": "$statuscount"
}
}
}
}
])