I am trying to get the count of each distinct value of a key in my MongoDB collection. I am getting the counts, but each variety/state combination comes back as a separate object instead of being grouped per variety.
{ "_id" : ObjectId("596f6e95b6a1aa8d363befeb"), produce:"potato","variety" : "abc", "state" : 'PA' }
{ "_id" : ObjectId("596f6e95b6a1aa8d363befec"), produce:"potato", "variety" : "abc", "state" : 'PA' }
{ "_id" : ObjectId("596f6e95b6a1aa8d363befed"), produce:"potato", "variety" : "def", "state" : 'IA' }
{ "_id" : ObjectId("596f6e95b6a1aa8d363befee"), produce:"potato", "variety" : "def", "state" : 'IA' }
{ "_id" : ObjectId("596f6e95b6a1aa8d363befef"), produce:"potato", "variety" : "abc", "state" : 'DA' }
{ "_id" : ObjectId("596f6e95b6a1aa8d363befeg"), produce:"potato", "variety" : "abc", "state" : 'DA' }
{ "_id" : ObjectId("596f6e95b6a1aa8d363befeh"), produce:"potato", "variety" : "def", "state" : 'DA' }
{ "_id" : ObjectId("596f6e95b6a1aa8d363befei"), produce:"potato", "variety" : "abc", "state" : 'IA' }
// Count potato documents per (variety, state) pair and gather every pair into
// one "counts" array.
// NOTE: the pipeline must run on a collection (`collection` is a placeholder for
// the real collection name); `db.aggregate()` is the database-level helper and
// does not accept a `$match` over collection documents.
db.collection.aggregate([
  // Keep only the potato documents.
  { $match: { produce: "potato" } },
  // One group per distinct (variety, state) combination, with its document count.
  {
    $group: {
      _id: { variety: "$variety", state: "$state" },
      count: { $sum: 1 },
    },
  },
  // Collapse all groups into a single document holding a flat list of
  // { filterkey, state, count } entries.
  {
    $group: {
      _id: null,
      counts: {
        $push: { filterkey: "$_id.variety", state: "$_id.state", count: "$count" },
      },
    },
  },
])
Actual Result : -
counts
[
{ filterkey: 'abc', state: 'PA', count: 2},
{ filterkey: 'abc', state: 'IA', count: 1},
{ filterkey: 'abc', state: 'DA', count: 2},
{ filterkey: 'def', state: 'IA', count: 2},
{ filterkey: 'def', state: 'DA', count: 1}
]
Expected Result : -
counts
[
{ filterkey: 'abc', states: {'PA':2,'IA':1,'DA':2} },
{ filterkey: 'def', states: {'IA':2,'DA':1} }
]
Is there some way to get the data like this?
You need to use multiple $group stages here. First, $group by the variety and state fields and use $sum to count the documents for each variety/state pair.
Then, $group by variety alone and $push one { k: state, v: count } pair per state into a states array.
Finally, use $arrayToObject to convert the states array into a single object keyed by state.
// Produce one document per variety whose "states" field maps each state to the
// number of potato documents found for it.
db.collection.aggregate([
  // Restrict the pipeline to potato documents.
  { $match: { produce: "potato" } },

  // Count the documents for every distinct (variety, state) pair.
  {
    $group: {
      _id: { variety: "$variety", state: "$state" },
      count: { $sum: 1 },
    },
  },

  // Regroup by variety, collecting one { k, v } pair per state.
  {
    $group: {
      _id: "$_id.variety",
      states: { $push: { k: "$_id.state", v: "$count" } },
    },
  },

  // Turn the array of { k, v } pairs into a single object keyed by state.
  {
    $addFields: {
      states: { $arrayToObject: "$states" },
    },
  },
])
You can remove stages one by one here and can see what actually happens.
Output
[
{
"_id": "def",
"states": {
"DA": 1,
"IA": 2
}
},
{
"_id": "abc",
"states": {
"DA": 2,
"IA": 1,
"PA": 2
}
}
]
Related
For each student in a collection, I have an array of absences. I want to summarize the data by displaying the number of absences for each day of the week.
Given the following input:
{
"_id" : 9373,
"absences" : [
{
"code" : "U",
"date" : ISODate("2021-01-17T00:00:00.000+0000"),
"full_day" : false,
"remote" : false,
"dayNumber" : 1,
"dayName" : "Sunday"
}
]
}
{
"_id" : 9406,
"absences" : [
{
"code" : "E",
"date" : ISODate("2020-12-09T00:00:00.000+0000"),
"full_day" : false,
"remote" : false,
"dayNumber" : 4,
"dayName" : "Wednesday"
},
{
"code" : "U",
"date" : ISODate("2021-05-27T00:00:00.000+0000"),
"full_day" : false,
"remote" : false,
"dayNumber" : 5,
"dayName" : "Thursday"
}
]
}
How can I achieve the following output:
[
{
"_id": 9373,
"days": [
{
"dayNumber": 1,
"dayName": "Sunday",
"count": 1
}
]
},
{
"_id": 9406,
"days": [
{
"dayNumber": 4,
"dayName": "Wednesday",
"count": 1
},
{
"dayNumber": 5,
"dayName": "Thursday",
"count": 1
}
]
}
]
I've pushed all the required fields to this stage of the pipeline. I'm just not clear how to roll up the data in the nested absences array.
$unwind deconstruct absences array
$group by _id and dayNumber, and get count of grouped documents
$group by _id and reconstruct days array
// Summarise each student's absences as a "days" array holding one
// { dayNumber, dayName, count } entry per day of the week.
db.collection.aggregate([
  // Emit one document per absence entry.
  { $unwind: "$absences" },

  // Count absences per (student, dayNumber); dayName is carried along from the
  // first absence seen in each group.
  {
    $group: {
      _id: {
        _id: "$_id",
        dayNumber: "$absences.dayNumber",
      },
      dayName: { $first: "$absences.dayName" },
      count: { $sum: 1 },
    },
  },

  // $group does not guarantee the order of its output, so sort explicitly:
  // $push below appends in arrival order, which makes "days" come out ordered
  // by dayNumber for every student.
  { $sort: { "_id._id": 1, "_id.dayNumber": 1 } },

  // Rebuild one document per student, with fields in the requested order.
  {
    $group: {
      _id: "$_id._id",
      days: {
        $push: {
          dayNumber: "$_id.dayNumber",
          dayName: "$dayName",
          count: "$count",
        },
      },
    },
  },

  // The second $group scrambles document order again; return students by _id.
  { $sort: { _id: 1 } },
])
Playground
I've a collection in MongoDB of objects with this structure:
{
"_id": "ID",
"email": "EMAIL",
"name": "Foo",
"surname": "Bar",
"orders": [
{
"createdAt": "2019-09-09T07:30:25.575Z"
},
{
"createdAt": "2019-10-30T14:20:04.849Z"
},
{
"createdAt": "2019-10-30T16:38:27.271Z"
},
{
"createdAt": "2020-01-03T15:49:39.614Z"
},
],
}
I need to count the orders that share the same "createdAt" day and return one entry per distinct date, with the date reformatted as YYYY-MM-DD.
The result should be like below:
{
"_id": "ID",
"email": "EMAIL",
"name": "Foo",
"surname": "Bar",
"orders": [
{
"date": "2019-09-09",
"total": 1,
},
{
"date": "2019-10-30",
"total": 2,
},
{
"date": "2020-01-03",
"total": 1,
},
],
}
I tried using $unwind on orders in db.collection.aggregate(), but I have no idea how to get this result.
Thanks in advance.
Try this on for size. Given this data:
// Seed the "foo" collection with two customers whose orders carry real Date
// values, so that $dateToString can format them.
// insertMany() is used because insert() is deprecated in current shells/drivers.
db.foo.insertMany([
  {
    "_id": "ID",
    "email": "EMAIL", "name": "Foo", "surname": "Bar",
    "orders": [
      { "createdAt": new Date("2019-09-09T07:30:25.575Z") },
      { "createdAt": new Date("2019-10-30T14:20:04.849Z") },
      { "createdAt": new Date("2019-10-30T16:38:27.271Z") },
      { "createdAt": new Date("2020-01-03T15:49:39.614Z") }
    ]
  },
  {
    "_id": "ID2",
    "email": "EMAIL2", "name": "Bin", "surname": "Baz",
    "orders": [
      { "createdAt": new Date("2019-09-09T07:30:25.575Z") },
      { "createdAt": new Date("2020-10-30T14:20:04.849Z") },
      { "createdAt": new Date("2020-10-30T16:38:27.271Z") },
      { "createdAt": new Date("2020-10-30T15:49:39.614Z") }
    ]
  }
]);
This agg:
db.foo.aggregate([
  // One document per individual order.
  { $unwind: "$orders" },

  // First $group: key on the original _id plus only the Y-M-D part of the order
  // date. This already yields the counts the OP wants, just not yet in the
  // requested shape.
  {
    $group: {
      _id: {
        orig_id: "$_id",
        d: { $dateToString: { date: "$orders.createdAt", format: "%Y-%m-%d" } },
      },
      n: { $sum: 1 },
      email: { $first: "$email" },
      name: { $first: "$name" },
      surname: { $first: "$surname" },
    },
  },

  // $group gives no guarantee about the order of the dates, so reorder by date
  // now. _id.d is a Y-M-D string, which conveniently sorts correctly here.
  { $sort: { "_id.d": 1 } },

  // Second $group: keep just the original id from the compound key and *push*
  // each { date, total } pair onto a new orders array. The order of the ids
  // themselves (ID vs. ID2) is not guaranteed, but within one id the pushes
  // happen in arrival order, i.e. the order produced by the $sort above.
  // As an experiment, change the sort to -1 (reverse) and see what happens.
  {
    $group: {
      _id: "$_id.orig_id",
      email: { $first: "$email" },
      name: { $first: "$name" },
      surname: { $first: "$surname" },
      orders: { $push: { date: "$_id.d", total: "$n" } },
    },
  },
]);
yields this output:
{
"_id" : "ID",
"email" : "EMAIL",
"name" : "Foo",
"surname" : "Bar",
"orders" : [
{
"date" : "2019-09-09",
"total" : 1
},
{
"date" : "2019-10-30",
"total" : 2
},
{
"date" : "2020-01-03",
"total" : 1
}
]
}
{
"_id" : "ID2",
"email" : "EMAIL2",
"name" : "Bin",
"surname" : "Baz",
"orders" : [
{
"date" : "2019-09-09",
"total" : 1
},
{
"date" : "2020-10-30",
"total" : 3
}
]
}
If you are willing to accept a slightly more complex return structure and some duplicated data in exchange for more dynamic behavior, i.e. not having to enumerate each field (e.g. field: {$first: "$field"}), then you can do this:
db.foo.aggregate([
  // One document per individual order.
  { $unwind: "$orders" },

  // Count orders per (original _id, Y-M-D date) and keep a full copy of the
  // first unwound document of each group under ALL, instead of listing every
  // field with its own $first.
  {
    $group: {
      _id: {
        orig_id: "$_id",
        d: { $dateToString: { date: "$orders.createdAt", format: "%Y-%m-%d" } },
      },
      n: { $sum: 1 },
      ALL: { $first: "$$CURRENT" },
    },
  },

  // Collapse back to one document per original _id. There is no $sort stage in
  // this pipeline, so the order of the pushed orders entries is not controlled.
  {
    $group: {
      _id: "$_id.orig_id",
      ALL: { $first: "$ALL" },
      orders: { $push: { date: "$_id.d", total: "$n" } },
    },
  },
]);
to yield this:
{
"_id" : "ID2",
"ALL" : {
"_id" : "ID2",
"email" : "EMAIL2",
"name" : "Bin",
"surname" : "Baz",
"orders" : {
"createdAt" : ISODate("2019-09-09T07:30:25.575Z")
}
},
"orders" : [
{
"date" : "2019-09-09",
"total" : 1
},
{
"date" : "2020-10-30",
"total" : 3
}
]
}
{
"_id" : "ID",
"ALL" : {
"_id" : "ID",
"email" : "EMAIL",
"name" : "Foo",
"surname" : "Bar",
"orders" : {
"createdAt" : ISODate("2019-10-30T14:20:04.849Z")
}
},
"orders" : [
{
"date" : "2019-10-30",
"total" : 2
},
{
"date" : "2020-01-03",
"total" : 1
},
{
"date" : "2019-09-09",
"total" : 1
}
]
}
As an example, I have documents like the ones below:
{ "_id" : ObjectId("5ef0f23647c2b4dbae99105c"), "subcategory_name" : "ENTERPRISE ASSET MANAGEMENT", "category_id" : ObjectId("5ef0ec5547c2b4dbae990e9d"), "category_name" : "ERP", "status" : 0 }
{ "_id" : ObjectId("5ef0f23647c2b4dbae991062"), "subcategory_name" : "ENVIRONMENTAL HEALTH AND SAFETY SOFTWARE", "category_id" : ObjectId("5ef0ec5547c2b4dbae990e9d"), "category_name" : "ERP", "status" : 0 }
I want a result like this:
{
"category_id": ObjectId("5ef0ec5547c2b4dbae990e9d"),
"category_name": "ERP",
"subcategory_list": [
{"_id": ObjectId("5ef0f23647c2b4dbae99105c"), "subcategory_name": "ENTERPRISE ASSET MANAGEMENT"},
{"_id": ObjectId("5ef0f23647c2b4dbae991062"), "subcategory_name": "ENVIRONMENTAL HEALTH AND SAFETY SOFTWARE"}
]
}
You can use group to do this
[
  // Group the subcategory documents by their parent category. category_id and
  // category_name are copied from the first document of each group, and every
  // distinct { _id, subcategory_name } pair is collected into subcategory_list.
  {
    $group: {
      _id: "$category_id",
      category_id: { $first: "$category_id" },
      category_name: { $first: "$category_name" },
      subcategory_list: {
        $addToSet: { _id: "$_id", subcategory_name: "$subcategory_name" },
      },
    },
  },
  // Drop the grouping key; category_id already carries the same value.
  { $project: { _id: 0 } },
]
Working Mongo playground
I have a MongoDB collection as shown below, and I want to get the min/max/avg/count of the Value of the xxx field across all documents that satisfy $match: { "Parsed.FileId": "421462559", "Parsed.MessageId": "123" }
Note that the Fields array of each document contains exactly one entry with SchemaName = xxx.
Is this possible with MongoDB aggregation (or another feature), and if so, how?
{
"_id" : NumberLong(409),
"Parsed" : {
"FileId" : "421462559",
"MessageId": "123",
"Fields" : [
{
"SchemaName" : "xxx",
"Type" : 0,
"Value" : 6
},
{
"SchemaName" : "yyy",
"Type" : 0,
"Value" : 5
}
]
}
},
{
"_id" : NumberLong(510),
"Parsed" : {
"FileId" : "421462559",
"MessageId": "123",
"Fields" : [
{
"SchemaName" : "xxx",
"Type" : 0,
"Value" : 10
},
{
"SchemaName" : "yyy",
"Type" : 0,
"Value" : 20
}
]
}
}
For example collection above, I expect to get the result for field xxx as:
{
count: 2,
min: 6,
max: 10,
avg: 8
}
You can use below aggregation
Basically, you first need to $unwind the nested array and then use $group with the corresponding accumulators, i.e. $min, $max, $sum and $avg.
// Compute count/min/max/avg of the "xxx" field's Value for one file + message.
db.collection.aggregate([
  // Select only the requested file and message, and only documents that have
  // an "xxx" entry, before unwinding, so fewer documents flow through the
  // rest of the pipeline.
  {
    $match: {
      "Parsed.FileId": "421462559",
      "Parsed.MessageId": "123",
      "Parsed.Fields.SchemaName": "xxx",
    },
  },
  // One document per entry of the Fields array.
  { $unwind: "$Parsed.Fields" },
  // After unwinding, drop the entries that belong to other schema names.
  { $match: { "Parsed.Fields.SchemaName": "xxx" } },
  // Single group (_id: null) holding the statistics over all remaining values.
  {
    $group: {
      _id: null,
      count: { $sum: 1 },
      max: { $max: "$Parsed.Fields.Value" },
      min: { $min: "$Parsed.Fields.Value" },
      avg: { $avg: "$Parsed.Fields.Value" },
    },
  },
])
I have a Mongo collection like this:
{
"user_id" : "1",
"branch_id" : "1",
"trans_type":"DEBIT",
"total" : 500
},
{
"user_id" : "1",
"branch_id" : "1",
"trans_type":"CREDIT",
"total" : 200
},
{
"user_id" : "1",
"branch_id" : "3",
"trans_type":"DEBIT",
"total" : 1400
},
{
"user_id" : "2",
"branch_id" : "1",
"trans_type":"DEBIT",
"total" : 100
},
{
"user_id" : "2",
"branch_id" : "1",
"trans_type":"CREDIT",
"total" : 100
}
The expected output is this:
[
{
"user_id":"1",
"branch_id":"1",
"final_balance":"300"
},
{
"user_id":"1",
"branch_id":"3",
"final_balance":"1400"
},
{
"user_id":"2",
"branch_id":"1",
"final_balance":"0"
}
]
Note that in the output I am looking for the final balance after checking out debit and credit entries per user per branch.
Thank you.
That sounds like a simple $group with a $cond would do the job for you:
// Final balance per user and branch: DEBIT totals are added, every other
// transaction type is subtracted.
// The stages are passed as ONE pipeline array; the second argument of
// aggregate() is reserved for options, so stages must not be passed as
// separate arguments.
db.collection.aggregate([
  {
    $group: {
      _id: { // group by both fields, "user_id" and "branch_id"
        user_id: "$user_id",
        branch_id: "$branch_id",
      },
      final_balance: {
        $sum: { // calculate the sum of all signed "total" values
          $cond: {
            if: { $eq: ["$trans_type", "DEBIT"] }, // in case of "DEBIT", take the stored value of "total"
            then: "$total",
            else: { $multiply: ["$total", -1] }, // otherwise take the stored value of "total" times -1
          },
        },
      },
    },
  },
  {
    $project: { // only needed to get the flat output format mentioned in the question
      _id: 0,
      user_id: "$_id.user_id",
      branch_id: "$_id.branch_id",
      final_balance: "$final_balance",
    },
  },
])
// Running balance over transactions.
// `where` (any filter condition) and `sortByObj` (any sort specification) are
// placeholders that must be defined by the caller before this runs.
let docData = await db.Transactions.aggregate([
  // Keep only the transactions of interest.
  { $match: where },
  // Net effect of a single transaction: debit minus credit.
  {
    $addFields: {
      runningBalance: { $subtract: ["$debit", "$credit"] },
    },
  },
  // Cumulative sum of runningBalance from the first document up to and
  // including the current one, in transaction_date order.
  {
    $setWindowFields: {
      sortBy: { transaction_date: 1 },
      output: {
        runningTotal: {
          $sum: "$runningBalance",
          window: {
            documents: ["unbounded", "current"],
          },
        },
      },
    },
  },
  // Final presentation order requested by the caller.
  { $sort: sortByObj },
]);