Grouping into array in MongoDB - mongodb

I have MongoDB collection with below documents:
[
{
"productType":"Bike",
"company":"yamaha",
"model":"y1"
},
{
"productType":"Bike",
"company":"bajaj",
"model":"b1"
},
{
"productType":"Bike",
"company":"yamaha",
"model":"y1"
},
{
"productType":"Car",
"company":"Maruti",
"model":"m1"
},
{
"productType":"Bike",
"company":"yamaha",
"model":"y2"
},
{
"productType":"Car",
"company":"Suzuki",
"model":"s1"
}
]
I want my output to be like :
{
"productType": [
{
"name": "Bike",
"count": 4,
"companies": [
{
"name": "Yamaha",
"count": 3,
"models": [
{
"name": "y1",
"count": 2
},
{
"name": "y2",
"count": 1
}
]
},
{
"name": "Bajaj",
"count": 1,
"models": [
{
"name": "b1",
"count": 1
}
]
}
]
},
{
"name": "Car",
"count": 2,
"companies": [
{
"name": "Maruti",
"count": 1,
"models": [
{
"name": "m1",
"count": 1
}
]
},
{
"name": "Suzuki",
"count": 1,
"models": [
{
"name": "s1",
"count": 1
}
]
}
]
}
]
}
I am not able to understand how to create arrays inside existing array using $push. I know we can create an array using $push but how to create array of array with it ?
In future, I might want to add "metaData" field also along with name and count.

You have to run multiple $group stages, one for each level:
db.collection.aggregate([
{
$group: {
_id: { company: "$company", productType: "$productType", model: "$model" },
count: { $sum: 1 }
}
},
{
$group: {
_id: { productType: "$_id.productType", company: "$_id.company" },
models: { $push: { name: "$_id.model", count: "$count" } },
count: { $sum: "$count" }
}
},
{
$group: {
_id: "$_id.productType",
companies: { $push: { company: "$_id.company", models: "$models", count: "$count" } },
count: { $sum: "$count" }
}
},
{ $set: { name: "$_id", _id: "$$REMOVE" } },
{
$group: {
_id: null,
productType: { $push: "$$ROOT" }
}
}
])
Mongo Playground

Try this:
db.testCollection.aggregate([
{
$group: {
_id: {
name: "$productType",
company: "$company",
model: "$model"
},
count: { $sum: 1 }
}
},
{
$group: {
_id: {
name: "$_id.name",
company: "$_id.company"
},
count: { $sum: "$count" },
models: {
$push: {
name: "$_id.model",
count: "$count"
}
}
}
},
{
$group: {
_id: { name: "$_id.name" },
count: { $sum: "$count" },
companies: {
$push: {
name: "$_id.company",
count: "$count",
models: "$models"
}
}
}
},
{
$group: {
_id: null,
productType: {
$push: {
name: "$_id.name",
count: "$count",
companies: "$companies"
}
}
}
},
{
$project: { _id: 0 }
}
]);
Output:
{
"productType" : [
{
"name" : "Car",
"count" : 2,
"companies" : [
{
"name" : "Suzuki",
"count" : 1,
"models" : [
{
"name" : "s1",
"count" : 1
}
]
},
{
"name" : "Maruti",
"count" : 1,
"models" : [
{
"name" : "m1",
"count" : 1
}
]
}
]
},
{
"name" : "Bike",
"count" : 4,
"companies" : [
{
"name" : "yamaha",
"count" : 3,
"models" : [
{
"name" : "y2",
"count" : 1
},
{
"name" : "y1",
"count" : 2
}
]
},
{
"name" : "bajaj",
"count" : 1,
"models" : [
{
"name" : "b1",
"count" : 1
}
]
}
]
}
]
}

Related

How to write a mongo query to find the count based on gender field with $accumulator?

I'm trying to use the $accumulator function to get the count of names based on gender. Below is my code.
db.collection.aggregate([
{
$match:
{ department: "Finance"}
,
},
{
$group :
{
_id : "$department",
count: { $sum: 1 },
data :
{
$accumulator:
{
init: function() {
return { count: 0, maleCount: 0, femaleCount: 0 }
},
accumulate: function(state, gender) {
if (gender === "Male") {
state.maleCount+ 1;
}
if (type === "Female") {
state.femaleCount+ 1;
}
state.count + 1;
return state;
},
accumulateArgs: ["$user.gender"],
merge: function(state1, state2) {
return {
count: state1.count + state2.count,
maleCount: state1.maleCount+ state2.maleCount,
femaleCount: state1.femaleCount+ state2.femaleCount
}
},
finalize: function(state) {
return state;
},
lang: "js"
}
}
}
}
])
My data format is similar to the below:
[
{
"_id": 1,
"department": "Finance",
"user": {
"name": "Staff 1",
"gender": "Male",
}
},
{
"_id": 2,
"department": "Finance",
"user": {
"name": "Staff 2",
"gender": "Female",
}
},
{
"_id": 3,
"department": "Finance",
"user": {
"name": "Staff 3",
"gender": "Male",
}
}
]
I'm expecting the output to be the count of all the staff with maleCount and femaleCount. The below:
{
"_id" : "Finance",
"count" : 3,
"data" : {
"count" : 3,
"maleCount" : 2,
"femaleCount" : 1
}
}
I'm not getting the output as desired. Instead, the output I'm getting is something like the below:
{
"_id" : "Finance",
"count" : 3,
"data" : {
"count" : 0.0,
"maleCount" : 0.0,
"femaleCount" : 0.0
}
}
How about this:
db.collection.aggregate([
{
$setWindowFields: {
partitionBy: { department: "$department", gender: "$user.gender" },
output: {
count: { $count: {} }
}
}
},
{
$group: {
_id: "$department",
data: { $addToSet: { k: "$user.gender", v: "$count" } },
count: { $count: {} }
}
},
{
$project: {
_id: 1,
count: 1,
data: { $arrayToObject: "$data" }
}
}
])
Mongo Playground

Count nested and outer data

I have the following mongo data structure:
[
{
_id: "......",
libraryName: "a1",
stages: [
{
_id: '....',
type: 'b1',
},
{
_id: '....',
type: 'b2',
},
{
_id: '....',
type: 'b3',
},
{
_id: '....',
type: 'b1',
},
],
},
{
_id: "......",
libraryName: "a1",
stages: [
{
_id: '....',
type: 'b2',
},
{
_id: '....',
type: 'b2',
},
{
_id: '....',
type: 'b2',
},
{
_id: '....',
type: 'b1',
},
],
},
{
_id: "......",
libraryName: "a2",
stages: [
{
_id: '....',
type: 'b2',
},
{
_id: '....',
type: 'b2',
},
{
_id: '....',
type: 'b2',
},
{
_id: '....',
type: 'b1',
},
],
},
]
Assume this is the Session collection. Now, each session document has some irrelevant _id and libraryName key. Furthermore, each document has array of stages documents. Each stage document has some irrelevant _id and type. I want to count 2 things.
First - I want to count for each libraryName, how many session objects it has.
The solution for this query would be:
const services = await Session.aggregate(
[
{
$group: {
_id: "$libraryName",
count: { $sum: 1 },
},
}
]
);
Second - I want, per libaryName to count for each stage type how many nested stages documents it has.
So the final result I wish to retrieve is:
[
{
libraryName: 'a1',
count: 456,
stages: [
{
type: 'b1',
count: 43,
},
{
type: 'b2',
count: 44,
}
],
},
{
libraryName: 'a2',
count: 4546,
stages: [
{
type: 'b1',
count: 43
},
{
type: 'b3',
count: 44
}
]
}
]
Changed to:
[
{
"_id": "a1",
"count": 2,
"stages": [
{
"count": 1,
"type": "b3"
},
{
"count": 3,
"type": "b1"
},
{
"count": 4,
"type": "b2"
}
]
},
{
"_id": "a2",
"count": 1,
"stages": [
{
"count": 1,
"type": "b1"
},
{
"count": 3,
"type": "b2"
}
]
}
]
Using the sample data in the question post and the aggregation query:
db.collection.aggregate([
{
$unwind: "$stages"
},
{
$group: {
_id: { libraryName: "$libraryName", type: "$stages.type" },
type_count: { "$sum": 1 }
}
},
{
$group: {
_id: { libraryName: "$_id.libraryName" },
count: { "$sum": "$type_count" },
stages: { $push: { type: "$_id.type", count: "$type_count" } }
}
},
{
$project: {
libraryName: "$_id.libraryName",
count: 1,
stages: 1,
_id: 0
}
}
])
I get the following results:
{
"libraryName" : "a2",
"count" : 4,
"stages" : [
{
"type" : "b1",
"count" : 1
},
{
"type" : "b2",
"count" : 3
}
]
}
{
"libraryName" : "a1",
"count" : 8,
"stages" : [
{
"type" : "b3",
"count" : 1
},
{
"type" : "b1",
"count" : 3
},
{
"type" : "b2",
"count" : 4
}
]
}
[ EDIT - ADD ] : This is an answer after the question post's expected result is modified. This query uses the question post's sample documents as input.
db.collection.aggregate([
{
$group: {
_id: { libraryName: "$libraryName" },
count: { "$sum": 1 },
stages: { $push: "$stages" }
}
},
{
$unwind: "$stages"
},
{
$unwind: "$stages"
},
{
$group: {
_id: { libraryName: "$_id.libraryName", type: "$stages.type" },
type_count: { "$sum": 1 },
count: { $first: "$count" }
}
},
{
$group: {
_id: "$_id.libraryName",
count: { $first: "$count" },
stages: { $push: { type: "$_id.type", count: "$type_count" } }
}
},
])
The result:
{
"_id" : "a2",
"count" : 1,
"stages" : [
{
"type" : "b2",
"count" : 3
},
{
"type" : "b1",
"count" : 1
}
]
}
{
"_id" : "a1",
"count" : 2,
"stages" : [
{
"type" : "b2",
"count" : 4
},
{
"type" : "b3",
"count" : 1
},
{
"type" : "b1",
"count" : 3
}
]
}

Compare integers stored as Strings in Mongodb

In the below collection, column "qty" holds the integer values but the datatype is string.
I want to compare the "qty" field with an integer in the aggregate and "warehouse" field with a string "A". ("qty" > 2 and "warehouse" = "A")
[Can't change the datatype in the collection to integer as huge dependency is present]
Edit : Need to retrieve all the columns and all the documents matching the criteria.
Query : getting improper results
db.runCommand(
{
aggregate: "products", pipeline: [
{
$match: {
instock: {
$elemMatch: {
warehouse: "A",
qty: { $gt: "2" }
}
}
}
},
{ $project: { _id: 0 } }],
cursor: { batchSize: 200 }
});
Result : not getting documents where item = journal though it satisfies the conditions
/* 1 */
{
"item" : "paper",
"instock" : [
{
"warehouse" : "A",
"qty" : "60"
},
{
"warehouse" : "B",
"qty" : "15"
}
]
},
/* 2 */
{
"item" : "planner",
"instock" : [
{
"warehouse" : "A",
"qty" : "22"
},
{
"warehouse" : "B",
"qty" : "5"
}
]
}
Products Collection
[
{
"item": "journal",
"instock": [
{
"warehouse": "A",
"qty": "11"
},
{
"warehouse": "C",
"qty": "15"
}
]
},
{
"item": "paper",
"instock": [
{
"warehouse": "A",
"qty": "60"
},
{
"warehouse": "B",
"qty": "15"
}
]
},
{
"item": "planner",
"instock": [
{
"warehouse": "A",
"qty": "22"
},
{
"warehouse": "B",
"qty": "5"
}
]
}
]
Getting improper results as greater than operator in this case is working lexicographically but it should work like integers. Though I tried converting that to double but I am getting no results.
Query with $convert to double : no result
db.runCommand(
{
aggregate: "products", pipeline: [
//{ $match: { "item": { $in: ["planner", "paper","journal"] } } },
{
$match: {
instock: {
$elemMatch: {
warehouse: "A",
qty: {
$gt: [
{$convert:{ input: "$qty", to: "double" }}, 5]
}
}
}
}
},
{ $project: { _id: 0 } }],
cursor: { batchSize: 200 }
});
Try this:
db.products.aggregate([
{
$unwind: "$instock"
},
{
$match: {
$expr: {
$and: [
{
$eq: [
"$instock.warehouse",
"A"
]
},
{
$gt: [
{
$toInt: "$instock.qty"
},
2
]
}
]
}
}
},
{
$group: {
_id: "$_id",
item: {
$first: "$item"
},
instock: {
$push: "$instock"
}
}
},
{
$project: {
_id: 0
}
}
])
MongoPlayground
Try this, it uses $filter to retain objects has criteria :
db.runCommand(
{
aggregate: "products", pipeline: [
{ $match: { 'instock.warehouse': 'A' } },
{
$addFields: {
instockCheck: {
$filter: {
input: '$instock', as: 'each', cond: {
$and: [{ $gt: [{ $toInt: '$$each.qty' }, 2] },
{ $eq: ['$$each.warehouse', 'A'] }]
}
}
}
}
}, { $match: { instockCheck: { $gt: [] } } }, { $project: { instockCheck: 0, _id: 0 } }],
cursor: { batchSize: 200 }
});
Test : MongoDB-Playground

$push and $sum with the aggregation framework on sub-documents

I've a data as follows:
{
"_id" : ObjectId("55d4410544c96d6f6578f893"),
"executionProject" : "Project1",
"suiteList" : [
{
"suiteName": "Suite1",
"suiteStatus" : "PASS",
},
{
"suiteName": "Suite2",
"suiteStatus" : "FAIL",
},
{
"suiteName": "Suite3",
"suiteStatus" : "PASS",
}
],
"runEndTime" : ISODate("2015-08-19T08:40:47.049Z")
}
{
"_id" : ObjectId("55d4410544c96d6f6578f894"),
"executionProject" : "Project1",
"suiteList" : [
{
"suiteName": "Suite1",
"suiteStatus" : "PASS",
},
{
"suiteName": "Suite2",
"suiteStatus" : "FAIL",
},
{
"suiteName": "Suite3",
"suiteStatus" : "FAIL",
}
],
"runEndTime" : ISODate("2015-08-19T08:50:47.049Z")
}
And I was trying to get the result like this:
{
"executionProject": "Project1",
"data": [
{
"date": "2015-08-19 08:40:47",
"suitePass": 2,
"suiteFail": 1
},
{
"date": "2015-08-19 08:50:47",
"suitePass": 1,
"suiteFail": 2
}
]
}
Here I'm trying to group by executionProject and push the runEndTime and the pass and fail counts of suites to the result.
I tried this, but giving me wrong way of projection:
db.testruns.aggregate([
{
$project: {
executionProject: "$executionProject",
runEndTime: "$runEndTime",
suiteList: "$suiteList"
}
},
{
$unwind: "$suiteList"
},
{
$group: {
_id: "$executionProject",
runEndTime: {
$addToSet: "$runEndTime"
},
suite_pass: {
$sum: {
$cond: {
"if": {
$eq: ["$suiteList.suiteStatus", "PASS"]
},
"then": 1,
"else": 0
}
}
}
}
},
{
$group: {
_id: "$_id",
runEndTime: { $push: {runTime: "$runEndTime", suite_pass: "$suite_pass"} }
}
},
{
$project: {
executionProject: "$_id",
runEndTime: "$runEndTime",
_id: 0
}
}
]);
First you need to group by the document to get the suite totals, then you add to the array as you group on the project. Also don't forget to "sort" if you want things in order:
[
{ "$unwind": "$suiteList" },
{ "$group": {
"_id": "$_id",
"executionProject": { "$first": "$executionProject" },
"suite-pass": {
"$sum": {
"$cond": [
{ "$eq": [ "$suiteList.suiteStatus", "PASS" ] },
1,
0
]
}
},
"suite-fail": {
"$sum": {
"$cond": [
{ "$eq": [ "$suiteList.suiteStatus", "FAIL" ] },
1,
0
]
}
},
"date": { "$first": "$runEndTime" }
}},
{ "$sort": { "executionProject": 1, "date": 1 } },
{ "$group": {
"_id": "$executionProject",
"data": {
"$push": {
"suite-pass": "$suite-pass",
"suite-fail": "$suite-fail",
"date": "$date"
}
}
}}
]
Produces:
{
"_id" : "Project1",
"data" : [
{
"suite-pass" : 2,
"suite-fail" : 1,
"date" : ISODate("2015-08-19T08:40:47.049Z")
},
{
"suite-pass" : 1,
"suite-fail" : 2,
"date" : ISODate("2015-08-19T08:50:47.049Z")
}
]
}

Use $size with $sort in array and sub array

Here's the structure part of my collection:
_id: ObjectId("W"),
names: [
{
number: 1,
subnames: [ { id: "X", day: 1 }, { id: "Y", day: 10 }, { id: "Z", day: 2 } ],
list: ["A","B","C"],
day: 1
},
{
number: 2,
day: 5
},
{
number: 3,
subnames: [ { id: "X", day: 8 }, { id: "Z", day: 5 } ],
list: ["A","C"],
day: 2
},
...
],
...
I use this request:
db.publication.aggregate( [ { $match: { _id: ObjectId("W") } }, { $group: { _id: "$_id", SizeName: { $first: { $size: { $ifNull: [ "$names", [] ] } } }, names: { $first: "$names" } } }, { $unwind: "$names" }, { $sort: { "names.day": 1 } }, { $group: { _id: "$_id", SzNames: { $sum: 1 }, names: { $push: { number: "$names.number", subnames: "$names.subnames", list: "$names.list", SizeList: { $size: { $ifNull: [ "$names.list", [] ] } } } } } } ] );
but I would now use $sort for my names array AND my subnames array to obtain this result (subnames may not exist) :
_id: ObjectId("W"),
names: [
{
number: 2,
SizeList: 0,
day: 5
},
{
number: 3,
subnames: [ { id: "Z", day: 5 }, { id: "X", day: 8 } ],
list: ["A","C"],
SizeList: 2,
day: 2
},
{
number: 1,
subnames: [ { id: "X", day: 1 }, { id: "Z", day: 2 }, { id: "Y", day: 10 } ],
list: ["A","B","C"],
SizeList: 3,
day: 1
}
...
],
...
Can you help me ?
You can do this, but with great difficulty. I for one would gladly vote for an inline version of $sort along the lines of the $map operator. That would makes things so much easier.
For now though you need to de-construct and re-build the arrays after sorting. And you have to be very careful about this. Hence make false arrays with a single entry before processing $unwind:
db.publication.aggregate([
{ "$project": {
"SizeNames": {
"$size": {
"$ifNull": [ "$names", [] ]
}
},
"names": { "$ifNull": [{ "$map": {
"input": "$names",
"as": "el",
"in": {
"SizeList": {
"$size": {
"$ifNull": [ "$$el.list", [] ]
}
},
"SizeSubnames": {
"$size": {
"$ifNull": [ "$$el.subnames", [] ]
}
},
"number": "$$el.number",
"day": "$$el.day",
"subnames": { "$ifNull": [ "$$el.subnames", [0] ] },
"list": "$$el.list"
}
}}, [0] ] }
}},
{ "$unwind": "$names" },
{ "$unwind": "$names.subnames" },
{ "$sort": { "_id": 1, "names.subnames.day": 1 } },
{ "$group": {
"_id": {
"_id": "$_id",
"SizeNames": "$SizeNames",
"names": {
"SizeList": "$names.SizeList",
"SizeSubnames": "$names.SizeSubnames",
"number": "$names.number",
"list": "$names.list",
"day": "$names.day"
}
},
"subnames": { "$push": "$names.subnames" }
}},
{ "$sort": { "_id._id": 1, "_id.names.day": 1 } },
{ "$group": {
"_id": "$_id._id",
"SizeNames": { "$first": "$_id.SizeNames" },
"names": {
"$push": { "$cond": [
{ "$ne": [ "$_id.names.SizeSubnames", 0 ] },
{
"number": "$_id.names.number",
"subnames": "$subnames",
"list": "$_id.names.list",
"SizeList": "$_id.names.SizeList",
"day": "$_id.names.day"
},
{
"number": "$_id.names.number",
"list": "$_id.names.list",
"SizeList": "$_id.names.SizeList",
"day": "$_id.names.day"
}
]}
}
}},
{ "$project": {
"SizeNames": 1,
"names": {
"$cond": [
{ "$ne": [ "$SizeNames", 0 ] },
"$names",
[]
]
}
}}
])
You can kind of "hide away" the original empty array from the inner document as shown, but it's really difficult to remove all presence of the outer "names" array without pulling a similar conditional array "push" technique, and that really isn't a practical approach.
If all of this is just about sorting array elements in individual documents though, the aggregation framework should not be the tool to do this. It can be done as shown, but per document this is much easier to do in client side code.
Output:
{
"_id" : ObjectId("54b5cff8102f292553ce9bb5"),
"SizeNames" : 3,
"names" : [
{
"number" : 1,
"subnames" : [
{
"id" : "X",
"day" : 1
},
{
"id" : "Z",
"day" : 2
},
{
"id" : "Y",
"day" : 10
}
],
"list" : [
"A",
"B",
"C"
],
"SizeList" : 3,
"day" : 1
},
{
"number" : 3,
"subnames" : [
{
"id" : "Z",
"day" : 5
},
{
"id" : "X",
"day" : 8
}
],
"list" : [
"A",
"C"
],
"SizeList" : 2,
"day" : 2
},
{
"number" : 2,
"SizeList" : 0,
"day" : 5
}
]
}