Mongoose aggregate the property of subdocument and display the result - mongodb

I have a document with a subdocument (not referenced). I want to apply the aggregation on the field of the subdocument.
Schema
const MFileSchema = new Schema({
path: String,
malwareNames: [String],
title: String,
severity: String // i want to aggregate bases on this field
});
const ScanSchema = new Schema({
agent: { type: Schema.Types.ObjectId, ref: "Agent" },
completedAt: Date,
startedAt: { type: Date, default: Date.now() },
mFiles: [MFileSchema] // array of malicious files schema
});
Model
let Scan = model("Scan", ScanSchema);
Task
Find the sum of severity in all scan documents of particular agents.
// agents is an array Agents (the schema is not important to show, consider the _id)
The Aggregation Query I am using
let c = await Scan.aggregate([
{ $match: { agent: agents } },
{ $project: { "mFiles.severity": true } },
{ $group: { _id: "$mFiles.severity", count: { $sum: 1 } } }
]);
console.log(c);
Actual Output
[]
Expected Output
// The value of count in this question is arbitrary
[
{ _id: "Critical", count: 30 },
{ _id: "Moderate", count: 33 },
{ _id: "Clean", count: 500 }
]
PS: Also I would appreciate if you could suggest me the best resources to learn MongoDB aggregations

You need to use $in query operator in the $match stage, and add $unwind stage before $group stage.
db.collection.aggregate([
{
$match: {
agent: {
$in: [
"5e2c98fc3d785252ce5b5693",
"5e2c98fc3d785252ce5b5694"
]
}
}
},
{
$project: {
"mFiles.severity": true
}
},
{
$unwind: "$mFiles"
},
{
$group: {
_id: "$mFiles.severity",
count: {
$sum: 1
}
}
}
])
Playground
Sample data:
[
{
"agent": "5e2c98fc3d785252ce5b5693",
"mFiles": [
{
"title": "t1",
"severity": "Critical"
},
{
"title": "t2",
"severity": "Critical"
},
{
"title": "t3",
"severity": "Moderate"
},
{
"title": "t4",
"severity": "Clean"
}
]
},
{
"agent": "5e2c98fc3d785252ce5b5694",
"mFiles": [
{
"title": "t5",
"severity": "Critical"
},
{
"title": "t6",
"severity": "Critical"
},
{
"title": "t7",
"severity": "Moderate"
}
]
}
]
Output:
[
{
"_id": "Moderate",
"count": 2
},
{
"_id": "Critical",
"count": 4
},
{
"_id": "Clean",
"count": 1
}
]
For mongoose integration:
//agents must be an array of objectIds like this
// [ObjectId("5e2c98fc3d785252ce5b5693"), ObjectId("5e2c98fc3d785252ce5b5694")]
//or ["5e2c98fc3d785252ce5b5693","5e2c98fc3d785252ce5b5694"]
const ObjectId = require("mongoose").Types.ObjectId;
let c = await Scan.aggregate([
{
$match: {
agent: {
$in: agents
}
}
},
{
$project: {
"mFiles.severity": true
}
},
{
$unwind: "$mFiles"
},
{
$group: {
_id: "$mFiles.severity",
count: {
$sum: 1
}
}
}
]);
Best place for learning mongodb aggregation is the official docs.

Related

How to delete ($pull) nested array elements with property value null in arrays with more than one element

I would like to delete ($pull) nested array elements where one of the element's properties is null and where the array has more than one element.
Here is an example. In the following collection, I would like to delete those elements of the Orders array that have Amount = null and where the Orders array has more than one element. That is, I would like to delete only the element with OrderId = 12, but no other elements.
db.TestProducts.insertMany([
{
ProductDetails: { "ProductId": 1, Language: "fr" },
Orders: [
{ "OrderId": 11, "Amount": 200 },
{ "OrderId": 12, "Amount": null }
]
},
{
ProductDetails: { "ProductId": 2, Language: "es" },
Orders: [
{ "OrderId": 13, "Amount": 300 },
{ "OrderId": 14, "Amount": 400 }
]
},
{
ProductDetails: { "ProductId": 3, Language: "en" },
Orders: [
{ "OrderId": 15, "Amount": null }
]
}
]);
The following attempt is based on googling and a combination of a few other StackOverflow answers, e.g. Aggregate and update MongoDB
db.TestProducts.aggregate(
[
{ $match: { "Orders.Amount": { "$eq": null } } },
{ $unwind: "$Orders" },
{
"$group": {
"_id": {
ProductId: "$ProductDetails.ProductId",
Language: "$ProductDetails.Language"
},"count": { "$sum": 1 }
}
},
{ "$match": { "count": { "$gt": 1 } } },
{ "$out": "temp_results" }
],
{ allowDiskUse: true}
);
db.temp_results.find().forEach((result) => {
db.TestProducts.updateMany({"ProductDetails.ProductId": result._id.ProductId, "ProductDetails.Language": result._id.Language },
{ $pull: { "Orders": {"Amount": null } }})
});
This works, but I am wondering if it can be done in a simpler way, especially if it is possible to delete the array elements within the aggregation pipeline and avoid the additional iteration (forEach).
You can check these conditions in the update query, check 2 conditions
Amount is null
check the expression $expr condition for the size of the Orders array is greater than 1
db.TestProducts.updateMany({
"Orders.Amount": null,
"$expr": {
"$gt": [{ "$size": "$Orders" }, 1]
}
},
{
"$pull": {
"Orders": { "Amount": null }
}
})
Playground
an example
an example might help:
let feed = await Feed.findOneAndUpdate(
{
_id: req.params.id,
feeds: {
$elemMatch: {
type: FeedType.Location,
locations: {
$size: 0,
},
},
},
},
{
$pull: {
feeds: { locations: { $size: 0 }, type: FeedType.Location },
},
},
{ new: true, multi: true }
);

Mongodb- group an array by key

I have an array field (contains objects) in multiple documents, I want to merge the arrays into one array and group the array by object key. I have manage to group the array but I dont know how to group the data. See the code I tried below
const test = await salesModel.aggregate([
{ $unwind: "$items" },
{
$group: {
_id: 0,
data: { $addToSet: '$items' }
},
}
])
Result of the query:
{
_id: 0,
data: [
{
_id: 61435b3c0f773abaf77a367e,
price: 3000,
type: 'service',
sellerId: 61307abca667678553be81cb,
},
{
_id: 613115808330be818abaa613,
price: 788,
type: 'product',
sellerId: 61307abca667678553be81cb,
},
{
_id: 61307c1ea667676078be81cc,
price: 1200,
type: 'product',
sellerId: 61307abca667678553be81cb,
}
]
}
Now I want to group the data array by object key data.sellerId and sum price
Desired Output:
{
data: [
{
sumPrice: 788,
sellerId: 613115808330be818abaa613,
},
{
sumPrice: 1200,
sellerId: 61307abca667678553be81cb,
}
]
}
Extend with the current query and result with:
$unwind: Deconstruct the array field to multiple documents.
$group: Group by data.sellerId to sum ($sum) for data.price.
$group: Group by 0 with $addToSet to combine multiple documents into one document with data.
MongoDB aggregation query
db.collection.aggregate([
{
$unwind: "$data"
},
{
$group: {
_id: {
sellerId: "$data.sellerId"
},
"sumPrice": {
$sum: "$data.price"
}
}
},
{
"$group": {
"_id": 0,
"data": {
$addToSet: {
"sellerId": "$_id.sellerId",
"sumPrice": "$sumPrice"
}
}
}
}
])
Sample Mongo Playground
Output
[
{
"_id": 0,
"data": [
{
"sellerId": ObjectId("61307abca667678553be81cb"),
"sumPrice": 4988
}
]
}
]
If you want to re-write the query, here are the query with sample input.
Input
[
{
items: [
{
_id: ObjectId("61435b3c0f773abaf77a367e"),
price: 3000,
type: "service",
sellerId: ObjectId("61307abca667678553be81cb"),
},
{
_id: ObjectId("613115808330be818abaa613"),
price: 788,
type: "product",
sellerId: ObjectId("61307abca667678553be81cb"),
},
{
_id: ObjectId("61307c1ea667676078be81cc"),
price: 1200,
type: "product",
sellerId: ObjectId("61307abca667678553be81cb"),
}
]
}
]
Mongo aggregation query
db.collection.aggregate([
{
$unwind: "$items"
},
{
$group: {
_id: {
sellerId: "$items.sellerId"
},
"sumPrice": {
$sum: "$items.price"
}
}
},
{
"$group": {
"_id": 0,
"data": {
$addToSet: {
"sellerId": "$_id.sellerId",
"sumPrice": "$sumPrice"
}
}
}
}
])
Sample 2 on Mongo Playground
Output
[
{
"_id": 0,
"data": [
{
"sellerId": ObjectId("61307abca667678553be81cb"),
"sumPrice": 4988
}
]
}
]

Single array of objects sort and slice not working

I have a single entry on a collection like this:
{
"_id" : ObjectId("60c6f7a5ef86bd1a5402e928"),
"cid" : 1,
"array1" : [
{ "type": "car", value: 20 },
{ "type": "bike", value: 50 },
{ "type": "bus", value: 5 },
{ "type": "cycle", value: 100 },
...... 9000 more entry something like this
],
"array2" : [
{ "type": "laptop", value: 200 },
{ "type": "desktop", value: 15 },
{ "type": "tablet", value: 55 },
{ "type": "mobile", value: 90 },
...... 9000 more entry something like this
]
}
Now I want to sort and slice the data for the pagination purpose.
For that I wrote the query which works well on slice case but not on sort case.
This is my query which works for slice case
let val = await SomeCollectionName.findOne(
{ cid: 1 },
{ _id: 1 , array1: { $slice: [0, 10] } } ---> its return the 10 data. Initially it return from 0 to 10, then next call $slice: [10, 10]
).exec();
if (val) {
//console.log('Got the value')
}
console.log(error)
This is my query When I add sort with slice
let val = await SomeCollectionName.findOne(
{ cid: 1 },
{ _id: 1 , array1: { $sort: { value: -1 }, $slice: [0, 10] } }
).exec();
if (val) {
//console.log('Got the value')
}
console.log(error)
Is there anyone who guide me where I'm wrong or suggest me what is the efficient way for getting the data.
UPDATE
I am getting the answer from the above question and looking for the same implementation for two array.
Everything is same. Earlier I was dealing with 1 array now this time I have to deal with two array.
Just curious to know that how these things happen
I wrote the aggregation query but one array results is fine but others are returning the same data throughout the array.
This is my query as per the suggestion of dealing with single array with sort and slice
db.collection.aggregate([
{
"$match": {
"cid": 1
}
},
{
$unwind: "$array1"
},
{
$unwind: "$array2"
},
{
"$sort": {
"array1.value": -1,
"array2.value": -1,
}
},
{
$skip: 0
},
{
$limit: 3
},
{
$group:{
"_id":"$_id",
"array1":{$push:"$array1"},
"array2":{$push:"$array2"}
}
}
])
The issue is that $sort is not supported by findOne() in its projection parameter.
You can instead use aggregation to achieve the expected result,
db.collection.aggregate([
{
"$match": {
"cid": 1
}
},
{
$unwind: "$array1"
},
{
"$sort": {
"array1.value": -1
}
},
{
$skip: 0
},
{
$limit: 3
},
{
$group: {
"_id": "$_id",
"array1": {
$push: {
"type": "$array1.type",
"value": "$array1.value"
}
},
"array2": {
"$first": "$array2"
}
},
},
{
$unwind: "$array2"
},
{
"$sort": {
"array2.value": -1
}
},
{
$skip: 0
},
{
$limit: 3
},
{
$group: {
"_id": "$_id",
"array2": {
$push: {
"type": "$array2.type",
"value": "$array2.value"
}
},
"array1": {
"$first": "$array1"
}
},
}
])
Aggregation
$unwind

MongoDB aggregate: count of elements in an object key

I have a MongoDB document like this:
[
{
"_id": {
"$oid": "5ff09030cd55d6d9f378d460"
},
"username": "a value",
"userid": "123456",
"last_access_ts": 1612426253,
"last_access": "2021-2-4 9:10:53",
"anotherid": 12345678910,
"verified_date": "2021-1-2 16:24:32",
"verified_ts": 1609601072,
"group_users": {
"-1001159747589": [
{
"anotherid": 12345678910,
"userid": "123456"
}
],
"-1001143137644": [
{
"anotherid": 12345678910,
"userid": "123456"
}
],
"-1001368608972": [
{
"anotherid": 12345678910,
"userid": "123456"
}
]
},
"registered_access": "2021-1-2 16:24:42",
}
]
I've two questions.
First one: I need to count the elements inside each group_users[key] object, and I'm stuck with this aggregate:
db.collection.aggregate([
{
$match: {
username: "a value"
}
},
{
$project: {
_id: 1,
userid: 1,
"groups": {
"$objectToArray": "$group_users"
}
}
},
{
$unwind: "$groups",
},
])
This aggregate gives me this result:
[
{
"_id": ObjectId("5ff09030cd55d6d9f378d460"),
"groups": {
"k": "-1001449720492",
"v": [
{
"anotherid": 12345678910,
"userid": "123456"
}
]
},
"userid": "123456"
},
{
"_id": ObjectId("5ff09030cd55d6d9f378d460"),
"groups": {
"k": "-1001159747589",
"v": [
{
"anotherid": 12345678910,
"userid": "123456"
}
]
},
"userid": "123456"
},
{
"_id": ObjectId("5ff09030cd55d6d9f378d460"),
"groups": {
"k": "-1001143137644",
"v": [
{
"anotherid": 12345678910,
"userid": "123456"
}
]
},
"userid": "123456"
}
]
How can I count each single groups[v] and then re-group the data? I would like to have a result like:
{
... some user data
"groups": {
"group_key": "count",
"second_group_key": "count",
"third_group_key": "count"
}
}
Is it possible with aggregate or I need to loop in the code?
My second question is always about the group_users. Is possible to have, recursively, the user data inside a group_users object?
I mean, every object inside group_users is an array of users; from this array can I have the user data (maybe with $graphLookup?) using the userid field or the anotherid field?
As a result from this second aggregate I would like to have something like this:
{
... some user data
"groups": {
"group_key": [{"userid": userid, "username": username}],
"second_group_key": [{"userid": userid, "username": username}],
"third_group_key": [{"userid": userid, "username": username}]
}
}
Obviously I can limit this "recursion" to 10 elements per time.
Thanks for any advice.
$objectToArray convert group_users object to array
$let to Bind variable group_arr for use in the specified expression, and returns the result of the expression.
$map to iterate loop of bind variable group_arr, get size of total element of v and return k and v,
$arrayToObject convert returned array from $map to object
db.collection.aggregate([
{ $match: { username: "a value" } },
{
$project: {
_id: 1,
userid: 1,
groups: {
$let: {
vars: { group_arr: { $objectToArray: "$group_users" } },
in: {
$arrayToObject: {
$map: {
input: "$$group_arr",
in: {
k: "$$this.k",
v: { $size: "$$this.v" }
}
}
}
}
}
}
}
}
])
Playground
Second question,
$unwind deconstruct groups array
$lookup with pipeline, match anotherid $in condition and return required fields
$group by _id and reconstruct groups array
$arrayToObject convert groups array to object
db.collection.aggregate([
{ $match: { username: "a value" } },
{
$project: {
_id: 1,
userid: 1,
groups: { $objectToArray: "$group_users" }
}
},
{ $unwind: "$groups" },
{
$lookup: {
from: "collection",
let: { anotherid: "$groups.v.anotherid" },
pipeline: [
{ $match: { $expr: { $in: ["$anotherid", "$$anotherid"] } } },
{
$project: {
_id: 0,
userid: 1,
username: 1
}
}
],
as: "groups.v"
}
},
{
$group: {
_id: "$_id",
groups: { $push: "$groups" },
userid: { $first: "$userid" }
}
},
{ $addFields: { groups: { $arrayToObject: "$groups" } } }
])
Playground

Filter subdocument by datetime

I've the following model
var messageSchema = new Schema({
creationDate: { type: Date, default: Date.now },
comment: { type: String },
author: { type: Schema.Types.ObjectId }
});
var conversationSchema = new Schema({
title: { type: String },
author: { type : Schema.Types.ObjectId },
members: [ { type: Schema.Types.ObjectId } ],
creationDate: { type: Date, default: Date.now },
lastUpdate: { type: Date, default: Date.now },
comments: [ messageSchema ]
});
I want to create two methods to get the comments generated after a date by user or by conversationId.
By User
I tried with the following method
var query = {
members : { $all : [ userId, otherUserId ], "$size" : 2 }
, comments : { $elemMatch : { creationDate : { $gte: from } } }
};
When there are no comments after the specified date (at from) the method returns [] or null
By conversationId
The same happen when I try to get by user id
var query = { _id : conversationId
, comments : { $elemMatch : { creationDate : { $gte: from } } }
};
Is there any way to make the method returns the conversation information with an empty comments?
Thank you!
Sounds like a couple of problems here, but stepping through them all
In order to get more than a single match "or" none from an array to need the aggregation framework of mapReduce to do this. You could try "projecting" with $elemMatch but this can only return the "first" match. i.e:
{ "a": [1,2,3] }
db.collection.find({ },{ "$elemMatch": { "$gte": 2 } })
{ "a": [2] }
So standard projection does not work for this. It can return an "empty" array but it an also only return the "first" that is matched.
Moving along, you also have this in your code:
{ $all : [ userId, otherUserId ], "$site" : 2 }
Where $site is not a valid operator. I think you mean $size but there are actuall "two" operators with that name and your intent may not be clear here.
If you mean that the array you are testing must have "only two" elements, then this is the operator for you. If you meant that the matched conversation between the two people had to be equal to both in the match, then $all does this anyway so the $size becomes redundant in either case unless you don't want anyone else in the conversation.
On to the aggregation problem. You need to "filter" the content of the array in a "non-destructive way" in order to get more than one match or an empty array.
The best approach for this is with modern MongoDB features available from 2.6, which allows the array content to be filtered without processing $unwind:
Model.aggregate(
[
{ "$match": {
"members": { "$all": [userId,otherUserId] }
}},
{ "$project": {
"title": 1,
"author": 1,
"members": 1,
"creationDate": 1,
"lastUpdate": 1,
"comments": {
"$setDifference": [
{ "$map": {
"input": "$comments",
"as": "c",
"in": { "$cond": [
{ "$gte": [ "$$c.creationDate", from ] },
"$$c",
false
]}
}},
[false]
]
}
}}
],
function(err,result) {
}
);
That uses $map which can process an expression against each array element. In this case the vallues are tested under the $cond ternary to either return the array element where the condition is true or otherwise return false as the element.
These are then "filtered" by the $setDifference operator which essentially compares the resulting array of $map to the other array [false]. This removes any false values from the result array and only leaves matched elements or no elements at all.
An alternate may have been $redact but since your document contains "creationDate" at multiple levels, then this messes with the logic used with it's $$DESCEND operator. This rules that action out.
In earlier versions "not destroying" the array needs to be treated with care. So you need to do much the same "filter" of results in order to get the "empty" array you want:
Model.aggregate(
[
{ "$match": {
"$and": [
{ "members": userId },
{ "members": otherUserId }
}},
{ "$unwind": "$comments" },
{ "$group": {
"_id": "$_id",
"title": { "$first": "$title" },
"author": { "$first": "$author" },
"members": { "$first": "$members" },
"creationDate": { "$first": "$creationDate" },
"lastUpdate": { "$first": "$lastUpdate" },
"comments": {
"$addToSet": {
"$cond": [
{ "$gte": [ "$comments.creationDate", from ] },
"$comments",
false
]
}
},
"matchedSize": {
"$sum": {
"$cond": [
{ "$gte": [ "$comments.creationDate", from ] },
1,
0
]
}
}
}},
{ "$unwind": "$comments" },
{ "$match": {
"$or": [
{ "comments": { "$ne": false } },
{ "matchedSize": 0 }
]
}},
{ "$group": {
"_id": "$_id",
"title": { "$first": "$title" },
"author": { "$first": "$author" },
"members": { "$first": "$members" },
"creationDate": { "$first": "$creationDate" },
"lastUpdate": { "$first": "$lastUpdate" },
"comments": { "$push": "$comments" }
}},
{ "$project": {
"title": 1,
"author": 1,
"members": 1,
"creationDate": 1,
"lastUpdate": 1,
"comments": {
"$cond": [
{ "$eq": [ "$comments", [false] ] },
{ "$const": [] },
"$comments"
]
}
}}
],
function(err,result) {
}
)
This does much of the same things, but longer. In order to look at the array content you need to $unwind the content. When you $group back, you look at each element to see if it matches the condition to decide what to return, also keeping a count of the matches.
This is going to put some ( one with $addToSet ) false results in the array or only an array with the entry false where there are no matches. So yo filter these out with $match but also testing on the matched "count" to see if no matches were found. If no match was found then you don't throw away that item.
Instead you replace the [false] arrays with empty arrays in a final $project.
So depending on your MongoDB version this is either "fast/easy" or "slow/hard" to process. Compelling reasons to update a version already many years old.
Working example
var async = require('async'),
mongoose = require('mongoose'),
Schema = mongoose.Schema;
mongoose.connect('mongodb://localhost/aggtest');
var memberSchema = new Schema({
name: { type: String }
});
var messageSchema = new Schema({
creationDate: { type: Date, default: Date.now },
comment: { type: String },
});
var conversationSchema = new Schema({
members: [ { type: Schema.Types.ObjectId } ],
comments: [messageSchema]
});
var Member = mongoose.model( 'Member', memberSchema );
var Conversation = mongoose.model( 'Conversation', conversationSchema );
async.waterfall(
[
// Clean
function(callback) {
async.each([Member,Conversation],function(model,callback) {
model.remove({},callback);
},
function(err) {
callback(err);
});
},
// add some people
function(callback) {
async.map(["bill","ted","fred"],function(name,callback) {
Member.create({ "name": name },callback);
},callback);
},
// Create a conversation
function(names,callback) {
var conv = new Conversation();
names.forEach(function(el) {
conv.members.push(el._id);
});
conv.save(function(err,conv) {
callback(err,conv,names)
});
},
// add some comments
function(conv,names,callback) {
async.eachSeries(names,function(name,callback) {
Conversation.update(
{ "_id": conv._id },
{ "$push": { "comments": { "comment": name.name } } },
callback
);
},function(err) {
callback(err,names);
});
},
function(names,callback) {
Conversation.findOne({},function(err,conv) {
callback(err,names,conv.comments[1].creationDate);
});
},
function(names,from,callback) {
var ids = names.map(function(el) {
return el._id
});
var pipeline = [
{ "$match": {
"$and": [
{ "members": ids[0] },
{ "members": ids[1] }
]
}},
{ "$project": {
"members": 1,
"comments": {
"$setDifference": [
{ "$map": {
"input": "$comments",
"as": "c",
"in": { "$cond": [
{ "$gte": [ "$$c.creationDate", from ] },
"$$c",
false
]}
}},
[false]
]
}
}}
];
//console.log(JSON.stringify(pipeline, undefined, 2 ));
Conversation.aggregate(
pipeline,
function(err,result) {
if(err) throw err;
console.log(JSON.stringify(result, undefined, 2 ));
callback(err);
}
)
}
],
function(err) {
if (err) throw err;
process.exit();
}
);
Which produces this output:
[
{
"_id": "55a63133dcbf671918b51a93",
"comments": [
{
"comment": "ted",
"_id": "55a63133dcbf671918b51a95",
"creationDate": "2015-07-15T10:08:51.217Z"
},
{
"comment": "fred",
"_id": "55a63133dcbf671918b51a96",
"creationDate": "2015-07-15T10:08:51.220Z"
}
],
"members": [
"55a63133dcbf671918b51a90",
"55a63133dcbf671918b51a91",
"55a63133dcbf671918b51a92"
]
}
]
Note the "comments" only contain the last two entries which are "greater than or equal" to the date which was used as input ( being the date from the second comment ).