Mongoose aggregate: how to create fields dynamically from the user request - mongodb

Please, someone help me! I can't find a solution in the documentation or in other topics.
I'm using MongoDB aggregation in a Mongoose/Nest.js project to return document data with some formatting and filtering. My MongoDB document has a structure like this:
{
  _id: '1',
  outputs: [
    {
      fileName: 'fileName1',
      data: [
        {
          columnName1: 3,
          columnName2: 4,
          ........
          columnName30: 5
        },
        {
          columnName1: 1,
          columnName2: 2,
          ........
          columnName30: 3
        },
        ...........
      ]
    },
    {
      fileName: 'fileName1',
      data: [
        {
          columnName1: 3,
          columnName2: 4,
          ........
          columnName30: 5
        },
        {
          columnName1: 1,
          columnName2: 2,
          ........
          columnName30: 3
        },
        ...........
      ]
    }
    ........
  ]
}
I've already done some of the formatting, but now I need to include in the response only the fields requested by the user (columnNamesToChoose), and filter their values by the gte and lte bounds on mainColumnName. Inside $project I was going to use a mapping like this, but it doesn't work. Could you please help me fix this part of the code?
...columnNamesToChoose.map((columnName) => ({
  [columnName]: {
    $map: {
      input: {
        $filter: {
          input: '$outputs.data',
          as: 'item',
          cond: {
            $and: [
              { $gte: [`$$item.${mainColumnName}`, gte] },
              { $lte: [`$$item.${mainColumnName}`, lte] },
            ],
          },
        },
      },
      as: 'file',
      in: `$$file.${columnName}`,
    },
  },
})),
This is the full code of the aggregation:
mainColumnName = 'column1' (from the body of the user request)
columnNamesToChoose = ['column2', 'column5'] (from the body of the user request)
myModel.aggregate([
  {
    $match: { _id: Number(id) },
  },
  { $unwind: '$outputs' },
  {
    $match: { 'outputs.fileName': fileName },
  },
  {
    $project: {
      _id: '$_id',
      fileName: '$outputs.fileName',
      [mainColumnName]: {
        $map: {
          input: {
            $filter: {
              input: '$outputs.data',
              as: 'item',
              cond: {
                $and: [
                  { $gte: [`$$item.${mainColumnName}`, gte] },
                  { $lte: [`$$item.${mainColumnName}`, lte] },
                ],
              },
            },
          },
          as: 'file',
          in: `$$file.${mainColumnName}`,
        },
      },
    },
  },
])
My result:
{
  "0": { "column2": [4, 2, 1, 5] },
  "1": { "column5": [1, 8, 9, 0] },
  "_id": 1,
  "fileName": "somefilename.txt",
  "column1": [3, 1, 2, 20]
}
Expected result:
{
  "_id": 1,
  "fileName": "somefilename.txt",
  "column1": [3, 1, 2, 20],
  "column2": [4, 2, 1, 5],
  "column5": [1, 8, 9, 0]
}

One option is to first $reduce, and then $unwind, $match, and $group, where the $group stage is built dynamically in the code (with a for-loop) according to the input:
db.collection.aggregate([
  {$match: {_id: id}},
  {$project: {
    outputs: {
      $reduce: {
        input: "$outputs",
        initialValue: [],
        in: {
          $concatArrays: [
            "$$value",
            {$cond: [
              {$eq: ["$$this.fileName", fileName]},
              "$$this.data",
              []
            ]}
          ]
        }
      }
    }
  }},
  {$unwind: "$outputs"},
  {$match: {"outputs.columnName1": {$gte: gte, $lte: lte}}},
  {$group: {
    _id: 0,
    column1: {$push: "$outputs.columnName1"},
    column2: {$push: "$outputs.columnName2"},
    column5: {$push: "$outputs.columnName5"}
  }},
  {$set: {fileName: fileName}}
])
See how it works on the playground example
In JS it will look something like this:
const matchStage = {$match: {}};
matchStage.$match[`outputs.${mainColumnName}`] = {$gte: gte, $lte: lte};

const groupStage = {$group: {_id: 0}};
// include the main column too, so the result matches the hard-coded pipeline above
for (const col of [mainColumnName, ...columnNamesToChoose]) {
  groupStage.$group[col] = {$push: `$outputs.${col}`};
}

const aggregation = [
  {$match: {_id: id}},
  {$project: {
    outputs: {$reduce: {
      input: "$outputs",
      initialValue: [],
      in: {$concatArrays: [
        "$$value",
        {$cond: [
          {$eq: ["$$this.fileName", fileName]},
          "$$this.data",
          []
        ]}
      ]}
    }}
  }},
  {$unwind: "$outputs"},
  matchStage,
  groupStage,
  {$set: {fileName: fileName}}
];

const res = await myModel.aggregate(aggregation);
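A note on why the original attempt produced the "0" and "1" keys: columnNamesToChoose.map(...) returns an array, and spreading an array into an object literal copies its numeric indexes as keys. If you prefer to keep the original $map/$filter approach, you can merge the mapped single-key objects into the $project document first, for example with Object.assign. A rough, untested sketch of that idea, reusing the variables from the question:

const projectStage = {
  $project: {
    _id: '$_id',
    fileName: '$outputs.fileName',
    // merge the per-column objects instead of spreading the array itself,
    // which is what produced the "0"/"1" keys
    ...Object.assign(
      {},
      ...[mainColumnName, ...columnNamesToChoose].map((columnName) => ({
        [columnName]: {
          $map: {
            input: {
              $filter: {
                input: '$outputs.data',
                as: 'item',
                cond: {
                  $and: [
                    { $gte: [`$$item.${mainColumnName}`, gte] },
                    { $lte: [`$$item.${mainColumnName}`, lte] },
                  ],
                },
              },
            },
            as: 'file',
            in: `$$file.${columnName}`,
          },
        },
      }))
    ),
  },
};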

Related

mongoDB count deeply nested array elements filtered by and condition

I am attempting to count elements in a 3x-nested array. I have made some progress, but I am struggling to filter on the lang: "EN" condition inside the doubly nested $reduce and the $filter:
Here is an example document:
{
"_id": ObjectId("5c05984246a0201286d4b57a"),
f: "x",
"_a": [
{
"_onlineStore": {}
},
{
"_p": [
{
"pid": 1,
"s": {
"a": {
"t": [
{
id: 1,
"dateP": "20200-09-20",
lang: "EN"
},
{
id: 2,
"dateP": "20200-09-20",
lang: "En"
}
]
},
"c": {
"t": [
{
id: 3,
lang: "en"
},
{
id: 4,
lang: "En"
},
{
id: 5,
"dateP": "20300-09-23"
}
]
}
},
h: "Some data"
}
]
}]
}
And here is my attempt (I just need to filter only the elements with lang: "EN"):
db.collection.aggregate([
{
$project: {
res: {
$reduce: {
input: "$_a",
initialValue: [],
in: {
$concatArrays: [
"$$value",
{
"$cond": {
"if": {
"$eq": [
{
"$type": "$$this._p"
},
"array"
]
},
"then": {
$reduce: {
input: "$$this._p",
initialValue: [],
in: {
$concatArrays: [
"$$value",
{
"$filter": {
"input": {
"$objectToArray": "$$this.s"
},
"as": "f",
"cond": {
"$eq": [
"$$f.k",
"c"
]
}
}
}
]
}
}
},
"else": []
}
}
]
}
}
}
}
},
{
$unwind: "$res"
},
{
$unwind: "$res.v.t"
},
{
$count: "Total"
}
])
I need to count all _a[]._p[].s.c.t[] where lang is "EN", "en", or "En". Note that under the object s there are multiple nested elements (c, a, d, etc.); only c needs to be counted, and only where lang matches. I managed to filter only the "c" entries, but I'm struggling to add the lang "EN"/"en"/"En" check inside the $filter cond. Can anybody help here?
Expected playground output is:
{count:7}
I can add a final $match stage and check lang: "EN" there, but I am wondering if there is a better option inside the reduce/reduce/objectToArray/cond that avoids the $unwinds.
Playground
One option to avoid $unwind is:
db.collection.aggregate([
{$match: {"_a._p.s.c.t": {$elemMatch: {lang: {$in: ["EN", "En", "en"]}}}}},
{$project: {
res: {$reduce: {
input: "$_a._p.s.c.t",
initialValue: [],
in: {$concatArrays: ["$$value", "$$this"]}
}}
}},
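// for each inner t array, count the elements whose lang is "EN"/"En"/"en", and sum per document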
{$project: {
res: {$reduce: {
input: "$res",
initialValue: 0,
in: {$sum: [
"$$value",
{$size: {$filter: {
input: "$$this",
as: "inner",
cond: {$in: ["$$inner.lang", ["EN", "En", "en"]]}
}}}
]}
}}
}},
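// sum the per-document counts into a single total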
{$group: {_id: 0, count: {$sum: "$res"}}}
])
See how it works on the playground example

how to use $match after $group in mongodb aggregation

I have 4 products. I want to know the count of product-4 for users who have product-1 or product-2.
Sample data:
[
{
"user_id": 1,
"product_type": "product-1"
},
{
"user_id": 1,
"product_type": "product-4"
},
{
"user_id": 1,
"product_type": "product-4"
},
{
"user_id": 2,
"product_type": "product-1"
}
]
user-1 has two product-4 and one product-1 (that counts 2)
user-2 has only product-1, but no product-4 (hence that does not count)
This is what I tried:
db.collection.aggregate([
{
$match: {
product_type: {
$in: [
"product-1​",
"product-2",
],
},
},
},
{
$group: {
_id: "$user_id",
},
},
{
$match: {
user_id: { $in: "$_id"}, // I want to use $group's result in here
product_type: "product-4",
},
}
]);
Expected results are:
[
{
"_id": 1,
"count": 2
},
{
"_id": 2,
"count": 0
}
]
Note:
I don't have a backend; I have to do this using MongoDB only.
Does this answer your question?
db.collection.aggregate([
{$group: {_id: "$user_id", data: {$push: "$product_type"}}},
{$match: {$expr: {$or: [
{$in: ["product-1", "$data"]},
{$in: ["product-2", "$data"]}
]}}},
{$project: {
count: {
$size: {
$filter: {
input: "$data",
cond: {$eq: ["$$this", "product-4"]}
}
}
}
}}
])
See how it works on the playground example

Get current state from snapshot documents - mongoDB

I'm trying to get a list of current holders at specific times from a collection. My collection looks like this:
[
{
"time": 1,
"holdings": [
{ "owner": "A", "tokens": 2 },
{ "owner": "B", "tokens": 1 }
]
},
{
"time": 2,
"holdings": [
{ "owner": "B", "tokens": 2 }
]
},
{
"time": 3,
"holdings": [
{ "owner": "A", "tokens": 3 },
{ "owner": "B", "tokens": 1 },
{ "owner": "C", "tokens": 1 }
]
},
{
"time": 4,
"holdings": [
{ "owner": "C", "tokens": 0 }
]
}
]
tokens shows an owner's current holdings only when those holdings have changed relative to the previous document. I would like to change the collection so that holdings always contains the full current holdings at any point in time.
At time: 1, the holdings are: A: 2, B: 1.
At time: 2, the holdings are: A: 2, B: 2. The collection does not include A's holdings, however, because they haven't changed. So what I'd like to get is:
[
{
"time": 1,
"holdings": [
{ "owner": "A", "tokens": 2 },
{ "owner": "B", "tokens": 1 }
]
},
{
"time": 2,
"holdings": [
{ "owner": "A", "tokens": 2 }, // merged from prev doc.
{ "owner": "B", "tokens": 2 }
]
},
{
"time": 3,
"holdings": [
{ "owner": "A", "tokens": 3 },
{ "owner": "B", "tokens": 1 },
{ "owner": "C", "tokens": 1 }
]
},
{
"time": 4,
"holdings": [
{ "owner": "A", "tokens": 3 }, // merged from prev
{ "owner": "B", "tokens": 1 }, // merged from prev
{ "owner": "C", "tokens": 0 }
]
}
]
From what I understand, $mergeObjects does that, but I don't understand how I can merge all previous docs, in order, up to the current doc, for each doc. So I think I'm looking for a way to combine $setWindowFields with $mergeObjects.
This is a nice challenge.
So far, I have this complicated solution:
Get all of the timestamps that appear in any of the documents. This is the purpose of the first 4 steps; $setWindowFields is used to accumulate this data.
$group by owner and calculate the missing timestamps as wantedTimes - the next 5 steps.
$set the missing timestamps with tokens: null, to be filled with actual data later, and $unwind to separate them - the next 3 steps.
Use $setWindowFields to find the last known token count for each owner at each timestamp.
Fill in this last known state for the documents with an unknown token count - 2 steps.
$group and format the answer:
db.collection.aggregate([
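// (forward pass) accumulate every "time" value seen so far into allTimes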
{
$setWindowFields: {
sortBy: {time: 1},
output: {
allTimes: {$addToSet: "$time", window: {documents: ["unbounded", "current"]}
}
}
}
},
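// (backward pass) accumulate the forward sets, so every document ends up with all times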
{
$setWindowFields: {
sortBy: {time: -1},
output: {
allTimes: {$addToSet: "$allTimes", window: {documents: ["unbounded", "current"]}
}
}
}
},
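// flatten the nested arrays and deduplicate into a single allTimes set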
{
$set: {
allTimes: {
$reduce: {
input: "$allTimes",
initialValue: [],
in: {"$concatArrays": ["$$value", "$$this"]}
}
}
}
},
{$set: {allTimes: {$setIntersection: "$allTimes"}}},
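// one document per owner per snapshot, then group each owner's (tokens, time) pairs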
{$unwind: "$holdings"},
{$sort: {time: 1}},
{$group: { _id: "$holdings.owner",
tokens: {$push: {tokens: "$holdings.tokens", time: "$time"}},
times: {$push: "$time"}, firstTime: {$first: "$time"},
allTimes: {$first: "$allTimes"}}
},
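// timestamps from the owner's first appearance onward for which the owner has no entry yet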
{
$addFields: {
wantedTimes: {
$filter: {
input: "$allTimes",
as: "item",
cond: {$gte: ["$$item", "$firstTime"]}
}
}
}
},
{
$project: {
tokens: 1,
wantedTimes: {$setDifference: ["$wantedTimes", "$times"]}
}
},
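// add placeholder {time, tokens: null} entries for those missing timestamps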
{
$set: {
data: {
$map: {
input: "$wantedTimes",
as: "item",
in: {time: "$$item", tokens: null}
}
}
}
},
{$project: {tokens: {"$concatArrays": ["$tokens", "$data"]}}},
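// per owner, fill each null placeholder with the last known (non-null) token count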
{$unwind: "$tokens"},
{
$setWindowFields: {
partitionBy: "$_id",
sortBy: {"tokens.time": 1},
output: {
lastTokens: {
$push: "$tokens.tokens",
window: {documents: ["unbounded", "current"]}
}
}
}
},
{
$set: {
lastTokens: {
$filter: {
input: "$lastTokens",
as: "item",
cond: {$ne: ["$$item", null]}
}
}
}
},
{
$set: {
"tokens.tokens": {$ifNull: ["$tokens.tokens", {$last: "$lastTokens"}]}
}
},
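// regroup by time and format the output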
{
$group: {
_id: "$tokens.time",
holdings: {$push: {owner: "$_id", tokens: "$tokens.tokens" }}
}
},
{$project: {time: "$_id", holdings: 1, _id: 0}},
{$sort: {time: 1}}
])
Playground example
From a performance perspective, I recommend you split this into 2 calls: the first is a quick findOne just to get the maximum time value in the collection.
Once you have that value the pipeline can be much leaner:
const maxItem = await db.collection.findOne({}).sort({ time: -1 });
db.collection.aggregate([
{
$unwind: "$holdings"
},
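// group each owner's (time, tokens) pairs and remember the owner's first time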
{
$group: {
_id: "$holdings.owner",
times: {
$push: {
time: "$time",
tokens: "$holdings.tokens"
}
},
minTime: {
$min: "$time"
}
}
},
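// walk every time from the owner's first appearance up to the max time, carrying the previous entry forward when a time is missing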
{
$addFields: {
times: {
$reduce: {
input: {
$range: [
"$minTime",
maxItem.time + 1 // this is max time
]
},
initialValue: {
values: [],
lastIndex: 0
},
in: {
values: {
"$concatArrays": [
"$$value.values",
[
{
$cond: [
{
$in: [
"$$this",
"$times.time"
]
},
{
"$arrayElemAt": [
"$times",
"$$value.lastIndex"
]
},
{
"$mergeObjects": [
{
tokens: 0
},
{
"$arrayElemAt": [
"$times",
{
$subtract: [
"$$value.lastIndex",
1
]
}
]
},
{
time: "$$this"
}
]
}
]
}
]
]
},
lastIndex: {
$cond: [
{
$in: [
"$$this",
"$times.time"
]
},
{
$sum: [
"$$value.lastIndex",
1
]
},
"$$value.lastIndex"
]
}
}
}
}
}
},
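// explode the filled-in series and regroup by time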
{
$unwind: "$times.values"
},
{
$group: {
_id: "$times.values.time",
holdings: {
$push: {
owner: "$_id",
tokens: "$times.values.tokens"
}
}
}
},
{
$project: {
_id: 0,
time: "$_id",
holdings: 1
}
},
{
$sort: {
time: 1
}
}
])
This is still quite a heavy query, as it requires $unwind-ing and $group-ing the entire collection, but there is no way around that given the requirements. If the collection is too big for this approach, I recommend iterating owner by owner, or time by time, and issuing separate updates accordingly; a rough sketch of the owner-by-owner idea follows the playground link below.
Mongo Playground
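For illustration, here is a rough sketch of that owner-by-owner iteration done in application code instead of a single pipeline. It assumes the Node.js driver and a collection named snapshots shaped like the sample documents (the collection name and the buildHoldings helper are made up for this example), and it only builds the merged view in memory rather than writing updates back:

// carry each owner's last known token count forward, one owner at a time
async function buildHoldings(db) {
  const snapshots = db.collection('snapshots');

  // every point in time, oldest first
  const times = await snapshots
    .find({}, { projection: { _id: 0, time: 1 } })
    .sort({ time: 1 })
    .toArray();

  const owners = await snapshots.distinct('holdings.owner');

  // holdings per time, filled owner by owner
  const byTime = new Map(times.map(({ time }) => [time, []]));

  for (const owner of owners) {
    // the snapshots in which this owner appears, oldest first
    const docs = await snapshots
      .find({ 'holdings.owner': owner }, { projection: { time: 1, holdings: 1 } })
      .sort({ time: 1 })
      .toArray();

    let last = null; // last known token count for this owner (null = not seen yet)
    let i = 0;
    for (const { time } of times) {
      if (i < docs.length && docs[i].time === time) {
        last = docs[i].holdings.find((h) => h.owner === owner).tokens;
        i += 1;
      }
      if (last !== null) byTime.get(time).push({ owner, tokens: last });
    }
  }

  return [...byTime].map(([time, holdings]) => ({ time, holdings }));
}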
If you don't care about performance at all and want it in a single query, you can still use the same pipeline, but you will first have to extract the max time inside the pipeline itself. This requires adding an initial $group stage, like so:
db.collection.aggregate([
{
$group: {
_id: null,
maxTime: {
$max: "$time"
},
roots: {
$push: "$$ROOT"
}
}
},
{
$unwind: "$roots"
},
{
$replaceRoot: {
newRoot: {
"$mergeObjects": [
"$roots",
{
maxTime: "$maxTime"
}
]
}
}
},
... same pipeline ...
])

MongoDB: can $unionWith be used inside $push?

I want to get related data based on the current item being processed.
Sample:
[
{ field1: 1, field2: 2, value: 12 },
{ field1: 1, field2: 2, value: 21 },
{ field1: 1, value: 1 },
{ field2: 2, value: 2 },
{ field1: 2, field2: 3, value: 23 }
];
and the expected result:
[
{
_id: { field1: 1, field2: 2 },
value: [12, 21],
relatedValue: [1, 2], // of item 1 and 2 because field 1 = 1 or field 2 = 2
},
];
Sample query:
db.collectionA.aggregate([
{
$match: { field1: 1 }
},
{
"$group":{
"_id":{
"field1":"$field1",
"field2":"$field2"
},
"alerts":{
"$push":{
"_id":"$_id",
"value":"$value",
"relatedData": {
"$unionWith": {
"coll": "collectionA",
"pipeline": [{
"$match": {
"$or": [
{ "field1": "$field1" },
{ "field2": "$field2" }
]
}
}]
}
}
}
}
}
}
])
I tried running this query but got an error. Please help me fix it or suggest a solution.
// Edited: value should be an array because I want to group the data by field1 and field2 and push all of the group's values into an array
You're trying to use $unionWith within $group, but it is a pipeline stage, meaning it can't be used like that, the same way you can't use $group within a $group.
Additionally, this stage is used to union two collections, not to populate data based on value matches (which is what you seem to be trying to do here). For this case you want to use $lookup, like so:
db.collection.aggregate([
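// pull in every other document that shares field1 or field2 with the current one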
{
$lookup: {
from: "collection",
let: {
field1: "$field1",
field2: "$field2",
docId: "$_id"
},
pipeline: [
{
$match: {
$expr: {
$and: [
{
$or: [
{
$eq: [
"$$field1",
"$field1"
]
},
{
$eq: [
"$$field2",
"$field2"
]
}
]
},
{
$ne: [
"$$docId",
"$_id"
]
}
]
}
}
},
{
$project: {
value: 1
}
}
],
as: "relatedData"
}
},
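// group by the (field1, field2) pair, collecting this group's values and the related documents' values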
{
$group: {
_id: {
field1: "$field1",
field2: "$field2"
},
values: {
$push: "$value"
},
relatedValue: {
$push: {
$map: {
input: "$relatedData",
in: "$$this.value"
}
}
}
}
},
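// flatten the collected related values and drop the group's own values from them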
{
$project: {
field1: "$_id.field1",
field2: "$_id.field2",
values: 1,
relatedValues: {
"$setDifference": [
{
"$reduce": {
input: "$relatedValue",
initialValue: [],
in: {
"$setUnion": [
"$$this",
"$$value"
]
}
}
},
"$values"
]
}
}
}
])
Mongo Playground

MongoDB multiple fields lookup

I'm trying to perform a $lookup using two fields on MongoDB 3.6. I've already read the docs and similar questions here, but I was unable to find what's wrong.
Collection acls:
[ { _id: 1, FolderId: 4, Sid: 'S-123-456' }
{ _id: 2, FolderId: 5, Sid: 'S-234-567' }
{ _id: 3, FolderId: 6, Sid: 'S-345-678' } ]
Collection groups:
[ { _id: 1, ProcessId: 10, Sid: 'S-123-456', Users: [ 'user1', 'user2'] }
{ _id: 2, ProcessId: 10, Sid: 'S-234-567', Users: [ 'user1'] }
{ _id: 3, ProcessId: 20, Sid: 'S-123-456', Users: [ 'user2'] } ]
Query:
db.acls.aggregate({
$lookup:
{
from: 'groups',
let: { 'ProcessId': 10, 'GroupSid': '$Sid' },
pipeline: [{
$match: {
$expr: {
$and: [
{
$eq: [ '$ProcessId', '$$ProcessId' ]
},
{
$eq: [ '$Sid', '$$GroupSid' ]
}
]
}
}
}],
as: 'grouplist'
}
})
I was expecting to return something like:
{ _id: 1, FolderId: 4, Sid: 'S-123-456',
grouplist: [ { _id: 1, ProcessId: 10, Sid: 'S-123-456', Users: [ 'user1', 'user2'] }] }
but instead I'm getting 'Script executed successfully, but there are no results to show', on Robo 3T.
Try this, it's working fine. Your let variable names must start with a lowercase letter:
db.acls.aggregate([
{
$lookup:
{
from: "groups",
let: { processid: 10, sid: "$Sid" },
pipeline: [
{ $match:
{ $expr:
{ $and:
[
{ $eq: [ "$ProcessId", "$$processid" ] },
{ $gte: [ "$Sid", "$$sid" ] }
]
}
}
}
],
as: "grouplist"
}
}
])
The $lookup let variable names must start with a lowercase letter.
db.acls.aggregate([
{ "$lookup": {
"from": 'groups',
"let": { "groupSid": "$Sid" },
"pipeline": [
{ "$match": {
"$expr": { "$eq": [ "$Sid", "$$groupSid" ] },
"ProcessId": 10
}}
],
"as": "grouplist"
}}
])
db.getCollection("acls").aggregate(
// Pipeline
[
// Stage 1
{
$lookup: // Equality Match
{
from: "groups",
localField: "Sid",
foreignField: "Sid",
as: "grouplist"
}
},
// Stage 2
{
$project: {
grouplist: {
$filter: {
input: "$grouplist",
as: "group",
cond: {
$eq: ["$$group.ProcessId", 10]
}
}
},
FolderId: 1,
Sid: 1
}
},
]
);