group an array into subarrays in a project stage - mongodb

I want to split the following array according to the group-value. I know I can do this using $unwind and $group. Is there any way to this in a single $project-stage?
Input
{
"_id": 1,
"some_field": "some_value",
"array": [
{
"group": "a",
"subgroup": "aa",
"value": 1
},
{
"group": "b",
"subgroup": "bb",
"value": 2
},
{
"group": "a",
"subgroup": "ab",
"value": 2
}
]
}
desired output:
{
"_id": 1,
"some_field": "some_value",
"array": [
{
"group": "a",
"values": [
{
"subgroup": "aa",
"value": 1
},
{
"subgroup": "ab",
"value": 2
}
]
},
{
"group": "b",
"values": [
{
"subgroup": "bb",
"value": 2
}
]
}
]
}

Try this: https://mongoplayground.net/p/pFn3tLtAG4D
$set: {
_id: "$_id",
some_field: "$some_field",
array: {
$map: {
input: {
$setUnion: [
"$array.group"
]
},
in: {
group: "$$this",
values: {
$map: {
input: {
$filter: {
input: "$array",
as: "elem",
cond: {
$eq: [
"$$elem.group",
"$$this"
]
}
}
},
as: "vals",
in: {
subgroup: "$$vals.subgroup",
value: "$$vals.value"
}
}
}
}
}
}
}

This is far from a single project stage, but it does produce the desired output from the given input.
db.collection.aggregate([
{'$match': {'_id': 1}},
{'$unwind': '$array'},
{'$project': {'array': {'group': '$array.group', 'values': '$array'},
'some_field': 1,
'my_id': '$_id'}},
{'$unset': 'array.values.group'},
{'$group': {'_id': '$array.group',
'values': {'$push': '$array.values'},
'some_field': {'$first': '$some_field'},
'my_id': {'$first': '$my_id'}}},
{'$set': {'array': {'group': '$_id', 'values': '$values'}}},
{'$unset': 'values'},
{'$group': {'_id': '$my_id',
'array': {'$push': '$array'},
'some_field': {'$first': '$some_field'}}}
])
Try it on mongoplayground.net.

It is doable, it's definitely not clean or sexy.
My approach is to use $reduce and $mergeObjects, we'll iterate over the array and keep reconstructing the result.
The main issue that plagues this approach is this feature that doesn't allow to $concatArrays expressions, so we have to use some very ugly workarounds.
Anyways here is how you can achieve this:
db.collection.aggregate([
{
$project: {
_id: 1,
some_field: 1,
array: {
$map: {
input: {
"$objectToArray": {
$reduce: {
input: "$array",
initialValue: {},
in: {
"$mergeObjects": [
"$$value",
{
"$arrayToObject": [
[
{
k: "$$this.group",
v: {
$map: {
input: {
"$concatArrays": [
[
"$$this"
],
{
$map: {
input: {
$filter: {
input: {
"$objectToArray": "$$value"
},
as: "filterItem",
cond: {
$eq: [
"$$filterItem.k",
"$$this.group"
]
}
}
},
as: "mapItem",
in: "$$mapItem.v"
}
},
]
},
as: "map2Item",
in: {
$cond: [
{
"$isArray": "$$map2Item"
},
{
$arrayElemAt: [
"$$map2Item",
0
]
},
"$$map2Item"
]
}
}
}
}
]
]
}
]
}
}
}
},
as: "item",
in: {
group: "$$item.k",
values: "$$item.v"
}
}
}
}
}
])
Mongo Playground

Related

mongoDB count deeply nested array elements filtered by and condition

I am attempting to count elements in 3x nested array , I have some progress , but struggling to filter based on lang:"EN" condition inside the 2x $reduced 1x filter :
Here is example document:
{
"_id": ObjectId("5c05984246a0201286d4b57a"),
f: "x",
"_a": [
{
"_onlineStore": {}
},
{
"_p": [
{
"pid": 1,
"s": {
"a": {
"t": [
{
id: 1,
"dateP": "20200-09-20",
lang: "EN"
},
{
id: 2,
"dateP": "20200-09-20",
lang: "En"
}
]
},
"c": {
"t": [
{
id: 3,
lang: "en"
},
{
id: 4,
lang: "En"
},
{
id: 5,
"dateP": "20300-09-23"
}
]
}
},
h: "Some data"
}
]
}]
}
And here is my attempt ( just need to filter only the elements with lang:"EN"
db.collection.aggregate([
{
$project: {
res: {
$reduce: {
input: "$_a",
initialValue: [],
in: {
$concatArrays: [
"$$value",
{
"$cond": {
"if": {
"$eq": [
{
"$type": "$$this._p"
},
"array"
]
},
"then": {
$reduce: {
input: "$$this._p",
initialValue: [],
in: {
$concatArrays: [
"$$value",
{
"$filter": {
"input": {
"$objectToArray": "$$this.s"
},
"as": "f",
"cond": {
"$eq": [
"$$f.k",
"c"
]
}
}
}
]
}
}
},
"else": []
}
}
]
}
}
}
}
},
{
$unwind: "$res"
},
{
$unwind: "$res.v.t"
},
{
$count: "Total"
}
])
I need to count all _a[]._p[].s.c.t[] where lang:"EN","en","En" , note at object s there is multiple nested elements c , a , d , etc , only the c need to be counted where lang:"EN" , I managed to filter only the "c" but struggling to add the lang:"EN","en","En" inside the $filter.cond , can anybody help here?
Expected playground output is:
{count:7}
I can add final $match condition and clear the lang:EN , but I am wondering if there is better option to be done inside the reduce/reduce/objectToArray/cond and avoid the $unwind's?
Playgorund
One option to avoid $unwind is:
db.collection.aggregate([
{$match: {"_a._p.s.c.t": {$elemMatch: {lang: {$in: ["EN", "En", "en"]}}}}},
{$project: {
res: {$reduce: {
input: "$_a._p.s.c.t",
initialValue: [],
in: {$concatArrays: ["$$value", "$$this"]}
}}
}},
{$project: {
res: {$reduce: {
input: "$res",
initialValue: 0,
in: {$sum: [
"$$value",
{$size: {$filter: {
input: "$$this",
as: "inner",
cond: {$in: ["$$inner.lang", ["EN", "En", "en"]]}
}}}
]}
}}
}},
{$group: {_id: 0, count: {$sum: "$res"}}}
])
See how it works on the playground example

Project Fields in Array of Array Objects in Mongodb

I have this document:
[
{
"name": "Report1",
"specifications": [
{
"parameters": [
{
"name": "feature",
"value": [
"13"
]
},
{
"name": "security",
"value": [
"XXXX-695"
]
},
{
"name": "imageURL",
"value": [
"football.jpg"
],
}
]
}
]
},
{
"name": "Report2",
"specifications": [
{
"parameters": [
{
"name": "feature",
"value": [
"67"
]
},
{
"name": "imageURL",
"value": [
"basketball.jpg"
],
},
{
"name": "security",
"value": [
"XXXX-123"
]
}
]
}
]
}
]
I want to obtain specifications[0].parameters.value[0] where parameters.name = "imageUrl". Like that:
[
{
"imageparam": "football.jpg",
"name": "Report1"
},
{
"imageparam": "basketball.jpg",
"name": "Report2"
}
]
I use MongoDB 3.6.3 with MongoDB Compass. I want to use aggregation (to add a pipeline in MongoDb Compass) so I could write finally this aggregation but it has 5 $project stage. Is there any more efficient or better solution:
db.collection.aggregate([{$project: {
_id: 0,
name: 1,
specifications: {$arrayElemAt: ["$specifications", 0]}
}}, {$project: {
name: 1,
imageparam: {
$filter: {
input: '$specifications.parameters',
as: 'param',
cond: {
$eq: [
'$$param.name',
'imageURL'
]
}
}
}
}}, {$project: {
name: 1,
imageparam: {$arrayElemAt: ["$imageparam",0]}
}}, {$project: {
name: 1,
imageparam: "$imageparam.value"
}}, {$project: {
name: 1,
imageparam: {$arrayElemAt: ["$imageparam",0]}
}}])
This is playground.
You can reduce it to 2 stages, might be there will be other options as well,
pass directly $arrayElemAt of specifications.parameters to $filter input and find the matching value for imageURL
use can use $addFields or $set stage to get first element from return result
db.collection.aggregate([
{
$project: {
_id: 0,
name: 1,
imageparam: {
$arrayElemAt: [
{
$filter: {
input: { $arrayElemAt: ["$specifications.parameters", 0] },
cond: { $eq: ["$$this.name", "imageURL"] }
}
},
0
]
}
}
},
{
$addFields: {
imageparam: { $arrayElemAt: ["$imageparam.value", 0] }
}
}
])
Playground
The second option you can do it in single stage using $let to bind the variables for use in the specified expression,
db.collection.aggregate([
{
$project: {
_id: 0,
name: 1,
imageparam: {
$let: {
vars: {
param: {
$arrayElemAt: [
{
$filter: {
input: { $arrayElemAt: ["$specifications.parameters", 0] },
cond: { $eq: ["$$this.name", "imageURL"] }
}
},
0
]
}
},
in: { $arrayElemAt: ["$$param.value", 0] }
}
}
}
}
])
Playground

Get current state from snapshot documents - mongoDB

I'm trying to get a list of current holders at specific times from a collection. My collection looks like this:
[
{
"time": 1,
"holdings": [
{ "owner": "A", "tokens": 2 },
{ "owner": "B", "tokens": 1 }
]
},
{
"time": 2,
"holdings": [
{ "owner": "B", "tokens": 2 }
]
},
{
"time": 3,
"holdings": [
{ "owner": "A", "tokens": 3 },
{ "owner": "B", "tokens": 1 },
{ "owner": "C", "tokens": 1 }
]
},
{
"time": 4,
"holdings": [
{ "owner": "C", "tokens": 0 }
]
}
]
tokens show the current holdings of an owner if the holdings have changed to the last document. I would like to change the collection so that holdings always includes the full current holdings for any point in time.
At time: 1, the holdings are: A: 2, B: 1.
At time: 2, the holdings are: A: 2, B: 2. The collections does not include A's holdings however, because they haven't changed. So what I'd like to get is:
[
{
"time": 1,
"holdings": [
{ "owner": "A", "tokens": 2 },
{ "owner": "B", "tokens": 1 }
]
},
{
"time": 2,
"holdings": [
{ "owner": "A", "tokens": 2 }, // merged from prev doc.
{ "owner": "B", "tokens": 2 }
]
},
{
"time": 3,
"holdings": [
{ "owner": "A", "tokens": 3 },
{ "owner": "B", "tokens": 1 },
{ "owner": "C", "tokens": 1 }
]
},
{
"time": 4,
"holdings": [
{ "owner": "A", "tokens": 3 }, // merged from prev
{ "owner": "B", "tokens": 1 }, // merged from prev
{ "owner": "C", "tokens": 0 }
]
}
]
From what I understand $mergeObjects does that, but I don't understand how I can merge all previous docs in order up to the current doc for each doc. So I'm looking for a way to combine setWindowFields with mergeObjects I think.
This is a nice challenge.
So far, I got this complicated solution:
Get all of our timestamps in all of our documents. This is the purpose of the first 4 steps. $setWindowFields is used to accumulate this data.
$group by owner and calculate the empty timestamps as wantedTimes- next 5 steps.
$set empty timestamps with tokens: null to be filled with actual data and $unwind to separate - next 3 steps
Use $setWindowFields to find the last known token for each owner at each timestamp.
Fill this last known state for documents with unknown token - 2 steps
$group and format answer:
db.collection.aggregate([
{
$setWindowFields: {
sortBy: {time: 1},
output: {
allTimes: {$addToSet: "$time", window: {documents: ["unbounded", "current"]}
}
}
}
},
{
$setWindowFields: {
sortBy: {time: -1},
output: {
allTimes: {$addToSet: "$allTimes", window: {documents: ["unbounded", "current"]}
}
}
}
},
{
$set: {
allTimes: {
$reduce: {
input: "$allTimes",
initialValue: [],
in: {"$concatArrays": ["$$value", "$$this"]}
}
}
}
},
{$set: {allTimes: {$setIntersection: "$allTimes"}}},
{$unwind: "$holdings"},
{$sort: {time: 1}},
{$group: { _id: "$holdings.owner",
tokens: {$push: {tokens: "$holdings.tokens", time: "$time"}},
times: {$push: "$time"}, firstTime: {$first: "$time"},
allTimes: {$first: "$allTimes"}}
},
{
$addFields: {
wantedTimes: {
$filter: {
input: "$allTimes",
as: "item",
cond: {$gte: ["$$item", "$firstTime"]}
}
}
}
},
{
$project: {
tokens: 1,
wantedTimes: {$setDifference: ["$wantedTimes", "$times"]}
}
},
{
$set: {
data: {
$map: {
input: "$wantedTimes",
as: "item",
in: {time: "$$item", tokens: null}
}
}
}
},
{$project: {tokens: {"$concatArrays": ["$tokens", "$data"]}}},
{$unwind: "$tokens"},
{
$setWindowFields: {
partitionBy: "$_id",
sortBy: {"tokens.time": 1},
output: {
lastTokens: {
$push: "$tokens.tokens",
window: {documents: ["unbounded", "current"]}
}
}
}
},
{
$set: {
lastTokens: {
$filter: {
input: "$lastTokens",
as: "item",
cond: {$ne: ["$$item", null]}
}
}
}
},
{
$set: {
"tokens.tokens": {$ifNull: ["$tokens.tokens", {$last: "$lastTokens"}]}
}
},
{
$group: {
_id: "$tokens.time",
holdings: {$push: {owner: "$_id", tokens: "$tokens.tokens" }}
}
},
{$project: {time: "$_id", holdings: 1, _id: 0}},
{$sort: {time: 1}}
])
Playground example
From a performance perspective I recommend you split it into 2 calls, the first will be a quick findOne just to get the maximum time value in the collection.
Once you have that value the pipeline can be much leaner:
const maxItem = await db.collection.findOne({}).sort({ time: -1 });
db.collection.aggregate([
{
$unwind: "$holdings"
},
{
$group: {
_id: "$holdings.owner",
times: {
$push: {
time: "$time",
tokens: "$holdings.tokens"
}
},
minTime: {
$min: "$time"
}
}
},
{
$addFields: {
times: {
$reduce: {
input: {
$range: [
"$minTime",
maxItem.time + 1 // this is max time
]
},
initialValue: {
values: [],
lastIndex: 0
},
in: {
values: {
"$concatArrays": [
"$$value.values",
[
{
$cond: [
{
$in: [
"$$this",
"$times.time"
]
},
{
"$arrayElemAt": [
"$times",
"$$value.lastIndex"
]
},
{
"$mergeObjects": [
{
tokens: 0
},
{
"$arrayElemAt": [
"$times",
{
$subtract: [
"$$value.lastIndex",
1
]
}
]
},
{
time: "$$this"
}
]
}
]
}
]
]
},
lastIndex: {
$cond: [
{
$in: [
"$$this",
"$times.time"
]
},
{
$sum: [
"$$value.lastIndex",
1
]
},
"$$value.lastIndex"
]
}
}
}
}
}
},
{
$unwind: "$times.values"
},
{
$group: {
_id: "$times.values.time",
holdings: {
$push: {
owner: "$_id",
tokens: "$times.values.tokens"
}
}
}
},
{
$project: {
_id: 0,
time: "$_id",
holdings: 1
}
},
{
$sort: {
time: 1
}
}
])
This is still quite a heavy query as it requires to $unwind and $group the entire collection, however there is no workaround this due to the requirements. if the collection is too big for this approach I recommend iteration owner by owner, or time by time and doing separate updates accordingly.
Mongo Playground
If you don't care about performance at all and want it in a single query you can still use the same pipeline, you will have to first extract the max time in the collection, this will require you to add an initial $group stage, like so:
db.collection.aggregate([
{
$group: {
_id: null,
maxTime: {
$max: "$time"
},
roots: {
$push: "$$ROOT"
}
}
},
{
$unwind: "$roots"
},
{
$replaceRoot: {
newRoot: {
"$mergeObjects": [
"$roots",
{
maxTime: "$maxTime"
}
]
}
}
},
... same pipeline ...
])

MongoDB compare endTime with startTime of next document

I have a similar collection where I have sort them by their startTime:
{"name": 'A', "startTime": '1634626355', "endTime": '1634631405'}
{"name": 'A', "startTime": '1634631406', "endTime": '1634631864'}
{"name": 'A', "startTime": '1634631865', "endTime": '1634656048'}
{"name": 'A', "startTime": '1634712642', "endTime": '1634718856'}
How can I compare the documents such that if the document endTime and the next document startTime duration is less than 5 minutes, merge it.
This is the result I'm trying to achieve (The 1st 3 documents are merged into 1 where it uses the startTime of the 1st document and the endTime of the 3rd document):
{"name": 'A', "startTime": '1634626355', "endTime": '1634656048'}
{"name": 'A', "startTime": '1634712642', "endTime": '1634718856'}
Thanks
First of all, you should never store date/time values as string, it's a design flaw. Store always proper Date object.
This solution works without self-lookup, so it may perform better:
db.collection.aggregate([
{
$set: {
startDateTime: { $toDate: { $multiply: ["$startTime", 1000] } },
endDateTime: { $toDate: { $multiply: ["$endTime", 1000] } }
},
},
{ $sort: { startDateTime: 1 } },
{ $group: { _id: null, data: { $push: "$$ROOT" } } },
{
$set: {
data: {
$reduce: {
input: "$data",
initialValue: [],
in: {
$cond: {
if: {
$or: [
{ $eq: [{ $size: "$$value" }, 0] }, // for the initail element
{
$gt: [
{
$dateDiff: { // calculate difference
endDate: "$$this.startDateTime",
startDate: { $last: "$$value.endDateTime" },
unit: "minute"
}
},
5 // more than 5 Minutes
]
}
]
},
then: { $concatArrays: ["$$value", ["$$this"]] }, // append new element
else: {
$map: {
input: "$$value",
as: "data",
in: {
$cond: {
if: { $eq: ["$$data._id", { $last: "$$value._id" }] }, // find last element
then: { // update last element
$mergeObjects: [
"$$data",
{ endDateTime: "$$this.endDateTime" },
{ endTime: "$$this.endTime" }
]
},
else: "$$data"
}
}
}
}
}
}
}
}
}
},
// some cosmetic
{ $unwind: "$data" },
{ $replaceRoot: { newRoot: "$data" } }
])
Mongo Playground
You can use $lookup in an aggregation pipeline to find out the documents that you need to remove. Then, perform a forEach to remove them.
db.collection.aggregate([
{
$addFields: {
endDateTime: {
"$toDate": {
"$multiply": [
{
$toLong: "$endTime"
},
1000
]
}
}
},
},
{
"$lookup": {
"from": "collection",
let: {
end: "$endDateTime"
},
pipeline: [
{
"$addFields": {
startDateTime: {
"$toDate": {
"$multiply": [
{
$toLong: "$startTime"
},
1000
]
}
}
}
},
{
$match: {
$expr: {
$and: [
{
$lte: [
{
$subtract: [
"$startDateTime",
"$$end"
]
},
300000
]
},
{
$lte: [
"$$end",
"$startDateTime"
]
}
]
}
}
}
],
"as": "lessThan5min"
}
},
{
"$unwind": "$lessThan5min"
},
{
"$replaceRoot": {
"newRoot": "$lessThan5min"
}
}
]).forEach(function(doc){
db.collection.remove({ "_id": doc._id });
});
Here is the Mongo playground to find out the documents that you need to remove for your reference.

How to combine mongodb original output of query with some new fields?

Collection:
[
{
"name": "device1",
"type": "a",
"para": {
"number": 3
}
},
{
"name": "device2",
"type": "b",
"additional": "c",
"para": {
"number": 1
}
}
]
My query:
db.collection.aggregate([
{
"$addFields": {
"arrayofkeyvalue": {
"$objectToArray": "$$ROOT"
}
}
},
{
"$unwind": "$arrayofkeyvalue"
},
{
"$group": {
"_id": null,
"allkeys": {
"$addToSet": "$arrayofkeyvalue.k"
}
}
}
])
The output currently:
[
{
"_id": null,
"allkeys": [
"additional",
"_id",
"para",
"type",
"name"
]
}
]
Detail see Playground
What I want to do is add a new column which includes all of top key of the mongodb query output, exclude "para". And then combine it with the old collection to form a new json.
Is it possible?
The expected result:
{
"column": [{"prop": "name"}, {"prop": "type"}, {"prop": "additional"}],
"columnData": [
{
"name": "device1",
"type": "a",
"para": {
"number": 3
}
},
{
"name": "device2",
"type": "b",
"additional": "c",
"para": {
"number": 1
}
}
]
}
You have the right general idea in mind, here's how I would do it by utilizing operators like $filter, $map and $reduce to manipulate the objects structure.
I separated the aggregation into 3 parts for readability but you can just merge stage 2 and 3 if you wish.
db.collection.aggregate([
{
"$group": {
"_id": null,
columnData: {
$push: "$$ROOT"
},
"keys": {
"$push": {
$map: {
input: {
"$objectToArray": "$$ROOT"
},
as: "field",
in: "$$field.k"
}
}
}
}
},
{
"$addFields": {
unionedKeys: {
$filter: {
input: {
$reduce: {
input: "$keys",
initialValue: [],
in: {
"$setUnion": [
"$$this",
"$$value"
]
}
}
},
as: "item",
cond: {
$not: {
"$setIsSubset": [
[
"$$item"
],
[
"_id",
"para"
]
]
}
}
}
}
}
},
{
$project: {
_id: 0,
columnData: 1,
column: {
$map: {
input: "$unionedKeys",
as: "key",
in: {
prop: "$$key"
}
}
}
}
}
])
Mongo Playground