I want to split the following array according to the group-value. I know I can do this using $unwind and $group. Is there any way to this in a single $project-stage?
Input
{
"_id": 1,
"some_field": "some_value",
"array": [
{
"group": "a",
"subgroup": "aa",
"value": 1
},
{
"group": "b",
"subgroup": "bb",
"value": 2
},
{
"group": "a",
"subgroup": "ab",
"value": 2
}
]
}
desired output:
{
"_id": 1,
"some_field": "some_value",
"array": [
{
"group": "a",
"values": [
{
"subgroup": "aa",
"value": 1
},
{
"subgroup": "ab",
"value": 2
}
]
},
{
"group": "b",
"values": [
{
"subgroup": "bb",
"value": 2
}
]
}
]
}
Try this: https://mongoplayground.net/p/pFn3tLtAG4D
$set: {
_id: "$_id",
some_field: "$some_field",
array: {
$map: {
input: {
$setUnion: [
"$array.group"
]
},
in: {
group: "$$this",
values: {
$map: {
input: {
$filter: {
input: "$array",
as: "elem",
cond: {
$eq: [
"$$elem.group",
"$$this"
]
}
}
},
as: "vals",
in: {
subgroup: "$$vals.subgroup",
value: "$$vals.value"
}
}
}
}
}
}
}
This is far from a single project stage, but it does produce the desired output from the given input.
db.collection.aggregate([
{'$match': {'_id': 1}},
{'$unwind': '$array'},
{'$project': {'array': {'group': '$array.group', 'values': '$array'},
'some_field': 1,
'my_id': '$_id'}},
{'$unset': 'array.values.group'},
{'$group': {'_id': '$array.group',
'values': {'$push': '$array.values'},
'some_field': {'$first': '$some_field'},
'my_id': {'$first': '$my_id'}}},
{'$set': {'array': {'group': '$_id', 'values': '$values'}}},
{'$unset': 'values'},
{'$group': {'_id': '$my_id',
'array': {'$push': '$array'},
'some_field': {'$first': '$some_field'}}}
])
Try it on mongoplayground.net.
It is doable, it's definitely not clean or sexy.
My approach is to use $reduce and $mergeObjects, we'll iterate over the array and keep reconstructing the result.
The main issue that plagues this approach is this feature that doesn't allow to $concatArrays expressions, so we have to use some very ugly workarounds.
Anyways here is how you can achieve this:
db.collection.aggregate([
{
$project: {
_id: 1,
some_field: 1,
array: {
$map: {
input: {
"$objectToArray": {
$reduce: {
input: "$array",
initialValue: {},
in: {
"$mergeObjects": [
"$$value",
{
"$arrayToObject": [
[
{
k: "$$this.group",
v: {
$map: {
input: {
"$concatArrays": [
[
"$$this"
],
{
$map: {
input: {
$filter: {
input: {
"$objectToArray": "$$value"
},
as: "filterItem",
cond: {
$eq: [
"$$filterItem.k",
"$$this.group"
]
}
}
},
as: "mapItem",
in: "$$mapItem.v"
}
},
]
},
as: "map2Item",
in: {
$cond: [
{
"$isArray": "$$map2Item"
},
{
$arrayElemAt: [
"$$map2Item",
0
]
},
"$$map2Item"
]
}
}
}
}
]
]
}
]
}
}
}
},
as: "item",
in: {
group: "$$item.k",
values: "$$item.v"
}
}
}
}
}
])
Mongo Playground
Related
I am attempting to count elements in 3x nested array , I have some progress , but struggling to filter based on lang:"EN" condition inside the 2x $reduced 1x filter :
Here is example document:
{
"_id": ObjectId("5c05984246a0201286d4b57a"),
f: "x",
"_a": [
{
"_onlineStore": {}
},
{
"_p": [
{
"pid": 1,
"s": {
"a": {
"t": [
{
id: 1,
"dateP": "20200-09-20",
lang: "EN"
},
{
id: 2,
"dateP": "20200-09-20",
lang: "En"
}
]
},
"c": {
"t": [
{
id: 3,
lang: "en"
},
{
id: 4,
lang: "En"
},
{
id: 5,
"dateP": "20300-09-23"
}
]
}
},
h: "Some data"
}
]
}]
}
And here is my attempt ( just need to filter only the elements with lang:"EN"
db.collection.aggregate([
{
$project: {
res: {
$reduce: {
input: "$_a",
initialValue: [],
in: {
$concatArrays: [
"$$value",
{
"$cond": {
"if": {
"$eq": [
{
"$type": "$$this._p"
},
"array"
]
},
"then": {
$reduce: {
input: "$$this._p",
initialValue: [],
in: {
$concatArrays: [
"$$value",
{
"$filter": {
"input": {
"$objectToArray": "$$this.s"
},
"as": "f",
"cond": {
"$eq": [
"$$f.k",
"c"
]
}
}
}
]
}
}
},
"else": []
}
}
]
}
}
}
}
},
{
$unwind: "$res"
},
{
$unwind: "$res.v.t"
},
{
$count: "Total"
}
])
I need to count all _a[]._p[].s.c.t[] where lang:"EN","en","En" , note at object s there is multiple nested elements c , a , d , etc , only the c need to be counted where lang:"EN" , I managed to filter only the "c" but struggling to add the lang:"EN","en","En" inside the $filter.cond , can anybody help here?
Expected playground output is:
{count:7}
I can add final $match condition and clear the lang:EN , but I am wondering if there is better option to be done inside the reduce/reduce/objectToArray/cond and avoid the $unwind's?
Playgorund
One option to avoid $unwind is:
db.collection.aggregate([
{$match: {"_a._p.s.c.t": {$elemMatch: {lang: {$in: ["EN", "En", "en"]}}}}},
{$project: {
res: {$reduce: {
input: "$_a._p.s.c.t",
initialValue: [],
in: {$concatArrays: ["$$value", "$$this"]}
}}
}},
{$project: {
res: {$reduce: {
input: "$res",
initialValue: 0,
in: {$sum: [
"$$value",
{$size: {$filter: {
input: "$$this",
as: "inner",
cond: {$in: ["$$inner.lang", ["EN", "En", "en"]]}
}}}
]}
}}
}},
{$group: {_id: 0, count: {$sum: "$res"}}}
])
See how it works on the playground example
I have this document:
[
{
"name": "Report1",
"specifications": [
{
"parameters": [
{
"name": "feature",
"value": [
"13"
]
},
{
"name": "security",
"value": [
"XXXX-695"
]
},
{
"name": "imageURL",
"value": [
"football.jpg"
],
}
]
}
]
},
{
"name": "Report2",
"specifications": [
{
"parameters": [
{
"name": "feature",
"value": [
"67"
]
},
{
"name": "imageURL",
"value": [
"basketball.jpg"
],
},
{
"name": "security",
"value": [
"XXXX-123"
]
}
]
}
]
}
]
I want to obtain specifications[0].parameters.value[0] where parameters.name = "imageUrl". Like that:
[
{
"imageparam": "football.jpg",
"name": "Report1"
},
{
"imageparam": "basketball.jpg",
"name": "Report2"
}
]
I use MongoDB 3.6.3 with MongoDB Compass. I want to use aggregation (to add a pipeline in MongoDb Compass) so I could write finally this aggregation but it has 5 $project stage. Is there any more efficient or better solution:
db.collection.aggregate([{$project: {
_id: 0,
name: 1,
specifications: {$arrayElemAt: ["$specifications", 0]}
}}, {$project: {
name: 1,
imageparam: {
$filter: {
input: '$specifications.parameters',
as: 'param',
cond: {
$eq: [
'$$param.name',
'imageURL'
]
}
}
}
}}, {$project: {
name: 1,
imageparam: {$arrayElemAt: ["$imageparam",0]}
}}, {$project: {
name: 1,
imageparam: "$imageparam.value"
}}, {$project: {
name: 1,
imageparam: {$arrayElemAt: ["$imageparam",0]}
}}])
This is playground.
You can reduce it to 2 stages, might be there will be other options as well,
pass directly $arrayElemAt of specifications.parameters to $filter input and find the matching value for imageURL
use can use $addFields or $set stage to get first element from return result
db.collection.aggregate([
{
$project: {
_id: 0,
name: 1,
imageparam: {
$arrayElemAt: [
{
$filter: {
input: { $arrayElemAt: ["$specifications.parameters", 0] },
cond: { $eq: ["$$this.name", "imageURL"] }
}
},
0
]
}
}
},
{
$addFields: {
imageparam: { $arrayElemAt: ["$imageparam.value", 0] }
}
}
])
Playground
The second option you can do it in single stage using $let to bind the variables for use in the specified expression,
db.collection.aggregate([
{
$project: {
_id: 0,
name: 1,
imageparam: {
$let: {
vars: {
param: {
$arrayElemAt: [
{
$filter: {
input: { $arrayElemAt: ["$specifications.parameters", 0] },
cond: { $eq: ["$$this.name", "imageURL"] }
}
},
0
]
}
},
in: { $arrayElemAt: ["$$param.value", 0] }
}
}
}
}
])
Playground
I'm trying to get a list of current holders at specific times from a collection. My collection looks like this:
[
{
"time": 1,
"holdings": [
{ "owner": "A", "tokens": 2 },
{ "owner": "B", "tokens": 1 }
]
},
{
"time": 2,
"holdings": [
{ "owner": "B", "tokens": 2 }
]
},
{
"time": 3,
"holdings": [
{ "owner": "A", "tokens": 3 },
{ "owner": "B", "tokens": 1 },
{ "owner": "C", "tokens": 1 }
]
},
{
"time": 4,
"holdings": [
{ "owner": "C", "tokens": 0 }
]
}
]
tokens show the current holdings of an owner if the holdings have changed to the last document. I would like to change the collection so that holdings always includes the full current holdings for any point in time.
At time: 1, the holdings are: A: 2, B: 1.
At time: 2, the holdings are: A: 2, B: 2. The collections does not include A's holdings however, because they haven't changed. So what I'd like to get is:
[
{
"time": 1,
"holdings": [
{ "owner": "A", "tokens": 2 },
{ "owner": "B", "tokens": 1 }
]
},
{
"time": 2,
"holdings": [
{ "owner": "A", "tokens": 2 }, // merged from prev doc.
{ "owner": "B", "tokens": 2 }
]
},
{
"time": 3,
"holdings": [
{ "owner": "A", "tokens": 3 },
{ "owner": "B", "tokens": 1 },
{ "owner": "C", "tokens": 1 }
]
},
{
"time": 4,
"holdings": [
{ "owner": "A", "tokens": 3 }, // merged from prev
{ "owner": "B", "tokens": 1 }, // merged from prev
{ "owner": "C", "tokens": 0 }
]
}
]
From what I understand $mergeObjects does that, but I don't understand how I can merge all previous docs in order up to the current doc for each doc. So I'm looking for a way to combine setWindowFields with mergeObjects I think.
This is a nice challenge.
So far, I got this complicated solution:
Get all of our timestamps in all of our documents. This is the purpose of the first 4 steps. $setWindowFields is used to accumulate this data.
$group by owner and calculate the empty timestamps as wantedTimes- next 5 steps.
$set empty timestamps with tokens: null to be filled with actual data and $unwind to separate - next 3 steps
Use $setWindowFields to find the last known token for each owner at each timestamp.
Fill this last known state for documents with unknown token - 2 steps
$group and format answer:
db.collection.aggregate([
{
$setWindowFields: {
sortBy: {time: 1},
output: {
allTimes: {$addToSet: "$time", window: {documents: ["unbounded", "current"]}
}
}
}
},
{
$setWindowFields: {
sortBy: {time: -1},
output: {
allTimes: {$addToSet: "$allTimes", window: {documents: ["unbounded", "current"]}
}
}
}
},
{
$set: {
allTimes: {
$reduce: {
input: "$allTimes",
initialValue: [],
in: {"$concatArrays": ["$$value", "$$this"]}
}
}
}
},
{$set: {allTimes: {$setIntersection: "$allTimes"}}},
{$unwind: "$holdings"},
{$sort: {time: 1}},
{$group: { _id: "$holdings.owner",
tokens: {$push: {tokens: "$holdings.tokens", time: "$time"}},
times: {$push: "$time"}, firstTime: {$first: "$time"},
allTimes: {$first: "$allTimes"}}
},
{
$addFields: {
wantedTimes: {
$filter: {
input: "$allTimes",
as: "item",
cond: {$gte: ["$$item", "$firstTime"]}
}
}
}
},
{
$project: {
tokens: 1,
wantedTimes: {$setDifference: ["$wantedTimes", "$times"]}
}
},
{
$set: {
data: {
$map: {
input: "$wantedTimes",
as: "item",
in: {time: "$$item", tokens: null}
}
}
}
},
{$project: {tokens: {"$concatArrays": ["$tokens", "$data"]}}},
{$unwind: "$tokens"},
{
$setWindowFields: {
partitionBy: "$_id",
sortBy: {"tokens.time": 1},
output: {
lastTokens: {
$push: "$tokens.tokens",
window: {documents: ["unbounded", "current"]}
}
}
}
},
{
$set: {
lastTokens: {
$filter: {
input: "$lastTokens",
as: "item",
cond: {$ne: ["$$item", null]}
}
}
}
},
{
$set: {
"tokens.tokens": {$ifNull: ["$tokens.tokens", {$last: "$lastTokens"}]}
}
},
{
$group: {
_id: "$tokens.time",
holdings: {$push: {owner: "$_id", tokens: "$tokens.tokens" }}
}
},
{$project: {time: "$_id", holdings: 1, _id: 0}},
{$sort: {time: 1}}
])
Playground example
From a performance perspective I recommend you split it into 2 calls, the first will be a quick findOne just to get the maximum time value in the collection.
Once you have that value the pipeline can be much leaner:
const maxItem = await db.collection.findOne({}).sort({ time: -1 });
db.collection.aggregate([
{
$unwind: "$holdings"
},
{
$group: {
_id: "$holdings.owner",
times: {
$push: {
time: "$time",
tokens: "$holdings.tokens"
}
},
minTime: {
$min: "$time"
}
}
},
{
$addFields: {
times: {
$reduce: {
input: {
$range: [
"$minTime",
maxItem.time + 1 // this is max time
]
},
initialValue: {
values: [],
lastIndex: 0
},
in: {
values: {
"$concatArrays": [
"$$value.values",
[
{
$cond: [
{
$in: [
"$$this",
"$times.time"
]
},
{
"$arrayElemAt": [
"$times",
"$$value.lastIndex"
]
},
{
"$mergeObjects": [
{
tokens: 0
},
{
"$arrayElemAt": [
"$times",
{
$subtract: [
"$$value.lastIndex",
1
]
}
]
},
{
time: "$$this"
}
]
}
]
}
]
]
},
lastIndex: {
$cond: [
{
$in: [
"$$this",
"$times.time"
]
},
{
$sum: [
"$$value.lastIndex",
1
]
},
"$$value.lastIndex"
]
}
}
}
}
}
},
{
$unwind: "$times.values"
},
{
$group: {
_id: "$times.values.time",
holdings: {
$push: {
owner: "$_id",
tokens: "$times.values.tokens"
}
}
}
},
{
$project: {
_id: 0,
time: "$_id",
holdings: 1
}
},
{
$sort: {
time: 1
}
}
])
This is still quite a heavy query as it requires to $unwind and $group the entire collection, however there is no workaround this due to the requirements. if the collection is too big for this approach I recommend iteration owner by owner, or time by time and doing separate updates accordingly.
Mongo Playground
If you don't care about performance at all and want it in a single query you can still use the same pipeline, you will have to first extract the max time in the collection, this will require you to add an initial $group stage, like so:
db.collection.aggregate([
{
$group: {
_id: null,
maxTime: {
$max: "$time"
},
roots: {
$push: "$$ROOT"
}
}
},
{
$unwind: "$roots"
},
{
$replaceRoot: {
newRoot: {
"$mergeObjects": [
"$roots",
{
maxTime: "$maxTime"
}
]
}
}
},
... same pipeline ...
])
I have a similar collection where I have sort them by their startTime:
{"name": 'A', "startTime": '1634626355', "endTime": '1634631405'}
{"name": 'A', "startTime": '1634631406', "endTime": '1634631864'}
{"name": 'A', "startTime": '1634631865', "endTime": '1634656048'}
{"name": 'A', "startTime": '1634712642', "endTime": '1634718856'}
How can I compare the documents such that if the document endTime and the next document startTime duration is less than 5 minutes, merge it.
This is the result I'm trying to achieve (The 1st 3 documents are merged into 1 where it uses the startTime of the 1st document and the endTime of the 3rd document):
{"name": 'A', "startTime": '1634626355', "endTime": '1634656048'}
{"name": 'A', "startTime": '1634712642', "endTime": '1634718856'}
Thanks
First of all, you should never store date/time values as string, it's a design flaw. Store always proper Date object.
This solution works without self-lookup, so it may perform better:
db.collection.aggregate([
{
$set: {
startDateTime: { $toDate: { $multiply: ["$startTime", 1000] } },
endDateTime: { $toDate: { $multiply: ["$endTime", 1000] } }
},
},
{ $sort: { startDateTime: 1 } },
{ $group: { _id: null, data: { $push: "$$ROOT" } } },
{
$set: {
data: {
$reduce: {
input: "$data",
initialValue: [],
in: {
$cond: {
if: {
$or: [
{ $eq: [{ $size: "$$value" }, 0] }, // for the initail element
{
$gt: [
{
$dateDiff: { // calculate difference
endDate: "$$this.startDateTime",
startDate: { $last: "$$value.endDateTime" },
unit: "minute"
}
},
5 // more than 5 Minutes
]
}
]
},
then: { $concatArrays: ["$$value", ["$$this"]] }, // append new element
else: {
$map: {
input: "$$value",
as: "data",
in: {
$cond: {
if: { $eq: ["$$data._id", { $last: "$$value._id" }] }, // find last element
then: { // update last element
$mergeObjects: [
"$$data",
{ endDateTime: "$$this.endDateTime" },
{ endTime: "$$this.endTime" }
]
},
else: "$$data"
}
}
}
}
}
}
}
}
}
},
// some cosmetic
{ $unwind: "$data" },
{ $replaceRoot: { newRoot: "$data" } }
])
Mongo Playground
You can use $lookup in an aggregation pipeline to find out the documents that you need to remove. Then, perform a forEach to remove them.
db.collection.aggregate([
{
$addFields: {
endDateTime: {
"$toDate": {
"$multiply": [
{
$toLong: "$endTime"
},
1000
]
}
}
},
},
{
"$lookup": {
"from": "collection",
let: {
end: "$endDateTime"
},
pipeline: [
{
"$addFields": {
startDateTime: {
"$toDate": {
"$multiply": [
{
$toLong: "$startTime"
},
1000
]
}
}
}
},
{
$match: {
$expr: {
$and: [
{
$lte: [
{
$subtract: [
"$startDateTime",
"$$end"
]
},
300000
]
},
{
$lte: [
"$$end",
"$startDateTime"
]
}
]
}
}
}
],
"as": "lessThan5min"
}
},
{
"$unwind": "$lessThan5min"
},
{
"$replaceRoot": {
"newRoot": "$lessThan5min"
}
}
]).forEach(function(doc){
db.collection.remove({ "_id": doc._id });
});
Here is the Mongo playground to find out the documents that you need to remove for your reference.
Collection:
[
{
"name": "device1",
"type": "a",
"para": {
"number": 3
}
},
{
"name": "device2",
"type": "b",
"additional": "c",
"para": {
"number": 1
}
}
]
My query:
db.collection.aggregate([
{
"$addFields": {
"arrayofkeyvalue": {
"$objectToArray": "$$ROOT"
}
}
},
{
"$unwind": "$arrayofkeyvalue"
},
{
"$group": {
"_id": null,
"allkeys": {
"$addToSet": "$arrayofkeyvalue.k"
}
}
}
])
The output currently:
[
{
"_id": null,
"allkeys": [
"additional",
"_id",
"para",
"type",
"name"
]
}
]
Detail see Playground
What I want to do is add a new column which includes all of top key of the mongodb query output, exclude "para". And then combine it with the old collection to form a new json.
Is it possible?
The expected result:
{
"column": [{"prop": "name"}, {"prop": "type"}, {"prop": "additional"}],
"columnData": [
{
"name": "device1",
"type": "a",
"para": {
"number": 3
}
},
{
"name": "device2",
"type": "b",
"additional": "c",
"para": {
"number": 1
}
}
]
}
You have the right general idea in mind, here's how I would do it by utilizing operators like $filter, $map and $reduce to manipulate the objects structure.
I separated the aggregation into 3 parts for readability but you can just merge stage 2 and 3 if you wish.
db.collection.aggregate([
{
"$group": {
"_id": null,
columnData: {
$push: "$$ROOT"
},
"keys": {
"$push": {
$map: {
input: {
"$objectToArray": "$$ROOT"
},
as: "field",
in: "$$field.k"
}
}
}
}
},
{
"$addFields": {
unionedKeys: {
$filter: {
input: {
$reduce: {
input: "$keys",
initialValue: [],
in: {
"$setUnion": [
"$$this",
"$$value"
]
}
}
},
as: "item",
cond: {
$not: {
"$setIsSubset": [
[
"$$item"
],
[
"_id",
"para"
]
]
}
}
}
}
}
},
{
$project: {
_id: 0,
columnData: 1,
column: {
$map: {
input: "$unionedKeys",
as: "key",
in: {
prop: "$$key"
}
}
}
}
}
])
Mongo Playground