mongodb can $unionWith use in $push - mongodb

I want to get related data based on current item processing.
Sample:
[
{ field1: 1, field2: 2, value: 12 },
{ field1: 1, field2: 2, value: 21 },
{ field1: 1, value: 1 },
{ field2: 2, value: 2 },
{ field1: 2, field2: 3, value: 23 }
];
and result:
[
{
_id: { field1: 1, field2: 2 },
value: [12, 12],
relatedValue: [1, 2], // of item 1 and 2 because field 1 = 1 or field 2 = 2
},
];
Sample query:
db.collectionA.aggregate([
{
$match: { field1: 1 }
},
{
"$group":{
"_id":{
"field1":"$field1",
"field2":"$field2"
},
"alerts":{
"$push":{
"_id":"$_id",
"value":"$value",
"relatedData": {
"$unionWith": {
"coll": "collectionA",
"pipeline": [{
"$match": {
"$or": [
{ "field1": "$field1" },
{ "field2": "$field2" }
]
}
}]
}
}
}
}
}
}
])
I tried run this query but error, Please help me fix or give a solution
// Edited: value should be array because I want to group data by field1, field2 and push all value of group to an array

You're trying to use $unionWith within $group but it is a "pipeline stage" meaning it can't be used like that, the same way you can't use $group within a $group.
Additionally this stage is used to "union" two collections and not to populate data based on value matches ( which it seems you're trying to do here ), for this case you want to use $lookup, like so:
db.collection.aggregate([
{
$lookup: {
from: "collection",
let: {
field1: "$field1",
field2: "$field2",
docId: "$_id"
},
pipeline: [
{
$match: {
$expr: {
$and: [
{
$or: [
{
$eq: [
"$$field1",
"$field1"
]
},
{
$eq: [
"$$field2",
"$field2"
]
}
]
},
{
$ne: [
"$$docId",
"$_id"
]
}
]
}
}
},
{
$project: {
value: 1
}
}
],
as: "relatedData"
}
},
{
$group: {
_id: {
field1: "$field1",
field2: "$field2"
},
values: {
$push: "$value"
},
relatedValue: {
$push: {
$map: {
input: "$relatedData",
in: "$$this.value"
}
}
}
}
},
{
$project: {
field1: "$_id.field1",
field2: "$_id.field2",
values: 1,
relatedValues: {
"$setDifference": [
{
"$reduce": {
input: "$relatedValue",
initialValue: [],
in: {
"$setUnion": [
"$$this",
"$$value"
]
}
}
},
"$values"
]
}
}
}
])
Mongo Playground

Related

Get current state from snapshot documents - mongoDB

I'm trying to get a list of current holders at specific times from a collection. My collection looks like this:
[
{
"time": 1,
"holdings": [
{ "owner": "A", "tokens": 2 },
{ "owner": "B", "tokens": 1 }
]
},
{
"time": 2,
"holdings": [
{ "owner": "B", "tokens": 2 }
]
},
{
"time": 3,
"holdings": [
{ "owner": "A", "tokens": 3 },
{ "owner": "B", "tokens": 1 },
{ "owner": "C", "tokens": 1 }
]
},
{
"time": 4,
"holdings": [
{ "owner": "C", "tokens": 0 }
]
}
]
tokens show the current holdings of an owner if the holdings have changed to the last document. I would like to change the collection so that holdings always includes the full current holdings for any point in time.
At time: 1, the holdings are: A: 2, B: 1.
At time: 2, the holdings are: A: 2, B: 2. The collections does not include A's holdings however, because they haven't changed. So what I'd like to get is:
[
{
"time": 1,
"holdings": [
{ "owner": "A", "tokens": 2 },
{ "owner": "B", "tokens": 1 }
]
},
{
"time": 2,
"holdings": [
{ "owner": "A", "tokens": 2 }, // merged from prev doc.
{ "owner": "B", "tokens": 2 }
]
},
{
"time": 3,
"holdings": [
{ "owner": "A", "tokens": 3 },
{ "owner": "B", "tokens": 1 },
{ "owner": "C", "tokens": 1 }
]
},
{
"time": 4,
"holdings": [
{ "owner": "A", "tokens": 3 }, // merged from prev
{ "owner": "B", "tokens": 1 }, // merged from prev
{ "owner": "C", "tokens": 0 }
]
}
]
From what I understand $mergeObjects does that, but I don't understand how I can merge all previous docs in order up to the current doc for each doc. So I'm looking for a way to combine setWindowFields with mergeObjects I think.
This is a nice challenge.
So far, I got this complicated solution:
Get all of our timestamps in all of our documents. This is the purpose of the first 4 steps. $setWindowFields is used to accumulate this data.
$group by owner and calculate the empty timestamps as wantedTimes- next 5 steps.
$set empty timestamps with tokens: null to be filled with actual data and $unwind to separate - next 3 steps
Use $setWindowFields to find the last known token for each owner at each timestamp.
Fill this last known state for documents with unknown token - 2 steps
$group and format answer:
db.collection.aggregate([
{
$setWindowFields: {
sortBy: {time: 1},
output: {
allTimes: {$addToSet: "$time", window: {documents: ["unbounded", "current"]}
}
}
}
},
{
$setWindowFields: {
sortBy: {time: -1},
output: {
allTimes: {$addToSet: "$allTimes", window: {documents: ["unbounded", "current"]}
}
}
}
},
{
$set: {
allTimes: {
$reduce: {
input: "$allTimes",
initialValue: [],
in: {"$concatArrays": ["$$value", "$$this"]}
}
}
}
},
{$set: {allTimes: {$setIntersection: "$allTimes"}}},
{$unwind: "$holdings"},
{$sort: {time: 1}},
{$group: { _id: "$holdings.owner",
tokens: {$push: {tokens: "$holdings.tokens", time: "$time"}},
times: {$push: "$time"}, firstTime: {$first: "$time"},
allTimes: {$first: "$allTimes"}}
},
{
$addFields: {
wantedTimes: {
$filter: {
input: "$allTimes",
as: "item",
cond: {$gte: ["$$item", "$firstTime"]}
}
}
}
},
{
$project: {
tokens: 1,
wantedTimes: {$setDifference: ["$wantedTimes", "$times"]}
}
},
{
$set: {
data: {
$map: {
input: "$wantedTimes",
as: "item",
in: {time: "$$item", tokens: null}
}
}
}
},
{$project: {tokens: {"$concatArrays": ["$tokens", "$data"]}}},
{$unwind: "$tokens"},
{
$setWindowFields: {
partitionBy: "$_id",
sortBy: {"tokens.time": 1},
output: {
lastTokens: {
$push: "$tokens.tokens",
window: {documents: ["unbounded", "current"]}
}
}
}
},
{
$set: {
lastTokens: {
$filter: {
input: "$lastTokens",
as: "item",
cond: {$ne: ["$$item", null]}
}
}
}
},
{
$set: {
"tokens.tokens": {$ifNull: ["$tokens.tokens", {$last: "$lastTokens"}]}
}
},
{
$group: {
_id: "$tokens.time",
holdings: {$push: {owner: "$_id", tokens: "$tokens.tokens" }}
}
},
{$project: {time: "$_id", holdings: 1, _id: 0}},
{$sort: {time: 1}}
])
Playground example
From a performance perspective I recommend you split it into 2 calls, the first will be a quick findOne just to get the maximum time value in the collection.
Once you have that value the pipeline can be much leaner:
const maxItem = await db.collection.findOne({}).sort({ time: -1 });
db.collection.aggregate([
{
$unwind: "$holdings"
},
{
$group: {
_id: "$holdings.owner",
times: {
$push: {
time: "$time",
tokens: "$holdings.tokens"
}
},
minTime: {
$min: "$time"
}
}
},
{
$addFields: {
times: {
$reduce: {
input: {
$range: [
"$minTime",
maxItem.time + 1 // this is max time
]
},
initialValue: {
values: [],
lastIndex: 0
},
in: {
values: {
"$concatArrays": [
"$$value.values",
[
{
$cond: [
{
$in: [
"$$this",
"$times.time"
]
},
{
"$arrayElemAt": [
"$times",
"$$value.lastIndex"
]
},
{
"$mergeObjects": [
{
tokens: 0
},
{
"$arrayElemAt": [
"$times",
{
$subtract: [
"$$value.lastIndex",
1
]
}
]
},
{
time: "$$this"
}
]
}
]
}
]
]
},
lastIndex: {
$cond: [
{
$in: [
"$$this",
"$times.time"
]
},
{
$sum: [
"$$value.lastIndex",
1
]
},
"$$value.lastIndex"
]
}
}
}
}
}
},
{
$unwind: "$times.values"
},
{
$group: {
_id: "$times.values.time",
holdings: {
$push: {
owner: "$_id",
tokens: "$times.values.tokens"
}
}
}
},
{
$project: {
_id: 0,
time: "$_id",
holdings: 1
}
},
{
$sort: {
time: 1
}
}
])
This is still quite a heavy query as it requires to $unwind and $group the entire collection, however there is no workaround this due to the requirements. if the collection is too big for this approach I recommend iteration owner by owner, or time by time and doing separate updates accordingly.
Mongo Playground
If you don't care about performance at all and want it in a single query you can still use the same pipeline, you will have to first extract the max time in the collection, this will require you to add an initial $group stage, like so:
db.collection.aggregate([
{
$group: {
_id: null,
maxTime: {
$max: "$time"
},
roots: {
$push: "$$ROOT"
}
}
},
{
$unwind: "$roots"
},
{
$replaceRoot: {
newRoot: {
"$mergeObjects": [
"$roots",
{
maxTime: "$maxTime"
}
]
}
}
},
... same pipeline ...
])

Move field to a specific object in an array in MongoDB

Here's my data:
{
foos: [
{ _id: 1 },
{ _id: 2 },
{ _id: 3 }
],
bar: "baz"
}
Now I wanna move field bar into foos object with _id = 2 to have this:
{
foos: [
{ _id: 1 },
{ _id: 2, bar: "baz" },
{ _id: 3 },
]
}
How can I do this using aggregation framework?
$map to iterate loop of foos and check _id: 2 condition, if match then return bar object and merge with current object $mergeObjects
db.collection.aggregate([
{
$project: {
foos: {
$map: {
input: "$foos",
in: {
$cond: [
{ $eq: ["$$this._id", 2] },
{ $mergeObjects: ["$$this", { bar: "$bar" }] },
"$$this"
]
}
}
}
}
}
])
Playground

How to convert an array to object in mongodb query?

I am new to mongodb and wanted to convert my array to object using pipeline. For example,
{
field1: [1,2,3,4,5],
field2: [‘a’,’b’,’c’,’d’,’e’],
}
I want the above document to be converted to,
{
fields: [
{
field1: 1,
field2: ‘a’
},
......
{
field1: 5,
field2: ‘e’
}
]
}
Any idea how I can achieve this?
You can use $unwind to separate your arrays.
And then format your new list with $project without forgetting to remove the duplicates created by the $unwind.
db.collection.aggregate({
"$unwind": {
path: "$field1",
includeArrayIndex: "field1_index"
}
},
{
"$unwind": {
"path": "$field2",
"includeArrayIndex": "field2_index"
}
},
{
"$project": {
"fields": {
"field1": "$field1",
"field2": "$field2"
},
"diff": {
$cmp: [
"$field1_index",
"$field2_index"
]
}
}
},
{
"$match": {
"diff": 0
}
},
{
$group: {
_id: "$_id",
fields: {
$push: "$fields"
}
}
})
Try it here
You can use $zip and $map and $reduce to achieve this:
db.collection.aggregate([
{
"$addFields": {
fields: {
$reduce: {
input: {
$zip: {
inputs: [
{
$map: {
input: "$field1",
as: "f1",
in: {
field1: "$$f1"
}
}
},
{
$map: {
input: "$field2",
as: "f2",
in: {
field2: "$$f2"
}
}
}
]
}
},
initialValue: [],
in: {
"$concatArrays": [
[
{
"$mergeObjects": "$$this"
}
],
"$$value"
]
}
}
}
}
}
])
MongoPlayground
Make sure both field1 and field2 are of equal length or you will lose some data.

$group inner array values without $unwind

I want to group objects in the array by same value for specified field and produce a count.
I have the following mongodb document (non-relevant fields are not present).
{
arrayField: [
{ fieldA: value1, ...otherFields },
{ fieldA: value2, ...otherFields },
{ fieldA: value2, ...otherFields }
],
...otherFields
}
The following is what I want.
{
arrayField: [
{ fieldA: value1, ...otherFields },
{ fieldA: value2, ...otherFields },
{ fieldA: value2, ...otherFields }
],
newArrayField: [
{ fieldA: value1, count: 1 },
{ fieldA: value2, count: 2 },
],
...otherFields
}
Here I grouped embedded documents by fieldA.
I know how to do it with unwind and 2 group stages the following way. (irrelevant stages are ommited)
Concrete example
// document structure
{
_id: ObjectId(...),
type: "test",
results: [
{ choice: "a" },
{ choice: "b" },
{ choice: "a" }
]
}
db.test.aggregate([
{ $match: {} },
{
$unwind: {
path: "$results",
preserveNullAndEmptyArrays: true
}
},
{
$group: {
_id: {
_id: "$_id",
type: "$type",
choice: "$results.choice",
},
count: { $sum: 1 }
}
},
{
$group: {
_id: {
_id: "$_id._id",
type: "$_id.type",
result: "$results.choice",
},
groupedResults: { $push: { count: "$count", choice: "$_id.choice" } }
}
}
])
You can use below aggregation
db.test.aggregate([
{ "$addFields": {
"newArrayField": {
"$map": {
"input": { "$setUnion": ["$arrayField.fieldA"] },
"as": "m",
"in": {
"fieldA": "$$m",
"count": {
"$size": {
"$filter": {
"input": "$arrayField",
"as": "d",
"cond": { "$eq": ["$$d.fieldA", "$$m"] }
}
}
}
}
}
}
}}
])
The below adds a new array field, which is generated by:
Using $setUnion to get unique set of array items, with inner $map to
extract only the choice field
Using $map on the unique set of items,
with inner $reduce on the original array, to sum all items where
choice matches
Pipeline:
db.test.aggregate([{
$addFields: {
newArrayField: {
$map: {
input: {
$setUnion: [{
$map: {
input: "$results",
in: { choice: "$$this.choice" }
}
}
]
},
as: "i",
in: {
choice: '$$i.choice',
count: {
$reduce: {
input: "$results",
initialValue: 0,
in: {
$sum: ["$$value", { $cond: [ { $eq: [ "$$this.choice", "$$i.choice" ] }, 1, 0 ] }]
}
}
}
}
}
}
}
}])
The $reduce will iterate over the results array n times, where n is the number of unique values of choice, so the performance will depend on that.

Grouping and counting across documents?

I have a collection with documents similar to the following format:
{
departure:{name: "abe"},
arrival:{name: "tom"}
},
{
departure:{name: "bob"},
arrival:{name: "abe"}
}
And to get output like so:
{
name: "abe",
departureCount: 1,
arrivalCount: 1
},
{
name: "bob",
departureCount: 1,
arrivalCount: 0
},
{
name: "tom",
departureCount: 0,
arrivalCount: 1
}
I'm able to get the counts individually by doing a query for the specific data like so:
db.sched.aggregate([
{
"$group":{
_id: "$departure.name",
departureCount: {$sum: 1}
}
}
])
But I haven't figured out how to merge the arrival and departure name into one document along with counts for both. Any suggestions on how to accomplish this?
You should use a $map to split your doc into 2, then $unwind and $group..
[
{
$project: {
dep: '$departure.name',
arr: '$arrival.name'
}
},
{
$project: {
f: {
$map: {
input: {
$literal: ['dep', 'arr']
},
as: 'el',
in : {
type: '$$el',
name: {
$cond: [{
$eq: ['$$el', 'dep']
}, '$dep', '$arr']
}
}
}
}
}
},
{
$unwind: '$f'
}, {
$group: {
_id: {
'name': '$f.name'
},
departureCount: {
$sum: {
$cond: [{
$eq: ['$f.type', 'dep']
}, 1, 0]
}
},
arrivalCount: {
$sum: {
$cond: [{
$eq: ['$f.type', 'arr']
}, 1, 0]
}
}
}
}, {
$project: {
_id: 0,
name: '$_id.name',
departureCount: 1,
arrivalCount: 1
}
}
]