mongodb join collection without nested result - mongodb

I have two collections on which I want to do a query. Based on some condition it should return the results from both the collections in separate objects. I have looked for online solutions but didn't find anything.
sample
col1: [
{
_id: "st_123",
stud_num: 123,
school: "sc_123"
},
{
_id: "st_234",
stud_num: 123,
school: "sc_234"
},
{
_id: "st_345",
stud_num: 123,
school: "sc_345"
}
]
col2: [
{
_id: "f_123",
stud_health_id: "st_123",
schoolYear: "sy123",
fk_school: "sc_123"
},
{
_id: "f_234",
stud_health_id: "st_234",
schoolYear: "sy234",
fk_school: "sc_234"
},
{
_id: "f_345",
stud_health_id: "st_234",
schoolYear: "sy234",
fk_school: "sc_237"
}
]
When I send some filter like { std_id: 'st_123', school_id: 'sc_123' }, it should return this.
[
{
_id: "st_123",
stud_num: 123,
school: "sc_123"
},
{
_id: "f_123",
stud_health_id: "st_123",
schoolYear: "sy123",
fk_school: "sc_123"
},
]
The request param std_id matches with col1 _id and col2 stud_health_id. The school_id matches with col1 school and col2 fk_school. The objective of this is to merge the two so it can be also paginated. Is there any way this can be done with aggregation. Both of the collections will have thousands of record. I have tried lookup but that returns a nested array inside of col1 documents.

You can try this one:
db.collection.aggregate([
{
"$project": {
a: {
"$concatArrays": [
"$col1",
"$col2"
]
}
}
},
{
$project: {
a: {
$filter: {
input: "$a",
cond: {
$and: [
{
$eq: [
"$$this._id",
"st_123"
]
},
{
$eq: [
"$$this.school",
"sc_123"
]
}
]
}
}
}
}
},
{
"$unwind": "$a"
},
{
$replaceRoot: {
newRoot: "$a"
}
}
])
https://mongoplayground.net/p/rRzubuZXW8W

Related

Get current state from snapshot documents - mongoDB

I'm trying to get a list of current holders at specific times from a collection. My collection looks like this:
[
{
"time": 1,
"holdings": [
{ "owner": "A", "tokens": 2 },
{ "owner": "B", "tokens": 1 }
]
},
{
"time": 2,
"holdings": [
{ "owner": "B", "tokens": 2 }
]
},
{
"time": 3,
"holdings": [
{ "owner": "A", "tokens": 3 },
{ "owner": "B", "tokens": 1 },
{ "owner": "C", "tokens": 1 }
]
},
{
"time": 4,
"holdings": [
{ "owner": "C", "tokens": 0 }
]
}
]
tokens show the current holdings of an owner if the holdings have changed to the last document. I would like to change the collection so that holdings always includes the full current holdings for any point in time.
At time: 1, the holdings are: A: 2, B: 1.
At time: 2, the holdings are: A: 2, B: 2. The collections does not include A's holdings however, because they haven't changed. So what I'd like to get is:
[
{
"time": 1,
"holdings": [
{ "owner": "A", "tokens": 2 },
{ "owner": "B", "tokens": 1 }
]
},
{
"time": 2,
"holdings": [
{ "owner": "A", "tokens": 2 }, // merged from prev doc.
{ "owner": "B", "tokens": 2 }
]
},
{
"time": 3,
"holdings": [
{ "owner": "A", "tokens": 3 },
{ "owner": "B", "tokens": 1 },
{ "owner": "C", "tokens": 1 }
]
},
{
"time": 4,
"holdings": [
{ "owner": "A", "tokens": 3 }, // merged from prev
{ "owner": "B", "tokens": 1 }, // merged from prev
{ "owner": "C", "tokens": 0 }
]
}
]
From what I understand $mergeObjects does that, but I don't understand how I can merge all previous docs in order up to the current doc for each doc. So I'm looking for a way to combine setWindowFields with mergeObjects I think.
This is a nice challenge.
So far, I got this complicated solution:
Get all of our timestamps in all of our documents. This is the purpose of the first 4 steps. $setWindowFields is used to accumulate this data.
$group by owner and calculate the empty timestamps as wantedTimes- next 5 steps.
$set empty timestamps with tokens: null to be filled with actual data and $unwind to separate - next 3 steps
Use $setWindowFields to find the last known token for each owner at each timestamp.
Fill this last known state for documents with unknown token - 2 steps
$group and format answer:
db.collection.aggregate([
{
$setWindowFields: {
sortBy: {time: 1},
output: {
allTimes: {$addToSet: "$time", window: {documents: ["unbounded", "current"]}
}
}
}
},
{
$setWindowFields: {
sortBy: {time: -1},
output: {
allTimes: {$addToSet: "$allTimes", window: {documents: ["unbounded", "current"]}
}
}
}
},
{
$set: {
allTimes: {
$reduce: {
input: "$allTimes",
initialValue: [],
in: {"$concatArrays": ["$$value", "$$this"]}
}
}
}
},
{$set: {allTimes: {$setIntersection: "$allTimes"}}},
{$unwind: "$holdings"},
{$sort: {time: 1}},
{$group: { _id: "$holdings.owner",
tokens: {$push: {tokens: "$holdings.tokens", time: "$time"}},
times: {$push: "$time"}, firstTime: {$first: "$time"},
allTimes: {$first: "$allTimes"}}
},
{
$addFields: {
wantedTimes: {
$filter: {
input: "$allTimes",
as: "item",
cond: {$gte: ["$$item", "$firstTime"]}
}
}
}
},
{
$project: {
tokens: 1,
wantedTimes: {$setDifference: ["$wantedTimes", "$times"]}
}
},
{
$set: {
data: {
$map: {
input: "$wantedTimes",
as: "item",
in: {time: "$$item", tokens: null}
}
}
}
},
{$project: {tokens: {"$concatArrays": ["$tokens", "$data"]}}},
{$unwind: "$tokens"},
{
$setWindowFields: {
partitionBy: "$_id",
sortBy: {"tokens.time": 1},
output: {
lastTokens: {
$push: "$tokens.tokens",
window: {documents: ["unbounded", "current"]}
}
}
}
},
{
$set: {
lastTokens: {
$filter: {
input: "$lastTokens",
as: "item",
cond: {$ne: ["$$item", null]}
}
}
}
},
{
$set: {
"tokens.tokens": {$ifNull: ["$tokens.tokens", {$last: "$lastTokens"}]}
}
},
{
$group: {
_id: "$tokens.time",
holdings: {$push: {owner: "$_id", tokens: "$tokens.tokens" }}
}
},
{$project: {time: "$_id", holdings: 1, _id: 0}},
{$sort: {time: 1}}
])
Playground example
From a performance perspective I recommend you split it into 2 calls, the first will be a quick findOne just to get the maximum time value in the collection.
Once you have that value the pipeline can be much leaner:
const maxItem = await db.collection.findOne({}).sort({ time: -1 });
db.collection.aggregate([
{
$unwind: "$holdings"
},
{
$group: {
_id: "$holdings.owner",
times: {
$push: {
time: "$time",
tokens: "$holdings.tokens"
}
},
minTime: {
$min: "$time"
}
}
},
{
$addFields: {
times: {
$reduce: {
input: {
$range: [
"$minTime",
maxItem.time + 1 // this is max time
]
},
initialValue: {
values: [],
lastIndex: 0
},
in: {
values: {
"$concatArrays": [
"$$value.values",
[
{
$cond: [
{
$in: [
"$$this",
"$times.time"
]
},
{
"$arrayElemAt": [
"$times",
"$$value.lastIndex"
]
},
{
"$mergeObjects": [
{
tokens: 0
},
{
"$arrayElemAt": [
"$times",
{
$subtract: [
"$$value.lastIndex",
1
]
}
]
},
{
time: "$$this"
}
]
}
]
}
]
]
},
lastIndex: {
$cond: [
{
$in: [
"$$this",
"$times.time"
]
},
{
$sum: [
"$$value.lastIndex",
1
]
},
"$$value.lastIndex"
]
}
}
}
}
}
},
{
$unwind: "$times.values"
},
{
$group: {
_id: "$times.values.time",
holdings: {
$push: {
owner: "$_id",
tokens: "$times.values.tokens"
}
}
}
},
{
$project: {
_id: 0,
time: "$_id",
holdings: 1
}
},
{
$sort: {
time: 1
}
}
])
This is still quite a heavy query as it requires to $unwind and $group the entire collection, however there is no workaround this due to the requirements. if the collection is too big for this approach I recommend iteration owner by owner, or time by time and doing separate updates accordingly.
Mongo Playground
If you don't care about performance at all and want it in a single query you can still use the same pipeline, you will have to first extract the max time in the collection, this will require you to add an initial $group stage, like so:
db.collection.aggregate([
{
$group: {
_id: null,
maxTime: {
$max: "$time"
},
roots: {
$push: "$$ROOT"
}
}
},
{
$unwind: "$roots"
},
{
$replaceRoot: {
newRoot: {
"$mergeObjects": [
"$roots",
{
maxTime: "$maxTime"
}
]
}
}
},
... same pipeline ...
])

MongoDB - Aggregate get specific objects in an array

How can I get only objects in the sales array matching with 2021-10-14 date ?
My aggregate query currently returns all objects of the sales array if at least one is matching.
Dataset Documents
{
"name": "#0",
"sales": [{
"date": "2021-10-14",
"price": 3.69,
},{
"date": "2021-10-15",
"price": 2.79,
}]
},
{
"name": "#1",
"sales": [{
"date": "2021-10-14",
"price": 1.5,
}]
}
Aggregate
{
$match: {
sales: {
$elemMatch: {
date: '2021-10-14',
},
},
},
},
{
$group: {
_id: 0,
data: {
$push: '$sales',
},
},
},
{
$project: {
data: {
$reduce: {
input: '$data',
initialValue: [],
in: {
$setUnion: ['$$value', '$$this'],
},
},
},
},
}
Result
{"date": "2021-10-14","price": 3.69},
{"date": "2021-10-15","price": 2.79},
{"date": "2021-10-14","price": 1.5}
Result Expected
{"date": "2021-10-14","price": 3.69},
{"date": "2021-10-14","price": 1.5}
You actually need to use a $replaceRoot or $replaceWith pipeline which takes in an expression that gives you the resulting document filtered using $arrayElemAt (or $first) and $filter from the sales array:
[
{ $match: { 'sales.date': '2021-10-14' } },
{ $replaceWith: {
$arrayElemAt: [
{
$filter: {
input: '$sales',
cond: { $eq: ['$$this.date', '2021-10-14'] }
}
},
0
]
} }
]
OR
[
{ $match: { 'sales.date': '2021-10-14' } },
{ $replaceRoot: {
newRoot: {
$arrayElemAt: [
{
$filter: {
input: '$sales',
cond: { $eq: ['$$this.date', '2021-10-14'] }
}
},
0
]
}
} }
]
Mongo Playground
In $project stage, you need $filter operator with input as $reduce operator to filter the documents.
{
$project: {
data: {
$filter: {
input: {
$reduce: {
input: "$data",
initialValue: [],
in: {
$setUnion: [
"$$value",
"$$this"
],
}
}
},
cond: {
$eq: [
"$$this.date",
"2021-10-14"
]
}
}
}
}
}
Sample Mongo Playground
How about using $unwind:
.aggregate([
{$match: { sales: {$elemMatch: {date: '2021-10-14'} } }},
{$unwind: '$sales'},
{$match: {'sales.date': '2021-10-14'}},
{$project: {date: '$sales.date', price: '$sales.price', _id: 0}}
])
This will separate the sales into different documents, each containing only one sale, and allow you to match conditions easily.
See: https://docs.mongodb.com/manual/reference/operator/aggregation/unwind/

How can I exclude results that contain a specific element from grouped results?

A: It should be output how many _ids are included by date grouped by date.
B: The number of elements in details in A.
If it has element, count 1. not 0. If the document is as follows, the value counted after excluding from A becomes B
{
_id: ObjectId
details: array //no elements
createdAt: Date
}
C: The count of B becomes C, except when there are specific details.slaesManagerIds among B.
details.salesManagerIds is provided as an array.
For examples,
[ObjecttId("612f57184205db63a3396a9e"), ObjectId("612cb021278f621a222087d7")]
I made query as follows.
https://mongoplayground.net/p/6sBxAmO_31y
It goes well until B. How can I write a query to get C ?
If you write and execute a query that can obtain C through the link above, you should get the following result.
[
{
"A": 2,
"B": 1,
"C": 1,
"_id": "2018-05-19"
},
{
"A": 3,
"B": 3,
"C": 1,
"_id": "2018-05-18"
}
]
use $filter
db.collection.aggregate([
{
$group: {
_id: {
$dateToString: {
format: "%Y-%m-%d",
date: "$createdAt"
}
},
A: {
$sum: 1
},
B: {
$sum: {
$cond: [
{
$and: [
{
$isArray: "$details"
},
{
$gt: [
{
$size: "$details"
},
0
]
}
]
},
1,
0
]
}
},
C: {
$sum: {
$cond: [
{
$and: [
{
$isArray: "$details"
},
{
$gt: [
{
$size: "$details"
},
0
]
},
{
$gt: [
{
$size: {
$filter: {
input: "$details",
as: "d",
cond: {
$and: [
{
$not: [
{
$in: [
"$$d.salesManagerId",
[
ObjectId("612f57184205db63a3396a9e"),
ObjectId("612cb021278f621a222087d7")
]
]
}
]
}
]
}
}
}
},
0
]
}
]
},
1,
0
]
}
}
}
},
{
$sort: {
_id: -1
}
}
])
mongoplayground

mongodb can $unionWith use in $push

I want to get related data based on current item processing.
Sample:
[
{ field1: 1, field2: 2, value: 12 },
{ field1: 1, field2: 2, value: 21 },
{ field1: 1, value: 1 },
{ field2: 2, value: 2 },
{ field1: 2, field2: 3, value: 23 }
];
and result:
[
{
_id: { field1: 1, field2: 2 },
value: [12, 12],
relatedValue: [1, 2], // of item 1 and 2 because field 1 = 1 or field 2 = 2
},
];
Sample query:
db.collectionA.aggregate([
{
$match: { field1: 1 }
},
{
"$group":{
"_id":{
"field1":"$field1",
"field2":"$field2"
},
"alerts":{
"$push":{
"_id":"$_id",
"value":"$value",
"relatedData": {
"$unionWith": {
"coll": "collectionA",
"pipeline": [{
"$match": {
"$or": [
{ "field1": "$field1" },
{ "field2": "$field2" }
]
}
}]
}
}
}
}
}
}
])
I tried run this query but error, Please help me fix or give a solution
// Edited: value should be array because I want to group data by field1, field2 and push all value of group to an array
You're trying to use $unionWith within $group but it is a "pipeline stage" meaning it can't be used like that, the same way you can't use $group within a $group.
Additionally this stage is used to "union" two collections and not to populate data based on value matches ( which it seems you're trying to do here ), for this case you want to use $lookup, like so:
db.collection.aggregate([
{
$lookup: {
from: "collection",
let: {
field1: "$field1",
field2: "$field2",
docId: "$_id"
},
pipeline: [
{
$match: {
$expr: {
$and: [
{
$or: [
{
$eq: [
"$$field1",
"$field1"
]
},
{
$eq: [
"$$field2",
"$field2"
]
}
]
},
{
$ne: [
"$$docId",
"$_id"
]
}
]
}
}
},
{
$project: {
value: 1
}
}
],
as: "relatedData"
}
},
{
$group: {
_id: {
field1: "$field1",
field2: "$field2"
},
values: {
$push: "$value"
},
relatedValue: {
$push: {
$map: {
input: "$relatedData",
in: "$$this.value"
}
}
}
}
},
{
$project: {
field1: "$_id.field1",
field2: "$_id.field2",
values: 1,
relatedValues: {
"$setDifference": [
{
"$reduce": {
input: "$relatedValue",
initialValue: [],
in: {
"$setUnion": [
"$$this",
"$$value"
]
}
}
},
"$values"
]
}
}
}
])
Mongo Playground

Mongodb - aggregation $push if conditional

I am trying to aggregate a batch of documents. There are two fields in the documents I would like to $push. However, lets say they are "_id" and "A" fields, I only want $push "_id" and "A" if "A" is $gt 0.
I tried two approaches.
First one.
db.collection.aggregate([{
"$group":{
"field": {
"$push": {
"$cond":[
{"$gt":["$A", 0]},
{"id": "$_id", "A":"$A"},
null
]
}
},
"secondField":{"$push":"$B"}
}])
But this will push a null value to "field" and I don't want it.
Second one.
db.collection.aggregate([{
"$group":
"field": {
"$cond":[
{"$gt",["$A", 0]},
{"$push": {"id":"$_id", "A":"$A"}},
null
]
},
"secondField":{"$push":"$B"}
}])
The second one simply doesn't work...
Is there a way to skip the $push in else case?
ADDED:
Expected documents:
{
"_id":objectid(1),
"A":2,
"B":"One"
},
{
"_id":objectid(2),
"A":3,
"B":"Two"
},
{
"_id":objectid(3),
"B":"Three"
}
Expected Output:
{
"field":[
{
"A":"2",
"_id":objectid(1)
},
{
"A":"3",
"_id":objectid(2)
},
],
"secondField":["One", "Two", "Three"]
}
You can use "$$REMOVE":
This system variable was added in version 3.6 (mongodb docs)
db.collection.aggregate([{
$group:{
field: {
$push: {
$cond:[
{ $gt: ["$A", 0] },
{ id: "$_id", A:"$A" },
"$$REMOVE"
]
}
},
secondField:{ $push: "$B" }
}
])
In this way you don't have to filter nulls.
This is my answer to the question after reading the post suggested by #Veeram
db.collection.aggregate([{
"$group":{
"field": {
"$push": {
"$cond":[
{"$gt":["$A", 0]},
{"id": "$_id", "A":"$A"},
null
]
}
},
"secondField":{"$push":"$B"}
},
{
"$project": {
"A":{"$setDifference":["$A", [null]]},
"B":"$B"
}
}])
One more option is to use $filter operator:
db.collection.aggregate([
{
$group : {
_id: null,
field: { $push: { id: "$_id", A : "$A"}},
secondField:{ $push: "$B" }
}
},
{
$project: {
field: {
$filter: {
input: "$field",
as: "item",
cond: { $gt: [ "$$item.A", 0 ] }
}
},
secondField: "$secondField"
}
}])
On first step you combine your array and filter them on second step
$group: {
_id: '$_id',
tasks: {
$addToSet: {
$cond: {
if: {
$eq: [
{
$ifNull: ['$tasks.id', ''],
},
'',
],
},
then: '$$REMOVE',
else: {
id: '$tasks.id',
description: '$tasks.description',
assignee: {
$cond: {
if: {
$eq: [
{
$ifNull: ['$tasks.assignee._id', ''],
},
'',
],
},
then: undefined,
else: {
id: '$tasks.assignee._id',
name: '$tasks.assignee.name',
thumbnail: '$tasks.assignee.thumbnail',
status: '$tasks.assignee.status',
},
},
},
},
},
},
},
}