Mongodb - aggregation $push if conditional - mongodb

I am trying to aggregate a batch of documents. There are two fields in the documents I would like to $push. However, lets say they are "_id" and "A" fields, I only want $push "_id" and "A" if "A" is $gt 0.
I tried two approaches.
First one.
db.collection.aggregate([{
"$group":{
"field": {
"$push": {
"$cond":[
{"$gt":["$A", 0]},
{"id": "$_id", "A":"$A"},
null
]
}
},
"secondField":{"$push":"$B"}
}])
But this will push a null value to "field" and I don't want it.
Second one.
db.collection.aggregate([{
"$group":
"field": {
"$cond":[
{"$gt",["$A", 0]},
{"$push": {"id":"$_id", "A":"$A"}},
null
]
},
"secondField":{"$push":"$B"}
}])
The second one simply doesn't work...
Is there a way to skip the $push in else case?
ADDED:
Expected documents:
{
"_id":objectid(1),
"A":2,
"B":"One"
},
{
"_id":objectid(2),
"A":3,
"B":"Two"
},
{
"_id":objectid(3),
"B":"Three"
}
Expected Output:
{
"field":[
{
"A":"2",
"_id":objectid(1)
},
{
"A":"3",
"_id":objectid(2)
},
],
"secondField":["One", "Two", "Three"]
}

You can use "$$REMOVE":
This system variable was added in version 3.6 (mongodb docs)
db.collection.aggregate([{
$group:{
field: {
$push: {
$cond:[
{ $gt: ["$A", 0] },
{ id: "$_id", A:"$A" },
"$$REMOVE"
]
}
},
secondField:{ $push: "$B" }
}
])
In this way you don't have to filter nulls.

This is my answer to the question after reading the post suggested by #Veeram
db.collection.aggregate([{
"$group":{
"field": {
"$push": {
"$cond":[
{"$gt":["$A", 0]},
{"id": "$_id", "A":"$A"},
null
]
}
},
"secondField":{"$push":"$B"}
},
{
"$project": {
"A":{"$setDifference":["$A", [null]]},
"B":"$B"
}
}])

One more option is to use $filter operator:
db.collection.aggregate([
{
$group : {
_id: null,
field: { $push: { id: "$_id", A : "$A"}},
secondField:{ $push: "$B" }
}
},
{
$project: {
field: {
$filter: {
input: "$field",
as: "item",
cond: { $gt: [ "$$item.A", 0 ] }
}
},
secondField: "$secondField"
}
}])
On first step you combine your array and filter them on second step

$group: {
_id: '$_id',
tasks: {
$addToSet: {
$cond: {
if: {
$eq: [
{
$ifNull: ['$tasks.id', ''],
},
'',
],
},
then: '$$REMOVE',
else: {
id: '$tasks.id',
description: '$tasks.description',
assignee: {
$cond: {
if: {
$eq: [
{
$ifNull: ['$tasks.assignee._id', ''],
},
'',
],
},
then: undefined,
else: {
id: '$tasks.assignee._id',
name: '$tasks.assignee.name',
thumbnail: '$tasks.assignee.thumbnail',
status: '$tasks.assignee.status',
},
},
},
},
},
},
},
}

Related

MongoDb query to exclude omission of rows based on criteria

In below example, looking for new partner suggestions for user abc. abc has already sent a request to 123 so that can be ignored. rrr has sent request to abc but rrr is in the fromUser field so rrr is still a valid row to be shown as suggestion to abc
I have two collections:
User collection
[
{
_id: "abc",
name: "abc",
group: 1
},
{
_id: "xyz",
name: "xyyy",
group: 1
},
{
_id: "123",
name: "yyy",
group: 1
},
{
_id: "rrr",
name: "tttt",
group: 1
},
{
_id: "eee",
name: "uuu",
group: 1
}
]
Partnership collection (if users have already partnered)
[
{
_id: "abc_123",
fromUser: "abc",
toUser: "123"
},
{
_id: "rrr_abc",
fromUser: "rrr",
toUser: "abc"
},
{
_id: "xyz_rrr",
fromUser: "xyz",
toUser: "rrr"
}
]
My query below excludes the user rrr but it should not because its not listed in toUser field in the partnership collection corresponding to the user abc.
How to modify this query to include user rrr in this case?
db.users.aggregate([
{
$match: {
group: 1,
_id: {
$ne: "abc"
}
}
},
{
$lookup: {
from: "partnership",
let: {
userId: "$_id"
},
as: "prob",
pipeline: [
{
$set: {
users: [
"$fromUser",
"$toUser"
],
u: "$$userId"
}
},
{
$match: {
$expr: {
$and: [
{
$in: [
"$$userId",
"$users"
]
},
{
$in: [
"abc",
"$users"
]
}
]
}
}
}
]
}
},
{
$match: {
"prob.0": {
$exists: false
}
}
},
{
$sample: {
size: 1
}
},
{
$unset: "prob"
}
])
https://mongoplayground.net/p/utGMeHFRGmt
Your current query does not allow creating an existing connection regardless of the connection direction. If the order of the connection is important use:
db.users.aggregate([
{$match: {
group: 1,
_id: {$ne: "abc"}
}
},
{$lookup: {
from: "partnership",
let: { userId: {$concat: ["abc", "_", "$_id"]}},
as: "prob",
pipeline: [{$match: {$expr: {$eq: ["$_id", "$$userId"]}}}]
}
},
{$match: {"prob.0": {$exists: false}}},
{$sample: {size: 1}},
{$unset: "prob"}
])
See how it works on the playground example
For MongoDB 5 and later, I'd propose the following aggregation pipeline:
db.users.aggregate([
{
$match: {
group: 1,
_id: {
$ne: "abc"
}
}
},
{
$lookup: {
from: "partnership",
as: "prob",
localField: "_id",
foreignField: "toUser",
pipeline: [
{
$match: {
fromUser: "abc",
}
}
]
}
},
{
$match: {
"prob.0": {
$exists: false
}
}
},
{
$unset: "prob"
}
])
The following documents are returned (full result without the $sample stage):
[
{
"_id": "eee",
"group": 1,
"name": "uuu"
},
{
"_id": "rrr",
"group": 1,
"name": "tttt"
},
{
"_id": "xyz",
"group": 1,
"name": "xyyy"
}
]
The main difference is that the lookup connects the collections by the toUser field (see localField, foreignField) and uses a minimal pipeline to restrict the results further to only retrieve the requests from the current user document to "abc".
See this playground to test.
When using MongoDB < 5, you cannot use localField and foreignField to run the pipeline only on a subset of the documents in the * from*
collection. To overcome this, you can use this aggregation pipeline:
db.users.aggregate([
{
$match: {
group: 1,
_id: {
$ne: "abc"
}
}
},
{
$lookup: {
from: "partnership",
as: "prob",
let: {
userId: "$_id"
},
pipeline: [
{
$match: {
$expr: {
$and: [
{
$eq: [
"$fromUser",
"abc"
]
},
{
$eq: [
"$toUser",
"$$userId"
]
}
]
}
}
}
]
}
},
{
$match: {
"prob.0": {
$exists: false
}
}
},
{
$unset: "prob"
}
])
The results are the same as for the upper pipeline.
See this playground to test.
For another, another way, this query starts from the partnership collection, finds which users to exclude, and then does a "$lookup" for everybody else. The remainder is just output formatting, although it looks like you may want to add a "$sample" stage at the end.
db.partnership.aggregate([
{
"$match": {
"fromUser": "abc"
}
},
{
"$group": {
"_id": null,
"exclude": {"$push": "$toUser" }
}
},
{
"$lookup": {
"from": "users",
"let": {
"exclude": {"$concatArrays": [["abc"], "$exclude"]
}
},
"pipeline": [
{
"$match": {
"$expr": {
"$not": {"$in": ["$_id", "$$exclude"]}
}
}
}
],
"as": "output"
}
},
{
"$project": {
"_id": 0,
"output": 1
}
},
{"$unwind": "$output"},
{"$replaceWith": "$output"}
])
Try it on mongoplayground.net.

MongoDB - Aggregate get specific objects in an array

How can I get only objects in the sales array matching with 2021-10-14 date ?
My aggregate query currently returns all objects of the sales array if at least one is matching.
Dataset Documents
{
"name": "#0",
"sales": [{
"date": "2021-10-14",
"price": 3.69,
},{
"date": "2021-10-15",
"price": 2.79,
}]
},
{
"name": "#1",
"sales": [{
"date": "2021-10-14",
"price": 1.5,
}]
}
Aggregate
{
$match: {
sales: {
$elemMatch: {
date: '2021-10-14',
},
},
},
},
{
$group: {
_id: 0,
data: {
$push: '$sales',
},
},
},
{
$project: {
data: {
$reduce: {
input: '$data',
initialValue: [],
in: {
$setUnion: ['$$value', '$$this'],
},
},
},
},
}
Result
{"date": "2021-10-14","price": 3.69},
{"date": "2021-10-15","price": 2.79},
{"date": "2021-10-14","price": 1.5}
Result Expected
{"date": "2021-10-14","price": 3.69},
{"date": "2021-10-14","price": 1.5}
You actually need to use a $replaceRoot or $replaceWith pipeline which takes in an expression that gives you the resulting document filtered using $arrayElemAt (or $first) and $filter from the sales array:
[
{ $match: { 'sales.date': '2021-10-14' } },
{ $replaceWith: {
$arrayElemAt: [
{
$filter: {
input: '$sales',
cond: { $eq: ['$$this.date', '2021-10-14'] }
}
},
0
]
} }
]
OR
[
{ $match: { 'sales.date': '2021-10-14' } },
{ $replaceRoot: {
newRoot: {
$arrayElemAt: [
{
$filter: {
input: '$sales',
cond: { $eq: ['$$this.date', '2021-10-14'] }
}
},
0
]
}
} }
]
Mongo Playground
In $project stage, you need $filter operator with input as $reduce operator to filter the documents.
{
$project: {
data: {
$filter: {
input: {
$reduce: {
input: "$data",
initialValue: [],
in: {
$setUnion: [
"$$value",
"$$this"
],
}
}
},
cond: {
$eq: [
"$$this.date",
"2021-10-14"
]
}
}
}
}
}
Sample Mongo Playground
How about using $unwind:
.aggregate([
{$match: { sales: {$elemMatch: {date: '2021-10-14'} } }},
{$unwind: '$sales'},
{$match: {'sales.date': '2021-10-14'}},
{$project: {date: '$sales.date', price: '$sales.price', _id: 0}}
])
This will separate the sales into different documents, each containing only one sale, and allow you to match conditions easily.
See: https://docs.mongodb.com/manual/reference/operator/aggregation/unwind/

MongoDB $lookup on nested document, limit and count the retrieved data

I would like to get a count of all notifications that aren't read by an User ("A", "B", "C", etc) for each subRoom. Taking into account that it could be millions of notifications documents and hundreds of subrooms elements in Rooms Collections, i need to limit it. For that reason I've limited the $lookup for first 100 elements and then check if that notifications have been read or not by an User. I did it using documents (roomId) in $lookup but I cant do it using subdocuments (subRoom.id).
Notifications Collection is indexed using a Compound of (roomId: 1, timestamp: -1)
Notifications Collection: (id corresponds to notification id and roomId is the link to Rooms collection)
[{
"_id": "XXX",
"id": "1",
"read": ["A", "B", "C"],
"roomId": "c1d87a4c-231d-4cc8-8438-35cf21ed7fc5",
"content": "XXX",
"timestamp": { "$date": "2021-12-31T22:50:53.000Z" }
},{
"_id": "XXX",
"id": "2",
"read": ["C"],
"roomId": "c1d87a4c-231d-4cc8-8438-35cf21ed7fc5",
"content": "XXX",
"timestamp": { "$date": "2021-12-31T22:50:53.000Z" }
},
...
]
Rooms Collection:
[{
"_id": "XXX"
"subRoom": [{
"id": "c1d87a4c-231d-4cc8-8438-35cf21ed7fc5",
"image": "XXX",
"name": "XXX"
}, {
"id": "c2d5081e-0cf1-4e69-937d-be357da1d104",
"image": "XXX",
"name": "XXX"
}, {
"id": "530c2c02-26e8-441c-af39-c5232dfe1f73",
"image": "XXX",
"name": "XXX"
}],
"id": "453a6458-6545-4842-8946-05f49efea216",
"name": "XXX",
},
...
]
Code working using roomId instead subRoom.id:
{ $lookup: {
from: "notifications",
let: { "id": "$id" },
pipeline: [
{ $match: {
$expr:
{ $eq: [ "$roomId", "$$id" ] }
}},
{ $limit: 100},
{ $project: {_id: 0, read: 1}}
],
as: "messages"
}},
{ $project: {_id: 0, id: 1, notRead: {
$size: {
$filter: {
input: "$notifications",
cond: {
$not: {
$in: [
"A",
"$$this.read"
]
}
}
}
}
},
}
Code NOT WORKING using subRoom.id:
{ $lookup: {
from: "notifications",
let: { "id": "$subRoom.id" },
pipeline: [
{ $match: {
$expr:
{ $eq: [ "$roomId", "$$id" ] }
}},
{ $limit: 100},
{ $project: {_id: 0, read: 1}}
],
as: "messages"
}},
{
$addFields: {
items: {
$map: {
input: { $zip: { inputs: ["$subRoom", "$messages"] } },
in: { $mergeObjects: "$$this" },
},
},
},
},
.
. projection
.
Expected Result:
[{
"_id": "XXX"
"subRoom": [{
"id": "c1d87a4c-231d-4cc8-8438-35cf21ed7fc5",
"notRead": 50 //e.g
}, {
"id": "c2d5081e-0cf1-4e69-937d-be357da1d104",
"notRead": 35 //e.g
}, {
"id": "530c2c02-26e8-441c-af39-c5232dfe1f73",
"image": "XXX",
"notRead": 5 //e.g
}],
"id": "453a6458-6545-4842-8946-05f49efea216",
"name": "XXX",
},
...
]
Finally and very importantly, I want an scalable solution that can be done with big data.
Thank you very much in advance.
$unwind deconstruct subRoom array with preserve null and empty array property
$lookup with notification collection using pipeline, let to pass id to pipeline, check condition for roomId and user should not read notification
$group by null and count total unread notifications
$addFields to get count to notifications using $sum
$group by _id and reconstruct the subRoom array with required fields in result
db.rooms.aggregate([
{
$unwind: {
path: "$subRoom",
preserveNullAndEmptyArrays: true
}
},
{
$lookup: {
from: "nitifications",
let: { id: "$subRoom.id" },
pipeline: [
{
$match: {
$and: [
{ $expr: { $eq: ["$$id", "$roomId"] } },
{ read: { $ne: "A" } }
]
}
},
{
$group: {
_id: null,
count: { $sum: 1 }
}
}
],
as: "subRoom.notRead"
}
},
{
$addFields: {
"subRoom.notRead": { $sum: "$subRoom.notRead.count" }
}
},
{
$group: {
_id: "$_id",
name: { $first: "$name" },
id: { $first: "$id" },
subRoom: { $push: "$subRoom" }
}
}
])
Playground
Second option without using $unwind stage,
$lookup with notification collection using pipeline, let to pass id to pipeline, check condition for roomId and user should not read notification
$group by null and count total unread notifications
$map to iterate loop of subRoom array
$filter to iterate loop of return result from lookup notifications count and get current subRoom document
$let to declare a variable n and assign above filtered result to it and return $sum from count
$mergeObjects to merge current object of subRoom and new field notRead
db.rooms.aggregate([
{
$lookup: {
from: "nitifications",
let: { id: "$subRoom.id" },
pipeline: [
{
$match: {
$and: [
{ $expr: { $in: ["$roomId", "$$id"] } },
{ read: { $ne: "A" } }
]
}
},
{
$group: {
_id: "$roomId",
count: { $sum: 1 }
}
}
],
as: "notRead"
}
},
{
$project: {
id: 1,
name: 1,
subRoom: {
$map: {
input: "$subRoom",
as: "s",
in: {
$mergeObjects: [
"$$s",
{
notRead: {
$let: {
vars: {
n: {
$filter: {
input: "$notRead",
cond: { $eq: ["$$this._id", "$$s.id"] }
}
}
},
in: { $sum: "$$n.count" }
}
}
}
]
}
}
}
}
}
])
Playground

Returning a document with two fields from the same array in MongoDB

Given documents such as
{
_id: 'abcd',
userId: '12345',
activities: [
{ status: 'login', timestamp: '10000001' },
{ status: 'logout', timestamp: '10000002' },
{ status: 'login', timestamp: '10000003' },
{ status: 'logout', timestamp: '10000004' },
]
}
I am trying to create a pipeline such as all users that have their latest login/logout activities recorded between two timestamps will be returned. For example, if the two timestamp values are between 10000002 and 10000003, the expected document should be
{
_id: 'abcd',
userId: '12345',
login: '10000003',
logout: '10000002'
}
Of if the two timestamp values are between -1 and 10000001, the expected document should be :
{
_id: 'abcd',
userId: '12345',
login: '10000001',
logout: null
}
Etc.
I know it has to do with aggregations, and I need to $unwind, etc., but I'm not sure about the rest, namely evaluating two fields from the same document array
You can try below aggregation:
db.col.aggregate([
{
$unwind: "$activities"
},
{
$match: {
$and: [
{ "activities.timestamp": { $gte: "10000001" } },
{ "activities.timestamp": { $lte: "10000002" } }
]
}
},
{
$sort: {
"activities.timestamp": -1
}
},
{
$group: {
_id: "$_id",
userId: { $first: "$userId" },
activities: { $push: "$activities" }
}
},
{
$addFields: {
login: { $arrayElemAt: [ { $filter: { input: "$activities", as: "a", cond: { $eq: [ "$$a.status", "login" ] } } } , 0 ] },
logout: { $arrayElemAt: [ { $filter: { input: "$activities", as: "a", cond: { $eq: [ "$$a.status", "logout" ] } } } , 0 ] }
}
},
{
$project: {
_id: 1,
userId: 1,
login: { $ifNull: [ "$login.timestamp", null ] },
logout: { $ifNull: [ "$logout.timestamp", null ] }
}
}
])
We need to use $unwind + $sort + $group to make sure that our activities will be sorted by timestamp. After $unwind you can use $match to apply filtering condition. Then you can use $filter with $arrayElemAt to get first (latest) value of filtered array. In the last $project you can explicitly use $ifNull (otherwise JSON key will be skipped if there's no value)
You can use below aggregation
Instead of $unwind use $lte and $gte with the $fitler aggregation.
db.collection.aggregate([
{ "$project": {
"userId": 1,
"login": {
"$max": {
"$filter": {
"input": "$activities",
"cond": {
"$and": [
{ "$gte": ["$$this.timestamp", "10000001"] },
{ "$lte": ["$$this.timestamp", "10000004"] },
{ "$lte": ["$$this.status", "login"] }
]
}
}
}
},
"logout": {
"$max": {
"$filter": {
"input": "$activities",
"cond": {
"$and": [
{ "$gte": ["$$this.timestamp", "10000001"] },
{ "$lte": ["$$this.timestamp", "10000004"] },
{ "$lte": ["$$this.status", "logout"] }
]
}
}
}
}
}}
])

total of all groups totals using mongodb

i did this Aggregate pipeline , and i want add a field contains the Global Total of all groups total.
{ "$match": query },
{ "$sort": cursor.sort },
{ "$group": {
_id: { key:"$paymentFromId"},
items: {
$push: {
_id:"$_id",
value:"$value",
transaction:"$transaction",
paymentMethod:"$paymentMethod",
createdAt:"$createdAt",
...
}
},
count:{$sum:1},
total:{$sum:"$value"}
}}
{
//i want to get
...project groups , goupsTotal , groupsCount
}
,{
"$skip":cursor.skip
},{
"$limit":cursor.limit
},
])
you need to use $facet (avaialble from MongoDB 3.4) to apply multiple pipelines on the same set of docs
first pipeline: skip and limit docs
second pipeline: calculate total of all groups
{ "$match": query },
{ "$sort": cursor.sort },
{ "$group": {
_id: { key:"$paymentFromId"},
items: {
$push: "$$CURRENT"
},
count:{$sum:1},
total:{$sum:"$value"}
}
},
{
$facet: {
docs: [
{ $skip:cursor.skip },
{ $limit:cursor.limit }
],
overall: [
{$group: {
_id: null,
groupsTotal: {$sum: '$total'},
groupsCount:{ $sum: '$count'}
}
}
]
}
the final output will be
{
docs: [ .... ], // array of {_id, items, count, total}
overall: { } // object with properties groupsTotal, groupsCount
}
PS: I've replaced the items in the third pipe stage with $$CURRENT which adds the whole document for the sake of simplicity, if you need custom properties then specify them.
i did it in this way , project the $group result in new field doc and $sum the sub totals.
{
$project: {
"doc": {
"_id": "$_id",
"total": "$total",
"items":"$items",
"count":"$count"
}
}
},{
$group: {
"_id": null,
"globalTotal": {
$sum: "$doc.total"
},
"result": {
$push: "$doc"
}
}
},
{
$project: {
"result": 1,
//paging "result": {$slice: [ "$result", cursor.skip,cursor.limit ] },
"_id": 0,
"globalTotal": 1
}
}
the output
[
{
globalTotal: 121500,
result: [ [group1], [group2], [group3], ... ]
}
]