How to get conditional fields in Mongodb - mongodb

Hello good developers,
I am new to MongoDB and trying to fetch conditional data for my requirements.
I have the following collection:
[
{
"id":10001,
"name":"Test 1",
"status":"live",
"traffics":[
{
"id":"1a3s5d435a4sd",
"status":"",
},
{
"id":"1a3s5d44as54d35a4sd",
"status":"CMP",
},
{
"id":"a3s5d454asd34asd",
"status":"",
},
{
"id":"1a35sd45a4sd34asd3",
"status":"TERM",
},
{
"id":"as35d435a4sd354as3d43asd4",
"status":"CMP",
},
{
"id":"135as4d5a4sd354a3s5d43asd",
"status":"CMP",
},
{
"id":"123as1d31a3d12ads13as",
"status":"TERM",
}
]
},
{...},{...}
]
I want to get data like these
ID, Name, count traffics as Starts, count (traffics where status = "CMP") as completes, count (traffics where status = "TERM") as Terminates, count (traffics where status = "") as Abandons
I am trying to run following command
db.inventory.aggregate( { $project: {id: 1, status: 1, starts: {$size: "$traffics"}, _id: 0}})
but I don't know how to get conditional data in there

Take a try to this code. The $filter operator is available from Mongodb 3.2 version.
db.inventory.aggregate( [
{ $project: {id: 1, status: 1, starts: {$size: "$traffics"},
completes:{$size: {$filter:{input:"$traffics",as:"item",cond:{$eq:[$$item.status,"CMP"]}}}},
terminates:{$size: {$filter:{input:"$traffics",as:"item",cond:{$eq:[$$item.status,"TERM"]}}}},
abandons:{$size: {$filter:{input:"$traffics",as:"item",cond:{$eq:[$$item.status,""]}}}},
_id: 0}
}
] )
Hope this help

I was able to modify #Yones answer a little bit so that I can get counts of the records based on conditions.
so here's my query for this.
db.collection.aggregate({
$project: {
id: 1,
name: 1,
status: 1,
starts: {$size: "$traffics"},
completes: {
$size: {
$filter: {
input: "$traffics",
as: "item",
cond: {
$eq: [
"$$item.status",
"CMP"
]
}
}
}
},
terminates: {
$size: {
$filter: {
input: "$traffics",
as: "item",
cond: {
$eq: [
"$$item.status",
"TERM"
]
}
}
}
},
abandons: {
$size: {
$filter: {
input: "$traffics",
as: "item",
cond: {
$eq: [
"$$item.status",
""
]
}
}
}
},
_id: 0
}
})
I am simply Filtering out the records based on my conditions using $filter
And then I am calculating its size using $size.
Here's working example for this answer: https://mongoplayground.net/p/TcuLlJShclA

I think you may need to run independent aggregation statements and use a $match statement.
$match: {
status: "CMP"
}
$match: {
status: "TERM"
}
$match: {
status: ""
}

Related

Mongodb - Perform calculation with a just deleted value in aggregation pipeline

I have this document:
{
_id: ObjectId('asdu7329n'),
payments: [
{ _id: ObjectId('28sdf310'), paidAmount: 20 },
{ _id: ObjectId('2fsd410'), paidAmount: 15 },
{ _id: ObjectId('2fs32gd70'), paidAmount: 35 },
],
totalPaidAmount: 70
}
What I want is to re-calculate the totalPaidAmount field when a payment is removed, right now I'm deleting the payment in this way:
const query = { _id: ObjectId(saleId) };
const update = [
{ $set: { payments: { $filter: {
input: '$payments',
cond: {
$ne: [ "$$this._id", ObjectId(/* paymentId to delete */) ]
}
}}}}
]
await salesSchema.findOneAndUpdate(query, update);
I know that I have to use $subtract possibly in a second $set stage but how could I reference the paidAmount value from the object so that I can do something like this:
{
$set: {
totalPaidAmount: {
$subtract: [ '$totalPaidAmount', /* paidAmount value for the deleted payment */ ]
}
}
}
I know that I can just sum the paidAmount values for all the indexes of payments but what if there is like 1000 or more items? even if it doesn't hit the performance too much it seems to me more logical to take advantage of the totalPaidAmount field here.
If you want to subtract you can use the $filter:
db.collection.update(
{payments: {$elemMatch: {_id: ObjectId("63920f965d15e98e3d7c452c")}}},
[{$project: {
payments: {
$filter: {
input: "$payments",
cond: {$ne: ["$$this._id", ObjectId("63920f965d15e98e3d7c452c")]}
}
},
totalPaidAmount: {
$subtract: [
"$totalPaidAmount",
{$getField: {
input: {
$first: {
$filter: {
input: "$payments",
cond: {$eq: ["$$this._id", ObjectId("63920f965d15e98e3d7c452c")]}
}
}
},
field: "paidAmount"
}
}
]
}
}}
])
See how it works on the playground example
But I would go with the good old $sum:
db.collection.update(
{payments: {$elemMatch: {_id: ObjectId("63920f965d15e98e3d7c452c")}}},
[{$project: {
payments: {
$filter: {
input: "$payments",
cond: {$ne: ["$$this._id", ObjectId("63920f965d15e98e3d7c452c")]}
}
}
}},
{$set: {totalPaidAmount: {$sum: "$payments.paidAmount"}}}
])
See how it works on the playground example

Sort Mongodb documents by seeing if the _id is in another array

I have two collections - "users" and "follows". "Follows" simply contains documents with a "follower" field and a "followee" field that represent when a user follows another user. What I want to do is to be able to query the users but display the users that I (or whatever user is making the request) follow first. For example if I follow users "5" and "14", when I search the list of users, I want users "5" and "14" to be at the top of the list, followed by the rest of the users in the database.
If I were to first query all the users that I follow from the "Follows" collection and get an array of those userIDs, is there a way that I can sort by using something like {$in: [userIDs]}? I don't want to filter out the users that I do not follow, I simply want to sort the list by showing the users that I do follow first.
I am using nodejs and mongoose for this.
Any help would be greatly appreciated. Thank you!
Answer
db.users.aggregate([
{
$addFields: {
sortBy: {
$cond: {
if: {
$in: [ "$_id", [ 5, 14 ] ]
},
then: 0,
else: 1
}
}
}
},
{
$sort: {
sortBy: 1
}
},
{
$unset: "sortBy"
}
])
Test Here
If you don't want you on the list, then
db.users.aggregate([
{
$addFields: {
sortBy: {
$cond: {
if: {
$in: [ "$_id", [ 5, 14 ] ]
},
then: 0,
else: 1
}
}
}
},
{
$sort: {
sortBy: 1
}
},
{
$unset: "sortBy"
},
{
$match: {
"_id": { $ne: 1 }
}
}
])
Test Here
If you want to sort users first
db.users.aggregate([
{
$sort: {
_id: 1
}
},
{
$addFields: {
sortBy: {
$cond: {
if: {
$in: [
"$_id",
[
5,
14
]
]
},
then: 0,
else: 1
}
}
}
},
{
$sort: {
sortBy: 1,
}
},
{
$unset: "sortBy"
},
{
$match: {
"_id": {
$ne: 1
}
}
}
])
Test Here

Optimise MongoDB aggregate query performance

I have next DB structure:
Workspaces:
Key
Index
PK
id
id
content
Projects:
Key
Index
PK
id
id
FK
workspace
workspace_1
deleted
deleted_1
content
Items:
Key
Index
PK
id
id
FK
project
project_1
type
_type_1
deleted
deleted_1
content
I need to calculate a number of items of each type for each project in workspace, e.g. expected output:
[
{ _id: 'projectId1', itemType1Count: 100, itemType2Count: 50, itemType3Count: 200 },
{ _id: 'projectId2', itemType1Count: 40, itemType2Count: 100, itemType3Count: 300 },
....
]
After few attempts and some debugging I've created a query which provides output I needed:
const pipeline = [
{ $match: { workspace: 'workspaceId1' } },
{
$lookup: {
from: 'items',
let: { id: '$_id' },
pipeline: [
{
$match: {
$expr: {
$eq: ['$project', '$$id'],
},
},
},
// project only fields necessary for later pipelines to not overload
// memory and to not get `exceeded memory limit for $group` error
{ $project: { _id: 1, type: 1, deleted: 1 } },
],
as: 'items',
},
},
// Use $unwind here to optimize aggregation pipeline, see:
// https://stackoverflow.com/questions/45724785/aggregate-lookup-total-size-of-documents-in-matching-pipeline-exceeds-maximum-d
// Without $unwind we may get an `matching pipeline exceeds maximum document size` error.
// Error appears not in all requests and it's really strange and hard to debug.
{ $unwind: '$items' },
{ $match: { 'items.deleted': { $eq: false } } },
{
$group: {
_id: '$_id',
items: { $push: '$items' },
},
},
{
$project: {
_id: 1,
// Note: I have only 3 possible item types, so it's OK that it's names hardcoded.
itemType1Count: {
$size: {
$filter: {
input: '$items',
cond: { $eq: ['$$this.type', 'type1'] },
},
},
},
itemType2Count: {
$size: {
$filter: {
input: '$items',
cond: { $eq: ['$$this.type', 'type2'] },
},
},
},
itemType3Count: {
$size: {
$filter: {
input: '$items',
cond: { $eq: ['$$this.type', 'type3'] },
},
},
},
},
},
]
const counts = await Project.aggregate(pipeline)
Query works like expected, but very slow... If I have some about 1000 items in one workspace it takes about 8 seconds to complete. Any ideas how to make it faster are appreciated.
Thanks.
Assuming your indexs are properly indexed that they contain the "correct" fields, we can still have some tweaks on the query itself.
Approach 1: keeping existing collection schema
db.projects.aggregate([
{
$match: {
workspace: "workspaceId1"
}
},
{
$lookup: {
from: "items",
let: {id: "$_id"},
pipeline: [
{
$match: {
$expr: {
$and: [
{$eq: ["$project","$$id"]},
{$eq: ["$deleted",false]}
]
}
}
},
// project only fields necessary for later pipelines to not overload
// memory and to not get `exceeded memory limit for $group` error
{
$project: {
_id: 1,
type: 1,
deleted: 1
}
}
],
as: "items"
}
},
// Use $unwind here to optimize aggregation pipeline, see:
// https://stackoverflow.com/questions/45724785/aggregate-lookup-total-size-of-documents-in-matching-pipeline-exceeds-maximum-d
// Without $unwind we may get an `matching pipeline exceeds maximum document size` error.
// Error appears not in all requests and it's really strange and hard to debug.
{
$unwind: "$items"
},
{
$group: {
_id: "$_id",
itemType1Count: {
$sum: {
"$cond": {
"if": {$eq: ["$items.type","type1"]},
"then": 1,
"else": 0
}
}
},
itemType2Count: {
$sum: {
"$cond": {
"if": {$eq: ["$items.type","type2"]},
"then": 1,
"else": 0
}
}
},
itemType3Count: {
$sum: {
"$cond": {
"if": {$eq: ["$items.type","type1"]},
"then": 1,
"else": 0
}
}
}
}
}
])
There are 2 major changes:
moving the items.deleted : false condition into the $lookup subpipeline to lookup less items documents
skipped items: { $push: '$items' }. Instead, do a conditional sum in later $group stage
Here is the Mongo playground for your reference. (at least for the correctness of the new query)
Approach 2: If the collection schema can be modified. We can denormalize projects.workspace into the items collection like this:
{
"_id": "i1",
"project": "p1",
"workspace": "workspaceId1",
"type": "type1",
"deleted": false
}
In this way, you can skip the $lookup. A simple $match and $group will suffice.
db.items.aggregate([
{
$match: {
"deleted": false,
"workspace": "workspaceId1"
}
},
{
$group: {
_id: "$project",
itemType1Count: {
$sum: {
"$cond": {
"if": {$eq: ["$type","type1"]},
"then": 1,
"else": 0
}
}
},
...
Here is the Mongo playground with denormalized schema for your reference.

How to query an array and retrieve it from MongoDB

Updated:
I have a document on the database that looks like this:
My question is the following:
How can I retrieve the first 10 elements from the friendsArray from database and sort it descending or ascending based on the lastTimestamp value.
I don't want to download all values to my API and then sort them in Python because that is wasting my resources.
I have tried it using this code (Python):
listOfUsers = db.user_relations.find_one({'userId': '123'}, {'friendsArray' : {'$orderBy': {'lastTimestamp': 1}}}).limit(10)
but it just gives me this error pymongo.errors.OperationFailure: Unknown expression $orderBy
Any answer at this point would be really helpful! Thank You!
use aggregate
first unwind
then sort according timestap
group by _id to create sorted array
use addfields and filter for getting first 10 item of array
db.collection.aggregate([
{ $match:{userId:"123"}},
{
"$unwind": "$friendsArray"
},
{
$sort: {
"friendsArray.lastTimeStamp": 1
}
},
{
$group: {
_id: "$_id",
friendsArray: {
$push: "$friendsArray"
}
},
},
{
$addFields: {
friendsArray: {
$filter: {
input: "$friendsArray",
as: "z",
cond: {
$lt: [
{
$indexOfArray: [
"$friendsArray",
"$$z"
]
},
10
]
}// 10 is n first item
}
}
},
}
])
https://mongoplayground.net/p/2Usk5sRY2L2
and for pagination use this
db.collection.aggregate([
{ $match:{userId:"123"}},
{
"$unwind": "$friendsArray"
},
{
$sort: {
"friendsArray.lastTimeStamp": 1
}
},
{
$group: {
_id: "$_id",
friendsArray: {
$push: "$friendsArray"
}
},
},
{
$addFields: {
friendsArray: {
$filter: {
input: "$friendsArray",
as: "z",
cond: {
$and: [
{
$gt: [
{
$indexOfArray: [
"$friendsArray",
"$$z"
]
},
10
]
},
{
$lt: [
{
$indexOfArray: [
"$friendsArray",
"$$z"
]
},
20
]
},
]
}// 10 is n first item
}
}
},
}
])
The translation of your find to aggregation(we need unwind that why aggregation is used) would be like the bellow query.
Test code here
Query (for descending replace 1 with -1)
db.collection.aggregate([
{
"$match": {
"userId": "123"
}
},
{
"$unwind": {
"path": "$friendsArray"
}
},
{
"$sort": {
"friendsArray.lastTimeStamp": 1
}
},
{
"$limit": 10
},
{
"$replaceRoot": {
"newRoot": "$friendsArray"
}
}
])
If you want to skip some before limit add one stage also
{
"$skip" : 10
}
To take the 10-20 messages for example.

Retrieving a count that matches specified criteria in a $group aggregation

So I am looking to group documents in my collection on a specific field, and for the output results of each group, I am looking to include the following:
A count of all documents in the group that match a specific query (i.e. a count of documents that satisfy some expression { "$Property": "Value" })
The total number of documents in the group
(Bonus, as I suspect that this is not easily accomplished) Properties of a document that correspond to a $min/$max accumulator
I am very new to the syntax used to query in mongo and don't quite understand how it all works, but after some research, I've managed to get it down to the following query (please note, I am currently using version 3.0.12 for my mongo db, but I believe we will upgrade in a couple of months time):
db.getCollection('myCollection').aggregate(
[
{
$group: {
_id: {
GroupID: "$GroupID",
Status: "$Status"
},
total: { $sum: 1 },
GroupName: { $first: "$GroupName" },
EarliestCreatedDate: { $min: "$DateCreated" },
LastModifiedDate: { $max: "$LastModifiedDate" }
}
},
{
$group: {
_id: "$_id.GroupID",
Statuses: {
$push: {
Status: "$_id.Status",
Count: "$total"
}
},
TotalCount: { $sum: "$total" },
GroupName: { $first: "$GroupName" },
EarliestCreatedDate: { $min: "$EarliestCreatedDate" },
LastModifiedDate: { $max: "$LastModifiedDate" }
}
}
]
)
Essentially what I am looking to retrieve is the Count for specific Status values, and project them into one final result document that looks like the following:
{
GroupName,
EarliestCreatedDate,
EarliestCreatedBy,
LastModifiedDate,
LastModifiedBy,
TotalCount,
PendingCount,
ClosedCount
}
Where PendingCount and ClosedCount are the total number of documents in each group that have a status Pending/Closed. I suspect I need to use $project with some other expression to extract this value, but I don't really understand the aggregation pipeline well enough to figure this out.
Also the EarliestCreatedBy and LastModifiedBy are the users who created/modified the document(s) corresponding to the EarliestCreatedDate and LastModifiedDate respectively. As I mentioned, I think retrieving these values will add another layer of complexity, so if there is no practical solution, I am willing to forgo this requirement.
Any suggestions/tips would be very much appreciated.
You can try below aggregation stages.
$group
Calculate all the necessary counts TotalCount, PendingCount and ClosedCount for each GroupID
Calculate $min and $max for EarliestCreatedDate and LastModifiedDate respectively and push all the fields to CreatedByLastModifiedBy to be compared later for fetching EarliestCreatedBy and LastModifiedBy for each GroupID
$project
Project all the fields for response
$filter the EarliestCreatedDate value against the data in the CreatedByLastModifiedBy and $map the matching CreatedBy to the EarliestCreatedBy and $arrayElemAt to convert the array to object.
Similar steps for calculating LastModifiedBy
db.getCollection('myCollection').aggregate(
[{
$group: {
_id: "$GroupID",
TotalCount: {
$sum: 1
},
PendingCount: {
$sum: {
$cond: {
if: {
$eq: ["Status", "Pending"]
},
then: 1,
else: 0
}
}
},
ClosedCount: {
$sum: {
$cond: {
if: {
$eq: ["Status", "Closed "]
},
then: 1,
else: 0
}
}
},
GroupName: {
$first: "$GroupName"
},
EarliestCreatedDate: {
$min: "$DateCreated"
},
LastModifiedDate: {
$max: "$LastModifiedDate"
},
CreatedByLastModifiedBy: {
$push: {
CreatedBy: "$CreatedBy",
LastModifiedBy: "$LastModifiedBy",
DateCreated: "$DateCreated",
LastModifiedDate: "$LastModifiedDate"
}
}
}
}, {
$project: {
_id: 0,
GroupName: 1,
EarliestCreatedDate: 1,
EarliestCreatedBy: {
$arrayElemAt: [{
$map: {
input: {
$filter: {
input: "$CreatedByLastModifiedBy",
as: "CrBy",
cond: {
"$eq": ["$EarliestCreatedDate", "$$CrBy.DateCreated"]
}
}
},
as: "EaCrBy",
in: {
"$$EaCrBy.CreatedBy"
}
}
}, 0]
},
LastModifiedDate: 1,
LastModifiedBy: {
$arrayElemAt: [{
$map: {
input: {
$filter: {
input: "$CreatedByLastModifiedBy",
as: "MoBy",
cond: {
"$eq": ["$LastModifiedDate", "$$MoBy.LastModifiedDate"]
}
}
},
as: "LaMoBy",
in: {
"$$LaMoBy.LastModifiedBy"
}
}
}, 0]
},
TotalCount: 1,
PendingCount: 1,
ClosedCount: 1
}
}]
)
Update for Version < 3.2
$filter is also not available in your version. Below is the equivalent.
The comparison logic is the same and creates an array with for every non matching entry the value of false or LastModifiedBy otherwise.
Next step is to use $setDifference to compare the previous array values with array [false] which returns the elements that only exist in the first set.
LastModifiedBy: {
$setDifference: [{
$map: {
input: "$CreatedByLastModifiedBy",
as: "MoBy",
in: {
$cond: [{
$eq: ["$LastModifiedDate", "$$MoBy.LastModifiedDate"]
},
"$$MoBy.LastModifiedBy",
false
]
}
}
},
[false]
]
}
Add $unwind stage after $project stage to change to object
{$unwind:"$LastModifiedBy"}
Similar steps for calculating EarliestCreatedBy