Perform $group and count in MongoDB aggregation

Given that I have a complex grouping requirement, I was wondering what would be the best approach to achieving my desired result.
My data (result of $project stage) would look something like this:
{
  _id: $id,
  status: "available",
  inspectionStatus: "done",
  state: "completed",
  category: "One"
},
{
  _id: $id,
  status: "booked",
  inspectionStatus: "none",
  state: "active",
  category: "Two"
},
...
I have tried using $facet to create multiple buckets, since the grouping I am trying to create is an aggregation of status + state + inspectionStatus, but the execution time is unacceptable, taking around 1639763842 milliseconds.
I can't use $accumulator because of my MongoDB version (although we could always upgrade to 4.4.x), but I am not sure whether using $accumulator would produce a better response time.
The $facet stage is included:
{
  "available": [
    {"$match": {$and: [
      {"status": "available"},
      {"inspectionStatus": "done"}
    ]}}
  ],
  "matched": [
    {"$match": {$and: [
      {"status": "booked"},
      {"state": "booked"}
    ]}}
  ],
  "inIntake": [
    {"$match": {$and: [
      {"status": "available"},
      {"inspectionStatus": {$ne: "done"}}
    ]}}
  ],
  "active": [
    {"$match": {$and: [
      {"status": "booked"},
      {"state": "active"}
    ]}}
  ],
  "unreturned": [
    {"$match": {"status": "forceCompleted"}}
  ]
}

If you really want to push the logic to the DB, here's a solution -- but you still have to examine the XX field doc by doc:
db.foo.aggregate([
{$addFields: {XX: {$switch: {
branches: [
{ case: {
$and: [{$eq:["$status","available"]},{$eq:["$inspectionStatus","done"]}]
}, then:'AVAILABLE' },
{ case: {
$and: [{$eq:["$status","booked"]},{$eq:["$state","booked"]}]
}, then:'MATCHED' },
{ case: {
$and: [{$eq:["$status","available"]},{$ne:["$inspectionStatus","done"]}]
}, then:'IN_INTAKE' },
{ case: {
$and: [{$eq:["$status","booked"]},{$eq:["$state","active"]}]
}, then:'ACTIVE' },
{ case: {
$eq:["$status","forceCompleted"]
}, then:'UNRETURNED' },
],
default: null
}}
}}
,{$match: {XX: {$ne: null}}}
]);
The end-to-end timing on this is actually a few milliseconds better than a simple find() because less material is transferred, but of course the DB engine is working a little harder processing the pipeline.
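Since the title asks for a $group and count, here is a minimal follow-up sketch (the $group stage at the end is my addition; everything else reuses the pipeline above) that returns a count per derived bucket instead of the matching documents themselves:
db.foo.aggregate([
  {$addFields: {XX: {$switch: {
    branches: [
      { case: {$and: [{$eq: ["$status", "available"]}, {$eq: ["$inspectionStatus", "done"]}]}, then: 'AVAILABLE' },
      { case: {$and: [{$eq: ["$status", "booked"]}, {$eq: ["$state", "booked"]}]}, then: 'MATCHED' },
      { case: {$and: [{$eq: ["$status", "available"]}, {$ne: ["$inspectionStatus", "done"]}]}, then: 'IN_INTAKE' },
      { case: {$and: [{$eq: ["$status", "booked"]}, {$eq: ["$state", "active"]}]}, then: 'ACTIVE' },
      { case: {$eq: ["$status", "forceCompleted"]}, then: 'UNRETURNED' }
    ],
    default: null
  }}}},
  {$match: {XX: {$ne: null}}},
  // one output document per bucket, e.g. { _id: "AVAILABLE", count: 42 }
  {$group: {_id: "$XX", count: {$sum: 1}}}
]);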

Related

How to update a property of the last object of a list in mongo

I would like to update a property of the last object stored in a list in Mongo. For performance reasons, I cannot pop the object from the list, update the property, and then put the object back. Nor can I change the code design, as it is not up to me. In brief, I am looking for a way to select the last element of a list.
The closest I came to getting it working was to use arrayFilters, which I found while researching the subject (mongodb core ticket: https://jira.mongodb.org/browse/SERVER-27089):
db.getCollection("myCollection")
.updateOne(
{
_id: ObjectId('638f5f7fe881c670052a9d08')
},
{
$set: {"theList.$[i].propertyToUpdate": 'NewValueToAssign'}
},
{
arrayFilters: [{'i.type': 'MyTypeFilter'}]
}
)
I use a filter to only update the objects in theList whose type property evaluates to MyTypeFilter.
What I am looking for is something like:
db.getCollection("maCollection")
.updateOne(
{
_id: ObjectId('638f5f7fe881c670052a9d08')
},
{
$set: {"theList.$[i].propertyToUpdate": 'NewValueToAssign'}
},
{
arrayFilters: [{'i.index': -1}]
}
)
I also tried using "theList.$last.propertyToUpdate" instead of "theList.$[i].propertyToUpdate", but the path is not recognized ($last is not valid there).
I could not find anything online matching my case.
Thank you for your help, have a great day
You want to be using Mongo's pipelined updates; they allow you to use aggregation operators within the update body.
You do, however, need to consider edge cases that the other answer does not (a null list, an empty list, and list.length == 1).
Overall it looks like so:
db.collection.update(
  {_id: ObjectId("638f5f7fe881c670052a9d08")},
  [
    {$set: {
      list: {
        $concatArrays: [
          // everything except the last element, or [] when the list is null, empty, or has a single element
          {$cond: [
            {$gt: [{$size: {$ifNull: ["$list", []]}}, 1]},
            {$slice: ["$list", 0, {$subtract: [{$size: "$list"}, 1]}]},
            []
          ]},
          // the last element (or {} if there is none), merged with the new property value
          [{$mergeObjects: [
            {$ifNull: [{$last: "$list"}, {}]},
            {propertyToUpdate: "NewValueToAssign"}
          ]}]
        ]
      }
    }}
  ]
)
Mongo Playground
One option is to use update with pipeline:
db.collection.update(
{_id: ObjectId("638f5f7fe881c670052a9d08")},
[
{$set: {
theList: {
$concatArrays: [
{$slice: ["$theList", 0, {$subtract: [{$size: "$theList"}, 1]}]},
[{$mergeObjects: [{$last: "$theList"}, {propertyToUpdate: "NewValueToAssign"}]}]
]
}
}}
]
)
See how it works on the playground example
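For illustration (the document below is hypothetical; only the _id comes from the question), the update-with-pipeline above, which targets theList, rewrites only the last array element:
// before
{
  _id: ObjectId("638f5f7fe881c670052a9d08"),
  theList: [
    { type: "MyTypeFilter", propertyToUpdate: "oldValue" },
    { type: "OtherType", propertyToUpdate: "oldValue" }
  ]
}
// after
{
  _id: ObjectId("638f5f7fe881c670052a9d08"),
  theList: [
    { type: "MyTypeFilter", propertyToUpdate: "oldValue" },
    { type: "OtherType", propertyToUpdate: "NewValueToAssign" }
  ]
}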

SQL equivalent queries in MongoDB

I'm converting my SQL queries into MongoDB. I have the following SQL condition, which checks an input and performs the query accordingly.
(score_min = -1 OR Scores BETWEEN score_min AND score_max)
Scores is the field name I want to filter on.
I still couldn't figure out the best way to perform this query in a MongoDB aggregation pipeline. Any suggestions, please?
You can achieve this behaviour with $expr in a simple find; it is not a must to use an aggregation pipeline.
db.collection.find({
$expr: {
$or: [
{
// $eq: [<score_min>, -1]
$eq: [
<score_min>,
-1
]
},
{
$and: [
{
// $gte: ["$Scores", <score_min>]
$gte: [
"$Scores",
<score_min>
]
},
{
$lte: [
// $gte: ["$Scores", <score_max>]
"$Scores",
<score_max>
]
}
]
}
]
}
})
Here is the Mongo playground for your reference.
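For example, with hypothetical values score_min = 40 and score_max = 80 substituted for the placeholders (these numbers are only for illustration), the filter reads:
db.collection.find({
  $expr: {
    $or: [
      { $eq: [40, -1] },              // score_min = -1 would mean "no score filter"
      { $and: [
        { $gte: ["$Scores", 40] },    // Scores >= score_min
        { $lte: ["$Scores", 80] }     // Scores <= score_max
      ]}
    ]
  }
})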

Parallel processing of MongoDB data. Data collision

I use MongoDB.
The problem: there are n parallel processes; each of them takes documents with the query {data_processed: {$exists: false}}, processes them, and then updates them, setting {data_processed: true}. When I run all n processes, sometimes the same document appears in two or more different processes.
I think I can use something like this in the query to prevent collisions:
each process has an id from 1 to n
for the process with id i, get these documents
{
data_processed: {$exists: false},
_id: {mod_n: i}
}
where mod_n is the modulo-n operation applied to _id
I use the default BSON ObjectId as _id, so I think it should be possible to do something like this.
How can I implement this query? Or can you suggest a better way to solve this problem?
It seems like there's no easy way to convert an ObjectId to a long in order to perform a modulo operation. Alternatively, you can distribute your processing using a simple string comparison on the last character of _id, or on the last few characters if you need more threads.
For instance, if you want to run your processing using 4 processes, you can try the following queries:
db.col.aggregate([ { $match: { $expr: { $in: [ { $substr: [ { $toString: "$_id" }, 23, 1 ] }, [ "0", "1", "2", "3" ] ] } } } ])
...
db.col.aggregate([ { $match: { $expr: { $in: [ { $substr: [ { $toString: "$_id" }, 23, 1 ] }, [ "c", "d", "e", "f" ] ] } } } ])
This can scale to a higher number of processes, if you need more than 16 just take last two characters like:
db.col.aggregate([ { $match: { $expr: { $in: [ { $substr: [ { $toString: "$_id" }, 22, 2 ] }, [ "00", "01" ] ] } } } ])
Load should be distributed more or less evenly, since the last 3 bytes of an ObjectId (its last hex characters) are a counter starting with a random value.
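If you would rather compute each process's bucket than hard-code the character lists, here is a minimal sketch (the bucketsForProcess helper is my own illustration, not part of the answer above; it assumes 16 is divisible by n, e.g. n = 2, 4, 8, or 16):
// Split the 16 possible last hex characters of _id into n contiguous buckets.
function bucketsForProcess(i, n) {
  const hex = "0123456789abcdef".split("");
  const size = 16 / n;
  return hex.slice(i * size, (i + 1) * size);
}

// Query for process i (0-based) of n, matching on the last character of _id:
const i = 0, n = 4;   // -> ["0", "1", "2", "3"], as in the first query above
db.col.aggregate([
  { $match: { $expr: { $in: [
    { $substr: [ { $toString: "$_id" }, 23, 1 ] },
    bucketsForProcess(i, n)
  ] } } }
]);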

Multiple $and $or in a MongoDb find

I have a query I can't figure out. That query returns nothing:
const read = await db.collection("action_traces")
.find(
{ $and: [
{ $or:
[
{"receipt.receiver": "newdexpocket"},
{"act.account": "newdexpocket"}
]
},
{ $and:
[
{"block_time":{ "$gte": "2018-10-02T00:00:00.000Z" } },
{"block_time":{ "$lt": "2018-10-03T00:00:00.000Z" } }
]
}
]
})
Whereas the following request returns part of the results I want:
const read = await db.collection("action_traces")
.find(
{ $and: [
{ $or:
[
{"receipt.receiver": "newdexpocket"},
{"act.account": "newdexpocket"}
]
},
{"block_time":{ "$lt": 2018-10-03T00:00:00.000Z } }
]
})
The issue is simple: how can I get rid of the documents that are older than 2018-10-02T00:00:00.000Z? What am I doing wrong in my first request?
Many thanks in advance,
The $and between the criteria is implied; the query below should be equivalent to what you're trying to achieve (if I understood it right). Please test it.
db.collection("action_traces")
.find({ $or:[
{"receipt.receiver": "newdexpocket"},
{"act.account": "newdexpocket"}
],
"block_time":{ "$gte": "2018-10-02T00:00:00.000Z" ,"$lt": "2018-10-03T00:00:00.000Z" }
})
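If block_time is actually stored as a BSON date rather than an ISO-8601 string (an assumption; the question does not say which), the same combined range filter would need Date objects for the bounds:
db.collection("action_traces")
  .find({
    $or: [
      {"receipt.receiver": "newdexpocket"},
      {"act.account": "newdexpocket"}
    ],
    // Date objects instead of strings, for comparing against BSON date values
    "block_time": {
      "$gte": new Date("2018-10-02T00:00:00.000Z"),
      "$lt": new Date("2018-10-03T00:00:00.000Z")
    }
  })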

Aggregation with update in mongoDB

I have a collection with many similarly structured documents; two of the documents look like this:
Input:
{
"_id": ObjectId("525c22348771ebd7b179add8"),
"cust_id": "A1234",
"score": 500,
"status": "A"
"clear": "No"
}
{
"_id": ObjectId("525c22348771ebd7b179add9"),
"cust_id": "A1234",
"score": 1600,
"status": "B"
"clear": "No"
}
By default, clear is "No" for every document.
Requirement: I have to add up the score of all documents with the same cust_id, provided they have status "A" or "B". If the total exceeds 2000, then I have to update the clear attribute to "Yes" for every document with that cust_id.
Expected output:
{
"_id": ObjectId("525c22348771ebd7b179add8"),
"cust_id": "A1234",
"score": 500,
"status": "A"
"clear": "Yes"
}
{
"_id": ObjectId("525c22348771ebd7b179add9"),
"cust_id": "A1234",
"score": 1600,
"status": "B"
"clear": "Yes"
}
clear becomes "Yes" because 500 + 1600 = 2100, and 2100 > 2000.
My Approach:
I was only able to get the sum with the aggregation framework, but failed at the update:
db.aggregation.aggregate([
{$match: {
$or: [
{status: 'A'},
{status: 'B'}
]
}},
{$group: {
_id: '$cust_id',
total: {$sum: '$score'}
}},
{$match: {
total: {$gt: 2000}
}}
])
Please suggest how I should proceed.
After a lot of trouble experimenting in the mongo shell, I finally got a solution to my question.
Pseudocode:
# To get the list of customers whose score is greater than 2000
cust_to_clear=db.col.aggregate(
{$match:{$or:[{status:'A'},{status:'B'}]}},
{$group:{_id:'$cust_id',total:{$sum:'$score'}}},
{$match:{total:{$gt:2000}}})
# Loop through the result fetched above and update clear
cust_to_clear.result.forEach(function(x) {
    db.col.update({cust_id: x._id}, {$set: {clear: 'Yes'}}, {multi: true});
});
Please comment if you have a different solution to the same question.
With Mongo 4.2 it is now possible to do this using an update with an aggregation pipeline. Example 2 from the documentation shows how to do conditional updates:
db.runCommand(
{
update: "students",
updates: [
{
q: { },
u: [
{ $set: { average : { $avg: "$tests" } } },
{ $set: { grade: { $switch: {
branches: [
{ case: { $gte: [ "$average", 90 ] }, then: "A" },
{ case: { $gte: [ "$average", 80 ] }, then: "B" },
{ case: { $gte: [ "$average", 70 ] }, then: "C" },
{ case: { $gte: [ "$average", 60 ] }, then: "D" }
],
default: "F"
} } } }
],
multi: true
}
],
ordered: false,
writeConcern: { w: "majority", wtimeout: 5000 }
}
)
Another example:
db.c.update({}, [
{$set:{a:{$cond:{
if: {}, // some condition
then:{} , // val1
else: {} // val2 or "$$REMOVE" to not set the field or "$a" to leave existing value
}}}}
]);
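As a concrete instance of that pattern applied to the fields in this question (a hypothetical illustration only: a pipeline update evaluates each document on its own, so this checks a single document's score rather than the per-cust_id total the question asks for; for that you still need one of the aggregate-then-update or $merge approaches in the other answers). Note that updateMany with a pipeline requires MongoDB 4.2+:
db.aggregation.updateMany({}, [
  {$set: {clear: {$cond: {
    if: {$gt: ["$score", 2000]},   // condition, evaluated per document
    then: "Yes",
    else: "$clear"                 // keep the existing value otherwise
  }}}}
]);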
You need to do this in two steps:
Identify customers (cust_id) with a total score greater than 2000
For each of these customers, set clear to Yes
You already have a good solution for the first part. The second part should be implemented as separate update() calls to the database.
In the mongo shell, this looks roughly like:
// Get the list of customers using the aggregation framework
var cust_to_clear = db.col.aggregate([
    {$match: {$or: [{status: 'A'}, {status: 'B'}]}},
    {$group: {_id: '$cust_id', total: {$sum: '$score'}}},
    {$match: {total: {$gt: 2000}}}
]);

// Loop over the customers and set "clear" to "Yes" on all of their documents
cust_to_clear.forEach(function(customer) {
    db.col.update(
        {"cust_id": customer._id},
        {"$set": {"clear": "Yes"}},
        {"multi": true}
    );
});
This isn't ideal because you have to make a database call for every customer. If you need to do this kind of operation often, you might revise your schema to include the total score in each document. (This would have to be maintained by your application.) In this case, you could do the update with a single command:
db.col.update(
{"total_score": {"$gt": 2000}},
{"$set": {"clear": "Yes"}},
{"multi": true}
)
Short Answer: To avoid looping over the results of a separate database query, just add $merge to the end of the pipeline and specify your collection, like so:
db.aggregation.aggregate([
{$match: {
$or: [
{status: 'A'},
{status: 'B'}
]
}},
{$group: {
_id: '$cust_id',
total: {$sum: '$score'}
}},
{$match: {
total: {$gt: 2000}
}},
{ $merge: "<collection name here>"}
])
Elaboration: The current solution loops through the results of a database query, which is not time-efficient and takes a lot more code.
Mitar's answer is not updating through an aggregation, but the opposite: using an aggregation within Mongo's update. If you're wondering what the advantage of doing it this way is, you can use the whole aggregation pipeline, as opposed to being restricted to only the few stages allowed inside an update (as specified in the documentation).
Here is an example of an aggregate that won't work with Mongo's update:
db.getCollection('foo').aggregate([
{ $addFields: {
testField: {
$in: [ "someValueInArray", '$arrayFieldInFoo']
}
}},
{ $merge : "foo" }]
)
This will output the updated collection with a new testField that is true if "someValueInArray" is in "arrayFieldInFoo" and false otherwise. This is currently NOT possible with Mongo's update, since $in cannot be used inside an update's aggregation pipeline.
Update: Changed from $out to $merge, since $out only works when you are updating the entire collection ($out replaces the whole collection with the result of the aggregate). $merge only overwrites the documents the aggregate matches (much safer).
Since MongoDB 2.6, it has been possible to write the output of an aggregation query to a collection within the same command.
More information here : http://docs.mongodb.org/master/reference/operator/aggregation/out/
The solution I found uses $out.
*) e.g. adding a field:
db.socios.aggregate(
[
{
$lookup: {
from: 'cuotas',
localField: 'num_socio',
foreignField: 'num_socio',
as: 'cuotas'
}
},
{
$addFields: { codigo_interno: 1001 }
},
{
$out: 'socios' //Collection to modify
}
]
)
*) e.g. modifying a field:
db.socios.aggregate(
[
{
$lookup: {
from: 'cuotas',
localField: 'num_socio',
foreignField: 'num_socio',
as: 'cuotas'
}
},
{
$set: { codigo_interno: 1001 }
},
{
$out: 'socios' //Collection to modify
}
]
)