MongoDB aggregate group by with condition - mongodb

Example documents
{
"user_id": "1",
"key": "key1",
"percent": 95
},
{
"user_id": "1",
"key": "key1",
"percent": 50
},
{
"user_id": "1",
"key": "key2",
"percent": 50
},
{
"user_id": "1",
"key": "key3",
"percent": 50
},
{
"user_id": "1",
"key": "key3",
"percent": 70
}
I want to extract document that percent less than 95.
When there are multiple documents with a specific key, that one is greater than 95 and one is less than 95, the document with that key must not be displayed. (In the above example, key1 corresponds. Because one is greater than 95 and the other is less than 95.)
And If all of multiple documents have a percentage below 95, the document with the highest percentage must be extracted.
Current query that I used
db.test.aggregate([
{
"$match": {
"percent": {
"$lte": 95
},
"user_id": {
"$eq": user_id,
},
}
},
{
"$group": {
"_id": "$key",
"max": {"$max": "$percent"}
}
},
{
"$project": {
"_id": 1,
"percent": "$max"
}
},
])
Result
{"key": "key1", "percent": 50}
{"key": "key2", "percent": 50}
{"key": "key3", "percent": 70}
According to the conditions described above, key1 should be excluded from the results because there are documents with a percentage of 95 or higher.
Expected result
{"key": "key2", "percent": 50}
{"key": "key3", "percent": 70}
Is this a problem that can be solved with MongoDB's query?
Thanks!

db.test.aggregate([
{
"$match": {
"user_id": {
"$eq": user_id,
},
}
},
{
"$group": {
"_id": "$key",
"max": {"$max": "$percent"}
}
},
{
"$match": {
"max": {
"$lte": 95
}
}
},
{
"$project": {
"_id": 1,
"percent": "$max"
}
},
])
After changing order of match query, it works perfectly.

Related

Compare duplicates by Score By Not Max Field or Earlier Date

I need to compare duplicated documents and get the duplicated ones with the Lowest Score.
If the Score between two duplicates is Equal, then get the one with earlier date.
{
"_id": UUID("c77c72de-edd8-4576-a72c-983cf93a0f31"),
"DocumentId": "05240423067",
"Name": "John Doe",
"CreationDate": ISODate("0001-01-01T00:00:00.000+00:00"),
"Score": 5,
},
{
"_id": UUID("b5a7d404-a341-45dd-b875-864cd1e6bda2"),
"DocumentId": "05240423067",
"Name": "John Doe",
"CreationDate": ISODate("2021-07-17T00:00:00.000+00:00"),
"Score": 2
},
{
"_id": UUID("9efddd23-4b6b-4e96-ab43-b24a080107db"),
"DocumentId": "05240423067",
"Name": "John Doe",
"CreationDate": ISODate("2021-07-10T00:00:00.000+00:00"),
"Score": 2
},
{
"_id": UUID("f1a063a5-f9dd-4998-b6aa-df2071dd8677"),
"DocumentId": "88313825863",
"Name": "Marcus Joseph",
"CreationDate": ISODate("2021-07-17T00:00:00.000+00:00"),
"Score": 2
},
{
"_id": UUID("e3262f8e-bd6a-49e8-abe5-c3c1a4e49900"),
"DocumentId": "88313825863",
"Name": "Marcus Joseph",
"CreationDate": ISODate("0001-01-01T00:00:00.000+00:00"),
"Score": 1
}
Later, the resulting documents will be deleted.
Expected Result:
{
"_id": UUID("b5a7d404-a341-45dd-b875-864cd1e6bda2"),
"DocumentId": "05240423067",
"Name": "John Doe",
"CreationDate": ISODate("2021-07-17T00:00:00.000+00:00"),
"Score": 2 // Return Documents with the **Lowest Score**
},
{
"_id": UUID("9efddd23-4b6b-4e96-ab43-b24a080107db"),
"DocumentId": "05240423067",
"Name": "John Doe",
"CreationDate": ISODate("2021-07-10T00:00:00.000+00:00"),
"Score": 2 // Return Documents with the **Lowest Score**
},
{
"_id": UUID("e3262f8e-bd6a-49e8-abe5-c3c1a4e49900"),
"DocumentId": "88313825863",
"Name": "Marcus Joseph",
"CreationDate": ISODate("0001-01-01T00:00:00.000+00:00"),
"Score": 2 // If both Scores Equal, Compare CreationDate earlier
}
Mongo Version 4.2.21
This would be easier with some of the newer "$group" accumulators introduced in more recent versions of MongoDB, but here's one way you could do it.
db.collection.aggregate([
{
"$group": {
"_id": "$DocumentId",
"count": {"$sum": 1},
"docs": {"$push": "$$ROOT"}
}
},
{ // if only 1, keep it
"$match": {
"$expr": {"$gt": ["$count", 1]}
}
},
{ // find the doc to keep
"$set": {
"keepDoc": {
"$reduce": {
"input": "$docs",
"initialValue": {
"Score": {"$minKey": 1}
},
"in": {
"$switch": {
"branches": [
{
"case": {"$gt": ["$$this.Score", "$$value.Score"]},
"then": "$$this"
},
{
"case": {"$eq": ["$$this.Score", "$$value.Score"]},
"then": {
"$cond": [
{"$gt": ["$$this.CreationDate", "$$value.CreationDate"]},
"$$this",
"$$value"
]
}
}
],
"default": "$$value"
}
}
}
}
}
},
{ // get docs other than keepDoc
"$project": {
"_id": 0,
"expiredDocs": {
"$filter": {
"input": "$docs",
"cond": {"$ne": ["$$this", "$keepDoc"]}
}
}
}
},
{"$unwind": "$expiredDocs"},
{"$replaceWith": "$expiredDocs"}
])
Try it on mongoplayground.net.
N.B.: On mongoplayground.net, there's no easy way that I know of to enter binary UUID values in the BSON configuration, so I just used strings. It should be inconsequential to the pipeline.

How can I get only specific object from nested array mongodb

I'm using mongoDB with PHP. I know how to get the document based on product_id, but I need only a specific object from the whole document, I don't know how to get only specific object from nested array based on product_id.
for ex. My expected output is:
products": [
{
"product_id": 547,
"name": "cola",
"quantity": 24
}
]
then make some changes on object p.s update the quantity then update it to the database.
My collection looks like
"_id": {
"$oid": "62ed30855836b16fd38a00b9"
},
"name": "drink",
"products": [
{
"product_id": 547,
"name": "cola",
"quantity": 24
},
{
"product_id": 984,
"name": "fanta",
"quantity": 42
},
{
"product_id": 404,
"name": "sprite",
"quantity": 12
},
{
"product_id": 854,
"name": "water",
"quantity": 35
}
]
}
Try this:
db.getCollection('test').aggregate([
{
"$unwind": "$products"
},
{
"$match": {
"products.product_id": 547
}
},
{
"$replaceRoot": {
"newRoot": {
"$mergeObjects": [
"$$ROOT",
"$products"
]
}
}
},
{
"$project": {
"products": 0
}
}
])
The query gives the following output:
{
"name" : "cola",
"product_id" : 547,
"quantity" : 24
}

MongoDB groupby - pull comma separated values in a field

I want to groupby the collection and want to pull the comma-separated values.
In the below example, I want to group by "type" and want to pull $sum of "total" and all possible unique values of "value" in a field that should be comma seperated.
collection:
[
{
"type": "1",
"value": "value1",
"total": 10
},
{
"type": "1",
"value": "value3",
"total": 20
},
{
"type": "1",
"value": "value3",
"total": 30
},
{
"type": "2",
"value": "value1",
"total": 10
},
{
"type": "2",
"value": "value2",
"total": 20
}
]
The output that I am expecting:
[
{
"type": "1",
"value": "value1,value3",
"total": 60
},
{
"type": "2",
"value": "value1,value2",
"total": 30
}
]
Please help to provide the approach or code.
This can be achieved just with $group and $project aggregation methods:
https://mongoplayground.net/p/230nt_AMFIm
db.collection.aggregate([
{
$group: {
_id: "$type",
value: {
$addToSet: "$value"
},
total: {
$sum: "$total"
}
},
},
{
$project: {
_id: 0,
type: "$_id",
value: "$value",
total: "$total"
}
},
])

Group by an optional field in mongodb

I would like to independently group the results of an or clause, including overlap. The data set is rather large so running 2 queries sequentially will result in an undesirable wait time. I am hoping I can somehow project which clause returned the corresponding data. Given this data set:
[
{
"_id": 1,
"item": "abc",
"name": "Michael",
"price": NumberDecimal("10"),
"quantity": NumberInt("2"),
"date": ISODate("2014-03-01T08:00:00Z")
},
{
"_id": 2,
"item": "jkl",
"name": "Toby",
"price": NumberDecimal("20"),
"quantity": NumberInt("1"),
"date": ISODate("2014-03-01T09:00:00Z")
},
{
"_id": 3,
"item": "xyz",
"name": "Keith",
"price": NumberDecimal("5"),
"quantity": NumberInt("10"),
"date": ISODate("2014-03-15T09:00:00Z")
},
{
"_id": 4,
"item": "abc",
"name": "Dwight",
"price": NumberDecimal("5"),
"quantity": NumberInt("20"),
"date": ISODate("2014-04-04T11:21:39.736Z")
},
{
"_id": 5,
"item": "abc",
"name": "Ryan",
"price": NumberDecimal("10"),
"quantity": NumberInt("10"),
"date": ISODate("2014-04-04T21:23:13.331Z")
},
{
"_id": 6,
"item": "def",
"name": "Jim",
"price": NumberDecimal("7.5"),
"quantity": NumberInt("5"),
"date": ISODate("2015-06-04T05:08:13Z")
},
{
"_id": 7,
"item": "abc",
"name": "Keith",
"price": NumberDecimal("7.5"),
"quantity": NumberInt("10"),
"date": ISODate("2015-09-10T08:43:00Z")
},
{
"_id": 8,
"item": "abc",
"name": "Michael",
"price": NumberDecimal("10"),
"quantity": NumberInt("5"),
"date": ISODate("2016-02-06T20:20:13Z")
},
]
I would like to receive this result:
[{
"_id": {
"name": "Keith"
},
"count": 2
},
{
"_id": {
"item": "abc",
},
"count": 5
}]
Here is what I have tried so far:
db.collection.aggregate([
{
$match: {
$or: [
{
item: "abc"
},
{
name: "Keith"
}
]
}
},
{
$group: {
_id: {
item: "$item",
name: "$name"
},
count: {
$sum: 1
}
}
}
])
You can use $facet to get multiple aggregation pipelines into the same stage in this way:
Using $facet there are two "outputs" one group by name and other by item.
In each one there are multiple stages:
First $match to process only documents you want.
Then $group with _id name or item, and $count to get the total.
db.collection.aggregate([
{
"$facet": {
"groupByName": [
{
"$match": {"name": "Keith"}
},
{
"$group": {"_id": "$name","count": {"$sum": 1}}
}
],
"groupByItem": [
{
"$match": {"item": "abc"}
},
{
"$group": {"_id": "$item","count": {"$sum": 1}}
}
]
}
}
])
Example here
The output is:
{
"groupByItem": [
{
"_id": "abc",
"count": 5
}
],
"groupByName": [
{
"_id": "Keith",
"count": 2
}
]
}
Here it is:
mongos> db.n.aggregate([ { $facet:{ names:[ {$match:{name:"Keith"}} , {$group:{_id:{name:"$name"}, count:{$sum:1}}} ] , items:[ {$match:{item:"abc"}},{ $group:{_id:{item:"$item"}, count:{$sum:1}} } ] } } , {$project:{ "namesANDitems":{$concatArrays:[ "$names","$items" ]} }} ,{$unwind:"$namesANDitems"} ,{$replaceRoot:{newRoot:"$namesANDitems"} } ]).pretty()
{ "_id" : { "name" : "Keith" }, "count" : 2 }
{ "_id" : { "item" : "abc" }, "count" : 5 }
mongos>
explained:
You create two pipes via $facet
Match in every facet pipe what you need to group pipe1=names , pipe2=items
Join the arrays from the two pipes in single array named "namesANDitems"
Convert the array to object with $unwind
Remove the temporary object name namesANDitems so you have only the two objects as requested

MongoDB projection. Operator $add field|expression array awareness or after $slice

I've got collection that looks like:
[{
"org": "A",
"type": "simple",
"payFor": 3,
"price": 100
},
{
"org": "A",
"type": "custom",
"payFor": 2,
"price": 115
},
{
"org": "B",
"type": "simple",
"payFor": 1,
"price": 110
},
{
"org": "B",
"type": "custom",
"payFor": 2,
"price": 200
},
{
"org": "B",
"type": "custom",
"payFor": 4,
"price": 220
}]
And need to produce result with query to perform group by "org" where payments appears for only first "payFor" prices in "type".
I'm trying to use expression result by $slice operator in $add but this is not works.
pipeline:
[{
"$group": {
"_id": {
"org": "$org",
"type": "$type"
},
"payFor": {
"$max": "$payFor"
},
"count": {
"$sum": 1
},
"prices": {
"$push": "$price"
}
}
},
{
"$group": {
"_id": "$_id.org",
"payments": {
"$push": {
"type": "$_id.type",
"forFirst": "$payFor",
"sum": {
"$cond": [
{
"$gte": [
"$payFor",
"$count"
]
},
{
"$add": {
"$prices": {
"$slice": "$count"
}
}
},
{
"$add": "$prices"
}
]
}
}
}
}
}]
I know that it is possible to traverse unwinded prices and pick only "payFor" count of them. but result collections are more rich than in example above and this operation will produce some unecessary overheads.
Need some advice from community. Please. Thanks.