MongoDB Ignoring duplicate documents using unique key in aggregate - mongodb

Documents looks like this.
{
"sId": "s1",
"language": "hindi",
"service": "editing",
"count": 5,
},
{
"sId": "s2",
"language": "hindi",
"service": "editing",
"count": 6,
},
{
"sId": "s2",
"language": "hindi",
"service": "reading",
"count": 6,
},
{
"sId": "s3",
"language": "english",
"service": "reading",
"count": 10,
}
I want the result should be like this
{
"language":"hindi",
"count": 11
},
{
"language":"english",
"count": 10
}
I tried with the aggregate query like this
{
"$group": {
"_id": {
"lang": "$language",
"sId": "$sId"
},
"count": {"$sum": "$count"}
}
}
In sId: s2 it should ignore the second object.
Can anyone please give me a hint how I can achieve the above.

You can use $first to get the first element of each group. You can then use $group again to sum by language.
{
"$group": {
"_id": {
"language": "$language",
"sId": "$sId"
},
"count": {
"$first": {
"$sum": "$count"
}
}
}
}
https://mongoplayground.net/p/3_RjSt1wtRS

Related

Aggregate occurrences of events in nested array

Given the following input:
[
{
"statuses": [
{
"status": "allowed",
"count": 3,
"events_count": [
"2001",
"1001",
"1001"
]
}
],
"date": "2022-09-10 15:00",
"_id": "2022-09-10 15:00"
}
]
I need count the number of occurrences of stauses.events_count, so the output would be:
[
{
"statuses": [
{
"status": "allowed",
"count": 3,
"events_count": [
{"type": "2001", "count": 1},
{"type": "1001", "count": 2},
]
}
],
"date": "2022-09-10 15:00",
"_id": "2022-09-10 15:00"
}
]
What I've tried
This is what I got so far:
db.collection.aggregate([
{
"$unwind": "$statuses"
},
{
"$unwind": "$statuses.events_count"
},
{
"$group": {
"_id": {
"event_count": "$statuses.events_count",
"status": "$statuses.status",
"date": "$date",
"count": "$statuses.count"
},
"occurences": {
"$sum": 1
}
}
}
])
Which produces:
[
{
"_id": {
"count": 3,
"date": "2022-09-10 15:00",
"event_count": "2001",
"status": "allowed"
},
"occurences": 1
},
{
"_id": {
"count": 3,
"date": "2022-09-10 15:00",
"event_count": "1001",
"status": "allowed"
},
"occurences": 2
}
]
I'm having difficulties grouping everything back together. I tried grouping by date and pushing back to a 'statuses' array, but it produces two items in the array (with status==allowed), rather than 1 item with status==allowed
You did 2 $unwinds, so it should be 2 $groups in reverse order:
{
"$group": {
"_id": {
"status": "$_id.status",
"count": "$_id.count",
"date": "$_id.date"
},
"event_count": {
"$push": {
"type": "$_id.event_count",
"count": "$occurences"
}
}
}
},
{
"$group": {
"_id": "$_id.date",
"date": {"$last": "$_id.date"},
"statuses": {
"$push": {
"status": "$_id.status",
"count": "$_id.count",
"event_count": "$event_count"
}
}
}
}

Compare duplicates by Score By Not Max Field or Earlier Date

I need to compare duplicated documents and get the duplicated ones with the Lowest Score.
If the Score between two duplicates is Equal, then get the one with earlier date.
{
"_id": UUID("c77c72de-edd8-4576-a72c-983cf93a0f31"),
"DocumentId": "05240423067",
"Name": "John Doe",
"CreationDate": ISODate("0001-01-01T00:00:00.000+00:00"),
"Score": 5,
},
{
"_id": UUID("b5a7d404-a341-45dd-b875-864cd1e6bda2"),
"DocumentId": "05240423067",
"Name": "John Doe",
"CreationDate": ISODate("2021-07-17T00:00:00.000+00:00"),
"Score": 2
},
{
"_id": UUID("9efddd23-4b6b-4e96-ab43-b24a080107db"),
"DocumentId": "05240423067",
"Name": "John Doe",
"CreationDate": ISODate("2021-07-10T00:00:00.000+00:00"),
"Score": 2
},
{
"_id": UUID("f1a063a5-f9dd-4998-b6aa-df2071dd8677"),
"DocumentId": "88313825863",
"Name": "Marcus Joseph",
"CreationDate": ISODate("2021-07-17T00:00:00.000+00:00"),
"Score": 2
},
{
"_id": UUID("e3262f8e-bd6a-49e8-abe5-c3c1a4e49900"),
"DocumentId": "88313825863",
"Name": "Marcus Joseph",
"CreationDate": ISODate("0001-01-01T00:00:00.000+00:00"),
"Score": 1
}
Later, the resulting documents will be deleted.
Expected Result:
{
"_id": UUID("b5a7d404-a341-45dd-b875-864cd1e6bda2"),
"DocumentId": "05240423067",
"Name": "John Doe",
"CreationDate": ISODate("2021-07-17T00:00:00.000+00:00"),
"Score": 2 // Return Documents with the **Lowest Score**
},
{
"_id": UUID("9efddd23-4b6b-4e96-ab43-b24a080107db"),
"DocumentId": "05240423067",
"Name": "John Doe",
"CreationDate": ISODate("2021-07-10T00:00:00.000+00:00"),
"Score": 2 // Return Documents with the **Lowest Score**
},
{
"_id": UUID("e3262f8e-bd6a-49e8-abe5-c3c1a4e49900"),
"DocumentId": "88313825863",
"Name": "Marcus Joseph",
"CreationDate": ISODate("0001-01-01T00:00:00.000+00:00"),
"Score": 2 // If both Scores Equal, Compare CreationDate earlier
}
Mongo Version 4.2.21
This would be easier with some of the newer "$group" accumulators introduced in more recent versions of MongoDB, but here's one way you could do it.
db.collection.aggregate([
{
"$group": {
"_id": "$DocumentId",
"count": {"$sum": 1},
"docs": {"$push": "$$ROOT"}
}
},
{ // if only 1, keep it
"$match": {
"$expr": {"$gt": ["$count", 1]}
}
},
{ // find the doc to keep
"$set": {
"keepDoc": {
"$reduce": {
"input": "$docs",
"initialValue": {
"Score": {"$minKey": 1}
},
"in": {
"$switch": {
"branches": [
{
"case": {"$gt": ["$$this.Score", "$$value.Score"]},
"then": "$$this"
},
{
"case": {"$eq": ["$$this.Score", "$$value.Score"]},
"then": {
"$cond": [
{"$gt": ["$$this.CreationDate", "$$value.CreationDate"]},
"$$this",
"$$value"
]
}
}
],
"default": "$$value"
}
}
}
}
}
},
{ // get docs other than keepDoc
"$project": {
"_id": 0,
"expiredDocs": {
"$filter": {
"input": "$docs",
"cond": {"$ne": ["$$this", "$keepDoc"]}
}
}
}
},
{"$unwind": "$expiredDocs"},
{"$replaceWith": "$expiredDocs"}
])
Try it on mongoplayground.net.
N.B.: On mongoplayground.net, there's no easy way that I know of to enter binary UUID values in the BSON configuration, so I just used strings. It should be inconsequential to the pipeline.

Combining unique elements of arrays without $unwind

I would like to get the unique elements of all arrays in a collection. Consider the following collection
[
{
"collection": "collection",
"myArray": [
{
"name": "ABC",
"code": "AB"
},
{
"name": "DEF",
"code": "DE"
}
]
},
{
"collection": "collection",
"myArray": [
{
"name": "GHI",
"code": "GH"
},
{
"name": "DEF",
"code": "DE"
}
]
}
]
I can achieve this by using $unwind and $group like this:
db.collection.aggregate([
{
$unwind: "$myArray"
},
{
$group: {
_id: null,
data: {
$addToSet: "$myArray"
}
}
}
])
And get the output:
[
{
"_id": null,
"data": [
{
"code": "GH",
"name": "GHI"
},
{
"code": "DE",
"name": "DEF"
},
{
"code": "AB",
"name": "ABC"
}
]
}
]
However, the array "myArray" will have a lot of elements (about 6) and the number of documents passed into this stage of the pipeline will be about 600. So unwinding the array would give me a total of 3600 documents being processed. I would like to know if there's a way for me to achieve the same result without unwinding
You can use below aggregation
db.collection.aggregate([
{ "$group": {
"_id": null,
"data": { "$push": "$myArray" }
}},
{ "$project": {
"data": {
"$reduce": {
"input": "$data",
"initialValue": [],
"in": { "$setUnion": ["$$this", "$$value"] }
}
}
}}
])
Output
[
{
"_id": null,
"data": [
{
"code": "AB",
"name": "ABC"
},
{
"code": "DE",
"name": "DEF"
},
{
"code": "GH",
"name": "GHI"
}
]
}
]

sort a pushed array in mongodb

I have a mongodb result like this.
[
{
"_id": {
"_id": "57174838afb8eb97ccd409ca",
"name": "Yet another position",
"description": "description",
"code": "11Y-WK"
},
"votes": [
{
"candidate": {
"_id": "56f19694e84a6bf1b66ad378",
"surname": "XXXXXXXXX",
"firstName": "XXXXXXXXX",
"middleName": "XXXXXXXXX",
"othername": " XXXXXXXXX XXXXXXXXX",
"sc_number": "071050"
},
"count": 3
},
{
"candidate": {
"_id": "56f19690e84a6bf1b66aa558",
"surname": "XXXXXXXXX",
"othername": "XXXXXXXXX XXXXXXXXX",
"sc_number": "034837"
},
"count": 2
},
{
"candidate": {
"_id": "56f19690e84a6bf1b66aa2f3",
"surname": "XXXXXXXXX",
"othername": "XXXXXXXXX XXXXXXXXX",
"sc_number": "008243"
},
"count": 4
}
],
"total_count": 9
},
{
"_id": {
"_id": "571747a8afb8eb97ccd409c7",
"name": "Test Position",
"description": "Description",
"code": "10T-9K"
},
"votes": [
{
"candidate": {
"_id": "56f19690e84a6bf1b66aa3b7",
"surname": "XXXXXXXXX",
"othername": "XXXXXXXXX",
"sc_number": "044660"
},
"count": 1
},
{
"candidate": {
"_id": "56f19690e84a6bf1b66aa6ea",
"surname": "XXXXXXXXX",
"othername": "XXXXXXXXX",
"sc_number": "062444"
},
"count": 5
},
{
"candidate": {
"_id": "56f1968fe84a6bf1b66aa03e",
"surname": "XXXXXXXXX",
"othername": "XXXXXXXXX",
"sc_number": "042357"
},
"count": 3
}
],
"total_count": 9
}
]
I need to sort by count in descending order.
Below is the query that returns the result above.
I've tried applying sort at all stages but with no luck.
Vote.aggregate([
{ "$match": { "_poll" : mongoose.mongo.ObjectID(req.query._poll) } },
{
"$group": {
"_id": {
"_position": '$_position',
"candidate": '$candidate'
},
"voteCount": { "$sum": 1 }
}
},
{
"$group": {
"_id": "$_id._position",
"votes": {
"$push": {
"candidate": "$_id.candidate",
"count": "$voteCount"
}
},
"total_count": { "$sum": "$voteCount" }
}
},
{ "$sort": { "total_count": -1 } }
Where I need to insert sort operation, to sort by count in descending order.
To sort an array during aggregation it is a bit tricky stuff.
So what I am duinfg to solve that is:
unwinding array
apply sort
regroup array
please find mongo shell code which will should give an overview ot this technique:
var group = {$group:{_id:"$type", votes:{$push:"$value" }}}
var unwind={$unwind:"$votes"}
var sort = {$sort:{"votes":-1}}
var reGroup = {$group:{_id:"$_id", votes:{$push:"$votes" }}}
db.readings.aggregate([group,unwind,sort ,reGroup])
any comments welcome!

MongoDB projection. Operator $add field|expression array awareness or after $slice

I've got collection that looks like:
[{
"org": "A",
"type": "simple",
"payFor": 3,
"price": 100
},
{
"org": "A",
"type": "custom",
"payFor": 2,
"price": 115
},
{
"org": "B",
"type": "simple",
"payFor": 1,
"price": 110
},
{
"org": "B",
"type": "custom",
"payFor": 2,
"price": 200
},
{
"org": "B",
"type": "custom",
"payFor": 4,
"price": 220
}]
And need to produce result with query to perform group by "org" where payments appears for only first "payFor" prices in "type".
I'm trying to use expression result by $slice operator in $add but this is not works.
pipeline:
[{
"$group": {
"_id": {
"org": "$org",
"type": "$type"
},
"payFor": {
"$max": "$payFor"
},
"count": {
"$sum": 1
},
"prices": {
"$push": "$price"
}
}
},
{
"$group": {
"_id": "$_id.org",
"payments": {
"$push": {
"type": "$_id.type",
"forFirst": "$payFor",
"sum": {
"$cond": [
{
"$gte": [
"$payFor",
"$count"
]
},
{
"$add": {
"$prices": {
"$slice": "$count"
}
}
},
{
"$add": "$prices"
}
]
}
}
}
}
}]
I know that it is possible to traverse unwinded prices and pick only "payFor" count of them. but result collections are more rich than in example above and this operation will produce some unecessary overheads.
Need some advice from community. Please. Thanks.