Find duplicate inside an array mongodb - mongodb

I have a Mongo collection called Users, structured like this:
_id: '1234aaa',
profile: {
Organizations: [A,B,C,A,B,A]
_id: '1234bbb',
profile: {
Organizations: [A,B,C]
_id: '1234ccc',
profile: {
Organizations: [A,B,C,C]
How do I return a list of all the documents in my collection ONLY if they have a duplicate value under profile.organizations?
The expected result would be:
DupesUsers: {
User: '1234aaa,
Dupes: [A,B]
User: '1234ccc,
Dupes: [C]
I've tried using Aggregate:
{$unwind: "$profile.organizations"},
{ $project: {_id: '$_id', org: '$profile.organizations'} },
{ $group: {
_id: null,
occurances: {$push: {'org': '$_id', count: '$count'}}
but I just can't seem to wrap my head around it.

You're not far off; just some minor tweaks are needed:
"$unwind" : "$profile.organizations"
"$group" : {
"_id" : {
"dup" : "$profile.organizations",
"id" : "$_id"
"count" : {
"$sum" : 1.0
"$match" : {
"count" : {
"$gt" : 1.0
"$group" : {
_id: "$",
Dupes: {$push: "$_id.dup"}

You can use below aggregation
{ "$project": {
"Dupes": {
"$filter": {
"input": { "$setUnion": ["$profile.Organizations"] },
"as": "s",
"cond": {
"$gt": [
{ "$size": {
"$filter": {
"input": "$profile.Organizations",
"cond": { "$eq": ["$$this", "$$s"] }


Simple MongoDB Aggregation

I'm a bit confused on how to group using aggregation but still be able to extract specific values from arrays:
{ "$unwind": f"${stat_type}" },
"$group": {
"_id": "$userId",
"value" : { "$max" : f"${stat_type}.stat_value" },
"character" : f"${stat_type}.character_name", <-- how do I extract this value that matches where the $max from above is grabbed.
{ "$sort": { "value": -1 }},
{ '$limit' : 30 }
Sample Entries:
'name' : "Tony",
'userId' : 12345,
'damage_dealt' : [
"character_name" : "James",
"stat_value" : 100243
'name' : "Jimmy",
'userId' : 12346,
'damage_dealt' : [
"character_name" : "James",
"stat_value" : 1020243
'name' : "Tony",
'userId' : 12345,
'damage_dealt' : [
"character_name" : "Lebron",
"stat_value" : 99900243
A sample output for what I'm looking for is below:
'user' : 'Tony'
'character_name' : 'Lebron',
'stat_value' : 99900243
'user' : 'Jimmy'
'character_name' : 'James',
'stat_value' : 1020243
You can use the $top accumulator to achieve the desired result. Like this:
"$unwind": "$damage_dealt"
"$group": {
"_id": "$userId",
"value": {
$top: {
output: {
character_name: "$damage_dealt.character_name",
stat_value: "$damage_dealt.stat_value"
sortBy: {
"damage_dealt.stat_value": -1
"$project": {
character_name: "$value.character_name",
stat_value: "$value.stat_value"
"$sort": {
"stat_value": -1
"$limit": 30
Playground link.
Alternatively, collect all of the group's elements in an array along with the max stat_value, then pick the object from the array that contains the max stat_value.
"$unwind": "$damage_dealt"
"$group": {
"_id": "$userId",
"max_stat": {
"$max": "$damage_dealt.stat_value"
"damages": {
"$push": {
name: "$name",
damage_value: "$damage_dealt"
"$project": {
"damages": {
"$arrayElemAt": [
"$filter": {
"input": "$damages",
"as": "damage",
"cond": {
"$eq": [
"$project": {
"character_name": "$damages.damage_value.character_name",
"stat_value": "$damages.damage_value.stat_value",
"name": "$"
"$sort": {
"stat_value": -1
"$limit": 30
Playground link.
Here's another way you could do it.
"$group": {
"_id": "$userId",
"user": {"$first": "$name"},
"damage_dealts": {"$push": "$damage_dealt"},
"maxStat": {"$max": {"$first": "$damage_dealt.stat_value"}}
"$set": {
"outChar": {
"$first": {
"$arrayElemAt": [
{"$indexOfArray": ["$damage_dealts.stat_value", "$maxStat"]}
"$project": {
"user": 1,
"character_name": "$outChar.character_name",
"stat_value": "$outChar.stat_value"
{"$sort": {"stat_value": -1}},
{"$limit": 30}
Try it on

Mongodb: $unwind and compute $avg

I have documents storing IoT data.
Following MongoDB schema design best practices for IoT, I came to documents with the following structure:
"_id" : "AQ106_2020-09-12T09",
"date" : "2020-09-12T09:00:00.000Z",
"station" : {
"name" : "AQ106",
"loc" : {
"type" : "Point",
"coordinates" : [
"properties" : {
"samples" : [
"t" : ISODate("2020-09-12T11:02:00.000+02:00"),
"data" : {
"pm1_mg_m3" : 2.7,
"pm2_5_mg_m3" : 4.6,
"pm10_mg_m3" : 12,
"P0" : 152,
"P1" : 16,
"P2" : 4.7,
"P3" : 0.8,
"P4" : 0.86,
"P5" : 0.6,
"P6" : 0.28,
"P7" : 0.152,
"P8" : 0.094,
"P9" : 0.092,
"P10" : 0.019,
"P11" : 0,
"P12" : 0,
"P13" : 0.0188,
"P14" : 0,
"P15" : 0,
"P16" : 0,
"P17" : 0,
"P18" : 0,
"P19" : 0,
"P20" : 0,
"P21" : 0,
"P22" : 0,
"P23" : 0,
"temp_celsius" : 32.59,
"humRelPercent" : 34,
"press_mBar" : 1010.79,
"CO2mA" : 4,
"NO2_WE_mV" : 226.419,
"NO2_AE_mV" : 229.553,
"OX_WE_mV" : 252.287,
"OX_AE_mV" : 220.419,
"CO_WE_mV" : 509.077,
"AE_WE_mV" : 348.51,
"batt_V" : 13.5,
"source_V" : 17.6
.... additional arrays
Now I want to compute hourly or daily averages (or another metric) to populate a new collection with only summarised data.
I coded the following solution for hourly means:
db.collection.aggregate([{$match: {
}}, {$unwind: {
path: "$samples"
}}, {$group: {
_id: "$date",
P0: {
$avg : "$"
This works, but I need to manually create a field for each property in the original document, and that's tedious.
Moreover, how can I group by both date and station?
You can find a working example here.
Let's start with the easy question, how to group on multiple fields? With a simple syntax change:
$group: {
_id: {
date: "$date",
station: "$"
Now for the second question, this will be a bit more tedious. Mongo does not support "merging" objects by their keys with custom logic (in this case $avg). So we will have to convert the object to an array, unwind it, calculate the average per field, and finally group to restore the required structure, like so:
$match: {
"": "AQ106"
$unwind: {
path: "$samples"
$addFields: {
objArr: {
"$objectToArray": "$"
$unwind: "$objArr"
$group: {
_id: {
date: "$date",
station: "$",
objKey: "$objArr.k"
value: {
$avg: "$objArr.v"
$addFields: {
data: {
"$arrayToObject": [
k: "$_id.objKey",
v: "$value"
$group: {
_id: {
date: "$",
station: "$_id.station"
data: {
"$mergeObjects": "$data"
$replaceRoot: {
newRoot: {
"$mergeObjects": [
------- EDIT ---------
For Mongo v4.4+ you can use $accumulator, which allows you to execute custom JavaScript code in your pipeline. I am unsure how this will fare against the native Mongo pipeline in terms of performance at scale.
One thing to note is that I added the initial $addFields stage under the assumption that different samples may have different keys. If this is not the case, it is not needed.
$addFields: {
sampleKeys: {
$reduce: {
input: {
$map: {
input: "$samples",
as: "sample",
in: {
$map: {
input: {
"$objectToArray": "$$"
as: "sampleArrItem",
in: "$$sampleArrItem.k"
initialValue: [],
in: {
"$setUnion": [
$addFields: {
samples: {
$accumulator: {
init: function(keys){
return => {return {k: {v: 0, c: 0}}});
initArgs: ["$sampleKeys"],
accumulateArgs: ["$samples"],
accumulate: function(state, sample){
Object.keys(state).forEach((key) => {
if (key in {
state[key].v +=[key];
return state;
merge: function(state1, state2){
Object.keys(state1).forEach((key) => {
state1[key].v += state2[key].v;
state1[key].c += state2[key].c;
return state1;
lang: "js"
$replaceRoot: {
newRoot: {
$mergeObject: [
{station: "$", date: "$date"},
I partially resolved my question in terms of grouping by multiple fields (the MongoDB documentation was not so clear in this regard, in my opinion):
$unwind: {
path: "$samples"
$group: {
_id: {
date: "$date",
station: "$"
P0: {
$avg: "$"
temp: {
$avg: "$"
Here is the updated working example.
Thanks to Tom Slabbaert, I solved my question with the following query:
$unwind: {
path: "$samples"
$addFields: {
objArr: {
"$objectToArray": "$"
$unwind: "$objArr"
$group: {
_id: {
date: "$date",
station: "$station",
objKey: "$objArr.k"
value: {
$avg: "$objArr.v"
$addFields: {
data: {
"$arrayToObject": [
k: "$_id.objKey",
v: "$value"
$group: {
_id: {
date: "$",
station: "$_id.station"
data: {
"$mergeObjects": "$data"
"$project": {
_id: "$",
station: "$_id.station",
data: 1
I wonder if it is possible to simplify the above solution using the new $function operator.

Mongodb project results into single document

I have a collection like this:
"_id" : ObjectId("5f4e81f1da5ea3cb7c248a8f"),
"type" : "TYPE_1",
"updateTime" : ISODate("2020-08-24T11:10:43.219+0000")
"_id" : ObjectId("5f4e8206da5ea3cb7c248a90"),
"type" : "TYPE_1",
"updateTime" : ISODate("2020-09-24T11:10:43.219+0000")
"_id" : ObjectId("5f4e821fda5ea3cb7c248a91"),
"type" : "TYPE_2",
"updateTime" : ISODate("2020-09-25T11:10:43.219+0000")
I want to know how many documents there are of each type and also obtain the date of the last global modification. For now I can get these results like this:
// Pipeline
// Stage 1
$group: {
_id : "$type",
count: { $sum: 1 },
lastUpdate: { "$max": "$updateTime" }
// Stage 2
$sort: {
lastUpdate : -1
With which I get the results this way:
"_id" : "TYPE_2",
"count" : 1.0,
"lastUpdate" : ISODate("2020-09-25T11:10:43.219+0000")
"_id" : "TYPE_1",
"count" : 2.0,
"lastUpdate" : ISODate("2020-09-24T11:10:43.219+0000")
So I have both the sum of each document and the last modification (thanks to the sort).
But I would like to project and get the results like this, in a single result document:
"type1" : 2.0,
"type2" : 1.0,
"lastUpdate" : ISODate("2020-09-25T11:10:43.219+0000")
@varman's answer is good; this is just a different way:
$group you have already done by yourself
$group create types array to combine all documents
$replaceWith to replace the root with the types field, converted to an object using $arrayToObject
$group: {
_id: "$type",
count: { $sum: 1 },
lastUpdate: { $max: "$updateTime" }
$group: {
_id: null,
types: {
$push: {
k: "$_id",
v: "$count"
lastUpdate: { $max: "$lastUpdate" }
$replaceWith: {
$mergeObjects: [
{ lastUpdate: "$lastUpdate" },
{ $arrayToObject: "$types" }
You can use the following stages after your existing stages.
$group: {
_id: null,
data: {
$push: {
type: "$_id",
count: "$count"
lastUpdate: {
$first: "$lastUpdate"
$project: {
data: {
$arrayToObject: {
$map: {
input: "$data",
in: {
k: "$$this.type",
v: "$$this.count"
lastUpdate: 1
$addFields: {
"data.lastUpdate": "$lastUpdate"
"$replaceRoot": {
"newRoot": "$data"
Working Mongo playground

MongoDB query subdocument for records that don't match criteria

I currently have the following query:
$lookup: {
foreignField: "c_ID",
from: "messages",
localField: "_id",
as: "messages"
"$unwind": "$messages"
"$sort": {
"messages.t": -1
"$group": {
"_id": "$_id",
"lastMessage": {
"$first": "$messages"
"allFields": {
"$first": "$$ROOT"
"$replaceRoot": {
"newRoot": {
"$mergeObjects": [
"lastMessage": "$lastMessage"
$project: {
messages: 0
$match: {
"members.uID": "1",
//"lastMessage.t": { $gt: ISODate("2020-02-04 20:38:02.154Z") }
$sort: { "lastMessage.t": 1 }
$limit: 10
$project: {
members: {
$slice: [ {
$filter: {
input : "$members", as : "member", cond : {
$ne : ["$$member.uID" , "1"]
}, 3 ]
However, I also have a field for each member, named "l", which contains a timestamp. It means someone has left a conversation and thus represents the leave date. I don't want anyone who left before the current timestamp (e.g. 1582056056) to be included in the members list. How can I do this?
conversations document
"_id" : ObjectId("5e35f2c840713a43aeeeb3d9"),
"members" : [
"uID" : "1",
"j" : 1580580922
"uID" : "4",
"j" : 1580580922,
ā€œlā€: 1580581982
"uID" : "5",
"j" : 1580580922
messages document
"_id" : ObjectId("5e35ee5f40713a43aeeeb1c5"),
"c_ID" : ObjectId("5e35f2c840713a43aeeeb3d9"),
"fromID" : "1",
"msg" : "What's up?",
"t" : 1580591922,
"d" : {
"4" : 1580592039
"r" : {
"4" : 1580592339
We can exclude them during the $filter stage with the $and and $or operators.
member.uID != 1 && (member.l == undefined || lastMessage.t < member.l)
Take a look at the query below.
$lookup: {
from: "messages",
foreignField: "c_ID",
localField: "_id",
as: "messages"
"$unwind": "$messages"
"$sort": {
"messages.t": -1
"$group": {
"_id": "$_id",
"lastMessage": {
"$first": "$messages"
"allFields": {
"$first": "$$ROOT"
"$replaceRoot": {
"newRoot": {
"$mergeObjects": [
"lastMessage": "$lastMessage"
$project: {
messages: 0
$match: {
"members.uID": "1"
$sort: {
"lastMessage.t": 1
$limit: 10
$project: {
members: {
$slice: [
$filter: {
input: "$members",
as: "member",
cond: {
$and: [
$ne: [
$or: [
$eq: [
$lt: [

How to count the number of objects by a selected (nested) attribute using MongoDB

I have a collection that has documents taking a structure like this.
"_id" : ObjectId("5d6db92e8e935c407f00f39c"),
"id" : "1",
"email" : "",
"orgs" : [
"org_id" : "1",
"org_name" : "Lenovo",
"role" : "tenantadmin",
"primary_locale" : null,
"name" : "admin"
I need to get the count of admin roles available and also the count of other roles(any other tenantadmin, admin, user). So that it would give a result like
{admin:10, others:20}
This is the code that I have tried out.
db.getCollection('users').aggregate([{'$unwind': '$orgs'},{ '$group': { '_id': "$orgs.role",'count': {'$sum': 1}}}])
Which gives me a count of all the types of roles:
"_id" : "user",
"count" : 3.0
"_id" : "tenantadmin",
"count" : 2.0
"_id" : "admin",
"count" : 5.0
How can I get an output like this: {admin:10, others:20}?
You can use $cond to define your grouping key:
{ '$unwind': '$orgs' },
{ '$group': { '_id': { $cond: [ { $eq: [ "$orgs.role", "admin" ] }, "$orgs.role", "other" ] },'count': {'$sum': 1}}}
Mongo Playground
EDIT: to get your grouping _id as result's keys you can run another $group followed by $replaceRoot with $arrayToObject:
{ '$unwind': '$orgs' },
{ '$group': { '_id': { $cond: [ { $eq: [ "$orgs.role", "admin" ] }, "$orgs.role", "other" ] },'count': {'$sum': 1}}},
{ '$group': { '_id': null, root: { $push: { k: '$_id', v: '$count' } } } },
{ '$replaceRoot': { newRoot: { $arrayToObject: '$root' } } }
Mongo Playground
Try as below:
"$addFields": {
"other": {
"$size": {
"$filter": {
"input": "$orgs",
"as": "el",
"cond": { "$ne": [ "$$el.role", "admin" ] }
"admin": {
"$size": {
"$filter": {
"input": "$orgs",
"as": "el",
"cond": { "$eq": [ "$$el.role", "admin" ] }
$project: {
The result will be:
"_id" : ObjectId("5de0b60ec6794c1b2be95902"),
"other" : 2,
"admin" : 1