Find duplicate inside an array mongodb - mongodb

I have a Mongo Collection called Users and structured structured like this
{
_id: '1234aaa',
profile: {
Organizations: [A,B,C,A,B,A]
}
},
{
_id: '1234bbb',
profile: {
Organizations: [A,B,C]
}
},
{
_id: '1234ccc',
profile: {
Organizations: [A,B,C,C]
}
}
How do I return a list of all the documents in my collection ONLY if they have a duplicate value under profile.organizations.
The expected result would be:
DupesUsers: {
{
User: '1234aaa,
Dupes: [A,B]
},
{
User: '1234ccc,
Dupes: [C]
},
}
I've tried using Aggreagte:
db.getCollection('users').aggregate(
{$unwind: "$profile.organizations"},
{ $project: {_id: '$_id', org: '$profile.organizations'} },
{ $group: {
_id: null,
occurances: {$push: {'org': '$_id', count: '$count'}}
}
}
);
but I just can't seem to wrap my head around it.

You're not far off just some minor tweaks needed:
db.getCollection("users").aggregate(
[
{
"$unwind" : "$profile.organizations"
},
{
"$group" : {
"_id" : {
"dup" : "$profile.organizations",
"id" : "$_id"
},
"count" : {
"$sum" : 1.0
}
}
},
{
"$match" : {
"count" : {
"$gt" : 1.0
}
}
},
{
"$group" : {
_id: "$_id.id",
Dupes: {$push: "$_id.dup"}
}
}
],
);

You can use below aggregation
db.collection.aggregate([
{ "$project": {
"Dupes": {
"$filter": {
"input": { "$setUnion": ["$profile.Organizations"] },
"as": "s",
"cond": {
"$gt": [
{ "$size": {
"$filter": {
"input": "$profile.Organizations",
"cond": { "$eq": ["$$this", "$$s"] }
}
}},
1
]
}
}
}
}}
])

Related

Simple MongoDB Aggregation

I'm a bit confused on how to group using aggregation but still be able to extract specific values from arrays:
db.collection.aggregate([
{ "$unwind": f"${stat_type}" },
{
"$group": {
"_id": "$userId",
"value" : { "$max" : f"${stat_type}.stat_value" },
"character" : f"${stat_type}.character_name", <-- how do I extract this value that matches where the $max from above is grabbed.
}
},
{ "$sort": { "value": -1 }},
{ '$limit' : 30 }
])
Sample Entries:
{
'name' : "Tony",
'userId' : 12345,
'damage_dealt' : [
"character_name" : "James",
"stat_value" : 100243
]
}
{
'name' : "Jimmy",
'userId' : 12346,
'damage_dealt' : [
"character_name" : "James",
"stat_value" : 1020243
]
}
{
'name' : "Tony",
'userId' : 12345,
'damage_dealt' : [
"character_name" : "Lebron",
"stat_value" : 99900243
]
}
A sample output for what I'm looking for is below:
[
{
'_id':12345,
'user' : 'Tony'
'character_name' : 'Lebron',
'stat_value' : 99900243
},
{
'_id':12346,
'user' : 'Jimmy'
'character_name' : 'James',
'stat_value' : 1020243
}
]
You can use the $top accumulator to achieve the desired result. Like this:
db.collection.aggregate([
{
"$unwind": "$damage_dealt"
},
{
"$group": {
"_id": "$userId",
"value": {
$top: {
output: {
character_name: "$damage_dealt.character_name",
stat_value: "$damage_dealt.stat_value"
},
sortBy: {
"damage_dealt.stat_value": -1
}
}
},
}
},
{
"$project": {
character_name: "$value.character_name",
stat_value: "$value.stat_value"
}
},
{
"$sort": {
"stat_value": -1
}
},
{
"$limit": 30
}
])
Playground link.
Or collects all the group elements in an array, and the max stat_value, then pick the object from the array containing the max stat_value.
db.collection.aggregate([
{
"$unwind": "$damage_dealt"
},
{
"$group": {
"_id": "$userId",
"max_stat": {
"$max": "$damage_dealt.stat_value"
},
"damages": {
"$push": {
name: "$name",
damage_value: "$damage_dealt"
}
}
}
},
{
"$project": {
"damages": {
"$arrayElemAt": [
{
"$filter": {
"input": "$damages",
"as": "damage",
"cond": {
"$eq": [
"$$damage.damage_value.stat_value",
"$max_stat"
]
}
}
},
0
]
}
}
},
{
"$project": {
"character_name": "$damages.damage_value.character_name",
"stat_value": "$damages.damage_value.stat_value",
"name": "$damages.name"
}
},
{
"$sort": {
"stat_value": -1
}
},
{
"$limit": 30
}
])
Playground link.
Here's another way you could do it.
db.collection.aggregate([
{
"$group": {
"_id": "$userId",
"user": {"$first": "$name"},
"damage_dealts": {"$push": "$damage_dealt"},
"maxStat": {"$max": {"$first": "$damage_dealt.stat_value"}}
}
},
{
"$set": {
"outChar": {
"$first": {
"$arrayElemAt": [
"$damage_dealts",
{"$indexOfArray": ["$damage_dealts.stat_value", "$maxStat"]}
]
}
}
}
},
{
"$project": {
"user": 1,
"character_name": "$outChar.character_name",
"stat_value": "$outChar.stat_value"
}
},
{"$sort": {"stat_value": -1}},
{"$limit": 30}
])
Try it on mongoplayground.net.

Mongodb: $unwind and compute $avg

I have documents storing IoT data.
Following MongoDB schema design best practices for IoT, I came to documents with the following structure:
"_id" : "AQ106_2020-09-12T09",
"date" : "2020-09-12T09:00:00.000Z",
"station" : {
"name" : "AQ106",
"loc" : {
"type" : "Point",
"coordinates" : [
14.339263,
40.814224
]
},
"properties" : {
}
},
"samples" : [
{
"t" : ISODate("2020-09-12T11:02:00.000+02:00"),
"data" : {
"pm1_mg_m3" : 2.7,
"pm2_5_mg_m3" : 4.6,
"pm10_mg_m3" : 12,
"P0" : 152,
"P1" : 16,
"P2" : 4.7,
"P3" : 0.8,
"P4" : 0.86,
"P5" : 0.6,
"P6" : 0.28,
"P7" : 0.152,
"P8" : 0.094,
"P9" : 0.092,
"P10" : 0.019,
"P11" : 0,
"P12" : 0,
"P13" : 0.0188,
"P14" : 0,
"P15" : 0,
"P16" : 0,
"P17" : 0,
"P18" : 0,
"P19" : 0,
"P20" : 0,
"P21" : 0,
"P22" : 0,
"P23" : 0,
"temp_celsius" : 32.59,
"humRelPercent" : 34,
"press_mBar" : 1010.79,
"CO2mA" : 4,
"NO2_WE_mV" : 226.419,
"NO2_AE_mV" : 229.553,
"OX_WE_mV" : 252.287,
"OX_AE_mV" : 220.419,
"CO_WE_mV" : 509.077,
"AE_WE_mV" : 348.51,
"batt_V" : 13.5,
"source_V" : 17.6
}
},
.... additional arrays
}
Now I want to compute hourly or daily averages (or another metric) to populate a new collection with only summarised data.
I coded the following solution for hourly means:
db.collection.aggregate([{$match: {
'station.name':'AQ104'
}}, {$unwind: {
path: "$samples"
}}, {$group: {
_id: "$date",
P0: {
$avg : "$samples.data.P0"
},
temp:{
$avg:"$samples.data.temp_celsius"
}
}}])
This works but I need to manually create a field for each property in samples.data in the original document and that's tedious.
Moreover, how to group both by date and station.name?
You can find a working example here.
Thanks.
Let's start with the easy question, how to group on multiple fields? With a simple syntax change:
{
$group: {
_id: {
date: "$date",
station: "$station.name"
}
}
Now for the second question this will be a bit more tedious. Mongo does not support "merging" objects by their keys with custom logic (in this case $avg). So we will have to convert the object to an array. unwind it, calculate the average per field and eventually group to restore the required structure like so:
db.collection.aggregate([
{
$match: {
"station.name": "AQ106"
}
},
{
$unwind: {
path: "$samples"
}
},
{
$addFields: {
objArr: {
"$objectToArray": "$samples.data"
}
}
},
{
$unwind: "$objArr"
},
{
$group: {
_id: {
date: "$date",
station: "$station.name",
objKey: "$objArr.k"
},
value: {
$avg: "$objArr.v"
}
}
},
{
$addFields: {
data: {
"$arrayToObject": [
[
{
k: "$_id.objKey",
v: "$value"
}
]
]
}
}
},
{
$group: {
_id: {
date: "$_id.date",
station: "$_id.station"
},
data: {
"$mergeObjects": "$data"
}
}
},
{
$replaceRoot: {
newRoot: {
"$mergeObjects": [
"$data",
"$_id"
]
}
}
}
])
MongoPlayground
------- EDIT ---------
For Mongo v4.4+ you can use $accumulator which allows you to execute custom javascript code in your pipeline. I am unsure how this will fare against the native Mongo pipeline in terms of performance in scale.
One thing to note is that I added the initial $addFields stage under the assumption that different samples may have different keys. if this is not the case it is not needed.
db.collection.aggregate([
{
$addFields: {
sampleKeys: {
$reduce: {
input: {
$map: {
input: "$samples",
as: "sample",
in: {
$map: {
input: {
"$objectToArray": "$$sample.data"
},
as: "sampleArrItem",
in: "$$sampleArrItem.k"
}
}
}
},
initialValue: [],
in: {
"$setUnion": [
"$$this",
"$$value"
]
}
}
}
}
},
{
$addFields: {
samples: {
$accumulator: {
init: function(keys){
return keys.map(k => {return {k: {v: 0, c: 0}}});
},
initArgs: ["$sampleKeys"],
accumulateArgs: ["$samples"],
accumulate: function(state, sample){
Object.keys(state).forEach((key) => {
if (key in sample.data) {
state[key].v += sample.data[key];
state[key].c++;
};
});
return state;
},
merge: function(state1, state2){
Object.keys(state1).forEach((key) => {
state1[key].v += state2[key].v;
state1[key].c += state2[key].c;
});
return state1;
},
lang: "js"
}
}
}
},
{
$replaceRoot: {
newRoot: {
$mergeObject: [
"$samples",
{station: "$station.name", date: "$date"},
]
}
}
}
])
I partially resolved my question in terms of grouping by multiple fields (MongoDB documentation was not so clear at this regard, in my opinion)
db.collection.aggregate([
{
$unwind: {
path: "$samples"
}
},
{
$group: {
_id: {
date: "$date",
station: "$station.name"
},
P0: {
$avg: "$samples.data.P0"
},
temp: {
$avg: "$samples.data.temp_celsius"
}
}
}
])
Here the updated working example.
Thanks to Tom Slabbaert, I solved my question with the following query:
db.collection.aggregate([
{
$unwind: {
path: "$samples"
}
},
{
$addFields: {
objArr: {
"$objectToArray": "$samples.data"
}
}
},
{
$unwind: "$objArr"
},
{
$group: {
_id: {
date: "$date",
station: "$station",
objKey: "$objArr.k"
},
value: {
$avg: "$objArr.v"
}
}
},
{
$addFields: {
data: {
"$arrayToObject": [
[
{
k: "$_id.objKey",
v: "$value"
}
]
]
}
}
},
{
$group: {
_id: {
date: "$_id.date",
station: "$_id.station"
},
data: {
"$mergeObjects": "$data"
}
}
},
{
"$project": {
_id: "$_id.date",
station: "$_id.station",
data: 1
}
}
])
Here
I wonder if it is possible to simplify the above solution using the new $function operator.
Thanks.

Mongodb project results into single document

I have a collection like this:
{
"_id" : ObjectId("5f4e81f1da5ea3cb7c248a8f"),
"type" : "TYPE_1",
"updateTime" : ISODate("2020-08-24T11:10:43.219+0000")
}
{
"_id" : ObjectId("5f4e8206da5ea3cb7c248a90"),
"type" : "TYPE_1",
"updateTime" : ISODate("2020-09-24T11:10:43.219+0000")
}
{
"_id" : ObjectId("5f4e821fda5ea3cb7c248a91"),
"type" : "TYPE_2",
"updateTime" : ISODate("2020-09-25T11:10:43.219+0000")
}
I want to know how many documents there are of each type and also obtain the date of the last global modification. For now I can get these results like this:
db.getCollection("test").aggregate(
// Pipeline
[
// Stage 1
{
$group: {
_id : "$type",
count: { $sum: 1 },
lastUpdate: { "$max": "$updateTime" }
}
},
// Stage 2
{
$sort: {
lastUpdate : -1
}
},
]
);
With which I get the results this way:
{
"_id" : "TYPE_2",
"count" : 1.0,
"lastUpdate" : ISODate("2020-09-25T11:10:43.219+0000")
}
{
"_id" : "TYPE_1",
"count" : 2.0,
"lastUpdate" : ISODate("2020-09-24T11:10:43.219+0000")
}
So I have both the sum of each document and the last modification (thanks to the sort).
But I would like to project and get the results like this, in a single result document:
{
"type1" : 2.0,
"type2" : 1.0,
"lastUpdate" : ISODate("2020-09-25T11:10:43.219+0000")
}
#varman's answer is good, this is just in different way,
$group you have already done by your self
$group create types array to combine all documents
$replaceWith to replace root with field types to convert $arrayToObject
db.collection.aggregate([
{
$group: {
_id: "$type",
count: { $sum: 1 },
lastUpdate: { $max: "$updateTime" }
}
},
{
$group: {
_id: null,
types: {
$push: {
k: "$_id",
v: "$count"
}
},
lastUpdate: { $max: "$lastUpdate" }
}
},
{
$replaceWith: {
$mergeObjects: [
{ lastUpdate: "$lastUpdate" },
{ $arrayToObject: "$types" }
]
}
}
])
Playground
You can use following stages after your stage.
{
$group: {
_id: null,
data: {
$push: {
type: "$_id",
count: "$count"
}
},
lastUpdate: {
$first: "$lastUpdate"
}
}
},
{
$project: {
data: {
$arrayToObject: {
$map: {
input: "$data",
in: {
k: "$$this.type",
v: "$$this.count"
}
}
}
},
lastUpdate: 1
}
},
{
$addFields: {
"data.lastUpdate": "$lastUpdate"
}
},
{
"$replaceRoot": {
"newRoot": "$data"
}
}
Working Mongo playground

MongoDB query subdocument for records that don't match criteria

I currently have the following query:
db.getCollection('conversations').aggregate([
{
$lookup: {
foreignField: "c_ID",
from: "messages",
localField: "_id",
as: "messages"
}
},
{
"$unwind": "$messages"
},
{
"$sort": {
"messages.t": -1
}
},
{
"$group": {
"_id": "$_id",
"lastMessage": {
"$first": "$messages"
},
"allFields": {
"$first": "$$ROOT"
}
}
},
{
"$replaceRoot": {
"newRoot": {
"$mergeObjects": [
"$allFields",
{
"lastMessage": "$lastMessage"
}
]
}
}
},
{
$project: {
messages: 0
}
},
{
$match: {
"members.uID": "1",
//"lastMessage.t": { $gt: ISODate("2020-02-04 20:38:02.154Z") }
}
},
{
$sort: { "lastMessage.t": 1 }
},
{
$limit: 10
},
{
$project: {
members: {
$slice: [ {
$filter: {
input : "$members", as : "member", cond : {
$ne : ["$$member.uID" , "1"]
}
}
}, 3 ]
}
}
},
])
However, I also have a field for each member, named "l", which contains a timestamp. It means someone has left a conversation and thus represents the leave date. I don't want anyone who left before the current timestamp (e.g. 1582056056) to be included in the members list. How can I do this?
EDIT:
conversations document
{
"_id" : ObjectId("5e35f2c840713a43aeeeb3d9"),
"members" : [
{
"uID" : "1",
"j" : 1580580922
},
{
"uID" : "4",
"j" : 1580580922,
ā€œlā€: 1580581982
},
{
"uID" : "5",
"j" : 1580580922
}
]
}
messages document
{
"_id" : ObjectId("5e35ee5f40713a43aeeeb1c5"),
"c_ID" : ObjectId("5e35f2c840713a43aeeeb3d9"),
"fromID" : "1",
"msg" : "What's up?",
"t" : 1580591922,
"d" : {
"4" : 1580592039
},
"r" : {
"4" : 1580592339
}
}
We can exclude them during $filter stage with $and and $or operators.
member.uID != 1 && (member.l == undefined || lastMessage.t < member.l)
Take a look query below.
db.conversations.aggregate([
{
$lookup: {
from: "messages",
foreignField: "c_ID",
localField: "_id",
as: "messages"
}
},
{
"$unwind": "$messages"
},
{
"$sort": {
"messages.t": -1
}
},
{
"$group": {
"_id": "$_id",
"lastMessage": {
"$first": "$messages"
},
"allFields": {
"$first": "$$ROOT"
}
}
},
{
"$replaceRoot": {
"newRoot": {
"$mergeObjects": [
"$allFields",
{
"lastMessage": "$lastMessage"
}
]
}
}
},
{
$project: {
messages: 0
}
},
{
$match: {
"members.uID": "1"
}
},
{
$sort: {
"lastMessage.t": 1
}
},
{
$limit: 10
},
{
$project: {
members: {
$slice: [
{
$filter: {
input: "$members",
as: "member",
cond: {
$and: [
{
$ne: [
"$$member.uID",
"1"
]
},
{
$or: [
{
$eq: [
"$$member.l",
undefined
]
},
{
$lt: [
"$lastMessage.t",
"$$member.l"
]
}
]
}
]
}
}
},
3
]
}
}
}
])
MongoPlayground

How to count number of objects by a selected attribute(in nested) using Mongdb

I have a collection that has documents taking a structure like this.
{
"_id" : ObjectId("5d6db92e8e935c407f00f39c"),
"id" : "1",
"email" : "admin#test.com",
"orgs" : [
{
"org_id" : "1",
"org_name" : "Lenovo",
"role" : "tenantadmin",
"primary_locale" : null,
"name" : "admin"
}
]
}
I need to get the count of admin roles available and also the count of other roles(any other tenantadmin, admin, user). So that it would give a result like
{admin:10, others:20}
This is the code that I have tried out.
db.getCollection('users').aggregate([{'$unwind': '$orgs'},{ '$group': { '_id': "$orgs.role",'count': {'$sum': 1}}}])
Which gives me a count of all the type of roles
{
"_id" : "user",
"count" : 3.0
}
{
"_id" : "tenantadmin",
"count" : 2.0
}
{
"_id" : "admin",
"count" : 5.0
}
How to get an output like this {admin:10, others:20} ?.
You can use $cond to define your grouping key:
db.getCollection('users').aggregate([
{ '$unwind': '$orgs' },
{ '$group': { '_id': { $cond: [ { $eq: [ "$orgs.role", "admin" ] }, "$orgs.role", "other" ] },'count': {'$sum': 1}}}
]
)
Mongo Playground
EDIT: to get your grouping _id as result's keys you can run another $group followed by $replaceRoot with $arrayToObject:
db.getCollection('users').aggregate([
{ '$unwind': '$orgs' },
{ '$group': { '_id': { $cond: [ { $eq: [ "$orgs.role", "admin" ] }, "$orgs.role", "other" ] },'count': {'$sum': 1}}},
{ '$group': { '_id': null, root: { $push: { k: '$_id', v: '$count' } } } },
{ '$replaceRoot': { newRoot: { $arrayToObject: '$root' } } }
]
)
Mongo Playground
Try as below:
db.collection.aggregate([
{
"$addFields": {
"other": {
"$size": {
"$filter": {
"input": "$orgs",
"as": "el",
"cond": { "$ne": [ "$$el.role", "admin" ] }
}
}
},
"admin": {
"$size": {
"$filter": {
"input": "$orgs",
"as": "el",
"cond": { "$eq": [ "$$el.role", "admin" ] }
}
}
}
}
},
{
$project: {
admin:1,
other:1
}
}
])
Result will be :
{
"_id" : ObjectId("5de0b60ec6794c1b2be95902"),
"other" : 2,
"admin" : 1
}