Find duplicate name in MongoDB - mongodb

I'm having a problem in getting the duplicate name in my mongodb to delete duplicates.
{
"users": [
{
"_id": {
"$oid": "61441890a6566a001623b8ed"
},
"name": "Jollibee",
},
{
"_id": {
"$oid": "61441890a6566a001623b8ed"
},
"name": "Jollibee",
},
{
"_id": {
"$oid": "61441890a6566a001623b8ed"
},
"name": "MCDO",
},
{
"_id": {
"$oid": "61441890a6566a001623b8ed"
},
"name": "Burger King",
},
]
}
I want to show in my output only the duplicate names. which is Jollibee.
tried this approach but it only returns me the count of all the users not the duplicated ones. I want to show 2 Jollibee only.
db.collection.aggregate([
{
"$unwind": "$users"
},
{
"$group": {
"_id": "$_id",
"count": {
"$sum": 1
}
}
},
{
"$match": {
"_id": {
"$ne": null
},
"count": {
"$gt": 1
}
}
}
])

Suppose the documents are:
[
{
"_id": {
"$oid": "6226dd742ef592186422ad1d"
},
"name": "Stack test"
},
{
"_id": {
"$oid": "6226dd7d2ef592186422ad1e"
},
"name": "Stack test"
},
{
"_id": {
"$oid": "6226dd912ef592186422ad1f"
},
"name": "Stack test 001"
}
]
Aggreagtion Query:
db.users.aggregate(
[
{
$group: {
_id: "$name",
names: {$push: "$name"}
}
}
]
)
Result:
{
_id: 'Stack test',
names: [ 'Stack test', 'Stack test' ]
},
{
_id: 'Stack test 001',
names: [ 'Stack test 001' ]
}
But a better way to do it will be
Aggregation Query:
db.users.aggregate(
[
{
$group: {
_id: "$name",
count: {$sum: 1}
}
}
]
)
Result:
{
_id: 'Stack test',
count: 2
},
{
_id: 'Stack test 001',
count: 1
}
Now, you can iterate through the count and use the name value in _id

since the $unwind step gives you same _id for all documents grouping by _id is not correct. Instead try grouping by users.name
db.collection.aggregate([
{
"$unwind": "$users"
},
{
"$group": {
"_id": "$users.name",
"count": {
"$sum": 1
}
}
},
{
"$match": {
"_id": {
"$ne": null
},
"count": {
"$gt": 1
}
}
}
])
demo

Related

How To Count Values Inside Deeply Nested Array Of Objects in Mongodb Aggregation

I want to sum of values inside array of objects which have another array of objects.
In my case; how can I count 'url' values in all documents inside 'urls' array under 'iocs' array;
Mongo playground: open
Here is document example;
[
{
"_id": {
"$oid": "63b4993d0625ebe8b6f5b06e"
},
"iocs": [
{
"urls": [
{
"url": "7.1.5.2",
}
],
},
{
"urls": [
{
"url": "https://l-ink.me/GeheimeBegierde",
},
{
"url": "GeheimeBegierde.ch",
}
],
},
{
"urls": [
{
"url": "https://l-ink.me/GeheimeBegierde",
}
],
}
],
type: "2"
},
{
"_id": {
"$oid": "63b4993d0624ebe8b6f5b06e"
},
"iocs": [
{
"urls": [
{
"url": "7.1.5.2",
}
],
},
{
"urls": [
{
"url": "https://l-ink.me/GeheimeBegierde",
},
{
"url": "GeheimeBegierde.ch",
}
],
},
{
"urls": [
{
"url": "https://l-ink.me/GeheimeBegierde",
}
],
}
],
type: "3"
},
{
"_id": {
"$oid": "63b4993d0615ebe8b6f5b06e"
},
"iocs": [
{
"urls": [
{
"url": "www.google.com",
}
],
},
{
"urls": [
{
"url": "abc.xyz",
},
{
"url": "GeheimeBegierde.ch",
}
],
},
{
"urls": [
{
"url": "https://123.12",
}
],
}
],
type: "1"
}
]
expected output be like;
url: "7.1.5.2",
count:2,
types:[2,3]
url: "https://l-ink.me/GeheimeBegierde",
count:4,
types:[2,3],
url: "abc.xyz",
count:1,
types:[1],
I tried unwind iocs then project urls but can't figure out how to get this output. I think i must use group but how ? Newbie in mongodb.
Any help would be appreciated. Thanks all.
NOTE: All the answers are working. Thank you all for the contributing.
You could do something like this !
db.collection.aggregate([
{
"$unwind": "$iocs"
},
{
"$unwind": "$iocs.urls"
},
{
"$group": {
"_id": "$iocs.urls.url",
"count": {
"$sum": 1
},
"types": {
"$addToSet": "$type"
}
}
},
{
"$project": {
url: "$_id",
_id: 0,
types: 1,
count: 1
}
},
])
https://mongoplayground.net/p/hhMqh2zI_SX
Here's one way you could do it.
db.collection.aggregate([
{"$unwind": "$iocs"},
{"$unwind": "$iocs.urls"},
{
"$group": {
"_id": "$iocs.urls.url",
"count": {"$count": {}},
"types": {"$addToSet": {"$toInt": "$type"}}
}
},
{
"$set": {
"url": "$_id",
"_id": "$$REMOVE"
}
}
])
Try it on mongoplayground.net.
You can try this query:
Double $unwind to deconstruct the nested array.
Then group by url get the count using $sum nad add the types into a set (to avoid duplicates, otherwise you can use simply $push)
db.collection.aggregate([
{
"$unwind": "$iocs"
},
{
"$unwind": "$iocs.urls"
},
{
"$group": {
"_id": "$iocs.urls.url",
"count": {
"$sum": 1
},
"types": {
"$addToSet": "$type"
}
}
}
])
Example here
Since $unwind is considered an inefficient operation, another option is to use $reduce and only $unwind once:
db.collection.aggregate([
{$project: {
type: 1,
urls: {
$reduce: {
input: "$iocs",
initialValue: [],
in: {$concatArrays: ["$$value", "$$this.urls.url"]}
}
}
}},
{$unwind: "$urls"},
{$group: {
_id: "$urls",
type: {$addToSet: "$type"},
count: {$sum: 1}
}},
{$project: {url: "$_id", count: 1, type: 1}}
])
See how it works on the playground example

MongoDB match filters with grouping and get total count

My sample data:
{
"_id": "random_id_1",
"priority": "P1",
"owners": ["user-1", "user-2"],
},
{
"_id": "random_id_2",
"priority": "P1",
"owners": ["user-1", "user-2"],
},
{
"_id": "random_id_3",
"priority": "P2",
"owners": ["user-1", "user-2"],
},
I want to run an aggregation pipeline on the data involving match filters and grouping, also I want to limit the number of groups returned as well as the number of items in each group.
Essentially, if limit=2, limit_per_group=1, group_by=owner, priority=P1, I want the following results:
[
{
"data": [
{
"group_key": "user-1",
"total_items_in_group": 2,
"limited_items": [
{
"_id": "random_id_1",
"priority": "P1",
"owners": ["user-1", "user-2"],
},
],
},
{
"group_key": "user-2",
"total_items_in_group": 2,
"limited_items": [
{
"_id": "random_id_1",
"priority": "P1",
"owners": ["user-1", "user-2"],
},
],
},
]
},
{
"metadata": {
"total_items_matched": 2,
"total_groups": 2
}
},
]
Need some help on how to write an aggregation pipeline to get the required result.
My current query is as follows:
{
"$match": {
"priority": "P1"
}
},
{
"$facet": {
"data": [
{
$addFields: {
"group_by_owners": "$owners"
}
},
{
$unwind: "$group_by_owners"
},
{
$group: {
"_id": "$group_by_owners",
"total_items_in_group": {
$sum: 1
},
"items": {
$push: "$$ROOT"
}
}
},
{
$sort: {
"total": -1
}
},
{
$unset: "items.group_by_owners"
},
{
$project: {
"_id": 1,
"total_items_in_group": 1,
"limited_items": {
$slice: [
"$items",
1
]
}
}
},
{
"$limit": 2
}
],
"metadata": [
{
$count: "total_items_matched"
}
]
}
}
Mongo playground link
I am unable to calculate the total number of groups.
add new stage of $addfields at the end of pipeline
db.collection.aggregate([
{
"$match": {
"priority": "P1"
}
},
{
"$facet": {
"data": [
{
$addFields: {
"group_by_owners": "$owners"
}
},
{
$unwind: "$group_by_owners"
},
{
$group: {
"_id": "$group_by_owners",
"total_items_in_group": {
$sum: 1
},
"items": {
$push: "$$ROOT"
}
}
},
{
$sort: {
"total": -1
}
},
{
$unset: "items.group_by_owners"
},
{
$project: {
"_id": 0,
"group_key": "$_id",
"total_items_in_group": 1,
"limited_items": {
$slice: [
"$items",
1
]
}
}
},
{
"$limit": 2
}
],
"metadata": [
{
$count: "total_items_matched",
}
]
}
},
{
"$addFields": {
"metadata.total_groups": {
"$size": "$data"
}
}
}
])
https://mongoplayground.net/p/y5a0jvr6fxI

MongoDB: Assign document objects to field in '$project' stage

I have a user collection:
[
{"_id": 1,"name": "John", "age": 25, "valid_user": true}
{"_id": 2, "name": "Bob", "age": 40, "valid_user": false}
{"_id": 3, "name": "Jacob","age": 27,"valid_user": null}
{"_id": 4, "name": "Amelia","age": 29,"valid_user": true}
]
I run a '$facet' stage on this collection. Checkout this MongoPlayground.
I want to talk about the first output from the facet stage. The following is the response currently:
{
"user_by_valid_status": [
{
"_id": false,
"count": 1
},
{
"_id": true,
"count": 2
},
{
"_id": null,
"count": 1
}
]
}
However, I want to restructure the output in this way:
"analytics": {
"invalid_user": {
"_id": false
"count": 1
},
"valid_user": {
"_id": true
"count": 2
},
"user_with_unknown_status": {
"_id": null
"count": 1
}
}
The problem with using a '$project' stage along with 'arrayElemAt' is that the order may not be definite for me to associate an index with an attribute like 'valid_users' or others. Also, it gets further complicated because unlike the sample documents that I have shared, my collection may not always contain all the three categories of users.
Is there some way I can do this?
You can use $switch conditional operator,
$project to show value part in v with _id and count field as object, k to put $switch condition
db.collection.aggregate([
{
"$facet": {
"user_by_valid_status": [
{
"$group": {
"_id": "$valid_user",
"count": { "$sum": 1 }
}
},
{
$project: {
_id: 0,
v: { _id: "$_id", count: "$count" },
k: {
$switch: {
branches: [
{ case: { $eq: ["$_id", null] }, then: "user_with_unknown_status" },
{ case: { $eq: ["$_id", false] }, then: "invalid_user" },
{ case: { $eq: ["$_id", true] }, then: "valid_user" }
]
}
}
}
}
],
"users_above_30": [{ "$match": { "age": { "$gt": 30 } } }]
}
},
$project stage in root, convert user_by_valid_status array to object using $arrayToObject
{
$project: {
analytics: { $arrayToObject: "$user_by_valid_status" },
users_above_30: 1
}
}
])
Playground

Group on field while getting the last document for each field with MongoDB

Problem
I'm trying to group a stock inventory by products. At first, my stock entries was fully filled each time so I made this aggregate:
[
{ $sort: { date: 1 } },
{
$group: {
_id: '$userId',
stocks: { $last: '$stocks' },
},
},
{ $unwind: '$stocks' },
{
$group: {
_id: '$stocks.productId',
totalQuantity: { $sum: '$stocks.quantity' },
stocks: { $push: { userId: '$_id', quantity: '$stocks.quantity' } },
},
},
]
Now, it can be possible that a stock entry doesn't contain all the products filled. So I'm stuck while writing the new aggregate.
Basically I need to group every products by productId and have an array of the last entry for each user.
Output
This is my expected output:
[
{
"_id": ObjectId("5e75eae1359fc8159d5b6073"),
"totalQuantity": 33,
"stocks": [
{
"userId": ObjectId("5e75f498359fc8159d5b6075"),
"lastDate": "2020-03-21T11:45:53.077Z",
"quantity": 33
}
]
},
{
"_id": ObjectId("5e75eaea359fc8159d5b6074"),
"totalQuantity": 2,
"stocks": [
{
"userId": ObjectId("5e75f498359fc8159d5b6075"),
"lastDate": "2020-03-21T11:45:53.077Z",
"quantity": 2
}
]
}
]
Documents
Documents (when fully filled):
{
"_id": ObjectId("5e75fe71e4a3e0323ba47e0a"),
"date": "2020-03-21T11:45:53.077Z",
"userId": ObjectId("5e75f498359fc8159d5b6075"),
"stocks": [
{
"productId": ObjectId("5e75eae1359fc8159d5b6073"),
"quantity": 33
},
{
"productId": ObjectId("5e75eaea359fc8159d5b6074"),
"quantity": 2
}
]
}
Sometimes it won't be filled for the whole inventory (that's why I need the lastDate):
{
"_id": ObjectId("5e75fe71e4a3e0323ba47e0a"),
"date": "2020-03-21T11:45:53.077Z",
"userId": ObjectId("5e75f498359fc8159d5b6075"),
"stocks": [
{
"productId": ObjectId("5e75eae1359fc8159d5b6073"),
"quantity": 33
}
]
}
Try this one:
db.collection.aggregate([
{
$group: {
_id: "$userId",
root: {
$push: "$$ROOT"
}
}
},
{
$addFields: {
root: {
$map: {
input: "$root",
as: "data",
in: {
"stocks": {
$map: {
input: "$$data.stocks",
as: "stock",
in: {
"productId": "$$stock.productId",
"userId": "$$data.userId",
"quantity": "$$stock.quantity",
"lastDate": "$$data.date"
}
}
}
}
}
}
}
},
{
$unwind: "$root"
},
{
$replaceRoot: {
newRoot: "$root"
}
},
{
$unwind: "$stocks"
},
{
$sort: {
"stocks.lastDate": 1
}
},
{
$group: {
_id: "$stocks.productId",
totalQuantity: {
$last: "$stocks.quantity"
},
stocks: {
$last: "$stocks"
}
}
},
{
$addFields: {
stocks: [
{
"lastDate": "$stocks.lastDate",
"quantity": "$stocks.quantity",
"userId": "$stocks.userId"
}
]
}
}
])
MongoPlayground

count the number of members before group them

here is my document:
member: [
{
name: 'Jack',
group: 0
},
{
name: 'Rose',
group: 0
},
{
name: 'Tom',
group: 1
}
]
first, count the number of members,then group them.
return:
{
count: 3,
member: [
[
{name: 'Jack'},
{name: 'Rose'}
],
[
{name: 'Tom'}
]
]
}
how to do?thank you very much!
First you should calculate member array size using $size in $project, after that unwind member and group them check below query :
db.collectionName.aggregate({
"$project": {
"count": {
"$size": "$member"
},
"member": 1
}
}, {
"$unwind": "$member"
}, {
"$group": {
"_id": "$member.group",
"data": {
"$push": {
"name": "$member.name"
}
},
"count": {
"$first": "$count"
}
}
}, {
"$group": {
"_id": "$count",
"member": {
"$push": "$data"
}
}
}).pretty()