Mongo $group and $count, and then $sort the result - mongodb

I have a simple table with ranked users...
User:
{
"_id" : "aaa",
"rank" : 10
},
{
"_id" : "bbb",
"rank" : 30
},
{
"_id" : "ccc",
"rank" : 20
},
{
"_id" : "ddd",
"rank" : 30
},
{
"_id" : "eee",
"rank" : 30
},
{
"_id" : "fff",
"rank" : 10
}
And I would like to count how many have each rank, and then sort them with highest to lowest count
So I can get this result:
Result:
{
"rank" : 30,
"count": 3
},
{
"rank" : 10,
"count": 2
},
{
"rank" : 20,
"count": 1
}
I tried different things but can't seem to get the correct output:
db.getCollection("user").aggregate([
{
"$group": {
"_id": {
"rank": "$rank"
},
"count": { "$sum": 1 }
},
"$sort": {
"count" : -1
}
])
I hope this is possible to do.

You can count and then sort them by aggregation in mongodb
// Count how many users hold each rank, then order the ranks by that count.
// The collection name matches the one used in the question ("user").
db.getCollection("user").aggregate([
  // One output document per distinct rank; count goes up by 1 for every user in the group.
  { $group: { _id: "$rank", count: { $sum: 1 } } },
  // Most frequent rank first.
  { $sort: { count: -1 } },
  // Expose the grouping key as `rank` so the output has the requested { rank, count } shape.
  { $project: { _id: 0, rank: "$_id", count: 1 } }
])
Working example
https://mongoplayground.net/p/aM3Ci3GACjp

You don't need to add additional group or count stages when you can do it in one go -
// $sortByCount groups the documents by the given expression, counts each group,
// and returns the groups ordered by that count, highest first.
// Each result document carries the rank in _id and the tally in count.
db.getCollection("user").aggregate([
{
$sortByCount: "$rank"
}
])

Related

MongoDB query similar to SELECT COUNT GROUP BY

My MongoDB database has the 'interviews' collection whose document structure is similar to this:
{
"_id" : ObjectId("632b97b0f2bd3f64bbc30ec8"),
"agency" : "AG1",
"year" : "2022",
"month" : "9",
"residents" : [
{
"sequential" : 1,
"name" : "Resident 1",
"statusResident" : "pending",
},
{
"sequential" : 2,
"name" : "Resident 2",
"statusResident" : "not analyzed"
},
{
"sequential" : 3,
"name" : "Resident 3",
"statusResident" : "not analyzed"
},
{
"sequential" : 4,
"name" : "Resident 4",
"statusResident" : "finished"
}
]
}
{
"_id" : ObjectId("882b99b0f2bd3f64xxc30ec8"),
"agency" : "AG2",
"year" : "2022",
"month" : "9",
"residents" : [
{
"sequential" : 1,
"name" : "Resident 10",
"statusResident" : "pending",
},
{
"sequential" : 2,
"name" : "Resident 20",
"statusResident" : "not analyzed"
}
]
}
I would like to make a query that returns something similar to SQL SELECT agency, statusResident, COUNT(*) FROM interviews GROUP BY agency, statusResident.
For these documents above, that would return something like
AG1 pending 1
AG1 not analyzed 2
AG1 finished 1
AG2 pending 1
AG2 not analyzed 1
I ran the following queries but they didn't return what I need:
db.interviews.aggregate([
{ $group: { _id: { agency: "$agency", statusResident: "$residents.statusResident", total: { $sum: "$residents.statusResident" } } } },
{ $sort: { agency: 1 } }
db.interviews.group({
key:{agency:1, "residents.statusResident":1},
cond:{year:2022},
reduce:function(current, result)
{
result.total += 1;
},
initial:{total:0}
})
I've consulted post "MongoDB SELECT COUNT GROUP BY" and "Select count group by mongodb" as well as the MongoDB documentation but to no avail. What query should I run to get a result similar to the one I want?
You can try this query:
First, $unwind deconstructs the residents array so that you can group by statusResident too.
Then $group by two values, agency and statusResident.
And the last stage is $project to get an easier to read output.
// Count residents per (agency, statusResident) pair.
db.collection.aggregate([
// Emit one document per element of the residents array.
{
"$unwind": "$residents"
},
// Group on the compound key and add 1 to total for every unwound document.
{
"$group": {
"_id": {
"agency": "$agency",
"statusResident": "$residents.statusResident"
},
"total": {
"$sum": 1
}
}
},
// Lift the two key parts out of _id so each result is a flat document.
{
"$project": {
"_id": 0,
"agency": "$_id.agency",
"statusResident": "$_id.statusResident",
"total": 1
}
}
])
Example here
Try this one
// Count residents per (agency, statusResident) pair, ordered by agency.
db.collection.aggregate([
  // Emit one document per element of the residents array.
  { $unwind: "$residents" },
  {
    $group: {
      // Only the grouping key belongs inside _id.
      _id: {
        agency: "$agency",
        statusResident: "$residents.statusResident"
      },
      // The accumulator is a sibling of _id; nested inside _id it would become
      // part of the key and no per-group count would be produced.
      total: { $sum: 1 }
    }
  },
  // After $group the agency lives under _id, so the sort key must be "_id.agency".
  { $sort: { "_id.agency": 1 } }
])
Mongo Playground

How to get percentage total of data with group by date in MongoDB

How to get percentage total of data with group by date in MongoDB ?
Link example : https://mongoplayground.net/p/aNND4EPQhcb
I have some collection structure like this
{
"_id" : ObjectId("5ccbb96706d1d47a4b2ced4b"),
"date" : "2019-05-03T10:39:53.108Z",
"id" : 166,
"update_at" : "2019-05-03T10:45:36.208Z",
"type" : "image"
}
{
"_id" : ObjectId("5ccbb96706d1d47a4b2ced4c"),
"date" : "2019-05-03T10:39:53.133Z",
"id" : 166,
"update_at" : "2019-05-03T10:45:36.208Z",
"type" : "image"
}
{
"_id" : ObjectId("5ccbb96706d1d47a4b2ced4d"),
"date" : "2019-05-03T10:39:53.180Z",
"id" : 166,
"update_at" : "2019-05-03T10:45:36.208Z",
"type" : "image"
}
{
"_id" : ObjectId("5ccbb96706d1d47a4b2ced4e"),
"date" : "2019-05-03T10:39:53.218Z",
"id" : 166,
"update_at" : "2019-05-03T10:45:36.208Z",
"type" : "image"
}
And I have a query in MongoDB to get data from the collection; how do I get the percentage of the total? The example query below gets the data:
db.name_collection.aggregate(
[
{ "$match": {
"update_at": { "$gte": "2019-11-04T00:00:00.0Z", "$lt": "2019-11-06T00:00:00.0Z"},
"id": { "$in": [166] }
} },
{
"$group" : {
"_id": {
$substr: [ '$update_at', 0, 10 ]
},
"count" : {
"$sum" : 1
}
}
},
{
"$project" : {
"_id" : 0,
"date" : "$_id",
"count" : "$count"
}
},
{
"$sort" : {
"date" : 1
}
}
]
)
and this response :
{
"date" : "2019-11-04",
"count" : 39
},
{
"date" : "2019-11-05",
"count" : 135
}
How do I get each count as a percentage of the total? Example of the desired response:
{
"date" : "2019-11-04",
"count" : 39,
"percentage" : "22%"
},
{
"date" : "2019-11-05",
"count" : 135,
"percentage" : "78%"
}
You have to group by null to get total count and then use $map to calculate the percentage. $round will be a useful operator in such case. Finally you can $unwind and $replaceRoot to get back the same number of documents:
// Append a percentage-of-total string to every { date, count } document.
db.collection.aggregate([
// previous aggregation steps
// Collapse everything into a single document: total is the sum of all counts,
// docs keeps every incoming document so it can be restored afterwards.
{
$group: {
_id: null,
total: { $sum: "$count" },
docs: { $push: "$$ROOT" }
}
},
// Rebuild each saved document and add percentage = round(count / total * 100) + "%".
{
$project: {
docs: {
$map: {
input: "$docs",
in: {
date: "$$this.date",
count: "$$this.count",
percentage: { $concat: [ { $toString: { $round: { $multiply: [ { $divide: [ "$$this.count", "$total" ] }, 100 ] } } }, '%' ] }
}
}
}
}
},
// Turn the docs array back into one document per element...
{
$unwind: "$docs"
},
// ...and promote each element to the top level.
{
$replaceRoot: { newRoot: "$docs" }
}
])
Mongo Playground

MongoDB : Find duplicate when field type is not the same

How can I detect duplicates even if the field types are not the same?
{id : 1 , price : 5}
{id : 2 , price : "6"}
{id : 3 , price : "5"}
so duplicates are
{id : 1 , price : 5}
{id : 3 , price : "5"}
You can use $substr to do the conversion to String going from index 0 to -1 (rest of the string). :
// Find documents whose price is the same once every price is read as a string.
db.duplicates.aggregate(
// Stage 1: keep id and replace price with its string form
// (substring from index 0, length -1 = rest of the string).
[{
"$project": {
id: 1,
price: { $substr: ["$price", 0, -1] }
}
// Stage 2: group by the string price, counting members and collecting them in item.
}, {
"$group": {
"_id": "$price",
"count": { "$sum": 1 },
"item": { $push: "$$ROOT" }
}
// Stage 3: keep only groups with a non-null key and more than one member.
}, {
"$match": {
"_id": { "$ne": null },
"count": { "$gt": 1 }
}
}]
)
The rest of the aggregation groups by $price to count the number of occurrences and matches items that have a count > 1 (duplicates). In item, you have the initial items that are duplicates:
{ "_id" : "5", "count" : 2, "item" : [ { "_id" : ObjectId("58616dc177b68a6c54252bc8"), "id" : 1, "price" : "5" }, { "_id" : ObjectId("58616dc177b68a6c54252bca"), "id" : 3, "price" : "5" } ] }
To count the number of distinct fields duplicated, add :
{
"$group": {
"_id": null,
"totalCount": {
"$sum": 1
}
}
}

mongodb count number of documents for every category

My collection looks like this:
{
"_id":ObjectId("5744b6cd9c408cea15964d18"),
"uuid":"bbde4bba-062b-4024-9bb0-8b12656afa7e",
"version":1,
"categories":["sport"]
},
{
"_id":ObjectId("5745d2bab047379469e10e27"),
"uuid":"bbde4bba-062b-4024-9bb0-8b12656afa7e",
"version":2,
"categories":["sport", "shopping"]
},
{
"_id":ObjectId("5744b6359c408cea15964d15"),
"uuid":"561c3705-ba6d-432b-98fb-254483fcbefa",
"version":1,
"categories":["politics"]
}
I want to count the number of documents for every category. To do this, I unwind the categories array:
db.collection.aggregate(
{$unwind: '$categories'},
{$group: {_id: '$categories', count: {$sum: 1}} }
)
Result:
{ "_id" : "sport", "count" : 2 }
{ "_id" : "shopping", "count" : 1 }
{ "_id" : "politics", "count" : 1 }
Now I want to count the number of documents for every category, but where document version is the latest version.
This is where I am stuck.
It's ugly but I think this gives you what you're after:
// Count documents per category, considering only the latest version of each uuid.
db.collection.aggregate([
  // One document per (document, category) pair.
  { $unwind: "$categories" },
  // Per uuid: remember every (version, category) pair and the highest version seen.
  {
    $group: {
      _id: { uuid: "$uuid" },
      doc: { $push: { version: "$version", category: "$categories" } },
      maxVersion: { $max: "$version" }
    }
  },
  // Back to one document per (version, category) pair, now carrying maxVersion.
  { $unwind: "$doc" },
  // Flag the pairs that belong to the latest version.
  // The uuid sits under _id after $group, so the path is "$_id.uuid"
  // ("$id.uuid" refers to a field that does not exist and would drop uuid).
  {
    $project: {
      _id: 0,
      uuid: "$_id.uuid",
      category: "$doc.category",
      isCurrentVersion: { $eq: ["$doc.version", "$maxVersion"] }
    }
  },
  // Discard pairs from older versions.
  { $match: { isCurrentVersion: true } },
  // Final tally per category.
  { $group: { _id: "$category", count: { $sum: 1 } } }
])
You can do this by first grouping the denormalized documents (from the $unwind operator step) by two keys, i.e. the categories and version fields. This is necessary for the following pipeline step, which orders the grouped documents and their accumulated counts by the version (desc) and categories (asc) keys respectively using the $sort operator.
Another grouping will be required to get the top documents in each categories group after ordering using the $first operator. The following shows this
// Count documents per category, keeping only the count of each category's highest version.
// NOTE(review): no stage groups by uuid, so "highest version" is taken per category
// across all uuids, not per uuid — confirm this matches the requirement.
db.collection.aggregate(
// One document per (document, category) pair.
{ "$unwind": "$categories" },
// Count the pairs for every (category, version) combination.
{
"$group": {
"_id": {
'categories': '$categories',
'version': '$version'
},
"count": { "$sum": 1 }
}
},
// Highest version first, so $first below picks it within each category.
{ "$sort": { "_id.version": -1, "_id.categories": 1 } },
// Per category, keep the count and version of the first (highest-version) entry.
{
"$group": {
"_id": "$_id.categories",
"count": { "$first": "$count" },
"version": { "$first": "$_id.version" }
}
}
)
Sample Output
{ "_id" : "shopping", "count" : 1, "version" : 2 }
{ "_id" : "sport", "count" : 1, "version" : 2 }
{ "_id" : "politics", "count" : 1, "version" : 1 }

Aggregate in MongoDB return more fields

I'm currently experimenting with MongoDB. Using Twitter's Streaming API, I collected a bunch of tweets (seemed a good way to learn to use MongoDB's aggregation options).
I have the following query
db.twitter.aggregate([
{ $group : { _id : '$status.user.screen_name', count: { $sum : 1 } } },
{ $sort : { count : -1, _id : 1 } },
{ $skip : 0 },
{ $limit : 5 },
]);
As expected, this is the result:
{
"result" : [
{
"_id" : "VacaturesBreda",
"count" : 5
},
{
"_id" : "breda_nws",
"count" : 3
},
{
"_id" : "BredaDichtbij",
"count" : 2
},
{
"_id" : "JobbirdUTITBaan",
"count" : 2
},
{
"_id" : "vacatures_nr1",
"count" : 2
}
],
"ok" : 1
}
The question is how can I match on the user id_str and return the screen_name and for example the followers_count of the user. I tried to do this with { $project .... } but I kept ending up with an empty result set.
For those not familiar with the user object in Twitter's JSON response, here is a part of it (just selected the first user in the db).
"user" : {
"id" : 2678963916,
"id_str" : "2678963916",
"name" : "JobbirdUT IT Banen",
"screen_name" : "JobbirdUTITBaan",
"location" : "Utrecht",
"url" : "http://www.jobbird.com",
"description" : "Blijf op de hoogte van de nieuwste IT/Automatisering vacatures in Utrecht, via http://Jobbird.com",
"protected" : false,
"verified" : false,
"followers_count" : 1,
"friends_count" : 1,
"listed_count" : 0,
"favourites_count" : 0,
"statuses_count" : 311,
"created_at" : "Fri Jul 25 07:35:48 +0000 2014",
...
},
Update: As requested a clear example on the proposed response (sorry for not adding it).
So instead of grouping on the screen_name, group on the id_str. Why, you might ask? It is possible to edit your screen_name, but you are still the same user for Twitter (so the last screen_name should be returned):
db.twitter.aggregate([
{ $group : { _id : '$status.user.id_str', count: { $sum : 1 } } },
{ $sort : { count : -1, _id : 1 } },
{ $skip : 0 },
{ $limit : 5 },
]);
And as the response something like this:
{
"result" : [
{
"_id" : "123456789",
"screen_name": "awsome_screen_name",
"followers_count": 523,
"count" : 5
},
....
],
"ok" : 1
}
You are basically looking for an operator that does not specifically "aggregate" the content, and this is basically what the $first and $last operators do:
// Tweets per user, carrying along the user's screen_name and followers_count.
db.twitter.aggregate([
  { "$group": {
    "_id": "$status.user.id_str",
    // $first copies the value from the first document of the group instead of aggregating it.
    "screen_name": { "$first": "$status.user.screen_name" },
    // followers_count is a per-user attribute repeated on every tweet, so it is
    // picked with $first as well; $sum would add it once per tweet and inflate it.
    "followers_count": { "$first": "$status.user.followers_count" },
    // Number of tweets by this user.
    "count": { "$sum": 1 }
  }},
  { "$sort": { "followers_count": -1, "count": -1 } },
  { "$limit": 5 }
])
Which picks the "first" occurrence of the field based on the grouping key. That is generally useful where there is duplicated related data in the documents to the grouping key.
An alternate approach is to include the fields in the grouping key. You can later restructure with $project:
// Tweets per user, with screen_name made part of the grouping key.
db.twitter.aggregate([
  { "$group": {
    // Compound key: a user whose screen_name changed yields one group per name.
    "_id": {
      "_id": "$status.user.id_str",
      "screen_name": "$status.user.screen_name"
    },
    // Per-user attribute repeated on every tweet; $first picks it, whereas
    // $sum would add it once per tweet and inflate it.
    "followers_count": { "$first": "$status.user.followers_count" },
    // Number of tweets in this group.
    "count": { "$sum": 1 }
  }},
  // Flatten the compound key back into top-level fields.
  { "$project": {
    "_id": "$_id._id",
    "screen_name": "$_id.screen_name",
    "followers_count": 1,
    "count": 1
  }},
  { "$sort": { "followers_count": -1, "count": -1 } },
  { "$limit": 5 }
])
Which is useful where you are unsure of the related "uniqueness".