MongoDB- arrays from aggregation result - mongodb

I have the following MongoDB query:
db.my_collection.aggregate([
{
$group: {"_id":"$day", count: { $sum: "$myValue" }
}}])
It returns the following result:
{
"_id" : ISODate("2020-02-10T00:00:00.000+01:00"),
"count" : 10
},
{
"_id" : ISODate("2020-02-01T00:00:00.000+01:00"),
"count" : 2
}
Is it possible to make two arrays from this result as below?
{
"days": [ISODate("2020-02-10T00:00:00.000+01:00"), ISODate("2020-02-01T00:00:00.000+01:00")],
"values": [10, 2]
}

Yes, just add another $group stage:
db.my_collection.aggregate([
{
$group: {
"_id": "$day", count: {$sum: "$myValue"}
}
},
{
$group: {
"_id": null,
days: {$push: "$_id"},
values: {$push: "$count"}
}
}
])

Related

How to group multiple operations with using MongoDB aggregation

Given the following Data:
> db.users.find({}, {name: 1, createdAt: 1, updatedAt: 1}).limit(5).pretty()
{
"_id" : ObjectId("5ec8f74f32973c7b7cb7cce9"),
"createdAt" : ISODate("2020-05-23T10:13:35.012Z"),
"updatedAt" : ISODate("2020-08-20T13:37:09.861Z"),
"name" : "Patrick Jere"
}
{
"_id" : ObjectId("5ec8ef8a2b6e5f78fa20443c"),
"createdAt" : ISODate("2020-05-23T09:40:26.089Z"),
"updatedAt" : ISODate("2020-07-23T07:54:01.833Z"),
"name" : "Austine Wiga"
}
{
"_id" : ObjectId("5ed5e1a3962a3960ad85a1a2"),
"createdAt" : ISODate("2020-06-02T05:20:35.090Z"),
"updatedAt" : ISODate("2020-07-29T14:02:52.295Z"),
"name" : "Biasi Phiri"
}
{
"_id" : ObjectId("5ed629ec6d87382c608645d9"),
"createdAt" : ISODate("2020-06-02T10:29:00.204Z"),
"updatedAt" : ISODate("2020-06-02T10:29:00.204Z"),
"name" : "Chisambwe Kalusa"
}
{
"_id" : ObjectId("5ed8d21f42bc8115f67465a8"),
"createdAt" : ISODate("2020-06-04T10:51:11.546Z"),
"updatedAt" : ISODate("2020-06-04T10:51:11.546Z"),
"name" : "Wakun Moyo"
}
...
Sample Data
I use the following query to return new_users by months:
db.users.aggregate([
{
$group: {
_id: {$dateToString: {format: '%Y-%m', date: '$createdAt'}},
new_users: {
$sum: {$ifNull: [1, 0]}
}
}
}
])
example result:
[
{
"_id": "2020-06",
"new_users": 125
},
{
"_id": "2020-07",
"new_users": 147
},
{
"_id": "2020-08",
"new_users": 43
},
{
"_id": "2020-05",
"new_users": 4
}
]
and this query returns new_users, active_users and total users for a specific month.
db.users.aggregate([
{
$group: {
_id: null,
new_users: {
$sum: {
$cond: [{
$gte: ['$createdAt', ISODate('2020-08-01')]
}, 1, 0]
}
},
active_users: {
$sum: {
$cond: [{
$gt: ['$updatedAt', ISODate('2020-02-01')]
}, 1, 0]
}
},
total_users: {
$sum: {$ifNull: [1, 0]}
}
}
}
])
How can I get the second query to return results by months just like in the first query?
expected results based on one month filter:
[
{ _id: '2020-09', new_users: 0, active_users: 69},
{ _id: '2020-08', new_users: 43, active_users: 219},
{ _id: '2020-07', new_users: 147, active_users: 276},
{ _id: '2020-06', new_users: 125, active_users: 129},
{ _id: '2020-05', new_users: 4, active_users: 4}
]
You can try below aggregation.
Count new users followed by look up to count the active users for the time window for each year month.
db.users.aggregate([
{"$group":{
"_id":{"$dateFromParts":{"year":{"$year":"$createdAt"},"month":{"$month":"$createdAt"}}},
"new_users":{"$sum":1}
}},
{"$lookup":{
"from":"users",
"let":{"end_date":"$_id", "start_date":{"$dateFromParts":{"year":{"$year":"$_id"},"month":{"$subtract":[{"$month":"$_id"},1]}}}},
"pipeline":[
{"$match":{"$expr":
{"$and":[{"$gte":[
"$updatedAt",
"$$start_date"
]}, {"$lt":[
"$updatedAt",
"$$end_date"
]}]}
}},
{"$count":"activeUserCount"}
],
"as":"activeUsers"
}},
{"$project":{
"year-month":{"$dateToString":{"format":"%Y-%m","date":"$_id"}},
"new_users":1,
"active_users":{"$arrayElemAt":["$activeUsers.activeUserCount", 0]},
"_id":0
}}])
You can do the same, that you did in first query, group by cteatedAt, no need to use $ifNull operator in total_users,
Playground
Updated,
use $facet group by month and count for both counts
$project to concat both arrays using $concatArrays
$unwind deconstruct array root
$group by month and merge both month and count
Playground

Mongo rank calculations based on count

I am trying the mongo rank calculation based on count and mentioned in below db schema. I am not getting the expecting results. Anyone help to resolve this?
Mongo Query:
db.company.aggregate([
{
"$group": {
"_id": {
"name1": "$name1",
"name2": "$name2",
},
"expanded": {
"$push": {
"name1": "$name1",
"name2": "$name2",
}
},
"count": { "$sum": 1 }
}
},
{ "$sort": { "count": -1 } },
{
$unwind: {
path: '$expanded',
includeArrayIndex: 'count'
}
}
]);
Expecting results like
Name|Count|Rank
Google|3|1
FB|2|2
Yahoo|1| 3
DB Schema :
{
"_id" : 1.0,
"name1" : "Yahoo",
"name2" : "Google",
"salary" : 1000.0
}
/* 2 */
{
"_id" : 2.0,
"name1" : "FB",
"name2" : "Google",
"salary" : 2000.0
}
/* 3 */
{
"_id" : 3.0,
"name1" : "Google",
"name2" : "FB",
"salary" : 1500.0
}
It seems like you should count name1 and name2 separately so you can create a temporary 2-element array and then run $unwind on that array. Additionally to get rank you have to $group by null to get single array of all groups, try:
db.collection.aggregate([
{
$project: {
key: [ "$name1", "$name2" ]
}
},
{
$unwind: "$key"
},
{
$group: {
_id: "$key",
count: { $sum: 1 }
}
},
{
$sort: {
count: -1
}
},
{
$group: {
_id: null,
groups: { $push: "$$ROOT" }
}
},
{
$unwind: {
path: '$groups',
includeArrayIndex: 'rank'
}
},
{
$project: {
_id: 0,
name: "$groups._id",
rank: { $add: [ "$rank", 1 ] },
count: "$groups.count"
}
}
])
Mongo Playground
try this
db.company.aggregate([
{
$group: {
_id:null,
names1: {$push: "$name1"},
names2: {$push:"$name2"},
}
},
{
$project: {
_id: 0,
names:{$concatArrays: ["$names1", "$names2"]}
}
},
{$unwind: "$names"},
{$sortByCount: "$names"},
{$addFields:{name: "$_id"}},
{
$group : {
_id: null,
records : { $push : {count : "$count", name : "$name"}}
}
},
{
$project: {
total_docs: {$size: "$records"},
records: 1
}
},
{$unwind: "$records"},
{
$project: {
_id: 0,
name: "$records.name",
count:"$records.count",
rank: {
$add:[
{
$subtract:["$total_docs", "$records.count"]
}, 1]
}
}
}])

MongoDB: Null check in between Pipeline Stages

If I create a collection like so:
db.People.insert({"Name": "John"})
and run a simple mongo aggregate, like so:
db.People.aggregate([{$match: {Name: "John"}}, {$group: {_id: "null", count: {$sum: 1}}}])
This counts all the Johns in the collection and returns this
{ "_id" : "null", "count" : 1 }
Which is nice. But if I search for the name "Clarice" that does not exist at all, it returns null.
I would like it to return
{ "_id" : "null", "count" : 0 }
I have not found a way to achieve this. I would have to include some kind of null-check between the $match- and $group-stage.
Have have to use $facet aggregation along with the operator $ifNull. e.g:
db.People.aggregate([
{ "$facet": {
"array": [
{ "$match": { Name:"John" }},
{ "$group": {
"_id": null,
"count": { "$sum": 1 }
}},
{ "$project": { "_id": 0, "count": 1 }}
]
}},
{ "$project": {
"count": {
"$ifNull": [{ "$arrayElemAt": ["$array.count", 0] }, 0 ]
}
}}
])
Output:
{ "count" : 1 }
For other name, it should be as follow:
{ "count" : 0 }
Similar ans at $addFields when no $match found
Simply use count
db. People.count({Name:"John"})
This will return the exact number.
Otherwise You need to check the result wether it is a empty array. Below are the code for node using loopback,
db.People.aggregate([
{$match: {Name: "John"}},
{$group: {_id: "null", count: {$sum: 1}}}
],(err,res)=>{
if(err) return cb(err)
if(res.length) return cb(err,res)
else return cb(err,{_id:null,count:0})
})
You can use $ifNull in your $match stage.
If you can provide an collecion of examples it's more easy to elaborare an answer on it.
Edit: if you group by Name, result for "John" is one, for "Clarice" is an empty array that is correct, here the aggregation query:
db.People.aggregate([
{
$match: { Name: "John" }
},
{
$group: { _id: "$Name", count: { $sum: 1 } }
}
])

Find duplicate records in MongoDB

How would I find duplicate fields in a mongo collection.
I'd like to check if any of the "name" fields are duplicates.
{
"name" : "ksqn291",
"__v" : 0,
"_id" : ObjectId("540f346c3e7fc1054ffa7086"),
"channel" : "Sales"
}
Many thanks!
Use aggregation on name and get name with count > 1:
db.collection.aggregate([
{"$group" : { "_id": "$name", "count": { "$sum": 1 } } },
{"$match": {"_id" :{ "$ne" : null } , "count" : {"$gt": 1} } },
{"$project": {"name" : "$_id", "_id" : 0} }
]);
To sort the results by most to least duplicates:
db.collection.aggregate([
{"$group" : { "_id": "$name", "count": { "$sum": 1 } } },
{"$match": {"_id" :{ "$ne" : null } , "count" : {"$gt": 1} } },
{"$sort": {"count" : -1} },
{"$project": {"name" : "$_id", "_id" : 0} }
]);
To use with another column name than "name", change "$name" to "$column_name"
You can find the list of duplicate names using the following aggregate pipeline:
Group all the records having similar name.
Match those groups having records greater than 1.
Then group again to project all the duplicate names as an array.
The Code:
db.collection.aggregate([
{$group:{"_id":"$name","name":{$first:"$name"},"count":{$sum:1}}},
{$match:{"count":{$gt:1}}},
{$project:{"name":1,"_id":0}},
{$group:{"_id":null,"duplicateNames":{$push:"$name"}}},
{$project:{"_id":0,"duplicateNames":1}}
])
o/p:
{ "duplicateNames" : [ "ksqn291", "ksqn29123213Test" ] }
The answer anhic gave can be very inefficient if you have a large database and the attribute name is present only in some of the documents.
To improve efficiency you can add a $match to the aggregation.
db.collection.aggregate(
{"$match": {"name" :{ "$ne" : null } } },
{"$group" : {"_id": "$name", "count": { "$sum": 1 } } },
{"$match": {"count" : {"$gt": 1} } },
{"$project": {"name" : "$_id", "_id" : 0} }
)
Another option is to use $sortByCount stage.
db.collection.aggregate([
{ $sortByCount: '$name' }
]
This is the combination of $group & $sort.
The $sortByCount stage is equivalent to the following $group + $sort sequence:
{ $group: { _id: <expression>, count: { $sum: 1 } } },
{ $sort: { count: -1 } }
db.getCollection('orders').aggregate([
{$group: {
_id: {name: "$name"},
uniqueIds: {$addToSet: "$_id"},
count: {$sum: 1}
}
},
{$match: {
count: {"$gt": 1}
}
}
])
First Group Query the group according to the fields.
Then we check the unique Id and count it, If count is greater then 1 then the field is duplicate in the entire collection so that thing is to be handle by $match query.
this is how we can achieve this in mongoDB compass
In case you need to see all duplicated rows:
db.collection.aggregate([
{"$group" : { "_id": "$name", "count": { "$sum": 1 },"data": { "$push": "$$ROOT" }}},
{"$unwind": "$data"},
{"$match": {"_id" :{ "$ne" : null } , "count" : {"$gt": 1} } },
]);
If somebody is looking for a query for duplicates with an extra "$and" where clause, like "and where someOtherField is true"
The trick is to start with that other $match, because after the grouping you don't have all the data available anymore
// Do a first match before the grouping
{ $match: { "someOtherField": true }},
{ $group: {
_id: { name: "$name" },
count: { $sum: 1 }
}},
{ $match: { count: { $gte: 2 } }},
I searched for a very long time to find this notation, hope I can help somebody with the same problem
Search for duplicates in Compass Mongo db using $sortByCount
[screenshot]: https://i.stack.imgur.com/L85QV.png
Sometimes you want to find duplicates regardless the case, when you want to create a case insensitive index for instance. In this case you can use this aggregation pipeline
db.collection.aggregate([
{'$group': {'_id': {'$toLower': '$name'}, 'count': { '$sum': 1 }, 'duplicates': { '$push': '$$ROOT' } } },
{'$match': { 'count': { '$gt': 1 } }
]);
Explanation:
group by name but first change the case to lower case and push the docs to the duplicates array.
match those groups having records greater than 1 (the duplicates).

Mongodb Aggregation count array/set size

Here's my problem:
Model:
{ application: "abc", date: Time.now, status: "1" user_id: [ id1, id2,
id4] }
{ application: "abc", date: Time.yesterday, status: "1", user_id: [
id1, id3, id5] }
{ application: "abc", date: Time.yesterday-1, status: "1", user_id: [
id1, id3, id5] }
I need to count the unique number of user_ids in a period of time.
Expected result:
{ application: "abc", status: "1", unique_id_count: 5 }
I'm currently using the aggregation framework and counting the ids outside mongodb.
{ $match: { application: "abc" } }, { $unwind: "$users" }, { $group:
{ _id: { status: "$status"},
users: { $addToSet: "$users" } } }
My arrays of users ids are very large, so I have to iterate the dates or I'll get the maximum document limit (16mb).
I could also $group by
{ year: { $year: "$date" }, month: { $month: "$date" }, day: {
$dayOfMonth: "$date" }
but I also get the document size limitation.
Is it possible to count the set size in mongodb?
thanks
The following will return number of uniqueUsers per application. This will apply an group operation to a result of a group operation by using pipeline feature of mongodb.
{ $match: { application: "abc" } },
{ $unwind: "$users" },
{ $group: { _id: "$status", users: { $addToSet: "$users" } } },
{ $unwind:"$users" },
{ $group : {_id : "$_id", count : {$sum : 1} } }
Hopefully this will be done in an easier way in the following releases of mongo by a command which gives the size of an array under a projection. {$project: {id: "$_id", count: {$size: "$uniqueUsers"}}}
https://jira.mongodb.org/browse/SERVER-4899
Cheers
Sorry I'm a little late to the party. Simply grouping on the 'user_id' and counting the result with a trivial group works just fine and doesn't run into doc size limits.
[
{$match: {application: 'abc', date: {$gte: startDate, $lte: endDate}}},
{$unwind: '$user_id'},
{$group: {_id: '$user_id'}},
{$group: {_id: 'singleton', count: {$sum: 1}}}
];
Use $size to get the size of set.
[
{
$match: {"application": "abc"}
},
{
$unwind: "$user_id"
},
{
$group: {
"_id": "$status",
"application": "$application",
"unique_user_id": {$addToSet: "$user_id"}
}
},
{
$project:{
"_id": "$_id",
"application": "$application",
"count": {$size: "$unique_user_id"}
}
}
]