Group by several fields and custom sums with two conditions - mongodb

I want to group rows and compute two sums with different conditions: the first is the total number of messages (this already works), and the second is the number of unread messages, which I cannot figure out how to do. The inserts are:
db.messages.insert({'source_user': 'test1', 'destination_user': 'test2', 'is_read': true})
db.messages.insert({'source_user': 'test1', 'destination_user': 'test2', 'is_read': false})
db.messages.insert({'source_user': 'test1', 'destination_user': 'test3', 'is_read': true})
my code:
db.messages.aggregate([
{'$match': {'source_user': user}},
{'$group': {
'_id': {
'source_user': '$source_user',
'destination_user': '$destination_user',
'is_read': '$is_read'
},
'total': {'$sum': 1}}
},
{'$project': {
'source_user': '$_id.source_user',
'destination_user': '$_id.destination_user',
#'unread': {'$sum': {'$_id.is_read': False}},
'total': '$total',
'_id': 0
}}
])
as a result I want to get:
[{
'source_user': 'test1',
'destination_user': 'test2',
'unread': 1,
'total': 2
}, {
'source_user': 'test1',
'destination_user': 'test3',
'unread': 0,
'total': 1
}
]
Should I add a new group stage, or can I use the $is_read flag in the same group?
Thank you!

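You can count unread messages the same way you count the total, but use $cond so that each read message adds 0 and each unread message adds 1. Note that is_read is removed from the group _id, so read and unread messages of the same pair of users fall into one group: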
db.messages.aggregate([
{'$match': {'source_user': user}},
{'$group': {
'_id': {
'source_user': '$source_user',
'destination_user': '$destination_user'
},
'total': {'$sum': 1},
'unread': {'$sum': { '$cond': [ '$is_read', 0, 1 ] }}
}
},
{'$project': {
'source_user': '$_id.source_user',
'destination_user': '$_id.destination_user',
'total': 1,
'unread': 1,
'_id': 0
}}
])
MongoDB Playground

Related

Group all elements with same name with their IDs Mongodb

I want to group all elements with same name and find their IDs and $push them in a list.
I have a dataset like
{
'id': 1,
'name': 'Refrigerator'
},
{
'id': 2,
'name': 'Refrigerator'
},
{
'id': 3,
'name': 'TV'
},
{
'id': 4,
'name': 'TV'
}
Expected Output
{
'equipment_name': 'Refrigerator',
'equipment_id': [1, 2]
},
{
'equipment_name': 'TV',
'equipment_id': [3, 4]
}
What I've tried
{'$group': {'_id': '$_id', 'equipmne_name': '$name'}}
{'$project': {'name': {'$push': {'$expr': ['$name', '$name']}}}
And a few more aggregation techniques with $cond
[
{'$group': {'_id': {'key': '$name', 'value': '$_id'}}},
{'$group': {'_id': '$_id.key', 'result': {'$push': {'$toString': '$$ROOT._id.value'}}}},
{'$project': {'_id': 0, 'equipment_name': '$_id', 'equipment_id': '$result'}}
]

How to properly use $group operator in MongoDB?

I am currently struggling with the MongoDB query in which I want to group data by 2 fields myId and myType, but the results that I get in return don't look like what I need.
My goal is to have for each myId results with myType grouping. Like:
myId : {myType1 : 5, myType2 : 3, myType3 : 1}
But when I am trying to provide query with group operator like below:
db.collection.aggregate([{
"$project": {
"myId": "$myId",
"myType": "$eventType",
}
},
{
"$group":{
"_id":{
"myId":"$myId",
"myType":"$Type"
},
"count":{
"$sum":1
}
}
}
])
Results returned by this kind of grouping looks like this
[{'_id': {'myId': 'qwerty123', 'myType': 'created', 'count': 1}},
{'_id': {'myId': 'qwerty123', 'myType': 'removed', 'count': 3}},
{'_id': {'myId': 'qwerty123', 'myType': 'updated', 'count': 2}},
{'_id': {'myId': 'asd123', 'myType': 'created', 'count': 1}},
{'_id': {'myId': 'asd123', 'myType': 'removed', 'count': 2}}]
But what I would like to achieve is a structure like below:
[{'_id': {'myId': 'qwerty123', 'myType': {'created': 1, 'removed': 3, 'updated': 2}}},
{'_id': {'myId': 'asd123', 'myType': {'created': 1, 'removed': 2}}}]
Or maybe like this:
[{'qwerty123', 'myType': {'created': 1, 'removed': 3, 'updated': 2}},
{'asd123', 'myType': {'created': 1, 'removed': 2}}]
Is it possible to achieve results from $group operator with the above schema? If yes, how can I achieve it?
Thank you.
Add the following $group stage after your existing one:
db.collection.aggregate([
{ $group: {
_id: "$_id.myId",
myType: {
$push: {
$arrayToObject: [
[
{
k: "$_id.myType",
v: "$_id.count"
}
]
]
}
}
}}
])
MongoPlayground

How to get the sum of an array with mapReduce in MongoDB?

Given the following database schema:
{
'_id': 5079,
'name': 'Lincoln County',
'state': 'AR',
'population': 13024,
'cases': [{'date': '2020-03-16', 'count': 1}, {'date': '2020-03-22', 'count': 1},
{'date': '2020-03-24', 'count': 1}, {'date': '2020-03-26', 'count': 2}],
'deaths': [{'date': '2020-03-27', 'count': 1}, {'date': '2020-04-02', 'count': 1},
{'date': '2020-05-28', 'count': 2}, {'date': '2020-05-30', 'count': 1}]
}
What MongoDB mapReduce function would generate a collection of the total number of covid19 case counts for each state? It should generate one record for each state, with its 2-letter abbreviation and its total covid cases.
Try this query:
db.collection.aggregate([
{
"$project": {
"total": {
"$sum": {
"$map": {
"input": "$cases",
"as": "c",
"in": "$$c.count"
}
}
},
"state": 1
}
}
])
Example here
The query uses $map to create an array with values from cases.count and then $sum these values.
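Also, the output fields are total, which contains the $sum, and state, which is included using state: 1. Note that this produces one total per document (i.e. per county); to get a single record per state, add a $group stage on state that sums total.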

Merging multiple aggregation queries to one with MongoDB

I'm using these three queries to build a Python dataframe with the columns: 'Date', '% part of business 2', '% part of business 3' (for each day, the percentage of gain that comes from business 2 and from business 3).
query_business2 = collection.aggregate( [
{
'$match': {'Business': 2}
},
{
'$group': {
'_id': '$Date',
'stab2': {'$sum': '$Money'}
}
},
{
'$sort': {'_id': 1}
}
])
query_business3 = collection.aggregate([
{
'$match': {'Business':3}
},
{
'$group': {
'_id': '$Date',
'stab3': {'$sum': '$Money'}
}
},
{
'$sort': {'_id': 1}
}
])
query_total = collection.aggregate([
{
'$group': {
'_id': '$Date',
'total': {'$sum': '$Money'}
}
},
{
'$sort': {'_id': 1}
}
])
To make this faster, I would like to merge these three queries into one. I tried using '$or', but it failed with an unhashable dict error.
Is there a better way to do that? It might be possible to produce the dataframe format directly, without using pandas after these queries, and to calculate the percentage of each business relative to the total money earned within the query itself. Thank you for your help.
Thanks to prasad_, the answer is:
query_business = collection.aggregate([
{
'$group':{
'_id': '$Date',
'total_2': {'$sum' : {'$cond': [{'$eq': ['$Business', 2]}, '$Money', 0]}},
'total_3': {'$sum' : {'$cond': [{'$eq': ['$Business', 3]}, '$Money', 0]}},
'total': {'$sum': '$Money'},
}
},
{
'$match': {'$and': [{ 'total_2': {'$gt': 0}}, {'total': {'$gt': 0}},{'total_3':{'$gt':0}}]}
},
{
'$addFields':{
'part_2': { "$multiply": [ { "$divide": ["$total_2","$total"] }, 100 ] },
'part_3': { "$multiply": [{'$divide': ['$total_3','$total']}, 100]}
}
}
])

count on aggregate in mongodb

This is my aggregate query in pymongo:
db.connection_log.aggregate([
{ '$match': {
'login_time': {'$gte': datetime.datetime(2014, 5, 30, 6, 57)}
}},
{ '$group': {
'_id': {
'username': '$username',
'ras_id': '$ras_id',
'user_id': '$user_id'
},
'total': { '$sum': '$type_details.in_bytes'},
'total1': {'$sum': '$type_details.out_bytes'}
}},
{ '$sort': {'total': 1, 'total1': 1}}
])
How can I count all the results of the aggregation?
Add to the end of your aggregation pipeline:
$group: {
_id:null,
count:{
$sum:1
}
}
SQL to Aggregation Mapping Chart
Well if you really want your results with a total count combined then you can always just push the results into their own array:
result = db.connection_log.aggregate([
{ '$match': {
'login_time': {'$gte': datetime.datetime(2014, 5, 30, 6, 57)}
}},
{ '$group': {
'_id': {
'username': '$username',
'ras_id': '$ras_id',
'user_id': '$user_id'
},
'total': { '$sum': '$type_details.in_bytes'},
'total1': {'$sum': '$type_details.out_bytes'}
}},
{ '$sort': {'total': 1, 'total1': 1}},
{ '$group': {
'_id': null,
'results': {
'$push': {
'_id': '$_id',
'total': '$total',
'total1': '$total1'
}
},
'count': { '$sum': 1 }
}}
])
And if you are using MongoDB 2.6 or greater you can just '$push': '$$ROOT' instead of actually specifying all of the document fields there.
But really, unless you are using MongoDB 2.6 and are explicitly asking for a cursor as a result, then that result is actually returned as an array already without adding an inner array for results with a count. So just get the length of the array, which in python is:
len(result)
If you are indeed using a cursor for a large result-set or otherwise using $limit and $skip to "page" results then you will need to do two queries with one just summarizing the "total count", but otherwise you just don't need to do this.