How to properly use $group operator in MongoDB? - mongodb

I am currently struggling with the MongoDB query in which I want to group data by 2 fields myId and myType, but the results that I get in return don't look like what I need.
My goal is to get, for each myId, the counts grouped by myType, like:
myId : {myType1 : 5, myType2 : 3, myType3 : 1}
But when I run a query with the $group operator like the one below:
db.collection.aggregate([{
"$project": {
"myId": "$myId",
"myType": "$eventType",
}
},
{
"$group":{
"_id":{
"myId":"$myId",
"myType":"$Type"
},
"count":{
"$sum":1
}
}
}
])
The results returned by this kind of grouping look like this:
[{'_id': {'myId': 'qwerty123', 'myType': 'created', 'count': 1}},
{'_id': {'myId': 'qwerty123', 'myType': 'removed', 'count': 3}},
{'_id': {'myId': 'qwerty123', 'myType': 'updated', 'count': 2}},
{'_id': {'myId': 'asd123', 'myType': 'created', 'count': 1}},
{'_id': {'myId': 'asd123', 'myType': 'removed', 'count': 2}}]
But what I would like to achieve is a structure like below:
[{'_id': {'myId': 'qwerty123', 'myType': {'created' : 1, 'removed' : 3, 'updated' : 2}}},
{'_id': {'myId': 'asd123', 'myType': {'created' : 1, 'removed' : 2}}}]
Or maybe like this:
[{'qwerty123', 'myType': {'created' : 1, 'removed' : 3, 'updated' : 2}},
{'asd123', 'myType': {'created' : 1, 'removed' : 2}}]
Is it possible to achieve results from $group operator with the above schema? If yes, how can I achieve it?
Thank you.

Add the stage below after your existing $group stage:
db.collection.aggregate([
{ $group: {
_id: "$_id.myId",
myType: {
$push: {
$arrayToObject: [
[
{
k: "$_id.myType",
v: "$_id.count"
}
]
]
}
}
}}
])
MongoPlayground

Related

MongoDB - How to get specified values of an array in a document

Here is the document
{
'_id': ObjectId('61a4262a53ddaa8b93374613'),
'userid': 'renyi',
'data1': [{'a': 1}, 1, 2, 3],
'data2': [{'c': 1}, {'b': 2}, {'c': 3}, {'c': 2}],
'data': {'0': {'a': 1}}
}
With this
coll.find_one({'userid':'renyi','data2.c':1},{'_id':0,"data2.$":1})
I can get
{'data2': [{'c': 1}]}
But how to get
{'data2': [{'c': 1},{'c': 2}]}
You can use $filter in the projection to keep only the matching elements of the array.
db.collection.find({
"userid": "renyi",
"data2.c": {
$in: [
1,
2
]
}
},
{
"_id": 0,
"data2": {
$filter: {
input: "$data2",
cond: {
$in: [
"$$this.c",
[
1,
2
]
]
}
}
}
})
Sample Mongo Playground

MongoDB - Find array index of document in array field

I have an aggregation whose output contains an array field. This array field contains documents (objects). I have some matching criteria for these, and I would like to create new fields named lowestCheapIndex and highestExpensiveIndex, each holding the array index of a matching element.
Matching criteria:
lowestCheapIndex - Should contain lowest array index number of any record item, that has price below 20.
highestExpensiveIndex - Should contain highest array index number of any record item, that has price over 30.
My current aggregation output:
{
'_id': 'Egg shop',
'records': [
{'_id': 1, 'price': 22},
{'_id': 2, 'price': 18},
{'_id': 3, 'price': 34},
{'_id': 4, 'price': 31},
{'_id': 5, 'price': 13},
]
}
Desired output:
{
'_id': 'Egg shop',
'records': [
{'_id': 1, 'price': 22},
{'_id': 2, 'price': 18},
{'_id': 3, 'price': 34},
{'_id': 4, 'price': 31},
{'_id': 5, 'price': 13},
],
'lowestCheapIndex': 1,
'highestExpensiveIndex': 3,
}
Question:
How can I retrieve an array index based on my criteria? I found $indexOfArray in the docs, but I am still having a hard time working out how it would be used in my case.
You can do following in an aggregation pipeline:
use $map to augment your records array with booleans indicating below 20 and over 30
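use $indexOfArray to search for the booleans. For highestExpensiveIndex, reverse the array first to get the index in the reversed array, then subtract that index from (size of array - 1) to get the expected index in the original array. Note that $indexOfArray returns -1 when nothing matches, so in that case this expression evaluates to the array size rather than -1; wrap it in $cond if no match is possible.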
db.collection.aggregate([
{
"$addFields": {
"records": {
"$map": {
"input": "$records",
"as": "r",
"in": {
"_id": "$$r._id",
"price": "$$r.price",
"below20": {
$lt: [
"$$r.price",
20
]
},
"over30": {
$gt: [
"$$r.price",
30
]
}
}
}
}
}
},
{
"$addFields": {
"lowestCheapIndex": {
"$indexOfArray": [
"$records.below20",
true
]
},
"highestExpensiveIndex": {
"$subtract": [
{
"$subtract": [
{
$size: "$records"
},
{
"$indexOfArray": [
{
"$reverseArray": "$records.over30"
},
true
]
}
]
},
1
]
}
}
}
])
Mongo playground

How to get the sum of an array with mapReduce in MongoDB?

Given following database schema:
{
'_id': 5079,
'name': 'Lincoln County',
'state': 'AR',
'population': 13024,
'cases': [{'date': '2020-03-16', 'count': 1}, {'date': '2020-03-22', 'count': 1},
{'date': '2020-03-24', 'count': 1}, {'date': '2020-03-26', 'count': 2}],
'deaths': [{'date': '2020-03-27', 'count': 1}, {'date': '2020-04-02', 'count': 1},
{'date': '2020-05-28', 'count': 2}, {'date': '2020-05-30', 'count': 1}]
}
What MongoDB mapReduce function would generate a collection of the total number of covid19 case counts for each state? Generate one record for each state with its 2-letter abbreviation and its total covid cases.
Try this query:
db.collection.aggregate([
{
"$project": {
"total": {
"$sum": {
"$map": {
"input": "$cases",
"as": "c",
"in": "$$c.count"
}
}
},
"state": 1
}
}
])
Example here
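The query uses $map to create an array with the values from cases.count and then applies $sum to these values.
Also, the output fields are total, which contains the $sum, and state, which is included via state: 1. Note that this yields one total per document (county); to get a single record per state, follow it with a $group stage on state that sums total.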

Merging multiple aggregation queries to one with MongoDB

I'm using these three queries to build a Python dataframe with the columns: 'Date', '% part of business 2', '% part of business 3' (so that, for each day, I have the percentage of gain coming from business 2 and 3).
query_business2 = collection.aggregate( [
{
'$match': {'Business': 2}
},
{
'$group': {
'_id': '$Date',
'stab2': {'$sum': '$Money'}
}
},
{
'$sort': {'_id': 1}
}
])
query_business3 = collection.aggregate([
{
'$match': {'Business':3}
},
{
'$group': {
'_id': '$Date',
'stab3': {'$sum': '$Money'}
}
},
{
'$sort': {'_id': 1}
}
])
query_total = collection.aggregate([
{
'$group': {
'_id': '$Date',
'total': {'$sum': '$Money'}
}
},
{
'$sort': {'_id': 1}
}
])
To make this faster, I would like to merge these three queries into one. I tried using '$or', but it didn't work (unhashable dict error).
Is there a better way to do that? It might be possible to produce the dataframe format directly, without using pandas after these queries, and to calculate the percentage of each business compared to the total money earned directly in the query. Thank you for your help.
Thanks to prasad_, the answer is:
query_business = collection.aggregate([
{
'$group':{
'_id': '$Date',
'total_2': {'$sum' : {'$cond': [{'$eq': ['$Business', 2]}, '$Money', 0]}},
'total_3': {'$sum' : {'$cond': [{'$eq': ['$Business', 3]}, '$Money', 0]}},
'total': {'$sum': '$Money'},
}
},
{
'$match': {'$and': [{ 'total_2': {'$gt': 0}}, {'total': {'$gt': 0}},{'total_3':{'$gt':0}}]}
},
{
'$addFields':{
'part_2': { "$multiply": [ { "$divide": ["$total_2","$total"] }, 100 ] },
'part_3': { "$multiply": [{'$divide': ['$total_3','$total']}, 100]}
}
}
])

Group by several fields and custom sums with two conditions

I want to group rows with two conditions: the first to get the total (this already works), and the second to get the number of unread messages. I cannot figure out how to do it. The inserts are:
db.messages.insert({'source_user': 'test1', 'destination_user': 'test2', 'is_read': true})
db.messages.insert({'source_user': 'test1', 'destination_user': 'test2', 'is_read': false})
db.messages.insert({'source_user': 'test1', 'destination_user': 'test3', 'is_read': true})
my code:
db.messages.aggregate([
{'$match': {'source_user': user}},
{'$group': {
'_id': {
'source_user': '$source_user',
'destination_user': '$destination_user',
'is_read': '$is_read'
},
'total': {'$sum': 1}}
},
{'$project': {
'source_user': '$_id.source_user',
'destination_user': '$_id.destination_user',
#'unread': {'$sum': {'$_id.is_read': False}},
'total': '$total',
'_id': 0
}}
])
as a result I want to get:
[{
'source_user': 'test1',
'destination_user': 'test2',
'unread': 1,
'total': 2
}, {
'source_user': 'test1',
'destination_user': 'test3',
'unread': 0,
'total': 1
}
]
Should I add a new group, or can I use the $is_read flag in the same group?
Thank you!
You can count unread messages the same way you compute total, but you need to apply $cond so that each read message adds 0 and each unread message adds 1:
db.messages.aggregate([
{'$match': {'source_user': user}},
{'$group': {
'_id': {
'source_user': '$source_user',
'destination_user': '$destination_user'
},
'total': {'$sum': 1},
'unread': {'$sum': { '$cond': [ '$is_read', 0, 1 ] }}
}
},
{'$project': {
'source_user': '$_id.source_user',
'destination_user': '$_id.destination_user',
'total': 1,
'unread': 1,
'_id': 0
}}
])
MongoDB Playground