Merging multiple aggregation queries to one with MongoDB - mongodb

I'm using these three queries to can have a python dataframe format with the columns : 'Date', '% part of business 2', '% part of business 3'. (for each day to have the percentage of gain from business 2 and 3).
query_business2 = collection.aggregate( [
{
'$match': {'Business': 2}
},
{
'$group': {
'_id': '$Date',
'stab2': {'$sum': '$Money'}
}
},
{
'$sort': {'_id': 1}
}
])
query_business3 = collection.aggregate([
{
'$match': {'Business':3}
},
{
'$group': {
'_id': '$Date',
'stab3': {'$sum': '$Money'}
}
},
{
'$sort': {'_id': 1}
}
])
query_total = collection.aggregate([
{
'$group': {
'_id': '$Date',
'total': {'$sum': '$Money'}
}
},
{
'$sort': {'_id': 1}
}
])
For this to be faster, I would like to merge these three queries into one. I tried using '$or' but didn't work for unashable dict.
Is there a better way to do that ? It might be possible to directly make the dataframe format without using pandas after this queries and to calculate directly the percentage of each business compared to the total money earned. Thank you for your help

Thanks to prasad_ the answer is :
query_business = collection.aggregate([
{
'$group':{
'_id': '$Date',
'total_2': {'$sum' : {'$cond': [{'$eq': ['$Business', 2]}, '$Money', 0]}},
'total_3': {'$sum' : {'$cond': [{'$eq': ['$Business', 3]}, '$Money', 0]}},
'total': {'$sum': '$Money'},
}
},
{
'$match': {'$and': [{ 'total_2': {'$gt': 0}}, {'total': {'$gt': 0}},{'total_3':{'$gt':0}}]}
},
{
'$addFields':{
'part_2': { "$multiply": [ { "$divide": ["$total_2","$total"] }, 100 ] },
'part_3': { "$multiply": [{'$divide': ['$total_3','$total']}, 100]}
}
}
])

Related

Filter nested objects

I have a collection of docs like
{'id':1, 'score': 1, created_at: ISODate(...)}
{'id':1, 'score': 2, created_at: ISODate(...)}
{'id':2, 'score': 1, created_at: ISODate(...)}
{'id':2, 'score': 20, created_at: ISODate(...)}
etc.
Does anyone know how to find docs that were created within the past 24hrs where the difference of the score value between the two most recent docs of the same id is less than 5?
So far I can only find all docs created within the past 24hrs:
[{
$project: {
_id: 0,
score: 1,
created_at: 1
}
}, {
$match: {
$expr: {
$gte: [
'$created_at',
{
$subtract: [
'$$NOW',
86400000
]
}
]
}
}
}]
Any advice appreciated.
Edit: By the two most recent docs, the oldest of the two can be created more than 24hrs ago. So the most recent doc would be created within the past 24hrs, but the oldest doc could be created over 24hrs ago.
If I understand you correctly, you want something like:
db.collection.aggregate([
{$match: {$expr: {$gte: ["$created_at", {$subtract: ["$$NOW", 86400000]}]}}},
{$sort: {created_at: -1}},
{$group: {_id: "$id", data: {$push: "$$ROOT"}}},
{$project: {pair: {$slice: ["$data", 0, 2]}, scores: {$slice: ["$data.score", 0, 2]}}},
{$match: {$expr: {
$lte: [{$abs: {$subtract: [{$first: "$scores"}, {$last: "$scores"}]}}, 5]
}}},
{$unset: "scores"}
])
See how it works on the playground example
EDIT:
according to you comment, one option is:
db.collection.aggregate([
{$setWindowFields: {
partitionBy: "$id",
sortBy: {created_at: -1},
output: {data: {$push: "$$ROOT", window: {documents: ["current", 1]}}}
}},
{$group: {
_id: "$id",
created_at: {$first: "$created_at"},
pair: {$first: "$data"}
}},
{$match: {$expr: {$and: [
{$gte: ["$created_at", {$dateAdd: {startDate: "$$NOW", unit: "day", amount: -1}},
{$eq: [{$size: "$pair"}, 2]},
{$lte: [{$abs: {$subtract: [{$first: "$pair.score"},
{$last: "$pair.score"}]}}, 5]}
]}}},
{$project: {_id: 0, pair: 1}}
])
See how it works on the playground example
If I've understood correctly you can try this query:
First the $match as you have to get documents since a day ago.
Then $sort by the date to ensure the most recent are on top.
$group by the id, and how the most recent were on top, using $push will be the two first elements in the array.
So now you only need to $sum these two values.
And filter again with these one that are less than ($lt) 5.
db.collection.aggregate([
{
$match: {
$expr: {
$gte: [
"$created_at",
{
$subtract: [
"$$NOW",
86400000
]
}
]
}
}
},
{
"$sort": {
"created_at": -1
}
},
{
"$group": {
"_id": "$id",
"score": {
"$push": "$score"
}
}
},
{
"$project": {
"score": {
"$sum": {
"$firstN": {
"n": 2,
"input": "$score"
}
}
}
}
},
{
"$match": {
"score": {
"$lt": 5
}
}
}
])
Example here
Edit: $firstN is new in version 5.2. Other way you can use $slice in this way.

Count the documents and sum of values of fields in all documents of a mongodb

I have a set of documents modified from mongodb using
[{"$project":{"pred":1, "base-url":1}},
{"$group":{
"_id":"$base-url",
"invalid":{"$sum": { "$cond": [{ "$eq": ["$pred", "invalid"] }, 1, 0] }},
"pending":{"$sum": { "$cond": [{ "$eq": ["$pred", "null"] }, 1, 0] }},
}},
]
to get the below documents
[{'_id': 'https://www.example1.org/', 'invalid': 3, 'pending': 6},
{'_id': 'https://example2.com/', 'invalid': 10, 'pending': 4},
{'_id': 'https://www.example3.org/', 'invalid': 2, 'pending': 6}]
How to get the count of documents and sum of other fields to obtain the following result
{"count":3, "invalid":15,"pending":16}
you just need a $group stage with $sum
playground
The $sum docs and here has good examples
db.collection.aggregate([
{
$group: {
_id: null,
pending: {
$sum: "$pending"
},
invalid: {
$sum: "$invalid"
},
count: {
$sum: 1 //counting each record
}
}
},
{
$project: {
_id: 0 //removing _id field from the final output
}
}
])

Mongodb group geolocation if within X meters

So I am trying to group X points if they overlap by X meters say 5 meters.
I am wondering can MongoDB do this.
My current query is not smart at all
[{$project: {
'location': 1,
'day': {
'$dayOfMonth': '$updatedAt'
},
'month': {
'$month': '$updatedAt'
},
'year': {
'$year': '$updatedAt'
}
}}, {$match: {
'year': 2022
}}, {$group: {
'_id': '$location.coordinates',
'count': {
'$sum': 1
}
}}, {$project: {
'_id': 0,
'count': 1,
'location.coordinates': {
'$map': {
'input': '$_id',
'in': {
'$toString': '$$this'
}
}
}
}}, {$sort: {
count: -1
}}]

count on aggregate in mongodb

this my query for aggregate in pymongo:
db.connection_log.aggregate([
{ '$match': {
'login_time': {'$gte': datetime.datetime(2014, 5, 30, 6, 57)}
}},
{ '$group': {
'_id': {
'username': '$username',
'ras_id': '$ras_id',
'user_id': '$user_id'
},
'total': { '$sum': '$type_details.in_bytes'},
'total1': {'$sum': '$type_details.out_bytes'}
}},
{ '$sort': {'total': 1, 'total1': 1}}
])
How to count all result in aggregate?
Add to the end of your aggregation pipeline:
$group: {
_id:null,
count:{
$sum:1
}
}
SQL to Aggregation Mapping Chart
Well if you really want your results with a total count combined then you can always just push the results into their own array:
result = db.connection_log.aggregate([
{ '$match': {
'login_time': {'$gte': datetime.datetime(2014, 5, 30, 6, 57)}
}},
{ '$group': {
'_id': {
'username': '$username',
'ras_id': '$ras_id',
'user_id': '$user_id'
},
'total': { '$sum': '$type_details.in_bytes'},
'total1': {'$sum': '$type_details.out_bytes'}
}},
{ '$sort': {'total': 1, 'total1': 1}},
{ '$group' {
'_id': null,
'results': {
'$push': {
'_id': '$_id',
'total': '$total',
'total1': '$total1'
}
},
'count': { '$sum': 1 }
}}
])
And if you are using MongoDB 2.6 or greater you can just '$push': '$$ROOT' instead of actually specifying all of the document fields there.
But really, unless you are using MongoDB 2.6 and are explicitly asking for a cursor as a result, then that result is actually returned as an array already without adding an inner array for results with a count. So just get the length of the array, which in python is:
len(result)
If you are indeed using a cursor for a large result-set or otherwise using $limit and $skip to "page" results then you will need to do two queries with one just summarizing the "total count", but otherwise you just don't need to do this.

Conditional $sum in MongoDB

My collection in mongodb is similar to the following table in SQL:
Sentiments(Company,Sentiment)
Now, I need to execute a query like this:
SELECT
Company,
SUM(CASE WHEN Sentiment >0 THEN Sentiment ELSE 0 END) AS SumPosSenti,
SUM(CASE WHEN Sentiment <0 THEN Sentiment ELSE 0 END) AS SumNegSenti
FROM Sentiments
GROUP BY Company
What should I do to write this query in Mongo? I am stuck at the following query:
db.Sentiments.aggregate(
{ $project: {_id:0, Company:1, Sentiment: 1} },
{ $group: {_id: "$Company", SumPosSenti: {$sum: ? }, SumNegSenti: {$sum: ? } } }
);
As Sammaye suggested, you need to use the $cond aggregation projection operator to do this:
db.Sentiments.aggregate(
{ $project: {
_id: 0,
Company: 1,
PosSentiment: {$cond: [{$gt: ['$Sentiment', 0]}, '$Sentiment', 0]},
NegSentiment: {$cond: [{$lt: ['$Sentiment', 0]}, '$Sentiment', 0]}
}},
{ $group: {
_id: "$Company",
SumPosSentiment: {$sum: '$PosSentiment'},
SumNegSentiment: {$sum: '$NegSentiment'}
}});
Starting from version 3.4, we can use the $switch operator which allows logical condition processing in the $group stage. Of course we still need to use the $sum accumulator to return the sum.
db.Sentiments.aggregate(
[
{ "$group": {
"_id": "$Company",
"SumPosSenti": {
"$sum": {
"$switch": {
"branches": [
{
"case": { "$gt": [ "$Sentiment", 0 ] },
"then": "$Sentiment"
}
],
"default": 0
}
}
},
"SumNegSenti": {
"$sum": {
"$switch": {
"branches": [
{
"case": { "$lt": [ "$Sentiment", 0 ] },
"then": "$Sentiment"
}
],
"default": 0
}
}
}
}}
]
)
If you have not yet migrated your mongod to 3.4 or newer, then note that the $project stage in this answer is redundant because the $cond operator returns a numeric value which means that you can $group your documents and apply $sum to the $cond expression.
This will improve the performance in your application especially for large collection.
db.Sentiments.aggregate(
[
{ '$group': {
'_id': '$Company',
'PosSentiment': {
'$sum': {
'$cond': [
{ '$gt': ['$Sentiment', 0]},
'$Sentiment',
0
]
}
},
'NegSentiment': {
'$sum': {
'$cond': [
{ '$lt': ['$Sentiment', 0]},
'$Sentiment',
0
]
}
}
}}
]
)
Consider a collection Sentiments with the following documents:
{ "Company": "a", "Sentiment" : 2 }
{ "Company": "a", "Sentiment" : 3 }
{ "Company": "a", "Sentiment" : -1 }
{ "Company": "a", "Sentiment" : -5 }
The aggregation query produces:
{ "_id" : "a", "SumPosSenti" : 5, "SumNegSenti" : -6 }
Explaining the snippets above, that uses the array syntax:
PosSentiment: {$cond: [{$gt: ['$Sentiment', 0]}, '$Sentiment', 0]}
is equal to:
PosSentiment: {$cond: { if: {$gt: ['$Sentiment', 0]}, then: '$Sentiment', else: 0} }
The array syntax summarizes the long syntax to just { $cond: [if, then, else] }