Grouping different amounts together in MongoDB - mongodb

If I have a set of objects each with the same description, but with different amounts.
{
{
"_id": "101",
"description": "DD from my employer1",
"amount": 1000.33
},
{
"_id": "102",
"description": "DD from my employer1",
"amount": 1000.34
},
{
"_id": "103",
"description": "DD from my employer1",
"amount": 1000.35
},
{
"_id": "104",
"description": "DD from employer1",
"amount": 5000.00
},
{
"_id": "105",
"description": "DD from my employer2",
"amount": 2000.33
},
{
"_id": "106",
"description": "DD from my employer2",
"amount": 2000.33
},
{
"_id": "107",
"description": "DD from my employer2",
"amount": 2000.33
}
}
Below, I am able to group them using the description:
{
{
"$group": {
"_id": {
"description": "$description"
},
"count": {
"$sum": 1
},
"_id": {
"$addToSet": "$_id"
}
}
},
{
"$match": {
"count": {
"$gte": 3
}
}
}
}
Is there a way to include all the amounts in the group (_ids: 101, 102, and 103 plus 105,106,107) even if they have a small difference, but exclude the bonus amount, which in the sample above is _id 104?
I don't believe it could be done in a group stage, but is there something that could be done at a later stage that could group _ids 101, 102 and 103 together and exclude _id 104. Basically, I want MongoDB to ignore the small differences in 101, 102, 103 and group them together since the are paychecks coming from the same employer.
I have been working with $stdDevPop, but can't get a solid formula down.
I am looking for a simple array output of just the _ids.
{
"result": [
"101",
"102",
"103",
"105",
"106",
"107"
]
}

You can do this by doing some math on the "amount" to round it down to the nearest 1000 and use that as the grouping _id:
db.collection.aggregate([
{ "$group": {
"_id": {
"$subtract": [
{ "$trunc": "$amount" },
{ "$mod": [
{ "$trunc": "$amount" },
1000
]}
]
},
"results": { "$push": "$_id" }
}},
{ "$redact": {
"$cond": {
"if": { "$gt": [ { "$size": "$results" }, 1 ] },
"then": "$$KEEP",
"else": "$$PRUNE"
}
}},
{ "$unwind": "$results" },
{ "$group": {
"_id": null,
"results": { "$push": "$results" }
}}
])
If your MongoDB is older than 3.2 then you would just need to use a long form with $mod of what $trunc is doing. And if your MongoDB is older than 2.6 then rather than $redact you would $match. So in the longer form this is:
db.collection.aggregate([
{ "$group": {
"_id": {
"$subtract": [
{ "$subtract": [
"$amount",
{ "$mod": [ "$amount", 1 ] }
]},
{ "$mod": [
{ "$subtract": [
"$amount",
{ "$mod": [ "$amount", 1 ] }
]},
1000
]}
]
},
"results": { "$push": "$_id" },
"count": { "$sum": 1 }
}},
{ "$match": { "count": { "$gt": 1 } } },
{ "$unwind": "$results" },
{ "$group": {
"_id": null,
"results": { "$push": "$results" }
}}
])
Either way the output is just the _id values whose amounts grouped to the boundaries with a count more than once.
{ "_id" : null, "results" : [ "105", "106", "107", "101", "102", "103" ] }
You could either add a $sort in there or live with sorting the result array in client code.

db.yourDBNameHere.aggregate( [
{ $match: { "amount" : { $lt : 5000 } } },
{ $project: { _id: 1 } },
])
that will grab the ID only of every transaction less than 5000$.

Related

Mongodb plus and minus in one query (subtract two query results)

I have the following collection:
[
{
"type": "debit",
"amount": 10
},
{
"type": "debit",
"amount": 20
},
{
"type": "credit",
"amount": 5
},
]
I need to sum all documents with type debit and minus sum of credits.
how can I handle this with mongodb?
so my expected result is 25
db.collection.aggregate([
{
"$group": {
"_id": "$type",
"field": {
$sum: {
"$cond": {
"if": {
"$eq": [
"$type",
"debit"
]
},
"then": "$amount",
"else": {
"$multiply": [
"$amount",
-1
]
}
}
}
}
}
},
{
"$group": {
"_id": null,
"result": {
"$sum": "$field"
}
}
}
])
https://mongoplayground.net/p/cfTixd1Oo_z

mongodb aggregation with multiple sub groups

I have a collection with documents that look similar to this:
[
{
"_id": ObjectId("..."),
"date": ISODate("..."),
"type": "TypeA",
"color": "ColorA",
"soldFor": 12.15
},
{
"_id": ObjectId("..."),
"date": ISODate("..."),
"type": "TypeA",
"color": "ColorB",
"soldFor": 13.15
},
{
"_id": ObjectId("..."),
"date": ISODate("..."),
"type": "TypeB",
"color": "ColorA",
"soldFor": 12.15
},
{
"_id": ObjectId("..."),
"date": ISODate("..."),
"type": "TypeB",
"color": "ColorB",
"soldFor": 12.15
}
]
I know that this is not a good way to store such information, but unfortunately I have no influence in that.
What I need to get out of the collection is something like this:
[
2017: {
typeA: {
colorA: {
sum: 125.00
},
colorB: {
sum: 110.00
}
},
typeB: {
colorA: {
sum: 125.000
}
}
},
2016: {
typeA: {
colorB: {
sum: 125.000
}
}
}
]
At the moment I have two group stages that give me everything grouped by year, but I have no clue how to get the two other sub-groups. Building the sum would be a nice to have, but I am certain that I can figure out how that would be done in a group.
So far my pipeline looks like this:
[
{
$group: {
_id: { type: '$type', color: '$color', year: { $year: '$date' } },
docs: {
$push: '$$ROOT'
}
}
},
{
$group: {
_id: { year: '$_id.year' },
docs: {
$push: '$$ROOT'
}
}
}
]
which results in something like this:
[
{
"_id": {
"year": 2006
},
"docs": {
"_id": {
"type": "typeA",
"color": "colorA",
"year": 2006
},
"docs": [
{
... root document
}
]
}
},
{
"_id": {
"year": 2016
},
"docs": [
{
"_id": {
"type": "typeA",
"color": "colorB",
"year": 2016
},
"docs": [
{
... root document
}
]
}
... more docs with three keys in id
]
}
]
Help is much appreciated!
Using a cohort of operators found in MongoDB 3.4.4 and newer, i.e. $addFields, $arrayToObject and $replaceRoot, you can compose a pipeline like the following to get the desired result:
[
{ "$group": {
"_id": {
"year": { "$year": "$date" },
"type": "$type",
"color": "$color"
},
"count": { "$sum": "$soldFor" }
} },
{ "$group": {
"_id": {
"year": "$_id.year",
"type": "$_id.type"
},
"counts": {
"$push": {
"k": "$_id.color",
"v": { "sum": "$count" }
}
}
} },
{ "$addFields": {
"counts": { "$arrayToObject": "$counts" }
} },
{ "$group": {
"_id": "$_id.year",
"counts": {
"$push": {
"k": "$_id.type",
"v": "$counts"
}
}
} },
{ "$addFields": {
"counts": { "$arrayToObject": "$counts" }
} },
{ "$group": {
"_id": null,
"counts": {
"$push": {
"k": { "$substr": ["$_id", 0, -1 ]},
"v": "$counts"
}
}
} },
{ "$replaceRoot": {
"newRoot": {
"$mergeObjects": [
{ "$arrayToObject": "$counts" },
"$$ROOT"
]
}
} },
{ "$project": { "counts": 0 } }
]

Group and count in Mongo DB

I have many tweets object like this:
{
"_id" : ObjectId("5a2f4a381cb29b482553e2c9"),
"user_id" : 21898942,
"created_at" : ISODate("2009-03-09T19:48:50Z"),
"id" : 1301923516,
"place" : "",
"retweet_count" : 0,
"tweet" : "Save the Date! March 28th Vietnamese Cooking Class! Call to Reserve 312.255.0088",
"favorite_count" : 0
"type": A
}
I'm using this code to qroup the tweets by date and by type:
pipeline = [
{
"$group": {
"_id": {
"date": {
"$dateToString": {
"format": "%Y-%m-%d",
"date": "$created_at"
}
},
"type": "$type"
},
"count": {
"$sum": 1
}
}
}
]
results = mongo.db.tweets.aggregate(pipeline)
Here is the result I get:
{
"_id": {
"date": "2009-03-17",
"type": A
},
"count": 4
,
{
"_id": {
"date": "2009-03-17",
"type": B
},
"count": 6
}
But now I want to have the result in this format:
{date: "2009-03-17", A: 4, B: 6, C: 9}
Is there anyway I can achieve this through aggregate directly?
Note: I'm using MongoDB and PyMongo
You can try the below aggregation query in 3.6 version.
Added the second group to create array of type and count value pairs followed by $mergeObjects to merge date key value with $arrayToObject, which produces create a type value key and count value pairs, to generate the expected response.
$replaceRoot to promote the document to the top level.
pipeline = [
{
"$group": {
"_id": {
"date": {
"$dateToString": {
"format": "%Y-%m-%d",
"date": "$created_at"
}
},
"type": "$type"
},
"count": {
"$sum": 1
}
}
},
{
"$group": {
"_id": "$_id.date",
"typeandcount": {
"$push": {
"k": "$_id.type",
"v": "$count"
}
}
}
},
{
"$replaceRoot": {
"newRoot": {
"$mergeObjects": [
{
"date": "$_id"
},
{
"$arrayToObject": "$typeandcount"
}
]
}
}
}
]
Mongo 3.4 version:
Replace the last stage with below
{
"$replaceRoot": {
"newRoot": {
"$arrayToObject": {
"$concatArrays": [
[
{
"k": "date",
"v": "$_id"
}
],
"$typeandcount"
]
}
}
}
}

How to get count of multiple fields based on value in mongodb?

Collection exists as below:
[
{"currentLocation": "Chennai", "baseLocation": "Bengaluru"},
{"currentLocation": "Chennai", "baseLocation": "Bengaluru"},
{"currentLocation": "Delhi", "baseLocation": "Bengaluru"},
{"currentLocation": "Chennai", "baseLocation": "Chennai"}
]
Expected Output:
[
{"city": "Chennai", "currentLocationCount": 3, "baseLocationCount": 1},
{"city": "Bengaluru", "currentLocationCount": 0, "baseLocationCount": 3},
{"city": "Delhi", "currentLocationCount": 1, "baseLocationCount": 0}
]
What I have tried is:
db.getCollection('users').aggregate([{
$group: {
"_id": "$baselocation",
baseLocationCount: {
$sum: 1
}
},
}, {
$project: {
"_id": 0,
"city": "$_id",
"baseLocationCount": 1
}
}])
Got result as:
[
{"city": "Chennai", "baseLocationCount": 1},
{"city": "Bengaluru", "baseLocationCount": "3"}
]
I'm not familiar with mongo, so any help?
MongoDB Version - 3.4
Neil Lunn and myself had a lovely argument over this topic the other day which you can read all about here: Group by day with Multiple Date Fields.
Here are two solutions to your precise problem.
The first one uses the $facet stage. Bear in mind, though, that it may not be suitable for large collections because $facet produces a single (potentially huge) document that might be bigger than the current MongoDB document size limit of 16MB (which only applies to the result document and wouldn't be a problem during pipeline processing anyway):
collection.aggregate(
{
$facet:
{
"current":
[
{
$group:
{
"_id": "$currentLocation",
"currentLocationCount": { $sum: 1 }
}
}
],
"base":
[
{
$group:
{
"_id": "$baseLocation",
"baseLocationCount": { $sum: 1 }
}
}
]
}
},
{ $project: { "result": { $setUnion: [ "$current", "$base" ] } } }, // merge results into new array
{ $unwind: "$result" }, // unwind array into individual documents
{ $replaceRoot: { newRoot: "$result" } }, // get rid of the additional field level
{ $group: { "_id": "$_id", "currentLocationCount": { $sum: "$currentLocationCount" }, "baseLocationCount": { $sum: "$baseLocationCount" } } }, // group into final result)
{ $project: { "_id": 0, "city": "$_id", "currentLocationCount": 1, "baseLocationCount": 1 } } // group into final result
)
The second one works based on the $map stage instead:
collection.aggregate(
{
"$project": {
"city": {
"$map": {
"input": [ "current", "base" ],
"as": "type",
"in": {
"type": "$$type",
"name": {
"$cond": {
"if": { "$eq": [ "$$type", "current" ] },
"then": "$currentLocation",
"else": "$baseLocation"
}
}
}
}
}
}
},
{ "$unwind": "$city" },
{
"$group": {
"_id": "$city.name",
"currentLocationCount": {
"$sum": {
"$cond": {
"if": { "$eq": [ "$city.type", "current" ] },
"then": 1,
"else": 0
}
}
},
"baseLocationCount": {
"$sum": {
"$cond": {
"if": { "$eq": [ "$city.type", "base" ] },
"then": 1,
"else": 0
}
}
}
}
}
)

how to aggregate in mongoDB

I have a document called user.monthly, in that I have we used store 'day' : no. of clicks .
Here I have given 2 samples for different date
For month January
{
name : "devid",
date : ISODate("2014-01-21T11:32:42.392Z"),
daily: {'1':12,'9':13,'30':13}
}
For month February
{
name : "devid",
date : ISODate("2014-02-21T11:32:42.392Z"),
daily: {'3':12,'12':13,'25':13}
}
How can I aggregate this and get total clicks for January and February ?
Please help me to resolve my problem.
Your current schema is not helping you here as the "daily" field ( which we presume is your clicks per type or something like that ) is represented as a sub-document, which means that you need to explicitly name the path to each field in order to do something with it.
A better approach would be to put this information in an array:
{
"name" : "devid",
"date" : ISODate("2014-02-21T11:32:42.392Z"),
"daily": [
{ "type": "3", "clicks": 12 },
{ "type": "12", "clicks": 13 },
{ "type": "25", "clicks": 13 }
]
}
Then you have an aggregation statement that goes like this:
db.collection.aggregate([
// Just match the dates in January and February
{ "$match": {
"date": {
"$gte": new Date("2014-01-01"), "$lt": new Date("2014-03-01")
}
}},
// Unwind the "daily" array
{ "$unwind": "$daily" },
// Group the values together by "type" on "January" and "February"
{ "$group": {
"_id": {
"year": { "$year": "$date" },
"month": { "$month": "$date" },
"type": "$daily.type"
},
"clicks": { "$sum": "$daily.clicks" }
}},
// Sort the result nicely
{ "$sort": {
"_id.year": 1,
"_id.month": 1,
"_id.type": 1
}}
])
That form is pretty simple. Or even if you do not care about the type as a grouping and just want the month totals:
db.collection.aggregate([
{ "$match": {
"date": {
"$gte": new Date("2014-01-01"), "$lt": new Date("2014-03-01")
}
}},
{ "$unwind": "$daily" },
{ "$group": {
"_id": {
"year": { "$year": "$date" },
"month": { "$month": "$date" },
},
"clicks": { "$sum": "$daily.clicks" }
}},
{ "$sort": { "_id.year": 1, "_id.month": 1 }}
])
But with the current sub-document form you currently have this becomes ugly:
db.collection.aggregate([
{ "$match": {
"date": {
"$gte": new Date("2014-01-01"), "$lt": new Date("2014-03-01")
}
}},
{ "$group": {
"_id": {
"year": { "$year": "$date" },
"month": { "$month": "$date" },
},
"clicks": {
"$sum": {
"$add": [
{ "$ifNull": ["$daily.1", 0] },
{ "$ifNull": ["$daily.3", 0] },
{ "$ifNull": ["$daily.9", 0] },
{ "$ifNull": ["$daily.12", 0] },
{ "$ifNull": ["$daily.25", 0] },
{ "$ifNull": ["$daily.30", 0] },
]
}
}
}}
])
That shows that you have no other option here other than to specify what is essentially every possible field under daily ( so probably much larger ). Then we have to evaluate as that key may possibly not exist for a given document to return a default value.
For example, your first document has no key "daily.3" so without the $ifNull check the returned value would be null and invalidate the whole $sum process so that the total would be "0".
Grouping on those keys as in the first aggregate example gets even worse:
db.collection.aggregate([
// Just match the dates in January and February
{ "$match": {
"date": {
"$gte": new Date("2014-01-01"), "$lt": new Date("2014-03-01")
}
}},
// Project with an array to match all possible values
{ "$project": {
"date": 1,
"daily": 1,
"type": { "$literal": ["1", "3", "9", "12", "25", "30" ] }
}},
// Unwind the "type" array
{ "$unwind": "$type" },
// Project values onto the "type" while grouping
{ "$group" : {
"_id": {
"year": { "$year": "$date" },
"month": { "$month": "$date" },
"type": "$type"
},
"clicks": { "$sum": { "$cond": [
{ "$eq": [ "$type", "1" ] },
"$daily.1",
{ "$cond": [
{ "$eq": [ "$type", "3" ] },
"$daily.3",
{ "$cond": [
{ "$eq": [ "$type", "9" ] },
"$daily.9",
{ "$cond": [
{ "$eq": [ "$type", "12" ] },
"$daily.12",
{ "$cond": [
{ "$eq": [ "$type", "25" ] },
"$daily.25",
"$daily.30"
]}
]}
]}
]}
]}}
}},
{ "$sort": {
"_id.year": 1,
"_id.month": 1,
"_id.type": 1
}}
])
Which is creating one big conditional evaluation using $cond to match out the values to the "type" which we projected all possible values in an array using the $literal operator.
If you do not have MongoDB 2.6 or greater you can always do this in place of the $literal operator statement:
"type": { "$cond": [1, ["1", "3", "9", "12", "25", "30" ], 0] }
Where essentially the true evaluation from $cond returns a "literal" declared value, which is how you specify an array. There is also the hidden $const operator that is not documented, but now exposed as $literal.
As you can see the structure here is doing you no favors, so the best option is to change it. But if you cannot and otherwise find the aggregation concept for this too hard to handle, then mapReduce offers an approach, but the processing will be much slower:
db.collection.mapReduce(
function () {
for ( var k in this.daily ) {
emit(
{
year: this.date.getFullYear(),
month: this.date.getMonth() + 1,
type: k
},
this.daily[k]
);
}
},
function(key,values) {
return Array.sum( values );
},
{
"query": {
"date": {
"$gte": new Date("2014-01-01"), "$lt": new Date("2014-03-01")
}
},
"out": { "inline": 1 }
}
)
The general lesson here is that you will get the cleanest and fastest results by altering the document format and using the aggregation framework. But all the ways to do this are listed here.