MongoDB sum of all fields with integer values - mongodb

inside the aggregation framework, it's possibile in some way, for each document like this below:
{
"Title": "Number orders",
"2021-03-16": 3,
"2021-03-15": 6,
"2021-03-19": 1,
"2021-03-14": 19
}
Obtain a new document like this?
{
"Title": "Number orders",
"2021-03-16": 3,
"2021-03-15": 6,
"2021-03-19": 1,
"2021-03-14": 19
"Total": 29
}
Basically, I want a new field that have inside the sum of all the values of the fields that are integer.
Another thing to take in consideration is that the date fields are dynamic, so one week could be like the one in the example, the following week the fields would become like
{
"Title": "Number orders",
"2021-03-23": 3,
"2021-03-22": 6,
"2021-03-26": 1,
"2021-03-21": 19
}
Thanks!

Demo - https://mongoplayground.net/p/724nerJUQtK
$$ROOT is the entire document, add total using $addFields use $sum to add them up and remove allData using $unset
db.collection.aggregate([
{ $addFields: { allData: { "$objectToArray": "$$ROOT" } } } },
{ $addFields: { "total": { $sum: "$allData.v" } } },
{ $unset: "allData" }
])

Based on your older question, I think this might help:
db.collection.aggregate([
{
$group: {
_id: {
dDate: "$deliveryDay",
name: "$plate.name"
},
v: { $sum: "$plate.quantity" }
}
},
{
$group: {
_id: "$_id.name",
Total: { $sum: "$v" },
array: {
$push: { k: "$_id.dDate", v: "$v" }
}
}
},
{
$addFields: {
array: {
$concatArrays: [
[{ k: "Title", v: "Number orders" }],
"$array",
[{ k: "Total", v: "$Total" }]
]
}
}
},
{
$replaceRoot: {
newRoot: { $arrayToObject: "$array" }
}
}
])
Output:
/* 1 */
{
"Title" : "Number orders",
"2021-01-16" : 2,
"Total" : 2
},
/* 2 */
{
"Title" : "Number orders",
"2021-01-14" : 1,
"2021-01-16" : 3,
"Total" : 4
}

Related

get rank in mongodb with date range

I have following stat data stored daily for users.
{
"_id": {
"$oid": "638df4e42332386e0e06d322"
},
"appointment_count": 1,
"item_id": 2,
"item_type": "user",
"company_id": 5,
"created_date": "2022-12-05",
"customer_count": 1,
"lead_count": 1,
"door_knocks": 10
}
{
"_id": {
"$oid": "638f59a9bf33442a57c3aa99"
},
"lead_count": 2,
"item_id": 2,
"item_type": "user",
"company_id": 5,
"created_date": "2022-12-06",
"video_viewed": 2,
"door_knocks": 9
}
And I'm using the following query to get the items by rank
user_stats_2022_12.aggregate([{"$match":{"company_id":5,"created_date":{"$gte":"2022-12-04","$lte":"2022-12-06"}}},{"$setWindowFields":{"partitionBy":"$company_id","sortBy":{"door_knocks":-1},"output":{"item_rank":{"$denseRank":{}},"stat_sum":{"$sum":"$door_knocks"}}}},{"$facet":{"metadata":[{"$count":"total"}],"data":[{"$skip":0},{"$limit":100},{"$sort":{"item_rank":1}}]}}])
It's giving me the rank but with the above data, the record with item_id: 2 are having different rank for same item_id. So I wanted to group them by item_id and then applied rank.
It's a little messy, but here's a playground - https://mongoplayground.net/p/JrJOo4cl9X1.
If you're going to sort by knocks after grouping, I'm assuming that you'll want the sum of door_knocks for a given item_id for this sort.
db.collection.aggregate([
{
$match: {
company_id: 5,
created_date: {
"$gte": "2022-12-04",
"$lte": "2022-12-06"
}
}
},
{
$group: {
_id: {
item_id: "$item_id",
company_id: "$company_id"
},
docs: {
$push: "$$ROOT"
},
total_door_knocks: {
$sum: "$door_knocks"
}
}
},
{
$setWindowFields: {
partitionBy: "$company_id",
sortBy: {
total_door_knocks: -1
},
output: {
item_rank: {
"$denseRank": {}
},
stat_sum: {
"$sum": "$total_door_knocks"
}
}
}
},
{
$unwind: "$docs"
},
{
$project: {
_id: "$docs._id",
appointment_count: "$docs.appointment_count",
company_id: "$docs.company_id",
created_date: "$docs.created_date",
customer_count: "$docs.customer_count",
door_knocks: "$docs.door_knocks",
item_id: "$docs.item_id",
item_type: "$docs.item_type",
lead_count: "$docs.lead_count",
item_rank: 1,
stat_sum: 1,
total_door_knocks: 1
}
},
{
$facet: {
metadata: [
{
"$count": "total"
}
],
data: [
{
"$skip": 0
},
{
"$limit": 100
},
{
"$sort": {
"item_rank": 1
}
}
]
}
}
])

MongoDB get count of field per season from MM/DD/YYYY date field

I am facing a problem in MongoDB. Suppose, I have the following collection.
{ id: 1, issueDate: "07/05/2021", code: "31" },
{ id: 2, issueDate: "12/11/2020", code: "14" },
{ id: 3, issueDate: "02/11/2021", code: "98" },
{ id: 4, issueDate: "01/02/2021", code: "14" },
{ id: 5, issueDate: "06/23/2020", code: "14" },
{ id: 6, issueDate: "07/01/2020", code: "31" },
{ id: 7, issueDate: "07/05/2022", code: "14" },
{ id: 8, issueDate: "07/02/2022", code: "20" },
{ id: 9, issueDate: "07/02/2022", code: "14" }
The date field is in the format MM/DD/YYYY. My goal is to get the count of items with each season (spring (March-May), summer (June-August), autumn (September-November) and winter (December-February).
The result I'm expecting is:
count of fields for each season:
{ "_id" : "Summer", "count" : 6 }
{ "_id" : "Winter", "count" : 3 }
top 2 codes (first and second most recurring) per season:
{ "_id" : "Summer", "codes" : {14, 31} }
{ "_id" : "Winter", "codes" : {14, 98} }
How can this be done?
You should never store date/time values as string, store always proper Date objects.
You can use $setWindowFields opedrator for that:
db.collection.aggregate([
// Convert string into Date
{ $set: { issueDate: { $dateFromString: { dateString: "$issueDate", format: "%m/%d/%Y" } } } },
// Determine the season (0..3)
{
$set: {
season: { $mod: [{ $toInt: { $divide: [{ $add: [{ $subtract: [{ $month: "$issueDate" }, 1] }, 1] }, 3] } }, 4] }
}
},
// Count codes per season
{
$group: {
_id: { season: "$season", code: "$code" },
count: { $count: {} },
}
},
// Rank occurrence of codes per season
{
$setWindowFields: {
partitionBy: "$_id.season",
sortBy: { count: -1 },
output: {
rank: { $denseRank: {} },
count: { $sum: "$count" }
}
}
},
// Get only top 2 ranks
{ $match: { rank: { $lte: 2 } } },
// Final grouping
{
$group: {
_id: "$_id.season",
count: { $first: "$count" },
codes: { $push: "$_id.code" }
}
},
// Some cosmetic for output
{
$set: {
season: {
$switch: {
branches: [
{ case: { $eq: ["$_id", 0] }, then: 'Winter' },
{ case: { $eq: ["$_id", 1] }, then: 'Spring' },
{ case: { $eq: ["$_id", 2] }, then: 'Summer' },
{ case: { $eq: ["$_id", 3] }, then: 'Autumn' },
]
}
}
}
}
])
Mongo Playground
I will give you clues,
You need to use $group with _id as $month on issueDate, use accumulator $sum to get month wise count.
You can divide month by 3, to get modulo, using $toInt, $divide, then put them into category using $cond.
Another option:
db.collection.aggregate([
{
$addFields: {
"season": {
$switch: {
branches: [
{
case: {
$in: [
{
$substr: [
"$issueDate",
0,
2
]
},
[
"06",
"07",
"08"
]
]
},
then: "Summer"
},
{
case: {
$in: [
{
$substr: [
"$issueDate",
0,
2
]
},
[
"03",
"04",
"05"
]
]
},
then: "Spring"
},
{
case: {
$in: [
{
$substr: [
"$issueDate",
0,
2
]
},
[
"12",
"01",
"02"
]
]
},
then: "Winter"
}
],
default: "No date found."
}
}
}
},
{
$group: {
_id: {
s: "$season",
c: "$code"
},
cnt1: {
$sum: 1
}
}
},
{
$sort: {
cnt1: -1
}
},
{
$group: {
_id: "$_id.s",
codes: {
$push: "$_id.c"
},
cnt: {
$sum: "$cnt1"
}
}
},
{
$project: {
_id: 0,
season: "$_id",
count: "$cnt",
codes: {
"$slice": [
"$codes",
2
]
}
}
}
])
Explained:
Add one more field for season based on $switch per month(extracted from issueDate string)
Group to collect per season/code.
$sort per code DESCENDING
group per season to form an array with most recurring codes in descending order.
Project the fields to the desired output and $slice the codes to limit only to the fist two most recurring.
Comment:
Indeed keeping dates in string is not a good idea in general ...
Playground

Adding up values from array elements in MongoDB

I have done some aggregation to arrive at the below document structure for my given data:
{
"_id" : "test",
"NoOfQuestions" : 3.0,
"info" : [
{
"AnswerrCount" : 3
},
{
"AnswerrCount" : 3
},
{
"AnswerrCount" : 2
}
]
}
However, I am trying to add up all the values in the AnswerrCount column. So from the above example, I want another column that says TotalAnswers:8, (3+3+2) and then eventually have a from using the NoOfQuestions, FinalTotal:11, (8+3)
You can use $sum aggregation to add array values
db.collection.aggregate([
{ "$addFields": {
"TotalAnswers": {
"$sum": "$info.AnswerrCount"
},
"FinalTotal": {
"$add": [{ "$sum": "$info.AnswerrCount" }, "$NoOfQuestions"]
}
}}
])
db.collection.aggregate([{
$unwind: "$info"
}, {
$group: {
_id: null,
TotalAnswers: {
$sum: '$info.AnswerrCount'
},
doc: {
$first: '$$CURRENT'
}
}
}, {
$project: {
TotalAnswers: 1,
FinalTotal: {
'$add': ['$TotalAnswers', '$doc.NoOfQuestions']
},
_id: 0
}
}])

MongoDb aggregate pipeline with multiple groupings

I'm trying to get my head around an aggregate pipeline in MongoDb with multiple groups.
I have the following data: https://gist.github.com/bomortensen/36e6b3fbc987a096be36a66bbfe30d82
Expected data would be: https://gist.github.com/bomortensen/7b220df1f1da83be838acfb2ed79a2ee (total quantity sum based on highest version, hourly)
I need to write a query which does the following:
Group the data by the field MeterId to get unique meter groups.
In each group I then need to group by the StartDate's year, month, day and hour since all objects StartDate is stored as quarters, but I need to aggregate them into whole hours.
Finally, I need to only get the highest version from the Versions array by VersionNumber
I've tried the following query, but must admit I'm stuck:
mycollection.aggregate([
{ $group: {
_id : { ediel: "$_id.MeterId", start: "$_id.StartDate" },
versions: { $push: "$Versions" }
}
},
{ $unwind: { path: "$versions" } },
{ $group: {
_id: {
hour: { $hour: "$_id.start.DateTime" },
key: "$_id"
},
quantitySum: { $sum: "$Versions.Quantity" }
}
},
{ $sort: { "_id.hour": -1 } }
]);
Does anyone know how I should do this? :-)
This would give :
1 $project : get $hour from date, create a maxVersion field per record
1 $unwind to remove the Versions array
1 $project to add a keep field that will contain a boolean to check if the record should be kept or not
1 $match that match only higher version number eg keep == true
1 $group that group by id/hour and sum the quantity
1 $project to set up your required format
Query is :
db.mycollection.aggregate([{
$project: {
_id: 1,
Versions: 1,
hour: {
"$hour": "$_id.StartDate"
},
maxVersion: { $max: "$Versions.VersionNumber" }
}
}, {
$unwind: "$Versions"
}, {
$project: {
_id: 1,
Versions: 1,
hour: 1,
maxVersion: 1,
keep: { $eq: ["$Versions.VersionNumber", "$maxVersion"] }
}
}, {
$match: { "keep": true }
}, {
$group: {
_id: { _id: "$_id.MeterId", hour: "$hour" },
StartDate: { $first: "$_id.StartDate" },
QuantitySum: { $sum: "$Versions.Quantity" }
}
}, {
$project: {
_id: { _id: "$_id._id", StartDate: "$StartDate" },
hour: "$_id.hour",
QuantitySum: 1
}
}])
In your example output you take into account only the first higher versionNumber, You have { "VersionNumber" : 2, "Quantity" : 7.5 } and { "VersionNumber" : 2, "Quantity" : 8.4 } for hour 2 and id 1234 but you only take { "VersionNumber" : 2, "Quantity" : 7.5 }
I dont know if this is intended or not but in this case you want to take only the first MaxVersion number. After the $match, I added :
1 $group that push versions previously filter in an array
1 $project that $slice this array to take only the first element
1 $unwind to remove this array (which contains only one elemement)
The query that match your output is :
db.mycollection.aggregate([{
$project: {
_id: 1,
Versions: 1,
hour: {
"$hour": "$_id.StartDate"
},
maxVersion: { $max: "$Versions.VersionNumber" }
}
}, {
$unwind: "$Versions"
}, {
$project: {
_id: 1,
Versions: 1,
hour: 1,
maxVersion: 1,
keep: { $eq: ["$Versions.VersionNumber", "$maxVersion"] }
}
}, {
$match: { "keep": true }
}, {
$group: {
_id: { _id: "$_id.MeterId", StartDate: "$_id.StartDate" },
Versions: { $push: "$Versions" },
hour: { "$first": "$hour" }
}
}, {
$project: {
_id: 1,
hour: 1,
Versions: { $slice: ["$Versions", 1] }
}
}, {
$unwind: "$Versions"
}, {
$sort: {
_id: 1
}
}, {
$group: {
_id: { _id: "$_id._id", hour: "$hour" },
StartDate: { $first: "$_id.StartDate" },
QuantitySum: { $sum: "$Versions.Quantity" }
}
}, {
$project: {
_id: { _id: "$MeterId._id", StartDate: "$StartDate" },
Hour: "$_id.hour",
QuantitySum: 1
}
}])
Output is :
{ "_id" : { "MeterId" : "4567", "StartDate" : ISODate("2016-09-20T03:00:00Z") }, "QuantitySum" : 25.9, "Hour" : 3 }
{ "_id" : { "MeterId" : "4567", "StartDate" : ISODate("2016-09-20T02:00:00Z") }, "QuantitySum" : 25.9, "Hour" : 2 }
{ "_id" : { "MeterId" : "1234", "StartDate" : ISODate("2016-09-20T03:00:00Z") }, "QuantitySum" : 25.9, "Hour" : 3 }
{ "_id" : { "MeterId" : "1234", "StartDate" : ISODate("2016-09-20T02:00:00Z") }, "QuantitySum" : 25.9, "Hour" : 2 }
Sorry, I just dont find a straight forward way to round the hour. You can try the following. You will unwind the versions, so you can apply the grouping to collect the max version, push the versions for the next step, which is to project to filter the matching records with max version and final project to sum the max versions quantity. Right now start dt is the min from the group. You should be fine as long as you have versions at the top of the hour.
db.collection.aggregate([{
$unwind: {
path: "$Versions"
}
}, {
$group: {
_id: {
MeterId: "$_id.MeterId",
start: {
$hour: "$_id.StartDate"
}
},
startDate: {
$min: "$_id.StartDate"
},
maxVersion: {
$max: "$Versions.VersionNumber"
},
Versions: {
$push: "$Versions"
}
}
}, {
$sort: {
"_id.start": -1
}
}, {
$project: {
_id: {
MeterId: "$_id.MeterId",
StartDate: "$startDate"
},
hour: "$_id.start",
Versions: {
$filter: {
input: "$Versions",
as: "version",
cond: {
$eq: ["$maxVersion", "$$version.VersionNumber"]
}
}
}
}
}, {
$project: {
_id: 1,
hour: 1,
QuantitySum: {
$sum: "$Versions.Quantity"
}
}
}]);
Sample Output
{
"_id": {
"MeterId": "1234",
"StartDate": ISODate("2016-09-20T02:00:00Z")
},
"QuantitySum": 15,
"hour": 2
}

count array occurrences across all documents with mongo

Im trying to pull data on a collection of documents which looks like:
[
{
name: 'john',
sex: 'male',
hobbies: ['football', 'tennis', 'swimming']
},
{
name: 'betty'
sex: 'female',
hobbies: ['football', 'tennis']
},
{
name: 'frank'
sex: 'male',
hobbies: ['football', 'tennis']
}
]
I am trying to use the aggregation framework to present the data, split by sex, counting the most common hobbies. The results should look something like.
{ _id: 'male',
total: 2,
hobbies: {
football: 2,
tennis: 2,
swimming: 1
}
},
{ _id: 'female',
total: 1,
hobbies: {
football: 1,
tennis: 1
}
}
So far I can get the total of each sex, but i'm not sure how I could possibly use unwind to get the totals of the hobbies array.
My code so far:
collection.aggregate([
{
$group: {
_id: '$sex',
total: { $sum: 1 }
}
}
])
Personally I am not a big fan of transforming "data" as the names of keys in a result. The aggregation framework principles tend to aggree as this sort of operation is not supported either.
So the personal preference is to maintain "data" as "data" and accept that the processed output is actually better and more logical to a consistent object design:
db.people.aggregate([
{ "$group": {
"_id": "$sex",
"hobbies": { "$push": "$hobbies" },
"total": { "$sum": 1 }
}},
{ "$unwind": "$hobbies" },
{ "$unwind": "$hobbies" },
{ "$group": {
"_id": {
"sex": "$_id",
"hobby": "$hobbies"
},
"total": { "$first": "$total" },
"hobbyCount": { "$sum": 1 }
}},
{ "$group": {
"_id": "$_id.sex",
"total": { "$first": "$total" },
"hobbies": {
"$push": { "name": "$_id.hobby", "count": "$hobbyCount" }
}
}}
])
Which produces a result like this:
[
{
"_id" : "female",
"total" : 1,
"hobbies" : [
{
"name" : "tennis",
"count" : 1
},
{
"name" : "football",
"count" : 1
}
]
},
{
"_id" : "male",
"total" : 2,
"hobbies" : [
{
"name" : "swimming",
"count" : 1
},
{
"name" : "tennis",
"count" : 2
},
{
"name" : "football",
"count" : 2
}
]
}
]
So the initial $group does the count per "sex" and stacks up the hobbies into an array of arrays. Then to de-normalize you $unwind twice to get singular items, $group to get the totals per hobby under each sex and finally regroup an array for each sex alone.
It's the same data, it has a consistent and organic structure that is easy to process, and MongoDB and the aggregation framework was quite happy in producing this output.
If you really must convert your data to names of keys ( and I still recommend you do not as it is not a good pattern to follow in design ), then doing such a tranformation from the final state is fairly trivial for client code processing. As a basic JavaScript example suitable for the shell:
var out = db.people.aggregate([
{ "$group": {
"_id": "$sex",
"hobbies": { "$push": "$hobbies" },
"total": { "$sum": 1 }
}},
{ "$unwind": "$hobbies" },
{ "$unwind": "$hobbies" },
{ "$group": {
"_id": {
"sex": "$_id",
"hobby": "$hobbies"
},
"total": { "$first": "$total" },
"hobbyCount": { "$sum": 1 }
}},
{ "$group": {
"_id": "$_id.sex",
"total": { "$first": "$total" },
"hobbies": {
"$push": { "name": "$_id.hobby", "count": "$hobbyCount" }
}
}}
]).toArray();
out.forEach(function(doc) {
var obj = {};
doc.hobbies.sort(function(a,b) { return a.count < b.count });
doc.hobbies.forEach(function(hobby) {
obj[hobby.name] = hobby.count;
});
doc.hobbies = obj;
printjson(doc);
});
And then you are basically processing each cursor result into the desired output form, which really isn't an aggregation function that is really required on the server anyway:
{
"_id" : "female",
"total" : 1,
"hobbies" : {
"tennis" : 1,
"football" : 1
}
}
{
"_id" : "male",
"total" : 2,
"hobbies" : {
"tennis" : 2,
"football" : 2,
"swimming" : 1
}
}
Where that should also be fairly trival to implement that sort of manipulation into stream processing of the cursor result to tranform as required, as it is basically just the same logic.
On the other hand, you can always implement all the manipulation on the server using mapReduce instead:
db.people.mapReduce(
function() {
emit(
this.sex,
{
"total": 1,
"hobbies": this.hobbies.map(function(key) {
return { "name": key, "count": 1 };
})
}
);
},
function(key,values) {
var obj = {},
reduced = {
"total": 0,
"hobbies": []
};
values.forEach(function(value) {
reduced.total += value.total;
value.hobbies.forEach(function(hobby) {
if ( !obj.hasOwnProperty(hobby.name) )
obj[hobby.name] = 0;
obj[hobby.name] += hobby.count;
});
});
reduced.hobbies = Object.keys(obj).map(function(key) {
return { "name": key, "count": obj[key] };
}).sort(function(a,b) {
return a.count < b.count;
});
return reduced;
},
{
"out": { "inline": 1 },
"finalize": function(key,value) {
var obj = {};
value.hobbies.forEach(function(hobby) {
obj[hobby.name] = hobby.count;
});
value.hobbies = obj;
return value;
}
}
)
Where mapReduce has it's own distinct style of output, but the same principles are used in accumulation and manipulation, if not likely as efficient as the aggregation framework can do:
"results" : [
{
"_id" : "female",
"value" : {
"total" : 1,
"hobbies" : {
"football" : 1,
"tennis" : 1
}
}
},
{
"_id" : "male",
"value" : {
"total" : 2,
"hobbies" : {
"football" : 2,
"tennis" : 2,
"swimming" : 1
}
}
}
]
At the end of the day, I still say that the first form of processing is the most efficient and provides to my mind the most natural and consistent working of the data output, without even attempting to convert the data points into the names of keys. It's probably best to consider following that pattern, but if you really must, then there are ways to manipulate results into a desired form in various approaches to processing.
Since mongoDB version 3.4 you can use $reduce avoid the first grouping by sex which means holding the entire collection in t2o documents. You can also avoid the need for code, by using $arrayToObject
db.collection.aggregate([
{
$group: {
_id: {sex: "$sex", hobbies: "$hobbies"},
count: {$sum: 1},
totalIds: {$addToSet: "$_id"}
}
},
{
$group: {
_id: "$_id.sex",
hobbies: {$push: {k: "$_id.hobbies", v: "$count"}},
totalIds: {$push: "$totalIds"}
}
},
{
$set: {
hobbies: {$arrayToObject: "$hobbies"},
totalIds: {
$reduce: {
input: "$totalIds",
initialValue: [],
in: {$concatArrays: ["$$value", "$$this"]}}
}
}
},
{
$set: {
count: {$size: {$setIntersection: "$totalIds"}},
totalIds: "$$REMOVE"
}
}
])
Which works if you have an ObjectId.
Playground example 3.4
Otherwise, you can start with $unwind and $group, or since mongoDB version 4.4 you can add an ObjectId with a stage:
{
$set: {
o: {
$function: {
"body": "function (x) {x._id=new ObjectId(); return x}",
"args": [{_id: 1}],
"lang": "js"
}
}
}
},
Playground example creating _id
Since mongoDB version 5.0 you can calculate the total using $setWindowFields:
db.collection.aggregate([
{
$setWindowFields: {
partitionBy: "$sex",
output: {totalCount: {$count: {}}}
}
},
{$unwind: "$hobbies"},
{
$group: {
_id: {sex: "$sex", hobbies: "$hobbies"},
count: {$sum: 1},
totalCount: {$first: "$totalCount"}
}
},
{
$group: {
_id: "$_id.sex",
hobbies: {$push: {k: "$_id.hobbies", v: "$count"}},
total: {$first: "$totalCount"}
}
},
{$set: {hobbies: {$arrayToObject: "$hobbies"}}}
])
Playground example 5.0