summation of two columns in Aggregate Method

summation of two columns in Aggregate Method - mongodb

I am using mongodb Aggregate query. My db is like this:
{
"_id" : ObjectId("5a81636f017e441d609283cc"),
"userid": "123",
page : 'A',
newpage: 'A',
},
{
"_id" : ObjectId("5a81636f017e441d609283cd"),
"userid": "123",
page : 'B',
newpage: 'A',
},
{
"_id" : ObjectId("5a81636f017e441d609283ce"),
"userid": "123",
page : 'D',
newpage: 'D',
}
I want to get the Sum of all page and new page value. I am able to get one column value which can give the very precise result.
But I am stuck with the two columns. What I did for getting the sum/repetition of values for one column is:
db.Collection.aggregate([
{$match:{ "userid":"123"}},
{$unwind:"$newpage"},
{$group:{"_id":"$newpage", "count":{"$sum":1}}},
{$project: {_id:0, pagename :"$_id", count:{ $multiply: [ "$count", 1 ] }}},
{$sort: {count: -1}},
//{$limit: 10}
], function(error, data){
if (error) {
console.log(error);
} else {
console.log(data);
}
});
Desired Result will be like:
{
"pagename": "A",
"count": 3
},
{
"pagename": "D",
"count": 2
},
{
"pagename": "B",
"count": 1
}
Is anyone has any approach to getting these things for Two Column? Any Help is appreciated

Use $facet pipeline stage to process multiple aggregation pipelines within a single stage on the same set of input documents. In your case you need to aggregate the counts separately then join the two results and calculate the final aggregates.
This can be demonstrated by running the following pipeline:
db.collection.aggregate([
{ "$match": { "userid": "123" } },
{
"$facet": {
"groupByPage": [
{ "$unwind": "$page" },
{
"$group": {
"_id": "$page",
"count": { "$sum": 1 }
}
}
],
"groupByNewPage": [
{ "$unwind": "$newpage" },
{
"$group": {
"_id": "$newpage",
"count": { "$sum": 1 }
}
}
]
}
},
{
"$project": {
"pages": {
"$concatArrays": ["$groupByPage", "$groupByNewPage"]
}
}
},
{ "$unwind": "$pages" },
{
"$group": {
"_id": "$pages._id",
"count": { "$sum": "$pages.count" }
}
},
{ "$sort": { "count": -1 } }
], function(error, data){
if (error) {
console.log(error);
} else {
console.log(data);
}
)

There you go:
db.Collection.aggregate([
{$match:{ "userid":"123"}}, // filter out what's not of interest
{$facet: { // process two stages in parallel --> this will give us a single result document with the following two fields
"newpage": [ // "newpage" holding the ids and sums per "newpage" field
{$group:{"_id":"$newpage", "count":{"$sum":1}}}
],
"page": [ // and "page" holding the ids and sums per "page" field
{$group:{"_id":"$page", "count":{"$sum":1}}}
]
}},
{$project: {x:{$concatArrays:["$newpage", "$page"]}}}, // merge the two arrays into one
{$unwind: "$x"}, // flatten the single result document into multiple ones so we do not need to $reduce but can nicely $group
{$group: {_id: "$x._id", "count": {$sum: "$x.count"}}} // perform the final grouping/counting,
{$sort: {count: -1}} // well, the sort according to your question
]);

Related

Fetching sum of rows for a type of column value in mongodb as a single output

I am trying to get the sum of field 'score.number' based on the type of a column value work.type in MongoDB. It should fetch sum as 25 for 'hw' ,and 'cw' as 5 as a single output for the student 'A'. Is there a way to achieve it using mongodb queries ? I tried the $group as well but it doesn't seem to fetch the worktype and the sum for each worktype against it for a single student record 'A'.
Expected Output:

after $match you should use $group like this
db.collection.aggregate([
{
$match: {
student: {
$in: [
"A"
]
},
"work.type": {
$in: [
"hw",
"cw"
]
}
}
},
{
"$group": {
"_id": {
"worktype": "$work.type",
"student": "$student"
},
"workScore": {
"$sum": "$score.number"
}
}
}
])
https://mongoplayground.net/p/qzghM5KoAbp

Able to get the sum with these two
$match{
'student': {'$in': ['A']},
"work.type": {'$in': ['hw', 'cw']},
}
followed by
$group
{
_id: '$work.type',
totalAmount: { $sum: "$score.number" },
}

$match {'student': {
$in: [
"A"
]
},
"work.type": {
$in: [
"hw",
"cw"
]
}}
followed by
$group {
"_id": {
"worktype": "$work.type",
"student": "$student"
},
"workScore": {
"$sum": "$score.number"
}
}
followed by
$group {"_id": {
"student": "$_id.student"
},
'list': {'$push': {'worktype':"$_id.worktype", 'workScore': "$workScore" }},
}
Solved output:
Solves the issue.

total of all groups totals using mongodb

i did this Aggregate pipeline , and i want add a field contains the Global Total of all groups total.
{ "$match": query },
{ "$sort": cursor.sort },
{ "$group": {
_id: { key:"$paymentFromId"},
items: {
$push: {
_id:"$_id",
value:"$value",
transaction:"$transaction",
paymentMethod:"$paymentMethod",
createdAt:"$createdAt",
...
}
},
count:{$sum:1},
total:{$sum:"$value"}
}}
{
//i want to get
...project groups , goupsTotal , groupsCount
}
,{
"$skip":cursor.skip
},{
"$limit":cursor.limit
},
])

you need to use $facet (avaialble from MongoDB 3.4) to apply multiple pipelines on the same set of docs
first pipeline: skip and limit docs
second pipeline: calculate total of all groups
{ "$match": query },
{ "$sort": cursor.sort },
{ "$group": {
_id: { key:"$paymentFromId"},
items: {
$push: "$$CURRENT"
},
count:{$sum:1},
total:{$sum:"$value"}
}
},
{
$facet: {
docs: [
{ $skip:cursor.skip },
{ $limit:cursor.limit }
],
overall: [
{$group: {
_id: null,
groupsTotal: {$sum: '$total'},
groupsCount:{ $sum: '$count'}
}
}
]
}
the final output will be
{
docs: [ .... ], // array of {_id, items, count, total}
overall: { } // object with properties groupsTotal, groupsCount
}
PS: I've replaced the items in the third pipe stage with $$CURRENT which adds the whole document for the sake of simplicity, if you need custom properties then specify them.

i did it in this way , project the $group result in new field doc and $sum the sub totals.
{
$project: {
"doc": {
"_id": "$_id",
"total": "$total",
"items":"$items",
"count":"$count"
}
}
},{
$group: {
"_id": null,
"globalTotal": {
$sum: "$doc.total"
},
"result": {
$push: "$doc"
}
}
},
{
$project: {
"result": 1,
//paging "result": {$slice: [ "$result", cursor.skip,cursor.limit ] },
"_id": 0,
"globalTotal": 1
}
}
the output
[
{
globalTotal: 121500,
result: [ [group1], [group2], [group3], ... ]
}
]

Lookup and group from two fields in one aggregation

I have an aggregation that looks like this:
userSchema.statics.getCounts = function (req, type) {
return this.aggregate([
{ $match: { organization: req.user.organization._id } },
{
$lookup: {
from: 'tickets', localField: `${type}Tickets`, foreignField: '_id', as: `${type}_tickets`,
},
},
{ $unwind: `$${type}_tickets` },
{ $match: { [`${type}_tickets.createdAt`]: { $gte: new Date(moment().subtract(4, 'd').startOf('day').utc()), $lt: new Date(moment().endOf('day').utc()) } } },
{
$group: {
_id: {
groupDate: {
$dateFromParts: {
year: { $year: `$${type}_tickets.createdAt` },
month: { $month: `$${type}_tickets.createdAt` },
day: { $dayOfMonth: `$${type}_tickets.createdAt` },
},
},
userId: `$${type}_tickets.assignee_id`,
},
ticketCount: {
$sum: 1,
},
},
},
{
$sort: { '_id.groupDate': -1 },
},
{ $group: { _id: '$_id.userId', data: { $push: { groupDate: '$_id.groupDate', ticketCount: '$ticketCount' } } } },
]);
};
Which outputs data like this:
[
{
_id: 5aeb6b71709f43359e0888bb,
data: [
{ "groupDate": 2018-05-07T00:00:000Z", ticketCount: 4 }
}
]
Ideally though, I would have data like this:
[
{
_id: 5aeb6b71709f43359e0888bb,
data: [
{ "groupDate": 2018-05-07T00:00:000Z", assignedCount: 4, resolvedCount: 8 }
}
]
The difference being that the object for the user would output both the total number of assigned tickets and the total number of resolved tickets for each date.
My userSchema is like this:
const userSchema = new Schema({
firstName: String,
lastName: String,
assignedTickets: [
{
type: mongoose.Schema.ObjectId,
ref: 'Ticket',
index: true,
},
],
resolvedTickets: [
{
type: mongoose.Schema.ObjectId,
ref: 'Ticket',
index: true,
},
],
}, {
timestamps: true,
});
An example user doc is like this:
{
"_id": "5aeb6b71709f43359e0888bb",
"assignedTickets": ["5aeb6ba7709f43359e0888bd", "5aeb6bf3709f43359e0888c2", "5aec7e0adcdd76b57af9e889"],
"resolvedTickets": ["5aeb6bc2709f43359e0888be", "5aeb6bc2709f43359e0888bf"],
"firstName": "Name",
"lastName": "Surname",
}
An example ticket doc is like this:
{
"_id": "5aeb6ba7709f43359e0888bd",
"ticket_id": 120292,
"type": "assigned",
"status": "Pending",
"assignee_email": "email#gmail.com",
"assignee_id": "5aeb6b71709f43359e0888bb",
"createdAt": "2018-05-02T20:05:59.147Z",
"updatedAt": "2018-05-03T20:05:59.147Z",
}
I've tried adding multiple lookups and group stages, but I keep getting an empty array. If I only do one lookup and one group, I get the correct counts for the searched on field, but I'd like to have both fields in one query. Is it possible to have the query group on two lookups?

In short you seem to be coming to terms with setting up your models in mongoose and have gone overboard with references. In reality you really should not keep the arrays within the "User" documents. This is actually an "anti-pattern" which was just something mongoose used initially as a convention for keeping "references" for population where it did not understand how to translate the references from being kept in the "child" to the "parent" instead.
You actually have that data in each "Ticket" and the natural form of $lookup is to use that "foreignField" in reference to the detail from the local collection. In this case the "assignee_id" on the tickets will suffice for looking at matching back to the "_id" of the "User". Though you don't state it, your "status" should be an indicator of whether the data is actually either "assigned" as when in "Pending" state or "resolved" when it is not.
For the sake of simplicity we are going to consider the state "resolved" if it is anything other than "Pending" in value, but extending on the logic from the example for actual needs is not the problem here.
Basically then we resolve to a single $lookup operation by actually using the natural "foreign key" as opposed to keeping separate arrays.
MongoDB 3.6 and greater
Ideally you would use features from MongoDB 3.6 with sub-pipeline processing here:
// Better date calculations
const oneDay = (1000 * 60 * 60 * 24);
var now = Date.now(),
end = new Date((now - (now % oneDay)) + oneDay),
start = new Date(end.valueOf() - (4 * oneDay));
User.aggregate([
{ "$match": { "organization": req.user.organization._id } },
{ "$lookup": {
"from": Ticket.collection.name,
"let": { "id": "$_id" },
"pipeline": [
{ "$match": {
"createdAt": { "$gte": start, "$lt": end },
"$expr": {
"$eq": [ "$$id", "$assignee_id" ]
}
}},
{ "$group": {
"_id": {
"status": "$status",
"date": {
"$dateFromParts": {
"year": { "$year": "$createdAt" },
"month": { "$month": "$createdAt" },
"day": { "$dayOfMonth": "$createdAt" }
}
}
},
"count": { "$sum": 1 }
}},
{ "$group": {
"_id": "$_id.date",
"data": {
"$push": {
"k": {
"$cond": [
{ "$eq": ["$_id.status", "Pending"] },
"assignedCount",
"resolvedCount"
]
},
"v": "$count"
}
}
}},
{ "$sort": { "_id": -1 } },
{ "$replaceRoot": {
"newRoot": {
"$mergeObjects": [
{ "groupDate": "$_id", "assignedCount": 0, "resolvedCount": 0 },
{ "$arrayToObject": "$data" }
]
}
}}
],
"as": "data"
}},
{ "$project": { "data": 1 } }
])
From MongoDB 3.0 and upwards
Or where you lack those features we use a different pipeline process and a little data transformation after the results are returned from the server:
User.aggregate([
{ "$match": { "organization": req.user.organization._id } },
{ "$lookup": {
"from": Ticket.collection.name,
"localField": "_id",
"foreignField": "assignee_id",
"as": "data"
}},
{ "$unwind": "$data" },
{ "$match": {
"data.createdAt": { "$gte": start, "$lt": end }
}},
{ "$group": {
"_id": {
"userId": "$_id",
"date": {
"$add": [
{ "$subtract": [
{ "$subtract": [ "$data.createdAt", new Date(0) ] },
{ "$mod": [
{ "$subtract": [ "$data.createdAt", new Date(0) ] },
oneDay
]}
]},
new Date(0)
]
},
"status": "$data.status"
},
"count": { "$sum": 1 }
}},
{ "$group": {
"_id": {
"userId": "$_id.userId",
"date": "$_id.date"
},
"data": {
"$push": {
"k": {
"$cond": [
{ "$eq": [ "$_id.status", "Pending" ] },
"assignedCount",
"resolvedCount"
]
},
"v": "$count"
}
}
}},
{ "$sort": { "_id.userId": 1, "_id.date": -1 } },
{ "$group": {
"_id": "$_id.userId",
"data": {
"$push": {
"groupDate": "$_id.date",
"data": "$data"
}
}
}}
])
.then( results =>
results.map( ({ data, ...d }) =>
({
...d,
data: data.map(di =>
({
groupDate: di.groupDate,
assignedCount: 0,
resolvedCount: 0,
...di.data.reduce((acc,curr) => ({ ...acc, [curr.k]: curr.v }),{})
})
)
})
)
)
Which just really goes to show that even with the fancy features in modern releases, you really don't need them because there pretty much has always been ways to work around this. Even the JavaScript parts just had slightly longer winded versions before the current "object spread" syntax was available.
So that is really the direction you need to go in. What you certainly don't want is using "multiple" $lookup stages or even applying $filter conditions on what could potentially be large arrays. Also both forms here do their best to "filter down" the number of items "joined" from the foreign collection so as not to cause a breach of the BSON limit.
Particularly the "pre 3.6" version actually has a trick where $lookup + $unwind + $match occur in succession which you can see in the explain output. All stages actually combine into "one" stage there which solely returns only the items which match the conditions in the $match from the foreign collection. Keeping things "unwound" until we reduce further avoids BSON limit problems, as does the new form with MongoDB 3.6 where the "sub-pipeline" does all the document reduction and grouping before any results are returned.
Your one document sample would return like this:
{
"_id" : ObjectId("5aeb6b71709f43359e0888bb"),
"data" : [
{
"groupDate" : ISODate("2018-05-02T00:00:00Z"),
"assignedCount" : 1,
"resolvedCount" : 0
}
]
}
Once I expand the date selection to include that date, which of course the date selection can also be improved and corrected from your original form.
So it seems to make sense that your relationships are actually defined that way but it's just that you recorded them "twice". You don't need to and even if that's not the definition then you should actually instead record on the "child" rather than an array in the parent. We can juggle and merge the parent arrays, but that's counterproductive to actually establishing the data relations correctly and using them correctly as well.

How about something like this?
db.users.aggregate([
{
$lookup:{ // lookup assigned tickets
from:'tickets',
localField:'assignedTickets',
foreignField:'_id',
as:'assigned',
}
},
{
$lookup:{ // lookup resolved tickets
from:'tickets',
localField:'resolvedTickets',
foreignField:'_id',
as:'resolved',
}
},
{
$project:{
"tickets":{ // merge all tickets into one single array
$concatArrays:[
"$assigned",
"$resolved"
]
}
}
},
{
$unwind:'$tickets' // flatten the 'tickets' array into separate documents
},
{
$group:{ // group by 'createdAt' and 'assignee_id'
_id:{
groupDate:{
$dateFromParts:{
year:{ $year:'$tickets.createdAt' },
month:{ $month:'$tickets.createdAt' },
day:{ $dayOfMonth:'$tickets.createdAt' },
},
},
userId:'$tickets.assignee_id',
},
assignedCount:{ // get the count of assigned tickets
$sum:{
$cond:[
{ // by checking the 'type' field for a value of 'assigned'
$eq:[
'$tickets.type',
'assigned'
]
},
1, // if matching count 1
0 // else 0
]
}
},
resolvedCount:{
$sum:{
$cond:[
{ // by checking the 'type' field for a value of 'resolved'
$eq:[
'$tickets.type',
'resolved'
]
},
1, // if matching count 1
0 // else 0
]
}
},
},
},
{
$sort:{ // sort by 'groupDate' descending
'_id.groupDate':-1
},
},
{
$group:{
_id:'$_id.userId', // group again but only by userId
data:{
$push:{ // create an array
groupDate:'$_id.groupDate',
assignedCount:{
$sum:'$assignedCount'
},
resolvedCount:{
$sum:'$resolvedCount'
}
}
}
}
}
])

Mongodb aggregation, finding within an array of values

I have a schemea that creates documents using the following structure:
{
"_id" : "2014-07-16:52TEST",
"date" : ISODate("2014-07-16T23:52:59.811Z"),
"name" : "TEST"
"values" : [
[
1405471921000,
0.737121
],
[
1405471922000,
0.737142
],
[
1405471923000,
0.737142
],
[
1405471924000,
0.737142
]
]
}
In the values, the first index is a timestamp. What I'm trying to do is query a specific timestamp to find the closest value ($gte).
I've tried the following aggregate query:
[
{ "$match": {
"values": {
"$elemMatch": { "0": {"$gte": 1405471923000} }
},
"name" : 'TEST'
}},
{ "$project" : {
"name" : 1,
"values" : 1
}},
{ "$unwind": "$values" },
{ "$match": { "values.0": { "$gte": 1405471923000 } } },
{ "$limit" : 1 },
{ "$sort": { "values.0": -1 } },
{ "$group": {
"_id": "$name",
"values": { "$push": "$values" },
}}
]
This seems to work, but it doesn't pull the closest value. It seems to pull anything greater or equal to and the sort doesn't seem to get applied, so it will pull a timestamp that is far in the future.
Any suggestions would be great!
Thank you

There are a couple of things wrong with the approach here even though it is a fair effort. You are right that you need to $sort here, but the problem is that you cannot "sort" on an inner element with an array. In order to get a value that can be sorted you must $unwind the array first as it otherwise will not sort on an array position.
You also certainly do not want $limit in the pipeline. You might be testing this against a single document, but "limit" will actually act on the entire set of documents in the pipeline. So if more than one document was matching your condition then they would be thrown away.
The key thing you want to do here is use $first in your $group stage, which is applied once you have sorted to get the "closest" element that you want.
db.collection.aggregate([
// Documents that have an array element matching the condition
{ "$match": {
"values": { "$elemMatch": { "0": {"$gte": 1405471923000 } } }
}},
// Unwind the top level array
{ "$unwind": "$values" },
// Filter just the elements that match the condition
{ "$match": { "values.0": { "$gte": 1405471923000 } } },
// Take a copy of the inner array
{ "$project": {
"date": 1,
"name": 1,
"values": 1,
"valCopy": "$values"
}},
// Unwind the inner array copy
{ "$unwind": "$valCopy" },
// Filter the inner elements
{ "$match": { "valCopy": { "$gte": 1405471923000 } }},
// Sort on the now "timestamp" values ascending for nearest
{ "$sort": { "valCopy": 1 } },
// Take the "first" values
{ "$group": {
"_id": "$_id",
"date": { "$first": "$date" },
"name": { "$first": "$name" },
"values": { "$first": "$values" },
}},
// Optionally push back to array to match the original structure
{ "$group": {
"_id": "$_id",
"date": { "$first": "$date" },
"name": { "$first": "$name" },
"values": { "$push": "$values" },
}}
])
And this produces your document with just the "nearest" timestamp value matching the original document form:
{
"_id" : "2014-07-16:52TEST",
"date" : ISODate("2014-07-16T23:52:59.811Z"),
"name" : "TEST",
"values" : [
[
1405471923000,
0.737142
]
]
}

Correct query for group by user, per month

I have MongoDB collection that stores documents in this format:
"name" : "Username",
"timeOfError" : ISODate("...")
I'm using this collection to keep track of who got an error and when it occurred.
What I want to do now is create a query that retrieves errors per user, per month or something similar. Something like this:
{
"result": [
{
"_id": "$name",
"errorsPerMonth": [
{
"month": "0",
"errorsThisMonth": 10
},
{
"month": "1",
"errorsThisMonth": 20
}
]
}
]
}
I have tried several different queries, but none have given the desired result. The closest result came from this query:
db.collection.aggregate(
[
{
$group:
{
_id: { $month: "$timeOfError"},
name: { $push: "$name" },
totalErrorsThisMonth: { $sum: 1 }
}
}
]
);
The problem here is that the $push just adds the username for each error. So I get an array with duplicate names.

You need to compound the _id value in $group:
db.collection.aggregate([
{ "$group": {
"_id": {
"name": "$name",
"month": { "$month": "$timeOfError" }
},
"totalErrors": { "$sum": 1 }
}}
])
The _id is essentially the "grouping key", so whatever elements you want to group by need to be a part of that.
If you want a different order then you can change the grouping key precedence:
db.collection.aggregate([
{ "$group": {
"_id": {
"month": { "$month": "$timeOfError" },
"name": "$name"
},
"totalErrors": { "$sum": 1 }
}}
])
Or if you even wanted to or had other conditions in your pipeline with different fields, just add a $sort pipeline stage at the end:
db.collection.aggregate([
{ "$group": {
"_id": {
"month": { "$month": "$timeOfError" },
"name": "$name"
},
"totalErrors": { "$sum": 1 }
}},
{ "$sort": { "_id.name": 1, "_id.month": 1 } }
])
Where you can essentially $sort on whatever you want.