How to write one query (count distinct, sum) in MongoDB? - mongodb

Query: select count(distinct finish_date), sum(study_num) from table where student_id=1234
Documents:
{
"_id" : ObjectId("602252684a43d5b364f3e6ca"),
"student_id" : 1234,
"study_num" : 8,
"finish_date" : "20210209",
},
{
"_id" : ObjectId("602257594a43d5b364f4cc6a"),
"student_id" : 1234,
"study_num" : 7,
"finish_date" : "20210207",
},
{
"_id" : ObjectId("5fbb65580d685b17fa56e18f"),
"student_id" : 2247,
"study_num" : 6,
"finish_date" : "20210209",
}

You can use $match and $group
db.collection.aggregate([
{
"$match": {"student_id": 1234}
},
{
"$group": {
"_id": "$finish_date",
"study_sum": { $sum: "$study_num" }
}
},
{
"$group": {
"_id": null,
"study_sum": { $sum: "$study_sum" },
count: { $sum: 1 }
}
}
])
Working Mongo playground

Query: select count(distinct finish_date), sum(study_num) from table
where student_id=1234
How to write the query? Write using an aggregation:
db.collection.aggregate([
{
$match: { student_id: 1234 }
},
{
$group: {
_id: "",
distinct_dates: { $addToSet: "$finish_date" },
study_sum: { $sum: "$study_num" }
}
},
{
$project: {
count: { $size: "$distinct_dates" },
study_sum: 1, _id: 0
}
}
])
The output: { "study_sum" : 15, "count" : 2 }
Reference: SQL to Aggregation Mapping Chart

Related

How to combine results in a Mongo aggregation query

I'm new to aggregation queries in Mongo and been really struggling trying to produce the output I want. I have the following aggregation query:
db.events.aggregate([
{ $match: { requestState: "APPROVED" } },
{ $unwind: { path: "$payload.description" } },
{ $group: { _id: { instr: "$payload.description", bu: "$createdByUser", count: { $sum: 1 } } } }
]);
that returns the following results:
{ "_id" : { "instr" : "ABC-123", "bu" : "BU2", "count" : 1 } }
{ "_id" : { "instr" : "ABC-123", "bu" : "BU1", "count" : 1 } }
{ "_id" : { "instr" : "DEF-456", "bu" : "BU1", "count" : 1 } }
How can I amend the aggregation query so that there are only 2 documents returned instead of 3? With the two "ABC-123" results combined into a single result with a new array of counts with the "bu" and "count" fields i.e.
{ "_id" : { "instr" : "ABC-123", "counts": [ { "bu" : "BU1", "count" : 1 }, { "bu" : "BU2", "count" : 1 } ] } }
Many thanks
You can add another stage to only $group by _id.instr and another stage to $project to your desired output shape
db.events.aggregate([
{
$match: { requestState: "APPROVED" }
},
{
$unwind: { path: "$payload.description" }
},
{
$group: {
_id: { instr: "$payload.description", bu: "$createdByUser", count: { $sum: 1 } }
}
},
{
$group: {
_id: { instr: "$_id.instr" },
counts: { $push: { bu: "$_id.bu", count: "$_id.count" } }
}
},
{
$project: {
_id: { instr: "$_id.instr", counts: "$counts" }
}
}
]);

How can i count total documents and also grouped counts simultanously in mongodb aggregation?

I have a dataset in mongodb collection named visitorsSession like
{ip : 192.2.1.1,country : 'US', type : 'Visitors',date : '2019-12-15T00:00:00.359Z'},
{ip : 192.3.1.8,country : 'UK', type : 'Visitors',date : '2019-12-15T00:00:00.359Z'},
{ip : 192.5.1.4,country : 'UK', type : 'Visitors',date : '2019-12-15T00:00:00.359Z'},
{ip : 192.8.1.7,country : 'US', type : 'Visitors',date : '2019-12-15T00:00:00.359Z'},
{ip : 192.1.1.3,country : 'US', type : 'Visitors',date : '2019-12-15T00:00:00.359Z'}
I am using this mongodb aggregation
[{$match: {
nsp : "/hrm.sbtjapan.com",
creationDate : {
$gte: "2019-12-15T00:00:00.359Z",
$lte: "2019-12-20T23:00:00.359Z"
},
type : "Visitors"
}}, {$group: {
_id : "$country",
totalSessions : {
$sum: 1
}
}}, {$project: {
_id : 0,
country : "$_id",
totalSessions : 1
}}, {$sort: {
country: -1
}}]
using above aggregation i am getting results like this
[{country : 'US',totalSessions : 3},{country : 'UK',totalSessions : 2}]
But i also total visitors also along with result like totalVisitors : 5
How can i do this in mongodb aggregation ?
You can use $facet aggregation stage to calculate total visitors as well as visitors by country in a single pass:
db.visitorsSession.aggregate( [
{
$match: {
nsp : "/hrm.sbtjapan.com",
creationDate : {
$gte: "2019-12-15T00:00:00.359Z",
$lte: "2019-12-20T23:00:00.359Z"
},
type : "Visitors"
}
},
{
$facet: {
totalVisitors: [
{
$count: "count"
}
],
countrySessions: [
{
$group: {
_id : "$country",
sessions : { $sum: 1 }
}
},
{
$project: {
country: "$_id",
_id: 0,
sessions: 1
}
}
],
}
},
{
$addFields: {
totalVisitors: { $arrayElemAt: [ "$totalVisitors.count" , 0 ] },
}
}
] )
The output:
{
"totalVisitors" : 5,
"countrySessions" : [
{
"sessions" : 2,
"country" : "UK"
},
{
"sessions" : 3,
"country" : "US"
}
]
}
You could be better off with two queries to do this.
To save the two db round trips following aggregation can be used which IMO is kinda verbose (and might be little expensive if documents are very large) to just count the documents.
Idea: Is to have a $group at the top to count documents and preserve the original documents using $push and $$ROOT. And then before other matches/filter ops $unwind the created array of original docs.
db.collection.aggregate([
{
$group: {
_id: null,
docsCount: {
$sum: 1
},
originals: {
$push: "$$ROOT"
}
}
},
{
$unwind: "$originals"
},
{ $match: "..." }, //and other stages on `originals` which contains the source documents
{
$group: {
_id: "$originals.country",
totalSessions: {
$sum: 1
},
totalVisitors: {
$first: "$docsCount"
}
}
}
]);
Sample O/P: Playground Link
[
{
"_id": "UK",
"totalSessions": 2,
"totalVisitors": 5
},
{
"_id": "US",
"totalSessions": 3,
"totalVisitors": 5
}
]

Mongo rank calculations based on count

I am trying the mongo rank calculation based on count and mentioned in below db schema. I am not getting the expecting results. Anyone help to resolve this?
Mongo Query:
db.company.aggregate([
{
"$group": {
"_id": {
"name1": "$name1",
"name2": "$name2",
},
"expanded": {
"$push": {
"name1": "$name1",
"name2": "$name2",
}
},
"count": { "$sum": 1 }
}
},
{ "$sort": { "count": -1 } },
{
$unwind: {
path: '$expanded',
includeArrayIndex: 'count'
}
}
]);
Expecting results like
Name|Count|Rank
Google|3|1
FB|2|2
Yahoo|1| 3
DB Schema :
{
"_id" : 1.0,
"name1" : "Yahoo",
"name2" : "Google",
"salary" : 1000.0
}
/* 2 */
{
"_id" : 2.0,
"name1" : "FB",
"name2" : "Google",
"salary" : 2000.0
}
/* 3 */
{
"_id" : 3.0,
"name1" : "Google",
"name2" : "FB",
"salary" : 1500.0
}
It seems like you should count name1 and name2 separately so you can create a temporary 2-element array and then run $unwind on that array. Additionally to get rank you have to $group by null to get single array of all groups, try:
db.collection.aggregate([
{
$project: {
key: [ "$name1", "$name2" ]
}
},
{
$unwind: "$key"
},
{
$group: {
_id: "$key",
count: { $sum: 1 }
}
},
{
$sort: {
count: -1
}
},
{
$group: {
_id: null,
groups: { $push: "$$ROOT" }
}
},
{
$unwind: {
path: '$groups',
includeArrayIndex: 'rank'
}
},
{
$project: {
_id: 0,
name: "$groups._id",
rank: { $add: [ "$rank", 1 ] },
count: "$groups.count"
}
}
])
Mongo Playground
try this
db.company.aggregate([
{
$group: {
_id:null,
names1: {$push: "$name1"},
names2: {$push:"$name2"},
}
},
{
$project: {
_id: 0,
names:{$concatArrays: ["$names1", "$names2"]}
}
},
{$unwind: "$names"},
{$sortByCount: "$names"},
{$addFields:{name: "$_id"}},
{
$group : {
_id: null,
records : { $push : {count : "$count", name : "$name"}}
}
},
{
$project: {
total_docs: {$size: "$records"},
records: 1
}
},
{$unwind: "$records"},
{
$project: {
_id: 0,
name: "$records.name",
count:"$records.count",
rank: {
$add:[
{
$subtract:["$total_docs", "$records.count"]
}, 1]
}
}
}])

Need to sum from array object value in mongodb

I am trying to calculate total value if that value exits. But query is not working 100%. So can somebody help me to solve this problem. Here my sample document. I have attached two documents. Please these documents & find out best solution
Document : 1
{
"_id" : 1"),
"message_count" : 4,
"messages" : {
"data" : [
{
"id" : "11",
"saleValue": 1000
},
{
"id" : "112",
"saleValue": 1400
},
{
"id" : "22",
},
{
"id" : "234",
"saleValue": 111
}
],
},
"createdTime" : ISODate("2018-03-18T10:18:48.000Z")
}
Document : 2
{
"_id" : 444,
"message_count" : 4,
"messages" : {
"data" : [
{
"id" : "444",
"saleValue" : 2060
},
{
"id" : "444",
},
{
"id" : 234,
"saleValue" : 260
},
{
"id" : "34534",
}
]
},
"createdTime" : ISODate("2018-03-18T03:11:50.000Z")
}
Needed Output:
{
total : 4831
}
My query :
db.getCollection('myCollection').aggregate([
{
"$group": {
"_id": "$Id",
"totalValue": {
$sum: {
$sum: "$messages.data.saleValue"
}
}
}
}
])
So please if possible help me to solve this problem. Thanks in advance
It's not working correctly because it is aggregating all the documents in the collection; you are grouping on a constant "_id": "tempId", you just need to reference the correct key by adding the $ as:
db.getCollection('myCollection').aggregate([
{ "$group": {
"_id": "$tempId",
"totalValue": {
"$sum": { "$sum": "$messages.data.saleValue" }
}
} }
])
which in essence is a single stage pipeline version of an aggregate operation with an extra field that holds the sum expression before the group pipeline then calling that field as the $sum operator in the group.
The above works since $sum from MongoDB 3.2+ is available in both the $project and $group stages and when used in the $project stage, $sum returns the sum of the list of expressions. The expression "$messages.data.value" returns a list of numbers [120, 1200] which are then used as the $sum expression:
db.getCollection('myCollection').aggregate([
{ "$project": {
"values": { "$sum": "$messages.data.value" },
"tempId": 1,
} },
{ "$group": {
"_id": "$tempId",
"totalValue": { "$sum": "$values" }
} }
])
You can add a $unwind before your $group, in that way you will deconstructs the data array, and then you can group properly:
db.myCollection.aggregate([
{
"$unwind": "$messages.data"
},
{
"$group": {
"_id": "tempId",
"totalValue": {
$sum: {
$sum: "$messages.data.value"
}
}
}
}
])
Output:
{ "_id" : "tempId", "totalValue" : 1320 }
db.getCollection('myCollection').aggregate([
{
$unwind: "$messages.data",
$group: {
"_id": "tempId",
"totalValue": { $sum: "$messages.data.value" }
}
}
])
$unwind
According to description as mentioned into above question, as a solution please try executing following aggregate query
db.myCollection.aggregate(
// Pipeline
[
// Stage 1
{
$unwind: {
path: '$messages.data'
}
},
// Stage 2
{
$group: {
_id: {
pageId: '$pageId'
},
total: {
$sum: '$messages.data.saleValue'
}
}
},
// Stage 3
{
$project: {
pageId: '$_id.pageId',
total: 1,
_id: 0
}
}
]
);
You can do it without using $group. Grouping made other data to be managed and addressed. So, I prefer using $sum and $map as shown below:
db.getCollection('myCollection').aggregate([
{
$addFields: {
total: {
$sum: {
$map: {
input: "$messages.data",
as: "message",
in: "$$message.saleValue",
},
},
},
},
},
}
])

MongoDB aggregate using distinct

I have an aggregation that groups on a date and creates a sum.
db.InboundWorkItems.aggregate({
$match: {
notificationDate: {
$gte: ISODate("2013-07-18T04:00:00Z")
},
dropType: 'drop'
}
}, {
$group: {
_id: {
notificationDate: "$notificationDate"
},
nd: {
$first: "$notificationDate"
},
count: {
$sum: 1
}
}
}, {
$sort: {
nd: 1
}
})
The output is
"result" : [
{
"_id" : {
"notificationDate" : ISODate("2013-07-18T04:00:00Z")
},
"nd" : ISODate("2013-07-18T04:00:00Z"),
"count" : 484
},
{
"_id" : {
"notificationDate" : ISODate("2013-07-19T04:00:00Z")
},
"nd" : ISODate("2013-07-19T04:00:00Z"),
"count" : 490
},
{
"_id" : {
"notificationDate" : ISODate("2013-07-20T04:00:00Z")
},
"nd" : ISODate("2013-07-20T04:00:00Z"),
"count" : 174
},
{
"_id" : {
"notificationDate" : ISODate("2013-07-21T04:00:00Z")
},
"nd" : ISODate("2013-07-21T04:00:00Z"),
"count" : 6
},
{
"_id" : {
"notificationDate" : ISODate("2013-07-22T04:00:00Z")
},
"nd" : ISODate("2013-07-22T04:00:00Z"),
"count" : 339
},
{
"_id" : {
"notificationDate" : ISODate("2013-07-23T04:00:00Z")
},
"nd" : ISODate("2013-07-23T04:00:00Z"),
"count" : 394
},
{
"_id" : {
"notificationDate" : ISODate("2013-07-24T04:00:00Z")
},
"nd" : ISODate("2013-07-24T04:00:00Z"),
"count" : 17
}
],
"ok" : 1
so far so good. What I need to do now is to keep this, but also add a distinct in the criteria (for argument's sake I want to use AccountId). The would yield me the count of the grouped dates only using distinct AccountId. Is distinct even possible within the aggregation framework?
you can use two group commands in the pipeline, the first to group by accoundId, followed by second group that does usual operation. something like this:
db.InboundWorkItems.aggregate(
{$match: {notificationDate: {$gte: ISODate("2013-07-18T04:00:00Z")}, dropType:'drop' }},
{$group: {_id:"accountId",notificationDate:"$notificationDate"}},
{$group: {_id:1, nd: {$first:"$notificationDate"}, count:{$sum:1} }},
{$sort:{nd:1}} )
db.InboundWorkItems.aggregate({
$match: {
notificationDate: {
$gte: ISODate("2013-07-18T04:00:00Z")
},
dropType: 'drop'
}
}, {
$group: {
_id: "$AccountId",
notificationDate: {
$max: "$notificationDate"
},
dropType: {
$max: "$dropType"
}
}
}, {
$group: {
_id: {
notificationDate: "$notificationDate"
},
nd: {
$first: "$notificationDate"
},
count: {
$sum: 1
}
}
}, {
$sort: {
nd: 1
}
})
I think you might actually be looking for a single group (English is a bit confusing) like so:
db.InboundWorkItems.aggregate({
$match: {
notificationDate: {
$gte: ISODate("2013-07-18T04:00:00Z")
},
dropType: 'drop'
}
}, {
$group: {
_id: {
notificationDate: "$notificationDate", accountId: '$accountId'
},
nd: {
$first: "$notificationDate"
},
count: {
$sum: 1
}
}
}, {
$sort: {
nd: 1
}
})
I add the compound _id in the $group because of:
The would yield me the count of the grouped dates only using distinct AccountId.
Which makes me think you want the grouped date count by account ID.