MongoDB: Aggregation Group operation over a dynamic key-value pair - mongodb

I have a simple document with one field as a key-value pair. I want to just perform a group operation in Aggregation over those keys and add their values. But the keys in the pair are not fixed and can be anything.
Here is a sample document.
{
_id: 349587843,
matchPair: {
3 : 21,
9 : 4,
7 : 32
}
},
{
_id: 349587478,
matchPair: {
7 : 11,
54 : 32,
9 : 7,
2 : 19
}
}
And I want a result something like the following.
{
_id : 3,
count : 21
},
{
_id : 9,
count : 11
},
{
_id : 7,
count : 43
},
{
_id : 54,
count : 32
},
{
_id : 2,
count : 19
}
I have the following query in mind and tried using $unwindoperation but it doesn't work probably because "matchPair" isn't an array and I don't know what to specify for the $sumoperation.
db.MatchPairs.aggregate([
{ "$unwind" : "$matchPair" },
{ "$group" : {
_id: "$matchPair",
count : { $sum : $matchPair }
} }
]);
I could also try Map-Reduce but for that too I need to emit() keys and values by name.
I'm sure there's a simple solution to this but I can't figure it out.

:
You could start by projecting and reshaping the matchPair field with $objectToArray
New in version 3.4.4.
{
$project: {
matchPair: { $objectToArray: '$matchPair' }
}
}
which would give
{
matchPair: [{ k: 3, v: 21 }, { k: 9, v: 4 }, ...]
}
Then $unwind based on matchPair
{
$unwind: '$matchPair'
}
which would give
{
matchPair: { k: 3, v: 21 }
}
Then $project
{
$project: {
_id: '$matchPair.k',
count: '$matchPair.v'
}
}
That should give the output you want. Altogether would be
.aggregate([
{
$project: {
matchPair: { $objectToArray: '$matchPair' }
}
},
{ $unwind: '$matchPair' },
{
$project: {
_id: '$matchPair.k',
count: '$matchPair.v'
}
}
])

In the mongoDb documentation for $unwind:
Deconstructs an array field from the input documents to output a
document for each element.
So you have to change your schema for something like:
{
"_id" : ObjectId("5880b57b039a3c89c1db145a"),
"matchPair" : [
{
"_id" : "3",
"count" : 21
},
{
"_id" : "9",
"count" : 4
},
{
"_id" : "7",
"count" : 32
}
]
},
{
"_id" : ObjectId("5880b58c039a3c89c1db145b"),
"matchPair" : [
{
"_id" : "7",
"count" : 11
},
{
"_id" : "54",
"count" : 32
},
{
"_id" : "9",
"count" : 7
},
{
"_id" : "2",
"count" : 19
}
]
}
Then doing:
db.MatchPairs.aggregate([
{ $unwind : "$matchPair" }
]);
will return:
{
"_id" : ObjectId("5880b57b039a3c89c1db145a"),
"matchPair" : {
"_id" : "3",
"count" : 21
}
},
{
"_id" : ObjectId("5880b57b039a3c89c1db145a"),
"matchPair" : {
"_id" : "9",
"count" : 4
}
},
{
"_id" : ObjectId("5880b57b039a3c89c1db145a"),
"matchPair" : {
"_id" : "7",
"count" : 32
}
},
{
"_id" : ObjectId("5880b58c039a3c89c1db145b"),
"matchPair" : {
"_id" : "7",
"count" : 11
}
},
{
"_id" : ObjectId("5880b58c039a3c89c1db145b"),
"matchPair" : {
"_id" : "54",
"count" : 32
}
},
{
"_id" : ObjectId("5880b58c039a3c89c1db145b"),
"matchPair" : {
"_id" : "9",
"count" : 7
}
},
{
"_id" : ObjectId("5880b58c039a3c89c1db145b"),
"matchPair" : {
"_id" : "2",
"count" : 19
}
}
Then you just have to do your grouping.

Related

mongodb aggregate sum item as nested data

Here is my some sample data in collection sale
[
{group:2, item:a, qty:3 },
{group:2, item:b, qty:3 },
{group:2, item:b, qty:2 },
{group:1, item:a, qty:3 },
{group:1, item:a, qty:5 },
{group:1, item:b, qty:5 }
]
and I want to query data like below and sort the popular group to the top
[
{ group:1, items:[{name:'a',total_qty:8},{name:'b',total_qty:5} ],total_qty:13 },
{ group:2, items:[{name:'a',total_qty:3},{name:'b',total_qty:5} ],total_qty:8 },
]
Actually we can loop in server script( php, nodejs ...) but the problem is pagination. I cannot use skip to get the right result.
The following query can get us the expected output:
db.collection.aggregate([
{
$group:{
"_id":{
"group":"$group",
"item":"$item"
},
"group":{
$first:"$group"
},
"item":{
$first:"$item"
},
"total_qty":{
$sum:"$qty"
}
}
},
{
$group:{
"_id":"$group",
"group":{
$first:"$group"
},
"items":{
$push:{
"name":"$item",
"total_qty":"$total_qty"
}
},
"total_qty":{
$sum:"$total_qty"
}
}
},
{
$project:{
"_id":0
}
}
]).pretty()
Data set:
{
"_id" : ObjectId("5d84a37febcbd560107c54a7"),
"group" : 2,
"item" : "a",
"qty" : 3
}
{
"_id" : ObjectId("5d84a37febcbd560107c54a8"),
"group" : 2,
"item" : "b",
"qty" : 3
}
{
"_id" : ObjectId("5d84a37febcbd560107c54a9"),
"group" : 2,
"item" : "b",
"qty" : 2
}
{
"_id" : ObjectId("5d84a37febcbd560107c54aa"),
"group" : 1,
"item" : "a",
"qty" : 3
}
{
"_id" : ObjectId("5d84a37febcbd560107c54ab"),
"group" : 1,
"item" : "a",
"qty" : 5
}
{
"_id" : ObjectId("5d84a37febcbd560107c54ac"),
"group" : 1,
"item" : "b",
"qty" : 5
}
Output:
{
"group" : 2,
"items" : [
{
"name" : "b",
"total_qty" : 5
},
{
"name" : "a",
"total_qty" : 3
}
],
"total_qty" : 8
}
{
"group" : 1,
"items" : [
{
"name" : "b",
"total_qty" : 5
},
{
"name" : "a",
"total_qty" : 8
}
],
"total_qty" : 13
}
You need to use $group aggregation with $sum and $push accumulator
db.collection.aggregate([
{ "$group": {
"_id": "$group",
"items": { "$push": "$$ROOT" },
"total_qty": { "$sum": "$qty" }
}},
{ "$sort": { "total_qty": -1 }}
])

how to show 0 for week when no record is matching that week in $week mongodb query

My collection looks like below with details
/* 1 createdAt:6/13/2018, 5:17:07 PM*/
{ "_id" : ObjectId("5b21043b18f3bc7c0be3414c"),
"Number" : 242,
"State" : "2",
"City" : "3",
"Website" : "",
"Contact_Person_Name" : "Ajithmullassery",
"CreatedById" : "Admin",
"UpdatedById" : "Admin",
"IsActive" : true,
"UpdatedOn" : ISODate("2018-06-13T17:17:07.313+05:30"),
"CreatedOn" : ISODate("2018-06-13T17:17:07.313+05:30")
},
/* 2 createdAt:6/13/2018, 6:45:42 PM*/
{
"_id" : ObjectId("5b2118fe18f3bc7c0be3415b"),
"Number" : 243,
"State" : "1",
"City" : "143",
"Website" : "",
"Contact_Person_Name" : "sachitkumar",
"CreatedById" : "vinoth",
"UpdatedById" : "Admin",
"IsActive" : true,
"UpdatedOn" : ISODate("2018-06-13T18:45:42.590+05:30"),
"CreatedOn" : ISODate("2018-06-13T18:45:42.590+05:30")
},
/* 3 createdAt:6/18/2018, 5:34:33 PM*/
{
"_id" : ObjectId("5b279fd118f3bc7c0be34166"),
"Number" : 244,
"State" : "0",
"City" : "8",
"Website" : "",
"Contact_Person_Name" : "Akshay",
"CreatedById" : "vinoth",
"UpdatedById" : "Admin",
"IsActive" : true,
"UpdatedOn" : ISODate("2018-06-18T17:34:33.270+05:30"),
"CreatedOn" : ISODate("2018-06-18T17:34:33.270+05:30")
},
/* 4 createdAt:6/20/2018, 1:02:21 PM*/
{
"_id" : ObjectId("5b2a030518f3bc7c0be3416d"),
"Number" : 245,
"State" : "5",
"City" : "6",
"Website" : "",
"Contact_Person_Name" : "Dr DS Mithra",
"CreatedById" : "vinoth",
"UpdatedById" : "Admin",
"FacilityID" : "594387f5e2de7be83be5d5f1",
"IsActive" : true,
"UpdatedOn" : ISODate("2018-06-20T13:02:21.887+05:30"),
"CreatedOn" : ISODate("2018-06-20T13:02:21.887+05:30")
},
/* 5 createdAt:6/20/2018, 1:08:58 PM*/
{
"_id" : ObjectId("5b2a049218f3bc7c0be3416e"),
"Number" : 245,
"State" : "5",
"City" : "6",
"Website" : "",
"Contact_Person_Name" : "Ramaswamy Manickam",
"CreatedById" : "vinoth",
"UpdatedById" : "Admin",
"IsActive" : true,
"UpdatedOn" : ISODate("2018-06-20T13:08:58.040+05:30"),
"CreatedOn" : ISODate("2018-06-20T13:08:58.040+05:30")
}
I have the query like below
db.collectionName.aggregate([
//where query
{ "$match": { $and:[{CreatedOn:{$lte:ISODate("2018-07-14T13:59:08.266+05:30")}},{CreatedOn:{$gte:ISODate("2018-06-10T13:59:08.266+05:30")}}] } },
//distinct column
{
"$group": {
_id: {$week: '$CreatedOn'},
documentCount: {$sum: 1}
}
}
])
The query will return the weeknumber and number of documents created as below
/* 1 */
{
"_id" : 26,
"documentCount" : 1
},
/* 2 */
{
"_id" : 25,
"documentCount" : 1
},
/* 3 */
{
"_id" : 24,
"documentCount" : 9
},
/* 4 */
{
"_id" : 23,
"documentCount" : 2
}
In above _id is the weeknumber. If in case in above results weekNumber : 23 no records are created then the query gives only 3 records removing the "_id":23.
How to get the records with documentcount as zero when there is no records created.
Like in above example when no records for _id: 23 should get like below
/* 4 */
{
"_id" : 23,
"documentCount" : 0
}
As $week can return a value between 0 and 53 I assume you expect 54 documents as a result with 0 or non-zero values for documentCount. To achieve that you should collect all your documents into one ($group-ing by null) and then generate the output.
To generate a range of numbers you can use $range operator and then you can generate the output using $map. To transform an array of documents into multiple docs you can use $unwind.
db.collectionName.aggregate([
//where query
{ "$match": { $and:[{CreatedOn:{$lte:ISODate("2018-07-14T13:59:08.266+05:30")}},{CreatedOn:{$gte:ISODate("2018-06-10T13:59:08.266+05:30")}}] } },
//distinct column
{
"$group": {
_id: {$week: '$CreatedOn'},
documentCount: {$sum: 1}
}
},
{
$group: {
_id: null,
docs: { $push: "$$ROOT" }
}
},
{
$project: {
docs: {
$map: {
input: { $range: [ {$week:ISODate("2018-06-10T13:59:08.266+05:30")}, {$week:ISODate("2018-07-14T13:59:08.266+05:30")}]},
as: "weekNumber",
in: {
$let: {
vars: { index: { $indexOfArray: [ "$docs._id", "$$weekNumber" ] } },
in: {
$cond: {
if: { $eq: [ "$$index", -1 ] },
then: { _id: "$$weekNumber", documentCount: 0 },
else: { $arrayElemAt: [ "$docs", "$$index" ] }
}
}
}
}
}
}
}
},
{
$unwind: "$docs"
},
{
$replaceRoot: {
newRoot: "$docs"
}
}
])
Using $indexOfArray to check if array of current docs contains the document (-1 otherwise) and $arrayElemAt to get existing document from docs. Last step ($replaceRoot) is just to get rid of one level of nesting (docs). Outputs:
{ "_id" : 0, "documentCount" : 0 }
{ "_id" : 1, "documentCount" : 0 }
{ "_id" : 2, "documentCount" : 0 }
...
{ "_id" : 22, "documentCount" : 0 }
{ "_id" : 23, "documentCount" : 2 }
{ "_id" : 24, "documentCount" : 9 }
{ "_id" : 25, "documentCount" : 1 }
{ "_id" : 26, "documentCount" : 1 }
{ "_id" : 27, "documentCount" : 0 }
...
{ "_id" : 52, "documentCount" : 0 }
{ "_id" : 53, "documentCount" : 0 }
You can easily customize returned results modifying the input of $map stage. For instance you can pass an array of consts like input: [21, 22, 23, 24] as well.
EDIT: To get the weeks between specified dates you can use $week for start and end date to get the numbers.

Count ignoring duplicate documents

I'd like to count all emails within the specific project (ID: 7), but ignoring duplicate rows in ONE campaign.
Here's the example of my collection structure:
{
"_id" : ObjectId("581a9054c274f7b512e8ed94"),
"email" : "a#example.com",
"IDproject" : 7,
"IDcampaign" : 10
}
{
"_id" : ObjectId("581a9064c274f7b512e8ed95"),
"email" : "b#example.com",
"IDproject" : 7,
"IDcampaign" : 10
}
{
"_id" : ObjectId("581a9068c274f7b512e8ed96"),
"email" : "b#example.com",
"IDproject" : 7,
"IDcampaign" : 10
}
{
"_id" : ObjectId("581a906cc274f7b512e8ed97"),
"email" : "b#example.com",
"IDproject" : 7,
"IDcampaign" : 11
}
{
"_id" : ObjectId("581a9072c274f7b512e8ed98"),
"email" : "c#example.com",
"IDproject" : 7,
"IDcampaign" : 11
}
{
"_id" : ObjectId("581a9079c274f7b512e8ed99"),
"email" : "d#example.com",
"IDproject" : 7,
"IDcampaign" : 12
}
This is what the result should be:
a#example.com
b#example.com
b#example.com
c#example.com
d#example.com
Total: 5 (of 6). Note that b#example.com is mentioned twice. That's because b#example.com has campaigns 10, 10 and 11. We're ignoring one 10.
This is what I've tried:
db.mycollection.aggregate([
{$match : {IDproject : 7}},
{$group : {_id : "$email", total : {$sum : 1}}}
])
But it returns only unique emails ignoring IDcampaign. Also, I can get unique number of emails with the following query:
db.mycollection.distinct('email', {IDproject : 7})
But again, it shows only unique emails ignoring IDcampaign.
Could someone give me a hint how to count emails including IDcampaign?
Thanks.
p.s. I use MongoDB with PHP, and I can solve the problem with PHP calculations, but that's not the solution.
Include it as part of your group key, as in the following example:
db.mycollection.aggregate([
{ "$match": { "IDproject": 7 } },
{
"$group": {
"_id": {
"email" : "$email",
"IDcampaign" : "$IDcampaign"
},
"count": { "$sum": 1 }
}
}
])
Sample Output
/* 1 */
{
"_id" : {
"email" : "a#example.com",
"IDcampaign" : 10
},
"count" : 1
}
/* 2 */
{
"_id" : {
"email" : "d#example.com",
"IDcampaign" : 12
},
"count" : 1
}
/* 3 */
{
"_id" : {
"email" : "b#example.com",
"IDcampaign" : 11
},
"count" : 1
}
/* 4 */
{
"_id" : {
"email" : "b#example.com",
"IDcampaign" : 10
},
"count" : 2
}
/* 5 */
{
"_id" : {
"email" : "c#example.com",
"IDcampaign" : 11
},
"count" : 1
}
To answer your follow-up question on getting the count only since you don't need the list of emails, you could run the following pipeline
db.mycollection.aggregate([
{ "$match": { "IDproject": 7 } },
{
"$group": {
"_id": null,
"count": { "$sum": 1 },
"emails": {
"$addToSet": {
"email" : "$email",
"IDcampaign" : "$IDcampaign"
}
}
}
},
{
"$project": {
"_id": 0,
"count": 1,
"total": { "$size": "$emails" }
}
}
])
which gives you the result
{
"total" : 5,
"count" : 6
}
that you can interpret as Total 5 (of 6)

Map reduce to count the unique count

I want a map reduce function to draw the below output from the below input collection which satisfy the below condition.
Input collection:
[{
a:1,
b:'test',
indices:[1,2,4,5]
}, {
a:2,
b:'test',
indices:[2, 3, 5]
}, {
a:2,
b:'test',
indices:[1, 2, 4]
}, {
a:3,
b:'apple',
indices:[1, 2]
}, {
a:4,
b:'apple',
indices:[1, 3, 5]
}, {
a:5,
b:'orange',
indices:[232]
}, {
a:5,
b:'dummy',
indices:[2]
}, {
a:6,
b:'dummy',
indices:[11, 2, 4]
}, {
a:6,
b:'dummy',
indices:[11, 3, 2]
}, {
a:6,
b:'dummy',
indices:[1, 2, 3, 4, 5]
}]
Conditions are:
select only which has indices array has 2. This can be send as
query. i.e, query:{indices:{$in:2}}
Group by b
If there are duplicates a, then it should be considered as 1 eg: Document having a=2 are present in two times satisfying the condition indices
has 2.
My input collection always satisfies the condition of if a
prsents in "test", it will not present in dummy/apple/etc. but a
can be duplicate.
Here is what I tried:
db.x.mapReduce(function(){
emit(this.b, 1);
}, function(key, reducable){
return Array.sum(reducable);
}, {
out: {inline: 1},
query:{
'indices':{$in:2}
}
});
Output:
[
{
"_id" : test",
"value" : {
"count" : 3 -> It should be 2
}
},{
"_id" : apple",
"value" : {
"count" : 2
}
},{
"_id" : dummy",
"value" : {
"count" : 4 -> It should be 2
}
}]
Expected Output:
[{
"_id" : test",
"value" : {
"count" : 2
}
},{
"_id" : apple",
"value" : {
"count" : 2
}
},{
"_id" : dummy",
"value" : {
"count" : 2
}
}]
No need for map/reduce. Use aggregation:
> db.crawler_status.aggregate([
{ "$match" : { "indices" : 2 } },
{ "$group" : { "_id" : { "b" : "$b", "a" : "$a" } } },
{ "$group" : { "_id" : "$_id.b", "count" : { "$sum" : 1 } } }
])
{ "_id" : "test", "count" : 2 }
{ "_id" : "apple", "count" : 1 } // your sample output was mistaken
{ "_id" : "dummy", "count" : 2 }

How can I get a running total with mongodb aggregate framework?

I am fairly new to MongoDB and I am playing with the aggregate framework. One of the examples from the documentation shows the following, which returns total number of new user joins per month and lists the month joined:
db.users.aggregate(
[
{ $project : { month_joined : { $month : "$joined" } } } ,
{ $group : { _id : {month_joined:"$month_joined"} , number : { $sum : 1 } } },
{ $sort : { "_id.month_joined" : 1 } }
]
)
The code outputs the following:
{
"_id" : {
"month_joined" : 1
},
"number" : 3
},
{
"_id" : {
"month_joined" : 2
},
"number" : 9
},
{
"_id" : {
"month_joined" : 3
},
"number" : 5
}
Is it possible to also have each object contain the sum of all users that have joined since the start, so I don't have to run over the objects programmatically and calculate it myself?
Example desired output:
{
"_id" : {
"month_joined" : 1
},
"number" : 3,
"total": 3
},
{
"_id" : {
"month_joined" : 2
},
"number" : 9,
"total": 12
},
{
"_id" : {
"month_joined" : 3
},
"number" : 5,
"total": 17
}