Aggregate multiple arrays by field value [duplicate] - mongodb

This question already has an answer here:
mongodb aggregate multiple arrays
(1 answer)
Closed 4 years ago.
With a document as shown below, I am trying to aggregate the data so that my final output is the sum of each user's received and sent values.
Document
{
"_id" : 1,
"received" : [
{ "name" : "david", "value" : 15 },
{ "name" : "sarah", "value" : 10 },
{ "name" : "sarah", "value" : 15 }
],
"sent" : [
{ "name" : "david", "value" : 10 },
{ "name" : "sarah", "value" : 20 },
{ "name" : "david", "value" : 15 }
]
}
Desired Result (or similar)
{
"name": "david",
"received": 15,
"sent": 25
},
{
"name": "sarah",
"received": 25,
"sent": 20
}
I have tried to unwind received and sent, but I am ending up with a lot of duplicates and honestly I have no idea if this sort of output can even be created without bringing the dataset into my client first.
Further searching of StackOverflow has led me to "mongodb aggregate multiple arrays", which provides a suitable answer. I have flagged this question as a duplicate.
My final solution, created by following the above post, is as follows:
[
{
'$addFields': {
'received.type': 'received',
'sent.type': 'sent'
}
}, {
'$project': {
'movements': {
'$concatArrays': [
'$received', '$sent'
]
}
}
}, {
'$unwind': {
'path': '$movements'
}
}, {
'$project': {
'name': '$movements.name',
'type': '$movements.type',
'value': '$movements.value'
}
}, {
'$group': {
'_id': '$name',
'sent': {
'$sum': {
'$cond': {
'if': {
'$eq': [
'$type', 'sent'
]
},
'then': '$value',
'else': 0
}
}
},
'received': {
'$sum': {
'$cond': {
'if': {
'$eq': [
'$type', 'received'
]
},
'then': '$value',
'else': 0
}
}
}
}
}
]

add $match stage on the top to filter the documents
$facet to compute two different result by sent and received on same document
$group to merge the bySent and byReceived arrays of the previous stage into a single byBoth field
$unwind twice to flatten the merged array of arrays
$replaceRoot to replace root with byBoth
$group to merge the results back
$project to filter and project required fields only
aggregation pipeline
db.ttt.aggregate([
{$facet : {
"byReceived" :[
{$unwind : "$received"},
{$group: {_id : "$received.name", received : {$sum : "$received.value"}}}
],
"bySent" :[
{$unwind : "$sent"},
{$group: {_id : "$sent.name", sent : {$sum : "$sent.value"}}}
]
}},
{$group: {_id:null, byBoth : {$push :{$concatArrays : ["$bySent", "$byReceived"]}}}},
{$unwind : "$byBoth"},
{$unwind : "$byBoth"},
{$replaceRoot: { newRoot: "$byBoth" }},
{$group : {_id : "$_id", sent : {$sum : "$sent"}, received : {$sum : "$received"}}},
{$project : {_id:0, name:"$_id", sent:"$sent", received:"$received"}}
])
result
{ "name" : "david", "sent" : 25, "received" : 15 }
{ "name" : "sarah", "sent" : 20, "received" : 25 }

Related

Filter by nested arrays/objects values (on different levels) and $push by multiple level - MongoDB Aggregate

I have a document with multiple levels of embedded subdocuments, each of which has some nested arrays. Using $unwind and $sort, I sort by day in descending order and then use $push to combine the rows back into a single array. The $push works at only one level, i.e. only one $push is allowed. When I try to do the same thing at the nested level while retaining the top-level data, I get "errmsg" : "Unrecognized expression '$push'".
{
"_id" : ObjectId("5f5638d0ff25e01482432803"),
"name" : "XXXX",
"mobileNo" : 323232323,
"payroll" : [
{
"_id" : ObjectId("5f5638d0ff25e01482432801"),
"month" : "Jan",
"salary" : 18200,
"payrollDetails" : [
{
"day" : "1",
"salary" : 200,
},
{
"day" : "2",
"salary" : 201,
}
]
},
{
"_id" : ObjectId("5f5638d0ff25e01482432802"),
"month" : "Feb",
"salary" : 8300,
"payrollDetails" : [
{
"day" : "1",
"salary" : 300,
},
{
"day" : "2",
"salary" : 400,
}
]
}
],
}
Expected Result:
{
"_id" : ObjectId("5f5638d0ff25e01482432803"),
"name" : "XXXX",
"mobileNo" : 323232323,
"payroll" : [
{
"_id" : ObjectId("5f5638d0ff25e01482432801"),
"month" : "Jan",
"salary" : 18200,
"payrollDetails" : [
{
"day" : "2",
"salary" : 201
},
{
"day" : "1",
"salary" : 200
}
]
},
{
"_id" : ObjectId("5f5638d0ff25e01482432802"),
"month" : "Feb",
"salary" : 8300,
"payrollDetails" : [
{
"day" : "2",
"salary" : 400
},
{
"day" : "1",
"salary" : 300
}
]
}
],
}
Only the days should be sorted (in descending order); everything else stays the same.
This is what I have tried, but it fails with Unrecognized expression '$push':
db.employee.aggregate([
{$unwind: '$payroll'},
{$unwind: '$payroll.payrollDetails'},
{$sort: {'payroll.payrollDetails.day': -1}},
{$group: {_id: '$_id', payroll: {$push: {payrollDetails:{$push:
'$payroll.payrollDetails'} }}}}])
This requires two $group stages, because you can't nest one $push accumulator inside another within a single field:
$group by main id and payroll id, construct payrollDetails array
$sort by payroll id (you can skip if not required)
$group by main id and construct payroll array
db.employee.aggregate([
  // Flatten both array levels so that every payrollDetails entry becomes its own document.
  { $unwind: "$payroll" },
  { $unwind: "$payroll.payrollDetails" },
  // Order the entries by day, descending; the $push below collects them in this order.
  // "day" is stored as a string in the sample data, so this is a string comparison.
  { $sort: { "payroll.payrollDetails.day": -1 } },
  // First $group: rebuild one payrollDetails array per (employee, payroll entry) pair
  // and carry along the fields needed to reconstruct the document later.
  {
    $group: {
      _id: {
        _id: "$_id",
        pid: "$payroll._id"
      },
      name: { $first: "$name" },
      mobileNo: { $first: "$mobileNo" },
      payrollDetails: { $push: "$payroll.payrollDetails" },
      month: { $first: "$payroll.month" },
      salary: { $first: "$payroll.salary" }
    }
  },
  // Optional: order the payroll entries before they are pushed back together.
  // After the $group above, the payroll id lives in "_id.pid"; the "payroll" field
  // no longer exists, so sorting on "payroll._id" would have no effect.
  // Ascending order puts Jan before Feb for the sample data, as in the expected result.
  { $sort: { "_id.pid": 1 } },
  // Second $group: rebuild the payroll array per employee.
  {
    $group: {
      _id: "$_id._id",
      name: { $first: "$name" },
      mobileNo: { $first: "$mobileNo" },
      payroll: {
        $push: {
          _id: "$_id.pid",
          month: "$month",
          salary: "$salary",
          payrollDetails: "$payrollDetails"
        }
      }
    }
  }
])
Playground

Merge Multiple Document from same collection MongoDB

I have JSON data like this, and I want to apply an aggregation to it that groups by the from date of each data entry:
{
"series": [
{
"id": "1",
"element": "111",
"data": [
{
"timeFrame": {
"from": "2016-01-01T00:00:00Z",
"to": "2016-01-31T23:59:59Z"
},
"value": 1
},
{
"timeFrame": {
"from": "2016-02-01T00:00:00Z",
"to": "2016-02-29T23:59:59Z"
},
"value": 2
}
]
}
]
}
I have achieved this with the following aggregation:
db.getCollection('col1').aggregate([
{$unwind: "$data"},
{$group :{
element: {$first:"$relatedElement"},
_id : {
day : {$dayOfMonth: "$values.timeFrame.from"},
month:{$month: "$values.timeFrame.from"},
year:{$year: "$values.timeFrame.from"}
},
fromDate : { $first : "$values.timeFrame.from" },
total : {$sum : "$values.value"},
count : {$sum : 1},
}
},
{
$project: {
_id : 0,
element:1,
fromDate : '$fromDate',
avgValue : { $divide: [ "$total", "$count" ] }
}
}])
OutPut:
{
"id" : "1",
"element" : "3",
"fromDate" : ISODate("2017-05-01T00:00:00.000Z"),
"avgValue" : 0.0378787878787879
}
{
"id" : "1",
"element" : "3",
"fromDate" : ISODate("2017-04-30T22:00:00.000Z"),
"avgValue" : 0.416666666666667
}
However, I am getting two documents, and I want to merge them into a single document like this:
{
"id" : "1",
"element" : "3",
"average" : [
{
"fromDate" : ISODate("2017-05-01T00:00:00.000Z"),
"avgValue" : 0.0378787878787879
},
{
"fromDate" : ISODate("2017-04-30T22:00:00.000Z"),
"avgValue" : 0.416666666666667
}
]
}
Can anyone help me with this?
Add the following $group stage at the end of your aggregation pipeline to merge the current output documents into a single document:
{$group:{
_id:"$_id",
element: {$first: "$element"},
average:{$push:{
"fromDate": "$fromDate",
"avgValue": "$avgValue"
}}
}}

mongodb count number of documents for every category

My collection looks like this:
{
"_id":ObjectId("5744b6cd9c408cea15964d18"),
"uuid":"bbde4bba-062b-4024-9bb0-8b12656afa7e",
"version":1,
"categories":["sport"]
},
{
"_id":ObjectId("5745d2bab047379469e10e27"),
"uuid":"bbde4bba-062b-4024-9bb0-8b12656afa7e",
"version":2,
"categories":["sport", "shopping"]
},
{
"_id":ObjectId("5744b6359c408cea15964d15"),
"uuid":"561c3705-ba6d-432b-98fb-254483fcbefa",
"version":1,
"categories":["politics"]
}
I want to count the number of documents for every category. To do this, I unwind the categories array:
db.collection.aggregate(
{$unwind: '$categories'},
{$group: {_id: '$categories', count: {$sum: 1}} }
)
Result:
{ "_id" : "sport", "count" : 2 }
{ "_id" : "shopping", "count" : 1 }
{ "_id" : "politics", "count" : 1 }
Now I want to count the number of documents for every category, but where document version is the latest version.
This is where I am stuck.
It's ugly but I think this gives you what you're after:
db.collection.aggregate([
  // One document per (document, category) pair.
  { $unwind : "$categories" },
  // Per uuid: collect every (version, category) pair and remember the highest version.
  { $group :
    { "_id" : { "uuid" : "$uuid" },
      "doc" : { $push : { "version" : "$version", "category" : "$categories" } },
      "maxVersion" : { $max : "$version" }
    }
  },
  // Back to one document per (version, category) pair, now carrying maxVersion.
  { $unwind : "$doc" },
  // Flag the pairs that belong to the latest version of their uuid.
  // The group key is stored under "_id", so the uuid is read from "$_id.uuid".
  { $project : { "_id" : 0, "uuid" : "$_id.uuid", "category" : "$doc.category", "isCurrentVersion" : { $eq : [ "$doc.version", "$maxVersion" ] } } },
  // Keep only the latest-version pairs and count them per category.
  { $match : { "isCurrentVersion" : true }},
  { $group : { "_id" : "$category", "count" : { $sum : 1 } } }
])
You can do this by first grouping the denormalized documents (from the $unwind step) by two keys, i.e. the categories and version fields. This is necessary for the subsequent pipeline step, which uses the $sort operator to order the grouped documents and their accumulated counts by the version (descending) and categories (ascending) keys respectively.
Another grouping is then required to pick, using the $first operator, the top document in each categories group after ordering. The following pipeline shows this:
db.collection.aggregate(
{ "$unwind": "$categories" },
{
"$group": {
"_id": {
'categories': '$categories',
'version': '$version'
},
"count": { "$sum": 1 }
}
},
{ "$sort": { "_id.version": -1, "_id.categories": 1 } },
{
"$group": {
"_id": "$_id.categories",
"count": { "$first": "$count" },
"version": { "$first": "$_id.version" }
}
}
)
Sample Output
{ "_id" : "shopping", "count" : 1, "version" : 2 }
{ "_id" : "sport", "count" : 1, "version" : 2 }
{ "_id" : "politics", "count" : 1, "version" : 1 }

MongoDB Aggregation using nested element

I have a collection with documents like this:
{
"_id" : "15",
"name" : "empty",
"location" : "5th Ave",
"owner" : "machine",
"visitors" : [
{
"type" : "M",
"color" : "blue",
"owner" : "Steve Cooper"
},
{
"type" : "K",
"color" : "red",
"owner" : "Luis Martinez"
},
// A lot more of these
]
}
I want to group by visitors.owner to find which owner has the most visits, I tried this:
db.mycol.aggregate(
[
{$group: {
_id: {owner: "$visitors.owner"},
visits: {$addToSet: "$visits"},
count: {$sum: "comments"}
}},
{$sort: {count: -1}},
{$limit: 1}
]
)
But I always get count = 0, and visits does not correspond to a single owner :/
Please help
Try the following aggregation pipeline:
db.mycol.aggregate([
{
"$unwind": "$visitors"
},
{
"$group": {
"_id": "$visitors.owner",
"count": { "$sum": 1}
}
},
{
"$project": {
"_id": 0,
"owner": "$_id",
"visits": "$count"
}
}
]);
Using the sample document you provided in your question, the result is:
/* 0 */
{
"result" : [
{
"owner" : "Luis Martinez",
"visits" : 1
},
{
"owner" : "Steve Cooper",
"visits" : 1
}
],
"ok" : 1
}

Aggregate objects' nested array occurrences count

I'm trying to aggregate logs so that I can get a count of how many times each keyword was favorited by a particular user. What I came up with is the following query:
db.a.aggregate([
{$unwind: "$keywords"},
{$group : {_id : {word : "$keywords", user : "$favorited_by"}, count : {$sum : 1}}}
]);
But it produces output:
{ "_id" : { "word" : "another", "user" : "too_creepy" }, "count" : 1 }
{ "_id" : { "word" : "test", "user" : "too_creepy" }, "count" : 2 }
Whilst I want to get something like this:
INPUT
{
_id: ObjectId("5475cf117ccee624583ba94a"),
favorited_by: "too_creepy",
keywords: [
"test"
]
},
{
_id: ObjectId("5475cf117ccee624583ba949"),
favorited_by: "too_creepy",
keywords: [
"test"
]
},
{
_id: ObjectId("5475cf117ccee624583ba949"),
favorited_by: "too_creepy",
keywords: [
"anotherone"
]
},
{
_id: ObjectId("5475cf117ccee624583ba09a"),
favorited_by: "hello_world",
keywords: [
"test"
]
}
OUTPUT
{
favorited_by: "too_creepy",
keywords: [
{keyword: "test", count: 2},
{keyword: "anotherone", count: 1}
]
},
{
favorited_by: "hello_world",
keywords: [
{keyword: "test", count: 1}
]
}
Any ideas on how to write this query, if it's even possible?
You can do that by adding a second $group to your pipeline followed up with a final $project to reshape the output a bit:
db.a.aggregate([
{$unwind: "$keywords"},
{$group: {_id: {word: "$keywords", user: "$favorited_by"}, count: {$sum: 1}}},
// Group again on just user, and use $push to assemble an array of their keywords
{$group: {
_id: '$_id.user',
keywords: {$push: {keyword: '$_id.word', count: '$count'}}
}},
// Reshape the output
{$project: {favorited_by: '$_id', keywords: 1, _id: 0}}
]);
Output:
{
"keywords" : [
{
"keyword" : "anotherone",
"count" : 1
},
{
"keyword" : "test",
"count" : 2
}
],
"favorited_by" : "too_creepy"
},
{
"keywords" : [
{
"keyword" : "test",
"count" : 1
}
],
"favorited_by" : "hello_world"
}