Mongo aggregate nested array - mongodb

I have a mongo collection with following structure
{
"userId" : ObjectId("XXX"),
"itemId" : ObjectId("YYY"),
"resourceId" : 1,
"_id" : ObjectId("528455229486ca3606004ec9"),
"parameter" : [
{
"name" : "name1",
"value" : 150,
"_id" : ObjectId("528455359486ca3606004eed")
},
{
"name" : "name2",
"value" : 0,
"_id" : ObjectId("528455359486ca3606004eec")
},
{
"name" : "name3",
"value" : 2,
"_id" : ObjectId("528455359486ca3606004eeb")
}
]
}
There can be multiple documents with the same 'useId' with different 'itemId' but the parameter will have same key/value pairs in all of them.
What I am trying to accomplish is return aggregated parameters "name1", "name2" and "name3" for each unique "userId" disregard the 'itemId'. so final results would look like for each user :
{
"userId" : ObjectId("use1ID"),
"name1" : (aggregatedValue),
"name2" : (aggregatedValue),
"name3" : (aggregatedVAlue)
},
{
"userId" : ObjectId("use2ID"),
"name1" : (aggregatedValue),
"name2" : (aggregatedValue),
"name3" : (aggregatedVAlue)
}
Is it possible to accomplish this using the aggregated methods of mongoDB ? Could you please help me to build the proper query to accomplish that ?

The simplest form of this is to keep things keyed by the "parameter" "name":
db.collection.aggregate(
// Unwind the array
{ "$unwind": "$parameter"},
// Group on the "_id" and "name" and $sum "value"
{ "$group": {
"_id": {
"userId": "$userId",
"name": "$parameter.name"
},
"value": { "$sum": "$parameter.value" }
}},
// Put things into an array for "nice" processing
{ "$group": {
"_id": "$_id.userId",
"values": { "$push": {
"name": "$_id.name",
"value": "$value"
}}
}}
)
If you really need to have the "values" of names as the field values, you can do the the following. But since you are "projecting" the fields/properties then you must specify them all in your code. You cannot be "dynamic" anymore and you are coding/generating each one:
db.collection.aggregate([
// Unwind the array
{ "$unwind": "$parameter"},
// Group on the "_id" and "name" and $sum "value"
{ "$group": {
"_id": {
"userId": "$userId",
"name": "$parameter.name"
},
"value": { "$sum": "$parameter.value"}
}},
// Project out discrete "field" names with $cond
{ "$project": {
"name1": { "$cond": [
{ "$eq": [ "$_id.name", "name1" ] },
"$value",
0
]},
"name2": { "$cond": [
{ "$eq": [ "$_id.name", "name2" ] },
"$value",
0
]},
"name3": { "$cond": [
{ "$eq": [ "$_id.name", "name3" ] },
"$value",
0
]},
}},
// The $cond put "0" values in there. So clean up with $group and $sum
{ "$group": {
_id: "$_id.userId",
"name1": { "$sum": "$name1" },
"name2": { "$sum": "$name2" },
"name3": { "$sum": "$name3" }
}}
])
So while the extra steps give you the result that you want ( well with a final project to change the _id to userId ), for my mind the short version is workable enough, unless you really do need it. Consider the output from there as well:
{
"_id" : ObjectId("53245016ea402b31d77b0372"),
"values" : [
{
"name" : "name3",
"value" : 2
},
{
"name" : "name2",
"value" : 0
},
{
"name" : "name1",
"value" : 150
}
]
}
So that would be what I would use, personally. But your choice.

Not sure if I got your question but if the name field can contain only "name1", "name2", "name3" or at least you are only interested in this values, one of the possible queries could be this one:
db.aggTest.aggregate(
{$unwind:"$parameter"},
{$project: {"userId":1, "parameter.name":1,
"name1" : {"$cond": [{$eq : ["$parameter.name", "name1"]}, "$parameter.value", 0]},
"name2" : {"$cond": [{$eq : ["$parameter.name", "name2"]}, "$parameter.value", 0]},
"name3" : {"$cond": [{$eq : ["$parameter.name", "name3"]}, "$parameter.value", 0]}}},
{$group : {_id : {userId:"$userId"},
name1 : {$sum:"$name1"},
name2 : {$sum:"$name2"},
name3 : {$sum:"$name3"}}})
It firsts unwinds the parameter array, then separates name1, name2 and name3 values into different columns. There's a simple conditional statement for that. After that we can easily aggreagate by the new columns.
Hope it helps!

Related

Retrieve only queried element from a single document in mongoDB

I have a collection like below :
`{
"topics" : [
{
"id" : "2",
"name" : "Test1",
"owner" : [
"123"
]
},
{
"id" : "3",
"name" : "Test2",
"owner" : [
"123",
"456"
]
}
]
}`
As, this data is in single document, and I want only matching elements based on their owner, I am using below query ( using filter in aggregation ), but I am getting 0 matching elements.
Query :
Thanks in advance...!!
db.getCollection('topics').aggregate([
{"$match":{"topics.owner":{"$in":["123","456"]}}},
{"$project":{
"topics":{
"$filter":{
"input":"$topics",
"as":"topic",
"cond": {"$in": ["$$topic.owner",["123","456"]]}
}},
"_id":0
}}
])
This query should produce below output :
{
"topics" : [
{
"id" : "1",
"name" : "Test1",
"owner" : ["123"]
},
{
"id" : "2",
"name" : "Test2",
"owner" : ["123","456"]
}
]
}
As the topic.owner is an array, you can't use $in directly as this compares whether the array is within in an array.
Instead, you should do as below:
$filter - Filter the document in the topics array.
1.1. $gt - Compare the result from 1.1.1 is greater than 0.
1.1.1. $size - Get the size of the array from the result 1.1.1.1.
1.1.1.1. $setIntersection - Intersect the topic.owner array with the input array.
{
"$project": {
"topics": {
"$filter": {
"input": "$topics",
"as": "topic",
"cond": {
$gt: [
{
$size: {
$setIntersection: [
"$$topic.owner",
[
"123",
"456"
]
]
}
},
0
]
}
}
},
"_id": 0
}
}
Demo # Mongo Playground
db.getCollection('topics').aggregate([
{"$unwind":"$topics"},
{"$addFields":{
"rest":{"$or":[{"$in":["12z3","$topics.owner"]},{"$in":["456","$topics.owner"]}]}
}},
{"$match":{
"rest":true
}},
{"$group":{
"_id":"$_id",
"topics":{"$push":"$topics"}
}}
])

Project an array with MongoDB

I'm using MongoDB's aggregation pipeline, to get my documents in the form that I want. As the last step of aggregation, I use $project to put the documents into their final form.
But I'm having trouble projecting and array of sub-documents. Here is what I currently get from aggrgation:
{
"_id": "581c8c3df1325f68ffd23386",
"count": 14,
"authors": [
{
"author": {
"author": "57f246b9e01e6c6f08e1d99a",
"post": "581c8c3df1325f68ffd23386"
},
"count": 13
},
{
"author": {
"author": "5824382511f16d0f3fd5aaf2",
"post": "581c8c3df1325f68ffd23386"
},
"count": 1
}
]
}
I want to $project the authors array so that the return would be this:
{
"_id": "581c8c3df1325f68ffd23386",
"count": 14,
"authors": [
{
"_id": "57f246b9e01e6c6f08e1d99a",
"count": 13
},
{
"_id": "5824382511f16d0f3fd5aaf2",
"count": 1
}
]
}
How would I go about achieving that?
You can unwind the array and wind it u again after projecting.
Something like this:
db.collectionName.aggregate([
{$unwind:'$authors'},
{$project:{_id:1,count:1,'author.id':'$authors.author.author','author.count':'$authors.count'}},
{$group:{_id:{_id:'$_id',count:'$count'},author:{$push:{id:'$author.id',count:'$author.count'}}}},
{$project:{_id:0,_id:'$_id._id',count:'$_id.count',author:1}}
])
the output for above will be:
{
"_id" : "581c8c3df1325f68ffd23386",
"author" : [
{
"id" : "57f246b9e01e6c6f08e1d99a",
"count" : 13.0
},
{
"id" : "5824382511f16d0f3fd5aaf2",
"count" : 1.0
}
],
"count" : 14.0
}
I have been having the same problem and just now found a simple and elegant solution that has not been mentioned anywhere, so i thought I'd share it here:
You can iterate the array using $map and project each author. With the given structure, the aggregation should look somewhat like this
db.collectionName.aggregate([
$project: {
_id: 1,
count:1,
authors: {
$map: {
input: "$authors",
as: "author",
in: {
id: "$$author.author.author",
count: $$author.author.count
}
}
}
}
])
Hope this helps anyone who is looking, like me :)
Question:
"customFields" : [
{
"index" : "1",
"value" : "true",
"label" : "isOffline",
"dataType" : "check_box",
"placeholder" : "cf_isoffline",
"valueFormatted" : "true"
},
{
"index" : "2",
"value" : "false",
"label" : "tenure_extended",
"dataType" : "check_box",
"placeholder" : "cf_tenure_extended",
"valueFormatted" : "false"
}
],
Answer:
db.subscription.aggregate([
{$match:{"autoCollect" : false,"remainingBillingCycles" : -1,"customFields.value":"false", "customFields.label" : "isOffline"}},
{$project: {first: { $arrayElemAt: [ "$customFields", 1 ] }}}
])

MongoDB aggregate nested array correctly

OK I am very new to Mongo, and I am already stuck.
Db has the following structure (much simplified for sure):
{
{
"_id" : ObjectId("57fdfbc12dc30a46507044ec"),
"keyterms" : [
{
"score" : "2",
"value" : "AA",
},
{
"score" : "2",
"value" : "AA",
},
{
"score" : "4",
"value" : "BB",
},
{
"score" : "3",
"value" : "CC",
}
]
},
{
"_id" : ObjectId("57fdfbc12dc30a46507044ef"),
"keyterms" : [
...
There are some Objects. Each Object have an array "keywords". Each of this Arrays Entries, which have score and value. There are some duplicates though (not really, since in the real db the keywords entries have much more fields, but concerning value and score they are duplicates).
Now I need a query, which
selects one object by id
groups its keyterms in by value
and counts the dublicates
sorts them by score
So I want to have something like that as result
// for Object 57fdfbc12dc30a46507044ec
"keyterms"; [
{
"score" : "4",
"value" : "BB",
"count" : 1
},
{
"score" : "3",
"value" : "CC",
"count" : 1
}
{
"score" : "2",
"value" : "AA",
"count" : 2
}
]
In SQL I would have written something like this
select
score, value, count(*) as count
from
all_keywords_table_or_some_join
group by
value
order by
score
But, sadly enough, it's not SQL.
In Mongo I managed to write this:
db.getCollection('tests').aggregate([
{$match: {'_id': ObjectId('57fdfbc12dc30a46507044ec')}},
{$unwind: "$keyterms"},
{$sort: {"keyterms.score": -1}},
{$group: {
'_id': "$_id",
'keyterms': {$push: "$keyterms"}
}},
{$project: {
'keyterms.score': 1,
'keyterms.value': 1
}}
])
But there is something missing: the grouping of the the keywords by their value. I can not get rid of the feeling, that this is the wrong approach at all. How can I select the keywords array and continue with that, and use an aggregate function inly on this - that would be easy.
BTW I read this
(Mongo aggregate nested array)
but I can't figure it out for my example unfortunately...
You'd want an aggregation pipeline where after you $unwind the array, you group the flattened documents by the array's value and score keys, aggregate the counts using the $sum accumulator operator and retain the main document's _id with the $first operator.
The preceding pipeline should then group the documents from the previous pipeline by the _id key so as to preserve the original schema and recreate the keyterms array using the $push operator.
The following demonstration attempts to explain the above aggregation operation:
db.tests.aggregate([
{ "$match": { "_id": ObjectId("57fdfbc12dc30a46507044ec") } },
{ "$unwind": "$keyterms" },
{
"$group": {
"_id": {
"value": "$keyterms.value",
"score": "$keyterms.score"
},
"doc_id": { "$first": "$_id" },
"count": { "$sum": 1 }
}
},
{ "$sort": {"_id.score": -1 } },
{
"$group": {
"_id": "$doc_id",
"keyterms": {
"$push": {
"value": "$_id.value",
"score": "$_id.score",
"count": "$count"
}
}
}
}
])
Sample Output
{
"_id" : ObjectId("57fdfbc12dc30a46507044ec"),
"keyterms" : [
{
"value" : "BB",
"score" : "4",
"count" : 1
},
{
"value" : "CC",
"score" : "3",
"count" : 1
},
{
"value" : "AA",
"score" : "2",
"count" : 2
}
]
}
Demo
Meanwhile, I solved it myself:
aggregate([
{$match: {'_id': ObjectId('57fdfbc12dc30a46507044ec')}},
{$unwind: "$keyterms"},
{$sort: {"keyterms.score": -1}},
{$group: {
'_id': "$keyterms.value",
'keyterms': {$push: "$keyterms"},
'escore': {$first: "$keyterms.score"},
'evalue': {$first: "$keyterms.value"}
}},
{$limit: 15},
{$project: {
"score": "$escore",
"value": "$evalue",
"count": {$size: "$keyterms"}
}}
])

MongoDB aggregation on another aggreatation suggestions

I have a Json file imported into MongoDB. Every line on it is a user, and I have a field product, with the name of it. I know the value of every product, they are just few.
But this information is not stored on the Json.
I was able to do aggregation to retrieve the number of time that a user bought a product, but I would like to do a query to get directly the amount of money that each user spent.
This is my query:
db.source.aggregate([
{"$match": {
"$and":[
{"productName":{
"$in":[
"product2","product2","product3",
"product4","product5","product6"
]
}},
{ "$or": [
{"appID" : "nameOfAPP"},
{"appID": "NameOfAPP2"}
]}
]
}},
{ "$group": {
"_id": {
"id_user": "$id_user",
"productName": "$productName"
},
"count": { "$sum": 1}
}},
{ "$sort" : { "count": -1 } }
])
so the output is like that:
{ "_id" : { "id_user" : "user1", "productID" : "product2" }, "count" : 433 }
{ "_id" : { "id_user" : "user2", "productID" : "product1" }, "count" : 370 }
{ "_id" : { "id_user" : "user1", "productID" : "product3" }, "count" : 300 }
{ "_id" : { "id_user" : "user3", "productID" : "product6" }, "count" : 250 }
{ "_id" : { "id_user" : "user2", "productID" : "product5" }, "count" : 140 }
{ "_id" : { "id_user" : "user3", "productID" : "product4" }, "count" : 90 }
I know that product 1 costs 20$, product 2 costs 40$, product 3 costs 55$, product 4 costs -90$, product 5 costs 110$, product 6 costs 200$.
I would like to have an output like that:
{ "_id" : { "id_user" : "user1"}, "money_spent" : 600$ }
{ "_id" : { "id_user" : "user2"}, "money_spent" : 400$ }
etc
Can you help to get that result, I am new with MongoDB.
Thanks in advance.
If you cannot go to the original source data an are only working with an import then do this:
db.source.aggregate([
{"$match": {
"$and":[
{ "productName": {
"$in":[
"product1","product2","product3",
"product4","product5","product6"
]
}},
{ "$or": [
{"appID" : "nameOfAPP"},
{"appID": "NameOfAPP2"}
]}
]
}},
{ "$group": {
"_id": "$id_user",
"cost": {
"$sum": {
"$cond": [
{ "$eq": ["$_id.productId", "product1"] },
20,
{ "$cond": [
{ "$eq": ["$productName", "product2"] },
40,
{ "$cond": [
{ "$eq": [ "$productName", "product3"] },
55,
{ "$cond": [
{ "$eq": [ "$productName", "product4" ] },
-90,
{ "$cond": [
{ "$eq": [ "$productName", "product5" ] },
110,
200
]}
]}
]}
]}
}
}
}
}}
])
The $cond operator evaluates whether your field value matches the condition and places the appropriate value simply just $sum to get your result.
$cond provides a "ternary" operator or "if .. then .. else" that is used to evaluate the condition you provide in the first argument. You construct this to "cascade" where the condition evaluates to false in order to move on to the next condition to evaluate, otherwise return the value that matches your condition.
In this way your "known" values are applied as you aggregate for your expected total.

MongoDB: aggregating fields from arrays of subdocuments

I have a mongodb collection called Events, containing baseball games. Here is an example of one record in the table:
{
"name" : "Game# 814",
"dateStart" : ISODate("2012-09-28T14:47:53.695Z"),
"_id" : ObjectId("53a1b24de3f25f4443d9747e"),
"stats" : [
{
"team" : ObjectId("53a11a43a8de6dd8375c940b"),
"teamName" : "Reds",
"_id" : ObjectId("53a1b24de3f25f4443d97480"),
"score" : 17
},
{
"team" : ObjectId("53a11a43a8de6dd8375c938d"),
"teamName" : "Yankees",
"_id" : ObjectId("53a1b24de3f25f4443d9747f"),
"score" : 12
}
]
"__v" : 0
}
I need help writing the query that returns standings for all teams. The result set should look like:
{
"team" : ObjectId("53a11a43a8de6dd8375c938d"),
"teamName" : "Yankees",
"wins" : <<number of Yankees wins>>
"losses" : <<number of Yankees losses>>
"draws" : <<number of Yankees draws>>
}
{
"team" : ObjectId("53a11a43a8de6dd8375c940b"),
"teamName" : "Reds",
"wins" : <<number of Reds wins>>
"losses" : <<number of Reds losses>>
"draws" : <<number of Reds draws>>
}
...
Here's the query I've started with...
db.events.aggregate(
{"$unwind": "$stats" },
{ $group : {
_id : "$stats.team",
gamesPlayed : { $sum : 1},
totalScore : { $sum : "$stats.score" }
}}
);
... which returns results:
{
"result" : [
{
"_id" : ObjectId("53a11a43a8de6dd8375c93cb"),
"gamesPlayed" : 125, // not a requirement... just trying to get $sum working
"totalScore" : 1213 // ...same here
},
{
"_id" : ObjectId("53a11a44a8de6dd8375c955f"),
"gamesPlayed" : 128,
"totalScore" : 1276
},
{
"_id" : ObjectId("53a11a44a8de6dd8375c9661"),
"gamesPlayed" : 152,
"totalScore" : 1509
},
....
It would seem advisable for you to keep your "wins", "losses", "draws" within your documents as you create or update them. But it is possible to do with aggregate if a little long winded
db.events.aggregate([
// Unwind the "stats" array
{ "$unwind": "$stats" },
// Combine the document with new fields
{ "$group": {
"_id": "$_id",
"firstTeam": { "$first": "$stats.team" },
"firstTeamName": { "$first": "$stats.teamName" },
"firstScore": { "$first": "$stats.score" },
"lastTeam": { "$last": "$stats.team" },
"lastTeamName": { "$last": "$stats.teamName" },
"lastScore": { "$last": "$stats.score" },
"minScore": { "$min": "$stats.score" },
"maxScore": { "$max": "$stats.score" }
}},
// Calculate by comparing scores
{ "$project": {
"firstTeam": 1,
"firstTeamName": 1,
"firstScore": 1,
"lastTeam": 1,
"lastTeamName": 1,
"lastScore": 1,
"firstWins": {
"$cond": [
{ "$gt": [ "$firstScore", "$lastScore" ] },
1,
0
]
},
"firstLosses": {
"$cond": [
{ "$lt": [ "$firstScore", "$lastScore" ] },
1,
0
]
},
"firstDraws": {
"$cond": [
{ "$eq": [ "$firstScore", "$lastScore" ] },
1,
0
]
},
"lastWins": {
"$cond": [
{ "$gt": [ "$lastScore", "$firstScore" ] },
1,
0
]
},
"lastLosses": {
"$cond": [
{ "$lt": [ "$lastScore", "$firstScore" ] },
1,
0
]
},
"lastDraws": {
"$cond": [
{ "$eq": [ "$lastScore", "$firstScore" ] },
1,
0
]
},
"type": { "$literal": [ true, false ] }
}},
// Unwind the "type"
{ "$unwind": "$type" },
// Group teams conditionally on "type"
{ "$group": {
"_id": {
"team": {
"$cond": [
"$type",
"$firstTeam",
"$lastTeam"
]
},
"teamName": {
"$cond": [
"$type",
"$firstTeamName",
"$lastTeamName"
]
}
},
"owins": {
"$sum": {
"$cond": [
"$type",
"$firstWins",
"$lastWins"
]
}
},
"olosses": {
"$sum": {
"$cond": [
"$type",
"$firstLosses",
"$lastLosses"
]
}
},
"odraws": {
"$sum": {
"$cond": [
"$type",
"$firstDraws",
"$lastDraws"
]
}
}
}},
// Project your final form
{ "$project": {
"_id": 0,
"team": "$_id.team",
"teamName": "$_id.teamName",
"wins": "$owins",
"losses": "$olosses",
"draws": "$odraws"
}}
])
The first part is to "re-shape" the document by unwinding the array and then grouping with "first" and "last" for defining fields for your two teams.
Then you want to $project through those documents and calculate your "wins", "losses" and "draws" for each team in the pairing. The additional thing is adding an array field for the two values true/false is convenient here. If you are on a pre 2.6 version of mongodb the $literal can be replaced with $const which is not documented but does the same thing.
Once you $unwind that "type" array, the documents can be split apart in the $group stage by evaluating whether to choose the "first" or "last" team field values via the use of $cond. This is a ternary operator that evaluates a true/false condition and returns the appropriate value according to that condition.
With a final $project your documents are formed exactly how you want.