mognodb aggregation group by actor - mongodb

I have the following film collection structure:
{
"_id" : ObjectId,
"title" : "movie-1",
"actors" : [
"actor-1",
"actor-2",
"actor-3",
],
"categories" : [
"category-1",
"category-2"
]
}
I want to display result of all actors with associate movies and categories as like as given below:
{
"actor": "actor-1",
"result": {
"category-1": [ "movie-1", "movie-2" ],
"category-2": [ "movie-1", "movie-4" ]
}
}
I have tried aggregation as like as given below:
db.film.aggregate([
{ $unwind: "$actors" },
{ $group: {
_id: "$actors",
data: { $push: { movie: "$title", categories: "$categories" } }
}
},
{
$project: {
_id: 0,
actor: "$_id",
result: {
$reduce: {
input: "$data",
initialValue: {},
in: {
$let: {
vars: { movie: "$$this.movie", categories: "$$this.categories" },
in: {
$arrayToObject: {
$map: {
input: "$$categories",
in: { k: "$$this", v: "$$movie" }
}
}
}
}
}
}
}
}
}
])
But I get all actors list with only one movie with category as like as given below:
{
"actor" : "actor-1",
"result" : {
"category-1" : "movie-1",
"category-2" : "movie-2",
"category-3" : "movie-3"
}
}
How can I solve this problem? Thanks in advance.

You may need to do another $unwind on the categories array after flattening the actors array then group all the flattened docs by the two fields i.e. actor and category fields to create the movie titles list.
Another group to shape the result field is required.
The following pipeline should give you the desired result:
db.film.aggregate([
{ "$unwind": "$actors" },
{ "$unwind": "$categories" },
{ "$group": {
"_id": { "actor": "$actors", "category": "$categories" },
"movies": { "$push": "$title" }
} },
{ "$group": {
"_id": "$_id.actor",
"result": {
"$push": {
"k": "$_id.category",
"v": "$movies"
}
}
} },
{ "$addFields": {
"result": { "$arrayToObject": "$result" }
} }
])

I've used a sledgehammer to crack a nut (c)
Some stages could be replaced by $reduce, done inside $project stage (criticism and suggestions will be welcome)
db.film.aggregate([
{
$unwind: "$actors"
},
{
$group: {
_id: "$actors",
data: {
$push: {
movie: "$title",
categories: "$categories"
}
}
}
},
{
$unwind: "$data"
},
{
$unwind: "$data.categories"
},
{
$group: {
_id: {
actors: "$_id",
categories: "$data.categories"
},
movies: {
$push: "$data.movie"
}
}
},
{
$project: {
_id: 0,
actor: "$_id.actors",
result: {
k: "$_id.categories",
v: "$movies"
}
}
},
{
$group: {
_id: "$actor",
result: {
$push: "$result"
}
}
},
{
$project: {
_id: 0,
actor: "$_id",
result: {
$arrayToObject: "$result"
}
}
},
{
$sort: {
actor: 1
}
}
])
MongoPlayground

Related

mongo / mongoose aggregation pipeline query for survey data

I am trying to write a query to get all of the results of some survey data stored in a mongo. The tricky part is some questions are radio questions with a single answer, and some questions are multi-select type questions, some are values that need to be averaged, so I want to perform different aggregations depending on the type of question.
The results are stored in a schema like this, with each item in the array being a survey response.
[
{
metaData: {
survey: new ObjectId("62206ea0b31be3535abac547")
},
answers: {
'question1': 'a',
'question2': 'a',
'question3': ['a','c'],
'question4': 3
},
createdAt: 2022-03-03T07:30:40.517Z,
},
{
metaData: {
survey: new ObjectId("62206ea0b31be3535abac547"),
},
answers: {
'question1': 'a',
'question2': 'b',
'question3': ['a','c'],
'question4': 2
},
createdAt: 2022-03-03T07:30:40.518Z,
},
{
metaData: {
survey: new ObjectId("62206ea0b31be3535abac547"),
},
answers: {
'question1': 'b',
'question2': 'c',
'question3': ['b']
'question4': 1
},
createdAt: 2022-03-03T07:30:40.518Z,
}
]
question1 and question2 are radio questions, so there can be only 1 answer, whereas question 3 is a multi-select, so the user can have multiple answers. Question 4 is a value that needs to be averaged.
I think there is some way to accomplish this in a single aggregation pipeline with some combination of facets, grouping, filters, projections, etc, but I am stuck.
I'd like to get a final result that looks like this
{
'question1' : {
'a' : 2,
'b' : 1
},
'question2' : {
'a' : 1,
'b' : 1,
'c' : 1,
},
'question3' : {
'a' : 2,
'b' : 1,
'c' : 2,
},
'question4' : 2 //avg (3+2+1)/3
}
OR even better:
{
'radio': {
'question1' : {
'a' : 2,
'b' : 1
},
'question2' : {
'a' : 1,
'b' : 1,
'c' : 1,
},
},
'multi': {
'question3' : {
'a' : 2,
'b' : 1,
'c' : 2,
}
},
'avg' : {
'question4' : 2
}
}
My pipeline would look something like this:
Response.aggregate([
{ $match: { 'metaData.survey': surveyId} }, // filter only for the specific survey
{ $project: { // I assume I have to turn the answers into an array
"answers": { $objectToArray: "$answers" },
"createdAt": "$createdAt"
}
},
// maybe facet here?
// conceptually, In the next stage I'd want to bucket the questions
// by type with something like below, then perform the right type of
// aggregation depending on the question type
// if $in [$$answers.k ['question1, 'question2']] group by k, v and count
// if $in [$$answers.k ['question3']] unwind and count each unique value?
// { $facet : { radio: [], multi:[]}}
])
Basically, I know which question Id is a radio or a multi-select, I'm just trying to figure out how to format the pipeline to achieve the desired output based on the questionId being in a known array.
Bonus points if I can figure out how to also group the by day/month based on the createdAt time
db.collection.aggregate([
{
$match: {}
},
{
$project: { answers: { $objectToArray: "$answers" } }
},
{
$unwind: "$answers"
},
{
$unwind: "$answers.v"
},
{
$group: {
_id: "$answers",
c: { "$sum": 1 }
}
},
{
$group: {
_id: "$_id.k",
v: { "$push": { k: "$_id.v", v: "$c" } }
}
},
{
$group: {
_id: null,
v: { "$push": { k: "$_id", v: { "$arrayToObject": "$v" } } }
}
},
{
$set: { v: { $arrayToObject: "$v" } }
},
{
$replaceWith: "$v"
}
])
mongoplayground
db.collection.aggregate([
{
$match: {}
},
{
$project: { answers: { $objectToArray: "$answers" } }
},
{
$unwind: "$answers"
},
{
$set: {
"answers.type": {
$switch: {
branches: [
{
case: { $isArray: "$answers.v" },
then: "multi"
},
{
case: { $eq: [ { $type: "$answers.v" }, "string" ] },
then: "radio"
},
{
case: { $isNumber: "$answers.v" },
then: "avg"
}
],
default: "other"
}
}
}
},
{
$unwind: "$answers.v"
},
{
$group: {
_id: "$answers",
c: { $sum: 1 }
}
},
{
$group: {
_id: "$_id.k",
type: { $first: "$_id.type" },
v: {
$push: {
k: { $toString: "$_id.v" },
v: "$c"
}
}
}
},
{
$group: {
_id: "$type",
v: {
$push: {
k: "$_id",
v: { $arrayToObject: "$v" }
}
}
}
},
{
$group: {
_id: null,
v: {
$push: {
k: "$_id",
v: { $arrayToObject: "$v" }
}
}
}
},
{
$set: { v: { $arrayToObject: "$v" } }
},
{
$replaceWith: "$v"
},
{
$set: {
avg: {
$arrayToObject: {
$map: {
input: { $objectToArray: "$avg" },
as: "s",
in: {
k: "$$s.k",
v: {
$avg: {
$map: {
input: { $objectToArray: "$$s.v" },
as: "x",
in: { $multiply: [ { $toInt: "$$x.k" }, "$$x.v" ] }
}
}
}
}
}
}
}
}
}
])
mongoplayground

Lodash `countBy` equivalent in MongoDB?

Let's say I have the input docs below:
[
{
"_id": "6225ca4052e7c226e2dd836d",
"data": [
"07",
"07",
"12",
"19",
"07",
"32"
]
},
{
"_id": "6225ca4052e7c226e2dd888f",
"data": [
"99",
"97",
"52",
"99",
"58",
"92"
]
}
]
I want to count the occurrences of every element in data string array per document. In JS, I can use countBy. How can I achieve the same using MongoDB Aggregation Framework?
I have tried to $reduce but MongoDB seems to not support assigning dynamic field to object.
{
$reduce: {
input: '$data',
initialValue: {},
in: { // assign `$$this` with count to `$$value`, but failed! }
}
}
Below is the desired output.
[
{
"_id": "6225ca4052e7c226e2dd836d",
"freqs": {
"12": 1,
"19": 1,
"32": 1,
"07": 3
}
},
{
"_id": "6225ca4052e7c226e2dd888f",
"freqs": {
"52": 1,
"58": 1,
"92": 1,
"97": 1,
"99": 2
}
}
]
db.collection.aggregate([
{
$match: {}
},
{
$unwind: "$data"
},
{
$group: {
_id: "$data",
c: { $sum: 1 },
id: { $first: "$_id" }
}
},
{
$group: {
_id: "$id",
data: { $push: { k: "$_id", v: "$c" } }
}
},
{
$set: {
data: { $arrayToObject: "$data" }
}
}
])
mongoplayground
db.collection.aggregate([
{
$set: {
data: {
$function: {
body: "function(d) {let obj = {}; d.forEach(e => {if(obj[e]==null) { obj[e]=1; }else{ obj[e]++; }}); return obj;}",
args: [
"$data"
],
lang: "js"
}
}
}
}
])
mongoplayground

Mongoose subquery

I have a collection that looks like below:
[
{
"orderNum": "100",
"createdTime": ISODate("2020-12-01T21:00:00.000Z"),
"amount": 100,
"memo": "100memo",
"list": [
1
]
},
{
"orderNum": "200",
"createdTime": ISODate("2020-12-01T21:01:00.000Z"),
"amount": 200,
"memo": "200memo",
"list": [
1,
2
]
},
{
"orderNum": "300",
"createdTime": ISODate("2020-12-01T21:02:00.000Z"),
"amount": 300,
"memo": "300memo"
},
{
"orderNum": "400",
"createdTime": ISODate("2020-12-01T21:03:00.000Z"),
"amount": 400,
"memo": "400memo"
},
]
and I'm trying to get the total amount of orders that were created before order# 300 (so order#100 and #200, total amount is 300).
Does anyone know how to get it via Mongoose?
You can use this one:
db.collection.aggregate([
{ $sort: { orderNum: 1 } }, // by default the order of documents in a collection is undetermined
{ $group: { _id: null, data: { $push: "$$ROOT" } } }, // put all documents into one document
{ $set: { data: { $slice: ["$data", { $indexOfArray: ["$data.orderNum", "300"] }] } } }, // cut desired elementes from array
{ $unwind: "$data" }, // transform back to documents
{ $replaceRoot: { newRoot: "$data" } },
{ $group: { _id: null, total_amount: { $sum: "$amount" } } } // make summary
])
Actually it is not needed to $unwind and $group, so the shortcut would be this:
db.collection.aggregate([
{ $sort: { orderNum: 1 } },
{ $group: { _id: null, data: { $push: "$$ROOT" } } },
{ $set: { data: { $slice: ["$data", { $indexOfArray: ["$data.orderNum", "300"] }] } } },
{ $project: { total_amount: { $sum: "$data.amount" } } }
])
But the answer from #turivishal is even better.
Update for additional field
{
$set: {
data: { $slice: ["$data", { $indexOfArray: ["$data.orderNum", "300"] }] },
memo: { $arrayElemAt: [ "$data.memo", { $indexOfArray: ["$data.orderNum", "300"] } ] }
}
}
or
{ $set: { data: { $slice: ["$data", { $indexOfArray: ["$data.orderNum", "300"] }] } } },
{ $set: { memo: { $last: { "$data.memo" } } },
$match orderNum less than 300
$group by null and get totalAmount using $sum of amount
YourSchemaModel.aggregate([
{ $match: { orderNum: { $lt: "300" } } },
{
$group: {
_id: null,
totalAmount: { $sum: "$amount" }
}
}
])
Playground

$match in aggregate don't return data in mongodb

I have three tables below is the structure like below
I'm looking to get a result like below
"type1": [ -- type from Accounts collection
{
"_id": "5e97e9a224f62f93d5x3zz46", -- _id from Accounts collection
"locs": "sampleLocks 1", -- field from Accounts collection
"solutions": "sample solutions 1", -- field from Accounts collection
"Clause": "clause 1" -- field from AccountsDesc collection
},
{
"_id": "5e97e9a884f62f93d5x3zz46",
"locs": "sampleLocks2",
"solutions": "sample solutions2",
"Clause": "clause2"
}
],
"type2": [
// same data construction as of type1 above
]
_id, locks, solution to be coming from Accounts collection
Clause field to be coming from AccountsDesc collection
accounts_id is kind of a foreign key in AccountsDesc coming from Account
competitor_id is kind of a foreign key in AccountsDesc coming from Competitor
Below is what my query looks like
db.accountDesc.aggregate([
{
$match : {accounts_Id : "123456"}, active: true}
},
{
$lookup: {
from: 'accounts',
pipeline: [{ $match: { type: { $in: ["type1, type2, type3"] } } }],
as: 'accountsData'
}
},
{
$group: {
_id: "$accountsData.type",
data: {
$push: {_id: "$accountsData._id", clause: "$clause", locs: "$type.locs", solutions: "$type.solutions"}
}
}
},
{
$group: {
_id: null,
data: {
$push: {
k: {
$toString: '$_id'
},
v: '$data'
}
}
}
},
{
$replaceRoot: {
newRoot: {
$arrayToObject: '$data'
}
}
}
])
Issues related with the query -
$match : {accountId : "123456"}, active: true} -- No data is returned if i use match on AccountsDesc collection
cant set localField, foriegnField if im using pipeline, then how the mapping will happen like a LEFT join.
clause: "$clause" don't get the value of this field in the response
As we discussed in chat, you want RIGHT OUTER JOIN for your aggregation.
Try the query below:
db.User_Promo_Map.aggregate([
{
$match: {
user_Id: ObjectId("5e8c1180d59de1704ce68112")
}
},
{
$lookup: {
from: "promo",
pipeline: [
{
$match: {
active: true,
platform: {
$in: [
"twitch",
"youtube",
"facebook"
]
}
}
}
],
as: "accountsData"
}
},
{
$unwind: "$accountsData"
},
{
$group: {
_id: "$accountsData.platform",
data2: {
$addToSet: {
amount: "$amount",
promo_Id: "$promo_Id"
}
},
data: {
$addToSet: {
_id: "$accountsData._id",
format: "$accountsData.format",
description: "$accountsData.description"
}
}
}
},
{
$addFields: {
data: {
$map: {
input: "$data",
as: "data",
in: {
"_id": "$$data._id",
"description": "$$data.description",
"format": "$$data.format",
amount: {
$reduce: {
input: "$data2",
initialValue: "$$REMOVE",
in: {
$cond: [
{
$eq: [
"$$this.promo_Id",
"$$data._id"
]
},
"$$this.amount",
"$$value"
]
}
}
}
}
}
}
}
},
{
$group: {
_id: null,
data: {
$push: {
k: {
$toString: "$_id"
},
v: "$data"
}
}
}
},
{
$replaceRoot: {
newRoot: {
$arrayToObject: "$data"
}
}
}
])
MongoPlayground

Mongodb aggregates: how to project a formatted filter

I think it's not a difficult question but I'm not sure how to do it
My collection is
[
{ type:"bananas", weight:"1"},
{ type:"bananas", weight:"10"},
{ type:"apple", weight:"5"}
]
The result I would like to have is the count of each type in the same query, result expected:
{
bananas: 2,
apple: 1
}
We have 2 items of type "bananas" and 1 of type "apple" in the collection
So I guess I have to $project the props I want (item types) but I don't know how to count/sum/match this ?
thanks in advance for your help
Try as below:
db.collection.aggregate([
{
$group: {
_id: "$type",
count: { $sum:1 },
"data": {"$push": "$$ROOT" },
}
},
{
$project: {
"specList": {
"$arrayToObject": {
"$map": {
"input": "$data",
"as": "item",
"in": {
"k": "$$item.type",
"v": "$count",
}
}
}
}
}
},
{ $replaceRoot: { newRoot: { "$mergeObjects":[ "$specList" , { "item":"$_id"} ] } } }
])
db.getCollection('test').aggregate([
{ $match: {} },
{ $group: { _id: "$type", count: { $sum: 1 }} },
{
$replaceRoot: {
newRoot: { $arrayToObject: [ [ { k: "$_id", v: "$count" } ] ]}
}
}
])
Result:
[{
"apple" : 1.0
},
{
"bananas" : 2.0
}]
Aggregation with merge
db.getCollection('test').aggregate([
{ $match: {} },
{ $group: { _id: "$type", count: { $sum: 1 }} },
{
$replaceRoot: {
newRoot: { $arrayToObject: [ [ { k: "$_id", v: "$count" } ] ]}
}
},
{ $facet: { items: [{$match: {} }]} },
{
$replaceRoot: { newRoot: { $mergeObjects: "$items" } }
},
])
Result:
[{
"apple" : 1.0,
"bananas" : 2.0
}]