Grouping and counting across documents? - mongodb

I have a collection with documents similar to the following format:
{
departure:{name: "abe"},
arrival:{name: "tom"}
},
{
departure:{name: "bob"},
arrival:{name: "abe"}
}
And to get output like so:
{
name: "abe",
departureCount: 1,
arrivalCount: 1
},
{
name: "bob",
departureCount: 1,
arrivalCount: 0
},
{
name: "tom",
departureCount: 0,
arrivalCount: 1
}
I'm able to get the counts individually by doing a query for the specific data like so:
db.sched.aggregate([
{
"$group":{
_id: "$departure.name",
departureCount: {$sum: 1}
}
}
])
But I haven't figured out how to merge the arrival and departure name into one document along with counts for both. Any suggestions on how to accomplish this?

You should use a $map to split your doc into 2, then $unwind and $group..
[
{
$project: {
dep: '$departure.name',
arr: '$arrival.name'
}
},
{
$project: {
f: {
$map: {
input: {
$literal: ['dep', 'arr']
},
as: 'el',
in : {
type: '$$el',
name: {
$cond: [{
$eq: ['$$el', 'dep']
}, '$dep', '$arr']
}
}
}
}
}
},
{
$unwind: '$f'
}, {
$group: {
_id: {
'name': '$f.name'
},
departureCount: {
$sum: {
$cond: [{
$eq: ['$f.type', 'dep']
}, 1, 0]
}
},
arrivalCount: {
$sum: {
$cond: [{
$eq: ['$f.type', 'arr']
}, 1, 0]
}
}
}
}, {
$project: {
_id: 0,
name: '$_id.name',
departureCount: 1,
arrivalCount: 1
}
}
]

Related

MongoDB match computed value

I've created an aggregate query but for some reason it doesn't seem to work for custom fields created in the aggregation pipeline.
return this.repository.mongo().aggregate([
{
$match: { q1_avg: { $regex: baseQuery['value'], $options: 'i' } }, // NOT WORKING
},
{
$group: {
_id: '$product_sku',
id: { $first: "$_id" },
product_name: { $first: '$product_name' },
product_category: { $first: '$product_category' },
product_sku: { $first: '$product_sku' },
q1_cnt: { $sum: 1 },
q1_votes: { $push: "$final_rating" }
},
},
{
$facet: {
pagination: [ { $count: 'total' } ],
data: [
{
$project: {
_id: 1,
id: 1,
product_name: 1,
product_category: 1,
product_sku: 1,
q1_cnt: 1,
q1_votes: {
$filter: {
input: '$q1_votes',
as: 'item',
cond: { $ne: ['$$item', null] }
}
},
},
},
{
$set: {
q1_avg: { $round: [ { $avg: '$q1_votes' }, 2 ] },
}
},
{ $unset: ['q1_votes'] },
{ $skip: skip },
{ $limit: limit },
{ $sort: sortList }
]
}
},
{ $unwind : "$pagination" },
]).next();
q1_avg value is an integer and as far as I know, regex only works with strings. Could that be the reason

mongo / mongoose aggregation pipeline query for survey data

I am trying to write a query to get all of the results of some survey data stored in a mongo. The tricky part is some questions are radio questions with a single answer, and some questions are multi-select type questions, some are values that need to be averaged, so I want to perform different aggregations depending on the type of question.
The results are stored in a schema like this, with each item in the array being a survey response.
[
{
metaData: {
survey: new ObjectId("62206ea0b31be3535abac547")
},
answers: {
'question1': 'a',
'question2': 'a',
'question3': ['a','c'],
'question4': 3
},
createdAt: 2022-03-03T07:30:40.517Z,
},
{
metaData: {
survey: new ObjectId("62206ea0b31be3535abac547"),
},
answers: {
'question1': 'a',
'question2': 'b',
'question3': ['a','c'],
'question4': 2
},
createdAt: 2022-03-03T07:30:40.518Z,
},
{
metaData: {
survey: new ObjectId("62206ea0b31be3535abac547"),
},
answers: {
'question1': 'b',
'question2': 'c',
'question3': ['b']
'question4': 1
},
createdAt: 2022-03-03T07:30:40.518Z,
}
]
question1 and question2 are radio questions, so there can be only 1 answer, whereas question 3 is a multi-select, so the user can have multiple answers. Question 4 is a value that needs to be averaged.
I think there is some way to accomplish this in a single aggregation pipeline with some combination of facets, grouping, filters, projections, etc, but I am stuck.
I'd like to get a final result that looks like this
{
'question1' : {
'a' : 2,
'b' : 1
},
'question2' : {
'a' : 1,
'b' : 1,
'c' : 1,
},
'question3' : {
'a' : 2,
'b' : 1,
'c' : 2,
},
'question4' : 2 //avg (3+2+1)/3
}
OR even better:
{
'radio': {
'question1' : {
'a' : 2,
'b' : 1
},
'question2' : {
'a' : 1,
'b' : 1,
'c' : 1,
},
},
'multi': {
'question3' : {
'a' : 2,
'b' : 1,
'c' : 2,
}
},
'avg' : {
'question4' : 2
}
}
My pipeline would look something like this:
Response.aggregate([
{ $match: { 'metaData.survey': surveyId} }, // filter only for the specific survey
{ $project: { // I assume I have to turn the answers into an array
"answers": { $objectToArray: "$answers" },
"createdAt": "$createdAt"
}
},
// maybe facet here?
// conceptually, In the next stage I'd want to bucket the questions
// by type with something like below, then perform the right type of
// aggregation depending on the question type
// if $in [$$answers.k ['question1, 'question2']] group by k, v and count
// if $in [$$answers.k ['question3']] unwind and count each unique value?
// { $facet : { radio: [], multi:[]}}
])
Basically, I know which question Id is a radio or a multi-select, I'm just trying to figure out how to format the pipeline to achieve the desired output based on the questionId being in a known array.
Bonus points if I can figure out how to also group the by day/month based on the createdAt time
db.collection.aggregate([
{
$match: {}
},
{
$project: { answers: { $objectToArray: "$answers" } }
},
{
$unwind: "$answers"
},
{
$unwind: "$answers.v"
},
{
$group: {
_id: "$answers",
c: { "$sum": 1 }
}
},
{
$group: {
_id: "$_id.k",
v: { "$push": { k: "$_id.v", v: "$c" } }
}
},
{
$group: {
_id: null,
v: { "$push": { k: "$_id", v: { "$arrayToObject": "$v" } } }
}
},
{
$set: { v: { $arrayToObject: "$v" } }
},
{
$replaceWith: "$v"
}
])
mongoplayground
db.collection.aggregate([
{
$match: {}
},
{
$project: { answers: { $objectToArray: "$answers" } }
},
{
$unwind: "$answers"
},
{
$set: {
"answers.type": {
$switch: {
branches: [
{
case: { $isArray: "$answers.v" },
then: "multi"
},
{
case: { $eq: [ { $type: "$answers.v" }, "string" ] },
then: "radio"
},
{
case: { $isNumber: "$answers.v" },
then: "avg"
}
],
default: "other"
}
}
}
},
{
$unwind: "$answers.v"
},
{
$group: {
_id: "$answers",
c: { $sum: 1 }
}
},
{
$group: {
_id: "$_id.k",
type: { $first: "$_id.type" },
v: {
$push: {
k: { $toString: "$_id.v" },
v: "$c"
}
}
}
},
{
$group: {
_id: "$type",
v: {
$push: {
k: "$_id",
v: { $arrayToObject: "$v" }
}
}
}
},
{
$group: {
_id: null,
v: {
$push: {
k: "$_id",
v: { $arrayToObject: "$v" }
}
}
}
},
{
$set: { v: { $arrayToObject: "$v" } }
},
{
$replaceWith: "$v"
},
{
$set: {
avg: {
$arrayToObject: {
$map: {
input: { $objectToArray: "$avg" },
as: "s",
in: {
k: "$$s.k",
v: {
$avg: {
$map: {
input: { $objectToArray: "$$s.v" },
as: "x",
in: { $multiply: [ { $toInt: "$$x.k" }, "$$x.v" ] }
}
}
}
}
}
}
}
}
}
])
mongoplayground

MongoDB get size of unwinded array

I'm trying to return size of 'orders' and sum of 'item' values for each 'order' for each order from documents like the example document:
orders: [
{
order_id: 1,
items: [
{
item_id: 1,
value:100
},
{
item_id: 2,
value:200
}
]
},
{
order_id: 2,
items: [
{
item_id: 3,
value:300
},
{
item_id: 4,
value:400
}
]
}
]
I'm using following aggregation to return them, everything works fine except I can't get size of 'orders' array because after unwind, 'orders' array is turned into an object and I can't call $size on it since it is an object now.
db.users.aggregate([
{
$unwind: "$orders"
},
{
$project: {
_id: 0,
total_values: {
$reduce: {
input: "$orders.items",
initialValue: 0,
in: { $add: ["$$value", "$$this.value"] }
}
},
order_count: {$size: '$orders'}, //I get 'The argument to $size must be an array, but was of type: object' error
}
},
])
the result I expected is:
{order_count:2, total_values:1000} //For example document
{order_count:3, total_values:1500}
{order_count:5, total_values:2500}
I found a way to get the results that I wanted. Here is the code
db.users.aggregate([
{
$project: {
_id: 1, orders: 1, order_count: { $size: '$orders' }
}
},
{ $unwind: '$orders' },
{
$project: {
_id: '$_id', items: '$orders.items', order_count: '$order_count'
}
},
{ $unwind: '$items' },
{
$project: {
_id: '$_id', sum: { $sum: '$items.value' }, order_count: '$order_count'
}
},
{
$group: {
_id: { _id: '$_id', order_count: '$order_count' }, total_values: { $sum: '$sum' }
}
},
])
output:
{ _id: { _id: ObjectId("5dffc33002ef525620ef09f1"), order_count: 2 }, total_values: 1000 }
{ _id: { _id: ObjectId("5dffc33002ef525620ef09f2"), order_count: 3 }, total_values: 1500 }

MongoDB to return formatted object when no results can be found

I have the following stage in my MongoDB aggregation pipeline that returns the qty and sum of sales, which works fine:
{
$lookup: {
from: 'sales',
let: { part: '$_id' },
pipeline: [
{ $match: { $and: [{ $expr: { $eq: ['$partner', '$$part'] } }] } },
{ $group: { _id: null, qty: { $sum: 1 }, soldFor: { $sum: '$soldFor' } } },
{ $project: { _id: 0, qty: 1, soldFor: 1 } }],
as: 'sales'}},
{ $unwind: { path: '$sales', preserveNullAndEmptyArrays: true } },
{ $project: { _id: 1, sales: 1 }
}
However, if there are no sales, then the $project projection returns an empty sales object, but what I'd really like is it to return a completed object, but with 0 - like this:
{
sales: {
qty: 0,
soldFor: 0
}
}
You can use $cond operator here
{
"$project": {
"_id": 1,
"sales": {
"$cond": [
{ "$eq": [{ "$size": "$sales" }, 0] },
{
"sales": {
"qty": 0,
"soldFor": 0
}
},
"$sales"
]
}
}
}

Use $size on all documents in array MongoDB

Here's the structure part of my collection:
_id: ObjectId("W"),
names: [
{
number: 1,
list: ["A","B","C"]
},
{
number: 2,
list: ["B"]
},
{
number: 3,
list: ["A","C"]
}
...
],
...
I use this request:
db.publication.aggregate( [ { $match: { _id: ObjectId("54a1de90453d224e80f5fc60") } }, { $group: { _id: "$_id", SizeName: { $first: { $size: { $ifNull: [ "$names", [] ] } } }, names: { $first: "$names" } } } ] );
but I would now use $size in every documents of my names array.
Is it possible to get this result (where "sizeList" is the result of $size) :
_id: ObjectId("W"),
SizeName: 3,
names: [
{
SizeList: 3,
number: 1,
list: ["A","B","C"]
},
{
SizeList: 1,
number: 2,
list: ["B"]
},
{
SizeList: 2,
number: 3,
list: ["A","C"]
}
...
],
...
All you really want here is a $project stage and use $map to alter the contents of the array members:
db.names.aggregate([
{ "$project": {
"SizeName": { "$size": "$names" },
"names": { "$map": {
"input": "$names",
"as": "el",
"in": {
"SizeList": { "$size": "$$el.list" },
"number": "$$el.number",
"list": "$$el.list"
}
}}
}}
])
You can alternately process with $unwind and group it all back again, but that's kind of long winded when you have MongoDB 2.6 anyway.
This isn't necessarily the most efficient way, but I think it does what you're aiming to do:
db.publication.aggregate( [
{ $unwind: '$names' },
{ $unwind: '$names.list' },
{ $group: {
_id: { _id: "$_id", number: "$names.number" },
SizeList: { $sum: 1 },
list: { $push: "$names.list" }
}
},
{ $group: {
_id: "$_id._id",
names: {
$push: {
number: "$_id.number",
list: "$list",
SizeList: "$SizeList"
}
},
SizeName: {$sum: 1}
}
}
]);