query length of nested lists - mongodb

I want to query following entries that have lists of lists and get their lengths:

db.collection.aggregate([
{ $unwind: '$molecules' },
{ $group: {
_id: '$scene',
molecules: { $push: { $size: '$molecules.data' } },
} },
])
gives
{ _id: 7674, molecules: [ 2, 1 ] }

Related

MongoDB get size of unwinded array

I'm trying to return size of 'orders' and sum of 'item' values for each 'order' for each order from documents like the example document:
orders: [
{
order_id: 1,
items: [
{
item_id: 1,
value:100
},
{
item_id: 2,
value:200
}
]
},
{
order_id: 2,
items: [
{
item_id: 3,
value:300
},
{
item_id: 4,
value:400
}
]
}
]
I'm using following aggregation to return them, everything works fine except I can't get size of 'orders' array because after unwind, 'orders' array is turned into an object and I can't call $size on it since it is an object now.
db.users.aggregate([
{
$unwind: "$orders"
},
{
$project: {
_id: 0,
total_values: {
$reduce: {
input: "$orders.items",
initialValue: 0,
in: { $add: ["$$value", "$$this.value"] }
}
},
order_count: {$size: '$orders'}, //I get 'The argument to $size must be an array, but was of type: object' error
}
},
])
the result I expected is:
{order_count:2, total_values:1000} //For example document
{order_count:3, total_values:1500}
{order_count:5, total_values:2500}
I found a way to get the results that I wanted. Here is the code
db.users.aggregate([
{
$project: {
_id: 1, orders: 1, order_count: { $size: '$orders' }
}
},
{ $unwind: '$orders' },
{
$project: {
_id: '$_id', items: '$orders.items', order_count: '$order_count'
}
},
{ $unwind: '$items' },
{
$project: {
_id: '$_id', sum: { $sum: '$items.value' }, order_count: '$order_count'
}
},
{
$group: {
_id: { _id: '$_id', order_count: '$order_count' }, total_values: { $sum: '$sum' }
}
},
])
output:
{ _id: { _id: ObjectId("5dffc33002ef525620ef09f1"), order_count: 2 }, total_values: 1000 }
{ _id: { _id: ObjectId("5dffc33002ef525620ef09f2"), order_count: 3 }, total_values: 1500 }

How can I get a sum of sums using MongoDB aggregation?

I would like to get the total (sum) of the totalHoursForYear value. Here is my current aggregation:
const byYear = await this.workHistoryModel.aggregate([
{
$group: {
_id: '$year',
history: {
$push: '$$ROOT'
},
totalHoursForYear: {
$sum: {
$add: [
{ $multiply: ['$eightHourDays', 8] },
{ $multiply: ['$tenHourDays', 10] },
{ $multiply: ['$twelveHourDays', 12] },
]
}
},
}
},
]);
Now I need to get a sum of the totalHoursForYear to get the total for all years. Is this possible? I can't seem to get the syntax correct. When I attempt to add another sum:
$group: {
...,
totalHours: {
$sum: {
$add: '$totalHoursForYear'
},
$push: '$$ROOT'
}
}
I get an error: "The field 'totalHours' must specify one accumulator"
When you need total sum, you can do another $group with _id:null which means to consider all documents
With your script, add the following to get the total and get the same structure.
{
$group: {
_id: null,
data: {
$push: "$$ROOT"
},
total: {
$sum: "$totalHoursForYear"
}
}
},
{
"$unwind": "$data"
},
{
"$addFields": {
_id: "$data._id",
history: "$data.history",
totalHoursForYear: "$data.totalHoursForYear",
_id: "$$REMOVE",
data: "$$REMOVE"
}
}
Working Mongo playground

MongoDB multiple levels embedded array query

I have a document like this:
{
_id: 1,
data: [
{
_id: 2,
rows: [
{
myFormat: [1,2,3,4]
},
{
myFormat: [1,1,1,1]
}
]
},
{
_id: 3,
rows: [
{
myFormat: [1,2,7,8]
},
{
myFormat: [1,1,1,1]
}
]
}
]
},
I want to get distinct myFormat values as a complete array.
For example: I need the result as: [1,2,3,4], [1,1,1,1], [1,2,7,8]
How can I write mongoDB query for this?
Thanks for the help.
Please try this, if every object in rows has only one field myFormat :
db.getCollection('yourCollection').distinct('data.rows')
Ref : mongoDB Distinct Values for a field
Or if you need it in an array & also objects in rows have multiple other fields, try this :
db.yourCollection.aggregate([{$project :{'data.rows.myFormat':1}},{ $unwind: '$data' }, { $unwind: '$data.rows' },
{ $group: { _id: '$data.rows.myFormat' } },
{ $group: { _id: '', distinctValues: { $push: '$_id' } } },
{ $project: { distinctValues: 1, _id: 0 } }])
Or else:
db.yourCollection.aggregate([{ $project: { values: '$data.rows.myFormat' } }, { $unwind: '$values' }, { $unwind: '$values' },
{ $group: { _id: '', distinctValues: { $addToSet: '$values' } } }, { $project: { distinctValues: 1, _id: 0 } }])
Above aggregation queries would get what you wanted, but those can be tedious on large datasets, try to run those and check if there is any slowness, if you're using for one-time then if needed you can consider using {allowDiskUse: true} & irrespective of one-time or not you need to check on whether to use preserveNullAndEmptyArrays:true or not.
Ref : allowDiskUse , $unwind preserveNullAndEmptyArrays

Top documents per bucket

I would like to get the documents with the N highest fields for each of N categories. For example, the posts with the 3 highest scores from each of the past 3 months. So each month would have 3 posts that "won" for that month.
Here is what my work so far has gotten, simplified.
// simplified
db.posts.aggregate([
{$bucket: {
groupBy: "$createdAt",
boundaries: [
ISODate('2019-06-01'),
ISODate('2019-07-01'),
ISODate('2019-08-01')
],
default: "Other",
output: {
posts: {
$push: {
// ===
// This gets all the posts, bucketed by month
score: '$score',
title: '$title'
// ===
}
}
}
}},
{$match: {_id: {$ne: "Other"}}}
])
I attempted to use the $slice operator in between the // ===s, but go an error (below).
postResults: {
$each: [{
score: '$score',
title: '$title'
}],
$sort: {score: -1},
$slice: 2
}
An object representing an expression must have exactly one field: { $each: [ { score: \"$score\", title: \"$title\" } ], $sort: { baseScore: -1.0 }, $slice: 2.0 }
$slice you're trying to use is dedicated for update operations. To get top N posts you need to run $unwind, then $sort and $group to get ordered array. As a last step you can use $slice (aggregation), try:
db.posts.aggregate([
{$bucket: {
groupBy: "$createdAt",
boundaries: [
ISODate('2019-06-01'),
ISODate('2019-07-08'),
ISODate('2019-08-01')
],
default: "Other",
output: {
posts: {
$push: {
score: '$score',
title: '$title'
}
}
}
}},
{ $match: {_id: {$ne: "Other"}}},
{ $unwind: "$posts" },
{ $sort: { "posts.score": -1 } },
{ $group: { _id: "$_id", posts: { $push: { "score": "$posts.score", "title": "$posts.title" } } } },
{ $project: { _id: 1, posts: { $slice: [ "$posts", 3 ] } } }
])

total of all groups totals using mongodb

i did this Aggregate pipeline , and i want add a field contains the Global Total of all groups total.
{ "$match": query },
{ "$sort": cursor.sort },
{ "$group": {
_id: { key:"$paymentFromId"},
items: {
$push: {
_id:"$_id",
value:"$value",
transaction:"$transaction",
paymentMethod:"$paymentMethod",
createdAt:"$createdAt",
...
}
},
count:{$sum:1},
total:{$sum:"$value"}
}}
{
//i want to get
...project groups , goupsTotal , groupsCount
}
,{
"$skip":cursor.skip
},{
"$limit":cursor.limit
},
])
you need to use $facet (avaialble from MongoDB 3.4) to apply multiple pipelines on the same set of docs
first pipeline: skip and limit docs
second pipeline: calculate total of all groups
{ "$match": query },
{ "$sort": cursor.sort },
{ "$group": {
_id: { key:"$paymentFromId"},
items: {
$push: "$$CURRENT"
},
count:{$sum:1},
total:{$sum:"$value"}
}
},
{
$facet: {
docs: [
{ $skip:cursor.skip },
{ $limit:cursor.limit }
],
overall: [
{$group: {
_id: null,
groupsTotal: {$sum: '$total'},
groupsCount:{ $sum: '$count'}
}
}
]
}
the final output will be
{
docs: [ .... ], // array of {_id, items, count, total}
overall: { } // object with properties groupsTotal, groupsCount
}
PS: I've replaced the items in the third pipe stage with $$CURRENT which adds the whole document for the sake of simplicity, if you need custom properties then specify them.
i did it in this way , project the $group result in new field doc and $sum the sub totals.
{
$project: {
"doc": {
"_id": "$_id",
"total": "$total",
"items":"$items",
"count":"$count"
}
}
},{
$group: {
"_id": null,
"globalTotal": {
$sum: "$doc.total"
},
"result": {
$push: "$doc"
}
}
},
{
$project: {
"result": 1,
//paging "result": {$slice: [ "$result", cursor.skip,cursor.limit ] },
"_id": 0,
"globalTotal": 1
}
}
the output
[
{
globalTotal: 121500,
result: [ [group1], [group2], [group3], ... ]
}
]