MongoDB > extract collection from nested array - mongodb

I've been trying every method I found on SO with no success. Trying
to accomplish a seemingly simple task (very easy with json/lodash for example) in MongoDB..
I have a collection:
db.users >
[
{
_id: 'userid',
profile: {
username: 'abc',
tests: [
{
_id: 'testid',
meta: {
category: 'math',
date: '9/2/2017',
...
}
questions: [
{
type: 'add',
correct: true,
},
{
type: 'subtract',
correct: true,
},
{
type: 'add',
correct: false,
},
{
type: 'multiply',
correct: false,
},
]
},
...
]
}
},
...
]
I want to end up with an array grouped by question type:
[
{
type: 'add',
correct: 5,
wrong: 3,
},
{
type: 'subtract',
correct: 4,
wrong: 9
}
...
]
I've tried different variations of aggregate, last one is:
db.users.aggregate([
{ $match: { 'profile.tests.meta.category': 'math' }},
{
$project: {
tests: {
$filter: {
input: "$profile.tests",
as: "test",
cond: { $eq: ['$$test.meta.category', 'math'] }
}
}
}
},
{
$project: {
question: "$tests.questions"
}
},
{ $unwind: "$questions"},
])
Also tried adding $group at the end of the pipeline:
{
$group:
{
_id: '$questions.type',
res: {
$addToSet: { correct: {$eq:['$questions.chosenAnswer', '$questions.answers.correct'] }
}
}
}
No variation gave me what I'm looking for, I'm sure I'm missing a core concept, I've looked over the documentation and couldn't figure it out.. what I'm basically looking for is a flatMap to extract away all the questions of all users and group them by type.
If anyone can lead me in the right direction, I'll greatly appreciate it :) thx. (Also, I'm using Meteor, so any query has to work in Meteor mongo)

You can try below aggregation in 3.4.
$filter to filter math categories with $map to project questions array in each matching category followed by $reduce and $concatArrays to get all questions into single array for all matching categories.
$unwind questions array and $group by type and $sum to compute correct and wrong count.
db.users.aggregate([
{
"$match": {
"profile.tests.meta.category": "math"
}
},
{
"$project": {
"questions": {
"$reduce": {
"input": {
"$map": {
"input": {
"$filter": {
"input": "$profile.tests",
"as": "testf",
"cond": {
"$eq": [
"$$testf.meta.category",
"math"
]
}
}
},
"as": "testm",
"in": "$$testm.questions"
}
},
"initialValue": [],
"in": {
"$concatArrays": [
"$$value",
"$$this"
]
}
}
}
}
},
{
"$unwind": "$questions"
},
{
"$group": {
"_id": "$questions.type",
"correct": {
"$sum": {
"$cond": [
{
"$eq": [
"$questions.correct",
true
]
},
1,
0
]
}
},
"wrong": {
"$sum": {
"$cond": [
{
"$eq": [
"$questions.correct",
false
]
},
1,
0
]
}
}
}
}
])

Related

Mongo DB Join on Primary/Foreign Key

I have two collections, viz: clib and mp.
The schema for clib is : {name: String, type: Number} and that for mp is: {clibId: String}.
Sample Document for clib:
{_id: ObjectId("6178008397be0747443a2a92"), name: "c1", type: 1}
{_id: ObjectId("6178008397be0747443a2a91"), name: "c2", type: 0}
Sample Document for mp:
{clibId: "6178008397be0747443a2a92"}
{clibId:"6178008397be0747443a2a91"}
While Querying mp, I want those clibId's that have type = 0 in clib collection.
Any ideas how this can be achieved?
One approach that I can think of was to use $lookUp, but that doesnt seem to be working. Also, I m not sure if this is anti-pattern for mongodb, another approach is to copy the type from clib to mp while saving mp document.
If I've understood correctly you can use a pipeline like this:
This query get the values from clib where its _id is the same as clibId and also has type = 0. Also I've added a $match stage to not output values where there is not any coincidence.
db.mp.aggregate([
{
"$lookup": {
"from": "clib",
"let": {
"id": "$clibId"
},
"pipeline": [
{
"$match": {
"$expr": {
"$and": [
{
"$eq": [
{
"$toObjectId": "$$id"
},
"$_id"
]
},
{
"$eq": [
"$type",
0
]
}
]
}
}
}
],
"as": "result"
}
},
{
"$match": {
"result": {
"$ne": []
}
}
}
])
Example here
db.mp.aggregate([
{
$lookup: {
from: "clib",
let: {
clibId: "$clibId"
},
pipeline: [
{
$match: {
$expr: {
$and: [
{
$eq: [ "$_id", "$$clibId" ],
}
]
}
}
},
{
$project: { type: 1, _id: 0 }
}
],
as: "clib"
}
},
{
"$unwind": "$clib"
},
{
"$match": {
"clib.type": 0
}
}
])
Test Here

How to filter an array of objects in mongoose by date field only selecting the most recent date

I'm trying to filter through an array of objects in a user collection on MongoDB. The structure of this particular collection looks like this:
name: "John Doe"
email: "john#doe.com"
progress: [
{
_id : ObjectId("610be25ae20ce4872b814b24")
challenge: ObjectId("60f9629edd16a8943d2cab9b")
date_unlocked: 2021-08-05T12:15:32.129+00:00
completed: true
date_completed: 2021-08-06T12:15:32.129+00:00
}
{
_id : ObjectId("611be24ae32ce4772b814b32")
challenge: ObjectId("60g6723efd44a6941l2cab81")
date_unlocked: 2021-08-06T12:15:32.129+00:00
completed: true
date_completed: 2021-08-07T12:15:32.129+00:00
}
]
date: 2021-08-04T13:06:34.129+00:00
How can I query the database using mongoose to return only the challenge with the most recent 'date_unlocked'?
I have tried: User.findById(req.user.id).select('progress.challenge progress.date_unlocked').sort({'progress.date_unlocked': -1}).limit(1);
but instead of returning a single challenge with the most recent 'date_unlocked', it is returning the whole user progress array.
Any help would be much appreciated, thank you in advance!
You can try this.
db.collection.aggregate([
{
"$unwind": {
"path": "$progress"
}
},
{
"$sort": {
"progress.date_unlocked": -1
}
},
{
"$limit": 1
},
{
"$project": {
"_id": 0,
"latestChallenge": "$progress.challenge"
}
}
])
Test the code here
Alternative solution is to use $reduce in that array.
db.collection.aggregate([
{
"$addFields": {
"latestChallenge": {
"$arrayElemAt": [
{
"$reduce": {
"input": "$progress",
"initialValue": [
"0",
""
],
"in": {
"$let": {
"vars": {
"info": "$$value",
"progress": "$$this"
},
"in": {
"$cond": [
{
"$gt": [
"$$progress.date_unlocked",
{
"$arrayElemAt": [
"$$info",
0
]
}
]
},
[
{
"$arrayElemAt": [
"$$info",
0
]
},
"$$progress.challenge"
],
"$$info"
]
}
}
}
}
},
1
]
}
}
},
{
"$project": {
"_id": 0,
"latestChallenge": 1
}
},
])
Test the code here
Mongoose can use raw MQL so you can use it.

Mongoose Summing Up subdocument array elements having same alias

I have a document like this:
_id:'someId',
sales:
[
{
_id:'111',
alias:'xxx',
amount:500,
name: Apple, //items with same alias always have same name and quantity
quantity:2
},
{
_id:'222',
alias:'abc',
amount:100,
name: Orange,
quantity:14
},
{
_id:'333',
alias:'xxx',
amount:300,
name: Apple, //items with same alias always have same name and quantity
quantity:2
}
]
The alias field is here to 'group' items/documents whenever they appear to have same alias i.e to be 'embeded' as one with the amount summed up.
I need to display some sort of a report in such a way that those elements which have same alias they should be displayed as ONE and the others which doesn't share same alias to remain as they are.
Example, For the sample document above, I need an output like this
[
{
alias:'xxx',
amount:800
},
{
alias:'abc',
amount:100
}
]
WHAT I HAVE TRIED
MyShop.aggregate([
{$group:{
_id: "$_id",
sales:{$last :"$sales"}
},
{$project:{
"sales.amount":1
}}
}
])
This just displays as a 'list' regardless of the alias. How do I achieve summing up amount based on the alias?
You can achieve this using $group
db.collection.aggregate([
{
$unwind: "$sales"
},
{
$group: {
_id: {
_id: "$_id",
alias: "$sales.alias"
},
sales: {
$first: "$sales"
},
_idsInvolved: {
$push: "$sales._id"
},
amount: {
$sum: "$sales.amount"
}
}
},
{
$group: {
_id: "$_id._id",
sales: {
$push: {
$mergeObjects: [
"$sales",
{
alias: "$_id.alias",
amount: "$amount",
_idsInvolved: "$_idsInvolved"
}
]
}
}
}
}
])
Mongo Playground
You can use below aggregation
db.collection.aggregate([
{
"$addFields": {
"sales": {
"$map": {
"input": {
"$setUnion": [
"$sales.alias"
]
},
"as": "m",
"in": {
"$let": {
"vars": {
"a": {
"$filter": {
"input": "$sales",
"as": "d",
"cond": {
"$eq": [
"$$d.alias",
"$$m"
]
}
}
}
},
"in": {
"amount": {
"$sum": "$$a.amount"
},
"alias": "$$m",
"_idsInvolved": "$$a._id"
}
}
}
}
}
}
}
])
MongoPlayground

Returning a document with two fields from the same array in MongoDB

Given documents such as
{
_id: 'abcd',
userId: '12345',
activities: [
{ status: 'login', timestamp: '10000001' },
{ status: 'logout', timestamp: '10000002' },
{ status: 'login', timestamp: '10000003' },
{ status: 'logout', timestamp: '10000004' },
]
}
I am trying to create a pipeline such as all users that have their latest login/logout activities recorded between two timestamps will be returned. For example, if the two timestamp values are between 10000002 and 10000003, the expected document should be
{
_id: 'abcd',
userId: '12345',
login: '10000003',
logout: '10000002'
}
Of if the two timestamp values are between -1 and 10000001, the expected document should be :
{
_id: 'abcd',
userId: '12345',
login: '10000001',
logout: null
}
Etc.
I know it has to do with aggregations, and I need to $unwind, etc., but I'm not sure about the rest, namely evaluating two fields from the same document array
You can try below aggregation:
db.col.aggregate([
{
$unwind: "$activities"
},
{
$match: {
$and: [
{ "activities.timestamp": { $gte: "10000001" } },
{ "activities.timestamp": { $lte: "10000002" } }
]
}
},
{
$sort: {
"activities.timestamp": -1
}
},
{
$group: {
_id: "$_id",
userId: { $first: "$userId" },
activities: { $push: "$activities" }
}
},
{
$addFields: {
login: { $arrayElemAt: [ { $filter: { input: "$activities", as: "a", cond: { $eq: [ "$$a.status", "login" ] } } } , 0 ] },
logout: { $arrayElemAt: [ { $filter: { input: "$activities", as: "a", cond: { $eq: [ "$$a.status", "logout" ] } } } , 0 ] }
}
},
{
$project: {
_id: 1,
userId: 1,
login: { $ifNull: [ "$login.timestamp", null ] },
logout: { $ifNull: [ "$logout.timestamp", null ] }
}
}
])
We need to use $unwind + $sort + $group to make sure that our activities will be sorted by timestamp. After $unwind you can use $match to apply filtering condition. Then you can use $filter with $arrayElemAt to get first (latest) value of filtered array. In the last $project you can explicitly use $ifNull (otherwise JSON key will be skipped if there's no value)
You can use below aggregation
Instead of $unwind use $lte and $gte with the $fitler aggregation.
db.collection.aggregate([
{ "$project": {
"userId": 1,
"login": {
"$max": {
"$filter": {
"input": "$activities",
"cond": {
"$and": [
{ "$gte": ["$$this.timestamp", "10000001"] },
{ "$lte": ["$$this.timestamp", "10000004"] },
{ "$lte": ["$$this.status", "login"] }
]
}
}
}
},
"logout": {
"$max": {
"$filter": {
"input": "$activities",
"cond": {
"$and": [
{ "$gte": ["$$this.timestamp", "10000001"] },
{ "$lte": ["$$this.timestamp", "10000004"] },
{ "$lte": ["$$this.status", "logout"] }
]
}
}
}
}
}}
])

How to aggregate percentages within arrays?

I'm trying to work out exactly how to achieve an aggregation, I could manually unwind and group back together at the end, but I'm sure I should be able to achieve this in a more concise way so I wanted to throw it out as I'm getting stuck.
My document structure (skipping out the un-interesting bits) looks like:
{
_id: ObjectId,
panels: [
{
visConfig: {
dataConfig: {
columns: [
{ element: "DX" },
{ element: "SE" },
]
}
}
},
{
visConfig: {
dataConfig: {
columns: [
{ element: "AB" },
{ element: "XY" },
]
}
}
}
]
}
What I want to do is calculate a percentage of the element overlaps with a given set to be provided. So for example for the document shown it would produce 25% for the set ["DX"] or 50% for the set ["DX", "AB"].
So I've tried a few things, I think I've settled on the nearest so far as:
$project: {
_id: 1,
total: { $sum: { $size: "$panels.visConfig.dataConfig.columns" } }
}
But I'm getting an error here which I don't understand:
The argument to $size must be an array, but was of type: missing
Then I'm also having issues with my conditional aggregation which seems to be returning 0 for all of the element values.
{
_id: 1,
"panels.visConfig.dataConfig.columns.element": {
$sum: {
$cond: [{
$setIsSubset: [
["DX"], ["$panels.visConfig.dataConfig.columns.element"]
]
}, 1, 0 ],
}
},
}
You can try below aggregation in 3.4 version.
db.colname.aggregate([
{"$project":{
"_id":1,
"total":{
"$reduce":{
"input":"$panels.visConfig.dataConfig.columns.element",
"initialValue":0,
"in":{"$add":["$$value",{"$size":"$$this"}]}
}},
"match":{
"$sum":{
"$map":{
"input":"$panels.visConfig.dataConfig.columns.element",
"in":{
"$size":{
"$setIntersection":[["DX","AB"],"$$this"]
}
}
}
}
}
}},
{"$project":{
"_id":1,
"percent":{"$multiply":[{"$divide":["$match","$total"]}, 100]}
}}])
Update - You can perform both match and total calculations in $reduce pipeline.
db.colname.aggregate([
{"$project":{
"_id":1,
"stats":{
"$reduce":{
"input":"$panels.visConfig.dataConfig.columns.element",
"initialValue":{"total":0,"match":0},
"in":{
"total":{"$add":["$$value.total",{"$size":"$$this"}]},
"match":{"$add":["$$value.match",{"$sum":{"$map":{"input":"$$this","in":{"$cond":[{"$in":["$$this", ["DX","AB"]] }, 1, 0]}}}}]}
}
}}
}},
{"$project":{
"_id":1,
"percent":{"$multiply":[{"$divide":["$stats.match","$stats.total"]}, 100]}
}}])
You can use $map + $reduce to get an array of all element values and then using $divide you can divide $filter-ed $size by total $size:
db.col.aggregate([
{
$project: {
elements: {
$reduce: {
input: {
$map: {
input: "$panels",
as: "panel",
in: "$$panel.visConfig.dataConfig.columns.element"
}
},
initialValue: [],
in: { $concatArrays: [ "$$this", "$$value" ] }
}
}
}
},
{
$project: {
percentage: {
$divide: [
{
$size: {
$filter: {
input: "$elements",
as: "element",
cond: {
$in: [
"$$element",
[ "AB", "XY" ] // your input here
]
}
}
}
},
{ $size: "$elements" }
]
}
}
}
])
Well, there are couple of ways to do this, but I these two pipelines show how I would do it.
var values = ["DX", "KL"]
First approach
[
{
"$project": {
"percent": {
"$let": {
"vars": {
"allsets": {
"$reduce": {
"input": "$panels.visConfig.dataConfig.columns",
"initialValue": [],
"in": {
"$concatArrays": [ "$$this.element", "$$value" ]
}
}
}
},
"in": {
"$multiply": [
{
"$divide": [
{
"$size": {
"$setIntersection": [ "$$allsets", values ]
}
},
{ "$size": "$$allsets" }
]
},
100
]
}
}
}
}
}
]
Second approach same idea here but, using one pipeline stage.
[
{
"$project": {
"percent": {
"$multiply": [
{
"$divide": [
{
"$sum": {
"$map": {
"input": "$panels.visConfig.dataConfig.columns.element",
"in": {
"$size": {
"$setIntersection": [ values, "$$this" ]
}
}
}
}
},
{
"$reduce": {
"input": "$panels.visConfig.dataConfig.columns.element",
"initialValue": 0,
"in": {
"$add": [ "$$value", { "$size": "$$this" } ]
}
}
}
]
},
100
]
}
}
}
]