MongoDB Aggregate and keep all values while setting an active key - mongodb

I want all the values returned from "Items" and when there is a match I want "isActive" to be true.
exports.listUserItems = function (req, res) {
// Aggregate results
User.aggregate([{
"$match": {
"username": req.params.username
}
}, {
"$lookup": {
"from": "Items",
"localField": "itemIds",
"foreignField": "_id",
"as": "items"
}
}, {
"$unwind": {
"path": "$items"
}
}, {
"$project": {
"item": "$items.item_name",
"isActive": '1',
"_id": 0
}
}], (err, result) => res.json(result));
};
What is the best way to go about accomplishing this?
I was going to return all the items and users items seperately, then compare them and object.value them etc. etc... that seems like overkill. Can it be done on the model side?
I'm unable to post the document structure because stackoverflow doesn't let me, but you should get the idea.
Edit:
User Document
{
"username" : "anonuser",
"items" : [
ObjectId("5ba8345f1e56fe8e6caaaa07"),
ObjectId("5ba706d64e82292e72e9ae71")
]
}
Then I have an "Items" collection which has 3 documents like this.
{
"_id" : ObjectId("5ba706d64e82292e72e9ae71"),
"item_name" : "Salary"
}
My expected json api output is to be.
[{"_id":"5ba706d64e82292e72e9ae71","item_name":"Salary","isActive":true},{"_id":"5ba8345f1e56fe8e6caaaa07","item_name":"Fulltime","isActive":true},{"_id":"5ba9af6c1e56fe8e6cab521e","item_name":"Advisor","isActive":false}]

You can try below aggregation
Items.aggregate([
{ "$lookup": {
"from": "users",
"let": { "itemsId": "$_id" },
"pipeline": [
{ "$match": { "$expr": { "$in": ["$$itemsId", "$items"] }}},
{ "$project": { "isActive": { "$literal": true }}}
],
"as": "items"
}},
{ "$addFields": {
"isActive": {
"$ifNull": [{ "$arrayElemAt": ["$items.isActive", 0] }, { "$literal": false }]
}
}},
{ "$project": { "items": 0 }}
])

Related

Mongodb aggregation lookup to add field in each array with condition

I have 3 collections.
User:
{
"_id":ObjectId("60a495cdd4ba8b122899d415"),
"email":"br9#gmail.com",
"username":"borhan"
}
Panel:
{
"_id": ObjectId("60a495cdd4ba8b122899d417"),
"name": "borhan",
"users": [
{
"role": "admin",
"joined": "2021-05-19T04:35:47.474Z",
"status": "active",
"_id": ObjectId("60a495cdd4ba8b122899d418"),
"user": ObjectId("60a495cdd4ba8b122899d415")
},
{
"role": "member",
"joined": "2021-05-19T04:35:47.474Z",
"status": "active",
"_id": ObjectId("60a49600d4ba8b122899d41a"),
"user": ObjectId("60a34e167958972d7ce6f966")
}
],
}
Team:
{
"_id":ObjectId("60a495e0d4ba8b122899d419"),
"title":"New Teams",
"users":[
ObjectId("60a495cdd4ba8b122899d415")
],
"panel":ObjectId("60a495cdd4ba8b122899d417")
}
I want to receive a output from querying Panel colllection just like this:
{
"_id": ObjectId("60a495cdd4ba8b122899d417"),
"name": "borhan",
"users": [
{
"role": "admin",
"joined": "2021-05-19T04:35:47.474Z",
"status": "active",
"_id": ObjectId("60a495cdd4ba8b122899d418"),
"user": ObjectId("60a495cdd4ba8b122899d415"),
"teams":[
{
"_id":ObjectId("60a495e0d4ba8b122899d419"),
"title":"New Teams",
"users":[
ObjectId("60a495cdd4ba8b122899d415")
],
"panel":ObjectId("60a495cdd4ba8b122899d417")
}
]
},
{
"role": "member",
"joined": "2021-05-19T04:35:47.474Z",
"status": "active",
"_id": ObjectId("60a49600d4ba8b122899d41a"),
"user": ObjectId("60a34e167958972d7ce6f966")
}
],
}
I mean i want to add teams field (which is array of teams that user is existed on it) to each user in Panel collection
Here is my match query in mongoose to select specific panel:
panel_model.aggregate([
{
$match: {
users: {
$elemMatch: {user: ObjectId("60a495cdd4ba8b122899d415"), role:"admin"}
}
}
},
])
Is it possible to get my output with $lookup or $addFields aggregations?
You need to join all three collections,
$unwind to deconstruct the array
$lookup there are two kind of lookups which help to join collections. First I used Multiple-join-conditions-with--lookup, and I used standrad lookup to join Users and Teams collections.
$match to match the user's id
$expr - when you use $match inside lookup, u must use it.
$set to add new fields
$group to we already destructed using $unwind. No we need to restructure it
here is the code
db.Panel.aggregate([
{ $unwind: "$users" },
{
"$lookup": {
"from": "User",
"let": { uId: "$users.user" },
"pipeline": [
{
$match: {
$expr: {
$eq: [ "$_id", "$$uId" ]
}
}
},
{
"$lookup": {
"from": "Team",
"localField": "_id",
"foreignField": "users",
"as": "teams"
}
}
],
"as": "users.join"
}
},
{
"$set": {
"users.getFirstElem": {
"$arrayElemAt": [ "$users.join", 0 ]
}
}
},
{
$set: {
"users.teams": "$users.getFirstElem.teams",
"users.join": "$$REMOVE",
"users.getFirstElem": "$$REMOVE"
}
},
{
"$group": {
"_id": "$_id",
"name": { "$first": "name" },
"users": { $push: "$users" }
}
}
])
Working Mongo playground
Note : Hope the panel and user collections are in 1-1 relationship. Otherwise let me know

$match in $lookup pipeline always returns all the documents, not filtering

I have two collections.
First Collection
{ _id:"601d07fece769400012f1280",
FieldName:"Employee",
Type:"Chapter" }
Second Collection
_id : "601d11905617082d7049153a",
SurveyId : "601d118e5617082d70491539",
TemplateName : "",
Elements : [
{
_id : "601d07fece769400012f1280",
FieldName : "Employee",
Type : "Chapter"
},
{
_id : "601d07fece769400012f1281",
FieldName : "Contract",
Type : "Chapter"
}]
When I do the lookup
'$lookup': {
'from': 'SecondCollection',
'localField': 'FieldName',
'foreignField': 'Elements.FieldName',
'as': 'SurveyInfo'
}
I will get the correct result, but I get the "Total size of documents in SecondCollection matching pipeline's $lookup stage exceeds 16793600 bytes" sometimes.
So I changed my approach to join the second collection with the pipeline, so I get only the field I need.
"from": 'SecondCollection',
"let": { "fieldname": "$fieldname" },
"pipeline": [
{ "$match":
{ "$expr":
{ "$eq": ["$elements.fieldname", "$$fieldname"] }}},
{ "$project": { "SurveyId": 1}}
],
"as": 'SurveyInfo'
Now the problem is this returns all the SecondCollection documents. Not returning the matching documents.
I am would like to get the below result
_id:"601d07fece769400012f1280",
FieldName:"Employee",
Type:"Chapter",
SurveyInfo: [
{
_id:"601d11905617082d7049153a",
SurveyId:"601d118e5617082d70491539"
}
]
I am not able to figure out the issue. Please help me.
Few Fixes,
You have to try $in instead of $eq because $Elements.FieldName will return array of string
Need to correct fields name in let and $match condition
db.FirstCollection.aggregate([
{
"$lookup": {
"from": "SecondCollection",
"let": { "fieldname": "$FieldName" },
"pipeline": [
{ "$match": { "$expr": { "$in": ["$$fieldname", "$Elements.FieldName"] } } },
{ "$project": { "SurveyId": 1 } }
],
"as": "SurveyInfo"
}
}
])
Playground
To match nested condition, you can try,
$reduce to iterate loop of Elements.Children.FieldName nested array and we are going to merge nested level array in single array of sting, using $concatArrays
db.FirstCollection.aggregate([
{
"$lookup": {
"from": "SecondCollection",
"let": { "fieldname": "$FieldName" },
"pipeline": [
{
"$match": {
"$expr": {
"$in": [
"$$fieldname",
{
$reduce: {
input: "$Elements.Children.FieldName",
initialValue: [],
in: { $concatArrays: ["$$this", "$$value"] }
}
}
]
}
}
},
{ "$project": { "SurveyId": 1 } }
],
"as": "SurveyInfo"
}
}
])
Playground

How to make lookup between two collections when an item in an array exists in the other collection?

In Lookup with a pipeline, I would like to get the linked records from an array in the parent document.
// Orders
[{
"_id" : ObjectId("5b5b91a25c68de2538620689"),
"Name" : "Test",
"Products" : [
ObjectId("5b5b919a5c68de2538620688"),
ObjectId("5b5b925a5c68de2538621a15")
]
}]
// Products
[
{
"_id": ObjectId("5b5b919a5c68de2538620688"),
"ProductName": "P1"
},
{
"_id": ObjectId("5b5b925a5c68de2538621a15"),
"ProductName": "P2"
}
,
{
"_id": ObjectId("5b5b925a5c68de2538621a55"),
"ProductName": "P3"
}
]
How to make a lookup between Orders and Products when Products field is an array!
I tried this query
db.getCollection("Orders").
aggregate(
[
{
$lookup:
{
from: "Products",
let: { localId: "$_id" , prods: "$Products" },
pipeline: [
{
"$match":
{
"_id" : { $in: "$$prods" }
}
},
{
$project:
{
"_id": "$_id",
"name": "$prods" ,
}
}
],
as: "linkedData"
}
},
{
"$skip": 0
},
{
"$limit": 1
},
]
)
This is not working because $in is expecting an array, and even though $$prods is an array, it is not accepting it.
Is my whole approach correct? How to make this magic join ?
You were going in the right direction the only thing you missed here is to use expr with in aggregation operator which matches the same fields of the document
db.getCollection("Orders").aggregate([
{ "$lookup": {
"from": "Products",
"let": { "localId": "$_id" , "prods": "$Products" },
"pipeline": [
{ "$match": { "$expr": { "$in": [ "$_id", "$$prods" ] } } },
{ "$project": { "_id": 1, "name": "$ProductName" } }
],
"as": "linkedData"
}},
{ "$skip": 0 },
{ "$limit": 1 }
])
See the docs here
You just need regular $lookup, the documentation states that:
If your localField is an array, you may want to add an $unwind stage to your pipeline. Otherwise, the equality condition between the localField and foreignField is foreignField: { $in: [ localField.elem1, localField.elem2, ... ] }.
So for below aggregation:
db.Orders.aggregate([
{
$lookup: {
from :"Products",
localField: "Products",
foreignField: "_id",
as: "Products"
}
}
])
you'll get following result for your sample data:
{
"_id" : ObjectId("5b5b91a25c68de2538620689"),
"Name" : "Test",
"Products" : [
{
"_id" : ObjectId("5b5b919a5c68de2538620688"),
"ProductName" : "P1"
},
{
"_id" : ObjectId("5b5b925a5c68de2538621a15"),
"ProductName" : "P2"
}
]
}
have you try unwind before the lookup. use unwind to brak the array annd then make lookup.

Multiple sorting in aggregate with mongo

When I use a multiple sorting in aggregate method with mongo, results aren't sorting in the right way. This is my query :
db.MyCollection.aggregate(
{
"$unwind": "$objects"
},
{
"$lookup": {
"from": "CollectionA",
"localField": "objects.itemId",
"foreignField": "_id",
"as": "itemOne"
}
},
{
"$lookup": {
"from": "CollectionB",
"localField": "user_id",
"foreignField": "id",
"as": "users"
}
},
{
"$lookup": {
"from": "CollectionC",
"localField": "objects.itemName",
"foreignField": "name",
"as": "itemTwo"
}
},
{
"$addFields": {
"item": {
"$arrayElemAt": [
"$itemOne",
0
]
},
"user": {
"$arrayElemAt": [
"$users",
0
]
},
"itemP": {
"$arrayElemAt": [
"$itemTwo",
0
]
}
}
},
{
"$addFields": {
"itemName": {
"$ifNull": [
"$item.name",
"$objects.itemName"
]
},
"userName": {
"$concat": [
"$user.firstname",
" ",
"$user.lastname"
]
}
}
},
{
"$match": {
"client_id": 2
}
},
{
"$skip": 1
},
{
"$limit": 10
},
{
"$project": {
"date": "$objects.date",
"state": "$objects.state"
}
},
{
"$sort": {
"objects.state": 1,
"objects.date": 1,
}
}
)
To precise: "date" field is Date type and "state" field is number type.
If I use only one sort : result order is correct. But if I use 2 sorts, results are not order correctly. Have you got any ideas, why ?
As #Neil Lunn says :
They don't sort correctly because you renamed the fields in $project. So it should be { $sort: { state: 1, date: 1 } }

Querying mongoDB for some chart data - my pipeline seems convoluted

This is a long question. If you bother answering, I will be extra grateful.
I have some time series data that I am trying to query to create various charts. The data format isn't the most simple, but I think my aggregation pipeline is getting a bit out of hand. I am planning to use charts.js to visualise the data on the client.
I will post a sample of my data below as well as my pipeline, with the desired output.
My question is in two parts - answering either one could solve the problem.
Does charts.js accept data formats other than an array of numbers per row? This would mean my pipeline could try to do less.
My pipeline doesn't quite get to the result I need. Can you recommend any alterations to get the correct result from my pipeline? Is there is a simpler way to get my desired output format?
Sample data
Here is a real data sample - a brand with one facebook account and one twitter account. There is some data for some dates in June. Lots of null day and month fields have been omitted.
Brand
[{
"_id": "5943f427e7c11ac3ad3652b0",
"name": "Brand1",
"facebookAccounts": [
"5943f427e7c11ac3ad3652ac",
],
"twitterAccounts": [
"5943f427e7c11ac3ad3652aa",
],
}]
FacebookAccounts
[
{
"_id" : "5943f427e7c11ac3ad3652ac"
"name": "Brand 1 Name",
"years": [
{
"date": "2017-01-01T00:00:00.000Z",
"months": [
{
"date": "2017-06-01T00:00:00.000Z",
"days": [
{
"date": "2017-06-16T00:00:00.000Z",
"likes": 904025,
},
{
"date": "2017-06-17T00:00:00.000Z",
"likes": null,
},
{
"date": "2017-06-18T00:00:00.000Z",
"likes": 904345,
},
],
},
],
}
]
}
]
Twitter accounts
[
{
"_id": "5943f427e7c11ac3ad3652aa",
"name": "Brand 1 Name",
"vendorId": "twitterhandle",
"years": [
{
"date": "2017-01-01T00:00:00.000Z",
"months": [
{
"date": "2017-06-01T00:00:00.000Z",
"days": [
{
"date": "2017-06-16T00:00:00.000Z",
"followers": 69390,
},
{
"date": "2017-06-17T00:00:00.000Z",
"followers": 69397,
{
"date": "2017-06-18T00:00:00.000Z",
"followers": 69428,
},
{
"date": "2017-06-19T00:00:00.000Z",
"followers": 69457,
},
]
},
],
}
]
}
]
The query
For this example, I want, for each brand, a daily sum of facebook likes and twitter followers between June 16th and June 18th. So here, the required format is:
{
brand: Brand1,
date: ["2017-06-16T00:00:00.000Z", "2017-06-17T00:00:00.000Z", "2017-06-18T00:00:00.000Z"],
stat: [973415, 69397, 973773]
}
The pipeline
The pipeline seems more convoluted due to the population, but I accept that complexity and it is necessary. Here are the steps:
db.getCollection('brands').aggregate([
{ $match: { _id: { $in: [ObjectId("5943f427e7c11ac3ad3652b0") ] } } },
// Unwind all relevant account types. Make one row per account
{ $project: {
accounts: { $setUnion: [ '$facebookAccounts', '$twitterAccounts' ] } ,
name: '$name'
}
},
{ $unwind: '$accounts' },
// populate the accounts.
// These transform the arrays of facebookAccount ObjectIds into the objects described above.
{ $lookup: { from: 'facebookaccounts', localField: 'accounts', foreignField: '_id', as: 'facebookAccounts' } },
{ $lookup: { from: 'twitteraccounts', localField: 'accounts', foreignField: '_id', as: 'twitterAccounts' } },
// unwind the populated accounts. Back to one record per account.
{ $unwind: { path: '$facebookAccounts', preserveNullAndEmptyArrays: true } },
{ $unwind: { path: '$twitterAccounts', preserveNullAndEmptyArrays: true } },
// unwind to the granularity we want. Here it is one record per day per account per brand.
{ $unwind: { path: '$facebookAccounts.years', preserveNullAndEmptyArrays: true } },
{ $unwind: { path: '$facebookAccounts.years.months', preserveNullAndEmptyArrays: true } },
{ $unwind: { path: '$facebookAccounts.years.months.days', preserveNullAndEmptyArrays: true } },
{ $unwind: { path: '$facebookAccounts.years.months.days', preserveNullAndEmptyArrays: true } },
{ $unwind: { path: '$twitterAccounts.years', preserveNullAndEmptyArrays: true } },
{ $unwind: { path: '$twitterAccounts.years.months', preserveNullAndEmptyArrays: true } },
{ $unwind: { path: '$twitterAccounts.years.months.days', preserveNullAndEmptyArrays: true } },
{ $unwind: { path: '$twitterAccounts.years.months.days', preserveNullAndEmptyArrays: true } },
// Filter each one between dates
{ $match: { $or: [
{ $and: [
{ 'facebookAccounts.years.months.days.date': { $gte: new Date('2017-06-16') } } ,
{ 'facebookAccounts.years.months.days.date': { $lte: new Date('2017-06-18') } }
]},
{ $and: [
{ 'twitterAccounts.years.months.days.date': { $gte: new Date('2017-06-16') } } ,
{ 'twitterAccounts.years.months.days.date': { $lte: new Date('2017-06-18') } }
]}
] }},
// Build stats and date arrays for each account
{ $group: {
_id: '$accounts',
brandId: { $first: '$_id' },
brandName: { $first: '$name' },
stat: {
$push: {
$sum: {
$add: [
{ $ifNull: ['$facebookAccounts.years.months.days.likes', 0] },
{ $ifNull: ['$twitterAccounts.years.months.days.followers', 0] }
]
}
}
},
date: { $push: { $ifNull: ['$facebookAccounts.years.months.days.date', '$twitterAccounts.years.months.days.date'] } } ,
}}
])
This gives me the output format
[{
_id: accountId, // facebook
brandName: 'Brand1'
date: ["2017-06-16T00:00:00.000Z", "2017-06-17T00:00:00.000Z", "2017-06-18T00:00:00.000Z"],
stat: [904025, null, 904345]
},
{
_id: accountId // twitter
brandName: 'Brand1',
date: ["2017-06-16T00:00:00.000Z", "2017-06-17T00:00:00.000Z", "2017-06-18T00:00:00.000Z"],
stat: [69457, 69390, 69397]
}]
So I now need to perform column-wise addition on my stat properties.And then I am stuck - I feel like there should be a more pipeline friendly way to sum these rather than column-wise addition.
Note I accept the extra work that the population required and am happy with that. Most of the repetition is done programmatically.
Thank you if you've gotten this far.
I can trim a lot of fat out of this and keep it compatible with MongoDB 3.2 ( which you must be using at least due to preserveNullAndEmptyArrays ) available operators with a few simple actions. Mostly by simply joining the arrays immediately after $lookup, which is the best place to do it:
Short Optimize
db.brands.aggregate([
{ "$lookup": {
"from": "facebookaccounts",
"localField": "facebookAccounts",
"foreignField": "_id",
"as": "facebookAccounts"
}},
{ "$lookup": {
"from": "twitteraccounts",
"localField": "twitterAccounts",
"foreignField": "_id",
"as": "twitterAccounts"
}},
{ "$project": {
"name": 1,
"all": {
"$concatArrays": [ "$facebookAccounts", "$twitterAccounts" ]
}
}},
{ "$match": {
"all.years.months.days.date": {
"$gte": new Date("2017-06-16"), "$lte": new Date("2017-06-18")
}
}},
{ "$unwind": "$all" },
{ "$unwind": "$all.years" },
{ "$unwind": "$all.years.months" },
{ "$unwind": "$all.years.months.days" },
{ "$match": {
"all.years.months.days.date": {
"$gte": new Date("2017-06-16"), "$lte": new Date("2017-06-18")
}
}},
{ "$group": {
"_id": {
"brand": "$name",
"date": "$all.years.months.days.date"
},
"total": {
"$sum": {
"$sum": [
{ "$ifNull": [ "$all.years.months.days.likes", 0 ] },
{ "$ifNull": [ "$all.years.months.days.followers", 0 ] }
]
}
}
}},
{ "$sort": { "_id": 1 } },
{ "$group": {
"_id": "$_id.brand",
"date": { "$push": "$_id.date" },
"stat": { "$push": "$total" }
}}
])
This gives the result:
{
"_id" : "Brand1",
"date" : [
ISODate("2017-06-16T00:00:00Z"),
ISODate("2017-06-17T00:00:00Z"),
ISODate("2017-06-18T00:00:00Z")
],
"stat" : [
973415,
69397,
973773
]
}
With MongoDB 3.4 we could probably speed it up a "little" more by filtering the arrays and breaking them down before we eventually $unwind to make this work across documents, or maybe even not worry about going across documents at all if the "name" from "brands" is unique. The pipeline operations to compact down the arrays "in place" though are quite cumbersome to code, if a "little" better on performance.
You seem to be doing this "per brand" or for a small sample, so it's likely of little consequence.
As for the chartjs data format, I don't seem to be able to get my hands on what I believe is a different data format to the array format here, but again this should have little bearing.
The main point I see addressed is we can easily move away from your previous output that separated the "facebook" and "twitter" data, and simply aggregate by date moving all the data together "before" the arrays are constructed.
That last point then obviates the need for further "convoluted" operations to attempt to "merge" those two documents and the arrays produced.
Alternate Optimize
As an alternate approach where this does in fact not aggregate across documents, then you can essentially do the "filter" on the array in place and then simply sum and reshape the received result in client code.
db.brands.aggregate([
{ "$lookup": {
"from": "facebookaccounts",
"localField": "facebookAccounts",
"foreignField": "_id",
"as": "facebookAccounts"
}},
{ "$lookup": {
"from": "twitteraccounts",
"localField": "twitterAccounts",
"foreignField": "_id",
"as": "twitterAccounts"
}},
{ "$project": {
"name": 1,
"all": {
"$map": {
"input": { "$concatArrays": [ "$facebookAccounts", "$twitterAccounts" ] },
"as": "all",
"in": {
"years": {
"$map": {
"input": "$$all.years",
"as": "year",
"in": {
"months": {
"$map": {
"input": "$$year.months",
"as": "month",
"in": {
"days": {
"$filter": {
"input": "$$month.days",
"as": "day",
"cond": {
"$and": [
{ "$gte": [ "$$day.date", new Date("2017-06-16") ] },
{ "$lte": [ "$$day.date", new Date("2017-06-18") ] }
]
}
}
}
}
}
}
}
}
}
}
}
}
}}
]).map(doc => {
doc.all = [].concat.apply([],[].concat.apply([],[].concat.apply([],doc.all.map(d => d.years)).map(d => d.months)).map(d => d.days));
doc.all = doc.all.reduce((a,b) => {
if ( a.findIndex( d => d.date.valueOf() == b.date.valueOf() ) != -1 ) {
a[a.findIndex( d => d.date.valueOf() == b.date.valueOf() )].stat += (b.hasOwnProperty('likes')) ? (b.likes || 0) : (b.followers || 0);
} else {
a = a.concat([{ date: b.date, stat: (b.hasOwnProperty('likes')) ? (b.likes || 0) : (b.followers || 0) }]);
}
return a;
},[]);
doc.date = doc.all.map(d => d.date);
doc.stat = doc.all.map(d => d.stat);
delete doc.all;
return doc;
})
This really leaves all the things that "need" to happen on the server, on the server. And it's then a fairly trivial task to "flatten" the array and process to "sum up" and reshape it. This would mean less load on the server, and the data returned is not really that much greater per document.
Gives the same result of course:
[
{
"_id" : ObjectId("5943f427e7c11ac3ad3652b0"),
"name" : "Brand1",
"date" : [
ISODate("2017-06-16T00:00:00Z"),
ISODate("2017-06-17T00:00:00Z"),
ISODate("2017-06-18T00:00:00Z")
],
"stat" : [
973415,
69397,
973773
]
}
]
Committing to the Diet
The biggest problem you really have is with the multiple collections and the heavily nested documents. Neither of these is doing you any favors here and will with larger results cause real performance problems.
The nesting in particular is completely unnecessary as well as not being very maintainable since there are limitations to "update" where you have nested arrays. See the positional $ operator documentation, as well as many posts about this.
Instead you really want a single collection with all those "days" entries in it. You can always work with that source easily for query as well as aggregation purposes and it should look something like this:
{
"_id" : ObjectId("5948cd5cd6eb0b7d6ac38097"),
"date" : ISODate("2017-06-16T00:00:00Z"),
"likes" : 904025,
"__t" : "Facebook",
"account" : ObjectId("5943f427e7c11ac3ad3652ac")
}
{
"_id" : ObjectId("5948cd5cd6eb0b7d6ac38098"),
"date" : ISODate("2017-06-17T00:00:00Z"),
"likes" : null,
"__t" : "Facebook",
"account" : ObjectId("5943f427e7c11ac3ad3652ac")
}
{
"_id" : ObjectId("5948cd5cd6eb0b7d6ac38099"),
"date" : ISODate("2017-06-18T00:00:00Z"),
"likes" : 904345,
"__t" : "Facebook",
"account" : ObjectId("5943f427e7c11ac3ad3652ac")
}
{
"_id" : ObjectId("5948cd5cd6eb0b7d6ac3809a"),
"date" : ISODate("2017-06-16T00:00:00Z"),
"followers" : 69390,
"__t" : "Twitter",
"account" : ObjectId("5943f427e7c11ac3ad3652aa")
}
{
"_id" : ObjectId("5948cd5cd6eb0b7d6ac3809b"),
"date" : ISODate("2017-06-17T00:00:00Z"),
"followers" : 69397,
"__t" : "Twitter",
"account" : ObjectId("5943f427e7c11ac3ad3652aa")
}
{
"_id" : ObjectId("5948cd5cd6eb0b7d6ac3809c"),
"date" : ISODate("2017-06-18T00:00:00Z"),
"followers" : 69428,
"__t" : "Twitter",
"account" : ObjectId("5943f427e7c11ac3ad3652aa")
}
{
"_id" : ObjectId("5948cd5cd6eb0b7d6ac3809d"),
"date" : ISODate("2017-06-19T00:00:00Z"),
"followers" : 69457,
"__t" : "Twitter",
"account" : ObjectId("5943f427e7c11ac3ad3652aa")
}
Combining those referenced in the brands collection as well:
{
"_id" : ObjectId("5943f427e7c11ac3ad3652b0"),
"name" : "Brand1",
"accounts" : [
ObjectId("5943f427e7c11ac3ad3652ac"),
ObjectId("5943f427e7c11ac3ad3652aa")
]
}
Then you simply aggregate like this:
db.brands.aggregate([
{ "$lookup": {
"from": "social",
"localField": "accounts",
"foreignField": "account",
"as": "accounts"
}},
{ "$unwind": "$accounts" },
{ "$match": {
"accounts.date": {
"$gte": new Date("2017-06-16"), "$lte": new Date("2017-06-18")
}
}},
{ "$group": {
"_id": {
"brand": "$name",
"date": "$accounts.date"
},
"stat": {
"$sum": {
"$sum": [
{ "$ifNull": [ "$accounts.likes", 0 ] },
{ "$ifNull": [ "$accounts.followers", 0 ] }
]
}
}
}},
{ "$sort": { "_id": 1 } },
{ "$group": {
"_id": "$_id.brand",
"date": { "$push": "$_id.date" },
"stat": { "$push": "$stat" }
}}
])
This is actually the most efficient thing you can do, and it's mostly because of what actually happens on the server. We need to look at the "explain" output to see what happens to the pipeline here:
{
"$lookup" : {
"from" : "social",
"as" : "accounts",
"localField" : "accounts",
"foreignField" : "account",
"unwinding" : {
"preserveNullAndEmptyArrays" : false
},
"matching" : {
"$and" : [
{
"date" : {
"$gte" : ISODate("2017-06-16T00:00:00Z")
}
},
{
"date" : {
"$lte" : ISODate("2017-06-18T00:00:00Z")
}
}
]
}
}
}
This is what happens when you send $lookup -> $unwind -> $match to the server as the latter two stages are "hoisted" into the $lookup itself. This reduces the results in the actual "query" run on the collection to be joined.
Without that sequence, then $lookup potentially pulls in "a lot of data" with no constraint, and would break the 16MB BSON limit under most normal loads.
So not only is the process a lot more simple in the altered form, it actually "scales" where the present structure will not. This is something that you seriously should consider.