MongoDB : create a view which is the union of several collections - mongodb

I currently have a collection that i need to split in several smaller collections. Is there a way to make a View containing the union of all my smaller collections ?
According to the MongoDB Manual, i could use the $lookup operator in the pipeline, but it ends up being more like a "join" than an "union".
Here is an example of what i want to do :
Current collection :
{ _id: 1, name: "abc", country: "us" }
{ _id: 2, name: "def", country: "us" }
{ _id: 3, name: "123", country: "de" }
{ _id: 4, name: "456", country: "de" }
Splitting into :
Collection_US
{ _id: 1, name: "abc", country: "us" }
{ _id: 2, name: "def", country: "us" }
Collection_DE
{ _id: 3, name: "123", country: "de" }
{ _id: 4, name: "456", country: "de" }
And then, make a view :
View
{ _id: 1, name: "abc", country: "us" }
{ _id: 2, name: "def", country: "us" }
{ _id: 3, name: "123", country: "de" }
{ _id: 4, name: "456", country: "de" }
Is it possible to do this ?

This is the same modified of taminov's code.
db.createView('union_view', 'us', [
{
$facet: {
us: [
{$match: {}}
],
de: [
{$limit: 1},
{
$lookup: {
from: 'de',
localField: '__unexistingfield',
foreignField: '__unexistingfield',
as: '__col2'
}
},
{$unwind: '$__col2'},
{$replaceRoot: {newRoot: '$__col2'}}
]
},
},
{$project: {data: {$concatArrays: ['$us', '$de']}}},
{$unwind: '$data'},
{$replaceRoot: {newRoot: '$data'}}
])

its very hacky but will work for small collections. you may end up having to use a real collection if the collections are big.
db.createView('union_view', 'col1', [
{
$facet: {
col1: [
{ $match:{}}
],
col2: [
{ $limit:1},
{ $lookup:{
from: 'col2',
localField: '__unexistingfield',
foreignField: '__unexistingfield',
as: '__col2'
}},
{ $unwind:'$__col2'},
{ $replaceRoot: {newRoot: '$__col2'}}
]
},
},
{ $project: { filesFolders: {$setUnion: ['$files', '$folders']}}},
{ $unwind: '$filesFolders' },
{ $replaceRoot: {newRoot: '$filesFolders'}}
])

Related

mongodb - Merge object arrays based on key

In a mongodb database, I have the following data:
// db.people
[
{
_id: ObjectId("..."),
id: 111111111,
name: "George",
relatedPeople: [{ id: 222222222, relation: "child" }],
// A bunch of other data I don't care about
},
{
_id: ObjectId("..."),
id: 222222222,
name: "Jacob",
relatedPeople: [{ id: 111111111, relation: "father" }],
// A bunch of other data I don't care about
},
{
_id: ObjectId("..."),
id: 333333333,
name: "some guy",
relatedPeople: [],
// A bunch of other data I don't care about
},
]
I would like to query the people, and select only the fields I've shown, but have extra data in relatedPeople (id + relation + name)
So the desired output would be:
[
{
_id: ObjectId("..."),
id: 111111111,
name: "George",
relatedPeople: [{ id: 222222222, relation: "child", name: "Jacob" }],
},
{
_id: ObjectId("..."),
id: 222222222,
name: "Jacob",
relatedPeople: [{ id: 111111111, relation: "father", name: "George" }],
},
{
_id: ObjectId("..."),
id: 333333333,
name: "some guy",
relatedPeople: [],
},
]
I can get something close, with this query:
db.people.aggregate([
// { $match: { /** ... */ }, },
{
$lookup: {
from: "people",
let: { relatedPeopleIds: "$relatedPeople.id" },
pipeline: [
{ $match: { $expr: { $in: ["$id", "$$relatedPeopleIds"] } } },
{
$project: {
id: 1,
name: 1,
},
},
],
as: "relatedPeople2",
},
},
{
$project: {
id: 1,
name: 1,
relatedPeople: 1,
relatedPeople2: 1,
}
}
]);
But the data is split between two fields. I want to merge each object in the arrays by their id, and place the result array in relatedPeople
I found this question, but that merge is done over a range and uses $arrayElementAt which I can't use
I also tried looking at this question, but I couldn't get the answer to work (Kept getting empty results)
You can add one step using $arrayElementAt with $indexOfArray:
db.people.aggregate([
// { $match: { /** ... */ }, },
{$project: {id: 1, name: 1, relatedPeople: 1}},
{$lookup: {
from: "people",
let: { relatedPeopleIds: "$relatedPeople.id" },
pipeline: [
{ $match: { $expr: { $in: ["$id", "$$relatedPeopleIds"] } } },
{
$project: {
id: 1,
name: 1,
},
},
],
as: "relatedPeople2",
},
},
{$set: {
relatedPeople: {$map: {
input: "$relatedPeople",
in: {$mergeObjects: [
"$$this",
{$arrayElemAt: [
"$relatedPeople2",
{$indexOfArray: ["$relatedPeople2.id", "$$this.id"]}
]}
]}
}}
}},
{$unset: "relatedPeople2"}
])
See how it works on the playground example

Nodejs MongoDb add column to query that is a count of specific records

Considering the following document structure:
{_id: 1, name: 'joe', snapshot: null, age: 30}
{_id: 2, name: 'joe', snapshot: 'snapshot1', age: 30}
{_id: 3, name: 'joe', snapshot: 'snapshot15', age: 30}
{_id: 4, name: 'joe', snapshot: 'snapshot23', age: 30}
How would I perform a query that groups on the name field and adds an additional field that is a count of the remaining records containing subtree: 'additionalinfo'. It would look like this:
{_id: 1, name: 'joe', snapcount: 3, age: 30}
I've been able to group using aggregations but I can't quite get it like this.
My own solution:
I ultimately restructured my data to look like this instead:
{
_id: 1,
name: 'joe',
snapshots: [
{name: 'snap17', id: 1},
{name: 'snap15', id: 2},
{name: 'snap14', id: 3}
],
age: 30
}
This allows me to just check snapshots.length to solve my original problem. However; the answers in this post where very helpful and answered the original question.
Adding another aggregation query to do it: playground link: try it
db.collection.aggregate([
{
$match: {
"snapshot": {
$exists: true,
$ne: null
}
}
},
{
$group: {
_id: "$name",
snapcount: {
$sum: 1
},
age: {
"$first": "$age"
},
name: {
"$first": "$name"
}
}
},
{
"$unset": "_id"
}
])
Based on the comments, the query worked for OP:
db.collection.aggregate([
{
$match: {
"snapshot": {
$exists: true,
$ne: null
}
}
},
{
$group: {
_id: "$name",
snapcount: {
$sum: 1
},
age: {
"$first": "$age"
},
name: {
"$first": "$name"
},
id: {
"$first": "$_id"
}
}
},
{
"$unset": "_id"
}
])
Here's one way you could do it.
db.collection.aggregate([
{
"$group": {
"_id": "$name",
"name": {"$first": "$name"},
"age": {"$first": "$age"},
"snapcount": {
"$sum": {
"$cond": [
{"$eq": [{"$type": "$snapshot"}, "string"]},
1,
0
]
}
}
}
},
{"$unset": "_id"}
])
Try it on mongoplayground.net.

MongoDB - How to access a newly created array field created with $lookup and aggregate function

Assuming I have two collections:
courses:
[
{
_id: 1,
name: "Geometry",
teacher_id: 1
},
{
_id: 2,
name: "English",
teacher_id: 2
}
]
teachers:
[
{
_id: 1,
firstName: "John",
lastName: "Adams"
},
{
_id: 2,
firstName: "Mary",
lastName: "Jane"
}
]
Now I perform an aggregation on the two collections to create something similar to a join in SQL:
db.collection("courses").aggregate([
{
$lookup:{
from: "teachers",
localField: "teacher_id",
foreignField: "_id",
as: "teacher_info"
}
},
{
$match:{
//I want to perform a match or filter here on the teacher_info
}
}
]);
The $lookup and aggregation will return a list of documents that have a new teacher_info array field.
[
{
_id: 1,
name: "Geometry",
teacher_id: 1,
teacher_info: [
{
_id: 1,
firstName: "John",
lastName: "Adams"
},
]
},
{
_id: 2,
name: "English",
teacher_id: 1,
teacher_info: [
{
_id: 2,
firstName: "Mary",
lastName: "Jane"
},
]
}
]
I need to perform a match operation in the newly created teacher_info array field. For example, only keep the teacher that has the first name "John". How can I do so? Is that possible?
You can work with dot notation in your $match stage.
{
$match: {
"teacher_info.firstName": "John"
}
}
Demo # Mongo Playground

mongo nested aggregation with join

I've following tenant collection:
{id: 1, name: "T1", type: "DEFAULT", state: "ACTIVE"},
{id: 2, name: "T2", type: "DEFAULT", state: "DISABLED"},
{id: 3, name: "T3", type: "STANDARD", state: "ACTIVE"},
{id: 4, name: "T4", type: "TRIAL", state: "DELETED"},
{id: 5, name: "T5", type: "DEFAULT", state: "DISABLED"}
and then second collection with options:
{id:1, tenantId: 1, opt: "OPERATING"},
{id:2, tenantId: 2, opt: "OPERATING"},
{id:3, tenantId: 3, opt: "POSTPONED"},
{id:4, tenantId: 4, opt: "DELETED"},
{id:5, tenantId: 5, opt: "POSTPONED"}
Id' like to aggregate this collections to get umber of tenant types grouped with number of operations, but I'd like to remove all DELETED tenants and all DELETED options from search. Something like this:
{type: "DEFAULT", count: 3, opts: {operating: 2, postponed: 1}}
{type: "STANDARD", count: 1, opts: {postponed: 1}}
Grouping the tenants is fine, but I don't know what should I use for that next grouping of options.
db.tenant.aggregate([
{$match: { state: {$ne: "DELETED"}}},
{$lookup: {
from: "option",
localField: "_id",
foreignField: "tenantId",
as: "options"
}},
{$group {
_id: "$type",
count: {$sum: 1}
}}
])
$group by type and get group of ids
$lookup with pipeline match $in condition for tenantId
$group by opt and get count of option
$project to show fields in k and v format
$project to show required fields, $size to count total tenant and $arrayToObject convert opts array to object
db.tenant.aggregate([
{ $match: { state: { $ne: "DELETED" } } },
{
$group: {
_id: "$type",
ids: { $push: "$id" }
}
},
{
$lookup: {
from: "options",
let: { ids: "$ids" },
pipeline: [
{ $match: { opt: { $ne: "DELETED" }, $expr: { $in: ["$tenantId", "$$ids"] } } },
{
$group: {
_id: "$opt",
count: { $sum: 1 }
}
},
{
$project: {
_id: 0,
k: "$_id",
v: "$count"
}
}
],
as: "opts"
}
},
{
$project: {
_id: 0,
type: "$_id",
count: { $size: "$ids" },
opts: { $arrayToObject: "$opts" }
}
}
])
Playground

Count articles grouping by tags mongodb

I had a lot of articles with a field called tags, and is an array of tags _ids, and for statistics purpose I want to count how many articles we had by each tag. If tags were a simple tag _id, it's easy because I could group by tag, but is an array of tags, and I can't group by that field.
First I try with this:
db.note.aggregate([{$match: {
publishedAt: {
$gte: ISODate('2018-01-01'),
$lte: ISODate('2019-01-01')
}
}}, {$group: {
_id: "$tags",
"total": {
"$sum": 1
}
}}, {$lookup: {
from: 'tags',
localField: '_id',
foreignField: '_id',
as: 'tag'
}}, {$unwind: {
path: "$tag"
}}, {$project: {
total: 1,
"tag.name": 1
}}, {$sort: {
total: -1
}}])
But that doesn't work, that query, group by tags group, so I try to do this:
{
'$match': {
'publishedAt': {
'$gte': new Date(req.body.gte),
'$lte': new Date(req.body.lte)
}
}
},
{
'$unwind': {
'path': '$tags'
}
}, {
'$group': {
'_id': '$tags',
'total': {
'$sum': 1
}
}
}, {
'$lookup': {
'from': 'tags',
'localField': '_id',
'foreignField': '_id',
'as': 'tag'
}
}, {
'$project': {
'total': 1,
'tag.name': 1
}
}, {
'$sort': {
'total': -1
}
},
{
'$unwind': {
'path': '$tag'
}
}
)
But the problem with this, that group for the first tag from the array and I miss all other tags in that array.
What do you think will be the solution?
I had a lot of articles with a field called tags, and is an array of
tags _ids, and for statistics purpose I want to count how many
articles we had by each tag.
You can try this (I am assuming the following input documents):
notes:
{ _id: 1, name: "art-1", author: "ab", tags: [ "t1", "t2" ] },
{ _id: 2, name: "art-2", author: "cd", tags: [ "t1", "t3" ] },
{ _id: 3, name: "art-3", author: "wx", tags: [ "t4", "t3" ] },
{ _id: 4, name: "art-4", author: "yx", tags: [ "t1" ] }
tags:
{ _id: 1, id: "t1", name: "t1's name" },
{ _id: 2, id: "t2", name: "t2's name" },
{ _id: 3, id: "t3", name: "t3's name" },
{ _id: 4, id: "t4", name: "t4's name" }
The Query:
db.tags.aggregate( [
{
$lookup: {
from: "notes",
localField: "id",
foreignField: "tags",
as: "tag_matches"
}
},
{ $project: { id: 1, name: 1, _id: 0, count: { $size: "$tag_matches" } } }
] )
The Output:
{ "id" : "t1", "name" : "t1's name", "count" : 3 }
{ "id" : "t2", "name" : "t2's name", "count" : 1 }
{ "id" : "t3", "name" : "t3's name", "count" : 2 }
{ "id" : "t4", "name" : "t4's name", "count" : 1 }