How to aggregate multiple document with one array? - mongodb

Here is my data structure:
{
"_id" : ObjectId("5becc8e2e9427e48d0edab83"),
"theater" : "TodayTainan",
"geometry" : {
"type" : "Point",
"coordinates" : [
120.196866,
22.99322
]
},
"movie" : [
{
"movieDate" : "上映日期:2018-07-25",
"videoId" : [
"17Y_lXjB3VI",
"ovbEe1-qUZ0"
],
"imdbScore" : "",
"cnName" : "不可能的任務:全面瓦解",
"photoHref" : "https://movies.yahoo.com.tw/x/r/w420/i/o/production/movies/June2018/8LBd1GYiRhtjZMCcdOSl-1019x1500.JPG",
"rottenScore" : "",
"releasedTime" : [
ISODate("2018-11-15T12:40:00.000Z"),
ISODate("2018-11-15T17:20:00.000Z"),
ISODate("2018-11-15T22:00:00.000Z")
],
"enName" : "Mission: Impossible Fall Out",
"goodMinePoint" : 0.75
},
{
"movieDate" : "上映日期:2018-08-10",
"videoId" : [
"j-FAn2jGI08",
"6aWIWGBWlPk"
],
"imdbScore" : "5.9",
"cnName" : "巨齒鯊",
"photoHref" : "https://movies.yahoo.com.tw/x/r/w420/i/o/production/movies/July2018/xO7qzzEwPCaipjCWkkxg-2714x3878.jpg",
"rottenScore" : "45%",
"releasedTime" : [
ISODate("2018-11-15T15:20:00.000Z"),
ISODate("2018-11-15T20:00:00.000Z")
],
"enName" : "The Meg",
"goodMinePoint" : 0.3
}
],
"phone" : "06-2205151"
}
I use addToSet to remove duplicate data with one document like this code:
db.getCollection('TaipeiEast').aggregate([
{ "$match": {
"theater": "TodayTainan"
}
},
{ "$unwind": '$movie' },
{ "$group": {
"_id": "$_id",
"movie": {
"$addToSet": {
"cnName": "$movie.cnName",
"photoHref": "$movie.photoHref"
}
}
}
}
])
Now I have a lot of documents in one collection, and I want to query them so that they become one movie array without duplicate movies.
I try to remove match like this:
db.getCollection('TaipeiEast').aggregate([
{ "$unwind": '$movie' },
{ "$group": {
"_id": "$_id",
"movie": {
"$addToSet": {
"cnName": "$movie.cnName",
"photoHref": "$movie.photoHref"
}
}
}
}
])
I can get the data from all of the documents, but the movie array is still separate for each document.
Any one knows how to generate one movie array from multiple documents ? Thanks in advance.

Try $group with _id: null, so that you get all movies in one array that does not depend on each document:
{ "$group": {
"_id": null,
"movie": {
"$addToSet": {
"cnName": "$movie.cnName",
"photoHref": "$movie.photoHref"
}
}
}
}

Related

Query nested array from document

Given the following document data in collection called 'blah'...
[
{
"_id" : ObjectId("60913f55987438922d5f0db6"),
"procedureCode" : "code1",
"description" : "Description 1",
"coding" : [
{
"system" : "ABC",
"code" : "L111"
},
{
"system" : "DEFG",
"code" : "S222"
}
]
},
{
"_id" : ObjectId("60913f55987438922d5f0dbc"),
"procedureCode" : "code2",
"description" : "Description 2",
"coding" : [
{
"system" : "ABC",
"code" : "L999"
},
{
"system" : "DEFG",
"code" : "X3333"
}
]
}
]
What I want to get is all of the coding elements where system is ABC for all parents, and an array of codes like so.
[
{ "code": "L111" },
{ "code": "L999" },
]
If I use db.getCollection('blah').find({"coding.system": "ABC"}) I get the parent document with any child in the coding array of ICD.
If I use...
db.getCollection("blah")
.find({ "coding.system": "ABC" })
.projection({ "coding.code": 1 })
I do get the parent documents which have a child with a system of "ABC", but the coding for "DEFG" seems to come along for the ride too.
{
"_id" : ObjectId("60913f55987438922d5f0db6"),
"coding" : [
{
"code" : "L989"
},
{
"code" : "S102"
}
]
},
{
"_id" : ObjectId("60913f55987438922d5f0dbc"),
"coding" : [
{
"code" : "L989"
},
{
"code" : "X382"
}
]
}
I have also tried experimenting with:
db.getCollection("blah").aggregate(
{ $unwind: "$coding" },
{ $match: { "system": "ICD" } }
);
.. as per this page: mongoDB query to find the document in nested array
... but got nowhere fast with that approach, i.e. no records at all.
What query do I need, please, to achieve something like this..?
[
{ "code": "L111" },
{ "code": "L999" },
...
]
or even better, this..?
[
"L111",
"L999",
...
]
db.collection.aggregate([
{
$match: { "coding.system": "ABC" }
},
{
$unwind: "$coding"
},
{
$match: { "coding.system": "ABC" }
},
{
$project: { code: "$coding.code" }
}
])
mongoplayground
db.collection.aggregate([
{
$match: { "coding.system": "ABC" }
},
{
$unwind: "$coding"
},
{
$match: { "coding.system": "ABC" }
},
{
$group: {
_id: null,
coding: { $push: "$coding.code" }
}
}
])
mongoplayground
Instead of $unwind, $match you can also use $filter:
db.collection.aggregate([
{ $match: { "coding.system": "ABC" } },
{
$project: {
coding: {
$filter: {
input: "$coding",
cond: { $eq: [ "$$this.system", "ABC" ] }
}
}
}
}
])

MongoDB group by array subfield

Hello I am new to mongoDB, please I hope you can help me with this question.
My collection will look like this:
{
"_id": { "$oid": "5f1fd47..." },
"email":"c#c.com",
"materials": [
{
"_id": { "$oid": "5f1fda2..." },
"title": "MDF 18mm Blanco",
"id": "mdf18blanco",
"thickness": "18",
"family": "MDF",
"color": ""
}, ...
//others materials with different family
],
}
I did an aggregate like this:
{ "$match" : { "email" : "c#c.com" } },
{ "$unwind" : "$materials" },
{ "$group" : { "_id" : "$_id", "list" : { "$push" : "$materials.family" } } }
and I return this:
{
"_id" : ObjectId("5f1fd47d502e00051c673dd1"),
"list" : [
"MDF",
"MDF",
"MDF",
"Melamina",
"Melamina",
"Melamina",
"Melamina",
"MDF",
"Melamina",
"Aglomerado",
"Aglomerado"
]
}
but i need get this
{
"_id" : ObjectId("5f1fd47d502e00051c673dd1"),
"list" : [
"MDF",
"Melamina",
"Aglomerado"
]
}
I hope you understand my question and can help me, thank you very much.
All you need to do is use $addToSet instead of $push in your group stage:
{ "$group" : { "_id" : "$_id", "list" : { "$addToSet" : "$materials.family" } } }
One thing to note is that $addToSet does not guarantee a specific order as opposed to $push in case it matters to you.
You only need change $push to $addToSet.
A set does not contain repeated values, so it works.
db.collection.aggregate([
{
"$match": {
"email": "c#c.com"
}
},
{
"$unwind": "$materials"
},
{
"$group": {
"_id": "$_id",
"list": {
"$addToSet": "$materials.family"
}
}
}
])
Mongo Playground example

How to update array of objects to LowerCase in mongodb?

I need to update the role in team array to lowercase.
db.users.find().pretty().limit(1)
{
"_id" : ObjectId("5d9fd81d3d598088d2ea5dc9"),
"employed" : "USA-Atlanta",
"firstName" : "Rory",
"siteRole" : "super admin",
"status" : "active",
"team" : [
{
"name" : "SALES AND MARKETING",
"displayName" : "S&M",
"role" : "Manager"
}
]
}
I tried this code. I'm getting it to work with normal (non-array) fields.
db.users.find( {}, { 'role': 1 } ).forEach(function(doc) {
db.users.update(
{ _id: doc._id},
{ $set : { 'role' : doc.role.toLowerCase() } },
{ multi: true }
)
});
sample output
"team" : [
{
"name" : "SALES AND MARKETING",
"displayName" : "S&M",
"role" : "manager"
}
]
I think the below Aggregation query is what you are looking for
// Lower-case the "role" of every element in each user's "team" array.
// The aggregation only computes the rewritten array ("teamModified");
// the forEach callback is what writes it back to the document.
var count = 0;
db.users.aggregate([
  // Only documents that have at least one team entry with a "role" field.
  { "$match": { "team.role": { "$exists": true } } },
  {
    "$project": {
      "_id": 1,
      "teamModified": {
        "$map": {
          "input": "$team",
          "as": "arrayElems",
          // Keep every field of the element and overwrite "role" with its lower-cased form.
          "in": {
            "$mergeObjects": [
              "$$arrayElems",
              { "role": { "$toLower": "$$arrayElems.role" } }
            ]
          }
        }
      }
    }
  }
]).forEach(function (doc) {
  db.users.updateOne(
    { "_id": doc["_id"] },
    { "$set": { "team": doc["teamModified"] } }
  );
  // Progress output: number of documents processed so far.
  count += 1;
  printjson(count);
});
printjson("DONE!!!");
Note: I haven't tested the script properly in my local, so do let me know if it didn't help you out

How can I get this result in mongodb?

I have this data structure:
"_id" : "121212",
"terms" : [
{
"term" : "hi",
"tf" : 2
},
{
"term" : "you",
"tf" : 1
}
]
}
and making this query:
db.foo.aggregate( [
{
$match : { _id : "121212" }
},
{
$project:{ terms:1 }
},
{
$unwind: "$terms"
}
]).pretty();
I have come to get this result in my db:
{
"_id" : "121212",
"terms" : {
"term" : "hi",
"tf" : 2
}
}
{
"_id" : "121212",
"terms" : {
"term" : "you",
"tf" : 1
}
}
but is there any way to get a result like this?:
{
"_id" : "121212",
"term" : "hi",
"tf" : 2
}
{
"_id" : "121212",
"term" : "you",
"tf" : 1
}
I have tried to build the query with $replaceRoot: {newRoot: "$terms"}, but afterwards I can't select the _id field anymore.
Well, you can use the $map and $mergeObjects to do this beautifully.
[
{ "$match":{"_id":"121212"}},
{
"$addFields":{
"terms":{
"$map":{
"input":"$terms",
"in":{
"$mergeObjects":[
"$$this",
{
"_id":"$_id"
}
]
}
}
}
}
}
]
If you really need to deconstruct the "terms" array, then add the $unwind: "$terms" to the pipeline.
You can achieve by using $project stage at the end of the pipeline
db.foo.aggregate([
{ "$match" : { "_id": "121212" } },
{ "$unwind": "$terms" },
{ "$project": { "term": "$terms.term", "tf": "$terms.tf" }}
])
Output
[
{
"_id": "121212",
"term": "hi",
"tf": 2
},
{
"_id": "121212",
"term": "you",
"tf": 1
}
]
Check it here
You need to use $mergeObjects inside $replaceRoot:
db.foo.aggregate( [
{
$match : { _id : "121212" }
},
{
$project:{ terms:1 }
},
{
$unwind: "$terms"
},
{
$replaceRoot: {
newRoot: {
$mergeObjects: [ { _id: "$_id" }, "$terms" ]
}
}
}
]).pretty();
Just to complete the range of options:
db.foo.aggregate([
{ "$match" : { "_id": "121212" } }, // filter by "_id"
{ "$addFields": { "terms._id": "$_id" } }, // copy "_id" field into terms
{ "$unwind": "$terms" }, // flatten the "terms" array
{ "$replaceRoot": { "newRoot": "$terms" } } // move the contents of the "terms" field up to the root level
])

Dynamically Querying From an Input Object

I'm trying to dynamically query a database that looks like this:
db.test.insert({
"_id" : ObjectId("58e574a768afb6085ec3a388"),
"place": "A",
"tests" : [
{
"name" : "1",
"thing" : "X",
"evaluation" : [
{
"_id": ObjectId("58f782fbbebac50d5b2ae558"),
"aHigh" : [1,2],
"aLow" : [ ],
"zHigh" : [ ],
"zLow" : [1,3]
},
{
"_id": ObjectId("58f78525bebac50d5b2ae5c9"),
"aHigh" : [1,4],
"aLow" : [2],
"zHigh" : [ 3],
"zLow" : [ ]
},
{
"_id": ObjectId("58f78695bebac50d5b2ae60e"),
"aHigh" : [ ],
"aLow" : [1,2,3],
"zHigh" : [1,2,3,4],
"zLow" : [ ]
},]
},
{
"name" : "1",
"thing" : "Y",
"evaluation" : [
{
"_id": ObjectId("58f78c37bebac50d5b2ae704"),
"aHigh" : [1,3],
"aLow" : [4],
"zHigh" : [ ],
"zLow" : [3]
},
{
"_id": ObjectId("58f79159bebac50d5b2ae75c"),
"aHigh" : [1,3,4],
"aLow" : [2],
"zHigh" : [2],
"zLow" : [ ]
},
{
"_id": ObjectId("58f79487bebac50d5b2ae7f1"),
"aHigh" : [1,2,3],
"aLow" : [ ],
"zHigh" : [ ],
"zLow" : [1,2,3,4]
},]
}
]
})
db.test.insert({
"_id" : ObjectId("58eba09e51f7f631dd24aa1c"),
"place": "B",
"tests" : [
{
"name" : "2",
"thing" : "Y",
"evaluation" : [
{
"_id": ObjectId("58f7879abebac50d5b2ae64f"),
"aHigh" : [2],
"aLow" : [3 ],
"zHigh" : [ ],
"zLow" : [1,2,3,4]
},
{
"_id": ObjectId("58f78ae1bebac50d5b2ae6db"),
"aHigh" : [ ],
"aLow" : [ ],
"zHigh" : [ ],
"zLow" : [3,4]
},
{
"_id": ObjectId("58f78ae1bebac50d5b2ae6dc"),
"aHigh" : [1,2],
"aLow" : [3,4],
"zHigh" : [ ],
"zLow" : [1,2,3,4]
},]
}
]
})
In order to query the database, I have an object that is created by another part of my program. It comes in the form of:
var outputObject = {
"top": {
"place": [
"A"
]
},
"testing": {
"tests": {
"name": [
"1",
],
"thing": [
"X",
"Y"
]
}
}
}
I then use that outputObject and $match statements within the aggregate framework to execute the query. I have included two queries which do not seem to work.
db.test.aggregate([
{$match: {outputObject.top}},
{$unwind: '$tests'},
{$match: {outputObject.testing}},
{$unwind: '$tests.evaluation'},
{$group: {_id: null, uniqueValues: {$addToSet: "$tests.evaluation._id"}}}
])
db.test.aggregate([
{$match: {$and: [outputObject.top]}},
{$unwind: '$tests'},
{$match: {$and: [outputObject.testing]}},
{$unwind: '$tests.evaluation'},
{$group: {_id: null, uniqueValues: {$addToSet: "$tests.evaluation._id"}}}
])
However, this approach does not seem to be functioning. I have a couple questions:
Do I need to modify the object outputObject before applying it to the $match statement?
Are my queries correct?
Should I be using $and or $in in combination with the $match statement?
What code will produce the desired result?
Currently using mongoDB 3.4.4
You have a couple of problems here. Firstly, the array arguments in your input value should rather be compared with $in, which means "any of these in the list", in order to match.
The second problem is that since the paths are "nested" here you actually need to transform to "dot notation", otherwise you have another variant of the first problem where the conditions would be looking in the "tests" array for elements that only have the supplied fields you specify in the input.
So unless you "dot notate" the path as well then since your array items also contain "evaluation" which is not supplied in the input, then it would not match as well.
The other issue here, but easily corrected is the "top" and "testing" separation here is not actually needed. Both conditions actually apply within "both" the $match stages in your pipeline. So you could in fact "flatten" that, as the example shows:
var outputObject = {
"top" : {
"place" : [
"A"
]
},
"testing" : {
"tests" : {
"name" : [
"1"
],
"thing" : [
"X",
"Y"
]
}
}
};
/**
 * Flatten a nested filter description into a flat, dot-notated query object.
 *
 * - Array values become `{ "$in": <array> }` conditions.
 * - Plain nested objects are descended into, joining keys with ".".
 * - Every other value (primitives, null, and non-plain objects such as Date)
 *   is copied as-is under its dotted key.
 *
 * @param {Object} obj      Nested description to flatten.
 * @param {Object} [target] Object that receives the flattened keys; a new
 *                          object is used when omitted. It is mutated and returned.
 * @param {string} [prefix] Dotted path prepended to every key (used by the recursion).
 * @returns {Object} The `target` object holding the flattened conditions.
 */
function dotNotate(obj, target, prefix) {
  const result = target || {};
  const path = prefix || "";
  Object.keys(obj).forEach(function (key) {
    const value = obj[key];
    // typeof null is "object", so rule it out before asking for a prototype.
    const proto = (value !== null && typeof value === "object")
      ? Object.getPrototypeOf(value)
      : undefined;
    if (Array.isArray(value)) {
      result[path + key] = { "$in": value };
    } else if (proto === Object.prototype || proto === null) {
      // Only plain objects are treated as nested paths; recursing into a Date
      // (or similar) would find no own keys and silently drop the value.
      dotNotate(value, result, path + key + ".");
    } else {
      result[path + key] = value;
    }
  });
  return result;
}
// Run the transformation.
// Merge into a fresh object: Object.assign(outputObject.top, ...) would copy the
// "testing" keys into outputObject.top itself and leave the input altered.
var queryObject = dotNotate(Object.assign({}, outputObject.top, outputObject.testing));
This produces queryObject which now looks like:
{
"place" : {
"$in" : [
"A"
]
},
"tests.name" : {
"$in" : [
"1"
]
},
"tests.thing" : {
"$in" : [
"X",
"Y"
]
}
}
And then you can run the aggregation:
db.test.aggregate([
{ '$match': queryObject },
{ '$unwind': "$tests" },
{ '$match': queryObject },
{ '$unwind': "$tests.evaluation" },
{ '$group': {
'_id': null,
'uniqueValues': {
'$addToSet': "$tests.evaluation._id"
}
}}
])
Which correctly filters the objects
{
"_id" : null,
"uniqueValues" : [
ObjectId("58f79487bebac50d5b2ae7f1"),
ObjectId("58f79159bebac50d5b2ae75c"),
ObjectId("58f782fbbebac50d5b2ae558"),
ObjectId("58f78c37bebac50d5b2ae704"),
ObjectId("58f78525bebac50d5b2ae5c9"),
ObjectId("58f78695bebac50d5b2ae60e")
]
}
Please note that the conditions you supply here actually matches all documents and array entries you supplied in your question anyway. But it will of course actually remove anything that does not match.
Also ideally the "initial" query would rather use $elemMatch
{
"place" : {
"$in" : [
"A"
]
},
"tests": {
"$elemMatch": {
"name" : { "$in" : [ "1" ] },
"thing" : { "$in" : [ "X", "Y" ] }
}
}
}
Which would actually filter all of the documents properly in the initial query stage, since it would only select documents that actually had array elements which did in fact match "only" those conditions as opposed to the dot notated form in the "initial" query which would also return documents where the notated conditions for the "test" array were met in "any element" instead of "both conditions" on the element. But that may be another exercise to consider as the restructured query can apply to both the initial and "inner" filters without the $elemMatch.
Actually with thanks to this nice solution to a "Deep Object Merge" without additional library dependencies, you can use the $elemMatch like this:
var outputObject = {
"top" : {
"place" : [
"A"
]
},
"testing" : {
"tests" : {
"name" : [
"1"
],
"thing" : [
"X",
"Y"
]
}
}
};
/**
 * Flatten a nested filter description into a flat, dot-notated query object.
 *
 * - Array values become `{ "$in": <array> }` conditions.
 * - Plain nested objects are descended into, joining keys with ".".
 * - Every other value (primitives, null, and non-plain objects such as Date)
 *   is copied as-is under its dotted key.
 *
 * @param {Object} obj      Nested description to flatten.
 * @param {Object} [target] Object that receives the flattened keys; a new
 *                          object is used when omitted. It is mutated and returned.
 * @param {string} [prefix] Dotted path prepended to every key (used by the recursion).
 * @returns {Object} The `target` object holding the flattened conditions.
 */
function dotNotate(obj, target, prefix) {
  const result = target || {};
  const path = prefix || "";
  Object.keys(obj).forEach(function (key) {
    const value = obj[key];
    // typeof null is "object", so rule it out before asking for a prototype.
    const proto = (value !== null && typeof value === "object")
      ? Object.getPrototypeOf(value)
      : undefined;
    if (Array.isArray(value)) {
      result[path + key] = { "$in": value };
    } else if (proto === Object.prototype || proto === null) {
      // Only plain objects are treated as nested paths; recursing into a Date
      // (or similar) would find no own keys and silently drop the value.
      dotNotate(value, result, path + key + ".");
    } else {
      result[path + key] = value;
    }
  });
  return result;
}
/**
 * Report whether `item` is a non-null object that is not an array.
 *
 * @param {*} item Value to inspect.
 * @returns {boolean} Always a strict boolean (never the falsy input itself).
 */
function isObject(item) {
  return item !== null && typeof item === 'object' && !Array.isArray(item);
}

/**
 * Recursively merge the own enumerable properties of each source into `target`.
 *
 * Object-valued properties are merged key by key; every other value
 * (including arrays) overwrites whatever `target` holds under that key.
 * Sources that are not objects are ignored, and a non-object `target`
 * is returned untouched.
 *
 * @param {Object} target     Object that is mutated and returned.
 * @param {...Object} sources Objects merged into `target`, left to right.
 * @returns {Object} The `target` passed in.
 */
function mergeDeep(target, ...sources) {
  if (!isObject(target)) return target;
  for (const source of sources) {
    if (!isObject(source)) continue;
    for (const key of Object.keys(source)) {
      // Writing through "__proto__" would reach Object.prototype and leak the
      // merged data into every object in the process.
      if (key === '__proto__') continue;
      const value = source[key];
      if (!isObject(value)) {
        target[key] = value;
        continue;
      }
      // Recurse only into an object that target itself owns. Anything else
      // (missing, primitive, array, inherited) is replaced by a fresh object
      // so the source data is not silently discarded.
      const ownsObject =
        Object.prototype.hasOwnProperty.call(target, key) && isObject(target[key]);
      if (!ownsObject) target[key] = {};
      mergeDeep(target[key], value);
    }
  }
  return target;
}
// Merge into a fresh object: Object.assign(outputObject.top, ...) would copy the
// "testing" keys into outputObject.top itself and leave the input altered.
var queryObject = dotNotate(Object.assign({}, outputObject.top, outputObject.testing));
// Replace dot with $elemMatch.
// Each key is split at its FIRST dot only: the part before it names the array
// field, the remainder stays a dotted path inside $elemMatch, so keys with more
// than two segments keep their full path.
var initialQuery = Object.keys(queryObject).map( k => {
  const dot = k.indexOf(".");
  return ( dot === -1 )
    ? { [k]: queryObject[k] }
    : { [k.slice(0, dot)]: { "$elemMatch": { [k.slice(dot + 1)]: queryObject[k] } } };
}).reduce((acc,curr) => mergeDeep(acc,curr),{});
db.test.aggregate([
{ '$match': initialQuery },
{ '$unwind': "$tests" },
{ '$match': queryObject },
{ '$unwind': "$tests.evaluation" },
{ '$group': {
'_id': null,
'uniqueValues': {
'$addToSet': "$tests.evaluation._id"
}
}}
])
With the pipeline being sent to the server as:
[
{
"$match" : {
"place" : {
"$in" : [
"A"
]
},
"tests" : {
"$elemMatch" : {
"name" : {
"$in" : [
"1"
]
},
"thing" : {
"$in" : [
"X",
"Y"
]
}
}
}
}
},
{
"$unwind" : "$tests"
},
{
"$match" : {
"place" : {
"$in" : [
"A"
]
},
"tests.name" : {
"$in" : [
"1"
]
},
"tests.thing" : {
"$in" : [
"X",
"Y"
]
}
}
},
{
"$unwind" : "$tests.evaluation"
},
{
"$group" : {
"_id" : null,
"uniqueValues" : {
"$addToSet" : "$tests.evaluation._id"
}
}
}
]
Also your $group is probably better written as:
{ "$group": { "_id": "$tests.evaluation._id" } }
Which returns "distinct" just like $addToSet does, but also puts the output into separate documents, instead of trying to combine into "one" which is probably not the best practice and could in extreme cases break the BSON limit of 16MB. So it is generally better to obtain "distinct" in that way instead.
It is better to agree on a fixed format for outputObject and write aggregation query accordingly.
You can now process the outputObject to inject the query operators and transform the keys to match the fields.
Something like below.
{
"top": {
"place": {
"$in": [
"A"
]
}
},
"testing": {
"tests.name": {
"$in": [
"1"
]
},
"tests.thing": {
"$in": [
"X",
"Y"
]
}
}
}
JS Code
// Build the two $match filters from outputObject without modifying it, so the
// transformation can be re-run safely (rewriting in place would wrap the values
// in "$in" a second time on the next run).

// Top-level fields: every list of allowed values becomes an $in condition.
var top = {};
Object.keys(outputObject.top).forEach(function(field) {
  top[field] = {
    "$in": outputObject.top[field]
  };
});
// Nested fields: "parent" + "." + "child" gives the dot-notated path,
// again matched with $in against the list of allowed values.
var testing = {};
Object.keys(outputObject.testing).forEach(function(parent) {
  Object.keys(outputObject.testing[parent]).forEach(function(child) {
    testing[parent + "." + child] = {
      "$in": outputObject.testing[parent][child]
    };
  });
});
You can now use your aggregation query
db.test.aggregate([{
$match: top
},
{
$unwind: "$tests"
},
{
$match: testing
},
{
$unwind: "$tests.evaluation"
},
{
$group: {
_id: null,
uniqueValues: {
$addToSet: "$tests.evaluation._id"
}
}
}
])
You can refactor your code to use below aggregation pipeline in 3.4
Process your output object ( includes $in operator ) to
{
"top": {
"place": {
"$in": [
"A"
]
}
},
"testing": {
"tests": {
"name": [
"1"
],
"thing": [
"X",
"Y"
]
}
}
};
JS Code
var top = outputObject.top;
Object.keys(top).forEach(function(a) {top[a] = {"$in":top[a]};});
Aggregation:
[
  // Select the documents by their top-level fields.
  {
    "$match": top
  },
  // Keep only the "tests" elements whose name and thing are in the allowed lists.
  {
    "$addFields": {
      "tests": {
        "$filter": {
          // Field path of the array to filter. "$$tests" would refer to an
          // aggregation variable named "tests", which is not defined here.
          "input": "$tests",
          "as": "res",
          "cond": {
            "$and": [
              {
                "$in": [
                  "$$res.name",
                  outputObject.testing.tests.name
                ]
              },
              {
                "$in": [
                  "$$res.thing",
                  outputObject.testing.tests.thing
                ]
              }
            ]
          }
        }
      }
    }
  },
  // "tests" is still an array after $filter; unwind it first so that
  // "tests.evaluation" is a plain array that the next stage can unwind.
  {
    "$unwind": "$tests"
  },
  {
    "$unwind": "$tests.evaluation"
  },
  // Collect the distinct evaluation ids into a single result document.
  {
    "$group": {
      "_id": null,
      "uniqueValues": {
        "$addToSet": "$tests.evaluation._id"
      }
    }
  }
]