Omit empty fields from MongoDB query result - mongodb

Is there a way to omit empty fields (eg empty string, or an empty array) from MongoDB query results' documents (find or aggregate).
Document in DB:
{
"_id" : ObjectId("5dc3fcb388c1c7c5620ed496"),
"name": "Bill",
"emptyString" : "",
"emptyArray" : []
}
Output:
{
"_id" : ObjectId("5dc3fcb388c1c7c5620ed496"),
"name": "Bill"
}
Similar question for Elasticsearch: Omit null fields from elasticsearch results

Please use aggregate function.
If you want to remove a key, use $cond inside a $project stage and return $$REMOVE when the field is empty.
// Project each document, dropping "emptyString" / "emptyArray" when they are empty.
// Every field to be checked has to be listed explicitly in the $project stage.
db.Speed.aggregate( [
{
$project: {
// Fields that are always passed through unchanged.
name: 1,
"_id": 1,
"emptyString": {
$cond: {
// When the stored value equals "", $$REMOVE excludes the field from the output.
if: { $eq: [ "", "$emptyString" ] },
then: "$$REMOVE",
else: "$emptyString"
}
},
"emptyArray": {
$cond: {
// Same pattern for the array field: an empty array is excluded, anything else is kept.
if: { $eq: [ [], "$emptyArray" ] },
then: "$$REMOVE",
else: "$emptyArray"
}
}
}
}
] )

One way this could be done is using cursor.map() which is available on find() and aggregation([]) both.
The idea is to keep a list of the fields that are (or could be) present in the documents, and to use the delete operator to remove those fields that hold an empty string or an empty array (both have a length property) from each returned document.
Mongo Shell:
// Names of the fields that should be dropped from a result document when empty.
var fieldsList = ["name", "emptyString", "emptyArray"];
// Client-side post-processing: the callback runs once per document returned by the cursor.
db.collection.find().map(function(doc) {
  for (var i = 0; i < fieldsList.length; i++) {
    var field = fieldsList[i];
    // Fields that are absent from this document need no work.
    if (!(field in doc)) {
      continue;
    }
    var value = doc[field];
    var isText = typeof value === "string" || value instanceof String;
    // Only strings and arrays are candidates; both expose `length`.
    if ((Array.isArray(value) || isText) && value.length === 0) {
      delete doc[field];
    }
  }
  return doc;
});
Test documents:
{
"_id" : ObjectId("5dc426d1f667120607ac5006"),
"name" : "Bill",
"emptyString" : "",
"emptyArray" : [ ]
}
{
"_id" : ObjectId("5dc426d1f667120607ac5007"),
"name" : "Foo",
"emptyString" : "foo",
"emptyArray" : [ ]
}
{
"_id" : ObjectId("5dc426d1f667120607ac5008"),
"name" : "Bar",
"emptyString" : "",
"emptyArray" : [
"foo",
"bar"
]
}
{
"_id" : ObjectId("5dc426d1f667120607ac5009"),
"name" : "May",
"emptyString" : "foobar",
"emptyArray" : [
"foo",
"bar"
]
}
O/P
[
{
"_id" : ObjectId("5dc426d1f667120607ac5006"),
"name" : "Bill"
},
{
"_id" : ObjectId("5dc426d1f667120607ac5007"),
"name" : "Foo",
"emptyString" : "foo"
},
{
"_id" : ObjectId("5dc426d1f667120607ac5008"),
"name" : "Bar",
"emptyArray" : [
"foo",
"bar"
]
},
{
"_id" : ObjectId("5dc426d1f667120607ac5009"),
"name" : "May",
"emptyString" : "foobar",
"emptyArray" : [
"foo",
"bar"
]
}
]
Note: if the number of fields in the documents is very large, this may not be an optimal solution, since the comparison runs for every listed field of every document. You might want to limit fieldsList to the properties that are suspected to hold an empty array or string.

I think the easiest way to remove all empty string- and empty array-fields from the output is to add the aggregation stage below. (And yes, "easy" is relative, when you have to create these levels of logic to accomplish such a trivial task...)
// Aggregation stage (to be placed inside a pipeline array): rebuilds every document
// from only those top-level fields whose value is neither [] nor ''.
$replaceRoot: {
newRoot: {
// Turn the filtered list of { k, v } pairs back into a document.
$arrayToObject: {
$filter: {
input: {
// $$ROOT is the whole current document; $objectToArray yields one { k, v } pair per field.
$objectToArray: '$$ROOT'
},
as: 'item',
cond: {
// Keep a pair only when its value is not an empty array and not an empty string.
$and: [
{ $ne: [ '$$item.v', [] ] },
{ $ne: [ '$$item.v', '' ] }
]
}
}
}
}
}
Just modify the cond-clause to filter out other types of fields (e.g. null).
btw: I haven't tested the performance of this, but at least it's generic and somewhat readable.
Edit: IMPORTANT! The $replaceRoot-stage does prevent MongoDB from optimizing the pipeline, so if you use it in a View that you run .find() on, it will append a $match-stage to the end of the View's pipeline, instead of prepending an indexed search at the start of the pipeline. This will have a significant impact on performance. You can safely use it in a custom pipeline though, as long as you have the $match-stage before it. (At least as far as my limited MongoDB knowledge tells me). And if anyone knows how to prepend a $match-stage to a View when querying, then please leave a comment :-)

Related

mongodb check if all subdocuments in array have the same value in one field

I have a collection of documents, each has a field which is an array of subdocuments, and all subdocuments have a common field 'status'. I want to find all documents that have the same status for all subdocuments.
collection:
{
"name" : "John",
"wives" : [
{
"name" : "Mary",
"status" : "dead"
},
{
"name" : "Anne",
"status" : "alive"
}
]
},
{
"name" : "Bill",
"wives" : [
{
"name" : "Mary",
"status" : "dead"
},
{
"name" : "Anne",
"status" : "dead"
}
]
},
{
"name" : "Mohammed",
"wives" : [
{
"name" : "Jane",
"status" : "dead"
},
{
"name" : "Sarah",
"status" : "dying"
}
]
}
I want to check if all wives are dead and find only Bill.
You can use the following aggregation query to get records of person whose wives are all dead:
// Find people whose wives are ALL "dead": compare the total number of wives
// with the number of wives that survive the status filter.
db.collection.aggregate(
// Remember the original array length before the array is taken apart.
{$project: {name:1, wives:1, size:{$size:'$wives'}}},
// One output document per wife.
{$unwind:'$wives'},
// Keep only the wives whose status is "dead".
{$match:{'wives.status':'dead'}},
// Reassemble per person; count is the number of wives that passed the filter.
{$group:{_id:'$_id',name:{$first:'$name'}, wives:{$push: '$wives'},size:{$first:'$size'},count:{$sum:1}}},
// $cmp yields 0 when size and count are equal, i.e. every wife matched.
{$project:{_id:1, wives:1, name:1, cmp_value:{$cmp:['$size','$count']}}},
{$match:{cmp_value:0}}
)
Output:
{ "_id" : ObjectId("56d401de8b953f35aa92bfb8"), "name" : "Bill", "wives" : [ { "name" : "Mary", "status" : "dead" }, { "name" : "Anne", "status" : "dead" } ], "cmp_value" : 0 }
If you need to find records of users who have the same status, then you may remove the initial match stage.
The most efficient way to handle this is always going to be to "match" on the status of "dead" as the opening query, otherwise you are processing items that cannot possibly match, and the logic really quite simply followed with $map and $allElementsTrue:
// Return only the documents in which every element of "wives" has status "dead".
db.collection.aggregate([
// Cheap pre-filter: discard documents that have no "dead" wife at all.
{ "$match": { "wives.status": "dead" } },
{ "$redact": {
"$cond": {
"if": {
// $map produces one boolean per wife; $allElementsTrue requires all of them to be true.
"$allElementsTrue": {
"$map": {
"input": "$wives",
"as": "wife",
"in": { "$eq": [ "$$wife.status", "dead" ] }
}
}
},
// $$KEEP returns the document, $$PRUNE drops it.
"then": "$$KEEP",
"else": "$$PRUNE"
}
}}
])
Or the same thing with $where:
// Match documents in which every element of "wives" has status "dead".
db.collection.find({
  // Pre-filter so the JavaScript below only runs on documents with at least one "dead" wife.
  "wives.status": "dead",
  // Evaluated per candidate document, with `this` bound to that document.
  "$where": function() {
    // The callback must return its result: without `return`, a filter()-based
    // count is always 0 and the comparison with wives.length never succeeds.
    return this.wives.every(function(el) {
      return el.status === "dead";
    });
  }
})
Both essentially test the "status" value of all elements to make sure they match in the fastest possible way. But the aggregate pipeline with just $match and $redact should be faster. And "less" pipeline stages ( essentially each a pass through the data ) means faster as well.
Of course keeping a property on the document is always fastest, but it would involve logic to set that only where "all elements" are the same property. Which of course would typically mean inspecting the document by loading it from the server prior to each update.

Get unique values from arrays per record in Mongodb

I have a collection in MongoDB that looks like this:
{
"_id" : ObjectId("56d3e53b965b57e4d1eb3e71"),
"name" : "John",
"posts" : [
{
"topic" : "Harry Potter",
"obj_ids" : [
"1234"
],
"dates_posted" : [
"2014-12-24"
]
},
{
"topic" : "Daniel Radcliffe",
"obj_ids" : [
"1235",
"1236",
"1237"
],
"dates_posted" : [
"2014-12-22",
"2015-01-13",
"2014-12-24"
]
}
],
},
{
"_id" : ObjectId("56d3e53b965b57e4d1eb3e72"),
"name" : "Jane",
"posts" : [
{
"topic" : "Eragon",
"tweet_ids" : [
"1672",
"1673",
"1674"
],
"dates_posted" : [
"2014-12-27",
"2014-11-16"
]
}
],
}
How could I query to get a result like:
{
"name": "John",
"dates": ["2014-12-24", "2014-12-22", "2015-01-13"]
},
{
"name": "Jane",
"dates" : ["2014-12-27", "2014-11-16"]
}
I need the dates to be unique, as "2014-12-24" appears in both elements of "posts" but I need only the one.
I tried doing db.collection.aggregate([{$unwind: "$posts"}, {$group:{_id:"$posts.dates_posted"}}]) and that gave me results like this:
{ "_id" : [ "2014-12-24", "2014-12-22", "2015-01-13", "2014-12-24" ] }
{ "_id" : [ "2014-12-27", "2014-11-16" ] }
How can I remove the duplicates and also get the name corresponding to the dates?
You would need to use the $addToSet operator to maintain unique values. One way of doing it would be to:
unwind posts.
unwind "posts.dates_posted", so that the array gets flattened and the value can be aggregated in the group stage.
Then group by _id and accumulate unique values for the date field, along with name.
code:
// Collect the distinct posting dates per person across all of their posts.
db.collection.aggregate([
{
// One document per post.
$unwind:"$posts"
},
{
// One document per (post, date) pair, so each date becomes a scalar value.
$unwind:"$posts.dates_posted"
},
{
$group:
{
// Group back by the original document.
"_id":"$_id",
// $addToSet keeps each date only once.
"dates":{$addToSet:"$posts.dates_posted"},
"name":{$first:"$name"}
}
},
{
// Shape the output: only name and dates, without _id.
$project:
{
"name":1,
"dates":1,
"_id":0
}
}
])
The downside of this approach is that it uses two unwind stages, which is quite costly, since each one increases the number of documents passed to the subsequent stages by a factor of n, where n is the number of values in the array that is flattened.

List of userids without duplicates in mongodb [duplicate]

I'm trying to learn MongoDB and how it'd be useful for analytics for me. I'm simply playing around with the JavaScript console available on their website and have created the following items:
{"title": "Cool", "_id": {"$oid": "503e4dc0cc93742e0d0ccad3"}, "tags": ["twenty", "sixty"]}
{"title": "Other", "_id": {"$oid": "503e4e5bcc93742e0d0ccad4"}, "tags": ["ten", "thirty"]}
{"title": "Ouch", "_id": {"$oid": "503e4e72cc93742e0d0ccad5"}, "tags": ["twenty", "seventy"]}
{"title": "Final", "_id": {"$oid": "503e4e72cc93742e0d0ccad6"}, "tags": ["sixty", "seventy"]}
What I'd like to do is query so I get a list of unique tags for all of these objects. The result should look something like this:
["ten", "twenty", "thirty", "sixty", "seventy"]
How do I query for this? I'm trying to distinct() it, but the call always fails without even querying.
The code that fails on their website works on an actual MongoDB instance:
> db.posts.insert({title: "Hello", tags: ["one", "five"]});
> db.posts.insert({title: "World", tags: ["one", "three"]});
> db.posts.distinct("tags");
[ "one", "three", "five"]
Weird.
You can use the aggregation framework. Depending on how you'd like the results structured, you can use either
var pipeline = [
{"$unwind": "$tags" } ,
{ "$group": { _id: "$tags" } }
];
R = db.tb.aggregate( pipeline );
printjson(R);
{
"result" : [
{
"_id" : "seventy"
},
{
"_id" : "ten"
},
{
"_id" : "sixty"
},
{
"_id" : "thirty"
},
{
"_id" : "twenty"
}
],
"ok" : 1
}
or
var pipeline = [
{"$unwind": "$tags" } ,
{ "$group":
{ _id: null, tags: {"$addToSet": "$tags" } }
}
];
R = db.tb.aggregate( pipeline );
printjson(R);
{
"result" : [
{
"_id" : null,
"tags" : [
"seventy",
"ten",
"sixty",
"thirty",
"twenty"
]
}
],
"ok" : 1
}
You should be able to use this:
db.mycollection.distinct("tags").sort()
Another way of getting unique array elements using aggregation pipeline
// Build a single list of the unique tags across all documents, without $unwind.
db.blogs.aggregate(
[
// Gather every document's "tags" array into one array of arrays.
{$group:{_id : null, uniqueTags : {$push : "$tags"}}},
{$project:{
_id : 0,
uniqueTags : {
// Fold the array of arrays into one flat, de-duplicated array.
$reduce : {
input : "$uniqueTags",
initialValue :[],
in : {$let : {
// $$this is the current tags array, $$value is the result accumulated so far.
vars : {elem : { $concatArrays : ["$$this", "$$value"] }},
// $setUnion on the concatenated array removes the duplicates.
in : {$setUnion : "$$elem"}
}}
}
}
}}
]
)
collection
> db.blogs.find()
{ "_id" : ObjectId("5a6d53faca11d88f428a2999"), "name" : "sdfdef", "tags" : [ "abc", "def", "efg", "abc" ] }
{ "_id" : ObjectId("5a6d5434ca11d88f428a299a"), "name" : "abcdef", "tags" : [ "abc", "ijk", "lmo", "zyx" ] }
>
pipeline
> db.blogs.aggregate(
... [
... {$group:{_id : null, uniqueTags : {$push : "$tags"}}},
... {$project:{
... _id : 0,
... uniqueTags : {
... $reduce : {
... input : "$uniqueTags",
... initialValue :[],
... in : {$let : {
... vars : {elem : { $concatArrays : ["$$this", "$$value"] }},
... in : {$setUnion : "$$elem"}
... }}
... }
... }
... }}
... ]
... )
result
{ "uniqueTags" : [ "abc", "def", "efg", "ijk", "lmo", "zyx" ] }
There are a couple of web mongo consoles available:
http://try.mongodb.org/
http://www.mongodb.org/#
But if you type help in them you will realise they only support a very small number of ops:
HELP
Note: Only a subset of MongoDB's features are provided here.
For everything else, download and install at mongodb.org.
db.foo.help() help on collection method
db.foo.find() list objects in collection foo
db.foo.save({a: 1}) save a document to collection foo
db.foo.update({a: 1}, {a: 2}) update document where a == 1
db.foo.find({a: 1}) list objects in foo where a == 1
it use to further iterate over a cursor
As such distinct does not work because it is not supported.

How to get mongodb deeply embeded document id

I have the following mongo document, which is part of a bigger document called attributes, which also has Colour and Size
> db.attributes.find({'name': {'en-UK': 'Fabric'}}).pretty()
{
"_id" : ObjectId("543261cda14c971132fa2b91"),
"values" : [
{
"source" : [
{
"_id" : ObjectId("543261cda14c971132fa2b79"),
"name" : {
"en-UK" : "Combed Cotton"
}
},
],
"name" : [
{
"_id" : ObjectId("543261cda14c971132fa2b85"),
"name" : {
"en-UK" : "Brushed 3-ply"
}
},
{
"_id" : ObjectId("543261cda14c971132fa2b8f"),
"name" : {
"en-UK" : "Plain Weave"
}
},
{
"_id" : ObjectId("543261cda14c971132fa2b90"),
"name" : {
"en-UK" : "1x1 Rib"
}
}
]
}
],
"name" : {
"en-UK" : "Fabric"
}
}
I am trying to return the _id for a sub document and have the following:
db.attributes.aggregate([
{ '$match': {'name.en-UK': 'Fabric'} },
{ '$unwind' : '$values' },
{ '$project': { 'name' : '$values.name'} },
{ '$match': { '$and': [{"name.name.en-UK" : "1x1 Rib"} ] }}
])
What is the correct way to do this?
Also, the values of Fabric is an array with two items, source and name, but if I populate it like:
> db.attributes.find({'name': {'en-UK': 'Fabric'}}).pretty()
{
"_id" : ObjectId("543261cda14c971132fa2b91"),
"values" : {
"source" : [{ ... }]
"name": [{ ... }]
}
}
I get the following error
"errmsg" : "exception: $unwind: value at end of field path must be an array"
But if I wrap it inside a square brackets this then works, so that
> db.attributes.find({'name': {'en-UK': 'Fabric'}}).pretty()
{
"_id" : ObjectId("543261cda14c971132fa2b91"),
"values" : [{
"source" : [{ ... }],
"name": [{ ... }]
}]
}
what am I missing as values is an array of two objects, source and name each containing a list of arrays
Any advice much appreciated
What you seem to be "missing" here is that "some" of your documents either do not contain a "values" property at all, or at the very least it is "not an array". This is the basic context of the error you have been given.
Fortunately there are a couple of ways to get around this: either "testing" for the presence of an array when submitting your original query, or actually "substituting" some kind of array for the missing element when processing the pipeline.
Here are both approaches in what is effectively a redundant form, since the first $match condition really sorts this out:
// Locate the sub-document named "1x1 Rib" under values.name, guarding against
// documents whose "values" field is missing or is not a populated array.
db.attributes.aggregate([
{ "$match": {
"name.en-UK": "Fabric",
// Requires a first element to exist, so "values" has at least one entry.
"values.0": { "$exists": true }
}},
{ "$project": {
"name": 1,
// Substitute an empty array when "values" is null or missing, so $unwind does not fail.
"values": { "$ifNull": [ "$values", [] ] }
}},
// Flatten the outer array, then the inner "name" array.
{ "$unwind": "$values" },
{ "$unwind": "$values.name" },
// Keep only the flattened entry with the wanted name; its _id is at values.name._id.
{ "$match": { "values.name.name.en-UK" : "1x1 Rib" }}
])
So as I said. Really redundant in that the initial $match actually asks if an "initial array element" actually exists. Which kind of means that there is an array there.
The second $project phase actually uses the $ifNull operator to "fill in" a value ( or basically an empty array ) where the tested element does not exist. We tested for that anyway before, but this demonstrates the different approaches.
But the basic idea is either "avoiding" or "filling-in" the cases where your document does not have the expected data that you want to process, which is the cause of your error.

Grouping records in nested documents

I have a document like this:
{
"_id" : ObjectId("533e6ab0ef2188940b00002c"),
"uin" : "1396599472869",
"vm" : {
"0" : {
"draw" : "01s",
"count" : "2",
"type" : "",
"data" : {
"title" : "K1"
},
"child" : [
"1407484608965"
]
},
"1407484608965" : {
"data" : {
"title" : "K2",
"draw" : "1407473540857",
"count" : "1",
"type" : "Block"
},
"child" : [
"1407484647012"
]
},
"1407484647012" : {
"data" : {
"title" : "K3",
"draw" : "03.8878.98",
"count" : "1",
"type" : "SB"
},
"child" : [
"1407484762473"
]
},
"1407484762473" : {
"data" : {
"type" : "SB",
"title" : "D1",
"draw" : "7984",
"count" : "1"
},
"child" : []
}
}
}
How to group all records with condition (type="Block")?
I've tried:
db.ITR.aggregate({$match:{"uin":"1396599472869"}},{$project:{"vm":1}},{$group:{_id:null,r1:{$push:"$vm"}}},{$unwind:"$r1"},{$group:{_id:null,r2:{$push:"$r1"}}},{$unwind:"$r2"})
But the result is still in the form of an object and not an array. I could not get it to work with "MapReduce" either.
Your problem here is basically with the way you currently have your document structured. The usage of "keys" under "vm" here that actually identify data points does not play well with the standard query forms and the aggregation framework in general.
It also is generally not a very good pattern, as in order to access any part under "vm" you need to specify the "exact path" to the data. So looking for type "Block" requires this:
// With keyed sub-documents, every possible key needs its own clause with an exact path.
db.collection.find({
  "$or": [
    { "vm.0.type": "Block" },
    { "vm.1407484608965.type": "Block" }
    // ...plus one more clause for every other key that appears under "vm"
  ]
})
And so on. You cannot "wildcard" field names like this so the exact path is required.
A better approach to modelling is to use an array instead, and move that inner key inside the documents:
{
"_id" : ObjectId("533e6ab0ef2188940b00002c"),
"uin" : "1396599472869",
"vm" : [
{
"key": 0,
"draw" : "01s",
"count" : "2",
"type" : "",
"data" : {
"title" : "K1"
},
"child" : [
"1407484608965"
]
},
{
"key": "1407484608965",
"title" : "K2",
"draw" : "1407473540857",
"count" : "1",
"type" : "Block",
"child" : [
"1407484647012"
]
},
{
"key": "1407484647012",
"title" : "K3",
"draw" : "03.8878.98",
"count" : "1",
"type" : "SB",
"child" : [
"1407484762473"
]
}
]
}
This allows you to query for documents that contain the matching property by a common path, which greatly simplifies things:
db.collection.find({ "vm.type": "Block" })
Or if you want to "filter" the array contents so that only those "sub-documents" that match are returned you can do this:
// Return each matching document with "vm" reduced to the elements of type "Block".
db.collection.aggregate([
// First pass: select only documents that contain at least one "Block" element.
{ "$match": { "vm.type": "Block" } },
// One document per array element.
{ "$unwind": "$vm" },
// Second pass: discard the individual elements that are not "Block".
{ "$match": { "vm.type": "Block" } },
// Rebuild one document per original _id from the surviving elements.
{ "$group": {
"_id": "$_id",
"uin": { "$first": "$uin" },
"vm": { "$push": "$vm" }
}}
])
Or even possibly this with MongoDB 2.6 or greater:
// Same filtering of "vm" to "Block" elements, done in a single $project without $unwind.
db.collection.aggregate([
{ "$match": { "vm.type": "Block" } },
{ "$project": {
"uin": 1,
"vm": {
// Remove the false placeholders left behind by $map.
// NOTE: $setDifference is a set operation, so identical elements collapse to one.
"$setDifference": [
{ "$map": {
"input": "$vm",
"as": "el",
// Array form of $cond: [ condition, value-if-true, value-if-false ].
"in": {"$cond": [
{ "$eq": [ "$$el.type", "Block" ] },
"$$el",
false
]}
}},
[false]
]
}
}}
])
Or any other operation, which is simplified to traverse now the data is structured that way. But as your data presently stands your only option to "traverse keys" is to use JavaScript operations, which is much slower than being able to query in a proper way:
// JavaScript predicate evaluated per document, with `this` bound to that document:
// match when any keyed entry under "vm" has type "Block".
db.collection.find(function() {
  // Capture the sub-document first: inside the nested callback `this` is no
  // longer the document, so `this.vm` cannot be used there.
  var vm = this.vm;
  return Object.keys(vm).some(function(key) {
    return vm[key].type == "Block";
  });
})
Or with similar object processing using mapReduce but essentially with no other way to access the fields with fixed paths that vary all the time.
Perhaps this was a design entered into to avoid having "nested arrays" which is where the "child" element would be placed. Of course this poses a problem with updates. But really if any element should not be an array it is probably the "inner" element such as "child", which could have some kind of structure that does not use an array.
So the key is to look at restructuring, as this will likely suit the patterns that you want without causing performance problems that JavaScript traversal will introduce.