Find empty object properties in MongoDb collection - mongodb

How can I find all documents in a MongoDb collection where a property of the document or its sub-documents contains an empty object value {}?
The name of the property is not known.
Example of which documents should be returned:
{
data: {
comment: {}
}
}
As said data and comment as property names are unknown.

The way to iterate object properties within aggregation pipeline is $objectToArray operator, which converts a document to the array of key-value pairs. Unfortunately, it does not flatten embedded documents. Until such support is implemented, I don't see a way to accomplish your task with the pure aggregation pipeline.
However you could always use $where operator and put the logic into JavaScript code. It should recursively iterate over all document properties and check whether the value is an empty document. Here is a working sample:
db.collection.find({"$where" : function () {
function hasEmptyProperties(doc) {
for (var property in doc) {
var value = doc[property];
if (value !== null && value.constructor === Object &&
(Object.keys(value).length === 0 || hasEmptyProperties(value))) {
return true;
}
}
return false;
}
return hasEmptyProperties(this);
}});
If you fill the collection with the following data:
db.collection.insert({ _id: 1, p: false });
db.collection.insert({ _id: 2, p: [] });
db.collection.insert({ _id: 3, p: null });
db.collection.insert({ _id: 4, p: new Date() });
db.collection.insert({ _id: 5, p: {} });
db.collection.insert({ _id: 6, nestedDocument: { p: "Some Value" } });
db.collection.insert({ _id: 7, nestedDocument: { p1: 1, p2: {} } });
db.collection.insert({ _id: 8, nestedDocument: { deepDocument: { p: 1 } } });
db.collection.insert({ _id: 9, nestedDocument: { deepDocument: { p: {} } } });
the query will correctly detect all documents with empty properties:
{ "_id" : 5, "p" : { } }
{ "_id" : 7, "nestedDocument" : { "p1" : 1, "p2" : { } } }
{ "_id" : 9, "nestedDocument" : { "deepDocument" : { "p" : { } } } }
Just for reference, here is an aggregation pipeline based on $objectToArray which detects empty properties, however not within nested documents:
db.collection.aggregate(
[
{ "$project": {
_id: 1,
"properties": { "$objectToArray": "$$ROOT" }
}},
{ "$project": {
_id: 1,
propertyIsEmpty: {
$map: {
input: "$properties.v",
as: "value",
in: { $eq: ["$$value", {} ] }
}
}
}},
{ "$project": {
_id: 1,
anyPropertyIsEmpty: { $anyElementTrue: [ "$propertyIsEmpty" ] }
}},
{$match : {"anyPropertyIsEmpty" : true}},
{ "$project": {
_id: 1,
}},
]);

Related

Concat Arrays on all fields MongoDB

Imagine I have this collection:
{
id: 1,
b: {
"field1": ['foo'],
"field2": ['bar']
}
}
{
id: 2,
b: {
"field2": ["foobar"],
"field3": ["foofoo"]
}
}
And I want to obtain a new collection with MongoDB:
{
id: 1,
b_grouped: ['foo', 'bar']
}
{
id: 2,
b_grouped: ["foobar", "foofoo"]
}
I don't know all the name of the fields in the documents, anyone would have an idea of how to perform something like this:
db.collection.aggregate(
[
{ "$project": { "b_grouped": { $concatArrays: ["$b.*"] } } }
]
)
You can try,
$reduce input b as a array after converting from object to array using $objectToArray, this will convert object in "k" (key), "v" (value) format array of object,
$concatArrays to concat initialValue ($$value) of $raduce and array of b object's field $$this.v
db.collection.aggregate([
{
$project: {
b_grouped: {
$reduce: {
input: { $objectToArray: "$b" },
initialValue: [],
in: {
$concatArrays: ["$$this.v", "$$value"]
}
}
}
}
}
])
Playground

How to get a sorted result of $lookup [duplicate]

I've been looking for a while now and can't seem to sort an inner array and keep that in the doc that I'm currently working with.
{
"service": {
"apps": {
"updates": [
{
"n" : 1
"date": ISODate("2012-03-10T16:15:00Z")
},
{
"n" : 2
"date": ISODate("2012-01-10T16:15:00Z")
},
{
"n" : 5
"date": ISODate("2012-07-10T16:15:00Z")
}
]
}
}
}
So I want to keep the item to be returned as the service, but have my updates array sorted. So far with the shell I have:
db.servers.aggregate(
{$unwind:'$service'},
{$project:{'service.apps':1}},
{$unwind:'$service.apps'},
{$project: {'service.apps.updates':1}},
{$sort:{'service.apps.updates.date':1}});
Anyone think they can help on this?
You can do this by $unwinding the updates array, sorting the resulting docs by date, and then $grouping them back together on _id using the sorted order.
db.servers.aggregate(
{$unwind: '$service.apps.updates'},
{$sort: {'service.apps.updates.date': 1}},
{$group: {_id: '$_id', 'updates': {$push: '$service.apps.updates'}}},
{$project: {'service.apps.updates': '$updates'}})
Starting in Mongo 4.4, the $function aggregation operator allows applying a custom javascript function to implement behaviour not supported by the MongoDB Query Language.
For instance, in order to sort an array of objects by one of their fields:
// {
// "service" : { "apps" : { "updates" : [
// { "n" : 1, "date" : ISODate("2012-03-10T16:15:00Z") },
// { "n" : 2, "date" : ISODate("2012-01-10T16:15:00Z") },
// { "n" : 5, "date" : ISODate("2012-07-10T16:15:00Z") }
// ]}}
// }
db.collection.aggregate(
{ $set: {
{ "service.apps.updates":
{ $function: {
body: function(updates) {
updates.sort((a, b) => a.date - b.date);
return updates;
},
args: ["$service.apps.updates"],
lang: "js"
}}
}
}
)
// {
// "service" : { "apps" : { "updates" : [
// { "n" : 2, "date" : ISODate("2012-01-10T16:15:00Z") },
// { "n" : 1, "date" : ISODate("2012-03-10T16:15:00Z") },
// { "n" : 5, "date" : ISODate("2012-07-10T16:15:00Z") }
// ]}}
// }
This modifies the array in place, without having to apply a combination of expensive $unwind, $sort and $group stages.
$function takes 3 parameters:
body, which is the function to apply, whose parameter is the array to modify.
args, which contains the fields from the record that the body function takes as parameter. In our case "$service.apps.updates".
lang, which is the language in which the body function is written. Only js is currently available.
Starting in Mongo 5.2, it's the exact use case for the new $sortArray aggregation operator:
// {
// service: { apps: { updates: [
// { n: 1, date: ISODate("2012-03-10") },
// { n: 2, date: ISODate("2012-01-10") },
// { n: 5, date: ISODate("2012-07-10") }
// ]}}
// }
db.collection.aggregate([
{ $set: {
"service.apps.updates": {
$sortArray: {
input: "$service.apps.updates",
sortBy: { date: 1 }
}
}
}}
])
// {
// service: { apps: { updates: [
// { n: 2, date: ISODate("2012-01-10") },
// { n: 1, date: ISODate("2012-03-10") },
// { n: 5, date: ISODate("2012-07-10") }
// ]}}
// }
This:
sorts ($sortArray) the service.apps.updates array (input: "$service.apps.updates")
by applying a sort on dates (sortBy: { date: 1 })
without having to apply a combination of expensive $unwind, $sort and $group stages

Mongo DB - Second Level Search - elemMatch

I am trying to fetch all records (and count of all records) for a structure like the following,
{
id: 1,
level1: {
level2:
[
{
field1:value1;
},
{
field1:value1;
},
]
}
},
{
id: 2,
level1: {
level2:
[
{
field1:null;
},
{
field1:value1;
},
]
}
}
My requirement is to fetch the number of records that have field1 populated (atleast one in level2). I need to say fetch all the ids or the number of such ids.
The query I am using is,
db.table.find({},
{
_id = id,
value: {
$elemMatch: {'level1.level2.field1':{$exists: true}}
}
}
})
Please suggest.
EDIT1:
This is the question I was trying to ask in the comment. I was unable to elucidate in the comment properly. Hence, editing the question.
{
id: 1,
level1: {
level2:
[
{
field1:value1;
},
{
field1:value1;
},
]
}
},
{
id: 2,
level1: {
level2:
[
{
field1:value2;
},
{
field1:value2;
},
{
field1:value2;
}
]
}
}
{
id: 3,
level1: {
level2:
[
{
field1:value1;
},
{
field1:value1;
},
]
}
}
The query we used results in
value1: 4
value2: 3
I want something like
value1: 2 // Once each for documents 1 & 3
value2: 1 // Once for document 2
You can do that with the following find query:
db.table.find({ "level1.level2" : { $elemMatch: { field1 : {$exists: true} } } }, {})
This will return all documents that have a field1 in the "level1.level2" structure.
For your question in the comment, you can use the following aggregation to "I had to return a grouping (and the corresponding count) for the values in field1":
db.table.aggregate(
[
{
$unwind: "$level1.level2"
},
{
$match: { "level1.level2.field1" : { $exists: true } }
},
{
$group: {
_id : "$level1.level2.field1",
count : {$sum : 1}
}
}
]
UPDATE: For your question "'value1 - 2` At level2, for a document, assume all values will be the same for field1.".
I hope i understand your question correctly, instead of grouping only on the value of field1, i added the document _id as an xtra grouping:
db.table.aggregate(
[
{
$unwind: "$level1.level2"
},
{
$match: {
"level1.level2.field1" : { $exists: true }
}
},
{
$group: {
_id : { id : "$_id", field1: "$level1.level2.field1" },
count : {$sum : 1}
}
}
]
);
UPDATE2:
I altered the aggregation and added a extra grouping, the aggregation below gives you the results you want.
db.table.aggregate(
[
{
$unwind: "$level1.level2"
},
{
$match: {
"level1.level2.field1" : { $exists: true }
}
},
{
$group: {
_id : { id : "$_id", field1: "$level1.level2.field1" }
}
},
{
$group: {
_id : { id : "$_id.field1"},
count : { $sum : 1}
}
}
]
);

Merge duplicates and remove the oldest

I have a collection where there are some duplicate documents. In example:
First document:
{
"_id" : ObjectId("56f3d7cc1de31cb20c08ae6b"),
"AddedDate" : ISODate("2016-05-01T00:00:00.000Z"),
"Place": "THISPLACE",
"PresentInDB" : [
{
"InDB" : ISODate("2016-05-01T00:00:00.000Z")
}
],
"Checked" : [],
"Link": "http://www.mylink.com/first/84358"
}
Second document:
{
"_id" : ObjectId("577740526c1e542904725238"),
"AddedDate" : ISODate("2016-05-02T00:00:00.000Z"),
"Place": "THISPLACE",
"PresentInDB" : [
{
"InDB" : ISODate("2016-05-02T00:00:00.000Z")
},
{
"InDB" : ISODate("2016-05-03T00:00:00.000Z")
}
],
"Checked" : [
{
"Done" : ISODate("2016-05-02T00:00:00.000Z")
},
],
"Link": "http://www.mylink.com/second/84358"
}
Link field contains same sequense of numbers in both documents, 84358.
So I would like to achieve those steps:
Loop over each document in the collection.
Match the number sequence in each document in the Link field (i.e. 84358 above) and if there are several documents in
collection that have that sequence in the Link field. And also if Place field match in both documents:
Merge PresentInDB and Checked fields - > merge PresentInDB and Checked fields by adding array values from the newest document (by date in AddedDate
field) to the oldest document.
Remove the newest document.
How could I achieve such a query?
In MongoDB 3.3.6 release is introduced a $split operator for dealing with strings in aggregation framework (Jira). Before this release you only could solve this with a map/reduce solution.
After MongoDB 3.3.6 release: Aggregation framework solution
db.duplicatedCollection.aggregate(
[
{
$project: {
_id : 1,
AddedDate : 1,
Place : 1,
PresentInDB : 1,
Checked : 1,
Link : 1,
sequenceNumber: { $arrayElemAt: [ {$split: ["$Link", "/"]}, -1 ]},
}
},
{
$sort: { AddedDate: 1 }
},
{
$group: {
_id : {
sequenceNumber : "$sequenceNumber",
Place : "$Place"
},
id : { $first: "$_id"},
AddedDate: { $first: "$AddedDate" },
Place : { $first: "$Place" },
PresentInDB: {
$push: '$PresentInDB'
},
Checked: {
$push: '$Checked'
},
Link: { $first: "$Link"}
}
},
{
$unwind: "$PresentInDB"
},
{
$unwind: {
path : "$PresentInDB",
preserveNullAndEmptyArrays: true
}
},
{
$unwind: "$Checked"
},
{
$unwind: {
path : "$Checked",
preserveNullAndEmptyArrays: true
}
},
{
$group: {
_id : "$id",
AddedDate: { $first: "$AddedDate" },
Place : { $first: "$Place" },
PresentInDB : {
$addToSet: '$PresentInDB'
},
Checked : {
$addToSet: '$Checked'
},
Link: { $first: "$Link"}
}
},
{
$out: "duplicatedCollection"
}
]
);
Before MongoDB 3.3.6 release: Map/Reduce solution
Map Function:
var mapFunction = function() {
var linkArray = this.Link.split("/");
var sequenceNumber = linkArray[linkArray.length - 1];
var keyDoc = {
place : this.Place,
sequenceNumber: sequenceNumber,
};
emit(keyDoc, this);
};
Reduce Function:
var reduceFunction = function(key, values) {
var reducedDoc = {};
reducedDoc._id = values[0]._id;
reducedDoc.AddedDate = values[0].AddedDate;
reducedDoc.Link = values[0].Link;
reducedDoc.PresentInDB = [];
reducedDoc.Checked = [];
var presentInDbMillisArray = [];
var checkedMillisArray = [];
values.forEach(function(doc) {
if (reducedDoc.AddedDate < doc.AddedDate) {
reducedDoc._id = doc._id;
reducedDoc.AddedDate = doc.AddedDate;
reducedDoc.Link = doc.Link;
}
// PresentInDB field merge
doc.PresentInDB.forEach(function(presentInDBElem) {
var millis = presentInDBElem.InDB.getTime();
if (!Array.contains(presentInDbMillisArray, millis)) {
reducedDoc.PresentInDB.push(presentInDBElem);
presentInDbMillisArray.push(millis);
}
});
// same here with Checked field
doc.Checked.forEach(function(checkedElem) {
var millis = checkedElem.Done.getTime();
if (!Array.contains(checkedMillisArray, millis)) {
reducedDoc.Checked.push(checkedElem);
checkedMillisArray.push(millis);
}
});
});
return reducedDoc;
};
Map/Reduce:
db.duplicatedCollection.mapReduce(
mapFunction,
reduceFunction,
{
"out": "duplicatedCollection"
}
);
Unwrap the value from the map/reduce returned documents:
db.duplicatedCollection.find(
{
value : {
$exists: true
}
}
).forEach(function(doc) {
db.duplicatedCollection.insert(doc.value);
db.duplicatedCollection.remove({_id : doc._id});
});
You can use a single aggregation query to do that :
db.device.aggregate([{
"$unwind": "$PresentInDB"
}, {
"$match": {
"Link": /84358/
}
}, {
"$sort": {
"AddedDate": 1
}
}, {
"$group": {
_id: 0,
PresentInDB: {
$addToSet: '$PresentInDB'
},
AddedDate: {
$first: "$AddedDate"
},
id: {
$first: "$_id"
},
Link: {
$first: "$Link"
}
}
}, {
$out: "documents"
}])
$unwind your array to work on it
$match your id (here containing 84358)
$sort by ascending date
$group with :
a $addToSet to merge all your PresentInDB into one single array without duplicates
a $first for each field to keep. Keeping the first means you only want the older one since we previously sorted by ascending date
$out will save the results to a new collection called documents here

Mongodb sort inner array

I've been looking for a while now and can't seem to sort an inner array and keep that in the doc that I'm currently working with.
{
"service": {
"apps": {
"updates": [
{
"n" : 1
"date": ISODate("2012-03-10T16:15:00Z")
},
{
"n" : 2
"date": ISODate("2012-01-10T16:15:00Z")
},
{
"n" : 5
"date": ISODate("2012-07-10T16:15:00Z")
}
]
}
}
}
So I want to keep the item to be returned as the service, but have my updates array sorted. So far with the shell I have:
db.servers.aggregate(
{$unwind:'$service'},
{$project:{'service.apps':1}},
{$unwind:'$service.apps'},
{$project: {'service.apps.updates':1}},
{$sort:{'service.apps.updates.date':1}});
Anyone think they can help on this?
You can do this by $unwinding the updates array, sorting the resulting docs by date, and then $grouping them back together on _id using the sorted order.
db.servers.aggregate(
{$unwind: '$service.apps.updates'},
{$sort: {'service.apps.updates.date': 1}},
{$group: {_id: '$_id', 'updates': {$push: '$service.apps.updates'}}},
{$project: {'service.apps.updates': '$updates'}})
Starting in Mongo 4.4, the $function aggregation operator allows applying a custom javascript function to implement behaviour not supported by the MongoDB Query Language.
For instance, in order to sort an array of objects by one of their fields:
// {
// "service" : { "apps" : { "updates" : [
// { "n" : 1, "date" : ISODate("2012-03-10T16:15:00Z") },
// { "n" : 2, "date" : ISODate("2012-01-10T16:15:00Z") },
// { "n" : 5, "date" : ISODate("2012-07-10T16:15:00Z") }
// ]}}
// }
db.collection.aggregate(
{ $set: {
{ "service.apps.updates":
{ $function: {
body: function(updates) {
updates.sort((a, b) => a.date - b.date);
return updates;
},
args: ["$service.apps.updates"],
lang: "js"
}}
}
}
)
// {
// "service" : { "apps" : { "updates" : [
// { "n" : 2, "date" : ISODate("2012-01-10T16:15:00Z") },
// { "n" : 1, "date" : ISODate("2012-03-10T16:15:00Z") },
// { "n" : 5, "date" : ISODate("2012-07-10T16:15:00Z") }
// ]}}
// }
This modifies the array in place, without having to apply a combination of expensive $unwind, $sort and $group stages.
$function takes 3 parameters:
body, which is the function to apply, whose parameter is the array to modify.
args, which contains the fields from the record that the body function takes as parameter. In our case "$service.apps.updates".
lang, which is the language in which the body function is written. Only js is currently available.
Starting in Mongo 5.2, it's the exact use case for the new $sortArray aggregation operator:
// {
// service: { apps: { updates: [
// { n: 1, date: ISODate("2012-03-10") },
// { n: 2, date: ISODate("2012-01-10") },
// { n: 5, date: ISODate("2012-07-10") }
// ]}}
// }
db.collection.aggregate([
{ $set: {
"service.apps.updates": {
$sortArray: {
input: "$service.apps.updates",
sortBy: { date: 1 }
}
}
}}
])
// {
// service: { apps: { updates: [
// { n: 2, date: ISODate("2012-01-10") },
// { n: 1, date: ISODate("2012-03-10") },
// { n: 5, date: ISODate("2012-07-10") }
// ]}}
// }
This:
sorts ($sortArray) the service.apps.updates array (input: "$service.apps.updates")
by applying a sort on dates (sortBy: { date: 1 })
without having to apply a combination of expensive $unwind, $sort and $group stages