MongoDB distinct values on subdocuments - mongodb

I have a little weird database structure it is as follows:
I have a document with normal properties, then I have a metadata property which is an array of objects.
metadata: {[
{
key: [key],
value: [value]
},
...
]}
Edit: There will never be a metadata sub-document which has a duplicate key
It was done this way to retain the order of the metadata objects
Now I want to get distinct values of a metadata object with a given key.
I want to find every distinct [value] where [key] = "x" using MongoDB. And have the distinct values returned in an array (not the document)
I guess this is not possible using the distinct command, but is this possible using an aggregation pipeline or do I have to use Map-Reduce?
Any suggestions?
Thanks in advance! :)

I presume you mean this:
{
"metadata": [
{ "key": "abc", "value": "borf" },
{ "key": "cdc", "value": "biff" }
]
},
{
"metadata": [
{ "key": "bbc", "value": "barf" },
{ "key": "abc", "value": "borf" },
{ "key": "abc", "value": "barf" }
]
}
Where if you filter for "abc" and get the distinct "value" entries like this:
// Distinct "value" entries of the metadata elements whose key is "abc".
db.collection.aggregate([
  // Keep only documents that contain at least one element with the key.
  { $match: { "metadata.key": "abc" } },
  // Emit one document per metadata element.
  { $unwind: "$metadata" },
  // Discard the unwound elements that carry a different key.
  { $match: { "metadata.key": "abc" } },
  // Grouping on the value collapses duplicates.
  { $group: { _id: "$metadata.value" } }
])
Or even better:
db.collection.aggregate([
  // Keep only documents that contain at least one element with the key.
  { $match: { "metadata.key": "abc" } },
  // $redact evaluates the condition at the root and at every embedded
  // document. Levels without a "key" field (the root) fall back to "abc"
  // through $ifNull and are descended into; array elements with any other
  // key are pruned.
  { $redact: {
    $cond: [
      { $eq: [ { $ifNull: [ "$key", "abc" ] }, "abc" ] },
      "$$DESCEND",
      "$$PRUNE"
    ]
  }},
  // Only matching elements remain, so no second $match is required.
  { $unwind: "$metadata" },
  // One result per distinct value, with the number of occurrences.
  { $group: {
    _id: "$metadata.value",
    count: { $sum: 1 }
  }}
])
Which would basically give:
{ "_id": "barf", "count": 1 },
{ "_id": "borf", "count": 2 }
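But it is not possible for the aggregation result itself to be just an array of "barf" and "borf". The distinct() method does return a plain array of values, but it is also very limited: its query only selects whole documents, not individual array elements. Therefore it can only do this: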
db.collection.distinct("metadata.value",{ "metadata.key": "abc" })
[ "biff", "borf", "barf" ]
Which is incorrect as a result, because "biff" (stored under the key "cdc") is returned along with the values for "abc". So just take the "document" results from above and apply some "post processing":
db.collection.aggregate([
  // Keep only documents that contain at least one element with the key.
  { $match: { "metadata.key": "abc" } },
  // Prune every embedded document whose "key" is not "abc"; the root has no
  // "key" field, so $ifNull makes it pass the test and it is descended into.
  { $redact: {
    $cond: [
      { $eq: [ { $ifNull: [ "$key", "abc" ] }, "abc" ] },
      "$$DESCEND",
      "$$PRUNE"
    ]
  }},
  { $unwind: "$metadata" },
  // One result document per distinct value.
  { $group: { _id: "$metadata.value" } }
]).map(function(result) {
  // Client-side post-processing: keep just the value of each result document.
  return result._id;
})
And that result is a plain array of just the distinct values:
[ "borf", "barf" ]

Related

How to count embedded array object elements in mongoDB

{
"orderNo": "123",
"bags": [{
"type": "small",
"products": [{
"id": "1",
"name": "ABC",
"returnable": true
}, {
"id": "2",
"name": "XYZ"
}
]
},{
"type": "big",
"products": [{
"id": "3",
"name": "PQR",
"returnable": true
}, {
"id": "4",
"name": "UVW"
}
]
}
]
}
I have an orders collection where documents are in this format. I want to get the total count of products which have the returnable flag, e.g. for the above order the count should be 2. I am very new to MongoDB and wanted to know how to write a query to find this out. I have tried a few things, but they did not help.
This is what I tried, but it did not work:
db.orders.aggregate([
{ "$unwind": "$bags" },
{ "$unwind": "$bags.products" },
{ "$unwind": "$bags.products.returnable" },
{ "$group": {
"_id": "$bags.products.returnable",
"count": { "$sum": 1 }
}}
])
For inner array you can use $filter to check returnable flag and $size to get number of such items. For the outer one you can take advantage of $reduce to sum the values from inner arrays:
db.collection.aggregate([
  {
    $project: {
      // Fold over the outer "bags" array, accumulating a running total.
      totalReturnable: {
        $reduce: {
          input: "$bags",
          initialValue: 0,
          in: {
            $add: [
              "$$value",
              // Number of products in the current bag flagged as returnable.
              {
                $size: {
                  $filter: {
                    input: "$$this.products",
                    as: "product",
                    cond: { $eq: [ "$$product.returnable", true ] }
                  }
                }
              }
            ]
          }
        }
      }
    }
  }
])
Mongo Playground

MongoDB: Assign document objects to field in '$project' stage

I have a user collection:
[
{"_id": 1,"name": "John", "age": 25, "valid_user": true}
{"_id": 2, "name": "Bob", "age": 40, "valid_user": false}
{"_id": 3, "name": "Jacob","age": 27,"valid_user": null}
{"_id": 4, "name": "Amelia","age": 29,"valid_user": true}
]
I run a '$facet' stage on this collection. Checkout this MongoPlayground.
I want to talk about the first output from the facet stage. The following is the response currently:
{
"user_by_valid_status": [
{
"_id": false,
"count": 1
},
{
"_id": true,
"count": 2
},
{
"_id": null,
"count": 1
}
]
}
However, I want to restructure the output in this way:
"analytics": {
"invalid_user": {
"_id": false
"count": 1
},
"valid_user": {
"_id": true
"count": 2
},
"user_with_unknown_status": {
"_id": null
"count": 1
}
}
The problem with using a '$project' stage along with 'arrayElemAt' is that the order may not be definite for me to associate an index with an attribute like 'valid_users' or others. Also, it gets further complicated because unlike the sample documents that I have shared, my collection may not always contain all the three categories of users.
Is there some way I can do this?
You can use the $switch conditional operator.
The inner $project reshapes each group into a { k, v } pair: v holds the _id and count fields as an object, and k is the key name chosen by the $switch condition.
db.collection.aggregate([
  {
    "$facet": {
      "user_by_valid_status": [
        // Count users per distinct valid_user value (true / false / null).
        {
          "$group": {
            "_id": "$valid_user",
            "count": { "$sum": 1 }
          }
        },
        // Reshape every group into the { k, v } pair that $arrayToObject
        // expects: v keeps the original group, k is the target key name.
        // NOTE: the $switch has no default branch, so a valid_user value
        // other than null / false / true makes the stage fail.
        {
          $project: {
            _id: 0,
            v: { _id: "$_id", count: "$count" },
            k: {
              $switch: {
                branches: [
                  { case: { $eq: ["$_id", null] }, then: "user_with_unknown_status" },
                  { case: { $eq: ["$_id", false] }, then: "invalid_user" },
                  { case: { $eq: ["$_id", true] }, then: "valid_user" }
                ]
              }
            }
          }
        }
      ],
      "users_above_30": [{ "$match": { "age": { "$gt": 30 } } }]
    }
  },
  // $project stage in root: convert the user_by_valid_status array of
  // { k, v } pairs to a single object using $arrayToObject.
  {
    $project: {
      analytics: { $arrayToObject: "$user_by_valid_status" },
      users_above_30: 1
    }
  }
])
Playground

MongoDB unwind multiple empty arrays

The sample data in the database looks something like this:
{
'data':
[
'Log':
{
'IP':['8.8.8.8','8.8.4.4'],
'URL':['www.google.com']
'Hash' ['d2a12319bf1221ce7681928cc']
},
'Log':
{
'IP':['1.2.3.4'],
'URL':['www.cnn.com']
'Hash' []
},
]
}
I am trying to aggregate a list of unique IP, URL and Hash values from the above list of logs. My current query looks something like this:
db.loglist.aggregate([{'$match':{'data.Log':{'$exists':true}}},
{'$unwind':'$data'},
{'$unwind':'$data.Log.URL'},
{'$unwind':'$data.Log.Hash'},
{'$unwind':'$data.Log.IP'},
{'$group':{'_id':'$ioc',
'FHList':{'$addToSet':'$data.Log.Hash'},
'URLList':{'$addToSet':'$data.Log.URL'},
'IPList':{'$addToSet':'$data.Log.IP'}}
}])
It works well if, for every log, there is at least one element in each of the three arrays. However, when an empty array appears in any one of the logs, Mongo returns an empty result for the whole query. I figured out from a few similar posts that this is the default behavior of $unwind. But what is the standard way to use $unwind then, so that if, say, we have no results for "Hash", we can still keep the results for "IP" and "URL"?
Thanks in advance for any answer.
The $cond operator is the main helper here, with a test to see if the array is empty, and replace it with another value to filter later:
db.loglist.aggregate([
  { "$match": { "data.Log": { "$exists": true } } },
  { "$unwind": "$data" },
  // Replace every empty array with the placeholder [false] so the $unwind
  // stages below do not discard the document. Field paths inside expressions
  // are resolved from the document root, hence the full "$data.Log.*" paths.
  { "$project": {
    "ioc": 1,
    "data": {
      "Log": {
        "IP": { "$cond": [
          { "$ne": [ "$data.Log.IP", [] ] },
          "$data.Log.IP",
          [false]
        ]},
        "URL": { "$cond": [
          { "$ne": [ "$data.Log.URL", [] ] },
          "$data.Log.URL",
          [false]
        ]},
        "Hash": { "$cond": [
          { "$ne": [ "$data.Log.Hash", [] ] },
          "$data.Log.Hash",
          [false]
        ]}
      }
    }
  }},
  { "$unwind": "$data.Log.URL" },
  { "$unwind": "$data.Log.Hash" },
  { "$unwind": "$data.Log.IP" },
  // Collect the unique values per "ioc"; the sets may contain the placeholder.
  { "$group": {
    "_id": "$ioc",
    "FHList": { "$addToSet": "$data.Log.Hash" },
    "URLList": { "$addToSet": "$data.Log.URL" },
    "IPList": { "$addToSet": "$data.Log.IP" }
  }},
  // Strip the false placeholder from each set.
  { "$project": {
    "FHList": { "$setDifference": ["$FHList", [false]] },
    "URLList": { "$setDifference": ["$URLList", [false]] },
    "IPList": { "$setDifference": ["$IPList", [false]] }
  }}
])
Once the set is constructed, the unwanted value is filtered away.
If your MongoDB version is less than 2.6 and you do not have $setDifference, then you can filter after unwinding again, presuming that no result array would be expected to be empty here:
db.loglist.aggregate([
  { "$match": { "data.Log": { "$exists": true } } },
  { "$unwind": "$data" },
  // Replace every empty array with the placeholder [false] so the $unwind
  // stages below do not discard the document. Field paths inside expressions
  // are resolved from the document root, hence the full "$data.Log.*" paths.
  { "$project": {
    "ioc": 1,
    "data": {
      "Log": {
        "IP": { "$cond": [
          { "$ne": [ "$data.Log.IP", [] ] },
          "$data.Log.IP",
          [false]
        ]},
        "URL": { "$cond": [
          { "$ne": [ "$data.Log.URL", [] ] },
          "$data.Log.URL",
          [false]
        ]},
        "Hash": { "$cond": [
          { "$ne": [ "$data.Log.Hash", [] ] },
          "$data.Log.Hash",
          [false]
        ]}
      }
    }
  }},
  { "$unwind": "$data.Log.URL" },
  { "$unwind": "$data.Log.Hash" },
  { "$unwind": "$data.Log.IP" },
  // Collect the unique values per "ioc"; the sets may contain the placeholder.
  { "$group": {
    "_id": "$ioc",
    "FHList": { "$addToSet": "$data.Log.Hash" },
    "URLList": { "$addToSet": "$data.Log.URL" },
    "IPList": { "$addToSet": "$data.Log.IP" }
  }},
  // Without $setDifference: unwind each set again and drop the placeholder.
  // A set that held only the placeholder removes the whole group here.
  { "$unwind": "$FHList" },
  { "$match": { "FHList": { "$ne": false } }},
  { "$unwind": "$URLList" },
  { "$match": { "URLList": { "$ne": false } }},
  { "$unwind": "$IPList" },
  { "$match": { "IPList": { "$ne": false } }},
  // Rebuild the three sets from the filtered values.
  { "$group": {
    "_id": "$_id",
    "FHList": { "$addToSet": "$FHList" },
    "URLList": { "$addToSet": "$URLList" },
    "IPList": { "$addToSet": "$IPList" }
  }}
])
If your grouped arrays were empty then it is tricky in the second form but still possible.

Selecting all objects from complex model

I have aggregation pipeline stage:
$project: {
'school': {
'id': '$_id',
'name': '$name',
'manager': '$manager'
},
'students': '$groups.students',
'teachers': '$groups.teachers'
}
Need something like this:
{
'users': // manager + students + teachers
}
Tried:
{
'users': {
$push: {
$each: ['$school.manager', '$students', '$teachers']
}
}
}
I'm presuming that "students" and "teachers" are both arrays here and located under a common sub-document heading like so:
{
"_id": 123,
"name": "This school",
"manager": "Bill"
"groups": {
"teachers": ["Ted"],
"students": ["Missy"]
}
}
So in order to get all of those in a singular array such as "users", it depends on your MongoDB version and the "uniqueness" of your data. For true "sets", and where you have MongoDB 2.6 or greater available, there is the $setUnion operator, albeit with an additional level of $group to make "manager" an array:
db.collection.aggregate([
  // Group on _id and name, which turns the scalar "manager" into an array
  // via $push so that it can take part in the set operation.
  { $group: {
    _id: { _id: "$_id", name: "$name" },
    manager: { $push: "$manager" },
    groups: { $first: "$groups" }
  }},
  // Merge the three arrays into one set of users.
  { $project: {
    users: {
      $setUnion: [ "$manager", "$groups.teachers", "$groups.students" ]
    }
  }}
])
Or otherwise where that operator is not available or there is a "unique" problem then there is this way to handle "combining":
db.collection.aggregate([
  // Group per original document ("$_id" is a field reference, not a literal
  // string) and attach a "type" array used to copy the document three times.
  { "$group": {
    "_id": { "_id": "$_id", "name": "$name" },
    "manager": { "$push": "$manager" },
    "teachers": { "$first": "$groups.teachers" },
    "students": { "$first": "$groups.students" },
    "type": { "$first": { "$const": ["M","T","S"] } }
  }},
  // One copy of the document per type tag.
  { "$unwind": "$type" },
  // Pick the array that corresponds to the tag of this copy.
  { "$project": {
    "users": {
      "$cond": [
        { "$eq": [ "$type", "M" ] },
        "$manager",
        { "$cond": [
          { "$eq": [ "$type", "T" ] },
          "$teachers",
          "$students"
        ]}
      ]
    }
  }},
  // Flatten the chosen arrays and push everything back into a single list.
  { "$unwind": "$users" },
  { "$group": {
    "_id": "$_id",
    "users": { "$push": "$users" }
  }}
])
This essentially "tags" each field by a "type" for which the document is copied in the pipeline. Then placed into a single "users" field depending on which "type" matched. The single array then from the resulting three documents from each original can then be safely "unwound" and combined in a final $group operation.
So "sets" are your fastest option where available; where they are not available, or the data is not unique, you can use the latter technique in order to combine these into a single list.

MongoDB return two object for every group

I want to get two objects $first and $last after grouping. Is it possible?
Something like this, but this is not working:
{ "$group": {
"_id": "type",
"values": [{
"time": { "$first": "$time" },
"value": { "$first": "$value" }
},
{
"time": { "$last": "$time" },
"value": { "$last": "$value" }
}]
}
}
In order to get the $first and $last values from an array with the aggregation framework, you need to use $unwind first to "de-normalize" the array as individual documents. There is also another trick to put those back in an array.
Assuming a document like this
{
"type": "abc",
"values": [
{ "time": ISODate("2014-06-12T22:35:42.260Z"), "value": "ZZZ" },
{ "time": ISODate("2014-06-12T22:36:45.921Z"), "value": "KKK" },
{ "time": ISODate("2014-06-12T22:37:18.237Z"), "value": "AAA" }
]
}
Assuming that your array is already sorted, you would proceed as follows.
If you do not care about the results being in an array just $unwind and $group:
db.junk.aggregate([
  // One document per array element.
  { $unwind: "$values" },
  // First and last element of each type, returned as flat fields.
  { $group: {
    _id: "$type",
    ftime: { $first: "$values.time" },
    fvalue: { $first: "$values.value" },
    ltime: { $last: "$values.time" },
    lvalue: { $last: "$values.value" }
  }}
])
For those results in array then there is a trick to it:
db.collection.aggregate([
  // One document per array element.
  { $unwind: "$values" },
  // Attach a constant two-element array used later to duplicate each group.
  { $project: {
    type: 1,
    values: 1,
    indicator: { $literal: ["first", "last"] }
  }},
  // First and last element of each type, still as flat fields.
  { $group: {
    _id: "$type",
    ftime: { $first: "$values.time" },
    fvalue: { $first: "$values.value" },
    ltime: { $last: "$values.time" },
    lvalue: { $last: "$values.value" },
    indicator: { $first: "$indicator" }
  }},
  // Two copies of every group: one tagged "first", one tagged "last".
  { $unwind: "$indicator" },
  // Select the fields that belong to the tag of this copy.
  { $project: {
    values: {
      time: {
        $cond: [ { $eq: [ "$indicator", "first" ] }, "$ftime", "$ltime" ]
      },
      value: {
        $cond: [ { $eq: [ "$indicator", "first" ] }, "$fvalue", "$lvalue" ]
      }
    }
  }},
  // Push both copies back into a single "values" array per type.
  { $group: {
    _id: "$_id",
    values: { $push: "$values" }
  }}
])
If your array is not sorted, place an additional $sort stage before the very first $group to make sure your items are in the order you want them to be evaluated by $first and $last. A logical order is by the "time" field, so:
{ "$sort": { "type": 1, "values.time": 1 } }
The $literal declares an array to identify the values of "first" and "last", which are later "unwound" to create two copies of each grouped document. These are then evaluated using the $cond operator to re-assign to a single field for "values", which is finally pushed back into an array using $push.
Remember to always try to $match first in the pipeline in order to reduce the number of documents you are working on to what you reasonably want. You pretty much never want to do this over whole collections, especially when you are using $unwind on arrays.
Just as a final note $literal is introduced/exposed in MongoDB 2.6 and greater versions. For prior versions you can interchange that with the undocumented $const.