Mongodb aggregation $size inside nested array - mongodb

I have a problem with a query with aggregation framework.
Given a collection with documents like:
db.testSize.insert([{
"internalId" :1,
"first" : {
"second" : [
{
"value" : 1
}
]
}
}])
this aggregation :
db.testSize.aggregate([
{ $addFields: { tmpSize: { $strLenCP: { $ifNull: [ { $toString: "$first.second.value" }, "" ] } } } },
])
return this error:
{
"message" : "Unsupported conversion from array to string in $convert with no onError value",
"ok" : 0,
"code" : 241,
"codeName" : "ConversionFailure",
"name" : "MongoError"
}
Now, the solution on this problem is to use unwind in the following way:
db.testSize.aggregate([
{ $unwind: "$first.second"},
{ $addFields: { tmpSize: { $strLenCP: { $ifNull: [ { $toString: "$first.second.value" }, "" ] } } } },
])
But my requirement is to create a general approach for documents with various shape and possible nested array inside array.
Due this bug https://jira.mongodb.org/browse/SERVER-6436 seems to be impossible to unwind array inside array, so how to solve this problem ?
There is an approach ?
Some context:
I cannot change document structure before aggregation
I don't know where array will be in "field hierarchy", if first for example is an array, or is second
Thanks in advance

You can use $reduce.
====== Aggregate ======
db.testSize.aggregate([
{
"$addFields": {
"first.second.tmpSize": {
"$reduce": {
"input": "$first.second",
"initialValue": "",
"in": {
$strLenCP: {
$ifNull: [
{
$toString: "$$this.value"
},
""
]
}
}
}
}
}
}
])
====== Result ======
[
{
"_id": ObjectId("5d925bd3fabc692265f950d5"),
"first": {
"second": [
{
"tmpSize": 1,
"value": 1
}
]
},
"internalId": 1
}
]
Mongo Playground

Related

How do I fetch only the first element from the array?

How do I fetch only the first element from the "topicsName" array?
Data I have input:
{
"_id" : ObjectId("606b7046a0ccf72222c00c2f"),
"groupId" : ObjectId("5f06cca74e51ba15f5167b86"),
"insertedAt" : "2021-04-05T20:17:10.144521Z",
"isActive" : true,
"staffId" : [
"606b6c34a0ccf72222c5a4df",
"606b6c48a0ccf722228aa035"
],
"subjectName" : "Maths",
"teamId" : ObjectId("6069a6a9a0ccf704e7f4b537"),
"updatedAt" : "2022-04-29T07:57:31.072067Z",
"syllabus" : [
{
"chapterId" : "626b9b94ae6cd2092024f3ee",
"chapterName" : "chap1",
"topicsName" : [
{
"topicId" : "626b9b94ae6cd2092024f3ef",
"topicName" : "1.1"
},
{
"topicId" : "626b9b94ae6cd2092024f3f0",
"topicName" : "1.2"
}
]
},
{
"chapterId" : "626b9b94ae6cd2092024f3f1",
"chapterName" : "chap2",
"topicsName" : [
{
"topicId" : "626b9b94ae6cd2092024f3f2",
"topicName" : "2.1"
},
{
"topicId" : "626b9b94ae6cd2092024f3f3",
"topicName" : "2.2"
}
]
}
]
}
The Query I used to try to fetch the element:- "topicId" : "626b9b94ae6cd2092024f3ef" from the
"topicsName" array.
db.subject_staff_database
.find(
{ _id: ObjectId("606b7046a0ccf72222c00c2f") },
{
syllabus: {
$elemMatch: {
chapterId: "626b9b94ae6cd2092024f3f1",
topicsName: { $elemMatch: { topicId: "626b9b94ae6cd2092024f3f2" } },
},
},
}
)
.pretty();
I was trying to fetch only the first element from the "topicsName" array, but it fetched both the elements in that array.
You can do the followings in an aggregation pipeline.
$match with your given id locate documents
$reduce to flatten the syllabus and topicsName arrays
$filter to get the expected element
db.collection.aggregate([
{
$match: {
"syllabus.topicsName.topicId": "626b9b94ae6cd2092024f3ef"
}
},
{
"$project": {
result: {
"$reduce": {
"input": "$syllabus.topicsName",
"initialValue": [],
"in": {
"$concatArrays": [
"$$value",
"$$this"
]
}
}
}
}
},
{
"$project": {
result: {
"$filter": {
"input": "$result",
"as": "r",
"cond": {
$eq: [
"$$r.topicId",
"626b9b94ae6cd2092024f3ef"
]
}
}
}
}
}
])
Here is the Mongo playground for your reference.
Welcome Ganesh Sowdepalli,
You are not only asking to "fetch only the first element from the array", but to fetch only the matching element of a nested array property of an object item in array.
Edit: (according to #ray's comment)
One way to do it is using an aggregation pipeline:
db.subject_staff_database.aggregate([
{
$match: {"_id": ObjectId("606b7046a0ccf72222c00c2f")}
},
{
$project: {
syllabus: {
$filter: {
input: "$syllabus",
as: "item",
cond: {$eq: ["$$item.chapterId", "626b9b94ae6cd2092024f3f1"
]
}
}
}
}
},
{
$unwind: "$syllabus"
},
{
$project: {
"syllabus.topicsName": {
$filter: {
input: "$syllabus.topicsName",
as: "item",
cond: {$eq: ["$$item.topicId", "626b9b94ae6cd2092024f3f2"]}
}
},
"syllabus.chapterId": 1,
"syllabus.chapterName": 1,
_id: 0
}
}
])
As you can see on this playground example.
If you want the actual first element, not by _id, look here on my first understanding to your question.
The aggregation pipeline allows us to do several operation on the results.
Since syllabus is an array that may contain more than one matching chapterId, we need to $filter it for the items we want.

MongoDB $cond with embedded document array

I am trying to generate a new collection with a field 'desc' having into account a condition in field in a documment array. To do so, I am using $cond statement
The origin collection example is the next one:
{
"_id" : ObjectId("5e8ef9a23e4f255bb41b9b40"),
"Brand" : {
"models" : [
{
"name" : "AA"
},
{
"name" : "BB"
}
]
}
}
{
"_id" : ObjectId("5e8ef9a83e4f255bb41b9b41"),
"Brand" : {
"models" : [
{
"name" : "AG"
},
{
"name" : "AA"
}
]
}
}
The query is the next:
db.runCommand({
aggregate: 'cars',
'pipeline': [
{
'$project': {
'desc': {
'$cond': {
if: {
$in: ['$Brand.models.name',['BB','TC','TS']]
},
then: 'Good',
else: 'Bad'
}
}
}
},
{
'$project': {
'desc': 1
}
},
{
$out: 'cars_stg'
}
],
'allowDiskUse': true,
})
The problem is that the $cond statement is always returning the "else" value. I also have tried $or statement with $eq or the $and with $ne, but is always returning "else".
What am I doing wrong, or how should I fix this?
Thanks
Since $Brand.models.name returns an array, we cannot use $in operator.
Instead, we can use $setIntersection which returns an array that contains the elements that appear in every input array
db.cars.aggregate([
{
"$project": {
"desc": {
"$cond": [
{
$gt: [
{
$size: {
$setIntersection: [
"$Brand.models.name",
[
"BB",
"TC",
"TS"
]
]
}
},
0
]
},
"Good",
"Bad"
]
}
}
},
{
"$project": {
"desc": 1
}
},
{
$out: 'cars_stg'
}
])
MongoPlayground | Alternative $reduce

Mongodb aggregation match ends with using field value

note: I'm using Mongodb 4 and I must use aggregation, because this is a step of a bigger aggregation
Problem
How to find in a collection documents that contains fields that ends with value from another field in same document ?
Let's start with this collection:
db.regextest.insert([
{"first":"Pizza", "second" : "Pizza"},
{"first":"Pizza", "second" : "not pizza"},
{"first":"Pizza", "second" : "not pizza"}
])
and an example query for exact match:
db.regextest.aggregate([
{
$match : { $expr: { $eq: [ "$first" ,"$second" ] } } }
])
I will get a single document
{
"_id" : ObjectId("5c49d44329ea754dc48b5ace"),
"first" : "Pizza", "second" : "Pizza"
}
And this is good.
But how to do the same, but with endsWith?
I've openend another question for start with here that uses indexOfBytes . But indexOf return only first match, and not last one
Edit: I've found an acceptable answer (with a lot of custom logic, my hope is Mongodb team will solve this), here the solution:
db.regextest.aggregate([
{
$addFields : {
"tmpContains" : { $indexOfBytes: [ "$first", { $ifNull : [ "$second" , 0] } ] }
}
},
{
$match: { "tmpContains" : { $gt : -1 } }
},
{
$addFields : {
"firstLen" : { $strLenBytes: "$first" }
}
},
{
$addFields : {
"secondLen" : { $strLenBytes: "$second" }
}
},
{
$addFields : {
"diffLen" : { $abs: { $subtract : [ "$firstLen", "$secondLen"] } }
}
},
{
$addFields : {
"res" : { $substr: [ "$first", "$diffLen", "$firstLen"] }
}
},
{
$match : { $expr : { $eq: [ "$res" , "$second" ] }}
}
])
As you know the length of both fields ($strLenBytes) you can use $substr to get last n characters of second field and the compare it to first field, try:
db.regextest.aggregate([
{
$match: {
$expr: {
$eq: [
"$first",
{
$let: {
vars: { firstLen: { $strLenBytes: "$first" }, secondLen: { $strLenBytes: "$second" } },
in: { $substr: [ "$second", { $subtract: [ "$$secondLen", "$$firstLen" ] }, "$$firstLen" ] }
}
}
]
}
}
}
])
Above aggregation will give you the same result as string comparison is case-sensitive in MongoDB. To fix that you can apply $toLower operator both on $first and on calculated substring of $second, try:
db.regextest.aggregate([
{
$match: {
$expr: {
$eq: [
{ $toLower: "$first" },
{
$let: {
vars: { firstLen: { $strLenBytes: "$first" }, secondLen: { $strLenBytes: "$second" } },
in: { $toLower: { $substr: [ "$second", { $subtract: [ "$$secondLen", "$$firstLen" ] }, "$$firstLen" ] } }
}
}
]
}
}
}
])

How to write a custom function to split a document into multiple documents of same Id

I am trying to split a document which has the following fields of string type:
{
"_id" : "17121",
"firstName": "Jello",
"lastName" : "New",
"bio" :"He is a nice person."
}
I want to split the above document into three new documents For Example:
{
"_id": "17121-1",
"firstName": "Jello"
}
{
"_id": "17121-2",
"firstName": "New"
}
{
"_id": "17121-3",
"bio": "He is a nice person."
}
Can anyone suggest how to proceed?
db.coll1.find().forEach(function(obj){
// I want to extract every single field. How to iterate on the field within this Bson object(obj) to collect every field.?
});
or any suggestion to do with aggregation pipeline in MongoDB.
You can use the below aggregation query.
The below query will convert each document fields into key value document array followed by $unwind while keeping the index and $replaceRoot with merge to produce the desired output.
$objectToArray to produce array (keyvalarr) with key (name of the array field)-value (array field) pair.
$match to remove the _id key value document.
$arrayToObject to produce the named key value while adding new _id key value pair and flatten array key values.
db.coll.aggregate([
{
"$project": {
"keyvalarr": {
"$objectToArray": "$$ROOT"
}
}
},
{
"$unwind": {
"path": "$keyvalarr",
"includeArrayIndex": "index"
}
},
{
"$match": {
"keyvalarr.k": {
"$ne": "_id"
}
}
},
{
"$replaceRoot": {
"newRoot": {
"$arrayToObject": [
{
"k": "_id",
"v": {
"$concat": [
{
"$substr": [
"$_id",
0,
-1
]
},
"-",
{
"$substr": [
"$index",
0,
-1
]
}
]
}
},
"$keyvalarr"
]
}
}
}
])
Anu. Here are two options you can use.
The first option is pretty straightforward, but it requires you to hardcode _id' indexes yourself.
db.users.aggregate([
{
$project: {
pairs : [
{ firstName: '$firstName', _id : { $concat : [ { $substr : [ '$_id', 0, 50 ] }, '-1' ] } },
{ lastName: '$lastName', _id : { $concat : [ '$_id', '-2' ] } },
{ bio: '$bio', _id : { $concat : [ { $substr : [ '$_id', 0, 50 ] }, '-3' ] } }
]
}
},
{
$unwind : '$pairs'
},
{
$replaceRoot: { newRoot: '$pairs' }
}
])
The second option does a little bit more job and is somewhat more tricky. But it is probably easier to extend if you ever need to add another field.
db.users.aggregate([
{
$project: {
pairs : [
{ firstName: '$firstName' },
{ lastName: '$lastName' },
{ bio: '$bio' }
]
}
},
{
$addFields: {
pairsReference : '$pairs'
}
},
{
$unwind: '$pairs'
},
{
$addFields: {
'pairs._id' : { $concat: [ { $substr : [ '$_id', 0, 50 ] }, '-', { $substr: [ { $indexOfArray : [ '$pairsReference', '$pairs' ] }, 0, 2 ] } ] }
}
},
{
$replaceRoot: { newRoot: '$pairs' }
}
])
You can redirect results of both queries into another collection by using $out stage.
UPD:
The only reason you get the error is that one of the _ids is not a string.
Replace the first parameter of $concat ($_id) with the following expression:
{ $substr : [ '$_id', 0, 50 ] }

MongoDb aggregate and group by two fields depending on values

I want to aggregate over a collection where a type is given. If the type is foo I want to group by the field author, if the type is bar I want to group by user.
All this should happen in one query.
Example Data:
{
"_id": 1,
"author": {
"someField": "abc",
},
"type": "foo"
}
{
"_id": 2,
"author": {
"someField": "abc",
},
"type": "foo"
}
{
"_id": 3,
"user": {
"someField": "abc",
},
"type": "bar"
}
This user field is only existing if the type is bar.
So basically something like that... tried to express it with an $or.
function () {
var results = db.vote.aggregate( [
{ $or: [ {
{ $match : { type : "foo" } },
{ $group : { _id : "$author", sumAuthor : {$sum : 1} } } },
{ { $match : { type : "bar" } },
{ $group : { _id : "$user", sumUser : {$sum : 1} } }
} ] }
] );
return results;
}
Does someone have a good solution for this?
I think it can be done by
db.c.aggregate([{
$group : {
_id : {
$cond : [{
$eq : [ "$type", "foo"]
}, "author", "user"]
},
sum : {
$sum : 1
}
}
}]);
The solution below can be cleaned up a bit...
For "bar" (note: for "foo", you have to change a bit)
db.vote.aggregate(
{
$project:{
user:{ $ifNull: ["$user", "notbar"]},
type:1
}
},
{
$group:{
_id:{_id:"$user.someField"},
sumUser:{$sum:1}
}
}
)
Also note: In you final answer, anything that is not of type "bar" will have an _id=null
What you want here is the $cond operator, which is a ternary operator returning a specific value where the condition is true or false.
db.vote.aggregate([
{ "$group": {
"_id": null,
"sumUser": {
"$sum": {
"$cond": [ { "$eq": [ "$type", "user" ] }, 1, 0 ]
}
},
"sumAuhtor": {
"$sum": {
"$cond": [ { "$eq": [ "$type", "auhtor" ] }, 1, 0 ]
}
}
}}
])
This basically tests the "type" of the current document and decides whether to pass either 1 or 0 to the $sum operation.
This also avoids errant grouping should the "user" and "author" fields contain the same values as they do in your example. The end result is a single document with the count of both types.