Trying to aggregate based on substring matches in mongodb 3.2 - mongodb

Let's say my collection has documents with ExpName field and Rname field. Expname are all of the type - exp_1, exp_2 etc. Rname is a character string with 4 dashes for example. "As-34rt-d3r5-4453f-er4"
I need to aggregate based on experiment name and removing the text between the last two dashes. In the example I gave above that would be "As-34rt-d3r5"
question 1) how do i incorporate this in one table?
question 2) I solved this in a dirty fashion for one exp, because it seemed like the number of characters was almost the same, so I could just take the first 13 characters, which seemed to be the substring omitting the last two dashes. Is there a correct way to do this if the text were not so uniform?
// Asker's first attempt: count the documents of a single experiment ("exp_1"),
// grouped by the first 13 characters of RName.
db.getCollection('rest01').aggregate(
{$match : {ExpName : "exp_1"}},
// NOTE: `_id` is written twice in this object literal. In JavaScript the later key wins,
// so only the $substr expression ends up as the group key and ExpName is not part of it.
{$group: {_id :"$ExpName",_id : {$substr : ["$RName", 0,13]}, total: { $sum:1 }}
})
Ideally I would like to have a result that says Expname, Rnamesubstring, count. This code snippet was for exp_1 one alone. Is it even possible to get it all in one result?

Here is how you could do that:
db.getCollection('rest01').aggregate([
    // Stage 1: keep ExpName and break RName into its dash-separated pieces ("splitRName").
    {
        $project: {
            "ExpName": 1,
            "splitRName": { $split: [ "$RName", "-" ] }
        }
    },
    // Stage 2: count documents per (ExpName, first three RName pieces re-joined with dashes).
    {
        $group: {
            _id: {
                "ExpName": "$ExpName",
                "Rnamesubstring": {
                    $concat: [
                        { $arrayElemAt: [ "$splitRName", 0 ] },
                        "-",
                        { $arrayElemAt: [ "$splitRName", 1 ] },
                        "-",
                        { $arrayElemAt: [ "$splitRName", 2 ] }
                    ]
                }
            },
            total: { $sum: 1 }
        }
    }
])
In case you want to do it in MongoDB v3.2 (as stated in your comment), here is something that is not exactly pretty but works:
// MongoDB 3.2 has no $split, so the cut-off point (the third dash) is found by
// probing RName one character at a time.

// Character offsets to probe; extend the list if the third dash can sit beyond offset 30.
var candidateOffsets = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30 ];

// For every probed offset: the offset itself when RName has a dash there, otherwise null.
var dashOrNull = {
    $map: {
        input: candidateOffsets,
        as: "index",
        in: {
            $cond: {
                if: { $eq: [ "-", { $substr: [ "$RName", "$$index", 1 ] } ] },
                then: "$$index",
                else: null
            }
        }
    }
};

// Strip the null placeholders so that only the dash offsets remain.
var dashOffsets = {
    $filter: {
        input: dashOrNull,
        as: "item",
        cond: { $ne: [ null, "$$item" ] }
    }
};

// Offset of the third dash (array index 2). For a malformed or missing RName there is no
// third dash, so fall back to 1000 and let $substr return the whole string.
var prefixLength = {
    $ifNull: [ { $arrayElemAt: [ dashOffsets, 2 ] }, 1000 ]
};

// Count documents per (ExpName, RName truncated just before its third dash).
db.getCollection('rest01').aggregate({
    $group: {
        _id: {
            "ExpName": "$ExpName",
            "Rnamesubstring": { $substr: [ "$RName", 0, prefixLength ] }
        },
        total: { $sum: 1 }
    }
});
Update 2: You seem to be having some data related issues as per your comments (so either missing RName values or improperly structured ones, i.e. without the required number of sections with dashes in between). I have updated the above statement for v3.2 to deal with these rows. You may want to find out, though, which rows actually cause this behaviour. They can be easily identified using the following statement:
// Diagnostic query: lists the documents for which no third dash is found in RName
// within the probed offsets (0-30), i.e. the rows the grouping query cannot cut properly.
db.getCollection('rest01').aggregate({
$project: {
_id: 1,
RName: 1,
// NOTE: despite its name, this field holds the offset of the third dash, not a substring.
"Rnamesubstring": {
$arrayElemAt:
[{
$filter: {
input: {
$map: {
input: [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30 /* add numbers as required */ ],
as: "index",
in: {
$cond: {
if: { $eq: [ "-", { $substr: [ "$RName", "$$index", 1 ] } ] }, // is the character at this offset a dash?
then: "$$index", // yes: keep the offset
else: null // no: placeholder that the enclosing $filter removes
}
}
}
},
as: "item",
cond: { $ne: [ null, "$$item" ] } // drop the null placeholders so only dash offsets remain
}
},
2 ] // offset of the third dash (array index 2); out of range when fewer than three dashes were found
}
}
}, {
// The projected field is absent whenever $arrayElemAt was out of range, so this keeps only the problem rows.
$match: { "Rnamesubstring": { $exists:false } }
})

Related

find index of particular element in multi-dimensional array in MongoDB

I've got a document in MongoDB which has a multi-dimensional array as shown below.
{"_id":1,
"name":"Johnson",
"card":[
["",12,25,"","",52,60,"",86],
[1,17,29,"",43,"","","",89],
[3,"","",34,45,"",62,70,""]
]
}
I'm looking for a query that returns the index of a particular element in the array, for example, say 29 whose index is [1][2] but when i queried as:
> db.test.aggregate([{$project:{index:{$indexOfArray:["$card",29]}}}])
i got the result as:
{ "_id" : 1, "index" : -1 }
which is not true. I found that this query method works only for one-dimensional array and I'm unable to figure out how to find the index of multi-dimensional array in MongoDB.
Any help will be appreciated.
Thankyou
It is not exactly clear what datatype [1][2] is, so rendering the desired output is a bit of a challenge. Here is an attempt to answer your question...
Test Data
// Test document: "card" is an array of rows, and each row is itself an array of cells
// (numbers, or "" for an empty cell).
db.collection.insert(
    {
        "name":"Johnson",
        "card":[
            ["", 12, 25, "", "", 52, 60, "", 86],
            [1, 17, 29, "", 43, "", "", "", 89],
            [3, "", "", 34, 45, "", 62, 70, ""]
        ]
    }
); // closing parenthesis was missing, which made the statement a syntax error
(Assumes a hard-coded value of 29 to search for)
Aggregate
EDIT 2021-12-09 - ADDED $project TO CAST RESULTS AS INTEGER. WAS NumberLong()
db.collection.aggregate([
    // Flatten the outer array: one document per row, remembering the row position.
    { $unwind: { path: "$card", includeArrayIndex: "outerIndex" } },
    // Flatten each row: one document per cell, remembering the cell position within its row.
    { $unwind: { path: "$card", includeArrayIndex: "innerIndex" } },
    // Keep only the cell(s) holding the value being searched for.
    { $match: { "card": { $eq: 29 } } },
    // The array indexes come back as 64-bit longs; convert them to plain ints for output.
    {
        $project: {
            name: 1,
            card: 1,
            outerIndex: { $convert: { input: "$outerIndex", to: "int" } },
            innerIndex: { $convert: { input: "$innerIndex", to: "int" } }
        }
    }
])
Results
[
{
_id: ObjectId("61b13476c6c466d7d1ea9b5e"),
name: 'Johnson',
card: 29,
outerIndex: 1,
innerIndex: 2
}
]
Unwanted fields can be suppressed with another $project stage, but I did not include it here since I was not clear on the desired output.

MongoDB Query lower/upper ranges with an array as input

In MongoDB I have a collection that looks like:
{ low: 1, high: 5 },
{ low: 6, high: 15 },
{ low: 16, high: 412 },
...
I have input that's an array of integers:
[ 4, 16, ...]
I want to find all the documents in the collection which have values included in the range depicted by low and high. In this example it would pick the first and third documents.
I've found lots of Q&A here on how to filter using a single value as the input but could not find one that included an array as input. It could be that my search failed me and that this has been answered.
Update: I should have mentioned that I'm constructing this query in an application and not running this in the CLI. Given that flexibility what if I create a $or query with each of the inputs? Something like:
$or: [{
high: { $gte: 4 },
low: { $lte: 4 },
}, {
high: { $gte: 16 },
low: { $lte: 16 },
},
...
]
It could be massive and have thousands of elements in the $or.
You can use $anyElementTrue along with $map to check if any value is included within a range defined in your documents:
// Match every document whose [low, high] range contains at least one of the input values.
db.collection.find({
    $expr: {
        $anyElementTrue: {
            // One boolean per input value: true when low <= value <= high for this document.
            $map: {
                input: [ 4, 16 ],
                as: "candidate",
                in: {
                    $and: [
                        { $gte: [ "$$candidate", "$low" ] },
                        { $lte: [ "$$candidate", "$high" ] }
                    ]
                }
            }
        }
    }
})
Mongo Playground
//working code from Mongo Shell CLI 4.2.6 on windows 10
//you can use forEach and loop through for comparison if a value exists between two numbers
> print("MongoDB",db.version());
MongoDB 4.2.6
> db.lhColl.find();
{ "_id" : ObjectId("5f889258f3b30cd04c8a78e5"), "low" : 1, "high" : 5 }
{ "_id" : ObjectId("5f889258f3b30cd04c8a78e6"), "low" : 6, "high" : 15 }
{ "_id" : ObjectId("5f889258f3b30cd04c8a78e7"), "low" : 16, "high" : 412 }
> var arrayInput = [4,16,500];
> var inputLength = arrayInput.length;
> db.lhColl.aggregate([
... {$match:{}}
... ]).forEach(function(doc){
... for (i=0; i<inputLength; i++){
... if (arrayInput[i]>=doc.low){
... if(arrayInput[i] <= doc.high)
... print("arrayInputs value match:",arrayInput[i]);
... }
... }
... });
arrayInputs value match: 4
arrayInputs value match: 16

mongoDb conditional projection query

I have some data as below:
{MS: 'MS1', fileName: 'file1', RSCP: 75, EcNo: 10, ...}
{MS: 'MS2', fileName: 'file1', RSCP: 76, EcNo: 11, ...}
{MS: 'MS3', fileName: 'file1', RSCP: 77, EcNo: 12, ...}
{MS: 'MS4', fileName: 'file1', RSCP: 78, EcNo: 13, ...}
I need to query data like that:
find all documents that fileName = file1
if MS = MS1 return RSCP
if MS = MS2 return EcNo
how can I query mongo like this in one query?
You can try below aggregation query :
db.collection.aggregate([
    // Keep only the documents that belong to file1.
    { $match: { fileName: "file1" } },
    // Rebuild both fields per document: an expression that evaluates to `$$REMOVE`
    // drops the field, otherwise the field keeps its current value.
    {
        $addFields: {
            EcNo: { $cond: { if: { $eq: [ "$MS", "MS1" ] }, then: "$$REMOVE", else: "$EcNo" } },
            RSCP: { $cond: { if: { $eq: [ "$MS", "MS2" ] }, then: "$$REMOVE", else: "$RSCP" } }
        }
    }
])
Test : mongoplayground

How rename nested key in array of object in MongoDB?

Document Structure
{
_id: 5,
grades: [
{ grade_ : 80, mean: 75, std: 8 },
{ mean: 90, std: 5 },
{ mean: 85, std: 3 }
]
}
Given the above document structure in MongoDB, I want to rename the key grade_ to grade:
db.collection.update({"_id":5},{"$rename":{"grades.grade_":"grades.grade"}},{"upsert":false,"multi":true})
which gives below error
"writeError" : {
"code" : 28,
"errmsg" : "cannot use the part (grades of grades.grade_) to traverse the element ({grades: [ { grade_: 80.0, mean: 75.0, std: 8.0 }, { mean: 90.0, std: 5.0 }, { mean: 85.0, std: 3.0 } ]})"
}
I want to rename key grade_ to grade, expected output
{
_id: 5,
grades: [
{ grade : 80, mean: 75, std: 8 },
{ mean: 90, std: 5 },
{ mean: 85, std: 3 }
]
}
As per MongoDB documentation: ($rename does not work if these fields are in array elements.)
For fields in embedded documents, the $rename operator can rename these fields as well as move the fields in and out of embedded documents. $rename does not work if these fields are in array elements.
So, you need to write your custom logic to update.
// $rename cannot reach into array elements, so the key is renamed client-side:
// load every document that has at least one "grade_" key inside "grades",
// rewrite the affected array entries in memory, then store the document back.
db.collection.find({
    "grades.grade_": { $exists: true }
}).forEach(function (doc) {
    // The filter also matches a non-array embedded "grades" document; leave those untouched.
    if (!Array.isArray(doc.grades)) {
        return;
    }
    doc.grades.forEach(function (entry) {
        // Own-property check instead of `!= undefined`, so a key stored with a null value is
        // renamed as well; non-object array members are skipped.
        if (entry !== null && typeof entry === "object" &&
                Object.prototype.hasOwnProperty.call(entry, "grade_")) {
            entry.grade = entry.grade_;
            delete entry.grade_;
        }
    });
    // Whole-document replacement keyed on _id.
    db.collection.replaceOne({ _id: doc._id }, doc);
});
$rename does not work on fields inside array elements. Instead, you can use the aggregation framework's $addFields stage to rename fields in an array.
// Returns each document with "grades" rebuilt so that the "grade_" key appears as "grade".
// NOTE: this pipeline has no $out stage, so it only reshapes the returned documents.
db.collection.aggregate([
{
// Overwrite "grades" with a re-mapped copy of itself.
$addFields: {
grades: {
$map: {
input: "$grades",
as: "grade",
// Each element is rebuilt from exactly these three keys; any other key an element
// might carry is not copied over.
in: {
grade: "$$grade.grade_", // elements without "grade_" end up without "grade" (see the output below)
mean: "$$grade.mean",
std: "$$grade.std"
}
}
}
}
}
])
Output:
[
{
"_id": 5,
"grades": [
{"grade": 80,"mean": 75,"std": 8},
{"mean": 90,"std": 5},
{"mean": 85,"std": 3}
]
}
]

Mongo 3.2 query timeseries value at specific time

I have some timeseries data stored in Mongo with one document per account, like so:
{
"account_number": 123,
"times": [
datetime(2017, 1, 2, 12, 34, 56),
datetime(2017, 3, 4, 17, 18, 19),
datetime(2017, 3, 11, 0, 1, 11),
]
"values": [
1,
10,
9001,
]
}
So, to be clear in the above representation account 123 has a value of 1 from 2017-01-02 12:34:56 until it changes to 10 on 2017-03-04 17:18:19, which then changes to 9001 at 2017-03-11, 00:01:11.
There are many accounts and each account's data is all different (could be at different times and could have more or fewer value changes than other accounts).
I'd like to query for each user's value at a given time, e.g. "What was each user's value at 2017-01-30 02:03:04?" would return 1 for the above account, as it was set to 1 before the given time and did not change until after the given time.
It looks like $zip would be useful, but that's only available in Mongo 3.4 and I'm using 3.2 and have no plans to upgrade soon.
Edit:
I can get a small part of the way there using:
> db.account_data.aggregate([{$unwind: '$times'}, {$unwind: '$values'}])
which returns something like:
{"account_number": 123, "times": datetime(2017, 1, 2, 12, 34, 56), "values": 1},
{"account_number": 123, "times": datetime(2017, 1, 2, 12, 34, 56), "values": 10},
#...
which isn't quite right as it is returning the cross product of times/values
This is possible using only 3.2 features. I tested with the Mingo library
var mingo = require('mingo')
var data = [{
"account_number": 123,
"times": [
new Date("2017-01-02T12:34:56"),
new Date("2017-03-04T17:18:19"),
new Date("2017-03-11T00:01:11")
],
"values": [1, 10, 9001]
}]
var maxDate = new Date("2017-01-30T02:03:04")
// 1. filter dates down to those less than or equal to maxDate
// 2. take the size of the filtered array and bind it to "reached" via $let
// 3. if no date qualifies (size 0), yield null -- otherwise the computed index would be -1,
//    which $arrayElemAt reads as "last element" and would wrongly return the newest value
// 4. otherwise subtract 1 from the size to get the index and look it up in the "values" array
// 5. project the extra fields
// NOTE: assumes "times" is sorted ascending and lines up index-by-index with "values".
var result = mingo.aggregate(data, [{
  $project: {
    valueAtDate: {
      $let: {
        vars: {
          reached: { $size: { $filter: { input: "$times", as: "time", cond: { $lte: [ "$$time", maxDate ] } } } }
        },
        in: {
          $cond: [
            { $gt: [ "$$reached", 0 ] },
            { $arrayElemAt: [ "$values", { $subtract: [ "$$reached", 1 ] } ] },
            null
          ]
        }
      }
    },
    values: 1,
    times: 1
  }
}])
console.log(result)
// Outputs
[ { valueAtDate: 1,
values: [ 1, 10, 9001 ],
times:
[ 2017-01-02T12:34:56.000Z,
2017-03-04T17:18:19.000Z,
2017-03-11T00:01:11.000Z ] } ]
Not sure how to do the same with MongoDb 3.2, however from 3.4 you can do the following query:
// Looks up the value recorded at one exact timestamp (needs $indexOfArray, MongoDB 3.4+).
// NOTE: this only finds timestamps that occur in "times" verbatim; it does not resolve
// an arbitrary point in time that falls between two changes.
db.test.aggregate([
    {
        $project: {
            // The needle must be a Date like the array elements: a string such as
            // "2017,3,11,0,1,11" never equals a Date, so the lookup would always give -1.
            // Presumably the stored dates are UTC -- adjust the literal if they are not.
            index: { $indexOfArray: [ "$times", new Date("2017-03-11T00:01:11Z") ] },
            values: true
        }
    },
    {
        $project: {
            // Guard the "not found" case: index -1 passed to $arrayElemAt would select the
            // last element instead of signalling a miss.
            resultValue: {
                $cond: [
                    { $gte: [ "$index", 0 ] },
                    { $arrayElemAt: [ "$values", "$index" ] },
                    null
                ]
            }
        }
    }
])