spring-data-mongodb GroupOperation question - mongodb

I have data in MongoDB that looks like this:
{
  "name": "test1",
  "receivedDate": "2021-05-18 00:00:52"
}
{
  "name": "test2",
  "receivedDate": "2021-05-18 00:00:52"
}
{
  "name": "test3",
  "receivedDate": "2021-05-18 00:00:52"
}
{
  "name": "test4",
  "receivedDate": "2021-05-18 00:00:52"
}
I want to get a result in this format:
{
  "name": ["test1", "test2", "test3", "test4"]
}
When I use the following code, it works:
GroupOperation group = Aggregation.group().push("name").as("name");
and the result looks like this:
[
  {
    "_id": null,
    "name": [
      "test1",
      "test2",
      "test3",
      "test4",
      "test4"
    ]
  }
]
But when I use this other code:
GroupOperation group = Aggregation.group();
for (int i = 0; i < params.length; i++) {
    group.push(params[i]).as(params[i]);
}
I get a bad result:
[
  {
    "_id": null
  }
]
What should I do?
Thanks for your suggestions.

The MongoDB $group stage does not allow filtering; it only accepts accumulator operators.
Your second snippet is trying to do something like this:
{
  $group: {
    _id: null,
    test1: {
      $push: "$test1"
    },
    test2: {
      $push: "$test2"
    },
    ...
  }
}
which is why you get the wrong result.
Solution: Filter before grouping (MongoPlayground)
MatchOperation match = Aggregation.match(Criteria.where("name").in(Arrays.asList(params)));
GroupOperation group = Aggregation.group().push("name").as("name");
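In shell terms, those two stages amount to something like the following (a sketch; the collection name is a placeholder, and params is assumed to hold the name values, as in this answer):
db.collection.aggregate([
  // keep only the documents whose name appears in params
  { $match: { name: { $in: ["test1", "test2", "test3", "test4"] } } },
  // collect every matching name into a single array
  { $group: { _id: null, name: { $push: "$name" } } }
])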

It turned out I should use this code inside the loop, because GroupOperation is immutable: push(...).as(...) returns a new instance instead of modifying group in place.
group = group.push(params[i]).as(params[i]);

Related

Return an array element of an aggregation in a MongoDB Atlas (4.2) trigger function

So I am currently testing returning an array element of an aggregation in a MongoDB Atlas (4.2) trigger function:
exports = function(changeEvent) {
  const collection = context.services.get(<clusterName>).db(<dbName>).collection(<collectionName>);
  var aggArr = collection.aggregate([
    {
      $match: { "docType": "record" }
    },
    ..,
    {
      $group: {
        "_id": null,
        "avgPrice": {
          $avg: "$myAvgPriceFd"
        }
      }
    }
  ]);
  return aggArr;
};
Which outputs:
> result:
[
  {
    "_id": null,
    "avgPrice": {
      "$numberDouble": "18.08770081782988165"
    }
  }
]
> result (JavaScript):
EJSON.parse('[{"_id":null,"avgPrice":{"$numberDouble":"18.08770081782988165"}}]')
As you can see this is returned as one object in an array (I then intend to use the avgPrice value to update a field in a document in the same collection). I have tried to extract the object from the array with aggArr[0] or aggArr(0) - both resulting in:
> result:
{
"$undefined": true
}
> result (JavaScript):
EJSON.parse('{"$undefined":true}')
or by using aggArr[0].avgPrice as per this solution which fails with:
> error:
TypeError: Cannot access member 'avgPrice' of undefined
> trace:
TypeError: Cannot access member 'avgPrice' of undefined
at exports (function.js:81:10(163))
at function_wrapper.js:5:30(18)
at <eval>:13:8(6)
at <eval>:2:15(6)
Any pointers are most welcome because this one has me stumped for now!
I had the same problem and figured it out. You have to append the .toArray() function to the aggregation call, so that you have:
collection.aggregate(pipeline_steps).toArray()
Here's an example:
const user_collection = context.services
  .get("mongodb-atlas")
  .db("Development")
  .collection("users");
const search_params = [
  {
    "$search": {
      "index": 'search_users',
      "text": {
        "query": value,
        "path": [
          "email", "first_name", "last_name"
        ],
        "fuzzy": {
          "prefixLength": 1,
          "maxEdits": 2
        }
      }
    }
  }
];
const search_results = await user_collection.aggregate(search_params).toArray();
const results = search_results
return results[0]
Here's the documentation showing how to convert the aggregation to an array.
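Applied back to the trigger function from the question, that might look roughly like this (a sketch that reuses the question's placeholders and is not tested against Atlas):
exports = async function(changeEvent) {
  const collection = context.services.get(<clusterName>).db(<dbName>).collection(<collectionName>);
  // toArray() resolves the aggregation cursor into a plain array of documents
  const aggArr = await collection.aggregate([
    { $match: { "docType": "record" } },
    // ... any intermediate stages from the original pipeline ...
    { $group: { "_id": null, "avgPrice": { $avg: "$myAvgPriceFd" } } }
  ]).toArray();
  // aggArr[0] is now a real object, so avgPrice can be read directly
  return aggArr[0].avgPrice;
};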

Select array inner element

I have trouble with the MongoDB data below.
I want to get the data at [projects][log][subject].
So I tried this:
'$project': {_id: 0, projects.log.subject: 1}
but it is not correct syntax.
{
  "_id": ObjectID("569f3a3e9d2540764d8bde59"),
  "A": "book",
  "server": "us",
  "projects": [
    {
      "domainArray": [
        {
          ~~~~
        }
      ],
      "log": [
        {
          ~~~~~,
          "subject": "I WANT THIS"
        }
      ],
      "before": "234234234"
    },
    {
      "domainArray": [
        {
          ~~~~
        }
      ],
      "log": [
        {
          ~~~~~,
          "subject": "I WANT THIS"
        }
      ],
      "before": "234234234"
    },....
  ] //end of projects
}//end of document
How can I get the data grouped by [subject]? I have no idea how to do this.
Edited:
I expected data like this:
{
  "subject": "first",
  "subject": "second",
  "subject": "third",
  "subject": "~~~"
}
Is it possible? I just want to get an array of subjects.
Not sure whether this is your expected result or not, but can you please try this:
db.project.aggregate([
  { $project: { projects: 1, _id: 0 } },
  { $unwind: "$projects" },
  { $unwind: "$projects.log" },
  { $project: { subject: "$projects.log.subject", _id: 0 } }
])
and a Map-Reduce version that does a word count over the above result is below:
//map function
var map = function() {
  for (var i in this.projects) {
    for (var j in this.projects[i].log) {
      var arrayWords = this.projects[i].log[j].subject.split(" ");
      for (var k = 0; k < arrayWords.length; k++) {
        emit(arrayWords[k], { occurance: 1 });
      }
    }
  }
};
//reduce function
function reduce(word, arrayOccurance) {
  var totalOccurance = 0;
  for (var i = 0; i < arrayOccurance.length; i++) {
    totalOccurance = totalOccurance + arrayOccurance[i].occurance;
  }
  return { occurance: totalOccurance };
}
//combine both functions into one operation and output the result into the wordOccurance collection
db.project.mapReduce(
  map,
  reduce,
  {
    query: { "projects.log.subject": "~~~~~" },
    out: "wordOccurance"
  }
)
//query the result output
db.wordOccurance.find()
You can change the query under mapReduce to match the subjects you want to word-count. Please let me know if my mapReduce function doesn't meet your expected result.
You can also refer to the two pages below to create and troubleshoot the map and reduce functions:
Troubleshoot the Map Function
Troubleshoot the Reduce Function
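If the goal is a single array of all subjects rather than one document per subject, a $group stage with $push could be appended to the aggregation above (a sketch; the output field name "subjects" is my own choice):
db.project.aggregate([
  { $project: { projects: 1, _id: 0 } },
  { $unwind: "$projects" },
  { $unwind: "$projects.log" },
  // collect every subject into one array on a single result document
  { $group: { _id: null, subjects: { $push: "$projects.log.subject" } } },
  { $project: { _id: 0, subjects: 1 } }
])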

Try to match conditional column in mongodb

I am new to MongoDB and am now using aggregate.
My problem is that I have 2 columns, say column1 and column2, and I want to match by either column1 or column2 inside $match. Is that possible? I am stuck, please help.
DB structure:
{
  "_id": ObjectId("55794aa1be1f8fe822da139d"),
  "transactionType": "1",
  "_store": {
    "storeLocation": "Pitampura",
    "storeName": "Godown",
    "_id": "5576b5c5e414d90c03d1e330"
  }
}
I am trying to filter by transactionType and storeName. I send these 2 params to the API, but when storeName is sent as an empty string I want to filter only by transactionType, otherwise by both parameters. I don't want to use if-else.
Well, of course it can suit your query. You just handle it as follows:
// Initial data
var request = { "storeName": "", "transactionType": "1" };
// Transform to array
var conditions = Object.keys(request).map(function(key) {
  var obj = {},
      newKey = "";
  if ( key == "storeName" ) {
    newKey = "_store." + key;
  } else {
    newKey = key;
  }
  obj[newKey] = request[key];
  return obj;
});
db.collection.find({ "$or": conditions });
The whole structure after transformation breaks down to:
db.collection.find({
  "$or": [
    { "_store.storeName": "" },
    { "transactionType": "1" }
  ]
})
This of course matches the document because the "transactionType" condition is met.
That is what $or does: it requires that at least one of the conditions in the query arguments matches data in the document.
The other thing here is that since the keys in the request are not a "direct match" for the keys in the document, the "key name" is manipulated into the correct "dot notation" form for accessing that element.
These are just basic queries, so the same rules apply to aggregation $match, which is just a query element itself:
db.collection.aggregate([
  // Possibly other pipeline stages before
  // Your match phase, which probably should be first
  { "$match": {
    "$or": [
      { "_store.storeName": "" },
      { "transactionType": "1" }
    ]
  }},
  // Other aggregation pipeline stages
])
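If an empty storeName should be ignored rather than matched literally, which is what the question seems to ask for, the empty values could be dropped before building the conditions, along the same lines (a sketch, not part of the original answer):
// Initial data: storeName came in empty, so it should not constrain the query
var request = { "storeName": "", "transactionType": "1" };
var conditions = Object.keys(request)
  .filter(function(key) { return request[key] !== ""; })   // skip empty params
  .map(function(key) {
    var obj = {};
    obj[key == "storeName" ? "_store." + key : key] = request[key];
    return obj;
  });
// Only transactionType remains, so this becomes { "$and": [ { "transactionType": "1" } ] }
db.collection.find(conditions.length ? { "$and": conditions } : {});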

How do I write a query in MongoDB that gives me all documents that contain all specified nested documents?

Say I had a MongoDB document with 2 embedded documents stored in the Array "AD". Each of these embedded documents will have specific data that I need to match. How would I write a query to find this document?
In the C# driver I'm currently writing it like this:
var q1 = Query.And(
    Query.EQ("AD.ABC1", "123"),
    Query.EQ("AD.YOB", "1969")
);
var q2 = Query.And(
    Query.EQ("AD.ABC1", "456"),
    Query.EQ("AD.YOB", "1970")
);
var query = Query.And(q1, q2);
Which gives me a query of:
{
  "$and": [
    { "AD.ABC1": "123" },
    { "AD.YOB": "1969" },
    { "AD.ABC1": "456" },
    { "AD.YOB": "1970" }
  ]
}
This returns documents that have embedded documents matching EITHER q1 or q2. I want the intersection of this, i.e. documents that have embedded documents matching BOTH q1 AND q2.
Thanks
Worked it out
var q1 = Query.ElemMatch("AD",
    Query.And(
        Query.EQ("ABC1", "123"),
        Query.EQ("YOB", "1969")
    )
);
var q2 = Query.ElemMatch("AD",
    Query.And(
        Query.EQ("ABC1", "456"),
        Query.EQ("YOB", "1970")
    )
);
var query = Query.And(q1, q2);
which gives me
{
  "$and": [
    {
      "AD": {
        "$elemMatch": {
          "ABC1": "123",
          "YOB": "1969"
        }
      }
    },
    {
      "AD": {
        "$elemMatch": {
          "ABC1": "456",
          "YOB": "1970"
        }
      }
    }
  ]
}
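The difference matters because dotted-path conditions can each be satisfied by a different array element, while $elemMatch requires one element to satisfy all of its conditions. For example, a document like the following (made up for illustration; the collection name is arbitrary) matches the first query but not the $elemMatch version:
// "ABC1": "123" and "YOB": "1969" are satisfied by different elements, so the
// dotted-path $and matches, but no single element satisfies either $elemMatch clause.
db.myCollection.insertOne({
  "AD": [
    { "ABC1": "123", "YOB": "1970" },
    { "ABC1": "456", "YOB": "1969" }
  ]
})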

How do I unset all fields except a known set of fields?

Suppose I have a single document in my mongo collection that looks like this:
{
  "_id": 123,
  "field_to_prune": {
    "keep_field_1": "some value",
    "random_field_1": "some value",
    "keep_field_2": "some value",
    "random_field_2": "some value",
    "random_field_3": "some value"
  }
}
I want to prune that document to look like this:
{
  "_id": 123,
  "field_to_prune": {
    "keep_field_1": "some value",
    "keep_field_2": "some value"
  }
}
However, my issue is that I don't know what the "random" field names are. In mongo, how would I $unset all fields except a couple of known fields?
I can think of a couple of ways, but I don't know the syntax. I could select all field NAMES and then, for each one of those, unset the field, kind of like this:
[Some query to find all field names under "field_to_prune" for id 123].forEach(function(i) {
  var key = "field_to_prune." + i;
  print("removing field: " + key);
  var mod = {"$unset": {}};
  mod["$unset"][key] = "";
  db.myCollection.update({ _id: "123" }, mod);
});
Another way I was thinking of doing it was to unset every field whose name is not in an array of strings that I defined. Not sure how to do that either. Any ideas?
If you don't care about atomicity then you may do it with save:
doc = db.myCollection.findOne({"_id": 123});
for (k in doc.field_to_prune) {
  if (k === 'keep_field_1') continue;
  if (k === 'keep_field_2') continue;
  delete doc.field_to_prune[k];
}
db.myCollection.save(doc);
The main problem of this solution is that it's not atomic. So, any update to doc between findOne and save will be lost.
An alternative is to actually unset all unwanted fields instead of saving the doc:
doc = db.myCollection.findOne({"_id": 123});
unset = {};
for (k in doc.field_to_prune) {
  if (k === 'keep_field_1') continue;
  if (k === 'keep_field_2') continue;
  unset['field_to_prune.' + k] = 1;
}
db.myCollection.update({_id: doc._id}, {$unset: unset});
This solution is much better because mongo runs the update atomically, so no updates will be lost. And you don't need another collection to do what you want.
Actually the best way to do this is to iterate over the cursor and use the $unset update operator to remove the subdocument fields other than the known fields you want to keep. You also need to use "bulk" operations for maximum efficiency.
MongoDB 3.2 deprecates Bulk() and its associated methods, so you should use .bulkWrite():
var wantedFields = ["keep_field_1", "keep_field_2"];
var requests = [];
var count = 0;
db.myCollection.find().forEach(function(document) {
  var fieldToPrune = document.field_to_prune;
  var unsetOp = {};
  for (var key in fieldToPrune) {
    if ((wantedFields.indexOf(key) === -1) && Object.prototype.hasOwnProperty.call(fieldToPrune, key)) {
      unsetOp["field_to_prune." + key] = "";
    }
  }
  requests.push({
    "updateOne": {
      "filter": { "_id": document._id },
      "update": { "$unset": unsetOp }
    }
  });
  count++;
  if (count % 1000 === 0) {
    // Execute per 1000 operations and re-init
    db.myCollection.bulkWrite(requests);
    requests = [];
  }
})
// Clean up the remaining queued operations
if (requests.length > 0) {
  db.myCollection.bulkWrite(requests);
}
From MongoDB 2.6 you can use the Bulk API.
var bulk = db.myCollection.initializeUnorderedBulkOp();
var count = 0;
db.myCollection.find().forEach(function(document) {
  var fieldToPrune = document.field_to_prune;
  var unsetOp = {};
  for (var key in fieldToPrune) {
    if ((wantedFields.indexOf(key) === -1) && Object.prototype.hasOwnProperty.call(fieldToPrune, key)) {
      unsetOp["field_to_prune." + key] = "";
    }
  }
  bulk.find({ "_id": document._id }).updateOne({ "$unset": unsetOp });
  count++;
  if (count % 1000 === 0) {
    // Execute per 1000 operations and re-init
    bulk.execute();
    bulk = db.myCollection.initializeUnorderedBulkOp();
  }
})
// Clean up the remaining queued operations
if (count % 1000 !== 0) {
  bulk.execute();
}
I solved this with a temporary collection. I did the following:
db.myCollection.find({"_id": "123"}).forEach(function(i) {
  db.temp.insert(i);
});
db.myCollection.update(
  {_id: "123"},
  { $unset: { "field_to_prune": "" } }
)
db.temp.find().forEach(function(i) {
  var key1 = "field_to_prune.keep_field_1";
  var key2 = "field_to_prune.keep_field_2";
  var mod = {"$set": {}};
  mod["$set"][key1] = i.field_to_prune.keep_field_1;
  mod["$set"][key2] = i.field_to_prune.keep_field_2;
  db.myCollection.update({_id: "123"}, mod);
});
db.getCollection("temp").drop();
Unfortunately, all the solutions presented so far rely on script execution and some sort of forEach invocation, which ends up handling only one document at a time. If the collection to normalize is big, this is going to be impractical and take way too long.
Also, the functions passed to forEach are executed on the client, meaning that if the connection to the database is lost, the operation is going to be interrupted in the middle of the process, potentially leaving the collection in an inconsistent state.
Performance issues could be mitigated by using bulk operations like the one proposed by #styvane here. That's solid advice.
But we can do better. Update operations have supported aggregation pipeline syntax since MongoDB 4.2, allowing the normalization to be achieved by simply creating a new temporary object containing only the desired fields, unsetting the old one, and then putting the temporary one back in its place, all using the current values of the document as references:
db.theCollection.updateMany(
  {field_to_prune: {$exists: true}},
  [
    {$set: {_temp: {
      keep_field_1: '$field_to_prune.keep_field_1',
      keep_field_2: '$field_to_prune.keep_field_2'
    }}},
    {$unset: 'field_to_prune'},
    {$set: {field_to_prune: '$_temp'}},
    {$unset: '_temp'}
  ]
)
Example:
> db.myColl.insertOne({
... _id: 123,
... field_to_prune: {
... keep_field_1: "some value",
... random_field_1: "some value",
... keep_field_2: "some value",
... random_field_2: "some value",
... random_field_3: "some value"
... }
... })
{ "acknowledged" : true, "insertedId" : 123 }
>
> db.myColl.insertOne({
... _id: 234,
... field_to_prune: {
... // keep_field_1 is absent
... random_field_1: "some value",
... keep_field_2: "some value",
... random_field_2: "some value",
... random_field_3: "some value"
... }
... })
{ "acknowledged" : true, "insertedId" : 234 }
>
> db.myColl.find()
{ "_id" : 123, "field_to_prune" : { "keep_field_1" : "some value", "random_field_1" : "some value", "keep_field_2" : "some value", "random_field_2" : "some value", "random_field_3" : "some value" } }
{ "_id" : 234, "field_to_prune" : { "random_field_1" : "some value", "keep_field_2" : "some value", "random_field_2" : "some value", "random_field_3" : "some value" } }
>
> db.myColl.updateMany(
... {field_to_prune: {$exists: true}},
... [
... {$set: {_temp: {
... keep_field_1: '$field_to_prune.keep_field_1',
... keep_field_2: '$field_to_prune.keep_field_2'
... }}},
... {$unset: 'field_to_prune'},
... {$set: {field_to_prune: '$_temp'}},
... {$unset: '_temp'}
... ]
...)
{ "acknowledged" : true, "matchedCount" : 2, "modifiedCount" : 2 }
>
> db.myColl.find()
{ "_id" : 123, "field_to_prune" : { "keep_field_1" : "some value", "keep_field_2" : "some value" } }
{ "_id" : 234, "field_to_prune" : { "keep_field_2" : "some value" } }
Here is my solution, which I think is easier than the others I read:
db.labels.find({ "_id": 123 }).forEach(function (elem) {
  // replace field_to_prune with a document containing only the fields to keep
  db.labels.update({ _id: elem._id }, {
    field_to_prune: {
      keep_field_1: elem.field_to_prune.keep_field_1,
      keep_field_2: elem.field_to_prune.keep_field_2
    }
  });
});
I'm deleting everything but the fields 'keep_field_1' and 'keep_field_2'
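A quick way to check the result (assuming the sample document from the question was loaded into the labels collection):
// After the update, only the kept fields remain under field_to_prune
db.labels.findOne({ "_id": 123 })
// { "_id": 123, "field_to_prune": { "keep_field_1": "some value", "keep_field_2": "some value" } }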