Basically I want to update all documents inside one collection. The update is just adding 2 hours to date fields present in each document.
The documents all follow a basic structure like this :
{
code : 1,
file : {
dates : {
start : 2018-05-27 22:00:00.000Z,
end : 2018-05-27 22:00:00.000Z,
},
otherInfos : {
...
...
}
}
}
Here is my query :
// Shell script: walks every document in `files`, shifts file.dates.start and
// file.dates.end forward by two hours in memory, prints the document, then
// writes the shifted values back with one updateOne per document.
var cursor = db.getCollection('files').find({});
while(cursor.hasNext()){
// NOTE(review): `e` is assigned without var/let/const.
e = cursor.next();
let delta = 120*60*1000; //2 hours
if(e.file.dates) {
// Each shifted value is null when the corresponding stored date is falsy.
let fileStartDate = e.file.dates.start ? new Date(e.file.dates.start.getTime() + delta) : null;
let fileEndDate = e.file.dates.end ? new Date(e.file.dates.end.getTime() + delta) : null;
if(fileStartDate) {
e.file.dates.start = fileStartDate;
}
if(fileEndDate) {
e.file.dates.end = fileEndDate;
}
}
print(e);
// NOTE(review): fileStartDate and fileEndDate are declared with `let` inside the
// `if` block above but are read here, outside that block - confirm the scoping.
// NOTE(review): the cursor reads through `db` while this write goes through
// getDB('myDB'), and the filter is `code` rather than `_id` - confirm both
// address the same documents and that `code` is unique per document.
db.getMongo().getDB('myDB').files.updateOne(
{"code":e.code},
{
$set: {"file.dates.start": fileStartDate, "file.dates.end": fileEndDate}
})
}
I am testing the query with around 20 documents. The first 10 are printed and updated with +2 hours as expected, but for the second half the dates remain exactly the same as before (both in the printed output and after the update).
All the documents have the same structure and same Date type so I don't understand why the query doesn't go all the way.
EDIT :
Here is a document that was successfully updated :
{
"_id" : ObjectId("5b36c7fdd515e80009e7cc84"),
"code" : "1",
"file" : {
"dates" : {
"start" : ISODate("2018-06-11T22:00:00.000Z"),
"end" : ISODate("2018-06-11T22:00:00.000Z")
}
}
}
became as expected
{
"_id" : ObjectId("5b36c7fdd515e80009e7cc84"),
"code" : "1",
"file" : {
"dates" : {
"start" : ISODate("2018-06-12T00:00:00.000Z"),
"end" : ISODate("2018-06-12T00:00:00.000Z")
}
}
}
but for example this document :
{
"_id" : ObjectId("5b36c7ffd515e80009e7cf03"),
"code" : "15",
"file" : {
"dates" : {
"start" : ISODate("2018-09-02T22:00:00.000Z"),
"end" : ISODate("2019-09-26T22:00:00.000Z")
}
}
}
stayed the exact same
With MongoDB v4.2+, you can do an update with an aggregation pipeline. Use $add to add 2 hours * 60 minutes * 60 seconds * 1000 milliseconds (= 7,200,000 ms) to each date.
// Pipeline-style update: every document gets file.dates.start and
// file.dates.end moved forward by two hours, computed from the stored values.
// 2 h * 60 min * 60 s * 1000 ms = 7,200,000 ms.
var twoHoursMs = 2 * 60 * 60 * 1000;
db.collection.update(
  {},
  [
    {
      "$set": {
        "file.dates.start": { $add: ["$file.dates.start", twoHoursMs] },
        "file.dates.end": { $add: ["$file.dates.end", twoHoursMs] }
      }
    }
  ],
  { multi: true }
)
Here is the Mongo playground for your reference.
// Excerpt quoted from the question: a single updateOne filtered on the current
// document's `code`, setting both shifted dates.
db.getMongo().getDB('myDB').files.updateOne(
{"code":e.code},
{
$set: {"file.dates.start": fileStartDate, "file.dates.end": fileEndDate}
})
updateOne only updates a single document.
You should use updateMany() to update more than one document.
https://www.mongodb.com/docs/manual/reference/method/db.collection.updateMany/
Related
I have been trying to update an Object for this collection. Below is the collection. Looking for Server 3.6 version.
The ask is to rename the class key from "History" to "HISTORY_NEW". This is needed only for the students that have that class. I need a query that selects all student records in the student collection that contain the "History" class and updates them to "HISTORY_NEW". I have around 30,000 records and have not found a bulk update method for this.
{
"_id" : ObjectId("611f90aa43f77a728879c395"),
"studentId" : "stu1",
"classes" : {
"History" : {
"TeacherName" : "T1",
"Marks" : [
{
"Internal": 15
}
]
},
"Geography" : {
"TeacherName" : "T2",
"Marks" : [
{
"Internal" : 20
}
]
}
},
"updateDate" : ISODate("2021-10-12T11:40:47.156Z")
}
This is the result I am expecting
{
"_id" : ObjectId("611f90aa43f77a728879c395"),
"studentId" : "stu1",
"classes" : {
"HISTORY_NEW" : {
"TeacherName" : "T1",
"Marks" : [
{
"Internal": 15
}
]
},
"Geography" : {
"TeacherName" : "T2",
"Marks" : [
{
"Internal" : 20
}
]
}
},
"updateDate" : ISODate("2021-10-12T11:40:47.156Z")
}
Is that even possible with the kind of collection above, or do I have to go the code route?
So far this is what I have, without any success.
Get all students' Ids and then update the Class name. But that is also not working and don't think it is smart to update DB 30,000 times.
// Attempt from the question: collect the studentId of every document that has
// a classes.History field, then issue one updateOne per collected id.
var studentIds =[];
db.studentSubject.find({"classes.History":{$exists:true}})
.forEach(function(u) { studentIds.push(u.studentId) })
studentIds.forEach(function(studentId) {
var result;
try {
result =db.studentSubject.updateOne(
{studentId:studentId},
// NOTE(review): $set assigns the string "HISTORY_NEW" as the VALUE of
// classes.History; it does not rename the key.
{ $set : {"classes.History": "HISTORY_NEW",}},
{ upsert: false});
} catch (e) {
// Errors are printed and the loop moves on to the next id.
print(e);
}
});
From your scenario, you need $rename operator.
As discussed in the comment, you don't need to fetch each document to get studentId and then pass it to update each document. Just bulk update by checking the document has classes.History field.
// Rename the classes.History key to classes.HISTORY_NEW in one multi-document
// update; the filter restricts it to documents that actually have the key.
var hasHistoryClass = { "classes.History": { $exists: true } };
var renameHistoryClass = { $rename: { "classes.History": "classes.HISTORY_NEW" } };
db.collection.update(hasHistoryClass, renameHistoryClass, { upsert: false, multi: true })
Sample Mongo Playground
Got a weird bug that I can't quite figure out.
I have some pymongo code that looks like this:
from pymongo import UpdateOne
client = pymongo.MongoClient()
...
def update_image_locations(user_key, dataset_key, preset_name,
                           keys_and_coords):
    """Bulk-set the coords of one preset on a batch of images, then print presets.

    Builds one ``UpdateOne`` per entry of ``keys_and_coords`` (each entry is read
    via its ``'key'`` and ``'coords'`` items), submits them with an unordered
    ``bulk_write``, and finally prints ``imageInfo.presets`` for every document
    matching the owner/dataset filter.
    """
    collection = docdb_client.db.col
    operations = []
    # A single filter dict is created here; the loop below mutates this same
    # object and hands it to every UpdateOne.
    query = {'ownerKey': user_key, 'imageInfo.datasetKey': dataset_key}
    for entry in keys_and_coords:
        query['key'] = entry['key']
        coords_update = {
            '$set': {
                'imageInfo.presets.%s.coords' % preset_name: entry['coords']
            }
        }
        operations.append(pymongo.UpdateOne(query, coords_update))
    print(operations)
    if operations:
        write_result = collection.bulk_write(operations, ordered=False)
        print(write_result.bulk_api_result)
    # This section fails with a KeyError.
    owner_dataset_filter = {
        'ownerKey': user_key,
        'imageInfo.datasetKey': dataset_key
    }
    cursor = collection.find(owner_dataset_filter, {'imageInfo': 1})
    for doc in cursor:
        print(doc['imageInfo']['presets'])
If I print out the bulk_write output, I get the following.
{'writeErrors': [], 'writeConcernErrors': [], 'nInserted': 0, 'nUpserted': 0, 'nMatched': 65, 'nModified': 65, 'nRemoved': 0, 'upserted': []}
which as far as I can tell is exactly what I expect.
However, I get KeyError failures for all but the last document in the collection when I try to iterate through the documents that should ostensibly have the new field. If I then go into the actual mongodb shell, I can confirm that only the last operation from the bulk_write seems to have actually gone off.
Based on the bulk_api_result I would expect that all of the documents would be updated, instead of only the last one. What's going on?
EDIT:
As requested, before and after queries. I'm not showing the full doc because there's a lot of vector embedding info that's going to muddle things.
Query:
> db.user_uploads.find({}, {'imageInfo.presets': 1})
Before:
{ "_id" : ObjectId("6074792104cc23375a8f979a"), "imageInfo" : { } }
{ "_id" : ObjectId("6074792104cc23375a8f979b"), "imageInfo" : { } }
{ "_id" : ObjectId("6074792104cc23375a8f979c"), "imageInfo" : { } }
{ "_id" : ObjectId("6074792104cc23375a8f979d"), "imageInfo" : { } }
{ "_id" : ObjectId("6074792104cc23375a8f979e"), "imageInfo" : { } }
{ "_id" : ObjectId("6074792104cc23375a8f979f"), "imageInfo" : { } }
{ "_id" : ObjectId("6074792104cc23375a8f97a0"), "imageInfo" : { } }
{ "_id" : ObjectId("6074792104cc23375a8f97a1"), "imageInfo" : { } }
{ "_id" : ObjectId("6074792104cc23375a8f97a2"), "imageInfo" : { } }
{ "_id" : ObjectId("6074792104cc23375a8f97a3"), "imageInfo" : { } }
After:
{ "_id" : ObjectId("6074792104cc23375a8f979a"), "imageInfo" : { } }
{ "_id" : ObjectId("6074792104cc23375a8f979b"), "imageInfo" : { } }
{ "_id" : ObjectId("6074792104cc23375a8f979c"), "imageInfo" : { } }
{ "_id" : ObjectId("6074792104cc23375a8f979d"), "imageInfo" : { } }
{ "_id" : ObjectId("6074792104cc23375a8f979e"), "imageInfo" : { } }
{ "_id" : ObjectId("6074792104cc23375a8f979f"), "imageInfo" : { } }
{ "_id" : ObjectId("6074792104cc23375a8f97a0"), "imageInfo" : { } }
{ "_id" : ObjectId("6074792104cc23375a8f97a1"), "imageInfo" : { } }
{ "_id" : ObjectId("6074792104cc23375a8f97a2"), "imageInfo" : { } }
{ "_id" : ObjectId("6074792104cc23375a8f97a3"), "imageInfo" : { "presets" : { "preset_one" : { "coords" : [ 2.229365348815918, 1.4654869735240936 ] } } } }
Turns out the answer has to do with how the query is constructed. Specifically, this works:
# Working variant: a brand-new filter dict is created on every iteration, so
# each UpdateOne holds its own filter object.
for entry in keys_and_coords:
    query = {'key': entry['key']}
    coords_update = {
        '$set': {
            'imageInfo.presets.%s.coords' % preset_name: entry['coords']
        }
    }
    operations.append(pymongo.UpdateOne(query, coords_update))
and this fails:
# Failing variant: one dict is created before the loop and the same object is
# mutated and passed to every UpdateOne.
query = {}
for entry in keys_and_coords:
    query['key'] = entry['key']
    coords_update = {
        '$set': {
            'imageInfo.presets.%s.coords' % preset_name: entry['coords']
        }
    }
    operations.append(pymongo.UpdateOne(query, coords_update))
I think what's happening here is that the query dict is passed by reference to each UpdateOne, and the filters are only read when bulk_write executes, once all of the bulk operations are in place. Since every operation holds a reference to the same dict, the 'key' value gets overwritten on each iteration until the last one (which is also why only the last document is updated). There is no async magic involved; it is ordinary shared-mutable-object behaviour. Unfortunately this was tough to catch because printing out the queries and the operations both looked fine, and the problem only showed up at execution. Still, not really an issue with pymongo after all.
Thanks to everyone who responded!
I have the following data (Cars):
[
{
"make" : "Ferrari",
"model" : "F40",
"services" : [
{
"type" : "FULL",
"date_time" : ISODate("2019-10-31T09:00:00.000Z"),
},
{
"type" : "FULL",
"scheduled_date_time" : ISODate("2019-11-04T09:00:00.000Z"),
}
],
},
{
"make" : "BMW",
"model" : "M3",
"services" : [
{
"type" : "FULL",
"scheduled_date_time" : ISODate("2019-10-31T09:00:00.000Z"),
},
{
"type" : "FULL",
"scheduled_date_time" : ISODate("2019-11-04T09:00:00.000Z"),
}
],
}
]
Using Spring data MongoDb I would like a query to retrieve all the Cars where the scheduled_date_time of the last item in the services array is in-between a certain date range.
A query which I used previously when using the first item in the services array is like:
// Earlier query: matches cars whose FIRST services entry (array index 0) has
// scheduled_date_time >= fromDate and < toDate.
mongoTemplate.find(Query.query(
where("services.0.scheduled_date_time").gte(fromDate)
.andOperator(
where("services.0.scheduled_date_time").lt(toDate))),
Car.class);
Note the 0 index since it's first one as opposed to the last one (for my current requirement).
I thought using an aggregate along with a projection and .arrayElementAt(-1) would do the trick but I haven't quite got it to work. My current effort is:
// Attempt from the question: take the last services element (index -1) as
// "currentService", then keep documents whose currentService.scheduled_date_time
// is >= fromDate and < toDate.
// NOTE(review): the project stage names only currentService, yet the results
// are mapped back to Car - confirm the remaining Car fields survive the stage.
Aggregation agg = newAggregation(
project().and("services").arrayElementAt(-1).as("currentService"),
match(where("currentService.scheduled_date_time").gte(fromDate)
.andOperator(where("currentService.scheduled_date_time").lt(toDate)))
);
AggregationResults<Car> results = mongoTemplate.aggregate(agg, Car.class, Car.class);
return results.getMappedResults();
Any help suggestions appreciated.
Thanks,
This mongo aggregation retrieves all the Cars where the scheduled_date_time of the last item in the services array is in-between a specific date range.
// Two-stage pipeline: (1) add a `last` field holding the final element of
// `services` (index -1); (2) keep documents whose last.scheduled_date_time is
// >= the first date and < the second.
[{
$addFields: {
last: {
$arrayElemAt: [
'$services',
-1
]
}
}
}, {
$match: {
'last.scheduled_date_time': {
$gte: ISODate('2019-10-26T04:06:27.307Z'),
$lt: ISODate('2019-12-15T04:06:27.319Z')
}
}
}]
I was trying to write it in spring-data-mongodb without luck.
They do not support $addFields yet, see here.
Since version 2.2.0.RELEASE, spring-data-mongodb includes Aggregation Repository Methods.
The above query should be:
interface CarRepository extends MongoRepository<Car, String> {
#Aggregation(pipeline = {
"{ $addFields : { last:{ $arrayElemAt: [$services,-1] }} }",
"{ $match: { 'last.scheduled_date_time' : { $gte : '$?0', $lt: '$?1' } } }"
})
List<Car> getCarsWithLastServiceDateBetween(LocalDateTime start, LocalDateTime end);
}
This method logs this query
[{ "$addFields" : { "last" : { "$arrayElemAt" : ["$services", -1]}}}, { "$match" : { "last.scheduled_date_time" : { "$gte" : "$2019-11-03T03:00:00Z", "$lt" : "$2019-11-05T03:00:00Z"}}}]
The date parameters are not parsing correctly. I didn't spend much time making it work.
If you want the Car Ids this could work.
/**
 * Returns the ids of the cars whose last service date is {@code >= start} and
 * {@code < end}.
 *
 * The pipeline unwinds {@code services}, groups by {@code id} keeping the last
 * {@code services.date} seen as {@code date}, and then matches on that value.
 * NOTE(review): the logged pipeline renders {@code services.date} as
 * {@code services.scheduled_date_time}, presumably through field mapping on
 * {@code Car} - confirm.
 *
 * @param start lower bound (inclusive)
 * @param end   upper bound (exclusive)
 * @return ids read from the mapped aggregation results
 */
public List<String> getCarsIdWithServicesDateBetween(LocalDateTime start, LocalDateTime end) {
    return template
            .aggregate(
                    newAggregation(
                            unwind("services"),
                            group("id").last("services.date").as("date"),
                            match(where("date").gte(start).lt(end))),
                    Car.class,
                    Car.class)
            .getMappedResults()
            .stream()
            .map(car -> car.getId())
            .collect(Collectors.toList());
}
Query Log
[{ "$unwind" : "$services"}, { "$group" : { "_id" : "$_id", "date" : { "$last" : "$services.scheduled_date_time"}}}, { "$match" : { "date" : { "$gte" : { "$date" : 1572750000000}, "$lt" : { "$date" : 1572922800000}}}}]
I am new to MongoDB and I would like to use the aggregation framework: I want to check type == topic and get the following output.
Expected output
[
{
conceptName : 59d98cfd1c5edc24e4024d00
totalCount : 2
},
{
conceptName : 59d98cfd1c5edc24e4024d03
totalCount : 1
}
]
Sample input db.GroupContents
{
"_id" : "5a0948bb1c5edc7a5000521a",
"type" : "topic",
"groupID" : "5a0948bb1c5edc7a5000521a",
"pedagogyID" : "59d98cfa1c5edc24e40249a3",
}
Sample input db.PedagogyNodes
{
"_id" : "59d98cfa1c5edc24e40249a3",
"latestVersion" : "59d98cfa1c5edc24e402497f_1",
"createdAt" : "2017-10-08 04:27:06",
"updatedAt" : "2017-10-08 04:27:06"
}
Sample input db.PedagogyVersions
{
"_id" : "59d98cfa1c5edc24e402497f_1",
"type" : "topic",
"contentNodes" : {
"LearningNodes" : [
"59d98cfd1c5edc24e4024d00",
"59d98cfd1c5edc24e4024d03",
"59d98cfd1c5edc24e4024d00",
]
},
"createdAt" : "2017-10-08 04:27:06",
"updatedAt" : "2017-10-08 04:27:06"
}
What I have tried so far
// Script from the question: for one groupID, follow GroupContents.pedagogyID ->
// PedagogyNodes.latestVersion -> PedagogyVersions, then count how often each
// contentNodes.LearningNodes entry occurs in that version.
var groupID = "5a0948bb1c5edc7a5000521a"; // Step 1
// NOTE(review): these three outer variables are never assigned in this script;
// `records` is re-declared inside the callback and `pnDoc` is a callback parameter.
var records;
var pnDoc;
var pvDoc;
db.GroupContents.find({groupID : groupID}).forEach(function (doc){ // Step 2
var pedagogyID = doc.pedagogyID;
var records = db.getSiblingDB('PedagogyService');
records.PedagogyNodes.find({_id : pedagogyID}).forEach(function (pnDoc) { // Step 3
var latestVersion = pnDoc.latestVersion;
// addded aggregate function here
// NOTE(review): the value returned by aggregate() is neither stored nor
// printed, so running this script shows no output.
records.PedagogyVersions.aggregate([
{
$match:{_id:latestVersion} // Step 4
},
{
$unwind:"$contentNodes.LearningNodes"
},
{
$group:
{
_id:"$contentNodes.LearningNodes",
count:{$sum:1}
}
}
])
})
});
I am unable to write db query based on my expected answer, please help.
Understand my requirement
Step : 1 => I am passing `groupID = 5a0948bb1c5edc7a5000521a`
Step : 2 => we have to check from GroupContents where groupID = groupID then we have to take `pedagogyID`
Step : 3 => we have to check from PedagogyNodes where _id = pedagogyID then we have to take `latestVersion`
Step : 4 => we have to check from PedagogyVersions where _id = latestVersion then we have to take `contentNodes->LearningNodes`
Step : 5 => Finally we have to do the aggregation then we have display the result
Try to unwind the LearningNodes array and then count them by grouping them together
// Count how many times each learning-node id occurs in the LearningNodes arrays.
// contentNodes.LearningNodes is a field of PedagogyVersions documents (the
// PedagogyNodes sample has no such field), so the pipeline runs on that collection.
db.PedagogyVersions.aggregate([
  // One output document per LearningNodes array entry.
  { $unwind: "$contentNodes.LearningNodes" },
  // Group identical ids together and count them.
  {
    $group: {
      _id: "$contentNodes.LearningNodes",
      count: { $sum: 1 }
    }
  }
])
In case you need to do any matches you can use the $match stage
// Same count as above, restricted to documents of type "topic".
// Both `type` and contentNodes.LearningNodes are fields of PedagogyVersions
// documents, so the pipeline runs on that collection.
db.PedagogyVersions.aggregate([
  // Keep only topic versions before unwinding.
  { $match: { type: "topic" } },
  // One output document per LearningNodes array entry.
  { $unwind: "$contentNodes.LearningNodes" },
  // Group identical ids together and count them.
  {
    $group: {
      _id: "$contentNodes.LearningNodes",
      count: { $sum: 1 }
    }
  }
])
Answering the edited question =>
You were not able to see the output on the console because the mongo shell does not print the results of statements run inside a script. To print them, do the following:
// Keep the value returned by aggregate() and print each result document as JSON.
// `[......]` stands in for the pipeline stages.
var result = records.PedagogyVersions.aggregate([......]);
result.forEach(function(resultDoc){
print(tojson(resultDoc))
})
To see the result of your aggregation you have to pass the callback to be executed as parameter.
// Same match / unwind / group pipeline, with a callback(err, results) passed as
// the second argument that logs the results.
// NOTE(review): the callback form and console.log look like Node.js-driver
// style - confirm they are supported in the environment this script runs in.
records.PedagogyVersions.aggregate([
{
$match:{_id:latestVersion} // Step 4
},
{
$unwind:"$contentNodes.LearningNodes"
},
{
$group:
{
_id:"$contentNodes.LearningNodes",
count:{$sum:1}
}
}
], function(err, results) {
console.log(results);
});
We have a basic enquiry management tool that we're using to track some website enquiries in our administration suite, and we're using the ObjectId of each document in our enquiries collection to sort the enquiries by the date they were added.
{
"_id" : ObjectId("53a007db144ff47be1000003"),
"comments" : "This is a test enquiry. Please ignore. We'll delete it shortly.",
"customer" : {
"name" : "Test Enquiry",
"email" : "test@test.com",
"telephone" : "07890123456",
"mobile" : "07890123456",
"quote" : false,
"valuation" : false
},
"site" : [],
"test" : true,
"updates" : [
{
"_id" : ObjectId("53a007db144ff47be1000001"),
"status" : "New",
"status_id" : ObjectId("537de7c3a5e6e668ffc2335c"),
"status_index" : 100,
"substatus" : "New Web Enquiry",
"substatus_id" : ObjectId("5396bb9fa5e6e668ffc23388"),
"notes" : "New enquiry received from website.",
},
{
"_id" : ObjectId("53a80c977d299cfe91bacf81"),
"status" : "New",
"status_id" : ObjectId("537de7c3a5e6e668ffc2335c"),
"status_index" : 100,
"substatus" : "Attempted Contact",
"substatus_id" : ObjectId("53a80e06a5e6e668ffc2339e"),
"notes" : "In this test, we pretend that we've not managed to get hold of the customer on the first attempt.",
},
{
"_id" : ObjectId("53a80e539b966b8da5c40c36"),
"status" : "Approved",
"status_id" : ObjectId("52e77a49d85e95f00ebf6c72"),
"status_index" : 200,
"substatus" : "Enquiry Confirmed",
"substatus_id" : ObjectId("53901f1ba5e6e668ffc23372"),
"notes" : "In this test, we pretend that we've got hold of the customer after failing to contact them on the first attempt.",
}
]
}
Within each enquiry is an updates array of objects which also have an ObjectId as their main identity field. We're using an $unwind and $group aggregation to pull the first and latest updates, as well as the count of updates, making sure we only take enquiries where there have been more than one update (as one is automatically inserted when the enquiry is made):
// For every test enquiry: the first update id, the latest update id and the
// number of updates, keeping only enquiries with more than one update.
var onlyTestEnquiries = { $match: { "test": true } };
var onePerUpdate = { $unwind: "$updates" };
var summarisePerEnquiry = {
  $group: {
    "_id": "$_id",
    "latest_update_id": { $last: "$updates._id" },
    "first_update_id": { $first: "$updates._id" },
    "update_count": { $sum: 1 }
  }
};
var moreThanOneUpdate = { $match: { "update_count": { $gt: 1 } } };
db.enquiries.aggregate([
  onlyTestEnquiries,
  onePerUpdate,
  summarisePerEnquiry,
  moreThanOneUpdate
])
This results in the following output:
{
"result" : [
{
"_id" : ObjectId("53a295ad122ea80200000005"),
"latest_update_id" : ObjectId("53a80bdc7d299cfe91bacf7e"),
"first_update_id" : ObjectId("53a295ad122ea80200000003"),
"update_count" : 2
},
{
"_id" : ObjectId("53a007db144ff47be1000003"),
"latest_update_id" : ObjectId("53a80e539b966b8da5c40c36"),
"first_update_id" : ObjectId("53a007db144ff47be1000001"),
"update_count" : 3
}
],
"ok" : 1
}
This is then passed through to our code (node.js, in this case) where we perform a few operations on it and then present some information on our dashboard.
Ideally, I'd like to add another $group pipeline aggregation to the query which would subtract the timestamp of first_update_id from the timestamp of latest_update_id to give us a timespan, which we could then use $avg on.
Can anyone tell me if this is possible? (Thank you!)
As Neil already pointed out, you can't get to the timestamp from the ObjectId in the aggregation framework.
You said that speed is not important, so using MapReduce you can get what you want:
// Map step: for an enquiry with more than one update, emit (keyed by the
// enquiry _id) the first and last update ids, the update count, and the
// difference between the two ObjectId timestamps.
var map = function () {
  var updates = this.updates;
  var total = updates.length;
  if (!(total > 1)) {
    return;
  }
  var oldest = updates[0];
  var newest = updates[total - 1];
  // Subtracting the two Date objects yields the gap in milliseconds.
  var elapsed = newest._id.getTimestamp() - oldest._id.getTimestamp();
  emit(this._id, {
    latest_update_id: newest._id,
    first_update_id: oldest._id,
    update_count: total,
    diff: elapsed
  });
};
// No-op reduce: map emits once per enquiry, keyed by that enquiry's _id.
var reduce = function () {};
// Run the map-reduce over the test enquiries and store the output in `mrresults`.
var mapReduceCommand = {
  mapReduce: "enquiries",
  map: map,
  reduce: reduce,
  out: "mrresults",
  query: { test: true }
};
db.runCommand(mapReduceCommand);
These are the results:
{
"_id" : ObjectId("53a007db144ff47be1000003"),
"value" : {
"latest_update_id" : ObjectId("53a80e539b966b8da5c40c36"),
"first_update_id" : ObjectId("53a007db144ff47be1000001"),
"update_count" : 3,
"diff" : 525944000
}
}
Edit:
If you want to get the average diff for all documents you can do it like this:
// Map step for the overall average: every enquiry with more than one update
// contributes one sample under the single key "1".
// The emitted value uses the same {count, sum} shape that reduce returns:
// MongoDB may call reduce again on its own earlier output, and passes a lone
// emitted value straight through to finalize without calling reduce at all.
var map = function () {
  if (this.updates.length > 1) {
    var first = this.updates[0];
    var last = this.updates[this.updates.length - 1];
    // Difference between the two ObjectId timestamps, in milliseconds.
    var diff = last._id.getTimestamp() - first._id.getTimestamp();
    emit("1", { count: 1, sum: diff });
  }
};
// Reduce step: add up the sample counts and the diff sums.
// The result has the same shape as the inputs, so partial results can be
// reduced again without changing the totals.
var reduce = function (key, values) {
  var reducedVal = { count: 0, sum: 0 };
  for (var idx = 0; idx < values.length; idx++) {
    reducedVal.count += values[idx].count;
    reducedVal.sum += values[idx].sum;
  }
  return reducedVal;
};
// Finalize step: store the average (sum / count) on the reduced value and
// return that same object.
var finalize = function (key, reducedVal) {
  var total = reducedVal.sum;
  var samples = reducedVal.count;
  reducedVal.avg = total / samples;
  return reducedVal;
};
// Run the averaging map-reduce over the test enquiries and write the single
// result document to `mrtest`.
// The source collection is `enquiries`, the collection the question is about.
db.runCommand(
  {
    mapReduce: "enquiries",
    map: map,
    reduce: reduce,
    finalize: finalize,
    out: "mrtest",
    query: { test: true }
  }
);
And the example output:
> db.mrtest.find().pretty()
{
"_id" : "1",
"value" : {
"count" : 2,
"sum" : 1051888000,
"avg" : 525944000
}
}