Add existing fields to nested schema - mongodb

I have documents in my db with schema:
var MySchema = new Schema({
Street: { type: String },
Age: { type: Number, default: null },
Date: { type: Date },
Stuff: [
{
_id:false,
ThisDate: { type: Date },
ThisStreet: { type: String }
}]
});
Right now it is (Stuff is empty):
db.person.findOne()
{
Street: 'TheStreet',
Age: 23,
Date: ISODate("2016-02-19T00:00:00.000Z"),
Stuff: []
}
I then want to update all documents. What I want to do is to move Street and Date fields into Stuff array and delete Street and Date fields from schema.
Like this:
db.person.findOne()
{
Age: 23,
Stuff : [
{
ThisDate : ISODate("2016-02-19T00:00:00.000Z"),
ThisStreet : "TheStreet"
}
]
}
How could I achieve this?
Best Regards

Since this is a "one off" operation I would do it in the shell rather than use any other framework.
For MongoDB 3.2.x releases and greater use, bulkWrite():
// Move Street/Date into the Stuff array for every matching document,
// sending the writes to the server in batches of 1000 via bulkWrite().
var pending = [];

db.person.find({
    "Street": { "$exists": true },
    "Date": { "$exists": true }
}).forEach(function(person) {
    // Build the per-document change: strip the old top-level fields and
    // write "just" the re-arranged data.
    var change = {
        "$unset": {
            "Street": "",
            "Date": ""
        },
        "$set": {
            "Stuff": [{
                "ThisDate": person.Date,
                "ThisStreet": person.Street
            }]
        }
    };
    pending.push({
        "updateOne": {
            "filter": { "_id": person._id },
            "update": change
        }
    });
    // Flush a full batch and start collecting the next one.
    if (pending.length === 1000) {
        db.person.bulkWrite(pending);
        pending = [];
    }
});

// Flush whatever is left over after the cursor is exhausted.
if (pending.length > 0) {
    db.person.bulkWrite(pending);
}
Or for MongoDB 2.6.x and 3.0.x releases use this version of Bulk operations:
// MongoDB 2.6.x / 3.0.x fallback: same migration using the Bulk operations
// API, executing the accumulated batch once every 1000 documents.
var bulk = db.person.initializeUnorderedBulkOp(),
    count = 0;
db.person.find({
    "Street": { "$exists": true },
    "Date": { "$exists": true }
}).forEach(function(doc) {
    bulk.find({ "_id": doc._id }).updateOne({
        "$unset": {
            "Street": "",
            "Date": ""
        },
        "$set": {
            "Stuff": [{
                "ThisDate": doc.Date,
                "ThisStreet": doc.Street
            }]
        }
    });
    // BUG FIX: the original never incremented count, so `count % 1000 == 0`
    // held on every iteration (0 % 1000 == 0) — executing a one-op batch per
    // document — and the final `count % 1000 != 0` remainder flush never ran.
    count++;
    if ( count % 1000 == 0 ) {
        bulk.execute();
        bulk = db.person.initializeUnorderedBulkOp();
    }
});
// Flush any remaining queued operations (skipped when the last batch was
// exactly full, since it was already executed above).
if ( count % 1000 != 0 )
    bulk.execute();
Bottom line is that you need to iterate the documents in the collection and write them back with the re-arranged content "one by one". At least the Bulk operations API in use in both cases will reduce the load of writing and responding with the server to only one in every 1000 documents in the collection to process.
Also, rather than rewriting the whole document you are using $unset to remove the desired fields and $set to write "just" the data you want
Working example
db.person.insert(
{
"Street": 'TheStreet',
"Age": 23,
"Date": ISODate("2016-02-19T00:00:00.000Z"),
"Stuff": []
}
)
Then after running either update above, the result is:
{
"_id" : ObjectId("56e607c1ca8e7e3519b4ce93"),
"Age" : 23,
"Stuff" : [
{
"ThisDate" : ISODate("2016-02-19T00:00:00Z"),
"ThisStreet" : "TheStreet"
}
]
}

I'd suggest you transform the documents using the aggregation framework and update them as described in the code snippet below
// Reshape each person with a $project stage, then write the transformed
// shape back as a whole-document replacement. The Street $exists guard
// makes each replacement effectively one-shot.
var reshaped = db.person.aggregate([
  { $project: { Age: 1, Stuff: [ { Date: "$Date", Street: "$Street" } ] } }
]);

reshaped.forEach(function(doc) {
  var docId = doc._id;
  delete doc._id; // _id cannot appear in the replacement body
  db.person.update({ _id: docId, Street: { $exists: true } }, doc);
});
After successful execution, you document or documents should look like
{
"_id" : ObjectId("56e2cd45792861e14df1f0a9"),
"Age" : 23.0,
"Stuff" : [
{
"Date" : ISODate("2016-02-19T00:00:00.000+0000"),
"Street" : "TheStreet"
}
]
}

Related

How to save deletion in a deeply nested MongoDB document

I am new to MongoDB and I am using MongoDB shell to perform the operations.
I am working to remove the array named Process from all the Items, but it seems that I do not grasp the remove concept correctly.
The documents we use are deeply nested - we do not know how many items there are, or how deep the level of nesting.
What I tried so far is to use recursion to iterate through the items:
// QUESTION CODE (broken): walks the arbitrarily-nested Items arrays,
// intending to remove every "Process" field — but never performs a write.
function removeAllProcessFields(docItems)
{
if(Array.isArray(docItems))
{
docItems.forEach(function(item)
{
print("idItem: "+item._id);
if(item.Process == null)
{
print("Process null");
}
else
{
// NOTE(review): this line is a no-op. JavaScript parses it as a label
// ("$unset:") followed by a block containing another label ("Process:")
// and the string expression "" — it is NOT a MongoDB update operator and
// nothing is ever sent to the database. An actual db.<coll>.update(...)
// call with a computed $unset path would be needed here.
$unset: { Process: ""}
}
// Recurse into the nested Items array (nesting depth is unbounded).
removeAllProcessFields(item.Items);
})
}
}
// Driver: fetch every document and recurse through its top-level Items.
var docs = db.getCollection('MyCollection').find({})
docs.forEach(function(doc)
{
print("idDoc: "+doc._id);
removeAllProcessFields(doc.Items);
})
But I have difficulties on using unset properly to save the operation.
An example document would be:
{
"_id": "622226d319517e83e8ed6151",
"Name": "test1",
"Description": "",
"Items": [{
"_id": "622226d319517e83e8ed614e",
"Name": "test-item",
"Description": "",
"Process": [{
"Name": "Step1"
}, {
"Name": "Step2"
}],
"Items": [{
"_id": "622226d319517e83e8ed614f",
"Name": "test-subItem1",
"Description": "",
"Process": [{
"Name": "StepSub1"
}, {
"Name": "StepSub2"
}, {
"Name": "StepSub3"
}],
"Items": []
},
{
"_id": "622226d319517e83e8ed6150",
"Name": "test-subItem2",
"Description": "",
"Process": [{
"Name": "StepSub4"
}, {
"Name": "StepSub5"
}, {
"Name": "StepSub6"
}],
"Items": []
}
]
}]
}
What I hope to achieve would be:
{
"_id": "622226d319517e83e8ed6151",
"Name": "test1",
"Description": "",
"Items": [{
"_id": "622226d319517e83e8ed614e",
"Name": "test-item",
"Description": "",
"Items": [{
"_id": "622226d319517e83e8ed614f",
"Name": "test-subItem1",
"Description": "",
"Items": []
},
{
"_id": "622226d319517e83e8ed6150",
"Name": "test-subItem2",
"Description": "",
"Items": []
}
]
}]
}
Something like this maybe using the $[] positional operator:
// Remove Process at the first two nesting levels using the all-positional
// operator $[]. Each level's path must be spelled out explicitly, so this
// form only works when the maximum nesting depth is known in advance.
db.collection.update({},
{
$unset: {
"Items.$[].Items.$[].Process": 1,
"Items.$[].Process": 1
}
})
You just need to construct it in the recursion ...
playground
JavaScript recursive function example:
mongos> db.rec.find()
{ "_id" : ObjectId("622a6c46ae295edb276df8e2"), "Items" : [ { "a" : 1 }, { "Items" : [ { "Items" : [ { "Items" : [ ], "Process" : [ 1, 2, 3 ] } ], "Process" : [ 4, 5, 6 ] } ], "Process" : [ ] } ] }
mongos> db.rec.find().forEach(function(obj){ var id=obj._id,ar=[],z=""; function x(obj){ if(typeof obj.Items != "undefined" ){ obj.Items.forEach(function(k){ if( typeof k.Process !="undefined" ){ z=z+".Items.$[]";ar.push(z.substring(1)+".Process") }; if(typeof k.Items != "undefined"){x(k)}else{} }) }else{} };x(obj);ar.forEach(function(del){print( "db.collection.update({_id:ObjectId('"+id+"')},{$unset:{'"+del+"':1}})" );}) })
db.collection.update({_id:ObjectId('622a6c46ae295edb276df8e2')},{$unset:{'Items.$[].Process':1}})
db.collection.update({_id:ObjectId('622a6c46ae295edb276df8e2')},{$unset:{'Items.$[].Items.$[].Process':1}})
db.collection.update({_id:ObjectId('622a6c46ae295edb276df8e2')},{$unset:{'Items.$[].Items.$[].Items.$[].Process':1}})
mongos>
Explained:
Loop over all documents in collection with forEach
Define recursive function x that will loop over any number of nested Items and identify if there is Process field and push to array ar
Finally loop over array ar and construct the update $unset query , in the example only printed for safety , but you can improve generating single query per document and executing unset query ...
Assuming you are on v>=4.4 you can use the "merge onto self" feature of $merge plus defining a recursive function to sweep through the collection and surgically remove one or a list of fields at any level of the hierarchy. The same sort of needs arise when processing json-schema data which is also arbitrarily hierarchical.
The solution below has extra logic to "mark" documents that had any modifications so the others can be removed from the update set passed to $merge. It also can be further refined to reduce some variables; it was edited down from a more general solution that had to examine keys and values.
// v4.4+ "merge onto self": rewrite each document server-side with a
// recursive JS walker that deletes the target field(s) at ANY depth, then
// write only the modified documents back with $merge.
db.foo.aggregate([
{$replaceRoot: {newRoot: {$function: {
body: function(obj, target) {
// Set to true only when at least one key was actually deleted.
var didSomething = false;
// Descend into a value: arrays are walked element-by-element (in
// reverse, so deletions don't disturb the indices still to visit),
// plain objects are handed back to walkObj.
var process = function(holder, spot, value) {
// test FIRST since [] instanceof Object is true!
if(Array.isArray(value)) {
for(var jj = value.length - 1; jj >= 0; jj--) {
process(value, jj, value[jj]);
}
} else if(value instanceof Object) {
walkObj(value);
}
};
// Walk one object's keys: delete any key named in target, recurse
// into everything else.
var walkObj = function(obj) {
Object.keys(obj).forEach(function(k) {
if(target.indexOf(k) > -1) {
delete obj[k];
didSomething = true;
} else {
process(obj, k, obj[k]);
}
});
}
// ENTRY POINT:
if(!Array.isArray(target)) {
target = [ target ]; // if not array, make it an array
}
walkObj(obj);
// Mark untouched documents so the $match below can drop them from
// the set handed to $merge.
if(!didSomething) {
obj['__didNothing'] = true;
}
return obj;
},
// Invoke!
// You can delete multiple fields with an array, e.g.:
// ..., ['Process','Description']
args: [ "$$ROOT", 'Process' ],
lang: "js"
}}
}}
// Only let thru docs WITHOUT the marker:
,{$match: {'__didNothing':{$exists:false}} }
,{$merge: {
into: "foo",
on: [ "_id" ],
whenMatched: "merge",
whenNotMatched: "fail"
}}
]);

How can I unset all document properties except for one or two in mongodb?

I have a collection of documents about entities that have status property that could be 1 or 0. Every document contains a lot of data and occupies space.
I want to get rid of most of the data on the documents with status equal 0.
So, I want every document in the collection that looks like
{
_id: 234,
myCode: 101,
name: "sfsdf",
status: 0,
and: 23243423.1,
a: "dsf",
lot: 3234,
more: "efsfs",
properties: "sdfsd"
}
...to be a lot smaller
{
_id: 234,
mycode: 101,
status: 0
}
So, basically I can do
db.getCollection('docs').update(
{'statusCode': 0},
{
$unset: {
and: "",
a: "",
lot: "",
more: "",
properties: ""
}
},
{multi:true}
)
But there are about 40 properties which would be a huge list, and also I'm not sure that all the objects follow the same schema.
Is there a way to unset all except two properties?
The best thing to do here is to actually throw all the possible properties to $unset and let it do its job. You cannot "wildcard" such arguments so there really is not a better way without writing to another collection.
If you don't want to type them all out or even know all of them, then simply perform a process to "collect" all the other top level properties.
You can do this for example with .mapReduce():
// Collect every top-level field name present anywhere in the collection
// (except _id and myCode), shaped as { field: "", ... } so the result can
// be passed straight to $unset.
var mapped = db.getCollection('docs').mapReduce(
  function() {
    Object.keys(this).forEach(function(key) {
      if (key !== '_id' && key !== 'myCode') {
        emit(key, 1);
      }
    });
  },
  function() {},
  { "out": { "inline": 1 } }
);

var fields = {};
mapped.results.forEach(function(res) {
  fields[res._id] = "";
});
Gives you an object with the full fields list to provide to $unset as:
{
"a" : "",
"and" : "",
"lot" : "",
"more" : "",
"name" : "",
"properties" : "",
"status" : ""
}
And that is taken from all possible top level fields in the whole collection.
You can do the same thing with .aggregate() in MongoDB 3.4 using $objectToArray:
// MongoDB 3.4 equivalent of the mapReduce approach: turn each document
// into k/v pairs with $objectToArray, drop _id/myCode, and group the
// distinct key names; then reshape client-side into a $unset argument.
var fields = db.getCollection('docs').aggregate([
{ "$project": {
"fields": {
"$filter": {
"input": { "$objectToArray": "$$ROOT" },
"as": "d",
"cond": {
"$and": [
{ "$ne": [ "$$d.k", "_id" ] },
{ "$ne": [ "$$d.k", "myCode" ] }
]
}
}
}
}},
{ "$unwind": "$fields" },
{ "$group": {
"_id": "$fields.k"
}}
]).map( o => o._id )
.reduce((acc,curr) => Object.assign(acc,{ [curr]: "" }),{});
Whatever way you obtain the list of names, then simply send them to $unset:
db.getCollection('docs').update(
{ "statusCode": 0 },
{ "$unset": fields },
{ "multi": true }
)
Bottom line is that $unset does not care whether the properties are present in the document or not, but will simply remove them where they exist.
The alternate case is to simply write everything out to a new collection if that also suits your needs. This is a simple use of $out as an aggregation pipeline stage:
db.getCollection('docs').aggregate([
{ "$match": { "statusCode": 0 } },
{ "$project": { "myCode": 1 } },
{ "$out": "newdocs" }
])

How to project selected fields from a document in mongoose based on conditions

I have my document structure in this way. Where i am storing all the events based occured with timestamps. My document look like this
[
{
"_id": {
"$oid": "589341cff92be305c034cb5a"
},
"__v": 0,
"name": "TV",
"switch_event": [
{
"timestamp": 1486186277826,
"event_type": "on"
},
{
"timestamp": 1486272677826,
"event_type": "off"
},
{
"timestamp": 1486099877826,
"event_type": "off"
},
{
"timestamp": 1486186277826,
"event_type": "on"
},
{
"timestamp": 1486272677826,
"event_type": "off"
},
{
"timestamp": 1486099877826,
"event_type": "off"
}
]
}
]
Now while querying for this document i am interested in only the events which occured today. So after querying for this i am writing projection query like this (for testing i kept timestamp > 0, which should give all events) -
// QUESTION CODE: find with a $elemMatch projection.
// NOTE(review): a $elemMatch PROJECTION returns only the FIRST array
// element satisfying the condition — which is exactly why only one event
// object comes back below, even though every timestamp is > 0.
SwitchAppliance.find({_id:"589341cff92be305c034cb5a"},{
name:1,
switch_event:{$elemMatch: {
timestamp: {
$gt:0
}
}}
},(err,results)=>{
if(err) {console.log(err);return next({message: "Internal Server Error"});}
return res.json(results);
} );
But when i am getting result i am only getting one event object in the switch_event array- like this -
[
{
"_id": "589341cff92be305c034cb5a",
"switch_event": [
{
"_id": "589567251c653a0890b8b1ef",
"event_type": "on",
"timestamp": 1486186277826
}
],
"name": "TV"
}
]
You are querying Date.now() (timestamp or number) against new Date() (date object). A quick console test shows the following:
> typeof Date.now()
=> "number"
> typeof new Date()
=> "object"
So first off, I would change
var today = new Date();
to
var today = Date.now();
Then you are missing that "timestamp" does not hold a date value, but another Object with one Item with the key $date and a date as a value. Therefore your query should probably look similar to something like this:
var today = Date.now();
Item1.find({}).populate({
path:"item1",
select: {
name:1,
event:{
$elemMatch: {
timestamp: {
date: {
$gte:today
}
}
}
}
}
})
Finally, found the key here.
Unwind empty array in mongodb
here #ivan.srb 's answers came handy.
Finally ended up doing something like this -
SwitchAppliance.aggregate([
{$match:{_id:{$in : switch_appliances_array}}},
{$project:{
switch_no:1,switch_name:1,switch_type:1,appliance_type:1,sboard_id:1,current_status:1,
switch_event : { $cond : [ { $eq : [ "$switch_event", [] ] }, [ { timestamp : 0 , event_type:"off"} ], '$switch_event' ] }
}},
{$unwind:"$switch_event"},
{$match:{$or: [{"switch_event.timestamp":{$gt:now}},{ "switch_event.timestamp":0} ]}},
{$group:{
_id:"$_id",
switch_no:{$first:"$switch_no"},
switch_type:{$first:"$switch_type"},
switch_name:{$first:"$switch_name"},
appliance_type:{$first:"$appliance_type"},
current_status:{$first:"$current_status"},
switch_events:{$push:"$switch_event"}
}},

MongoDB Update array element (document with a key) if exists, else push

I have such a schema:
doc:
{
//Some fields
visits:
[
{
userID: Int32
time: Int64
}
]
}
I want to first check if a specific userID exists; if not, push a document with that userID and the system time, else just update the time value. I know that neither $push nor $addToSet is able to do that. Also, using $ with upsert:true doesn't work, because the official documentation advises that the DB will use $ as a field name instead of an operator when trying to upsert.
Please guide me about this. Thanks
You can use $addToSet to add an item to the array and $set to update an existing item in this array.
The following will add a new item to the array if the userID is not found in the array :
// Add a new visits entry only where NO existing element has userID 4:
// the $not + $elemMatch filter excludes documents that already contain
// that userID, so $addToSet never duplicates it.
db.doc.update({
visits: {
"$not": {
"$elemMatch": {
"userID": 4
}
}
}
}, {
$addToSet: {
visits: {
"userID": 4,
"time": 1482607614
}
}
}, { multi: true });
The following will update the subdocument array item if it matches the userId :
// Update an existing entry in place: the positional "$" resolves to the
// first array element matched by the "visits.userID": 2 query condition.
db.doc.update({ "visits.userID": 2 }, {
$set: {
"visits.$.time": 1482607614
}
}, { multi: true });
// Mongoose variant: atomically push { id } and bump actualCost, but only
// when no products element already carries this id — the $nin condition on
// 'products.id' acts as the "not exists" guard. { new: true } makes the
// call resolve with the post-update document.
const p = await Transaction.findOneAndUpdate(
{
_id: data.id,
'products.id': { $nin: [product.id] },
},
{
$inc: {
actualCost: product.mrp,
},
$push: {
products: { id: product.id },
},
},
{ new: true }
);
or
// Same guard expressed as an aggregation: match the document, keep it only
// if no sizes element has id 7, then $set a field on the result.
// NOTE(review): aggregate on its own only RETURNS the transformed
// documents — without a $merge/$out stage nothing is persisted; confirm
// that is the intent here.
db.collection.aggregate([
{
"$match": {
"_id": 1
}
},
{
"$match": {
"sizes.id": {
"$nin": [
7
]
}
}
},
{
"$set": {
"price": 20
}
}
])
https://mongoplayground.net/p/BguFa6E9Tra
I know it's very late. But it may help others. Starting from mongo4.4, we can use $function to use a custom function to implement our own logic. Also, we can use the bulk operation to achieve this output.
Assuming the existing data is as below
{
"_id" : ObjectId("62de4e31daa9b8acd56656ba"),
"entrance" : "Entrance1",
"visits" : [
{
"userId" : 1,
"time" : 1658736074
},
{
"userId" : 2,
"time" : 1658736671
}
]
}
Solution 1: using custom function
// mongo 4.4+ pipeline update with $function: rebuild the visits array in
// server-side JS so the whole "replace-or-push" happens in one update.
db.visitors.updateMany(
{_id: ObjectId('62de4e31daa9b8acd56656ba')},
[
{
$set: {
visits: {
$function: {
lang: "js",
args: ["$visits"],
body: function(visits) {
let v = []
// Entry to upsert; time is epoch seconds.
let input = {userId: 3, time: Math.floor(Date.now() / 1000)};
// Drop any existing entry for this userId (the "update" half),
// then append the fresh entry (the "push" half).
if(Array.isArray(visits)) {
v = visits.filter(x => x.userId != input.userId)
}
v.push(input)
return v;
}
}
}
}
}
]
)
In NodeJS, the function body should be enclosed with ` character
...
lang: 'js',
args: ["$visits"],
body: `function(visits) {
let v = []
let input = {userId: 3, time: Math.floor(Date.now() / 1000)};
if(Array.isArray(visits)) {
v = visits.filter(x => x.userId != input.userId)
}
v.push(input)
return v;
}`
...
Solution 2: Using bulk operation:
Please note that the time here will be in the ISODate
// Bulk alternative: $pull any existing entry for userId 2, then $push a
// fresh one. The ORDERED bulk op guarantees the pull runs before the push.
var bulkOp = db.visitors.initializeOrderedBulkOp()
bulkOp.find({ _id: ObjectId('62de4e31daa9b8acd56656ba') }).updateOne({$pull: { visits: { userId: 2 }} });
bulkOp.find({ _id: ObjectId('62de4e31daa9b8acd56656ba') }).updateOne({$push: {visits: {userId: 2, time: new Date()}}})
bulkOp.execute()
Reference link

How to limit the no of columns in output while doing aggregate operation in Mongo DB

My function looks like below.
// QUESTION CODE: aggregates the per-state minimum SO2 and tries to write
// it back onto the documents.
function (x)
{
// NOTE(review): aggregate() returns a CURSOR, not a scalar — assigning it
// via $set is what embeds the _batch/_cursor structure shown in the
// question's output below.
var SO2Min = db.AirPollution.aggregate(
[
{
$match : {"SO2":{$ne:'NA'}, "State":{$eq: x} }
},
{
$group:
{
_id: x,
SO2MinQuantity: { $min: "$SO2" }
}
},
{
$project:
{SO2MinQuantity: '$SO2MinQuantity'
}
}
]
)
// NOTE(review): further defects — "SO2Max" is never defined in this
// function (only SO2Min is computed), and the filter hard-codes
// "West Bengal" instead of using the x parameter.
db.AirPollution.update
(
{
"State": "West Bengal"},
{
$set: {
"MaxSO2": SO2Max
}
},
{
"multi": true
}
);
}
Here, AirPolltuion is my Collection. If I run this function, the collection gets updated with new column MaxSO2 as below.
{
"_id" : ObjectId("5860a2237796484df5656e0c"),
"Stn Code" : 11,
"Sampling Date" : "02/01/15",
"State" : "West Bengal",
"City/Town/Village/Area" : "Howrah",
"Location of Monitoring Station" : "Bator, Howrah",
"Agency" : "West Bengal State Pollution Control Board",
"Type of Location" : "Residential, Rural and other Areas",
"SO2" : 10,
"NO2" : 40,
"RSPM/PM10" : 138,
"PM 2.5" : 83,
"MaxSO2" : {
"_batch" : [
{
"_id" : "West Bengal",
"SO2MaxQuantity" : 153
}
],
"_cursor" : {}
}
}
Here we can see that MaxSO2 has been added as a subdocument. But I want that column to be added inside the same document as a field, not as part of a subdocument. Precisely, I don't want the _batch and _cursor fields to come up. Please help.
Since the aggregate function returns a cursor, you can use the toArray() method which returns an array that contains all the documents from a cursor and then access the aggregated field. Because you are returning a single value from the aggregate, there's no need to iterate the results array, just access the first and only single document in the result to get the value.
Once you get this value you can then update your collection using updateMany() method. So you can refactor your code to:
// Compute both the minimum and maximum SO2 for state x in a single
// aggregation pass, then stamp the two scalar values onto every document
// of that state with updateMany().
function updateMinAndMax(x) {
    // toArray() drains the cursor; the $group on a constant key yields
    // exactly one summary document, so [0] is all we need.
    var summary = db.AirPollution.aggregate([
        {
            "$match" : {
                "SO2": { "$ne": 'NA' },
                "State": { "$eq": x }
            }
        },
        {
            "$group": {
                "_id": x,
                "SO2MinQuantity": { "$min": "$SO2" },
                "SO2MaxQuantity": { "$max": "$SO2" }
            }
        },
    ]).toArray()[0];

    db.AirPollution.updateMany(
        { "State": x },
        {
            "$set": {
                "SO2MinQuantity": summary["SO2MinQuantity"],
                "SO2MaxQuantity": summary["SO2MaxQuantity"]
            }
        },
    );
}
updateMinAndMax("West Bengal");