MongoDB Conditional validation on arrays and embedded documents - mongodb

I have a number of documents in my database where I am applying document validation. All of these documents may have embedded documents. I can apply simple validation along the lines of SQL NOT NULL checks (these are essentially enforcing the primary key constraints), but what I would like to do is apply some sort of conditional validation to the optional arrays and embedded documents. For example, let's say I have a document that looks like this:
{
"date": <<insertion date>>,
"name" : <<the portfolio name>>,
"assets" : << amount of money we have to trade with>>
}
Clearly I can put validation on this document to ensure that date, name and assets all exist at insertion time. Let's say, however, that I'm managing a stock portfolio and the document can have future updates to show an array of stocks like this:
{
"date" : <<insertion date>>,
"name" : <<the portfolio name>>,
"assets" : << amount of money we have to trade with>>
"portfolio" : [
{ "stockName" : "IBM",
"pricePaid" : 155.39,
"sharesHeld" : 100
},
{ "stockName" : "Microsoft",
"pricePaid" : 57.22,
"sharesHeld" : 250
}
]
}
Is it possible to apply a conditional validation to this array of subdocuments? It's valid for the portfolio to not be there, but if it is, each document in the array must contain the three fields "stockName", "pricePaid" and "sharesHeld".

MongoShell
// Validate the optional "portfolio" array with a JSON Schema (MongoDB 3.6+).
//
// "portfolio" is not listed under `required`, so documents without it pass.
// When it is present it must be an array, and the `items` schema is applied
// to EVERY element. A dotted-path query such as
// {"portfolio.stockName": {$exists: true}} is satisfied as soon as ANY
// element matches, so it would accept an array that mixes complete and
// incomplete entries.
db.createCollection("collectionname", {
  validator: {
    $jsonSchema: {
      bsonType: "object",
      properties: {
        portfolio: {
          bsonType: "array",
          items: {
            bsonType: "object",
            required: ["stockName", "pricePaid", "sharesHeld"],
            properties: {
              stockName: { bsonType: "string" },
              // "number" covers double, int, long and decimal, so an
              // integer-valued literal such as 250 is accepted no matter
              // which numeric BSON type the shell or driver stores it as.
              pricePaid: { bsonType: "number" },
              sharesHeld: { bsonType: "number" }
            }
          }
        }
      }
    }
  }
})
With the above validation in place you can insert documents with or without a portfolio.
After creating the collection with this validator in the shell, you can insert documents such as the following:
// Two documents that pass validation: the first has no "portfolio" field at
// all, the second has a "portfolio" whose single entry carries stockName,
// pricePaid and sharesHeld.
var withoutPortfolio = {
  _id: ObjectId("58061aac8812662c9ae1b479"),
  date: ISODate("2016-10-18T12:50:52.372Z"),
  name: "B",
  assets: 200
};
db.collectionname.insert(withoutPortfolio)

var withPortfolio = {
  _id: ObjectId("58061ab48812662c9ae1b47a"),
  date: ISODate("2016-10-18T12:51:00.747Z"),
  name: "A",
  assets: 100,
  portfolio: [
    { stockName: "Microsoft", pricePaid: 57.22, sharesHeld: 250 }
  ]
};
db.collectionname.insert(withPortfolio)
If we try to insert a document like this
// A document that fails validation: its only portfolio entry has stockName
// and sharesHeld but no pricePaid.
var incompletePortfolio = {
  date: new Date(),
  name: "A",
  assets: 100,
  portfolio: [
    { stockName: "IBM", sharesHeld: 100 }
  ]
};
db.collectionname.insert(incompletePortfolio)
then we will get the below error message
WriteResult({
"nInserted" : 0,
"writeError" : {
"code" : 121,
"errmsg" : "Document failed validation"
}
})
Using Mongoose
Yes, it can be done. Based on your scenario, you may need to define both the parent and the child schema.
Shown below is a sample of the child (portfolio) schema in Mongoose.
var mongoose = require('mongoose');
var Schema = mongoose.Schema;

// Child schema for one entry of the "portfolio" array. Every field is marked
// `required`, so Mongoose rejects an entry that omits any of the three.
var portfolioSchema = new Schema({
  "stockName" : { type : String, required : true },
  "pricePaid" : { type : Number, required : true },
  "sharesHeld" : { type : Number, required : true }
});
References:
http://mongoosejs.com/docs/guide.html
http://mongoosejs.com/docs/subdocs.html
Can I require an attribute to be set in a mongodb collection? (not null)
Hope it Helps!

Related

MongoDB - Update an Key

I have been trying to update an Object for this collection. Below is the collection. Looking for Server 3.6 version.
The ask here is to rename the class key from "History" to "HISTORY_NEW" for the students that have it. I need a query that selects all student records in the student collection that contain the "History" class and renames it to "HISTORY_NEW". I have around 30,000 records and have not found a bulk update method.
{
"_id" : ObjectId("611f90aa43f77a728879c395"),
"studentId" : "stu1",
"classes" : {
"History" : {
"TeacherName" : "T1",
"Marks" : [
{
"Internal": 15
}
]
},
"Geography" : {
"TeacherName" : "T2",
"Marks" : [
{
"Internal" : 20
}
]
}
},
"updateDate" : ISODate("2021-10-12T11:40:47.156Z")
}
This is the result I am expecting
{
"_id" : ObjectId("611f90aa43f77a728879c395"),
"studentId" : "stu1",
"classes" : {
"HISTORY_NEW" : {
"TeacherName" : "T1",
"Marks" : [
{
"Internal": 15
}
]
},
"Geography" : {
"TeacherName" : "T2",
"Marks" : [
{
"Internal" : 20
}
]
}
},
"updateDate" : ISODate("2021-10-12T11:40:47.156Z")
}
Is that even possible with the kind of collection above, or do I have to go the code route?
So far this is what I have, without any success.
Get all students' Ids and then update the Class name. But that is also not working and don't think it is smart to update DB 30,000 times.
// Asker's attempt: collect the studentId of every document that has a
// "classes.History" field, then issue one updateOne call per collected id.
var studentIds =[];
db.studentSubject.find({"classes.History":{$exists:true}})
.forEach(function(u) { studentIds.push(u.studentId) })
studentIds.forEach(function(studentId) {
var result;
try {
result =db.studentSubject.updateOne(
{studentId:studentId},
// $set assigns a value to the named field: this stores the string
// "HISTORY_NEW" as the value of "classes.History"; it does not rename the key.
{ $set : {"classes.History": "HISTORY_NEW",}},
{ upsert: false});
} catch (e) {
// A failure is printed and the loop continues with the next id.
print(e);
}
});
From your scenario, you need $rename operator.
As discussed in the comment, you don't need to fetch each document to get studentId and then pass it to update each document. Just bulk update by checking the document has classes.History field.
// Rename the "History" key under "classes" to "HISTORY_NEW" in every document
// that has it, in a single server-side call (no per-document round trips).
// updateMany modifies all matching documents and never upserts unless asked
// to, so no `multi` / `upsert` options are needed.
db.studentSubject.updateMany(
  { "classes.History": { $exists: true } },
  { $rename: { "classes.History": "classes.HISTORY_NEW" } }
)
Sample Mongo Playground

MongoDB Lookup MainField to FieldA or FieldB

I need to join two MongoDB collections with $lookup. The MainField from the first collection has to be joined with the other collection through either Field A or Field B.
MainField is an array, with this structure [Doc1.FieldA, Doc2.FieldA, Doc3.FieldB,...].
FieldA is Unique-Index.
FieldB is Non-Unique-Index, it is for group FieldB with a unique value.
The problem is that I need to keep the order of the MainField Array.
I like to do something like this:
// Pseudo-code illustrating the desired join; it is not valid shell syntax
// (the stage is not wrapped in an object and the call is never closed).
db.getCollection("collection1").aggregate([
$lookup: {
from: "collection2",
localField: "mainField",
// Intent: match mainField against either fieldA or FieldB.
// NOTE(review): foreignField is expected to be a single field name, so this
// `$or` form is illustrative only — confirm against the $lookup documentation.
foreignField: $or:["fieldA","FieldB"]
as: "mainFieldInfo"
}]
Is it possible to do this lookup or I need a different approach?
Collections examples, the documents are simplified there are more fields
in each document.
Collection Machines (1 example) :
{
"_id" : ObjectId("5c793a188021710636865c33"),
"MachineName" : "CER3A",
"NextJobs" : [ //--> MainField
"ST105862", // match with FIELD B - Flags.STS
"OFT083520", // match with FIELD A - Lote
"OFT083365",
"ST105946"
]
}
Collection Works (2 example, 1 to match with FieldA, 1 to match Field B):
Field A example:
FieldB*(Flags.STS)* is empty
{
"_id" : ObjectId("5c1b89d0b6e97d001816595e"),
"Lote" : "OFT083520", //--> FIELD A
"Flags" : {
"ShipsFinished" : true,
"PlanFinished" : true,
"Finished" : true,
"IdDefecto" : false,
"EstadoOF" : 4,
"GCT" : "GCT018929",
"PedidoVenta" : "",
"STS" : "", //--> FIELD B
}
}
Field B Example (2 docs):
FieldA*(Lote)* is different in each document, FieldB*(Flags.STS)* is equal
{
"_id" : ObjectId("5dcd78e2a2061070185400e2"),
"Lote" : "OFT083671", //--> FIELD A
"Flags" : {
"B2" : 1,
"EstadoOF" : 4,
"Finished" : false,
"GCT" : "GCT024270",
"LaSI" : 0,
"PedidoVenta" : "P056048",
"SPO" : "PO23579",
"STS" : "ST105862", //--> FIELD B
"Inks" : "true",
}
}
{
"_id" : ObjectId("5dcd78e2a2061070185401f0"),
"Lote" : "OFT083672", //--> FIELD A
"Flags" : {
"B2" : 1,
"EstadoOF" : 4,
"Finished" : false,
"STS" : "ST105862", //--> FIELD B
"ShipsFinished" : false,
"TipoOF" : 1,
"EstatIQC" : 1,
}
}
You have to use the other form of the $lookup stage, which allows multiple join conditions to be expressed in a sub-pipeline.
Here's the query you have to run :
// Join each machine with the works whose Lote (field A) or Flags.STS
// (field B) appears in the machine's NextJobs array, and return the matches
// in the same order as NextJobs.
db.machines.aggregate([
  {
    $lookup: {
      from: "works",
      let: { nj: "$NextJobs" },
      pipeline: [
        {
          $match: {
            $expr: {
              $or: [
                { $in: ["$Lote", "$$nj"] },
                { $in: ["$Flags.STS", "$$nj"] }
              ]
            }
          }
        },
        // Position of the matching key inside NextJobs: use the Lote position
        // when Lote is listed, otherwise the Flags.STS position.
        {
          $addFields: {
            __nextJobsIndex: {
              $let: {
                vars: {
                  byLote: { $indexOfArray: ["$$nj", "$Lote"] },
                  bySts: { $indexOfArray: ["$$nj", "$Flags.STS"] }
                },
                in: {
                  $cond: [{ $gte: ["$$byLote", 0] }, "$$byLote", "$$bySts"]
                }
              }
            }
          }
        },
        // Works sharing the same Flags.STS get the same index; their relative
        // order is left unspecified.
        { $sort: { __nextJobsIndex: 1 } },
        // Drop the helper field so the joined documents keep their shape.
        { $project: { __nextJobsIndex: 0 } }
      ],
      as: "linkedWorks"
    }
  }
])
You can test it here

Individual search result in multiple values in arrays

I have following model:
{
"_id" : ObjectId("5d61aaf8108e185191552bbb"),
"serials" : [
"e127av48-0697-4977-b096-5ce79c89a414",
"d163f80a-55ff-40fe-90b4-331ece5bebd5",
"4740021f-e9b5-4ca5-bf0e-8554c123bb94",
"320ffd42-f101-4b1d-8ff4-80bc693a29e6",
"fef5e68b-aed0-4a96-9488-7941c41d1c1f",
"2c0752ba-bf7a-4a3b-bd9f-14db4b2f8bae",
"6c5ff44d-5979-4bff-af12-9e6d282c3789",
"9c91bf91-72d7-4b71-827b-924947d6e93d",
"fb34b28e-afb1-4b6a-a3c1-5a1fe44246ee",
"91ab22ef-702f-4cbd-8919-a67a2b9a684c",
"ee1a7cb2-e088-47e6-a824-c8697df7d94c",
"0dc4c687-4db2-481e-a1a6-491320dede11",
"34612148-3e01-44ee-b262-de2035e63691",
"5ba85baf-e48a-40af-8578-55ff1a873c76",
"19fe3672-b6cb-4bb6-8d21-93412b938584",
"1d0d6f6d-1b49-461b-8661-ecbf43a6595e",
"d9a5455c-65ee-45e1-ae49-33cc15dec841",
"4a690a00-a76c-4d3e-aee3-78b2bb731b0c",
"ae331830-40b4-457c-8cc4-5d548f769c3e",
"fe3e460b-c89d-4ace-8a36-5ba2b53bf4d0",
"2cc6a2a0-e029-475f-a7fc-a46a79afb605",
"a7d07767-eada-4ce3-b083-9b048e9ae9f4"
],
"name" : "ApiCard",
"producer" : "Farmina",
"form" : "syrop",
"__v" : 0
}
I would like to retrieve (multiple) documents from the collection based on these serial numbers (the "serials" field). For example, I am searching for:
[
"e127av48-0697-4977-b096-5ce79c89a414",
"d163f80a-55ff-40fe-90b4-331ece5bebd5",
"4740021f-e9b5-4ca5-bf0e-8554c123bb94",
"key that doesn't exist",
]
We have to assume that one of the serial numbers doesn't exist, so I would like to get information for each individual serial. Expected output:
[
{
"serial":"e127av48-0697-4977-b096-5ce79c89a414",
"doc":{
....whole document where above serial is in array field "serials"
}
},
{
"serial":"d163f80a-55ff-40fe-90b4-331ece5bebd5",
"doc":{
....whole document where above serial is in array field "serials"
}
},
{
"serial":"4740021f-e9b5-4ca5-bf0e-8554c123bb94",
"doc":{
....whole document where above serial is in array field "serials"
}
},
{
"serial":"key that doesn't exist",
"doc": null
}
]
I tried the simplest solution - a MongoDB find by multiple array items - but unfortunately it doesn't return information for each individual serial number. I'm not sure it's possible to write this kind of query. I think some complex aggregation could do it, but I don't know pipelines of this kind.
Of course, I can get a simple solution by running multiple aggregate or even find calls, but that could hurt performance when the application looks up 10000 records per request.
The following query can do the trick:
// Serial numbers to look up; the last one is deliberately absent from the data.
var serialsToSearch = [
  "e127av48-0697-4977-b096-5ce79c89a414",
  "d163f80a-55ff-40fe-90b4-331ece5bebd5",
  "4740021f-e9b5-4ca5-bf0e-8554c123bb94",
  "key that doesn't exist"
];

db.collection.aggregate([
  // Stage I: keep one arbitrary document, used only as a carrier for the input.
  { $limit: 1 },
  // Stage II: replace that document's fields with the list of serials.
  { $project: { _id: 0, serialsToSearch: serialsToSearch } },
  // Stage III: one document per searched serial.
  { $unwind: "$serialsToSearch" },
  // Stage IV: self-lookup for documents whose "serials" array contains it.
  {
    $lookup: {
      from: "collection",
      let: { serial: "$serialsToSearch" },
      pipeline: [
        { $match: { $expr: { $in: ["$$serial", "$serials"] } } },
        { $project: { serials: 0 } }
      ],
      as: "searialsLookup"
    }
  },
  // Stage V: flatten the lookup result but keep serials that matched nothing.
  { $unwind: { path: "$searialsLookup", preserveNullAndEmptyArrays: true } },
  // Stage VI: shape the response; an unmatched serial gets doc: null.
  {
    $project: {
      serial: "$serialsToSearch",
      doc: { $ifNull: ["$searialsLookup", null] }
    }
  }
]).pretty()
Data Set:
{
"_id" : ObjectId("5d61aaf8108e185191552bbb"),
"serials" : [
"e127av48-0697-4977-b096-5ce79c89a414",
"d163f80a-55ff-40fe-90b4-331ece5bebd5",
"4740021f-e9b5-4ca5-bf0e-8554c123bb94",
"320ffd42-f101-4b1d-8ff4-80bc693a29e6",
"fef5e68b-aed0-4a96-9488-7941c41d1c1f",
"2c0752ba-bf7a-4a3b-bd9f-14db4b2f8bae",
"6c5ff44d-5979-4bff-af12-9e6d282c3789",
"9c91bf91-72d7-4b71-827b-924947d6e93d",
"fb34b28e-afb1-4b6a-a3c1-5a1fe44246ee",
"91ab22ef-702f-4cbd-8919-a67a2b9a684c",
"ee1a7cb2-e088-47e6-a824-c8697df7d94c",
"0dc4c687-4db2-481e-a1a6-491320dede11",
"34612148-3e01-44ee-b262-de2035e63691",
"5ba85baf-e48a-40af-8578-55ff1a873c76",
"19fe3672-b6cb-4bb6-8d21-93412b938584",
"1d0d6f6d-1b49-461b-8661-ecbf43a6595e",
"d9a5455c-65ee-45e1-ae49-33cc15dec841",
"4a690a00-a76c-4d3e-aee3-78b2bb731b0c",
"ae331830-40b4-457c-8cc4-5d548f769c3e",
"fe3e460b-c89d-4ace-8a36-5ba2b53bf4d0",
"2cc6a2a0-e029-475f-a7fc-a46a79afb605",
"a7d07767-eada-4ce3-b083-9b048e9ae9f4"
],
"name" : "ApiCard",
"producer" : "Farmina",
"form" : "syrop",
"__v" : 0
}
Output:
{
"serial" : "e127av48-0697-4977-b096-5ce79c89a414",
"doc" : {
"_id" : ObjectId("5d61aaf8108e185191552bbb"),
"name" : "ApiCard",
"producer" : "Farmina",
"form" : "syrop",
"__v" : 0
}
}
{
"serial" : "d163f80a-55ff-40fe-90b4-331ece5bebd5",
"doc" : {
"_id" : ObjectId("5d61aaf8108e185191552bbb"),
"name" : "ApiCard",
"producer" : "Farmina",
"form" : "syrop",
"__v" : 0
}
}
{
"serial" : "4740021f-e9b5-4ca5-bf0e-8554c123bb94",
"doc" : {
"_id" : ObjectId("5d61aaf8108e185191552bbb"),
"name" : "ApiCard",
"producer" : "Farmina",
"form" : "syrop",
"__v" : 0
}
}
{ "serial" : "key that doesn't exist", "doc" : null }
Note: The query won't give the expected output if the collection is empty.
Aggregation stages details:
STAGE I: Limiting the records to 1, as initially, our motive is to inject the input array in aggregation. The injection would be done in no time.
STAGE II: Projecting the input array as serialsToSearch
STAGE III: Now we have the input array as a field, we can unwind it
STAGE IV: Lookup in the same collection with each field of the input array and check if the searched serial is present in serials array
STAGE V: unwinding the lookup output
STAGE VI: Projecting fields as per the response required.

MongoDB-design for revisioned data

There are many articles and SO questions about MongoDB data-model for storing old revisions of documents.
However, I found nothing satisfying one of my requirements; I need to be able to retroactively query the database to unambiguously find all documents that matched an arbitrary criteria for a given point in time.
To clarify, I need to be able to efficiently answer the question;
"Which documents (and preferably versions) matched criteria {X:Y...} at time T".
Pseudocode:
/* Would match a version that were active from 2010 - 2016-05-01 with zipcode 12345 */
db.my_objs.find({zipcode: "12345", ~time: ISODate("2016-01-01 22:14:31.003")~})
I haven't managed to find any solution, neither on google nor myself. I have tried;
Having a simple "from"-timestamp on data, and then select "the first item before my queried timepoint, that also matches other criteria", but I have not managed to express that in Mongo.
Having a from/to on each version, and whenever I write a new version, update "to" on the previous version to match from on the new version. However, I have not found a way to do this atomically or with eventual consistency, meaning multiple updates could wreak havoc and create ambiguous timelines. (Double entries for the same timepoint)
Any ideas?
edit
an undesirable example query for #1
// Find documents having a "data" entry whose "from" is on or before the given
// date, project only the first such entry ("data.$"), and print the documents
// whose projected entry is in state 'active'.
db.my_objs.find({
  data : {
    $elemMatch : {
      from : {
        $lte : ISODate('2015-01-01')
      }
    }
  }
}, {
  "data.$" : 1
}).forEach(function (obj) {
  if (obj.data[0].state === 'active') {
    // Print the document being iterated.
    printjson(obj)
  }
})
Use the aggregation framework with an $unwind stage, which turns each array element into its own document so that a sophisticated $match condition can be applied to individual versions.
Example Document
{
"_id" : ObjectId("577275589ea91b3799341aba"),
"title" : "Test of design",
"firstCreated" : ISODate("2016-06-28T13:02:16.156Z"),
"lastUpdated" : ISODate("2016-06-28T13:02:16.156Z"),
"firstAuthor" : "profesor79",
"lastAuthor" : "Rawler",
"versions" : [{
"versionId" : 1.0,
"dateCreated" : ISODate("2015-10-10T00:00:00.000Z"),
"datePublished" : ISODate("2015-10-12T00:00:00.000Z"),
"isActive" : false,
"documnetPayload" : {
"a" : 1.0,
"b" : 2.0,
"c" : 3.0
}
}, {
"versionId" : 2.0,
"dateCreated" : ISODate("2015-12-10T00:00:00.000Z"),
"datePublished" : ISODate("2015-12-31T00:00:00.000Z"),
"isActive" : true,
"documnetPayload" : {
"a" : 1.0,
"b" : 3.0,
"c" : 30.0
}
}, {
"versionId" : 3.0,
"dateCreated" : ISODate("2016-01-31T00:00:00.000Z"),
"datePublished" : ISODate("2016-02-21T00:00:00.000Z"),
"isActive" : true,
"documnetPayload" : {
"a" : 11.0,
"b" : 3.0,
"c" : 31.0
}
}
]
}
Aggregation framework example
// Select one document, split its "versions" array into one document per
// version, keep the versions created and published inside the given windows,
// and return them newest-created first.
db.rawler.aggregate([
  {
    $match : {
      "_id" : ObjectId("577275589ea91b3799341aba")
    }
  },
  {
    $unwind : "$versions"
  },
  {
    $match : {
      // After $unwind each document holds a single version, so both bounds
      // can sit on the same field without an explicit $and.
      "versions.dateCreated" : {
        $gt : ISODate("2015-10-10T00:00:00.000Z"),
        $lte : ISODate("2016-01-30T00:00:00.000Z")
      },
      // Explicit UTC, like the other bounds: a date-time string without an
      // offset may be interpreted in the local time zone.
      "versions.datePublished" : {
        $gt : ISODate("2015-10-13T00:00:00.000Z")
      }
      // Optionally add a filter on "versions.versionId" (for example with
      // $in) to restrict the result to specific versions.
    }
  },
  {
    $sort : {
      "versions.dateCreated" : -1
    }
  }
])

how to update property in nested mongo document

I want to update a particular property in a nested mongo document
{
"_id" : ObjectId("55af76e60b0e4b318ba822ec"),
"make" : "MERCEDES-BENZ",
"model" : "E-CLASS",
"variant" : "E 250 CDI CLASSIC",
"fuel" : "Diesel",
"cc" : 2143,
"seatingCapacity" : 5,
"variant_+_fuel" : "E 250 CDI CLASSIC (Diesel)",
"make_+_model_+_variant_+_fuel" : "MERCEDES-BENZ E-CLASS E 250 CDI CLASSIC (Diesel)",
"dropdown_display" : "E-CLASS E 250 CDI CLASSIC (Diesel)",
"vehicleSegment" : "HIGH END CARS",
"abc" : {
"variantId" : 1000815,
"makeId" : 1000016,
"modelId" : 1000556,
"fuelId" : 2,
"segmentId" : 1000002,
"price" : 4020000
},
"def" : {
"bodyType" : 1,
"makeId" : 87,
"modelId" : 21584,
"fuel" : "DIESEL",
"vehicleSegmentType" : "E2"
},
"isActive" : false
}
This is my document. If I want to add or update a value for key "nonPreferred" inside "abc", how do I go about it?
I tried it with this query:
// Asker's attempt: for every document whose abc.modelId is 1000556, assign a
// new value to "abc".
db.FourWheelerMaster.update(
{ "abc.modelId": 1000556 },
{
// NOTE(review): the operator is spelled `$Set` here; the MongoDB update
// operator is `$set` — presumably a typo in the post.
$Set: {
// The value is assigned to the whole "abc" field, which is why the existing
// keys inside "abc" disappear in the result shown below.
"abc": {
"nonPreferred": ["Mumbai", "Pune"]
}
}
},
{multi:true}
)
but it updates the whole "abc" structure, removed all key:values inside it and kept only newly inserted key values like below
"abc" : {
"nonPreferred" : [
"Mumbai",
"Pune"
]
},
Can anyone tell me how to update only particular property inside it and not all the complete key?
Instead of using the $set operator, you need to push that array using the $push operator together with the $each modifier to append each element of the value separately as follows:
// Select every document whose abc.modelId is 1000556.
var modelFilter = { "abc.modelId": 1000556 };

// The dotted path targets only "abc.nonPreferred", leaving the other keys of
// "abc" untouched; $each appends "Mumbai" and "Pune" as separate elements.
var appendCities = {
  $push: {
    "abc.nonPreferred": { $each: ["Mumbai", "Pune"] }
  }
};

db.FourWheelerMaster.update(modelFilter, appendCities, { multi: true })