Does Pymongo have validation rules built in? - mongodb

I am trying to validate an inserted document against a schema, and was trying to find a way to validate the inserted document.
There are libraries like MongoEngine that say they do the work, but is there a way to do document validation directly via pymongo ?

The python driver docs are indeed a little light on how to use the db.command. Here is a complete working example:
from pymongo import MongoClient
from collections import OrderedDict
import sys

client = MongoClient()  # supply connection args as appropriate
db = client.testX

db.myColl.drop()
db.create_collection("myColl")  # Force create!

# The $jsonSchema expression type is preferred. New since v3.6 (2017):
vexpr = {"$jsonSchema": {
    "bsonType": "object",
    "required": ["name", "year", "major", "gpa"],
    "properties": {
        "name": {
            "bsonType": "string",
            "description": "must be a string and is required"
        },
        "gender": {
            "bsonType": "string",
            "description": "must be a string and is not required"
        },
        "year": {
            "bsonType": "int",
            "minimum": 2017,
            "maximum": 3017,
            "exclusiveMaximum": False,
            "description": "must be an integer in [ 2017, 3017 ] and is required"
        },
        "major": {
            "enum": ["Math", "English", "Computer Science", "History", None],
            "description": "can only be one of the enum values and is required"
        },
        "gpa": {
            # In case you might want to allow doubles OR int, then add
            # "int" to the bsonType array below:
            "bsonType": ["double"],
            "minimum": 0,
            "description": "must be a double and is required"
        }
    }
}}

# Per the docs, args to command() require that the first key/value pair
# be the command string and its principal argument, followed by other
# arguments. There are two ways to do this. Using an OrderedDict:
cmd = OrderedDict([('collMod', 'myColl'),
                   ('validator', vexpr),
                   ('validationLevel', 'moderate')])
db.command(cmd)
# Or, use the kwargs construct:
# db.command('collMod', 'myColl', validator=vexpr, validationLevel='moderate')

try:
    # insert() is deprecated; insert_one() is the modern single-document API.
    db.myColl.insert_one({"x": 1})
    print("NOT good; the insert above should have failed.")
except Exception:
    print("OK. Expected exception:", sys.exc_info())

try:
    okdoc = {"name": "buzz", "year": 2019, "major": "Math", "gpa": 3.8}
    db.myColl.insert_one(okdoc)
    print("All good.")
except Exception:
    print("exc:", sys.exc_info())

MongoDB supports document validation at the engine level so you'll pick it up via pymongo. You declare your "schema" (rules actually) to the engine. Here's a great place to start: https://docs.mongodb.com/manual/core/document-validation/

You can make a separated JSON file for your Document Validations Schema, like this:
{
"collMod": "users",
"validator": {
"$jsonSchema": {
"bsonType": "object",
"required": ["email", "password","name"],
"properties": {
"email": {
"bsonType": "string",
"description": "Correo Electrónico"
},
"password": {
"bsonType": "string",
"description": "Una representación Hash de la contraseña"
},
"name": {
"bsonType": "object",
"required": ["first", "last"],
"description": "Objeto que separa los nombres y apellidos",
"properties": {
"first": {
"bsonType": "string",
"description": "Primer y segundo nombre"
},
"last": {
"bsonType": "string",
"description": "Primer y segundo apellido"
}
}
}
}
}
}
}
Then you can use in python script, example:
from pymongo import MongoClient
import json  # parse the JSON schema file into a dict
from collections import OrderedDict  # keep (key, value) insertion order explicit

client = MongoClient("your_mongo_uri")
db = client.your_db_name

# json.load() reads the open file object directly — no need for j.read().
with open('your_schema_file.json', 'r') as j:
    d = json.load(j)

# command() requires "collMod" to be the first key. json.load preserves the
# file's key order (dicts keep insertion order in Python 3.7+); wrapping in
# OrderedDict makes that ordering guarantee explicit.
d = OrderedDict(d)
db.command(d)
OrderedDict Info
collMod Info
Schema Validation Info

I know 2 options to deal with:
By creating or setting schema for collection, so any insertions will be checked against it on server side, rejected or warned depending on validationAction
The following code demonstrates scheme creation and testing:
import pymongo

# NOTE: the class must be referenced via the module (pymongo.MongoClient),
# since only `import pymongo` is in scope; also the connection keyword is
# `host`, not `url`.
mongo_client = pymongo.MongoClient(host=...,
                                   port=...,
                                   username=...,
                                   password=...,
                                   authSource=...,
                                   authMechanism=...,
                                   connect=True)
mongo_client.server_info()  # force a round-trip to verify the connection works
db = mongo_client.your_db

# Create the collection with a server-side $jsonSchema validator so every
# insert/update is checked by the engine itself and rejected on failure
# (validationAction="error").
users = db.create_collection(
    name="users",
    validator={"$jsonSchema": {
        "bsonType": "object",
        "required": ["username"],
        "properties": {
            "username": {
                "bsonType": "string",
                "pattern": "[a-z0-9]{5,15}",
                "description": "user name (required), only lowercase letters "
                               "and digits allowed, from 5 to 15 characters long"
            },
            "email": {
                "bsonType": "string",
                "description": "User's email (optional)"
            },
        }
    }},
    validationAction="error",
)

# Inserting a user document that fits the schema
users.insert_one({"username": "admin", "email": "some_admin_mail"})

# The insertion below will be rejected with
# "pymongo.errors.WriteError: Document failed validation, full error"
# caused by the too-short username (root)
users.insert_one({"username": "root", "email": "some_root_mail"})
You can think about your Mongo's documents as ordinary JSON entities and check them on the client code side using standard JSON scheme validation
from pymongo import MongoClient
from jsonschema import validate
from jsonschema.exceptions import ValidationError

db = MongoClient(...).your_db

# Standard JSON Schema checked on the CLIENT side, before the document
# is ever sent to the server.
schema = {
    "type": "object",
    "required": ["username"],
    "properties": {
        "username": {"type": "string", "pattern": "[a-z0-9]{5,15}"},
        "email": {"type": "string"},
    },
}

try:
    new_user = {"username": "admin", "email": "some_admin_mail"}
    # No exception will be raised by the validation below
    validate(instance=new_user, schema=schema)
    db.users.insert_one(new_user)

    new_user = {"username": "root", "email": "some_root_mail"}
    # Exception <ValidationError: 'root' does not match '[a-z0-9]{5,15}'> will be raised
    validate(instance=new_user, schema=schema)
    db.users.insert_one(new_user)
except ValidationError:
    # Handle the validation failure here (log it, reject the request, etc.).
    # A bare comment is not a valid except body, so `pass` is required.
    pass

Related

JSON Schema one-to-many relation, in MongoDB Atlas?

I'm stumped trying to get a one-to-many relationship established in MongoDB Atlas, via JSON Schema (for use in a GraphQL API).
This is in the Atlas App Services > Data Access > Schema > "Collections" tab.
(It's possible I'm thinking about this wrong, any ideas appreciated.)
So, I have two collections: Node and URL. Each Node can have one or more URLs.
Each URL has three properties: _id (primary key), url, and node:
// URLs (JSON Schema)
{
"title": "url",
"properties": {
"_id": {
"bsonType": "objectId"
},
"node": {
"bsonType": "string"
},
"url": {
"bsonType": "string"
}
}
}
Each URL is matched to a Node (URL.node === Node._id):
// URLs relationships (JSON Schema)
{
"node": {
"ref": "#/relationship/db_cluster/db_name/nodes",
"foreignKey": "_id",
"isList": false
}
}
Each Node has two properties of its own, _id (primary key) and name, and urls which should be an array of linked documents (from the URLs Collection, of course).
// Nodes (JSON Schema)
{
"title": "node",
"properties": {
"_id": {
"bsonType": "string"
},
"name": {
"bsonType": "string"
},
"urls": {
"bsonType": "array",
"items": {
"bsonType": "object",
"properties": {
"_id": {
"bsonType": "objectId"
},
"node": {
"bsonType": "string"
},
"url": {
"bsonType": "string"
}
}
}
}
}
}
So far so good. The above seems to work OK.
The trouble begins when I try and link the two Collections together. I am attempting to link a few URLs to each Node, with this relationship:
// Nodes relationships (JSON Schema)
{
"urls": {
"ref": "#/relationship/db_cluster/db_name/urls",
"foreignKey": "_id",
"isList": true
}
}
However, I can't save the above relationship. I get an error
relationship source property type must match foreign property type
Any ideas? Thanks.

JSON Schema - can array / list validation be combined with anyOf?

I have a json document I'm trying to validate with this form:
...
"products": [{
"prop1": "foo",
"prop2": "bar"
}, {
"prop3": "hello",
"prop4": "world"
},
...
There are multiple different forms an object may take. My schema looks like this:
...
"definitions": {
"products": {
"type": "array",
"items": { "$ref": "#/definitions/Product" },
"Product": {
"type": "object",
"oneOf": [
{ "$ref": "#/definitions/Product_Type1" },
{ "$ref": "#/definitions/Product_Type2" },
...
]
},
"Product_Type1": {
"type": "object",
"properties": {
"prop1": { "type": "string" },
"prop2": { "type": "string" }
},
"Product_Type2": {
"type": "object",
"properties": {
"prop3": { "type": "string" },
"prop4": { "type": "string" }
}
...
On top of this, certain properties of the individual product array objects may be indirected via further usage of anyOf or oneOf.
I'm running into issues in VSCode using the built-in schema validation where it throws errors for every item in the products array that don't match Product_Type1.
So it seems the validator latches onto that first oneOf it found and won't validate against any of the other types.
I didn't find any limitations to the oneOf mechanism on jsonschema.org. And there is no mention of it being used in the page specifically dealing with arrays here: https://json-schema.org/understanding-json-schema/reference/array.html
Is what I'm attempting possible?
Your general approach is fine. Let's take a slightly simpler example to illustrate what's going wrong.
Given this schema
{
"oneOf": [
{ "properties": { "foo": { "type": "integer" } } },
{ "properties": { "bar": { "type": "integer" } } }
]
}
And this instance
{ "foo": 42 }
At first glance, this looks like it matches /oneOf/0 and not oneOf/1. It actually matches both schemas, which violates the one-and-only-one constraint imposed by oneOf and the oneOf fails.
Remember that every keyword in JSON Schema is a constraint. Anything that is not explicitly excluded by the schema is allowed. There is nothing in the /oneOf/1 schema that says a "foo" property is not allowed. Nor does it say that "foo" is required. It only says that if the instance has a keyword "foo", then it must be an integer.
To fix this, you will need required and maybe additionalProperties depending on the situation. I show here how you would use additionalProperties, but I recommend you don't use it unless you need to because it does have some problematic properties.
{
"oneOf": [
{
"properties": { "foo": { "type": "integer" } },
"required": ["foo"],
"additionalProperties": false
},
{
"properties": { "bar": { "type": "integer" } },
"required": ["bar"],
"additionalProperties": false
}
]
}

Loopback - GET model using custom String ID from MongoDB

I'm developing an API with loopback, everything worked fine until I decided to change the ids of my documents in the database. Now I don't want them to be auto generated.
Now that I'm setting the Id myself. I get an "Unknown id" 404, whenever I hit this endpoint: GET properties/{id}
How can I use custom IDs with loopback and mongodb?
Whenever I hit this endpoint: http://localhost:5000/api/properties/20020705171616489678000000
I get this error:
{
"error": {
"name": "Error",
"status": 404,
"message": "Unknown \"Property\" id \"20020705171616489678000000\".",
"statusCode": 404,
"code": "MODEL_NOT_FOUND"
}
}
This is my model.json, just in case...
{
"name": "Property",
"plural": "properties",
"base": "PersistedModel",
"idInjection": false,
"options": {
"validateUpsert": true
},
"properties": {
"id": {"id": true, "type": "string", "generated": false},
"photos": {
"type": [
"string"
]
},
"propertyType": {
"type": "string",
"required": true
},
"internalId": {
"type": "string",
"required": true
},
"flexCode": {
"type": "string",
"required": true
}
},
"validations": [],
"relations": {},
"acls": [],
"methods": []
}
Your model setup (with with idInjection: true or false) did work when I tried it with a PostGreSQL DB setup with a text id field for smaller numbers.
Running a Loopback application with DEBUG=loopback:connector:* node . outputs the database queries being run in the terminal - I tried it with the id value you are trying and the parameter value was [2.002070517161649e+25], so the size of the number is the issue.
You could try raising it as a bug in Loopback, but JS is horrible at dealing with large numbers so you may be better off not using such large numbers as identifiers anyway.
It does work if the ID is an alphanumeric string over 16 characters so there might be a work around for you (use ObjectId?), depending on what you are trying to achieve.

json schema issue on required property

I need to write the JSON Schema based on the specification defined by http://json-schema.org/. But I'm struggling for the required/mandatory property validation. Below is the JSON schema that I have written where all the 3 properties are mandatory but In my case either one should be mandatory. How to do this?.
{
"id": "http://example.com/searchShops-schema#",
"$schema": "http://json-schema.org/draft-04/schema#",
"title": "searchShops Service",
"description": "",
"type": "object",
"properties": {
"city":{
"type": "string"
},
"address":{
"type": "string"
},
"zipCode":{
"type": "integer"
}
},
"required": ["city", "address", "zipCode"]
}
If your goal is to tell that "I want at least one member to exist" then use minProperties:
{
"type": "object",
"etc": "etc",
"minProperties": 1
}
Note also that you can use "dependencies" to great effect if you also want additional constraints to exist when this or that member is present.
{
...
"anyOf": [
{ "required": ["city"] },
{ "required": ["address"] },
{ "required": ["zipCode"] }
]
}
Or use "oneOf" if exactly one property should be present

Swagger UI doesn't show embedded json properties model

I am using the swagger tool for documenting my Jersey based REST API (the swaggerui I am using was downloaded on June 2014 don't know if this issue has been fixed in later versions but as I made a lot of customization to its code so I don't have the option to download the latest without investing lot of time to customize it again).
So far and until now, all my transfer objects have one level deep properties (no embedded pojos). But now that I added some rest paths that are returning more complex objects (two levels of depth) I found that SwaggerUI is not expanding the JSON model schema when having embedded objects.
Here is the important part of the swagger doc:
...
{
"path": "/user/combo",
"operations": [{
"method": "POST",
"summary": "Inserts a combo (user, address)",
"notes": "Will insert a new user and a address definition in a single step",
"type": "UserAndAddressWithIdSwaggerDto",
"nickname": "insertCombo",
"consumes": ["application/json"],
"parameters": [{
"name": "body",
"description": "New user and address combo",
"required": true,
"type": "UserAndAddressWithIdSwaggerDto",
"paramType": "body",
"allowMultiple": false
}],
"responseMessages": [{
"code": 200,
"message": "OK",
"responseModel": "UserAndAddressWithIdSwaggerDto"
}]
}]
}
...
"models": {
"UserAndAddressWithIdSwaggerDto": {
"id": "UserAndAddressWithIdSwaggerDto",
"description": "",
"required": ["user",
"address"],
"properties": {
"user": {
"$ref": "UserDto",
"description": "User"
},
"address": {
"$ref": "AddressDto",
"description": "Address"
}
}
},
"UserDto": {
"id": "UserDto",
"properties": {
"userId": {
"type": "integer",
"format": "int64"
},
"name": {
"type": "string"
},...
},
"AddressDto": {
"id": "AddressDto",
"properties": {
"addressId": {
"type": "integer",
"format": "int64"
},
"street": {
"type": "string"
},...
}
}
...
The embedded objects are User and Address, their models are being created correctly as shown in the json response.
But when opening the SwaggerUI I can only see:
{
"user": "UserDto",
"address": "AddressDto"
}
But I should see something like:
{
"user": {
"userId": "integer",
"name": "string",...
},
"address": {
"addressId": "integer",
"street": "string",...
}
}
Something may be wrong in the code that expands the internal properties, the javascript console doesn't show any error so I assume this is a bug.
I found the solution; there is a line of code that needs to be modified to make it work properly:
In the swagger.js file there is a getSampleValue function with a conditional checking for undefined:
SwaggerModelProperty.prototype.getSampleValue = function(modelsToIgnore) {
var result;
if ((this.refModel != null) && (modelsToIgnore[this.refModel.name] === 'undefined'))
...
I updated the equality check to (removing quotes):
modelsToIgnore[this.refModel.name] === undefined
After that, SwaggerUI is able to show the embedded models.