I am having trouble with setting up database configs correctly for unit-testing purposes using pytest.
My goal is to create a brand new empty test database each time I run the tests.
I have a pytest.ini file that looks like this:
[pytest]
DJANGO_SETTINGS_MODULE = test_settings
python_files = test_*.py
python_functions = test_*
addopts = --ds=test_settings --create-db
My project structure is like this:
my_project
app_1
app_2
tests
my_project
settings.py
test_settings
__init__.py
The test_settings/__init__.py file looks like this:
# Test settings: start from the project settings and override the database.
# `os` is imported explicitly rather than relying on the star import below
# happening to expose it.
import os

from my_project.settings import *

# Connection parameters come from the environment; only the database name
# is hard-coded here.
POSTGRES_USER = os.getenv('POSTGRES_USER')
POSTGRES_HOST = os.getenv('POSTGRES_HOST')
POSTGRES_PASSWORD = os.getenv('POSTGRES_PASSWORD')
POSTGRES_DB = 'test_db'
POSTGRES_PORT = os.getenv('POSTGRES_PORT')

# Replaces whatever DATABASES the project settings defined.
DATABASES = {
    'default': {
        'ENGINE': 'django.contrib.gis.db.backends.postgis',
        'NAME': POSTGRES_DB,
        'USER': POSTGRES_USER,
        'PASSWORD': POSTGRES_PASSWORD,
        'HOST': POSTGRES_HOST,
        'PORT': POSTGRES_PORT,
    }
}
And I have test cases like this:
import pytest
from model_bakery import baker
from rest_framework.test import APIClient
from sales.models import ServiceLocation
# TODO: move this somewhere into a shared module
@pytest.fixture
def api_client():
    """Provide the DRF ``APIClient`` class.

    The class itself (not an instance) is returned, so tests call
    ``api_client()`` to obtain a client.
    """
    return APIClient
@pytest.mark.django_db
def test_get_service_locations(api_client):
    """The service-locations endpoint returns every ``ServiceLocation`` row.

    Three rows are created with model_bakery, then the endpoint is expected
    to answer 200 with exactly three items under the ``data`` key.
    """
    baker.make(ServiceLocation, _quantity=3)
    response = api_client().get('/mobile_api/v3/service_locations')
    assert response.status_code == 200
    assert len(response.json()['data']) == 3
When I run pytest command, for some reason I get assert 28 == 3 which means that python tries to connect to my original database that I use for development
Most surprisingly, I tried to delete one record from my development database table, so that the number of rows becomes 24 (instead of 25). But python seems to cache the database.
I am totally lost. Any ideas what I am doing wrong ?
Related
I have a connection problem with Cloud Sql Postgres from my Flask Rest API app.
I have a db.py file:
import os
from flask_sqlalchemy import SQLAlchemy
import sqlalchemy
db = SQLAlchemy()
def connect_unix_socket() -> sqlalchemy.engine.base.Engine:
    """Build a pooled SQLAlchemy engine for Cloud SQL Postgres over a Unix socket.

    Reads ``DB_USER``, ``DB_PASS``, ``DB_NAME`` and ``INSTANCE_UNIX_SOCKET``
    from the environment.

    Returns:
        The engine created by ``sqlalchemy.create_engine``.

    Raises:
        KeyError: if any of the four environment variables is missing.
    """
    # NOTE: environment variables are a convenient but not a secure place
    # for credentials; a secret store such as Cloud Secret Manager
    # (https://cloud.google.com/secret-manager) is the safer option.
    user = os.environ["DB_USER"]  # e.g. 'my-database-user'
    password = os.environ["DB_PASS"]  # e.g. 'my-database-password'
    database = os.environ["DB_NAME"]  # e.g. 'my-database'
    socket_dir = os.environ["INSTANCE_UNIX_SOCKET"]  # e.g. '/cloudsql/project:region:instance'

    # Equivalent URL:
    #   postgresql+pg8000://<db_user>:<db_pass>@/<db_name>
    #       ?unix_sock=<INSTANCE_UNIX_SOCKET>/.s.PGSQL.5432
    # Some drivers expect a different key than `unix_sock`; psycopg2, for
    # example, takes the socket path through `host`.
    url = sqlalchemy.engine.url.URL.create(
        drivername="postgresql+pg8000",
        username=user,
        password=password,
        database=database,
        query={"unix_sock": "{}/.s.PGSQL.5432".format(socket_dir)},
    )

    engine = sqlalchemy.create_engine(
        url,
        # Permanent connections kept in the pool.
        pool_size=5,
        # Extra connections allowed when the pool is exhausted; the total
        # number of concurrent connections is pool_size + max_overflow.
        max_overflow=2,
        # Seconds to wait for a free connection before an exception is raised.
        pool_timeout=30,  # 30 seconds
        # Connections older than this many seconds are re-established.
        pool_recycle=1800,  # 30 minutes
    )
    return engine
I import the db.py file in my app.py file:
import os
import sqlalchemy
from flask import Flask
from flask_smorest import Api
from flask_sqlalchemy import SQLAlchemy
from db import db, connect_unix_socket
import models
from resources.user import blp as UserBlueprint
# pylint: disable=C0103
app = Flask(__name__)
def init_connection_pool() -> sqlalchemy.engine.base.Engine:
    """Create the database engine for the configured connection type.

    Returns:
        The engine built by ``connect_unix_socket`` when the
        ``INSTANCE_UNIX_SOCKET`` environment variable is set and non-empty.

    Raises:
        ValueError: if no supported connection type is configured.
    """
    # use a Unix socket when INSTANCE_UNIX_SOCKET (e.g. /cloudsql/project:region:instance) is defined.
    # The value is read from the environment here: no `unix_socket_path`
    # name exists in this module.
    if os.environ.get("INSTANCE_UNIX_SOCKET"):
        return connect_unix_socket()

    raise ValueError(
        "Missing database connection type. Please define one of INSTANCE_HOST, INSTANCE_UNIX_SOCKET, or INSTANCE_CONNECTION_NAME"
    )
# NOTE(review): this rebinds the name `db` that was imported from db.py
# (the Flask-SQLAlchemy object), and nothing in this file calls `init_app`
# on that object - which matches the RuntimeError quoted below.
db = None


@app.before_first_request
def init_db() -> None:
    """Create the connection pool and store it in the module-level ``db``."""
    global db
    db = init_connection_pool()
api = Api(app)
@app.route("/api")
def user_route():
    """Return a plain-text greeting for the ``/api`` endpoint."""
    return "Welcome user API!"
api.register_blueprint(UserBlueprint)
if __name__ == '__main__':
    # PORT comes from the environment, falling back to 8080.
    listen_port = os.environ.get('PORT', '8080')
    app.run(host='0.0.0.0', port=listen_port, debug=True)
The app runs correctly, but when I call the endpoint to GET or POST users, the app crashes and gives me this error:
"The current Flask app is not registered with this 'SQLAlchemy'"
RuntimeError: The current Flask app is not registered with this 'SQLAlchemy' instance. Did you forget to call 'init_app', or did you create multiple 'SQLAlchemy' instances?
This is my User.py class:
from sqlalchemy.exc import SQLAlchemyError, IntegrityError
from db import db
from models import UserModel
from schemas import UserSchema
blp = Blueprint("Users", "users", description="Operations on users")
@blp.route("/user/<string:user_id>")
class User(MethodView):
    """Operations on a single user addressed by ``user_id``."""

    @blp.response(200, UserSchema)
    def get(self, user_id):
        """Return the user with ``user_id`` (looked up with ``get_or_404``)."""
        user = UserModel.query.get_or_404(user_id)
        return user

    def delete(self, user_id):
        """Delete the user with ``user_id`` and commit the session."""
        user = UserModel.query.get_or_404(user_id)
        db.session.delete(user)
        db.session.commit()
        return {"message": "User deleted"}, 200
@blp.route("/user")
class UserList(MethodView):
    """Operations on the collection of users."""

    @blp.response(200, UserSchema(many=True))
    def get(self):
        """Return all users."""
        return UserModel.query.all()
How can I fix this issue?
@dev_ Your issue is that you are trying to intermingle SQLAlchemy Core with the SQLAlchemy ORM as if they were the same thing. SQLAlchemy connection pools created using sqlalchemy.create_engine use the Core API, while Flask-SQLAlchemy uses the SQLAlchemy ORM model. This is the core reason for your issue. It is easier to use one or the other.
I would recommend using purely Flask-SQLALchemy with the use of the cloud-sql-python-connector library for your use-case. It will make your life much easier.
For simplicity, I am getting rid of your db.py leading to your app.py file being as follows:
import os

from flask import Flask
from flask_smorest import Api
from flask_sqlalchemy import SQLAlchemy
from google.cloud.sql.connector import Connector, IPTypes
from resources.user import blp as UserBlueprint

# load env vars (a missing variable raises KeyError at import time)
db_user = os.environ["DB_USER"]  # e.g. 'my-database-user'
db_pass = os.environ["DB_PASS"]  # e.g. 'my-database-password'
db_name = os.environ["DB_NAME"]  # e.g. 'my-database'
instance_connection_name = os.environ["INSTANCE_CONNECTION_NAME"]  # e.g. 'project:region:instance'
# Python Connector database connection function
def getconn():
    """Open and return a pg8000 connection made through the Cloud SQL Python Connector.

    Uses the module-level ``instance_connection_name``, ``db_user``,
    ``db_pass`` and ``db_name`` values.
    """
    with Connector() as connector:
        return connector.connect(
            instance_connection_name,  # Cloud SQL Instance Connection Name
            "pg8000",
            user=db_user,
            password=db_pass,
            db=db_name,
            ip_type=IPTypes.PUBLIC,  # IPTypes.PRIVATE for private IP
        )
# Create the Flask application object.
app = Flask(__name__)
# configure Flask-SQLAlchemy to use Python Connector
# The URI names only the dialect and driver; connections are produced by
# the `creator` callable passed in the engine options below.
app.config['SQLALCHEMY_DATABASE_URI'] = "postgresql+pg8000://"
app.config['SQLALCHEMY_ENGINE_OPTIONS'] = {
"creator": getconn
}
# initialize db (using app!)
db = SQLAlchemy(app)
# rest of your code
api = Api(app)
# ...
Hope this helps resolve your issue!
I have a DB utils Java file where I need to load the DB username and password based on the environment I am running the code on, and I need to import these environment values from karate-config.js. How can I achieve this?
Just use embedded expressions ! So if you have dbusername and dbpassword set in karate-config.js:
* def config = { username: '#(dbusername)', password: '#(dbpassword)', url: 'jdbc:h2:mem:testdb', driverClassName: 'org.h2.Driver' }
* def DbUtils = Java.type('com.mycompany.DbUtils')
* def db = new DbUtils(config)
I would like to add a few variables, "username" and "database", to my sqitch.conf for a defined target.
file sqitch.conf=>
engine = pg
[core "variables"]
username = jv_root
database = test
[target "dev_1"]
uri = db:pg://username@sqlhost:5432/database
[target "dev_2"]
uri = db:pg://username@sqlhost2:5432/database
When I run:
sqitch deploy -t dev_1
it throws an error =>
ERROR: no such user: username
You can add environment specific variables like this.
[target.dev_1.variables]
username = jv_root
password = test
How you address them in your sql files depends on the sql dialect.
I'm confused by Airflow's KubernetesPodOperator, and I'm wondering how to pass the load_users_into_table() function, which relies on a conn_id stored in an Airflow connection, into the Pod.
The official docs propose putting the conn_id in a Secret, but I don't understand how I can then pass it to my load_users_into_table() function.
https://airflow.apache.org/docs/stable/kubernetes.html
the function (task) to be executed in the pod:
def load_users_into_table(postgres_hook, schema, path):
    """Read the CSV at ``path`` and write it to the ``users`` table in ``schema``.

    The SQLAlchemy engine used for the write is obtained from
    ``postgres_hook.get_sqlalchemy_engine()``.
    """
    frame = read_csv(path)
    engine = postgres_hook.get_sqlalchemy_engine()
    frame.to_sql('users', con=engine, schema=schema)
the dag:
# Hook built from the Airflow connection id.
_pg_hook = PostgresHook(postgres_conn_id=_conn_id)

with dag:
    # NOTE(review): `arguments` receives the function object itself while
    # `cmds` is ["python", "-c"] - confirm this is what the pod should get.
    test = KubernetesPodOperator(
        task_id="task-1",
        name="airflow-test-pod",
        namespace=namespace,
        image=image_name,
        cmds=["python", "-c"],
        arguments=[load_users_into_table],
        labels={"dag-id": dag.dag_id},
        in_cluster=in_cluster,
        config_file=config_file,
        get_logs=True,
        is_delete_operator_pod=True,
        executor_config={
            "KubernetesExecutor": {
                "request_memory": "512Mi",
                "limit_memory": "1024Mi",
                "request_cpu": "1",
                "limit_cpu": "2",
            }
        },
    )
Assuming you want to run with KubernetesPodOperator, you can use argparse and add arguments to the Docker command. Something along these lines should do the job:
import argparse


def f(arg):
    """Print ``arg``; stands in for the real task."""
    print(arg)


# Building the parser has no side effects, so it stays at module level.
parser = argparse.ArgumentParser()
parser.add_argument('--foo', help='foo help')

if __name__ == '__main__':
    # Parse only when run as a script, so importing this module never
    # touches sys.argv.
    args = parser.parse_args()
    f(args.foo)
Dockerfile:
FROM python:3
COPY main.py main.py
CMD ["python", "main.py", "--foo", "somebar"]
There are other ways to solve this such as using secrets, configMaps or even Airflow Variables, but this should get you moving forward.
My python application allows users to create schemas of their naming. I need a way to protect the application from sql injections.
The SQL to be executed reads
CREATE SCHEMA schema_name AUTHORIZATION user_name;
The psycopg documentation (generally) recommends passing parameters to execute like so
# Open a connection and a cursor.
conn = psycopg2.connect("dbname=test user=postgres")
cur = conn.cursor()
# Both placeholders stand where SQL expects identifiers (schema and role name).
query = 'CREATE SCHEMA IF NOT EXISTS %s AUTHORIZATION %s;'
params = ('schema_name', 'user_name')
# The parameters are rendered as single-quoted values, which is why this statement fails.
cur.execute(query, params)
But this results in a query with single quotes, which fails:
CREATE SCHEMA 'schema_name' AUTHORIZATION 'user_name';
> fail
Is there a way to remove the quotes, or should I just settle for stripping non-alphanumeric characters from the schema name and call it a day? The latter seems kind of ugly, but should still work.
To pass identifiers use AsIs. But that exposes to SQL injection:
import psycopg2
from psycopg2.extensions import AsIs

conn = psycopg2.connect(database='cpn')
cursor = conn.cursor()
query = """CREATE SCHEMA %s AUTHORIZATION %s;"""
# AsIs inserts its text verbatim, so the second parameter smuggles a whole
# extra statement into the query - this is the injection being demonstrated.
param = (AsIs('u1'), AsIs('u1; select * from user_table'))
# mogrify only renders the statement; it does not execute it. The result
# is decoded so it prints as text rather than as a bytes literal.
print(cursor.mogrify(query, param).decode())
Output:
CREATE SCHEMA u1 AUTHORIZATION u1; select * from user_table;
Here's a boilerplate that might help. I've used environment variables but you can use a .conf or whatever you like.
Store your connection variables in a .env file:
db_host = "localhost"
db_port = "5432"
db_database = "postgres"
db_user = "postgres"
db_password = "postgres"
db_schema = "schema2"
Load params in your app.py and assign them to variables, then use the variables where required:
import os

import psycopg2
from dotenv import load_dotenv
import database

# Load your environment variables here:
load_dotenv()
db_host = os.environ["db_host"]
db_port = os.environ["db_port"]
db_database = os.environ["db_database"]
db_user = os.environ["db_user"]
db_password = os.environ["db_password"]
db_schema = os.environ["db_schema"]

# Build Connection:
connection = psycopg2.connect(host=db_host,
                              port=db_port,
                              database=db_database,
                              user=db_user,
                              password=db_password
                              )

# Build Query Strings:
# NOTE: the schema name is interpolated straight into the SQL text. That is
# only acceptable because it comes from your own configuration; never build
# statements this way from user-supplied names.
CREATE_SCHEMA = f"CREATE SCHEMA IF NOT EXISTS {db_schema};"
CREATE_TABLE1 = f"CREATE TABLE IF NOT EXISTS {db_schema}.table1 (...);"
CREATE_TABLE2 = f"CREATE TABLE IF NOT EXISTS {db_schema}.table2 (...);"

# Create Schema and Tables:
with connection:
    with connection.cursor() as cursor:
        cursor.execute(CREATE_SCHEMA)
        cursor.execute(CREATE_TABLE1)
        cursor.execute(CREATE_TABLE2)
As of psycopg2 >= 2.7, psycopg2.sql can be used to compose dynamic statements, which also guards from SQL injection.