Two identical code snippets on the same directory level: one gets a pylint import-error, the other does not

I have two tests, which both import the module to be tested. The code is exactly the same, yet one test gets a pylint import-error and the other does not.
The directory looks like this:

```
/root
    /modules
        __init__.py
        module_to_test.py
    /tests
        /testcase_one
            test_one.py
        /testcase_two
            test_two.py
```
In the test code I used:

```python
BASEDIR = os.path.dirname(__file__)
ROOTDIR = os.path.abspath(os.path.join(os.path.join(BASEDIR, os.pardir), os.pardir))
sys.path.insert(0, ROOTDIR)
from modules import module_to_test
```

The two test code snippets are identical, yet I get a pylint import-error in one but not in the other.
I tried passing the path to pylint directly via settings.json, and I tried the init-hook method, but nothing seems to work. What am I not seeing?
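For reference, a minimal sketch of what the init-hook approach usually looks like: the hook is plain Python that pylint executes at startup, and in older versions of the VS Code Python extension it is typically passed as a single string through the `python.linting.pylintArgs` setting. The path below is a placeholder, not the real project root:

```python
# Python executed via pylint's --init-hook before linting starts
# (passed as one string, e.g. "import sys; sys.path.insert(0, r'...')").
# The path is a placeholder; point it at the directory that contains /modules.
import sys

sys.path.insert(0, r"C:\path\to\project_root")
```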
Pylint error message:
"{
"resource": "/c:/SourceTree/DIN743/p_test_DIN743/p_integration_kerbueberlagerung_1_test/p_integration_kerbueberlagerung_1_test.py",
"owner": "python",
"code": "import-error",
"severity": 8,
"message": "Unable to import 'p_DIN743'",
"source": "pylint",
"startLineNumber": 10,
"startColumn": 1,
"endLineNumber": 10,
"endColumn": 1
}"
Content of the test file:

```python
# -*- coding: utf-8 -*-
import os
import sys

BASEDIR = os.path.dirname(__file__)
ROOTDIR = os.path.abspath(os.path.join(BASEDIR, "../.."))
TOOLDIR = os.path.join(os.path.join(ROOTDIR, os.pardir), "tools")
sys.path.insert(0, ROOTDIR)
sys.path.insert(0, TOOLDIR)

import unittest

from p_DIN743 import m_DIN743
from tools import m_jsonhelper


class TestDIN743(unittest.TestCase):

    @classmethod
    def setUpClass(cls):
        # change cwd
        os.chdir(os.path.dirname(os.path.abspath(__file__)))
        print("cwd changed to: {}".format(os.getcwd()))
        # check if inputs.json & soll_outputs.json exist
        if not os.path.isfile("inputs.json"):
            print("inputs.json does not exist but is required!")
        if not os.path.isfile("soll_outputs.json"):
            print("soll_outputs.json does not exist but is required!")
        # create test object
        cls.__test_object = m_DIN743.DIN743()
        cls.__test_object.Run()
        cls.outputs = m_jsonhelper.loadjson("outputs.json")
        cls.soll_outputs = m_jsonhelper.loadjson("soll_outputs.json")

    def test_dicts(self):
        self.maxDiff = None
        self.assertDictEqual(TestDIN743.soll_outputs, TestDIN743.outputs)


if __name__ == "__main__":
    unittest.main()
```

Related

Pytest does not respect DJANGO_SETTINGS_MODULE

I am having trouble with setting up database configs correctly for unit-testing purposes using pytest.
My goal is to create a brand new empty test database each time I run the tests.
I have a pytest.ini file that looks like this:

```ini
[pytest]
DJANGO_SETTINGS_MODULE = test_settings
python_files = test_*.py
python_functions = test_*
addopts = --ds=test_settings --create-db
```
My project structure is like this:

```
my_project
    app_1
    app_2
    tests
    my_project
        settings.py
    test_settings
        __init__.py
```
The test_settings/__init__.py file looks like this:
```python
import os

from my_project.settings import *

POSTGRES_USER = os.getenv('POSTGRES_USER')
POSTGRES_HOST = os.getenv('POSTGRES_HOST')
POSTGRES_PASSWORD = os.getenv('POSTGRES_PASSWORD')
POSTGRES_DB = 'test_db'
POSTGRES_PORT = os.getenv('POSTGRES_PORT')

DATABASES = {
    'default': {
        'ENGINE': 'django.contrib.gis.db.backends.postgis',
        'NAME': POSTGRES_DB,
        'USER': POSTGRES_USER,
        'PASSWORD': POSTGRES_PASSWORD,
        'HOST': POSTGRES_HOST,
        'PORT': POSTGRES_PORT,
    }
}
```
And I have test cases like this:

```python
import pytest
from model_bakery import baker
from rest_framework.test import APIClient

from sales.models import ServiceLocation


# TODO: move this into a shared module somewhere
@pytest.fixture
def api_client():
    return APIClient


@pytest.mark.django_db
def test_get_service_locations(api_client):
    baker.make(ServiceLocation, _quantity=3)
    response = api_client().get('/mobile_api/v3/service_locations')
    assert response.status_code == 200
    assert len(response.json()['data']) == 3
```
When I run the pytest command, for some reason I get `assert 28 == 3`, which means Python is connecting to the original database that I use for development.
Most surprisingly, I tried deleting one record from my development database table so that the number of rows becomes 24 (instead of 25), but Python seems to cache the database.
I am totally lost. Any ideas what I am doing wrong?
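One thing worth checking, as a minimal sketch (assuming pytest-django is installed and a conftest.py sits at the repository root next to pytest.ini): export DJANGO_SETTINGS_MODULE before Django configures itself, so the override does not depend on which ini file pytest happens to discover.

```python
# conftest.py at the repository root -- hypothetical sketch.
# Forces the test settings module before django.setup() runs, regardless of
# which configuration file pytest picks up.
import os

os.environ["DJANGO_SETTINGS_MODULE"] = "test_settings"
```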

How to pass a database connection into Airflow KubernetesPodOperator

I'm confused about Airflow's KubernetesPodOperator: how can I pass my load_users_into_table() function, which takes a conn_id parameter stored in an Airflow connection, into the pod?
The official docs propose putting the conn_id in a Secret, but I don't understand how I can pass it to my load_users_into_table() function after that.
https://airflow.apache.org/docs/stable/kubernetes.html
The function (task) to be executed in the pod:

```python
from pandas import read_csv  # assumed import; read_csv is used unqualified in the original


def load_users_into_table(postgres_hook, schema, path):
    gdf = read_csv(path)
    gdf.to_sql('users', con=postgres_hook.get_sqlalchemy_engine(), schema=schema)
```
The DAG:

```python
_pg_hook = PostgresHook(postgres_conn_id=_conn_id)

with dag:
    test = KubernetesPodOperator(
        namespace=namespace,
        image=image_name,
        cmds=["python", "-c"],
        arguments=[load_users_into_table],
        labels={"dag-id": dag.dag_id},
        name="airflow-test-pod",
        task_id="task-1",
        is_delete_operator_pod=True,
        in_cluster=in_cluster,
        get_logs=True,
        config_file=config_file,
        executor_config={
            "KubernetesExecutor": {"request_memory": "512Mi",
                                   "limit_memory": "1024Mi",
                                   "request_cpu": "1",
                                   "limit_cpu": "2"}
        }
    )
```
Assuming you want to run with KubernetesPodOperator, you can use argparse and add arguments to the Docker cmd. Something along these lines should do the job:
```python
import argparse


def f(arg):
    print(arg)


parser = argparse.ArgumentParser()
parser.add_argument('--foo', help='foo help')
args = parser.parse_args()

if __name__ == '__main__':
    f(args.foo)
```
Dockerfile:

```dockerfile
FROM python:3
COPY main.py main.py
CMD ["python", "main.py", "--foo", "somebar"]
```
There are other ways to solve this, such as using Secrets, ConfigMaps, or even Airflow Variables, but this should get you moving forward.
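To tie this back to the original question, here is a hedged sketch (Airflow 1.10-style imports; the connection id, image name, and argument names are placeholders, not confirmed by the question) of reading the credentials from the Airflow connection on the scheduler side and forwarding them to the container as command-line arguments consumed by an argparse script like the one above:

```python
# Hypothetical sketch -- not the official pattern from the Airflow docs.
from datetime import datetime

from airflow import DAG
from airflow.contrib.operators.kubernetes_pod_operator import KubernetesPodOperator
from airflow.hooks.base_hook import BaseHook

conn = BaseHook.get_connection("my_postgres")  # placeholder connection id

dag = DAG("pod_example", start_date=datetime(2020, 1, 1), schedule_interval=None)

load_users = KubernetesPodOperator(
    namespace="default",
    image="my-registry/loader:latest",  # image built from the Dockerfile above
    cmds=["python", "main.py"],
    # Argument names are illustrative; main.py would need matching add_argument() calls.
    arguments=["--host", conn.host, "--user", conn.login, "--password", conn.password],
    name="load-users-pod",
    task_id="load_users",
    get_logs=True,
    dag=dag,
)
```

Note that anything passed this way ends up in the pod spec, so for real credentials the Secret-based approach from the docs is still preferable.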

How to hide the password from the log and rendered template when passing another Airflow connection to the Airflow SSH Operator

Summary of my DAG:
I am using the SSH Operator to SSH to an EC2 instance and run a JAR file which connects to multiple DBs. I've declared the Airflow connections in my DAG file and I'm able to pass the variables into the EC2 instance. As you can see below, I'm passing the credentials into the Java command as properties.
Airflow version - airflow-1-10.7
Package installed - apache-airflow[crypto]
```python
from airflow import DAG
from datetime import datetime, timedelta
from airflow.contrib.hooks.ssh_hook import SSHHook
from airflow.contrib.operators.ssh_operator import SSHOperator
from airflow.hooks.base_hook import BaseHook
from airflow.models.connection import Connection

ssh_hook = SSHHook(ssh_conn_id='ssh_to_ec2')
ssh_hook.no_host_key_check = True

redshift_connection = BaseHook.get_connection("my_redshift")
rs_user = redshift_connection.login
rs_password = redshift_connection.password

mongo_connection = BaseHook.get_connection("my_mongo")
mongo_user = mongo_connection.login
mongo_password = mongo_connection.password

default_args = {
    'owner': 'AIRFLOW',
    'start_date': datetime(2020, 4, 1, 0, 0),
    'email': [],
    'retries': 1,
}

dag = DAG('connect_to_redshift', default_args=default_args)

t00_00 = SSHOperator(
    task_id='ssh_and_connect_db',
    ssh_hook=ssh_hook,
    command="java "
            "-Drs_user={rs_user} -Drs_pass={rs_pass} "
            "-Dmongo_user={mongo_user} -Dmongo_pass={mongo_pass} "
            "-jar /home/airflow/root.jar".format(rs_user=rs_user,
                                                 rs_pass=rs_password,
                                                 mongo_user=mongo_user,
                                                 mongo_pass=mongo_password),
    dag=dag)

t00_00
```
Problem
The values for rs_pass and mongo_pass are exposed in the Rendered Template and in the Airflow log, which is not good. I would like a solution that hides all this sensitive information from the log and the rendered template when using the SSH Operator.
So far I've tried reducing the log verbosity to ERROR in airflow.cfg, but the values still show up in the Rendered Template.
Please enlighten me.
Thanks
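One possible workaround, as a rough sketch (it assumes the credentials are already provisioned as environment variables on the EC2 instance, for example from a secrets manager or a protected profile file, which is not shown here): keep the secrets out of the command string entirely, so neither the log nor the rendered template ever contains them.

```python
# Hypothetical sketch: the rendered template then shows the literal $RS_USER /
# $RS_PASS placeholders instead of real values; the remote shell expands them
# from the environment that already exists on the EC2 host.
# ssh_hook and dag are the objects defined in the DAG above.
from airflow.contrib.operators.ssh_operator import SSHOperator

command = (
    "java "
    "-Drs_user=$RS_USER -Drs_pass=$RS_PASS "
    "-Dmongo_user=$MONGO_USER -Dmongo_pass=$MONGO_PASS "
    "-jar /home/airflow/root.jar"
)

t00_01 = SSHOperator(
    task_id='ssh_and_connect_db_no_secrets',
    ssh_hook=ssh_hook,
    command=command,
    dag=dag,
)
```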

Unable to run airflow scheduler

I have recently installed Airflow on an AWS server by using this guide for Ubuntu 16.04. After a painful but successful install, I started the webserver. I tried a sample DAG as follows:
```python
from airflow.operators.python_operator import PythonOperator
from airflow.operators.dummy_operator import DummyOperator
from datetime import timedelta
from airflow import DAG
import airflow

# DEFAULT ARGS
default_args = {
    'owner': 'airflow',
    'start_date': airflow.utils.dates.days_ago(2),
    'depends_on_past': False}

dag = DAG('init_run', default_args=default_args, description='DAG SAMPLE',
          schedule_interval='@daily')


def print_something():
    print("HELLO AIRFLOW!")


with dag:
    task_1 = PythonOperator(task_id='do_it', python_callable=print_something)
    task_2 = DummyOperator(task_id='dummy')
    task_1 << task_2
```
But when I open the UI, the tasks in the DAG are still in "No Status", no matter how many times I trigger them manually or refresh the page.
Later I found out that the airflow scheduler is not running and shows the following error:
```
{celery_executor.py:228} ERROR - Error sending Celery task:No module named 'MySQLdb'
Celery Task ID: ('init_run', 'dummy', datetime.datetime(2019, 5, 30, 18, 0, 24, 902499, tzinfo=<TimezoneInfo [UTC, GMT, +00:00:00, STD]>), 1)
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/site-packages/airflow/executors/celery_executor.py", line 118, in send_task_to_executor
    result = task.apply_async(args=[command], queue=queue)
  File "/usr/local/lib/python3.7/site-packages/celery/app/task.py", line 535, in apply_async
    **options
  File "/usr/local/lib/python3.7/site-packages/celery/app/base.py", line 728, in send_task
    amqp.send_task_message(P, name, message, **options)
  File "/usr/local/lib/python3.7/site-packages/celery/app/amqp.py", line 552, in send_task_message
    **properties
  File "/usr/local/lib/python3.7/site-packages/kombu/messaging.py", line 181, in publish
    exchange_name, declare,
  File "/usr/local/lib/python3.7/site-packages/kombu/connection.py", line 510, in _ensured
    return fun(*args, **kwargs)
  File "/usr/local/lib/python3.7/site-packages/kombu/messaging.py", line 194, in _publish
    [maybe_declare(entity) for entity in declare]
  File "/usr/local/lib/python3.7/site-packages/kombu/messaging.py", line 194, in <listcomp>
    [maybe_declare(entity) for entity in declare]
  File "/usr/local/lib/python3.7/site-packages/kombu/messaging.py", line 102, in maybe_declare
    return maybe_declare(entity, self.channel, retry, **retry_policy)
  File "/usr/local/lib/python3.7/site-packages/kombu/common.py", line 121, in maybe_declare
    return _maybe_declare(entity, channel)
  File "/usr/local/lib/python3.7/site-packages/kombu/common.py", line 145, in _maybe_declare
    entity.declare(channel=channel)
  File "/usr/local/lib/python3.7/site-packages/kombu/entity.py", line 608, in declare
    self._create_queue(nowait=nowait, channel=channel)
  File "/usr/local/lib/python3.7/site-packages/kombu/entity.py", line 617, in _create_queue
    self.queue_declare(nowait=nowait, passive=False, channel=channel)
  File "/usr/local/lib/python3.7/site-packages/kombu/entity.py", line 652, in queue_declare
    nowait=nowait,
  File "/usr/local/lib/python3.7/site-packages/kombu/transport/virtual/base.py", line 531, in queue_declare
    self._new_queue(queue, **kwargs)
  File "/usr/local/lib/python3.7/site-packages/kombu/transport/sqlalchemy/__init__.py", line 82, in _new_queue
    self._get_or_create(queue)
  File "/usr/local/lib/python3.7/site-packages/kombu/transport/sqlalchemy/__init__.py", line 70, in _get_or_create
    obj = self.session.query(self.queue_cls) \
  File "/usr/local/lib/python3.7/site-packages/kombu/transport/sqlalchemy/__init__.py", line 65, in session
    _, Session = self._open()
  File "/usr/local/lib/python3.7/site-packages/kombu/transport/sqlalchemy/__init__.py", line 56, in _open
    engine = self._engine_from_config()
  File "/usr/local/lib/python3.7/site-packages/kombu/transport/sqlalchemy/__init__.py", line 51, in _engine_from_config
    return create_engine(conninfo.hostname, **transport_options)
  File "/usr/local/lib/python3.7/site-packages/sqlalchemy/engine/__init__.py", line 443, in create_engine
    return strategy.create(*args, **kwargs)
  File "/usr/local/lib/python3.7/site-packages/sqlalchemy/engine/strategies.py", line 87, in create
    dbapi = dialect_cls.dbapi(**dbapi_args)
  File "/usr/local/lib/python3.7/site-packages/sqlalchemy/dialects/mysql/mysqldb.py", line 104, in dbapi
    return __import__("MySQLdb")
ModuleNotFoundError: No module named 'MySQLdb'
```
Here are the settings in the config file (airflow.cfg):

```
sql_alchemy_conn = postgresql+psycopg2://airflow@localhost:5432/airflow
broker_url = sqla+mysql://airflow:airflow@localhost:3306/airflow
result_backend = db+postgresql://airflow:airflow@localhost/airflow
```
I've been struggling with this issue for two days now. Please help.
In your airflow.cfg there should also be a config option for celery_result_backend. Are you able to let us know what this value is set to? If it is not present in your config, set it to the same value as the result_backend, i.e.:

```
celery_result_backend = db+postgresql://airflow:airflow@localhost/airflow
```

Then restart the Airflow stack to ensure the configuration changes apply.
(I wanted to leave this as a comment but don't have enough rep to do so.)
I think the guide you are following didn't tell you to install MySQL, yet it seems you are using it in the broker URL.
You can install the MySQL client and then configure it (for Python 3.5+):

```
pip install mysqlclient
```

Alternatively, as a quick fix, you can use RabbitMQ (a message broker, which you will need in order to run Airflow DAGs with Celery) with the guest user login. Your broker_url will then be:

```
broker_url = amqp://guest:guest@localhost:5672//
```

If not already installed, RabbitMQ can be installed with the following command:

```
sudo apt install rabbitmq-server
```

Change the configuration NODE_IP_ADDRESS=0.0.0.0 in the configuration file located at /etc/rabbitmq/rabbitmq-env.conf, then start the RabbitMQ service:

```
sudo service rabbitmq-server start
```
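As a quick sanity check before restarting the scheduler, a small hedged sketch (it assumes kombu is already installed as an Airflow dependency and that the broker_url above is the one actually in use):

```python
# Verify that the configured Celery broker is reachable.
# The URL is a placeholder; use the broker_url from airflow.cfg.
from kombu import Connection

with Connection("amqp://guest:guest@localhost:5672//") as conn:
    conn.ensure_connection(max_retries=3)
    print("broker is reachable")
```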

Celery giving "Rate limit attempt for unknown task" for a task name

Basically I am running two Celery workers, from different modules and on different queues, but against the same RabbitMQ:

```
celery worker -l info -A module_name.main.tasks -Q queue_one
celery worker -l info -A module_name.sub.sub_task -Q queue_two
```

When I try to rate-limit the first task, which lives in the first module:

```python
app.control.rate_limit('module_name.main.tasks.method', '30/m')
```

I get this error from the second worker, which runs the other module:

```
Rate limit attempt for unknown task
```

I would prefer the rate-limit call to go only to the worker that is working on that module, not to the other workers which are not working on it.
Any idea how to resolve this?
Update: adding code:
celery_worker_base.py:
```python
from __future__ import absolute_import

from celery import Celery

app = Celery('poc',
             backend='mongodb://user:pass@ip:27017/collection',
             broker='amqp://user:pass@ip/vhost',
             include=['poc.main.proj.tasks'])

# Optional configuration, see the application user guide.
app.conf.update(
    CELERY_TASK_RESULT_EXPIRES=3600,
    CELERY_ROUTES={'poc.main.proj.tasks': {'queue': 'proj_tasks'}}
)

app.control.rate_limit('poc.main.proj.tasks.get', '30/m')
app.control.rate_limit('poc.main.proj.tasks.compute', '30/m')

if __name__ == '__main__':
    app.start()
```
Celery worker code, tasks.py:

```python
from __future__ import absolute_import

from poc.celery.celery_worker_base import app


@app.task
def get(url):
    print "calling get"


@app.task
def compute(info):
    print "calling compute"
```
Another module, celery_master.py:

```python
from __future__ import absolute_import

from celery import Celery
from datetime import timedelta
from poc.config.config import *
from boto import ec2

master_app = Celery('poc',
                    backend='mongodb://user:pass@ip:27017/collection',
                    broker='amqp://user:pass@ip/vhost',
                    include=['poc.main.proj.tasks'])

# Optional configuration, see the application user guide.
master_app.conf.update(
    CELERY_TASK_RESULT_EXPIRES=3600,
    CELERYBEAT_SCHEDULE={
        'instance-check-every-fifteen-minute': {
            'task': 'poc.main.instance.check.check_count',
            'schedule': timedelta(seconds=900),
            'options': {'queue': 'instance_check'}
        }
    },
    CELERY_ROUTES={'poc.main.instance.check': {'queue': 'instance_check'}},
    CELERY_TIMEZONE='UTC'
)

region = ec2.connect_to_region(
    REGION,
    aws_access_key_id=AWS_ACCESS_KEY,
    aws_secret_access_key=AWS_SECRET_KEY
)

if __name__ == '__main__':
    master_app.start()
```
Master worker, check.py:

```python
from __future__ import absolute_import

from celery import Celery
from poc.config.config import *
from poc.celery.celery_master import master_app, region


@master_app.task
def check_count():
    print "calling check"
```
PS: thanks for not down-voting the question.
Regarding Celery not being able to find the task, I would ensure you are passing the task names exactly as they are listed in app.control.app.tasks.
This is a dict of known tasks, where the keys are what is eligible for passing to control.rate_limit().
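For illustration, a rough sketch of what that check could look like, reusing the app from celery_worker_base.py above (the import_default_modules() call and the destination argument are assumptions about your setup, not something confirmed in the question):

```python
# Hypothetical sketch: print the registered task names, then rate-limit using
# one of those exact keys and restrict the broadcast to specific workers.
from poc.celery.celery_worker_base import app

# Make sure the task modules listed in `include` are imported so the
# registry is populated before inspecting it.
app.loader.import_default_modules()

for name in sorted(app.tasks):
    print(name)

# Use the exact registered name; `destination` limits the control command to
# the named workers instead of broadcasting it to every worker on the broker.
app.control.rate_limit(
    'poc.main.proj.tasks.get',
    '30/m',
    destination=['celery@worker-one'],  # placeholder worker nodename
)
```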