DAG Import Errors - Invalid arguments were passed - postgresql

I'm trying to load data from PostgreSQL (local) to Google Cloud Storage using Airflow running in Docker, but I got an error like this: https://i.stack.imgur.com/pHzAF.png
Broken DAG: [/opt/airflow/dags/postgres_to_bigquery.py] Traceback (most recent call last):
File "/home/airflow/.local/lib/python3.7/site-packages/airflow/models/baseoperator.py", line 408, in apply_defaults
result = func(self, **kwargs, default_args=default_args)
File "/home/airflow/.local/lib/python3.7/site-packages/airflow/models/baseoperator.py", line 756, in __init__
f"Invalid arguments were passed to {self.__class__.__name__} (task_id: {task_id}). "
airflow.exceptions.AirflowException: Invalid arguments were passed to PostgresToGCSOperator (task_id: postgres_to_gcs). Invalid arguments were:
**kwargs: {'google_cloud_storage_conn_id': 'gcp_conn'}
And this is some part of my own code
GCS_CONN = Variable.get('GCS_CONN')
'owner': 'airflow',
'retries': 0,
'retry_delay': timedelta(minutes=5),
with DAG(
dag_id = 'postgres_to_bigquery',
default_args = default_args,
start_date = datetime(2022, 10, 3),
schedule_interval = '#once'
) as dag:
start = DummyOperator(
task_id = 'start',
postgres_to_gcs = PostgresToGCSOperator(
task_id = f'postgres_to_gcs',
postgres_conn_id = 'postgres_localhost',
sql = f'select * from orders;',
bucket = 'airflow_fakri',
filename = f'airflow_fakri/data/orders.csv',
export_format = 'csv',
gzip = False,
use_server_side_cursor = False,
google_cloud_storage_conn_id = GCS_CONN

It looks like you are indeed passing the wrong argument.
From the doc: https://airflow.apache.org/docs/apache-airflow-providers-google/stable/_api/airflow/providers/google/cloud/transfers/postgres_to_gcs/index.html
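The postgres_conn_id parameter is only for the Postgres connection; the Google Cloud connection must be passed as gcp_conn_id, not google_cloud_storage_conn_id.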


Airflow - email operator sending multiple files issue

I am using Airflow 2.2 and trying to send multiple files with the EmailOperator. The list of files is generated dynamically, and I use an XCom pull to get it from the previous task. For some reason, the EmailOperator's files parameter is not able to read the file list from the XCom value. Kindly advise.
Error details:
Traceback (most recent call last):
File "/usr/local/lib/python3.7/site-packages/airflow/task/task_runner/standard_task_runner.py", line 85, in _start_by_fork
args.func(args, dag=self.dag)
File "/usr/local/lib/python3.7/site-packages/airflow/cli/cli_parser.py", line 48, in command
return func(*args, **kwargs)
File "/usr/local/lib/python3.7/site-packages/airflow/utils/cli.py", line 92, in wrapper
return f(*args, **kwargs)
File "/usr/local/lib/python3.7/site-packages/airflow/cli/commands/task_command.py", line 292, in task_run
_run_task_by_selected_method(args, dag, ti)
File "/usr/local/lib/python3.7/site-packages/airflow/cli/commands/task_command.py", line 107, in _run_task_by_selected_method
_run_raw_task(args, ti)
File "/usr/local/lib/python3.7/site-packages/airflow/cli/commands/task_command.py", line 184, in _run_raw_task
File "/usr/local/lib/python3.7/site-packages/airflow/utils/session.py", line 70, in wrapper
return func(*args, session=session, **kwargs)
File "/usr/local/lib/python3.7/site-packages/airflow/models/taskinstance.py", line 1332, in _run_raw_task
File "/usr/local/lib/python3.7/site-packages/airflow/models/taskinstance.py", line 1458, in _execute_task_with_callbacks
result = self._execute_task(context, self.task)
File "/usr/local/lib/python3.7/site-packages/airflow/models/taskinstance.py", line 1514, in _execute_task
result = execute_callable(context=context)
File "/usr/local/lib/python3.7/site-packages/airflow/operators/email.py", line 88, in execute
File "/usr/local/lib/python3.7/site-packages/airflow/utils/email.py", line 66, in send_email
File "/usr/local/lib/python3.7/site-packages/airflow/utils/email.py", line 99, in send_email_smtp
File "/usr/local/lib/python3.7/site-packages/airflow/utils/email.py", line 157, in build_mime_message
with open(fname, "rb") as file:
FileNotFoundError: [Errno 2] No such file or directory: '['
[2022-09-18, 17:24:22 UTC] {{local_task_job.py:154}} INFO - Task exited with return code 1
[2022-09-18, 17:24:23 UTC] {{local_task_job.py:264}} INFO - 0 downstream tasks scheduled from follow-on schedule check
from airflow import DAG
from airflow.operators.email import EmailOperator
from airflow.operators.python import PythonOperator
import os
from datetime import datetime, timedelta
default_args = {
"owner": 'TEST',
"depends_on_past": False,
"email_on_failure": False,
"email_on_retry": False,
"retries": 0,
with DAG(
start_date=datetime(2022, 9, 14),
end_date=datetime(2022, 9, 15),
schedule_interval="0 12 * * *", # Runs every day # 8AM EST
) as dag:
def print_local_folder_files(local_temp_folder):
    """Print and return the paths of the entries in ``local_temp_folder``.

    Args:
        local_temp_folder: Directory to list. It may be given with or
            without a trailing path separator.

    Returns:
        A list with one path per directory entry, each built by joining
        ``local_temp_folder`` and the entry name.

    Raises:
        OSError: Propagated from ``os.listdir`` if the folder cannot be read.
    """
    # List the directory once so the printed listing and the returned paths
    # always describe the same snapshot of the folder.
    entries = os.listdir(local_temp_folder)
    print("local folder files => ", entries)
    # os.path.join inserts the separator only when needed, so a folder passed
    # without a trailing slash no longer produces paths like "/tmp/dirfile".
    files_list = [os.path.join(local_temp_folder, entry) for entry in entries]
    print("files_list => ", files_list)
    return files_list
print_local_folder_files = PythonOperator(
op_kwargs={'local_temp_folder': "/usr/local/airflow/dags/temp_dir/"},
send_email = EmailOperator(
subject='Test Email op Notification',
html_content='Test email op notification email. ',
files="{{ task_instance.xcom_pull(task_ids='print_local_folder_files') }}"
print_local_folder_files >> send_email
You pushed a list to XCom, but templated fields are rendered as strings by default, so what you have there is the string representation of a list. This is why, when you try to read it, you get only the first character ('['): iterating over a string yields it character by character.
To solve your issue you should set render_template_as_native_obj=True on the DAG object:
with DAG(
) as dag:
This will let Jinja engine know that you expect to render as native Python types so you will get a list rather than a string.
For more information check Airflow docs on this feature.

NotImplementedError asynchronous

I'm trying to use FastAPI to connect to PostgreSQL asynchronously, but I get a NotImplementedError.
It seems the line record = await objects.get(test5, orderId=result['orderId'])
causes this problem,
but I don't know how to fix it.
I found some solutions online, but they didn't work.
import platform
import asyncio
if platform.system() == "Windows":
import peewee_async
import peewee_asyncext
from fastapi import FastAPI
from playhouse.postgres_ext import *
db = peewee_asyncext.PooledPostgresqlExtDatabase(
database = 'postgres',
host = '',
port = '5432',
user = 'postgres',
password = 1234,
register_hstore = False,
max_connections = 20,
connect_timeout = 3
objects = peewee_async.Manager(database =db)
db.set_allow_sync = False
class test5(Model):
orderId = FixedCharField(primary_key = True)
transactionId = FixedCharField()
class Meta:
database = db
table_name = 'test'
app = FastAPI()
async def test():
result = {
record = await objects.get(test5, orderId=result['orderId'])
except Exception as e:
if str(e) == "":
await objects.execute(test5.insert(result))
return result
import requests,json
Future exception was never retrieved
future: <Future finished exception=NotImplementedError()>
Traceback (most recent call last):
File "D:\Python\lib\site-packages\peewee_async.py", line 852, in connect_async
await conn.connect()
File "D:\Python\lib\site-packages\peewee_async.py", line 1014, in connect
self.pool = await aiopg.create_pool(
File "D:\Python\lib\site-packages\aiopg\pool.py", line 300, in from_pool_fill
await self._fill_free_pool(False)
File "D:\Python\lib\site-packages\aiopg\pool.py", line 336, in _fill_free_pool
conn = await connect(
File "D:\Python\lib\site-packages\aiopg\connection.py", line 65, in connect
connection = Connection(
File "D:\Python\lib\site-packages\aiopg\connection.py", line 772, in __init__
File "D:\Python\lib\asyncio\events.py", line 504, in add_reader
raise NotImplementedError
Future exception was never retrieved
future: <Future finished exception=NotImplementedError()>
Traceback (most recent call last):
File "C:\Users\user\Desktop\test\others\.\test5.py", line 39, in test
record = await objects.get(test5, orderId=result['orderId'])
File "D:\Python\lib\site-packages\peewee_async.py", line 166, in get
await self.connect()
File "D:\Python\lib\site-packages\peewee_async.py", line 302, in connect
await self.database.connect_async(loop=self.loop, timeout=self._timeout)
File "D:\Python\lib\site-packages\peewee_async.py", line 852, in connect_async
await conn.connect()
File "D:\Python\lib\site-packages\peewee_async.py", line 1014, in connect
self.pool = await aiopg.create_pool(
File "D:\Python\lib\site-packages\aiopg\pool.py", line 300, in from_pool_fill
await self._fill_free_pool(False)
File "D:\Python\lib\site-packages\aiopg\pool.py", line 336, in _fill_free_pool
conn = await connect(
File "D:\Python\lib\site-packages\aiopg\connection.py", line 65, in connect
connection = Connection(
File "D:\Python\lib\site-packages\aiopg\connection.py", line 772, in __init__
File "D:\Python\lib\asyncio\events.py", line 504, in add_reader
raise NotImplementedError
*Windows version info:
Python 3.9.10 (tags/v3.9.10:f2f3f53, Jan 17 2022, 15:14:21) [MSC v.1929 64 bit (AMD64)] on
Windows 10 Pro, version 20H2, OS build 19042.1526

Python multiprocessing, can't pickle thread.lock (pymongo.Cursor)

First, let me assure you I read all the relevant answers and they don't work for me.
I am using multiprocessing Pool to parallelize my data creation. I am using Mongodb 5.0 and pymongo client.
As you can see, I am initializing the Mongo client in the worker, as suggested by the available answers, but I still get:
TypeError: cannot pickle '_thread.lock' object
Exception ignored in: <function CommandCursor.__del__ at 0x7f96f6fff160>
Is there a way I can use multiprocessing with pymongo.Cursor ??
Any help will be appreciated
This is the function that calls the Pool
def get_all_valid_events(
lookahead = 0
start = time.perf_counter()
listings = Manager().list(all_listings.copy())
valid_events = []
if debug:
for i in range(ceil(len(listings)/chunk_size)):
valid_events += get_valid_event_dates_by_listing(event_criteria,listings[i*chunk_size:(i+1)*chunk_size] , earnings, days_around_earnings,debug)
payload = list()
for i in range(ceil(len(listings)/chunk_size)):
with ThreadPool(poolsize) as pool:
valid_events = pool.starmap(get_valid_event_dates_by_listing, payload)
print(f"getting all valid true events took {time.perf_counter() - start} sec")
return valid_events
And this is the worker function:
def get_valid_event_dates_by_listing(
) -> List[Tuple[Tuple[str, datetime], int]]:
#TODO: generalize event filter
start = time.perf_counter()
client = MongoClient()
db = client['stock_signals']
cursor_candles_by_listing = db.candles.find(
{'listing': {'$in': listings}},
{'_id':0, 'listing':1, 'date':1,'position':1, 'PD_BBANDS_6_lower':1, 'close':1, 'PD_BBANDS_6_upper':1}
candles = list(cursor_candles_by_listing)
df = pd.DataFrame(candles).dropna()
minimum_position_dict = dict(df.groupby('listing').min()['position']) # We need the minimum position by listing to filter only events that have lookback
# Filter only the dates that satisfy the criteria
lte_previous_bb_6_lower = df['close'] <= df[f"{event_criteria}_lower"].shift()
gte_previous_bb_6_upper = df['close'] >= df[f"{event_criteria}_upper"].shift()
potential_true_events_df = df[lte_previous_bb_6_lower | gte_previous_bb_6_upper]
potential_false_events_df = df.drop(potential_true_events_df.index)
potential_true_event_dates = potential_true_events_df[['listing', 'date', 'position']].values
actual_true_event_dates = earning_helpers.filter_event_dates_by_earnings_and_position(potential_true_event_dates, earnings_list, minimum_position_dict ,days_around_earning=days_around_earnings, lookback=lookback)
true_event_dates = [((event_date[0], event_date[1], event_date[2]), 1) for event_date in actual_true_event_dates]
potential_false_event_dates = potential_false_events_df[['listing', 'date', 'position']].values
actual_false_event_dates = _random_false_events_from_listing_df(potential_false_event_dates, len(actual_true_event_dates), earnings_list, minimum_position_dict, days_around_earnings,lookback)
false_events_dates = [((event_date[0], event_date[1], event_date[2]), 0) for event_date in actual_false_event_dates]
all_event_dates = true_event_dates + false_events_dates
print(f"getting a true sequence for listing took {time.perf_counter() - start} sec")
return all_event_dates
And this is my main
from utils import event_helpers, earning_helpers
from utils.queries import get_candle_listing
if __name__ == "__main__":
all_listings = get_candle_listing.get_listings()
earnigns = earning_helpers.get_all_earnings_dates()
res = event_helpers.get_all_valid_events('PD_BBANDS_6', all_listings, earnigns, 2, chunk_size=100)
Full Stack Trace
File "test_multiprocess.py", line 8, in <module>
res = event_helpers.get_all_valid_events('PD_BBANDS_6', all_listings, earnigns, 2, chunk_size=100)
File "/media/data/projects/ml/signal_platform/utils/event_helpers.py", line 53, in get_all_valid_events
valid_events = pool.starmap(get_valid_event_dates_by_listing, payload)
File "/home/froy001/.asdf/installs/python/3.8.12/lib/python3.8/multiprocessing/pool.py", line 372, in starmap
return self._map_async(func, iterable, starmapstar, chunksize).get()
File "/home/froy001/.asdf/installs/python/3.8.12/lib/python3.8/multiprocessing/pool.py", line 771, in get
raise self._value
File "/home/froy001/.asdf/installs/python/3.8.12/lib/python3.8/multiprocessing/pool.py", line 537, in _handle_tasks
File "/home/froy001/.asdf/installs/python/3.8.12/lib/python3.8/multiprocessing/connection.py", line 206, in send
File "/home/froy001/.asdf/installs/python/3.8.12/lib/python3.8/multiprocessing/reduction.py", line 51, in dumps
cls(buf, protocol).dump(obj)
TypeError: cannot pickle '_thread.lock' object
Exception ignored in: <function CommandCursor.__del__ at 0x7f46e91e21f0>
Traceback (most recent call last):
File "/home/froy001/.cache/pypoetry/virtualenvs/signal-platform-31MTNyCe-py3.8/lib/python3.8/site-packages/pymongo/command_cursor.py", line 68, in __del__
File "/home/froy001/.cache/pypoetry/virtualenvs/signal-platform-31MTNyCe-py3.8/lib/python3.8/site-packages/pymongo/command_cursor.py", line 83, in __die
File "/home/froy001/.cache/pypoetry/virtualenvs/signal-platform-31MTNyCe-py3.8/lib/python3.8/site-packages/pymongo/mongo_client.py", line 1696, in _cleanup_cursor
File "/home/froy001/.cache/pypoetry/virtualenvs/signal-platform-31MTNyCe-py3.8/lib/python3.8/site-packages/pymongo/client_session.py", line 466, in _end_session
File "/home/froy001/.cache/pypoetry/virtualenvs/signal-platform-31MTNyCe-py3.8/lib/python3.8/site-packages/pymongo/client_session.py", line 871, in in_transaction
File "/home/froy001/.cache/pypoetry/virtualenvs/signal-platform-31MTNyCe-py3.8/lib/python3.8/site-packages/pymongo/client_session.py", line 362, in active
AttributeError: 'NoneType' object has no attribute 'STARTING'
Update: 01-23
I tried using the multiprocess library using dill but it didn't help

Socket, AttributeError: 'str' object has no attribute 'send'

If anyone actually reads this thanks!
Anyway, on to the problem: every time I run my code I get an 'AttributeError' and I can't find where the issue is. I'm using socket, tkinter, os and multiprocessing. Here's my code (I know it's not the most Pythonic thing in the world, but hey, I'm just playing with sockets):
#---Import statments---#
import socket, os, multiprocessing
import tkinter as tk
#---global variables---#
setup = ''
cleintsocket = ''
def setup():
global host, port, user
host = setup_host_box.get()
port = setup_port_box.get()
user = setup_user_box.get()
def connect(self, hostname, connectingport):
self.connect((hostname, int(connectingport)))
multiprocessing.Process(target = resv()).start()
def create_sock(nhost, nport):
global cleintsocket
cleintsocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
connect(cleintsocket, nhost, nport)
def send(username, cleintsock):
'''to send a message'''
usrmsg = (username + ' - ' + chat_msg_box.get()).encode()
def resv(sock):
'''resive subscript, run through mutiprosses module'''
while True:
rmsg = sock.recv(1024).decode()
chat_msg_display_text.insert('end.0.', rmsg)
def chat():
'''loads chat page'''
def start():
'''starts the setup page'''
#---TK Setup---#
#--window setup--#
window = tk.Tk()
#--connection setup page--#
setup_host_text = tk.Label(window, text = 'Host')
setup_host_box = tk.Entry(window, bg = '#ffffff')
setup_port_text = tk.Label(window, text = 'Port')
setup_port_box = tk.Entry(window, bg = '#ffffff')
setup_user_text = tk.Label(window, text = 'Username')
setup_user_box = tk.Entry(window, bg = '#ffffff')
setup_confirm_button = tk.Button(window,text = 'Connect', command = setup())
#--chat page--#
chat_msg_box = tk.Entry(window, bg='#ffffff')
chat_msg_send_button = tk.Button(window, text = 'send', command = send(user, cleintsocket))
chat_msg_display_text = tk.Text(window, width=600, height=500, wrap = 'word')
The Python console says there is an error here: chat_msg_send_button = tk.Button(window, text = 'send', command = send(user, cleintsocket)), which produces AttributeError: 'str' object has no attribute 'send'; however, I can't see any problem with it.
Please help.
Thanks again!
EDIT: Here's the error(Not needed now but this is for principle)
Traceback (most recent call last):
File ".../tkcleint.py", line 76, in <module>
chat_msg_send_button = tk.Button(window, text = 'send', command = send(user, cleintsocket))
File ".../tkcleint.py", line 29, in send
AttributeError: 'str' object has no attribute 'send'
First off (as @R.Murry has pointed out), you are calling the functions immediately and passing their return value as the command, which in this case is None, so I'd start by fixing that up:
setup_confirm_button = tk.Button(window,text = 'Connect', command = setup) #don't call setup here
def send_button_callback():
send(user, cleintsocket)
chat_msg_send_button = tk.Button(window, text = 'send', command = send_button_callback)
Next, it is important to include the whole error message in your question, because that is not the line that is running into problems:
Traceback (most recent call last):
File ".../test.py", line 76, in <module>
chat_msg_send_button = tk.Button(window, text = 'send', command = send(user, cleintsocket))
File ".../test.py", line 29, in send
AttributeError: 'str' object has no attribute 'send'
You pass the variable cleintsocket into send and try to use the .send method of a socket, however it is initialized to an empty string:
cleintsocket = ''
So if you call send before it is changed to a socket, you will get that error. Simply check whether it has been initialized yet:
def send(username, cleintsock):
'''to send a message'''
if cleintsock: #not an empty string
usrmsg = (username + ' - ' + chat_msg_box.get()).encode()
#else:window.bell() #play a error beep

Made Locust to login to a Web Application

I want Locust to be able to log in to my web application and start clicking the links inside it.
With this code I only get activity for the front page with the login, and I don't get any notification from inside the application.
import random
from locust import HttpLocust, TaskSet, task
from pyquery import PyQuery
class WalkPages(TaskSet):
def on_start(self):
self.client.post("/", {
"UserName": "my#email.com",
"Password": "2Password!",
"submit": "Sign In"
def index_page(self):
r = self.client.get("/Dashboard.mvc")
pq = PyQuery(r.content)
link_elements = pq("a")
self.urls_on_current_page = []
for l in link_elements:
if "href" in l.attrib:
def load_page(self):
url = random.choice(self.urls_on_current_page)
r = self.client.get(url)
class AwesomeUser(HttpLocust):
task_set = WalkPages
host = "https://myenv.beta.webapp.com"
min_wait = 20 * 1000
max_wait = 60 * 1000
I get the following message in the terminal after the first round.
[2015-02-13 12:08:43,740] webapp-qa/ERROR/stderr: Traceback (most recent call last):
File "/usr/local/lib/python2.7/dist-packages/locust/core.py", line 267, in run
File "/usr/local/lib/python2.7/dist-packages/locust/core.py", line 293, in execute_next_task
self.execute_task(task["callable"], *task["args"], **task["kwargs"])
File "/usr/local/lib/python2.7/dist-packages/locust/core.py", line 305, in execute_task
task(self, *args, **kwargs)
File "/home/webapp/LoadTest/locustfile.py", line 31, in load_page
url = random.choice(self.urls_on_current_page)
File "/usr/lib/python2.7/random.py", line 273, in choice
return seq[int(self.random() * len(seq))] # raises IndexError if seq is empty
IndexError: list index out of range
[2015-02-13 12:08:43,752] webapp-qa/ERROR/stderr: Traceback (most recent call last):
File "/usr/local/lib/python2.7/dist-packages/locust/core.py", line 267, in run
File "/usr/local/lib/python2.7/dist-packages/locust/core.py", line 293, in execute_next_task
self.execute_task(task["callable"], *task["args"], **task["kwargs"])
File "/usr/local/lib/python2.7/dist-packages/locust/core.py", line 305, in execute_task
task(self, *args, **kwargs)
File "/home/webapp/LoadTest/locustfile.py", line 31, in load_page
url = random.choice(self.urls_on_current_page)
File "/usr/lib/python2.7/random.py", line 273, in choice
return seq[int(self.random() * len(seq))] # raises IndexError if seq is empty
IndexError: list index out of range
[2015-02-13 12:08:43,775] webapp-qa/ERROR/stderr: Traceback (most recent call last):
File "/usr/local/lib/python2.7/dist-packages/locust/core.py", line 267, in run
File "/usr/local/lib/python2.7/dist-packages/locust/core.py", line 293, in execute_next_task
self.execute_task(task["callable"], *task["args"], **task["kwargs"])
File "/usr/local/lib/python2.7/dist-packages/locust/core.py", line 305, in execute_task
task(self, *args, **kwargs)
File "/home/webapp/LoadTest/locustfile.py", line 31, in load_page
url = random.choice(self.urls_on_current_page)
File "/usr/lib/python2.7/random.py", line 273, in choice
return seq[int(self.random() * len(seq))] # raises IndexError if seq is empty
IndexError: list index out of range
Your list may be empty.
def load_page(self):
if self.urls_on_current_page:
url = random.choice(self.urls_on_current_page)
r = self.client.get(url)
It took some time, but someone may need this. My findings in your code: the login request seems incorrect (check whether mine is correct); you cannot reach a variable defined inside one function from another function; and giving task(10) to a data-setter function is not suitable. Set urls_on_current_page as a class variable so it is available to the other class members. See my code and comments:
import random
from locust import HttpLocust, TaskSet, task
from pyquery import PyQuery
class WalkPages(TaskSet):
# define variable here to access them from inside the functions
urls_on_current_page = []
def login(self):
self.client.post("/login", data = {"UserName": "mesutgunes#email.com", "Password": "password"})
def get_urls(self):
r = self.client.get("/Dashboard.mvc")
pq = PyQuery(r.content)
link_elements = pq("a")
for link in link_elements:
if key in link.attrib and "http" not in link.attrib[key]:
# there maybe external link on the page
def on_start(self):
def load_page(self):
url = random.choice(self.urls_on_current_page)
r = self.client.get(url)
class AwesomeUser(HttpLocust):
task_set = WalkPages
host = "https://myenv.beta.webapp.com"
min_wait = 20 * 1000
max_wait = 60 * 1000