Python multiprocessing, can't pickle thread.lock (pymongo.Cursor) - mongodb

First, let me assure you I read all the relevant answers and they don't work for me.
I am using a multiprocessing Pool to parallelize my data creation, with MongoDB 5.0 and the pymongo client.
As you can see, I am initializing the Mongo client in the worker as suggested by the available answers, but I still get:
TypeError: cannot pickle '_thread.lock' object
Exception ignored in: <function CommandCursor.__del__ at 0x7f96f6fff160>
Is there a way I can use multiprocessing with pymongo.Cursor?
Any help will be appreciated.
This is the function that calls the Pool:
def get_all_valid_events(
    event_criteria: str,
    all_listings: List[str],
    earnings: List[Dict[str, Any]],
    days_around_earnings=0,
    debug=False,
    poolsize=10,
    chunk_size=100,
    lookback=30,
    lookahead=0,
):
    start = time.perf_counter()
    listings = Manager().list(all_listings.copy())
    valid_events = []
    if debug:
        for i in range(ceil(len(listings) / chunk_size)):
            valid_events += get_valid_event_dates_by_listing(
                event_criteria,
                listings[i * chunk_size:(i + 1) * chunk_size],
                earnings,
                days_around_earnings,
                debug,
            )
    else:
        payload = list()
        for i in range(ceil(len(listings) / chunk_size)):
            payload.append(
                [
                    event_criteria,
                    listings[i * chunk_size:(i + 1) * chunk_size],
                    earnings,
                    days_around_earnings,
                    debug,
                    lookback,
                    lookahead,
                ]
            )
        with ThreadPool(poolsize) as pool:
            valid_events = pool.starmap(get_valid_event_dates_by_listing, payload)
    print(f"getting all valid true events took {time.perf_counter() - start} sec")
    return valid_events
And this is the worker function:
def get_valid_event_dates_by_listing(
    event_criteria: str,
    listings: List[str],
    earnings_list,
    days_around_earnings=0,
    debug=False,
    lookback=30,
    lookahead=0,
) -> List[Tuple[Tuple[str, datetime], int]]:
    # TODO: generalize event filter
    start = time.perf_counter()
    client = MongoClient()
    db = client['stock_signals']
    cursor_candles_by_listing = db.candles.find(
        {'listing': {'$in': listings}},
        {'_id': 0, 'listing': 1, 'date': 1, 'position': 1,
         'PD_BBANDS_6_lower': 1, 'close': 1, 'PD_BBANDS_6_upper': 1}
    )
    candles = list(cursor_candles_by_listing)
    df = pd.DataFrame(candles).dropna()
    # We need the minimum position by listing to filter only events that have lookback
    minimum_position_dict = dict(df.groupby('listing').min()['position'])
    # Filter only the dates that satisfy the criteria
    lte_previous_bb_6_lower = df['close'] <= df[f"{event_criteria}_lower"].shift()
    gte_previous_bb_6_upper = df['close'] >= df[f"{event_criteria}_upper"].shift()
    potential_true_events_df = df[lte_previous_bb_6_lower | gte_previous_bb_6_upper]
    potential_false_events_df = df.drop(potential_true_events_df.index)
    potential_true_event_dates = potential_true_events_df[['listing', 'date', 'position']].values
    actual_true_event_dates = earning_helpers.filter_event_dates_by_earnings_and_position(
        potential_true_event_dates, earnings_list, minimum_position_dict,
        days_around_earning=days_around_earnings, lookback=lookback)
    true_event_dates = [((event_date[0], event_date[1], event_date[2]), 1)
                        for event_date in actual_true_event_dates]
    potential_false_event_dates = potential_false_events_df[['listing', 'date', 'position']].values
    actual_false_event_dates = _random_false_events_from_listing_df(
        potential_false_event_dates, len(actual_true_event_dates), earnings_list,
        minimum_position_dict, days_around_earnings, lookback)
    false_events_dates = [((event_date[0], event_date[1], event_date[2]), 0)
                          for event_date in actual_false_event_dates]
    all_event_dates = true_event_dates + false_events_dates
    shuffle(all_event_dates)
    print(f"getting a true sequence for listing took {time.perf_counter() - start} sec")
    return all_event_dates
And this is my main:
from utils import event_helpers, earning_helpers
from utils.queries import get_candle_listing

if __name__ == "__main__":
    all_listings = get_candle_listing.get_listings()
    earnings = earning_helpers.get_all_earnings_dates()
    res = event_helpers.get_all_valid_events('PD_BBANDS_6', all_listings, earnings, 2, chunk_size=100)
Full Stack Trace
File "test_multiprocess.py", line 8, in <module>
res = event_helpers.get_all_valid_events('PD_BBANDS_6', all_listings, earnigns, 2, chunk_size=100)
File "/media/data/projects/ml/signal_platform/utils/event_helpers.py", line 53, in get_all_valid_events
valid_events = pool.starmap(get_valid_event_dates_by_listing, payload)
File "/home/froy001/.asdf/installs/python/3.8.12/lib/python3.8/multiprocessing/pool.py", line 372, in starmap
return self._map_async(func, iterable, starmapstar, chunksize).get()
File "/home/froy001/.asdf/installs/python/3.8.12/lib/python3.8/multiprocessing/pool.py", line 771, in get
raise self._value
File "/home/froy001/.asdf/installs/python/3.8.12/lib/python3.8/multiprocessing/pool.py", line 537, in _handle_tasks
put(task)
File "/home/froy001/.asdf/installs/python/3.8.12/lib/python3.8/multiprocessing/connection.py", line 206, in send
self._send_bytes(_ForkingPickler.dumps(obj))
File "/home/froy001/.asdf/installs/python/3.8.12/lib/python3.8/multiprocessing/reduction.py", line 51, in dumps
cls(buf, protocol).dump(obj)
TypeError: cannot pickle '_thread.lock' object
Exception ignored in: <function CommandCursor.__del__ at 0x7f46e91e21f0>
Traceback (most recent call last):
File "/home/froy001/.cache/pypoetry/virtualenvs/signal-platform-31MTNyCe-py3.8/lib/python3.8/site-packages/pymongo/command_cursor.py", line 68, in __del__
File "/home/froy001/.cache/pypoetry/virtualenvs/signal-platform-31MTNyCe-py3.8/lib/python3.8/site-packages/pymongo/command_cursor.py", line 83, in __die
File "/home/froy001/.cache/pypoetry/virtualenvs/signal-platform-31MTNyCe-py3.8/lib/python3.8/site-packages/pymongo/mongo_client.py", line 1696, in _cleanup_cursor
File "/home/froy001/.cache/pypoetry/virtualenvs/signal-platform-31MTNyCe-py3.8/lib/python3.8/site-packages/pymongo/client_session.py", line 466, in _end_session
File "/home/froy001/.cache/pypoetry/virtualenvs/signal-platform-31MTNyCe-py3.8/lib/python3.8/site-packages/pymongo/client_session.py", line 871, in in_transaction
File "/home/froy001/.cache/pypoetry/virtualenvs/signal-platform-31MTNyCe-py3.8/lib/python3.8/site-packages/pymongo/client_session.py", line 362, in active
AttributeError: 'NoneType' object has no attribute 'STARTING'
Update (01-23): I tried the multiprocess library (which uses dill instead of pickle), but it didn't help.
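One avenue, sketched under the assumption (not confirmed by the post) that one of the starmap arguments, e.g. earnings, still holds a live pymongo cursor or a Manager proxy: everything handed to pool.starmap gets pickled, and live cursors carry a _thread.lock, so materializing query results into plain lists before building the payload sidesteps the error, while each worker keeps opening its own MongoClient as the worker above already does.
from multiprocessing import Pool

# Minimal sketch (assumption: earning_helpers.get_all_earnings_dates may return
# a live pymongo cursor). Plain lists/dicts pickle fine; cursors and Manager
# proxies hold a _thread.lock and do not.
earnings = list(earning_helpers.get_all_earnings_dates())
chunks = [all_listings[i:i + chunk_size]
          for i in range(0, len(all_listings), chunk_size)]
payload = [(event_criteria, chunk, earnings, days_around_earnings,
            debug, lookback, lookahead) for chunk in chunks]
with Pool(poolsize) as pool:  # each worker creates its own MongoClient
    valid_events = pool.starmap(get_valid_event_dates_by_listing, payload)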

Related

DAG Import Errors - Invalid arguments were passed

I'm trying to load data from PostgreSQL (local) to Google Cloud Storage using Airflow on Docker, but I get an error like this: https://i.stack.imgur.com/pHzAF.png
Broken DAG: [/opt/airflow/dags/postgres_to_bigquery.py] Traceback (most recent call last):
  File "/home/airflow/.local/lib/python3.7/site-packages/airflow/models/baseoperator.py", line 408, in apply_defaults
    result = func(self, **kwargs, default_args=default_args)
  File "/home/airflow/.local/lib/python3.7/site-packages/airflow/models/baseoperator.py", line 756, in __init__
    f"Invalid arguments were passed to {self.__class__.__name__} (task_id: {task_id}). "
airflow.exceptions.AirflowException: Invalid arguments were passed to PostgresToGCSOperator (task_id: postgres_to_gcs). Invalid arguments were:
**kwargs: {'google_cloud_storage_conn_id': 'gcp_conn'}
And this is the relevant part of my own code:
GCS_CONN = Variable.get('GCS_CONN')

default_args = {
    'owner': 'airflow',
    'retries': 0,
    'retry_delay': timedelta(minutes=5),
}

with DAG(
    dag_id='postgres_to_bigquery',
    default_args=default_args,
    start_date=datetime(2022, 10, 3),
    schedule_interval='@once'
) as dag:
    start = DummyOperator(
        task_id='start',
    )
    postgres_to_gcs = PostgresToGCSOperator(
        task_id=f'postgres_to_gcs',
        postgres_conn_id='postgres_localhost',
        sql=f'select * from orders;',
        bucket='airflow_fakri',
        filename=f'airflow_fakri/data/orders.csv',
        export_format='csv',
        gzip=False,
        use_server_side_cursor=False,
        google_cloud_storage_conn_id=GCS_CONN
    )
It looks like you are indeed passing the wrong argument.
From the docs: https://airflow.apache.org/docs/apache-airflow-providers-google/stable/_api/airflow/providers/google/cloud/transfers/postgres_to_gcs/index.html
The Postgres connection parameter is postgres_conn_id, and in current provider versions the GCS connection is passed as gcp_conn_id; google_cloud_storage_conn_id is no longer an accepted parameter, which is why the operator rejects it.
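A minimal sketch of the corrected call, assuming a recent apache-airflow-providers-google release (same arguments as the question, only the connection kwarg renamed):
postgres_to_gcs = PostgresToGCSOperator(
    task_id='postgres_to_gcs',
    postgres_conn_id='postgres_localhost',
    sql='select * from orders;',
    bucket='airflow_fakri',
    filename='airflow_fakri/data/orders.csv',
    export_format='csv',
    gzip=False,
    use_server_side_cursor=False,
    gcp_conn_id=GCS_CONN,  # renamed from google_cloud_storage_conn_id
)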

Error creating universal sentence encoder embeddings using beam & tf transform

I have a simple Beam pipeline that takes some text and gets embeddings using the Universal Sentence Encoder with tf.Transform, very similar to the demo made with TF 1.
import tensorflow as tf
import apache_beam as beam
import tensorflow_transform.beam as tft_beam
import tensorflow_transform.coders as tft_coders
from apache_beam.options.pipeline_options import PipelineOptions
import tempfile

model = None

def embed_text(text):
    import tensorflow_hub as hub
    global model
    if model is None:
        model = hub.load(
            'https://tfhub.dev/google/universal-sentence-encoder/4')
    embedding = model(text)
    return embedding

def get_metadata():
    from tensorflow_transform.tf_metadata import dataset_schema
    from tensorflow_transform.tf_metadata import dataset_metadata
    metadata = dataset_metadata.DatasetMetadata(dataset_schema.Schema({
        'id': dataset_schema.ColumnSchema(
            tf.string, [], dataset_schema.FixedColumnRepresentation()),
        'text': dataset_schema.ColumnSchema(
            tf.string, [], dataset_schema.FixedColumnRepresentation())
    }))
    return metadata

def preprocess_fn(input_features):
    text_integerized = embed_text(input_features['text'])
    output_features = {
        'id': input_features['id'],
        'embedding': text_integerized
    }
    return output_features

def run(pipeline_options, known_args):
    argv = None  # if None, uses sys.argv
    pipeline_options = PipelineOptions(argv)
    pipeline = beam.Pipeline(options=pipeline_options)
    with tft_beam.Context(temp_dir=tempfile.mkdtemp()):
        articles = (
            pipeline
            | beam.Create([
                {'id': '01', 'text': 'To be, or not to be: that is the question: '},
                {'id': '02', 'text': "Whether 'tis nobler in the mind to suffer "},
                {'id': '03', 'text': 'The slings and arrows of outrageous fortune, '},
                {'id': '04', 'text': 'Or to take arms against a sea of troubles, '},
            ]))
        articles_dataset = (articles, get_metadata())
        transformed_dataset, transform_fn = (
            articles_dataset
            | 'Extract embeddings' >> tft_beam.AnalyzeAndTransformDataset(preprocess_fn)
        )
        transformed_data, transformed_metadata = transformed_dataset
        _ = (
            transformed_data | 'Write embeddings to TFRecords' >> beam.io.tfrecordio.WriteToTFRecord(
                file_path_prefix='{0}'.format(known_args.output_dir),
                file_name_suffix='.tfrecords',
                coder=tft_coders.example_proto_coder.ExampleProtoCoder(
                    transformed_metadata.schema),
                num_shards=1
            )
        )
    result = pipeline.run()
    result.wait_until_finish()  # Beam's PipelineResult method is wait_until_finish
python 3.6.8, tf==2.0, tf_transform==0.15, apache-beam[gcp]==2.16 (I tried various compatible combos from https://github.com/tensorflow/transform)
I am getting an error when tf_transform calls the graph analyser:
...
  File "/Users/justingrace/.pyenv/versions/hlx36/lib/python3.6/site-packages/tensorflow_transform/beam/impl.py", line 462, in process
    lambda: self._make_graph_state(saved_model_dir))
  File "/Users/justingrace/.pyenv/versions/hlx36/lib/python3.6/site-packages/tfx_bsl/beam/shared.py", line 221, in acquire
    return _shared_map.acquire(self._key, constructor_fn)
  File "/Users/justingrace/.pyenv/versions/hlx36/lib/python3.6/site-packages/tfx_bsl/beam/shared.py", line 184, in acquire
    result = control_block.acquire(constructor_fn)
  File "/Users/justingrace/.pyenv/versions/hlx36/lib/python3.6/site-packages/tfx_bsl/beam/shared.py", line 87, in acquire
    result = constructor_fn()
  File "/Users/justingrace/.pyenv/versions/hlx36/lib/python3.6/site-packages/tensorflow_transform/beam/impl.py", line 462, in <lambda>
    lambda: self._make_graph_state(saved_model_dir))
  File "/Users/justingrace/.pyenv/versions/hlx36/lib/python3.6/site-packages/tensorflow_transform/beam/impl.py", line 438, in _make_graph_state
    self._exclude_outputs, self._tf_config)
  File "/Users/justingrace/.pyenv/versions/hlx36/lib/python3.6/site-packages/tensorflow_transform/beam/impl.py", line 357, in __init__
    tensor_inputs = graph_tools.get_dependent_inputs(graph, inputs, fetches)
  File "/Users/justingrace/.pyenv/versions/hlx36/lib/python3.6/site-packages/tensorflow_transform/graph_tools.py", line 686, in get_dependent_inputs
    sink_tensors_ready)
  File "/Users/justingrace/.pyenv/versions/hlx36/lib/python3.6/site-packages/tensorflow_transform/graph_tools.py", line 499, in __init__
    table_init_op, graph_analyzer_for_table_init, translate_path_fn)
  File "/Users/justingrace/.pyenv/versions/hlx36/lib/python3.6/site-packages/tensorflow_transform/graph_tools.py", line 560, in _get_table_init_op_source_info
    if table_init_op.type not in _TABLE_INIT_OP_TYPES:
AttributeError: 'Tensor' object has no attribute 'type' [while running 'Extract embeddings/TransformDataset/Transform']
Exception ignored in: <bound method CapturableResourceDeleter.__del__ of <tensorflow.python.training.tracking.tracking.CapturableResourceDeleter object at 0x14152fbe0>>
Traceback (most recent call last):
  File "/Users/justingrace/.pyenv/versions/hlx36/lib/python3.6/site-packages/tensorflow_core/python/training/tracking/tracking.py", line 190, in __del__
  File "/Users/justingrace/.pyenv/versions/hlx36/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py", line 3872, in as_default
  File "/Users/justingrace/.pyenv/versions/3.6.8/lib/python3.6/contextlib.py", line 159, in helper
TypeError: 'NoneType' object is not callable
It appears that the graph analyzer is expecting a list of operations with a type attribute, but it is receiving a tensor. I can't grasp why this error is occurring, other than a bug in the graph analyzer or a compatibility issue with tfx_bsl (there seem to be issues with pyarrow 0.14, so I have downgraded to 0.13).
Output of pip freeze:
absl-py==0.8.1
annoy==1.12.0
apache-beam==2.16.0
appnope==0.1.0
astor==0.8.1
astunparse==1.6.3
attrs==19.1.0
avro-python3==1.9.1
backcall==0.1.0
bleach==3.1.0
cachetools==3.1.1
certifi==2019.11.28
chardet==3.0.4
crcmod==1.7
cymem==1.31.2
cytoolz==0.9.0.1
decorator==4.4.1
defusedxml==0.6.0
dill==0.3.0
docopt==0.6.2
en-core-web-lg==2.0.0
en-coref-lg==3.0.0
en-ner-trained==2.0.0
entrypoints==0.3
fastavro==0.21.24
fasteners==0.15
flashtext==2.7
future==0.18.2
fuzzywuzzy==0.16.0
gast==0.2.2
google-api-core==1.16.0
google-apitools==0.5.28
google-auth==1.11.0
google-auth-oauthlib==0.4.1
google-cloud-bigquery==1.17.1
google-cloud-bigtable==1.0.0
google-cloud-core==1.3.0
google-cloud-datastore==1.7.4
google-cloud-pubsub==1.0.2
google-pasta==0.1.8
google-resumable-media==0.4.1
googleapis-common-protos==1.51.0
grpc-google-iam-v1==0.12.3
grpcio==1.24.0
h5py==2.10.0
hdfs==2.5.8
httplib2==0.12.0
idna==2.8
importlib-metadata==1.5.0
ipykernel==5.1.4
ipython==7.12.0
ipython-genutils==0.2.0
ipywidgets==7.5.1
jedi==0.16.0
Jinja2==2.11.1
jsonpickle==1.2
jsonschema==3.2.0
jupyter==1.0.0
jupyter-client==5.3.4
jupyter-console==6.1.0
jupyter-core==4.6.2
Keras-Applications==1.0.8
Keras-Preprocessing==1.1.0
lxml==4.2.1
Markdown==3.2.1
MarkupSafe==1.1.1
mistune==0.8.4
mock==2.0.0
monotonic==1.5
more-itertools==8.2.0
msgpack==0.6.2
msgpack-numpy==0.4.4
murmurhash==0.28.0
nbconvert==5.6.1
nbformat==5.0.4
networkx==2.1
nltk==3.4.5
notebook==6.0.3
numpy==1.18.1
oauth2client==3.0.0
oauthlib==3.1.0
opt-einsum==3.1.0
packaging==20.1
pandas==0.23.0
pandocfilters==1.4.2
parso==0.6.1
pathlib2==2.3.5
pbr==5.4.4
pexpect==4.8.0
pickleshare==0.7.5
plac==0.9.6
pluggy==0.13.1
preshed==1.0.1
prometheus-client==0.7.1
prompt-toolkit==3.0.3
proto-google-cloud-datastore-v1==0.90.4
protobuf==3.11.3
psutil==5.6.7
ptyprocess==0.6.0
py==1.8.1
pyahocorasick==1.4.0
pyarrow==0.13.0
pyasn1==0.4.8
pyasn1-modules==0.2.8
pydot==1.4.1
Pygments==2.5.2
PyHamcrest==1.9.0
pymongo==3.10.1
pyparsing==2.4.6
pyrsistent==0.15.7
pytest==5.3.5
python-dateutil==2.8.0
python-Levenshtein==0.12.0
pytz==2019.3
PyYAML==3.13
pyzmq==18.1.1
qtconsole==4.6.0
regex==2017.4.5
repoze.lru==0.7
requests==2.22.0
requests-oauthlib==1.3.0
rsa==4.0
scikit-learn==0.19.1
scipy==1.4.1
Send2Trash==1.5.0
six==1.14.0
spacy==2.0.12
tb-nightly==2.2.0a20200217
tensorboard==2.0.2
tensorflow==2.0.0
tensorflow-estimator==2.0.1
tensorflow-hub==0.6.0
tensorflow-metadata==0.15.2
tensorflow-serving-api==2.1.0
tensorflow-transform==0.15.0
termcolor==1.1.0
terminado==0.8.3
testpath==0.4.4
textblob==0.15.1
tf-estimator-nightly==2.1.0.dev2020012309
tf-nightly==2.2.0.dev20200217
tfx-bsl==0.15.0
thinc==6.10.3
toolz==0.10.0
tornado==6.0.3
tqdm==4.23.3
traitlets==4.3.3
typing==3.7.4.1
typing-extensions==3.7.4.1
ujson==1.35
Unidecode==1.0.22
urllib3==1.25.8
wcwidth==0.1.8
webencodings==0.5.1
Werkzeug==1.0.0
Whoosh==2.7.4
widgetsnbextension==3.5.1
wrapt==1.11.2
zipp==2.2.0
This could be an underlying issue, according to this GitHub post. Try using an updated version of TensorFlow (2.1.0), or maybe even updated versions of your Keras packages.
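If upgrading, a sketch of one matched set of pins; the exact combination is an assumption taken from the compatibility table in the tensorflow/transform README, so verify against that table before installing:
pip install "tensorflow==2.1.0" "tensorflow-transform==0.21.2" "apache-beam[gcp]==2.17.0" "tfx-bsl==0.21.3"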

Integrate djongo (Mongo ORM) into Django Rest Framework

I am developing a POC: an app that exposes a REST API and talks to MongoDB, in Python.
For this we found several techs, such as Django REST Framework for the API side and djongo for the ORM side. Nevertheless, I scanned lots of tutorials on how to use the djongo ORM in DRF and found nothing, BUT apparently it's possible. Can someone confirm?
My main problem is that my POC does not work at all: I used djongo models in my DRF serializers, but it fails and I don't understand why. Can someone figure out what's going on?
models.py:
from djongo import models

class Channel(models.Model):
    sourceId = models.IntegerField(default=-1)
    usageId = models.IntegerField(default=0)
    channelId = models.IntegerField(default=0)
    cabinetId = models.IntegerField(default=0)
    zoneId = models.IntegerField(default=0)

class Product(models.Model):
    dateCreation = models.DateTimeField(auto_now=True)
    dateUpdate = models.DateTimeField(auto_now=True)
    name = models.CharField(max_length=50, default="Unknown product name")
    channels = models.EmbeddedModelField(
        model_container=Channel,
    )
    objects = models.DjongoManager()
views.py:
from django.http import HttpResponse, JsonResponse
from django.views.decorators.csrf import csrf_exempt
from rest_framework.parsers import JSONParser
from Api.models import Product
from Api.serializers import ProductSerializer

@csrf_exempt
def ProductList(aRequest):
    """
    @brief List all products, or create a new product.
    """
    if aRequest.method == 'GET':
        wProducts = Product.objects.all()
        wSerializer = ProductSerializer(wProducts, many=True)
        return JsonResponse(wSerializer.data, safe=False)
    elif aRequest.method == 'POST':
        data = JSONParser().parse(aRequest)
        wSerializer = ProductSerializer(data=data)
        if wSerializer.is_valid():
            wSerializer.save()
            return JsonResponse(wSerializer.data, status=201)
        return JsonResponse(wSerializer.errors, status=400)

@csrf_exempt
def ProductDetail(aRequest, pk):
    """
    @brief Retrieve, update or delete a product.
    """
    try:
        wProducts = Product.objects.get(pk=pk)
    except Product.DoesNotExist:
        return HttpResponse(status=404)
    if aRequest.method == 'GET':
        wSerializer = ProductSerializer(wProducts)
        return JsonResponse(wSerializer.data)
    elif aRequest.method == 'PUT':
        data = JSONParser().parse(aRequest)
        wSerializer = ProductSerializer(wProducts, data=data)
        if wSerializer.is_valid():
            wSerializer.save()
            return JsonResponse(wSerializer.data)
        return JsonResponse(wSerializer.errors, status=400)
    elif aRequest.method == 'DELETE':
        wProducts.delete()  # delete the fetched instance, not the class
        return HttpResponse(status=204)
serializers.py:
from rest_framework import serializers
from Api.models import Product, Channel

class ChannelSerializer(serializers.ModelSerializer):
    class Meta:
        model = Channel
        fields = ('sourceId', 'usageId', 'channelId', 'cabinetId', 'zoneId')

    def create(self, validated_data):
        wChannel = Channel.objects.create(**validated_data)
        return wChannel

class ProductSerializer(serializers.ModelSerializer):
    channels = ChannelSerializer(many=True)

    class Meta:
        model = Product
        fields = ('dateCreation', 'dateUpdate', 'name', 'channels')

    def create(self, validated_data):
        wChannels = validated_data.pop("channels")
        wProduct = Product.objects.create(**validated_data)
        for wChannel in wChannels:
            Channel.objects.create(product=wProduct, **wChannel)
        return wProduct
When I run my server with this POST request:
{
    "dateCreation": "2018-07-20 12:00:00.000",
    "dateUpdate": "2018-07-20 12:00:00.000",
    "name": "post_test_channel_1",
    "channels": [{
        "sourceId": -1,
        "usageId": 100,
        "channelId": 0,
        "cabinetId": 0,
        "zoneId": 1
    }]
}
I obtain this stack trace:
Internal Server Error: /products/
Traceback (most recent call last):
  File "/home/soulasb/projects/POC/venv-app/lib/python3.6/site-packages/django/core/handlers/exception.py", line 35, in inner
    response = get_response(request)
  File "/home/soulasb/projects/POC/venv-app/lib/python3.6/site-packages/django/core/handlers/base.py", line 128, in _get_response
    response = self.process_exception_by_middleware(e, request)
  File "/home/soulasb/projects/POC/venv-app/lib/python3.6/site-packages/django/core/handlers/base.py", line 126, in _get_response
    response = wrapped_callback(request, *callback_args, **callback_kwargs)
  File "/home/soulasb/projects/POC/venv-app/lib/python3.6/site-packages/django/views/decorators/csrf.py", line 54, in wrapped_view
    return view_func(*args, **kwargs)
  File "/home/soulasb/projects/POC/PocEms/Api/views.py", line 25, in ProductList
    wSerializer.save()
  File "/home/soulasb/projects/POC/venv-app/lib/python3.6/site-packages/rest_framework/serializers.py", line 214, in save
    self.instance = self.create(validated_data)
  File "/home/soulasb/projects/POC/PocEms/Api/serializers.py", line 29, in create
    wProduct = Product.objects.create(**validated_data)
  File "/home/soulasb/projects/POC/venv-app/lib/python3.6/site-packages/django/db/models/manager.py", line 82, in manager_method
    return getattr(self.get_queryset(), name)(*args, **kwargs)
  File "/home/soulasb/projects/POC/venv-app/lib/python3.6/site-packages/django/db/models/query.py", line 417, in create
    obj.save(force_insert=True, using=self.db)
  File "/home/soulasb/projects/POC/venv-app/lib/python3.6/site-packages/django/db/models/base.py", line 729, in save
    force_update=force_update, update_fields=update_fields)
  File "/home/soulasb/projects/POC/venv-app/lib/python3.6/site-packages/django/db/models/base.py", line 759, in save_base
    updated = self._save_table(raw, cls, force_insert, force_update, using, update_fields)
  File "/home/soulasb/projects/POC/venv-app/lib/python3.6/site-packages/django/db/models/base.py", line 842, in _save_table
    result = self._do_insert(cls._base_manager, using, fields, update_pk, raw)
  File "/home/soulasb/projects/POC/venv-app/lib/python3.6/site-packages/django/db/models/base.py", line 880, in _do_insert
    using=using, raw=raw)
  File "/home/soulasb/projects/POC/venv-app/lib/python3.6/site-packages/django/db/models/manager.py", line 82, in manager_method
    return getattr(self.get_queryset(), name)(*args, **kwargs)
  File "/home/soulasb/projects/POC/venv-app/lib/python3.6/site-packages/django/db/models/query.py", line 1125, in _insert
    return query.get_compiler(using=using).execute_sql(return_id)
  File "/home/soulasb/projects/POC/venv-app/lib/python3.6/site-packages/django/db/models/sql/compiler.py", line 1284, in execute_sql
    for sql, params in self.as_sql():
  File "/home/soulasb/projects/POC/venv-app/lib/python3.6/site-packages/django/db/models/sql/compiler.py", line 1237, in as_sql
    for obj in self.query.objs
  File "/home/soulasb/projects/POC/venv-app/lib/python3.6/site-packages/django/db/models/sql/compiler.py", line 1237, in <listcomp>
    for obj in self.query.objs
  File "/home/soulasb/projects/POC/venv-app/lib/python3.6/site-packages/django/db/models/sql/compiler.py", line 1236, in <listcomp>
    [self.prepare_value(field, self.pre_save_val(field, obj)) for field in fields]
  File "/home/soulasb/projects/POC/venv-app/lib/python3.6/site-packages/django/db/models/sql/compiler.py", line 1176, in prepare_value
    value = field.get_db_prep_save(value, connection=self.connection)
  File "/home/soulasb/projects/POC/venv-app/lib/python3.6/site-packages/django/db/models/fields/__init__.py", line 767, in get_db_prep_save
    return self.get_db_prep_value(value, connection=connection, prepared=False)
  File "/home/soulasb/projects/POC/venv-app/lib/python3.6/site-packages/djongo/models/fields.py", line 461, in get_db_prep_value
    model=Model
ValueError: Value: None must be instance of Model: <class 'django.db.models.base.Model'>
This usually happens when you have added an EmbeddedModelField to your model but are not passing an object for this field when creating an entry for the model.
I didn't find an option to declare an EmbeddedModelField with null=True.
Hope this helps someone.
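A minimal sketch of a create() that supplies the embedded object, assuming (per the models above) that Product.channels holds a single embedded Channel; taking the first posted channel is an illustrative choice, not something from the original post:
def create(self, validated_data):
    wChannels = validated_data.pop("channels")
    # Pass a Channel instance for the EmbeddedModelField instead of
    # creating Channels separately with a non-existent reverse FK.
    wProduct = Product.objects.create(
        channels=Channel(**wChannels[0]),  # hypothetical: first posted channel
        **validated_data,
    )
    return wProduct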
It could be ENFORCE_SCHEMA set to True in settings.py. Maybe change it to 'ENFORCE_SCHEMA': False.
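For reference, a minimal sketch of where that key lives; 'your-db' is a placeholder name:
# settings.py (sketch): ENFORCE_SCHEMA is a key of the djongo DATABASES entry
DATABASES = {
    'default': {
        'ENGINE': 'djongo',
        'NAME': 'your-db',  # placeholder
        'ENFORCE_SCHEMA': False,
    }
}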

Making Locust log in to a Web Application

I want Locust to be able to log in to my web application and start clicking the links inside it.
With this code I only get activity for the front page with the login, and I don't see any traffic from inside the application.
Code:
import random
from locust import HttpLocust, TaskSet, task
from pyquery import PyQuery

class WalkPages(TaskSet):
    def on_start(self):
        self.client.post("/", {
            "UserName": "my@email.com",
            "Password": "2Password!",
            "submit": "Sign In"
        })
        self.index_page()

    @task(10)
    def index_page(self):
        r = self.client.get("/Dashboard.mvc")
        pq = PyQuery(r.content)
        link_elements = pq("a")
        self.urls_on_current_page = []
        for l in link_elements:
            if "href" in l.attrib:
                self.urls_on_current_page.append(l.attrib["href"])

    @task(30)
    def load_page(self):
        url = random.choice(self.urls_on_current_page)
        r = self.client.get(url)

class AwesomeUser(HttpLocust):
    task_set = WalkPages
    host = "https://myenv.beta.webapp.com"
    min_wait = 20 * 1000
    max_wait = 60 * 1000
I get the following message in the terminal after the first round.
[2015-02-13 12:08:43,740] webapp-qa/ERROR/stderr: Traceback (most recent call last):
  File "/usr/local/lib/python2.7/dist-packages/locust/core.py", line 267, in run
    self.execute_next_task()
  File "/usr/local/lib/python2.7/dist-packages/locust/core.py", line 293, in execute_next_task
    self.execute_task(task["callable"], *task["args"], **task["kwargs"])
  File "/usr/local/lib/python2.7/dist-packages/locust/core.py", line 305, in execute_task
    task(self, *args, **kwargs)
  File "/home/webapp/LoadTest/locustfile.py", line 31, in load_page
    url = random.choice(self.urls_on_current_page)
  File "/usr/lib/python2.7/random.py", line 273, in choice
    return seq[int(self.random() * len(seq))]  # raises IndexError if seq is empty
IndexError: list index out of range
(the same traceback is printed twice more, at 12:08:43,752 and 12:08:43,775)
Your list may be empty.
@task(30)
def load_page(self):
    if self.urls_on_current_page:
        url = random.choice(self.urls_on_current_page)
        r = self.client.get(url)
It took time, but someone may need this. My findings in your code: the login request seems incorrect (check mine); you cannot reach a variable defined inside one function from another function; and task(10) is not suitable for a data-setter function. Set urls_on_current_page as a class variable so it serves the other class members. See my code and comments:
import random
from locust import HttpLocust, TaskSet, task
from pyquery import PyQuery

class WalkPages(TaskSet):
    # define the variable here to access it from inside the functions
    urls_on_current_page = []

    def login(self):
        self.client.post("/login", data={"UserName": "mesutgunes@email.com", "Password": "password"})

    def get_urls(self):
        r = self.client.get("/Dashboard.mvc")
        pq = PyQuery(r.content)
        link_elements = pq("a")
        for link in link_elements:
            if "href" in link.attrib and "http" not in link.attrib["href"]:
                # skip external links on the page
                self.urls_on_current_page.append(link.attrib["href"])

    def on_start(self):
        self.login()
        self.get_urls()

    @task(30)
    def load_page(self):
        url = random.choice(self.urls_on_current_page)
        r = self.client.get(url)

class AwesomeUser(HttpLocust):
    task_set = WalkPages
    host = "https://myenv.beta.webapp.com"
    min_wait = 20 * 1000
    max_wait = 60 * 1000

download file from mongo gridfs with python

I have uploaded files to Mongo. But when I try to download them from Mongo via an HTTP response in the web browser, it does not work.
Here is the views.py:
if filename is not None:
    file_ = db.fs.files.find_one({
        'filename': filename
    })
    file_id = file_['_id']
    wrapper = fs.get(file_id).read()
    response = StreamingHttpResponse(FileWrapper(wrapper), content_type=file_['contentType'])
    response['Content-Disposition'] = 'attachment; filename=%s' % str(filename)
    response['Content-Length'] = file_['length']
    return response
I got this error:
Traceback (most recent call last):
  File "/usr/lib/python2.7/wsgiref/handlers.py", line 86, in run
    self.finish_response()
  File "/usr/lib/python2.7/wsgiref/handlers.py", line 126, in finish_response
    for data in self.result:
  File "/usr/local/lib/python2.7/dist-packages/django/utils/six.py", line 473, in next
    return type(self).__next__(self)
  File "/usr/local/lib/python2.7/dist-packages/django/http/response.py", line 292, in __next__
    return self.make_bytes(next(self._iterator))
  File "/usr/lib/python2.7/wsgiref/util.py", line 30, in next
    data = self.filelike.read(self.blksize)
AttributeError: 'str' object has no attribute 'read'
But when I change StreamingHttpResponse to HttpResponse, the error is as follows:
[30/Jul/2014 17:29:43] "GET /download/cs101/ HTTP/1.1" 200 664
/usr/lib/python2.7/wsgiref/handlers.py:126: DeprecationWarning:
Creating streaming responses with `HttpResponse` is deprecated.
Use `StreamingHttpResponse` instead if you need the streaming behavior.
  for data in self.result:
Traceback (most recent call last):
  File "/usr/lib/python2.7/wsgiref/handlers.py", line 86, in run
    self.finish_response()
  File "/usr/lib/python2.7/wsgiref/handlers.py", line 126, in finish_response
    for data in self.result:
  File "/usr/local/lib/python2.7/dist-packages/django/utils/six.py", line 473, in next
    return type(self).__next__(self)
  File "/usr/local/lib/python2.7/dist-packages/django/http/response.py", line 292, in __next__
    return self.make_bytes(next(self._iterator))
  File "/usr/lib/python2.7/wsgiref/util.py", line 30, in next
    data = self.filelike.read(self.blksize)
AttributeError: 'str' object has no attribute 'read'
Thanks in advance!
You're calling the read method in:
wrapper = fs.get(file_id).read()
So you're getting a str (assuming Python 2; on Python 3 you'd get bytes). FileWrapper needs a file-like object, which a str of course is not.
Try:
wrapper = fs.get(file_id)
This will return a file-like object.
OTOH, pymongo's .get() returns a GridOut instance, which already supports iteration, so why not try something like:
wrapper = fs.get(file_id)
response = StreamingHttpResponse(wrapper, content_type=file_['contentType'])
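A minimal end-to-end sketch of the view with that change applied; the view name is hypothetical, and db / fs (a pymongo Database and GridFS instance) plus the URL pattern are assumed to be set up as in the question:
from wsgiref.util import FileWrapper
from django.http import StreamingHttpResponse

def download(request, filename):  # hypothetical view name
    file_ = db.fs.files.find_one({'filename': filename})  # db / fs from the question's setup
    grid_out = fs.get(file_['_id'])  # GridOut is file-like and iterable
    response = StreamingHttpResponse(FileWrapper(grid_out),
                                     content_type=file_['contentType'])
    response['Content-Disposition'] = 'attachment; filename=%s' % str(filename)
    response['Content-Length'] = file_['length']
    return response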