AttributeError when using lettuce 'world' - lettuce

I have two files:
steps.py:
from lettuce import *
from splinter.browser import Browser

@before.harvest
def set_browser():
    world.browser = Browser('webdriver.chrome')

@step(u'Given I visit "([^"]*)"')
def given_i_visit(step, url):
    world.browser.visit(url)
test.feature:
Feature: Do some basic tests

  Scenario: Check whether the website is accessable
    Given I visit "/"
Running lettuce against them returns this:
Feature: Do some basic tests # features/test.feature:1
Scenario: Check whether the website is accessable # features/test.feature:2
Given I visit "/" # features/steps.py:8
Traceback (most recent call last):
File "/..../site-packages/lettuce/core.py", line 125, in __call__
ret = self.function(self.step, *args, **kw)
File "/..../test/features/steps.py", line 9, in given_i_visit
world.browser.visit(url)
AttributeError: 'thread._local' object has no attribute 'browser'
1 feature (0 passed)
1 scenario (0 passed)
1 step (1 failed, 0 passed)
Any ideas on what could be going wrong?

Although it isn't in the documentation, place a terrain.py file in the same directory as your steps and feature files. Initialize the world attribute there with any value and you should be OK.
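For illustration, a minimal terrain.py sketch, assuming lettuce's before.all hook (which takes no arguments); the browser setup line is copied from the question:
# terrain.py -- lives next to your feature and step files; lettuce loads it automatically
from lettuce import before, world
from splinter.browser import Browser

@before.all
def set_browser():
    # Initialize the attribute on world once, so every step can rely on it existing
    world.browser = Browser('webdriver.chrome')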

The problem is that the before.harvest hook receives an argument, so the correct code would be the following:
from lettuce import *
from splinter import Browser

@before.harvest
def set_browser(data):
    world.browser = Browser('webdriver.chrome')

@step(u'Given I visit "([^"]*)"')
def given_i_visit(step, url):
    world.browser.visit(url)
Hope it helps!

Related

Adjust wait_time in locust using custom arguments via CLI

I want to adjust the wait_time parameter by passing it via the CLI.
I have tried the following:
from locust import HttpUser, constant, events

custom_wait_time = None

# Add custom argument to locust
@events.init_command_line_parser.add_listener
def init_parser(parser):
    parser.add_argument("--locust-wait-time",
                        type=int,
                        include_in_web_ui=True,
                        default=None,
                        help="Wait time per each request of a user.")

@events.init.add_listener
def _(environment, **kwargs):
    global custom_wait_time
    custom_wait_time = int(environment.parsed_options.locust_wait_time)
    print(custom_wait_time)  # First print

class MyUser(HttpUser):
    global custom_wait_time
    print(custom_wait_time)  # Second print
    wait_time = constant(custom_wait_time)
Assume custom_wait_time=10 is passed via the CLI: the first print gives custom_wait_time=10, while the second print gives custom_wait_time=None instead of 10, so wait_time = constant(custom_wait_time) breaks with the error below:
Traceback (most recent call last):
File "src/gevent/greenlet.py", line 908, in gevent._gevent_cgreenlet.Greenlet.run
File "/Users/opt/anaconda3/envs/ai/lib/python3.7/site-packages/locust/user/users.py", line 176, in run_user
user.run()
File "/Users/opt/anaconda3/envs/ai/lib/python3.7/site-packages/locust/user/users.py", line 144, in run
self._taskset_instance.run()
File "/Users/opt/anaconda3/envs/ai/lib/python3.7/site-packages/locust/user/task.py", line 367, in run
self.wait()
File "/Users/opt/anaconda3/envs/ai/lib/python3.7/site-packages/locust/user/task.py", line 445, in wait
self._sleep(self.wait_time())
File "/Users/opt/anaconda3/envs/ai/lib/python3.7/site-packages/locust/user/task.py", line 451, in _sleep
gevent.sleep(seconds)
File "/Users/opt/anaconda3/envs/ai/lib/python3.7/site-packages/gevent/hub.py", line 157, in sleep
if seconds <= 0:
TypeError: '<=' not supported between instances of 'NoneType' and 'int'
Any help would be appreciated.
The problem is that the code runs in the wrong order: MyUser is defined before any of the init handlers are called.
If you instead do MyUser.wait_time = constant(custom_wait_time) inside your init handler (and don't set it at all in the class), it should work.
That way you don't need any globals either :)
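A minimal sketch of that approach (the host value and the no-op task are placeholders, not from the question):
from locust import HttpUser, constant, events, task

@events.init_command_line_parser.add_listener
def init_parser(parser):
    parser.add_argument("--locust-wait-time", type=int, default=1,
                        help="Wait time per each request of a user.")

@events.init.add_listener
def set_wait_time(environment, **kwargs):
    # Patch the class attribute after the CLI has been parsed; no globals needed
    MyUser.wait_time = constant(environment.parsed_options.locust_wait_time)

class MyUser(HttpUser):
    host = "http://localhost"  # placeholder target

    @task
    def noop(self):
        pass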
I've just done the same thing.
@events.init_command_line_parser.add_listener
def _(parser):
    parser.add_argument("--waitTime", type=float, env_var="LOCUST_WAIT_TIME", default=1.0,
                        help="wait time between each task of an user")
    # Set `include_in_web_ui` to False if you want to hide from the web UI
    # parser.add_argument("--my-ui-invisible-argument", include_in_web_ui=False, default="I am invisible")
and in the test class, just use its value like this:
class GetInfoUser(HttpUser):
    def wait_time(self):
        return self.environment.parsed_options.waitTime
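Because of the env_var="LOCUST_WAIT_TIME" argument above, the value can also be supplied through that environment variable instead of the --waitTime flag, and the default of 1.0 applies when neither is given. Defining wait_time as a method also means it is looked up at run time, after the command line has been parsed, which avoids the ordering problem described in the previous answer.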

Error creating universal sentence encoder embeddings using beam & tf transform

I have a simple Beam pipeline that takes some text and gets embeddings using the Universal Sentence Encoder with tf.Transform. It is very similar to the demo built with TF 1.
import tensorflow as tf
import apache_beam as beam
import tensorflow_transform.beam as tft_beam
import tensorflow_transform.coders as tft_coders
from apache_beam.options.pipeline_options import PipelineOptions
import tempfile

model = None

def embed_text(text):
    import tensorflow_hub as hub
    global model
    if model is None:
        model = hub.load(
            'https://tfhub.dev/google/universal-sentence-encoder/4')
    embedding = model(text)
    return embedding

def get_metadata():
    from tensorflow_transform.tf_metadata import dataset_schema
    from tensorflow_transform.tf_metadata import dataset_metadata
    metadata = dataset_metadata.DatasetMetadata(dataset_schema.Schema({
        'id': dataset_schema.ColumnSchema(
            tf.string, [], dataset_schema.FixedColumnRepresentation()),
        'text': dataset_schema.ColumnSchema(
            tf.string, [], dataset_schema.FixedColumnRepresentation())
    }))
    return metadata

def preprocess_fn(input_features):
    text_integerized = embed_text(input_features['text'])
    output_features = {
        'id': input_features['id'],
        'embedding': text_integerized
    }
    return output_features

def run(pipeline_options, known_args):
    argv = None  # if None, uses sys.argv
    pipeline_options = PipelineOptions(argv)
    pipeline = beam.Pipeline(options=pipeline_options)
    with tft_beam.Context(temp_dir=tempfile.mkdtemp()):
        articles = (
            pipeline
            | beam.Create([
                {'id': '01', 'text': 'To be, or not to be: that is the question: '},
                {'id': '02', 'text': "Whether 'tis nobler in the mind to suffer "},
                {'id': '03', 'text': 'The slings and arrows of outrageous fortune, '},
                {'id': '04', 'text': 'Or to take arms against a sea of troubles, '},
            ]))
        articles_dataset = (articles, get_metadata())
        transformed_dataset, transform_fn = (
            articles_dataset
            | 'Extract embeddings' >> tft_beam.AnalyzeAndTransformDataset(preprocess_fn)
        )
        transformed_data, transformed_metadata = transformed_dataset
        _ = (
            transformed_data | 'Write embeddings to TFRecords' >> beam.io.tfrecordio.WriteToTFRecord(
                file_path_prefix='{0}'.format(known_args.output_dir),
                file_name_suffix='.tfrecords',
                coder=tft_coders.example_proto_coder.ExampleProtoCoder(
                    transformed_metadata.schema),
                num_shards=1
            )
        )
        result = pipeline.run()
        result.wait_until_finish()
Python 3.6.8, tensorflow==2.0, tensorflow-transform==0.15, apache-beam[gcp]==2.16 (I tried various compatible combos from https://github.com/tensorflow/transform).
I am getting an error when tf_transform calls the graph analyser:
...
File "/Users/justingrace/.pyenv/versions/hlx36/lib/python3.6/site-packages/tensorflow_transform/beam/impl.py", line 462, in process
lambda: self._make_graph_state(saved_model_dir))
File "/Users/justingrace/.pyenv/versions/hlx36/lib/python3.6/site-packages/tfx_bsl/beam/shared.py", line 221, in acquire
return _shared_map.acquire(self._key, constructor_fn)
File "/Users/justingrace/.pyenv/versions/hlx36/lib/python3.6/site-packages/tfx_bsl/beam/shared.py", line 184, in acquire
result = control_block.acquire(constructor_fn)
File "/Users/justingrace/.pyenv/versions/hlx36/lib/python3.6/site-packages/tfx_bsl/beam/shared.py", line 87, in acquire
result = constructor_fn()
File "/Users/justingrace/.pyenv/versions/hlx36/lib/python3.6/site-packages/tensorflow_transform/beam/impl.py", line 462, in <lambda>
lambda: self._make_graph_state(saved_model_dir))
File "/Users/justingrace/.pyenv/versions/hlx36/lib/python3.6/site-packages/tensorflow_transform/beam/impl.py", line 438, in _make_graph_state
self._exclude_outputs, self._tf_config)
File "/Users/justingrace/.pyenv/versions/hlx36/lib/python3.6/site-packages/tensorflow_transform/beam/impl.py", line 357, in __init__
tensor_inputs = graph_tools.get_dependent_inputs(graph, inputs, fetches)
File "/Users/justingrace/.pyenv/versions/hlx36/lib/python3.6/site-packages/tensorflow_transform/graph_tools.py", line 686, in get_dependent_inputs
sink_tensors_ready)
File "/Users/justingrace/.pyenv/versions/hlx36/lib/python3.6/site-packages/tensorflow_transform/graph_tools.py", line 499, in __init__
table_init_op, graph_analyzer_for_table_init, translate_path_fn)
File "/Users/justingrace/.pyenv/versions/hlx36/lib/python3.6/site-packages/tensorflow_transform/graph_tools.py", line 560, in _get_table_init_op_source_info
if table_init_op.type not in _TABLE_INIT_OP_TYPES:
AttributeError: 'Tensor' object has no attribute 'type' [while running 'Extract embeddings/TransformDataset/Transform']
Exception ignored in: <bound method CapturableResourceDeleter.__del__ of <tensorflow.python.training.tracking.tracking.CapturableResourceDeleter object at 0x14152fbe0>>
Traceback (most recent call last):
File "/Users/justingrace/.pyenv/versions/hlx36/lib/python3.6/site-packages/tensorflow_core/python/training/tracking/tracking.py", line 190, in __del__
File "/Users/justingrace/.pyenv/versions/hlx36/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py", line 3872, in as_default
File "/Users/justingrace/.pyenv/versions/3.6.8/lib/python3.6/contextlib.py", line 159, in helper
TypeError: 'NoneType' object is not callable
It appears the graph analyser is expecting a list of operations with a type attribute, but it is receiving a tensor. I can't grasp why this error is occurring, other than a bug in the graph analyzer or a compatibility issue with tfx_bsl (there seem to be issues with pyarrow 0.14, so I have downgraded to 0.13).
Output of pip freeze:
absl-py==0.8.1
annoy==1.12.0
apache-beam==2.16.0
appnope==0.1.0
astor==0.8.1
astunparse==1.6.3
attrs==19.1.0
avro-python3==1.9.1
backcall==0.1.0
bleach==3.1.0
cachetools==3.1.1
certifi==2019.11.28
chardet==3.0.4
crcmod==1.7
cymem==1.31.2
cytoolz==0.9.0.1
decorator==4.4.1
defusedxml==0.6.0
dill==0.3.0
docopt==0.6.2
en-core-web-lg==2.0.0
en-coref-lg==3.0.0
en-ner-trained==2.0.0
entrypoints==0.3
fastavro==0.21.24
fasteners==0.15
flashtext==2.7
future==0.18.2
fuzzywuzzy==0.16.0
gast==0.2.2
google-api-core==1.16.0
google-apitools==0.5.28
google-auth==1.11.0
google-auth-oauthlib==0.4.1
google-cloud-bigquery==1.17.1
google-cloud-bigtable==1.0.0
google-cloud-core==1.3.0
google-cloud-datastore==1.7.4
google-cloud-pubsub==1.0.2
google-pasta==0.1.8
google-resumable-media==0.4.1
googleapis-common-protos==1.51.0
grpc-google-iam-v1==0.12.3
grpcio==1.24.0
h5py==2.10.0
hdfs==2.5.8
httplib2==0.12.0
idna==2.8
importlib-metadata==1.5.0
ipykernel==5.1.4
ipython==7.12.0
ipython-genutils==0.2.0
ipywidgets==7.5.1
jedi==0.16.0
Jinja2==2.11.1
jsonpickle==1.2
jsonschema==3.2.0
jupyter==1.0.0
jupyter-client==5.3.4
jupyter-console==6.1.0
jupyter-core==4.6.2
Keras-Applications==1.0.8
Keras-Preprocessing==1.1.0
lxml==4.2.1
Markdown==3.2.1
MarkupSafe==1.1.1
mistune==0.8.4
mock==2.0.0
monotonic==1.5
more-itertools==8.2.0
msgpack==0.6.2
msgpack-numpy==0.4.4
murmurhash==0.28.0
nbconvert==5.6.1
nbformat==5.0.4
networkx==2.1
nltk==3.4.5
notebook==6.0.3
numpy==1.18.1
oauth2client==3.0.0
oauthlib==3.1.0
opt-einsum==3.1.0
packaging==20.1
pandas==0.23.0
pandocfilters==1.4.2
parso==0.6.1
pathlib2==2.3.5
pbr==5.4.4
pexpect==4.8.0
pickleshare==0.7.5
plac==0.9.6
pluggy==0.13.1
preshed==1.0.1
prometheus-client==0.7.1
prompt-toolkit==3.0.3
proto-google-cloud-datastore-v1==0.90.4
protobuf==3.11.3
psutil==5.6.7
ptyprocess==0.6.0
py==1.8.1
pyahocorasick==1.4.0
pyarrow==0.13.0
pyasn1==0.4.8
pyasn1-modules==0.2.8
pydot==1.4.1
Pygments==2.5.2
PyHamcrest==1.9.0
pymongo==3.10.1
pyparsing==2.4.6
pyrsistent==0.15.7
pytest==5.3.5
python-dateutil==2.8.0
python-Levenshtein==0.12.0
pytz==2019.3
PyYAML==3.13
pyzmq==18.1.1
qtconsole==4.6.0
regex==2017.4.5
repoze.lru==0.7
requests==2.22.0
requests-oauthlib==1.3.0
rsa==4.0
scikit-learn==0.19.1
scipy==1.4.1
Send2Trash==1.5.0
six==1.14.0
spacy==2.0.12
tb-nightly==2.2.0a20200217
tensorboard==2.0.2
tensorflow==2.0.0
tensorflow-estimator==2.0.1
tensorflow-hub==0.6.0
tensorflow-metadata==0.15.2
tensorflow-serving-api==2.1.0
tensorflow-transform==0.15.0
termcolor==1.1.0
terminado==0.8.3
testpath==0.4.4
textblob==0.15.1
tf-estimator-nightly==2.1.0.dev2020012309
tf-nightly==2.2.0.dev20200217
tfx-bsl==0.15.0
thinc==6.10.3
toolz==0.10.0
tornado==6.0.3
tqdm==4.23.3
traitlets==4.3.3
typing==3.7.4.1
typing-extensions==3.7.4.1
ujson==1.35
Unidecode==1.0.22
urllib3==1.25.8
wcwidth==0.1.8
webencodings==0.5.1
Werkzeug==1.0.0
Whoosh==2.7.4
widgetsnbextension==3.5.1
wrapt==1.11.2
zipp==2.2.0
This could be an underlying issue, according to this GitHub post. Try using an updated version of tensorflow (2.1.0), or maybe even an updated version of your Keras packages.

AttributeError on Oandapy API

I'm trying to do "100 lines of python code" for algorithmic trading and have been stopped early in my tracks with an attribute error.
I'm using sublime text and running it on python 3.7.
Here is the code I used:
import configparser
import oandapy as opy

config = configparser.ConfigParser()
config.read("oanda.cfg")

oanda = opy.API(environment="practice",
                access_token=config["oanda"]["access_token"])
Here are the results:
Traceback (most recent call last):
File "100lines.py", line 7, in <module>
oanda = opy.API(environment = "practice",
AttributeError: module 'oandapy' has no attribute 'API'
Try this:
# Print all attributes/functions in the module
print(dir(opy))
and check which attributes are present.
I think you'll have to use APIv20 rather than just API.
The API class is now called APIv20 in the current version (0.0.9), so changing the last line to:
oanda = opy.APIv20(environment='practice', access_token=config['oanda']['access_token'])
will solve your issue.
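Putting it together, the corrected script would look roughly like this (a sketch assuming the same oanda.cfg layout as in the question):
import configparser
import oandapy as opy

config = configparser.ConfigParser()
config.read("oanda.cfg")

# In oandapy 0.0.9 the class is APIv20 rather than API
oanda = opy.APIv20(environment="practice",
                   access_token=config["oanda"]["access_token"])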

Hyperopt mongotrials issue with Pickle: AttributeError: 'module' object has no attribute

I'm trying to use Hyperopt parallel search with MongoDB, and encountered some issues with MongoTrials, which have been discussed here. I've tried all their methods, and I am still unable to find a solution to my specific problem. The specific model I'm trying to minimize is RandomForestRegressor from sklearn.
I've followed this tutorial. And I'm able to print out the calculated "fmin" with no issue.
Here are my steps so far:
1) Activate a virtual environment called "tensorflow" (I've installed all my libraries there)
2) Start MongoDB:
(tensorflow) bash-3.2$ mongod --dbpath . --port 1234 --directoryperdb --journal --nohttpinterface
3) Initiate workers:
(tensorflow) bash-3.2$ hyperopt-mongo-worker --mongo=localhost:1234/foo_db --poll-interval=0.1
4) Run my python code, and my python code is as follows:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error
from hyperopt import hp, fmin, tpe, STATUS_OK, Trials
from hyperopt.mongoexp import MongoTrials

# Preprocessing data
train_xg = pd.read_csv('train.csv')
n_train = len(train_xg)
print "Whole data set size: ", n_train

# Creating columns for features, and categorical features
features_col = [x for x in train_xg.columns if x not in ['id', 'loss', 'log_loss']]
cat_features_col = [x for x in train_xg.select_dtypes(include=['object']).columns if x not in ['id', 'loss', 'log_loss']]
for c in range(len(cat_features_col)):
    train_xg[cat_features_col[c]] = train_xg[cat_features_col[c]].astype('category').cat.codes

# Use this to train random forest regressor
train_xg_x = np.array(train_xg[features_col])
train_xg_y = np.array(train_xg['loss'])

space_rf = {'min_samples_leaf': hp.choice('min_samples_leaf', range(1, 100))}
trials = MongoTrials('mongo://localhost:1234/foo_db/jobs', exp_key='exp1')

def minMe(params):
    # Hyperopt tuning for hyperparameters
    from sklearn.model_selection import cross_val_score
    from sklearn.ensemble import RandomForestRegressor
    from hyperopt import STATUS_OK
    try:
        import dill as pickle
        print('Went with dill')
    except ImportError:
        import pickle

    def hyperopt_rf(params):
        rf = RandomForestRegressor(**params)
        return cross_val_score(rf, train_xg_x, train_xg_y).mean()

    acc = hyperopt_rf(params)
    print 'new acc:', acc, 'params: ', params
    return {'loss': -acc, 'status': STATUS_OK}

best = fmin(fn=minMe, space=space_rf, trials=trials, algo=tpe.suggest, max_evals=100)
print "Best: ", best
5) After I run the above Python code, I get the following errors:
INFO:hyperopt.mongoexp:Error while unpickling. Try installing dill via "pip install dill" for enhanced pickling support.
INFO:hyperopt.mongoexp:job exception: 'module' object has no attribute 'minMe'
Traceback (most recent call last):
File "/Users/WernerChao/tensorflow/bin/hyperopt-mongo-worker", line 6, in <module>
sys.exit(hyperopt.mongoexp.main_worker())
File "/Users/WernerChao/tensorflow/lib/python2.7/site-packages/hyperopt/mongoexp.py", line 1302, in main_worker
return main_worker_helper(options, args)
File "/Users/WernerChao/tensorflow/lib/python2.7/site-packages/hyperopt/mongoexp.py", line 1249, in main_worker_helper
mworker.run_one(reserve_timeout=float(options.reserve_timeout))
File "/Users/WernerChao/tensorflow/lib/python2.7/site-packages/hyperopt/mongoexp.py", line 1064, in run_one
domain = pickle.loads(blob)
AttributeError: 'module' object has no attribute 'minMe'
INFO:hyperopt.mongoexp:PROTOCOL mongo
INFO:hyperopt.mongoexp:USERNAME None
INFO:hyperopt.mongoexp:HOSTNAME localhost
INFO:hyperopt.mongoexp:PORT 1234
INFO:hyperopt.mongoexp:PATH /foo_db/jobs
INFO:hyperopt.mongoexp:DB foo_db
INFO:hyperopt.mongoexp:COLLECTION jobs
INFO:hyperopt.mongoexp:PASS None
INFO:hyperopt.mongoexp:Error while unpickling. Try installing dill via "pip install dill" for enhanced pickling support.
INFO:hyperopt.mongoexp:job exception: 'module' object has no attribute 'minMe'
Traceback (most recent call last):
File "/Users/WernerChao/tensorflow/bin/hyperopt-mongo-worker", line 6, in <module>
sys.exit(hyperopt.mongoexp.main_worker())
File "/Users/WernerChao/tensorflow/lib/python2.7/site-packages/hyperopt/mongoexp.py", line 1302, in main_worker
return main_worker_helper(options, args)
File "/Users/WernerChao/tensorflow/lib/python2.7/site-packages/hyperopt/mongoexp.py", line 1249, in main_worker_helper
mworker.run_one(reserve_timeout=float(options.reserve_timeout))
File "/Users/WernerChao/tensorflow/lib/python2.7/site-packages/hyperopt/mongoexp.py", line 1064, in run_one
domain = pickle.loads(blob)
AttributeError: 'module' object has no attribute 'minMe'
INFO:hyperopt.mongoexp:PROTOCOL mongo
INFO:hyperopt.mongoexp:USERNAME None
INFO:hyperopt.mongoexp:HOSTNAME localhost
INFO:hyperopt.mongoexp:PORT 1234
INFO:hyperopt.mongoexp:PATH /foo_db/jobs
INFO:hyperopt.mongoexp:DB foo_db
INFO:hyperopt.mongoexp:COLLECTION jobs
INFO:hyperopt.mongoexp:PASS None
INFO:hyperopt.mongoexp:Error while unpickling. Try installing dill via "pip install dill" for enhanced pickling support.
INFO:hyperopt.mongoexp:job exception: 'module' object has no attribute 'minMe'
Traceback (most recent call last):
File "/Users/WernerChao/tensorflow/bin/hyperopt-mongo-worker", line 6, in <module>
sys.exit(hyperopt.mongoexp.main_worker())
File "/Users/WernerChao/tensorflow/lib/python2.7/site-packages/hyperopt/mongoexp.py", line 1302, in main_worker
return main_worker_helper(options, args)
File "/Users/WernerChao/tensorflow/lib/python2.7/site-packages/hyperopt/mongoexp.py", line 1249, in main_worker_helper
mworker.run_one(reserve_timeout=float(options.reserve_timeout))
File "/Users/WernerChao/tensorflow/lib/python2.7/site-packages/hyperopt/mongoexp.py", line 1064, in run_one
domain = pickle.loads(blob)
AttributeError: 'module' object has no attribute 'minMe'
INFO:hyperopt.mongoexp:PROTOCOL mongo
INFO:hyperopt.mongoexp:USERNAME None
INFO:hyperopt.mongoexp:HOSTNAME localhost
INFO:hyperopt.mongoexp:PORT 1234
INFO:hyperopt.mongoexp:PATH /foo_db/jobs
INFO:hyperopt.mongoexp:DB foo_db
INFO:hyperopt.mongoexp:COLLECTION jobs
INFO:hyperopt.mongoexp:PASS None
INFO:hyperopt.mongoexp:no job found, sleeping for 0.7s
INFO:hyperopt.mongoexp:Error while unpickling. Try installing dill via "pip install dill" for enhanced pickling support.
INFO:hyperopt.mongoexp:job exception: 'module' object has no attribute 'minMe'
Traceback (most recent call last):
File "/Users/WernerChao/tensorflow/bin/hyperopt-mongo-worker", line 6, in <module>
sys.exit(hyperopt.mongoexp.main_worker())
File "/Users/WernerChao/tensorflow/lib/python2.7/site-packages/hyperopt/mongoexp.py", line 1302, in main_worker
return main_worker_helper(options, args)
File "/Users/WernerChao/tensorflow/lib/python2.7/site-packages/hyperopt/mongoexp.py", line 1249, in main_worker_helper
mworker.run_one(reserve_timeout=float(options.reserve_timeout))
File "/Users/WernerChao/tensorflow/lib/python2.7/site-packages/hyperopt/mongoexp.py", line 1064, in run_one
domain = pickle.loads(blob)
AttributeError: 'module' object has no attribute 'minMe'
INFO:hyperopt.mongoexp:exiting with N=9223372036854775803 after 4 consecutive exceptions
6) Then Mongo workers would shut off.
Things I've tried:
- Install "dill" as the error suggested -> didn't work
- Put global imports into the objective function so it can pickle -> didn't work
- Put a try/except around importing "dill" or "pickle" -> didn't work
Does anyone have similar issues? I'm running out of ideas to try and have been working on this for two days in vain. I think I am missing something really simple here; I just can't seem to find it.
What am I missing?
Any suggestions are welcome!
Had the same problem in Python 3.5. Installing dill didn't help, nor did setting workdir in MongoTrials or on the hyperopt-mongo-worker CLI. hyperopt-mongo-worker doesn't seem to have access to __main__ where the function was defined:
AttributeError: Can't get attribute 'minMe' on <module '__main__' from ...hyperopt-mongo-worker
As @jaikumarm suggested, I circumvented the problem by writing a module file with all the required functions. However, instead of soft-linking it into the bin directory, I extended PYTHONPATH before running hyperopt-mongo-worker:
export PYTHONPATH="${PYTHONPATH}:<dir_with_the_module.py>"
hyperopt-mongo-worker ...
That way, hyperopt-mongo-worker is able to import the module containing minMe.
I fought with this for several days before coming up with a workable solution. There are two problems:
1. The mongo worker spawns a separate process to run the optimizer, so any context from your original Python file is lost and unavailable to this new process.
2. The imports in this new process happen in the context of the hyperopt-mongo-worker script, which in your case will be /Users/WernerChao/tensorflow/bin/.
So my solution is to make the new optimizer function completely self-sufficient:
optimizer.py
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error

# Preprocessing data
train_xg = pd.read_csv('train.csv')
n_train = len(train_xg)
print "Whole data set size: ", n_train

# Creating columns for features, and categorical features
features_col = [x for x in train_xg.columns if x not in ['id', 'loss', 'log_loss']]
cat_features_col = [x for x in train_xg.select_dtypes(include=['object']).columns if x not in ['id', 'loss', 'log_loss']]
for c in range(len(cat_features_col)):
    train_xg[cat_features_col[c]] = train_xg[cat_features_col[c]].astype('category').cat.codes

# Use this to train random forest regressor
train_xg_x = np.array(train_xg[features_col])
train_xg_y = np.array(train_xg['loss'])

def minMe(params):
    # Hyperopt tuning for hyperparameters
    from sklearn.model_selection import cross_val_score
    from sklearn.ensemble import RandomForestRegressor
    from hyperopt import STATUS_OK
    try:
        import dill as pickle
        print('Went with dill')
    except ImportError:
        import pickle

    def hyperopt_rf(params):
        rf = RandomForestRegressor(**params)
        return cross_val_score(rf, train_xg_x, train_xg_y).mean()

    acc = hyperopt_rf(params)
    print 'new acc:', acc, 'params: ', params
    return {'loss': -acc, 'status': STATUS_OK}
wrapper.py
from hyperopt import hp, fmin, tpe, STATUS_OK, Trials
from hyperopt.mongoexp import MongoTrials

import optimizer

space_rf = {'min_samples_leaf': hp.choice('min_samples_leaf', range(1, 100))}
trials = MongoTrials('mongo://localhost:1234/foo_db/jobs', exp_key='exp1')

best = fmin(fn=optimizer.minMe, space=space_rf, trials=trials, algo=tpe.suggest, max_evals=100)
print "Best: ", best
Once you have this code, link optimizer.py into the bin folder:
ln -s /Users/WernerChao/Git/test/optimizer.py /Users/WernerChao/tensorflow/bin/
Now run wrapper.py and then the mongo worker; it should be able to import the optimizer module from its local context and run the minMe function.
Try to install dill in the Python environment of your tensorflow virtualenv (or possibly the worker's):
/Users/WernerChao/tensorflow/lib/python2.7/site-packages/hyperopt
Your aim is to get rid of the hyperopt error message:
hyperopt.mongoexp:Error while unpickling. Try installing dill via "pip install dill" for enhanced pickling support.
This is because Python by default cannot marshal a function. It requires the dill library to extend Python's pickling module for serialising/de-serialising Python objects. In your case, it failed to serialise your function minMe().
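A tiny illustration of the difference (not from the question; the parameter dict below is just an example value):
import pickle
import pickletools
import dill

def minMe(params):
    return params

# Standard pickle stores module-level functions by reference only, so the
# unpickling process must be able to import __main__.minMe itself.
pickletools.dis(pickle.dumps(minMe))   # the dump contains only a reference to __main__.minMe, not its code

# dill can serialize the function body, so another process can rebuild it
# without importing the original script.
restored = dill.loads(dill.dumps(minMe, recurse=True))
print(restored({'min_samples_leaf': 3}))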
I made a separate file which calculates the loss and copied it to /anaconda2/bin/ and /anaconda2/lib/python2.7/site-packages/hyperopt, and it is working fine.
This was my traceback:
Traceback (most recent call last):
File "/home/greatskull/anaconda2/bin/hyperopt-mongo-worker", line 6, in <module>
sys.exit(hyperopt.mongoexp.main_worker())
File "/home/greatskull/anaconda2/lib/python2.7/site-packages/hyperopt/mongoexp.py", line 1302, in main_worker
return main_worker_helper(options, args)
File "/home/greatskull/anaconda2/lib/python2.7/site-packages/hyperopt/mongoexp.py", line 1249, in main_worker_helper
mworker.run_one(reserve_timeout=float(options.reserve_timeout))
File "/home/greatskull/anaconda2/lib/python2.7/site-packages/hyperopt/mongoexp.py", line 1073, in run_one
with temp_dir(workdir, erase_created_workdir), working_dir(workdir):
File "/home/greatskull/anaconda2/lib/python2.7/contextlib.py", line 17, in __enter__
return self.gen.next()
File "/home/greatskull/anaconda2/lib/python2.7/site-packages/hyperopt/utils.py", line 229, in temp_dir
os.makedirs(dir)
File "/home/greatskull/anaconda2/lib/python2.7/os.py", line 150, in makedirs
makedirs(head, mode)
File "/home/greatskull/anaconda2/lib/python2.7/os.py", line 157, in makedirs
mkdir(name, mode)

How to plug txyam on top of tornado IOLoop

I've tried to use tornado.platform.twisted to integrate the txyam memcached client, but when I try to check that it works, the following error is thrown:
Traceback (most recent call last):
File "swcomet/tx_memcache_helper.py", line 32, in <module>
mem_helper = MemcacheHelper()
File "swcomet/tx_memcache_helper.py", line 19, in __init__
self.add(4)
File "/home/rustem/work/sw.services.swcomet.python/venv/local/lib/python2.7/site-packages/tornado/gen.py", line 117, in wrapper
gen = func(*args, **kwargs)
File "swcomet/tx_memcache_helper.py", line 25, in add
self.mem.getPickled(user_id, decompress=True)
File "/home/rustem/work/sw.services.swcomet.python/venv/lib/python2.7/site-packages/txyam/client.py", line 133, in getPickled
return self.get(key, **kwargs).addCallback(handleResult, uncompress)
File "/home/rustem/work/sw.services.swcomet.python/venv/lib/python2.7/site-packages/txyam/client.py", line 27, in wrapper
func = getattr(self.getClient(key), cmd)
File "/home/rustem/work/sw.services.swcomet.python/venv/lib/python2.7/site-packages/txyam/client.py", line 48, in getClient
raise NoServerError, "No connected servers remaining."
txyam.client.NoServerError: No connected servers remaining.
The source code which dumps that error:
import tornado.ioloop
import tornado.gen
from txyam.client import YamClient
from swtools.date import _ts
import tornado.platform.twisted

MEMHOSTS = ['127.0.0.1111']
USER_EXPIRATION_TIME = 61

class MemcacheHelper(object):
    def __init__(self, *a, **kw):
        try:
            self.mem = YamClient(["127.0.0.1"])
        except Exception, e:
            print "ERror", e
        self.clients = set()
        self.add(4)

    @tornado.gen.engine
    def add(self, user_id, expire=None):
        self.clients.add(user_id)
        expire = expire or USER_EXPIRATION_TIME
        self.mem.getPickled(user_id, decompress=True)
        print "hmmm"

if __name__ == '__main__':
    print "trying to start on top of IOLOOP"
    ioloop = tornado.ioloop.IOLoop.instance()
    # reactor = TornadoReactor(ioloop)
    mem_helper = MemcacheHelper()
    # mem_helper.add(4)
    ioloop.start()
Please help me solve this problem!
txyam appears not to let you perform any memcache operations until after at least one connection has been established:
def getActiveConnections(self):
    return [factory.client for factory in self.factories if not factory.client is None]

def getClient(self, key):
    hosts = self.getActiveConnections()
    log.msg("Using %i active hosts" % len(hosts))
    if len(hosts) == 0:
        raise NoServerError, "No connected servers remaining."
    return hosts[ketama(key) % len(hosts)]
It attempts to set up these connections right away:
def __init__(self, hosts):
    """
    @param hosts: A C{list} of C{tuple}s containing hosts and ports.
    """
    self.connect(hosts)
But connection setup is asynchronous, and it doesn't expose an event to indicate when at least one connection has been established.
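For what it's worth, here is a minimal workaround sketch (not part of txyam): it simply polls the getActiveConnections method quoted above until a server is connected, and only then issues the first memcache call.
from twisted.internet import defer, reactor

def when_ready(client, poll_interval=0.1):
    # Fires with `client` once at least one memcached connection is active.
    d = defer.Deferred()
    def check():
        if client.getActiveConnections():
            d.callback(client)
        else:
            reactor.callLater(poll_interval, check)
    check()
    return d

# Usage: delay the first memcache call until the client is usable, e.g.
# when_ready(self.mem).addCallback(lambda mem: mem.getPickled(user_id, decompress=True))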
So your code fails because you call add right away, before any connections exist. A good long-term fix would be to file a bug report against txyam, because this isn't a very nice interface. YamClient could have a whenReady method that returns a Deferred that fires when you are actually allowed to use the YamClient instance. Or there could be an alternate constructor that returns a Deferred that fires with the YamClient instance, but only after it can be used.