can not use kafka for python - apache-kafka

python:2.6.6
kafka-python:1.4.3
I ran the Kafka producer, but it always gives me this error:
Traceback (most recent call last):
File "KafkaOperation.py", line 11, in
from kafka import KafkaProducer
File "/usr/lib/python2.6/site-packages/kafka/init.py", line 21, in
from kafka.consumer import KafkaConsumer
File "/usr/lib/python2.6/site-packages/kafka/consumer/init.py",
line 5, in
from kafka.consumer.group import KafkaConsumer
File "/usr/lib/python2.6/site-packages/kafka/consumer/group.py", line
13, in
from kafka.consumer.fetcher import Fetcher
File "/usr/lib/python2.6/site-packages/kafka/consumer/fetcher.py",
line 19, in
from kafka.record import MemoryRecords
File "/usr/lib/python2.6/site-packages/kafka/record/init.py", line
1, in
from kafka.record.memory_records import MemoryRecords
File
"/usr/lib/python2.6/site-packages/kafka/record/memory_records.py",
line 27, in
from kafka.record.default_records import DefaultRecordBatch,
DefaultRecordBatchBuilder
File
"/usr/lib/python2.6/site-packages/kafka/record/default_records.py",
line 338, in
class DefaultRecordBatchBuilder(DefaultRecordBase,
ABCRecordBatchBuilder):
File
"/usr/lib/python2.6/site-packages/kafka/record/default_records.py",
line 378, in DefaultRecordBatchBuilder
byte_like=(bytes, bytearray, memoryview),
NameError: name 'memoryview' is not defined
The code is:
from kafka import KafkaProducer
from kafka import KafkaConsumer
from kafka.errors import KafkaError
import json
class Kafka_producer():
    """Thin wrapper around a ``KafkaProducer`` bound to a single topic.

    The underlying producer is created once in ``__init__`` and reused by
    every ``sendjsondata`` call; call ``close()`` when done with it.
    """

    def __init__(self, kafkahost, kafkaport, kafkatopic):
        """Create a producer for the broker at ``kafkahost:kafkaport``.

        ``kafkaport`` may be a string or an int. ``kafkatopic`` is the
        topic every ``sendjsondata`` call publishes to.
        """
        self.kafkatopic = kafkatopic
        service_host = kafkahost + ":" + str(kafkaport)
        self.producer = KafkaProducer(bootstrap_servers=service_host)

    def sendjsondata(self, params):
        """Send the text ``params`` to the topic, encoded as UTF-8.

        Waits up to 60 seconds for the send to complete. Returns whatever
        the send future's ``get`` returns, or ``None`` if a ``KafkaError``
        was raised (the error is printed, not re-raised).
        """
        try:
            # parmas_message = json.dumps(params)
            future = self.producer.send(
                self.kafkatopic, params.encode('utf-8'))
            metadata = future.get(timeout=60)
            self.producer.flush()
        except KafkaError as e:
            print(e)
            return None
        # The producer is deliberately NOT closed here: closing it after
        # the first message would make every later call on this instance fail.
        return metadata

    def close(self):
        """Close the underlying producer; the instance is unusable afterwards."""
        self.producer.close()
if __name__ == '__main__':
    # test = {
    # "test":"testtets"
    # }
    # Kafka_producer("http://10.25.245.192","9092","nori-log").sendjsondata(test)
    producer = KafkaProducer(bootstrap_servers='10.25.245.192:9092')
    # No value_serializer is configured, so send() only accepts bytes:
    # serialise the dict to JSON and encode it once, outside the loop.
    payload = json.dumps({"test": "test_content"}).encode('utf-8')
    for _ in range(100):
        producer.send('nori-log', payload)
    # send() is asynchronous; flush so queued records are delivered
    # before the script exits, then release the connection.
    producer.flush()
    producer.close()
And i had exchange the toronto version to 2.2.1

The memoryview type is new in Python 2.7. Python 2.6.6 is almost 8 years old at this point, and hasn't gotten any support or security updates for almost 5 years. You need to upgrade your Python installation.

Related

Receiving an error when trying to run chatterbot on pi

I am new to working with Chatterbot. I am attempting to run chatterbot on my raspberry pi 4. After installing it, I attempted a basic program from chatterbot's documentation website:
from chatterbot import ChatBot
from chatterbot.trainers import ChatterBotCorpusTrainer
chatbot = ChatBot('Bob')
# Create a new trainer for the chatbot
trainer = ChatterBotCorpusTrainer(chatbot)
# Train the chatbot based on the english corpus
trainer.train("chatterbot.corpus.english")
# Get a response to an input statement
responce = chatbot.get_response("Hello, how are you today?")
print(responce)
When I run this program I get this error message:
Traceback (most recent call last):
File "/home/pi/Matthew.codes/JenkinsProject/chatbot_test.py", line 4, in
chatbot = ChatBot('Bob')
File "/home/pi/.local/lib/python3.7/site-packages/chatterbot/chatterbot.py", line 28, in init
self.storage = utils.initialize_class(storage_adapter, **kwargs)
File "/home/pi/.local/lib/python3.7/site-packages/chatterbot/utils.py", line 33, in initialize_class
return Class(*args, **kwargs)
File "/home/pi/.local/lib/python3.7/site-packages/chatterbot/storage/sql_storage.py", line 20, in init
super().init(**kwargs)
File "/home/pi/.local/lib/python3.7/site-packages/chatterbot/storage/storage_adapter.py", line 23, in init
'tagger_language', languages.ENG
File "/home/pi/.local/lib/python3.7/site-packages/chatterbot/tagging.py", line 20, in init
import spacy
File "/home/pi/.local/lib/python3.7/site-packages/spacy/init.py", line 6, in
from .errors import setup_default_warnings
File "/home/pi/.local/lib/python3.7/site-packages/spacy/errors.py", line 2, in
from .compat import Literal
File "/home/pi/.local/lib/python3.7/site-packages/spacy/compat.py", line 38, in
from thinc.api import Optimizer # noqa: F401
File "/home/pi/.local/lib/python3.7/site-packages/thinc/api.py", line 2, in
from .initializers import normal_init, uniform_init, glorot_uniform_init, zero_init
File "/home/pi/.local/lib/python3.7/site-packages/thinc/initializers.py", line 4, in
from .backends import Ops
File "/home/pi/.local/lib/python3.7/site-packages/thinc/backends/init.py", line 7, in
from .ops import Ops
File "/home/pi/.local/lib/python3.7/site-packages/thinc/backends/ops.py", line 15, in
from .cblas import CBlas
File "thinc/backends/cblas.pyx", line 1, in init thinc.backends.cblas
File "/home/pi/.local/lib/python3.7/site-packages/blis/init.py", line 3, in
from .cy import init
ImportError: /home/pi/.local/lib/python3.7/site-packages/blis/cy.cpython-37m-arm-linux-gnueabihf.so: undefined symbol: __atomic_load_8
I have tried adding storage adapters to the name class, but that did not work. As far as I can tell, I have installed all the dependencies.

Apache Beam ReadFromSpanner decoding issue

I'm trying to run the following script in a GCP Dataflow pipeline.
import apache_beam as beam
from apache_beam.options.pipeline_options import PipelineOptions
from typing import NamedTuple, Optional
from apache_beam.io.gcp.spanner import *
from past.builtins import unicode
import logging
class ItemRow(NamedTuple):
    """Row type passed to ReadFromSpanner: a single ``item_id`` field."""
    item_id: unicode
class LogResults(beam.DoFn):
    """Just log the results"""

    def process(self, element):
        """Log ``element`` at INFO level, then emit it unchanged."""
        logging.info("row: %s", element)
        yield element
class SpannerToSpannerAndBigQueryPipelineOptions(PipelineOptions):
    """
    Runtime Parameters given during template execution
    path parameter is necessary for execution of pipeline
    """

    @classmethod
    def _add_argparse_args(cls, parser):
        """Register the source-Spanner options on ``parser``.

        The project, dataset and instance IDs have placeholder defaults;
        ``--SOURCE_QUERY`` is required.
        """
        parser.add_argument(
            '--SOURCE_SPANNER_PROJECT_ID', type=str, help='Source Spanner project ID',
            default='project_id')
        parser.add_argument(
            '--SOURCE_SPANNER_DATASET_ID', type=str, help='Source Spanner dataset ID',
            default='dataset_id')
        parser.add_argument(
            '--SOURCE_SPANNER_INSTANCE_ID', type=str, help='Source Spanner instance ID',
            default='instance_id')
        parser.add_argument(
            '--SOURCE_QUERY', type=str, help='SQL to run in Source Spanner Instance',
            required=True)
# Setup pipeline
def run():
    """Build and run the pipeline: read rows from Spanner and log them.

    Blocks until the pipeline finishes.
    """
    # Register a RowCoder for the row type handed to ReadFromSpanner.
    beam.coders.registry.register_coder(ItemRow, beam.coders.RowCoder)
    pipeline_options = PipelineOptions()
    p = beam.Pipeline(options=pipeline_options)
    importer_options = pipeline_options.view_as(
        SpannerToSpannerAndBigQueryPipelineOptions)
    # NOTE(review): the SQL below is hard-coded; the SOURCE_QUERY option
    # on importer_options is not used here -- confirm that is intended.
    rows = (
        p
        | "Read from source Spanner" >> ReadFromSpanner(
            project_id=importer_options.SOURCE_SPANNER_PROJECT_ID,
            instance_id=importer_options.SOURCE_SPANNER_INSTANCE_ID,
            database_id=importer_options.SOURCE_SPANNER_DATASET_ID,
            row_type=ItemRow,
            sql='Select item_id from Items WHERE created_ts BETWEEN TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 5 SECOND) AND CURRENT_TIMESTAMP()',
            timestamp_bound_mode=TimestampBoundMode.MAX_STALENESS,
            staleness=3,
            time_unit=TimeUnit.HOURS,
        ).with_output_types(ItemRow)
    )
    rows | 'Log results' >> beam.ParDo(LogResults())
    result = p.run()
    result.wait_until_finish()
# Only start the pipeline when executed as a script, not on import.
if __name__ == "__main__":
    run()
However, I've been running into issues for decoding the results obtained from Spanner. These are the output logs from my Dataflow job:
"An exception was raised when trying to execute the workitem 6665479626992209510 : Traceback (most recent call last):
File "/usr/local/lib/python3.7/site-packages/dataflow_worker/batchworker.py", line 649, in do_work
work_executor.execute()
File "/usr/local/lib/python3.7/site-packages/dataflow_worker/executor.py", line 179, in execute
op.start()
File "dataflow_worker/native_operations.py", line 38, in dataflow_worker.native_operations.NativeReadOperation.start
File "dataflow_worker/native_operations.py", line 39, in dataflow_worker.native_operations.NativeReadOperation.start
File "dataflow_worker/native_operations.py", line 44, in dataflow_worker.native_operations.NativeReadOperation.start
File "dataflow_worker/native_operations.py", line 48, in dataflow_worker.native_operations.NativeReadOperation.start
File "/usr/local/lib/python3.7/site-packages/dataflow_worker/inmemory.py", line 108, in __iter__
yield self._source.coder.decode(value)
File "/usr/local/lib/python3.7/site-packages/apache_beam/coders/coders.py", line 468, in decode
return self.get_impl().decode(encoded)
File "apache_beam/coders/coder_impl.py", line 226, in apache_beam.coders.coder_impl.StreamCoderImpl.decode
File "apache_beam/coders/coder_impl.py", line 228, in apache_beam.coders.coder_impl.StreamCoderImpl.decode
File "apache_beam/coders/coder_impl.py", line 123, in apache_beam.coders.coder_impl.CoderImpl.decode_from_stream
File "/usr/local/lib/python3.7/site-packages/apache_beam/coders/row_coder.py", line 215, in decode_from_stream
is_null in zip(self.components, nulls)))
File "/usr/local/lib/python3.7/site-packages/apache_beam/coders/row_coder.py", line 215, in <genexpr>
is_null in zip(self.components, nulls)))
File "apache_beam/coders/coder_impl.py", line 259, in apache_beam.coders.coder_impl.CallbackCoderImpl.decode_from_stream
File "apache_beam/coders/coder_impl.py", line 261, in apache_beam.coders.coder_impl.CallbackCoderImpl.decode_from_stream
File "/usr/local/lib/python3.7/site-packages/apache_beam/coders/coders.py", line 414, in decode
return value.decode('utf-8')
UnicodeDecodeError: 'utf-8' codec can't decode byte 0x83 in position 9: invalid start byte
"
I'm unsure as to how to solve this problem. I'm using this https://beam.apache.org/releases/pydoc/2.27.0/apache_beam.io.gcp.spanner.html?highlight=spanner#module-apache_beam.io.gcp.spanner example as a starting point. The issue appears to be in decoding the results obtained from Spanner. There is little to no documentation on how to specify the schema for the Spanner table/tables I'm trying to query.
There is also an experimental IO module for Spanner which does not use the Java expansion module. Is it recommended to switch to the experimental version?
Thanks
I could not run a pipeline using the apache_beam.io.gcp.spanner module so I ended up using the apache_beam.io.gcp.experimental.spannerio module instead.

Error creating universal sentence encoder embeddings using beam & tf transform

I have a simple Beam pipeline that takes some text and gets embeddings using the Universal Sentence Encoder with tf.Transform. It is very similar to the demo made using TF 1.
import tensorflow as tf
import apache_beam as beam
import tensorflow_transform.beam as tft_beam
import tensorflow_transform.coders as tft_coders
from apache_beam.options.pipeline_options import PipelineOptions
import tempfile
# Module-level cache so the hub model is loaded at most once per process.
model = None


def embed_text(text):
    """Return the Universal Sentence Encoder output for ``text``.

    The model is loaded from TF Hub on the first call and stored in the
    module-level ``model`` for later calls.
    """
    import tensorflow_hub as hub
    global model
    if model is None:
        model = hub.load(
            'https://tfhub.dev/google/universal-sentence-encoder/4')
    embedding = model(text)
    return embedding
def get_metadata():
    """Build the DatasetMetadata for the raw input.

    The schema declares two ``tf.string`` columns, 'id' and 'text', each
    with shape ``[]`` and a fixed column representation.
    """
    from tensorflow_transform.tf_metadata import dataset_schema
    from tensorflow_transform.tf_metadata import dataset_metadata
    metadata = dataset_metadata.DatasetMetadata(dataset_schema.Schema({
        'id': dataset_schema.ColumnSchema(
            tf.string, [], dataset_schema.FixedColumnRepresentation()),
        'text': dataset_schema.ColumnSchema(
            tf.string, [], dataset_schema.FixedColumnRepresentation())
    }))
    return metadata
def preprocess_fn(input_features):
    """Map raw features to output features.

    Passes 'id' through unchanged and replaces 'text' with an 'embedding'
    entry holding the result of ``embed_text`` on it.
    """
    text_integerized = embed_text(input_features['text'])
    output_features = {
        'id': input_features['id'],
        'embedding': text_integerized
    }
    return output_features
def run(pipeline_options, known_args):
    """Embed four sample sentences and write them out as TFRecords.

    ``known_args.output_dir`` is used as the output file path prefix.
    Note that the ``pipeline_options`` argument is replaced by options
    parsed from ``sys.argv`` before it is used.
    """
    argv = None  # if None, uses sys.argv
    pipeline_options = PipelineOptions(argv)
    pipeline = beam.Pipeline(options=pipeline_options)
    with tft_beam.Context(temp_dir=tempfile.mkdtemp()):
        articles = (
            pipeline
            | beam.Create([
                {'id': '01', 'text': 'To be, or not to be: that is the question: '},
                {'id': '02', 'text': "Whether 'tis nobler in the mind to suffer "},
                {'id': '03', 'text': 'The slings and arrows of outrageous fortune, '},
                {'id': '04', 'text': 'Or to take arms against a sea of troubles, '},
            ]))
        articles_dataset = (articles, get_metadata())
        transformed_dataset, transform_fn = (
            articles_dataset
            | 'Extract embeddings' >> tft_beam.AnalyzeAndTransformDataset(preprocess_fn)
        )
        transformed_data, transformed_metadata = transformed_dataset
        _ = (
            transformed_data | 'Write embeddings to TFRecords' >> beam.io.tfrecordio.WriteToTFRecord(
                file_path_prefix='{0}'.format(known_args.output_dir),
                file_name_suffix='.tfrecords',
                coder=tft_coders.example_proto_coder.ExampleProtoCoder(
                    transformed_metadata.schema),
                num_shards=1
            )
        )
        result = pipeline.run()
        # The PipelineResult method is wait_until_finish(); the original
        # wait_until_finished() does not exist.
        result.wait_until_finish()
Python 3.6.8, tf==2.0, tf_transform==0.15, apache-beam[gcp]==2.16 (I tried various compatible combos from https://github.com/tensorflow/transform)
I am getting an error when tf_transform calls the graph analyser:
...
File "/Users/justingrace/.pyenv/versions/hlx36/lib/python3.6/site-packages/tensorflow_transform/beam/impl.py", line 462, in process
lambda: self._make_graph_state(saved_model_dir))
File "/Users/justingrace/.pyenv/versions/hlx36/lib/python3.6/site-packages/tfx_bsl/beam/shared.py", line 221, in acquire
return _shared_map.acquire(self._key, constructor_fn)
File "/Users/justingrace/.pyenv/versions/hlx36/lib/python3.6/site-packages/tfx_bsl/beam/shared.py", line 184, in acquire
result = control_block.acquire(constructor_fn)
File "/Users/justingrace/.pyenv/versions/hlx36/lib/python3.6/site-packages/tfx_bsl/beam/shared.py", line 87, in acquire
result = constructor_fn()
File "/Users/justingrace/.pyenv/versions/hlx36/lib/python3.6/site-packages/tensorflow_transform/beam/impl.py", line 462, in <lambda>
lambda: self._make_graph_state(saved_model_dir))
File "/Users/justingrace/.pyenv/versions/hlx36/lib/python3.6/site-packages/tensorflow_transform/beam/impl.py", line 438, in _make_graph_state
self._exclude_outputs, self._tf_config)
File "/Users/justingrace/.pyenv/versions/hlx36/lib/python3.6/site-packages/tensorflow_transform/beam/impl.py", line 357, in __init__
tensor_inputs = graph_tools.get_dependent_inputs(graph, inputs, fetches)
File "/Users/justingrace/.pyenv/versions/hlx36/lib/python3.6/site-packages/tensorflow_transform/graph_tools.py", line 686, in get_dependent_inputs
sink_tensors_ready)
File "/Users/justingrace/.pyenv/versions/hlx36/lib/python3.6/site-packages/tensorflow_transform/graph_tools.py", line 499, in __init__
table_init_op, graph_analyzer_for_table_init, translate_path_fn)
File "/Users/justingrace/.pyenv/versions/hlx36/lib/python3.6/site-packages/tensorflow_transform/graph_tools.py", line 560, in _get_table_init_op_source_info
if table_init_op.type not in _TABLE_INIT_OP_TYPES:
AttributeError: 'Tensor' object has no attribute 'type' [while running 'Extract embeddings/TransformDataset/Transform']
Exception ignored in: <bound method CapturableResourceDeleter.__del__ of <tensorflow.python.training.tracking.tracking.CapturableResourceDeleter object at 0x14152fbe0>>
Traceback (most recent call last):
File "/Users/justingrace/.pyenv/versions/hlx36/lib/python3.6/site-packages/tensorflow_core/python/training/tracking/tracking.py", line 190, in __del__
File "/Users/justingrace/.pyenv/versions/hlx36/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py", line 3872, in as_default
File "/Users/justingrace/.pyenv/versions/3.6.8/lib/python3.6/contextlib.py", line 159, in helper
TypeError: 'NoneType' object is not callable
It appears that the graph analyser is expecting a list of operations with a type attribute, but it is receiving a tensor. I can't see why this error is occurring other than a bug in the graph analyser or a compatibility issue with tfx_bsl (there seem to be issues with pyarrow 0.14, so I have downgraded to 0.13).
Output of pip freeze:
absl-py==0.8.1
annoy==1.12.0
apache-beam==2.16.0
appnope==0.1.0
astor==0.8.1
astunparse==1.6.3
attrs==19.1.0
avro-python3==1.9.1
backcall==0.1.0
bleach==3.1.0
cachetools==3.1.1
certifi==2019.11.28
chardet==3.0.4
crcmod==1.7
cymem==1.31.2
cytoolz==0.9.0.1
decorator==4.4.1
defusedxml==0.6.0
dill==0.3.0
docopt==0.6.2
en-core-web-lg==2.0.0
en-coref-lg==3.0.0
en-ner-trained==2.0.0
entrypoints==0.3
fastavro==0.21.24
fasteners==0.15
flashtext==2.7
future==0.18.2
fuzzywuzzy==0.16.0
gast==0.2.2
google-api-core==1.16.0
google-apitools==0.5.28
google-auth==1.11.0
google-auth-oauthlib==0.4.1
google-cloud-bigquery==1.17.1
google-cloud-bigtable==1.0.0
google-cloud-core==1.3.0
google-cloud-datastore==1.7.4
google-cloud-pubsub==1.0.2
google-pasta==0.1.8
google-resumable-media==0.4.1
googleapis-common-protos==1.51.0
grpc-google-iam-v1==0.12.3
grpcio==1.24.0
h5py==2.10.0
hdfs==2.5.8
httplib2==0.12.0
idna==2.8
importlib-metadata==1.5.0
ipykernel==5.1.4
ipython==7.12.0
ipython-genutils==0.2.0
ipywidgets==7.5.1
jedi==0.16.0
Jinja2==2.11.1
jsonpickle==1.2
jsonschema==3.2.0
jupyter==1.0.0
jupyter-client==5.3.4
jupyter-console==6.1.0
jupyter-core==4.6.2
Keras-Applications==1.0.8
Keras-Preprocessing==1.1.0
lxml==4.2.1
Markdown==3.2.1
MarkupSafe==1.1.1
mistune==0.8.4
mock==2.0.0
monotonic==1.5
more-itertools==8.2.0
msgpack==0.6.2
msgpack-numpy==0.4.4
murmurhash==0.28.0
nbconvert==5.6.1
nbformat==5.0.4
networkx==2.1
nltk==3.4.5
notebook==6.0.3
numpy==1.18.1
oauth2client==3.0.0
oauthlib==3.1.0
opt-einsum==3.1.0
packaging==20.1
pandas==0.23.0
pandocfilters==1.4.2
parso==0.6.1
pathlib2==2.3.5
pbr==5.4.4
pexpect==4.8.0
pickleshare==0.7.5
plac==0.9.6
pluggy==0.13.1
preshed==1.0.1
prometheus-client==0.7.1
prompt-toolkit==3.0.3
proto-google-cloud-datastore-v1==0.90.4
protobuf==3.11.3
psutil==5.6.7
ptyprocess==0.6.0
py==1.8.1
pyahocorasick==1.4.0
pyarrow==0.13.0
pyasn1==0.4.8
pyasn1-modules==0.2.8
pydot==1.4.1
Pygments==2.5.2
PyHamcrest==1.9.0
pymongo==3.10.1
pyparsing==2.4.6
pyrsistent==0.15.7
pytest==5.3.5
python-dateutil==2.8.0
python-Levenshtein==0.12.0
pytz==2019.3
PyYAML==3.13
pyzmq==18.1.1
qtconsole==4.6.0
regex==2017.4.5
repoze.lru==0.7
requests==2.22.0
requests-oauthlib==1.3.0
rsa==4.0
scikit-learn==0.19.1
scipy==1.4.1
Send2Trash==1.5.0
six==1.14.0
spacy==2.0.12
tb-nightly==2.2.0a20200217
tensorboard==2.0.2
tensorflow==2.0.0
tensorflow-estimator==2.0.1
tensorflow-hub==0.6.0
tensorflow-metadata==0.15.2
tensorflow-serving-api==2.1.0
tensorflow-transform==0.15.0
termcolor==1.1.0
terminado==0.8.3
testpath==0.4.4
textblob==0.15.1
tf-estimator-nightly==2.1.0.dev2020012309
tf-nightly==2.2.0.dev20200217
tfx-bsl==0.15.0
thinc==6.10.3
toolz==0.10.0
tornado==6.0.3
tqdm==4.23.3
traitlets==4.3.3
typing==3.7.4.1
typing-extensions==3.7.4.1
ujson==1.35
Unidecode==1.0.22
urllib3==1.25.8
wcwidth==0.1.8
webencodings==0.5.1
Werkzeug==1.0.0
Whoosh==2.7.4
widgetsnbextension==3.5.1
wrapt==1.11.2
zipp==2.2.0
This could be an underlying issue according to this github post. Try using an updated version of tensorflow (2.1.0), or maybe even an updated version of your keras packages.

Jython using pywinauto is throwing TypeError: __enter__(): expected 1 args; got 0 when trying to run it from java

I am trying to run a python script from java; in py script I am using pywinauto package and want to initiate notepad++.
When I am running this script it works fine, however when same is called from Java main method it throws TypeError: __enter__(): expected 1 args; got 0.
Below is python script:
import sys
sys.path.append("C:\\jython2.7.0\\bin")
sys.path.append("C:\\Python27")
sys.path.append("C:\\Python27\\Lib")
sys.path.append("C:\\jython2.7.0\\Lib")
def runpradeep():
    """Start Notepad++ through pywinauto, then kill the process."""
    print("trying to run it")
    # Imported inside the function, so pywinauto is only loaded when called.
    from pywinauto.application import Application
    app = Application().start('C:\\Program Files (x86)\\Notepad++\\notepad++.exe')
    app.kill()
runpradeep()
and below is java class:
package himalaya;
import org.python.core.PyObject;
import org.python.util.PythonInterpreter;
import javax.script.ScriptContext;
import javax.script.ScriptEngine;
import javax.script.ScriptEngineManager;
import javax.script.ScriptException;
import java.util.Properties;
/**
 * Runs a Python script through an embedded Jython PythonInterpreter.
 */
public class OpenBoApplication {
/**
 * Configures the interpreter, executes the script file, then invokes
 * runpradeep() through exec.
 */
public static void main(String[] args) throws ScriptException {
Properties props = new Properties();
// NOTE(review): python.home is set to the Python27 directory, and the error
// log shows threading.py being loaded from that directory's Lib folder.
// Presumably this should point at the Jython installation instead; confirm.
props.put("python.home", "C:\\Python27");
props.put("python.console.encoding", "UTF-8");
props.put("python.security.respectJavaAccessibility", "false");
props.put("python.import.site", "false");
Properties preprops = System.getProperties();
// Properties must be supplied before the first interpreter is created.
PythonInterpreter.initialize(preprops, props, new String[0]);
PythonInterpreter interpreter = new PythonInterpreter();
interpreter.execfile("D:path to py script\\renatoplease.py");
// NOTE(review): if the script already calls runpradeep() at module level,
// this runs it a second time; verify that is intended.
interpreter.exec("runpradeep()");
}
}
Below is error log:
Exception in thread "main" Traceback (most recent call last):
File "D:\pbhardwa\IdeaProjects\javapythongroovy\src\com\ingenico\runmiddleware\renatoplease.py", line 16, in
runpradeep()
File "D:\pbhardwa\IdeaProjects\javapythongroovy\src\com\ingenico\runmiddleware\renatoplease.py", line 11, in runpradeep
from pywinauto.application import Application
File "pyclasspath/pywinauto/application.py", line 72, in
File "C:\Python27\Lib\multiprocessing__init__.py", line 65, in
from multiprocessing.util import SUBDEBUG, SUBWARNING
File "C:\Python27\Lib\multiprocessing\util.py", line 39, in
import threading # we want threading to install it's
File "C:\Python27\Lib\threading.py", line 1191, in
_shutdown = _MainThread()._exitfunc
File "C:\Python27\Lib\threading.py", line 1083, in init
self._Thread__started.set()
File "C:\Python27\Lib\threading.py", line 582, in set
"""
File "C:\Python27\Lib\threading.py", line 286, in enter
return self.lock.__enter()
TypeError: enter(): expected 1 args; got 0
I am using Intellij ultimate edition and Pycharm plugin is installed. I have also installed pywinauto(as mentioned enter link description here), pythonpath is also set. Also added jython dependencies to class path. Attaching complete project
Any help will surely help us.

Hyperopt mongotrials issue with Pickle: AttributeError: 'module' object has no attribute

I'm trying to use Hyperopt parallel search with MongoDB, and encountered some issues with MongoTrials, which have been discussed here. I've tried all their methods, and I am still unable to find a solution to my specific problem. The specific model I'm trying to minimize is RandomForestRegressor from sklearn.
I've followed this tutorial. And I'm able to print out the calculated "fmin" with no issue.
Here are my steps so far:
1) Activate a virtual environment called "tensorflow" (I've installed all my libraries there)
2) Start MongoDB:
(tensorflow) bash-3.2$ mongod --dbpath . --port 1234 --directoryperdb --journal --nohttpinterface
3) Initiate workers:
(tensorflow) bash-3.2$ hyperopt-mongo-worker --mongo=localhost:1234/foo_db --poll-interval=0.1
4) Run my python code, and my python code is as follows:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error
from hyperopt import hp, fmin, tpe, STATUS_OK, Trials
from hyperopt.mongoexp import MongoTrials
# Preprocessing data
train_xg = pd.read_csv('train.csv')
n_train = len(train_xg)
print "Whole data set size: ", n_train
# Creating columns for features, and categorical features
features_col = [x for x in train_xg.columns if x not in ['id', 'loss', 'log_loss']]
cat_features_col = [x for x in train_xg.select_dtypes(include=['object']).columns if x not in ['id', 'loss', 'log_loss']]
for c in range(len(cat_features_col)):
train_xg[cat_features_col[c]] = train_xg[cat_features_col[c]].astype('category').cat.codes
# Use this to train random forest regressor
train_xg_x = np.array(train_xg[features_col])
train_xg_y = np.array(train_xg['loss'])
space_rf = { 'min_samples_leaf': hp.choice('min_samples_leaf', range(1,100)) }
trials = MongoTrials('mongo://localhost:1234/foo_db/jobs', exp_key='exp1')
def minMe(params):
# Hyperopt tuning for hyperparameters
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestRegressor
from hyperopt import STATUS_OK
try:
import dill as pickle
print('Went with dill')
except ImportError:
import pickle
def hyperopt_rf(params):
rf = RandomForestRegressor(**params)
return cross_val_score(rf, train_xg_x, train_xg_y).mean()
acc = hyperopt_rf(params)
print 'new acc:', acc, 'params: ', params
return {'loss': -acc, 'status': STATUS_OK}
# Run the search: minimise minMe over space_rf with the TPE algorithm for at
# most 100 evaluations, recording results through the MongoTrials object.
best = fmin(fn=minMe, space=space_rf, trials=trials, algo=tpe.suggest, max_evals=100)
print "Best: ", best
5) After I run the above Python code, I get the following errors:
INFO:hyperopt.mongoexp:Error while unpickling. Try installing dill via "pip install dill" for enhanced pickling support.
INFO:hyperopt.mongoexp:job exception: 'module' object has no attribute 'minMe'
Traceback (most recent call last):
File "/Users/WernerChao/tensorflow/bin/hyperopt-mongo-worker", line 6, in <module>
sys.exit(hyperopt.mongoexp.main_worker())
File "/Users/WernerChao/tensorflow/lib/python2.7/site-packages/hyperopt/mongoexp.py", line 1302, in main_worker
return main_worker_helper(options, args)
File "/Users/WernerChao/tensorflow/lib/python2.7/site-packages/hyperopt/mongoexp.py", line 1249, in main_worker_helper
mworker.run_one(reserve_timeout=float(options.reserve_timeout))
File "/Users/WernerChao/tensorflow/lib/python2.7/site-packages/hyperopt/mongoexp.py", line 1064, in run_one
domain = pickle.loads(blob)
AttributeError: 'module' object has no attribute 'minMe'
INFO:hyperopt.mongoexp:PROTOCOL mongo
INFO:hyperopt.mongoexp:USERNAME None
INFO:hyperopt.mongoexp:HOSTNAME localhost
INFO:hyperopt.mongoexp:PORT 1234
INFO:hyperopt.mongoexp:PATH /foo_db/jobs
INFO:hyperopt.mongoexp:DB foo_db
INFO:hyperopt.mongoexp:COLLECTION jobs
INFO:hyperopt.mongoexp:PASS None
INFO:hyperopt.mongoexp:Error while unpickling. Try installing dill via "pip install dill" for enhanced pickling support.
INFO:hyperopt.mongoexp:job exception: 'module' object has no attribute 'minMe'
Traceback (most recent call last):
File "/Users/WernerChao/tensorflow/bin/hyperopt-mongo-worker", line 6, in <module>
sys.exit(hyperopt.mongoexp.main_worker())
File "/Users/WernerChao/tensorflow/lib/python2.7/site-packages/hyperopt/mongoexp.py", line 1302, in main_worker
return main_worker_helper(options, args)
File "/Users/WernerChao/tensorflow/lib/python2.7/site-packages/hyperopt/mongoexp.py", line 1249, in main_worker_helper
mworker.run_one(reserve_timeout=float(options.reserve_timeout))
File "/Users/WernerChao/tensorflow/lib/python2.7/site-packages/hyperopt/mongoexp.py", line 1064, in run_one
domain = pickle.loads(blob)
AttributeError: 'module' object has no attribute 'minMe'
INFO:hyperopt.mongoexp:PROTOCOL mongo
INFO:hyperopt.mongoexp:USERNAME None
INFO:hyperopt.mongoexp:HOSTNAME localhost
INFO:hyperopt.mongoexp:PORT 1234
INFO:hyperopt.mongoexp:PATH /foo_db/jobs
INFO:hyperopt.mongoexp:DB foo_db
INFO:hyperopt.mongoexp:COLLECTION jobs
INFO:hyperopt.mongoexp:PASS None
INFO:hyperopt.mongoexp:Error while unpickling. Try installing dill via "pip install dill" for enhanced pickling support.
INFO:hyperopt.mongoexp:job exception: 'module' object has no attribute 'minMe'
Traceback (most recent call last):
File "/Users/WernerChao/tensorflow/bin/hyperopt-mongo-worker", line 6, in <module>
sys.exit(hyperopt.mongoexp.main_worker())
File "/Users/WernerChao/tensorflow/lib/python2.7/site-packages/hyperopt/mongoexp.py", line 1302, in main_worker
return main_worker_helper(options, args)
File "/Users/WernerChao/tensorflow/lib/python2.7/site-packages/hyperopt/mongoexp.py", line 1249, in main_worker_helper
mworker.run_one(reserve_timeout=float(options.reserve_timeout))
File "/Users/WernerChao/tensorflow/lib/python2.7/site-packages/hyperopt/mongoexp.py", line 1064, in run_one
domain = pickle.loads(blob)
AttributeError: 'module' object has no attribute 'minMe'
INFO:hyperopt.mongoexp:PROTOCOL mongo
INFO:hyperopt.mongoexp:USERNAME None
INFO:hyperopt.mongoexp:HOSTNAME localhost
INFO:hyperopt.mongoexp:PORT 1234
INFO:hyperopt.mongoexp:PATH /foo_db/jobs
INFO:hyperopt.mongoexp:DB foo_db
INFO:hyperopt.mongoexp:COLLECTION jobs
INFO:hyperopt.mongoexp:PASS None
INFO:hyperopt.mongoexp:no job found, sleeping for 0.7s
INFO:hyperopt.mongoexp:Error while unpickling. Try installing dill via "pip install dill" for enhanced pickling support.
INFO:hyperopt.mongoexp:job exception: 'module' object has no attribute 'minMe'
Traceback (most recent call last):
File "/Users/WernerChao/tensorflow/bin/hyperopt-mongo-worker", line 6, in <module>
sys.exit(hyperopt.mongoexp.main_worker())
File "/Users/WernerChao/tensorflow/lib/python2.7/site-packages/hyperopt/mongoexp.py", line 1302, in main_worker
return main_worker_helper(options, args)
File "/Users/WernerChao/tensorflow/lib/python2.7/site-packages/hyperopt/mongoexp.py", line 1249, in main_worker_helper
mworker.run_one(reserve_timeout=float(options.reserve_timeout))
File "/Users/WernerChao/tensorflow/lib/python2.7/site-packages/hyperopt/mongoexp.py", line 1064, in run_one
domain = pickle.loads(blob)
AttributeError: 'module' object has no attribute 'minMe'
INFO:hyperopt.mongoexp:exiting with N=9223372036854775803 after 4 consecutive exceptions
6) Then Mongo workers would shut off.
Things I've tried:
install "dill" as the error suggested -> didn't work
Put global imports into the objective function so it can pickle -> didn't work
Put try except with "dill" or "pickle" as import -> didn't work
Does anyone have similar issues? I'm running out of ideas to try, and have been working on this for 2 days in vain. I think I am missing something really simple here, just can't seem to find it.
What am I missing?
Any suggestion is welcomed please!
I had the same problem in Python 3.5. Installing dill didn't help, nor did setting workdir in MongoTrials or in the hyperopt-mongo-worker CLI. hyperopt-mongo-worker doesn't seem to have access to __main__, where the function was defined:
AttributeError: Can't get attribute 'minMe' on <module '__main__' from ...hyperopt-mongo-worker
As @jaikumarm suggested, I circumvented the problem by writing a module file with all the required functions. However, instead of soft-linking it into the bin directory, I extended the PYTHONPATH before running hyperopt-mongo-worker:
export PYTHONPATH="${PYTHONPATH}:<dir_with_the_module.py>"
hyperopt-mongo-worker ...
That way, the hyperopt-mongo-worker is able to import the module containing minMe.
I fought with this for several days before coming up with a workable solution. There are two problems:
1. The mongo worker spawns off a separate process to run the optimizer, so any context from your original Python file is lost and unavailable to this new process.
2. The imports in this new process happen in the context of the hyperopt-mongo-worker script, which in your case will be in /Users/WernerChao/tensorflow/bin/.
So my solution is to make this new optimizer function completely self-sufficient:
optimizer.py
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error
# Preprocessing data
train_xg = pd.read_csv('train.csv')
n_train = len(train_xg)
print "Whole data set size: ", n_train
# Creating columns for features, and categorical features
features_col = [x for x in train_xg.columns if x not in ['id', 'loss', 'log_loss']]
cat_features_col = [x for x in train_xg.select_dtypes(include=['object']).columns if x not in ['id', 'loss', 'log_loss']]
for c in range(len(cat_features_col)):
train_xg[cat_features_col[c]] = train_xg[cat_features_col[c]].astype('category').cat.codes
# Use this to train random forest regressor
train_xg_x = np.array(train_xg[features_col])
train_xg_y = np.array(train_xg['loss'])
def minMe(params):
# Hyperopt tuning for hyperparameters
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestRegressor
from hyperopt import STATUS_OK
try:
import dill as pickle
print('Went with dill')
except ImportError:
import pickle
def hyperopt_rf(params):
rf = RandomForestRegressor(**params)
return cross_val_score(rf, train_xg_x, train_xg_y).mean()
acc = hyperopt_rf(params)
print 'new acc:', acc, 'params: ', params
return {'loss': -acc, 'status': STATUS_OK}
wrapper.py
from hyperopt import hp, fmin, tpe, STATUS_OK, Trials
from hyperopt.mongoexp import MongoTrials
import optimizer
space_rf = { 'min_samples_leaf': hp.choice('min_samples_leaf', range(1,100)) }
best = fmin(fn=optimizer.minMe, space=space_rf, trials=trials, algo=tpe.suggest, max_evals=100)
print "Best: ", best
trials = MongoTrials('mongo://localhost:1234/foo_db/jobs', exp_key='exp1')
Once you have this code link the optimizer.py to the bin folder
ln -s /Users/WernerChao/Git/test/optimizer.py /Users/WernerChao/tensorflow/bin/
now run the wrapper.py and then the mongo worker it should be able to import the optimizer from its local context and run the minMe function.
Try to install Dill in the Python environment of your tensorflow (or possibly the worker):
/Users/WernerChao/tensorflow/lib/python2.7/site-packages/hyperopt
Your aim is to get rid of the hyperopt error message:
hyperopt.mongoexp:Error while unpickling. Try installing dill via "pip install dill" for enhanced pickling support.
This is because Python by default cannot marshal a function. It requires the dill library to extend Python's pickle module for serialising/de-serialising Python objects. In your case, it failed to serialise your function minMe().
I made a separate file which calculates the loss and copied it to /anaconda2/bin/
and
/anaconda2/lib/python2.7/site-packages/hyperopt
it is working fine.
This was my Traceback
Traceback (most recent call last):
File "/home/greatskull/anaconda2/bin/hyperopt-mongo-worker", line 6, in <module>
sys.exit(hyperopt.mongoexp.main_worker())
File "/home/greatskull/anaconda2/lib/python2.7/site-packages/hyperopt/mongoexp.py", line 1302, in main_worker
return main_worker_helper(options, args)
File "/home/greatskull/anaconda2/lib/python2.7/site-packages/hyperopt/mongoexp.py", line 1249, in main_worker_helper
mworker.run_one(reserve_timeout=float(options.reserve_timeout))
File "/home/greatskull/anaconda2/lib/python2.7/site-packages/hyperopt/mongoexp.py", line 1073, in run_one
with temp_dir(workdir, erase_created_workdir), working_dir(workdir):
File "/home/greatskull/anaconda2/lib/python2.7/contextlib.py", line 17, in __enter__
return self.gen.next()
File "/home/greatskull/anaconda2/lib/python2.7/site-packages/hyperopt/utils.py", line 229, in temp_dir
os.makedirs(dir)
File "/home/greatskull/anaconda2/lib/python2.7/os.py", line 150, in makedirs
makedirs(head, mode)
File "/home/greatskull/anaconda2/lib/python2.7/os.py", line 157, in makedirs
mkdir(name, mode)