How to use `client.start_ipython_workers()` in dask-distributed? - ipython

I am trying to get workers to output some information from their ipython kernel and execute various commands in the ipython session. I tried the examples in the documentation and the ipyparallel example works, but not the second example (with ipython magics). I cannot get the workers to execute any commands. For example, I am stuck on the following issue:
from dask.distributed import Client
client = Client()
info = client.start_ipython_workers()
list_workers = info.keys()
%remote info[list_workers[0]]
The last line returns an error:
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-19-9118451af441> in <module>
----> 1 get_ipython().run_line_magic('remote', "info['tcp://127.0.0.1:50497'] worker.active")
~/miniconda/envs/dask/lib/python3.7/site-packages/IPython/core/interactiveshell.py in run_line_magic(self, magic_name, line, _stack_depth)
2334 kwargs['local_ns'] = self.get_local_scope(stack_depth)
2335 with self.builtin_trap:
-> 2336 result = fn(*args, **kwargs)
2337 return result
2338
~/miniconda/envs/dask/lib/python3.7/site-packages/distributed/_ipython_utils.py in remote_magic(line, cell)
115 info_name = split_line[0]
116 if info_name not in ip.user_ns:
--> 117 raise NameError(info_name)
118 connection_info = dict(ip.user_ns[info_name])
119
NameError: info['tcp://127.0.0.1:50497']
I would appreciate any examples of how to get any information from the ipython kernel running on workers.

Posting here just for keeping track, I raised an issue for this on GitHub: https://github.com/dask/distributed/issues/4522

Related

Why pip install not working in Jupyter notebook?

When i run pip3 install <package> or !pip3 install <package> or !pip install <package> i get this error. And also i can't clone any repo in jupyter. It gives the same error. This is my first time in Jupyter.
---------------------------------------------------------------------------
OSError Traceback (most recent call last)
Input In [18], in <cell line: 1>()
----> 1 get_ipython().run_line_magic('pip', 'install boto3')
File /lib/python3.9/site-packages/IPython/core/interactiveshell.py:2294, in InteractiveShell.run_line_magic(self, magic_name, line, _stack_depth)
2292 kwargs['local_ns'] = self.get_local_scope(stack_depth)
2293 with self.builtin_trap:
-> 2294 result = fn(*args, **kwargs)
2295 return result
File /lib/python3.9/site-packages/IPython/core/magics/packaging.py:75, in PackagingMagics.pip(self, line)
72 else:
73 python = shlex.quote(python)
---> 75 self.shell.system(" ".join([python, "-m", "pip", line]))
77 print("Note: you may need to restart the kernel to use updated packages.")
File /lib/python3.9/site-packages/IPython/core/interactiveshell.py:2451, in InteractiveShell.system_piped(self, cmd)
2446 raise OSError("Background processes not supported.")
2448 # we explicitly do NOT return the subprocess status code, because
2449 # a non-None value would trigger :func:`sys.displayhook` calls.
2450 # Instead, we store the exit_code in user_ns.
-> 2451 self.user_ns['_exit_code'] = system(self.var_expand(cmd, depth=1))
File /lib/python3.9/site-packages/IPython/utils/_process_posix.py:148, in ProcessHandler.system(self, cmd)
146 child = pexpect.spawnb(self.sh, args=['-c', cmd]) # Pexpect-U
147 else:
--> 148 child = pexpect.spawn(self.sh, args=['-c', cmd]) # Vanilla Pexpect
149 flush = sys.stdout.flush
150 while True:
151 # res is the index of the pattern that caused the match, so we
152 # know whether we've finished (if we matched EOF) or not
File /lib/python3.9/site-packages/IPython/utils/_process_posix.py:57, in ProcessHandler.sh(self)
55 self._sh = pexpect.which(shell_name)
56 if self._sh is None:
---> 57 raise OSError('"{}" shell not found'.format(shell_name))
59 return self._sh
I searched everywhere, but it is weird that no-one faced this issue except me. Pls provide some solution for this. I'm getting crazy.

TypeError: 'JavaPackage' object is not callable for Xgboost in PySpark

I am trying to make Scala Xgboost API available for my PySpark Notebook. And following this blog:
https://towardsdatascience.com/pyspark-and-xgboost-integration-tested-on-the-kaggle-titanic-dataset-4e75a568bdb
However, keep on running into below err:
spark._jvm.ml.dmlc.xgboost4j.scala.spark.XGBoostEstimator
<py4j.java_gateway.JavaPackage at 0x7fa650fe7a58>
from sparkxgb import XGBoostEstimator
xgboost = XGBoostEstimator(
featuresCol="features",
labelCol="Survival",
predictionCol="prediction"
)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-18-1765fb9e3344> in <module>
4 featuresCol="features",
5 labelCol="Survival",
----> 6 predictionCol="prediction"
7 )
~/spark-assembly-2.4.0-twttr-kryo3-scala2128-hadoop2.9.2.t05/python/pyspark/__init__.py in wrapper(self, *args, **kwargs)
108 raise TypeError("Method %s forces keyword arguments." % func.__name__)
109 self._input_kwargs = kwargs
--> 110 return func(self, **kwargs)
111 return wrapper
112
~/local/spark-3536cd7a-6188-4ca8-b3d0-57d42cd01531/userFiles-0a0d90bc-96b4-43f2-bf21-00ae0e6f7309/sparkxgb.zip/sparkxgb/xgboost.py in __init__(self, checkpoint_path, checkpointInterval, missing, nthread, nworkers, silent, use_external_memory, baseMarginCol, featuresCol, labelCol, predictionCol, weightCol, base_score, booster, eval_metric, num_class, num_round, objective, seed, alpha, colsample_bytree, colsample_bylevel, eta, gamma, grow_policy, max_bin, max_delta_step, max_depth, min_child_weight, reg_lambda, scale_pos_weight, sketch_eps, subsample, tree_method, normalize_type, rate_drop, sample_type, skip_drop, lambda_bias)
113
114 super(XGBoostEstimator, self).__init__()
--> 115 self._java_obj = self._new_java_obj("ml.dmlc.xgboost4j.scala.spark.XGBoostEstimator", self.uid)
116 self._create_params_from_java()
117 self._setDefault(
~/spark-assembly-2.4.0-twttr-kryo3-scala2128-hadoop2.9.2.t05/python/pyspark/ml/wrapper.py in _new_java_obj(java_class, *args)
65 java_obj = getattr(java_obj, name)
66 java_args = [_py2java(sc, arg) for arg in args]
---> 67 return java_obj(*java_args)
68
69 #staticmethod
TypeError: 'JavaPackage' object is not callable
I already google this error and tried below things. I got all ideas from this blog https://github.com/JohnSnowLabs/spark-nlp/issues/232 :
Make sure Xgboost4j is in the SPARK_DIST_CLASSPATH. Already checked.
$echo $SPARK_DIST_CLASSPATH | tr " " "\n" | grep 'xgboost4j' | rev | cut -d'/' -f1 | rev
xgboost4j-0.72.jar
xgboost4j-spark.72.jar
Make sure they are added to EXTRA_CLASSPATH. - Done
Updating configs.
'export PYSPARK_SUBMIT_ARGS="--conf spark.jars=$SPARK_HOME/jars/* --conf spark.driver.extraClassPath=$SPARK_HOME/jars/* --conf spark.executor.extraClassPath=$SPARK_HOME/jars/* pyspark-shell"',
Hardware Info:
Machine: Linux
Using Jupyter Notebook.
Spark Version 2.4.0
python3.6
I found the problem, The problem was that the sparkxbg.zip(which I downloaded over internet) is written for xgboost4j-0.72. However, my jars were from xgoost4j-0.9. And the API has been completetly changed. As a result 0.9 version didn't had any class named ml.dmlc.xgboost4j.scala.spark.XGBoostEstimator. And hence the error. You can see the difference in API below:
https://github.com/dmlc/xgboost/tree/release_0.72/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark
vs
https://github.com/dmlc/xgboost/tree/v0.90/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark

NameError: name 're' is not defined... already imported re in the code and built in function

I keep getting "NameError: name 're' is not defined", even though I have already imported re in my code AND the built in function pat_count() defined in library_s19_week2.py. I tried all the possible places to import re but none seemed working. Please help!
My code:
import re
hash_pat = re.compile(r'#\w+')
hash_counter = pat_count(hash_pat)
tweet_table['hash_count'] = tweet_table.apply(lambda row: hash_counter(row['tweet']), axis=1)
Traceback for the error:
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-93-1880eb903ae9> in <module>()
10
11 hash_pat = re.compile(r'#\w+')
---> 12 hash_counter = pat_count(hash_pat)
13 tweet_table['hash_count'] = tweet_table.apply(lambda row: hash_counter(row['tweet']), axis=1)
14
/content/library_s19_week2.py in pat_count(pattern)
95 def pat_count(pattern):
96 import re
---> 97
98 pat = re.compile(pattern)
99
NameError: name 're' is not defined
I found my bug:
hash_pat = re.compile(r'#\w+') should be hash_pat = r'#\w+.
As seen in the function pat_count() in the traceback, hash_pat is an input to re.compile().

use pep8 to check the coding style in jupyter notebook

I have a problem in using pep8 to check the coding style in jupyter notebook.
install by:
in[1]
%install_ext https://raw.githubusercontent.com/SiggyF/notebooks/master/pep8_magic.py
out
Installed pep8_magic.py. To use it, type:
%load_ext pep8_magic
load by
in[2]
%load_ext pep8_magic
out
The pep8_magic extension is already loaded. To reload it, use:
%reload_ext pep8_magic
use it by
in[3]
%%pep8
a=1
out
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-5-3b6f6dbc7761> in <module>()
----> 1 get_ipython().run_cell_magic(u'pep8', u'', u'\na = 1')
/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.pyc in run_cell_magic(self, magic_name, line, cell)
2118 magic_arg_s = self.var_expand(line, stack_depth)
2119 with self.builtin_trap:
-> 2120 result = fn(magic_arg_s, cell)
2121 return result
2122
/home/cs/.ipython/extensions/pep8_magic.pyc in pep8(line, cell)
38 with tempfile.NamedTemporaryFile() as f:
39 # save to file
---> 40 f.write(bytes(cell + '\n', 'UTF-8'))
41 # make sure it's written
42 f.flush()
TypeError: str() takes at most 1 argument (2 given)
It seems that I have installed and loaded it successfully but I can not run the cell magic.
Later I found the problem was caused by some differences between python2 and python3. By using io.Bytes and f.write(cell+'\n') in pep2_magic.py, it worked. But unfortunately, it still can't output information of pep8, like the example here pep8 style guide

IPython - Raise exception when a shell command fails

I'm using IPython as a system shell.
Is there a way to make IPython to raise an exception when the shell command fails? (non-zero exit codes)
The default makes them fail silently.
As of IPython 4.0.1, !cmd is transformed into get_ipython().system(repr(cmd)) (IPython.core.inputtransformer._tr_system()).
In the source, it's actually InteractiveShell.system_raw(), as inspect.getsourcefile() and inspect.getsource() can tell.
It delegates to os.system() in Windows and subprocess.call() in other OSes. Not configurable, as you can see from the code.
So, you need to replace it with something that would call subprocess.check_call().
Apart from monkey-patching by hand, this can be done with the IPython configuration system. Available options (viewable with the %config magic) don't allow to replace TerminalInteractiveShell with another class but several TerminalIPythonApp options allow to execute code on startup.
Do double-check whether you really need this though: a look through the system_raw()'s source reveals that it sets the _exit_code variable - so it doesn't actually fail completely silently.
If you use ! to execute shell commands, errors will pass silently
!echo "hello" && exit 1
hello
If you use the %%sh cell magic to execute the shell command, errors will raise:
%%sh
echo "hello" && exit 1
hello
---------------------------------------------------------------------------
CalledProcessError Traceback (most recent call last)
<ipython-input-10-9229f76cae28> in <module>
----> 1 get_ipython().run_cell_magic('sh', '', 'echo "hello" && exit 1\n')
~/anaconda/envs/altair-dev/lib/python3.6/site-packages/IPython/core/interactiveshell.py in run_cell_magic(self, magic_name, line, cell)
2360 with self.builtin_trap:
2361 args = (magic_arg_s, cell)
-> 2362 result = fn(*args, **kwargs)
2363 return result
2364
~/anaconda/envs/altair-dev/lib/python3.6/site-packages/IPython/core/magics/script.py in named_script_magic(line, cell)
140 else:
141 line = script
--> 142 return self.shebang(line, cell)
143
144 # write a basic docstring:
<decorator-gen-110> in shebang(self, line, cell)
~/anaconda/envs/altair-dev/lib/python3.6/site-packages/IPython/core/magic.py in <lambda>(f, *a, **k)
185 # but it's overkill for just that one bit of state.
186 def magic_deco(arg):
--> 187 call = lambda f, *a, **k: f(*a, **k)
188
189 if callable(arg):
~/anaconda/envs/altair-dev/lib/python3.6/site-packages/IPython/core/magics/script.py in shebang(self, line, cell)
243 sys.stderr.flush()
244 if args.raise_error and p.returncode!=0:
--> 245 raise CalledProcessError(p.returncode, cell, output=out, stderr=err)
246
247 def _run_script(self, p, cell, to_close):
CalledProcessError: Command 'b'echo "hello" && exit 1\n'' returned non-zero exit status 1.