Using rpy2 in Jupyter/IPython run_line_magic error - ipython

In the IPython and Jupyter documentation it says that get_ipython().magic() is deprecated. But when I changed my code to use run_line_magic it is failing to push to R (see below). Might be related to this problem
https://bitbucket.org/rpy2/rpy2/issues/184/valueerror-call-stack-is-not-deep-enough
I'm on Mac Yosemite, using Anaconda with Python 2.7. I just updated both Anaconda and rpy2 yesterday. The code below is from a Jupyter notebook.
%load_ext rpy2.ipython
import pandas as pd
'''Two test functions with rpy2.
The only difference between them is that
rpy2fun_magic uses 'magic' to push variable to R and
rpy2fun_linemagic uses 'run_line_magic' to push variable.
'magic' works fine. 'run_line_magic' returns an error.'''
def rpy2fun_magic(df):
get_ipython().magic('R -i df')
get_ipython().run_line_magic('R','df_cor <- cor(df)')
get_ipython().run_line_magic('R','-o df_cor')
return (df_cor)
def rpy2fun_linemagic(df):
get_ipython().run_line_magic('R','-i df')
get_ipython().run_line_magic('R','df_cor <- cor(df)')
get_ipython().run_line_magic('R','-o df_cor')
return (df_cor)
dataframetest = pd.DataFrame([[1,2,3,4],[6,3,4,5],[9,1,7,3]])
df_cor_magic = rpy2fun_magic(dataframetest)
print 'Using magic to push variable works fine\n'
print df_cor_magic
print '\nBut using run_line_magic returns an error\n'
df_cor_linemagic = rpy2fun_linemagic(dataframetest)
Using magic to push variable works fine
[[ 1. -0.37115374 0.91129318 -0.37115374]
[-0.37115374 1. -0.72057669 1. ]
[ 0.91129318 -0.72057669 1. -0.72057669]
[-0.37115374 1. -0.72057669 1. ]]
But using run_line_magic returns an error
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-1-e418b72a8621> in <module>()
28 print '\nBut using run_line_magic returns an error\n'
29
---> 30 df_cor_linemagic = rpy2fun_linemagic(dataframetest)
<ipython-input-1-e418b72a8621> in rpy2fun_linemagic(df)
15
16 def rpy2fun_linemagic(df):
---> 17 get_ipython().run_line_magic('R','-i df')
18 get_ipython().run_line_magic('R','df_cor <- cor(df)')
19 get_ipython().run_line_magic('R','-o df_cor')
/Users/alexmillner/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.pyc in run_line_magic(self, magic_name, line)
2255 kwargs['local_ns'] = sys._getframe(stack_depth).f_locals
2256 with self.builtin_trap:
-> 2257 result = fn(*args,**kwargs)
2258 return result
2259
/Users/alexmillner/anaconda/lib/python2.7/site-packages/rpy2/ipython/rmagic.pyc in R(self, line, cell, local_ns)
/Users/alexmillner/anaconda/lib/python2.7/site-packages/IPython/core/magic.pyc in <lambda>(f, *a, **k)
191 # but it's overkill for just that one bit of state.
192 def magic_deco(arg):
--> 193 call = lambda f, *a, **k: f(*a, **k)
194
195 if callable(arg):
/Users/alexmillner/anaconda/lib/python2.7/site-packages/rpy2/ipython/rmagic.pyc in R(self, line, cell, local_ns)
657 val = self.shell.user_ns[input]
658 except KeyError:
--> 659 raise NameError("name '%s' is not defined" % input)
660 if args.converter is None:
661 ro.r.assign(input, self.pyconverter(val))
NameError: name 'df' is not defined

Some discussion of the same issue with %timeit first, followed by workaround answers at the bottom. I'm using IPython 3.1.0 with Anaconda 2.7.10, so my observations below could be different based on version differences alone.
This is not unique to the R extension, you can reproduce this with something simpler like %timeit:
In [47]: dfrm
Out[47]:
A B C
0 0.690466 0.370793 0.963782
1 0.478427 0.358897 0.689173
2 0.189277 0.268237 0.570624
3 0.735665 0.342549 0.509810
4 0.929736 0.090079 0.384444
5 0.210941 0.347164 0.852408
6 0.241940 0.187266 0.961489
7 0.768143 0.548450 0.604004
8 0.055765 0.842224 0.668782
9 0.717827 0.047011 0.948673
In [48]: def run_timeit(df):
get_ipython().run_line_magic('timeit', 'df.sum()')
....:
In [49]: run_timeit(dfrm)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-49-1e62302232b6> in <module>()
----> 1 run_timeit(dfrm)
<ipython-input-48-0a3e09ec1e0c> in run_timeit(df)
1 def run_timeit(df):
----> 2 get_ipython().run_line_magic('timeit', 'df.sum()')
3
/home/ely/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.pyc in run_line_magic(self, magic_name, line)
2226 kwargs['local_ns'] = sys._getframe(stack_depth).f_locals
2227 with self.builtin_trap:
-> 2228 result = fn(*args,**kwargs)
2229 return result
2230
/home/ely/anaconda/lib/python2.7/site-packages/IPython/core/magics/execution.pyc in timeit(self, line, cell)
/home/ely/anaconda/lib/python2.7/site-packages/IPython/core/magic.pyc in <lambda>(f, *a, **k)
191 # but it's overkill for just that one bit of state.
192 def magic_deco(arg):
--> 193 call = lambda f, *a, **k: f(*a, **k)
194
195 if callable(arg):
/home/ely/anaconda/lib/python2.7/site-packages/IPython/core/magics/execution.pyc in timeit(self, line, cell)
1034 number = 1
1035 for _ in range(1, 10):
-> 1036 time_number = timer.timeit(number)
1037 worst_tuning = max(worst_tuning, time_number / number)
1038 if time_number >= 0.2:
/home/ely/anaconda/lib/python2.7/site-packages/IPython/core/magics/execution.pyc in timeit(self, number)
130 gc.disable()
131 try:
--> 132 timing = self.inner(it, self.timer)
133 finally:
134 if gcold:
<magic-timeit> in inner(_it, _timer)
NameError: global name 'df' is not defined
The issue is that the line magics are set to look for variable names at global scope, not at function scope. If the argument to your function rpy2fun_linemagic happened to coincide with a global variable name, the interior code would pick that up, for example:
In [52]: def run_timeit(dfrm):
get_ipython().run_line_magic('timeit', 'dfrm.sum()')
....:
In [53]: run_timeit(dfrm)
The slowest run took 5.67 times longer than the fastest. This could mean that an intermediate result is being cached
10000 loops, best of 3: 99.1 µs per loop
But this only works by accident, because the interior string passed to run_line_magic contains a name that is found globally.
However, I do get the same error even if using the plain magic function:
In [58]: def run_timeit(df):
get_ipython().magic('timeit df.sum()')
....:
In [59]: run_timeit(dfrm)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-59-1e62302232b6> in <module>()
----> 1 run_timeit(dfrm)
<ipython-input-58-e98c720ea7e8> in run_timeit(df)
1 def run_timeit(df):
----> 2 get_ipython().magic('timeit df.sum()')
3
/home/ely/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.pyc in magic(self, arg_s)
2305 magic_name, _, magic_arg_s = arg_s.partition(' ')
2306 magic_name = magic_name.lstrip(prefilter.ESC_MAGIC)
-> 2307 return self.run_line_magic(magic_name, magic_arg_s)
2308
2309 #-------------------------------------------------------------------------
/home/ely/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.pyc in run_line_magic(self, magic_name, line)
2226 kwargs['local_ns'] = sys._getframe(stack_depth).f_locals
2227 with self.builtin_trap:
-> 2228 result = fn(*args,**kwargs)
2229 return result
2230
/home/ely/anaconda/lib/python2.7/site-packages/IPython/core/magics/execution.pyc in timeit(self, line, cell)
/home/ely/anaconda/lib/python2.7/site-packages/IPython/core/magic.pyc in <lambda>(f, *a, **k)
191 # but it's overkill for just that one bit of state.
192 def magic_deco(arg):
--> 193 call = lambda f, *a, **k: f(*a, **k)
194
195 if callable(arg):
/home/ely/anaconda/lib/python2.7/site-packages/IPython/core/magics/execution.pyc in timeit(self, line, cell)
1034 number = 1
1035 for _ in range(1, 10):
-> 1036 time_number = timer.timeit(number)
1037 worst_tuning = max(worst_tuning, time_number / number)
1038 if time_number >= 0.2:
/home/ely/anaconda/lib/python2.7/site-packages/IPython/core/magics/execution.pyc in timeit(self, number)
130 gc.disable()
131 try:
--> 132 timing = self.inner(it, self.timer)
133 finally:
134 if gcold:
<magic-timeit> in inner(_it, _timer)
NameError: global name 'df' is not defined
One (super bad) way to get around this is to use globals to locate the item that is the same as the argument that was passed to your function, and then you'll have a global name for it.
For example:
In [68]: def run_timeit(df):
for var_name, var_val in globals().iteritems():
if df is var_val:
get_ipython().run_line_magic('timeit', '%s.sum()'%(var_name))
break
....:
In [69]: run_timeit(dfrm)
The slowest run took 5.72 times longer than the fastest. This could mean that an intermediate result is being cached
10000 loops, best of 3: 99.2 µs per loop
But this is very unstable, since it relies on pass-by-name in Python. If I passed an object like an integer or string, I would have to check whether it was interned or something, but otherwise couldn't find it "by name" in the global namespace.
Another way to do it that might be slightly better is to use the user_ns namespace dict that IPython stores. Then at least you're not looking at globals, and there is more stability over specific variables that have been named when assigned by the user in IPython:
In [71]: def run_timeit(df):
....: g = get_ipython()
....: for var_name, var_val in g.user_ns.iteritems():
....: if df is var_val:
....: g.run_line_magic('timeit', '%s.sum()'%(var_name))
....: break
....:
In [72]: run_timeit(dfrm)
The slowest run took 5.58 times longer than the fastest. This could mean that an intermediate result is being cached
10000 loops, best of 3: 99 µs per loop
In the case of your specific R function call, I would try:
def rpy2fun_linemagic(df):
g = get_ipython()
for var_name, var_val in g.user_ns.iteritems():
if df is var_val:
g.run_line_magic('R', '-i %s'%(var_name))
g.run_line_magic('R', 'df_cor <- cor(%s)'%(var_name))
g.run_line_magic('R', '-o df_cor')
return df_cor
You might also have to be careful on the return statement. You might need to use return g.user_ns['df_cor'] or something if the result of the output conversion back to Python is to create the variable at global scope as well, rather than function scope. Or, if that variable gets created as a side effect, you may not want to return anything at all. I'm not a big fan of relying on implicit mutation like that, but it could work for you.

I suspect that the code example you are providing is there only to demonstrate the issue with run_line_magic(), but for reference I am adding a way to do the same without ipython being involved.
from rpy2.robjects import globalenv
def rpy2cor(df):
fun = globalenv.get('cor', wantfun=True)
df_cor = fun(df)
return df_cor

Related

Python Jupyter Notebook scipy

For a long time I was able to add data and fit, then plot the curve with data. But recently I get this:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-6-6f645a2744bc> in <module>
1 poland = prepare_data(europe_data, 'Poland')
----> 2 plot_all(poland, max_y=400000)
3 poland
~/Pulpit/library.py in plot_all(country, max_x, max_y)
43 def plot_all(country, max_x = 1000, max_y = 500000):
44
---> 45 parameters_logistic = scipy.optimize.curve_fit(func_logistic, country['n'], country['all'])[0]
46 parameters_expo = scipy.optimize.curve_fit(func_expo, country['n'], country['all'])[0]
47
/usr/local/lib64/python3.6/site-packages/scipy/optimize/minpack.py in curve_fit(f, xdata, ydata, p0, sigma, absolute_sigma, check_finite, bounds, method, jac, **kwargs)
787 cost = np.sum(infodict['fvec'] ** 2)
788 if ier not in [1, 2, 3, 4]:
--> 789 raise RuntimeError("Optimal parameters not found: " + errmsg)
790 else:
791 # Rename maxfev (leastsq) to max_nfev (least_squares), if specified.
RuntimeError: Optimal parameters not found: Number of calls to function has reached maxfev = 800.
Here are all Python Jupyter Notebook files: https://files.fm/u/zj7cc6ne#sign_up
How to solve this?
scipy.optimize.curve_fit takes a keyword argument p0.
Initial guess for the parameters (length N). If None, then the initial
values will all be 1 (if the number of parameters for the function can
be determined using introspection, otherwise a ValueError is raised).
If the defaults 1 are too far of from the result the algorithm may not converge. Try to put some values that make sense for your problem.

Error Expected object of device type cuda but got device type cpu for argument #1 'self' in call to _th_index_select

I have the following code taken directly from here with some pretty little modifications:
import pandas as pd
import torch
import json
from transformers import T5Tokenizer, T5ForConditionalGeneration, T5Config
from torch import cuda
df = pd.read_pickle('df_final.pkl')
model = T5ForConditionalGeneration.from_pretrained('t5-base')
tokenizer = T5Tokenizer.from_pretrained('t5-base')
device = 'cuda' if cuda.is_available() else 'cpu'
text = ''.join(df[(df['col1'] == 'type') & (df['col2'] == 2)].col3.to_list())
preprocess_text = text.strip().replace("\n","")
t5_prepared_Text = "summarize: "+preprocess_text
#print ("original text preprocessed: \n", preprocess_text)
tokenized_text = tokenizer.encode(t5_prepared_Text, return_tensors="pt", max_length = 500000).to(device)
# summmarize
summary_ids = model.generate(tokenized_text,
num_beams=4,
no_repeat_ngram_size=2,
min_length=30,
max_length=100,
early_stopping=True)
output = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
print ("\n\nSummarized text: \n",output)
When executing the model_generate() part i get an error like this:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-12-e8e9819a85dc> in <module>
12 min_length=30,
13 max_length=100,
---> 14 early_stopping=True).to(device)
15
16 output = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
~\Anaconda3\lib\site-packages\torch\autograd\grad_mode.py in decorate_no_grad(*args, **kwargs)
47 def decorate_no_grad(*args, **kwargs):
48 with self:
---> 49 return func(*args, **kwargs)
50 return decorate_no_grad
51
~\Anaconda3\lib\site-packages\transformers\generation_utils.py in generate(self, input_ids, max_length, min_length, do_sample, early_stopping, num_beams, temperature, top_k, top_p, repetition_penalty, bad_words_ids, bos_token_id, pad_token_id, eos_token_id, length_penalty, no_repeat_ngram_size, num_return_sequences, attention_mask, decoder_start_token_id, use_cache, **model_specific_kwargs)
383 encoder = self.get_encoder()
384
--> 385 encoder_outputs: tuple = encoder(input_ids, attention_mask=attention_mask)
386
387 # Expand input ids if num_beams > 1 or num_return_sequences > 1
~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
539 result = self._slow_forward(*input, **kwargs)
540 else:
--> 541 result = self.forward(*input, **kwargs)
542 for hook in self._forward_hooks.values():
543 hook_result = hook(self, input, result)
~\Anaconda3\lib\site-packages\transformers\modeling_t5.py in forward(self, input_ids, attention_mask, encoder_hidden_states, encoder_attention_mask, inputs_embeds, head_mask, past_key_value_states, use_cache, output_attentions, output_hidden_states, return_dict)
701 if inputs_embeds is None:
702 assert self.embed_tokens is not None, "You have to intialize the model with valid token embeddings"
--> 703 inputs_embeds = self.embed_tokens(input_ids)
704
705 batch_size, seq_length = input_shape
~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
539 result = self._slow_forward(*input, **kwargs)
540 else:
--> 541 result = self.forward(*input, **kwargs)
542 for hook in self._forward_hooks.values():
543 hook_result = hook(self, input, result)
~\Anaconda3\lib\site-packages\torch\nn\modules\sparse.py in forward(self, input)
112 return F.embedding(
113 input, self.weight, self.padding_idx, self.max_norm,
--> 114 self.norm_type, self.scale_grad_by_freq, self.sparse)
115
116 def extra_repr(self):
~\Anaconda3\lib\site-packages\torch\nn\functional.py in embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse)
1482 # remove once script supports set_grad_enabled
1483 _no_grad_embedding_renorm_(weight, input, max_norm, norm_type)
-> 1484 return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
1485
1486
RuntimeError: Expected object of device type cuda but got device type cpu for argument #1 'self' in call to _th_index_select
​
I've searched this error and fouund some other threads like this one and this one but they didn't help me much since their case seems to be completely different. In my case there are no custom instances or classes created, so i don't know how to fix this or where the error come from.
Could you please tell me where is the error coming from and how could i fix it?
Thank you very much in advance.
Try explicitly moving your model to the GPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = T5ForConditionalGeneration.from_pretrained('t5-base').to(device)

TypeError: 'JavaPackage' object is not callable for Xgboost in PySpark

I am trying to make Scala Xgboost API available for my PySpark Notebook. And following this blog:
https://towardsdatascience.com/pyspark-and-xgboost-integration-tested-on-the-kaggle-titanic-dataset-4e75a568bdb
However, keep on running into below err:
spark._jvm.ml.dmlc.xgboost4j.scala.spark.XGBoostEstimator
<py4j.java_gateway.JavaPackage at 0x7fa650fe7a58>
from sparkxgb import XGBoostEstimator
xgboost = XGBoostEstimator(
featuresCol="features",
labelCol="Survival",
predictionCol="prediction"
)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-18-1765fb9e3344> in <module>
4 featuresCol="features",
5 labelCol="Survival",
----> 6 predictionCol="prediction"
7 )
~/spark-assembly-2.4.0-twttr-kryo3-scala2128-hadoop2.9.2.t05/python/pyspark/__init__.py in wrapper(self, *args, **kwargs)
108 raise TypeError("Method %s forces keyword arguments." % func.__name__)
109 self._input_kwargs = kwargs
--> 110 return func(self, **kwargs)
111 return wrapper
112
~/local/spark-3536cd7a-6188-4ca8-b3d0-57d42cd01531/userFiles-0a0d90bc-96b4-43f2-bf21-00ae0e6f7309/sparkxgb.zip/sparkxgb/xgboost.py in __init__(self, checkpoint_path, checkpointInterval, missing, nthread, nworkers, silent, use_external_memory, baseMarginCol, featuresCol, labelCol, predictionCol, weightCol, base_score, booster, eval_metric, num_class, num_round, objective, seed, alpha, colsample_bytree, colsample_bylevel, eta, gamma, grow_policy, max_bin, max_delta_step, max_depth, min_child_weight, reg_lambda, scale_pos_weight, sketch_eps, subsample, tree_method, normalize_type, rate_drop, sample_type, skip_drop, lambda_bias)
113
114 super(XGBoostEstimator, self).__init__()
--> 115 self._java_obj = self._new_java_obj("ml.dmlc.xgboost4j.scala.spark.XGBoostEstimator", self.uid)
116 self._create_params_from_java()
117 self._setDefault(
~/spark-assembly-2.4.0-twttr-kryo3-scala2128-hadoop2.9.2.t05/python/pyspark/ml/wrapper.py in _new_java_obj(java_class, *args)
65 java_obj = getattr(java_obj, name)
66 java_args = [_py2java(sc, arg) for arg in args]
---> 67 return java_obj(*java_args)
68
69 #staticmethod
TypeError: 'JavaPackage' object is not callable
I already google this error and tried below things. I got all ideas from this blog https://github.com/JohnSnowLabs/spark-nlp/issues/232 :
Make sure Xgboost4j is in the SPARK_DIST_CLASSPATH. Already checked.
$echo $SPARK_DIST_CLASSPATH | tr " " "\n" | grep 'xgboost4j' | rev | cut -d'/' -f1 | rev
xgboost4j-0.72.jar
xgboost4j-spark.72.jar
Make sure they are added to EXTRA_CLASSPATH. - Done
Updating configs.
'export PYSPARK_SUBMIT_ARGS="--conf spark.jars=$SPARK_HOME/jars/* --conf spark.driver.extraClassPath=$SPARK_HOME/jars/* --conf spark.executor.extraClassPath=$SPARK_HOME/jars/* pyspark-shell"',
Hardware Info:
Machine: Linux
Using Jupyter Notebook.
Spark Version 2.4.0
python3.6
I found the problem, The problem was that the sparkxbg.zip(which I downloaded over internet) is written for xgboost4j-0.72. However, my jars were from xgoost4j-0.9. And the API has been completetly changed. As a result 0.9 version didn't had any class named ml.dmlc.xgboost4j.scala.spark.XGBoostEstimator. And hence the error. You can see the difference in API below:
https://github.com/dmlc/xgboost/tree/release_0.72/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark
vs
https://github.com/dmlc/xgboost/tree/v0.90/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark

Running gvmagic extension on jupyter notebook returning FileNotFounError

I am developing Python 3.5.3 using the jupyter notebook in Anaconda 2.5.0 (64-bit) on a Windows 10 machine. I am trying to use an extension called 'gvmagic', which is used for viewing graphs. The extension seems to load, but returns a FileNotFoundError instead of a graph.
My input code is (Note: 'visualize_de_bruijn_graph' is a custom code that builds a de Bruijn graph from a string):
dbg = visualize_de_bruijn_graph('ACGCGTCG', 3)
print(dbg)
Which returns graph:
digraph "DeBruijn Graph" {
CG [label="CG"] ;
TC [label="TC"] ;
GC [label="GC"] ;
AC [label="AC"] ;
GT [label="GT"] ;
AC -> CG ;
CG -> GC ;
GC -> CG ;
CG -> GT ;
GT -> TC ;
TC -> CG ;
}
Trying to visualize the graph with the following code:
%load_ext gvmagic
%dotstr dbg
returns the error below. I cannot figure out what file is missing, as all the files referenced are where they are suppose to be.
FileNotFoundError Traceback (most recent call last)
<ipython-input-17-d138faf6c47c> in <module>()
----> 1 get_ipython().magic('dotstr dbg')
C:\Users\username\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py in magic(self, arg_s)
2161 magic_name, _, magic_arg_s = arg_s.partition(' ')
2162 magic_name = magic_name.lstrip(prefilter.ESC_MAGIC)
-> 2163 return self.run_line_magic(magic_name, magic_arg_s)
2164
2165 #-------------------------------------------------------------------------
C:\Users\username\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py in run_line_magic(self, magic_name, line)
2082 kwargs['local_ns'] = sys._getframe(stack_depth).f_locals
2083 with self.builtin_trap:
-> 2084 result = fn(*args,**kwargs)
2085 return result
2086
<decorator-gen-126> in dotstr(self, line)
C:\Users\username\Anaconda3\lib\site-packages\IPython\core\magic.py in <lambda>(f, *a, **k)
191 # but it's overkill for just that one bit of state.
192 def magic_deco(arg):
--> 193 call = lambda f, *a, **k: f(*a, **k)
194
195 if callable(arg):
C:\Users\username\Anaconda3\lib\site-packages\IPython\extensions\gvmagic.py in dotstr(self, line)
50 #line_magic
51 def dotstr(self, line):
---> 52 self._from_str(line, 'dot')
53
54 #line_magic
C:\Users\username\Anaconda3\lib\site-packages\IPython\extensions\gvmagic.py in _from_str(self, line, layout_engine)
151 def _from_str(self, line, layout_engine):
152 s = self.shell.ev(line)
--> 153 data = run_graphviz(s, layout_engine)
154 if data:
155 display_svg(data, raw=True)
C:\Users\username\Anaconda3\lib\site-packages\IPython\extensions\gvmagic.py in run_graphviz(s, layout_engine)
30 cmd = ['dot', '-Tsvg', '-K', layout_engine]
31
---> 32 dot = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE)
33 stdoutdata, stderrdata = dot.communicate(s.encode('utf-8'))
34 status = dot.wait()
C:\Users\username\Anaconda3\lib\subprocess.py in __init__(self, args, bufsize, executable, stdin, stdout, stderr, preexec_fn, close_fds, shell, cwd, env, universal_newlines, startupinfo, creationflags, restore_signals, start_new_session, pass_fds)
674 c2pread, c2pwrite,
675 errread, errwrite,
--> 676 restore_signals, start_new_session)
677 except:
678 # Cleanup if the child failed starting.
C:\Users\username\Anaconda3\lib\subprocess.py in _execute_child(self, args, executable, preexec_fn, close_fds, pass_fds, cwd, env, startupinfo, creationflags, shell, p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite, unused_restore_signals, unused_start_new_session)
953 env,
954 cwd,
--> 955 startupinfo)
956 finally:
957 # Child is launched. Close the parent's copy of those pipe
FileNotFoundError: [WinError 2] The system cannot find the file specified
You have to install the Graphviz software on your PC. For Windows, for example, download this https://graphviz.gitlab.io/_pages/Download/Download_windows.html.
In your IPython session, you have to point to the install location, for example:
import os
os.environ["PATH"] += os.pathsep + 'C:/Program Files (x86)/Graphviz2.38/bin'
%load_ext gvmagic
dbg = visualize_de_bruijn('ACGCGTCG', 3)
%dotstr dbg
And now it should work for you, just as it does for me! You can probably set the PATH variable on your PC instead of having to do it inside IPython each time.

IPython - Raise exception when a shell command fails

I'm using IPython as a system shell.
Is there a way to make IPython to raise an exception when the shell command fails? (non-zero exit codes)
The default makes them fail silently.
As of IPython 4.0.1, !cmd is transformed into get_ipython().system(repr(cmd)) (IPython.core.inputtransformer._tr_system()).
In the source, it's actually InteractiveShell.system_raw(), as inspect.getsourcefile() and inspect.getsource() can tell.
It delegates to os.system() in Windows and subprocess.call() in other OSes. Not configurable, as you can see from the code.
So, you need to replace it with something that would call subprocess.check_call().
Apart from monkey-patching by hand, this can be done with the IPython configuration system. Available options (viewable with the %config magic) don't allow to replace TerminalInteractiveShell with another class but several TerminalIPythonApp options allow to execute code on startup.
Do double-check whether you really need this though: a look through the system_raw()'s source reveals that it sets the _exit_code variable - so it doesn't actually fail completely silently.
If you use ! to execute shell commands, errors will pass silently
!echo "hello" && exit 1
hello
If you use the %%sh cell magic to execute the shell command, errors will raise:
%%sh
echo "hello" && exit 1
hello
---------------------------------------------------------------------------
CalledProcessError Traceback (most recent call last)
<ipython-input-10-9229f76cae28> in <module>
----> 1 get_ipython().run_cell_magic('sh', '', 'echo "hello" && exit 1\n')
~/anaconda/envs/altair-dev/lib/python3.6/site-packages/IPython/core/interactiveshell.py in run_cell_magic(self, magic_name, line, cell)
2360 with self.builtin_trap:
2361 args = (magic_arg_s, cell)
-> 2362 result = fn(*args, **kwargs)
2363 return result
2364
~/anaconda/envs/altair-dev/lib/python3.6/site-packages/IPython/core/magics/script.py in named_script_magic(line, cell)
140 else:
141 line = script
--> 142 return self.shebang(line, cell)
143
144 # write a basic docstring:
<decorator-gen-110> in shebang(self, line, cell)
~/anaconda/envs/altair-dev/lib/python3.6/site-packages/IPython/core/magic.py in <lambda>(f, *a, **k)
185 # but it's overkill for just that one bit of state.
186 def magic_deco(arg):
--> 187 call = lambda f, *a, **k: f(*a, **k)
188
189 if callable(arg):
~/anaconda/envs/altair-dev/lib/python3.6/site-packages/IPython/core/magics/script.py in shebang(self, line, cell)
243 sys.stderr.flush()
244 if args.raise_error and p.returncode!=0:
--> 245 raise CalledProcessError(p.returncode, cell, output=out, stderr=err)
246
247 def _run_script(self, p, cell, to_close):
CalledProcessError: Command 'b'echo "hello" && exit 1\n'' returned non-zero exit status 1.