Apache Beam: How to read from HDFS with delegation token - apache-beam

hdfs_options = {
"hdfs_host": "...",
"hdfs_port": 50070,
"hdfs_user": "..."
}
opts = PipelineOptions(**hdfs_options)
token = run_shell_cmd('curl -s --negotiate -u : "http://nn:50070/webhdfs/v1/?op=GETDELEGATIONTOKEN"'
p = beam.Pipeline(options=opts)
p.apply(
beam.io.ReadFromText(f"hdfs:///my_path/*.md?delegation={token}") # does not work
);
I have the token and a delegation token file, but I am unable to authenticate with either.
Match operation failed with exceptions {'hdfs:///my_path/*.md?delegation=...': BeamIOError("List operation failed with exceptions {'hdfs:///my_path': HdfsError('Authentication failure. Check your credentials.')}")}
Stacktrace
---------------------------------------------------------------------------
BeamIOError Traceback (most recent call last)
<ipython-input-251-127e501adfaa> in <module>()
2
3 p.apply(
----> 4 beam.io.ReadFromText(f"hdfs:///my_path/*.md?delegation={token}")
5 );
/root/miniconda3/lib/python3.7/site-packages/apache_beam/io/textio.py in __init__(self, file_pattern, min_bundle_size, compression_type, strip_trailing_newlines, coder, validate, skip_header_lines, **kwargs)
540 file_pattern, min_bundle_size, compression_type,
541 strip_trailing_newlines, coder, validate=validate,
--> 542 skip_header_lines=skip_header_lines)
543
544 def expand(self, pvalue):
/root/miniconda3/lib/python3.7/site-packages/apache_beam/io/textio.py in __init__(self, file_pattern, min_bundle_size, compression_type, strip_trailing_newlines, coder, buffer_size, validate, skip_header_lines, header_processor_fns)
124 super(_TextSource, self).__init__(file_pattern, min_bundle_size,
125 compression_type=compression_type,
--> 126 validate=validate)
127
128 self._strip_trailing_newlines = strip_trailing_newlines
/root/miniconda3/lib/python3.7/site-packages/apache_beam/io/filebasedsource.py in __init__(self, file_pattern, min_bundle_size, compression_type, splittable, validate)
123 self._splittable = splittable
124 if validate and file_pattern.is_accessible():
--> 125 self._validate()
126
127 def display_data(self):
/root/miniconda3/lib/python3.7/site-packages/apache_beam/options/value_provider.py in _f(self, *args, **kwargs)
138 if not obj.is_accessible():
139 raise error.RuntimeValueProviderError('%s not accessible' % obj)
--> 140 return fnc(self, *args, **kwargs)
141 return _f
142 return _check_accessible
/root/miniconda3/lib/python3.7/site-packages/apache_beam/io/filebasedsource.py in _validate(self)
181
182 # Limit the responses as we only want to check if something exists
--> 183 match_result = FileSystems.match([pattern], limits=[1])[0]
184 if len(match_result.metadata_list) <= 0:
185 raise IOError(
/root/miniconda3/lib/python3.7/site-packages/apache_beam/io/filesystems.py in match(patterns, limits)
198 return []
199 filesystem = FileSystems.get_filesystem(patterns[0])
--> 200 return filesystem.match(patterns, limits)
201
202 #staticmethod
/root/miniconda3/lib/python3.7/site-packages/apache_beam/io/filesystem.py in match(self, patterns, limits)
718
719 if exceptions:
--> 720 raise BeamIOError("Match operation failed", exceptions)
721 return result
722

Related

Sparkmagic errors out using iPython 7.33.0

I am attempting to connect to an Amazon EMR cluster using Livy 0.7 and Spark from an Amazon Sagemaker Notebook running Amazon Linux 2. Can anyone help me understand this error and how I might go about fixing it?
When I go to run the following command on a SageMaker Notebook running Amazon Linux 2:
%reload_ext sparkmagic.magics %spark add -s sparksession -l python -u $LIVY_ENDPOINT -t None
This results in the following error message:
AttributeError Traceback (most recent call last)
in
1 get_ipython().run_line_magic('reload_ext', 'sparkmagic.magics')
2 get_ipython().run_line_magic('spark', 'add -s jsmith -l python -u $LIVY_ENDPOINT -t None')
~/anaconda3/envs/JupyterSystemEnv/lib/python3.7/site-packages/IPython/core/interactiveshell.py in run_line_magic(self, magic_name, line, _stack_depth)
2415 kwargs['local_ns'] = self.get_local_scope(stack_depth)
2416 with self.builtin_trap:
2417 result = fn(*args, **kwargs)
2418 return result
2419
~/anaconda3/envs/JupyterSystemEnv/lib/python3.7/site-packages/decorator.py in fun(*args, **kw)
230 if not kwsyntax:
231 args, kw = fix(args, kw, sig)
232 return caller(func, *(extras + args), **kw)
233 fun.name = func.name
234 fun.doc = func.doc
~/anaconda3/envs/JupyterSystemEnv/lib/python3.7/site-packages/IPython/core/magic.py in (f, *a, **k)
185 # but it's overkill for just that one bit of state.
186 def magic_deco(arg):
187 call = lambda f, *a, **k: f(*a, **k)
188
189 if callable(arg):
~/anaconda3/envs/JupyterSystemEnv/lib/python3.7/site-packages/IPython/core/magics/extension.py in reload_ext(self, module_str)
61 if not module_str:
62 raise UsageError('Missing module name.')
63 self.shell.extension_manager.reload_extension(module_str)
~/anaconda3/envs/JupyterSystemEnv/lib/python3.7/site-packages/IPython/core/extensions.py in reload_extension(self, module_str)
128 self.loaded.add(module_str)
129 else:
130 self.load_extension(module_str)
131
132 def _call_load_ipython_extension(self, mod):
~/anaconda3/envs/JupyterSystemEnv/lib/python3.7/site-packages/IPython/core/extensions.py in load_extension(self, module_str)
85 dir=compress_user(self.ipython_extension_dir)))
86 mod = sys.modules[module_str]
87 if self._call_load_ipython_extension(mod):
88 self.loaded.add(module_str)
89 else:
~/anaconda3/envs/JupyterSystemEnv/lib/python3.7/site-packages/IPython/core/extensions.py in _call_load_ipython_extension(self, mod)
132 def _call_load_ipython_extension(self, mod):
133 if hasattr(mod, 'load_ipython_extension'):
134 mod.load_ipython_extension(self.shell)
135 return True
136
~/anaconda3/envs/JupyterSystemEnv/lib/python3.7/site-packages/sparkmagic/magics/remotesparkmagics.py in load_ipython_extension(ip)
322
323 def load_ipython_extension(ip):
324 ip.register_magics(RemoteSparkMagics)
~/anaconda3/envs/JupyterSystemEnv/lib/python3.7/site-packages/IPython/core/magic.py in register(self, *magic_objects)
449 if isinstance(m, type):
450 # If we're given an uninstantiated class
451 m = m(shell=self.shell)
452
453 # Now that we have an instance, we can register it and update the
~/anaconda3/envs/JupyterSystemEnv/lib/python3.7/site-packages/sparkmagic/magics/remotesparkmagics.py in init(self, shell, data, widget)
40 if widget is None:
41 widget = MagicsControllerWidget(
42 self.spark_controller, IpyWidgetFactory(), self.ipython_display
43 )
44 self.manage_widget = widget
~/anaconda3/envs/JupyterSystemEnv/lib/python3.7/site-packages/sparkmagic/controllerwidget/magicscontrollerwidget.py in init(self, spark_controller, ipywidget_factory, ipython_display, endpoints)
26 self.endpoints = endpoints
27
28 self._refresh()
29
30 def run(self):
~/anaconda3/envs/JupyterSystemEnv/lib/python3.7/site-packages/sparkmagic/controllerwidget/magicscontrollerwidget.py in _refresh(self)
85 self.endpoints,
86 self.endpoints_dropdown_widget,
87 self._refresh,
88 )
89 self.manage_endpoint = ManageEndpointWidget(
~/anaconda3/envs/JupyterSystemEnv/lib/python3.7/site-packages/sparkmagic/controllerwidget/addendpointwidget.py in init(self, spark_controller, ipywidget_factory, ipython_display, endpoints, endpoints_dropdown_widget, refresh_method)
32 events_handler_module = importlib.import_module(module)
33 auth_class = getattr(events_handler_module, class_name)
34 self.auth_instances[auth] = auth_class()
35
36 self.auth_type = self.ipywidget_factory.get_dropdown(
~/anaconda3/envs/JupyterSystemEnv/lib/python3.7/site-packages/sparkmagic/auth/basic.py in init(self, parsed_attributes)
33 self.password = "password"
34 HTTPBasicAuth.init(self, self.username, self.password)
35 Authenticator.init(self, parsed_attributes)
36
37 def get_widgets(self, widget_width):
~/anaconda3/envs/JupyterSystemEnv/lib/python3.7/site-packages/sparkmagic/auth/customauth.py in init(self, parsed_attributes)
22 else:
23 self.url = "http://example.com/livy"
24 self.widgets = self.get_widgets(WIDGET_WIDTH)
25
26 def get_widgets(self, widget_width):
~/anaconda3/envs/JupyterSystemEnv/lib/python3.7/site-packages/sparkmagic/auth/basic.py in get_widgets(self, widget_width)
50 )
51
52 self.password_widget = ipywidget_factory.get_password(
53 description="Password:", value=self.password, width=widget_width
54 )
AttributeError: 'IpyWidgetFactory' object has no attribute 'get_password'

pyspark TypeError: 'JavaPackage' object is not callable

I'm working with PySpark 3.1 in JupyterLab. I'm trying to use DocumentAssembler, so I wrote the following command:
from sparknlp.base import DocumentAssembler
documentAssembler = DocumentAssembler().setInputCol('text').setOutputCol('document')
But I got the error message
TypeError Traceback (most recent call last)
/tmp/ipykernel_3182/2386779740.py in <module>
1 from sparknlp.base import DocumentAssembler
----> 2 documentAssembler = DocumentAssembler().setInputCol('text').setOutputCol('document')
/usr/local/spark/python/pyspark/__init__.py in wrapper(self, *args, **kwargs)
112 raise TypeError("Method %s forces keyword arguments." % func.__name__)
113 self._input_kwargs = kwargs
--> 114 return func(self, **kwargs)
115 return wrapper
116
/opt/conda/lib/python3.9/site-packages/sparknlp/base.py in __init__(self)
385 #keyword_only
386 def __init__(self):
--> 387 super(DocumentAssembler, self).__init__(classname="com.johnsnowlabs.nlp.DocumentAssembler")
388 self._setDefault(outputCol="document", cleanupMode='disabled')
389
/usr/local/spark/python/pyspark/__init__.py in wrapper(self, *args, **kwargs)
112 raise TypeError("Method %s forces keyword arguments." % func.__name__)
113 self._input_kwargs = kwargs
--> 114 return func(self, **kwargs)
115 return wrapper
116
/opt/conda/lib/python3.9/site-packages/sparknlp/internal.py in __init__(self, classname)
105 self.setParams(**kwargs)
106 self.__class__._java_class_name = classname
--> 107 self._java_obj = self._new_java_obj(classname, self.uid)
108
109
/usr/local/spark/python/pyspark/ml/wrapper.py in _new_java_obj(java_class, *args)
64 java_obj = getattr(java_obj, name)
65 java_args = [_py2java(sc, arg) for arg in args]
---> 66 return java_obj(*java_args)
67
68 #staticmethod
TypeError: 'JavaPackage' object is not callable
Can you suggest the steps to resolve it?

pyspark blaze-AttributeError: 'DiGraph' object has no attribute 'edge'

When I wrap a pandas DataFrame with Blaze, it outputs the columns normally, and "traffic.head()" also runs normally. However, when I run "traffic.Stop_day", it raises the AttributeError shown below:
import odo
traffic=bl.Data(df)
print(traffic.fields)
['Agency', 'SubAgency', 'Description', 'Location', 'Latitude', 'Longitude', 'Accident', 'Belts', 'Personal Injury', 'Property Damage', 'Fatal', 'Commercial License', 'HAZMAT', 'Commercial Vehicle', 'Alcohol', 'Work Zone', 'State', 'VehicleType', 'Year', 'Make', 'Model', 'Color', 'Violation Type', 'Charge', 'Article', 'Contributed To Accident', 'Race', 'Gender', 'Driver City', 'Driver State', 'DL State', 'Arrest Type', 'Geolocation', 'Stop_month', 'Stop_day', 'Stop_year', 'Stop_hour', 'Stop_min', 'Stop_sec']
traffic.Stop_day
AttributeError Traceback (most recent call
last)
e:\software\python36\lib\site-packages\IPython\core\formatters.py in
call(self, obj)
700 type_pprinters=self.type_printers,
701 deferred_pprinters=self.deferred_printers)
--> 702 printer.pretty(obj)
703 printer.flush()
704 return stream.getvalue()
e:\software\python36\lib\site-packages\IPython\lib\pretty.py in
pretty(self, obj)
398 if cls is not object \
399 and callable(cls.dict.get('repr')):
--> 400 return _repr_pprint(obj, self, cycle)
401
402 return _default_pprint(obj, self, cycle)
e:\software\python36\lib\site-packages\IPython\lib\pretty.py in
_repr_pprint(obj, p, cycle)
693 """A pprint that just redirects to the normal repr function."""
694 # Find newlines and replace them with p.break_()
--> 695 output = repr(obj)
696 for idx,output_line in enumerate(output.splitlines()):
697 if idx:
e:\software\python36\lib\site-packages\blaze\interactive.py in
_choose_repr(self)
440 else:
441 warnings.warn(_warning_msg, DeprecationWarning, stacklevel=2)
--> 442 return expr_repr(self)
443
444
e:\software\python36\lib\site-packages\blaze\interactive.py in
expr_repr(expr, n)
358 isscalar(expr.dshape.measure) or
359 isinstance(expr.dshape.measure, Map))):
--> 360 return repr_tables(expr, 10)
361
362 # Smallish arrays
e:\software\python36\lib\site-packages\blaze\interactive.py in
repr_tables(expr, n)
240
241 def repr_tables(expr, n=10):
--> 242 result = concrete_head(expr, n).rename(columns={None: ''})
243
244 if isinstance(result, (DataFrame, Series)):
e:\software\python36\lib\site-packages\blaze\interactive.py in
concrete_head(expr, n)
220 return odo(head, DataFrame)
221 else:
--> 222 df = odo(head, DataFrame)
223 df.columns = [expr._name]
224 return df
e:\software\python36\lib\site-packages\odo\odo.py in odo(source,
target, **kwargs)
89 odo.append.append - Add things onto existing things
90 """
---> 91 return into(target, source, **kwargs)
e:\software\python36\lib\site-packages\multipledispatch\dispatcher.py
in call(self, *args, **kwargs)
276 self._cache[types] = func
277 try:
--> 278 return func(*args, **kwargs)
279
280 except MDNotImplementedError:
e:\software\python36\lib\site-packages\blaze\interactive.py in into(a,
b, **kwargs)
404 result = compute(b, return_type='native', **kwargs)
405 kwargs['dshape'] = b.dshape
--> 406 return into(a, result, **kwargs)
407
408
e:\software\python36\lib\site-packages\multipledispatch\dispatcher.py
in call(self, *args, **kwargs)
276 self._cache[types] = func
277 try:
--> 278 return func(*args, **kwargs)
279
280 except MDNotImplementedError:
e:\software\python36\lib\site-packages\odo\into.py in wrapped(*args,
**kwargs)
41 raise TypeError('dshape argument is not an instance of DataShape')
42 kwargs['dshape'] = dshape
---> 43 return f(*args, **kwargs)
44 return wrapped
45
e:\software\python36\lib\site-packages\odo\into.py in into_type(a, b,
dshape, **kwargs)
51 if dshape is None:
52 dshape = discover(b)
---> 53 return convert(a, b, dshape=dshape, **kwargs)
54
55
e:\software\python36\lib\site-packages\odo\core.py in call(self,
*args, **kwargs)
42
43 def call(self, *args, **kwargs):
---> 44 return _transform(self.graph, *args, **kwargs)
45
46
e:\software\python36\lib\site-packages\odo\core.py in
_transform(graph, target, source, excluded_edges, ooc_types, **kwargs)
55 pth = path(graph, type(source), target,
56 excluded_edges=excluded_edges,
---> 57 ooc_types=ooc_types)
58 try:
59 for (A, B, f) in pth:
e:\software\python36\lib\site-packages\odo\core.py in path(graph,
source, target, excluded_edges, ooc_types)
92 pth = nx.shortest_path(g, source=source, target=target, weight='cost')
93 result = [(src, tgt, graph.edge[src][tgt]['func'])
---> 94 for src, tgt in zip(pth, pth[1:])]
95 return result
96
e:\software\python36\lib\site-packages\odo\core.py in (.0)
92 pth = nx.shortest_path(g, source=source, target=target, weight='cost')
93 result = [(src, tgt, graph.edge[src][tgt]['func'])
---> 94 for src, tgt in zip(pth, pth[1:])]
95 return result
96
AttributeError: 'DiGraph' object has no attribute 'edge'
--------------------------------------------------------------------------- AttributeError Traceback (most recent call
last)
e:\software\python36\lib\site-packages\IPython\core\formatters.py in
call(self, obj)
343 method = get_real_method(obj, self.print_method)
344 if method is not None:
--> 345 return method()
346 return None
347 else:
e:\software\python36\lib\site-packages\blaze\interactive.py in
_warning_repr_html(self)
448 else:
449 warnings.warn(_warning_msg, DeprecationWarning, stacklevel=2)
--> 450 return to_html(self)
451
452
e:\software\python36\lib\site-packages\multipledispatch\dispatcher.py
in call(self, *args, **kwargs)
276 self._cache[types] = func
277 try:
--> 278 return func(*args, **kwargs)
279
280 except MDNotImplementedError:
e:\software\python36\lib\site-packages\blaze\interactive.py in
to_html(expr)
387 if not expr._resources() or ndim(expr) != 1:
388 return to_html(expr_repr(expr))
--> 389 return to_html(concrete_head(expr))
390
391
e:\software\python36\lib\site-packages\blaze\interactive.py in
concrete_head(expr, n)
220 return odo(head, DataFrame)
221 else:
--> 222 df = odo(head, DataFrame)
223 df.columns = [expr._name]
224 return df
e:\software\python36\lib\site-packages\odo\odo.py in odo(source,
target, **kwargs)
89 odo.append.append - Add things onto existing things
90 """
---> 91 return into(target, source, **kwargs)
e:\software\python36\lib\site-packages\multipledispatch\dispatcher.py
in call(self, *args, **kwargs)
276 self._cache[types] = func
277 try:
--> 278 return func(*args, **kwargs)
279
280 except MDNotImplementedError:
e:\software\python36\lib\site-packages\blaze\interactive.py in into(a,
b, **kwargs)
404 result = compute(b, return_type='native', **kwargs)
405 kwargs['dshape'] = b.dshape
--> 406 return into(a, result, **kwargs)
407
408
e:\software\python36\lib\site-packages\multipledispatch\dispatcher.py
in call(self, *args, **kwargs)
276 self._cache[types] = func
277 try:
--> 278 return func(*args, **kwargs)
279
280 except MDNotImplementedError:
e:\software\python36\lib\site-packages\odo\into.py in wrapped(*args,
**kwargs)
41 raise TypeError('dshape argument is not an instance of DataShape')
42 kwargs['dshape'] = dshape
---> 43 return f(*args, **kwargs)
44 return wrapped
45
e:\software\python36\lib\site-packages\odo\into.py in into_type(a, b,
dshape, **kwargs)
51 if dshape is None:
52 dshape = discover(b)
---> 53 return convert(a, b, dshape=dshape, **kwargs)
54
55
e:\software\python36\lib\site-packages\odo\core.py in call(self,
*args, **kwargs)
42
43 def call(self, *args, **kwargs):
---> 44 return _transform(self.graph, *args, **kwargs)
45
46
e:\software\python36\lib\site-packages\odo\core.py in
_transform(graph, target, source, excluded_edges, ooc_types, **kwargs)
55 pth = path(graph, type(source), target,
56 excluded_edges=excluded_edges,
---> 57 ooc_types=ooc_types)
58 try:
59 for (A, B, f) in pth:
e:\software\python36\lib\site-packages\odo\core.py in path(graph,
source, target, excluded_edges, ooc_types)
92 pth = nx.shortest_path(g, source=source, target=target, weight='cost')
93 result = [(src, tgt, graph.edge[src][tgt]['func'])
---> 94 for src, tgt in zip(pth, pth[1:])]
95 return result
96
e:\software\python36\lib\site-packages\odo\core.py in (.0)
92 pth = nx.shortest_path(g, source=source, target=target, weight='cost')
93 result = [(src, tgt, graph.edge[src][tgt]['func'])
---> 94 for src, tgt in zip(pth, pth[1:])]
95 return result
96
AttributeError: 'DiGraph' object has no attribute 'edge'
So, what's the problem?
Simply, use the command "traffic.data.Stop_year"! That's enough.

IPython Notebook AttributeError: 'thread._local' object has no attribute 'hooks'

sf['age'].show(view='Numeric')
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-19-0d0158b5e1d5> in <module>()
----> 1 sf['age'].show(view='Numeric')
/home/anurag/gl-env/local/lib/python2.7/site-packages/graphlab/data_structures/sarray.pyc in show(self, view)
2853 """
2854 from ..visualization.show import show
-> 2855 show(self, view=view)
2856
2857 def item_length(self):
/home/anurag/gl-env/local/lib/python2.7/site-packages/multipledispatch/dispatcher.pyc in __call__(self, *args, **kwargs)
162 self._cache[types] = func
163 try:
--> 164 return func(*args, **kwargs)
165
166 except MDNotImplementedError:
/home/anurag/gl-env/local/lib/python2.7/site-packages/graphlab/canvas/glc_display_dispatch.pyc in show(obj, **kwargs)
19
20 graphlab.canvas.inspect.find_vars(obj)
---> 21 return graphlab.canvas.show(graphlab.canvas.views.sarray.SArrayView(obj, params=kwargs))
22
23
/home/anurag/gl-env/local/lib/python2.7/site-packages/graphlab/canvas/utils.pyc in show(variable)
129 get_target().state.set_selected_variable(variable)
130 variable.validate_js_component_name(variable.get_js_component())
--> 131 return get_target().show()
132
133 def _get_id(ref):
/home/anurag/gl-env/local/lib/python2.7/site-packages/graphlab/canvas/target.pyc in show(self, variable)
175 IPython.core.display.Javascript(
176 data=self.__makeJS(_to_json(data), view.get_js_file(), view.get_js_component()),
--> 177 css=['//cdnjs.cloudflare.com/ajax/libs/font-awesome/4.1.0/css/font-awesome.min.css', self.get_asset_url() + 'css/canvas.css']
178 )
179 )
/home/anurag/gl-env/local/lib/python2.7/site-packages/IPython/core/display.pyc in display_javascript(*objs, **kwargs)
328 Metadata to be associated with the specific mimetype output.
329 """
--> 330 _display_mimetype('application/javascript', objs, **kwargs)
331
332
/home/anurag/gl-env/local/lib/python2.7/site-packages/IPython/core/display.pyc in _display_mimetype(mimetype, objs, raw, metadata)
74 # turn list of pngdata into list of { 'image/png': pngdata }
75 objs = [ {mimetype: obj} for obj in objs ]
---> 76 display(*objs, raw=raw, metadata=metadata, include=[mimetype])
77
78 #-----------------------------------------------------------------------------
/home/anurag/gl-env/local/lib/python2.7/site-packages/IPython/core/display.pyc in display(*objs, **kwargs)
169 # kwarg-specified metadata gets precedence
170 _merge(md_dict, metadata)
--> 171 publish_display_data(data=format_dict, metadata=md_dict)
172
173
/home/anurag/gl-env/local/lib/python2.7/site-packages/IPython/core/display.pyc in publish_display_data(data, metadata, source)
119 InteractiveShell.instance().display_pub.publish(
120 data=data,
--> 121 metadata=metadata,
122 )
123
/home/anurag/gl-env/local/lib/python2.7/site-packages/ipykernel/zmqshell.pyc in publish(self, data, metadata, source)
109 # message or None. If None is returned,
110 # the message has been 'used' and we return.
--> 111 for hook in self.thread_local.hooks:
112 msg = hook(msg)
113 if msg is None:
AttributeError: 'thread._local' object has no attribute 'hooks'
Why am I facing this?
Just make sure to upgrade ipykernel to the latest version (you need at least 4.5.0, which was released on September 2, 2016). This pull request fixed that issue:
pip install --upgrade ipykernel
There was an issue as well in GitHub reporting the problem, which was closed after the pull request was merged and 4.5.0 was released.
I was having the same issue -- if you're still having this problem, downgrading the ipykernel to version 4.3.0 seemed to work for me as a temporary workaround:
pip uninstall ipykernel
pip install ipykernel==4.3.0

dill dump_session with ipython

I'm trying to use the dill module to save my IPython session using dump_session(), but I'm getting an error message. I'm using IPython 1.0.0 and dill 0.2-a-dev 20120503. Does anyone out there have any insight? Thanks in advance.
Niall
Here's the enormous traceback:
--------------------------------------------------------------------------- AssertionError Traceback (most recent call last) <ipython-input-15-9934c16a537e> in <module>()
----> 1 dill.dump_session("/data/local/nrobin/tset.sess")
/home/h02/nrobin/.local/lib/python2.7/site-packages/dill-0.2a.dev_20120503-py2.7.egg/dill/dill.pyc in dump_session(filename, main_module)
104 pickler._main_module = main_module
105 pickler._session = True # is best indicator of when pickling a session
--> 106 pickler.dump(main_module)
107 pickler._session = False
108 finally:
/usr/local/sci/lib/python2.7/pickle.pyc in dump(self, obj)
222 if self.proto >= 2:
223 self.write(PROTO + chr(self.proto))
--> 224 self.save(obj)
225 self.write(STOP)
226
/usr/local/sci/lib/python2.7/pickle.pyc in save(self, obj)
284 f = self.dispatch.get(t)
285 if f:
--> 286 f(self, obj) # Call unbound method with explicit self
287 return
288
/home/h02/nrobin/.local/lib/python2.7/site-packages/dill-0.2a.dev_20120503-py2.7.egg/dill/dill.pyc in save_module(pickler, obj)
415 if _DEBUG[0]: print "M1: %s" % obj
416 pickler.save_reduce(__import__, (obj.__name__,), obj=obj,
--> 417 state=obj.__dict__.copy())
418 else:
419 if _DEBUG[0]: print "M2: %s" % obj
/usr/local/sci/lib/python2.7/pickle.pyc in save_reduce(self, func, args, state, listitems, dictitems, obj)
417
418 if state is not None:
--> 419 save(state)
420 write(BUILD)
421
/usr/local/sci/lib/python2.7/pickle.pyc in save(self, obj)
284 f = self.dispatch.get(t)
285 if f:
--> 286 f(self, obj) # Call unbound method with explicit self
287 return
288
/home/h02/nrobin/.local/lib/python2.7/site-packages/dill-0.2a.dev_20120503-py2.7.egg/dill/dill.pyc in save_module_dict(pickler, obj)
284 else:
285 if _DEBUG[0]: print "D2: %s" % "<dict ...>" #obj
--> 286 StockPickler.save_dict(pickler, obj)
287 return
288
/usr/local/sci/lib/python2.7/pickle.pyc in save_dict(self, obj)
647
648 self.memoize(obj)
--> 649 self._batch_setitems(obj.iteritems())
650
651 dispatch[DictionaryType] = save_dict
/usr/local/sci/lib/python2.7/pickle.pyc in _batch_setitems(self, items)
679 for k, v in tmp:
680 save(k)
--> 681 save(v)
682 write(SETITEMS)
683 elif n:
/usr/local/sci/lib/python2.7/pickle.pyc in save(self, obj)
329
330 # Save the reduce() output and finally memoize the object
--> 331 self.save_reduce(obj=obj, *rv)
332
333 def persistent_id(self, obj):
/usr/local/sci/lib/python2.7/pickle.pyc in save_reduce(self, func, args, state, listitems, dictitems, obj)
417
418 if state is not None:
--> 419 save(state)
420 write(BUILD)
421
/usr/local/sci/lib/python2.7/pickle.pyc in save(self, obj)
284 f = self.dispatch.get(t)
285 if f:
--> 286 f(self, obj) # Call unbound method with explicit self
287 return
288
/home/h02/nrobin/.local/lib/python2.7/site-packages/dill-0.2a.dev_20120503-py2.7.egg/dill/dill.pyc in save_module_dict(pickler, obj)
284 else:
285 if _DEBUG[0]: print "D2: %s" % "<dict ...>" #obj
--> 286 StockPickler.save_dict(pickler, obj)
287 return
288
/usr/local/sci/lib/python2.7/pickle.pyc in save_dict(self, obj)
647
648 self.memoize(obj)
--> 649 self._batch_setitems(obj.iteritems())
650
651 dispatch[DictionaryType] = save_dict
/usr/local/sci/lib/python2.7/pickle.pyc in _batch_setitems(self, items)
684 k, v = tmp[0]
685 save(k)
--> 686 save(v)
687 write(SETITEM)
688 # else tmp is empty, and we're done
/usr/local/sci/lib/python2.7/pickle.pyc in save(self, obj)
329
330 # Save the reduce() output and finally memoize the object
--> 331 self.save_reduce(obj=obj, *rv)
332
333 def persistent_id(self, obj):
/usr/local/sci/lib/python2.7/pickle.pyc in save_reduce(self, func, args, state, listitems, dictitems, obj)
417
418 if state is not None:
--> 419 save(state)
420 write(BUILD)
421
/usr/local/sci/lib/python2.7/pickle.pyc in save(self, obj)
284 f = self.dispatch.get(t)
285 if f:
--> 286 f(self, obj) # Call unbound method with explicit self
287 return
288
/home/h02/nrobin/.local/lib/python2.7/site-packages/dill-0.2a.dev_20120503-py2.7.egg/dill/dill.pyc in save_module_dict(pickler, obj)
284 else:
285 if _DEBUG[0]: print "D2: %s" % "<dict ...>" #obj
--> 286 StockPickler.save_dict(pickler, obj)
287 return
288
/usr/local/sci/lib/python2.7/pickle.pyc in save_dict(self, obj)
647
648 self.memoize(obj)
--> 649 self._batch_setitems(obj.iteritems())
650
651 dispatch[DictionaryType] = save_dict
/usr/local/sci/lib/python2.7/pickle.pyc in _batch_setitems(self, items)
679 for k, v in tmp:
680 save(k)
--> 681 save(v)
682 write(SETITEMS)
683 elif n:
/usr/local/sci/lib/python2.7/pickle.pyc in save(self, obj)
284 f = self.dispatch.get(t)
285 if f:
--> 286 f(self, obj) # Call unbound method with explicit self
287 return
288
/home/h02/nrobin/.local/lib/python2.7/site-packages/dill-0.2a.dev_20120503-py2.7.egg/dill/dill.pyc in save_instancemethod(pickler, obj)
303 if _DEBUG[0]: print "Me: %s" % obj
304 pickler.save_reduce(MethodType, (obj.im_func, obj.im_self,
--> 305 obj.im_class), obj=obj)
306 return
307
/usr/local/sci/lib/python2.7/pickle.pyc in save_reduce(self, func, args, state, listitems, dictitems, obj)
399 else:
400 save(func)
--> 401 save(args)
402 write(REDUCE)
403
/usr/local/sci/lib/python2.7/pickle.pyc in save(self, obj)
284 f = self.dispatch.get(t)
285 if f:
--> 286 f(self, obj) # Call unbound method with explicit self
287 return
288
/usr/local/sci/lib/python2.7/pickle.pyc in save_tuple(self, obj)
546 if n <= 3 and proto >= 2:
547 for element in obj:
--> 548 save(element)
549 # Subtle. Same as in the big comment below.
550 if id(obj) in memo:
/usr/local/sci/lib/python2.7/pickle.pyc in save(self, obj)
284 f = self.dispatch.get(t)
285 if f:
--> 286 f(self, obj) # Call unbound method with explicit self
287 return
288
/home/h02/nrobin/.local/lib/python2.7/site-packages/dill-0.2a.dev_20120503-py2.7.egg/dill/dill.pyc in save_function(pickler, obj)
268 pickler.save_reduce(FunctionType, (obj.func_code, obj.func_globals,
269 obj.func_name, obj.func_defaults,
--> 270 obj.func_closure), obj=obj)
271 else:
272 if _DEBUG[0]: print "F2: %s" % obj
/usr/local/sci/lib/python2.7/pickle.pyc in save_reduce(self, func, args, state, listitems, dictitems, obj)
399 else:
400 save(func)
--> 401 save(args)
402 write(REDUCE)
403
/usr/local/sci/lib/python2.7/pickle.pyc in save(self, obj)
284 f = self.dispatch.get(t)
285 if f:
--> 286 f(self, obj) # Call unbound method with explicit self
287 return
288
/usr/local/sci/lib/python2.7/pickle.pyc in save_tuple(self, obj)
560 write(MARK)
561 for element in obj:
--> 562 save(element)
563
564 if id(obj) in memo:
/usr/local/sci/lib/python2.7/pickle.pyc in save(self, obj)
284 f = self.dispatch.get(t)
285 if f:
--> 286 f(self, obj) # Call unbound method with explicit self
287 return
288
/home/h02/nrobin/.local/lib/python2.7/site-packages/dill-0.2a.dev_20120503-py2.7.egg/dill/dill.pyc in save_module_dict(pickler, obj)
284 else:
285 if _DEBUG[0]: print "D2: %s" % "<dict ...>" #obj
--> 286 StockPickler.save_dict(pickler, obj)
287 return
288
/usr/local/sci/lib/python2.7/pickle.pyc in save_dict(self, obj)
647
648 self.memoize(obj)
--> 649 self._batch_setitems(obj.iteritems())
650
651 dispatch[DictionaryType] = save_dict
/usr/local/sci/lib/python2.7/pickle.pyc in _batch_setitems(self, items)
679 for k, v in tmp:
680 save(k)
--> 681 save(v)
682 write(SETITEMS)
683 elif n:
/usr/local/sci/lib/python2.7/pickle.pyc in save(self, obj)
284 f = self.dispatch.get(t)
285 if f:
--> 286 f(self, obj) # Call unbound method with explicit self
287 return
288
/home/h02/nrobin/.local/lib/python2.7/site-packages/dill-0.2a.dev_20120503-py2.7.egg/dill/dill.pyc in save_instancemethod(pickler, obj)
303 if _DEBUG[0]: print "Me: %s" % obj
304 pickler.save_reduce(MethodType, (obj.im_func, obj.im_self,
--> 305 obj.im_class), obj=obj)
306 return
307
/usr/local/sci/lib/python2.7/pickle.pyc in save_reduce(self, func, args, state, listitems, dictitems, obj)
399 else:
400 save(func)
--> 401 save(args)
402 write(REDUCE)
403
/usr/local/sci/lib/python2.7/pickle.pyc in save(self, obj)
284 f = self.dispatch.get(t)
285 if f:
--> 286 f(self, obj) # Call unbound method with explicit self
287 return
288
/usr/local/sci/lib/python2.7/pickle.pyc in save_tuple(self, obj)
546 if n <= 3 and proto >= 2:
547 for element in obj:
--> 548 save(element)
549 # Subtle. Same as in the big comment below.
550 if id(obj) in memo:
/usr/local/sci/lib/python2.7/pickle.pyc in save(self, obj)
284 f = self.dispatch.get(t)
285 if f:
--> 286 f(self, obj) # Call unbound method with explicit self
287 return
288
/home/h02/nrobin/.local/lib/python2.7/site-packages/dill-0.2a.dev_20120503-py2.7.egg/dill/dill.pyc in save_function(pickler, obj)
268 pickler.save_reduce(FunctionType, (obj.func_code, obj.func_globals,
269 obj.func_name, obj.func_defaults,
--> 270 obj.func_closure), obj=obj)
271 else:
272 if _DEBUG[0]: print "F2: %s" % obj
/usr/local/sci/lib/python2.7/pickle.pyc in save_reduce(self, func, args, state, listitems, dictitems, obj)
403
404 if obj is not None:
--> 405 self.memoize(obj)
406
407 # More new special cases (that work with older protocols as
/usr/local/sci/lib/python2.7/pickle.pyc in memoize(self, obj)
242 if self.fast:
243 return
--> 244 assert id(obj) not in self.memo
245 memo_len = len(self.memo)
246 self.write(self.put(memo_len))
AssertionError:
dill.dump_session didn't work in ipython due to ipython's 'exit' type... basically, ipython hijacks the interpreter's exit method and turns it into some new object type. I have a flag that recognizes when "IPYTHON is True"... however dump_session was still seeing ipython's exit types in the globals, which were being saved in the session dump. Now, I assume they are singletons... and don't save them. Looks like it works as of dill revision #511, and you can serialize the ipython session to a file.
Let me know if you find other issues in ipython, as I rely on ipython users to tell me when there's an issue with dill in ipython.
The updates have been posted to github at https://github.com/uqfoundation,
and on the issue tracker http://trac.mystic.cacr.caltech.edu/project/pathos/ticket/131.