I am attempting to connect to an Amazon EMR cluster using Livy 0.7 and Spark from an Amazon Sagemaker Notebook running Amazon Linux 2. Can anyone help me understand this error and how I might go about fixing it?
When I go to run the following command on a SageMaker Notebook running Amazon Linux 2:
%reload_ext sparkmagic.magics
%spark add -s sparksession -l python -u $LIVY_ENDPOINT -t None
This results in the following error message:
AttributeError Traceback (most recent call last)
in
1 get_ipython().run_line_magic('reload_ext', 'sparkmagic.magics')
2 get_ipython().run_line_magic('spark', 'add -s jsmith -l python -u $LIVY_ENDPOINT -t None')
~/anaconda3/envs/JupyterSystemEnv/lib/python3.7/site-packages/IPython/core/interactiveshell.py in run_line_magic(self, magic_name, line, _stack_depth)
2415 kwargs['local_ns'] = self.get_local_scope(stack_depth)
2416 with self.builtin_trap:
2417 result = fn(*args, **kwargs)
2418 return result
2419
~/anaconda3/envs/JupyterSystemEnv/lib/python3.7/site-packages/decorator.py in fun(*args, **kw)
230 if not kwsyntax:
231 args, kw = fix(args, kw, sig)
232 return caller(func, *(extras + args), **kw)
233 fun.name = func.name
234 fun.doc = func.doc
~/anaconda3/envs/JupyterSystemEnv/lib/python3.7/site-packages/IPython/core/magic.py in (f, *a, **k)
185 # but it's overkill for just that one bit of state.
186 def magic_deco(arg):
187 call = lambda f, *a, **k: f(*a, **k)
188
189 if callable(arg):
~/anaconda3/envs/JupyterSystemEnv/lib/python3.7/site-packages/IPython/core/magics/extension.py in reload_ext(self, module_str)
61 if not module_str:
62 raise UsageError('Missing module name.')
63 self.shell.extension_manager.reload_extension(module_str)
~/anaconda3/envs/JupyterSystemEnv/lib/python3.7/site-packages/IPython/core/extensions.py in reload_extension(self, module_str)
128 self.loaded.add(module_str)
129 else:
130 self.load_extension(module_str)
131
132 def _call_load_ipython_extension(self, mod):
~/anaconda3/envs/JupyterSystemEnv/lib/python3.7/site-packages/IPython/core/extensions.py in load_extension(self, module_str)
85 dir=compress_user(self.ipython_extension_dir)))
86 mod = sys.modules[module_str]
87 if self._call_load_ipython_extension(mod):
88 self.loaded.add(module_str)
89 else:
~/anaconda3/envs/JupyterSystemEnv/lib/python3.7/site-packages/IPython/core/extensions.py in _call_load_ipython_extension(self, mod)
132 def _call_load_ipython_extension(self, mod):
133 if hasattr(mod, 'load_ipython_extension'):
134 mod.load_ipython_extension(self.shell)
135 return True
136
~/anaconda3/envs/JupyterSystemEnv/lib/python3.7/site-packages/sparkmagic/magics/remotesparkmagics.py in load_ipython_extension(ip)
322
323 def load_ipython_extension(ip):
324 ip.register_magics(RemoteSparkMagics)
~/anaconda3/envs/JupyterSystemEnv/lib/python3.7/site-packages/IPython/core/magic.py in register(self, *magic_objects)
449 if isinstance(m, type):
450 # If we're given an uninstantiated class
451 m = m(shell=self.shell)
452
453 # Now that we have an instance, we can register it and update the
~/anaconda3/envs/JupyterSystemEnv/lib/python3.7/site-packages/sparkmagic/magics/remotesparkmagics.py in init(self, shell, data, widget)
40 if widget is None:
41 widget = MagicsControllerWidget(
42 self.spark_controller, IpyWidgetFactory(), self.ipython_display
43 )
44 self.manage_widget = widget
~/anaconda3/envs/JupyterSystemEnv/lib/python3.7/site-packages/sparkmagic/controllerwidget/magicscontrollerwidget.py in init(self, spark_controller, ipywidget_factory, ipython_display, endpoints)
26 self.endpoints = endpoints
27
28 self._refresh()
29
30 def run(self):
~/anaconda3/envs/JupyterSystemEnv/lib/python3.7/site-packages/sparkmagic/controllerwidget/magicscontrollerwidget.py in _refresh(self)
85 self.endpoints,
86 self.endpoints_dropdown_widget,
87 self._refresh,
88 )
89 self.manage_endpoint = ManageEndpointWidget(
~/anaconda3/envs/JupyterSystemEnv/lib/python3.7/site-packages/sparkmagic/controllerwidget/addendpointwidget.py in init(self, spark_controller, ipywidget_factory, ipython_display, endpoints, endpoints_dropdown_widget, refresh_method)
32 events_handler_module = importlib.import_module(module)
33 auth_class = getattr(events_handler_module, class_name)
34 self.auth_instances[auth] = auth_class()
35
36 self.auth_type = self.ipywidget_factory.get_dropdown(
~/anaconda3/envs/JupyterSystemEnv/lib/python3.7/site-packages/sparkmagic/auth/basic.py in init(self, parsed_attributes)
33 self.password = "password"
34 HTTPBasicAuth.init(self, self.username, self.password)
35 Authenticator.init(self, parsed_attributes)
36
37 def get_widgets(self, widget_width):
~/anaconda3/envs/JupyterSystemEnv/lib/python3.7/site-packages/sparkmagic/auth/customauth.py in init(self, parsed_attributes)
22 else:
23 self.url = "http://example.com/livy"
24 self.widgets = self.get_widgets(WIDGET_WIDTH)
25
26 def get_widgets(self, widget_width):
~/anaconda3/envs/JupyterSystemEnv/lib/python3.7/site-packages/sparkmagic/auth/basic.py in get_widgets(self, widget_width)
50 )
51
52 self.password_widget = ipywidget_factory.get_password(
53 description="Password:", value=self.password, width=widget_width
54 )
AttributeError: 'IpyWidgetFactory' object has no attribute 'get_password'
I'm working with PySpark 3.1 in JupyterLab. I'm trying to use DocumentAssembler, so I wrote the following command:
from sparknlp.base import DocumentAssembler
documentAssembler = DocumentAssembler().setInputCol('text').setOutputCol('document')
But I got the following error message:
TypeError Traceback (most recent call last)
/tmp/ipykernel_3182/2386779740.py in <module>
1 from sparknlp.base import DocumentAssembler
----> 2 documentAssembler = DocumentAssembler().setInputCol('text').setOutputCol('document')
/usr/local/spark/python/pyspark/__init__.py in wrapper(self, *args, **kwargs)
112 raise TypeError("Method %s forces keyword arguments." % func.__name__)
113 self._input_kwargs = kwargs
--> 114 return func(self, **kwargs)
115 return wrapper
116
/opt/conda/lib/python3.9/site-packages/sparknlp/base.py in __init__(self)
385 #keyword_only
386 def __init__(self):
--> 387 super(DocumentAssembler, self).__init__(classname="com.johnsnowlabs.nlp.DocumentAssembler")
388 self._setDefault(outputCol="document", cleanupMode='disabled')
389
/usr/local/spark/python/pyspark/__init__.py in wrapper(self, *args, **kwargs)
112 raise TypeError("Method %s forces keyword arguments." % func.__name__)
113 self._input_kwargs = kwargs
--> 114 return func(self, **kwargs)
115 return wrapper
116
/opt/conda/lib/python3.9/site-packages/sparknlp/internal.py in __init__(self, classname)
105 self.setParams(**kwargs)
106 self.__class__._java_class_name = classname
--> 107 self._java_obj = self._new_java_obj(classname, self.uid)
108
109
/usr/local/spark/python/pyspark/ml/wrapper.py in _new_java_obj(java_class, *args)
64 java_obj = getattr(java_obj, name)
65 java_args = [_py2java(sc, arg) for arg in args]
---> 66 return java_obj(*java_args)
67
68 #staticmethod
TypeError: 'JavaPackage' object is not callable
Can you suggest the steps to resolve it?
hdfs_options = {
"hdfs_host": "...",
"hdfs_port": 50070,
"hdfs_user": "..."
}
opts = PipelineOptions(**hdfs_options)
token = run_shell_cmd('curl -s --negotiate -u : "http://nn:50070/webhdfs/v1/?op=GETDELEGATIONTOKEN"')
p = beam.Pipeline(options=opts)
p.apply(
beam.io.ReadFromText(f"hdfs:///my_path/*.md?delegation={token}") # does not work
);
I have the token and a delegation token file, but I am unable to authenticate with either.
Match operation failed with exceptions {'hdfs:///my_path/*.md?delegation=...': BeamIOError("List operation failed with exceptions {'hdfs:///my_path': HdfsError('Authentication failure. Check your credentials.')}")}
Stacktrace
---------------------------------------------------------------------------
BeamIOError Traceback (most recent call last)
<ipython-input-251-127e501adfaa> in <module>()
2
3 p.apply(
----> 4 beam.io.ReadFromText(f"hdfs:///my_path/*.md?delegation={token}")
5 );
/root/miniconda3/lib/python3.7/site-packages/apache_beam/io/textio.py in __init__(self, file_pattern, min_bundle_size, compression_type, strip_trailing_newlines, coder, validate, skip_header_lines, **kwargs)
540 file_pattern, min_bundle_size, compression_type,
541 strip_trailing_newlines, coder, validate=validate,
--> 542 skip_header_lines=skip_header_lines)
543
544 def expand(self, pvalue):
/root/miniconda3/lib/python3.7/site-packages/apache_beam/io/textio.py in __init__(self, file_pattern, min_bundle_size, compression_type, strip_trailing_newlines, coder, buffer_size, validate, skip_header_lines, header_processor_fns)
124 super(_TextSource, self).__init__(file_pattern, min_bundle_size,
125 compression_type=compression_type,
--> 126 validate=validate)
127
128 self._strip_trailing_newlines = strip_trailing_newlines
/root/miniconda3/lib/python3.7/site-packages/apache_beam/io/filebasedsource.py in __init__(self, file_pattern, min_bundle_size, compression_type, splittable, validate)
123 self._splittable = splittable
124 if validate and file_pattern.is_accessible():
--> 125 self._validate()
126
127 def display_data(self):
/root/miniconda3/lib/python3.7/site-packages/apache_beam/options/value_provider.py in _f(self, *args, **kwargs)
138 if not obj.is_accessible():
139 raise error.RuntimeValueProviderError('%s not accessible' % obj)
--> 140 return fnc(self, *args, **kwargs)
141 return _f
142 return _check_accessible
/root/miniconda3/lib/python3.7/site-packages/apache_beam/io/filebasedsource.py in _validate(self)
181
182 # Limit the responses as we only want to check if something exists
--> 183 match_result = FileSystems.match([pattern], limits=[1])[0]
184 if len(match_result.metadata_list) <= 0:
185 raise IOError(
/root/miniconda3/lib/python3.7/site-packages/apache_beam/io/filesystems.py in match(patterns, limits)
198 return []
199 filesystem = FileSystems.get_filesystem(patterns[0])
--> 200 return filesystem.match(patterns, limits)
201
202 #staticmethod
/root/miniconda3/lib/python3.7/site-packages/apache_beam/io/filesystem.py in match(self, patterns, limits)
718
719 if exceptions:
--> 720 raise BeamIOError("Match operation failed", exceptions)
721 return result
722
When I wrap a pandas DataFrame with Blaze, it outputs the columns normally, and running "traffic.head()" also works. However, when I run "traffic.Stop_day", it raises the AttributeError shown below:
import odo
traffic=bl.Data(df)
print(traffic.fields)
['Agency', 'SubAgency', 'Description', 'Location', 'Latitude', 'Longitude', 'Accident', 'Belts', 'Personal Injury', 'Property Damage', 'Fatal', 'Commercial License', 'HAZMAT', 'Commercial Vehicle', 'Alcohol', 'Work Zone', 'State', 'VehicleType', 'Year', 'Make', 'Model', 'Color', 'Violation Type', 'Charge', 'Article', 'Contributed To Accident', 'Race', 'Gender', 'Driver City', 'Driver State', 'DL State', 'Arrest Type', 'Geolocation', 'Stop_month', 'Stop_day', 'Stop_year', 'Stop_hour', 'Stop_min', 'Stop_sec']
traffic.Stop_day
AttributeError Traceback (most recent call
last)
e:\software\python36\lib\site-packages\IPython\core\formatters.py in
call(self, obj)
700 type_pprinters=self.type_printers,
701 deferred_pprinters=self.deferred_printers)
--> 702 printer.pretty(obj)
703 printer.flush()
704 return stream.getvalue()
e:\software\python36\lib\site-packages\IPython\lib\pretty.py in
pretty(self, obj)
398 if cls is not object \
399 and callable(cls.dict.get('repr')):
--> 400 return _repr_pprint(obj, self, cycle)
401
402 return _default_pprint(obj, self, cycle)
e:\software\python36\lib\site-packages\IPython\lib\pretty.py in
_repr_pprint(obj, p, cycle)
693 """A pprint that just redirects to the normal repr function."""
694 # Find newlines and replace them with p.break_()
--> 695 output = repr(obj)
696 for idx,output_line in enumerate(output.splitlines()):
697 if idx:
e:\software\python36\lib\site-packages\blaze\interactive.py in
_choose_repr(self)
440 else:
441 warnings.warn(_warning_msg, DeprecationWarning, stacklevel=2)
--> 442 return expr_repr(self)
443
444
e:\software\python36\lib\site-packages\blaze\interactive.py in
expr_repr(expr, n)
358 isscalar(expr.dshape.measure) or
359 isinstance(expr.dshape.measure, Map))):
--> 360 return repr_tables(expr, 10)
361
362 # Smallish arrays
e:\software\python36\lib\site-packages\blaze\interactive.py in
repr_tables(expr, n)
240
241 def repr_tables(expr, n=10):
--> 242 result = concrete_head(expr, n).rename(columns={None: ''})
243
244 if isinstance(result, (DataFrame, Series)):
e:\software\python36\lib\site-packages\blaze\interactive.py in
concrete_head(expr, n)
220 return odo(head, DataFrame)
221 else:
--> 222 df = odo(head, DataFrame)
223 df.columns = [expr._name]
224 return df
e:\software\python36\lib\site-packages\odo\odo.py in odo(source,
target, **kwargs)
89 odo.append.append - Add things onto existing things
90 """
---> 91 return into(target, source, **kwargs)
e:\software\python36\lib\site-packages\multipledispatch\dispatcher.py
in call(self, *args, **kwargs)
276 self._cache[types] = func
277 try:
--> 278 return func(*args, **kwargs)
279
280 except MDNotImplementedError:
e:\software\python36\lib\site-packages\blaze\interactive.py in into(a,
b, **kwargs)
404 result = compute(b, return_type='native', **kwargs)
405 kwargs['dshape'] = b.dshape
--> 406 return into(a, result, **kwargs)
407
408
e:\software\python36\lib\site-packages\multipledispatch\dispatcher.py
in call(self, *args, **kwargs)
276 self._cache[types] = func
277 try:
--> 278 return func(*args, **kwargs)
279
280 except MDNotImplementedError:
e:\software\python36\lib\site-packages\odo\into.py in wrapped(*args,
**kwargs)
41 raise TypeError('dshape argument is not an instance of DataShape')
42 kwargs['dshape'] = dshape
---> 43 return f(*args, **kwargs)
44 return wrapped
45
e:\software\python36\lib\site-packages\odo\into.py in into_type(a, b,
dshape, **kwargs)
51 if dshape is None:
52 dshape = discover(b)
---> 53 return convert(a, b, dshape=dshape, **kwargs)
54
55
e:\software\python36\lib\site-packages\odo\core.py in call(self,
*args, **kwargs)
42
43 def call(self, *args, **kwargs):
---> 44 return _transform(self.graph, *args, **kwargs)
45
46
e:\software\python36\lib\site-packages\odo\core.py in
_transform(graph, target, source, excluded_edges, ooc_types, **kwargs)
55 pth = path(graph, type(source), target,
56 excluded_edges=excluded_edges,
---> 57 ooc_types=ooc_types)
58 try:
59 for (A, B, f) in pth:
e:\software\python36\lib\site-packages\odo\core.py in path(graph,
source, target, excluded_edges, ooc_types)
92 pth = nx.shortest_path(g, source=source, target=target, weight='cost')
93 result = [(src, tgt, graph.edge[src][tgt]['func'])
---> 94 for src, tgt in zip(pth, pth[1:])]
95 return result
96
e:\software\python36\lib\site-packages\odo\core.py in (.0)
92 pth = nx.shortest_path(g, source=source, target=target, weight='cost')
93 result = [(src, tgt, graph.edge[src][tgt]['func'])
---> 94 for src, tgt in zip(pth, pth[1:])]
95 return result
96
AttributeError: 'DiGraph' object has no attribute 'edge'
--------------------------------------------------------------------------- AttributeError Traceback (most recent call
last)
e:\software\python36\lib\site-packages\IPython\core\formatters.py in
call(self, obj)
343 method = get_real_method(obj, self.print_method)
344 if method is not None:
--> 345 return method()
346 return None
347 else:
e:\software\python36\lib\site-packages\blaze\interactive.py in
_warning_repr_html(self)
448 else:
449 warnings.warn(_warning_msg, DeprecationWarning, stacklevel=2)
--> 450 return to_html(self)
451
452
e:\software\python36\lib\site-packages\multipledispatch\dispatcher.py
in call(self, *args, **kwargs)
276 self._cache[types] = func
277 try:
--> 278 return func(*args, **kwargs)
279
280 except MDNotImplementedError:
e:\software\python36\lib\site-packages\blaze\interactive.py in
to_html(expr)
387 if not expr._resources() or ndim(expr) != 1:
388 return to_html(expr_repr(expr))
--> 389 return to_html(concrete_head(expr))
390
391
e:\software\python36\lib\site-packages\blaze\interactive.py in
concrete_head(expr, n)
220 return odo(head, DataFrame)
221 else:
--> 222 df = odo(head, DataFrame)
223 df.columns = [expr._name]
224 return df
e:\software\python36\lib\site-packages\odo\odo.py in odo(source,
target, **kwargs)
89 odo.append.append - Add things onto existing things
90 """
---> 91 return into(target, source, **kwargs)
e:\software\python36\lib\site-packages\multipledispatch\dispatcher.py
in call(self, *args, **kwargs)
276 self._cache[types] = func
277 try:
--> 278 return func(*args, **kwargs)
279
280 except MDNotImplementedError:
e:\software\python36\lib\site-packages\blaze\interactive.py in into(a,
b, **kwargs)
404 result = compute(b, return_type='native', **kwargs)
405 kwargs['dshape'] = b.dshape
--> 406 return into(a, result, **kwargs)
407
408
e:\software\python36\lib\site-packages\multipledispatch\dispatcher.py
in call(self, *args, **kwargs)
276 self._cache[types] = func
277 try:
--> 278 return func(*args, **kwargs)
279
280 except MDNotImplementedError:
e:\software\python36\lib\site-packages\odo\into.py in wrapped(*args,
**kwargs)
41 raise TypeError('dshape argument is not an instance of DataShape')
42 kwargs['dshape'] = dshape
---> 43 return f(*args, **kwargs)
44 return wrapped
45
e:\software\python36\lib\site-packages\odo\into.py in into_type(a, b,
dshape, **kwargs)
51 if dshape is None:
52 dshape = discover(b)
---> 53 return convert(a, b, dshape=dshape, **kwargs)
54
55
e:\software\python36\lib\site-packages\odo\core.py in call(self,
*args, **kwargs)
42
43 def call(self, *args, **kwargs):
---> 44 return _transform(self.graph, *args, **kwargs)
45
46
e:\software\python36\lib\site-packages\odo\core.py in
_transform(graph, target, source, excluded_edges, ooc_types, **kwargs)
55 pth = path(graph, type(source), target,
56 excluded_edges=excluded_edges,
---> 57 ooc_types=ooc_types)
58 try:
59 for (A, B, f) in pth:
e:\software\python36\lib\site-packages\odo\core.py in path(graph,
source, target, excluded_edges, ooc_types)
92 pth = nx.shortest_path(g, source=source, target=target, weight='cost')
93 result = [(src, tgt, graph.edge[src][tgt]['func'])
---> 94 for src, tgt in zip(pth, pth[1:])]
95 return result
96
e:\software\python36\lib\site-packages\odo\core.py in (.0)
92 pth = nx.shortest_path(g, source=source, target=target, weight='cost')
93 result = [(src, tgt, graph.edge[src][tgt]['func'])
---> 94 for src, tgt in zip(pth, pth[1:])]
95 return result
96
AttributeError: 'DiGraph' object has no attribute 'edge'
So, what's the problem?
Simply, use the command "traffic.data.Stop_year"! That's enough.
sf['age'].show(view='Numeric')
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-19-0d0158b5e1d5> in <module>()
----> 1 sf['age'].show(view='Numeric')
/home/anurag/gl-env/local/lib/python2.7/site-packages/graphlab/data_structures/sarray.pyc in show(self, view)
2853 """
2854 from ..visualization.show import show
-> 2855 show(self, view=view)
2856
2857 def item_length(self):
/home/anurag/gl-env/local/lib/python2.7/site-packages/multipledispatch/dispatcher.pyc in __call__(self, *args, **kwargs)
162 self._cache[types] = func
163 try:
--> 164 return func(*args, **kwargs)
165
166 except MDNotImplementedError:
/home/anurag/gl-env/local/lib/python2.7/site-packages/graphlab/canvas/glc_display_dispatch.pyc in show(obj, **kwargs)
19
20 graphlab.canvas.inspect.find_vars(obj)
---> 21 return graphlab.canvas.show(graphlab.canvas.views.sarray.SArrayView(obj, params=kwargs))
22
23
/home/anurag/gl-env/local/lib/python2.7/site-packages/graphlab/canvas/utils.pyc in show(variable)
129 get_target().state.set_selected_variable(variable)
130 variable.validate_js_component_name(variable.get_js_component())
--> 131 return get_target().show()
132
133 def _get_id(ref):
/home/anurag/gl-env/local/lib/python2.7/site-packages/graphlab/canvas/target.pyc in show(self, variable)
175 IPython.core.display.Javascript(
176 data=self.__makeJS(_to_json(data), view.get_js_file(), view.get_js_component()),
--> 177 css=['//cdnjs.cloudflare.com/ajax/libs/font-awesome/4.1.0/css/font-awesome.min.css', self.get_asset_url() + 'css/canvas.css']
178 )
179 )
/home/anurag/gl-env/local/lib/python2.7/site-packages/IPython/core/display.pyc in display_javascript(*objs, **kwargs)
328 Metadata to be associated with the specific mimetype output.
329 """
--> 330 _display_mimetype('application/javascript', objs, **kwargs)
331
332
/home/anurag/gl-env/local/lib/python2.7/site-packages/IPython/core/display.pyc in _display_mimetype(mimetype, objs, raw, metadata)
74 # turn list of pngdata into list of { 'image/png': pngdata }
75 objs = [ {mimetype: obj} for obj in objs ]
---> 76 display(*objs, raw=raw, metadata=metadata, include=[mimetype])
77
78 #-----------------------------------------------------------------------------
/home/anurag/gl-env/local/lib/python2.7/site-packages/IPython/core/display.pyc in display(*objs, **kwargs)
169 # kwarg-specified metadata gets precedence
170 _merge(md_dict, metadata)
--> 171 publish_display_data(data=format_dict, metadata=md_dict)
172
173
/home/anurag/gl-env/local/lib/python2.7/site-packages/IPython/core/display.pyc in publish_display_data(data, metadata, source)
119 InteractiveShell.instance().display_pub.publish(
120 data=data,
--> 121 metadata=metadata,
122 )
123
/home/anurag/gl-env/local/lib/python2.7/site-packages/ipykernel/zmqshell.pyc in publish(self, data, metadata, source)
109 # message or None. If None is returned,
110 # the message has been 'used' and we return.
--> 111 for hook in self.thread_local.hooks:
112 msg = hook(msg)
113 if msg is None:
AttributeError: 'thread._local' object has no attribute 'hooks'
Why am I facing this?
Just make sure to upgrade ipykernel to the latest version (you need at least 4.5.0, which was released on September 2, 2016). This pull request fixed that issue:
pip install --upgrade ipykernel
There was an issue as well in GitHub reporting the problem, which was closed after the pull request was merged and 4.5.0 was released.
I was having the same issue -- if you're still having this problem, downgrading the ipykernel to version 4.3.0 seemed to work for me as a temporary workaround:
pip uninstall ipykernel
pip install ipykernel==4.3.0