I am attempting to connect to an Amazon EMR cluster using Livy 0.7 and Spark from an Amazon SageMaker Notebook running Amazon Linux 2. Can anyone help me understand this error and how I might go about fixing it?
I run the following command on a SageMaker Notebook running Amazon Linux 2:
%reload_ext sparkmagic.magics %spark add -s sparksession -l python -u $LIVY_ENDPOINT -t None
This results in the following error message:
AttributeError Traceback (most recent call last)
in
1 get_ipython().run_line_magic('reload_ext', 'sparkmagic.magics')
2 get_ipython().run_line_magic('spark', 'add -s jsmith -l python -u $LIVY_ENDPOINT -t None')
~/anaconda3/envs/JupyterSystemEnv/lib/python3.7/site-packages/IPython/core/interactiveshell.py in run_line_magic(self, magic_name, line, _stack_depth)
2415 kwargs['local_ns'] = self.get_local_scope(stack_depth)
2416 with self.builtin_trap:
2417 result = fn(*args, **kwargs)
2418 return result
2419
~/anaconda3/envs/JupyterSystemEnv/lib/python3.7/site-packages/decorator.py in fun(*args, **kw)
230 if not kwsyntax:
231 args, kw = fix(args, kw, sig)
232 return caller(func, *(extras + args), **kw)
233 fun.name = func.name
234 fun.doc = func.doc
~/anaconda3/envs/JupyterSystemEnv/lib/python3.7/site-packages/IPython/core/magic.py in (f, *a, **k)
185 # but it's overkill for just that one bit of state.
186 def magic_deco(arg):
187 call = lambda f, *a, **k: f(*a, **k)
188
189 if callable(arg):
~/anaconda3/envs/JupyterSystemEnv/lib/python3.7/site-packages/IPython/core/magics/extension.py in reload_ext(self, module_str)
61 if not module_str:
62 raise UsageError('Missing module name.')
63 self.shell.extension_manager.reload_extension(module_str)
~/anaconda3/envs/JupyterSystemEnv/lib/python3.7/site-packages/IPython/core/extensions.py in reload_extension(self, module_str)
128 self.loaded.add(module_str)
129 else:
130 self.load_extension(module_str)
131
132 def _call_load_ipython_extension(self, mod):
~/anaconda3/envs/JupyterSystemEnv/lib/python3.7/site-packages/IPython/core/extensions.py in load_extension(self, module_str)
85 dir=compress_user(self.ipython_extension_dir)))
86 mod = sys.modules[module_str]
87 if self._call_load_ipython_extension(mod):
88 self.loaded.add(module_str)
89 else:
~/anaconda3/envs/JupyterSystemEnv/lib/python3.7/site-packages/IPython/core/extensions.py in _call_load_ipython_extension(self, mod)
132 def _call_load_ipython_extension(self, mod):
133 if hasattr(mod, 'load_ipython_extension'):
134 mod.load_ipython_extension(self.shell)
135 return True
136
~/anaconda3/envs/JupyterSystemEnv/lib/python3.7/site-packages/sparkmagic/magics/remotesparkmagics.py in load_ipython_extension(ip)
322
323 def load_ipython_extension(ip):
324 ip.register_magics(RemoteSparkMagics)
~/anaconda3/envs/JupyterSystemEnv/lib/python3.7/site-packages/IPython/core/magic.py in register(self, *magic_objects)
449 if isinstance(m, type):
450 # If we're given an uninstantiated class
451 m = m(shell=self.shell)
452
453 # Now that we have an instance, we can register it and update the
~/anaconda3/envs/JupyterSystemEnv/lib/python3.7/site-packages/sparkmagic/magics/remotesparkmagics.py in init(self, shell, data, widget)
40 if widget is None:
41 widget = MagicsControllerWidget(
42 self.spark_controller, IpyWidgetFactory(), self.ipython_display
43 )
44 self.manage_widget = widget
~/anaconda3/envs/JupyterSystemEnv/lib/python3.7/site-packages/sparkmagic/controllerwidget/magicscontrollerwidget.py in init(self, spark_controller, ipywidget_factory, ipython_display, endpoints)
26 self.endpoints = endpoints
27
28 self._refresh()
29
30 def run(self):
~/anaconda3/envs/JupyterSystemEnv/lib/python3.7/site-packages/sparkmagic/controllerwidget/magicscontrollerwidget.py in _refresh(self)
85 self.endpoints,
86 self.endpoints_dropdown_widget,
87 self._refresh,
88 )
89 self.manage_endpoint = ManageEndpointWidget(
~/anaconda3/envs/JupyterSystemEnv/lib/python3.7/site-packages/sparkmagic/controllerwidget/addendpointwidget.py in init(self, spark_controller, ipywidget_factory, ipython_display, endpoints, endpoints_dropdown_widget, refresh_method)
32 events_handler_module = importlib.import_module(module)
33 auth_class = getattr(events_handler_module, class_name)
34 self.auth_instances[auth] = auth_class()
35
36 self.auth_type = self.ipywidget_factory.get_dropdown(
~/anaconda3/envs/JupyterSystemEnv/lib/python3.7/site-packages/sparkmagic/auth/basic.py in init(self, parsed_attributes)
33 self.password = "password"
34 HTTPBasicAuth.init(self, self.username, self.password)
35 Authenticator.init(self, parsed_attributes)
36
37 def get_widgets(self, widget_width):
~/anaconda3/envs/JupyterSystemEnv/lib/python3.7/site-packages/sparkmagic/auth/customauth.py in init(self, parsed_attributes)
22 else:
23 self.url = "http://example.com/livy"
24 self.widgets = self.get_widgets(WIDGET_WIDTH)
25
26 def get_widgets(self, widget_width):
~/anaconda3/envs/JupyterSystemEnv/lib/python3.7/site-packages/sparkmagic/auth/basic.py in get_widgets(self, widget_width)
50 )
51
52 self.password_widget = ipywidget_factory.get_password(
53 description="Password:", value=self.password, width=widget_width
54 )
AttributeError: 'IpyWidgetFactory' object has no attribute 'get_password'
I'm working with PySpark 3.1 in JupyterLab. I'm trying to use DocumentAssembler, so I wrote the following command:
from sparknlp.base import DocumentAssembler
documentAssembler = DocumentAssembler().setInputCol('text').setOutputCol('document')
But I got the following error message:
TypeError Traceback (most recent call last)
/tmp/ipykernel_3182/2386779740.py in <module>
1 from sparknlp.base import DocumentAssembler
----> 2 documentAssembler = DocumentAssembler().setInputCol('text').setOutputCol('document')
/usr/local/spark/python/pyspark/__init__.py in wrapper(self, *args, **kwargs)
112 raise TypeError("Method %s forces keyword arguments." % func.__name__)
113 self._input_kwargs = kwargs
--> 114 return func(self, **kwargs)
115 return wrapper
116
/opt/conda/lib/python3.9/site-packages/sparknlp/base.py in __init__(self)
385 #keyword_only
386 def __init__(self):
--> 387 super(DocumentAssembler, self).__init__(classname="com.johnsnowlabs.nlp.DocumentAssembler")
388 self._setDefault(outputCol="document", cleanupMode='disabled')
389
/usr/local/spark/python/pyspark/__init__.py in wrapper(self, *args, **kwargs)
112 raise TypeError("Method %s forces keyword arguments." % func.__name__)
113 self._input_kwargs = kwargs
--> 114 return func(self, **kwargs)
115 return wrapper
116
/opt/conda/lib/python3.9/site-packages/sparknlp/internal.py in __init__(self, classname)
105 self.setParams(**kwargs)
106 self.__class__._java_class_name = classname
--> 107 self._java_obj = self._new_java_obj(classname, self.uid)
108
109
/usr/local/spark/python/pyspark/ml/wrapper.py in _new_java_obj(java_class, *args)
64 java_obj = getattr(java_obj, name)
65 java_args = [_py2java(sc, arg) for arg in args]
---> 66 return java_obj(*java_args)
67
68 #staticmethod
TypeError: 'JavaPackage' object is not callable
Can you suggest the steps to resolve it?
hdfs_options = {
"hdfs_host": "...",
"hdfs_port": 50070,
"hdfs_user": "..."
}
opts = PipelineOptions(**hdfs_options)
token = run_shell_cmd('curl -s --negotiate -u : "http://nn:50070/webhdfs/v1/?op=GETDELEGATIONTOKEN"'
p = beam.Pipeline(options=opts)
p.apply(
beam.io.ReadFromText(f"hdfs:///my_path/*.md?delegation={token}") # does not work
);
I have the token and a delegation token file, but I am unable to authenticate with either.
Match operation failed with exceptions {'hdfs:///my_path/*.md?delegation=...': BeamIOError("List operation failed with exceptions {'hdfs:///my_path': HdfsError('Authentication failure. Check your credentials.')}")}
Stacktrace
---------------------------------------------------------------------------
BeamIOError Traceback (most recent call last)
<ipython-input-251-127e501adfaa> in <module>()
2
3 p.apply(
----> 4 beam.io.ReadFromText(f"hdfs:///my_path/*.md?delegation={token}")
5 );
/root/miniconda3/lib/python3.7/site-packages/apache_beam/io/textio.py in __init__(self, file_pattern, min_bundle_size, compression_type, strip_trailing_newlines, coder, validate, skip_header_lines, **kwargs)
540 file_pattern, min_bundle_size, compression_type,
541 strip_trailing_newlines, coder, validate=validate,
--> 542 skip_header_lines=skip_header_lines)
543
544 def expand(self, pvalue):
/root/miniconda3/lib/python3.7/site-packages/apache_beam/io/textio.py in __init__(self, file_pattern, min_bundle_size, compression_type, strip_trailing_newlines, coder, buffer_size, validate, skip_header_lines, header_processor_fns)
124 super(_TextSource, self).__init__(file_pattern, min_bundle_size,
125 compression_type=compression_type,
--> 126 validate=validate)
127
128 self._strip_trailing_newlines = strip_trailing_newlines
/root/miniconda3/lib/python3.7/site-packages/apache_beam/io/filebasedsource.py in __init__(self, file_pattern, min_bundle_size, compression_type, splittable, validate)
123 self._splittable = splittable
124 if validate and file_pattern.is_accessible():
--> 125 self._validate()
126
127 def display_data(self):
/root/miniconda3/lib/python3.7/site-packages/apache_beam/options/value_provider.py in _f(self, *args, **kwargs)
138 if not obj.is_accessible():
139 raise error.RuntimeValueProviderError('%s not accessible' % obj)
--> 140 return fnc(self, *args, **kwargs)
141 return _f
142 return _check_accessible
/root/miniconda3/lib/python3.7/site-packages/apache_beam/io/filebasedsource.py in _validate(self)
181
182 # Limit the responses as we only want to check if something exists
--> 183 match_result = FileSystems.match([pattern], limits=[1])[0]
184 if len(match_result.metadata_list) <= 0:
185 raise IOError(
/root/miniconda3/lib/python3.7/site-packages/apache_beam/io/filesystems.py in match(patterns, limits)
198 return []
199 filesystem = FileSystems.get_filesystem(patterns[0])
--> 200 return filesystem.match(patterns, limits)
201
202 #staticmethod
/root/miniconda3/lib/python3.7/site-packages/apache_beam/io/filesystem.py in match(self, patterns, limits)
718
719 if exceptions:
--> 720 raise BeamIOError("Match operation failed", exceptions)
721 return result
722
When I wrap a pandas.DataFrame with Blaze, it outputs the columns normally, and "traffic.head()" also runs normally. However, when I run "traffic.Stop_day", it raises the AttributeError shown below:
import odo
traffic=bl.Data(df)
print(traffic.fields)
['Agency', 'SubAgency', 'Description', 'Location', 'Latitude', 'Longitude', 'Accident', 'Belts', 'Personal Injury', 'Property Damage', 'Fatal', 'Commercial License', 'HAZMAT', 'Commercial Vehicle', 'Alcohol', 'Work Zone', 'State', 'VehicleType', 'Year', 'Make', 'Model', 'Color', 'Violation Type', 'Charge', 'Article', 'Contributed To Accident', 'Race', 'Gender', 'Driver City', 'Driver State', 'DL State', 'Arrest Type', 'Geolocation', 'Stop_month', 'Stop_day', 'Stop_year', 'Stop_hour', 'Stop_min', 'Stop_sec']
traffic.Stop_day
AttributeError Traceback (most recent call
last)
e:\software\python36\lib\site-packages\IPython\core\formatters.py in
call(self, obj)
700 type_pprinters=self.type_printers,
701 deferred_pprinters=self.deferred_printers)
--> 702 printer.pretty(obj)
703 printer.flush()
704 return stream.getvalue()
e:\software\python36\lib\site-packages\IPython\lib\pretty.py in
pretty(self, obj)
398 if cls is not object \
399 and callable(cls.dict.get('repr')):
--> 400 return _repr_pprint(obj, self, cycle)
401
402 return _default_pprint(obj, self, cycle)
e:\software\python36\lib\site-packages\IPython\lib\pretty.py in
_repr_pprint(obj, p, cycle)
693 """A pprint that just redirects to the normal repr function."""
694 # Find newlines and replace them with p.break_()
--> 695 output = repr(obj)
696 for idx,output_line in enumerate(output.splitlines()):
697 if idx:
e:\software\python36\lib\site-packages\blaze\interactive.py in
_choose_repr(self)
440 else:
441 warnings.warn(_warning_msg, DeprecationWarning, stacklevel=2)
--> 442 return expr_repr(self)
443
444
e:\software\python36\lib\site-packages\blaze\interactive.py in
expr_repr(expr, n)
358 isscalar(expr.dshape.measure) or
359 isinstance(expr.dshape.measure, Map))):
--> 360 return repr_tables(expr, 10)
361
362 # Smallish arrays
e:\software\python36\lib\site-packages\blaze\interactive.py in
repr_tables(expr, n)
240
241 def repr_tables(expr, n=10):
--> 242 result = concrete_head(expr, n).rename(columns={None: ''})
243
244 if isinstance(result, (DataFrame, Series)):
e:\software\python36\lib\site-packages\blaze\interactive.py in
concrete_head(expr, n)
220 return odo(head, DataFrame)
221 else:
--> 222 df = odo(head, DataFrame)
223 df.columns = [expr._name]
224 return df
e:\software\python36\lib\site-packages\odo\odo.py in odo(source,
target, **kwargs)
89 odo.append.append - Add things onto existing things
90 """
---> 91 return into(target, source, **kwargs)
e:\software\python36\lib\site-packages\multipledispatch\dispatcher.py
in call(self, *args, **kwargs)
276 self._cache[types] = func
277 try:
--> 278 return func(*args, **kwargs)
279
280 except MDNotImplementedError:
e:\software\python36\lib\site-packages\blaze\interactive.py in into(a,
b, **kwargs)
404 result = compute(b, return_type='native', **kwargs)
405 kwargs['dshape'] = b.dshape
--> 406 return into(a, result, **kwargs)
407
408
e:\software\python36\lib\site-packages\multipledispatch\dispatcher.py
in call(self, *args, **kwargs)
276 self._cache[types] = func
277 try:
--> 278 return func(*args, **kwargs)
279
280 except MDNotImplementedError:
e:\software\python36\lib\site-packages\odo\into.py in wrapped(*args,
**kwargs)
41 raise TypeError('dshape argument is not an instance of DataShape')
42 kwargs['dshape'] = dshape
---> 43 return f(*args, **kwargs)
44 return wrapped
45
e:\software\python36\lib\site-packages\odo\into.py in into_type(a, b,
dshape, **kwargs)
51 if dshape is None:
52 dshape = discover(b)
---> 53 return convert(a, b, dshape=dshape, **kwargs)
54
55
e:\software\python36\lib\site-packages\odo\core.py in call(self,
*args, **kwargs)
42
43 def call(self, *args, **kwargs):
---> 44 return _transform(self.graph, *args, **kwargs)
45
46
e:\software\python36\lib\site-packages\odo\core.py in
_transform(graph, target, source, excluded_edges, ooc_types, **kwargs)
55 pth = path(graph, type(source), target,
56 excluded_edges=excluded_edges,
---> 57 ooc_types=ooc_types)
58 try:
59 for (A, B, f) in pth:
e:\software\python36\lib\site-packages\odo\core.py in path(graph,
source, target, excluded_edges, ooc_types)
92 pth = nx.shortest_path(g, source=source, target=target, weight='cost')
93 result = [(src, tgt, graph.edge[src][tgt]['func'])
---> 94 for src, tgt in zip(pth, pth[1:])]
95 return result
96
e:\software\python36\lib\site-packages\odo\core.py in (.0)
92 pth = nx.shortest_path(g, source=source, target=target, weight='cost')
93 result = [(src, tgt, graph.edge[src][tgt]['func'])
---> 94 for src, tgt in zip(pth, pth[1:])]
95 return result
96
AttributeError: 'DiGraph' object has no attribute 'edge'
--------------------------------------------------------------------------- AttributeError Traceback (most recent call
last)
e:\software\python36\lib\site-packages\IPython\core\formatters.py in
call(self, obj)
343 method = get_real_method(obj, self.print_method)
344 if method is not None:
--> 345 return method()
346 return None
347 else:
e:\software\python36\lib\site-packages\blaze\interactive.py in
_warning_repr_html(self)
448 else:
449 warnings.warn(_warning_msg, DeprecationWarning, stacklevel=2)
--> 450 return to_html(self)
451
452
e:\software\python36\lib\site-packages\multipledispatch\dispatcher.py
in call(self, *args, **kwargs)
276 self._cache[types] = func
277 try:
--> 278 return func(*args, **kwargs)
279
280 except MDNotImplementedError:
e:\software\python36\lib\site-packages\blaze\interactive.py in
to_html(expr)
387 if not expr._resources() or ndim(expr) != 1:
388 return to_html(expr_repr(expr))
--> 389 return to_html(concrete_head(expr))
390
391
e:\software\python36\lib\site-packages\blaze\interactive.py in
concrete_head(expr, n)
220 return odo(head, DataFrame)
221 else:
--> 222 df = odo(head, DataFrame)
223 df.columns = [expr._name]
224 return df
e:\software\python36\lib\site-packages\odo\odo.py in odo(source,
target, **kwargs)
89 odo.append.append - Add things onto existing things
90 """
---> 91 return into(target, source, **kwargs)
e:\software\python36\lib\site-packages\multipledispatch\dispatcher.py
in call(self, *args, **kwargs)
276 self._cache[types] = func
277 try:
--> 278 return func(*args, **kwargs)
279
280 except MDNotImplementedError:
e:\software\python36\lib\site-packages\blaze\interactive.py in into(a,
b, **kwargs)
404 result = compute(b, return_type='native', **kwargs)
405 kwargs['dshape'] = b.dshape
--> 406 return into(a, result, **kwargs)
407
408
e:\software\python36\lib\site-packages\multipledispatch\dispatcher.py
in call(self, *args, **kwargs)
276 self._cache[types] = func
277 try:
--> 278 return func(*args, **kwargs)
279
280 except MDNotImplementedError:
e:\software\python36\lib\site-packages\odo\into.py in wrapped(*args,
**kwargs)
41 raise TypeError('dshape argument is not an instance of DataShape')
42 kwargs['dshape'] = dshape
---> 43 return f(*args, **kwargs)
44 return wrapped
45
e:\software\python36\lib\site-packages\odo\into.py in into_type(a, b,
dshape, **kwargs)
51 if dshape is None:
52 dshape = discover(b)
---> 53 return convert(a, b, dshape=dshape, **kwargs)
54
55
e:\software\python36\lib\site-packages\odo\core.py in call(self,
*args, **kwargs)
42
43 def call(self, *args, **kwargs):
---> 44 return _transform(self.graph, *args, **kwargs)
45
46
e:\software\python36\lib\site-packages\odo\core.py in
_transform(graph, target, source, excluded_edges, ooc_types, **kwargs)
55 pth = path(graph, type(source), target,
56 excluded_edges=excluded_edges,
---> 57 ooc_types=ooc_types)
58 try:
59 for (A, B, f) in pth:
e:\software\python36\lib\site-packages\odo\core.py in path(graph,
source, target, excluded_edges, ooc_types)
92 pth = nx.shortest_path(g, source=source, target=target, weight='cost')
93 result = [(src, tgt, graph.edge[src][tgt]['func'])
---> 94 for src, tgt in zip(pth, pth[1:])]
95 return result
96
e:\software\python36\lib\site-packages\odo\core.py in (.0)
92 pth = nx.shortest_path(g, source=source, target=target, weight='cost')
93 result = [(src, tgt, graph.edge[src][tgt]['func'])
---> 94 for src, tgt in zip(pth, pth[1:])]
95 return result
96
AttributeError: 'DiGraph' object has no attribute 'edge'
So, what's the problem?
Simply use the command "traffic.data.Stop_year"; that's enough.
I have a problem getting rpy2 running in an IPython notebook.
If I load
%load_ext rpy2.ipython
in IPython 4.0.3, everything is fine. But if I do the same thing in an IPython notebook, I get:
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
<ipython-input-3-a69f80d0128e> in <module>()
----> 1 get_ipython().magic('load_ext rpy2.ipython')
C:\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py in magic(self, arg_s)
2334 magic_name, _, magic_arg_s = arg_s.partition(' ')
2335 magic_name = magic_name.lstrip(prefilter.ESC_MAGIC)
-> 2336 return self.run_line_magic(magic_name, magic_arg_s)
2337
2338 #-------------------------------------------------------------------------
C:\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py in run_line_magic(self, magic_name, line)
2255 kwargs['local_ns'] = sys._getframe(stack_depth).f_locals
2256 with self.builtin_trap:
-> 2257 result = fn(*args,**kwargs)
2258 return result
2259
<decorator-gen-65> in load_ext(self, module_str)
C:\Anaconda3\lib\site-packages\IPython\core\magic.py in <lambda>(f, *a, **k)
191 # but it's overkill for just that one bit of state.
192 def magic_deco(arg):
--> 193 call = lambda f, *a, **k: f(*a, **k)
194
195 if callable(arg):
C:\Anaconda3\lib\site-packages\IPython\core\magics\extension.py in load_ext(self, module_str)
64 if not module_str:
65 raise UsageError('Missing module name.')
---> 66 res = self.shell.extension_manager.load_extension(module_str)
67
68 if res == 'already loaded':
C:\Anaconda3\lib\site-packages\IPython\core\extensions.py in load_extension(self, module_str)
82 if module_str not in sys.modules:
83 with prepended_to_syspath(self.ipython_extension_dir):
---> 84 __import__(module_str)
85 mod = sys.modules[module_str]
86 if self._call_load_ipython_extension(mod):
C:\Anaconda3\lib\site-packages\rpy2\ipython\__init__.py in <module>()
----> 1 from .rmagic import load_ipython_extension
C:\Anaconda3\lib\site-packages\rpy2\ipython\rmagic.py in <module>()
50 # numpy and rpy2 imports
51
---> 52 import rpy2.rinterface as ri
53 import rpy2.robjects as ro
54 import rpy2.robjects.packages as rpacks
C:\Anaconda3\lib\site-packages\rpy2\rinterface\__init__.py in <module>()
72 if not os.path.exists(Rlib):
73 continue
---> 74 ctypes.CDLL(Rlib)
75 _win_ok = True
76 break
C:\Anaconda3\lib\ctypes\__init__.py in __init__(self, name, mode, handle, use_errno, use_last_error)
345
346 if handle is None:
--> 347 self._handle = _dlopen(self._name, mode)
348 else:
349 self._handle = handle
FileNotFoundError: [WinError 161] Der angegebene Pfadname ist ungültig
Is there some way to get both running? As rpy2 runs properly in IPython, I guess the installation should be correct.
Thanks,
Marv
There are likely more differences between the environment from which ipython is called and the one from which the notebook is called: the error "Der angegebene Pfadname ist ungültig" ("The specified path name is invalid") occurs while trying to load the R shared library.
You'd need to tell us a little more about how you start either ipython or the notebook.
That said, you should also note that rpy2 likely works better on Linux or OS X. If the IPython notebook is your primary interest, running it in a Docker container could be a good solution.