I am trying to open a zarr file as follows:
import pandas as pd
import xarray as xr
xf = xr.open_zarr("../../data/processed/geolink_norge_dataset/geolink_norge_well_logs.zarr")
But I get the following error:
ValueError Traceback (most recent call last) <ipython-input-17-ff38d9c54463> in <module>
1 import pandas as pd
2 import xarray as xr
----> 3 xf = xr.open_zarr("../../data/processed/geolink_norge_dataset/geolink_norge_well_logs.zarr")
4
5 # We will use just the 30* wells
C:\ProgramData\Anaconda3\lib\site-packages\xarray\backends\zarr.py in open_zarr(store, group, synchronizer, chunks, decode_cf, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, consolidated, overwrite_encoded_chunks, chunk_store, storage_options, decode_timedelta, use_cftime, **kwargs)
685 }
686
--> 687 ds = open_dataset(
688 filename_or_obj=store,
689 group=group,
C:\ProgramData\Anaconda3\lib\site-packages\xarray\backends\api.py in open_dataset(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, backend_kwargs,
*args, **kwargs)
480 engine = plugins.guess_engine(filename_or_obj)
481
--> 482 backend = plugins.get_backend(engine)
483
484 decoders = _resolve_decoders_kwargs(
C:\ProgramData\Anaconda3\lib\site-packages\xarray\backends\plugins.py in get_backend(engine)
132 engines = list_engines()
133 if engine not in engines:
--> 134 raise ValueError(
135 f"unrecognized engine {engine} must be one of: {list(engines)}"
136 )
ValueError: unrecognized engine zarr must be one of: ['scipy','store']
Can anyone help to solve this problem?
I can confirm that I have installed the scipy and store packages.
You likely need to install the zarr package as well:
pip install zarr
If that doesn't work, try:
pip install xarray[complete]
See https://github.com/pydata/xarray/issues/5395#issuecomment-850483726 for more information.
Related
When I run pip3 install <package>, !pip3 install <package>, or !pip install <package>, I get this error. I also can't clone any repo in Jupyter; it gives the same error. This is my first time using Jupyter.
---------------------------------------------------------------------------
OSError Traceback (most recent call last)
Input In [18], in <cell line: 1>()
----> 1 get_ipython().run_line_magic('pip', 'install boto3')
File /lib/python3.9/site-packages/IPython/core/interactiveshell.py:2294, in InteractiveShell.run_line_magic(self, magic_name, line, _stack_depth)
2292 kwargs['local_ns'] = self.get_local_scope(stack_depth)
2293 with self.builtin_trap:
-> 2294 result = fn(*args, **kwargs)
2295 return result
File /lib/python3.9/site-packages/IPython/core/magics/packaging.py:75, in PackagingMagics.pip(self, line)
72 else:
73 python = shlex.quote(python)
---> 75 self.shell.system(" ".join([python, "-m", "pip", line]))
77 print("Note: you may need to restart the kernel to use updated packages.")
File /lib/python3.9/site-packages/IPython/core/interactiveshell.py:2451, in InteractiveShell.system_piped(self, cmd)
2446 raise OSError("Background processes not supported.")
2448 # we explicitly do NOT return the subprocess status code, because
2449 # a non-None value would trigger :func:`sys.displayhook` calls.
2450 # Instead, we store the exit_code in user_ns.
-> 2451 self.user_ns['_exit_code'] = system(self.var_expand(cmd, depth=1))
File /lib/python3.9/site-packages/IPython/utils/_process_posix.py:148, in ProcessHandler.system(self, cmd)
146 child = pexpect.spawnb(self.sh, args=['-c', cmd]) # Pexpect-U
147 else:
--> 148 child = pexpect.spawn(self.sh, args=['-c', cmd]) # Vanilla Pexpect
149 flush = sys.stdout.flush
150 while True:
151 # res is the index of the pattern that caused the match, so we
152 # know whether we've finished (if we matched EOF) or not
File /lib/python3.9/site-packages/IPython/utils/_process_posix.py:57, in ProcessHandler.sh(self)
55 self._sh = pexpect.which(shell_name)
56 if self._sh is None:
---> 57 raise OSError('"{}" shell not found'.format(shell_name))
59 return self._sh
I searched everywhere, but strangely no one else seems to have faced this issue. Please suggest a solution; this is driving me crazy.
I got this error when trying out the sample code (https://minizinc-python.readthedocs.io/en/latest/getting_started.html) from the MiniZinc website.
from minizinc import Instance, Model, Solver
# Load n-Queens model from file
nqueens = Model("./nqueens.mzn")
# Find the MiniZinc solver configuration for Gecode
gecode = Solver.lookup("gecode")
# Create an Instance of the n-Queens model for Gecode
instance = Instance(gecode, nqueens)
# Assign 4 to n
instance["n"] = 4
result = instance.solve()
# Output the array q
print(result["q"])
The error I got was:
AssertionError Traceback (most recent call last)
<ipython-input-1-a64f1a5182f8> in <module>
2
3 # Load n-Queens model from file
----> 4 nqueens = Model("./nqueens.mzn")
5 # Find the MiniZinc solver configuration for Gecode
6 gecode = Solver.lookup("gecode")
C:\ProgramData\Anaconda3\lib\site-packages\minizinc\model.py in __init__(self, files)
85 self._lock = threading.Lock()
86 if isinstance(files, Path) or isinstance(files, str):
---> 87 self.add_file(files)
88 elif files is not None:
89 for file in files:
C:\ProgramData\Anaconda3\lib\site-packages\minizinc\model.py in add_file(self, file, parse_data)
159 if not isinstance(file, Path):
160 file = Path(file)
--> 161 assert file.exists()
162 if not parse_data:
163 with self._lock:
AssertionError:
I've downloaded both MiniZinc and Python. I tried using Jupyter Notebook and Spyder, but both had the same issue.
If anyone has faced the same issue and fixed it, I'd appreciate any feedback.
I am trying to get workers to output some information from their ipython kernel and execute various commands in the ipython session. I tried the examples in the documentation and the ipyparallel example works, but not the second example (with ipython magics). I cannot get the workers to execute any commands. For example, I am stuck on the following issue:
from dask.distributed import Client
client = Client()
info = client.start_ipython_workers()
list_workers = info.keys()
%remote info[list_workers[0]]
The last line returns an error:
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-19-9118451af441> in <module>
----> 1 get_ipython().run_line_magic('remote', "info['tcp://127.0.0.1:50497'] worker.active")
~/miniconda/envs/dask/lib/python3.7/site-packages/IPython/core/interactiveshell.py in run_line_magic(self, magic_name, line, _stack_depth)
2334 kwargs['local_ns'] = self.get_local_scope(stack_depth)
2335 with self.builtin_trap:
-> 2336 result = fn(*args, **kwargs)
2337 return result
2338
~/miniconda/envs/dask/lib/python3.7/site-packages/distributed/_ipython_utils.py in remote_magic(line, cell)
115 info_name = split_line[0]
116 if info_name not in ip.user_ns:
--> 117 raise NameError(info_name)
118 connection_info = dict(ip.user_ns[info_name])
119
NameError: info['tcp://127.0.0.1:50497']
I would appreciate any examples of how to get any information from the ipython kernel running on workers.
Posting here just to keep track: I raised an issue for this on GitHub: https://github.com/dask/distributed/issues/4522
I am trying to make the Scala XGBoost API available in my PySpark notebook, following this blog post:
https://towardsdatascience.com/pyspark-and-xgboost-integration-tested-on-the-kaggle-titanic-dataset-4e75a568bdb
However, I keep running into the error below:
spark._jvm.ml.dmlc.xgboost4j.scala.spark.XGBoostEstimator
<py4j.java_gateway.JavaPackage at 0x7fa650fe7a58>
from sparkxgb import XGBoostEstimator
xgboost = XGBoostEstimator(
featuresCol="features",
labelCol="Survival",
predictionCol="prediction"
)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-18-1765fb9e3344> in <module>
4 featuresCol="features",
5 labelCol="Survival",
----> 6 predictionCol="prediction"
7 )
~/spark-assembly-2.4.0-twttr-kryo3-scala2128-hadoop2.9.2.t05/python/pyspark/__init__.py in wrapper(self, *args, **kwargs)
108 raise TypeError("Method %s forces keyword arguments." % func.__name__)
109 self._input_kwargs = kwargs
--> 110 return func(self, **kwargs)
111 return wrapper
112
~/local/spark-3536cd7a-6188-4ca8-b3d0-57d42cd01531/userFiles-0a0d90bc-96b4-43f2-bf21-00ae0e6f7309/sparkxgb.zip/sparkxgb/xgboost.py in __init__(self, checkpoint_path, checkpointInterval, missing, nthread, nworkers, silent, use_external_memory, baseMarginCol, featuresCol, labelCol, predictionCol, weightCol, base_score, booster, eval_metric, num_class, num_round, objective, seed, alpha, colsample_bytree, colsample_bylevel, eta, gamma, grow_policy, max_bin, max_delta_step, max_depth, min_child_weight, reg_lambda, scale_pos_weight, sketch_eps, subsample, tree_method, normalize_type, rate_drop, sample_type, skip_drop, lambda_bias)
113
114 super(XGBoostEstimator, self).__init__()
--> 115 self._java_obj = self._new_java_obj("ml.dmlc.xgboost4j.scala.spark.XGBoostEstimator", self.uid)
116 self._create_params_from_java()
117 self._setDefault(
~/spark-assembly-2.4.0-twttr-kryo3-scala2128-hadoop2.9.2.t05/python/pyspark/ml/wrapper.py in _new_java_obj(java_class, *args)
65 java_obj = getattr(java_obj, name)
66 java_args = [_py2java(sc, arg) for arg in args]
---> 67 return java_obj(*java_args)
68
69 #staticmethod
TypeError: 'JavaPackage' object is not callable
I already googled this error and tried the things below. I got all of these ideas from this GitHub issue: https://github.com/JohnSnowLabs/spark-nlp/issues/232
Make sure Xgboost4j is in the SPARK_DIST_CLASSPATH. Already checked.
$echo $SPARK_DIST_CLASSPATH | tr " " "\n" | grep 'xgboost4j' | rev | cut -d'/' -f1 | rev
xgboost4j-0.72.jar
xgboost4j-spark.72.jar
Make sure they are added to EXTRA_CLASSPATH. - Done
Updating configs.
'export PYSPARK_SUBMIT_ARGS="--conf spark.jars=$SPARK_HOME/jars/* --conf spark.driver.extraClassPath=$SPARK_HOME/jars/* --conf spark.executor.extraClassPath=$SPARK_HOME/jars/* pyspark-shell"',
Hardware Info:
Machine: Linux
Using Jupyter Notebook.
Spark Version 2.4.0
python3.6
I found the problem. The sparkxgb.zip (which I downloaded from the internet) was written for xgboost4j-0.72, but my jars were from xgboost4j-0.90, and the API changed completely between those versions. As a result, version 0.90 does not have any class named ml.dmlc.xgboost4j.scala.spark.XGBoostEstimator, hence the error. You can see the difference in the API below:
https://github.com/dmlc/xgboost/tree/release_0.72/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark
vs
https://github.com/dmlc/xgboost/tree/v0.90/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark
I am trying to access GCS from Colab using the following lines of code and get the error shown below. Am I missing something? Or does Colab not support this kind of GCS access? Is there any workaround or best practice I can use?
from google.cloud import storage
client = storage.Client()
bucket = client.get_bucket('busnet_videos')
blob = bucket.blob('my-test-file.txt')
blob.upload_from_string('this is test content!')
Error:
---------------------------------------------------------------------------
OSError Traceback (most recent call last)
<ipython-input-7-0ed440d78c8f> in <module>()
7 from google.cloud import storage
8
----> 9 client = storage.Client()
10 bucket = client.get_bucket('busnet_videos')
11 blob = bucket.blob('my-test-file.txt')
2 frames
/usr/local/lib/python3.6/dist-packages/google/cloud/storage/client.py in __init__(self, project, credentials, _http)
71 project = None
72 super(Client, self).__init__(
---> 73 project=project, credentials=credentials, _http=_http
74 )
75 if no_project:
/usr/local/lib/python3.6/dist-packages/google/cloud/client.py in __init__(self, project, credentials, _http)
221
222 def __init__(self, project=None, credentials=None, _http=None):
--> 223 _ClientProjectMixin.__init__(self, project=project)
224 Client.__init__(self, credentials=credentials, _http=_http)
/usr/local/lib/python3.6/dist-packages/google/cloud/client.py in __init__(self, project)
176 if project is None:
177 raise EnvironmentError(
--> 178 "Project was not passed and could not be "
179 "determined from the environment."
180 )
OSError: Project was not passed and could not be determined from the environment.
You may have to set the following environment variables:
GOOGLE_APPLICATION_CREDENTIALS=SERVICE_ACCOUNT_KEY.json
and
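PROJECT_ID=YOUR_GOOGLE_CLOUD_PROJECT_ID
(Note: the google-cloud client libraries read the default project from the GOOGLE_CLOUD_PROJECT environment variable; alternatively, pass it explicitly, e.g. storage.Client(project="YOUR_GOOGLE_CLOUD_PROJECT_ID").)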