MongoClient insert_one works while mongoengine connect doesn't (unautherized) - mongodb

I try to insert document using mongoengine interface AFTER authentication, but still gets denied. This doesn't happen using MongoClient...
This is the mongoengine try to insert one document:
In [1]: from mongoengine import connect
In [2]: db = connect(host='localhost', port=27017, username='root', password='pass')
In [3]: db.local.col.insert_one({'a':1})
---------------------------------------------------------------------------
OperationFailure Traceback (most recent call last)
<ipython-input-3-55a23806fbb1> in <module>
----> 1 db.local.col.insert_one({'a':1})
~/venv3.8/lib/python3.8/site-packages/pymongo/collection.py in insert_one(self, document, bypass_document_validation, session)
696 write_concern = self._write_concern_for(session)
697 return InsertOneResult(
--> 698 self._insert(document,
699 write_concern=write_concern,
700 bypass_doc_val=bypass_document_validation,
~/venv3.8/lib/python3.8/site-packages/pymongo/collection.py in _insert(self, docs, ordered, check_keys, manipulate, write_concern, op_id, bypass_doc_val, session)
611 """Internal insert helper."""
612 if isinstance(docs, abc.Mapping):
--> 613 return self._insert_one(
614 docs, ordered, check_keys, manipulate, write_concern, op_id,
615 bypass_doc_val, session)
~/venv3.8/lib/python3.8/site-packages/pymongo/collection.py in _insert_one(self, doc, ordered, check_keys, manipulate, write_concern, op_id, bypass_doc_val, session)
600 _check_write_command_response(result)
601
--> 602 self.__database.client._retryable_write(
603 acknowledged, _insert_command, session)
604
~/venv3.8/lib/python3.8/site-packages/pymongo/mongo_client.py in _retryable_write(self, retryable, func, session)
1496 """Internal retryable write helper."""
1497 with self._tmp_session(session) as s:
-> 1498 return self._retry_with_session(retryable, func, s, None)
1499
1500 def _handle_getlasterror(self, address, error_msg):
~/venv3.8/lib/python3.8/site-packages/pymongo/mongo_client.py in _retry_with_session(self, retryable, func, session, bulk)
1382 retryable = (retryable and self.retry_writes
1383 and session and not session.in_transaction)
-> 1384 return self._retry_internal(retryable, func, session, bulk)
1385
1386 def _retry_internal(self, retryable, func, session, bulk):
~/venv3.8/lib/python3.8/site-packages/pymongo/mongo_client.py in _retry_internal(self, retryable, func, session, bulk)
1414 raise last_error
1415 retryable = False
-> 1416 return func(session, sock_info, retryable)
1417 except ServerSelectionTimeoutError:
1418 if is_retrying():
~/venv3.8/lib/python3.8/site-packages/pymongo/collection.py in _insert_command(session, sock_info, retryable_write)
588 command['bypassDocumentValidation'] = True
589
--> 590 result = sock_info.command(
591 self.__database.name,
592 command,
~/venv3.8/lib/python3.8/site-packages/pymongo/pool.py in command(self, dbname, spec, slave_ok, read_preference, codec_options, check, allowable_errors, check_keys, read_concern, write_concern, parse_write_concern_error, collation, session, client, retryable_write, publish_events, user_fields, exhaust_allowed)
681 self._raise_if_not_writable(unacknowledged)
682 try:
--> 683 return command(self, dbname, spec, slave_ok,
684 self.is_mongos, read_preference, codec_options,
685 session, client, check, allowable_errors,
~/venv3.8/lib/python3.8/site-packages/pymongo/network.py in command(sock_info, dbname, spec, slave_ok, is_mongos, read_preference, codec_options, session, client, check, allowable_errors, address, check_keys, listeners, max_bson_size, read_concern, parse_write_concern_error, collation, compression_ctx, use_op_m
sg, unacknowledged, user_fields, exhaust_allowed)
157 client._process_response(response_doc, session)
158 if check:
--> 159 helpers._check_command_response(
160 response_doc, sock_info.max_wire_version, None,
161 allowable_errors,
~/venv3.8/lib/python3.8/site-packages/pymongo/helpers.py in _check_command_response(response, max_wire_version, msg, allowable_errors, parse_write_concern_error)
165
166 msg = msg or "%s"
--> 167 raise OperationFailure(msg % errmsg, code, response,
168 max_wire_version)
169
OperationFailure: command insert requires authentication, full error: {'ok': 0.0, 'errmsg': 'command insert requires authentication', 'code': 13, 'codeName': 'Unauthorized'}
which fails, but the MongoClient works for some reason:
In [4]: from pymongo import MongoClient
In [5]: col = MongoClient(host='localhost', port=27017, username='root', password='pass')
In [6]: col.local.col.insert_one({'a':1})
Out[6]: <pymongo.results.InsertOneResult at 0x7ff2a347a8c0>

Related

Error Expected object of device type cuda but got device type cpu for argument #1 'self' in call to _th_index_select

I have the following code taken directly from here with some pretty little modifications:
import pandas as pd
import torch
import json
from transformers import T5Tokenizer, T5ForConditionalGeneration, T5Config
from torch import cuda
df = pd.read_pickle('df_final.pkl')
model = T5ForConditionalGeneration.from_pretrained('t5-base')
tokenizer = T5Tokenizer.from_pretrained('t5-base')
device = 'cuda' if cuda.is_available() else 'cpu'
text = ''.join(df[(df['col1'] == 'type') & (df['col2'] == 2)].col3.to_list())
preprocess_text = text.strip().replace("\n","")
t5_prepared_Text = "summarize: "+preprocess_text
#print ("original text preprocessed: \n", preprocess_text)
tokenized_text = tokenizer.encode(t5_prepared_Text, return_tensors="pt", max_length = 500000).to(device)
# summmarize
summary_ids = model.generate(tokenized_text,
num_beams=4,
no_repeat_ngram_size=2,
min_length=30,
max_length=100,
early_stopping=True)
output = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
print ("\n\nSummarized text: \n",output)
When executing the model_generate() part i get an error like this:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-12-e8e9819a85dc> in <module>
12 min_length=30,
13 max_length=100,
---> 14 early_stopping=True).to(device)
15
16 output = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
~\Anaconda3\lib\site-packages\torch\autograd\grad_mode.py in decorate_no_grad(*args, **kwargs)
47 def decorate_no_grad(*args, **kwargs):
48 with self:
---> 49 return func(*args, **kwargs)
50 return decorate_no_grad
51
~\Anaconda3\lib\site-packages\transformers\generation_utils.py in generate(self, input_ids, max_length, min_length, do_sample, early_stopping, num_beams, temperature, top_k, top_p, repetition_penalty, bad_words_ids, bos_token_id, pad_token_id, eos_token_id, length_penalty, no_repeat_ngram_size, num_return_sequences, attention_mask, decoder_start_token_id, use_cache, **model_specific_kwargs)
383 encoder = self.get_encoder()
384
--> 385 encoder_outputs: tuple = encoder(input_ids, attention_mask=attention_mask)
386
387 # Expand input ids if num_beams > 1 or num_return_sequences > 1
~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
539 result = self._slow_forward(*input, **kwargs)
540 else:
--> 541 result = self.forward(*input, **kwargs)
542 for hook in self._forward_hooks.values():
543 hook_result = hook(self, input, result)
~\Anaconda3\lib\site-packages\transformers\modeling_t5.py in forward(self, input_ids, attention_mask, encoder_hidden_states, encoder_attention_mask, inputs_embeds, head_mask, past_key_value_states, use_cache, output_attentions, output_hidden_states, return_dict)
701 if inputs_embeds is None:
702 assert self.embed_tokens is not None, "You have to intialize the model with valid token embeddings"
--> 703 inputs_embeds = self.embed_tokens(input_ids)
704
705 batch_size, seq_length = input_shape
~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
539 result = self._slow_forward(*input, **kwargs)
540 else:
--> 541 result = self.forward(*input, **kwargs)
542 for hook in self._forward_hooks.values():
543 hook_result = hook(self, input, result)
~\Anaconda3\lib\site-packages\torch\nn\modules\sparse.py in forward(self, input)
112 return F.embedding(
113 input, self.weight, self.padding_idx, self.max_norm,
--> 114 self.norm_type, self.scale_grad_by_freq, self.sparse)
115
116 def extra_repr(self):
~\Anaconda3\lib\site-packages\torch\nn\functional.py in embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse)
1482 # remove once script supports set_grad_enabled
1483 _no_grad_embedding_renorm_(weight, input, max_norm, norm_type)
-> 1484 return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
1485
1486
RuntimeError: Expected object of device type cuda but got device type cpu for argument #1 'self' in call to _th_index_select
​
I've searched this error and fouund some other threads like this one and this one but they didn't help me much since their case seems to be completely different. In my case there are no custom instances or classes created, so i don't know how to fix this or where the error come from.
Could you please tell me where is the error coming from and how could i fix it?
Thank you very much in advance.
Try explicitly moving your model to the GPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = T5ForConditionalGeneration.from_pretrained('t5-base').to(device)

Unable to open files, with the path in Jupyter notebook

I have reinstalled the anaconda after formatting my machine, since I am getting error while opening the files in jupyter notebook.
Initially I tried access the file from desktop location, as I got an error again tried to access from D drive. both were not successful attempts.
salaries = pd.read_excel('D:\\housesales.xlsx')
Below is the error
FileNotFoundError Traceback (most recent call last) <ipython-input-13-6d8e17cbb085> in <module> ----> 1 salaries = pd.read_excel('D:\housesales.xlsx') ~\Anaconda3\lib\site-packages\pandas\util_decorators.py in wrapper(*args, **kwargs) 186 else: 187 kwargs[new_arg_name] = new_arg_value --> 188 return func(*args, **kwargs) 189 return wrapper 190 return _deprecate_kwarg ~\Anaconda3\lib\site-packages\pandas\util_decorators.py in wrapper(*args, **kwargs) 186 else: 187 kwargs[new_arg_name] = new_arg_value --> 188 return func(*args, **kwargs) 189 return wrapper 190 return _deprecate_kwarg ~\Anaconda3\lib\site-packages\pandas\io\excel.py in read_excel(io, sheet_name, header, names, index_col, parse_cols, usecols, squeeze, dtype, engine, converters, true_values, false_values, skiprows, nrows, na_values, keep_default_na, verbose, parse_dates, date_parser, thousands, comment, skip_footer, skipfooter, convert_float, mangle_dupe_cols, **kwds) 348 349 if not isinstance(io, ExcelFile): --> 350 io = ExcelFile(io, engine=engine) 351 352 return io.parse( ~\Anaconda3\lib\site-packages\pandas\io\excel.py in init(self, io, engine) 651 self._io = _stringify_path(io) 652 --> 653 self._reader = self._enginesengine 654 655 def fspath(self): ~\Anaconda3\lib\site-packages\pandas\io\excel.py in init(self, filepath_or_buffer) 422 self.book = xlrd.open_workbook(file_contents=data) 423 elif isinstance(filepath_or_buffer, compat.string_types): --> 424 self.book = xlrd.open_workbook(filepath_or_buffer) 425 else: 426 raise ValueError('Must explicitly set engine if not passing in' ~\Anaconda3\lib\site-packages\xlrd__init__.py in open_workbook(filename, logfile, verbosity, use_mmap, file_contents, encoding_override, formatting_info, on_demand, ragged_rows) 109 else: 110 filename = os.path.expanduser(filename) --> 111 with open(filename, "rb") as f: 112 peek = f.read(peeksz) 113 if peek == b"PK\x03\x04": # a ZIP file FileNotFoundError: [Errno 2] No such file or directory: 'D:\housesales.xlsx'
Sounds like your housesales.xlsx file is on your Desktop, but you do not include the Desktop folder in the path to your file.
salaries = pd.read_excel('D:\\Desktop\housesales.xlsx')
I recommend you use jupyter lab as it has a file tree.
Running this bash command in a notebook cell will tell you the working directory of your jupyter instance so you know where it is looking for files.
!pwd
You could also move your file to that directory and then just access it as
salaries = pd.read_excel('housesales.xlsx')

ServerSelectionTimeoutError: Time out error when connecting to Atlas via pymongo

I'm trying to connect to my Atlas mongodb database through pymongo. I make the connection and do a basic query just to count the documents and it times out.
I am able to run the same string on my personal linux (and managed to get it working from a clean Docker), but did not manage to get it working from my Mac that I use for work (and neither did my colleagues and I wasn't able to make it work in a clean Docker image either). If it matters I'm running pymongo 3.8, that I installed with pip install pymongo[tls]. I tried downgrading as well and tried also pip install pymongo[tls,srv].
Personal guesses: something to do with proxy/firewall blocking the connection maybe? I checked whether the port was open On the server I whitelisted the 0.0.0.0/0 so that shouldn't be the issue.
import pymongo
client = pymongo.MongoClient("mongodb+srv://whatever:yep#cluster0-xxxxx.mongodb.net/test?retryWrites=true")
client.test.matches.count_documents({}) # this blocks and then errors
I get the following error
/usr/local/lib/python3.7/site-packages/pymongo/collection.py in count_documents(self, filter, session, **kwargs)
1693 collation = validate_collation_or_none(kwargs.pop('collation', None))
1694 cmd.update(kwargs)
-> 1695 with self._socket_for_reads(session) as (sock_info, slave_ok):
1696 result = self._aggregate_one_result(
1697 sock_info, slave_ok, cmd, collation, session)
/usr/local/Cellar/python/3.7.3/Frameworks/Python.framework/Versions/3.7/lib/python3.7/contextlib.py in __enter__(self)
110 del self.args, self.kwds, self.func
111 try:
--> 112 return next(self.gen)
113 except StopIteration:
114 raise RuntimeError("generator didn't yield") from None
/usr/local/lib/python3.7/site-packages/pymongo/mongo_client.py in _socket_for_reads(self, read_preference)
1133 topology = self._get_topology()
1134 single = topology.description.topology_type == TOPOLOGY_TYPE.Single
-> 1135 server = topology.select_server(read_preference)
1136
1137 with self._get_socket(server) as sock_info:
/usr/local/lib/python3.7/site-packages/pymongo/topology.py in select_server(self, selector, server_selection_timeout, address)
224 return random.choice(self.select_servers(selector,
225 server_selection_timeout,
--> 226 address))
227
228 def select_server_by_address(self, address,
/usr/local/lib/python3.7/site-packages/pymongo/topology.py in select_servers(self, selector, server_selection_timeout, address)
182 with self._lock:
183 server_descriptions = self._select_servers_loop(
--> 184 selector, server_timeout, address)
185
186 return [self.get_server_by_address(sd.address)
/usr/local/lib/python3.7/site-packages/pymongo/topology.py in _select_servers_loop(self, selector, timeout, address)
198 if timeout == 0 or now > end_time:
199 raise ServerSelectionTimeoutError(
--> 200 self._error_message(selector))
201
202 self._ensure_opened()
ServerSelectionTimeoutError: cluster0-shard-00-01-eflth.mongodb.net:27017: timed out,cluster0-shard-00-00-eflth.mongodb.net:27017: timed out,cluster0-shard-00-02-eflth.mongodb.net:27017: timed out

TypeError: iteration over a 0-d array when trying to use TextLMDataBunch.from_csv in FastAI

The library expects utf-8. I tried to convert my us-ascii file to utf-8 using:
iconv -f us-ascii -t utf-8 src.csv > target.csv
When I did:
file -I target.csv
It still showed charset as us-ascii. Then I found out that us-ascii is a subset of utf-8 and that file will only guess the file type.
However, if I take use src.csv as input to the TextLMDataBunch.from_csv() library, it works. If I do:
cat src.csv > target.csv
And then use target.csv as input to the same library, it doesn't work and gives the following error:
TypeError Traceback (most recent call last)
<ipython-input-118-44bc7147d2a4> in <module>()
----> 1 data_lm = TextLMDataBunch.from_csv(sample_p, 'voila.csv')
/usr/local/lib/python3.6/dist-packages/fastai/text/data.py in from_csv(cls, path, csv_name, valid_pct, test, tokenizer, vocab, classes, header, text_cols, label_cols, label_delim, **kwargs)
180 test_df = None if test is None else pd.read_csv(Path(path)/test, header=header)
181 return cls.from_df(path, train_df, valid_df, test_df, tokenizer, vocab, classes, text_cols,
--> 182 label_cols, label_delim, **kwargs)
183
184 #classmethod
/usr/local/lib/python3.6/dist-packages/fastai/text/data.py in from_df(cls, path, train_df, valid_df, test_df, tokenizer, vocab, classes, text_cols, label_cols, label_delim, **kwargs)
165 src = ItemLists(path, TextList.from_df(train_df, path, cols=text_cols, processor=processor),
166 TextList.from_df(valid_df, path, cols=text_cols, processor=processor))
--> 167 src = src.label_for_lm() if cls==TextLMDataBunch else src.label_from_df(cols=label_cols, classes=classes, sep=label_delim)
168 if test_df is not None: src.add_test(TextList.from_df(test_df, path, cols=text_cols))
169 return src.databunch(**kwargs)
/usr/local/lib/python3.6/dist-packages/fastai/data_block.py in _inner(*args, **kwargs)
356 assert isinstance(fv, Callable)
357 def _inner(*args, **kwargs):
--> 358 self.train = ft(*args, **kwargs)
359 assert isinstance(self.train, LabelList)
360 self.valid = fv(*args, **kwargs)
/usr/local/lib/python3.6/dist-packages/fastai/text/data.py in label_for_lm(self, **kwargs)
285 "A special labelling method for language models."
286 self.__class__ = LMTextList
--> 287 return self.label_const(0, label_cls=LMLabel)
288
289 def reconstruct(self, t:Tensor):
/usr/local/lib/python3.6/dist-packages/fastai/data_block.py in label_const(self, const, **kwargs)
211 def label_const(self, const:Any=0, **kwargs)->'LabelList':
212 "Label every item with `const`."
--> 213 return self.label_from_func(func=lambda o: const, **kwargs)
214
215 def label_empty(self):
/usr/local/lib/python3.6/dist-packages/fastai/data_block.py in label_from_func(self, func, **kwargs)
219 def label_from_func(self, func:Callable, **kwargs)->'LabelList':
220 "Apply `func` to every input to get its label."
--> 221 return self.label_from_list([func(o) for o in self.items], **kwargs)
222
223 def label_from_folder(self, **kwargs)->'LabelList':
TypeError: iteration over a 0-d array
Can someone please tell me what is wrong? I am trying this on Google Colab and tried the character encoding changes on Colab and on my Mac but with no results.

Running gvmagic extension on jupyter notebook returning FileNotFounError

I am developing Python 3.5.3 using the jupyter notebook in Anaconda 2.5.0 (64-bit) on a Windows 10 machine. I am trying to use an extension called 'gvmagic', which is used for viewing graphs. The extension seems to load, but returns a FileNotFoundError instead of a graph.
My input code is (Note: 'visualize_de_bruijn_graph' is a custom code that builds a de Bruijn graph from a string):
dbg = visualize_de_bruijn_graph('ACGCGTCG', 3)
print(dbg)
Which returns graph:
digraph "DeBruijn Graph" {
CG [label="CG"] ;
TC [label="TC"] ;
GC [label="GC"] ;
AC [label="AC"] ;
GT [label="GT"] ;
AC -> CG ;
CG -> GC ;
GC -> CG ;
CG -> GT ;
GT -> TC ;
TC -> CG ;
}
Trying to visualize the graph with the following code:
%load_ext gvmagic
%dotstr dbg
returns the error below. I cannot figure out what file is missing, as all the files referenced are where they are suppose to be.
FileNotFoundError Traceback (most recent call last)
<ipython-input-17-d138faf6c47c> in <module>()
----> 1 get_ipython().magic('dotstr dbg')
C:\Users\username\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py in magic(self, arg_s)
2161 magic_name, _, magic_arg_s = arg_s.partition(' ')
2162 magic_name = magic_name.lstrip(prefilter.ESC_MAGIC)
-> 2163 return self.run_line_magic(magic_name, magic_arg_s)
2164
2165 #-------------------------------------------------------------------------
C:\Users\username\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py in run_line_magic(self, magic_name, line)
2082 kwargs['local_ns'] = sys._getframe(stack_depth).f_locals
2083 with self.builtin_trap:
-> 2084 result = fn(*args,**kwargs)
2085 return result
2086
<decorator-gen-126> in dotstr(self, line)
C:\Users\username\Anaconda3\lib\site-packages\IPython\core\magic.py in <lambda>(f, *a, **k)
191 # but it's overkill for just that one bit of state.
192 def magic_deco(arg):
--> 193 call = lambda f, *a, **k: f(*a, **k)
194
195 if callable(arg):
C:\Users\username\Anaconda3\lib\site-packages\IPython\extensions\gvmagic.py in dotstr(self, line)
50 #line_magic
51 def dotstr(self, line):
---> 52 self._from_str(line, 'dot')
53
54 #line_magic
C:\Users\username\Anaconda3\lib\site-packages\IPython\extensions\gvmagic.py in _from_str(self, line, layout_engine)
151 def _from_str(self, line, layout_engine):
152 s = self.shell.ev(line)
--> 153 data = run_graphviz(s, layout_engine)
154 if data:
155 display_svg(data, raw=True)
C:\Users\username\Anaconda3\lib\site-packages\IPython\extensions\gvmagic.py in run_graphviz(s, layout_engine)
30 cmd = ['dot', '-Tsvg', '-K', layout_engine]
31
---> 32 dot = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE)
33 stdoutdata, stderrdata = dot.communicate(s.encode('utf-8'))
34 status = dot.wait()
C:\Users\username\Anaconda3\lib\subprocess.py in __init__(self, args, bufsize, executable, stdin, stdout, stderr, preexec_fn, close_fds, shell, cwd, env, universal_newlines, startupinfo, creationflags, restore_signals, start_new_session, pass_fds)
674 c2pread, c2pwrite,
675 errread, errwrite,
--> 676 restore_signals, start_new_session)
677 except:
678 # Cleanup if the child failed starting.
C:\Users\username\Anaconda3\lib\subprocess.py in _execute_child(self, args, executable, preexec_fn, close_fds, pass_fds, cwd, env, startupinfo, creationflags, shell, p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite, unused_restore_signals, unused_start_new_session)
953 env,
954 cwd,
--> 955 startupinfo)
956 finally:
957 # Child is launched. Close the parent's copy of those pipe
FileNotFoundError: [WinError 2] The system cannot find the file specified
You have to install the Graphviz software on your PC. For Windows, for example, download this https://graphviz.gitlab.io/_pages/Download/Download_windows.html.
In your IPython session, you have to point to the install location, for example:
import os
os.environ["PATH"] += os.pathsep + 'C:/Program Files (x86)/Graphviz2.38/bin'
%load_ext gvmagic
dbg = visualize_de_bruijn('ACGCGTCG', 3)
%dotstr dbg
And now it should work for you, just as it does for me! You can probably set the PATH variable on your PC instead of having to do it inside IPython each time.