Cannot print timestamp from mongodb oplog using pymongo

I have the following code:
import time
import pymongo
from pymongo import MongoClient

connection = MongoClient('core.mongo.com', 27017)
db = connection['admin']
first = db.oplog.rs.find().sort('$natural', pymongo.DESCENDING).limit(-1).next()
ts = first['ts']
while True:
    cursor = db.oplog.find({'ts': {'$gt': ts}}, tailable=True, await_data=True)
    while cursor.alive:
        for doc in cursor:
            ts = doc['ts']
        time.sleep(1)
I get:
Traceback (most recent call last):
File "tail.py", line 25, in <module>
ts = first['ts']
File "/Library/Python/2.7/site-packages/pymongo/cursor.py", line 569, in __getitem__
"instances" % index)
TypeError: index 'ts' cannot be applied to Cursor instances
How am I supposed to get the latest timestamp from the oplog of the Mongo database?

The following code gives me the last operation on database_name.collection_name:
import datetime
import pymongo
from pymongo import MongoClient

connection = MongoClient('core.mongo.com', 27017)
db = connection['admin']
oplog_str = str(connection.local.oplog.rs)
print oplog_str
new_query = {'ns': {'$in': ['database_name.collection_name']}}
curr = connection.local.oplog.rs.find(new_query).sort('$natural', pymongo.DESCENDING).limit(-1)
for doc_count, doc in enumerate(curr):
    current_time_stamp = doc['ts'].time
    good_date = datetime.datetime.fromtimestamp(current_time_stamp).ctime()
    print doc_count, good_date
If you want the last operation irrespective of database and collection, just drop new_query from the find() call.
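To keep following the oplog from that timestamp rather than just reading the last entry, a tailable cursor works. A minimal sketch, assuming pymongo 2.x (whose find() accepts the tailable and await_data keyword arguments used in the question) and a replica set member, so the oplog lives in local.oplog.rs:
import time
import pymongo
from pymongo import MongoClient

connection = MongoClient('core.mongo.com', 27017)
oplog = connection.local.oplog.rs
# Start at the newest entry, then tail everything written after it.
last = oplog.find().sort('$natural', pymongo.DESCENDING).limit(-1).next()
ts = last['ts']
while True:
    cursor = oplog.find({'ts': {'$gt': ts}}, tailable=True, await_data=True)
    while cursor.alive:
        for doc in cursor:
            ts = doc['ts']
            print doc['ts'].time  # seconds since the epoch, from the BSON Timestamp
        time.sleep(1)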

Convert pointcloud csv to hdf5 to train on PointCNN network

I am trying to train my point cloud data on PointCNN, so I need to convert my dataset to HDF5 as used in PointCNN. PointCNN uses the modelnet40_ply_hdf5_2048 dataset.
I have tried converting my custom dataset, but I am having issues with the label.
I tried this to get the label/shape_names:
import os

# PATH points at the dataset root
shape_ids = [line.rstrip() for line in open(os.path.join(PATH, 'filelist1.txt'))]
shape_names = ['_'.join(x.split('_')[0:-1]) for x in shape_ids]
datapath = [(shape_names[i], os.path.join(PATH, shape_names[i], shape_ids[i]))
            for i in range(len(shape_ids))]
Convert to h5py file:
import numpy as np
from tqdm import tqdm
import h5py

filenames = [line.rstrip() for line in open(os.path.join(PATH))]
f = h5py.File("filename", 'w')
data = np.zeros((len(filenames), 1024, 3))
for i in range(0, len(datapath)):
    fn = datapath[i]
    cls = classes[datapath[i][0]]
    label = np.array([cls]).astype(np.int32)
    csvreader = np.genfromtxt("data1/" + filenames[i] + ".csv", delimiter=",").astype(np.float32)
    for j in range(0, 1024):
        data[i, j] = [csvreader[j][0], csvreader[j][1], csvreader[j][2]]

dset1 = f.create_dataset("data", data=data, compression="gzip", compression_opts=4)
dset2 = f.create_dataset("label", data=label, compression="gzip", compression_opts=1)
f.close()
It converted successfully, but when I tried to train on PointCNN I got:
PointCNN training
------Building model-------
------Successfully Built model-------
Traceback (most recent call last):
File "train_pytorch.py", line 174, in <module>
current_data, current_label, _ = provider.shuffle_data(current_data, np.squeeze(current_label))
File "provider.py", line 28, in shuffle_data
idx = np.arange(len(labels))
TypeError: len() of unsized object
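The traceback points at the shape of the label data: in the loop above, label is overwritten on every iteration, so only the last (1,)-shaped array is written to the "label" dataset, and np.squeeze(current_label) then yields a 0-d array that len() rejects. A minimal sketch of writing one label per sample instead, reusing data, datapath, and the classes mapping from the question:
import numpy as np
import h5py

labels = np.zeros((len(datapath),), dtype=np.int32)
for i in range(len(datapath)):
    labels[i] = classes[datapath[i][0]]  # one integer label per point cloud

with h5py.File("filename", 'w') as f:
    f.create_dataset("data", data=data, compression="gzip", compression_opts=4)
    f.create_dataset("label", data=labels, compression="gzip", compression_opts=1)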

Using a DataLoader to interface Kafka data

I use a DataLoader to interface with the data in Kafka, and it doesn't work.
Here is my code:
import json
from kafka import KafkaConsumer
from torch.utils.data import Dataset, DataLoader

class kfkdataset(Dataset):
    def __init__(self, consumer, image_size):
        super(kfkdataset).__init__()
        self.image_size = image_size
        self.consumer = consumer

    def __getitem__(self, index):
        info = json.loads(next(self.consumer).value)
        image_osspath = info['path']
        image = prep_image_batch(image_osspath, self.image_size)
        return image, image_osspath

    def __len__(self):
        # Nominal size; the Kafka stream is effectively unbounded.
        return 9000000

consumer = KafkaConsumer('my-topic', bootstrap_servers=[])
prodataset = kfkdataset(consumer, image_size=608)

k = DataLoader(prodataset,
               batch_size=batch_size,
               num_workers=16)

for inputimage, osspath in k:
    inputimage = inputimage.to(device)
    detections, _ = model(inputimage)
    detections = non_max_suppression(detections, 0.98, 0.4)
It works when num_workers is 1. When num_workers > 1, these errors come out:
File "batch_upload.py", line 80, in
for inputimage,osspath in k:
File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 801, in__next__
return self._process_data(data)
File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/dataloader.py", line 846,in_process_data
data.reraise()
File "/usr/local/lib/python3.6/dist-packages/torch/_utils.py", line 369, in reraise
raise self.exc_type(msg)
FileExistsError: Caught FileExistsError in DataLoader worker process 1.
Original Traceback (most recent call last):
File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/worker.py", line 178, in _worker_loop
data = fetcher.fetch(index)
File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/fetch.py", line 44, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/usr/local/lib/python3.6/dist-packages/torch/utils/data/_utils/fetch.py", line 44, in
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/appbatch/utils/utils.py", line 49, in getitem
info = json.loads(next(self.consumer).value)
File "/usr/local/lib/python3.6/dist-packages/kafka/consumer/group.py", line 1192, in next
return self.next_v2()
File "/usr/local/lib/python3.6/dist-packages/kafka/consumer/group.py", line 1200, in next_v2
return next(self._iterator)
File "/usr/local/lib/python3.6/dist-packages/kafka/consumer/group.py", line 1115, in _message_generator_v2
record_map = self.poll(timeout_ms=timeout_ms, update_offsets=False)
File "/usr/local/lib/python3.6/dist-packages/kafka/consumer/group.py", line 654, in poll
records = self._poll_once(remaining, max_records, update_offsets=update_offsets)
File "/usr/local/lib/python3.6/dist-packages/kafka/consumer/group.py", line 701, in _poll_once
self._client.poll(timeout_ms=timeout_ms)
File "/usr/local/lib/python3.6/dist-packages/kafka/client_async.py", line 600, in poll
self._poll(timeout / 1000)
File "/usr/local/lib/python3.6/dist-packages/kafka/client_async.py", line 629, in _poll
self._register_send_sockets()
File "/usr/local/lib/python3.6/dist-packages/kafka/client_async.py", line 619, in _register_send_sockets
self._selector.modify(key.fileobj, events, key.data)
File "/usr/lib/python3.6/selectors.py", line 261, in modify
key = self.register(fileobj, events, data)
File "/usr/lib/python3.6/selectors.py", line 412, in register
self._epoll.register(key.fd, epoll_events)
FileExistsError: [Errno 17] File exists
I want to know how to make it work.
Basically, setting num_workers > 1 in PyTorch's DataLoader creates several worker processes, and because there is only one consumer, they all inherit and poll the same KafkaConsumer socket; the epoll selector then tries to register a file descriptor that is already registered, which is the FileExistsError above.
One approach to parallelizing the import of data from Kafka is to create several consumers in the same consumer group for that topic, one per worker, as sketched below.
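A minimal sketch of that idea, assuming the kafka-python package and a reachable broker list; the consumer is created lazily inside __getitem__, so each DataLoader worker process builds its own consumer with its own socket (the class and the JSON 'path' field mirror the question, the rest of the names are hypothetical):
import json
from kafka import KafkaConsumer
from torch.utils.data import Dataset

class KafkaDataset(Dataset):
    def __init__(self, topic, servers, group_id, image_size):
        self.topic = topic
        self.servers = servers
        self.group_id = group_id
        self.image_size = image_size
        self.consumer = None  # created lazily, once per worker process

    def __getitem__(self, index):
        if self.consumer is None:
            # Each worker executes this in its own process, so every worker
            # gets a private socket; consumers in one group split partitions.
            self.consumer = KafkaConsumer(self.topic,
                                          bootstrap_servers=self.servers,
                                          group_id=self.group_id)
        info = json.loads(next(self.consumer).value)
        return info['path']

    def __len__(self):
        return 9000000  # nominal size; the stream is effectively unbounded
For the load to actually be spread, the topic needs at least as many partitions as there are workers in the group.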

TypeError: must be str, not bytes, Python 3, Raspberry Pi

I am trying to send video from my Raspberry Pi to my laptop and save the frames as pictures, so I found the code below online, but I get the following errors when I run it.
I run this client code on the Pi using the Thonny IDE that comes preloaded; the server on the laptop is run using Python 3.6 IDLE.
I apologize for the way the code is formatted below and would be very grateful if anybody could help me sort this out.
import sys
import numpy as np
import cv2
import socket

class VideoStreamingTest(object):
    def __init__(self):
        self.server_socket = socket.socket()
        self.server_socket.bind(('0.0.0.0', 9006))
        self.server_socket.listen(0)
        self.connection, self.client_address = self.server_socket.accept()
        self.connection = self.connection.makefile('rb')
        self.streaming()

    def streaming(self):
        try:
            print("Connection from: ", self.client_address)
            print("Streaming...")
            print("Press 'q' to exit")
            stream_bytes = ' '
            while True:
                stream_bytes += self.connection.read(1024)
                first = stream_bytes.find('\xff\xd8')
                last = stream_bytes.find('\xff\xd9')
                if first != -1 and last != -1:
                    jpg = stream_bytes[first:last + 2]
                    stream_bytes = stream_bytes[last + 2:]
                    #image = cv2.imdecode(np.fromstring(jpg, dtype=np.uint8), cv2.CV_LOAD_IMAGE_GRAYSCALE)
                    image = cv2.imdecode(np.fromstring(jpg, dtype=np.uint8), cv2.CV_LOAD_IMAGE_UNCHANGED)
                    cv2.imshow('image', image)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
        finally:
            self.connection.close()
            self.server_socket.close()

if __name__ == '__main__':
    VideoStreamingTest()
I get the following error:
Connection from: ('192.168.43.3', 47518)
Streaming...
Press 'q' to exit
Traceback (most recent call last):
File "C:\Users\John Doe\d-ff\Desktop\AutoRCCar-master 3\test\stream_server_test.py", line 46, in <module>
VideoStreamingTest()
File "C:\Users\John Doe\d-ff\Desktop\AutoRCCar-master 3\test\stream_server_test.py", line 16, in __init__
self.streaming()
File "C:\Users\John Doe\d-ff\Desktop\AutoRCCar-master 3\test\stream_server_test.py", line 28, in streaming
stream_bytes += self.connection.read(1024)
TypeError: must be str, not bytes
Client side on the Pi:
import io
import socket
import struct
import time
import picamera

# create socket and bind host
client_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
client_socket.connect(('ToM', 9006))
connection = client_socket.makefile('wb')
try:
    with picamera.PiCamera() as camera:
        camera.resolution = (320, 240)  # pi camera resolution
        camera.framerate = 5            # frames/sec
        time.sleep(2)                   # give 2 secs for camera to initialize
        start = time.time()
        stream = io.BytesIO()
        # send jpeg format video stream
        for foo in camera.capture_continuous(stream, 'jpeg', use_video_port=True):
            connection.write(struct.pack('<L', stream.tell()))
            connection.flush()
            stream.seek(0)
            connection.write(stream.read())
            if time.time() - start > 600:
                break
            stream.seek(0)
            stream.truncate()
    connection.write(struct.pack('<L', 0))
finally:
    connection.close()
    client_socket.close()
I get the following error:
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/pi/Desktop/stream_client.py", line 40, in <module>
connection.close()
File "/usr/lib/python3.5/socket.py", line 594, in write
return self._sock.send(b)
BrokenPipeError: [Errno 32] Broken pipe
I first thought it might be because of limited bandwidth, since I was running VNC viewer (remote desktop) over wifi on the Pi, but I don't think that is it.
I had the same problem. After some searching I found a solution.
In Python 3 we have to specify whether a string is a regular string or binary; that's why we use b'string' instead of just 'string'.
Change
stream_bytes = ' '
to
stream_bytes = b' '
Also change
first = stream_bytes.find('\xff\xd8')
last = stream_bytes.find('\xff\xd9')
to
first = stream_bytes.find(b'\xff\xd8')
last = stream_bytes.find(b'\xff\xd9')
Note that you are using cv2.CV_LOAD_IMAGE_UNCHANGED, which is not available in OpenCV 3.0; use cv2.IMREAD_COLOR to show the image in color.
Make these changes and your stream should run smoothly.
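Putting the fixes together, a minimal sketch of the corrected server loop (assuming the rest of the VideoStreamingTest class from the question; np.frombuffer is used since np.fromstring is deprecated for binary input):
stream_bytes = b' '
while True:
    stream_bytes += self.connection.read(1024)
    first = stream_bytes.find(b'\xff\xd8')  # JPEG start-of-image marker
    last = stream_bytes.find(b'\xff\xd9')   # JPEG end-of-image marker
    if first != -1 and last != -1:
        jpg = stream_bytes[first:last + 2]
        stream_bytes = stream_bytes[last + 2:]
        image = cv2.imdecode(np.frombuffer(jpg, dtype=np.uint8), cv2.IMREAD_COLOR)
        cv2.imshow('image', image)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break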
Also check that the client's final connection.write(struct.pack('<L', 0)) is executed inside the try block, so the zero-length terminator is sent before the connection is closed.

How to enable text search in mongo?

I have tried so many things.
# in replica set configuration, specify the name of the replica set
# replSet = setname
setParameter=textSearchEnabled=true
This is part of the config file. Even after setting this, text search is not enabled.
I am using pymongo for text searching. This is my code:
data = [i['obj'] for i in db.command("text", 'tracks', search=request.POST['content_search'], limit=12)['results']]
My mongo version is 2.4.10. Please guide me.
This is the traceback
Traceback (most recent call last):
File "/home/nidhin/social-media-widget/env/local/lib/python2.7/site-packages/django/core/handlers/base.py", line 114, in get_response
response = wrapped_callback(request, *callback_args, **callback_kwargs)
File "/home/nidhin/social-media-widget/env/local/lib/python2.7/site-packages/django/views/decorators/csrf.py", line 57, in wrapped_view
return view_func(*args, **kwargs)
File "/home/nidhin/social-media-widget/socialmedia/widget/views.py", line 84, in monitor
data = [i['obj'] for i in db.command("text", 'tracks' ,search=request.POST['content_search'], filter = test_data, limit = 12)['results']]
File "/home/nidhin/social-media-widget/env/local/lib/python2.7/site-packages/pymongo/database.py", line 435, in command
uuid_subtype, compile_re, **kwargs)[0]
File "/home/nidhin/social-media-widget/env/local/lib/python2.7/site-packages/pymongo/database.py", line 341, in _command
msg, allowable_errors)
File "/home/nidhin/social-media-widget/env/local/lib/python2.7/site-packages/pymongo/helpers.py", line 178, in _check_command_response
raise OperationFailure(msg % errmsg, code, response)
OperationFailure: command SON([('text', 'tracks'), ('filter', {'publisher_desc': u'Blogs'}), ('search', u'box'), ('limit', 12)]) failed: text search not enabled
Adding this line to the config file should work:
setParameter=textSearchEnabled=true
How do you start mongod?
Edit:
I recommend that you check the following:
You have mongod started with this config. You can check it by calling db.runCommand("getCmdLineOpts") in the MongoDB shell.
In the MongoDB shell, db.runCommand({getParameter: 1, textSearchEnabled: 1}) returns textSearchEnabled: true.
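The same checks can be run from pymongo; a minimal sketch, assuming a mongod on localhost (SON is used because command documents are order-sensitive, and both commands run against the admin database):
from pymongo import MongoClient
from bson.son import SON

connection = MongoClient('localhost', 27017)
admin = connection['admin']
# Confirm which config/options mongod was actually started with.
print admin.command('getCmdLineOpts')
# Confirm the text search parameter is on.
print admin.command(SON([('getParameter', 1), ('textSearchEnabled', 1)]))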

MongoDB Assertion Error: starting_from == self.__retrieved (pymongo driver)

MongoDB Question:
We're using a sharded replica set, running pymongo 2.2 against mongo (version: 2.1.1-pre-). We're getting a traceback when a query returns more than one result document:
Traceback (most recent call last):
File "/usr/lib64/python2.6/threading.py", line 532, in __bootstrap_inner
self.run()
File "/opt/DCM/mods/plugin.py", line 25, in run
self._mod.collect_metrics_dcm()
File "/opt/DCM/plugins/res.py", line 115, in collect_metrics_dcm
ms.updateSpecificMetric(metricName, value, timestamp)
File "/opt/DCM/mods/mongoSaver.py", line 155, in updateSpecificMetric
latestDoc = self.getLatestDoc(metricName)
File "/opt/DCM/mods/mongoSaver.py", line 70, in getLatestDoc
for d in dlist:
File "/usr/lib64/python2.6/site-packages/pymongo/cursor.py", line 747, in next
if len(self.__data) or self._refresh():
File "/usr/lib64/python2.6/site-packages/pymongo/cursor.py", line 698, in _refresh
self.__uuid_subtype))
File "/usr/lib64/python2.6/site-packages/pymongo/cursor.py", line 668, in __send_message
assert response["starting_from"] == self.__retrieved
AssertionError
The code that produces dlist is a simple find(). I've tried reIndex(), no joy. I've tried stopping and starting the mongo server, no joy.
This is easily reproducible for me. Any ideas?
OK, so I traced this down a bit, and I have a SOLUTION for this assertion error.
There is a BUG in Mongo. When querying a sharded replica set, Mongo returns an incorrect value for 'starting_from': instead of returning 0 on the first query, it returns the number of records received rather than the offset value. I have a patch for pymongo that protects against this bad info:
File is site-packages/pymongo/cursor.py.
[user@hostname]$ diff cursor.py.orig cursor.py
631,632c631,634
< if not self.__tailable:
< assert response["starting_from"] == self.__retrieved
---
> if ((not self.__tailable) and (self.__retrieved != 0) and (response["starting_from"] != self.__retrieved)):
> from pprint import pformat
> msg = "Server response of 'starting_from' is '%s', but self__retrieved (which is only set to nonzero below here) is '%s'." % (pformat(response), pformat(self.__retrieved))
> assert False, msg
The 'starting_from' comes from helpers.py decoding the response from Mongo:
result["starting_from"] = struct.unpack("<i", response[12:16])[0]
So it's bytes 12 through 15 of Mongo's response.
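For context, a minimal sketch of how those reply fields line up, following the MongoDB OP_REPLY wire format (int32 responseFlags, int64 cursorID, int32 startingFrom, int32 numberReturned) that the struct.unpack call above is indexing into:
import struct

def parse_reply_header(response):
    # 'response' is the OP_REPLY body after the standard message header.
    response_flags = struct.unpack("<i", response[0:4])[0]
    cursor_id = struct.unpack("<q", response[4:12])[0]
    starting_from = struct.unpack("<i", response[12:16])[0]  # the suspect field
    number_returned = struct.unpack("<i", response[16:20])[0]
    return response_flags, cursor_id, starting_from, number_returned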
This is a bug in the 2.1.1 development release of mongos. See https://jira.mongodb.org/browse/SERVER-5844