Traffic Sign Detection and Recognition Using Neural Networks

I wanted to detect and recognize traffic signs from a video feed. I used the TensorFlow ML framework to recognize the signs and a Haar cascade classifier to detect them.
Here is the code:
import cv2
import numpy as np
import tensorflow as tf
import os, time
import threading

# constants
IMAGE_SIZE = 200.0
MATCH_THRESHOLD = 3

def SignRecognizer():
    # suppress TensorFlow compilation warnings
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # path to the blob
    image_path = '/root/Desktop/blob.jpg'
    # read the image data
    image_data = tf.gfile.FastGFile(image_path, 'rb').read()
    # load label file, strip off carriage return \n
    label_lines = [line.rstrip() for line in tf.gfile.GFile("/root/Desktop/another_model/retrained_labels.txt")]
    # unpersist graph from file
    with tf.gfile.FastGFile("/root/Desktop/another_model/retrained_graph.pb", 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
        _ = tf.import_graph_def(graph_def, name='')
    with tf.Session() as sess:
        # feed the image_data as input to the graph and get the first prediction
        softmax_tensor = sess.graph.get_tensor_by_name("final_result:0")
        predictions = sess.run(softmax_tensor,
                               {'DecodeJpeg/contents:0': image_data})
        # sort to show labels of the first prediction in order of confidence
        top_k = predictions[0].argsort()[-len(predictions[0]):][::-1]
        for node_id in top_k:
            human_string = label_lines[node_id]
            print("%s" % human_string)
            break

roundabout_cascade = cv2.CascadeClassifier("/root/Desktop/tsp/haarcascade_roundabout.xml")
videocapture = cv2.VideoCapture(0)
scale_factor = 1.3

while 1:
    ret, pic = videocapture.read()

    # do roundabout detection on the street image
    gray = cv2.cvtColor(pic, cv2.COLOR_RGB2GRAY)
    signs = roundabout_cascade.detectMultiScale(pic, scaleFactor=1.4, minNeighbors=6)

    # initialize ORB and BFMatcher
    orb = cv2.ORB_create()
    bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)

    # find the keypoints and descriptors for the reference road-sign image
    roadsign = cv2.imread("/root/Desktop/tsp/roundabout.jpg", 0)
    kp_r, des_r = orb.detectAndCompute(roadsign, None)

    for (x, y, w, h) in signs:
        # cv2.rectangle(pic, (x, y), (x + w, y + h), (255, 0, 0), 2)

        # obtain the detected object from the street image
        obj = gray[y:y + h, x:x + w]
        color_image = pic[y:y + h, x:x + w]
        cv2.imwrite("/root/Desktop/blob.jpg", color_image)
        cv2.imshow('blob', color_image)

        # start a new thread and run SignRecognizer on it
        t = threading.Thread(name="SignRecognizer", target=SignRecognizer)
        # set the thread as a daemon to prevent blocking of the main program
        t.daemon = True
        t.start()

        ratio = IMAGE_SIZE / obj.shape[1]
        obj = cv2.resize(obj, (int(IMAGE_SIZE), int(obj.shape[0] * ratio)))

        # find the keypoints and descriptors for the object
        kp_o, des_o = orb.detectAndCompute(obj, None)
        if len(kp_o) == 0 or des_o is None:
            continue

        # match descriptors
        matches = bf.match(des_r, des_o)

        # draw the object on the street image, if the threshold is met
        if len(matches) >= MATCH_THRESHOLD:
            cv2.rectangle(pic, (x, y), (x + w, y + h), (255, 0, 0), 2)
            font = cv2.FONT_HERSHEY_SIMPLEX
            cv2.putText(pic, 'Roundabout sign', (x, y), font, 1, (255, 255, 255), 1, cv2.LINE_AA)

    cv2.imshow('roundabout_signs', pic)
    k = cv2.waitKey(30) & 0xFF
    if k == 2:
        break

cv2.waitKey(0)
cv2.destroyAllWindows()
The SignRecognizer function reads the blob image file and recognizes the sign using the model I created with the TensorFlow ML framework.
I used VideoCapture(0) to start the webcam and simulate a live video feed.
I also used OpenCV's ORB (Oriented FAST and Rotated BRIEF) to remove false positives.
I used the threading module to run SignRecognizer on another thread and set it as a daemon so that the main program isn't blocked during recognition.
Everything works great, but there is still a little lag in spite of using the threading module. Is there any way to make it lag-free?
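A likely contributor to the lag is that SignRecognizer parses retrained_graph.pb and opens a fresh tf.Session for every detected blob. Below is a minimal sketch, assuming the same graph and label paths as above, of loading the graph once in a single daemon worker and handing it crops through a queue; the helper names are illustrative, not part of the original code:
import queue
import threading
import cv2
import tensorflow as tf

# Hypothetical refactor: load the graph and session once, then reuse them for every crop.
blob_queue = queue.Queue(maxsize=1)

def recognizer_worker():
    label_lines = [l.rstrip() for l in tf.gfile.GFile("/root/Desktop/another_model/retrained_labels.txt")]
    with tf.gfile.FastGFile("/root/Desktop/another_model/retrained_graph.pb", 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
        tf.import_graph_def(graph_def, name='')
    with tf.Session() as sess:
        softmax_tensor = sess.graph.get_tensor_by_name("final_result:0")
        while True:
            crop = blob_queue.get()  # blocks until the detection loop hands over a new crop
            image_data = cv2.imencode('.jpg', crop)[1].tobytes()
            predictions = sess.run(softmax_tensor,
                                   {'DecodeJpeg/contents:0': image_data})
            print(label_lines[predictions[0].argmax()])

worker = threading.Thread(target=recognizer_worker, daemon=True)
worker.start()

# In the detection loop, instead of spawning a new thread per blob:
#     if not blob_queue.full():
#         blob_queue.put(color_image)
This also avoids writing and re-reading blob.jpg from disk for every detection.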

Related

Nearest building with OpenStreetMap

I have a CSV of points with latitude and longitude, and I'm trying to find the nearest building to each point and add that building's data as a new column to the CSV (or pandas DataFrame) in Python. I've tried Pyrosm and various other libraries, but I can't seem to prune the data down to the nearest building and then attach its data.
This is what I have:
from pyrosm import OSM
from pyrosm import get_data
import geopandas as gpd
from sklearn.neighbors import BallTree
import numpy as np
import osmnx as ox
# get rid of weird error
import shapely
import warnings
from shapely.errors import ShapelyDeprecationWarning
import csv

def get_gig_data(csv_fname):
    with open(csv_fname, "r", encoding="latin-1") as gig_records:
        for gig_record in csv.reader(gig_records):
            yield gig_record

def main():
    warnings.filterwarnings("ignore", category=ShapelyDeprecationWarning)
    chicago_osm = OSM(get_data("chicago"))

    # get a Point of Interest GeoDataFrame
    points_of_interest = chicago_osm.get_pois()  # can use a custom filter to restrict the types, but no filter might be best

    # get building nodes and edges
    nodes, edges = chicago_osm.get_network(nodes=True, network_type="walking")
    buildings = chicago_osm.get_buildings()
    b_cnt = len(buildings)
    G = chicago_osm.to_graph(nodes, edges)
    # nodes = get_igraph_nodes(G)

    buildings['geometry'] = buildings.centroid

    # poi_list = np.asarray([point.coords for point in points_of_interest['geometry']])  # if point.geom_type == point])
    # print(poi_list.shape)
    # tree = BallTree(np.asarray([point.coords for point in points_of_interest['geometry'] if point.geom_type == point]), metric="manhattan")
    # Note: the scipy implementation of manhattan/cityblock distance might be faster because it uses a C function

    # Read in the gig work data - reading line by line with csv.reader saves a lot of memory
    '''for i in points_of_interest:
        print('Type: ', type(i), ' ', i)'''

    gig_fp = "data_sample.csv"
    # gig_data = gpd.read_file(gig_fp)
    iter_gig = iter(get_gig_data(gig_fp))
    next(iter_gig)  # skip the header row

    ids = dict()
    for building in buildings.iterrows():
        # print(type(building[1][32]), ' ', building[1][32])
        # tup = tuple(float(x) for x in [trip[17][8:-1].split()])
        ids[building[1][32]] = building

    # make the tree that determines the closest POI
    # if we use the CSV reader this for loop will be done already
    for trip in iter_gig:
        # Using a generator, so this should be memory efficient.
        tup = tuple([float(x) for x in trip[17][8:-1].split()])
        print(type(tup), ' ', tup)
        # find nearest node (osmnx expects x and y separately, and return_dist=True to get the distance back)
        src_id, euclidean_distance = ox.distance.nearest_nodes(G, tup[0], tup[1], return_dist=True)
        # THEN ADD THE PICKUP AND DROPOFF IDS TO THIS TUPLE AND ADD TO A NEW NP ARRAY

if __name__ == '__main__':
    main()
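Not part of the attempt above, but as a sketch of how the pruning step could work: assuming the gig coordinates are (longitude, latitude) in degrees, a BallTree over the building centroids with the haversine metric (which expects latitude/longitude in radians) can return the nearest building for each point. The helper name and the (lon, lat) ordering are assumptions.
import numpy as np
from sklearn.neighbors import BallTree

# Assumed: `buildings` is the GeoDataFrame from above, with centroid geometries.
centroids = np.radians(np.column_stack([buildings.geometry.y, buildings.geometry.x]))  # (lat, lon) in radians
tree = BallTree(centroids, metric="haversine")

def nearest_building(lon, lat):
    """Return (index into `buildings`, distance in metres) for the closest building centroid."""
    point = np.radians([[lat, lon]])
    dist, idx = tree.query(point, k=1)
    return buildings.index[idx[0][0]], dist[0][0] * 6371000  # haversine distance is in earth radii
The returned index (or any building attribute looked up with it) could then be appended as the new column for each CSV row.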

How to know that the token ids in a gensim pre-trained word2vec will match the ids of a tokenizer's vocabulary

I am building a PyTorch BiLSTM that utilizes pre-trained gensim word2vec embeddings. I first used an nn.Embedding layer that was trained with the model from scratch, but I decided to switch to pre-trained word2vec embeddings to improve accuracy.
My model follows a simple BiLSTM architecture, where the first layer is the embedding layer, followed by one or more BiLSTM layers, and finally two feed-forward layers.
import torch
import gensim
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

word2vec = gensim.models.Word2Vec.load('path_to_word2vec/wikipedia_cbow_100')
weights = torch.FloatTensor(word2vec.wv.vectors)

class BiLSTM_model(torch.nn.Module):
    def __init__(self, max_features, embedding_dim, hidden_dim, num_layers, lstm_dropout):
        # max_features is the vocabulary size (number of tokens/words).
        super().__init__()
        # self.embeddings = nn.Embedding(max_features, embedding_dim, padding_idx=0)
        self.embeddings = nn.Embedding.from_pretrained(weights)
        self.lstm = nn.LSTM(word2vec.wv.vector_size,
                            hidden_dim,
                            batch_first=True,
                            bidirectional=True,
                            num_layers=num_layers,
                            dropout=lstm_dropout)
        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(hidden_dim * 2, 64)
        self.dropout = nn.Dropout(0.2)
        self.fc2 = nn.Linear(64, config['num_classes'])

    def forward(self, input):
        embeddings_out = self.embeddings(input)
        lstm_out, (hidden, cell) = self.lstm(embeddings_out)
        hidden = torch.cat((hidden[-2, :, :], hidden[-1, :, :]), dim=1)
        rel = self.relu(hidden)
        dense1 = self.fc1(rel)
        drop = self.dropout(dense1)
        final_out = self.fc2(drop)
        return final_out
I use a Keras tokenizer to tokenize the text and obtain the token ids.
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
## Tokenize the sentences
tokenizer = Tokenizer(num_words=config['max_features'])
tokenizer.fit_on_texts(list(train_X))
train_X = tokenizer.texts_to_sequences(train_X)
test_X = tokenizer.texts_to_sequences(test_X)
Finally, I use a standard training loop with an optimizer and a loss function. The code runs fine, but there are no performance gains from using the pre-trained embeddings.
I suspect this is because the token ids do not match between the keras.preprocessing.text tokenizer and the gensim pre-trained embeddings. My question is: how do I confirm (or deny) this inconsistency, and, if it is the case, how do I handle the issue?
Note: I am using custom word2vec embeddings for the Arabic language. You can find the embeddings here.
After looking into jhso's comment, it seems that the solution to this problem is to use word2vec.wv.index2word, which returns the vocabulary (words) as a list sorted in an order that reflects each word's id in the embedding matrix.
For example, the following code:
pretrained_embedding = gensim.models.Word2Vec.load('path/to/embedding')
word_vectors = pretrained_embedding.wv
for i in range(0, 4):
    print(f"{i}: '{word_vectors.index2word[i]}'")
will print:
0: 'this'
1: 'is'
2: 'an'
3: 'example'
where the token 'this' has id 0, and so on.
You then pass word2vec.wv.index2word to the keras.preprocessing.text.Tokenizer object's .fit_on_texts() method as follows:
vocabulary = pretrained_embedding.wv.index2word
tokenizer = Tokenizer(num_words=config['max_features'])
tokenizer.fit_on_texts(vocabulary)
This should preserve the token ids between the gensim word2vec model and the Keras tokenizer.
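To actually confirm (or deny) the mismatch, a quick sanity check, reusing word_vectors and tokenizer from the snippets above, could compare the position of a few words in the gensim vocabulary with the ids the Keras tokenizer assigned; note that Keras' Tokenizer reserves id 0 for padding, so its ids start at 1:
for gensim_id, word in enumerate(word_vectors.index2word[:5]):
    keras_id = tokenizer.word_index.get(word)  # Keras ids start at 1 (0 is reserved for padding)
    print(f"{word!r}: gensim id = {gensim_id}, keras id = {keras_id}")
If the two columns disagree (beyond a constant offset), the embedding rows fed to nn.Embedding.from_pretrained will not correspond to the tokenizer's ids.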

Image classification using TensorFlow Lite without a Google Coral USB

I am trying to evaluate a Raspberry Pi's performance with and without a Google Coral Edge TPU USB device for an image classification task on a video file. I have already managed to evaluate the performance using the Edge TPU USB device. However, when I try running TensorFlow Lite code to run inference without it, I get an error telling me I need to plug in the device:
ValueError: Failed to load delegate from libedgetpu.so.1
Specifically, I am running inference on a video using the Coral device and saving every frame of the video to benchmark the hardware.
import argparse
import time

import cv2
import numpy as np
from pycoral.adapters import classify, common
from pycoral.utils.dataset import read_label_file
from pycoral.utils.edgetpu import make_interpreter
from utils import visualization as visual

WINDOW_NAME = "Edge TPU Image classification"

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", help="File path of Tflite model.", required=True)
    parser.add_argument("--label", help="File path of label file.", required=True)
    parser.add_argument("--top_k", help="keep top k candidates.", default=2, type=int)
    parser.add_argument("--threshold", help="Score threshold.", default=0.0, type=float)
    parser.add_argument("--width", help="Resolution width.", default=640, type=int)
    parser.add_argument("--height", help="Resolution height.", default=480, type=int)
    parser.add_argument("--videopath", help="File path of Videofile.", default="")
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(WINDOW_NAME)
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize engine and load labels.
    count = 0
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = read_label_file(args.label) if args.label else None
    elapsed_list = []
    cap = cv2.VideoCapture('/home/pi/coral-usb/pycoral/test_data/video.mkv')

    while cap.isOpened():
        _, frame = cap.read()
        im = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        cv2.imwrite("/home/pi/Desktop/frames/frame_%d.jpeg" % count, frame)
        print('saved frame_%d' % count)
        count += 1  # advance the counter so each frame gets its own file
        cv2.imshow('Frame', frame)
        cap_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        cap_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Run inference.
        start = time.perf_counter()
        _, scale = common.set_resized_input(
            interpreter, (cap_width, cap_height), lambda size: cv2.resize(im, size)
        )
        interpreter.invoke()

        # Check result.
        results = classify.get_classes(interpreter, args.top_k, args.threshold)
        elapsed_ms = (time.perf_counter() - start) * 1000
        if results:
            for i in range(len(results)):
                label = "{0} ({1:.2f})".format(labels[results[i][0]], results[i][1])
                pos = 60 + (i * 30)
                visual.draw_caption(frame, (10, pos), label)

        # display
        cv2.imshow(WINDOW_NAME, frame)
        if cv2.waitKey(10) & 0xFF == ord("q"):
            break

if __name__ == "__main__":
    main()
This code is used to run inference with the Coral device. I would like to know how I can do the same thing without the Coral, so I can test the difference between running my model with and without the Edge TPU USB device.
Lastly, I tried image classification from this link using TensorFlow Lite. However, I am getting the following error:
RuntimeError: Encountered unresolved custom op: edgetpu-custom-op. Node number 0 (edgetpu-custom-op) failed to prepare.
I recently came across this while supervising a thesis. We tested face detection on a Raspberry Pi 4 with a Coral USB and without it (inference on the Pi's CPU). Are you using the same model file for both? If so, that is the problem.
You need to use the bare TFLite model for CPU inference and the TPU-compiled model for inference with the TPU.
You can take a look at this repo, where you can find the code I mentioned before (it's not well documented but it works; look at the inference CPU and inference CORAL files).
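For the CPU side, a minimal sketch of running the bare (non-Edge-TPU-compiled) .tflite model with the standard TFLite Interpreter; the model path, the frame path, and the preprocessing are placeholders that depend on how the model was exported:
import cv2
import numpy as np
import tflite_runtime.interpreter as tflite  # or: import tensorflow as tf; tflite = tf.lite

interpreter = tflite.Interpreter(model_path="model_cpu.tflite")  # bare model, no edgetpu custom op
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
height, width = input_details[0]['shape'][1:3]

frame = cv2.imread("frame_0.jpeg")  # placeholder: any frame saved by the script above
resized = cv2.resize(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), (width, height))
interpreter.set_tensor(input_details[0]['index'],
                       np.expand_dims(resized, 0).astype(input_details[0]['dtype']))
interpreter.invoke()
scores = interpreter.get_tensor(output_details[0]['index'])[0]
print("top class:", scores.argmax(), "score:", scores.max())
The edgetpu-custom-op error in the question is exactly what happens when the TPU-compiled model is loaded by this plain interpreter, which is why the two model files must be kept separate.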

Saving a trained Detectron2 model and making predictions on a single image

I am new to detectron2 and this is my first project. After reading the docs and using the tutorials as a guide, I trained my model on the custom dataset and performed the evaluation.
I would now like to make predictions on images I receive via an API by loading this saved model. I could not find any reading materials that could help me with this task.
To save my model, I have used this link as a reference - https://detectron2.readthedocs.io/en/latest/tutorials/models.html
I am able to save my trained model using the following code:
from detectron2.modeling import build_model
model = build_model(cfg) # returns a torch.nn.Module
from detectron2.checkpoint import DetectionCheckpointer
checkpointer = DetectionCheckpointer(model, save_dir="output")
checkpointer.save("model_final") # save to output/model_final.pth
But I am still confused as to how I can go about implementing what I want. I could use some guidance on what my next steps should be. Would be extremely grateful to anyone who can help.
For a single image, create a list of data and put the image path in file_name as below:
test_data = [{'file_name': '.../image_1jpg',
              'image_id': 10}]
Then run the following:
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor
from detectron2.data import MetadataCatalog
from detectron2.utils.visualizer import Visualizer, ColorMode
import matplotlib.pyplot as plt
import cv2

test_data = [{'file_name': '.../image_1jpg',
              'image_id': 10}]

cfg = get_cfg()
cfg.merge_from_file("model config")
cfg.MODEL.WEIGHTS = "model_final.pth"  # path to the final model
predictor = DefaultPredictor(cfg)

im = cv2.imread(test_data[0]["file_name"])
outputs = predictor(im)

v = Visualizer(im[:, :, ::-1],
               metadata=MetadataCatalog.get(cfg.DATASETS.TRAIN[0]),
               scale=0.5,
               instance_mode=ColorMode.IMAGE_BW)
out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
img = cv2.cvtColor(out.get_image()[:, :, ::-1], cv2.COLOR_RGBA2RGB)
plt.imshow(img)
plt.show()
This will show the prediction for the single image.
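Since the question mentions images arriving via an API, one possible pattern (not from the original answer) is to build the predictor once and decode incoming image bytes with OpenCV; the function name and the assumption that the payload is raw JPEG/PNG bytes are illustrative:
import numpy as np
import cv2

# build cfg and predictor once, as above, then reuse them for every request
def predict_from_bytes(image_bytes, predictor):
    """Decode raw JPEG/PNG bytes and run the Detectron2 predictor on them."""
    buffer = np.frombuffer(image_bytes, dtype=np.uint8)
    image = cv2.imdecode(buffer, cv2.IMREAD_COLOR)  # BGR image, which DefaultPredictor expects by default
    return predictor(image)

# e.g. inside a web handler:
# outputs = predict_from_bytes(request_body, predictor)
# instances = outputs["instances"].to("cpu")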

LSTM neural network with two sources of data

I have the following configuration: one LSTM network that receives text as n-grams of size 2. Below is a simple schematic:
After some tests, I noticed that for some classes I get a significant increase in accuracy when I use n-grams of size 3. Now I want to train a new LSTM neural network with both n-gram sizes at the same time, as in the following schematic:
How can I provide the data and build this model with Keras to perform this task?
I assume you already have a function to split words into n-grams, since you already have the 2-gram and 3-gram models working? Therefore I just construct a one-sample example for the word "cool". I had to use an embedding layer for my example, as an LSTM layer with 26^3 = 17576 input nodes was a little too much for my computer to handle. I expect you did the same in your 3-gram code?
Below is a complete working example:
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, concatenate
from tensorflow.keras.models import Model
import numpy as np

# c->2 o->14 o->14 l->11
np_2_gram_in = np.array([[26*2+14, 26*14+14, 26*14+11]])              # co, oo, ol
np_3_gram_in = np.array([[26**2*2+26*14+14, 26**2*14+26*14+26*11]])   # coo, ool
np_output = np.array([[1]])
output_shape = 1
lstm_2_gram_embedding = 128
lstm_3_gram_embedding = 192

# 2-gram branch: embedding followed by an LSTM
inputs_2_gram = Input(shape=(None,))
em_input_2_gram = Embedding(output_dim=lstm_2_gram_embedding, input_dim=26**2)(inputs_2_gram)
lstm_2_gram = LSTM(lstm_2_gram_embedding)(em_input_2_gram)

# 3-gram branch: embedding followed by an LSTM
inputs_3_gram = Input(shape=(None,))
em_input_3_gram = Embedding(output_dim=lstm_3_gram_embedding, input_dim=26**3)(inputs_3_gram)
lstm_3_gram = LSTM(lstm_3_gram_embedding)(em_input_3_gram)

# merge the two branches and classify
concat = concatenate([lstm_2_gram, lstm_3_gram])
output = Dense(output_shape, activation='sigmoid')(concat)

model = Model(inputs=[inputs_2_gram, inputs_3_gram], outputs=[output])
model.compile(optimizer='adam', loss='binary_crossentropy')
model.fit([np_2_gram_in, np_3_gram_in], [np_output], epochs=5)
model.predict([np_2_gram_in, np_3_gram_in])