I am using the following script on a Raspberry Pi. Because the Pi is slow, it requires quite a large delay compared to running it on my Mac. I was wondering if anyone can help improve any of the functions to speed it up a bit.
import serial
import time

port = '/dev/ttyUSB0'
ser = serial.Serial(port, baudrate=9600, bytesize=8, parity=serial.PARITY_NONE, stopbits=1, timeout=0)

romIDlist = []
romIDrevList = []
delay = .6

def sensors():
    # issue a device search (f) to get romID & call the function to get the romID
    x = ser.write('\\f28')
    result = romIDprocess(x)
    # append the first romID found to the romID list
    romIDlist.append(result)
    # search for next ROMID
    while result.find('+') != -1:
        # issue a next device search (n) to get additional devices
        id = ser.write('n')
        # call the function to get the romID
        result = romIDprocess(id)
        # append the romID to the romID list
        romIDlist.append(result)
    # pass the romIDlist to reverseList for reversal of the romIDs and return it as the sensorList
    sensorList = reverseList(romIDlist)
    print sensorList
    ser.write("r")
    time.sleep(delay)
    ser.close()

def romIDprocess(result):
    time.sleep(delay)
    # put the romID that was found into a list
    x = list(ser.read(ser.inWaiting()))
    # join the items in the list together
    romID = ''.join(x[0:18])
    return romID

# reverse the romID to put it in the correct order
def reverseList(romIDlist):
    for i in romIDlist:
        # change each romID into a list
        id = list(i[2:18])  # deletes the + & -
        # separate the list by twos for the octets
        id = [a + b for a, b in zip(id[::2], id[1::2])]
        # reverse the romID
        id.reverse()
        # join the items in the list together
        idRev = ''.join(id[0:18])
        # put the romID into a list
        romIDrevList.append(idRev)
    return romIDrevList

sensors()
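One way to cut down the fixed 0.6 s waits would be to let pySerial block on the read itself instead of sleeping and then draining the buffer. This is only a sketch of the idea, not a drop-in replacement: it assumes each search reply is exactly the 18 bytes that romIDprocess() slices out, and it reuses the original delay as the read timeout.

import serial

# timeout is now the worst-case wait; read() returns as soon as 18 bytes arrive
ser = serial.Serial('/dev/ttyUSB0', baudrate=9600, bytesize=8,
                    parity=serial.PARITY_NONE, stopbits=1, timeout=0.6)

def romIDprocess(_ignored):
    # Assumes the adapter's reply is always 18 bytes ('+'/'-' flag plus ROM ID);
    # if fewer bytes arrive, read() still gives up after the 0.6 s timeout.
    return ser.read(18)

If the reply length varies, reading until a known terminator or polling ser.inWaiting() in a short loop would be the alternative.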
I need to write a GPA calculator that uses the provided dictionary to output the GPA based on four letter-grade arguments. I can get the code to run in Google Colab and other IDEs, but I get no output on the command line. Can someone point me to what I am missing?
import sys

#print (sys.argv[1])
#print (sys.argv[2])
#print (sys.argv[3])
#print (sys.argv[4])

def gpa_calculator():
    grades = [sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4]]
    grades_upper = [each_string.upper() for each_string in grades]
    points = 0
    grade_chart = {'A': 4.0, 'A-': 3.66, 'B+': 3.33, 'B': 3.0, 'B-': 2.66,
                   'C+': 2.33, 'C': 2.0, 'C-': 1.66, 'D+': 1.33, 'D': 1.00, 'D-': .66, 'F': 0.00}
    for grade in grades_upper:
        points += grade_chart[grade]
    gpa = points / len(grades)
    rounded_gpa = round(gpa, 2)
    return rounded_gpa
    print(rounded_gpa)

gpa_calculator()
return rounded_gpa
print (rounded_gpa)
You seem to be returning the value from the function before you reach the print statement. I'm guessing the value is returned correctly, but you don't do anything with the return value when calling the function, so nothing is output to the screen.
You should move the print(...) call above the return statement, or print the result when calling the function:
print(gpa_calculator())
That's because you return before you print.
In a Jupyter notebook environment such as Google Colab, each cell displays whatever the last expression returns (if anything). That's why you get output in such an environment.
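To make the difference concrete, here is a tiny illustration (not from either answer) of why a bare function call prints nothing in a plain script:

def f():
    return 42

f()          # a script silently discards this return value
print(f())   # prints 42 both in a script and in a notebook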
Corrected code:
import sys

#print (sys.argv[1])
#print (sys.argv[2])
#print (sys.argv[3])
#print (sys.argv[4])

def gpa_calculator():
    grades = [sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4]]
    grades_upper = [each_string.upper() for each_string in grades]
    points = 0
    grade_chart = {'A': 4.0, 'A-': 3.66, 'B+': 3.33, 'B': 3.0, 'B-': 2.66,
                   'C+': 2.33, 'C': 2.0, 'C-': 1.66, 'D+': 1.33, 'D': 1.00, 'D-': .66, 'F': 0.00}
    for grade in grades_upper:
        points += grade_chart[grade]
    gpa = points / len(grades)
    rounded_gpa = round(gpa, 2)
    print(rounded_gpa)
    return rounded_gpa

gpa_calculator()
output:
C:\Users\XXXXX\Desktop>python3 a.py A B C D
2.5
C:\Users\XXXXX\Desktop>python3 a.py A A A A
4.0
I am trying to follow a tutorial on how to make a deep-learning chatbot with PyTorch. However, this code is quite complex for me, and it has stopped with an "IndexError: list index out of range". I looked the error up and get the gist of what it usually means, but since this code is very complex for me I can't figure out how to solve it.
This is the source tutorial: https://colab.research.google.com/github/pytorch/tutorials/blob/gh-pages/_downloads/chatbot_tutorial.ipynb#scrollTo=LTzdbPF-OBL9
Line 198 seems to be causing the error:
return len(p[0].split(' ')) < MAX_LENGTH and len(p[1].split(' ')) < MAX_LENGTH
This is the error log:
Start preparing training data ...
Reading lines...
Traceback (most recent call last):
File "D:\Documents\Python\python pycharm files\pythonProject4\3.9 Chatbot.py", line 221, in <module>
voc, pairs = loadPrepareData(corpus, corpus_name, datafile, save_dir)
File "D:\Documents\Python\python pycharm files\pythonProject4\3.9 Chatbot.py", line 209, in loadPrepareData
pairs = filterPairs(pairs)
File "D:\Documents\Python\python pycharm files\pythonProject4\3.9 Chatbot.py", line 202, in filterPairs
return [pair for pair in pairs if filterPair(pair)]
File "D:\Documents\Python\python pycharm files\pythonProject4\3.9 Chatbot.py", line 202, in <listcomp>
return [pair for pair in pairs if filterPair(pair)]
File "D:\Documents\Python\python pycharm files\pythonProject4\3.9 Chatbot.py", line 198, in filterPair
return len(p[0].split(' ')) < MAX_LENGTH and len(p[1].split(' ')) < MAX_LENGTH
IndexError: list index out of range
Read 442563 sentence pairs
Process finished with exit code 1
And this is my code, copied from my PyCharm, up to the block with the error. Since it is a huge amount of code I could not copy all of it; the rest of the code can be found in the GitHub source link above.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import torch
from torch.jit import script, trace
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import csv
import random
import re
import os
import unicodedata
import codecs
from io import open
import itertools
import math

USE_CUDA = torch.cuda.is_available()
device = torch.device("cuda" if USE_CUDA else "cpu")

corpus_name = "cornell movie-dialogs corpus"
corpus = os.path.join("D:\Documents\Python\intents", corpus_name)

def printLines(file, n=10):
    with open(file, 'rb') as datafile:
        lines = datafile.readlines()
    for line in lines[:n]:
        print(line)

printLines(os.path.join(corpus, "movie_lines.txt"))

# Splits each line of the file into a dictionary of fields
def loadLines(fileName, fields):
    lines = {}
    with open(fileName, 'r', encoding='iso-8859-1') as f:
        for line in f:
            values = line.split(" +++$+++ ")
            # Extract fields
            lineObj = {}
            for i, field in enumerate(fields):
                lineObj[field] = values[i]
            lines[lineObj['lineID']] = lineObj
    return lines

# Groups fields of lines from `loadLines` into conversations based on *movie_conversations.txt*
def loadConversations(fileName, lines, fields):
    conversations = []
    with open(fileName, 'r', encoding='iso-8859-1') as f:
        for line in f:
            values = line.split(" +++$+++ ")
            # Extract fields
            convObj = {}
            for i, field in enumerate(fields):
                convObj[field] = values[i]
            # Convert string to list (convObj["utteranceIDs"] == "['L598485', 'L598486', ...]")
            lineIds = eval(convObj["utteranceIDs"])
            # Reassemble lines
            convObj["lines"] = []
            for lineId in lineIds:
                convObj["lines"].append(lines[lineId])
            conversations.append(convObj)
    return conversations

# Extracts pairs of sentences from conversations
def extractSentencePairs(conversations):
    qa_pairs = []
    for conversation in conversations:
        # Iterate over all the lines of the conversation
        for i in range(len(conversation["lines"]) - 1):  # We ignore the last line (no answer for it)
            inputLine = conversation["lines"][i]["text"].strip()
            targetLine = conversation["lines"][i+1]["text"].strip()
            # Filter wrong samples (if one of the lists is empty)
            if inputLine and targetLine:
                qa_pairs.append([inputLine, targetLine])
    return qa_pairs

# Define path to new file
datafile = os.path.join(corpus, "formatted_movie_lines.txt")

delimiter = '\t'
# Unescape the delimiter
delimiter = str(codecs.decode(delimiter, "unicode_escape"))

# Initialize lines dict, conversations list, and field ids
lines = {}
conversations = []
MOVIE_LINES_FIELDS = ["lineID", "characterID", "movieID", "character", "text"]
MOVIE_CONVERSATIONS_FIELDS = ["character1ID", "character2ID", "movieID", "utteranceIDs"]

# Load lines and process conversations
print("\nProcessing corpus...")
lines = loadLines(os.path.join(corpus, "movie_lines.txt"), MOVIE_LINES_FIELDS)
print("\nLoading conversations...")
conversations = loadConversations(os.path.join(corpus, "movie_conversations.txt"),
                                  lines, MOVIE_CONVERSATIONS_FIELDS)

# Write new csv file
print("\nWriting newly formatted file...")
with open(datafile, 'w', encoding='utf-8') as outputfile:
    writer = csv.writer(outputfile, delimiter=delimiter)
    for pair in extractSentencePairs(conversations):
        writer.writerow(pair)

# Print a sample of lines
print("\nSample lines from file:")
printLines(datafile)

# Default word tokens
PAD_token = 0  # Used for padding short sentences
SOS_token = 1  # Start-of-sentence token
EOS_token = 2  # End-of-sentence token

class Voc:
    def __init__(self, name):
        self.name = name
        self.trimmed = False
        self.word2index = {}
        self.word2count = {}
        self.index2word = {PAD_token: "PAD", SOS_token: "SOS", EOS_token: "EOS"}
        self.num_words = 3  # Count SOS, EOS, PAD

    def addSentence(self, sentence):
        for word in sentence.split(' '):
            self.addWord(word)

    def addWord(self, word):
        if word not in self.word2index:
            self.word2index[word] = self.num_words
            self.word2count[word] = 1
            self.index2word[self.num_words] = word
            self.num_words += 1
        else:
            self.word2count[word] += 1

    # Remove words below a certain count threshold
    def trim(self, min_count):
        if self.trimmed:
            return
        self.trimmed = True

        keep_words = []
        for k, v in self.word2count.items():
            if v >= min_count:
                keep_words.append(k)

        print('keep_words {} / {} = {:.4f}'.format(
            len(keep_words), len(self.word2index), len(keep_words) / len(self.word2index)
        ))

        # Reinitialize dictionaries
        self.word2index = {}
        self.word2count = {}
        self.index2word = {PAD_token: "PAD", SOS_token: "SOS", EOS_token: "EOS"}
        self.num_words = 3  # Count default tokens

        for word in keep_words:
            self.addWord(word)

MAX_LENGTH = 10  # Maximum sentence length to consider

# Turn a Unicode string to plain ASCII, thanks to
# http://stackoverflow.com/a/518232/2809427
def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
    )

# Lowercase, trim, and remove non-letter characters
def normalizeString(s):
    s = unicodeToAscii(s.lower().strip())
    s = re.sub(r"([.!?])", r" \1", s)
    s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
    s = re.sub(r"\s+", r" ", s).strip()
    return s

# Read query/response pairs and return a voc object
def readVocs(datafile, corpus_name):
    print("Reading lines...")
    # Read the file and split into lines
    lines = open(datafile, encoding='utf-8').\
        read().strip().split('\n')
    # Split every line into pairs and normalize
    pairs = [[normalizeString(s) for s in l.split('\t')] for l in lines]
    voc = Voc(corpus_name)
    return voc, pairs

# Returns True iff both sentences in a pair 'p' are under the MAX_LENGTH threshold
def filterPair(p):
    # Input sequences need to preserve the last word for EOS token
    return len(p[0].split(' ')) < MAX_LENGTH and len(p[1].split(' ')) < MAX_LENGTH

# Filter pairs using filterPair condition
def filterPairs(pairs):
    return [pair for pair in pairs if filterPair(pair)]

# Using the functions defined above, return a populated voc object and pairs list
def loadPrepareData(corpus, corpus_name, datafile, save_dir):
    print("Start preparing training data ...")
    voc, pairs = readVocs(datafile, corpus_name)
    print("Read {!s} sentence pairs".format(len(pairs)))
    pairs = filterPairs(pairs)
    print("Trimmed to {!s} sentence pairs".format(len(pairs)))
    print("Counting words...")
    for pair in pairs:
        voc.addSentence(pair[0])
        voc.addSentence(pair[1])
    print("Counted words:", voc.num_words)
    return voc, pairs

# Load/Assemble voc and pairs
save_dir = os.path.join("data", "save")
voc, pairs = loadPrepareData(corpus, corpus_name, datafile, save_dir)
# Print some pairs to validate
print("\npairs:")
for pair in pairs[:10]:
    print(pair)

MIN_COUNT = 3  # Minimum word count threshold for trimming
I really hope someone can help me fix this problem and help me understand why it happens.
In the end I changed
return len(p[0].split(' ')) < MAX_LENGTH and len(p[1].split(' ')) < MAX_LENGTH
to
try:
    return len(p[0].split(' ')) < MAX_LENGTH and len(p[1].split(' ')) < MAX_LENGTH
except:
    return False
And now the code seems to be working.
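The bare except hides the actual cause: some lines in formatted_movie_lines.txt do not split into two sentences (for example blank lines or lines without a tab), so p[1] does not exist for those pairs. A more targeted sketch, offered as an alternative to the try/except rather than the tutorial's own fix, rejects malformed pairs explicitly:

# Returns True iff the pair really has two sentences and both are under MAX_LENGTH
def filterPair(p):
    if len(p) < 2:  # skip blank or malformed lines that split into fewer than two fields
        return False
    return len(p[0].split(' ')) < MAX_LENGTH and len(p[1].split(' ')) < MAX_LENGTH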
Good Day
I was testing the functionality of a text summarization code published on the website: https://towardsdatascience.com/understand-text-summarization-and-create-your-own-summarizer-in-python-b26a9f09fc70.
The problem is that, when I call the function on a text file, the error "'cp949' codec can't decode byte 0xe2 in position 205: illegal multibyte sequence" appears. I know from other posts that this error is related to the encoding of the file, so I changed the encoding of the test2.txt file to UTF-8 (saving the file in plain-text format, then choosing UTF-8 under Text Encoding > Other Encoding), but I still get this error message.
Here is the code that I wrote:
# Import libraries
from nltk.corpus import stopwords
from nltk.cluster.util import cosine_distance
import numpy as np
import networkx as nx

test_text_word = "test2.txt"

def read_article(test_text_word):
    file = open(test_text_word, "r")
    filedata = file.readlines()
    article = filedata[0].split(". ")
    sentences = []
    for sentence in article:
        print(sentence)
        sentences.append(sentence.replace("[^a-zA-Z]", " ").split(" "))
    sentences.pop()
    return sentences

def sentence_similarity(sent1, sent2, stopwords=None):
    if stopwords is None:
        stopwords = []
    sent1 = [w.lower() for w in sent1]
    sent2 = [w.lower() for w in sent2]
    all_words = list(set(sent1 + sent2))
    vector1 = [0] * len(all_words)
    vector2 = [0] * len(all_words)
    # build the vector for the first sentence
    for w in sent1:
        if w in stopwords:
            continue
        vector1[all_words.index(w)] += 1
    # build the vector for the second sentence
    for w in sent2:
        if w in stopwords:
            continue
        vector2[all_words.index(w)] += 1
    return 1 - cosine_distance(vector1, vector2)

def build_similarity_matrix(sentences, stop_words):
    # Create an empty similarity matrix
    similarity_matrix = np.zeros((len(sentences), len(sentences)))
    for idx1 in range(len(sentences)):
        for idx2 in range(len(sentences)):
            if idx1 == idx2:  # ignore if both are the same sentence
                continue
            similarity_matrix[idx1][idx2] = sentence_similarity(sentences[idx1], sentences[idx2], stop_words)
    return similarity_matrix

def generate_summary(test_text_word, top_n=5):
    stop_words = stopwords.words('english')
    summarize_text = []
    # Step 1 - Read the text and split it
    sentences = read_article(test_text_word)
    # Step 2 - Generate the similarity matrix across sentences
    sentence_similarity_martix = build_similarity_matrix(sentences, stop_words)
    # Step 3 - Rank sentences in the similarity matrix
    sentence_similarity_graph = nx.from_numpy_array(sentence_similarity_martix)
    scores = nx.pagerank(sentence_similarity_graph)
    # Step 4 - Sort the ranks and pick the top sentences
    ranked_sentence = sorted(((scores[i], s) for i, s in enumerate(sentences)), reverse=True)
    print("Indexes of top ranked_sentence order are ", ranked_sentence)
    for i in range(top_n):
        summarize_text.append(" ".join(ranked_sentence[i][1]))
    # Step 5 - Output the summarized text
    print("Summarize Text: \n", ". ".join(summarize_text))
The problem is that when I run the code with the following command:
generate_summary("test2.txt", 2)
I receive this error message: 'cp949' codec can't decode byte 0xe2 in position 205: illegal multibyte sequence
Should I change something in the code?
Thanks for your support.
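For reference, open() without an encoding argument falls back to the locale's default codec (cp949 on a Korean Windows locale), so re-saving the file as UTF-8 is not enough on its own; the encoding also has to be passed to the open() call inside read_article. A minimal sketch, assuming the file really is UTF-8 now:

def read_article(test_text_word):
    # state the file's encoding explicitly instead of relying on the locale default
    with open(test_text_word, "r", encoding="utf-8") as file:
        filedata = file.readlines()
    article = filedata[0].split(". ")
    sentences = []
    for sentence in article:
        sentences.append(sentence.replace("[^a-zA-Z]", " ").split(" "))
    sentences.pop()
    return sentences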
I need to find the nearest neighbour using the Euclidean distance.
Given are two hashes with each 100 elements in this format:
$hash{$i}{price}
$hash{$i}{height}
I need to compare each hash element with every other one, so that my result is a 100 x 100 matrix.
After that my matrix should be sorted according to the following rules:
$hash{$i,$j} {lowest_euklid_distance}
$hash{$i,$j+1}{lowest_euklid_distance + 1}
$hash{$i,$j+2}{lowest_euklid_distance + 2}
.
.
$hash{$i+1,$j}{lowest_euklid_distance}
$hash{$i+1,$j}{lowest_euklid_distance + 2}
.
.
$hash{$i+n,$j+n}{lowest_euklid_distance+n}
My problem is sorting these elements properly.
Any advice?
Thanks in advance.
/edit: adding more information:
I create the distance with the following subroutine:
sub euklid_distance{
    #w1 = price_testdata
    #w2 = height_testdata
    #h1 = price_origindata
    #h2 = height_origindata
    my $w1 = trim($_[0]);
    my $w2 = trim($_[1]);
    my $h1 = trim($_[2]);
    my $h2 = trim($_[3]);
    my $result = (((($w2-$w1)**2)+(($h2-$h1)**2))**(1/2));
    return $result;
}
I fetch the test data and the origin data from two separate lists.
The resulthash with the 100x100 matrix is created by the following code:
my %distancehash;
my $countvar = 0;
for (my $j=0; $j<100; $j++){
    for (my $i=0; $i<100; $i++){
        $distancehash{$countvar}{distance}     = euklid_distance( $origindata{$i}{price}, $testdata{$j}{price}, $origindata{$i}{height}, $testdata{$j}{height} );
        $distancehash{$countvar}{originPrice}  = $origindata{$i}{price};
        $distancehash{$countvar}{originHeight} = $origindata{$i}{height};
        $distancehash{$countvar}{testPrice}    = $testdata{$j}{price};
        $distancehash{$countvar}{testHeight}   = $testdata{$j}{height};
        $countvar++;
    }
}
where $j goes over the testdata and $i over the origindata.
My goal is to have a new hash which is sorted by the lowest distance from the current $j ascending to the highest.
I am trying to return the category of input data. The training data is 'data.csv', which has 3,500,000 rows of sentences and their classes.
import nltk
from nltk.stem.lancaster import LancasterStemmer
import os
import csv
import json
import datetime

stemmer = LancasterStemmer()

training_data = []
with open('data.csv') as f:
    training_data = [{k: str(v) for k, v in row.items()}
                     for row in csv.DictReader(f, skipinitialspace=True)]

words = []
classes = []
documents = []
ignore_words = ['?', '.', '_', '-']  # words to be ignored in input data file

for pattern in training_data:
    w = nltk.word_tokenize(pattern['sentence'])
    words.extend(w)
    documents.append((w, pattern['class']))
    if pattern['class'] not in classes:
        classes.append(pattern['class'])

words = [stemmer.stem(a.lower()) for a in words if a not in ignore_words]
words = list(set(words))  # remove duplicates
classes = list(set(classes))

# create our training data
training = []
output = []
output_empty = [0] * len(classes)

for doc in documents:
    # initialize our bag of words
    bag = []
    # list of tokenized words for the pattern
    pattern_words = doc[0]
    # stem each word
    pattern_words = [stemmer.stem(word.lower()) for word in pattern_words]
    for w in words:
        bag.append(1) if w in pattern_words else bag.append(0)
    training.append(bag)
    output_row = list(output_empty)
    output_row[classes.index(doc[1])] = 1
    output.append(output_row)

import numpy as np
import time

def sigmoid(x):
    output = 1/(1+np.exp(-x))
    return output

def sigmoid_output_to_derivative(output):
    return output*(1-output)

def clean_up_sentence(sentence):
    sentence_words = nltk.word_tokenize(sentence)
    sentence_words = [stemmer.stem(word.lower()) for word in sentence_words]
    return sentence_words

def bow(sentence, words, show_details=False):
    # tokenize the pattern
    sentence_words = clean_up_sentence(sentence)
    # bag of words
    bag = [0]*len(words)
    for s in sentence_words:
        for i, w in enumerate(words):
            if w == s:
                bag[i] = 1
    return(np.array(bag))

# returns the calculated value of the output after multiplying with the sigmoids
def think(sentence, show_details=False):
    x = bow(sentence.lower(), words, show_details)
    # input layer is our bag of words
    l0 = x
    # matrix multiplication of input and hidden layer
    l1 = sigmoid(np.dot(l0, synapse_0))
    # output layer
    l2 = sigmoid(np.dot(l1, synapse_1))
    return l2
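The excerpt ends before synapse_0 and synapse_1 are defined (presumably the trained weight matrices from the rest of the script), so think() cannot run as shown. As a small, hypothetical usage sketch of the part that is complete, bow() turns any sentence into the fixed-length 0/1 vector over the shared vocabulary words:

# hypothetical example sentence; `words` is the vocabulary built above
vec = bow("what is the status of my order", words)
print(vec.shape)  # (len(words),)
print(vec.sum())  # how many vocabulary words appear in the sentence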