Naive Bayes classification algorithm - naivebayes

I found code online for Naive Bayes classification for some small research I am doing. The code is showing some errors and I cannot find a solution for them. I would greatly appreciate your help.
The code is below:
# Example of Naive Bayes implemented from scratch in Python
import csv
import random
import math

def loadCsv(filename):
    lines = csv.reader(open(filename, "rt"))
    dataset = list(lines)
    for i in range(len(dataset)):
        dataset[i] = [float(x) for x in dataset[i]]
    return dataset

def splitDataset(dataset, splitRatio):
    trainSize = int(len(dataset) * splitRatio)
    trainSet = []
    copy = list(dataset)
    while len(trainSet) < trainSize:
        index = random.randrange(len(copy))
        trainSet.append(copy.pop(index))
    return [trainSet, copy]

def separateByClass(dataset):
    separated = {}
    for i in range(len(dataset)):
        vector = dataset[i]
        if (vector[-1] not in separated):
            separated[vector[-1]] = []
        separated[vector[-1]].append(vector)
    return separated

def mean(numbers):
    return sum(numbers) / float(len(numbers))

def stdev(numbers):
    avg = mean(numbers)
    variance = sum([pow(x - avg, 2) for x in numbers]) / float(len(numbers) - 1)
    return math.sqrt(variance)

def summarize(dataset):
    summaries = [(mean(attribute), stdev(attribute)) for attribute in zip(*dataset)]
    del summaries[-1]
    return summaries

def summarizeByClass(dataset):
    separated = separateByClass(dataset)
    summaries = {}
    for classValue, instances in separated.items():
        summaries[classValue] = summarize(instances)
    return summaries

def calculateProbability(x, mean, stdev):
    exponent = math.exp(-(math.pow(x - mean, 2) / (2 * math.pow(stdev, 2))))
    return (1 / (math.sqrt(2 * math.pi) * stdev)) * exponent

def calculateClassProbabilities(summaries, inputVector):
    probabilities = {}
    for classValue, classSummaries in summaries.items():
        probabilities[classValue] = 1
        for i in range(len(classSummaries)):
            mean, stdev = classSummaries[i]
            x = inputVector[i]
            probabilities[classValue] *= calculateProbability(x, mean, stdev)
    return probabilities

def predict(summaries, inputVector):
    probabilities = calculateClassProbabilities(summaries, inputVector)
    bestLabel, bestProb = None, -1
    for classValue, probability in probabilities.items():
        if bestLabel is None or probability > bestProb:
            bestProb = probability
            bestLabel = classValue
    return bestLabel

def getPredictions(summaries, testSet):
    predictions = []
    for i in range(len(testSet)):
        result = predict(summaries, testSet[i])
        predictions.append(result)
    return predictions

def getAccuracy(testSet, predictions):
    correct = 0
    for i in range(len(testSet)):
        if testSet[i][-1] == predictions[i]:
            correct += 1
    return (correct / float(len(testSet))) * 100.0

def main():
    filename = 'E:\iris.data.csv'
    splitRatio = 0.67
    dataset = loadCsv(filename)
    trainingSet, testSet = splitDataset(dataset, splitRatio)
    print(('Split {0} rows into train={1} and test={2} rows').format(len(dataset), len(trainingSet), len(testSet)))
    # prepare model
    summaries = summarizeByClass(trainingSet)
    # test model
    predictions = getPredictions(summaries, testSet)
    accuracy = getAccuracy(testSet, predictions)
    print(('Accuracy: {0}%').format(accuracy))

main()
The traceback for the same is below:
File "<ipython-input-18-4397d9969e66>", line 1, in <module>
runfile('C:/Users/Lenovo/Desktop/EE Codes/Knn with prima.py', wdir='C:/Users/Lenovo/Desktop/EE Codes')
File "C:\Users\Lenovo\Anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py", line 710, in runfile
execfile(filename, namespace)
File "C:\Users\Lenovo\Anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py", line 101, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "C:/Users/Lenovo/Desktop/EE Codes/Knn with prima.py", line 76, in <module>
main()
File "C:/Users/Lenovo/Desktop/EE Codes/Knn with prima.py", line 69, in main
neighbors = getNeighbors(trainingSet, testSet[x], k)
File "C:/Users/Lenovo/Desktop/EE Codes/Knn with prima.py", line 31, in getNeighbors
dist = euclideanDistance(testInstance, trainingSet[x], length)
File "C:/Users/Lenovo/Desktop/EE Codes/Knn with prima.py", line 24, in euclideanDistance
distance += pow((instance1[x] - instance2[x]), 2)
TypeError: unsupported operand type(s) for -: 'str' and 'str'
Could you please suggest how to resolve this error for the code above? If you need the dataset, please ask and I can provide a link to it.
Thanks in advance
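A note on the error itself: the traceback comes from a kNN script ("Knn with prima.py") rather than from the Naive Bayes code shown above, but the TypeError means arithmetic is being attempted on values that are still strings after the CSV is read. A minimal sketch of the kind of conversion that avoids this (a hypothetical variant, assuming the last column is a text class label as in the iris data) might look like:

import csv

def loadCsv(filename):
    # Hypothetical variant of loadCsv: convert only the numeric feature
    # columns to float and keep the last column (the class label) as text.
    with open(filename, "rt") as f:
        rows = [row for row in csv.reader(f) if row]
    return [[float(x) for x in row[:-1]] + [row[-1]] for row in rows]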

Related

IndexError target is out of bounds

I'm working on a custom dataset of images and using a neural net to classify them.
The dataset is about 6000 images of 58 classes, but during training I keep getting a "target is out of bounds" error.
I've double-checked the number of classes and the image size but still get the same error.
# hyperparams
learning_rate = 5e-4
# 3 for RGB values
in_channel = 3
# classes from data set
num_classes = 58
# arbitrary choice
batch_size = 32
# total number of epochs used to train the model
epochs = 3

traffic_dataset = TrafficSigns(csv_file='annotations.csv',
                               root_directory='/Users/*****/Desktop/images/',
                               transform=transforms.ToTensor())
train_size = int(0.8 * len(traffic_dataset))
test_size = len(traffic_dataset) - train_size
train, test = torch.utils.data.random_split(traffic_dataset,
                                            [train_size, test_size])
train_loader = torch.utils.data.DataLoader(train,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           num_workers=4)
test_loader = torch.utils.data.DataLoader(test,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          num_workers=4)

# Create a fully connected nn
class Net(nn.Module):
    # use the constructor w/ arguments size of data and number of classes
    def __init__(self, input_size, num_classes):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(input_size, 60)
        self.fc2 = nn.Linear(60, num_classes)

    # define your forward step function with relu as the non-linear function of the weights
    # x will be the data passed to the model
    def forward(self, x):
        x = f.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# sanity check
test = Net(2028, num_classes)
x = torch.randn(24, 2028)
print(test(x).shape)

# instantiate the class object of NN
net = Net(2028, num_classes)
criterion = nn.CrossEntropyLoss()
nn_optimizer = optim.Adam(net.parameters(), lr=learning_rate)

# train on multiple epochs using the criterion and gradient descent algorithm established above
for epoch in range(1):
    for i, (data, target) in enumerate(tqdm.tqdm(train_loader)):
        data = data.reshape(data.shape[0], -1)
        # forward
        outputs = net(data)
        loss = criterion(outputs, target)
        # backward propagation
        nn_optimizer.zero_grad()
        loss.backward()
        # gradient descent step
        nn_optimizer.step()
I'm also using a custom dataset class to import the images and labels.
My first thought was that the class is not iterating over the CSV and images correctly, but I can't seem to find where they might not be matching up.
class TrafficSigns(Dataset):
    # constructor needs the csv file of labels, the image directory and the transform function defined above
    def __init__(self, csv_file, root_directory, transform=None):
        self.labels = pd.read_csv(csv_file)
        self.root_directory = root_directory
        self.transform = transform

    # returns the length
    def __len__(self):
        return len(self.labels)

    # get a data item by index
    def __getitem__(self, i):
        image_path = os.path.join(self.root_directory, self.labels.iloc[i, 0])
        image = io.imread(image_path)
        y_label = torch.tensor(int(self.labels.iloc[i, 1]))
        # if statement needed since transform can be set to None
        if self.transform:
            image = self.transform(image)
        return (image, y_label)
Any help would be awesome, thank you.
Here is the full stacktrace error that's getting thrown.
IndexError Traceback (most recent call last)
/var/folders/t_/rcfcs8g56jn7trwnsvmdyh_r0000gn/T/ipykernel_34551/1839343274.py in <module>
11 #forward
12 outputs = net(data)
---> 13 loss = criterion(outputs, target)
14 #backward propigation
15 nn_optimizer.zero_grad()
~/Library/Python/3.8/lib/python/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1100 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1101 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1102 return forward_call(*input, **kwargs)
1103 # Do not call functions when jit is used
1104 full_backward_hooks, non_full_backward_hooks = [], []
~/Library/Python/3.8/lib/python/site-packages/torch/nn/modules/loss.py in forward(self, input, target)
1148
1149 def forward(self, input: Tensor, target: Tensor) -> Tensor:
-> 1150 return F.cross_entropy(input, target, weight=self.weight,
1151 ignore_index=self.ignore_index, reduction=self.reduction,
1152 label_smoothing=self.label_smoothing)
~/Library/Python/3.8/lib/python/site-packages/torch/nn/functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction, label_smoothing)
2844 if size_average is not None or reduce is not None:
2845 reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 2846 return torch._C._nn.cross_entropy_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index, label_smoothing)
2847
2848
IndexError: Target 125 is out of bounds.
I came across the same issue when I used a sequential model (LSTM) for next-sequence prediction. I checked the data loader and found that the labels contained -1, which is why the cross-entropy loss was throwing the exception: the data loader produced sequence chunks where the model found -1 as the label.
Solved: please check your rows for null labels and remove them or set them accordingly.
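The same kind of check applies to the traffic-sign question above: nn.CrossEntropyLoss expects targets in the range [0, num_classes - 1], and "Target 125 is out of bounds" with num_classes = 58 suggests the label column contains values outside that range. A minimal sketch of such a check (assuming the annotations.csv layout from the question, with the label in the second column) could be:

import pandas as pd

# Hypothetical sanity check: compare the label range in annotations.csv with num_classes.
labels = pd.read_csv('annotations.csv').iloc[:, 1]
print(labels.min(), labels.max(), labels.nunique())

num_classes = 58
bad = labels[(labels < 0) | (labels >= num_classes)]
print("{} rows have labels outside [0, {}]".format(len(bad), num_classes - 1))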

Name of Modules to compute sparsity

I'm writing a function that computes the sparsity of the weight matrices of the following fully connected network:
class FCN(nn.Module):
    def __init__(self):
        super(FCN, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(hidden_dim, hidden_dim)
        self.relu3 = nn.ReLU()
        self.fc4 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu1(out)
        out = self.fc2(out)
        out = self.relu2(out)
        out = self.fc3(out)
        out = self.relu3(out)
        out = self.fc4(out)
        return out
The function I have written is the following:
def print_layer_sparsity(model):
    for name, module in model.named_modules():
        if 'fc' in name:
            zeros = 100. * float(torch.sum(model.name.weight == 0))
            tot = float(model.name.weight.nelement())
            print("Sparsity in {}.weight: {:.2f}%".format(name, zeros/tot))
But it gives me the following error:
torch.nn.modules.module.ModuleAttributeError: 'FCN' object has no attribute 'name'
It works fine when I manually enter the name of the layers (e.g.,
(model.fc1.weight == 0)
(model.fc2.weight == 0)
(model.fc3.weight == 0) ....
but I'd like to make it independent of the network. In other words, I'd like to adapt my function so that, given any sparse network, it prints the sparsity of every layer. Any suggestions?
Thanks!!
Try:
getattr(model, name).weight
In place of
model.name.weight
Your print_layer_sparsity function becomes:
def print_layer_sparsity(model):
    for name, module in model.named_modules():
        if 'fc' in name:
            zeros = 100. * float(torch.sum(getattr(model, name).weight == 0))
            tot = float(getattr(model, name).weight.nelement())
            print("Sparsity in {}.weight: {:.2f}%".format(name, zeros/tot))
You can't do model.name because name is a str. The built-in getattr function allows you to get the member variables / attributes of an object using its name as a string.
For more information, check out this answer.
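Since model.named_modules() already yields the module object alongside its name, an equivalent sketch (an alternative to the getattr version above, not taken from the answer) can use that object directly and select layers by type instead of by name prefix:

import torch
import torch.nn as nn

def print_layer_sparsity(model):
    # Use the module yielded by named_modules() directly; select Linear layers
    # by type so the function works for any network, not just ones named fc*.
    for name, module in model.named_modules():
        if isinstance(module, nn.Linear):
            zeros = float(torch.sum(module.weight == 0))
            total = float(module.weight.nelement())
            print("Sparsity in {}.weight: {:.2f}%".format(name, 100.0 * zeros / total))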

NameError: name 'pbc' is not defined

Hello, I am learning MDAnalysis with Python 3.7. Would you please check my code and advise how to resolve the following error:
Traceback (most recent call last):
File "/home/pulokdeb/projects/def-sohrabz/pulokdeb/beluga_python/Closest_atom_Oxy_group.py", line 242, in <module>
eigen_value = iio.eigen_vals()
File "/home/pulokdeb/ENV/lib/python3.7/site-packages/MDAnalysis/core/topologyattrs.py", line 1347, in eigen_vals
com = atomgroup.center_of_mass(pbc=pbc)
NameError: name 'pbc' is not defined
The code (partial) is below:
def radius_of_gyration(group, pbc=False, **kwargs):
    """Radius of gyration.

    Parameters
    ----------
    pbc : bool, optional
        If ``True``, move all atoms within the primary unit cell before
        calculation. [``False``]

    .. versionchanged:: 0.8 Added *pbc* keyword
    """
    atomgroup = group.atoms
    masses = atomgroup.masses
    com = atomgroup.center_of_mass(pbc=pbc)
    if pbc:
        recenteredpos = atomgroup.pack_into_box(inplace=False) - com
    else:
        recenteredpos = atomgroup.positions - com
    rog_sq = np.sum(masses * np.sum(recenteredpos**2,
                                    axis=1)) / atomgroup.total_mass()
    return np.sqrt(rog_sq)

transplants[GroupBase].append(
    ('radius_of_gyration', radius_of_gyration))
I changed a few lines (the eigen_vals definition) in the topologyattrs.py file and got my results. Hope it works for my future simulations.
def shape_parameter(group, pbc=False, **kwargs):
    """Shape parameter.

    See [Dima2004a]_ for background information.

    Parameters
    ----------
    pbc : bool, optional
        If ``True``, move all atoms within the primary unit cell before
        calculation. [``False``]

    References
    ----------
    .. [Dima2004a] Dima, R. I., & Thirumalai, D. (2004). Asymmetry
       in the shapes of folded and denatured states of
       proteins. *J Phys Chem B*, 108(21),
       6564-6570. doi:`10.1021/jp037128y
       <https://doi.org/10.1021/jp037128y>`_

    .. versionadded:: 0.7.7
    .. versionchanged:: 0.8 Added *pbc* keyword
    """
    atomgroup = group.atoms
    masses = atomgroup.masses
    com = atomgroup.center_of_mass(pbc=pbc)
    if pbc:
        recenteredpos = atomgroup.pack_into_box(inplace=False) - com
    else:
        recenteredpos = atomgroup.positions - com
    tensor = np.zeros((3, 3))
    for x in range(recenteredpos.shape[0]):
        tensor += masses[x] * np.outer(recenteredpos[x, :],
                                       recenteredpos[x, :])
    tensor /= atomgroup.total_mass()
    eig_vals = np.linalg.eigvalsh(tensor)
    shape = 27.0 * np.prod(eig_vals - np.mean(eig_vals)
                           ) / np.power(np.sum(eig_vals), 3)
    return shape

transplants[GroupBase].append(
    ('shape_parameter', shape_parameter))

def eigen_vals(group, pbc=False, **kwargs):
    """ Changed by Pulok Deb
    """
    atomgroup = group.atoms
    masses = atomgroup.masses
    com = atomgroup.center_of_mass(pbc=pbc)
    if pbc:
        recenteredpos = atomgroup.pack_into_box(inplace=False) - com
    else:
        recenteredpos = atomgroup.positions - com
    tensor = np.zeros((3, 3))
    for x in range(recenteredpos.shape[0]):
        tensor += masses[x] * np.outer(recenteredpos[x, :],
                                       recenteredpos[x, :])
    tensor /= atomgroup.total_mass()
    eig_vals = np.linalg.eigvalsh(tensor)
    return eig_vals

transplants[GroupBase].append(
    ('eigen_vals', eigen_vals))

#warn_if_not_unique
#check_pbc_and_unwrap
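If editing the installed topologyattrs.py feels fragile, the same eigenvalues can be computed outside MDAnalysis with plain NumPy from an AtomGroup's positions and masses. A minimal sketch (ignoring periodic-boundary wrapping, i.e. the pbc=False case; `ag` is assumed to be an MDAnalysis AtomGroup):

import numpy as np

def gyration_eigenvalues(positions, masses):
    # Eigenvalues of the mass-weighted gyration tensor (no PBC handling).
    com = np.average(positions, axis=0, weights=masses)
    centered = positions - com
    tensor = np.einsum('i,ij,ik->jk', masses, centered, centered) / masses.sum()
    return np.linalg.eigvalsh(tensor)

# Usage sketch with an MDAnalysis AtomGroup `ag`:
# eig_vals = gyration_eigenvalues(ag.positions, ag.masses)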

Solve DAE with Pyomo and class

I'm trying to solve a car problem.
First, I have the original code of the car problem:
# Ampl Car Example
#
# Shows how to convert a minimize final time optimal control problem
# to a format pyomo.dae can handle by removing the time scaling from
# the ContinuousSet.
#
# min tf
# dxdt = v
# dvdt = a-R*v^2
# x(0)=0; x(tf)=L
# v(0)=0; v(tf)=0
# -3<=a<=1

from pyomo.environ import *
from pyomo.dae import *

m = ConcreteModel()

m.R = Param(initialize=0.001)  # Friction factor
m.L = Param(initialize=100.0)  # Final position

m.tau = ContinuousSet(initialize=[0.0, 0.80, 1.0])  # Unscaled time
m.time = Var(m.tau)  # Scaled time
m.tf = Var()
m.x = Var(m.tau, bounds=(0, None))
m.v = Var(m.tau, bounds=(0, None))
m.a = Var(m.tau, bounds=(-3.0, 1.0), initialize=0)

m.dtime = DerivativeVar(m.time)
m.dx = DerivativeVar(m.x)
m.dv = DerivativeVar(m.v)

m.obj = Objective(expr=m.tf)

def _ode1(m, i):
    if i == 0:
        return Constraint.Skip
    return m.dx[i] == m.tf * m.v[i]
m.ode1 = Constraint(m.tau, rule=_ode1)

def _ode2(m, i):
    if i == 0:
        return Constraint.Skip
    return m.dv[i] == m.tf * (m.a[i] - m.R * m.v[i] ** 2)
m.ode2 = Constraint(m.tau, rule=_ode2)

def _ode3(m, i):
    if i == 0:
        return Constraint.Skip
    return m.dtime[i] == m.tf
m.ode3 = Constraint(m.tau, rule=_ode3)

def _init(m):
    yield m.x[0] == 0
    yield m.x[1] == m.L
    yield m.v[0] == 0
    yield m.v[1] == 0
    yield m.time[0] == 0
m.initcon = ConstraintList(rule=_init)

discretizer = TransformationFactory('dae.collocation')
discretizer.apply_to(m, ncp=1, scheme='LAGRANGE-RADAU')

solver = SolverFactory('ipopt')
solver.solve(m, tee=True)

print("final time = %6.2f" % (value(m.tf)))
Now, I want to use a class to express a car, so that I can instantiate two cars.
So I wrote this:
from pyomo.environ import *
from pyomo.dae import *

m = ConcreteModel()

class Car():
    def __init__(self, friction):
        self.friction = friction
        self.R = Param(initialize=self.friction)  # Friction factor
        self.tau = ContinuousSet(bounds=(0, 1))  # Unscaled time
        self.time = Var(self.tau)  # Scaled time
        self.tf = Var()
        self.x = Var(self.tau, bounds=(0, None), initialize=0)
        self.v = Var(self.tau, bounds=(0, None))
        self.a = Var(self.tau, bounds=(-3.0, 1.0), initialize=0)
        self.dtime = DerivativeVar(self.time)
        self.dx = DerivativeVar(self.x)
        self.dv = DerivativeVar(self.v)

        def _ode1(m, i):
            if i == 0:
                return Constraint.Skip
            return self.dx[i] == m.tf * self.v[i]
        self.ode1 = Constraint(self.tau, rule=_ode1)

        def _ode2(m, i):
            if i == 0:
                return Constraint.Skip
            return self.dv[i] == m.tf * (self.a[i] - self.R * self.v[i] ** 2)
        self.ode2 = Constraint(self.tau, rule=_ode2)

        def _ode3(m, i):
            if i == 0:
                return Constraint.Skip
            return self.dtime[i] == m.tf
        self.ode3 = Constraint(self.tau, rule=_ode3)

m.car1 = Car(0.001)
m.obj = Objective(expr=m.car1.tf)

def _init(m):
    yield m.car1.x[0] == 0
    yield m.car1.x[1] == 100
    yield m.car1.v[0] == 0
    yield m.car1.v[1] == 0
    yield m.car1.time[0] == 0
m.car1.initcon = ConstraintList(rule=_init)

discretizer = TransformationFactory('dae.finite_difference')
discretizer.apply_to(m, nfe=10, scheme='BACKWARD')

solver = SolverFactory('ipopt')
solver.solve(m, tee=True)

print("final time = %6.2f" % (value(m.car1.tf)))
However, I get this:
Traceback (most recent call last):
File "D:/pyo/pyomoceshi/ceshi3/car/classcar3.py", line 79, in <module>
solver.solve(m, tee=True)
File "D:\python\m\lib\site-packages\pyomo\opt\base\solvers.py", line 582, in solve
self._presolve(*args, **kwds)
File "D:\python\m\lib\site-packages\pyomo\opt\solver\shellcmd.py", line 196, in _presolve
OptSolver._presolve(self, *args, **kwds)
File "D:\python\m\lib\site-packages\pyomo\opt\base\solvers.py", line 661, in _presolve
**kwds)
File "D:\python\m\lib\site-packages\pyomo\opt\base\solvers.py", line 729, in _convert_problem
**kwds)
File "D:\python\m\lib\site-packages\pyomo\opt\base\convert.py", line 110, in convert_problem
problem_files, symbol_map = converter.apply(*tmp, **tmpkw)
File "D:\python\m\lib\site-packages\pyomo\solvers\plugins\converter\model.py", line 164, in apply
io_options=io_options)
File "D:\python\m\lib\site-packages\pyomo\core\base\block.py", line 1646, in write
io_options)
File "D:\python\m\lib\site-packages\pyomo\repn\plugins\ampl\ampl_.py", line 357, in __call__
include_all_variable_bounds=include_all_variable_bounds)
File "D:\python\m\lib\site-packages\pyomo\repn\plugins\ampl\ampl_.py", line 783, in _print_model_NL
list(self_varID_map[id(var)] for var in ampl_repn._linear_vars),
File "D:\python\m\lib\site-packages\pyomo\repn\plugins\ampl\ampl_.py", line 783, in <genexpr>
list(self_varID_map[id(var)] for var in ampl_repn._linear_vars),
KeyError: 68767416L
I want to know how to solve this, or another way to do it.
Below is a working version of your script. I changed things so that instead of a Car class there is a Car function that returns a Pyomo Block representing the car. By having a Car class you were essentially trying to create a subclass of Block and running into several subtle challenges that go along with that. You can see the blog post here for more information. The second change I made was in your declaration of the initial conditions, I changed the name of the ConstraintList from m.car1.initcon to m.car1_initcon. The difference is whether you want the ConstraintList to live on the car1 Block or the model. In your code, the 'dot' in the name meant you were trying to put it on the car1 Block but the constraints yielded in the rule were relative to the model. I changed the name to resolve this inconsistency.
from pyomo.environ import *
from pyomo.dae import *

m = ConcreteModel()

def Car(model, friction):
    def construct_car_block(b):
        b.R = Param(initialize=friction)  # Friction factor
        b.tau = ContinuousSet(bounds=(0, 1))  # Unscaled time
        b.time = Var(b.tau)  # Scaled time
        b.tf = Var()
        b.x = Var(b.tau, bounds=(0, None), initialize=0)
        b.v = Var(b.tau, bounds=(0, None))
        b.a = Var(b.tau, bounds=(-3.0, 1.0), initialize=0)
        b.dtime = DerivativeVar(b.time)
        b.dx = DerivativeVar(b.x)
        b.dv = DerivativeVar(b.v)

        def _ode1(b, i):
            if i == 0:
                return Constraint.Skip
            return b.dx[i] == b.tf * b.v[i]
        b.ode1 = Constraint(b.tau, rule=_ode1)

        def _ode2(b, i):
            if i == 0:
                return Constraint.Skip
            return b.dv[i] == b.tf * (b.a[i] - b.R * b.v[i] ** 2)
        b.ode2 = Constraint(b.tau, rule=_ode2)

        def _ode3(b, i):
            if i == 0:
                return Constraint.Skip
            return b.dtime[i] == b.tf
        b.ode3 = Constraint(b.tau, rule=_ode3)
    return Block(rule=construct_car_block)

m.car1 = Car(m, friction=0.001)
m.obj = Objective(expr=m.car1.tf)

def _init(m):
    yield m.car1.x[0] == 0
    yield m.car1.x[1] == 100
    yield m.car1.v[0] == 0
    yield m.car1.v[1] == 0
    yield m.car1.time[0] == 0
m.car1_initcon = ConstraintList(rule=_init)

discretizer = TransformationFactory('dae.finite_difference')
discretizer.apply_to(m, nfe=15, scheme='BACKWARD')

solver = SolverFactory('ipopt')
solver.solve(m, tee=True)

print("final time = %6.2f" % (value(m.car1.tf)))

Code not training fast: I gave 3,500,000 rows of input as 'data.csv' and the system hanged. Even after 24 hours there was no output

Trying to return the category of the input data. The training data is 'data.csv', which has 3,500,000 rows of sentences and their classes.
import nltk
from nltk.stem.lancaster import LancasterStemmer
import os
import csv
import json
import datetime

stemmer = LancasterStemmer()

training_data = []
with open('data.csv') as f:
    training_data = [{k: str(v) for k, v in row.items()}
                     for row in csv.DictReader(f, skipinitialspace=True)]

words = []
classes = []
documents = []
ignore_words = ['?', '.', '_', '-']  # words to be ignored in input data file

for pattern in training_data:
    w = nltk.word_tokenize(pattern['sentence'])
    words.extend(w)
    documents.append((w, pattern['class']))
    if pattern['class'] not in classes:
        classes.append(pattern['class'])

words = [stemmer.stem(a.lower()) for a in words if a not in ignore_words]
words = list(set(words))  # remove duplicates
classes = list(set(classes))

# create our training data
training = []
output = []
output_empty = [0] * len(classes)
for doc in documents:
    # initialize our bag of words
    bag = []
    # list of tokenized words for the pattern
    pattern_words = doc[0]
    # stem each word
    pattern_words = [stemmer.stem(word.lower()) for word in pattern_words]
    for w in words:
        bag.append(1) if w in pattern_words else bag.append(0)
    training.append(bag)
    output_row = list(output_empty)
    output_row[classes.index(doc[1])] = 1
    output.append(output_row)

import numpy as np
import time

def sigmoid(x):
    output = 1/(1+np.exp(-x))
    return output

def sigmoid_output_to_derivative(output):
    return output*(1-output)

def clean_up_sentence(sentence):
    sentence_words = nltk.word_tokenize(sentence)
    sentence_words = [stemmer.stem(word.lower()) for word in sentence_words]
    return sentence_words

def bow(sentence, words, show_details=False):
    # tokenize the pattern
    sentence_words = clean_up_sentence(sentence)
    # bag of words
    bag = [0]*len(words)
    for s in sentence_words:
        for i, w in enumerate(words):
            if w == s:
                bag[i] = 1
    return(np.array(bag))

# returns the calculated value of the output after multiplying with the sigmoids
def think(sentence, show_details=False):
    x = bow(sentence.lower(), words, show_details)
    # input layer is our bag of words
    l0 = x
    # matrix multiplication of input and hidden layer
    l1 = sigmoid(np.dot(l0, synapse_0))
    # output layer
    l2 = sigmoid(np.dot(l1, synapse_1))
    return l2
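The snippet ends before any training loop, but the nested bag-of-words loop above is most likely already the bottleneck: for 3,500,000 rows it builds a dense Python list of length len(words) per row, which can exhaust time and memory long before training starts. As a rough sketch of the same binary bag-of-words built as a sparse matrix instead (this uses scikit-learn, which is not part of the original code, and skips the Lancaster stemming step):

import csv
from sklearn.feature_extraction.text import CountVectorizer

# Read the two columns straight from data.csv (same file as in the question).
with open('data.csv') as f:
    rows = list(csv.DictReader(f, skipinitialspace=True))
sentences = [row['sentence'] for row in rows]
labels = [row['class'] for row in rows]

# Binary bag-of-words as a scipy sparse matrix: memory grows with the number
# of non-zero entries, not with n_rows * vocabulary_size.
vectorizer = CountVectorizer(binary=True, lowercase=True)
X = vectorizer.fit_transform(sentences)
print(X.shape, X.nnz)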