Java-based Neural Network --- how to implement backpropagation

I am building a test neural network and it is definitely not working. My main problem is backpropagation. From my research, I know that the sigmoid function is the easy choice, so I update each weight by (1 - output)(output)(target - output). The problem is: what if my output is 1 but my target is not? If the output reaches 1 at some point, the weight update will always be 0. For now I am just trying to get the darn thing to add the inputs from 2 input neurons, so the optimal weights should just be 1, since the output neuron simply sums its inputs. I'm sure I have messed this up in lots of places, but here is my code:
package myneuralnet;
import java.util.ArrayList;

public class Main {
    public static void main(String[] args) {
        Double[] inputs = {1.0, 2.0};
        ArrayList<Double> answers = new ArrayList<Double>();
        answers.add(3.0);

        net myNeuralNet = new net(2, 1, answers);
        for (int i = 0; i < 200; i++) {
            myNeuralNet.setInputs(inputs);
            myNeuralNet.start();
            myNeuralNet.backpropagation();
            myNeuralNet.printOutput();
            System.out.println("*****");
            for (int j = 0; j < myNeuralNet.getOutputs().size(); j++) {
                myNeuralNet.getOutputs().get(j).resetInput();
                myNeuralNet.getOutputs().get(j).resetOutput();
                myNeuralNet.getOutputs().get(j).resetNumCalled();
            }
        }
    }
}
package myneuralnet;
import java.util.ArrayList;

public class net {
    private ArrayList<neuron> inputLayer;
    private ArrayList<neuron> outputLayer;
    private ArrayList<Double> answers;

    public net(Integer numInput, Integer numOut, ArrayList<Double> answers) {
        inputLayer = new ArrayList<neuron>();
        outputLayer = new ArrayList<neuron>();
        this.answers = answers;
        for (int i = 0; i < numOut; i++) {
            outputLayer.add(new neuron(true));
        }
        for (int i = 0; i < numInput; i++) {
            ArrayList<Double> randomWeights = createRandomWeights(numInput);
            inputLayer.add(new neuron(outputLayer, randomWeights, -100.00, true));
        }
        for (int i = 0; i < numOut; i++) {
            outputLayer.get(i).setBackConn(inputLayer);
        }
    }

    public ArrayList<neuron> getOutputs() {
        return outputLayer;
    }

    public void backpropagation() {
        for (int i = 0; i < answers.size(); i++) {
            neuron iOut = outputLayer.get(i);
            ArrayList<neuron> iOutBack = iOut.getBackConn();
            Double iSigDeriv = (1 - iOut.getOutput()) * iOut.getOutput();
            Double iError = (answers.get(i) - iOut.getOutput());
            System.out.println("Answer: " + answers.get(i) + " iOut: " + iOut.getOutput() + " Error: " + iError + " Sigmoid: " + iSigDeriv);
            for (int j = 0; j < iOutBack.size(); j++) {
                neuron jNeuron = iOutBack.get(j);
                Double ijWeight = jNeuron.getWeight(i);
                System.out.println("ijWeight: " + ijWeight);
                System.out.println("jNeuronOut: " + jNeuron.getOutput());
                jNeuron.setWeight(i, ijWeight + (iSigDeriv * iError * jNeuron.getOutput()));
            }
        }
        for (int i = 0; i < inputLayer.size(); i++) {
            inputLayer.get(i).resetInput();
            inputLayer.get(i).resetOutput();
        }
    }

    public ArrayList<Double> createRandomWeights(Integer size) {
        ArrayList<Double> iWeight = new ArrayList<Double>();
        for (int i = 0; i < size; i++) {
            Double randNum = (2 * Math.random()) - 1;
            iWeight.add(randNum);
        }
        return iWeight;
    }

    public void setInputs(Double[] is) {
        for (int i = 0; i < is.length; i++) {
            inputLayer.get(i).setInput(is[i]);
        }
        for (int i = 0; i < outputLayer.size(); i++) {
            outputLayer.get(i).resetInput();
        }
    }

    public void start() {
        for (int i = 0; i < inputLayer.size(); i++) {
            inputLayer.get(i).fire();
        }
    }

    public void printOutput() {
        for (int i = 0; i < outputLayer.size(); i++) {
            System.out.println(outputLayer.get(i).getOutput().toString());
        }
    }
}
package myneuralnet;
import java.util.ArrayList;

public class neuron {
    private ArrayList<neuron> connections;
    private ArrayList<neuron> backconns;
    private ArrayList<Double> weights;
    private Double threshold;
    private Double input;
    private Boolean isOutput = false;
    private Boolean isInput = false;
    private Double totalSignal;
    private Integer numCalled;
    private Double myOutput;

    public neuron(ArrayList<neuron> conns, ArrayList<Double> weights, Double threshold) {
        this.connections = conns;
        this.weights = weights;
        this.threshold = threshold;
        this.totalSignal = 0.00;
        this.numCalled = 0;
        this.backconns = new ArrayList<neuron>();
        this.input = 0.00;
    }

    public neuron(ArrayList<neuron> conns, ArrayList<Double> weights, Double threshold, Boolean isin) {
        this.connections = conns;
        this.weights = weights;
        this.threshold = threshold;
        this.totalSignal = 0.00;
        this.numCalled = 0;
        this.backconns = new ArrayList<neuron>();
        this.input = 0.00;
        this.isInput = isin;
    }

    public neuron(Boolean tf) {
        this.connections = new ArrayList<neuron>();
        this.weights = new ArrayList<Double>();
        this.threshold = 0.00;
        this.totalSignal = 0.00;
        this.numCalled = 0;
        this.isOutput = tf;
        this.backconns = new ArrayList<neuron>();
        this.input = 0.00;
    }

    public void setInput(Double input) {
        this.input = input;
    }

    public void setOut(Boolean tf) {
        this.isOutput = tf;
    }

    public void resetNumCalled() {
        numCalled = 0;
    }

    public void setBackConn(ArrayList<neuron> backs) {
        this.backconns = backs;
    }

    public Double getOutput() {
        return myOutput;
    }

    public Double getInput() {
        return totalSignal;
    }

    public Double getRealInput() {
        return input;
    }

    public ArrayList<Double> getWeights() {
        return weights;
    }

    public ArrayList<neuron> getBackConn() {
        return backconns;
    }

    public Double getWeight(Integer i) {
        return weights.get(i);
    }

    public void setWeight(Integer i, Double d) {
        weights.set(i, d);
    }

    public void setOutput(Double d) {
        myOutput = d;
    }

    public void activation(Double myInput) {
        numCalled++;
        totalSignal += myInput;
        if (numCalled == backconns.size() && isOutput) {
            System.out.println("Total Sig: " + totalSignal);
            setInput(totalSignal);
            setOutput(totalSignal);
        }
    }

    public void activation() {
        Double activationValue = 1 / (1 + Math.exp(input));
        setInput(activationValue);
        fire();
    }

    public void fire() {
        for (int i = 0; i < connections.size(); i++) {
            Double iWeight = weights.get(i);
            neuron iConn = connections.get(i);
            myOutput = (1 / (1 + (Math.exp(-input)))) * iWeight;
            iConn.activation(myOutput);
        }
    }

    public void resetInput() {
        input = 0.00;
        totalSignal = 0.00;
    }

    public void resetOutput() {
        myOutput = 0.00;
    }
}
OK, that is a lot of code, so allow me to explain. The net is simple for now: just an input layer and an output layer. I want to add a hidden layer later, but I'm taking baby steps. Each layer is an ArrayList of neurons. The input neurons are loaded with the inputs, a 1 and a 2 in this example. These neurons fire, which calculates the sigmoid of the inputs and sends the result to the output neurons, which add their inputs and store the value. Then the net backpropagates by taking (answer - output)(output)(1 - output)(output of the specific input neuron) and updating the weights accordingly. A lot of times it cycles through and I get infinity, which seems to correlate with negative weights or sigmoids. When that doesn't happen, it converges to 1, and since (1 - output) is then 0, my weights stop updating.
The numCalled and totalSignal values are just so the algorithm waits for all of a neuron's inputs before continuing. I know I'm doing this an odd way, but the neuron class has an ArrayList of neurons called connections to hold the neurons it is forward-connected to, and another ArrayList called backconns for the backward connections. I believe I am updating the correct weights as well: I get all back connections of output neuron i, and from each back-connected neuron j (the layer above i) I only pull weight i. I apologize for the messiness --- I've been trying lots of things for hours upon hours now and still cannot figure it out. Any help is greatly appreciated!

Some of the best textbooks on neural networks in general are Chris Bishop's and Simon Haykin's. Try reading through the chapter on backprop and understanding why the terms in the weight-update rule are the way they are. The reason I am asking you to do that is that backprop is more subtle than it seems at first. Things change a bit if you use a linear activation function for the output layer (think about why you might want to do that; hint: post-processing), or if you add a hidden layer. It got clearer for me when I actually read the book.
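For reference, the output-layer update those chapters derive is the standard delta rule. For a sigmoid output unit it reads

    \delta_k = (t_k - o_k)\, o_k (1 - o_k), \qquad w_{jk} \leftarrow w_{jk} + \eta\, \delta_k\, o_j

where t_k is the target, o_k is the unit's output, o_j is the output of presynaptic neuron j, and \eta is a learning rate. With a linear output unit the derivative is 1, so the o_k(1 - o_k) factor disappears and the update cannot stall when the output saturates at 1.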

You might want to compare your code to this single-layer perceptron.
I think you have a bug in your backprop algorithm. Also, try replacing the sigmoid with a square wave.
http://web.archive.org/web/20101228185321/http://en.literateprograms.org/Perceptron_%28Java%29
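For comparison, here is a minimal sketch of the classic single-layer perceptron update with a step ("square wave") activation; all names are illustrative and not taken from the linked page:

import java.util.Arrays;

// Minimal single-layer perceptron sketch (illustrative, learns AND, which is linearly separable).
public class PerceptronSketch {
    public static void main(String[] args) {
        double[][] x = {{0, 0}, {0, 1}, {1, 0}, {1, 1}}; // inputs
        int[] t = {0, 0, 0, 1};                          // AND targets
        double[] w = {0.0, 0.0};
        double bias = 0.0, eta = 0.1;

        for (int epoch = 0; epoch < 100; epoch++) {
            for (int i = 0; i < x.length; i++) {
                double sum = bias + w[0] * x[i][0] + w[1] * x[i][1];
                int out = sum >= 0 ? 1 : 0;  // step ("square wave") activation
                int err = t[i] - out;        // simple error-correction rule
                w[0] += eta * err * x[i][0];
                w[1] += eta * err * x[i][1];
                bias += eta * err;
            }
        }
        System.out.println("w = " + Arrays.toString(w) + ", bias = " + bias);
    }
}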

what if my Output is 1 but my target is not?
The sigmoid function 1/(1 + Math.exp(-x)) never equals 1. Its limit as x approaches infinity is 1, but that is a horizontal asymptote, so the function never actually reaches 1 (likewise, its limit as x approaches negative infinity is 0, which is never reached either). Therefore, if this expression is used to compute all of your output values, your output will never be exactly 1, so (1 - output) should never equal 0.
I think your issue is in the calculation of the output. For a neural network, the output of each neuron is typically sigmoid(dot product of inputs and weights). In other words, value = input1 * weight1 + input2 * weight2 + ... (for each weight of the neuron) + biasWeight. Then that neuron's output = 1 / (1 + Math.exp(-value)). If it's calculated this way, the output will never equal 1.
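A minimal sketch of that computation (the method and variable names here are my own, not from the question's classes):

// Sketch: weighted sum first, then one sigmoid over the total.
public static double neuronOutput(double[] inputs, double[] weights, double biasWeight) {
    double value = biasWeight;
    for (int i = 0; i < inputs.length; i++) {
        value += inputs[i] * weights[i];    // dot product of inputs and weights
    }
    return 1.0 / (1.0 + Math.exp(-value)); // sigmoid: strictly between 0 and 1
}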

Related

Problem in Unity: ArgumentOutOfRangeException: Index was out of range

I always have the following problem when I execute the code in Unity:
ArgumentOutOfRangeException: Index was out of range. Must be non-negative and less than the size of the collection.
Parameter name: index
I set the Grid property directly in the Unity editor, with a value of 10 for both x and y.
public class World : MonoBehaviour
{
    public Room[,] Dungeon { get; set; }
    public Vector2 Grid;

    private void Awake()
    {
        Dungeon = new Room[(int)Grid.x, (int)Grid.y];
        StartCoroutine(GenerateFloor());
    }

    public IEnumerator GenerateFloor()
    {
        for (int x = 0; x < Grid.x; x++)
        {
            for (int y = 0; y < Grid.y; y++)
            {
                Dungeon[x, y] = new Room
                {
                    RoomIndex = new Vector2(x, y)
                };
            }
        }
        yield return new WaitForSeconds(3);
        Vector2 exitLocation = new Vector2((int)Random.Range(0, Grid.x), (int)Random.Range(0, Grid.y));
        Dungeon[(int)exitLocation.x, (int)exitLocation.y].Exit = true;
        Dungeon[(int)exitLocation.x, (int)exitLocation.y].Empty = false;
        Debug.Log("Exit is at: " + exitLocation);
    }
}
I hope you can help me with this.
It's hard to spot an error like this without debugging the code. But in general: try breaking public Vector2 Grid into two int variables and using those, so the values don't need to be explicitly cast to int everywhere; your code will become significantly more readable. If you require x == y, you can then test for that in Awake instead of accidentally reading the wrong value. Likewise, Vector2 exitLocation can be replaced with a Vector2Int or two variables.

Incompatible operand types String[] to int

Incompatible operand types String[] to int. I added a divide-and-conquer algorithm to my project, but I don't know whether it is a good integration. My project is about finding the shortest route through x-y coordinates for cities, and it can involve three algorithms:
divide-and-conquer strategy
greedy algorithm
nearest-neighbor algorithm
package formalProject;
import java.io.*;
import java.io.FileNotFoundException;
import java.util.*;

public class Main {
    public static void main(String[] args) throws FileNotFoundException {
        String[][] cities = readArray("att48_xy.txt");
        // printing cities 2d array
        for (int i = 0; i < cities.length; i++) {
            System.out.println(cities[i][0] + " " + cities[i][1]);
        }
    }

    public static String[][] readArray(String file) throws FileNotFoundException {
        // we'll count how many elements there are
        int counter = 0; // counter for calculating the text's row count
        Scanner sc1 = new Scanner(new File(file)); // scanner for calculating the text's row count
        while (sc1.hasNextLine()) { // checks whether there is another line
            counter++;
            sc1.nextLine(); // jumps to next line
        }
        String[][] cities = new String[counter][2]; // creating our cities array with
                                                    // "counter" rows and 2 columns (X, Y)
        Scanner sc2 = new Scanner(new File(file)); // scanner for getting values from the text
        int i = 0;
        while (sc2.hasNext()) {
            String tempX = sc2.next(); // first next is the X coordinate
            String tempY = sc2.next(); // second next is the Y coordinate
            cities[i][0] = tempX;
            cities[i][1] = tempY;
            i++;
        }
        return cities; // returns our 2d array
    }

    public static int cities1(String[][] cities) {
        if (cities.length == 0 || cities.length == 1) {
            return 0;
        } else {
            return cities(cities, 1, cities.length);
        }
    }

    public static int cities(String[][] cities, int i, int f) {
        int m, result, sx, dx;
        if (i >= f) {
            return 0;
        } else {
            m = (i + f) / 2;
            sx = cities(cities, i, m);
            dx = cities(cities, m + 1, f);
            result = sx + dx;
            if ((cities[m] == cities[m + 1]) && (cities[m] == 0)) // problem is here
                result++;
            return result;
        }
    }
}
cities[m] is an array. I think you possibly want cities[m].length here.
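For illustration, if the intent of that line is to compare the two middle cities, one way to write it (a sketch, assuming the coordinates should be compared as string values) would be:

// Hypothetical fix, assuming the intent is to compare adjacent cities' coordinates.
// cities[m] == cities[m + 1] compares array references, and cities[m] == 0 compares an
// array to an int (which does not compile), so compare the element values instead:
boolean sameCoordinates = cities[m][0].equals(cities[m + 1][0])
                       && cities[m][1].equals(cities[m + 1][1]);
if (sameCoordinates) {
    result++;
}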

How to use selection sort in objects and classes

I'm creating two classes, called StopWatch and randomnumbers, which I have already done, but I need to create a test program that measures the execution time of sorting 100,000 numbers using selection sort. I know how to write a selection sort; I just don't know how to take the random numbers class and put it together with the selection sort. I get the error message "incompatible types: randomnumbers cannot be converted to int". I hope someone can help me.
My random numbers class
import java.util.Random;

public class randomnumbers {
    Random ran1 = new Random();
    private int size;

    public randomnumbers() {
        size = 100000;
    }

    public int getSize() {
        return size;
    }

    public void setSize(int newSize) {
        size = newSize;
    }

    public int[] createArray(int[] size) {
        for (int i = 0; i < size.length; i++) {
            size[i] = ran1.nextInt();
        }
        return size;
    }

    public static void printArray(int[] array) {
        for (int i = 0; i < array.length; i++) {
            if (i < 0) {
                System.out.println(array[i] + " ");
            }
        }
    }
}
My test program
public static void main(String[] args) {
    // Create a StopWatch object
    StopWatch timer = new StopWatch();
    // create random numbers
    randomnumbers numbers = new randomnumbers();
    // Create the size of the array
    numbers.getSize();
    // Invoke the start method in StopWatch class
    timer.start();
    // sort random numbers
    selectionSort();
    // Invoke the stop method in StopWatch class
    timer.stop();
    // Display the execution time
    System.out.println("The execution time for sorting 100,000 " +
            "numbers using selection sort: " + timer.getElapsedTime() +
            " milliseconds");
}

// selectionSort performs a selection sort on an array
public static void selectionSort(int[] array) {
    for (int i = 0; i < array.length - 1; i++) {
        int min = array[i];
        int minIndex = i;
        for (int j = i + 1; j < array.length; j++) {
            if (array[j] < min) {
                min = array[j];
                minIndex = j;
            }
        }
        if (i != minIndex) {
            array[minIndex] = array[i];
            array[i] = min;
        }
    }
}
}
Where exactly are you getting the "incompatible types: randomnumbers cannot be converted to int" error?
There are multiple issues with the code:
Unconventional naming.
The size field in the randomnumbers class is used as the actual array size in the constructor, but in createArray it is shadowed by a parameter of the same name with a different type and meaning.
You are not passing any array to selectionSort in main. This is where I get a compile error on your code.
printArray has an if (i < 0) condition that is never true (the loop index starts at 0), so it will never print anything.
Feeding selectionSort with numbers.createArray(new int[numbers.getSize()]) compiles and ends up sorting the array, as sketched below.
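A minimal sketch of that wiring (assuming the StopWatch class from the question provides start(), stop(), and getElapsedTime()):

// Sketch of the corrected test program; StopWatch is assumed to exist as in the question.
public static void main(String[] args) {
    StopWatch timer = new StopWatch();
    randomnumbers numbers = new randomnumbers();

    // build an array of the configured size and fill it with random values
    int[] array = numbers.createArray(new int[numbers.getSize()]);

    timer.start();
    selectionSort(array); // pass the array; selectionSort takes an int[]
    timer.stop();

    System.out.println("The execution time for sorting 100,000 numbers"
            + " using selection sort: " + timer.getElapsedTime() + " milliseconds");
}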

DL4J linear regression

I am new to neural networks. I am trying to implement and train a simple neural network with DL4J. My function:
y = x * 2 + 300
My vision (expected result): [plot omitted]
My result: [plot omitted]
Parameters:
public final int seed = 12345;
public final int iterations = 1;
public final int nEpochs = 1;
public final int batchSize = 1000;
public final double learningRate = 0.01;
public final Random rng = new Random(seed);
public final int numInputs = 2;
public final int numOutputs = 1;
public final double maxX = 100; // xmax = 100; ymax = 500.
public final double scale = 500; // for scaling x and y.
Network configuration:
public MultiLayerConfiguration createConf() {
    return new NeuralNetConfiguration.Builder()
            .seed(seed)
            .iterations(iterations)
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .learningRate(learningRate)
            .weightInit(WeightInit.XAVIER)
            .updater(new Nesterovs(0.9))
            .list()
            .layer(0, new OutputLayer.Builder(LossFunctions.LossFunction.MSE)
                    .activation(Activation.IDENTITY)
                    .nIn(numInputs).nOut(numOutputs).build())
            .pretrain(false).backprop(true).build();
}
Training data:
public DataSetIterator generateTrainingData() {
    List<DataSet> list = new ArrayList<>();
    for (int i = 0; i < batchSize; i++) {
        double x = rng.nextDouble() * maxX * (rng.nextBoolean() ? 1 : -1);
        double y = y(x);
        list.add(
            new DataSet(
                Nd4j.create(new double[]{x / scale, 1}),
                Nd4j.create(new double[]{y / scale})
            )
        );
    }
    return new ListDataSetIterator(list, batchSize);
}
Testing:
public void test() {
    final MultiLayerNetwork net = new MultiLayerNetwork(createConf());
    net.init();
    net.setListeners(new ScoreIterationListener(1));
    for (int i = 0; i < nEpochs; i++) {
        net.fit(generateTrainingData());
    }
    int idx = 0;
    double x[] = new double[19];
    double y[] = new double[19];
    double p[] = new double[19];
    for (double i = -90; i < 100; i += 10) {
        x[idx] = i;
        y[idx] = y(i);
        p[idx] = scale * net.output(Nd4j.create(new double[]{i / scale, 1})).getDouble(0, 0);
        idx++;
    }
    plot(x, y, p);
}
Please tell me what I am doing wrong, or whether my expectation is incorrect.
Thank you in advance,
Regards,
Minas
Take a look at this example:
https://github.com/deeplearning4j/dl4j-examples/tree/master/dl4j-examples/src/main/java/org/deeplearning4j/examples/feedforward/regression
A few tips:
Use our built-in normalization tools; don't do this yourself. Our normalization tools allow you to normalize labels as well (see the sketch below).
Turn minibatch off (set minibatch(false) on the neural net config, near the top). Ultimately you still aren't actually doing "minibatch learning".
Also, you're regenerating the dataset each time. There's no need to do that; just create it once and pass it to fit.
For visualization purposes, use the restore mechanism I mentioned earlier (this is in the example; you can pick any one of the normalizers, like MinMaxScalar, NormalizeStandardize, etc.).
Your iterations are also wrong. Just keep that value at 1 and keep your for loop; otherwise you're just overfitting and spending far more training time than you need to. An "iteration" is actually the number of updates you want to run per fit call on the same dataset. Next release we are getting rid of that option anyway.
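A rough sketch of how the built-in normalization might be wired in, modeled on the linked regression examples (NormalizerMinMaxScaler and the calls below are from the DL4J API as I understand it; details may differ between versions, and xValue is a placeholder name):

// Hedged sketch of built-in normalization, based on the DL4J regression examples.
DataSetIterator iter = generateTrainingData(); // generate the raw (unscaled) data once

NormalizerMinMaxScaler normalizer = new NormalizerMinMaxScaler(0, 1);
normalizer.fitLabel(true);        // normalize the labels (y) as well
normalizer.fit(iter);             // collect min/max statistics from the data
iter.reset();
iter.setPreProcessor(normalizer); // scale each batch on the fly

net.fit(iter);

// For plotting, revert predictions back to the original scale:
INDArray prediction = net.output(Nd4j.create(new double[]{xValue, 1}));
normalizer.revertLabels(prediction);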

XOR Neural Network not converging

I'm having a problem getting my XOR neural network to converge. It has two inputs, 2 nodes in the hidden layer, and one output node. I think it has something to do with my backpropagation algorithm, but I have tried to figure out where the problem occurs and I can't. I have also looked extensively over all the algorithms, and they appear to be correct.
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Random;

public class NeuralNetwork {

    public static class Perceptron {
        public ArrayList<Perceptron> inputs;
        public ArrayList<Double> inputWeight;
        public double output;
        public double error;
        private double bias = 1;
        private double biasWeight;
        public boolean activationOn = false;

        // sets up non-input layers
        public Perceptron(ArrayList<Perceptron> in) {
            inputWeight = new ArrayList<Double>(in.size());
            inputs = in;
            initWeight(in.size());
        }

        // basic constructor
        public Perceptron() { }

        // generate random weights
        private void initWeight(int size) {
            Random generator = new Random();
            for (int i = 0; i < size; i++)
                inputWeight.add(i, generator.nextDouble());
            biasWeight = generator.nextDouble();
        }

        // calculate output based on current outputs of last layer
        public double calculateOutput() {
            double num = 0;
            num = bias * biasWeight;
            for (int i = 0; i < inputs.size(); i++)
                num += inputs.get(i).output * inputWeight.get(i);
            output = num;
            if (activationOn)
                output = sigmoid(output);
            else
                output = threshold(output);
            return output;
        }

        // methods used for learning
        // calculate output error
        public double calcOutputError(double expected) {
            error = output * (1 - output) * (expected - output);
            return error;
        }

        // calculate node blame
        public void blame(double outError, double outWeight) {
            error = output * (1 - output) * outWeight * outError;
        }

        // adjust weights
        public void adjustWeight() {
            double alpha = .5;
            double newWeight = 0;
            for (int i = 0; i < inputs.size(); i++) {
                newWeight = inputWeight.get(i) + alpha * inputs.get(i).output * error;
                inputWeight.set(i, newWeight);
            }
            // adjust bias weight
            newWeight = biasWeight + alpha * bias * error;
            biasWeight = newWeight;
            //System.out.println("Weight " + biasWeight);
        }

        // returns the sigmoid of x
        private double sigmoid(double x) {
            return (1 / (1 + Math.pow(Math.E, -x)));
        }

        // returns threshold of x
        private double threshold(double x) {
            if (x >= 0.5)
                return 1;
            else
                return 0;
        }
    }

    // teaches a neural network XOR
    public static void teachXOR(ArrayList<Perceptron> inputs, ArrayList<Perceptron> hidden, Perceptron output) {
        int examples[][] = { {0, 0, 0},
                             {1, 1, 0},
                             {0, 1, 1},
                             {1, 0, 1} };
        boolean examplesFix[] = {false, false, false, false};
        int layerSize = 2;
        boolean learned = false;
        boolean fixed;
        int limit = 50000;
        while (!learned && limit > 0) {
            learned = true;
            limit--;
            // turn on using activation function
            for (int i = 0; i < 2; i++)
                hidden.get(i).activationOn = true;
            output.activationOn = true;
            for (int i = 0; i < 4; i++) {
                examplesFix[i] = false;
                // set up inputs
                for (int j = 0; j < layerSize; j++)
                    inputs.get(j).output = examples[i][j];
                // calculate outputs for hidden layer
                for (int j = 0; j < layerSize; j++)
                    hidden.get(j).calculateOutput();
                // calculate final output
                double outValue = output.calculateOutput();
                System.out.println("Check output " + examples[i][0] + "," + examples[i][1] + " = " + outValue);
                if (((outValue < .5 && examples[i][2] == 1) || (outValue > .5 && examples[i][2] == 0))) {
                    learned = false;
                    examplesFix[i] = true;
                }
            }
            // turn on using activation function
            for (int i = 0; i < 2; i++)
                hidden.get(i).activationOn = true;
            output.activationOn = true;
            // teach the nodes that are incorrect
            if (!learned && limit >= 0) {
                for (int i = 0; i < 4; i++) {
                    if (examplesFix[i]) {
                        fixed = false;
                        while (!fixed) {
                            //System.out.println("Adjusting weight: " + examples[i][0] + "," + examples[i][1] + " --> " + examples[i][2]);
                            for (int j = 0; j < layerSize; j++)
                                inputs.get(j).output = examples[i][j];
                            // calculate outputs for hidden layer
                            for (int j = 0; j < layerSize; j++)
                                hidden.get(j).calculateOutput();
                            // calculate final output
                            double outValue = output.calculateOutput();
                            if ((outValue >= .5 && examples[i][2] == 1) || (outValue < .5 && examples[i][2] == 0)) {
                                fixed = true;
                            } else {
                                double outError = output.calcOutputError(examples[i][2]);
                                // blame the hidden layer nodes
                                for (int j = 0; j < layerSize; j++)
                                    hidden.get(j).blame(outError, output.inputWeight.get(j));
                                // adjust weights
                                for (int j = 0; j < layerSize; j++)
                                    hidden.get(j).adjustWeight();
                                output.adjustWeight();
                            }
                        }
                    }
                }
            }
        }
        //if(limit <= 0)
        //    System.out.println("Did not converge");//, error: " + output.error);
        //System.out.println("Done");
    }

    // runs tests for XOR, not complete
    public static void runXOR(ArrayList<Perceptron> inputs, ArrayList<Perceptron> hidden, Perceptron output) throws IOException {
        // create new file
        PrintWriter writer;
        File file = new File("Test.csv");
        if (file.exists())
            file.delete();
        file.createNewFile();
        writer = new PrintWriter(file);
        ArrayList<String> positive = new ArrayList<String>();
        ArrayList<String> negative = new ArrayList<String>();
        // turn off using activation function
        for (int i = 0; i < 2; i++)
            hidden.get(i).activationOn = false;
        output.activationOn = false;
        // tests 10,000 points
        for (int i = 0; i <= 100; i++) {
            for (int j = 0; j <= 100; j++) {
                inputs.get(0).output = (double) i / 100;
                inputs.get(1).output = (double) j / 100;
                // calculate outputs for hidden layer
                for (int k = 0; k < 2; k++)
                    hidden.get(k).calculateOutput();
                // calculate final output
                double outValue = output.calculateOutput();
                // keep track of positive and negative results
                if (outValue >= .5) {
                    positive.add((double) i / 100 + "," + (double) j / 100 + "," + outValue);
                    //writer.println((double)i/100 + "," + (double)j/100 + ",1");
                } else if (outValue < .5) {
                    negative.add((double) i / 100 + "," + (double) j / 100 + "," + outValue);
                    //writer.println((double)i/100 + "," + (double)j/100 + ",0");
                }
            }
        }
        // write out to file
        writer.println("X,Y,Positive,X,Y,Negative");
        int i = 0;
        while (i < positive.size() && i < negative.size()) {
            writer.println(positive.get(i) + "," + negative.get(i));
            i++;
        }
        while (i < positive.size()) {
            writer.println(positive.get(i));
            i++;
        }
        while (i < negative.size()) {
            writer.println(",,," + negative.get(i));
            i++;
        }
        writer.close();
    }

    // used for testing
    public static void main(String[] args) throws IOException {
        int layerSize = 2;
        ArrayList<Perceptron> inputLayer;
        ArrayList<Perceptron> hiddenLayer;
        Perceptron outputLayer;
        // XOR neural network
        inputLayer = new ArrayList<Perceptron>(layerSize);
        hiddenLayer = new ArrayList<Perceptron>(layerSize);
        //for(Perceptron per : inputLayer)
        //    per = new Perceptron();
        for (int i = 0; i < layerSize; i++)
            inputLayer.add(new Perceptron());
        for (int i = 0; i < layerSize; i++)
            hiddenLayer.add(new Perceptron(inputLayer));
        outputLayer = new Perceptron(hiddenLayer);
        teachXOR(inputLayer, hiddenLayer, outputLayer);
        runXOR(inputLayer, hiddenLayer, outputLayer);
    }
}
First, your code has a very peculiar structure and will be hard to debug. I would consider rewriting it from scratch with a clearer structure, fewer internal fields, and more actual functions returning values.
One major error (possibly not the only one) is your distinction between output and learnOutput in the hidden layer. When you calculate the activation of the output layer you actually use the output field, while you should use learnOutput (which is the only one actually using the sigmoid activation).
Furthermore, if you restructure your code correctly, you can create a unit test for numerical gradient checking, and this is something you should always do when working with neural networks or other gradient-trained machines. In this case it would show you that your gradient is incorrect.
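A minimal sketch of what such a numerical gradient check looks like (generic and not tied to the classes above): perturb each weight by plus or minus a small epsilon, measure the loss change with a central difference, and compare against the analytic gradient.

import java.util.function.ToDoubleFunction;

// Generic numerical gradient check (illustrative; loss is any function of the weight vector).
public class GradientCheck {
    public static void check(double[] w, double[] analyticGrad,
                             ToDoubleFunction<double[]> loss) {
        double eps = 1e-5;
        for (int i = 0; i < w.length; i++) {
            double saved = w[i];
            w[i] = saved + eps;
            double lossPlus = loss.applyAsDouble(w);
            w[i] = saved - eps;
            double lossMinus = loss.applyAsDouble(w);
            w[i] = saved; // restore the weight
            double numeric = (lossPlus - lossMinus) / (2 * eps); // central difference
            double diff = Math.abs(numeric - analyticGrad[i]);  // should be tiny (~1e-7)
            System.out.printf("w[%d]: numeric=%.6f analytic=%.6f diff=%.2e%n",
                    i, numeric, analyticGrad[i], diff);
        }
    }
}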