Neural network back propagation fails to overfit small data

Neural network back propagation fails to overfit small data - neural-network

I'm trying to implement neural network with back propagation algorithm in Racket. To test the implementation, I decided to train it on a very small data for large amount of iterations, and see if it fits the data it was trained on. However it does not -- using the sigmoid function it outputs extremely small values (of the magnitude of -20), but relative values is correct (that is, the input vector with biggest target value also produces the biggest value in the trained network). Using relu function, the outputs by their magnitued are closer to desired, but incorrect relative to each other. I'd be glad to receive any insight, on why it is so.
#lang racket
; activation function. Fn - function, dfn - its derirative
(define-struct activation (fn dfn))
;activation using sigmoid
(define sigmoid-a (let ([sigmoid (lambda (x)
(/ 1 (+ 1 (exp (* x -1)))))])
(activation (lambda(x)
(sigmoid x))
(lambda(x)
(*(sigmoid x) (- 1 (sigmoid x)))))))
; activation using relu
(define relu-a (activation (lambda(x) (if (< x 0)
(* 0.2 x)
x))
(lambda(x) (if (< x 0)
0.2
1))))
; neuron. Bias is implicit first weight
(define-struct neuron (weights) #:transparent )
; neuron output before applying activation function
(define (neuron-out-unactivated neuron inputs)
(foldl (lambda(w in result)
(+ result (* w in)))
0
(neuron-weights neuron)
(cons -1 inputs)))
; neuron output with activation function applied
(define (neuron-out neuron inputs activation-fn)
(activation-fn (neuron-out-unactivated neuron inputs)))
; neuron layer
(define-struct layer (neurons) #:transparent )
; list of layer's neurons' output, before activation function
(define (layer-out-unactivated layer inputs)
(map (lambda(neuron)
(neuron-out-unactivated neuron inputs))
(layer-neurons layer)))
; list of layer's neurons' output with activation function applied
(define (layer-out layer inputs activation-fn)
(map (lambda(neuron)
(neuron-out neuron inputs activation-fn))
(layer-neurons layer)))
; neural network
(define-struct neural-network (layers activation) #:transparent)
; neural network output
(define (neural-network-out nn inputs)
(let pass ([layers (neural-network-layers nn)]
[inputs inputs])
(if (empty? layers) inputs
(pass (rest layers)
(layer-out (first layers)
inputs
(activation-fn (neural-network-activation nn)))))))
; calculating derirative for the neuron in the last (output) layer
; out-unactivated -- neuron's output before applying activation function
; target-- teaching data / desired result
; activation -- activation fn and its derirative
(define (d-lastlayer out-unactivated target activation)
(let ([result (* (- ((activation-fn activation) out-unactivated) target) ((activation-dfn activation) out-unactivated))])
result))
; calculating derirative for the neuron in the inner (hidden) layer
; neuron-index -- place of the neuron in its layer. Needed, because weights are stored in the next layer's neurons.
; out-unactivated -- neuron's output before applying activation function
; d-nextlayer -- deriratives of the next layer
; activation -- activation fn and its derirative
(define (d-innerlayer neuron-index out-unactivated d-nextlayer nextlayer activation)
(define mp (map (lambda (neur d)
(let* ([w (list-ref (neuron-weights neur)
(add1 neuron-index))]
[result (* d w)])
result))
(layer-neurons nextlayer)
d-nextlayer))
(* (foldl + 0 mp)
((activation-dfn activation) out-unactivated)))
; maps list of layers into list of layer deriratives, where each layer derirative is a list of its neuron deriratives
(define (backpropagation layers inputs targets activation)
(let ([output (layer-out-unactivated (first layers) inputs)])
(if (empty? (rest layers))
(list (map (lambda (out target) (d-lastlayer out target activation)) output targets))
(let ([next-layer-d (backpropagation (rest layers) output targets activation)])
(cons (map (lambda(index out)
(d-innerlayer index
out
(first next-layer-d)
(first (rest layers))
activation))
(range (length output))
output)
next-layer-d)))))
; calculates new weights for the layer.
(define (transform-layer _layer input derirative train-speed)
(layer (map (lambda(n d)
(neuron (map (lambda(w i)
(+ w (* (- train-speed) i d)))
(neuron-weights n)
(cons -1 input))))
(layer-neurons _layer)
derirative)))
; calculates new weights for all layers
(define (update-layers layers inputs deriratives train-speed activation-fn)
(if (empty? layers) '()
(cons (transform-layer (first layers)
inputs
(first deriratives)
train-speed)
(update-layers (rest layers)
(layer-out (first layers)
inputs
activation-fn)
(rest deriratives)
train-speed
activation-fn))))
; performs network update for single input vector
(define (train-neural-network-iteration network inputs target train-speed)
(let* ([layers (neural-network-layers network)]
[activation (neural-network-activation network)]
[deriratives (backpropagation layers inputs target activation)]
[new-layers (update-layers layers inputs deriratives train-speed (activation-fn activation))])
(neural-network new-layers (neural-network-activation network))))
; performs network update for each input in teaching-data
(define (train-neural-network-epoch network teaching-data train-speed)
(let train ([network network]
[data teaching-data])
(if (empty? data) network
(train (train-neural-network-iteration network (car (first data)) (cdr (first data)) train-speed) (rest data)))))
; Trains network for `iterations` amount of epochs
(define (train-neural-network network data iterations train-speed)
(let it ([i 0] [network network])
(if (> i iterations) network
(it (add1 i) (train-neural-network-epoch network data train-speed)))))
; creates a network. Neuron count list -- a list of integers, each telling how many neurons in that layer
(define (create-neural-network inputs-length neuron-count-list activation)
(let _create ([inputs-l inputs-length] [n-count neuron-count-list] [layers '()])
(if (empty? n-count) (neural-network (reverse layers) activation)
(_create (first n-count)
(rest n-count)
(cons (layer (build-list (first n-count)
(lambda (n)
(neuron (build-list (add1 inputs-l)
(lambda(n2) (/ (+ (random 50) 14) 64)))))))
layers)))
))
;test
(define (test-case act)
(define nn (create-neural-network 1 (list 3 1) act))
(define data (list (cons (list 0) (list 0))
(cons (list 1) (list 1))
(cons (list 2) (list 0))))
(define trained-nn (train-neural-network nn data 1000000 0.001))
(println (~a (neural-network-out trained-nn (list 0))))
(println (~a (neural-network-out trained-nn (list 1))))
(println (~a (neural-network-out trained-nn (list 2))))
(println (~a trained-nn)))
(test-case sigmoid-a)
;outputs
;0->2 * 10^(-29)
;1->5 * 10^(-21)
;2->2 * 10^(-31)
(test-case relu-a)
;outputs
;0 -> ~164
;1 -> ~164
;2 -> ~0
(provide (all-defined-out))

The issue was in the recursive call of the backpropogation function --
(let ([next-layer-d (backpropagation (rest layers) output targets activation)])
Output here is the output of current layer before activation function, however it should've been after.

Related

Turning a vector into a list and then into an image in Racket

So, my brain is fried and in class we are working on Conway's Game of Life in DrRacket. This is an Intro to CS class so this is proving to be difficult for me, since coding is very new to me.
I have used lists in animation before but I am lost as to how to turn a vector into an image. Our prof gave us the hint of turning a vector into a list and then we should be able to create the image. I can turn the vector into a list but then I get lost. Any help, guidance or advice would be so greatly appreciated...greatly greatly appreciated.
This isn't all of the code, just a sample.
(define small-board (vector
(vector 1 0)
(vector 0 1))
)
(define live-square (square 10 "solid" "blue"))
(define dead-square (square 10 "solid" "red"))
;Purpose: Create a function that turns board into an image
;Signature: Vector of Vectors -> Image
;Example
(check-expect (board->image small-board)
(above (beside live-square dead-square)
(beside dead-square live-square))
)
;Code
(define (board->image brd)
...

Since you can turn the 2d vector into a 2d list, I can show you how to turn the 2d list into an image.
(require 2htdp/image)
(define small-board (vector (vector 1 0) (vector 0 1)))
(define small-board-as-list (list (list 1 0) (list 0 1)))
(define live-square (square 10 "solid" "blue"))
(define dead-square (square 10 "solid" "red"))
(define MT empty-image)
Recur over the board and put every rendered row above the rest of the rendered board. Within a helper, recur over the row and put each rendered cell beside the rendered "rest" of the row.
; [List-of [List-of (U 1 0)]] -> Image
(define (board->image b)
(cond [(empty? b) MT]
[else (above (row->image (first b))
(board->image (rest b)))]))
; [List-of (U 1 0)] -> Image
(define (row->image r)
(cond [(empty? r) MT]
[else (beside (cell->image (first r))
(row->image (rest r)))]))
; Cell -> Image
(define (cell->image c)
(if (= 1 c) live-square dead-square))
The recursive structure can be abstracted using foldr:
; [List-of [List-of (U 1 0)]] -> Image
(define (board->image-abs.v1 b)
(foldr (λ (r b) (above (foldr (λ (c r) (beside (cell->image c) r)) MT r) b)) MT b))
We can also use map and apply
; [List-of [List-of (U 1 0)]] -> Image
(define (board->image-abs.v2 b)
(apply above (map (λ (r) (apply beside (map (λ (c) (cell->image c)) r))) b)))
The result
(board->image small-board-as-list)
(board->image-abs.v1 small-board-as-list)
(board->image-abs.v2 small-board-as-list)

Getting a "bad binding form" error in LISP

I have to write a simple program in Lisp that multiplies a polynomial by some factor. In this example, I want to multiply (x + 5) * 5x. The answer should be 5x^2 + 25x.
When I put in ((1 1) (5 0)) (5 1)) I should get (5 2) (25 1). However, I'm getting various errors ranging from undefined operator TERM in (TERM) and bad binding form. I'm a novice at Lisp and trying to return a list as shown above. Below is my short block of code:
(defun get-coef (term)
(car term))
(defun get-power (term)
(cadr term))
(defun make-term (coef power)
(cons coef power))
(defun poly-eval (poly factor)
(if (null poly) 0
(let ((term (car poly))
(let (coef ((* (get-coef(term)) (get-coef(factor)))))
(power ((+ (cadr(term)) (cadr(factor)))))
(make-term (coef power))
(poly-eval (cdr poly) factor))))))
Any help is appreciated!!

Several problems with your code:
You are using (fun (arg1 arg2)) syntax. It should be (fun arg1 arg2). For example, you write (make-term (coef power)) but it should be (make-term coef power).
Your bindings in let are all over the place. The correct syntax is
(let ((v1 e1)
(v2 e2)
(v3 e3))
e0)
i.e. all the bindings are in one list, and each binding is a list of two elements. Note that the expressions that the variables are bound to (e1 etc.) are not wrapped in any extra layers of parentheses.
make-term doesn't use the same representation as get-power. In get-power you use cadr so you need to make sure make-term puts the power in the right position.
Your poly-eval doesn't actually combine (make-term coef power) with the recursive call to (poly-eval (cdr poly) factor), so it gets lost. You should cons the "here"-result to the "there"-result.
Your poly-eval returns 0 instead of the empty list for empty polynomials.
All in all, your code can be fixed as
(defun get-coef (term)
(car term))
(defun get-power (term)
(cadr term))
(defun make-term (coef power)
(list coef power))
(defun poly-eval (poly factor)
(if (null poly) nil
(let ((term (car poly)))
(let
((coef (* (get-coef term) (get-coef factor)))
(power (+ (get-power term) (get-power factor))))
(cons (make-term coef power)
(poly-eval (cdr poly) factor))))))
giving e.g.
(poly-eval '((1 1) (5 0)) '(5 1))
resulting in
((5 2) (25 1))

Your make-term uses CONS but your get-power takes the CADR:
(defun get-power (term) (cadr term))
(defun make-term (coef power) (cons coef power))
You prolly wanted (list coef power).
(cons 'c 'p) returns (c . p), not (c p).
Now your get-power goes for CADR, the CAR of the CDR, but the CDR is 'p.
Your inputs are lists of coeff and power eg (5 1), so it seems the only problem is in your make-term.
Or you can turn around and be consistent with (( 5 . 1)(5 . 0) and then change get power to be (cdr term).

Another way:
(defun mult(term factor)
(list (* (first term) (first factor)) (+ (second term) (second factor))))
(defun polyeval(poly factor)
(cond
((null poly) nil)
(t (cons (mult (first poly) factor) (polyeval (rest poly) factor)))))
Note: first=car, rest=cdr, second=cadr

How to implement recursive function in Racket?

I am trying to create a function called lcm-from-factors that computes the Lowest Common Multiple of two numbers (m and n) The inputs to the function are m-co-groups and n-co-groups, which list all the prime factors and their powers. For instance, for m= 2970 and n= 163,800, we will have:
m-co-groups= ’((2 1) (3 3) (5 1) (7 0) (11 1) (13 0))
n-co-groups= ’((2 3) (3 2) (5 2) (7 1) (11 0) (13 1))
These are returned by a function called co-factor which has been given to me. I have written the code but the function is not compiling because I believe I did not implement the recursion properly. I'd appreciate any help in figuring out what I am doing wrong. My code is as follows.
(define (lcm-from-factors m n)
(let-values (((m-co-groups n-co-groups) (co-factor m n)))
(define (recurse m-co-groups n-co-groups)
(let* ((a (first(m-co-groups)))
(b (first(n-co-groups))))
(cond ((>= (rest(a)) (rest(b)))
(+ (expt (first(a)) (rest(a))) (recurse (rest(m-co-groups)) (rest(n-co-groups)))))
(else (+ (expt (first(b)) (rest(b))) (recurse (rest(m-co-groups)) (rest(n-co-groups))))))))))

The following is a stepping stone to get you started.
The code handles the specific situation where m and n has the same prime factors.
It is your job, to figure out how to handle the other cases.
#lang racket
(require math/number-theory)
(define (co-factor m n) (values (factorize m) (factorize n)))
(define (exponent power) (second power))
(define (base power) (first power))
(define (lcm-from-factors m n)
(let-values ([(m-co-groups n-co-groups) (co-factor m n)])
(define (recurse m-co-groups n-co-groups)
(cond
[(and (empty? m-co-groups) (empty? n-co-groups)) 1]
[(empty? m-co-groups) 'something-1]
[(empty? n-co-groups) 'something-2]
[else
(define a-power (first m-co-groups))
(define b-power (first n-co-groups))
(define a-base (base a-power))
(define b-base (base b-power))
(define a-exp (exponent a-power))
(define b-exp (exponent b-power))
(cond
[(= a-base b-base) (* (expt a-base (max a-exp b-exp))
(recurse (rest m-co-groups) (rest n-co-groups)))]
[(< a-base b-base) 'something-3]
[(> a-base b-base) 'something-4])]))
(recurse m-co-groups n-co-groups)))
(define x (* (expt 2 3) (expt 3 4)))
(define y (* (expt 2 1) (expt 3 5)))
(lcm-from-factors x y) ; gives 1944
(lcm x y) ; gives 1944

Scheme, N-queens optimization strategies SICP chapter 2

SICP contains an partially complete example of the n-queens solutions, by walking a tree of every possible queen placement in the last row, generating more possible positions in the next row to combine the results so far, filtering the possibilities to keep only ones where the newest queen is safe, and repeating recursively.
This strategy blows up after about n=11 with a maximum recursion error.
I've implemented an alternate strategy that does a smarter tree-walk from the first column, generating possible positions from a list of unused rows, consing each position-list onto an updated list of yet-unused rows. Filtering those pairs considered safe, and recursively mapping over these pairs for the next column. This doesn't blow up (so far) but n=12 takes a minute and n=13 takes about 10 minutes to solve.
(define (queens board-size)
(let loop ((k 1) (pp-pair (cons '() (enumerate-interval 1 board-size))))
(let ((position (car pp-pair))
(potential-rows (cdr pp-pair)))
(if (> k board-size)
(list position)
(flatmap (lambda (pp-pair) (loop (++ k) pp-pair))
(filter (lambda (pp-pair) (safe? k (car pp-pair))) ;keep only safe
(map (lambda (new-row)
(cons (adjoin-position new-row k position)
(remove-row new-row potential-rows))) ;make pp-pair
potential-rows)))))))
;auxiliary functions not listed
Not really looking for code, but a simple explanation of a strategy or two that's less naive and that clicks well with a functional approach.

I can offer you a simplification of your code, so it may run a little bit faster. We start by renaming some variables for improved readability (YMMV),
(define (queens board-size)
(let loop ((k 1)
(pd (cons '() (enumerate-interval 1 board-size))))
(let ((position (car pd))
(domain (cdr pd)))
(if (> k board-size)
(list position)
(flatmap (lambda (pd) (loop (1+ k) pd))
(filter (lambda (pd) (safe? k (car pd))) ;keep only safe NewPositions
(map (lambda (row)
(cons (adjoin-position row k position) ;NewPosition
(remove-row row domain))) ;make new PD for each Row in D
domain))))))) ; D
Now, filter f (map g d) == flatmap (\x->let {y=g x} in [y | f y]) d (using a bit of Haskell syntax there), i.e. we can fuse the map and the filter into one flatmap:
(flatmap (lambda (pd) (loop (1+ k) pd))
(flatmap (lambda (row) ;keep only safe NewPositions
(let ( (p (adjoin-position row k position))
(d (remove-row row domain)))
(if (safe? k p)
(list (cons p d))
'())))
domain))
then, flatmap h (flatmap g d) == flatmap (h <=< g) d (where <=< is right-to-left Kleisli composition operator, but who cares), so we can fuse the two flatmaps into just one, with
(flatmap
(lambda (row) ;keep only safe NewPositions
(let ((p (adjoin-position row k position)))
(if (safe? k p)
(loop (1+ k) (cons p (remove-row row domain)))
'())))
domain)
so the simplified code is
(define (queens board-size)
(let loop ((k 1)
(position '())
(domain (enumerate-interval 1 board-size)))
(if (> k board-size)
(list position)
(flatmap
(lambda (row) ;use only the safe picks
(if (safe_row? row k position) ;better to test before consing
(loop (1+ k) (adjoin-position row k position)
(remove-row row domain))
'()))
domain))))

Here's what I came up with a second time around. Not sure it's terribly much faster though. Quite a bit prettier though.
(define (n-queens n)
(let loop ((k 1) (r 1) (dangers (starting-dangers n)) (res '()) (solutions '()))
(cond ((> k n) (cons res solutions))
((> r n) solutions)
((safe? r k dangers)
(let ((this (loop (+ k 1) 1 (update-dangers r k dangers)
(cons (cons r k) res) solutions)))
(loop k (+ r 1) dangers res this)))
(else (loop k (+ r 1) dangers res solutions)))))
Big thing is using a let statement to serialize recursion, limiting depth to n. Solutions come out backwards (could probably fix by going n->1 instead of 1->n on r and k) but a backwards set is the same set as the frowards set.
(define (starting-dangers n)
(list (list)
(list (- n))
(list (+ (* 2 n) 1))))
;;instead of terminating in null list, terminate in term that cant threaten
small improvement, a danger can come from a row, a down diagonal, or and up diagonal, keep track of each as the board evolves.
(define (safe? r k dangers)
(and (let loop ((rdangers (rdang dangers)))
(cond ((null? rdangers) #t)
((= r (car rdangers))
#f)
(else (loop (cdr rdangers)))))
(let ((ddiag (- k r)))
(let loop ((ddangers (ddang dangers)))
(if (<= (car ddangers) ddiag)
(if (= (car ddangers) ddiag)
#f
#t)
(loop (cdr ddangers)))))
(let ((udiag (+ k r)))
(let loop ((udangers (udang dangers)))
(if (>= (car udangers) udiag)
(if (= (car udangers) udiag)
#f
#t)
(loop (cdr udangers)))))))
medium improvement in the change of format, only needing to do one comparison to check vs prior two. Don't think keeiping diagonals sorted cost me anything, but I don't think it saves time either.
(define (update-dangers r k dangers)
(list
(cons r (rdang dangers))
(insert (- k r) (ddang dangers) >)
(insert (+ k r) (udang dangers) <)))
(define (insert x sL pred)
(let loop ((L sL))
(cond ((null? L) (list x))
((pred x (car L))
(cons x L))
(else (cons (car L)
(loop (cdr L)))))))
(define (rdang dangers)
(car dangers))
(define (ddang dangers)
(cadr dangers))
(define (udang dangers)
(caddr dangers))

(define (average ....)) in Lisp

I'm just playing around with scheme/lisp and was thinking about how I would right my own definition of average. I'm not sure how to do some things that I think are required though.
define a procedure that takes an arbitrary number of arguments
count those arguments
pass the argument list to (+) to sum them together
Does someone have an example of defining average? I don't seem to know enough about LISP to form a web search that gets back the results I'm looking for.

The definition would be a very simple one-liner, but without spoiling it, you should look into:
a "rest" argument -- this (define (foo . xs) ...xs...) defines foo as a function that takes any number of arguments and they're available as a list which will be the value of xs.
length returns the length of a list.
apply takes a function and a list of values and applies the function to these values.
When you get that, you can go for more:
see the foldl function to avoid applying a list on a potentially very big list (this can matter in some implementations where the length of the argument list is limited, but it wouldn't make much difference in Racket).
note that Racket has exact rationals, and you can use exact->inexact to make a more efficient floating-point version.
And the spoilers are:
(define (average . ns) (/ (apply + ns) (length ns)))
Make it require one argument: (define (average n . ns) (/ (apply + n ns) (add1 (length ns))))
Use foldl: (define (average n . ns) (/ (foldl + 0 (cons n ns)) (add1 (length ns))))
Make it use floating point: (define (average n . ns) (/ (foldl + 0.0 (cons n ns)) (add1 (length ns))))

In Common Lisp, it looks like you can do:
(defun average (&rest args)
(when args
(/ (apply #'+ args) (length args))))
although I have no idea if &rest is available on all implementations of Lisp. Reference here.
Putting that code into GNU CLISP results in:
[1]> (defun average (&rest args)
(when args
(/ (apply #'+ args) (length args))))
AVERAGE
[2]> (average 1 2 3 4 5 6)
7/2
which is 3.5 (correct).

Two versions in Common Lisp:
(defun average (items)
(destructuring-bind (l . s)
(reduce (lambda (c a)
(incf (car c))
(incf (cdr c) a)
c)
items
:initial-value (cons 0 0))
(/ s l)))
(defun average (items &aux (s 0) (l 0))
(dolist (i items (/ s l))
(incf s i)
(incf l)))

In Scheme, I prefer using a list instead of the "rest" argument because rest argument makes implementing procedures like the following difficult:
> (define (call-average . ns)
(average ns))
> (call-average 1 2 3) ;; => BANG!
Packing arbitrary number of arguments into a list allows you to perform any list operation on the arguments. You can do more with less syntax and confusion. Here is my Scheme version of average that take 'n' arguments:
(define (average the-list)
(let loop ((count 0) (sum 0) (args the-list))
(if (not (null? args))
(loop (add1 count) (+ sum (car args)) (cdr args))
(/ sum count))))
Here is the same procedure in Common Lisp:
(defun average (the-list)
(let ((count 0) (sum 0))
(dolist (n the-list)
(incf count)
(incf sum n))
(/ sum count)))

In Scheme R5RS:
(define (average . numbers)
(/ (apply + numbers) (length numbers)))

We Keep Coding

iphone swift flutter scala powershell matlab mongodb postgresql perl eclipse

Neural network back propagation fails to overfit small data - neural-network

The issue was in the recursive call of the backpropogation function -- (let ([next-layer-d (backpropagation (rest layers) output targets activation)]) Output here is the output of current layer before activation function, however it should've been after.

Related

Turning a vector into a list and then into an image in Racket

Getting a "bad binding form" error in LISP

How to implement recursive function in Racket?

Scheme, N-queens optimization strategies SICP chapter 2

(define (average ....)) in Lisp

Categories

Resources