Related
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
input = [[0,0,1],[0,1,1],[1,0,1],[1,1,1]]
output = [0,1,1,0]
N = np.size(input,0) # number of samples
Ni = np.size(input,1) # dimension of the samples of input
No = 1 # dimension of the sample of output
Nh = 10 # number of hidden units
Ws = 1/4*np.random.rand(Nh,Ni+1)
print(Ws)
Wo = 1/4*np.random.rand(No,Nh)
print(Wo)
alpha = 0.05 # Learning rate
t_ = []
loss_ = []
def ReLU(x):
return np.maximum(0,x)
def sigmoid(x):
return 1/(1+np.exp(-x))
## train the model ====================================================================
for epoch in range(0,3000):
loss = 0
for id_ in range(0,N):
dWs = 0*Ws
dWo = 0*Wo
x = np.append(input[id_],1)
Z_1 = np.dot(Ws,x)
Z_2 = np.dot(Wo,ReLU(Z_1))
y = sigmoid(Z_2)
d = output[id_]
for j in range(0,Nh):
for i in range(0,No):
if Z_1[j] >= 0:
dWo[i,j] = dWo[i,j] + (y[i]-d)*Z_1[j]
#dWo[i,j] = dWo[i,j] + sigmoid(Z_1[j])*(y[i]-d)
else:
dWo[i,j] += 0
Wo = Wo - alpha*dWo
for k in range(0,Ni+1):
for j in range(0,Nh):
for i in range(0,No):
if Z_1[j] >= 0:
dWs[j,k] = dWs[j,k] + x[k]*Wo[i,j]*(y[i]-d)
#dWs[j,k] = dWs[j,k] + x[k]*Wo[i,j]*sigmoid(Z_1[j])*(1-sigmoid(Z_1[j]))*(y[i]-d)
else:
dWs[j,k] += 0
Ws = Ws - alpha*dWs
loss = loss + 1/2*np.linalg.norm(y-d)
if np.mod(epoch,50) == 0:
print(epoch,"-th epoch trained")
t_ = np.append(t_,epoch)
loss_ = np.append(loss_,loss)
fig = plt.figure(num=0,figsize=[10,5])
plt.plot(t_,loss_,marker="")
plt.title('Loss decay')
plt.xlabel('epoch',FontSize=20)
plt.ylabel('Loss',FontSize=20)
plt.show()
## figure out the function shape the model==========================================
xn = np.linspace(0,1,20)
yn = np.linspace(0,1,20)
xm, ym = np.meshgrid(xn, yn)
xx = np.reshape(xm,np.size(xm,0)*np.size(xm,1))
yy = np.reshape(ym,np.size(xm,0)*np.size(xm,1))
Z = []
for id__ in range(0,np.size(xm)):
x = np.append([xx[id__],yy[id__]],[1,1])
Z_1 = np.dot(Ws,x)
y_ = sigmoid(np.dot(Wo,ReLU(Z_1)))
Z = np.append(Z,y_)
fig = plt.figure(num=1,figsize=[10,5])
ax = fig.gca(projection='3d')
surf = ax.plot_surface(xm,ym,np.reshape(Z,(np.size(xm,0),np.size(xm,1))),cmap='coolwarm',linewidth=0,antialiased=False)
print("====================================================================")
plt.show()
## test the trained model ====================================================================
for id_ in range(0,N):
x = np.append(input[id_],1)
Z_1 = np.dot(Ws,x)
y = sigmoid(np.dot(Wo,ReLU(Z_1)))
print(y)
If I try this with sigmoid function, it works fine but when the ReLU activation function is implemented, the the program doesn't learning anything.
The NN consist of 3 input, hidden, output layers and sigmoid activation fuction is implemented for output function. Hand calculation seems fine but can't find the flaw.
The code below with sigmoid activation function works just fine.
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
input = [[0,0,1],[0,1,1],[1,0,1],[1,1,1]]
output = [0,1,1,0]
N = np.size(input,0) # number of samples
Ni = np.size(input,1) # dimension of the samples of input
No = 1 # dimension of the sample of output
Nh = 5 # number of hidden units
Ws = 1/4*np.random.rand(Nh,Ni+1)
#print(Ws)
Wo = 1/4*np.random.rand(No,Nh)
#print(Wo)
alpha = 0.1 # Learning rate
t_ = []
loss_ = []
def sigmoid(x):
return 1/(1+np.exp(-x))
## train the model ====================================================================
for epoch in range(0,5000):
loss = 0
for id_ in range(0,N):
dWs = 0*Ws
dWo = 0*Wo
x = np.append(input[id_],1)
Z_1 = np.dot(Ws,x)
A_1 = sigmoid(Z_1)
Z_2 = np.dot(Wo,A_1)
y = sigmoid(Z_2)
d = output[id_]
for j in range(0,Nh):
for i in range(0,No):
dWo[i,j] = dWo[i,j] + sigmoid(Z_1[j])*(y[i]-d)
Wo = Wo - alpha*dWo
for k in range(0,Ni+1):
for j in range(0,Nh):
for i in range(0,No):
dWs[j,k] = dWs[j,k] + x[k]*Wo[i,j]*sigmoid(Z_1[j])*(1-sigmoid(Z_1[j]))*(y[i]-d)
Ws = Ws - alpha*dWs
loss = loss + 1/2*np.linalg.norm(y-d)
if np.mod(epoch,50) == 0:
print(epoch,"-th epoch trained")
t_ = np.append(t_,epoch)
loss_ = np.append(loss_,loss)
fig = plt.figure(num=0,figsize=[10,5])
plt.plot(t_,loss_,marker="")
plt.title('Loss decay')
plt.xlabel('epoch',FontSize=20)
plt.ylabel('Loss',FontSize=20)
plt.show()
## figure out the function shape the model==========================================
xn = np.linspace(0,1,20)
yn = np.linspace(0,1,20)
xm, ym = np.meshgrid(xn, yn)
xx = np.reshape(xm,np.size(xm,0)*np.size(xm,1))
yy = np.reshape(ym,np.size(xm,0)*np.size(xm,1))
Z = []
for id__ in range(0,np.size(xm)):
x = np.append([xx[id__],yy[id__]],[1,1])
Z_1 = np.dot(Ws,x)
y_ = sigmoid(np.dot(Wo,sigmoid(Z_1)))
Z = np.append(Z,y_)
fig = plt.figure(num=1,figsize=[10,5])
ax = fig.gca(projection='3d')
surf = ax.plot_surface(xm,ym,np.reshape(Z,(np.size(xm,0),np.size(xm,1))),cmap='coolwarm',linewidth=0,antialiased=False)
print("====================================================================")
plt.show()
## test the trained model ====================================================================
for id_ in range(0,N):
x = np.append(input[id_],1)
Z_1 = np.dot(Ws,x)
y = sigmoid(np.dot(Wo,sigmoid(Z_1)))
print(y)
I found similar case in Quora.
And have tested it in my networks that involves modelling logics to resolve some noisy cost function.
I found that ReLu outputs are usually blasted all over, by the 3rd layer of MLP, the values before the output have accumulated to thousands if not millions.
And with that, I prefer sigmoid with MLPs. Don't forget, sigmoid limits output to 1, but ReLu does not.
The intuition behind ReLu is that it filters out unneeded info by means of MAX(0,X) function, before forwarded to the next layer of processing. For the same reason you see it being used in Convolution problems. Note: Normalization Layer is used in these cases so that the output values of the nodes will not blast all over.
But in the case of an MLP, you didn't implement any Norm Layer after ReLu, for that reason, it is difficult to model a simple function such as XOR. In short, without Norm Layer, I don't recommend the use of ReLu, although in some cases, it still can function properly.
##
set.seed(123)
SimpleEulerApproximation = function(T,x,a,b,delta){
numberofSteps = T/delta;
TimeSteps = rep(numberofSteps,1);
Y = rep(numberofSteps,1)
Y[1] = x;
for (i in 1:numberofSteps){
TimeSteps[i] = 0 + i*delta;
}
for (j in 2:numberofSteps){
Y[j] = Y[j-1] + a*Y[j-1]*delta + b*Y[j-1]*rnorm(1,0,sqrt(delta));
}
##plot(TimeSteps,Y, type = "l")
}
SimpleEulerApproximation(1,20,-0.01,0.25,0.001)
set.seed(123)
MultipleEulerApproximation = function(T,x,a,b,delta,numberofTrajectories){
numberofSteps = round(T/delta);
TimeSteps = rep(numberofSteps,1);
Y = rep(numberofSteps,rep(numberofTrajectories))
Y = data.matrix(Y)
for (i in 1:numberofTrajectories){
Y[,i] = SimpleEulerApproximation(T,x,a,b,delta);
}
for (i in 1:numberofSteps){
TimeSteps[i] = 0 + i*delta;
}
AverageTrajectory = rep(numberofSteps,1)
for (i in 1:numberofSteps){
AverageTrajectory[i] = mean(Y[i,])
}
##plot(TimeSteps,AverageTrajectory)
}
MultipleEulerApproximation(1,52,0.12,0.30,0.0001,10000)
MonteCarloSimulation = function(T,x,r,sigma,K,delta,numberofTrajectories){
Y = MultipleEulerApproximation(T,x,r,sigma,delta,numberofTrajectories);
lastStep = round(T/delta);
max(Y[lastStep,]-K,0);
size(Y)
price = 1/numberofTrajectories * sum(max(Y[lastStep,]-K,0))*exp(-r*T)
}
MonteCarloSimulation(0.25,52,0.12,0.3,50,0.0001,10000)
When I run the code for multipleEulerApproximation, I get replacement has length 0 error. Can someone help me with this? Much Appreciated.
The first one is simple Euler Approximation for stochastic differential equation dXt =
−0.1Xtdt + 0.25XtdBt, X0 = 20 over the time interval [0, 1] with time step size
∆ = 0.001.
The second chunk of code is for multipleeulerapproximation that is where the error.
The third-chunk is for calculating European call option price using projections.
I am trying to use deep reinforcement learning with keras to train an agent to learn how to play the Lunar Lander OpenAI gym environment. The problem is that my model is not converging. Here is my code:
import numpy as np
import gym
from keras.models import Sequential
from keras.layers import Dense
from keras import optimizers
def get_random_action(epsilon):
return np.random.rand(1) < epsilon
def get_reward_prediction(q, a):
qs_a = np.concatenate((q, table[a]), axis=0)
x = np.zeros(shape=(1, environment_parameters + num_of_possible_actions))
x[0] = qs_a
guess = model.predict(x[0].reshape(1, x.shape[1]))
r = guess[0][0]
return r
results = []
epsilon = 0.05
alpha = 0.003
gamma = 0.3
environment_parameters = 8
num_of_possible_actions = 4
obs = 15
mem_max = 100000
epochs = 3
total_episodes = 15000
possible_actions = np.arange(0, num_of_possible_actions)
table = np.zeros((num_of_possible_actions, num_of_possible_actions))
table[np.arange(num_of_possible_actions), possible_actions] = 1
env = gym.make('LunarLander-v2')
env.reset()
i_x = np.random.random((5, environment_parameters + num_of_possible_actions))
i_y = np.random.random((5, 1))
model = Sequential()
model.add(Dense(512, activation='relu', input_dim=i_x.shape[1]))
model.add(Dense(i_y.shape[1]))
opt = optimizers.adam(lr=alpha)
model.compile(loss='mse', optimizer=opt, metrics=['accuracy'])
total_steps = 0
i_x = np.zeros(shape=(1, environment_parameters + num_of_possible_actions))
i_y = np.zeros(shape=(1, 1))
mem_x = np.zeros(shape=(1, environment_parameters + num_of_possible_actions))
mem_y = np.zeros(shape=(1, 1))
max_steps = 40000
for episode in range(total_episodes):
g_x = np.zeros(shape=(1, environment_parameters + num_of_possible_actions))
g_y = np.zeros(shape=(1, 1))
q_t = env.reset()
episode_reward = 0
for step_number in range(max_steps):
if episode < obs:
a = env.action_space.sample()
else:
if get_random_action(epsilon, total_episodes, episode):
a = env.action_space.sample()
else:
actions = np.zeros(shape=num_of_possible_actions)
for i in range(4):
actions[i] = get_reward_prediction(q_t, i)
a = np.argmax(actions)
# env.render()
qa = np.concatenate((q_t, table[a]), axis=0)
s, r, episode_complete, data = env.step(a)
episode_reward += r
if step_number is 0:
g_x[0] = qa
g_y[0] = np.array([r])
mem_x[0] = qa
mem_y[0] = np.array([r])
g_x = np.vstack((g_x, qa))
g_y = np.vstack((g_y, np.array([r])))
if episode_complete:
for i in range(0, g_y.shape[0]):
if i is 0:
g_y[(g_y.shape[0] - 1) - i][0] = g_y[(g_y.shape[0] - 1) - i][0]
else:
g_y[(g_y.shape[0] - 1) - i][0] = g_y[(g_y.shape[0] - 1) - i][0] + gamma * g_y[(g_y.shape[0] - 1) - i + 1][0]
if mem_x.shape[0] is 1:
mem_x = g_x
mem_y = g_y
else:
mem_x = np.concatenate((mem_x, g_x), axis=0)
mem_y = np.concatenate((mem_y, g_y), axis=0)
if np.alen(mem_x) >= mem_max:
for l in range(np.alen(g_x)):
mem_x = np.delete(mem_x, 0, axis=0)
mem_y = np.delete(mem_y, 0, axis=0)
q_t = s
if episode_complete and episode >= obs:
if episode%10 == 0:
model.fit(mem_x, mem_y, batch_size=32, epochs=epochs, verbose=0)
if episode_complete:
results.append(episode_reward)
break
I am running tens of thousands of episodes and my model still won't converge. It will begin to reduce average change in policy over ~5000 episodes while increasing the average reward, but then it goes off the deep end and the average reward per episode actually goes down after that. I've tried messing with the hyperparameters, but I haven't gotten anywhere with that. I'm trying to model my code after the DeepMind DQN paper.
You might want to change your get_random_action function to decay epsilon with each episode. After all, assuming your agent can learn an optimal policy, at some point you won't want to take random actions at all, right? Here's a slightly different version of get_random_action that would do this for you:
def get_random_action(epsilon, total_episodes, episode):
explore_prob = epsilon - (epsilon * (episode / total_episodes))
return np.random.rand(1) < explore_prob
In this modified version of your function, epsilon will decrease slightly with each episode. This may help your model converge.
There are a handful of ways to decay a parameter. For more info, check out this Wikipedia article.
I recently implemented this successfully. https://github.com/tianchuliang/techblog/tree/master/OpenAIGym
Basically, I let the agent run randomly for 3000 frames while collecting these as initial training data (states) and labels (rewards), then after that I train my neural net model every 100 frames and let the model make decisions as to what action results in best score.
See my github, it may help. Oh, my training iterations are on YouTube too, https://www.youtube.com/watch?v=wrrr90Pevuw
https://www.youtube.com/watch?v=TJzKbFAlKa0
https://www.youtube.com/watch?v=y91uA_cDGGs
I'm trying to find where are make mistakes. Be very glad if you could help me.
Here is my problem:
In serial the train, from neural network toolbox, function behave in one way but when I put it in a parfor loop everything goes crazy.
>> version
ans =
8.3.0.532 (R2014a)
Here is a function
function per = neuralTr(tSet,Y,CrossVal,Ycv)
hiddenLayerSize = 94;
redeT = patternnet(hiddenLayerSize);
redeT.input.processFcns = {'removeconstantrows','mapminmax'};
redeT.output.processFcns = {'removeconstantrows','mapminmax'};
redeT.divideFcn = 'dividerand'; % Divide data randomly
redeT.divideMode = 'sample'; % Divide up every sample
redeT.divideParam.trainRatio = 80/100;
redeT.divideParam.valRatio = 10/100;
redeT.divideParam.testRatio = 10/100;
redeT.trainFcn = 'trainscg'; % Scaled conjugate gradient
redeT.performFcn = 'crossentropy'; % Cross-entropy
redeT.trainParam.showWindow=0; %default is 1)
redeT = train(redeT,tSet,Y);
outputs = sim(redeT,CrossVal);
per = perform(redeT,Ycv,outputs);
end
And here is the code I'm typing:
Data loaded in workspace
whos
Name Size Bytes Class Attributes
CrossVal 282x157 354192 double
Y 2x363 5808 double
Ycv 2x157 2512 double
per 1x1 8 double
tSet 282x363 818928 double
Function executing in Serial
per = neuralTr(tSet,Y,CrossVal,Ycv)
per =
0.90
Starting parallel
>> parpool local
Starting parallel pool (parpool) using the 'local' profile ... connected to 12 workers.
ans =
Pool with properties:
Connected: true
NumWorkers: 12
Cluster: local
AttachedFiles: {}
IdleTimeout: Inf (no automatic shut down)
SpmdEnabled: true
Initializing and executing the function 12 times in parallel
per = cell(12,1);
parfor ii = 1 : 12
per{ii} = neuralTr(tSet,Y,CrossVal,Ycv);
end
per
per =
[0.96]
[0.83]
[0.92]
[1.08]
[0.85]
[0.89]
[1.06]
[0.83]
[0.90]
[0.93]
[0.95]
[0.81]
Executing again to see if random initialization brings different values
per = cell(12,1);
parfor ii = 1 : 12
per{ii} = neuralTr(tSet,Y,CrossVal,Ycv);
end
per
per =
[0.96]
[0.83]
[0.92]
[1.08]
[0.85]
[0.89]
[1.06]
[0.83]
[0.90]
[0.93]
[0.95]
[0.81]
EDIT 1:
Running the function only with for
per = cell(12,1);
for ii = 1 : 12
per{ii} = neuralTr(tSet,Y,CrossVal,Ycv);
end
per
per =
[0.90]
[0.90]
[0.90]
[0.90]
[0.90]
[0.90]
[0.90]
[0.90]
[0.90]
[0.90]
[0.90]
[0.90]
EDIT 2:
I modified my function now everything works great. Maybe the problem is when data is divided in parallel. So i divided the data before send to parallel. Tks a lot
function per = neuralTr(tSet,Y,CrossVal,Ycv)
indt = 1:round(size(tSet,2) * 0.8) ;
indv = round(size(tSet,2) * 0.8):round(size(tSet,2) * 0.9);
indte = round(size(tSet,2) * 0.9):size(tSet,2);
hiddenLayerSize = 94;
redeT = patternnet(hiddenLayerSize);
redeT.input.processFcns = {'removeconstantrows','mapminmax'};
redeT.output.processFcns = {'removeconstantrows','mapminmax'};
redeT.divideFcn = 'dividerand'; % Divide data randomly
redeT.divideMode = 'sample'; % Divide up every sample
redeT.divideParam.trainRatio = 80/100;
redeT.divideParam.valRatio = 10/100;
redeT.divideParam.testRatio = 10/100;
redeT.trainFcn = 'trainscg'; % Scaled conjugate gradient
redeT.performFcn = 'crossentropy'; % Cross-entropy
redeT.trainParam.showWindow=0; %default is 1)
redeT = train(redeT,tSet,Y);
outputs = sim(redeT,CrossVal);
per = zeros(12,1);
parfor ii = 1 : 12
redes = train(redeT,tSet,Y);
per(ii) = perform(redes,Ycv,outputs);
end
end
Result:
>> per = neuralTr(tSet,Y,CrossVal,Ycv)
per =
0.90
0.90
0.90
0.90
0.90
0.90
0.90
0.90
0.90
0.90
0.90
0.90
Oh! I think I found it, but cant test it.
you have in your code:
redeT.divideFcn = 'dividerand'; % Divide data randomly
If each of the workers chooses the data randomly, then its expected for them to have different results, aren't they?
Try the next:
per = cell(12,1);
parfor ii = 1 : 12
rng(1); % set the seed for random number generation, so every time the number generated will be the same
per{ii} = neuralTr(tSet,Y,CrossVal,Ycv);
end
per
Not sure if neuralTr does set the seed inside, but give it a go.
I tried to use the modified version of NN back propagation code by Phil Brierley
(www.philbrierley.com). When i try to solve the XOR problem it works perfectly. but when i try to solve a problem of the form output = x1^2 + x2^2 (ouput = sum of squares of input), the results are not accurate. i have scaled the input and ouput between -1 and 1. I get different results every time i run the same program (i understand its due to random wts initialization), but results are very different. i tried changing learning rate but still results converge.
have given the code below
%---------------------------------------------------------
% MATLAB neural network backprop code
% by Phil Brierley
%--------------------------------------------------------
clear; clc; close all;
%user specified values
hidden_neurons = 4;
epochs = 20000;
input = [];
for i =-10:2.5:10
for j = -10:2.5:10
input = [input;i j];
end
end
output = (input(:,1).^2 + input(:,2).^2);
output1 = output;
% Maximum input and output limit and scaling factors
m1 = -10; m2 = 10;
m3 = 0; m4 = 250;
c = -1; d = 1;
%Scale input and output
for i =1:size(input,2)
I = input(:,i);
scaledI = ((d-c)*(I-m1) ./ (m2-m1)) + c;
input(:,i) = scaledI;
end
for i =1:size(output,2)
I = output(:,i);
scaledI = ((d-c)*(I-m3) ./ (m4-m3)) + c;
output(:,i) = scaledI;
end
train_inp = input;
train_out = output;
%read how many patterns and add bias
patterns = size(train_inp,1);
train_inp = [train_inp ones(patterns,1)];
%read how many inputs and initialize learning rate
inputs = size(train_inp,2);
hlr = 0.1;
%set initial random weights
weight_input_hidden = (randn(inputs,hidden_neurons) - 0.5)/10;
weight_hidden_output = (randn(1,hidden_neurons) - 0.5)/10;
%Training
err = zeros(1,epochs);
for iter = 1:epochs
alr = hlr;
blr = alr / 10;
%loop through the patterns, selecting randomly
for j = 1:patterns
%select a random pattern
patnum = round((rand * patterns) + 0.5);
if patnum > patterns
patnum = patterns;
elseif patnum < 1
patnum = 1;
end
%set the current pattern
this_pat = train_inp(patnum,:);
act = train_out(patnum,1);
%calculate the current error for this pattern
hval = (tanh(this_pat*weight_input_hidden))';
pred = hval'*weight_hidden_output';
error = pred - act;
% adjust weight hidden - output
delta_HO = error.*blr .*hval;
weight_hidden_output = weight_hidden_output - delta_HO';
% adjust the weights input - hidden
delta_IH= alr.*error.*weight_hidden_output'.*(1-(hval.^2))*this_pat;
weight_input_hidden = weight_input_hidden - delta_IH';
end
% -- another epoch finished
%compute overall network error at end of each epoch
pred = weight_hidden_output*tanh(train_inp*weight_input_hidden)';
error = pred' - train_out;
err(iter) = ((sum(error.^2))^0.5);
%stop if error is small
if err(iter) < 0.001
fprintf('converged at epoch: %d\n',iter);
break
end
end
%Output after training
pred = weight_hidden_output*tanh(train_inp*weight_input_hidden)';
Y = m3 + (m4-m3)*(pred-c)./(d-c);
% Testing for a new set of input
input_test = [6 -3.1; 0.5 1; -2 3; 3 -2; -4 5; 0.5 4; 6 1.5];
output_test = (input_test(:,1).^2 + input_test(:,2).^2);
input1 = input_test;
%Scale input
for i =1:size(input1,2)
I = input1(:,i);
scaledI = ((d-c)*(I-m1) ./ (m2-m1)) + c;
input1(:,i) = scaledI;
end
%Predict output
train_inp1 = input1;
patterns = size(train_inp1,1);
bias = ones(patterns,1);
train_inp1 = [train_inp1 bias];
pred1 = weight_hidden_output*tanh(train_inp1*weight_input_hidden)';
%Rescale
Y1 = m3 + (m4-m3)*(pred1-c)./(d-c);
analy_numer = [output_test Y1']
plot(err)
This is the sample output i get for problem
state after 20000 epochs
analy_numer =
45.6100 46.3174
1.2500 -2.9457
13.0000 11.9958
13.0000 9.7097
41.0000 44.9447
16.2500 17.1100
38.2500 43.9815
if i run once more i get different results. as can be observed for small values of input i get totally wrong ans (negative ans not possible). for other values accuracy is still poor.
can someone tell what i am doing wrong and how to correct.
thanks
raman