BNN with regression using PyMC3 - neural-network

I'm trying to build a BNN for a regression task, and I get a result that doesn't seem right.
My code:
First, build the toy data:
#Toy model
def build_toy_dataset(N=50, noise_std=0.2):
    x = np.linspace(-3, 3, num=N)
    y = np.cos(x) + np.random.normal(0, noise_std, size=N)
    x = x.reshape((N, 1))
    x = scale(x)
    x = x.astype(floatX)
    y = y.astype(floatX)
    return x, y

N = 50  # number of data points
D = 1   # number of features
X_train, Y_train = build_toy_dataset(N)
X_test, Y_test = build_toy_dataset(N)

fig, ax = plt.subplots()
ax.plot(X_test, Y_test, 'ro', X_train, Y_train, 'bx', alpha=0.2)
ax.legend(['Y_test', 'Y_train'])
ax.set(xlabel='X', ylabel='Y', title='Toy Regression data set');

X = scale(X)
X = X.astype(floatX)
Y = Y.astype(floatX)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=.5)
Then, define the BNN and its output:
# 2 layers with 5 nodes each
def construct_nn_2Layers(ann_input, ann_output):
    n_hidden = 5
    n_features = ann_input.get_value().shape[1]

    # Initialize random weights between each layer
    init_1 = np.random.randn(n_features, n_hidden).astype(floatX)
    init_2 = np.random.randn(n_hidden, n_hidden).astype(floatX)
    init_out = np.random.randn(n_hidden).astype(floatX)

    # Initialize random biases in each layer
    init_b_1 = np.random.randn(n_hidden).astype(floatX)
    init_b_2 = np.random.randn(n_hidden).astype(floatX)
    init_b_out = np.random.randn(1).astype(floatX)

    with pm.Model() as neural_network:
        # Weights from input to hidden layer
        weights_in_1 = pm.Normal('w_in_1', 0, sd=1,
                                 shape=(n_features, n_hidden),
                                 testval=init_1)
        bias_1 = pm.Normal('b_1', mu=0, sd=1, shape=(n_hidden), testval=init_b_1)

        # Weights from 1st to 2nd layer
        weights_1_2 = pm.Normal('w_1_2', 0, sd=1,
                                shape=(n_hidden, n_hidden),
                                testval=init_2)
        bias_2 = pm.Normal('b_2', mu=0, sd=1, shape=(n_hidden), testval=init_b_2)

        # Weights from hidden layer to output
        weights_2_out = pm.Normal('w_2_out', 0, sd=1,
                                  shape=(n_hidden,),
                                  testval=init_out)
        bias_out = pm.Normal('b_out', mu=0, sd=1, shape=(1), testval=init_b_out)

        # Build neural network using tanh activation function
        act_1 = pm.math.tanh(pm.math.dot(ann_input, weights_in_1) + bias_1)
        act_2 = pm.math.tanh(pm.math.dot(act_1, weights_1_2) + bias_2)
        act_out = pm.math.dot(act_2, weights_2_out) + bias_out

        sd = pm.HalfNormal('sd', sd=1)
        out = pm.Normal('out', mu=act_out, sd=sd, observed=ann_output)

    return neural_network
Then construct:
ann_input = theano.shared(X_train)
ann_output = theano.shared(Y_train)
neural_network = construct_nn_2Layers(ann_input, ann_output)
run ADVI:
with neural_network:
    inference_no_s = pm.ADVI()
    # Checking convergence - tracking parameters
    tracker = pm.callbacks.Tracker(
        mean=inference_no_s.approx.mean.eval,  # callable that returns mean
        std=inference_no_s.approx.std.eval     # callable that returns std
    )
    approx_no_s = pm.fit(n=30000, method=inference_no_s, callbacks=[tracker])
Predict on the test set:
ann_input.set_value(X_test)
ann_output.set_value(Y_test)
with neural_network:
    ppc = pm.sample_posterior_predictive(trace, samples=500, progressbar=False)
This is what I get, and it doesn't look right. What am I doing wrong?

Related

How to make a QoS prediction with an LSTM network in MATLAB

I am trying to make a QoS prediction on the QWS dataset, but I get the following error:
Error using trainNetwork (line 170)
Too many input arguments.

Error in lstm (line 63)
    net = trainNetwork(x_train,y_train,layers,options);

Caused by:
    Error using trainNetwork>iParseInputArguments (line 326)
    Too many input arguments.
data = readtable('C:\Users\Etudiant FST\Documents\études\mini_pjt\d\qws1\qws1.txt');
%test_data = readtable('C:\Users\Etudiant FST\Documents\études\mini_pjt\d\qws2\qws2.txt');
data = data(:,1:10);

x = [];
y = [];
delta_x = 1;
delta_y = 1;
pas = 1;

while (height(data) >= delta_x + delta_y)
    x = [x; data(1:delta_x,:)];
    y = [y; data(delta_x + 1:delta_x + delta_y,:)];
    data(1:pas,:) = [];
end

%numObservations = height(data);
%idxTrain = 1:floor(0.8*numObservations);
%idxTest = floor(0.8*numObservations)+1:numObservations;
%dataTrain = data(idxTrain,:);
%dataTest = data(idxTest,:);
%%for n = 1:numel(dataTrain)
%    X = dataTrain{n};
%    xt{n} = X(:,1:end-1);
%    tt{n} = X(:,2:end);
%%end

height_x = height(x);
split = fix(height_x*0.8);
x_train = x(1:split,:);
x_test = x(split:height_x,:);
y_train = y(1:split,:);
y_test = y(split:height_x,:);

layers = [
    sequenceInputLayer(10)
    lstmLayer(128,'OutputMode','sequence')
    fullyConnectedLayer(10)
    regressionLayer];

options = trainingOptions('adam', ...
    'MaxEpochs',maxEpochs, ...
    'MiniBatchSize',miniBatchSize, ...
    'InitialLearnRate',0.01, ...
    'GradientThreshold',1, ...
    'Shuffle','never', ...
    'Plots','training-progress',...
    'Verbose',0);

net = trainNetwork(x_train,y_train,layers,options);
I would like the network to predict the new QoS values from the old ones.
Thank you.
As the error message suggests, MATLAB can't select the correct trainNetwork overload, since the overload is chosen based on the number and types of the inputs passed to it.
If you look at the LSTM example in the trainNetwork documentation, you will see that XTrain is a 270-by-1 cell array in which every cell contains an N-by-M array, while YTrain is a 270-by-1 categorical array.
Reshaping your x_train and y_train into those shapes and types should solve the problem. Everything else in the code looks okay to me.

PyTorch: mat1 and mat2 cannot be multiplied

I am getting the following error:
RuntimeError: mat1 and mat2 shapes cannot be multiplied (32x33856 and 640000x256)
I don't understand how I need to change the parameters of my net. I took the net created in this paper and tried to modify the parameters to meet my needs. This is the code; I changed the parameters of the first convolution but still get the error:
class ChordClassificationNetwork(nn.Module):
    def __init__(self, train_model=False):
        super(ChordClassificationNetwork, self).__init__()
        self.train_model = train_model
        self.flatten = nn.Flatten()
        self.firstConv = nn.Conv2d(3, 64, (3, 3))
        self.secondConv = nn.Conv2d(64, 64, (3, 3))
        self.pool = nn.MaxPool2d(2)
        self.drop = nn.Dropout(0.25)
        self.fc1 = nn.Linear(100*100*64, 256)
        self.fc2 = nn.Linear(256, 256)
        self.outLayer = nn.Linear(256, 7)

    def forward(self, x):
        x = self.firstConv(x)
        x = F.relu(x)
        x = self.pool(x)
        x = self.secondConv(x)
        x = F.relu(x)
        x = self.pool(x)
        x = self.drop(x)
        x = self.flatten(x)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.drop(x)
        x = self.fc2(x)
        x = F.relu(x)
        x = self.drop(x)
        x = self.outLayer(x)
        output = F.softmax(x, dim=1)
        return output
and this is the training file:
device = ("cuda" if torch.cuda.is_available() else "cpu")

transformations = transforms.Compose([
    transforms.Resize((100, 100))
])

num_epochs = 10
learning_rate = 0.001
train_CNN = False
batch_size = 32
shuffle = True
pin_memory = True
num_workers = 1

dataset = GuitarDataset("../chords_data/cropped_images/train", transform=transformations)
train_set, validation_set = torch.utils.data.random_split(dataset, [int(0.8 * len(dataset)), len(dataset) - int(0.8*len(dataset))])
train_loader = DataLoader(dataset=train_set, shuffle=shuffle, batch_size=batch_size, num_workers=num_workers,
                          pin_memory=pin_memory)
validation_loader = DataLoader(dataset=validation_set, shuffle=shuffle, batch_size=batch_size, num_workers=num_workers,
                               pin_memory=pin_memory)

model = ChordClassificationNetwork().to(device)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

def check_accuracy(loader, model):
    if loader == train_loader:
        print("Checking accuracy on training data")
    else:
        print("Checking accuracy on validation data")
    num_correct = 0
    num_samples = 0
    model.eval()
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device)
            y = y.to(device=device)
            scores = model(x)
            predictions = torch.tensor([1.0 if i >= 0.5 else 0.0 for i in scores]).to(device)
            num_correct += (predictions == y).sum()
            num_samples += predictions.size(0)
    print(
        f"Got {num_correct} / {num_samples} with accuracy {float(num_correct) / float(num_samples) * 100:.2f}"
    )
    return f"{float(num_correct) / float(num_samples) * 100:.2f}"

def train():
    model.train()
    for epoch in range(num_epochs):
        loop = tqdm(train_loader, total=len(train_loader), leave=True)
        if epoch % 2 == 0:
            loop.set_postfix(val_acc=check_accuracy(validation_loader, model))
        for imgs, labels in loop:
            imgs = imgs.to(device)
            labels = labels.to(device)
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loop.set_description(f"Epoch [{epoch}/{num_epochs}]")
            loop.set_postfix(loss=loss.item())

if __name__ == "__main__":
    train()
What am I doing wrong?
Look at the error message: the issue comes from the fc1 layer, which doesn't have the required number of input features. It receives a tensor of shape (batch_size, 33856) but expects (batch_size, 640000). The reduction in dimensionality is caused by the layers you have applied to your input tensor before fc1.
You can fix this by defining fc1 with:
self.fc1 = nn.Linear(33856, 256)
Alternatively, you can use nn.LazyLinear, which will initialize its weights with the appropriate number of input features at runtime, depending on the input it receives. But that's lazy:
self.fc1 = nn.LazyLinear(256)
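For reference, here is a small sketch that traces where 33856 comes from, assuming the 100x100 RGB input produced by the Resize transform above (the layer objects are standalone copies of the ones in the model, used only for the shape check):
import torch
import torch.nn as nn
import torch.nn.functional as F

x = torch.zeros(1, 3, 100, 100)      # one 100x100 RGB image
conv1 = nn.Conv2d(3, 64, (3, 3))     # unpadded 3x3 conv: 100 -> 98
conv2 = nn.Conv2d(64, 64, (3, 3))    # unpadded 3x3 conv: 49 -> 47
pool = nn.MaxPool2d(2)               # halves the spatial size

x = pool(F.relu(conv1(x)))           # (1, 64, 49, 49)
x = pool(F.relu(conv2(x)))           # (1, 64, 23, 23)
print(x.flatten(1).shape)            # torch.Size([1, 33856]) = 64 * 23 * 23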

XOR with ReLU activation function

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

input = [[0,0,1],[0,1,1],[1,0,1],[1,1,1]]
output = [0,1,1,0]

N = np.size(input,0)   # number of samples
Ni = np.size(input,1)  # dimension of the samples of input
No = 1                 # dimension of the sample of output
Nh = 10                # number of hidden units

Ws = 1/4*np.random.rand(Nh,Ni+1)
print(Ws)
Wo = 1/4*np.random.rand(No,Nh)
print(Wo)

alpha = 0.05  # Learning rate

t_ = []
loss_ = []

def ReLU(x):
    return np.maximum(0,x)

def sigmoid(x):
    return 1/(1+np.exp(-x))

## train the model ====================================================================
for epoch in range(0,3000):
    loss = 0
    for id_ in range(0,N):
        dWs = 0*Ws
        dWo = 0*Wo

        x = np.append(input[id_],1)
        Z_1 = np.dot(Ws,x)
        Z_2 = np.dot(Wo,ReLU(Z_1))
        y = sigmoid(Z_2)

        d = output[id_]

        for j in range(0,Nh):
            for i in range(0,No):
                if Z_1[j] >= 0:
                    dWo[i,j] = dWo[i,j] + (y[i]-d)*Z_1[j]
                    #dWo[i,j] = dWo[i,j] + sigmoid(Z_1[j])*(y[i]-d)
                else:
                    dWo[i,j] += 0
        Wo = Wo - alpha*dWo

        for k in range(0,Ni+1):
            for j in range(0,Nh):
                for i in range(0,No):
                    if Z_1[j] >= 0:
                        dWs[j,k] = dWs[j,k] + x[k]*Wo[i,j]*(y[i]-d)
                        #dWs[j,k] = dWs[j,k] + x[k]*Wo[i,j]*sigmoid(Z_1[j])*(1-sigmoid(Z_1[j]))*(y[i]-d)
                    else:
                        dWs[j,k] += 0
        Ws = Ws - alpha*dWs

        loss = loss + 1/2*np.linalg.norm(y-d)

    if np.mod(epoch,50) == 0:
        print(epoch,"-th epoch trained")

    t_ = np.append(t_,epoch)
    loss_ = np.append(loss_,loss)

fig = plt.figure(num=0,figsize=[10,5])
plt.plot(t_,loss_,marker="")
plt.title('Loss decay')
plt.xlabel('epoch',FontSize=20)
plt.ylabel('Loss',FontSize=20)
plt.show()

## figure out the function shape the model ==========================================
xn = np.linspace(0,1,20)
yn = np.linspace(0,1,20)
xm, ym = np.meshgrid(xn, yn)
xx = np.reshape(xm,np.size(xm,0)*np.size(xm,1))
yy = np.reshape(ym,np.size(xm,0)*np.size(xm,1))

Z = []
for id__ in range(0,np.size(xm)):
    x = np.append([xx[id__],yy[id__]],[1,1])
    Z_1 = np.dot(Ws,x)
    y_ = sigmoid(np.dot(Wo,ReLU(Z_1)))
    Z = np.append(Z,y_)

fig = plt.figure(num=1,figsize=[10,5])
ax = fig.gca(projection='3d')
surf = ax.plot_surface(xm,ym,np.reshape(Z,(np.size(xm,0),np.size(xm,1))),cmap='coolwarm',linewidth=0,antialiased=False)
print("====================================================================")
plt.show()

## test the trained model ====================================================================
for id_ in range(0,N):
    x = np.append(input[id_],1)
    Z_1 = np.dot(Ws,x)
    y = sigmoid(np.dot(Wo,ReLU(Z_1)))
    print(y)
If I try this with the sigmoid function it works fine, but when the ReLU activation function is implemented, the program doesn't learn anything.
The NN consists of three layers (input, hidden, output), and the sigmoid activation function is used for the output. The hand calculation seems fine, but I can't find the flaw.
The code below, with the sigmoid activation function, works just fine.
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

input = [[0,0,1],[0,1,1],[1,0,1],[1,1,1]]
output = [0,1,1,0]

N = np.size(input,0)   # number of samples
Ni = np.size(input,1)  # dimension of the samples of input
No = 1                 # dimension of the sample of output
Nh = 5                 # number of hidden units

Ws = 1/4*np.random.rand(Nh,Ni+1)
#print(Ws)
Wo = 1/4*np.random.rand(No,Nh)
#print(Wo)

alpha = 0.1  # Learning rate

t_ = []
loss_ = []

def sigmoid(x):
    return 1/(1+np.exp(-x))

## train the model ====================================================================
for epoch in range(0,5000):
    loss = 0
    for id_ in range(0,N):
        dWs = 0*Ws
        dWo = 0*Wo

        x = np.append(input[id_],1)
        Z_1 = np.dot(Ws,x)
        A_1 = sigmoid(Z_1)
        Z_2 = np.dot(Wo,A_1)
        y = sigmoid(Z_2)

        d = output[id_]

        for j in range(0,Nh):
            for i in range(0,No):
                dWo[i,j] = dWo[i,j] + sigmoid(Z_1[j])*(y[i]-d)
        Wo = Wo - alpha*dWo

        for k in range(0,Ni+1):
            for j in range(0,Nh):
                for i in range(0,No):
                    dWs[j,k] = dWs[j,k] + x[k]*Wo[i,j]*sigmoid(Z_1[j])*(1-sigmoid(Z_1[j]))*(y[i]-d)
        Ws = Ws - alpha*dWs

        loss = loss + 1/2*np.linalg.norm(y-d)

    if np.mod(epoch,50) == 0:
        print(epoch,"-th epoch trained")

    t_ = np.append(t_,epoch)
    loss_ = np.append(loss_,loss)

fig = plt.figure(num=0,figsize=[10,5])
plt.plot(t_,loss_,marker="")
plt.title('Loss decay')
plt.xlabel('epoch',FontSize=20)
plt.ylabel('Loss',FontSize=20)
plt.show()

## figure out the function shape the model ==========================================
xn = np.linspace(0,1,20)
yn = np.linspace(0,1,20)
xm, ym = np.meshgrid(xn, yn)
xx = np.reshape(xm,np.size(xm,0)*np.size(xm,1))
yy = np.reshape(ym,np.size(xm,0)*np.size(xm,1))

Z = []
for id__ in range(0,np.size(xm)):
    x = np.append([xx[id__],yy[id__]],[1,1])
    Z_1 = np.dot(Ws,x)
    y_ = sigmoid(np.dot(Wo,sigmoid(Z_1)))
    Z = np.append(Z,y_)

fig = plt.figure(num=1,figsize=[10,5])
ax = fig.gca(projection='3d')
surf = ax.plot_surface(xm,ym,np.reshape(Z,(np.size(xm,0),np.size(xm,1))),cmap='coolwarm',linewidth=0,antialiased=False)
print("====================================================================")
plt.show()

## test the trained model ====================================================================
for id_ in range(0,N):
    x = np.append(input[id_],1)
    Z_1 = np.dot(Ws,x)
    y = sigmoid(np.dot(Wo,sigmoid(Z_1)))
    print(y)
I found a similar case on Quora.
I have also tested this in my own networks that model logic to resolve a noisy cost function.
I found that ReLU outputs usually blow up; by the third layer of an MLP, the values before the output have accumulated into the thousands, if not millions.
Because of that, I prefer sigmoid with MLPs. Don't forget: sigmoid limits its output to 1, but ReLU does not.
The intuition behind ReLU is that it filters out unneeded information via the max(0, x) function before forwarding it to the next layer of processing. For the same reason you see it used in convolution problems. Note: a normalization layer is used in those cases so that the output values of the nodes don't blow up.
But in the case of an MLP, you didn't implement any normalization layer after ReLU; for that reason, it is difficult to model even a simple function such as XOR. In short, without a normalization layer, I don't recommend using ReLU, although in some cases it can still function properly.
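To illustrate the bounded-versus-unbounded point, here is a small sketch (the layer sizes and weight scale are arbitrary assumptions, not the poster's network) comparing how activation magnitudes evolve through a few random fully connected layers:
import numpy as np

rng = np.random.default_rng(0)
x = rng.random(10)

relu_h, sig_h = x.copy(), x.copy()
for layer in range(1, 4):
    W = rng.normal(0, 1, size=(10, 10))
    relu_h = np.maximum(0, W @ relu_h)        # unbounded: magnitudes can keep growing
    sig_h = 1 / (1 + np.exp(-(W @ sig_h)))    # squashed into (0, 1) at every layer
    print(layer, relu_h.max(), sig_h.max())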

Initialising weights and bias with PyTorch - how to correct dimensions?

Using this model, I'm attempting to initialise my network with predefined weights and biases:
dimensions_input = 10
hidden_layer_nodes = 5
output_dimension = 10

class Model(torch.nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.linear = torch.nn.Linear(dimensions_input, hidden_layer_nodes)
        self.linear2 = torch.nn.Linear(hidden_layer_nodes, output_dimension)
        self.linear.weight = torch.nn.Parameter(torch.zeros(dimensions_input,hidden_layer_nodes))
        self.linear.bias = torch.nn.Parameter(torch.ones(hidden_layer_nodes))
        self.linear2.weight = torch.nn.Parameter(torch.zeros(dimensions_input,hidden_layer_nodes))
        self.linear2.bias = torch.nn.Parameter(torch.ones(hidden_layer_nodes))

    def forward(self, x):
        l_out1 = self.linear(x)
        y_pred = self.linear2(l_out1)
        return y_pred

model = Model()

criterion = torch.nn.MSELoss(size_average=False)
optim = torch.optim.SGD(model.parameters(), lr=0.00001)

def train_model():
    y_data = x_data.clone()
    for i in range(10000):
        y_pred = model(x_data)
        loss = criterion(y_pred, y_data)
        if i % 5000 == 0:
            print(loss)
        optim.zero_grad()
        loss.backward()
        optim.step()
RuntimeError:
The expanded size of the tensor (10) must match the existing size (5)
at non-singleton dimension 1
My dimensions appear correct, as they match the corresponding linear layers?
The code provided doesn't run because x_data isn't defined, so I can't be sure this is the issue, but one thing that strikes me is that you should replace
self.linear2.weight = torch.nn.Parameter(torch.zeros(dimensions_input,hidden_layer_nodes))
self.linear2.bias = torch.nn.Parameter(torch.ones(hidden_layer_nodes))
with
self.linear2.weight = torch.nn.Parameter(torch.zeros(output_dimension, hidden_layer_nodes))
self.linear2.bias = torch.nn.Parameter(torch.ones(output_dimension))
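Note that nn.Linear stores its weight with shape (out_features, in_features) and its bias with shape (out_features,), so the safest approach is to match the shapes the layers already have. A small sketch of that check, using the dimensions from the question:
import torch

dimensions_input, hidden_layer_nodes, output_dimension = 10, 5, 10

linear = torch.nn.Linear(dimensions_input, hidden_layer_nodes)
linear2 = torch.nn.Linear(hidden_layer_nodes, output_dimension)
print(linear.weight.shape, linear.bias.shape)    # torch.Size([5, 10]) torch.Size([5])
print(linear2.weight.shape, linear2.bias.shape)  # torch.Size([10, 5]) torch.Size([10])

# Assign custom initial values without changing the expected shapes:
with torch.no_grad():
    linear2.weight.copy_(torch.zeros_like(linear2.weight))
    linear2.bias.copy_(torch.ones_like(linear2.bias))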

How to accumulate and apply gradients for an async n-step DQNetwork update in TensorFlow?

I am trying to implement Asynchronous Methods for Deep Reinforcement Learning, and one of the steps requires accumulating the gradient over several steps and then applying it.
What is the best way to achieve this in TensorFlow?
I got as far as accumulating the gradient, but I don't think this is the fastest way to achieve it (lots of transfers from TensorFlow to Python and back).
Any suggestions are welcome.
This is my code for a toy NN. It does not model or compute anything; it just exercises the operations that I want to use.
import numpy as np
import tensorflow as tf
from model import *

graph = tf.Graph()

with graph.as_default():
    state = tf.placeholder(tf.float32, shape=[None, 80, 80, 1])

    with tf.variable_scope('layer1'):
        W = weight_variable([8, 8, 1, 32])
        variable_summaries(W, "layer1/W")
        b = bias_variable([32])
        variable_summaries(b, "layer1/b")
        h = conv2d(state, W, 4) + b
        activation = tf.nn.relu(h)
        pool1 = max_pool_2x2(activation)

    print(pool1.get_shape())
    pool1 = tf.reshape(pool1, [-1, 3200])

    with tf.variable_scope('readout'):
        W = weight_variable([3200, 3])
        b = bias_variable([3])
        logits = tf.matmul(pool1, W) + b
        variable_summaries(h, "y")

    action_indexes = tf.placeholder(tf.int32, shape=[None], name="action_indexes")

    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, action_indexes)

    starter_learning_rate = 1e-6
    global_step = tf.Variable(0, trainable=False)

    # decay every 1000 steps with a base of 0.96:
    learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                               global_step,
                                               10000, 0.96, staircase=True)

    optimizer = tf.train.RMSPropOptimizer(learning_rate)
    gradients_and_variables = optimizer.compute_gradients(loss, tf.trainable_variables())

    discounted_values = tf.placeholder(tf.float32, shape=[None, 1])

with tf.Session(graph=graph) as s:
    for v in tf.trainable_variables():
        print(v.name, v.dtype, v.get_shape())

    s.run(tf.initialize_all_variables())

    feed_dict = {
        state: np.zeros([1, 80, 80, 1]),
        action_indexes: [1],
    }

    var_to_grad = dict((var.name, grad) for grad, var in gradients_and_variables)
    keys = sorted(var_to_grad.keys())
    print(keys)

    name_to_var = dict((var.name, var) for _, var in gradients_and_variables)

    for i in range(10):
        gradients = s.run([var_to_grad[k] for k in keys], feed_dict=feed_dict)
        for k, v in zip(keys, gradients):
            var_to_grad[k] += v

    for k in keys:
        print(var_to_grad[k])

    s.run(optimizer.apply_gradients((g, name_to_var[v]) for v, g in var_to_grad.iteritems()), feed_dict=feed_dict)
Updated code after #yaroslave's suggestion:
import numpy as np
import tensorflow as tf
from model import *

graph = tf.Graph()

with graph.as_default():
    minibatch = 32
    state = tf.placeholder(tf.float32, shape=[minibatch, 80, 80, 1], name="input")

    with tf.variable_scope('layer1'):
        W = weight_variable([8, 8, 1, 32])
        variable_summaries(W, "layer1/W")
        b = bias_variable([32])
        variable_summaries(b, "layer1/b")
        h = conv2d(state, W, 4) + b
        activation = tf.nn.relu(h)
        pool1 = max_pool_2x2(activation)

    print(pool1.get_shape())
    pool1 = tf.reshape(pool1, [-1, 3200])

    with tf.variable_scope('readout'):
        W = weight_variable([3200, 3])
        b = bias_variable([3])
        logits = tf.matmul(pool1, W) + b
        variable_summaries(h, "y")

    action_indexes = tf.placeholder(tf.int32, shape=[minibatch], name="action_indexes")

    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, action_indexes)

    starter_learning_rate = 1e-6
    global_step = tf.Variable(0, trainable=False)

    # decay every 1000 steps with a base of 0.96:
    learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                               global_step,
                                               10000, 0.96, staircase=True)

    optimizer = tf.train.RMSPropOptimizer(learning_rate)

    trainable_variables = tf.trainable_variables()
    varname_to_var = dict((v.name, v) for v in trainable_variables)
    keys = sorted(varname_to_var.keys())

    gradients_and_variables = optimizer.compute_gradients(loss, [varname_to_var[k] for k in keys])

    var_to_grad = dict((var.name, grad) for grad, var in gradients_and_variables)
    name_to_var = dict((var.name, var) for _, var in gradients_and_variables)

    # save the gradients in memory
    var_to_ref_grad = {}
    for k in keys:
        grad = var_to_grad[k]
        print(k, grad.get_shape())
        ref = tf.Variable(tf.zeros_like(grad))
        ref = ref.assign_add(grad)
        var_to_ref_grad[k] = ref

    discounted_values = tf.placeholder(tf.float32, shape=[None, 1], name='discounted_values')

    # control when to apply gradients
    compute_gradients_flag = tf.placeholder(tf.int32, name="compute_gradients")

    def fn1():
        var_grad_list = []
        for k in keys:
            grad = var_to_ref_grad[k]
            var = varname_to_var[k]
            var_grad_list.append((grad, var))
        optimizer.apply_gradients(var_grad_list)
        return tf.no_op()

    fn2 = lambda: tf.no_op()

    last_op = tf.cond(tf.equal(compute_gradients_flag, 1), fn1, fn2)

with tf.Session(graph=graph) as s:
    feed_dict = {
        state: np.zeros([minibatch, 80, 80, 1]),
        action_indexes: [1],
        compute_gradients_flag: False,
    }

    s.run(tf.initialize_all_variables())

    for i in range(10):
        # accumulate gradients
        s.run(last_op, feed_dict=feed_dict)
You don't really have to accumulate gradients manually. You can have TensorFlow accumulate them for you by applying the rollout update as a single batch:
s_list = list_of_states_visited
a_list = list_of_actions_taken
R_list = list_of_value_targets

sess.run(local_net.update, feed_dict={
    local_net.input: s_list,
    local_net.a: a_list,
    local_net.R: R_list
})
Something like this might work to create ops for accumulating gradients, resetting the accumulated gradients, and applying the accumulated gradients (untested!):
def build_gradient_accumulators(optimizer, gradients_and_variables):
    accum_grads_and_vars = []
    accumulators = []
    resetters = []

    for grad, var in gradients_and_variables:
        accum = tf.Variable(tf.zeros_like(grad))
        accum = accum.assign_add(grad)
        accumulators.append(accum)
        accum_grads_and_vars.append((accum, var))
        resetters.append(tf.assign(accum, tf.zeros_like(accum)))

    reset_op = tf.group(*resetters)
    accum_op = tf.group(*accumulators)
    apply_op = optimizer.apply_gradients(accum_grads_and_vars)
    return reset_op, accum_op, apply_op
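A minimal usage sketch for the helper above, assuming the graph, optimizer, gradients_and_variables and feed_dict built in the question (untested, TF1-style; n_steps is a hypothetical rollout length):
with graph.as_default():
    reset_op, accum_op, apply_op = build_gradient_accumulators(
        optimizer, gradients_and_variables)

n_steps = 5  # hypothetical n-step rollout length

with tf.Session(graph=graph) as s:
    s.run(tf.initialize_all_variables())
    s.run(reset_op)                            # zero the accumulators
    for _ in range(n_steps):                   # accumulate over the rollout
        s.run(accum_op, feed_dict=feed_dict)
    s.run(apply_op)                            # apply the summed gradients once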