In tensorflow divide label and features is not supported? - neural-network

I want to make the data which divided label and features, beause tf.nn.softmax_cross_entropy_with_logits required.
queue = tf.RandomShuffleQueue(
capacity=capacity,
min_after_dequeue=min_after_dequeue,
dtypes=[tf.float32],
shapes=[[n_input+1]] #
)
make the queue and put the label and features.
after that I should divide label and features for cost function. but how to do that?
Thank you
import tensorflow as tf
import numpy as np
# Parameters
learning_rate = 0.003
training_epochs = 30
batch_size = 2
display_step = 1
min_after_dequeue = 5
capacity = 16246832
# Network Parameters
# feature size
n_input = 199
# 1st layer num features
n_hidden_1 = 150
# 2nd layer num features
n_hidden_2 = 100
# 3rd layer num features
n_hidden_3 = 50
# 4th layer num features
n_hidden_4 = 30
#class
n_classes = 3
#read csv, 0 index is label
filename_queue = tf.train.string_input_producer(["data.csv"])
record_default = [[0.0] for x in xrange(200)] # with a label and 199 features
#testfile
reader = tf.TextLineReader()
#file read
key, value = reader.read(filename_queue)
#decode
features = tf.decode_csv(value, record_defaults= record_default)
featurespack = tf.pack(features)
#xy = tf.map_fn(fn = lambda f: [f[1:],f[0]], elems=featurespack)
#for the batch
queue = tf.RandomShuffleQueue(
capacity=capacity,
min_after_dequeue=min_after_dequeue,
dtypes=[tf.float32],
shapes=[[n_input+1]]
)
#enqueue
enqueue_op = queue.enqueue(featurespack)
#dequeue
inputs = queue.dequeue_many(batch_size)
#threading
qr = tf.train.QueueRunner(queue, [enqueue_op] * 4)
#features n=199
x = tf.placeholder("float", [None, n_input])
# class 0,1,2
y = tf.placeholder("float", [None, n_classes])
#dropout
dropout_keep_prob = tf.placeholder("float")
# Create model
def multilayer_perceptron(_X, _weights, _biases, _keep_prob):
layer_1 = tf.nn.dropout(tf.nn.relu(tf.add(tf.matmul(_X, _weights['h1']), _biases['b1'])), _keep_prob)
layer_2 = tf.nn.dropout(tf.nn.relu(tf.add(tf.matmul(layer_1, _weights['h2']), _biases['b2'])), _keep_prob)
layer_3 = tf.nn.dropout(tf.nn.relu(tf.add(tf.matmul(layer_2, _weights['h3']), _biases['b3'])), _keep_prob)
layer_4 = tf.nn.dropout(tf.nn.relu(tf.add(tf.matmul(layer_3, _weights['h4']), _biases['b4'])), _keep_prob)
return tf.sigmoid(tf.matmul(layer_4, _weights['out']) + _biases['out'])
# Store layers weight & bias
weights = {
'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1], stddev=0.1)),
'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2], stddev=0.1)),
'h3': tf.Variable(tf.random_normal([n_hidden_2, n_hidden_3], stddev=0.1)),
'h4': tf.Variable(tf.random_normal([n_hidden_3, n_hidden_4], stddev=0.1)),
'out': tf.Variable(tf.random_normal([n_hidden_4, n_classes], stddev=0.1))
}
biases = {
'b1': tf.Variable(tf.random_normal([n_hidden_1])),
'b2': tf.Variable(tf.random_normal([n_hidden_2])),
'b3': tf.Variable(tf.random_normal([n_hidden_3])),
'b4': tf.Variable(tf.random_normal([n_hidden_4])),
'out': tf.Variable(tf.random_normal([n_classes]))
}
# Construct model
pred = multilayer_perceptron(x, weights, biases, dropout_keep_prob)
# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y)) # Softmax loss
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost) # Adam Optimizer
# optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.8).minimize(cost) # Adam Optimizer
# Initializing the variables
print "1"
with tf.Session() as sess:
#init
tf.initialize_all_variables().run
#what is
coord = tf.train.Coordinator()
#queue start what is
tf.train.start_queue_runners (coord=coord)
#i dont know well
enqueue_threads = qr.create_threads(sess, coord=coord, start=True)
print sess.run(features)
print sess.run(features)
print sess.run(features)
print sess.run(features)
print sess.run(features)
#
#print sess.run(feature)
#Training cycle
for epoch in range(training_epochs):
print epoch
avg_cost = 0.
# Loop over all batches
for i in range(10):
print i
if coord.should_stop():
break
#get inputs
inputs_value = sess.run(inputs)
#THIS IS NOT WORK
batch_xs = np.ndarray([x[1:] for x in inputs_value])
batch_ys = np.ndarray([x[0] for x in inputs_value])
print 'batch', len(batch_ys), len(batch_xs)
#batch_xs, batch_ys = mnist.train.next_batch(batch_size)
# Fit training using batch data
#optimzierm put x and y
sess.run(optimizer, feed_dict={x: batch_xs, y: batch_ys, dropout_keep_prob: 0.5})
# Compute average loss
avg_cost += sess.run(cost, feed_dict={x: batch_xs, y: batch_ys, dropout_keep_prob: 0.5})/batch_size
# Display logs per epoch step
if epoch % display_step == 0:
print ("Epoch: %03d/%03d cost: %.9f" % (epoch, training_epochs, avg_cost))
# Test model
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
# Calculate accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
#print ("Accuracy:", accuracy.eval({x: mnist.test.images, y: mnist.test.labels, dropout_keep_prob: 1.}))
coord.request_stop ()
coord.join (enqueue_threads)
print ("Optimization Finished!")

Related

Machine Translation FFN : Dimension problem due to window size

this is my first time creating a FFN to train it to translate French to English using word prediction:
Input are two arrays of size 2 x window_size + 1 from source language and window_size target language. And the label of size 1
For e.g for window_size = 2:
["je","mange", "la", "pomme","avec"]
and
["I", "eat"]
So the input of size [5] and [2] after concatenating => 7
Label: "the" (refering to "la" in French)
The label is changed to one-hot-encoding before comparing with yHat
I'm using unique index for each word ( 1 to len(vocab) ) and train using the index (not the words)
The output of the FFN is a probability of the size of the vocab of the target language
The problem is that the FFN doesn't learn and the accuracy stays at 0.
When I print the size of y_final (target probability) and yHat (Model Hypo) they have different dimensions:
yHat.size()=[512, 7, 10212]
with 64 batch_size, 7 is the concatenated input size and 10212 size of target vocab, while
y_final.size()= [512, 10212]
And over all the forward method I have these sizes:
torch.Size([512, 5, 32])
torch.Size([512, 5, 64])
torch.Size([512, 5, 64])
torch.Size([512, 2, 256])
torch.Size([512, 2, 32])
torch.Size([512, 2, 64])
torch.Size([512, 2, 64])
torch.Size([512, 7, 64])
torch.Size([512, 7, 128])
torch.Size([512, 7, 10212])
Since the accuracy augments when yHat = y_final then I thought that it is never the case because they don't even have the same shapes (2D vs 3D). Is this the problem ?
Please refer to the code and if you need any other info please tell me.
The code is working fine, no errors.
trainingData = TensorDataset(encoded_source_windows, encoded_target_windows, encoded_labels)
# print(trainingData)
batchsize = 512
trainingLoader = DataLoader(trainingData, batch_size=batchsize, drop_last=True)
def ffnModel(vocabSize1,vocabSize2, learningRate=0.01):
class ffNetwork(nn.Module):
def __init__(self):
super().__init__()
self.embeds_src = nn.Embedding(vocabSize1, 256)
self.embeds_target = nn.Embedding(vocabSize2, 256)
# input layer
self.inputSource = nn.Linear(256, 32)
self.inputTarget = nn.Linear(256, 32)
# hidden layer 1
self.fc1 = nn.Linear(32, 64)
self.bnormS = nn.BatchNorm1d(5)
self.bnormT = nn.BatchNorm1d(2)
# Layer(s) afer Concatenation:
self.fc2 = nn.Linear(64,128)
self.output = nn.Linear(128, vocabSize2)
self.softmaaax = nn.Softmax(dim=0)
# forward pass
def forward(self, xSource, xTarget):
xSource = self.embeds_src(xSource)
xSource = F.relu(self.inputSource(xSource))
xSource = F.relu(self.fc1(xSource))
xSource = self.bnormS(xSource)
xTarget = self.embeds_target(xTarget)
xTarget = F.relu(self.inputTarget(xTarget))
xTarget = F.relu(self.fc1(xTarget))
xTarget = self.bnormT(xTarget)
xCat = torch.cat((xSource, xTarget), dim=1)#dim=128 or 1 ?
xCat = F.relu(self.fc2(xCat))
print(xCat.size())
xCat = self.softmaaax(self.output(xCat))
return xCat
# creating instance of the class
net = ffNetwork()
# loss function
lossfun = nn.CrossEntropyLoss()
# lossfun = nn.NLLLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=learningRate)
return net, lossfun, optimizer
def trainModel(vocabSize1,vocabSize2, learningRate):
# number of epochs
numepochs = 64
# create a new Model instance
net, lossfun, optimizer = ffnModel(vocabSize1,vocabSize2, learningRate)
# initialize losses
losses = torch.zeros(numepochs)
trainAcc = []
# loop over training data batches
batchAcc = []
batchLoss = []
for epochi in range(numepochs):
#Switching on training mode
net.train()
# loop over training data batches
batchAcc = []
batchLoss = []
for A, B, y in tqdm(trainingLoader):
# forward pass and loss
final_y = []
for i in range(y.size(dim=0)):
yy = [0] * target_vocab_length
yy[y[i]] = 1
final_y.append(yy)
final_y = torch.tensor(final_y)
yHat = net(A, B)
loss = lossfun(yHat, final_y)
################
print("\n yHat.size()")
print(yHat.size())
print("final_y.size()")
print(final_y.size())
# backprop
optimizer.zero_grad()
loss.backward()
optimizer.step()
# loss from this batch
batchLoss.append(loss.item())
print(f'batchLoss: {loss.item()}')
#Accuracy calculator:
matches = torch.argmax(yHat) == final_y # booleans (false/true)
matchesNumeric = matches.float() # convert to numbers (0/1)
accuracyPct = 100 * torch.mean(matchesNumeric) # average and x100
batchAcc.append(accuracyPct) # add to list of accuracies
print(f'accuracyPct: {accuracyPct}')
trainAcc.append(np.mean(batchAcc))
losses[epochi] = np.mean(batchLoss)
return trainAcc,losses,net
trainAcc,losses,net = trainModel(len(source_vocab),len(target_vocab), 0.01)
print(trainAcc)

Modify training function of 3 nets with shared classifier

I have 3 VGG: VGGA, VGGB and VGG*, trained with the following training function:
def train(nets, loaders, optimizer, criterion, epochs=20, dev=None, save_param=False, model_name="valerio"):
# try:
nets = [n.to(dev) for n in nets]
model_a = module_unwrap(nets[0], True)
model_b = module_unwrap(nets[1], True)
model_c = module_unwrap(nets[2], True)
reg_loss = nn.MSELoss()
criterion.to(dev)
reg_loss.to(dev)
# Initialize history
history_loss = {"train": [], "val": [], "test": []}
history_accuracy = {"train": [], "val": [], "test": []}
# Store the best val accuracy
best_val_accuracy = 0
# Process each epoch
for epoch in range(epochs):
# Initialize epoch variables
sum_loss = {"train": 0, "val": 0, "test": 0}
sum_accuracy = {"train": [0,0,0], "val": [0,0,0], "test": [0,0,0]}
progbar = None
# Process each split
for split in ["train", "val", "test"]:
if split == "train":
for n in nets:
n.train()
widgets = [
' [', pb.Timer(), '] ',
pb.Bar(),
' [', pb.ETA(), '] ', pb.Variable('ta','[Train Acc: {formatted_value}]')
]
progbar = pb.ProgressBar(max_value=len(loaders[split][0]),widgets=widgets,redirect_stdout=True)
else:
for n in nets:
n.eval()
# Process each batch
for j,((input_a, labels_a),(input_b, labels_b)) in enumerate(zip(loaders[split][0],loaders[split][1])):
input_a = input_a.to(dev)
input_b = input_b.to(dev)
labels_a = labels_a.long().to(dev)
labels_b = labels_b.long().to(dev)
#print(labels_a.shape)
#labels_a = labels_a.squeeze()
#labels_b = labels_b.squeeze()
#labels_a = labels_a.unsqueeze(1)
#labels_b = labels_b.unsqueeze(1)
#print(labels_a.shape)
#labels_a = labels_a.argmax(-1)
#labels_b = labels_b.argmax(-1)
inputs = torch.cat([input_a,input_b],axis=0)
labels = torch.cat([labels_a, labels_b])
#labels = labels.squeeze()
#print(labels.shape)
#labels = labels.argmax(-1)
# Reset gradients
optimizer.zero_grad()
# Compute output
features_a = nets[0](input_a)
features_b = nets[1](input_b)
features_c = nets[2](inputs)
pred_a = torch.squeeze(nets[3](features_a))
pred_b = torch.squeeze(nets[3](features_b))
pred_c = torch.squeeze(nets[3](features_c))
loss = criterion(pred_a, labels_a) + criterion(pred_b, labels_b) + criterion(pred_c, labels)
for n in model_a:
layer_a = model_a[n]
layer_b = model_b[n]
layer_c = model_c[n]
if (isinstance(layer_a,nn.Conv2d)):
loss += lambda_reg * reg_loss(combo_fn(layer_a.weight,layer_b.weight),layer_c.weight)
if (layer_a.bias is not None):
loss += lambda_reg * reg_loss(combo_fn(layer_a.bias, layer_b.bias), layer_c.bias)
# Update loss
sum_loss[split] += loss.item()
# Check parameter update
if split == "train":
# Compute gradients
loss.backward()
# Optimize
optimizer.step()
# Compute accuracy
#https://discuss.pytorch.org/t/bcewithlogitsloss-and-model-accuracy-calculation/59293/ 2
#pred_labels_a = (pred_a >= 0.0).long() # Binarize predictions to 0 and 1
#pred_labels_b = (pred_b >= 0.0).long() # Binarize predictions to 0 and 1
#pred_labels_c = (pred_c >= 0.0).long() # Binarize predictions to 0 and 1
#print(pred_a.shape)
_,pred_label_a = torch.max(pred_a, dim = 1)
pred_labels_a = (pred_label_a == labels_a).float()
_,pred_label_b = torch.max(pred_b, dim = 1)
pred_labels_b = (pred_label_b == labels_b).float()
_,pred_label_c = torch.max(pred_c, dim = 1)
pred_labels_c = (pred_label_c == labels).float()
batch_accuracy_a = pred_labels_a.sum().item() / len(labels_a)
batch_accuracy_b = pred_labels_b.sum().item() / len(labels_b)
batch_accuracy_c = pred_labels_c.sum().item() / len(labels)
# Update accuracy
sum_accuracy[split][0] += batch_accuracy_a
sum_accuracy[split][1] += batch_accuracy_b
sum_accuracy[split][2] += batch_accuracy_c
if (split=='train'):
progbar.update(j, ta=batch_accuracy_c)
if (progbar is not None):
progbar.finish()
# Compute epoch loss/accuracy
epoch_loss = {split: sum_loss[split] / len(loaders[split][0]) for split in ["train", "val", "test"]}
epoch_accuracy = {split: [sum_accuracy[split][i] / len(loaders[split][0]) for i in range(len(sum_accuracy[split])) ] for split in ["train", "val", "test"]}
# # Store params at the best validation accuracy
# if save_param and epoch_accuracy["val"] > best_val_accuracy:
# # torch.save(net.state_dict(), f"{net.__class__.__name__}_best_val.pth")
# torch.save(net.state_dict(), f"{model_name}_best_val.pth")
# best_val_accuracy = epoch_accuracy["val"]
print(f"Epoch {epoch + 1}:")
# Update history
for split in ["train", "val", "test"]:
history_loss[split].append(epoch_loss[split])
history_accuracy[split].append(epoch_accuracy[split])
# Print info
print(f"\t{split}\tLoss: {epoch_loss[split]:0.5}\tVGG 1:{epoch_accuracy[split][0]:0.5}"
f"\tVGG 2:{epoch_accuracy[split][1]:0.5}\tVGG *:{epoch_accuracy[split][2]:0.5}")
if save_param:
torch.save({'vgg_a':nets[0].state_dict(),'vgg_b':nets[1].state_dict(),'vgg_star':nets[2].state_dict(),'classifier':nets[3].state_dict()},f'{model_name}.pth')
For each epoch of training the result is this:
Then, I have a combined model which sums the weights of VGGA and VGGB:
DO = 'TEST'
if (DO=='TRAIN'):
train(nets, loaders, optimizer, criterion, epochs=50, dev=dev,save_param=True)
else:
state_dicts = torch.load('valerio.pth')
model1.load_state_dict(state_dicts['vgg_a']) #questi state_dict vengono dalla funzione di training
model2.load_state_dict(state_dicts['vgg_b'])
model3.load_state_dict(state_dicts['vgg_star'])
classifier.load_state_dict(state_dicts['classifier'])
test(model1,classifier,test_loader_all)
test(model2, classifier, test_loader_all)
test(model3, classifier, test_loader_all)
summed_state_dict = OrderedDict()
for key in state_dicts['vgg_star']:
if key.find('conv') >=0:
print(key)
summed_state_dict[key] = combo_fn(state_dicts['vgg_a'][key],state_dicts['vgg_b'][key])
else:
summed_state_dict[key] = state_dicts['vgg_star'][key]
model3.load_state_dict(summed_state_dict)
test(model3, classifier, test_loader_all)
where the test function is this:
def test(net,classifier, loader):
net.to(dev)
classifier.to(dev)
net.eval()
sum_accuracy = 0
# Process each batch
for j, (input, labels) in enumerate(loader):
input = input.to(dev)
labels = labels.float().to(dev)
features = net(input)
pred = torch.squeeze(classifier(features))
# https://discuss.pytorch.org/t/bcewithlogitsloss-and-model-accuracy-calculation/59293/ 2
#pred_labels = (pred >= 0.0).long() # Binarize predictions to 0 and 1
_,pred_label = torch.max(pred, dim = 1)
pred_labels = (pred_label == labels).float()
batch_accuracy = pred_labels.sum().item() / len(labels)
# Update accuracy
sum_accuracy += batch_accuracy
epoch_accuracy = sum_accuracy / len(loader)
print(f"Accuracy after sum: {epoch_accuracy:0.5}")
And the result of this aggregation is the following:
I want to modify my training function in order to print the same things of the first image, plus the accuracy of the aggregated model (the highlighted part in red of the second picture). So basically, for each epoch, accuracies of VGGA, VGGB, VGG* and combined VGG, print these accuracies and continue with the training. I tried to add this model combo but I failed, because I did not able to insert into each epoch, but only at the end of the training. I was trying to add in the training function, between print(f"Epoch {epoch + 1}:")and
# Update history
for split in ["train", "val", "test"]:
the code with the part of state_dict, but i am doing something wrong, i do not know what.
Can I reuse the code of the test, or I have to write new code?
Do you think i have to save the state_dict for each epoch, or i can do something else? Like model_c.parameters()=model_a.parameters()+model_b.parameters() (which does not work, already tried)
I solved, here is the solution of how I modified my training function:
def train(nets, loaders, optimizer, criterion, epochs=20, dev=None, save_param=False, model_name="valerio"):
# try:
nets = [n.to(dev) for n in nets]
model_a = module_unwrap(nets[0], True)
model_b = module_unwrap(nets[1], True)
model_c = module_unwrap(nets[2], True)
reg_loss = nn.MSELoss()
criterion.to(dev)
reg_loss.to(dev)
# Initialize history
history_loss = {"train": [], "val": [], "test": []}
history_accuracy = {"train": [], "val": [], "test": []}
history_test = 0
# Store the best val accuracy
best_val_accuracy = 0
# Process each epoch
for epoch in range(epochs):
# Initialize epoch variables
sum_loss = {"train": 0, "val": 0, "test": 0}
sum_accuracy = {"train": [0,0,0], "val": [0,0,0], "test": [0,0,0]}
progbar = None
# Process each split
for split in ["train", "val", "test"]:
if split == "train":
for n in nets:
n.train()
widgets = [
' [', pb.Timer(), '] ',
pb.Bar(),
' [', pb.ETA(), '] ', pb.Variable('ta','[Train Acc: {formatted_value}]')
]
progbar = pb.ProgressBar(max_value=len(loaders[split][0]),widgets=widgets,redirect_stdout=True)
else:
for n in nets:
n.eval()
# Process each batch
for j,((input_a, labels_a),(input_b, labels_b)) in enumerate(zip(loaders[split][0],loaders[split][1])):
input_a = input_a.to(dev)
input_b = input_b.to(dev)
labels_a = labels_a.long().to(dev)
labels_b = labels_b.long().to(dev)
#print(labels_a.shape)
#labels_a = labels_a.squeeze()
#labels_b = labels_b.squeeze()
#labels_a = labels_a.unsqueeze(1)
#labels_b = labels_b.unsqueeze(1)
#print(labels_a.shape)
#labels_a = labels_a.argmax(-1)
#labels_b = labels_b.argmax(-1)
inputs = torch.cat([input_a,input_b],axis=0)
labels = torch.cat([labels_a, labels_b])
#labels = labels.squeeze()
#print(labels.shape)
#labels = labels.argmax(-1)
# Reset gradients
optimizer.zero_grad()
# Compute output
features_a = nets[0](input_a)
features_b = nets[1](input_b)
features_c = nets[2](inputs)
pred_a = torch.squeeze(nets[3](features_a))
pred_b = torch.squeeze(nets[3](features_b))
pred_c = torch.squeeze(nets[3](features_c))
loss = criterion(pred_a, labels_a) + criterion(pred_b, labels_b) + criterion(pred_c, labels)
for n in model_a:
layer_a = model_a[n]
layer_b = model_b[n]
layer_c = model_c[n]
if (isinstance(layer_a,nn.Conv2d)):
loss += lambda_reg * reg_loss(combo_fn(layer_a.weight,layer_b.weight),layer_c.weight)
if (layer_a.bias is not None):
loss += lambda_reg * reg_loss(combo_fn(layer_a.bias, layer_b.bias), layer_c.bias)
# Update loss
sum_loss[split] += loss.item()
# Check parameter update
if split == "train":
# Compute gradients
loss.backward()
# Optimize
optimizer.step()
# Compute accuracy
#https://discuss.pytorch.org/t/bcewithlogitsloss-and-model-accuracy-calculation/59293/ 2
#pred_labels_a = (pred_a >= 0.0).long() # Binarize predictions to 0 and 1
#pred_labels_b = (pred_b >= 0.0).long() # Binarize predictions to 0 and 1
#pred_labels_c = (pred_c >= 0.0).long() # Binarize predictions to 0 and 1
#print(pred_a.shape)
_,pred_label_a = torch.max(pred_a, dim = 1)
pred_labels_a = (pred_label_a == labels_a).float()
_,pred_label_b = torch.max(pred_b, dim = 1)
pred_labels_b = (pred_label_b == labels_b).float()
_,pred_label_c = torch.max(pred_c, dim = 1)
pred_labels_c = (pred_label_c == labels).float()
batch_accuracy_a = pred_labels_a.sum().item() / len(labels_a)
batch_accuracy_b = pred_labels_b.sum().item() / len(labels_b)
batch_accuracy_c = pred_labels_c.sum().item() / len(labels)
# Update accuracy
sum_accuracy[split][0] += batch_accuracy_a
sum_accuracy[split][1] += batch_accuracy_b
sum_accuracy[split][2] += batch_accuracy_c
if (split=='train'):
progbar.update(j, ta=batch_accuracy_c)
if (progbar is not None):
progbar.finish()
# Compute epoch loss/accuracy
epoch_loss = {split: sum_loss[split] / len(loaders[split][0]) for split in ["train", "val", "test"]}
epoch_accuracy = {split: [sum_accuracy[split][i] / len(loaders[split][0]) for i in range(len(sum_accuracy[split])) ] for split in ["train", "val", "test"]}
# # Store params at the best validation accuracy
# if save_param and epoch_accuracy["val"] > best_val_accuracy:
# # torch.save(net.state_dict(), f"{net.__class__.__name__}_best_val.pth")
# torch.save(net.state_dict(), f"{model_name}_best_val.pth")
# best_val_accuracy = epoch_accuracy["val"]
print(f"Epoch {epoch + 1}:")
# Update history
for split in ["train", "val", "test"]:
history_loss[split].append(epoch_loss[split])
history_accuracy[split].append(epoch_accuracy[split])
# Print info
print(f"\t{split}\tLoss: {epoch_loss[split]:0.5}\tVGG 1:{epoch_accuracy[split][0]:0.5}"
f"\tVGG 2:{epoch_accuracy[split][1]:0.5}\tVGG *:{epoch_accuracy[split][2]:0.5}")
if save_param:
torch.save({'vgg_a':nets[0].state_dict(),'vgg_b':nets[1].state_dict(),'vgg_star':nets[2].state_dict(),'classifier':nets[3].state_dict()},f'{model_name}.pth')
test(nets[0], nets[3], test_loader_all)
test(nets[1], nets[3], test_loader_all)
test(nets[2], nets[3], test_loader_all)
summed_state_dict = OrderedDict()
for key in nets[2].state_dict():
if key.find('conv') >=0:
#print(key)
summed_state_dict[key] = combo_fn(nets[0].state_dict()[key],nets[1].state_dict()[key])
else:
summed_state_dict[key] = nets[2].state_dict()[key]
nets[2].load_state_dict(summed_state_dict)
test(nets[2], nets[3], test_loader_all)
The edited parts are the last rows.

Where the weights get updated in this code?

I want to train a model in distributed system. I have found a code in github for distributed training where the worker node send gradient to the parameter server and the parameter server sends the average gradient to the workers. But in client/worker side code, i couldn't understand where the received gradient updates the weights and biases.
Here is client/worker side the code, it receives initial gradients from the parameter server and then calculates loss, gradients and sends the gradient value to the server again.
from __future__ import division
from __future__ import print_function
import numpy as np
import sys
import pickle as pickle
import socket
from datetime import datetime
import time
import tensorflow as tf
import cifar10
TCP_IP = 'some IP'
TCP_PORT = 5014
port = 0
port_main = 0
s = 0
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string('train_dir', '/home/ubuntu/cifar10_train',
"""Directory where to write event logs """
"""and checkpoint.""")
tf.app.flags.DEFINE_integer('max_steps', 5000,
"""Number of batches to run.""")
tf.app.flags.DEFINE_boolean('log_device_placement', False,
"""Whether to log device placement.""")
tf.app.flags.DEFINE_integer('log_frequency', 10,
"""How often to log results to the console.""")
#gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.30)
def safe_recv(size, server_socket):
data = ""
temp = ""
data = bytearray()
recv_size = 0
while 1:
try:
temp = server_socket.recv(size-len(data))
data.extend(temp)
recv_size = len(data)
if recv_size >= size:
break
except:
print("Error")
data = bytes(data)
return data
def train():
"""Train CIFAR-10 for a number of steps."""
g1 = tf.Graph()
with g1.as_default():
global_step = tf.Variable(-1, name='global_step',
trainable=False, dtype=tf.int32)
increment_global_step_op = tf.assign(global_step, global_step+1)
# Get images and labels for CIFAR-10.
images, labels = cifar10.distorted_inputs()
# Build a Graph that computes the logits predictions from the
# inference model.
logits = cifar10.inference(images)
# Calculate loss.
loss = cifar10.loss(logits, labels)
grads = cifar10.train_part1(loss, global_step)
only_gradients = [g for g, _ in grads]
class _LoggerHook(tf.train.SessionRunHook):
"""Logs loss and runtime."""
def begin(self):
self._step = -1
self._start_time = time.time()
def before_run(self, run_context):
self._step += 1
return tf.train.SessionRunArgs(loss) # Asks for loss value.
def after_run(self, run_context, run_values):
if self._step % FLAGS.log_frequency == 0:
current_time = time.time()
duration = current_time - self._start_time
self._start_time = current_time
loss_value = run_values.results
examples_per_sec = FLAGS.log_frequency * FLAGS.batch_size / duration
sec_per_batch = float(duration / FLAGS.log_frequency)
format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
'sec/batch)')
print(format_str % (datetime.now(), self._step, loss_value,
examples_per_sec, sec_per_batch))
with tf.train.MonitoredTrainingSession(
checkpoint_dir=FLAGS.train_dir,
hooks=[tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
tf.train.NanTensorHook(loss),
_LoggerHook()],
config=tf.ConfigProto(
# log_device_placement=FLAGS.log_device_placement, gpu_options=gpu_options)) as mon_sess:
log_device_placement=FLAGS.log_device_placement)) as mon_sess:
global port
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.connect((TCP_IP, port_main))
recv_size = safe_recv(17, s)
recv_size = pickle.loads(recv_size)
recv_data = safe_recv(recv_size, s)
var_vals = pickle.loads(recv_data)
s.close()
feed_dict = {}
i = 0
for v in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES):
feed_dict[v] = var_vals[i]
i = i+1
print("Received variable values from ps")
# Opening the socket and connecting to server
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.connect((TCP_IP, port))
while not mon_sess.should_stop():
gradients, step_val = mon_sess.run(
[only_gradients, increment_global_step_op], feed_dict=feed_dict)
# sending the gradients
send_data = pickle.dumps(gradients, pickle.HIGHEST_PROTOCOL)
to_send_size = len(send_data)
send_size = pickle.dumps(to_send_size, pickle.HIGHEST_PROTOCOL)
s.sendall(send_size)
s.sendall(send_data)
# receiving the variable values
recv_size = safe_recv(17, s)
recv_size = pickle.loads(recv_size)
recv_data = safe_recv(recv_size, s)
var_vals = pickle.loads(recv_data)
feed_dict = {}
i = 0
for v in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES):
feed_dict[v] = var_vals[i]
i = i+1
s.close()
def main(argv=None): # pylint: disable=unused-argument
global port
global port_main
global s
if(len(sys.argv) != 3):
print("<port> <worker-id> required")
sys.exit()
port = int(sys.argv[1]) + int(sys.argv[2])
port_main = int(sys.argv[1])
print("Connecting to port ", port)
cifar10.maybe_download_and_extract()
if tf.gfile.Exists(FLAGS.train_dir):
tf.gfile.DeleteRecursively(FLAGS.train_dir)
tf.gfile.MakeDirs(FLAGS.train_dir)
total_start_time = time.time()
train()
print("--- %s seconds ---" % (time.time() - total_start_time))
if __name__ == '__main__':
tf.app.run()
EDIT:
Here is the train_part1() code:
def train_part1(total_loss, global_step):
"""Train CIFAR-10 model.
Create an optimizer and apply to all trainable variables. Add moving
average for all trainable variables.
Args:
total_loss: Total loss from loss().
global_step: Integer Variable counting the number of training steps
processed.
Returns:
train_op: op for training.
"""
# Variables that affect learning rate.
num_batches_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN / FLAGS.batch_size
decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)
# Decay the learning rate exponentially based on the number of steps.
lr = tf.train.exponential_decay(INITIAL_LEARNING_RATE,
global_step,
decay_steps,
LEARNING_RATE_DECAY_FACTOR,
staircase=True)
tf.summary.scalar('learning_rate', lr)
# Generate moving averages of all losses and associated summaries.
loss_averages_op = _add_loss_summaries(total_loss)
# Compute gradients.
with tf.control_dependencies([loss_averages_op]):
opt = tf.train.GradientDescentOptimizer(lr)
grads = opt.compute_gradients(total_loss)
return grads
To me it seems that line
gradients, step_val = mon_sess.run(
[only_gradients, increment_global_step_op], feed_dict=feed_dict)
receieves new values for variables in feed_dict, assign these values to variables, and makes a training step, during which it only calculates and returns the gradients, that are later sent to the parameter server. I would expect cifar10.train_part1 (the one that returns only_gradients) to depend on variable values and define the update.
Update: I looked into the code and changed my mind. Had to google and found next answer that shed some light on what is happening.
Gradients are actually not applied in this code anywhere implicitly. Instead, gradients are sent to the parameter server, parameter server averages gradients and applies them to weights, it returns the weights to the local worker, * recieved weights are used instead of local weights during session run through feed_dict* i.e. local weights are never actually updated and do not actually matter at all. The key, is that feed_dict allows to rewrite any tensor output of the session run and this code rewrites variables.

Accuracy from sess.run(() is returning the value in bytes. How can I change to value?

I am new to CNN and tried to train the CNN model. However when I try to print the accuracies returned from cnn it gives me results in bytes format like b'\n\x11\n\naccuracy_1\x15\x00\x00\x80<'. However when I try to print the values from the loss_train obtained from the same sess.run I get value of 1419.06. Why is this happening.
########################################################################################################################
#IMPORT PACKAGES
import math
import shutil
import pywt
import sys
import random
import numpy as np
import h5py
import pip
import os
from os import system
import tensorflow as tf
from PIL import Image
import matplotlib
import matplotlib.pyplot as plt
import skimage.io as io
import matplotlib.image as mpimg
import time
np.random.seed(1)
slim = tf.contrib.slim
########################################################################################################################
########################################################################################################################
#The FLAGS are used to assign constant values to several paths as well as variables that will be constantly used.
flags = tf.app.flags
flags.DEFINE_string('dataset_dir','E:\\CODING\\CNN_Compressed\\Trial\\Codes\\using_numpy\\NWPU-RESISC45\\NWPU-RESISC45\\','E:\\CODING\\CNN_Compressed\\Trial\\Codes\\using_numpy\\NWPU-RESISC45\\NWPU-RESISC45\\')
flags.DEFINE_float('validation_size', 0.1, 'Float: The proportion of examples in the dataset to be used for validation')
flags.DEFINE_float('test_size', 0.1, 'Float: The proportion of examples in the dataset to be used for test')
flags.DEFINE_integer('num_shards', 1, 'Int: Number of shards to split the TFRecord files into')
flags.DEFINE_integer('random_seed', 0, 'Int: Random seed to use for repeatability.')
flags.DEFINE_string('tfrecord_filename', None, 'String: The output filename to name your TFRecord file')
tf.app.flags.DEFINE_integer('target_image_height', 256, 'train input image height')
tf.app.flags.DEFINE_integer('target_image_width', 256, 'train input image width')
tf.app.flags.DEFINE_integer('batch_size', 128, 'batch size of training.')
tf.app.flags.DEFINE_integer('num_epochs', 30, 'epochs of training.')
tf.app.flags.DEFINE_float('learning_rate', 0.001, 'learning rate of training.')
FLAGS = flags.FLAGS
img_size = 256
num_channels=3
num_classes=45
########################################################################################################################
########################################################################################################################
datapath_train = 'E:\\CODING\\CNN_Compressed\\Trial\\Codes\\using_numpy\\NWPU-RESISC45\\NWPU-RESISC45\\train\\None_train_00000-of-00001.tfrecord'
def _extract_fn(tfrecord):
features={
'image/encoded': tf.FixedLenFeature([], tf.string),
'image/format': tf.FixedLenFeature([], tf.string),
'image/class/label': tf.FixedLenFeature([], tf.int64),
'image/height': tf.FixedLenFeature([], tf.int64),
'image/width': tf.FixedLenFeature([], tf.int64),
'image/channels': tf.FixedLenFeature([],tf.int64)
}
parsed_example = tf.parse_single_example(tfrecord, features)
image_de = tf.io.decode_raw(parsed_example['image/encoded'],tf.uint8)
img_height = tf.cast(parsed_example['image/height'],tf.int32)
img_width = tf.cast(parsed_example['image/width'],tf.int32)
img_channel = tf.cast(parsed_example['image/channels'],tf.int32)
img_shape = tf.stack([img_height,img_width,img_channel])
label = tf.cast(parsed_example['image/class/label'],tf.int64)
image = tf.reshape(image_de,img_shape)
#label = parsed_example['image/class/label']
return image, img_shape, label
########################################################################################################################
#########################################################################################################################
"""
# Pipeline of dataset and iterator
dataset = tf.data.TFRecordDataset(datapath)
# Parse the record into tensors.
dataset = dataset.map(_extract_fn)
# Generate batches
dataset = dataset.batch(1)
# Create a one-shot iterator
iterator = dataset.make_one_shot_iterator()
image, img_shape, label = iterator.get_next()
with tf.Session() as sess:
try:
print(sess.run(img_shape))
image_batch=sess.run(image)
print(image_batch)
img_bas=tf.cast(image_batch,tf.uint8)
plt.imshow(image_batch[0,:,:,:]*255)
plt.show()
except tf.errors.OutOfRangeError:
pass"""
########################################################################################################################
########################################################################################################################
#INITIALIZATION FOR THE CNN ARCHITECTURE
filter_size_conv1 = [5,5]
num_filters_conv1 = 32
filter_shape_pool1 = [2,2]
filter_size_conv2 = [3,3]
num_filters_conv2 = 64
filter_shape_pool2 = [2,2]
#PLACEHOLDERS
x = tf.placeholder(tf.float32, shape = [None, img_size,img_size,num_channels], name='x')
y = tf.placeholder(tf.int32, shape= [None], name = 'ytrue') #Output data placeholder
y_one_hot = tf.one_hot(y,45)
y_true_cls = tf.argmax(y_one_hot, dimension=1)
########################################################################################################################
########################################################################################################################
def new_conv_layer(input, num_input_channels, filter_size, num_filters, name):
with tf.variable_scope(name) as scope:
# Shape of the filter-weights for the convolution
shape = [filter_size, filter_size, num_input_channels, num_filters]
# Create new weights (filters) with the given shape
weights = tf.Variable(tf.truncated_normal(shape, stddev=0.05))
# Create new biases, one for each filter
biases = tf.Variable(tf.constant(0.05, shape=[num_filters]))
# TensorFlow operation for convolution
layer = tf.nn.conv2d(input=input, filter=weights, strides=[1, 1, 1, 1], padding='SAME')
# Add the biases to the results of the convolution.
layer += biases
return layer, weights
def new_pool_layer(input, name):
with tf.variable_scope(name) as scope:
# TensorFlow operation for convolution
layer = tf.nn.max_pool(value=input, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
return layer
def new_relu_layer(input, name):
with tf.variable_scope(name) as scope:
# TensorFlow operation for convolution
layer = tf.nn.relu(input)
return layer
def new_fc_layer(input, num_inputs, num_outputs, name):
with tf.variable_scope(name) as scope:
# Create new weights and biases.
weights = tf.Variable(tf.truncated_normal([num_inputs, num_outputs], stddev=0.05))
biases = tf.Variable(tf.constant(0.05, shape=[num_outputs]))
# Multiply the input and weights, and then add the bias-values.
layer = tf.matmul(input, weights) + biases
return layer
# CONVOLUTIONAL LAYER 1
layer_conv1, weights_conv1 = new_conv_layer(input=x, num_input_channels=3, filter_size=5, num_filters=32, name ="conv1")
# Pooling Layer 1
layer_pool1 = new_pool_layer(layer_conv1, name="pool1")
# RelU layer 1
layer_relu1 = new_relu_layer(layer_pool1, name="relu1")
# CONVOLUTIONAL LAYER 2
layer_conv2, weights_conv2 = new_conv_layer(input=layer_relu1, num_input_channels=32, filter_size=5, num_filters=64, name= "conv2")
# Pooling Layer 2
layer_pool2 = new_pool_layer(layer_conv2, name="pool2")
# RelU layer 2
layer_relu2 = new_relu_layer(layer_pool2, name="relu2")
# FLATTEN LAYER
num_features = layer_relu2.get_shape()[1:4].num_elements()
layer_flat = tf.reshape(layer_relu2, [-1, num_features])
# FULLY-CONNECTED LAYER 1
layer_fc1 = new_fc_layer(layer_flat, num_inputs=num_features, num_outputs=1000, name="fc1")
# RelU layer 3
layer_relu3 = new_relu_layer(layer_fc1, name="relu3")
# FULLY-CONNECTED LAYER 2
layer_fc2 = new_fc_layer(input=layer_relu3, num_inputs=1000, num_outputs=45, name="fc2")
# Use Softmax function to normalize the output
with tf.variable_scope("Softmax"):
y_pred = tf.nn.softmax(layer_fc2)
y_pred_cls = tf.argmax(y_pred, dimension=1)
# Use Cross entropy cost function
with tf.name_scope("cross_ent"):
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=layer_fc2, labels=y_one_hot)
cost = tf.reduce_mean(cross_entropy)
# Use Adam Optimizer
with tf.name_scope("optimizer"):
optimizer = tf.train.AdamOptimizer(learning_rate = 1e-4).minimize(cost)
# Accuracy
with tf.name_scope("accuracy"):
correct_prediction = tf.equal(y_pred_cls, y_true_cls)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# setup the initialisation operator
init_op = tf.global_variables_initializer()
# Pipeline of dataset and iterator
dataset_train = tf.data.TFRecordDataset(datapath_train)
# Parse the record into tensors.
dataset_train = dataset_train.map(_extract_fn)
# Generate batches
dataset_train = dataset_train.batch(FLAGS.batch_size)
iterator_train = dataset_train.make_initializable_iterator()
next_element_train = iterator_train.get_next()
print('\n Starting the CNN train')
# Initialize the FileWriter
writer_train = tf.summary.FileWriter("Training_FileWriter/")
writer_val = tf.summary.FileWriter("Validation_FileWriter/")
#summary
accuracy = tf.summary.scalar("accuracy", accuracy)
loss = tf.summary.scalar("loss", cost)
# Merge all summaries together
merged_summary = tf.summary.merge_all()
#PERFORM THE CNN OPERATIONS
with tf.Session() as sess:
sess.run(init_op)
sess.run(iterator_train.initializer)
# Add the model graph to TensorBoard
writer_train.add_graph(sess.graph)
writer_val.add_graph(sess.graph)
# Loop over number of epochs
print('\nTraining')
for epoch in range(FLAGS.num_epochs):
sess.run(iterator_train.initializer)
start_time = time.time()
train_accuracy = 0
validation_accuracy = 0
acc_train_avg = 0
val_acc_avg = 0
for batch in range(0, int(25200/FLAGS.batch_size)):
img_train, shp_train, lbl_train = sess.run(next_element_train)
_, loss_train, acc_train, acc_summ = sess.run([optimizer, cost, accuracy, merged_summary], feed_dict = {x: img_train, y: lbl_train})
print(loss_train)
print(acc_train)
train_accuracy+=acc_train
end_time = time.time()
#acc_train_avg = (train_accuracy/(int(25200/FLAGS.batch_size)))
#TRAINING
print("Epoch "+str(epoch+1)+" completed : Time usage "+str(int(end_time-start_time))+" seconds")
print("\tAccuracy:")
print("\t- Training Loss:\t{}", loss_train)
print ("\t- Training Accuracy:\t{}",acc_train)
writer_train.add_summary(acc_summ,epoch+1)
#######################################################################################################################
The error is obtained as
Training
1427.1069
b'\n\x11\n\naccuracy_1\x15\x00\x00\x80<'
Traceback (most recent call last):
File "train_trial.py", line 302, in <module>
train_accuracy+=acc_train
TypeError: unsupported operand type(s) for +=: 'int' and 'bytes'
You are overwriting your loss and accuracy operations here:
accuracy = tf.summary.scalar("accuracy", accuracy)
loss = tf.summary.scalar("loss", cost)
Then when you run accuracy you get the protobuf bytes of the summary, instead of just running the op. You should rename these variables to prevent overwriting/name clashes.

How to predict in pycaffe?

I have a model that has been trained on CIFAR-10, but I don't realise how can I make a prediction in pycaffe.
I got an image from lmdb but I don't know how to load it in a net and get a predicted class.
My code:
net = caffe.Net('acc81/model.prototxt',
'acc81/cifar10_full_iter_70000.caffemodel.h5',
caffe.TEST)
lmdb_env = lmdb.open('cifar10_test_lmdb/')
lmdb_txn = lmdb_env.begin()
lmdb_cursor = lmdb_txn.cursor()
for key, value in lmdb_cursor:
datum = caffe.proto.caffe_pb2.Datum()
datum.ParseFromString(value)
image = caffe.io.datum_to_array(datum)
image = image.astype(np.uint8)
# What's next with the image variable?
# If i try:
# out = net.forward_all(data=np.asarray([image]))
# I get Exception: Input blob arguments do not match net inputs.
print("Image class is " + label)
Use this python script
# Run the script with anaconda-python
# $ /home/<path to anaconda directory>/anaconda/bin/python LmdbClassification.py
import sys
import numpy as np
import lmdb
import caffe
from collections import defaultdict
caffe.set_mode_gpu()
# Modify the paths given below
deploy_prototxt_file_path = '/home/<username>/caffe/examples/cifar10/cifar10_deploy.prototxt' # Network definition file
caffe_model_file_path = '/home/<username>/caffe/examples/cifar10/cifar10_iter_5000.caffemodel' # Trained Caffe model file
test_lmdb_path = '/home/<username>/caffe/examples/cifar10/cifar10_test_lmdb/' # Test LMDB database path
mean_file_binaryproto = '/home/<username>/caffe/examples/cifar10/mean.binaryproto' # Mean image file
# Extract mean from the mean image file
mean_blobproto_new = caffe.proto.caffe_pb2.BlobProto()
f = open(mean_file_binaryproto, 'rb')
mean_blobproto_new.ParseFromString(f.read())
mean_image = caffe.io.blobproto_to_array(mean_blobproto_new)
f.close()
# CNN reconstruction and loading the trained weights
net = caffe.Net(deploy_prototxt_file_path, caffe_model_file_path, caffe.TEST)
count = 0
correct = 0
matrix = defaultdict(int) # (real,pred) -> int
labels_set = set()
lmdb_env = lmdb.open(test_lmdb_path)
lmdb_txn = lmdb_env.begin()
lmdb_cursor = lmdb_txn.cursor()
for key, value in lmdb_cursor:
datum = caffe.proto.caffe_pb2.Datum()
datum.ParseFromString(value)
label = int(datum.label)
image = caffe.io.datum_to_array(datum)
image = image.astype(np.uint8)
out = net.forward_all(data=np.asarray([image]) - mean_image)
plabel = int(out['prob'][0].argmax(axis=0))
count += 1
iscorrect = label == plabel
correct += (1 if iscorrect else 0)
matrix[(label, plabel)] += 1
labels_set.update([label, plabel])
if not iscorrect:
print("\rError: key = %s, expected %i but predicted %i" % (key, label, plabel))
sys.stdout.write("\rAccuracy: %.1f%%" % (100.*correct/count))
sys.stdout.flush()
print("\n" + str(correct) + " out of " + str(count) + " were classified correctly")
print ""
print "Confusion matrix:"
print "(r , p) | count"
for l in labels_set:
for pl in labels_set:
print "(%i , %i) | %i" % (l, pl, matrix[(l,pl)])