Machine Translation FFN: Dimension problem due to window size

This is my first time creating an FFN; I'm training it to translate French to English using word prediction.
The inputs are two arrays: one of size 2 × window_size + 1 from the source language and one of size window_size from the target language, plus a label of size 1.
For e.g for window_size = 2:
["je","mange", "la", "pomme","avec"]
and
["I", "eat"]
So the inputs have sizes [5] and [2], which concatenate to 7.
Label: "the" (referring to "la" in French)
The label is converted to a one-hot encoding before being compared with yHat.
I'm using a unique index for each word (1 to len(vocab)) and train on the indices (not the words).
The output of the FFN is a probability distribution over the target-language vocabulary.
The problem is that the FFN doesn't learn and the accuracy stays at 0.
When I print the sizes of final_y (the one-hot targets) and yHat (the model output), they have different dimensions:
yHat.size() = [512, 7, 10212]
(512 is the batch size, 7 the concatenated input size, and 10212 the target vocabulary size), while
final_y.size() = [512, 10212]
Across the forward method I get these sizes:
torch.Size([512, 5, 32])
torch.Size([512, 5, 64])
torch.Size([512, 5, 64])
torch.Size([512, 2, 256])
torch.Size([512, 2, 32])
torch.Size([512, 2, 64])
torch.Size([512, 2, 64])
torch.Size([512, 7, 64])
torch.Size([512, 7, 128])
torch.Size([512, 7, 10212])
Since the accuracy only increases when yHat matches final_y, I suspect that never happens because they don't even have the same shape (2D vs. 3D). Is this the problem?
Please refer to the code below, and if you need any other info please tell me.
The code runs without errors.
trainingData = TensorDataset(encoded_source_windows, encoded_target_windows, encoded_labels)
# print(trainingData)
batchsize = 512
trainingLoader = DataLoader(trainingData, batch_size=batchsize, drop_last=True)

def ffnModel(vocabSize1, vocabSize2, learningRate=0.01):
    class ffNetwork(nn.Module):
        def __init__(self):
            super().__init__()
            self.embeds_src = nn.Embedding(vocabSize1, 256)
            self.embeds_target = nn.Embedding(vocabSize2, 256)
            # input layer
            self.inputSource = nn.Linear(256, 32)
            self.inputTarget = nn.Linear(256, 32)
            # hidden layer 1
            self.fc1 = nn.Linear(32, 64)
            self.bnormS = nn.BatchNorm1d(5)
            self.bnormT = nn.BatchNorm1d(2)
            # layer(s) after concatenation
            self.fc2 = nn.Linear(64, 128)
            self.output = nn.Linear(128, vocabSize2)
            self.softmaaax = nn.Softmax(dim=0)

        # forward pass
        def forward(self, xSource, xTarget):
            xSource = self.embeds_src(xSource)
            xSource = F.relu(self.inputSource(xSource))
            xSource = F.relu(self.fc1(xSource))
            xSource = self.bnormS(xSource)
            xTarget = self.embeds_target(xTarget)
            xTarget = F.relu(self.inputTarget(xTarget))
            xTarget = F.relu(self.fc1(xTarget))
            xTarget = self.bnormT(xTarget)
            xCat = torch.cat((xSource, xTarget), dim=1)  # dim=128 or 1?
            xCat = F.relu(self.fc2(xCat))
            print(xCat.size())
            xCat = self.softmaaax(self.output(xCat))
            return xCat

    # create an instance of the class
    net = ffNetwork()
    # loss function
    lossfun = nn.CrossEntropyLoss()
    # lossfun = nn.NLLLoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=learningRate)
    return net, lossfun, optimizer

def trainModel(vocabSize1, vocabSize2, learningRate):
    # number of epochs
    numepochs = 64
    # create a new model instance
    net, lossfun, optimizer = ffnModel(vocabSize1, vocabSize2, learningRate)
    # initialize losses
    losses = torch.zeros(numepochs)
    trainAcc = []
    for epochi in range(numepochs):
        # switch on training mode
        net.train()
        # loop over training data batches
        batchAcc = []
        batchLoss = []
        for A, B, y in tqdm(trainingLoader):
            # one-hot encode the labels
            final_y = []
            for i in range(y.size(dim=0)):
                yy = [0] * target_vocab_length
                yy[y[i]] = 1
                final_y.append(yy)
            final_y = torch.tensor(final_y)
            # forward pass and loss
            yHat = net(A, B)
            loss = lossfun(yHat, final_y)
            ################
            print("\n yHat.size()")
            print(yHat.size())
            print("final_y.size()")
            print(final_y.size())
            # backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # loss from this batch
            batchLoss.append(loss.item())
            print(f'batchLoss: {loss.item()}')
            # accuracy calculator
            matches = torch.argmax(yHat) == final_y         # booleans (false/true)
            matchesNumeric = matches.float()                # convert to numbers (0/1)
            accuracyPct = 100 * torch.mean(matchesNumeric)  # average and x100
            batchAcc.append(accuracyPct)                    # add to the list of accuracies
            print(f'accuracyPct: {accuracyPct}')
        trainAcc.append(np.mean(batchAcc))
        losses[epochi] = np.mean(batchLoss)
    return trainAcc, losses, net

trainAcc, losses, net = trainModel(len(source_vocab), len(target_vocab), 0.01)
print(trainAcc)
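For reference, here is a minimal sketch (not the original code; seq_len and hidden are stand-in names) of how the shapes can be made to agree: flatten the per-token features so the classifier emits [batch, vocab], and let nn.CrossEntropyLoss consume the integer labels directly, since it applies log-softmax internally and expects class indices rather than one-hot vectors:

import torch
import torch.nn as nn

batch, seq_len, hidden, vocab = 512, 7, 128, 10212

# Flattening the per-token features gives one vector per example, so the
# classifier outputs [batch, vocab] instead of [batch, seq_len, vocab].
classifier = nn.Linear(seq_len * hidden, vocab)

xCat = torch.randn(batch, seq_len, hidden)    # stand-in for the concatenated features
logits = classifier(xCat.reshape(batch, -1))  # shape [512, 10212]

# nn.CrossEntropyLoss applies log-softmax itself and takes integer class
# indices, so no Softmax layer and no one-hot loop are needed.
lossfun = nn.CrossEntropyLoss()
y = torch.randint(0, vocab, (batch,))         # the original index labels
loss = lossfun(logits, y)
print(logits.shape, loss.item())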

Related

IndexError target is out of bounds

I'm working on a custom dataset of images and using a neural net to classify them.
The dataset is about 6000 images across 58 classes, but during training I keep getting a "target is out of bounds" error.
I've double-checked the number of classes and the image size but still get the same error.
# hyperparams
learning_rate = 5e-4
# 3 for RGB values
in_channel = 3
# classes from the dataset
num_classes = 58
# arbitrary choice
batch_size = 32
# total number of epochs used to train the model
epochs = 3

traffic_dataset = TrafficSigns(csv_file='annotations.csv',
                               root_directory='/Users/*****/Desktop/images/',
                               transform=transforms.ToTensor())
train_size = int(0.8 * len(traffic_dataset))
test_size = len(traffic_dataset) - train_size
train, test = torch.utils.data.random_split(traffic_dataset,
                                            [train_size, test_size])
train_loader = torch.utils.data.DataLoader(train,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           num_workers=4)
test_loader = torch.utils.data.DataLoader(test,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          num_workers=4)

# Create a fully connected NN
class Net(nn.Module):
    # constructor takes the size of the data and the number of classes
    def __init__(self, input_size, num_classes):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(input_size, 60)
        self.fc2 = nn.Linear(60, num_classes)

    # forward step with ReLU as the non-linearity; x is the data passed to the model
    def forward(self, x):
        x = f.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# sanity check
test = Net(2028, num_classes)
x = torch.randn(24, 2028)
print(test(x).shape)

# instantiate the NN
net = Net(2028, num_classes)
criterion = nn.CrossEntropyLoss()
nn_optimizer = optim.Adam(net.parameters(),
                          lr=learning_rate)

# train over epochs using the criterion and the gradient descent algorithm established above
for epoch in range(1):
    for i, (data, target) in enumerate(tqdm.tqdm(train_loader)):
        data = data.reshape(data.shape[0], -1)
        # forward
        outputs = net(data)
        loss = criterion(outputs, target)
        # backward propagation
        nn_optimizer.zero_grad()
        loss.backward()
        # gradient descent step
        nn_optimizer.step()
I'm also using a custom dataset class to import the images and labels.
My first thought was that the class is not iterating over the CSV and images correctly, but I can't find where they might be mismatched.
class TrafficSigns(Dataset):
    # the constructor needs the csv file of labels, the image directory, and the transform defined above
    def __init__(self, csv_file, root_directory, transform=None):
        self.labels = pd.read_csv(csv_file)
        self.root_directory = root_directory
        self.transform = transform

    # returns the length
    def __len__(self):
        return len(self.labels)

    # get a data point by index
    def __getitem__(self, i):
        image_path = os.path.join(self.root_directory, self.labels.iloc[i, 0])
        image = io.imread(image_path)
        y_label = torch.tensor(int(self.labels.iloc[i, 1]))
        # if statement needed since transform can be set to None
        if self.transform:
            image = self.transform(image)
        return (image, y_label)
Any help would be awesome, thank you.
Here is the full stacktrace error that's getting thrown.
IndexError Traceback (most recent call last)
/var/folders/t_/rcfcs8g56jn7trwnsvmdyh_r0000gn/T/ipykernel_34551/1839343274.py in <module>
11 #forward
12 outputs = net(data)
---> 13 loss = criterion(outputs, target)
14 #backward propigation
15 nn_optimizer.zero_grad()
~/Library/Python/3.8/lib/python/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1100 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1101 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1102 return forward_call(*input, **kwargs)
1103 # Do not call functions when jit is used
1104 full_backward_hooks, non_full_backward_hooks = [], []
~/Library/Python/3.8/lib/python/site-packages/torch/nn/modules/loss.py in forward(self, input, target)
1148
1149 def forward(self, input: Tensor, target: Tensor) -> Tensor:
-> 1150 return F.cross_entropy(input, target, weight=self.weight,
1151 ignore_index=self.ignore_index, reduction=self.reduction,
1152 label_smoothing=self.label_smoothing)
~/Library/Python/3.8/lib/python/site-packages/torch/nn/functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction, label_smoothing)
2844 if size_average is not None or reduce is not None:
2845 reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 2846 return torch._C._nn.cross_entropy_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index, label_smoothing)
2847
2848
IndexError: Target 125 is out of bounds.
I came across the same issue when I used a sequential (LSTM) model for next-sequence prediction. I checked the data loader and found that the labels contained -1 (coming from null rows), which is what made the cross-entropy loss throw the exception.
Solved: check your data for null rows and remove them or set the labels accordingly.
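A quick sanity check along these lines for the dataset above (a sketch; it assumes the labels live in the same annotations.csv column that __getitem__ reads):

import pandas as pd

# Every target fed to nn.CrossEntropyLoss must be an integer in [0, num_classes - 1].
num_classes = 58
labels = pd.read_csv('annotations.csv').iloc[:, 1]  # the column __getitem__ uses
print('min:', labels.min(), 'max:', labels.max(), 'unique:', labels.nunique())

bad = labels[(labels < 0) | (labels >= num_classes)]
print(len(bad), 'out-of-range labels')  # catches -1 rows or targets like 125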

Why is the true positive - false negative distribution always the same

I have a neural network that I use for binary classification. I change the size of the training data and predict on the test set. Looking at the results, the difference between tp and fn is always the same, and the difference between tn and fp is always the same. For example, in iteration #2, tp#2 - tp#1 = -91 and fn#2 - fn#1 = +91. Also, fp#2 - fp#1 = -46 and tn#2 - tn#1 = +46. As another example, tp#3 - tp#2 = -35 and fn#3 - fn#2 = +35.
Iteration #1
tn=119, fp=173, fn=110, tp=407
Iteration #2
tn=165, fp=127, fn=201, tp=316
Iteration #3
tn=176, fp=116, fn=236, tp=281
Iteration #4
tn=157, fp=135, fn=207, tp=310
Iteration #5
tn=155, fp=137, fn=214, tp=303
I have tried various neural net architectures, but I always get the same numbers. Do you have an idea of what is wrong?
The following is a very simple network that I use:
class AllCnns(nn.Module):
    def __init__(self, vocab_size, embedding_size):
        torch.manual_seed(0)
        super(AllCnns, self).__init__()
        self.word_embeddings = nn.Embedding(vocab_size, embedding_size)
        self.conv1 = nn.Conv1d(embedding_size, 64, 3)
        self.drop1 = nn.Dropout(0.3)
        self.max_pool1 = nn.MaxPool1d(2)
        self.flat1 = nn.Flatten()
        self.fc1 = nn.Linear(64*80, 100)
        self.fc2 = nn.Linear(100, 1)

    def forward(self, sentence):
        embedding = self.word_embeddings(sentence).permute(0, 2, 1)
        conv1 = F.relu(self.conv1(embedding))
        drop1 = self.drop1(conv1)
        max_pool1 = self.max_pool1(drop1)
        flat1 = self.flat1(max_pool1)
        fc1 = F.relu(self.fc1(flat1))
        fc2 = torch.sigmoid(self.fc2(fc1))
        return fc2
I think this is expected.
The sum of tn (true negatives) and fp (false positives) adds up to the total number of actual negatives, and the same goes for the other two.
So as long as you are using the same test data,
tn + fp = 292 (total actual negatives)
fn + tp = 517 (total actual positives)
always hold.
So tn#1 + fp#1 = tn#2 + fp#2, and therefore tn#1 - tn#2 = fp#2 - fp#1.
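The invariant is easy to verify on the numbers from the question:

# The row sums of the confusion matrix are fixed by the (unchanged) test set.
iterations = [
    (119, 173, 110, 407),  # tn, fp, fn, tp for iteration #1
    (165, 127, 201, 316),
    (176, 116, 236, 281),
    (157, 135, 207, 310),
    (155, 137, 214, 303),
]
for tn, fp, fn, tp in iterations:
    assert tn + fp == 292 and fn + tp == 517
print('tn + fp = 292 and fn + tp = 517 in every iteration')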

Accuracy from sess.run() is returning the value in bytes. How can I get the actual value?

I am new to CNNs and tried to train a CNN model. However, when I try to print the accuracy returned from the CNN, it gives me the result in bytes, like b'\n\x11\n\naccuracy_1\x15\x00\x00\x80<'. Yet when I print the loss_train value obtained from the same sess.run call, I get 1419.06. Why is this happening?
########################################################################################################################
#IMPORT PACKAGES
import math
import shutil
import pywt
import sys
import random
import numpy as np
import h5py
import pip
import os
from os import system
import tensorflow as tf
from PIL import Image
import matplotlib
import matplotlib.pyplot as plt
import skimage.io as io
import matplotlib.image as mpimg
import time
np.random.seed(1)
slim = tf.contrib.slim
########################################################################################################################
########################################################################################################################
#The FLAGS are used to assign constant values to several paths as well as variables that will be constantly used.
flags = tf.app.flags
flags.DEFINE_string('dataset_dir','E:\\CODING\\CNN_Compressed\\Trial\\Codes\\using_numpy\\NWPU-RESISC45\\NWPU-RESISC45\\','E:\\CODING\\CNN_Compressed\\Trial\\Codes\\using_numpy\\NWPU-RESISC45\\NWPU-RESISC45\\')
flags.DEFINE_float('validation_size', 0.1, 'Float: The proportion of examples in the dataset to be used for validation')
flags.DEFINE_float('test_size', 0.1, 'Float: The proportion of examples in the dataset to be used for test')
flags.DEFINE_integer('num_shards', 1, 'Int: Number of shards to split the TFRecord files into')
flags.DEFINE_integer('random_seed', 0, 'Int: Random seed to use for repeatability.')
flags.DEFINE_string('tfrecord_filename', None, 'String: The output filename to name your TFRecord file')
tf.app.flags.DEFINE_integer('target_image_height', 256, 'train input image height')
tf.app.flags.DEFINE_integer('target_image_width', 256, 'train input image width')
tf.app.flags.DEFINE_integer('batch_size', 128, 'batch size of training.')
tf.app.flags.DEFINE_integer('num_epochs', 30, 'epochs of training.')
tf.app.flags.DEFINE_float('learning_rate', 0.001, 'learning rate of training.')
FLAGS = flags.FLAGS
img_size = 256
num_channels=3
num_classes=45
########################################################################################################################
########################################################################################################################
datapath_train = 'E:\\CODING\\CNN_Compressed\\Trial\\Codes\\using_numpy\\NWPU-RESISC45\\NWPU-RESISC45\\train\\None_train_00000-of-00001.tfrecord'
def _extract_fn(tfrecord):
    features = {
        'image/encoded': tf.FixedLenFeature([], tf.string),
        'image/format': tf.FixedLenFeature([], tf.string),
        'image/class/label': tf.FixedLenFeature([], tf.int64),
        'image/height': tf.FixedLenFeature([], tf.int64),
        'image/width': tf.FixedLenFeature([], tf.int64),
        'image/channels': tf.FixedLenFeature([], tf.int64)
    }
    parsed_example = tf.parse_single_example(tfrecord, features)
    image_de = tf.io.decode_raw(parsed_example['image/encoded'], tf.uint8)
    img_height = tf.cast(parsed_example['image/height'], tf.int32)
    img_width = tf.cast(parsed_example['image/width'], tf.int32)
    img_channel = tf.cast(parsed_example['image/channels'], tf.int32)
    img_shape = tf.stack([img_height, img_width, img_channel])
    label = tf.cast(parsed_example['image/class/label'], tf.int64)
    image = tf.reshape(image_de, img_shape)
    # label = parsed_example['image/class/label']
    return image, img_shape, label
########################################################################################################################
#########################################################################################################################
"""
# Pipeline of dataset and iterator
dataset = tf.data.TFRecordDataset(datapath)
# Parse the record into tensors.
dataset = dataset.map(_extract_fn)
# Generate batches
dataset = dataset.batch(1)
# Create a one-shot iterator
iterator = dataset.make_one_shot_iterator()
image, img_shape, label = iterator.get_next()
with tf.Session() as sess:
try:
print(sess.run(img_shape))
image_batch=sess.run(image)
print(image_batch)
img_bas=tf.cast(image_batch,tf.uint8)
plt.imshow(image_batch[0,:,:,:]*255)
plt.show()
except tf.errors.OutOfRangeError:
pass"""
########################################################################################################################
########################################################################################################################
#INITIALIZATION FOR THE CNN ARCHITECTURE
filter_size_conv1 = [5,5]
num_filters_conv1 = 32
filter_shape_pool1 = [2,2]
filter_size_conv2 = [3,3]
num_filters_conv2 = 64
filter_shape_pool2 = [2,2]
#PLACEHOLDERS
x = tf.placeholder(tf.float32, shape = [None, img_size,img_size,num_channels], name='x')
y = tf.placeholder(tf.int32, shape= [None], name = 'ytrue') #Output data placeholder
y_one_hot = tf.one_hot(y,45)
y_true_cls = tf.argmax(y_one_hot, dimension=1)
########################################################################################################################
########################################################################################################################
def new_conv_layer(input, num_input_channels, filter_size, num_filters, name):
    with tf.variable_scope(name) as scope:
        # Shape of the filter-weights for the convolution
        shape = [filter_size, filter_size, num_input_channels, num_filters]
        # Create new weights (filters) with the given shape
        weights = tf.Variable(tf.truncated_normal(shape, stddev=0.05))
        # Create new biases, one for each filter
        biases = tf.Variable(tf.constant(0.05, shape=[num_filters]))
        # TensorFlow operation for convolution
        layer = tf.nn.conv2d(input=input, filter=weights, strides=[1, 1, 1, 1], padding='SAME')
        # Add the biases to the results of the convolution.
        layer += biases
        return layer, weights

def new_pool_layer(input, name):
    with tf.variable_scope(name) as scope:
        # TensorFlow operation for max pooling
        layer = tf.nn.max_pool(value=input, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
        return layer

def new_relu_layer(input, name):
    with tf.variable_scope(name) as scope:
        # TensorFlow operation for ReLU
        layer = tf.nn.relu(input)
        return layer

def new_fc_layer(input, num_inputs, num_outputs, name):
    with tf.variable_scope(name) as scope:
        # Create new weights and biases.
        weights = tf.Variable(tf.truncated_normal([num_inputs, num_outputs], stddev=0.05))
        biases = tf.Variable(tf.constant(0.05, shape=[num_outputs]))
        # Multiply the input and weights, and then add the bias-values.
        layer = tf.matmul(input, weights) + biases
        return layer
# CONVOLUTIONAL LAYER 1
layer_conv1, weights_conv1 = new_conv_layer(input=x, num_input_channels=3, filter_size=5, num_filters=32, name ="conv1")
# Pooling Layer 1
layer_pool1 = new_pool_layer(layer_conv1, name="pool1")
# RelU layer 1
layer_relu1 = new_relu_layer(layer_pool1, name="relu1")
# CONVOLUTIONAL LAYER 2
layer_conv2, weights_conv2 = new_conv_layer(input=layer_relu1, num_input_channels=32, filter_size=5, num_filters=64, name= "conv2")
# Pooling Layer 2
layer_pool2 = new_pool_layer(layer_conv2, name="pool2")
# RelU layer 2
layer_relu2 = new_relu_layer(layer_pool2, name="relu2")
# FLATTEN LAYER
num_features = layer_relu2.get_shape()[1:4].num_elements()
layer_flat = tf.reshape(layer_relu2, [-1, num_features])
# FULLY-CONNECTED LAYER 1
layer_fc1 = new_fc_layer(layer_flat, num_inputs=num_features, num_outputs=1000, name="fc1")
# RelU layer 3
layer_relu3 = new_relu_layer(layer_fc1, name="relu3")
# FULLY-CONNECTED LAYER 2
layer_fc2 = new_fc_layer(input=layer_relu3, num_inputs=1000, num_outputs=45, name="fc2")
# Use Softmax function to normalize the output
with tf.variable_scope("Softmax"):
y_pred = tf.nn.softmax(layer_fc2)
y_pred_cls = tf.argmax(y_pred, dimension=1)
# Use Cross entropy cost function
with tf.name_scope("cross_ent"):
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=layer_fc2, labels=y_one_hot)
cost = tf.reduce_mean(cross_entropy)
# Use Adam Optimizer
with tf.name_scope("optimizer"):
optimizer = tf.train.AdamOptimizer(learning_rate = 1e-4).minimize(cost)
# Accuracy
with tf.name_scope("accuracy"):
correct_prediction = tf.equal(y_pred_cls, y_true_cls)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# setup the initialisation operator
init_op = tf.global_variables_initializer()
# Pipeline of dataset and iterator
dataset_train = tf.data.TFRecordDataset(datapath_train)
# Parse the record into tensors.
dataset_train = dataset_train.map(_extract_fn)
# Generate batches
dataset_train = dataset_train.batch(FLAGS.batch_size)
iterator_train = dataset_train.make_initializable_iterator()
next_element_train = iterator_train.get_next()
print('\n Starting the CNN train')
# Initialize the FileWriter
writer_train = tf.summary.FileWriter("Training_FileWriter/")
writer_val = tf.summary.FileWriter("Validation_FileWriter/")
#summary
accuracy = tf.summary.scalar("accuracy", accuracy)
loss = tf.summary.scalar("loss", cost)
# Merge all summaries together
merged_summary = tf.summary.merge_all()
#PERFORM THE CNN OPERATIONS
with tf.Session() as sess:
    sess.run(init_op)
    sess.run(iterator_train.initializer)
    # Add the model graph to TensorBoard
    writer_train.add_graph(sess.graph)
    writer_val.add_graph(sess.graph)
    # Loop over the number of epochs
    print('\nTraining')
    for epoch in range(FLAGS.num_epochs):
        sess.run(iterator_train.initializer)
        start_time = time.time()
        train_accuracy = 0
        validation_accuracy = 0
        acc_train_avg = 0
        val_acc_avg = 0
        for batch in range(0, int(25200/FLAGS.batch_size)):
            img_train, shp_train, lbl_train = sess.run(next_element_train)
            _, loss_train, acc_train, acc_summ = sess.run([optimizer, cost, accuracy, merged_summary], feed_dict={x: img_train, y: lbl_train})
            print(loss_train)
            print(acc_train)
            train_accuracy += acc_train
        end_time = time.time()
        # acc_train_avg = (train_accuracy/(int(25200/FLAGS.batch_size)))
        # TRAINING
        print("Epoch "+str(epoch+1)+" completed : Time usage "+str(int(end_time-start_time))+" seconds")
        print("\tAccuracy:")
        print("\t- Training Loss:\t{}", loss_train)
        print("\t- Training Accuracy:\t{}", acc_train)
        writer_train.add_summary(acc_summ, epoch+1)
#######################################################################################################################
This is the output and the error I get:
Training
1427.1069
b'\n\x11\n\naccuracy_1\x15\x00\x00\x80<'
Traceback (most recent call last):
File "train_trial.py", line 302, in <module>
train_accuracy+=acc_train
TypeError: unsupported operand type(s) for +=: 'int' and 'bytes'
You are overwriting your loss and accuracy operations here:
accuracy = tf.summary.scalar("accuracy", accuracy)
loss = tf.summary.scalar("loss", cost)
Then, when you run accuracy, you get the protobuf bytes of the summary instead of the value of the original op. Rename these variables to prevent the overwriting/name clash.
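Concretely, a minimal fix along those lines (a sketch against the question's graph; the original accuracy and cost tensors stay untouched):

# Give the summary ops their own names so they don't shadow the metric tensors.
accuracy_summary = tf.summary.scalar("accuracy", accuracy)
loss_summary = tf.summary.scalar("loss", cost)
merged_summary = tf.summary.merge_all()

# sess.run(accuracy, feed_dict=...) now returns the float metric again, while
# sess.run(merged_summary, feed_dict=...) returns the serialized summary bytes
# that writer_train.add_summary(...) expects.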

Keras shape error when checking input

I am trying to train a simple MLP model that maps input questions (using a 300-D word embedding) and image features extracted with a pretrained VGG16 model to a feature vector of fixed length. However, I can't figure out how to fix the error mentioned below. Here is the code I'm currently trying to run:
parser = argparse.ArgumentParser()
parser.add_argument('-num_hidden_units', type=int, default=1024)
parser.add_argument('-num_hidden_layers', type=int, default=3)
parser.add_argument('-dropout', type=float, default=0.5)
parser.add_argument('-activation', type=str, default='tanh')
parser.add_argument('-language_only', type=bool, default=False)
parser.add_argument('-num_epochs', type=int, default=10)  # default=100
parser.add_argument('-model_save_interval', type=int, default=10)
parser.add_argument('-batch_size', type=int, default=128)
args = parser.parse_args()

questions_train = open('data/qa/preprocess/questions_train2014.txt', 'r').read().splitlines()
answers_train = open('data/qa/preprocess/answers_train2014_modal.txt', 'r').read().splitlines()
images_train = open('data/qa/preprocess/images_train2014.txt', 'r').read().splitlines()
vgg_model_path = 'data/coco/vgg_feats.mat'
maxAnswers = 1000
questions_train, answers_train, images_train = selectFrequentAnswers(questions_train, answers_train, images_train, maxAnswers)

# encode the remaining answers
labelencoder = preprocessing.LabelEncoder()
labelencoder.fit(answers_train)
nb_classes = len(list(labelencoder.classes_))
joblib.dump(labelencoder, 'models/labelencoder.pkl')

features_struct = scipy.io.loadmat(vgg_model_path)
VGGfeatures = features_struct['feats']
print('loaded vgg features')
image_ids = open('data/coco/coco_vgg_IDMap.txt').read().splitlines()
id_map = {}
for ids in image_ids:
    id_split = ids.split()
    id_map[id_split[0]] = int(id_split[1])

nlp = English()
print('loaded word2vec features...')
img_dim = 4096
word_vec_dim = 300

model = Sequential()
if args.language_only:
    model.add(Dense(args.num_hidden_units, input_dim=word_vec_dim, init='uniform'))
else:
    model.add(Dense(args.num_hidden_units, input_dim=img_dim + word_vec_dim, init='uniform'))
model.add(Activation(args.activation))
if args.dropout > 0:
    model.add(Dropout(args.dropout))
for i in range(args.num_hidden_layers - 1):
    model.add(Dense(args.num_hidden_units, init='uniform'))
    model.add(Activation(args.activation))
    if args.dropout > 0:
        model.add(Dropout(args.dropout))
model.add(Dense(nb_classes, init='uniform'))
model.add(Activation('softmax'))

json_string = model.to_json()
if args.language_only:
    model_file_name = 'models/mlp_language_only_num_hidden_units_' + str(args.num_hidden_units) + '_num_hidden_layers_' + str(args.num_hidden_layers)
else:
    model_file_name = 'models/mlp_num_hidden_units_' + str(args.num_hidden_units) + '_num_hidden_layers_' + str(args.num_hidden_layers)
open(model_file_name + '.json', 'w').write(json_string)

print('Compiling model...')
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
print('Compilation done...')
print('Training started...')
for k in range(args.num_epochs):
    # shuffle the data points before going through them
    index_shuf = list(range(len(questions_train)))
    shuffle(index_shuf)
    questions_train = [questions_train[i] for i in index_shuf]
    answers_train = [answers_train[i] for i in index_shuf]
    images_train = [images_train[i] for i in index_shuf]
    progbar = generic_utils.Progbar(len(questions_train))
    for qu_batch, an_batch, im_batch in zip(grouper(questions_train, args.batch_size, fillvalue=questions_train[-1]),
                                            grouper(answers_train, args.batch_size, fillvalue=answers_train[-1]),
                                            grouper(images_train, args.batch_size, fillvalue=images_train[-1])):
        X_q_batch = get_questions_matrix_sum(qu_batch, nlp)
        if args.language_only:
            X_batch = X_q_batch
        else:
            X_i_batch = get_images_matrix(im_batch, id_map, VGGfeatures)
            X_batch = np.hstack((X_q_batch, X_i_batch))
        Y_batch = get_answers_matrix(an_batch, labelencoder)
        loss = model.train_on_batch(X_batch, Y_batch)
        progbar.add(args.batch_size, values=[("train loss", loss)])
        # print(type(loss))
    if k % args.model_save_interval == 0:
        model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k))

model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k))
And here is the error I get:
Keras: Error when checking input: expected dense_9_input to have shape
(4396,) but got array with shape (4096,)
I think the error lies in what you pass in the else branch when building the first layer of your model versus what you pass during training. In your first layer you specify:
model = Sequential()
if args.language_only:
    model.add(Dense(args.num_hidden_units, input_dim=word_vec_dim, init='uniform'))
else:
    model.add(Dense(args.num_hidden_units, input_dim=img_dim+word_vec_dim, init='uniform'))
You clearly pass input_dim = img_dim + word_vec_dim = 4096 + 300 = 4396. During training you pass:
X_q_batch = get_questions_matrix_sum(qu_batch, nlp)
if args.language_only:
    X_batch = X_q_batch
else:
    X_i_batch = get_images_matrix(im_batch, id_map, VGGfeatures)
    X_batch = np.hstack((X_q_batch, X_i_batch))
So, in the else branch, X_batch should be the horizontal stack of X_q_batch and X_i_batch, yet the error reports that the array actually fed to the model has only 4096 features per row, i.e. the image-feature width alone. This suggests the question features are not being concatenated as you expect; print the shapes of X_q_batch and X_i_batch to confirm.
By the way, for debugging purposes, it would be easier to give your layers a name, e.g.
x = Dense(64, activation='relu', name="dense_one")
I hope this helps.
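For what it's worth, a quick shape check makes the mismatch easy to see (a sketch with zero arrays standing in for the real batches): the first Dense layer expects word_vec_dim + img_dim = 300 + 4096 = 4396 features per row.

import numpy as np

word_vec_dim, img_dim, batch = 300, 4096, 128

X_q_batch = np.zeros((batch, word_vec_dim))  # stand-in for the question features
X_i_batch = np.zeros((batch, img_dim))       # stand-in for the image features
X_batch = np.hstack((X_q_batch, X_i_batch))

print(X_batch.shape)  # (128, 4396); anything narrower means one input has the wrong shape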

In TensorFlow, is splitting labels and features not supported?

I want to split the data into labels and features, because tf.nn.softmax_cross_entropy_with_logits requires them separately.
queue = tf.RandomShuffleQueue(
    capacity=capacity,
    min_after_dequeue=min_after_dequeue,
    dtypes=[tf.float32],
    shapes=[[n_input+1]]
)
I build the queue and enqueue the label together with the features. After that, I need to separate the labels from the features for the cost function, but how do I do that?
Thank you
import tensorflow as tf
import numpy as np

# Parameters
learning_rate = 0.003
training_epochs = 30
batch_size = 2
display_step = 1
min_after_dequeue = 5
capacity = 16246832

# Network parameters
# feature size
n_input = 199
# 1st layer num features
n_hidden_1 = 150
# 2nd layer num features
n_hidden_2 = 100
# 3rd layer num features
n_hidden_3 = 50
# 4th layer num features
n_hidden_4 = 30
# classes
n_classes = 3

# read the csv; index 0 is the label
filename_queue = tf.train.string_input_producer(["data.csv"])
record_default = [[0.0] for x in xrange(200)]  # a label and 199 features
# test file
reader = tf.TextLineReader()
# read the file
key, value = reader.read(filename_queue)
# decode
features = tf.decode_csv(value, record_defaults=record_default)
featurespack = tf.pack(features)
# xy = tf.map_fn(fn=lambda f: [f[1:], f[0]], elems=featurespack)

# queue for batching
queue = tf.RandomShuffleQueue(
    capacity=capacity,
    min_after_dequeue=min_after_dequeue,
    dtypes=[tf.float32],
    shapes=[[n_input + 1]]
)
# enqueue
enqueue_op = queue.enqueue(featurespack)
# dequeue
inputs = queue.dequeue_many(batch_size)
# threading
qr = tf.train.QueueRunner(queue, [enqueue_op] * 4)

# features, n = 199
x = tf.placeholder("float", [None, n_input])
# classes 0, 1, 2
y = tf.placeholder("float", [None, n_classes])
# dropout
dropout_keep_prob = tf.placeholder("float")

# Create the model
def multilayer_perceptron(_X, _weights, _biases, _keep_prob):
    layer_1 = tf.nn.dropout(tf.nn.relu(tf.add(tf.matmul(_X, _weights['h1']), _biases['b1'])), _keep_prob)
    layer_2 = tf.nn.dropout(tf.nn.relu(tf.add(tf.matmul(layer_1, _weights['h2']), _biases['b2'])), _keep_prob)
    layer_3 = tf.nn.dropout(tf.nn.relu(tf.add(tf.matmul(layer_2, _weights['h3']), _biases['b3'])), _keep_prob)
    layer_4 = tf.nn.dropout(tf.nn.relu(tf.add(tf.matmul(layer_3, _weights['h4']), _biases['b4'])), _keep_prob)
    return tf.sigmoid(tf.matmul(layer_4, _weights['out']) + _biases['out'])

# Store the layers' weights & biases
weights = {
    'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1], stddev=0.1)),
    'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2], stddev=0.1)),
    'h3': tf.Variable(tf.random_normal([n_hidden_2, n_hidden_3], stddev=0.1)),
    'h4': tf.Variable(tf.random_normal([n_hidden_3, n_hidden_4], stddev=0.1)),
    'out': tf.Variable(tf.random_normal([n_hidden_4, n_classes], stddev=0.1))
}
biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1])),
    'b2': tf.Variable(tf.random_normal([n_hidden_2])),
    'b3': tf.Variable(tf.random_normal([n_hidden_3])),
    'b4': tf.Variable(tf.random_normal([n_hidden_4])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}

# Construct the model
pred = multilayer_perceptron(x, weights, biases, dropout_keep_prob)
# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y))  # softmax loss
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)  # Adam optimizer
# optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.8).minimize(cost)

print "1"
with tf.Session() as sess:
    # Initialize the variables
    tf.initialize_all_variables().run()
    coord = tf.train.Coordinator()
    # start the queue runners
    tf.train.start_queue_runners(coord=coord)
    enqueue_threads = qr.create_threads(sess, coord=coord, start=True)
    print sess.run(features)
    print sess.run(features)
    print sess.run(features)
    print sess.run(features)
    print sess.run(features)
    # Training cycle
    for epoch in range(training_epochs):
        print epoch
        avg_cost = 0.
        # Loop over all batches
        for i in range(10):
            print i
            if coord.should_stop():
                break
            # get the inputs
            inputs_value = sess.run(inputs)
            # THIS DOES NOT WORK
            batch_xs = np.ndarray([x[1:] for x in inputs_value])
            batch_ys = np.ndarray([x[0] for x in inputs_value])
            print 'batch', len(batch_ys), len(batch_xs)
            # batch_xs, batch_ys = mnist.train.next_batch(batch_size)
            # Fit training using batch data: feed x and y to the optimizer
            sess.run(optimizer, feed_dict={x: batch_xs, y: batch_ys, dropout_keep_prob: 0.5})
            # Compute the average loss
            avg_cost += sess.run(cost, feed_dict={x: batch_xs, y: batch_ys, dropout_keep_prob: 0.5}) / batch_size
        # Display logs per display_step epochs
        if epoch % display_step == 0:
            print ("Epoch: %03d/%03d cost: %.9f" % (epoch, training_epochs, avg_cost))
    # Test the model
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    # Calculate accuracy
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    # print ("Accuracy:", accuracy.eval({x: mnist.test.images, y: mnist.test.labels, dropout_keep_prob: 1.}))
    coord.request_stop()
    coord.join(enqueue_threads)
    print ("Optimization Finished!")