GridsearchCV tuning KerasClassifier with callbacks error: ValueError: Found input variables with inconsistent numbers of samples - callback

I am using sklearn's GridSearchCV to fine-tune the hyperparameters of a Keras model, and I also pass callbacks to it.
Input Format: (1500, 3, 10, 10)
Output Format: (1500,)
Grid search code:
def Grid_Search_Training(model):
    """Wrap ``model`` in a 5-fold, accuracy-scored ``GridSearchCV``.

    Args:
        model: Estimator passed to ``GridSearchCV`` as ``estimator``
            (the caller hands in a ``KerasClassifier``).

    Returns:
        The unfitted ``GridSearchCV`` object; the caller invokes ``fit`` on it.
    """
    # parameters grid: 2 activations x 1 epoch setting x 4 batch sizes x 3 L2
    # strengths = 24 combinations, each evaluated with cv=5.
    epochs = [300]
    activations = ['relu', 'tanh']
    L2_lambda = [0.01, 0.001, 0.0001]
    batches = [16, 32, 64, 128]
    param_grid = dict(activation=activations, epochs=epochs, batch_size=batches, L2_lambda=L2_lambda)
    grid = GridSearchCV(estimator=model, param_grid=param_grid, scoring='accuracy', cv=5)
    return grid
def run(grid_search = True):
    """Build the model and its callbacks, then grid-search or fit once.

    Reads ``X_train`` and ``y_train`` from module scope.

    Args:
        grid_search: If true, wrap the model in ``KerasClassifier`` and tune it
            via ``Grid_Search_Training``; otherwise fit the model directly for
            100 epochs with a 20% validation split.
    """
    model = Model()
    plot_model(model, to_file='Model_plot.png', show_shapes=True, show_layer_names=True)
    # save layer names into a set, to visualize all layers' output in tensorboard
    embeddings_all_layer_names = set(layer.name for layer in model.layers if layer.name.startswith('tower_'))
    # train and save the model weights
    Model_weights_path = 'Model_weights.h5'
    checkpointer = ModelCheckpoint(Model_weights_path, monitor='val_loss', verbose=1, save_best_only=True)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.0000001)
    # One log directory per run, keyed by the start timestamp.
    tensorboard_log_dir = 'ModelLogs/{}'.format(time.time())
    tensorboard = TensorBoard(log_dir = tensorboard_log_dir, histogram_freq = 1,
                              write_graph=True, write_images=True, embeddings_freq=1,
                              embeddings_layer_names=embeddings_all_layer_names, embeddings_metadata=None)
    callbacks_list = [checkpointer, reduce_lr, tensorboard]
    fit_params = dict(callbacks=callbacks_list)
    if grid_search:
        t0 = time.time()
        # NOTE(review): incepModel is not defined in the visible code — confirm
        # it is the intended builder (the rest of this function uses Model()).
        print(incepModel().summary())
        # NOTE(review): build_fn receives the model instance built above, not a
        # builder function — confirm this is what KerasClassifier expects here.
        model = KerasClassifier(build_fn = model, verbose=1)
        grid = Grid_Search_Training(model)
        print('Start Training the model......')
        # The callbacks must be forwarded as keyword arguments. Passing the dict
        # positionally binds it to fit()'s third parameter (groups), which is
        # then length-checked against X and y.
        grid_result = grid.fit(X_train, y_train, **fit_params)
        print("Best acc Score: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
        t1 = time.time()
        t = t1-t0
        print('The GirdSearch on CNN took %.2f mins.' %(round(t/60., 2)))
        # Per-combination summary of the cross-validated scores.
        means = grid_result.cv_results_['mean_test_score']
        stds = grid_result.cv_results_['std_test_score']
        params = grid_result.cv_results_['params']
        for mean, stdev, param in zip(means, stds, params):
            print("%f (%f) with: %r" % (mean, stdev, param))
    else:
        history = model.fit(X_train, to_categorical(y_train), epochs=100, batch_size=64, validation_split=0.2, callbacks=callbacks_list)
# read_split is defined elsewhere; its four results become the module-level
# arrays that run() reads (X_train, y_train).
X_train, X_test, y_train, y_test = read_split(data)
# Take the grid-search branch of run().
run(grid_search=True)
The error is :
grid_result = grid.fit(X_train, y_train, fit_params)
File "/Users/jd/anaconda2/lib/python2.7/site-packages/sklearn/model_selection/_search.py", line 615, in fit
X, y, groups = indexable(X, y, groups)
File "/Users/jd/anaconda2/lib/python2.7/site-packages/sklearn/utils/validation.py", line 229, in indexable
check_consistent_length(*result)
File "/Users/jd/anaconda2/lib/python2.7/site-packages/sklearn/utils/validation.py", line 204, in check_consistent_length
" samples: %r" % [int(l) for l in lengths])
ValueError: Found input variables with inconsistent numbers of samples: [1500, 1500, 1]
The code works well without callbacks, i.e. when fit_params is not passed in grid_result = grid.fit(X_train, y_train, fit_params); in that case there is no error.
What causes such kind of error?

Related

Error while executing CrossEntropyLoss() in PyTorch

My dataset contains images of shape [3,28,28]. I have written the following code:
class ConvNet(nn.Module):
    """Two conv/ReLU/max-pool stages followed by dropout and two linear layers.

    ``fc1`` expects 7 * 7 * 56 flattened features, i.e. 56 channels of 7x7
    after the two stride-2 poolings; the final layer emits 10 scores.
    """

    def __init__(self):
        super(ConvNet, self).__init__()
        # padding=2 with a 5x5 kernel keeps the spatial size; each pool halves it.
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 28, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(28, 56, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.drop_out = nn.Dropout()
        self.fc1 = nn.Linear(7 * 7 * 56, 1000)
        self.fc2 = nn.Linear(1000, 10)

    def forward(self, x):
        """Return the raw (un-normalised) scores for a batch of images ``x``."""
        out = self.layer1(x)
        out = self.layer2(out)
        # Flatten everything except the batch dimension before the linear layers.
        out = out.reshape(out.size(0), -1)
        out = self.drop_out(out)
        out = self.fc1(out)
        out = self.fc2(out)
        return out
# Training script for ConvNet. learning_rate, num_epochs and loader_train are
# defined elsewhere.
model = ConvNet()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
total_step = len(loader_train)
for e in range(num_epochs):
    print("Epoch ", e+1,": ")
    for i, (images, labels) in enumerate(loader_train):
        optimizer.zero_grad()
        actual_out = model(images)
        # labels is used exactly as the loader yields it; the traceback below
        # shows it arriving as a tuple, which is what the loss call rejects.
        loss = criterion(actual_out, labels)
        loss.backward()
        optimizer.step()
        # Report progress every 100 batches.
        if (i+1) % 100 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.3f}' .format(e+1, num_epochs, i+1, total_step, loss.item()))
However, I'm getting the following error:
AttributeError Traceback (most recent call last)
in
8 actual_out = model(images)
9
---> 10 loss = criterion(actual_out, labels)
11 loss.backward()
AttributeError: 'tuple' object has no attribute 'size'
I converted labels into a tensor by the following method:
# Attempted fix: random_() takes integer bounds, so handing it the labels tuple
# raises the TypeError shown below.
target_out = torch.empty(batch_size,dtype=torch.long).random_(labels)
loss = criterion(actual_out, target_out)
But that generates:
TypeError Traceback (most recent call last)
in
---> 11 target_out = torch.empty(batch_size,dtype=torch.long).random_(labels)
12 loss = criterion(actual_out, target_out)
TypeError: random_() received an invalid combination of arguments - got (tuple), but expected one of:
(*, torch.Generator generator)
(int from, int to, *, torch.Generator generator)
(int to, *, torch.Generator generator)
Your labels object is a tuple and you want to convert it to a tensor of dtype long.
You can do this via:
# Build a long-dtype tensor from labels; assign the result and pass it to the loss.
torch.tensor(labels, dtype=torch.long)
Assuming this is how your train_loader is structured, you can reshape in the training loop rather than in the forward() function. I've given an example below; change it to suit your needs.
Also, clear the gradients before the backward pass. The .to(device) calls are optional; they only matter if you have a GPU.
# Example training loop. train_dataset, batch_size, num_epochs, device, model,
# criterion and optimizer are defined elsewhere.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # origin shape: [100, 1, 28, 28]
        # resized: [100, 784]
        images = images.reshape(-1, 28*28).to(device)
        labels = labels.to(device)
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

Training neural networks to learn moving averages

I am training a neural network to learn to calculate moving averages. The input is 5 day values and the output is moving average of the 5 days. But the NN is unable to learn it. It is giving a constant value for all predictions. The code is given below.
# Build (window, target) pairs from df, which is defined elsewhere.
X_train = []
y_train = []
n = 2000
for i in range(5, n):
    # Input: column 0 over the five rows i-5 .. i-1.
    X_train.append(df.iloc[i-5:i, 0])
    # Target: column 2 at the last row of that window.
    # NOTE(review): presumably column 2 holds the precomputed moving average — confirm.
    y_train.append(df.iloc[i-1, 2])
X_train, y_train = np.array(X_train), np.array(y_train)
# Add a trailing feature axis of size 1 for the LSTM input.
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
# Four stacked 50-unit LSTM layers, each followed by 20% dropout, then a single
# linear output unit; trained with Adam on mean squared error.
regressor = Sequential()
regressor.add(LSTM(units = 50, return_sequences = True, input_shape = (X_train.shape[1], 1)))
regressor.add(Dropout(0.2))
regressor.add(LSTM(units = 50, return_sequences = True))
regressor.add(Dropout(0.2))
regressor.add(LSTM(units = 50, return_sequences = True))
regressor.add(Dropout(0.2))
# Last recurrent layer omits return_sequences=True, unlike the three above.
regressor.add(LSTM(units = 50))
regressor.add(Dropout(0.2))
regressor.add(Dense(units = 1))
regressor.compile(optimizer = 'adam', loss = 'mean_squared_error')
regressor.fit(X_train, y_train, epochs = 250, batch_size = 32)
What am I missing?

L1 regulariser Pytorch acting opposite to what I expect

I'm trying to add an L1 penalty to a specific layer of a neural network, and I have the code below (in which I attempt to add an L1 penalty to the first layer). If I run it for lambda = 0 (i.e. no penalty), the output gets very close to the expected weights (those being [10, -12, 2, 11, -0.25]), and if I run for enough epochs or reduce the batch size it will get them exactly, as in the output below:
mlp.0.weight
Parameter containing:
tensor([[ 9.8657, -11.8305, 2.0242, 10.8913, -0.1978]],
requires_grad=True)
Then, when I run it for a large lambda, say 1000, I would expect these weights to shrink towards zero as there is a large penalty being added to the loss that we are trying to minimise. However, the opposite happens and the weights explode, as in the output below (for lam = 1000)
mlp.0.weight
Parameter containing:
tensor([[-13.9368, 9.9072, 2.2447, -11.6870, 26.7293]],
requires_grad=True)
If anyone could help me, that'd be great. I'm new to pytorch (but not the idea of regularisation), so I'm guessing it's something in my code that is the problem.
Thanks
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import numpy as np
from sklearn.linear_model import LinearRegression
class TrainDataset(Dataset):
    """Dataset over a row-indexable array: column 0 is the target, the rest are features."""

    def __init__(self, data):
        self.data = data

    def __len__(self):
        # One sample per row.
        return self.data.shape[0]

    def __getitem__(self, ind):
        """Return ``(features, target)`` for row ``ind``."""
        x = self.data[ind][1:]
        y = self.data[ind][0]
        return x, y
class TestDataset(TrainDataset):
    """Variant of ``TrainDataset`` for unlabelled data: each item is the whole row."""

    def __getitem__(self, ind):
        """Return row ``ind`` unchanged (no target column is split off)."""
        x = self.data[ind]
        return x
# NOTE: this seeds torch's RNG only; the np.random.rand call below draws from
# NumPy's generator, which is not seeded here.
torch.manual_seed(94)
x_train = np.random.rand(1000, 5)
# Noise-free linear target with true weights [10, -12, 2, 11, -0.25].
y_train = x_train[:, 0] * 10 - x_train[:, 1] * 12 + x_train[:, 2] * 2 + x_train[:, 3] * 11 - x_train[:, 4] * 0.25
y_train = y_train.reshape(1000, 1)
# The next two bare expressions have no effect when run as a script.
x_train.shape
y_train.shape
# Target goes in column 0, matching TrainDataset.__getitem__.
train_data = np.concatenate((y_train, x_train), axis=1)
train_set = TrainDataset(train_data)
batch_size = 100
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
class MLP(nn.Module):
    """Single bias-free linear layer mapping 5 inputs to 1 output.

    The layer sits inside an ``nn.Sequential`` named ``mlp``, so its weight is
    exposed as the parameter ``mlp.0.weight``.
    """

    def __init__(self):
        super(MLP, self).__init__()
        self.mlp = nn.Sequential(nn.Linear(5, 1, bias=False))

    def forward(self, x_mlp):
        """Apply the linear layer to ``x_mlp`` and return the result."""
        out = self.mlp(x_mlp)
        return out
# Train the MLP with MSE plus an L1 penalty (weight lam) on the first layer.
# train_loader and batch_size come from the data-setup code.
device = 'cpu'
model = MLP()
optimizer = torch.optim.SGD(model.parameters(), lr=0.02, momentum=0.82)
criterion = nn.MSELoss()
epochs = 5
lam = 0
model.train()
for epoch in range(epochs):
    losses = []
    for batch_num, input_data in enumerate(train_loader):
        optimizer.zero_grad()
        x, y = input_data
        x = x.to(device).float()
        y = y.reshape(batch_size, 1)
        y = y.to(device)
        output = model(x)
        # Pick out the first layer's weight and take its L1 norm.
        for name, param in model.named_parameters():
            if name == 'mlp.0.weight':
                l1_norm = torch.norm(param, 1)
        loss = criterion(output, y) + lam * l1_norm
        loss.backward()
        optimizer.step()
        # NOTE(review): the source lost its indentation; this print is placed
        # in the batch loop because the message reports the batch number.
        print('\tEpoch %d | Batch %d | Loss %6.2f' % (epoch, batch_num, loss.item()))
# After training, dump every trainable parameter.
for name, param in model.named_parameters():
    if param.requires_grad:
        print(name)
        print(param)
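I found that if I use Adagrad as the optimiser instead of SGD, it acts as expected. I will need to look into the difference between the two, but this can be considered answered. (A likely explanation: the gradient of lam * ||w||_1 is lam * sign(w), so with lr = 0.02 and lam = 1000 each plain SGD step moves every weight by about 20, and more with momentum, which overshoots zero and oscillates instead of shrinking. Adagrad rescales each step by the accumulated gradient magnitude, so its steps stay small.)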

BNN with regression using Pymc3

I'm trying to build a BNN (Bayesian neural network) for a regression task, and I get a result that does not look right.
My code
First, build toy data
#Toy model
def build_toy_dataset(N=50, noise_std=0.2):
    """Generate a noisy cosine regression set.

    Args:
        N: Number of points, evenly spaced on [-3, 3].
        noise_std: Standard deviation of the Gaussian noise added to cos(x).

    Returns:
        ``(x, y)``: ``x`` reshaped to ``(N, 1)`` and passed through ``scale``,
        ``y`` of length ``N``; both cast to ``floatX``.
    """
    x = np.linspace(-3, 3, num=N)
    # y is computed from the unscaled x; scaling is applied to x afterwards.
    y = np.cos(x) + np.random.normal(0, noise_std, size=N)
    x = x.reshape((N, 1))
    x = scale(x)
    x = x.astype(floatX)
    y = y.astype(floatX)
    return x, y
# Draw independent train and test sets and plot them together.
N = 50 # number of data points
D = 1 # number of features
X_train, Y_train = build_toy_dataset(N)
X_test, Y_test = build_toy_dataset(N)
fig, ax = plt.subplots()
ax.plot(X_test,Y_test,'ro',X_train,Y_train,'bx',alpha=0.2)
ax.legend(['Y_test','Y_train'])
ax.set(xlabel='X', ylabel='Y', title='Toy Regression data set');
# NOTE(review): X and Y are not defined in the visible code, and the split
# below overwrites the X_train/X_test/Y_train/Y_test created above — confirm
# which data set is meant to be used.
X = scale(X)
X = X.astype(floatX)
Y = Y.astype(floatX)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=.5)
Then, define BNN with output
#2 layers with 5 nodes each
def construct_nn_2Layers(ann_input, ann_output):
    """Build a PyMC3 model of a 2-hidden-layer (5 units each) tanh network.

    Args:
        ann_input: Shared variable holding the inputs; its ``get_value()``
            must be 2-D, with the feature count on axis 1.
        ann_output: Observed targets for the Normal likelihood.

    Returns:
        The ``pm.Model`` containing the weight/bias priors and the likelihood.
    """
    n_hidden = 5
    n_features = ann_input.get_value().shape[1]
    # Initialize random weights between each layer
    init_1 = np.random.randn(n_features, n_hidden).astype(floatX)
    init_2 = np.random.randn(n_hidden, n_hidden).astype(floatX)
    init_out = np.random.randn(n_hidden).astype(floatX)
    # Initialize random biases in each layer
    init_b_1 = np.random.randn(n_hidden).astype(floatX)
    init_b_2 = np.random.randn(n_hidden).astype(floatX)
    init_b_out = np.random.randn(1).astype(floatX)
    with pm.Model() as neural_network:
        # Weights from input to hidden layer
        weights_in_1 = pm.Normal('w_in_1', 0, sd=1,
                                 shape=(n_features, n_hidden),
                                 testval=init_1)
        bias_1 = pm.Normal('b_1', mu=0, sd=1, shape=(n_hidden), testval=init_b_1)
        # Weights from 1st to 2nd layer
        weights_1_2 = pm.Normal('w_1_2', 0, sd=1,
                                shape=(n_hidden, n_hidden),
                                testval=init_2)
        bias_2 = pm.Normal('b_2', mu=0, sd=1, shape=(n_hidden), testval=init_b_2)
        # Weights from hidden layer to output
        weights_2_out = pm.Normal('w_2_out', 0, sd=1,
                                  shape=(n_hidden,),
                                  testval=init_out)
        bias_out = pm.Normal('b_out', mu=0, sd=1, shape=(1), testval=init_b_out)
        # Build neural-network using tanh activation function
        act_1 = pm.math.tanh(pm.math.dot(ann_input,
                                         weights_in_1)+bias_1)
        act_2 = pm.math.tanh(pm.math.dot(act_1,
                                         weights_1_2)+bias_2)
        # Linear output layer (no activation) for regression.
        act_out = pm.math.dot(act_2, weights_2_out)+bias_out
        # Observation noise scale with a half-normal prior.
        sd = pm.HalfNormal('sd', sd=1)
        out = pm.Normal('out', mu=act_out, sd=sd, observed=ann_output)
    return neural_network
Then construct:
# Hold the data in shared variables so it can be swapped later via set_value.
ann_input = theano.shared(X_train)
ann_output = theano.shared(Y_train)
neural_network = construct_nn_2Layers(ann_input, ann_output)
run ADVI:
# Fit the model with ADVI for 30000 iterations, recording the approximation's
# mean and std through a Tracker callback.
with neural_network:
    inference_no_s = pm.ADVI()
    # Checking convergence - Tracking parameters
    tracker = pm.callbacks.Tracker(
        mean=inference_no_s.approx.mean.eval,  # callable that returns mean
        std=inference_no_s.approx.std.eval  # callable that returns std
    )
    approx_no_s = pm.fit(n=30000, method=inference_no_s, callbacks=[tracker])
Predict in test:
# Swap the test data into the shared variables, then draw posterior predictive samples.
ann_input.set_value(X_test)
ann_output.set_value(Y_test)
with neural_network:
    # NOTE(review): trace is not defined in the visible code; the fit above
    # produced approx_no_s, so confirm where trace is drawn from.
    ppc = pm.sample_posterior_predictive(trace, samples=500, progressbar=False)
This is what I get, and it does not look right. What am I doing wrong?

TensorFlow restoring from NN does not work

I am struggling with restoring values from NN in tensorflow. I tried to follow the examples on net, and here is my code:
import tensorflow as tf
import numpy as np
import math, random
import matplotlib.pyplot as plt
np.random.seed(1000) # for repro
# Target: sin(x) plus Gaussian noise (std 0.1), redrawn on every call.
function_to_learn = lambda x: np.sin(x) + 0.1*np.random.randn(*x.shape)
NUM_HIDDEN_NODES = 2
NUM_EXAMPLES = 1000
TRAIN_SPLIT = .8
MINI_BATCH_SIZE = 100
NUM_EPOCHS = 500
# Column vector of NUM_EXAMPLES inputs drawn uniformly from [-2*pi, 2*pi).
all_x = np.float32(np.random.uniform(-2*math.pi, 2*math.pi, (1, NUM_EXAMPLES))).T
np.random.shuffle(all_x)
train_size = int(NUM_EXAMPLES*TRAIN_SPLIT)
trainx = all_x[:train_size]
validx = all_x[train_size:]
trainy = function_to_learn(trainx)
validy = function_to_learn(validx)
plt.figure()
plt.scatter(trainx, trainy, c='green', label='train')
plt.scatter(validx, validy, c='red', label='validation')
plt.legend()
X = tf.placeholder(tf.float32, [None, 1], name="X")
Y = tf.placeholder(tf.float32, [None, 1], name="Y")
# NOTE: in the four lines below, name= is an argument of tf.zeros, not of
# tf.Variable, so it names the zeros tensor rather than the variable.
# These module-level variables are separate from the ones model() creates
# through init_weights.
w_h = tf.Variable(tf.zeros([1, NUM_HIDDEN_NODES],name="w_h"))
b_h = tf.Variable(tf.zeros([1, NUM_HIDDEN_NODES],name="b_h"))
w_o = tf.Variable(tf.zeros([NUM_HIDDEN_NODES,1],name="w_o"))
b_o = tf.Variable(tf.zeros([1, 1],name="b_o"))
def init_weights(shape, init_method='xavier', xavier_params = (None, None)):
    """Create a float32 ``tf.Variable`` of ``shape`` with the named initialiser.

    Args:
        shape: Shape of the variable.
        init_method: ``'zeros'``, ``'uniform'`` or ``'xavier'``.
        xavier_params: ``(fan_in, fan_out)`` for ``'xavier'``; a ``None`` entry
            falls back to the first / last entry of ``shape``.

    Returns:
        The newly created variable.

    Raises:
        ValueError: If ``init_method`` is not one of the supported names.
    """
    if init_method == 'zeros':
        return tf.Variable(tf.zeros(shape, dtype=tf.float32))
    if init_method == 'uniform':
        # Despite its name this option samples from a normal distribution
        # (stddev 0.01); kept as in the original.
        return tf.Variable(tf.random_normal(shape, stddev=0.01, dtype=tf.float32))
    if init_method == 'xavier':
        # The original had no branch for its own default and returned None.
        # Use Glorot/Xavier uniform: U(-limit, limit), limit = sqrt(6 / (fan_in + fan_out)).
        fan_in, fan_out = xavier_params
        if fan_in is None:
            fan_in = shape[0]
        if fan_out is None:
            fan_out = shape[-1]
        limit = math.sqrt(6.0 / (fan_in + fan_out))
        return tf.Variable(tf.random_uniform(shape, minval=-limit, maxval=limit, dtype=tf.float32))
    raise ValueError("unknown init_method: %r" % (init_method,))
def model(X, num_hidden = NUM_HIDDEN_NODES):
    """Build a one-hidden-layer sigmoid network on ``X`` and return its output tensor.

    Every call creates four new variables through ``init_weights``; they are
    distinct objects from the module-level ``w_h``/``b_h``/``w_o``/``b_o``.

    Args:
        X: Input tensor, multiplied by a ``[1, num_hidden]`` weight matrix.
        num_hidden: Number of hidden units.
    """
    w_h = init_weights([1, num_hidden], 'uniform' )
    b_h = init_weights([1, num_hidden], 'zeros')
    h = tf.nn.sigmoid(tf.matmul(X, w_h) + b_h)
    w_o = init_weights([num_hidden, 1], 'xavier', xavier_params=(num_hidden, 1))
    b_o = init_weights([1, 1], 'zeros')
    # Linear output layer.
    return tf.matmul(h, w_o) + b_o
# Build the graph, train with Adam in mini-batches, and checkpoint every 100 epochs.
yhat = model(X, NUM_HIDDEN_NODES)
train_op = tf.train.AdamOptimizer().minimize(tf.nn.l2_loss(yhat - Y))
# Build the validation-loss op once; creating it inside the epoch loop would
# add a new op to the graph every epoch.
valid_loss = tf.nn.l2_loss(yhat - validy)
plt.figure()
# List every variable that exists in the graph before training.
with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    for v in tf.all_variables():
        print(v.name)
saver = tf.train.Saver()
errors = []
with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    for i in range(NUM_EPOCHS):
        # The end range runs to len(trainx) + 1 so the final mini-batch is
        # included; stopping at len(trainx) silently dropped it.
        for start, end in zip(range(0, len(trainx), MINI_BATCH_SIZE),
                              range(MINI_BATCH_SIZE, len(trainx) + 1, MINI_BATCH_SIZE)):
            sess.run(train_op, feed_dict={X: trainx[start:end], Y: trainy[start:end]})
        # Labelled "MSE" in the output, but computed with tf.nn.l2_loss on the
        # validation set.
        mse = sess.run(valid_loss, feed_dict={X:validx})
        errors.append(mse)
        if i%100 == 0:
            print("epoch %d, validation MSE %g" % (i, mse))
            # This is the module-level w_h, not the variable created inside model().
            print(sess.run(w_h))
            saver.save(sess,"/Python/tensorflow/res/save_net.ckpt", global_step = i)
    print(" ******* AFTR *******")
    for v in tf.all_variables():
        print(v.name)
    plt.plot(errors)
    plt.xlabel('#epochs')
    plt.ylabel('MSE')
To get the restored values, I tried:
import tensorflow as tf
import numpy as np
import math, random
import matplotlib.pyplot as plt
NUM_HIDDEN_NODES = 2
#SECOND PART TO GET THE STORED VALUES
# Placeholder variables to restore into; the arange values are overwritten if
# the restore succeeds.
# NOTE(review): these variables carry the names 'w_h', 'b_h', 'w_o', 'b_o',
# whereas the training script passed name= to tf.zeros rather than to
# tf.Variable — confirm the checkpoint holds variables under these names.
w_h = tf.Variable(np.arange(NUM_HIDDEN_NODES).reshape(1, NUM_HIDDEN_NODES), dtype=tf.float32, name='w_h')
b_h = tf.Variable(np.arange(NUM_HIDDEN_NODES).reshape(1, NUM_HIDDEN_NODES), dtype=tf.float32, name='b_h')
w_o = tf.Variable(np.arange(NUM_HIDDEN_NODES).reshape(NUM_HIDDEN_NODES, 1), dtype=tf.float32, name='w_o')
b_o = tf.Variable(np.arange(1).reshape(1, 1), dtype=tf.float32, name='b_o')
saver = tf.train.Saver()
with tf.Session() as sess:
    ckpt = tf.train.get_checkpoint_state("/Python/tensorflow/res/")
    if ckpt and ckpt.model_checkpoint_path:
        # Restores from checkpoint
        saver.restore(sess, "/Python/tensorflow/res/save_net.ckpt-400")
        print("Model loaded")
    else:
        print("No checkpoint file found")
    # Evaluated inside the session block because sess is needed to read the variables.
    print("weights:", sess.run(w_h))
    print("biases:", sess.run(b_h))
Your help is greatly appreciated and I am almost giving up on this.
Thanks a lot again
It seems the variables stored in the checkpoint file you want to restore from differ (in name or shape) from the variables defined in your current code.
Save: (if substitute it with constants from definitions above)
# Saving side: hidden size written out as 5.
w_h = tf.Variable(tf.zeros([1, 5],name="w_h"))
b_h = tf.Variable(tf.zeros([1, 5],name="b_h"))
w_o = tf.Variable(tf.zeros([5,1],name="w_o"))
b_o = tf.Variable(tf.zeros([1, 1],name="b_o"))
Restore:
# Restoring side: hidden size written out as 10, so the shapes differ from the saving side.
w_h = tf.Variable(np.arange(10).reshape(1, 10), dtype=tf.float32, name='w_h')
b_h = tf.Variable(np.arange(10).reshape(1, 10), dtype=tf.float32, name='b_h')
w_o = tf.Variable(np.arange(10).reshape(10, 1), dtype=tf.float32, name='w_o')
b_o = tf.Variable(np.arange(1).reshape(1, 1), dtype=tf.float32, name='b_o')
To prevent these types of problems, build the model with shared functions for both training and inference, so that all your code uses the same variables and constants.
You are creating two sets of weights, once globally and second time when you call init_weights. The second set of variables is the one that's getting optimized, but both sets are saved.
In your eval code, you are creating this set of variables once, so your restore only restores the first set, which has not been modified after initialization.
The solution is to either factor out model creation code so that exactly same graph is created during training and during eval, or to use meta_graph which will recreate graph structure during restore.