Training neural networks to learn moving averages - neural-network

I am training a neural network to learn to calculate moving averages. The input is 5 day values and the output is moving average of the 5 days. But the NN is unable to learn it. It is giving a constant value for all predictions. The code is given below.
# Build the training windows: sample i holds rows i-5 .. i-1 of column 0, and
# its target is column 2 of row i-1 (the last row of the window).
# NOTE(review): column 2 is presumably the precomputed 5-day moving average —
# confirm against how `df` is built.
n = 2000
X_train = np.array([df.iloc[i - 5:i, 0] for i in range(5, n)])
y_train = np.array([df.iloc[i - 1, 2] for i in range(5, n)])

# Add a trailing feature axis: (samples, timesteps) -> (samples, timesteps, 1).
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)

# Four stacked LSTM layers (50 units each, dropout 0.2 after every one)
# followed by a single-unit dense output.
regressor = Sequential([
    LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], 1)),
    Dropout(0.2),
    LSTM(units=50, return_sequences=True),
    Dropout(0.2),
    LSTM(units=50, return_sequences=True),
    Dropout(0.2),
    LSTM(units=50),
    Dropout(0.2),
    Dense(units=1),
])

regressor.compile(optimizer='adam', loss='mean_squared_error')
regressor.fit(X_train, y_train, epochs=250, batch_size=32)
What am I missing?

Related

pyTorch mat1 and mat2 cannot be multiplied

I am getting the following error:
RuntimeError: mat1 and mat2 shapes cannot be multiplied (32x33856 and 640000x256)
I don't understand how I need to change the parameters of my net. I took the net created in this paper and tried to modify the parameters to meet my needs. This is the code; I changed the parameters of the first convolution but still get the error:
class ChordClassificationNetwork(nn.Module):
    """Small CNN that maps a 3-channel image to a softmax over 7 classes.

    Layout: two (3x3 conv -> ReLU -> 2x2 max-pool) stages, dropout, then
    three fully connected layers (256, 256, 7).

    Args:
        train_model: Stored on the instance as ``self.train_model``; it is not
            read anywhere in this class.
        input_size: ``(height, width)`` of the images fed to ``forward``.
            Used only to size the first fully connected layer. Defaults to
            ``(100, 100)``.
    """

    def __init__(self, train_model=False, input_size=(100, 100)):
        super(ChordClassificationNetwork, self).__init__()
        self.train_model = train_model
        self.flatten = nn.Flatten()
        self.firstConv = nn.Conv2d(3, 64, (3, 3))
        self.secondConv = nn.Conv2d(64, 64, (3, 3))
        self.pool = nn.MaxPool2d(2)
        self.drop = nn.Dropout(0.25)
        # The flattened size must match what the conv/pool stack actually
        # produces, not the raw image size: for 100x100 input this is
        # 23 * 23 * 64 = 33856 (100 -> 98 -> 49 -> 47 -> 23).
        height, width = input_size
        self.fc1 = nn.Linear(
            self._spatial_after_convs(height) * self._spatial_after_convs(width) * 64,
            256,
        )
        self.fc2 = nn.Linear(256, 256)
        self.outLayer = nn.Linear(256, 7)

    @staticmethod
    def _spatial_after_convs(size):
        """Return one spatial dimension after both conv+pool stages."""
        for _ in range(2):
            # An unpadded 3x3 convolution removes 2 pixels; 2x2 pooling halves (floor).
            size = (size - 2) // 2
        return size

    def forward(self, x):
        """Return per-class probabilities (softmax over dim 1) for a batch ``x``."""
        x = self.pool(F.relu(self.firstConv(x)))
        x = self.pool(F.relu(self.secondConv(x)))
        x = self.drop(x)
        x = self.flatten(x)
        x = self.drop(F.relu(self.fc1(x)))
        x = self.drop(F.relu(self.fc2(x)))
        x = self.outLayer(x)
        output = F.softmax(x, dim=1)
        return output
and this is the training file:
# Use the GPU when CUDA is available, otherwise the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Every image is resized to 100x100 before it reaches the model.
transformations = transforms.Compose([transforms.Resize((100, 100))])

# Training hyper-parameters and DataLoader settings.
num_epochs = 10
learning_rate = 0.001
train_CNN = False
batch_size = 32
shuffle = True
pin_memory = True
num_workers = 1

dataset = GuitarDataset("../chords_data/cropped_images/train", transform=transformations)

# 80/20 random split; the validation part takes the remainder so the two
# lengths always add up to len(dataset).
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_set, validation_set = torch.utils.data.random_split(dataset, [train_size, val_size])

# Both loaders share the same settings.
loader_kwargs = dict(
    shuffle=shuffle,
    batch_size=batch_size,
    num_workers=num_workers,
    pin_memory=pin_memory,
)
train_loader = DataLoader(dataset=train_set, **loader_kwargs)
validation_loader = DataLoader(dataset=validation_set, **loader_kwargs)

model = ChordClassificationNetwork().to(device)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
def check_accuracy(loader, model):
    """Print and return the thresholded accuracy of ``model`` over ``loader``.

    Every score is turned into a 0/1 prediction with a 0.5 threshold and
    compared element-wise with the labels; the accuracy is the fraction of
    matching elements.

    Args:
        loader: Iterable of ``(inputs, labels)`` batches. It is compared with
            the module-level ``train_loader`` only to pick the log message.
        model: Module to evaluate. It is switched to eval mode for the pass
            and put back into whatever mode it was in before returning.

    Returns:
        The accuracy in percent, formatted with two decimals (``"0.00"`` when
        the loader yields no samples).
    """
    if loader == train_loader:
        print("Checking accuracy on training data")
    else:
        print("Checking accuracy on validation data")

    num_correct = 0
    num_samples = 0
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for x, y in loader:
                x = x.to(device=device)
                y = y.to(device=device)
                scores = model(x)
                # Threshold the whole tensor at once: a Python `if` on each
                # row fails as soon as a row holds more than one score.
                predictions = (scores >= 0.5).float()
                # Align e.g. (batch, 1) scores with (batch,) labels so `==`
                # compares element-wise instead of broadcasting.
                if predictions.shape != y.shape and predictions.numel() == y.numel():
                    predictions = predictions.reshape(y.shape)
                num_correct += int((predictions == y).sum())
                num_samples += predictions.numel()
    finally:
        # Do not leave the caller's model stuck in eval mode (dropout off).
        model.train(was_training)

    if num_samples:
        accuracy = float(num_correct) / float(num_samples) * 100
    else:
        accuracy = 0.0
    print(f"Got {num_correct} / {num_samples} with accuracy {accuracy:.2f}")
    return f"{accuracy:.2f}"
def train():
    """Train the module-level ``model`` on ``train_loader`` for ``num_epochs`` epochs.

    On every even epoch the validation accuracy is computed first and shown
    in the progress bar. Each batch does a forward pass, computes
    ``criterion``, back-propagates and takes one optimizer step.
    """
    for epoch in range(num_epochs):
        loop = tqdm(train_loader, total=len(train_loader), leave=True)
        if epoch % 2 == 0:
            loop.set_postfix(val_acc=check_accuracy(validation_loader, model))
        # check_accuracy() calls model.eval(); re-enable training mode every
        # epoch so dropout is active while the weights are being updated.
        model.train()
        for imgs, labels in loop:
            imgs = imgs.to(device)
            labels = labels.to(device)
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loop.set_description(f"Epoch [{epoch}/{num_epochs}]")
            loop.set_postfix(loss=loss.item())


if __name__ == "__main__":
    train()
What am I doing wrong?
Look at the error message, the issue comes from the fc1 layer which doesn't have the required number of neurons. It is receiving a tensor of shape (batch_size, 33856) but expects (batch_size, 640000). The reduction in dimensionality is caused by the different layers you have applied to your input tensor before fc1.
You can fix this by defining fc1 with:
self.fc1 = nn.Linear(33856, 256)
Alternatively, you can use nn.LazyLinear which will initialize its weights with the appropriate number of neurons at runtime depending on the input it receives. But that's lazy:
self.fc1 = nn.LazyLinear(256)

Constant loss through epochs

I coded this neural network to perform a Gaussian regression, but I don't understand why my loss doesn't change across epochs. I set the learning rate to 1 to see whether the loss decreases, but it does not. I chose to use 2000 points to train my neural network. I have looked at several algorithms on this website, and I don't really understand why my algorithm does not achieve what I expect.
I have already imported all libraries needed.
Thank you for your help
def f(x):
    """Return ``x * sin(x)``, the function the network is asked to learn."""
    return np.multiply(x, np.sin(x))
m = 2000
# m sample points in [0, 10). (The earlier zero-filled (1, m) array was
# overwritten immediately and has been dropped.)
X_bis = np.random.random(m) * 10

# Split into training / validation / test sets: 600 / 200 / remaining points.
X_train = X_bis[0:600]
X_val = X_bis[600:800]
X_test = X_bis[800:]
y_train = f(X_train)
y_val = f(X_val)
y_test = f(X_test)

# Normalisation statistics, computed on the training split only.
mean_X_train = np.mean(X_train)
std_X_train = np.std(X_train)
mean_y_train = np.mean(y_train)
std_y_train = np.std(y_train)
class MyDataset(data.Dataset):
    """Dataset of (feature, target) pairs, normalised on access.

    Each item is standardised with the module-level training statistics
    (``mean_X_train``/``std_X_train`` for the feature,
    ``mean_y_train``/``std_y_train`` for the target) and returned as a pair
    of float tensors with at least one dimension.
    """

    def __init__(self, data_feature, data_target):
        self.data_feature = data_feature
        self.data_target = data_target

    def __len__(self):
        return len(self.data_feature)

    def __getitem__(self, index):
        feature = (self.data_feature[index] - mean_X_train) / std_X_train
        target = (self.data_target[index] - mean_y_train) / std_y_train
        return self._as_float_tensor(feature), self._as_float_tensor(target)

    @staticmethod
    def _as_float_tensor(value):
        """Wrap ``value`` in an array of at least 1 dimension and convert it to a float tensor."""
        return torch.from_numpy(np.array(value, ndmin=1)).float()
# Wrap every split in a MyDataset and a DataLoader.
# Batch sizes: 100 for training and test, 10 for validation.
training_set = MyDataset(X_train, y_train)
train_loading = torch.utils.data.DataLoader(dataset=training_set, batch_size=100)

val_set = MyDataset(X_val, y_val)
val_loading = torch.utils.data.DataLoader(dataset=val_set, batch_size=10)

test_set = MyDataset(X_test, y_test)
test_loading = torch.utils.data.DataLoader(dataset=test_set, batch_size=100)
class Net(nn.Module):
    """One-hidden-layer regressor: Linear(1, 10) -> ReLU -> Linear(10, 1)."""

    def __init__(self):
        super().__init__()
        self.FC1 = nn.Linear(1, 10)
        self.FC2 = nn.Linear(10, 1)

    def forward(self, x):
        """Return the network output for a batch ``x`` whose last dimension is 1."""
        hidden = F.relu(self.FC1(x))
        return self.FC2(hidden)
# Model, mean-squared-error loss, and SGD with momentum and weight decay.
model = Net()
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=1,
    weight_decay=0.01,
    momentum=0.9,
)
def train(net, train_loader, optimizer, epoch):
    """Run one training epoch and print the average batch loss.

    Args:
        net: Module to train; it is put into training mode.
        train_loader: Sized iterable of ``(inputs, target)`` batches.
        optimizer: Optimizer that holds ``net``'s parameters.
        epoch: Epoch index, used only in the printed message.
    """
    net.train()
    total_loss = 0
    for inputs, target in train_loader:
        # Clear gradients left over from the previous batch.
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, target)
        # Without the backward pass no gradients exist and optimizer.step()
        # leaves every parameter unchanged — the loss then never moves.
        loss.backward()
        optimizer.step()
        total_loss += loss.cpu().item()
    print('Epoch:', epoch, 'average training loss ', total_loss / len(train_loader))
def test(net, test_loader):
    """Evaluate ``net`` and print the average per-batch RMSE in original units.

    Predictions and targets are both mapped back from normalised space with
    the training-set *target* statistics before the loss is computed.

    Args:
        net: Module to evaluate; it is put into eval mode.
        test_loader: Sized iterable of ``(inputs, target)`` batches whose
            targets are normalised with ``mean_y_train`` / ``std_y_train``.
    """
    net.eval()
    total_loss = 0
    # No gradients are needed for evaluation.
    with torch.no_grad():
        for inputs, target in test_loader:
            outputs = net(inputs)
            # The network predicts normalised y, so undo the normalisation
            # with the y statistics (the X statistics do not apply here).
            outputs = outputs * std_y_train + mean_y_train
            target = target * std_y_train + mean_y_train
            loss = criterion(outputs, target)
            total_loss += sqrt(loss.cpu().item())
    print('average testing loss', total_loss / len(test_loader))
for epoch in range(50):
train(model,train_loading,optimizer,epoch)
test(model,val_loading)
'''
I'm wondering why you don't have loss.backward() after the line that you compute the loss (i.e., loss = criterion(outputs,target)) in your training snippet. This will help backpropagating and ultimately updating the parameters of your network upon optimizer.step(). Also, try using lower learning rates as lr=1 normally is too much in training such networks. Try using learning rates in between 0.001-0.01 to see if your network is learning the mapping between input X and target Y.

L1 regulariser Pytorch acting opposite to what I expect

I'm trying to add an L1 penalty to a specific layer of a neural network, and I have the code below (in which I attempt to add an L1 penalty to the first layer). If I run it for lambda = 0 (i.e. no penalty), the output gets very close to the expected weights (those being [10, -12, 2, 11, -0.25]), and if I run it for enough epochs or reduce the batch size it will get them exactly, as in the output below:
mlp.0.weight
Parameter containing:
tensor([[ 9.8657, -11.8305, 2.0242, 10.8913, -0.1978]],
requires_grad=True)
Then, when I run it for a large lambda, say 1000, I would expect these weights to shrink towards zero as there is a large penalty being added to the loss that we are trying to minimise. However, the opposite happens and the weights explode, as in the output below (for lam = 1000)
mlp.0.weight
Parameter containing:
tensor([[-13.9368, 9.9072, 2.2447, -11.6870, 26.7293]],
requires_grad=True)
If anyone could help me, that'd be great. I'm new to pytorch (but not the idea of regularisation), so I'm guessing it's something in my code that is the problem.
Thanks
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import numpy as np
from sklearn.linear_model import LinearRegression
class TrainDataset(Dataset):
    """Row-indexed dataset whose first column is the target.

    ``data`` is a 2-D array; item ``ind`` is returned as
    ``(data[ind][1:], data[ind][0])``, i.e. (features, target).
    """

    def __init__(self, data):
        self.data = data

    def __len__(self):
        return self.data.shape[0]

    def __getitem__(self, ind):
        row = self.data[ind]
        return row[1:], row[0]
class TestDataset(TrainDataset):
    """Variant of TrainDataset without targets: item ``ind`` is the whole row."""

    def __getitem__(self, ind):
        return self.data[ind]
# NOTE(review): this seeds torch only; the features below come from
# np.random, so the generated data itself is not made reproducible by it.
torch.manual_seed(94)

# 1000 samples with 5 uniform features; the target is a fixed linear
# combination with weights [10, -12, 2, 11, -0.25] and no noise.
x_train = np.random.rand(1000, 5)
y_train = x_train[:, 0] * 10 - x_train[:, 1] * 12 + x_train[:, 2] * 2 + x_train[:, 3] * 11 - x_train[:, 4] * 0.25
# Column vector, so it can be stacked in front of the features.
y_train = y_train.reshape(-1, 1)

# Layout expected by TrainDataset: column 0 is the target, the rest are features.
train_data = np.concatenate((y_train, x_train), axis=1)
train_set = TrainDataset(train_data)
batch_size = 100
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
class MLP(nn.Module):
    """Single bias-free linear layer (5 inputs -> 1 output) wrapped in ``nn.Sequential``.

    The layer's weight is registered under the parameter name ``mlp.0.weight``.
    """

    def __init__(self):
        super().__init__()
        linear = nn.Linear(5, 1, bias=False)
        self.mlp = nn.Sequential(linear)

    def forward(self, x_mlp):
        return self.mlp(x_mlp)
device = 'cpu'
model = MLP()
optimizer = torch.optim.SGD(model.parameters(), lr=0.02, momentum=0.82)
criterion = nn.MSELoss()
epochs = 5
# Weight of the L1 penalty on the first layer; 0 disables it.
# NOTE: with plain SGD the penalty moves each weight by lr * lam per step
# (0.02 * 1000 = 20 for lam = 1000) before momentum, which is larger than the
# weights themselves, so a large lam overshoots zero rather than shrinking
# towards it.
lam = 0

model.train()
for epoch in range(epochs):
    losses = []
    for batch_num, input_data in enumerate(train_loader):
        optimizer.zero_grad()
        x, y = input_data
        x = x.to(device).float()
        # Match the output's dtype and shape: the batches come from a float64
        # numpy array, and reshape(-1, 1) also works for a final partial batch.
        y = y.to(device).float().reshape(-1, 1)
        output = model(x)
        # L1 norm of the first layer's weight ('mlp.0.weight'), read directly
        # so the penalty term is always defined.
        l1_norm = torch.norm(model.mlp[0].weight, 1)
        loss = criterion(output, y) + lam * l1_norm
        loss.backward()
        optimizer.step()
        print('\tEpoch %d | Batch %d | Loss %6.2f' % (epoch, batch_num, loss.item()))

# Show every trainable parameter after training.
for name, param in model.named_parameters():
    if param.requires_grad:
        print(name)
        print(param)
I found that if I use Adagrad as the optimiser instead of SGD, it acts as expected. Will need to look into the difference of those now, but this can be considered answered.

BNN with regression using Pymc3

I'm trying to build BNN in a regression task, and I get a result that seems not true.
My code
First, build toy data
#Toy model
def build_toy_dataset(N=50, noise_std=0.2):
    """Return ``(x, y)`` for a noisy cosine on ``N`` evenly spaced points in [-3, 3].

    ``y`` is ``cos(x)`` plus Gaussian noise with standard deviation
    ``noise_std``. ``x`` is reshaped to ``(N, 1)`` and passed through
    ``scale``; both arrays are cast to ``floatX``.
    """
    grid = np.linspace(-3, 3, num=N)
    noisy_cos = np.cos(grid) + np.random.normal(0, noise_std, size=N)
    features = scale(grid.reshape((N, 1))).astype(floatX)
    targets = noisy_cos.astype(floatX)
    return features, targets
N = 50 # number of data points
D = 1 # number of features
# Draw two independent toy datasets of N points each: one for training, one for testing.
X_train, Y_train = build_toy_dataset(N)
X_test, Y_test = build_toy_dataset(N)
# Plot test points (red circles) and training points (blue crosses).
fig, ax = plt.subplots()
ax.plot(X_test,Y_test,'ro',X_train,Y_train,'bx',alpha=0.2)
ax.legend(['Y_test','Y_train'])
ax.set(xlabel='X', ylabel='Y', title='Toy Regression data set');
# NOTE(review): X and Y are not defined anywhere in the visible snippet, so
# the next lines would raise NameError as shown; if they are defined
# elsewhere, the split below replaces the toy train/test sets built above.
# Confirm which dataset is intended.
X = scale(X)
X = X.astype(floatX)
Y = Y.astype(floatX)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=.5)
Then, define BNN with output
#2 layers with 5 nodes each
def construct_nn_2Layers(ann_input, ann_output):
    """Build a PyMC3 model of a two-hidden-layer tanh network for regression.

    All weights and biases get ``Normal(0, 1)`` priors and random normal test
    values. The likelihood is ``Normal(mu=network_output, sd=sd)`` with a
    ``HalfNormal(1)`` prior on ``sd``, observed on ``ann_output``.

    Args:
        ann_input: Shared variable holding the inputs; the feature count is
            read from ``ann_input.get_value().shape[1]``.
        ann_output: Observed targets for the likelihood.

    Returns:
        The ``pm.Model`` containing the network.
    """
    n_hidden = 5
    n_features = ann_input.get_value().shape[1]

    # Random test values, drawn in this fixed order: three weight arrays
    # first, then the three bias vectors.
    init_1 = np.random.randn(n_features, n_hidden).astype(floatX)
    init_2 = np.random.randn(n_hidden, n_hidden).astype(floatX)
    init_out = np.random.randn(n_hidden).astype(floatX)
    init_b_1 = np.random.randn(n_hidden).astype(floatX)
    init_b_2 = np.random.randn(n_hidden).astype(floatX)
    init_b_out = np.random.randn(1).astype(floatX)

    with pm.Model() as neural_network:
        # Input -> first hidden layer.
        w_in = pm.Normal('w_in_1', mu=0, sd=1, shape=(n_features, n_hidden), testval=init_1)
        b_in = pm.Normal('b_1', mu=0, sd=1, shape=n_hidden, testval=init_b_1)

        # First -> second hidden layer.
        w_hidden = pm.Normal('w_1_2', mu=0, sd=1, shape=(n_hidden, n_hidden), testval=init_2)
        b_hidden = pm.Normal('b_2', mu=0, sd=1, shape=n_hidden, testval=init_b_2)

        # Second hidden layer -> scalar output.
        w_out = pm.Normal('w_2_out', mu=0, sd=1, shape=(n_hidden,), testval=init_out)
        b_out = pm.Normal('b_out', mu=0, sd=1, shape=1, testval=init_b_out)

        # Forward pass: tanh on both hidden layers, linear output.
        hidden_1 = pm.math.tanh(pm.math.dot(ann_input, w_in) + b_in)
        hidden_2 = pm.math.tanh(pm.math.dot(hidden_1, w_hidden) + b_hidden)
        mean_out = pm.math.dot(hidden_2, w_out) + b_out

        # Observation noise and likelihood.
        noise_sd = pm.HalfNormal('sd', sd=1)
        pm.Normal('out', mu=mean_out, sd=noise_sd, observed=ann_output)

    return neural_network
Then construct:
# Wrap the training data in theano shared variables and build the model on them.
ann_input = theano.shared(X_train)
ann_output = theano.shared(Y_train)
neural_network = construct_nn_2Layers(ann_input, ann_output)
run ADVI:
with neural_network:
    inference_no_s = pm.ADVI()

    # Record the approximation's mean and std during fitting so convergence
    # can be inspected afterwards.
    fitted = inference_no_s.approx
    tracker = pm.callbacks.Tracker(mean=fitted.mean.eval, std=fitted.std.eval)

    approx_no_s = pm.fit(n=30000, method=inference_no_s, callbacks=[tracker])
Predict in test:
# Point the shared variables at the held-out data before predicting.
ann_input.set_value(X_test)
ann_output.set_value(Y_test)
with neural_network:
    # `trace` was never created in this snippet: draw posterior samples from
    # the fitted ADVI approximation first, then sample predictions from them.
    trace = approx_no_s.sample(draws=500)
    ppc = pm.sample_posterior_predictive(trace, samples=500, progressbar=False)
and this is what I get which seems not relevant. What am I doing wrong?

GridsearchCV tuning KerasClassifier with callbacks error: ValueError: Found input variables with inconsistent numbers of samples

Using sklearn.GridSearchCV to fine tune the hyperparameters of model in Keras. Also, I add callbacks into it.
Input Format: (1500, 3, 10, 10)
Output Format: (1500,)
Grid search code:
def Grid_Search_Training(model):
    """Return an unfitted 5-fold ``GridSearchCV`` over the hyper-parameter grid.

    The grid covers activation, epochs, batch size and L2 lambda; candidates
    are scored by accuracy.

    Args:
        model: Estimator passed to ``GridSearchCV`` as ``estimator``.
    """
    param_grid = {
        'activation': ['relu', 'tanh'],
        'epochs': [300],
        'batch_size': [16, 32, 64, 128],
        'L2_lambda': [0.01, 0.001, 0.0001],
    }
    return GridSearchCV(estimator=model, param_grid=param_grid, scoring='accuracy', cv=5)
def run(grid_search=True):
    """Build the model and either grid-search it or train it once.

    Args:
        grid_search: When true, wrap the model in ``KerasClassifier``, run
            ``Grid_Search_Training`` and print the best and per-candidate
            scores. Otherwise fit the model directly for 100 epochs with a
            20% validation split.
    """
    model = Model()
    plot_model(model, to_file='Model_plot.png', show_shapes=True, show_layer_names=True)

    # Names of all 'tower_' layers, so their outputs can be visualised in TensorBoard.
    embeddings_all_layer_names = set(layer.name for layer in model.layers if layer.name.startswith('tower_'))

    # Callbacks: keep the best weights, lower the learning rate on plateau, log to TensorBoard.
    Model_weights_path = 'Model_weights.h5'
    checkpointer = ModelCheckpoint(Model_weights_path, monitor='val_loss', verbose=1, save_best_only=True)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.0000001)
    tensorboard_log_dir = 'ModelLogs/{}'.format(time.time())
    tensorboard = TensorBoard(log_dir=tensorboard_log_dir, histogram_freq=1,
                              write_graph=True, write_images=True, embeddings_freq=1,
                              embeddings_layer_names=embeddings_all_layer_names, embeddings_metadata=None)
    callbacks_list = [checkpointer, reduce_lr, tensorboard]
    fit_params = dict(callbacks=callbacks_list)

    if grid_search:
        t0 = time.time()
        # NOTE(review): incepModel is not defined in the visible code while the
        # model above is built with Model() — confirm which builder is intended.
        print(incepModel().summary())
        # NOTE(review): build_fn receives a model instance here; KerasClassifier
        # presumably expects a callable that accepts the grid's parameters
        # (activation, L2_lambda) — verify against the wrapper's documentation.
        model = KerasClassifier(build_fn=model, verbose=1)
        grid = Grid_Search_Training(model)
        print('Start Training the model......')
        # The callbacks must be passed as keyword arguments (**fit_params).
        grid_result = grid.fit(X_train, y_train, **fit_params)
        print("Best acc Score: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
        t1 = time.time()
        t = t1 - t0
        print('The GirdSearch on CNN took %.2f mins.' % (round(t / 60., 2)))
        means = grid_result.cv_results_['mean_test_score']
        stds = grid_result.cv_results_['std_test_score']
        params = grid_result.cv_results_['params']
        for mean, stdev, param in zip(means, stds, params):
            print("%f (%f) with: %r" % (mean, stdev, param))
    else:
        history = model.fit(X_train, to_categorical(y_train), epochs=100, batch_size=64, validation_split=0.2, callbacks=callbacks_list)
# Load and split the data into module-level globals, then start the grid search.
X_train, X_test, y_train, y_test = read_split(data)
run(grid_search=True)
The error is :
grid_result = grid.fit(X_train, y_train, fit_params)
File "/Users/jd/anaconda2/lib/python2.7/site-packages/sklearn/model_selection/_search.py", line 615, in fit
X, y, groups = indexable(X, y, groups)
File "/Users/jd/anaconda2/lib/python2.7/site-packages/sklearn/utils/validation.py", line 229, in indexable
check_consistent_length(*result)
File "/Users/jd/anaconda2/lib/python2.7/site-packages/sklearn/utils/validation.py", line 204, in check_consistent_length
" samples: %r" % [int(l) for l in lengths])
ValueError: Found input variables with inconsistent numbers of samples: [1500, 1500, 1]
The code works well without callbacks, i.e. No fit_params in grid_result = grid.fit(X_train, y_train, fit_params). There is no error.
What causes such kind of error?