InvalidType: Invalid operation is performed - neural-network

I am trying to write a stacked autoencoder. Since this a stacked autoencoder we need to train the first autoencoder and pass the weights to the second autoencoder. So during training we need to define train_data_for_next_layer. Here I am getting error:
InvalidType:
Invalid operation is performed in: LinearFunction (Forward)
Expect: x.shape[1] == W.shape[1]
Actual: 784 != 250
I am having issue with the last line. Is this problem due to incorrect model layer, I want to know what is the issue here. I have faced this problem several times before and any detailed explanation is welcome. The code is as follows:
class AutoEncoder(chainer.Chain):
def __init__(self, n_in, n_out, activation='relu', tied=True):
if tied:
super(AutoEncoder, self).__init__(
l1 = L.Linear(n_in, n_out)
)
self.add_param('decoder_bias', n_in)
self.decoder_bias.data[...] = 0
else:
super(AutoEncoder, self).__init__(
l1 = L.Linear(n_in, n_out),
l2 = L.Linear(n_out, n_in)
)
self.tied = tied
self.n_in = n_in
self.n_out = n_out
self.activation = {'relu': F.relu, 'sigmoid': F.sigmoid,
'identity': F.identity}[activation]
def __call__(self, x, train=True):
h1 = F.dropout(self.activation(self.l1(x)), train=train)
if self.tied:
return self.activation(F.linear(h1, F.transpose(self.l1.W),
self.decoder_bias))
else:
return self.activation(self.l2(h1))
def encode(self, x, train=True):
return F.dropout(self.activation(self.l1(x)), train=train)
def decode(self, x, train=True):
if self.tied:
return self.activation(F.linear(x, F.transpose(self.l1.W),
self.decoder_bias))
else:
return self.activation(self.l2(x))
class StackedAutoEncoder(chainer.ChainList):
def __init__(self, autoencoders):
super(StackedAutoEncoder, self).__init__()
for ae in autoencoders:
self.add_link(ae)
def __call__(self, x, train=True, depth=0):
if depth == 0: depth = len(self)
h = x
for i in range(depth):
h = self[i].encode(h, train=train)
for i in range(depth):
if i == depth-1: # do not use dropout in the output layer
train = False
h = self[depth-1-i].decode(h, train=train)
return h
def encode(self, x, train=True, depth=0):
if depth == 0: depth = len(self)
h = x
for i in range(depth):
h = self[i].encode(h, train=train)
return h
def decode(self, x, train=True, depth=0):
if depth == 0: depth = len(self)
h = x
for i in range(depth):
if i == depth-1: # do not use dropout in the output layer
train = False
h = self[depth-1-i].decode(h, train=train)
return h
class Regression(chainer.Chain):
def __init__(self, predictor):
super(Regression, self).__init__(predictor=predictor)
def __call__(self, x, t):
y = self.predictor(x, True)
self.loss = F.mean_squared_error(y, t)
return self.loss
def dump(self, x):
return self.predictor(x, False)
initmodel = ''resume = ''
gpu = -1
epoch_pre = 20
epoch_fine = 20
batchsize = 100
noise = 0
optimizer = 'adam'
learningrate = 0.01
alpha = 0.001
unit = '1000, 500, 250, 2'
activation = 'sigmoid'
untied = False
batchsize = batchsize
n_epoch = epoch_pre
n_epoch_fine = epoch_fine
n_units = list(map(int, unit.split(',')))
activation = activation
mnist = fetch_mldata('MNIST original', data_home='.')
perm = np.random.permutation(len(mnist.data))
mnist.data = mnist.data.astype(np.float32) / 255
train_data = mnist.data[perm][:60000]
test_data = mnist.data[perm][60000:]
# prepare layers
aes = []
for idx in range(len(n_units)):
n_in = n_units[idx-1] if idx > 0 else 28*28
n_out = n_units[idx]
ae = AutoEncoder(n_in, n_out, activation, tied = True)
aes.append(ae)
# prepare train data for next layer
x = chainer.Variable(np.array(train_data))
train_data_for_next_layer = cuda.to_cpu(ae.encode(x, train=False))

The InvalidType error indicates that the input shape of the array given to F.linear is wrong.
Expect: x.shape[1] == W.shape[1]
Actual: 784 != 250
In this case, for the given input x and W, F.linear expects that
x.shape[1] is the same as W.shape[1], but it does not.
For more detailed description of the error message, see https://docs.chainer.org/en/stable/tips.html#how-do-i-fix-invalidtype-error to understand how to interpret that error message.

Related

pyTorch mat1 and mat2 cannot be multiplied

I am getting the following error:
RuntimeError: mat1 and mat2 shapes cannot be multiplied (32x33856 and 640000x256)
I don't understand how do I need to change the parameters of my net. I took the net created in this paper and tried to modify the parameters to meet my needs.This is the code, I changed the parameters of the first convolution but still get the error:
class ChordClassificationNetwork(nn.Module):
def __init__(self, train_model=False):
super(ChordClassificationNetwork, self).__init__()
self.train_model = train_model
self.flatten = nn.Flatten()
self.firstConv = nn.Conv2d(3, 64, (3, 3))
self.secondConv = nn.Conv2d(64, 64, (3, 3))
self.pool = nn.MaxPool2d(2)
self.drop = nn.Dropout(0.25)
self.fc1 = nn.Linear(100*100*64, 256)
self.fc2 = nn.Linear(256, 256)
self.outLayer = nn.Linear(256, 7)
def forward(self, x):
x = self.firstConv(x)
x = F.relu(x)
x = self.pool(x)
x = self.secondConv(x)
x = F.relu(x)
x = self.pool(x)
x = self.drop(x)
x = self.flatten(x)
x = self.fc1(x)
x = F.relu(x)
x = self.drop(x)
x = self.fc2(x)
x = F.relu(x)
x = self.drop(x)
x = self.outLayer(x)
output = F.softmax(x, dim=1)
return output
and this is the training file:
device = ("cuda" if torch.cuda.is_available() else "cpu")
transformations = transforms.Compose([
transforms.Resize((100, 100))
])
num_epochs = 10
learning_rate = 0.001
train_CNN = False
batch_size = 32
shuffle = True
pin_memory = True
num_workers = 1
dataset = GuitarDataset("../chords_data/cropped_images/train", transform=transformations)
train_set, validation_set = torch.utils.data.random_split(dataset, [int(0.8 * len(dataset)), len(dataset) - int(0.8*len(dataset))])
train_loader = DataLoader(dataset=train_set, shuffle=shuffle, batch_size=batch_size, num_workers=num_workers,
pin_memory=pin_memory)
validation_loader = DataLoader(dataset=validation_set, shuffle=shuffle, batch_size=batch_size, num_workers=num_workers,
pin_memory=pin_memory)
model = ChordClassificationNetwork().to(device)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
def check_accuracy(loader, model):
if loader == train_loader:
print("Checking accuracy on training data")
else:
print("Checking accuracy on validation data")
num_correct = 0
num_samples = 0
model.eval()
with torch.no_grad():
for x, y in loader:
x = x.to(device=device)
y = y.to(device=device)
scores = model(x)
predictions = torch.tensor([1.0 if i >= 0.5 else 0.0 for i in scores]).to(device)
num_correct += (predictions == y).sum()
num_samples += predictions.size(0)
print(
f"Got {num_correct} / {num_samples} with accuracy {float(num_correct) / float(num_samples) * 100:.2f}"
)
return f"{float(num_correct) / float(num_samples) * 100:.2f}"
def train():
model.train()
for epoch in range(num_epochs):
loop = tqdm(train_loader, total=len(train_loader), leave=True)
if epoch % 2 == 0:
loop.set_postfix(val_acc=check_accuracy(validation_loader, model))
for imgs, labels in loop:
imgs = imgs.to(device)
labels = labels.to(device)
outputs = model(imgs)
loss = criterion(outputs, labels)
optimizer.zero_grad()
loss.backward()
optimizer.step()
loop.set_description(f"Epoch [{epoch}/{num_epochs}]")
loop.set_postfix(loss=loss.item())
if __name__ == "__main__":
train()
What am I doing wrong?
Look at the error message, the issue comes from the fc1 layer which doesn't have the required number of neurons. It is receiving a tensor of shape (batch_size, 33856) but expects (batch_size, 640000). The reduction in dimensionality is caused by the different layers you have applied to your input tensor before fc1.
You can fix this by defining fc1 with:
self.fc1 = nn.Linear(33856, 256)
Alternatively, you can use nn.LazyLinear which will initialize its weights with the appropriate number of neurons at runtime depending on the input it receives. But that's lazy:
self.fc1 = nn.LazyLinear(256)

I can't figure out why the size of the tensors doesn't match in Pytorch

Some context:
I have been studying AI and ML for the last couple of month now and finally I am studying neural nets. Great! The problem is that when I follow a tutorial everything seems to be OK, but when I try to implement a NN by my self I always face issues related to the size of the tensors.
I have seem the answer to other questions (like this one) but they face the exact problem of the post. I am not looking for a code to just copy and paste. I want to understand why I am facing this problem, how to handle it and avoid it.
The error message:
/home/devops/aic/venv/lib/python3.8/site-packages/torch/nn/modules/loss.py:528: UserWarning: Using a target size (torch.Size([16, 2])) that is different to the input size (torch.Size([9, 2])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.
return F.mse_loss(input, target, reduction=self.reduction)
Traceback (most recent call last):
File "nn_conv.py", line 195, in
loss = loss_function(outputs, targets)
File "/home/devops/aic/venv/lib/python3.8/site-packages/torch/nn/modules/module.py", line 889, in _call_impl
result = self.forward(*input, **kwargs)
File "/home/devops/aic/venv/lib/python3.8/site-packages/torch/nn/modules/loss.py", line 528, in forward
return F.mse_loss(input, target, reduction=self.reduction)
File "/home/devops/aic/venv/lib/python3.8/site-packages/torch/nn/functional.py", line 2928, in mse_loss
expanded_input, expanded_target = torch.broadcast_tensors(input, target)
File "/home/devops/aic/venv/lib/python3.8/site-packages/torch/functional.py", line 74, in broadcast_tensors
return _VF.broadcast_tensors(tensors) # type: ignore
RuntimeError: The size of tensor a (9) must match the size of tensor b (16) at non-singleton dimension 0
This is my code:
import os
import cv2
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
class DogsVSCats():
IMG_SIZE = 50
CATS = 'PetImages/Cat'
DOGS = 'PetImages/Dog'
LABELS = {CATS: 0, DOGS: 1}
training_data = []
cats_count = 0
dogs_count = 0
def make_training_data(self):
for label in self.LABELS.keys():
for f in tqdm(os.listdir(label)):
try:
path = os.path.join(label, f)
# convert image to grayscale
img = cv2.imread(path)
if img is not None:
height, width = img.shape[:2]
if width > height:
height = round((height * self.IMG_SIZE) / width)
width = self.IMG_SIZE
right = 0
bottom = self.IMG_SIZE - height
else:
width = round((width * self.IMG_SIZE) / height)
height = self.IMG_SIZE
right = self.IMG_SIZE - width
bottom = 0
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
img = cv2.resize(img, (width, height))
img = cv2.copyMakeBorder(img,
top=0,
bottom=bottom,
left=0,
right=right,
borderType=cv2.BORDER_CONSTANT)
# Add a One-hot-vector of label of the image to self.training_data
self.training_data.append([np.array(img), np.eye(len(self.LABELS))[self.LABELS[label]]])
if label == self.CATS:
self.cats_count += 1
elif label == self.DOGS:
self.dogs_count += 1
except cv2.error as e:
pass
np.random.shuffle(self.training_data)
np.save("PetImages/training_data.npy", self.training_data)
print("Cats:", self.cats_count)
print("Dogs:", self.dogs_count)
training_data = np.load('PetImages/training_data.npy', allow_pickle=True)
plt.imsave('PetImages/trained_example.png', training_data[1][0])
class RunningMetrics():
def __init__(self):
self._sum = 0
self._count = 0
def __call__(self):
return self._sum/float(self._count)
def update(self, val, size):
self._sum += val
self._count += size
class Net(nn.Module):
def __init__(self, num_channels, conv_kernel_size=3, stride=1, padding=1, max_pool_kernel_size=2):
super(Net, self).__init__()
self._num_channels = num_channels
self._max_pool_kernel_size = max_pool_kernel_size
self.conv1 = nn.Conv2d(1, self._num_channels, conv_kernel_size, stride, padding)
self.conv2 = nn.Conv2d(self._num_channels, self._num_channels*2, conv_kernel_size, stride, padding)
self.conv3 = nn.Conv2d(self._num_channels*2, self._num_channels*4, conv_kernel_size, stride, padding)
# Calc input of first
self.fc1 = nn.Linear(self._num_channels*4*8*8, self._num_channels*8)
self.fc2 = nn.Linear(self._num_channels*8, 2)
def forward(self, x):
# Conv
x = self.conv1(x)
x = F.relu(F.max_pool2d(x, self._max_pool_kernel_size))
x = self.conv2(x)
x = F.relu(F.max_pool2d(x, self._max_pool_kernel_size))
x = self.conv3(x)
x = F.relu(F.max_pool2d(x, self._max_pool_kernel_size))
# Flatten
x = x.view(-1, self._num_channels*4*8*8)
# Fully Connected
x = self.fc1(x)
x = F.relu(x)
x = self.fc2(x)
# return F.log_softmax(x, dim=1)
return F.softmax(x, dim=1)
def save_model(path):
torch.save(save, path)
def load_model(path):
self = torch.load(PATH)
self.eval()
if __name__ == '__main__':
print('Loading dataset')
if not os.path.exists("PetImages/training_data.npy"):
dogsvcats = DogsVSCats()
dogsvcats.make_training_data()
training_data = np.load('PetImages/training_data.npy', allow_pickle=True)
print('Loading Net')
net = Net(num_channels=32)
# net = net.to(device)
# optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9 )
optimizer = optim.Adam(net.parameters(), lr=0.001)
# loss_function = nn.NLLLoss()
loss_function = nn.MSELoss()
print('Converting X tensor')
X = torch.Tensor([i[0] for i in training_data]).view(-1, 50, 50)
X = X/255.0
print('Converting Y tensor')
y = torch.Tensor([i[1] for i in training_data])
# Validation data
VAL_PERCENT = 0.1
val_size = int(len(X)*VAL_PERCENT)
X_train = X[:-val_size]
y_train = y[:-val_size]
X_test = X[-val_size:]
y_test = y[-val_size:]
print('Training Set:', len(X_train))
print('Testing Set:', len(X_test))
BATCH_SIZE = 16
EPOCHS = 2
IMG_SIZE=50
for epoch in range(EPOCHS):
print(f'Epoch {epoch+1}/{EPOCHS}')
running_loss = RunningMetrics()
running_acc = RunningMetrics()
for i in tqdm(range(0, len(X_train), BATCH_SIZE)):
inputs = X_train[i:i+BATCH_SIZE].view(-1,1, IMG_SIZE, IMG_SIZE)
targets = y_train[i:i+BATCH_SIZE]
# inputs, targets = inputs.to(device), targets.to(device)
optimizer.zero_grad()
outputs = net(inputs)
_, preds = torch.max(outputs, 1)
loss = loss_function(outputs, targets)
loss.backward()
optimizer.step()
running_loss.update(loss.item()*BATCH_SIZE,
BATCH_SIZE)
running_acc.update(toch.sum(preds == targets).float(),
BATCH_SIZE)
print(f'Loss: {running_loss:.4f}, Acc: {running_acc:.4f}')
print('-'*10)
Dataset:
I am using the Microsoft's dataset of cats and dogs images
EDIT:
The error previous message has been solved following Anonymous' advice but now I am getting another error:
Traceback (most recent call last):
File "nn_conv.py", line 203, in
running_acc.update(torch.sum(preds == targets).float(),
RuntimeError: The size of tensor a (16) must match the size of tensor b (2) at non-singleton dimension 1
Input : 16 x 1 x 50 x 50
After conv1/maxpool1 : 16 x 32 x 25 x 25
After conv2/maxpool2 : 16 x 64 x 12 x 12 (no padding so taking floor)
After conv3/maxpool3 : 16 x 128 x 6 x 6 (=73 728 neurons here is your error)
Flattening : you specified a view like -1 x 32 * 4 * 8 * 8 = 9 x 8192
The correct flattening is -1 x 32 * 4 * 6 * 6
Few tips :
as you begin pytorch, you should go see how to use a dataloader/dataset
the binary cross entropy is more commonly used for classification (though MSE is still possible)

Network Training is too slow for Custom Network even though network is not too large(Resnet + LSTM type network)

I have made a custom network for the image data. But the training on this network is very slow though the network is not too huge.
When I am training on resnet150 with lstm, the training is quite fast. Not sure which operation is lagging my training speed?
I have tried reducing Batch_size, seq_dim and network parameters. My guess is some operation is reducing the speed
The dataset I am using are images and they are also very small size(96*96) gray scale images.
***** CODE *****
class Residual(nn.Module):
def __init__(self, input_channels, num_channels,
use_1x1conv=True, strides=1, dilation=2, padding=1, kernel_size=5):
super(Residual, self).__init__()
self.conv1 = nn.Conv2d(input_channels, num_channels,
kernel_size=kernel_size, padding=padding, stride=strides, dilation=dilation)
self.conv2 = nn.Conv2d(num_channels, num_channels,
kernel_size=kernel_size, padding=2 * padding, dilation=(2 * dilation))
if use_1x1conv:
self.conv3 = nn.Conv2d(input_channels, num_channels,
kernel_size=1, stride=strides)
else:
self.conv3 = None
self.bn1 = nn.BatchNorm2d(num_channels)
self.bn2 = nn.BatchNorm2d(num_channels)
self.relu = nn.ReLU(inplace=True)
def forward(self, X):
Y = F.relu(self.bn1(self.conv1(X)))
Y = self.bn2(self.conv2(Y))
if self.conv3:
X = self.conv3(X)
Y += X
return F.relu(Y)
def weight_init(m):
'''
Usage:
model = Model()
model.apply(weight_init)
'''
if isinstance(m, nn.Conv1d):
init.normal_(m.weight.data)
if m.bias is not None:
init.normal_(m.bias.data)
elif isinstance(m, nn.Conv2d):
init.xavier_normal_(m.weight.data)
if m.bias is not None:
init.normal_(m.bias.data)
elif isinstance(m, nn.BatchNorm1d):
init.normal_(m.weight.data, mean=1, std=0.02)
init.constant_(m.bias.data, 0)
elif isinstance(m, nn.BatchNorm2d):
init.normal_(m.weight.data, mean=1, std=0.02)
init.constant_(m.bias.data, 0)
elif isinstance(m, nn.Linear):
init.xavier_normal_(m.weight.data)
init.normal_(m.bias.data)
elif isinstance(m, nn.LSTM):
for param in m.parameters():
if len(param.shape) >= 2:
init.orthogonal_(param.data)
else:
init.normal_(param.data)
class Network(nn.Module):
def __init__(self):
super(Network, self).__init__()
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_channels = 64
strides = 1
dilation = 2
padding = 4
kernel_size = 5
input_channel = 3
self.fc = nn.Linear(config['hidden_dim'], config['output_dim'])
self.lstm = None
b1 = Residual(input_channels=input_channel, num_channels=num_channels,
use_1x1conv=True, strides=strides, dilation=dilation, padding=padding, kernel_size=kernel_size)
b2 = Residual(input_channels=num_channels, num_channels=2 * num_channels,
use_1x1conv=True, strides=strides, dilation=2 * dilation, padding=2 * padding,
kernel_size=kernel_size)
b3 = Residual(input_channels=2 * num_channels, num_channels=4 * num_channels,
use_1x1conv=True, strides=strides, dilation=4 * dilation, padding=4 * padding,
kernel_size=kernel_size)
self.net = nn.Sequential(b1, b2, b3, nn.AdaptiveMaxPool2d((2, 2)))
self.apply(weight_init)
def forward(self, x):
x = self.net(x)
x = x.view(config['batch_size'], config['seq_dim'], -1)
if self.lstm is None:
self.lstm = nn.LSTM(x.size(2), config['hidden_dim'], 1, batch_first=True).to(self.device)
for param in self.lstm.parameters():
if len(param.shape) >= 2:
init.orthogonal_(param.data)
else:
init.normal_(param.data)
h0 = torch.zeros(config['layer_dim'], x.size(0), config['hidden_dim']).to(self.device)
# Initialize cell state
c0 = torch.zeros(config['layer_dim'], x.size(0), config['hidden_dim']).to(self.device)
output, (hn, cn) = self.lstm(x, (h0,c0))
output = output[:, :, :]
output = self.fc(output)
return output, (hn, cn)
for epoch in range(config['num_epochs']):
print('epoch', epoch)
running_loss = 0
nb_classes = config['output_dim']
confusion_matrix = torch.zeros(nb_classes, nb_classes)
for i, (image, label) in enumerate(trainLoader):
print('batch: ',i)
image = image.float().to(device)
label = label.to(device)
optimizer.zero_grad()
batch_size, timesteps, H, W, C = image.size()
# Change Image shape
image = image.view(batch_size * timesteps, H, W, C)
image = image.permute(0, 3, 1, 2) # from NHWC to NCHW
output, (hn,cn) = model(image)
label = label.view(-1)
output = output.view(-1, output.size(2))
loss = criterion(output, label)
loss *= config['seq_dim']
loss.backward() # Backward pass
optimizer.step() # Now we can do an optimizer step
running_loss += loss.item()

animated boundary visualization of perceptron

i buit from scratch the perceptron class in python, and now i'm trying to make the animated visualization of the decision boundary in every iteration of the learning process.The problem is that my code doesn't works, cuz looks like the "animation_func" is not called, i don't know why. can you help me.
class Perceptron:
def __init__(self,X=None, y=None,lr=0.001, niter=1000):
self.lr = lr
self.niter = niter
self.w = None
self.b = None
def fit(self,X,y):
for indice,X_i in enumerate(X):
self.w += self.lr*(y_[indice]-self.predic(X_i))* X_i
def animar_perceptron(self,X,y,niter):
samples,features = X.shape
self.w = np.zeros(features)
self.b = 0
y_ = np.array([1 if i>0 else 0 for i in y])
x0_1 = np.amin(X[:,0])
x0_2 = np.amax(X[:,0])
ymin = np.amin(X[:,1])
ymax= np.amax(X[:1])
fig, ax = plt.subplots()
ax.scatter(X[:,0], X[:,1],marker='o', c=y )
boundary, =ax.plot([X0_1, X0_2],[0, 0],'k')
def animation_func(_):
self.fit(X,y)
x1_1 = (-self.w[0]*x0_1-self.b )/self.w[1]
x1_2 = (-self.w[0]*x0_2-self.b )/self.w[1]
boundary.set_ydata([x1_1,x1_2])
return boundary,
return FuncAnimation(fig, func=animation_func, frames=np.arange(1,niter), interval=50)
def predic(self, X):
y_hat = np.dot(X,self.w) + self.b
y_hat =self.activate_fun(y_hat)
return y_hat
def activate_fun(self,z):
return np.where(z>0, 1,0)
def score(self, y_true, y_pred):
accuracy = np.sum(y_true == y_pred)/len(y_true)
return accuracy
p = Perceptron()
animacion = animar_perceptron(X,y,1000)

Initialising weights and bias with PyTorch - how to correct dimensions?

Using this model I'm attempting to initialise my network with my predefined weights and bias :
dimensions_input = 10
hidden_layer_nodes = 5
output_dimension = 10
class Model(torch.nn.Module):
def __init__(self):
super(Model, self).__init__()
self.linear = torch.nn.Linear(dimensions_input,hidden_layer_nodes)
self.linear2 = torch.nn.Linear(hidden_layer_nodes,output_dimension)
self.linear.weight = torch.nn.Parameter(torch.zeros(dimensions_input,hidden_layer_nodes))
self.linear.bias = torch.nn.Parameter(torch.ones(hidden_layer_nodes))
self.linear2.weight = torch.nn.Parameter(torch.zeros(dimensions_input,hidden_layer_nodes))
self.linear2.bias = torch.nn.Parameter(torch.ones(hidden_layer_nodes))
def forward(self, x):
l_out1 = self.linear(x)
y_pred = self.linear2(l_out1)
return y_pred
model = Model()
criterion = torch.nn.MSELoss(size_average = False)
optim = torch.optim.SGD(model.parameters(), lr = 0.00001)
def train_model():
y_data = x_data.clone()
for i in range(10000):
y_pred = model(x_data)
loss = criterion(y_pred, y_data)
if i % 5000 == 0:
print(loss)
optim.zero_grad()
loss.backward()
optim.step()
RuntimeError:
The expanded size of the tensor (10) must match the existing size (5)
at non-singleton dimension 1
My dimensions appear correct as they match the corresponding linear layers ?
The code provided doesn't run due to the fact that x_data isn't defined, so I can't be sure that this is the issue, but one thing that strikes me is that you should replace
self.linear2.weight = torch.nn.Parameter(torch.zeros(dimensions_input,hidden_layer_nodes))
self.linear2.bias = torch.nn.Parameter(torch.ones(hidden_layer_nodes))
with
self.linear2.weight = torch.nn.Parameter(torch.zeros(hidden_layer_nodes, output_dimension))
self.linear2.bias = torch.nn.Parameter(torch.ones(output_dimension))