I can't figure out why the size of the tensors doesn't match in Pytorch - neural-network

Some context:
I have been studying AI and ML for the last couple of month now and finally I am studying neural nets. Great! The problem is that when I follow a tutorial everything seems to be OK, but when I try to implement a NN by my self I always face issues related to the size of the tensors.
I have seem the answer to other questions (like this one) but they face the exact problem of the post. I am not looking for a code to just copy and paste. I want to understand why I am facing this problem, how to handle it and avoid it.
The error message:
/home/devops/aic/venv/lib/python3.8/site-packages/torch/nn/modules/loss.py:528: UserWarning: Using a target size (torch.Size([16, 2])) that is different to the input size (torch.Size([9, 2])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.
return F.mse_loss(input, target, reduction=self.reduction)
Traceback (most recent call last):
File "nn_conv.py", line 195, in
loss = loss_function(outputs, targets)
File "/home/devops/aic/venv/lib/python3.8/site-packages/torch/nn/modules/module.py", line 889, in _call_impl
result = self.forward(*input, **kwargs)
File "/home/devops/aic/venv/lib/python3.8/site-packages/torch/nn/modules/loss.py", line 528, in forward
return F.mse_loss(input, target, reduction=self.reduction)
File "/home/devops/aic/venv/lib/python3.8/site-packages/torch/nn/functional.py", line 2928, in mse_loss
expanded_input, expanded_target = torch.broadcast_tensors(input, target)
File "/home/devops/aic/venv/lib/python3.8/site-packages/torch/functional.py", line 74, in broadcast_tensors
return _VF.broadcast_tensors(tensors) # type: ignore
RuntimeError: The size of tensor a (9) must match the size of tensor b (16) at non-singleton dimension 0
This is my code:
import os
import cv2
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
class DogsVSCats():
IMG_SIZE = 50
CATS = 'PetImages/Cat'
DOGS = 'PetImages/Dog'
LABELS = {CATS: 0, DOGS: 1}
training_data = []
cats_count = 0
dogs_count = 0
def make_training_data(self):
for label in self.LABELS.keys():
for f in tqdm(os.listdir(label)):
try:
path = os.path.join(label, f)
# convert image to grayscale
img = cv2.imread(path)
if img is not None:
height, width = img.shape[:2]
if width > height:
height = round((height * self.IMG_SIZE) / width)
width = self.IMG_SIZE
right = 0
bottom = self.IMG_SIZE - height
else:
width = round((width * self.IMG_SIZE) / height)
height = self.IMG_SIZE
right = self.IMG_SIZE - width
bottom = 0
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
img = cv2.resize(img, (width, height))
img = cv2.copyMakeBorder(img,
top=0,
bottom=bottom,
left=0,
right=right,
borderType=cv2.BORDER_CONSTANT)
# Add a One-hot-vector of label of the image to self.training_data
self.training_data.append([np.array(img), np.eye(len(self.LABELS))[self.LABELS[label]]])
if label == self.CATS:
self.cats_count += 1
elif label == self.DOGS:
self.dogs_count += 1
except cv2.error as e:
pass
np.random.shuffle(self.training_data)
np.save("PetImages/training_data.npy", self.training_data)
print("Cats:", self.cats_count)
print("Dogs:", self.dogs_count)
training_data = np.load('PetImages/training_data.npy', allow_pickle=True)
plt.imsave('PetImages/trained_example.png', training_data[1][0])
class RunningMetrics():
def __init__(self):
self._sum = 0
self._count = 0
def __call__(self):
return self._sum/float(self._count)
def update(self, val, size):
self._sum += val
self._count += size
class Net(nn.Module):
def __init__(self, num_channels, conv_kernel_size=3, stride=1, padding=1, max_pool_kernel_size=2):
super(Net, self).__init__()
self._num_channels = num_channels
self._max_pool_kernel_size = max_pool_kernel_size
self.conv1 = nn.Conv2d(1, self._num_channels, conv_kernel_size, stride, padding)
self.conv2 = nn.Conv2d(self._num_channels, self._num_channels*2, conv_kernel_size, stride, padding)
self.conv3 = nn.Conv2d(self._num_channels*2, self._num_channels*4, conv_kernel_size, stride, padding)
# Calc input of first
self.fc1 = nn.Linear(self._num_channels*4*8*8, self._num_channels*8)
self.fc2 = nn.Linear(self._num_channels*8, 2)
def forward(self, x):
# Conv
x = self.conv1(x)
x = F.relu(F.max_pool2d(x, self._max_pool_kernel_size))
x = self.conv2(x)
x = F.relu(F.max_pool2d(x, self._max_pool_kernel_size))
x = self.conv3(x)
x = F.relu(F.max_pool2d(x, self._max_pool_kernel_size))
# Flatten
x = x.view(-1, self._num_channels*4*8*8)
# Fully Connected
x = self.fc1(x)
x = F.relu(x)
x = self.fc2(x)
# return F.log_softmax(x, dim=1)
return F.softmax(x, dim=1)
def save_model(path):
torch.save(save, path)
def load_model(path):
self = torch.load(PATH)
self.eval()
if __name__ == '__main__':
print('Loading dataset')
if not os.path.exists("PetImages/training_data.npy"):
dogsvcats = DogsVSCats()
dogsvcats.make_training_data()
training_data = np.load('PetImages/training_data.npy', allow_pickle=True)
print('Loading Net')
net = Net(num_channels=32)
# net = net.to(device)
# optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9 )
optimizer = optim.Adam(net.parameters(), lr=0.001)
# loss_function = nn.NLLLoss()
loss_function = nn.MSELoss()
print('Converting X tensor')
X = torch.Tensor([i[0] for i in training_data]).view(-1, 50, 50)
X = X/255.0
print('Converting Y tensor')
y = torch.Tensor([i[1] for i in training_data])
# Validation data
VAL_PERCENT = 0.1
val_size = int(len(X)*VAL_PERCENT)
X_train = X[:-val_size]
y_train = y[:-val_size]
X_test = X[-val_size:]
y_test = y[-val_size:]
print('Training Set:', len(X_train))
print('Testing Set:', len(X_test))
BATCH_SIZE = 16
EPOCHS = 2
IMG_SIZE=50
for epoch in range(EPOCHS):
print(f'Epoch {epoch+1}/{EPOCHS}')
running_loss = RunningMetrics()
running_acc = RunningMetrics()
for i in tqdm(range(0, len(X_train), BATCH_SIZE)):
inputs = X_train[i:i+BATCH_SIZE].view(-1,1, IMG_SIZE, IMG_SIZE)
targets = y_train[i:i+BATCH_SIZE]
# inputs, targets = inputs.to(device), targets.to(device)
optimizer.zero_grad()
outputs = net(inputs)
_, preds = torch.max(outputs, 1)
loss = loss_function(outputs, targets)
loss.backward()
optimizer.step()
running_loss.update(loss.item()*BATCH_SIZE,
BATCH_SIZE)
running_acc.update(toch.sum(preds == targets).float(),
BATCH_SIZE)
print(f'Loss: {running_loss:.4f}, Acc: {running_acc:.4f}')
print('-'*10)
Dataset:
I am using the Microsoft's dataset of cats and dogs images
EDIT:
The error previous message has been solved following Anonymous' advice but now I am getting another error:
Traceback (most recent call last):
File "nn_conv.py", line 203, in
running_acc.update(torch.sum(preds == targets).float(),
RuntimeError: The size of tensor a (16) must match the size of tensor b (2) at non-singleton dimension 1

Input : 16 x 1 x 50 x 50
After conv1/maxpool1 : 16 x 32 x 25 x 25
After conv2/maxpool2 : 16 x 64 x 12 x 12 (no padding so taking floor)
After conv3/maxpool3 : 16 x 128 x 6 x 6 (=73 728 neurons here is your error)
Flattening : you specified a view like -1 x 32 * 4 * 8 * 8 = 9 x 8192
The correct flattening is -1 x 32 * 4 * 6 * 6
Few tips :
as you begin pytorch, you should go see how to use a dataloader/dataset
the binary cross entropy is more commonly used for classification (though MSE is still possible)

Related

pyTorch mat1 and mat2 cannot be multiplied

I am getting the following error:
RuntimeError: mat1 and mat2 shapes cannot be multiplied (32x33856 and 640000x256)
I don't understand how do I need to change the parameters of my net. I took the net created in this paper and tried to modify the parameters to meet my needs.This is the code, I changed the parameters of the first convolution but still get the error:
class ChordClassificationNetwork(nn.Module):
def __init__(self, train_model=False):
super(ChordClassificationNetwork, self).__init__()
self.train_model = train_model
self.flatten = nn.Flatten()
self.firstConv = nn.Conv2d(3, 64, (3, 3))
self.secondConv = nn.Conv2d(64, 64, (3, 3))
self.pool = nn.MaxPool2d(2)
self.drop = nn.Dropout(0.25)
self.fc1 = nn.Linear(100*100*64, 256)
self.fc2 = nn.Linear(256, 256)
self.outLayer = nn.Linear(256, 7)
def forward(self, x):
x = self.firstConv(x)
x = F.relu(x)
x = self.pool(x)
x = self.secondConv(x)
x = F.relu(x)
x = self.pool(x)
x = self.drop(x)
x = self.flatten(x)
x = self.fc1(x)
x = F.relu(x)
x = self.drop(x)
x = self.fc2(x)
x = F.relu(x)
x = self.drop(x)
x = self.outLayer(x)
output = F.softmax(x, dim=1)
return output
and this is the training file:
device = ("cuda" if torch.cuda.is_available() else "cpu")
transformations = transforms.Compose([
transforms.Resize((100, 100))
])
num_epochs = 10
learning_rate = 0.001
train_CNN = False
batch_size = 32
shuffle = True
pin_memory = True
num_workers = 1
dataset = GuitarDataset("../chords_data/cropped_images/train", transform=transformations)
train_set, validation_set = torch.utils.data.random_split(dataset, [int(0.8 * len(dataset)), len(dataset) - int(0.8*len(dataset))])
train_loader = DataLoader(dataset=train_set, shuffle=shuffle, batch_size=batch_size, num_workers=num_workers,
pin_memory=pin_memory)
validation_loader = DataLoader(dataset=validation_set, shuffle=shuffle, batch_size=batch_size, num_workers=num_workers,
pin_memory=pin_memory)
model = ChordClassificationNetwork().to(device)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
def check_accuracy(loader, model):
if loader == train_loader:
print("Checking accuracy on training data")
else:
print("Checking accuracy on validation data")
num_correct = 0
num_samples = 0
model.eval()
with torch.no_grad():
for x, y in loader:
x = x.to(device=device)
y = y.to(device=device)
scores = model(x)
predictions = torch.tensor([1.0 if i >= 0.5 else 0.0 for i in scores]).to(device)
num_correct += (predictions == y).sum()
num_samples += predictions.size(0)
print(
f"Got {num_correct} / {num_samples} with accuracy {float(num_correct) / float(num_samples) * 100:.2f}"
)
return f"{float(num_correct) / float(num_samples) * 100:.2f}"
def train():
model.train()
for epoch in range(num_epochs):
loop = tqdm(train_loader, total=len(train_loader), leave=True)
if epoch % 2 == 0:
loop.set_postfix(val_acc=check_accuracy(validation_loader, model))
for imgs, labels in loop:
imgs = imgs.to(device)
labels = labels.to(device)
outputs = model(imgs)
loss = criterion(outputs, labels)
optimizer.zero_grad()
loss.backward()
optimizer.step()
loop.set_description(f"Epoch [{epoch}/{num_epochs}]")
loop.set_postfix(loss=loss.item())
if __name__ == "__main__":
train()
What am I doing wrong?
Look at the error message, the issue comes from the fc1 layer which doesn't have the required number of neurons. It is receiving a tensor of shape (batch_size, 33856) but expects (batch_size, 640000). The reduction in dimensionality is caused by the different layers you have applied to your input tensor before fc1.
You can fix this by defining fc1 with:
self.fc1 = nn.Linear(33856, 256)
Alternatively, you can use nn.LazyLinear which will initialize its weights with the appropriate number of neurons at runtime depending on the input it receives. But that's lazy:
self.fc1 = nn.LazyLinear(256)

Pytorch-GPU what am I forgetting to move over to the GPU?

I'm getting this error. What am I leaving out, I feel that I have tired everything.
Also is there not a easy way to just use the GPU and not the CPU I feel like I have tried all those options as well. As in not using .cuda() everywhere
This is one of my first neutral networks so please go easy on me. (most of it is from Class)
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking arugment for argument target in method wrapper_nll_loss_forward)
import torch.cuda
import numpy as np
import time
from torchvision import datasets
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary
torch.cuda.set_device(0)
def load_data():
num_workers = 0
load_data.batch_size = 20
transform = transforms.ToTensor()
train_data = datasets.MNIST(root='data', train=True, download=True, transform=transform)
test_data = datasets.MNIST(root='data', train=False, download=True, transform=transform)
load_data.train_loader = torch.utils.data.DataLoader(train_data,
batch_size=load_data.batch_size, num_workers=num_workers, pin_memory=True)
test_loader = torch.utils.data.DataLoader(test_data,
batch_size=load_data.batch_size, num_workers=num_workers, pin_memory=True)
def visualize():
dataiter = iter(load_data.train_loader)
visualize.images, labels = dataiter.next()
visualize.images = visualize.images.numpy()
fig = plt.figure(figsize=(25, 4))
for idx in np.arange(load_data.batch_size):
ax = fig.add_subplot(2, load_data.batch_size/2, idx+1, xticks=[], yticks=[])
ax.imshow(np.squeeze(visualize.images[idx]), cmap='gray')
ax.set_title(str(labels[idx].item()))
#plt.show()
def fig_values():
img = np.squeeze(visualize.images[1])
fig = plt.figure(figsize = (12,12))
ax = fig.add_subplot(111)
ax.imshow(img, cmap='gray')
width, height = img.shape
thresh = img.max()/2.5
for x in range(width):
for y in range(height):
val = round(img[x][y],2) if img[x][y] !=0 else 0
ax.annotate(str(val), xy=(y,x),
horizontalalignment='center',
verticalalignment='center',
color='white' if img[x][y]<thresh else 'black')
#plt.show()
load_data()
visualize()
fig_values()
class NeuralNet(nn.Module):
def __init__(self, gpu = True):
super(NeuralNet, self ).__init__()
self.fc1 = nn.Linear(28 * 28, 16).cuda()
self.fc2 = nn.Linear(16, 10).cuda()
def forward(self, x):
x = x.view(-1, 28 * 28).cuda()
x = F.relu(self.fc1(x)).cuda()
x = self.fc2(x).cuda()
return x
def training():
criterion = nn.CrossEntropyLoss().cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
n_epochs = 100
model.train().cuda()
for epoch in range(n_epochs):
train_loss = 0.0
for data, target in load_data.train_loader:
optimizer.zero_grad()
output = model(data)
loss = criterion(output, target)
loss.backward()
optimizer.step()
train_loss += loss.item()*data.size(0)
train_loss = train_loss/len(load_data.train_loader.dataset)
print('Epoch: {} \tTraining Loss: {:.6f}'.format(
epoch+1,
train_loss
))
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = NeuralNet().to(device)
summary(model, input_size=(1, 28, 28))
training()
Your data, and target are not in GPU (considering the removal of repeated cuda calls).
You are also doing of unnecessary cuda() which is not needed. Simply, see where your data and model are. Take the model to GPU, take the data and label to GPU, finally feed data to the model.
Don't use, cuda(), use to.device(), it's safer in the long run and easily customizable in multi-gpu setup.
import torch.cuda
import numpy as np
import time
from torchvision import datasets
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary
torch.cuda.set_device(0)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
def load_data():
num_workers = 0
load_data.batch_size = 20
transform = transforms.ToTensor()
train_data = datasets.MNIST(root='data', train=True, download=True, transform=transform)
test_data = datasets.MNIST(root='data', train=False, download=True, transform=transform)
load_data.train_loader = torch.utils.data.DataLoader(train_data,
batch_size=load_data.batch_size, num_workers=num_workers, pin_memory=True)
test_loader = torch.utils.data.DataLoader(test_data,
batch_size=load_data.batch_size, num_workers=num_workers, pin_memory=True)
def visualize():
dataiter = iter(load_data.train_loader)
visualize.images, labels = dataiter.next()
visualize.images = visualize.images.numpy()
fig = plt.figure(figsize=(25, 4))
for idx in np.arange(load_data.batch_size):
ax = fig.add_subplot(2, load_data.batch_size/2, idx+1, xticks=[], yticks=[])
ax.imshow(np.squeeze(visualize.images[idx]), cmap='gray')
ax.set_title(str(labels[idx].item()))
#plt.show()
def fig_values():
img = np.squeeze(visualize.images[1])
fig = plt.figure(figsize = (12,12))
ax = fig.add_subplot(111)
ax.imshow(img, cmap='gray')
width, height = img.shape
thresh = img.max()/2.5
for x in range(width):
for y in range(height):
val = round(img[x][y],2) if img[x][y] !=0 else 0
ax.annotate(str(val), xy=(y,x),
horizontalalignment='center',
verticalalignment='center',
color='white' if img[x][y]<thresh else 'black')
#plt.show()
load_data()
visualize()
fig_values()
class NeuralNet(nn.Module):
def __init__(self, gpu = True):
super(NeuralNet, self ).__init__()
self.fc1 = nn.Linear(28 * 28, 16)
self.fc2 = nn.Linear(16, 10)
def forward(self, x):
x = x.view(-1, 28 * 28)
x = F.relu(self.fc1(x))
x = self.fc2(x)
return x
def training():
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
n_epochs = 100
model.train()
for epoch in range(n_epochs):
train_loss = 0.0
for data, target in load_data.train_loader:
optimizer.zero_grad()
###################################
data = data.to(device)
target = target.to(device)
###################################
output = model(data)
loss = criterion(output, target)
loss.backward()
optimizer.step()
train_loss += loss.item()*data.size(0)
train_loss = train_loss/len(load_data.train_loader.dataset)
print('Epoch: {} \tTraining Loss: {:.6f}'.format(
epoch+1,
train_loss
))
model = NeuralNet().to(device)
summary(model, input_size=(1, 28, 28))
training()
Clearly your target variable is not on GPU.
Also, its a bad idea to call .cuda() inside forward()
def forward(self, x):
x = x.view(-1, 28 * 28).cuda() # BAD
x = F.relu(self.fc1(x)).cuda() # BAD
x = self.fc2(x).cuda() #BAD
return x
Rather, remove all .cuda() inside forward and do this is main loop
for data, target in load_data.train_loader:
data = data.cuda()
target = target.cuda()

Error while executing CrossEntropyLoss() in PyTorch

My dataset contains images of shape [3,28,28]. I have written the following code:
class ConvNet(nn.Module):
def __init__(self):
super(ConvNet, self).__init__()
self.layer1 = nn.Sequential(nn.Conv2d(3, 28, kernel_size=5, stride=1, padding=2),nn.ReLU(),nn.MaxPool2d(kernel_size=2, stride=2))
self.layer2 = nn.Sequential(nn.Conv2d(28, 56, kernel_size=5, stride=1, padding=2),nn.ReLU(),nn.MaxPool2d(kernel_size=2, stride=2))
self.drop_out = nn.Dropout()
self.fc1 = nn.Linear(7 * 7 * 56, 1000)
self.fc2 = nn.Linear(1000, 10)
def forward(self, x):
out = self.layer1(x)
out = self.layer2(out)
out = out.reshape(out.size(0), -1)
out = self.drop_out(out)
out = self.fc1(out)
out = self.fc2(out)
return out
model = ConvNet()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
total_step = len(loader_train)
for e in range(num_epochs):
print("Epoch ", e+1,": ")
for i, (images, labels) in enumerate(loader_train):
optimizer.zero_grad()
actual_out = model(images)
loss = criterion(actual_out, labels)
loss.backward()
optimizer.step()
if (i+1) % 100 == 0:
print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.3f}' .format(e+1, num_epochs, i+1, total_step, loss.item()))
However, I'm getting the following error:
AttributeError Traceback (most recent call last)
in
8 actual_out = model(images)
9
---> 10 loss = criterion(actual_out, labels)
11 loss.backward()
AttributeError: 'tuple' object has no attribute 'size'
I converted labels into a tensor by the following method:
target_out = torch.empty(batch_size,dtype=torch.long).random_(labels)
loss = criterion(actual_out, target_out)
But that generates:
TypeError Traceback (most recent call last)
in
---> 11 target_out = torch.empty(batch_size,dtype=torch.long).random_(labels)
12 loss = criterion(actual_out, target_out)
TypeError: random_() received an invalid combination of arguments - got (tuple), but expected one of:
(*, torch.Generator generator)
(int from, int to, *, torch.Generator generator)
(int to, *, torch.Generator generator)
Your labels object is a tuple and you want to convert it to a tensor of dtype long.
You can do this via:
torch.tensor(labels, dtype=torch.long)
Assuming this is how your train_loader is structured, you can reshape in the training loop rather than in the forward() function. I've given an example change it as your need.
Also during the backward, clear the grad, the .to(device) is optional, if you have a GPU
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
batch_size=batch_size,
shuffle=True)
for epoch in range(num_epochs):
for i, (images, labels) in enumerate(train_loader):
# origin shape: [100, 1, 28, 28]
# resized: [100, 784]
images = images.reshape(-1, 28*28).to(device)
labels = labels.to(device)
# Forward pass
outputs = model(images)
loss = criterion(outputs, labels)
# Backward and optimize
optimizer.zero_grad()
loss.backward()
optimizer.step()

L1 regulariser Pytorch acting opposite to what I expect

I'm trying to add an L1 penalty to a specific layer of a neural network, and I have the code below (in which I attempt to add l1 penalty to the first layer). If I run it for lambda = 0 (i.e. no penalty), the output gets very close to the expected weights those being [10, 12, 2, 11, -0.25]) and if I run for enough epochs or reduce batch size it will get it exactly, as in the output below:
mlp.0.weight
Parameter containing:
tensor([[ 9.8657, -11.8305, 2.0242, 10.8913, -0.1978]],
requires_grad=True)
Then, when I run it for a large lambda, say 1000, I would expect these weights to shrink towards zero as there is a large penalty being added to the loss that we are trying to minimise. However, the opposite happens and the weights explode, as in the output below (for lam = 1000)
mlp.0.weight
Parameter containing:
tensor([[-13.9368, 9.9072, 2.2447, -11.6870, 26.7293]],
requires_grad=True)
If anyone could help me, that'd be great. I'm new to pytorch (but not the idea of regularisation), so I'm guessing it's something in my code that is the problem.
Thanks
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import numpy as np
from sklearn.linear_model import LinearRegression
class TrainDataset(Dataset):
def __init__(self, data):
self.data = data
def __len__(self):
return self.data.shape[0]
def __getitem__(self, ind):
x = self.data[ind][1:]
y = self.data[ind][0]
return x, y
class TestDataset(TrainDataset):
def __getitem__(self, ind):
x = self.data[ind]
return x
torch.manual_seed(94)
x_train = np.random.rand(1000, 5)
y_train = x_train[:, 0] * 10 - x_train[:, 1] * 12 + x_train[:, 2] * 2 + x_train[:, 3] * 11 - x_train[:, 4] * 0.25
y_train = y_train.reshape(1000, 1)
x_train.shape
y_train.shape
train_data = np.concatenate((y_train, x_train), axis=1)
train_set = TrainDataset(train_data)
batch_size = 100
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
class MLP(nn.Module):
def __init__(self):
super(MLP, self).__init__()
self.mlp = nn.Sequential(nn.Linear(5, 1, bias=False))
def forward(self, x_mlp):
out = self.mlp(x_mlp)
return out
device = 'cpu'
model = MLP()
optimizer = torch.optim.SGD(model.parameters(), lr=0.02, momentum=0.82)
criterion = nn.MSELoss()
epochs = 5
lam = 0
model.train()
for epoch in range(epochs):
losses = []
for batch_num, input_data in enumerate(train_loader):
optimizer.zero_grad()
x, y = input_data
x = x.to(device).float()
y = y.reshape(batch_size, 1)
y = y.to(device)
output = model(x)
for name, param in model.named_parameters():
if name == 'mlp.0.weight':
l1_norm = torch.norm(param, 1)
loss = criterion(output, y) + lam * l1_norm
loss.backward()
optimizer.step()
print('\tEpoch %d | Batch %d | Loss %6.2f' % (epoch, batch_num, loss.item()))
for name, param in model.named_parameters():
if param.requires_grad:
print(name)
print(param)
I found that if I use Adagrad as the optimiser instead of SGD, it acts as expected. Will need to look into the difference of those now, but this can be considered answered.

Beginner PyTorch - RuntimeError: shape '[16, 400]' is invalid for input of size 9600

I'm trying to build a CNN but I get this error:
---> 52 x = x.view(x.size(0), 5 * 5 * 16)
RuntimeError: shape '[16, 400]' is invalid for input of size 9600
It's not clear for me what the inputs of the 'x.view' line should be. Also, I don't really understand how many times I should have this 'x.view' function in my code. Is it only once, after the 3 convolutional layers and 2 linear layers? Or is it 5 times, one after every layer?
Here's my code:
CNN
import torch.nn.functional as F
# Convolutional neural network
class ConvNet(nn.Module):
def __init__(self, num_classes=10):
super(ConvNet, self).__init__()
self.conv1 = nn.Conv2d(
in_channels=3,
out_channels=16,
kernel_size=3)
self.conv2 = nn.Conv2d(
in_channels=16,
out_channels=24,
kernel_size=4)
self.conv3 = nn.Conv2d(
in_channels=24,
out_channels=32,
kernel_size=4)
self.dropout = nn.Dropout2d(p=0.3)
self.pool = nn.MaxPool2d(2)
self.fc1 = nn.Linear(16 * 5 * 5, 120)
self.fc2 = nn.Linear(512, 10)
self.final = nn.Softmax(dim=1)
def forward(self, x):
print('shape 0 ' + str(x.shape))
x = F.max_pool2d(F.relu(self.conv1(x)), 2)
x = self.dropout(x)
print('shape 1 ' + str(x.shape))
x = F.max_pool2d(F.relu(self.conv2(x)), 2)
x = self.dropout(x)
print('shape 2 ' + str(x.shape))
# x = F.max_pool2d(F.relu(self.conv3(x)), 2)
# x = self.dropout(x)
x = F.interpolate(x, size=(5, 5))
x = x.view(x.size(0), 5 * 5 * 16)
x = self.fc1(x)
return x
net = ConvNet()
Can someone help me understand the problem?
The output of 'x.shape' is:
shape 0 torch.Size([16, 3, 256, 256])
shape 1 torch.Size([16, 16, 127, 127])
shape 2 torch.Size([16, 24, 62, 62])
Thanks
This means that instead the product of the channel and spatial dimensions is not 5*5*16. To flatten the tensor, replace x = x.view(x.size(0), 5 * 5 * 16) with:
x = x.view(x.size(0), -1)
And self.fc1 = nn.Linear(600, 120) with:
self.fc1 = nn.Linear(600, 120)