Intermediate layer outputs pytorch - neural-network

I have an AlexNet neural network:
class AlexNet(nn.Module):
    def __init__(self, num_classes=100):
        super(AlexNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), 256 * 6 * 6)
        x = self.classifier(x)
        return x
I am trying to get the outputs of intermediate layers (for example, the penultimate layer) with a backward hook, but I couldn't get it to work.

According to this answer
You have to split your model into different parts and create methods to access those parts, such as:
class AlexNet(nn.Module):
    def __init__(self, num_classes=100):
        super(AlexNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def getFeatures(self, x):
        x = self.features(x)
        return x.view(x.size(0), 256 * 6 * 6)

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), 256 * 6 * 6)
        x = self.classifier(x)
        return x
This way is quite common and you can find plenty of examples.
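If you do not want to modify the model at all, a forward hook on the submodule of interest also works. A minimal sketch, assuming the AlexNet definition above (the hook is attached to classifier[4], the second Linear(4096, 4096), i.e. the penultimate layer):

import torch

model = AlexNet(num_classes=100)
activations = {}

def save_activation(name):
    def hook(module, inputs, output):
        # keep a detached copy of this layer's output
        activations[name] = output.detach()
    return hook

# index 4 of the classifier Sequential is the second Linear(4096, 4096)
model.classifier[4].register_forward_hook(save_activation("penultimate"))

x = torch.randn(1, 3, 224, 224)
_ = model(x)
print(activations["penultimate"].shape)  # torch.Size([1, 4096])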

Related

TypeError: forward() missing 1 required positional argument: 'c'

I created this simplified version of VGG16:
class VGG16COMBO(nn.Module):
def __init__(self, num_classes):
super(VGG16COMBO, self).__init__()
# calculate same padding:
# (w - k + 2*p)/s + 1 = o
# => p = (s(o-1) - w + k)/2
self.block_1 = nn.Sequential(
nn.Conv2d(in_channels=1,
out_channels=64,
kernel_size=(3, 3),
stride=(1, 1),
# (1(32-1)- 32 + 3)/2 = 1
padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.Conv2d(in_channels=64,
out_channels=64,
kernel_size=(3, 3),
stride=(1, 1),
padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(kernel_size=(2, 2),
stride=(2, 2))
)
self.block_2 = nn.Sequential(
nn.Conv2d(in_channels=64,
out_channels=128,
kernel_size=(3, 3),
stride=(1, 1),
padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.Conv2d(in_channels=128,
out_channels=128,
kernel_size=(3, 3),
stride=(1, 1),
padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(kernel_size=(2, 2),
stride=(2, 2))
)
self.block_3 = nn.Sequential(
nn.Conv2d(in_channels=128,
out_channels=256,
kernel_size=(3, 3),
stride=(1, 1),
padding=1),
nn.BatchNorm2d(256),
nn.ReLU(),
nn.Conv2d(in_channels=256,
out_channels=256,
kernel_size=(3, 3),
stride=(1, 1),
padding=1),
nn.BatchNorm2d(256),
nn.ReLU(),
nn.Conv2d(in_channels=256,
out_channels=256,
kernel_size=(3, 3),
stride=(1, 1),
padding=1),
nn.BatchNorm2d(256),
nn.ReLU(),
nn.MaxPool2d(kernel_size=(2, 2),
stride=(2, 2))
)
self.block_4 = nn.Sequential(
nn.Conv2d(in_channels=256,
out_channels=512,
kernel_size=(3, 3),
stride=(1, 1),
padding=1),
nn.BatchNorm2d(512),
nn.ReLU(),
nn.Conv2d(in_channels=512,
out_channels=512,
kernel_size=(3, 3),
stride=(1, 1),
padding=1),
nn.BatchNorm2d(512),
nn.ReLU(),
nn.Conv2d(in_channels=512,
out_channels=512,
kernel_size=(3, 3),
stride=(1, 1),
padding=1),
nn.BatchNorm2d(512),
nn.ReLU(),
nn.MaxPool2d(kernel_size=(2, 2),
stride=(2, 2))
)
self.classifier = nn.Sequential(
nn.Linear(2048, 4096),
nn.ReLU(True),
nn.Dropout(p=0.25),
nn.Linear(4096, 4096),
nn.ReLU(True),
nn.Dropout(p=0.25),
nn.Linear(4096, num_classes),
)
def forward(self, m, c):
m = self.block_1(m)
m = self.block_2(m)
m = self.block_3(m)
m = self.block_4(m)
m = m.view(m.size(0), -1)
m = self.classifier(m)
c = self.block_1(c)
c = self.block_2(c)
c = self.block_3(c)
c = self.block_4(c)
c = c.view(c.size(0), -1)
c = self.classifier(c)
x = torch.cat((m, c), dim=1)
return x
You can see that in forward I pass 2 elements, m and c. m refers to MNIST and c to CIFAR10, because I want a multi-input neural network (a network with shared weights).
Then:
modelcombo = VGG16COMBO(1).cuda()
print(modelcombo)
# Define an optimizer
import torch.optim as optim
optimizer = optim.SGD(modelcombo.parameters(), lr = 0.01)
# Define a loss
criterion = nn.BCEWithLogitsLoss()
This is my training function:
# training function - to be modified to use both datasets
def train(net, loaders, optimizer, criterion, epochs=20, dev=dev, save_param = False, model_name="valerio"):
try:
net = net.to(dev)
#print(net)
# Initialize history
history_loss = {"train": [], "val": [], "test": []}
history_accuracy = {"train": [], "val": [], "test": []}
# Store the best val accuracy
best_val_accuracy = 0
# Process each epoch
for epoch in range(epochs):
# Initialize epoch variables
sum_loss = {"train": 0, "val": 0, "test": 0}
sum_accuracy = {"train": 0, "val": 0, "test": 0}
# Process each split
for split in ["train", "val", "test"]:
if split == "train":
net.train()
else:
net.eval()
# Process each batch
for (input, labels) in loaders[split]:
# Move to CUDA
input = input.to(dev)
labels = labels.to(dev)
# Reset gradients
optimizer.zero_grad()
# Compute output
pred = net(input)
#pred = pred.squeeze(dim=1) # Output shape is [Batch size, 1], but we want [Batch size]
labels = labels.unsqueeze(1)
labels = labels.float()
loss = criterion(pred, labels)
# Update loss
sum_loss[split] += loss.item()
# Check parameter update
if split == "train":
# Compute gradients
loss.backward()
# Optimize
optimizer.step()
# Compute accuracy
#pred_labels = pred.argmax(1) + 1
pred_labels = (pred >= 0.5).long() # Binarize predictions to 0 and 1
batch_accuracy = (pred_labels == labels).sum().item()/input.size(0)
# Update accuracy
sum_accuracy[split] += batch_accuracy
# Compute epoch loss/accuracy
epoch_loss = {split: sum_loss[split]/len(loaders[split]) for split in ["train", "val", "test"]}
epoch_accuracy = {split: sum_accuracy[split]/len(loaders[split]) for split in ["train", "val", "test"]}
# Store params at the best validation accuracy
if save_param and epoch_accuracy["val"] > best_val_accuracy:
#torch.save(net.state_dict(), f"{net.__class__.__name__}_best_val.pth")
torch.save(net.state_dict(), f"{model_name}_best_val.pth")
best_val_accuracy = epoch_accuracy["val"]
# Update history
for split in ["train", "val", "test"]:
history_loss[split].append(epoch_loss[split])
history_accuracy[split].append(epoch_accuracy[split])
# Print info
print(f"Epoch {epoch+1}:",
f"TrL={epoch_loss['train']:.4f},",
f"TrA={epoch_accuracy['train']:.4f},",
f"VL={epoch_loss['val']:.4f},",
f"VA={epoch_accuracy['val']:.4f},",
f"TeL={epoch_loss['test']:.4f},",
f"TeA={epoch_accuracy['test']:.4f},")
except KeyboardInterrupt:
print("Interrupted")
finally:
# Plot loss
plt.title("Loss")
for split in ["train", "val", "test"]:
plt.plot(history_loss[split], label=split)
plt.legend()
plt.show()
# Plot accuracy
plt.title("Accuracy")
for split in ["train", "val", "test"]:
plt.plot(history_accuracy[split], label=split)
plt.legend()
plt.show()
But when I do the training
# Train model
train(modelcombo, loaders, optimizer, criterion, epochs=10, dev=dev)
I obtain this error:
TypeError: forward() missing 1 required positional argument: 'c'
What do I have to change, the net or the training function? I think the problem is in the training function, because I have to pass both loaders and loaders_cifar, but I don't know how. In particular, do I have to concatenate the MNIST loaders and the CIFAR loaders before passing them to the training function, or do I have to modify for (input, labels) in loaders[split]: into something like for (input, labels) in loaders[split] and loaders_cifar[split]:?
EDIT: I created this function:
def itr_merge(*itrs):
    for itr in itrs:
        for v in itr:
            yield v
Edited the training function in this way:
# training function - to be modified to use both datasets
def train2(net, loaders, loaders_cifar, optimizer, criterion, epochs=20, dev=dev, save_param = False, model_name="valerio"):
try:
net = net.to(dev)
#print(net)
# Initialize history
history_loss = {"train": [], "val": [], "test": []}
history_accuracy = {"train": [], "val": [], "test": []}
# Store the best val accuracy
best_val_accuracy = 0
# Process each epoch
for epoch in range(epochs):
# Initialize epoch variables
sum_loss = {"train": 0, "val": 0, "test": 0}
sum_accuracy = {"train": 0, "val": 0, "test": 0}
# Process each split
for split in ["train", "val", "test"]:
if split == "train":
net.train()
else:
net.eval()
# Process each batch
for x in itr_merge(loaders[split], loaders_cifar[split]):
for (input, labels) in loaders[split]:
# Move to CUDA
input = input.to(dev)
labels = labels.to(dev)
# Reset gradients
optimizer.zero_grad()
# Compute output
pred = net(input)
#pred = pred.squeeze(dim=1) # Output shape is [Batch size, 1], but we want [Batch size]
labels = labels.unsqueeze(1)
labels = labels.float()
loss = criterion(pred, labels)
# Update loss
sum_loss[split] += loss.item()
# Check parameter update
if split == "train":
# Compute gradients
loss.backward()
# Optimize
optimizer.step()
# Compute accuracy
#pred_labels = pred.argmax(1) + 1
pred_labels = (pred >= 0.5).long() # Binarize predictions to 0 and 1
batch_accuracy = (pred_labels == labels).sum().item()/input.size(0)
# Update accuracy
sum_accuracy[split] += batch_accuracy
# Compute epoch loss/accuracy
epoch_loss = {split: sum_loss[split]/len(loaders[split]) for split in ["train", "val", "test"]}
epoch_accuracy = {split: sum_accuracy[split]/len(loaders[split]) for split in ["train", "val", "test"]}
# Store params at the best validation accuracy
if save_param and epoch_accuracy["val"] > best_val_accuracy:
#torch.save(net.state_dict(), f"{net.__class__.__name__}_best_val.pth")
torch.save(net.state_dict(), f"{model_name}_best_val.pth")
best_val_accuracy = epoch_accuracy["val"]
# Update history
for split in ["train", "val", "test"]:
history_loss[split].append(epoch_loss[split])
history_accuracy[split].append(epoch_accuracy[split])
# Print info
print(f"Epoch {epoch+1}:",
f"TrL={epoch_loss['train']:.4f},",
f"TrA={epoch_accuracy['train']:.4f},",
f"VL={epoch_loss['val']:.4f},",
f"VA={epoch_accuracy['val']:.4f},",
f"TeL={epoch_loss['test']:.4f},",
f"TeA={epoch_accuracy['test']:.4f},")
except KeyboardInterrupt:
print("Interrupted")
finally:
# Plot loss
plt.title("Loss")
for split in ["train", "val", "test"]:
plt.plot(history_loss[split], label=split)
plt.legend()
plt.show()
# Plot accuracy
plt.title("Accuracy")
for split in ["train", "val", "test"]:
plt.plot(history_accuracy[split], label=split)
plt.legend()
plt.show()
But I still get the same error.
Yes, if you have two inputs, then pass two arguments here:
pred = net(input1, input2)  # input1 -> mnist, input2 -> cifar
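To feed both datasets in one training loop, a minimal sketch is to iterate the two loaders in lockstep with zip (this assumes matching batch sizes; zip stops at the shorter loader, and the loss/accuracy handling of the concatenated output is left as in your code):

for (m_input, m_labels), (c_input, c_labels) in zip(loaders[split], loaders_cifar[split]):
    # move both batches to the device
    m_input, m_labels = m_input.to(dev), m_labels.to(dev)
    c_input, c_labels = c_input.to(dev), c_labels.to(dev)
    optimizer.zero_grad()
    # forward(self, m, c) receives one MNIST batch and one CIFAR batch per step
    pred = net(m_input, c_input)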

RuntimeError: Given groups=1, weight of size [64, 3, 3, 3], expected input[64, 1, 32, 32] to have 3 channels, but got 1 channels instead

I am working on the SVHN dataset, and I get this error only during the training phase; the model instantiation works fine.
RuntimeError: Given groups=1, weight of size [64, 3, 3, 3], expected input[64, 1, 32, 32] to have 3 channels, but got 1 channels instead
To be sure of having 3 channels, I added the Grayscale(num_output_channels=3) transformation.
# Compose transformations
data_transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.Grayscale(num_output_channels=3),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])
# Compose transformations
test_transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.Grayscale(num_output_channels=3),
    transforms.ToTensor(),
])
# Load SVHN dataset with transforms
train_set_svhn = torchvision.datasets.SVHN(root=base_dir, split='train', download=True, transform=data_transform, target_transform=None)
test_set_svhn = torchvision.datasets.SVHN(root=base_dir, split='test', download=True, transform=test_transform)
class VGG16(nn.Module):
def __init__(self, num_classes):
super(VGG16, self).__init__()
# calculate same padding:
# (w - k + 2*p)/s + 1 = o
# => p = (s(o-1) - w + k)/2
self.block_1 = nn.Sequential(
nn.Conv2d(in_channels=3,
out_channels=64,
kernel_size=(3, 3),
stride=(1, 1),
# (1(32-1)- 32 + 3)/2 = 1
padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.Conv2d(in_channels=64,
out_channels=64,
kernel_size=(3, 3),
stride=(1, 1),
padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(kernel_size=(2, 2),
stride=(2, 2))
)
self.block_2 = nn.Sequential(
nn.Conv2d(in_channels=64,
out_channels=128,
kernel_size=(3, 3),
stride=(1, 1),
padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.Conv2d(in_channels=128,
out_channels=128,
kernel_size=(3, 3),
stride=(1, 1),
padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(kernel_size=(2, 2),
stride=(2, 2))
)
self.block_3 = nn.Sequential(
nn.Conv2d(in_channels=128,
out_channels=256,
kernel_size=(3, 3),
stride=(1, 1),
padding=1),
nn.BatchNorm2d(256),
nn.ReLU(),
nn.Conv2d(in_channels=256,
out_channels=256,
kernel_size=(3, 3),
stride=(1, 1),
padding=1),
nn.BatchNorm2d(256),
nn.ReLU(),
nn.Conv2d(in_channels=256,
out_channels=256,
kernel_size=(3, 3),
stride=(1, 1),
padding=1),
nn.BatchNorm2d(256),
nn.ReLU(),
nn.MaxPool2d(kernel_size=(2, 2),
stride=(2, 2))
)
self.classifier = nn.Sequential(
nn.Linear(4096, 4096),
nn.ReLU(True),
nn.Dropout(p=0.25),
nn.Linear(4096, 4096),
nn.ReLU(True),
nn.Dropout(p=0.25),
nn.Linear(4096, num_classes),
)
for m in self.modules():
if isinstance(m, torch.nn.Conv2d) or isinstance(m, torch.nn.Linear):
nn.init.kaiming_uniform_(m.weight, mode='fan_in', nonlinearity='leaky_relu')
# nn.init.xavier_normal_(m.weight)
if m.bias is not None:
m.bias.detach().zero_()
# self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
def forward(self, x):
x = self.block_1(x)
x = self.block_2(x)
x = self.block_3(x)
# x = self.avgpool(x)
x = x.view(x.size(0), -1)
logits = self.classifier(x)
probas = F.softmax(logits, dim=1)
# probas = nn.Softmax(logits)
return probas
# return logits
I have no idea where that 1 comes from
Moreover, this is the shape of the output of the model without classifier (fully connected layers):
output = model1(test_x)
output.shape
torch.Size([1, 256, 4, 4])
And indeed I will pass 256x4x4 as input to the first FC.
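A quick way to track down where the single-channel batch comes from is to print the shape of one batch from the loader that actually feeds the model during training (the loader name below is a placeholder for your own DataLoader):

# hypothetical loader name; substitute the DataLoader you train with
images, labels = next(iter(train_loader_svhn))
print(images.shape)  # should be torch.Size([batch_size, 3, 32, 32]) if Grayscale(3) is applied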
Edit:
I encountered a similar problem, but I did not use the MNIST dataset. I confirmed that my input has shape (256, 256, 3), and this is the error from my script:
RuntimeError: Given groups=1, weight of size [64, 3, 4, 4], expected input[2, 2, 64, 64] to have 3 channels, but got 2 channels instead

Binary classification on MNIST: loss and accuracies remain constant

I am trying to do binary classification on MNIST dataset. Class 0 for even numbers and class 1 for odd numbers. I am using a simplified version of VGG.
My NN has a loss and an accuracy that remain constant.
I should mention that my model reached over 90% accuracy before I changed the targets to binary, so probably there is something wrong.
Here I change the targets into binary:
for i in range(10):
    idx = (train_set.targets == i)
    if (i == 0) or ((i % 2) == 0):
        train_set.targets[idx] = 0
    else:
        train_set.targets[idx] = 1
for i in range(10):
    idx = (test_set.targets == i)
    if (i == 0) or ((i % 2) == 0):
        test_set.targets[idx] = 0
    else:
        test_set.targets[idx] = 1
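As an aside, since the targets are integer tensors, the same relabelling can be written in one line per split; a small equivalent sketch:

# 0 for even digits, 1 for odd digits
train_set.targets = train_set.targets % 2
test_set.targets = test_set.targets % 2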
This is my net:
class VGG16(nn.Module):
def __init__(self, num_classes):
super(VGG16, self).__init__()
# calculate same padding:
# (w - k + 2*p)/s + 1 = o
# => p = (s(o-1) - w + k)/2
self.block_1 = nn.Sequential(
nn.Conv2d(in_channels=1,
out_channels=64,
kernel_size=(3, 3),
stride=(1, 1),
# (1(32-1)- 32 + 3)/2 = 1
padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.Conv2d(in_channels=64,
out_channels=64,
kernel_size=(3, 3),
stride=(1, 1),
padding=1),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(kernel_size=(2, 2),
stride=(2, 2))
)
self.block_2 = nn.Sequential(
nn.Conv2d(in_channels=64,
out_channels=128,
kernel_size=(3, 3),
stride=(1, 1),
padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.Conv2d(in_channels=128,
out_channels=128,
kernel_size=(3, 3),
stride=(1, 1),
padding=1),
nn.BatchNorm2d(128),
nn.ReLU(),
nn.MaxPool2d(kernel_size=(2, 2),
stride=(2, 2))
)
self.block_3 = nn.Sequential(
nn.Conv2d(in_channels=128,
out_channels=256,
kernel_size=(3, 3),
stride=(1, 1),
padding=1),
nn.BatchNorm2d(256),
nn.ReLU(),
nn.Conv2d(in_channels=256,
out_channels=256,
kernel_size=(3, 3),
stride=(1, 1),
padding=1),
nn.BatchNorm2d(256),
nn.ReLU(),
nn.Conv2d(in_channels=256,
out_channels=256,
kernel_size=(3, 3),
stride=(1, 1),
padding=1),
nn.BatchNorm2d(256),
nn.ReLU(),
nn.MaxPool2d(kernel_size=(2, 2),
stride=(2, 2))
)
self.block_4 = nn.Sequential(
nn.Conv2d(in_channels=256,
out_channels=512,
kernel_size=(3, 3),
stride=(1, 1),
padding=1),
nn.BatchNorm2d(512),
nn.ReLU(),
nn.Conv2d(in_channels=512,
out_channels=512,
kernel_size=(3, 3),
stride=(1, 1),
padding=1),
nn.BatchNorm2d(512),
nn.ReLU(),
nn.Conv2d(in_channels=512,
out_channels=512,
kernel_size=(3, 3),
stride=(1, 1),
padding=1),
nn.BatchNorm2d(512),
nn.ReLU(),
nn.MaxPool2d(kernel_size=(2, 2),
stride=(2, 2))
)
self.classifier = nn.Sequential(
nn.Linear(2048, 4096),
nn.ReLU(True),
nn.Dropout(p=0.65),
nn.Linear(4096, 4096),
nn.ReLU(True),
nn.Dropout(p=0.65),
nn.Linear(4096, num_classes),
nn.Sigmoid()
)
for m in self.modules():
if isinstance(m, torch.nn.Conv2d) or isinstance(m, torch.nn.Linear):
nn.init.kaiming_uniform_(m.weight, mode='fan_in', nonlinearity='leaky_relu')
# nn.init.xavier_normal_(m.weight)
if m.bias is not None:
m.bias.detach().zero_()
# self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
def forward(self, x):
x = self.block_1(x)
x = self.block_2(x)
x = self.block_3(x)
x = self.block_4(x)
# x = self.avgpool(x)
x = x.view(x.size(0), -1)
x = self.classifier(x)
return x
#logits = self.classifier(x)
#probas = F.softmax(logits, dim=1)
# probas = nn.Softmax(logits)
#return probas
# return logits
# Define an optimizer
import torch.optim as optim
optimizer = optim.SGD(model.parameters(), lr = 0.01)
# Define a loss
criterion = nn.BCELoss()
def train(net, loaders, optimizer, criterion, epochs=20, dev=dev, save_param = False, model_name="valerio"):
try:
net = net.to(dev)
#print(net)
# Initialize history
history_loss = {"train": [], "val": [], "test": []}
history_accuracy = {"train": [], "val": [], "test": []}
# Store the best val accuracy
best_val_accuracy = 0
# Process each epoch
for epoch in range(epochs):
# Initialize epoch variables
sum_loss = {"train": 0, "val": 0, "test": 0}
sum_accuracy = {"train": 0, "val": 0, "test": 0}
# Process each split
for split in ["train", "val", "test"]:
if split == "train":
net.train()
else:
net.eval()
# Process each batch
for (input, labels) in loaders[split]:
# Move to CUDA
input = input.to(dev)
labels = labels.to(dev)
# Reset gradients
optimizer.zero_grad()
# Compute output
pred = net(input)
labels = labels.unsqueeze(1)
labels = labels.float()
loss = criterion(pred, labels)
# Update loss
sum_loss[split] += loss.item()
# Check parameter update
if split == "train":
# Compute gradients
loss.backward()
# Optimize
optimizer.step()
# Compute accuracy
_,pred_labels = pred.max(1)
batch_accuracy = (pred_labels == labels).sum().item()/input.size(0)
# Update accuracy
sum_accuracy[split] += batch_accuracy
# Compute epoch loss/accuracy
epoch_loss = {split: sum_loss[split]/len(loaders[split]) for split in ["train", "val", "test"]}
epoch_accuracy = {split: sum_accuracy[split]/len(loaders[split]) for split in ["train", "val", "test"]}
# Store params at the best validation accuracy
if save_param and epoch_accuracy["val"] > best_val_accuracy:
#torch.save(net.state_dict(), f"{net.__class__.__name__}_best_val.pth")
torch.save(net.state_dict(), f"{model_name}_best_val.pth")
best_val_accuracy = epoch_accuracy["val"]
# Update history
for split in ["train", "val", "test"]:
history_loss[split].append(epoch_loss[split])
history_accuracy[split].append(epoch_accuracy[split])
# Print info
print(f"Epoch {epoch+1}:",
f"TrL={epoch_loss['train']:.4f},",
f"TrA={epoch_accuracy['train']:.4f},",
f"VL={epoch_loss['val']:.4f},",
f"VA={epoch_accuracy['val']:.4f},",
f"TeL={epoch_loss['test']:.4f},",
f"TeA={epoch_accuracy['test']:.4f},")
except KeyboardInterrupt:
print("Interrupted")
finally:
# Plot loss
plt.title("Loss")
for split in ["train", "val", "test"]:
plt.plot(history_loss[split], label=split)
plt.legend()
plt.show()
# Plot accuracy
plt.title("Accuracy")
for split in ["train", "val", "test"]:
plt.plot(history_accuracy[split], label=split)
plt.legend()
plt.show()
From the previous digit-recognition model I changed only the targets and the final layer of the classifier, from 10 classes to 1 class + Sigmoid. I also changed cross entropy to BCELoss. What am I doing wrong?
These are loss and accuracy values:
Epoch 1: TrL=49.0955, TrA=31.4211, VL=49.7285, VA=31.7340, TeL=49.2635, TeA=31.3758,
Epoch 2: TrL=49.0992, TrA=31.4235, VL=49.7285, VA=31.7340, TeL=49.2635, TeA=31.3758,
Epoch 3: TrL=49.0899, TrA=31.4176, VL=49.7285, VA=31.7340, TeL=49.2635, TeA=31.3758,
Epoch 4: TrL=49.0936, TrA=31.4199, VL=49.7285, VA=31.7340, TeL=49.2635, TeA=31.3758,
Epoch 5: TrL=49.0936, TrA=31.4199, VL=49.7285, VA=31.7340, TeL=49.2635, TeA=31.3758,
Epoch 6: TrL=49.0825, TrA=31.4128, VL=49.7285, VA=31.7340, TeL=49.2635, TeA=31.3758,
What's wrong? How is it possible that with 10 classes I reached over 90% accuracy, while with this simplified version, with only 2 classes, I only reach 30% accuracy?
Edit: after increasing the batch size from 64 to 128, accuracy reaches 60% and then remains constant...
In my opinion, the problem is the varied appearance of odd and even digits. Take 1 and 3: pictures of these digits look quite different from each other, so a convolutional neural network struggles to extract a single shared feature for them. Your network already reaches 90% accuracy with 10 classes, so why do you need to convert it to 2? If you know that the digit is 1, 3, 5, 7 or 9, you know that it's odd.
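A small sketch of that suggestion, assuming you keep the original 10-class model: derive the even/odd prediction from the predicted digit at evaluation time.

# logits: output of the 10-class model, shape [batch_size, 10]
pred_digit = logits.argmax(dim=1)   # predicted digit, 0..9
pred_parity = pred_digit % 2        # 0 = even, 1 = odd
binary_accuracy = (pred_parity == (labels % 2)).float().mean()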

How to get an output dimension for each layer of the Neural Network in Pytorch?

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.net = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=16),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(in_channels=16, out_channels=16),
            nn.ReLU(),
            Flatten(),
            nn.Linear(4096, 64),
            nn.ReLU(),
            nn.Linear(64, 10))

    def forward(self, x):
        return self.net(x)
I created this model without firm knowledge of neural networks, and I just adjusted parameters until it worked during training. I am not sure how to get the output dimension of each layer (e.g. the output dimension after the first layer).
Is there an easy way to do this in PyTorch?
You can use torchsummary; for instance, for the ImageNet dimensions (3x224x224):
from torchvision import models
from torchsummary import summary
vgg = models.vgg16()
summary(vgg, (3, 224, 224))
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 64, 224, 224] 1,792
ReLU-2 [-1, 64, 224, 224] 0
Conv2d-3 [-1, 64, 224, 224] 36,928
ReLU-4 [-1, 64, 224, 224] 0
MaxPool2d-5 [-1, 64, 112, 112] 0
Conv2d-6 [-1, 128, 112, 112] 73,856
ReLU-7 [-1, 128, 112, 112] 0
Conv2d-8 [-1, 128, 112, 112] 147,584
ReLU-9 [-1, 128, 112, 112] 0
MaxPool2d-10 [-1, 128, 56, 56] 0
Conv2d-11 [-1, 256, 56, 56] 295,168
ReLU-12 [-1, 256, 56, 56] 0
Conv2d-13 [-1, 256, 56, 56] 590,080
ReLU-14 [-1, 256, 56, 56] 0
Conv2d-15 [-1, 256, 56, 56] 590,080
ReLU-16 [-1, 256, 56, 56] 0
MaxPool2d-17 [-1, 256, 28, 28] 0
Conv2d-18 [-1, 512, 28, 28] 1,180,160
ReLU-19 [-1, 512, 28, 28] 0
Conv2d-20 [-1, 512, 28, 28] 2,359,808
ReLU-21 [-1, 512, 28, 28] 0
Conv2d-22 [-1, 512, 28, 28] 2,359,808
ReLU-23 [-1, 512, 28, 28] 0
MaxPool2d-24 [-1, 512, 14, 14] 0
Conv2d-25 [-1, 512, 14, 14] 2,359,808
ReLU-26 [-1, 512, 14, 14] 0
Conv2d-27 [-1, 512, 14, 14] 2,359,808
ReLU-28 [-1, 512, 14, 14] 0
Conv2d-29 [-1, 512, 14, 14] 2,359,808
ReLU-30 [-1, 512, 14, 14] 0
MaxPool2d-31 [-1, 512, 7, 7] 0
Linear-32 [-1, 4096] 102,764,544
ReLU-33 [-1, 4096] 0
Dropout-34 [-1, 4096] 0
Linear-35 [-1, 4096] 16,781,312
ReLU-36 [-1, 4096] 0
Dropout-37 [-1, 4096] 0
Linear-38 [-1, 1000] 4,097,000
================================================================
Total params: 138,357,544
Trainable params: 138,357,544
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 218.59
Params size (MB): 527.79
Estimated Total Size (MB): 746.96
----------------------------------------------------------------
Source: model-summary-in-pytorch
A simple way is:
Pass the input to the model.
Print the size of the output after passing every layer.
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.net = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=16),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(in_channels=16, out_channels=16),
            nn.ReLU(),
            Flatten(),
            nn.Linear(4096, 64),
            nn.ReLU(),
            nn.Linear(64, 10))

    def forward(self, x):
        for layer in self.net:
            x = layer(x)
            print(x.size())
        return x
model = Model()
x = torch.randn(1, 3, 224, 224)
# Let's print it
model(x)
But be careful with the input size, because you are using nn.Linear in your net: if the flattened input size is not 4096, the nn.Linear layer will raise a size-mismatch error.
Like David Ng's answer but a tad shorter:
def get_output_shape(model, image_dim):
    return model(torch.rand(*(image_dim))).data.shape
In this example I needed to figure out the input of the last Linear layer:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.expected_input_shape = (1, 1, 192, 168)
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.dropout1 = nn.Dropout2d(0.25)
        self.dropout2 = nn.Dropout2d(0.5)
        self.maxpool1 = nn.MaxPool2d(2)
        self.maxpool2 = nn.MaxPool2d(3)
        # Calculate the input size of the Linear layer
        conv1_out = get_output_shape(self.maxpool1, get_output_shape(self.conv1, self.expected_input_shape))
        conv2_out = get_output_shape(self.maxpool2, get_output_shape(self.conv2, conv1_out))
        fc1_in = np.prod(list(conv2_out))  # Flatten
        self.fc1 = nn.Linear(fc1_in, 38)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.maxpool1(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = self.maxpool2(x)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)  # flatten to a single dimension
        x = self.fc1(x)
        output = F.log_softmax(x, dim=1)
        return output
This way, if I make changes to previous layers, I won't have to calculate all over again!
My answer is based on this answer
Another way to get the size after a certain layer in an nn.Sequential container is to add a custom Module that just prints out the size of the input.
class PrintSize(nn.Module):
    def __init__(self):
        super(PrintSize, self).__init__()

    def forward(self, x):
        print(x.shape)
        return x
And now you can do:
model = nn.Sequential(
    nn.Conv2d(3, 10, 5, 1),
    # lots of convolutions, pooling, etc.
    nn.Flatten(),
    PrintSize(),
    nn.Linear(1, 12),  # the input dim of 1 is just a placeholder
)
Now you can run model(x) and it will print out the shape of the output after the Conv2d layers have run. This is useful if you have a lot of convolutions and want to figure out what the final dimensions are for the first fully connected layer. You don't need to rewrite your nn.Sequential as a Module; you can just drop this helper class in with one line.
You can also iterate over the model's children and print out_features wherever it is defined (this covers layers such as nn.Linear):
for layer in model.children():
    if hasattr(layer, 'out_features'):
        print(layer.out_features)
Here's a solution in the form of a helper function:
def get_tensor_dimensions_impl(model, layer, image_size, for_input=False):
    t_dims = None

    def _local_hook(_, _input, _output):
        nonlocal t_dims
        t_dims = _input[0].size() if for_input else _output.size()
        return _output

    layer.register_forward_hook(_local_hook)
    dummy_var = torch.zeros(1, 3, image_size, image_size)
    model(dummy_var)
    return t_dims
Example:
from torchvision import models, transforms
a_model = models.squeezenet1_0(pretrained=True)
get_tensor_dimensions_impl(a_model, a_model._modules['classifier'], 224)
Output is:
torch.Size([1, 1000, 1, 1])
Maybe you can try print(model.state_dict()['next_layer.weight'].shape).
This gives you a hint of the output shape of the preceding layer.

How to count the amount of layers in a CNN?

The PyTorch implementation of ResNet-18 has the following structure, which appears to contain 54 layers, not 18.
So why is it called "18"? How many layers does it actually have?
ResNet (
(conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
(relu): ReLU (inplace)
(maxpool): MaxPool2d (size=(3, 3), stride=(2, 2), padding=(1, 1), dilation=(1, 1))
(layer1): Sequential (
(0): BasicBlock (
(conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
(relu): ReLU (inplace)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
)
(1): BasicBlock (
(conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
(relu): ReLU (inplace)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
)
)
(layer2): Sequential (
(0): BasicBlock (
(conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True)
(relu): ReLU (inplace)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True)
(downsample): Sequential (
(0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True)
)
)
(1): BasicBlock (
(conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True)
(relu): ReLU (inplace)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True)
)
)
(layer3): Sequential (
(0): BasicBlock (
(conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True)
(relu): ReLU (inplace)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True)
(downsample): Sequential (
(0): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True)
)
)
(1): BasicBlock (
(conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True)
(relu): ReLU (inplace)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True)
)
)
(layer4): Sequential (
(0): BasicBlock (
(conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True)
(relu): ReLU (inplace)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True)
(downsample): Sequential (
(0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True)
)
)
(1): BasicBlock (
(conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True)
(relu): ReLU (inplace)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True)
)
)
(avgpool): AvgPool2d (
)
(fc): Linear (512 -> 1000)
)
From your output, we can see that there are 20 convolution layers (one 7x7 conv, sixteen 3x3 convs, plus three 1x1 convs for downsampling). Basically, if you ignore the 1x1 convs and count the FC (linear) layer, the number of layers is 18.
I've also made an example of how to visualize your architecture in PyTorch via graphviz; I hope it helps you understand the architecture.
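As a sketch of that kind of visualization (this assumes the third-party torchviz package, which renders the autograd graph through graphviz):

import torch
from torchvision import models
from torchviz import make_dot

resnet18 = models.resnet18()
x = torch.randn(1, 3, 224, 224)
# trace a forward pass and render the resulting graph to resnet18.png
make_dot(resnet18(x), params=dict(resnet18.named_parameters())).render("resnet18", format="png")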
Why does ResNet-18 have 18 layers?
Well, the answer is pretty straightforward: the number of layers in a neural net is a hyperparameter (meaning you can tune it as you want). In the ResNet paper, the authors trained models of various depths (such as 18, 34, and 50 layers) to conduct a proper study of accuracy, error rate, etc., and the naming convention they followed is ResNet-18, ResNet-34, ResNet-50, and so on.
Why does the architecture of ResNet-18 (the one you've provided in your question) have more than 18 layers?
There are a number of ways people count the layers of a deep neural net; some count the input/output layers as well, and some count the pooling layers.
But the way the authors did it in the ResNet paper is to count only the convolution layers and the fully connected layers, nothing else. However, the model architecture you've given has more than 18 even of those! That is simply because of the 1x1 convolution layers. The authors called them projection layers; they are used only to match the input dimension (x) with the residual block's dimension (F(x)) so that they can be summed (y = F(x) + x). So if you count without those projections (1x1 convs), you will see there are 18 layers, hence the name ResNet-18.
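A small sketch of that counting rule, assuming torchvision's ResNet-18: count every Conv2d except the 1x1 downsample projections, plus the final Linear layer.

import torch.nn as nn
from torchvision import models

resnet18 = models.resnet18()
convs = [name for name, m in resnet18.named_modules()
         if isinstance(m, nn.Conv2d) and "downsample" not in name]
fcs = [name for name, m in resnet18.named_modules() if isinstance(m, nn.Linear)]
print(len(convs) + len(fcs))  # 18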