How to get an output dimension for each layer of the Neural Network in Pytorch?

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.net = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=16),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(in_channels=16, out_channels=16),
            nn.ReLU(),
            Flatten(),
            nn.Linear(4096, 64),
            nn.ReLU(),
            nn.Linear(64, 10))

    def forward(self, x):
        return self.net(x)
I have created this model without a firm knowledge in Neural Network and I just fixed parameters until it worked in the training. I am not sure how to get the output dimension for each layer (e.g. output dimension after the first layer).
Is there an easy way to do this in Pytorch?

You can use torchsummary; for example, for the ImageNet input size (3x224x224):
from torchvision import models
from torchsummary import summary

vgg = models.vgg16()
summary(vgg, (3, 224, 224))
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 64, 224, 224] 1,792
ReLU-2 [-1, 64, 224, 224] 0
Conv2d-3 [-1, 64, 224, 224] 36,928
ReLU-4 [-1, 64, 224, 224] 0
MaxPool2d-5 [-1, 64, 112, 112] 0
Conv2d-6 [-1, 128, 112, 112] 73,856
ReLU-7 [-1, 128, 112, 112] 0
Conv2d-8 [-1, 128, 112, 112] 147,584
ReLU-9 [-1, 128, 112, 112] 0
MaxPool2d-10 [-1, 128, 56, 56] 0
Conv2d-11 [-1, 256, 56, 56] 295,168
ReLU-12 [-1, 256, 56, 56] 0
Conv2d-13 [-1, 256, 56, 56] 590,080
ReLU-14 [-1, 256, 56, 56] 0
Conv2d-15 [-1, 256, 56, 56] 590,080
ReLU-16 [-1, 256, 56, 56] 0
MaxPool2d-17 [-1, 256, 28, 28] 0
Conv2d-18 [-1, 512, 28, 28] 1,180,160
ReLU-19 [-1, 512, 28, 28] 0
Conv2d-20 [-1, 512, 28, 28] 2,359,808
ReLU-21 [-1, 512, 28, 28] 0
Conv2d-22 [-1, 512, 28, 28] 2,359,808
ReLU-23 [-1, 512, 28, 28] 0
MaxPool2d-24 [-1, 512, 14, 14] 0
Conv2d-25 [-1, 512, 14, 14] 2,359,808
ReLU-26 [-1, 512, 14, 14] 0
Conv2d-27 [-1, 512, 14, 14] 2,359,808
ReLU-28 [-1, 512, 14, 14] 0
Conv2d-29 [-1, 512, 14, 14] 2,359,808
ReLU-30 [-1, 512, 14, 14] 0
MaxPool2d-31 [-1, 512, 7, 7] 0
Linear-32 [-1, 4096] 102,764,544
ReLU-33 [-1, 4096] 0
Dropout-34 [-1, 4096] 0
Linear-35 [-1, 4096] 16,781,312
ReLU-36 [-1, 4096] 0
Dropout-37 [-1, 4096] 0
Linear-38 [-1, 1000] 4,097,000
================================================================
Total params: 138,357,544
Trainable params: 138,357,544
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 218.59
Params size (MB): 527.79
Estimated Total Size (MB): 746.96
----------------------------------------------------------------
Source: model-summary-in-pytorch

A simple way is:
1. Pass the input to the model.
2. Print the size of the output after passing every layer.
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.net = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=16),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(in_channels=16, out_channels=16),
            nn.ReLU(),
            Flatten(),
            nn.Linear(4096, 64),
            nn.ReLU(),
            nn.Linear(64, 10))

    def forward(self, x):
        for layer in self.net:
            x = layer(x)
            print(x.size())
        return x
model = Model()
x = torch.randn(1, 3, 224, 224)
# Let's print it
model(x)
But be careful with the input size, because you are using nn.Linear in your net: nn.Linear(4096, 64) will raise a size-mismatch error if the flattened output that reaches it is not of width 4096.
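One quick way to find the right in_features value is to run a dummy tensor through just the convolutional part and read off the flattened width. A minimal sketch (the kernel_size=3 and the 224x224 input below are assumptions for illustration, not values from the question):

import torch
import torch.nn as nn

# Convolutional part only, ending in Flatten (kernel sizes assumed).
conv_part = nn.Sequential(
    nn.Conv2d(3, 16, kernel_size=3),
    nn.ReLU(),
    nn.MaxPool2d(2),
    nn.Conv2d(16, 16, kernel_size=3),
    nn.ReLU(),
    nn.Flatten(),
)

with torch.no_grad():
    flat = conv_part(torch.randn(1, 3, 224, 224))
print(flat.shape)  # the second entry is the in_features the first nn.Linear needs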

Like David Ng's answer but a tad shorter:
def get_output_shape(model, image_dim):
    return model(torch.rand(*(image_dim))).data.shape
In this example I needed to figure out the input of the last Linear layer:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.expected_input_shape = (1, 1, 192, 168)
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.dropout1 = nn.Dropout2d(0.25)
        self.dropout2 = nn.Dropout2d(0.5)
        self.maxpool1 = nn.MaxPool2d(2)
        self.maxpool2 = nn.MaxPool2d(3)

        # Calculate the input of the Linear layer
        conv1_out = get_output_shape(self.maxpool1, get_output_shape(self.conv1, self.expected_input_shape))
        conv2_out = get_output_shape(self.maxpool2, get_output_shape(self.conv2, conv1_out))
        fc1_in = np.prod(list(conv2_out))  # Flatten

        self.fc1 = nn.Linear(fc1_in, 38)
    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.maxpool1(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = self.maxpool2(x)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)  # flatten to a single dimension
        x = self.fc1(x)
        output = F.log_softmax(x, dim=1)
        return output
This way, if I make changes to previous layers, I won't have to calculate all over again!
My answer is based on this answer
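As a standalone check of the helper on a single layer (sizes borrowed from the class above):

import torch
import torch.nn as nn

def get_output_shape(model, image_dim):
    return model(torch.rand(*image_dim)).data.shape

conv1 = nn.Conv2d(1, 32, 3, 1)
print(get_output_shape(conv1, (1, 1, 192, 168)))  # torch.Size([1, 32, 190, 166])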

Another way to get the size after a certain layer in an nn.Sequential container is to add a custom Module that just prints out the size of the input.
class PrintSize(nn.Module):
    def __init__(self):
        super(PrintSize, self).__init__()

    def forward(self, x):
        print(x.shape)
        return x
And now you can do:
model = nn.Sequential(
    nn.Conv2d(3, 10, 5, 1),
    # ... lots of convolutions, pooling, etc.
    nn.Flatten(),
    PrintSize(),
    nn.Linear(1, 12),  # the input dim of 1 is just a placeholder
)
Now you can do model(x) and it will print out the shape of the output after the Conv2d layer has run. This is useful if you have a lot of convolutions and want to figure out what the final dimensions are for the first fully connected layer. You don't need to rewrite your nn.Sequential as a Module; you can just drop in this helper class with one line.

for layer in model.children():
    if hasattr(layer, 'out_features'):
        print(layer.out_features)
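Note that this only reports layers that define out_features (e.g. nn.Linear); convolution layers expose out_channels instead, and no spatial sizes are computed. A small sketch of what it prints:

import torch.nn as nn

model = nn.Sequential(
    nn.Conv2d(3, 16, 3),
    nn.Flatten(),
    nn.Linear(400, 64),
    nn.Linear(64, 10),
)
for layer in model.children():
    if hasattr(layer, 'out_features'):
        print(layer.out_features)  # prints 64, then 10 (the Linear layers only)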

Here's a solution in the form of a helper function:
def get_tensor_dimensions_impl(model, layer, image_size, for_input=False):
    t_dims = None

    def _local_hook(_, _input, _output):
        nonlocal t_dims
        t_dims = _input[0].size() if for_input else _output.size()
        return _output

    handle = layer.register_forward_hook(_local_hook)
    dummy_var = torch.zeros(1, 3, image_size, image_size)
    model(dummy_var)
    handle.remove()  # remove the hook so repeated calls don't accumulate hooks
    return t_dims
Example:
from torchvision import models, transforms
a_model = models.squeezenet1_0(pretrained=True)
get_tensor_dimensions_impl(a_model, a_model._modules['classifier'], 224)
Output is:
torch.Size([1, 1000, 1, 1])

Maybe you can try print(model.state_dict()['next_layer.weight'].shape).
This gives you a hint of the output shape of the preceding layer, since the weight of an nn.Linear is stored as (out_features, in_features).
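A minimal sketch of what those shapes look like (the Sequential here is just an illustration):

import torch.nn as nn

net = nn.Sequential(nn.Linear(25, 64), nn.Linear(64, 10))
print(net.state_dict()['0.weight'].shape)  # torch.Size([64, 25])
print(net.state_dict()['1.weight'].shape)  # torch.Size([10, 64]); in_features 64 matches the previous layer's output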

Related

Dimension out of range (expected to be in range of [-1, 0], but got 1) (pytorch)

I have a very simple feed-forward neural network (PyTorch):
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

class Net_1(nn.Module):
    def __init__(self):
        super(Net_1, self).__init__()
        self.fc1 = nn.Linear(5*5, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 3)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return F.log_softmax(x, dim=1)

net = Net_1()
net = Net_1()
and the input is this 5x5 numpy array
state = [[0, 0, 3, 0, 0],
[0, 0, 0, 0, 0],
[0, 2, 1, 0, 0],
[0, 0, 0, 0, 0],
[0, 0, 0, 0, 0]]
state = torch.Tensor(state).view(-1)
net(state) throws the following error:
Dimension out of range (expected to be in range of [-1, 0], but got 1)
The problem arises when F.log_softmax() is applied.
At the point where you call return F.log_softmax(x, dim=1), x is a one-dimensional tensor with shape torch.Size([3]).
Dimension indexing in PyTorch starts at 0, so you cannot use dim=1 on a one-dimensional tensor; you need to use dim=0.
Replace return F.log_softmax(x, dim=1) with return F.log_softmax(x, dim=0) and you'll be good to go.
In the future you can check tensor sizes by adding print(x.shape) in forward.
You are giving a 3-element 1-D tensor to your log_softmax function.
When saying dim=1 you are telling it to apply softmax along an axis that doesn't exist.
Just set dim=0 for a 1-D tensor.
More on this function and what that parameter means here.
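A quick check of both options (a sketch, not part of the original answers): dim=0 works directly on the 1-D tensor, or you can keep a batch dimension and stick with dim=1:

import torch
import torch.nn.functional as F

x = torch.randn(3)               # 1-D tensor, shape torch.Size([3])
print(F.log_softmax(x, dim=0))   # works: softmax over the only axis

xb = x.view(1, -1)               # add a batch dimension, shape (1, 3)
print(F.log_softmax(xb, dim=1))  # also works and matches batched training code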

Calculate padding for 3D CNN in Pytorch

I'm currently trying to apply a 3D CNN to a set of images with dimensions 193 x 229 x 193 and would like to retain the same image dimensions through each convolutional layer (similar to TensorFlow's padding=SAME). I know that the padding can be calculated as follows:
S = Stride
P = Padding
W = Width
K = Kernel size

P = ((S - 1) * W - S + K) / 2
This yields a padding of 1 for the first layer:
P = ((1 - 1) * 193 - 1 + 3) / 2
P = 1.0
However, I also get a result of 1.0 for each of the subsequent layers, yet the output shape still changes (see below). Does anyone have any suggestions? Sorry, beginner here!
Reproducible example:
import torch
import torch.nn as nn
x = torch.randn(1, 1, 193, 229, 193)
padding = ((1-1)*96-1+3)/2
print(padding)
x = nn.Conv3d(in_channels=1, out_channels=8, kernel_size=3, padding=1)(x)
print("shape after conv1: " + str(x.shape))
x = nn.Conv3d(in_channels=8, out_channels=8, kernel_size=3,padding=1)(x)
x = nn.BatchNorm3d(8)(x)
print("shape after conv2 + batch norm: " + str(x.shape))
x = nn.ReLU()(x)
print("shape after reLU:" + str(x.shape))
x = nn.MaxPool3d(kernel_size=2, stride=2)(x)
print("shape after max pool" + str(x.shape))
x = nn.Conv3d(in_channels=8, out_channels=16, kernel_size=3,padding=1)(x)
print("shape after conv3: " + str(x.shape))
x = nn.Conv3d(in_channels=16, out_channels=16, kernel_size=3,padding=1)(x)
print("shape after conv4: " + str(x.shape))
Current output:
shape after conv1: torch.Size([1, 8, 193, 229, 193])
shape after conv2 + batch norm: torch.Size([1, 8, 193, 229, 193])
shape after reLU:torch.Size([1, 8, 193, 229, 193])
shape after max pooltorch.Size([1, 8, 96, 114, 96])
shape after conv3: torch.Size([1, 16, 96, 114, 96])
shape after conv4: torch.Size([1, 16, 96, 114, 96])
Desired output:
shape after conv1: torch.Size([1, 8, 193, 229, 193])
shape after conv2 + batch norm: torch.Size([1, 8, 193, 229, 193])
...
shape after conv3: torch.Size([1, 16, 193, 229, 193])
shape after conv4: torch.Size([1, 16, 193, 229, 193])
TL;DR: your formula also applies to nn.MaxPool3d.
You are using a max pool layer with kernel size 2 (implicitly (2, 2, 2)) and stride 2 (implicitly (2, 2, 2)). This means that for every 2x2x2 block you only get a single value. In other words, as the name implies, only the maximum value from every 2x2x2 block is pooled into the output array.
That's why you're going from (1, 8, 193, 229, 193) to (1, 8, 96, 114, 96) (notice the division by 2).
Of course, if you set kernel_size=3, stride=1, and padding=1 on nn.MaxPool3d, you will preserve the shape of your blocks.
Let #x be the input size and #w the kernel size. If we want the output to have the same size, then #x = floor((#x + 2p - #w)/s + 1) needs to hold. That gives 2p = s(#x - 1) - #x + #w = #x(s - 1) + #w - s (your formula).
Since s = 2 and #w = 2 for your pooling layer, 2p = #x, which is not possible.
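A minimal check of that shape-preserving setting (the tensor size is taken from the question):

import torch
import torch.nn as nn

x = torch.randn(1, 8, 193, 229, 193)
pool = nn.MaxPool3d(kernel_size=3, stride=1, padding=1)
print(pool(x).shape)  # torch.Size([1, 8, 193, 229, 193]), shape preserved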

How to smooth interpolation of a float array into a bigger array?

I'm stuck with interpolation in Swift. Can anyone help me with that?
I want to interpolate the float array (say [0, 0, 100, 25, 0, 0, 0, 25, 0, 0, 0]) into another array of some given size (for example 128). I found an article (Use Linear Interpolation to Construct New Data Points) that shows how to achieve this.
There are two approaches (you can see below how each performs):
Linear interpolation using vDSP_vgenp, and
smoother (but not smooth enough for my purposes) interpolation using vDSP_vlint.
The problem is that neither technique meets my expectations, as illustrated in Screenshot 3. How can I make the interpolated distribution smoother? I want to see a cubic-like curve.
Initial Plot:
Linear Interpolation:
import Accelerate
let n = vDSP_Length(128)
let stride = vDSP_Stride(1)
let values: [Float] = [0, 0, 100, 25, 0, 0, 0, 25, 0, 0, 0]
let indices: [Float] = [0, 11, 23, 34, 46, 58, 69, 81, 93, 104, 116]
var result = [Float](repeating: 0, count: Int(n))
vDSP_vgenp(values, stride, indices, stride, &result, stride, n, vDSP_Length(values.count))
Smooth Interpolation:
import Accelerate
import AVFoundation
let n = vDSP_Length(1024)
let stride = vDSP_Stride(1)
let values: [Float] = [0, 0, 100, 25, 0, 0, 0, 25, 0, 0, 0]
let denominator = Float(n) / Float(values.count - 1)
let control: [Float] = (0 ... n).map {
    let x = Float($0) / denominator
    return floor(x) + simd_smoothstep(0, 1, simd_fract(x))
}
var result = [Float](repeating: 0, count: Int(n))
vDSP_vlint(values, control, stride, &result, stride, n, vDSP_Length(values.count))
It seems to me that the vDSP_vqint quadratic interpolation functions would solve the problem. See the discussion at https://developer.apple.com/documentation/accelerate/1449942-vdsp_vqint.

How to construct a sobel filter for kernel initialization in input layer for images of size 128x128x3?

This is my code for the Sobel filter:
def init_f(shape, dtype=None):
    sobel_x = tf.constant([[-5, -4, 0, 4, 5], [-8, -10, 0, 10, 8], [-10, -20, 0, 20, 10], [-8, -10, 0, 10, 8], [-5, -4, 0, 4, 5]])
    ker = np.zeros(shape, dtype)
    ker_shape = tf.shape(ker)
    kernel = tf.tile(sobel_x, ker_shape)  # Is this correct?
    return kernel

model.add(Conv2D(filters=30, kernel_size=(5, 5), kernel_initializer=init_f, strides=(1, 1), activation='relu'))
So far I have managed to do this, but it gives me the error:
Shape must be rank 2 but is rank 4 for 'conv2d_17/Tile' (op: 'Tile') with input shapes: [5,5], [4].
TensorFlow version: 2.1.0
You're close, but the args to tile don't appear to be correct; that is why you're getting the error "Shape must be rank 2 but is rank 4 for...". Your sobel_x must be a rank-4 tensor, so you need to add two more dimensions. I used reshape in this example.
from tensorflow import keras
import tensorflow as tf
import numpy

def kernelInitializer(shape, dtype=None):
    print(shape)
    sobel_x = tf.constant(
        [
            [-5, -4, 0, 4, 5],
            [-8, -10, 0, 10, 8],
            [-10, -20, 0, 20, 10],
            [-8, -10, 0, 10, 8],
            [-5, -4, 0, 4, 5]
        ], dtype=dtype)
    # create the missing dims.
    sobel_x = tf.reshape(sobel_x, (5, 5, 1, 1))
    print(tf.shape(sobel_x))
    # tile the last 2 axis to get the expected dims.
    sobel_x = tf.tile(sobel_x, (1, 1, shape[-2], shape[-1]))
    print(tf.shape(sobel_x))
    return sobel_x

x1 = keras.layers.Input((128, 128, 3))
cvl = keras.layers.Conv2D(30, kernel_size=(5, 5), kernel_initializer=kernelInitializer, strides=(2, 2), activation='relu')

model = keras.Sequential()
model.add(x1)
model.add(cvl)

data = numpy.ones((1, 128, 128, 3))
data[:, 0:64, 0:64, :] = 0

pd = model.predict(data)
print(pd.shape)

d = pd[0, :, :, 0]
for row in d:
    for col in row:
        m = '0'
        if col != 0:
            m = 'X'
        print(m, end="")
    print("")
I looked at using expand_dims instead of reshape, but there didn't appear to be any advantage. broadcast_to seems ideal, but you still have to add the dimensions first, so I don't think it is better than tile.
Why 30 copies of the same filter, though? Are they going to be changed afterwards?

Floyd algorithm shortest path

I have written the code below; it works for the shortest distances but not for the shortest paths:
import math

def floyd(dist_mat):
    n = len(dist_mat)
    p = [[0] * n] * n
    for k in range(n):
        for i in range(n):
            for j in range(n):
                if dist_mat[i][j] > dist_mat[i][k] + dist_mat[k][j]:
                    dist_mat[i][j] = dist_mat[i][k] + dist_mat[k][j]
                    p[i][j] = k + 1
    return p

if __name__ == '__main__':
    print(floyd([[0, 5, 9999, 9999],
                 [50, 0, 15, 5],
                 [30, 9999, 0, 15],
                 [15, 9999, 5, 0]]))
The result of this code is: [[4, 1, 4, 2], [4, 1, 4, 2], [4, 1, 4, 2], [4, 1, 4, 2]]
The true result should be: [[0, 0, 4, 2], [4, 0, 4, 0], [0, 1, 0, 0], [0, 1, 0, 0]]
I would be happy to receive your ideas about why it goes wrong.
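One likely culprit, worth checking: p = [[0] * n] * n builds n references to one and the same inner list, so every assignment p[i][j] = k + 1 shows up in all rows, which is consistent with the identical rows in the result above. A minimal sketch of the difference (not from the original post):

n = 4
p_aliased = [[0] * n] * n                 # n references to a single row
p_aliased[0][1] = 7
print(p_aliased)                          # every row shows the 7

p_separate = [[0] * n for _ in range(n)]  # n independent rows
p_separate[0][1] = 7
print(p_separate)                         # only the first row shows the 7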