Network Training is too slow for Custom Network even though network is not too large(Resnet + LSTM type network) - neural-network

I have made a custom network for the image data. But the training on this network is very slow though the network is not too huge.
When I am training on resnet150 with lstm, the training is quite fast. Not sure which operation is lagging my training speed?
I have tried reducing Batch_size, seq_dim and network parameters. My guess is some operation is reducing the speed
The dataset I am using are images and they are also very small size(96*96) gray scale images.
***** CODE *****
class Residual(nn.Module):
def __init__(self, input_channels, num_channels,
use_1x1conv=True, strides=1, dilation=2, padding=1, kernel_size=5):
super(Residual, self).__init__()
self.conv1 = nn.Conv2d(input_channels, num_channels,
kernel_size=kernel_size, padding=padding, stride=strides, dilation=dilation)
self.conv2 = nn.Conv2d(num_channels, num_channels,
kernel_size=kernel_size, padding=2 * padding, dilation=(2 * dilation))
if use_1x1conv:
self.conv3 = nn.Conv2d(input_channels, num_channels,
kernel_size=1, stride=strides)
else:
self.conv3 = None
self.bn1 = nn.BatchNorm2d(num_channels)
self.bn2 = nn.BatchNorm2d(num_channels)
self.relu = nn.ReLU(inplace=True)
def forward(self, X):
Y = F.relu(self.bn1(self.conv1(X)))
Y = self.bn2(self.conv2(Y))
if self.conv3:
X = self.conv3(X)
Y += X
return F.relu(Y)
def weight_init(m):
'''
Usage:
model = Model()
model.apply(weight_init)
'''
if isinstance(m, nn.Conv1d):
init.normal_(m.weight.data)
if m.bias is not None:
init.normal_(m.bias.data)
elif isinstance(m, nn.Conv2d):
init.xavier_normal_(m.weight.data)
if m.bias is not None:
init.normal_(m.bias.data)
elif isinstance(m, nn.BatchNorm1d):
init.normal_(m.weight.data, mean=1, std=0.02)
init.constant_(m.bias.data, 0)
elif isinstance(m, nn.BatchNorm2d):
init.normal_(m.weight.data, mean=1, std=0.02)
init.constant_(m.bias.data, 0)
elif isinstance(m, nn.Linear):
init.xavier_normal_(m.weight.data)
init.normal_(m.bias.data)
elif isinstance(m, nn.LSTM):
for param in m.parameters():
if len(param.shape) >= 2:
init.orthogonal_(param.data)
else:
init.normal_(param.data)
class Network(nn.Module):
def __init__(self):
super(Network, self).__init__()
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_channels = 64
strides = 1
dilation = 2
padding = 4
kernel_size = 5
input_channel = 3
self.fc = nn.Linear(config['hidden_dim'], config['output_dim'])
self.lstm = None
b1 = Residual(input_channels=input_channel, num_channels=num_channels,
use_1x1conv=True, strides=strides, dilation=dilation, padding=padding, kernel_size=kernel_size)
b2 = Residual(input_channels=num_channels, num_channels=2 * num_channels,
use_1x1conv=True, strides=strides, dilation=2 * dilation, padding=2 * padding,
kernel_size=kernel_size)
b3 = Residual(input_channels=2 * num_channels, num_channels=4 * num_channels,
use_1x1conv=True, strides=strides, dilation=4 * dilation, padding=4 * padding,
kernel_size=kernel_size)
self.net = nn.Sequential(b1, b2, b3, nn.AdaptiveMaxPool2d((2, 2)))
self.apply(weight_init)
def forward(self, x):
x = self.net(x)
x = x.view(config['batch_size'], config['seq_dim'], -1)
if self.lstm is None:
self.lstm = nn.LSTM(x.size(2), config['hidden_dim'], 1, batch_first=True).to(self.device)
for param in self.lstm.parameters():
if len(param.shape) >= 2:
init.orthogonal_(param.data)
else:
init.normal_(param.data)
h0 = torch.zeros(config['layer_dim'], x.size(0), config['hidden_dim']).to(self.device)
# Initialize cell state
c0 = torch.zeros(config['layer_dim'], x.size(0), config['hidden_dim']).to(self.device)
output, (hn, cn) = self.lstm(x, (h0,c0))
output = output[:, :, :]
output = self.fc(output)
return output, (hn, cn)
for epoch in range(config['num_epochs']):
print('epoch', epoch)
running_loss = 0
nb_classes = config['output_dim']
confusion_matrix = torch.zeros(nb_classes, nb_classes)
for i, (image, label) in enumerate(trainLoader):
print('batch: ',i)
image = image.float().to(device)
label = label.to(device)
optimizer.zero_grad()
batch_size, timesteps, H, W, C = image.size()
# Change Image shape
image = image.view(batch_size * timesteps, H, W, C)
image = image.permute(0, 3, 1, 2) # from NHWC to NCHW
output, (hn,cn) = model(image)
label = label.view(-1)
output = output.view(-1, output.size(2))
loss = criterion(output, label)
loss *= config['seq_dim']
loss.backward() # Backward pass
optimizer.step() # Now we can do an optimizer step
running_loss += loss.item()

Related

pyTorch mat1 and mat2 cannot be multiplied

I am getting the following error:
RuntimeError: mat1 and mat2 shapes cannot be multiplied (32x33856 and 640000x256)
I don't understand how do I need to change the parameters of my net. I took the net created in this paper and tried to modify the parameters to meet my needs.This is the code, I changed the parameters of the first convolution but still get the error:
class ChordClassificationNetwork(nn.Module):
def __init__(self, train_model=False):
super(ChordClassificationNetwork, self).__init__()
self.train_model = train_model
self.flatten = nn.Flatten()
self.firstConv = nn.Conv2d(3, 64, (3, 3))
self.secondConv = nn.Conv2d(64, 64, (3, 3))
self.pool = nn.MaxPool2d(2)
self.drop = nn.Dropout(0.25)
self.fc1 = nn.Linear(100*100*64, 256)
self.fc2 = nn.Linear(256, 256)
self.outLayer = nn.Linear(256, 7)
def forward(self, x):
x = self.firstConv(x)
x = F.relu(x)
x = self.pool(x)
x = self.secondConv(x)
x = F.relu(x)
x = self.pool(x)
x = self.drop(x)
x = self.flatten(x)
x = self.fc1(x)
x = F.relu(x)
x = self.drop(x)
x = self.fc2(x)
x = F.relu(x)
x = self.drop(x)
x = self.outLayer(x)
output = F.softmax(x, dim=1)
return output
and this is the training file:
device = ("cuda" if torch.cuda.is_available() else "cpu")
transformations = transforms.Compose([
transforms.Resize((100, 100))
])
num_epochs = 10
learning_rate = 0.001
train_CNN = False
batch_size = 32
shuffle = True
pin_memory = True
num_workers = 1
dataset = GuitarDataset("../chords_data/cropped_images/train", transform=transformations)
train_set, validation_set = torch.utils.data.random_split(dataset, [int(0.8 * len(dataset)), len(dataset) - int(0.8*len(dataset))])
train_loader = DataLoader(dataset=train_set, shuffle=shuffle, batch_size=batch_size, num_workers=num_workers,
pin_memory=pin_memory)
validation_loader = DataLoader(dataset=validation_set, shuffle=shuffle, batch_size=batch_size, num_workers=num_workers,
pin_memory=pin_memory)
model = ChordClassificationNetwork().to(device)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
def check_accuracy(loader, model):
if loader == train_loader:
print("Checking accuracy on training data")
else:
print("Checking accuracy on validation data")
num_correct = 0
num_samples = 0
model.eval()
with torch.no_grad():
for x, y in loader:
x = x.to(device=device)
y = y.to(device=device)
scores = model(x)
predictions = torch.tensor([1.0 if i >= 0.5 else 0.0 for i in scores]).to(device)
num_correct += (predictions == y).sum()
num_samples += predictions.size(0)
print(
f"Got {num_correct} / {num_samples} with accuracy {float(num_correct) / float(num_samples) * 100:.2f}"
)
return f"{float(num_correct) / float(num_samples) * 100:.2f}"
def train():
model.train()
for epoch in range(num_epochs):
loop = tqdm(train_loader, total=len(train_loader), leave=True)
if epoch % 2 == 0:
loop.set_postfix(val_acc=check_accuracy(validation_loader, model))
for imgs, labels in loop:
imgs = imgs.to(device)
labels = labels.to(device)
outputs = model(imgs)
loss = criterion(outputs, labels)
optimizer.zero_grad()
loss.backward()
optimizer.step()
loop.set_description(f"Epoch [{epoch}/{num_epochs}]")
loop.set_postfix(loss=loss.item())
if __name__ == "__main__":
train()
What am I doing wrong?
Look at the error message, the issue comes from the fc1 layer which doesn't have the required number of neurons. It is receiving a tensor of shape (batch_size, 33856) but expects (batch_size, 640000). The reduction in dimensionality is caused by the different layers you have applied to your input tensor before fc1.
You can fix this by defining fc1 with:
self.fc1 = nn.Linear(33856, 256)
Alternatively, you can use nn.LazyLinear which will initialize its weights with the appropriate number of neurons at runtime depending on the input it receives. But that's lazy:
self.fc1 = nn.LazyLinear(256)

Pytorch-GPU what am I forgetting to move over to the GPU?

I'm getting this error. What am I leaving out, I feel that I have tired everything.
Also is there not a easy way to just use the GPU and not the CPU I feel like I have tried all those options as well. As in not using .cuda() everywhere
This is one of my first neutral networks so please go easy on me. (most of it is from Class)
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking arugment for argument target in method wrapper_nll_loss_forward)
import torch.cuda
import numpy as np
import time
from torchvision import datasets
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary
torch.cuda.set_device(0)
def load_data():
num_workers = 0
load_data.batch_size = 20
transform = transforms.ToTensor()
train_data = datasets.MNIST(root='data', train=True, download=True, transform=transform)
test_data = datasets.MNIST(root='data', train=False, download=True, transform=transform)
load_data.train_loader = torch.utils.data.DataLoader(train_data,
batch_size=load_data.batch_size, num_workers=num_workers, pin_memory=True)
test_loader = torch.utils.data.DataLoader(test_data,
batch_size=load_data.batch_size, num_workers=num_workers, pin_memory=True)
def visualize():
dataiter = iter(load_data.train_loader)
visualize.images, labels = dataiter.next()
visualize.images = visualize.images.numpy()
fig = plt.figure(figsize=(25, 4))
for idx in np.arange(load_data.batch_size):
ax = fig.add_subplot(2, load_data.batch_size/2, idx+1, xticks=[], yticks=[])
ax.imshow(np.squeeze(visualize.images[idx]), cmap='gray')
ax.set_title(str(labels[idx].item()))
#plt.show()
def fig_values():
img = np.squeeze(visualize.images[1])
fig = plt.figure(figsize = (12,12))
ax = fig.add_subplot(111)
ax.imshow(img, cmap='gray')
width, height = img.shape
thresh = img.max()/2.5
for x in range(width):
for y in range(height):
val = round(img[x][y],2) if img[x][y] !=0 else 0
ax.annotate(str(val), xy=(y,x),
horizontalalignment='center',
verticalalignment='center',
color='white' if img[x][y]<thresh else 'black')
#plt.show()
load_data()
visualize()
fig_values()
class NeuralNet(nn.Module):
def __init__(self, gpu = True):
super(NeuralNet, self ).__init__()
self.fc1 = nn.Linear(28 * 28, 16).cuda()
self.fc2 = nn.Linear(16, 10).cuda()
def forward(self, x):
x = x.view(-1, 28 * 28).cuda()
x = F.relu(self.fc1(x)).cuda()
x = self.fc2(x).cuda()
return x
def training():
criterion = nn.CrossEntropyLoss().cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
n_epochs = 100
model.train().cuda()
for epoch in range(n_epochs):
train_loss = 0.0
for data, target in load_data.train_loader:
optimizer.zero_grad()
output = model(data)
loss = criterion(output, target)
loss.backward()
optimizer.step()
train_loss += loss.item()*data.size(0)
train_loss = train_loss/len(load_data.train_loader.dataset)
print('Epoch: {} \tTraining Loss: {:.6f}'.format(
epoch+1,
train_loss
))
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = NeuralNet().to(device)
summary(model, input_size=(1, 28, 28))
training()
Your data, and target are not in GPU (considering the removal of repeated cuda calls).
You are also doing of unnecessary cuda() which is not needed. Simply, see where your data and model are. Take the model to GPU, take the data and label to GPU, finally feed data to the model.
Don't use, cuda(), use to.device(), it's safer in the long run and easily customizable in multi-gpu setup.
import torch.cuda
import numpy as np
import time
from torchvision import datasets
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary
torch.cuda.set_device(0)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
def load_data():
num_workers = 0
load_data.batch_size = 20
transform = transforms.ToTensor()
train_data = datasets.MNIST(root='data', train=True, download=True, transform=transform)
test_data = datasets.MNIST(root='data', train=False, download=True, transform=transform)
load_data.train_loader = torch.utils.data.DataLoader(train_data,
batch_size=load_data.batch_size, num_workers=num_workers, pin_memory=True)
test_loader = torch.utils.data.DataLoader(test_data,
batch_size=load_data.batch_size, num_workers=num_workers, pin_memory=True)
def visualize():
dataiter = iter(load_data.train_loader)
visualize.images, labels = dataiter.next()
visualize.images = visualize.images.numpy()
fig = plt.figure(figsize=(25, 4))
for idx in np.arange(load_data.batch_size):
ax = fig.add_subplot(2, load_data.batch_size/2, idx+1, xticks=[], yticks=[])
ax.imshow(np.squeeze(visualize.images[idx]), cmap='gray')
ax.set_title(str(labels[idx].item()))
#plt.show()
def fig_values():
img = np.squeeze(visualize.images[1])
fig = plt.figure(figsize = (12,12))
ax = fig.add_subplot(111)
ax.imshow(img, cmap='gray')
width, height = img.shape
thresh = img.max()/2.5
for x in range(width):
for y in range(height):
val = round(img[x][y],2) if img[x][y] !=0 else 0
ax.annotate(str(val), xy=(y,x),
horizontalalignment='center',
verticalalignment='center',
color='white' if img[x][y]<thresh else 'black')
#plt.show()
load_data()
visualize()
fig_values()
class NeuralNet(nn.Module):
def __init__(self, gpu = True):
super(NeuralNet, self ).__init__()
self.fc1 = nn.Linear(28 * 28, 16)
self.fc2 = nn.Linear(16, 10)
def forward(self, x):
x = x.view(-1, 28 * 28)
x = F.relu(self.fc1(x))
x = self.fc2(x)
return x
def training():
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
n_epochs = 100
model.train()
for epoch in range(n_epochs):
train_loss = 0.0
for data, target in load_data.train_loader:
optimizer.zero_grad()
###################################
data = data.to(device)
target = target.to(device)
###################################
output = model(data)
loss = criterion(output, target)
loss.backward()
optimizer.step()
train_loss += loss.item()*data.size(0)
train_loss = train_loss/len(load_data.train_loader.dataset)
print('Epoch: {} \tTraining Loss: {:.6f}'.format(
epoch+1,
train_loss
))
model = NeuralNet().to(device)
summary(model, input_size=(1, 28, 28))
training()
Clearly your target variable is not on GPU.
Also, its a bad idea to call .cuda() inside forward()
def forward(self, x):
x = x.view(-1, 28 * 28).cuda() # BAD
x = F.relu(self.fc1(x)).cuda() # BAD
x = self.fc2(x).cuda() #BAD
return x
Rather, remove all .cuda() inside forward and do this is main loop
for data, target in load_data.train_loader:
data = data.cuda()
target = target.cuda()

Neural Network Is Taking 1000 Epochs Just To Solve XOR

My Neural Network takes 1000 epochs just to solve XOR and when I increase number of neurons loss increases and it doesnt work.
Also before when I was using np.random.rand it did'nt work with a learning rate less than 1.
import numpy as np
input = np.array([[0, 1] ,[1, 0], [1, 1], [0, 0]])
labels = np.array([[1], [1], [0], [0]])
np.random.seed(0)
class NN:
def __init__(self):
self.layers = []
def add(self, layer):
self.layers.append(layer)
def loss(self, layer):
self.loss = layer
def predict(self, input):
output = input
for layer in self.layers:
output = layer.forward(output)
return output
def train(self, input, labels):
prediction = self.predict(input)
loss = self.loss.forward(prediction, labels)
gradient = self.loss.back() * 0.1
for layer in self.layers[::-1]:
gradient = layer.back(gradient)
return loss
Dense Layer
class Dense:
def __init__(self, input_size, output_size):
self.weights = np.random.randn(input_size, output_size) - 0.5
self.bias = np.random.randn(1, output_size) - 0.5
def forward(self, input):
self.input = input
output = np.dot(input, self.weights) + self.bias
return output
def back(self, gradient):
gradientW = np.dot(self.input.T, gradient)
self.weights -= gradientW
self.bias -= np.mean(gradient)
return np.dot(gradient, self.weights.T)
Sigmoid
class Sigmoid:
def forward(self, input):
output = 1 / (1+np.exp(-input))
self.output = output
return output
def back(self, gradient):
output = self.output * (1 - self.output)
gradient *= output
return gradient
I am using Mean Squaresd Error
class MSE:
def forward(self, input, labels):
self.input = input
self.labels = labels
return np.mean((labels - input)**2)
def back(self):
output = 2 * (self.input - self.labels)
return output
Calling the Class
N = NN()
N.add(Dense(2, 10))
N.add(Sigmoid())
N.add(Dense(10, 1))
N.add(Sigmoid())
N.loss(MSE())
Training
for i in range(1000):
loss = (N.train(input, labels))
if i%99==0:
print(loss)
print(N.predict(input))
Edit: I Tried this and now it works with 100 epochs. I needed an additional layer to increase neurons why? I tried to train it on AND but it doesnt work.
N = NN()
N.add(Dense(2, 50))
N.add(Sigmoid())
N.add(Dense(50, 1000))
N.add(Sigmoid())
N.add(Dense(1000, 50))
N.add(Sigmoid())
N.add(Dense(50, 1))
N.add(Sigmoid())
N.loss(MSE())

Constant loss through epochs

I code this neural network to make a gaussian regression but I don't understand why my loss doesn't change through epochs. I set the learning rate to 1 to see the loss decreases but it does not. I chose to take 2000 poitns to train my Neural network. I watched several algorithms on this website and I don't really understand why my algorithm do not achieve what I expect.
I have already imported all libraries needed.
Thank you for your help
def f(x):
return x * np.sin(x) # function to predict
m =2000
X_bis = np.zeros((1,m),dtype = float)
X_bis=np.random.random(m)*10
## Create my training,validation and test set
X_train = X_bis[0:600]
X_val = X_bis[600:800]
X_test = X_bis[800:]
y_train = f(X_train)
y_val = f(X_val)
y_test = f(X_test)
mean_X_train = np.mean(X_train)
std_X_train = np.std(X_train)
mean_y_train = np.mean(y_train)
std_y_train =np.std(y_train)
class MyDataset(data.Dataset):
def __init__(self, data_feature, data_target):
self.data_feature = data_feature
self.data_target = data_target
def __len__(self):
return len(self.data_feature)
def __getitem__(self, index):
X_train_normalized = (self.data_feature[index] - mean_X_train) / std_X_train
y_train_normalized = (self.data_target[index] - mean_y_train) / std_y_train
return torch.from_numpy(np.array(X_train_normalized,ndmin=1)).float(), torch.from_numpy(np.array(y_train_normalized, ndmin = 1)).float()
training_set = MyDataset(X_train,y_train)
train_loading = torch.utils.data.DataLoader(training_set, batch_size= 100)
val_set = MyDataset(X_val, y_val)
val_loading = torch.utils.data.DataLoader(val_set, batch_size= 10)
test_set = MyDataset(X_test,y_test)
test_loading = torch.utils.data.DataLoader(test_set, batch_size= 100)
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.FC1 = nn.Linear(1,10)
self.FC2 = nn.Linear(10, 1)
def forward(self, x):
x = F.relu(self.FC1(x))
x = self.FC2(x)
return x
model = Net()
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(),
lr=1, weight_decay= 0.01, momentum = 0.9)
def train(net, train_loader, optimizer, epoch):
net.train()
total_loss=0
for idx,(data, target) in enumerate(train_loader, 0):
outputs = net(data)
loss = criterion(outputs,target)
total_loss +=loss.cpu().item()
optimizer.step()
print('Epoch:', epoch , 'average training loss ', total_loss/ len(train_loader))
def test(net,test_loader):
net.eval()
total_loss = 0
for idx,(data, target) in enumerate(test_loader,0):
outputs = net(data)
outputs = outputs * std_X_train + mean_X_train
target = target * std_y_train + mean_y_train
loss = criterion(outputs,target)
total_loss += sqrt(loss.cpu().item())
print('average testing loss', total_loss/len(test_loader))
for epoch in range(50):
train(model,train_loading,optimizer,epoch)
test(model,val_loading)
'''
I'm wondering why you don't have loss.backward() after the line that you compute the loss (i.e., loss = criterion(outputs,target)) in your training snippet. This will help backpropagating and ultimately updating the parameters of your network upon optimizer.step(). Also, try using lower learning rates as lr=1 normally is too much in training such networks. Try using learning rates in between 0.001-0.01 to see if your network is learning the mapping between input X and target Y.

InvalidType: Invalid operation is performed

I am trying to write a stacked autoencoder. Since this a stacked autoencoder we need to train the first autoencoder and pass the weights to the second autoencoder. So during training we need to define train_data_for_next_layer. Here I am getting error:
InvalidType:
Invalid operation is performed in: LinearFunction (Forward)
Expect: x.shape[1] == W.shape[1]
Actual: 784 != 250
I am having issue with the last line. Is this problem due to incorrect model layer, I want to know what is the issue here. I have faced this problem several times before and any detailed explanation is welcome. The code is as follows:
class AutoEncoder(chainer.Chain):
def __init__(self, n_in, n_out, activation='relu', tied=True):
if tied:
super(AutoEncoder, self).__init__(
l1 = L.Linear(n_in, n_out)
)
self.add_param('decoder_bias', n_in)
self.decoder_bias.data[...] = 0
else:
super(AutoEncoder, self).__init__(
l1 = L.Linear(n_in, n_out),
l2 = L.Linear(n_out, n_in)
)
self.tied = tied
self.n_in = n_in
self.n_out = n_out
self.activation = {'relu': F.relu, 'sigmoid': F.sigmoid,
'identity': F.identity}[activation]
def __call__(self, x, train=True):
h1 = F.dropout(self.activation(self.l1(x)), train=train)
if self.tied:
return self.activation(F.linear(h1, F.transpose(self.l1.W),
self.decoder_bias))
else:
return self.activation(self.l2(h1))
def encode(self, x, train=True):
return F.dropout(self.activation(self.l1(x)), train=train)
def decode(self, x, train=True):
if self.tied:
return self.activation(F.linear(x, F.transpose(self.l1.W),
self.decoder_bias))
else:
return self.activation(self.l2(x))
class StackedAutoEncoder(chainer.ChainList):
def __init__(self, autoencoders):
super(StackedAutoEncoder, self).__init__()
for ae in autoencoders:
self.add_link(ae)
def __call__(self, x, train=True, depth=0):
if depth == 0: depth = len(self)
h = x
for i in range(depth):
h = self[i].encode(h, train=train)
for i in range(depth):
if i == depth-1: # do not use dropout in the output layer
train = False
h = self[depth-1-i].decode(h, train=train)
return h
def encode(self, x, train=True, depth=0):
if depth == 0: depth = len(self)
h = x
for i in range(depth):
h = self[i].encode(h, train=train)
return h
def decode(self, x, train=True, depth=0):
if depth == 0: depth = len(self)
h = x
for i in range(depth):
if i == depth-1: # do not use dropout in the output layer
train = False
h = self[depth-1-i].decode(h, train=train)
return h
class Regression(chainer.Chain):
def __init__(self, predictor):
super(Regression, self).__init__(predictor=predictor)
def __call__(self, x, t):
y = self.predictor(x, True)
self.loss = F.mean_squared_error(y, t)
return self.loss
def dump(self, x):
return self.predictor(x, False)
initmodel = ''resume = ''
gpu = -1
epoch_pre = 20
epoch_fine = 20
batchsize = 100
noise = 0
optimizer = 'adam'
learningrate = 0.01
alpha = 0.001
unit = '1000, 500, 250, 2'
activation = 'sigmoid'
untied = False
batchsize = batchsize
n_epoch = epoch_pre
n_epoch_fine = epoch_fine
n_units = list(map(int, unit.split(',')))
activation = activation
mnist = fetch_mldata('MNIST original', data_home='.')
perm = np.random.permutation(len(mnist.data))
mnist.data = mnist.data.astype(np.float32) / 255
train_data = mnist.data[perm][:60000]
test_data = mnist.data[perm][60000:]
# prepare layers
aes = []
for idx in range(len(n_units)):
n_in = n_units[idx-1] if idx > 0 else 28*28
n_out = n_units[idx]
ae = AutoEncoder(n_in, n_out, activation, tied = True)
aes.append(ae)
# prepare train data for next layer
x = chainer.Variable(np.array(train_data))
train_data_for_next_layer = cuda.to_cpu(ae.encode(x, train=False))
The InvalidType error indicates that the input shape of the array given to F.linear is wrong.
Expect: x.shape[1] == W.shape[1]
Actual: 784 != 250
In this case, for the given input x and W, F.linear expects that
x.shape[1] is the same as W.shape[1], but it does not.
For more detailed description of the error message, see https://docs.chainer.org/en/stable/tips.html#how-do-i-fix-invalidtype-error to understand how to interpret that error message.