Subprocess Data Loading in PyTorch on Google Colab - subprocess

I'm training a deep neural network with PyTorch, using DataLoader for preprocessing and multi-process loading of the dataset. I set the num_workers attribute to a positive number such as 4, and my batch_size is 8. I train the network in the Google Colab environment, but after a few minutes training stops with an error while reading .PNG files. I think it's a memory error, and I'd like to know the relationship between the number of GPUs, batch_size, and num_workers so I can configure them sensibly, especially on Google Colab.
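For reference, a conservative DataLoader setup often suggested for Colab looks like the sketch below; the dataset name and the exact values are illustrative assumptions, not a guaranteed fix:
import torch
from torch.utils.data import DataLoader

# Illustrative values: start with a small num_workers and increase it while
# watching RAM usage; num_workers=0 runs loading in the main process, which
# is the safest setting when debugging crashes during file reading.
loader = DataLoader(
    dataset=train_dataset,                  # assumed to be defined elsewhere
    batch_size=8,
    shuffle=True,
    num_workers=2,                          # Colab usually exposes ~2 CPU cores
    pin_memory=torch.cuda.is_available(),   # faster host-to-GPU transfers
)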

I think you can follow this notebook:
https://colab.research.google.com/notebook#fileId=1jxUPzMsAkBboHMQtGyfv5M5c7hU8Ss2c&scrollTo=EM7EnBoyK8nR
It provides a guide on how to configure Google Colab. I tried it and it feels really fast. Hope it helps.
Below is the code it provides, with a small change to how PyTorch is installed:
#!/usr/bin/env python
# encoding: utf-8
import sys
sys.version

# http://pytorch.org/
from os import path
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())
accelerator = 'cu80' if path.exists('/opt/bin/nvidia-smi') else 'cpu'
!pip install -q http://download.pytorch.org/whl/{accelerator}/torch-0.3.0.post4-{platform}-linux_x86_64.whl torchvision

import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.autograd import Variable

input_size = 784     # The image size = 28 x 28 = 784
hidden_size = 500    # The number of nodes at the hidden layer
num_classes = 10     # The number of output classes, here the digits 0 to 9
num_epochs = 5       # The number of times the entire dataset is trained
batch_size = 100     # The number of samples per iteration
learning_rate = 1e-3 # The speed of convergence

train_dataset = dsets.MNIST(root='./data',
                            train=True,
                            transform=transforms.ToTensor(),
                            download=True)
test_dataset = dsets.MNIST(root='./data',
                           train=False,
                           transform=transforms.ToTensor())

train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

class Net(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(Net, self).__init__()  # Inherited from the parent class nn.Module
        self.fc1 = nn.Linear(input_size, hidden_size)  # 1st fully connected layer: 784 (input) -> 500 (hidden)
        self.relu = nn.ReLU()                          # Non-linear ReLU layer: max(0, x)
        self.fc2 = nn.Linear(hidden_size, num_classes) # 2nd fully connected layer: 500 (hidden) -> 10 (classes)

    def forward(self, x):  # Forward pass: stacking the layers together
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out

net = Net(input_size, hidden_size, num_classes)
use_cuda = True
if use_cuda and torch.cuda.is_available():
    net.cuda()

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):  # Load a batch of images with their labels
        images = Variable(images.view(-1, 28*28))  # Flatten each 28 x 28 image into a vector of size 784 and wrap it in a Variable
        labels = Variable(labels)
        if use_cuda and torch.cuda.is_available():
            images = images.cuda()
            labels = labels.cuda()

        optimizer.zero_grad()              # Zero the gradients from the previous step
        outputs = net(images)              # Forward pass: compute the class scores for a batch of images
        loss = criterion(outputs, labels)  # Compute the loss: difference between the outputs and the given labels
        loss.backward()                    # Backward pass: compute the gradients
        optimizer.step()                   # Optimizer: update the weights

        if (i+1) % 100 == 0:  # Logging
            print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f'
                  % (epoch+1, num_epochs, i+1, len(train_dataset)//batch_size, loss.data[0]))

correct = 0
total = 0
for images, labels in test_loader:
    images = Variable(images.view(-1, 28*28))
    if use_cuda and torch.cuda.is_available():
        images = images.cuda()
        labels = labels.cuda()
    outputs = net(images)
    _, predicted = torch.max(outputs.data, 1)  # Choose the class with the highest score
    total += labels.size(0)                    # Increment the total count
    correct += (predicted == labels).sum()     # Increment the correct count

print('Accuracy of the network on the 10K test images: %d %%' % (100 * correct / total))
torch.save(net.state_dict(), 'fnn_model.pkl')

Related

Cross Entropy Loss function not converging

I'm new to neural networks and I'm building one that reads handwritten digits. The loss floats around 2.2-2.3 and I'm not sure why it isn't converging. I tried adjusting the learning rate, but it doesn't really change anything.
# TODO: Define function to create our own neural network

# Parameters
input_size = 784   # Hint: image size is 28x28, and we want to flatten the image
num_classes = 10   # Hint: our inputs include 0-9
num_epochs = 5     # Number of times we loop through the entire training dataset, can be pretty arbitrary

class NN(nn.Module):
    ############ YOUR CODE STARTS HERE ############
    # 1. Initialize our own NN model
    def __init__(self, input_size, num_classes):
        super(NN, self).__init__()
        self.flatten = nn.Flatten()
        # Use ReLU activation function
        self.relu = nn.ReLU()
        # Input layer
        self.input_layer = nn.Linear(input_size, 13)
        # Hidden layers: use at least 1 hidden layer!
        self.hidden1 = nn.Linear(13, 6)
        # Output layer
        self.output_layer = nn.Linear(6, 10)

    # 2. Define method for forwarding input data
    def forward(self, sample):
        sample = self.flatten(sample)
        out = self.input_layer(sample)
        out = self.relu(out)
        out = self.hidden1(out)
        out = self.relu(out)          #TODO: activation function
        out = self.output_layer(out)  #TODO: forward to output layer
        return out

nn_model = NN(input_size, num_classes)
print("My NN Model: ", nn_model)
Loss function:
loss_function = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(nn_model.parameters(), lr=.0001)
Training:
total_steps = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Reshape our images from 2D (28x28) to 1D (784)
        images = images.view(-1, 28*28).to(device)
        labels = labels.to(device)
        # Call functions we've previously defined to perform forward pass & calculate loss
        output = nn_model.forward(images)
        loss = loss_function(output, labels)
        # Backward pass: calculate gradients
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Print out training progress
        if (i+1) % 100 == 0:
            print(f'epoch {epoch+1} / {num_epochs}, step {i+1}/{total_steps}, loss = {loss.item():.4f}')
Any help is appreciated!

Given groups=1, weight of size [10, 1, 5, 5], expected input[2, 3, 28, 28] to have 1 channels, but got 3 channels instead

I am trying to train a CNN on MNIST but test it on my own handwritten digits. To do that I wrote the following code, but I am getting the error in the title of this question:
batch_size = 64
train_dataset = datasets.MNIST(root='./data/',
                               train=True,
                               transform=transforms.ToTensor(),
                               download=True)
test_dataset = ImageFolder('my_digit_images/', transform=transforms.ToTensor())

train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        #print(self.conv1.weight.shape)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv3 = nn.Conv2d(20, 20, kernel_size=3)
        #print(self.conv2.weight.shape)
        self.mp = nn.MaxPool2d(2)
        self.fc = nn.Linear(320, 10)

    def forward(self, x):
        in_size = x.size(0)
        x = F.relu(self.conv1(x))
        #print(x.shape)
        x = F.relu(self.mp(self.conv2(x)))
        x = F.relu(self.mp(self.conv3(x)))
        #print("2.", x.shape)
        # x = F.relu(self.mp(self.conv3(x)))
        x = x.view(in_size, -1)  # flatten the tensor
        #print("3.", x.shape)
        x = self.fc(x)
        return F.log_softmax(x)

model = Net()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

def train(epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = Variable(data), Variable(target)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

def test():
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        data, target = Variable(data, volatile=True), Variable(target)
        output = model(data)
        test_loss += F.nll_loss(output, target, size_average=False).data
        pred = output.data.max(1, keepdim=True)[1]
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()
    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
The MNIST dataset contains black-and-white single-channel images, while yours are probably 3-channel RGB. Either re-encode your images or preprocess them, e.g.
img = img[:, 0:1, :, :]
You can do this with a custom transform added after transforms.ToTensor().
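A minimal sketch of that idea, assuming the images come from ImageFolder (note that inside a transform pipeline each tensor is a single image of shape [C, H, W], so the slice has no batch dimension):
from torchvision.datasets import ImageFolder
import torchvision.transforms as transforms

# Keep only the first channel of each image tensor: [3, H, W] -> [1, H, W]
single_channel = transforms.Compose([
    transforms.ToTensor(),
    transforms.Lambda(lambda img: img[0:1, :, :]),
])

test_dataset = ImageFolder('my_digit_images/', transform=single_channel)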
The images used for training and testing should follow the same distribution. Since MNIST data is grayscale by default, and assuming you didn't change the channels, the model expects the same number of channels at test time.
The following code is an example of how this is done with a transformation. Following the order defined below, it:
Converts the image to a single channel (grayscale)
Resizes the image to the size of the default MNIST data
Converts the image to a tensor
Normalizes the tensor to the same mean and std as during training (assuming you used the same values).
test_dataset = ImageFolder('my_digit_images/',
                           transform=transforms.Compose([
                               transforms.Grayscale(num_output_channels=1),
                               transforms.Resize((28, 28)),
                               transforms.ToTensor(),
                               transforms.Normalize((0.1307,), (0.3081,))]))

How can I fix tensor dimension matching error (with 1 unit difference)

I'm trying to run my code for a Graph Convolutional Network (GCN) in PyTorch with several .csv input files, but I get the error below:
RuntimeError: The expanded size of the tensor (732) must match the existing size (731) at non-singleton dimension 0. Target sizes: [732]. Tensor sizes: [731]
Here is my code:
import csv
import torch
import torch.nn as nn
import torch.nn.functional as F
import dgl
from sklearn.metrics import r2_score
import numpy as np
import datetime
import dgl.function as fn

# Below are the graph convolution functions
# (where each node collects information about nearby nodes):
def gcn_message(edges):
    return {'msg': edges.src['h']}

def gcn_reduce(nodes):
    return {'h': torch.sum(nodes.mailbox['msg'], dim=1)}

# Below is the pytorch module that defines the operations at each graph convolution layer
class gcnLayer(nn.Module):
    def __init__(self, in_feats, out_feats):
        super(gcnLayer, self).__init__()
        self.linear = nn.Linear(in_feats*2, out_feats)

    def forward(self, g, inputs):
        with g.local_scope():
            g.ndata['h'] = inputs  # inputs: POI features
            print(g.ndata['h'])
            g.update_all(message_func=fn.copy_u('h', 'm'), reduce_func=fn.mean('m', 'h_N'))
            h_N = g.ndata['h_N']
            h_total = torch.cat([inputs, h_N], dim=1)  # Result (convolved POIs) of convolution at a layer is extracted
            return self.linear(h_total)  # Result is linearly transformed

# Below is the pytorch class (machine learning architectures are initialized as classes)
# that defines the graph convolutional network (GCN) architecture
# (number of hidden layers, neurons, activation function, etc.)
class gcn(torch.nn.Module):
    def __init__(self, input, hidden, output):
        super(gcn, self).__init__()
        # Initially each row in the input has (input) number of elements.
        # In other words, each node in the network has (input) number of features, i.e. the number of POI types
        self.gcnInput = gcnLayer(input, hidden)    # Input size is converted into hidden size
        self.gcnHidden = gcnLayer(hidden, hidden)  # Hidden size is converted into hidden size
        self.gcnOutput = gcnLayer(hidden, output)  # Hidden size is converted into desired output size

    # Forward function: this function is run when we call the class
    def forward(self, g, pois):
        y = F.relu(self.gcnInput(g, pois))  # Result of the input layer is sent through the activation function
        y = F.relu(self.gcnHidden(g, y))    # Result of the hidden layer is sent through the activation function
        y = F.relu(self.gcnHidden(g, y))    # (Here, an arbitrary number of hidden layers can be added)
        y = self.gcnOutput(g, y)            # Result of the output layer (not activated)
        return y

# Below is the pytorch class that defines the multilayer perceptron (MLP) architecture
# (number of hidden layers, neurons, activation function, etc.)
class mlp(torch.nn.Module):
    def __init__(self, input, hidden):
        super(mlp, self).__init__()  # initialize
        self.classifier = nn.Sequential(  # Sequential is used when combining different layers
            nn.Linear(input, hidden),     # Input feature matrix is linearly transformed into a matrix with shape (hidden)
            nn.ReLU(),                    # Activation function is applied
            nn.Linear(hidden, hidden),    # Result of the previous layer is linearly transformed
            nn.ReLU(),                    # Activation function is applied
            nn.Linear(hidden, 1))         # At the final layer, one output is given (trip amount)

    def forward(self, x):
        x = self.classifier(x)  # The input is sent through the MLP architecture defined above
        return x

# Below is the pytorch class that defines the combined deep learning architecture
class od(nn.Module):
    def __init__(self, gcnInput, gcnHidden, gcnOutput, mlpHidden):
        super(od, self).__init__()
        self.gcn = gcn(gcnInput, gcnHidden, gcnOutput)  # First: GCN
        self.mlp = mlp((2*gcnOutput+1), mlpHidden)      # Afterwards: MLP

    def forward(self, g, pois, costs, indices, q, zoneCount):
        y = self.gcn(g, pois)  # First, send the input through the GCN
        p = torch.zeros(len(costs), 2*q).cuda()  # Prepare a matrix holding the POI output at the origin (size: q) and at the destination (size: q)
        count = 0
        for i in range(zoneCount):
            for j in range(zoneCount):
                p[count][:q] = y[i][:]  # POI output at the origin (size: q)
                p[count][q:] = y[j][:]  # POI output at the destination (size: q)
                count += 1
        p = p[indices][:]          # Order the input matrix in the order of shuffled zones (or OD pairs)
        costs = costs[indices][:]  # Order the cost matrix in the order of shuffled zones (or OD pairs)
        inputs = torch.cat((p, costs), 1).cuda()  # Combine the POI and cost matrices
        y = self.mlp(inputs)  # Last, send through the MLP
        return y

def train(optimizer, model, criterion, pois, costs, labels, indices, zoneCount, gcnOutput):
    model.train()          # Model is in training mode (meaning gradients are calculated)
    optimizer.zero_grad()  # Gradients are zeroed
    print(optimizer)
    pred = model(g, pois, costs, indices, gcnOutput, zoneCount)  # Get model output as the prediction
    loss = criterion(pred, labels)  # Calculate the loss between prediction and label
    loss.backward()     # Backpropagate the gradients
    optimizer.step()    # Update the parameters using the computed gradients
    return loss.item()  # Return the loss

def test(model, pois, costs, labels, indices, zoneCount, gcnOutput):
    model.eval()  # Model is in evaluation mode: no gradients are calculated
    with torch.no_grad():  # Disable gradient tracking for everything below
        pred = model(g, pois, costs, indices, gcnOutput, zoneCount)  # Get the prediction
        predictions = pred.detach().cpu()  # Move the prediction tensor from GPU to CPU
        r2 = r2_score(labels.cpu(), predictions)  # Calculate R2
        return r2

# The part below gets the data from the files into the program (POIs, nodes, costs, labels).
# If the file types differ from the ones used in this research, this part should be adjusted.
def data_collection(key):
    if key == "mb":  # mb: Manhattan and Brooklyn case
        no = 3
    else:
        no = 2
    with open("{}/nodes.csv".format(key)) as f:
        nodeCount = sum(1 for line in f)
        print(nodeCount)
    with open("{}/poisInfo.csv".format(key)) as f:
        poiCount = sum(1 for line in f)
        print(poiCount)
    with open("{}/zones.csv".format(key)) as f:
        zoneCount = sum(1 for line in f)
        print(zoneCount)
    pois = torch.zeros((nodeCount, poiCount)).cuda()
    print(pois)
    i = 0
    with open('{}/nodes.csv'.format(key), mode='r') as rx:
        r = csv.reader(rx, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        for row in r:
            print(row)
            pois[i][:] = torch.FloatTensor([int(v) for v in row[no:]])
            i += 1
    costs = torch.zeros((zoneCount*zoneCount, 1)).cuda()
    labels = torch.zeros((zoneCount*zoneCount, 1)).cuda()
    count = 0
    with open('{}/costsTrips.csv'.format(key), mode='r') as rx:
        r = csv.reader(rx, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        for row in r:
            costs[count][0] = int(row[2])
            labels[count][0] = int(row[3])
            count += 1
    g = dgl.DGLGraph().to(torch.device('cuda:0'))  # dgl: deep graph library; we move POIs to the graph for graph convolution
    print(nodeCount)
    g.add_nodes(nodeCount)  # Add nodes to the graph
    print(nodeCount)
    print(g.number_of_nodes())
    with open('{}/edges.csv'.format(key), mode='r') as rx:
        r = csv.reader(rx, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        for row in r:
            g.add_edge(int(row[0]), int(row[1]))  # If an edge exists between 2 nodes, add the edge
    print('We have %d nodes.' % g.number_of_nodes())
    print('We have %d edges.' % g.number_of_edges())
    return([g, pois, labels, costs, zoneCount, poiCount])

gcnoutput = 10
keys = ["manhattan", "brooklyn", "mb"]
count = 0
with open("costFinal.csv", mode='w', newline="") as wx:
    w = csv.writer(wx, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    w.writerow(["place", "iteration", "split", "r2"])
    for key in keys:
        [g, pois, labels, costs, zoneCount, poiCount] = data_collection(key)
        for iteration in range(1, 11):  # Test each split ratio 10 times to get the average
            a = np.random.permutation(zoneCount)  # Randomize the zones
            for i in range(1, 10):
                split = i/10  # The lines below split the training and test subsets
                breaker = int(split * zoneCount)
                train_zones = a[:breaker]
                test_zones = a[breaker:]
                train_indices = []
                test_indices = []
                for z in train_zones:
                    train_indices += [j for j in range(z * zoneCount, z * zoneCount + zoneCount)]
                for z in test_zones:
                    test_indices += [j for j in range(z * zoneCount, z * zoneCount + zoneCount)]
                # Model parameters: gcninput, gcnhidden, gcnoutput, mlphidden
                model = od(poiCount, 64, gcnoutput, 64).cuda()  # Construct the model
                print(model)
                optimizer = torch.optim.Adam(model.parameters(), lr=0.01)  # Optimizer: Adam
                print(optimizer)
                criterion = torch.nn.MSELoss()  # Loss: mean squared error
                print(criterion)
                for epoch in range(1, 11):  # Train the model for 10 epochs
                    print(epoch)
                    loss = train(optimizer, model, criterion, pois, costs, labels[train_indices], train_indices, zoneCount, gcnoutput)
                    # print(count, datetime.datetime.now() - start, key, iteration, i, epoch, loss)
                    count += 1
                r2 = test(model, pois, costs, labels[test_indices], test_indices, zoneCount, gcnoutput)  # At the end, test the model and get R2
                w.writerow([key, iteration, i*10, r2])  # Write key [manhattan, brooklyn, mb], iteration [1...10], split ratio [10%...90%], and r2 to the file

ValueError: Expected input batch_size (24) to match target batch_size (8)

I found many links about this and read different Stack Overflow answers related to it, but I'm not able to figure it out.
My image batch has shape torch.Size([8, 3, 16, 16]).
My architecture is as below:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # linear layers
        self.fc1 = nn.Linear(16 * 16, 768)
        self.fc2 = nn.Linear(768, 64)
        self.fc3 = nn.Linear(64, 10)
        self.dropout = nn.Dropout(p=.5)

    def forward(self, x):
        # flatten image input
        x = x.view(-1, 16 * 16)
        # add hidden layers, with relu activation function
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.dropout(F.relu(self.fc2(x)))
        x = F.log_softmax(self.fc3(x), dim=1)
        return x

# specify loss function
criterion = nn.NLLLoss()
# specify optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=.003)

# number of epochs to train the model
n_epochs = 30  # suggest training between 20-50 epochs

model.train()  # prep model for training
for epoch in range(n_epochs):
    # monitor training loss
    train_loss = 0.0

    ###################
    # train the model #
    ###################
    for data, target in trainloader:
        # clear the gradients of all optimized variables
        optimizer.zero_grad()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(data)
        # calculate the loss
        loss = criterion(output, target)
        # backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()
        # perform a single optimization step (parameter update)
        optimizer.step()
        # update running training loss
        train_loss += loss.item()*data.size(0)

    # print training statistics
    # calculate average loss over an epoch
    train_loss = train_loss/len(trainloader.dataset)
    print('Epoch: {} \tTraining Loss: {:.6f}'.format(
        epoch+1,
        train_loss
        ))
I am getting the value error:
ValueError: Expected input batch_size (24) to match target batch_size (8).
How do I fix it? My batch size is 8, the input image size is (16*16), and this is a 10-class classification problem.
Your input images have 3 channels, so your input feature size is 16*16*3, not 16*16. Currently you treat each channel as a separate instance, which leads, after the x.view(-1, 16*16) flattening, to a classifier input of shape (24, 16*16). Clearly the batch size doesn't match: it is supposed to be 8, not 8*3 = 24.
You could either:
Switch to a CNN to handle multi-channel inputs (here 3 channels).
Use a self.fc1 with 16*16*3 input features (see the sketch below).
If the input is RGB, maybe even convert it to a 1-channel grayscale map.
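A minimal sketch of the second option, keeping the rest of your architecture unchanged; flattening with x.size(0) preserves the batch dimension regardless of the channel count:
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(16 * 16 * 3, 768)  # 3 channels * 16 * 16 input features
        self.fc2 = nn.Linear(768, 64)
        self.fc3 = nn.Linear(64, 10)
        self.dropout = nn.Dropout(p=.5)

    def forward(self, x):
        # flatten everything except the batch dimension: (8, 3, 16, 16) -> (8, 768)
        x = x.view(x.size(0), -1)
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.dropout(F.relu(self.fc2(x)))
        return F.log_softmax(self.fc3(x), dim=1)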

Cannot make this autoencoder network function properly (with convolutional and maxpool layers)

Autoencoder networks seem to be way trickier than normal classifier MLP networks. After several attempts using Lasagne, all I get in the reconstructed output is something that at best resembles a blurry average of all the images in the MNIST database, with no distinction of what the input digit actually is.
The network structure I chose is the following cascade of layers:
input layer (28x28)
2D convolutional layer, filter size 7x7
Max Pooling layer, size 3x3, stride 2x2
Dense (fully connected) flattening layer, 10 units (this is the bottleneck)
Dense (fully connected) layer, 121 units
Reshaping layer to 11x11
2D convolutional layer, filter size 3x3
2D Upscaling layer factor 2
2D convolutional layer, filter size 3x3
2D Upscaling layer factor 2
2D convolutional layer, filter size 5x5
Feature max pooling (from 31x28x28 to 28x28)
All the 2D convolutional layers have untied biases, sigmoid activations, and 31 filters.
All the fully connected layers have sigmoid activations.
The loss function used is squared error and the update function is adagrad. The chunk length for learning is 100 samples, over 1000 epochs.
Just for completeness, the following is the code I used:
import theano.tensor as T
import theano
import sys
sys.path.insert(0,'./Lasagne') # local checkout of Lasagne
import lasagne
from theano import pp
from theano import function
import gzip
import numpy as np
from sklearn.preprocessing import OneHotEncoder
import matplotlib.pyplot as plt

def load_mnist():
    def load_mnist_images(filename):
        with gzip.open(filename, 'rb') as f:
            data = np.frombuffer(f.read(), np.uint8, offset=16)
        # The inputs are vectors now, we reshape them to monochrome 2D images,
        # following the shape convention: (examples, channels, rows, columns)
        data = data.reshape(-1, 1, 28, 28)
        # The inputs come as bytes, we convert them to float32 in range [0,1].
        # (Actually to range [0, 255/256], for compatibility to the version
        # provided at http://deeplearning.net/data/mnist/mnist.pkl.gz.)
        return data / np.float32(256)

    def load_mnist_labels(filename):
        # Read the labels in Yann LeCun's binary format.
        with gzip.open(filename, 'rb') as f:
            data = np.frombuffer(f.read(), np.uint8, offset=8)
        # The labels are vectors of integers now, that's exactly what we want.
        return data

    X_train = load_mnist_images('train-images-idx3-ubyte.gz')
    y_train = load_mnist_labels('train-labels-idx1-ubyte.gz')
    X_test = load_mnist_images('t10k-images-idx3-ubyte.gz')
    y_test = load_mnist_labels('t10k-labels-idx1-ubyte.gz')
    return X_train, y_train, X_test, y_test

def plot_filters(conv_layer):
    W = conv_layer.get_params()[0]
    W_fn = theano.function([],W)
    params = W_fn()
    ks = np.squeeze(params)
    kstack = np.vstack(ks)
    plt.imshow(kstack,interpolation='none')
    plt.show()

def main():
    #theano.config.exception_verbosity="high"
    #theano.config.optimizer='None'
    X_train, y_train, X_test, y_test = load_mnist()
    ohe = OneHotEncoder()
    y_train = ohe.fit_transform(np.expand_dims(y_train,1)).toarray()
    chunk_len = 100
    visamount = 10
    num_epochs = 1000
    num_filters=31
    dropout_p=.0
    print "X_train.shape",X_train.shape,"y_train.shape",y_train.shape
    input_var = T.tensor4('X')
    output_var = T.tensor4('X')
    conv_nonlinearity = lasagne.nonlinearities.sigmoid

    net = lasagne.layers.InputLayer((chunk_len,1,28,28), input_var)
    conv1 = net = lasagne.layers.Conv2DLayer(net,num_filters,(7,7),nonlinearity=conv_nonlinearity,untie_biases=True)
    net = lasagne.layers.MaxPool2DLayer(net,(3,3),stride=(2,2))
    net = lasagne.layers.DropoutLayer(net,p=dropout_p)
    #conv2_layer = lasagne.layers.Conv2DLayer(dropout_layer,num_filters,(3,3),nonlinearity=conv_nonlinearity)
    #pool2_layer = lasagne.layers.MaxPool2DLayer(conv2_layer,(3,3),stride=(2,2))
    net = lasagne.layers.DenseLayer(net,10,nonlinearity=lasagne.nonlinearities.sigmoid)
    #augment_layer1 = lasagne.layers.DenseLayer(reduction_layer,33,nonlinearity=lasagne.nonlinearities.sigmoid)
    net = lasagne.layers.DenseLayer(net,121,nonlinearity=lasagne.nonlinearities.sigmoid)
    net = lasagne.layers.ReshapeLayer(net,(chunk_len,1,11,11))
    net = lasagne.layers.Conv2DLayer(net,num_filters,(3,3),nonlinearity=conv_nonlinearity,untie_biases=True)
    net = lasagne.layers.Upscale2DLayer(net,2)
    net = lasagne.layers.Conv2DLayer(net,num_filters,(3,3),nonlinearity=conv_nonlinearity,untie_biases=True)
    #pool_after0 = lasagne.layers.MaxPool2DLayer(conv_after1,(3,3),stride=(2,2))
    net = lasagne.layers.Upscale2DLayer(net,2)
    net = lasagne.layers.DropoutLayer(net,p=dropout_p)
    #conv_after2 = lasagne.layers.Conv2DLayer(upscale_layer1,num_filters,(3,3),nonlinearity=conv_nonlinearity,untie_biases=True)
    #pool_after1 = lasagne.layers.MaxPool2DLayer(conv_after2,(3,3),stride=(1,1))
    #upscale_layer2 = lasagne.layers.Upscale2DLayer(pool_after1,4)
    net = lasagne.layers.Conv2DLayer(net,num_filters,(5,5),nonlinearity=conv_nonlinearity,untie_biases=True)
    net = lasagne.layers.FeaturePoolLayer(net,num_filters,pool_function=theano.tensor.max)
    print "output_shape:",lasagne.layers.get_output_shape(net)

    params = lasagne.layers.get_all_params(net, trainable=True)
    prediction = lasagne.layers.get_output(net)
    loss = lasagne.objectives.squared_error(prediction, output_var)
    #loss = lasagne.objectives.binary_crossentropy(prediction, output_var)
    aggregated_loss = lasagne.objectives.aggregate(loss)
    updates = lasagne.updates.adagrad(aggregated_loss,params)
    train_fn = theano.function([input_var, output_var], loss, updates=updates)
    test_prediction = lasagne.layers.get_output(net, deterministic=True)
    predict_fn = theano.function([input_var], test_prediction)

    print "starting training..."
    for epoch in range(num_epochs):
        selected = list(set(np.random.random_integers(0,59999,chunk_len*4)))[:chunk_len]
        X_train_sub = X_train[selected,:]
        _loss = train_fn(X_train_sub, X_train_sub)
        print("Epoch %d: Loss %g" % (epoch + 1, np.sum(_loss) / len(X_train)))
        """
        chunk = X_train[0:chunk_len,:,:,:]
        result = predict_fn(chunk)
        vis1 = np.hstack([chunk[j,0,:,:] for j in range(visamount)])
        vis2 = np.hstack([result[j,0,:,:] for j in range(visamount)])
        plt.imshow(np.vstack([vis1,vis2]))
        plt.show()
        """
    print "done."

    chunk = X_train[0:chunk_len,:,:,:]
    result = predict_fn(chunk)
    print "chunk.shape",chunk.shape
    print "result.shape",result.shape
    plot_filters(conv1)
    for i in range(chunk_len/visamount):
        vis1 = np.hstack([chunk[i*visamount+j,0,:,:] for j in range(visamount)])
        vis2 = np.hstack([result[i*visamount+j,0,:,:] for j in range(visamount)])
        plt.imshow(np.vstack([vis1,vis2]))
        plt.show()
    import ipdb; ipdb.set_trace()

if __name__ == "__main__":
    main()
Any ideas on how to improve this network to get a reasonably functioning autoencoder?