Python Neural Network Prediction - neural-network

I am working on a project , I have two various numbers :
1- First number maximum value is 1500 and minimum is 200.
2- Second number maximum value is 200 and minimum is 10.
3- I want to create neural network , adding samples and train the network to predict the last number, for example :
900,67 equals 87
870,99 equals 100
1000,50 equals ?
What's type of neural networks can work with my project?

To this example, you input two values and get one.
import numpy as np
import keras
from keras import Sequential
from keras.layers import Dense
import matplotlib.pyplot as plt
MIN = np.random.rand(100)*500
MAX = np.random.rand(100)*500 + 500
x = np.concatenate((MIN.reshape(-1,1),MAX.reshape(-1,1)),axis = 1)
y = np.sin(x[:,0])*500 + np.cos(x[:,1])*500
x_max = x.max()
y_max = y.max()
x = x/x_max
y = (y-y.min())/(y_max-y.min())
model = Sequential()
model.add(Dense(200,input_dim = 2, activation = 'relu'))
model.add(Dense(100, activation = 'sigmoid'))
model.add(Dense(100, activation = 'sigmoid'))
model.add(Dense(1,activation = 'relu'))
opt = keras.optimizers.Adam(learning_rate=0.001, beta_1=0.6, beta_2=0.97, amsgrad=False)
model.compile(loss='mean_squared_error',optimizer=opt , metrics=['mse'])
model.fit(x, y, epochs=10000, batch_size=2)
y_hat = model.predict(x)
plt.figure(figsize=(10,5))
plt.plot(y)
plt.plot(y_hat.reshape(-1))
This is the result:
You will need to make pre and post processing, normalizing inputs and reescaling outputs from the neural network. This is the input:
use example:
In [10]: model.predict(np.array([0.234,0.567]).reshape(-1,2))
Out[10]: array([[0.61975896]], dtype=float32)

Related

Seemingly inconsistent tensor sizes in pytorch

I'm building a convolutional autoencoder, but want the encoding to be in a linear form so I can more easily feed it as input into an MLP. I have two convolutional layers on the encoder along with a linear inner layer to reduce dimension. This encoding is then fed into the corresponding decoder.
When I flatten the output of the second convolutional layer, based on my calculation (using the standard formula: Calculate the Output size in Convolution layer) should come out to a 1x100352 rank 1 tensor. However, when I set the input dimension of the linear layer to be 100352, the flattened rank 1 tensor has dimension 1x50176. Then comes the weird part.
I tried changing the input dimension of the linear layer to be 50176, assuming I had miscalculated. When I do this, the reshaped rank 1 tensor confusingly becomes 1x100352, and then the aforementioned weight matrix becomes 50176x256 as expected.
This response to modifying the linear layer's input dimension doesn't make sense to me. That hyperparameter controls the weight matrix correctly, but I guess I'm uncertain why it has any bearing on the linear layer's input since that's just a reshaped tensor output from a convolutional layer whose hyperparameters are unrelated to the hyperparameter in question.
I apologize if I'm just missing something obvious. I'm very new to pytorch, and I couldn't find any other posts which discussed this sort of issue.
Here's what I believe to be the minimal reproducible example:
import os
import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets
from torch.utils.data import DataLoader
from torchvision.utils import save_image
class convAutoEncoder(nn.Module):
def __init__(self,**kwargs):
super().__init__()
#Creating network structure
#Encoder portion of autoencoder
self.enc1 = nn.Conv2d(in_channels = kwargs["inputChannels"], out_channels = kwargs["channelsEncoderMid"], kernel_size = kwargs["kernelSize"])
self.enc2 = nn.Conv2d(in_channels = kwargs["channelsEncoderMid"], out_channels = kwargs["channelsEncoderInner"], kernel_size = kwargs["kernelSize"])
self.enc3 = nn.Linear(in_features = kwargs["intoLinear"], out_features = kwargs["linearEncoded"])
#Decoder portion of autoencoder
self.dec1 = nn.Linear(in_features = kwargs["linearEncoded"], out_features = kwargs["intoLinear"])
self.dec2 = nn.ConvTranspose2d(in_channels = kwargs["channelsEncoderInner"], out_channels = kwargs["channelsDecoderMid"], kernel_size = kwargs["kernelSize"])
self.dec3 = nn.ConvTranspose2d(in_channels = kwargs["channelsDecoderMid"], out_channels = kwargs["inputChannels"], kernel_size = kwargs["kernelSize"])
def forward(self,x):
#Encoding
x = F.relu(self.enc1(x))
x = F.relu(self.enc2(x))
x = x.reshape(1,-1)
x = x.squeeze()
x = F.relu(self.enc3(x))
#Decoding
x = F.relu(self.dec1(x))
x = x.reshape([32,4,28,28])
x = F.relu(self.dec2(x))
x = F.relu(self.dec3(x))
return x
def encodeDecodeConv(numEpochs = 20, input_Channels = 3, batchSize = 32,
channels_Encoder_Inner = 4, channels_Encoder_Mid = 8, into_Linear = 100352,
linear_Encoded = 256, channels_Decoder_Mid = 8, kernel_Size = 3,
learningRate = 1e-3):
#Pick a device. If GPU available, use that. Otherwise, use CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#Define data transforms
transform = transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
#Define training dataset
trainSet = datasets.CIFAR10(root = './data', train = True, download = True, transform = transform)
#Define testing dataset
testSet = datasets.CIFAR10(root = './data', train = False, download = True, transform = transform)
#Define data loaders
trainLoader = DataLoader(trainSet, batch_size = batchSize, shuffle = True)
testLoader = DataLoader(testSet, batch_size = batchSize, shuffle = True)
#Initialize neural network
model = convAutoEncoder(inputChannels = input_Channels, channelsEncoderMid = channels_Encoder_Mid, channelsEncoderInner = channels_Encoder_Inner, intoLinear = into_Linear, linearEncoded = linear_Encoded, channelsDecoderMid = channels_Decoder_Mid, kernelSize = kernel_Size)
#Optimization setup
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(),lr = learningRate)
lossTracker = []
for epoch in range(numEpochs):
loss = 0
for data,_ in trainLoader:
data = data.to(device)
optimizer.zero_grad()
outputs = model(data)
train_loss = criterion(outputs,data)
train_loss.backward()
optimizer.step()
loss += train_loss.item()
loss = loss/len(trainLoader)
print('Epoch {} of {}, Train loss: {:.3f}'.format(epoch+1,numEpochs,loss))
encodeDecodeConv()
Edit2: Somewhere in the CIFAR10 dataset, the data appears to change dimension. After playing around with print statements more, I discovered that setting the relevant hyperparameter to 100352 works great for many entries, but then seemingly one image pops up that has a different size. Not sure why that would occur, though.

Tensorflow - keras: bad performance for simple curve fitting task

I'm trying to implement a very simple one layered MLP for a toy regression problem with one variable (dimension = 1) and one target (dimension = 1). It's a simple curve fitting problem with zero noise.
Matlab\Deep Learning Toolbox
Using levenberg-marquardt backpropagation on a MLP with a single hidden layer with 100 neurons and hyperbolic tangent activation I got pretty decent performance with almost zero effort:
MSE = 7.18e-08
Plotting the predictions and the targets I get a very precise fitting.
Python\Tensorflow\Keras
With the same network settings I used in matlab there's almost no training. No matter how hard I try to tune the training parameters or switch the optimizer.
MSE = 0.12900154
In this case the plot of the predictions is a curve that is not even able to follow the oscillations of the target curve.
I can obtain something better using RELU activations for the hidden layer but we're still far:
MSE = 0.0582045
This is the code I used in Python:
# IMPORT LIBRARIES
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
# IMPORT DATASET FROM CSV FILE, SHUFFLE TRAINING SET
# AND MAKE NUMPY ARRAY FOR TRAINING (DATA ARE ALREADY NORMALIZED)
dataset_path = "C:/Users/Rob/Desktop/Learning1.csv"
Learning_Dataset = pd.read_csv(dataset_path
, comment='\t',sep=","
,skipinitialspace=False)
Learning_Dataset = Learning_Dataset.sample(frac = 1) # SHUFFLING
test_dataset_path = "C:/Users/Rob/Desktop/Test1.csv"
Test_Dataset = pd.read_csv(test_dataset_path
, comment='\t',sep=","
,skipinitialspace=False)
Learning_Target = Learning_Dataset.pop('Target')
Test_Target = Test_Dataset.pop('Target')
Learning_Dataset = np.array(Learning_Dataset,dtype = "float32")
Test_Dataset = np.array(Test_Dataset,dtype = "float32")
Learning_Target = np.array(Learning_Target,dtype = "float32")
Test_Target = np.array(Test_Target,dtype = "float32")
# DEFINE SIMPLE MLP MODEL
inputs = tf.keras.layers.Input(shape=(1,))
x = tf.keras.layers.Dense(100, activation='relu')(inputs)
y = tf.keras.layers.Dense(1)(x)
model = tf.keras.Model(inputs=inputs, outputs=y)
# TRAIN MODEL
opt = tf.keras.optimizers.RMSprop(learning_rate = 0.001,
rho = 0.9,
momentum = 0.0,
epsilon = 1e-07,
centered = False)
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=100)
model.compile(optimizer = opt,
loss = 'mse',
metrics = ['mse'])
model.fit(Learning_Dataset,
Learning_Target,
epochs=500,
validation_split = 0.2,
verbose=0,
callbacks=[early_stop],
shuffle = False,
batch_size = 100)
# INFERENCE AND CHECK ACCURACY
Predictions = model.predict(Test_Dataset)
Predictions = Predictions.reshape(10000)
print(np.square(np.subtract(Test_Target,Predictions)).mean()) # MSE
plt.plot(Test_Dataset,Test_Target,'o',Test_Dataset,Predictions,'o')
plt.legend(('Target','Model Prediction'))
plt.show()
What am i doing wrong?
Thanks

non-linear neural network regression - quadratic function is not being estimated correctly

I have mostly used ANNs for classification and only recently started to try them out for modeling continuous variables. As an exercise I generated a simple set of (x, y) pairs where y = x^2 and tried to train an ANN to learn this quadratic function.
The ANN model:
This ANN has 1 input node (ie. x), 2 hidden layers each with 2 nodes in each layer, and 1 output node. All four hidden nodes use the non-linear tanh activation function and the output node has no activation function (since it is regression).
The Data:
For the training set I randomly generated 100 numbers between (-20, 20) for x and computed y=x^2. For the testing set I randomly generated 100 numbers between (-30, 30) for x and also computed y=x^2. I then transformed all x so that they are centered around 0 and their min and max are approximately around -1.5 and 1.5. I also transformed all y similarly but made their min and max about -0.9 and 0.9. This way, all the data falls within that mid range of the tanh activation function and not way out at the extremes.
The Problem:
After training the ANN in Keras, I am seeing that only the right half of the polynomial function is being learned, and the left half is completely flat. Does anyone have any ideas why this may be happening? I tried playing around with different scaling options, as well as hidden layer specifications but no luck on that left side.
Thanks!
Attached is the code I used for everything and the image shows the plot of the scaled training x vs the predicted y. As you can see, only half of the parabola is recovered.
import numpy as np, pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import matplotlib.pyplot as plt
seed = 10
n = 100
X_train = np.random.uniform(-20, 20, n)
Y_train = X_train ** 2
X_test = np.random.uniform(-30, 30, n)
Y_test = X_test ** 2
#### Scale the data
x_cap = max(abs(np.array(list(X_train) + list(X_test))))
y_cap = max(abs(np.array(list(Y_train) + list(Y_test))))
x_mean = np.mean(np.array(list(X_train) + list(X_test)))
y_mean = np.mean(np.array(list(Y_train) + list(Y_test)))
X_train2 = (X_train-x_mean) / x_cap
X_test2 = (X_test-x_mean) / x_cap
Y_train2 = (Y_train-y_mean) / y_cap
Y_test2 = (Y_test-y_mean) / y_cap
X_train2 = X_train2 * (1.5 / max(X_train2))
Y_train2 = Y_train2 * (0.9 / max(Y_train2))
# define base model
def baseline_model1():
# create model
model1 = Sequential()
model1.add(Dense(2, input_dim=1, kernel_initializer='normal', activation='tanh'))
model1.add(Dense(2, input_dim=1, kernel_initializer='normal', activation='tanh'))
model1.add(Dense(1, kernel_initializer='normal'))
# Compile model
model1.compile(loss='mean_squared_error', optimizer='adam')
return model1
np.random.seed(seed)
estimator1 = KerasRegressor(build_fn=baseline_model1, epochs=100, batch_size=5, verbose=0)
estimator1.fit(X_train2, Y_train2)
prediction = estimator1.predict(X_train2)
plt.scatter(X_train2, prediction)
enter image description here
You should also consider adding more width to you hidden layer. I changed from 2 to 5 and got a very good fit. I also used more epochs as suggested from rvinas
Your network is very sensible to the initial parameters. The following will help:
Change your kernel_initializer to glorot_uniform. Your network is very small and glorot_uniform will work better in consonance with the tanh activations. Glorot uniform will encourage your weights to be initially within a more reasonable range (since it takes into account the fan-in and fan-out of each layer).
Train your model for more epochs (i.e. 1000).

tensorflow model has different results than the same model in skflow (optimizer)

I'm using tensorflow to replicate a neural network for the MNIST dataset, previously programmed in skflow. Here is the model in skflow:
import tensorflow.contrib.learn as skflow
from sklearn import metrics
from sklearn.datasets import fetch_mldata
from sklearn.cross_validation import train_test_split
mnist = fetch_mldata('MNIST original')
train_dataset, test_dataset, train_labels, test_labels = train_test_split( mnist.data, mnist.target, test_size=10000, random_state=42)
classifier = skflow.TensorFlowDNNClassifier(hidden_units=[1200, 1200], n_classes=10, optimizer="SGD", learning_rate=0.01, batch_size=128, steps=1000)
classifier.fit(train_dataset, train_labels)
score = metrics.accuracy_score(test_labels, classifier.predict(test_dataset))
print("Accuracy: %f" % score)
This model get 0.950600 of accuracy.
But the model replicated in tensorflow gets nan in the loss fuction and fails to improve (I think it's not related with Tensorflow NaN bug? since I'm using tf.nn.softmax_cross_entropy_with_logits).
I can't figure out why, since the setup of the model in tensorflow is the same than in the model in skflow. The only thing I'm unsure if it's the same, is on how skflow initializes the weights of the network, I searched that part in the code of skflow but I have not found it.
Here is the code in tensorflow:
import numpy as np
import tensorflow as tf
from sklearn.cross_validation import train_test_split
from sklearn.datasets import fetch_mldata
mnist = fetch_mldata('MNIST original')
num_labels = len(np.unique(mnist.target))
num_pixels = mnist.data.shape[1]
#reshape labels to one hot encoding
labels = (np.arange(num_labels) == mnist.target[:, None]).astype(np.float32)
#create train_dataset of 60000 and test_dataset of 10000 elem
train_dataset, test_dataset, train_labels, test_labels = train_test_split(mnist.data, labels, test_size=10000, random_state=42)
def accuracy(predictions, labels):
return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1)) / predictions.shape[0])
batch_size = 128
graph = tf.Graph()
with graph.as_default():
# Input data.
tf_train_dataset = tf.placeholder(tf.float32,
shape=(batch_size, num_pixels))
tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
tf_test_dataset = tf.cast(tf.constant(test_dataset), tf.float32)
w_hidden = tf.Variable(tf.truncated_normal([num_pixels, 1200]))
b_hidden = tf.Variable(tf.zeros([1200]))
hidden = tf.nn.relu(tf.matmul(tf_train_dataset, w_hidden) + b_hidden)
w_hidden_2 = tf.Variable(tf.truncated_normal([1200, 1200]))
b_hidden_2 = tf.Variable(tf.zeros([1200]))
hidden2 = tf.nn.relu(tf.matmul(hidden, w_hidden_2) + b_hidden_2)
w = tf.Variable(tf.truncated_normal([1200, num_labels]))
b = tf.Variable(tf.zeros([num_labels]))
logits = tf.matmul(hidden2, w) + b
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
logits, tf_train_labels))
# Optimizer.
optimizer = tf.train.GradientDescentOptimizer(0.01).minimize(loss)
# Predictions for the training, and test data.
train_prediction = tf.nn.softmax(logits)
test_prediction = tf.nn.softmax(tf.matmul(tf.nn.relu(tf.matmul(tf.nn.relu(tf.matmul(tf_test_dataset, w_hidden) + b_hidden), w_hidden_2) + b_hidden_2), w) + b)
num_steps = 1001
with tf.Session(graph=graph) as session:
tf.initialize_all_variables().run()
print("Initialized")
for step in range(num_steps):
# Pick an offset within the training data, which has been randomized.
offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
# Generate a minibatch.
batch_data = train_dataset[offset:(offset + batch_size), :]
batch_labels = train_labels[offset:(offset + batch_size), :]
# Prepare a dictionary telling the session where to feed the minibatch.
feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}
_, l, predictions = session.run( [optimizer, loss, train_prediction], feed_dict=feed_dict)
if (step % 100 == 0):
print("Minibatch loss at step %d: %f" % (step, l))
print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
print("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), test_labels))
I'm clueless on what might be the issue. Any suggestions?
Edited 1: As I was suggested, I tried replacing tf.Variable calls with tf.get_variable("w_hidden", [num_pixels, 1200]), but I got Nans.
Also, I used skflow.ops.dnn op for doing the layers and used my own loss and etc, and still got Nans.
Edited 2: Turns out it is not a problem of weight initialization. It seems that the gradients are too unstable (in the tensorflow model) and that lead the loss to become NaN. As in Adding multiple layers to TensorFlow causes loss function to become Nan, I slowed the learning rate by an order of magnitude, and it worked out.
Now what I don't understand is what differs between the SGD optimizer of skflow and the one above. Or what is the explanation that they "seem" equal, but they need different learning rates?
Initialization in skflow relies on tf.get_variable default initialization - uniform_unit_scaling_initializer (see this for detailed description).
You can try replacing your tf.Variable calls with something like tf.get_variable("w_hidden", [num_pixels, 1200]).
Alternative, is to start with using skflow.ops.dnn op that will do the layers for you but you still do your own loss and etc.
Also please let me know if you there a clear usecase that forced you to rewrite things in pure TensorFlow instead of using skflow - I would love to address it. You can always write custom model via passing model_fn into TensorFlowEstimator and still use training / batching / saving and etc functionality.

Cannot make this autoencoder network function properly (with convolutional and maxpool layers)

Autoencoder networks seems to be way trickier than normal classifier MLP networks. After several attempts using Lasagne all what I get in the reconstructed output is something that resembles at its best a blurry averaging of all the images of the MNIST database without distinction on what the input digit actually is.
The networks structure I chose are the following cascade layers:
input layer (28x28)
2D convolutional layer, filter size 7x7
Max Pooling layer, size 3x3, stride 2x2
Dense (fully connected) flattening layer, 10 units (this is the bottleneck)
Dense (fully connected) layer, 121 units
Reshaping layer to 11x11
2D convolutional layer, filter size 3x3
2D Upscaling layer factor 2
2D convolutional layer, filter size 3x3
2D Upscaling layer factor 2
2D convolutional layer, filter size 5x5
Feature max pooling (from 31x28x28 to 28x28)
All the 2D convolutional layers have the biases untied, sigmoid activations and 31 filters.
All the fully connected layers have sigmoid activations.
The loss function used is squared error, the updating function is adagrad. The length of the chunk for the learning is 100 samples, multiplied for 1000 epochs.
Just for completeness, the following is the code I used:
import theano.tensor as T
import theano
import sys
sys.path.insert(0,'./Lasagne') # local checkout of Lasagne
import lasagne
from theano import pp
from theano import function
import gzip
import numpy as np
from sklearn.preprocessing import OneHotEncoder
import matplotlib.pyplot as plt
def load_mnist():
def load_mnist_images(filename):
with gzip.open(filename, 'rb') as f:
data = np.frombuffer(f.read(), np.uint8, offset=16)
# The inputs are vectors now, we reshape them to monochrome 2D images,
# following the shape convention: (examples, channels, rows, columns)
data = data.reshape(-1, 1, 28, 28)
# The inputs come as bytes, we convert them to float32 in range [0,1].
# (Actually to range [0, 255/256], for compatibility to the version
# provided at http://deeplearning.net/data/mnist/mnist.pkl.gz.)
return data / np.float32(256)
def load_mnist_labels(filename):
# Read the labels in Yann LeCun's binary format.
with gzip.open(filename, 'rb') as f:
data = np.frombuffer(f.read(), np.uint8, offset=8)
# The labels are vectors of integers now, that's exactly what we want.
return data
X_train = load_mnist_images('train-images-idx3-ubyte.gz')
y_train = load_mnist_labels('train-labels-idx1-ubyte.gz')
X_test = load_mnist_images('t10k-images-idx3-ubyte.gz')
y_test = load_mnist_labels('t10k-labels-idx1-ubyte.gz')
return X_train, y_train, X_test, y_test
def plot_filters(conv_layer):
W = conv_layer.get_params()[0]
W_fn = theano.function([],W)
params = W_fn()
ks = np.squeeze(params)
kstack = np.vstack(ks)
plt.imshow(kstack,interpolation='none')
plt.show()
def main():
#theano.config.exception_verbosity="high"
#theano.config.optimizer='None'
X_train, y_train, X_test, y_test = load_mnist()
ohe = OneHotEncoder()
y_train = ohe.fit_transform(np.expand_dims(y_train,1)).toarray()
chunk_len = 100
visamount = 10
num_epochs = 1000
num_filters=31
dropout_p=.0
print "X_train.shape",X_train.shape,"y_train.shape",y_train.shape
input_var = T.tensor4('X')
output_var = T.tensor4('X')
conv_nonlinearity = lasagne.nonlinearities.sigmoid
net = lasagne.layers.InputLayer((chunk_len,1,28,28), input_var)
conv1 = net = lasagne.layers.Conv2DLayer(net,num_filters,(7,7),nonlinearity=conv_nonlinearity,untie_biases=True)
net = lasagne.layers.MaxPool2DLayer(net,(3,3),stride=(2,2))
net = lasagne.layers.DropoutLayer(net,p=dropout_p)
#conv2_layer = lasagne.layers.Conv2DLayer(dropout_layer,num_filters,(3,3),nonlinearity=conv_nonlinearity)
#pool2_layer = lasagne.layers.MaxPool2DLayer(conv2_layer,(3,3),stride=(2,2))
net = lasagne.layers.DenseLayer(net,10,nonlinearity=lasagne.nonlinearities.sigmoid)
#augment_layer1 = lasagne.layers.DenseLayer(reduction_layer,33,nonlinearity=lasagne.nonlinearities.sigmoid)
net = lasagne.layers.DenseLayer(net,121,nonlinearity=lasagne.nonlinearities.sigmoid)
net = lasagne.layers.ReshapeLayer(net,(chunk_len,1,11,11))
net = lasagne.layers.Conv2DLayer(net,num_filters,(3,3),nonlinearity=conv_nonlinearity,untie_biases=True)
net = lasagne.layers.Upscale2DLayer(net,2)
net = lasagne.layers.Conv2DLayer(net,num_filters,(3,3),nonlinearity=conv_nonlinearity,untie_biases=True)
#pool_after0 = lasagne.layers.MaxPool2DLayer(conv_after1,(3,3),stride=(2,2))
net = lasagne.layers.Upscale2DLayer(net,2)
net = lasagne.layers.DropoutLayer(net,p=dropout_p)
#conv_after2 = lasagne.layers.Conv2DLayer(upscale_layer1,num_filters,(3,3),nonlinearity=conv_nonlinearity,untie_biases=True)
#pool_after1 = lasagne.layers.MaxPool2DLayer(conv_after2,(3,3),stride=(1,1))
#upscale_layer2 = lasagne.layers.Upscale2DLayer(pool_after1,4)
net = lasagne.layers.Conv2DLayer(net,num_filters,(5,5),nonlinearity=conv_nonlinearity,untie_biases=True)
net = lasagne.layers.FeaturePoolLayer(net,num_filters,pool_function=theano.tensor.max)
print "output_shape:",lasagne.layers.get_output_shape(net)
params = lasagne.layers.get_all_params(net, trainable=True)
prediction = lasagne.layers.get_output(net)
loss = lasagne.objectives.squared_error(prediction, output_var)
#loss = lasagne.objectives.binary_crossentropy(prediction, output_var)
aggregated_loss = lasagne.objectives.aggregate(loss)
updates = lasagne.updates.adagrad(aggregated_loss,params)
train_fn = theano.function([input_var, output_var], loss, updates=updates)
test_prediction = lasagne.layers.get_output(net, deterministic=True)
predict_fn = theano.function([input_var], test_prediction)
print "starting training..."
for epoch in range(num_epochs):
selected = list(set(np.random.random_integers(0,59999,chunk_len*4)))[:chunk_len]
X_train_sub = X_train[selected,:]
_loss = train_fn(X_train_sub, X_train_sub)
print("Epoch %d: Loss %g" % (epoch + 1, np.sum(_loss) / len(X_train)))
"""
chunk = X_train[0:chunk_len,:,:,:]
result = predict_fn(chunk)
vis1 = np.hstack([chunk[j,0,:,:] for j in range(visamount)])
vis2 = np.hstack([result[j,0,:,:] for j in range(visamount)])
plt.imshow(np.vstack([vis1,vis2]))
plt.show()
"""
print "done."
chunk = X_train[0:chunk_len,:,:,:]
result = predict_fn(chunk)
print "chunk.shape",chunk.shape
print "result.shape",result.shape
plot_filters(conv1)
for i in range(chunk_len/visamount):
vis1 = np.hstack([chunk[i*visamount+j,0,:,:] for j in range(visamount)])
vis2 = np.hstack([result[i*visamount+j,0,:,:] for j in range(visamount)])
plt.imshow(np.vstack([vis1,vis2]))
plt.show()
import ipdb; ipdb.set_trace()
if __name__ == "__main__":
main()
Any ideas on how to improve this network to get a reasonably functioning autoencoder?