I'm trying to create a ResNet model for time series forecasting, but I always get the same prediction ("1.0"). Why?

I'm trying to create a ResNet model for time series forecasting, but the model always returns the same prediction, 1.0, and I don't know why. Here is the model:
def create_res_net():
    # relu_bn and residual_block are helper functions defined elsewhere in the post
    inputs = Input(shape=(50, 1))
    num_filters = 5
    t = BatchNormalization()(inputs)
    t = Conv1D(kernel_size=18, strides=1, filters=num_filters, padding="same")(t)
    t = relu_bn(t)
    num_blocks_list = [2, 5, 5, 2]
    for i in range(len(num_blocks_list)):
        num_blocks = num_blocks_list[i]
        for j in range(num_blocks):
            t = residual_block(t, downsample=(j == 0 and i != 0), filters=num_filters)
        num_filters *= 2
    t = AveragePooling1D(2)(t)
    t = Flatten()(t)
    outputs = Dense(1, activation='softmax')(t)
    model = Model(inputs, outputs)
    model.compile(optimizer='adam', loss='mse', metrics=['accuracy'])
    return model

model = create_res_net()
# save model after each epoch
model.fit(x=X, y=y_train, epochs=10, verbose=1)
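A likely cause: softmax normalizes its outputs so they sum to 1, so a Dense(1, activation='softmax') layer can only ever emit 1.0. For a single-value regression target trained with mse, the usual choice is a linear output. A minimal sketch of that change (only the output layer and metrics differ):

# Sketch of the likely fix: a linear output unit for regression.
# softmax over a single unit always returns 1.0, which matches the symptom.
outputs = Dense(1, activation='linear')(t)   # or simply Dense(1)(t)
model = Model(inputs, outputs)
# 'accuracy' is not meaningful for regression; track an error metric instead
model.compile(optimizer='adam', loss='mse', metrics=['mae'])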


How do I train a new network with a convolution kernel?

I'm doing a final project for school and need help because I'm not sure how to execute it.
I'm investigating how image distortion will affect an ANN's ability to learn. I have no background in coding whatsoever, so any help will be hugely appreciated! I can't use TensorFlow as part of the requirements.
This is what my prof used for network classification, which is standard code from the textbook we're using.
import numpy
import scipy.special

# neural network class definition
class neuralNetwork:
    # initialise the neural network
    def __init__(self, inputnodes, hiddennodes, outputnodes, learningrate):
        # set number of nodes in each input, hidden, output layer
        self.inodes = inputnodes
        self.hnodes = hiddennodes
        self.onodes = outputnodes
        # link weight matrices, sampled from a normal distribution
        self.wih = numpy.random.normal(0.0, pow(self.inodes, -0.5), (self.hnodes, self.inodes))
        self.who = numpy.random.normal(0.0, pow(self.hnodes, -0.5), (self.onodes, self.hnodes))
        self.lr = learningrate
        # activation function is the sigmoid
        self.activation_function = lambda x: scipy.special.expit(x)

    # train the neural network
    def train(self, inputs_list, targets_list):
        inputs = numpy.array(inputs_list, ndmin=2).T
        targets = numpy.array(targets_list, ndmin=2).T
        hidden_inputs = numpy.dot(self.wih, inputs)
        hidden_outputs = self.activation_function(hidden_inputs)
        final_inputs = numpy.dot(self.who, hidden_outputs)
        final_outputs = self.activation_function(final_inputs)
        output_errors = targets - final_outputs
        hidden_errors = numpy.dot(self.who.T, output_errors)
        self.who += self.lr * numpy.dot((output_errors * final_outputs * (1.0 - final_outputs)), numpy.transpose(hidden_outputs))
        self.wih += self.lr * numpy.dot((hidden_errors * hidden_outputs * (1.0 - hidden_outputs)), numpy.transpose(inputs))

    # query the neural network
    def query(self, inputs_list):
        inputs = numpy.array(inputs_list, ndmin=2).T
        hidden_inputs = numpy.dot(self.wih, inputs)
        hidden_outputs = self.activation_function(hidden_inputs)
        final_inputs = numpy.dot(self.who, hidden_outputs)
        final_outputs = self.activation_function(final_inputs)
        return final_outputs

# number of input, hidden and output nodes
input_nodes = 784
hidden_nodes = 200
output_nodes = 10
learning_rate = 0.1

# create instance of neural network
n = neuralNetwork(input_nodes, hidden_nodes, output_nodes, learning_rate)

# load the mnist training data CSV file into a list
training_data_file = open("mnist_train.csv", 'r')
training_data_list = training_data_file.readlines()
training_data_file.close()

# train the neural network
epochs = 5
for e in range(epochs):
    for record in training_data_list:
        all_values = record.split(',')
        inputs = (numpy.asfarray(all_values[1:]) / 255.0 * 0.99) + 0.01
        targets = numpy.zeros(output_nodes) + 0.01
        targets[int(all_values[0])] = 0.99
        n.train(inputs, targets)

# load the mnist test data CSV file into a list
test_data_file = open("mnist_test.csv", 'r')
test_data_list = test_data_file.readlines()
test_data_file.close()

# test the neural network
# scorecard for how well the network performs, initially empty
scorecard = []
for record in test_data_list:
    all_values = record.split(',')
    correct_label = int(all_values[0])
    inputs = (numpy.asfarray(all_values[1:]) / 255.0 * 0.99) + 0.01
    outputs = n.query(inputs)
    label = numpy.argmax(outputs)
    if label == correct_label:
        scorecard.append(1)
    else:
        scorecard.append(0)

# calculate the performance score, the fraction of correct answers
scorecard_array = numpy.asarray(scorecard)
print("performance = ", scorecard_array.sum() / scorecard_array.size)
So I came up with a sharpening kernel. Basically, what I want to accomplish is to apply this kernel to the training data so that the images in the training dataset are 'sharpened' before the network sees them. (I'm also using the convolution method given by my prof.) Where am I supposed to add this in the class above?
sharpen_kernel = numpy.array([[0, -1, 0],
                              [-1, 5, -1],
                              [0, -1, 0]])

# convolve your image with the kernel
matplotlib.rcParams['figure.figsize'] = 20, 20
conv_image = numpy.ones((28, 28))
print("shape of image_array", numpy.shape(image_array))
step = 3
i = 0
while i < 25:
    i += 1
    j = 0
    while j < 25:
        sub_image = image_array[i:(i + step), j:(j + step)]  # array[start y : end y, start x : end x]
        sub_image = numpy.reshape(sub_image, (1, step ** 2))
        kernel = numpy.reshape(sharpen_kernel, (step ** 2, 1))
        conv_scalar = numpy.dot(sub_image, kernel)
        conv_image[i, j] = conv_scalar
        j += 1
Basically, I want to measure the relationship between the ANN's performance and image quality (e.g. blur, sharpening). Help!
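One way to wire this in (a sketch, not the only option): the kernel belongs in the data-preparation step rather than inside the neuralNetwork class. Convolve each training image after you parse it and before you call n.train, for example by wrapping the convolution loop above in a helper:

# Hypothetical helper wrapping the convolution loop above (same index ranges
# as the while loops); returns the sharpened 28x28 image.
def sharpen(image_array, kernel, step=3):
    conv_image = numpy.ones((28, 28))
    for i in range(1, 26):
        for j in range(25):
            sub_image = numpy.reshape(image_array[i:i + step, j:j + step], (1, step ** 2))
            conv_image[i, j] = numpy.dot(sub_image, numpy.reshape(kernel, (step ** 2, 1)))
    return conv_image

# then, inside the training loop, before scaling and n.train(inputs, targets):
image_array = numpy.asfarray(all_values[1:]).reshape((28, 28))
sharpened = numpy.clip(sharpen(image_array, sharpen_kernel), 0, 255)  # sharpening can overshoot [0, 255]
inputs = (sharpened.reshape(784) / 255.0 * 0.99) + 0.01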

Given groups=1, weight of size [10, 1, 5, 5], expected input[2, 3, 28, 28] to have 1 channels, but got 3 channels instead

I am trying to train a CNN on MNIST but test it on my own handwritten digits. To do that I wrote the following code, but I am getting the error in the title of this question:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torchvision import datasets, transforms
from torchvision.datasets import ImageFolder

batch_size = 64
train_dataset = datasets.MNIST(root='./data/',
                               train=True,
                               transform=transforms.ToTensor(),
                               download=True)
test_dataset = ImageFolder('my_digit_images/', transform=transforms.ToTensor())
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv3 = nn.Conv2d(20, 20, kernel_size=3)
        self.mp = nn.MaxPool2d(2)
        self.fc = nn.Linear(320, 10)

    def forward(self, x):
        in_size = x.size(0)
        x = F.relu(self.conv1(x))
        x = F.relu(self.mp(self.conv2(x)))
        x = F.relu(self.mp(self.conv3(x)))
        x = x.view(in_size, -1)  # flatten the tensor
        x = self.fc(x)
        return F.log_softmax(x, dim=1)

model = Net()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

def train(epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = Variable(data), Variable(target)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

def test():
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        data, target = Variable(data, volatile=True), Variable(target)
        output = model(data)
        test_loss += F.nll_loss(output, target, size_average=False).data
        pred = output.data.max(1, keepdim=True)[1]
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()
    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
The MNIST dataset contains black-and-white 1-channel images, while yours are probably 3-channel RGB. Either re-encode your images or preprocess them, e.g.
img = img[:, 0:1, :, :]
You can do this with a custom transform added after transforms.ToTensor().
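A minimal sketch of that custom transform (the slicing differs from the snippet above because after ToTensor() a single image is CxHxW with no batch axis):

from torchvision import transforms

# Keep only the first channel of each image tensor
to_one_channel = transforms.Compose([
    transforms.ToTensor(),
    transforms.Lambda(lambda t: t[0:1, :, :]),
])
test_dataset = ImageFolder('my_digit_images/', transform=to_one_channel)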
The images used in training and testing should follow the same distribution. MNIST data is grayscale by default, and assuming you didn't change the channels, the model expects the same number of channels at test time.
The following code is an example of how to do this with a transformation.
Applied in the order defined below, it:
Converts the image to a single channel (grayscale)
Resizes the image to the size of the default MNIST data
Converts the image to a tensor
Normalizes the tensor to have the same mean and std as during training (assuming you used the standard MNIST values)
test_dataset = ImageFolder('my_digit_images/',
                           transform=transforms.Compose([
                               transforms.Grayscale(num_output_channels=1),
                               transforms.Resize((28, 28)),
                               transforms.ToTensor(),
                               transforms.Normalize((0.1307,), (0.3081,))]))

Seemingly inconsistent tensor sizes in pytorch

I'm building a convolutional autoencoder, but want the encoding to be in a linear form so I can more easily feed it as input into an MLP. I have two convolutional layers on the encoder along with a linear inner layer to reduce dimension. This encoding is then fed into the corresponding decoder.
When I flatten the output of the second convolutional layer, my calculation (using the standard formula: Calculate the Output size in Convolution layer) says it should come out to a 1x100352 rank-1 tensor. However, when I set the input dimension of the linear layer to 100352, the flattened rank-1 tensor has dimension 1x50176. Then comes the weird part.
I tried changing the input dimension of the linear layer to 50176, assuming I had miscalculated. When I do this, the reshaped rank-1 tensor confusingly becomes 1x100352, and the aforementioned weight matrix becomes 50176x256 as expected.
This response to modifying the linear layer's input dimension doesn't make sense to me. That hyperparameter controls the weight matrix correctly, but I'm uncertain why it has any bearing on the linear layer's input, since that input is just a reshaped tensor output from a convolutional layer whose hyperparameters are unrelated to the one in question.
I apologize if I'm just missing something obvious. I'm very new to PyTorch, and I couldn't find any other posts that discuss this sort of issue.
Here's what I believe to be the minimal reproducible example:
import os
import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets
from torch.utils.data import DataLoader
from torchvision.utils import save_image

class convAutoEncoder(nn.Module):
    def __init__(self, **kwargs):
        super().__init__()
        # Creating network structure
        # Encoder portion of autoencoder
        self.enc1 = nn.Conv2d(in_channels=kwargs["inputChannels"], out_channels=kwargs["channelsEncoderMid"], kernel_size=kwargs["kernelSize"])
        self.enc2 = nn.Conv2d(in_channels=kwargs["channelsEncoderMid"], out_channels=kwargs["channelsEncoderInner"], kernel_size=kwargs["kernelSize"])
        self.enc3 = nn.Linear(in_features=kwargs["intoLinear"], out_features=kwargs["linearEncoded"])
        # Decoder portion of autoencoder
        self.dec1 = nn.Linear(in_features=kwargs["linearEncoded"], out_features=kwargs["intoLinear"])
        self.dec2 = nn.ConvTranspose2d(in_channels=kwargs["channelsEncoderInner"], out_channels=kwargs["channelsDecoderMid"], kernel_size=kwargs["kernelSize"])
        self.dec3 = nn.ConvTranspose2d(in_channels=kwargs["channelsDecoderMid"], out_channels=kwargs["inputChannels"], kernel_size=kwargs["kernelSize"])

    def forward(self, x):
        # Encoding
        x = F.relu(self.enc1(x))
        x = F.relu(self.enc2(x))
        x = x.reshape(1, -1)
        x = x.squeeze()
        x = F.relu(self.enc3(x))
        # Decoding
        x = F.relu(self.dec1(x))
        x = x.reshape([32, 4, 28, 28])
        x = F.relu(self.dec2(x))
        x = F.relu(self.dec3(x))
        return x

def encodeDecodeConv(numEpochs=20, input_Channels=3, batchSize=32,
                     channels_Encoder_Inner=4, channels_Encoder_Mid=8, into_Linear=100352,
                     linear_Encoded=256, channels_Decoder_Mid=8, kernel_Size=3,
                     learningRate=1e-3):
    # Pick a device. If GPU available, use that. Otherwise, use CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Define data transforms
    transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
    # Define training dataset
    trainSet = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
    # Define testing dataset
    testSet = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
    # Define data loaders
    trainLoader = DataLoader(trainSet, batch_size=batchSize, shuffle=True)
    testLoader = DataLoader(testSet, batch_size=batchSize, shuffle=True)
    # Initialize neural network
    model = convAutoEncoder(inputChannels=input_Channels, channelsEncoderMid=channels_Encoder_Mid, channelsEncoderInner=channels_Encoder_Inner, intoLinear=into_Linear, linearEncoded=linear_Encoded, channelsDecoderMid=channels_Decoder_Mid, kernelSize=kernel_Size)
    # Optimization setup
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learningRate)
    lossTracker = []
    for epoch in range(numEpochs):
        loss = 0
        for data, _ in trainLoader:
            data = data.to(device)
            optimizer.zero_grad()
            outputs = model(data)
            train_loss = criterion(outputs, data)
            train_loss.backward()
            optimizer.step()
            loss += train_loss.item()
        loss = loss / len(trainLoader)
        print('Epoch {} of {}, Train loss: {:.3f}'.format(epoch + 1, numEpochs, loss))

encodeDecodeConv()
Edit 2: Somewhere in the CIFAR10 dataset, the data appears to change dimension. After playing around with print statements, I discovered that setting the relevant hyperparameter to 100352 works for many entries, but then seemingly one image pops up that has a different size. I'm not sure why that would occur, though.
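A likely explanation: x.reshape(1, -1) flattens the batch dimension into the features, so the linear layer's input size becomes batch_size * channels * height * width = 32 * 4 * 28 * 28 = 100352. CIFAR10's 50000 training images do not divide evenly into batches of 32, so the final batch holds only 16 images and flattens to 16 * 4 * 28 * 28 = 50176, which is what looks like "one image with a different size". A sketch of a batch-preserving forward pass (enc3 and dec1 would then use 3136 features rather than 100352):

def forward(self, x):
    # Encoding: flatten per sample, keeping the batch dimension intact
    x = F.relu(self.enc1(x))
    x = F.relu(self.enc2(x))
    x = x.reshape(x.size(0), -1)          # (N, 4*28*28) = (N, 3136)
    x = F.relu(self.enc3(x))              # enc3: in_features=3136
    # Decoding: restore (N, C, H, W) for the transposed convolutions
    x = F.relu(self.dec1(x))
    x = x.reshape(x.size(0), 4, 28, 28)
    x = F.relu(self.dec2(x))
    x = F.relu(self.dec3(x))
    return x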

How to use Keras ImageDataGenerator with a Siamese or triplet network

I'm trying to build both a Siamese neural network and a triplet neural network on a custom, large dataset.
Keras has ImageDataGenerator, which makes the generation of input data for a regular neural network very easy.
I'm interested in using ImageDataGenerator or a similar approach to train networks with 2 (Siamese) and 3 (triplet) inputs.
In the Keras MNIST Siamese example, the input is generated in a pre-processing stage by the create_pairs method. I don't think that approach fits a large dataset.
Is it possible to use ImageDataGenerator in this case? What are my other options, assuming the dataset is very big?
The idea of data generators is to give fit_generator a stream of data in batches, giving you control over how the data is produced, i.e. whether you load it from files or apply data augmentation as ImageDataGenerator does.
Here I'm posting a modified version of the MNIST Siamese example with a custom DataGenerator; you can work it out from here.
import numpy as np
np.random.seed(1337)  # for reproducibility

import random
from keras.datasets import mnist
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Input, Lambda
from keras.optimizers import SGD, RMSprop
from keras import backend as K

class DataGenerator(object):
    """docstring for DataGenerator"""
    def __init__(self, batch_sz):
        # the data, shuffled and split between train and test sets
        (X_train, y_train), (X_test, y_test) = mnist.load_data()
        X_train = X_train.reshape(60000, 784)
        X_test = X_test.reshape(10000, 784)
        X_train = X_train.astype('float32')
        X_test = X_test.astype('float32')
        X_train /= 255
        X_test /= 255

        # create training+test positive and negative pairs
        digit_indices = [np.where(y_train == i)[0] for i in range(10)]
        self.tr_pairs, self.tr_y = self.create_pairs(X_train, digit_indices)
        digit_indices = [np.where(y_test == i)[0] for i in range(10)]
        self.te_pairs, self.te_y = self.create_pairs(X_test, digit_indices)

        self.tr_pairs_0 = self.tr_pairs[:, 0]
        self.tr_pairs_1 = self.tr_pairs[:, 1]
        self.te_pairs_0 = self.te_pairs[:, 0]
        self.te_pairs_1 = self.te_pairs[:, 1]

        self.batch_sz = batch_sz
        self.samples_per_train = (self.tr_pairs.shape[0] // self.batch_sz) * self.batch_sz
        self.samples_per_val = (self.te_pairs.shape[0] // self.batch_sz) * self.batch_sz
        self.cur_train_index = 0
        self.cur_val_index = 0

    def create_pairs(self, x, digit_indices):
        '''Positive and negative pair creation.
        Alternates between positive and negative pairs.
        '''
        pairs = []
        labels = []
        n = min([len(digit_indices[d]) for d in range(10)]) - 1
        for d in range(10):
            for i in range(n):
                z1, z2 = digit_indices[d][i], digit_indices[d][i + 1]
                pairs += [[x[z1], x[z2]]]
                inc = random.randrange(1, 10)
                dn = (d + inc) % 10
                z1, z2 = digit_indices[d][i], digit_indices[dn][i]
                pairs += [[x[z1], x[z2]]]
                labels += [1, 0]
        return np.array(pairs), np.array(labels)

    def next_train(self):
        while 1:
            self.cur_train_index += self.batch_sz
            if self.cur_train_index >= self.samples_per_train:
                self.cur_train_index = 0
            yield ([self.tr_pairs_0[self.cur_train_index:self.cur_train_index + self.batch_sz],
                    self.tr_pairs_1[self.cur_train_index:self.cur_train_index + self.batch_sz]],
                   self.tr_y[self.cur_train_index:self.cur_train_index + self.batch_sz])

    def next_val(self):
        while 1:
            self.cur_val_index += self.batch_sz
            if self.cur_val_index >= self.samples_per_val:
                self.cur_val_index = 0
            yield ([self.te_pairs_0[self.cur_val_index:self.cur_val_index + self.batch_sz],
                    self.te_pairs_1[self.cur_val_index:self.cur_val_index + self.batch_sz]],
                   self.te_y[self.cur_val_index:self.cur_val_index + self.batch_sz])

def euclidean_distance(vects):
    x, y = vects
    return K.sqrt(K.sum(K.square(x - y), axis=1, keepdims=True))

def eucl_dist_output_shape(shapes):
    shape1, shape2 = shapes
    return (shape1[0], 1)

def contrastive_loss(y_true, y_pred):
    '''Contrastive loss from Hadsell-et-al.'06
    http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf
    '''
    margin = 1
    return K.mean(y_true * K.square(y_pred) + (1 - y_true) * K.square(K.maximum(margin - y_pred, 0)))

def create_base_network(input_dim):
    '''Base network to be shared (eq. to feature extraction).
    '''
    seq = Sequential()
    seq.add(Dense(128, input_shape=(input_dim,), activation='relu'))
    seq.add(Dropout(0.1))
    seq.add(Dense(128, activation='relu'))
    seq.add(Dropout(0.1))
    seq.add(Dense(128, activation='relu'))
    return seq

def compute_accuracy(predictions, labels):
    '''Compute classification accuracy with a fixed threshold on distances.
    '''
    return labels[predictions.ravel() < 0.5].mean()

input_dim = 784
nb_epoch = 20
batch_size = 128

datagen = DataGenerator(batch_size)

# network definition
base_network = create_base_network(input_dim)
input_a = Input(shape=(input_dim,))
input_b = Input(shape=(input_dim,))
# because we re-use the same instance `base_network`,
# the weights of the network
# will be shared across the two branches
processed_a = base_network(input_a)
processed_b = base_network(input_b)
distance = Lambda(euclidean_distance, output_shape=eucl_dist_output_shape)([processed_a, processed_b])
model = Model(input=[input_a, input_b], output=distance)

# train
rms = RMSprop()
model.compile(loss=contrastive_loss, optimizer=rms)
model.fit_generator(generator=datagen.next_train(), samples_per_epoch=datagen.samples_per_train,
                    nb_epoch=nb_epoch, validation_data=datagen.next_val(),
                    nb_val_samples=datagen.samples_per_val)
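For the triplet case the question also asks about, the same pattern extends naturally. A sketch (not part of the original answer) of what the generator could yield for a three-input model, assuming a hypothetical create_triplets helper that builds (anchor, positive, negative) arrays the way create_pairs builds pairs:

def next_train_triplets(self):
    # Assumes self.tr_anchor, self.tr_pos, self.tr_neg were built by a
    # hypothetical create_triplets helper, analogous to create_pairs above.
    while 1:
        self.cur_train_index += self.batch_sz
        if self.cur_train_index >= self.samples_per_train:
            self.cur_train_index = 0
        i, b = self.cur_train_index, self.batch_sz
        # dummy targets: a triplet loss uses only the three embeddings
        yield ([self.tr_anchor[i:i + b],
                self.tr_pos[i:i + b],
                self.tr_neg[i:i + b]],
               np.zeros(b))

The model side would then have three Input layers sharing one base_network, and a triplet-style loss in place of contrastive_loss.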

How to Create Recommendations with an Incremental SVD Recommender System

I am testing a recommendation system that is built according to Simon Funk's algorithm
(written by Timely Development, http://www.timelydevelopment.com/demos/NetflixPrize.aspx).
The problem is that all incremental SVD algorithms predict the rating for a given user_id and movie_id, but in a real system what you need is a list of new items for the active user.
I have seen that some people use kNN after incremental SVD, but unless I'm missing something, I lose all the performance gain if I run kNN after building the model with incremental SVD.
Does anyone have experience with the incremental SVD/Simon Funk method who can tell me how to produce a list of newly recommended items?
The way to produce recommended movies:
Take the list of movies the user hasn't viewed.
Multiply each movie's feature vector by the user's feature vector.
Sort descending by the result and take the top movies.
For the theory: pretend there are only two dimensions (comedy and drama). If I love comedies but hate dramas, my feature vector is [1.0, 0.0]. Comparing me against the following movies:
Comedy: [1.0, 0.0] x [1.0, 0.0] = 1
Dramedy: [0.5, 0.5] x [1.0, 0.0] = 0.5
Drama: [0.0, 1.0] x [1.0, 0.0] = 0
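A minimal NumPy sketch of those three steps; user_vec, movie_features and seen are illustrative stand-ins for a trained model's data:

import numpy as np

# Assumed trained factors: user_vec is (k,), movie_features is (num_movies, k)
user_vec = np.array([1.0, 0.0])
movie_features = np.array([[1.0, 0.0],   # comedy
                           [0.5, 0.5],   # dramedy
                           [0.0, 1.0]])  # drama
seen = {0}  # indices of movies the user has already viewed

scores = movie_features @ user_vec            # predicted affinity per movie
ranked = np.argsort(-scores)                  # sort descending
recommendations = [m for m in ranked if m not in seen]
print(recommendations)                        # e.g. [1, 2]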
Here is a simple Python implementation based on the Yelp Netflix code. If you install Numba it will run at C speeds.
data_loader.py
import os
import numpy as np
from scipy import sparse

class DataLoader:
    def __init__(self):
        pass

    @staticmethod
    def create_review_matrix(file_path):
        data = np.array([[int(tok) for tok in line.split('\t')[:3]]
                         for line in open(file_path)])
        ij = data[:, :2]
        ij -= 1
        values = data[:, 2]
        review_matrix = sparse.csc_matrix((values, ij.T)).astype(float)
        return review_matrix

movielens_file_path = '%s/Downloads/ml-100k/u1.base' % os.environ['HOME']
my_reviews = DataLoader.create_review_matrix(movielens_file_path)

user_reviews = my_reviews[8]
user_reviews = user_reviews.toarray().ravel()
user_rated_movies, = np.where(user_reviews > 0)
user_ratings = user_reviews[user_rated_movies]

movie_reviews = my_reviews[:, 201]
movie_reviews = movie_reviews.toarray().ravel()
movie_rated_users, = np.where(movie_reviews > 0)
movie_ratings = movie_reviews[movie_rated_users]

user_pseudo_average_ratings = {}
user_pseudo_average_ratings[8] = np.mean(user_ratings)
user_pseudo_average_ratings[9] = np.mean(user_ratings)
user_pseudo_average_ratings[10] = np.mean(user_ratings)

users, movies = my_reviews.nonzero()

users_matrix = np.empty((3, 3))
users_matrix[:] = 0.1
movies_matrix = np.empty((3, 3))
movies_matrix[:] = 0.1
result = users_matrix[0] * movies_matrix[0]
otro = movies_matrix[:, 2]
otro[2] = 8
funk.py
# Requires MovieLens 100k data
import numpy as np, time, sys
from data_loader import DataLoader
from numba import jit
import os

def get_user_ratings(user_id, review_matrix):
    """
    Returns a numpy array with the ratings that user_id has made

    :rtype : numpy array
    :param user_id: the id of the user
    :return: a numpy array with the ratings that user_id has made
    """
    user_reviews = review_matrix[user_id]
    user_reviews = user_reviews.toarray().ravel()
    user_rated_movies, = np.where(user_reviews > 0)
    user_ratings = user_reviews[user_rated_movies]
    return user_ratings

def get_movie_ratings(movie_id, review_matrix):
    """
    Returns a numpy array with the ratings that movie_id has received

    :rtype : numpy array
    :param movie_id: the id of the movie
    :return: a numpy array with the ratings that movie_id has received
    """
    movie_reviews = review_matrix[:, movie_id]
    movie_reviews = movie_reviews.toarray().ravel()
    movie_rated_users, = np.where(movie_reviews > 0)
    movie_ratings = movie_reviews[movie_rated_users]
    return movie_ratings

def create_user_feature_matrix(review_matrix, NUM_FEATURES, FEATURE_INIT_VALUE):
    """
    Creates a user feature matrix of size NUM_FEATURES X NUM_USERS
    with all cells initialized to FEATURE_INIT_VALUE

    :rtype : numpy matrix
    :return: a matrix of size NUM_FEATURES X NUM_USERS
    with all cells initialized to FEATURE_INIT_VALUE
    """
    num_users = review_matrix.shape[0]
    user_feature_matrix = np.empty((NUM_FEATURES, num_users))
    user_feature_matrix[:] = FEATURE_INIT_VALUE
    return user_feature_matrix

def create_movie_feature_matrix(review_matrix, NUM_FEATURES, FEATURE_INIT_VALUE):
    """
    Creates a movie feature matrix of size NUM_FEATURES X NUM_MOVIES
    with all cells initialized to FEATURE_INIT_VALUE

    :rtype : numpy matrix
    :return: a matrix of size NUM_FEATURES X NUM_MOVIES
    with all cells initialized to FEATURE_INIT_VALUE
    """
    num_movies = review_matrix.shape[1]
    movie_feature_matrix = np.empty((NUM_FEATURES, num_movies))
    movie_feature_matrix[:] = FEATURE_INIT_VALUE
    return movie_feature_matrix

@jit(nopython=True)
def predict_rating(user_id, movie_id, user_feature_matrix, movie_feature_matrix):
    """
    Makes a prediction of the rating that user_id will give to movie_id if
    he/she sees it

    :rtype : float
    :param user_id: the id of the user
    :param movie_id: the id of the movie
    :return: a float in the range [1, 5] with the predicted rating for
    movie_id by user_id
    """
    rating = 1.
    for f in range(user_feature_matrix.shape[0]):
        rating += user_feature_matrix[f, user_id] * movie_feature_matrix[f, movie_id]
    # We trim the ratings in case they go above or below the stars range
    if rating > 5:
        rating = 5
    elif rating < 1:
        rating = 1
    return rating

@jit(nopython=True)
def sgd_inner(feature, A_row, A_col, A_data, user_feature_matrix, movie_feature_matrix, NUM_FEATURES):
    K = 0.015
    LEARNING_RATE = 0.001
    squared_error = 0
    for k in range(len(A_data)):
        user_id = A_row[k]
        movie_id = A_col[k]
        rating = A_data[k]
        p = predict_rating(user_id, movie_id, user_feature_matrix, movie_feature_matrix)
        err = rating - p
        squared_error += err ** 2
        user_feature_value = user_feature_matrix[feature, user_id]
        movie_feature_value = movie_feature_matrix[feature, movie_id]
        user_feature_matrix[feature, user_id] += \
            LEARNING_RATE * (err * movie_feature_value - K * user_feature_value)
        movie_feature_matrix[feature, movie_id] += \
            LEARNING_RATE * (err * user_feature_value - K * movie_feature_value)
    return squared_error

def calculate_features(A_row, A_col, A_data, user_feature_matrix, movie_feature_matrix, NUM_FEATURES):
    """
    Iterates through all the ratings in search for the best features that
    minimize the error between the predictions and the real ratings.
    This is the main function in Simon Funk's SVD algorithm

    :rtype : void
    """
    MIN_IMPROVEMENT = 0.0001
    MIN_ITERATIONS = 100
    rmse = 0
    last_rmse = 0
    print(len(A_data))
    num_ratings = len(A_data)
    for feature in range(NUM_FEATURES):
        iter = 0
        while (iter < MIN_ITERATIONS) or (rmse < last_rmse - MIN_IMPROVEMENT):
            last_rmse = rmse
            squared_error = sgd_inner(feature, A_row, A_col, A_data, user_feature_matrix, movie_feature_matrix, NUM_FEATURES)
            rmse = (squared_error / num_ratings) ** 0.5
            iter += 1
        print('Squared error = %f' % squared_error)
        print('RMSE = %f' % rmse)
        print('Feature = %d' % feature)
    return last_rmse

LAMBDA = 0.02
FEATURE_INIT_VALUE = 0.1
NUM_FEATURES = 20

movielens_file_path = '%s/Downloads/ml-100k/u1.base' % os.environ['HOME']
A = DataLoader.create_review_matrix(movielens_file_path)

from scipy.io import mmread, mmwrite
mmwrite('./data/A', A)

user_feature_matrix = create_user_feature_matrix(A, NUM_FEATURES, FEATURE_INIT_VALUE)
movie_feature_matrix = create_movie_feature_matrix(A, NUM_FEATURES, FEATURE_INIT_VALUE)

users, movies = A.nonzero()
A = A.tocoo()

rmse = calculate_features(A.row, A.col, A.data, user_feature_matrix, movie_feature_matrix, NUM_FEATURES)
print('rmse', rmse)
I think this is a big question, as there are many recommender approaches that could be called "incremental SVD". To answer your specific question: kNN is run on the projected item space, not the original space, so it should be quite fast.
Assume you have n users and m items. After incremental SVD you have k trained features. To get new items for a given user, multiply the 1xk user feature vector by the kxm item feature matrix. You end up with that user's m predicted ratings, one per item. Then just sort them, remove the ones they have already seen, and show some number of new ones.