Value error during grid search - epochs is not a legal parameter - valueerror

I am trying to tune my model's hyperparameters with a grid search, using a code that worked for me before without errors, as shown in the code:
def ann_gridsearch(dataset, dense_layers, batch_size, units, optimizer,
activation, method=StandardScaler):
x_train, x_test, y_train, y_test, sc = split_the_data(dataset, method=method)
test2split = pd.DataFrame(np.column_stack([x_test, y_test]))
x_test, x_val, y_test, y_val, sc = split_the_data(test2split, method=method, test_size=0.5)
classifier = KerasClassifier(build_fn=build_ann_classifier)
parameters = {'dense_layers': dense_layers, 'batch_size': batch_size, 'units': units, 'epochs' : [150],
'optimizer': optimizer, 'activation': activation, 'input_dim': [input_dim]}
grid_search = GridSearchCV(estimator=classifier,
param_grid=parameters,
return_train_score=True,
scoring=['accuracy', 'recall'],
refit='accuracy',
cv=3,
n_jobs=-1)
grid_results = grid_search.fit(x_train, y_train, validation_data=(x_val, y_val))
best_parameters = grid_results.best_params_
best_accuracy = grid_results.best_score_
grid_score = grid_results.cv_results_
print('The best parameters are: {}'.format(best_parameters))
print('And they give the accuracy of: {}'.format(best_accuracy))
return grid_score
where my build_fn is:
def build_ann_classifier(dense_layers, input_dim, units, optimizer, activation, input_decrease=False):
classifier = Sequential()
dense_layers = dense_layers
if input_decrease == True:
for index, lsize in enumerate(dense_layers):
decrease = index + 1
# Input Layer - includes the input_shape
if index == 0:
classifier.add(Dense(units=units, kernel_initializer='uniform', activation=activation,
input_dim=input_dim, kernel_regularizer=l2(0.001)))
else:
units = math.floor(units * (0.8 ^ decrease))
classifier.add(Dense(units=units, kernel_initializer='uniform', activation=activation))
else:
for index, lsize in enumerate(dense_layers):
# Input Layer - includes the input_shape
if index == 0:
classifier.add(Dense(units=units, kernel_initializer='uniform', activation=activation,
input_dim=input_dim, kernel_regularizer=l2(0.001)))
else:
classifier.add(Dense(units=units, kernel_initializer='uniform', activation=activation))
classifier.add(Dense(units=1, kernel_initializer='uniform', activation='sigmoid'))
classifier.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
return classifier
running it with:
grid = ann_gridsearch(india_dataset, dense_layers=[(1,), (2,), (4,)], units=[50, 75, 100], batch_size=[256]
activation=['relu'], optimizer=['adam'])
The codes start to run the grid search, but after a while, it crashes with the following error:
raise ValueError('{} is not a legal parameter'.format(params_name))
ValueError: epochs is not a legal parameter
I couldn't find any relevant answers anywhere. Any ideas why this happens and how to fix it?

try "epoch"->"nb_epoch", tarting Keras 2.0, nb_epoch argument has been renamed to epochs, but KerasClassifier doesn't seem to change.

Related

IndexError target is out of bounds

I'm working on a custom dataset of images and using a Neural Net to classify them.
The data set is about 6000 images of 58 classes. But on training I keep getting a "target is out of bounds" error.
I've double checked the number of classes and image size but still get the same error.
#hyperprams
learning_rate = 5e-4
#3 for RGB values
in_channel = 3
#classes from data set
num_classes = 58
# arbitray choice
batch_size = 32
#total number of epochs used to train the model
epochs = 3
traffic_dataset = TrafficSigns(csv_file='annotations.csv',
root_directory='/Users/*****/Desktop/images/',
transform = transforms.ToTensor())
train_size = int(0.8 * len(traffic_dataset))
test_size = len(traffic_dataset) - train_size
train, test = torch.utils.data.random_split(traffic_dataset,
[train_size, test_size])
train_loader = torch.utils.data.DataLoader(train,
batch_size= batch_size,
shuffle= True,
num_workers= 4)
test_loader = torch.utils.data.DataLoader(test,
batch_size = batch_size,
shuffle= True,
num_workers= 4)
#Create a fully connected nn
class Net(nn.Module):
#use the constructor w/ arguments size of data and number of classes
def __init__(self,
input_size,
num_classes):
super(Net, self).__init__()
self.fc1 = nn.Linear(input_size, 60)
self.fc2 = nn.Linear(60, num_classes)
#define your forward step function with relu as the non-linear function of the weights
#x will be the datapassed to the model
def forward(self, x):
x=f.relu(self.fc1(x))
x = self.fc2(x)
return x
#sanity check
test = Net(2028, num_classes)
x = torch.randn(24, 2028)
print(test(x).shape)
#instantiate the class object of NN
net = Net(2028, num_classes)
criterion = nn.CrossEntropyLoss()
nn_optimizer = optim.Adam(net.parameters(),
lr = learning_rate)
#train on multiple epochs using the criterion and gradient decent algorthim estabilished above
for epoch in range(1):
for i, (data, target) in enumerate(tqdm.tqdm(train_loader)):
data = data.reshape(data.shape[0], -1)
#forward
outputs = net(data)
loss = criterion(outputs, target)
#backward propigation
nn_optimizer.zero_grad()
loss.backward()
#gradiant decent choosen
nn_optimizer.step()
Im also using a custom dataset class to import the images and labels.
My first thought was that the class is not iterating over the CSV and images correctly but I can't seem to find where they might be not matching up.
class TrafficSigns(Dataset):
#constructure will need csv file of labels images and the transform function defined above
def __init__(self,
csv_file,
root_directory,
transform = None):
self.labels = pd.read_csv(csv_file)
self.root_directory = root_directory
self.transform = transform
#returns the length
def __len__(self):
return len(self.labels)
#get data index by indes
def __getitem__(self, i):
image_path = os.path.join(self.root_directory, self.labels.iloc[i,0])
image = io.imread(image_path)
y_label = torch.tensor(int(self.labels.iloc[i, 1]))
#if statement needed since transform can be set to None
if self.transform:
image = self.transform(image)
return (image, y_label)
Any help would be awesome, thank you.
Here is the full stacktrace error that's getting thrown.
IndexError Traceback (most recent call last)
/var/folders/t_/rcfcs8g56jn7trwnsvmdyh_r0000gn/T/ipykernel_34551/1839343274.py in <module>
11 #forward
12 outputs = net(data)
---> 13 loss = criterion(outputs, target)
14 #backward propigation
15 nn_optimizer.zero_grad()
~/Library/Python/3.8/lib/python/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1100 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1101 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1102 return forward_call(*input, **kwargs)
1103 # Do not call functions when jit is used
1104 full_backward_hooks, non_full_backward_hooks = [], []
~/Library/Python/3.8/lib/python/site-packages/torch/nn/modules/loss.py in forward(self, input, target)
1148
1149 def forward(self, input: Tensor, target: Tensor) -> Tensor:
-> 1150 return F.cross_entropy(input, target, weight=self.weight,
1151 ignore_index=self.ignore_index, reduction=self.reduction,
1152 label_smoothing=self.label_smoothing)
~/Library/Python/3.8/lib/python/site-packages/torch/nn/functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction, label_smoothing)
2844 if size_average is not None or reduce is not None:
2845 reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 2846 return torch._C._nn.cross_entropy_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index, label_smoothing)
2847
2848
IndexError: Target 125 is out of bounds.
I came across same issue where I used sequential model (LSTM) for next sequence prediction. I check data loader where labels contained -1 because of which cross entropy loss throwing exception. here is my sequence chunks where model found -1 sequence as label in data loader:
Solved please check your null rows and remove those or set accordingly.

Embeddings in sentiment analysis task with PyTorch and Multilayer Perception

I have the body of the code, which should work fine, I think it doesn't because of something I'm messing up here, probably having to do with the embedding.
import torch.nn as nn
class MultilayerPerceptron(nn.Module):
def __init__(self, input_size, hidden_size): # I removed output size
# Call initializer function of the super class
super(MultilayerPerceptron, self).__init__()
self.embedding = nn.Embedding(INPUT_DIM, EMBEDDING_DIM) #added this myself, maybe wrong
#self.mlp = nn.MultilayerPerceptron(EMBEDDING_DIM, HIDDEN_DIM) #also added
self.INPUT_DIM = INPUT_DIM
self.HIDDEN_DIM = HIDDEN_DIM
self.OUTPUT_DIM = OUTPUT_DIM
self.EMBEDDING_DIM = EMBEDDING_DIM
#whenever this model is called, those layers in the sequential block
#will be processed in the order given to the block.
self.model = nn.Sequential(
#nn.Flatten(), # adding this hopefully it works (it didn't)
#embeds = embedded.mean(dim=1) #god help
nn.Linear(self.INPUT_DIM, self.HIDDEN_DIM), #later on, multiply by embedding dimensionality #I removed
nn.ReLU(),
nn.Linear(self.HIDDEN_DIM, self.OUTPUT_DIM), #one layer neural network
nn.ReLU(), # do I need this?
nn.Sigmoid(),
)
def forward(self, x):
embedded = self.embedding(x)
#embedded = [sent len, batch size, emb dim]
output, hidden = self.model(embedded)
output = self.model(x) #call the model defined above for forward propagation.
return output
INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 100 #how do I fit this into the model??
HIDDEN_DIM = 256
OUTPUT_DIM = 1
model = MultilayerPerceptron(INPUT_DIM, HIDDEN_DIM) #MLP instead
The error I get is "mat1 and mat2 shapes cannot be multiplied (50176x100 and 25002x256)".

Accuracy from sess.run(() is returning the value in bytes. How can I change to value?

I am new to CNN and tried to train the CNN model. However when I try to print the accuracies returned from cnn it gives me results in bytes format like b'\n\x11\n\naccuracy_1\x15\x00\x00\x80<'. However when I try to print the values from the loss_train obtained from the same sess.run I get value of 1419.06. Why is this happening.
########################################################################################################################
#IMPORT PACKAGES
import math
import shutil
import pywt
import sys
import random
import numpy as np
import h5py
import pip
import os
from os import system
import tensorflow as tf
from PIL import Image
import matplotlib
import matplotlib.pyplot as plt
import skimage.io as io
import matplotlib.image as mpimg
import time
np.random.seed(1)
slim = tf.contrib.slim
########################################################################################################################
########################################################################################################################
#The FLAGS are used to assign constant values to several paths as well as variables that will be constantly used.
flags = tf.app.flags
flags.DEFINE_string('dataset_dir','E:\\CODING\\CNN_Compressed\\Trial\\Codes\\using_numpy\\NWPU-RESISC45\\NWPU-RESISC45\\','E:\\CODING\\CNN_Compressed\\Trial\\Codes\\using_numpy\\NWPU-RESISC45\\NWPU-RESISC45\\')
flags.DEFINE_float('validation_size', 0.1, 'Float: The proportion of examples in the dataset to be used for validation')
flags.DEFINE_float('test_size', 0.1, 'Float: The proportion of examples in the dataset to be used for test')
flags.DEFINE_integer('num_shards', 1, 'Int: Number of shards to split the TFRecord files into')
flags.DEFINE_integer('random_seed', 0, 'Int: Random seed to use for repeatability.')
flags.DEFINE_string('tfrecord_filename', None, 'String: The output filename to name your TFRecord file')
tf.app.flags.DEFINE_integer('target_image_height', 256, 'train input image height')
tf.app.flags.DEFINE_integer('target_image_width', 256, 'train input image width')
tf.app.flags.DEFINE_integer('batch_size', 128, 'batch size of training.')
tf.app.flags.DEFINE_integer('num_epochs', 30, 'epochs of training.')
tf.app.flags.DEFINE_float('learning_rate', 0.001, 'learning rate of training.')
FLAGS = flags.FLAGS
img_size = 256
num_channels=3
num_classes=45
########################################################################################################################
########################################################################################################################
datapath_train = 'E:\\CODING\\CNN_Compressed\\Trial\\Codes\\using_numpy\\NWPU-RESISC45\\NWPU-RESISC45\\train\\None_train_00000-of-00001.tfrecord'
def _extract_fn(tfrecord):
features={
'image/encoded': tf.FixedLenFeature([], tf.string),
'image/format': tf.FixedLenFeature([], tf.string),
'image/class/label': tf.FixedLenFeature([], tf.int64),
'image/height': tf.FixedLenFeature([], tf.int64),
'image/width': tf.FixedLenFeature([], tf.int64),
'image/channels': tf.FixedLenFeature([],tf.int64)
}
parsed_example = tf.parse_single_example(tfrecord, features)
image_de = tf.io.decode_raw(parsed_example['image/encoded'],tf.uint8)
img_height = tf.cast(parsed_example['image/height'],tf.int32)
img_width = tf.cast(parsed_example['image/width'],tf.int32)
img_channel = tf.cast(parsed_example['image/channels'],tf.int32)
img_shape = tf.stack([img_height,img_width,img_channel])
label = tf.cast(parsed_example['image/class/label'],tf.int64)
image = tf.reshape(image_de,img_shape)
#label = parsed_example['image/class/label']
return image, img_shape, label
########################################################################################################################
#########################################################################################################################
"""
# Pipeline of dataset and iterator
dataset = tf.data.TFRecordDataset(datapath)
# Parse the record into tensors.
dataset = dataset.map(_extract_fn)
# Generate batches
dataset = dataset.batch(1)
# Create a one-shot iterator
iterator = dataset.make_one_shot_iterator()
image, img_shape, label = iterator.get_next()
with tf.Session() as sess:
try:
print(sess.run(img_shape))
image_batch=sess.run(image)
print(image_batch)
img_bas=tf.cast(image_batch,tf.uint8)
plt.imshow(image_batch[0,:,:,:]*255)
plt.show()
except tf.errors.OutOfRangeError:
pass"""
########################################################################################################################
########################################################################################################################
#INITIALIZATION FOR THE CNN ARCHITECTURE
filter_size_conv1 = [5,5]
num_filters_conv1 = 32
filter_shape_pool1 = [2,2]
filter_size_conv2 = [3,3]
num_filters_conv2 = 64
filter_shape_pool2 = [2,2]
#PLACEHOLDERS
x = tf.placeholder(tf.float32, shape = [None, img_size,img_size,num_channels], name='x')
y = tf.placeholder(tf.int32, shape= [None], name = 'ytrue') #Output data placeholder
y_one_hot = tf.one_hot(y,45)
y_true_cls = tf.argmax(y_one_hot, dimension=1)
########################################################################################################################
########################################################################################################################
def new_conv_layer(input, num_input_channels, filter_size, num_filters, name):
with tf.variable_scope(name) as scope:
# Shape of the filter-weights for the convolution
shape = [filter_size, filter_size, num_input_channels, num_filters]
# Create new weights (filters) with the given shape
weights = tf.Variable(tf.truncated_normal(shape, stddev=0.05))
# Create new biases, one for each filter
biases = tf.Variable(tf.constant(0.05, shape=[num_filters]))
# TensorFlow operation for convolution
layer = tf.nn.conv2d(input=input, filter=weights, strides=[1, 1, 1, 1], padding='SAME')
# Add the biases to the results of the convolution.
layer += biases
return layer, weights
def new_pool_layer(input, name):
with tf.variable_scope(name) as scope:
# TensorFlow operation for convolution
layer = tf.nn.max_pool(value=input, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
return layer
def new_relu_layer(input, name):
with tf.variable_scope(name) as scope:
# TensorFlow operation for convolution
layer = tf.nn.relu(input)
return layer
def new_fc_layer(input, num_inputs, num_outputs, name):
with tf.variable_scope(name) as scope:
# Create new weights and biases.
weights = tf.Variable(tf.truncated_normal([num_inputs, num_outputs], stddev=0.05))
biases = tf.Variable(tf.constant(0.05, shape=[num_outputs]))
# Multiply the input and weights, and then add the bias-values.
layer = tf.matmul(input, weights) + biases
return layer
# CONVOLUTIONAL LAYER 1
layer_conv1, weights_conv1 = new_conv_layer(input=x, num_input_channels=3, filter_size=5, num_filters=32, name ="conv1")
# Pooling Layer 1
layer_pool1 = new_pool_layer(layer_conv1, name="pool1")
# RelU layer 1
layer_relu1 = new_relu_layer(layer_pool1, name="relu1")
# CONVOLUTIONAL LAYER 2
layer_conv2, weights_conv2 = new_conv_layer(input=layer_relu1, num_input_channels=32, filter_size=5, num_filters=64, name= "conv2")
# Pooling Layer 2
layer_pool2 = new_pool_layer(layer_conv2, name="pool2")
# RelU layer 2
layer_relu2 = new_relu_layer(layer_pool2, name="relu2")
# FLATTEN LAYER
num_features = layer_relu2.get_shape()[1:4].num_elements()
layer_flat = tf.reshape(layer_relu2, [-1, num_features])
# FULLY-CONNECTED LAYER 1
layer_fc1 = new_fc_layer(layer_flat, num_inputs=num_features, num_outputs=1000, name="fc1")
# RelU layer 3
layer_relu3 = new_relu_layer(layer_fc1, name="relu3")
# FULLY-CONNECTED LAYER 2
layer_fc2 = new_fc_layer(input=layer_relu3, num_inputs=1000, num_outputs=45, name="fc2")
# Use Softmax function to normalize the output
with tf.variable_scope("Softmax"):
y_pred = tf.nn.softmax(layer_fc2)
y_pred_cls = tf.argmax(y_pred, dimension=1)
# Use Cross entropy cost function
with tf.name_scope("cross_ent"):
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=layer_fc2, labels=y_one_hot)
cost = tf.reduce_mean(cross_entropy)
# Use Adam Optimizer
with tf.name_scope("optimizer"):
optimizer = tf.train.AdamOptimizer(learning_rate = 1e-4).minimize(cost)
# Accuracy
with tf.name_scope("accuracy"):
correct_prediction = tf.equal(y_pred_cls, y_true_cls)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# setup the initialisation operator
init_op = tf.global_variables_initializer()
# Pipeline of dataset and iterator
dataset_train = tf.data.TFRecordDataset(datapath_train)
# Parse the record into tensors.
dataset_train = dataset_train.map(_extract_fn)
# Generate batches
dataset_train = dataset_train.batch(FLAGS.batch_size)
iterator_train = dataset_train.make_initializable_iterator()
next_element_train = iterator_train.get_next()
print('\n Starting the CNN train')
# Initialize the FileWriter
writer_train = tf.summary.FileWriter("Training_FileWriter/")
writer_val = tf.summary.FileWriter("Validation_FileWriter/")
#summary
accuracy = tf.summary.scalar("accuracy", accuracy)
loss = tf.summary.scalar("loss", cost)
# Merge all summaries together
merged_summary = tf.summary.merge_all()
#PERFORM THE CNN OPERATIONS
with tf.Session() as sess:
sess.run(init_op)
sess.run(iterator_train.initializer)
# Add the model graph to TensorBoard
writer_train.add_graph(sess.graph)
writer_val.add_graph(sess.graph)
# Loop over number of epochs
print('\nTraining')
for epoch in range(FLAGS.num_epochs):
sess.run(iterator_train.initializer)
start_time = time.time()
train_accuracy = 0
validation_accuracy = 0
acc_train_avg = 0
val_acc_avg = 0
for batch in range(0, int(25200/FLAGS.batch_size)):
img_train, shp_train, lbl_train = sess.run(next_element_train)
_, loss_train, acc_train, acc_summ = sess.run([optimizer, cost, accuracy, merged_summary], feed_dict = {x: img_train, y: lbl_train})
print(loss_train)
print(acc_train)
train_accuracy+=acc_train
end_time = time.time()
#acc_train_avg = (train_accuracy/(int(25200/FLAGS.batch_size)))
#TRAINING
print("Epoch "+str(epoch+1)+" completed : Time usage "+str(int(end_time-start_time))+" seconds")
print("\tAccuracy:")
print("\t- Training Loss:\t{}", loss_train)
print ("\t- Training Accuracy:\t{}",acc_train)
writer_train.add_summary(acc_summ,epoch+1)
#######################################################################################################################
The error is obtained as
Training
1427.1069
b'\n\x11\n\naccuracy_1\x15\x00\x00\x80<'
Traceback (most recent call last):
File "train_trial.py", line 302, in <module>
train_accuracy+=acc_train
TypeError: unsupported operand type(s) for +=: 'int' and 'bytes'
You are overwriting your loss and accuracy operations here:
accuracy = tf.summary.scalar("accuracy", accuracy)
loss = tf.summary.scalar("loss", cost)
Then when you run accuracy you get the protobuf bytes of the summary, instead of just running the op. You should rename these variables to prevent overwriting/name clashes.

How can I get every layers output value with keras?

I want use keras Lstm to get the time series features, then use the features to Kmeans. But now I can not get the layers output values. How can I get the layers output values?
This is my lstm network
Layer (type) Output Shape Param #
lstm_66 (LSTM) (None, None, 50) 10400
lstm_67 (LSTM) (None, 100) 60400
dense_19 (Dense) (None, 1) 101
activation_19 (Activation) (None, 1) 0
I want to get the lstm_67 output values,my code is:
import keras.backend as K
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import tensorflow as tf
sess = tf.Session()
sess.run(tf.global_variables_initializer())
import numpy as np
statesAll=[]
layers = model.layers
print layers[1].output,type(layers[1].output[1]),sess.run(layers[1].output)
and the result is:
Tensor("lstm_61/TensorArrayReadV3:0", shape=(?, 100), dtype=float32)
So, how can I get the layers output value?
Thanks!
But it not work,my code is:
def load_data(file_name, sequence_length=10, split=0.8):
df = pd.read_csv(file_name, sep=',', usecols=[1])
data_all = np.array(df).astype(float)
scaler = MinMaxScaler()
data_all = scaler.fit_transform(data_all)
data = []
print len(data_all)
for i in range(len(data_all) - sequence_length - 1):
data.append(data_all[i: i + sequence_length + 1])
reshaped_data = np.array(data).astype('float64')
np.random.shuffle(reshaped_data)
x = reshaped_data[:, :-1]
y = reshaped_data[:, -1]
split_boundary = int(reshaped_data.shape[0] * split)
train_x = x[: split_boundary]
test_x = x[split_boundary:]
train_y = y[: split_boundary]
test_y = y[split_boundary:]
return train_x, train_y, test_x, test_y, scaler
def build_model(n_samples, time_steps, input_dim):
model = Sequential()
model.add(LSTM(input_dim=1, output_dim=50,return_sequences=True))
model.add(LSTM(100, return_sequences=False))
model.add(Dense(output_dim=1))
model.add(Activation('linear'))
model.compile(loss='mse', optimizer='rmsprop')
print(model.layers)
return model
def train_model(train_x, train_y, test_x, test_y):
model = build_model()
model.fit(train_x, train_y, batch_size=128, nb_epoch=30,validation_split=0.1)
return model
train_x, train_y, test_x, test_y, scaler = load_data(file path)
train_x = np.reshape(train_x, (train_x.shape[0], train_x.shape[1], 1))
test_x = np.reshape(test_x, (test_x.shape[0], test_x.shape[1], 1))
model = train_model(train_x, train_y, test_x, test_y)
from keras import backend as K
layers = model.layers
K.eval(layers[1].output)
In TensorFlow 2.x, you can do like this:
from tensorflow.python.keras import backend as K
model = build_model()
# lstm_67 is the second layer.
lstm = K.function([model.layers[0].input], [model.layers[1].output])
lstm_output = lstm([test_x])[0]
keras.backend.eval() should do.
Look at the documentation here and here
First of all, this is a tensor, you need to use the tf. Print () method to see the specific value. If you use Spyder, you will not see this information in the console. You need to execute this program in the command line.

One-hot vector prediction always returns the same value

My deep neural network returns the same output for every input. I tried (with no luck) different variations of:
loss
optimizer
network topology / layers types
number of epochs (1-100)
I have 3 outputs (one-hot) and for every input output they are like (it changes after every training):
4.701869785785675049e-01 4.793547391891479492e-01 2.381391078233718872e-01
This problem happens probably because of highly random nature of my training data (stock prediction).
The data set is also heavily shifted towards one of the answers (that's why I used sample_weight - calculated proportionally).
I think I can rule out overfitting (it happens even for 1 epoch and I have dropout layers).
One of the examples of my network:
xs_conv = xs.reshape(xs.shape[0], xs.shape[1], 1)
model_conv = Sequential()
model_conv.add(Conv1D(128, 15, input_shape=(input_columns,1), activation='relu'))
model_conv.add(MaxPooling1D(pool_size=3))
model_conv.add(Dropout(0.4))
model_conv.add(Conv1D(64, 15, input_shape=(input_columns,1), activation='relu'))
model_conv.add(MaxPooling1D(pool_size=3))
model_conv.add(Dropout(0.4))
model_conv.add(Flatten())
model_conv.add(Dense(128, activation='relu'))
model_conv.add(Dropout(0.4))
model_conv.add(Dense(3, activation='sigmoid'))
model_conv.compile(loss='mean_squared_error', optimizer='nadam', metrics=['accuracy'])
model_conv.fit(xs_conv, ys, epochs=10, batch_size=16, sample_weight=sample_weight, validation_split=0.3, shuffle=True)
I would understand if the outputs were random, but what happens seems very peculiar. Any ideas?
Data: computed.csv
Whole code:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from keras.layers import Input, Dense, Conv1D, Dropout, MaxPooling1D, Flatten
from keras.models import Model, Sequential
from keras import backend as K
import random
DATA_DIR = '../../Data/'
INPUT_DATA_FILE = DATA_DIR + 'computed.csv'
def get_y(row):
profit = 0.010
hot_one = [0,0,0]
hot_one[0] = int(row.close_future_5 >= profit)
hot_one[1] = int(row.close_future_5 <= -profit)
hot_one[2] = int(row.close_future_5 < profit and row.close_future_10 > -profit)
return hot_one
def rolling_window(window, arr):
return [np.array(arr[i:i+window]).transpose().flatten().tolist() for i in range(0, len(arr))][0:-window+1]
def prepare_data(data, widnow, test_split):
xs1 = data.iloc[:,1:26].as_matrix()
ys1 = [get_y(row) for row in data.to_records()]
xs = np.array(rolling_window(window, xs1)).tolist()
ys = ys1[0:-window+1]
zipped = list(zip(xs, ys))
random.shuffle(zipped)
train_size = int((1.0 - test_split) * len(data))
xs, ys = zip(*zipped[0:train_size])
xs_test, ys_test = zip(*zipped[train_size:])
return np.array(xs), np.array(ys), np.array(xs_test), np.array(ys_test)
def get_sample_weight(y):
if(y[0]): return ups_w
elif(y[1]): return downs_w
else: return flats_w
data = pd.read_csv(INPUT_DATA_FILE)
window = 30
test_split = .9
xs, ys, xs_test, ys_test = prepare_data(data, window, test_split)
ups_cnt = sum(y[0] for y in ys)
downs_cnt = sum(y[1] for y in ys)
flats_cnt = sum(y[0] == False and y[1] == False for y in ys)
total_cnt = ups_cnt + downs_cnt + flats_cnt
ups_w = total_cnt/ups_cnt
downs_w = total_cnt/downs_cnt
flats_w = total_cnt/flats_cnt
sample_weight = np.array([get_sample_weight(y) for y in ys])
_, input_columns = xs.shape
xs_conv = xs.reshape(xs.shape[0], xs.shape[1], 1)
model_conv = Sequential()
model_conv.add(Conv1D(128, 15, input_shape=(input_columns,1), activation='relu'))
model_conv.add(MaxPooling1D(pool_size=3))
model_conv.add(Dropout(0.4))
model_conv.add(Conv1D(64, 15, input_shape=(input_columns,1), activation='relu'))
model_conv.add(MaxPooling1D(pool_size=3))
model_conv.add(Dropout(0.4))
model_conv.add(Flatten())
model_conv.add(Dense(128, activation='relu'))
model_conv.add(Dropout(0.4))
model_conv.add(Dense(3, activation='sigmoid'))
model_conv.compile(loss='mean_squared_error', optimizer='nadam', metrics=['accuracy'])
model_conv.fit(xs_conv, ys, epochs=1, batch_size=16, sample_weight=sample_weight, validation_split=0.3, shuffle=True)
xs_test_conv = xs_test.reshape(xs_test.shape[0], xs_test.shape[1], 1)
res = model_conv.predict(xs_test_conv)
plotdata = pd.concat([pd.DataFrame(res, columns=['res_up','res_down','res_flat']), pd.DataFrame(ys_test, columns=['ys_up','ys_down','y_flat'])], axis = 1)
plotdata[['res_up', 'ys_up']][3000:3500].plot(figsize=(20,4))
plotdata[['res_down', 'ys_down']][3000:3500].plot(figsize=(20,4))
I have run your model with the attached data and so far can say that the biggest problem is lack of data cleaning.
For instance, there's a inf value in .csv at line 623. After I've filtered them all out with
xs1 = xs1[np.isfinite(xs1).all(axis=1)]
... I collected some statistics over xs, namely min, max and mean. They turned out pretty remarkable:
-43.0049723138
32832.3333333 # !!!
0.213126234391
On average, the values are close to 0, but some are 6 orders of magnitude higher. These particular rows definitely hurt the neural network, so you should either filter them as well or come up with a clever way to normalize the features.
But even with them, the model ended up with 71-79% validation accuracy. The result distribution is a bit skewed towards the 3rd class, but in general pretty diverse to name it peculiar: 19% for class 1, 7% for class 2, 73% for class 3. Example test output:
[[ 1.93120316e-02 4.47684433e-04 9.97518778e-01]
[ 1.40607255e-02 2.45630667e-02 9.74113524e-01]
[ 3.07740629e-01 4.80920941e-01 2.28664145e-01]
...,
[ 5.72797097e-02 9.45571139e-02 8.07634115e-01]
[ 1.05512664e-01 8.99530351e-02 6.70437515e-01]
[ 5.24505274e-03 1.46622911e-01 9.42657173e-01]]