Correct data loading, splitting and augmentation in Pytorch - neural-network

The tutorial doesn't seem to explain how we should load, split and do proper augmentation.
Let's have a dataset consisting of cars and cats. The folder structure would be:
data
cat
0101.jpg
0201.jpg
...
dogs
0101.jpg
0201.jpg
...
At first, I loaded the dataset by datasets.ImageFolder function. Image Function has command "TRANSFORM" where we can set some augmentation commands, but we don't want to apply augmentation to test dataset! So let's stay with transform=None.
data = datasets.ImageFolder(root='data')
Apparently, we don't have folder structure train and test and therefore I assume a good approach would be to use split_dataset function
train_size = int(split * len(data))
test_size = len(data) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(data, [train_size, test_size])
Now let's load the data the following way.
train_loader = torch.utils.data.DataLoader(train_dataset,
batch_size=8,
shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset,
batch_size=8,
shuffle=True)
How can I apply transformations (data augmentation) to the "train_loader" images?
Basically I need to: 1. load data from the folder structure explained above
2. split the data into test/train parts
3. apply augmentations on train part.

I am not sure if there is a recommended way of doing this, but this is how I would workaround this problem:
Given that torch.utils.data.random_split() returns Subset, we cannot (can we? not 100% sure here I double-checked, we cannot) exploit their inner datasets, because they are the same (the only diference is in the indices). In this context, I would implement a simple class to apply transformations, something like this:
from torch.utils.data import Dataset
class ApplyTransform(Dataset):
"""
Apply transformations to a Dataset
Arguments:
dataset (Dataset): A Dataset that returns (sample, target)
transform (callable, optional): A function/transform to be applied on the sample
target_transform (callable, optional): A function/transform to be applied on the target
"""
def __init__(self, dataset, transform=None, target_transform=None):
self.dataset = dataset
self.transform = transform
self.target_transform = target_transform
# yes, you don't need these 2 lines below :(
if transform is None and target_transform is None:
print("Am I a joke to you? :)")
def __getitem__(self, idx):
sample, target = self.dataset[idx]
if self.transform is not None:
sample = self.transform(sample)
if self.target_transform is not None:
target = self.target_transform(target)
return sample, target
def __len__(self):
return len(self.dataset)
And then use it before passing the dataset to the dataloader:
import torchvision.transforms as transforms
train_transform = transforms.Compose([
transforms.ToTensor(),
# ...
])
train_dataset = ApplyTransform(train_dataset, transform=train_transform)
# continue with DataLoaders...

I think you can see this https://gist.github.com/kevinzakka/d33bf8d6c7f06a9d8c76d97a7879f5cb
def get_train_valid_loader(data_dir,
batch_size,
augment,
random_seed,
valid_size=0.1,
shuffle=True,
show_sample=False,
num_workers=4,
pin_memory=False):
"""
Utility function for loading and returning train and valid
multi-process iterators over the CIFAR-10 dataset. A sample
9x9 grid of the images can be optionally displayed.
If using CUDA, num_workers should be set to 1 and pin_memory to True.
Params
------
- data_dir: path directory to the dataset.
- batch_size: how many samples per batch to load.
- augment: whether to apply the data augmentation scheme
mentioned in the paper. Only applied on the train split.
- random_seed: fix seed for reproducibility.
- valid_size: percentage split of the training set used for
the validation set. Should be a float in the range [0, 1].
- shuffle: whether to shuffle the train/validation indices.
- show_sample: plot 9x9 sample grid of the dataset.
- num_workers: number of subprocesses to use when loading the dataset.
- pin_memory: whether to copy tensors into CUDA pinned memory. Set it to
True if using GPU.
Returns
-------
- train_loader: training set iterator.
- valid_loader: validation set iterator.
"""
error_msg = "[!] valid_size should be in the range [0, 1]."
assert ((valid_size >= 0) and (valid_size <= 1)), error_msg
normalize = transforms.Normalize(
mean=[0.4914, 0.4822, 0.4465],
std=[0.2023, 0.1994, 0.2010],
)
# define transforms
valid_transform = transforms.Compose([
transforms.ToTensor(),
normalize,
])
if augment:
train_transform = transforms.Compose([
transforms.RandomCrop(32, padding=4),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
normalize,
])
else:
train_transform = transforms.Compose([
transforms.ToTensor(),
normalize,
])
# load the dataset
train_dataset = datasets.CIFAR10(
root=data_dir, train=True,
download=True, transform=train_transform,
)
valid_dataset = datasets.CIFAR10(
root=data_dir, train=True,
download=True, transform=valid_transform,
)
num_train = len(train_dataset)
indices = list(range(num_train))
split = int(np.floor(valid_size * num_train))
if shuffle:
np.random.seed(random_seed)
np.random.shuffle(indices)
train_idx, valid_idx = indices[split:], indices[:split]
train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)
train_loader = torch.utils.data.DataLoader(
train_dataset, batch_size=batch_size, sampler=train_sampler,
num_workers=num_workers, pin_memory=pin_memory,
)
valid_loader = torch.utils.data.DataLoader(
valid_dataset, batch_size=batch_size, sampler=valid_sampler,
num_workers=num_workers, pin_memory=pin_memory,
)
# visualize some images
if show_sample:
sample_loader = torch.utils.data.DataLoader(
train_dataset, batch_size=9, shuffle=shuffle,
num_workers=num_workers, pin_memory=pin_memory,
)
data_iter = iter(sample_loader)
images, labels = data_iter.next()
X = images.numpy().transpose([0, 2, 3, 1])
plot_images(X, labels)
return (train_loader, valid_loader)
Seems that he use sampler=train_sampler to do the split.

Related

How can I get some images with flow from directory?

In below example I want to take some images to train not all because my computer's efficiency is low.
How I can do that with flow_from_directory?
train_generator = train_datagen.flow_from_directory(
'/train/,
target_size=(img_height, img_width),
batch_size=batch_size,
class_mode='categorical')
My batch size is 20 here. You can iterate over to get the images:
image_list = []
for images,labels in next(zip(train_generator)):
for i in range(16): # can't be greater than 20
image_list.append(images[i])
image_list = np.array(image_list)
image_list.shape # (16,150,150,3)
Data generator will help us in pro-processing (rescaling) our images
data_generator = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1. / 255, validation_split=0.2)
Use the validation_split to create for train and val datasets
train_generator = data_generator.flow_from_directory(
data_dir,
target_size=(Img_size, Img_size),
batch_size=Batch_size,
subset='training',
shuffle=True)
create val generator
val_generator = data_generator.flow_from_directory(
data_dir,
target_size=(Img_size, Img_size),
batch_size=Batch_size,
subset='validation',
shuffle=True)

Embeddings in sentiment analysis task with PyTorch and Multilayer Perception

I have the body of the code, which should work fine, I think it doesn't because of something I'm messing up here, probably having to do with the embedding.
import torch.nn as nn
class MultilayerPerceptron(nn.Module):
def __init__(self, input_size, hidden_size): # I removed output size
# Call initializer function of the super class
super(MultilayerPerceptron, self).__init__()
self.embedding = nn.Embedding(INPUT_DIM, EMBEDDING_DIM) #added this myself, maybe wrong
#self.mlp = nn.MultilayerPerceptron(EMBEDDING_DIM, HIDDEN_DIM) #also added
self.INPUT_DIM = INPUT_DIM
self.HIDDEN_DIM = HIDDEN_DIM
self.OUTPUT_DIM = OUTPUT_DIM
self.EMBEDDING_DIM = EMBEDDING_DIM
#whenever this model is called, those layers in the sequential block
#will be processed in the order given to the block.
self.model = nn.Sequential(
#nn.Flatten(), # adding this hopefully it works (it didn't)
#embeds = embedded.mean(dim=1) #god help
nn.Linear(self.INPUT_DIM, self.HIDDEN_DIM), #later on, multiply by embedding dimensionality #I removed
nn.ReLU(),
nn.Linear(self.HIDDEN_DIM, self.OUTPUT_DIM), #one layer neural network
nn.ReLU(), # do I need this?
nn.Sigmoid(),
)
def forward(self, x):
embedded = self.embedding(x)
#embedded = [sent len, batch size, emb dim]
output, hidden = self.model(embedded)
output = self.model(x) #call the model defined above for forward propagation.
return output
INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 100 #how do I fit this into the model??
HIDDEN_DIM = 256
OUTPUT_DIM = 1
model = MultilayerPerceptron(INPUT_DIM, HIDDEN_DIM) #MLP instead
The error I get is "mat1 and mat2 shapes cannot be multiplied (50176x100 and 25002x256)".

Accuracy from sess.run(() is returning the value in bytes. How can I change to value?

I am new to CNN and tried to train the CNN model. However when I try to print the accuracies returned from cnn it gives me results in bytes format like b'\n\x11\n\naccuracy_1\x15\x00\x00\x80<'. However when I try to print the values from the loss_train obtained from the same sess.run I get value of 1419.06. Why is this happening.
########################################################################################################################
#IMPORT PACKAGES
import math
import shutil
import pywt
import sys
import random
import numpy as np
import h5py
import pip
import os
from os import system
import tensorflow as tf
from PIL import Image
import matplotlib
import matplotlib.pyplot as plt
import skimage.io as io
import matplotlib.image as mpimg
import time
np.random.seed(1)
slim = tf.contrib.slim
########################################################################################################################
########################################################################################################################
#The FLAGS are used to assign constant values to several paths as well as variables that will be constantly used.
flags = tf.app.flags
flags.DEFINE_string('dataset_dir','E:\\CODING\\CNN_Compressed\\Trial\\Codes\\using_numpy\\NWPU-RESISC45\\NWPU-RESISC45\\','E:\\CODING\\CNN_Compressed\\Trial\\Codes\\using_numpy\\NWPU-RESISC45\\NWPU-RESISC45\\')
flags.DEFINE_float('validation_size', 0.1, 'Float: The proportion of examples in the dataset to be used for validation')
flags.DEFINE_float('test_size', 0.1, 'Float: The proportion of examples in the dataset to be used for test')
flags.DEFINE_integer('num_shards', 1, 'Int: Number of shards to split the TFRecord files into')
flags.DEFINE_integer('random_seed', 0, 'Int: Random seed to use for repeatability.')
flags.DEFINE_string('tfrecord_filename', None, 'String: The output filename to name your TFRecord file')
tf.app.flags.DEFINE_integer('target_image_height', 256, 'train input image height')
tf.app.flags.DEFINE_integer('target_image_width', 256, 'train input image width')
tf.app.flags.DEFINE_integer('batch_size', 128, 'batch size of training.')
tf.app.flags.DEFINE_integer('num_epochs', 30, 'epochs of training.')
tf.app.flags.DEFINE_float('learning_rate', 0.001, 'learning rate of training.')
FLAGS = flags.FLAGS
img_size = 256
num_channels=3
num_classes=45
########################################################################################################################
########################################################################################################################
datapath_train = 'E:\\CODING\\CNN_Compressed\\Trial\\Codes\\using_numpy\\NWPU-RESISC45\\NWPU-RESISC45\\train\\None_train_00000-of-00001.tfrecord'
def _extract_fn(tfrecord):
features={
'image/encoded': tf.FixedLenFeature([], tf.string),
'image/format': tf.FixedLenFeature([], tf.string),
'image/class/label': tf.FixedLenFeature([], tf.int64),
'image/height': tf.FixedLenFeature([], tf.int64),
'image/width': tf.FixedLenFeature([], tf.int64),
'image/channels': tf.FixedLenFeature([],tf.int64)
}
parsed_example = tf.parse_single_example(tfrecord, features)
image_de = tf.io.decode_raw(parsed_example['image/encoded'],tf.uint8)
img_height = tf.cast(parsed_example['image/height'],tf.int32)
img_width = tf.cast(parsed_example['image/width'],tf.int32)
img_channel = tf.cast(parsed_example['image/channels'],tf.int32)
img_shape = tf.stack([img_height,img_width,img_channel])
label = tf.cast(parsed_example['image/class/label'],tf.int64)
image = tf.reshape(image_de,img_shape)
#label = parsed_example['image/class/label']
return image, img_shape, label
########################################################################################################################
#########################################################################################################################
"""
# Pipeline of dataset and iterator
dataset = tf.data.TFRecordDataset(datapath)
# Parse the record into tensors.
dataset = dataset.map(_extract_fn)
# Generate batches
dataset = dataset.batch(1)
# Create a one-shot iterator
iterator = dataset.make_one_shot_iterator()
image, img_shape, label = iterator.get_next()
with tf.Session() as sess:
try:
print(sess.run(img_shape))
image_batch=sess.run(image)
print(image_batch)
img_bas=tf.cast(image_batch,tf.uint8)
plt.imshow(image_batch[0,:,:,:]*255)
plt.show()
except tf.errors.OutOfRangeError:
pass"""
########################################################################################################################
########################################################################################################################
#INITIALIZATION FOR THE CNN ARCHITECTURE
filter_size_conv1 = [5,5]
num_filters_conv1 = 32
filter_shape_pool1 = [2,2]
filter_size_conv2 = [3,3]
num_filters_conv2 = 64
filter_shape_pool2 = [2,2]
#PLACEHOLDERS
x = tf.placeholder(tf.float32, shape = [None, img_size,img_size,num_channels], name='x')
y = tf.placeholder(tf.int32, shape= [None], name = 'ytrue') #Output data placeholder
y_one_hot = tf.one_hot(y,45)
y_true_cls = tf.argmax(y_one_hot, dimension=1)
########################################################################################################################
########################################################################################################################
def new_conv_layer(input, num_input_channels, filter_size, num_filters, name):
with tf.variable_scope(name) as scope:
# Shape of the filter-weights for the convolution
shape = [filter_size, filter_size, num_input_channels, num_filters]
# Create new weights (filters) with the given shape
weights = tf.Variable(tf.truncated_normal(shape, stddev=0.05))
# Create new biases, one for each filter
biases = tf.Variable(tf.constant(0.05, shape=[num_filters]))
# TensorFlow operation for convolution
layer = tf.nn.conv2d(input=input, filter=weights, strides=[1, 1, 1, 1], padding='SAME')
# Add the biases to the results of the convolution.
layer += biases
return layer, weights
def new_pool_layer(input, name):
with tf.variable_scope(name) as scope:
# TensorFlow operation for convolution
layer = tf.nn.max_pool(value=input, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
return layer
def new_relu_layer(input, name):
with tf.variable_scope(name) as scope:
# TensorFlow operation for convolution
layer = tf.nn.relu(input)
return layer
def new_fc_layer(input, num_inputs, num_outputs, name):
with tf.variable_scope(name) as scope:
# Create new weights and biases.
weights = tf.Variable(tf.truncated_normal([num_inputs, num_outputs], stddev=0.05))
biases = tf.Variable(tf.constant(0.05, shape=[num_outputs]))
# Multiply the input and weights, and then add the bias-values.
layer = tf.matmul(input, weights) + biases
return layer
# CONVOLUTIONAL LAYER 1
layer_conv1, weights_conv1 = new_conv_layer(input=x, num_input_channels=3, filter_size=5, num_filters=32, name ="conv1")
# Pooling Layer 1
layer_pool1 = new_pool_layer(layer_conv1, name="pool1")
# RelU layer 1
layer_relu1 = new_relu_layer(layer_pool1, name="relu1")
# CONVOLUTIONAL LAYER 2
layer_conv2, weights_conv2 = new_conv_layer(input=layer_relu1, num_input_channels=32, filter_size=5, num_filters=64, name= "conv2")
# Pooling Layer 2
layer_pool2 = new_pool_layer(layer_conv2, name="pool2")
# RelU layer 2
layer_relu2 = new_relu_layer(layer_pool2, name="relu2")
# FLATTEN LAYER
num_features = layer_relu2.get_shape()[1:4].num_elements()
layer_flat = tf.reshape(layer_relu2, [-1, num_features])
# FULLY-CONNECTED LAYER 1
layer_fc1 = new_fc_layer(layer_flat, num_inputs=num_features, num_outputs=1000, name="fc1")
# RelU layer 3
layer_relu3 = new_relu_layer(layer_fc1, name="relu3")
# FULLY-CONNECTED LAYER 2
layer_fc2 = new_fc_layer(input=layer_relu3, num_inputs=1000, num_outputs=45, name="fc2")
# Use Softmax function to normalize the output
with tf.variable_scope("Softmax"):
y_pred = tf.nn.softmax(layer_fc2)
y_pred_cls = tf.argmax(y_pred, dimension=1)
# Use Cross entropy cost function
with tf.name_scope("cross_ent"):
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=layer_fc2, labels=y_one_hot)
cost = tf.reduce_mean(cross_entropy)
# Use Adam Optimizer
with tf.name_scope("optimizer"):
optimizer = tf.train.AdamOptimizer(learning_rate = 1e-4).minimize(cost)
# Accuracy
with tf.name_scope("accuracy"):
correct_prediction = tf.equal(y_pred_cls, y_true_cls)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# setup the initialisation operator
init_op = tf.global_variables_initializer()
# Pipeline of dataset and iterator
dataset_train = tf.data.TFRecordDataset(datapath_train)
# Parse the record into tensors.
dataset_train = dataset_train.map(_extract_fn)
# Generate batches
dataset_train = dataset_train.batch(FLAGS.batch_size)
iterator_train = dataset_train.make_initializable_iterator()
next_element_train = iterator_train.get_next()
print('\n Starting the CNN train')
# Initialize the FileWriter
writer_train = tf.summary.FileWriter("Training_FileWriter/")
writer_val = tf.summary.FileWriter("Validation_FileWriter/")
#summary
accuracy = tf.summary.scalar("accuracy", accuracy)
loss = tf.summary.scalar("loss", cost)
# Merge all summaries together
merged_summary = tf.summary.merge_all()
#PERFORM THE CNN OPERATIONS
with tf.Session() as sess:
sess.run(init_op)
sess.run(iterator_train.initializer)
# Add the model graph to TensorBoard
writer_train.add_graph(sess.graph)
writer_val.add_graph(sess.graph)
# Loop over number of epochs
print('\nTraining')
for epoch in range(FLAGS.num_epochs):
sess.run(iterator_train.initializer)
start_time = time.time()
train_accuracy = 0
validation_accuracy = 0
acc_train_avg = 0
val_acc_avg = 0
for batch in range(0, int(25200/FLAGS.batch_size)):
img_train, shp_train, lbl_train = sess.run(next_element_train)
_, loss_train, acc_train, acc_summ = sess.run([optimizer, cost, accuracy, merged_summary], feed_dict = {x: img_train, y: lbl_train})
print(loss_train)
print(acc_train)
train_accuracy+=acc_train
end_time = time.time()
#acc_train_avg = (train_accuracy/(int(25200/FLAGS.batch_size)))
#TRAINING
print("Epoch "+str(epoch+1)+" completed : Time usage "+str(int(end_time-start_time))+" seconds")
print("\tAccuracy:")
print("\t- Training Loss:\t{}", loss_train)
print ("\t- Training Accuracy:\t{}",acc_train)
writer_train.add_summary(acc_summ,epoch+1)
#######################################################################################################################
The error is obtained as
Training
1427.1069
b'\n\x11\n\naccuracy_1\x15\x00\x00\x80<'
Traceback (most recent call last):
File "train_trial.py", line 302, in <module>
train_accuracy+=acc_train
TypeError: unsupported operand type(s) for +=: 'int' and 'bytes'
You are overwriting your loss and accuracy operations here:
accuracy = tf.summary.scalar("accuracy", accuracy)
loss = tf.summary.scalar("loss", cost)
Then when you run accuracy you get the protobuf bytes of the summary, instead of just running the op. You should rename these variables to prevent overwriting/name clashes.

How can I get every layers output value with keras?

I want use keras Lstm to get the time series features, then use the features to Kmeans. But now I can not get the layers output values. How can I get the layers output values?
This is my lstm network
Layer (type) Output Shape Param #
lstm_66 (LSTM) (None, None, 50) 10400
lstm_67 (LSTM) (None, 100) 60400
dense_19 (Dense) (None, 1) 101
activation_19 (Activation) (None, 1) 0
I want to get the lstm_67 output values,my code is:
import keras.backend as K
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import tensorflow as tf
sess = tf.Session()
sess.run(tf.global_variables_initializer())
import numpy as np
statesAll=[]
layers = model.layers
print layers[1].output,type(layers[1].output[1]),sess.run(layers[1].output)
and the result is:
Tensor("lstm_61/TensorArrayReadV3:0", shape=(?, 100), dtype=float32)
So, how can I get the layers output value?
Thanks!
But it not work,my code is:
def load_data(file_name, sequence_length=10, split=0.8):
df = pd.read_csv(file_name, sep=',', usecols=[1])
data_all = np.array(df).astype(float)
scaler = MinMaxScaler()
data_all = scaler.fit_transform(data_all)
data = []
print len(data_all)
for i in range(len(data_all) - sequence_length - 1):
data.append(data_all[i: i + sequence_length + 1])
reshaped_data = np.array(data).astype('float64')
np.random.shuffle(reshaped_data)
x = reshaped_data[:, :-1]
y = reshaped_data[:, -1]
split_boundary = int(reshaped_data.shape[0] * split)
train_x = x[: split_boundary]
test_x = x[split_boundary:]
train_y = y[: split_boundary]
test_y = y[split_boundary:]
return train_x, train_y, test_x, test_y, scaler
def build_model(n_samples, time_steps, input_dim):
model = Sequential()
model.add(LSTM(input_dim=1, output_dim=50,return_sequences=True))
model.add(LSTM(100, return_sequences=False))
model.add(Dense(output_dim=1))
model.add(Activation('linear'))
model.compile(loss='mse', optimizer='rmsprop')
print(model.layers)
return model
def train_model(train_x, train_y, test_x, test_y):
model = build_model()
model.fit(train_x, train_y, batch_size=128, nb_epoch=30,validation_split=0.1)
return model
train_x, train_y, test_x, test_y, scaler = load_data(file path)
train_x = np.reshape(train_x, (train_x.shape[0], train_x.shape[1], 1))
test_x = np.reshape(test_x, (test_x.shape[0], test_x.shape[1], 1))
model = train_model(train_x, train_y, test_x, test_y)
from keras import backend as K
layers = model.layers
K.eval(layers[1].output)
In TensorFlow 2.x, you can do like this:
from tensorflow.python.keras import backend as K
model = build_model()
# lstm_67 is the second layer.
lstm = K.function([model.layers[0].input], [model.layers[1].output])
lstm_output = lstm([test_x])[0]
keras.backend.eval() should do.
Look at the documentation here and here
First of all, this is a tensor, you need to use the tf. Print () method to see the specific value. If you use Spyder, you will not see this information in the console. You need to execute this program in the command line.

How to simplify DataLoader for Autoencoder in Pytorch

Is there any easier way to set up the dataloader, because input and target data is the same in case of an autoencoder and to load the data during training? The DataLoader always requires two inputs.
Currently I define my dataloader like this:
X_train = rnd.random((300,100))
X_val = rnd.random((75,100))
train = data_utils.TensorDataset(torch.from_numpy(X_train).float(), torch.from_numpy(X_train).float())
val = data_utils.TensorDataset(torch.from_numpy(X_val).float(), torch.from_numpy(X_val).float())
train_loader= data_utils.DataLoader(train, batch_size=1)
val_loader = data_utils.DataLoader(val, batch_size=1)
and train like this:
for epoch in range(50):
for batch_idx, (data, target) in enumerate(train_loader):
data, target = Variable(data), Variable(target).detach()
optimizer.zero_grad()
output = model(data, x)
loss = criterion(output, target)
Why not subclassing TensorDataset to make it compatible with unlabeled data ?
class UnlabeledTensorDataset(TensorDataset):
"""Dataset wrapping unlabeled data tensors.
Each sample will be retrieved by indexing tensors along the first
dimension.
Arguments:
data_tensor (Tensor): contains sample data.
"""
def __init__(self, data_tensor):
self.data_tensor = data_tensor
def __getitem__(self, index):
return self.data_tensor[index]
And something along these lines for training your autoencoder
X_train = rnd.random((300,100))
train = UnlabeledTensorDataset(torch.from_numpy(X_train).float())
train_loader= data_utils.DataLoader(train, batch_size=1)
for epoch in range(50):
for batch in train_loader:
data = Variable(batch)
optimizer.zero_grad()
output = model(data)
loss = criterion(output, data)
I believe this is as simple as it gets. Other than that, I guess you will have to implement your own dataset. A sample code is below.
class ImageLoader(torch.utils.data.Dataset):
def __init__(self, root, tform=None, imgloader=PIL.Image.open):
super(ImageLoader, self).__init__()
self.root=root
self.filenames=sorted(glob(root))
self.tform=tform
self.imgloader=imgloader
def __len__(self):
return len(self.filenames)
def __getitem__(self, i):
out = self.imgloader(self.filenames[i]) # io.imread(self.filenames[i])
if self.tform:
out = self.tform(out)
return out
You can then use it as follows.
source_dataset=ImageLoader(root='/dldata/denoise_ae/clean/*.png', tform=source_depth_transform)
target_dataset=ImageLoader(root='/dldata/denoise_ae/clean_cam_n9dmaps/*.png', tform=target_depth_transform)
source_dataloader=torch.utils.data.DataLoader(source_dataset, batch_size=32, shuffle=False, drop_last=True, num_workers=15)
target_dataloader=torch.utils.data.DataLoader(target_dataset, batch_size=32, shuffle=False, drop_last=True, num_workers=15)
To test the 1st batch go as follows.
dataiter = iter(source_dataloader)
images = dataiter.next()
print(images.size())
And finally you can enumerate on the loaded data in the batch training loop as follows.
for i, (source, target) in enumerate(zip(source_dataloader, target_dataloader), 0):
source, target = Variable(source.float().cuda()), Variable(target.float().cuda())
Have fun.
PS. The code samples I shared so not load validation data.