PyTorch model for 2D regression given a scalar input - neural-network

I want to create a model to perform this regression:
My dataset looks like:
t,x,y
0.0,-,0.5759052335487023
0.01,-,-
0.02,1.1159124144549086,-
0.03,-,-
0.04,1.0054825084650338,0.4775267298487888
0.05,-,-
I'm having some trouble with the loss, the dataset loading, the batch_size, and the Net structure (I use a single layer to simplify the problem).
That's my code:
Net:
import torch
import torch.nn as nn
import pandas as pd

class Net(nn.Module):
    '''Model to regress 2D time series values given a scalar input.'''

    def __init__(self):
        super(Net, self).__init__()
        # Layers
        self.predict = nn.Linear(1, 2)

    def forward(self, x):
        x = self.predict(x)
        return x
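As a quick sanity check (my addition, not part of the original post): nn.Linear(1, 2) maps a [batch, 1] input to a [batch, 2] output, so each scalar t must be fed as a length-1 vector:

net = Net()
dummy = torch.randn(4, 1)  # batch of 4 scalar inputs, shape [4, 1]
print(net(dummy).shape)    # torch.Size([4, 2])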
Dataset load
class TimeSeriesDataset(torch.utils.data.Dataset):
    def __init__(self, csv_file):
        # Load the csv file as a dataframe
        df = pd.read_csv(csv_file, header=0, na_values='-')
        # Store the inputs and outputs
        self.x = df.values[:, :-2].astype('float32')
        self.y = df.values[:, 1:].astype('float32')
        # Ensure the target has the right shape
        self.y = self.y.reshape((len(self.y), 2))

    def __len__(self):
        # Return the number of rows in the dataset
        return len(self.x)

    def __getitem__(self, idx):
        # Return the row at an index
        return [self.x[idx], self.y[idx]]
Trainloader, loss, optimizer
dataset = TimeSeriesDataset('data.csv')
trainloader = torch.utils.data.DataLoader(
    dataset, batch_size=32, shuffle=True, num_workers=2)

def lossFunc(outputs, labels):
    # Equivalent to nn.MSELoss(): mean squared error, (x - y)^2, works fine for regression
    return torch.mean((outputs - labels)**2)

net = Net()
optimizer = torch.optim.SGD(net.parameters(), lr=0.01)
print(net)
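As the comment notes, this custom function matches nn.MSELoss() with its default mean reduction; a quick check (my addition):

a, b = torch.randn(5, 2), torch.randn(5, 2)
assert torch.allclose(lossFunc(a, b), nn.MSELoss()(a, b))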
Training:
for epoch in range(300):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the data: inputs, labels
        inputs, labels = data
        # zero the parameter gradients
        optimizer.zero_grad()
        # forward + backward + optimize
        outputs = net(inputs)
        #print("Inputs", inputs)
        #print("labels", labels)
        #print("outputs", outputs)
        loss = lossFunc(outputs, labels)
        loss.backward()
        optimizer.step()
        # print statistics
        running_loss += loss.item()
        if i % 20 == 19:    # print every 20 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 20))
            running_loss = 0.0
print('Finished Training')
The outputs look like this:
tensor([[nan, nan],
        [nan, nan],
        [nan, nan],
        ...
And when I run the 300 epochs, the loss value never changes and prints nan.

After the line loss = loss(outputs, labels), loss is now a tensor, not a function anymore. Python does not allow you to keep two distinct objects under the same name, so the assignment simply rebinds it.
So after the first call, loss has become a tensor, and since, as the error says, "tensors are not callable", the second call fails.
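A minimal sketch of the fix (my code, not the poster's): keep the loss function and the computed loss tensor under different names. Note also that the rows containing '-' are parsed as NaN (because of na_values='-'), and a single NaN input or target makes the MSE, and therefore every gradient and weight, nan, which matches the output above; so the sketch drops those rows first and trains full-batch for brevity:

import pandas as pd
import torch
import torch.nn as nn

df = pd.read_csv('data.csv', header=0, na_values='-').dropna()  # drop the '-' rows
inputs = torch.tensor(df.values[:, :1], dtype=torch.float32)    # t, shape [N, 1]
labels = torch.tensor(df.values[:, 1:], dtype=torch.float32)    # (x, y), shape [N, 2]

net = Net()
criterion = nn.MSELoss()  # never reassigned below
optimizer = torch.optim.SGD(net.parameters(), lr=0.01)

for epoch in range(300):
    optimizer.zero_grad()
    loss = criterion(net(inputs), labels)  # 'loss' is a tensor; 'criterion' stays callable
    loss.backward()
    optimizer.step()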

Related

Cross Entropy Loss function not converging

I'm new to neural networks and I'm building one that reads handwritten digits. The loss floats around 2.2-2.3 and I'm not sure why it's not converging. I tried playing with the learning rate, but it doesn't really do anything.
# TODO: Define function to create our own neural network

# Parameters
input_size = 784   # Hint: image size is 28x28, and we want to flatten the image
num_classes = 10   # Hint: our inputs include 0-9
num_epochs = 5     # Number of times we loop through the entire training dataset, can be pretty arbitrary

class NN(nn.Module):
    ############ YOUR CODE STARTS HERE ############
    # 1. Initialize our own NN model
    def __init__(self, input_size, num_classes):
        super(NN, self).__init__()
        self.flatten = nn.Flatten()
        # Use ReLU activation function
        self.relu = nn.ReLU()
        # Input layer
        self.input_layer = nn.Linear(input_size, 13)
        # Hidden layers: use at least 1 hidden layer!
        self.hidden1 = nn.Linear(13, 6)
        # Output layer
        self.output_layer = nn.Linear(6, 10)

    # 2. Define method for forwarding input data
    def forward(self, sample):
        sample = self.flatten(sample)
        out = self.input_layer(sample)
        out = self.relu(out)
        out = self.hidden1(out)
        out = self.relu(out)          # activation function
        out = self.output_layer(out)  # forward to output layer
        return out

nn_model = NN(input_size, num_classes)
print("My NN Model: ", nn_model)
Loss function:
loss_function = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(nn_model.parameters(), lr=.0001)
Training:
total_steps = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Reshape our images from 2D (28x28) to 1D (784)
        images = images.view(-1, 28*28).to(device)
        labels = labels.to(device)
        # Call functions we've previously defined to perform a forward pass & calculate the loss
        output = nn_model.forward(images)
        loss = loss_function(output, labels)
        # Backward pass: calculates gradients
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Print out training progress
        if (i+1) % 100 == 0:
            print(f'epoch {epoch+1} / {num_epochs}, step {i+1}/{total_steps}, loss = {loss.item():.4f}')
Any help is appreciated!

Given groups=1, weight of size [10, 1, 5, 5], expected input[2, 3, 28, 28] to have 1 channels, but got 3 channels instead

I am trying to train a CNN on MNIST but test it on my own handwritten digits. To do that I wrote the following code, but I'm getting the error in the title of this question:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torchvision import datasets, transforms
from torchvision.datasets import ImageFolder

batch_size = 64

train_dataset = datasets.MNIST(root='./data/',
                               train=True,
                               transform=transforms.ToTensor(),
                               download=True)

test_dataset = ImageFolder('my_digit_images/', transform=transforms.ToTensor())

train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        #print(self.conv1.weight.shape)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv3 = nn.Conv2d(20, 20, kernel_size=3)
        #print(self.conv2.weight.shape)
        self.mp = nn.MaxPool2d(2)
        self.fc = nn.Linear(320, 10)

    def forward(self, x):
        in_size = x.size(0)
        x = F.relu(self.conv1(x))
        #print(x.shape)
        x = F.relu(self.mp(self.conv2(x)))
        x = F.relu(self.mp(self.conv3(x)))
        #print("2.", x.shape)
        # x = F.relu(self.mp(self.conv3(x)))
        x = x.view(in_size, -1)  # flatten the tensor
        #print("3.", x.shape)
        x = self.fc(x)
        return F.log_softmax(x, dim=1)

model = Net()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
def train(epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = Variable(data), Variable(target)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

def test():
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        data, target = Variable(data, volatile=True), Variable(target)
        output = model(data)
        test_loss += F.nll_loss(output, target, size_average=False).data
        pred = output.data.max(1, keepdim=True)[1]
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()
    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
The MNIST dataset contains black-and-white 1-channel images, while yours are probably 3-channel RGB. Either re-encode your images or preprocess them like
img = img[:,0:1,:,:]
You can do this with a custom transform, adding it after transforms.ToTensor().
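A sketch of such a transform (my code, not part of the original answer): note that after transforms.ToTensor() a single image is [C, H, W] with no batch dimension yet, so the per-image slice drops the leading colon from the snippet above:

from torchvision import transforms
from torchvision.datasets import ImageFolder

# keep only the first channel of each image: [3, H, W] -> [1, H, W]
to_one_channel = transforms.Compose([
    transforms.ToTensor(),
    transforms.Lambda(lambda img: img[0:1, :, :]),
])

test_dataset = ImageFolder('my_digit_images/', transform=to_one_channel)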
The images used in training and testing should follow the same distribution. Since the MNIST data is grayscale by default, and assuming you didn't change the channels, the model expects the same number of channels in testing.
The following code is an example of how it's done using a transformation.
In the order defined below, it:
Converts the image to a single channel (grayscale)
Resizes the image to the size of the default MNIST data
Converts the image to a tensor
Normalizes the tensor to the same mean and std as during training (0.1307 and 0.3081 are the conventional MNIST statistics, assuming you used those values)
test_dataset = ImageFolder('my_digit_images/',
                           transform=transforms.Compose([
                               transforms.Grayscale(num_output_channels=1),
                               transforms.Resize((28, 28)),
                               transforms.ToTensor(),
                               transforms.Normalize((0.1307,), (0.3081,))]))

How can I fix a tensor dimension mismatch error (with a 1-unit difference)?

I'm trying to run my code for a Graph Convolutional Network (GCN) in PyTorch with several .csv input files, but I get the error below:
RuntimeError: The expanded size of the tensor (732) must match the existing size (731) at non-singleton dimension 0. Target sizes: [732]. Tensor sizes: [731]
Here is my code:
import csv
import torch
import torch.nn as nn
import torch.nn.functional as F
import dgl
from sklearn.metrics import r2_score
import numpy as np
import datetime
import dgl.function as fn

# Below are the graph convolution functions
# (where each node collects information about nearby nodes)
def gcn_message(edges):
    return {'msg': edges.src['h']}

def gcn_reduce(nodes):
    return {'h': torch.sum(nodes.mailbox['msg'], dim=1)}
# Below is the pytorch module that defines the operations at each graph convolution layer
class gcnLayer(nn.Module):
    def __init__(self, in_feats, out_feats):
        super(gcnLayer, self).__init__()
        self.linear = nn.Linear(in_feats*2, out_feats)

    def forward(self, g, inputs):
        with g.local_scope():
            g.ndata['h'] = inputs  # inputs: POI features
            print(g.ndata['h'])
            g.update_all(message_func=fn.copy_u('h', 'm'), reduce_func=fn.mean('m', 'h_N'))
            h_N = g.ndata['h_N']
            h_total = torch.cat([inputs, h_N], dim=1)  # the result (convoluted POIs) of convolution at a layer
            return self.linear(h_total)  # the result is linearly transformed
# Below is the pytorch class (machine learning architectures are initialized as classes)
# that defines the graph convolutional network (GCN) architecture (number of hidden layers, neurons, activation function, etc.)
class gcn(torch.nn.Module):
    def __init__(self, input, hidden, output):
        super(gcn, self).__init__()
        # Initially each row of the input has (input) elements.
        # In other words, each node in the network has (input) features, i.e. the number of POI types
        self.gcnInput = gcnLayer(input, hidden)    # Input size is converted into hidden size
        self.gcnHidden = gcnLayer(hidden, hidden)  # Hidden size is converted into hidden size
        self.gcnOutput = gcnLayer(hidden, output)  # Hidden size is converted into the desired output size

    # Forward function: this function is run when we call the class
    def forward(self, g, pois):
        y = F.relu(self.gcnInput(g, pois))  # Result of the input layer is sent through the activation function
        y = F.relu(self.gcnHidden(g, y))    # Result of the hidden layer is sent through the activation function
        y = F.relu(self.gcnHidden(g, y))    # (an arbitrary number of hidden layers can be added here)
        y = self.gcnOutput(g, y)            # Result of the output layer (not activated)
        return y
# Below is the pytorch class that defines the multilayer perceptron (MLP) architecture
# (number of hidden layers, neurons, activation function, etc.)
class mlp(torch.nn.Module):
    def __init__(self, input, hidden):
        super(mlp, self).__init__()  # initialize
        self.classifier = nn.Sequential(  # Sequential is used when combining different layers
            nn.Linear(input, hidden),     # The input feature matrix is linearly transformed to shape (hidden)
            nn.ReLU(),                    # Activation function is applied
            nn.Linear(hidden, hidden),    # Result of the previous layer is linearly transformed
            nn.ReLU(),                    # Activation function is applied
            nn.Linear(hidden, 1))         # The final layer gives one output (trip amount)

    def forward(self, x):
        x = self.classifier(x)  # the input is sent through the MLP architecture defined above
        return x
# Below is the pytorch class that defines the combined deep learning architecture
class od(nn.Module):
    def __init__(self, gcnInput, gcnHidden, gcnOutput, mlpHidden):
        super(od, self).__init__()
        self.gcn = gcn(gcnInput, gcnHidden, gcnOutput)  # First: GCN
        self.mlp = mlp((2*gcnoutput+1), mlpHidden)      # Afterwards: MLP

    def forward(self, g, pois, costs, indices, q, zoneCount):
        y = self.gcn(g, pois)  # First, send the input through the GCN
        p = torch.zeros(len(costs), 2*q).cuda()  # Matrix that will hold the POI output at the origin (size q) and at the destination (size q)
        count = 0
        for i in range(zoneCount):
            for j in range(zoneCount):
                p[count][:q] = y[i][:]  # POI output at the origin (size q)
                p[count][q:] = y[j][:]  # POI output at the destination (size q)
                count += 1
        p = p[indices][:]          # Order the input matrix by the shuffled zones (or OD pairs)
        costs = costs[indices][:]  # Order the cost matrix by the shuffled zones (or OD pairs)
        inputs = torch.cat((p, costs), 1).cuda()  # Combine the POI and cost matrices
        y = self.mlp(inputs)  # Last, send through the MLP
        return y
def train(optimizer, model, criterion, pois, costs, labels, indices, zoneCount, gcnOutput):
    model.train()  # Training mode (gradients are calculated)
    optimizer.zero_grad()  # Gradients are zeroed
    print(optimizer)
    pred = model(g, pois, costs, indices, gcnOutput, zoneCount)  # Get the model output as the prediction
    loss = criterion(pred, labels)  # Calculate the loss between prediction and label
    loss.backward()   # Backpropagate the gradients
    optimizer.step()  # Update the parameters using the computed gradients
    return loss.item()  # Return the loss

def test(model, pois, costs, labels, indices, zoneCount, gcnOutput):
    model.eval()  # Evaluation mode: no gradients are calculated
    with torch.no_grad():  # Disable gradient tracking for all tensors below
        pred = model(g, pois, costs, indices, gcnOutput, zoneCount)  # Get the prediction
        predictions = pred.detach().cpu()  # Move the prediction tensor from GPU to CPU
        r2 = r2_score(labels.cpu(), predictions)  # Calculate R2
        return r2
# The part below reads the data from the files into the program (POIs, nodes, costs, labels).
# If the file types differ from the ones used in this research, this part should be adjusted.
def data_collection(key):
    if key == "mb":  # mb: manhattan and brooklyn case
        no = 3
    else:
        no = 2
    with open("{}/nodes.csv".format(key)) as f:
        nodeCount = sum(1 for line in f)
    print(nodeCount)
    with open("{}/poisInfo.csv".format(key)) as f:
        poiCount = sum(1 for line in f)
    print(poiCount)
    with open("{}/zones.csv".format(key)) as f:
        zoneCount = sum(1 for line in f)
    print(zoneCount)

    pois = torch.zeros((nodeCount, poiCount)).cuda()
    print(pois)
    i = 0
    with open('{}/nodes.csv'.format(key), mode='r') as rx:
        r = csv.reader(rx, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        for row in r:
            print(row)
            pois[i][:] = torch.FloatTensor([int(i) for i in row[no:]])
            i += 1

    costs = torch.zeros((zoneCount*zoneCount, 1)).cuda()
    labels = torch.zeros((zoneCount*zoneCount, 1)).cuda()
    count = 0
    with open('{}/costsTrips.csv'.format(key), mode='r') as rx:
        r = csv.reader(rx, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        for row in r:
            costs[count][0] = int(row[2])
            labels[count][0] = int(row[3])
            count += 1

    g = dgl.DGLGraph().to(torch.device('cuda:0'))  # dgl: deep graph library; we move the POIs onto the graph for graph convolution
    print(nodeCount)
    g.add_nodes(nodeCount)  # Add nodes to the graph
    print(nodeCount)
    print(g.number_of_nodes())
    with open('{}/edges.csv'.format(key), mode='r') as rx:
        r = csv.reader(rx, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        for row in r:
            g.add_edge(int(row[0]), int(row[1]))  # If an edge exists between 2 nodes, add it
    print('We have %d nodes.' % g.number_of_nodes())
    print('We have %d edges.' % g.number_of_edges())
    return [g, pois, labels, costs, zoneCount, poiCount]
gcnoutput = 10
keys = ["manhattan", "brooklyn", "mb"]
count = 0
with open("costFinal.csv", mode='w', newline="") as wx:
    w = csv.writer(wx, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    w.writerow(["place", "iteration", "split", "r2"])
    for key in keys:
        [g, pois, labels, costs, zoneCount, poiCount] = data_collection(key)
        for iteration in range(1, 11):  # Test each split ratio 10 times to get the average
            a = np.random.permutation(zoneCount)  # randomize the zones
            for i in range(1, 10):
                split = i/10  # The lines below split the training and test subsets
                breaker = int(split * zoneCount)
                train_zones = a[:breaker]
                test_zones = a[breaker:]
                train_indices = []
                test_indices = []
                for z in train_zones:
                    train_indices += [j for j in range(z * zoneCount, z * zoneCount + zoneCount)]
                for z in test_zones:
                    test_indices += [j for j in range(z * zoneCount, z * zoneCount + zoneCount)]
                # model parameters: gcninput, gcnhidden, gcnoutput, mlphidden
                model = od(poiCount, 64, gcnoutput, 64).cuda()  # construct the model
                print(model)
                optimizer = torch.optim.Adam(model.parameters(), lr=0.01)  # optimizer: Adam
                print(optimizer)
                criterion = torch.nn.MSELoss()  # loss: mean squared error
                print(criterion)
                for epoch in range(1, 11):  # Train the model for 10 epochs
                    print(epoch)
                    loss = train(optimizer, model, criterion, pois, costs, labels[train_indices], train_indices, zoneCount, gcnoutput)
                    # print(count, datetime.datetime.now() - start, key, iteration, i, epoch, loss)
                    count += 1
                r2 = test(model, pois, costs, labels[test_indices], test_indices, zoneCount, gcnoutput)  # After training, test the model and get R2
                w.writerow([key, iteration, i*10, r2])  # write key [manhattan, brooklyn, mb], iteration [1...10], split ratio [10%...90%], r2 to the file

ValueError: Expected input batch_size (24) to match target batch_size (8)

I got many links for solving this and read different Stack Overflow answers related to it, but I'm not able to figure it out.
My image batch has size torch.Size([8, 3, 16, 16]).
My architecture is as below:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # linear layers (16*16 -> 768 -> 64 -> 10)
        self.fc1 = nn.Linear(16 * 16, 768)
        self.fc2 = nn.Linear(768, 64)
        self.fc3 = nn.Linear(64, 10)
        self.dropout = nn.Dropout(p=.5)

    def forward(self, x):
        # flatten image input
        x = x.view(-1, 16 * 16)
        # add hidden layers, with relu activation function
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.dropout(F.relu(self.fc2(x)))
        x = F.log_softmax(self.fc3(x), dim=1)
        return x
# specify loss function
criterion = nn.NLLLoss()
# specify optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=.003)
# number of epochs to train the model
n_epochs = 30  # suggest training between 20-50 epochs

model.train()  # prep model for training
for epoch in range(n_epochs):
    # monitor training loss
    train_loss = 0.0

    ###################
    # train the model #
    ###################
    for data, target in trainloader:
        # clear the gradients of all optimized variables
        optimizer.zero_grad()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(data)
        # calculate the loss
        loss = criterion(output, target)
        # backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()
        # perform a single optimization step (parameter update)
        optimizer.step()
        # update running training loss
        train_loss += loss.item()*data.size(0)

    # print training statistics
    # calculate average loss over an epoch
    train_loss = train_loss/len(trainloader.dataset)
    print('Epoch: {} \tTraining Loss: {:.6f}'.format(
        epoch+1,
        train_loss
        ))
I am getting the value error:
ValueError: Expected input batch_size (24) to match target batch_size (8).
How do I fix it? My batch size is 8, the input image size is (16*16), and I have 10-class classification here.
Your input images have 3 channels, therefore your input feature size is 16*16*3, not 16*16. Currently, you treat each channel as a separate instance, so after the x.view(-1, 16*16) flattening the classifier input has shape (24, 16*16). Clearly, the batch size doesn't match: it is supposed to be 8, not 8*3 = 24.
You could either:
Switch to a CNN to handle multi-channel inputs (here, 3 channels).
Use a self.fc1 with 16*16*3 input features (a sketch follows below).
If the input is RGB, maybe even convert it to a 1-channel grayscale image first.
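A sketch of the second option (my code, assuming the input batch stays [8, 3, 16, 16]): size fc1 for all three channels and flatten per sample, so the batch dimension is preserved:

import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(16 * 16 * 3, 768)  # 3 channels, not 1
        self.fc2 = nn.Linear(768, 64)
        self.fc3 = nn.Linear(64, 10)
        self.dropout = nn.Dropout(p=.5)

    def forward(self, x):
        x = x.view(x.size(0), -1)  # flatten to [batch, 16*16*3]; batch stays 8
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.dropout(F.relu(self.fc2(x)))
        return F.log_softmax(self.fc3(x), dim=1)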

Using SGD on MNIST dataset with Pytorch, loss not decreasing

I tried to use SGD on the MNIST dataset with a batch size of 32, but the loss does not decrease at all.
I checked my model and loss function and read the documentation, but still couldn't figure out what I've done wrong.
I defined my neural network as below:
class classification(nn.Module):
    def __init__(self):
        super(classification, self).__init__()
        # construct layers for a neural network
        self.classifier1 = nn.Sequential(
            nn.Linear(in_features=28*28, out_features=20*20),
            nn.Sigmoid(),
        )
        self.classifier2 = nn.Sequential(
            nn.Linear(in_features=20*20, out_features=10*10),
            nn.Sigmoid(),
        )
        self.classifier3 = nn.Sequential(
            nn.Linear(in_features=10*10, out_features=10),
            nn.LogSoftmax(dim=1),
        )

    def forward(self, inputs):               # [batchSize, 1, 28, 28]
        x = inputs.view(inputs.size(0), -1)  # [batchSize, 28*28]
        x = self.classifier1(x)              # [batchSize, 20*20]
        x = self.classifier2(x)              # [batchSize, 10*10]
        out = self.classifier3(x)            # [batchSize, 10]
        return out
And I defined my training process as below:
classifier = classification().to("cuda")
# optimizer
optimizer = torch.optim.SGD(classifier.parameters(), lr=learning_rate_value)
# loss function
criterion = nn.NLLLoss()

batch_size = 32
epoch = 30
# array to save loss history
loss_train_arr = np.zeros(epoch)
# used DataLoader to split into batches
batched_train = torch.utils.data.DataLoader(training_set, batch_size, shuffle=True)

for i in range(epoch):
    loss_train = 0
    # train and compute loss, accuracy
    for img, label in batched_train:
        img = img.to(device)
        label = label.to(device)
        optimizer.zero_grad()
        predicted = classifier(img)
        label_predicted = torch.argmax(predicted, dim=1)
        loss = criterion(predicted, label)
        loss.backward
        optimizer.step()
        loss_train += loss.item()
    loss_train_arr[i] = loss_train/(len(batched_train.dataset)/batch_size)
I am using a model with a LogSoftmax layer, so my loss function seems right. But the loss does not decrease at all.
If the code you posted is the exact code you use, the problem is that you never actually call backward on the loss: loss.backward is missing its parentheses (), so it only references the method, no gradients are computed, and optimizer.step() has nothing to apply.
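The inner loop with that fix applied (otherwise the same code as in the question):

optimizer.zero_grad()
predicted = classifier(img)
loss = criterion(predicted, label)
loss.backward()  # the parentheses make the call: this actually computes the gradients
optimizer.step()
loss_train += loss.item()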