PyTorch will not fit a straight line to two data points (linear regression)

I'm having trouble fitting a simple y = 4*x1 line with 2 data points using PyTorch. When running the inference code, the model seems to output the same value for every input, which is strange. Please find the code attached, along with the data files I used. I'd appreciate any help here.
import torch
import numpy as np
import pandas as pd
df = pd.read_csv('data.csv')
test_data = pd.read_csv('test_data.csv')
inputs = df[['x1']]
target = df['y']
inputs = torch.tensor(inputs.values).float()
target = torch.tensor(target.values).float()
test_data = torch.tensor(test_data.values).float()
#Defining Network Architecture
import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        hidden1 = 3
        # hidden2 = 5
        self.fc1 = nn.Linear(1, hidden1)
        self.fc3 = nn.Linear(hidden1, 1)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.fc3(x)
        return x
#instantiate the model
model = Net()
print(model)
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(),lr=0.01)
model.train()
#epochs
epochs = 100
for x in range(epochs):
    # initialize the training loss to 0
    train_loss = 0
    # clear out gradients
    optimizer.zero_grad()
    # calculate the output
    output = model(inputs)
    # calculate loss
    loss = criterion(output, target)
    # backpropagate
    loss.backward()
    # update parameters
    optimizer.step()
    if (x % 5) == 0:
        print('Training Loss after epoch {:2d} is {:2.6f}'.format(x, loss))
#set the model in evaluation mode
model.eval()
#Test the model on unseen data
test_output = model(test_data)
print(test_output)
Below is the model output:
tensor([[56.7579],
[56.7579],
[56.7579],
[56.7579],
[56.7579],
[56.7579],
[56.7579],
[56.7579],
[56.7579],
[56.7579],
[56.7579],
[56.7579],
[56.7579],
[56.7579],
[56.7579],
[56.7579],
[56.7579],
[56.7579],
[56.7579],
[56.7579]], grad_fn=<AddmmBackward>)

Your model is collapsing. You can probably see that from the printed losses. You may want to use a lower learning rate (1e-5, 1e-6, etc.). Switching from SGD(...) to Adam(...) may be easier if you don't have much experience and want less trouble fine-tuning these hyperparameters. Also, maybe 100 epochs is not enough. As you did not share an MCVE, I cannot tell you for sure what the problem is. Here is an MCVE of line fitting using the same Net you used:
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
epochs = 1000
max_range = 40
interval = 4
# DATA
x_train = torch.arange(0, max_range, interval).view(-1, 1).float()
x_train += torch.rand(x_train.size(0), 1) - 0.5 # small noise
y_train = (4 * x_train)
y_train += torch.rand(x_train.size(0), 1) - 0.5 # small noise
x_test = torch.arange(interval // 2, max_range, interval).view(-1, 1).float()
y_test = 4 * x_test
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        hidden1 = 3
        self.fc1 = nn.Linear(1, hidden1)
        self.fc3 = nn.Linear(hidden1, 1)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.fc3(x)
        return x
model = Net()
print(model)
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-5)
# TRAIN
model.train()
for epoch in range(epochs):
    optimizer.zero_grad()
    y_pred = model(x_train)
    loss = criterion(y_pred, y_train)
    loss.backward()
    optimizer.step()
    if epoch % 10 == 0:
        print('Training Loss after epoch {:2d} is {:2.6f}'.format(epoch, loss))
# TEST
model.eval()
y_pred = model(x_test)
print(torch.cat((x_test, y_pred, y_test), dim=-1))
This is what the data looks like (plot omitted): a noisy straight line y = 4x, sampled at x = 0, 4, ..., 36.
And this is what the training looks like:
Training Loss after epoch 0 is 7416.805664
Training Loss after epoch 10 is 6645.655273
Training Loss after epoch 20 is 5792.936523
Training Loss after epoch 30 is 4700.106445
Training Loss after epoch 40 is 3245.384277
Training Loss after epoch 50 is 1779.370728
Training Loss after epoch 60 is 747.418579
Training Loss after epoch 70 is 246.781311
Training Loss after epoch 80 is 68.635155
Training Loss after epoch 90 is 17.332235
Training Loss after epoch 100 is 4.280161
Training Loss after epoch 110 is 1.170808
Training Loss after epoch 120 is 0.453974
...
Training Loss after epoch 970 is 0.232296
Training Loss after epoch 980 is 0.232090
Training Loss after epoch 990 is 0.231888
And this is what the output looks like:
| x_test | y_pred | y_test |
|:-------:|:--------:|:--------:|
| 2.0000 | 8.6135 | 8.0000 |
| 6.0000 | 24.5276 | 24.0000 |
| 10.0000 | 40.4418 | 40.0000 |
| 14.0000 | 56.3303 | 56.0000 |
| 18.0000 | 72.1884 | 72.0000 |
| 22.0000 | 88.0465 | 88.0000 |
| 26.0000 | 103.9047 | 104.0000 |
| 30.0000 | 119.7628 | 120.0000 |
| 34.0000 | 135.6210 | 136.0000 |
| 38.0000 | 151.4791 | 152.0000 |
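As a side note on the Adam suggestion above, the only change needed in the MCVE is the optimizer line; a minimal sketch (the 1e-2 learning rate is just a reasonable starting point, not a tuned value):

optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)  # Adam adapts per-parameter step sizes, so it is less sensitive to the learning rate than SGD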

Related

Constant loss through epochs

I wrote this neural network to do a Gaussian regression, but I don't understand why my loss doesn't change over the epochs. I set the learning rate to 1 to watch the loss decrease, but it does not. I chose 2000 points to train my neural network. I have looked at several algorithms on this website and I don't really understand why my algorithm does not achieve what I expect.
I have already imported all the libraries needed.
Thank you for your help
def f(x):
    return x * np.sin(x)  # function to predict
m =2000
X_bis = np.zeros((1,m),dtype = float)
X_bis=np.random.random(m)*10
## Create my training,validation and test set
X_train = X_bis[0:600]
X_val = X_bis[600:800]
X_test = X_bis[800:]
y_train = f(X_train)
y_val = f(X_val)
y_test = f(X_test)
mean_X_train = np.mean(X_train)
std_X_train = np.std(X_train)
mean_y_train = np.mean(y_train)
std_y_train =np.std(y_train)
class MyDataset(data.Dataset):
    def __init__(self, data_feature, data_target):
        self.data_feature = data_feature
        self.data_target = data_target

    def __len__(self):
        return len(self.data_feature)

    def __getitem__(self, index):
        X_train_normalized = (self.data_feature[index] - mean_X_train) / std_X_train
        y_train_normalized = (self.data_target[index] - mean_y_train) / std_y_train
        return torch.from_numpy(np.array(X_train_normalized, ndmin=1)).float(), torch.from_numpy(np.array(y_train_normalized, ndmin=1)).float()
training_set = MyDataset(X_train,y_train)
train_loading = torch.utils.data.DataLoader(training_set, batch_size= 100)
val_set = MyDataset(X_val, y_val)
val_loading = torch.utils.data.DataLoader(val_set, batch_size= 10)
test_set = MyDataset(X_test,y_test)
test_loading = torch.utils.data.DataLoader(test_set, batch_size= 100)
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.FC1 = nn.Linear(1, 10)
        self.FC2 = nn.Linear(10, 1)

    def forward(self, x):
        x = F.relu(self.FC1(x))
        x = self.FC2(x)
        return x
model = Net()
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(),
                            lr=1, weight_decay=0.01, momentum=0.9)

def train(net, train_loader, optimizer, epoch):
    net.train()
    total_loss = 0
    for idx, (data, target) in enumerate(train_loader, 0):
        outputs = net(data)
        loss = criterion(outputs, target)
        total_loss += loss.cpu().item()
        optimizer.step()
    print('Epoch:', epoch, 'average training loss', total_loss / len(train_loader))

def test(net, test_loader):
    net.eval()
    total_loss = 0
    for idx, (data, target) in enumerate(test_loader, 0):
        outputs = net(data)
        outputs = outputs * std_X_train + mean_X_train
        target = target * std_y_train + mean_y_train
        loss = criterion(outputs, target)
        total_loss += sqrt(loss.cpu().item())
    print('average testing loss', total_loss / len(test_loader))

for epoch in range(50):
    train(model, train_loading, optimizer, epoch)
    test(model, val_loading)
I'm wondering why you don't have loss.backward() after the line where you compute the loss (i.e., loss = criterion(outputs, target)) in your training snippet. That call backpropagates the gradients that optimizer.step() then uses to update the parameters of your network. Also, try using lower learning rates, as lr=1 is normally far too high for training such networks. Try learning rates between 0.001 and 0.01 to see if your network learns the mapping between the input X and target Y.
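A minimal sketch of the corrected train function, assuming the same criterion and loaders defined in the question (the optimizer.zero_grad() call and the 0.005 learning rate are illustrative additions within the suggested 0.001-0.01 range, not part of the original post):

def train(net, train_loader, optimizer, epoch):
    net.train()
    total_loss = 0
    for idx, (data, target) in enumerate(train_loader, 0):
        optimizer.zero_grad()              # clear gradients left over from the previous batch
        outputs = net(data)
        loss = criterion(outputs, target)
        loss.backward()                    # backpropagate: compute gradients w.r.t. the parameters
        optimizer.step()                   # update the parameters using those gradients
        total_loss += loss.cpu().item()
    print('Epoch:', epoch, 'average training loss', total_loss / len(train_loader))

optimizer = torch.optim.SGD(model.parameters(), lr=0.005, momentum=0.9)  # much lower lr than 1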

L1 regulariser Pytorch acting opposite to what I expect

I'm trying to add an L1 penalty to a specific layer of a neural network, and I have the code below (in which I attempt to add an L1 penalty to the first layer). If I run it with lambda = 0 (i.e. no penalty), the output gets very close to the expected weights (those being [10, -12, 2, 11, -0.25]), and if I run it for enough epochs or reduce the batch size it will match them exactly, as in the output below:
mlp.0.weight
Parameter containing:
tensor([[ 9.8657, -11.8305, 2.0242, 10.8913, -0.1978]],
requires_grad=True)
Then, when I run it with a large lambda, say 1000, I would expect these weights to shrink towards zero, since a large penalty is being added to the loss we are trying to minimise. However, the opposite happens and the weights explode, as in the output below (for lam = 1000):
mlp.0.weight
Parameter containing:
tensor([[-13.9368, 9.9072, 2.2447, -11.6870, 26.7293]],
requires_grad=True)
If anyone could help me, that'd be great. I'm new to PyTorch (but not to the idea of regularisation), so I'm guessing the problem is something in my code.
Thanks
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import numpy as np
from sklearn.linear_model import LinearRegression
class TrainDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return self.data.shape[0]

    def __getitem__(self, ind):
        x = self.data[ind][1:]
        y = self.data[ind][0]
        return x, y

class TestDataset(TrainDataset):
    def __getitem__(self, ind):
        x = self.data[ind]
        return x
torch.manual_seed(94)
x_train = np.random.rand(1000, 5)
y_train = x_train[:, 0] * 10 - x_train[:, 1] * 12 + x_train[:, 2] * 2 + x_train[:, 3] * 11 - x_train[:, 4] * 0.25
y_train = y_train.reshape(1000, 1)
x_train.shape
y_train.shape
train_data = np.concatenate((y_train, x_train), axis=1)
train_set = TrainDataset(train_data)
batch_size = 100
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.mlp = nn.Sequential(nn.Linear(5, 1, bias=False))

    def forward(self, x_mlp):
        out = self.mlp(x_mlp)
        return out
device = 'cpu'
model = MLP()
optimizer = torch.optim.SGD(model.parameters(), lr=0.02, momentum=0.82)
criterion = nn.MSELoss()
epochs = 5
lam = 0
model.train()
for epoch in range(epochs):
    losses = []
    for batch_num, input_data in enumerate(train_loader):
        optimizer.zero_grad()
        x, y = input_data
        x = x.to(device).float()
        y = y.reshape(batch_size, 1)
        y = y.to(device)
        output = model(x)
        for name, param in model.named_parameters():
            if name == 'mlp.0.weight':
                l1_norm = torch.norm(param, 1)
        loss = criterion(output, y) + lam * l1_norm
        loss.backward()
        optimizer.step()
        print('\tEpoch %d | Batch %d | Loss %6.2f' % (epoch, batch_num, loss.item()))

for name, param in model.named_parameters():
    if param.requires_grad:
        print(name)
        print(param)
I found that if I use Adagrad as the optimiser instead of SGD, it behaves as expected. I will need to look into the difference between them now, but this can be considered answered.
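For reference, the only change for that experiment is the optimizer line; a minimal sketch, keeping the 0.02 learning rate from the SGD setup above (Adagrad takes no momentum argument):

optimizer = torch.optim.Adagrad(model.parameters(), lr=0.02)  # per-parameter adaptive learning rates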

Why is linear regression wrong for pyspark?

I kept getting wrong answers, so I tried it on something very, very basic, and it was still wrong.
input file:
1 1:1
2 1:2
3 1:3
4 1:4
from pyspark.ml.regression import LinearRegression
# Load training data
training = spark.read.format("libsvm").load("stupid.txt")
lr = LinearRegression(maxIter=100, regParam=0.3, loss='squaredError')
# Fit the model
lrModel = lr.fit(training)
# Print the coefficients and intercept for linear regression
print("Coefficients: %s" % str(lrModel.coefficients))
print("Intercept: %s" % str(lrModel.intercept))
# Summarize the model over the training set and print out some metrics
trainingSummary = lrModel.summary
print("numIterations: %d" % trainingSummary.totalIterations)
print("objectiveHistory: %s" % str(trainingSummary.objectiveHistory))
trainingSummary.residuals.show()
print("RMSE: %f" % trainingSummary.rootMeanSquaredError)
print("r2: %f" % trainingSummary.r2)
I should have gotten coefficients [1] and an intercept of 0. Instead I got:
Coefficients: [0.7884394856681294]
Intercept: 0.52890128583
It looks like the issue is the regParam parameter you're using. If I run it with regParam set to 0, so that an ordinary (unregularised) OLS fit takes place, we get the expected output:
Code:
from pyspark.ml.regression import LinearRegression
from pyspark.ml.linalg import Vectors
training = spark.createDataFrame([
(1.0, Vectors.dense(1.0)),
(2.0, Vectors.dense(2.0)),
(3.0, Vectors.dense(3.0)),
(4.0, Vectors.dense(4.0))], ["label", "features"])
lr = LinearRegression(maxIter=100, regParam=0, loss='squaredError')
# Fit the model
lrModel = lr.fit(training)
# Print the coefficients and intercept for linear regression
print("Coefficients: %s" % str(lrModel.coefficients))
print("Intercept: %s" % str(lrModel.intercept))
# Summarize the model over the training set and print out some metrics
trainingSummary = lrModel.summary
print("numIterations: %d" % trainingSummary.totalIterations)
print("objectiveHistory: %s" % str(trainingSummary.objectiveHistory))
trainingSummary.residuals.show()
print("RMSE: %f" % trainingSummary.rootMeanSquaredError)
print("r2: %f" % trainingSummary.r2)
Output:
Coefficients: [1.0]
Intercept: 0.0
numIterations: 1
objectiveHistory: [0.0]
+---------+
|residuals|
+---------+
| 0.0|
| 0.0|
| 0.0|
| 0.0|
+---------+
RMSE: 0.000000
r2: 1.000000
It seems that regParam > 0 is being applied as an L2 regularisation term, which prevents the model from recovering the plain OLS solution.
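For completeness, in Spark ML regParam is the overall elastic-net penalty strength and elasticNetParam controls the L1/L2 mix (0.0 is pure L2/ridge, which is the default; 1.0 is pure L1/lasso). A minimal sketch of making that explicit when you want no regularisation at all:

from pyspark.ml.regression import LinearRegression

# regParam=0.0 disables the penalty entirely; elasticNetParam only matters when regParam > 0
lr = LinearRegression(maxIter=100, regParam=0.0, elasticNetParam=0.0, loss='squaredError')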

How to achieve 0 training error when using one-hidden-layer neural network with random inputs?

In theory, a one-hidden-layer neural network with m hidden nodes can be trained by gradient descent to fit n data points with 0 training error, provided m >= n.
I have 100 data points (x, y), with x in R and y in R, no specific pattern, just random. I was using a one-hidden-layer neural network with 1000/2000/10000/... hidden nodes to fit those points (with stochastic gradient descent and ReLU).
But I can't achieve 0 training error. Any idea what the problem is?
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Activation
from keras.optimizers import SGD
from keras import initializers
## initializing x_train and y_train randomly ##
def f1(x):
    if x < 3:
        return np.abs(x - 1)
    else:
        return -np.abs(x - 1) + 4
n = 100
x_train = np.random.uniform(-4+1, 4+1, size = n)
e = np.random.normal(0, 0.5, size = n)
y_train = np.vectorize(f1)(x_train) + e
np.random.shuffle(y_train)
k = 10000 # number of hidden nodes
ep = 5
loss = []
model = Sequential()
model.add(Dense(k, kernel_initializer = 'random_normal', input_shape = (1,), use_bias=True))
model.add(Activation('relu'))
model.add(Dense(1, kernel_initializer = 'random_normal', use_bias=True))
#sgd = SGD(lr=0.00005, decay=1e-6, momentum=0.9)
sgd = SGD(lr=0.00008)
model.compile(loss='mse', optimizer=sgd, metrics = ['mse'])
for i in range(5000):
    H = model.fit(x_train, y_train, epochs=ep, verbose=False)
    wt = model.get_weights()
    temp = H.history['mean_squared_error'][-1]
    print(temp)
    loss.append(temp)
What is your loss function? Can you show your code and perhaps some printouts of the loss per training epoch? How are you initializing the parameters of those hidden nodes? (Also, do the 1000/2000/10000/... in your description mean those are different experimental settings?)

Size mismatch for DNN for the MNIST dataset in pytorch

I have to create a neural network model and train it on the MNIST dataset. I need it to have 5 layers, with 100 neurons each. However, when I try to set this up I get an error that there is a size mismatch. Can you please help? I am hoping that I can train the model below:
class Mnist_DNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(784, 100)
        self.layer2 = nn.Linear(100, 100)
        self.layer3 = nn.Linear(100, 100)
        self.layer4 = nn.Linear(100, 100)
        self.layer5 = nn.Linear(100, 10)

    def forward(self, xb):
        xb = xb.view(-1, 1, 28, 28)
        xb = F.relu(self.layer1(xb))
        xb = F.relu(self.layer2(xb))
        xb = F.relu(self.layer3(xb))
        xb = F.relu(self.layer4(xb))
        xb = F.relu(self.layer5(xb))
        return self.layer5(xb)
You set up your layers to take a batch of 1D vectors of dim 784 (= 28*28). However, in your forward function you view the input as a batch of 2D matrices of size 28x28.
Try viewing the input as a batch of 1D signals:
xb = xb.view(-1, 784)
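For reference, a minimal sketch of the forward pass with that fix (this sketch also returns the output of layer5 directly, without the extra ReLU and second layer5 call in the posted code, which looks like a separate slip):

def forward(self, xb):
    xb = xb.view(-1, 784)           # flatten each 28x28 image into a 784-dim vector
    xb = F.relu(self.layer1(xb))
    xb = F.relu(self.layer2(xb))
    xb = F.relu(self.layer3(xb))
    xb = F.relu(self.layer4(xb))
    return self.layer5(xb)          # apply the output layer once, leaving the logits un-activated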