function [J, grad] = costFunction(theta, X, y)
data = load('ex2data1.txt');
y = data(:, 3);
theta = [1;1;2];
m = length(y);
one = ones(m,1);
X1 = data(:, [1, 2]);
X = [one X1];
J = 0;
grad = zeros(size(theta));
J= 1/m *((sum(-y*log(sigmoid(X*theta)))) - (sum(1-y * log(1 - sigmoid(X*theta)))));
for i = 1:m
grad = (1/m) * sum (sigmoid(X*theta) - y')*X;
end
end
I want to know if i implemented the cost function and gradient descent correctly i am getting NaN answer though this and does theta(1) always have to be 0 i have it as 1 here. How many iterations i need for grad that should be equal to the length of matrix or something else?
function [J, grad] = costFunction(theta, X, y)
m = length(y);
J = 0;
grad = zeros(size(theta));
sig = 1./(1 + (exp(-(X * theta))));
J = ((-y' * log(sig)) - ((1 - y)' * log(1 - sig)))/m;
grad = ((sig - y)' * X)/m;
end
where
sig = 1./(1 + (exp(-(X * theta))));
is matrix representation of the logistic regression hypothesis which is defined as:
where function g is the sigmoid function. The sigmoid function is defined as:
J = ((-y' * log(sig)) - ((1 - y)' * log(1 - sig)))/m;
is matrix representation of the cost function in logistic regression :
and
grad = ((sig - y)' * X)/m;
is matrix representation of the gradient of the cost which is a vector of the same length as θ where the jth element (for j = 0,1,...,n) is defined as follows:
Related
For the purpose of generalization, I hope Matlab can automatically compute the 1st & 2nd derivatives of the associated function f(x). (in case I change f(x) = sin(6x) to f(x) = sin(8x))
I know there exists built-in commands called diff() and syms, but I cannot figure out how to deal with them with the index i in the for-loop. This is the key problem I am struggling with.
How do I make changes to the following set of codes? I am using MATLAB R2019b.
n = 10;
h = (2.0 * pi) / (n - 1);
for i = 1 : n
x(i) = 0.0 + (i - 1) * h;
f(i) = sin(6 * x(i));
dfe(i) = 6 * cos(6 * x(i)); % first derivative
ddfe(i) = -36 * sin(6 * x(i)); % second derivative
end
You can simply use subs and double to do that. For your case:
% x is given here
n = 10;
h = (2.0 * pi) / (n - 1);
syms 'y';
g = sin(6 * y);
for i = 1 : n
x(i) = 0.0 + (i - 1) * h;
f(i) = double(subs(g,y,x(i)));
dfe(i) = double(subs(diff(g),y,x(i))); % first derivative
ddfe(i) = double(subs(diff(g,2),y,x(i))); % second derivative
end
By #Daivd comment, you can vectorize the loop as well:
% x is given here
n = 10;
h = (2.0 * pi) / (n - 1);
syms 'y';
g = sin(6 * y);
x = 0.0 + ((1:n) - 1) * h;
f = double(subs(g,y,x));
dfe = double(subs(diff(g),y,x)); % first derivative
ddfe = double(subs(diff(g,2),y,x)); % second derivative
I have this code for the cost in logistic regression, in matlab:
function [J, grad] = costFunction(theta, X, y)
m = length(y); % number of training examples
thetas = size(theta,1);
features = size(X,2);
steps = 100;
alpha = 0.1;
J = 0;
grad = zeros(size(theta));
sums = [];
result = 0;
for i=1:m
% sums = [sums; (y(i))*log10(sigmoid(X(i,:)*theta))+(1-y(i))*log10(1-sigmoid(X(i,:)*theta))]
sums = [sums; -y(i)*log(sigmoid(theta'*X(i,:)'))-(1-y(i))*log(1-sigmoid(theta'*X(i,:)'))];
%use log simple not log10, mistake
end
result = sum(sums);
J = (1/m)* result;
%gradient one step
tempo = [];
thetas_update = 0;
temp_thetas = [];
grad = temp_thetas;
for i = 1:size(theta)
for j = 1:m
tempo(j) = (sigmoid(theta'*X(j,:)')-y(j))*X(j,i);
end
temp_thetas(i) = sum(tempo);
tempo = [];
end
grad = (1/m).*temp_thetas;
% =============================================================
end
And I need to vectorize it, but I do not know how do it do it and why? I'm a programmer so I like the for's. But to vectorize it, I'm blank. Any help? Thanks.
function [J, grad] = costFunction(theta, X, y)
hx = sigmoid(X * theta);
m = length(X);
J = (-y' * log(hx) - (1 - y')*log(1 - hx)) / m;
grad = X' * (hx - y) / m;
end
Is the code should be
function [J, grad] = costFunction(theta, X, y)
hx = sigmoid(X' * theta);
m = length(X);
J = sum(-y * log(hx) - (1 - y)*log(1 - hx)) / m;
grad = X' * (hx - y) / m;
end
i have a problem with gradient descent in Matlab.
I dont know how to build the function.
Default settings:
max_iter = 1000;
learing = 1;
degree = 1;
My logistic regression cost function: (Correct ???)
function [Jval, Jgrad] = logcost(function(theta, matrix, y)
mb = matrix * theta;
p = sigmoid(mb);
Jval = sum(-y' * log(p) - (1 - y')*log(1 - p)) / length(matrix);
if nargout > 1
Jgrad = matrix' * (p - y) / length(matrix);
end
and now my gradient descent function:
function [theta, Jval] = graddescent(logcost, learing, theta, max_iter)
[Jval, Jgrad] = logcost(theta);
for iter = 1:max_iter
theta = theta - learing * Jgrad; % is this correct?
Jval[iter] = ???
end
thx for all help :), Hans
You can specify the code of your cost function in a regular matlab function:
function [Jval, Jgrad] = logcost(theta, matrix, y)
mb = matrix * theta;
p = sigmoid(mb);
Jval = sum(-y' * log(p) - (1 - y')*log(1 - p)) / length(matrix);
if nargout > 1
Jgrad = matrix' * (p - y) / length(matrix);
end
end
Then, create your gradient descent method (Jgrad is automatically updated in each loop iteration):
function [theta, Jval] = graddescent(logcost, learing, theta, max_iter)
for iter = 1:max_iter
[Jval, Jgrad] = logcost(theta);
theta = theta - learing * Jgrad;
end
end
and call it with a function object that can be used to evaluate your cost:
% Initialize 'matrix' and 'y' ...
matrix = randn(2,2);
y = randn(2,1);
% Create function object.
fLogcost = #(theta)(logcost(theta, matrix, y));
% Perform gradient descent.
[ theta, Jval] = graddescent(fLogcost, 1e-3, [ 0 0 ]', 10);
You can also take a look at fminunc, built in Matlab's method for function optimization which includes an implementation of gradient descent, among other minimization techniques.
Regards.
I have this code for the cost in logistic regression, in matlab:
function [J, grad] = costFunction(theta, X, y)
m = length(y); % number of training examples
thetas = size(theta,1);
features = size(X,2);
steps = 100;
alpha = 0.1;
J = 0;
grad = zeros(size(theta));
sums = [];
result = 0;
for i=1:m
% sums = [sums; (y(i))*log10(sigmoid(X(i,:)*theta))+(1-y(i))*log10(1-sigmoid(X(i,:)*theta))]
sums = [sums; -y(i)*log(sigmoid(theta'*X(i,:)'))-(1-y(i))*log(1-sigmoid(theta'*X(i,:)'))];
%use log simple not log10, mistake
end
result = sum(sums);
J = (1/m)* result;
%gradient one step
tempo = [];
thetas_update = 0;
temp_thetas = [];
grad = temp_thetas;
for i = 1:size(theta)
for j = 1:m
tempo(j) = (sigmoid(theta'*X(j,:)')-y(j))*X(j,i);
end
temp_thetas(i) = sum(tempo);
tempo = [];
end
grad = (1/m).*temp_thetas;
% =============================================================
end
And I need to vectorize it, but I do not know how do it do it and why? I'm a programmer so I like the for's. But to vectorize it, I'm blank. Any help? Thanks.
function [J, grad] = costFunction(theta, X, y)
hx = sigmoid(X * theta);
m = length(X);
J = (-y' * log(hx) - (1 - y')*log(1 - hx)) / m;
grad = X' * (hx - y) / m;
end
Is the code should be
function [J, grad] = costFunction(theta, X, y)
hx = sigmoid(X' * theta);
m = length(X);
J = sum(-y * log(hx) - (1 - y)*log(1 - hx)) / m;
grad = X' * (hx - y) / m;
end
I'm trying my hand at regularized LR, simple with this formulas in matlab:
The cost function:
J(theta) = 1/m*sum((-y_i)*log(h(x_i)-(1-y_i)*log(1-h(x_i))))+(lambda/2*m)*sum(theta_j)
The gradient:
∂J(theta)/∂theta_0 = [(1/m)*(sum((h(x_i)-y_i)*x_j)] if j=0
∂j(theta)/∂theta_n = [(1/m)*(sum((h(x_i)-y_i)*x_j)]+(lambda/m)*(theta_j) if j>1
This is not matlab code is just the formula.
So far I've done this:
function [J, grad] = costFunctionReg(theta, X, y, lambda)
J = 0;
grad = zeros(size(theta));
temp_theta = [];
%cost function
%get the regularization term
for jj = 2:length(theta)
temp_theta(jj) = theta(jj)^2;
end
theta_reg = lambda/(2*m)*sum(temp_theta);
temp_sum =[];
%for the sum in the cost function
for ii =1:m
temp_sum(ii) = -y(ii)*log(sigmoid(theta'*X(ii,:)'))-(1-y(ii))*log(1-sigmoid(theta'*X(ii,:)'));
end
tempo = sum(temp_sum);
J = (1/m)*tempo+theta_reg;
%regulatization
%theta 0
reg_theta0 = 0;
for jj=1:m
reg_theta0(jj) = (sigmoid(theta'*X(m,:)') -y(jj))*X(jj,1)
end
reg_theta0 = (1/m)*sum(reg_theta0)
grad_temp(1) = reg_theta0
%for the rest of thetas
reg_theta = [];
thetas_sum = 0;
for ii=2:size(theta)
for kk =1:m
reg_theta(kk) = (sigmoid(theta'*X(m,:)') - y(kk))*X(kk,ii)
end
thetas_sum(ii) = (1/m)*sum(reg_theta)+(lambda/m)*theta(ii)
reg_theta = []
end
for i=1:size(theta)
if i == 1
grad(i) = grad_temp(i)
else
grad(i) = thetas_sum(i)
end
end
end
And the cost function is giving correct results, but I have no idea why the gradient (one step) is not, the cost gives J = 0.6931 which is correct and the gradient grad = 0.3603 -0.1476 0.0320, which is not, the cost starts from 2 because the parameter theta(1) does not have to be regularized, any help? I guess there is something wrong with the code, but after 4 days I can't see it.Thanks
Vectorized:
function [J, grad] = costFunctionReg(theta, X, y, lambda)
hx = sigmoid(X * theta);
m = length(X);
J = (sum(-y' * log(hx) - (1 - y')*log(1 - hx)) / m) + lambda * sum(theta(2:end).^2) / (2*m);
grad =((hx - y)' * X / m)' + lambda .* theta .* [0; ones(length(theta)-1, 1)] ./ m ;
end
I used more variables, so you could see clearly what comes from the regular formula, and what comes from "the regularization cost added". Additionally, It is a good practice to use "vectorization" instead of loops in Matlab/Octave. By doing this, you guarantee a more optimized solution.
function [J, grad] = costFunctionReg(theta, X, y, lambda)
%Hypotheses
hx = sigmoid(X * theta);
%%The cost without regularization
J_partial = (-y' * log(hx) - (1 - y)' * log(1 - hx)) ./ m;
%%Regularization Cost Added
J_regularization = (lambda/(2*m)) * sum(theta(2:end).^2);
%%Cost when we add regularization
J = J_partial + J_regularization;
%Grad without regularization
grad_partial = (1/m) * (X' * (hx -y));
%%Grad Cost Added
grad_regularization = (lambda/m) .* theta(2:end);
grad_regularization = [0; grad_regularization];
grad = grad_partial + grad_regularization;
Finally got it, after rewriting it again like for the 4th time, this is the correct code:
function [J, grad] = costFunctionReg(theta, X, y, lambda)
J = 0;
grad = zeros(size(theta));
temp_theta = [];
for jj = 2:length(theta)
temp_theta(jj) = theta(jj)^2;
end
theta_reg = lambda/(2*m)*sum(temp_theta);
temp_sum =[];
for ii =1:m
temp_sum(ii) = -y(ii)*log(sigmoid(theta'*X(ii,:)'))-(1-y(ii))*log(1-sigmoid(theta'*X(ii,:)'));
end
tempo = sum(temp_sum);
J = (1/m)*tempo+theta_reg;
%regulatization
%theta 0
reg_theta0 = 0;
for i=1:m
reg_theta0(i) = ((sigmoid(theta'*X(i,:)'))-y(i))*X(i,1)
end
theta_temp(1) = (1/m)*sum(reg_theta0)
grad(1) = theta_temp
sum_thetas = []
thetas_sum = []
for j = 2:size(theta)
for i = 1:m
sum_thetas(i) = ((sigmoid(theta'*X(i,:)'))-y(i))*X(i,j)
end
thetas_sum(j) = (1/m)*sum(sum_thetas)+((lambda/m)*theta(j))
sum_thetas = []
end
for z=2:size(theta)
grad(z) = thetas_sum(z)
end
% =============================================================
end
If its helps anyone, or anyone has any comments on how can I do it better. :)
Here is an answer that eliminates the loops
m = length(y); % number of training examples
predictions = sigmoid(X*theta);
reg_term = (lambda/(2*m)) * sum(theta(2:end).^2);
calcErrors = -y.*log(predictions) - (1 -y).*log(1-predictions);
J = (1/m)*sum(calcErrors)+reg_term;
% prepend a 0 column to our reg_term matrix so we can use simple matrix addition
reg_term = [0 (lambda*theta(2:end)/m)'];
grad = sum(X.*(predictions - y)) / m + reg_term;