I am trying to plot a geodesic on a 3D surface (a tractrix) with MATLAB. This worked for me in the past when I didn't need to parametrize the surface (see here). The tractrix, however, called for a parameterization, chain-rule differentiation, and collecting the u, v, x, y, and f(x,y) values.
After many mistakes I think that I'm getting the right values for x = f1(u,v) and y = f2(u,v) describing a spiral right at the base of the surface:
What I can't understand is why the z value (the height of the 3D plot of the curve) is consistently zero, when I'm applying the same mathematical formula that allowed me to plot the surface in the first place, i.e. f = @(x,y) a.* (y - tanh(y)).
Here is the code, which runs without any errors on Octave. I've added a note in upper case on the crucial calls. Also note that I have restricted the number of geodesic lines to 1 to decrease the execution time.
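For reference, the dfdu and dfdv functions in the code below are central-difference approximations of the chain rule, with x = f1(u,v) and y = f2(u,v):

df/du = ∂f/∂x * ∂x/∂u + ∂f/∂y * ∂y/∂u
df/dv = ∂f/∂x * ∂x/∂v + ∂f/∂y * ∂y/∂v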
a = 0.3;
u = 0:0.1:(2 * pi);
v = 0:0.1:5;
[X,Y] = meshgrid(u,v);
% NOTE THAT THESE FORMULAS RESULT IN A SUCCESSFUL PLOT OF THE SURFACE:
x = a.* cos(X) ./ cosh(Y);
y = a.* sin(X) ./ cosh(Y);
z = a.* (Y - tanh(Y));
h = surf(x,y,z);
zlim([0, 1.2]);
set(h,'edgecolor','none')
colormap summer
hold on
% THESE ARE THE GENERIC FUNCTIONS (f) WHICH DON'T SEEM TO WORK AT THE END:
f = @(x,y) a.* (y - tanh(y));
f1 = @(u,v) a.* cos(u) ./ cosh(v);
f2 = @(u,v) a.* sin(u) ./ cosh(v);
dfdu = @(u,v) ((f(f1(u,v)+eps, f2(u,v)) - f(f1(u,v)-eps, f2(u,v)))/(2*eps) .* ...
(f1(u+eps,v)-f1(u-eps,v))/(2*eps) + ...
(f(f1(u,v), f2(u,v)+eps) - f(f1(u,v), f2(u,v)-eps))/(2*eps) .* ...
(f2(u+eps,v)-f2(u-eps,v))/(2*eps));
dfdv = @(u,v) ((f(f1(u,v)+eps, f2(u,v)) - f(f1(u,v)-eps, f2(u,v)))/(2*eps) .* ...
(f1(u,v+eps)-f1(u,v-eps))/(2*eps) + ...
(f(f1(u,v), f2(u,v)+eps) - f(f1(u,v), f2(u,v)-eps))/(2*eps) .* ...
(f2(u,v+eps)-f2(u,v-eps))/(2*eps));
% Normal vector to the surface:
N = @(u,v) [- dfdu(u,v), - dfdv(u,v), 1]; % Normal vec to surface @ any pt.
% Some colors to draw the lines:
C = {'k','r','g','y','m','c'};
for s = 1:1 % No. of lines to be plotted.
% Starting points:
u0 = [0, u(length(u))];
v0 = [0, v(length(v))];
du0 = 0.001;
dv0 = 0.001;
step_size = 0.00005; % Will determine the progression rate from pt to pt.
eta = step_size / sqrt(du0^2 + dv0^2); % Normalization.
eps = 0.0001; % Epsilon
max_num_iter = 100000; % Number of dots in each line.
% Semi-empty vectors to collect results:
U = [[u0(s), u0(s) + eta*du0], zeros(1,max_num_iter - 2)];
V = [[v0(s), v0(s) + eta*dv0], zeros(1,max_num_iter - 2)];
for i = 2:(max_num_iter - 1) % Creating the geodesic:
ut = U(i);
vt = V(i);
xt = f1(ut,vt);
yt = f2(ut,vt);
ft = f(xt,yt);
utm1 = U(i - 1);
vtm1 = V(i - 1);
xtm1 = f1(utm1,vtm1);
ytm1 = f2(utm1,vtm1);
ftm1 = f(xtm1,ytm1);
usymp = ut + (ut - utm1);
vsymp = vt + (vt - vtm1);
xsymp = f1(usymp,vsymp);
ysymp = f2(usymp,vsymp);
fsymp = ft + (ft - ftm1);
df = fsymp - f(xsymp,ysymp); % Is the surface changing? How much?
n = N(ut,vt); % Normal vector at point t
gamma = df * n(3); % Scalar x change f x z value of N
xtp1 = xsymp - gamma * n(1); % Gamma to modulate incre. x & y.
ytp1 = ysymp - gamma * n(2);
U(i + 1) = usymp - gamma * n(1);
V(i + 1) = vsymp - gamma * n(2);
end
% THE PROBLEM! f(f1(U,V),f2(U,V)) below YIELDS ALL ZEROS!!! The expected values are between 0 and 1.2.
P = [f1(U,V); f2(U,V); f(f1(U,V),f2(U,V))]; % Compiling results into a matrix.
units = 35; % Determines speed (smaller, faster)
packet = floor(size(P,2)/units);
P = P(:,1: packet * units);
for k = 1:packet:(packet * units)
hold on
plot3(P(1, k:(k+packet-1)), P(2, k:(k+packet-1)), P(3, k:(k+packet-1)), ...
'.', 'MarkerSize', 5, 'color', C{s})
drawnow
end
end
The answer is to Cris Luengo's credit, who noticed that the upper-case variable Y, used for the calculation of the height z of the curve, was indeed in the parametrization space (u,v) as intended, and not in the manifold (x,y)! I don't use MATLAB/Octave other than for occasional simulations, and I was trying every other syntactical permutation I could think of without realizing that f fed directly from v (as intended). I have now changed the names of the different variables to make the code cleaner.
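A minimal check of what was going wrong (illustrative values only): feeding the small Cartesian coordinate y = f2(u,v) into f collapses the height toward zero, whereas evaluating f on v gives the intended height a*(v - tanh(v)).

a = 0.3;
u = 1; v = 2;
f = @(x,y) a .* (y - tanh(y));
f2 = @(u,v) a .* sin(u) ./ cosh(v);
y_cart = f2(u, v)   % ~0.067, the Cartesian y coordinate
f(0, y_cart)        % ~3e-5, effectively zero
f(0, v)             % ~0.311, the intended height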
Here is the revised code:
a = 0.3;
u = 0:0.1:(3 * pi);
v = 0:0.1:5;
[U,V] = meshgrid(u,v);
x = a.* cos(U) ./ cosh(V);
y = a.* sin(U) ./ cosh(V);
z = a.* (V - tanh(V));
h = surf(x,y,z);
zlim([0, 1.2]);
set(h,'edgecolor','none')
colormap summer
hold on
f = @(x,y) a.* (y - tanh(y));
f1 = @(u,v) a.* cos(u) ./ cosh(v);
f2 = @(u,v) a.* sin(u) ./ cosh(v);
dfdu = @(u,v) ((f(f1(u,v)+eps, f2(u,v)) - f(f1(u,v)-eps, f2(u,v)))/(2*eps) .* ...
(f1(u+eps,v)-f1(u-eps,v))/(2*eps) + ...
(f(f1(u,v), f2(u,v)+eps) - f(f1(u,v), f2(u,v)-eps))/(2*eps) .* ...
(f2(u+eps,v)-f2(u-eps,v))/(2*eps));
dfdv = @(u,v) ((f(f1(u,v)+eps, f2(u,v)) - f(f1(u,v)-eps, f2(u,v)))/(2*eps) .* ...
(f1(u,v+eps)-f1(u,v-eps))/(2*eps) + ...
(f(f1(u,v), f2(u,v)+eps) - f(f1(u,v), f2(u,v)-eps))/(2*eps) .* ...
(f2(u,v+eps)-f2(u,v-eps))/(2*eps));
% Normal vector to the surface:
N = @(u,v) [- dfdu(u,v), - dfdv(u,v), 1]; % Normal vec to surface @ any pt.
% Some colors to draw the lines:
C = {'y','r','k','m','w',[0.8 0.8 1]}; % Color scheme
for s = 1:6 % No. of lines to be plotted.
% Starting points:
u0 = [0, -pi/2, 2*pi, 4*pi/3, pi/4, pi];
v0 = [0, 0, 0, 0, 0, 0];
du0 = [0, -0.0001, 0.001, - 0.001, 0.001, -0.01];
dv0 = [0.1, 0.01, 0.001, 0.001, 0.0005, 0.01];
step_size = 0.00005; % Will determine the progression rate from pt to pt.
eta = step_size / sqrt(du0(s)^2 + dv0(s)^2); % Normalization.
eps = 0.0001; % Epsilon
max_num_iter = 180000; % Number of dots in each line.
% Semi-empty vectors to collect results:
Uc = [[u0(s), u0(s) + eta*du0(s)], zeros(1,max_num_iter - 2)];
Vc = [[v0(s), v0(s) + eta*dv0(s)], zeros(1,max_num_iter - 2)];
for i = 2:(max_num_iter - 1) % Creating the geodesic:
ut = Uc(i);
vt = Vc(i);
xt = f1(ut,vt);
yt = f2(ut,vt);
ft = f(xt,yt);
utm1 = Uc(i - 1);
vtm1 = Vc(i - 1);
xtm1 = f1(utm1,vtm1);
ytm1 = f2(utm1,vtm1);
ftm1 = f(xtm1,ytm1);
usymp = ut + (ut - utm1);
vsymp = vt + (vt - vtm1);
xsymp = f1(usymp,vsymp);
ysymp = f2(usymp,vsymp);
fsymp = ft + (ft - ftm1);
df = fsymp - f(xsymp,ysymp); % Is the surface changing? How much?
n = N(ut,vt); % Normal vector at point t
gamma = df * n(3); % Scalar x change f x z value of N
xtp1 = xsymp - gamma * n(1); % Gamma to modulate incre. x & y.
ytp1 = ysymp - gamma * n(2);
Uc(i + 1) = usymp - gamma * n(1);
Vc(i + 1) = vsymp - gamma * n(2);
end
x = f1(Uc,Vc);
y = f2(Uc,Vc);
P = [x; y; f(Uc,Vc)]; % Compiling results into a matrix.
units = 35; % Determines speed (smaller, faster)
packet = floor(size(P,2)/units);
P = P(:,1: packet * units);
for k = 1:packet:(packet * units)
hold on
plot3(P(1, k:(k+packet-1)), P(2, k:(k+packet-1)), P(3, k:(k+packet-1)), ...
'.', 'MarkerSize', 5, 'color', C{s})
drawnow
end
end
function [J, grad] = costFunction(theta, X, y)
data = load('ex2data1.txt');
y = data(:, 3);
theta = [1;1;2];
m = length(y);
one = ones(m,1);
X1 = data(:, [1, 2]);
X = [one X1];
J = 0;
grad = zeros(size(theta));
J= 1/m *((sum(-y*log(sigmoid(X*theta)))) - (sum(1-y * log(1 - sigmoid(X*theta)))));
for i = 1:m
grad = (1/m) * sum (sigmoid(X*theta) - y')*X;
end
end
I want to know if I implemented the cost function and gradient descent correctly; I am getting a NaN answer with this. Also, does theta(1) always have to be 0? I have it as 1 here. And how many iterations do I need for grad: should that equal the length of the matrix, or something else?
function [J, grad] = costFunction(theta, X, y)
m = length(y);
J = 0;
grad = zeros(size(theta));
sig = 1./(1 + (exp(-(X * theta))));
J = ((-y' * log(sig)) - ((1 - y)' * log(1 - sig)))/m;
grad = ((sig - y)' * X)/m;
end
where
sig = 1./(1 + (exp(-(X * theta))));
is the matrix representation of the logistic regression hypothesis, which is defined as:

h_theta(x) = g(theta' * x)

where the function g is the sigmoid function, defined as:

g(z) = 1 / (1 + e^(-z))
J = ((-y' * log(sig)) - ((1 - y)' * log(1 - sig)))/m;
is the matrix representation of the cost function in logistic regression:

J(theta) = (1/m) * sum_{i=1..m} [ -y_i * log(h_theta(x_i)) - (1 - y_i) * log(1 - h_theta(x_i)) ]
and
grad = ((sig - y)' * X)/m;
is the matrix representation of the gradient of the cost, which is a vector of the same length as theta whose jth element (for j = 0, 1, ..., n) is defined as follows:

∂J(theta)/∂theta_j = (1/m) * sum_{i=1..m} (h_theta(x_i) - y_i) * x_ij
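A quick sanity check of the implementation, reusing the assignment's setup (this assumes ex2data1.txt and sigmoid.m are on the path):

data = load('ex2data1.txt');
X = [ones(size(data, 1), 1), data(:, 1:2)];
y = data(:, 3);
[J, grad] = costFunction(zeros(3, 1), X, y)
% Per the assignment, this should print J ~ 0.693 and grad ~ -0.1, -12.0092, -11.2628.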
I am taking Andrew Ng's Machine Learning course on Coursera. In this assignment, I am working to calculate the cost function using logistic regression in MATLAB, but am receiving "Error using sfminbx (line 27): Objective function is undefined at initial point. fminunc cannot continue.".
I should add that the cost J within the costFunction function below is NaN, because log(sigmoid(X * theta)) is a vector of -Inf. I'm sure this is related to the exception. Can you please help?
My cost function looks like the following:
function [J, grad] = costFunction(theta, X, y)
m = length(y); % number of training examples
J = 0;
grad = zeros(size(theta));
h = sigmoid(theta * X);
J = - (1 / m) * ((log(h)' * y) + (log(1 - h)' * (1 - y)));
grad = (1 / m) * X' * (h - y);
end
My code that calls this function looks like the following:
data = load('ex2data1.txt');
X = data(:, [1, 2]); y = data(:, 3);
[m, n] = size(X);
% Add intercept term to x and X_test
X = [ones(m, 1) X];
% Initialize fitting parameters
initial_theta = zeros(n + 1, 1);
% Compute and display initial cost and gradient
[cost, grad] = costFunction(initial_theta, X, y);
fprintf('Cost at initial theta (zeros): %f\n', cost);
fprintf('Expected cost (approx): 0.693\n');
fprintf('Gradient at initial theta (zeros): \n');
fprintf(' %f \n', grad);
fprintf('Expected gradients (approx):\n -0.1000\n -12.0092\n -11.2628\n');
% Compute and display cost and gradient with non-zero theta
test_theta = [-24; 0.2; 0.2];
[cost, grad] = costFunction(test_theta, X, y);
fprintf('\nCost at test theta: %f\n', cost);
fprintf('Expected cost (approx): 0.218\n');
fprintf('Gradient at test theta: \n');
fprintf(' %f \n', grad);
fprintf('Expected gradients (approx):\n 0.043\n 2.566\n 2.647\n');
fprintf('\nProgram paused. Press enter to continue.\n');
pause;
%% ============= Part 3: Optimizing using fminunc =============
% In this exercise, you will use a built-in function (fminunc) to find the
% optimal parameters theta.
% Set options for fminunc
options = optimset('GradObj', 'on', 'MaxIter', 400, 'Algorithm', 'trust-region');
% Run fminunc to obtain the optimal theta
% This function will return theta and the cost
[theta, cost] = ...
fminunc(@(t)(costFunction(t, X, y)), initial_theta, options);
The dataset looks like the following:
34.62365962451697,78.0246928153624,0
30.28671076822607,43.89499752400101,0
35.84740876993872,72.90219802708364,0
60.18259938620976,86.30855209546826,1
79.0327360507101,75.3443764369103,1
45.08327747668339,56.3163717815305,0
61.10666453684766,96.51142588489624,1
75.02474556738889,46.55401354116538,1
76.09878670226257,87.42056971926803,1
84.43281996120035,43.53339331072109,1
95.86155507093572,38.22527805795094,0
75.01365838958247,30.60326323428011,0
82.30705337399482,76.48196330235604,1
69.36458875970939,97.71869196188608,1
39.53833914367223,76.03681085115882,0
53.9710521485623,89.20735013750205,1
69.07014406283025,52.74046973016765,1
67.94685547711617,46.67857410673128,0
70.66150955499435,92.92713789364831,1
76.97878372747498,47.57596364975532,1
67.37202754570876,42.83843832029179,0
89.67677575072079,65.79936592745237,1
50.534788289883,48.85581152764205,0
34.21206097786789,44.20952859866288,0
77.9240914545704,68.9723599933059,1
62.27101367004632,69.95445795447587,1
80.1901807509566,44.82162893218353,1
93.114388797442,38.80067033713209,0
61.83020602312595,50.25610789244621,0
38.78580379679423,64.99568095539578,0
61.379289447425,72.80788731317097,1
85.40451939411645,57.05198397627122,1
52.10797973193984,63.12762376881715,0
52.04540476831827,69.43286012045222,1
40.23689373545111,71.16774802184875,0
54.63510555424817,52.21388588061123,0
33.91550010906887,98.86943574220611,0
64.17698887494485,80.90806058670817,1
74.78925295941542,41.57341522824434,0
34.1836400264419,75.2377203360134,0
83.90239366249155,56.30804621605327,1
51.54772026906181,46.85629026349976,0
94.44336776917852,65.56892160559052,1
82.36875375713919,40.61825515970618,0
51.04775177128865,45.82270145776001,0
62.22267576120188,52.06099194836679,0
77.19303492601364,70.45820000180959,1
97.77159928000232,86.7278223300282,1
62.07306379667647,96.76882412413983,1
91.56497449807442,88.69629254546599,1
79.94481794066932,74.16311935043758,1
99.2725269292572,60.99903099844988,1
90.54671411399852,43.39060180650027,1
34.52451385320009,60.39634245837173,0
50.2864961189907,49.80453881323059,0
49.58667721632031,59.80895099453265,0
97.64563396007767,68.86157272420604,1
32.57720016809309,95.59854761387875,0
74.24869136721598,69.82457122657193,1
71.79646205863379,78.45356224515052,1
75.3956114656803,85.75993667331619,1
35.28611281526193,47.02051394723416,0
56.25381749711624,39.26147251058019,0
30.05882244669796,49.59297386723685,0
44.66826172480893,66.45008614558913,0
66.56089447242954,41.09209807936973,0
40.45755098375164,97.53518548909936,1
49.07256321908844,51.88321182073966,0
80.27957401466998,92.11606081344084,1
66.74671856944039,60.99139402740988,1
32.72283304060323,43.30717306430063,0
64.0393204150601,78.03168802018232,1
72.34649422579923,96.22759296761404,1
60.45788573918959,73.09499809758037,1
58.84095621726802,75.85844831279042,1
99.82785779692128,72.36925193383885,1
47.26426910848174,88.47586499559782,1
50.45815980285988,75.80985952982456,1
60.45555629271532,42.50840943572217,0
82.22666157785568,42.71987853716458,0
88.9138964166533,69.80378889835472,1
94.83450672430196,45.69430680250754,1
67.31925746917527,66.58935317747915,1
57.23870631569862,59.51428198012956,1
80.36675600171273,90.96014789746954,1
68.46852178591112,85.59430710452014,1
42.0754545384731,78.84478600148043,0
75.47770200533905,90.42453899753964,1
78.63542434898018,96.64742716885644,1
52.34800398794107,60.76950525602592,0
94.09433112516793,77.15910509073893,1
90.44855097096364,87.50879176484702,1
55.48216114069585,35.57070347228866,0
74.49269241843041,84.84513684930135,1
89.84580670720979,45.35828361091658,1
83.48916274498238,48.38028579728175,1
42.2617008099817,87.10385094025457,1
99.31500880510394,68.77540947206617,1
55.34001756003703,64.9319380069486,1
74.77589300092767,89.52981289513276,1
The only problem I see is that you should have written h = sigmoid(X * theta) instead of h = sigmoid(theta * X). After changing this, I get the same answer from your code as I was getting from my own code for the same assignment.
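A quick dimension check makes the fix clear (illustrative sizes only: m = 100 examples, 3 parameters; the anonymous sigmoid here is a stand-in for the assignment's sigmoid.m):

sigmoid = @(z) 1 ./ (1 + exp(-z));   % stand-in helper
theta = zeros(3, 1);
X = ones(100, 3);                    % stand-in design matrix with intercept
h = sigmoid(X * theta);              % 100x1 vector of hypotheses -- conformable
% h = sigmoid(theta * X);            % error: 3x1 times 100x3 does not conform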
The multivariate linear regression cost function:

J(theta) = (1/(2m)) * sum_{i=1..m} (h_theta(x_i) - y_i)^2, where h_theta(x) = theta' * x

Is the following MATLAB code correct?
function J = computeCostMulti(X, y, theta)
m = length(y);
J = 0;
J=(1/(2*m)*(X*theta-y)'*(X*theta-y);
end
There are two ways I tried, which are essentially the same code:

J = (X * theta - y)' * (X * theta - y) / (2*m);

or you can try:

J = (1/(2*m)) * (X * theta - y)' * (X * theta - y);

(Note the parentheses around 2*m: writing /2*m would divide by 2 and then multiply by m.)
You are missing a ) at the end:
J=(1/(2*m))*(X*theta-y)'*(X*theta-y);
^
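As a quick sanity check of the corrected line (made-up numbers, chosen so that an exact fit gives zero cost):

X = [1 1; 1 2; 1 3];
y = [2; 3; 4];
theta = [1; 1];                                         % X*theta equals y exactly
J = (1/(2*length(y))) * (X*theta - y)' * (X*theta - y)  % returns 0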
I'm trying my hand at regularized logistic regression, with these formulas in MATLAB:

The cost function:

J(theta) = (1/m) * sum_{i=1..m} [ -y_i * log(h(x_i)) - (1 - y_i) * log(1 - h(x_i)) ] + (lambda/(2m)) * sum_{j=1..n} theta_j^2

The gradient:

∂J(theta)/∂theta_0 = (1/m) * sum_{i=1..m} (h(x_i) - y_i) * x_i0                         if j = 0
∂J(theta)/∂theta_j = (1/m) * sum_{i=1..m} (h(x_i) - y_i) * x_ij + (lambda/m) * theta_j  if j >= 1

This is not MATLAB code, just the formulas.
So far I've done this:
function [J, grad] = costFunctionReg(theta, X, y, lambda)
J = 0;
grad = zeros(size(theta));
temp_theta = [];
%cost function
%get the regularization term
for jj = 2:length(theta)
temp_theta(jj) = theta(jj)^2;
end
theta_reg = lambda/(2*m)*sum(temp_theta);
temp_sum =[];
%for the sum in the cost function
for ii =1:m
temp_sum(ii) = -y(ii)*log(sigmoid(theta'*X(ii,:)'))-(1-y(ii))*log(1-sigmoid(theta'*X(ii,:)'));
end
tempo = sum(temp_sum);
J = (1/m)*tempo+theta_reg;
%regularization
%theta 0
reg_theta0 = 0;
for jj=1:m
reg_theta0(jj) = (sigmoid(theta'*X(m,:)') -y(jj))*X(jj,1)
end
reg_theta0 = (1/m)*sum(reg_theta0)
grad_temp(1) = reg_theta0
%for the rest of thetas
reg_theta = [];
thetas_sum = 0;
for ii=2:size(theta)
for kk =1:m
reg_theta(kk) = (sigmoid(theta'*X(m,:)') - y(kk))*X(kk,ii)
end
thetas_sum(ii) = (1/m)*sum(reg_theta)+(lambda/m)*theta(ii)
reg_theta = []
end
for i=1:size(theta)
if i == 1
grad(i) = grad_temp(i)
else
grad(i) = thetas_sum(i)
end
end
end
The cost function is giving correct results, but I have no idea why the gradient (one step) is not: the cost gives J = 0.6931, which is correct, but the gradient gives grad = 0.3603 -0.1476 0.0320, which is not. (The regularization sum starts from 2 because the parameter theta(1) does not have to be regularized.) Any help? I guess there is something wrong with the code, but after 4 days I can't see it. Thanks.
Vectorized:
function [J, grad] = costFunctionReg(theta, X, y, lambda)
hx = sigmoid(X * theta);   % hypothesis for all m examples
m = size(X, 1);            % number of training examples
% Cost: unregularized term plus a penalty on theta(2:end) only
J = (sum(-y' * log(hx) - (1 - y')*log(1 - hx)) / m) + lambda * sum(theta(2:end).^2) / (2*m);
% Gradient: the [0; ones(...)] mask keeps theta(1), the bias, unpenalized
grad = ((hx - y)' * X / m)' + lambda .* theta .* [0; ones(length(theta)-1, 1)] ./ m;
end
I used more variables, so you can see clearly what comes from the regular formula and what comes from the added regularization cost. Additionally, it is good practice to use vectorization instead of loops in MATLAB/Octave; doing so usually gives a more efficient solution.
function [J, grad] = costFunctionReg(theta, X, y, lambda)
m = length(y); % number of training examples
%Hypotheses
hx = sigmoid(X * theta);
%%The cost without regularization
J_partial = (-y' * log(hx) - (1 - y)' * log(1 - hx)) ./ m;
%%Regularization Cost Added
J_regularization = (lambda/(2*m)) * sum(theta(2:end).^2);
%%Cost when we add regularization
J = J_partial + J_regularization;
%Grad without regularization
grad_partial = (1/m) * (X' * (hx -y));
%%Grad Cost Added
grad_regularization = (lambda/m) .* theta(2:end);
grad_regularization = [0; grad_regularization];
grad = grad_partial + grad_regularization;
end
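An example call with illustrative values (assumes this costFunctionReg.m and the assignment's sigmoid.m are on the path):

X = [1 0.5 1.2; 1 -0.3 0.8; 1 2.0 -1.1];   % 3 examples: intercept plus 2 features
y = [1; 0; 1];
initial_theta = zeros(3, 1);
lambda = 1;
[J, grad] = costFunctionReg(initial_theta, X, y, lambda)
% With theta = 0 the hypothesis is 0.5 everywhere, so J = log(2) ~ 0.693.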
Finally got it, after rewriting it again like for the 4th time, this is the correct code:
function [J, grad] = costFunctionReg(theta, X, y, lambda)
J = 0;
grad = zeros(size(theta));
temp_theta = [];
for jj = 2:length(theta)
temp_theta(jj) = theta(jj)^2;
end
theta_reg = lambda/(2*m)*sum(temp_theta);
temp_sum =[];
for ii =1:m
temp_sum(ii) = -y(ii)*log(sigmoid(theta'*X(ii,:)'))-(1-y(ii))*log(1-sigmoid(theta'*X(ii,:)'));
end
tempo = sum(temp_sum);
J = (1/m)*tempo+theta_reg;
%regularization
%theta 0
reg_theta0 = 0;
for i=1:m
reg_theta0(i) = ((sigmoid(theta'*X(i,:)'))-y(i))*X(i,1)
end
theta_temp(1) = (1/m)*sum(reg_theta0)
grad(1) = theta_temp
sum_thetas = []
thetas_sum = []
for j = 2:size(theta)
for i = 1:m
sum_thetas(i) = ((sigmoid(theta'*X(i,:)'))-y(i))*X(i,j)
end
thetas_sum(j) = (1/m)*sum(sum_thetas)+((lambda/m)*theta(j))
sum_thetas = []
end
for z=2:size(theta)
grad(z) = thetas_sum(z)
end
% =============================================================
end
Hope it helps someone; if anyone has comments on how I can do it better, please share. :)
Here is an answer that eliminates the loops:
m = length(y); % number of training examples
predictions = sigmoid(X*theta);
reg_term = (lambda/(2*m)) * sum(theta(2:end).^2);
calcErrors = -y.*log(predictions) - (1 -y).*log(1-predictions);
J = (1/m)*sum(calcErrors)+reg_term;
% prepend a 0 column to our reg_term matrix so we can use simple matrix addition
reg_term = [0 (lambda*theta(2:end)/m)'];
grad = sum(X.*(predictions - y)) / m + reg_term;
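Note that X.*(predictions - y) relies on implicit broadcasting (available in Octave and in MATLAB R2016b or newer); on older MATLAB versions, bsxfun(@times, X, predictions - y) should do the same. Also note that grad comes back as a row vector here.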