I am a total beginner in Matlab and trying to write some Machine Learning Algorithms in Matlab. I would really appreciate it if someone can help me in debugging this code.
function y = KNNpredict(trX,trY,K,X)
% trX is NxD, trY is Nx1, K is 1x1 and X is 1xD
% we return a single value 'y' which is the predicted class
% TODO: write this function
% int[] distance = new int[N];
distances = zeroes(N, 1);
examples = zeroes(K, D+2);
i = 0;
% for(every row in trX) { // taking ONE example
for row=1:N,
examples(row,:) = trX(row,:);
%sum = 0.0;
%for(every col in this example) { // taking every feature of this example
for col=1:D,
% diff = compute squared difference between these points - (trX[row][col]-X[col])^2
diff =(trX(row,col)-X(col))^2;
sum += diff;
end % for
distances(row) = sqrt(sum);
examples(i:D+1) = distances(row);
examples(i:D+2) = trY(row:1);
end % for
% sort the examples based on their distances thus calculated
sortrows(examples, D+1);
% for(int i = 0; i < K; K++) {
% These are the nearest neighbors
pos = 0;
neg = 0;
res = 0;
for row=1:K,
if(examples(row,D+2 == -1))
neg = neg + 1;
else
pos = pos + 1;
%disp(distances(row));
end
end % for
if(pos > neg)
y = 1;
return;
else
y = -1;
return;
end
end
end
Thanks so much
When working with matrices in MATLAB, it is usually better to avoid excessive loops and instead use vectorized operations whenever possible. This will usually produce faster and shorter code.
In your case, the k-nearest neighbors algorithm is simple enough and can be well vectorized. Consider the following implementation:
function y = KNNpredict(trX, trY, K, x)
%# euclidean distance between instance x and every training instance
dist = sqrt( sum( bsxfun(#minus, trX, x).^2 , 2) );
%# sorting indices from smaller to larger distances
[~,ord] = sort(dist, 'ascend');
%# get the labels of the K nearest neighbors
kTrY = trY( ord(1:min(K,end)) );
%# majority class vote
y = mode(kTrY);
end
Here is an example to test it using the Fisher-Iris dataset:
%# load dataset (data + labels)
load fisheriris
X = meas;
Y = grp2idx(species);
%# partition the data into training/testing
c = cvpartition(Y, 'holdout',1/3);
trX = X(c.training,:);
trY = Y(c.training);
tsX = X(c.test,:);
tsY = Y(c.test);
%# prediction
K = 10;
pred = zeros(c.TestSize,1);
for i=1:c.TestSize
pred(i) = KNNpredict(trX, trY, K, tsX(i,:));
end
%# validation
C = confusionmat(tsY, pred)
The confusion matrix of the kNN prediction with K=10:
C =
17 0 0
0 16 0
0 1 16
Related
The problem is there are c no. of firms bidding on p no. of projects. The winning bidders should collectively have the lowest cost on the client. Each firm can win a maximum of 2 projects.
I have written this code. It works, but takes forever to produce the result, and is very inefficient.
==========================================================================
function FINANCIAL_RESULTS
clear all; clc;
%This Matlab Program aims to select a large number of random combinations,
%filter those with more than two allocations per firm, and select the
%lowest price.
%number of companies
c = 7;
%number of projects
p = 9;
%max number of projects per company
lim = 2;
%upper and lower random limits
a = 1;
b = c;
%Results Matrix: each row represents the bidding price of one firm on all projects
Results = [382200,444050,725200,279250,750800,190200,528150,297700,297700;339040,393420,649520,243960,695760,157960,454550,259700,256980;388032,499002,721216,9999999,773184,204114,512148,293608,300934;385220,453130,737860,287480,9999999,188960,506690,274260,285670;351600,9999999,9999999,276150,722400,9999999,484150,266000,281400;404776,476444,722540,311634,778424,210776,521520,413130,442160;333400,403810,614720,232200,656140,165660,9999999,274180,274180];
Output = zeros(1,p+1);
n=1;
i=1;
for i = 1:10000000
rndm = round(a + (b-a).*rand(1,p));
%random checker with the criteria (max 2 allocations)
Check = tabulate(rndm);
if max(Check(:,2)) > lim
continue
end
Output(n,1:end-1) = rndm;
%Cumulative addition of random results
for k = 1:p
Output(n,end) = Output(n,end) + Results(rndm(k),k);
end
n = n+1;
end
disp(Results);
[Min_pay,Indx] = min(Output(:,end));
disp(Output(Indx,:));
%You know the program is done when Handel plays
load handel
sound(y,Fs);
%Done !
end
Since the first dimension is much greater than the second dimension it would be more efficient to perform loop along the second dimension:
i = 10000000;
rndm = round(a + (b-a) .* rand(i, p));
Check = zeros(size(rndm, 1), 1);
for k = 1:p
Check = max(Check, sum(rndm == k, 2));
end
rndm = rndm(Check <= lim, :);
OutputEnd = zeros(size(rndm, 1), 1);
for k = 1:p
OutputEnd = OutputEnd + Results(rndm(:, k), k);
end
Output = [rndm OutputEnd];
Note that if the compute has a limited memory put the above code inside a loop and concatenate the results of iterations to produce the final result:
n = 10;
Outputc = cell(n, 1);
for j = 1:n
i = 1000000;
....
Outputc{j} = Output;
end
Output = cat(1, Outputc{:});
I am creating a Forward Propagation In the feedforward step, an input pattern is propagated through the network to obtain an output. I have written this in pseudo code and currently attempting to implement this within MATLAB.
There are two errors I currently receive.
Patterns = x'; Desired = y; NHIDDENS = 1; prnout=Desired;
% Patterns become x so number of inputs becomes size of patterns
[NINPUTS,NPATS] = size(Patterns); [NOUTPUTS,NP] = size(Desired);
%apply the backprop here...
LearnRate = 0.15; Momentum = 0; DerivIncr = 0; deltaW1 = 0; deltaW2 = 0;
% Keeps the tan ordering of the examples of x
Inputs1= [Patterns;ones(1,NPATS)]; %Inputs1 = [ones(1,NPATS); Patterns];
% Weight initialisation
Weights1 = 0.5*(rand(NHIDDENS,1+NINPUTS)-0.5);
Weights2 = 0.5*(rand(1,1+NHIDDENS)-0.5);
TSS_Limit = 0.02;
for epoch = 1:10
% FORWARD LOOP
size(NOUTPUTS)
size(NPATS)
for ii = 0: ii < length(NINPUTS)
NOUTPUTS(ii+1) = NPATS(ii);
% Sets bias to 1
NOUTPUTS(1) = 1;
end
for ii = NHIDDENS: ii < NINPUTS
sum = 0;
for ij = 0: ij < ii
sum = sum + deltaW1(ii,ij) * NOUTPUTS(ij);
NOUTPUTS(ii) = tanh(sum);
end
end
Unable to perform assignment because the
left and right sides have a different
number of elements.
Error in mlpts (line 66)
NOUTPUTS(i+1) = NPATS(i);
i am still new to MATLAB and trying to become use to it.
After iterating through the loop
NOUTPUTS = 0 and the error is displayed. I am confused as I am trying to increment NOUTPUTS with ii by 1 through each loop.
I have been able to create the forward propagation with a loop.
for i =3:NNODES
summ = 0;
for j=1:i-1
summ = summ + weights(i,j) * Node_outputs(j);
end
if i == NNODES
Node_outputs(i) = summ
else
Node_outputs(i) = sigmoid(summ);
end
end
Out = Node_outputs(NNODES);
% BOut = ((Node_outputs(NNODES)) * (1 - Node_outputs));
BOut=zeros(1,6);
DeltaWeight = zeros(6,6);
I need to run the K-Means algorithm on the key points of the Sift algorithm in MATLAB .I want to cluster the key points in the image but I do not know how to do it.
First, put the key points into X with x coordinates in the first column and y coordinates in the second column like this
X=[reshape(keypxcoord,numel(keypxcoord),1),reshape(keypycoord,numel(keypycoord),1))]
Then if you have the statistical toolbox, you can just use the built in 'kmeans' function lik this
output = kmeans(X,num_clusters)
Otherwise, write your own kmeans function:
function [ min_group, mu ] = mykmeans( X,K )
%MYKMEANS
% X = N obervations of D element vectors
% K = number of centroids
assert(K > 0);
D = size(X,1); %No. of r.v.
N = size(X,2); %No. of observations
group_size = zeros(1,K);
min_group = zeros(1,N);
step = 0;
%% init centroids
mu = kpp(X,K);
%% 2-phase iterative approach (local then global)
while step < 400
%% phase 1, batch update
old_group = min_group;
% computing distances
d2 = distances2(X,mu);
% reassignment all points to closest centroids
[~, min_group] = min(d2,[],1);
% recomputing centroids (K number of means)
for k = 1 : K
group_size(k) = sum(min_group==k);
% check empty group
%if group_size(k) == 0
assert(group_size(k)>0);
%else
mu(:,k) = sum(X(:,min_group==k),2)/group_size(k);
%end
end
changed = sum(min_group ~= old_group);
p1_converged = changed <= N*0.001;
%% phase 2, individual update
changed = 0;
for n = 1 : N
d2 = distances2(X(:,n),mu);
[~, new_group] = min(d2,[],1);
% recomputing centroids of affected groups
k = min_group(n);
if (new_group ~= k)
mu(:,k)=(mu(:,k)*group_size(k)-X(:,n));
group_size(k) = group_size(k) - 1;
mu(:,k)=mu(:,k)/group_size(k);
mu(:,new_group) = mu(:,new_group)*group_size(new_group)+ X(:,n);
group_size(new_group) = group_size(new_group) + 1;
mu(:,new_group)=mu(:,new_group)/group_size(new_group);
min_group(n) = new_group;
changed = changed + 1;
end
end
%% check convergence
if p1_converged && changed <= N*0.001
break;
else
step = step + 1;
end
end
end
function d2 = distances2(X, mu)
K = size(mu,2);
N = size(X,2);
d2 = zeros(K,N);
for j = 1 : K
d2(j,:) = sum((X - repmat(mu(:,j),1,N)).^2,1);
end
end
function mu = kpp( X,K )
% kmeans++ init
D = size(X,1); %No. of r.v.
N = size(X,2); %No. of observations
mu = zeros(D, K);
mu(:,1) = X(:,round(rand(1) * (size(X, 2)-1)+1));
for k = 2 : K
% computing distances between centroids and observations
d2 = distances2(X, mu(1:k-1));
% assignment
[min_dist, ~] = min(d2,[],1);
% select new centroids by selecting point with the cumulative dist
% value (distance) larger than random value (falls in range between
% dist(n-1) : dist(n), dist(0)= 0)
rv = sum(min_dist) * rand(1);
for n = 1 : N
if min_dist(n) >= rv
mu(:,k) = X(:,n);
break;
else
rv = rv - min_dist(n);
end
end
end
end
I am programming a simple perceptron in matlab but it is not converging and I can't figure out why.
The goal is to binary classify 2D points.
%P1 Generate a dataset of two-dimensional points, and choose a random line in
%the plane as your target function f, where one side of the line maps to +1 and
%the other side to -1. Let the inputs xn 2 R2 be random points in the plane,
%and evaluate the target function f on each xn to get the corresponding output
%yn = f(xn).
clear all;
clc
clear
n = 20;
inputSize = 2; %number of inputs
dataset = generateRandom2DPointsDataset(n)';
[f , m , b] = targetFunction();
signs = classify(dataset,m,b);
weights=ones(1,2)*0.1;
threshold = 0;
fprintf('weights before:%d,%d\n',weights);
mistakes = 1;
numIterations = 0;
figure;
plotpv(dataset',(signs+1)/2);%mapping signs from -1:1 to 0:1 in order to use plotpv
hold on;
line(f(1,:),f(2,:));
pause(1)
while true
mistakes = 0;
for i = 1:n
if dataset(i,:)*weights' > threshold
result = 1;
else
result = -1;
end
error = signs(i) - result;
if error ~= 0
mistakes = mistakes + 1;
for j = 1:inputSize
weights(j) = weights(j) + error*dataset(i,j);
end
end
numIterations = numIterations + 1
end
if mistakes == 0
break
end
end
fprintf('weights after:%d,%d\n',weights);
random points and signs are fine since plotpv is working well
The code is based on that http://es.mathworks.com/matlabcentral/fileexchange/32949-a-perceptron-learns-to-perform-a-binary-nand-function?focused=5200056&tab=function.
When I pause the infinite loop, this is the status of my vairables:
I am not able to see why it is not converging.
Additional code( it is fine, just to avoid answers asking for that )
function [f,m,b] = targetFunction()
f = rand(2,2);
f(1,1) = 0;
f(1,2) = 1;
m = (f(2,2) - f(2,1));
b = f(2,1);
end
function dataset = generateRandom2DPointsDataset(n)
dataset = rand(2,n);
end
function values = classify(dataset,m,b)
for i=1:size(dataset,1)
y = m*dataset(i,1) + b;
if dataset(i,2) >= y, values(i) = 1;
else values(i) = -1;
end
end
end
For my studies I had to write a PDE solver for the Poisson equation on a disc shaped domain using the finite difference method.
I already passed the Lab exercise. There is one issue in my code I couldn't fix. Function fun1 with the boundary value problem gun2 is somehow oscillating at the boundary. When I use fun2 everything seems fine...
Both functions use at the boundary gun2. What is the problem?
function z = fun1(x,y)
r = sqrt(x.^2+y.^2);
z = zeros(size(x));
if( r < 0.25)
z = -10^8*exp(1./(r.^2-1/16));
end
end
function z = fun2(x,y)
z = 100*sin(2*pi*x).*sin(2*pi*y);
end
function z = gun2(x,y)
z = x.^2+y.^2;
end
function [u,A] = poisson2(funame,guname,M)
if nargin < 3
M = 50;
end
%Mesh Grid Generation
h = 2/(M + 1);
x = -1:h:1;
y = -1:h:1;
[X,Y] = meshgrid(x,y);
CI = ((X.^2 +Y.^2) < 1);
%Boundary Elements
Sum= zeros(size(CI));
%Sum over the neighbours
for i = -1:1
Sum = Sum + circshift(CI,[i,0]) + circshift(CI,[0,i]) ;
end
%if sum of neighbours larger 3 -> inner note!
CI = (Sum > 3);
%else boundary
CB = (Sum < 3 & Sum ~= 0);
Sum= zeros(size(CI));
%Sum over the boundary neighbour nodes....
for i = -1:1
Sum = Sum + circshift(CB,[i,0]) + circshift(CB,[0,i]);
end
%If the sum is equal 2 -> Diagonal boundary
CB = CB + (Sum == 2 & CB == 0 & CI == 0);
%Converting X Y to polar coordinates
Phi = atan(Y./X);
%Converting Phi R back to cartesian coordinates, only at the boundarys
for j = 1:M+2
for i = 1:M+2
if (CB(i,j)~=0)
if j > (M+2)/2
sig = 1;
else
sig = -1;
end
X(i,j) = sig*1*cos(Phi(i,j));
Y(i,j) = sig*1*sin(Phi(i,j));
end
end
end
%Numberize the internal notes u1,u2,......,un
CI = CI.*reshape(cumsum(CI(:)),size(CI));
%Number of internal notes
Ni = nnz(CI);
f = zeros(Ni,1);
k = 1;
A = spalloc(Ni,Ni,5*Ni);
%Create matix A!
for j=2:M+1
for i =2:M+1
if(CI(i,j) ~= 0)
hN = h;hS = h; hW = h; hE = h;
f(k) = fun(X(i,j),Y(i,j));
if(CB(i+1,j) ~= 0)
hN = abs(1-sqrt(X(i,j)^2+Y(i,j)^2));
f(k) = f(k) + gun(X(i,j),Y(i+1,j))*2/(hN^2+hN*h);
A(k,CI(i-1,j)) = -2/(h^2+h*hN);
else
if(CB(i-1,j) ~= 0) %in negative y is a boundry
hS = abs(1-sqrt(X(i,j)^2+Y(i,j)^2));
f(k) = f(k) + gun(X(i,j),Y(i-1,j))*2/(hS^2+h*hS);
A(k,CI(i+1,j)) = -2/(h^2+h*hS);
else
A(k,CI(i-1,j)) = -1/h^2;
A(k,CI(i+1,j)) = -1/h^2;
end
end
if(CB(i,j+1) ~= 0)
hE = abs(1-sqrt(X(i,j)^2+Y(i,j)^2));
f(k) = f(k) + gun(X(i,j+1),Y(i,j))*2/(hE^2+hE*h);
A(k,CI(i,j-1)) = -2/(h^2+h*hE);
else
if(CB(i,j-1) ~= 0)
hW = abs(1-sqrt(X(i,j)^2+Y(i,j)^2));
f(k) = f(k) + gun(X(i,j-1),Y(i,j))*2/(hW^2+h*hW);
A(k,CI(i,j+1)) = -2/(h^2+h*hW);
else
A(k,CI(i,j-1)) = -1/h^2;
A(k,CI(i,j+1)) = -1/h^2;
end
end
A(k,k) = (2/(hE*hW)+2/(hN*hS));
k = k + 1;
end
end
end
%Solve linear system
u = A\f;
U = zeros(M+2,M+2);
p = 1;
%re-arange u
for j = 1:M+2
for i = 1:M+2
if ( CI(i,j) ~= 0)
U(i,j) = u(p);
p = p+1;
else
if ( CB(i,j) ~= 0)
U(i,j) = gun(X(i,j),Y(i,j));
else
U(i,j) = NaN;
end
end
end
end
surf(X,Y,U);
end
I'm keeping this answer short for now, but may extend when the question contains more info.
My first guess is that what you are seeing is just numerical errors. Looking at the scales of the two graphs, the peaks in the first graph are relatively small compared to the signal in the second graph. Maybe there is a similar issue in the second that is just not visible because the signal is much bigger. You could try to increase the number of nodes and observe what happens with the result.
You should always expect to see numerical errors in such simulations. It's only a matter of trying to get their magnitude as small as possible (or as small as needed).