I executed this code using Feature Matrix 517*11 and Label Matrix 517*1. But once the dimensions of matrices change the code cant be run. How can I fix this?
The error is:
Subscripted assignment dimension mismatch.
in this line :
edges(k,j) = quantlevels(a);
Here is my code:
function [features,weights] = MI(features,labels,Q)
if nargin <3
Q = 12;
end
edges = zeros(size(features,2),Q+1);
for k = 1:size(features,2)
minval = min(features(:,k));
maxval = max(features(:,k));
if minval==maxval
continue;
end
quantlevels = minval:(maxval-minval)/500:maxval;
N = histc(features(:,k),quantlevels);
totsamples = size(features,1);
N_cum = cumsum(N);
edges(k,1) = -Inf;
stepsize = totsamples/Q;
for j = 1:Q-1
a = find(N_cum > j.*stepsize,1);
edges(k,j) = quantlevels(a);
end
edges(k,j+2) = Inf;
end
S = zeros(size(features));
for k = 1:size(S,2)
S(:,k) = quantize(features(:,k),edges(k,:))+1;
end
I = zeros(size(features,2),1);
for k = 1:size(features,2)
I(k) = computeMI(S(:,k),labels,0);
end
[weights,features] = sort(I,'descend');
%% EOF
function [I,M,SP] = computeMI(seq1,seq2,lag)
if nargin <3
lag = 0;
end
if(length(seq1) ~= length(seq2))
error('Input sequences are of different length');
end
lambda1 = max(seq1);
symbol_count1 = zeros(lambda1,1);
for k = 1:lambda1
symbol_count1(k) = sum(seq1 == k);
end
symbol_prob1 = symbol_count1./sum(symbol_count1)+0.000001;
lambda2 = max(seq2);
symbol_count2 = zeros(lambda2,1);
for k = 1:lambda2
symbol_count2(k) = sum(seq2 == k);
end
symbol_prob2 = symbol_count2./sum(symbol_count2)+0.000001;
M = zeros(lambda1,lambda2);
if(lag > 0)
for k = 1:length(seq1)-lag
loc1 = seq1(k);
loc2 = seq2(k+lag);
M(loc1,loc2) = M(loc1,loc2)+1;
end
else
for k = abs(lag)+1:length(seq1)
loc1 = seq1(k);
loc2 = seq2(k+lag);
M(loc1,loc2) = M(loc1,loc2)+1;
end
end
SP = symbol_prob1*symbol_prob2';
M = M./sum(M(:))+0.000001;
I = sum(sum(M.*log2(M./SP)));
function y = quantize(x, q)
x = x(:);
nx = length(x);
nq = length(q);
y = sum(repmat(x,1,nq)>repmat(q,nx,1),2);
I've run the function several times without getting any error.
I've used as input for "seq1" and "seq2" arrays such as 1:10 and 11:20
Possible error might rise in the loops
for k = 1:lambda1
symbol_count1(k) = sum(seq1 == k);
end
if "seq1" and "seq2" are defined as matrices since sum will return an array while
symbol_count1(k)
is expected to be single value.
Another possible error might rise if seq1 and seq2 are not of type integer since they are used as indexes in
M(loc1,loc2) = M(loc1,loc2)+1;
Hope this helps.
Related
Trying to find the optimal hyperparameters for my svm model using a grid search, but it simply returns 1 for the hyperparameters.
function evaluations = inner_kfold_trainer(C,q,k,features_xy,labels)
features_xy_flds = kdivide(features_xy, k);
labels_flds = kdivide(labels, k);
evaluations = zeros(k,3);
for i = 1:k
fprintf('Fold %i of %i\n',i,k);
train_data = cell2mat(features_xy_flds(1:end ~= i));
train_labels = cell2mat(labels_flds(1:end ~= i));
test_data = cell2mat(features_xy_flds(i));
test_labels = cell2mat(labels_flds(i));
%AU1
train_labels = train_labels(:,1);
test_labels = test_labels(:,1);
[k,~] = size(test_labels);
%train
sv = fitcsvm(train_data,train_labels, 'KernelFunction','polynomial', 'PolynomialOrder',q,'BoxConstraint',C);
sv.predict(test_data);
%Calculate evaluative measures
%svm_outputs = zeros(k,1);
sv_predictions = sv.predict(test_data);
[precision,recall,F1] = evaluation(sv_predictions,test_labels);
evaluations(i,1) = precision;
evaluations(i,2) = recall;
evaluations(i,3) = F1;
end
save('eval.mat', 'evaluations');
end
an inner-fold cross validation function
and below the grid function where something seems to be going wrong
function [q,C] = grid_search(features_xy,labels,k)
% n x n grid
n = 3;
q_grid = linspace(1,19,n);
C_grid = linspace(1,59,n);
tic
evals = zeros(n,n,3);
for i = 1:n
for j = 1:n
fprintf('## i=%i, j=%i ##\n', i, j);
svm_results = inner_kfold_trainer(C_grid(i), q_grid(j),k,features_xy,labels);
evals(i,j,:) = mean(svm_results(:,:));
% precision only
%evals(i,j,:) = max(svm_results(:,1));
toc
end
end
f = evals;
% retrieving the best value of the hyper parameters, to use in the outer
% fold
[M1,I1] = max(f);
[~,I2] = max(M1(1,1,:));
index = I1(:,:,I2);
C = C_grid(index(1))
q = q_grid(index(2))
end
When I run grid_search(features_xy,labels,8) for example, I get C=1 and q=1, for any k(the no. of folds) value. Also features_xy is a 500*98 matrix.
This is my Approximate entropy Calculator in MATLAB. https://en.wikipedia.org/wiki/Approximate_entropy
I'm not sure why it isn't working. It's returning a negative value.Can anyone help me with this? R1 being the data.
FindSize = size(R1);
N = FindSize(1);
% N = input ('insert number of data values');
%if you want to put your own N in, take away the % from the line above
and
%insert the % before the N = FindSize(1)
%m = input ('insert m: integer representing length of data, embedding
dimension ');
m = 2;
%r = input ('insert r: positive real number for filtering, threshold
');
r = 0.2*std(R1);
for x1= R1(1:N-m+1,1)
D1 = pdist2(x1,x1);
C11 = (D1 <= r)/(N-m+1);
c1 = C11(1);
end
for i1 = 1:N-m+1
s1 = sum(log(c1));
end
phi1 = (s1/(N-m+1));
for x2= R1(1:N-m+2,1)
D2 = pdist2(x2,x2);
C21 = (D2 <= r)/(N-m+2);
c2 = C21(1);
end
for i2 = 1:N-m+2
s2 = sum(log(c2));
end
phi2 = (s2/(N-m+2));
Ap = phi1 - phi2;
Apen = Ap(1)
Following the documentation provided by the Wikipedia article, I developed this small function that calculates the approximate entropy:
function res = approximate_entropy(U,m,r)
N = numel(U);
res = zeros(1,2);
for i = [1 2]
off = m + i - 1;
off_N = N - off;
off_N1 = off_N + 1;
x = zeros(off_N1,off);
for j = 1:off
x(:,j) = U(j:off_N+j);
end
C = zeros(off_N1,1);
for j = 1:off_N1
dist = abs(x - repmat(x(j,:),off_N1,1));
C(j) = sum(~any((dist > r),2)) / off_N1;
end
res(i) = sum(log(C)) / off_N1;
end
res = res(1) - res(2);
end
I first tried to replicate the computation shown the article, and the result I obtain matches the result shown in the example:
U = repmat([85 80 89],1,17);
approximate_entropy(U,2,3)
ans =
-1.09965411068114e-05
Then I created another example that shows a case in which approximate entropy produces a meaningful result (the entropy of the first sample is always less than the entropy of the second one):
% starting variables...
s1 = repmat([10 20],1,10);
s1_m = mean(s1);
s1_s = std(s1);
s2_m = 0;
s2_s = 0;
% datasample will not always return a perfect M and S match
% so let's repeat this until equality is achieved...
while ((s1_m ~= s2_m) && (s1_s ~= s2_s))
s2 = datasample([10 20],20,'Replace',true,'Weights',[0.5 0.5]);
s2_m = mean(s2);
s2_s = std(s2);
end
m = 2;
r = 3;
ae1 = approximate_entropy(s1,m,r)
ae2 = approximate_entropy(s2,m,r)
ae1 =
0.00138568170752751
ae2 =
0.680090884817465
Finally, I tried with your sample data:
fid = fopen('O1.txt','r');
U = cell2mat(textscan(fid,'%f'));
fclose(fid);
m = 2;
r = 0.2 * std(U);
approximate_entropy(U,m,r)
ans =
1.08567461184858
The code below is defined as algorithm 1 that computes the Pseudo Zernike Radial polynomials:
function R = pseudo_zernike_radial_polynomials(n,r)
if any( r>1 | r<0 | n<0)
error(':zernike_radial_polynomials either r is less than or greater thatn 1, r must be between 0 and 1 or n is less than 0.')
end
if n==0;
R =ones(n +1, length(r));
return;
end
R =ones(n +1, length(r));
rSQRT= sqrt(r);
r0 = ~logical(rSQRT.^(2*n+1)) ; % if any low r exist, and high n, then treat as 0
if any(r0)
m = n:-1:mod(n,2); ss=1:sum(r0);
R0(m +1, ss)=0;
R0(0 +1, ss)=1;
R(:,r0)=R0;
end
if any(~r0)
rSQRT= rSQRT(~r0);
R1 = zernike_radial_polynomials(2*n+1, rSQRT );
m = 2:2: 2*n+1 +1;
R1=R1(m,:);
for m=1:size(R1,1)
R1(m,:) = R1(m,:)./rSQRT';
end
R(:,~r0)=R1;
end
Then, this is algorithm 2 that calculates the moments:
and I translate into the code as follow:
clear all
%input : 2D image f, Nmax = order.
f = rgb2gray(imread('Oval_45.png'));
prompt = ('Input PZM order Nmax:');
Nmax = input(prompt);
Pzm =0;
l = size(f,1);
for x = 1:l;
for y =x;
for n = 0:Nmax;
[X,Y] = meshgrid(x,y);
R = sqrt((2.*X-l-1).^2+(2.*Y-l-1).^2)/l;
theta = atan2((l-1-2.*Y+2),(2.*X-l+1-2));
R = (R<=1).*R;
rad = pseudo_zernike_radial_polynomials(n, R);
for m = 0:n;
%find psi
if mod(m,2)==0
%m is even
newd1 = f(x,y)+f(x,y);
newd2 = f(y,x)+f(y,x);
newd3 = f(x,y)+f(x,y);
newd4 = f(y,x)+f(y,x);
x1 = newd1;
y1 = (-1)^m/2*newd2;
x2 = newd3;
y2 = (-1)^m/2*newd4;
psi = cos(m*theta)*(x1+y1+x2+y2)-(1i)*sin(m*theta)*(x1+y1-x2-y2);
else
newd1 = f(x,y)-f(x,y);
newd2 = f(y,x)-f(y,x);
newd3 = f(x,y)-f(x,y);
newd4 = f(y,x)-f(y,x);
x1 = newd1;
y1 = (-1)^m/2*newd2;
x2 = newd3;
y2 = (-1)^m/2*newd4;
psi = cos(m*theta)*(x1+x2)+sin(m*theta)*(y1-y2)+(1i)*(cos(m*theta)*(y1+y2)-sin(m*theta)*(x1-x2));
end
Pzm = Pzm+rad*psi;
end
end
end
end
However its give me error :
Error using *
Integers can only be combined with integers of the same class, or scalar doubles.
Error in main_pzm (line 44)
Pzm = Pzm+rad*psi;
The detail of the calculation can be seen here
I'm trying to implement k-NN in matlab. I have a matrix of 214 x's that have 9 columns of attributes with the 10th column being the label. I want to measure loss with a 0-1 function on 10 cross-validation tests. I have the following code:
function q3(file)
data = knnfile(file);
loss(data(:,1:9),'KFold',data(:,10))
losses = zeros(25,3);
new_data = data;
new_data(:,10) = [];
sdd = std(new_data);
meand = mean(new_data);
for s = 1:214
for q = 1:9
new_data(s,q) = (new_data(s,q) - meand(q)) / sdd(q);
end
end
new_data = [new_data data(:,10)];
for k = 1:25
loss1 = 0;
loss2 = 0;
for j = 0:9
index = floor(214/10)*j+1;
curd1 = data([1:index-1,index+21:end],:);
curd2 = new_data([1:index-1,index+21:end],:);
for l = 0:20
c1 = knn(curd1,k,data(index+l,:));
c2 = knn(curd2,k,new_data(index+l,:));
loss1 = loss1 + (c1 ~= data(index+l,10));
loss2 = loss2 + (c2 ~= new_data(index+l,10));
end
end
losses(k,1) = k;
losses(k,2) = 100*loss1/210;
losses(k,3) = 100*loss2/210;
end
function cluster = knn(Data,k,x)
distances = zeros(193,2);
for i = 1:size(Data,1)
row = Data(i,:);
d = norm(row(1:size(row,2)-1) - x(1:size(x,2)-1));
distances(i,:) = [d row(10)];
end
distances = sortrows(distances,1);
cluster = mode(distances(1:k,2));
I'm getting 40%+ loss with almost no correlation to k and I'm sure that something here is wrong but I'm not quite sure.
Any help would be appreciated!
The following is my full code: (Most of it isn't useful for what I'm asking, but I just put in the entire code for context, the part of the code that is causing me trouble is towards the end)
clc
clear
P = xlsread('b3.xlsx', 'P');
d = xlsread('b3.xlsx', 'd');
CM = xlsread('b3.xlsx', 'Cov');
Original_PD = P; %Store original PD
LM_rows = size(P,1)+1; %Expected LM rows
LM_columns = size(P,2); %Expected LM columns
LM_FINAL = zeros(LM_rows,LM_columns); %Dimensions of LM_FINAL
% Start of the outside loop
for k = 1:size(P,2)
P = Original_PD(:,k);
interval = cell(size(P,1)+2,1);
for i = 1:size(P,1)
interval{i,1} = NaN(size(P,1),2);
interval{i,1}(:,1) = -Inf;
interval{i,1}(:,2) = d;
interval{i,1}(i,1) = d(i,1);
interval{i,1}(i,2) = Inf;
end
interval{i+1,1} = [-Inf*ones(size(P,1),1) d];
interval{i+2,1} = [d Inf*ones(size(P,1),1)];
c = NaN(size(interval,1),1);
for i = 1:size(c,1)
c(i,1) = mvncdf(interval{i,1}(:,1),interval{i,1}(:,2),0,CM);
end
c0 = c(size(P,1)+1,1);
f = c(size(P,1)+2,1);
c = c(1:size(P,1),:);
b0 = exp(1);
b = exp(1)*P;
syms x;
eqn = f*x;
for i = 1:size(P,1)
eqn = eqn*(c0/c(i,1)*x + (b(i,1)-b0)/c(i,1));
end
eqn = c0*x^(size(P,1)+1) + eqn - b0*x^size(P,1);
x0 = solve(eqn);
for i = 1:size(x0)
id(i,1) = isreal(x0(i,1));
end
x0 = x0(id,:);
x0 = x0(x0 > 0,:);
clear x;
for i = 1:size(P,1)
x(i,:) = (b(i,1) - b0)./(c(i,1)*x0) + c0/c(i,1);
end
x = [x0'; x];
x = double(x);
x = x(:,sum(x <= 0,1) == 0)
lamda = -log(x);
LM_FINAL(:,k) = lamda;
end
% end of the outside loop
The important part of the above loop is towards the end:
x = x(:,sum(x <= 0,1) == 0)
This condition is sometimes not satisfied and hence the variable x is empty, which means LM_FINAL(:,k) = lamda is also empty. When this happens, I get the error:
x =
Empty matrix: 43-by-0
Improper assignment with rectangular empty matrix.
Error in Solution (line 75)
LM_FINAL(:,k) = lamda;
How can I skip this error so that the column for LM_FINAL remains as empty, but the loop continues (so that the rest of LM_FINAL's columns are filled) rather than terminating?
You can use try and catch phrase to explicitly handle errors inside loop (or elsewhere in your code).