Matlab parfor + multi-processing - matlab

I am trying to run some code that creates a sketch from an image:
https://github.com/panly099/sketchSynthesis
After getting it going on windows, im running into what I think are performance issues. The program is supposed to complete the task in 5 minutes, but my computer wont complete the task at all (ive waited at least 30 minutes)
I got some advice that I need to look into "parfor" and multiprocessing to speed it up/get it to complete the task.
Im wondering if someone could explain it to me simply how I might be able to speed it up to complete the image in under 5 minutes.
The code im supposed to edit is:
https://github.com/panly099/sketchSynthesis/blob/master/functions/sketchDetect.m
function [ detection ] = sketchDetect(sample, strokeModel, configurations, searchRatio, threshold, appGeoWeight)
% This function anchors the stroke model on the given input sketch or edge
% map sample via a dynamic programming process.
% Input :
% sample : the input sketch or edge map sample.
% strokeModel : the stroke model.
% configurations : the sampled configurations of the object.
% searchRatio : the search ratio of the original stroke bounding box.
% threshold : the chamfer matching threshold.
% appGeoWeight : the balance between the appearance and the geometry.
% Output :
% detection : the detected model instance.
% Author :
% panly099#gmail.com
% Version :
% 1.1 07/06/2016
if nargin < 5
threshold = 0.5;
end
if nargin < 6
appGeoWeight = 0.5;
end
baseScale = 1;
baseAspect = 1;
overlapWeight = 0.001;
mst = strokeModel.mst;
avgWidth = strokeModel.avgWidth;
avgHeight = strokeModel.avgHeight;
clusterBbox = strokeModel.clusterBbox;
[height, width] = size(sample);
numConf = length(configurations);
detections = cell(1, numConf);
energy = zeros(1,numConf);
for a = 1 : numConf
configuration = configurations{a};
%% sampling by fast directional chamfer matching (fdcm)
numCluster = length(configuration);
strokeCandidates = cell(1, numCluster);
parfor j = 1 : numCluster
fprintf('current stroke id: %d\n', j);
strokeImg = configuration{j};
strBbox = getBoundingBox(strokeImg,0);
strokeImg = strokeImg(strBbox(2):strBbox(4), strBbox(1):strBbox(3));
% compute the search region
curBbox = clusterBbox(:,j)';
% curBbox = strBbox;
curWidth = curBbox(3) - curBbox(1);
curHeight = curBbox(4) - curBbox(2);
searchRegion = [curBbox(1) - ceil((searchRatio - 1) / 2 * curWidth), curBbox(2) - ceil((searchRatio - 1) / 2 * curHeight), ...
curBbox(3) + ceil((searchRatio - 1) / 2 * curWidth), curBbox(4) + ceil((searchRatio - 1) / 2 * curHeight)];
if searchRegion(1) < 1
searchRegion(1) = 1;
end
if searchRegion(2) < 1
searchRegion(2) = 1;
end
if searchRegion(3) > width
searchRegion(3) = width;
end
if searchRegion(4) > height
searchRegion(4) = height;
end
img = sample(searchRegion(2):searchRegion(4), searchRegion(1):searchRegion(3));
% fdcm anchoring
[strokeMatched, cost] = chamferLocate(img, strokeImg, baseScale, baseAspect, threshold);
% figure;imshow(~strokeImg);figure;imshow(~img);
if size(strokeMatched,1) > 1000
stop = 1;
end
for s = 1 : length(strokeMatched)
tmpSample = zeros(height, width);
tmpSample(searchRegion(2):searchRegion(4), searchRegion(1):searchRegion(3)) = strokeMatched{s};
if cost(s) <= 0
cost(s) = 1;
end
strokeCandidates{j}(s,:) = [{tmpSample}, cost(s)];
% figure;imshow(~strokeMatched{s});
end
% figure;imshow(sample);
end
%% energy minimization by dynamic programming
% backward
fprintf('Backward propogation\n');
for i = length(mst) : -1 : 2
fprintf('Scanning MST layer: %d\n', i);
curLayer = mst{i};
for j = 1 : length(curLayer)
% parents = [];
curEdge = curLayer{j};
curParent = curEdge{1}(2);
% parents = [parents curParent];
curChild = curEdge{1}(1);
curParam = curEdge{2};
curParentChildIdx = size(strokeCandidates{curParent},2) + 1;
curParentCandidates = strokeCandidates{curParent}(:,1);
curChildCandidates = strokeCandidates{curChild};
for p = 1 : length(curParentCandidates)
tmpBbox = getBoundingBox(curParentCandidates{p}, 0);
curParentCenter = ceil([(tmpBbox(2)+tmpBbox(4))/2, (tmpBbox(1)+tmpBbox(3))/2]);
childCosts = ones(1, size(curChildCandidates, 1))*Inf;
for c = 1 : size(curChildCandidates, 1)
tmpBbox = getBoundingBox(curChildCandidates{c,1},0);
curChildCenter = ceil([(tmpBbox(2)+tmpBbox(4))/2, (tmpBbox(1)+tmpBbox(3))/2]);
geoNorm = 1/40;
curCost = appGeoWeight * curChildCandidates{c,2} - (1-appGeoWeight) * geoNorm * log(mvnpdf(curChildCenter-curParentCenter, ...
curParam{2},curParam{3}));
for gc = 3 : size(curChildCandidates,2)
curCost = curCost + curChildCandidates{c,gc}(2);
end
childCosts(c) = curCost;
end
[minCost, minIdx] = min(childCosts);
if minCost == Inf
stop = 1;
end
strokeCandidates{curParent}{p, curParentChildIdx} = [curChild, minCost, minIdx];
end
% % adding overlapping penalty
% parents = unique(parents);
% for u = 1 : length(parents)
% curParent = parents(u);
% for p = 1 : size(strokeCandidates{curParent}, 1)
% curParentImg = strokeCandidates{curParent}{p,1};
% overlapCost = 0;
% for c = 3 : size(strokeCandidates{curParent}, 2)
% curChild = strokeCandidates{curParent}{p,c}(1);
% curChildIdx = strokeCandidates{curParent}{p,c}(3);
% curChildImg = strokeCandidates{curChild}{curChildIdx,1};
% curOverlap = curParentImg.*curChildImg;
% overlapCost = overlapCost + sum(curOverlap(:)) * overlapWeight;
% end
% strokeCandidates{curParent}{p,2} = strokeCandidates{curParent}{p,2} + overlapCost;
% end
% end
end
end
% forward
fprintf('forward propogation\n');
fprintf('Scanning MST layer: %d\n', 1);
detection = cell(1, numCluster);
root = mst{1};
rootCosts = [];
for i = 1 : size(strokeCandidates{root},1)
cost = strokeCandidates{root}{i,2};
for j = 3 : size(strokeCandidates{root},2)
cost = cost + strokeCandidates{root}{i,j}(2);
end
rootCosts(end+1) = cost;
end
[~,rootSelected] = min(rootCosts);
minCost = rootCosts(rootSelected);
detection{root} = strokeCandidates{root}{rootSelected, 1};
for i = 2 : length(mst)
fprintf('Scanning MST layer: %d\n', i);
curLayer = mst{i};
for j = 1 : length(curLayer)
curEdge = curLayer{j};
curParent = curEdge{1}(2);
curChild = curEdge{1}(1);
curParam = curEdge{2};
curParentCandidate = detection{curParent};
curChildCandidates = strokeCandidates{curChild};
tmpBbox = getBoundingBox(curParentCandidate, 0);
curParentCenter = ceil([(tmpBbox(2)+tmpBbox(4))/2, (tmpBbox(1)+tmpBbox(3))/2]);
childCosts = ones(1, size(curChildCandidates, 1))*Inf;
for c = 1 : size(curChildCandidates, 1)
tmpBbox = getBoundingBox(curChildCandidates{c,1},0);
curChildCenter = ceil([(tmpBbox(2)+tmpBbox(4))/2, (tmpBbox(1)+tmpBbox(3))/2]);
geoNorm = 1/40;
curCost = appGeoWeight * curChildCandidates{c,2} - (1-appGeoWeight) * geoNorm * log(mvnpdf(curChildCenter-curParentCenter, ...
curParam{2},curParam{3}));
for gc = 3 : size(curChildCandidates,2)
curCost = curCost + curChildCandidates{c,gc}(2);
end
childCosts(c) = curCost;
end
[~, minId] = min(childCosts);
detection{curChild} = curChildCandidates{minId,1};
end
end
detections{a} = detection;
energy(a) = minCost;
end
[~,minIdx] = min(energy);
detection = detections{minIdx};
% visualize the detection
% synthesized = zeros(height, width);
% for i = 1 : length(detection)
% if sum(detection{i}(:))>1
% synthesized = synthesized + detection{i};
% end
% end
% figure;imshow(~sample);
% figure;imshow(~synthesized);
end

Related

Euclidian distances in speaker recognition system

I'm new in Matlab and now I have a problem for the implementation of a simple speaker recognition system using PNCC and MFFC.
My problem is on matrix dimension in fact, when I run my program, it give me this error:
Matrix dimensions must agree.
Error in disteu (line 43)
d(n,:) = sum((x(:, n+copies) - y) .^2, 1);
Error in test (line 22)
d = disteu(v, code{l});
Error in main (line 4)
test('C:\Users\Antonio\Documents\MATLAB\test',5, code);
Just for the sake of clarity I have attached my code.
Could anyone help me please?
function d = disteu(x, y)
% DISTEU Pairwise Euclidean distances between columns of two matrices
%
% Input:
% x, y: Two matrices whose each column is an a vector data.
%
% Output:
% d: Element d(i,j) will be the Euclidean distance between two
% column vectors X(:,i) and Y(:,j)
%
% Note:
% The Euclidean distance D between two vectors X and Y is:
% D = sum((x-y).^2).^0.5
% D = sum((x-y).^2).^0.5
[M, N] = size(x);
[M2, P] = size(y);
if (M ~= M2)
y=padarray(y,0,0,'post');
x=padarray(x,21,0,'post');
[M, N] = size(x)
[M2, P] = size(y)
y=padarray(y,0,0,'post');
[M2, P] = size(y)
end
%error('Matrix dimensions do not match.')
d = zeros(N, P);
if (N < P)
copies = zeros(1,P);
for n = 1:N
d(n,:) = sum((x(:, n+copies) - y) .^2, 1);
end
else
copies = zeros(1,N);
for p = 1:P
d(:,p) = sum((x - y(:, p+copies)) .^2, 1)';
end
end
d = d.^0.5;
function [aadDCT] = PNCC(rawdata, fsamp)
ad_x = rawdata;
%addpath voicebox/; % With Spectral Subtraction - default parameters
%ad_x = specsub(rawdata, fsamp);
dLamda_L = 0.999;
dLamda_S = 0.999;
dSampRate = fsamp;
dLowFreq = 200;% Changed to 40 from 200 as low freq is 40 in gabor as well
dHighFreq = dSampRate / 2;
dPowerCoeff = 1 / 15;
iFiltType = 1;
dFactor = 2.0;
dGammaThreshold = 0.005;
iM = 0; % Changed from 2 to 0 as number of frames coming out to be different due to queue
iN = 4;
iSMType = 0;
dLamda = 0.999;
dLamda2 = 0.5;
dDelta1 = 1;
dLamda3 = 0.85;
dDelta2 = 0.2;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Flags
%
bPreem = 1; % pre-emphasis flag
bSSF = 1;
bPowerLaw = 1;
bDisplay = 0;
dFrameLen = 0.025; % 25.6 ms window length, which is the default setting in CMU Sphinx
dFramePeriod = 0.010; % 10 ms frame period
iPowerFactor = 1;
global iNumFilts;
iNumFilts = 40;
if iNumFilts<30
iFFTSize = 512;
else
iFFTSize = 1024;
end
% For derivatives
deltawindow = 2; % to calculate 1st derivative
accwindow = 2; % to calculate 2nd derivative
% numcoeffs = 13; % number of cepstral coefficients to be used
numcoeffs = 13;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Flags
%
%
% Array Queue Ring-buffer
%
global Queue_aad_P;
global Queue_iHead;
global Queue_iTail;
global Queue_iWindow;
global Queue_iNumElem;
Queue_iWindow = 2 * iM + 1;
Queue_aad_P = zeros(Queue_iWindow, iNumFilts);
Queue_iHead = 0;
Queue_iTail = 0;
Queue_iNumElem = 0;
iFL = floor(dFrameLen * dSampRate);
iFP = floor(dFramePeriod * dSampRate);
iNumFrames = floor((length(ad_x) - iFL) / iFP) + 1;
iSpeechLen = length(ad_x);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Pre-emphasis using H(z) = 1 - 0.97 z ^ -1
%
if (bPreem == 1)
ad_x = filter([1 -0.97], 1, double(ad_x));
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Obtaning the gammatone coefficient.
%
% Based on M. Snelly's auditory toolbox.
% In actual C-implementation, we just use a table
%
bGamma = 1;
[wts,binfrqs] = fft2melmx(iFFTSize, dSampRate, iNumFilts, 1, dLowFreq, dHighFreq, iFiltType);
wts = wts';
wts(size(wts, 1) / 2 + 1 : size(wts, 1), : ) = [];
aad_H = wts;
i_FI = 0;
i_FI_Out = 0;
if bSSF == 1
adSumPower = zeros(1, iNumFrames - 2 * iM);
else
adSumPower = zeros(1, iNumFrames);
end
%dLamda_L = 0.998;
aad_P = zeros(iNumFrames, iNumFilts);
aad_P_Out = zeros(iNumFrames - 2 * iM, iNumFilts);
ad_Q = zeros(1, iNumFilts);
ad_Q_Out = zeros(1, iNumFilts);
ad_QMVAvg = zeros(1, iNumFilts);
ad_w = zeros(1, iNumFilts);
ad_w_sm = zeros(1, iNumFilts);
ad_QMVAvg_LA = zeros(1, iNumFilts);
MEAN_POWER = 1e10;
dMean = 5.8471e+08;
dPeak = 2.7873e+09 / 15.6250;
% (1.7839e8, 2.0517e8, 2.4120e8, 2.9715e8, 3.9795e8) 95, 96, 97, 98, 99
% percentile from WSJ-si84
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
dPeakVal = 4e+07;% % 4.0638e+07 --> Mean from WSJ0-si84 (Important!!!)
%%%%%%%%%%%
dMean = dPeakVal;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Obtaining the short-time Power P(i, j)
%
for m = 0 : iFP : iSpeechLen - iFL
ad_x_st = ad_x(m + 1 : m + iFL) .* hamming(iFL);
adSpec = fft(ad_x_st, iFFTSize);
ad_X = abs(adSpec(1: iFFTSize / 2));
aadX(:, i_FI + 1) = ad_X;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Calculating the Power P(i, j)
%
for j = 1 : iNumFilts
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Squared integration
%
if iFiltType == 2
aad_P(i_FI + 1, j) = sum((ad_X .* aad_H(:, j)) .^ 2);
else
aad_P(i_FI + 1, j) = sum((ad_X .^ 2 .* aad_H(:, j)));
end
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Calculating the Power P(i, j)
%
dSumPower = sum(aad_P(i_FI + 1, : ));
if bSSF == 1
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Ring buffer (using a Queue)
%
if (i_FI >= 2 * iM + 1)
Queue_poll();
end
Queue_offer(aad_P(i_FI + 1, :));
ad_Q = Queue_avg();
if (i_FI == 2 * iM)
ad_QMVAvg = ad_Q.^ (1 / 15);
ad_PBias = (ad_Q) * 0.9;
end
if (i_FI >= 2 * iM)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Bias Update
%
for i = 1 : iNumFilts,
if (ad_Q(i) > ad_PBias(i))
ad_PBias(i) = dLamda * ad_PBias(i) + (1 - dLamda) * ad_Q(i);
else
ad_PBias(i) = dLamda2 * ad_PBias(i) + (1 - dLamda2) * ad_Q(i);
end
end
for i = 1 : iNumFilts,
ad_Q_Out(i) = max(ad_Q(i) - ad_PBias(i), 0) ;
if (i_FI == 2 * iM)
ad_QMVAvg2(i) = 0.9 * ad_Q_Out(i);
ad_QMVAvg3(i) = ad_Q_Out(i);
ad_QMVPeak(i) = ad_Q_Out(i);
end
if (ad_Q_Out(i) > ad_QMVAvg2(i))
ad_QMVAvg2(i) = dLamda * ad_QMVAvg2(i) + (1 - dLamda) * ad_Q_Out(i);
else
ad_QMVAvg2(i) = dLamda2 * ad_QMVAvg2(i) + (1 - dLamda2) * ad_Q_Out(i);
end
dOrg = ad_Q_Out(i);
ad_QMVAvg3(i) = dLamda3 * ad_QMVAvg3(i);
if (ad_Q(i) < dFactor * ad_PBias(i))
ad_Q_Out(i) = ad_QMVAvg2(i);
else
if (ad_Q_Out(i) <= dDelta1 * ad_QMVAvg3(i))
ad_Q_Out(i) = dDelta2 * ad_QMVAvg3(i);
end
end
ad_QMVAvg3(i) = max(ad_QMVAvg3(i), dOrg);
ad_Q_Out(i) = max(ad_Q_Out(i), ad_QMVAvg2(i));
end
ad_w = ad_Q_Out ./ max(ad_Q, eps);
for i = 1 : iNumFilts,
if iSMType == 0
ad_w_sm(i) = mean(ad_w(max(i - iN, 1) : min(i + iN ,iNumFilts)));
elseif iSMType == 1
ad_w_sm(i) = exp(mean(log(ad_w(max(i - iN, 1) : min(i + iN ,iNumFilts)))));
elseif iSMType == 2
ad_w_sm(i) = mean((ad_w(max(i - iN, 1) : min(i + iN ,iNumFilts))).^(1/15))^15;
elseif iSMType == 3
ad_w_sm(i) = (mean( (ad_w(max(i - iN, 1) : min(i + iN ,iNumFilts))).^15 )) ^ (1 / 15);
end
end
aad_P_Out(i_FI_Out + 1, :) = ad_w_sm .* aad_P(i_FI - iM + 1, :);
adSumPower(i_FI_Out + 1) = sum(aad_P_Out(i_FI_Out + 1, :));
if adSumPower(i_FI_Out + 1) > dMean
dMean = dLamda_S * dMean + (1 - dLamda_S) * adSumPower(i_FI_Out + 1);
else
dMean = dLamda_L * dMean + (1 - dLamda_L) * adSumPower(i_FI_Out + 1);
end
aad_P_Out(i_FI_Out + 1, :) = aad_P_Out(i_FI_Out + 1, :) / (dMean) * MEAN_POWER;
i_FI_Out = i_FI_Out + 1;
end
else % if not SSF
adSumPower(i_FI + 1) = sum(aad_P(i_FI + 1, :));
if adSumPower(i_FI_Out + 1) > dMean
dMean = dLamda_S * dMean + (1 - dLamda_S) * adSumPower(i_FI_Out + 1);
else
dMean = dLamda_L * dMean + (1 - dLamda_L) * adSumPower(i_FI_Out + 1);
end
aad_P_Out(i_FI + 1, :) = aad_P(i_FI + 1, :) / (dMean) * MEAN_POWER;
end
i_FI = i_FI + 1;
end
%adSorted = sort(adSumPower);
%dMaxPower = adSorted(round(0.98 * length(adSumPower)));
%aad_P_Out = aad_P_Out / dMaxPower * 1e10;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Apply the nonlinearity
%
%dPowerCoeff
if bPowerLaw == 1
aadSpec = aad_P_Out .^ dPowerCoeff;
else
aadSpec = log(aad_P_Out + eps);
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% DCT
%
aadDCT = dct(aadSpec')';
%aadDCT(:, numcoeffs+1:iNumFilts) = [];
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% MVN
%
% for i = 1 : numcoeffs
% aadDCT( :, i ) = (aadDCT( : , i ) - mean(aadDCT( : , i)))/std(aadDCT(:,i));
% end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Temporal Derivatives
% calculate 1st derivative (velocity)
dt1 = deltacc(aadDCT', deltawindow);
% calculate 2nd derivative (acceleration)
dt2 = deltacc(dt1, accwindow);
% append dt1 and dt2 to mfcco
aadDCT = [aadDCT'; dt2];
% aadDCT = [aadDCT'; dt2];
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Display
%
if bDisplay == 1
figure
aadSpec = idct(aadDCT', iNumFilts);
imagesc(aadSpec); axis xy;
end
aadDCT = aadDCT';
%{
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Writing the feature in Sphinx format
%
[iM, iN] = size(aadDCT);
iNumData = iM * iN;
fid = fopen(szOutFeatFileName, 'wb');
fwrite(fid, iNumData, 'int32');
iCount = fwrite(fid, aadDCT(:), 'float32');
fclose(fid);
%}
end
function dt = deltacc(input, winlen)
% calculates derivatives of a matrix, whose columns are feature vectors
tmp = 0;
for cnt = 1 : winlen
tmp = tmp + cnt*cnt;
end
nrm = 1 / (2*tmp);
dt = zeros(size(input));
rows = size(input,1);
cols = size(input,2);
for col = 1 : cols
for cnt = 1 : winlen
inx1 = col - cnt; inx2 = col + cnt;
if inx1 < 1; inx1 = 1; end
if inx2 > cols; inx2 = cols; end
dt(:, col) = dt(:, col) + (input(:, inx2) - input(:, inx1)) * cnt;
end
end
dt = dt * nrm;
end
function [] = Queue_offer(ad_x)
global Queue_aad_P;
global Queue_iHead;
global Queue_iTail;
global Queue_iWindow;
global Queue_iNumElem;
Queue_aad_P(Queue_iTail + 1, :) = ad_x;
Queue_iTail = mod(Queue_iTail + 1, Queue_iWindow);
Queue_iNumElem = Queue_iNumElem + 1;
if Queue_iNumElem > Queue_iWindow
error ('Queue overflow');
end
end
function [ad_x] = Queue_poll()
global Queue_aad_P;
global Queue_iHead;
global Queue_iTail;
global Queue_iWindow;
global Queue_iNumElem;
if Queue_iNumElem <= 0
error ('No elements');
end
ad_x = Queue_aad_P(Queue_iHead + 1, :);
Queue_iHead = mod(Queue_iHead + 1, Queue_iWindow);
Queue_iNumElem = Queue_iNumElem - 1;
end
function[adMean] = Queue_avg()
global Queue_aad_P;
global Queue_iHead;
global Queue_iTail;
global Queue_iWindow;
global Queue_iNumElem;
global iNumFilts;
adMean = zeros(1, iNumFilts); % Changed from 40 (number of filter banks)
iPos = Queue_iHead;
for i = 1 : Queue_iNumElem
adMean = adMean + Queue_aad_P(iPos + 1 ,: );
iPos = mod(iPos + 1, Queue_iWindow);
end
adMean = adMean / Queue_iNumElem;
end
function test(testdir, n, code)
for k = 1:n % read test sound file of each speaker
file = sprintf('%ss%d.wav', testdir, k);
[s, fs] = audioread(file);
%x = s + 0.01*randn(length(s),1); %AWGN Noise
%[SNR1] = snr(s);
%[SNR2] = snr(x) ;
v = PNCC(s, fs); % Compute MFCC's
distmin = inf;
k1 = 0;
for l = 1:length(code) % each trained codebook, compute distortion
d = disteu(v, code{l});
dist = sum(min(d,[],2)) / size(d,1);
if dist < distmin
distmin = dist;
k1 = l;
end
end
msg = sprintf('speaker%d -->> s%d', k, k1);
disp(msg);
end
function r = vqlbg(d,k)
%
% Inputs: d contains training data vectors (one per column)
% k is number of centroids required
e = .01;
r = mean(d, 2);
dpr = 10000;
for i = 1:log2(k)
r = [r*(1+e), r*(1-e)];
while (1 == 1)
z = interdists(d, r);
[m,ind] = min(z, [], 2);
t = 0;
for j = 1:2^i
r(:, j) = mean(d(:, find(ind == j)), 2);
x = interdists(d(:, find(ind == j)), r(:, j));
for q = 1:length(x)
t = t + x(q);
end
end
if (((dpr - t)/t) < e)
break;
else
dpr = t;
end
end
end %Output: r contains the result VQ codebook (k columns, one for each centroids)

Parallel computing and number of cores in Matlab

I am trying to run some code that creates a sketch from an image:
https://github.com/panly099/sketchSynthesis
After getting it going on windows, im running into what I think are performance issues. The program is supposed to complete the task in 5 minutes, but my computer wont complete the task at all (ive waited at least 30 minutes)
My computer has only 4GB of RAM and is an Intel I3.
The code uses parallel computing apparently, so to active Parfor I did:
if matlabpool('size') == 0
matlabpool open 2
end
Which gave me "connected to 2 workers"
The CPU usage looks like: https://imgur.com/a/lkMRg
Im wondering if someone could explain it to me simply how I might be able to speed it up to complete the image in under 5 minutes, or tell me if I need to upgrade my RAM or amount of cores.
The guy who wrote the code said he ran it originally on an 8-core server.
The code that includes the parfor loop comes from
https://github.com/panly099/sketchSynthesis/blob/master/functions/sketchDetect.m :
function [ detection ] = sketchDetect(sample, strokeModel, configurations, searchRatio, threshold, appGeoWeight)
% This function anchors the stroke model on the given input sketch or edge
% map sample via a dynamic programming process.
% Input :
% sample : the input sketch or edge map sample.
% strokeModel : the stroke model.
% configurations : the sampled configurations of the object.
% searchRatio : the search ratio of the original stroke bounding box.
% threshold : the chamfer matching threshold.
% appGeoWeight : the balance between the appearance and the geometry.
% Output :
% detection : the detected model instance.
% Author :
% panly099#gmail.com
% Version :
% 1.1 07/06/2016
if nargin < 5
threshold = 0.5;
end
if nargin < 6
appGeoWeight = 0.5;
end
baseScale = 1;
baseAspect = 1;
overlapWeight = 0.001;
mst = strokeModel.mst;
avgWidth = strokeModel.avgWidth;
avgHeight = strokeModel.avgHeight;
clusterBbox = strokeModel.clusterBbox;
[height, width] = size(sample);
numConf = length(configurations);
detections = cell(1, numConf);
energy = zeros(1,numConf);
for a = 1 : numConf
configuration = configurations{a};
%% sampling by fast directional chamfer matching (fdcm)
numCluster = length(configuration);
strokeCandidates = cell(1, numCluster);
parfor j = 1 : numCluster
fprintf('current stroke id: %d\n', j);
strokeImg = configuration{j};
strBbox = getBoundingBox(strokeImg,0);
strokeImg = strokeImg(strBbox(2):strBbox(4), strBbox(1):strBbox(3));
% compute the search region
curBbox = clusterBbox(:,j)';
% curBbox = strBbox;
curWidth = curBbox(3) - curBbox(1);
curHeight = curBbox(4) - curBbox(2);
searchRegion = [curBbox(1) - ceil((searchRatio - 1) / 2 * curWidth), curBbox(2) - ceil((searchRatio - 1) / 2 * curHeight), ...
curBbox(3) + ceil((searchRatio - 1) / 2 * curWidth), curBbox(4) + ceil((searchRatio - 1) / 2 * curHeight)];
if searchRegion(1) < 1
searchRegion(1) = 1;
end
if searchRegion(2) < 1
searchRegion(2) = 1;
end
if searchRegion(3) > width
searchRegion(3) = width;
end
if searchRegion(4) > height
searchRegion(4) = height;
end
img = sample(searchRegion(2):searchRegion(4), searchRegion(1):searchRegion(3));
% fdcm anchoring
[strokeMatched, cost] = chamferLocate(img, strokeImg, baseScale, baseAspect, threshold);
% figure;imshow(~strokeImg);figure;imshow(~img);
if size(strokeMatched,1) > 1000
stop = 1;
end
for s = 1 : length(strokeMatched)
tmpSample = zeros(height, width);
tmpSample(searchRegion(2):searchRegion(4), searchRegion(1):searchRegion(3)) = strokeMatched{s};
if cost(s) <= 0
cost(s) = 1;
end
strokeCandidates{j}(s,:) = [{tmpSample}, cost(s)];
% figure;imshow(~strokeMatched{s});
end
% figure;imshow(sample);
end
%% energy minimization by dynamic programming
% backward
fprintf('Backward propogation\n');
for i = length(mst) : -1 : 2
fprintf('Scanning MST layer: %d\n', i);
curLayer = mst{i};
for j = 1 : length(curLayer)
% parents = [];
curEdge = curLayer{j};
curParent = curEdge{1}(2);
% parents = [parents curParent];
curChild = curEdge{1}(1);
curParam = curEdge{2};
curParentChildIdx = size(strokeCandidates{curParent},2) + 1;
curParentCandidates = strokeCandidates{curParent}(:,1);
curChildCandidates = strokeCandidates{curChild};
for p = 1 : length(curParentCandidates)
tmpBbox = getBoundingBox(curParentCandidates{p}, 0);
curParentCenter = ceil([(tmpBbox(2)+tmpBbox(4))/2, (tmpBbox(1)+tmpBbox(3))/2]);
childCosts = ones(1, size(curChildCandidates, 1))*Inf;
for c = 1 : size(curChildCandidates, 1)
tmpBbox = getBoundingBox(curChildCandidates{c,1},0);
curChildCenter = ceil([(tmpBbox(2)+tmpBbox(4))/2, (tmpBbox(1)+tmpBbox(3))/2]);
geoNorm = 1/40;
curCost = appGeoWeight * curChildCandidates{c,2} - (1-appGeoWeight) * geoNorm * log(mvnpdf(curChildCenter-curParentCenter, ...
curParam{2},curParam{3}));
for gc = 3 : size(curChildCandidates,2)
curCost = curCost + curChildCandidates{c,gc}(2);
end
childCosts(c) = curCost;
end
[minCost, minIdx] = min(childCosts);
if minCost == Inf
stop = 1;
end
strokeCandidates{curParent}{p, curParentChildIdx} = [curChild, minCost, minIdx];
end
% % adding overlapping penalty
% parents = unique(parents);
% for u = 1 : length(parents)
% curParent = parents(u);
% for p = 1 : size(strokeCandidates{curParent}, 1)
% curParentImg = strokeCandidates{curParent}{p,1};
% overlapCost = 0;
% for c = 3 : size(strokeCandidates{curParent}, 2)
% curChild = strokeCandidates{curParent}{p,c}(1);
% curChildIdx = strokeCandidates{curParent}{p,c}(3);
% curChildImg = strokeCandidates{curChild}{curChildIdx,1};
% curOverlap = curParentImg.*curChildImg;
% overlapCost = overlapCost + sum(curOverlap(:)) * overlapWeight;
% end
% strokeCandidates{curParent}{p,2} = strokeCandidates{curParent}{p,2} + overlapCost;
% end
% end
end
end
% forward
fprintf('forward propogation\n');
fprintf('Scanning MST layer: %d\n', 1);
detection = cell(1, numCluster);
root = mst{1};
rootCosts = [];
for i = 1 : size(strokeCandidates{root},1)
cost = strokeCandidates{root}{i,2};
for j = 3 : size(strokeCandidates{root},2)
cost = cost + strokeCandidates{root}{i,j}(2);
end
rootCosts(end+1) = cost;
end
[~,rootSelected] = min(rootCosts);
minCost = rootCosts(rootSelected);
detection{root} = strokeCandidates{root}{rootSelected, 1};
for i = 2 : length(mst)
fprintf('Scanning MST layer: %d\n', i);
curLayer = mst{i};
for j = 1 : length(curLayer)
curEdge = curLayer{j};
curParent = curEdge{1}(2);
curChild = curEdge{1}(1);
curParam = curEdge{2};
curParentCandidate = detection{curParent};
curChildCandidates = strokeCandidates{curChild};
tmpBbox = getBoundingBox(curParentCandidate, 0);
curParentCenter = ceil([(tmpBbox(2)+tmpBbox(4))/2, (tmpBbox(1)+tmpBbox(3))/2]);
childCosts = ones(1, size(curChildCandidates, 1))*Inf;
for c = 1 : size(curChildCandidates, 1)
tmpBbox = getBoundingBox(curChildCandidates{c,1},0);
curChildCenter = ceil([(tmpBbox(2)+tmpBbox(4))/2, (tmpBbox(1)+tmpBbox(3))/2]);
geoNorm = 1/40;
curCost = appGeoWeight * curChildCandidates{c,2} - (1-appGeoWeight) * geoNorm * log(mvnpdf(curChildCenter-curParentCenter, ...
curParam{2},curParam{3}));
for gc = 3 : size(curChildCandidates,2)
curCost = curCost + curChildCandidates{c,gc}(2);
end
childCosts(c) = curCost;
end
[~, minId] = min(childCosts);
detection{curChild} = curChildCandidates{minId,1};
end
end
detections{a} = detection;
energy(a) = minCost;
end
[~,minIdx] = min(energy);
detection = detections{minIdx};
% visualize the detection
% synthesized = zeros(height, width);
% for i = 1 : length(detection)
% if sum(detection{i}(:))>1
% synthesized = synthesized + detection{i};
% end
% end
% figure;imshow(~sample);
% figure;imshow(~synthesized);
end

Convolutional Neural Networks in MATLAB. Adding one more layer

what I'm trying to do is to testify the fact that if I add one more layer into CNN, the accuracy goes higher.
The code is below here.
This code is from https://github.com/lhoang29/DigitRecognition/blob/master/cnnload.m
I'm at the beginner stage of CNN and trying to expand one more layer including
convolution and pooling stage. I tried several ways but seems not working. Could someone show me how to expand one more layer?
Thankyou. Below is the code
Code for main function:
clear all; close all; clc;
maxtrain = 10000;
iter = 10;
eta = 0.01;
%% Data Load
trlblid = fopen('train-labels-idx1-ubyte');
trimgid = fopen('train-images-idx3-ubyte');
tslblid = fopen('t10k-labels-idx1-ubyte');
tsimgid = fopen('t10k-images-idx3-ubyte');
% read train labels
fread(trlblid, 4);
numtrlbls = toint(fread(trlblid, 4));
trainlabels = fread(trlblid, numtrlbls);
% read train data
fread(trimgid, 4);
numtrimg = toint(fread(trimgid, 4));
trimgh = toint(fread(trimgid, 4));
trimgw = toint(fread(trimgid, 4));
trainimages = permute(reshape(fread(trimgid,trimgh*trimgw*numtrimg),trimgh,trimgw,numtrimg), [2 1 3]);
% read test labels
fread(tslblid, 4);
numtslbls = toint(fread(tslblid, 4));
testlabels = fread(tslblid, numtslbls);
% read test data
fread(tsimgid, 4);
numtsimg = toint(fread(tsimgid, 4));
tsimgh = toint(fread(tsimgid, 4));
tsimgw = toint(fread(tsimgid, 4));
testimages = permute(reshape(fread(tsimgid, tsimgh*tsimgw*numtsimg),tsimgh,tsimgw,numtsimg), [2 1 3]);
%% CNN Training
[missimages, misslabels] = cnntrain(trainlabels,trainimages,testlabels,testimages,maxtrain,iter,eta);
%% CNN Testing
showmiss(missimages,misslabels,testimages,testlabels,25,2);
Code for training:
function [missimages, misslabels] = cnntrain(trainlabels,trainimages,testlabels,testimages,maxtrain,iter,eta)
fn = 5; % number of kernels for layer 1
ks = 5; % size of kernel
[h,w,n] = size(trainimages);
n = min(n,maxtrain);
% normalize data to [-1,1] range
nitrain = (trainimages / 255) * 2 - 1;
nitest = (testimages / 255) * 2 - 1;
% train with backprop
h1 = h-ks+1;
w1 = w-ks+1;
A1 = zeros(h1,w1,fn);
h2 = h1/2;
w2 = w1/2;
I2 = zeros(h2,w2,fn);
A2 = zeros(h2,w2,fn);
A3 = zeros(10,1);
% kernels for layer 1
W1 = randn(ks,ks,fn) * .01;
B1 = ones(1,fn);
% scale parameter and bias for layer 2
S2 = randn(1,fn) * .01;
B2 = ones(1,fn);
% weights and bias parameters for fully-connected output layer
W3 = randn(h2,w2,fn,10) * .01;
B3 = ones(10,1);
% true outputs
Y = eye(10)*2-1;
for it=1:iter
err = 0;
for im=1:n
%------------ FORWARD PROP ------------%
% Layer 1: convolution with bias followed by sigmoidal squashing
for fm=1:fn
A1(:,:,fm) = convn(nitrain(:,:,im),W1(end:-1:1,end:-1:1,fm),'valid') + B1(fm);
end
Z1 = tanh(A1);
% Layer 2: average/subsample with scaling and bias
for fm=1:fn
I2(:,:,fm) = avgpool(Z1(:,:,fm));
A2(:,:,fm) = I2(:,:,fm) * S2(fm) + B2(fm);
end
Z2 = tanh(A2);
% Layer 3: fully connected
for cl=1:10
A3(cl) = convn(Z2,W3(end:-1:1,end:-1:1,end:-1:1,cl),'valid') + B3(cl);
end
Z3 = tanh(A3); % Final output
err = err + .5 * norm(Z3 - Y(:,trainlabels(im)+1),2)^2;
%------------ BACK PROP ------------%
% Compute error at output layer
Del3 = (1 - Z3.^2) .* (Z3 - Y(:,trainlabels(im)+1));
% Compute error at layer 2
Del2 = zeros(size(Z2));
for cl=1:10
Del2 = Del2 + Del3(cl) * W3(:,:,:,cl);
end
Del2 = Del2 .* (1 - Z2.^2);
% Compute error at layer 1
Del1 = zeros(size(Z1));
for fm=1:fn
Del1(:,:,fm) = (S2(fm)/4)*(1 - Z1(:,:,fm).^2);
for ih=1:h1
for iw=1:w1
Del1(ih,iw,fm) = Del1(ih,iw,fm) * Del2(floor((ih+1)/2),floor((iw+1)/2),fm);
end
end
end
% Update bias at layer 3
DB3 = Del3; % gradient w.r.t bias
B3 = B3 - eta*DB3;
% Update weights at layer 3
for cl=1:10
DW3 = DB3(cl) * Z2; % gradient w.r.t weights
W3(:,:,:,cl) = W3(:,:,:,cl) - eta * DW3;
end
% Update scale and bias parameters at layer 2
for fm=1:fn
DS2 = convn(Del2(:,:,fm),I2(end:-1:1,end:-1:1,fm),'valid');
S2(fm) = S2(fm) - eta * DS2;
DB2 = sum(sum(Del2(:,:,fm)));
B2(fm) = B2(fm) - eta * DB2;
end
% Update kernel weights and bias parameters at layer 1
for fm=1:fn
DW1 = convn(nitrain(:,:,im),Del1(end:-1:1,end:-1:1,fm),'valid');
W1(:,:,fm) = W1(:,:,fm) - eta * DW1;
DB1 = sum(sum(Del1(:,:,fm)));
B1(fm) = B1(fm) - eta * DB1;
end
end
disp(['Error: ' num2str(err) ' at iteration ' num2str(it)]);
end
miss = 0;
numtest=size(testimages,3);
missimages = zeros(1,numtest);
misslabels = zeros(1,numtest);
for im=1:numtest
for fm=1:fn
A1(:,:,fm) = convn(nitest(:,:,im),W1(end:-1:1,end:-1:1,fm),'valid') + B1(fm);
end
Z1 = tanh(A1);
% Layer 2: average/subsample with scaling and bias
for fm=1:fn
I2(:,:,fm) = avgpool(Z1(:,:,fm));
A2(:,:,fm) = I2(:,:,fm) * S2(fm) + B2(fm);
end
Z2 = tanh(A2);
% Layer 3: fully connected
for cl=1:10
A3(cl) = convn(Z2,W3(end:-1:1,end:-1:1,end:-1:1,cl),'valid') + B3(cl);
end
Z3 = tanh(A3); % Final output
[pm,pl] = max(Z3);
if pl ~= testlabels(im)+1
miss = miss + 1;
missimages(miss) = im;
misslabels(miss) = pl - 1;
end
end
disp(['Miss: ' num2str(miss) ' out of ' num2str(numtest)]);
end
function [pr] = avgpool(img)
pr = zeros(size(img)/2);
for r=1:2:size(img,1)
for c=1:2:size(img,2)
pr((r+1)/2,(c+1)/2) = (img(r,c)+img(r+1,c)+img(r,c+1)+img(r+1,c+1))/4;
end
end
end
Code for showing accuracy
function [] = showmiss(missim,misslab,testimages,testlabels,numshow,numpages)
nummiss = nnz(missim);
page = 1;
showsize = floor(sqrt(numshow));
for f=1:numshow:nummiss
figure(floor(f/numshow) + 1);
for m=f:min(nummiss,f+numshow-1)
subplot(showsize,showsize,m-f+1);
imshow(testimages(:,:,missim(m)));
title(strcat(num2str(testlabels(missim(m))), ':', num2str(misslab(m))));
end
page = page + 1;
if page > numpages
break;
end
end
end
Function toint
function [x] = toint(b)
x = b(1)*16777216 + b(2)*65536 + b(3)*256 + b(4);
end

Plot equally spaced markers along a spiral

I want to move a red star marker along the spiral trajectory with an equal distance of 5 units between the red star points on its circumference like in the below image.
vertspacing = 10;
horzspacing = 10;
thetamax = 10*pi;
% Calculation of (x,y) - underlying archimedean spiral.
b = vertspacing/2/pi;
theta = 0:0.01:thetamax;
x = b*theta.*cos(theta)+50;
y = b*theta.*sin(theta)+50;
% Calculation of equidistant (xi,yi) points on spiral.
smax = 0.5*b*thetamax.*thetamax;
s = 0:horzspacing:smax;
thetai = sqrt(2*s/b);
xi = b*thetai.*cos(thetai);
yi = b*thetai.*sin(thetai);
plot(x,y,'b-');
hold on
I want to get a figure that looks like the following:
This is my code for the circle trajectory:
% Initialization steps.
format long g;
format compact;
fontSize = 20;
r1 = 50;
r2 = 35;
r3= 20;
xc = 50;
yc = 50;
% Since arclength = radius * (angle in radians),
% (angle in radians) = arclength / radius = 5 / radius.
deltaAngle1 = 5 / r1;
deltaAngle2 = 5 / r2;
deltaAngle3 = 5 / r3;
theta1 = 0 : deltaAngle1 : (2 * pi);
theta2 = 0 : deltaAngle2 : (2 * pi);
theta3 = 0 : deltaAngle3 : (2 * pi);
x1 = r1*cos(theta1) + xc;
y1 = r1*sin(theta1) + yc;
x2 = r2*cos(theta2) + xc;
y2 = r2*sin(theta2) + yc;
x3 = r3*cos(theta3) + xc;
y3 = r3*sin(theta3) + yc;
plot(x1,y1,'color',[1 0.5 0])
hold on
plot(x2,y2,'color',[1 0.5 0])
hold on
plot(x3,y3,'color',[1 0.5 0])
hold on
% Connecting Line:
plot([70 100], [50 50],'color',[1 0.5 0])
% Set up figure properties:
% Enlarge figure to full screen.
set(gcf, 'Units', 'Normalized', 'OuterPosition', [0, 0, 1, 1]);
drawnow;
axis square;
for i = 1 : length(theta1)
plot(x1(i),y1(i),'r*')
pause(0.1)
end
for i = 1 : length(theta2)
plot(x2(i),y2(i),'r*')
pause(0.1)
end
for i = 1 : length(theta3)
plot(x3(i),y3(i),'r*')
pause(0.1)
end
I can't think of a way to compute distance along a spiral, so I'm approximating it with circles, in hopes that it will still be useful.
My solution relies on the InterX function from FEX, to find the intersection of circles with the spiral. I am providing an animation so it is easier to understand.
The code (tested on R2017a):
function [x,y,xi,yi] = q44916610(doPlot)
%% Input handling:
if nargin < 1 || isempty(doPlot)
doPlot = false;
end
%% Initialization:
origin = [50,50];
vertspacing = 10;
thetamax = 5*(2*pi);
%% Calculation of (x,y) - underlying archimedean spiral.
b = vertspacing/(2*pi);
theta = 0:0.01:thetamax;
x = b*theta.*cos(theta) + origin(1);
y = b*theta.*sin(theta) + origin(2);
%% Calculation of equidistant (xi,yi) points on spiral.
DST = 5; cRes = 360;
numPts = ceil(vertspacing*thetamax); % Preallocation
[xi,yi] = deal(NaN(numPts,1));
if doPlot && isHG2() % Plots are only enabled if the MATLAB version is new enough.
figure(); plot(x,y,'b-'); hold on; axis equal; grid on; grid minor;
hAx = gca; hAx.XLim = [-5 105]; hAx.YLim = [-5 105];
hP = plot(xi,yi,'r*');
else
hP = struct('XData',xi,'YData',yi);
end
hP.XData(1) = origin(1); hP.YData(1) = origin(2);
for ind = 2:numPts
P = InterX([x;y], makeCircle([hP.XData(ind-1),hP.YData(ind-1)],DST/2,cRes));
[~,I] = max(abs(P(1,:)-origin(1)+1i*(P(2,:)-origin(2))));
if doPlot, pause(0.1); end
hP.XData(ind) = P(1,I); hP.YData(ind) = P(2,I);
if doPlot, pause(0.1); delete(hAx.Children(1)); end
end
xi = hP.XData(~isnan(hP.XData)); yi = hP.YData(~isnan(hP.YData));
%% Nested function(s):
function [XY] = makeCircle(cnt, R, nPts)
P = (cnt(1)+1i*cnt(2))+R*exp(linspace(0,1,nPts)*pi*2i);
if doPlot, plot(P,'Color',lines(1)); end
XY = [real(P); imag(P)];
end
end
%% Local function(s):
function tf = isHG2()
try
tf = ~verLessThan('MATLAB', '8.4');
catch
tf = false;
end
end
function P = InterX(L1,varargin)
% DOCUMENTATION REMOVED. For a full version go to:
% https://www.mathworks.com/matlabcentral/fileexchange/22441-curve-intersections
narginchk(1,2);
if nargin == 1
L2 = L1; hF = #lt; %...Avoid the inclusion of common points
else
L2 = varargin{1}; hF = #le;
end
%...Preliminary stuff
x1 = L1(1,:)'; x2 = L2(1,:);
y1 = L1(2,:)'; y2 = L2(2,:);
dx1 = diff(x1); dy1 = diff(y1);
dx2 = diff(x2); dy2 = diff(y2);
%...Determine 'signed distances'
S1 = dx1.*y1(1:end-1) - dy1.*x1(1:end-1);
S2 = dx2.*y2(1:end-1) - dy2.*x2(1:end-1);
C1 = feval(hF,D(bsxfun(#times,dx1,y2)-bsxfun(#times,dy1,x2),S1),0);
C2 = feval(hF,D((bsxfun(#times,y1,dx2)-bsxfun(#times,x1,dy2))',S2'),0)';
%...Obtain the segments where an intersection is expected
[i,j] = find(C1 & C2);
if isempty(i), P = zeros(2,0); return; end
%...Transpose and prepare for output
i=i'; dx2=dx2'; dy2=dy2'; S2 = S2';
L = dy2(j).*dx1(i) - dy1(i).*dx2(j);
i = i(L~=0); j=j(L~=0); L=L(L~=0); %...Avoid divisions by 0
%...Solve system of eqs to get the common points
P = unique([dx2(j).*S1(i) - dx1(i).*S2(j), ...
dy2(j).*S1(i) - dy1(i).*S2(j)]./[L L],'rows')';
function u = D(x,y)
u = bsxfun(#minus,x(:,1:end-1),y).*bsxfun(#minus,x(:,2:end),y);
end
end
Result:
Note that in the animation above, the diameter of the circle (and hence the distance between the red points) is 10 and not 5.

Output of k3_1 is capped at -3.1445e+24

I'm solving a system of ODEs using RK4. I'm generating a straight line plot that seems to be due to the fact that k3_1 is capped at -3.1445e+24. I don't understand why it is capped.
function RK4system_MNModel()
parsec = 3.08*10^18;
r_1 = 8.5*1000.0*parsec; % in cm
z_1 = 0.0; % in cm also
theta_1 = 0.0;
grav = 6.6720*10^-8;
amsun = 1.989*10^33; % in grams
amg = 1.5d11*amsun; % in grams
gm = grav*amg; % constant
q = 0.9; % axial ratio
u_1 = 130.0; % in cm/sec
w_1 = 95*10^4.0; % in cm/sec
v = 180*10^4.0; % in cm/sec
vcirc = sqrt(gm/r_1); % circular speed (constant)
nsteps = 50000;
deltat = 5.0*10^11; % in seconds
angmom = r_1*v; % these are the same
angmom2 = angmom^2.0;
e = -gm/r_1+u_1*u_1/2.0+angmom2/(2.0*r_1*r_1);
time=0.0;
for i=1:nsteps
k3_1 = deltat*u_1 %%%%% THIS LINE
k4_1 = deltat*(-gm*r_1/((r_1^2.0+(1+sqrt(1+z_1^2.0))^2.0)^1.5) + angmom2/(r_1^3.0)); % u'=-dphi_dr+lz^2/(r^3.0) with lz=vi*ri this gives deltau
k5_1 = deltat*(angmom/(r_1^2.0)); % theta'=lz/r^2 this gives deltatheta
k6_1 = deltat*w_1;
k7_1 = deltat*(-gm*z_1*(1+sqrt(1+z_1^2.0))/(sqrt(1+z_1^2.0)*(r_1^2.0+(1+sqrt(1+z_1^2.0))^2.0)^1.5));
r_2 = r_1+k3_1/2.0;
u_2 = u_1+k4_1/2.0;
theta_2 = theta_1+k5_1/2.0;
z_2 = z_1 + k6_1/2.0;
w_2 = w_1 + k7_1/2.0;
k3_2 = deltat*u_2;
k4_2 = deltat*(-gm*r_2/((r_2^2.0+(1+sqrt(1+z_2^2.0))^2.0)^1.5)+angmom2/(r_2^3.0));
k5_2 = deltat*(angmom/(r_2^2.0)); % theta'=lz/r^2 =====> deltatheta
k6_2 = deltat*w_2;
k7_2 = deltat*(-gm*z_2*(1+sqrt(1+z_2^2.0))/(sqrt(1+z_2^2.0)*(r_2^2.0+(1+sqrt(1+z_2^2.0))^2.0)^1.5));
r_3 = r_1+k3_2/2.0;
u_3 = u_1+k4_2/2.0;
theta_3 = theta_1+k5_2/2.0;
z_3 = z_1 + k6_2/2.0;
w_3 = w_1 + k7_2/2.0;
k3_3 = deltat*u_3; % r'=u
k4_3 = deltat*(-gm*r_3/((r_3^2.0+(1+sqrt(1+z_3^2.0))^2.0)^1.5)+angmom2/(r_3^3.0));% u'=-dphi_dr+lz^2/(r^3.0)
k5_3 = deltat*(angmom/(r_3^2.0)); % theta'=lz/r^2
k6_3 = deltat*w_3;
k7_3 = deltat*(-gm*z_3*(1+sqrt(1+z_3^2.0))/(sqrt(1+z_3^2.0)*(r_3^2.0+(1+sqrt(1+z_3^2.0))^2.0)^1.5));
r_4 = r_1+k3_2;
u_4 = u_1+k4_2;
theta_4 = theta_1+k5_2;
z_4 = z_1 + k6_2;
w_4 = w_1 + k7_2;
k3_4 = deltat*u_4; % r'=u
k4_4 = deltat*(-gm*r_4/((r_4^2.0+(1+sqrt(1+z_4^2.0))^2.0)^1.5)+angmom2/(r_4^3.0)); % u'=-dphi_dr+lz^2/(r^3.0)
k5_4 = deltat*(angmom/(r_4^2.0)); % theta'=lz/r^2
k6_4 = deltat*w_4;
k7_4 = deltat*(-gm*z_4*(1+sqrt(1+z_4^2.0))/(sqrt(1+z_4^2.0)*(r_4^2.0+(1+sqrt(1+z_4^2.0))^2.0)^1.5));
r_1 = r_1+(k3_1+2.0*k3_2+2.0*k3_3+k3_4)/6.0; % New value of R for next step
u_1 = u_1+(k4_1+2.0*k4_2+2.0*k4_3+k4_4)/6.0; % New value of U for next step
theta_1 = theta_1+(k5_1+2.0*k5_2+2.0*k5_3+k5_4)/6.0; % New value of theta
z_1 = z_1+(k6_1+2.0*k6_2+2.0*k6_3+k6_4)/6.0;
w_1 = w_1+(k7_1+2.0*k7_2+2.0*k7_3+k7_4)/6.0;
e = -gm/r_1+u_1*u_1/2.0+angmom2/(2.0*r_1*r_1); % energy
ecc = (1.0+(2.0*e*angmom2)/(gm^2.0))^0.5; % eccentricity
x(i) = r_1*cos(theta_1)/(1000.0*parsec); % X for plotting orbit
y(i) = r_1*sin(theta_1)/(1000.0*parsec); % Y for plotting orbit
time = time+deltat;
r(i) = r_1;
z(i) = z_1;
time1(i)= time;
end
Note that the anomally occurs on the indicated line.
It's not k3_1 that's capped, it's the calculation of u_1 that returns a value of -3.1445e+24 / deltat (deltat is constant).
u_1 is calculated in the line:
u_1 = u_1+(k4_1+2.0*k4_2+2.0*k4_3+k4_4)/6.0;
After the first iteration, this returns:
u_1(1) = 6.500e+13 % Hard coded before the loop
u_1(2) = -1.432966614767040e+04 % Calculated using the equation above
u_1(3) = -2.878934017859105e+04 % Calculated using the equation above
u_1(4) = -4.324903004768405e+04
Based on the equation u_1(n+1) = u_1(n) + du it looks like du represents a relatively small difference. The difference between the two first values is very large, so I'm assuming it is this calculation that's incorrect.
If you find that that calculation is correct, then your error is in one of these lines:
k4_1 = deltat*(-gm*r_1/((r_1^2.0+(1+sqrt(1+z_1^2.0))^2.0)^1.5)+angmom2/(r_1^3.0)); % u'=-dphi_dr+lz^2/(r^3.0) with lz=vi*ri this gives delta
k4_2 = deltat*(-gm*r_2/((r_2^2.0+(1+sqrt(1+z_2^2.0))^2.0)^1.5)+angmom2/(r_2^3.0));
k4_3 = deltat*(-gm*r_3/((r_3^2.0+(1+sqrt(1+z_3^2.0))^2.0)^1.5)+angmom2/(r_3^3.0));% u'=-dphi_dr+lz^2/(r^3.0)
k4_4 = deltat*(-gm*r_4/((r_4^2.0+(1+sqrt(1+z_4^2.0))^2.0)^1.5)+angmom2/(r_4^3.0)); % u'=-dphi_dr+lz^2/(r^3.0)