I am calculating sort of a histogram based on the distance between a pair of points in 3d space:
% Histogram of pairwise distances: pop(b) counts the particle pairs whose
% separation falls into bin b, each bin being binWidth wide.
% NOTE(review): particles and coords are not defined in this snippet;
% presumably coords holds one point per row — confirm against the caller.
numBins = 20;
binWidth = 2.5;
pop = zeros(1,numBins);
parfor j=1:particles
r1 = coords(j,:);
% Visit every partner k > j so each unordered pair is counted once.
for k=j+1:particles
r2 = coords(k,:);
d = norm(r1-r2);
% Bin index derived from the distance; d == 0 would yield ind = 0.
ind = ceil(d/binWidth);
% pop is indexed by a value computed inside the loop; this is the statement
% behind the "cannot be classified" error quoted below.
pop(ind) = pop(ind) + 1;
end
end
This, expectedly, results in
Error: The variable pop in a parfor cannot be classified.
I understand the problem, but I am confused as to how can I solve it.
In principle, what could be done is to send a copy of pop = zeros(1,numBins) to each of the n workers, and to join the copies by adding them together at the end of the computation. How can I do this here? Or is there another, more standard way of solving the problem?
There are two things that don't work in your code:
1) for k = j+1:particles
In a parfor, a nested for-loop should have fixed bounds.
2) pop(ind)
MATLAB is afraid that the order of the for-loop iterations matters and displays an error message, even though in your specific case the order doesn't matter (MATLAB is not smart enough to know that).
The solution, Linearization:
% Dummy data
numBins = 20;
binWidth = 2.5;
particles = 10;
coords = rand(particles,2)*40;
% Initialization
pop = zeros(1,numBins);
% Bin k must collect exactly the pairs with ceil(d/binWidth) == k, so the
% edges are placed halfway between consecutive integers. (Edges 0:numBins
% would put index 1 into the second bin and merge indices numBins-1 and
% numBins into the last one.) Indices outside 1..numBins are not counted.
edges = 0.5:1:(numBins + 0.5);
parfor j = 1:particles
    r1 = coords(j,:);
    r2 = coords((j+1):end,:);                   % all partners k > j (empty for the last j)
    d = sqrt(sum(bsxfun(@minus,r1,r2).^2,2));   % every distance from particle j at once
    % "pop = pop + ..." is a reduction, which parfor can classify.
    pop = pop + histcounts(ceil(d/binWidth),edges);
end
You can create a function that computes the inner loop and use a handle to it in the parfor (I haven't tested it, but I think it should work according to the documentation):
function pop = hist_comp(pop,j,particles,coords,binWidth)
% HIST_COMP  Add the pair distances of particle j to a distance histogram.
%   pop = hist_comp(pop,j,particles,coords,binWidth) uses row j of coords as
%   the reference point and, for every row index from j+1 up to particles,
%   increments element ceil(dist/binWidth) of pop, where dist is the
%   Euclidean distance between the two rows. The updated pop is returned.
    ref = coords(j,:);
    partner = j + 1;
    while partner <= particles
        sep = norm(ref - coords(partner,:));
        bin = ceil(sep/binWidth);
        pop(bin) = pop(bin) + 1;
        partner = partner + 1;
    end
end
numBins = 20;
binWidth = 2.5;
particles = 10;
coords = rand(particles,2)*5;
pop = zeros(1,numBins);
% Each call starts from an empty histogram and returns only the counts that
% particle j contributes. The partial histograms are then combined with "+",
% which parfor recognizes as a reduction. (Passing the running pop through a
% user function, pop = f(pop,j), is not an associative reduction.)
f = @(j) hist_comp(zeros(1,numBins),j,particles,coords,binWidth);
parfor j = 1:particles
    pop = pop + f(j);
end
Related
I am currently looking for the most efficient way to shift and rearrange large matrices. Essentially, I have data with some parabolic shift that needs to be corrected in order to shift the "signal" to a linear event.
I have currently tried the following solutions and tried timing them. Is there any other method that may prove to be more efficient?
% Build a test image: background of ones with a band of value 100 whose row
% offset grows quadratically with the distance from the centre column.
DATA = ones(100000,501);
DATA(10000,251) = 100;
for i=1:250
    DATA(10000+i^2-1000:10000+i^2+1000,251-i) = 100;
    DATA(10000+i^2-1000:10000+i^2+1000,251+i) = 100;
end
% Per-column shift (in rows) that the candidate functions undo.
k = abs(-250:1:250).^2;
d = size(DATA,1);
figure(99)
imagesc(DATA)
% Time each candidate; anonymous function handles are written with "@".
t_INDEX = timeit(@()fun_INDEX(DATA,k))
t_SNIPPET = timeit(@()fun_SNIPPET(DATA,k))
t_CIRCSHIFT = timeit(@()fun_CIRCSHIFT(DATA,k))
t_INDEX_clean = timeit(@()fun_INDEX_clean(DATA,k))
t_SPARSE = timeit(@()fun_SPARSE(DATA,k))
t_BSXFUN = timeit(@()fun_BSXFUN(DATA,k))
function fun_INDEX(DATA,k)
% FUN_INDEX  Circularly shift every column of DATA upward by k(column) rows
% using an explicit row permutation, then display the result in figure 1.
    nRows = size(DATA,1);
    shifted = zeros(size(DATA));
    for col = 1:size(DATA,2)
        s = k(col);
        rowOrder = [(s+1):nRows, 1:s];   % rows after the shift first, wrapped rows last
        shifted(:,col) = DATA(rowOrder,col);
    end
    figure(1)
    imagesc(shifted)
end
function fun_SNIPPET(DATA,k)
% FUN_SNIPPET  Align the columns of DATA by cutting, from each column, a
% window that starts k(column) rows down. All windows have the same height,
% size(DATA,1) - max(k), so nothing wraps around. Shows the result in figure 2.
    maxShift = max(k);
    keptRows = size(DATA,1) - maxShift;
    trimmed = zeros(keptRows,size(DATA,2));
    for col = 1:size(DATA,2)
        firstRow = k(col) + 1;
        lastRow = size(DATA,1) - maxShift + k(col);
        trimmed(:,col) = DATA(firstRow:lastRow,col);
    end
    figure(2)
    imagesc(trimmed)
end
function fun_CIRCSHIFT(DATA,k)
% FUN_CIRCSHIFT  Circularly shift every column of DATA upward by k(column)
% rows with circshift, then display the result in figure 3.
    shifted = zeros(size(DATA));
    nCols = size(DATA,2);
    for col = 1:nCols
        column = DATA(:,col);
        shifted(:,col) = circshift(column,-k(col),1);   % negative shift moves rows up
    end
    figure(3)
    imagesc(shifted)
end
function fun_INDEX_clean(DATA,k)
% FUN_INDEX_CLEAN  Rotate every column of DATA by stacking its lower part on
% top of its upper part; the split point of each column is derived from
% k(column). Shows the result in figure 4.
    [nRows, nCols] = size(DATA);
    tailLen = size(DATA,1) - k;      % number of rows moved to the top of each column
    rotated = zeros(nRows, nCols);
    for col = 1:nCols
        splitRow = nRows - tailLen(col);
        lowerPart = DATA((splitRow + 1):nRows, col);
        upperPart = DATA(1:splitRow, col);
        rotated(:, col) = [lowerPart; upperPart];
    end
    figure(4)
    imagesc(rotated)
end
function fun_SPARSE(DATA,k)
% FUN_SPARSE  Shift the columns of DATA in the Fourier domain: each column is
% multiplied, after fft, by the transform of a column holding a single 1 at
% row mod(-k(column),m)+1, and the product is transformed back with
% ifft(...,'symmetric'). Shows the result in figure 5.
    [m,n] = size(DATA);
    negShift = -k;
    impulseRow = mod(negShift,m) + 1;
    colIdx = 1:n;
    S = full(sparse(impulseRow,colIdx,1,m,n));   % one unit impulse per column
    spectrum = fft(DATA).*fft(S);
    shifted = ifft(spectrum,'symmetric');
    figure(5)
    imagesc(shifted)
end
function fun_BSXFUN(DATA,k)
% FUN_BSXFUN  Shift all columns of DATA at once with a precomputed linear
% index. DATA is transposed first, so the image shown in figure 6 is the
% transpose of what the loop-based variants display.
    DATA = DATA';
    k = -k;
    [m,n] = size(DATA);
    % Zero-based source position along the shifted dimension, one row of
    % indices per original column. Function handles are written with "@".
    idx0 = mod(bsxfun(@plus,n-k(:),1:n)-1,n);
    % Convert to linear indices into the transposed DATA.
    DATA_6 = DATA(bsxfun(@plus,(idx0*m),(1:m)'));
    figure(6)
    imagesc(DATA_6)
end
Is there any way to decrease computation time for this kind of problem?
Thanks in advance for any tips!
One option would be to use MATLAB's GPU functions, if your workstation has a GPU. Depending on if the entire data fits on the GPU at once, it will start to outperform CPU circshift at 1000 X 1000 matrix size.
The implementation only requires you to copy your data to the GPU with a single statement, and then to call circshift on the newly created GPU array.
A small discussion on its performance can be found here: https://www.mathworks.com/matlabcentral/answers/274619-circshift-slower-on-gpu . In particular, the last post describes a much faster GPU implementation for the case where you don't actually need a circular shift but can get away with zero padding on one side, which might be relevant.
I have originally written the following Matlab code to find intersection between a set of Axes Aligned Bounding Boxes (AABB) and space partitions (here 8 partitions). I believe it is readable by itself, moreover, I have added some comments for even more clarity.
function [A,B] = AABBPart(bbx,it) % bbx: one AABB per row; it: number of subdivision levels
% AABBPART  Recursively split the overall bounding box into octants and record
% which input boxes overlap each octant.
%   bbx : n-by-6 array; columns 1:3 and 4:6 are used as the lower and upper
%         corner of each box (presumably [xmin ymin zmin xmax ymax zmax] —
%         confirm with the caller).
%   it  : recursion depth; Part returns an empty cell once it drops below 1.
%   A   : nested cell array returned by Part for the root box.
%   B   : copy of the global F, an n-by-it cell. F{i,level} accumulates rows
%         [-J, j] for box i, where J is the octant index of the parent call
%         (0 at the root) and j is the overlapping octant at that level.
global F
IT = it+1; % IT-it inside Part is the 1-based recursion level
n = size(bbx,1);
F = cell(n,it);
A = Part([min(bbx(:,1:3)),max(bbx(:,4:6))],it,0); % recursive partitioning of the root box
B = F; % F is filled by Part as a side effect and handed back through B
function s = Part(bx,it,J) % nested: shares bbx, n, IT and F with AABBPart
% bx : 1-by-6 box to subdivide; it : remaining levels; J : octant index of bx
%      within its parent (0 for the root).
s = {};
if it < 1; return; end
s = cell(8,1);
p = bx(1:3);
q = bx(4:6);
h = 0.5*(p+q); % midpoint of the box; the octants meet here
prt = [p,h;... % 8 sub-parts (octa)
h(1),p(2:3),q(1),h(2:3);...
p(1),h(2),p(3),h(1),q(2),h(3);...
h(1:2),p(3),q(1:2),h(3);...
p(1:2),h(1),h(1:2),q(3);...
h(1),p(2),h(3),q(1),h(2),q(3);...
p(1),h(2:3),h(1),q(2:3);...
h,q];
% NOTE(review): rows 5-7 use h(1), h(1:2) and h(1) in positions where the
% other rows follow a lower-corner/upper-corner pattern (h(3) would be
% expected as the z bound) — verify these against the intended octants.
for j=1:8 % check for each sub-part
k = 0;
t = zeros(0,1);
% Every one of the n input boxes is tested at every level, not only the
% boxes that overlapped the parent.
for i=1:n
if all(bbx(i,1:3) <= prt(j,4:6)) && ... % intersection test for
all(prt(j,1:3) <= bbx(i,4:6)) % every aabb and sub-part
k = k+1;
t(k) = i;
end
end
if ~isempty(t)
% NOTE(review): t is grown by linear indexing, so its orientation follows
% MATLAB's array-growth rules — confirm this concatenation has the intended shape.
s{j,1} = [t; Part(prt(j,:),it-1,j)]; % recursive call
for i=1:numel(t) % collecting the results
if isempty(F{t(i),IT-it})
F{t(i),IT-it} = [-J,j];
else
F{t(i),IT-it} = [F{t(i),IT-it}; [-J,j]];
end
end
end
end
end
end
Concerns:
In my tests, it seems that probably few intersections are missing, say, 10 or so for 1000 or more setup. So I would be glad if you could help to find out any problematic parts in the code.
I am also concerned about using global F. I prefer to get rid of it.
Any other better solution in terms of speed, will be loved.
Note that the code is complete. And you can easily try it by some following setup.
n = 10000; % in the original application, n would be millions
bbx = rand(n,6);
it = 3;
[A,B] = AABBPart(bbx,it);
I am writing a graphical representation of the numerical stability of differential operators, and I am having trouble removing a nested for loop. The code loops through all entries in the X,Y plane and calculates the stability value for each point. This is done by finding the roots of a polynomial whose size depends on an input variable (the length of the input vector results in a 3-D polynomial matrix of size (m, n, length of input vector)). The main nested for loop is as follows.
% For every grid point (m,n): read the polynomial coefficients stored along
% the third dimension of p, compute the roots, and keep the largest root
% magnitude in z(m,n).
for m = 1:length(z2)
for n = 1:length(z1)
pointpoly(1,:) = p(m,n,:);
r = roots(pointpoly);
% No roots returned: substitute a large sentinel magnitude.
if isempty(r),r=1e10;end
z(m,n) = max(abs(r));
end
end
The full code of an example numerical method (Trapezoidal Rule) is as follows. Any and all help is appreciated.
% Stability plot for a method given by its alpha/beta coefficients (here
% the Trapezoidal Rule): for every point of a complex grid, take the
% largest root magnitude of the polynomial alpha(n) - z*beta(n).
alpha = [-1 1];
beta = [.5 .5];
Wind = 2;
Wsize = 500;
% A scalar window w expands to the limits [-w w -w w].
if isscalar(Wind)
    Wind = Wind(1)*[-1 1 -1 1];
end
% A scalar grid size is used for both axes.
if isscalar(Wsize)
    Wsize = [Wsize, Wsize];
end
z1 = linspace(Wind(1),Wind(2),Wsize(1));
z2 = linspace(Wind(3),Wind(4),Wsize(2));
[Z1,Z2] = meshgrid(z1,z2);
z = Z1+1i*Z2;
% Coefficient planes, highest-index alpha/beta first (the order roots expects).
nCoef = length(alpha);
p = zeros(Wsize(2),Wsize(1),nCoef);
for slot = 1:nCoef
    src = nCoef - slot + 1;
    p(:,:,slot) = alpha(src)-z*beta(src);
end
% Largest root magnitude at every grid point; z is overwritten in place.
for m = 1:length(z2)
    for n = 1:length(z1)
        pointpoly = reshape(p(m,n,:),1,[]);
        r = roots(pointpoly);
        if isempty(r)
            r = 1e10;   % no roots returned: use a large sentinel magnitude
        end
        z(m,n) = max(abs(r));
    end
end
figure()
surf(Z1,Z2,z,'EdgeColor','None');
caxis([0 2])
cmap = jet(255);
cmap((127:129),:) = 0;   % black band in the middle of the colour range
colormap(cmap)
view(2);
title(['Alpha Values (',num2str(alpha),') Beta Values (',num2str(beta),')'])
EDIT::
I was able to remove one of the for loops using the reshape command. So;
% Original nested loop: one roots call per grid point (m,n), keeping the
% largest root magnitude in z(m,n).
for m = 1:length(z2)
for n = 1:length(z1)
pointpoly(1,:) = p(m,n,:);
r = roots(pointpoly);
% No roots returned: substitute a large sentinel magnitude.
if isempty(r),r=1e10;end
z(m,n) = max(abs(r));
end
end
has now become
% Flatten the grid: each row of gg holds the coefficients of one grid point.
nPoints = numel(p)/length(alpha);
gg = reshape(p,[nPoints length(alpha)]);
r = zeros(nPoints,1);
for n = 1:nPoints
    temp = roots(gg(n,:));
    % Use the same sentinel as the nested-loop version (1e10, not 0) so both
    % variants produce the same z when roots returns nothing.
    if isempty(temp),temp = 1e10;end
    r(n,1) = max(abs(temp));
end
% Back to the Wsize(2)-by-Wsize(1) grid layout of p.
z = reshape(r,[Wsize(2),Wsize(1)]);
This might be one for loop, but I am still going through the same number of elements. Is there a way to use the roots command on all of my rows at the same time?
This is a follow-up question to How to append an element to an array in MATLAB? That question addressed how to append an element to an array. Two approaches are discussed there:
A = [A elem] % for a row array
A = [A; elem] % for a column array
and
A(end+1) = elem;
The second approach has the obvious advantage of being compatible with both row and column arrays.
However, this question is: which of the two approaches is fastest? My intuition tells me that the second one is, but I'd like some evidence for or against that. Any idea?
The second approach (A(end+1) = elem) is faster
According to the benchmarks below (run with the timeit benchmarking function from File Exchange), the second approach (A(end+1) = elem) is faster and should therefore be preferred.
Interestingly, though, the performance gap between the two approaches is much narrower in older versions of MATLAB than it is in more recent versions.
R2008a
R2013a
Benchmark code
function benchmark
% BENCHMARK  Time the two append idioms (append and addtoend) over a range
% of input sizes and plot both timing curves against the input size.
    n = logspace(2, 5, 40);
    % n = logspace(2, 4, 40);
    tf = zeros(size(n));
    tg = tf;
    for k = 1 : numel(n)
        x = rand(round(n(k)), 1);
        % Anonymous function handles are written with "@".
        f = @() append(x);
        tf(k) = timeit(f);
        g = @() addtoend(x);
        tg(k) = timeit(g);
    end
    figure
    hold on
    plot(n, tf, 'bo')
    plot(n, tg, 'ro')
    hold off
    xlabel('input size')
    ylabel('time (s)')
    leg = legend('y = [y, x(k)]', 'y(end + 1) = x(k)');
    set(leg, 'Location', 'NorthWest');
end
% Approach 1: y = [y, x(k)];
function y = append(x)
% APPEND  Copy the elements of x into a row vector, growing the result one
% element at a time by concatenation (y = [y, element]).
    y = [];
    total = numel(x);
    idx = 0;
    while idx < total
        idx = idx + 1;
        y = [y, x(idx)]; %#ok<AGROW> growth by concatenation is what gets timed
    end
end
% Approach 2: y(end + 1) = x(k);
function y = addtoend(x)
% ADDTOEND  Copy the elements of x into a row vector, growing the result one
% element at a time by indexed assignment (y(end + 1) = element).
    y = [];
    total = numel(x);
    idx = 0;
    while idx < total
        idx = idx + 1;
        y(end + 1) = x(idx); %#ok<AGROW> growth by end+1 is what gets timed
    end
end
How about this?
function somescript
% SOMESCRIPT  Time four ways of growing a vector by 1e5 elements: row/column
% orientation, each with concatenation ("Slow") and end+1 assignment ("Fast").
    % Handles to the nested functions are written with "@".
    RStime = timeit(@RowSlow)
    CStime = timeit(@ColSlow)
    RFtime = timeit(@RowFast)
    CFtime = timeit(@ColFast)

    % Row vector grown by concatenation.
    function RowSlow
        rng(1)
        A = zeros(1,2);
        for i = 1:1e5
            A = [A rand(1,1)];
        end
    end

    % Column vector grown by concatenation.
    function ColSlow
        rng(1)
        A = zeros(2,1);
        for i = 1:1e5
            A = [A; rand(1,1)];
        end
    end

    % Row vector grown by end+1 assignment.
    function RowFast
        rng(1)
        A = zeros(1,2);
        for i = 1:1e5
            A(end+1) = rand(1,1);
        end
    end

    % Column vector grown by end+1 assignment.
    function ColFast
        rng(1)
        A = zeros(2,1);
        for i = 1:1e5
            A(end+1) = rand(1,1);
        end
    end
end
For my machine, this yields the following timings:
RStime =
30.4064
CStime =
29.1075
RFtime =
0.3318
CFtime =
0.3351
The orientation of the vector does not seem to matter that much, but the second approach is about a factor 100 faster on my machine.
In addition to the fast growing method pointed out above (i.e., A(k+1)), you can also get a speed increase by growing the array size by some multiple, so that allocations become less frequent as the size increases.
On my laptop using R2014b, a conditional doubling of size results in about a factor of 6 speed increase:
>> SO
GATime =
0.0288
DWNTime =
0.0048
In a real application, the size of A would need to be limited to the needed size, or the unfilled entries filtered out in some way.
The Code for the SO function is below. I note that I switched to cos(k) since, for some unknown reason, there is a large difference in performance between rand() and rand(1,1) on my machine. But I don't think this affects the outcome too much.
function [] = SO()
% SO  Time growing an array on every iteration against doubling its size only
% when it is full, and display both timings.
    % Function handles are written with "@".
    GATime = timeit(@GrowAlways)
    DWNTime = timeit(@DoubleWhenNeeded)
end
function [] = DoubleWhenNeeded()
% DOUBLEWHENNEEDED  Fill A(2..1e5+1) with cos(k), doubling the allocated
% length of A whenever the next write would fall outside it.
    A = 0;
    sizeA = 1;
    for k = 1:1E5
        target = k + 1;
        if sizeA < target
            sizeA = 2*sizeA;
            A(sizeA) = 0;   % writing the last element extends A to the new length
        end
        A(target) = cos(k);
    end
end
function [] = GrowAlways()
% GROWALWAYS  Fill A(2..1e5+1) with cos(k), letting A grow by one element on
% every iteration.
    A = 0;
    k = 1;
    while k <= 1E5
        A(k+1) = cos(k); %#ok<AGROW> per-iteration growth is what gets timed
        k = k + 1;
    end
end
I am currently trying to run a script that calls a particular function, but I want to call the function inside a loop that halves one of the input variables on each of roughly 4 iterations.
In the code below, the function has been replaced by another for loop and the inputs stated above.
The inner for loop runs an Euler method on the function and works fine; it is just running it repeatedly with a smaller step size that I'm having trouble with.
Any help is welcome.
% Euler integration of y' = f(x,y) from xa to xb starting at y(xa) = ya,
% with an attempt to repeat the run four times at halved step sizes.
f = '3*exp(-x)-0.4*y';
xa = 0;
xb = 3;
ya = 5;
n = 2;
h=(xb-xa)/n;
x = xa:h:xb;
% h = zeros(1,4);
y = zeros(1,length(x));
F = inline(f);
y(1) = ya;
for j = 1:4
hOld = h;
hNew = hOld*0.5;
% NOTE(review): the result of subs only overwrites hOld, which is not read
% again in this pass; h and x are never updated, so every pass uses the
% original step size.
hOld = subs(y(1),'h',hNew);
for i = 1:(length(x)-1)
k1 = F(x(i),y(i));
% NOTE(review): writes row i+1, column j+1 of y while reading y(i) by linear
% index — confirm this is the intended layout.
y(i+1,j+1) = y(i) + h*k1;
end
end
disp(h)
After your comment, something like this:
% Repeat the Euler integration four times, halving the step size each pass.
% xs{j} and ys{j} keep the grid and the solution of pass j so the runs can be
% compared; after the loop, x and y hold the finest run.
xs = cell(1,4);
ys = cell(1,4);
for j = 1:4
    h = h/2;
    x = xa:h:xb;
    y = zeros(1,length(x));
    y(1) = ya;
    for i = 1:(length(x)-1)
        k1 = F(x(i),y(i));
        % y is a row vector rebuilt for every pass, so it takes a single
        % index; y(i+1,j+1) would spill the values into a growing matrix.
        y(i+1) = y(i) + h*k1;
    end
    xs{j} = x;
    ys{j} = y;
end