Matlab calling C function using mex - matlab

First of all, I never tried to call C code in a Matlab program - so it might just be a stupid mistake which I just can't figure out.
The C function is the following one and can be found on here, it is called durlevML.c and is part of the ARFIMA(p,d,q) estimator suite:
#include "mex.h"
#include "matrix.h"
#define square(p) ((p)*(p))
#define inv(q) (1/(q))
/* Durbin-Levinson algorithm for linear stationary AR(FI)MA(p,d,q) processes
Slightly altered for the maximum likelihood estimation
(C) György Inzelt 2011 */
void levinson_recur1(double* v,double* L, int N, double* gammas,int step)
{
int i,k;
if(step==0)
{
*(v + step) = *(gammas + step);
*(L + step) = 1;
for(k = step+1;k < N;++k)
{
*(L + k) = 0;
}
}
else if(step > 0 && step < N)
{
//phi_tt
*(L + step*N ) = (-1.00)* *(gammas + step);
if(step > 1)
{
for(i = 1;i < step ;++i)
{
*(L + step*N) -= *(L + (step-1)*N + (step -1) - i ) * *(gammas + step - i) ;
}
}
*(L +step*N) *= inv( *(v + step-1) );
//v_t
*(v + step) = *(v + step-1)*(1- square( *(L + step*N) ));
//phi_tj
for(i =1; i < step; ++i)
{
*(L + step*N + step - i) = *(L + (step-1)*N + (step -1) - i) + *(L + step*N ) * *(L + (step-1)*N + i -1 ) ;
}
//filling L with zeros and ones
*(L + step*N + step ) = 1;
if(step != N-1)
{
for(k = step*N +step+1 ;k < step*N + N ;++k)
{
*(L + k) =0;
}
}
}
if(step < N-1)
levinson_recur1(v,L,N,gammas,++step);
}
/* The gateway function */
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
int step=0;
int N;
double *gammas,*v,*L;
// getting the autocovariances
gammas = mxGetPr(prhs[0]);
N = mxGetM(prhs[0]);
// v
plhs[0] = mxCreateDoubleMatrix(0,0,mxREAL);
mxSetM(plhs[0],N);
mxSetN(plhs[0],1);
mxSetData(plhs[0], mxMalloc(sizeof(double)*N*1));
// L
plhs[1] = mxCreateDoubleMatrix(0,0,mxREAL);
mxSetM(plhs[1],square(N));
mxSetN(plhs[1],1);
mxSetData(plhs[1], mxMalloc(sizeof(double)*square(N)*1));
//
v = mxGetPr(plhs[0]);
L = mxGetPr(plhs[1]);
//
levinson_recur1(v, L, N,gammas,step);
//
return;
}
I have two problems now, the first one I already solved:
I got the following warning
Warning: You are using gcc version "4.6.3-1ubuntu5)". The version
currently supported with MEX is "4.4.6".
For a list of currently supported compilers see:
http://www.mathworks.com/support/compilers/current_release/
mex: durlevML.c not a normal file or does not exist.
but following the solution by changing the corresponding entries and installing the gcc-4.4 the warning disappeared, but the second actual problem still persists, although I don't get any warning any longer I still get the error
mex: durlevML.c not a normal file or does not exist.
What can be the possible reason for this?
I am using
ubuntu 12.04
Matlab 2012b
gcc-4.6 but was able to bring Matlab to use gcc-4.4
The problem occurred while running the function arfima_test.m which calls the durlevML.c function, the relevant (I guess) part of this is
% compiling the C/MEX file
c = input('Would you like to compile the MEX source? (1/0)');
switch c
case(1)
mex durlevML.c
case(0)
end

My thanks go to Ander Biguri due to his the comment!
The solution was indeed very easy, the problem was that the C file durlevML.c was not properly linked, so the compiler couldn't find anything. So I had to add some information in arfima_test.m in the following way
mex durlevML.c <---------------- needs to be linked /home/....
The C code itself was alright with one smaller issue. My compiler - I used gcc-4.4 due to Matlab incompatibilities with newer ones - didn't recognize the comments // and always produced errors, so I had to change them to the long comment format /**/ which finally worked.
Besides this everything worked, as far as I can tell!

Related

No Symbols Loaded: libmex.pdb not loaded (throw_segv_longjmp_seh_filter() = EXCEPTION_CONTINUE_SEARCH : C++ exception)

In order to create a MEX function and use it in my MATLAB code, like this:
[pow,index] = mx_minimum_power(A11,A12,A13,A22,A23,A33);
I've created the file mx_minimum_power.cpp and written the following code in it:
#include <math.h>
#include <complex>
#include "mex.h"
#include "matrix.h"
#include "cvm.h"
#include "blas.h"
#include "cfun.h"
using std::complex;
using namespace cvm;
/* The gateway function */
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
const int arraysize = 62172;
const int matrixDimention = 3;
float *inMatrixA11 = (float *)mxGetPr(prhs[0]);
complex<float> *inMatrixA12 = (complex<float> *)mxGetPr(prhs[1]);
complex<float> *inMatrixA13 = (complex<float> *)mxGetPr(prhs[2]);
float *inMatrixA22 = (float *)mxGetPr(prhs[3]);
complex<float> *inMatrixA23 = (complex<float> *)mxGetPr(prhs[4]);
float *inMatrixA33 = (float *)mxGetPr(prhs[5]);
basic_schmatrix< float, complex<float> > A(matrixDimention);
int i = 0;
for (i = 0; i < arraysize; i++)
{
A.set(1, 1, inMatrixA11[i]);
A.set(1, 2, inMatrixA12[i]);
A.set(1, 3, inMatrixA13[i]);
A.set(2, 2, inMatrixA22[i]);
A.set(2, 3, inMatrixA23[i]);
A.set(3, 3, inMatrixA33[i]);
}
}
And then in order to be able to debug the code, I've created the mx_minimum_power.pdb and mx_minimum_power.mexw64 files, using the following code in the Matlab Command Window:
mex -g mx_minimum_power.cpp cvm_em64t_debug.lib
The files blas.h, cfun.h, cvm.h and cvm_em64t_debug.lib are in the same directory as mx_minimum_power.cpp.
They are the headers and library files of the CVM Class Library.
Then I've attached MATLAB.exe to Visual Studio 2013, using the way explained here.
and have set a breakpoint at line40:
When I run my MATLAB code, there's no error until the specified line.
But if I click on the Step Over button, I'll encounter the following message:
With the following information added to the Output:
First-chance exception at 0x000007FEFCAE9E5D in MATLAB.exe: Microsoft C++ exception: cvm::cvmexception at memory location 0x0000000004022570.
> throw_segv_longjmp_seh_filter()
throw_segv_longjmp_seh_filter(): C++ exception
< throw_segv_longjmp_seh_filter() = EXCEPTION_CONTINUE_SEARCH
Can you suggest me why libmex.pdb is needed at that line and how should I solve the issue?
If I stop debugging, I'll get the following information in MATLAB Command Window:
Unexpected Standard exception from MEX file.
What() is:First index value 0 is out of [1,4) range
Right before pressing the step over button, we have the following values for A11[0],A12[0],A13[0],A22[0],A23[0],A33[0]:
that are just right as my expectations, according to what MATLAB passes to the MEX function:
Maybe the problem is because of wrong allocation for matrix A, it is as follows just before pressing the step over button.
The problem occurs because we have the following code in lines 48 to 53 of the cvm.h file:
// 5.7 0-based indexing
#if defined (CVM_ZERO_BASED)
# define CVM0 TINT_ZERO //!< Index base, 1 by default or 0 when \c CVM_ZERO_BASED is defined
#else
# define CVM0 TINT_ONE //!< Index base, 1 by default or 0 when \c CVM_ZERO_BASED is defined
#endif
That makes CVM0 = 1 by default. So if we press the Step Into(F11) button at the specified line two times, we will get into line 34883 of the file cvm.h:
basic_schmatrix& set(tint nRow, tint nCol, TC c) throw(cvmexception)
{
this->_set_at(nRow - CVM0, nCol - CVM0, c);
return *this;
}
Press Step Into(F11) at line this->_set_at(nRow - CVM0, nCol - CVM0, c); and you'll go to the definition of the function _set_at:
// sets both elements to keep matrix hermitian, checks ranges
// zero based
void _set_at(tint nRow, tint nCol, TC val) throw(cvmexception)
{
_check_lt_ge(CVM_OUTOFRANGE_LTGE1, nRow, CVM0, this->msize() + CVM0);
_check_lt_ge(CVM_OUTOFRANGE_LTGE2, nCol, CVM0, this->nsize() + CVM0);
if (nRow == nCol && _abs(val.imag()) > basic_cvmMachMin<TR>()) { // only reals on main diagonal
throw cvmexception(CVM_BREAKS_HERMITIANITY, "real number");
}
this->get()[this->ld() * nCol + nRow] = val;
if (nRow != nCol) {
this->get()[this->ld() * nRow + nCol] = _conjugate(val);
}
}
pressing Step Over(F10) button,you'll get the result:
so in order to get nRow=1 and nCol=1 and not nRow=0 and nCol=0, which is out of the range [1,4), you should write that line of code as:
A.set(2, 2, inMatrixA11[i]);

FFTW with MEX and MATLAB argument issues

I wrote the following C/MEX code using the FFTW library to control the number of threads used for a FFT computation from MATLAB. The code works great (complex FFT forward and backward) with the FFTW_ESTIMATE argument in the planner although it is slower than MATLAB. But, when I switch to the FFTW_MEASURE argument to tune up the FFTW planner, it turns out that applying one FFT forward and then one FFT backward does not return the initial image. Instead, the image is scaled by a factor. Using FFTW_PATIENT gives me an even worse result with null matrices.
My code is as follows:
Matlab functions:
FFT forward:
function Y = fftNmx(X,NumCPU)
if nargin < 2
NumCPU = maxNumCompThreads;
disp('Warning: Use the max maxNumCompThreads');
end
Y = FFTN_mx(X,NumCPU)./numel(X);
FFT backward:
function Y = ifftNmx(X,NumCPU)
if nargin < 2
NumCPU = maxNumCompThreads;
disp('Warning: Use the max maxNumCompThreads');
end
Y = iFFTN_mx(X,NumCPU);
Mex functions:
FFT forward:
# include <string.h>
# include <stdlib.h>
# include <stdio.h>
# include <mex.h>
# include <matrix.h>
# include <math.h>
# include </home/nicolas/Code/C/lib/include/fftw3.h>
char *Wisfile = NULL;
char *Wistemplate = "%s/.fftwis";
#define WISLEN 8
void set_wisfile(void)
{
char *home;
if (Wisfile) return;
home = getenv("HOME");
Wisfile = (char *)malloc(strlen(home) + WISLEN + 1);
sprintf(Wisfile, Wistemplate, home);
}
fftw_plan CreatePlan(int NumDims, int N[], double *XReal, double *XImag, double *YReal, double *YImag)
{
fftw_plan Plan;
fftw_iodim Dim[NumDims];
int k, NumEl;
FILE *wisdom;
for(k = 0, NumEl = 1; k < NumDims; k++)
{
Dim[NumDims - k - 1].n = N[k];
Dim[NumDims - k - 1].is = Dim[NumDims - k - 1].os = (k == 0) ? 1 : (N[k-1] * Dim[NumDims-k].is);
NumEl *= N[k];
}
/* Import the wisdom. */
set_wisfile();
wisdom = fopen(Wisfile, "r");
if (wisdom) {
fftw_import_wisdom_from_file(wisdom);
fclose(wisdom);
}
if(!(Plan = fftw_plan_guru_split_dft(NumDims, Dim, 0, NULL, XReal, XImag, YReal, YImag, FFTW_MEASURE *(or FFTW_ESTIMATE respectively)* )))
mexErrMsgTxt("FFTW3 failed to create plan.");
/* Save the wisdom. */
wisdom = fopen(Wisfile, "w");
if (wisdom) {
fftw_export_wisdom_to_file(wisdom);
fclose(wisdom);
}
return Plan;
}
void mexFunction( int nlhs, mxArray *plhs[],
int nrhs, const mxArray *prhs[] )
{
#define B_OUT plhs[0]
int k, numCPU, NumDims;
const mwSize *N;
double *pr, *pi, *pr2, *pi2;
static long MatLeng = 0;
fftw_iodim Dim[NumDims];
fftw_plan PlanForward;
int NumEl = 1;
int *N2;
if (nrhs != 2) {
mexErrMsgIdAndTxt( "MATLAB:FFT2mx:invalidNumInputs",
"Two input argument required.");
}
if (!mxIsDouble(prhs[0])) {
mexErrMsgIdAndTxt( "MATLAB:FFT2mx:invalidNumInputs",
"Array must be double");
}
numCPU = (int) mxGetScalar(prhs[1]);
if (numCPU > 8) {
mexErrMsgIdAndTxt( "MATLAB:FFT2mx:invalidNumInputs",
"NumOfThreads < 8 requested");
}
if (!mxIsComplex(prhs[0])) {
mexErrMsgIdAndTxt( "MATLAB:FFT2mx:invalidNumInputs",
"Array must be complex");
}
NumDims = mxGetNumberOfDimensions(prhs[0]);
N = mxGetDimensions(prhs[0]);
N2 = (int*) mxMalloc( sizeof(int) * NumDims);
for(k=0;k<NumDims;k++) {
NumEl *= NumEl * N[k];
N2[k] = N[k];
}
pr = (double *) mxGetPr(prhs[0]);
pi = (double *) mxGetPi(prhs[0]);
//B_OUT = mxCreateNumericArray(NumDims, N, mxDOUBLE_CLASS, mxCOMPLEX);
B_OUT = mxCreateNumericMatrix(0, 0, mxDOUBLE_CLASS, mxCOMPLEX);
mxSetDimensions(B_OUT , N, NumDims);
mxSetData(B_OUT , (double* ) mxMalloc( sizeof(double) * mxGetNumberOfElements(prhs[0]) ));
mxSetImagData(B_OUT , (double* ) mxMalloc( sizeof(double) * mxGetNumberOfElements(prhs[0]) ));
pr2 = (double* ) mxGetPr(B_OUT);
pi2 = (double* ) mxGetPi(B_OUT);
fftw_init_threads();
fftw_plan_with_nthreads(numCPU);
PlanForward = CreatePlan(NumDims, N2, pr, pi, pr2, pi2);
fftw_execute_split_dft(PlanForward, pr, pi, pr2, pi2);
fftw_destroy_plan(PlanForward);
fftw_cleanup_threads();
}
FFT backward:
This MEX function differs from the above only in switching pointers pr <-> pi, and pr2 <-> pi2 in the CreatePlan function and in the execution of the plan, as suggested in the FFTW documentation.
If I run
A = imread('cameraman.tif');
>> A = double(A) + i*double(A);
>> B = fftNmx(A,8);
>> C = ifftNmx(B,8);
>> figure,imagesc(real(C))
with the FFTW_MEASURE and FFTW_ESTIMATE arguments respectively I get this result.
I wonder if this is due to an error in my code or in the library. I tried different thing around the wisdom, saving not saving. Using the wisdom produce by the FFTW standalone tool to produce wisdom. I haven't seen any improvement. Can anyone suggest why this is happening?
Additional information:
I compile the MEX code using static libraries:
mex FFTN_Meas_mx.cpp /home/nicolas/Code/C/lib/lib/libfftw3.a /home/nicolas/Code/C/lib/lib/libfftw3_threads.a -lm
The FFTW library hasn't been compiled with:
./configure CFLAGS="-fPIC" --prefix=/home/nicolas/Code/C/lib --enable-sse2 --enable-threads --&& make && make install
I tried different flags without success. I am using MATLAB 2011b on a Linux 64-bit station (AMD opteron quad core).
FFTW computes not normalized transform, see here:
http://www.fftw.org/doc/What-FFTW-Really-Computes.html
Roughly speaking, when you perform direct transform followed by inverse one, you get
back the input (plus round-off errors) multiplied by the length of your data.
When you create a plan using flags other than FFTW_ESTIMATE, your input is overwritten:
http://www.fftw.org/doc/Planner-Flags.html

Passing GPUArray to feval

I have the following kernel
__global__ void func( float * arr, int N ) {
int rtid = blockDim.x * blockIdx.x + threadIdx.x;
if( rtid < N )
{
float* row = (float*)((char*)arr + rtid*N*sizeof(float) );
for (int c = 1; c < N; ++c)
{
//Manipulation
}
}
}
When I call the kernel from MATLAB using
gtm= parallel.gpu.GPUArray(ones(a,b,'double'));
OR gtm= parallel.gpu.GPUArray(ones(1,b,'double'));
gtm=k.feval(gtm,b);
it is giving the following error:
Error using ==> feval
parallel.gpu.GPUArray must match the exact input type as specified on the kernel
prototype.
Error in ==> sameInit at 65 gtm=k.feval(gtm,b);
Can someone please tell me where am I going wrong.
Thanking You,
Viharri P L V.
The kernel object "k" that is being created in MATLAB have the following structure:
MaxNumLHSArguments: 1
NumRHSArguments: 2
ArgumentTypes: {'inout single' 'in int32 scalar'}
with the above mentioned CUDA kernel prototype i.e.,
__global__ void func( float * arr, int N )
So, there was an mismatch error. We need to either change the prototype of the CUDA kernel to
__global__ void func( double * arr, int N )
or create the MATLAB array with 'single' type.
gtm= parallel.gpu.GPUArray(ones(a,b,'single'));

Finding log2() using sqrt()

This is an interview question I saw on some site.
It was mentioned that the answer involves forming a recurrence of log2() as follows:
double log2(double x )
{
if ( x<=2 ) return 1;
if ( IsSqureNum(x) )
return log2(sqrt(x) ) * 2;
return log2( sqrt(x) ) * 2 + 1; // Why the plus one here.
}
as for the recurrence, clearly the +1 is wrong. Also, the base case is also erroneous.
Does anyone know a better answer?
How is log() and log10() actually implemented in C.
Perhaps I have found the exact answers the interviewers were looking for. From my part, I would say it's little bit difficult to derive this under interview pressure. The idea is, say you want to find log2(13), you can know that it lies between 3 to 4. Also 3 = log2(8) and 4 = log2(16),
from properties of logarithm, we know that log( sqrt( (8*16) ) = (log(8) + log(16))/2 = (3+4)/2 = 3.5
Now, sqrt(8*16) = 11.3137 and log2(11.3137) = 3.5. Since 11.3137<13, we know that our desired log2(13) would lie between 3.5 and 4 and we proceed to locate that. It is easy to notice that this has a Binary Search solution and we iterate up to a point when our value converges to the value whose log2() we wish to find. Code is given below:
double Log2(double val)
{
int lox,hix;
double rval, lval;
hix = 0;
while((1<<hix)<val)
hix++;
lox =hix-1;
lval = (1<<lox) ;
rval = (1<<hix);
double lo=lox,hi=hix;
// cout<<lox<<" "<<hix<<endl;
//cout<<lval<<" "<<rval;
while( fabs(lval-val)>1e-7)
{
double mid = (lo+hi)/2;
double midValue = sqrt(lval*rval);
if ( midValue > val)
{
hi = mid;
rval = midValue;
}
else{
lo=mid;
lval = midValue;
}
}
return lo;
}
It's been a long time since I've written pure C, so here it is in C++ (I think the only difference is the output function, so you should be able to follow it):
#include <iostream>
using namespace std;
const static double CUTOFF = 1e-10;
double log2_aux(double x, double power, double twoToTheMinusN, unsigned int accumulator) {
if (twoToTheMinusN < CUTOFF)
return accumulator * twoToTheMinusN * 2;
else {
int thisBit;
if (x > power) {
thisBit = 1;
x /= power;
}
else
thisBit = 0;
accumulator = (accumulator << 1) + thisBit;
return log2_aux(x, sqrt(power), twoToTheMinusN / 2.0, accumulator);
}
}
double mylog2(double x) {
if (x < 1)
return -mylog2(1.0/x);
else if (x == 1)
return 0;
else if (x > 2.0)
return mylog2(x / 2.0) + 1;
else
return log2_aux(x, 2.0, 1.0, 0);
}
int main() {
cout << "5 " << mylog2(5) << "\n";
cout << "1.25 " << mylog2(1.25) << "\n";
return 0;
}
The function 'mylog2' does some simple log trickery to get a related number which is between 1 and 2, then call log2_aux with that number.
The log2_aux more or less follows the algorithm that Scorpi0 linked to above. At each step, you get 1 bit of the result. When you have enough bits, stop.
If you can get a hold of a copy, the Feynman Lectures on Physics, number 23, starts off with a great explanation of logs and more or less how to do this conversion. Vastly superior to the Wikipedia article.

Carefully deleting N items from a "circular" vector (or perhaps just an NSMutableArray)

Imagine a std:vector, say, with 100 things on it (0 to 99) currently. You are treating it as a loop. So the 105th item is index 4; forward 7 from index 98 is 5.
You want to delete N items after index position P.
So, delete 5 items after index 50; easy.
Or 5 items after index 99: as you delete 0 five times, or 4 through 0, noting that position at 99 will be erased from existence.
Worst, 5 items after index 97 - you have to deal with both modes of deletion.
What's the elegant and solid approach?
Here's a boring routine I wrote
-(void)knotRemovalHelper:(NSMutableArray*)original
after:(NSInteger)nn howManyToDelete:(NSInteger)desired
{
#define ORCO ((NSInteger)[original count])
static NSInteger kount, howManyUntilLoop, howManyExtraAferLoop;
if ( ... our array is NOT a loop ... )
// trivial, if messy...
{
for ( kount = 1; kount<=desired; ++kount )
{
if ( (nn+1) >= ORCO )
return;
[original removeObjectAtIndex:( nn+1 )];
}
return;
}
else // our array is a loop
// messy, confusing and inelegant. how to improve?
// here we go...
{
howManyUntilLoop = (ORCO-1) - nn;
if ( howManyUntilLoop > desired )
{
for ( kount = 1; kount<=desired; ++kount )
[original removeObjectAtIndex:( nn+1 )];
return;
}
howManyExtraAferLoop = desired - howManyUntilLoop;
for ( kount = 1; kount<=howManyUntilLoop; ++kount )
[original removeObjectAtIndex:( nn+1 )];
for ( kount = 1; kount<=howManyExtraAferLoop; ++kount )
[original removeObjectAtIndex:0];
return;
}
#undef ORCO
}
Update!
InVariant's second answer leads to the following excellent solution. "starting with" is much better than "starting after". So the routine now uses "start with". Invariant's second answer leads to this very simple solution...
N times do if P < currentsize remove P else remove 0
-(void)removeLoopilyFrom:(NSMutableArray*)ra
startingWithThisOne:(NSInteger)removeThisOneFirst
howManyToDelete:(NSInteger)countToDelete
{
// exception if removeThisOneFirst > ra highestIndex
// exception if countToDelete is > ra size
// so easy thanks to Invariant:
for ( do this countToDelete times )
{
if ( removeThisOneFirst < [ra count] )
[ra removeObjectAtIndex:removeThisOneFirst];
else
[ra removeObjectAtIndex:0];
}
}
Update!
Toolbox has pointed out the excellent idea of working to a new array - super KISS.
Here's an idea off the top of my head.
First, generate an array of integers representing the indices to remove. So "remove 5 from index 97" would generate [97,98,99,0,1]. This can be done with the application of a simple modulus operator.
Then, sort this array descending giving [99,98,97,1,0] and then remove the entries in that order.
Should work in all cases.
This solution seems to work, and it copies all remaining elements in the vector only once (to their final destination).
Assume kNumElements, kStartIndex, and kNumToRemove are defined as const size_t values.
vector<int> my_vec(kNumElements);
for (size_t i = 0; i < my_vec.size(); ++i) {
my_vec[i] = i;
}
for (size_t i = 0, cur = 0; i < my_vec.size(); ++i) {
// What is the "distance" from the current index to the start, taking
// into account the wrapping behavior?
size_t distance = (i + kNumElements - kStartIndex) % kNumElements;
// If it's not one of the ones to remove, then we keep it by copying it
// into its proper place.
if (distance >= kNumToRemove) {
my_vec[cur++] = my_vec[i];
}
}
my_vec.resize(kNumElements - kNumToRemove);
There's nothing wrong with two loop solutions as long as they're readable and don't do anything redundant. I don't know Objective-C syntax, but here's the pseudocode approach I'd take:
endIdx = after + howManyToDelete
if (Len <= after + howManyToDelete) //will have a second loop
firstloop = Len - after; //handle end in the first loop, beginning in second
else
firstpass = howManyToDelete; //the first loop will get them all
for (kount = 0; kount < firstpass; kount++)
remove after+1
for ( ; kount < howManyToDelete; kount++) //if firstpass < howManyToDelete, clean up leftovers
remove 0
This solution doesn't use mod, does the limit calculation outside the loop, and touches the relevant samples once each. The second for loop won't execute if all the samples were handled in the first loop.
The common way to do this in DSP is with a circular buffer. This is just a fixed length buffer with two associated counters:
//make sure BUFSIZE is a power of 2 for quick mod trick
#define BUFSIZE 1024
int CircBuf[BUFSIZE];
int InCtr, OutCtr;
void PutData(int *Buf, int count) {
int srcCtr;
int destCtr = InCtr & (BUFSIZE - 1); // if BUFSIZE is a power of 2, equivalent to and faster than destCtr = InCtr % BUFSIZE
for (srcCtr = 0; (srcCtr < count) && (destCtr < BUFSIZE); srcCtr++, destCtr++)
CircBuf[destCtr] = Buf[srcCtr];
for (destCtr = 0; srcCtr < count; srcCtr++, destCtr++)
CircBuf[destCtr] = Buf[srcCtr];
InCtr += count;
}
void GetData(int *Buf, int count) {
int srcCtr = OutCtr & (BUFSIZE - 1);
int destCtr = 0;
for (destCtr = 0; (srcCtr < BUFSIZE) && (destCtr < count); srcCtr++, destCtr++)
Buf[destCtr] = CircBuf[srcCtr];
for (srcCtr = 0; srcCtr < count; srcCtr++, destCtr++)
Buf[destCtr] = CircBuf[srcCtr];
OutCtr += count;
}
int BufferOverflow() {
return ((InCtr - OutCtr) > BUFSIZE);
}
This is pretty lightweight, but effective. And aside from the ctr = BigCtr & (SIZE-1) stuff, I'd argue it's highly readable. The only reason for the & trick is in old DSP environments, mod was an expensive operation so for something that ran often, like every time a buffer was ready for processing, you'd find ways to remove stuff like that. And if you were doing FFT's, your buffers were probably a power of 2 anyway.
These days, of course, you have 1 GHz processors and magically resizing arrays. You kids get off my lawn.
Another method:
N times do {remove entry at index P mod max(ArraySize, P)}
Example:
N=5, P=97, ArraySize=100
1: max(100, 97)=100 so remove at 97%100 = 97
2: max(99, 97)=99 so remove at 97%99 = 97 // array size is now 99
3: max(98, 97)=98 so remove at 97%98 = 97
4: max(97, 97)=97 so remove at 97%97 = 0
5: max(96, 97)=97 so remove at 97%97 = 0
I don't program iphone for know, so I image std::vector, it's quite easy, simple and elegant enough:
#include <iostream>
using std::cout;
#include <vector>
using std::vector;
#include <cassert> //no need for using, assert is macro
template<typename T>
void eraseCircularVector(vector<T> & vec, size_t position, size_t count)
{
assert(count <= vec.size());
if (count > 0)
{
position %= vec.size(); //normalize position
size_t positionEnd = (position + count) % vec.size();
if (positionEnd < position)
{
vec.erase(vec.begin() + position, vec.end());
vec.erase(vec.begin(), vec.begin() + positionEnd);
}
else
vec.erase(vec.begin() + position, vec.begin() + positionEnd);
}
}
int main()
{
vector<int> values;
for (int i = 0; i < 10; ++i)
values.push_back(i);
cout << "Values: ";
for (vector<int>::const_iterator cit = values.begin(); cit != values.end(); cit++)
cout << *cit << ' ';
cout << '\n';
eraseCircularVector(values, 5, 1); //remains 9: 0,1,2,3,4,6,7,8,9
eraseCircularVector(values, 16, 5); //remains 4: 3,4,6,7
cout << "Values: ";
for (vector<int>::const_iterator cit = values.begin(); cit != values.end(); cit++)
cout << *cit << ' ';
cout << '\n';
return 0;
}
However, you might consider:
creating new loop_vector class, if you use this kind of functionality enough
using list if you perform many deletions (or few deletions (not from end, that's simple pop_back) but large array)
If your container (NSMutableArray or whatever) is not list, but vector (i.e. resizable array), you most definitely don't want to delete items one by one, but whole range (e.g. std::vector's erase(begin, end)!
Edit: reacting to comment, to fully realize what must be done by vector, if you erase element other than the last one: it must copy all values after that element (e.g. 1000 items in array, you erase first, 999x copying (moving) of item, that is very costly).
Example:
#include <iostream>
#include <vector>
#include <ctime>
using namespace std;
int main()
{
clock_t start, end;
vector<int> vec;
const int items = 64 * 1024;
cout << "using " << items << " items in vector\n";
for (size_t i = 0; i < items; ++i) vec.push_back(i);
start = clock();
while (!vec.empty()) vec.erase(vec.begin());
end = clock();
cout << "Inefficient method took: "
<< (end - start) * 1.0 / CLOCKS_PER_SEC << " ms\n";
for (size_t i = 0; i < items; ++i) vec.push_back(i);
start = clock();
vec.erase(vec.begin(), vec.end());
end = clock();
cout << "Efficient method took: "
<< (end - start) * 1.0 / CLOCKS_PER_SEC << " ms\n";
return 0;
}
Produces output:
using 65536 items in vector
Inefficient method took: 1.705 ms
Efficient method took: 0 ms
Note it's very easy to get inefficient, look e.g. have at http://www.cplusplus.com/reference/stl/vector/erase/