Good day,
I have the following code which has given me problems for a day already.
I have debugged it, and it works fine until trying to free the memory. The free() function should be called at the end of the execution automatically, so I commented the mxFree() code out, in hope of getting a result. Even if I do that, the program frees the memory twice, like in the case of manually freeing memory - and thus I conclude that it is beyond my control.
*** glibc detected *** /usr/local/MATLAB/R2012a/bin/glnx86/MATLAB: free(): invalid pointer: 0xad2427a1 ***
Is there something I missed?
Note: I have tried some examples of .mex files with memory allocation and they work fine - so the mistake is down below, in my code.
/*
Beamforming algorithm
Arguments: xr, yr, zr,
t, ts, W, U,
Sdata, NrSensor c, omega_o
Output: S1_sum
S1_sum = beamforming(xr,yr,zr,t,ts,W,U,Sdata,NrSensor,c,omega_o)
*/
#include "mex.h"
#include <stdlib.h>
#include <math.h>
#define INPUT_ARGS 11
#define OUTPUT_ARGS 1
#define Ar(a, b) Ar[b+a*NrSensor]
#define Ai(a, b) Ai[b+a*NrSensor]
#define V(a, b) V[b+a*len_zr]
#define tau(a, b) tau[b+a*NrSensor]
#define tau_s(a, b) tau_s[b+a*len_zr]
#define tau_r(a, b) tau_r[b+a*NrSensor]
#define w(a, b) w[b+a*len_zr]
#define W(a, b) W[b+a*NrSensor]
#define arg(a, b) arg[b+a*len_zr]
#define newrange(a, b) newrange[b+a*len_zr]
#define oldrange(a, b) oldrange[b+a*len_zr]
#define S1_sum_r(a, b, c) S1_sum_r[c+b*len_xr+a*len_yr*len_xr]
#define S1_sum_i(a, b, c) S1_sum_i[c+b*len_xr+a*len_yr*len_xr]
#define Sdata_r(a, b) Sdata_r[b+a*NrSensor]
#define Sdata_i(a, b) Sdata_i[b+a*NrSensor]
#define S1interpr(a, b) S1interpr[b+a*len_zr]
#define S1interpi(a, b) S1interpi[b+a*len_zr]
#define PI_ 3.141592653
double sinc(double x){
return sin(PI_*x)/(PI_*x);
}
void mexFunction(int nlhs, mxArray * plhs[], int nrhs, const mxArray * prhs[])
{
/*
Declarations
*/
int xr_, yr_, i, j, m;
int len_xr, len_yr, len_zr, len_t,
len_t_, NrSensor, NrSensor_Cen;
double _t, _i, arg_min, arg_max, norm_factor;
double start_tau, end_tau, c, omega_o;
double *xr, *yr, *zr, *t, *ts,
*Sdata_r, *Sdata_i, *W;
double *Ar, *Ai, *V, *tau, *tau_r, *tau_s,
*arg, *U, *w, *t_, *x_r, *x_i, *start_index,
*newrange, *oldrange, *S1_sum_r, *S1_sum_i,
*S1interpi, *S1interpr;
/*
Checking number of arguments
*/
if(nrhs != INPUT_ARGS)
mxErrMsgTxt("Incorrect number of arguments!\n");
if(nlhs != OUTPUT_ARGS)
mxErrMsgTxt("Incorrect number of outputs!\n");
/*
Reading arguments
*/
xr = mxGetPr(prhs[0]);
yr = mxGetPr(prhs[1]);
zr = mxGetPr(prhs[2]);
t = mxGetPr(prhs[3]);
ts = mxGetPr(prhs[4]);
W = mxGetPr(prhs[5]);
U = mxGetPr(prhs[6]);
Sdata_r = mxGetPr(prhs[7]);
Sdata_i = mxGetPi(prhs[7]);
NrSensor = (int) mxGetScalar(prhs[8]);
c = mxGetScalar(prhs[9]);
omega_o = mxGetScalar(prhs[10]);
len_xr = mxGetN(prhs[0]);
len_yr = mxGetN(prhs[1]);
len_zr = mxGetN(prhs[2]);
len_t = mxGetM(prhs[3]);
/*
Initialisations
*/
_t = 0.0;
len_t_ = 0;
NrSensor_Cen = NrSensor/2;
/*
Space allocation and checking
*/
arg = malloc(sizeof(double)*len_zr*len_t);
Ar = malloc(sizeof(double)*len_zr*NrSensor);
Ai = malloc(sizeof(double)*len_zr*NrSensor);
V = malloc(sizeof(double)*len_zr*3);
tau = malloc(sizeof(double)*len_zr*NrSensor);
tau_s = malloc(sizeof(double)*len_zr*3);
tau_r = malloc(sizeof(double)*len_zr*NrSensor);
U = malloc(sizeof(double)*3);
w = malloc(sizeof(double)*len_zr*3);
W = malloc(sizeof(double)*NrSensor*3);
ts = malloc(sizeof(double)*len_zr);
t = malloc(sizeof(double)*len_t);
t_ = malloc(sizeof(double)*len_t);
x_r = malloc(sizeof(double)*len_t);
x_i = malloc(sizeof(double)*len_t);
arg = malloc(sizeof(double)*len_zr*len_t);
newrange = malloc(sizeof(double)*len_zr*len_t);
oldrange = malloc(sizeof(double)*len_zr*len_t);
S1interpr = malloc(sizeof(double)*NrSensor*len_zr);
S1interpi = malloc(sizeof(double)*NrSensor*len_zr);
start_index = malloc(sizeof(double)*len_t);
/*
S1_sum_r = mxMalloc(len_xr*len_yr*len_zr);
S1_sum_i = mxMalloc(len_xr*len_yr*len_zr);
*/
int dim_S1_sum[3] = {len_xr, len_yr, len_zr};
plhs[0] = mxCreateNumericArray(3, dim_S1_sum,
mxDOUBLE_CLASS,
mxCOMPLEX);
S1_sum_r = (double*) mxGetPr(plhs[0]);
S1_sum_i = (double*) mxGetPi(plhs[0]);
if(arg == NULL ||
Ar == NULL ||
Ai == NULL ||
V == NULL ||
tau == NULL ||
tau_s == NULL ||
tau_r == NULL ||
U == NULL ||
W == NULL ||
w == NULL ||
ts == NULL ||
t == NULL ||
t_ == NULL ||
x_r == NULL ||
x_i == NULL ||
start_index == NULL ||
newrange == NULL ||
oldrange == NULL ||
S1_sum_r == NULL ||
S1_sum_i == NULL ||
S1interpr == NULL||
S1interpi == NULL){
mxErrMsgTxt("Malloc error!\n");
return;
}
/*
--- INITIALISING S1interp, S1_sum, tau, full of zeros
*/
for(i=0; i<NrSensor; i++){
for(j=0; j<len_zr; j++){
S1interpr(i,j) = 0;
S1interpi(i,j) = 0;
}
}
for(i=0; i<len_xr; i++){
for(j=0; j<len_yr; j++){
for(m=0; m<len_zr; m++){
S1_sum_r(i,j,m) = 0;
S1_sum_i(i,j,m) = 0;
}
}
}
for(i=0; i<NrSensor; i++){
for(j=0; j<len_zr; j++){
tau(i,j) = 0;
}
}
/*
--- MAIN ALGORITHM ---
*/
for(xr_=0; xr_ < len_xr; xr_++){
for(yr_=0; yr_ < len_yr; yr_++){
for(i=0; i < len_zr; i++){
V(0, i) = xr[xr_];
V(1, i) = yr[yr_];
V(2, i) = zr[i];
}
for(i=0; i < len_zr; i++){
tau_s(0, i) = V(0, i) - U[0];
tau_s(1, i) = V(1, i) - U[1];
tau_s(2, i) = V(2, i) - U[2];
}
for(m=0; m < NrSensor; m++){
for(i=0; i < len_zr; i++){
/*
I see no point of the squeeze function
since W is already of known sizes
*/
w(0, i) = V(0, i) - W(0, m);
w(1, i) = V(1, i) - W(1, m);
w(2, i) = V(2, i) - W(2, m);
}
for(i=0; i< len_zr; i++){
/*
sum(w.*w)
*/
_t = w(0, i)*w(0,i) +
w(1, i)*w(1,i) +
w(2, i)*w(2,i);
tau_r(m, i) = sqrt(_t)/c;
}
}
for(m=0; m < len_zr; m++){
for(i=0; i < NrSensor; i++){
/*
Computing sum(tau_s(m, :).*tau_s(m, :))
*/
_t = tau_s(0, m)*tau_s(0, m) +
tau_s(1, m)*tau_s(1, m) +
tau_s(2, m)*tau_s(2, m);
tau(i, m) = tau_r(i, m) + sqrt(_t)/c;
}
}
/*
for(i=0; i < len_zr; i++){
for(j=0; j < NrSensor; j++){
Tau(i,j)=tau(i,j);
}
}
*/
for(i=0; i < len_zr; i++){
for(j=0; j < NrSensor; j++){
Ar(i,j)=cos(omega_o * tau(i, j));
Ai(i,j)=sin(omega_o * tau(i, j));
}
}
/*
--- BIG LOOP AHEAD ---
*/
for(m=0; m < NrSensor; m++){
start_tau = tau(NrSensor_Cen, 1);
end_tau = tau(NrSensor_Cen, len_zr);
/*
Finding index : writing start indexes
and also t_ array
*/
len_t_=0;
for(i=0; i<len_t; i++){
if(t[i] >= start_tau &&
t[i] <= end_tau){
start_index[len_t_] = i;
t_[len_t_] = t[i];
x_r[len_t_] = Sdata_r(i, m);
x_i[len_t_] = Sdata_i(i, m);
len_t_++;
}
}
for(i=0; i < len_zr; i++){
ts[i]=tau(m, i);
}
for(i=0; i < len_t_; i++){
for(j=0; j < len_zr; j++){
newrange(i, j) = ts[j];
}
}
for(i=0; i < len_zr; i++){
for(j=0; j < len_t_; j++){
oldrange(j, i) = t_[j];
}
}
for(i=0; i < len_t_; i++){
for(j=0; j < len_zr; j++){
arg(i, j) = newrange(i, j)-oldrange(i, j);
}
}
arg_min = arg[0];
for(i=0; i < len_t_; i++)
for(j=0; j < len_zr; j++)
if(arg_min>arg(i, j))
arg_min=arg(i, j);
arg_max = arg[0];
for(i=0; i < len_t_; i++)
for(j=0; j < len_zr; j++)
if(arg_max<arg(i, j))
arg_max=arg(i, j);
norm_factor = (2*len_t_)/(arg_max-arg_min);
for(i=0; i < len_zr; i++){
_t = 0;
for(j=0; j < NrSensor; j++){
_t = sinc(arg(i, j)*norm_factor)*x_r[j];
}
S1interpr(m, i) = Ar(m, i) * _t;
_t = 0;
for(j=0; j < NrSensor; j++){
_t = sinc(arg(i, j)*norm_factor)*x_i[j];
}
S1interpi(m, i) = Ai(m, i) * _t;
}
}
for(i=0; i < len_zr; i++){
_t = 0;
for(j=0; j < NrSensor; j++){
_t += S1interpr(j, i);
}
S1_sum_r(xr_, yr_, i) = _t;
_t = 0;
for(j=0; j < NrSensor; j++){
_t += S1interpi(j, i);
}
S1_sum_i(xr_, yr_, i) = _t;
}
}
}
free(arg);
free(Ar);
free(Ai);
free(V);
free(tau);
free(tau_s);
free(tau_r);
free(U);
free(w);
free(W);
free(ts);
free(t);
free(t_);
free(x_r);
free(x_i);
free(newrange);
free(oldrange);
free(S1interpr);
free(S1interpi);
free(start_index);
return;
}
EDIT
I have deleted all my code except the beginning (up to the memory allocation) and the memory freeing. I'm also using malloc() and free() now. In between the memory allocation and freeing, I have also put this code:
for(i=0; i<NrSensor; i++){
for(j=0; j<len_zr; j++){
S1interpr(i,j) = 0;
S1interpi(i,j) = 0;
}
}
for(i=0; i<NrSensor; i++){
for(j=0; j<len_zr; j++){
tau(i,j) = 0;
}
}
The first loop causes no problem. The second one though, apparently corrupts the variable V (declared before it) and the one declared after it. And it seems, according to my logic, that it does not exceed any kind of bounds...
The second loop does not index correctly for tau. You are defining indexing for tau as
#define tau(a, b) tau[b+a*NrSensor]
Let us walk through the second loop assuming NrSensor = 10 and len_zr = 5. For this case max value of loop variable i is 9 and max value of loop variable j is 4. Now,
tau(9,4) => tau[4+9*10] => tau[94].
But you are allocating tau with
tau = malloc(sizeof(double)*len_zr*NrSensor);
which for the sample values 10 and 5 is
tau = malloc(sizeof(double)*50)
You either need to change the indexing definition for tau to swap a and b or change the order of loops i and j.
I am implementing part of FFT algorithm using Renderscript in Android, When I ran the code my application got hanged. I want to process 512 values from real and img allocation at a time. Kernel will execute 512 times using provided dummy allocation of size 512.
Here is my java code
RenderScript rs = RenderScript.create(WajinViewerApplication
.getApplication());
ScriptC_fft scriptC_fft = new ScriptC_fft(rs);
float inReal[] = new float[512 * 512];
float inImg[] = new float[512 * 512];
int k = 0;
for (int i = 0; i < 512; i++) {
for (int j = 0; j < 512; j++) {
// copy values from complex 2d array to 1d array
inReal[k] = data[i][j].real;
inImg[k] = data[i][j].imaginary;
k++;
}
}
Allocation realAllocation = Allocation.createSized(rs, Element.F32(rs),
512 * 512);
Allocation imgAllocation = Allocation.createSized(rs, Element.F32(rs),
512 * 512);
realAllocation.copyFrom(inReal);
imgAllocation.copyFrom(inImg);
scriptC_fft.set_real(realAllocation);
scriptC_fft.set_img(imgAllocation);
Allocation inAllocation = Allocation.createSized(rs, Element.U16(rs),
512);
Allocation outAllocation = Allocation.createTyped(rs,
inAllocation.getType());
inAllocation.copyFrom(new short[512]);
// set direction
if (direction == Direction.Forward) {
scriptC_fft.set_is_forward(true);
} else {
scriptC_fft.set_is_forward(false);
}
scriptC_fft.set_len(512);
scriptC_fft.set_levels(Integer.numberOfLeadingZeros(512));
scriptC_fft.forEach_root(inAllocation, outAllocation);
outAllocation.copyTo(new short[512]);
float outReal[] = new float[512 * 512];
float outImg[] = new float[512 * 512];
scriptC_fft.get_real().copyTo(outReal);
scriptC_fft.get_img().copyTo(outImg);
k = 0;
for (int i = 0; i < 512; i++) {
for (int j = 0; j < 512; j++) {
// copy values from complex 1d array to 2d array
data[i][j].real = outReal[k];
data[i][j].imaginary = outImg[k];
k++;
}
}
rs.destroy();
And here is my Renderscript code
#pragma version(1)
#pragma rs java_package_name(jp.drmh.wajin.newversion)
#include "common.rsh"
rs_allocation real;
rs_allocation img;
bool is_forward;
uint32_t len;
uint32_t levels;
uint16_t __attribute__((kernel)) root(uint16_t in, uint32_t x, uint32_t y){
// rsDebug("call",x);
float realval[512];
float imagval[512];
if(is_forward){
for (uint32_t i = 0; i < len; i++) {
realval[i]=rsGetElementAt_float(real,x*512+i);
imagval[i]=rsGetElementAt_float(img,x*512+i);
//rsDebug("values", realval[i], imagval[i]);
}
}else{
for (uint32_t i = 0; i < len; i++) {
realval[i]=rsGetElementAt_float(img,x*512+i);
imagval[i]=rsGetElementAt_float(real,x*512+i);
}
}
float costable[256],sintable[256];
for (uint32_t i = 0; i < len / 2; i++) {
costable[i]=cos(2 * M_PI * i / len);
sintable[i]=sin(2 * M_PI * i / len);
}
// Bit-reversed addressing permutation
for (uint32_t i = 0; i < len; i++) {
uint32_t j = bit_reverse32(i);
uint32_t ans=j>>(32 - levels);
if (j > i) {
float temp = realval[i];
realval[i] = realval[j];
realval[j] = temp;
temp = imagval[i];
imagval[i] = imagval[j];
imagval[j] = temp;
}
}
for (uint32_t size = 2; size <= len; size *= 2) {
uint32_t halfsize = size / 2;
uint32_t tablestep = len / size;
for (uint32_t i = 0; i < len; i += size) {
for (uint32_t j = i, k = 0; j < i + halfsize; j++, k += tablestep) {
float tpre=realval[j + halfsize] * costable[k]
+ imagval[j + halfsize] * sintable[k];
float tpim = -realval[j + halfsize] * sintable[k]
+ imagval[j + halfsize] * costable[k];
realval[j + halfsize] = realval[j] - tpre;
imagval[j + halfsize] = imagval[j] - tpim;
realval[j] += tpre;
imagval[j] += tpim;
}
}
if (size == len)
break;
}
if(!is_forward){
for(uint32_t i = 0; i < len; i++){
realval[i]=realval[i]/len;
imagval[i]=imagval[i]/len;
rsDebug("values", realval[i], imagval[i]);
}
for (uint32_t i = 0; i < len; i++) {
rsSetElementAt_float(real, realval[i], x*512+i);
rsSetElementAt_float(img, imagval[i], x*512+i);
}
}
return in;
}
Hello everyone This is the code of iRPROP+ algo for my MLP. When I try to train my network, standart deviation decreases for 1500 epoches (so slow: from ~0.5 to 0.4732) but suddenly it starts to increase.
Can someone say what did I do wrong?
public void RPROP()
{
double a = 1.2, b = 0.5, nMax = 50, nMin = 0.000001;
for (int l = Network.Length - 1; l > 0; l--)
{
for (int i = 0; i < Network[l].getSize(); i++)
{
Neuron n = Network[l].Neurons[i];
double sum = 0;
if (l == Network.Length - 1) n.Delta = (n.Output - DesiredOutput[i]) * ActFunc.calcDeprivateFunction(n.Output);
else
{
for (int k = 0; k < Network[l + 1].getSize(); k++)
{
sum += Network[l + 1].Neurons[k].getWeight(i) * Network[l + 1].Neurons[k].Delta;
}
n.Delta = sum * ActFunc.calcDeprivateFunction(n.Output);
}
}
}
for (int l = 1; l < Network.Length; l++)
{
for (int i = 0; i < Network[l].getSize(); i++)
{
Neuron n = Network[l].Neurons[i];
if ((n.PrevDelta * n.Delta) > 0)
{
n.N = Math.Min(a * n.PrevN, nMax);
n.Bias -= n.N * Math.Sign(n.Delta);
for (int j = 0; j < Network[l - 1].getSize(); j++)
{
n.setWeight(j, n.getWeight(j) - n.N * Math.Sign(n.Delta));
}
n.PrevDelta = n.Delta;
}
else if ((n.PrevDelta * n.Delta) < 0)
{
n.N = Math.Max(b * n.PrevN, nMin);
if (this.CurrentError > this.LastError)
{
n.Bias += n.PrevN * Math.Sign(n.PrevDelta);
for (int j = 0; j < Network[l - 1].getSize(); j++)
{
n.setWeight(j, n.getWeight(j) + n.PrevN * Math.Sign(n.PrevDelta));
}
}
n.Delta = 0;
}
else if ((n.PrevDelta * n.Delta) == 0)
{
n.Bias -= n.N * Math.Sign(n.Delta);
for (int j = 0; j < Network[l - 1].getSize(); j++)
{
n.setWeight(j, n.getWeight(j) - n.N * Math.Sign(n.Delta));
}
n.PrevDelta = n.Delta;
}
n.PrevN = n.N;
}
}
}
For the first view, you calculate one train element error and you instantly teach it to the network. try to run over the full train set, without change the weights, and just summarize the Delta. After that, update the weights once, set the prev delta and start over.
Also, there is no update for neuron threshold.
I want to generate the random numbers using this loop. When i runs the apps at everytime, i want to generate the random numbers without duplicates.
Eg:
for(int i = 0; i < 5; i++)
{
// int d = random() % i;
NSLog(#"The Value %d",i);
NSLog(#"The random Number %d",i);
}
Actual Number Random Number
1 4
2 5
3 2
4 1
5 3
It's Random Permutation Generation problem. Read this: http://www.techuser.net/randpermgen.html
The main idea is (in pseudo code):
for (i=1 to n) ar[i] = i;
for (i=1 to n) swap(ar[i], ar[Random(i,n)]);
In your case:
int ar[5],i,d,tmp;
for(i = 0; i < 5; i++) ar[i] = i+1;
for(i = 0; i < 5; i++) {
d = i + (random()%(5-i));
tmp = ar[i];
ar[i] = ar[d];
ar[d] = tmp;
NSLog(#"%d",ar[i]);
}
Can be something like this,
int rand[5] = {0};
int max = 5;
for(int i = 0; i < max; i++){
int r = random() % max + 1;
while([self foundNumber:r inArray:rand limit:i){
r = random() % max + 1;
}
rand[i] = r;
}
- (BOOL) foundNumber:r inArray:rand limit:l {
for(int i = 0; i < l; i++){
if(rand[i] == r) return YES;
}
return NO;
}