free() and mxFree() in MATLAB - freeing memory twice

free() and mxFree() in MATLAB - freeing memory twice - matlab

Good day,
I have the following code which has given me problems for a day already.
I have debugged it, and it works fine until trying to free the memory. The free() function should be called at the end of the execution automatically, so I commented the mxFree() code out, in hope of getting a result. Even if I do that, the program frees the memory twice, like in the case of manually freeing memory - and thus I conclude that it is beyond my control.
*** glibc detected *** /usr/local/MATLAB/R2012a/bin/glnx86/MATLAB: free(): invalid pointer: 0xad2427a1 ***
Is there something I missed?
Note: I have tried some examples of .mex files with memory allocation and they work fine - so the mistake is down below, in my code.
/*
Beamforming algorithm
Arguments: xr, yr, zr,
t, ts, W, U,
Sdata, NrSensor c, omega_o
Output: S1_sum
S1_sum = beamforming(xr,yr,zr,t,ts,W,U,Sdata,NrSensor,c,omega_o)
*/
#include "mex.h"
#include <stdlib.h>
#include <math.h>
#define INPUT_ARGS 11
#define OUTPUT_ARGS 1
#define Ar(a, b) Ar[b+a*NrSensor]
#define Ai(a, b) Ai[b+a*NrSensor]
#define V(a, b) V[b+a*len_zr]
#define tau(a, b) tau[b+a*NrSensor]
#define tau_s(a, b) tau_s[b+a*len_zr]
#define tau_r(a, b) tau_r[b+a*NrSensor]
#define w(a, b) w[b+a*len_zr]
#define W(a, b) W[b+a*NrSensor]
#define arg(a, b) arg[b+a*len_zr]
#define newrange(a, b) newrange[b+a*len_zr]
#define oldrange(a, b) oldrange[b+a*len_zr]
#define S1_sum_r(a, b, c) S1_sum_r[c+b*len_xr+a*len_yr*len_xr]
#define S1_sum_i(a, b, c) S1_sum_i[c+b*len_xr+a*len_yr*len_xr]
#define Sdata_r(a, b) Sdata_r[b+a*NrSensor]
#define Sdata_i(a, b) Sdata_i[b+a*NrSensor]
#define S1interpr(a, b) S1interpr[b+a*len_zr]
#define S1interpi(a, b) S1interpi[b+a*len_zr]
#define PI_ 3.141592653
double sinc(double x){
return sin(PI_*x)/(PI_*x);
}
void mexFunction(int nlhs, mxArray * plhs[], int nrhs, const mxArray * prhs[])
{
/*
Declarations
*/
int xr_, yr_, i, j, m;
int len_xr, len_yr, len_zr, len_t,
len_t_, NrSensor, NrSensor_Cen;
double _t, _i, arg_min, arg_max, norm_factor;
double start_tau, end_tau, c, omega_o;
double *xr, *yr, *zr, *t, *ts,
*Sdata_r, *Sdata_i, *W;
double *Ar, *Ai, *V, *tau, *tau_r, *tau_s,
*arg, *U, *w, *t_, *x_r, *x_i, *start_index,
*newrange, *oldrange, *S1_sum_r, *S1_sum_i,
*S1interpi, *S1interpr;
/*
Checking number of arguments
*/
if(nrhs != INPUT_ARGS)
mxErrMsgTxt("Incorrect number of arguments!\n");
if(nlhs != OUTPUT_ARGS)
mxErrMsgTxt("Incorrect number of outputs!\n");
/*
Reading arguments
*/
xr = mxGetPr(prhs[0]);
yr = mxGetPr(prhs[1]);
zr = mxGetPr(prhs[2]);
t = mxGetPr(prhs[3]);
ts = mxGetPr(prhs[4]);
W = mxGetPr(prhs[5]);
U = mxGetPr(prhs[6]);
Sdata_r = mxGetPr(prhs[7]);
Sdata_i = mxGetPi(prhs[7]);
NrSensor = (int) mxGetScalar(prhs[8]);
c = mxGetScalar(prhs[9]);
omega_o = mxGetScalar(prhs[10]);
len_xr = mxGetN(prhs[0]);
len_yr = mxGetN(prhs[1]);
len_zr = mxGetN(prhs[2]);
len_t = mxGetM(prhs[3]);
/*
Initialisations
*/
_t = 0.0;
len_t_ = 0;
NrSensor_Cen = NrSensor/2;
/*
Space allocation and checking
*/
arg = malloc(sizeof(double)*len_zr*len_t);
Ar = malloc(sizeof(double)*len_zr*NrSensor);
Ai = malloc(sizeof(double)*len_zr*NrSensor);
V = malloc(sizeof(double)*len_zr*3);
tau = malloc(sizeof(double)*len_zr*NrSensor);
tau_s = malloc(sizeof(double)*len_zr*3);
tau_r = malloc(sizeof(double)*len_zr*NrSensor);
U = malloc(sizeof(double)*3);
w = malloc(sizeof(double)*len_zr*3);
W = malloc(sizeof(double)*NrSensor*3);
ts = malloc(sizeof(double)*len_zr);
t = malloc(sizeof(double)*len_t);
t_ = malloc(sizeof(double)*len_t);
x_r = malloc(sizeof(double)*len_t);
x_i = malloc(sizeof(double)*len_t);
arg = malloc(sizeof(double)*len_zr*len_t);
newrange = malloc(sizeof(double)*len_zr*len_t);
oldrange = malloc(sizeof(double)*len_zr*len_t);
S1interpr = malloc(sizeof(double)*NrSensor*len_zr);
S1interpi = malloc(sizeof(double)*NrSensor*len_zr);
start_index = malloc(sizeof(double)*len_t);
/*
S1_sum_r = mxMalloc(len_xr*len_yr*len_zr);
S1_sum_i = mxMalloc(len_xr*len_yr*len_zr);
*/
int dim_S1_sum[3] = {len_xr, len_yr, len_zr};
plhs[0] = mxCreateNumericArray(3, dim_S1_sum,
mxDOUBLE_CLASS,
mxCOMPLEX);
S1_sum_r = (double*) mxGetPr(plhs[0]);
S1_sum_i = (double*) mxGetPi(plhs[0]);
if(arg == NULL ||
Ar == NULL ||
Ai == NULL ||
V == NULL ||
tau == NULL ||
tau_s == NULL ||
tau_r == NULL ||
U == NULL ||
W == NULL ||
w == NULL ||
ts == NULL ||
t == NULL ||
t_ == NULL ||
x_r == NULL ||
x_i == NULL ||
start_index == NULL ||
newrange == NULL ||
oldrange == NULL ||
S1_sum_r == NULL ||
S1_sum_i == NULL ||
S1interpr == NULL||
S1interpi == NULL){
mxErrMsgTxt("Malloc error!\n");
return;
}
/*
--- INITIALISING S1interp, S1_sum, tau, full of zeros
*/
for(i=0; i<NrSensor; i++){
for(j=0; j<len_zr; j++){
S1interpr(i,j) = 0;
S1interpi(i,j) = 0;
}
}
for(i=0; i<len_xr; i++){
for(j=0; j<len_yr; j++){
for(m=0; m<len_zr; m++){
S1_sum_r(i,j,m) = 0;
S1_sum_i(i,j,m) = 0;
}
}
}
for(i=0; i<NrSensor; i++){
for(j=0; j<len_zr; j++){
tau(i,j) = 0;
}
}
/*
--- MAIN ALGORITHM ---
*/
for(xr_=0; xr_ < len_xr; xr_++){
for(yr_=0; yr_ < len_yr; yr_++){
for(i=0; i < len_zr; i++){
V(0, i) = xr[xr_];
V(1, i) = yr[yr_];
V(2, i) = zr[i];
}
for(i=0; i < len_zr; i++){
tau_s(0, i) = V(0, i) - U[0];
tau_s(1, i) = V(1, i) - U[1];
tau_s(2, i) = V(2, i) - U[2];
}
for(m=0; m < NrSensor; m++){
for(i=0; i < len_zr; i++){
/*
I see no point of the squeeze function
since W is already of known sizes
*/
w(0, i) = V(0, i) - W(0, m);
w(1, i) = V(1, i) - W(1, m);
w(2, i) = V(2, i) - W(2, m);
}
for(i=0; i< len_zr; i++){
/*
sum(w.*w)
*/
_t = w(0, i)*w(0,i) +
w(1, i)*w(1,i) +
w(2, i)*w(2,i);
tau_r(m, i) = sqrt(_t)/c;
}
}
for(m=0; m < len_zr; m++){
for(i=0; i < NrSensor; i++){
/*
Computing sum(tau_s(m, :).*tau_s(m, :))
*/
_t = tau_s(0, m)*tau_s(0, m) +
tau_s(1, m)*tau_s(1, m) +
tau_s(2, m)*tau_s(2, m);
tau(i, m) = tau_r(i, m) + sqrt(_t)/c;
}
}
/*
for(i=0; i < len_zr; i++){
for(j=0; j < NrSensor; j++){
Tau(i,j)=tau(i,j);
}
}
*/
for(i=0; i < len_zr; i++){
for(j=0; j < NrSensor; j++){
Ar(i,j)=cos(omega_o * tau(i, j));
Ai(i,j)=sin(omega_o * tau(i, j));
}
}
/*
--- BIG LOOP AHEAD ---
*/
for(m=0; m < NrSensor; m++){
start_tau = tau(NrSensor_Cen, 1);
end_tau = tau(NrSensor_Cen, len_zr);
/*
Finding index : writing start indexes
and also t_ array
*/
len_t_=0;
for(i=0; i<len_t; i++){
if(t[i] >= start_tau &&
t[i] <= end_tau){
start_index[len_t_] = i;
t_[len_t_] = t[i];
x_r[len_t_] = Sdata_r(i, m);
x_i[len_t_] = Sdata_i(i, m);
len_t_++;
}
}
for(i=0; i < len_zr; i++){
ts[i]=tau(m, i);
}
for(i=0; i < len_t_; i++){
for(j=0; j < len_zr; j++){
newrange(i, j) = ts[j];
}
}
for(i=0; i < len_zr; i++){
for(j=0; j < len_t_; j++){
oldrange(j, i) = t_[j];
}
}
for(i=0; i < len_t_; i++){
for(j=0; j < len_zr; j++){
arg(i, j) = newrange(i, j)-oldrange(i, j);
}
}
arg_min = arg[0];
for(i=0; i < len_t_; i++)
for(j=0; j < len_zr; j++)
if(arg_min>arg(i, j))
arg_min=arg(i, j);
arg_max = arg[0];
for(i=0; i < len_t_; i++)
for(j=0; j < len_zr; j++)
if(arg_max<arg(i, j))
arg_max=arg(i, j);
norm_factor = (2*len_t_)/(arg_max-arg_min);
for(i=0; i < len_zr; i++){
_t = 0;
for(j=0; j < NrSensor; j++){
_t = sinc(arg(i, j)*norm_factor)*x_r[j];
}
S1interpr(m, i) = Ar(m, i) * _t;
_t = 0;
for(j=0; j < NrSensor; j++){
_t = sinc(arg(i, j)*norm_factor)*x_i[j];
}
S1interpi(m, i) = Ai(m, i) * _t;
}
}
for(i=0; i < len_zr; i++){
_t = 0;
for(j=0; j < NrSensor; j++){
_t += S1interpr(j, i);
}
S1_sum_r(xr_, yr_, i) = _t;
_t = 0;
for(j=0; j < NrSensor; j++){
_t += S1interpi(j, i);
}
S1_sum_i(xr_, yr_, i) = _t;
}
}
}
free(arg);
free(Ar);
free(Ai);
free(V);
free(tau);
free(tau_s);
free(tau_r);
free(U);
free(w);
free(W);
free(ts);
free(t);
free(t_);
free(x_r);
free(x_i);
free(newrange);
free(oldrange);
free(S1interpr);
free(S1interpi);
free(start_index);
return;
}
EDIT
I have deleted all my code except the beginning (up to the memory allocation) and the memory freeing. I'm also using malloc() and free() now. In between the memory allocation and freeing, I have also put this code:
for(i=0; i<NrSensor; i++){
for(j=0; j<len_zr; j++){
S1interpr(i,j) = 0;
S1interpi(i,j) = 0;
}
}
for(i=0; i<NrSensor; i++){
for(j=0; j<len_zr; j++){
tau(i,j) = 0;
}
}
The first loop causes no problem. The second one though, apparently corrupts the variable V (declared before it) and the one declared after it. And it seems, according to my logic, that it does not exceed any kind of bounds...

The second loop does not index correctly for tau. You are defining indexing for tau as
#define tau(a, b) tau[b+a*NrSensor]
Let us walk through the second loop assuming NrSensor = 10 and len_zr = 5. For this case max value of loop variable i is 9 and max value of loop variable j is 4. Now,
tau(9,4) => tau[4+9*10] => tau[94].
But you are allocating tau with
tau = malloc(sizeof(double)*len_zr*NrSensor);
which for the sample values 10 and 5 is
tau = malloc(sizeof(double)*50)
You either need to change the indexing definition for tau to swap a and b or change the order of loops i and j.

Related

neural network for mnist keep guessing 1 digit

I'm working on feed forward neural network for solving mnist dataset without library to help me better understand the concept of neural network. But I think I miss something as the result guessed by the neural network keep guessing just one number. For example just guess digit 5 or digit 9, even if the weight just pure random.
node[0] always bias
feedforward:
int c = 1;
input[0] = 1;
for (int j = 0; j < 28; j++)
{
for (int k = 0; k < 28; k++)
{
if (traindata[i, j, k] > 126)
{
input[c] = 1;
}
else
{
input[c] = 0;
} //Console.Write(input[c]);
} //Console.WriteLine();
} //MessageBox.Show("Test");
//feed forward
hiddenlayer1[0] = 1;
double temp;
for (int j = 1; j < HIDDEN1; j++)
{
temp = 0;
for (int k = 0; k < INPUT; k++)
{
temp += input[k] * Winput_hiddenlayer1[k, j];
} hiddenlayer1[j] = sigmoid(temp); //MessageBox.Show(hiddenlayer1[j].ToString());
}
hiddenlayer2[0] = 1;
for (int j = 1; j < HIDDEN2; j++)
{
temp = 0;
for (int k = 0; k < HIDDEN1; k++)
{
temp += hiddenlayer1[k] * Whiddenlayer1_hiddenlayer2[k, j];
} hiddenlayer2[j] = sigmoid(temp);
}
for (int j = 0; j < OUTPUT; j++)
{
temp = 0;
for (int k = 0; k < HIDDEN2; k++)
{
temp += hiddenlayer2[k] * Whiddenlayer2_output[k, j];
} output[j] = sigmoid(temp);
}
and the backpropagation:
//set desired output
for (int j = 0; j < OUTPUT; j++)
{
Doutput[j] = 0;
} Doutput[labeltrain[i]] = 1;
//for (int j = 0; j < OUTPUT; j++)
//{
// Console.Write(Doutput[j].ToString());
//} Console.WriteLine();
//MessageBox.Show("Test");
//output error calculation
for (int j = 0; j < OUTPUT; j++)
{
outputerror[j] = (Doutput[j] - output[j]) * (1.0 - output[j]);
//Console.WriteLine("expected: " + Doutput[j]);
//Console.WriteLine("real: " + output[j]);
//Console.WriteLine("(Doutput[j] - output[j]): " + (Doutput[j] - output[j]));
//Console.WriteLine("1.0 - output[j]: " + (1.0 - output[j]));
//Console.WriteLine("output error: " + outputerror[j]);
//MessageBox.Show("Test");
}
//hidden2 error calculation
for (int j = 0; j < HIDDEN2; j++)
{
temp = 0;
for (int k = 0; k < OUTPUT; k++)
{
for (int l = 0; l < HIDDEN1; l++)
{
temp += outputerror[k] * Whiddenlayer1_hiddenlayer2[l, k];
}
} hidden2error[j] = temp * hiddenlayer2[j] * (1.0 - hiddenlayer2[j]);
}
//hidden1 error calculation
for (int j = 0; j < HIDDEN1; j++)
{
temp = 0;
for (int k = 0; k < HIDDEN2; k++)
{
for (int l = 0; l < INPUT; l++)
{
temp += hidden2error[k] * Winput_hiddenlayer1[l, k];
}
} hidden1error[j] = temp * hiddenlayer1[j] * (1.0 - hiddenlayer1[j]);
}
//hidden2-output weight adjustment
for (int j = 0; j < HIDDEN2; j++)
{
for (int k = 0; k < OUTPUT; k++)
{
Whiddenlayer2_output[j,k] += LEARNING_RATE * outputerror[k] * hiddenlayer2[j];
}
}
//hidden1-hidden2 weight adjusment
for (int j = 0; j < HIDDEN1; j++)
{
for (int k = 0; k < HIDDEN2; k++)
{
Whiddenlayer1_hiddenlayer2[j, k] += LEARNING_RATE * hidden2error[k] * hiddenlayer1[j];
}
}
//input-hidden1 weight adjustment
for (int j = 0; j < INPUT; j++)
{
for (int k = 0; k < HIDDEN1; k++)
{
Winput_hiddenlayer1[j, k] += LEARNING_RATE * hidden1error[k] * input[j];
}
}

Renderscript hangs device

I am implementing part of FFT algorithm using Renderscript in Android, When I ran the code my application got hanged. I want to process 512 values from real and img allocation at a time. Kernel will execute 512 times using provided dummy allocation of size 512.
Here is my java code
RenderScript rs = RenderScript.create(WajinViewerApplication
.getApplication());
ScriptC_fft scriptC_fft = new ScriptC_fft(rs);
float inReal[] = new float[512 * 512];
float inImg[] = new float[512 * 512];
int k = 0;
for (int i = 0; i < 512; i++) {
for (int j = 0; j < 512; j++) {
// copy values from complex 2d array to 1d array
inReal[k] = data[i][j].real;
inImg[k] = data[i][j].imaginary;
k++;
}
}
Allocation realAllocation = Allocation.createSized(rs, Element.F32(rs),
512 * 512);
Allocation imgAllocation = Allocation.createSized(rs, Element.F32(rs),
512 * 512);
realAllocation.copyFrom(inReal);
imgAllocation.copyFrom(inImg);
scriptC_fft.set_real(realAllocation);
scriptC_fft.set_img(imgAllocation);
Allocation inAllocation = Allocation.createSized(rs, Element.U16(rs),
512);
Allocation outAllocation = Allocation.createTyped(rs,
inAllocation.getType());
inAllocation.copyFrom(new short[512]);
// set direction
if (direction == Direction.Forward) {
scriptC_fft.set_is_forward(true);
} else {
scriptC_fft.set_is_forward(false);
}
scriptC_fft.set_len(512);
scriptC_fft.set_levels(Integer.numberOfLeadingZeros(512));
scriptC_fft.forEach_root(inAllocation, outAllocation);
outAllocation.copyTo(new short[512]);
float outReal[] = new float[512 * 512];
float outImg[] = new float[512 * 512];
scriptC_fft.get_real().copyTo(outReal);
scriptC_fft.get_img().copyTo(outImg);
k = 0;
for (int i = 0; i < 512; i++) {
for (int j = 0; j < 512; j++) {
// copy values from complex 1d array to 2d array
data[i][j].real = outReal[k];
data[i][j].imaginary = outImg[k];
k++;
}
}
rs.destroy();
And here is my Renderscript code
#pragma version(1)
#pragma rs java_package_name(jp.drmh.wajin.newversion)
#include "common.rsh"
rs_allocation real;
rs_allocation img;
bool is_forward;
uint32_t len;
uint32_t levels;
uint16_t __attribute__((kernel)) root(uint16_t in, uint32_t x, uint32_t y){
// rsDebug("call",x);
float realval[512];
float imagval[512];
if(is_forward){
for (uint32_t i = 0; i < len; i++) {
realval[i]=rsGetElementAt_float(real,x*512+i);
imagval[i]=rsGetElementAt_float(img,x*512+i);
//rsDebug("values", realval[i], imagval[i]);
}
}else{
for (uint32_t i = 0; i < len; i++) {
realval[i]=rsGetElementAt_float(img,x*512+i);
imagval[i]=rsGetElementAt_float(real,x*512+i);
}
}
float costable[256],sintable[256];
for (uint32_t i = 0; i < len / 2; i++) {
costable[i]=cos(2 * M_PI * i / len);
sintable[i]=sin(2 * M_PI * i / len);
}
// Bit-reversed addressing permutation
for (uint32_t i = 0; i < len; i++) {
uint32_t j = bit_reverse32(i);
uint32_t ans=j>>(32 - levels);
if (j > i) {
float temp = realval[i];
realval[i] = realval[j];
realval[j] = temp;
temp = imagval[i];
imagval[i] = imagval[j];
imagval[j] = temp;
}
}
for (uint32_t size = 2; size <= len; size *= 2) {
uint32_t halfsize = size / 2;
uint32_t tablestep = len / size;
for (uint32_t i = 0; i < len; i += size) {
for (uint32_t j = i, k = 0; j < i + halfsize; j++, k += tablestep) {
float tpre=realval[j + halfsize] * costable[k]
+ imagval[j + halfsize] * sintable[k];
float tpim = -realval[j + halfsize] * sintable[k]
+ imagval[j + halfsize] * costable[k];
realval[j + halfsize] = realval[j] - tpre;
imagval[j + halfsize] = imagval[j] - tpim;
realval[j] += tpre;
imagval[j] += tpim;
}
}
if (size == len)
break;
}
if(!is_forward){
for(uint32_t i = 0; i < len; i++){
realval[i]=realval[i]/len;
imagval[i]=imagval[i]/len;
rsDebug("values", realval[i], imagval[i]);
}
for (uint32_t i = 0; i < len; i++) {
rsSetElementAt_float(real, realval[i], x*512+i);
rsSetElementAt_float(img, imagval[i], x*512+i);
}
}
return in;
}

Where is the huge performance difference for the two versions of the code?

I am working on a problem that Given a string s, partitions s such that every substring of the partition is a palindrome.
Return the minimum cuts needed for a palindrome partitioning of s. The problem can also be found in here. https://oj.leetcode.com/problems/palindrome-partitioning-ii/
Version 1 is one version of solution I found online.
Version 2 is my code.
They both seem to work in very similar ways. However, with a reasonably large input, version 2 takes more than 6000 milliseconds whereas version 1 takes around 71 milliseconds.
Can anyone provide any idea where the time difference is from?
Version 1:
int minSol(string s) {
int len = s.size();
vector<int> D(len + 1);
vector<vector<int>> P;
for (int i = 0; i < len; i++){
vector<int> t(len);
P.push_back(t);
}
for (int i = 0; i <= len; i++)
D[i] = len - i;
for (int i = 0; i < len; i++)
for (int j = 0; j < len; j++)
P[i][j] = false;
for (int i = len - 1; i >= 0; i--){
for (int j = i; j < len; j++){
if (s[i] == s[j] && (j - i < 2 || P[i + 1][j - 1])){
P[i][j] = true;
D[i] = min(D[i], D[j + 1] + 1);
}
}
}
return D[0] - 1;
}
Version 2:
int minCut(string s) {
int size = s.size();
vector<vector<bool>> map;
for (int i = 0; i < size; i++){
vector<bool> t;
for (int j = 0; j < size; j++){
t.push_back(false);
}
map.push_back(t);
}
vector<int> minCuts;
for (int i = 0; i < size; i++){
map[i][i] = true;
minCuts.push_back(size - i - 1);
}
for (int i = size - 1; i >= 0; i--){
for (int j = size - 1; j >= i; j--){
if (s[i] == s[j] && (j - i <= 1 || map[i + 1][j - 1])){
map[i][j] = true;
if (j == size - 1){
minCuts[i] = 0;
}else if (minCuts[i] > minCuts[j + 1] + 1){
minCuts[i] = minCuts[j + 1] + 1;
}
}
}
}
return minCuts[0];
}

I would guess it's because in the second version you're doing size^2 push_back's, whereas in the first version you're just doing size push_back's.

iRPROP+ Multilayer Perceptron

Hello everyone This is the code of iRPROP+ algo for my MLP. When I try to train my network, standart deviation decreases for 1500 epoches (so slow: from ~0.5 to 0.4732) but suddenly it starts to increase.
Can someone say what did I do wrong?
public void RPROP()
{
double a = 1.2, b = 0.5, nMax = 50, nMin = 0.000001;
for (int l = Network.Length - 1; l > 0; l--)
{
for (int i = 0; i < Network[l].getSize(); i++)
{
Neuron n = Network[l].Neurons[i];
double sum = 0;
if (l == Network.Length - 1) n.Delta = (n.Output - DesiredOutput[i]) * ActFunc.calcDeprivateFunction(n.Output);
else
{
for (int k = 0; k < Network[l + 1].getSize(); k++)
{
sum += Network[l + 1].Neurons[k].getWeight(i) * Network[l + 1].Neurons[k].Delta;
}
n.Delta = sum * ActFunc.calcDeprivateFunction(n.Output);
}
}
}
for (int l = 1; l < Network.Length; l++)
{
for (int i = 0; i < Network[l].getSize(); i++)
{
Neuron n = Network[l].Neurons[i];
if ((n.PrevDelta * n.Delta) > 0)
{
n.N = Math.Min(a * n.PrevN, nMax);
n.Bias -= n.N * Math.Sign(n.Delta);
for (int j = 0; j < Network[l - 1].getSize(); j++)
{
n.setWeight(j, n.getWeight(j) - n.N * Math.Sign(n.Delta));
}
n.PrevDelta = n.Delta;
}
else if ((n.PrevDelta * n.Delta) < 0)
{
n.N = Math.Max(b * n.PrevN, nMin);
if (this.CurrentError > this.LastError)
{
n.Bias += n.PrevN * Math.Sign(n.PrevDelta);
for (int j = 0; j < Network[l - 1].getSize(); j++)
{
n.setWeight(j, n.getWeight(j) + n.PrevN * Math.Sign(n.PrevDelta));
}
}
n.Delta = 0;
}
else if ((n.PrevDelta * n.Delta) == 0)
{
n.Bias -= n.N * Math.Sign(n.Delta);
for (int j = 0; j < Network[l - 1].getSize(); j++)
{
n.setWeight(j, n.getWeight(j) - n.N * Math.Sign(n.Delta));
}
n.PrevDelta = n.Delta;
}
n.PrevN = n.N;
}
}
}

For the first view, you calculate one train element error and you instantly teach it to the network. try to run over the full train set, without change the weights, and just summarize the Delta. After that, update the weights once, set the prev delta and start over.
Also, there is no update for neuron threshold.

Looking for SLAB6 implementation

I'm looking to implement SLAB6 into my raycaster, especially the kv6 support for voxelmodels. However the SLAB6 source by Ken Silverman is totally unreadably (mostly ASM) so I was hoping someone could point me to a proper C / Java source to load kv6 models or maybe to explain me the workings in some pseudocode preferably (since I want to know how to support the kv6, I know how it works). Thanks, Kaj
EDIT: the implementation would be in Java.

I found some code in an application called VoxelGL (author not mentioned in sourcecode):
void CVoxelWorld::generateSlabFromData(unsigned char *data, VoxelData *vdata, Slab *slab)
{
int currentpattern = 1;
int i = 0;
int n, totalcount, v, count;
n = 0;
v = 0;
while (1)
{
while (data[i] == currentpattern)
{
if (currentpattern == 1)
v++;
i++;
if (i == 256)
break;
}
n++;
if (i == 256)
{
if (currentpattern == 0)
n--;
break;
}
currentpattern ^= 1;
}
slab->nentries = n;
if (slab->description != 0)delete [] slab->description;
if (slab->data != 0)delete [] slab->data;
slab->description = new int[n];
slab->data = new VoxelData[v];
totalcount = 0;
v = 0;
currentpattern = 1;
for (i = 0; i < n; i++)
{
count = 0;
while (data[totalcount] == currentpattern)
{
count++;
totalcount++;
if (totalcount == 256)
break;
}
slab->description[i] = count-1;
if (i % 2 == 0)
{
memcpy(slab->data + v, vdata + totalcount - count, 3 * count);
v += count;
}
currentpattern ^= 1;
}
}
And:
#define clustersize 8
Slab *CVoxelWorld::getSlab(int x, int z)
{
int xgrid = x / clustersize;
int ygrid = z / clustersize;
int clusteroffset = xgrid * 1024 * clustersize + ygrid * clustersize * clustersize;
return &m_data[clusteroffset + (x & (clustersize - 1)) + (z & (clustersize - 1)) * clustersize];
}
And:
int CVoxelWorld::isSolid(int x, int y, int z)
{
Slab *slab;
if (y < 0 || y > 256)
return 0;
slab = getSlab(x, z);
int counter = 0;
for (int i = 0; i < slab->nentries; i++)
{
int height = slab->description[i] + 1;
if (i % 2 == 0)
{
if (y >= counter && y < counter + height)
return 1;
}
counter += height;
}
return 0;
}

We Keep Coding

iphone swift flutter scala powershell matlab mongodb postgresql perl eclipse

free() and mxFree() in MATLAB - freeing memory twice - matlab

Related

neural network for mnist keep guessing 1 digit

Renderscript hangs device

Where is the huge performance difference for the two versions of the code?

iRPROP+ Multilayer Perceptron

Looking for SLAB6 implementation

Categories

Resources