Hello StackOverflow community
I have a very basic mexFunction implemented in Visual Studio 2013. I can compile an empty mexFunction as follows, and when I test it in MATLAB, it works perfectly:
#include "mex.h"
/* MEX gateway with an intentionally empty body: it reads none of its
   arguments and assigns no outputs. */
void mexFunction(int nlhs, mxArray* plhs[],
                 int nrhs, const mxArray* prhs[])
{
    /* Nothing to do. */
}
However, when I add any function call to the body, the compilation is still successful, but when I try to run it in MATLAB, MATLAB crashes.
#include "mex.h"
/* MEX gateway that prints a fixed greeting to the MATLAB command window.
   None of the gateway arguments are used. */
void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[])
{
    const char* message = "Hello World";

    /* mexPrintf takes a printf-style format string. Pass the text as an
       argument so a '%' in the message can never be read as a conversion. */
    mexPrintf("%s", message);
}
This is as basic an example as humanly possible, and it causes MATLAB to crash. It does not use any of the inputs I provide. I have tried 0 inputs to 3 inputs:
% Fill three numeric fields of cfg and pass the struct to the MEX function.
cfg.i = 1;
cfg.o = 2;
cfg.p = 3;
mcx(cfg);
mcx is the name of the mex function compiled by visual studio. I clear my entire folder before re-compiling the mexFunction. Any help on this problem will be very helpful.
Compilation of this code is successful, but it crashes MATLAB as well:
#include "mex.h"
/* MEX gateway that writes the fixed text "test" to the MATLAB command
   window; the gateway arguments are not used. */
void mexFunction(int nlhs, mxArray* plhs[],
                 int nrhs, const mxArray* prhs[])
{
    const char* const text = "test";
    mexPrintf(text);
}
I am trying to multiply multiple matrices from within MATLAB using cublasSgemmBatched in a MEX file.
My matlab code is very simple:
% Select GPU device 1.
gpuDevice(1);
% Random gpuArray inputs of size 400x10x1500 and 10x12x1500, converted to single.
a = single(rand(400,10,1500,'gpuArray'));
b = single(rand(10,12,1500,'gpuArray'));
% Call the MEX function; there is no trailing semicolon, so c is displayed.
c = MatCuda(a,b)
I get the following error:
Error using gpuArray/subsref
An unexpected error occurred during CUDA execution. The CUDA error was:
unknown error
and here's the mexFunction code:
void mexFunction( int nlhs, mxArray *plhs[],
int nrhs, const mxArray *prhs[]){
char const * const errId = "parallel:gpu:mexGPUExample:InvalidInput";
char const * const errMsg = "Invalid input to MEX file.";
/* Declare all variables.*/
mxGPUArray const *A;
mxGPUArray const *B;
mxGPUArray *C;
/* Initialize the MathWorks GPU API. */
mxInitGPU();
/* Throw an error if the input is not a GPU array. */
if ((nrhs != 2) || !(mxIsGPUArray(prhs[0])) || !(mxIsGPUArray(prhs[1]))) {
mexErrMsgIdAndTxt(errId, errMsg);
}
A = mxGPUCreateFromMxArray(prhs[0]);
B = mxGPUCreateFromMxArray(prhs[1]);
if ((mxGPUGetClassID(A) != mxSINGLE_CLASS) || (mxGPUGetClassID(B) != mxSINGLE_CLASS)) {
mexErrMsgIdAndTxt(errId, errMsg);
}
float const *d_A;
float const *d_B;
d_A = (float const *)(mxGPUGetDataReadOnly(A));
d_B = (float const *)(mxGPUGetDataReadOnly(B));
const mwSize *dimsA = mxGPUGetDimensions(A);
size_t nrowsA = dimsA[0];
size_t ncolsA = dimsA[1];
size_t nMatricesA = dimsA[2];
mxFree((void*) dimsA);
const mwSize *dimsB = mxGPUGetDimensions(B);
size_t nrowsB = dimsB[0];
size_t ncolsB = dimsB[1];
size_t nMatricesB = dimsB[2];
mxFree((void*)dimsB);
size_t nrowsC = nrowsA;
size_t ncolsC = ncolsB;
mwSize dimsC[3] = { nrowsA, ncolsB, nMatricesB };
C = mxGPUCreateGPUArray(mxGPUGetNumberOfDimensions(A),
dimsC,
mxGPUGetClassID(A),
mxGPUGetComplexity(A),
MX_GPU_DO_NOT_INITIALIZE);
float *d_C;
d_C = (float *)(mxGPUGetData(C));
cublasHandle_t handle;
cublasStatus_t ret;
ret = cublasCreate(&handle);
if (ret != CUBLAS_STATUS_SUCCESS)
{
printf("cublasCreate returned error code %d, line(%d)\n", ret, __LINE__);
exit(EXIT_FAILURE);
}
const float alpha = 1.0f;
const float beta = 0.0f;
ret = cublasSgemmBatched(handle, CUBLAS_OP_N, CUBLAS_OP_N, nrowsA, ncolsB, ncolsA, &alpha, &d_A, nrowsA, &d_B, nrowsB, &beta, &d_C, nrowsC, nMatricesA);
if (ret != CUBLAS_STATUS_SUCCESS)
{
printf("cublasSgemm returned error code %d, line(%d)\n", ret, __LINE__);
exit(EXIT_FAILURE);
}
ret = cublasDestroy(handle);
if (ret != CUBLAS_STATUS_SUCCESS)
{
printf("cublasCreate returned error code %d, line(%d)\n", ret, __LINE__);
exit(EXIT_FAILURE);
}
plhs[0] = mxGPUCreateMxArrayOnGPU(C);
mxGPUDestroyGPUArray(A);
mxGPUDestroyGPUArray(B);
mxGPUDestroyGPUArray(C);
}
I suspect it's related to the function cublasSgemmBatched because when I remove it from the code then I don't get the error.
Help will be very much appreciated !
Thanks !
You don't need a MEX file here, you can use pagefun to do this. Also, I'd recommend building a and b in single precision directly rather than casting. In other words,
% Create the operands directly in single precision on the GPU.
a = rand(400,10,1500,'single','gpuArray');
b = rand(10,12,1500,'single','gpuArray');
% Apply mtimes to each pair of pages; a function handle is written with '@'.
c = pagefun(@mtimes, a, b);
cublasDgemm works for me. I just pass regular arrays to the mexfunction. Here is my example code.
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <time.h>
#include "mex.h"
#include "mxGPUArray.h"
/* Sizes of the three matrices handed to matrixMultiply.
   The mexFunction in this file stores size dimension 0 in the uiW* fields and
   size dimension 1 in the uiH* fields of A and B. */
typedef struct _matrixSize
{
    unsigned int uiWA;
    unsigned int uiHA;
    unsigned int uiWB;
    unsigned int uiHB;
    unsigned int uiWC;
    unsigned int uiHC;
} sMatrixSize;

/* Multiplies the device matrices d_A and d_B into d_C using the sizes in matrix_size. */
void matrixMultiply(double const* d_A, double const* d_B, double* d_C,
                    sMatrixSize &matrix_size);
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, mxArray const *prhs[])
{
mxGPUArray const *A;
mxGPUArray const *B;
mxGPUArray *C;
_matrixSize matrix_size;
mwSize const *A_sz;
mwSize const *B_sz;
double const *d_A;
double const *d_B;
double *d_C;
char const * const errId = "parallel:gpu:mexGPUExample:InvalidInput";
char const * const errMsg = "Invalid input to MEX file.";
if (nrhs != 2) {
mexErrMsgTxt("Need two inputs");
}
A = mxGPUCreateFromMxArray(prhs[0]);
B = mxGPUCreateFromMxArray(prhs[1]);
A_sz=mxGPUGetDimensions(A);
B_sz = mxGPUGetDimensions(B);
matrix_size.uiWA = (unsigned int)A_sz[0]; matrix_size.uiHA = (unsigned int)A_sz[1];
matrix_size.uiWB = (unsigned int)B_sz[0]; matrix_size.uiHB = (unsigned int)B_sz[1];
mwSize C_sz[3] = { matrix_size.uiWA, matrix_size.uiHB, 1 };
d_A = (double const *)(mxGPUGetDataReadOnly(A));
d_B = (double const *)(mxGPUGetDataReadOnly(B));
C = mxGPUCreateGPUArray(mxGPUGetNumberOfDimensions(A),
C_sz,
mxGPUGetClassID(A),
mxGPUGetComplexity(A),
MX_GPU_DO_NOT_INITIALIZE);
d_C = (double *)(mxGPUGetData(C));
matrixMultiply(d_A, d_B, d_C, matrix_size);
plhs[0] = mxGPUCreateMxArrayOnGPU(C);
mxFree((void*)A_sz);
mxFree((void*)B_sz);
mxGPUDestroyGPUArray(A);
mxGPUDestroyGPUArray(B);
mxGPUDestroyGPUArray(C);
}
void matrixMultiply(double const* d_A, double const* d_B, double* d_C, sMatrixSize &matrix_size)
{
cublasStatus_t status;
cublasHandle_t handle;
status=cublasCreate(&handle);
if (status != CUBLAS_STATUS_SUCCESS)
{
if (status == CUBLAS_STATUS_NOT_INITIALIZED) {
mexPrintf("CUBLAS initializing error");
}
getchar();
return;
}
const double alpha = 1.0f;
const double beta = 0.0f;
cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, matrix_size.uiWB, matrix_size.uiHA, matrix_size.uiWA, &alpha, d_B, matrix_size.uiWB, d_A, matrix_size.uiWA, &beta, d_C, matrix_size.uiWB);
cudaThreadSynchronize();
cublasDestroy(handle);
}
There is also a more native CUDA style. Both work fine for me.
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <time.h>
#include "mex.h"
#include "mxGPUArray.h"
/* Sizes of the three matrices handed to matrixMultiply.
   The mexFunction in this file stores mxGetM() in the uiW* fields and
   mxGetN() in the uiH* fields of A and B. */
typedef struct _matrixSize
{
    unsigned int uiWA;
    unsigned int uiHA;
    unsigned int uiWB;
    unsigned int uiHB;
    unsigned int uiWC;
    unsigned int uiHC;
} sMatrixSize;

/* Multiplies the device matrices d_A and d_B into d_C using the sizes in matrix_size. */
void matrixMultiply(double const* d_A, double const* d_B, double* d_C,
                    sMatrixSize &matrix_size);
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, mxArray const *prhs[])
{
mxArray const *mA;
mxArray const *mB;
_matrixSize matrix_size;
size_t A_w, A_h, B_w, B_h;
double *d_A;
double *d_B;
double *d_C;
double *h_A;
double *h_B;
double *h_C;
char const * const errId = "parallel:gpu:mexGPUExample:InvalidInput";
char const * const errMsg = "Invalid input to MEX file.";
if (nrhs != 2) {
mexErrMsgTxt("Need two inputs");
}
mA = prhs[0]; mB = prhs[1];
A_w = mxGetM(mA);A_h = mxGetN(mA);B_w = mxGetM(mB);B_h = mxGetN(mB);
matrix_size.uiWA = (unsigned int)A_w; matrix_size.uiHA = (unsigned int)A_h;
matrix_size.uiWB = (unsigned int)B_w; matrix_size.uiHB = (unsigned int)B_h;
matrix_size.uiWC = (unsigned int)A_w; matrix_size.uiHC = (unsigned int)B_h;
mwSize const C_sz[3] = { matrix_size.uiWA, matrix_size.uiHB, 1 };
unsigned int size_A = matrix_size.uiWA * matrix_size.uiHA;
unsigned int mem_size_A = sizeof(double) * size_A;
h_A = (double*)mxGetData(mA);
unsigned int size_B = matrix_size.uiWB * matrix_size.uiHB;
unsigned int mem_size_B = sizeof(double) * size_B;
h_B = (double*)mxGetData(mB);
unsigned int size_C = matrix_size.uiWC * matrix_size.uiHC;
unsigned int mem_size_C = sizeof(double) * size_C;
plhs[0] = mxCreateNumericArray(3, C_sz, mxDOUBLE_CLASS, mxREAL);
h_C = (double*)mxGetData(plhs[0]);
cudaMalloc((void **)&d_A, mem_size_A);
cudaMalloc((void **)&d_B, mem_size_B);
cudaMemcpy(d_A, h_A, mem_size_A, cudaMemcpyHostToDevice);
cudaMemcpy(d_B, h_B, mem_size_B, cudaMemcpyHostToDevice);
cudaMalloc((void **)&d_C, mem_size_C);
matrixMultiply(d_A, d_B, d_C, matrix_size);
cudaMemcpy(h_C, d_C, mem_size_C, cudaMemcpyDeviceToHost);
cudaThreadSynchronize();
cudaFree(d_A);
cudaFree(d_B);
cudaFree(d_C);
}
void matrixMultiply(double const* d_A, double const* d_B, double* d_C, sMatrixSize &matrix_size)
{
cublasHandle_t handle;
cublasCreate(&handle);
const double alpha = 1.0f;
const double beta = 0.0f;
cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, matrix_size.uiWB, matrix_size.uiHA, matrix_size.uiWA, &alpha, d_B, matrix_size.uiWB, d_A, matrix_size.uiWA, &beta, d_C, matrix_size.uiWB);
cublasDestroy(handle);
}
I have a very basic mex file example here:
#include "mex.h"
#include "matrix.h"
/* Creates a 1x1 struct array with the fields "first" and "second" and
   stores its address in main_array.
   main_array is the function's own by-value copy of the pointer, so the
   assignment is not visible to the caller. */
void createStructureArray(mxArray* main_array)
{
    const char* fieldNames[] = { "first", "second" };
    const int fieldCount = 2;

    main_array = mxCreateStructMatrix(1, 1, fieldCount, fieldNames);
}
void mexFunction(mwSize nlhs, mxArray *plhs[], mwSize nrhs,
const mxArray *prhs[])
{
double* x = mxGetPr(prhs[0]);
if (*x < 1.0)
{
//This works
const char* Title[] = { "first", "second" };
plhs[0] = mxCreateStructMatrix(1,1, 2, Title);
}
else
{
//This does not
createStructureArray(plhs[0]);
}
}
This function should always return a struct with the fields first and second; no matter the input, I expect the same output. With an input parameter < 1 everything works as expected, but with an input >= 1 I get an error message:
>> a = easy_example(0.0)
a =
first: []
second: []
>> a = easy_example(2.0)
One or more output arguments not assigned during call to "easy_example".
Thus, can I not call the mxCreateStructMatrix function outside mexFunction, or did I do something wrong when passing the pointers?
You don't have a problem with mex but with pointers!
Try to change your function to:
/* Creates a 1x1 struct array with the fields "first" and "second" and
   writes its address through main_array, so the caller's mxArray pointer
   is updated. */
void createStructureArray(mxArray** main_array)
{
    const char* fieldNames[] = { "first", "second" };
    const int fieldCount = 2;

    mxArray* created = mxCreateStructMatrix(1, 1, fieldCount, fieldNames);
    *main_array = created;
}
and the function call to
createStructureArray(&plhs[0]);
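Your problem is that plhs[0] is an mxArray* that is passed by value, so the assignment inside the function only changes a local copy. To return the new struct, pass a pointer to that pointer (mxArray**) and assign through it.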
I tried to input this struct from MATLAB into my MEX file: struct('speed',{100.3},'nr',{55.4},'on',{54}), but the last value, which is defined in my MEX file as unsigned char, reads out as zero before calling my C function. The two double values work as intended.
/* Plain C-side record filled from the MATLAB struct's fields. */
struct post_TAG
{
    double        speed;
    double        nr;
    unsigned char on;
};

/* Field names of the MATLAB struct, in the order of the post_TAG members. */
const char *keys[] = {
    "speed",
    "nr",
    "on"
};
/* Receives a pointer to the filled post_TAG record.
   The body is elided ("...") in this excerpt. */
void testmex(post_TAG *post)
{
...
}
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, mxArray *prhs[])
{
post_TAG post;
int numFields, i;
const char *fnames[3];
mxArray *tmp;
double *a,*b;
unsigned char *c;
numFields=mxGetNumberOfFields(prhs[0]);
for(i=0;i<numFields;i++)
fnames[i] = mxGetFieldNameByNumber(prhs[0],i);
tmp = mxGetField(prhs[0],0,fnames[0]);
a=(double*)mxGetData(tmp);
tmp = mxGetField(prhs[0],0,fnames[1]);
b=(double*)mxGetData(tmp);
tmp = mxGetField(prhs[0],0,fnames[2]);
c=(unsigned char*)mxGetData(tmp);
mexPrintf("POST0, speed=%f, nr=%f, on=%u\n",*a,*b,*c);
post.speed = *a;
post.nr = *b;
post.on = *c;
testmex(&post);
}
In a struct defined as struct('speed',{100.3},'nr',{55.4},'on',{54}), the field on is a double. Pass as a uint8 from MATLAB:
struct('speed',{100.3},'nr',{55.4},...
'on',{uint8(54)}),
Any numeric value without a specified type in MATLAB is a double.
Also note that for reading a scalar value, the problem is simplified somewhat by mxGetScalar. It will return one double value for any underlying data type.
unsigned char s = (unsigned char) mxGetScalar(...); // cast a double to unsigned char
I'm trying to plot data out from a MEX file (C language). To do this fast, I would like to use what corresponds to the following Matlab code:
% Draw an image once and keep its handle.
figure; imagehandle = imagesc(rand(500));
new_CData = rand(500);
% Replace only the image data: set(handle, property name, value).
set(imagehandle,'CData',new_CData);
For this, the command mexSet() should be working. Ideally, I want something like this
% Compile the MEX source.
mex plotX.c
% Draw an image once and keep its handle.
figure; imagehandle = imagesc(rand(500));
A = rand(500);
% Pass the new data and the image handle to the MEX function.
plotX(A,imagehandle)
with a mex-Function plotX. Here is my tryout:
#include "mex.h"
/* MEX gateway from the question: takes the data pointer of prhs[0] and the
   scalar value of prhs[1], then calls mexSet with that raw double pointer. */
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
double *x, imagehandle;
(void) plhs;
/* x points at the element data of prhs[0], not at the mxArray itself. */
x = mxGetPr(prhs[0]);
imagehandle = mxGetScalar(prhs[1]);
/* NOTE(review): the third argument here is a double*; the compiler note
   quoted with the solution says an mxArray* is expected. */
mexSet(imagehandle,"Cdata",x);
}
This can be compiled, but I get the following error: "Error using plotX. Numeric or logical matrix required for image CData".
What am I doing wrong?
Solution:
#include "mex.h"
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
double *x, imagehandle;
(void) plhs;
imagehandle = mxGetScalar(prhs[1]);
mexSet(imagehandle,"Cdata",prhs[0]);
}
I found this out when I used a different compiler, which gave me the following message: "note: expected ‘struct mxArray *’ but argument is of type ‘double *’". Each element of prhs[] is already a pointer to an mxArray, so it was easy to solve ...