I am trying to multiply Multiple Matrices from within matlab using cublasSgemmBatched in a mex file.
My matlab code is very simple:
gpuDevice(1);
a = single(rand(400,10,1500,'gpuArray'));
b = single(rand(10,12,1500,'gpuArray'));
c = MatCuda(a,b)
I get the following error:
Error using gpuArray/subsref
An unexpected error occurred during CUDA execution. The CUDA error was:
unknown error
and here's the mexFunction code:
void mexFunction( int nlhs, mxArray *plhs[],
int nrhs, const mxArray *prhs[]){
char const * const errId = "parallel:gpu:mexGPUExample:InvalidInput";
char const * const errMsg = "Invalid input to MEX file.";
/* Declare all variables.*/
mxGPUArray const *A;
mxGPUArray const *B;
mxGPUArray *C;
/* Initialize the MathWorks GPU API. */
mxInitGPU();
/* Throw an error if the input is not a GPU array. */
if ((nrhs != 2) || !(mxIsGPUArray(prhs[0])) || !(mxIsGPUArray(prhs[1]))) {
mexErrMsgIdAndTxt(errId, errMsg);
}
A = mxGPUCreateFromMxArray(prhs[0]);
B = mxGPUCreateFromMxArray(prhs[1]);
if ((mxGPUGetClassID(A) != mxSINGLE_CLASS) || (mxGPUGetClassID(B) != mxSINGLE_CLASS)) {
mexErrMsgIdAndTxt(errId, errMsg);
}
float const *d_A;
float const *d_B;
d_A = (float const *)(mxGPUGetDataReadOnly(A));
d_B = (float const *)(mxGPUGetDataReadOnly(B));
const mwSize *dimsA = mxGPUGetDimensions(A);
size_t nrowsA = dimsA[0];
size_t ncolsA = dimsA[1];
size_t nMatricesA = dimsA[2];
mxFree((void*) dimsA);
const mwSize *dimsB = mxGPUGetDimensions(B);
size_t nrowsB = dimsB[0];
size_t ncolsB = dimsB[1];
size_t nMatricesB = dimsB[2];
mxFree((void*)dimsB);
size_t nrowsC = nrowsA;
size_t ncolsC = ncolsB;
mwSize dimsC[3] = { nrowsA, ncolsB, nMatricesB };
C = mxGPUCreateGPUArray(mxGPUGetNumberOfDimensions(A),
dimsC,
mxGPUGetClassID(A),
mxGPUGetComplexity(A),
MX_GPU_DO_NOT_INITIALIZE);
float *d_C;
d_C = (float *)(mxGPUGetData(C));
cublasHandle_t handle;
cublasStatus_t ret;
ret = cublasCreate(&handle);
if (ret != CUBLAS_STATUS_SUCCESS)
{
printf("cublasCreate returned error code %d, line(%d)\n", ret, __LINE__);
exit(EXIT_FAILURE);
}
const float alpha = 1.0f;
const float beta = 0.0f;
ret = cublasSgemmBatched(handle, CUBLAS_OP_N, CUBLAS_OP_N, nrowsA, ncolsB, ncolsA, &alpha, &d_A, nrowsA, &d_B, nrowsB, &beta, &d_C, nrowsC, nMatricesA);
if (ret != CUBLAS_STATUS_SUCCESS)
{
printf("cublasSgemm returned error code %d, line(%d)\n", ret, __LINE__);
exit(EXIT_FAILURE);
}
ret = cublasDestroy(handle);
if (ret != CUBLAS_STATUS_SUCCESS)
{
printf("cublasCreate returned error code %d, line(%d)\n", ret, __LINE__);
exit(EXIT_FAILURE);
}
plhs[0] = mxGPUCreateMxArrayOnGPU(C);
mxGPUDestroyGPUArray(A);
mxGPUDestroyGPUArray(B);
mxGPUDestroyGPUArray(C);
}
I suspect it's related to the function cublasSgemmBatched because when I remove it from the code then I don't get the error.
Help will be very much appreciated !
Thanks !
You don't need a MEX file here, you can use pagefun to do this. Also, I'd recommend building a and b in single precision directly rather than casting. In other words,
a = rand(400,10,1500,'single','gpuArray');
b = rand(10,12,1500,'single','gpuArray');
c = pagefun(#mtimes, a, b);
cublasDgemm works for me. I just pass regular arrays to the mexfunction. Here is my example code.
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <time.h>
#include "mex.h"
#include "mxGPUArray.h"
typedef struct _matrixSize
{
unsigned int uiWA, uiHA, uiWB, uiHB, uiWC, uiHC;
} sMatrixSize;
void matrixMultiply(double const* d_A, double const* d_B, double* d_C, sMatrixSize &matrix_size);
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, mxArray const *prhs[])
{
mxGPUArray const *A;
mxGPUArray const *B;
mxGPUArray *C;
_matrixSize matrix_size;
mwSize const *A_sz;
mwSize const *B_sz;
double const *d_A;
double const *d_B;
double *d_C;
char const * const errId = "parallel:gpu:mexGPUExample:InvalidInput";
char const * const errMsg = "Invalid input to MEX file.";
if (nrhs != 2) {
mexErrMsgTxt("Need two inputs");
}
A = mxGPUCreateFromMxArray(prhs[0]);
B = mxGPUCreateFromMxArray(prhs[1]);
A_sz=mxGPUGetDimensions(A);
B_sz = mxGPUGetDimensions(B);
matrix_size.uiWA = (unsigned int)A_sz[0]; matrix_size.uiHA = (unsigned int)A_sz[1];
matrix_size.uiWB = (unsigned int)B_sz[0]; matrix_size.uiHB = (unsigned int)B_sz[1];
mwSize C_sz[3] = { matrix_size.uiWA, matrix_size.uiHB, 1 };
d_A = (double const *)(mxGPUGetDataReadOnly(A));
d_B = (double const *)(mxGPUGetDataReadOnly(B));
C = mxGPUCreateGPUArray(mxGPUGetNumberOfDimensions(A),
C_sz,
mxGPUGetClassID(A),
mxGPUGetComplexity(A),
MX_GPU_DO_NOT_INITIALIZE);
d_C = (double *)(mxGPUGetData(C));
matrixMultiply(d_A, d_B, d_C, matrix_size);
plhs[0] = mxGPUCreateMxArrayOnGPU(C);
mxFree((void*)A_sz);
mxFree((void*)B_sz);
mxGPUDestroyGPUArray(A);
mxGPUDestroyGPUArray(B);
mxGPUDestroyGPUArray(C);
}
void matrixMultiply(double const* d_A, double const* d_B, double* d_C, sMatrixSize &matrix_size)
{
cublasStatus_t status;
cublasHandle_t handle;
status=cublasCreate(&handle);
if (status != CUBLAS_STATUS_SUCCESS)
{
if (status == CUBLAS_STATUS_NOT_INITIALIZED) {
mexPrintf("CUBLAS initializing error");
}
getchar();
return;
}
const double alpha = 1.0f;
const double beta = 0.0f;
cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, matrix_size.uiWB, matrix_size.uiHA, matrix_size.uiWA, &alpha, d_B, matrix_size.uiWB, d_A, matrix_size.uiWA, &beta, d_C, matrix_size.uiWB);
cudaThreadSynchronize();
cublasDestroy(handle);
}
There is also a more native cuda style. Both works fine for me.
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <time.h>
#include "mex.h"
#include "mxGPUArray.h"
typedef struct _matrixSize
{
unsigned int uiWA, uiHA, uiWB, uiHB, uiWC, uiHC;
} sMatrixSize;
void matrixMultiply(double const* d_A, double const* d_B, double* d_C, sMatrixSize &matrix_size);
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, mxArray const *prhs[])
{
mxArray const *mA;
mxArray const *mB;
_matrixSize matrix_size;
size_t A_w, A_h, B_w, B_h;
double *d_A;
double *d_B;
double *d_C;
double *h_A;
double *h_B;
double *h_C;
char const * const errId = "parallel:gpu:mexGPUExample:InvalidInput";
char const * const errMsg = "Invalid input to MEX file.";
if (nrhs != 2) {
mexErrMsgTxt("Need two inputs");
}
mA = prhs[0]; mB = prhs[1];
A_w = mxGetM(mA);A_h = mxGetN(mA);B_w = mxGetM(mB);B_h = mxGetN(mB);
matrix_size.uiWA = (unsigned int)A_w; matrix_size.uiHA = (unsigned int)A_h;
matrix_size.uiWB = (unsigned int)B_w; matrix_size.uiHB = (unsigned int)B_h;
matrix_size.uiWC = (unsigned int)A_w; matrix_size.uiHC = (unsigned int)B_h;
mwSize const C_sz[3] = { matrix_size.uiWA, matrix_size.uiHB, 1 };
unsigned int size_A = matrix_size.uiWA * matrix_size.uiHA;
unsigned int mem_size_A = sizeof(double) * size_A;
h_A = (double*)mxGetData(mA);
unsigned int size_B = matrix_size.uiWB * matrix_size.uiHB;
unsigned int mem_size_B = sizeof(double) * size_B;
h_B = (double*)mxGetData(mB);
unsigned int size_C = matrix_size.uiWC * matrix_size.uiHC;
unsigned int mem_size_C = sizeof(double) * size_C;
plhs[0] = mxCreateNumericArray(3, C_sz, mxDOUBLE_CLASS, mxREAL);
h_C = (double*)mxGetData(plhs[0]);
cudaMalloc((void **)&d_A, mem_size_A);
cudaMalloc((void **)&d_B, mem_size_B);
cudaMemcpy(d_A, h_A, mem_size_A, cudaMemcpyHostToDevice);
cudaMemcpy(d_B, h_B, mem_size_B, cudaMemcpyHostToDevice);
cudaMalloc((void **)&d_C, mem_size_C);
matrixMultiply(d_A, d_B, d_C, matrix_size);
cudaMemcpy(h_C, d_C, mem_size_C, cudaMemcpyDeviceToHost);
cudaThreadSynchronize();
cudaFree(d_A);
cudaFree(d_B);
cudaFree(d_C);
}
void matrixMultiply(double const* d_A, double const* d_B, double* d_C, sMatrixSize &matrix_size)
{
cublasHandle_t handle;
cublasCreate(&handle);
const double alpha = 1.0f;
const double beta = 0.0f;
cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, matrix_size.uiWB, matrix_size.uiHA, matrix_size.uiWA, &alpha, d_B, matrix_size.uiWB, d_A, matrix_size.uiWA, &beta, d_C, matrix_size.uiWB);
cublasDestroy(handle);
}
Related
I attach here my kernel module I developed and the test I am using at application level
memalloc.c
/*
* DMA memory allocation
* This kernel module allocates coherent, non-cached memory
* and returns the physical and virtual address of the allocated buffer
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/dma-mapping.h>
#include <linux/device.h>
#include <linux/cdev.h>
#include "memalloc.h"
#define DEVICE_NAME "memalloc"
// Max number of buffers
#define BUFFER_MAX_NUMBER 16
// Structure for buffer information
struct bufferInfo {
int active;
int size;
dma_addr_t handle;
int virtualAddress;
int *kernelAddress;
};
static struct bufferInfo bufferInfoTable[BUFFER_MAX_NUMBER];
// Defines which buffer is currently active - for mmap
static int activeBufferID;
struct memAllocIF {
struct device *device_p;
dev_t dev_node;
struct cdev cdev;
struct class *class_p;
};
static struct memAllocIF interface;
// Methods
static int releaseBuffer(int i)
{
if (i > BUFFER_MAX_NUMBER)
{
printk("Wrong bufferID %d\n", i);
return -1;
}
printk("Releasing buffer %d\n", i);
bufferInfoTable[i].active = 0;
dma_free_coherent(NULL, bufferInfoTable[i].size, bufferInfoTable[i].kernelAddress, bufferInfoTable[i].handle);
return 0;
}
static int reserveBuffer(size_t size)
{
int i;
for (i = 0; i < BUFFER_MAX_NUMBER; i++)
{
if (bufferInfoTable[i].active == 0)
{
printk("Reserving buffer %d\n", i);
bufferInfoTable[i].active = 1;
break;
}
}
if (i < BUFFER_MAX_NUMBER)
{
bufferInfoTable[i].kernelAddress = dma_alloc_coherent(NULL, size, &bufferInfoTable[i].handle, GFP_KERNEL);
if (bufferInfoTable[i].kernelAddress == NULL)
{
printk("Allocation failure\n");
return -1;
}
bufferInfoTable[i].size = (int)size;
return i;
}
else
{
printk("No buffer available\n");
return -1;
}
}
static void cleanup(void)
{
int i;
for (i = 0; i < BUFFER_MAX_NUMBER; i++)
{
if (bufferInfoTable[i].active != 0)
{
dma_free_coherent(NULL, bufferInfoTable[i].size, bufferInfoTable[i].kernelAddress, bufferInfoTable[i].handle);
bufferInfoTable[i].active = 0;
}
}
}
static unsigned int memAllocGetVirtual (int i)
{
if (i > BUFFER_MAX_NUMBER)
{
printk("Wrong bufferID %d\n", i);
return -1;
}
if (bufferInfoTable[i].active == 0)
{
printk("Inactive buffer - ID %d\n", i);
return -1;
}
printk("request for buffer %d: vaddr = %X\n", i, (unsigned int)bufferInfoTable[i].virtualAddress);
return bufferInfoTable[i].virtualAddress;
}
static unsigned int memAllocGetPhysical (int i)
{
if (i > BUFFER_MAX_NUMBER)
{
printk("Wrong bufferID %d\n", i);
return -1;
}
return (unsigned int)bufferInfoTable[i].handle;
}
static long memAllocIoctl (struct file *fd, unsigned int cmd, unsigned long arg)
{
printk("received command %u arg %lu\n", cmd, arg);
switch(cmd)
{
case MEMALLOC_RESERVE:
return reserveBuffer(arg);
break;
case MEMALLOC_RELEASE:
return releaseBuffer(arg);
break;
case MEMALLOC_GET_VIRTUAL:
return memAllocGetVirtual(arg);
break;
case MEMALLOC_GET_PHYSICAL:
return memAllocGetPhysical(arg);
break;
case MEMALLOC_ACTIVATE_BUFFER:
if (arg > BUFFER_MAX_NUMBER || bufferInfoTable[arg].active == 0)
{
printk("Wrong bufferID %lu\n", arg);
return -1;
}
activeBufferID = arg;
return arg;
break;
default:
printk("Wrong command: %d\n", cmd);
return -1;
break;
}
}
static int memAllocMmap (struct file *fd, struct vm_area_struct *vma)
{
bufferInfoTable[activeBufferID].virtualAddress = dma_common_mmap(interface.device_p, vma, bufferInfoTable[activeBufferID].kernelAddress, bufferInfoTable[activeBufferID].handle, vma->vm_end-vma->vm_start);
printk("mmap for idx %d: vaddr = %X\n", activeBufferID, (int)bufferInfoTable[activeBufferID].virtualAddress);
return bufferInfoTable[activeBufferID].virtualAddress;
}
static int memAllocRelease(struct inode *in, struct file *fd)
{
cleanup();
return 0;
}
static int memAllocOpen(struct inode *ino, struct file *file)
{
file->private_data = container_of(ino->i_cdev, struct memAllocIF, cdev);
return 0;
}
static struct file_operations fops = {
.unlocked_ioctl = memAllocIoctl,
.mmap = memAllocMmap,
.release = memAllocRelease,
.open = memAllocOpen
};
static int __init memAllocInit(void)
{
int rc;
int i;
static struct class *local_class_p = NULL;
printk("Loading DMA allocation module\n");
// Allocate a character device from the kernel for this driver
rc = alloc_chrdev_region(&interface.dev_node, 0, 1, DEVICE_NAME);
if (rc)
{
printk("Unable to get a char device number\n");
return rc;
}
// Initialize the ter device data structure before registering the character device with the kernel
cdev_init(&interface.cdev, &fops);
rc = cdev_add(&interface.cdev, interface.dev_node, 1);
if (rc)
{
printk("Unable to add char device\n");
cdev_del(&interface.cdev);
return rc;
}
// Create the device in sysfs which will allow the device node in /dev to be created
local_class_p = class_create(THIS_MODULE, DEVICE_NAME);
interface.class_p = local_class_p;
// Create the device node in /dev so the device is accessible as a character device
interface.device_p = device_create(interface.class_p, NULL, interface.dev_node, NULL, DEVICE_NAME);
if (IS_ERR(interface.device_p))
{
printk("Unable to create the device\n");
class_destroy(interface.class_p);
cdev_del(&interface.cdev);
return rc;
}
for (i = 0; i < BUFFER_MAX_NUMBER; i++)
{
bufferInfoTable[activeBufferID].active = 0;
}
return 0;
}
static void __exit my_memAllocExit(void)
{
printk("Module unloading\n");
cleanup();
cdev_del(&interface.cdev);
device_destroy(interface.class_p, interface.dev_node);
class_destroy(interface.class_p);
unregister_chrdev_region(interface.dev_node, 1);
}
module_init(memAllocInit);
module_exit(my_memAllocExit);
MODULE_AUTHOR("me");
MODULE_DESCRIPTION("Create a buffer and return physical and virtual address, for DMA userspace driver");
MODULE_LICENSE("GPL");
memalloc.h
#ifndef MEMALLOC_H
#define MEMALLOC_H
#ifdef __cplusplus
extern "C" {
#endif
#include <linux/types.h>
#include <asm/ioctl.h>
static long memAllocIoctl (struct file *, unsigned int, unsigned long);
static int memAllocMmap (struct file *, struct vm_area_struct *);
static int memAllocRelease (struct inode *, struct file *);
static int memAllocOpen(struct inode *, struct file *);
enum memAllocCmd
{
MEMALLOC_RESERVE = 0,
MEMALLOC_RELEASE = 1,
MEMALLOC_GET_VIRTUAL = 2,
MEMALLOC_GET_PHYSICAL = 3,
MEMALLOC_ACTIVATE_BUFFER = 4,
};
#ifdef __cplusplus
}
#endif
#endif /* MEMALLOC_H */
test.c
#include <stdlib.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <stdio.h>
// derive this from memalloc.h
enum memAllocCmd
{
MEMALLOC_RESERVE = 0,
MEMALLOC_RELEASE = 1,
MEMALLOC_GET_VIRTUAL = 2,
MEMALLOC_GET_PHYSICAL = 3,
MEMALLOC_ACTIVATE_BUFFER = 4,
};
int main ()
{
int memAllocFd;
volatile int iVaddr;
volatile int oVaddr;
volatile int iVaddr_2;
volatile int oVaddr_2;
volatile void * iPaddr;
volatile void * oPaddr;
int iBufID;
int oBufID;
int size = 2048;
memAllocFd = open("/dev/memalloc", O_RDWR);
// create iBuffer
iBufID = ioctl(memAllocFd, MEMALLOC_RESERVE, size);
iPaddr = (void *)ioctl(memAllocFd, MEMALLOC_GET_PHYSICAL, iBufID);
ioctl(memAllocFd, MEMALLOC_ACTIVATE_BUFFER, iBufID);
iVaddr = (int)mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, memAllocFd, 0);
ioctl(memAllocFd, MEMALLOC_GET_VIRTUAL, iBufID);
/*
if (iVaddr != iVaddr_2)
{
printf("Error: virtual addresses for buffer %d don't match: %X %X\n", iBufID, iVaddr, iVaddr_2);
}
*/
// create oBuffer
oBufID = ioctl(memAllocFd, MEMALLOC_RESERVE, size);
oPaddr = (void *)ioctl(memAllocFd, MEMALLOC_GET_PHYSICAL, oBufID);
ioctl(memAllocFd, MEMALLOC_ACTIVATE_BUFFER, oBufID);
oVaddr = (int)mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, memAllocFd, 0);
ioctl(memAllocFd, MEMALLOC_GET_VIRTUAL, oBufID);
/*
if (oVaddr != oVaddr_2)
{
printf("Error: virtual addresses for buffer %d don't match: %X %X\n", oBufID, oVaddr, oVaddr_2);
}
*/
ioctl(memAllocFd, MEMALLOC_RELEASE, iBufID);
ioctl(memAllocFd, MEMALLOC_RELEASE, oBufID);
return 0;
}
Result of the test is
received command 0 arg 2048
Reserving buffer 0
received command 3 arg 0
received command 4 arg 0
mmap for idx 0: vaddr = 0
received command 0 arg 2048
Reserving buffer 1
received command 3 arg 1
received command 4 arg 1
mmap for idx 1: vaddr = 0
received command 1 arg 0
Releasing buffer 0
received command 1 arg 1
Releasing buffer 1
Which means that all ioctl calls with arg=MEMALLOC_GET_VIRTUAL are not executed, while all the others are.
What can be the reason for that?
Thanks,
Max
I tried to input this struct from MATLAB into my MEX file: struct('speed',{100.3},'nr',{55.4},'on',{54}), but the last value which is defined in my MEX file as unsigned char reads out as zero before calling my C function? The two double values works like intended.
struct post_TAG
{
double speed;
double nr;
unsigned char on;
};
const char *keys[] = { "speed", "nr", "on" };
void testmex(post_TAG *post)
{
...
}
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, mxArray *prhs[])
{
post_TAG post;
int numFields, i;
const char *fnames[3];
mxArray *tmp;
double *a,*b;
unsigned char *c;
numFields=mxGetNumberOfFields(prhs[0]);
for(i=0;i<numFields;i++)
fnames[i] = mxGetFieldNameByNumber(prhs[0],i);
tmp = mxGetField(prhs[0],0,fnames[0]);
a=(double*)mxGetData(tmp);
tmp = mxGetField(prhs[0],0,fnames[1]);
b=(double*)mxGetData(tmp);
tmp = mxGetField(prhs[0],0,fnames[2]);
c=(unsigned char*)mxGetData(tmp);
mexPrintf("POST0, speed=%f, nr=%f, on=%u\n",*a,*b,*c);
post.speed = *a;
post.nr = *b;
post.on = *c;
testmex(&post);
}
In a struct defined as struct('speed',{100.3},'nr',{55.4},'on',{54}), the field on is a double. Pass as a uint8 from MATLAB:
struct('speed',{100.3},'nr',{55.4},...
'on',{uint8(54)}),
Any numeric value without a specified type in MATLAB is a double.
Also note that for reading a scalar value, the problem is simplified somewhat by mxGetScalar. It will return one double value for any underlying data type.
unsigned char s = (unsigned char) mxGetScalar(...); // cast a double to unsigned char
Inspired from this post, I am interested to pass std::strings into the cell array. However, the mxDuplicateArray accepts mxArray format variables. I have tried to transform the std::string to mxArray with mxGetString but without success.
Could you please make a suggestion on this?
Thanks!
void mexFunction(int nlhs, mxArray *plhs[],int nrhs, const mxArray *prhs[])
{
std::string str ("Hellooo");
const char *cstr = str.c_str();
mwSize len = 10;
mxArray *mxarr = mxCreateCellMatrix(len, 1);
mxArray *mxstr = mxCreateString("");
mxGetString(mxstr, (char*) cstr, str.length());
for(mwIndex i=0; i<len; i++) {
// I simply replaced the call to mxDuplicateArray here
mxSetCell(mxarr, i, mxDuplicateArray(mxstr));
}
mxDestroyArray(mxstr);
plhs[0] = mxarr;
}
You could also cut out the call to mxDuplicateArray (and mxDestroyArray).
#include "mex.h"
#include <string>
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
std::string str("Hellooo");
const char *cstr = str.c_str();
mwSize len = 10;
mxArray *mxarr = mxCreateCellMatrix(len, 1);
for (mwIndex i=0; i<len; i++) {
mxSetCell(mxarr, i, mxCreateString(cstr));
}
plhs[0] = mxarr;
}
Untested...
From the docs on mxGetString:
Call mxGetString to copy the character data of a string mxArray
What you want, is the opposite: create an mxArray from a c-style string. For that you can use
mxCreateString directly. It seems to tried to use it to create an empty string at first. This should work:
void mexFunction(int nlhs, mxArray *plhs[],int nrhs, const mxArray *prhs[])
{
std::string str ("Hellooo");
const char *cstr = str.c_str();
mwSize len = 10;
mxArray *mxarr = mxCreateCellMatrix(len, 1);
mxArray *mxstr = mxCreateString(cstr);
// no need for this line
// mxGetString(mxstr, (char*) cstr, str.length());
for(mwIndex i=0; i<len; i++) {
// I simply replaced the call to mxDuplicateArray here
mxSetCell(mxarr, i, mxDuplicateArray(mxstr));
}
mxDestroyArray(mxstr);
plhs[0] = mxarr;
}
i installed the usb webcam driver in linux 2.6 version
then i want to capture using V4L please any one reply me how can i write the program to capture using webcam
I work with V4l using the reference of the linuxTV (http://linuxtv.org/downloads/v4l-dvb-apis/). They have the documentation about V4l and some examples. Take a look at the following example (i got from the linuxTV):
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/mman.h>
#include <linux/videodev2.h>
#include "../libv4l/include/libv4l2.h"
#define CLEAR(x) memset(&(x), 0, sizeof(x))
struct buffer {
void *start;
size_t length;
};
static void xioctl(int fh, int request, void *arg)
{
int r;
do {
r = v4l2_ioctl(fh, request, arg);
} while (r == -1 && ((errno == EINTR) || (errno == EAGAIN)));
if (r == -1) {
fprintf(stderr, "error %d, %s\n", errno, strerror(errno));
exit(EXIT_FAILURE);
}
}
int main(int argc, char **argv)
{
struct v4l2_format fmt;
struct v4l2_buffer buf;
struct v4l2_requestbuffers req;
enum v4l2_buf_type type;
fd_set fds;
struct timeval tv;
int r, fd = -1;
unsigned int i, n_buffers;
char *dev_name = "/dev/video0";
char out_name[256];
FILE *fout;
struct buffer *buffers;
fd = v4l2_open(dev_name, O_RDWR | O_NONBLOCK, 0);
if (fd < 0) {
perror("Cannot open device");
exit(EXIT_FAILURE);
}
CLEAR(fmt);
fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
fmt.fmt.pix.width = 640;
fmt.fmt.pix.height = 480;
fmt.fmt.pix.pixelformat = V4L2_PIX_FMT_RGB24;
fmt.fmt.pix.field = V4L2_FIELD_INTERLACED;
xioctl(fd, VIDIOC_S_FMT, &fmt);
if (fmt.fmt.pix.pixelformat != V4L2_PIX_FMT_RGB24) {
printf("Libv4l didn't accept RGB24 format. Can't proceed.\n");
exit(EXIT_FAILURE);
}
if ((fmt.fmt.pix.width != 640) || (fmt.fmt.pix.height != 480))
printf("Warning: driver is sending image at %dx%d\n",
fmt.fmt.pix.width, fmt.fmt.pix.height);
CLEAR(req);
req.count = 2;
req.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
req.memory = V4L2_MEMORY_MMAP;
xioctl(fd, VIDIOC_REQBUFS, &req);
buffers = calloc(req.count, sizeof(*buffers));
for (n_buffers = 0; n_buffers < req.count; ++n_buffers) {
CLEAR(buf);
buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
buf.memory = V4L2_MEMORY_MMAP;
buf.index = n_buffers;
xioctl(fd, VIDIOC_QUERYBUF, &buf);
buffers[n_buffers].length = buf.length;
buffers[n_buffers].start = v4l2_mmap(NULL, buf.length,
PROT_READ | PROT_WRITE, MAP_SHARED,
fd, buf.m.offset);
if (MAP_FAILED == buffers[n_buffers].start) {
perror("mmap");
exit(EXIT_FAILURE);
}
}
for (i = 0; i < n_buffers; ++i) {
CLEAR(buf);
buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
buf.memory = V4L2_MEMORY_MMAP;
buf.index = i;
xioctl(fd, VIDIOC_QBUF, &buf);
}
type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
xioctl(fd, VIDIOC_STREAMON, &type);
for (i = 0; i < 20; i++) {
do {
FD_ZERO(&fds);
FD_SET(fd, &fds);
/* Timeout. */
tv.tv_sec = 2;
tv.tv_usec = 0;
r = select(fd + 1, &fds, NULL, NULL, &tv);
} while ((r == -1 && (errno = EINTR)));
if (r == -1) {
perror("select");
return errno;
}
CLEAR(buf);
buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
buf.memory = V4L2_MEMORY_MMAP;
xioctl(fd, VIDIOC_DQBUF, &buf);
sprintf(out_name, "out%03d.ppm", i);
fout = fopen(out_name, "w");
if (!fout) {
perror("Cannot open image");
exit(EXIT_FAILURE);
}
fprintf(fout, "P6\n%d %d 255\n",
fmt.fmt.pix.width, fmt.fmt.pix.height);
fwrite(buffers[buf.index].start, buf.bytesused, 1, fout);
fclose(fout);
xioctl(fd, VIDIOC_QBUF, &buf);
}
type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
xioctl(fd, VIDIOC_STREAMOFF, &type);
for (i = 0; i < n_buffers; ++i)
v4l2_munmap(buffers[i].start, buffers[i].length);
v4l2_close(fd);
return 0;
}
I know there is an Objective-C wrapper around YAJL, but this is a really fat thing which blows up the whole JASON parser to a ridiculous huge amount of 21 files, many of them with tiny scroll bars.
So to keep my app binary small I'd like to stick with the C-version of that parser. But I'm having a hard time finding any useful documentation for this rather than the wrapper.
Maybe someone who used the C-base can point out such a tutorial or documentation?
The documentation with C examples can be found here: http://lloyd.github.com/yajl/
The github repository with examples can be found here : https://github.com/lloyd/yajl
Here is a C example that reformats JSON from stdin:
#include <yajl/yajl_parse.h>
#include <yajl/yajl_gen.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
static int reformat_null(void * ctx)
{
yajl_gen g = (yajl_gen) ctx;
return yajl_gen_status_ok == yajl_gen_null(g);
}
static int reformat_boolean(void * ctx, int boolean)
{
yajl_gen g = (yajl_gen) ctx;
return yajl_gen_status_ok == yajl_gen_bool(g, boolean);
}
static int reformat_number(void * ctx, const char * s, size_t l)
{
yajl_gen g = (yajl_gen) ctx;
return yajl_gen_status_ok == yajl_gen_number(g, s, l);
}
static int reformat_string(void * ctx, const unsigned char * stringVal,
size_t stringLen)
{
yajl_gen g = (yajl_gen) ctx;
return yajl_gen_status_ok == yajl_gen_string(g, stringVal, stringLen);
}
static int reformat_map_key(void * ctx, const unsigned char * stringVal,
size_t stringLen)
{
yajl_gen g = (yajl_gen) ctx;
return yajl_gen_status_ok == yajl_gen_string(g, stringVal, stringLen);
}
static int reformat_start_map(void * ctx)
{
yajl_gen g = (yajl_gen) ctx;
return yajl_gen_status_ok == yajl_gen_map_open(g);
}
static int reformat_end_map(void * ctx)
{
yajl_gen g = (yajl_gen) ctx;
return yajl_gen_status_ok == yajl_gen_map_close(g);
}
static int reformat_start_array(void * ctx)
{
yajl_gen g = (yajl_gen) ctx;
return yajl_gen_status_ok == yajl_gen_array_open(g);
}
static int reformat_end_array(void * ctx)
{
yajl_gen g = (yajl_gen) ctx;
return yajl_gen_status_ok == yajl_gen_array_close(g);
}
static yajl_callbacks callbacks = {
reformat_null,
reformat_boolean,
NULL,
NULL,
reformat_number,
reformat_string,
reformat_start_map,
reformat_map_key,
reformat_end_map,
reformat_start_array,
reformat_end_array
};
static void
usage(const char * progname)
{
fprintf(stderr, "%s: reformat json from stdin\n"
"usage: json_reformat [options]\n"
" -m minimize json rather than beautify (default)\n"
" -u allow invalid UTF8 inside strings during parsing\n",
progname);
exit(1);
}
int
main(int argc, char ** argv)
{
yajl_handle hand;
static unsigned char fileData[65536];
/* generator config */
yajl_gen g;
yajl_status stat;
size_t rd;
int retval = 0;
int a = 1;
g = yajl_gen_alloc(NULL);
yajl_gen_config(g, yajl_gen_beautify, 1);
yajl_gen_config(g, yajl_gen_validate_utf8, 1);
/* ok. open file. let's read and parse */
hand = yajl_alloc(&callbacks, NULL, (void *) g);
/* and let's allow comments by default */
yajl_config(hand, yajl_allow_comments, 1);
/* check arguments.*/
while ((a < argc) && (argv[a][0] == '-') && (strlen(argv[a]) > 1)) {
unsigned int i;
for ( i=1; i < strlen(argv[a]); i++) {
switch (argv[a][i]) {
case 'm':
yajl_gen_config(g, yajl_gen_beautify, 0);
break;
case 'u':
yajl_config(hand, yajl_dont_validate_strings, 1);
break;
default:
fprintf(stderr, "unrecognized option: '%c'\n\n",
argv[a][i]);
usage(argv[0]);
}
}
++a;
}
if (a < argc) {
usage(argv[0]);
}
for (;;) {
rd = fread((void *) fileData, 1, sizeof(fileData) - 1, stdin);
if (rd == 0) {
if (!feof(stdin)) {
fprintf(stderr, "error on file read.\n");
retval = 1;
}
break;
}
fileData[rd] = 0;
stat = yajl_parse(hand, fileData, rd);
if (stat != yajl_status_ok) break;
{
const unsigned char * buf;
size_t len;
yajl_gen_get_buf(g, &buf, &len);
fwrite(buf, 1, len, stdout);
yajl_gen_clear(g);
}
}
stat = yajl_complete_parse(hand);
if (stat != yajl_status_ok) {
unsigned char * str = yajl_get_error(hand, 1, fileData, rd);
fprintf(stderr, "%s", (const char *) str);
yajl_free_error(hand, str);
retval = 1;
}
yajl_gen_free(g);
yajl_free(hand);
return retval;
}