Memory issue with texture in mexCUDA compiled code

Question

0 votes

I created a helper function mxArrayToTexture_3D_float4() to facilitate the process of creating CUDA objects from mxArray.

This function will check the type and size of inputMxArray, hide the details of creating cuArray, texture resource, description, etc. It works great when I have a dozen of texture objects to create with different dimension, size and type. However, I am not sure how to clean up and free the memory afterwards.

In the attached code, I can pass a 4 x 400x 400 x400 matlab array (~1G in memory size) and bind it to a texture object in CUDA with 1) explicit code, 2) through the helper function.

With the first approach, the device memory allocated to the texture is freed with cudaDestroyTextureObject() and cudaFreeArray()

However, with the second approach, those memories are not freed after the code exits. Everytime, I ran the code it will occupy ~1Gb device memory and eventually causing out of memory erros.

It is probably due to how I wrote those C codes and pass the texture object and cuda array through the helpfer function. Anyway, any suggestion to fix this issue is appreciated !

#include "tmwtypes.h"
#include "mex.h"
#include "gpu/mxGPUArray.h"
/* Convert matlab float array to CUDA 3D texture*/
void mxArrayToTexture_3D_float4(
    const mxArray* inputArray,
    cudaTextureObject_t& outputTexture,
    cudaArray *cuArray){
    unsigned int height = mxGetDimensions(inputArray)[1];
    unsigned int width  = mxGetDimensions(inputArray)[2];
    unsigned int depth  = mxGetDimensions(inputArray)[3];
    cudaExtent extent = make_cudaExtent(height, width, depth);
    float4 const*  h_ptr     = (float4 const*)mxGetPr(inputArray);
    // cuArray and channel description
    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float4>();
    cudaMalloc3DArray(&cuArray, &channelDesc, extent);
    // cudaMemcpy
    cudaMemcpy3DParms copyParams = { 0 };
    copyParams.srcPtr   = make_cudaPitchedPtr((void *)h_ptr, extent.width*sizeof(float4), extent.width, extent.height);
    copyParams.dstArray = cuArray;
    copyParams.extent   = extent;
    copyParams.kind     = cudaMemcpyHostToDevice;
    cudaMemcpy3D(&copyParams);
    // CUDA resource description
    cudaResourceDesc            texRes;
    memset(&texRes,0,sizeof(cudaResourceDesc));
    texRes.resType                    = cudaResourceTypeArray;
    texRes.res.array.array            = cuArray;
    // CUDA texture description
    cudaTextureDesc             texDescr;
    memset(&texDescr,0,sizeof(cudaTextureDesc));
    texDescr.normalizedCoords         = false;
    texDescr.filterMode               = cudaFilterModeLinear;
    texDescr.addressMode[0]           = cudaAddressModeClamp;
    texDescr.addressMode[1]           = cudaAddressModeClamp;
    texDescr.addressMode[2]           = cudaAddressModeClamp;
    texDescr.readMode                 = cudaReadModeElementType;
    cudaCreateTextureObject(&outputTexture, &texRes, &texDescr, NULL);
}
/**
 * MEX gateway
 */
void mexFunction(int /* nlhs */, mxArray *plhs[],
                 int nrhs, mxArray const *prhs[])
{
    mxInitGPU();
    /* 1. Explicit approach, no issue with GPU memory after code finishs*/
    mxArray const* inputArray = prhs[0];
    unsigned int height = mxGetDimensions(inputArray)[1];
    unsigned int width  = mxGetDimensions(inputArray)[2];
    unsigned int depth  = mxGetDimensions(inputArray)[3];
    cudaExtent extent = make_cudaExtent(height, width, depth);
    cudaTextureObject_t outputTexture1;
    cudaArray *cuArray1;
    float4 const*  h_ptr     = (float4 const*)mxGetPr(inputArray);
    // cuArray and channel description
    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float4>();
    cudaMalloc3DArray(&cuArray1, &channelDesc, extent);
    // cudaMemcpy
    cudaMemcpy3DParms copyParams = { 0 };
    copyParams.srcPtr   = make_cudaPitchedPtr((void *)h_ptr, extent.width*sizeof(float4), extent.width, extent.height);
    copyParams.dstArray = cuArray1;
    copyParams.extent   = extent;
    copyParams.kind     = cudaMemcpyHostToDevice;
    cudaMemcpy3D(&copyParams);
    // CUDA resource description
    cudaResourceDesc            texRes;
    memset(&texRes,0,sizeof(cudaResourceDesc));
    texRes.resType                    = cudaResourceTypeArray;
    texRes.res.array.array            = cuArray1;
    // CUDA texture description
    cudaTextureDesc             texDescr;
    memset(&texDescr,0,sizeof(cudaTextureDesc));
    texDescr.normalizedCoords         = false;
    texDescr.filterMode               = cudaFilterModeLinear;
    texDescr.addressMode[0]           = cudaAddressModeClamp;
    texDescr.addressMode[1]           = cudaAddressModeClamp;
    texDescr.addressMode[2]           = cudaAddressModeClamp;
    texDescr.readMode                 = cudaReadModeElementType;
    cudaCreateTextureObject(&outputTexture1, &texRes, &texDescr, NULL);
    cudaError_t lastError;
    lastError = cudaGetLastError();
    mexPrintf("%s\n", cudaGetErrorString(lastError));
    cudaDestroyTextureObject(outputTexture1);
    cudaFreeArray(cuArray1);
    lastError = cudaGetLastError();
    mexPrintf("%s\n", cudaGetErrorString(lastError));
    /* 2. Using helper function, Device memory not cleared after the code exits */
    cudaTextureObject_t  outputTexture2;
    cudaArray *cuArray2;
    mxArrayToTexture_3D_float4(prhs[0], outputTexture2, cuArray2);
    lastError = cudaGetLastError();
    mexPrintf("%s\n", cudaGetErrorString(lastError));
    cudaDestroyTextureObject(outputTexture2);
    cudaFreeArray(cuArray2);
    lastError = cudaGetLastError();
    mexPrintf("%s\n", cudaGetErrorString(lastError));
}

0 Comments
Show -2 older comments Hide -2 older comments

Sign in to comment.

Sign in to answer this question.

Follow Question

Answer 1

Joss Knight on 26 May 2023

Edited: Joss Knight on 26 May 2023

0 votes

It looks like the syntax for your function mxArrayToTexture_3D_float4 is incorrect. You are passing the pointer cuArray by value, so you are overwriting it locally with the allocated address but not passing that back to the caller. Pass the pointer by reference instead.

0 Comments
Show -2 older comments Hide -2 older comments

Sign in to comment.

Memory issue with texture in mexCUDA compiled code

0 Comments
Show -2 older comments Hide -2 older comments

Answers (1)

0 Comments
Show -2 older comments Hide -2 older comments

Categories

Products

Release

Tags

Community Treasure Hunt

Memory issue with texture in mexCUDA compiled code

0 Comments Show -2 older comments Hide -2 older comments

Answers (1)

0 Comments Show -2 older comments Hide -2 older comments

Categories

Products

Release

Tags

See Also

Community Treasure Hunt

0 Comments
Show -2 older comments Hide -2 older comments

0 Comments
Show -2 older comments Hide -2 older comments