Merge pull request #282 from gargrahul/texture_driver_3d_support

Added support for 3D texture driver apis

[ROCm/hip commit: 814d55f046]
This commit is contained in:
Maneesh Gupta
2017-12-18 15:39:26 +05:30
committed by GitHub
4 changed files with 273 additions and 57 deletions
@@ -62,6 +62,8 @@ struct HIP_ARRAY_DESCRIPTOR {
unsigned int numChannels;
size_t width;
size_t height;
unsigned int flags;
size_t depth;
};
struct hipArray {
@@ -73,6 +75,7 @@ struct hipArray {
unsigned int depth;
struct HIP_ARRAY_DESCRIPTOR drvDesc;
bool isDrv;
unsigned int textureType;
};
typedef struct hip_Memcpy2D {
@@ -251,6 +254,30 @@ struct hipMemcpy3DParms {
struct hipExtent extent;
enum hipMemcpyKind kind;
size_t Depth;
size_t Height;
size_t WidthInBytes;
hipDeviceptr_t dstDevice;
size_t dstHeight;
void * dstHost;
size_t dstLOD;
hipMemoryType dstMemoryType;
size_t dstPitch;
size_t dstXInBytes;
size_t dstY;
size_t dstZ;
void * reserved0;
void * reserved1;
hipDeviceptr_t srcDevice;
size_t srcHeight;
const void * srcHost;
size_t srcLOD;
hipMemoryType srcMemoryType;
size_t srcPitch;
size_t srcXInBytes;
size_t srcY;
size_t srcZ;
};
static __inline__ struct hipPitchedPtr make_hipPitchedPtr(void *d, size_t p, size_t xsz, size_t ysz)
@@ -1317,6 +1317,11 @@ hipError_t hipMallocArray(hipArray** array, const struct hipChannelFormatDesc* d
size_t width, size_t height, unsigned int flags);
#endif
hipError_t hipArrayCreate ( hipArray** pHandle, const HIP_ARRAY_DESCRIPTOR* pAllocateArray );
hipError_t hipArray3DCreate(hipArray_t *array, const HIP_ARRAY_DESCRIPTOR* pAllocateArray );
hipError_t hipMalloc3D (hipPitchedPtr* pitchedDevPtr, hipExtent extent );
/**
* @brief Frees an array on the device.
*
+239 -55
View File
@@ -344,24 +344,16 @@ hipError_t hipHostAlloc(void** ptr, size_t sizeBytes, unsigned int flags)
return hipHostMalloc(ptr, sizeBytes, flags);
};
// width in bytes
hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height)
hipError_t ihipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height, size_t depth)
{
HIP_INIT_SPECIAL_API((TRACE_MEM), ptr, pitch, width, height);
HIP_SET_DEVICE();
hipError_t hip_status = hipSuccess;
if(width == 0 || height == 0)
return ihipLogStatus(hipErrorUnknown);
// hardcoded 128 bytes
*pitch = ((((int)width-1)/128) + 1)*128;
const size_t sizeBytes = (*pitch)*height;
auto ctx = ihipGetTlsDefaultCtx();
//err = hipMalloc(ptr, (*pitch)*height);
if (ctx) {
hc::accelerator acc = ctx->getDevice()->_acc;
hsa_agent_t* agent =static_cast<hsa_agent_t*>(acc.get_hsa_agent());
@@ -373,9 +365,12 @@ hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height
hsa_ext_image_descriptor_t imageDescriptor;
imageDescriptor.width = *pitch;
imageDescriptor.height = height;
imageDescriptor.depth = 0;
imageDescriptor.depth = 0;//depth;
imageDescriptor.array_size = 0;
imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_2D;
if(depth == 0)
imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_2D;
else
imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_3D;
imageDescriptor.format.channel_order = HSA_EXT_IMAGE_CHANNEL_ORDER_R;
imageDescriptor.format.channel_type = HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32;
@@ -394,6 +389,42 @@ hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height
hip_status = hipErrorMemoryAllocation;
}
return hip_status;
}
// width in bytes
hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height)
{
HIP_INIT_SPECIAL_API((TRACE_MEM), ptr, pitch, width, height);
HIP_SET_DEVICE();
hipError_t hip_status = hipSuccess;
if(width == 0 || height == 0)
return ihipLogStatus(hipErrorUnknown);
hip_status = ihipMallocPitch(ptr, pitch, width, height, 0);
return ihipLogStatus(hip_status);
}
hipError_t hipMalloc3D (hipPitchedPtr* pitchedDevPtr, hipExtent extent )
{
HIP_INIT_API(pitchedDevPtr, &extent);
HIP_SET_DEVICE();
hipError_t hip_status = hipSuccess;
if(extent.width == 0 || extent.height == 0)
return ihipLogStatus(hipErrorUnknown);
if(!pitchedDevPtr)
return ihipLogStatus(hipErrorInvalidValue);
void* ptr;
size_t pitch;
hip_status = ihipMallocPitch(&pitchedDevPtr->ptr, &pitch, extent.width, extent.height, extent.depth);
if(hip_status == hipSuccess) {
pitchedDevPtr->pitch = pitch;
pitchedDevPtr->xsize = extent.width;
pitchedDevPtr->ysize = extent.height;
}
return ihipLogStatus(hip_status);
}
@@ -531,7 +562,7 @@ hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc,
array[0]->depth = 1;
array[0]->desc = *desc;
array[0]->isDrv = false;
array[0]->textureType = hipTextureType2D;
void ** ptr = &array[0]->data;
if (ctx) {
@@ -610,12 +641,132 @@ hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc,
return ihipLogStatus(hip_status);
}
hipError_t hipArray3DCreate(hipArray_t *array, const HIP_ARRAY_DESCRIPTOR* pAllocateArray )
{
HIP_INIT_SPECIAL_API((TRACE_MEM), array, pAllocateArray);
hipError_t hip_status = hipSuccess;
auto ctx = ihipGetTlsDefaultCtx();
*array = (hipArray*)malloc(sizeof(hipArray));
array[0]->type = pAllocateArray->flags;
array[0]->width = pAllocateArray->width;
array[0]->height = pAllocateArray->height;
array[0]->depth = pAllocateArray->depth;
array[0]->drvDesc = *pAllocateArray;
array[0]->isDrv = true;
array[0]->textureType = hipTextureType3D;
void ** ptr = &array[0]->data;
if (ctx) {
const unsigned am_flags = 0;
const size_t size = pAllocateArray->width*pAllocateArray->height*pAllocateArray->depth;
size_t allocSize = 0;
hsa_ext_image_channel_type_t channelType;
switch(pAllocateArray->format) {
case HIP_AD_FORMAT_UNSIGNED_INT8:
allocSize = size * sizeof(uint8_t);
channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8;
break;
case HIP_AD_FORMAT_UNSIGNED_INT16:
allocSize = size * sizeof(uint16_t);
channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16;
break;
case HIP_AD_FORMAT_UNSIGNED_INT32:
allocSize = size * sizeof(uint32_t);
channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32;
break;
case HIP_AD_FORMAT_SIGNED_INT8:
allocSize = size * sizeof(int8_t);
channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT8;
break;
case HIP_AD_FORMAT_SIGNED_INT16:
allocSize = size * sizeof(int16_t);
channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT16;
break;
case HIP_AD_FORMAT_SIGNED_INT32:
allocSize = size * sizeof(int32_t);
channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT32;
break;
case HIP_AD_FORMAT_HALF:
allocSize = size * sizeof(int16_t);
channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_HALF_FLOAT;
break;
case HIP_AD_FORMAT_FLOAT:
allocSize = size * sizeof(float);
channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_FLOAT;
break;
default:
hip_status = hipErrorUnknown;
break;
}
hc::accelerator acc = ctx->getDevice()->_acc;
hsa_agent_t* agent =static_cast<hsa_agent_t*>(acc.get_hsa_agent());
size_t allocGranularity = 0;
hsa_amd_memory_pool_t *allocRegion = static_cast<hsa_amd_memory_pool_t*>(acc.get_hsa_am_region());
hsa_amd_memory_pool_get_info(*allocRegion, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE, &allocGranularity);
hsa_ext_image_descriptor_t imageDescriptor;
imageDescriptor.width = pAllocateArray->width;
imageDescriptor.height = pAllocateArray->height;
imageDescriptor.depth = 0;
imageDescriptor.array_size = 0;
switch (pAllocateArray->flags) {
case hipArrayLayered:
imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_2DA;
imageDescriptor.array_size = pAllocateArray->depth;
break;
case hipArraySurfaceLoadStore:
case hipArrayTextureGather:
case hipArrayDefault:
assert(0);
break;
case hipArrayCubemap:
default:
imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_3D;
imageDescriptor.depth = pAllocateArray->depth;
break;
}
hsa_ext_image_channel_order_t channelOrder;
//getChannelOrderAndType(*desc, hipReadModeElementType, &channelOrder, &channelType);
if (pAllocateArray->numChannels == 4) {
channelOrder = HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA;
} else if (pAllocateArray->numChannels == 2) {
channelOrder = HSA_EXT_IMAGE_CHANNEL_ORDER_RG;
} else if (pAllocateArray->numChannels == 1) {
channelOrder = HSA_EXT_IMAGE_CHANNEL_ORDER_R;
}
imageDescriptor.format.channel_order = channelOrder;
imageDescriptor.format.channel_type = channelType;
hsa_access_permission_t permission = HSA_ACCESS_PERMISSION_RW;
hsa_ext_image_data_info_t imageInfo;
hsa_status_t status = hsa_ext_image_data_get_info(*agent, &imageDescriptor, permission, &imageInfo);
size_t alignment = imageInfo.alignment <= allocGranularity ? 0 : imageInfo.alignment;
*ptr = hip_internal::allocAndSharePtr("device_array", allocSize, ctx, false, am_flags, 0, alignment);
if (size && (*ptr == NULL)) {
hip_status = hipErrorMemoryAllocation;
}
} else {
hip_status = hipErrorMemoryAllocation;
}
return ihipLogStatus(hip_status);
}
hipError_t hipMalloc3DArray(hipArray_t *array,
const struct hipChannelFormatDesc* desc,
struct hipExtent extent,
unsigned int flags)
{
HIP_INIT();
HIP_INIT_API(array, desc, &extent, flags);
HIP_SET_DEVICE();
hipError_t hip_status = hipSuccess;
@@ -627,7 +778,8 @@ hipError_t hipMalloc3DArray(hipArray_t *array,
array[0]->height = extent.height;
array[0]->depth = extent.depth;
array[0]->desc = *desc;
array[0]->isDrv = false;
array[0]->textureType = hipTextureType3D;
void ** ptr = &array[0]->data;
if (ctx) {
@@ -702,7 +854,7 @@ hipError_t hipMalloc3DArray(hipArray_t *array,
hip_status = hipErrorMemoryAllocation;
}
return hip_status;
return ihipLogStatus(hip_status);
}
hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr)
@@ -1262,53 +1414,85 @@ hipError_t hipMemcpyToArray(hipArray* dst, size_t wOffset, size_t hOffset,
hipError_t hipMemcpy3D(const struct hipMemcpy3DParms *p)
{
HIP_INIT_SPECIAL_API((TRACE_MCMD), p);
hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
hc::completion_future marker;
hipError_t e = hipSuccess;
size_t byteSize;
if(p) {
switch(p->dstArray->desc.f) {
case hipChannelFormatKindSigned:
byteSize = sizeof(int);
break;
case hipChannelFormatKindUnsigned:
byteSize = sizeof(unsigned int);
break;
case hipChannelFormatKindFloat:
byteSize = sizeof(float);
break;
case hipChannelFormatKindNone:
byteSize = sizeof(size_t);
break;
default:
byteSize = 0;
break;
size_t byteSize;
size_t depth;
size_t height;
size_t widthInBytes;
size_t dstWidthInbytes;
size_t srcPitch;
size_t dstPitch;
void *srcPtr;
void *dstPtr;
size_t ySize;
if(p->dstArray != nullptr) {
if(p->dstArray->isDrv == false) {
switch(p->dstArray->desc.f) {
case hipChannelFormatKindSigned:
byteSize = sizeof(int);
break;
case hipChannelFormatKindUnsigned:
byteSize = sizeof(unsigned int);
break;
case hipChannelFormatKindFloat:
byteSize = sizeof(float);
break;
case hipChannelFormatKindNone:
byteSize = sizeof(size_t);
break;
default:
byteSize = 0;
break;
}
depth = p->extent.depth;
height = p->extent.height;
widthInBytes = p->extent.width * byteSize;
srcPitch = p->srcPtr.pitch;
srcPtr = p->srcPtr.ptr;
ySize = p->srcPtr.ysize;
dstWidthInbytes = p->dstArray->width*byteSize;
dstPtr = p->dstArray->data;
} else {
depth = p->Depth;
height = p->Height;
widthInBytes = p->WidthInBytes;
dstWidthInbytes = p->dstArray->width*4;
srcPitch = p->srcPitch;
srcPtr = (void*)p->srcHost;
ySize = p->srcHeight;
dstPtr = p->dstArray->data;
}
} else {
//Non array destination
depth = p->extent.depth;
height = p->extent.height;
widthInBytes = p->extent.width;
srcPitch = p->srcPtr.pitch;
srcPtr = p->srcPtr.ptr;
dstPtr = p->dstPtr.ptr;
ySize = p->srcPtr.ysize;
dstWidthInbytes = p->dstPtr.pitch;
}
hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
hc::completion_future marker;
try {
for (int i = 0; i < depth; i++) {
for(int j = 0; j < height; j++) {
// TODO: p->srcPos or p->dstPos are not 0.
unsigned char* src = (unsigned char*)srcPtr + i*ySize*srcPitch + j*srcPitch;
unsigned char* dst = (unsigned char*)dstPtr + i*height*dstWidthInbytes + j*dstWidthInbytes;
stream->locked_copySync(dst, src, widthInBytes, p->kind);
}
}
} catch (ihipException ex) {
e = ex._code;
}
} else {
return ihipLogStatus(hipErrorUnknown);
e = hipErrorInvalidValue;
}
try {
for (int i = 0; i < p->extent.depth; i++) {
for(int j = 0; j < p->extent.height; j++) {
// TODO: p->srcPos or p->dstPos are not 0.
unsigned char* src = (unsigned char*)p->srcPtr.ptr + i*p->srcPtr.ysize*p->srcPtr.pitch + j*p->srcPtr.pitch;
unsigned char* dst = (unsigned char*)p->dstArray->data + i*p->dstArray->height*p->dstArray->width*byteSize + j*p->dstArray->width*byteSize;
stream->locked_copySync(dst, src, p->extent.width*byteSize, p->kind);
}
}
}
catch (ihipException &ex) {
e = ex._code;
}
return ihipLogStatus(e);
}
namespace
{
template<
+2 -2
View File
@@ -623,7 +623,7 @@ hipError_t hipBindTextureToArray(textureReference* tex,
HIP_INIT_API(tex, array, desc);
hipError_t hip_status = hipSuccess;
// TODO: hipReadModeElementType is default.
hip_status = ihipBindTextureToArrayImpl(hipTextureType2D, hipReadModeElementType,
hip_status = ihipBindTextureToArrayImpl(array->textureType, hipReadModeElementType,
array, *desc, tex);
return ihipLogStatus(hip_status);
}
@@ -742,7 +742,7 @@ hipError_t hipTexRefSetArray ( textureReference* tex, hipArray_const_t array, u
HIP_INIT_API(tex, array, flags);
hipError_t hip_status = hipSuccess;
hip_status = ihipBindTextureToArrayImpl(hipTextureType2D, hipReadModeElementType,
hip_status = ihipBindTextureToArrayImpl(array->textureType, hipReadModeElementType,
array, array->desc,tex );
return ihipLogStatus(hip_status);
}