From 105df94cd071f410ccb267988ec4a1b6c66489f7 Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Tue, 5 Dec 2017 14:11:13 +0530 Subject: [PATCH 01/28] Added support for - 3D texture driver APIs - hipMalloc3D - hipMemcpy3D for destination other than array --- hipamd/include/hip/hcc_detail/driver_types.h | 27 ++ .../include/hip/hcc_detail/hip_runtime_api.h | 5 + hipamd/src/hip_memory.cpp | 294 ++++++++++++++---- hipamd/src/hip_texture.cpp | 4 +- 4 files changed, 273 insertions(+), 57 deletions(-) diff --git a/hipamd/include/hip/hcc_detail/driver_types.h b/hipamd/include/hip/hcc_detail/driver_types.h index 5b31e3cd16..b1e83139b8 100644 --- a/hipamd/include/hip/hcc_detail/driver_types.h +++ b/hipamd/include/hip/hcc_detail/driver_types.h @@ -62,6 +62,8 @@ struct HIP_ARRAY_DESCRIPTOR { unsigned int numChannels; size_t width; size_t height; + unsigned int flags; + size_t depth; }; struct hipArray { @@ -73,6 +75,7 @@ struct hipArray { unsigned int depth; struct HIP_ARRAY_DESCRIPTOR drvDesc; bool isDrv; + unsigned int textureType; }; typedef struct hip_Memcpy2D { @@ -251,6 +254,30 @@ struct hipMemcpy3DParms { struct hipExtent extent; enum hipMemcpyKind kind; + + size_t Depth; + size_t Height; + size_t WidthInBytes; + hipDeviceptr_t dstDevice; + size_t dstHeight; + void * dstHost; + size_t dstLOD; + hipMemoryType dstMemoryType; + size_t dstPitch; + size_t dstXInBytes; + size_t dstY; + size_t dstZ; + void * reserved0; + void * reserved1; + hipDeviceptr_t srcDevice; + size_t srcHeight; + const void * srcHost; + size_t srcLOD; + hipMemoryType srcMemoryType; + size_t srcPitch; + size_t srcXInBytes; + size_t srcY; + size_t srcZ; }; static __inline__ struct hipPitchedPtr make_hipPitchedPtr(void *d, size_t p, size_t xsz, size_t ysz) diff --git a/hipamd/include/hip/hcc_detail/hip_runtime_api.h b/hipamd/include/hip/hcc_detail/hip_runtime_api.h index 16f13ebee2..9d0757f83a 100644 --- a/hipamd/include/hip/hcc_detail/hip_runtime_api.h +++ b/hipamd/include/hip/hcc_detail/hip_runtime_api.h @@ -1317,6 +1317,11 @@ hipError_t hipMallocArray(hipArray** array, const struct hipChannelFormatDesc* d size_t width, size_t height, unsigned int flags); #endif hipError_t hipArrayCreate ( hipArray** pHandle, const HIP_ARRAY_DESCRIPTOR* pAllocateArray ); + +hipError_t hipArray3DCreate(hipArray_t *array, const HIP_ARRAY_DESCRIPTOR* pAllocateArray ); + +hipError_t hipMalloc3D (hipPitchedPtr* pitchedDevPtr, hipExtent extent ); + /** * @brief Frees an array on the device. * diff --git a/hipamd/src/hip_memory.cpp b/hipamd/src/hip_memory.cpp index cedc3c59b5..77526cf9ac 100644 --- a/hipamd/src/hip_memory.cpp +++ b/hipamd/src/hip_memory.cpp @@ -344,24 +344,16 @@ hipError_t hipHostAlloc(void** ptr, size_t sizeBytes, unsigned int flags) return hipHostMalloc(ptr, sizeBytes, flags); }; - // width in bytes -hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height) +hipError_t ihipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height, size_t depth) { - HIP_INIT_SPECIAL_API((TRACE_MEM), ptr, pitch, width, height); - HIP_SET_DEVICE(); hipError_t hip_status = hipSuccess; - - if(width == 0 || height == 0) - return ihipLogStatus(hipErrorUnknown); - // hardcoded 128 bytes *pitch = ((((int)width-1)/128) + 1)*128; const size_t sizeBytes = (*pitch)*height; auto ctx = ihipGetTlsDefaultCtx(); - //err = hipMalloc(ptr, (*pitch)*height); if (ctx) { hc::accelerator acc = ctx->getDevice()->_acc; hsa_agent_t* agent =static_cast(acc.get_hsa_agent()); @@ -373,9 +365,12 @@ hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height hsa_ext_image_descriptor_t imageDescriptor; imageDescriptor.width = *pitch; imageDescriptor.height = height; - imageDescriptor.depth = 0; + imageDescriptor.depth = 0;//depth; imageDescriptor.array_size = 0; - imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_2D; + if(depth == 0) + imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_2D; + else + imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_3D; imageDescriptor.format.channel_order = HSA_EXT_IMAGE_CHANNEL_ORDER_R; imageDescriptor.format.channel_type = HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32; @@ -394,6 +389,42 @@ hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height hip_status = hipErrorMemoryAllocation; } + return hip_status; +} + +// width in bytes +hipError_t hipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height) +{ + HIP_INIT_SPECIAL_API((TRACE_MEM), ptr, pitch, width, height); + HIP_SET_DEVICE(); + hipError_t hip_status = hipSuccess; + + if(width == 0 || height == 0) + return ihipLogStatus(hipErrorUnknown); + + hip_status = ihipMallocPitch(ptr, pitch, width, height, 0); + return ihipLogStatus(hip_status); +} + +hipError_t hipMalloc3D (hipPitchedPtr* pitchedDevPtr, hipExtent extent ) +{ + HIP_INIT_API(pitchedDevPtr, &extent); + HIP_SET_DEVICE(); + hipError_t hip_status = hipSuccess; + + if(extent.width == 0 || extent.height == 0) + return ihipLogStatus(hipErrorUnknown); + if(!pitchedDevPtr) + return ihipLogStatus(hipErrorInvalidValue); + void* ptr; + size_t pitch; + + hip_status = ihipMallocPitch(&pitchedDevPtr->ptr, &pitch, extent.width, extent.height, extent.depth); + if(hip_status == hipSuccess) { + pitchedDevPtr->pitch = pitch; + pitchedDevPtr->xsize = extent.width; + pitchedDevPtr->ysize = extent.height; + } return ihipLogStatus(hip_status); } @@ -531,7 +562,7 @@ hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc, array[0]->depth = 1; array[0]->desc = *desc; array[0]->isDrv = false; - + array[0]->textureType = hipTextureType2D; void ** ptr = &array[0]->data; if (ctx) { @@ -610,12 +641,132 @@ hipError_t hipMallocArray(hipArray** array, const hipChannelFormatDesc* desc, return ihipLogStatus(hip_status); } +hipError_t hipArray3DCreate(hipArray_t *array, const HIP_ARRAY_DESCRIPTOR* pAllocateArray ) +{ + HIP_INIT_SPECIAL_API((TRACE_MEM), array, pAllocateArray); + hipError_t hip_status = hipSuccess; + + auto ctx = ihipGetTlsDefaultCtx(); + + *array = (hipArray*)malloc(sizeof(hipArray)); + array[0]->type = pAllocateArray->flags; + array[0]->width = pAllocateArray->width; + array[0]->height = pAllocateArray->height; + array[0]->depth = pAllocateArray->depth; + array[0]->drvDesc = *pAllocateArray; + array[0]->isDrv = true; + array[0]->textureType = hipTextureType3D; + void ** ptr = &array[0]->data; + + if (ctx) { + const unsigned am_flags = 0; + const size_t size = pAllocateArray->width*pAllocateArray->height*pAllocateArray->depth; + + size_t allocSize = 0; + hsa_ext_image_channel_type_t channelType; + switch(pAllocateArray->format) { + case HIP_AD_FORMAT_UNSIGNED_INT8: + allocSize = size * sizeof(uint8_t); + channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8; + break; + case HIP_AD_FORMAT_UNSIGNED_INT16: + allocSize = size * sizeof(uint16_t); + channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16; + break; + case HIP_AD_FORMAT_UNSIGNED_INT32: + allocSize = size * sizeof(uint32_t); + channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32; + break; + case HIP_AD_FORMAT_SIGNED_INT8: + allocSize = size * sizeof(int8_t); + channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT8; + break; + case HIP_AD_FORMAT_SIGNED_INT16: + allocSize = size * sizeof(int16_t); + channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT16; + break; + case HIP_AD_FORMAT_SIGNED_INT32: + allocSize = size * sizeof(int32_t); + channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT32; + break; + case HIP_AD_FORMAT_HALF: + allocSize = size * sizeof(int16_t); + channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_HALF_FLOAT; + break; + case HIP_AD_FORMAT_FLOAT: + allocSize = size * sizeof(float); + channelType = HSA_EXT_IMAGE_CHANNEL_TYPE_FLOAT; + break; + default: + hip_status = hipErrorUnknown; + break; + } + + hc::accelerator acc = ctx->getDevice()->_acc; + hsa_agent_t* agent =static_cast(acc.get_hsa_agent()); + + size_t allocGranularity = 0; + hsa_amd_memory_pool_t *allocRegion = static_cast(acc.get_hsa_am_region()); + hsa_amd_memory_pool_get_info(*allocRegion, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE, &allocGranularity); + + hsa_ext_image_descriptor_t imageDescriptor; + imageDescriptor.width = pAllocateArray->width; + imageDescriptor.height = pAllocateArray->height; + imageDescriptor.depth = 0; + imageDescriptor.array_size = 0; + switch (pAllocateArray->flags) { + case hipArrayLayered: + imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_2DA; + imageDescriptor.array_size = pAllocateArray->depth; + break; + case hipArraySurfaceLoadStore: + case hipArrayTextureGather: + case hipArrayDefault: + assert(0); + break; + case hipArrayCubemap: + default: + imageDescriptor.geometry = HSA_EXT_IMAGE_GEOMETRY_3D; + imageDescriptor.depth = pAllocateArray->depth; + break; + } + hsa_ext_image_channel_order_t channelOrder; + + //getChannelOrderAndType(*desc, hipReadModeElementType, &channelOrder, &channelType); + if (pAllocateArray->numChannels == 4) { + channelOrder = HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA; + } else if (pAllocateArray->numChannels == 2) { + channelOrder = HSA_EXT_IMAGE_CHANNEL_ORDER_RG; + } else if (pAllocateArray->numChannels == 1) { + channelOrder = HSA_EXT_IMAGE_CHANNEL_ORDER_R; + } + imageDescriptor.format.channel_order = channelOrder; + imageDescriptor.format.channel_type = channelType; + + hsa_access_permission_t permission = HSA_ACCESS_PERMISSION_RW; + hsa_ext_image_data_info_t imageInfo; + hsa_status_t status = hsa_ext_image_data_get_info(*agent, &imageDescriptor, permission, &imageInfo); + size_t alignment = imageInfo.alignment <= allocGranularity ? 0 : imageInfo.alignment; + + *ptr = hip_internal::allocAndSharePtr("device_array", allocSize, ctx, false, am_flags, 0, alignment); + + if (size && (*ptr == NULL)) { + hip_status = hipErrorMemoryAllocation; + } + + } else { + hip_status = hipErrorMemoryAllocation; + } + + return ihipLogStatus(hip_status); +} + hipError_t hipMalloc3DArray(hipArray_t *array, const struct hipChannelFormatDesc* desc, struct hipExtent extent, unsigned int flags) { - HIP_INIT(); + HIP_INIT_API(array, desc, &extent, flags); HIP_SET_DEVICE(); hipError_t hip_status = hipSuccess; @@ -627,7 +778,8 @@ hipError_t hipMalloc3DArray(hipArray_t *array, array[0]->height = extent.height; array[0]->depth = extent.depth; array[0]->desc = *desc; - + array[0]->isDrv = false; + array[0]->textureType = hipTextureType3D; void ** ptr = &array[0]->data; if (ctx) { @@ -702,7 +854,7 @@ hipError_t hipMalloc3DArray(hipArray_t *array, hip_status = hipErrorMemoryAllocation; } - return hip_status; + return ihipLogStatus(hip_status); } hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) @@ -1262,53 +1414,85 @@ hipError_t hipMemcpyToArray(hipArray* dst, size_t wOffset, size_t hOffset, hipError_t hipMemcpy3D(const struct hipMemcpy3DParms *p) { HIP_INIT_SPECIAL_API((TRACE_MCMD), p); - - hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull); - - hc::completion_future marker; - hipError_t e = hipSuccess; - - size_t byteSize; if(p) { - switch(p->dstArray->desc.f) { - case hipChannelFormatKindSigned: - byteSize = sizeof(int); - break; - case hipChannelFormatKindUnsigned: - byteSize = sizeof(unsigned int); - break; - case hipChannelFormatKindFloat: - byteSize = sizeof(float); - break; - case hipChannelFormatKindNone: - byteSize = sizeof(size_t); - break; - default: - byteSize = 0; - break; + size_t byteSize; + size_t depth; + size_t height; + size_t widthInBytes; + size_t dstWidthInbytes; + size_t srcPitch; + size_t dstPitch; + void *srcPtr; + void *dstPtr; + size_t ySize; + if(p->dstArray != nullptr) { + if(p->dstArray->isDrv == false) { + switch(p->dstArray->desc.f) { + case hipChannelFormatKindSigned: + byteSize = sizeof(int); + break; + case hipChannelFormatKindUnsigned: + byteSize = sizeof(unsigned int); + break; + case hipChannelFormatKindFloat: + byteSize = sizeof(float); + break; + case hipChannelFormatKindNone: + byteSize = sizeof(size_t); + break; + default: + byteSize = 0; + break; + } + depth = p->extent.depth; + height = p->extent.height; + widthInBytes = p->extent.width * byteSize; + srcPitch = p->srcPtr.pitch; + srcPtr = p->srcPtr.ptr; + ySize = p->srcPtr.ysize; + dstWidthInbytes = p->dstArray->width*byteSize; + dstPtr = p->dstArray->data; + } else { + depth = p->Depth; + height = p->Height; + widthInBytes = p->WidthInBytes; + dstWidthInbytes = p->dstArray->width*4; + srcPitch = p->srcPitch; + srcPtr = (void*)p->srcHost; + ySize = p->srcHeight; + dstPtr = p->dstArray->data; + } + } else { + //Non array destination + depth = p->extent.depth; + height = p->extent.height; + widthInBytes = p->extent.width; + srcPitch = p->srcPtr.pitch; + srcPtr = p->srcPtr.ptr; + dstPtr = p->dstPtr.ptr; + ySize = p->srcPtr.ysize; + dstWidthInbytes = p->dstPtr.pitch; } + hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull); + hc::completion_future marker; + try { + for (int i = 0; i < depth; i++) { + for(int j = 0; j < height; j++) { + // TODO: p->srcPos or p->dstPos are not 0. + unsigned char* src = (unsigned char*)srcPtr + i*ySize*srcPitch + j*srcPitch; + unsigned char* dst = (unsigned char*)dstPtr + i*height*dstWidthInbytes + j*dstWidthInbytes; + stream->locked_copySync(dst, src, widthInBytes, p->kind); + } + } + } catch (ihipException ex) { + e = ex._code; + } } else { - return ihipLogStatus(hipErrorUnknown); + e = hipErrorInvalidValue; } - - try { - for (int i = 0; i < p->extent.depth; i++) { - for(int j = 0; j < p->extent.height; j++) { - // TODO: p->srcPos or p->dstPos are not 0. - unsigned char* src = (unsigned char*)p->srcPtr.ptr + i*p->srcPtr.ysize*p->srcPtr.pitch + j*p->srcPtr.pitch; - unsigned char* dst = (unsigned char*)p->dstArray->data + i*p->dstArray->height*p->dstArray->width*byteSize + j*p->dstArray->width*byteSize; - stream->locked_copySync(dst, src, p->extent.width*byteSize, p->kind); - } - } - } - catch (ihipException &ex) { - e = ex._code; - } - return ihipLogStatus(e); } - namespace { template< diff --git a/hipamd/src/hip_texture.cpp b/hipamd/src/hip_texture.cpp index 7bf540ecc1..a69c91df8d 100644 --- a/hipamd/src/hip_texture.cpp +++ b/hipamd/src/hip_texture.cpp @@ -623,7 +623,7 @@ hipError_t hipBindTextureToArray(textureReference* tex, HIP_INIT_API(tex, array, desc); hipError_t hip_status = hipSuccess; // TODO: hipReadModeElementType is default. - hip_status = ihipBindTextureToArrayImpl(hipTextureType2D, hipReadModeElementType, + hip_status = ihipBindTextureToArrayImpl(array->textureType, hipReadModeElementType, array, *desc, tex); return ihipLogStatus(hip_status); } @@ -742,7 +742,7 @@ hipError_t hipTexRefSetArray ( textureReference* tex, hipArray_const_t array, u HIP_INIT_API(tex, array, flags); hipError_t hip_status = hipSuccess; - hip_status = ihipBindTextureToArrayImpl(hipTextureType2D, hipReadModeElementType, + hip_status = ihipBindTextureToArrayImpl(array->textureType, hipReadModeElementType, array, array->desc,tex ); return ihipLogStatus(hip_status); } From 379cbfedcf45138d3b4622aa974ce4708391fd75 Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Thu, 14 Dec 2017 12:16:44 +0530 Subject: [PATCH 02/28] [cmake] Added target static_check for static code analysis - Added static code analysis using cppcheck - Fixed couple of warnings reported by cppcheck Change-Id: Ie3b9b06e95ada322e7dc2ce3d6b91831e870198d --- hipamd/CMakeLists.txt | 7 ++++++- hipamd/src/hip_fp16.cpp | 2 +- hipamd/src/macro_based_grid_launch.inl | 3 +-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/hipamd/CMakeLists.txt b/hipamd/CMakeLists.txt index e405d06ed6..baf3b49df1 100644 --- a/hipamd/CMakeLists.txt +++ b/hipamd/CMakeLists.txt @@ -361,6 +361,12 @@ if(POLICY CMP0037) cmake_policy(POP) endif() +############################# +# Code analysis +############################# +# Target: static_check +add_custom_target(static_check COMMAND cppcheck --force --quiet --enable=warning,performance,portability,information,missingInclude src include -I /opt/rocm/include/hcc -I /opt/rocm/include --suppress=*:/opt/rocm/include/hcc/hc.hpp WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) + ############################# # Testing steps ############################# @@ -388,5 +394,4 @@ else() message(STATUS "Testing targets will not be available. To enable them please ensure that the HIP installation directory is writeable. Use -DCMAKE_INSTALL_PREFIX to specify a suitable location") endif() - # vim: ts=4:sw=4:expandtab:smartindent diff --git a/hipamd/src/hip_fp16.cpp b/hipamd/src/hip_fp16.cpp index 8e8f003f56..2d16a3717e 100644 --- a/hipamd/src/hip_fp16.cpp +++ b/hipamd/src/hip_fp16.cpp @@ -448,7 +448,7 @@ __device__ __half2 __lowhigh2highlow(const __half2 a) { __device__ __half2 __lows2half2(const __half2 a, const __half2 b) { __half2 c; - c.y = a.x; + c.x = a.x; c.y = b.x; return c; } diff --git a/hipamd/src/macro_based_grid_launch.inl b/hipamd/src/macro_based_grid_launch.inl index 5547d3a71a..1e36903c56 100644 --- a/hipamd/src/macro_based_grid_launch.inl +++ b/hipamd/src/macro_based_grid_launch.inl @@ -89,9 +89,8 @@ namespace hip_impl stream->lockclose_postKernelCommand(kernel_name, acc_v); delete static_cast(locked_stream); - locked_stream = nullptr; if(HIP_PROFILE_API) { MARKER_END(); } } -} \ No newline at end of file +} From 89bedb74e7f440658b9d7e91d47fef7bd9ed8447 Mon Sep 17 00:00:00 2001 From: Phaneendr-kumar Lanka Date: Mon, 18 Dec 2017 14:31:00 +0530 Subject: [PATCH 03/28] [nvccTests] Resubmit hipMemcpyDtoD & inline_asm_vadd --- hipamd/tests/src/kernel/inline_asm_vadd.cpp | 2 +- hipamd/tests/src/runtimeApi/memory/hipMemcpyDtoD.cpp | 10 +++++----- .../tests/src/runtimeApi/memory/hipMemcpyDtoDAsync.cpp | 7 +++---- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/hipamd/tests/src/kernel/inline_asm_vadd.cpp b/hipamd/tests/src/kernel/inline_asm_vadd.cpp index 23406eefff..541632f72c 100644 --- a/hipamd/tests/src/kernel/inline_asm_vadd.cpp +++ b/hipamd/tests/src/kernel/inline_asm_vadd.cpp @@ -16,7 +16,7 @@ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTI THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s EXCLUDE_HIP_PLATFORM nvcc + * BUILD: %t %s * RUN: %t * HIT_END */ diff --git a/hipamd/tests/src/runtimeApi/memory/hipMemcpyDtoD.cpp b/hipamd/tests/src/runtimeApi/memory/hipMemcpyDtoD.cpp index c64b01f8a7..cf0eb28e2d 100644 --- a/hipamd/tests/src/runtimeApi/memory/hipMemcpyDtoD.cpp +++ b/hipamd/tests/src/runtimeApi/memory/hipMemcpyDtoD.cpp @@ -23,7 +23,7 @@ THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvcc + * BUILD: %t %s ../../test_common.cpp * RUN: %t * HIT_END */ @@ -32,7 +32,7 @@ THE SOFTWARE. int main() { - hipDevice_t device; + size_t Nbytes = N*sizeof(int); int numDevices = 0; int *A_d, *B_d, *C_d, *X_d, *Y_d, *Z_d; @@ -69,8 +69,8 @@ int main() HIPCHECK(hipSetDevice(1)); - HIPCHECK(hipMemcpyDtoD(X_d, A_d, Nbytes)); - HIPCHECK(hipMemcpyDtoD(Y_d, B_d, Nbytes)); + HIPCHECK(hipMemcpyDtoD((hipDeviceptr_t)X_d, (hipDeviceptr_t)A_d, Nbytes)); + HIPCHECK(hipMemcpyDtoD((hipDeviceptr_t)Y_d, (hipDeviceptr_t)B_d, Nbytes)); hipLaunchKernel( HipTest::vectorADD, @@ -82,7 +82,7 @@ int main() static_cast(Y_d), Z_d, N); - HIPCHECK(hipMemcpyDtoH(C_h, Z_d, Nbytes)); + HIPCHECK(hipMemcpyDtoH(C_h, (hipDeviceptr_t)Z_d, Nbytes)); HIPCHECK(hipDeviceSynchronize()); HipTest::checkVectorADD(A_h, B_h, C_h, N); diff --git a/hipamd/tests/src/runtimeApi/memory/hipMemcpyDtoDAsync.cpp b/hipamd/tests/src/runtimeApi/memory/hipMemcpyDtoDAsync.cpp index 6d21ac62e7..e259af0c29 100644 --- a/hipamd/tests/src/runtimeApi/memory/hipMemcpyDtoDAsync.cpp +++ b/hipamd/tests/src/runtimeApi/memory/hipMemcpyDtoDAsync.cpp @@ -32,7 +32,6 @@ THE SOFTWARE. int main() { - hipDevice_t device; size_t Nbytes = N*sizeof(int); int numDevices = 0; int *A_d, *B_d, *C_d, *X_d, *Y_d, *Z_d; @@ -70,8 +69,8 @@ int main() HIPCHECK(hipStreamCreate(&s)); HIPCHECK(hipSetDevice(1)); - HIPCHECK(hipMemcpyDtoDAsync(X_d, A_d, Nbytes, s)); - HIPCHECK(hipMemcpyDtoDAsync(Y_d, B_d, Nbytes, s)); + HIPCHECK(hipMemcpyDtoDAsync((hipDeviceptr_t)X_d, (hipDeviceptr_t)A_d, Nbytes, s)); + HIPCHECK(hipMemcpyDtoDAsync((hipDeviceptr_t)Y_d, (hipDeviceptr_t)B_d, Nbytes, s)); hipLaunchKernel( HipTest::vectorADD, @@ -83,7 +82,7 @@ int main() static_cast(Y_d), Z_d, N); - HIPCHECK(hipMemcpyDtoHAsync(C_h, Z_d, Nbytes, s)); + HIPCHECK(hipMemcpyDtoHAsync(C_h, (hipDeviceptr_t)Z_d, Nbytes, s)); HIPCHECK(hipStreamSynchronize(s)); HIPCHECK(hipDeviceSynchronize()); From eff6831217c2b9badfce01ada55756f833e6388e Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Mon, 18 Dec 2017 19:57:04 +0300 Subject: [PATCH 04/28] [HIPIFY][cmake] Fix require_program function Function require_program erroneously doesn't report "Can't find program" on any missing program except the first one due to the cached FOUND_PROGRAM value. Additionally: + Do not throw FATAL_ERROR on missing program in order to obtain the whole list of missing programs (if any). + Report also found program location. --- hipamd/hipify-clang/CMakeLists.txt | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/hipamd/hipify-clang/CMakeLists.txt b/hipamd/hipify-clang/CMakeLists.txt index da6eaeaa99..79d181528b 100644 --- a/hipamd/hipify-clang/CMakeLists.txt +++ b/hipamd/hipify-clang/CMakeLists.txt @@ -68,9 +68,11 @@ if (HIPIFY_CLANG_TESTS) find_package(PythonInterp 2.7 REQUIRED EXACT) function (require_program PROGRAM_NAME) - find_program(FOUND_PROGRAM ${PROGRAM_NAME}) - if (NOT FOUND_PROGRAM) - message(FATAL_ERROR "Can't find ${PROGRAM_NAME}. Either set HIPIFY_CLANG_TESTS to OFF to disable hipify tests, or install the missing program.") + find_program(FOUND_${PROGRAM_NAME} ${PROGRAM_NAME}) + if (FOUND_${PROGRAM_NAME}) + message(STATUS "Found ${PROGRAM_NAME}: ${FOUND_${PROGRAM_NAME}}") + else() + message(SEND_ERROR "Can't find ${PROGRAM_NAME}. Either set HIPIFY_CLANG_TESTS to OFF to disable hipify tests, or install the missing program.") endif() endfunction() From 037ce74fc94cf55e7c5ed8fb2d81b7fc8988ba5b Mon Sep 17 00:00:00 2001 From: Rahul Garg Date: Wed, 20 Dec 2017 09:36:00 +0530 Subject: [PATCH 05/28] Return pass on single gpu in hipPeerToPeer_simple --- hipamd/tests/src/p2p/hipPeerToPeer_simple.cpp | 42 +++++++++---------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/hipamd/tests/src/p2p/hipPeerToPeer_simple.cpp b/hipamd/tests/src/p2p/hipPeerToPeer_simple.cpp index 32cab371b8..c279658b5a 100644 --- a/hipamd/tests/src/p2p/hipPeerToPeer_simple.cpp +++ b/hipamd/tests/src/p2p/hipPeerToPeer_simple.cpp @@ -397,32 +397,30 @@ int main(int argc, char *argv[]) if (gpuCount < 2) { printf("P2P application requires atleast 2 gpu devices\n"); - return 0; - } + } else { + if (p_tests & 0x100) { + testPeerHostToDevice(false/*useAsyncCopy*/); + } + testPeerHostToDevice(true/*useAsyncCopy*/); - if (p_tests & 0x100) { - testPeerHostToDevice(false/*useAsyncCopy*/); - } - testPeerHostToDevice(true/*useAsyncCopy*/); + if (p_tests & 0x1) { + enablePeerFirst(false/*useAsyncCopy*/); + } - if (p_tests & 0x1) { - enablePeerFirst(false/*useAsyncCopy*/); - } + if (p_tests & 0x2) { + allocMemoryFirst(false/*useAsyncCopy*/); + } - if (p_tests & 0x2) { - allocMemoryFirst(false/*useAsyncCopy*/); - } + if (p_tests & 0x4) { + simpleNegative(); + } - if (p_tests & 0x4) { - simpleNegative(); + if (p_tests & 0x8) { + enablePeerFirst(true/*useAsyncCopy*/); + } + if (p_tests & 0x10) { + allocMemoryFirst(true/*useAsyncCopy*/); + } } - - if (p_tests & 0x8) { - enablePeerFirst(true/*useAsyncCopy*/); - } - if (p_tests & 0x10) { - allocMemoryFirst(true/*useAsyncCopy*/); - } - passed(); } From f69762b300aef58f3d60f45ef2575793a824efcc Mon Sep 17 00:00:00 2001 From: Phaneendr-kumar Lanka Date: Wed, 20 Dec 2017 12:05:21 +0530 Subject: [PATCH 06/28] [nvccWarnings] Fix -Wno-deprecated-declarations in hip_anyall and hip_ballot --- hipamd/tests/src/deviceLib/hip_anyall.cpp | 2 +- hipamd/tests/src/deviceLib/hip_ballot.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hipamd/tests/src/deviceLib/hip_anyall.cpp b/hipamd/tests/src/deviceLib/hip_anyall.cpp index 06354383df..9d455d15b4 100644 --- a/hipamd/tests/src/deviceLib/hip_anyall.cpp +++ b/hipamd/tests/src/deviceLib/hip_anyall.cpp @@ -21,7 +21,7 @@ THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s ../test_common.cpp + * BUILD: %t %s ../test_common.cpp NVCC_OPTIONS --Wno-deprecated-declarations * RUN: %t * HIT_END */ diff --git a/hipamd/tests/src/deviceLib/hip_ballot.cpp b/hipamd/tests/src/deviceLib/hip_ballot.cpp index 14b8f314a1..e4d3cc70a6 100644 --- a/hipamd/tests/src/deviceLib/hip_ballot.cpp +++ b/hipamd/tests/src/deviceLib/hip_ballot.cpp @@ -18,7 +18,7 @@ THE SOFTWARE. */ /* HIT_START - * BUILD: %t %s ../test_common.cpp + * BUILD: %t %s ../test_common.cpp NVCC_OPTIONS --Wno-deprecated-declarations * RUN: %t * HIT_END */ From a1abbea28da85129f232e0441297ff63a060f274 Mon Sep 17 00:00:00 2001 From: Kent Knox Date: Wed, 20 Dec 2017 17:20:03 -0600 Subject: [PATCH 07/28] Disable CUDA build/test while upgrading to nvidia-docker2 This enables us to remove the driver version from startup string --- hipamd/Jenkinsfile | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/hipamd/Jenkinsfile b/hipamd/Jenkinsfile index 9b4b903a33..12c9755103 100644 --- a/hipamd/Jenkinsfile +++ b/hipamd/Jenkinsfile @@ -449,11 +449,7 @@ nvcc: // Block of string constants customizing behavior for cuda String nvcc_ver = 'nvcc-9.0' String from_image = 'nvidia/cuda:9.0-devel' - - // This unfortunately hardcodes the driver version nvidia_driver_384.90 in the volume mount. Research if a way - // exists to get volume driver to customize the volume names to leave out driver version - String inside_args = '''--device=/dev/nvidiactl --device=/dev/nvidia0 --device=/dev/nvidia-uvm --device=/dev/nvidia-uvm-tools - --volume-driver=nvidia-docker --volume=nvidia_driver_384.90:/usr/local/nvidia:ro'''; + String inside_args = '--runtime=nvidia'; // Checkout source code, dependencies and version files String source_hip_rel = checkout_and_version( nvcc_ver ) From 5a45d3ca84c294a6edd473b5da9a645426a98222 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Tue, 26 Dec 2017 20:54:54 +0300 Subject: [PATCH 08/28] [HIPIFY][FIX][#306] Eliminate second cuda main include directive // hipified to #include #include // 1st cuda main include (Driver API) // to eliminate #include // 2nd cuda main include (Runtime API) HIP has one header hip_runtime.h for both CUDA APIs, thus second cuda main include directive is eliminated entirely. --- hipamd/hipify-clang/src/HipifyAction.cpp | 22 +++++++++++++------- hipamd/tests/hipify-clang/headers_test_01.cu | 6 ++++++ hipamd/tests/hipify-clang/headers_test_02.cu | 6 ++++++ 3 files changed, 26 insertions(+), 8 deletions(-) create mode 100644 hipamd/tests/hipify-clang/headers_test_01.cu create mode 100644 hipamd/tests/hipify-clang/headers_test_02.cu diff --git a/hipamd/hipify-clang/src/HipifyAction.cpp b/hipamd/hipify-clang/src/HipifyAction.cpp index ee23387e1f..87329680b3 100644 --- a/hipamd/hipify-clang/src/HipifyAction.cpp +++ b/hipamd/hipify-clang/src/HipifyAction.cpp @@ -157,11 +157,11 @@ void HipifyAction::InclusionDirective(clang::SourceLocation hash_loc, } // Special-casing to avoid duplication of the hip_runtime include. + bool secondMainInclude = false; if (found->second.hipName == "hip/hip_runtime.h") { if (insertedRuntimeHeader) { - return; + secondMainInclude = true; } - insertedRuntimeHeader = true; } @@ -169,22 +169,28 @@ void HipifyAction::InclusionDirective(clang::SourceLocation hash_loc, clang::SourceLocation sl = filename_range.getBegin(); if (found->second.unsupported) { - // An unsupported CUDA header? Oh dear. Print a warning. clang::DiagnosticsEngine& DE = getCompilerInstance().getDiagnostics(); DE.Report(sl, DE.getCustomDiagID(clang::DiagnosticsEngine::Warning, "Unsupported CUDA header")); return; } - const char *B = SM.getCharacterData(sl); + char *B = nullptr; const char *E = SM.getCharacterData(filename_range.getEnd()); - clang::SmallString<128> includeBuffer; clang::StringRef newInclude; // Keep the same include type that the user gave. - if (is_angled) { - newInclude = llvm::Twine("<" + found->second.hipName + ">").toStringRef(includeBuffer); + if (!secondMainInclude) { + B = const_cast(SM.getCharacterData(sl)); + clang::SmallString<128> includeBuffer; + if (is_angled) { + newInclude = llvm::Twine("<" + found->second.hipName + ">").toStringRef(includeBuffer); + } else { + newInclude = llvm::Twine("\"" + found->second.hipName + "\"").toStringRef(includeBuffer); + } } else { - newInclude = llvm::Twine("\"" + found->second.hipName + "\"").toStringRef(includeBuffer); + // hashLoc is location of the '#', thus replacing the whole include directive by empty newInclude starting with '#'. + B = const_cast(SM.getCharacterData(hash_loc)); + sl = hash_loc; } ct::Replacement Rep(SM, sl, E - B, newInclude); diff --git a/hipamd/tests/hipify-clang/headers_test_01.cu b/hipamd/tests/hipify-clang/headers_test_01.cu new file mode 100644 index 0000000000..c39ef80d8f --- /dev/null +++ b/hipamd/tests/hipify-clang/headers_test_01.cu @@ -0,0 +1,6 @@ +// RUN: %run_test hipify "%s" "%t" %cuda_args + +// CHECK: #include +#include +// CHECK-NOT: #include +#include diff --git a/hipamd/tests/hipify-clang/headers_test_02.cu b/hipamd/tests/hipify-clang/headers_test_02.cu new file mode 100644 index 0000000000..90d412f797 --- /dev/null +++ b/hipamd/tests/hipify-clang/headers_test_02.cu @@ -0,0 +1,6 @@ +// RUN: %run_test hipify "%s" "%t" %cuda_args + +// CHECK: #include +#include +// CHECK-NOT: #include +#include From 7b060535c05c1217b7b756e98e6a75140069f29b Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Tue, 9 Jan 2018 20:03:53 +0300 Subject: [PATCH 09/28] [HIPIFY][#308][fix] Consume error returned by Replacements::add(...) --- hipamd/hipify-clang/src/LLVMCompat.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hipamd/hipify-clang/src/LLVMCompat.cpp b/hipamd/hipify-clang/src/LLVMCompat.cpp index 474ba2a7dd..6b6dc18dd2 100644 --- a/hipamd/hipify-clang/src/LLVMCompat.cpp +++ b/hipamd/hipify-clang/src/LLVMCompat.cpp @@ -25,7 +25,7 @@ ct::Replacements& getReplacements(ct::RefactoringTool& Tool, clang::StringRef fi void insertReplacement(ct::Replacements& replacements, const ct::Replacement& rep) { #if LLVM_VERSION_MAJOR > 3 // New clang added error checking to Replacements, and *insists* that you explicitly check it. - llvm::Error e = replacements.add(rep); + llvm::consumeError(replacements.add(rep)); #else // In older versions, it's literally an std::set replacements.insert(rep); From 257bc4748cb7ef485be1a7bfe8e8967baac74a5d Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Tue, 9 Jan 2018 20:20:28 +0300 Subject: [PATCH 10/28] [HIPIFY][tests] Add Windows testing support --- hipamd/tests/hipify-clang/lit.cfg | 17 +++++++++++++---- hipamd/tests/hipify-clang/lit.site.cfg.in | 13 +++++++++++++ hipamd/tests/hipify-clang/run_test.bat | 17 +++++++++++++++++ 3 files changed, 43 insertions(+), 4 deletions(-) create mode 100644 hipamd/tests/hipify-clang/run_test.bat diff --git a/hipamd/tests/hipify-clang/lit.cfg b/hipamd/tests/hipify-clang/lit.cfg index 76b2ca08dc..4dca50c052 100644 --- a/hipamd/tests/hipify-clang/lit.cfg +++ b/hipamd/tests/hipify-clang/lit.cfg @@ -44,8 +44,17 @@ if obj_root is not None: path = os.path.pathsep.join((llvm_tools_dir, config.environment['PATH'])) config.environment['PATH'] = path -config.substitutions.append(("hipify", obj_root+"/hipify-clang")) +hipify_path = obj_root +clang_args = "-x cuda -v --cuda-gpu-arch=sm_30 --cuda-path='%s'" -# Clang args for CUDA... -config.substitutions.append(("%cuda_args", "-x cuda --cuda-path=%s --cuda-gpu-arch=sm_30 -isystem%s/samples/common/inc" % (config.cuda_root, config.cuda_root))) -config.substitutions.append(("%run_test", config.test_source_root + "/run_test.sh")) +if sys.platform in ['win32']: + run_test_ext = ".bat" + hipify_path += "/" + config.build_type + clang_args += " -isystem'%s'/common/inc -std=c++14" +else: + run_test_ext = ".sh" + clang_args += " -isystem'%s'/samples/common/inc" + +config.substitutions.append(("%cuda_args", clang_args % (config.cuda_root, config.cuda_sdk_root))) +config.substitutions.append(("hipify", '"' + hipify_path + "/hipify-clang" + '"')) +config.substitutions.append(("%run_test", '"' + config.test_source_root + "/run_test" + run_test_ext + '"')) diff --git a/hipamd/tests/hipify-clang/lit.site.cfg.in b/hipamd/tests/hipify-clang/lit.site.cfg.in index c1f6804d4d..c1095f65a9 100644 --- a/hipamd/tests/hipify-clang/lit.site.cfg.in +++ b/hipamd/tests/hipify-clang/lit.site.cfg.in @@ -1,8 +1,21 @@ import sys +import os config.llvm_tools_dir = "@LLVM_TOOLS_BINARY_DIR@" config.obj_root = "@CMAKE_CURRENT_BINARY_DIR@" config.cuda_root = "@CUDA_TOOLKIT_ROOT_DIR@" +if sys.platform in ['win32']: + config.cuda_sdk_root = "@CUDA_SDK_ROOT_DIR@" + if not config.cuda_sdk_root or config.cuda_sdk_root == "CUDA_SDK_ROOT_DIR-NOTFOUND": + config.cuda_samples_root = os.environ.get('NVCUDASAMPLES_ROOT') + if not config.cuda_samples_root or config.cuda_samples_root == "NVCUDASAMPLES_ROOT-NOTFOUND": + lit_config.fatal('No CUDA Samples dir set! Please set CUDA_SDK_ROOT_DIR.') + config.cuda_sdk_root = config.cuda_samples_root + config.build_type = "@CMAKE_BUILD_TYPE@" + if not config.build_type: + config.build_type = "Debug" +else: + config.cuda_sdk_root = config.cuda_root # Support substitution of the tools and libs dirs with user parameters. This is # used when we can't determine the tool dir at configuration time. diff --git a/hipamd/tests/hipify-clang/run_test.bat b/hipamd/tests/hipify-clang/run_test.bat new file mode 100644 index 0000000000..5db0cc5043 --- /dev/null +++ b/hipamd/tests/hipify-clang/run_test.bat @@ -0,0 +1,17 @@ +@echo off + +for %%i in (FileCheck.exe) do set FILE_CHECK=%%~$PATH:i +if not defined FILE_CHECK (echo Error: FileCheck.exe not found in PATH. && exit /b 1) + +set HIPIFY=%1 +set IN_FILE=%2 +set TMP_FILE=%3 + +set all_args=%* +call set clang_args=%%all_args:*%4=%% +set clang_args=%4%clang_args% + +%HIPIFY% -o=%TMP_FILE% %IN_FILE% -- %clang_args% +if errorlevel 1 (echo Error: hipify-clang.exe failed with exit code: %errorlevel% && exit /b %errorlevel%) +%FILE_CHECK% %IN_FILE% -input-file=%TMP_FILE% +if errorlevel 1 (echo Error: FileCheck.exe failed with exit code: %errorlevel% && exit /b %errorlevel%) From dc6094cc608f846325c84efc6f0b9c89839c85d5 Mon Sep 17 00:00:00 2001 From: Phaneendr-kumar Lanka Date: Wed, 10 Jan 2018 10:51:01 +0530 Subject: [PATCH 11/28] [nvcc] Enable hipGetDeviceAttribute --- hipamd/tests/src/runtimeApi/device/hipGetDeviceAttribute.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hipamd/tests/src/runtimeApi/device/hipGetDeviceAttribute.cpp b/hipamd/tests/src/runtimeApi/device/hipGetDeviceAttribute.cpp index 2919939694..0b965a3ae3 100644 --- a/hipamd/tests/src/runtimeApi/device/hipGetDeviceAttribute.cpp +++ b/hipamd/tests/src/runtimeApi/device/hipGetDeviceAttribute.cpp @@ -23,7 +23,7 @@ THE SOFTWARE. /* HIT_START * BUILD: %t %s ../../test_common.cpp - * RUN: %t EXCLUDE_HIP_PLATFORM nvcc + * RUN: %t * HIT_END */ From b32639d1a8bf8050a3555d0c5daeae2cc210b246 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Wed, 10 Jan 2018 21:03:02 +0300 Subject: [PATCH 12/28] [HIPIFY][tests] Add setlocal to batch script --- hipamd/tests/hipify-clang/run_test.bat | 1 + 1 file changed, 1 insertion(+) diff --git a/hipamd/tests/hipify-clang/run_test.bat b/hipamd/tests/hipify-clang/run_test.bat index 5db0cc5043..6eefb7e46e 100644 --- a/hipamd/tests/hipify-clang/run_test.bat +++ b/hipamd/tests/hipify-clang/run_test.bat @@ -1,4 +1,5 @@ @echo off +setlocal for %%i in (FileCheck.exe) do set FILE_CHECK=%%~$PATH:i if not defined FILE_CHECK (echo Error: FileCheck.exe not found in PATH. && exit /b 1) From 5c15cc77cb087e5907d5704816522c1043c8d358 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Wed, 10 Jan 2018 21:06:06 +0300 Subject: [PATCH 13/28] [HIPIFY][cmake] Exclude socat from Win config --- hipamd/hipify-clang/CMakeLists.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hipamd/hipify-clang/CMakeLists.txt b/hipamd/hipify-clang/CMakeLists.txt index 79d181528b..2b725d7188 100644 --- a/hipamd/hipify-clang/CMakeLists.txt +++ b/hipamd/hipify-clang/CMakeLists.txt @@ -78,7 +78,9 @@ if (HIPIFY_CLANG_TESTS) require_program(lit) require_program(FileCheck) - require_program(socat) + if(NOT WIN32) + require_program(socat) + endif() # Populates CUDA_TOOLKIT_ROOT_DIR, which is then applied to the lit config to give the # value of --cuda-path for the test runs. From cc133b09aa582beaad89b5c5dd4fc0e5621891d3 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Wed, 10 Jan 2018 21:26:05 +0300 Subject: [PATCH 14/28] [HIPIFY][fix][#306] Code improve --- hipamd/hipify-clang/src/HipifyAction.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/hipamd/hipify-clang/src/HipifyAction.cpp b/hipamd/hipify-clang/src/HipifyAction.cpp index 87329680b3..8fb318776d 100644 --- a/hipamd/hipify-clang/src/HipifyAction.cpp +++ b/hipamd/hipify-clang/src/HipifyAction.cpp @@ -174,13 +174,10 @@ void HipifyAction::InclusionDirective(clang::SourceLocation hash_loc, return; } - char *B = nullptr; - const char *E = SM.getCharacterData(filename_range.getEnd()); clang::StringRef newInclude; // Keep the same include type that the user gave. if (!secondMainInclude) { - B = const_cast(SM.getCharacterData(sl)); clang::SmallString<128> includeBuffer; if (is_angled) { newInclude = llvm::Twine("<" + found->second.hipName + ">").toStringRef(includeBuffer); @@ -189,10 +186,10 @@ void HipifyAction::InclusionDirective(clang::SourceLocation hash_loc, } } else { // hashLoc is location of the '#', thus replacing the whole include directive by empty newInclude starting with '#'. - B = const_cast(SM.getCharacterData(hash_loc)); sl = hash_loc; } - + const char *B = SM.getCharacterData(sl); + const char *E = SM.getCharacterData(filename_range.getEnd()); ct::Replacement Rep(SM, sl, E - B, newInclude); insertReplacement(Rep, clang::FullSourceLoc{sl, SM}); } From 39a03720770205bd4fedd7f5d674f4ac08dae734 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Fri, 12 Jan 2018 17:15:37 +0300 Subject: [PATCH 15/28] [HIPIFY][tests][win] CUDA samples root env. var is changes Env. var NVCUDASAMPLES_ROOT is changed to NVCUDASAMPLESX_Y_ROOT where X - major ver, Y - minor ver. Reason: NVCUDASAMPLES_ROOT contains path to CUDA SDK installed last, while NVCUDASAMPLESX_Y_ROOT contains samples of the same version as of CUDA_TOOLKIT_ROOT_DIR. --- hipamd/tests/hipify-clang/lit.site.cfg.in | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/hipamd/tests/hipify-clang/lit.site.cfg.in b/hipamd/tests/hipify-clang/lit.site.cfg.in index c1095f65a9..e52db7b026 100644 --- a/hipamd/tests/hipify-clang/lit.site.cfg.in +++ b/hipamd/tests/hipify-clang/lit.site.cfg.in @@ -7,8 +7,10 @@ config.cuda_root = "@CUDA_TOOLKIT_ROOT_DIR@" if sys.platform in ['win32']: config.cuda_sdk_root = "@CUDA_SDK_ROOT_DIR@" if not config.cuda_sdk_root or config.cuda_sdk_root == "CUDA_SDK_ROOT_DIR-NOTFOUND": - config.cuda_samples_root = os.environ.get('NVCUDASAMPLES_ROOT') - if not config.cuda_samples_root or config.cuda_samples_root == "NVCUDASAMPLES_ROOT-NOTFOUND": + cuda_version = "@CUDA_VERSION@" + cuda_version = cuda_version.replace('.','_') + config.cuda_samples_root = os.environ.get('NVCUDASAMPLES' + cuda_version + '_ROOT') + if not config.cuda_samples_root: lit_config.fatal('No CUDA Samples dir set! Please set CUDA_SDK_ROOT_DIR.') config.cuda_sdk_root = config.cuda_samples_root config.build_type = "@CMAKE_BUILD_TYPE@" From d11dccdd443c3295f7f02fdb659c68b7558ae717 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Fri, 12 Jan 2018 17:36:41 +0300 Subject: [PATCH 16/28] [HIPIFY][cmake] Version compatibility checks of CUDA and clang are added --- hipamd/hipify-clang/CMakeLists.txt | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/hipamd/hipify-clang/CMakeLists.txt b/hipamd/hipify-clang/CMakeLists.txt index 79d181528b..5910698e51 100644 --- a/hipamd/hipify-clang/CMakeLists.txt +++ b/hipamd/hipify-clang/CMakeLists.txt @@ -65,7 +65,7 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHIPIFY_CLANG_RES=\\\"${LLVM_LIBRARY_DI install(TARGETS hipify-clang DESTINATION bin) if (HIPIFY_CLANG_TESTS) - find_package(PythonInterp 2.7 REQUIRED EXACT) + find_package(PythonInterp 2.7 REQUIRED) function (require_program PROGRAM_NAME) find_program(FOUND_${PROGRAM_NAME} ${PROGRAM_NAME}) @@ -78,11 +78,30 @@ if (HIPIFY_CLANG_TESTS) require_program(lit) require_program(FileCheck) - require_program(socat) + if(NOT WIN32) + require_program(socat) + endif() # Populates CUDA_TOOLKIT_ROOT_DIR, which is then applied to the lit config to give the # value of --cuda-path for the test runs. find_package(CUDA REQUIRED) + if ((CUDA_VERSION VERSION_LESS "7.0") OR (LLVM_PACKAGE_VERSION VERSION_LESS "3.8") OR + (CUDA_VERSION VERSION_GREATER "7.5" AND LLVM_PACKAGE_VERSION VERSION_LESS "4.0") OR + (CUDA_VERSION VERSION_GREATER "8.0" AND LLVM_PACKAGE_VERSION VERSION_LESS "6.0") OR + (CUDA_VERSION VERSION_GREATER "9.0" AND LLVM_PACKAGE_VERSION VERSION_LESS "7.0")) + message(SEND_ERROR "CUDA ${CUDA_VERSION} is not supported by clang ${LLVM_PACKAGE_VERSION}.") + if (CUDA_VERSION VERSION_LESS "7.0") + message(STATUS "Please install CUDA 7.0 or higher.") + elseif ((CUDA_VERSION VERSION_EQUAL "7.0") OR (CUDA_VERSION VERSION_EQUAL "7.5")) + message(STATUS "Please install clang 3.8 or higher.") + elseif (CUDA_VERSION VERSION_EQUAL "8.0") + message(STATUS "Please install clang 4.0 or higher.") + elseif (CUDA_VERSION VERSION_EQUAL "9.0") + message(STATUS "Please install clang 6.0 or higher.") + elseif (CUDA_VERSION VERSION_EQUAL "9.1") + message(STATUS "Please install clang 7.0 or higher.") + endif() + endif() configure_file( ${CMAKE_CURRENT_LIST_DIR}/../tests/hipify-clang/lit.site.cfg.in From f83df46b8cf99008a2637b3f45db9f7b4a876818 Mon Sep 17 00:00:00 2001 From: emankov Date: Mon, 15 Jan 2018 14:20:37 +0300 Subject: [PATCH 17/28] [HIPIFY][#311][fix] Get rid of socat in run_test.sh --- hipamd/hipify-clang/CMakeLists.txt | 3 --- hipamd/tests/hipify-clang/run_test.sh | 11 ----------- 2 files changed, 14 deletions(-) diff --git a/hipamd/hipify-clang/CMakeLists.txt b/hipamd/hipify-clang/CMakeLists.txt index 5910698e51..8b3fa7e591 100644 --- a/hipamd/hipify-clang/CMakeLists.txt +++ b/hipamd/hipify-clang/CMakeLists.txt @@ -78,9 +78,6 @@ if (HIPIFY_CLANG_TESTS) require_program(lit) require_program(FileCheck) - if(NOT WIN32) - require_program(socat) - endif() # Populates CUDA_TOOLKIT_ROOT_DIR, which is then applied to the lit config to give the # value of --cuda-path for the test runs. diff --git a/hipamd/tests/hipify-clang/run_test.sh b/hipamd/tests/hipify-clang/run_test.sh index 46b2fc066b..418df5dd4d 100755 --- a/hipamd/tests/hipify-clang/run_test.sh +++ b/hipamd/tests/hipify-clang/run_test.sh @@ -13,16 +13,5 @@ shift 3 # Remaining args are the ones to forward to clang proper. -# Time for the classic insane little trick for making colour output work. -# A self-deleting shell-script that does the thing we want to do... -TMP_SCRIPT=$(mktemp) -cat << EOF > $TMP_SCRIPT -set -o errexit -set -o xtrace -rm $TMP_SCRIPT $HIPIFY -o=$TMP_FILE $IN_FILE -- $@ && cat $TMP_FILE | sed -Ee 's|//.+|// |g' | FileCheck $IN_FILE -EOF -chmod a+x $TMP_SCRIPT -# Run the script via socat, spawning a virtual terminal and propagating exit code, and hence failure. -socat -du EXEC:$TMP_SCRIPT,pty,stderr STDOUT From e90a76a1ef6ecd719f3f89778ec60cae3d0a2d2b Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Mon, 15 Jan 2018 21:05:05 +0300 Subject: [PATCH 18/28] [HIPIFY][fix][#211] Algorithm for explicit insert of hip include directive If in source CUDA file main header (cuda_runtime.h or cuda.h) is not presented, corresponding HIP main header (hip_runtime.h) should be explicitly included in output hipified file. [Algorithm] 1. If #pragma once is presented, HIP main header should be placed just after it; 2. Otherwise if any other (not CUDA main) header is presented, HIP main header should be placed just before it; 3. Otherwise HIP main header should be placed in the beginning of output file. P.S. There might be one more situation when #ifndef #define ... #endif guard for the entire file is presented (make sense for *.h, *.hpp, *.cuh files). In this case HIP main include should be placed just after such #ifdef, or after #pragma once, if it is also presented. This situation will be handled in a separate change. --- hipamd/hipify-clang/src/HipifyAction.cpp | 51 +++++++++++++++----- hipamd/hipify-clang/src/HipifyAction.h | 9 ++++ hipamd/tests/hipify-clang/headers_test_03.cu | 10 ++++ hipamd/tests/hipify-clang/headers_test_04.cu | 12 +++++ hipamd/tests/hipify-clang/headers_test_05.cu | 12 +++++ 5 files changed, 81 insertions(+), 13 deletions(-) create mode 100644 hipamd/tests/hipify-clang/headers_test_03.cu create mode 100644 hipamd/tests/hipify-clang/headers_test_04.cu create mode 100644 hipamd/tests/hipify-clang/headers_test_05.cu diff --git a/hipamd/hipify-clang/src/HipifyAction.cpp b/hipamd/hipify-clang/src/HipifyAction.cpp index 8fb318776d..ada0adec5d 100644 --- a/hipamd/hipify-clang/src/HipifyAction.cpp +++ b/hipamd/hipify-clang/src/HipifyAction.cpp @@ -152,7 +152,10 @@ void HipifyAction::InclusionDirective(clang::SourceLocation hash_loc, const auto found = CUDA_INCLUDE_MAP.find(file_name); if (found == CUDA_INCLUDE_MAP.end()) { - // Not a CUDA include - don't touch it. + if (!firstNotMainHeader) { + firstNotMainHeader = true; + firstNotMainHeaderLoc = hash_loc; + } return; } @@ -160,7 +163,7 @@ void HipifyAction::InclusionDirective(clang::SourceLocation hash_loc, bool secondMainInclude = false; if (found->second.hipName == "hip/hip_runtime.h") { if (insertedRuntimeHeader) { - secondMainInclude = true; + secondMainInclude = true; } insertedRuntimeHeader = true; } @@ -178,15 +181,15 @@ void HipifyAction::InclusionDirective(clang::SourceLocation hash_loc, // Keep the same include type that the user gave. if (!secondMainInclude) { - clang::SmallString<128> includeBuffer; - if (is_angled) { - newInclude = llvm::Twine("<" + found->second.hipName + ">").toStringRef(includeBuffer); - } else { - newInclude = llvm::Twine("\"" + found->second.hipName + "\"").toStringRef(includeBuffer); - } + clang::SmallString<128> includeBuffer; + if (is_angled) { + newInclude = llvm::Twine("<" + found->second.hipName + ">").toStringRef(includeBuffer); + } else { + newInclude = llvm::Twine("\"" + found->second.hipName + "\"").toStringRef(includeBuffer); + } } else { - // hashLoc is location of the '#', thus replacing the whole include directive by empty newInclude starting with '#'. - sl = hash_loc; + // hashLoc is location of the '#', thus replacing the whole include directive by empty newInclude starting with '#'. + sl = hash_loc; } const char *B = SM.getCharacterData(sl); const char *E = SM.getCharacterData(filename_range.getEnd()); @@ -194,6 +197,18 @@ void HipifyAction::InclusionDirective(clang::SourceLocation hash_loc, insertReplacement(Rep, clang::FullSourceLoc{sl, SM}); } +void HipifyAction::PragmaDirective(clang::SourceLocation Loc, clang::PragmaIntroducerKind Introducer) { + if (pragmaOnce) { return; } + clang::SourceManager& SM = getCompilerInstance().getSourceManager(); + clang::Preprocessor& PP = getCompilerInstance().getPreprocessor(); + const clang::Token tok = PP.LookAhead(0); + clang::LangOptions DefaultLangOptions; + StringRef Text(SM.getCharacterData(tok.getLocation()), tok.getLength()); + if (Text == "once") { + pragmaOnce = true; + pragmaOnceLoc = PP.LookAhead(1).getLocation(); + } +} bool HipifyAction::cudaLaunchKernel(const clang::ast_matchers::MatchFinder::MatchResult& Result) { StringRef refName = "cudaLaunchKernel"; @@ -339,10 +354,16 @@ void HipifyAction::EndSourceFileAction() { // implicitly included by the compiler. Instead, we _delete_ CUDA headers, and unconditionally insert // one copy of the hip include into every file. clang::SourceManager& SM = getCompilerInstance().getSourceManager(); - - clang::SourceLocation sl = SM.getLocForStartOfFile(SM.getMainFileID()); + clang::SourceLocation sl; + if (pragmaOnce) { + sl = pragmaOnceLoc; + } else if (firstNotMainHeader) { + sl = firstNotMainHeaderLoc; + } else { + sl = SM.getLocForStartOfFile(SM.getMainFileID()); + } clang::FullSourceLoc fullSL(sl, SM); - ct::Replacement Rep(SM, sl, 0, "#include \n"); + ct::Replacement Rep(SM, sl, 0, "\n#include \n"); insertReplacement(Rep, fullSL); } @@ -367,6 +388,10 @@ public: const clang::Module* imported) override { hipifyAction.InclusionDirective(hash_loc, include_token, file_name, is_angled, filename_range, file, search_path, relative_path, imported); } + + void PragmaDirective(clang::SourceLocation Loc, clang::PragmaIntroducerKind Introducer) override { + hipifyAction.PragmaDirective(Loc, Introducer); + } }; } diff --git a/hipamd/hipify-clang/src/HipifyAction.h b/hipamd/hipify-clang/src/HipifyAction.h index 03d34601f3..a269a37117 100644 --- a/hipamd/hipify-clang/src/HipifyAction.h +++ b/hipamd/hipify-clang/src/HipifyAction.h @@ -23,6 +23,10 @@ private: // not, we insert it at the top of the file when we finish processing it. // This approach means we do the best it's possible to do w.r.t preserving the user's include order. bool insertedRuntimeHeader = false; + bool firstNotMainHeader = false; + bool pragmaOnce = false; + clang::SourceLocation firstNotMainHeaderLoc; + clang::SourceLocation pragmaOnceLoc; /** * Rewrite a string literal to refer to hip, not CUDA. @@ -57,6 +61,11 @@ public: StringRef relative_path, const clang::Module *imported); + /** + * Called by the preprocessor for each pragma directive during the non-raw lexing pass. + */ + void PragmaDirective(clang::SourceLocation Loc, clang::PragmaIntroducerKind Introducer); + protected: /** * Add a Replacement for the current file. These will all be applied after executing the FrontendAction. diff --git a/hipamd/tests/hipify-clang/headers_test_03.cu b/hipamd/tests/hipify-clang/headers_test_03.cu new file mode 100644 index 0000000000..0223f11119 --- /dev/null +++ b/hipamd/tests/hipify-clang/headers_test_03.cu @@ -0,0 +1,10 @@ +// RUN: %run_test hipify "%s" "%t" %cuda_args + +// CHECK: #pragma once +// CHECK-NEXT: #include +#pragma once +// CHECK-NOT: #include +int main(int argc, char* argv[]) { + return 0; +} + diff --git a/hipamd/tests/hipify-clang/headers_test_04.cu b/hipamd/tests/hipify-clang/headers_test_04.cu new file mode 100644 index 0000000000..f10c0eda56 --- /dev/null +++ b/hipamd/tests/hipify-clang/headers_test_04.cu @@ -0,0 +1,12 @@ +// RUN: %run_test hipify "%s" "%t" %cuda_args + +// CHECK: #include +// CHECK-NEXT: #include +// CHECK-NEXT: #include +#include +#include +// CHECK-NOT: #include +int main(int argc, char* argv[]) { + return 0; +} + diff --git a/hipamd/tests/hipify-clang/headers_test_05.cu b/hipamd/tests/hipify-clang/headers_test_05.cu new file mode 100644 index 0000000000..c9428b62d5 --- /dev/null +++ b/hipamd/tests/hipify-clang/headers_test_05.cu @@ -0,0 +1,12 @@ +// RUN: %run_test hipify "%s" "%t" %cuda_args + +// CHECK: #pragma once +// CHECK-NEXT: #include +#pragma once +// CHECK-NOT: #include +#include + +int main(int argc, char* argv[]) { + return 0; +} + From f51ac2b43eda4355e302c357f81adaf9eacc9abc Mon Sep 17 00:00:00 2001 From: Kent Knox Date: Wed, 10 Jan 2018 11:26:51 -0600 Subject: [PATCH 19/28] Adding dependencies for rocm_agent_enumerator --- hipamd/Jenkinsfile | 6 +++--- hipamd/docker/dockerfile-build-ubuntu-16.04 | 3 +++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/hipamd/Jenkinsfile b/hipamd/Jenkinsfile index 12c9755103..ce6fbc2e9e 100644 --- a/hipamd/Jenkinsfile +++ b/hipamd/Jenkinsfile @@ -371,7 +371,7 @@ parallel hcc_ctu: { String hcc_ver = 'hcc-ctu' String from_image = 'compute-artifactory:5001/radeonopencompute/hcc/clang_tot_upgrade/hcc-lc-ubuntu-16.04:latest' - String inside_args = '--device=/dev/kfd' + String inside_args = '--device=/dev/kfd --device=/dev/dri' // Checkout source code, dependencies and version files String source_hip_rel = checkout_and_version( hcc_ver ) @@ -411,8 +411,8 @@ hcc_1_6: node('docker && rocm') { String hcc_ver = 'hcc-1.6' - String from_image = 'compute-artifactory:5001/radeonopencompute/hcc/roc-1.6.x/hcc-lc-ubuntu-16.04:latest' - String inside_args = '--device=/dev/kfd' + String from_image = 'rocm/dev-ubuntu-16.04:latest' + String inside_args = '--device=/dev/kfd --device=/dev/dri' // Checkout source code, dependencies and version files String source_hip_rel = checkout_and_version( hcc_ver ) diff --git a/hipamd/docker/dockerfile-build-ubuntu-16.04 b/hipamd/docker/dockerfile-build-ubuntu-16.04 index 031bf72437..8f655f7c78 100644 --- a/hipamd/docker/dockerfile-build-ubuntu-16.04 +++ b/hipamd/docker/dockerfile-build-ubuntu-16.04 @@ -7,6 +7,7 @@ MAINTAINER Kent Knox ARG user_uid # Install Packages +# python and libnuma1 are dependencies of rocm_agent_enumerator RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ sudo \ build-essential \ @@ -14,6 +15,8 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-ins git \ libelf-dev \ rpm \ + python \ + libnuma1 \ && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* From e645a2b8f397ef40128ebd151bf1d54d1c13672e Mon Sep 17 00:00:00 2001 From: Kent Knox Date: Wed, 10 Jan 2018 13:28:18 -0600 Subject: [PATCH 20/28] adding group-add flag to docker run --- hipamd/Jenkinsfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hipamd/Jenkinsfile b/hipamd/Jenkinsfile index ce6fbc2e9e..4909e666f0 100644 --- a/hipamd/Jenkinsfile +++ b/hipamd/Jenkinsfile @@ -367,11 +367,11 @@ if( params.hcc_integration_test ) // The following launches 3 builds in parallel: hcc-ctu, hcc-1.6 and cuda parallel hcc_ctu: { - node('docker && rocm') + node('docker && rocm && dkms') { String hcc_ver = 'hcc-ctu' String from_image = 'compute-artifactory:5001/radeonopencompute/hcc/clang_tot_upgrade/hcc-lc-ubuntu-16.04:latest' - String inside_args = '--device=/dev/kfd --device=/dev/dri' + String inside_args = '--device=/dev/kfd --device=/dev/dri --group-add=video' // Checkout source code, dependencies and version files String source_hip_rel = checkout_and_version( hcc_ver ) @@ -408,7 +408,7 @@ parallel hcc_ctu: }, hcc_1_6: { - node('docker && rocm') + node('docker && rocm && !dkms') { String hcc_ver = 'hcc-1.6' String from_image = 'rocm/dev-ubuntu-16.04:latest' From a26e323612c21137698ba820f248e9622128cd04 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Tue, 16 Jan 2018 15:08:08 +0300 Subject: [PATCH 21/28] Update HipifyAction.cpp dead code eliminate --- hipamd/hipify-clang/src/HipifyAction.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/hipamd/hipify-clang/src/HipifyAction.cpp b/hipamd/hipify-clang/src/HipifyAction.cpp index ada0adec5d..7cd5b3d402 100644 --- a/hipamd/hipify-clang/src/HipifyAction.cpp +++ b/hipamd/hipify-clang/src/HipifyAction.cpp @@ -202,7 +202,6 @@ void HipifyAction::PragmaDirective(clang::SourceLocation Loc, clang::PragmaIntro clang::SourceManager& SM = getCompilerInstance().getSourceManager(); clang::Preprocessor& PP = getCompilerInstance().getPreprocessor(); const clang::Token tok = PP.LookAhead(0); - clang::LangOptions DefaultLangOptions; StringRef Text(SM.getCharacterData(tok.getLocation()), tok.getLength()); if (Text == "once") { pragmaOnce = true; From 5c82a2e7fa8d3c2ef8f78c83af575c275db38a49 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Tue, 16 Jan 2018 16:40:31 +0300 Subject: [PATCH 22/28] [HIPIFY][tests] Add more suffixes to lit config --- hipamd/tests/hipify-clang/lit.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hipamd/tests/hipify-clang/lit.cfg b/hipamd/tests/hipify-clang/lit.cfg index 4dca50c052..12b1410cee 100644 --- a/hipamd/tests/hipify-clang/lit.cfg +++ b/hipamd/tests/hipify-clang/lit.cfg @@ -13,7 +13,7 @@ import lit.util config.name = 'hipify' # suffixes: CUDA source is only supported -config.suffixes = ['.cu'] +config.suffixes = ['.cu','.cuh','.cpp','.c','.hpp','.h'] # testFormat: The test format to use to interpret tests. config.test_format = lit.formats.ShTest() From 42f0966a9efa7a489d5d4fe501c613b0994f9042 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Tue, 16 Jan 2018 17:13:29 +0300 Subject: [PATCH 23/28] [HIPIFY][tests] Remove checks on cudaBuiltins As HIP has started to support vanilla CUDA syntax for threadIdx, blockIdx, blockDim and gridDim. Other CUDA builtins are not tracked for now. --- hipamd/tests/hipify-clang/axpy.cu | 1 - hipamd/tests/hipify-clang/cudaRegister.cu | 2 -- hipamd/tests/hipify-clang/square.cu | 2 -- 3 files changed, 5 deletions(-) diff --git a/hipamd/tests/hipify-clang/axpy.cu b/hipamd/tests/hipify-clang/axpy.cu index 071c503a35..2e59fc021a 100644 --- a/hipamd/tests/hipify-clang/axpy.cu +++ b/hipamd/tests/hipify-clang/axpy.cu @@ -16,7 +16,6 @@ template __global__ void axpy(T a, T *x, T *y) { - // CHECK: y[hipThreadIdx_x] = a * x[hipThreadIdx_x]; y[threadIdx.x] = a * x[threadIdx.x]; } diff --git a/hipamd/tests/hipify-clang/cudaRegister.cu b/hipamd/tests/hipify-clang/cudaRegister.cu index 80d17f65b9..79d21707c2 100644 --- a/hipamd/tests/hipify-clang/cudaRegister.cu +++ b/hipamd/tests/hipify-clang/cudaRegister.cu @@ -38,7 +38,6 @@ if(status != cudaSuccess) { \ } __global__ void Inc1(float *Ad, float *Bd){ - // CHECK: int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; int tx = threadIdx.x + blockIdx.x * blockDim.x; if(tx < 1 ){ for(int i=0;i __global__ void vector_square(T *C_d, const T *A_d, size_t N) { - // CHECK: size_t offset = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x); - // CHECK: size_t stride = hipBlockDim_x * hipGridDim_x; size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); size_t stride = blockDim.x * gridDim.x; From 23889e79872bf382aca5dd18bf9a3e28dbebad08 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Tue, 16 Jan 2018 17:21:19 +0300 Subject: [PATCH 24/28] [HIPIFY] Add more supported by HIP CUDA RT API Textures and Arrays data types --- hipamd/hipify-clang/src/CUDA2HipMap.cpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/hipamd/hipify-clang/src/CUDA2HipMap.cpp b/hipamd/hipify-clang/src/CUDA2HipMap.cpp index 1893f6ce5b..db95c7216b 100644 --- a/hipamd/hipify-clang/src/CUDA2HipMap.cpp +++ b/hipamd/hipify-clang/src/CUDA2HipMap.cpp @@ -213,6 +213,13 @@ const std::map CUDA_TYPE_NAME_MAP{ {"cudaMipmappedArray_t", {"hipMipmappedArray_t", CONV_MEM, API_RUNTIME}}, {"cudaMipmappedArray_const_t", {"hipMipmappedArray_const_t", CONV_MEM, API_RUNTIME}}, + // defines + {"cudaArrayDefault", {"hipArrayDefault", CONV_MEM, API_RUNTIME}}, + {"cudaArrayLayered", {"hipArrayLayered", CONV_MEM, API_RUNTIME}}, + {"cudaArraySurfaceLoadStore", {"hipArraySurfaceLoadStore", CONV_MEM, API_RUNTIME}}, + {"cudaArrayCubemap", {"hipArrayCubemap", CONV_MEM, API_RUNTIME}}, + {"cudaArrayTextureGather", {"hipArrayTextureGather", CONV_MEM, API_RUNTIME}}, + {"cudaMemoryAdvise", {"hipMemAdvise", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, // API_Driver ANALOGUE (CUmem_advise) {"cudaMemRangeAttribute", {"hipMemRangeAttribute", CONV_TYPE, API_RUNTIME, HIP_UNSUPPORTED}}, // API_Driver ANALOGUE (CUmem_range_attribute) {"cudaMemcpyKind", {"hipMemcpyKind", CONV_MEM, API_RUNTIME}}, @@ -263,6 +270,15 @@ const std::map CUDA_TYPE_NAME_MAP{ {"cudaSurfaceFormatMode", {"hipSurfaceFormatMode", CONV_SURFACE, API_RUNTIME, HIP_UNSUPPORTED}}, + // defines + {"cudaTextureType1D", {"hipTextureType1D", CONV_TEX, API_RUNTIME}}, + {"cudaTextureType2D", {"hipTextureType2D", CONV_TEX, API_RUNTIME}}, + {"cudaTextureType3D", {"hipTextureType3D", CONV_TEX, API_RUNTIME}}, + {"cudaTextureTypeCubemap", {"hipTextureTypeCubemap", CONV_TEX, API_RUNTIME}}, + {"cudaTextureType1DLayered", {"hipTextureType1DLayered", CONV_TEX, API_RUNTIME}}, + {"cudaTextureType2DLayered", {"hipTextureType2DLayered", CONV_TEX, API_RUNTIME}}, + {"cudaTextureTypeCubemapLayered", {"hipTextureTypeCubemapLayered", CONV_TEX, API_RUNTIME}}, + // Inter-Process Communication (IPC) {"cudaIpcEventHandle_t", {"hipIpcEventHandle_t", CONV_TYPE, API_RUNTIME}}, {"cudaIpcEventHandle_st", {"hipIpcEventHandle_t", CONV_TYPE, API_RUNTIME}}, From 44d51e794bdd3cca5da823174888e7223ab187ab Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Tue, 16 Jan 2018 19:21:14 +0300 Subject: [PATCH 25/28] Update headers_test_04.cu --- hipamd/tests/hipify-clang/headers_test_04.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hipamd/tests/hipify-clang/headers_test_04.cu b/hipamd/tests/hipify-clang/headers_test_04.cu index f10c0eda56..57667b5a34 100644 --- a/hipamd/tests/hipify-clang/headers_test_04.cu +++ b/hipamd/tests/hipify-clang/headers_test_04.cu @@ -5,7 +5,7 @@ // CHECK-NEXT: #include #include #include -// CHECK-NOT: #include +// CHECK-NOT: #include int main(int argc, char* argv[]) { return 0; } From 3db7dc5b9e69ef7e3f5f0cf0d8bb672d521a6b56 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Tue, 16 Jan 2018 19:21:59 +0300 Subject: [PATCH 26/28] Update headers_test_03.cu --- hipamd/tests/hipify-clang/headers_test_03.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hipamd/tests/hipify-clang/headers_test_03.cu b/hipamd/tests/hipify-clang/headers_test_03.cu index 0223f11119..5f2e479683 100644 --- a/hipamd/tests/hipify-clang/headers_test_03.cu +++ b/hipamd/tests/hipify-clang/headers_test_03.cu @@ -3,7 +3,7 @@ // CHECK: #pragma once // CHECK-NEXT: #include #pragma once -// CHECK-NOT: #include +// CHECK-NOT: #include int main(int argc, char* argv[]) { return 0; } From 284d1cb4e3580e61682f182433cfe0e094fcf578 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Tue, 16 Jan 2018 20:41:08 +0300 Subject: [PATCH 27/28] [HIPIFY][tests] remove concurentKernels.cu as it is one of CUDA SDK samples. --- hipamd/tests/hipify-clang/concurentKernels.cu | 242 ------------------ 1 file changed, 242 deletions(-) delete mode 100644 hipamd/tests/hipify-clang/concurentKernels.cu diff --git a/hipamd/tests/hipify-clang/concurentKernels.cu b/hipamd/tests/hipify-clang/concurentKernels.cu deleted file mode 100644 index 27e9e0d0e7..0000000000 --- a/hipamd/tests/hipify-clang/concurentKernels.cu +++ /dev/null @@ -1,242 +0,0 @@ -// RUN: %run_test hipify "%s" "%t" %cuda_args -/* - * Copyright 1993-2015 NVIDIA Corporation. All rights reserved. - * - * Please refer to the NVIDIA end user license agreement (EULA) associated - * with this source code for terms and conditions that govern your use of - * this software. Any use, reproduction, disclosure, or distribution of - * this software and related documentation outside the terms of the EULA - * is strictly prohibited. - * - */ - -// -// This sample demonstrates the use of streams for concurrent execution. It also illustrates how to -// introduce dependencies between CUDA streams with the new cudaStreamWaitEvent function introduced -// in CUDA 3.2. -// -// Devices of compute capability 1.x will run the kernels one after another -// Devices of compute capability 2.0 or higher can overlap the kernels -// -#include -#include -#include - -// This is a kernel that does no real work but runs at least for a specified number of clocks -__global__ void clock_block(clock_t *d_o, clock_t clock_count) -{ - unsigned int start_clock = (unsigned int) clock(); - - clock_t clock_offset = 0; - - while (clock_offset < clock_count) - { - unsigned int end_clock = (unsigned int) clock(); - - // The code below should work like - // this (thanks to modular arithmetics): - // - // clock_offset = (clock_t) (end_clock > start_clock ? - // end_clock - start_clock : - // end_clock + (0xffffffffu - start_clock)); - // - // Indeed, let m = 2^32 then - // end - start = end + m - start (mod m). - - clock_offset = (clock_t)(end_clock - start_clock); - } - - d_o[0] = clock_offset; -} - - -// Single warp reduction kernel -__global__ void sum(clock_t *d_clocks, int N) -{ - __shared__ clock_t s_clocks[32]; - - clock_t my_sum = 0; - - for (int i = threadIdx.x; i < N; i+= blockDim.x) - { - my_sum += d_clocks[i]; - } - - s_clocks[threadIdx.x] = my_sum; - syncthreads(); - - for (int i=16; i>0; i/=2) - { - if (threadIdx.x < i) - { - s_clocks[threadIdx.x] += s_clocks[threadIdx.x + i]; - } - - syncthreads(); - } - - d_clocks[0] = s_clocks[0]; -} - -int main(int argc, char **argv) -{ - int nkernels = 8; // number of concurrent kernels - int nstreams = nkernels + 1; // use one more stream than concurrent kernel - int nbytes = nkernels * sizeof(clock_t); // number of data bytes - float kernel_time = 10; // time the kernel should run in ms - float elapsed_time; // timing variables - int cuda_device = 0; - - printf("[%s] - Starting...\n", argv[0]); - - // get number of kernels if overridden on the command line - if (checkCmdLineFlag(argc, (const char **)argv, "nkernels")) - { - nkernels = getCmdLineArgumentInt(argc, (const char **)argv, "nkernels"); - nstreams = nkernels + 1; - } - - // use command-line specified CUDA device, otherwise use device with highest Gflops/s - cuda_device = findCudaDevice(argc, (const char **)argv); - - // CHECK: hipDeviceProp_t deviceProp; - cudaDeviceProp deviceProp; - // CHECK: checkCudaErrors(hipGetDevice(&cuda_device)); - checkCudaErrors(cudaGetDevice(&cuda_device)); - - // CHECK: checkCudaErrors(hipGetDeviceProperties(&deviceProp, cuda_device)); - checkCudaErrors(cudaGetDeviceProperties(&deviceProp, cuda_device)); - - if ((deviceProp.concurrentKernels == 0)) - { - printf("> GPU does not support concurrent kernel execution\n"); - printf(" CUDA kernel runs will be serialized\n"); - } - - printf("> Detected Compute SM %d.%d hardware with %d multi-processors\n", - deviceProp.major, deviceProp.minor, deviceProp.multiProcessorCount); - - // allocate host memory - clock_t *a = 0; // pointer to the array data in host memory - // CHECK: checkCudaErrors(hipHostMalloc((void **)&a, nbytes)); - checkCudaErrors(cudaMallocHost((void **)&a, nbytes)); - - // allocate device memory - clock_t *d_a = 0; // pointers to data and init value in the device memory - // CHECK: checkCudaErrors(hipMalloc((void **)&d_a, nbytes)); - checkCudaErrors(cudaMalloc((void **)&d_a, nbytes)); - - // CHECK: hipStream_t *streams = (hipStream_t *) malloc(nstreams * sizeof(hipStream_t)); - // allocate and initialize an array of stream handles - cudaStream_t *streams = (cudaStream_t *) malloc(nstreams * sizeof(cudaStream_t)); - - for (int i = 0; i < nstreams; i++) - { - // CHECK: checkCudaErrors(hipStreamCreate(&(streams[i]))); - checkCudaErrors(cudaStreamCreate(&(streams[i]))); - } - - // CHECK: hipEvent_t start_event, stop_event; - // create CUDA event handles - cudaEvent_t start_event, stop_event; - - // CHECK: checkCudaErrors(hipEventCreate(&start_event)); - // CHECK: checkCudaErrors(hipEventCreate(&stop_event)); - checkCudaErrors(cudaEventCreate(&start_event)); - checkCudaErrors(cudaEventCreate(&stop_event)); - - // the events are used for synchronization only and hence do not need to record timings - // this also makes events not introduce global sync points when recorded which is critical to get overlap - - // CHECK: hipEvent_t *kernelEvent; - // CHECK: kernelEvent = (hipEvent_t *) malloc(nkernels * sizeof(hipEvent_t)); - cudaEvent_t *kernelEvent; - kernelEvent = (cudaEvent_t *) malloc(nkernels * sizeof(cudaEvent_t)); - - for (int i = 0; i < nkernels; i++) - { - // CHECK: checkCudaErrors(hipEventCreateWithFlags(&(kernelEvent[i]), hipEventDisableTiming)); - checkCudaErrors(cudaEventCreateWithFlags(&(kernelEvent[i]), cudaEventDisableTiming)); - } - - ////////////////////////////////////////////////////////////////////// - // time execution with nkernels streams - clock_t total_clocks = 0; -#if defined(__arm__) || defined(__aarch64__) - // the kernel takes more time than the channel reset time on arm archs, so to prevent hangs reduce time_clocks. - clock_t time_clocks = (clock_t)(kernel_time * (deviceProp.clockRate / 1000)); -#else - clock_t time_clocks = (clock_t)(kernel_time * deviceProp.clockRate); -#endif - - // CHECK: hipEventRecord(start_event, 0); - cudaEventRecord(start_event, 0); - - // queue nkernels in separate streams and record when they are done - for (int i=0; i>>(&d_a[i], time_clocks); - total_clocks += time_clocks; - - // CHECK: checkCudaErrors(hipEventRecord(kernelEvent[i], streams[i])); - checkCudaErrors(cudaEventRecord(kernelEvent[i], streams[i])); - - // make the last stream wait for the kernel event to be recorded - // CHECK: checkCudaErrors(hipStreamWaitEvent(streams[nstreams-1], kernelEvent[i],0)); - checkCudaErrors(cudaStreamWaitEvent(streams[nstreams-1], kernelEvent[i],0)); - } - - // queue a sum kernel and a copy back to host in the last stream. - // the commands in this stream get dispatched as soon as all the kernel events have been recorded - // CHECK: hipLaunchKernelGGL(sum, dim3(1), dim3(32), 0, streams[nstreams-1], d_a, nkernels); - // CHECK: checkCudaErrors(hipMemcpyAsync(a, d_a, sizeof(clock_t), hipMemcpyDeviceToHost, streams[nstreams-1])); - sum<<<1,32,0,streams[nstreams-1]>>>(d_a, nkernels); - checkCudaErrors(cudaMemcpyAsync(a, d_a, sizeof(clock_t), cudaMemcpyDeviceToHost, streams[nstreams-1])); - - // at this point the CPU has dispatched all work for the GPU and can continue processing other tasks in parallel - - // in this sample we just wait until the GPU is done - // CHECK: checkCudaErrors(hipEventRecord(stop_event, 0)); - // CHECK: checkCudaErrors(hipEventSynchronize(stop_event)); - // CHECK: checkCudaErrors(hipEventElapsedTime(&elapsed_time, start_event, stop_event)); - checkCudaErrors(cudaEventRecord(stop_event, 0)); - checkCudaErrors(cudaEventSynchronize(stop_event)); - checkCudaErrors(cudaEventElapsedTime(&elapsed_time, start_event, stop_event)); - - printf("Expected time for serial execution of %d kernels = %.3fs\n", nkernels, nkernels * kernel_time/1000.0f); - printf("Expected time for concurrent execution of %d kernels = %.3fs\n", nkernels, kernel_time/1000.0f); - printf("Measured time for sample = %.3fs\n", elapsed_time/1000.0f); - - bool bTestResult = (a[0] > total_clocks); - - // release resources - for (int i = 0; i < nkernels; i++) - { - // CHECK: hipStreamDestroy(streams[i]); - // CHECK: hipEventDestroy(kernelEvent[i]); - cudaStreamDestroy(streams[i]); - cudaEventDestroy(kernelEvent[i]); - } - - free(streams); - free(kernelEvent); - - // CHECK: hipEventDestroy(start_event); - // CHECK: hipEventDestroy(stop_event); - // CHECK: hipHostFree(a); - // CHECK: hipFree(d_a); - cudaEventDestroy(start_event); - cudaEventDestroy(stop_event); - cudaFreeHost(a); - cudaFree(d_a); - - if (!bTestResult) - { - printf("Test failed!\n"); - exit(EXIT_FAILURE); - } - - printf("Test passed\n"); - exit(EXIT_SUCCESS); -} From e2ac6c6296fb11462998e48a8f69ab82f0cc1871 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Tue, 16 Jan 2018 21:07:50 +0300 Subject: [PATCH 28/28] [HIPIFY] Add more supported by HIP CUDA Driver API Arrays data types and functions --- ...A_Driver_API_functions_supported_by_HIP.md | 24 +++++++++---------- hipamd/hipify-clang/src/CUDA2HipMap.cpp | 24 +++++++++---------- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/hipamd/docs/markdown/CUDA_Driver_API_functions_supported_by_HIP.md b/hipamd/docs/markdown/CUDA_Driver_API_functions_supported_by_HIP.md index 9a4700b19c..5045c9d81d 100644 --- a/hipamd/docs/markdown/CUDA_Driver_API_functions_supported_by_HIP.md +++ b/hipamd/docs/markdown/CUDA_Driver_API_functions_supported_by_HIP.md @@ -5,8 +5,8 @@ | **type** | **CUDA** | **HIP** | |-------------:|---------------------------------------------------------------|------------------------------------------------------------| | struct | `CUDA_ARRAY3D_DESCRIPTOR` | | -| struct | `CUDA_ARRAY_DESCRIPTOR` | | -| struct | `CUDA_MEMCPY2D` | | +| struct | `CUDA_ARRAY_DESCRIPTOR` | `HIP_ARRAY_DESCRIPTOR` | +| struct | `CUDA_MEMCPY2D` | `hip_Memcpy2D` | | struct | `CUDA_MEMCPY3D` | | | struct | `CUDA_MEMCPY3D_PEER` | | | struct | `CUDA_POINTER_ATTRIBUTE_P2P_TOKENS` | | @@ -27,15 +27,15 @@ | 0x03 |*`CU_CUBEMAP_FACE_NEGATIVE_Y`* | | | 0x04 |*`CU_CUBEMAP_FACE_POSITIVE_Z`* | | | 0x05 |*`CU_CUBEMAP_FACE_NEGATIVE_Z`* | | -| enum |***`CUarray_format`*** | | -| 0x01 |*`CU_AD_FORMAT_UNSIGNED_INT8`* | | -| 0x02 |*`CU_AD_FORMAT_UNSIGNED_INT16`* | | -| 0x03 |*`CU_AD_FORMAT_UNSIGNED_INT32`* | | -| 0x08 |*`CU_AD_FORMAT_SIGNED_INT8`* | | -| 0x09 |*`CU_AD_FORMAT_SIGNED_INT16`* | | -| 0x0a |*`CU_AD_FORMAT_SIGNED_INT32`* | | -| 0x10 |*`CU_AD_FORMAT_HALF`* | | -| 0x20 |*`CU_AD_FORMAT_FLOAT`* | | +| enum |***`CUarray_format`*** |***`hipArray_format`*** | +| 0x01 |*`CU_AD_FORMAT_UNSIGNED_INT8`* |*`HIP_AD_FORMAT_UNSIGNED_INT8`* | +| 0x02 |*`CU_AD_FORMAT_UNSIGNED_INT16`* |*`HIP_AD_FORMAT_UNSIGNED_INT16`* | +| 0x03 |*`CU_AD_FORMAT_UNSIGNED_INT32`* |*`HIP_AD_FORMAT_UNSIGNED_INT32`* | +| 0x08 |*`CU_AD_FORMAT_SIGNED_INT8`* |*`HIP_AD_FORMAT_SIGNED_INT8`* | +| 0x09 |*`CU_AD_FORMAT_SIGNED_INT16`* |*`HIP_AD_FORMAT_SIGNED_INT16`* | +| 0x0a |*`CU_AD_FORMAT_SIGNED_INT32`* |*`HIP_AD_FORMAT_SIGNED_INT32`* | +| 0x10 |*`CU_AD_FORMAT_HALF`* |*`HIP_AD_FORMAT_HALF`* | +| 0x20 |*`CU_AD_FORMAT_FLOAT`* |*`HIP_AD_FORMAT_FLOAT`* | | enum |***`CUctx_flags`*** | | | 0x00 |*`CU_CTX_SCHED_AUTO`* | | | 0x01 |*`CU_CTX_SCHED_SPIN`* | | @@ -518,7 +518,7 @@ | **CUDA** | **HIP** | |-----------------------------------------------------------|-------------------------------| -| `cuArray3DCreate` | | +| `cuArray3DCreate` | `hipArray3DCreate` | | `cuArray3DGetDescriptor` | | | `cuArrayCreate` | | | `cuArrayDestroy` | | diff --git a/hipamd/hipify-clang/src/CUDA2HipMap.cpp b/hipamd/hipify-clang/src/CUDA2HipMap.cpp index db95c7216b..4f468e7b47 100644 --- a/hipamd/hipify-clang/src/CUDA2HipMap.cpp +++ b/hipamd/hipify-clang/src/CUDA2HipMap.cpp @@ -10,8 +10,8 @@ const std::map CUDA_TYPE_NAME_MAP{ ///////////////////////////// CUDA DRIVER API ///////////////////////////// {"CUDA_ARRAY3D_DESCRIPTOR", {"HIP_ARRAY3D_DESCRIPTOR", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUDA_ARRAY_DESCRIPTOR", {"HIP_ARRAY_DESCRIPTOR", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUDA_MEMCPY2D", {"HIP_MEMCPY2D", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, + {"CUDA_ARRAY_DESCRIPTOR", {"HIP_ARRAY_DESCRIPTOR", CONV_TYPE, API_DRIVER}}, + {"CUDA_MEMCPY2D", {"hip_Memcpy2D", CONV_TYPE, API_DRIVER}}, {"CUDA_MEMCPY3D", {"HIP_MEMCPY3D", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, {"CUDA_MEMCPY3D_PEER", {"HIP_MEMCPY3D_PEER", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, {"CUDA_POINTER_ATTRIBUTE_P2P_TOKENS", {"HIP_POINTER_ATTRIBUTE_P2P_TOKENS", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, @@ -23,7 +23,7 @@ const std::map CUDA_TYPE_NAME_MAP{ {"CUaddress_mode", {"hipAddress_mode", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, {"CUarray_cubemap_face", {"hipArray_cubemap_face", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, - {"CUarray_format", {"hipArray_format", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, + {"CUarray_format", {"hipArray_format", CONV_TYPE, API_DRIVER}}, {"CUcomputemode", {"hipComputemode", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, // API_RUNTIME ANALOGUE (cudaComputeMode) {"CUmem_advise", {"hipMemAdvise", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, // API_RUNTIME ANALOGUE (cudaComputeMode) {"CUmem_range_attribute", {"hipMemRangeAttribute", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, // API_RUNTIME ANALOGUE (cudaMemRangeAttribute) @@ -564,14 +564,14 @@ const std::map CUDA_IDENTIFIER_MAP{ {"CU_CUBEMAP_FACE_NEGATIVE_Z", {"HIP_CUBEMAP_FACE_NEGATIVE_Z", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, // 0x05 // CUarray_format enum - {"CU_AD_FORMAT_UNSIGNED_INT8", {"HIP_AD_FORMAT_UNSIGNED_INT8", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, // 0x01 - {"CU_AD_FORMAT_UNSIGNED_INT16", {"HIP_AD_FORMAT_UNSIGNED_INT16", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, // 0x02 - {"CU_AD_FORMAT_UNSIGNED_INT32", {"HIP_AD_FORMAT_UNSIGNED_INT32", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, // 0x03 - {"CU_AD_FORMAT_SIGNED_INT8", {"HIP_AD_FORMAT_SIGNED_INT8", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, // 0x08 - {"CU_AD_FORMAT_SIGNED_INT16", {"HIP_AD_FORMAT_SIGNED_INT16", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, // 0x09 - {"CU_AD_FORMAT_SIGNED_INT32", {"HIP_AD_FORMAT_SIGNED_INT32", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, // 0x0a - {"CU_AD_FORMAT_HALF", {"HIP_AD_FORMAT_HALF", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, // 0x10 - {"CU_AD_FORMAT_FLOAT", {"HIP_AD_FORMAT_FLOAT", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, // 0x20 + {"CU_AD_FORMAT_UNSIGNED_INT8", {"HIP_AD_FORMAT_UNSIGNED_INT8", CONV_TYPE, API_DRIVER}}, // 0x01 + {"CU_AD_FORMAT_UNSIGNED_INT16", {"HIP_AD_FORMAT_UNSIGNED_INT16", CONV_TYPE, API_DRIVER}}, // 0x02 + {"CU_AD_FORMAT_UNSIGNED_INT32", {"HIP_AD_FORMAT_UNSIGNED_INT32", CONV_TYPE, API_DRIVER}}, // 0x03 + {"CU_AD_FORMAT_SIGNED_INT8", {"HIP_AD_FORMAT_SIGNED_INT8", CONV_TYPE, API_DRIVER}}, // 0x08 + {"CU_AD_FORMAT_SIGNED_INT16", {"HIP_AD_FORMAT_SIGNED_INT16", CONV_TYPE, API_DRIVER}}, // 0x09 + {"CU_AD_FORMAT_SIGNED_INT32", {"HIP_AD_FORMAT_SIGNED_INT32", CONV_TYPE, API_DRIVER}}, // 0x0a + {"CU_AD_FORMAT_HALF", {"HIP_AD_FORMAT_HALF", CONV_TYPE, API_DRIVER}}, // 0x10 + {"CU_AD_FORMAT_FLOAT", {"HIP_AD_FORMAT_FLOAT", CONV_TYPE, API_DRIVER}}, // 0x20 // CUcomputemode enum {"CU_COMPUTEMODE_DEFAULT", {"hipComputeModeDefault", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}}, // 0 // API_RUNTIME ANALOGUE (cudaComputeModeDefault = 0) @@ -1063,7 +1063,7 @@ const std::map CUDA_IDENTIFIER_MAP{ {"cuStreamBatchMemOp", {"hipStreamBatchMemOp", CONV_STREAM, API_DRIVER, HIP_UNSUPPORTED}}, // // no API_Runtime ANALOGUE // Memory management - {"cuArray3DCreate", {"hipArray3DCreate", CONV_MEM, API_DRIVER, HIP_UNSUPPORTED}}, + {"cuArray3DCreate", {"hipArray3DCreate", CONV_MEM, API_DRIVER}}, {"cuArray3DGetDescriptor", {"hipArray3DGetDescriptor", CONV_MEM, API_DRIVER, HIP_UNSUPPORTED}}, {"cuArrayCreate", {"hipArrayCreate", CONV_MEM, API_DRIVER, HIP_UNSUPPORTED}}, {"cuArrayDestroy", {"hipArrayDestroy", CONV_MEM, API_DRIVER, HIP_UNSUPPORTED}},