From 052f630bd332e01f952ddb2d1796fafcabdad2f6 Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Fri, 19 May 2017 17:22:14 +0300 Subject: [PATCH] [HIP] [HIPIFY] [FIX] cuModuleLoadDataEx -> hipModuleLoadDataEx https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/issues/81 1. Do not use JIT options on HCC path, call hipModuleLoadData instead. 2. NVCC path is unchanged, to call cuModuleLoadDataEx with all options. 3. Get rid of manual hipification, based on #ifdef #else for NVCC/HIP. 4. Update documentation accordingly. [ROCm/clr commit: ae9f14ef9c7239455d2e9e3eb6f0dfc6e0afb25d] --- .../docs/markdown/hip_porting_driver_api.md | 65 +++++++++++-------- .../clr/hipamd/hipify-clang/src/Cuda2Hip.cpp | 38 +++++------ .../include/hip/hcc_detail/hip_runtime_api.h | 19 ++++-- .../clr/hipamd/include/hip/hip_runtime_api.h | 24 +++++++ .../include/hip/nvcc_detail/hip_runtime_api.h | 44 ++++++++++--- projects/clr/hipamd/src/hip_module.cpp | 6 +- 6 files changed, 134 insertions(+), 62 deletions(-) diff --git a/projects/clr/hipamd/docs/markdown/hip_porting_driver_api.md b/projects/clr/hipamd/docs/markdown/hip_porting_driver_api.md index dd3b9c3e86..0912e676cc 100644 --- a/projects/clr/hipamd/docs/markdown/hip_porting_driver_api.md +++ b/projects/clr/hipamd/docs/markdown/hip_porting_driver_api.md @@ -98,48 +98,57 @@ HIP/HCC will push primary context to context stack when it is empty. This can ha #### Interoperation between HIP and CUDA Driver CUDA applications may want to mix CUDA driver code with HIP code (see example below). This table shows the type equivalence to enable this interaction. -|**HIP Type** |**CU Driver Type**|**CUDA Runtime Type**| -| ---- | ---- | ---- | -| hipModule | CUmodule | | -| hipFunction | CUfunction | | -| hipCtx_t | CUcontext | | -| hipDevice_t | CUdevice | | -| hipStream_t | CUstream | cudaStream_t | -| hipEvent_t | CUevent | cudaEvent_t | -| hipArray | CUarray | cudaArray | - -#### Compilation Flags -The hipModule interface does not support the `cuModuleLoadDataEx` function, which is used to control PTX compilation options. -HCC does not use PTX and does not support the same compilation options. -In fact, HCC code objects always contain fully compiled ISA and do not require additional compilation as part of the load step. -Code which requires this functionally should use platform-specific coding, calling `cuModuleLoadDataEx` on the NVCC path and `hipModuleLoadData` on the hcc path. -For example: +|**HIP Type** |**CU Driver Type**|**CUDA Runtime Type**| +| ---- | ---- | ---- | +| hipModule_t | CUmodule | | +| hipFunction_t | CUfunction | | +| hipCtx_t | CUcontext | | +| hipDevice_t | CUdevice | | +| hipStream_t | CUstream | cudaStream_t | +| hipEvent_t | CUevent | cudaEvent_t | +| hipArray | CUarray | cudaArray | +#### Compilation Options +The `hipModule_t` interface does not support `cuModuleLoadDataEx` function, which is used to control PTX compilation options. +HCC does not use PTX and does not support these compilation options. +In fact, HCC code objects always contain fully compiled ISA and do not require additional compilation as a part of the load step. +The corresponding HIP function `hipModuleLoadDataEx` behaves as `hipModuleLoadData` on HCC path (compilation options are not used) and as `cuModuleLoadDataEx` on NVCC path. +For example (CUDA): ``` -hipModule module; -void *imagePtr = ... ; // Somehow populate data pointer with code object +CUmodule module; +void *imagePtr = ...; // Somehow populate data pointer with code object -#ifdef __HIP_PLATFORM_NVCC__ -// Use CUDA driver API but write to hipModule since they are same type: const int numOptions = 1; CUJit_option options[numOptions]; void * optionValues[numOptions]; options[0] = CU_JIT_MAX_REGISTERS; -unsigned maxRegs=15; -optionValues[0] = (void*) (&maxRegs); +unsigned maxRegs = 15; +optionValues[0] = (void*)(&maxRegs); cuModuleLoadDataEx(module, imagePtr, numOptions, options, optionValues); -#else // __HIP_PLATFORM_HCC__ +CUfunction k; +cuModuleGetFunction(&k, module, "myKernel"); +``` +HIP: +``` +hipModule_t module; +void *imagePtr = ...; // Somehow populate data pointer with code object -// HCC path does not support or require JIT options, so just load the module. -hipModuleLoadData(&module, imagePtr); +const int numOptions = 1; +hipJitOption options[numOptions]; +void * optionValues[numOptions]; -#endif +options[0] = hipJitOptionMaxRegisters; +unsigned maxRegs = 15; +optionValues[0] = (void*)(&maxRegs); -// Back to unified code - both paths above loaded the "module" variable. -hipFunction k; +// hipModuleLoadData(module, imagePtr) will be called on HCC path, JIT options will not be used, and +// cupModuleLoadDataEx(module, imagePtr, numOptions, options, optionValues) will be called on NVCC path +hipModuleLoadDataEx(module, imagePtr, numOptions, options, optionValues); + +hipFunction_t k; hipModuleGetFunction(&k, module, "myKernel"); ``` diff --git a/projects/clr/hipamd/hipify-clang/src/Cuda2Hip.cpp b/projects/clr/hipamd/hipify-clang/src/Cuda2Hip.cpp index 0c6b0f1efc..e07baab3fd 100644 --- a/projects/clr/hipamd/hipify-clang/src/Cuda2Hip.cpp +++ b/projects/clr/hipamd/hipify-clang/src/Cuda2Hip.cpp @@ -678,24 +678,24 @@ struct cuda2hipMap { cuda2hipRename["CU_PREFER_PTX"] = {"hipJitFallbackPreferPtx", CONV_JIT, API_DRIVER, HIP_UNSUPPORTED}; cuda2hipRename["CU_PREFER_BINARY"] = {"hipJitFallbackPreferBinary", CONV_JIT, API_DRIVER, HIP_UNSUPPORTED}; // enum CUjit_option/CUjit_option_enum - cuda2hipRename["CUjit_option"] = {"hipJitOption", CONV_JIT, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (no) - cuda2hipRename["CUjit_option_enum"] = {"hipJitOption", CONV_JIT, API_DRIVER, HIP_UNSUPPORTED}; - cuda2hipRename["CU_JIT_MAX_REGISTERS"] = {"hipJitOptionMaxRegisters", CONV_JIT, API_DRIVER, HIP_UNSUPPORTED}; - cuda2hipRename["CU_JIT_THREADS_PER_BLOCK"] = {"hipJitOptionThreadsPerBlock", CONV_JIT, API_DRIVER, HIP_UNSUPPORTED}; - cuda2hipRename["CU_JIT_WALL_TIME"] = {"hipJitOptionWallTime", CONV_JIT, API_DRIVER, HIP_UNSUPPORTED}; - cuda2hipRename["CU_JIT_INFO_LOG_BUFFER"] = {"hipJitOptionInfoLogBuffer", CONV_JIT, API_DRIVER, HIP_UNSUPPORTED}; - cuda2hipRename["CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES"] = {"hipJitOptionInfoLogBufferSizeBytes", CONV_JIT, API_DRIVER, HIP_UNSUPPORTED}; - cuda2hipRename["CU_JIT_ERROR_LOG_BUFFER"] = {"hipJitOptionErrorLogBuffer", CONV_JIT, API_DRIVER, HIP_UNSUPPORTED}; - cuda2hipRename["CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES"] = {"hipJitOptionErrorLogBufferSizeBytes", CONV_JIT, API_DRIVER, HIP_UNSUPPORTED}; - cuda2hipRename["CU_JIT_OPTIMIZATION_LEVEL"] = {"hipJitOptionOptimizationLevel", CONV_JIT, API_DRIVER, HIP_UNSUPPORTED}; - cuda2hipRename["CU_JIT_TARGET_FROM_CUCONTEXT"] = {"hipJitOptionTargetFromContext", CONV_JIT, API_DRIVER, HIP_UNSUPPORTED}; - cuda2hipRename["CU_JIT_TARGET"] = {"hipJitOptionTarget", CONV_JIT, API_DRIVER, HIP_UNSUPPORTED}; - cuda2hipRename["CU_JIT_FALLBACK_STRATEGY"] = {"hipJitOptionFallbackStrategy", CONV_JIT, API_DRIVER, HIP_UNSUPPORTED}; - cuda2hipRename["CU_JIT_GENERATE_DEBUG_INFO"] = {"hipJitOptionGenerateDebugInfo", CONV_JIT, API_DRIVER, HIP_UNSUPPORTED}; - cuda2hipRename["CU_JIT_LOG_VERBOSE"] = {"hipJitOptionLogVerbose", CONV_JIT, API_DRIVER, HIP_UNSUPPORTED}; - cuda2hipRename["CU_JIT_GENERATE_LINE_INFO"] = {"hipJitOptionLogVerbose", CONV_JIT, API_DRIVER, HIP_UNSUPPORTED}; - cuda2hipRename["CU_JIT_CACHE_MODE"] = {"hipJitOptionCacheMode", CONV_JIT, API_DRIVER, HIP_UNSUPPORTED}; - cuda2hipRename["CU_JIT_NUM_OPTIONS"] = {"hipJitOptionNumOptions", CONV_JIT, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["CUjit_option"] = {"hipJitOption", CONV_JIT, API_DRIVER}; // API_Runtime ANALOGUE (no) + cuda2hipRename["CUjit_option_enum"] = {"hipJitOption", CONV_JIT, API_DRIVER}; + cuda2hipRename["CU_JIT_MAX_REGISTERS"] = {"hipJitOptionMaxRegisters", CONV_JIT, API_DRIVER}; + cuda2hipRename["CU_JIT_THREADS_PER_BLOCK"] = {"hipJitOptionThreadsPerBlock", CONV_JIT, API_DRIVER}; + cuda2hipRename["CU_JIT_WALL_TIME"] = {"hipJitOptionWallTime", CONV_JIT, API_DRIVER}; + cuda2hipRename["CU_JIT_INFO_LOG_BUFFER"] = {"hipJitOptionInfoLogBuffer", CONV_JIT, API_DRIVER}; + cuda2hipRename["CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES"] = {"hipJitOptionInfoLogBufferSizeBytes", CONV_JIT, API_DRIVER}; + cuda2hipRename["CU_JIT_ERROR_LOG_BUFFER"] = {"hipJitOptionErrorLogBuffer", CONV_JIT, API_DRIVER}; + cuda2hipRename["CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES"] = {"hipJitOptionErrorLogBufferSizeBytes", CONV_JIT, API_DRIVER}; + cuda2hipRename["CU_JIT_OPTIMIZATION_LEVEL"] = {"hipJitOptionOptimizationLevel", CONV_JIT, API_DRIVER}; + cuda2hipRename["CU_JIT_TARGET_FROM_CUCONTEXT"] = {"hipJitOptionTargetFromContext", CONV_JIT, API_DRIVER}; + cuda2hipRename["CU_JIT_TARGET"] = {"hipJitOptionTarget", CONV_JIT, API_DRIVER}; + cuda2hipRename["CU_JIT_FALLBACK_STRATEGY"] = {"hipJitOptionFallbackStrategy", CONV_JIT, API_DRIVER}; + cuda2hipRename["CU_JIT_GENERATE_DEBUG_INFO"] = {"hipJitOptionGenerateDebugInfo", CONV_JIT, API_DRIVER}; + cuda2hipRename["CU_JIT_LOG_VERBOSE"] = {"hipJitOptionLogVerbose", CONV_JIT, API_DRIVER}; + cuda2hipRename["CU_JIT_GENERATE_LINE_INFO"] = {"hipJitOptionGenerateLineInfo", CONV_JIT, API_DRIVER}; + cuda2hipRename["CU_JIT_CACHE_MODE"] = {"hipJitOptionCacheMode", CONV_JIT, API_DRIVER}; + cuda2hipRename["CU_JIT_NUM_OPTIONS"] = {"hipJitOptionNumOptions", CONV_JIT, API_DRIVER}; // enum CUjit_target/CUjit_target_enum cuda2hipRename["CUjit_target"] = {"hipJitTarget", CONV_JIT, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (no) cuda2hipRename["CUjit_target_enum"] = {"hipJitTarget", CONV_JIT, API_DRIVER, HIP_UNSUPPORTED}; @@ -905,7 +905,7 @@ struct cuda2hipMap { cuda2hipRename["cuModuleLoad"] = {"hipModuleLoad", CONV_MODULE, API_DRIVER}; cuda2hipRename["cuModuleLoadData"] = {"hipModuleLoadData", CONV_MODULE, API_DRIVER}; // unsupported yet by HIP - cuda2hipRename["cuModuleLoadDataEx"] = {"hipModuleLoadDataEx", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}; + cuda2hipRename["cuModuleLoadDataEx"] = {"hipModuleLoadDataEx", CONV_MODULE, API_DRIVER}; cuda2hipRename["cuModuleLoadFatBinary"] = {"hipModuleLoadFatBinary", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED}; cuda2hipRename["cuModuleUnload"] = {"hipModuleUnload", CONV_MODULE, API_DRIVER}; diff --git a/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime_api.h b/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime_api.h index e1aecef1e8..34ed2ed5ce 100644 --- a/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime_api.h +++ b/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime_api.h @@ -1915,7 +1915,7 @@ hipError_t hipModuleGetFunction(hipFunction_t *function, hipModule_t module, con * @brief returns device memory pointer and size of the kernel present in the module with symbol @p name * * @param [out] dptr - * @param [out[ bytes + * @param [out] bytes * @param [in] hmod * @param [in] name * @@ -1923,7 +1923,6 @@ hipError_t hipModuleGetFunction(hipFunction_t *function, hipModule_t module, con */ hipError_t hipModuleGetGlobal(hipDeviceptr_t *dptr, size_t *bytes, hipModule_t hmod, const char *name); - /** * @brief builds module from code object which resides in host memory. Image is pointer to that location. * @@ -1934,11 +1933,23 @@ hipError_t hipModuleGetGlobal(hipDeviceptr_t *dptr, size_t *bytes, hipModule_t h */ hipError_t hipModuleLoadData(hipModule_t *module, const void *image); +/** +* @brief builds module from code object which resides in host memory. Image is pointer to that location. Options are not used. hipModuleLoadData is called. +* +* @param [in] image +* @param [out] module +* @param [in] number of options +* @param [in] options for JIT +* @param [in] option values for JIT +* +* @returns hipSuccess, hipErrorNotInitialized, hipErrorOutOfMemory, hipErrorNotInitialized +*/ +hipError_t hipModuleLoadDataEx(hipModule_t *module, const void *image, unsigned int numOptions, hipJitOption *options, void **optionValues); /** * @brief launches kernel f with launch parameters and shared memory on stream with arguments passed to kernelparams or extra * - * @param [in[ f Kernel to launch. + * @param [in] f Kernel to launch. * @param [in] gridDimX X grid dimension specified as multiple of blockDimX. * @param [in] gridDimY Y grid dimension specified as multiple of blockDimY. * @param [in] gridDimZ Z grid dimension specified as multiple of blockDimZ. @@ -1946,7 +1957,7 @@ hipError_t hipModuleLoadData(hipModule_t *module, const void *image); * @param [in] blockDimY Y grid dimension specified in work-items * @param [in] blockDimZ Z grid dimension specified in work-items * @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel. The kernel can access this with HIP_DYNAMIC_SHARED. - * @param [in] stream Stream where the kernel should be dispatched. May be 0, in which case th default stream is used with associated synchronization rules. + * @param [in] stream Stream where the kernel should be dispatched. May be 0, in which case th default stream is used with associated synchronization rules. * @param [in] kernelParams * @param [in] extra Pointer to kernel arguments. These are passed directly to the kernel and must be in the memory layout and alignment expected by the kernel. * diff --git a/projects/clr/hipamd/include/hip/hip_runtime_api.h b/projects/clr/hipamd/include/hip/hip_runtime_api.h index 8eae1d6a3a..fa54dda5dc 100644 --- a/projects/clr/hipamd/include/hip/hip_runtime_api.h +++ b/projects/clr/hipamd/include/hip/hip_runtime_api.h @@ -250,6 +250,30 @@ typedef enum hipDeviceAttribute_t { hipDeviceAttributeIsMultiGpuBoard, ///< Multiple GPU devices. } hipDeviceAttribute_t; +/* +* @brief hipJitOption +* @enum +* @ingroup Enumerations +*/ +typedef enum hipJitOption { + hipJitOptionMaxRegisters = 0, + hipJitOptionThreadsPerBlock, + hipJitOptionWallTime, + hipJitOptionInfoLogBuffer, + hipJitOptionInfoLogBufferSizeBytes, + hipJitOptionErrorLogBuffer, + hipJitOptionErrorLogBufferSizeBytes, + hipJitOptionOptimizationLevel, + hipJitOptionTargetFromContext, + hipJitOptionTarget, + hipJitOptionFallbackStrategy, + hipJitOptionGenerateDebugInfo, + hipJitOptionLogVerbose, + hipJitOptionGenerateLineInfo, + hipJitOptionCacheMode, + hipJitOptionNumOptions +} hipJitOption; + /** * @} */ diff --git a/projects/clr/hipamd/include/hip/nvcc_detail/hip_runtime_api.h b/projects/clr/hipamd/include/hip/nvcc_detail/hip_runtime_api.h index 69a9b46570..01a93f7ba4 100644 --- a/projects/clr/hipamd/include/hip/nvcc_detail/hip_runtime_api.h +++ b/projects/clr/hipamd/include/hip/nvcc_detail/hip_runtime_api.h @@ -54,26 +54,44 @@ hipMemcpyHostToHost #define hipFilterModePoint cudaFilterModePoint //! Flags that can be used with hipEventCreateWithFlags: -#define hipEventDefault cudaEventDefault -#define hipEventBlockingSync cudaEventBlockingSync -#define hipEventDisableTiming cudaEventDisableTiming -#define hipEventInterprocess cudaEventInterprocess +#define hipEventDefault cudaEventDefault +#define hipEventBlockingSync cudaEventBlockingSync +#define hipEventDisableTiming cudaEventDisableTiming +#define hipEventInterprocess cudaEventInterprocess #define hipEventDisableSystemRelease cudaEventDefault /* no-op on CUDA platform */ -#define hipHostMallocDefault cudaHostAllocDefault -#define hipHostMallocPortable cudaHostAllocPortable -#define hipHostMallocMapped cudaHostAllocMapped +#define hipHostMallocDefault cudaHostAllocDefault +#define hipHostMallocPortable cudaHostAllocPortable +#define hipHostMallocMapped cudaHostAllocMapped #define hipHostMallocWriteCombined cudaHostAllocWriteCombined #define hipHostRegisterPortable cudaHostRegisterPortable -#define hipHostRegisterMapped cudaHostRegisterMapped +#define hipHostRegisterMapped cudaHostRegisterMapped #define HIP_LAUNCH_PARAM_BUFFER_POINTER CU_LAUNCH_PARAM_BUFFER_POINTER -#define HIP_LAUNCH_PARAM_BUFFER_SIZE CU_LAUNCH_PARAM_BUFFER_SIZE +#define HIP_LAUNCH_PARAM_BUFFER_SIZE CU_LAUNCH_PARAM_BUFFER_SIZE #define HIP_LAUNCH_PARAM_END CU_LAUNCH_PARAM_END #define hipLimitMallocHeapSize cudaLimitMallocHeapSize -#define hipIpcMemLazyEnablePeerAccess cudaIpcMemLazyEnablePeerAccess +#define hipIpcMemLazyEnablePeerAccess cudaIpcMemLazyEnablePeerAccess + +// enum CUjit_option redefines +#define hipJitOptionMaxRegisters CU_JIT_MAX_REGISTERS +#define hipJitOptionThreadsPerBlock CU_JIT_THREADS_PER_BLOCK +#define hipJitOptionWallTime CU_JIT_WALL_TIME +#define hipJitOptionInfoLogBuffer CU_JIT_INFO_LOG_BUFFER +#define hipJitOptionInfoLogBufferSizeBytes CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES +#define hipJitOptionErrorLogBuffer CU_JIT_ERROR_LOG_BUFFER +#define hipJitOptionErrorLogBufferSizeBytes CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES +#define hipJitOptionOptimizationLevel CU_JIT_OPTIMIZATION_LEVEL +#define hipJitOptionTargetFromContext CU_JIT_TARGET_FROM_CUCONTEXT +#define hipJitOptionTarget CU_JIT_TARGET +#define hipJitOptionFallbackStrategy CU_JIT_FALLBACK_STRATEGY +#define hipJitOptionGenerateDebugInfo CU_JIT_GENERATE_DEBUG_INFO +#define hipJitOptionLogVerbose CU_JIT_LOG_VERBOSE +#define hipJitOptionGenerateLineInfo CU_JIT_GENERATE_LINE_INFO +#define hipJitOptionCacheMode CU_JIT_CACHE_MODE +#define hipJitOptionNumOptions CU_JIT_NUM_OPTIONS typedef cudaEvent_t hipEvent_t; typedef cudaStream_t hipStream_t; @@ -84,6 +102,7 @@ typedef cudaFuncCache hipFuncCache_t; typedef CUcontext hipCtx_t; typedef CUsharedconfig hipSharedMemConfig; typedef CUfunc_cache hipFuncCache; +typedef CUjit_option hipJitOption; typedef CUdevice hipDevice_t; typedef CUmodule hipModule_t; typedef CUfunction hipFunction_t; @@ -894,6 +913,11 @@ inline static hipError_t hipModuleLoadData(hipModule_t *module, const void *imag return hipCUResultTohipError(cuModuleLoadData(module, image)); } +inline static hipError_t hipModuleLoadDataEx(hipModule_t *module, const void *image, unsigned int numOptions, hipJitOption *options, void **optionValues) +{ + return hipCUResultTohipError(cuModuleLoadDataEx(module, image, numOptions, options, optionValues)); +} + inline static hipError_t hipModuleLaunchKernel(hipFunction_t f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, diff --git a/projects/clr/hipamd/src/hip_module.cpp b/projects/clr/hipamd/src/hip_module.cpp index da01f23769..d364a6b519 100644 --- a/projects/clr/hipamd/src/hip_module.cpp +++ b/projects/clr/hipamd/src/hip_module.cpp @@ -525,7 +525,6 @@ hipError_t hipModuleGetGlobal(hipDeviceptr_t *dptr, size_t *bytes, } } - hipError_t hipModuleLoadData(hipModule_t *module, const void *image) { HIP_INIT_API(module, image); @@ -575,3 +574,8 @@ hipError_t hipModuleLoadData(hipModule_t *module, const void *image) } return ihipLogStatus(ret); } + +hipError_t hipModuleLoadDataEx(hipModule_t *module, const void *image, unsigned int numOptions, hipJitOption *options, void **optionValues) +{ + return hipModuleLoadData(module, image); +}