[HIP] [HIPIFY] [FIX] cuModuleLoadDataEx -> hipModuleLoadDataEx

https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP/issues/81

1.	Do not use JIT options on HCC path, call hipModuleLoadData instead.
2.	NVCC path is unchanged, to call cuModuleLoadDataEx with all options.
3.	Get rid of manual hipification, based on #ifdef #else for NVCC/HIP.
4.	Update documentation accordingly.


[ROCm/clr commit: ae9f14ef9c]
This commit is contained in:
Evgeny Mankov
2017-05-19 17:22:14 +03:00
parent 905ab9a689
commit 052f630bd3
6 changed files with 134 additions and 62 deletions
@@ -98,48 +98,57 @@ HIP/HCC will push primary context to context stack when it is empty. This can ha
#### Interoperation between HIP and CUDA Driver
CUDA applications may want to mix CUDA driver code with HIP code (see example below). This table shows the type equivalence to enable this interaction.
|**HIP Type** |**CU Driver Type**|**CUDA Runtime Type**|
| ---- | ---- | ---- |
| hipModule | CUmodule | |
| hipFunction | CUfunction | |
| hipCtx_t | CUcontext | |
| hipDevice_t | CUdevice | |
| hipStream_t | CUstream | cudaStream_t |
| hipEvent_t | CUevent | cudaEvent_t |
| hipArray | CUarray | cudaArray |
#### Compilation Flags
The hipModule interface does not support the `cuModuleLoadDataEx` function, which is used to control PTX compilation options.
HCC does not use PTX and does not support the same compilation options.
In fact, HCC code objects always contain fully compiled ISA and do not require additional compilation as part of the load step.
Code which requires this functionally should use platform-specific coding, calling `cuModuleLoadDataEx` on the NVCC path and `hipModuleLoadData` on the hcc path.
For example:
|**HIP Type** |**CU Driver Type**|**CUDA Runtime Type**|
| ---- | ---- | ---- |
| hipModule_t | CUmodule | |
| hipFunction_t | CUfunction | |
| hipCtx_t | CUcontext | |
| hipDevice_t | CUdevice | |
| hipStream_t | CUstream | cudaStream_t |
| hipEvent_t | CUevent | cudaEvent_t |
| hipArray | CUarray | cudaArray |
#### Compilation Options
The `hipModule_t` interface does not support `cuModuleLoadDataEx` function, which is used to control PTX compilation options.
HCC does not use PTX and does not support these compilation options.
In fact, HCC code objects always contain fully compiled ISA and do not require additional compilation as a part of the load step.
The corresponding HIP function `hipModuleLoadDataEx` behaves as `hipModuleLoadData` on HCC path (compilation options are not used) and as `cuModuleLoadDataEx` on NVCC path.
For example (CUDA):
```
hipModule module;
void *imagePtr = ... ; // Somehow populate data pointer with code object
CUmodule module;
void *imagePtr = ...; // Somehow populate data pointer with code object
#ifdef __HIP_PLATFORM_NVCC__
// Use CUDA driver API but write to hipModule since they are same type:
const int numOptions = 1;
CUJit_option options[numOptions];
void * optionValues[numOptions];
options[0] = CU_JIT_MAX_REGISTERS;
unsigned maxRegs=15;
optionValues[0] = (void*) (&maxRegs);
unsigned maxRegs = 15;
optionValues[0] = (void*)(&maxRegs);
cuModuleLoadDataEx(module, imagePtr, numOptions, options, optionValues);
#else // __HIP_PLATFORM_HCC__
CUfunction k;
cuModuleGetFunction(&k, module, "myKernel");
```
HIP:
```
hipModule_t module;
void *imagePtr = ...; // Somehow populate data pointer with code object
// HCC path does not support or require JIT options, so just load the module.
hipModuleLoadData(&module, imagePtr);
const int numOptions = 1;
hipJitOption options[numOptions];
void * optionValues[numOptions];
#endif
options[0] = hipJitOptionMaxRegisters;
unsigned maxRegs = 15;
optionValues[0] = (void*)(&maxRegs);
// Back to unified code - both paths above loaded the "module" variable.
hipFunction k;
// hipModuleLoadData(module, imagePtr) will be called on HCC path, JIT options will not be used, and
// cupModuleLoadDataEx(module, imagePtr, numOptions, options, optionValues) will be called on NVCC path
hipModuleLoadDataEx(module, imagePtr, numOptions, options, optionValues);
hipFunction_t k;
hipModuleGetFunction(&k, module, "myKernel");
```
@@ -678,24 +678,24 @@ struct cuda2hipMap {
cuda2hipRename["CU_PREFER_PTX"] = {"hipJitFallbackPreferPtx", CONV_JIT, API_DRIVER, HIP_UNSUPPORTED};
cuda2hipRename["CU_PREFER_BINARY"] = {"hipJitFallbackPreferBinary", CONV_JIT, API_DRIVER, HIP_UNSUPPORTED};
// enum CUjit_option/CUjit_option_enum
cuda2hipRename["CUjit_option"] = {"hipJitOption", CONV_JIT, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (no)
cuda2hipRename["CUjit_option_enum"] = {"hipJitOption", CONV_JIT, API_DRIVER, HIP_UNSUPPORTED};
cuda2hipRename["CU_JIT_MAX_REGISTERS"] = {"hipJitOptionMaxRegisters", CONV_JIT, API_DRIVER, HIP_UNSUPPORTED};
cuda2hipRename["CU_JIT_THREADS_PER_BLOCK"] = {"hipJitOptionThreadsPerBlock", CONV_JIT, API_DRIVER, HIP_UNSUPPORTED};
cuda2hipRename["CU_JIT_WALL_TIME"] = {"hipJitOptionWallTime", CONV_JIT, API_DRIVER, HIP_UNSUPPORTED};
cuda2hipRename["CU_JIT_INFO_LOG_BUFFER"] = {"hipJitOptionInfoLogBuffer", CONV_JIT, API_DRIVER, HIP_UNSUPPORTED};
cuda2hipRename["CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES"] = {"hipJitOptionInfoLogBufferSizeBytes", CONV_JIT, API_DRIVER, HIP_UNSUPPORTED};
cuda2hipRename["CU_JIT_ERROR_LOG_BUFFER"] = {"hipJitOptionErrorLogBuffer", CONV_JIT, API_DRIVER, HIP_UNSUPPORTED};
cuda2hipRename["CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES"] = {"hipJitOptionErrorLogBufferSizeBytes", CONV_JIT, API_DRIVER, HIP_UNSUPPORTED};
cuda2hipRename["CU_JIT_OPTIMIZATION_LEVEL"] = {"hipJitOptionOptimizationLevel", CONV_JIT, API_DRIVER, HIP_UNSUPPORTED};
cuda2hipRename["CU_JIT_TARGET_FROM_CUCONTEXT"] = {"hipJitOptionTargetFromContext", CONV_JIT, API_DRIVER, HIP_UNSUPPORTED};
cuda2hipRename["CU_JIT_TARGET"] = {"hipJitOptionTarget", CONV_JIT, API_DRIVER, HIP_UNSUPPORTED};
cuda2hipRename["CU_JIT_FALLBACK_STRATEGY"] = {"hipJitOptionFallbackStrategy", CONV_JIT, API_DRIVER, HIP_UNSUPPORTED};
cuda2hipRename["CU_JIT_GENERATE_DEBUG_INFO"] = {"hipJitOptionGenerateDebugInfo", CONV_JIT, API_DRIVER, HIP_UNSUPPORTED};
cuda2hipRename["CU_JIT_LOG_VERBOSE"] = {"hipJitOptionLogVerbose", CONV_JIT, API_DRIVER, HIP_UNSUPPORTED};
cuda2hipRename["CU_JIT_GENERATE_LINE_INFO"] = {"hipJitOptionLogVerbose", CONV_JIT, API_DRIVER, HIP_UNSUPPORTED};
cuda2hipRename["CU_JIT_CACHE_MODE"] = {"hipJitOptionCacheMode", CONV_JIT, API_DRIVER, HIP_UNSUPPORTED};
cuda2hipRename["CU_JIT_NUM_OPTIONS"] = {"hipJitOptionNumOptions", CONV_JIT, API_DRIVER, HIP_UNSUPPORTED};
cuda2hipRename["CUjit_option"] = {"hipJitOption", CONV_JIT, API_DRIVER}; // API_Runtime ANALOGUE (no)
cuda2hipRename["CUjit_option_enum"] = {"hipJitOption", CONV_JIT, API_DRIVER};
cuda2hipRename["CU_JIT_MAX_REGISTERS"] = {"hipJitOptionMaxRegisters", CONV_JIT, API_DRIVER};
cuda2hipRename["CU_JIT_THREADS_PER_BLOCK"] = {"hipJitOptionThreadsPerBlock", CONV_JIT, API_DRIVER};
cuda2hipRename["CU_JIT_WALL_TIME"] = {"hipJitOptionWallTime", CONV_JIT, API_DRIVER};
cuda2hipRename["CU_JIT_INFO_LOG_BUFFER"] = {"hipJitOptionInfoLogBuffer", CONV_JIT, API_DRIVER};
cuda2hipRename["CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES"] = {"hipJitOptionInfoLogBufferSizeBytes", CONV_JIT, API_DRIVER};
cuda2hipRename["CU_JIT_ERROR_LOG_BUFFER"] = {"hipJitOptionErrorLogBuffer", CONV_JIT, API_DRIVER};
cuda2hipRename["CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES"] = {"hipJitOptionErrorLogBufferSizeBytes", CONV_JIT, API_DRIVER};
cuda2hipRename["CU_JIT_OPTIMIZATION_LEVEL"] = {"hipJitOptionOptimizationLevel", CONV_JIT, API_DRIVER};
cuda2hipRename["CU_JIT_TARGET_FROM_CUCONTEXT"] = {"hipJitOptionTargetFromContext", CONV_JIT, API_DRIVER};
cuda2hipRename["CU_JIT_TARGET"] = {"hipJitOptionTarget", CONV_JIT, API_DRIVER};
cuda2hipRename["CU_JIT_FALLBACK_STRATEGY"] = {"hipJitOptionFallbackStrategy", CONV_JIT, API_DRIVER};
cuda2hipRename["CU_JIT_GENERATE_DEBUG_INFO"] = {"hipJitOptionGenerateDebugInfo", CONV_JIT, API_DRIVER};
cuda2hipRename["CU_JIT_LOG_VERBOSE"] = {"hipJitOptionLogVerbose", CONV_JIT, API_DRIVER};
cuda2hipRename["CU_JIT_GENERATE_LINE_INFO"] = {"hipJitOptionGenerateLineInfo", CONV_JIT, API_DRIVER};
cuda2hipRename["CU_JIT_CACHE_MODE"] = {"hipJitOptionCacheMode", CONV_JIT, API_DRIVER};
cuda2hipRename["CU_JIT_NUM_OPTIONS"] = {"hipJitOptionNumOptions", CONV_JIT, API_DRIVER};
// enum CUjit_target/CUjit_target_enum
cuda2hipRename["CUjit_target"] = {"hipJitTarget", CONV_JIT, API_DRIVER, HIP_UNSUPPORTED}; // API_Runtime ANALOGUE (no)
cuda2hipRename["CUjit_target_enum"] = {"hipJitTarget", CONV_JIT, API_DRIVER, HIP_UNSUPPORTED};
@@ -905,7 +905,7 @@ struct cuda2hipMap {
cuda2hipRename["cuModuleLoad"] = {"hipModuleLoad", CONV_MODULE, API_DRIVER};
cuda2hipRename["cuModuleLoadData"] = {"hipModuleLoadData", CONV_MODULE, API_DRIVER};
// unsupported yet by HIP
cuda2hipRename["cuModuleLoadDataEx"] = {"hipModuleLoadDataEx", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED};
cuda2hipRename["cuModuleLoadDataEx"] = {"hipModuleLoadDataEx", CONV_MODULE, API_DRIVER};
cuda2hipRename["cuModuleLoadFatBinary"] = {"hipModuleLoadFatBinary", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED};
cuda2hipRename["cuModuleUnload"] = {"hipModuleUnload", CONV_MODULE, API_DRIVER};
@@ -1915,7 +1915,7 @@ hipError_t hipModuleGetFunction(hipFunction_t *function, hipModule_t module, con
* @brief returns device memory pointer and size of the kernel present in the module with symbol @p name
*
* @param [out] dptr
* @param [out[ bytes
* @param [out] bytes
* @param [in] hmod
* @param [in] name
*
@@ -1923,7 +1923,6 @@ hipError_t hipModuleGetFunction(hipFunction_t *function, hipModule_t module, con
*/
hipError_t hipModuleGetGlobal(hipDeviceptr_t *dptr, size_t *bytes, hipModule_t hmod, const char *name);
/**
* @brief builds module from code object which resides in host memory. Image is pointer to that location.
*
@@ -1934,11 +1933,23 @@ hipError_t hipModuleGetGlobal(hipDeviceptr_t *dptr, size_t *bytes, hipModule_t h
*/
hipError_t hipModuleLoadData(hipModule_t *module, const void *image);
/**
* @brief builds module from code object which resides in host memory. Image is pointer to that location. Options are not used. hipModuleLoadData is called.
*
* @param [in] image
* @param [out] module
* @param [in] number of options
* @param [in] options for JIT
* @param [in] option values for JIT
*
* @returns hipSuccess, hipErrorNotInitialized, hipErrorOutOfMemory, hipErrorNotInitialized
*/
hipError_t hipModuleLoadDataEx(hipModule_t *module, const void *image, unsigned int numOptions, hipJitOption *options, void **optionValues);
/**
* @brief launches kernel f with launch parameters and shared memory on stream with arguments passed to kernelparams or extra
*
* @param [in[ f Kernel to launch.
* @param [in] f Kernel to launch.
* @param [in] gridDimX X grid dimension specified as multiple of blockDimX.
* @param [in] gridDimY Y grid dimension specified as multiple of blockDimY.
* @param [in] gridDimZ Z grid dimension specified as multiple of blockDimZ.
@@ -1946,7 +1957,7 @@ hipError_t hipModuleLoadData(hipModule_t *module, const void *image);
* @param [in] blockDimY Y grid dimension specified in work-items
* @param [in] blockDimZ Z grid dimension specified in work-items
* @param [in] sharedMemBytes Amount of dynamic shared memory to allocate for this kernel. The kernel can access this with HIP_DYNAMIC_SHARED.
* @param [in] stream Stream where the kernel should be dispatched. May be 0, in which case th default stream is used with associated synchronization rules.
* @param [in] stream Stream where the kernel should be dispatched. May be 0, in which case th default stream is used with associated synchronization rules.
* @param [in] kernelParams
* @param [in] extra Pointer to kernel arguments. These are passed directly to the kernel and must be in the memory layout and alignment expected by the kernel.
*
@@ -250,6 +250,30 @@ typedef enum hipDeviceAttribute_t {
hipDeviceAttributeIsMultiGpuBoard, ///< Multiple GPU devices.
} hipDeviceAttribute_t;
/*
* @brief hipJitOption
* @enum
* @ingroup Enumerations
*/
typedef enum hipJitOption {
hipJitOptionMaxRegisters = 0,
hipJitOptionThreadsPerBlock,
hipJitOptionWallTime,
hipJitOptionInfoLogBuffer,
hipJitOptionInfoLogBufferSizeBytes,
hipJitOptionErrorLogBuffer,
hipJitOptionErrorLogBufferSizeBytes,
hipJitOptionOptimizationLevel,
hipJitOptionTargetFromContext,
hipJitOptionTarget,
hipJitOptionFallbackStrategy,
hipJitOptionGenerateDebugInfo,
hipJitOptionLogVerbose,
hipJitOptionGenerateLineInfo,
hipJitOptionCacheMode,
hipJitOptionNumOptions
} hipJitOption;
/**
* @}
*/
@@ -54,26 +54,44 @@ hipMemcpyHostToHost
#define hipFilterModePoint cudaFilterModePoint
//! Flags that can be used with hipEventCreateWithFlags:
#define hipEventDefault cudaEventDefault
#define hipEventBlockingSync cudaEventBlockingSync
#define hipEventDisableTiming cudaEventDisableTiming
#define hipEventInterprocess cudaEventInterprocess
#define hipEventDefault cudaEventDefault
#define hipEventBlockingSync cudaEventBlockingSync
#define hipEventDisableTiming cudaEventDisableTiming
#define hipEventInterprocess cudaEventInterprocess
#define hipEventDisableSystemRelease cudaEventDefault /* no-op on CUDA platform */
#define hipHostMallocDefault cudaHostAllocDefault
#define hipHostMallocPortable cudaHostAllocPortable
#define hipHostMallocMapped cudaHostAllocMapped
#define hipHostMallocDefault cudaHostAllocDefault
#define hipHostMallocPortable cudaHostAllocPortable
#define hipHostMallocMapped cudaHostAllocMapped
#define hipHostMallocWriteCombined cudaHostAllocWriteCombined
#define hipHostRegisterPortable cudaHostRegisterPortable
#define hipHostRegisterMapped cudaHostRegisterMapped
#define hipHostRegisterMapped cudaHostRegisterMapped
#define HIP_LAUNCH_PARAM_BUFFER_POINTER CU_LAUNCH_PARAM_BUFFER_POINTER
#define HIP_LAUNCH_PARAM_BUFFER_SIZE CU_LAUNCH_PARAM_BUFFER_SIZE
#define HIP_LAUNCH_PARAM_BUFFER_SIZE CU_LAUNCH_PARAM_BUFFER_SIZE
#define HIP_LAUNCH_PARAM_END CU_LAUNCH_PARAM_END
#define hipLimitMallocHeapSize cudaLimitMallocHeapSize
#define hipIpcMemLazyEnablePeerAccess cudaIpcMemLazyEnablePeerAccess
#define hipIpcMemLazyEnablePeerAccess cudaIpcMemLazyEnablePeerAccess
// enum CUjit_option redefines
#define hipJitOptionMaxRegisters CU_JIT_MAX_REGISTERS
#define hipJitOptionThreadsPerBlock CU_JIT_THREADS_PER_BLOCK
#define hipJitOptionWallTime CU_JIT_WALL_TIME
#define hipJitOptionInfoLogBuffer CU_JIT_INFO_LOG_BUFFER
#define hipJitOptionInfoLogBufferSizeBytes CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES
#define hipJitOptionErrorLogBuffer CU_JIT_ERROR_LOG_BUFFER
#define hipJitOptionErrorLogBufferSizeBytes CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES
#define hipJitOptionOptimizationLevel CU_JIT_OPTIMIZATION_LEVEL
#define hipJitOptionTargetFromContext CU_JIT_TARGET_FROM_CUCONTEXT
#define hipJitOptionTarget CU_JIT_TARGET
#define hipJitOptionFallbackStrategy CU_JIT_FALLBACK_STRATEGY
#define hipJitOptionGenerateDebugInfo CU_JIT_GENERATE_DEBUG_INFO
#define hipJitOptionLogVerbose CU_JIT_LOG_VERBOSE
#define hipJitOptionGenerateLineInfo CU_JIT_GENERATE_LINE_INFO
#define hipJitOptionCacheMode CU_JIT_CACHE_MODE
#define hipJitOptionNumOptions CU_JIT_NUM_OPTIONS
typedef cudaEvent_t hipEvent_t;
typedef cudaStream_t hipStream_t;
@@ -84,6 +102,7 @@ typedef cudaFuncCache hipFuncCache_t;
typedef CUcontext hipCtx_t;
typedef CUsharedconfig hipSharedMemConfig;
typedef CUfunc_cache hipFuncCache;
typedef CUjit_option hipJitOption;
typedef CUdevice hipDevice_t;
typedef CUmodule hipModule_t;
typedef CUfunction hipFunction_t;
@@ -894,6 +913,11 @@ inline static hipError_t hipModuleLoadData(hipModule_t *module, const void *imag
return hipCUResultTohipError(cuModuleLoadData(module, image));
}
inline static hipError_t hipModuleLoadDataEx(hipModule_t *module, const void *image, unsigned int numOptions, hipJitOption *options, void **optionValues)
{
return hipCUResultTohipError(cuModuleLoadDataEx(module, image, numOptions, options, optionValues));
}
inline static hipError_t hipModuleLaunchKernel(hipFunction_t f,
unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ,
unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ,
+5 -1
View File
@@ -525,7 +525,6 @@ hipError_t hipModuleGetGlobal(hipDeviceptr_t *dptr, size_t *bytes,
}
}
hipError_t hipModuleLoadData(hipModule_t *module, const void *image)
{
HIP_INIT_API(module, image);
@@ -575,3 +574,8 @@ hipError_t hipModuleLoadData(hipModule_t *module, const void *image)
}
return ihipLogStatus(ret);
}
hipError_t hipModuleLoadDataEx(hipModule_t *module, const void *image, unsigned int numOptions, hipJitOption *options, void **optionValues)
{
return hipModuleLoadData(module, image);
}