Merge branch 'master' into getsymboladdress

[ROCm/hip commit: 8610128c3e]
This commit is contained in:
Michael Kuron
2018-11-20 12:03:22 +01:00
committato da GitHub
31 ha cambiato i file con 1349 aggiunte e 580 eliminazioni
+1 -3
Vedi File
@@ -167,8 +167,6 @@ def docker_build_inside_image( def build_image, String inside_args, String platf
}
// Cap the maximum amount of testing, in case of hangs
// Excluding hipVectorTypes test from automation; due to regression from HCC commit 2367133
// Excluding hipFloatMath test from automation; due to regression from ROCDL commit 2fc04e1
timeout(time: 1, unit: 'HOURS')
{
stage("${platform} unit testing")
@@ -178,7 +176,7 @@ def docker_build_inside_image( def build_image, String inside_args, String platf
cd ${build_dir_rel}
make install -j\$(nproc)
make build_tests -i -j\$(nproc)
ctest -E "(hipVectorTypes.tst|hipVectorTypesDevice.tst|hipFloatMath.tst)"
ctest
"""
// If unit tests output a junit or xunit file in the future, jenkins can parse that file
// to display test results on the dashboard
+4
Vedi File
@@ -498,6 +498,10 @@ foreach $arg (@ARGV)
$obj = "$tmpdir/$obj";
my $fileType = `file $obj`;
my $isObj = ($fileType =~ m/ELF/ or $fileType =~ m/COFF/);
if ($fileType =~ m/ELF/) {
my $sections = `readelf -e -W $obj`;
$isObj = !($sections =~ m/__CLANG_OFFLOAD_BUNDLE__/);
}
$allIsObj = ($allIsObj and $isObj);
if ($isObj) {
$realObjs = ($realObjs . " " . $obj);
@@ -22,8 +22,8 @@
| typedef |`CUDA_RESOURCE_VIEW_DESC_st` | |
| struct |`CUDA_TEXTURE_DESC` | |
| typedef |`CUDA_TEXTURE_DESC_st` | |
| struct |`CUdevprop` |`hipDeviceProp_t` |
| typedef |`CUdevprop_st` |`hipDeviceProp_t` |
| struct |`CUdevprop` | |
| typedef |`CUdevprop_st` | |
| struct |`CUipcEventHandle` |`ihipIpcEventHandle_t` |
| typedef |`CUipcEventHandle_st` |`ihipIpcEventHandle_t` |
| struct |`CUipcMemHandle` |`hipIpcMemHandle_t` |
@@ -763,6 +763,7 @@
| `cuDeviceGetName` | `hipDeviceGetName` |
| `cuDeviceTotalMem` | `hipDeviceTotalMem` |
| `cuDeviceGetLuid` | |
| `cuDeviceGetUuid` | |
## **6. Device Management [DEPRECATED]**
@@ -792,9 +793,9 @@
| `cuCtxGetCurrent` | `hipCtxGetCurrent` |
| `cuCtxGetDevice` | `hipCtxGetDevice` |
| `cuCtxGetFlags` | `hipCtxGetFlags` |
| `cuCtxGetLimit` | |
| `cuCtxGetLimit` | `hipDeviceGetLimit` |
| `cuCtxGetSharedMemConfig` | `hipCtxGetSharedMemConfig` |
| `cuCtxGetStreamPriorityRange` | |
| `cuCtxGetStreamPriorityRange` | `hipDeviceGetStreamPriorityRange`|
| `cuCtxPopCurrent` | `hipCtxPopCurrent` |
| `cuCtxPushCurrent` | `hipCtxPushCurrent` |
| `cuCtxSetCacheConfig` | `hipCtxSetCacheConfig` |
@@ -835,16 +836,16 @@
|-----------------------------------------------------------|-------------------------------|
| `cuArray3DCreate` | `hipArray3DCreate` |
| `cuArray3DGetDescriptor` | |
| `cuArrayCreate` | |
| `cuArrayCreate` | `hipArrayCreate` |
| `cuArrayDestroy` | |
| `cuArrayGetDescriptor` | |
| `cuDeviceGetByPCIBusId` | `hipDeviceGetByPCIBusId` |
| `cuDeviceGetPCIBusId` | `hipDeviceGetPCIBusId` |
| `cuIpcCloseMemHandle` | |
| `cuIpcCloseMemHandle` | `hipIpcCloseMemHandle` |
| `cuIpcGetEventHandle` | |
| `cuIpcGetMemHandle` | |
| `cuIpcGetMemHandle` | `hipIpcGetMemHandle` |
| `cuIpcOpenEventHandle` | |
| `cuIpcOpenMemHandle` | |
| `cuIpcOpenMemHandle` | `hipIpcOpenMemHandle` |
| `cuMemAlloc` | `hipMalloc` |
| `cuMemAllocHost` | |
| `cuMemAllocManaged` | |
@@ -867,7 +868,7 @@
| `cuMemcpyDtoDAsync` | `hipMemcpyDtoDAsync` |
| `cuMemcpyDtoH` | `hipMemcpyDtoH` |
| `cuMemcpyDtoHAsync` | `hipMemcpyDtoHAsync` |
| `cuMemcpyHtoA` | |
| `cuMemcpyHtoA` | `hipMemcpyHtoA` |
| `cuMemcpyHtoAAsync` | |
| `cuMemcpyHtoD` | `hipMemcpyHtoD` |
| `cuMemcpyHtoDAsync` | `hipMemcpyHtoDAsync` |
@@ -875,11 +876,11 @@
| `cuMemcpyPeerAsync` | |
| `cuMemFree` | `hipFree` |
| `cuMemFreeHost` | `hipFreeHost` |
| `cuMemGetAddressRange` | |
| `cuMemGetAddressRange` | `hipMemGetAddressRange` |
| `cuMemGetInfo` | `hipMemGetInfo` |
| `cuMemHostAlloc` | `hipHostMalloc` |
| `cuMemHostGetDevicePointer` | |
| `cuMemHostGetFlags` | |
| `cuMemHostGetDevicePointer` | `hipHostGetDevicePointer` |
| `cuMemHostGetFlags` | `hipHostGetFlags` |
| `cuMemHostRegister` | `hipHostRegister` |
| `cuMemHostUnregister` | `hipHostUnregister` |
| `cuMemsetD16` | |
@@ -892,8 +893,8 @@
| `cuMemsetD2D8Async` | |
| `cuMemsetD32` | `hipMemset` |
| `cuMemsetD32Async` | `hipMemsetAsync` |
| `cuMemsetD2D8` | |
| `cuMemsetD2D8Async` | |
| `cuMemsetD8` | `hipMemsetD8` |
| `cuMemsetD8Async` | |
| `cuMipmappedArrayCreate` | |
| `cuMipmappedArrayDestroy` | |
| `cuMipmappedArrayGetLevel` | |
@@ -916,8 +917,8 @@
|-----------------------------------------------------------|-------------------------------|
| `cuStreamAddCallback` | `hipStreamAddCallback` |
| `cuStreamAttachMemAsync` | |
| `cuStreamCreate` | |
| `cuStreamCreateWithPriority` | |
| `cuStreamCreate` | `hipStreamCreateWithFlags` |
| `cuStreamCreateWithPriority` | `hipStreamCreateWithPriority` |
| `cuStreamDestroy` | `hipStreamDestroy` |
| `cuStreamGetFlags` | `hipStreamGetFlags` |
| `cuStreamGetPriority` | `hipStreamGetPriority` |
@@ -932,7 +933,7 @@
| **CUDA** | **HIP** |
|-----------------------------------------------------------|-------------------------------|
| `cuEventCreate` | `hipEventCreate` |
| `cuEventCreate` | `hipEventCreateWithFlags` |
| `cuEventDestroy` | `hipEventDestroy` |
| `cuEventElapsedTime` | `hipEventElapsedTime` |
| `cuEventQuery` | `hipEventQuery` |
@@ -967,10 +968,13 @@
| **CUDA** | **HIP** |
|-----------------------------------------------------------|-------------------------------|
| `cuFuncGetAttribute` | |
| `cuFuncSetAttribute` | |
| `cuFuncSetCacheConfig` | `hipFuncSetCacheConfig` |
| `cuFuncSetSharedMemConfig` | |
| `cuLaunchKernel` | `hipModuleLaunchKernel` |
| `cuLaunchHostFunc` | |
| `cuLaunchCooperativeKernel` | |
| `cuLaunchCooperativeKernelMultiDevice` | |
## **18. Execution Control [DEPRECATED]**
@@ -1047,8 +1051,8 @@
| `cuTexRefGetMipmapLevelBias` | |
| `cuTexRefGetMipmapLevelClamp` | |
| `cuTexRefGetMipmappedArray` | |
| `cuTexRefSetAddress` | |
| `cuTexRefSetAddress2D` | |
| `cuTexRefSetAddress` | `hipTexRefSetAddress` |
| `cuTexRefSetAddress2D` | `hipTexRefSetAddress2D` |
| `cuTexRefSetAddressMode` | `hipTexRefSetAddressMode` |
| `cuTexRefSetArray` | `hipTexRefSetArray` |
| `cuTexRefSetBorderColor` | |
@@ -1233,3 +1237,4 @@
| `cuEGLStreamProducerReturnFrame` | |
| `cuGraphicsEGLRegisterImage` | |
| `cuGraphicsResourceGetMappedEglFrame` | |
| `cuEventCreateFromEGLSync` | |
@@ -11,7 +11,7 @@
| `cudaDeviceGetLimit` | `hipDeviceGetLimit` |
| `cudaDeviceGetPCIBusId` | `hipDeviceGetPCIBusId` |
| `cudaDeviceGetSharedMemConfig` | `hipDeviceGetSharedMemConfig` |
| `cudaDeviceGetStreamPriorityRange` | |
| `cudaDeviceGetStreamPriorityRange` | `hipDeviceGetStreamPriorityRange` |
| `cudaDeviceReset` | `hipDeviceReset` |
| `cudaDeviceSetCacheConfig` | `hipDeviceSetCacheConfig` |
| `cudaDeviceSetLimit` | `hipDeviceSetLimit` |
@@ -19,7 +19,7 @@
| `cudaDeviceSynchronize` | `hipDeviceSynchronize` |
| `cudaGetDevice` | `hipGetDevice` |
| `cudaGetDeviceCount` | `hipGetDeviceCount` |
| `cudaGetDeviceFlags` | |
| `cudaGetDeviceFlags` | `hipCtxGetFlags` |
| `cudaGetDeviceProperties` | `hipGetDeviceProperties` |
| `cudaIpcCloseMemHandle` | `hipIpcCloseMemHandle` |
| `cudaIpcGetEventHandle` | `hipIpcGetEventHandle` |
@@ -56,12 +56,15 @@
|-----------------------------------------------------------|-------------------------------|
| `cudaStreamAddCallback` | `hipStreamAddCallback` |
| `cudaStreamAttachMemAsync` | |
| `cudaStreamBeginCapture` | |
| `cudaStreamEndCapture` | |
| `cudaStreamIsCapturing` | |
| `cudaStreamCreate` | `hipStreamCreate` |
| `cudaStreamCreateWithFlags` | `hipStreamCreateWithFlags` |
| `cudaStreamCreateWithPriority` | |
| `cudaStreamCreateWithPriority` | `hipStreamCreateWithPriority` |
| `cudaStreamDestroy` | `hipStreamDestroy` |
| `cudaStreamGetFlags` | `hipStreamGetFlags` |
| `cudaStreamGetPriority` | |
| `cudaStreamGetPriority` | `hipStreamGetPriority` |
| `cudaStreamQuery` | `hipStreamQuery` |
| `cudaStreamSynchronize` | `hipStreamSynchronize` |
| `cudaStreamWaitEvent` | `hipStreamWaitEvent` |
@@ -82,7 +85,14 @@
| **CUDA** | **HIP** |
|-----------------------------------------------------------|-------------------------------|
| `cudaSignalExternalSemaphoresAsync` | |
| `cudaWaitExternalSemaphoresAsync` | |
| `cudaImportExternalMemory` | |
| `cudaExternalMemoryGetMappedBuffer` | |
| `cudaExternalMemoryGetMappedMipmappedArray` | |
| `cudaDestroyExternalMemory` | |
| `cudaImportExternalSemaphore` | |
| `cudaDestroyExternalSemaphore` | |
## **7. Execution Control**
+64 -56
Vedi File
@@ -1433,7 +1433,7 @@ __device__ float __expf(float x);
__device__ static float __fadd_rd(float x, float y);
```
**Description:** Supported
**Description:** Unsupported
### __fadd_rn
@@ -1441,7 +1441,7 @@ __device__ static float __fadd_rd(float x, float y);
__device__ static float __fadd_rn(float x, float y);
```
**Description:** Supported
**Description:** Unsupported
### __fadd_ru
@@ -1449,7 +1449,7 @@ __device__ static float __fadd_rn(float x, float y);
__device__ static float __fadd_ru(float x, float y);
```
**Description:** Supported
**Description:** Unsupported
### __fadd_rz
@@ -1457,7 +1457,7 @@ __device__ static float __fadd_ru(float x, float y);
__device__ static float __fadd_rz(float x, float y);
```
**Description:** Supported
**Description:** Unsupported
### __fdiv_rd
@@ -1465,7 +1465,7 @@ __device__ static float __fadd_rz(float x, float y);
__device__ static float __fdiv_rd(float x, float y);
```
**Description:** Supported
**Description:** Unsupported
### __fdiv_rn
@@ -1473,7 +1473,7 @@ __device__ static float __fdiv_rd(float x, float y);
__device__ static float __fdiv_rn(float x, float y);
```
**Description:** Supported
**Description:** Unsupported
### __fdiv_ru
@@ -1481,7 +1481,7 @@ __device__ static float __fdiv_rn(float x, float y);
__device__ static float __fdiv_ru(float x, float y);
```
**Description:** Supported
**Description:** Unsupported
### __fdiv_rz
@@ -1489,7 +1489,7 @@ __device__ static float __fdiv_ru(float x, float y);
__device__ static float __fdiv_rz(float x, float y);
```
**Description:** Supported
**Description:** Unsupported
### __fdividef
@@ -1505,7 +1505,7 @@ __device__ static float __fdividef(float x, float y);
__device__ float __fmaf_rd(float x, float y, float z);
```
**Description:** Supported
**Description:** Unsupported
### __fmaf_rn
@@ -1513,7 +1513,7 @@ __device__ float __fmaf_rd(float x, float y, float z);
__device__ float __fmaf_rn(float x, float y, float z);
```
**Description:** Supported
**Description:** Unsupported
### __fmaf_ru
@@ -1521,7 +1521,7 @@ __device__ float __fmaf_rn(float x, float y, float z);
__device__ float __fmaf_ru(float x, float y, float z);
```
**Description:** Supported
**Description:** Unsupported
### __fmaf_rz
@@ -1529,7 +1529,7 @@ __device__ float __fmaf_ru(float x, float y, float z);
__device__ float __fmaf_rz(float x, float y, float z);
```
**Description:** Supported
**Description:** Unsupported
### __fmul_rd
@@ -1537,7 +1537,7 @@ __device__ float __fmaf_rz(float x, float y, float z);
__device__ static float __fmul_rd(float x, float y);
```
**Description:** Supported
**Description:** Unsupported
### __fmul_rn
@@ -1545,7 +1545,7 @@ __device__ static float __fmul_rd(float x, float y);
__device__ static float __fmul_rn(float x, float y);
```
**Description:** Supported
**Description:** Unsupported
### __fmul_ru
@@ -1553,7 +1553,7 @@ __device__ static float __fmul_rn(float x, float y);
__device__ static float __fmul_ru(float x, float y);
```
**Description:** Supported
**Description:** Unsupported
### __fmul_rz
@@ -1561,7 +1561,7 @@ __device__ static float __fmul_ru(float x, float y);
__device__ static float __fmul_rz(float x, float y);
```
**Description:** Supported
**Description:** Unsupported
### __frcp_rd
@@ -1569,7 +1569,7 @@ __device__ static float __fmul_rz(float x, float y);
__device__ float __frcp_rd(float x);
```
**Description:** Supported
**Description:** Unsupported
### __frcp_rn
@@ -1577,7 +1577,7 @@ __device__ float __frcp_rd(float x);
__device__ float __frcp_rn(float x);
```
**Description:** Supported
**Description:** Unsupported
### __frcp_ru
@@ -1585,7 +1585,7 @@ __device__ float __frcp_rn(float x);
__device__ float __frcp_ru(float x);
```
**Description:** Supported
**Description:** Unsupported
### __frcp_rz
@@ -1593,7 +1593,7 @@ __device__ float __frcp_ru(float x);
__device__ float __frcp_rz(float x);
```
**Description:** Supported
**Description:** Unsupported
### __frsqrt_rn
@@ -1601,7 +1601,7 @@ __device__ float __frcp_rz(float x);
__device__ float __frsqrt_rn(float x);
```
**Description:** Supported
**Description:** Unsupported
### __fsqrt_rd
@@ -1609,7 +1609,7 @@ __device__ float __frsqrt_rn(float x);
__device__ float __fsqrt_rd(float x);
```
**Description:** Supported
**Description:** Unsupported
### __fsqrt_rn
@@ -1617,7 +1617,7 @@ __device__ float __fsqrt_rd(float x);
__device__ float __fsqrt_rn(float x);
```
**Description:** Supported
**Description:** Unsupported
### __fsqrt_ru
@@ -1625,7 +1625,7 @@ __device__ float __fsqrt_rn(float x);
__device__ float __fsqrt_ru(float x);
```
**Description:** Supported
**Description:** Unsupported
### __fsqrt_rz
@@ -1633,7 +1633,7 @@ __device__ float __fsqrt_ru(float x);
__device__ float __fsqrt_rz(float x);
```
**Description:** Supported
**Description:** Unsupported
### __fsub_rd
@@ -1641,7 +1641,7 @@ __device__ float __fsqrt_rz(float x);
__device__ static float __fsub_rd(float x, float y);
```
**Description:** Supported
**Description:** Unsupported
### __fsub_rn
@@ -1649,7 +1649,7 @@ __device__ static float __fsub_rd(float x, float y);
__device__ static float __fsub_rn(float x, float y);
```
**Description:** Supported
**Description:** Unsupported
### __fsub_ru
@@ -1657,7 +1657,15 @@ __device__ static float __fsub_rn(float x, float y);
__device__ static float __fsub_ru(float x, float y);
```
**Description:** Supported
**Description:** Unsupported
### __fsub_rz
```cpp
__device__ static float __fsub_rz(float x, float y);
```
**Description:** Unsupported
### __log10f
@@ -1729,7 +1737,7 @@ __device__ float __tanf(float x);
__device__ static double __dadd_rd(double x, double y);
```
**Description:** Supported
**Description:** Unsupported
### __dadd_rn
@@ -1737,7 +1745,7 @@ __device__ static double __dadd_rd(double x, double y);
__device__ static double __dadd_rn(double x, double y);
```
**Description:** Supported
**Description:** Unsupported
### __dadd_ru
@@ -1745,7 +1753,7 @@ __device__ static double __dadd_rn(double x, double y);
__device__ static double __dadd_ru(double x, double y);
```
**Description:** Supported
**Description:** Unsupported
### __dadd_rz
@@ -1753,7 +1761,7 @@ __device__ static double __dadd_ru(double x, double y);
__device__ static double __dadd_rz(double x, double y);
```
**Description:** Supported
**Description:** Unsupported
### __ddiv_rd
@@ -1761,7 +1769,7 @@ __device__ static double __dadd_rz(double x, double y);
__device__ static double __ddiv_rd(double x, double y);
```
**Description:** Supported
**Description:** Unsupported
### __ddiv_rn
@@ -1769,7 +1777,7 @@ __device__ static double __ddiv_rd(double x, double y);
__device__ static double __ddiv_rn(double x, double y);
```
**Description:** Supported
**Description:** Unsupported
### __ddiv_ru
@@ -1777,7 +1785,7 @@ __device__ static double __ddiv_rn(double x, double y);
__device__ static double __ddiv_ru(double x, double y);
```
**Description:** Supported
**Description:** Unsupported
### __ddiv_rz
@@ -1785,7 +1793,7 @@ __device__ static double __ddiv_ru(double x, double y);
__device__ static double __ddiv_rz(double x, double y);
```
**Description:** Supported
**Description:** Unsupported
### __dmul_rd
@@ -1793,7 +1801,7 @@ __device__ static double __ddiv_rz(double x, double y);
__device__ static double __dmul_rd(double x, double y);
```
**Description:** Supported
**Description:** Unsupported
### __dmul_rn
@@ -1801,7 +1809,7 @@ __device__ static double __dmul_rd(double x, double y);
__device__ static double __dmul_rn(double x, double y);
```
**Description:** Supported
**Description:** Unsupported
### __dmul_ru
@@ -1809,7 +1817,7 @@ __device__ static double __dmul_rn(double x, double y);
__device__ static double __dmul_ru(double x, double y);
```
**Description:** Supported
**Description:** Unsupported
### __dmul_rz
@@ -1817,7 +1825,7 @@ __device__ static double __dmul_ru(double x, double y);
__device__ static double __dmul_rz(double x, double y);
```
**Description:** Supported
**Description:** Unsupported
### __drcp_rd
@@ -1825,7 +1833,7 @@ __device__ static double __dmul_rz(double x, double y);
__device__ double __drcp_rd(double x);
```
**Description:** Supported
**Description:** Unsupported
### __drcp_rn
@@ -1833,7 +1841,7 @@ __device__ double __drcp_rd(double x);
__device__ double __drcp_rn(double x);
```
**Description:** Supported
**Description:** Unsupported
### __drcp_ru
@@ -1841,7 +1849,7 @@ __device__ double __drcp_rn(double x);
__device__ double __drcp_ru(double x);
```
**Description:** Supported
**Description:** Unsupported
### __drcp_rz
@@ -1849,7 +1857,7 @@ __device__ double __drcp_ru(double x);
__device__ double __drcp_rz(double x);
```
**Description:** Supported
**Description:** Unsupported
### __dsqrt_rd
@@ -1857,7 +1865,7 @@ __device__ double __drcp_rz(double x);
__device__ double __dsqrt_rd(double x);
```
**Description:** Supported
**Description:** Unsupported
### __dsqrt_rn
@@ -1865,7 +1873,7 @@ __device__ double __dsqrt_rd(double x);
__device__ double __dsqrt_rn(double x);
```
**Description:** Supported
**Description:** Unsupported
### __dsqrt_ru
@@ -1873,7 +1881,7 @@ __device__ double __dsqrt_rn(double x);
__device__ double __dsqrt_ru(double x);
```
**Description:** Supported
**Description:** Unsupported
### __dsqrt_rz
@@ -1881,7 +1889,7 @@ __device__ double __dsqrt_ru(double x);
__device__ double __dsqrt_rz(double x);
```
**Description:** Supported
**Description:** Unsupported
### __dsub_rd
@@ -1889,7 +1897,7 @@ __device__ double __dsqrt_rz(double x);
__device__ static double __dsub_rd(double x, double y);
```
**Description:** Supported
**Description:** Unsupported
### __dsub_rn
@@ -1897,7 +1905,7 @@ __device__ static double __dsub_rd(double x, double y);
__device__ static double __dsub_rn(double x, double y);
```
**Description:** Supported
**Description:** Unsupported
### __dsub_ru
@@ -1905,7 +1913,7 @@ __device__ static double __dsub_rn(double x, double y);
__device__ static double __dsub_ru(double x, double y);
```
**Description:** Supported
**Description:** Unsupported
### __dsub_rz
@@ -1913,7 +1921,7 @@ __device__ static double __dsub_ru(double x, double y);
__device__ static double __dsub_rz(double x, double y);
```
**Description:** Supported
**Description:** Unsupported
### __fma_rd
@@ -1921,7 +1929,7 @@ __device__ static double __dsub_rz(double x, double y);
__device__ double __fma_rd(double x, double y, double z);
```
**Description:** Supported
**Description:** Unsupported
### __fma_rn
@@ -1929,7 +1937,7 @@ __device__ double __fma_rd(double x, double y, double z);
__device__ double __fma_rn(double x, double y, double z);
```
**Description:** Supported
**Description:** Unsupported
### __fma_ru
@@ -1937,7 +1945,7 @@ __device__ double __fma_rn(double x, double y, double z);
__device__ double __fma_ru(double x, double y, double z);
```
**Description:** Supported
**Description:** Unsupported
### __fma_rz
@@ -1945,7 +1953,7 @@ __device__ double __fma_ru(double x, double y, double z);
__device__ double __fma_rz(double x, double y, double z);
```
**Description:** Supported
**Description:** Unsupported
### __brev
@@ -51,6 +51,8 @@ const std::map<llvm::StringRef, hipCounter>& CUDA_RENAMES_MAP() {
ret.insert(CUDA_DRIVER_FUNCTION_MAP.begin(), CUDA_DRIVER_FUNCTION_MAP.end());
ret.insert(CUDA_RUNTIME_TYPE_NAME_MAP.begin(), CUDA_RUNTIME_TYPE_NAME_MAP.end());
ret.insert(CUDA_RUNTIME_FUNCTION_MAP.begin(), CUDA_RUNTIME_FUNCTION_MAP.end());
ret.insert(CUDA_COMPLEX_TYPE_NAME_MAP.begin(), CUDA_COMPLEX_TYPE_NAME_MAP.end());
ret.insert(CUDA_COMPLEX_FUNCTION_MAP.begin(), CUDA_COMPLEX_FUNCTION_MAP.end());
ret.insert(CUDA_BLAS_TYPE_NAME_MAP.begin(), CUDA_BLAS_TYPE_NAME_MAP.end());
ret.insert(CUDA_BLAS_FUNCTION_MAP.begin(), CUDA_BLAS_FUNCTION_MAP.end());
ret.insert(CUDA_RAND_TYPE_NAME_MAP.begin(), CUDA_RAND_TYPE_NAME_MAP.end());
@@ -15,6 +15,10 @@ extern const std::map<llvm::StringRef, hipCounter> CUDA_DRIVER_TYPE_NAME_MAP;
extern const std::map<llvm::StringRef, hipCounter> CUDA_DRIVER_FUNCTION_MAP;
// Maps the names of CUDA RUNTIME API types to the corresponding HIP types
extern const std::map<llvm::StringRef, hipCounter> CUDA_RUNTIME_TYPE_NAME_MAP;
// Maps the names of CUDA Complex API types to the corresponding HIP types
extern const std::map<llvm::StringRef, hipCounter> CUDA_COMPLEX_TYPE_NAME_MAP;
// Maps the names of CUDA Complex API functions to the corresponding HIP functions
extern const std::map<llvm::StringRef, hipCounter> CUDA_COMPLEX_FUNCTION_MAP;
// Maps the names of CUDA RUNTIME API functions to the corresponding HIP functions
extern const std::map<llvm::StringRef, hipCounter> CUDA_RUNTIME_FUNCTION_MAP;
// Maps the names of CUDA BLAS API types to the corresponding HIP types
@@ -0,0 +1,28 @@
#include "CUDA2HIP.h"
// Maps the names of CUDA Complex API functions to the corresponding HIP functions.
// Each entry is keyed by the CUDA identifier; its value carries the HIP replacement
// name together with the CONV_COMPLEX and API_COMPLEX tags.
const std::map<llvm::StringRef, hipCounter> CUDA_COMPLEX_FUNCTION_MAP{
{"cuCrealf", {"hipCrealf", CONV_COMPLEX, API_COMPLEX}},
{"cuCimagf", {"hipCimagf", CONV_COMPLEX, API_COMPLEX}},
{"make_cuFloatComplex", {"make_hipFloatComplex", CONV_COMPLEX, API_COMPLEX}},
{"cuConjf", {"hipConjf", CONV_COMPLEX, API_COMPLEX}},
{"cuCaddf", {"hipCaddf", CONV_COMPLEX, API_COMPLEX}},
{"cuCsubf", {"hipCsubf", CONV_COMPLEX, API_COMPLEX}},
{"cuCmulf", {"hipCmulf", CONV_COMPLEX, API_COMPLEX}},
{"cuCdivf", {"hipCdivf", CONV_COMPLEX, API_COMPLEX}},
{"cuCabsf", {"hipCabsf", CONV_COMPLEX, API_COMPLEX}},
{"cuCreal", {"hipCreal", CONV_COMPLEX, API_COMPLEX}},
{"cuCimag", {"hipCimag", CONV_COMPLEX, API_COMPLEX}},
{"make_cuDoubleComplex", {"make_hipDoubleComplex", CONV_COMPLEX, API_COMPLEX}},
{"cuConj", {"hipConj", CONV_COMPLEX, API_COMPLEX}},
{"cuCadd", {"hipCadd", CONV_COMPLEX, API_COMPLEX}},
{"cuCsub", {"hipCsub", CONV_COMPLEX, API_COMPLEX}},
{"cuCmul", {"hipCmul", CONV_COMPLEX, API_COMPLEX}},
{"cuCdiv", {"hipCdiv", CONV_COMPLEX, API_COMPLEX}},
{"cuCabs", {"hipCabs", CONV_COMPLEX, API_COMPLEX}},
{"make_cuComplex", {"make_hipComplex", CONV_COMPLEX, API_COMPLEX}},
{"cuComplexFloatToDouble", {"hipComplexFloatToDouble", CONV_COMPLEX, API_COMPLEX}},
{"cuComplexDoubleToFloat", {"hipComplexDoubleToFloat", CONV_COMPLEX, API_COMPLEX}},
{"cuCfmaf", {"hipCfmaf", CONV_COMPLEX, API_COMPLEX}},
{"cuCfma", {"hipCfma", CONV_COMPLEX, API_COMPLEX}},
};
@@ -0,0 +1,8 @@
#include "CUDA2HIP.h"
// Maps the names of CUDA Complex API types to the corresponding HIP types.
// Each entry is keyed by the CUDA type name; its value carries the HIP replacement
// name together with the CONV_TYPE and API_COMPLEX tags.
const std::map<llvm::StringRef, hipCounter> CUDA_COMPLEX_TYPE_NAME_MAP{
{"cuFloatComplex", {"hipFloatComplex", CONV_TYPE, API_COMPLEX}},
{"cuDoubleComplex", {"hipDoubleComplex", CONV_TYPE, API_COMPLEX}},
{"cuComplex", {"hipComplex", CONV_TYPE, API_COMPLEX}},
};
File diff soppresso perché troppo grande Carica Diff
@@ -80,8 +80,10 @@ const std::map<llvm::StringRef, hipCounter> CUDA_DRIVER_TYPE_NAME_MAP{
{"CUDA_TEXTURE_DESC_st", {"HIP_TEXTURE_DESC", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}},
{"CUDA_TEXTURE_DESC", {"HIP_TEXTURE_DESC", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}},
{"CUdevprop_st", {"hipDeviceProp_t", CONV_TYPE, API_DRIVER}},
{"CUdevprop", {"hipDeviceProp_t", CONV_TYPE, API_DRIVER}},
// no analogue
// NOTE: cudaDeviceProp differs
{"CUdevprop_st", {"hipDeviceProp_t", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}},
{"CUdevprop", {"hipDeviceProp_t", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}},
// cudaIpcEventHandle_st
{"CUipcEventHandle_st", {"ihipIpcEventHandle_t", CONV_TYPE, API_DRIVER}},
@@ -1,6 +1,6 @@
#include "CUDA2HIP.h"
// Map of all functions
// Map of all CUDA Runtime API functions
const std::map<llvm::StringRef, hipCounter> CUDA_RUNTIME_FUNCTION_MAP{
// Error API
{"cudaGetLastError", {"hipGetLastError", CONV_ERROR, API_RUNTIME}},
@@ -9,29 +9,49 @@ const std::map<llvm::StringRef, hipCounter> CUDA_RUNTIME_FUNCTION_MAP{
{"cudaGetErrorString", {"hipGetErrorString", CONV_ERROR, API_RUNTIME}},
// memcpy functions
// no analogue
// NOTE: Not equal to cuMemcpy due to different signatures
{"cudaMemcpy", {"hipMemcpy", CONV_MEMORY, API_RUNTIME}},
{"cudaMemcpyToArray", {"hipMemcpyToArray", CONV_MEMORY, API_RUNTIME}},
{"cudaMemcpyToSymbol", {"hipMemcpyToSymbol", CONV_MEMORY, API_RUNTIME}},
{"cudaMemcpyToSymbolAsync", {"hipMemcpyToSymbolAsync", CONV_MEMORY, API_RUNTIME}},
{"cudaMemcpyAsync", {"hipMemcpyAsync", CONV_MEMORY, API_RUNTIME}},
// no analogue
// NOTE: Not equal to cuMemcpy2D due to different signatures
{"cudaMemcpy2D", {"hipMemcpy2D", CONV_MEMORY, API_RUNTIME}},
// no analogue
// NOTE: Not equal to cuMemcpy2DAsync due to different signatures
{"cudaMemcpy2DAsync", {"hipMemcpy2DAsync", CONV_MEMORY, API_RUNTIME}},
{"cudaMemcpy2DToArray", {"hipMemcpy2DToArray", CONV_MEMORY, API_RUNTIME}},
{"cudaMemcpy2DArrayToArray", {"hipMemcpy2DArrayToArray", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}},
{"cudaMemcpy2DFromArray", {"hipMemcpy2DFromArray", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}},
{"cudaMemcpy2DFromArrayAsync", {"hipMemcpy2DFromArrayAsync", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}},
{"cudaMemcpy2DToArrayAsync", {"hipMemcpy2DToArrayAsync", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}},
// no analogue
// NOTE: Not equal to cuMemcpy3D due to different signatures
{"cudaMemcpy3D", {"hipMemcpy3D", CONV_MEMORY, API_RUNTIME}},
// no analogue
// NOTE: Not equal to cuMemcpy3DAsync due to different signatures
{"cudaMemcpy3DAsync", {"hipMemcpy3DAsync", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}},
// no analogue
// NOTE: Not equal to cuMemcpy3DPeer due to different signatures
{"cudaMemcpy3DPeer", {"hipMemcpy3DPeer", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}},
// no analogue
// NOTE: Not equal to cuMemcpy3DPeerAsync due to different signatures
{"cudaMemcpy3DPeerAsync", {"hipMemcpy3DPeerAsync", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}},
// no analogue
// NOTE: Not equal to cuMemcpyAtoA due to different signatures
{"cudaMemcpyArrayToArray", {"hipMemcpyArrayToArray", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}},
{"cudaMemcpyFromArrayAsync", {"hipMemcpyFromArrayAsync", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}},
{"cudaMemcpyFromSymbol", {"hipMemcpyFromSymbol", CONV_MEMORY, API_RUNTIME}},
{"cudaMemcpyFromSymbolAsync", {"hipMemcpyFromSymbolAsync", CONV_MEMORY, API_RUNTIME}},
{"cudaMemAdvise", {"hipMemAdvise", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, //
{"cudaMemRangeGetAttribute", {"hipMemRangeGetAttribute", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, //
{"cudaMemRangeGetAttributes", {"hipMemRangeGetAttributes", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, //
// cuMemAdvise
{"cudaMemAdvise", {"hipMemAdvise", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}},
// cuMemRangeGetAttribute
{"cudaMemRangeGetAttribute", {"hipMemRangeGetAttribute", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}},
// cuMemRangeGetAttributes
{"cudaMemRangeGetAttributes", {"hipMemRangeGetAttributes", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}},
// memset
{"cudaMemset", {"hipMemset", CONV_MEMORY, API_RUNTIME}},
@@ -42,13 +62,17 @@ const std::map<llvm::StringRef, hipCounter> CUDA_RUNTIME_FUNCTION_MAP{
{"cudaMemset3DAsync", {"hipMemset3DAsync", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}},
// Memory management
// cuMemGetInfo
{"cudaMemGetInfo", {"hipMemGetInfo", CONV_MEMORY, API_RUNTIME}},
{"cudaArrayGetInfo", {"hipArrayGetInfo", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}},
// no analogue
// NOTE: Not equal to cuMipmappedArrayDestroy due to different signatures
{"cudaFreeMipmappedArray", {"hipFreeMipmappedArray", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}},
{"cudaGetMipmappedArrayLevel", {"hipGetMipmappedArrayLevel", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}},
{"cudaGetSymbolAddress", {"hipGetSymbolAddress", CONV_MEMORY, API_RUNTIME}},
{"cudaGetSymbolSize", {"hipGetSymbolSize", CONV_MEMORY, API_RUNTIME}},
{"cudaMemPrefetchAsync", {"hipMemPrefetchAsync", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, // // API_Driver ANALOGUE (cuMemPrefetchAsync)
// TODO: double check cuMemPrefetchAsync
{"cudaMemPrefetchAsync", {"hipMemPrefetchAsync", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}},
// malloc
{"cudaMalloc", {"hipMalloc", CONV_MEMORY, API_RUNTIME}},
@@ -57,15 +81,22 @@ const std::map<llvm::StringRef, hipCounter> CUDA_RUNTIME_FUNCTION_MAP{
{"cudaMalloc3D", {"hipMalloc3D", CONV_MEMORY, API_RUNTIME}},
{"cudaMalloc3DArray", {"hipMalloc3DArray", CONV_MEMORY, API_RUNTIME}},
{"cudaMallocManaged", {"hipMallocManaged", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}},
// no analogue
// NOTE: Not equal to cuMipmappedArrayCreate due to different signatures
{"cudaMallocMipmappedArray", {"hipMallocMipmappedArray", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}},
{"cudaMallocPitch", {"hipMallocPitch", CONV_MEMORY, API_RUNTIME}},
// cuMemFree
{"cudaFree", {"hipFree", CONV_MEMORY, API_RUNTIME}},
// cuMemFreeHost
{"cudaFreeHost", {"hipHostFree", CONV_MEMORY, API_RUNTIME}},
{"cudaFreeArray", {"hipFreeArray", CONV_MEMORY, API_RUNTIME}},
// cuMemHostRegister
{"cudaHostRegister", {"hipHostRegister", CONV_MEMORY, API_RUNTIME}},
// cuMemHostUnregister
{"cudaHostUnregister", {"hipHostUnregister", CONV_MEMORY, API_RUNTIME}},
// hipHostAlloc deprecated - use hipHostMalloc instead
// cuMemHostAlloc
// NOTE: hipHostAlloc deprecated - use hipHostMalloc instead
{"cudaHostAlloc", {"hipHostMalloc", CONV_MEMORY, API_RUNTIME}},
// make memory functions
@@ -74,35 +105,81 @@ const std::map<llvm::StringRef, hipCounter> CUDA_RUNTIME_FUNCTION_MAP{
{"make_cudaPos", {"make_hipPos", CONV_MEMORY, API_RUNTIME}},
// Host Register Flags
// cuMemHostGetFlags
{"cudaHostGetFlags", {"hipHostGetFlags", CONV_MEMORY, API_RUNTIME}},
// Events
{"cudaEventCreate", {"hipEventCreate", CONV_EVENT, API_RUNTIME}},
{"cudaEventCreateWithFlags", {"hipEventCreateWithFlags", CONV_EVENT, API_RUNTIME}},
{"cudaEventDestroy", {"hipEventDestroy", CONV_EVENT, API_RUNTIME}},
{"cudaEventRecord", {"hipEventRecord", CONV_EVENT, API_RUNTIME}},
{"cudaEventElapsedTime", {"hipEventElapsedTime", CONV_EVENT, API_RUNTIME}},
{"cudaEventSynchronize", {"hipEventSynchronize", CONV_EVENT, API_RUNTIME}},
{"cudaEventQuery", {"hipEventQuery", CONV_EVENT, API_RUNTIME}},
// no analogue
// NOTE: Not equal to cuEventCreate due to different signatures
{"cudaEventCreate", {"hipEventCreate", CONV_EVENT, API_RUNTIME}},
// cuEventCreate
{"cudaEventCreateWithFlags", {"hipEventCreateWithFlags", CONV_EVENT, API_RUNTIME}},
// cuEventDestroy
{"cudaEventDestroy", {"hipEventDestroy", CONV_EVENT, API_RUNTIME}},
// cuEventRecord
{"cudaEventRecord", {"hipEventRecord", CONV_EVENT, API_RUNTIME}},
// cuEventElapsedTime
{"cudaEventElapsedTime", {"hipEventElapsedTime", CONV_EVENT, API_RUNTIME}},
// cuEventSynchronize
{"cudaEventSynchronize", {"hipEventSynchronize", CONV_EVENT, API_RUNTIME}},
// cuEventQuery
{"cudaEventQuery", {"hipEventQuery", CONV_EVENT, API_RUNTIME}},
// 5.6. External Resource Interoperability
// cuDestroyExternalMemory
{"cudaDestroyExternalMemory", {"hipDestroyExternalMemory", CONV_EXT_RES, API_RUNTIME, HIP_UNSUPPORTED}},
// cuDestroyExternalSemaphore
{"cudaDestroyExternalSemaphore", {"hipDestroyExternalSemaphore", CONV_EXT_RES, API_RUNTIME, HIP_UNSUPPORTED}},
// cuExternalMemoryGetMappedBuffer
{"cudaExternalMemoryGetMappedBuffer", {"hipExternalMemoryGetMappedBuffer", CONV_EXT_RES, API_RUNTIME, HIP_UNSUPPORTED}},
// cuExternalMemoryGetMappedMipmappedArray
{"cudaExternalMemoryGetMappedMipmappedArray", {"hipExternalMemoryGetMappedMipmappedArray", CONV_EXT_RES, API_RUNTIME, HIP_UNSUPPORTED}},
// cuImportExternalMemory
{"cudaImportExternalMemory", {"hipImportExternalMemory", CONV_EXT_RES, API_RUNTIME, HIP_UNSUPPORTED}},
// cuImportExternalSemaphore
{"cudaImportExternalSemaphore", {"hipImportExternalSemaphore", CONV_EXT_RES, API_RUNTIME, HIP_UNSUPPORTED}},
// cuSignalExternalSemaphoresAsync
{"cudaSignalExternalSemaphoresAsync", {"hipSignalExternalSemaphoresAsync", CONV_EXT_RES, API_RUNTIME, HIP_UNSUPPORTED}},
// cuWaitExternalSemaphoresAsync
{"cudaWaitExternalSemaphoresAsync", {"hipWaitExternalSemaphoresAsync", CONV_EXT_RES, API_RUNTIME, HIP_UNSUPPORTED}},
// Streams
// no analogue
// NOTE: Not equal to cuStreamCreate due to different signatures
{"cudaStreamCreate", {"hipStreamCreate", CONV_STREAM, API_RUNTIME}},
// cuStreamCreate
{"cudaStreamCreateWithFlags", {"hipStreamCreateWithFlags", CONV_STREAM, API_RUNTIME}},
{"cudaStreamCreateWithPriority", {"hipStreamCreateWithPriority", CONV_STREAM, API_RUNTIME, HIP_UNSUPPORTED}},
// cuStreamCreateWithPriority
{"cudaStreamCreateWithPriority", {"hipStreamCreateWithPriority", CONV_STREAM, API_RUNTIME}},
// cuStreamDestroy
{"cudaStreamDestroy", {"hipStreamDestroy", CONV_STREAM, API_RUNTIME}},
// cuStreamWaitEvent
{"cudaStreamWaitEvent", {"hipStreamWaitEvent", CONV_STREAM, API_RUNTIME}},
// cuStreamSynchronize
{"cudaStreamSynchronize", {"hipStreamSynchronize", CONV_STREAM, API_RUNTIME}},
// cuStreamGetFlags
{"cudaStreamGetFlags", {"hipStreamGetFlags", CONV_STREAM, API_RUNTIME}},
// cuStreamQuery
{"cudaStreamQuery", {"hipStreamQuery", CONV_STREAM, API_RUNTIME}},
// cuStreamAddCallback
{"cudaStreamAddCallback", {"hipStreamAddCallback", CONV_STREAM, API_RUNTIME}},
// cuStreamAttachMemAsync
{"cudaStreamAttachMemAsync", {"hipStreamAttachMemAsync", CONV_STREAM, API_RUNTIME, HIP_UNSUPPORTED}},
{"cudaStreamGetPriority", {"hipStreamGetPriority", CONV_STREAM, API_RUNTIME, HIP_UNSUPPORTED}},
// cuStreamBeginCapture
{"cudaStreamBeginCapture", {"hipStreamBeginCapture", CONV_STREAM, API_RUNTIME, HIP_UNSUPPORTED}},
// cuStreamEndCapture
{"cudaStreamEndCapture", {"hipStreamEndCapture", CONV_STREAM, API_RUNTIME, HIP_UNSUPPORTED}},
// cuStreamIsCapturing
{"cudaStreamIsCapturing", {"hipStreamIsCapturing", CONV_STREAM, API_RUNTIME, HIP_UNSUPPORTED}},
// cuStreamGetPriority
{"cudaStreamGetPriority", {"hipStreamGetPriority", CONV_STREAM, API_RUNTIME}},
// Other synchronization
{"cudaDeviceSynchronize", {"hipDeviceSynchronize", CONV_DEVICE, API_RUNTIME}},
{"cudaDeviceReset", {"hipDeviceReset", CONV_DEVICE, API_RUNTIME}},
{"cudaSetDevice", {"hipSetDevice", CONV_DEVICE, API_RUNTIME}},
{"cudaGetDevice", {"hipGetDevice", CONV_DEVICE, API_RUNTIME}},
// cuDeviceGetCount
{"cudaGetDeviceCount", {"hipGetDeviceCount", CONV_DEVICE, API_RUNTIME}},
{"cudaChooseDevice", {"hipChooseDevice", CONV_DEVICE, API_RUNTIME}},
@@ -118,20 +195,25 @@ const std::map<llvm::StringRef, hipCounter> CUDA_RUNTIME_FUNCTION_MAP{
{"cudaDeviceGetAttribute", {"hipDeviceGetAttribute", CONV_DEVICE, API_RUNTIME}},
// Pointer Attributes
// struct cudaPointerAttributes
{"cudaPointerGetAttributes", {"hipPointerGetAttributes", CONV_MEMORY, API_RUNTIME}},
// no analogue
// NOTE: Not equal to cuPointerGetAttributes due to different signatures
{"cudaPointerGetAttributes", {"hipPointerGetAttributes", CONV_ADDRESSING, API_RUNTIME}},
// cuMemHostGetDevicePointer
{"cudaHostGetDevicePointer", {"hipHostGetDevicePointer", CONV_MEMORY, API_RUNTIME}},
// Device
{"cudaGetDeviceProperties", {"hipGetDeviceProperties", CONV_DEVICE, API_RUNTIME}},
// cuDeviceGetPCIBusId
{"cudaDeviceGetPCIBusId", {"hipDeviceGetPCIBusId", CONV_DEVICE, API_RUNTIME}},
// cuDeviceGetByPCIBusId
{"cudaDeviceGetByPCIBusId", {"hipDeviceGetByPCIBusId", CONV_DEVICE, API_RUNTIME}},
{"cudaDeviceGetStreamPriorityRange", {"hipDeviceGetStreamPriorityRange", CONV_DEVICE, API_RUNTIME, HIP_UNSUPPORTED}},
// cuCtxGetStreamPriorityRange
{"cudaDeviceGetStreamPriorityRange", {"hipDeviceGetStreamPriorityRange", CONV_DEVICE, API_RUNTIME}},
{"cudaSetValidDevices", {"hipSetValidDevices", CONV_DEVICE, API_RUNTIME, HIP_UNSUPPORTED}},
// Device Flags
{"cudaGetDeviceFlags", {"hipGetDeviceFlags", CONV_DEVICE, API_RUNTIME, HIP_UNSUPPORTED}},
// cuCtxGetFlags
{"cudaGetDeviceFlags", {"hipCtxGetFlags", CONV_DEVICE, API_RUNTIME}},
{"cudaSetDeviceFlags", {"hipSetDeviceFlags", CONV_DEVICE, API_RUNTIME}},
// Cache config
@@ -179,7 +261,7 @@ const std::map<llvm::StringRef, hipCounter> CUDA_RUNTIME_FUNCTION_MAP{
// {"cudaThreadGetSharedMemConfig", {"hipDeviceGetSharedMemConfig", CONV_DEVICE, API_RUNTIME}},
// {"cudaThreadSetSharedMemConfig", {"hipDeviceSetSharedMemConfig", CONV_DEVICE, API_RUNTIME}},
// cuCtxGetLimit
{"cudaDeviceGetLimit", {"hipDeviceGetLimit", CONV_DEVICE, API_RUNTIME}},
// Profiler
@@ -270,14 +270,14 @@ bool HipifyAction::cudaLaunchKernel(const clang::ast_matchers::MatchFinder::Matc
if (numArgs > 0) {
OS << ", ";
// Start of the first argument.
clang::SourceLocation argStart = launchKernel->getArg(0)->getLocStart();
clang::SourceLocation argStart = llcompat::getBeginLoc(launchKernel->getArg(0));
// End of the last argument.
clang::SourceLocation argEnd = launchKernel->getArg(numArgs - 1)->getLocEnd();
clang::SourceLocation argEnd = llcompat::getEndLoc(launchKernel->getArg(numArgs - 1));
OS << readSourceText(*SM, {argStart, argEnd});
}
OS << ")";
clang::SourceRange replacementRange = getWriteRange(*SM, {launchKernel->getLocStart(), launchKernel->getLocEnd()});
clang::SourceRange replacementRange = getWriteRange(*SM, {llcompat::getBeginLoc(launchKernel), llcompat::getEndLoc(launchKernel)});
clang::SourceLocation launchStart = replacementRange.getBegin();
clang::SourceLocation launchEnd = replacementRange.getEnd();
size_t length = SM->getCharacterData(clang::Lexer::getLocForEndOfToken(launchEnd, 0, *SM, DefaultLangOptions)) - SM->getCharacterData(launchStart);
@@ -320,8 +320,8 @@ bool HipifyAction::cudaSharedIncompleteArrayVar(const clang::ast_matchers::Match
}
if (!typeName.empty()) {
clang::SourceLocation slStart = sharedVar->getLocStart();
clang::SourceLocation slEnd = sharedVar->getLocEnd();
clang::SourceLocation slStart = llcompat::getBeginLoc(sharedVar->getTypeSourceInfo()->getTypeLoc());
clang::SourceLocation slEnd = llcompat::getEndLoc(sharedVar->getTypeSourceInfo()->getTypeLoc());
clang::SourceManager* SM = Result.SourceManager;
size_t repLength = SM->getCharacterData(slEnd) - SM->getCharacterData(slStart) + 1;
std::string varName = sharedVar->getNameAsString();
@@ -9,6 +9,7 @@
#include "Statistics.h"
namespace ct = clang::tooling;
using namespace llvm;
/**
* A FrontendAction that hipifies CUDA programs.
@@ -8,11 +8,11 @@ void PrintStackTraceOnErrorSignal() {
#if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR == 8)
llvm::sys::PrintStackTraceOnErrorSignal();
#else
llvm::sys::PrintStackTraceOnErrorSignal(clang::StringRef());
llvm::sys::PrintStackTraceOnErrorSignal(StringRef());
#endif
}
ct::Replacements& getReplacements(ct::RefactoringTool& Tool, clang::StringRef file) {
ct::Replacements& getReplacements(ct::RefactoringTool& Tool, StringRef file) {
#if LLVM_VERSION_MAJOR > 3
// getReplacements() now returns a map from filename to Replacements - so create an entry
// for this source file and return a reference to it.
@@ -40,4 +40,36 @@ void EnterPreprocessorTokenStream(clang::Preprocessor& _pp, const clang::Token *
#endif
}
/**
 * Return the source location at which a statement begins.
 *
 * Calls Stmt::getBeginLoc() when LLVM_VERSION_MAJOR is 8 or newer and
 * Stmt::getLocStart() otherwise, so callers need no version checks.
 */
clang::SourceLocation getBeginLoc(const clang::Stmt* stmt) {
#if LLVM_VERSION_MAJOR >= 8
  return stmt->getBeginLoc();
#else
  return stmt->getLocStart();
#endif
}
/**
 * Return the source location at which a type location begins.
 *
 * Calls TypeLoc::getBeginLoc() when LLVM_VERSION_MAJOR is 8 or newer and
 * TypeLoc::getLocStart() otherwise.
 */
clang::SourceLocation getBeginLoc(const clang::TypeLoc& typeLoc) {
#if LLVM_VERSION_MAJOR >= 8
  return typeLoc.getBeginLoc();
#else
  return typeLoc.getLocStart();
#endif
}
/**
 * Return the source location at which a statement ends.
 *
 * Calls Stmt::getEndLoc() when LLVM_VERSION_MAJOR is 8 or newer and
 * Stmt::getLocEnd() otherwise.
 */
clang::SourceLocation getEndLoc(const clang::Stmt* stmt) {
#if LLVM_VERSION_MAJOR >= 8
  return stmt->getEndLoc();
#else
  return stmt->getLocEnd();
#endif
}
/**
 * Return the source location at which a type location ends.
 *
 * Calls TypeLoc::getEndLoc() when LLVM_VERSION_MAJOR is 8 or newer and
 * TypeLoc::getLocEnd() otherwise.
 */
clang::SourceLocation getEndLoc(const clang::TypeLoc& typeLoc) {
#if LLVM_VERSION_MAJOR >= 8
  return typeLoc.getEndLoc();
#else
  return typeLoc.getLocEnd();
#endif
}
} // namespace llcompat
@@ -25,15 +25,23 @@ namespace llcompat {
#define LLVM_DEBUG(X) DEBUG(X)
#endif
clang::SourceLocation getBeginLoc(const clang::Stmt* stmt);
clang::SourceLocation getBeginLoc(const clang::TypeLoc& typeLoc);
clang::SourceLocation getEndLoc(const clang::Stmt* stmt);
clang::SourceLocation getEndLoc(const clang::TypeLoc& typeLoc);
void PrintStackTraceOnErrorSignal();
using namespace llvm;
/**
* Get the replacement map for a given filename in a RefactoringTool.
*
* Older LLVM versions don't actually support multiple filenames, so everything all gets
* smushed together. It is the caller's responsibility to cope with this.
*/
ct::Replacements& getReplacements(ct::RefactoringTool& Tool, clang::StringRef file);
ct::Replacements& getReplacements(ct::RefactoringTool& Tool, StringRef file);
/**
* Add a Replacement to a Replacements.
@@ -33,6 +33,7 @@ THE SOFTWARE.
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <functional>
#include <iostream>
#include <mutex>
@@ -56,7 +57,9 @@ template <
typename... Ts,
typename std::enable_if<n == sizeof...(Ts)>::type* = nullptr>
inline std::vector<std::uint8_t> make_kernarg(
std::vector<std::uint8_t> kernarg, const std::tuple<Ts...>&) {
const std::tuple<Ts...>&,
const std::vector<std::pair<std::size_t, std::size_t>>&,
std::vector<std::uint8_t> kernarg) {
return kernarg;
}
@@ -65,7 +68,9 @@ template <
typename... Ts,
typename std::enable_if<n != sizeof...(Ts)>::type* = nullptr>
inline std::vector<std::uint8_t> make_kernarg(
std::vector<std::uint8_t> kernarg, const std::tuple<Ts...>& formals) {
const std::tuple<Ts...>& formals,
const std::vector<std::pair<std::size_t, std::size_t>>& size_align,
std::vector<std::uint8_t> kernarg) {
using T = typename std::tuple_element<n, std::tuple<Ts...>>::type;
static_assert(
@@ -80,24 +85,44 @@ inline std::vector<std::uint8_t> make_kernarg(
#endif
kernarg.resize(round_up_to_next_multiple_nonnegative(
kernarg.size(), alignof(T)) + sizeof(T));
kernarg.size(), size_align[n].second) +
size_align[n].first);
new (kernarg.data() + kernarg.size() - sizeof(T)) T{std::get<n>(formals)};
std::memcpy(
kernarg.data() + kernarg.size() - size_align[n].first,
&std::get<n>(formals),
size_align[n].first);
return make_kernarg<n + 1>(std::move(kernarg), formals);
return make_kernarg<n + 1>(formals, size_align, std::move(kernarg));
}
template <typename... Formals, typename... Actuals>
inline std::vector<std::uint8_t> make_kernarg(
void (*)(Formals...), std::tuple<Actuals...> actuals) {
void (*kernel)(Formals...), std::tuple<Actuals...> actuals) {
static_assert(sizeof...(Formals) == sizeof...(Actuals),
"The count of formal arguments must match the count of actuals.");
if (sizeof...(Formals) == 0) return {};
const auto it = function_names().find(
reinterpret_cast<std::uintptr_t>(kernel));
if (it == function_names().cend()) {
throw std::runtime_error{"Undefined __global__ function."};
}
const auto it1 = kernargs().find(it->second);
if (it1 == kernargs().end()) {
throw std::runtime_error{
"Missing metadata for __global__ function: " + it->second};
}
std::tuple<Formals...> to_formals{std::move(actuals)};
std::vector<std::uint8_t> kernarg;
kernarg.reserve(sizeof(to_formals));
return make_kernarg<0>(std::move(kernarg), to_formals);
return make_kernarg<0>(to_formals, it1->second, std::move(kernarg));
}
void hipLaunchKernelGGLImpl(std::uintptr_t function_address, const dim3& numBlocks,
@@ -41,8 +41,14 @@ THE SOFTWARE.
#define __HIP_SIZE_OF_HEAP (__HIP_NUM_PAGES * __HIP_SIZE_OF_PAGE)
#if __HIP__ && __HIP_DEVICE_COMPILE__
__attribute__((weak)) __device__ char __hip_device_heap[__HIP_SIZE_OF_HEAP];
__attribute__((weak)) __device__
uint32_t __hip_device_page_flag[__HIP_NUM_PAGES];
#else
extern __device__ char __hip_device_heap[];
extern __device__ uint32_t __hip_device_page_flag[];
#endif
extern "C" inline __device__ void* __hip_malloc(size_t size) {
char* heap = (char*)__hip_device_heap;
@@ -514,38 +514,41 @@ float __exp10f(float x) { return __ocml_exp10_f32(x); }
__DEVICE__
inline
float __expf(float x) { return __ocml_exp_f32(x); }
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
inline
float __fadd_rd(float x, float y) { return __ocml_add_rtp_f32(x, y); }
float __fadd_rd(float x, float y) { return __ocml_add_rtn_f32(x, y); }
__DEVICE__
inline
float __fadd_rn(float x, float y) { return __ocml_add_rte_f32(x, y); }
__DEVICE__
inline
float __fadd_ru(float x, float y) { return __ocml_add_rtn_f32(x, y); }
float __fadd_ru(float x, float y) { return __ocml_add_rtp_f32(x, y); }
__DEVICE__
inline
float __fadd_rz(float x, float y) { return __ocml_add_rtz_f32(x, y); }
__DEVICE__
inline
float __fdiv_rd(float x, float y) { return x / y; }
float __fdiv_rd(float x, float y) { return __ocml_div_rtn_f32(x, y); }
__DEVICE__
inline
float __fdiv_rn(float x, float y) { return x / y; }
float __fdiv_rn(float x, float y) { return __ocml_div_rte_f32(x, y); }
__DEVICE__
inline
float __fdiv_ru(float x, float y) { return x / y; }
float __fdiv_ru(float x, float y) { return __ocml_div_rtp_f32(x, y); }
__DEVICE__
inline
float __fdiv_rz(float x, float y) { return x / y; }
float __fdiv_rz(float x, float y) { return __ocml_div_rtz_f32(x, y); }
#endif
__DEVICE__
inline
float __fdividef(float x, float y) { return x / y; }
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
inline
float __fmaf_rd(float x, float y, float z)
{
return __ocml_fma_rtp_f32(x, y, z);
return __ocml_fma_rtn_f32(x, y, z);
}
__DEVICE__
inline
@@ -557,7 +560,7 @@ __DEVICE__
inline
float __fmaf_ru(float x, float y, float z)
{
return __ocml_fma_rtn_f32(x, y, z);
return __ocml_fma_rtp_f32(x, y, z);
}
__DEVICE__
inline
@@ -567,13 +570,13 @@ float __fmaf_rz(float x, float y, float z)
}
__DEVICE__
inline
float __fmul_rd(float x, float y) { return __ocml_mul_rtp_f32(x, y); }
float __fmul_rd(float x, float y) { return __ocml_mul_rtn_f32(x, y); }
__DEVICE__
inline
float __fmul_rn(float x, float y) { return __ocml_mul_rte_f32(x, y); }
__DEVICE__
inline
float __fmul_ru(float x, float y) { return __ocml_mul_rtn_f32(x, y); }
float __fmul_ru(float x, float y) { return __ocml_mul_rtp_f32(x, y); }
__DEVICE__
inline
float __fmul_rz(float x, float y) { return __ocml_mul_rtz_f32(x, y); }
@@ -594,28 +597,29 @@ inline
float __frsqrt_rn(float x) { return __llvm_amdgcn_rsq_f32(x); }
__DEVICE__
inline
float __fsqrt_rd(float x) { return __ocml_sqrt_f32(x); }
float __fsqrt_rd(float x) { return __ocml_sqrt_rtn_f32(x); }
__DEVICE__
inline
float __fsqrt_rn(float x) { return __ocml_sqrt_f32(x); }
float __fsqrt_rn(float x) { return __ocml_sqrt_rte_f32(x); }
__DEVICE__
inline
float __fsqrt_ru(float x) { return __ocml_sqrt_f32(x); }
float __fsqrt_ru(float x) { return __ocml_sqrt_rtp_f32(x); }
__DEVICE__
inline
float __fsqrt_rz(float x) { return __ocml_sqrt_f32(x); }
float __fsqrt_rz(float x) { return __ocml_sqrt_rtz_f32(x); }
__DEVICE__
inline
float __fsub_rd(float x, float y) { return __ocml_sub_rtp_f32(x, y); }
float __fsub_rd(float x, float y) { return __ocml_sub_rtn_f32(x, y); }
__DEVICE__
inline
float __fsub_rn(float x, float y) { return __ocml_sub_rte_f32(x, y); }
__DEVICE__
inline
float __fsub_ru(float x, float y) { return __ocml_sub_rtn_f32(x, y); }
float __fsub_ru(float x, float y) { return __ocml_sub_rtp_f32(x, y); }
__DEVICE__
inline
float __fsub_rz(float x, float y) { return __ocml_sub_rtz_f32(x, y); }
#endif
__DEVICE__
inline
float __log10f(float x) { return __ocml_log10_f32(x); }
@@ -1034,39 +1038,40 @@ double yn(int n, double x)
}
// BEGIN INTRINSICS
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__DEVICE__
inline
double __dadd_rd(double x, double y) { return __ocml_add_rtp_f64(x, y); }
double __dadd_rd(double x, double y) { return __ocml_add_rtn_f64(x, y); }
__DEVICE__
inline
double __dadd_rn(double x, double y) { return __ocml_add_rte_f64(x, y); }
__DEVICE__
inline
double __dadd_ru(double x, double y) { return __ocml_add_rtn_f64(x, y); }
double __dadd_ru(double x, double y) { return __ocml_add_rtp_f64(x, y); }
__DEVICE__
inline
double __dadd_rz(double x, double y) { return __ocml_add_rtz_f64(x, y); }
__DEVICE__
inline
double __ddiv_rd(double x, double y) { return x / y; }
double __ddiv_rd(double x, double y) { return __ocml_div_rtn_f64(x, y); }
__DEVICE__
inline
double __ddiv_rn(double x, double y) { return x / y; }
double __ddiv_rn(double x, double y) { return __ocml_div_rte_f64(x, y); }
__DEVICE__
inline
double __ddiv_ru(double x, double y) { return x / y; }
double __ddiv_ru(double x, double y) { return __ocml_div_rtp_f64(x, y); }
__DEVICE__
inline
double __ddiv_rz(double x, double y) { return x / y; }
double __ddiv_rz(double x, double y) { return __ocml_div_rtz_f64(x, y); }
__DEVICE__
inline
double __dmul_rd(double x, double y) { return __ocml_mul_rtp_f64(x, y); }
double __dmul_rd(double x, double y) { return __ocml_mul_rtn_f64(x, y); }
__DEVICE__
inline
double __dmul_rn(double x, double y) { return __ocml_mul_rte_f64(x, y); }
__DEVICE__
inline
double __dmul_ru(double x, double y) { return __ocml_mul_rtn_f64(x, y); }
double __dmul_ru(double x, double y) { return __ocml_mul_rtp_f64(x, y); }
__DEVICE__
inline
double __dmul_rz(double x, double y) { return __ocml_mul_rtz_f64(x, y); }
@@ -1084,25 +1089,25 @@ inline
double __drcp_rz(double x) { return __llvm_amdgcn_rcp_f64(x); }
__DEVICE__
inline
double __dsqrt_rd(double x) { return __ocml_sqrt_f64(x); }
double __dsqrt_rd(double x) { return __ocml_sqrt_rtn_f64(x); }
__DEVICE__
inline
double __dsqrt_rn(double x) { return __ocml_sqrt_f64(x); }
double __dsqrt_rn(double x) { return __ocml_sqrt_rte_f64(x); }
__DEVICE__
inline
double __dsqrt_ru(double x) { return __ocml_sqrt_f64(x); }
double __dsqrt_ru(double x) { return __ocml_sqrt_rtp_f64(x); }
__DEVICE__
inline
double __dsqrt_rz(double x) { return __ocml_sqrt_f64(x); }
double __dsqrt_rz(double x) { return __ocml_sqrt_rtz_f64(x); }
__DEVICE__
inline
double __dsub_rd(double x, double y) { return __ocml_sub_rtp_f64(x, y); }
double __dsub_rd(double x, double y) { return __ocml_sub_rtn_f64(x, y); }
__DEVICE__
inline
double __dsub_rn(double x, double y) { return __ocml_sub_rte_f64(x, y); }
__DEVICE__
inline
double __dsub_ru(double x, double y) { return __ocml_sub_rtn_f64(x, y); }
double __dsub_ru(double x, double y) { return __ocml_sub_rtp_f64(x, y); }
__DEVICE__
inline
double __dsub_rz(double x, double y) { return __ocml_sub_rtz_f64(x, y); }
@@ -1110,7 +1115,7 @@ __DEVICE__
inline
double __fma_rd(double x, double y, double z)
{
return __ocml_fma_rtp_f64(x, y, z);
return __ocml_fma_rtn_f64(x, y, z);
}
__DEVICE__
inline
@@ -1122,7 +1127,7 @@ __DEVICE__
inline
double __fma_ru(double x, double y, double z)
{
return __ocml_fma_rtn_f64(x, y, z);
return __ocml_fma_rtp_f64(x, y, z);
}
__DEVICE__
inline
@@ -1130,6 +1135,7 @@ double __fma_rz(double x, double y, double z)
{
return __ocml_fma_rtz_f64(x, y, z);
}
#endif
// END INTRINSICS
// END DOUBLE
@@ -288,6 +288,30 @@ __attribute__((const))
float __ocml_mul_rtz_f32(float, float);
__device__
__attribute__((const))
float __ocml_div_rte_f32(float, float);
__device__
__attribute__((const))
float __ocml_div_rtn_f32(float, float);
__device__
__attribute__((const))
float __ocml_div_rtp_f32(float, float);
__device__
__attribute__((const))
float __ocml_div_rtz_f32(float, float);
// Single-precision square root with an explicit rounding mode
// (rte / rtn / rtp / rtz suffix). Square root is unary: the __fsqrt_*
// wrappers call these with exactly one argument, so the declarations
// must take a single float.
__device__
__attribute__((const))
float __ocml_sqrt_rte_f32(float);
__device__
__attribute__((const))
float __ocml_sqrt_rtn_f32(float);
__device__
__attribute__((const))
float __ocml_sqrt_rtp_f32(float);
__device__
__attribute__((const))
float __ocml_sqrt_rtz_f32(float);
__device__
__attribute__((const))
float __ocml_fma_rte_f32(float, float, float);
__device__
__attribute__((const))
@@ -572,6 +596,30 @@ __attribute__((const))
double __ocml_mul_rtz_f64(double, double);
__device__
__attribute__((const))
double __ocml_div_rte_f64(double, double);
__device__
__attribute__((const))
double __ocml_div_rtn_f64(double, double);
__device__
__attribute__((const))
double __ocml_div_rtp_f64(double, double);
__device__
__attribute__((const))
double __ocml_div_rtz_f64(double, double);
// Double-precision square root with an explicit rounding mode
// (rte / rtn / rtp / rtz suffix). Square root is unary: the __dsqrt_*
// wrappers call these with exactly one argument, so the declarations
// must take a single double.
__device__
__attribute__((const))
double __ocml_sqrt_rte_f64(double);
__device__
__attribute__((const))
double __ocml_sqrt_rtn_f64(double);
__device__
__attribute__((const))
double __ocml_sqrt_rtp_f64(double);
__device__
__attribute__((const))
double __ocml_sqrt_rtz_f64(double);
__device__
__attribute__((const))
double __ocml_fma_rte_f64(double, double, double);
__device__
__attribute__((const))
@@ -594,4 +642,4 @@ double __llvm_amdgcn_rsq_f64(double) __asm("llvm.amdgcn.rsq.f64");
#if defined(__cplusplus)
} // extern "C"
#endif
#endif
@@ -99,6 +99,8 @@ const std::unordered_map<std::uintptr_t, std::vector<std::pair<hsa_agent_t, Kern
functions(bool rebuild = false);
const std::unordered_map<std::uintptr_t, std::string>& function_names(bool rebuild = false);
std::unordered_map<std::string, void*>& globals(bool rebuild = false);
std::unordered_map<
std::string, std::vector<std::pair<std::size_t, std::size_t>>>& kernargs();
hsa_executable_t load_executable(const std::string& file, hsa_executable_t executable,
hsa_agent_t agent);
@@ -23,10 +23,6 @@ THE SOFTWARE.
#include <stdio.h>
#include <iostream>
#include "hip/hip_runtime.h"
#ifdef __HIP_PLATFORM_HCC__
#include <hc.hpp>
#endif
#define CHECK(cmd) \
{ \
@@ -44,7 +40,7 @@ __global__ void bit_extract_kernel(uint32_t* C_d, const uint32_t* A_d, size_t N)
for (size_t i = offset; i < N; i += stride) {
#ifdef __HIP_PLATFORM_HCC__
C_d[i] = hc::__bitextract_u32(A_d[i], 8, 4);
C_d[i] = __bitextract_u32(A_d[i], 8, 4);
#else /* defined __HIP_PLATFORM_NVCC__ or other path */
C_d[i] = ((A_d[i] & 0xf00) >> 8);
#endif
+57 -13
Vedi File
@@ -22,6 +22,7 @@ THE SOFTWARE.
#include <unordered_map>
#include <string>
#include <fstream>
#include "hip/hip_runtime.h"
#include "hip_hcc_internal.h"
@@ -86,6 +87,7 @@ __hipRegisterFatBinary(const void* data)
std::string target{&desc->triple[sizeof(AMDGCN_AMDHSA_TRIPLE)],
desc->tripleSize - sizeof(AMDGCN_AMDHSA_TRIPLE)};
tprintf(DB_FB, "Found bundle for %s\n", target.c_str());
for (int deviceId = 0; deviceId < g_deviceCnt; ++deviceId) {
hsa_agent_t agent = g_allAgents[deviceId + 1];
@@ -110,10 +112,35 @@ __hipRegisterFatBinary(const void* data)
if (module->executable.handle) {
modules->at(deviceId) = module;
tprintf(DB_FB, "Loaded code object for %s\n", name);
if (HIP_DUMP_CODE_OBJECT) {
char fname[30];
static std::atomic<int> index;
sprintf(fname, "__hip_dump_code_object%04d.o", index++);
tprintf(DB_FB, "Dump code object %s\n", fname);
std::ofstream ofs;
ofs.open(fname, std::ios::binary);
ofs << image;
ofs.close();
}
} else {
fprintf(stderr, "Failed to load code object for %s\n", name);
abort();
}
}
}
for (int deviceId = 0; deviceId < g_deviceCnt; ++deviceId) {
hsa_agent_t agent = g_allAgents[deviceId + 1];
char name[64] = {};
hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, name);
if (!(*modules)[deviceId]) {
fprintf(stderr, "No device code bundle for %s\n", name);
abort();
}
}
tprintf(DB_FB, "__hipRegisterFatBinary succeeds and returns %p\n", modules);
return modules;
}
@@ -132,13 +159,20 @@ extern "C" void __hipRegisterFunction(
dim3* gridDim,
int* wSize)
{
HIP_INIT_API(modules, hostFunction, deviceFunction, deviceName);
std::vector<hipFunction_t> functions{g_deviceCnt};
assert(modules && modules->size() >= g_deviceCnt);
for (int deviceId = 0; deviceId < g_deviceCnt; ++deviceId) {
hipFunction_t function;
if (hipSuccess == hipModuleGetFunction(&function, modules->at(deviceId), deviceName)) {
if (hipSuccess == hipModuleGetFunction(&function, modules->at(deviceId), deviceName) &&
function != nullptr) {
functions[deviceId] = function;
}
else {
tprintf(DB_FB, "__hipRegisterFunction cannot find kernel %s for"
" device %d\n", deviceName, deviceId);
}
}
g_functions.insert(std::make_pair(hostFunction, std::move(functions)));
@@ -180,6 +214,7 @@ hipError_t hipSetupArgument(
size_t size,
size_t offset)
{
HIP_INIT_API(arg, size, offset);
auto ctx = ihipGetTlsDefaultCtx();
LockedAccessor_CtxCrit_t crit(ctx->criticalData());
auto& arguments = crit->_execStack.top()._arguments;
@@ -194,6 +229,7 @@ hipError_t hipSetupArgument(
hipError_t hipLaunchByPtr(const void *hostFunction)
{
HIP_INIT_API(hostFunction);
ihipExec_t exec;
{
auto ctx = ihipGetTlsDefaultCtx();
@@ -213,20 +249,28 @@ hipError_t hipLaunchByPtr(const void *hostFunction)
deviceId = 0;
}
hipError_t e = hipSuccess;
decltype(g_functions)::iterator it;
if ((it = g_functions.find(hostFunction)) == g_functions.end())
return hipErrorUnknown;
if ((it = g_functions.find(hostFunction)) == g_functions.end() ||
!it->second[deviceId]) {
e = hipErrorUnknown;
fprintf(stderr, "hipLaunchByPtr cannot find kernel with stub address %p"
" for device %d!\n", hostFunction, deviceId);
abort();
} else {
size_t size = exec._arguments.size();
void *extra[] = {
HIP_LAUNCH_PARAM_BUFFER_POINTER, &exec._arguments[0],
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
HIP_LAUNCH_PARAM_END
};
size_t size = exec._arguments.size();
void *extra[] = {
HIP_LAUNCH_PARAM_BUFFER_POINTER, &exec._arguments[0],
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
HIP_LAUNCH_PARAM_END
};
e = hipModuleLaunchKernel(it->second[deviceId],
exec._gridDim.x, exec._gridDim.y, exec._gridDim.z,
exec._blockDim.x, exec._blockDim.y, exec._blockDim.z,
exec._sharedMem, exec._hStream, nullptr, extra);
}
return hipModuleLaunchKernel(it->second[deviceId],
exec._gridDim.x, exec._gridDim.y, exec._gridDim.z,
exec._blockDim.x, exec._blockDim.y, exec._blockDim.z,
exec._sharedMem, exec._hStream, nullptr, extra);
return ihipLogStatus(e);
}
+6
Vedi File
@@ -97,6 +97,8 @@ int HIP_INIT_ALLOC = -1;
int HIP_SYNC_STREAM_WAIT = 0;
int HIP_FORCE_NULL_STREAM = 0;
int HIP_DUMP_CODE_OBJECT = 0;
#if (__hcc_workweek__ >= 17300)
// Make sure we have required bug fix in HCC
@@ -1294,6 +1296,10 @@ void HipReadEnv() {
"overridden by specifying hipEventReleaseToSystem or hipEventReleaseToDevice flag "
"when creating the event.");
// Adjacent string literals are concatenated verbatim, so the first one must
// end with a space to keep "directory, where" readable in the help text.
READ_ENV_I(release, HIP_DUMP_CODE_OBJECT, 0,
           "If set, dump code object as __hip_dump_code_object[nnnn].o in the current directory, "
           "where nnnn is the index number.");
// Some flags have both compile-time and runtime flags - generate a warning if user enables the
// runtime flag but the compile-time flag is disabled.
if (HIP_DB && !COMPILE_HIP_DB) {
+1 -1
Vedi File
@@ -83,11 +83,11 @@ extern int HIP_SYNC_NULL_STREAM;
extern int HIP_INIT_ALLOC;
extern int HIP_FORCE_NULL_STREAM;
extern int HIP_DUMP_CODE_OBJECT;
// TODO - remove when this is standard behavior.
extern int HCC_OPT_FLUSH;
// Class to assign a short TID to each new thread, for HIP debugging purposes.
class TidInfo {
public:
+6 -7
Vedi File
@@ -985,10 +985,9 @@ hipError_t hipMemcpyToSymbol(const void* symbolName, const void* src, size_t cou
hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
if (kind == hipMemcpyHostToDevice || kind == hipMemcpyDeviceToHost ||
if (kind == hipMemcpyHostToDevice || kind == hipMemcpyDefault ||
kind == hipMemcpyDeviceToDevice || kind == hipMemcpyHostToHost) {
stream->lockedSymbolCopySync(acc, dst, (void*)src, count, offset, kind);
// acc.memcpy_symbol(dst, (void*)src, count+offset);
stream->locked_copySync((char*)dst+offset, (void*)src, count, kind, false);
} else {
return ihipLogStatus(hipErrorInvalidValue);
}
@@ -1018,9 +1017,9 @@ hipError_t hipMemcpyFromSymbol(void* dst, const void* symbolName, size_t count,
hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
if (kind == hipMemcpyHostToDevice || kind == hipMemcpyDeviceToHost ||
if (kind == hipMemcpyDefault || kind == hipMemcpyDeviceToHost ||
kind == hipMemcpyDeviceToDevice || kind == hipMemcpyHostToHost) {
stream->lockedSymbolCopySync(acc, dst, (void*)src, count, offset, kind);
stream->locked_copySync((void*)dst, (char*)src+offset, count, kind, false);
} else {
return ihipLogStatus(hipErrorInvalidValue);
}
@@ -1052,7 +1051,7 @@ hipError_t hipMemcpyToSymbolAsync(const void* symbolName, const void* src, size_
if (stream) {
try {
stream->lockedSymbolCopyAsync(acc, dst, (void*)src, count, offset, kind);
hip_internal::memcpyAsync((char*)dst+offset, src, count, kind, stream);
} catch (ihipException& ex) {
e = ex._code;
}
@@ -1088,7 +1087,7 @@ hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName, size_t co
stream = ihipSyncAndResolveStream(stream);
if (stream) {
try {
stream->lockedSymbolCopyAsync(acc, dst, src, count, offset, kind);
hip_internal::memcpyAsync(dst, (char*)src+offset, count, kind, stream);
} catch (ihipException& ex) {
e = ex._code;
}
+16 -7
Vedi File
@@ -258,20 +258,29 @@ struct Agent_global {
uint32_t byte_cnt;
};
inline void track(const Agent_global& x) {
inline void track(const Agent_global& x, hsa_agent_t agent) {
tprintf(DB_MEM, " add variable '%s' with ptr=%p size=%u to tracker\n", x.name.c_str(),
x.address, x.byte_cnt);
auto device = ihipGetTlsDefaultCtx()->getWriteableDevice();
int deviceIndex =0;
for ( deviceIndex = 0; deviceIndex < g_deviceCnt; deviceIndex++) {
if(g_allAgents[deviceIndex] == agent)
break;
}
auto device = ihipGetDevice(deviceIndex - 1);
hc::AmPointerInfo ptr_info(nullptr, x.address, x.address, x.byte_cnt, device->_acc, true,
false);
hc::am_memtracker_add(x.address, ptr_info);
#if USE_APP_PTR_FOR_CTX
hc::am_memtracker_update(x.address, device->_deviceId, 0u, ihipGetTlsDefaultCtx());
#else
hc::am_memtracker_update(x.address, device->_deviceId, 0u);
#endif
}
template <typename Container = vector<Agent_global>>
inline hsa_status_t copy_agent_global_variables(hsa_executable_t, hsa_agent_t,
inline hsa_status_t copy_agent_global_variables(hsa_executable_t, hsa_agent_t agent,
hsa_executable_symbol_t x, void* out) {
assert(out);
@@ -281,7 +290,7 @@ inline hsa_status_t copy_agent_global_variables(hsa_executable_t, hsa_agent_t,
if (t == HSA_SYMBOL_KIND_VARIABLE) {
static_cast<Container*>(out)->push_back(Agent_global{name(x), address(x), size(x)});
track(static_cast<Container*>(out)->back());
track(static_cast<Container*>(out)->back(),agent);
}
return HSA_STATUS_SUCCESS;
@@ -342,7 +351,7 @@ hipError_t read_agent_global_from_module(hipDeviceptr_t* dptr, size_t* bytes, hi
tie(*dptr, *bytes) = read_global_description(it0->second.cbegin(), it0->second.cend(), name);
return dptr ? hipSuccess : hipErrorNotFound;
return *dptr ? hipSuccess : hipErrorNotFound;
}
hipError_t read_agent_global_from_process(hipDeviceptr_t* dptr, size_t* bytes, const char* name) {
@@ -367,7 +376,7 @@ hipError_t read_agent_global_from_process(hipDeviceptr_t* dptr, size_t* bytes, c
tie(*dptr, *bytes) = read_global_description(it->second.cbegin(), it->second.cend(), name);
return dptr ? hipSuccess : hipErrorNotFound;
return *dptr ? hipSuccess : hipErrorNotFound;
}
hsa_executable_symbol_t find_kernel_by_name(hsa_executable_t executable, const char* kname) {
+105 -2
Vedi File
@@ -312,8 +312,8 @@ const unordered_map<string, vector<hsa_executable_symbol_t>>& kernels(bool rebui
void load_code_object_and_freeze_executable(
const string& file, hsa_agent_t agent,
hsa_executable_t
executable) { // TODO: the following sequence is inefficient, should be refactored
hsa_executable_t executable) {
// TODO: the following sequence is inefficient, should be refactored
// into a single load of the file and subsequent ELFIO
// processing.
static const auto cor_deleter = [](hsa_code_object_reader_t* p) {
@@ -340,6 +340,90 @@ void load_code_object_and_freeze_executable(
code_readers.push_back(move(tmp));
}
}
// Extracts (size, alignment) pairs from the textual kernel metadata.
//
// Scans `metadata` starting at offset `f` for successive "Size:" / "Align:"
// keys, parses the unsigned decimal number following each key and appends the
// resulting pair to `size_align`. Scanning stops once a "Size:" key is found
// whose value would start at or beyond offset `l` (the end of the current
// kernel's argument list).
//
// metadata   - text to scan.
// f          - offset at which to start scanning.
// l          - offset marking the end of the argument list; may be
//              string::npos when no end marker is known.
// size_align - output; left untouched if it already holds entries.
//
// Returns the offset at which the caller should resume scanning:
//   * `f` unchanged when the range is empty (f == l);
//   * `l` when `size_align` was already populated, or when the text runs out
//     of "Size:" / "Align:" keys before reaching `l`;
//   * otherwise the offset just past the first "Size:" key located at or
//     beyond `l`.
size_t parse_args(
    const string& metadata,
    size_t f,
    size_t l,
    vector<pair<size_t, size_t>>& size_align) {
    if (f == l) return f;
    if (!size_align.empty()) return l;

    static constexpr size_t size_sz{5};   // strlen("Size:")
    static constexpr size_t align_sz{6};  // strlen("Align:")

    do {
        // A failed search must terminate the scan: adding the key length to
        // string::npos would wrap around and restart parsing near offset 0.
        const auto size_pos = metadata.find("Size:", f);
        if (size_pos == string::npos) return l;

        f = size_pos + size_sz;
        if (l <= f) return f;

        auto size = strtoul(metadata.c_str() + f, nullptr, 10);

        // An argument's alignment must belong to the same argument list; a
        // missing or out-of-range key means the entry is incomplete, so it is
        // dropped rather than paired with unrelated data.
        const auto align_pos = metadata.find("Align:", f);
        if (align_pos == string::npos || l <= align_pos) return l;

        f = align_pos + align_sz;

        char* align_end{};
        auto align = strtoul(metadata.c_str() + f, &align_end, 10);

        // Skip the digits just consumed plus the character that follows them.
        f += (align_end - (metadata.c_str() + f)) + 1;

        size_align.emplace_back(size, align);
    } while (true);
}
// Collects per-kernel argument layout from the note section of an ELF image.
//
// Locates the first section of type SHT_NOTE in `reader`, walks its notes and,
// for every note named "AMD" whose descriptor text contains "Kernels:", scans
// that text for "Name:" entries. For each kernel name followed by an "Args:"
// block that precedes the next "CodeProps" marker, parse_args() is asked to
// fill `kernargs[name]` with (size, alignment) pairs.
//
// reader   - ELF image to inspect; nothing happens if it has no SHT_NOTE
//            section.
// kernargs - output map keyed by kernel name; entries are created on demand
//            via operator[].
void read_kernarg_metadata(
    elfio& reader,
    unordered_map<string, vector<pair<size_t, size_t>>>& kernargs)
{   // TODO: this is inefficient.
    auto it = find_section_if(
        reader, [](const section* x) { return x->get_type() == SHT_NOTE; });

    if (!it) return;

    const note_section_accessor acc{reader, it};
    for (decltype(acc.get_notes_num()) i = 0; i != acc.get_notes_num(); ++i) {
        ELFIO::Elf_Word type{};
        string name{};
        void* desc{};
        Elf_Word desc_size{};

        acc.get_note(i, type, name, desc, desc_size);

        if (name != "AMD") continue; // TODO: switch to using NT_AMD_AMDGPU_HSA_METADATA.

        // Treat the note descriptor as plain text for the key searches below.
        string tmp{
            static_cast<char*>(desc), static_cast<char*>(desc) + desc_size};

        auto dx = tmp.find("Kernels:");

        if (dx == string::npos) continue;

        static constexpr decltype(tmp.size()) kernels_sz{8};
        dx += kernels_sz;

        do {
            dx = tmp.find("Name:", dx);

            if (dx == string::npos) break;

            // Skip the blanks / opening quote that precede the name itself.
            static constexpr decltype(tmp.size()) name_sz{5};
            dx = tmp.find_first_not_of(" '", dx + name_sz);

            // A "Name:" key with nothing after it would make substr() throw
            // std::out_of_range; treat it as the end of usable metadata.
            if (dx == string::npos) break;

            auto fn = tmp.substr(dx, tmp.find_first_of("'\n", dx) - dx);
            dx += fn.size();

            auto dx1 = tmp.find("CodeProps", dx);
            dx = tmp.find("Args:", dx);

            // "CodeProps" appearing before the next "Args:" means this entry
            // has no argument block; resume the search from "CodeProps".
            if (dx1 < dx) {
                dx = dx1;
                continue;
            }
            if (dx == string::npos) break;

            static constexpr decltype(tmp.size()) args_sz{5};
            dx = parse_args(tmp, dx + args_sz, dx1, kernargs[fn]);
        } while (true);
    }
}
} // namespace
namespace hip_impl {
@@ -501,6 +585,25 @@ unordered_map<string, void*>& globals(bool rebuild) {
return r;
}
// Returns the table that maps a kernel name to the (size, align) pairs of its
// arguments.
//
// The table is a function-local static that is filled exactly once, on the
// first call, under std::call_once; every call returns a reference to that
// same object. For each entry returned by code_object_blobs(), the first
// element of the entry's `second` container is loaded as an ELF image and
// handed to read_kernarg_metadata. Entries whose image fails to load are
// skipped.
//
// NOTE(review): only the first blob of each entry is inspected; any further
// blobs in `second` are ignored - confirm that this is intended.
unordered_map<string, vector<pair<size_t, size_t>>>& kernargs() {
    static unordered_map<string, vector<pair<size_t, size_t>>> r;
    static once_flag f;

    call_once(f, []() {
        for (auto&& blob : code_object_blobs()) {
            // front() on an empty container is undefined behaviour, so skip
            // entries that carry no blob at all.
            if (blob.second.empty()) continue;

            const auto& image = blob.second.front();

            // ELFIO loads from a stream, hence the copy into a stringstream.
            stringstream tmp{std::string{image.cbegin(), image.cend()}};

            elfio reader;

            if (!reader.load(tmp)) continue;

            read_kernarg_metadata(reader, r);
        }
    });

    return r;
}
hsa_executable_t load_executable(const string& file, hsa_executable_t executable,
hsa_agent_t agent) {
elfio reader;
@@ -34,6 +34,7 @@ THE SOFTWARE.
#pragma clang diagnostic ignored "-Wunused-variable"
__device__ void double_precision_intrinsics() {
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__dadd_rd(0.0, 1.0);
__dadd_rn(0.0, 1.0);
__dadd_ru(0.0, 1.0);
@@ -62,6 +63,7 @@ __device__ void double_precision_intrinsics() {
__fma_rn(1.0, 2.0, 3.0);
__fma_ru(1.0, 2.0, 3.0);
__fma_rz(1.0, 2.0, 3.0);
#endif
}
__global__ void compileDoublePrecisionIntrinsics(int ignored) {
@@ -38,11 +38,13 @@ __global__ void floatMath(float* In, float* Out) {
Out[tid] = __cosf(In[tid]);
Out[tid] = __exp10f(Out[tid]);
Out[tid] = __expf(Out[tid]);
#if defined OCML_BASIC_ROUNDED_OPERATIONS
Out[tid] = __frsqrt_rn(Out[tid]);
//Out[tid] = __fsqrt_rd(Out[tid]);
//Out[tid] = __fsqrt_rn(Out[tid]);
//Out[tid] = __fsqrt_ru(Out[tid]);
//Out[tid] = __fsqrt_rz(Out[tid]);
Out[tid] = __fsqrt_rd(Out[tid]);
Out[tid] = __fsqrt_rn(Out[tid]);
Out[tid] = __fsqrt_ru(Out[tid]);
Out[tid] = __fsqrt_rz(Out[tid]);
#endif
Out[tid] = __log10f(Out[tid]);
Out[tid] = __log2f(Out[tid]);
Out[tid] = __logf(Out[tid]);
@@ -39,6 +39,7 @@ __device__ void single_precision_intrinsics() {
__cosf(0.0f);
__exp10f(0.0f);
__expf(0.0f);
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__fadd_rd(0.0f, 1.0f);
__fadd_rn(0.0f, 1.0f);
__fadd_ru(0.0f, 1.0f);
@@ -47,7 +48,9 @@ __device__ void single_precision_intrinsics() {
__fdiv_rn(4.0f, 2.0f);
__fdiv_ru(4.0f, 2.0f);
__fdiv_rz(4.0f, 2.0f);
#endif
__fdividef(4.0f, 2.0f);
#if defined OCML_BASIC_ROUNDED_OPERATIONS
__fmaf_rd(1.0f, 2.0f, 3.0f);
__fmaf_rn(1.0f, 2.0f, 3.0f);
__fmaf_ru(1.0f, 2.0f, 3.0f);
@@ -69,6 +72,7 @@ __device__ void single_precision_intrinsics() {
__fsub_rn(2.0f, 1.0f);
__fsub_ru(2.0f, 1.0f);
__fsub_rz(2.0f, 1.0f);
#endif
__log10f(1.0f);
__log2f(1.0f);
__logf(1.0f);