Merge branch 'master' into getsymboladdress
[ROCm/hip commit: 8610128c3e]
This commit is contained in:
esterno
+1
-3
@@ -167,8 +167,6 @@ def docker_build_inside_image( def build_image, String inside_args, String platf
|
||||
}
|
||||
|
||||
// Cap the maximum amount of testing, in case of hangs
|
||||
// Excluding hipVectorTypes test from automation; due to regression from HCC commit 2367133
|
||||
// Excluding hipFloatMath test from automation; due to regression from ROCDL commit 2fc04e1
|
||||
timeout(time: 1, unit: 'HOURS')
|
||||
{
|
||||
stage("${platform} unit testing")
|
||||
@@ -178,7 +176,7 @@ def docker_build_inside_image( def build_image, String inside_args, String platf
|
||||
cd ${build_dir_rel}
|
||||
make install -j\$(nproc)
|
||||
make build_tests -i -j\$(nproc)
|
||||
ctest -E "(hipVectorTypes.tst|hipVectorTypesDevice.tst|hipFloatMath.tst)"
|
||||
ctest
|
||||
"""
|
||||
// If unit tests output a junit or xunit file in the future, jenkins can parse that file
|
||||
// to display test results on the dashboard
|
||||
|
||||
@@ -498,6 +498,10 @@ foreach $arg (@ARGV)
|
||||
$obj = "$tmpdir/$obj";
|
||||
my $fileType = `file $obj`;
|
||||
my $isObj = ($fileType =~ m/ELF/ or $fileType =~ m/COFF/);
|
||||
if ($fileType =~ m/ELF/) {
|
||||
my $sections = `readelf -e -W $obj`;
|
||||
$isObj = !($sections =~ m/__CLANG_OFFLOAD_BUNDLE__/);
|
||||
}
|
||||
$allIsObj = ($allIsObj and $isObj);
|
||||
if ($isObj) {
|
||||
$realObjs = ($realObjs . " " . $obj);
|
||||
|
||||
@@ -22,8 +22,8 @@
|
||||
| typedef |`CUDA_RESOURCE_VIEW_DESC_st` | |
|
||||
| struct |`CUDA_TEXTURE_DESC` | |
|
||||
| typedef |`CUDA_TEXTURE_DESC_st` | |
|
||||
| struct |`CUdevprop` |`hipDeviceProp_t` |
|
||||
| typedef |`CUdevprop_st` |`hipDeviceProp_t` |
|
||||
| struct |`CUdevprop` | |
|
||||
| typedef |`CUdevprop_st` | |
|
||||
| struct |`CUipcEventHandle` |`ihipIpcEventHandle_t` |
|
||||
| typedef |`CUipcEventHandle_st` |`ihipIpcEventHandle_t` |
|
||||
| struct |`CUipcMemHandle` |`hipIpcMemHandle_t` |
|
||||
@@ -763,6 +763,7 @@
|
||||
| `cuDeviceGetName` | `hipDeviceGetName` |
|
||||
| `cuDeviceTotalMem` | `hipDeviceTotalMem` |
|
||||
| `cuDeviceGetLuid` | |
|
||||
| `cuDeviceGetUuid` | |
|
||||
|
||||
## **6. Device Management [DEPRECATED]**
|
||||
|
||||
@@ -792,9 +793,9 @@
|
||||
| `cuCtxGetCurrent` | `hipCtxGetCurrent` |
|
||||
| `cuCtxGetDevice` | `hipCtxGetDevice` |
|
||||
| `cuCtxGetFlags` | `hipCtxGetFlags` |
|
||||
| `cuCtxGetLimit` | |
|
||||
| `cuCtxGetLimit` | `hipDeviceGetLimit` |
|
||||
| `cuCtxGetSharedMemConfig` | `hipCtxGetSharedMemConfig` |
|
||||
| `cuCtxGetStreamPriorityRange` | |
|
||||
| `cuCtxGetStreamPriorityRange` | `hipDeviceGetStreamPriorityRange`|
|
||||
| `cuCtxPopCurrent` | `hipCtxPopCurrent` |
|
||||
| `cuCtxPushCurrent` | `hipCtxPushCurrent` |
|
||||
| `cuCtxSetCacheConfig` | `hipCtxSetCacheConfig` |
|
||||
@@ -835,16 +836,16 @@
|
||||
|-----------------------------------------------------------|-------------------------------|
|
||||
| `cuArray3DCreate` | `hipArray3DCreate` |
|
||||
| `cuArray3DGetDescriptor` | |
|
||||
| `cuArrayCreate` | |
|
||||
| `cuArrayCreate` | `hipArrayCreate` |
|
||||
| `cuArrayDestroy` | |
|
||||
| `cuArrayGetDescriptor` | |
|
||||
| `cuDeviceGetByPCIBusId` | `hipDeviceGetByPCIBusId` |
|
||||
| `cuDeviceGetPCIBusId` | `hipDeviceGetPCIBusId` |
|
||||
| `cuIpcCloseMemHandle` | |
|
||||
| `cuIpcCloseMemHandle` | `hipIpcCloseMemHandle` |
|
||||
| `cuIpcGetEventHandle` | |
|
||||
| `cuIpcGetMemHandle` | |
|
||||
| `cuIpcGetMemHandle` | `hipIpcGetMemHandle` |
|
||||
| `cuIpcOpenEventHandle` | |
|
||||
| `cuIpcOpenMemHandle` | |
|
||||
| `cuIpcOpenMemHandle` | `hipIpcOpenMemHandle` |
|
||||
| `cuMemAlloc` | `hipMalloc` |
|
||||
| `cuMemAllocHost` | |
|
||||
| `cuMemAllocManaged` | |
|
||||
@@ -867,7 +868,7 @@
|
||||
| `cuMemcpyDtoDAsync` | `hipMemcpyDtoDAsync` |
|
||||
| `cuMemcpyDtoH` | `hipMemcpyDtoH` |
|
||||
| `cuMemcpyDtoHAsync` | `hipMemcpyDtoHAsync` |
|
||||
| `cuMemcpyHtoA` | |
|
||||
| `cuMemcpyHtoA` | `hipMemcpyHtoA` |
|
||||
| `cuMemcpyHtoAAsync` | |
|
||||
| `cuMemcpyHtoD` | `hipMemcpyHtoD` |
|
||||
| `cuMemcpyHtoDAsync` | `hipMemcpyHtoDAsync` |
|
||||
@@ -875,11 +876,11 @@
|
||||
| `cuMemcpyPeerAsync` | |
|
||||
| `cuMemFree` | `hipFree` |
|
||||
| `cuMemFreeHost` | `hipFreeHost` |
|
||||
| `cuMemGetAddressRange` | |
|
||||
| `cuMemGetAddressRange` | `hipMemGetAddressRange` |
|
||||
| `cuMemGetInfo` | `hipMemGetInfo` |
|
||||
| `cuMemHostAlloc` | `hipHostMalloc` |
|
||||
| `cuMemHostGetDevicePointer` | |
|
||||
| `cuMemHostGetFlags` | |
|
||||
| `cuMemHostGetDevicePointer` | `hipHostGetDevicePointer` |
|
||||
| `cuMemHostGetFlags` | `hipHostGetFlags` |
|
||||
| `cuMemHostRegister` | `hipHostRegister` |
|
||||
| `cuMemHostUnregister` | `hipHostUnregister` |
|
||||
| `cuMemsetD16` | |
|
||||
@@ -892,8 +893,8 @@
|
||||
| `cuMemsetD2D8Async` | |
|
||||
| `cuMemsetD32` | `hipMemset` |
|
||||
| `cuMemsetD32Async` | `hipMemsetAsync` |
|
||||
| `cuMemsetD2D8` | |
|
||||
| `cuMemsetD2D8Async` | |
|
||||
| `cuMemsetD8` | `hipMemsetD8` |
|
||||
| `cuMemsetD8Async` | |
|
||||
| `cuMipmappedArrayCreate` | |
|
||||
| `cuMipmappedArrayDestroy` | |
|
||||
| `cuMipmappedArrayGetLevel` | |
|
||||
@@ -916,8 +917,8 @@
|
||||
|-----------------------------------------------------------|-------------------------------|
|
||||
| `cuStreamAddCallback` | `hipStreamAddCallback` |
|
||||
| `cuStreamAttachMemAsync` | |
|
||||
| `cuStreamCreate` | |
|
||||
| `cuStreamCreateWithPriority` | |
|
||||
| `cuStreamCreate` | `hipStreamCreateWithFlags` |
|
||||
| `cuStreamCreateWithPriority` | `hipStreamCreateWithPriority` |
|
||||
| `cuStreamDestroy` | `hipStreamDestroy` |
|
||||
| `cuStreamGetFlags` | `hipStreamGetFlags` |
|
||||
| `cuStreamGetPriority` | `hipStreamGetPriority` |
|
||||
@@ -932,7 +933,7 @@
|
||||
|
||||
| **CUDA** | **HIP** |
|
||||
|-----------------------------------------------------------|-------------------------------|
|
||||
| `cuEventCreate` | `hipEventCreate` |
|
||||
| `cuEventCreate` | `hipEventCreateWithFlags` |
|
||||
| `cuEventDestroy` | `hipEventDestroy` |
|
||||
| `cuEventElapsedTime` | `hipEventElapsedTime` |
|
||||
| `cuEventQuery` | `hipEventQuery` |
|
||||
@@ -967,10 +968,13 @@
|
||||
| **CUDA** | **HIP** |
|
||||
|-----------------------------------------------------------|-------------------------------|
|
||||
| `cuFuncGetAttribute` | |
|
||||
| `cuFuncSetAttribute` | |
|
||||
| `cuFuncSetCacheConfig` | `hipFuncSetCacheConfig` |
|
||||
| `cuFuncSetSharedMemConfig` | |
|
||||
| `cuLaunchKernel` | `hipModuleLaunchKernel` |
|
||||
| `cuLaunchHostFunc` | |
|
||||
| `cuLaunchCooperativeKernel` | |
|
||||
| `cuLaunchCooperativeKernelMultiDevice` | |
|
||||
|
||||
## **18. Execution Control [DEPRECATED]**
|
||||
|
||||
@@ -1047,8 +1051,8 @@
|
||||
| `cuTexRefGetMipmapLevelBias` | |
|
||||
| `cuTexRefGetMipmapLevelClamp` | |
|
||||
| `cuTexRefGetMipmappedArray` | |
|
||||
| `cuTexRefSetAddress` | |
|
||||
| `cuTexRefSetAddress2D` | |
|
||||
| `cuTexRefSetAddress` | `hipTexRefSetAddress` |
|
||||
| `cuTexRefSetAddress2D` | `hipTexRefSetAddress2D` |
|
||||
| `cuTexRefSetAddressMode` | `hipTexRefSetAddressMode` |
|
||||
| `cuTexRefSetArray` | `hipTexRefSetArray` |
|
||||
| `cuTexRefSetBorderColor` | |
|
||||
@@ -1233,3 +1237,4 @@
|
||||
| `cuEGLStreamProducerReturnFrame` | |
|
||||
| `cuGraphicsEGLRegisterImage` | |
|
||||
| `cuGraphicsResourceGetMappedEglFrame` | |
|
||||
| `cuEventCreateFromEGLSync` | |
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
| `cudaDeviceGetLimit` | `hipDeviceGetLimit` |
|
||||
| `cudaDeviceGetPCIBusId` | `hipDeviceGetPCIBusId` |
|
||||
| `cudaDeviceGetSharedMemConfig` | `hipDeviceGetSharedMemConfig` |
|
||||
| `cudaDeviceGetStreamPriorityRange` | |
|
||||
| `cudaDeviceGetStreamPriorityRange` | `hipDeviceGetStreamPriorityRange` |
|
||||
| `cudaDeviceReset` | `hipDeviceReset` |
|
||||
| `cudaDeviceSetCacheConfig` | `hipDeviceSetCacheConfig` |
|
||||
| `cudaDeviceSetLimit` | `hipDeviceSetLimit` |
|
||||
@@ -19,7 +19,7 @@
|
||||
| `cudaDeviceSynchronize` | `hipDeviceSynchronize` |
|
||||
| `cudaGetDevice` | `hipGetDevice` |
|
||||
| `cudaGetDeviceCount` | `hipGetDeviceCount` |
|
||||
| `cudaGetDeviceFlags` | |
|
||||
| `cudaGetDeviceFlags` | `hipCtxGetFlags` |
|
||||
| `cudaGetDeviceProperties` | `hipGetDeviceProperties` |
|
||||
| `cudaIpcCloseMemHandle` | `hipIpcCloseMemHandle` |
|
||||
| `cudaIpcGetEventHandle` | `hipIpcGetEventHandle` |
|
||||
@@ -56,12 +56,15 @@
|
||||
|-----------------------------------------------------------|-------------------------------|
|
||||
| `cudaStreamAddCallback` | `hipStreamAddCallback` |
|
||||
| `cudaStreamAttachMemAsync` | |
|
||||
| `cudaStreamBeginCapture` | |
|
||||
| `cudaStreamEndCapture` | |
|
||||
| `cudaStreamIsCapturing` | |
|
||||
| `cudaStreamCreate` | `hipStreamCreate` |
|
||||
| `cudaStreamCreateWithFlags` | `hipStreamCreateWithFlags` |
|
||||
| `cudaStreamCreateWithPriority` | |
|
||||
| `cudaStreamCreateWithPriority` | `hipStreamCreateWithPriority` |
|
||||
| `cudaStreamDestroy` | `hipStreamDestroy` |
|
||||
| `cudaStreamGetFlags` | `hipStreamGetFlags` |
|
||||
| `cudaStreamGetPriority` | |
|
||||
| `cudaStreamGetPriority` | `hipStreamGetPriority` |
|
||||
| `cudaStreamQuery` | `hipStreamQuery` |
|
||||
| `cudaStreamSynchronize` | `hipStreamSynchronize` |
|
||||
| `cudaStreamWaitEvent` | `hipStreamWaitEvent` |
|
||||
@@ -82,7 +85,14 @@
|
||||
|
||||
| **CUDA** | **HIP** |
|
||||
|-----------------------------------------------------------|-------------------------------|
|
||||
|
||||
| `cudaSignalExternalSemaphoresAsync` | |
|
||||
| `cudaWaitExternalSemaphoresAsync` | |
|
||||
| `cudaImportExternalMemory` | |
|
||||
| `cudaExternalMemoryGetMappedBuffer` | |
|
||||
| `cudaExternalMemoryGetMappedMipmappedArray` | |
|
||||
| `cudaDestroyExternalMemory` | |
|
||||
| `cudaImportExternalSemaphore` | |
|
||||
| `cudaDestroyExternalSemaphore` | |
|
||||
|
||||
## **7. Execution Control**
|
||||
|
||||
|
||||
@@ -1433,7 +1433,7 @@ __device__ float __expf(float x);
|
||||
__device__ static float __fadd_rd(float x, float y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fadd_rn
|
||||
@@ -1441,7 +1441,7 @@ __device__ static float __fadd_rd(float x, float y);
|
||||
__device__ static float __fadd_rn(float x, float y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fadd_ru
|
||||
@@ -1449,7 +1449,7 @@ __device__ static float __fadd_rn(float x, float y);
|
||||
__device__ static float __fadd_ru(float x, float y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fadd_rz
|
||||
@@ -1457,7 +1457,7 @@ __device__ static float __fadd_ru(float x, float y);
|
||||
__device__ static float __fadd_rz(float x, float y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fdiv_rd
|
||||
@@ -1465,7 +1465,7 @@ __device__ static float __fadd_rz(float x, float y);
|
||||
__device__ static float __fdiv_rd(float x, float y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fdiv_rn
|
||||
@@ -1473,7 +1473,7 @@ __device__ static float __fdiv_rd(float x, float y);
|
||||
__device__ static float __fdiv_rn(float x, float y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fdiv_ru
|
||||
@@ -1481,7 +1481,7 @@ __device__ static float __fdiv_rn(float x, float y);
|
||||
__device__ static float __fdiv_ru(float x, float y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fdiv_rz
|
||||
@@ -1489,7 +1489,7 @@ __device__ static float __fdiv_ru(float x, float y);
|
||||
__device__ static float __fdiv_rz(float x, float y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fdividef
|
||||
@@ -1505,7 +1505,7 @@ __device__ static float __fdividef(float x, float y);
|
||||
__device__ float __fmaf_rd(float x, float y, float z);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fmaf_rn
|
||||
@@ -1513,7 +1513,7 @@ __device__ float __fmaf_rd(float x, float y, float z);
|
||||
__device__ float __fmaf_rn(float x, float y, float z);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fmaf_ru
|
||||
@@ -1521,7 +1521,7 @@ __device__ float __fmaf_rn(float x, float y, float z);
|
||||
__device__ float __fmaf_ru(float x, float y, float z);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fmaf_rz
|
||||
@@ -1529,7 +1529,7 @@ __device__ float __fmaf_ru(float x, float y, float z);
|
||||
__device__ float __fmaf_rz(float x, float y, float z);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fmul_rd
|
||||
@@ -1537,7 +1537,7 @@ __device__ float __fmaf_rz(float x, float y, float z);
|
||||
__device__ static float __fmul_rd(float x, float y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fmul_rn
|
||||
@@ -1545,7 +1545,7 @@ __device__ static float __fmul_rd(float x, float y);
|
||||
__device__ static float __fmul_rn(float x, float y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fmul_ru
|
||||
@@ -1553,7 +1553,7 @@ __device__ static float __fmul_rn(float x, float y);
|
||||
__device__ static float __fmul_ru(float x, float y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fmul_rz
|
||||
@@ -1561,7 +1561,7 @@ __device__ static float __fmul_ru(float x, float y);
|
||||
__device__ static float __fmul_rz(float x, float y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __frcp_rd
|
||||
@@ -1569,7 +1569,7 @@ __device__ static float __fmul_rz(float x, float y);
|
||||
__device__ float __frcp_rd(float x);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __frcp_rn
|
||||
@@ -1577,7 +1577,7 @@ __device__ float __frcp_rd(float x);
|
||||
__device__ float __frcp_rn(float x);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __frcp_ru
|
||||
@@ -1585,7 +1585,7 @@ __device__ float __frcp_rn(float x);
|
||||
__device__ float __frcp_ru(float x);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __frcp_rz
|
||||
@@ -1593,7 +1593,7 @@ __device__ float __frcp_ru(float x);
|
||||
__device__ float __frcp_rz(float x);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __frsqrt_rn
|
||||
@@ -1601,7 +1601,7 @@ __device__ float __frcp_rz(float x);
|
||||
__device__ float __frsqrt_rn(float x);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fsqrt_rd
|
||||
@@ -1609,7 +1609,7 @@ __device__ float __frsqrt_rn(float x);
|
||||
__device__ float __fsqrt_rd(float x);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fsqrt_rn
|
||||
@@ -1617,7 +1617,7 @@ __device__ float __fsqrt_rd(float x);
|
||||
__device__ float __fsqrt_rn(float x);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fsqrt_ru
|
||||
@@ -1625,7 +1625,7 @@ __device__ float __fsqrt_rn(float x);
|
||||
__device__ float __fsqrt_ru(float x);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fsqrt_rz
|
||||
@@ -1633,7 +1633,7 @@ __device__ float __fsqrt_ru(float x);
|
||||
__device__ float __fsqrt_rz(float x);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fsub_rd
|
||||
@@ -1641,7 +1641,7 @@ __device__ float __fsqrt_rz(float x);
|
||||
__device__ static float __fsub_rd(float x, float y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fsub_rn
|
||||
@@ -1649,7 +1649,7 @@ __device__ static float __fsub_rd(float x, float y);
|
||||
__device__ static float __fsub_rn(float x, float y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fsub_ru
|
||||
@@ -1657,7 +1657,15 @@ __device__ static float __fsub_rn(float x, float y);
|
||||
__device__ static float __fsub_ru(float x, float y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fsub_rz
|
||||
```cpp
|
||||
__device__ static float __fsub_rz(float x, float y);
|
||||
|
||||
```
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __log10f
|
||||
@@ -1729,7 +1737,7 @@ __device__ float __tanf(float x);
|
||||
__device__ static double __dadd_rd(double x, double y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __dadd_rn
|
||||
@@ -1737,7 +1745,7 @@ __device__ static double __dadd_rd(double x, double y);
|
||||
__device__ static double __dadd_rn(double x, double y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __dadd_ru
|
||||
@@ -1745,7 +1753,7 @@ __device__ static double __dadd_rn(double x, double y);
|
||||
__device__ static double __dadd_ru(double x, double y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __dadd_rz
|
||||
@@ -1753,7 +1761,7 @@ __device__ static double __dadd_ru(double x, double y);
|
||||
__device__ static double __dadd_rz(double x, double y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __ddiv_rd
|
||||
@@ -1761,7 +1769,7 @@ __device__ static double __dadd_rz(double x, double y);
|
||||
__device__ static double __ddiv_rd(double x, double y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __ddiv_rn
|
||||
@@ -1769,7 +1777,7 @@ __device__ static double __ddiv_rd(double x, double y);
|
||||
__device__ static double __ddiv_rn(double x, double y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __ddiv_ru
|
||||
@@ -1777,7 +1785,7 @@ __device__ static double __ddiv_rn(double x, double y);
|
||||
__device__ static double __ddiv_ru(double x, double y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __ddiv_rz
|
||||
@@ -1785,7 +1793,7 @@ __device__ static double __ddiv_ru(double x, double y);
|
||||
__device__ static double __ddiv_rz(double x, double y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __dmul_rd
|
||||
@@ -1793,7 +1801,7 @@ __device__ static double __ddiv_rz(double x, double y);
|
||||
__device__ static double __dmul_rd(double x, double y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __dmul_rn
|
||||
@@ -1801,7 +1809,7 @@ __device__ static double __dmul_rd(double x, double y);
|
||||
__device__ static double __dmul_rn(double x, double y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __dmul_ru
|
||||
@@ -1809,7 +1817,7 @@ __device__ static double __dmul_rn(double x, double y);
|
||||
__device__ static double __dmul_ru(double x, double y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __dmul_rz
|
||||
@@ -1817,7 +1825,7 @@ __device__ static double __dmul_ru(double x, double y);
|
||||
__device__ static double __dmul_rz(double x, double y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __drcp_rd
|
||||
@@ -1825,7 +1833,7 @@ __device__ static double __dmul_rz(double x, double y);
|
||||
__device__ double __drcp_rd(double x);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __drcp_rn
|
||||
@@ -1833,7 +1841,7 @@ __device__ double __drcp_rd(double x);
|
||||
__device__ double __drcp_rn(double x);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __drcp_ru
|
||||
@@ -1841,7 +1849,7 @@ __device__ double __drcp_rn(double x);
|
||||
__device__ double __drcp_ru(double x);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __drcp_rz
|
||||
@@ -1849,7 +1857,7 @@ __device__ double __drcp_ru(double x);
|
||||
__device__ double __drcp_rz(double x);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __dsqrt_rd
|
||||
@@ -1857,7 +1865,7 @@ __device__ double __drcp_rz(double x);
|
||||
__device__ double __dsqrt_rd(double x);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __dsqrt_rn
|
||||
@@ -1865,7 +1873,7 @@ __device__ double __dsqrt_rd(double x);
|
||||
__device__ double __dsqrt_rn(double x);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __dsqrt_ru
|
||||
@@ -1873,7 +1881,7 @@ __device__ double __dsqrt_rn(double x);
|
||||
__device__ double __dsqrt_ru(double x);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __dsqrt_rz
|
||||
@@ -1881,7 +1889,7 @@ __device__ double __dsqrt_ru(double x);
|
||||
__device__ double __dsqrt_rz(double x);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __dsub_rd
|
||||
@@ -1889,7 +1897,7 @@ __device__ double __dsqrt_rz(double x);
|
||||
__device__ static double __dsub_rd(double x, double y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __dsub_rn
|
||||
@@ -1897,7 +1905,7 @@ __device__ static double __dsub_rd(double x, double y);
|
||||
__device__ static double __dsub_rn(double x, double y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __dsub_ru
|
||||
@@ -1905,7 +1913,7 @@ __device__ static double __dsub_rn(double x, double y);
|
||||
__device__ static double __dsub_ru(double x, double y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __dsub_rz
|
||||
@@ -1913,7 +1921,7 @@ __device__ static double __dsub_ru(double x, double y);
|
||||
__device__ static double __dsub_rz(double x, double y);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fma_rd
|
||||
@@ -1921,7 +1929,7 @@ __device__ static double __dsub_rz(double x, double y);
|
||||
__device__ double __fma_rd(double x, double y, double z);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fma_rn
|
||||
@@ -1929,7 +1937,7 @@ __device__ double __fma_rd(double x, double y, double z);
|
||||
__device__ double __fma_rn(double x, double y, double z);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fma_ru
|
||||
@@ -1937,7 +1945,7 @@ __device__ double __fma_rn(double x, double y, double z);
|
||||
__device__ double __fma_ru(double x, double y, double z);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __fma_rz
|
||||
@@ -1945,7 +1953,7 @@ __device__ double __fma_ru(double x, double y, double z);
|
||||
__device__ double __fma_rz(double x, double y, double z);
|
||||
|
||||
```
|
||||
**Description:** Supported
|
||||
**Description:** Unsupported
|
||||
|
||||
|
||||
### __brev
|
||||
|
||||
@@ -51,6 +51,8 @@ const std::map<llvm::StringRef, hipCounter>& CUDA_RENAMES_MAP() {
|
||||
ret.insert(CUDA_DRIVER_FUNCTION_MAP.begin(), CUDA_DRIVER_FUNCTION_MAP.end());
|
||||
ret.insert(CUDA_RUNTIME_TYPE_NAME_MAP.begin(), CUDA_RUNTIME_TYPE_NAME_MAP.end());
|
||||
ret.insert(CUDA_RUNTIME_FUNCTION_MAP.begin(), CUDA_RUNTIME_FUNCTION_MAP.end());
|
||||
ret.insert(CUDA_COMPLEX_TYPE_NAME_MAP.begin(), CUDA_COMPLEX_TYPE_NAME_MAP.end());
|
||||
ret.insert(CUDA_COMPLEX_FUNCTION_MAP.begin(), CUDA_COMPLEX_FUNCTION_MAP.end());
|
||||
ret.insert(CUDA_BLAS_TYPE_NAME_MAP.begin(), CUDA_BLAS_TYPE_NAME_MAP.end());
|
||||
ret.insert(CUDA_BLAS_FUNCTION_MAP.begin(), CUDA_BLAS_FUNCTION_MAP.end());
|
||||
ret.insert(CUDA_RAND_TYPE_NAME_MAP.begin(), CUDA_RAND_TYPE_NAME_MAP.end());
|
||||
|
||||
@@ -15,6 +15,10 @@ extern const std::map<llvm::StringRef, hipCounter> CUDA_DRIVER_TYPE_NAME_MAP;
|
||||
extern const std::map<llvm::StringRef, hipCounter> CUDA_DRIVER_FUNCTION_MAP;
|
||||
// Maps the names of CUDA RUNTIME API types to the corresponding HIP types
|
||||
extern const std::map<llvm::StringRef, hipCounter> CUDA_RUNTIME_TYPE_NAME_MAP;
|
||||
// Maps the names of CUDA Complex API types to the corresponding HIP types
|
||||
extern const std::map<llvm::StringRef, hipCounter> CUDA_COMPLEX_TYPE_NAME_MAP;
|
||||
// Maps the names of CUDA Complex API functions to the corresponding HIP functions
|
||||
extern const std::map<llvm::StringRef, hipCounter> CUDA_COMPLEX_FUNCTION_MAP;
|
||||
// Maps the names of CUDA RUNTIME API functions to the corresponding HIP functions
|
||||
extern const std::map<llvm::StringRef, hipCounter> CUDA_RUNTIME_FUNCTION_MAP;
|
||||
// Maps the names of CUDA BLAS API types to the corresponding HIP types
|
||||
|
||||
@@ -0,0 +1,28 @@
|
||||
#include "CUDA2HIP.h"
|
||||
|
||||
// Maps the names of CUDA Complex API functions to the corresponding HIP functions
|
||||
const std::map<llvm::StringRef, hipCounter> CUDA_COMPLEX_FUNCTION_MAP{
|
||||
{"cuCrealf", {"hipCrealf", CONV_COMPLEX, API_COMPLEX}},
|
||||
{"cuCimagf", {"hipCimagf", CONV_COMPLEX, API_COMPLEX}},
|
||||
{"make_cuFloatComplex", {"make_hipFloatComplex", CONV_COMPLEX, API_COMPLEX}},
|
||||
{"cuConjf", {"hipConjf", CONV_COMPLEX, API_COMPLEX}},
|
||||
{"cuCaddf", {"hipCaddf", CONV_COMPLEX, API_COMPLEX}},
|
||||
{"cuCsubf", {"hipCsubf", CONV_COMPLEX, API_COMPLEX}},
|
||||
{"cuCmulf", {"hipCmulf", CONV_COMPLEX, API_COMPLEX}},
|
||||
{"cuCdivf", {"hipCdivf", CONV_COMPLEX, API_COMPLEX}},
|
||||
{"cuCabsf", {"hipCabsf", CONV_COMPLEX, API_COMPLEX}},
|
||||
{"cuCreal", {"hipCreal", CONV_COMPLEX, API_COMPLEX}},
|
||||
{"cuCimag", {"hipCimag", CONV_COMPLEX, API_COMPLEX}},
|
||||
{"make_cuDoubleComplex", {"make_hipDoubleComplex", CONV_COMPLEX, API_COMPLEX}},
|
||||
{"cuConj", {"hipConj", CONV_COMPLEX, API_COMPLEX}},
|
||||
{"cuCadd", {"hipCadd", CONV_COMPLEX, API_COMPLEX}},
|
||||
{"cuCsub", {"hipCsub", CONV_COMPLEX, API_COMPLEX}},
|
||||
{"cuCmul", {"hipCmul", CONV_COMPLEX, API_COMPLEX}},
|
||||
{"cuCdiv", {"hipCdiv", CONV_COMPLEX, API_COMPLEX}},
|
||||
{"cuCabs", {"hipCabs", CONV_COMPLEX, API_COMPLEX}},
|
||||
{"make_cuComplex", {"make_hipComplex", CONV_COMPLEX, API_COMPLEX}},
|
||||
{"cuComplexFloatToDouble", {"hipComplexFloatToDouble", CONV_COMPLEX, API_COMPLEX}},
|
||||
{"cuComplexDoubleToFloat", {"hipComplexDoubleToFloat", CONV_COMPLEX, API_COMPLEX}},
|
||||
{"cuCfmaf", {"hipCfmaf", CONV_COMPLEX, API_COMPLEX}},
|
||||
{"cuCfma", {"hipCfma", CONV_COMPLEX, API_COMPLEX}},
|
||||
};
|
||||
@@ -0,0 +1,8 @@
|
||||
#include "CUDA2HIP.h"
|
||||
|
||||
// Maps the names of CUDA Complex API types to the corresponding HIP types
|
||||
const std::map<llvm::StringRef, hipCounter> CUDA_COMPLEX_TYPE_NAME_MAP{
|
||||
{"cuFloatComplex", {"hipFloatComplex", CONV_TYPE, API_COMPLEX}},
|
||||
{"cuDoubleComplex", {"hipDoubleComplex", CONV_TYPE, API_COMPLEX}},
|
||||
{"cuComplex", {"hipComplex", CONV_TYPE, API_COMPLEX}},
|
||||
};
|
||||
File diff soppresso perché troppo grande
Carica Diff
@@ -80,8 +80,10 @@ const std::map<llvm::StringRef, hipCounter> CUDA_DRIVER_TYPE_NAME_MAP{
|
||||
{"CUDA_TEXTURE_DESC_st", {"HIP_TEXTURE_DESC", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}},
|
||||
{"CUDA_TEXTURE_DESC", {"HIP_TEXTURE_DESC", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}},
|
||||
|
||||
{"CUdevprop_st", {"hipDeviceProp_t", CONV_TYPE, API_DRIVER}},
|
||||
{"CUdevprop", {"hipDeviceProp_t", CONV_TYPE, API_DRIVER}},
|
||||
// no analogue
|
||||
// NOTE: cudaDeviceProp differs
|
||||
{"CUdevprop_st", {"hipDeviceProp_t", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}},
|
||||
{"CUdevprop", {"hipDeviceProp_t", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED}},
|
||||
|
||||
// cudaIpcEventHandle_st
|
||||
{"CUipcEventHandle_st", {"ihipIpcEventHandle_t", CONV_TYPE, API_DRIVER}},
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#include "CUDA2HIP.h"
|
||||
|
||||
// Map of all functions
|
||||
// Map of all CUDA Runtime API functions
|
||||
const std::map<llvm::StringRef, hipCounter> CUDA_RUNTIME_FUNCTION_MAP{
|
||||
// Error API
|
||||
{"cudaGetLastError", {"hipGetLastError", CONV_ERROR, API_RUNTIME}},
|
||||
@@ -9,29 +9,49 @@ const std::map<llvm::StringRef, hipCounter> CUDA_RUNTIME_FUNCTION_MAP{
|
||||
{"cudaGetErrorString", {"hipGetErrorString", CONV_ERROR, API_RUNTIME}},
|
||||
|
||||
// memcpy functions
|
||||
// no analogue
|
||||
// NOTE: Not equal to cuMemcpy due to different signatures
|
||||
{"cudaMemcpy", {"hipMemcpy", CONV_MEMORY, API_RUNTIME}},
|
||||
{"cudaMemcpyToArray", {"hipMemcpyToArray", CONV_MEMORY, API_RUNTIME}},
|
||||
{"cudaMemcpyToSymbol", {"hipMemcpyToSymbol", CONV_MEMORY, API_RUNTIME}},
|
||||
{"cudaMemcpyToSymbolAsync", {"hipMemcpyToSymbolAsync", CONV_MEMORY, API_RUNTIME}},
|
||||
|
||||
{"cudaMemcpyAsync", {"hipMemcpyAsync", CONV_MEMORY, API_RUNTIME}},
|
||||
// no analogue
|
||||
// NOTE: Not equal to cuMemcpy2D due to different signatures
|
||||
{"cudaMemcpy2D", {"hipMemcpy2D", CONV_MEMORY, API_RUNTIME}},
|
||||
// no analogue
|
||||
// NOTE: Not equal to cuMemcpy2DAsync due to different signatures
|
||||
{"cudaMemcpy2DAsync", {"hipMemcpy2DAsync", CONV_MEMORY, API_RUNTIME}},
|
||||
{"cudaMemcpy2DToArray", {"hipMemcpy2DToArray", CONV_MEMORY, API_RUNTIME}},
|
||||
{"cudaMemcpy2DArrayToArray", {"hipMemcpy2DArrayToArray", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}},
|
||||
{"cudaMemcpy2DFromArray", {"hipMemcpy2DFromArray", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}},
|
||||
{"cudaMemcpy2DFromArrayAsync", {"hipMemcpy2DFromArrayAsync", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}},
|
||||
{"cudaMemcpy2DToArrayAsync", {"hipMemcpy2DToArrayAsync", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}},
|
||||
// no analogue
|
||||
// NOTE: Not equal to cuMemcpy3D due to different signatures
|
||||
{"cudaMemcpy3D", {"hipMemcpy3D", CONV_MEMORY, API_RUNTIME}},
|
||||
// no analogue
|
||||
// NOTE: Not equal to cuMemcpy3DAsync due to different signatures
|
||||
{"cudaMemcpy3DAsync", {"hipMemcpy3DAsync", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}},
|
||||
// no analogue
|
||||
// NOTE: Not equal to cuMemcpy3DPeer due to different signatures
|
||||
{"cudaMemcpy3DPeer", {"hipMemcpy3DPeer", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}},
|
||||
// no analogue
|
||||
// NOTE: Not equal to cuMemcpy3DPeerAsync due to different signatures
|
||||
{"cudaMemcpy3DPeerAsync", {"hipMemcpy3DPeerAsync", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}},
|
||||
// no analogue
|
||||
// NOTE: Not equal to cuMemcpyAtoA due to different signatures
|
||||
{"cudaMemcpyArrayToArray", {"hipMemcpyArrayToArray", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}},
|
||||
{"cudaMemcpyFromArrayAsync", {"hipMemcpyFromArrayAsync", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}},
|
||||
{"cudaMemcpyFromSymbol", {"hipMemcpyFromSymbol", CONV_MEMORY, API_RUNTIME}},
|
||||
{"cudaMemcpyFromSymbolAsync", {"hipMemcpyFromSymbolAsync", CONV_MEMORY, API_RUNTIME}},
|
||||
{"cudaMemAdvise", {"hipMemAdvise", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, //
|
||||
{"cudaMemRangeGetAttribute", {"hipMemRangeGetAttribute", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, //
|
||||
{"cudaMemRangeGetAttributes", {"hipMemRangeGetAttributes", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, //
|
||||
// cuMemAdvise
|
||||
{"cudaMemAdvise", {"hipMemAdvise", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}},
|
||||
// cuMemRangeGetAttribute
|
||||
{"cudaMemRangeGetAttribute", {"hipMemRangeGetAttribute", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}},
|
||||
// cuMemRangeGetAttributes
|
||||
{"cudaMemRangeGetAttributes", {"hipMemRangeGetAttributes", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}},
|
||||
|
||||
// memset
|
||||
{"cudaMemset", {"hipMemset", CONV_MEMORY, API_RUNTIME}},
|
||||
@@ -42,13 +62,17 @@ const std::map<llvm::StringRef, hipCounter> CUDA_RUNTIME_FUNCTION_MAP{
|
||||
{"cudaMemset3DAsync", {"hipMemset3DAsync", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}},
|
||||
|
||||
// Memory management
|
||||
// cuMemGetInfo
|
||||
{"cudaMemGetInfo", {"hipMemGetInfo", CONV_MEMORY, API_RUNTIME}},
|
||||
{"cudaArrayGetInfo", {"hipArrayGetInfo", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}},
|
||||
// no analogue
|
||||
// NOTE: Not equal to cuMipmappedArrayDestroy due to different signatures
|
||||
{"cudaFreeMipmappedArray", {"hipFreeMipmappedArray", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}},
|
||||
{"cudaGetMipmappedArrayLevel", {"hipGetMipmappedArrayLevel", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}},
|
||||
{"cudaGetSymbolAddress", {"hipGetSymbolAddress", CONV_MEMORY, API_RUNTIME}},
|
||||
{"cudaGetSymbolSize", {"hipGetSymbolSize", CONV_MEMORY, API_RUNTIME}},
|
||||
{"cudaMemPrefetchAsync", {"hipMemPrefetchAsync", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}}, // // API_Driver ANALOGUE (cuMemPrefetchAsync)
|
||||
// TODO: double check cuMemPrefetchAsync
|
||||
{"cudaMemPrefetchAsync", {"hipMemPrefetchAsync", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}},
|
||||
|
||||
// malloc
|
||||
{"cudaMalloc", {"hipMalloc", CONV_MEMORY, API_RUNTIME}},
|
||||
@@ -57,15 +81,22 @@ const std::map<llvm::StringRef, hipCounter> CUDA_RUNTIME_FUNCTION_MAP{
|
||||
{"cudaMalloc3D", {"hipMalloc3D", CONV_MEMORY, API_RUNTIME}},
|
||||
{"cudaMalloc3DArray", {"hipMalloc3DArray", CONV_MEMORY, API_RUNTIME}},
|
||||
{"cudaMallocManaged", {"hipMallocManaged", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}},
|
||||
// no analogue
|
||||
// NOTE: Not equal to cuMipmappedArrayCreate due to different signatures
|
||||
{"cudaMallocMipmappedArray", {"hipMallocMipmappedArray", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}},
|
||||
{"cudaMallocPitch", {"hipMallocPitch", CONV_MEMORY, API_RUNTIME}},
|
||||
|
||||
// cuMemFree
|
||||
{"cudaFree", {"hipFree", CONV_MEMORY, API_RUNTIME}},
|
||||
// cuMemFreeHost
|
||||
{"cudaFreeHost", {"hipHostFree", CONV_MEMORY, API_RUNTIME}},
|
||||
{"cudaFreeArray", {"hipFreeArray", CONV_MEMORY, API_RUNTIME}},
|
||||
// cuMemHostRegister
|
||||
{"cudaHostRegister", {"hipHostRegister", CONV_MEMORY, API_RUNTIME}},
|
||||
// cuMemHostUnregister
|
||||
{"cudaHostUnregister", {"hipHostUnregister", CONV_MEMORY, API_RUNTIME}},
|
||||
// hipHostAlloc deprecated - use hipHostMalloc instead
|
||||
// cuMemHostAlloc
|
||||
// NOTE: hipHostAlloc deprecated - use hipHostMalloc instead
|
||||
{"cudaHostAlloc", {"hipHostMalloc", CONV_MEMORY, API_RUNTIME}},
|
||||
|
||||
// make memory functions
|
||||
@@ -74,35 +105,81 @@ const std::map<llvm::StringRef, hipCounter> CUDA_RUNTIME_FUNCTION_MAP{
|
||||
{"make_cudaPos", {"make_hipPos", CONV_MEMORY, API_RUNTIME}},
|
||||
|
||||
// Host Register Flags
|
||||
// cuMemHostGetFlags
|
||||
{"cudaHostGetFlags", {"hipHostGetFlags", CONV_MEMORY, API_RUNTIME}},
|
||||
|
||||
// Events
|
||||
{"cudaEventCreate", {"hipEventCreate", CONV_EVENT, API_RUNTIME}},
|
||||
{"cudaEventCreateWithFlags", {"hipEventCreateWithFlags", CONV_EVENT, API_RUNTIME}},
|
||||
{"cudaEventDestroy", {"hipEventDestroy", CONV_EVENT, API_RUNTIME}},
|
||||
{"cudaEventRecord", {"hipEventRecord", CONV_EVENT, API_RUNTIME}},
|
||||
{"cudaEventElapsedTime", {"hipEventElapsedTime", CONV_EVENT, API_RUNTIME}},
|
||||
{"cudaEventSynchronize", {"hipEventSynchronize", CONV_EVENT, API_RUNTIME}},
|
||||
{"cudaEventQuery", {"hipEventQuery", CONV_EVENT, API_RUNTIME}},
|
||||
// no analogue
|
||||
// NOTE: Not equal to cuEventCreate due to different signatures
|
||||
{"cudaEventCreate", {"hipEventCreate", CONV_EVENT, API_RUNTIME}},
|
||||
// cuEventCreate
|
||||
{"cudaEventCreateWithFlags", {"hipEventCreateWithFlags", CONV_EVENT, API_RUNTIME}},
|
||||
// cuEventDestroy
|
||||
{"cudaEventDestroy", {"hipEventDestroy", CONV_EVENT, API_RUNTIME}},
|
||||
// cuEventRecord
|
||||
{"cudaEventRecord", {"hipEventRecord", CONV_EVENT, API_RUNTIME}},
|
||||
// cuEventElapsedTime
|
||||
{"cudaEventElapsedTime", {"hipEventElapsedTime", CONV_EVENT, API_RUNTIME}},
|
||||
// cuEventSynchronize
|
||||
{"cudaEventSynchronize", {"hipEventSynchronize", CONV_EVENT, API_RUNTIME}},
|
||||
// cuEventQuery
|
||||
{"cudaEventQuery", {"hipEventQuery", CONV_EVENT, API_RUNTIME}},
|
||||
|
||||
// 5.6. External Resource Interoperability
|
||||
// cuDestroyExternalMemory
|
||||
{"cudaDestroyExternalMemory", {"hipDestroyExternalMemory", CONV_EXT_RES, API_RUNTIME, HIP_UNSUPPORTED}},
|
||||
// cuDestroyExternalSemaphore
|
||||
{"cudaDestroyExternalSemaphore", {"hipDestroyExternalSemaphore", CONV_EXT_RES, API_RUNTIME, HIP_UNSUPPORTED}},
|
||||
// cuExternalMemoryGetMappedBuffer
|
||||
{"cudaExternalMemoryGetMappedBuffer", {"hipExternalMemoryGetMappedBuffer", CONV_EXT_RES, API_RUNTIME, HIP_UNSUPPORTED}},
|
||||
// cuExternalMemoryGetMappedMipmappedArray
|
||||
{"cudaExternalMemoryGetMappedMipmappedArray", {"hipExternalMemoryGetMappedMipmappedArray", CONV_EXT_RES, API_RUNTIME, HIP_UNSUPPORTED}},
|
||||
// cuImportExternalMemory
|
||||
{"cudaImportExternalMemory", {"hipImportExternalMemory", CONV_EXT_RES, API_RUNTIME, HIP_UNSUPPORTED}},
|
||||
// cuImportExternalSemaphore
|
||||
{"cudaImportExternalSemaphore", {"hipImportExternalSemaphore", CONV_EXT_RES, API_RUNTIME, HIP_UNSUPPORTED}},
|
||||
// cuSignalExternalSemaphoresAsync
|
||||
{"cudaSignalExternalSemaphoresAsync", {"hipSignalExternalSemaphoresAsync", CONV_EXT_RES, API_RUNTIME, HIP_UNSUPPORTED}},
|
||||
// cuWaitExternalSemaphoresAsync
|
||||
{"cudaWaitExternalSemaphoresAsync", {"hipWaitExternalSemaphoresAsync", CONV_EXT_RES, API_RUNTIME, HIP_UNSUPPORTED}},
|
||||
|
||||
// Streams
|
||||
// no analogue
|
||||
// NOTE: Not equal to cuStreamCreate due to different signatures
|
||||
{"cudaStreamCreate", {"hipStreamCreate", CONV_STREAM, API_RUNTIME}},
|
||||
// cuStreamCreate
|
||||
{"cudaStreamCreateWithFlags", {"hipStreamCreateWithFlags", CONV_STREAM, API_RUNTIME}},
|
||||
{"cudaStreamCreateWithPriority", {"hipStreamCreateWithPriority", CONV_STREAM, API_RUNTIME, HIP_UNSUPPORTED}},
|
||||
// cuStreamCreateWithPriority
|
||||
{"cudaStreamCreateWithPriority", {"hipStreamCreateWithPriority", CONV_STREAM, API_RUNTIME}},
|
||||
// cuStreamDestroy
|
||||
{"cudaStreamDestroy", {"hipStreamDestroy", CONV_STREAM, API_RUNTIME}},
|
||||
// cuStreamWaitEvent
|
||||
{"cudaStreamWaitEvent", {"hipStreamWaitEvent", CONV_STREAM, API_RUNTIME}},
|
||||
// cuStreamSynchronize
|
||||
{"cudaStreamSynchronize", {"hipStreamSynchronize", CONV_STREAM, API_RUNTIME}},
|
||||
// cuStreamGetFlags
|
||||
{"cudaStreamGetFlags", {"hipStreamGetFlags", CONV_STREAM, API_RUNTIME}},
|
||||
// cuStreamQuery
|
||||
{"cudaStreamQuery", {"hipStreamQuery", CONV_STREAM, API_RUNTIME}},
|
||||
// cuStreamAddCallback
|
||||
{"cudaStreamAddCallback", {"hipStreamAddCallback", CONV_STREAM, API_RUNTIME}},
|
||||
// cuStreamAttachMemAsync
|
||||
{"cudaStreamAttachMemAsync", {"hipStreamAttachMemAsync", CONV_STREAM, API_RUNTIME, HIP_UNSUPPORTED}},
|
||||
{"cudaStreamGetPriority", {"hipStreamGetPriority", CONV_STREAM, API_RUNTIME, HIP_UNSUPPORTED}},
|
||||
// cuStreamBeginCapture
|
||||
{"cudaStreamBeginCapture", {"hipStreamBeginCapture", CONV_STREAM, API_RUNTIME, HIP_UNSUPPORTED}},
|
||||
// cuStreamEndCapture
|
||||
{"cudaStreamEndCapture", {"hipStreamEndCapture", CONV_STREAM, API_RUNTIME, HIP_UNSUPPORTED}},
|
||||
// cuStreamIsCapturing
|
||||
{"cudaStreamIsCapturing", {"hipStreamIsCapturing", CONV_STREAM, API_RUNTIME, HIP_UNSUPPORTED}},
|
||||
// cuStreamGetPriority
|
||||
{"cudaStreamGetPriority", {"hipStreamGetPriority", CONV_STREAM, API_RUNTIME}},
|
||||
|
||||
// Other synchronization
|
||||
{"cudaDeviceSynchronize", {"hipDeviceSynchronize", CONV_DEVICE, API_RUNTIME}},
|
||||
{"cudaDeviceReset", {"hipDeviceReset", CONV_DEVICE, API_RUNTIME}},
|
||||
{"cudaSetDevice", {"hipSetDevice", CONV_DEVICE, API_RUNTIME}},
|
||||
{"cudaGetDevice", {"hipGetDevice", CONV_DEVICE, API_RUNTIME}},
|
||||
// cuDeviceGetCount
|
||||
{"cudaGetDeviceCount", {"hipGetDeviceCount", CONV_DEVICE, API_RUNTIME}},
|
||||
{"cudaChooseDevice", {"hipChooseDevice", CONV_DEVICE, API_RUNTIME}},
|
||||
|
||||
@@ -118,20 +195,25 @@ const std::map<llvm::StringRef, hipCounter> CUDA_RUNTIME_FUNCTION_MAP{
|
||||
{"cudaDeviceGetAttribute", {"hipDeviceGetAttribute", CONV_DEVICE, API_RUNTIME}},
|
||||
|
||||
// Pointer Attributes
|
||||
// struct cudaPointerAttributes
|
||||
{"cudaPointerGetAttributes", {"hipPointerGetAttributes", CONV_MEMORY, API_RUNTIME}},
|
||||
|
||||
// no analogue
|
||||
// NOTE: Not equal to cuPointerGetAttributes due to different signatures
|
||||
{"cudaPointerGetAttributes", {"hipPointerGetAttributes", CONV_ADDRESSING, API_RUNTIME}},
|
||||
// cuMemHostGetDevicePointer
|
||||
{"cudaHostGetDevicePointer", {"hipHostGetDevicePointer", CONV_MEMORY, API_RUNTIME}},
|
||||
|
||||
// Device
|
||||
{"cudaGetDeviceProperties", {"hipGetDeviceProperties", CONV_DEVICE, API_RUNTIME}},
|
||||
// cuDeviceGetPCIBusId
|
||||
{"cudaDeviceGetPCIBusId", {"hipDeviceGetPCIBusId", CONV_DEVICE, API_RUNTIME}},
|
||||
// cuDeviceGetByPCIBusId
|
||||
{"cudaDeviceGetByPCIBusId", {"hipDeviceGetByPCIBusId", CONV_DEVICE, API_RUNTIME}},
|
||||
{"cudaDeviceGetStreamPriorityRange", {"hipDeviceGetStreamPriorityRange", CONV_DEVICE, API_RUNTIME, HIP_UNSUPPORTED}},
|
||||
// cuCtxGetStreamPriorityRange
|
||||
{"cudaDeviceGetStreamPriorityRange", {"hipDeviceGetStreamPriorityRange", CONV_DEVICE, API_RUNTIME}},
|
||||
{"cudaSetValidDevices", {"hipSetValidDevices", CONV_DEVICE, API_RUNTIME, HIP_UNSUPPORTED}},
|
||||
|
||||
// Device Flags
|
||||
{"cudaGetDeviceFlags", {"hipGetDeviceFlags", CONV_DEVICE, API_RUNTIME, HIP_UNSUPPORTED}},
|
||||
// cuCtxGetFlags
|
||||
{"cudaGetDeviceFlags", {"hipCtxGetFlags", CONV_DEVICE, API_RUNTIME}},
|
||||
{"cudaSetDeviceFlags", {"hipSetDeviceFlags", CONV_DEVICE, API_RUNTIME}},
|
||||
|
||||
// Cache config
|
||||
@@ -179,7 +261,7 @@ const std::map<llvm::StringRef, hipCounter> CUDA_RUNTIME_FUNCTION_MAP{
|
||||
// {"cudaThreadGetSharedMemConfig", {"hipDeviceGetSharedMemConfig", CONV_DEVICE, API_RUNTIME}},
|
||||
// {"cudaThreadSetSharedMemConfig", {"hipDeviceSetSharedMemConfig", CONV_DEVICE, API_RUNTIME}},
|
||||
|
||||
|
||||
// cuCtxGetLimit
|
||||
{"cudaDeviceGetLimit", {"hipDeviceGetLimit", CONV_DEVICE, API_RUNTIME}},
|
||||
|
||||
// Profiler
|
||||
|
||||
@@ -270,14 +270,14 @@ bool HipifyAction::cudaLaunchKernel(const clang::ast_matchers::MatchFinder::Matc
|
||||
if (numArgs > 0) {
|
||||
OS << ", ";
|
||||
// Start of the first argument.
|
||||
clang::SourceLocation argStart = launchKernel->getArg(0)->getLocStart();
|
||||
clang::SourceLocation argStart = llcompat::getBeginLoc(launchKernel->getArg(0));
|
||||
// End of the last argument.
|
||||
clang::SourceLocation argEnd = launchKernel->getArg(numArgs - 1)->getLocEnd();
|
||||
clang::SourceLocation argEnd = llcompat::getEndLoc(launchKernel->getArg(numArgs - 1));
|
||||
OS << readSourceText(*SM, {argStart, argEnd});
|
||||
}
|
||||
OS << ")";
|
||||
|
||||
clang::SourceRange replacementRange = getWriteRange(*SM, {launchKernel->getLocStart(), launchKernel->getLocEnd()});
|
||||
clang::SourceRange replacementRange = getWriteRange(*SM, {llcompat::getBeginLoc(launchKernel), llcompat::getEndLoc(launchKernel)});
|
||||
clang::SourceLocation launchStart = replacementRange.getBegin();
|
||||
clang::SourceLocation launchEnd = replacementRange.getEnd();
|
||||
size_t length = SM->getCharacterData(clang::Lexer::getLocForEndOfToken(launchEnd, 0, *SM, DefaultLangOptions)) - SM->getCharacterData(launchStart);
|
||||
@@ -320,8 +320,8 @@ bool HipifyAction::cudaSharedIncompleteArrayVar(const clang::ast_matchers::Match
|
||||
}
|
||||
|
||||
if (!typeName.empty()) {
|
||||
clang::SourceLocation slStart = sharedVar->getLocStart();
|
||||
clang::SourceLocation slEnd = sharedVar->getLocEnd();
|
||||
clang::SourceLocation slStart = llcompat::getBeginLoc(sharedVar->getTypeSourceInfo()->getTypeLoc());
|
||||
clang::SourceLocation slEnd = llcompat::getEndLoc(sharedVar->getTypeSourceInfo()->getTypeLoc());
|
||||
clang::SourceManager* SM = Result.SourceManager;
|
||||
size_t repLength = SM->getCharacterData(slEnd) - SM->getCharacterData(slStart) + 1;
|
||||
std::string varName = sharedVar->getNameAsString();
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
#include "Statistics.h"
|
||||
|
||||
namespace ct = clang::tooling;
|
||||
using namespace llvm;
|
||||
|
||||
/**
|
||||
* A FrontendAction that hipifies CUDA programs.
|
||||
|
||||
@@ -8,11 +8,11 @@ void PrintStackTraceOnErrorSignal() {
|
||||
#if (LLVM_VERSION_MAJOR == 3) && (LLVM_VERSION_MINOR == 8)
|
||||
llvm::sys::PrintStackTraceOnErrorSignal();
|
||||
#else
|
||||
llvm::sys::PrintStackTraceOnErrorSignal(clang::StringRef());
|
||||
llvm::sys::PrintStackTraceOnErrorSignal(StringRef());
|
||||
#endif
|
||||
}
|
||||
|
||||
ct::Replacements& getReplacements(ct::RefactoringTool& Tool, clang::StringRef file) {
|
||||
ct::Replacements& getReplacements(ct::RefactoringTool& Tool, StringRef file) {
|
||||
#if LLVM_VERSION_MAJOR > 3
|
||||
// getReplacements() now returns a map from filename to Replacements - so create an entry
|
||||
// for this source file and return a reference to it.
|
||||
@@ -40,4 +40,36 @@ void EnterPreprocessorTokenStream(clang::Preprocessor& _pp, const clang::Token *
|
||||
#endif
|
||||
}
|
||||
|
||||
// Version-portable accessor for the source location where `stmt` begins.
// Builds against LLVM < 8 use Stmt::getLocStart(); LLVM 8 and later use
// Stmt::getBeginLoc(). NOTE(review): presumably the old accessor name is not
// available in newer Clang - confirm against the supported LLVM range.
clang::SourceLocation getBeginLoc(const clang::Stmt* stmt) {
|
||||
#if LLVM_VERSION_MAJOR < 8
|
||||
return stmt->getLocStart();
|
||||
#else
|
||||
return stmt->getBeginLoc();
|
||||
#endif
|
||||
}
|
||||
|
||||
// Version-portable accessor for the source location where `typeLoc` begins.
// Builds against LLVM < 8 use TypeLoc::getLocStart(); LLVM 8 and later use
// TypeLoc::getBeginLoc().
clang::SourceLocation getBeginLoc(const clang::TypeLoc& typeLoc) {
|
||||
#if LLVM_VERSION_MAJOR < 8
|
||||
return typeLoc.getLocStart();
|
||||
#else
|
||||
return typeLoc.getBeginLoc();
|
||||
#endif
|
||||
}
|
||||
|
||||
// Version-portable accessor for the source location where `stmt` ends.
// Builds against LLVM < 8 use Stmt::getLocEnd(); LLVM 8 and later use
// Stmt::getEndLoc().
clang::SourceLocation getEndLoc(const clang::Stmt* stmt) {
|
||||
#if LLVM_VERSION_MAJOR < 8
|
||||
return stmt->getLocEnd();
|
||||
#else
|
||||
return stmt->getEndLoc();
|
||||
#endif
|
||||
}
|
||||
|
||||
// Version-portable accessor for the source location where `typeLoc` ends.
// Builds against LLVM < 8 use TypeLoc::getLocEnd(); LLVM 8 and later use
// TypeLoc::getEndLoc().
clang::SourceLocation getEndLoc(const clang::TypeLoc& typeLoc) {
|
||||
#if LLVM_VERSION_MAJOR < 8
|
||||
return typeLoc.getLocEnd();
|
||||
#else
|
||||
return typeLoc.getEndLoc();
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace llcompat
|
||||
|
||||
@@ -25,15 +25,23 @@ namespace llcompat {
|
||||
#define LLVM_DEBUG(X) DEBUG(X)
|
||||
#endif
|
||||
|
||||
// Begin/end source-location accessors for statements and type locations.
// Callers use these instead of invoking the Clang member accessors directly,
// so the LLVM-version-specific accessor name is chosen in one place.
clang::SourceLocation getBeginLoc(const clang::Stmt* stmt);
|
||||
clang::SourceLocation getBeginLoc(const clang::TypeLoc& typeLoc);
|
||||
|
||||
clang::SourceLocation getEndLoc(const clang::Stmt* stmt);
|
||||
clang::SourceLocation getEndLoc(const clang::TypeLoc& typeLoc);
|
||||
|
||||
void PrintStackTraceOnErrorSignal();
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
/**
|
||||
* Get the replacement map for a given filename in a RefactoringTool.
|
||||
*
|
||||
* Older LLVM versions don't actually support multiple filenames, so everything all gets
|
||||
* smushed together. It is the caller's responsibility to cope with this.
|
||||
*/
|
||||
ct::Replacements& getReplacements(ct::RefactoringTool& Tool, clang::StringRef file);
|
||||
ct::Replacements& getReplacements(ct::RefactoringTool& Tool, StringRef file);
|
||||
|
||||
/**
|
||||
* Add a Replacement to a Replacements.
|
||||
|
||||
@@ -33,6 +33,7 @@ THE SOFTWARE.
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <functional>
|
||||
#include <iostream>
|
||||
#include <mutex>
|
||||
@@ -56,7 +57,9 @@ template <
|
||||
typename... Ts,
|
||||
typename std::enable_if<n == sizeof...(Ts)>::type* = nullptr>
|
||||
inline std::vector<std::uint8_t> make_kernarg(
|
||||
std::vector<std::uint8_t> kernarg, const std::tuple<Ts...>&) {
|
||||
const std::tuple<Ts...>&,
|
||||
const std::vector<std::pair<std::size_t, std::size_t>>&,
|
||||
std::vector<std::uint8_t> kernarg) {
|
||||
return kernarg;
|
||||
}
|
||||
|
||||
@@ -65,7 +68,9 @@ template <
|
||||
typename... Ts,
|
||||
typename std::enable_if<n != sizeof...(Ts)>::type* = nullptr>
|
||||
inline std::vector<std::uint8_t> make_kernarg(
|
||||
std::vector<std::uint8_t> kernarg, const std::tuple<Ts...>& formals) {
|
||||
const std::tuple<Ts...>& formals,
|
||||
const std::vector<std::pair<std::size_t, std::size_t>>& size_align,
|
||||
std::vector<std::uint8_t> kernarg) {
|
||||
using T = typename std::tuple_element<n, std::tuple<Ts...>>::type;
|
||||
|
||||
static_assert(
|
||||
@@ -80,24 +85,44 @@ inline std::vector<std::uint8_t> make_kernarg(
|
||||
#endif
|
||||
|
||||
kernarg.resize(round_up_to_next_multiple_nonnegative(
|
||||
kernarg.size(), alignof(T)) + sizeof(T));
|
||||
kernarg.size(), size_align[n].second) +
|
||||
size_align[n].first);
|
||||
|
||||
new (kernarg.data() + kernarg.size() - sizeof(T)) T{std::get<n>(formals)};
|
||||
std::memcpy(
|
||||
kernarg.data() + kernarg.size() - size_align[n].first,
|
||||
&std::get<n>(formals),
|
||||
size_align[n].first);
|
||||
|
||||
return make_kernarg<n + 1>(std::move(kernarg), formals);
|
||||
return make_kernarg<n + 1>(formals, size_align, std::move(kernarg));
|
||||
}
|
||||
|
||||
template <typename... Formals, typename... Actuals>
|
||||
inline std::vector<std::uint8_t> make_kernarg(
|
||||
void (*)(Formals...), std::tuple<Actuals...> actuals) {
|
||||
void (*kernel)(Formals...), std::tuple<Actuals...> actuals) {
|
||||
static_assert(sizeof...(Formals) == sizeof...(Actuals),
|
||||
"The count of formal arguments must match the count of actuals.");
|
||||
|
||||
if (sizeof...(Formals) == 0) return {};
|
||||
|
||||
const auto it = function_names().find(
|
||||
reinterpret_cast<std::uintptr_t>(kernel));
|
||||
|
||||
if (it == function_names().cend()) {
|
||||
throw std::runtime_error{"Undefined __global__ function."};
|
||||
}
|
||||
|
||||
const auto it1 = kernargs().find(it->second);
|
||||
|
||||
if (it1 == kernargs().end()) {
|
||||
throw std::runtime_error{
|
||||
"Missing metadata for __global__ function: " + it->second};
|
||||
}
|
||||
|
||||
std::tuple<Formals...> to_formals{std::move(actuals)};
|
||||
std::vector<std::uint8_t> kernarg;
|
||||
kernarg.reserve(sizeof(to_formals));
|
||||
|
||||
return make_kernarg<0>(std::move(kernarg), to_formals);
|
||||
return make_kernarg<0>(to_formals, it1->second, std::move(kernarg));
|
||||
}
|
||||
|
||||
void hipLaunchKernelGGLImpl(std::uintptr_t function_address, const dim3& numBlocks,
|
||||
|
||||
@@ -41,8 +41,14 @@ THE SOFTWARE.
|
||||
|
||||
#define __HIP_SIZE_OF_HEAP (__HIP_NUM_PAGES * __HIP_SIZE_OF_PAGE)
|
||||
|
||||
#if __HIP__ && __HIP_DEVICE_COMPILE__
|
||||
__attribute__((weak)) __device__ char __hip_device_heap[__HIP_SIZE_OF_HEAP];
|
||||
__attribute__((weak)) __device__
|
||||
uint32_t __hip_device_page_flag[__HIP_NUM_PAGES];
|
||||
#else
|
||||
extern __device__ char __hip_device_heap[];
|
||||
extern __device__ uint32_t __hip_device_page_flag[];
|
||||
#endif
|
||||
|
||||
extern "C" inline __device__ void* __hip_malloc(size_t size) {
|
||||
char* heap = (char*)__hip_device_heap;
|
||||
|
||||
@@ -514,38 +514,41 @@ float __exp10f(float x) { return __ocml_exp10_f32(x); }
|
||||
__DEVICE__
|
||||
inline
|
||||
float __expf(float x) { return __ocml_exp_f32(x); }
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fadd_rd(float x, float y) { return __ocml_add_rtp_f32(x, y); }
|
||||
float __fadd_rd(float x, float y) { return __ocml_add_rtn_f32(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fadd_rn(float x, float y) { return __ocml_add_rte_f32(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fadd_ru(float x, float y) { return __ocml_add_rtn_f32(x, y); }
|
||||
float __fadd_ru(float x, float y) { return __ocml_add_rtp_f32(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fadd_rz(float x, float y) { return __ocml_add_rtz_f32(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fdiv_rd(float x, float y) { return x / y; }
|
||||
float __fdiv_rd(float x, float y) { return __ocml_div_rtn_f32(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fdiv_rn(float x, float y) { return x / y; }
|
||||
float __fdiv_rn(float x, float y) { return __ocml_div_rte_f32(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fdiv_ru(float x, float y) { return x / y; }
|
||||
float __fdiv_ru(float x, float y) { return __ocml_div_rtp_f32(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fdiv_rz(float x, float y) { return x / y; }
|
||||
float __fdiv_rz(float x, float y) { return __ocml_div_rtz_f32(x, y); }
|
||||
#endif
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fdividef(float x, float y) { return x / y; }
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fmaf_rd(float x, float y, float z)
|
||||
{
|
||||
return __ocml_fma_rtp_f32(x, y, z);
|
||||
return __ocml_fma_rtn_f32(x, y, z);
|
||||
}
|
||||
__DEVICE__
|
||||
inline
|
||||
@@ -557,7 +560,7 @@ __DEVICE__
|
||||
inline
|
||||
float __fmaf_ru(float x, float y, float z)
|
||||
{
|
||||
return __ocml_fma_rtn_f32(x, y, z);
|
||||
return __ocml_fma_rtp_f32(x, y, z);
|
||||
}
|
||||
__DEVICE__
|
||||
inline
|
||||
@@ -567,13 +570,13 @@ float __fmaf_rz(float x, float y, float z)
|
||||
}
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fmul_rd(float x, float y) { return __ocml_mul_rtp_f32(x, y); }
|
||||
float __fmul_rd(float x, float y) { return __ocml_mul_rtn_f32(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fmul_rn(float x, float y) { return __ocml_mul_rte_f32(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fmul_ru(float x, float y) { return __ocml_mul_rtn_f32(x, y); }
|
||||
float __fmul_ru(float x, float y) { return __ocml_mul_rtp_f32(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fmul_rz(float x, float y) { return __ocml_mul_rtz_f32(x, y); }
|
||||
@@ -594,28 +597,29 @@ inline
|
||||
float __frsqrt_rn(float x) { return __llvm_amdgcn_rsq_f32(x); }
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fsqrt_rd(float x) { return __ocml_sqrt_f32(x); }
|
||||
float __fsqrt_rd(float x) { return __ocml_sqrt_rtn_f32(x); }
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fsqrt_rn(float x) { return __ocml_sqrt_f32(x); }
|
||||
float __fsqrt_rn(float x) { return __ocml_sqrt_rte_f32(x); }
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fsqrt_ru(float x) { return __ocml_sqrt_f32(x); }
|
||||
float __fsqrt_ru(float x) { return __ocml_sqrt_rtp_f32(x); }
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fsqrt_rz(float x) { return __ocml_sqrt_f32(x); }
|
||||
float __fsqrt_rz(float x) { return __ocml_sqrt_rtz_f32(x); }
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fsub_rd(float x, float y) { return __ocml_sub_rtp_f32(x, y); }
|
||||
float __fsub_rd(float x, float y) { return __ocml_sub_rtn_f32(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fsub_rn(float x, float y) { return __ocml_sub_rte_f32(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fsub_ru(float x, float y) { return __ocml_sub_rtn_f32(x, y); }
|
||||
float __fsub_ru(float x, float y) { return __ocml_sub_rtp_f32(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
float __fsub_rz(float x, float y) { return __ocml_sub_rtz_f32(x, y); }
|
||||
#endif
|
||||
__DEVICE__
|
||||
inline
|
||||
float __log10f(float x) { return __ocml_log10_f32(x); }
|
||||
@@ -1034,39 +1038,40 @@ double yn(int n, double x)
|
||||
}
|
||||
|
||||
// BEGIN INTRINSICS
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__DEVICE__
|
||||
inline
|
||||
double __dadd_rd(double x, double y) { return __ocml_add_rtp_f64(x, y); }
|
||||
double __dadd_rd(double x, double y) { return __ocml_add_rtn_f64(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
double __dadd_rn(double x, double y) { return __ocml_add_rte_f64(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
double __dadd_ru(double x, double y) { return __ocml_add_rtn_f64(x, y); }
|
||||
double __dadd_ru(double x, double y) { return __ocml_add_rtp_f64(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
double __dadd_rz(double x, double y) { return __ocml_add_rtz_f64(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
double __ddiv_rd(double x, double y) { return x / y; }
|
||||
double __ddiv_rd(double x, double y) { return __ocml_div_rtn_f64(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
double __ddiv_rn(double x, double y) { return x / y; }
|
||||
double __ddiv_rn(double x, double y) { return __ocml_div_rte_f64(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
double __ddiv_ru(double x, double y) { return x / y; }
|
||||
double __ddiv_ru(double x, double y) { return __ocml_div_rtp_f64(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
double __ddiv_rz(double x, double y) { return x / y; }
|
||||
double __ddiv_rz(double x, double y) { return __ocml_div_rtz_f64(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
double __dmul_rd(double x, double y) { return __ocml_mul_rtp_f64(x, y); }
|
||||
double __dmul_rd(double x, double y) { return __ocml_mul_rtn_f64(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
double __dmul_rn(double x, double y) { return __ocml_mul_rte_f64(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
double __dmul_ru(double x, double y) { return __ocml_mul_rtn_f64(x, y); }
|
||||
double __dmul_ru(double x, double y) { return __ocml_mul_rtp_f64(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
double __dmul_rz(double x, double y) { return __ocml_mul_rtz_f64(x, y); }
|
||||
@@ -1084,25 +1089,25 @@ inline
|
||||
double __drcp_rz(double x) { return __llvm_amdgcn_rcp_f64(x); }
|
||||
__DEVICE__
|
||||
inline
|
||||
double __dsqrt_rd(double x) { return __ocml_sqrt_f64(x); }
|
||||
double __dsqrt_rd(double x) { return __ocml_sqrt_rtn_f64(x); }
|
||||
__DEVICE__
|
||||
inline
|
||||
double __dsqrt_rn(double x) { return __ocml_sqrt_f64(x); }
|
||||
double __dsqrt_rn(double x) { return __ocml_sqrt_rte_f64(x); }
|
||||
__DEVICE__
|
||||
inline
|
||||
double __dsqrt_ru(double x) { return __ocml_sqrt_f64(x); }
|
||||
double __dsqrt_ru(double x) { return __ocml_sqrt_rtp_f64(x); }
|
||||
__DEVICE__
|
||||
inline
|
||||
double __dsqrt_rz(double x) { return __ocml_sqrt_f64(x); }
|
||||
double __dsqrt_rz(double x) { return __ocml_sqrt_rtz_f64(x); }
|
||||
__DEVICE__
|
||||
inline
|
||||
double __dsub_rd(double x, double y) { return __ocml_sub_rtp_f64(x, y); }
|
||||
double __dsub_rd(double x, double y) { return __ocml_sub_rtn_f64(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
double __dsub_rn(double x, double y) { return __ocml_sub_rte_f64(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
double __dsub_ru(double x, double y) { return __ocml_sub_rtn_f64(x, y); }
|
||||
double __dsub_ru(double x, double y) { return __ocml_sub_rtp_f64(x, y); }
|
||||
__DEVICE__
|
||||
inline
|
||||
double __dsub_rz(double x, double y) { return __ocml_sub_rtz_f64(x, y); }
|
||||
@@ -1110,7 +1115,7 @@ __DEVICE__
|
||||
inline
|
||||
double __fma_rd(double x, double y, double z)
|
||||
{
|
||||
return __ocml_fma_rtp_f64(x, y, z);
|
||||
return __ocml_fma_rtn_f64(x, y, z);
|
||||
}
|
||||
__DEVICE__
|
||||
inline
|
||||
@@ -1122,7 +1127,7 @@ __DEVICE__
|
||||
inline
|
||||
double __fma_ru(double x, double y, double z)
|
||||
{
|
||||
return __ocml_fma_rtn_f64(x, y, z);
|
||||
return __ocml_fma_rtp_f64(x, y, z);
|
||||
}
|
||||
__DEVICE__
|
||||
inline
|
||||
@@ -1130,6 +1135,7 @@ double __fma_rz(double x, double y, double z)
|
||||
{
|
||||
return __ocml_fma_rtz_f64(x, y, z);
|
||||
}
|
||||
#endif
|
||||
// END INTRINSICS
|
||||
// END DOUBLE
|
||||
|
||||
|
||||
@@ -288,6 +288,30 @@ __attribute__((const))
|
||||
float __ocml_mul_rtz_f32(float, float);
|
||||
__device__
|
||||
__attribute__((const))
|
||||
float __ocml_div_rte_f32(float, float);
|
||||
__device__
|
||||
__attribute__((const))
|
||||
float __ocml_div_rtn_f32(float, float);
|
||||
__device__
|
||||
__attribute__((const))
|
||||
float __ocml_div_rtp_f32(float, float);
|
||||
__device__
|
||||
__attribute__((const))
|
||||
float __ocml_div_rtz_f32(float, float);
|
||||
// Single-precision square-root entry points with an explicit rounding-mode
// suffix (rte / rtn / rtp / rtz). Square root is unary: the __fsqrt_rn,
// __fsqrt_rd, __fsqrt_ru and __fsqrt_rz wrappers each pass exactly one
// operand, so every prototype takes a single float. A two-parameter
// prototype would make those one-argument calls ill-formed.
__device__
|
||||
__attribute__((const))
|
||||
float __ocml_sqrt_rte_f32(float);
|
||||
__device__
|
||||
__attribute__((const))
|
||||
float __ocml_sqrt_rtn_f32(float);
|
||||
__device__
|
||||
__attribute__((const))
|
||||
float __ocml_sqrt_rtp_f32(float);
|
||||
__device__
|
||||
__attribute__((const))
|
||||
float __ocml_sqrt_rtz_f32(float);
|
||||
__device__
|
||||
__attribute__((const))
|
||||
float __ocml_fma_rte_f32(float, float, float);
|
||||
__device__
|
||||
__attribute__((const))
|
||||
@@ -572,6 +596,30 @@ __attribute__((const))
|
||||
double __ocml_mul_rtz_f64(double, double);
|
||||
__device__
|
||||
__attribute__((const))
|
||||
double __ocml_div_rte_f64(double, double);
|
||||
__device__
|
||||
__attribute__((const))
|
||||
double __ocml_div_rtn_f64(double, double);
|
||||
__device__
|
||||
__attribute__((const))
|
||||
double __ocml_div_rtp_f64(double, double);
|
||||
__device__
|
||||
__attribute__((const))
|
||||
double __ocml_div_rtz_f64(double, double);
|
||||
// Double-precision square-root entry points with an explicit rounding-mode
// suffix (rte / rtn / rtp / rtz). Square root is unary: the __dsqrt_rn,
// __dsqrt_rd, __dsqrt_ru and __dsqrt_rz wrappers each pass exactly one
// operand, so every prototype takes a single double. A two-parameter
// prototype would make those one-argument calls ill-formed.
__device__
|
||||
__attribute__((const))
|
||||
double __ocml_sqrt_rte_f64(double);
|
||||
__device__
|
||||
__attribute__((const))
|
||||
double __ocml_sqrt_rtn_f64(double);
|
||||
__device__
|
||||
__attribute__((const))
|
||||
double __ocml_sqrt_rtp_f64(double);
|
||||
__device__
|
||||
__attribute__((const))
|
||||
double __ocml_sqrt_rtz_f64(double);
|
||||
__device__
|
||||
__attribute__((const))
|
||||
double __ocml_fma_rte_f64(double, double, double);
|
||||
__device__
|
||||
__attribute__((const))
|
||||
@@ -594,4 +642,4 @@ double __llvm_amdgcn_rsq_f64(double) __asm("llvm.amdgcn.rsq.f64");
|
||||
|
||||
#if defined(__cplusplus)
|
||||
} // extern "C"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@@ -99,6 +99,8 @@ const std::unordered_map<std::uintptr_t, std::vector<std::pair<hsa_agent_t, Kern
|
||||
functions(bool rebuild = false);
|
||||
const std::unordered_map<std::uintptr_t, std::string>& function_names(bool rebuild = false);
|
||||
std::unordered_map<std::string, void*>& globals(bool rebuild = false);
|
||||
std::unordered_map<
|
||||
std::string, std::vector<std::pair<std::size_t, std::size_t>>>& kernargs();
|
||||
|
||||
hsa_executable_t load_executable(const std::string& file, hsa_executable_t executable,
|
||||
hsa_agent_t agent);
|
||||
|
||||
@@ -23,10 +23,6 @@ THE SOFTWARE.
|
||||
#include <stdio.h>
|
||||
#include <iostream>
|
||||
#include "hip/hip_runtime.h"
|
||||
#ifdef __HIP_PLATFORM_HCC__
|
||||
#include <hc.hpp>
|
||||
#endif
|
||||
|
||||
|
||||
#define CHECK(cmd) \
|
||||
{ \
|
||||
@@ -44,7 +40,7 @@ __global__ void bit_extract_kernel(uint32_t* C_d, const uint32_t* A_d, size_t N)
|
||||
|
||||
for (size_t i = offset; i < N; i += stride) {
|
||||
#ifdef __HIP_PLATFORM_HCC__
|
||||
C_d[i] = hc::__bitextract_u32(A_d[i], 8, 4);
|
||||
C_d[i] = __bitextract_u32(A_d[i], 8, 4);
|
||||
#else /* defined __HIP_PLATFORM_NVCC__ or other path */
|
||||
C_d[i] = ((A_d[i] & 0xf00) >> 8);
|
||||
#endif
|
||||
|
||||
@@ -22,6 +22,7 @@ THE SOFTWARE.
|
||||
|
||||
#include <unordered_map>
|
||||
#include <string>
|
||||
#include <fstream>
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip_hcc_internal.h"
|
||||
@@ -86,6 +87,7 @@ __hipRegisterFatBinary(const void* data)
|
||||
|
||||
std::string target{&desc->triple[sizeof(AMDGCN_AMDHSA_TRIPLE)],
|
||||
desc->tripleSize - sizeof(AMDGCN_AMDHSA_TRIPLE)};
|
||||
tprintf(DB_FB, "Found bundle for %s\n", target.c_str());
|
||||
|
||||
for (int deviceId = 0; deviceId < g_deviceCnt; ++deviceId) {
|
||||
hsa_agent_t agent = g_allAgents[deviceId + 1];
|
||||
@@ -110,10 +112,35 @@ __hipRegisterFatBinary(const void* data)
|
||||
|
||||
if (module->executable.handle) {
|
||||
modules->at(deviceId) = module;
|
||||
tprintf(DB_FB, "Loaded code object for %s\n", name);
|
||||
if (HIP_DUMP_CODE_OBJECT) {
|
||||
char fname[30];
|
||||
static std::atomic<int> index;
|
||||
sprintf(fname, "__hip_dump_code_object%04d.o", index++);
|
||||
tprintf(DB_FB, "Dump code object %s\n", fname);
|
||||
std::ofstream ofs;
|
||||
ofs.open(fname, std::ios::binary);
|
||||
ofs << image;
|
||||
ofs.close();
|
||||
}
|
||||
} else {
|
||||
fprintf(stderr, "Failed to load code object for %s\n", name);
|
||||
abort();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (int deviceId = 0; deviceId < g_deviceCnt; ++deviceId) {
|
||||
hsa_agent_t agent = g_allAgents[deviceId + 1];
|
||||
|
||||
char name[64] = {};
|
||||
hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, name);
|
||||
if (!(*modules)[deviceId]) {
|
||||
fprintf(stderr, "No device code bundle for %s\n", name);
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
tprintf(DB_FB, "__hipRegisterFatBinary succeeds and returns %p\n", modules);
|
||||
return modules;
|
||||
}
|
||||
@@ -132,13 +159,20 @@ extern "C" void __hipRegisterFunction(
|
||||
dim3* gridDim,
|
||||
int* wSize)
|
||||
{
|
||||
HIP_INIT_API(modules, hostFunction, deviceFunction, deviceName);
|
||||
std::vector<hipFunction_t> functions{g_deviceCnt};
|
||||
|
||||
assert(modules && modules->size() >= g_deviceCnt);
|
||||
for (int deviceId = 0; deviceId < g_deviceCnt; ++deviceId) {
|
||||
hipFunction_t function;
|
||||
if (hipSuccess == hipModuleGetFunction(&function, modules->at(deviceId), deviceName)) {
|
||||
if (hipSuccess == hipModuleGetFunction(&function, modules->at(deviceId), deviceName) &&
|
||||
function != nullptr) {
|
||||
functions[deviceId] = function;
|
||||
}
|
||||
else {
|
||||
tprintf(DB_FB, "__hipRegisterFunction cannot find kernel %s for"
|
||||
" device %d\n", deviceName, deviceId);
|
||||
}
|
||||
}
|
||||
|
||||
g_functions.insert(std::make_pair(hostFunction, std::move(functions)));
|
||||
@@ -180,6 +214,7 @@ hipError_t hipSetupArgument(
|
||||
size_t size,
|
||||
size_t offset)
|
||||
{
|
||||
HIP_INIT_API(arg, size, offset);
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
LockedAccessor_CtxCrit_t crit(ctx->criticalData());
|
||||
auto& arguments = crit->_execStack.top()._arguments;
|
||||
@@ -194,6 +229,7 @@ hipError_t hipSetupArgument(
|
||||
|
||||
hipError_t hipLaunchByPtr(const void *hostFunction)
|
||||
{
|
||||
HIP_INIT_API(hostFunction);
|
||||
ihipExec_t exec;
|
||||
{
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
@@ -213,20 +249,28 @@ hipError_t hipLaunchByPtr(const void *hostFunction)
|
||||
deviceId = 0;
|
||||
}
|
||||
|
||||
hipError_t e = hipSuccess;
|
||||
decltype(g_functions)::iterator it;
|
||||
if ((it = g_functions.find(hostFunction)) == g_functions.end())
|
||||
return hipErrorUnknown;
|
||||
if ((it = g_functions.find(hostFunction)) == g_functions.end() ||
|
||||
!it->second[deviceId]) {
|
||||
e = hipErrorUnknown;
|
||||
fprintf(stderr, "hipLaunchByPtr cannot find kernel with stub address %p"
|
||||
" for device %d!\n", hostFunction, deviceId);
|
||||
abort();
|
||||
} else {
|
||||
size_t size = exec._arguments.size();
|
||||
void *extra[] = {
|
||||
HIP_LAUNCH_PARAM_BUFFER_POINTER, &exec._arguments[0],
|
||||
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
|
||||
HIP_LAUNCH_PARAM_END
|
||||
};
|
||||
|
||||
size_t size = exec._arguments.size();
|
||||
void *extra[] = {
|
||||
HIP_LAUNCH_PARAM_BUFFER_POINTER, &exec._arguments[0],
|
||||
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
|
||||
HIP_LAUNCH_PARAM_END
|
||||
};
|
||||
e = hipModuleLaunchKernel(it->second[deviceId],
|
||||
exec._gridDim.x, exec._gridDim.y, exec._gridDim.z,
|
||||
exec._blockDim.x, exec._blockDim.y, exec._blockDim.z,
|
||||
exec._sharedMem, exec._hStream, nullptr, extra);
|
||||
}
|
||||
|
||||
return hipModuleLaunchKernel(it->second[deviceId],
|
||||
exec._gridDim.x, exec._gridDim.y, exec._gridDim.z,
|
||||
exec._blockDim.x, exec._blockDim.y, exec._blockDim.z,
|
||||
exec._sharedMem, exec._hStream, nullptr, extra);
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
|
||||
@@ -97,6 +97,8 @@ int HIP_INIT_ALLOC = -1;
|
||||
int HIP_SYNC_STREAM_WAIT = 0;
|
||||
int HIP_FORCE_NULL_STREAM = 0;
|
||||
|
||||
int HIP_DUMP_CODE_OBJECT = 0;
|
||||
|
||||
|
||||
#if (__hcc_workweek__ >= 17300)
|
||||
// Make sure we have required bug fix in HCC
|
||||
@@ -1294,6 +1296,10 @@ void HipReadEnv() {
|
||||
"overridden by specifying hipEventReleaseToSystem or hipEventReleaseToDevice flag "
|
||||
"when creating the event.");
|
||||
|
||||
READ_ENV_I(release, HIP_DUMP_CODE_OBJECT, 0,
|
||||
"If set, dump code object as __hip_dump_code_object[nnnn].o in the current directory,"
|
||||
"where nnnn is the index number.");
|
||||
|
||||
// Some flags have both compile-time and runtime flags - generate a warning if user enables the
|
||||
// runtime flag but the compile-time flag is disabled.
|
||||
if (HIP_DB && !COMPILE_HIP_DB) {
|
||||
|
||||
@@ -83,11 +83,11 @@ extern int HIP_SYNC_NULL_STREAM;
|
||||
extern int HIP_INIT_ALLOC;
|
||||
extern int HIP_FORCE_NULL_STREAM;
|
||||
|
||||
extern int HIP_DUMP_CODE_OBJECT;
|
||||
|
||||
// TODO - remove when this is standard behavior.
|
||||
extern int HCC_OPT_FLUSH;
|
||||
|
||||
|
||||
// Class to assign a short TID to each new thread, for HIP debugging purposes.
|
||||
class TidInfo {
|
||||
public:
|
||||
|
||||
@@ -985,10 +985,9 @@ hipError_t hipMemcpyToSymbol(const void* symbolName, const void* src, size_t cou
|
||||
|
||||
hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
|
||||
|
||||
if (kind == hipMemcpyHostToDevice || kind == hipMemcpyDeviceToHost ||
|
||||
if (kind == hipMemcpyHostToDevice || kind == hipMemcpyDefault ||
|
||||
kind == hipMemcpyDeviceToDevice || kind == hipMemcpyHostToHost) {
|
||||
stream->lockedSymbolCopySync(acc, dst, (void*)src, count, offset, kind);
|
||||
// acc.memcpy_symbol(dst, (void*)src, count+offset);
|
||||
stream->locked_copySync((char*)dst+offset, (void*)src, count, kind, false);
|
||||
} else {
|
||||
return ihipLogStatus(hipErrorInvalidValue);
|
||||
}
|
||||
@@ -1018,9 +1017,9 @@ hipError_t hipMemcpyFromSymbol(void* dst, const void* symbolName, size_t count,
|
||||
|
||||
hipStream_t stream = ihipSyncAndResolveStream(hipStreamNull);
|
||||
|
||||
if (kind == hipMemcpyHostToDevice || kind == hipMemcpyDeviceToHost ||
|
||||
if (kind == hipMemcpyDefault || kind == hipMemcpyDeviceToHost ||
|
||||
kind == hipMemcpyDeviceToDevice || kind == hipMemcpyHostToHost) {
|
||||
stream->lockedSymbolCopySync(acc, dst, (void*)src, count, offset, kind);
|
||||
stream->locked_copySync((void*)dst, (char*)src+offset, count, kind, false);
|
||||
} else {
|
||||
return ihipLogStatus(hipErrorInvalidValue);
|
||||
}
|
||||
@@ -1052,7 +1051,7 @@ hipError_t hipMemcpyToSymbolAsync(const void* symbolName, const void* src, size_
|
||||
|
||||
if (stream) {
|
||||
try {
|
||||
stream->lockedSymbolCopyAsync(acc, dst, (void*)src, count, offset, kind);
|
||||
hip_internal::memcpyAsync((char*)dst+offset, src, count, kind, stream);
|
||||
} catch (ihipException& ex) {
|
||||
e = ex._code;
|
||||
}
|
||||
@@ -1088,7 +1087,7 @@ hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName, size_t co
|
||||
stream = ihipSyncAndResolveStream(stream);
|
||||
if (stream) {
|
||||
try {
|
||||
stream->lockedSymbolCopyAsync(acc, dst, src, count, offset, kind);
|
||||
hip_internal::memcpyAsync(dst, (char*)src+offset, count, kind, stream);
|
||||
} catch (ihipException& ex) {
|
||||
e = ex._code;
|
||||
}
|
||||
|
||||
@@ -258,20 +258,29 @@ struct Agent_global {
|
||||
uint32_t byte_cnt;
|
||||
};
|
||||
|
||||
inline void track(const Agent_global& x) {
|
||||
inline void track(const Agent_global& x, hsa_agent_t agent) {
|
||||
tprintf(DB_MEM, " add variable '%s' with ptr=%p size=%u to tracker\n", x.name.c_str(),
|
||||
x.address, x.byte_cnt);
|
||||
|
||||
auto device = ihipGetTlsDefaultCtx()->getWriteableDevice();
|
||||
|
||||
int deviceIndex =0;
|
||||
for ( deviceIndex = 0; deviceIndex < g_deviceCnt; deviceIndex++) {
|
||||
if(g_allAgents[deviceIndex] == agent)
|
||||
break;
|
||||
}
|
||||
auto device = ihipGetDevice(deviceIndex - 1);
|
||||
hc::AmPointerInfo ptr_info(nullptr, x.address, x.address, x.byte_cnt, device->_acc, true,
|
||||
false);
|
||||
hc::am_memtracker_add(x.address, ptr_info);
|
||||
#if USE_APP_PTR_FOR_CTX
|
||||
hc::am_memtracker_update(x.address, device->_deviceId, 0u, ihipGetTlsDefaultCtx());
|
||||
#else
|
||||
hc::am_memtracker_update(x.address, device->_deviceId, 0u);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
template <typename Container = vector<Agent_global>>
|
||||
inline hsa_status_t copy_agent_global_variables(hsa_executable_t, hsa_agent_t,
|
||||
inline hsa_status_t copy_agent_global_variables(hsa_executable_t, hsa_agent_t agent,
|
||||
hsa_executable_symbol_t x, void* out) {
|
||||
assert(out);
|
||||
|
||||
@@ -281,7 +290,7 @@ inline hsa_status_t copy_agent_global_variables(hsa_executable_t, hsa_agent_t,
|
||||
if (t == HSA_SYMBOL_KIND_VARIABLE) {
|
||||
static_cast<Container*>(out)->push_back(Agent_global{name(x), address(x), size(x)});
|
||||
|
||||
track(static_cast<Container*>(out)->back());
|
||||
track(static_cast<Container*>(out)->back(),agent);
|
||||
}
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
@@ -342,7 +351,7 @@ hipError_t read_agent_global_from_module(hipDeviceptr_t* dptr, size_t* bytes, hi
|
||||
|
||||
tie(*dptr, *bytes) = read_global_description(it0->second.cbegin(), it0->second.cend(), name);
|
||||
|
||||
return dptr ? hipSuccess : hipErrorNotFound;
|
||||
return *dptr ? hipSuccess : hipErrorNotFound;
|
||||
}
|
||||
|
||||
hipError_t read_agent_global_from_process(hipDeviceptr_t* dptr, size_t* bytes, const char* name) {
|
||||
@@ -367,7 +376,7 @@ hipError_t read_agent_global_from_process(hipDeviceptr_t* dptr, size_t* bytes, c
|
||||
|
||||
tie(*dptr, *bytes) = read_global_description(it->second.cbegin(), it->second.cend(), name);
|
||||
|
||||
return dptr ? hipSuccess : hipErrorNotFound;
|
||||
return *dptr ? hipSuccess : hipErrorNotFound;
|
||||
}
|
||||
|
||||
hsa_executable_symbol_t find_kernel_by_name(hsa_executable_t executable, const char* kname) {
|
||||
|
||||
@@ -312,8 +312,8 @@ const unordered_map<string, vector<hsa_executable_symbol_t>>& kernels(bool rebui
|
||||
|
||||
void load_code_object_and_freeze_executable(
|
||||
const string& file, hsa_agent_t agent,
|
||||
hsa_executable_t
|
||||
executable) { // TODO: the following sequence is inefficient, should be refactored
|
||||
hsa_executable_t executable) {
|
||||
// TODO: the following sequence is inefficient, should be refactored
|
||||
// into a single load of the file and subsequent ELFIO
|
||||
// processing.
|
||||
static const auto cor_deleter = [](hsa_code_object_reader_t* p) {
|
||||
@@ -340,6 +340,90 @@ void load_code_object_and_freeze_executable(
|
||||
code_readers.push_back(move(tmp));
|
||||
}
|
||||
}
|
||||
|
||||
// Collects the (size, alignment) pair of every kernel argument described in
// metadata[f, l) and appends them, in order, to size_align.
//
// metadata   - text that is scanned for "Size:" / "Align:" keys.
// f          - offset at which scanning starts.
// l          - offset at which the argument list ends; may be
//              std::string::npos when the caller found no terminator.
// size_align - output; left untouched when it already holds entries, so the
//              arguments of a given kernel are parsed at most once.
//
// Returns the offset from which the caller should resume scanning: f when the
// range is empty, l when nothing (more) can be parsed, otherwise the offset
// just past the first "Size:" key whose value lies at or beyond l.
size_t parse_args(
    const std::string& metadata,
    size_t f,
    size_t l,
    std::vector<std::pair<size_t, size_t>>& size_align) {
    static constexpr size_t size_sz{5};   // strlen("Size:")
    static constexpr size_t align_sz{6};  // strlen("Align:")

    if (f == l) return f;
    if (!size_align.empty()) return l;

    while (true) {
        // A failed find() yields npos; adding the key length to it would wrap
        // around to a tiny offset and restart parsing from the beginning of
        // the string, so the "not found" case is handled before any
        // arithmetic is done on the position.
        const auto size_pos = metadata.find("Size:", f);
        if (size_pos == std::string::npos) return l;

        f = size_pos + size_sz;
        if (l <= f) return f;  // This "Size:" lies past the argument list.

        const auto size = strtoul(&metadata[f], nullptr, 10);

        // Without a matching "Align:" inside the range there is no complete
        // entry left to record.
        const auto align_pos = metadata.find("Align:", f);
        if (align_pos == std::string::npos || l <= align_pos) return l;

        f = align_pos + align_sz;

        char* end{};  // First character after the alignment digits.
        const auto align = strtoul(&metadata[f], &end, 10);

        // Skip the digits just consumed plus the character that follows them.
        f += (end - &metadata[f]) + 1;

        size_align.emplace_back(size, align);
    }
}
|
||||
|
||||
// Walks the notes of the SHT_NOTE section returned by find_section_if() and,
// for every note named "AMD" whose descriptor contains a "Kernels:" key,
// records the kernel-argument layout of each kernel listed after it.
//
// reader   - ELF image to inspect; nothing is recorded when no SHT_NOTE
//            section is found.
// kernargs - output map keyed by the text that follows each "Name:" key. An
//            entry is created and handed to parse_args() for each kernel
//            whose "Args:" key is found and is not preceded by "CodeProps".
void read_kernarg_metadata(
    elfio& reader,
    unordered_map<string, vector<pair<size_t, size_t>>>& kernargs)
{   // TODO: this is inefficient.
    auto it = find_section_if(
        reader, [](const section* x) { return x->get_type() == SHT_NOTE; });

    if (!it) return;

    const note_section_accessor acc{reader, it};
    for (decltype(acc.get_notes_num()) i = 0; i != acc.get_notes_num(); ++i) {
        ELFIO::Elf_Word type{};
        string name{};
        void* desc{};
        Elf_Word desc_size{};

        acc.get_note(i, type, name, desc, desc_size);

        if (name != "AMD") continue; // TODO: switch to using NT_AMD_AMDGPU_HSA_METADATA.

        // Copy the descriptor into a string so it can be searched as text.
        string tmp{
            static_cast<char*>(desc), static_cast<char*>(desc) + desc_size};

        auto dx = tmp.find("Kernels:");

        if (dx == string::npos) continue;

        static constexpr decltype(tmp.size()) kernels_sz{8};
        dx += kernels_sz;

        do {
            dx = tmp.find("Name:", dx);

            if (dx == string::npos) break;

            static constexpr decltype(tmp.size()) name_sz{5};
            dx = tmp.find_first_not_of(" '", dx + name_sz);

            // A descriptor that ends right after "Name:" leaves no kernel
            // name to read; substr(npos, ...) would throw std::out_of_range.
            if (dx == string::npos) break;

            // The name runs up to the closing quote or the end of the line.
            auto fn = tmp.substr(dx, tmp.find_first_of("'\n", dx) - dx);
            dx += fn.size();

            auto dx1 = tmp.find("CodeProps", dx);
            dx = tmp.find("Args:", dx);

            // "CodeProps" ahead of the next "Args:" means that "Args:"
            // is not the current kernel's, so skip straight to "CodeProps".
            if (dx1 < dx) {
                dx = dx1;
                continue;
            }
            if (dx == string::npos) break;

            static constexpr decltype(tmp.size()) args_sz{5};
            dx = parse_args(tmp, dx + args_sz, dx1, kernargs[fn]);
        } while (true);
    }
}
|
||||
} // namespace
|
||||
|
||||
namespace hip_impl {
|
||||
@@ -501,6 +585,25 @@ unordered_map<string, void*>& globals(bool rebuild) {
|
||||
return r;
|
||||
}
|
||||
|
||||
// Returns the process-wide table that maps a kernel name to the
// (size, alignment) pairs of its arguments. The table is filled exactly once,
// on the first call, by running read_kernarg_metadata() over the first blob of
// every entry reported by code_object_blobs(); blobs that cannot be loaded as
// ELF are ignored.
unordered_map<string, vector<pair<size_t, size_t>>>& kernargs() {
    static unordered_map<string, vector<pair<size_t, size_t>>> r;
    static once_flag f;

    call_once(f, []() {
        for (auto&& blob : code_object_blobs()) {
            // Only the first blob of each entry is inspected.
            const auto& first_blob = blob.second.front();
            stringstream image{
                std::string{first_blob.cbegin(), first_blob.cend()}};

            elfio reader;
            if (reader.load(image)) {
                read_kernarg_metadata(reader, r);
            }
        }
    });

    return r;
}
|
||||
|
||||
hsa_executable_t load_executable(const string& file, hsa_executable_t executable,
|
||||
hsa_agent_t agent) {
|
||||
elfio reader;
|
||||
|
||||
@@ -34,6 +34,7 @@ THE SOFTWARE.
|
||||
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||
|
||||
__device__ void double_precision_intrinsics() {
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__dadd_rd(0.0, 1.0);
|
||||
__dadd_rn(0.0, 1.0);
|
||||
__dadd_ru(0.0, 1.0);
|
||||
@@ -62,6 +63,7 @@ __device__ void double_precision_intrinsics() {
|
||||
__fma_rn(1.0, 2.0, 3.0);
|
||||
__fma_ru(1.0, 2.0, 3.0);
|
||||
__fma_rz(1.0, 2.0, 3.0);
|
||||
#endif
|
||||
}
|
||||
|
||||
__global__ void compileDoublePrecisionIntrinsics(int ignored) {
|
||||
|
||||
@@ -38,11 +38,13 @@ __global__ void floatMath(float* In, float* Out) {
|
||||
Out[tid] = __cosf(In[tid]);
|
||||
Out[tid] = __exp10f(Out[tid]);
|
||||
Out[tid] = __expf(Out[tid]);
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
Out[tid] = __frsqrt_rn(Out[tid]);
|
||||
//Out[tid] = __fsqrt_rd(Out[tid]);
|
||||
//Out[tid] = __fsqrt_rn(Out[tid]);
|
||||
//Out[tid] = __fsqrt_ru(Out[tid]);
|
||||
//Out[tid] = __fsqrt_rz(Out[tid]);
|
||||
Out[tid] = __fsqrt_rd(Out[tid]);
|
||||
Out[tid] = __fsqrt_rn(Out[tid]);
|
||||
Out[tid] = __fsqrt_ru(Out[tid]);
|
||||
Out[tid] = __fsqrt_rz(Out[tid]);
|
||||
#endif
|
||||
Out[tid] = __log10f(Out[tid]);
|
||||
Out[tid] = __log2f(Out[tid]);
|
||||
Out[tid] = __logf(Out[tid]);
|
||||
|
||||
@@ -39,6 +39,7 @@ __device__ void single_precision_intrinsics() {
|
||||
__cosf(0.0f);
|
||||
__exp10f(0.0f);
|
||||
__expf(0.0f);
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__fadd_rd(0.0f, 1.0f);
|
||||
__fadd_rn(0.0f, 1.0f);
|
||||
__fadd_ru(0.0f, 1.0f);
|
||||
@@ -47,7 +48,9 @@ __device__ void single_precision_intrinsics() {
|
||||
__fdiv_rn(4.0f, 2.0f);
|
||||
__fdiv_ru(4.0f, 2.0f);
|
||||
__fdiv_rz(4.0f, 2.0f);
|
||||
#endif
|
||||
__fdividef(4.0f, 2.0f);
|
||||
#if defined OCML_BASIC_ROUNDED_OPERATIONS
|
||||
__fmaf_rd(1.0f, 2.0f, 3.0f);
|
||||
__fmaf_rn(1.0f, 2.0f, 3.0f);
|
||||
__fmaf_ru(1.0f, 2.0f, 3.0f);
|
||||
@@ -69,6 +72,7 @@ __device__ void single_precision_intrinsics() {
|
||||
__fsub_rn(2.0f, 1.0f);
|
||||
__fsub_ru(2.0f, 1.0f);
|
||||
__fsub_rz(2.0f, 1.0f);
|
||||
#endif
|
||||
__log10f(1.0f);
|
||||
__log2f(1.0f);
|
||||
__logf(1.0f);
|
||||
|
||||
Fai riferimento in un nuovo problema
Block a user