Merge pull request #748 from mkuron/getsymboladdress

Implement hipGetSymbolAddress and hipGetSymbolSize

[ROCm/clr commit: 40d3184dd1]
This commit is contained in:
Maneesh Gupta
2018-11-21 10:32:01 +05:30
zatwierdzone przez GitHub
9 zmienionych plików z 93 dodań i 11 usunięć
+3
Wyświetl plik
@@ -287,6 +287,9 @@ while (@ARGV) {
$ft{'mem'} += s/\bcudaMemcpy2DToArray\b/hipMemcpy2DToArray/g;
$ft{'mem'} += s/\bcudaMemcpyToArray\b/hipMemcpyToArray/g;
$ft{'mem'} += s/\bcudaGetSymbolAddress\b/hipGetSymbolAddress/g;
$ft{'mem'} += s/\bcudaGetSymbolSize\b/hipGetSymbolSize/g;
#--------
# Memory management:
$ft{'mem'} += s/\bcudaMalloc\b/hipMalloc/g;
@@ -132,8 +132,8 @@
| `cudaFreeHost` | `hipHostFree` |
| `cudaFreeMipmappedArray` | |
| `cudaGetMipmappedArrayLevel` | |
| `cudaGetSymbolAddress` | |
| `cudaGetSymbolSize` | |
| `cudaGetSymbolAddress` | `hipGetSymbolAddress` |
| `cudaGetSymbolSize` | `hipGetSymbolSize` |
| `cudaHostAlloc` | `hipHostMalloc` |
| `cudaHostGetDevicePointer` | `hipHostGetDevicePointer` |
| `cudaHostGetFlags` | `hipHostGetFlags` |
@@ -383,8 +383,8 @@
| `cudaCreateChannelDesc` | `hipCreateChannelDesc` |
| `cudaFuncGetAttributes` | |
| `cudaFuncSetCacheConfig` | |
| `cudaGetSymbolAddress` | |
| `cudaGetSymbolSize` | |
| `cudaGetSymbolAddress` | `hipGetSymbolAddress` |
| `cudaGetSymbolSize` | `hipGetSymbolSize` |
| `cudaGetTextureAlignmentOffset` | |
| `cudaLaunch` | |
| `cudaLaunchKernel` | |
@@ -159,7 +159,7 @@ void callMyKernel()
## Variable-Type Qualifiers
### `__constant__`
The `__constant__` keyword is supported. The host writes constant memory before launching the kernel; from the GPU, this memory is read-only during kernel execution. The functions for accessing constant memory (hipGetSymbolAddress(), hipGetSymbolSize(), hipMemcpyToSymbol(), hipMemcpyToSymbolAsync, hipMemcpyFromSymbol, hipMemcpyFromSymbolAsync) are under development.
The `__constant__` keyword is supported. The host writes constant memory before launching the kernel; from the GPU, this memory is read-only during kernel execution. The functions for accessing constant memory (hipGetSymbolAddress(), hipGetSymbolSize(), hipMemcpyToSymbol(), hipMemcpyToSymbolAsync(), hipMemcpyFromSymbol(), hipMemcpyFromSymbolAsync()) are available.
### `__shared__`
The `__shared__` keyword is supported.
@@ -69,8 +69,8 @@ const std::map<llvm::StringRef, hipCounter> CUDA_RUNTIME_FUNCTION_MAP{
// NOTE: Not equal to cuMipmappedArrayDestroy due to different signatures
{"cudaFreeMipmappedArray", {"hipFreeMipmappedArray", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}},
{"cudaGetMipmappedArrayLevel", {"hipGetMipmappedArrayLevel", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}},
{"cudaGetSymbolAddress", {"hipGetSymbolAddress", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}},
{"cudaGetSymbolSize", {"hipGetSymbolSize", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}},
{"cudaGetSymbolAddress", {"hipGetSymbolAddress", CONV_MEMORY, API_RUNTIME}},
{"cudaGetSymbolSize", {"hipGetSymbolSize", CONV_MEMORY, API_RUNTIME}},
// TODO: double check cuMemPrefetchAsync
{"cudaMemPrefetchAsync", {"hipMemPrefetchAsync", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}},
@@ -1385,6 +1385,32 @@ hipError_t hipMemcpyToSymbol(const void* symbolName, const void* src, size_t siz
size_t offset __dparm(0), hipMemcpyKind kind __dparm(hipMemcpyHostToDevice));
/**
* @brief Copies the memory address of symbol @p symbolName to @p devPtr
*
* @param[in] symbolName - Symbol on device
* @param[out] devPtr - Pointer to a pointer to the memory referred to by the symbol
* @return #hipSuccess, #hipErrorNotInitialized, #hipErrorNotFound
*
* @see hipGetSymbolSize, hipMemcpyToSymbol, hipMemcpyFromSymbol, hipMemcpyToSymbolAsync,
* hipMemcpyFromSymbolAsync
*/
hipError_t hipGetSymbolAddress(void** devPtr, const void* symbolName);
/**
* @brief Copies the size of symbol @p symbolName to @p size
*
* @param[in] symbolName - Symbol on device
* @param[out] size - Pointer to the size of the symbol
* @return #hipSuccess, #hipErrorNotInitialized, #hipErrorNotFound
*
* @see hipGetSymbolSize, hipMemcpyToSymbol, hipMemcpyFromSymbol, hipMemcpyToSymbolAsync,
* hipMemcpyFromSymbolAsync
*/
hipError_t hipGetSymbolSize(size_t* size, const void* symbolName);
/**
* @brief Copies @p sizeBytes bytes from the memory area pointed to by @p src to the memory area
* pointed to by @p offset bytes from the start of symbol @p symbol
@@ -2334,6 +2360,9 @@ hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func);
hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod,
const char* name);
hipError_t ihipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod,
const char* name);
hipError_t hipModuleGetTexRef(textureReference** texRef, hipModule_t hmod, const char* name);
/**
* @brief builds module from code object which resides in host memory. Image is pointer to that
@@ -551,6 +551,14 @@ inline static hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolN
dst, symbolName, sizeBytes, offset, hipMemcpyKindToCudaMemcpyKind(kind), stream));
}
inline static hipError_t hipGetSymbolAddress(void** devPtr, const void* symbolName) {
return hipCUDAErrorTohipError(cudaGetSymbolAddress(devPtr, symbolName));
}
inline static hipError_t hipGetSymbolSize(size_t* size, const void* symbolName) {
return hipCUDAErrorTohipError(cudaGetSymbolSize(size, symbolName));
}
inline static hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch,
size_t width, size_t height, hipMemcpyKind kind) {
return hipCUDAErrorTohipError(
+18 -1
Wyświetl plik
@@ -961,7 +961,7 @@ inline hipDeviceptr_t agent_address_for_symbol(const char* symbolName) {
#if __hcc_workweek__ >= 17481
size_t byte_cnt = 0u;
hipModuleGetGlobal(&r, &byte_cnt, 0, symbolName);
ihipModuleGetGlobal(&r, &byte_cnt, 0, symbolName);
#else
auto ctx = ihipGetTlsDefaultCtx();
auto acc = ctx->getDevice()->_acc;
@@ -1106,6 +1106,23 @@ hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName, size_t co
return ihipLogStatus(e);
}
hipError_t hipGetSymbolAddress(void** devPtr, const void* symbolName) {
HIP_INIT_API(devPtr, symbolName);
size_t size = 0;
return ihipModuleGetGlobal(devPtr, &size, 0, static_cast<const char*>(symbolName));
}
hipError_t hipGetSymbolSize(size_t* size, const void* symbolName) {
HIP_INIT_API(size, symbolName);
void* devPtr = nullptr;
return ihipModuleGetGlobal(&devPtr, size, 0, static_cast<const char*>(symbolName));
}
//---
hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind) {
HIP_INIT_SPECIAL_API((TRACE_MCMD), dst, src, sizeBytes, kind);
+8 -3
Wyświetl plik
@@ -472,14 +472,19 @@ hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t h
const char* name) {
HIP_INIT_API(dptr, bytes, hmod, name);
if (!dptr || !bytes) return ihipLogStatus(hipErrorInvalidValue);
return ihipLogStatus(ihipModuleGetGlobal(dptr, bytes, hmod, name));
}
if (!name) return ihipLogStatus(hipErrorNotInitialized);
hipError_t ihipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod,
const char* name) {
if (!dptr || !bytes) return hipErrorInvalidValue;
if (!name) return hipErrorNotInitialized;
const auto r = hmod ? read_agent_global_from_module(dptr, bytes, hmod, name)
: read_agent_global_from_process(dptr, bytes, name);
return ihipLogStatus(r);
return r;
}
namespace
@@ -40,6 +40,12 @@ __global__ void Assign(int* Out) {
globalOut[tid] = globalIn[tid];
}
__device__ __constant__ int globalConst[NUM];
__global__ void checkAddress(int* addr, bool* out) {
*out = (globalConst == addr);
}
int main() {
int *A, *Am, *B, *Ad, *C, *Cm;
A = new int[NUM];
@@ -101,6 +107,20 @@ int main() {
assert(A[i] == B[i]);
assert(A[i] == C[i]);
}
bool *checkOkD;
bool checkOk = false;
size_t symbolSize = 0;
int *symbolAddress;
hipGetSymbolSize(&symbolSize, HIP_SYMBOL(globalConst));
hipGetSymbolAddress((void**) &symbolAddress, HIP_SYMBOL(globalConst));
hipMalloc((void**)&checkOkD, sizeof(bool));
hipLaunchKernelGGL(checkAddress, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, symbolAddress, checkOkD);
hipMemcpy(&checkOk, checkOkD, sizeof(bool), hipMemcpyDeviceToHost);
hipFree(checkOkD);
assert(checkOk);
assert(symbolSize == SIZE);
hipHostFree(Am);
hipHostFree(Cm);
hipFree(Ad);