Merge pull request #748 from mkuron/getsymboladdress
Implement hipGetSymbolAddress and hipGetSymbolSize
[ROCm/clr commit: 40d3184dd1]
This commit is contained in:
@@ -287,6 +287,9 @@ while (@ARGV) {
|
||||
$ft{'mem'} += s/\bcudaMemcpy2DToArray\b/hipMemcpy2DToArray/g;
|
||||
$ft{'mem'} += s/\bcudaMemcpyToArray\b/hipMemcpyToArray/g;
|
||||
|
||||
$ft{'mem'} += s/\bcudaGetSymbolAddress\b/hipGetSymbolAddress/g;
|
||||
$ft{'mem'} += s/\bcudaGetSymbolSize\b/hipGetSymbolSize/g;
|
||||
|
||||
#--------
|
||||
# Memory management:
|
||||
$ft{'mem'} += s/\bcudaMalloc\b/hipMalloc/g;
|
||||
|
||||
+4
-4
@@ -132,8 +132,8 @@
|
||||
| `cudaFreeHost` | `hipHostFree` |
|
||||
| `cudaFreeMipmappedArray` | |
|
||||
| `cudaGetMipmappedArrayLevel` | |
|
||||
| `cudaGetSymbolAddress` | |
|
||||
| `cudaGetSymbolSize` | |
|
||||
| `cudaGetSymbolAddress` | `hipGetSymbolAddress` |
|
||||
| `cudaGetSymbolSize` | `hipGetSymbolSize` |
|
||||
| `cudaHostAlloc` | `hipHostMalloc` |
|
||||
| `cudaHostGetDevicePointer` | `hipHostGetDevicePointer` |
|
||||
| `cudaHostGetFlags` | `hipHostGetFlags` |
|
||||
@@ -383,8 +383,8 @@
|
||||
| `cudaCreateChannelDesc` | `hipCreateChannelDesc` |
|
||||
| `cudaFuncGetAttributes` | |
|
||||
| `cudaFuncSetCacheConfig` | |
|
||||
| `cudaGetSymbolAddress` | |
|
||||
| `cudaGetSymbolSize` | |
|
||||
| `cudaGetSymbolAddress` | `hipGetSymbolAddress` |
|
||||
| `cudaGetSymbolSize` | `hipGetSymbolSize` |
|
||||
| `cudaGetTextureAlignmentOffset` | |
|
||||
| `cudaLaunch` | |
|
||||
| `cudaLaunchKernel` | |
|
||||
|
||||
@@ -159,7 +159,7 @@ void callMyKernel()
|
||||
## Variable-Type Qualifiers
|
||||
|
||||
### `__constant__`
|
||||
The `__constant__` keyword is supported. The host writes constant memory before launching the kernel; from the GPU, this memory is read-only during kernel execution. The functions for accessing constant memory (hipGetSymbolAddress(), hipGetSymbolSize(), hipMemcpyToSymbol(), hipMemcpyToSymbolAsync, hipMemcpyFromSymbol, hipMemcpyFromSymbolAsync) are under development.
|
||||
The `__constant__` keyword is supported. The host writes constant memory before launching the kernel; from the GPU, this memory is read-only during kernel execution. The functions for accessing constant memory (hipGetSymbolAddress(), hipGetSymbolSize(), hipMemcpyToSymbol(), hipMemcpyToSymbolAsync(), hipMemcpyFromSymbol(), hipMemcpyFromSymbolAsync()) are available.
|
||||
|
||||
### `__shared__`
|
||||
The `__shared__` keyword is supported.
|
||||
|
||||
@@ -69,8 +69,8 @@ const std::map<llvm::StringRef, hipCounter> CUDA_RUNTIME_FUNCTION_MAP{
|
||||
// NOTE: Not equal to cuMipmappedArrayDestroy due to different signatures
|
||||
{"cudaFreeMipmappedArray", {"hipFreeMipmappedArray", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}},
|
||||
{"cudaGetMipmappedArrayLevel", {"hipGetMipmappedArrayLevel", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}},
|
||||
{"cudaGetSymbolAddress", {"hipGetSymbolAddress", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}},
|
||||
{"cudaGetSymbolSize", {"hipGetSymbolSize", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}},
|
||||
{"cudaGetSymbolAddress", {"hipGetSymbolAddress", CONV_MEMORY, API_RUNTIME}},
|
||||
{"cudaGetSymbolSize", {"hipGetSymbolSize", CONV_MEMORY, API_RUNTIME}},
|
||||
// TODO: double check cuMemPrefetchAsync
|
||||
{"cudaMemPrefetchAsync", {"hipMemPrefetchAsync", CONV_MEMORY, API_RUNTIME, HIP_UNSUPPORTED}},
|
||||
|
||||
|
||||
@@ -1385,6 +1385,32 @@ hipError_t hipMemcpyToSymbol(const void* symbolName, const void* src, size_t siz
|
||||
size_t offset __dparm(0), hipMemcpyKind kind __dparm(hipMemcpyHostToDevice));
|
||||
|
||||
|
||||
/**
|
||||
* @brief Copies the memory address of symbol @p symbolName to @p devPtr
|
||||
*
|
||||
* @param[in] symbolName - Symbol on device
|
||||
* @param[out] devPtr - Pointer to a pointer to the memory referred to by the symbol
|
||||
* @return #hipSuccess, #hipErrorNotInitialized, #hipErrorNotFound
|
||||
*
|
||||
* @see hipGetSymbolSize, hipMemcpyToSymbol, hipMemcpyFromSymbol, hipMemcpyToSymbolAsync,
|
||||
* hipMemcpyFromSymbolAsync
|
||||
*/
|
||||
hipError_t hipGetSymbolAddress(void** devPtr, const void* symbolName);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Copies the size of symbol @p symbolName to @p size
|
||||
*
|
||||
* @param[in] symbolName - Symbol on device
|
||||
* @param[out] size - Pointer to the size of the symbol
|
||||
* @return #hipSuccess, #hipErrorNotInitialized, #hipErrorNotFound
|
||||
*
|
||||
* @see hipGetSymbolSize, hipMemcpyToSymbol, hipMemcpyFromSymbol, hipMemcpyToSymbolAsync,
|
||||
* hipMemcpyFromSymbolAsync
|
||||
*/
|
||||
hipError_t hipGetSymbolSize(size_t* size, const void* symbolName);
|
||||
|
||||
|
||||
/**
|
||||
* @brief Copies @p sizeBytes bytes from the memory area pointed to by @p src to the memory area
|
||||
* pointed to by @p offset bytes from the start of symbol @p symbol
|
||||
@@ -2334,6 +2360,9 @@ hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func);
|
||||
hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod,
|
||||
const char* name);
|
||||
|
||||
hipError_t ihipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod,
|
||||
const char* name);
|
||||
|
||||
hipError_t hipModuleGetTexRef(textureReference** texRef, hipModule_t hmod, const char* name);
|
||||
/**
|
||||
* @brief builds module from code object which resides in host memory. Image is pointer to that
|
||||
|
||||
@@ -551,6 +551,14 @@ inline static hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolN
|
||||
dst, symbolName, sizeBytes, offset, hipMemcpyKindToCudaMemcpyKind(kind), stream));
|
||||
}
|
||||
|
||||
inline static hipError_t hipGetSymbolAddress(void** devPtr, const void* symbolName) {
|
||||
return hipCUDAErrorTohipError(cudaGetSymbolAddress(devPtr, symbolName));
|
||||
}
|
||||
|
||||
inline static hipError_t hipGetSymbolSize(size_t* size, const void* symbolName) {
|
||||
return hipCUDAErrorTohipError(cudaGetSymbolSize(size, symbolName));
|
||||
}
|
||||
|
||||
inline static hipError_t hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch,
|
||||
size_t width, size_t height, hipMemcpyKind kind) {
|
||||
return hipCUDAErrorTohipError(
|
||||
|
||||
@@ -961,7 +961,7 @@ inline hipDeviceptr_t agent_address_for_symbol(const char* symbolName) {
|
||||
|
||||
#if __hcc_workweek__ >= 17481
|
||||
size_t byte_cnt = 0u;
|
||||
hipModuleGetGlobal(&r, &byte_cnt, 0, symbolName);
|
||||
ihipModuleGetGlobal(&r, &byte_cnt, 0, symbolName);
|
||||
#else
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
auto acc = ctx->getDevice()->_acc;
|
||||
@@ -1106,6 +1106,23 @@ hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName, size_t co
|
||||
return ihipLogStatus(e);
|
||||
}
|
||||
|
||||
|
||||
hipError_t hipGetSymbolAddress(void** devPtr, const void* symbolName) {
|
||||
HIP_INIT_API(devPtr, symbolName);
|
||||
|
||||
size_t size = 0;
|
||||
return ihipModuleGetGlobal(devPtr, &size, 0, static_cast<const char*>(symbolName));
|
||||
}
|
||||
|
||||
|
||||
hipError_t hipGetSymbolSize(size_t* size, const void* symbolName) {
|
||||
HIP_INIT_API(size, symbolName);
|
||||
|
||||
void* devPtr = nullptr;
|
||||
return ihipModuleGetGlobal(&devPtr, size, 0, static_cast<const char*>(symbolName));
|
||||
}
|
||||
|
||||
|
||||
//---
|
||||
hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind) {
|
||||
HIP_INIT_SPECIAL_API((TRACE_MCMD), dst, src, sizeBytes, kind);
|
||||
|
||||
@@ -472,14 +472,19 @@ hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t h
|
||||
const char* name) {
|
||||
HIP_INIT_API(dptr, bytes, hmod, name);
|
||||
|
||||
if (!dptr || !bytes) return ihipLogStatus(hipErrorInvalidValue);
|
||||
return ihipLogStatus(ihipModuleGetGlobal(dptr, bytes, hmod, name));
|
||||
}
|
||||
|
||||
if (!name) return ihipLogStatus(hipErrorNotInitialized);
|
||||
hipError_t ihipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod,
|
||||
const char* name) {
|
||||
if (!dptr || !bytes) return hipErrorInvalidValue;
|
||||
|
||||
if (!name) return hipErrorNotInitialized;
|
||||
|
||||
const auto r = hmod ? read_agent_global_from_module(dptr, bytes, hmod, name)
|
||||
: read_agent_global_from_process(dptr, bytes, name);
|
||||
|
||||
return ihipLogStatus(r);
|
||||
return r;
|
||||
}
|
||||
|
||||
namespace
|
||||
|
||||
@@ -40,6 +40,12 @@ __global__ void Assign(int* Out) {
|
||||
globalOut[tid] = globalIn[tid];
|
||||
}
|
||||
|
||||
__device__ __constant__ int globalConst[NUM];
|
||||
|
||||
__global__ void checkAddress(int* addr, bool* out) {
|
||||
*out = (globalConst == addr);
|
||||
}
|
||||
|
||||
int main() {
|
||||
int *A, *Am, *B, *Ad, *C, *Cm;
|
||||
A = new int[NUM];
|
||||
@@ -101,6 +107,20 @@ int main() {
|
||||
assert(A[i] == B[i]);
|
||||
assert(A[i] == C[i]);
|
||||
}
|
||||
|
||||
bool *checkOkD;
|
||||
bool checkOk = false;
|
||||
size_t symbolSize = 0;
|
||||
int *symbolAddress;
|
||||
hipGetSymbolSize(&symbolSize, HIP_SYMBOL(globalConst));
|
||||
hipGetSymbolAddress((void**) &symbolAddress, HIP_SYMBOL(globalConst));
|
||||
hipMalloc((void**)&checkOkD, sizeof(bool));
|
||||
hipLaunchKernelGGL(checkAddress, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0, symbolAddress, checkOkD);
|
||||
hipMemcpy(&checkOk, checkOkD, sizeof(bool), hipMemcpyDeviceToHost);
|
||||
hipFree(checkOkD);
|
||||
assert(checkOk);
|
||||
assert(symbolSize == SIZE);
|
||||
|
||||
hipHostFree(Am);
|
||||
hipHostFree(Cm);
|
||||
hipFree(Ad);
|
||||
|
||||
Reference in New Issue
Block a user