diff --git a/hipamd/api/hip/hip_hcc.def.in b/hipamd/api/hip/hip_hcc.def.in index f0b4560fd6..01e58aef73 100644 --- a/hipamd/api/hip/hip_hcc.def.in +++ b/hipamd/api/hip/hip_hcc.def.in @@ -92,6 +92,8 @@ hipMemcpyFromArray hipMemcpyToSymbol hipMemcpyToSymbolAsync hipMemGetAddressRange +hipGetSymbolAddress +hipGetSymbolSize hipMemGetInfo hipMemPtrGetInfo hipMemset diff --git a/hipamd/api/hip/hip_hcc.map.in b/hipamd/api/hip/hip_hcc.map.in index b6d16035fa..64d55460a9 100644 --- a/hipamd/api/hip/hip_hcc.map.in +++ b/hipamd/api/hip/hip_hcc.map.in @@ -93,6 +93,8 @@ global: hipMemcpyToSymbol; hipMemcpyToSymbolAsync; hipMemGetAddressRange; + hipGetSymbolAddress; + hipGetSymbolSize; hipMemGetInfo; hipMemPtrGetInfo; hipMemset; diff --git a/hipamd/api/hip/hip_internal.hpp b/hipamd/api/hip/hip_internal.hpp index ed32d90476..c5ba11b68a 100644 --- a/hipamd/api/hip/hip_internal.hpp +++ b/hipamd/api/hip/hip_internal.hpp @@ -85,6 +85,59 @@ namespace hip { static Function* asFunction(hipFunction_t f) { return reinterpret_cast(f); } }; }; + +struct ihipExec_t { + dim3 gridDim_; + dim3 blockDim_; + size_t sharedMem_; + hipStream_t hStream_; + std::vector arguments_; +}; + +class PlatformState { + amd::Monitor lock_; + +public: + struct RegisteredVar { + public: + RegisteredVar(): hostVar_(nullptr), size_(0), devicePtr_(nullptr) {} + RegisteredVar(char* hostVar, size_t size, hipDeviceptr_t devicePtr); + ~RegisteredVar() {} + + hipDeviceptr_t getdeviceptr() const { return devicePtr_; }; + size_t getvarsize() const { return size_; }; + + private: + char* hostVar_; // Variable name in host code + size_t size_; // Size of the variable + hipDeviceptr_t devicePtr_; //Device Memory Address of the variable. + }; + +private: + std::unordered_map > functions_; + std::unordered_map > vars_; + + static PlatformState* platform_; + + PlatformState() : lock_("Guards global function map") {} + ~PlatformState() {} +public: + static PlatformState& instance() { + return *platform_; + } + + void registerVar(const char* hostvar, const std::vector& rvar); + void registerFunction(const void* hostFunction, const std::vector& funcs); + + hipFunction_t getFunc(const void* hostFunction, int deviceId); + bool getGlobalVar(const void* hostVar, int deviceId, hipDeviceptr_t* dev_ptr, + size_t* size_ptr); + void setupArgument(const void *arg, size_t size, size_t offset); + void configureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, hipStream_t stream); + + void popExec(ihipExec_t& exec); +}; + extern std::vector g_devices; extern hipError_t ihipDeviceGetCount(int* count); extern int ihipGetDevice(); diff --git a/hipamd/api/hip/hip_memory.cpp b/hipamd/api/hip/hip_memory.cpp index 2a73d98378..f45b4944d3 100644 --- a/hipamd/api/hip/hip_memory.cpp +++ b/hipamd/api/hip/hip_memory.cpp @@ -557,36 +557,96 @@ hipError_t hipMemcpyToSymbol(const void* symbolName, const void* src, size_t cou size_t offset, hipMemcpyKind kind) { HIP_INIT_API(symbolName, src, count, offset, kind); - assert(0 && "Unimplemented"); + size_t sym_size = 0; + hipDeviceptr_t device_ptr = nullptr; - HIP_RETURN(hipErrorUnknown); + /* Get address and size for the global symbol */ + if (!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), &device_ptr, + &sym_size)) { + HIP_RETURN(hipErrorUnknown); + } + + /* Size Check to make sure offset is correct */ + if ((offset + count) != sym_size) { + return HIP_RETURN(hipErrorUnknown); + } + + device_ptr = reinterpret_cast
(device_ptr) + offset; + + /* Copy memory from source to destination address */ + HIP_RETURN(hipMemcpy(device_ptr, src, count, kind)); } hipError_t hipMemcpyFromSymbol(void* dst, const void* symbolName, size_t count, size_t offset, hipMemcpyKind kind) { HIP_INIT_API(symbolName, dst, count, offset, kind); - assert(0 && "Unimplemented"); + size_t sym_size = 0; + hipDeviceptr_t device_ptr = nullptr; - HIP_RETURN(hipErrorUnknown); + /* Get address and size for the global symbol */ + if (!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), &device_ptr, + &sym_size)) { + HIP_RETURN(hipErrorUnknown); + } + + /* Size Check to make sure offset is correct */ + if ((offset + count) != sym_size) { + return HIP_RETURN(hipErrorUnknown); + } + + device_ptr = reinterpret_cast
(device_ptr) + offset; + + /* Copy memory from source to destination address */ + HIP_RETURN(hipMemcpy(dst, device_ptr, count, kind)); } hipError_t hipMemcpyToSymbolAsync(const void* symbolName, const void* src, size_t count, size_t offset, hipMemcpyKind kind, hipStream_t stream) { HIP_INIT_API(symbolName, src, count, offset, kind, stream); - assert(0 && "Unimplemented"); + size_t sym_size = 0; + hipDeviceptr_t device_ptr = nullptr; - HIP_RETURN(hipErrorUnknown); + /* Get address and size for the global symbol */ + if (!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), &device_ptr, + &sym_size)) { + HIP_RETURN(hipErrorUnknown); + } + + /* Size Check to make sure offset is correct */ + if ((offset + count) != sym_size) { + return HIP_RETURN(hipErrorUnknown); + } + + device_ptr = reinterpret_cast
(device_ptr) + offset; + + /* Copy memory from source to destination address */ + HIP_RETURN(hipMemcpyAsync(device_ptr, src, count, kind, stream)); } hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName, size_t count, size_t offset, hipMemcpyKind kind, hipStream_t stream) { HIP_INIT_API(symbolName, dst, count, offset, kind, stream); - assert(0 && "Unimplemented"); + size_t sym_size = 0; + hipDeviceptr_t device_ptr = nullptr; - HIP_RETURN(hipErrorUnknown); + /* Get address and size for the global symbol */ + if (!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), &device_ptr, + &sym_size)) { + HIP_RETURN(hipErrorUnknown); + } + + /* Size Check to make sure offset is correct */ + if ((offset + count) != sym_size) { + return HIP_RETURN(hipErrorUnknown); + } + + device_ptr = reinterpret_cast
(device_ptr) + offset; + + /* Copy memory from source to destination address */ + HIP_RETURN(hipMemcpyAsync(dst, device_ptr, count, kind, stream)); } hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void* src, size_t sizeBytes) { diff --git a/hipamd/api/hip/hip_module.cpp b/hipamd/api/hip/hip_module.cpp index 43ac97d2ac..62f676dfe0 100644 --- a/hipamd/api/hip/hip_module.cpp +++ b/hipamd/api/hip/hip_module.cpp @@ -137,6 +137,22 @@ hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t h { HIP_INIT_API(dptr, bytes, hmod, name); + amd::Program* program = nullptr; + const device::Program* dev_program = nullptr; + + /* Get Device Program pointer*/ + program = as_amd(reinterpret_cast(hmod)); + dev_program = program->getDeviceProgram(*hip::getCurrentContext()->devices()[0]); + + if (dev_program == nullptr) { + HIP_RETURN(hipErrorUnknown); + } + + /* Find the global Symbols */ + if(!dev_program->findGlobalSymbols(dptr, bytes, name)) { + HIP_RETURN(hipErrorUnknown); + } + HIP_RETURN(hipSuccess); } diff --git a/hipamd/api/hip/hip_platform.cpp b/hipamd/api/hip/hip_platform.cpp index 795c55f87b..c6b731af58 100644 --- a/hipamd/api/hip/hip_platform.cpp +++ b/hipamd/api/hip/hip_platform.cpp @@ -31,6 +31,9 @@ THE SOFTWARE. constexpr unsigned __hipFatMAGIC2 = 0x48495046; // "HIPF" +thread_local std::stack execStack_; +PlatformState* PlatformState::platform_ = new PlatformState(); + struct __CudaFatBinaryWrapper { unsigned int magic; unsigned int version; @@ -111,94 +114,82 @@ extern "C" std::vector* __hipRegisterFatBinary(const void* data) return programs; } -struct ihipExec_t { - dim3 gridDim_; - dim3 blockDim_; - size_t sharedMem_; - hipStream_t hStream_; - std::vector arguments_; -}; +PlatformState::RegisteredVar::RegisteredVar(char* hostVar, size_t size, hipDeviceptr_t devicePtr) + : hostVar_(hostVar), size_(size), devicePtr_(devicePtr) { + amd::Memory* amd_mem_obj = nullptr; + uint32_t flags = 0; -thread_local std::stack execStack_; + /* Create an amd Memory object for the pointer */ + amd_mem_obj + = new (*hip::getCurrentContext()) amd::Buffer(*hip::getCurrentContext(), flags, size, devicePtr_); -class PlatformState { - amd::Monitor lock_; -private: - std::unordered_map > functions_; - - struct RegisteredVar { - char* var; - char* hostVar; - char* deviceVar; - int size; - bool constant; - }; - - std::unordered_map*, RegisteredVar> vars_; - - static PlatformState* platform_; - - PlatformState() : lock_("Guards global function map") {} - ~PlatformState() {} -public: - static PlatformState& instance() { - return *platform_; + if (amd_mem_obj == nullptr) { + LogError("[OCL] failed to create a mem object!"); } - void registerVar(std::vector* modules, - char* var, - char* hostVar, - char* deviceVar, - int size, - bool constant) { - amd::ScopedLock lock(lock_); - - const RegisteredVar rvar = { var, hostVar, deviceVar, size, constant != 0 }; - - vars_.insert(std::make_pair(modules, rvar)); + if (!amd_mem_obj->create(nullptr)) { + LogError("[OCL] failed to create a svm hidden buffer!"); + amd_mem_obj->release(); } - void registerFunction(const void* hostFunction, const std::vector& funcs) { - amd::ScopedLock lock(lock_); + /* Add the memory to the MemObjMap */ + amd::MemObjMap::AddMemObj(devicePtr_, amd_mem_obj); +} - functions_.insert(std::make_pair(hostFunction, funcs)); +void PlatformState::registerVar(const char* hostvar, + const std::vector& rvar) { + amd::ScopedLock lock(lock_); + vars_.insert(std::make_pair(hostvar, rvar)); +} + +void PlatformState::registerFunction(const void* hostFunction, + const std::vector& funcs) { + amd::ScopedLock lock(lock_); + functions_.insert(std::make_pair(hostFunction, funcs)); +} + +hipFunction_t PlatformState::getFunc(const void* hostFunction, int deviceId) { + amd::ScopedLock lock(lock_); + const auto it = functions_.find(hostFunction); + if (it != functions_.cend()) { + return it->second[deviceId]; + } else { + return nullptr; + } +} + +bool PlatformState::getGlobalVar(const void* hostVar, int deviceId, + hipDeviceptr_t* dev_ptr, size_t* size_ptr) { + amd::ScopedLock lock(lock_); + const auto it = vars_.find(hostVar); + if (it != vars_.cend()) { + *size_ptr = it->second[deviceId].getvarsize(); + *dev_ptr = it->second[deviceId].getdeviceptr(); + return true; + } else { + return false; + } +} + +void PlatformState::setupArgument(const void *arg, size_t size, size_t offset) { + auto& arguments = execStack_.top().arguments_; + + if (arguments.size() < offset + size) { + arguments.resize(offset + size); } - hipFunction_t getFunc(const void* hostFunction, int deviceId) { - amd::ScopedLock lock(lock_); - const auto it = functions_.find(hostFunction); - if (it != functions_.cend()) { - return it->second[deviceId]; - } else { - return nullptr; - } - } + ::memcpy(&arguments[offset], arg, size); +} - void setupArgument(const void *arg, - size_t size, - size_t offset) { - auto& arguments = execStack_.top().arguments_; +void PlatformState::configureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, + hipStream_t stream) { + execStack_.push(ihipExec_t{gridDim, blockDim, sharedMem, stream}); +} - if (arguments.size() < offset + size) { - arguments.resize(offset + size); - } - - ::memcpy(&arguments[offset], arg, size); - } - - void configureCall(dim3 gridDim, - dim3 blockDim, - size_t sharedMem, - hipStream_t stream) { - execStack_.push(ihipExec_t{gridDim, blockDim, sharedMem, stream}); - } - - void popExec(ihipExec_t& exec) { - exec = std::move(execStack_.top()); - execStack_.pop(); - } -}; -PlatformState* PlatformState::platform_ = new PlatformState(); +void PlatformState::popExec(ihipExec_t& exec) { + exec = std::move(execStack_.top()); + execStack_.pop(); +} extern "C" void __hipRegisterFunction( std::vector* modules, @@ -248,7 +239,26 @@ extern "C" void __hipRegisterVar( { HIP_INIT(); - PlatformState::instance().registerVar(modules, var, hostVar, deviceVar, size, constant != 0); + size_t sym_size = 0; + std::vector global_vars{g_devices.size()}; + + for (size_t deviceId=0; deviceId < g_devices.size(); ++deviceId) { + hipDeviceptr_t device_ptr = nullptr; + if((hipSuccess == hipModuleGetGlobal(&device_ptr, &sym_size, modules->at(deviceId), + hostVar)) && (device_ptr != nullptr)) { + + if (static_cast(size) != sym_size) { + LogError("[OCL] Size Mismatch with the HSA Symbol retrieved \n"); + } + + global_vars[deviceId] = PlatformState::RegisteredVar(hostVar, sym_size, device_ptr); + + } else { + LogError("[OCL] __hipRegisterVar cannot find kernel for device \n"); + } + } + + PlatformState::instance().registerVar(hostVar, global_vars); } extern "C" void __hipUnregisterFatBinary(std::vector* modules) @@ -314,6 +324,22 @@ extern "C" hipError_t hipLaunchByPtr(const void *hostFunction) exec.sharedMem_, exec.hStream_, nullptr, extra)); } +hipError_t hipGetSymbolAddress(void** devPtr, const void* symbolName) { + size_t size = 0; + if(!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), devPtr, &size)) { + HIP_RETURN(hipErrorUnknown); + } + HIP_RETURN(hipSuccess); +} + +hipError_t hipGetSymbolSize(size_t* sizePtr, const void* symbolName) { + hipDeviceptr_t devPtr = nullptr; + if (!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), &devPtr, sizePtr)) { + HIP_RETURN(hipErrorUnknown); + } + HIP_RETURN(hipSuccess); +} + #if defined(ATI_OS_LINUX) namespace hip_impl {