From 39c608e98d968c761719caa10d75c6c8bfba601b Mon Sep 17 00:00:00 2001 From: agodavar Date: Tue, 16 Feb 2021 07:20:58 -0500 Subject: [PATCH] SWDEV-255979 - Added support of __managed__ static variable Change-Id: I9d5cbbecc8c19ec38a95c94ab4130465ba76c102 [ROCm/hip commit: 995e6336c6305b23c3a2fa69333d838a18430f82] --- projects/hip/rocclr/hip_code_object.cpp | 42 +++++++++- projects/hip/rocclr/hip_code_object.hpp | 9 +- projects/hip/rocclr/hip_global.cpp | 15 ++-- projects/hip/rocclr/hip_global.hpp | 14 +++- projects/hip/rocclr/hip_hcc.def.in | 1 + projects/hip/rocclr/hip_hcc.map.in | 1 + projects/hip/rocclr/hip_hmm.cpp | 8 +- projects/hip/rocclr/hip_module.cpp | 2 + projects/hip/rocclr/hip_platform.cpp | 38 ++++++++- projects/hip/rocclr/hip_platform.hpp | 4 + .../runtimeApi/memory/hipManagedKeyword.cpp | 84 +++++++++++++++++++ 11 files changed, 202 insertions(+), 16 deletions(-) create mode 100644 projects/hip/tests/src/runtimeApi/memory/hipManagedKeyword.cpp diff --git a/projects/hip/rocclr/hip_code_object.cpp b/projects/hip/rocclr/hip_code_object.cpp index 671ccd6d14..43a328419a 100755 --- a/projects/hip/rocclr/hip_code_object.cpp +++ b/projects/hip/rocclr/hip_code_object.cpp @@ -24,12 +24,15 @@ THE SOFTWARE. #include +#include #include "hip/hip_runtime_api.h" #include "hip/hip_runtime.h" #include "hip_internal.hpp" #include "platform/program.hpp" #include +hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, + amd::HostQueue& queue, bool isAsync = false); namespace { size_t constexpr strLiteralLength(char const* str) { return *str ? 1 + strLiteralLength(str + 1) : 0; @@ -634,7 +637,6 @@ FatBinaryInfo** StatCO::addFatBinary(const void* data, bool initialized) { if (initialized) { digestFatBinary(data, modules_[data]); } - return &modules_[data]; } @@ -651,6 +653,16 @@ hipError_t StatCO::removeFatBinary(FatBinaryInfo** module) { } } + auto it = managedVars_.begin(); + while (it != managedVars_.end()) { + if ((*it)->moduleInfo() == module) { + delete *it; + managedVars_.erase(it); + } else { + ++it; + } + } + auto fit = functions_.begin(); while (fit != functions_.end()) { if (fit->second->moduleInfo() == module) { @@ -734,4 +746,32 @@ hipError_t StatCO::getStatGlobalVar(const void* hostVar, int deviceId, hipDevice *size_ptr = dvar->size(); return hipSuccess; } + +hipError_t StatCO::registerStatManagedVar(Var* var) { + managedVars_.emplace_back(var); + return hipSuccess; +} + +hipError_t StatCO::initStatManagedVarDevicePtr(int deviceId) { + amd::ScopedLock lock(sclock_); + + if (managedVarsDevicePtrInitalized_.find(deviceId) == managedVarsDevicePtrInitalized_.end() || + !managedVarsDevicePtrInitalized_[deviceId]) { + for (auto var : managedVars_) { + DeviceVar* dvar = nullptr; + IHIP_RETURN_ONFAIL(var->getStatDeviceVar(&dvar, deviceId)); + + amd::HostQueue* queue = hip::getNullStream(); + if(queue != nullptr) { + ihipMemcpy(reinterpret_cast
(dvar->device_ptr()), var->getManagedVarPtr(), + dvar->size(), hipMemcpyHostToDevice, *queue); + } else { + ClPrint(amd::LOG_ERROR, amd::LOG_API, "Host Queue is NULL"); + return hipErrorInvalidResourceHandle; + } + } + managedVarsDevicePtrInitalized_[deviceId] = true; + } + return hipSuccess; +} }; //namespace: hip diff --git a/projects/hip/rocclr/hip_code_object.hpp b/projects/hip/rocclr/hip_code_object.hpp index 8bfa79c4df..c580a8ac6a 100755 --- a/projects/hip/rocclr/hip_code_object.hpp +++ b/projects/hip/rocclr/hip_code_object.hpp @@ -124,9 +124,10 @@ public: hipError_t removeFatBinary(FatBinaryInfo** module); hipError_t digestFatBinary(const void* data, FatBinaryInfo*& programs); - //Register vars/funcs given to use from __hipRegister[Var/Func] + //Register vars/funcs given to use from __hipRegister[Var/Func/ManagedVar] hipError_t registerStatFunction(const void* hostFunction, Function* func); hipError_t registerStatGlobalVar(const void* hostVar, Var* var); + hipError_t registerStatManagedVar(Var *var); //Retrive Vars/Funcs for a given hostSidePtr(const void*), unless stated otherwise. hipError_t getStatFunc(hipFunction_t* hfunc, const void* hostFunction, int deviceId); @@ -134,6 +135,9 @@ public: hipError_t getStatGlobalVar(const void* hostVar, int deviceId, hipDeviceptr_t* dev_ptr, size_t* size_ptr); + //Managed variable is a defined symbol in code object + //pointer to the alocated managed memory has to be copied to the address of symbol + hipError_t initStatManagedVarDevicePtr(int deviceId); private: friend class ::PlatformState; //Populated during __hipRegisterFatBinary @@ -142,6 +146,9 @@ private: std::unordered_map functions_; //Populated during __hipRegisterVars std::unordered_map vars_; + //Populated during __hipRegisterManagedVar + std::vector managedVars_; + std::unordered_map managedVarsDevicePtrInitalized_; }; }; // namespace hip diff --git a/projects/hip/rocclr/hip_global.cpp b/projects/hip/rocclr/hip_global.cpp index 9297f9ad14..8fea6b9e00 100755 --- a/projects/hip/rocclr/hip_global.cpp +++ b/projects/hip/rocclr/hip_global.cpp @@ -160,6 +160,12 @@ Var::Var(std::string name, DeviceVarKind dVarKind, size_t size, int type, int no dVar_.resize(g_devices.size()); } +Var::Var(std::string name, DeviceVarKind dVarKind, void *pointer, size_t size, + unsigned align, FatBinaryInfo** modules) : name_(name), dVarKind_(dVarKind), + size_(size), modules_(modules), managedVarPtr_(pointer), align_(align) { + dVar_.resize(g_devices.size()); +} + Var::~Var() { for (auto& elem : dVar_) { delete elem; @@ -186,15 +192,12 @@ hipError_t Var::getStatDeviceVar(DeviceVar** dvar, int deviceId) { guarantee((deviceId >= 0) , "Invalid DeviceId, less than zero"); guarantee((static_cast(deviceId) < g_devices.size()), "Invalid DeviceId, greater than no of code objects"); - - hipModule_t hmod = nullptr; - IHIP_RETURN_ONFAIL((*modules_)->BuildProgram(deviceId)); - IHIP_RETURN_ONFAIL((*modules_)->GetModule(deviceId, &hmod)); - if (dVar_[deviceId] == nullptr) { + hipModule_t hmod = nullptr; + IHIP_RETURN_ONFAIL((*modules_)->BuildProgram(deviceId)); + IHIP_RETURN_ONFAIL((*modules_)->GetModule(deviceId, &hmod)); dVar_[deviceId] = new DeviceVar(name_, hmod); } - *dvar = dVar_[deviceId]; return hipSuccess; } diff --git a/projects/hip/rocclr/hip_global.hpp b/projects/hip/rocclr/hip_global.hpp index fd57ecfb50..83fcfab817 100755 --- a/projects/hip/rocclr/hip_global.hpp +++ b/projects/hip/rocclr/hip_global.hpp @@ -81,11 +81,16 @@ public: enum DeviceVarKind { DVK_Variable = 0, DVK_Surface, - DVK_Texture + DVK_Texture, + DVK_Managed }; Var(std::string name, DeviceVarKind dVarKind, size_t size, int type, int norm, FatBinaryInfo** modules = nullptr); + + Var(std::string name, DeviceVarKind dVarKind, void *pointer, size_t size, unsigned align, + FatBinaryInfo** modules = nullptr); + ~Var(); //Return DeviceVar for this dynamically loaded module @@ -96,7 +101,7 @@ public: void resize_dVar(size_t size) { dVar_.resize(size); } FatBinaryInfo** moduleInfo() { return modules_; }; - + void* getManagedVarPtr() { return managedVarPtr_; }; private: std::vector dVar_; // DeviceVarObj per Device std::string name_; // Variable name (not unique identifier) @@ -104,7 +109,10 @@ private: size_t size_; // Size of the variable int type_; // Type(Textures/Surfaces only) int norm_; // Type(Textures/Surfaces only) - FatBinaryInfo** modules_; // static module where it is referenced + FatBinaryInfo** modules_; // static module where it is referenced + + void *managedVarPtr_; // Managed memory pointer with size_ & align_ + unsigned int align_; // Managed memory alignment }; }; //namespace: hip diff --git a/projects/hip/rocclr/hip_hcc.def.in b/projects/hip/rocclr/hip_hcc.def.in index 86cd1a21f7..94ef433b4d 100755 --- a/projects/hip/rocclr/hip_hcc.def.in +++ b/projects/hip/rocclr/hip_hcc.def.in @@ -179,6 +179,7 @@ __hipRegisterFunction __hipRegisterVar __hipRegisterSurface __hipRegisterTexture +__hipRegisterManagedVar __hipUnregisterFatBinary hipConfigureCall hipSetupArgument diff --git a/projects/hip/rocclr/hip_hcc.map.in b/projects/hip/rocclr/hip_hcc.map.in index 236bee0489..7f4bffdedc 100755 --- a/projects/hip/rocclr/hip_hcc.map.in +++ b/projects/hip/rocclr/hip_hcc.map.in @@ -179,6 +179,7 @@ global: __hipRegisterVar; __hipRegisterSurface; __hipRegisterTexture; + __hipRegisterManagedVar; __hipUnregisterFatBinary; __gnu_h2f_ieee; __gnu_f2h_ieee; diff --git a/projects/hip/rocclr/hip_hmm.cpp b/projects/hip/rocclr/hip_hmm.cpp index 2702fdfc4a..6f5ac7225b 100644 --- a/projects/hip/rocclr/hip_hmm.cpp +++ b/projects/hip/rocclr/hip_hmm.cpp @@ -25,8 +25,8 @@ #include "platform/command.hpp" #include "platform/memory.hpp" -// Forward declaraiton of a static function -static hipError_t ihipMallocManaged(void** ptr, size_t size); +// Forward declaraiton of a function +hipError_t ihipMallocManaged(void** ptr, size_t size, unsigned int align = 0); // Make sure HIP defines match ROCclr to avoid double conversion static_assert(hipCpuDeviceId == amd::CpuDeviceId, "CPU device ID mismatch with ROCclr!"); @@ -186,7 +186,7 @@ hipError_t hipStreamAttachMemAsync(hipStream_t stream, hipDeviceptr_t* dev_ptr, } // ================================================================================================ -static hipError_t ihipMallocManaged(void** ptr, size_t size) { +hipError_t ihipMallocManaged(void** ptr, size_t size, unsigned int align) { if (size == 0) { *ptr = nullptr; return hipSuccess; @@ -207,7 +207,7 @@ static hipError_t ihipMallocManaged(void** ptr, size_t size) { // Allocate SVM fine grain buffer with the forced host pointer, avoiding explicit memory // allocation in the device driver *ptr = amd::SvmBuffer::malloc(ctx, CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_ALLOC_HOST_PTR, - size, dev.info().memBaseAddrAlign_); + size, (align == 0) ? dev.info().memBaseAddrAlign_ : align); if (*ptr == nullptr) { return hipErrorMemoryAllocation; } diff --git a/projects/hip/rocclr/hip_module.cpp b/projects/hip/rocclr/hip_module.cpp index 829a72e20c..2060eaea07 100755 --- a/projects/hip/rocclr/hip_module.cpp +++ b/projects/hip/rocclr/hip_module.cpp @@ -222,6 +222,8 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, extra, startEvent, stopEvent, flags, params); + HIP_RETURN_ONFAIL(PlatformState::instance().initStatManagedVarDevicePtr(ihipGetDevice())); + if (f == nullptr) { DevLogPrintfError("%s", "Function passed is null"); return hipErrorInvalidImage; diff --git a/projects/hip/rocclr/hip_platform.cpp b/projects/hip/rocclr/hip_platform.cpp index 49790d6520..04eb1f18e7 100755 --- a/projects/hip/rocclr/hip_platform.cpp +++ b/projects/hip/rocclr/hip_platform.cpp @@ -32,6 +32,11 @@ constexpr unsigned __hipFatMAGIC2 = 0x48495046; // "HIPF" thread_local std::stack execStack_; PlatformState* PlatformState::platform_; // Initiaized as nullptr by default +//forward declaration of methods required for __hipRegisrterManagedVar +hipError_t ihipMallocManaged(void** ptr, size_t size, unsigned int align = 0); +hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, + amd::HostQueue& queue, bool isAsync = false); + struct __CudaFatBinaryWrapper { unsigned int magic; unsigned int version; @@ -76,7 +81,6 @@ extern "C" hip::FatBinaryInfo** __hipRegisterFatBinary(const void* data) fbwrapper->magic, fbwrapper->version); return nullptr; } - return PlatformState::instance().addFatBinary(fbwrapper->binary); } @@ -138,6 +142,30 @@ extern "C" void __hipRegisterSurface(hip::FatBinaryInfo** modules, // The d PlatformState::instance().registerStatGlobalVar(var, var_ptr); } +extern "C" void __hipRegisterManagedVar(void *hipModule, // Pointer to hip module returned from __hipRegisterFatbinary + void **pointer, // Pointer to a chunk of managed memory with size \p size and alignment \p align + // HIP runtime allocates such managed memory and assign it to \p pointer + void *init_value, // Initial value to be copied into \p pointer + const char *name, // Name of the variable in code object + size_t size, + unsigned align) { + HIP_INIT(); + hipError_t status = ihipMallocManaged(pointer, size, align); + if( status == hipSuccess) { + amd::HostQueue* queue = hip::getNullStream(); + if(queue != nullptr) { + ihipMemcpy(*pointer, init_value, size, hipMemcpyHostToDevice, *queue); + } else { + ClPrint(amd::LOG_ERROR, amd::LOG_API, "Host Queue is NULL"); + } + } else { + guarantee("Error during allocation of managed memory!"); + } + hip::Var* var_ptr = new hip::Var(std::string(name), hip::Var::DeviceVarKind::DVK_Managed, pointer, + size, align, reinterpret_cast(hipModule)); + PlatformState::instance().registerStatManagedVar(var_ptr); +} + extern "C" void __hipRegisterTexture(hip::FatBinaryInfo** modules, // The device modules containing code object void* var, // The shadow variable in host code char* hostVar, // Variable name in host code @@ -851,6 +879,10 @@ hipError_t PlatformState::registerStatGlobalVar(const void* hostVar, hip::Var* v return statCO_.registerStatGlobalVar(hostVar, var); } +hipError_t PlatformState::registerStatManagedVar(hip::Var* var) { + return statCO_.registerStatManagedVar(var); +} + hipError_t PlatformState::getStatFunc(hipFunction_t* hfunc, const void* hostFunction, int deviceId) { return statCO_.getStatFunc(hfunc, hostFunction, deviceId); } @@ -867,6 +899,10 @@ hipError_t PlatformState::getStatGlobalVar(const void* hostVar, int deviceId, hi return statCO_.getStatGlobalVar(hostVar, deviceId, dev_ptr, size_ptr); } +hipError_t PlatformState::initStatManagedVarDevicePtr(int deviceId) { + return statCO_.initStatManagedVarDevicePtr(deviceId); +} + void PlatformState::setupArgument(const void *arg, size_t size, size_t offset) { auto& arguments = execStack_.top().arguments_; diff --git a/projects/hip/rocclr/hip_platform.hpp b/projects/hip/rocclr/hip_platform.hpp index e9417f9680..8ab3c1c09e 100755 --- a/projects/hip/rocclr/hip_platform.hpp +++ b/projects/hip/rocclr/hip_platform.hpp @@ -73,12 +73,16 @@ public: hipError_t registerStatFunction(const void* hostFunction, hip::Function* func); hipError_t registerStatGlobalVar(const void* hostVar, hip::Var* var); + hipError_t registerStatManagedVar(hip::Var* var); + hipError_t getStatFunc(hipFunction_t* hfunc, const void* hostFunction, int deviceId); hipError_t getStatFuncAttr(hipFuncAttributes* func_attr, const void* hostFunction, int deviceId); hipError_t getStatGlobalVar(const void* hostVar, int deviceId, hipDeviceptr_t* dev_ptr, size_t* size_ptr); + hipError_t initStatManagedVarDevicePtr(int deviceId); + //Exec Functions void setupArgument(const void *arg, size_t size, size_t offset); void configureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, hipStream_t stream); diff --git a/projects/hip/tests/src/runtimeApi/memory/hipManagedKeyword.cpp b/projects/hip/tests/src/runtimeApi/memory/hipManagedKeyword.cpp new file mode 100644 index 0000000000..5cdfbfa9f3 --- /dev/null +++ b/projects/hip/tests/src/runtimeApi/memory/hipManagedKeyword.cpp @@ -0,0 +1,84 @@ +#include +#include +#include "test_common.h" + +//Enable test when compiler support is available in mainline +/* HIT_START + * BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM amd + * HIT_END + */ +#define N 1048576 +__managed__ float A[N]; // Accessible by ALL CPU and GPU functions !!! +__managed__ float B[N]; +__managed__ int x = 0; + +__global__ void add() +{ + int index = blockIdx.x * blockDim.x + threadIdx.x; + int stride = blockDim.x * gridDim.x; + for (int i = index; i < N; i += stride) + B[i] = A[i] + B[i]; +} + +__global__ void GPU_func() { + x++; +} + +bool managedSingleGPUTest() { + bool testResult = true; + + for (int i = 0; i < N; i++) { + A[i] = 1.0f; + B[i] = 2.0f; + } + + int blockSize = 256; + int numBlocks = (N + blockSize - 1) / blockSize; + dim3 dimGrid(numBlocks, 1, 1); + dim3 dimBlock(blockSize, 1, 1); + hipLaunchKernelGGL(add, dimGrid, dimBlock, 0, 0); + + hipDeviceSynchronize(); + + float maxError = 0.0f; + for (int i = 0; i < N; i++) + maxError = fmax(maxError, fabs(B[i]-3.0f)); + + if(maxError == 0.0f) { + return true; + } + return false; +} + +bool managedMultiGPUTest() { + int numDevices = 0; + hipGetDeviceCount(&numDevices); + + for (int i = 0; i < numDevices; i++) { + hipSetDevice(i); + GPU_func<<< 1, 1 >>>( ); + hipDeviceSynchronize(); + } + if(x == numDevices) { + return true; + } + return false; +} + +int main(int argc, char *argv[]) { + bool testStatus = true, OverAllStatus = true; + testStatus = managedSingleGPUTest(); + if (!testStatus) { + printf("managed keyword Single GPU Test failed\n"); + OverAllStatus = false; + } + testStatus = managedMultiGPUTest(); + if (!testStatus) { + printf("managed keyword Multi GPU Test failed\n"); + OverAllStatus = false; + } + if (!OverAllStatus) { + failed(""); + } + passed(); +}