SWDEV-255979 - Added support of __managed__ static variable

Change-Id: I9d5cbbecc8c19ec38a95c94ab4130465ba76c102


[ROCm/hip commit: 995e6336c6]
Этот коммит содержится в:
agodavar
2021-02-16 07:20:58 -05:00
коммит произвёл Anusha Godavarthy Surya
родитель db0c3fdaaf
Коммит 39c608e98d
11 изменённых файлов: 202 добавлений и 16 удалений
+41 -1
Просмотреть файл
@@ -24,12 +24,15 @@ THE SOFTWARE.
#include <cstring>
#include <hip/amd_detail/driver_types.h>
#include "hip/hip_runtime_api.h"
#include "hip/hip_runtime.h"
#include "hip_internal.hpp"
#include "platform/program.hpp"
#include <elf/elf.hpp>
hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind,
amd::HostQueue& queue, bool isAsync = false);
namespace {
size_t constexpr strLiteralLength(char const* str) {
return *str ? 1 + strLiteralLength(str + 1) : 0;
@@ -634,7 +637,6 @@ FatBinaryInfo** StatCO::addFatBinary(const void* data, bool initialized) {
if (initialized) {
digestFatBinary(data, modules_[data]);
}
return &modules_[data];
}
@@ -651,6 +653,16 @@ hipError_t StatCO::removeFatBinary(FatBinaryInfo** module) {
}
}
auto it = managedVars_.begin();
while (it != managedVars_.end()) {
if ((*it)->moduleInfo() == module) {
delete *it;
managedVars_.erase(it);
} else {
++it;
}
}
auto fit = functions_.begin();
while (fit != functions_.end()) {
if (fit->second->moduleInfo() == module) {
@@ -734,4 +746,32 @@ hipError_t StatCO::getStatGlobalVar(const void* hostVar, int deviceId, hipDevice
*size_ptr = dvar->size();
return hipSuccess;
}
hipError_t StatCO::registerStatManagedVar(Var* var) {
managedVars_.emplace_back(var);
return hipSuccess;
}
hipError_t StatCO::initStatManagedVarDevicePtr(int deviceId) {
amd::ScopedLock lock(sclock_);
if (managedVarsDevicePtrInitalized_.find(deviceId) == managedVarsDevicePtrInitalized_.end() ||
!managedVarsDevicePtrInitalized_[deviceId]) {
for (auto var : managedVars_) {
DeviceVar* dvar = nullptr;
IHIP_RETURN_ONFAIL(var->getStatDeviceVar(&dvar, deviceId));
amd::HostQueue* queue = hip::getNullStream();
if(queue != nullptr) {
ihipMemcpy(reinterpret_cast<address>(dvar->device_ptr()), var->getManagedVarPtr(),
dvar->size(), hipMemcpyHostToDevice, *queue);
} else {
ClPrint(amd::LOG_ERROR, amd::LOG_API, "Host Queue is NULL");
return hipErrorInvalidResourceHandle;
}
}
managedVarsDevicePtrInitalized_[deviceId] = true;
}
return hipSuccess;
}
}; //namespace: hip
+8 -1
Просмотреть файл
@@ -124,9 +124,10 @@ public:
hipError_t removeFatBinary(FatBinaryInfo** module);
hipError_t digestFatBinary(const void* data, FatBinaryInfo*& programs);
//Register vars/funcs given to use from __hipRegister[Var/Func]
//Register vars/funcs given to use from __hipRegister[Var/Func/ManagedVar]
hipError_t registerStatFunction(const void* hostFunction, Function* func);
hipError_t registerStatGlobalVar(const void* hostVar, Var* var);
hipError_t registerStatManagedVar(Var *var);
//Retrive Vars/Funcs for a given hostSidePtr(const void*), unless stated otherwise.
hipError_t getStatFunc(hipFunction_t* hfunc, const void* hostFunction, int deviceId);
@@ -134,6 +135,9 @@ public:
hipError_t getStatGlobalVar(const void* hostVar, int deviceId, hipDeviceptr_t* dev_ptr,
size_t* size_ptr);
//Managed variable is a defined symbol in code object
//pointer to the alocated managed memory has to be copied to the address of symbol
hipError_t initStatManagedVarDevicePtr(int deviceId);
private:
friend class ::PlatformState;
//Populated during __hipRegisterFatBinary
@@ -142,6 +146,9 @@ private:
std::unordered_map<const void*, Function*> functions_;
//Populated during __hipRegisterVars
std::unordered_map<const void*, Var*> vars_;
//Populated during __hipRegisterManagedVar
std::vector<Var*> managedVars_;
std::unordered_map<int, bool> managedVarsDevicePtrInitalized_;
};
}; // namespace hip
+9 -6
Просмотреть файл
@@ -160,6 +160,12 @@ Var::Var(std::string name, DeviceVarKind dVarKind, size_t size, int type, int no
dVar_.resize(g_devices.size());
}
Var::Var(std::string name, DeviceVarKind dVarKind, void *pointer, size_t size,
unsigned align, FatBinaryInfo** modules) : name_(name), dVarKind_(dVarKind),
size_(size), modules_(modules), managedVarPtr_(pointer), align_(align) {
dVar_.resize(g_devices.size());
}
Var::~Var() {
for (auto& elem : dVar_) {
delete elem;
@@ -186,15 +192,12 @@ hipError_t Var::getStatDeviceVar(DeviceVar** dvar, int deviceId) {
guarantee((deviceId >= 0) , "Invalid DeviceId, less than zero");
guarantee((static_cast<size_t>(deviceId) < g_devices.size()),
"Invalid DeviceId, greater than no of code objects");
hipModule_t hmod = nullptr;
IHIP_RETURN_ONFAIL((*modules_)->BuildProgram(deviceId));
IHIP_RETURN_ONFAIL((*modules_)->GetModule(deviceId, &hmod));
if (dVar_[deviceId] == nullptr) {
hipModule_t hmod = nullptr;
IHIP_RETURN_ONFAIL((*modules_)->BuildProgram(deviceId));
IHIP_RETURN_ONFAIL((*modules_)->GetModule(deviceId, &hmod));
dVar_[deviceId] = new DeviceVar(name_, hmod);
}
*dvar = dVar_[deviceId];
return hipSuccess;
}
+11 -3
Просмотреть файл
@@ -81,11 +81,16 @@ public:
enum DeviceVarKind {
DVK_Variable = 0,
DVK_Surface,
DVK_Texture
DVK_Texture,
DVK_Managed
};
Var(std::string name, DeviceVarKind dVarKind, size_t size, int type, int norm,
FatBinaryInfo** modules = nullptr);
Var(std::string name, DeviceVarKind dVarKind, void *pointer, size_t size, unsigned align,
FatBinaryInfo** modules = nullptr);
~Var();
//Return DeviceVar for this dynamically loaded module
@@ -96,7 +101,7 @@ public:
void resize_dVar(size_t size) { dVar_.resize(size); }
FatBinaryInfo** moduleInfo() { return modules_; };
void* getManagedVarPtr() { return managedVarPtr_; };
private:
std::vector<DeviceVar*> dVar_; // DeviceVarObj per Device
std::string name_; // Variable name (not unique identifier)
@@ -104,7 +109,10 @@ private:
size_t size_; // Size of the variable
int type_; // Type(Textures/Surfaces only)
int norm_; // Type(Textures/Surfaces only)
FatBinaryInfo** modules_; // static module where it is referenced
FatBinaryInfo** modules_; // static module where it is referenced
void *managedVarPtr_; // Managed memory pointer with size_ & align_
unsigned int align_; // Managed memory alignment
};
}; //namespace: hip
+1
Просмотреть файл
@@ -179,6 +179,7 @@ __hipRegisterFunction
__hipRegisterVar
__hipRegisterSurface
__hipRegisterTexture
__hipRegisterManagedVar
__hipUnregisterFatBinary
hipConfigureCall
hipSetupArgument
+1
Просмотреть файл
@@ -179,6 +179,7 @@ global:
__hipRegisterVar;
__hipRegisterSurface;
__hipRegisterTexture;
__hipRegisterManagedVar;
__hipUnregisterFatBinary;
__gnu_h2f_ieee;
__gnu_f2h_ieee;
+4 -4
Просмотреть файл
@@ -25,8 +25,8 @@
#include "platform/command.hpp"
#include "platform/memory.hpp"
// Forward declaraiton of a static function
static hipError_t ihipMallocManaged(void** ptr, size_t size);
// Forward declaraiton of a function
hipError_t ihipMallocManaged(void** ptr, size_t size, unsigned int align = 0);
// Make sure HIP defines match ROCclr to avoid double conversion
static_assert(hipCpuDeviceId == amd::CpuDeviceId, "CPU device ID mismatch with ROCclr!");
@@ -186,7 +186,7 @@ hipError_t hipStreamAttachMemAsync(hipStream_t stream, hipDeviceptr_t* dev_ptr,
}
// ================================================================================================
static hipError_t ihipMallocManaged(void** ptr, size_t size) {
hipError_t ihipMallocManaged(void** ptr, size_t size, unsigned int align) {
if (size == 0) {
*ptr = nullptr;
return hipSuccess;
@@ -207,7 +207,7 @@ static hipError_t ihipMallocManaged(void** ptr, size_t size) {
// Allocate SVM fine grain buffer with the forced host pointer, avoiding explicit memory
// allocation in the device driver
*ptr = amd::SvmBuffer::malloc(ctx, CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_ALLOC_HOST_PTR,
size, dev.info().memBaseAddrAlign_);
size, (align == 0) ? dev.info().memBaseAddrAlign_ : align);
if (*ptr == nullptr) {
return hipErrorMemoryAllocation;
}
+2
Просмотреть файл
@@ -222,6 +222,8 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, extra, startEvent,
stopEvent, flags, params);
HIP_RETURN_ONFAIL(PlatformState::instance().initStatManagedVarDevicePtr(ihipGetDevice()));
if (f == nullptr) {
DevLogPrintfError("%s", "Function passed is null");
return hipErrorInvalidImage;
+37 -1
Просмотреть файл
@@ -32,6 +32,11 @@ constexpr unsigned __hipFatMAGIC2 = 0x48495046; // "HIPF"
thread_local std::stack<ihipExec_t> execStack_;
PlatformState* PlatformState::platform_; // Initiaized as nullptr by default
//forward declaration of methods required for __hipRegisrterManagedVar
hipError_t ihipMallocManaged(void** ptr, size_t size, unsigned int align = 0);
hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind,
amd::HostQueue& queue, bool isAsync = false);
struct __CudaFatBinaryWrapper {
unsigned int magic;
unsigned int version;
@@ -76,7 +81,6 @@ extern "C" hip::FatBinaryInfo** __hipRegisterFatBinary(const void* data)
fbwrapper->magic, fbwrapper->version);
return nullptr;
}
return PlatformState::instance().addFatBinary(fbwrapper->binary);
}
@@ -138,6 +142,30 @@ extern "C" void __hipRegisterSurface(hip::FatBinaryInfo** modules, // The d
PlatformState::instance().registerStatGlobalVar(var, var_ptr);
}
extern "C" void __hipRegisterManagedVar(void *hipModule, // Pointer to hip module returned from __hipRegisterFatbinary
void **pointer, // Pointer to a chunk of managed memory with size \p size and alignment \p align
// HIP runtime allocates such managed memory and assign it to \p pointer
void *init_value, // Initial value to be copied into \p pointer
const char *name, // Name of the variable in code object
size_t size,
unsigned align) {
HIP_INIT();
hipError_t status = ihipMallocManaged(pointer, size, align);
if( status == hipSuccess) {
amd::HostQueue* queue = hip::getNullStream();
if(queue != nullptr) {
ihipMemcpy(*pointer, init_value, size, hipMemcpyHostToDevice, *queue);
} else {
ClPrint(amd::LOG_ERROR, amd::LOG_API, "Host Queue is NULL");
}
} else {
guarantee("Error during allocation of managed memory!");
}
hip::Var* var_ptr = new hip::Var(std::string(name), hip::Var::DeviceVarKind::DVK_Managed, pointer,
size, align, reinterpret_cast<hip::FatBinaryInfo**>(hipModule));
PlatformState::instance().registerStatManagedVar(var_ptr);
}
extern "C" void __hipRegisterTexture(hip::FatBinaryInfo** modules, // The device modules containing code object
void* var, // The shadow variable in host code
char* hostVar, // Variable name in host code
@@ -851,6 +879,10 @@ hipError_t PlatformState::registerStatGlobalVar(const void* hostVar, hip::Var* v
return statCO_.registerStatGlobalVar(hostVar, var);
}
hipError_t PlatformState::registerStatManagedVar(hip::Var* var) {
return statCO_.registerStatManagedVar(var);
}
hipError_t PlatformState::getStatFunc(hipFunction_t* hfunc, const void* hostFunction, int deviceId) {
return statCO_.getStatFunc(hfunc, hostFunction, deviceId);
}
@@ -867,6 +899,10 @@ hipError_t PlatformState::getStatGlobalVar(const void* hostVar, int deviceId, hi
return statCO_.getStatGlobalVar(hostVar, deviceId, dev_ptr, size_ptr);
}
hipError_t PlatformState::initStatManagedVarDevicePtr(int deviceId) {
return statCO_.initStatManagedVarDevicePtr(deviceId);
}
void PlatformState::setupArgument(const void *arg, size_t size, size_t offset) {
auto& arguments = execStack_.top().arguments_;
+4
Просмотреть файл
@@ -73,12 +73,16 @@ public:
hipError_t registerStatFunction(const void* hostFunction, hip::Function* func);
hipError_t registerStatGlobalVar(const void* hostVar, hip::Var* var);
hipError_t registerStatManagedVar(hip::Var* var);
hipError_t getStatFunc(hipFunction_t* hfunc, const void* hostFunction, int deviceId);
hipError_t getStatFuncAttr(hipFuncAttributes* func_attr, const void* hostFunction, int deviceId);
hipError_t getStatGlobalVar(const void* hostVar, int deviceId, hipDeviceptr_t* dev_ptr,
size_t* size_ptr);
hipError_t initStatManagedVarDevicePtr(int deviceId);
//Exec Functions
void setupArgument(const void *arg, size_t size, size_t offset);
void configureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, hipStream_t stream);
+84
Просмотреть файл
@@ -0,0 +1,84 @@
#include <hip/hip_runtime.h>
#include <math.h>
#include "test_common.h"
//Enable test when compiler support is available in mainline
/* HIT_START
* BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM amd
* HIT_END
*/
#define N 1048576
__managed__ float A[N]; // Accessible by ALL CPU and GPU functions !!!
__managed__ float B[N];
__managed__ int x = 0;
__global__ void add()
{
int index = blockIdx.x * blockDim.x + threadIdx.x;
int stride = blockDim.x * gridDim.x;
for (int i = index; i < N; i += stride)
B[i] = A[i] + B[i];
}
__global__ void GPU_func() {
x++;
}
bool managedSingleGPUTest() {
bool testResult = true;
for (int i = 0; i < N; i++) {
A[i] = 1.0f;
B[i] = 2.0f;
}
int blockSize = 256;
int numBlocks = (N + blockSize - 1) / blockSize;
dim3 dimGrid(numBlocks, 1, 1);
dim3 dimBlock(blockSize, 1, 1);
hipLaunchKernelGGL(add, dimGrid, dimBlock, 0, 0);
hipDeviceSynchronize();
float maxError = 0.0f;
for (int i = 0; i < N; i++)
maxError = fmax(maxError, fabs(B[i]-3.0f));
if(maxError == 0.0f) {
return true;
}
return false;
}
bool managedMultiGPUTest() {
int numDevices = 0;
hipGetDeviceCount(&numDevices);
for (int i = 0; i < numDevices; i++) {
hipSetDevice(i);
GPU_func<<< 1, 1 >>>( );
hipDeviceSynchronize();
}
if(x == numDevices) {
return true;
}
return false;
}
int main(int argc, char *argv[]) {
bool testStatus = true, OverAllStatus = true;
testStatus = managedSingleGPUTest();
if (!testStatus) {
printf("managed keyword Single GPU Test failed\n");
OverAllStatus = false;
}
testStatus = managedMultiGPUTest();
if (!testStatus) {
printf("managed keyword Multi GPU Test failed\n");
OverAllStatus = false;
}
if (!OverAllStatus) {
failed("");
}
passed();
}