SWDEV-255979 - Added support of __managed__ static variable
Change-Id: I9d5cbbecc8c19ec38a95c94ab4130465ba76c102
[ROCm/hip commit: 995e6336c6]
Этот коммит содержится в:
коммит произвёл
Anusha Godavarthy Surya
родитель
db0c3fdaaf
Коммит
39c608e98d
@@ -24,12 +24,15 @@ THE SOFTWARE.
|
||||
|
||||
#include <cstring>
|
||||
|
||||
#include <hip/amd_detail/driver_types.h>
|
||||
#include "hip/hip_runtime_api.h"
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip_internal.hpp"
|
||||
#include "platform/program.hpp"
|
||||
#include <elf/elf.hpp>
|
||||
|
||||
hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind,
|
||||
amd::HostQueue& queue, bool isAsync = false);
|
||||
namespace {
|
||||
size_t constexpr strLiteralLength(char const* str) {
|
||||
return *str ? 1 + strLiteralLength(str + 1) : 0;
|
||||
@@ -634,7 +637,6 @@ FatBinaryInfo** StatCO::addFatBinary(const void* data, bool initialized) {
|
||||
if (initialized) {
|
||||
digestFatBinary(data, modules_[data]);
|
||||
}
|
||||
|
||||
return &modules_[data];
|
||||
}
|
||||
|
||||
@@ -651,6 +653,16 @@ hipError_t StatCO::removeFatBinary(FatBinaryInfo** module) {
|
||||
}
|
||||
}
|
||||
|
||||
auto it = managedVars_.begin();
|
||||
while (it != managedVars_.end()) {
|
||||
if ((*it)->moduleInfo() == module) {
|
||||
delete *it;
|
||||
managedVars_.erase(it);
|
||||
} else {
|
||||
++it;
|
||||
}
|
||||
}
|
||||
|
||||
auto fit = functions_.begin();
|
||||
while (fit != functions_.end()) {
|
||||
if (fit->second->moduleInfo() == module) {
|
||||
@@ -734,4 +746,32 @@ hipError_t StatCO::getStatGlobalVar(const void* hostVar, int deviceId, hipDevice
|
||||
*size_ptr = dvar->size();
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
hipError_t StatCO::registerStatManagedVar(Var* var) {
|
||||
managedVars_.emplace_back(var);
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
hipError_t StatCO::initStatManagedVarDevicePtr(int deviceId) {
|
||||
amd::ScopedLock lock(sclock_);
|
||||
|
||||
if (managedVarsDevicePtrInitalized_.find(deviceId) == managedVarsDevicePtrInitalized_.end() ||
|
||||
!managedVarsDevicePtrInitalized_[deviceId]) {
|
||||
for (auto var : managedVars_) {
|
||||
DeviceVar* dvar = nullptr;
|
||||
IHIP_RETURN_ONFAIL(var->getStatDeviceVar(&dvar, deviceId));
|
||||
|
||||
amd::HostQueue* queue = hip::getNullStream();
|
||||
if(queue != nullptr) {
|
||||
ihipMemcpy(reinterpret_cast<address>(dvar->device_ptr()), var->getManagedVarPtr(),
|
||||
dvar->size(), hipMemcpyHostToDevice, *queue);
|
||||
} else {
|
||||
ClPrint(amd::LOG_ERROR, amd::LOG_API, "Host Queue is NULL");
|
||||
return hipErrorInvalidResourceHandle;
|
||||
}
|
||||
}
|
||||
managedVarsDevicePtrInitalized_[deviceId] = true;
|
||||
}
|
||||
return hipSuccess;
|
||||
}
|
||||
}; //namespace: hip
|
||||
|
||||
@@ -124,9 +124,10 @@ public:
|
||||
hipError_t removeFatBinary(FatBinaryInfo** module);
|
||||
hipError_t digestFatBinary(const void* data, FatBinaryInfo*& programs);
|
||||
|
||||
//Register vars/funcs given to use from __hipRegister[Var/Func]
|
||||
//Register vars/funcs given to use from __hipRegister[Var/Func/ManagedVar]
|
||||
hipError_t registerStatFunction(const void* hostFunction, Function* func);
|
||||
hipError_t registerStatGlobalVar(const void* hostVar, Var* var);
|
||||
hipError_t registerStatManagedVar(Var *var);
|
||||
|
||||
//Retrive Vars/Funcs for a given hostSidePtr(const void*), unless stated otherwise.
|
||||
hipError_t getStatFunc(hipFunction_t* hfunc, const void* hostFunction, int deviceId);
|
||||
@@ -134,6 +135,9 @@ public:
|
||||
hipError_t getStatGlobalVar(const void* hostVar, int deviceId, hipDeviceptr_t* dev_ptr,
|
||||
size_t* size_ptr);
|
||||
|
||||
//Managed variable is a defined symbol in code object
|
||||
//pointer to the alocated managed memory has to be copied to the address of symbol
|
||||
hipError_t initStatManagedVarDevicePtr(int deviceId);
|
||||
private:
|
||||
friend class ::PlatformState;
|
||||
//Populated during __hipRegisterFatBinary
|
||||
@@ -142,6 +146,9 @@ private:
|
||||
std::unordered_map<const void*, Function*> functions_;
|
||||
//Populated during __hipRegisterVars
|
||||
std::unordered_map<const void*, Var*> vars_;
|
||||
//Populated during __hipRegisterManagedVar
|
||||
std::vector<Var*> managedVars_;
|
||||
std::unordered_map<int, bool> managedVarsDevicePtrInitalized_;
|
||||
};
|
||||
|
||||
}; // namespace hip
|
||||
|
||||
@@ -160,6 +160,12 @@ Var::Var(std::string name, DeviceVarKind dVarKind, size_t size, int type, int no
|
||||
dVar_.resize(g_devices.size());
|
||||
}
|
||||
|
||||
Var::Var(std::string name, DeviceVarKind dVarKind, void *pointer, size_t size,
|
||||
unsigned align, FatBinaryInfo** modules) : name_(name), dVarKind_(dVarKind),
|
||||
size_(size), modules_(modules), managedVarPtr_(pointer), align_(align) {
|
||||
dVar_.resize(g_devices.size());
|
||||
}
|
||||
|
||||
Var::~Var() {
|
||||
for (auto& elem : dVar_) {
|
||||
delete elem;
|
||||
@@ -186,15 +192,12 @@ hipError_t Var::getStatDeviceVar(DeviceVar** dvar, int deviceId) {
|
||||
guarantee((deviceId >= 0) , "Invalid DeviceId, less than zero");
|
||||
guarantee((static_cast<size_t>(deviceId) < g_devices.size()),
|
||||
"Invalid DeviceId, greater than no of code objects");
|
||||
|
||||
hipModule_t hmod = nullptr;
|
||||
IHIP_RETURN_ONFAIL((*modules_)->BuildProgram(deviceId));
|
||||
IHIP_RETURN_ONFAIL((*modules_)->GetModule(deviceId, &hmod));
|
||||
|
||||
if (dVar_[deviceId] == nullptr) {
|
||||
hipModule_t hmod = nullptr;
|
||||
IHIP_RETURN_ONFAIL((*modules_)->BuildProgram(deviceId));
|
||||
IHIP_RETURN_ONFAIL((*modules_)->GetModule(deviceId, &hmod));
|
||||
dVar_[deviceId] = new DeviceVar(name_, hmod);
|
||||
}
|
||||
|
||||
*dvar = dVar_[deviceId];
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
@@ -81,11 +81,16 @@ public:
|
||||
enum DeviceVarKind {
|
||||
DVK_Variable = 0,
|
||||
DVK_Surface,
|
||||
DVK_Texture
|
||||
DVK_Texture,
|
||||
DVK_Managed
|
||||
};
|
||||
|
||||
Var(std::string name, DeviceVarKind dVarKind, size_t size, int type, int norm,
|
||||
FatBinaryInfo** modules = nullptr);
|
||||
|
||||
Var(std::string name, DeviceVarKind dVarKind, void *pointer, size_t size, unsigned align,
|
||||
FatBinaryInfo** modules = nullptr);
|
||||
|
||||
~Var();
|
||||
|
||||
//Return DeviceVar for this dynamically loaded module
|
||||
@@ -96,7 +101,7 @@ public:
|
||||
void resize_dVar(size_t size) { dVar_.resize(size); }
|
||||
|
||||
FatBinaryInfo** moduleInfo() { return modules_; };
|
||||
|
||||
void* getManagedVarPtr() { return managedVarPtr_; };
|
||||
private:
|
||||
std::vector<DeviceVar*> dVar_; // DeviceVarObj per Device
|
||||
std::string name_; // Variable name (not unique identifier)
|
||||
@@ -104,7 +109,10 @@ private:
|
||||
size_t size_; // Size of the variable
|
||||
int type_; // Type(Textures/Surfaces only)
|
||||
int norm_; // Type(Textures/Surfaces only)
|
||||
FatBinaryInfo** modules_; // static module where it is referenced
|
||||
FatBinaryInfo** modules_; // static module where it is referenced
|
||||
|
||||
void *managedVarPtr_; // Managed memory pointer with size_ & align_
|
||||
unsigned int align_; // Managed memory alignment
|
||||
};
|
||||
|
||||
}; //namespace: hip
|
||||
|
||||
@@ -179,6 +179,7 @@ __hipRegisterFunction
|
||||
__hipRegisterVar
|
||||
__hipRegisterSurface
|
||||
__hipRegisterTexture
|
||||
__hipRegisterManagedVar
|
||||
__hipUnregisterFatBinary
|
||||
hipConfigureCall
|
||||
hipSetupArgument
|
||||
|
||||
@@ -179,6 +179,7 @@ global:
|
||||
__hipRegisterVar;
|
||||
__hipRegisterSurface;
|
||||
__hipRegisterTexture;
|
||||
__hipRegisterManagedVar;
|
||||
__hipUnregisterFatBinary;
|
||||
__gnu_h2f_ieee;
|
||||
__gnu_f2h_ieee;
|
||||
|
||||
@@ -25,8 +25,8 @@
|
||||
#include "platform/command.hpp"
|
||||
#include "platform/memory.hpp"
|
||||
|
||||
// Forward declaraiton of a static function
|
||||
static hipError_t ihipMallocManaged(void** ptr, size_t size);
|
||||
// Forward declaraiton of a function
|
||||
hipError_t ihipMallocManaged(void** ptr, size_t size, unsigned int align = 0);
|
||||
|
||||
// Make sure HIP defines match ROCclr to avoid double conversion
|
||||
static_assert(hipCpuDeviceId == amd::CpuDeviceId, "CPU device ID mismatch with ROCclr!");
|
||||
@@ -186,7 +186,7 @@ hipError_t hipStreamAttachMemAsync(hipStream_t stream, hipDeviceptr_t* dev_ptr,
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
static hipError_t ihipMallocManaged(void** ptr, size_t size) {
|
||||
hipError_t ihipMallocManaged(void** ptr, size_t size, unsigned int align) {
|
||||
if (size == 0) {
|
||||
*ptr = nullptr;
|
||||
return hipSuccess;
|
||||
@@ -207,7 +207,7 @@ static hipError_t ihipMallocManaged(void** ptr, size_t size) {
|
||||
// Allocate SVM fine grain buffer with the forced host pointer, avoiding explicit memory
|
||||
// allocation in the device driver
|
||||
*ptr = amd::SvmBuffer::malloc(ctx, CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_ALLOC_HOST_PTR,
|
||||
size, dev.info().memBaseAddrAlign_);
|
||||
size, (align == 0) ? dev.info().memBaseAddrAlign_ : align);
|
||||
if (*ptr == nullptr) {
|
||||
return hipErrorMemoryAllocation;
|
||||
}
|
||||
|
||||
@@ -222,6 +222,8 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
|
||||
blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams, extra, startEvent,
|
||||
stopEvent, flags, params);
|
||||
|
||||
HIP_RETURN_ONFAIL(PlatformState::instance().initStatManagedVarDevicePtr(ihipGetDevice()));
|
||||
|
||||
if (f == nullptr) {
|
||||
DevLogPrintfError("%s", "Function passed is null");
|
||||
return hipErrorInvalidImage;
|
||||
|
||||
@@ -32,6 +32,11 @@ constexpr unsigned __hipFatMAGIC2 = 0x48495046; // "HIPF"
|
||||
thread_local std::stack<ihipExec_t> execStack_;
|
||||
PlatformState* PlatformState::platform_; // Initiaized as nullptr by default
|
||||
|
||||
//forward declaration of methods required for __hipRegisrterManagedVar
|
||||
hipError_t ihipMallocManaged(void** ptr, size_t size, unsigned int align = 0);
|
||||
hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind,
|
||||
amd::HostQueue& queue, bool isAsync = false);
|
||||
|
||||
struct __CudaFatBinaryWrapper {
|
||||
unsigned int magic;
|
||||
unsigned int version;
|
||||
@@ -76,7 +81,6 @@ extern "C" hip::FatBinaryInfo** __hipRegisterFatBinary(const void* data)
|
||||
fbwrapper->magic, fbwrapper->version);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return PlatformState::instance().addFatBinary(fbwrapper->binary);
|
||||
}
|
||||
|
||||
@@ -138,6 +142,30 @@ extern "C" void __hipRegisterSurface(hip::FatBinaryInfo** modules, // The d
|
||||
PlatformState::instance().registerStatGlobalVar(var, var_ptr);
|
||||
}
|
||||
|
||||
extern "C" void __hipRegisterManagedVar(void *hipModule, // Pointer to hip module returned from __hipRegisterFatbinary
|
||||
void **pointer, // Pointer to a chunk of managed memory with size \p size and alignment \p align
|
||||
// HIP runtime allocates such managed memory and assign it to \p pointer
|
||||
void *init_value, // Initial value to be copied into \p pointer
|
||||
const char *name, // Name of the variable in code object
|
||||
size_t size,
|
||||
unsigned align) {
|
||||
HIP_INIT();
|
||||
hipError_t status = ihipMallocManaged(pointer, size, align);
|
||||
if( status == hipSuccess) {
|
||||
amd::HostQueue* queue = hip::getNullStream();
|
||||
if(queue != nullptr) {
|
||||
ihipMemcpy(*pointer, init_value, size, hipMemcpyHostToDevice, *queue);
|
||||
} else {
|
||||
ClPrint(amd::LOG_ERROR, amd::LOG_API, "Host Queue is NULL");
|
||||
}
|
||||
} else {
|
||||
guarantee("Error during allocation of managed memory!");
|
||||
}
|
||||
hip::Var* var_ptr = new hip::Var(std::string(name), hip::Var::DeviceVarKind::DVK_Managed, pointer,
|
||||
size, align, reinterpret_cast<hip::FatBinaryInfo**>(hipModule));
|
||||
PlatformState::instance().registerStatManagedVar(var_ptr);
|
||||
}
|
||||
|
||||
extern "C" void __hipRegisterTexture(hip::FatBinaryInfo** modules, // The device modules containing code object
|
||||
void* var, // The shadow variable in host code
|
||||
char* hostVar, // Variable name in host code
|
||||
@@ -851,6 +879,10 @@ hipError_t PlatformState::registerStatGlobalVar(const void* hostVar, hip::Var* v
|
||||
return statCO_.registerStatGlobalVar(hostVar, var);
|
||||
}
|
||||
|
||||
hipError_t PlatformState::registerStatManagedVar(hip::Var* var) {
|
||||
return statCO_.registerStatManagedVar(var);
|
||||
}
|
||||
|
||||
hipError_t PlatformState::getStatFunc(hipFunction_t* hfunc, const void* hostFunction, int deviceId) {
|
||||
return statCO_.getStatFunc(hfunc, hostFunction, deviceId);
|
||||
}
|
||||
@@ -867,6 +899,10 @@ hipError_t PlatformState::getStatGlobalVar(const void* hostVar, int deviceId, hi
|
||||
return statCO_.getStatGlobalVar(hostVar, deviceId, dev_ptr, size_ptr);
|
||||
}
|
||||
|
||||
hipError_t PlatformState::initStatManagedVarDevicePtr(int deviceId) {
|
||||
return statCO_.initStatManagedVarDevicePtr(deviceId);
|
||||
}
|
||||
|
||||
void PlatformState::setupArgument(const void *arg, size_t size, size_t offset) {
|
||||
auto& arguments = execStack_.top().arguments_;
|
||||
|
||||
|
||||
@@ -73,12 +73,16 @@ public:
|
||||
|
||||
hipError_t registerStatFunction(const void* hostFunction, hip::Function* func);
|
||||
hipError_t registerStatGlobalVar(const void* hostVar, hip::Var* var);
|
||||
hipError_t registerStatManagedVar(hip::Var* var);
|
||||
|
||||
|
||||
hipError_t getStatFunc(hipFunction_t* hfunc, const void* hostFunction, int deviceId);
|
||||
hipError_t getStatFuncAttr(hipFuncAttributes* func_attr, const void* hostFunction, int deviceId);
|
||||
hipError_t getStatGlobalVar(const void* hostVar, int deviceId, hipDeviceptr_t* dev_ptr,
|
||||
size_t* size_ptr);
|
||||
|
||||
hipError_t initStatManagedVarDevicePtr(int deviceId);
|
||||
|
||||
//Exec Functions
|
||||
void setupArgument(const void *arg, size_t size, size_t offset);
|
||||
void configureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, hipStream_t stream);
|
||||
|
||||
@@ -0,0 +1,84 @@
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <math.h>
|
||||
#include "test_common.h"
|
||||
|
||||
//Enable test when compiler support is available in mainline
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM amd
|
||||
* HIT_END
|
||||
*/
|
||||
#define N 1048576
|
||||
__managed__ float A[N]; // Accessible by ALL CPU and GPU functions !!!
|
||||
__managed__ float B[N];
|
||||
__managed__ int x = 0;
|
||||
|
||||
__global__ void add()
|
||||
{
|
||||
int index = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int stride = blockDim.x * gridDim.x;
|
||||
for (int i = index; i < N; i += stride)
|
||||
B[i] = A[i] + B[i];
|
||||
}
|
||||
|
||||
__global__ void GPU_func() {
|
||||
x++;
|
||||
}
|
||||
|
||||
bool managedSingleGPUTest() {
|
||||
bool testResult = true;
|
||||
|
||||
for (int i = 0; i < N; i++) {
|
||||
A[i] = 1.0f;
|
||||
B[i] = 2.0f;
|
||||
}
|
||||
|
||||
int blockSize = 256;
|
||||
int numBlocks = (N + blockSize - 1) / blockSize;
|
||||
dim3 dimGrid(numBlocks, 1, 1);
|
||||
dim3 dimBlock(blockSize, 1, 1);
|
||||
hipLaunchKernelGGL(add, dimGrid, dimBlock, 0, 0);
|
||||
|
||||
hipDeviceSynchronize();
|
||||
|
||||
float maxError = 0.0f;
|
||||
for (int i = 0; i < N; i++)
|
||||
maxError = fmax(maxError, fabs(B[i]-3.0f));
|
||||
|
||||
if(maxError == 0.0f) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool managedMultiGPUTest() {
|
||||
int numDevices = 0;
|
||||
hipGetDeviceCount(&numDevices);
|
||||
|
||||
for (int i = 0; i < numDevices; i++) {
|
||||
hipSetDevice(i);
|
||||
GPU_func<<< 1, 1 >>>( );
|
||||
hipDeviceSynchronize();
|
||||
}
|
||||
if(x == numDevices) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
bool testStatus = true, OverAllStatus = true;
|
||||
testStatus = managedSingleGPUTest();
|
||||
if (!testStatus) {
|
||||
printf("managed keyword Single GPU Test failed\n");
|
||||
OverAllStatus = false;
|
||||
}
|
||||
testStatus = managedMultiGPUTest();
|
||||
if (!testStatus) {
|
||||
printf("managed keyword Multi GPU Test failed\n");
|
||||
OverAllStatus = false;
|
||||
}
|
||||
if (!OverAllStatus) {
|
||||
failed("");
|
||||
}
|
||||
passed();
|
||||
}
|
||||
Ссылка в новой задаче
Block a user