P4 to Git Change 1757948 by kjayapra@1_HIPWS_SL_IPC on 2019/03/18 18:29:24
SWDEV-144570 - Implementation of hipMemcpyToSymbol, hipMemcpyFromSymbol, hipMemcpyToSymbolAsync, hipMemcpyFromSymbolAsync, hipGetSymbolAddress, hipModuleGetGlobal Affected files ... ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.def.in#12 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.map.in#13 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#23 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#45 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#21 edit ... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#22 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.hpp#20 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.cpp#101 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.hpp#45 edit
This commit is contained in:
@@ -92,6 +92,8 @@ hipMemcpyFromArray
|
||||
hipMemcpyToSymbol
|
||||
hipMemcpyToSymbolAsync
|
||||
hipMemGetAddressRange
|
||||
hipGetSymbolAddress
|
||||
hipGetSymbolSize
|
||||
hipMemGetInfo
|
||||
hipMemPtrGetInfo
|
||||
hipMemset
|
||||
|
||||
@@ -93,6 +93,8 @@ global:
|
||||
hipMemcpyToSymbol;
|
||||
hipMemcpyToSymbolAsync;
|
||||
hipMemGetAddressRange;
|
||||
hipGetSymbolAddress;
|
||||
hipGetSymbolSize;
|
||||
hipMemGetInfo;
|
||||
hipMemPtrGetInfo;
|
||||
hipMemset;
|
||||
|
||||
@@ -85,6 +85,59 @@ namespace hip {
|
||||
static Function* asFunction(hipFunction_t f) { return reinterpret_cast<Function*>(f); }
|
||||
};
|
||||
};
|
||||
|
||||
struct ihipExec_t {
|
||||
dim3 gridDim_;
|
||||
dim3 blockDim_;
|
||||
size_t sharedMem_;
|
||||
hipStream_t hStream_;
|
||||
std::vector<char> arguments_;
|
||||
};
|
||||
|
||||
class PlatformState {
|
||||
amd::Monitor lock_;
|
||||
|
||||
public:
|
||||
struct RegisteredVar {
|
||||
public:
|
||||
RegisteredVar(): hostVar_(nullptr), size_(0), devicePtr_(nullptr) {}
|
||||
RegisteredVar(char* hostVar, size_t size, hipDeviceptr_t devicePtr);
|
||||
~RegisteredVar() {}
|
||||
|
||||
hipDeviceptr_t getdeviceptr() const { return devicePtr_; };
|
||||
size_t getvarsize() const { return size_; };
|
||||
|
||||
private:
|
||||
char* hostVar_; // Variable name in host code
|
||||
size_t size_; // Size of the variable
|
||||
hipDeviceptr_t devicePtr_; //Device Memory Address of the variable.
|
||||
};
|
||||
|
||||
private:
|
||||
std::unordered_map<const void*, std::vector<hipFunction_t> > functions_;
|
||||
std::unordered_map<const void*, std::vector<RegisteredVar> > vars_;
|
||||
|
||||
static PlatformState* platform_;
|
||||
|
||||
PlatformState() : lock_("Guards global function map") {}
|
||||
~PlatformState() {}
|
||||
public:
|
||||
static PlatformState& instance() {
|
||||
return *platform_;
|
||||
}
|
||||
|
||||
void registerVar(const char* hostvar, const std::vector<RegisteredVar>& rvar);
|
||||
void registerFunction(const void* hostFunction, const std::vector<hipFunction_t>& funcs);
|
||||
|
||||
hipFunction_t getFunc(const void* hostFunction, int deviceId);
|
||||
bool getGlobalVar(const void* hostVar, int deviceId, hipDeviceptr_t* dev_ptr,
|
||||
size_t* size_ptr);
|
||||
void setupArgument(const void *arg, size_t size, size_t offset);
|
||||
void configureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, hipStream_t stream);
|
||||
|
||||
void popExec(ihipExec_t& exec);
|
||||
};
|
||||
|
||||
extern std::vector<amd::Context*> g_devices;
|
||||
extern hipError_t ihipDeviceGetCount(int* count);
|
||||
extern int ihipGetDevice();
|
||||
|
||||
@@ -557,36 +557,96 @@ hipError_t hipMemcpyToSymbol(const void* symbolName, const void* src, size_t cou
|
||||
size_t offset, hipMemcpyKind kind) {
|
||||
HIP_INIT_API(symbolName, src, count, offset, kind);
|
||||
|
||||
assert(0 && "Unimplemented");
|
||||
size_t sym_size = 0;
|
||||
hipDeviceptr_t device_ptr = nullptr;
|
||||
|
||||
HIP_RETURN(hipErrorUnknown);
|
||||
/* Get address and size for the global symbol */
|
||||
if (!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), &device_ptr,
|
||||
&sym_size)) {
|
||||
HIP_RETURN(hipErrorUnknown);
|
||||
}
|
||||
|
||||
/* Size Check to make sure offset is correct */
|
||||
if ((offset + count) != sym_size) {
|
||||
return HIP_RETURN(hipErrorUnknown);
|
||||
}
|
||||
|
||||
device_ptr = reinterpret_cast<address>(device_ptr) + offset;
|
||||
|
||||
/* Copy memory from source to destination address */
|
||||
HIP_RETURN(hipMemcpy(device_ptr, src, count, kind));
|
||||
}
|
||||
|
||||
hipError_t hipMemcpyFromSymbol(void* dst, const void* symbolName, size_t count,
|
||||
size_t offset, hipMemcpyKind kind) {
|
||||
HIP_INIT_API(symbolName, dst, count, offset, kind);
|
||||
|
||||
assert(0 && "Unimplemented");
|
||||
size_t sym_size = 0;
|
||||
hipDeviceptr_t device_ptr = nullptr;
|
||||
|
||||
HIP_RETURN(hipErrorUnknown);
|
||||
/* Get address and size for the global symbol */
|
||||
if (!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), &device_ptr,
|
||||
&sym_size)) {
|
||||
HIP_RETURN(hipErrorUnknown);
|
||||
}
|
||||
|
||||
/* Size Check to make sure offset is correct */
|
||||
if ((offset + count) != sym_size) {
|
||||
return HIP_RETURN(hipErrorUnknown);
|
||||
}
|
||||
|
||||
device_ptr = reinterpret_cast<address>(device_ptr) + offset;
|
||||
|
||||
/* Copy memory from source to destination address */
|
||||
HIP_RETURN(hipMemcpy(dst, device_ptr, count, kind));
|
||||
}
|
||||
|
||||
hipError_t hipMemcpyToSymbolAsync(const void* symbolName, const void* src, size_t count,
|
||||
size_t offset, hipMemcpyKind kind, hipStream_t stream) {
|
||||
HIP_INIT_API(symbolName, src, count, offset, kind, stream);
|
||||
|
||||
assert(0 && "Unimplemented");
|
||||
size_t sym_size = 0;
|
||||
hipDeviceptr_t device_ptr = nullptr;
|
||||
|
||||
HIP_RETURN(hipErrorUnknown);
|
||||
/* Get address and size for the global symbol */
|
||||
if (!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), &device_ptr,
|
||||
&sym_size)) {
|
||||
HIP_RETURN(hipErrorUnknown);
|
||||
}
|
||||
|
||||
/* Size Check to make sure offset is correct */
|
||||
if ((offset + count) != sym_size) {
|
||||
return HIP_RETURN(hipErrorUnknown);
|
||||
}
|
||||
|
||||
device_ptr = reinterpret_cast<address>(device_ptr) + offset;
|
||||
|
||||
/* Copy memory from source to destination address */
|
||||
HIP_RETURN(hipMemcpyAsync(device_ptr, src, count, kind, stream));
|
||||
}
|
||||
|
||||
hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName, size_t count,
|
||||
size_t offset, hipMemcpyKind kind, hipStream_t stream) {
|
||||
HIP_INIT_API(symbolName, dst, count, offset, kind, stream);
|
||||
|
||||
assert(0 && "Unimplemented");
|
||||
size_t sym_size = 0;
|
||||
hipDeviceptr_t device_ptr = nullptr;
|
||||
|
||||
HIP_RETURN(hipErrorUnknown);
|
||||
/* Get address and size for the global symbol */
|
||||
if (!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), &device_ptr,
|
||||
&sym_size)) {
|
||||
HIP_RETURN(hipErrorUnknown);
|
||||
}
|
||||
|
||||
/* Size Check to make sure offset is correct */
|
||||
if ((offset + count) != sym_size) {
|
||||
return HIP_RETURN(hipErrorUnknown);
|
||||
}
|
||||
|
||||
device_ptr = reinterpret_cast<address>(device_ptr) + offset;
|
||||
|
||||
/* Copy memory from source to destination address */
|
||||
HIP_RETURN(hipMemcpyAsync(dst, device_ptr, count, kind, stream));
|
||||
}
|
||||
|
||||
hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void* src, size_t sizeBytes) {
|
||||
|
||||
@@ -137,6 +137,22 @@ hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t h
|
||||
{
|
||||
HIP_INIT_API(dptr, bytes, hmod, name);
|
||||
|
||||
amd::Program* program = nullptr;
|
||||
const device::Program* dev_program = nullptr;
|
||||
|
||||
/* Get Device Program pointer*/
|
||||
program = as_amd(reinterpret_cast<cl_program>(hmod));
|
||||
dev_program = program->getDeviceProgram(*hip::getCurrentContext()->devices()[0]);
|
||||
|
||||
if (dev_program == nullptr) {
|
||||
HIP_RETURN(hipErrorUnknown);
|
||||
}
|
||||
|
||||
/* Find the global Symbols */
|
||||
if(!dev_program->findGlobalSymbols(dptr, bytes, name)) {
|
||||
HIP_RETURN(hipErrorUnknown);
|
||||
}
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
|
||||
@@ -31,6 +31,9 @@ THE SOFTWARE.
|
||||
|
||||
constexpr unsigned __hipFatMAGIC2 = 0x48495046; // "HIPF"
|
||||
|
||||
thread_local std::stack<ihipExec_t> execStack_;
|
||||
PlatformState* PlatformState::platform_ = new PlatformState();
|
||||
|
||||
struct __CudaFatBinaryWrapper {
|
||||
unsigned int magic;
|
||||
unsigned int version;
|
||||
@@ -111,94 +114,82 @@ extern "C" std::vector<hipModule_t>* __hipRegisterFatBinary(const void* data)
|
||||
return programs;
|
||||
}
|
||||
|
||||
struct ihipExec_t {
|
||||
dim3 gridDim_;
|
||||
dim3 blockDim_;
|
||||
size_t sharedMem_;
|
||||
hipStream_t hStream_;
|
||||
std::vector<char> arguments_;
|
||||
};
|
||||
PlatformState::RegisteredVar::RegisteredVar(char* hostVar, size_t size, hipDeviceptr_t devicePtr)
|
||||
: hostVar_(hostVar), size_(size), devicePtr_(devicePtr) {
|
||||
amd::Memory* amd_mem_obj = nullptr;
|
||||
uint32_t flags = 0;
|
||||
|
||||
thread_local std::stack<ihipExec_t> execStack_;
|
||||
/* Create an amd Memory object for the pointer */
|
||||
amd_mem_obj
|
||||
= new (*hip::getCurrentContext()) amd::Buffer(*hip::getCurrentContext(), flags, size, devicePtr_);
|
||||
|
||||
class PlatformState {
|
||||
amd::Monitor lock_;
|
||||
private:
|
||||
std::unordered_map<const void*, std::vector<hipFunction_t> > functions_;
|
||||
|
||||
struct RegisteredVar {
|
||||
char* var;
|
||||
char* hostVar;
|
||||
char* deviceVar;
|
||||
int size;
|
||||
bool constant;
|
||||
};
|
||||
|
||||
std::unordered_map<std::vector<hipModule_t>*, RegisteredVar> vars_;
|
||||
|
||||
static PlatformState* platform_;
|
||||
|
||||
PlatformState() : lock_("Guards global function map") {}
|
||||
~PlatformState() {}
|
||||
public:
|
||||
static PlatformState& instance() {
|
||||
return *platform_;
|
||||
if (amd_mem_obj == nullptr) {
|
||||
LogError("[OCL] failed to create a mem object!");
|
||||
}
|
||||
|
||||
void registerVar(std::vector<hipModule_t>* modules,
|
||||
char* var,
|
||||
char* hostVar,
|
||||
char* deviceVar,
|
||||
int size,
|
||||
bool constant) {
|
||||
amd::ScopedLock lock(lock_);
|
||||
|
||||
const RegisteredVar rvar = { var, hostVar, deviceVar, size, constant != 0 };
|
||||
|
||||
vars_.insert(std::make_pair(modules, rvar));
|
||||
if (!amd_mem_obj->create(nullptr)) {
|
||||
LogError("[OCL] failed to create a svm hidden buffer!");
|
||||
amd_mem_obj->release();
|
||||
}
|
||||
|
||||
void registerFunction(const void* hostFunction, const std::vector<hipFunction_t>& funcs) {
|
||||
amd::ScopedLock lock(lock_);
|
||||
/* Add the memory to the MemObjMap */
|
||||
amd::MemObjMap::AddMemObj(devicePtr_, amd_mem_obj);
|
||||
}
|
||||
|
||||
functions_.insert(std::make_pair(hostFunction, funcs));
|
||||
void PlatformState::registerVar(const char* hostvar,
|
||||
const std::vector<RegisteredVar>& rvar) {
|
||||
amd::ScopedLock lock(lock_);
|
||||
vars_.insert(std::make_pair(hostvar, rvar));
|
||||
}
|
||||
|
||||
void PlatformState::registerFunction(const void* hostFunction,
|
||||
const std::vector<hipFunction_t>& funcs) {
|
||||
amd::ScopedLock lock(lock_);
|
||||
functions_.insert(std::make_pair(hostFunction, funcs));
|
||||
}
|
||||
|
||||
hipFunction_t PlatformState::getFunc(const void* hostFunction, int deviceId) {
|
||||
amd::ScopedLock lock(lock_);
|
||||
const auto it = functions_.find(hostFunction);
|
||||
if (it != functions_.cend()) {
|
||||
return it->second[deviceId];
|
||||
} else {
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
bool PlatformState::getGlobalVar(const void* hostVar, int deviceId,
|
||||
hipDeviceptr_t* dev_ptr, size_t* size_ptr) {
|
||||
amd::ScopedLock lock(lock_);
|
||||
const auto it = vars_.find(hostVar);
|
||||
if (it != vars_.cend()) {
|
||||
*size_ptr = it->second[deviceId].getvarsize();
|
||||
*dev_ptr = it->second[deviceId].getdeviceptr();
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
void PlatformState::setupArgument(const void *arg, size_t size, size_t offset) {
|
||||
auto& arguments = execStack_.top().arguments_;
|
||||
|
||||
if (arguments.size() < offset + size) {
|
||||
arguments.resize(offset + size);
|
||||
}
|
||||
|
||||
hipFunction_t getFunc(const void* hostFunction, int deviceId) {
|
||||
amd::ScopedLock lock(lock_);
|
||||
const auto it = functions_.find(hostFunction);
|
||||
if (it != functions_.cend()) {
|
||||
return it->second[deviceId];
|
||||
} else {
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
::memcpy(&arguments[offset], arg, size);
|
||||
}
|
||||
|
||||
void setupArgument(const void *arg,
|
||||
size_t size,
|
||||
size_t offset) {
|
||||
auto& arguments = execStack_.top().arguments_;
|
||||
void PlatformState::configureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem,
|
||||
hipStream_t stream) {
|
||||
execStack_.push(ihipExec_t{gridDim, blockDim, sharedMem, stream});
|
||||
}
|
||||
|
||||
if (arguments.size() < offset + size) {
|
||||
arguments.resize(offset + size);
|
||||
}
|
||||
|
||||
::memcpy(&arguments[offset], arg, size);
|
||||
}
|
||||
|
||||
void configureCall(dim3 gridDim,
|
||||
dim3 blockDim,
|
||||
size_t sharedMem,
|
||||
hipStream_t stream) {
|
||||
execStack_.push(ihipExec_t{gridDim, blockDim, sharedMem, stream});
|
||||
}
|
||||
|
||||
void popExec(ihipExec_t& exec) {
|
||||
exec = std::move(execStack_.top());
|
||||
execStack_.pop();
|
||||
}
|
||||
};
|
||||
PlatformState* PlatformState::platform_ = new PlatformState();
|
||||
void PlatformState::popExec(ihipExec_t& exec) {
|
||||
exec = std::move(execStack_.top());
|
||||
execStack_.pop();
|
||||
}
|
||||
|
||||
extern "C" void __hipRegisterFunction(
|
||||
std::vector<hipModule_t>* modules,
|
||||
@@ -248,7 +239,26 @@ extern "C" void __hipRegisterVar(
|
||||
{
|
||||
HIP_INIT();
|
||||
|
||||
PlatformState::instance().registerVar(modules, var, hostVar, deviceVar, size, constant != 0);
|
||||
size_t sym_size = 0;
|
||||
std::vector<PlatformState::RegisteredVar> global_vars{g_devices.size()};
|
||||
|
||||
for (size_t deviceId=0; deviceId < g_devices.size(); ++deviceId) {
|
||||
hipDeviceptr_t device_ptr = nullptr;
|
||||
if((hipSuccess == hipModuleGetGlobal(&device_ptr, &sym_size, modules->at(deviceId),
|
||||
hostVar)) && (device_ptr != nullptr)) {
|
||||
|
||||
if (static_cast<size_t>(size) != sym_size) {
|
||||
LogError("[OCL] Size Mismatch with the HSA Symbol retrieved \n");
|
||||
}
|
||||
|
||||
global_vars[deviceId] = PlatformState::RegisteredVar(hostVar, sym_size, device_ptr);
|
||||
|
||||
} else {
|
||||
LogError("[OCL] __hipRegisterVar cannot find kernel for device \n");
|
||||
}
|
||||
}
|
||||
|
||||
PlatformState::instance().registerVar(hostVar, global_vars);
|
||||
}
|
||||
|
||||
extern "C" void __hipUnregisterFatBinary(std::vector<hipModule_t>* modules)
|
||||
@@ -314,6 +324,22 @@ extern "C" hipError_t hipLaunchByPtr(const void *hostFunction)
|
||||
exec.sharedMem_, exec.hStream_, nullptr, extra));
|
||||
}
|
||||
|
||||
hipError_t hipGetSymbolAddress(void** devPtr, const void* symbolName) {
|
||||
size_t size = 0;
|
||||
if(!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), devPtr, &size)) {
|
||||
HIP_RETURN(hipErrorUnknown);
|
||||
}
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
hipError_t hipGetSymbolSize(size_t* sizePtr, const void* symbolName) {
|
||||
hipDeviceptr_t devPtr = nullptr;
|
||||
if (!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), &devPtr, sizePtr)) {
|
||||
HIP_RETURN(hipErrorUnknown);
|
||||
}
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
#if defined(ATI_OS_LINUX)
|
||||
|
||||
namespace hip_impl {
|
||||
|
||||
Reference in New Issue
Block a user