P4 to Git Change 1757948 by kjayapra@1_HIPWS_SL_IPC on 2019/03/18 18:29:24

SWDEV-144570 - Implementation of
	               hipMemcpyToSymbol, hipMemcpyFromSymbol,
	               hipMemcpyToSymbolAsync, hipMemcpyFromSymbolAsync,
	               hipGetSymbolAddress, hipModuleGetGlobal

Affected files ...

... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.def.in#12 edit
... //depot/stg/opencl/drivers/opencl/api/hip/hip_hcc.map.in#13 edit
... //depot/stg/opencl/drivers/opencl/api/hip/hip_internal.hpp#23 edit
... //depot/stg/opencl/drivers/opencl/api/hip/hip_memory.cpp#45 edit
... //depot/stg/opencl/drivers/opencl/api/hip/hip_module.cpp#21 edit
... //depot/stg/opencl/drivers/opencl/api/hip/hip_platform.cpp#22 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/devprogram.hpp#20 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.cpp#101 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.hpp#45 edit
This commit is contained in:
foreman
2019-03-18 18:44:55 -04:00
rodzic f9942770e0
commit e432cfb2e9
6 zmienionych plików z 245 dodań i 86 usunięć
+2
Wyświetl plik
@@ -92,6 +92,8 @@ hipMemcpyFromArray
hipMemcpyToSymbol
hipMemcpyToSymbolAsync
hipMemGetAddressRange
hipGetSymbolAddress
hipGetSymbolSize
hipMemGetInfo
hipMemPtrGetInfo
hipMemset
+2
Wyświetl plik
@@ -93,6 +93,8 @@ global:
hipMemcpyToSymbol;
hipMemcpyToSymbolAsync;
hipMemGetAddressRange;
hipGetSymbolAddress;
hipGetSymbolSize;
hipMemGetInfo;
hipMemPtrGetInfo;
hipMemset;
+53
Wyświetl plik
@@ -85,6 +85,59 @@ namespace hip {
static Function* asFunction(hipFunction_t f) { return reinterpret_cast<Function*>(f); }
};
};
struct ihipExec_t {
dim3 gridDim_;
dim3 blockDim_;
size_t sharedMem_;
hipStream_t hStream_;
std::vector<char> arguments_;
};
class PlatformState {
amd::Monitor lock_;
public:
struct RegisteredVar {
public:
RegisteredVar(): hostVar_(nullptr), size_(0), devicePtr_(nullptr) {}
RegisteredVar(char* hostVar, size_t size, hipDeviceptr_t devicePtr);
~RegisteredVar() {}
hipDeviceptr_t getdeviceptr() const { return devicePtr_; };
size_t getvarsize() const { return size_; };
private:
char* hostVar_; // Variable name in host code
size_t size_; // Size of the variable
hipDeviceptr_t devicePtr_; //Device Memory Address of the variable.
};
private:
std::unordered_map<const void*, std::vector<hipFunction_t> > functions_;
std::unordered_map<const void*, std::vector<RegisteredVar> > vars_;
static PlatformState* platform_;
PlatformState() : lock_("Guards global function map") {}
~PlatformState() {}
public:
static PlatformState& instance() {
return *platform_;
}
void registerVar(const char* hostvar, const std::vector<RegisteredVar>& rvar);
void registerFunction(const void* hostFunction, const std::vector<hipFunction_t>& funcs);
hipFunction_t getFunc(const void* hostFunction, int deviceId);
bool getGlobalVar(const void* hostVar, int deviceId, hipDeviceptr_t* dev_ptr,
size_t* size_ptr);
void setupArgument(const void *arg, size_t size, size_t offset);
void configureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, hipStream_t stream);
void popExec(ihipExec_t& exec);
};
extern std::vector<amd::Context*> g_devices;
extern hipError_t ihipDeviceGetCount(int* count);
extern int ihipGetDevice();
+68 -8
Wyświetl plik
@@ -557,36 +557,96 @@ hipError_t hipMemcpyToSymbol(const void* symbolName, const void* src, size_t cou
size_t offset, hipMemcpyKind kind) {
HIP_INIT_API(symbolName, src, count, offset, kind);
assert(0 && "Unimplemented");
size_t sym_size = 0;
hipDeviceptr_t device_ptr = nullptr;
HIP_RETURN(hipErrorUnknown);
/* Get address and size for the global symbol */
if (!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), &device_ptr,
&sym_size)) {
HIP_RETURN(hipErrorUnknown);
}
/* Size Check to make sure offset is correct */
if ((offset + count) != sym_size) {
return HIP_RETURN(hipErrorUnknown);
}
device_ptr = reinterpret_cast<address>(device_ptr) + offset;
/* Copy memory from source to destination address */
HIP_RETURN(hipMemcpy(device_ptr, src, count, kind));
}
hipError_t hipMemcpyFromSymbol(void* dst, const void* symbolName, size_t count,
size_t offset, hipMemcpyKind kind) {
HIP_INIT_API(symbolName, dst, count, offset, kind);
assert(0 && "Unimplemented");
size_t sym_size = 0;
hipDeviceptr_t device_ptr = nullptr;
HIP_RETURN(hipErrorUnknown);
/* Get address and size for the global symbol */
if (!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), &device_ptr,
&sym_size)) {
HIP_RETURN(hipErrorUnknown);
}
/* Size Check to make sure offset is correct */
if ((offset + count) != sym_size) {
return HIP_RETURN(hipErrorUnknown);
}
device_ptr = reinterpret_cast<address>(device_ptr) + offset;
/* Copy memory from source to destination address */
HIP_RETURN(hipMemcpy(dst, device_ptr, count, kind));
}
hipError_t hipMemcpyToSymbolAsync(const void* symbolName, const void* src, size_t count,
size_t offset, hipMemcpyKind kind, hipStream_t stream) {
HIP_INIT_API(symbolName, src, count, offset, kind, stream);
assert(0 && "Unimplemented");
size_t sym_size = 0;
hipDeviceptr_t device_ptr = nullptr;
HIP_RETURN(hipErrorUnknown);
/* Get address and size for the global symbol */
if (!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), &device_ptr,
&sym_size)) {
HIP_RETURN(hipErrorUnknown);
}
/* Size Check to make sure offset is correct */
if ((offset + count) != sym_size) {
return HIP_RETURN(hipErrorUnknown);
}
device_ptr = reinterpret_cast<address>(device_ptr) + offset;
/* Copy memory from source to destination address */
HIP_RETURN(hipMemcpyAsync(device_ptr, src, count, kind, stream));
}
hipError_t hipMemcpyFromSymbolAsync(void* dst, const void* symbolName, size_t count,
size_t offset, hipMemcpyKind kind, hipStream_t stream) {
HIP_INIT_API(symbolName, dst, count, offset, kind, stream);
assert(0 && "Unimplemented");
size_t sym_size = 0;
hipDeviceptr_t device_ptr = nullptr;
HIP_RETURN(hipErrorUnknown);
/* Get address and size for the global symbol */
if (!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), &device_ptr,
&sym_size)) {
HIP_RETURN(hipErrorUnknown);
}
/* Size Check to make sure offset is correct */
if ((offset + count) != sym_size) {
return HIP_RETURN(hipErrorUnknown);
}
device_ptr = reinterpret_cast<address>(device_ptr) + offset;
/* Copy memory from source to destination address */
HIP_RETURN(hipMemcpyAsync(dst, device_ptr, count, kind, stream));
}
hipError_t hipMemcpyHtoD(hipDeviceptr_t dst, void* src, size_t sizeBytes) {
+16
Wyświetl plik
@@ -137,6 +137,22 @@ hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t h
{
HIP_INIT_API(dptr, bytes, hmod, name);
amd::Program* program = nullptr;
const device::Program* dev_program = nullptr;
/* Get Device Program pointer*/
program = as_amd(reinterpret_cast<cl_program>(hmod));
dev_program = program->getDeviceProgram(*hip::getCurrentContext()->devices()[0]);
if (dev_program == nullptr) {
HIP_RETURN(hipErrorUnknown);
}
/* Find the global Symbols */
if(!dev_program->findGlobalSymbols(dptr, bytes, name)) {
HIP_RETURN(hipErrorUnknown);
}
HIP_RETURN(hipSuccess);
}
+104 -78
Wyświetl plik
@@ -31,6 +31,9 @@ THE SOFTWARE.
constexpr unsigned __hipFatMAGIC2 = 0x48495046; // "HIPF"
thread_local std::stack<ihipExec_t> execStack_;
PlatformState* PlatformState::platform_ = new PlatformState();
struct __CudaFatBinaryWrapper {
unsigned int magic;
unsigned int version;
@@ -111,94 +114,82 @@ extern "C" std::vector<hipModule_t>* __hipRegisterFatBinary(const void* data)
return programs;
}
struct ihipExec_t {
dim3 gridDim_;
dim3 blockDim_;
size_t sharedMem_;
hipStream_t hStream_;
std::vector<char> arguments_;
};
PlatformState::RegisteredVar::RegisteredVar(char* hostVar, size_t size, hipDeviceptr_t devicePtr)
: hostVar_(hostVar), size_(size), devicePtr_(devicePtr) {
amd::Memory* amd_mem_obj = nullptr;
uint32_t flags = 0;
thread_local std::stack<ihipExec_t> execStack_;
/* Create an amd Memory object for the pointer */
amd_mem_obj
= new (*hip::getCurrentContext()) amd::Buffer(*hip::getCurrentContext(), flags, size, devicePtr_);
class PlatformState {
amd::Monitor lock_;
private:
std::unordered_map<const void*, std::vector<hipFunction_t> > functions_;
struct RegisteredVar {
char* var;
char* hostVar;
char* deviceVar;
int size;
bool constant;
};
std::unordered_map<std::vector<hipModule_t>*, RegisteredVar> vars_;
static PlatformState* platform_;
PlatformState() : lock_("Guards global function map") {}
~PlatformState() {}
public:
static PlatformState& instance() {
return *platform_;
if (amd_mem_obj == nullptr) {
LogError("[OCL] failed to create a mem object!");
}
void registerVar(std::vector<hipModule_t>* modules,
char* var,
char* hostVar,
char* deviceVar,
int size,
bool constant) {
amd::ScopedLock lock(lock_);
const RegisteredVar rvar = { var, hostVar, deviceVar, size, constant != 0 };
vars_.insert(std::make_pair(modules, rvar));
if (!amd_mem_obj->create(nullptr)) {
LogError("[OCL] failed to create a svm hidden buffer!");
amd_mem_obj->release();
}
void registerFunction(const void* hostFunction, const std::vector<hipFunction_t>& funcs) {
amd::ScopedLock lock(lock_);
/* Add the memory to the MemObjMap */
amd::MemObjMap::AddMemObj(devicePtr_, amd_mem_obj);
}
functions_.insert(std::make_pair(hostFunction, funcs));
void PlatformState::registerVar(const char* hostvar,
const std::vector<RegisteredVar>& rvar) {
amd::ScopedLock lock(lock_);
vars_.insert(std::make_pair(hostvar, rvar));
}
void PlatformState::registerFunction(const void* hostFunction,
const std::vector<hipFunction_t>& funcs) {
amd::ScopedLock lock(lock_);
functions_.insert(std::make_pair(hostFunction, funcs));
}
hipFunction_t PlatformState::getFunc(const void* hostFunction, int deviceId) {
amd::ScopedLock lock(lock_);
const auto it = functions_.find(hostFunction);
if (it != functions_.cend()) {
return it->second[deviceId];
} else {
return nullptr;
}
}
bool PlatformState::getGlobalVar(const void* hostVar, int deviceId,
hipDeviceptr_t* dev_ptr, size_t* size_ptr) {
amd::ScopedLock lock(lock_);
const auto it = vars_.find(hostVar);
if (it != vars_.cend()) {
*size_ptr = it->second[deviceId].getvarsize();
*dev_ptr = it->second[deviceId].getdeviceptr();
return true;
} else {
return false;
}
}
void PlatformState::setupArgument(const void *arg, size_t size, size_t offset) {
auto& arguments = execStack_.top().arguments_;
if (arguments.size() < offset + size) {
arguments.resize(offset + size);
}
hipFunction_t getFunc(const void* hostFunction, int deviceId) {
amd::ScopedLock lock(lock_);
const auto it = functions_.find(hostFunction);
if (it != functions_.cend()) {
return it->second[deviceId];
} else {
return nullptr;
}
}
::memcpy(&arguments[offset], arg, size);
}
void setupArgument(const void *arg,
size_t size,
size_t offset) {
auto& arguments = execStack_.top().arguments_;
void PlatformState::configureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem,
hipStream_t stream) {
execStack_.push(ihipExec_t{gridDim, blockDim, sharedMem, stream});
}
if (arguments.size() < offset + size) {
arguments.resize(offset + size);
}
::memcpy(&arguments[offset], arg, size);
}
void configureCall(dim3 gridDim,
dim3 blockDim,
size_t sharedMem,
hipStream_t stream) {
execStack_.push(ihipExec_t{gridDim, blockDim, sharedMem, stream});
}
void popExec(ihipExec_t& exec) {
exec = std::move(execStack_.top());
execStack_.pop();
}
};
PlatformState* PlatformState::platform_ = new PlatformState();
void PlatformState::popExec(ihipExec_t& exec) {
exec = std::move(execStack_.top());
execStack_.pop();
}
extern "C" void __hipRegisterFunction(
std::vector<hipModule_t>* modules,
@@ -248,7 +239,26 @@ extern "C" void __hipRegisterVar(
{
HIP_INIT();
PlatformState::instance().registerVar(modules, var, hostVar, deviceVar, size, constant != 0);
size_t sym_size = 0;
std::vector<PlatformState::RegisteredVar> global_vars{g_devices.size()};
for (size_t deviceId=0; deviceId < g_devices.size(); ++deviceId) {
hipDeviceptr_t device_ptr = nullptr;
if((hipSuccess == hipModuleGetGlobal(&device_ptr, &sym_size, modules->at(deviceId),
hostVar)) && (device_ptr != nullptr)) {
if (static_cast<size_t>(size) != sym_size) {
LogError("[OCL] Size Mismatch with the HSA Symbol retrieved \n");
}
global_vars[deviceId] = PlatformState::RegisteredVar(hostVar, sym_size, device_ptr);
} else {
LogError("[OCL] __hipRegisterVar cannot find kernel for device \n");
}
}
PlatformState::instance().registerVar(hostVar, global_vars);
}
extern "C" void __hipUnregisterFatBinary(std::vector<hipModule_t>* modules)
@@ -314,6 +324,22 @@ extern "C" hipError_t hipLaunchByPtr(const void *hostFunction)
exec.sharedMem_, exec.hStream_, nullptr, extra));
}
hipError_t hipGetSymbolAddress(void** devPtr, const void* symbolName) {
size_t size = 0;
if(!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), devPtr, &size)) {
HIP_RETURN(hipErrorUnknown);
}
HIP_RETURN(hipSuccess);
}
hipError_t hipGetSymbolSize(size_t* sizePtr, const void* symbolName) {
hipDeviceptr_t devPtr = nullptr;
if (!PlatformState::instance().getGlobalVar(symbolName, ihipGetDevice(), &devPtr, sizePtr)) {
HIP_RETURN(hipErrorUnknown);
}
HIP_RETURN(hipSuccess);
}
#if defined(ATI_OS_LINUX)
namespace hip_impl {