Introducing hip::Device which wraps around amd::Context and deviceId

Change-Id: Ie35a6edb65c001b35eb9f5d2af26e765dc41c00e
Αυτή η υποβολή περιλαμβάνεται σε:
Christophe Paquot
2020-02-18 12:36:12 -08:00
υποβλήθηκε από Christophe Paquot
γονέας e066cc6f60
υποβολή b4ad4262cc
9 αρχεία άλλαξαν με 145 προσθήκες και 120 διαγραφές
+48 -43
Προβολή Αρχείου
@@ -24,17 +24,17 @@
#include "utils/flags.hpp"
#include "utils/versions.hpp"
std::vector<amd::Context*> g_devices;
std::vector<hip::Device*> g_devices;
namespace hip {
thread_local amd::Context* g_context = nullptr;
thread_local std::stack<amd::Context*> g_ctxtStack;
thread_local Device* g_device = nullptr;
thread_local std::stack<Device*> g_ctxtStack;
thread_local hipError_t g_lastError = hipSuccess;
std::once_flag g_ihipInitialized;
amd::Context* host_context = nullptr;
Device* host_device = nullptr;
std::map<amd::Context*, amd::HostQueue*> g_nullStreams;
std::map<Device*, amd::HostQueue*> g_nullStreams;
void init() {
if (!amd::Runtime::initialized()) {
@@ -53,27 +53,28 @@ void init() {
if (context && CL_SUCCESS != context->create(nullptr)) {
context->release();
} else {
g_devices.push_back(context);
g_devices.push_back(new Device(context, i));
}
}
host_context = new amd::Context(devices, amd::Context::Info());
if (!host_context) return;
amd::Context* hContext = new amd::Context(devices, amd::Context::Info());
if (!hContext) return;
if (host_context && CL_SUCCESS != host_context->create(nullptr)) {
host_context->release();
if (CL_SUCCESS != hContext->create(nullptr)) {
hContext->release();
}
host_device = new Device(hContext, -1);
PlatformState::instance().init();
}
amd::Context* getCurrentContext() {
return g_context;
Device* getCurrentDevice() {
return g_device;
}
void setCurrentContext(unsigned int index) {
void setCurrentDevice(unsigned int index) {
assert(index<g_devices.size());
g_context = g_devices[index];
g_device = g_devices[index];
}
amd::HostQueue* getQueue(hipStream_t stream) {
@@ -89,23 +90,31 @@ amd::HostQueue* getQueue(hipStream_t stream) {
}
}
amd::HostQueue* getNullStream(amd::Context& context) {
auto stream = g_nullStreams.find(&context);
amd::HostQueue* getNullStream(Device& dev) {
auto stream = g_nullStreams.find(&dev);
if (stream == g_nullStreams.end()) {
amd::Device* device = context.devices()[0];
amd::Device* device = dev.devices()[0];
cl_command_queue_properties properties = CL_QUEUE_PROFILING_ENABLE;
amd::HostQueue* queue = new amd::HostQueue(context, *device, properties,
amd::HostQueue* queue = new amd::HostQueue(*dev.asContext(), *device, properties,
amd::CommandQueue::RealTimeDisabled,
amd::CommandQueue::Priority::Normal);
g_nullStreams[&context] = queue;
g_nullStreams[&dev] = queue;
return queue;
}
return stream->second;
}
amd::HostQueue* getNullStream(amd::Context& ctx) {
for (auto& it : g_nullStreams) {
if (it.first->asContext() == &ctx) {
return it.second;
}
}
return nullptr;
}
amd::HostQueue* getNullStream() {
amd::Context* context = getCurrentContext();
return context ? getNullStream(*context) : nullptr;
Device* device = getCurrentDevice();
return device ? getNullStream(*device) : nullptr;
}
};
@@ -142,11 +151,11 @@ hipError_t hipCtxSetCurrent(hipCtx_t ctx) {
g_ctxtStack.pop();
}
} else {
hip::g_context = reinterpret_cast<amd::Context*>(as_amd(ctx));
hip::g_device = reinterpret_cast<hip::Device*>(ctx);
if(!g_ctxtStack.empty()) {
g_ctxtStack.pop();
}
g_ctxtStack.push(hip::getCurrentContext());
g_ctxtStack.push(hip::getCurrentDevice());
}
HIP_RETURN(hipSuccess);
@@ -155,7 +164,7 @@ hipError_t hipCtxSetCurrent(hipCtx_t ctx) {
hipError_t hipCtxGetCurrent(hipCtx_t* ctx) {
HIP_INIT_API(hipCtxGetCurrent, ctx);
*ctx = reinterpret_cast<hipCtx_t>(hip::getCurrentContext());
*ctx = reinterpret_cast<hipCtx_t>(hip::getCurrentDevice());
HIP_RETURN(hipSuccess);
}
@@ -183,8 +192,8 @@ hipError_t hipRuntimeGetVersion(int *runtimeVersion) {
hipError_t hipCtxDestroy(hipCtx_t ctx) {
HIP_INIT_API(hipCtxDestroy, ctx);
amd::Context* amdContext = reinterpret_cast<amd::Context*>(as_amd(ctx));
if (amdContext == nullptr) {
hip::Device* dev = reinterpret_cast<hip::Device*>(ctx);
if (dev == nullptr) {
HIP_RETURN(hipErrorInvalidValue);
}
@@ -192,15 +201,15 @@ hipError_t hipCtxDestroy(hipCtx_t ctx) {
hip::getNullStream()->setLastQueuedCommand(nullptr);
// Need to remove the ctx of calling thread if its the top one
if (!g_ctxtStack.empty() && g_ctxtStack.top() == amdContext) {
if (!g_ctxtStack.empty() && g_ctxtStack.top() == dev) {
g_ctxtStack.pop();
}
// Remove context from global context list
for (unsigned int i = 0; i < g_devices.size(); i++) {
if (g_devices[i] == amdContext) {
if (g_devices[i] == dev) {
// Decrement ref count for device primary context
amdContext->release();
dev->release();
}
}
@@ -210,13 +219,13 @@ hipError_t hipCtxDestroy(hipCtx_t ctx) {
hipError_t hipCtxPopCurrent(hipCtx_t* ctx) {
HIP_INIT_API(hipCtxPopCurrent, ctx);
amd::Context* amdContext = reinterpret_cast<amd::Context*>(as_amd(ctx));
if (amdContext == nullptr) {
hip::Device** dev = reinterpret_cast<hip::Device**>(ctx);
if (dev == nullptr) {
HIP_RETURN(hipErrorInvalidContext);
}
if (!g_ctxtStack.empty()) {
amdContext = g_ctxtStack.top();
*dev = g_ctxtStack.top();
g_ctxtStack.pop();
} else {
HIP_RETURN(hipErrorInvalidContext);
@@ -228,13 +237,13 @@ hipError_t hipCtxPopCurrent(hipCtx_t* ctx) {
hipError_t hipCtxPushCurrent(hipCtx_t ctx) {
HIP_INIT_API(hipCtxPushCurrent, ctx);
amd::Context* amdContext = reinterpret_cast<amd::Context*>(as_amd(ctx));
if (amdContext == nullptr) {
hip::Device* dev = reinterpret_cast<hip::Device*>(ctx);
if (dev == nullptr) {
HIP_RETURN(hipErrorInvalidContext);
}
hip::g_context = amdContext;
g_ctxtStack.push(hip::getCurrentContext());
hip::g_device = dev;
g_ctxtStack.push(hip::getCurrentDevice());
HIP_RETURN(hipSuccess);
}
@@ -259,12 +268,8 @@ hipError_t hipCtxGetDevice(hipDevice_t* device) {
HIP_INIT_API(hipCtxGetDevice, device);
if (device != nullptr) {
for (unsigned int i = 0; i < g_devices.size(); i++) {
if (g_devices[i] == hip::getCurrentContext()) {
*device = static_cast<hipDevice_t>(i);
HIP_RETURN(hipSuccess);
}
}
*device = hip::getCurrentDevice()->deviceId();
HIP_RETURN(hipSuccess);
} else {
HIP_RETURN(hipErrorInvalidValue);
}
@@ -332,7 +337,7 @@ hipError_t hipDevicePrimaryCtxGetState(hipDevice_t dev, unsigned int* flags, int
}
if (active != nullptr) {
*active = (g_devices[dev] == hip::getCurrentContext())? 1 : 0;
*active = (g_devices[dev] == hip::getCurrentDevice())? 1 : 0;
}
HIP_RETURN(hipSuccess);
@@ -430,12 +430,7 @@ hipError_t hipDeviceSynchronize ( void ) {
}
int ihipGetDevice() {
for (unsigned int i = 0; i < g_devices.size(); i++) {
if (g_devices[i] == hip::getCurrentContext()) {
return i;
}
}
return -1;
return hip::getCurrentDevice()->deviceId();
}
hipError_t hipGetDevice ( int* deviceId ) {
@@ -483,7 +478,7 @@ hipError_t hipSetDevice ( int device ) {
HIP_INIT_API(hipSetDevice, device);
if (static_cast<unsigned int>(device) < g_devices.size()) {
hip::setCurrentContext(device);
hip::setCurrentDevice(device);
HIP_RETURN(hipSuccess);
}
+40 -12
Προβολή Αρχείου
@@ -43,9 +43,9 @@ typedef struct ihipIpcMemHandle_st {
} ihipIpcMemHandle_t;
#define HIP_INIT() \
std::call_once(hip::g_ihipInitialized, hip::init); \
if (hip::g_context == nullptr && g_devices.size() > 0) { \
hip::g_context = g_devices[0]; \
std::call_once(hip::g_ihipInitialized, hip::init); \
if (hip::g_device == nullptr && g_devices.size() > 0) { \
hip::g_device = g_devices[0]; \
}
// This macro should be called at the beginning of every HIP API.
@@ -69,19 +69,49 @@ class accelerator_view;
};
namespace hip {
/// HIP Device class
class Device {
/// VDI context
amd::Context* context_;
/// Device's ID
/// Store it here so we don't have to loop through the device list every time
int deviceId_;
public:
Device(amd::Context* ctx, int devId): context_(ctx), deviceId_(devId) { assert(ctx != nullptr); }
~Device() {}
amd::Context* asContext() const { return context_; }
int deviceId() const { return deviceId_; }
void retain() const { context_->retain(); }
void release() const { context_->release(); }
const std::vector<amd::Device*>& devices() const { return context_->devices(); }
};
extern std::once_flag g_ihipInitialized;
extern thread_local amd::Context* g_context;
/// Current thread's device
extern thread_local Device* g_device;
extern thread_local hipError_t g_lastError;
extern amd::Context* host_context;
/// Device representing the host - for pinned memory
extern Device* host_device;
extern void init();
extern amd::Context* getCurrentContext();
extern void setCurrentContext(unsigned int index);
extern Device* getCurrentDevice();
extern void setCurrentDevice(unsigned int index);
/// Get VDI queue associated with hipStream
/// Note: This follows the CUDA spec to sync with default streams
/// and Blocking streams
extern amd::HostQueue* getQueue(hipStream_t s);
/// Get default stream of the device
extern amd::HostQueue* getNullStream(Device&);
/// Get default stream associated with the VDI context
extern amd::HostQueue* getNullStream(amd::Context&);
/// Get default stream of the thread
extern amd::HostQueue* getNullStream();
/// Sync Blocking streams on the current device
/// TODO: It currently syncs all Blocking streams on all devices
extern void syncStreams();
@@ -98,13 +128,11 @@ namespace hip {
struct Stream {
amd::HostQueue* queue;
amd::Device* device;
amd::Context* context;
Device* device;
amd::CommandQueue::Priority priority;
unsigned int flags;
int deviceId;
Stream(amd::Device* dev, amd::Context* ctx, amd::CommandQueue::Priority p, unsigned int f, int d);
Stream(Device* dev, amd::CommandQueue::Priority p, unsigned int f);
void create();
amd::HostQueue* asHostQueue();
void destroy();
@@ -207,7 +235,7 @@ public:
void popExec(ihipExec_t& exec);
};
extern std::vector<amd::Context*> g_devices;
extern std::vector<hip::Device*> g_devices;
extern hipError_t ihipDeviceGetCount(int* count);
extern int ihipGetDevice();
extern hipError_t ihipMalloc(void** ptr, size_t sizeBytes, unsigned int flags);
+18 -18
Προβολή Αρχείου
@@ -65,7 +65,7 @@ hipError_t ihipMalloc(void** ptr, size_t sizeBytes, unsigned int flags)
}
amd::Context* amdContext = ((flags & CL_MEM_SVM_FINE_GRAIN_BUFFER) != 0)?
hip::host_context : hip::getCurrentContext();
hip::host_device->asContext() : hip::getCurrentDevice()->asContext();
if (amdContext == nullptr) {
return hipErrorOutOfMemory;
@@ -234,7 +234,7 @@ hipError_t hipFree(void* ptr) {
for (size_t i=0; i<g_devices.size(); ++i) {
hip::getNullStream(*g_devices[i])->finish();
}
amd::SvmBuffer::free(*hip::getCurrentContext(), ptr);
amd::SvmBuffer::free(*hip::getCurrentDevice()->asContext(), ptr);
HIP_RETURN(hipSuccess);
}
HIP_RETURN(hipErrorInvalidValue);
@@ -276,7 +276,7 @@ hipError_t hipHostFree(void* ptr) {
HIP_INIT_API(hipHostFree, ptr);
if (amd::SvmBuffer::malloced(ptr)) {
amd::SvmBuffer::free(*hip::getCurrentContext(), ptr);
amd::SvmBuffer::free(*hip::getCurrentDevice()->asContext(), ptr);
HIP_RETURN(hipSuccess);
}
HIP_RETURN(hipErrorInvalidValue);
@@ -286,7 +286,7 @@ hipError_t hipFreeArray(hipArray* array) {
HIP_INIT_API(hipFreeArray, array);
if (amd::SvmBuffer::malloced(array->data)) {
amd::SvmBuffer::free(*hip::getCurrentContext(), array->data);
amd::SvmBuffer::free(*hip::getCurrentDevice()->asContext(), array->data);
HIP_RETURN(hipSuccess);
}
HIP_RETURN(hipErrorInvalidValue);
@@ -314,7 +314,7 @@ hipError_t hipMemGetInfo(size_t* free, size_t* total) {
HIP_INIT_API(hipMemGetInfo, free, total);
size_t freeMemory[2];
amd::Device* device = hip::getCurrentContext()->devices()[0];
amd::Device* device = hip::getCurrentDevice()->devices()[0];
if(device == nullptr) {
HIP_RETURN(hipErrorInvalidDevice);
}
@@ -332,7 +332,7 @@ hipError_t hipMemGetInfo(size_t* free, size_t* total) {
hipError_t ihipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t height, size_t depth,
cl_mem_object_type imageType, const cl_image_format* image_format) {
amd::Device* device = hip::getCurrentContext()->devices()[0];
amd::Device* device = hip::getCurrentDevice()->devices()[0];
if ((width == 0) || (height == 0)) {
*ptr = nullptr;
@@ -352,7 +352,7 @@ hipError_t ihipMallocPitch(void** ptr, size_t* pitch, size_t width, size_t heigh
*pitch = amd::alignUp(width * imageFormat.getElementSize(), device->info().imagePitchAlignment_);
size_t sizeBytes = *pitch * height * depth;
*ptr = amd::SvmBuffer::malloc(*hip::getCurrentContext(), 0, sizeBytes,
*ptr = amd::SvmBuffer::malloc(*hip::getCurrentDevice()->asContext(), 0, sizeBytes,
device->info().memBaseAddrAlign_);
if (*ptr == nullptr) {
@@ -554,7 +554,7 @@ hipError_t hipHostGetFlags(unsigned int* flagsPtr, void* hostPtr) {
hipError_t hipHostRegister(void* hostPtr, size_t sizeBytes, unsigned int flags) {
HIP_INIT_API(hipHostRegister, hostPtr, sizeBytes, flags);
if(hostPtr != nullptr) {
amd::Memory* mem = new (*hip::host_context) amd::Buffer(*hip::host_context, CL_MEM_USE_HOST_PTR | CL_MEM_SVM_ATOMICS, sizeBytes);
amd::Memory* mem = new (*hip::host_device->asContext()) amd::Buffer(*hip::host_device->asContext(), CL_MEM_USE_HOST_PTR | CL_MEM_SVM_ATOMICS, sizeBytes);
constexpr bool sysMemAlloc = false;
constexpr bool skipAlloc = false;
@@ -564,7 +564,7 @@ hipError_t hipHostRegister(void* hostPtr, size_t sizeBytes, unsigned int flags)
HIP_RETURN(hipErrorOutOfMemory);
}
for (const auto& device: hip::getCurrentContext()->devices()) {
for (const auto& device: hip::getCurrentDevice()->devices()) {
// Since the amd::Memory object is shared between all devices
// it's fine to have multiple addresses mapped to it
const device::Memory* devMem = mem->getDeviceMemory(*device);
@@ -584,7 +584,7 @@ hipError_t hipHostUnregister(void* hostPtr) {
if (amd::SvmBuffer::malloced(hostPtr)) {
hip::syncStreams();
hip::getNullStream()->finish();
amd::SvmBuffer::free(*hip::host_context, hostPtr);
amd::SvmBuffer::free(*hip::host_device->asContext(), hostPtr);
HIP_RETURN(hipSuccess);
} else {
size_t offset = 0;
@@ -593,7 +593,7 @@ hipError_t hipHostUnregister(void* hostPtr) {
if(mem) {
hip::syncStreams();
hip::getNullStream()->finish();
for (const auto& device: hip::getCurrentContext()->devices()) {
for (const auto& device: hip::getCurrentDevice()->devices()) {
const device::Memory* devMem = mem->getDeviceMemory(*device);
amd::MemObjMap::RemoveMemObj(reinterpret_cast<void*>(devMem->virtualAddress()));
}
@@ -1582,7 +1582,7 @@ hipError_t hipIpcGetMemHandle(hipIpcMemHandle_t* handle, void* dev_ptr) {
}
/* Get Device::Memory object pointer */
dev_mem_obj = amd_mem_obj->getDeviceMemory(*hip::getCurrentContext()->devices()[0],false);
dev_mem_obj = amd_mem_obj->getDeviceMemory(*hip::getCurrentDevice()->devices()[0],false);
if (dev_mem_obj == nullptr) {
HIP_RETURN(hipErrorInvalidDevicePointer);
}
@@ -1606,7 +1606,7 @@ hipError_t hipIpcOpenMemHandle(void** dev_ptr, hipIpcMemHandle_t handle, unsigne
}
/* Call the IPC Attach from Device class */
device = hip::getCurrentContext()->devices()[0];
device = hip::getCurrentDevice()->devices()[0];
ihandle = reinterpret_cast<ihipIpcMemHandle_t *>(&handle);
amd_mem_obj = device->IpcAttach(&(ihandle->ipc_handle), ihandle->psize, flags, dev_ptr);
@@ -1641,7 +1641,7 @@ hipError_t hipIpcCloseMemHandle(void* dev_ptr) {
}
/* Call IPC Detach from Device class */
device = hip::getCurrentContext()->devices()[0];
device = hip::getCurrentDevice()->devices()[0];
if (device == nullptr) {
HIP_RETURN(hipErrorNoDevice);
}
@@ -1674,7 +1674,7 @@ hipError_t hipHostGetDevicePointer(void** devicePointer, void* hostPointer, unsi
if (!memObj) {
HIP_RETURN(hipErrorInvalidValue);
}
*devicePointer = reinterpret_cast<void*>(memObj->getDeviceMemory(*hip::getCurrentContext()->devices()[0])->virtualAddress() + offset);
*devicePointer = reinterpret_cast<void*>(memObj->getDeviceMemory(*hip::getCurrentDevice()->devices()[0])->virtualAddress() + offset);
HIP_RETURN(hipSuccess);
}
@@ -1693,13 +1693,13 @@ hipError_t hipPointerGetAttributes(hipPointerAttribute_t* attributes, const void
attributes->isManaged = 0;
attributes->allocationFlags = memObj->getMemFlags() >> 16;
amd::Context &memObjCtx = memObj->getContext();
if (*hip::host_context == memObjCtx) {
amd::Context* memObjCtx = &memObj->getContext();
if (hip::host_device->asContext() == memObjCtx) {
attributes->device = ihipGetDevice();
HIP_RETURN(hipSuccess);
}
for (auto& ctx : g_devices) {
if (*ctx == memObjCtx) {
if (ctx->asContext() == memObjCtx) {
attributes->device = device;
HIP_RETURN(hipSuccess);
}
@@ -126,7 +126,7 @@ bool ihipModuleRegisterUndefined(amd::Program* program, hipModule_t* module) {
std::vector<std::string> undef_vars;
device::Program* dev_program
= program->getDeviceProgram(*hip::getCurrentContext()->devices()[0]);
= program->getDeviceProgram(*hip::getCurrentDevice()->devices()[0]);
if (!dev_program->getUndefinedVarFromCodeObj(&undef_vars)) {
return false;
@@ -157,7 +157,7 @@ bool ihipModuleRegisterGlobal(amd::Program* program, hipModule_t* module) {
std::vector<std::string> var_names;
device::Program* dev_program
= program->getDeviceProgram(*hip::getCurrentContext()->devices()[0]);
= program->getDeviceProgram(*hip::getCurrentDevice()->devices()[0]);
if (!dev_program->getGlobalVarFromCodeObj(&var_names)) {
return false;
@@ -180,17 +180,17 @@ bool ihipModuleRegisterGlobal(amd::Program* program, hipModule_t* module) {
hipError_t ihipModuleLoadData(hipModule_t *module, const void *image)
{
std::vector<std::pair<const void*, size_t>> code_objs;
if (__hipExtractCodeObjectFromFatBinary(image, {hip::getCurrentContext()->devices()[0]->info().name_}, code_objs))
if (__hipExtractCodeObjectFromFatBinary(image, {hip::getCurrentDevice()->devices()[0]->info().name_}, code_objs))
image = code_objs[0].first;
amd::Program* program = new amd::Program(*hip::getCurrentContext());
amd::Program* program = new amd::Program(*hip::getCurrentDevice()->asContext());
if (program == NULL) {
return hipErrorOutOfMemory;
}
program->setVarInfoCallBack(&getSvarInfo);
if (CL_SUCCESS != program->addDeviceProgram(*hip::getCurrentContext()->devices()[0], image, ElfSize(image))) {
if (CL_SUCCESS != program->addDeviceProgram(*hip::getCurrentDevice()->devices()[0], image, ElfSize(image))) {
return hipErrorInvalidKernelFile;
}
@@ -204,7 +204,7 @@ hipError_t ihipModuleLoadData(hipModule_t *module, const void *image)
return hipErrorSharedObjectSymbolNotFound;
}
if(CL_SUCCESS != program->build(hip::getCurrentContext()->devices(), nullptr, nullptr, nullptr)) {
if(CL_SUCCESS != program->build(hip::getCurrentDevice()->devices(), nullptr, nullptr, nullptr)) {
return hipErrorSharedObjectInitFailed;
}
@@ -147,8 +147,7 @@ void PlatformState::digestFatBinary(const void* data, std::vector<std::pair<hipM
std::vector<std::pair<const void*, size_t>> code_objs;
std::vector<const char*> devices;
for (size_t dev = 0; dev < g_devices.size(); ++dev) {
amd::Context* ctx = g_devices[dev];
devices.push_back(ctx->devices()[0]->info().name_);
devices.push_back(g_devices[dev]->devices()[0]->info().name_);
}
if (!__hipExtractCodeObjectFromFatBinary((char*)data, devices, code_objs)) {
@@ -158,7 +157,7 @@ void PlatformState::digestFatBinary(const void* data, std::vector<std::pair<hipM
programs.resize(g_devices.size());
for (size_t dev = 0; dev < g_devices.size(); ++dev) {
amd::Context* ctx = g_devices[dev];
amd::Context* ctx = g_devices[dev]->asContext();
amd::Program* program = new amd::Program(*ctx);
if (program == nullptr) {
return;
@@ -259,7 +258,7 @@ void PlatformState::registerFunction(const void* hostFunction,
bool ihipGetFuncAttributes(const char* func_name, amd::Program* program, hipFuncAttributes* func_attr) {
device::Program* dev_program
= program->getDeviceProgram(*hip::getCurrentContext()->devices()[0]);
= program->getDeviceProgram(*hip::getCurrentDevice()->devices()[0]);
const auto it = dev_program->kernels().find(std::string(func_name));
if (it == dev_program->kernels().cend()) {
@@ -540,7 +539,7 @@ extern "C" hipError_t hipLaunchByPtr(const void *hostFunction)
PlatformState::instance().popExec(exec);
hip::Stream* stream = reinterpret_cast<hip::Stream*>(exec.hStream_);
int deviceId = (stream != nullptr)? stream->deviceId : ihipGetDevice();
int deviceId = (stream != nullptr)? stream->device->deviceId() : ihipGetDevice();
if (deviceId == -1) {
HIP_RETURN(hipErrorNoDevice);
}
@@ -590,7 +589,7 @@ hipError_t ihipCreateGlobalVarObj(const char* name, hipModule_t hmod, amd::Memor
/* Get Device Program pointer*/
program = as_amd(reinterpret_cast<cl_program>(hmod));
dev_program = program->getDeviceProgram(*hip::getCurrentContext()->devices()[0]);
dev_program = program->getDeviceProgram(*hip::getCurrentDevice()->devices()[0]);
if (dev_program == nullptr) {
HIP_RETURN(hipErrorInvalidDeviceFunction);
@@ -631,7 +630,7 @@ hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks,
if (blockSize == 0) {
HIP_RETURN(hipErrorInvalidValue);
}
amd::Device* device = hip::getCurrentContext()->devices()[0];
amd::Device* device = hip::getCurrentDevice()->devices()[0];
const device::Kernel::WorkGroupInfo* wrkGrpInfo = kernel->getDeviceKernel(*device)->workGroupInfo();
// Find threads accupancy per CU => simd_per_cu * GPR usage
@@ -859,7 +858,7 @@ const std::vector<hipModule_t>& modules() {
std::string target(desc->triple + sizeof(HCC_AMDGCN_AMDHSA_TRIPLE),
desc->tripleSize - sizeof(HCC_AMDGCN_AMDHSA_TRIPLE));
if (isCompatibleCodeObject(target, hip::getCurrentContext()->devices()[0]->info().name_)) {
if (isCompatibleCodeObject(target, hip::getCurrentDevice()->devices()[0]->info().name_)) {
hipModule_t module;
if (hipSuccess == hipModuleLoadData(&module, reinterpret_cast<const void*>(
reinterpret_cast<uintptr_t>(obheader) + desc->offset)))
@@ -942,7 +941,7 @@ extern "C" hipError_t hipLaunchKernel(const void *hostFunction,
stream);
hip::Stream* s = reinterpret_cast<hip::Stream*>(stream);
int deviceId = (s != nullptr)? s->deviceId : ihipGetDevice();
int deviceId = (s != nullptr)? s->device->deviceId() : ihipGetDevice();
if (deviceId == -1) {
HIP_RETURN(hipErrorNoDevice);
}
+8 -8
Προβολή Αρχείου
@@ -211,12 +211,12 @@ hiprtcResult hiprtcCreateProgram(hiprtcProgram* prog, const char* src, const cha
HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT);
}
amd::Program* program = new amd::Program(*hip::getCurrentContext(), src, amd::Program::HIP);
amd::Program* program = new amd::Program(*hip::getCurrentDevice()->asContext(), src, amd::Program::HIP);
if (program == NULL) {
HIPRTC_RETURN(HIPRTC_ERROR_INVALID_INPUT);
}
if (CL_SUCCESS != program->addDeviceProgram(*hip::getCurrentContext()->devices()[0])) {
if (CL_SUCCESS != program->addDeviceProgram(*hip::getCurrentDevice()->devices()[0])) {
program->release();
HIPRTC_RETURN(HIPRTC_ERROR_PROGRAM_CREATION_FAILURE);
}
@@ -241,7 +241,7 @@ hiprtcResult hiprtcCompileProgram(hiprtcProgram prog, int numOptions, const char
std::vector<const char*> oarr(&options[0], &options[numOptions]);
std::copy(oarr.begin(), oarr.end(), std::ostream_iterator<std::string>(ostrstr, " "));
std::vector<amd::Device*> devices{hip::getCurrentContext()->devices()[0]};
std::vector<amd::Device*> devices{hip::getCurrentDevice()->devices()[0]};
if (CL_SUCCESS != program->build(devices, ostrstr.str().c_str(), nullptr, nullptr)) {
HIPRTC_RETURN(HIPRTC_ERROR_COMPILATION);
}
@@ -278,7 +278,7 @@ hiprtcResult hiprtcGetLoweredName(hiprtcProgram prog, const char* name_expressio
amd::Program* program = as_amd(reinterpret_cast<cl_program>(prog));
device::Program* dev_program
= program->getDeviceProgram(*hip::getCurrentContext()->devices()[0]);
= program->getDeviceProgram(*hip::getCurrentDevice()->devices()[0]);
auto it = ProgramState::instance().nameExpresssion_.find(name_expression);
if (it == ProgramState::instance().nameExpresssion_.end()) {
@@ -325,7 +325,7 @@ hiprtcResult hiprtcGetCode(hiprtcProgram prog, char* binaryMem) {
amd::Program* program = as_amd(reinterpret_cast<cl_program>(prog));
const device::Program::binary_t& binary =
program->getDeviceProgram(*hip::getCurrentContext()->devices()[0])->binary();
program->getDeviceProgram(*hip::getCurrentDevice()->devices()[0])->binary();
::memcpy(binaryMem, binary.first, binary.second);
@@ -339,7 +339,7 @@ hiprtcResult hiprtcGetCodeSize(hiprtcProgram prog, size_t* binarySizeRet) {
amd::Program* program = as_amd(reinterpret_cast<cl_program>(prog));
*binarySizeRet =
program->getDeviceProgram(*hip::getCurrentContext()->devices()[0])->binary().second;
program->getDeviceProgram(*hip::getCurrentDevice()->devices()[0])->binary().second;
HIPRTC_RETURN(HIPRTC_SUCCESS);
}
@@ -349,7 +349,7 @@ hiprtcResult hiprtcGetProgramLog(hiprtcProgram prog, char* dst) {
HIPRTC_INIT_API(prog, dst);
amd::Program* program = as_amd(reinterpret_cast<cl_program>(prog));
const device::Program* devProgram =
program->getDeviceProgram(*hip::getCurrentContext()->devices()[0]);
program->getDeviceProgram(*hip::getCurrentDevice()->devices()[0]);
auto log = program->programLog() + devProgram->buildLog().c_str();
@@ -365,7 +365,7 @@ hiprtcResult hiprtcGetProgramLogSize(hiprtcProgram prog, size_t* logSizeRet) {
amd::Program* program = as_amd(reinterpret_cast<cl_program>(prog));
const device::Program* devProgram =
program->getDeviceProgram(*hip::getCurrentContext()->devices()[0]);
program->getDeviceProgram(*hip::getCurrentDevice()->devices()[0]);
auto log = program->programLog() + devProgram->buildLog().c_str();
@@ -50,12 +50,12 @@ void syncStreams() {
}
}
Stream::Stream(amd::Device* dev, amd::Context* ctx, amd::CommandQueue::Priority p, unsigned int f, int d) :
queue(nullptr), device(dev), context(ctx), priority(p), flags(f), deviceId(d) {}
Stream::Stream(hip::Device* dev, amd::CommandQueue::Priority p, unsigned int f) :
queue(nullptr), device(dev), priority(p), flags(f) {}
void Stream::create() {
cl_command_queue_properties properties = CL_QUEUE_PROFILING_ENABLE;
queue = new amd::HostQueue(*context, *device, properties,
queue = new amd::HostQueue(*device->asContext(), *device->devices()[0], properties,
amd::CommandQueue::RealTimeDisabled, priority);
assert(queue != nullptr);
queue->create();
@@ -93,9 +93,7 @@ void CL_CALLBACK ihipStreamCallback(cl_event event, cl_int command_exec_status,
}
static hipError_t ihipStreamCreate(hipStream_t *stream, unsigned int flags, amd::CommandQueue::Priority priority) {
amd::Device* device = hip::getCurrentContext()->devices()[0];
hip::Stream* hStream = new hip::Stream(device, hip::getCurrentContext(), priority, flags, ihipGetDevice());
hip::Stream* hStream = new hip::Stream(hip::getCurrentDevice(), priority, flags);
if (hStream == nullptr) {
return hipErrorOutOfMemory;
+11 -11
Προβολή Αρχείου
@@ -242,7 +242,7 @@ amd::Sampler* fillSamplerDescriptor(enum hipTextureAddressMode addressMode,
address_mode = CL_ADDRESS_CLAMP_TO_EDGE;
break;
}
amd::Sampler* sampler = new amd::Sampler(*hip::getCurrentContext(),
amd::Sampler* sampler = new amd::Sampler(*hip::getCurrentDevice()->asContext(),
normalizedCoords == CL_TRUE,
address_mode, filter_mode, CL_FILTER_NONE, 0.f, CL_MAXFLOAT);
if (sampler == nullptr) {
@@ -263,11 +263,11 @@ hip::TextureObject* ihipCreateTextureObject(const hipResourceDesc& resDesc, amd:
return nullptr;
}
device::Memory* imageMem = image.getDeviceMemory(*hip::getCurrentContext()->devices()[0]);
device::Memory* imageMem = image.getDeviceMemory(*hip::getCurrentDevice()->devices()[0]);
memcpy(texture->imageSRD, imageMem->cpuSrd(), sizeof(uint32_t)*HIP_IMAGE_OBJECT_SIZE_DWORD);
texture->image = &image;
device::Sampler* devSampler = sampler.getDeviceSampler(*hip::getCurrentContext()->devices()[0]);
device::Sampler* devSampler = sampler.getDeviceSampler(*hip::getCurrentDevice()->devices()[0]);
memcpy(texture->samplerSRD, devSampler->hwState(), sizeof(uint32_t)*HIP_SAMPLER_OBJECT_SIZE_DWORD);
texture->sampler = &sampler;
@@ -281,7 +281,7 @@ hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, const hipResou
const hipResourceViewDesc* pResViewDesc) {
HIP_INIT_API(NONE, pTexObject, pResDesc, pTexDesc, pResViewDesc);
amd::Device* device = hip::getCurrentContext()->devices()[0];
amd::Device* device = hip::getCurrentDevice()->devices()[0];
if (!device->info().imageSupport_) {
HIP_RETURN(hipErrorInvalidValue);
@@ -319,14 +319,14 @@ hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, const hipResou
switch(pResDesc->res.array.array->textureType) {
case hipTextureType3D:
clType = CL_MEM_OBJECT_IMAGE3D;
image = new (*hip::getCurrentContext()) amd::Image(*memory->asBuffer(),
image = new (*hip::getCurrentDevice()->asContext()) amd::Image(*memory->asBuffer(),
clType, memory->getMemFlags(), imageFormat,
pResDesc->res.array.array->width, pResDesc->res.array.array->height,
pResDesc->res.array.array->depth, 0, 0);
break;
case hipTextureType2D:
clType = CL_MEM_OBJECT_IMAGE2D;
image = new (*hip::getCurrentContext()) amd::Image(*memory->asBuffer(),
image = new (*hip::getCurrentDevice()->asContext()) amd::Image(*memory->asBuffer(),
clType, memory->getMemFlags(), imageFormat,
pResDesc->res.array.array->width, pResDesc->res.array.array->height, 1, 0, 0);
break;
@@ -349,7 +349,7 @@ hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, const hipResou
&image_format.image_channel_order, &image_format.image_channel_data_type);
const amd::Image::Format imageFormat(image_format);
image = new (*hip::getCurrentContext()) amd::Image(*memory->asBuffer(),
image = new (*hip::getCurrentDevice()->asContext()) amd::Image(*memory->asBuffer(),
CL_MEM_OBJECT_IMAGE2D, memory->getMemFlags(), imageFormat,
pResDesc->res.linear.sizeInBytes / imageFormat.getElementSize(), 1, 1,
pResDesc->res.linear.sizeInBytes, 0);
@@ -359,7 +359,7 @@ hipError_t hipCreateTextureObject(hipTextureObject_t* pTexObject, const hipResou
assert(pResViewDesc == nullptr);
memory = getMemoryObject(pResDesc->res.pitch2D.devPtr, offset);
image = new (*hip::getCurrentContext()) amd::Image(*memory->asBuffer(),
image = new (*hip::getCurrentDevice()->asContext()) amd::Image(*memory->asBuffer(),
CL_MEM_OBJECT_IMAGE2D, memory->getMemFlags(), imageFormat,
pResDesc->res.pitch2D.width, pResDesc->res.pitch2D.height, 1,
pResDesc->res.pitch2D.pitchInBytes, 0);
@@ -434,7 +434,7 @@ hipError_t ihipBindTexture(cl_mem_object_type type,
if (tex == nullptr) {
return hipErrorInvalidImage;
}
if (hip::getCurrentContext()) {
if (hip::getCurrentDevice()) {
cl_image_format image_format;
size_t byteSize;
size_t rowPitch = 0;
@@ -461,7 +461,7 @@ hipError_t ihipBindTexture(cl_mem_object_type type,
break;
}
amd::Image* image = new (*hip::getCurrentContext()) amd::Image(*memory->asBuffer(),
amd::Image* image = new (*hip::getCurrentDevice()->asContext()) amd::Image(*memory->asBuffer(),
type, memory->getMemFlags(), imageFormat, width, height, depth, rowPitch, slicePitch);
if (!image->create()) {
delete image;
@@ -767,7 +767,7 @@ hipError_t hipTexRefGetAddress(hipDeviceptr_t* dev_ptr, textureReference tex) {
HIP_RETURN(hipErrorInvalidImage);
}
dev_mem = texture->image->getDeviceMemory(*hip::getCurrentContext()->devices()[0]);
dev_mem = texture->image->getDeviceMemory(*hip::getCurrentDevice()->devices()[0]);
if (dev_mem == nullptr) {
HIP_RETURN(hipErrorInvalidImage);
}