SWDEV-429529 - Allocate glb_ctx_ even for one device
Move context allocation into the Device::init() method to simplify the logic and handle
HIP_VISIBLE_DEVICES properly.
Change-Id: I0fc6f37c7ae39bedbdad0290295d6794c66d6c54
[ROCm/clr commit: a49d633883]
Этот коммит содержится в:
@@ -872,6 +872,7 @@ extern const char* SchedulerSourceCode;
|
||||
extern const char* SchedulerSourceCode20;
|
||||
extern const char* TrapHandlerCode;
|
||||
|
||||
// ================================================================================================
|
||||
bool Device::create(Pal::IDevice* device) {
|
||||
resourceList_ = new std::unordered_set<Resource*>();
|
||||
if (nullptr == resourceList_) {
|
||||
@@ -1066,33 +1067,6 @@ bool Device::create(Pal::IDevice* device) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if ((glb_ctx_ == nullptr) && (gNumDevices > 1) && (device == gDeviceList[gNumDevices - 1])) {
|
||||
std::vector<amd::Device*> devices;
|
||||
uint32_t numDevices = amd::Device::numDevices(CL_DEVICE_TYPE_GPU, true);
|
||||
// Add all PAL devices
|
||||
for (uint32_t i = gStartDevice; i < numDevices; ++i) {
|
||||
devices.push_back(amd::Device::devices()[i]);
|
||||
}
|
||||
// Add current
|
||||
devices.push_back(this);
|
||||
|
||||
if (devices.size() > 1) {
|
||||
// Create a dummy context
|
||||
glb_ctx_ = new amd::Context(devices, info);
|
||||
if (glb_ctx_ == nullptr) {
|
||||
return false;
|
||||
}
|
||||
amd::Buffer* buf =
|
||||
new (GlbCtx()) amd::Buffer(GlbCtx(), CL_MEM_ALLOC_HOST_PTR, kP2PStagingSize);
|
||||
if ((buf != nullptr) && buf->create()) {
|
||||
p2p_stage_ = buf;
|
||||
} else {
|
||||
delete buf;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -1333,6 +1307,7 @@ static void parseRequestedDeviceList(const char* requestedDeviceList,
|
||||
}
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
bool Device::init() {
|
||||
gStartDevice = amd::Device::numDevices(CL_DEVICE_TYPE_GPU, true);
|
||||
bool useDeviceList = false;
|
||||
@@ -1438,10 +1413,34 @@ bool Device::init() {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Query active devices only
|
||||
constexpr bool kNoOfflineDevices = false;
|
||||
std::vector<amd::Device*> devices = getDevices(CL_DEVICE_TYPE_GPU, kNoOfflineDevices);
|
||||
if (devices.size() > 0) {
|
||||
// Create a dummy context for internal memory allocations on all reported devices
|
||||
glb_ctx_ = new amd::Context(devices, amd::Context::Info());
|
||||
if (glb_ctx_ == nullptr) {
|
||||
return false;
|
||||
}
|
||||
// Allocate a staging buffer for P2P emulation path
|
||||
if (devices.size() > 1) {
|
||||
amd::Buffer* buf =
|
||||
new (*glb_ctx_) amd::Buffer(*glb_ctx_, CL_MEM_ALLOC_HOST_PTR, kP2PStagingSize);
|
||||
if ((buf != nullptr) && buf->create()) {
|
||||
p2p_stage_ = buf;
|
||||
} else {
|
||||
delete buf;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
void Device::tearDown() {
|
||||
if (platform_ != nullptr) {
|
||||
platform_->Destroy();
|
||||
|
||||
@@ -455,6 +455,7 @@ void Device::XferBuffers::release(VirtualGPU& gpu, Memory& buffer) {
|
||||
--acquiredCnt_;
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
bool Device::init() {
|
||||
ClPrint(amd::LOG_INFO, amd::LOG_INIT, "Initializing HSA stack.");
|
||||
|
||||
@@ -543,21 +544,54 @@ bool Device::init() {
|
||||
roc_device.release()->registerDevice();
|
||||
}
|
||||
|
||||
if (0 != Device::numDevices(CL_DEVICE_TYPE_GPU, false)) {
|
||||
// Query active devices only
|
||||
constexpr bool kNoOfflineDevices = false;
|
||||
std::vector<amd::Device*> devices = getDevices(CL_DEVICE_TYPE_GPU, kNoOfflineDevices);
|
||||
if (devices.size() > 0) {
|
||||
bool p2p_available = false;
|
||||
// Loop through all available devices
|
||||
for (auto device1: Device::devices()) {
|
||||
for (auto device1: devices) {
|
||||
// Find all agents that can have access to the current device
|
||||
for (auto agent: static_cast<Device*>(device1)->p2pAgents()) {
|
||||
// Find cl_device_id associated with the current agent
|
||||
for (auto device2: Device::devices()) {
|
||||
for (auto device2: devices) {
|
||||
if (agent.handle == static_cast<Device*>(device2)->getBackendDevice().handle) {
|
||||
// Device2 can have access to device1
|
||||
device2->p2pDevices_.push_back(as_cl(device1));
|
||||
device1->p2p_access_devices_.push_back(device2);
|
||||
p2p_available = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Create a dummy context for internal memory allocations on all reported devices
|
||||
glb_ctx_ = new amd::Context(devices, amd::Context::Info());
|
||||
if (glb_ctx_ == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Allocate a staging buffer for P2P emulation path
|
||||
if ((devices.size() >= 1) && !p2p_available) {
|
||||
amd::Buffer* buf =
|
||||
new (*glb_ctx_) amd::Buffer(*glb_ctx_, CL_MEM_ALLOC_HOST_PTR, kP2PStagingSize);
|
||||
if ((buf != nullptr) && buf->create()) {
|
||||
p2p_stage_ = buf;
|
||||
} else {
|
||||
delete buf;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Allocate mgpu sync buffer for cooperative launches
|
||||
if (amd::IS_HIP) {
|
||||
mg_sync_ = reinterpret_cast<address>(amd::SvmBuffer::malloc(
|
||||
*glb_ctx_, (CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS),
|
||||
kMGInfoSizePerDevice * devices.size(), kMGInfoSizePerDevice));
|
||||
if (mg_sync_ == nullptr) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
@@ -741,44 +775,6 @@ bool Device::create() {
|
||||
// Use just 1 entry by default for the map cache
|
||||
mapCache_->push_back(nullptr);
|
||||
|
||||
if ((glb_ctx_ == nullptr) && (gpu_agents_.size() >= 1) &&
|
||||
// Allow creation for the last device in the list.
|
||||
(gpu_agents_[gpu_agents_.size() - 1].handle == bkendDevice_.handle)) {
|
||||
std::vector<amd::Device*> devices;
|
||||
uint32_t numDevices = amd::Device::numDevices(CL_DEVICE_TYPE_GPU, false);
|
||||
// Add all PAL devices
|
||||
for (uint32_t i = 0; i < numDevices; ++i) {
|
||||
devices.push_back(amd::Device::devices()[i]);
|
||||
}
|
||||
// Add current
|
||||
devices.push_back(this);
|
||||
// Create a dummy context
|
||||
glb_ctx_ = new amd::Context(devices, info);
|
||||
if (glb_ctx_ == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if ((p2p_agents_.size() < (devices.size()-1)) && (devices.size() > 1)) {
|
||||
amd::Buffer* buf = new (GlbCtx()) amd::Buffer(GlbCtx(), CL_MEM_ALLOC_HOST_PTR, kP2PStagingSize);
|
||||
if ((buf != nullptr) && buf->create()) {
|
||||
p2p_stage_ = buf;
|
||||
}
|
||||
else {
|
||||
delete buf;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// Check if sync buffer wasn't allocated yet
|
||||
if (amd::IS_HIP && mg_sync_ == nullptr) {
|
||||
mg_sync_ = reinterpret_cast<address>(amd::SvmBuffer::malloc(
|
||||
GlbCtx(), (CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS),
|
||||
kMGInfoSizePerDevice * GlbCtx().devices().size(), kMGInfoSizePerDevice));
|
||||
if (mg_sync_ == nullptr) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (settings().stagedXferSize_ != 0) {
|
||||
// Initialize staged write buffers
|
||||
if (settings().stagedXferWrite_) {
|
||||
|
||||
Ссылка в новой задаче
Block a user