SWDEV-429529 - Allocate glb_ctx_ even for one device

Move context allocation into the Device::init() method to simplify the logic and handle
HIP_VISIBLE_DEVICES properly.

Change-Id: I0fc6f37c7ae39bedbdad0290295d6794c66d6c54


[ROCm/clr commit: a49d633883]
Этот коммит содержится в:
German
2023-10-27 15:00:15 -04:00
родитель 39ae3fedd5
Коммит 005aebbfad
2 изменённых файла: 63 добавления и 68 удалений
+26 -27
Просмотреть файл
@@ -872,6 +872,7 @@ extern const char* SchedulerSourceCode;
extern const char* SchedulerSourceCode20;
extern const char* TrapHandlerCode;
// ================================================================================================
bool Device::create(Pal::IDevice* device) {
resourceList_ = new std::unordered_set<Resource*>();
if (nullptr == resourceList_) {
@@ -1066,33 +1067,6 @@ bool Device::create(Pal::IDevice* device) {
return false;
}
if ((glb_ctx_ == nullptr) && (gNumDevices > 1) && (device == gDeviceList[gNumDevices - 1])) {
std::vector<amd::Device*> devices;
uint32_t numDevices = amd::Device::numDevices(CL_DEVICE_TYPE_GPU, true);
// Add all PAL devices
for (uint32_t i = gStartDevice; i < numDevices; ++i) {
devices.push_back(amd::Device::devices()[i]);
}
// Add current
devices.push_back(this);
if (devices.size() > 1) {
// Create a dummy context
glb_ctx_ = new amd::Context(devices, info);
if (glb_ctx_ == nullptr) {
return false;
}
amd::Buffer* buf =
new (GlbCtx()) amd::Buffer(GlbCtx(), CL_MEM_ALLOC_HOST_PTR, kP2PStagingSize);
if ((buf != nullptr) && buf->create()) {
p2p_stage_ = buf;
} else {
delete buf;
return false;
}
}
}
return true;
}
@@ -1333,6 +1307,7 @@ static void parseRequestedDeviceList(const char* requestedDeviceList,
}
}
// ================================================================================================
bool Device::init() {
gStartDevice = amd::Device::numDevices(CL_DEVICE_TYPE_GPU, true);
bool useDeviceList = false;
@@ -1438,10 +1413,34 @@ bool Device::init() {
}
}
}
// Query active devices only
constexpr bool kNoOfflineDevices = false;
std::vector<amd::Device*> devices = getDevices(CL_DEVICE_TYPE_GPU, kNoOfflineDevices);
if (devices.size() > 0) {
// Create a dummy context for internal memory allocations on all reported devices
glb_ctx_ = new amd::Context(devices, amd::Context::Info());
if (glb_ctx_ == nullptr) {
return false;
}
// Allocate a staging buffer for P2P emulation path
if (devices.size() > 1) {
amd::Buffer* buf =
new (*glb_ctx_) amd::Buffer(*glb_ctx_, CL_MEM_ALLOC_HOST_PTR, kP2PStagingSize);
if ((buf != nullptr) && buf->create()) {
p2p_stage_ = buf;
} else {
delete buf;
return false;
}
}
}
}
return true;
}
// ================================================================================================
void Device::tearDown() {
if (platform_ != nullptr) {
platform_->Destroy();
+37 -41
Просмотреть файл
@@ -455,6 +455,7 @@ void Device::XferBuffers::release(VirtualGPU& gpu, Memory& buffer) {
--acquiredCnt_;
}
// ================================================================================================
bool Device::init() {
ClPrint(amd::LOG_INFO, amd::LOG_INIT, "Initializing HSA stack.");
@@ -543,21 +544,54 @@ bool Device::init() {
roc_device.release()->registerDevice();
}
if (0 != Device::numDevices(CL_DEVICE_TYPE_GPU, false)) {
// Query active devices only
constexpr bool kNoOfflineDevices = false;
std::vector<amd::Device*> devices = getDevices(CL_DEVICE_TYPE_GPU, kNoOfflineDevices);
if (devices.size() > 0) {
bool p2p_available = false;
// Loop through all available devices
for (auto device1: Device::devices()) {
for (auto device1: devices) {
// Find all agents that can have access to the current device
for (auto agent: static_cast<Device*>(device1)->p2pAgents()) {
// Find cl_device_id associated with the current agent
for (auto device2: Device::devices()) {
for (auto device2: devices) {
if (agent.handle == static_cast<Device*>(device2)->getBackendDevice().handle) {
// Device2 can have access to device1
device2->p2pDevices_.push_back(as_cl(device1));
device1->p2p_access_devices_.push_back(device2);
p2p_available = true;
}
}
}
}
// Create a dummy context for internal memory allocations on all reported devices
glb_ctx_ = new amd::Context(devices, amd::Context::Info());
if (glb_ctx_ == nullptr) {
return false;
}
// Allocate a staging buffer for P2P emulation path
if ((devices.size() >= 1) && !p2p_available) {
amd::Buffer* buf =
new (*glb_ctx_) amd::Buffer(*glb_ctx_, CL_MEM_ALLOC_HOST_PTR, kP2PStagingSize);
if ((buf != nullptr) && buf->create()) {
p2p_stage_ = buf;
} else {
delete buf;
return false;
}
}
// Allocate mgpu sync buffer for cooperative launches
if (amd::IS_HIP) {
mg_sync_ = reinterpret_cast<address>(amd::SvmBuffer::malloc(
*glb_ctx_, (CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS),
kMGInfoSizePerDevice * devices.size(), kMGInfoSizePerDevice));
if (mg_sync_ == nullptr) {
return false;
}
}
}
return true;
@@ -741,44 +775,6 @@ bool Device::create() {
// Use just 1 entry by default for the map cache
mapCache_->push_back(nullptr);
if ((glb_ctx_ == nullptr) && (gpu_agents_.size() >= 1) &&
// Allow creation for the last device in the list.
(gpu_agents_[gpu_agents_.size() - 1].handle == bkendDevice_.handle)) {
std::vector<amd::Device*> devices;
uint32_t numDevices = amd::Device::numDevices(CL_DEVICE_TYPE_GPU, false);
// Add all PAL devices
for (uint32_t i = 0; i < numDevices; ++i) {
devices.push_back(amd::Device::devices()[i]);
}
// Add current
devices.push_back(this);
// Create a dummy context
glb_ctx_ = new amd::Context(devices, info);
if (glb_ctx_ == nullptr) {
return false;
}
if ((p2p_agents_.size() < (devices.size()-1)) && (devices.size() > 1)) {
amd::Buffer* buf = new (GlbCtx()) amd::Buffer(GlbCtx(), CL_MEM_ALLOC_HOST_PTR, kP2PStagingSize);
if ((buf != nullptr) && buf->create()) {
p2p_stage_ = buf;
}
else {
delete buf;
return false;
}
}
// Check if sync buffer wasn't allocated yet
if (amd::IS_HIP && mg_sync_ == nullptr) {
mg_sync_ = reinterpret_cast<address>(amd::SvmBuffer::malloc(
GlbCtx(), (CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS),
kMGInfoSizePerDevice * GlbCtx().devices().size(), kMGInfoSizePerDevice));
if (mg_sync_ == nullptr) {
return false;
}
}
}
if (settings().stagedXferSize_ != 0) {
// Initialize staged write buffers
if (settings().stagedXferWrite_) {