SWDEV-567852 - Clean-up hip::init() (#1948)

这个提交包含在:
German Andryeyev
2025-11-25 19:05:41 -05:00
提交者 GitHub
父节点 cce94f6ee0
当前提交 93682f2f75
+36 -33
查看文件
@@ -38,40 +38,44 @@ std::vector<hip::Device*> g_devices ROCCLR_INIT_PRIORITY(101);
thread_local TlsAggregator tls;
amd::Context* host_context = nullptr;
// ================================================================================================
// init() is only to be called from the HIP_INIT macro only once
void init(bool* status) {
// Configure HIP runtime mode
amd::IS_HIP = true;
GPU_NUM_MEM_DEPENDENCY = 0;
// Determine direct dispatch capability based on build configuration
#if DISABLE_DIRECT_DISPATCH
constexpr bool kDirectDispatch = false;
#else
#if defined(WITH_HSA_DEVICE)
#elif defined(WITH_HSA_DEVICE)
constexpr bool kDirectDispatch = true;
#else
constexpr bool kDirectDispatch = false;
#endif
#endif
AMD_DIRECT_DISPATCH = flagIsDefault(AMD_DIRECT_DISPATCH) ? kDirectDispatch : AMD_DIRECT_DISPATCH;
// Initialize AMD runtime - critical for all subsequent operations
if (!amd::Runtime::init()) {
*status = false;
return;
}
// Log version and configuration for diagnostics
ClPrint(amd::LOG_INFO, amd::LOG_INIT, "HIP Version: %d.%d.%d.%s, Direct Dispatch: %d",
HIP_VERSION_MAJOR, HIP_VERSION_MINOR, HIP_VERSION_PATCH, HIP_VERSION_GITHASH,
AMD_DIRECT_DISPATCH);
// Print the current path of the library
amd::Os::PrintLibraryLocation();
// Enumerate and initialize GPU devices
const std::vector<amd::Device*>& devices = amd::Device::getDevices(CL_DEVICE_TYPE_GPU, false);
const size_t deviceCount = devices.size();
g_devices.reserve(deviceCount); // Pre-allocate space for better performance
const size_t device_count = devices.size();
g_devices.reserve(device_count);
for (unsigned int i = 0; i < deviceCount; i++) {
// Enable active wait on the device by default
devices[i]->SetActiveWait(true);
// use the eternal contexts that already exist for new hip::Device's here
auto device = new Device(&devices[i]->context(), i);
if ((device == nullptr) || !device->Create()) {
for (size_t i = 0; i < device_count; ++i) {
amd::Device* const amd_device = devices[i];
amd_device->SetActiveWait(true);
// Use the eternal contexts that already exist in amd::Device for the new hip::Device
auto* device = new Device(&amd_device->context(), static_cast<unsigned int>(i));
if (!device || !device->Create()) {
*status = false;
return;
}
@@ -79,41 +83,40 @@ void init(bool* status) {
amd::RuntimeTearDown::RegisterObject(device);
}
if (hip::GetHipToolsDispatchTable()->__hipReportDevices_fn != nullptr) {
size_t numDevices = g_devices.size();
// Report devices to HIP tools layer if available
const auto* tools_dispatch_table = hip::GetHipToolsDispatchTable();
if (tools_dispatch_table->__hipReportDevices_fn) {
std::vector<hipUUID> uuids;
uuids.reserve(device_count);
std::vector<hipUUID> uuids(numDevices);
int i = 0;
for (const auto& dev : g_devices) {
auto* deviceHandle = dev->devices()[0];
const auto& info = deviceHandle->info();
memcpy(uuids[i].bytes, info.uuid_, sizeof(info.uuid_));
// if assert fails, the memcpy bytes param needs to be addressed
static_assert(sizeof(info.uuid_) == sizeof(uuids[0].bytes), "error ABI issue");
++i;
for (const auto* dev : g_devices) {
const auto& info = dev->devices()[0]->info();
static_assert(sizeof(info.uuid_) == sizeof(hipUUID::bytes), "UUID size mismatch");
uuids.emplace_back();
std::copy(std::begin(info.uuid_), std::end(info.uuid_), std::begin(uuids.back().bytes));
}
hip::GetHipToolsDispatchTable()->__hipReportDevices_fn(numDevices, uuids.data());
tools_dispatch_table->__hipReportDevices_fn(device_count, uuids.data());
}
amd::Context* hContext = new amd::Context(devices, amd::Context::Info());
if (!hContext) {
// Create and initialize host context
host_context = new amd::Context(devices, amd::Context::Info());
if (!host_context || CL_SUCCESS != host_context->create(nullptr)) {
if (host_context) {
host_context->release();
}
*status = false;
return;
}
if (CL_SUCCESS != hContext->create(nullptr)) {
hContext->release();
}
host_context = hContext;
amd::RuntimeTearDown::RegisterObject(hContext);
amd::RuntimeTearDown::RegisterObject(host_context);
// Complete platform initialization
PlatformState::instance().init();
*status = true;
return;
}
// ================================================================================================
Device* getCurrentDevice() { return tls.device_; }
void setCurrentDevice(unsigned int index) {