diff --git a/rocclr/runtime/device/devkernel.cpp b/rocclr/runtime/device/devkernel.cpp index ea4f1677a8..4c40b4ca85 100644 --- a/rocclr/runtime/device/devkernel.cpp +++ b/rocclr/runtime/device/devkernel.cpp @@ -776,11 +776,7 @@ bool Kernel::GetAttrCodePropMetadata(const amd_comgr_metadata_node_t programMD, KernelMD* kernelMD) { amd_comgr_metadata_node_t kernelMeta = {0}; - if (!GetKernelMetadata(programMD, name(), &kernelMeta)) { - if (kernelMeta.handle != 0) { - amd::Comgr::destroy_metadata(kernelMeta); - } return false; } @@ -846,10 +842,12 @@ bool Kernel::GetKernelMetadata(const amd_comgr_metadata_node_t programMD, amd_comgr_metadata_node_t* kernelNode) { amd_comgr_status_t status; amd_comgr_metadata_node_t kernelsMD; + bool hasKernelMD = false; size_t size = 0; status = amd::Comgr::metadata_lookup(programMD, "Kernels", &kernelsMD); if (status == AMD_COMGR_STATUS_SUCCESS) { + hasKernelMD = true; status = amd::Comgr::get_metadata_list_size(kernelsMD, &size); } @@ -858,12 +856,18 @@ bool Kernel::GetKernelMetadata(const amd_comgr_metadata_node_t programMD, std::string kernelName; amd_comgr_metadata_node_t nameMeta; + bool hasNameMeta = false; + bool hasKernelNode = false; + status = amd::Comgr::index_list_metadata(kernelsMD, i, kernelNode); + if (status == AMD_COMGR_STATUS_SUCCESS) { + hasKernelNode = true; status = amd::Comgr::metadata_lookup(*kernelNode, "Name", &nameMeta); } if (status == AMD_COMGR_STATUS_SUCCESS) { + hasNameMeta = true; status = getMetaBuf(nameMeta, &kernelName); } @@ -871,12 +875,19 @@ bool Kernel::GetKernelMetadata(const amd_comgr_metadata_node_t programMD, kernelFound = true; } else { - amd::Comgr::destroy_metadata(*kernelNode); + if (hasKernelNode) { + amd::Comgr::destroy_metadata(*kernelNode); + } + } + + if (hasNameMeta) { + amd::Comgr::destroy_metadata(nameMeta); } - amd::Comgr::destroy_metadata(nameMeta); } - amd::Comgr::destroy_metadata(kernelsMD); + if (hasKernelMD) { + amd::Comgr::destroy_metadata(kernelsMD); + } return kernelFound; } @@ -887,14 +898,19 @@ bool Kernel::SetAvailableSgprVgpr(const std::string& targetIdent) { amd_comgr_metadata_node_t isaMeta; amd_comgr_metadata_node_t sgprMeta; amd_comgr_metadata_node_t vgprMeta; + bool hasIsaMeta = false; + bool hasSgprMeta = false; + bool hasVgprMeta = false; amd_comgr_status_t status = amd::Comgr::get_isa_metadata(targetIdent.c_str(), &isaMeta); if (status == AMD_COMGR_STATUS_SUCCESS) { + hasIsaMeta = true; status = amd::Comgr::metadata_lookup(isaMeta, "AddressableNumSGPRs", &sgprMeta); } if (status == AMD_COMGR_STATUS_SUCCESS) { + hasSgprMeta = true; status = getMetaBuf(sgprMeta, &buf); } @@ -905,13 +921,22 @@ bool Kernel::SetAvailableSgprVgpr(const std::string& targetIdent) { } if (status == AMD_COMGR_STATUS_SUCCESS) { + hasVgprMeta = true; status = getMetaBuf(vgprMeta, &buf); } workGroupInfo_.availableVGPRs_ = (status == AMD_COMGR_STATUS_SUCCESS) ? atoi(buf.c_str()) : 0; - amd::Comgr::destroy_metadata(vgprMeta); - amd::Comgr::destroy_metadata(sgprMeta); - amd::Comgr::destroy_metadata(isaMeta); + if (hasVgprMeta) { + amd::Comgr::destroy_metadata(vgprMeta); + } + + if (hasSgprMeta) { + amd::Comgr::destroy_metadata(sgprMeta); + } + + if (hasIsaMeta) { + amd::Comgr::destroy_metadata(isaMeta); + } return (status == AMD_COMGR_STATUS_SUCCESS); } @@ -920,6 +945,7 @@ bool Kernel::GetPrintfStr(const amd_comgr_metadata_node_t programMD, std::vector* printfStr) { amd_comgr_metadata_node_t printfMeta; + amd_comgr_status_t status = amd::Comgr::metadata_lookup(programMD, "Printf", &printfMeta); if (status != AMD_COMGR_STATUS_SUCCESS) { return true; // printf string metadata is not provided so just exit @@ -941,6 +967,7 @@ bool Kernel::GetPrintfStr(const amd_comgr_metadata_node_t programMD, } if (status != AMD_COMGR_STATUS_SUCCESS) { + amd::Comgr::destroy_metadata(printfMeta); return false; } @@ -961,10 +988,12 @@ void Kernel::InitParameters(const amd_comgr_metadata_node_t kernelMD, uint32_t a size_t offsetStruct = argBufferSize; amd_comgr_metadata_node_t argsMeta; + bool hsaArgsMeta = false; size_t argsSize; amd_comgr_status_t status = amd::Comgr::metadata_lookup(kernelMD, "Args", &argsMeta); if (status == AMD_COMGR_STATUS_SUCCESS) { + hsaArgsMeta = true; status = amd::Comgr::get_metadata_list_size(argsMeta, &argsSize); } @@ -977,10 +1006,12 @@ void Kernel::InitParameters(const amd_comgr_metadata_node_t kernelMD, uint32_t a amd_comgr_metadata_node_t argsNode; amd_comgr_metadata_kind_t kind; + bool hsaArgsNode = false; status = amd::Comgr::index_list_metadata(argsMeta, i, &argsNode); if (status == AMD_COMGR_STATUS_SUCCESS) { + hsaArgsNode = true; status = amd::Comgr::get_metadata_kind(argsNode, &kind); } if (kind != AMD_COMGR_METADATA_KIND_MAP) { @@ -990,10 +1021,14 @@ void Kernel::InitParameters(const amd_comgr_metadata_node_t kernelMD, uint32_t a status = amd::Comgr::iterate_map_metadata(argsNode, populateArgs, static_cast(&lcArg)); } - amd::Comgr::destroy_metadata(argsNode); + if (hsaArgsNode) { + amd::Comgr::destroy_metadata(argsNode); + } if (status != AMD_COMGR_STATUS_SUCCESS) { - amd::Comgr::destroy_metadata(argsMeta); + if (hsaArgsMeta) { + amd::Comgr::destroy_metadata(argsMeta); + } return; } @@ -1052,7 +1087,9 @@ void Kernel::InitParameters(const amd_comgr_metadata_node_t kernelMD, uint32_t a } } - amd::Comgr::destroy_metadata(argsMeta); + if (hsaArgsMeta) { + amd::Comgr::destroy_metadata(argsMeta); + } // Save the number of OCL arguments uint32_t numParams = params.size(); diff --git a/rocclr/runtime/device/devprogram.cpp b/rocclr/runtime/device/devprogram.cpp index 6fe1691cc1..f9cdb9f43d 100644 --- a/rocclr/runtime/device/devprogram.cpp +++ b/rocclr/runtime/device/devprogram.cpp @@ -176,7 +176,7 @@ std::unique_ptr Program::newCompilerInstance() { #if !defined(USE_COMGR_LIBRARY) return std::unique_ptr( amd::opencl_driver::CompilerFactory().CreateAMDGPUCompiler(llvmBin_)); -#else +#else return std::unique_ptr(nullptr); #endif // !defined(USE_COMGR_LIBRARY) } @@ -307,12 +307,17 @@ void Program::setLangAndTargetStr(const char* clStd, amd_comgr_language_t* oclve amd_comgr_status_t Program::createAction(const amd_comgr_language_t oclver, const std::string& targetIdent, const std::string& options, - amd_comgr_action_info_t* action) { + amd_comgr_action_info_t* action, + bool* hasAction) { + *hasAction = false; amd_comgr_status_t status = amd::Comgr::create_action_info(action); - if ((oclver != AMD_COMGR_LANGUAGE_NONE) && (status == AMD_COMGR_STATUS_SUCCESS)) { - status = amd::Comgr::action_info_set_language(*action, oclver); + if (status == AMD_COMGR_STATUS_SUCCESS) { + *hasAction = true; + if (oclver != AMD_COMGR_LANGUAGE_NONE) { + status = amd::Comgr::action_info_set_language(*action, oclver); + } } if (!targetIdent.empty() && (status == AMD_COMGR_STATUS_SUCCESS)) { @@ -342,14 +347,17 @@ bool Program::linkLLVMBitcode(const amd_comgr_data_set_t inputs, // Create the action for linking amd_comgr_action_info_t action; amd_comgr_data_set_t dataSetDevLibs; + bool hasAction = false; + bool hasDataSetDevLibs = false; - amd_comgr_status_t status = createAction(oclver, targetIdent, options, &action); + amd_comgr_status_t status = createAction(oclver, targetIdent, options, &action, &hasAction); if (status == AMD_COMGR_STATUS_SUCCESS) { status = amd::Comgr::create_data_set(&dataSetDevLibs); } if (status == AMD_COMGR_STATUS_SUCCESS) { + hasDataSetDevLibs = true; status = amd::Comgr::do_action(AMD_COMGR_ACTION_ADD_DEVICE_LIBRARIES, action, inputs, dataSetDevLibs); } @@ -367,8 +375,13 @@ bool Program::linkLLVMBitcode(const amd_comgr_data_set_t inputs, extractByteCodeBinary(*output, AMD_COMGR_DATA_KIND_BC, dumpFileName, binaryData, binarySize); } - amd::Comgr::destroy_action_info(action); - amd::Comgr::destroy_data_set(dataSetDevLibs); + if (hasAction) { + amd::Comgr::destroy_action_info(action); + } + + if (hasDataSetDevLibs) { + amd::Comgr::destroy_data_set(dataSetDevLibs); + } return (status == AMD_COMGR_STATUS_SUCCESS); } @@ -389,8 +402,11 @@ bool Program::compileToLLVMBitcode(const amd_comgr_data_set_t inputs, amd_comgr_action_info_t action; amd_comgr_data_set_t output; amd_comgr_data_set_t dataSetPCH; + bool hasAction = false; + bool hsaOutput = false; + bool hsaDataSetPCH = false; - amd_comgr_status_t status = createAction(oclver, targetIdent, options, &action); + amd_comgr_status_t status = createAction(oclver, targetIdent, options, &action, &hasAction); if (status == AMD_COMGR_STATUS_SUCCESS) { status = amd::Comgr::create_data_set(&output); @@ -398,12 +414,14 @@ bool Program::compileToLLVMBitcode(const amd_comgr_data_set_t inputs, // Adding Precompiled Headers if (status == AMD_COMGR_STATUS_SUCCESS) { + hsaOutput = true; status = amd::Comgr::create_data_set(&dataSetPCH); } if (status == AMD_COMGR_STATUS_SUCCESS) { + hsaDataSetPCH = true; status = amd::Comgr::do_action(AMD_COMGR_ACTION_ADD_PRECOMPILED_HEADERS, - action, inputs, dataSetPCH); + action, inputs, dataSetPCH); } // Compiling the source codes with precompiled headers @@ -420,9 +438,17 @@ bool Program::compileToLLVMBitcode(const amd_comgr_data_set_t inputs, extractByteCodeBinary(output, AMD_COMGR_DATA_KIND_BC, outFileName, binaryData, binarySize); } - amd::Comgr::destroy_action_info(action); - amd::Comgr::destroy_data_set(dataSetPCH); - amd::Comgr::destroy_data_set(output); + if (hasAction) { + amd::Comgr::destroy_action_info(action); + } + + if (hsaDataSetPCH) { + amd::Comgr::destroy_data_set(dataSetPCH); + } + + if (hsaOutput) { + amd::Comgr::destroy_data_set(output); + } return (status == AMD_COMGR_STATUS_SUCCESS); } @@ -442,28 +468,42 @@ bool Program::compileAndLinkExecutable(const amd_comgr_data_set_t inputs, amd_comgr_action_info_t action; amd_comgr_data_set_t output; amd_comgr_data_set_t relocatableData; + bool hasAction = false; + bool hsaOutput = false; + bool hsaRelocatableData = false; - amd_comgr_status_t status = createAction(AMD_COMGR_LANGUAGE_NONE, targetIdent, options, &action); + amd_comgr_status_t status = createAction(AMD_COMGR_LANGUAGE_NONE, targetIdent, options, + &action, &hasAction); if (status == AMD_COMGR_STATUS_SUCCESS) { status = amd::Comgr::create_data_set(&output); } - if ((amdOptions->isDumpFlagSet(amd::option::DUMP_ISA)) && (status == AMD_COMGR_STATUS_SUCCESS)) { - // create the assembly data set - amd_comgr_data_set_t assemblyData; - status = amd::Comgr::create_data_set(&assemblyData); - if (status == AMD_COMGR_STATUS_SUCCESS) { - status = amd::Comgr::do_action(AMD_COMGR_ACTION_CODEGEN_BC_TO_ASSEMBLY, - action, inputs, assemblyData); - } + if (status == AMD_COMGR_STATUS_SUCCESS) { + hsaOutput = true; - // dump the ISA - if (status == AMD_COMGR_STATUS_SUCCESS) { - std::string dumpIsaName = amdOptions->getDumpFileName(".s"); - extractByteCodeBinary(assemblyData, AMD_COMGR_DATA_KIND_SOURCE, dumpIsaName); + if (amdOptions->isDumpFlagSet(amd::option::DUMP_ISA)){ + // create the assembly data set + amd_comgr_data_set_t assemblyData; + bool hsaAssemblyData = false; + + status = amd::Comgr::create_data_set(&assemblyData); + if (status == AMD_COMGR_STATUS_SUCCESS) { + hsaAssemblyData = true; + status = amd::Comgr::do_action(AMD_COMGR_ACTION_CODEGEN_BC_TO_ASSEMBLY, + action, inputs, assemblyData); + } + + // dump the ISA + if (status == AMD_COMGR_STATUS_SUCCESS) { + std::string dumpIsaName = amdOptions->getDumpFileName(".s"); + extractByteCodeBinary(assemblyData, AMD_COMGR_DATA_KIND_SOURCE, dumpIsaName); + } + + if (hsaAssemblyData) { + amd::Comgr::destroy_data_set(assemblyData); + } } - amd::Comgr::destroy_data_set(assemblyData); } // Create the relocatiable data set @@ -472,6 +512,7 @@ bool Program::compileAndLinkExecutable(const amd_comgr_data_set_t inputs, } if (status == AMD_COMGR_STATUS_SUCCESS) { + hsaRelocatableData = true; status = amd::Comgr::do_action(AMD_COMGR_ACTION_CODEGEN_BC_TO_RELOCATABLE, action, inputs, relocatableData); } @@ -493,9 +534,17 @@ bool Program::compileAndLinkExecutable(const amd_comgr_data_set_t inputs, executableSize); } - amd::Comgr::destroy_action_info(action); - amd::Comgr::destroy_data_set(relocatableData); - amd::Comgr::destroy_data_set(output); + if (hasAction) { + amd::Comgr::destroy_action_info(action); + } + + if (hsaRelocatableData) { + amd::Comgr::destroy_data_set(relocatableData); + } + + if (hsaOutput) { + amd::Comgr::destroy_data_set(output); + } return (status == AMD_COMGR_STATUS_SUCCESS); } @@ -553,7 +602,6 @@ bool Program::compileImplLC(const std::string& sourceCode, // Iterate through each source code and dump it into tmp std::fstream f; std::vector headerFileNames(headers.size()); - std::vector newDirs; if (!headers.empty()) { for (size_t i = 0; i < headers.size(); ++i) { @@ -1302,19 +1350,24 @@ bool Program::linkImplLC(amd::option::Options* options) { "LLVM Binary", &inputs); amd_comgr_data_set_t linked_bc; + bool hasLinkedBC = false; + if (status == AMD_COMGR_STATUS_SUCCESS) { status = amd::Comgr::create_data_set(&linked_bc); } bool ret = (status == AMD_COMGR_STATUS_SUCCESS); if (ret) { + hasLinkedBC = true; ret = linkLLVMBitcode(inputs, linkOptions, true, options, &linked_bc); } amd::Comgr::destroy_data_set(inputs); if (!ret) { - amd::Comgr::destroy_data_set(linked_bc); + if (hasLinkedBC) { + amd::Comgr::destroy_data_set(linked_bc); + } buildLog_ += "Error: Linking bitcode failed: linking source & IR libraries.\n"; return false; } @@ -2689,7 +2742,7 @@ bool Program::FindGlobalVarSize(void* binary, size_t binSize) { status = amd::Comgr::create_data(AMD_COMGR_DATA_KIND_EXECUTABLE, &binaryData); if (status == AMD_COMGR_STATUS_SUCCESS) { status = amd::Comgr::set_data(binaryData, binSize, - reinterpret_cast(binary)); + reinterpret_cast(binary)); } if (status == AMD_COMGR_STATUS_SUCCESS) { diff --git a/rocclr/runtime/device/devprogram.hpp b/rocclr/runtime/device/devprogram.hpp index 25ed61766d..713270e818 100644 --- a/rocclr/runtime/device/devprogram.hpp +++ b/rocclr/runtime/device/devprogram.hpp @@ -208,6 +208,9 @@ class Program : public amd::HeapObject { //! Get the machine target for the program const char* machineTarget() const { return machineTarget_; } + //! Check if xnack is enable + const bool xnackEnable() const { return (xnackEnabled_ == 1); } + protected: //! pre-compile setup bool initBuild(amd::option::Options* options); @@ -327,7 +330,7 @@ class Program : public amd::HeapObject { //! Create action for the specified language, target and options amd_comgr_status_t createAction(const amd_comgr_language_t oclvar, const std::string& targetIdent, const std::string& options, - amd_comgr_action_info_t* action); + amd_comgr_action_info_t* action, bool* hasAction); //! Create the bitcode of the linked input dataset bool linkLLVMBitcode(const amd_comgr_data_set_t inputs, diff --git a/rocclr/runtime/device/rocm/rockernel.cpp b/rocclr/runtime/device/rocm/rockernel.cpp index 7c843fce2c..e7f2642457 100644 --- a/rocclr/runtime/device/rocm/rockernel.cpp +++ b/rocclr/runtime/device/rocm/rockernel.cpp @@ -50,7 +50,11 @@ bool LightningKernel::init() { assert(workGroupInfo_.availableLDSSize_ > 0); // Get the available SGPRs and VGPRs - const std::string targetIdent = std::string("amdgcn-amd-amdhsa--")+program_->machineTarget(); + std::string targetIdent = std::string("amdgcn-amd-amdhsa--")+program_->machineTarget(); + if (program_->xnackEnable()) { + targetIdent.append("+xnack"); + } + if (!SetAvailableSgprVgpr(targetIdent)) { return false; }