diff --git a/rocclr/device/device.cpp b/rocclr/device/device.cpp index 44b93ebfd3..daf91fa7a5 100644 --- a/rocclr/device/device.cpp +++ b/rocclr/device/device.cpp @@ -1,4 +1,4 @@ -/* Copyright (c) 2008-presenet Advanced Micro Devices, Inc. +/* Copyright (c) 2008-present Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/rocclr/device/device.hpp b/rocclr/device/device.hpp index 4db476d56f..79493ac852 100644 --- a/rocclr/device/device.hpp +++ b/rocclr/device/device.hpp @@ -588,7 +588,7 @@ class Settings : public amd::HeapObject { uint reportFMAF_ : 1; //!< Report FP_FAST_FMAF define in CL program uint reportFMA_ : 1; //!< Report FP_FAST_FMA define in CL program uint singleFpDenorm_ : 1; //!< Support Single FP Denorm - uint hsailExplicitXnack_ : 1; //!< Xnack in hsail path for this deivce + uint hsailExplicitXnack_ : 1; //!< Xnack in hsail path for this device uint useLightning_ : 1; //!< Enable LC path for this device uint enableWgpMode_ : 1; //!< Enable WGP mode for this device uint enableWave32Mode_ : 1; //!< Enable Wave32 mode for this device diff --git a/rocclr/device/devprogram.cpp b/rocclr/device/devprogram.cpp index 68fd97d5c7..97db40b883 100644 --- a/rocclr/device/devprogram.cpp +++ b/rocclr/device/devprogram.cpp @@ -522,7 +522,7 @@ bool Program::compileAndLinkExecutable(const amd_comgr_data_set_t inputs, } } - // Create the relocatiable data set + // Create the relocatable data set if (status == AMD_COMGR_STATUS_SUCCESS) { status = amd::Comgr::create_data_set(&relocatableData); } diff --git a/rocclr/device/gpu/gpuprogram.hpp b/rocclr/device/gpu/gpuprogram.hpp index 10bdafdf9b..17796d89af 100644 --- a/rocclr/device/gpu/gpuprogram.hpp +++ b/rocclr/device/gpu/gpuprogram.hpp @@ -138,7 +138,7 @@ class NullProgram : public device::Program { protected: /*! 
\brief Compiles GPU CL program to LLVM binary (compiler frontend) * - * \return True if we successefully compiled a GPU program + * \return True if we successfully compiled a GPU program */ virtual bool compileImpl(const std::string& sourceCode, //!< the program's source code const std::vector& headers, //!< header souce codes @@ -168,21 +168,21 @@ class NullProgram : public device::Program { /*! \brief Parses the GPU program and finds all available kernels * - * \return True if we successefully parsed the GPU program + * \return True if we successfully parsed the GPU program */ bool parseKernels(const std::string& source //! the program's source code ); /*! \brief Parse all functions in the program * - * \return True if we successefully parsed all functions + * \return True if we successfully parsed all functions */ bool parseAllILFuncs(const std::string& source //! the program's source code ); /*! \brief Parse a function's metadata given as source[posBegin:posEnd-1] * - * \return True if we successefully parsed the given metadata + * \return True if we successfully parsed the given metadata */ bool parseFuncMetadata(const std::string& source, //! string that contains metadata size_t posBegin, //! begin of metadata in 'source' @@ -192,7 +192,7 @@ class NullProgram : public device::Program { /*! \brief Finds functions with the given start and end string in the * program * - * \return True if we successefully found all functions + * \return True if we successfully found all functions */ bool findILFuncs(const std::string& source, //! the program's source code const std::string& func_start, //! the start string of a function @@ -339,10 +339,10 @@ class Program : public NullProgram { //! Returns pritnf info array const std::vector& printfInfo() const { return printf_; } - //! Return a typecasted GPU device + //! Return a typecasted GPU device gpu::Device& dev() { return const_cast(static_cast(device())); } - protected: +protected: private: //! 
Disable copy constructor Program(const Program&); diff --git a/rocclr/device/gpu/gslbe/src/rt/GSLDevice.cpp b/rocclr/device/gpu/gslbe/src/rt/GSLDevice.cpp index 8387b587cc..6b4d4dca46 100644 --- a/rocclr/device/gpu/gslbe/src/rt/GSLDevice.cpp +++ b/rocclr/device/gpu/gslbe/src/rt/GSLDevice.cpp @@ -1228,7 +1228,7 @@ CALGSLDevice::resMapLocal(size_t& pitch, //! @todo Workaround strange GSL/CMM-QS behavior. OCL doesn't require a sync, //! because resource isn't busy on the CAL device. However without sync there are less CBs available - //! Conformanace multidevice test will create around 60 queues, instead of 70 + //! Conformance multidevice test will create around 60 queues, instead of 70 uint32 mode = (IS_LINUX) ? GSL_SYNCUPLOAD_SYNC_WAIT | GSL_SYNCUPLOAD_SYNC_START : 0; m_cs->DMACopy(mem, 0, memMap->mem, 0, surfaceSize, mode, NULL); @@ -1284,7 +1284,7 @@ CALGSLDevice::resUnmapLocal(gslMemObject mem) //! @todo Workaround strange GSL/CMM-QS behavior. OCL doesn't require a sync, //! because resource isn't busy on the CAL device. However without sync there are less CBs available - //! Conformanace multidevice test will create around 60 queues, instead of 70 + //! Conformance multidevice test will create around 60 queues, instead of 70 uint32 mode = (IS_LINUX) ? 
GSL_SYNCUPLOAD_SYNC_WAIT | GSL_SYNCUPLOAD_SYNC_START : 0; m_cs->DMACopy(memMap->mem, 0, mem, 0, surfaceSize, mode, NULL); diff --git a/rocclr/device/pal/paldefs.hpp b/rocclr/device/pal/paldefs.hpp index a8a2b26640..65c2e34ca4 100644 --- a/rocclr/device/pal/paldefs.hpp +++ b/rocclr/device/pal/paldefs.hpp @@ -45,11 +45,11 @@ struct HwDbgKernelInfo { uint64_t scratchBufAddr; ///< Handle of GPU local memory for kernel private scratch space size_t scratchBufferSizeInBytes; ///< size of memory pointed to by pScratchBuffer, uint64_t heapBufAddr; ///< Address of the global heap base - const void* pAqlDispatchPacket; ///< Pointer to the dipatch packet + const void* pAqlDispatchPacket; ///< Pointer to the dispatch packet const void* pAqlQueuePtr; ///< pointer to the AQL Queue void* trapHandler; ///< address of the trap handler (TBA) void* trapHandlerBuffer; ///< address of the trap handler buffer (TMA) - uint32_t excpEn; ///< excecption mask + uint32_t excpEn; ///< exception mask bool trapPresent; ///< trap present flag bool sqDebugMode; ///< debug mode flag (GPU single step mode) uint32_t mgmtSe0Mask; ///< mask for SE0 (reserving CU for display) @@ -120,7 +120,7 @@ static constexpr uint MaxConstBuffers = MaxConstArguments + 8; //! Maximum number of constant buffers for arguments static constexpr uint MaxConstBuffersArguments = 2; -//! Alignment restriciton for the pinned memory +//! Alignment restriction for the pinned memory static constexpr size_t PinnedMemoryAlignment = 4 * Ki; //! 
HSA path specific defines for images diff --git a/rocclr/device/pal/paldevice.cpp b/rocclr/device/pal/paldevice.cpp index 1283b2b7a8..72016d5c58 100644 --- a/rocclr/device/pal/paldevice.cpp +++ b/rocclr/device/pal/paldevice.cpp @@ -237,7 +237,6 @@ bool NullDevice::init() { return true; } - bool NullDevice::create(uint id, Pal::GfxIpLevel ipLevel) { // Update HW info for the device if ((GPU_ENABLE_PAL == 1) && (ipLevel == Pal::GfxIpLevel::_None)) { @@ -883,8 +882,8 @@ bool Device::create(Pal::IDevice* device) { properties().revision : static_cast(PAL_FORCE_ASIC_REVISION); - // XNACK flag should be set for PageMigration | IOMMUv2 Support - // Note: Navi2x should have a fix in HW + // XNACK flag should be set for PageMigration or IOMMUv2 support. + // Note: Navi2x should have a fix in HW. bool isXNACKSupported = (ipLevel_ <= Pal::GfxIpLevel::GfxIp10_1) && (static_cast(properties_.gpuMemoryProperties.flags.pageMigrationEnabled || properties_.gpuMemoryProperties.flags.iommuv2Support)); diff --git a/rocclr/device/pal/palkernel.hpp b/rocclr/device/pal/palkernel.hpp index 10846c8f7a..c5d067a94d 100644 --- a/rocclr/device/pal/palkernel.hpp +++ b/rocclr/device/pal/palkernel.hpp @@ -68,7 +68,7 @@ class HSAILKernel : public device::Kernel { //! finalizes the kernel if needed bool init(amd::hsa::loader::Symbol* sym, bool finalize = false); - //! Returns GPU device object, associated with this kernel + //! Returns PAL device object, associated with this kernel const Device& dev() const; //! 
Returns HSA program associated with this kernel diff --git a/rocclr/device/pal/palprogram.cpp b/rocclr/device/pal/palprogram.cpp index 0f50c00cea..38934c89dd 100644 --- a/rocclr/device/pal/palprogram.cpp +++ b/rocclr/device/pal/palprogram.cpp @@ -385,7 +385,7 @@ bool HSAILProgram::defineGlobalVar(const char* name, void* dptr) { agent.handle = 1; hsa_status = executable_->DefineAgentExternalVariable(name, agent, HSA_VARIABLE_SEGMENT_GLOBAL, dptr); - if(HSA_STATUS_SUCCESS != hsa_status) { + if (HSA_STATUS_SUCCESS != hsa_status) { buildLog_ += "Could not define Program External Variable"; buildLog_ += "\n"; } diff --git a/rocclr/device/pal/palprogram.hpp b/rocclr/device/pal/palprogram.hpp index 7cee5568aa..3027107fc8 100644 --- a/rocclr/device/pal/palprogram.hpp +++ b/rocclr/device/pal/palprogram.hpp @@ -157,7 +157,7 @@ class HSAILProgram : public device::Program { const std::vector& globalStores() const { return globalStores_; } - //! Return a typecasted GPU device + //! Return a typecasted PAL device pal::Device& dev() { return const_cast(static_cast(device())); } //! Returns GPU kernel table diff --git a/rocclr/device/rocm/rocdefs.hpp b/rocclr/device/rocm/rocdefs.hpp index f67e954d75..7908413492 100644 --- a/rocclr/device/rocm/rocdefs.hpp +++ b/rocclr/device/rocm/rocdefs.hpp @@ -24,7 +24,7 @@ namespace roc { -//! Alignment restriciton for the pinned memory +//! Alignment restriction for the pinned memory static constexpr size_t PinnedMemoryAlignment = 4 * Ki; //! 
Specific defines for images for Dynamic Parallelism @@ -73,7 +73,7 @@ static constexpr AMDDeviceInfo DeviceInfo[] = { /* DIMGREY CAVEFISH*/{"gfx1032", "gfx1032", 2, 32, 1, 256, 64 * Ki, 32, 10, 3, 2, 0} }; -} +} // namespace roc constexpr uint kMaxAsyncQueues = 8; // set to match the number of pipes, which is 8 #endif diff --git a/rocclr/device/rocm/rocdevice.cpp b/rocclr/device/rocm/rocdevice.cpp index c06b7d879e..59ccf67cac 100644 --- a/rocclr/device/rocm/rocdevice.cpp +++ b/rocclr/device/rocm/rocdevice.cpp @@ -60,11 +60,13 @@ #ifndef WITHOUT_HSA_BACKEND namespace { + inline bool getIsaMeta(const char* targetId, amd_comgr_metadata_node_t& isaMeta) { amd_comgr_status_t status; status = amd::Comgr::get_isa_metadata(targetId, &isaMeta); return (status == AMD_COMGR_STATUS_SUCCESS) ? true : false; } + bool getValueFromIsaMeta(amd_comgr_metadata_node_t& isaMeta, const char* key, std::string& retValue) { amd_comgr_status_t status; @@ -82,11 +84,12 @@ bool getValueFromIsaMeta(amd_comgr_metadata_node_t& isaMeta, const char* key, return (status == AMD_COMGR_STATUS_SUCCESS) ? true : false; } -} // namespace + +} // namespace namespace device { extern const char* BlitSourceCode; -} +} // namespace device namespace roc { amd::Device::Compiler* NullDevice::compilerHandle_; @@ -300,6 +303,7 @@ bool NullDevice::destroyCompiler() { } void NullDevice::tearDown() { destroyCompiler(); } + bool NullDevice::init() { // Initialize the compiler if (!initCompiler(offlineDevice_)) { diff --git a/rocclr/device/rocm/rocprogram.cpp b/rocclr/device/rocm/rocprogram.cpp index 70758d6a35..512bf4ee2c 100644 --- a/rocclr/device/rocm/rocprogram.cpp +++ b/rocclr/device/rocm/rocprogram.cpp @@ -230,7 +230,6 @@ HSAILProgram::HSAILProgram(roc::NullDevice& device, amd::Program& owner) : roc:: machineTarget_ = dev().deviceInfo().machineTarget_; } - HSAILProgram::~HSAILProgram() { #if defined(WITH_COMPILER_LIB) acl_error error;