diff --git a/include/hip/hcc_detail/hip_runtime_api.h b/include/hip/hcc_detail/hip_runtime_api.h index 8becab3c9c..fa39a766bc 100644 --- a/include/hip/hcc_detail/hip_runtime_api.h +++ b/include/hip/hcc_detail/hip_runtime_api.h @@ -94,6 +94,19 @@ typedef struct ihipModule_t* hipModule_t; typedef struct ihipModuleSymbol_t* hipFunction_t; +struct hipFuncAttributes { + int binaryVersion; + int cacheModeCA; + size_t constSizeBytes; + size_t localSizeBytes; + int maxDynamicSharedSizeBytes; + int maxThreadsPerBlock; + int numRegs; + int preferredShmemCarveout; + int ptxVersion; + size_t sharedSizeBytes; +}; + typedef struct ihipEvent_t* hipEvent_t; enum hipLimit_t { @@ -2222,6 +2235,17 @@ hipError_t hipModuleUnload(hipModule_t module); */ hipError_t hipModuleGetFunction(hipFunction_t* function, hipModule_t module, const char* kname); +/** + * @bried Find out attributes for a given function. + * + * @param [out] attr + * @param [in] func + * + * @returns hipSuccess, hipErrorInvalidDeviceFunction + */ + +hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func); + /** * @brief returns device memory pointer and size of the kernel present in the module with symbol @p * name diff --git a/include/hip/hcc_detail/program_state.hpp b/include/hip/hcc_detail/program_state.hpp index b6ccafb205..ac689fdb89 100644 --- a/include/hip/hcc_detail/program_state.hpp +++ b/include/hip/hcc_detail/program_state.hpp @@ -22,8 +22,10 @@ THE SOFTWARE. #pragma once +#include #include #include +#include #include #include @@ -46,11 +48,45 @@ struct hash { inline constexpr bool operator==(hsa_agent_t x, hsa_agent_t y) { return x.handle == y.handle; } namespace hip_impl { -struct Kernel_descriptor { - std::uint64_t kernel_object_; - std::uint32_t group_size_; - std::uint32_t private_size_; - std::string name_; +class Kernel_descriptor { + std::uint64_t kernel_object_{}; + amd_kernel_code_t const* kernel_header_{nullptr}; + std::string name_{}; +public: + Kernel_descriptor() = default; + Kernel_descriptor(std::uint64_t kernel_object, const std::string& name) + : kernel_object_{kernel_object}, name_{name} + { + bool supported{false}; + std::uint16_t min_v{UINT16_MAX}; + auto r = hsa_system_major_extension_supported( + HSA_EXTENSION_AMD_LOADER, 1, &min_v, &supported); + + if (r != HSA_STATUS_SUCCESS || !supported) return; + + hsa_ven_amd_loader_1_01_pfn_t tbl{}; + + r = hsa_system_get_major_extension_table( + HSA_EXTENSION_AMD_LOADER, + 1, + sizeof(tbl), + reinterpret_cast(&tbl)); + + if (r != HSA_STATUS_SUCCESS) return; + if (!tbl.hsa_ven_amd_loader_query_host_address) return; + + r = tbl.hsa_ven_amd_loader_query_host_address( + reinterpret_cast(kernel_object_), + reinterpret_cast(&kernel_header_)); + + if (r != HSA_STATUS_SUCCESS) return; + } + Kernel_descriptor(const Kernel_descriptor&) = default; + Kernel_descriptor(Kernel_descriptor&&) = default; + ~Kernel_descriptor() = default; + + Kernel_descriptor& operator=(const Kernel_descriptor&) = default; + Kernel_descriptor& operator=(Kernel_descriptor&&) = default; operator hipFunction_t() const { // TODO: this is awful and only meant for illustration. return reinterpret_cast(const_cast(this)); diff --git a/src/functional_grid_launch.inl b/src/functional_grid_launch.inl index b555967ebc..9ecad51476 100644 --- a/src/functional_grid_launch.inl +++ b/src/functional_grid_launch.inl @@ -107,8 +107,8 @@ namespace hip_impl it0->second.cbegin(), it0->second.cend(), [=](const pair& x) { - return x.first.handle == agent.handle; - }); + return x.first == agent; + }); if (it1 == it0->second.cend()) { throw runtime_error{ diff --git a/src/hip_module.cpp b/src/hip_module.cpp index 84451a17a0..59831958bd 100644 --- a/src/hip_module.cpp +++ b/src/hip_module.cpp @@ -27,10 +27,11 @@ THE SOFTWARE. #include "hsa_helpers.hpp" #include "trace_helper.h" +#include #include #include -#include +#include #include #include #include @@ -71,9 +72,8 @@ struct ihipKernArgInfo { map kernelArguments; struct ihipModuleSymbol_t { - uint64_t _object; // The kernel object. - uint32_t _groupSegmentSize; - uint32_t _privateSegmentSize; + uint64_t _object{}; // The kernel object. + amd_kernel_code_t const* _header{}; string _name; // TODO - review for performance cost. Name is just used for debug. }; @@ -179,8 +179,10 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, aql.grid_size_x = globalWorkSizeX; aql.grid_size_y = globalWorkSizeY; aql.grid_size_z = globalWorkSizeZ; - aql.group_segment_size = f->_groupSegmentSize + sharedMemBytes; - aql.private_segment_size = f->_privateSegmentSize; + aql.group_segment_size = + f->_header->workgroup_group_segment_byte_size + sharedMemBytes; + aql.private_segment_size = + f->_header->workitem_private_segment_byte_size; aql.kernel_object = f->_object; aql.setup = 3 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; aql.header = @@ -444,10 +446,10 @@ hipError_t ihipModuleGetFunction(hipFunction_t* func, hipModule_t hmod, const ch if (kernel.handle == 0u) return hipErrorNotFound; - (*func)->_object = kernel_object(kernel); - (*func)->_groupSegmentSize = group_size(kernel); - (*func)->_privateSegmentSize = private_size(kernel); - (*func)->_name = name; + // TODO: refactor the whole ihipThisThat, which is a mess and yields the + // below, due to hipFunction_t being a pointer to ihipModuleSymbol_t. + func[0][0] = *static_cast( + Kernel_descriptor{kernel_object(kernel), name}); return hipSuccess; } @@ -471,6 +473,61 @@ hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t h return ihipLogStatus(r); } +namespace +{ + inline + hipFuncAttributes make_function_attributes(const amd_kernel_code_t& header) + { + hipFuncAttributes r{}; + + hipDeviceProp_t prop{}; + hipGetDeviceProperties( + &prop, ihipGetTlsDefaultCtx()->getDevice()->_deviceId); + + r.localSizeBytes = header.workitem_private_segment_byte_size; + r.sharedSizeBytes = header.workgroup_group_segment_byte_size; + r.maxDynamicSharedSizeBytes = + prop.sharedMemPerBlock - r.sharedSizeBytes; + r.numRegs = header.workitem_vgpr_count; + r.maxThreadsPerBlock = r.numRegs ? // TODO: proper query. + std::min(prop.maxThreadsPerBlock, 64 * 1024 / r.numRegs) : + prop.maxThreadsPerBlock; + r.binaryVersion = + header.amd_machine_version_major * 10 + + header.amd_machine_version_minor; + r.ptxVersion = prop.major * 10 + prop.minor; // HIP currently presents itself as PTX 3.0. + + return r; + } +} + +hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func) +{ + if (!func) return hipErrorInvalidDeviceFunction; + + const auto it0 = functions().find(reinterpret_cast(func)); + + if (it0 == functions().cend()) return hipErrorInvalidDeviceFunction; + + auto agent = this_agent(); + const auto it1 = find_if( + it0->second.cbegin(), + it0->second.cend(), + [=](const pair& x) { + return x.first == agent; + }); + + if (it1 == it0->second.cend()) return hipErrorInvalidDeviceFunction; + + const auto header = static_cast(it1->second)->_header; + + if (!header) throw runtime_error{"Ill-formed Kernel_descriptor."}; + + *attr = make_function_attributes(*header); + + return hipSuccess; +} + hipError_t ihipModuleLoadData(hipModule_t* module, const void* image) { if (!module) return hipErrorInvalidValue; @@ -487,7 +544,7 @@ hipError_t ihipModuleLoadData(hipModule_t* module, const void* image) { (*module)->executable = hip_impl::load_executable( tmp.empty() ? read_elf_file_as_string(image) : tmp, (*module)->executable, this_agent()); - + return (*module)->executable.handle ? hipSuccess : hipErrorUnknown; } diff --git a/src/program_state.cpp b/src/program_state.cpp index 0c51fe4694..c4478bec2f 100644 --- a/src/program_state.cpp +++ b/src/program_state.cpp @@ -382,8 +382,7 @@ const unordered_map>>& fu for (auto&& kernel_symbol : it->second) { r[function.first].emplace_back( agent(kernel_symbol), - Kernel_descriptor{kernel_object(kernel_symbol), group_size(kernel_symbol), - private_size(kernel_symbol), it->first}); + Kernel_descriptor{kernel_object(kernel_symbol), it->first}); } } } diff --git a/tests/src/runtimeApi/module/hipFuncGetAttributes.cpp b/tests/src/runtimeApi/module/hipFuncGetAttributes.cpp new file mode 100644 index 0000000000..7d3eff5f73 --- /dev/null +++ b/tests/src/runtimeApi/module/hipFuncGetAttributes.cpp @@ -0,0 +1,53 @@ +/* +Copyright (c) 2015-Present Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp + * RUN: %t + * HIT_END + */ + +#include +#include +#include "test_common.h" + +__global__ +void fn(float* px, float* py) +{ + bool a[42]; + __shared__ double b[69]; + + for (auto&& x : b) x = *py++; + for (auto&& x : a) x = *px++ > 0.0; + for (auto&& x : a) if (x) *--py = *--px; +} + +int main() { + hipInit(0); + + hipFuncAttributes attr{}; + + auto r = hipFuncGetAttributes(&attr, reinterpret_cast(&fn)); + + if (r != hipSuccess || attr.maxThreadsPerBlock == 0) { + failed("Failed to read attributes."); + } + + passed(); +}