Merge pull request #445 from ROCm-Developer-Tools/feature_func_attributes
Add support for the hipFuncGetAttributes interface.
このコミットが含まれているのは:
@@ -94,6 +94,19 @@ typedef struct ihipModule_t* hipModule_t;
|
||||
|
||||
typedef struct ihipModuleSymbol_t* hipFunction_t;
|
||||
|
||||
struct hipFuncAttributes {
|
||||
int binaryVersion;
|
||||
int cacheModeCA;
|
||||
size_t constSizeBytes;
|
||||
size_t localSizeBytes;
|
||||
int maxDynamicSharedSizeBytes;
|
||||
int maxThreadsPerBlock;
|
||||
int numRegs;
|
||||
int preferredShmemCarveout;
|
||||
int ptxVersion;
|
||||
size_t sharedSizeBytes;
|
||||
};
|
||||
|
||||
typedef struct ihipEvent_t* hipEvent_t;
|
||||
|
||||
enum hipLimit_t {
|
||||
@@ -2243,6 +2256,17 @@ hipError_t hipModuleUnload(hipModule_t module);
|
||||
*/
|
||||
hipError_t hipModuleGetFunction(hipFunction_t* function, hipModule_t module, const char* kname);
|
||||
|
||||
/**
|
||||
* @bried Find out attributes for a given function.
|
||||
*
|
||||
* @param [out] attr
|
||||
* @param [in] func
|
||||
*
|
||||
* @returns hipSuccess, hipErrorInvalidDeviceFunction
|
||||
*/
|
||||
|
||||
hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func);
|
||||
|
||||
/**
|
||||
* @brief returns device memory pointer and size of the kernel present in the module with symbol @p
|
||||
* name
|
||||
|
||||
@@ -22,8 +22,10 @@ THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <hsa/amd_hsa_kernel_code.h>
|
||||
#include <hsa/hsa.h>
|
||||
#include <hsa/hsa_ext_amd.h>
|
||||
#include <hsa/hsa_ven_amd_loader.h>
|
||||
|
||||
#include <cstddef>
|
||||
#include <istream>
|
||||
@@ -46,11 +48,45 @@ struct hash<hsa_agent_t> {
|
||||
inline constexpr bool operator==(hsa_agent_t x, hsa_agent_t y) { return x.handle == y.handle; }
|
||||
|
||||
namespace hip_impl {
|
||||
struct Kernel_descriptor {
|
||||
std::uint64_t kernel_object_;
|
||||
std::uint32_t group_size_;
|
||||
std::uint32_t private_size_;
|
||||
std::string name_;
|
||||
class Kernel_descriptor {
|
||||
std::uint64_t kernel_object_{};
|
||||
amd_kernel_code_t const* kernel_header_{nullptr};
|
||||
std::string name_{};
|
||||
public:
|
||||
Kernel_descriptor() = default;
|
||||
Kernel_descriptor(std::uint64_t kernel_object, const std::string& name)
|
||||
: kernel_object_{kernel_object}, name_{name}
|
||||
{
|
||||
bool supported{false};
|
||||
std::uint16_t min_v{UINT16_MAX};
|
||||
auto r = hsa_system_major_extension_supported(
|
||||
HSA_EXTENSION_AMD_LOADER, 1, &min_v, &supported);
|
||||
|
||||
if (r != HSA_STATUS_SUCCESS || !supported) return;
|
||||
|
||||
hsa_ven_amd_loader_1_01_pfn_t tbl{};
|
||||
|
||||
r = hsa_system_get_major_extension_table(
|
||||
HSA_EXTENSION_AMD_LOADER,
|
||||
1,
|
||||
sizeof(tbl),
|
||||
reinterpret_cast<void*>(&tbl));
|
||||
|
||||
if (r != HSA_STATUS_SUCCESS) return;
|
||||
if (!tbl.hsa_ven_amd_loader_query_host_address) return;
|
||||
|
||||
r = tbl.hsa_ven_amd_loader_query_host_address(
|
||||
reinterpret_cast<void*>(kernel_object_),
|
||||
reinterpret_cast<const void**>(&kernel_header_));
|
||||
|
||||
if (r != HSA_STATUS_SUCCESS) return;
|
||||
}
|
||||
Kernel_descriptor(const Kernel_descriptor&) = default;
|
||||
Kernel_descriptor(Kernel_descriptor&&) = default;
|
||||
~Kernel_descriptor() = default;
|
||||
|
||||
Kernel_descriptor& operator=(const Kernel_descriptor&) = default;
|
||||
Kernel_descriptor& operator=(Kernel_descriptor&&) = default;
|
||||
|
||||
operator hipFunction_t() const { // TODO: this is awful and only meant for illustration.
|
||||
return reinterpret_cast<hipFunction_t>(const_cast<Kernel_descriptor*>(this));
|
||||
|
||||
@@ -107,8 +107,8 @@ namespace hip_impl
|
||||
it0->second.cbegin(),
|
||||
it0->second.cend(),
|
||||
[=](const pair<hsa_agent_t, Kernel_descriptor>& x) {
|
||||
return x.first.handle == agent.handle;
|
||||
});
|
||||
return x.first == agent;
|
||||
});
|
||||
|
||||
if (it1 == it0->second.cend()) {
|
||||
throw runtime_error{
|
||||
|
||||
+72
-11
@@ -27,10 +27,11 @@ THE SOFTWARE.
|
||||
#include "hsa_helpers.hpp"
|
||||
#include "trace_helper.h"
|
||||
|
||||
#include <hsa/amd_hsa_kernel_code.h>
|
||||
#include <hsa/hsa.h>
|
||||
#include <hsa/hsa_ext_amd.h>
|
||||
#include <hsa/amd_hsa_kernel_code.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
@@ -71,9 +72,8 @@ struct ihipKernArgInfo {
|
||||
map<string, ihipKernArgInfo> kernelArguments;
|
||||
|
||||
struct ihipModuleSymbol_t {
|
||||
uint64_t _object; // The kernel object.
|
||||
uint32_t _groupSegmentSize;
|
||||
uint32_t _privateSegmentSize;
|
||||
uint64_t _object{}; // The kernel object.
|
||||
amd_kernel_code_t const* _header{};
|
||||
string _name; // TODO - review for performance cost. Name is just used for debug.
|
||||
};
|
||||
|
||||
@@ -179,8 +179,10 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
|
||||
aql.grid_size_x = globalWorkSizeX;
|
||||
aql.grid_size_y = globalWorkSizeY;
|
||||
aql.grid_size_z = globalWorkSizeZ;
|
||||
aql.group_segment_size = f->_groupSegmentSize + sharedMemBytes;
|
||||
aql.private_segment_size = f->_privateSegmentSize;
|
||||
aql.group_segment_size =
|
||||
f->_header->workgroup_group_segment_byte_size + sharedMemBytes;
|
||||
aql.private_segment_size =
|
||||
f->_header->workitem_private_segment_byte_size;
|
||||
aql.kernel_object = f->_object;
|
||||
aql.setup = 3 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
|
||||
aql.header =
|
||||
@@ -444,10 +446,10 @@ hipError_t ihipModuleGetFunction(hipFunction_t* func, hipModule_t hmod, const ch
|
||||
|
||||
if (kernel.handle == 0u) return hipErrorNotFound;
|
||||
|
||||
(*func)->_object = kernel_object(kernel);
|
||||
(*func)->_groupSegmentSize = group_size(kernel);
|
||||
(*func)->_privateSegmentSize = private_size(kernel);
|
||||
(*func)->_name = name;
|
||||
// TODO: refactor the whole ihipThisThat, which is a mess and yields the
|
||||
// below, due to hipFunction_t being a pointer to ihipModuleSymbol_t.
|
||||
func[0][0] = *static_cast<hipFunction_t>(
|
||||
Kernel_descriptor{kernel_object(kernel), name});
|
||||
|
||||
return hipSuccess;
|
||||
}
|
||||
@@ -471,6 +473,65 @@ hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t h
|
||||
return ihipLogStatus(r);
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
inline
|
||||
hipFuncAttributes make_function_attributes(const amd_kernel_code_t& header)
|
||||
{
|
||||
hipFuncAttributes r{};
|
||||
|
||||
hipDeviceProp_t prop{};
|
||||
hipGetDeviceProperties(
|
||||
&prop, ihipGetTlsDefaultCtx()->getDevice()->_deviceId);
|
||||
// TODO: at the moment there is no way to query the count of registers
|
||||
// available per CU, therefore we hardcode it to 64 KiRegisters.
|
||||
prop.regsPerBlock = prop.regsPerBlock ? prop.regsPerBlock : 64 * 1024;
|
||||
|
||||
r.localSizeBytes = header.workitem_private_segment_byte_size;
|
||||
r.sharedSizeBytes = header.workgroup_group_segment_byte_size;
|
||||
r.maxDynamicSharedSizeBytes =
|
||||
prop.sharedMemPerBlock - r.sharedSizeBytes;
|
||||
r.numRegs = header.workitem_vgpr_count;
|
||||
r.maxThreadsPerBlock = r.numRegs ?
|
||||
std::min(prop.maxThreadsPerBlock, prop.regsPerBlock / r.numRegs) :
|
||||
prop.maxThreadsPerBlock;
|
||||
r.binaryVersion =
|
||||
header.amd_machine_version_major * 10 +
|
||||
header.amd_machine_version_minor;
|
||||
r.ptxVersion = prop.major * 10 + prop.minor; // HIP currently presents itself as PTX 3.0.
|
||||
|
||||
return r;
|
||||
}
|
||||
}
|
||||
|
||||
hipError_t hipFuncGetAttributes(hipFuncAttributes* attr, const void* func)
|
||||
{
|
||||
if (!attr) return hipErrorInvalidValue;
|
||||
if (!func) return hipErrorInvalidDeviceFunction;
|
||||
|
||||
const auto it0 = functions().find(reinterpret_cast<uintptr_t>(func));
|
||||
|
||||
if (it0 == functions().cend()) return hipErrorInvalidDeviceFunction;
|
||||
|
||||
auto agent = this_agent();
|
||||
const auto it1 = find_if(
|
||||
it0->second.cbegin(),
|
||||
it0->second.cend(),
|
||||
[=](const pair<hsa_agent_t, Kernel_descriptor>& x) {
|
||||
return x.first == agent;
|
||||
});
|
||||
|
||||
if (it1 == it0->second.cend()) return hipErrorInvalidDeviceFunction;
|
||||
|
||||
const auto header = static_cast<hipFunction_t>(it1->second)->_header;
|
||||
|
||||
if (!header) throw runtime_error{"Ill-formed Kernel_descriptor."};
|
||||
|
||||
*attr = make_function_attributes(*header);
|
||||
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
hipError_t ihipModuleLoadData(hipModule_t* module, const void* image) {
|
||||
|
||||
if (!module) return hipErrorInvalidValue;
|
||||
@@ -487,7 +548,7 @@ hipError_t ihipModuleLoadData(hipModule_t* module, const void* image) {
|
||||
|
||||
(*module)->executable = hip_impl::load_executable(
|
||||
tmp.empty() ? read_elf_file_as_string(image) : tmp, (*module)->executable, this_agent());
|
||||
|
||||
|
||||
return (*module)->executable.handle ? hipSuccess : hipErrorUnknown;
|
||||
}
|
||||
|
||||
|
||||
+1
-2
@@ -382,8 +382,7 @@ const unordered_map<uintptr_t, vector<pair<hsa_agent_t, Kernel_descriptor>>>& fu
|
||||
for (auto&& kernel_symbol : it->second) {
|
||||
r[function.first].emplace_back(
|
||||
agent(kernel_symbol),
|
||||
Kernel_descriptor{kernel_object(kernel_symbol), group_size(kernel_symbol),
|
||||
private_size(kernel_symbol), it->first});
|
||||
Kernel_descriptor{kernel_object(kernel_symbol), it->first});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,53 @@
|
||||
/*
|
||||
Copyright (c) 2015-Present Advanced Micro Devices, Inc. All rights reserved.
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* HIT_START
|
||||
* BUILD: %t %s ../../test_common.cpp
|
||||
* RUN: %t
|
||||
* HIT_END
|
||||
*/
|
||||
|
||||
#include <hip/hip_runtime_api.h>
|
||||
#include <iostream>
|
||||
#include "test_common.h"
|
||||
|
||||
__global__
|
||||
void fn(float* px, float* py)
|
||||
{
|
||||
bool a[42];
|
||||
__shared__ double b[69];
|
||||
|
||||
for (auto&& x : b) x = *py++;
|
||||
for (auto&& x : a) x = *px++ > 0.0;
|
||||
for (auto&& x : a) if (x) *--py = *--px;
|
||||
}
|
||||
|
||||
int main() {
|
||||
hipInit(0);
|
||||
|
||||
hipFuncAttributes attr{};
|
||||
|
||||
auto r = hipFuncGetAttributes(&attr, reinterpret_cast<const void*>(&fn));
|
||||
|
||||
if (r != hipSuccess || attr.maxThreadsPerBlock == 0) {
|
||||
failed("Failed to read attributes.");
|
||||
}
|
||||
|
||||
passed();
|
||||
}
|
||||
新しいイシューから参照
ユーザーをブロックする