2021-07-02 16:46:49 -07:00
|
|
|
/* Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc.
|
2020-02-04 08:45:01 -08:00
|
|
|
|
|
|
|
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
|
|
|
of this software and associated documentation files (the "Software"), to deal
|
|
|
|
|
in the Software without restriction, including without limitation the rights
|
|
|
|
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
|
|
|
copies of the Software, and to permit persons to whom the Software is
|
|
|
|
|
furnished to do so, subject to the following conditions:
|
|
|
|
|
|
|
|
|
|
The above copyright notice and this permission notice shall be included in
|
|
|
|
|
all copies or substantial portions of the Software.
|
|
|
|
|
|
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
|
|
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
|
|
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
|
|
|
THE SOFTWARE. */
|
2018-03-01 22:57:20 -05:00
|
|
|
|
|
|
|
|
#include <hip/hip_runtime.h>
|
2021-07-30 19:15:38 +00:00
|
|
|
#include <hip/texture_types.h>
|
2020-05-18 22:40:33 -04:00
|
|
|
#include "hip_platform.hpp"
|
2018-03-01 22:57:20 -05:00
|
|
|
#include "hip_internal.hpp"
|
|
|
|
|
#include "platform/program.hpp"
|
|
|
|
|
#include "platform/runtime.hpp"
|
2025-05-17 06:33:13 -04:00
|
|
|
#include "utils/flags.hpp"
|
2018-03-01 22:57:20 -05:00
|
|
|
|
2018-04-27 21:21:31 -04:00
|
|
|
#include <unordered_map>
|
2024-07-25 15:10:23 +01:00
|
|
|
#include <mutex>
|
|
|
|
|
|
2023-04-21 10:46:05 +00:00
|
|
|
namespace hip {
|
2022-03-14 12:36:16 -04:00
|
|
|
constexpr unsigned __hipFatMAGIC2 = 0x48495046; // "HIPF"
|
2018-03-01 22:57:20 -05:00
|
|
|
|
2022-03-14 12:36:16 -04:00
|
|
|
PlatformState* PlatformState::platform_; // Initiaized as nullptr by default
|
2019-03-18 18:44:55 -04:00
|
|
|
|
2022-03-14 12:36:16 -04:00
|
|
|
// forward declaration of methods required for __hipRegisrterManagedVar
|
2025-07-31 08:30:23 -07:00
|
|
|
hipError_t ihipMallocManaged(void** ptr, size_t size, size_t align = 0, bool use_host_ptr = 0);
|
2021-02-16 07:20:58 -05:00
|
|
|
|
2018-03-01 22:57:20 -05:00
|
|
|
struct __CudaFatBinaryWrapper {
|
|
|
|
|
unsigned int magic;
|
|
|
|
|
unsigned int version;
|
2022-03-14 12:36:16 -04:00
|
|
|
void* binary;
|
|
|
|
|
void* dummy1;
|
2018-03-01 22:57:20 -05:00
|
|
|
};
|
|
|
|
|
|
2022-03-14 12:36:16 -04:00
|
|
|
hipError_t hipModuleGetGlobal(hipDeviceptr_t* dptr, size_t* bytes, hipModule_t hmod,
|
|
|
|
|
const char* name);
|
2019-03-19 11:31:24 -04:00
|
|
|
|
2019-04-04 18:22:40 -04:00
|
|
|
hipError_t ihipCreateGlobalVarObj(const char* name, hipModule_t hmod, amd::Memory** amd_mem_obj,
|
|
|
|
|
hipDeviceptr_t* dptr, size_t* bytes);
|
|
|
|
|
|
2025-08-20 16:28:06 +02:00
|
|
|
extern hipError_t ihipModuleLaunchKernel(hipFunction_t f, amd::LaunchParams& launch_params,
|
|
|
|
|
hipStream_t hStream, void** kernelParams, void** extra,
|
|
|
|
|
hipEvent_t startEvent, hipEvent_t stopEvent,
|
|
|
|
|
uint32_t flags = 0, uint32_t params = 0,
|
|
|
|
|
uint32_t gridId = 0, uint32_t numGrids = 0,
|
|
|
|
|
uint64_t prevGridSum = 0, uint64_t allGridSum = 0,
|
|
|
|
|
uint32_t firstDevice = 0);
|
2022-03-14 12:36:16 -04:00
|
|
|
static bool isCompatibleCodeObject(const std::string& codeobj_target_id, const char* device_name) {
|
2019-06-28 08:06:23 -04:00
|
|
|
// Workaround for device name mismatch.
|
|
|
|
|
// Device name may contain feature strings delimited by '+', e.g.
|
|
|
|
|
// gfx900+xnack. Currently HIP-Clang does not include feature strings
|
|
|
|
|
// in code object target id in fat binary. Therefore drop the feature
|
|
|
|
|
// strings from device name before comparing it with code object target id.
|
|
|
|
|
std::string short_name(device_name);
|
|
|
|
|
auto feature_loc = short_name.find('+');
|
|
|
|
|
if (feature_loc != std::string::npos) {
|
|
|
|
|
short_name.erase(feature_loc);
|
|
|
|
|
}
|
|
|
|
|
return codeobj_target_id == short_name;
|
2019-05-09 14:19:54 -04:00
|
|
|
}
|
|
|
|
|
|
2023-04-21 10:46:05 +00:00
|
|
|
void** __hipRegisterFatBinary(const void* data) {
|
2019-05-27 20:11:08 -04:00
|
|
|
const __CudaFatBinaryWrapper* fbwrapper = reinterpret_cast<const __CudaFatBinaryWrapper*>(data);
|
|
|
|
|
if (fbwrapper->magic != __hipFatMAGIC2 || fbwrapper->version != 1) {
|
2021-02-17 23:54:39 +05:30
|
|
|
LogPrintfError("Cannot Register fat binary. FatMagic: %u version: %u ", fbwrapper->magic,
|
|
|
|
|
fbwrapper->version);
|
2019-05-27 20:11:08 -04:00
|
|
|
return nullptr;
|
|
|
|
|
}
|
2024-09-24 15:18:06 +00:00
|
|
|
|
|
|
|
|
bool success{};
|
|
|
|
|
auto fat_binary_info = PlatformState::instance().addFatBinary(fbwrapper->binary, success);
|
|
|
|
|
return success ? reinterpret_cast<void**>(fat_binary_info) : nullptr;
|
2019-12-11 03:11:19 -05:00
|
|
|
}
|
|
|
|
|
|
2023-04-21 10:46:05 +00:00
|
|
|
void __hipRegisterFunction(hip::FatBinaryInfo** modules, const void* hostFunction,
|
2025-08-20 16:28:06 +02:00
|
|
|
char* deviceFunction, const char* deviceName, unsigned int threadLimit,
|
|
|
|
|
uint3* tid, uint3* bid, dim3* blockDim, dim3* gridDim, int* wSize) {
|
2022-03-14 12:36:16 -04:00
|
|
|
static int enable_deferred_loading{[]() {
|
|
|
|
|
char* var = getenv("HIP_ENABLE_DEFERRED_LOADING");
|
2020-07-11 18:14:03 -04:00
|
|
|
return var ? atoi(var) : 1;
|
2022-03-14 12:36:16 -04:00
|
|
|
}()};
|
2022-03-02 11:46:56 -08:00
|
|
|
hipError_t hip_error = hipSuccess;
|
2024-10-05 12:05:49 +00:00
|
|
|
// Compiler might share same hostFunction and hence it's needless to have another
|
|
|
|
|
// hip::Function and hip::Function is stored in map with hostFunction as key.
|
|
|
|
|
// Creating hip::Function in such case, Leaks it.
|
|
|
|
|
if (PlatformState::instance().getStatFuncName(hostFunction) == nullptr) {
|
|
|
|
|
hip::Function* func = new hip::Function(std::string(deviceName), modules);
|
|
|
|
|
hip_error = PlatformState::instance().registerStatFunction(hostFunction, func);
|
|
|
|
|
}
|
2023-12-07 01:32:20 +00:00
|
|
|
guarantee((hip_error == hipSuccess), "Cannot register Static function, error: %d", hip_error);
|
2020-07-11 18:14:03 -04:00
|
|
|
|
|
|
|
|
if (!enable_deferred_loading) {
|
2021-11-18 16:19:55 -05:00
|
|
|
HIP_INIT_VOID();
|
2020-05-18 22:40:33 -04:00
|
|
|
hipFunction_t hfunc = nullptr;
|
2022-03-02 11:46:56 -08:00
|
|
|
|
2020-05-18 22:40:33 -04:00
|
|
|
for (size_t dev_idx = 0; dev_idx < g_devices.size(); ++dev_idx) {
|
|
|
|
|
hip_error = PlatformState::instance().getStatFunc(&hfunc, hostFunction, dev_idx);
|
2025-08-20 16:28:06 +02:00
|
|
|
guarantee((hip_error == hipSuccess), "Cannot retrieve Static function, error: %d", hip_error);
|
2020-05-14 00:18:32 -04:00
|
|
|
}
|
|
|
|
|
}
|
2018-03-01 22:57:20 -05:00
|
|
|
}
|
|
|
|
|
|
2018-07-24 17:14:35 -04:00
|
|
|
// Registers a device-side global variable.
|
|
|
|
|
// For each global variable in device code, there is a corresponding shadow
|
|
|
|
|
// global variable in host code. The shadow host variable is used to keep
|
|
|
|
|
// track of the value of the device side global variable between kernel
|
|
|
|
|
// executions.
|
2025-08-20 16:28:06 +02:00
|
|
|
void __hipRegisterVar(hip::FatBinaryInfo** modules, // The device modules containing code object
|
|
|
|
|
void* var, // The shadow variable in host code
|
|
|
|
|
char* hostVar, // Variable name in host code
|
|
|
|
|
char* deviceVar, // Variable name in device code
|
|
|
|
|
int ext, // Whether this variable is external
|
|
|
|
|
size_t size, // Size of the variable
|
|
|
|
|
int constant, // Whether this variable is constant
|
|
|
|
|
int global) // Unknown, always 0
|
2018-03-01 22:57:20 -05:00
|
|
|
{
|
2022-03-14 12:36:16 -04:00
|
|
|
hip::Var* var_ptr = new hip::Var(std::string(hostVar), hip::Var::DeviceVarKind::DVK_Variable,
|
|
|
|
|
size, 0, 0, modules);
|
2022-03-02 11:46:56 -08:00
|
|
|
hipError_t err = PlatformState::instance().registerStatGlobalVar(var, var_ptr);
|
2023-12-07 01:32:20 +00:00
|
|
|
guarantee((err == hipSuccess), "Cannot register Static Global Var, error:%d", err);
|
2020-04-06 10:57:03 -04:00
|
|
|
}
|
|
|
|
|
|
2023-04-21 10:46:05 +00:00
|
|
|
void __hipRegisterSurface(
|
2022-03-14 12:36:16 -04:00
|
|
|
hip::FatBinaryInfo** modules, // The device modules containing code object
|
|
|
|
|
void* var, // The shadow variable in host code
|
|
|
|
|
char* hostVar, // Variable name in host code
|
|
|
|
|
char* deviceVar, // Variable name in device code
|
|
|
|
|
int type, int ext) {
|
|
|
|
|
hip::Var* var_ptr = new hip::Var(std::string(hostVar), hip::Var::DeviceVarKind::DVK_Surface,
|
|
|
|
|
sizeof(surfaceReference), 0, 0, modules);
|
2022-03-02 11:46:56 -08:00
|
|
|
hipError_t err = PlatformState::instance().registerStatGlobalVar(var, var_ptr);
|
2023-12-07 01:32:20 +00:00
|
|
|
guarantee((err == hipSuccess), "Cannot register Static Glbal Var, err:%d", err);
|
2020-04-06 10:57:03 -04:00
|
|
|
}
|
2019-03-18 18:44:55 -04:00
|
|
|
|
2023-04-21 10:46:05 +00:00
|
|
|
void __hipRegisterManagedVar(
|
2022-03-14 12:36:16 -04:00
|
|
|
void* hipModule, // Pointer to hip module returned from __hipRegisterFatbinary
|
|
|
|
|
void** pointer, // Pointer to a chunk of managed memory with size \p size and alignment \p
|
2025-08-20 16:28:06 +02:00
|
|
|
// align HIP runtime allocates such managed memory and assign it to \p pointer
|
2022-03-14 12:36:16 -04:00
|
|
|
void* init_value, // Initial value to be copied into \p pointer
|
|
|
|
|
const char* name, // Name of the variable in code object
|
|
|
|
|
size_t size, unsigned align) {
|
2025-07-31 08:30:23 -07:00
|
|
|
static int enable_deferred_loading{[]() {
|
2025-08-20 16:28:06 +02:00
|
|
|
#ifdef _WIN32 // Don't defer loading for windows
|
|
|
|
|
return 0;
|
|
|
|
|
#else
|
|
|
|
|
char* var = getenv("HIP_ENABLE_DEFERRED_LOADING");
|
|
|
|
|
return var ? atoi(var) : 1;
|
|
|
|
|
#endif
|
2025-07-31 08:30:23 -07:00
|
|
|
}()};
|
|
|
|
|
hipError_t hip_error = hipSuccess;
|
2021-02-16 07:20:58 -05:00
|
|
|
hip::Var* var_ptr = new hip::Var(std::string(name), hip::Var::DeviceVarKind::DVK_Managed, pointer,
|
|
|
|
|
size, align, reinterpret_cast<hip::FatBinaryInfo**>(hipModule));
|
2025-07-31 08:30:23 -07:00
|
|
|
hipError_t status = PlatformState::instance().registerStatManagedVar(var_ptr);
|
2023-12-07 01:32:20 +00:00
|
|
|
guarantee((status == hipSuccess), "Cannot register Static Managed Var, error: %d", status);
|
2025-07-31 08:30:23 -07:00
|
|
|
|
|
|
|
|
if (enable_deferred_loading) {
|
|
|
|
|
// Allocate temporary var on host and initialize
|
|
|
|
|
*pointer = amd::Os::reserveMemory(0, size, align, amd::Os::MEM_PROT_RW);
|
|
|
|
|
::memcpy(*pointer, init_value, size);
|
|
|
|
|
} else {
|
|
|
|
|
HIP_INIT_VOID();
|
|
|
|
|
hipError_t status = ihipMallocManaged(pointer, size, align, 0);
|
2025-08-20 16:28:06 +02:00
|
|
|
var_ptr->setAllocFlag(true); // set flag true for managed alloc
|
2025-07-31 08:30:23 -07:00
|
|
|
if (status == hipSuccess) {
|
|
|
|
|
hip::Stream* stream = hip::getNullStream();
|
|
|
|
|
if (stream != nullptr) {
|
|
|
|
|
status = ihipMemcpy(*pointer, init_value, size, hipMemcpyHostToDevice, *stream);
|
|
|
|
|
guarantee((status == hipSuccess), "Error during memcpy to managed memory, error:%d!",
|
2025-08-20 16:28:06 +02:00
|
|
|
status);
|
2025-07-31 08:30:23 -07:00
|
|
|
} else {
|
|
|
|
|
ClPrint(amd::LOG_ERROR, amd::LOG_API, "Host Queue is NULL");
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
guarantee(false, "Error during allocation of managed memory!, error: %d", status);
|
|
|
|
|
}
|
|
|
|
|
}
|
2021-02-16 07:20:58 -05:00
|
|
|
}
|
|
|
|
|
|
2023-04-21 10:46:05 +00:00
|
|
|
void __hipRegisterTexture(
|
2022-03-14 12:36:16 -04:00
|
|
|
hip::FatBinaryInfo** modules, // The device modules containing code object
|
|
|
|
|
void* var, // The shadow variable in host code
|
|
|
|
|
char* hostVar, // Variable name in host code
|
|
|
|
|
char* deviceVar, // Variable name in device code
|
|
|
|
|
int type, int norm, int ext) {
|
|
|
|
|
hip::Var* var_ptr = new hip::Var(std::string(hostVar), hip::Var::DeviceVarKind::DVK_Texture,
|
|
|
|
|
sizeof(textureReference), 0, 0, modules);
|
2022-03-02 11:46:56 -08:00
|
|
|
hipError_t err = PlatformState::instance().registerStatGlobalVar(var, var_ptr);
|
2023-12-07 01:32:20 +00:00
|
|
|
guarantee((err == hipSuccess), "Cannot register Static Global Var, status: %d", err);
|
2018-03-01 22:57:20 -05:00
|
|
|
}
|
|
|
|
|
|
2023-04-21 10:46:05 +00:00
|
|
|
void __hipUnregisterFatBinary(hip::FatBinaryInfo** modules) {
|
2024-07-25 15:10:23 +01:00
|
|
|
static std::once_flag unregister_device_sync;
|
2025-05-17 06:33:13 -04:00
|
|
|
// If SKIP ABORT is set and GPU is in error, dont need to sync streams.
|
|
|
|
|
if (!HIP_SKIP_ABORT_ON_GPU_ERROR || !amd::Device::IsGPUInError()) {
|
2025-08-20 16:28:06 +02:00
|
|
|
std::call_once(unregister_device_sync, []() {
|
2025-05-17 06:33:13 -04:00
|
|
|
for (auto& hipDevice : g_devices) {
|
|
|
|
|
// By synchronizing devices ensure that all HSA signal handlers
|
|
|
|
|
// complete before removeFatBinary
|
|
|
|
|
hipDevice->SyncAllStreams(true);
|
|
|
|
|
}
|
|
|
|
|
});
|
|
|
|
|
}
|
2024-07-25 15:10:23 +01:00
|
|
|
hipError_t err = PlatformState::instance().removeFatBinary(modules);
|
2023-12-07 01:32:20 +00:00
|
|
|
guarantee((err == hipSuccess), "Cannot Unregister Fat Binary, error:%d", err);
|
2018-03-01 22:57:20 -05:00
|
|
|
}
|
|
|
|
|
|
2023-04-21 10:46:05 +00:00
|
|
|
void __hipRegisterFunction(void** modules, const void* hostFunction, char* deviceFunction,
|
|
|
|
|
const char* deviceName, unsigned int threadLimit, uint3* tid, uint3* bid,
|
|
|
|
|
dim3* blockDim, dim3* gridDim, int* wSize) {
|
|
|
|
|
return __hipRegisterFunction(reinterpret_cast<hip::FatBinaryInfo**>(modules), hostFunction,
|
|
|
|
|
deviceFunction, deviceName, threadLimit, tid, bid, blockDim, gridDim,
|
|
|
|
|
wSize);
|
|
|
|
|
}
|
|
|
|
|
void __hipRegisterSurface(void** modules, void* var, char* hostVar, char* deviceVar, int type,
|
|
|
|
|
int ext) {
|
|
|
|
|
return __hipRegisterSurface(reinterpret_cast<hip::FatBinaryInfo**>(modules), var, hostVar,
|
|
|
|
|
deviceVar, type, ext);
|
|
|
|
|
}
|
|
|
|
|
void __hipRegisterTexture(void** modules, void* var, char* hostVar, char* deviceVar, int type,
|
|
|
|
|
int norm, int ext) {
|
|
|
|
|
return __hipRegisterTexture(reinterpret_cast<hip::FatBinaryInfo**>(modules), var, hostVar,
|
|
|
|
|
deviceVar, type, norm, ext);
|
|
|
|
|
}
|
|
|
|
|
void __hipRegisterVar(void** modules, void* var, char* hostVar, char* deviceVar, int ext,
|
|
|
|
|
size_t size, int constant, int global) {
|
2025-08-20 16:28:06 +02:00
|
|
|
return __hipRegisterVar(reinterpret_cast<hip::FatBinaryInfo**>(modules), var, hostVar, deviceVar,
|
|
|
|
|
ext, size, constant, global);
|
2023-04-21 10:46:05 +00:00
|
|
|
}
|
|
|
|
|
void __hipUnregisterFatBinary(void** modules) {
|
|
|
|
|
return __hipUnregisterFatBinary(reinterpret_cast<hip::FatBinaryInfo**>(modules));
|
|
|
|
|
}
|
|
|
|
|
|
2025-08-20 16:28:06 +02:00
|
|
|
hipError_t hipConfigureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, hipStream_t stream) {
|
2020-05-14 03:50:34 -05:00
|
|
|
HIP_INIT_API(hipConfigureCall, gridDim, blockDim, sharedMem, stream);
|
2018-03-28 19:23:57 -04:00
|
|
|
|
2018-08-02 12:33:55 -04:00
|
|
|
PlatformState::instance().configureCall(gridDim, blockDim, sharedMem, stream);
|
2018-03-01 22:57:20 -05:00
|
|
|
|
2018-08-14 18:54:13 -04:00
|
|
|
HIP_RETURN(hipSuccess);
|
2018-03-01 22:57:20 -05:00
|
|
|
}
|
|
|
|
|
|
2023-04-21 10:46:05 +00:00
|
|
|
hipError_t __hipPushCallConfiguration(dim3 gridDim, dim3 blockDim, size_t sharedMem,
|
2025-08-20 16:28:06 +02:00
|
|
|
hipStream_t stream) {
|
2020-05-14 03:50:34 -05:00
|
|
|
HIP_INIT_API(__hipPushCallConfiguration, gridDim, blockDim, sharedMem, stream);
|
2019-09-24 16:58:14 -04:00
|
|
|
|
|
|
|
|
PlatformState::instance().configureCall(gridDim, blockDim, sharedMem, stream);
|
|
|
|
|
|
|
|
|
|
HIP_RETURN(hipSuccess);
|
|
|
|
|
}
|
|
|
|
|
|
2023-04-21 10:46:05 +00:00
|
|
|
hipError_t __hipPopCallConfiguration(dim3* gridDim, dim3* blockDim, size_t* sharedMem,
|
2025-08-20 16:28:06 +02:00
|
|
|
hipStream_t* stream) {
|
2020-05-14 03:50:34 -05:00
|
|
|
HIP_INIT_API(__hipPopCallConfiguration, gridDim, blockDim, sharedMem, stream);
|
2019-09-24 16:58:14 -04:00
|
|
|
|
|
|
|
|
ihipExec_t exec;
|
|
|
|
|
PlatformState::instance().popExec(exec);
|
|
|
|
|
*gridDim = exec.gridDim_;
|
|
|
|
|
*blockDim = exec.blockDim_;
|
|
|
|
|
*sharedMem = exec.sharedMem_;
|
|
|
|
|
*stream = exec.hStream_;
|
|
|
|
|
|
|
|
|
|
HIP_RETURN(hipSuccess);
|
|
|
|
|
}
|
|
|
|
|
|
2023-04-21 10:46:05 +00:00
|
|
|
hipError_t hipSetupArgument(const void* arg, size_t size, size_t offset) {
|
2020-05-14 03:50:34 -05:00
|
|
|
HIP_INIT_API(hipSetupArgument, arg, size, offset);
|
2018-03-28 19:23:57 -04:00
|
|
|
|
2018-08-02 12:33:55 -04:00
|
|
|
PlatformState::instance().setupArgument(arg, size, offset);
|
2018-03-01 22:57:20 -05:00
|
|
|
|
2018-08-14 18:54:13 -04:00
|
|
|
HIP_RETURN(hipSuccess);
|
2018-03-01 22:57:20 -05:00
|
|
|
}
|
|
|
|
|
|
2023-04-21 10:46:05 +00:00
|
|
|
hipError_t hipLaunchByPtr(const void* hostFunction) {
|
2020-05-14 03:50:34 -05:00
|
|
|
HIP_INIT_API(hipLaunchByPtr, hostFunction);
|
2018-03-28 19:23:57 -04:00
|
|
|
|
2020-02-13 10:44:10 -08:00
|
|
|
ihipExec_t exec;
|
|
|
|
|
PlatformState::instance().popExec(exec);
|
|
|
|
|
|
|
|
|
|
hip::Stream* stream = reinterpret_cast<hip::Stream*>(exec.hStream_);
|
2022-03-14 12:36:16 -04:00
|
|
|
int deviceId = (stream != nullptr) ? stream->DeviceId() : ihipGetDevice();
|
2019-12-17 20:18:36 -05:00
|
|
|
if (deviceId == -1) {
|
2023-12-07 01:32:20 +00:00
|
|
|
LogPrintfError("Wrong DeviceId: %d", deviceId);
|
2019-12-17 20:18:36 -05:00
|
|
|
HIP_RETURN(hipErrorNoDevice);
|
|
|
|
|
}
|
2020-05-18 22:40:33 -04:00
|
|
|
hipFunction_t func = nullptr;
|
|
|
|
|
hipError_t hip_error = PlatformState::instance().getStatFunc(&func, hostFunction, deviceId);
|
|
|
|
|
if ((hip_error != hipSuccess) || (func == nullptr)) {
|
2023-12-07 01:32:20 +00:00
|
|
|
LogPrintfError("Could not retrieve hostFunction: 0x%x", hostFunction);
|
2019-10-30 13:37:03 -04:00
|
|
|
HIP_RETURN(hipErrorInvalidDeviceFunction);
|
2018-08-14 18:54:13 -04:00
|
|
|
}
|
2018-03-01 22:57:20 -05:00
|
|
|
|
2018-08-27 18:46:34 -04:00
|
|
|
size_t size = exec.arguments_.size();
|
2022-03-14 12:36:16 -04:00
|
|
|
void* extra[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &exec.arguments_[0],
|
|
|
|
|
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, HIP_LAUNCH_PARAM_END};
|
2018-03-01 22:57:20 -05:00
|
|
|
|
2024-10-24 14:06:37 +02:00
|
|
|
STREAM_CAPTURE(hipLaunchByPtr, exec.hStream_, func, exec.blockDim_, exec.gridDim_,
|
|
|
|
|
exec.sharedMem_, extra);
|
|
|
|
|
|
2022-03-14 12:36:16 -04:00
|
|
|
HIP_RETURN(hipModuleLaunchKernel(func, exec.gridDim_.x, exec.gridDim_.y, exec.gridDim_.z,
|
|
|
|
|
exec.blockDim_.x, exec.blockDim_.y, exec.blockDim_.z,
|
|
|
|
|
exec.sharedMem_, exec.hStream_, nullptr, extra));
|
2018-03-01 22:57:20 -05:00
|
|
|
}
|
2018-04-13 18:19:28 -04:00
|
|
|
|
2020-03-04 16:05:37 -05:00
|
|
|
hipError_t hipGetSymbolAddress(void** devPtr, const void* symbol) {
|
|
|
|
|
HIP_INIT_API(hipGetSymbolAddress, devPtr, symbol);
|
|
|
|
|
|
2020-05-18 22:40:33 -04:00
|
|
|
hipError_t hip_error = hipSuccess;
|
2022-05-12 08:52:27 +00:00
|
|
|
if (devPtr == nullptr) {
|
|
|
|
|
HIP_RETURN(hipErrorInvalidValue);
|
|
|
|
|
}
|
2020-05-18 22:40:33 -04:00
|
|
|
size_t sym_size = 0;
|
|
|
|
|
|
2022-03-14 12:36:16 -04:00
|
|
|
HIP_RETURN_ONFAIL(
|
|
|
|
|
PlatformState::instance().getStatGlobalVar(symbol, ihipGetDevice(), devPtr, &sym_size));
|
2020-05-18 22:40:33 -04:00
|
|
|
|
2020-07-20 00:22:27 -07:00
|
|
|
HIP_RETURN(hipSuccess, *devPtr);
|
2019-03-18 18:44:55 -04:00
|
|
|
}
|
|
|
|
|
|
2020-03-04 16:05:37 -05:00
|
|
|
hipError_t hipGetSymbolSize(size_t* sizePtr, const void* symbol) {
|
|
|
|
|
HIP_INIT_API(hipGetSymbolSize, sizePtr, symbol);
|
|
|
|
|
|
2022-05-12 08:52:27 +00:00
|
|
|
if (sizePtr == nullptr) {
|
|
|
|
|
HIP_RETURN(hipErrorInvalidValue);
|
|
|
|
|
}
|
2020-05-18 22:40:33 -04:00
|
|
|
hipDeviceptr_t device_ptr = nullptr;
|
2022-03-14 12:36:16 -04:00
|
|
|
HIP_RETURN_ONFAIL(
|
|
|
|
|
PlatformState::instance().getStatGlobalVar(symbol, ihipGetDevice(), &device_ptr, sizePtr));
|
2020-05-18 22:40:33 -04:00
|
|
|
|
2020-07-20 00:22:27 -07:00
|
|
|
HIP_RETURN(hipSuccess, *sizePtr);
|
2019-03-18 18:44:55 -04:00
|
|
|
}
|
|
|
|
|
|
2019-11-04 10:13:20 -05:00
|
|
|
hipError_t ihipCreateGlobalVarObj(const char* name, hipModule_t hmod, amd::Memory** amd_mem_obj,
|
2022-03-14 12:36:16 -04:00
|
|
|
hipDeviceptr_t* dptr, size_t* bytes) {
|
2019-04-04 18:22:40 -04:00
|
|
|
/* Get Device Program pointer*/
|
2021-11-29 17:53:36 +00:00
|
|
|
amd::Program* program = as_amd(reinterpret_cast<cl_program>(hmod));
|
|
|
|
|
device::Program* dev_program = program->getDeviceProgram(*hip::getCurrentDevice()->devices()[0]);
|
2019-04-04 18:22:40 -04:00
|
|
|
|
|
|
|
|
if (dev_program == nullptr) {
|
2023-12-07 01:32:20 +00:00
|
|
|
LogPrintfError("Cannot get Device Function for module: 0x%x", hmod);
|
2019-10-30 13:37:03 -04:00
|
|
|
HIP_RETURN(hipErrorInvalidDeviceFunction);
|
2019-04-04 18:22:40 -04:00
|
|
|
}
|
|
|
|
|
/* Find the global Symbols */
|
2020-04-03 12:13:12 -04:00
|
|
|
if (!dev_program->createGlobalVarObj(amd_mem_obj, dptr, bytes, name)) {
|
2023-12-07 01:32:20 +00:00
|
|
|
LogPrintfError("Cannot create Global Var obj for symbol: %s", name);
|
2019-10-30 13:37:03 -04:00
|
|
|
HIP_RETURN(hipErrorInvalidSymbol);
|
2019-04-04 18:22:40 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
HIP_RETURN(hipSuccess);
|
|
|
|
|
}
|
2023-04-21 10:46:05 +00:00
|
|
|
} // namespace hip
|
2019-04-04 18:22:40 -04:00
|
|
|
|
2019-06-12 10:00:38 -04:00
|
|
|
namespace hip_impl {
|
2020-04-03 12:13:12 -04:00
|
|
|
hipError_t ihipOccupancyMaxActiveBlocksPerMultiprocessor(
|
2022-03-14 12:36:16 -04:00
|
|
|
int* maxBlocksPerCU, int* numBlocksPerGrid, int* bestBlockSize, const amd::Device& device,
|
|
|
|
|
hipFunction_t func, int inputBlockSize, size_t dynamicSMemSize, bool bCalcPotentialBlkSz) {
|
2020-05-18 22:40:33 -04:00
|
|
|
hip::DeviceFunc* function = hip::DeviceFunc::asFunction(func);
|
|
|
|
|
const amd::Kernel& kernel = *function->kernel();
|
2020-04-03 12:13:12 -04:00
|
|
|
|
|
|
|
|
const device::Kernel::WorkGroupInfo* wrkGrpInfo = kernel.getDeviceKernel(device)->workGroupInfo();
|
2020-05-27 14:39:30 -05:00
|
|
|
if (bCalcPotentialBlkSz == false) {
|
2021-01-27 10:48:16 -05:00
|
|
|
if (inputBlockSize <= 0) {
|
2020-04-03 12:13:12 -04:00
|
|
|
return hipErrorInvalidValue;
|
2020-03-11 19:06:22 +05:30
|
|
|
}
|
2020-05-27 14:39:30 -05:00
|
|
|
*bestBlockSize = 0;
|
|
|
|
|
// Make sure the requested block size is smaller than max supported
|
|
|
|
|
if (inputBlockSize > int(device.info().maxWorkGroupSize_)) {
|
2022-03-14 12:36:16 -04:00
|
|
|
*maxBlocksPerCU = 0;
|
|
|
|
|
*numBlocksPerGrid = 0;
|
|
|
|
|
return hipSuccess;
|
2020-03-11 19:06:22 +05:30
|
|
|
}
|
2022-03-14 12:36:16 -04:00
|
|
|
} else {
|
|
|
|
|
if (inputBlockSize > int(device.info().maxWorkGroupSize_) || inputBlockSize <= 0) {
|
2020-05-27 14:39:30 -05:00
|
|
|
// The user wrote the kernel to work with a workgroup size
|
|
|
|
|
// bigger than this hardware can support. Or they do not care
|
|
|
|
|
// about the size So just assume its maximum size is
|
|
|
|
|
// constrained by hardware
|
|
|
|
|
inputBlockSize = device.info().maxWorkGroupSize_;
|
|
|
|
|
}
|
2020-04-03 12:13:12 -04:00
|
|
|
}
|
2020-05-27 14:39:30 -05:00
|
|
|
// Find wave occupancy per CU => simd_per_cu * GPR usage
|
2021-05-03 11:26:08 -07:00
|
|
|
size_t MaxWavesPerSimd;
|
|
|
|
|
|
|
|
|
|
if (device.isa().versionMajor() <= 9) {
|
|
|
|
|
MaxWavesPerSimd = 8; // Limited by SPI 32 per CU, hence 8 per SIMD
|
|
|
|
|
} else {
|
|
|
|
|
MaxWavesPerSimd = 16;
|
|
|
|
|
}
|
2020-03-30 09:10:16 -04:00
|
|
|
size_t VgprWaves = MaxWavesPerSimd;
|
2023-01-17 04:18:24 +00:00
|
|
|
uint32_t VgprGranularity = device.info().vgprAllocGranularity_;
|
|
|
|
|
size_t maxVGPRs = device.info().vgprsPerSimd_;
|
2022-12-23 10:37:48 +00:00
|
|
|
size_t wavefrontSize = wrkGrpInfo->wavefrontSize_;
|
2023-01-17 04:18:24 +00:00
|
|
|
if (device.isa().versionMajor() >= 10) {
|
|
|
|
|
if (wavefrontSize == 64) {
|
|
|
|
|
maxVGPRs = maxVGPRs >> 1;
|
|
|
|
|
VgprGranularity = VgprGranularity >> 1;
|
|
|
|
|
}
|
2021-05-03 11:26:08 -07:00
|
|
|
}
|
2023-04-12 22:46:42 +01:00
|
|
|
if (wrkGrpInfo->usedVGPRs_ > 0) {
|
2021-05-03 11:26:08 -07:00
|
|
|
VgprWaves = maxVGPRs / amd::alignUp(wrkGrpInfo->usedVGPRs_, VgprGranularity);
|
2020-03-30 09:10:16 -04:00
|
|
|
}
|
2020-04-03 12:13:12 -04:00
|
|
|
|
2023-12-12 07:46:22 +00:00
|
|
|
if (VgprWaves == 0) {
|
2025-07-09 10:46:52 -04:00
|
|
|
// This should not happen ideally, but in case the value is
|
2023-12-12 07:46:22 +00:00
|
|
|
// incorrect, it can lead to a crash. By returning error, API can exit gracefully.
|
|
|
|
|
return hipErrorUnknown;
|
|
|
|
|
}
|
|
|
|
|
|
2020-03-30 09:10:16 -04:00
|
|
|
size_t GprWaves = VgprWaves;
|
2019-08-01 16:40:24 -04:00
|
|
|
if (wrkGrpInfo->usedSGPRs_ > 0) {
|
2023-01-17 04:18:24 +00:00
|
|
|
size_t maxSGPRs = device.info().sgprsPerSimd_;
|
2020-05-27 14:39:30 -05:00
|
|
|
const size_t SgprWaves = maxSGPRs / amd::alignUp(wrkGrpInfo->usedSGPRs_, 16);
|
2019-08-01 16:40:24 -04:00
|
|
|
GprWaves = std::min(VgprWaves, SgprWaves);
|
|
|
|
|
}
|
2025-05-12 11:09:03 -04:00
|
|
|
|
|
|
|
|
// The table contains SIMD per CU, not per WGP, so when WGP mode is set on kernel metadata,
|
|
|
|
|
// multiply the number of SIMDs by 2, to account for 2CUs in 1 WGP.
|
|
|
|
|
uint32_t simdPerCU = device.isa().simdPerCU();
|
|
|
|
|
if (wrkGrpInfo->isWGPMode_) {
|
2025-08-20 16:28:06 +02:00
|
|
|
simdPerCU *= 2;
|
2025-05-12 11:09:03 -04:00
|
|
|
}
|
|
|
|
|
|
2023-01-02 12:26:12 +00:00
|
|
|
const size_t alu_occupancy = simdPerCU * std::min(MaxWavesPerSimd, GprWaves);
|
2020-05-27 14:39:30 -05:00
|
|
|
const int alu_limited_threads = alu_occupancy * wrkGrpInfo->wavefrontSize_;
|
2019-06-12 10:00:38 -04:00
|
|
|
|
2020-05-27 14:39:30 -05:00
|
|
|
int lds_occupancy_wgs = INT_MAX;
|
|
|
|
|
const size_t total_used_lds = wrkGrpInfo->usedLDSSize_ + dynamicSMemSize;
|
2019-06-12 10:00:38 -04:00
|
|
|
if (total_used_lds != 0) {
|
2020-05-27 14:39:30 -05:00
|
|
|
lds_occupancy_wgs = static_cast<int>(device.info().localMemSize_ / total_used_lds);
|
|
|
|
|
}
|
|
|
|
|
// Calculate how many blocks of inputBlockSize we can fit per CU
|
|
|
|
|
// Need to align with hardware wavefront size. If they want 65 threads, but
|
|
|
|
|
// waves are 64, then we need 128 threads per block.
|
|
|
|
|
// So this calculates how many blocks we can fit.
|
|
|
|
|
*maxBlocksPerCU = alu_limited_threads / amd::alignUp(inputBlockSize, wrkGrpInfo->wavefrontSize_);
|
|
|
|
|
// Unless those blocks are further constrained by LDS size.
|
|
|
|
|
*maxBlocksPerCU = std::min(*maxBlocksPerCU, lds_occupancy_wgs);
|
|
|
|
|
|
|
|
|
|
// Some callers of this function want to return the block size, in threads, that
|
|
|
|
|
// leads to the maximum occupancy. In that case, inputBlockSize is the maximum
|
|
|
|
|
// workgroup size the user wants to allow, or that the hardware can allow.
|
|
|
|
|
// It is either the number of threads that we are limited to due to occupancy, or
|
|
|
|
|
// the maximum available block size for this kernel, which could have come from the
|
|
|
|
|
// user. e.g., if the user indicates the maximum block size is 64 threads, but we
|
|
|
|
|
// calculate that 128 threads can fit in each CU, we have to give up and return 64.
|
2022-03-14 12:36:16 -04:00
|
|
|
*bestBlockSize =
|
|
|
|
|
std::min(alu_limited_threads, amd::alignUp(inputBlockSize, wrkGrpInfo->wavefrontSize_));
|
2020-05-27 14:39:30 -05:00
|
|
|
// If the best block size is smaller than the block size used to fit the maximum,
|
|
|
|
|
// then we need to make the grid bigger for full occupancy.
|
|
|
|
|
const int bestBlocksPerCU = alu_limited_threads / (*bestBlockSize);
|
2024-04-02 18:11:35 -07:00
|
|
|
uint32_t maxCUs = device.info().maxComputeUnits_;
|
|
|
|
|
if (wrkGrpInfo->isWGPMode_ == false && device.settings().enableWgpMode_ == true) {
|
|
|
|
|
maxCUs *= 2;
|
|
|
|
|
} else if ((wrkGrpInfo->isWGPMode_ == true && device.settings().enableWgpMode_ == false)) {
|
|
|
|
|
maxCUs /= 2;
|
|
|
|
|
}
|
2020-05-27 14:39:30 -05:00
|
|
|
// Unless those blocks are further constrained by LDS size.
|
2024-04-02 18:11:35 -07:00
|
|
|
*numBlocksPerGrid = (maxCUs * std::min(bestBlocksPerCU, lds_occupancy_wgs));
|
2019-06-12 10:00:38 -04:00
|
|
|
|
2020-04-03 12:13:12 -04:00
|
|
|
return hipSuccess;
|
2019-06-12 10:00:38 -04:00
|
|
|
}
|
2022-03-14 12:36:16 -04:00
|
|
|
} // namespace hip_impl
|
2019-06-12 10:00:38 -04:00
|
|
|
|
2023-04-21 10:46:05 +00:00
|
|
|
namespace hip {
|
2022-03-14 12:36:16 -04:00
|
|
|
hipError_t hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, const void* f,
|
|
|
|
|
size_t dynSharedMemPerBlk, int blockSizeLimit) {
|
2020-04-03 12:13:12 -04:00
|
|
|
HIP_INIT_API(hipOccupancyMaxPotentialBlockSize, f, dynSharedMemPerBlk, blockSizeLimit);
|
|
|
|
|
if ((gridSize == nullptr) || (blockSize == nullptr)) {
|
2020-05-28 20:18:16 -04:00
|
|
|
HIP_RETURN(hipErrorInvalidValue);
|
2020-03-11 19:06:22 +05:30
|
|
|
}
|
2020-05-18 22:40:33 -04:00
|
|
|
hipFunction_t func = nullptr;
|
|
|
|
|
hipError_t hip_error = PlatformState::instance().getStatFunc(&func, f, ihipGetDevice());
|
|
|
|
|
if ((hip_error != hipSuccess) || (func == nullptr)) {
|
2022-12-15 08:01:21 -05:00
|
|
|
HIP_RETURN(hipErrorInvalidDeviceFunction);
|
2020-04-23 21:42:06 +05:30
|
|
|
}
|
|
|
|
|
const amd::Device& device = *hip::getCurrentDevice()->devices()[0];
|
2020-05-27 14:39:30 -05:00
|
|
|
int max_blocks_per_grid = 0;
|
2020-04-23 21:42:06 +05:30
|
|
|
int num_blocks = 0;
|
2020-05-27 14:39:30 -05:00
|
|
|
int best_block_size = 0;
|
2020-04-23 21:42:06 +05:30
|
|
|
hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(
|
2022-03-14 12:36:16 -04:00
|
|
|
&num_blocks, &max_blocks_per_grid, &best_block_size, device, func, blockSizeLimit,
|
|
|
|
|
dynSharedMemPerBlk, true);
|
2020-04-23 21:42:06 +05:30
|
|
|
if (ret == hipSuccess) {
|
2020-05-27 14:39:30 -05:00
|
|
|
*blockSize = best_block_size;
|
|
|
|
|
*gridSize = max_blocks_per_grid;
|
2020-04-03 12:13:12 -04:00
|
|
|
}
|
2020-04-23 21:42:06 +05:30
|
|
|
HIP_RETURN(ret);
|
|
|
|
|
}
|
2020-04-03 12:13:12 -04:00
|
|
|
|
2022-03-14 12:36:16 -04:00
|
|
|
hipError_t hipModuleOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize, hipFunction_t f,
|
|
|
|
|
size_t dynSharedMemPerBlk, int blockSizeLimit) {
|
2020-04-23 21:42:06 +05:30
|
|
|
HIP_INIT_API(hipModuleOccupancyMaxPotentialBlockSize, f, dynSharedMemPerBlk, blockSizeLimit);
|
2022-12-15 08:01:21 -05:00
|
|
|
if ((gridSize == nullptr) || (blockSize == nullptr) || (f == nullptr)) {
|
2020-05-28 20:18:16 -04:00
|
|
|
HIP_RETURN(hipErrorInvalidValue);
|
2020-04-23 21:42:06 +05:30
|
|
|
}
|
2020-04-03 12:13:12 -04:00
|
|
|
const amd::Device& device = *hip::getCurrentDevice()->devices()[0];
|
2020-05-27 14:39:30 -05:00
|
|
|
int max_blocks_per_grid = 0;
|
2020-04-23 21:42:06 +05:30
|
|
|
int num_blocks = 0;
|
2020-05-27 14:39:30 -05:00
|
|
|
int best_block_size = 0;
|
2020-04-23 21:42:06 +05:30
|
|
|
hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(
|
2022-03-14 12:36:16 -04:00
|
|
|
&num_blocks, &max_blocks_per_grid, &best_block_size, device, f, blockSizeLimit,
|
|
|
|
|
dynSharedMemPerBlk, true);
|
2020-04-23 21:42:06 +05:30
|
|
|
if (ret == hipSuccess) {
|
2020-05-27 14:39:30 -05:00
|
|
|
*blockSize = best_block_size;
|
|
|
|
|
*gridSize = max_blocks_per_grid;
|
2020-04-23 21:42:06 +05:30
|
|
|
}
|
|
|
|
|
HIP_RETURN(ret);
|
|
|
|
|
}
|
2020-04-03 12:13:12 -04:00
|
|
|
|
2020-04-23 21:42:06 +05:30
|
|
|
hipError_t hipModuleOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int* blockSize,
|
2022-03-14 12:36:16 -04:00
|
|
|
hipFunction_t f,
|
|
|
|
|
size_t dynSharedMemPerBlk,
|
|
|
|
|
int blockSizeLimit,
|
|
|
|
|
unsigned int flags) {
|
|
|
|
|
HIP_INIT_API(hipModuleOccupancyMaxPotentialBlockSizeWithFlags, f, dynSharedMemPerBlk,
|
|
|
|
|
blockSizeLimit, flags);
|
2022-12-15 08:01:21 -05:00
|
|
|
if ((gridSize == nullptr) || (blockSize == nullptr) || (f == nullptr)) {
|
2020-05-28 20:18:16 -04:00
|
|
|
HIP_RETURN(hipErrorInvalidValue);
|
2020-04-23 21:42:06 +05:30
|
|
|
}
|
2023-03-06 13:58:04 +00:00
|
|
|
if (flags != hipOccupancyDefault && flags != hipOccupancyDisableCachingOverride) {
|
|
|
|
|
HIP_RETURN(hipErrorInvalidValue);
|
|
|
|
|
}
|
2020-04-23 21:42:06 +05:30
|
|
|
const amd::Device& device = *hip::getCurrentDevice()->devices()[0];
|
2020-05-27 14:39:30 -05:00
|
|
|
int max_blocks_per_grid = 0;
|
2020-04-03 12:13:12 -04:00
|
|
|
int num_blocks = 0;
|
2020-05-27 14:39:30 -05:00
|
|
|
int best_block_size = 0;
|
2020-04-03 12:13:12 -04:00
|
|
|
hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(
|
2022-03-14 12:36:16 -04:00
|
|
|
&num_blocks, &max_blocks_per_grid, &best_block_size, device, f, blockSizeLimit,
|
|
|
|
|
dynSharedMemPerBlk, true);
|
2020-04-03 12:13:12 -04:00
|
|
|
if (ret == hipSuccess) {
|
2020-05-27 14:39:30 -05:00
|
|
|
*blockSize = best_block_size;
|
|
|
|
|
*gridSize = max_blocks_per_grid;
|
2020-04-03 12:13:12 -04:00
|
|
|
}
|
|
|
|
|
HIP_RETURN(ret);
|
2020-03-11 19:06:22 +05:30
|
|
|
}
|
|
|
|
|
|
2022-03-14 12:36:16 -04:00
|
|
|
hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, hipFunction_t f,
|
|
|
|
|
int blockSize,
|
|
|
|
|
size_t dynSharedMemPerBlk) {
|
|
|
|
|
HIP_INIT_API(hipModuleOccupancyMaxActiveBlocksPerMultiprocessor, f, blockSize,
|
|
|
|
|
dynSharedMemPerBlk);
|
2022-12-15 08:01:21 -05:00
|
|
|
if (numBlocks == nullptr || (f == nullptr)) {
|
2020-05-28 20:18:16 -04:00
|
|
|
HIP_RETURN(hipErrorInvalidValue);
|
2020-04-23 21:42:06 +05:30
|
|
|
}
|
|
|
|
|
const amd::Device& device = *hip::getCurrentDevice()->devices()[0];
|
|
|
|
|
|
|
|
|
|
int num_blocks = 0;
|
2020-05-27 14:39:30 -05:00
|
|
|
int max_blocks_per_grid = 0;
|
|
|
|
|
int best_block_size = 0;
|
2020-04-23 21:42:06 +05:30
|
|
|
hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(
|
2022-03-14 12:36:16 -04:00
|
|
|
&num_blocks, &max_blocks_per_grid, &best_block_size, device, f, blockSize, dynSharedMemPerBlk,
|
|
|
|
|
false);
|
2020-04-23 21:42:06 +05:30
|
|
|
*numBlocks = num_blocks;
|
|
|
|
|
HIP_RETURN(ret);
|
|
|
|
|
}
|
|
|
|
|
|
2022-03-14 12:36:16 -04:00
|
|
|
hipError_t hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
|
|
|
|
|
int* numBlocks, hipFunction_t f, int blockSize, size_t dynSharedMemPerBlk, unsigned int flags) {
|
|
|
|
|
HIP_INIT_API(hipModuleOccupancyMaxActiveBlocksPerMultiprocessorWithFlags, f, blockSize,
|
|
|
|
|
dynSharedMemPerBlk, flags);
|
2022-12-15 08:01:21 -05:00
|
|
|
if (numBlocks == nullptr || (f == nullptr)) {
|
2020-05-28 20:18:16 -04:00
|
|
|
HIP_RETURN(hipErrorInvalidValue);
|
2020-04-23 21:42:06 +05:30
|
|
|
}
|
2023-03-06 13:58:04 +00:00
|
|
|
if (flags != hipOccupancyDefault && flags != hipOccupancyDisableCachingOverride) {
|
|
|
|
|
HIP_RETURN(hipErrorInvalidValue);
|
|
|
|
|
}
|
2020-04-23 21:42:06 +05:30
|
|
|
const amd::Device& device = *hip::getCurrentDevice()->devices()[0];
|
|
|
|
|
|
|
|
|
|
int num_blocks = 0;
|
2020-05-27 14:39:30 -05:00
|
|
|
int max_blocks_per_grid = 0;
|
|
|
|
|
int best_block_size = 0;
|
2020-04-23 21:42:06 +05:30
|
|
|
hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(
|
2022-03-14 12:36:16 -04:00
|
|
|
&num_blocks, &max_blocks_per_grid, &best_block_size, device, f, blockSize, dynSharedMemPerBlk,
|
|
|
|
|
false);
|
2020-04-23 21:42:06 +05:30
|
|
|
*numBlocks = num_blocks;
|
|
|
|
|
HIP_RETURN(ret);
|
|
|
|
|
}
|
|
|
|
|
|
2022-03-14 12:36:16 -04:00
|
|
|
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, const void* f,
|
|
|
|
|
int blockSize, size_t dynamicSMemSize) {
|
2020-04-03 12:13:12 -04:00
|
|
|
HIP_INIT_API(hipOccupancyMaxActiveBlocksPerMultiprocessor, f, blockSize, dynamicSMemSize);
|
|
|
|
|
if (numBlocks == nullptr) {
|
2020-05-28 20:18:16 -04:00
|
|
|
HIP_RETURN(hipErrorInvalidValue);
|
2020-04-03 12:13:12 -04:00
|
|
|
}
|
|
|
|
|
|
2020-05-18 22:40:33 -04:00
|
|
|
hipFunction_t func = nullptr;
|
|
|
|
|
hipError_t hip_error = PlatformState::instance().getStatFunc(&func, f, ihipGetDevice());
|
|
|
|
|
if ((hip_error != hipSuccess) || (func == nullptr)) {
|
2022-12-15 08:01:21 -05:00
|
|
|
HIP_RETURN(hipErrorInvalidDeviceFunction);
|
2020-04-03 12:13:12 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const amd::Device& device = *hip::getCurrentDevice()->devices()[0];
|
|
|
|
|
|
|
|
|
|
int num_blocks = 0;
|
2020-05-27 14:39:30 -05:00
|
|
|
int max_blocks_per_grid = 0;
|
|
|
|
|
int best_block_size = 0;
|
2020-04-03 12:13:12 -04:00
|
|
|
hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(
|
2022-03-14 12:36:16 -04:00
|
|
|
&num_blocks, &max_blocks_per_grid, &best_block_size, device, func, blockSize, dynamicSMemSize,
|
|
|
|
|
false);
|
2020-04-03 12:13:12 -04:00
|
|
|
*numBlocks = num_blocks;
|
|
|
|
|
HIP_RETURN(ret);
|
2019-06-12 10:00:38 -04:00
|
|
|
}
|
|
|
|
|
|
2022-03-14 12:36:16 -04:00
|
|
|
hipError_t hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks, const void* f,
|
|
|
|
|
int blockSize,
|
|
|
|
|
size_t dynamicSMemSize,
|
|
|
|
|
unsigned int flags) {
|
|
|
|
|
HIP_INIT_API(hipOccupancyMaxActiveBlocksPerMultiprocessorWithFlags, f, blockSize, dynamicSMemSize,
|
|
|
|
|
flags);
|
2020-04-03 12:13:12 -04:00
|
|
|
if (numBlocks == nullptr) {
|
2020-05-28 20:18:16 -04:00
|
|
|
HIP_RETURN(hipErrorInvalidValue);
|
2020-04-03 12:13:12 -04:00
|
|
|
}
|
2022-12-08 06:55:23 +00:00
|
|
|
if (flags != hipOccupancyDefault && flags != hipOccupancyDisableCachingOverride) {
|
|
|
|
|
HIP_RETURN(hipErrorInvalidValue);
|
|
|
|
|
}
|
2020-05-18 22:40:33 -04:00
|
|
|
hipFunction_t func = nullptr;
|
|
|
|
|
hipError_t hip_error = PlatformState::instance().getStatFunc(&func, f, ihipGetDevice());
|
|
|
|
|
if ((hip_error != hipSuccess) || (func == nullptr)) {
|
2022-12-15 08:01:21 -05:00
|
|
|
HIP_RETURN(hipErrorInvalidDeviceFunction);
|
2020-04-03 12:13:12 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const amd::Device& device = *hip::getCurrentDevice()->devices()[0];
|
|
|
|
|
|
|
|
|
|
int num_blocks = 0;
|
2020-05-27 14:39:30 -05:00
|
|
|
int max_blocks_per_grid = 0;
|
|
|
|
|
int best_block_size = 0;
|
2020-04-03 12:13:12 -04:00
|
|
|
hipError_t ret = hip_impl::ihipOccupancyMaxActiveBlocksPerMultiprocessor(
|
2022-03-14 12:36:16 -04:00
|
|
|
&num_blocks, &max_blocks_per_grid, &best_block_size, device, func, blockSize, dynamicSMemSize,
|
|
|
|
|
false);
|
2020-04-03 12:13:12 -04:00
|
|
|
*numBlocks = num_blocks;
|
|
|
|
|
HIP_RETURN(ret);
|
2019-10-30 01:19:24 -04:00
|
|
|
}
|
2019-06-12 10:00:38 -04:00
|
|
|
|
2022-03-14 12:36:16 -04:00
|
|
|
hipError_t ihipLaunchKernel(const void* hostFunction, dim3 gridDim, dim3 blockDim, void** args,
|
|
|
|
|
size_t sharedMemBytes, hipStream_t stream, hipEvent_t startEvent,
|
|
|
|
|
hipEvent_t stopEvent, int flags) {
|
2024-06-03 11:17:51 +00:00
|
|
|
if (!hip::isValid(stream)) {
|
|
|
|
|
return hipErrorInvalidValue;
|
|
|
|
|
}
|
2022-03-14 12:36:16 -04:00
|
|
|
hipFunction_t func = nullptr;
|
2021-04-27 19:03:22 -04:00
|
|
|
int deviceId = hip::Stream::DeviceId(stream);
|
2020-05-18 22:40:33 -04:00
|
|
|
hipError_t hip_error = PlatformState::instance().getStatFunc(&func, hostFunction, deviceId);
|
|
|
|
|
if ((hip_error != hipSuccess) || (func == nullptr)) {
|
2023-12-13 14:32:45 -05:00
|
|
|
if (hip_error == hipErrorNoBinaryForGpu) {
|
2022-09-14 18:13:55 -07:00
|
|
|
return hip_error;
|
2022-07-27 05:54:03 +00:00
|
|
|
} else {
|
2022-09-14 18:13:55 -07:00
|
|
|
return hipErrorInvalidDeviceFunction;
|
2022-07-27 05:54:03 +00:00
|
|
|
}
|
2020-02-06 13:56:41 -05:00
|
|
|
}
|
2024-09-06 19:04:21 -04:00
|
|
|
|
|
|
|
|
constexpr auto gridDimYZmax = static_cast<uint64_t>(std::numeric_limits<uint16_t>::max()) + 1;
|
|
|
|
|
const auto& isa = g_devices[deviceId]->devices()[0]->isa().versionMajor();
|
|
|
|
|
if (isa >= 12 && (gridDim.y > gridDimYZmax || gridDim.z > gridDimYZmax)) {
|
|
|
|
|
return hipErrorInvalidConfiguration;
|
|
|
|
|
}
|
|
|
|
|
|
2025-05-06 15:06:13 -04:00
|
|
|
amd::HIPLaunchParams launch_params(gridDim.x, gridDim.y, gridDim.z, blockDim.x, blockDim.y,
|
|
|
|
|
blockDim.z, sharedMemBytes);
|
|
|
|
|
if (!launch_params.IsValidConfig()) {
|
2022-09-14 18:13:55 -07:00
|
|
|
return hipErrorInvalidConfiguration;
|
2020-07-14 06:41:34 -04:00
|
|
|
}
|
2025-05-06 15:06:13 -04:00
|
|
|
|
2025-08-20 16:28:06 +02:00
|
|
|
return ihipModuleLaunchKernel(func, launch_params, stream, args, nullptr, startEvent, stopEvent,
|
|
|
|
|
flags);
|
2020-02-06 13:56:41 -05:00
|
|
|
}
|
2020-05-18 22:40:33 -04:00
|
|
|
|
2020-09-10 22:26:49 +00:00
|
|
|
// conversion routines between float and half precision
|
|
|
|
|
|
2022-03-14 12:36:16 -04:00
|
|
|
static inline std::uint32_t f32_as_u32(float f) {
|
|
|
|
|
union {
|
|
|
|
|
float f;
|
|
|
|
|
std::uint32_t u;
|
|
|
|
|
} v;
|
|
|
|
|
v.f = f;
|
|
|
|
|
return v.u;
|
|
|
|
|
}
|
2020-09-10 22:26:49 +00:00
|
|
|
|
2022-03-14 12:36:16 -04:00
|
|
|
static inline float u32_as_f32(std::uint32_t u) {
|
|
|
|
|
union {
|
|
|
|
|
float f;
|
|
|
|
|
std::uint32_t u;
|
|
|
|
|
} v;
|
|
|
|
|
v.u = u;
|
|
|
|
|
return v.f;
|
|
|
|
|
}
|
2020-09-10 22:26:49 +00:00
|
|
|
|
|
|
|
|
static inline int clamp_int(int i, int l, int h) { return std::min(std::max(i, l), h); }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// half float, the f16 is in the low 16 bits of the input argument
|
|
|
|
|
|
|
|
|
|
static inline float __convert_half_to_float(std::uint32_t a) noexcept {
|
|
|
|
|
std::uint32_t u = ((a << 13) + 0x70000000U) & 0x8fffe000U;
|
|
|
|
|
|
2022-03-14 12:36:16 -04:00
|
|
|
std::uint32_t v =
|
|
|
|
|
f32_as_u32(u32_as_f32(u) * u32_as_f32(0x77800000U) /*0x1.0p+112f*/) + 0x38000000U;
|
2020-09-10 22:26:49 +00:00
|
|
|
|
|
|
|
|
u = (a & 0x7fff) != 0 ? v : u;
|
|
|
|
|
|
2022-03-14 12:36:16 -04:00
|
|
|
return u32_as_f32(u) * u32_as_f32(0x07800000U) /*0x1.0p-112f*/;
|
2020-09-10 22:26:49 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// float half with nearest even rounding
|
|
|
|
|
// The lower 16 bits of the result is the bit pattern for the f16
|
|
|
|
|
static inline std::uint32_t __convert_float_to_half(float a) noexcept {
|
|
|
|
|
std::uint32_t u = f32_as_u32(a);
|
|
|
|
|
int e = static_cast<int>((u >> 23) & 0xff) - 127 + 15;
|
|
|
|
|
std::uint32_t m = ((u >> 11) & 0xffe) | ((u & 0xfff) != 0);
|
|
|
|
|
std::uint32_t i = 0x7c00 | (m != 0 ? 0x0200 : 0);
|
|
|
|
|
std::uint32_t n = ((std::uint32_t)e << 12) | m;
|
|
|
|
|
std::uint32_t s = (u >> 16) & 0x8000;
|
2022-03-14 12:36:16 -04:00
|
|
|
int b = clamp_int(1 - e, 0, 13);
|
2020-09-10 22:26:49 +00:00
|
|
|
std::uint32_t d = (0x1000 | m) >> b;
|
|
|
|
|
d |= (d << b) != (0x1000 | m);
|
|
|
|
|
std::uint32_t v = e < 1 ? d : n;
|
|
|
|
|
v = (v >> 2) + (((v & 0x7) == 3) | ((v & 0x7) > 5));
|
|
|
|
|
v = e > 30 ? 0x7c00 : v;
|
|
|
|
|
v = e == 143 ? i : v;
|
|
|
|
|
return s | v;
|
|
|
|
|
}
|
|
|
|
|
|
2020-10-05 13:20:58 -04:00
|
|
|
extern "C"
|
|
|
|
|
#if !defined(_MSC_VER)
|
2022-03-14 12:36:16 -04:00
|
|
|
__attribute__((weak))
|
2020-10-05 13:20:58 -04:00
|
|
|
#endif
|
2022-03-14 12:36:16 -04:00
|
|
|
float
|
|
|
|
|
__gnu_h2f_ieee(unsigned short h) {
|
|
|
|
|
return __convert_half_to_float((std::uint32_t)h);
|
2020-09-10 22:26:49 +00:00
|
|
|
}
|
|
|
|
|
|
2020-10-05 13:20:58 -04:00
|
|
|
extern "C"
|
|
|
|
|
#if !defined(_MSC_VER)
|
2022-03-14 12:36:16 -04:00
|
|
|
__attribute__((weak))
|
2020-10-05 13:20:58 -04:00
|
|
|
#endif
|
2022-03-14 12:36:16 -04:00
|
|
|
unsigned short
|
|
|
|
|
__gnu_f2h_ieee(float f) {
|
2020-09-10 22:26:49 +00:00
|
|
|
return (unsigned short)__convert_float_to_half(f);
|
|
|
|
|
}
|
|
|
|
|
|
2022-03-14 12:36:16 -04:00
|
|
|
void PlatformState::init() {
|
2020-05-18 22:40:33 -04:00
|
|
|
amd::ScopedLock lock(lock_);
|
2022-03-14 12:36:16 -04:00
|
|
|
if (initialized_ || g_devices.empty()) {
|
2020-05-18 22:40:33 -04:00
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
initialized_ = true;
|
2022-03-14 12:36:16 -04:00
|
|
|
for (auto& it : statCO_.vars_) {
|
2020-05-18 22:40:33 -04:00
|
|
|
it.second->resize_dVar(g_devices.size());
|
|
|
|
|
}
|
2025-07-31 08:30:23 -07:00
|
|
|
for (auto& it : statCO_.managedVars_) {
|
2025-08-20 16:28:06 +02:00
|
|
|
for (auto& var : it.second) {
|
2025-07-31 08:30:23 -07:00
|
|
|
var->resize_dVar(g_devices.size());
|
|
|
|
|
}
|
|
|
|
|
}
|
2022-03-14 12:36:16 -04:00
|
|
|
for (auto& it : statCO_.functions_) {
|
2020-05-18 22:40:33 -04:00
|
|
|
it.second->resize_dFunc(g_devices.size());
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2022-03-14 12:36:16 -04:00
|
|
|
hipError_t PlatformState::loadModule(hipModule_t* module, const char* fname, const void* image) {
|
|
|
|
|
if (module == nullptr) {
|
2020-10-13 01:19:47 -07:00
|
|
|
return hipErrorInvalidValue;
|
|
|
|
|
}
|
|
|
|
|
|
2020-05-18 22:40:33 -04:00
|
|
|
hip::DynCO* dynCo = new hip::DynCO();
|
|
|
|
|
hipError_t hip_error = dynCo->loadCodeObject(fname, image);
|
|
|
|
|
if (hip_error != hipSuccess) {
|
|
|
|
|
delete dynCo;
|
|
|
|
|
return hip_error;
|
|
|
|
|
}
|
|
|
|
|
|
2024-12-19 11:20:24 +00:00
|
|
|
*module = dynCo->getModule();
|
2020-05-18 22:40:33 -04:00
|
|
|
assert(*module != nullptr);
|
|
|
|
|
|
2023-02-13 18:18:23 -08:00
|
|
|
amd::ScopedLock lock(lock_);
|
2020-05-18 22:40:33 -04:00
|
|
|
if (dynCO_map_.find(*module) != dynCO_map_.end()) {
|
2021-11-29 17:53:36 +00:00
|
|
|
delete dynCo;
|
2020-05-18 22:40:33 -04:00
|
|
|
return hipErrorAlreadyMapped;
|
|
|
|
|
}
|
|
|
|
|
dynCO_map_.insert(std::make_pair(*module, dynCo));
|
|
|
|
|
|
|
|
|
|
return hipSuccess;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
hipError_t PlatformState::unloadModule(hipModule_t hmod) {
|
|
|
|
|
amd::ScopedLock lock(lock_);
|
|
|
|
|
|
|
|
|
|
auto it = dynCO_map_.find(hmod);
|
|
|
|
|
if (it == dynCO_map_.end()) {
|
|
|
|
|
return hipErrorNotFound;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
delete it->second;
|
|
|
|
|
dynCO_map_.erase(hmod);
|
|
|
|
|
|
2020-06-11 17:17:29 -04:00
|
|
|
auto tex_it = texRef_map_.begin();
|
|
|
|
|
while (tex_it != texRef_map_.end()) {
|
|
|
|
|
if (tex_it->second.first == hmod) {
|
|
|
|
|
tex_it = texRef_map_.erase(tex_it);
|
|
|
|
|
} else {
|
|
|
|
|
++tex_it;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2020-05-18 22:40:33 -04:00
|
|
|
return hipSuccess;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
hipError_t PlatformState::getDynFunc(hipFunction_t* hfunc, hipModule_t hmod,
|
2022-03-14 12:36:16 -04:00
|
|
|
const char* func_name) {
|
2020-05-18 22:40:33 -04:00
|
|
|
amd::ScopedLock lock(lock_);
|
|
|
|
|
|
|
|
|
|
auto it = dynCO_map_.find(hmod);
|
|
|
|
|
if (it == dynCO_map_.end()) {
|
2021-02-17 23:54:39 +05:30
|
|
|
LogPrintfError("Cannot find the module: 0x%x", hmod);
|
2020-05-18 22:40:33 -04:00
|
|
|
return hipErrorNotFound;
|
|
|
|
|
}
|
2020-10-05 13:20:58 -04:00
|
|
|
if (0 == strlen(func_name)) {
|
|
|
|
|
return hipErrorNotFound;
|
|
|
|
|
}
|
2020-05-18 22:40:33 -04:00
|
|
|
|
|
|
|
|
return it->second->getDynFunc(hfunc, func_name);
|
|
|
|
|
}
|
|
|
|
|
|
2025-08-13 20:28:12 -07:00
|
|
|
hipError_t PlatformState::getFuncCount(unsigned int* count, hipModule_t hmod) {
|
|
|
|
|
amd::ScopedLock lock(lock_);
|
|
|
|
|
|
|
|
|
|
auto it = dynCO_map_.find(hmod);
|
|
|
|
|
if (it == dynCO_map_.end()) {
|
|
|
|
|
LogPrintfError("Cannot find the module: 0x%x", hmod);
|
|
|
|
|
return hipErrorNotFound;
|
|
|
|
|
}
|
|
|
|
|
return it->second->getFuncCount(count);
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-05 15:40:52 +00:00
|
|
|
bool PlatformState::isValidDynFunc(const void* hfunc) {
|
|
|
|
|
amd::ScopedLock lock(lock_);
|
|
|
|
|
return std::any_of(dynCO_map_.begin(), dynCO_map_.end(),
|
|
|
|
|
[&](auto& it) { return it.second->isValidDynFunc(hfunc); });
|
|
|
|
|
}
|
|
|
|
|
|
2020-09-28 17:53:32 -04:00
|
|
|
hipError_t PlatformState::getDynGlobalVar(const char* hostVar, hipModule_t hmod,
|
2020-05-18 22:40:33 -04:00
|
|
|
hipDeviceptr_t* dev_ptr, size_t* size_ptr) {
|
|
|
|
|
amd::ScopedLock lock(lock_);
|
|
|
|
|
|
2024-02-29 09:06:03 +00:00
|
|
|
if (hostVar == nullptr) {
|
2020-10-13 01:19:47 -07:00
|
|
|
return hipErrorInvalidValue;
|
|
|
|
|
}
|
|
|
|
|
|
2020-05-18 22:40:33 -04:00
|
|
|
auto it = dynCO_map_.find(hmod);
|
|
|
|
|
if (it == dynCO_map_.end()) {
|
2021-02-17 23:54:39 +05:30
|
|
|
LogPrintfError("Cannot find the module: 0x%x", hmod);
|
2020-05-18 22:40:33 -04:00
|
|
|
return hipErrorNotFound;
|
|
|
|
|
}
|
2024-02-29 09:06:03 +00:00
|
|
|
if (dev_ptr) {
|
|
|
|
|
*dev_ptr = nullptr;
|
|
|
|
|
}
|
2022-03-02 11:46:56 -08:00
|
|
|
IHIP_RETURN_ONFAIL(it->second->getManagedVarPointer(hostVar, dev_ptr, size_ptr));
|
2021-03-16 23:54:10 -07:00
|
|
|
// if dev_ptr is nullptr, hostvar is not in managed variable list
|
2024-02-29 09:06:03 +00:00
|
|
|
if ((dev_ptr && *dev_ptr == nullptr) || (size_ptr && *size_ptr == 0)) {
|
2021-03-16 23:54:10 -07:00
|
|
|
hip::DeviceVar* dvar = nullptr;
|
|
|
|
|
IHIP_RETURN_ONFAIL(it->second->getDeviceVar(&dvar, hostVar));
|
2024-02-29 09:06:03 +00:00
|
|
|
if (dev_ptr != nullptr) {
|
|
|
|
|
*dev_ptr = dvar->device_ptr();
|
|
|
|
|
}
|
|
|
|
|
if (size_ptr != nullptr) {
|
|
|
|
|
*size_ptr = dvar->size();
|
|
|
|
|
}
|
2021-03-16 23:54:10 -07:00
|
|
|
}
|
2020-05-18 22:40:33 -04:00
|
|
|
return hipSuccess;
|
|
|
|
|
}
|
|
|
|
|
|
2020-06-11 17:17:29 -04:00
|
|
|
hipError_t PlatformState::registerTexRef(textureReference* texRef, hipModule_t hmod,
|
|
|
|
|
std::string name) {
|
|
|
|
|
amd::ScopedLock lock(lock_);
|
|
|
|
|
texRef_map_.insert(std::make_pair(texRef, std::make_pair(hmod, name)));
|
|
|
|
|
return hipSuccess;
|
|
|
|
|
}
|
|
|
|
|
|
2020-09-28 17:53:32 -04:00
|
|
|
hipError_t PlatformState::getDynTexGlobalVar(textureReference* texRef, hipDeviceptr_t* dev_ptr,
|
|
|
|
|
size_t* size_ptr) {
|
2020-06-11 17:17:29 -04:00
|
|
|
amd::ScopedLock lock(lock_);
|
|
|
|
|
|
|
|
|
|
auto tex_it = texRef_map_.find(texRef);
|
|
|
|
|
if (tex_it == texRef_map_.end()) {
|
2021-02-17 23:54:39 +05:30
|
|
|
LogPrintfError("Cannot find the texRef Entry: 0x%x", texRef);
|
2020-06-11 17:17:29 -04:00
|
|
|
return hipErrorNotFound;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
auto it = dynCO_map_.find(tex_it->second.first);
|
|
|
|
|
if (it == dynCO_map_.end()) {
|
2021-02-17 23:54:39 +05:30
|
|
|
LogPrintfError("Cannot find the module: 0x%x", tex_it->second.first);
|
2020-06-11 17:17:29 -04:00
|
|
|
return hipErrorNotFound;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
hip::DeviceVar* dvar = nullptr;
|
2020-09-28 17:53:32 -04:00
|
|
|
IHIP_RETURN_ONFAIL(it->second->getDeviceVar(&dvar, tex_it->second.second));
|
2020-06-11 17:17:29 -04:00
|
|
|
*dev_ptr = dvar->device_ptr();
|
|
|
|
|
*size_ptr = dvar->size();
|
|
|
|
|
|
|
|
|
|
return hipSuccess;
|
|
|
|
|
}
|
|
|
|
|
|
2022-03-14 12:36:16 -04:00
|
|
|
hipError_t PlatformState::getDynTexRef(const char* hostVar, hipModule_t hmod,
|
|
|
|
|
textureReference** texRef) {
|
2020-05-18 22:40:33 -04:00
|
|
|
amd::ScopedLock lock(lock_);
|
|
|
|
|
|
|
|
|
|
auto it = dynCO_map_.find(hmod);
|
|
|
|
|
if (it == dynCO_map_.end()) {
|
2021-02-17 23:54:39 +05:30
|
|
|
LogPrintfError("Cannot find the module: 0x%x", hmod);
|
2020-05-18 22:40:33 -04:00
|
|
|
return hipErrorNotFound;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
hip::DeviceVar* dvar = nullptr;
|
2020-09-28 17:53:32 -04:00
|
|
|
IHIP_RETURN_ONFAIL(it->second->getDeviceVar(&dvar, hostVar));
|
2020-05-18 22:40:33 -04:00
|
|
|
|
2021-08-20 17:47:05 -04:00
|
|
|
if (dvar->size() != sizeof(textureReference)) {
|
|
|
|
|
return hipErrorNotFound; // Any better way to verify texture type?
|
|
|
|
|
}
|
|
|
|
|
|
2020-05-18 22:40:33 -04:00
|
|
|
dvar->shadowVptr = new texture<char>();
|
2022-03-14 12:36:16 -04:00
|
|
|
*texRef = reinterpret_cast<textureReference*>(dvar->shadowVptr);
|
2020-05-18 22:40:33 -04:00
|
|
|
return hipSuccess;
|
|
|
|
|
}
|
|
|
|
|
|
2020-08-03 11:23:33 -04:00
|
|
|
hipError_t PlatformState::digestFatBinary(const void* data, hip::FatBinaryInfo*& programs) {
|
2022-03-14 12:36:16 -04:00
|
|
|
return statCO_.digestFatBinary(data, programs);
|
2020-05-18 22:40:33 -04:00
|
|
|
}
|
|
|
|
|
|
2024-09-24 15:18:06 +00:00
|
|
|
hip::FatBinaryInfo** PlatformState::addFatBinary(const void* data, bool& success) {
|
|
|
|
|
return statCO_.addFatBinary(data, initialized_, success);
|
2020-05-18 22:40:33 -04:00
|
|
|
}
|
|
|
|
|
|
2020-08-03 11:23:33 -04:00
|
|
|
hipError_t PlatformState::removeFatBinary(hip::FatBinaryInfo** module) {
|
2020-05-18 22:40:33 -04:00
|
|
|
return statCO_.removeFatBinary(module);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
hipError_t PlatformState::registerStatFunction(const void* hostFunction, hip::Function* func) {
|
|
|
|
|
return statCO_.registerStatFunction(hostFunction, func);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
hipError_t PlatformState::registerStatGlobalVar(const void* hostVar, hip::Var* var) {
|
|
|
|
|
return statCO_.registerStatGlobalVar(hostVar, var);
|
|
|
|
|
}
|
|
|
|
|
|
2021-02-16 07:20:58 -05:00
|
|
|
hipError_t PlatformState::registerStatManagedVar(hip::Var* var) {
|
|
|
|
|
return statCO_.registerStatManagedVar(var);
|
|
|
|
|
}
|
|
|
|
|
|
2022-08-16 08:38:56 -07:00
|
|
|
const char* PlatformState::getStatFuncName(const void* hostFunction) {
|
2022-07-08 18:44:49 -05:00
|
|
|
return statCO_.getStatFuncName(hostFunction);
|
|
|
|
|
}
|
|
|
|
|
|
2022-03-14 12:36:16 -04:00
|
|
|
hipError_t PlatformState::getStatFunc(hipFunction_t* hfunc, const void* hostFunction,
|
|
|
|
|
int deviceId) {
|
2020-05-18 22:40:33 -04:00
|
|
|
return statCO_.getStatFunc(hfunc, hostFunction, deviceId);
|
|
|
|
|
}
|
|
|
|
|
|
2022-03-14 12:36:16 -04:00
|
|
|
hipError_t PlatformState::getStatFuncAttr(hipFuncAttributes* func_attr, const void* hostFunction,
|
|
|
|
|
int deviceId) {
|
2022-12-20 09:48:58 +00:00
|
|
|
if (func_attr == nullptr) {
|
2021-01-27 10:48:16 -05:00
|
|
|
return hipErrorInvalidValue;
|
|
|
|
|
}
|
2022-12-20 09:48:58 +00:00
|
|
|
if (hostFunction == nullptr) {
|
|
|
|
|
return hipErrorInvalidDeviceFunction;
|
|
|
|
|
}
|
2020-05-18 22:40:33 -04:00
|
|
|
return statCO_.getStatFuncAttr(func_attr, hostFunction, deviceId);
|
|
|
|
|
}
|
|
|
|
|
|
2022-03-14 12:36:16 -04:00
|
|
|
hipError_t PlatformState::getStatGlobalVar(const void* hostVar, int deviceId,
|
|
|
|
|
hipDeviceptr_t* dev_ptr, size_t* size_ptr) {
|
2020-05-18 22:40:33 -04:00
|
|
|
return statCO_.getStatGlobalVar(hostVar, deviceId, dev_ptr, size_ptr);
|
|
|
|
|
}
|
|
|
|
|
|
2021-02-16 07:20:58 -05:00
|
|
|
hipError_t PlatformState::initStatManagedVarDevicePtr(int deviceId) {
|
|
|
|
|
return statCO_.initStatManagedVarDevicePtr(deviceId);
|
|
|
|
|
}
|
|
|
|
|
|
2022-03-14 12:36:16 -04:00
|
|
|
void PlatformState::setupArgument(const void* arg, size_t size, size_t offset) {
|
2022-09-26 15:59:27 +05:30
|
|
|
auto& arguments = hip::tls.exec_stack_.top().arguments_;
|
2020-05-18 22:40:33 -04:00
|
|
|
|
|
|
|
|
if (arguments.size() < offset + size) {
|
|
|
|
|
arguments.resize(offset + size);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
::memcpy(&arguments[offset], arg, size);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void PlatformState::configureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem,
|
|
|
|
|
hipStream_t stream) {
|
2022-09-26 15:59:27 +05:30
|
|
|
hip::tls.exec_stack_.push(ihipExec_t{gridDim, blockDim, sharedMem, stream});
|
2020-05-18 22:40:33 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void PlatformState::popExec(ihipExec_t& exec) {
|
2022-09-26 15:59:27 +05:30
|
|
|
exec = std::move(hip::tls.exec_stack_.top());
|
|
|
|
|
hip::tls.exec_stack_.pop();
|
2020-05-18 22:40:33 -04:00
|
|
|
}
|
2023-08-17 14:59:55 -04:00
|
|
|
|
|
|
|
|
std::shared_ptr<UniqueFD> PlatformState::GetUniqueFileHandle(const std::string& file_path) {
|
|
|
|
|
amd::ScopedLock lock(ufd_lock_);
|
|
|
|
|
|
|
|
|
|
if (ufd_map_.cend() == ufd_map_.find(file_path)) {
|
|
|
|
|
// Get the file desc and file size from amd::Os API
|
|
|
|
|
amd::Os::FileDesc fdesc;
|
|
|
|
|
size_t fsize = 0;
|
|
|
|
|
if (!amd::Os::GetFileHandle(file_path.c_str(), &fdesc, &fsize)) {
|
|
|
|
|
return nullptr;
|
|
|
|
|
}
|
|
|
|
|
ufd_map_.insert(std::make_pair(file_path, std::make_shared<UniqueFD>(file_path, fdesc, fsize)));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// we should have an entry at this time.
|
|
|
|
|
return ufd_map_[file_path];
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool PlatformState::CloseUniqueFileHandle(const std::shared_ptr<UniqueFD>& ufd) {
|
|
|
|
|
amd::ScopedLock lock(ufd_lock_);
|
|
|
|
|
|
|
|
|
|
// if use_count is 2, then there is 1 entry in the map and the current entry is the last close.
|
|
|
|
|
if (ufd.use_count() == 2) {
|
|
|
|
|
ufd_map_.erase(ufd->fpath_);
|
|
|
|
|
if (!amd::Os::CloseFileHandle(ufd->fdesc_)) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return true;
|
|
|
|
|
}
|
2023-12-15 18:19:58 -05:00
|
|
|
|
|
|
|
|
void* PlatformState::getDynamicLibraryHandle() {
|
|
|
|
|
amd::ScopedLock lock(lock_);
|
|
|
|
|
|
|
|
|
|
if (dynamicLibraryHandle_ != nullptr) {
|
|
|
|
|
return dynamicLibraryHandle_;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#ifdef _WIN32
|
|
|
|
|
const char* libName = "amdhip64.dll";
|
|
|
|
|
#else
|
|
|
|
|
const char* libName = "libamdhip64.so";
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
dynamicLibraryHandle_ = amd::Os::loadLibrary(libName);
|
|
|
|
|
return dynamicLibraryHandle_;
|
|
|
|
|
}
|
|
|
|
|
|
2025-08-20 16:28:06 +02:00
|
|
|
void PlatformState::setDynamicLibraryHandle(void* handle) {
|
2023-12-15 18:19:58 -05:00
|
|
|
amd::ScopedLock lock(lock_);
|
|
|
|
|
dynamicLibraryHandle_ = handle;
|
|
|
|
|
}
|
|
|
|
|
|
2025-08-20 16:28:06 +02:00
|
|
|
} // namespace hip
|