SWDEV-546287 - Implement hipLibrary load/unload (#975)

This commit is contained in:
Jatin Chaudhary
2025-09-19 22:23:49 +01:00
committed by GitHub
parent 775ac73d25
commit e79eaaa8a5
26 changed files with 1175 additions and 16 deletions
+5
View File
@@ -28,6 +28,11 @@ Full documentation for HIP is available at [rocm.docs.amd.com](https://rocm.docs
- `hipGetDriverEntryPoint ` gets function pointer of a HIP API.
- `hipSetValidDevices` sets a default list of devices that can be used by HIP
- `hipStreamGetId` queries the id of a stream
- `hipLibraryLoadData` Create library object from code
- `hipLibraryLoadFromFile` Create library object from file
- `hipLibraryUnload` Unload library
- `hipLibraryGetKernel` Get a kernel from library
- `hipLibraryGetKernelCount` Get kernel count in library
* Changed HIP APIs
- `hipMemAllocationType` now has hip exclusive enum hipMemAllocationTypeUncached
- `hipMemCreate` now checks for hipMemAllocationTypeUncached enum from
@@ -63,7 +63,7 @@
#define HIP_API_TABLE_STEP_VERSION 0
#define HIP_COMPILER_API_TABLE_STEP_VERSION 0
#define HIP_TOOLS_API_TABLE_STEP_VERSION 0
#define HIP_RUNTIME_API_TABLE_STEP_VERSION 14
#define HIP_RUNTIME_API_TABLE_STEP_VERSION 15
// HIP API interface
// HIP compiler dispatch functions
@@ -1087,6 +1087,23 @@ typedef hipError_t (*t_hipGetDriverEntryPoint)(const char* symbol, void** funcPt
typedef hipError_t (*t_hipGetDriverEntryPoint_spt)(const char* symbol, void** funcPtr,
unsigned long long flags,
hipDriverEntryPointQueryResult* status);
typedef hipError_t (*t_hipLibraryLoadData)(hipLibrary_t* library, const void* code,
hipJitOption** jitOptions, void** jitOptionsValues,
unsigned int numJitOptions,
hipLibraryOption** libraryOptions,
void** libraryOptionValues,
unsigned int numLibraryOptions);
typedef hipError_t (*t_hipLibraryLoadFromFile)(hipLibrary_t* library, const char* fileName,
hipJitOption** jitOptions, void** jitOptionsValues,
unsigned int numJitOptions,
hipLibraryOption** libraryOptions,
void** libraryOptionValues,
unsigned int numLibraryOptions);
typedef hipError_t (*t_hipLibraryUnload)(hipLibrary_t library);
typedef hipError_t (*t_hipLibraryGetKernel)(hipKernel_t* pKernel, hipLibrary_t library,
const char* name);
typedef hipError_t (*t_hipLibraryGetKernelCount)(unsigned int *count,
hipLibrary_t library);
// HIP Compiler dispatch table
struct HipCompilerDispatchTable {
@@ -1655,6 +1672,13 @@ struct HipDispatchTable {
t_hipMemAdvise_v2 hipMemAdvise_v2_fn;
t_hipStreamGetId hipStreamGetId_fn;
// HIP_RUNTIME_API_TABLE_STEP_VERSION = 15
t_hipLibraryLoadData hipLibraryLoadData_fn;
t_hipLibraryLoadFromFile hipLibraryLoadFromFile_fn;
t_hipLibraryUnload hipLibraryUnload_fn;
t_hipLibraryGetKernel hipLibraryGetKernel_fn;
t_hipLibraryGetKernelCount hipLibraryGetKernelCount_fn;
// DO NOT EDIT ABOVE!
// HIP_RUNTIME_API_TABLE_STEP_VERSION == 15
@@ -456,7 +456,12 @@ enum hip_api_id_t {
HIP_API_ID_hipMemPrefetchAsync_v2 = 436,
HIP_API_ID_hipMemAdvise_v2 = 437,
HIP_API_ID_hipStreamGetId = 438,
HIP_API_ID_LAST = 438,
HIP_API_ID_hipLibraryLoadData = 439,
HIP_API_ID_hipLibraryLoadFromFile = 440,
HIP_API_ID_hipLibraryUnload = 441,
HIP_API_ID_hipLibraryGetKernel = 442,
HIP_API_ID_hipLibraryGetKernelCount = 443,
HIP_API_ID_LAST = 443,
HIP_API_ID_hipChooseDevice = HIP_API_ID_CONCAT(HIP_API_ID_,hipChooseDevice),
HIP_API_ID_hipGetDeviceProperties = HIP_API_ID_CONCAT(HIP_API_ID_,hipGetDeviceProperties),
@@ -922,6 +927,11 @@ static inline const char* hip_api_name(const uint32_t id) {
case HIP_API_ID_hipUserObjectRetain: return "hipUserObjectRetain";
case HIP_API_ID_hipWaitExternalSemaphoresAsync: return "hipWaitExternalSemaphoresAsync";
case HIP_API_ID_hipModuleGetFunctionCount: return "hipModuleGetFunctionCount";
case HIP_API_ID_hipLibraryLoadData: return "hipLibraryLoadData";
case HIP_API_ID_hipLibraryLoadFromFile: return "hipLibraryLoadFromFile";
case HIP_API_ID_hipLibraryUnload: return "hipLibraryUnload";
case HIP_API_ID_hipLibraryGetKernel: return "hipLibraryGetKernel";
case HIP_API_ID_hipLibraryGetKernelCount: return "hipLibraryGetKernelCount";
};
return "unknown";
};
@@ -1355,6 +1365,11 @@ static inline uint32_t hipApiIdByName(const char* name) {
if (strcmp("hipUserObjectRetain", name) == 0) return HIP_API_ID_hipUserObjectRetain;
if (strcmp("hipWaitExternalSemaphoresAsync", name) == 0) return HIP_API_ID_hipWaitExternalSemaphoresAsync;
if (strcmp("hipModuleGetFunctionCount", name) == 0) return HIP_API_ID_hipModuleGetFunctionCount;
if (strcmp("hipLibraryLoadData", name) == 0) return HIP_API_ID_hipLibraryLoadData;
if (strcmp("hipLibraryLoadFromFile", name) == 0) return HIP_API_ID_hipLibraryLoadFromFile;
if (strcmp("hipLibraryUnload", name) == 0) return HIP_API_ID_hipLibraryUnload;
if (strcmp("hipLibraryGetKernel", name) == 0) return HIP_API_ID_hipLibraryGetKernel;
if (strcmp("hipLibraryGetKernelCount", name) == 0) return HIP_API_ID_hipLibraryGetKernelCount;
return HIP_API_ID_NONE;
}
@@ -3936,6 +3951,44 @@ typedef struct hip_api_data_s {
unsigned int numExtSems;
hipStream_t stream;
} hipWaitExternalSemaphoresAsync;
struct {
hipLibrary_t* library;
hipLibrary_t library__val;
const void* image;
hipJitOption** jitOptions;
void** jitOptionsValues;
unsigned int numJitOptions;
hipLibraryOption** libraryOptions;
void** libraryOptionValues;
unsigned int numLibraryOptions;
} hipLibraryLoadData;
struct {
hipLibrary_t* library;
hipLibrary_t library__val;
const char* fname;
char fname__val;
hipJitOption** jitOptions;
void** jitOptionsValues;
unsigned int numJitOptions;
hipLibraryOption** libraryOptions;
void** libraryOptionValues;
unsigned int numLibraryOptions;
} hipLibraryLoadFromFile;
struct {
hipLibrary_t library;
} hipLibraryUnload;
struct {
hipKernel_t* kernel;
hipKernel_t kernel__val;
hipLibrary_t library;
const char* kname;
char kname__val;
} hipLibraryGetKernel;
struct {
unsigned int *count;
unsigned int count__val;
hipLibrary_t library;
} hipLibraryGetKernelCount;
} args;
uint64_t *phase_data;
} hip_api_data_t;
@@ -6601,6 +6654,46 @@ typedef struct hip_api_data_s {
#define INIT_hipTexRefSetMipmapFilterMode_CB_ARGS_DATA(cb_data) {};
// hipUnbindTexture()
#define INIT_hipUnbindTexture_CB_ARGS_DATA(cb_data) {};
// hipLibraryLoadData()
#define INIT_hipLibraryLoadData_CB_ARGS_DATA(cb_data) \
{ \
cb_data.args.hipLibraryLoadData.library = (hipLibrary_t*)library; \
cb_data.args.hipLibraryLoadData.image = (const void*)image; \
cb_data.args.hipLibraryLoadData.jitOptions = (hipJitOption**)jitOptions; \
cb_data.args.hipLibraryLoadData.jitOptionsValues = (void**)jitOptionsValues; \
cb_data.args.hipLibraryLoadData.numJitOptions = (unsigned int)numJitOptions; \
cb_data.args.hipLibraryLoadData.libraryOptions = (hipLibraryOption**)libraryOptions; \
cb_data.args.hipLibraryLoadData.libraryOptionValues = (void**)libraryOptionValues; \
cb_data.args.hipLibraryLoadData.numLibraryOptions = (unsigned int)numLibraryOptions; \
};
// hipLibraryLoadFromFile()
#define INIT_hipLibraryLoadFromFile_CB_ARGS_DATA(cb_data) \
{ \
cb_data.args.hipLibraryLoadFromFile.library = (hipLibrary_t*)library; \
cb_data.args.hipLibraryLoadFromFile.fname = (const char*)fname; \
cb_data.args.hipLibraryLoadFromFile.jitOptions = (hipJitOption**)jitOptions; \
cb_data.args.hipLibraryLoadFromFile.jitOptionsValues = (void**)jitOptionsValues; \
cb_data.args.hipLibraryLoadFromFile.numJitOptions = (unsigned int)numJitOptions; \
cb_data.args.hipLibraryLoadFromFile.libraryOptions = (hipLibraryOption**)libraryOptions; \
cb_data.args.hipLibraryLoadFromFile.libraryOptionValues = (void**)libraryOptionValues; \
cb_data.args.hipLibraryLoadFromFile.numLibraryOptions = (unsigned int)numLibraryOptions; \
};
// hipLibraryUnload()
#define INIT_hipLibraryUnload_CB_ARGS_DATA(cb_data) \
{ cb_data.args.hipLibraryUnload.library = (hipLibrary_t)library; };
// hipLibraryGetKernel()
#define INIT_hipLibraryGetKernel_CB_ARGS_DATA(cb_data) \
{ \
cb_data.args.hipLibraryGetKernel.kernel = (hipKernel_t *)kernel; \
cb_data.args.hipLibraryGetKernel.library = (hipLibrary_t)library; \
cb_data.args.hipLibraryGetKernel.kname = (const char *)kname; \
};
// hipLibraryGetKernelCount()
#define INIT_hipLibraryGetKernelCount_CB_ARGS_DATA(cb_data) \
{ \
cb_data.args.hipLibraryGetKernelCount.count = (unsigned int *)count; \
cb_data.args.hipLibraryGetKernelCount.library = (hipLibrary_t)library; \
};
#define INIT_NONE_CB_ARGS_DATA(cb_data) {};
@@ -8287,9 +8380,27 @@ static inline void hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) {
break;
// hipModuleGetFunctionCount[('unsigned int*', 'count'), ('hipModule_t', 'mod')]
case HIP_API_ID_hipModuleGetFunctionCount:
if (data->args.hipModuleGetFunctionCount.count) data->args.hipModuleGetFunctionCount.count__val = *(data->args.hipModuleGetFunctionCount.count);
if (data->args.hipModuleGetFunctionCount.count)
data->args.hipModuleGetFunctionCount.count__val =
*(data->args.hipModuleGetFunctionCount.count);
break;
case HIP_API_ID_hipLibraryLoadData:
if (data->args.hipLibraryLoadData.library)
data->args.hipLibraryLoadData.library__val = *(data->args.hipLibraryLoadData.library);
break;
case HIP_API_ID_hipLibraryLoadFromFile:
if (data->args.hipLibraryLoadFromFile.library)
data->args.hipLibraryLoadFromFile.library__val =
*(data->args.hipLibraryLoadFromFile.library);
if (data->args.hipLibraryLoadFromFile.fname)
data->args.hipLibraryLoadFromFile.fname__val = *(data->args.hipLibraryLoadFromFile.fname);
break;
case HIP_API_ID_hipLibraryGetKernel:
if (data->args.hipLibraryGetKernel.kernel)
data->args.hipLibraryGetKernel.kernel__val = *(data->args.hipLibraryGetKernel.kernel);
break;
default:
break;
default: break;
};
}
@@ -11741,8 +11852,110 @@ static inline const char* hipApiString(hip_api_id_t id, const hip_api_data_t* da
else { oss << "count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleGetFunctionCount.count__val); }
oss << ", mod="; roctracer::hip_support::detail::operator<<(oss, data->args.hipModuleGetFunctionCount.mod);
oss << ")";
break;
default: oss << "unknown";
break;
case HIP_API_ID_hipLibraryLoadData:
oss << "hipLibraryLoadData(";
if (data->args.hipLibraryLoadData.library == NULL)
oss << "library=NULL";
else {
oss << "library=";
roctracer::hip_support::detail::operator<<(oss, data->args.hipLibraryLoadData.library__val);
}
oss << ", image=";
roctracer::hip_support::detail::operator<<(oss, data->args.hipLibraryLoadData.image);
oss << ", jitOptions=";
roctracer::hip_support::detail::operator<<(oss, data->args.hipLibraryLoadData.jitOptions);
oss << ", jitOptionsValues=";
roctracer::hip_support::detail::operator<<(oss,
data->args.hipLibraryLoadData.jitOptionsValues);
oss << ", numJitOptions=";
roctracer::hip_support::detail::operator<<(oss,
data->args.hipLibraryLoadData.numJitOptions);
oss << ", libraryOptions=";
roctracer::hip_support::detail::operator<<(oss, data->args.hipLibraryLoadData.libraryOptions);
oss << ", libraryOptionsValues=";
roctracer::hip_support::detail::operator<<(
oss, data->args.hipLibraryLoadData.libraryOptionValues);
oss << ", numLibraryOptions=";
roctracer::hip_support::detail::operator<<(oss,
data->args.hipLibraryLoadData.numLibraryOptions);
oss << ")";
break;
case HIP_API_ID_hipLibraryLoadFromFile:
oss << "hipLibraryLoadFromFile(";
if (data->args.hipLibraryLoadFromFile.library == NULL)
oss << "library=NULL";
else {
oss << "library=";
roctracer::hip_support::detail::operator<<(oss, data->args.hipLibraryLoadFromFile.library__val);
}
if (data->args.hipLibraryLoadFromFile.fname == NULL)
oss << "fname=NULL";
else {
oss << "fname=";
roctracer::hip_support::detail::operator<<(oss,
data->args.hipLibraryLoadFromFile.fname__val);
}
oss << ", jitOptions=";
roctracer::hip_support::detail::operator<<(oss, data->args.hipLibraryLoadFromFile.jitOptions);
oss << ")";
oss << ", jitOptionsValues=";
roctracer::hip_support::detail::operator<<(
oss, data->args.hipLibraryLoadFromFile.jitOptionsValues);
oss << ")";
oss << ", numJitOptions=";
roctracer::hip_support::detail::operator<<(oss,
data->args.hipLibraryLoadFromFile.numJitOptions);
oss << ")";
oss << ", libraryOptions=";
roctracer::hip_support::detail::operator<<(oss, data->args.hipLibraryLoadFromFile.libraryOptions);
oss << ")";
oss << ", libraryOptionsValues=";
roctracer::hip_support::detail::operator<<(
oss, data->args.hipLibraryLoadFromFile.libraryOptionValues);
oss << ")";
oss << ", numLibraryOptions=";
roctracer::hip_support::detail::operator<<(oss,
data->args.hipLibraryLoadFromFile.numLibraryOptions);
oss << ")";
break;
case HIP_API_ID_hipLibraryUnload:
oss << "hipLibraryUnload(";
oss << ", library=";
roctracer::hip_support::detail::operator<<(oss, data->args.hipLibraryUnload.library);
break;
case HIP_API_ID_hipLibraryGetKernel:
oss << "hipLibraryGetKernel(";
if (data->args.hipLibraryGetKernel.kernel == NULL)
oss << "kernel=NULL";
else {
oss << "kernel=";
roctracer::hip_support::detail::operator<<(oss, data->args.hipLibraryGetKernel.kernel__val);
}
oss << ", library=";
roctracer::hip_support::detail::operator<<(oss, data->args.hipLibraryGetKernel.library);
if (data->args.hipLibraryGetKernel.kname == NULL)
oss << "kname=NULL";
else {
oss << "kname=";
roctracer::hip_support::detail::operator<<(oss, data->args.hipLibraryGetKernel.kname__val);
}
break;
case HIP_API_ID_hipLibraryGetKernelCount:
oss << "hipLibraryGetKernelCount(";
if (data->args.hipLibraryGetKernelCount.count == NULL)
oss << "count=NULL";
else {
oss << "count=";
roctracer::hip_support::detail::operator<<(
oss, data->args.hipLibraryGetKernelCount.count__val);
}
oss << ", library=";
roctracer::hip_support::detail::operator<<(
oss, data->args.hipLibraryGetKernelCount.library);
break;
default:
oss << "unknown";
};
return strdup(oss.str().c_str());
}
+2 -1
View File
@@ -127,7 +127,8 @@ target_sources(amdhip64 PRIVATE
hip_api_trace.cpp
hip_table_interface.cpp
hip_table_interface_c.cpp
hip_comgr_helper.cpp)
hip_comgr_helper.cpp
hip_library.cpp)
if(WIN32)
target_sources(amdhip64 PRIVATE hip_runtime.cpp)
+5
View File
@@ -511,3 +511,8 @@ hipGetDriverEntryPoint_spt
hipMemPrefetchAsync_v2
hipMemAdvise_v2
hipStreamGetId
hipLibraryLoadData
hipLibraryLoadFromFile
hipLibraryUnload
hipLibraryGetKernel
hipLibraryGetKernelCount
+24 -2
View File
@@ -863,6 +863,17 @@ hipError_t hipMemcpy3DBatchAsync(size_t numOps, struct hipMemcpy3DBatchOp* opLis
unsigned long long flags, hipStream_t stream);
hipError_t hipMemcpy3DPeer(hipMemcpy3DPeerParms* p);
hipError_t hipMemcpy3DPeerAsync(hipMemcpy3DPeerParms* p, hipStream_t stream);
hipError_t hipLibraryLoadData(hipLibrary_t* library, const void* code, hipJitOption** jitOptions,
void** jitOptionsValues, unsigned int numJitOptions,
hipLibraryOption** libraryOptions, void** libraryOptionValues,
unsigned int numLibraryOptions);
hipError_t hipLibraryLoadFromFile(hipLibrary_t* library, const char* fileName,
hipJitOption** jitOptions, void** jitOptionsValues,
unsigned int numJitOptions, hipLibraryOption** libraryOptions,
void** libraryOptionValues, unsigned int numLibraryOptions);
hipError_t hipLibraryUnload(hipLibrary_t library);
hipError_t hipLibraryGetKernel(hipKernel_t* pKernel, hipLibrary_t library, const char* name);
hipError_t hipLibraryGetKernelCount(unsigned int* count, hipLibrary_t library);
} // namespace hip
namespace hip {
@@ -1398,6 +1409,11 @@ void UpdateDispatchTable(HipDispatchTable* ptrDispatchTable) {
ptrDispatchTable->hipMemcpy3DBatchAsync_fn = hip::hipMemcpy3DBatchAsync;
ptrDispatchTable->hipMemcpy3DPeer_fn = hip::hipMemcpy3DPeer;
ptrDispatchTable->hipMemcpy3DPeerAsync_fn = hip::hipMemcpy3DPeerAsync;
ptrDispatchTable->hipLibraryLoadData_fn = hip::hipLibraryLoadData;
ptrDispatchTable->hipLibraryLoadFromFile_fn = hip::hipLibraryLoadFromFile;
ptrDispatchTable->hipLibraryUnload_fn = hip::hipLibraryUnload;
ptrDispatchTable->hipLibraryGetKernel_fn = hip::hipLibraryGetKernel;
ptrDispatchTable->hipLibraryGetKernelCount_fn = hip::hipLibraryGetKernelCount;
}
#if HIP_ROCPROFILER_REGISTER > 0
@@ -2062,15 +2078,21 @@ HIP_ENFORCE_ABI(HipDispatchTable, hipGetDriverEntryPoint_spt_fn, 492);
HIP_ENFORCE_ABI(HipDispatchTable, hipMemPrefetchAsync_v2_fn, 493);
HIP_ENFORCE_ABI(HipDispatchTable, hipMemAdvise_v2_fn, 494);
HIP_ENFORCE_ABI(HipDispatchTable, hipStreamGetId_fn, 495);
// HIP_RUNTIME_API_TABLE_STEP_VERSION == 15
HIP_ENFORCE_ABI(HipDispatchTable, hipLibraryLoadData_fn, 496);
HIP_ENFORCE_ABI(HipDispatchTable, hipLibraryLoadFromFile_fn, 497);
HIP_ENFORCE_ABI(HipDispatchTable, hipLibraryUnload_fn, 498);
HIP_ENFORCE_ABI(HipDispatchTable, hipLibraryGetKernel_fn, 499);
HIP_ENFORCE_ABI(HipDispatchTable, hipLibraryGetKernelCount_fn, 500);
// if HIP_ENFORCE_ABI entries are added for each new function pointer in the table, the number below
// will be +1 of the number in the last HIP_ENFORCE_ABI line. E.g.:
//
// HIP_ENFORCE_ABI(<table>, <functor>, 8)
//
// HIP_ENFORCE_ABI_VERSIONING(<table>, 9) <- 8 + 1 = 9
HIP_ENFORCE_ABI_VERSIONING(HipDispatchTable, 496)
HIP_ENFORCE_ABI_VERSIONING(HipDispatchTable, 501)
static_assert(HIP_RUNTIME_API_TABLE_MAJOR_VERSION == 0 && HIP_RUNTIME_API_TABLE_STEP_VERSION == 14,
static_assert(HIP_RUNTIME_API_TABLE_MAJOR_VERSION == 0 && HIP_RUNTIME_API_TABLE_STEP_VERSION == 15,
"If you get this error, add new HIP_ENFORCE_ABI(...) code for the new function "
"pointers and then update this check so it is true");
#endif
+12 -1
View File
@@ -629,4 +629,15 @@ global:
hipStreamGetId;
local:
*;
} hip_6.5;
} hip_6.5;
hip_7.2 {
global:
hipLibraryLoadData;
hipLibraryLoadFromFile;
hipLibraryUnload;
hipLibraryGetKernel;
hipLibraryGetKernelCount;
local:
*;
} hip_7.1;
+180
View File
@@ -0,0 +1,180 @@
/*
Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <filesystem>
#include <mutex>
#include <string>
#include <vector>
#include "hip/hip_runtime.h"
#include "hip_library.hpp"
#include "hip_platform.hpp"
#include "utils/debug.hpp"
namespace hip {
void LibraryContainer::Register(std::string name, int device, hipKernel_t k) {
std::scoped_lock<std::mutex> lock(lib_mutex_);
auto key = std::make_pair(name, device);
if (kernels_.find(key) == kernels_.end()) {
kernels_.insert(std::make_pair(std::make_pair(name, device), k));
if (!hip::PlatformState::instance().RegisterLibraryFunction(k)) {
LogPrintfInfo("Already registered: %p", k);
}
}
}
hipError_t LibraryContainer::Kernel(hipKernel_t* k, std::string name) {
auto device_id = hip::ihipGetDevice();
if (auto ki = kernels_.find(std::make_pair(name, device_id)); ki != kernels_.end()) {
*k = ki->second;
return hipSuccess;
}
auto m = fatbin_->Module(device_id);
auto f = functions_.find(name);
if (f == functions_.end()) {
return hipErrorNotFound;
}
auto ret = f->second.get()->getDynFunc(reinterpret_cast<hipFunction_t*>(k), m);
// Register it, basically make it available for query though the hip context.
Register(name, device_id, *k);
return hipSuccess;
}
LibraryContainer::LibraryContainer(const char* code_object) {
fatbin_ = std::make_shared<hip::FatBinaryInfo>(nullptr, code_object);
}
LibraryContainer::LibraryContainer(const std::string file_name) {
fatbin_ = std::make_shared<hip::FatBinaryInfo>(file_name.c_str(), nullptr);
}
LibraryContainer::~LibraryContainer() {
for (const auto& k : kernels_) {
(void)hip::PlatformState::instance().UnregisterLibraryFunction(k.second);
}
kernels_.clear();
}
// BuildIt builds and loads the Library, default behavior is lazy load.
// This function needs to be called before any query on library.
hipError_t LibraryContainer::BuildIt() {
std::scoped_lock<std::mutex> lock(lib_mutex_);
if (built_) {
return hipSuccess;
}
if (!fatbin_) {
return hipErrorInvalidValue;
}
int device_id = ihipGetDevice();
std::vector<hip::Device*> devices = {g_devices[device_id]};
IHIP_RETURN_ONFAIL(fatbin_->ExtractFatBinaryUsingCOMGR(devices));
IHIP_RETURN_ONFAIL(fatbin_->BuildProgram(device_id));
auto program =
fatbin_->GetProgram(device_id)->getDeviceProgram(*hip::getCurrentDevice()->devices()[0]);
// Process Functions
std::vector<std::string> function_names;
program->getGlobalFuncFromCodeObj(&function_names);
for (auto& name : function_names) {
functions_.emplace(std::make_pair(name, std::make_shared<hip::Function>(name)));
}
built_ = true;
return hipSuccess;
}
hipError_t hipLibraryLoadData(hipLibrary_t* library, const void* image, hipJitOption** jitOptions,
void** jitOptionsValues, unsigned int numJitOptions,
hipLibraryOption** libraryOptions, void** libraryOptionValues,
unsigned int numLibraryOptions) {
HIP_INIT_API(hipLibraryLoadData, library, image, jitOptions, jitOptionsValues, numJitOptions,
libraryOptions, libraryOptionValues, numLibraryOptions);
if (library == nullptr || image == nullptr) {
HIP_RETURN(hipErrorInvalidValue);
}
// We do not support JIT options
if (numJitOptions > 0) {
HIP_RETURN(hipErrorInvalidValue);
}
auto* l = new hip::LibraryContainer((const char*)image);
*library = reinterpret_cast<hipLibrary_t>(l);
HIP_RETURN(hipSuccess);
}
hipError_t hipLibraryLoadFromFile(hipLibrary_t* library, const char* fname,
hipJitOption** jitOptions, void** jitOptionsValues,
unsigned int numJitOptions, hipLibraryOption** libraryOptions,
void** libraryOptionValues, unsigned int numLibraryOptions) {
HIP_INIT_API(hipLibraryLoadFromFile, library, fname, jitOptions, jitOptionsValues, numJitOptions,
libraryOptions, libraryOptionValues, numLibraryOptions);
if (library == nullptr || !std::filesystem::exists(fname) || numJitOptions > 0) {
HIP_RETURN(hipErrorInvalidValue);
}
auto* l = new hip::LibraryContainer(std::string(fname));
*library = reinterpret_cast<hipLibrary_t>(l);
HIP_RETURN(hipSuccess);
}
hipError_t hipLibraryUnload(hipLibrary_t library) {
HIP_INIT_API(hipLibraryUnload, library);
if (library == nullptr) {
HIP_RETURN(hipErrorInvalidValue);
}
auto l = reinterpret_cast<hip::LibraryContainer*>(library);
delete l;
HIP_RETURN(hipSuccess);
}
hipError_t hipLibraryGetKernelCount(unsigned int* count, hipLibrary_t library) {
HIP_INIT_API(hipLibraryGetKernelCount, count, library);
if (library == nullptr || count == nullptr) {
HIP_RETURN(hipErrorInvalidValue);
}
auto l = reinterpret_cast<hip::LibraryContainer*>(library);
auto ret = l->BuildIt();
if (ret != hipSuccess) {
HIP_RETURN(ret);
}
*count = static_cast<int>(l->KernelCount());
HIP_RETURN(hipSuccess);
}
hipError_t hipLibraryGetKernel(hipKernel_t* kernel, hipLibrary_t library, const char* kname) {
HIP_INIT_API(hipLibraryGetKernel, kernel, library, kname);
if (library == nullptr || kname == nullptr || kernel == nullptr) {
HIP_RETURN(hipErrorInvalidValue);
}
auto l = reinterpret_cast<hip::LibraryContainer*>(library);
auto ret = l->BuildIt();
if (ret != hipSuccess) {
HIP_RETURN(ret);
}
ret = l->Kernel(kernel, kname);
HIP_RETURN(ret);
}
} // namespace hip
+74
View File
@@ -0,0 +1,74 @@
/*
Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#pragma once
#include <cstdlib>
#include <memory>
#include <mutex>
#include <string>
#include <hip/hip_runtime.h>
#include "hip_code_object.hpp"
#include "hip_fatbin.hpp"
namespace hip {
// An abstract Library container
class LibraryContainer {
public:
// Create from pointer
explicit LibraryContainer(const char* code_object); // from pointer
// Create from file
explicit LibraryContainer(const std::string file_name); // deep copy from file
~LibraryContainer();
// Load and build the library
hipError_t BuildIt();
// Get the total Kernel count in Library
size_t KernelCount() const { return functions_.size(); }
// Get the Kernel from name
hipError_t Kernel(hipKernel_t* k, std::string name);
// Get Fatbin pointer
inline FatBinaryInfo* FatBin() { return fatbin_.get(); }
// Register the kernel function, make an entry in global state
void Register(std::string name, int device, hipKernel_t k);
private:
LibraryContainer() = delete;
LibraryContainer(const LibraryContainer&) = delete;
LibraryContainer(const LibraryContainer&&) = delete;
LibraryContainer& operator=(const LibraryContainer&) = delete;
LibraryContainer& operator=(const LibraryContainer&&) = delete;
std::mutex lib_mutex_;
std::atomic_bool built_ = false;
std::shared_ptr<FatBinaryInfo> fatbin_;
std::map<std::string, std::shared_ptr<hip::Function>> functions_;
// Store already looked up kernels for certain devices
std::map<std::pair<std::string /* name */, int /* device */>, hipKernel_t> kernels_;
};
} // namespace hip
+10 -6
View File
@@ -647,15 +647,19 @@ hipError_t ihipLaunchKernel(const void* hostFunction, dim3 gridDim, dim3 blockDi
if (!hip::isValid(stream)) {
return hipErrorInvalidValue;
}
if (hostFunction == nullptr) {
return hipErrorInvalidDeviceFunction;
}
hipFunction_t func = nullptr;
int deviceId = hip::Stream::DeviceId(stream);
hipError_t hip_error = PlatformState::instance().getStatFunc(&func, hostFunction, deviceId);
hipError_t hip_error =
PlatformState::instance().getStatFunc(&func, hostFunction, deviceId);
if ((hip_error != hipSuccess) || (func == nullptr)) {
if (hip_error == hipErrorNoBinaryForGpu) {
return hip_error;
} else {
return hipErrorInvalidDeviceFunction;
}
// assume its hip function type if we did not get a valid output from static
// func lookup
func = reinterpret_cast<hipFunction_t>(const_cast<void *>(hostFunction));
}
constexpr auto gridDimYZmax = static_cast<uint64_t>(std::numeric_limits<uint16_t>::max()) + 1;
+18
View File
@@ -113,6 +113,23 @@ class PlatformState {
size_t UfdMapSize() const { return ufd_map_.size(); }
inline bool RegisterLibraryFunction(const hipKernel_t f) {
amd::ScopedLock lock(lock_);
if (library_functions_.find(f) == library_functions_.end()) {
library_functions_.insert(f);
return true;
}
return false;
}
inline bool UnregisterLibraryFunction(const hipKernel_t f) {
amd::ScopedLock lock(lock_);
if (library_functions_.find(f) != library_functions_.end()) {
library_functions_.erase(f);
return true;
}
return false;
}
private:
// Dynamic Code Object map, keyin module to get the corresponding object
std::unordered_map<hipModule_t, hip::DynCO*> dynCO_map_;
@@ -123,5 +140,6 @@ class PlatformState {
std::unordered_map<std::string, std::shared_ptr<UniqueFD>> ufd_map_; //!< Unique File Desc Map
void* dynamicLibraryHandle_{nullptr};
std::unordered_set<hipKernel_t> library_functions_;
};
} // namespace hip
@@ -2010,4 +2010,31 @@ hipError_t hipGraphExecExternalSemaphoresWaitNodeSetParams(
const hipExternalSemaphoreWaitNodeParams* nodeParams) {
return hip::GetHipDispatchTable()->hipGraphExecExternalSemaphoresWaitNodeSetParams_fn(
hGraphExec, hNode, nodeParams);
}
hipError_t hipLibraryLoadData(hipLibrary_t* library, const void* code, hipJitOption** jitOptions,
void** jitOptionsValues, unsigned int numJitOptions,
hipLibraryOption** libraryOptions, void** libraryOptionValues,
unsigned int numLibraryOptions) {
return hip::GetHipDispatchTable()->hipLibraryLoadData_fn(
library, code, jitOptions, jitOptionsValues, numJitOptions, libraryOptions,
libraryOptionValues, numLibraryOptions);
}
hipError_t hipLibraryLoadFromFile(hipLibrary_t* library, const char* fileName,
hipJitOption** jitOptions, void** jitOptionsValues,
unsigned int numJitOptions, hipLibraryOption** libraryOptions,
void** libraryOptionValues, unsigned int numLibraryOptions) {
return hip::GetHipDispatchTable()->hipLibraryLoadFromFile_fn(
library, fileName, jitOptions, jitOptionsValues, numJitOptions, libraryOptions,
libraryOptionValues, numLibraryOptions);
}
hipError_t hipLibraryUnload(hipLibrary_t library) {
return hip::GetHipDispatchTable()->hipLibraryUnload_fn(library);
}
hipError_t hipLibraryGetKernel(hipKernel_t* pKernel, hipLibrary_t library, const char* name) {
return hip::GetHipDispatchTable()->hipLibraryGetKernel_fn(pKernel, library,
name);
}
hipError_t hipLibraryGetKernelCount(unsigned int *count, hipLibrary_t library) {
return hip::GetHipDispatchTable()->hipLibraryGetKernelCount_fn(count,
library);
}
@@ -65,3 +65,4 @@ endif()
add_subdirectory(synchronization)
add_subdirectory(launchBounds)
add_subdirectory(assertion)
add_subdirectory(library)
@@ -0,0 +1,20 @@
set(TEST_SRC
loadlib_rtc.cc
loadlib_co.cc
library_negative.cc
)
add_custom_target(library_code_load.code
COMMAND ${CMAKE_CXX_COMPILER} --genco ${CMAKE_CURRENT_SOURCE_DIR}/library_code_load.cc
-o ${CMAKE_CURRENT_BINARY_DIR}/../library/library_code_load.code ${OFFLOAD_ARCH_STR}
-I${HIP_PATH}/include/ -I${CMAKE_CURRENT_SOURCE_DIR}/../../include
--rocm-path=${ROCM_PATH})
set_property(GLOBAL APPEND PROPERTY
G_INSTALL_CUSTOM_TARGETS ${CMAKE_CURRENT_BINARY_DIR}/library_code_load.code)
hip_add_exe_to_target(NAME LibraryTests
TEST_SRC ${TEST_SRC}
TEST_TARGET_NAME build_tests
LINKER_LIBS hiprtc)
add_dependencies(LibraryTests library_code_load.code)
@@ -0,0 +1,16 @@
#include <hip/hip_runtime.h>
extern "C" {
__global__ void add_kernel(float* out, float* a, float* b) {
size_t i = threadIdx.x;
out[i] = a[i] + b[i];
}
__global__ void sub_kernel(float* out, float* a, float* b) {
size_t i = threadIdx.x;
out[i] = a[i] - b[i];
}
__global__ void mul_kernel(float* out, float* a, float* b) {
size_t i = threadIdx.x;
out[i] = a[i] * b[i];
}
}
@@ -0,0 +1,47 @@
/*
Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
TEST_CASE("Unit_library_negative") {
SECTION("load negative") {
HIP_CHECK_ERROR(hipLibraryLoadData(nullptr, nullptr, nullptr, nullptr, 0, nullptr, nullptr, 0),
hipErrorInvalidValue);
HIP_CHECK_ERROR(
hipLibraryLoadFromFile(nullptr, nullptr, nullptr, nullptr, 0, nullptr, nullptr, 0),
hipErrorInvalidValue);
HIP_CHECK_ERROR(hipLibraryUnload(nullptr), hipErrorInvalidValue);
HIP_CHECK_ERROR(hipLibraryGetKernel(nullptr, nullptr, nullptr), hipErrorInvalidValue);
HIP_CHECK_ERROR(hipLibraryGetKernelCount(nullptr, nullptr), hipErrorInvalidValue);
}
SECTION("Load random code") {
const char* code = "call me ishmael"; // definitely not compile-able
hipLibrary_t lib;
hipKernel_t kernel;
// Default behavior is lazy load, so if we pass anything to it, it should pass
HIP_CHECK(hipLibraryLoadData(&lib, code, nullptr, nullptr, 0, nullptr, nullptr, 0));
// But this check will fail
HIP_CHECK_ERROR(hipLibraryGetKernel(&kernel, lib, "moby"), hipErrorInvalidImage);
HIP_CHECK(hipLibraryUnload(lib));
}
}
@@ -0,0 +1,137 @@
/*
Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
TEST_CASE("Unit_hip_library_load_co") {
constexpr size_t size = 32;
std::vector<float> input1, input2;
input1.reserve(size);
input2.reserve(size);
for (size_t i = 0; i < size; i++) {
input1[i] = (i + 1) * 2;
input2[i] = i;
}
float *d_in1, *d_in2, *d_out;
HIP_CHECK(hipMalloc(&d_in1, sizeof(float) * size));
HIP_CHECK(hipMalloc(&d_in2, sizeof(float) * size));
HIP_CHECK(hipMalloc(&d_out, sizeof(float) * size));
HIP_CHECK(hipMemset(d_out, 0, sizeof(float) * size));
HIP_CHECK(hipMemcpy(d_in1, input1.data(), sizeof(float) * size, hipMemcpyHostToDevice));
HIP_CHECK(hipMemcpy(d_in2, input2.data(), sizeof(float) * size, hipMemcpyHostToDevice));
hipStream_t stream;
HIP_CHECK(hipStreamCreate(&stream));
std::string lib_co = "library_code_load.code";
SECTION("One Kernel") {
hipLibrary_t library;
hipKernel_t function;
HIP_CHECK(
hipLibraryLoadFromFile(&library, lib_co.data(), nullptr, nullptr, 0, nullptr, nullptr, 0));
HIP_CHECK(hipLibraryGetKernel(&function, library, "add_kernel"));
unsigned int count = 0;
HIP_CHECK(hipLibraryGetKernelCount(&count, library));
REQUIRE(count == 3);
void* args[] = {&d_out, &d_in1, &d_in2};
HIP_CHECK(hipLaunchKernel(function, 1, size, args, 0, stream));
HIP_CHECK(hipStreamSynchronize(stream));
HIP_CHECK(hipLibraryUnload(library));
std::vector<float> out(size, 0);
HIP_CHECK(hipMemcpy(out.data(), d_out, sizeof(float) * size, hipMemcpyDeviceToHost));
for (size_t i = 0; i < size; i++) {
float tmp = input1[i] + input2[i];
INFO("Index: " << i << " cpu res: " << tmp << " gpu res: " << out[i]);
REQUIRE(out[i] == tmp);
}
}
SECTION("Two Kernel") {
hipLibrary_t library;
hipKernel_t function;
HIP_CHECK(
hipLibraryLoadFromFile(&library, lib_co.data(), nullptr, nullptr, 0, nullptr, nullptr, 0));
HIP_CHECK(hipLibraryGetKernel(&function, library, "sub_kernel"));
unsigned int count = 0;
HIP_CHECK(hipLibraryGetKernelCount(&count, library));
REQUIRE(count == 3);
void* args[] = {&d_out, &d_in1, &d_in2};
HIP_CHECK(hipLaunchKernel(function, 1, size, args, 0, stream));
HIP_CHECK(hipStreamSynchronize(stream));
HIP_CHECK(hipLibraryUnload(library));
std::vector<float> out(size, 0);
HIP_CHECK(hipMemcpy(out.data(), d_out, sizeof(float) * size, hipMemcpyDeviceToHost));
for (size_t i = 0; i < size; i++) {
float tmp = input1[i] - input2[i];
INFO("Index: " << i << " cpu res: " << tmp << " gpu res: " << out[i]);
REQUIRE(out[i] == tmp);
}
}
SECTION("Three Kernel") {
hipLibrary_t library;
hipKernel_t function;
HIP_CHECK(
hipLibraryLoadFromFile(&library, lib_co.data(), nullptr, nullptr, 0, nullptr, nullptr, 0));
HIP_CHECK(hipLibraryGetKernel(&function, library, "mul_kernel"));
unsigned int count = 0;
HIP_CHECK(hipLibraryGetKernelCount(&count, library));
REQUIRE(count == 3);
void* args[] = {&d_out, &d_in1, &d_in2};
HIP_CHECK(hipLaunchKernel(function, 1, size, args, 0, stream));
HIP_CHECK(hipStreamSynchronize(stream));
HIP_CHECK(hipLibraryUnload(library));
std::vector<float> out(size, 0);
HIP_CHECK(hipMemcpy(out.data(), d_out, sizeof(float) * size, hipMemcpyDeviceToHost));
for (size_t i = 0; i < size; i++) {
float tmp = input1[i] * input2[i];
INFO("Index: " << i << " cpu res: " << tmp << " gpu res: " << out[i]);
REQUIRE(out[i] == tmp);
}
}
HIP_CHECK(hipStreamDestroy(stream));
HIP_CHECK(hipFree(d_in1));
HIP_CHECK(hipFree(d_in2));
HIP_CHECK(hipFree(d_out));
}
@@ -0,0 +1,173 @@
/*
Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <hip_test_common.hh>
#include <hip/hiprtc.h>
#include <string>
#include <vector>
static std::vector<char> compile_using_hiprtc(const std::string& code, std::string gpu_arch) {
hiprtcProgram prog;
HIPRTC_CHECK(hiprtcCreateProgram(&prog, code.c_str(), "code.cu", 0, NULL, NULL));
std::string offload_arch = "--offload-arch=" + gpu_arch;
const char* opts[] = {offload_arch.c_str()};
HIPRTC_CHECK(hiprtcCompileProgram(prog, 1, opts));
size_t size;
HIPRTC_CHECK(hiprtcGetCodeSize(prog, &size));
std::vector<char> res(size, 0);
HIPRTC_CHECK(hiprtcGetCode(prog, res.data()));
HIPRTC_CHECK(hiprtcDestroyProgram(&prog));
return res;
}
TEST_CASE("Unit_hip_library_load_rtc") {
constexpr size_t size = 32;
const std::string kernel1 =
"extern \"C\" __global__ void add_kernel(float* out, float*a, float*b) { size_t i = "
"threadIdx.x; out[i] = a[i] + b[i]; }\n";
const std::string kernel2 =
"extern \"C\" __global__ void sub_kernel(float* out, float*a, float*b) { size_t i = "
"threadIdx.x; out[i] = a[i] - b[i]; }\n";
const std::string kernel3 =
"extern \"C\" __global__ void mul_kernel(float* out, float*a, float*b) { size_t i = "
"threadIdx.x; out[i] = a[i] * b[i]; }\n";
hipDeviceProp_t prop;
HIP_CHECK(hipGetDeviceProperties(&prop, 0));
std::string gpu_arch = prop.gcnArchName;
std::vector<float> input1, input2;
input1.reserve(size);
input2.reserve(size);
for (size_t i = 0; i < size; i++) {
input1[i] = (i + 1) * 2;
input2[i] = i;
}
float *d_in1, *d_in2, *d_out;
HIP_CHECK(hipMalloc(&d_in1, sizeof(float) * size));
HIP_CHECK(hipMalloc(&d_in2, sizeof(float) * size));
HIP_CHECK(hipMalloc(&d_out, sizeof(float) * size));
HIP_CHECK(hipMemset(d_out, 0, sizeof(float) * size));
HIP_CHECK(hipMemcpy(d_in1, input1.data(), sizeof(float) * size, hipMemcpyHostToDevice));
HIP_CHECK(hipMemcpy(d_in2, input2.data(), sizeof(float) * size, hipMemcpyHostToDevice));
hipStream_t stream;
HIP_CHECK(hipStreamCreate(&stream));
SECTION("One Kernel") {
auto kernel = kernel1;
auto code = compile_using_hiprtc(kernel, gpu_arch);
hipLibrary_t library;
hipKernel_t function;
HIP_CHECK(hipLibraryLoadData(&library, code.data(), nullptr, nullptr, 0, nullptr, nullptr, 0));
HIP_CHECK(hipLibraryGetKernel(&function, library, "add_kernel"));
unsigned int count = 0;
HIP_CHECK(hipLibraryGetKernelCount(&count, library));
REQUIRE(count == 1);
void* args[] = {&d_out, &d_in1, &d_in2};
HIP_CHECK(hipLaunchKernel(function, 1, size, args, 0, stream));
HIP_CHECK(hipStreamSynchronize(stream));
HIP_CHECK(hipLibraryUnload(library));
std::vector<float> out(size, 0);
HIP_CHECK(hipMemcpy(out.data(), d_out, sizeof(float) * size, hipMemcpyDeviceToHost));
for (size_t i = 0; i < size; i++) {
float tmp = input1[i] + input2[i];
INFO("Index: " << i << " cpu res: " << tmp << " gpu res: " << out[i]);
REQUIRE(out[i] == tmp);
}
}
SECTION("Two Kernel") {
auto kernel = kernel1 + kernel2;
auto code = compile_using_hiprtc(kernel, gpu_arch);
hipLibrary_t library;
hipKernel_t function;
HIP_CHECK(hipLibraryLoadData(&library, code.data(), nullptr, nullptr, 0, nullptr, nullptr, 0));
HIP_CHECK(hipLibraryGetKernel(&function, library, "sub_kernel"));
unsigned int count = 0;
HIP_CHECK(hipLibraryGetKernelCount(&count, library));
REQUIRE(count == 2);
void* args[] = {&d_out, &d_in1, &d_in2};
HIP_CHECK(hipLaunchKernel(function, 1, size, args, 0, stream));
HIP_CHECK(hipStreamSynchronize(stream));
HIP_CHECK(hipLibraryUnload(library));
std::vector<float> out(size, 0);
HIP_CHECK(hipMemcpy(out.data(), d_out, sizeof(float) * size, hipMemcpyDeviceToHost));
for (size_t i = 0; i < size; i++) {
float tmp = input1[i] - input2[i];
INFO("Index: " << i << " cpu res: " << tmp << " gpu res: " << out[i]);
REQUIRE(out[i] == tmp);
}
}
SECTION("Three Kernel") {
auto kernel = kernel1 + kernel2 + kernel3;
auto code = compile_using_hiprtc(kernel, gpu_arch);
hipLibrary_t library;
hipKernel_t function;
HIP_CHECK(hipLibraryLoadData(&library, code.data(), nullptr, nullptr, 0, nullptr, nullptr, 0));
HIP_CHECK(hipLibraryGetKernel(&function, library, "mul_kernel"));
unsigned int count = 0;
HIP_CHECK(hipLibraryGetKernelCount(&count, library));
REQUIRE(count == 3);
void* args[] = {&d_out, &d_in1, &d_in2};
HIP_CHECK(hipLaunchKernel(function, 1, size, args, 0, stream));
HIP_CHECK(hipStreamSynchronize(stream));
HIP_CHECK(hipLibraryUnload(library));
std::vector<float> out(size, 0);
HIP_CHECK(hipMemcpy(out.data(), d_out, sizeof(float) * size, hipMemcpyDeviceToHost));
for (size_t i = 0; i < size; i++) {
float tmp = input1[i] * input2[i];
INFO("Index: " << i << " cpu res: " << tmp << " gpu res: " << out[i]);
REQUIRE(out[i] == tmp);
}
}
HIP_CHECK(hipStreamDestroy(stream));
HIP_CHECK(hipFree(d_in1));
HIP_CHECK(hipFree(d_in2));
HIP_CHECK(hipFree(d_out));
}
@@ -692,6 +692,8 @@ typedef struct hipIpcEventHandle_st {
typedef struct ihipModule_t* hipModule_t;
typedef struct ihipModuleSymbol_t* hipFunction_t;
typedef struct ihipLinkState_t* hipLinkState_t;
typedef struct ihipLibrary_t* hipLibrary_t;
typedef struct ihipKernel_t* hipKernel_t;
/**
* HIP memory pool
*/
@@ -6346,6 +6348,69 @@ hipError_t hipModuleGetFunction(hipFunction_t* function, hipModule_t module, con
*/
hipError_t hipModuleGetFunctionCount(unsigned int* count, hipModule_t mod);
/**
* @brief Load hip Library from inmemory object
*
* @param [out] library Output Library
* @param [in] code In memory object
* @param [in] jitOptions JIT options, CUDA only
* @param [in] jitOptionsValues JIT options values, CUDA only
* @param [in] numJitOptions Number of JIT options
* @param [in] libraryOptions Library options
* @param [in] libraryOptionValues Library options values
* @param [in] numLibraryOptions Number of library options
* @return #hipSuccess, #hipErrorInvalidValue,
*/
hipError_t hipLibraryLoadData(hipLibrary_t* library, const void* code, hipJitOption** jitOptions,
void** jitOptionsValues, unsigned int numJitOptions,
hipLibraryOption** libraryOptions, void** libraryOptionValues,
unsigned int numLibraryOptions);
/**
* @brief Load hip Library from file
*
* @param [out] library Output Library
* @param [in] fileName file which contains code object
* @param [in] jitOptions JIT options, CUDA only
* @param [in] jitOptionsValues JIT options values, CUDA only
* @param [in] numJitOptions Number of JIT options
* @param [in] libraryOptions Library options
* @param [in] libraryOptionValues Library options values
* @param [in] numLibraryOptions Number of library options
* @return #hipSuccess, #hipErrorInvalidValue
*/
hipError_t hipLibraryLoadFromFile(hipLibrary_t* library, const char* fileName,
hipJitOption** jitOptions, void** jitOptionsValues,
unsigned int numJitOptions, hipLibraryOption** libraryOptions,
void** libraryOptionValues, unsigned int numLibraryOptions);
/**
* @brief Unload HIP Library
*
* @param [in] library Input created hip library
* @return #hipSuccess, #hipErrorInvalidValue
*/
hipError_t hipLibraryUnload(hipLibrary_t library);
/**
* @brief Get Kernel object from library
*
* @param [out] pKernel Output kernel object
* @param [in] library Input hip library
* @param [in] name kernel name to be searched for
* @return #hipSuccess, #hipErrorInvalidValue
*/
hipError_t hipLibraryGetKernel(hipKernel_t* pKernel, hipLibrary_t library, const char* name);
/**
* @brief Get Kernel count in library
*
* @param [out] count Count of kernels in library
* @param [in] library Input created hip library
* @return #hipSuccess, #hipErrorInvalidValue
*/
hipError_t hipLibraryGetKernelCount(unsigned int *count, hipLibrary_t library);
/**
* @brief Find out attributes for a given function.
* @ingroup Execution
+5
View File
@@ -121,6 +121,11 @@ typedef enum hipJitFallback {
hipJitPreferBinary,
} hipJitFallback;
typedef enum hipLibraryOption_e {
hipLibraryHostUniversalFunctionAndDataTable = 0,
hipLibraryBinaryIsPreserved = 1
} hipLibraryOption;
// doxygen end LinkerTypes
/**
* @}
@@ -482,6 +482,7 @@ typedef enum cudaSharedMemConfig hipSharedMemConfig;
typedef CUfunc_cache hipFuncCache;
typedef CUjitInputType hipJitInputType;
typedef CUjit_option hipJitOption;
typedef enum cudaLibraryOption hipLibraryOption;
typedef CUdevice hipDevice_t;
typedef enum cudaDeviceP2PAttr hipDeviceP2PAttr;
#define hipDevP2PAttrPerformanceRank cudaDevP2PAttrPerformanceRank
@@ -495,6 +496,8 @@ typedef CUlinkState hipLinkState_t;
typedef CUmodule hipModule_t;
typedef CUfunction hipFunction_t;
typedef CUdeviceptr hipDeviceptr_t;
typedef cudaLibrary_t hipLibrary_t;
typedef cudaKernel_t hipKernel_t;
typedef struct cudaArray* hipArray_t;
typedef struct cudaArray* hipArray_const_t;
typedef struct cudaFuncAttributes hipFuncAttributes;
@@ -3624,6 +3627,41 @@ inline static hipError_t hipModuleLoadDataEx(hipModule_t* module, const void* im
cuModuleLoadDataEx(module, image, numOptions, options, optionValues));
}
inline static hipError_t hipLibraryLoadData(hipLibrary_t* library, const void* code,
hipJitOption** jitOptions, void** jitOptionsValues,
unsigned int numJitOptions,
hipLibraryOption** libraryOptions,
void** libraryOptionValues,
unsigned int numLibraryOptions) {
return hipCUResultTohipError(cudaLibraryLoadData(library, code, jitOptions, jitOptionsValues,
numJitOptions, libraryOptions,
libraryOptionValues, numLibraryOptions));
}
inline static hipError_t hipLibraryLoadFromFile(hipLibrary_t* library, const char* fileName,
hipJitOption** jitOptions, void** jitOptionsValues,
unsigned int numJitOptions,
hipLibraryOption** libraryOptions,
void** libraryOptionValues,
unsigned int numLibraryOptions) {
return hipCUResultTohipError(
cudaLibraryLoadFromFile(library, fileName, jitOptions, jitOptionsValues, numJitOptions,
libraryOptions, libraryOptionValues, numLibraryOptions));
}
inline static hipError_t hipLibraryUnload(hipLibrary_t library) {
return hipCUResultTohipError(cudaLibraryUnload(library));
}
inline static hipError_t hipLibraryGetKernel(hipKernel_t* pKernel, hipLibrary_t library,
const char* name) {
return hipCUResultTohipError(cudaLibraryGetKernel(pKernel, library, name));
}
inline static hipError_t hipLibraryGetKernelCount(unsigned int* count, hipLibrary_t library) {
return hipCUResultTohipError(cudaLibraryGetKernelCount(count, library));
}
inline static hipError_t hipLaunchKernel(const void* function_address, dim3 numBlocks,
dim3 dimBlocks, void** args, size_t sharedMemBytes,
hipStream_t stream) {
@@ -974,6 +974,13 @@ ROCPROFILER_ENUM_LABEL(ROCPROFILER_HIP_RUNTIME_API_ID_hipMemPrefetchAsync_v2)
ROCPROFILER_ENUM_LABEL(ROCPROFILER_HIP_RUNTIME_API_ID_hipMemAdvise_v2)
ROCPROFILER_ENUM_LABEL(ROCPROFILER_HIP_RUNTIME_API_ID_hipStreamGetId)
#endif
#if HIP_RUNTIME_API_TABLE_STEP_VERSION >= 15
ROCPROFILER_ENUM_LABEL(ROCPROFILER_HIP_RUNTIME_API_ID_hipLibraryLoadData)
ROCPROFILER_ENUM_LABEL(ROCPROFILER_HIP_RUNTIME_API_ID_hipLibraryLoadFromFile)
ROCPROFILER_ENUM_LABEL(ROCPROFILER_HIP_RUNTIME_API_ID_hipLibraryUnload)
ROCPROFILER_ENUM_LABEL(ROCPROFILER_HIP_RUNTIME_API_ID_hipLibraryGetKernel)
ROCPROFILER_ENUM_LABEL(ROCPROFILER_HIP_RUNTIME_API_ID_hipLibraryGetKernelCount)
#endif
#if HIP_RUNTIME_API_TABLE_STEP_VERSION == 0
static_assert(ROCPROFILER_HIP_RUNTIME_API_ID_LAST == 442);
#elif HIP_RUNTIME_API_TABLE_STEP_VERSION == 1
@@ -1004,6 +1011,8 @@ static_assert(ROCPROFILER_HIP_RUNTIME_API_ID_LAST == 477);
static_assert(ROCPROFILER_HIP_RUNTIME_API_ID_LAST == 477);
#elif HIP_RUNTIME_API_TABLE_STEP_VERSION == 14
static_assert(ROCPROFILER_HIP_RUNTIME_API_ID_LAST == 496);
#elif HIP_RUNTIME_API_TABLE_STEP_VERSION == 15
static_assert(ROCPROFILER_HIP_RUNTIME_API_ID_LAST == 501);
#else
# if !defined(ROCPROFILER_UNSAFE_NO_VERSION_CHECK) && \
(defined(ROCPROFILER_CI) && ROCPROFILER_CI > 0)
@@ -3285,6 +3285,45 @@ typedef union rocprofiler_hip_api_args_t
unsigned long long* streamId;
} hipStreamGetId;
#endif
#if HIP_RUNTIME_API_TABLE_STEP_VERSION >= 15
struct
{
hipLibrary_t* library;
const void* code;
hipJitOption** jitOptions;
void** jitOptionsValues;
unsigned int numJitOptions;
hipLibraryOption** libraryOptions;
void** libraryOptionValues;
unsigned int numLibraryOptions;
} hipLibraryLoadData;
struct
{
hipLibrary_t* library;
const char* fileName;
hipJitOption** jitOptions;
void** jitOptionsValues;
unsigned int numJitOptions;
hipLibraryOption** libraryOptions;
void** libraryOptionValues;
unsigned int numLibraryOptions;
} hipLibraryLoadFromFile;
struct
{
hipLibrary_t library;
} hipLibraryUnload;
struct
{
hipKernel_t* pKernel;
hipLibrary_t library;
const char* name;
} hipLibraryGetKernel;
struct
{
unsigned int* count;
hipLibrary_t library;
} hipLibraryGetKernelCount;
#endif
} rocprofiler_hip_api_args_t;
ROCPROFILER_EXTERN_C_FINI
@@ -551,6 +551,13 @@ typedef enum rocprofiler_hip_runtime_api_id_t // NOLINT(performance-enum-size)
ROCPROFILER_HIP_RUNTIME_API_ID_hipMemPrefetchAsync_v2,
ROCPROFILER_HIP_RUNTIME_API_ID_hipMemAdvise_v2,
ROCPROFILER_HIP_RUNTIME_API_ID_hipStreamGetId,
#endif
#if HIP_RUNTIME_API_TABLE_STEP_VERSION >= 15
ROCPROFILER_HIP_RUNTIME_API_ID_hipLibraryLoadData,
ROCPROFILER_HIP_RUNTIME_API_ID_hipLibraryLoadFromFile,
ROCPROFILER_HIP_RUNTIME_API_ID_hipLibraryUnload,
ROCPROFILER_HIP_RUNTIME_API_ID_hipLibraryGetKernel,
ROCPROFILER_HIP_RUNTIME_API_ID_hipLibraryGetKernelCount,
#endif
ROCPROFILER_HIP_RUNTIME_API_ID_LAST,
} rocprofiler_hip_runtime_api_id_t;
@@ -593,6 +593,14 @@ ROCP_SDK_ENFORCE_ABI(::HipDispatchTable, hipMemAdvise_v2_fn, 494);
ROCP_SDK_ENFORCE_ABI(::HipDispatchTable, hipStreamGetId_fn, 495);
#endif
#if HIP_RUNTIME_API_TABLE_STEP_VERSION >= 15
ROCP_SDK_ENFORCE_ABI(::HipDispatchTable, hipLibraryLoadData_fn, 496);
ROCP_SDK_ENFORCE_ABI(::HipDispatchTable, hipLibraryLoadFromFile_fn, 497);
ROCP_SDK_ENFORCE_ABI(::HipDispatchTable, hipLibraryUnload_fn, 498);
ROCP_SDK_ENFORCE_ABI(::HipDispatchTable, hipLibraryGetKernel_fn, 499);
ROCP_SDK_ENFORCE_ABI(::HipDispatchTable, hipLibraryGetKernelCount_fn, 500);
#endif
#if HIP_RUNTIME_API_TABLE_STEP_VERSION == 0
ROCP_SDK_ENFORCE_ABI_VERSIONING(::HipDispatchTable, 442)
#elif HIP_RUNTIME_API_TABLE_STEP_VERSION == 1
@@ -623,6 +631,8 @@ ROCP_SDK_ENFORCE_ABI_VERSIONING(::HipDispatchTable, 477)
ROCP_SDK_ENFORCE_ABI_VERSIONING(::HipDispatchTable, 477)
#elif HIP_RUNTIME_API_TABLE_STEP_VERSION == 14
ROCP_SDK_ENFORCE_ABI_VERSIONING(::HipDispatchTable, 496)
#elif HIP_RUNTIME_API_TABLE_STEP_VERSION == 15
ROCP_SDK_ENFORCE_ABI_VERSIONING(::HipDispatchTable, 501)
#else
INTERNAL_CI_ROCP_SDK_ENFORCE_ABI_VERSIONING(::HipDispatchTable, 0)
#endif
@@ -619,6 +619,14 @@ HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_TABLE_ID_Runtime, ROCPROFILER_HIP_RUNT
HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_TABLE_ID_Runtime, ROCPROFILER_HIP_RUNTIME_API_ID_hipMemAdvise_v2, hipMemAdvise_v2, hipMemAdvise_v2_fn, dev_ptr, count, advice, location);
HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_TABLE_ID_Runtime, ROCPROFILER_HIP_RUNTIME_API_ID_hipStreamGetId, hipStreamGetId, hipStreamGetId_fn, stream, streamId);
#endif
#if HIP_RUNTIME_API_TABLE_STEP_VERSION >= 15
HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_TABLE_ID_Runtime, ROCPROFILER_HIP_RUNTIME_API_ID_hipLibraryLoadData, hipLibraryLoadData, hipLibraryLoadData_fn, library, code, jitOptions, jitOptionsValues, numJitOptions, libraryOptions, libraryOptionValues, numLibraryOptions);
HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_TABLE_ID_Runtime, ROCPROFILER_HIP_RUNTIME_API_ID_hipLibraryLoadFromFile, hipLibraryLoadFromFile, hipLibraryLoadFromFile_fn, library, fileName, jitOptions, jitOptionsValues, numJitOptions, libraryOptions, libraryOptionValues, numLibraryOptions);
HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_TABLE_ID_Runtime, ROCPROFILER_HIP_RUNTIME_API_ID_hipLibraryUnload, hipLibraryUnload, hipLibraryUnload_fn, library);
HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_TABLE_ID_Runtime, ROCPROFILER_HIP_RUNTIME_API_ID_hipLibraryGetKernel, hipLibraryGetKernel, hipLibraryGetKernel_fn, pKernel, library, name);
HIP_API_INFO_DEFINITION_V(ROCPROFILER_HIP_TABLE_ID_Runtime, ROCPROFILER_HIP_RUNTIME_API_ID_hipLibraryGetKernelCount, hipLibraryGetKernelCount, hipLibraryGetKernelCount_fn, count, library);
#endif
// clang-format on
#else