Added kernel compilation driver apis
1. Added 2 new driver APIs: hipModuleLoad and hipModuleGetFunction. Change-Id: If464a7fad178121e3da791c7ac9e17ebc01a9cd0 Known issue: a sample written with these APIs aborts ("Aborted (core dumped)") when exiting.
Этот коммит содержится в:
@@ -165,7 +165,8 @@ if(HIP_PLATFORM STREQUAL "hcc")
|
||||
src/hip_peer.cpp
|
||||
src/hip_stream.cpp
|
||||
src/hip_fp16.cpp
|
||||
src/unpinned_copy_engine.cpp)
|
||||
src/unpinned_copy_engine.cpp
|
||||
src/hip_module.cpp)
|
||||
|
||||
if(${HIP_USE_SHARED_LIBRARY} EQUAL 1)
|
||||
add_library(hip_hcc SHARED ${SOURCE_FILES})
|
||||
|
||||
+1
-1
@@ -258,7 +258,7 @@ if ($needHipHcc) {
|
||||
if ($HIP_USE_SHARED_LIBRARY) {
|
||||
$HIPLDFLAGS .= " -L$HIP_PATH/lib -Wl,--rpath=$HIP_PATH/lib -lhip_hcc";
|
||||
} else {
|
||||
$HIPLDFLAGS .= " $HIP_PATH/lib/device_util.cpp.o $HIP_PATH/lib/hip_device.cpp.o $HIP_PATH/lib/hip_error.cpp.o $HIP_PATH/lib/hip_event.cpp.o $HIP_PATH/lib/hip_hcc.cpp.o $HIP_PATH/lib/hip_memory.cpp.o $HIP_PATH/lib/hip_peer.cpp.o $HIP_PATH/lib/hip_stream.cpp.o $HIP_PATH/lib/unpinned_copy_engine.cpp.o $HIP_PATH/lib/hip_ldg.cpp.o $HIP_PATH/lib/hip_fp16.cpp.o $HIP_PATH/lib/hip_context.cpp.o";
|
||||
$HIPLDFLAGS .= " $HIP_PATH/lib/device_util.cpp.o $HIP_PATH/lib/hip_device.cpp.o $HIP_PATH/lib/hip_error.cpp.o $HIP_PATH/lib/hip_event.cpp.o $HIP_PATH/lib/hip_hcc.cpp.o $HIP_PATH/lib/hip_memory.cpp.o $HIP_PATH/lib/hip_peer.cpp.o $HIP_PATH/lib/hip_stream.cpp.o $HIP_PATH/lib/unpinned_copy_engine.cpp.o $HIP_PATH/lib/hip_ldg.cpp.o $HIP_PATH/lib/hip_fp16.cpp.o $HIP_PATH/lib/hip_context.cpp.o $HIP_PATH/lib/hip_module.cpp.o";
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -77,7 +77,6 @@ class ihipStream_t;
|
||||
class ihipDevice_t;
|
||||
class ihipCtx_t;
|
||||
|
||||
|
||||
// Color defs for debug messages:
|
||||
#define KNRM "\x1B[0m"
|
||||
#define KRED "\x1B[31m"
|
||||
@@ -397,8 +396,6 @@ public:
|
||||
typedef ihipStreamCriticalBase_t<StreamMutex> ihipStreamCritical_t;
|
||||
typedef LockedAccessor<ihipStreamCritical_t> LockedAccessor_StreamCrit_t;
|
||||
|
||||
|
||||
|
||||
// Internal stream structure.
|
||||
class ihipStream_t {
|
||||
public:
|
||||
@@ -660,8 +657,6 @@ extern void ihipSetTs(hipEvent_t e);
|
||||
|
||||
hipStream_t ihipSyncAndResolveStream(hipStream_t);
|
||||
|
||||
|
||||
|
||||
// Stream printf functions:
|
||||
inline std::ostream& operator<<(std::ostream& os, const ihipStream_t& s)
|
||||
{
|
||||
|
||||
@@ -51,6 +51,11 @@ typedef struct ihipCtx_t *hipCtx_t;
|
||||
typedef struct ihipDevice_t *hipDevice_t;
|
||||
|
||||
typedef struct ihipStream_t *hipStream_t;
|
||||
|
||||
typedef uint64_t hipFunction;
|
||||
|
||||
typedef uint64_t hipModule;
|
||||
|
||||
typedef struct hipEvent_t {
|
||||
struct ihipEvent_t *_handle;
|
||||
} hipEvent_t;
|
||||
@@ -1085,6 +1090,21 @@ hipError_t hipDeviceGetFromId(hipDevice_t *device, int deviceId);
|
||||
hipError_t hipDriverGetVersion(int *driverVersion) ;
|
||||
|
||||
|
||||
hipError_t hipModuleLoad(hipModule *module, const char *fname);
|
||||
|
||||
hipError_t hipModuleGetFunction(hipFunction *function, hipModule module, const char *kname);
|
||||
|
||||
hipError_t hipDrvLaunchKernel(hipFunction f,
|
||||
unsigned int gridDimX,
|
||||
unsigned int gridDimY,
|
||||
unsigned int gridDimZ,
|
||||
unsigned int blockDimX,
|
||||
unsigned int blockDimY,
|
||||
unsigned int blockDimZ,
|
||||
unsigned int sharedMemBytes,
|
||||
hipStream_t stream,
|
||||
void **kernelParams,
|
||||
void **extra) __attribute__((deprecated("kernelParams is not fully supported, use extra instead"))) ;
|
||||
|
||||
// doxygen end Version Management
|
||||
/**
|
||||
|
||||
@@ -0,0 +1,75 @@
|
||||
#include "hip_runtime.h"
|
||||
#include "hsa/hsa.h"
|
||||
#include "hsa/hsa_ext_amd.h"
|
||||
#include "hcc_detail/hip_hcc.h"
|
||||
#include "hcc_detail/trace_helper.h"
|
||||
#include <fstream>
|
||||
|
||||
/**
 * Reads a serialized HSA code object from a file and hands back its handle as a module.
 *
 * The file contents are staged in a buffer allocated from the current device's
 * accelerator system memory pool and then passed to hsa_code_object_deserialize().
 *
 * @param module  [out] receives the handle of the deserialized code object; only
 *                written when the function returns hipSuccess.
 * @param fname   path of the code object file.
 * @return hipSuccess on success;
 *         hipErrorInvalidDevice if no default context, device or memory pool is available;
 *         hipErrorInvalidValue if an argument is null or the file cannot be read or is empty;
 *         hipErrorMemoryAllocation if the staging buffer cannot be allocated;
 *         hipErrorUnknown if HSA fails to deserialize the code object.
 */
hipError_t hipModuleLoad(hipModule *module, const char *fname){
    HIP_INIT_API(fname);

    if(module == nullptr || fname == nullptr){
        return hipErrorInvalidValue;
    }

    auto ctx = ihipGetTlsDefaultCtx();
    if(ctx == nullptr){
        return hipErrorInvalidDevice;
    }

    int deviceId = ctx->getDevice()->_deviceId;
    ihipDevice_t *currentDevice = ihipGetDevice(deviceId);
    if(currentDevice == nullptr){
        return hipErrorInvalidDevice;
    }
    hc::accelerator acc = currentDevice->_acc;

    // Open positioned at the end so tellg() reports the file size.
    std::ifstream in(fname, std::ios::binary | std::ios::ate);
    if(!in){
        std::cout<<"Couldn't read file "<<fname<<std::endl;
        return hipErrorInvalidValue;
    }

    // tellg() yields -1 on failure; an empty file cannot hold a code object either.
    const std::streamoff fileEnd = in.tellg();
    if(fileEnd <= 0){
        std::cout<<"Couldn't read file "<<fname<<std::endl;
        return hipErrorInvalidValue;
    }
    const size_t size = static_cast<size_t>(fileEnd);

    hsa_amd_memory_pool_t *pool = (hsa_amd_memory_pool_t*)acc.get_hsa_am_system_region();
    if(pool == nullptr){
        return hipErrorInvalidDevice;
    }

    void *p = nullptr;
    hsa_status_t status = hsa_amd_memory_pool_allocate(*pool, size, 0, &p);
    // Compare, do not assign: the previous `assert(status = HSA_STATUS_SUCCESS)`
    // overwrote the status and always evaluated to false.
    if(status != HSA_STATUS_SUCCESS || p == nullptr){
        std::cout<<"Error: failed to allocate memory for code object"<<std::endl;
        return hipErrorMemoryAllocation;
    }
    char *ptr = static_cast<char*>(p);

    // Bounded read: never writes more than the `size` bytes that were allocated,
    // even if the file changed after its size was sampled.
    in.seekg(0, std::ios::beg);
    in.read(ptr, static_cast<std::streamsize>(size));
    if(!in || in.gcount() != static_cast<std::streamsize>(size)){
        std::cout<<"Couldn't read file "<<fname<<std::endl;
        hsa_amd_memory_pool_free(p);
        return hipErrorInvalidValue;
    }

    hsa_code_object_t obj;
    status = hsa_code_object_deserialize(ptr, size, NULL, &obj);
    if(status != HSA_STATUS_SUCCESS){
        // obj is not valid here, so *module is left untouched.
        hsa_amd_memory_pool_free(p);
        return hipErrorUnknown;
    }

    // NOTE(review): the staging buffer is deliberately kept alive on success, as in
    // the original code; whether the code object still references it after
    // deserialization should be confirmed before freeing it here.
    *module = obj.handle;
    return hipSuccess;
}
|
||||
|
||||
/**
 * Looks up a kernel by name in a module and returns its kernel object handle.
 *
 * A new HSA executable is created, the module's code object is loaded into it for
 * the current device's agent, the executable is frozen, and the named symbol's
 * HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT value is written to *func.
 *
 * @param func  [out] receives the kernel object handle; only meaningful when the
 *              function returns hipSuccess.
 * @param hmod  module handle (used as the hsa_code_object_t handle).
 * @param name  name of the kernel symbol to look up.
 * @return hipSuccess on success;
 *         hipErrorInvalidValue if an argument is null or the symbol lookup fails;
 *         hipErrorInvalidDevice if no default context, device or HSA agent is available;
 *         hipErrorUnknown if any other HSA call fails.
 */
hipError_t hipModuleGetFunction(hipFunction *func, hipModule hmod, const char *name){
    HIP_INIT_API(name);

    if(func == nullptr || name == nullptr){
        return hipErrorInvalidValue;
    }

    auto ctx = ihipGetTlsDefaultCtx();
    if(ctx == nullptr){
        return hipErrorInvalidDevice;
    }

    int deviceId = ctx->getDevice()->_deviceId;
    ihipDevice_t *currentDevice = ihipGetDevice(deviceId);
    if(currentDevice == nullptr){
        return hipErrorInvalidDevice;
    }
    hc::accelerator acc = currentDevice->_acc;

    hsa_agent_t *gpuAgent = (hsa_agent_t*)acc.get_hsa_agent();
    if(gpuAgent == nullptr){
        return hipErrorInvalidDevice;
    }

    // Errors are reported through the return value instead of assert(), which is
    // compiled out under NDEBUG and previously let failures return hipSuccess.
    hsa_executable_t executable;
    hsa_status_t status = hsa_executable_create(HSA_PROFILE_FULL, HSA_EXECUTABLE_STATE_UNFROZEN, NULL, &executable);
    if(status != HSA_STATUS_SUCCESS){
        return hipErrorUnknown;
    }

    hsa_code_object_t obj;
    obj.handle = hmod;

    status = hsa_executable_load_code_object(executable, *gpuAgent, obj, NULL);
    if(status == HSA_STATUS_SUCCESS){
        status = hsa_executable_freeze(executable, NULL);
    }
    if(status != HSA_STATUS_SUCCESS){
        hsa_executable_destroy(executable);
        return hipErrorUnknown;
    }

    hsa_executable_symbol_t kernel_symbol;
    status = hsa_executable_get_symbol(executable, NULL, name, *gpuAgent, 0, &kernel_symbol);
    if(status != HSA_STATUS_SUCCESS){
        // Treated as a bad `name` argument.
        hsa_executable_destroy(executable);
        return hipErrorInvalidValue;
    }

    status = hsa_executable_symbol_get_info(kernel_symbol,
                                            HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
                                            func);
    if(status != HSA_STATUS_SUCCESS){
        hsa_executable_destroy(executable);
        return hipErrorUnknown;
    }

    // NOTE(review): the executable is not destroyed on success (same as the original
    // code); presumably the returned kernel object refers to code owned by it —
    // confirm, and consider tracking the executable so it can be released later.
    return hipSuccess;
}
|
||||
Ссылка в новой задаче
Block a user