Added support for executable and symbols for data structures

- symbol handle is added to hipFunction
- executable handle is added to hipModule
- This way, the APIs doesn't need to track the values

Change-Id: I7cf05329cf79fe946319d7746bd9f5503268fda4
Tento commit je obsažen v:
Aditya Atluri
2016-08-19 08:49:34 -05:00
rodič 8f19a51521
revize 9eaabb507c
2 změnil soubory, kde provedl 87 přidání a 43 odebrání
+8 -2
Zobrazit soubor
@@ -52,9 +52,15 @@ typedef struct ihipDevice_t *hipDevice_t;
typedef struct ihipStream_t *hipStream_t;
typedef uint64_t hipFunction;
typedef struct {
uint64_t kernel;
uint64_t symbol;
}hipFunction;
typedef uint64_t hipModule;
typedef struct{
uint64_t executable;
uint64_t object;
} hipModule;
typedef struct hipEvent_t {
struct ihipEvent_t *_handle;
+79 -41
Zobrazit soubor
@@ -20,6 +20,7 @@ THE SOFTWARE.
#include "hip_runtime.h"
#include "hsa/hsa.h"
#include "hsa/hsa_ext_amd.h"
#include "hsa/amd_hsa_kernel_code.h"
#include "hcc_detail/hip_hcc.h"
#include "hcc_detail/trace_helper.h"
#include <fstream>
@@ -27,66 +28,71 @@ THE SOFTWARE.
//TODO Use Pool APIs from HCC to get memory regions.
namespace hipdrv{
hsa_status_t findSystemRegions(hsa_region_t region, void *data){
hsa_region_segment_t segment_id;
hsa_region_get_info(region, HSA_REGION_INFO_SEGMENT, &segment_id);
if(segment_id != HSA_REGION_SEGMENT_GLOBAL){
return HSA_STATUS_SUCCESS;
}
hsa_status_t findSystemRegions(hsa_region_t region, void *data){
hsa_region_segment_t segment_id;
hsa_region_get_info(region, HSA_REGION_INFO_SEGMENT, &segment_id);
hsa_region_global_flag_t flags;
hsa_region_get_info(region, HSA_REGION_INFO_GLOBAL_FLAGS, &flags);
if(segment_id != HSA_REGION_SEGMENT_GLOBAL){
return HSA_STATUS_SUCCESS;
}
hsa_region_t *reg = (hsa_region_t*)data;
hsa_region_global_flag_t flags;
hsa_region_get_info(region, HSA_REGION_INFO_GLOBAL_FLAGS, &flags);
if(flags & HSA_REGION_GLOBAL_FLAG_FINE_GRAINED){
*reg = region;
}
hsa_region_t *reg = (hsa_region_t*)data;
return HSA_STATUS_SUCCESS;
}
if(flags & HSA_REGION_GLOBAL_FLAG_FINE_GRAINED){
*reg = region;
}
hsa_status_t findKernArgRegions(hsa_region_t region, void *data){
hsa_region_segment_t segment_id;
hsa_region_get_info(region, HSA_REGION_INFO_SEGMENT, &segment_id);
return HSA_STATUS_SUCCESS;
}
if(segment_id != HSA_REGION_SEGMENT_GLOBAL){
return HSA_STATUS_SUCCESS;
}
hsa_status_t findKernArgRegions(hsa_region_t region, void *data){
hsa_region_segment_t segment_id;
hsa_region_get_info(region, HSA_REGION_INFO_SEGMENT, &segment_id);
hsa_region_global_flag_t flags;
hsa_region_get_info(region, HSA_REGION_INFO_GLOBAL_FLAGS, &flags);
if(segment_id != HSA_REGION_SEGMENT_GLOBAL){
return HSA_STATUS_SUCCESS;
}
hsa_region_t *reg = (hsa_region_t*)data;
hsa_region_global_flag_t flags;
hsa_region_get_info(region, HSA_REGION_INFO_GLOBAL_FLAGS, &flags);
if(flags & HSA_REGION_GLOBAL_FLAG_KERNARG){
*reg = region;
}
hsa_region_t *reg = (hsa_region_t*)data;
return HSA_STATUS_SUCCESS;
}
if(flags & HSA_REGION_GLOBAL_FLAG_KERNARG){
*reg = region;
}
return HSA_STATUS_SUCCESS;
}
}
} // End namespace hipdrv
hipError_t hipModuleLoad(hipModule *module, const char *fname){
HIP_INIT_API(fname);
hipError_t ret = hipSuccess;
if(module == NULL){
ret = hipErrorInvalidValue;
return hipErrorInvalidValue;
}
auto ctx = ihipGetTlsDefaultCtx();
if(ctx == nullptr){
ret = hipErrorInvalidContext;
}else{
int deviceId = ctx->getDevice()->_deviceId;
ihipDevice_t *currentDevice = ihipGetDevice(deviceId);
std::ifstream in(fname, std::ios::binary | std::ios::ate);
if(!in){
return hipErrorFileNotFound;
}else{
size_t size = std::string::size_type(in.tellg());
void *p = NULL;
@@ -97,22 +103,35 @@ hipError_t hipModuleLoad(hipModule *module, const char *fname){
if(status != HSA_STATUS_SUCCESS){
return hipErrorOutOfMemory;
}
char *ptr = (char*)p;
if(!ptr){
return hipErrorOutOfMemory;
std::cout<<"Error: failed to allocate memory for code object"<<std::endl;
}
hsa_code_object_t obj;
in.seekg(0, std::ios::beg);
std::copy(std::istreambuf_iterator<char>(in),
std::istreambuf_iterator<char>(), ptr);
status = hsa_code_object_deserialize(ptr, size, NULL, &obj);
if(status != HSA_STATUS_SUCCESS){
return hipErrorSharedObjectInitFailed;
}
*module = obj.handle;
module->object = obj.handle;
hsa_executable_t executable;
status = hsa_executable_create(HSA_PROFILE_FULL, HSA_EXECUTABLE_STATE_UNFROZEN, NULL, &executable);
if(status != HSA_STATUS_SUCCESS){
return hipErrorNotInitialized;
}
module->executable = executable.handle;
}
}
return ret;
}
@@ -120,41 +139,52 @@ hipError_t hipModuleGetFunction(hipFunction *func, hipModule hmod, const char *n
HIP_INIT_API(name);
auto ctx = ihipGetTlsDefaultCtx();
hipError_t ret = hipSuccess;
if(name == nullptr || hmod == 0){
if(name == nullptr){
return hipErrorInvalidValue;
}
if(ctx == nullptr){
ret = hipErrorInvalidContext;
}else{
int deviceId = ctx->getDevice()->_deviceId;
ihipDevice_t *currentDevice = ihipGetDevice(deviceId);
hsa_agent_t gpuAgent = (hsa_agent_t)currentDevice->_hsaAgent;
hsa_status_t status;
hsa_executable_symbol_t kernel_symbol;
hsa_executable_t executable;
status = hsa_executable_create(HSA_PROFILE_FULL, HSA_EXECUTABLE_STATE_UNFROZEN, NULL, &executable);
hsa_executable_symbol_t kernel_symbol;
executable.handle = hmod.executable;
if(status != HSA_STATUS_SUCCESS){
return hipErrorNotInitialized;
}
hsa_code_object_t obj;
obj.handle = hmod;
obj.handle = hmod.object;
status = hsa_executable_load_code_object(executable, gpuAgent, obj, NULL);
if(status != HSA_STATUS_SUCCESS){
return hipErrorNotInitialized;
}
status = hsa_executable_freeze(executable, NULL);
status = hsa_executable_get_symbol(executable, NULL, name, gpuAgent, 0, &kernel_symbol);
if(status != HSA_STATUS_SUCCESS){
return hipErrorNotFound;
}
status = hsa_executable_symbol_get_info(kernel_symbol,
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
func);
&func->kernel);
if(status != HSA_STATUS_SUCCESS){
return hipErrorNotFound;
}
func->symbol = kernel_symbol.handle;
}
return ret;
}
@@ -163,11 +193,13 @@ hipError_t hipLaunchModuleKernel(hipFunction f,
uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ,
uint32_t sharedMemBytes, hipStream_t hStream,
void **kernelParams, void **extra){
HIP_INIT_API(f);
HIP_INIT_API(f.kernel);
auto ctx = ihipGetTlsDefaultCtx();
hipError_t ret = hipSuccess;
if(ctx == nullptr){
ret = hipErrorInvalidDevice;
}else{
int deviceId = ctx->getDevice()->_deviceId;
ihipDevice_t *currentDevice = ihipGetDevice(deviceId);
@@ -194,12 +226,15 @@ Kernel argument preparation.
hsa_status_t status = hsa_agent_iterate_regions(gpuAgent, hipdrv::findKernArgRegions, &kernArg);
void *kern;
status = hsa_memory_allocate(kernArg, kernSize, &kern);
if(status != HSA_STATUS_SUCCESS){
return hipErrorLaunchOutOfResources;
}
memcpy(kern, config[1], kernSize);
/*
Pre kernel launch
@@ -215,6 +250,10 @@ Pre kernel launch
hsa_signal_t signal;
status = hsa_signal_create(1, 0, NULL, &signal);
// hsa_memory_pool_t pool = av->get_hsa_kernarg_region();
// status = hsa_amd_memory_pool_allocate(pool, kernSize, 0, &kernArg);
// assert(status == HSA_STATUS_SUCCESS);
/*
Creating the packets
*/
@@ -230,17 +269,15 @@ Creating the packets
dispatch_packet->grid_size_x = blockDimX * gridDimX;
dispatch_packet->grid_size_y = blockDimY * gridDimY;
dispatch_packet->grid_size_z = blockDimZ * gridDimZ;
dispatch_packet->group_segment_size = 0;
dispatch_packet->private_segment_size = sharedMemBytes;
dispatch_packet->kernarg_address = kern;
dispatch_packet->kernel_object = f;
dispatch_packet->kernel_object = f.kernel;
uint16_t header = (HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE) |
(1 << HSA_PACKET_HEADER_BARRIER) |
(HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) |
(HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE);
uint16_t setup = 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
uint32_t header32 = header | (setup << 16);
@@ -250,5 +287,6 @@ Creating the packets
hsa_signal_store_relaxed(Queue->doorbell_signal, packet_index);
hsa_signal_value_t value = hsa_signal_wait_acquire(signal, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
}
return ret;
}