Module test correction and hipModuleUnload API

- Corrected the hipModule.cpp test to minimal code
- Added hipModuleUnload API
- Added hipModuleUnload API test

Change-Id: I9c40337043d7972a570b795e1bfc104bd2c4d8aa
This commit is contained in:
Aditya Atluri
2016-08-23 14:19:15 -05:00
parent f9d49c2aed
commit 2287af23a1
4 changed files with 65 additions and 188 deletions
+2
View File
@@ -1113,6 +1113,8 @@ hipError_t hipDriverGetVersion(int *driverVersion) ;
hipError_t hipModuleLoad(hipModule *module, const char *fname);
hipError_t hipModuleUnload(hipModule module);
hipError_t hipModuleGetFunction(hipFunction *function, hipModule module, const char *kname);
hipError_t hipLaunchModuleKernel(hipFunction f,
+13
View File
@@ -116,6 +116,19 @@ hipError_t hipModuleLoad(hipModule *module, const char *fname){
return ret;
}
hipError_t hipModuleUnload(hipModule hmod){
hsa_executable_t exec;
hsa_code_object_t co;
hipError_t ret = hipSuccess;
exec.handle = hmod.executable;
co.handle = hmod.object;
hsa_status_t status = hsa_executable_destroy(exec);
if(status != HSA_STATUS_SUCCESS){ret = hipErrorInvalidValue; }
status = hsa_code_object_destroy(co);
if(status != HSA_STATUS_SUCCESS){ret = hipErrorInvalidValue; }
return ret;
}
hipError_t hipModuleGetFunction(hipFunction *func, hipModule hmod, const char *name){
HIP_INIT_API(name);
auto ctx = ihipGetTlsDefaultCtx();
+19 -188
View File
@@ -1,3 +1,22 @@
/*
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include<hip_runtime.h>
#include<hip_runtime_api.h>
#include<iostream>
@@ -10,152 +29,6 @@
#define LEN 64
#define SIZE LEN<<2
typedef hsa_code_object_t hipmodule;
typedef uint64_t hipfunction;
typedef unsigned int hipDevicePtr;
hsa_region_t systemRegion;
hsa_region_t kernArgRegion;
hsa_agent_t gpuAgent;
hsa_queue_t *Queue;
hsa_signal_t signal;
hsa_status_t findGpu(hsa_agent_t agent, void *data){
hsa_device_type_t device_type;
hsa_status_t hsa_error_code = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type);
if(hsa_error_code != HSA_STATUS_SUCCESS){return hsa_error_code;}
if(device_type == HSA_DEVICE_TYPE_GPU){
gpuAgent = agent;
}
return HSA_STATUS_SUCCESS;
}
hsa_status_t FindRegions(hsa_region_t region, void *data){
hsa_region_segment_t segment_id;
hsa_region_get_info(region, HSA_REGION_INFO_SEGMENT, &segment_id);
if (segment_id != HSA_REGION_SEGMENT_GLOBAL) {
return HSA_STATUS_SUCCESS;
}
hsa_region_global_flag_t flags;
hsa_region_get_info(region, HSA_REGION_INFO_GLOBAL_FLAGS, &flags);
if(flags & HSA_REGION_GLOBAL_FLAG_FINE_GRAINED){
systemRegion = region;
}
if(flags & HSA_REGION_GLOBAL_FLAG_KERNARG){
kernArgRegion = region;
}
return HSA_STATUS_SUCCESS;
}
hipError_t ihipModuleLoad(hipmodule *module, const char *fname){
std::ifstream in(fname, std::ios::binary | std::ios::ate);
hipError_t ret = hipSuccess;
if(!in){
std::cout<<"Couldn't read file "<<fname<<std::endl;
ret = hipErrorInvalidValue;
}else{
size_t size = std::string::size_type(in.tellg());
void *p = NULL;
hsa_status_t status = hsa_memory_allocate(systemRegion, size, (void**)&p);
assert(status == HSA_STATUS_SUCCESS);
char *ptr = (char*)p;
if(!ptr){
std::cout<<"Error: failed to allocate memory for code object."<<std::endl;
}
in.seekg(0, std::ios::beg);
std::copy(std::istreambuf_iterator<char>(in),
std::istreambuf_iterator<char>(), ptr);
status = hsa_code_object_deserialize(ptr, size, NULL, module);
if (status != HSA_STATUS_SUCCESS) { std::cout<<"Failed to deserialize code object"<<std::endl; }
std::cout<<"File Read!"<<std::endl;
}
return ret;
}
hipError_t ihipModuleGetFunction(hipfunction *func, hipmodule hmod, const char *name){
hsa_status_t status;
hsa_executable_symbol_t kernel_symbol;
hsa_executable_t executable;
status = hsa_executable_create(HSA_PROFILE_FULL, HSA_EXECUTABLE_STATE_UNFROZEN, NULL, &executable);
assert(status == HSA_STATUS_SUCCESS);
status = hsa_executable_load_code_object(executable, gpuAgent, hmod, NULL);
assert(status == HSA_STATUS_SUCCESS);
status = hsa_executable_freeze(executable, NULL);
assert(status == HSA_STATUS_SUCCESS);
status = hsa_executable_get_symbol(executable, NULL, name, gpuAgent, 0, &kernel_symbol);
assert(status == HSA_STATUS_SUCCESS);
status = hsa_executable_symbol_get_info(kernel_symbol,
HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
func);
assert(status == HSA_STATUS_SUCCESS);
return hipSuccess;
}
void hsaInit(){
hsa_status_t status;
uint32_t queue_size;
status = hsa_iterate_agents(findGpu, NULL);
assert(status == HSA_STATUS_SUCCESS);
status = hsa_agent_iterate_regions(gpuAgent, FindRegions, NULL);
assert(status == HSA_STATUS_SUCCESS);
char agent_name[64];
status = hsa_agent_get_info(gpuAgent, HSA_AGENT_INFO_NAME, agent_name);
assert(status == HSA_STATUS_SUCCESS);
status = hsa_agent_get_info(gpuAgent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_size);
assert(status == HSA_STATUS_SUCCESS);
status = hsa_queue_create(gpuAgent, queue_size, HSA_QUEUE_TYPE_MULTI, NULL, NULL, UINT32_MAX, UINT32_MAX, &Queue);
assert(status == HSA_STATUS_SUCCESS);
status = hsa_signal_create(1, 0, NULL, &signal);
assert(status == HSA_STATUS_SUCCESS);
std::cout<<agent_name<<std::endl;
}
hipError_t hipDrvLaunchKernel(hipfunction f,
uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ,
uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ,
uint32_t sharedMemBytes, hipStream_t hStream,
void **kernelParams, uint32_t size, void **extra){
void *kernarg;
std::vector<void*>Args;
void ***newP = (void***)kernelParams;
for(uint32_t i=0;i<size/sizeof(void*);i++){
Args.push_back((void*)*newP[i]);
}
hsa_status_t status = hsa_memory_allocate(kernArgRegion, size, &kernarg);
memcpy(kernarg, Args.data(), size);
const uint32_t queue_mask = Queue->size -1;
uint32_t packet_index = hsa_queue_load_write_index_relaxed(Queue);
hsa_kernel_dispatch_packet_t *dispatch_packet = &(((hsa_kernel_dispatch_packet_t*)(Queue->base_address))[packet_index & queue_mask]);
dispatch_packet->completion_signal = signal;
dispatch_packet->workgroup_size_x = blockDimX;
dispatch_packet->workgroup_size_y = blockDimY;
dispatch_packet->workgroup_size_z = blockDimZ;
dispatch_packet->grid_size_x = blockDimX * gridDimX;
dispatch_packet->grid_size_y = blockDimY * gridDimY;
dispatch_packet->grid_size_z = blockDimZ * gridDimZ;
dispatch_packet->group_segment_size = 0;
dispatch_packet->private_segment_size = sharedMemBytes;
dispatch_packet->kernarg_address = kernarg;
dispatch_packet->kernel_object = (uint64_t)f;
uint16_t header = (HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE) |
(1 << HSA_PACKET_HEADER_BARRIER) |
(HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) |
(HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE);
uint16_t setup = 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
uint32_t header32 = header | (setup << 16);
__atomic_store_n((uint32_t*)(dispatch_packet), header32, __ATOMIC_RELEASE);
hsa_queue_store_write_index_relaxed(Queue, packet_index+1);
hsa_signal_store_relaxed(Queue->doorbell_signal, packet_index);
hsa_signal_value_t value = hsa_signal_wait_acquire(signal, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
return hipSuccess;
}
#define fileName "vcpy_isa.co"
#define kernel_name "hello_world"
@@ -164,36 +37,8 @@ __global__ void Cpy(hipLaunchParm lp, float *Ad, float* Bd){
Bd[tx] = Ad[tx];
}
amd_kernel_code_t* getAkc(uint64_t handle){
bool ext_supported = false;
hsa_status_t status = hsa_system_extension_supported(
HSA_EXTENSION_AMD_LOADER, 1, 0, &ext_supported);
assert(HSA_STATUS_SUCCESS == status);
assert(true == ext_supported);
hsa_ven_amd_loader_1_00_pfn_t ext_table = {nullptr};
status = hsa_system_get_extension_table(
HSA_EXTENSION_AMD_LOADER, 1, 0, &ext_table);
assert(HSA_STATUS_SUCCESS == status);
assert(nullptr != ext_table.hsa_ven_amd_loader_query_host_address);
std::cout<<"Start"<<std::endl;
const void *akc = nullptr;//new amd_kernel_code_t;
std::cout<<"End"<<std::endl;
status = ext_table.hsa_ven_amd_loader_query_host_address(
reinterpret_cast<void*>(handle), &akc);
if(HSA_STATUS_SUCCESS != status){
akc = reinterpret_cast<void*>(handle);
}
assert(nullptr!=akc);
amd_kernel_code_t *Akc = (amd_kernel_code_t*)akc;
std::cout<<Akc->kernarg_segment_byte_size<<std::endl;
return nullptr;
}
int main(){
float *A, *B, *Ad, *Bd;
hipDevicePtr Aptr, Bptr;
A = new float[LEN];
B = new float[LEN];
@@ -205,9 +50,6 @@ int main(){
hipMalloc((void**)&Ad, SIZE);
hipMalloc((void**)&Bd, SIZE);
Aptr = reinterpret_cast<unsigned long>(Ad);
Bptr = reinterpret_cast<unsigned long>(Bd);
hsaInit();
hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice);
hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice);
@@ -219,16 +61,6 @@ int main(){
hipStreamCreate(&stream);
void *args[2] = {&Ad, &Bd};
/* struct __attribute__((aligned(16))) args_t{
void *Aptr;
void *Bptr;
} args;
args.Aptr = Ad;
args.Bptr = Bd;
*/
// hipDrvLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, (void**)&args, sizeof(args), 0);
amd_kernel_code_t *akc = getAkc(Function.kernel);
std::vector<void*>argBuffer(2);
memcpy(&argBuffer[0], &Ad, sizeof(void*));
@@ -246,7 +78,6 @@ int main(){
hipStreamDestroy(stream);
// hipLaunchKernel(Cpy, dim3(1), dim3(LEN), 0, 0, Ad, Bd);
hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost);
for(uint32_t i=0;i<LEN;i++){
+31
View File
@@ -0,0 +1,31 @@
/*
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include<hip/hip_runtime.h>
#include<hip/hip_runtime_api.h>
#include<iostream>
#define fileName "vcpy_isa.co"
int main(){
hipModule module;
hipModuleLoad(&module, fileName);
hipModuleUnload(module);
}