diff --git a/tests/src/hipModule.cpp b/tests/src/hipModule.cpp new file mode 100644 index 0000000000..8397b5f338 --- /dev/null +++ b/tests/src/hipModule.cpp @@ -0,0 +1,257 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define LEN 64 +#define SIZE LEN<<2 + +typedef hsa_code_object_t hipmodule; +typedef uint64_t hipfunction; +typedef unsigned int hipDevicePtr; + +hsa_region_t systemRegion; +hsa_region_t kernArgRegion; +hsa_agent_t gpuAgent; +hsa_queue_t *Queue; +hsa_signal_t signal; + +hsa_status_t findGpu(hsa_agent_t agent, void *data){ + hsa_device_type_t device_type; + hsa_status_t hsa_error_code = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type); + if(hsa_error_code != HSA_STATUS_SUCCESS){return hsa_error_code;} + if(device_type == HSA_DEVICE_TYPE_GPU){ + gpuAgent = agent; + } + + return HSA_STATUS_SUCCESS; +} + +hsa_status_t FindRegions(hsa_region_t region, void *data){ + hsa_region_segment_t segment_id; + hsa_region_get_info(region, HSA_REGION_INFO_SEGMENT, &segment_id); + + if (segment_id != HSA_REGION_SEGMENT_GLOBAL) { + return HSA_STATUS_SUCCESS; + } + + hsa_region_global_flag_t flags; + hsa_region_get_info(region, HSA_REGION_INFO_GLOBAL_FLAGS, &flags); + + if(flags & HSA_REGION_GLOBAL_FLAG_FINE_GRAINED){ + systemRegion = region; + } + + if(flags & HSA_REGION_GLOBAL_FLAG_KERNARG){ + kernArgRegion = region; + } + return HSA_STATUS_SUCCESS; +} + +hipError_t ihipModuleLoad(hipmodule *module, const char *fname){ + std::ifstream in(fname, std::ios::binary | std::ios::ate); + hipError_t ret = hipSuccess; + if(!in){ + std::cout<<"Couldn't read file "<(in), + std::istreambuf_iterator(), ptr); + status = hsa_code_object_deserialize(ptr, size, NULL, module); + if (status != HSA_STATUS_SUCCESS) { std::cout<<"Failed to deserialize code object"<Args; + void ***newP = (void***)kernelParams; + for(uint32_t i=0;isize -1; + uint32_t packet_index = hsa_queue_load_write_index_relaxed(Queue); + hsa_kernel_dispatch_packet_t *dispatch_packet = &(((hsa_kernel_dispatch_packet_t*)(Queue->base_address))[packet_index & queue_mask]); + dispatch_packet->completion_signal = signal; + dispatch_packet->workgroup_size_x = blockDimX; + dispatch_packet->workgroup_size_y = blockDimY; + dispatch_packet->workgroup_size_z = blockDimZ; + dispatch_packet->grid_size_x = blockDimX * gridDimX; + dispatch_packet->grid_size_y = blockDimY * gridDimY; + dispatch_packet->grid_size_z = blockDimZ * gridDimZ; + dispatch_packet->group_segment_size = 0; + dispatch_packet->private_segment_size = sharedMemBytes; + dispatch_packet->kernarg_address = kernarg; + dispatch_packet->kernel_object = (uint64_t)f; + uint16_t header = (HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE) | + (1 << HSA_PACKET_HEADER_BARRIER) | + (HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) | + (HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE); + uint16_t setup = 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; + uint32_t header32 = header | (setup << 16); + __atomic_store_n((uint32_t*)(dispatch_packet), header32, __ATOMIC_RELEASE); + hsa_queue_store_write_index_relaxed(Queue, packet_index+1); + hsa_signal_store_relaxed(Queue->doorbell_signal, packet_index); + hsa_signal_value_t value = hsa_signal_wait_acquire(signal, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_BLOCKED); + return hipSuccess; +} + +#define fileName "vcpy_isa.co" +#define kernel_name "hello_world" + +__global__ void Cpy(hipLaunchParm lp, float *Ad, float* Bd){ + int tx = hipThreadIdx_x; + Bd[tx] = Ad[tx]; +} + +amd_kernel_code_t* getAkc(uint64_t handle){ + bool ext_supported = false; + hsa_status_t status = hsa_system_extension_supported( + HSA_EXTENSION_AMD_LOADER, 1, 0, &ext_supported); + assert(HSA_STATUS_SUCCESS == status); + assert(true == ext_supported); + hsa_ven_amd_loader_1_00_pfn_t ext_table = {nullptr}; + status = hsa_system_get_extension_table( + HSA_EXTENSION_AMD_LOADER, 1, 0, &ext_table); + assert(HSA_STATUS_SUCCESS == status); + assert(nullptr != ext_table.hsa_ven_amd_loader_query_host_address); + std::cout<<"Start"<(handle), &akc); + + if(HSA_STATUS_SUCCESS != status){ + akc = reinterpret_cast(handle); + } + + assert(nullptr!=akc); + amd_kernel_code_t *Akc = (amd_kernel_code_t*)akc; + std::cout<kernarg_segment_byte_size<(Ad); + Bptr = reinterpret_cast(Bd); + hsaInit(); + + hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); + hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice); + hipModule Module; + hipFunction Function; + hipModuleLoad(&Module, fileName); + hipModuleGetFunction(&Function, Module, kernel_name); + hipStream_t stream; + hipStreamCreate(&stream); + void *args[2] = {&Ad, &Bd}; + +/* struct __attribute__((aligned(16))) args_t{ + void *Aptr; + void *Bptr; + } args; + args.Aptr = Ad; + args.Bptr = Bd; +*/ +// hipDrvLaunchKernel(Function, 1, 1, 1, LEN, 1, 1, 0, 0, (void**)&args, sizeof(args), 0); + + amd_kernel_code_t *akc = getAkc(Function.kernel); + + std::vectorargBuffer(2); + memcpy(&argBuffer[0], &Ad, sizeof(void*)); + memcpy(&argBuffer[1], &Bd, sizeof(void*)); + + size_t size = argBuffer.size()*sizeof(void*); + + void *config[] = { + HIP_LAUNCH_PARAM_BUFFER_POINTER, &argBuffer[0], + HIP_LAUNCH_PARAM_BUFFER_SIZE, &size, + HIP_LAUNCH_PARAM_END + }; + + hipLaunchModuleKernel(Function, 1, 1, 1, LEN, 1, 1, 0, stream, NULL, (void**)&config); + + hipStreamDestroy(stream); + +// hipLaunchKernel(Cpy, dim3(1), dim3(LEN), 0, 0, Ad, Bd); + hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost); + + for(uint32_t i=0;i