Added new API hipHccModuleLaunchKernel

1. hipHccModuleLaunchKernel is the same as hipModuleLaunchKernel but uses the OpenCL work-item model
2. Added copyright
3. Fixed header naming

Change-Id: I6a7c35a3566e2f8d3f5056613e34193775d4b236
Este commit está contenido en:
Aditya Atluri
2017-03-31 12:11:34 -05:00
padre e3a4547703
commit 1cead6a4cd
Se han modificado 30 ficheros con 1138 adiciones y 91 borrados
+45 -20
Ver fichero
@@ -1,5 +1,5 @@
/*
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@@ -27,15 +27,13 @@ THE SOFTWARE.
#include <elf.h>
#include <gelf.h>
#include <map>
#include "AMDGPUPTNote.h"
#include "AMDGPURuntimeMetadata.h"
#include "hsa/hsa.h"
#include "hsa/hsa_ext_amd.h"
#include "hsa/amd_hsa_kernel_code.h"
#include "hip/hip_runtime.h"
#include "hip_hcc.h"
#include "hip_hcc_internal.h"
#include "trace_helper.h"
//TODO Use Pool APIs from HCC to get memory regions.
@@ -365,16 +363,12 @@ hipError_t hipModuleGetFunction(hipFunction_t *hfunc, hipModule_t hmod,
}
hipError_t hipModuleLaunchKernel(hipFunction_t f,
uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ,
uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ,
uint32_t sharedMemBytes, hipStream_t hStream,
hipError_t ihipModuleLaunchKernel(hipFunction_t f,
uint32_t globalWorkSizeX, uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ,
uint32_t localWorkSizeX, uint32_t localWorkSizeY, uint32_t localWorkSizeZ,
size_t sharedMemBytes, hipStream_t hStream,
void **kernelParams, void **extra)
{
HIP_INIT_API(f, gridDimX, gridDimY, gridDimZ,
blockDimX, blockDimY, blockDimZ,
sharedMemBytes, hStream,
kernelParams, extra);
auto ctx = ihipGetTlsDefaultCtx();
hipError_t ret = hipSuccess;
@@ -420,7 +414,7 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f,
*/
grid_launch_parm lp;
lp.dynamic_group_mem_bytes = sharedMemBytes; // TODO - this should be part of preLaunchKernel.
hStream = ihipPreLaunchKernel(hStream, dim3(gridDimX, gridDimY, gridDimZ), dim3(blockDimX, blockDimY, blockDimZ), &lp, f->_name.c_str());
hStream = ihipPreLaunchKernel(hStream, dim3(globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ), dim3(localWorkSizeX, localWorkSizeY, localWorkSizeZ), &lp, f->_name.c_str());
hsa_kernel_dispatch_packet_t aql;
@@ -430,12 +424,12 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f,
//aql.completion_signal._handle = 0;
//aql.kernarg_address = 0;
aql.workgroup_size_x = blockDimX;
aql.workgroup_size_y = blockDimY;
aql.workgroup_size_z = blockDimZ;
aql.grid_size_x = blockDimX * gridDimX;
aql.grid_size_y = blockDimY * gridDimY;
aql.grid_size_z = blockDimZ * gridDimZ;
aql.workgroup_size_x = localWorkSizeX;
aql.workgroup_size_y = localWorkSizeY;
aql.workgroup_size_z = localWorkSizeZ;
aql.grid_size_x = globalWorkSizeX;
aql.grid_size_y = globalWorkSizeY;
aql.grid_size_z = globalWorkSizeZ;
aql.group_segment_size = f->_groupSegmentSize + sharedMemBytes;
aql.private_segment_size = f->_privateSegmentSize;
aql.kernel_object = f->_object;
@@ -459,9 +453,40 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f,
ihipPostLaunchKernel(f->_name.c_str(), hStream, lp);
}
return ihipLogStatus(ret);
return ret;
}
/**
 * Launch a module kernel whose geometry is given as grid and block dimensions.
 *
 * For each axis the total number of work-items is computed as
 * blockDim * gridDim (uint32_t arithmetic) and passed to
 * ihipModuleLaunchKernel as the global work size; the block dimensions are
 * passed as the local work size. sharedMemBytes, hStream, kernelParams and
 * extra are forwarded unchanged.
 *
 * @return The status produced by ihipModuleLaunchKernel, routed through
 *         ihipLogStatus.
 */
hipError_t hipModuleLaunchKernel(hipFunction_t f,
                                 uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ,
                                 uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ,
                                 uint32_t sharedMemBytes, hipStream_t hStream,
                                 void **kernelParams, void **extra)
{
    HIP_INIT_API(f, gridDimX, gridDimY, gridDimZ,
                 blockDimX, blockDimY, blockDimZ,
                 sharedMemBytes, hStream,
                 kernelParams, extra);

    // Convert the grid/block description into per-axis global work sizes,
    // which is what the shared launch routine expects.
    const uint32_t globalWorkSizeX = blockDimX * gridDimX;
    const uint32_t globalWorkSizeY = blockDimY * gridDimY;
    const uint32_t globalWorkSizeZ = gridDimZ * blockDimZ;

    return ihipLogStatus(ihipModuleLaunchKernel(f,
                                                globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ,
                                                blockDimX, blockDimY, blockDimZ,
                                                sharedMemBytes, hStream,
                                                kernelParams, extra));
}
/**
 * Launch a module kernel whose geometry is given as global and local work
 * sizes rather than grid and block dimensions.
 *
 * Unlike hipModuleLaunchKernel, no multiplication is applied here: the
 * globalWorkSize* values are handed to ihipModuleLaunchKernel as-is, along
 * with the localWorkSize* values and every other argument.
 *
 * @return The status produced by ihipModuleLaunchKernel, routed through
 *         ihipLogStatus.
 */
hipError_t hipHccModuleLaunchKernel(hipFunction_t f,
uint32_t globalWorkSizeX, uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ,
uint32_t localWorkSizeX, uint32_t localWorkSizeY, uint32_t localWorkSizeZ,
size_t sharedMemBytes, hipStream_t hStream,
void **kernelParams, void **extra)
{
// API-entry macro; receives every argument of this call.
HIP_INIT_API(f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ,
localWorkSizeX, localWorkSizeY, localWorkSizeZ,
sharedMemBytes, hStream,
kernelParams, extra);
// Straight pass-through to the shared launch routine.
return ihipLogStatus(ihipModuleLaunchKernel(f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ,
localWorkSizeX, localWorkSizeY, localWorkSizeZ,
sharedMemBytes, hStream, kernelParams, extra));
}
hipError_t hipModuleGetGlobal(hipDeviceptr_t *dptr, size_t *bytes,
hipModule_t hmod, const char* name)