Added new API hipHccModuleLaunchKernel
1. hipHccModuleLaunchKernel is the same as hipModuleLaunchKernel, but uses the OpenCL work-item model: callers pass global and local work sizes directly instead of grid and block dimensions. 2. Added copyright. 3. Fixed header naming. Change-Id: I6a7c35a3566e2f8d3f5056613e34193775d4b236
Este commit está contenido en:
+45
-20
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (c) 2015 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -27,15 +27,13 @@ THE SOFTWARE.
|
||||
#include <elf.h>
|
||||
#include <gelf.h>
|
||||
#include <map>
|
||||
#include "AMDGPUPTNote.h"
|
||||
#include "AMDGPURuntimeMetadata.h"
|
||||
|
||||
#include "hsa/hsa.h"
|
||||
#include "hsa/hsa_ext_amd.h"
|
||||
#include "hsa/amd_hsa_kernel_code.h"
|
||||
|
||||
#include "hip/hip_runtime.h"
|
||||
#include "hip_hcc.h"
|
||||
#include "hip_hcc_internal.h"
|
||||
#include "trace_helper.h"
|
||||
|
||||
//TODO Use Pool APIs from HCC to get memory regions.
|
||||
@@ -365,16 +363,12 @@ hipError_t hipModuleGetFunction(hipFunction_t *hfunc, hipModule_t hmod,
|
||||
}
|
||||
|
||||
|
||||
hipError_t hipModuleLaunchKernel(hipFunction_t f,
|
||||
uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ,
|
||||
uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ,
|
||||
uint32_t sharedMemBytes, hipStream_t hStream,
|
||||
hipError_t ihipModuleLaunchKernel(hipFunction_t f,
|
||||
uint32_t globalWorkSizeX, uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ,
|
||||
uint32_t localWorkSizeX, uint32_t localWorkSizeY, uint32_t localWorkSizeZ,
|
||||
size_t sharedMemBytes, hipStream_t hStream,
|
||||
void **kernelParams, void **extra)
|
||||
{
|
||||
HIP_INIT_API(f, gridDimX, gridDimY, gridDimZ,
|
||||
blockDimX, blockDimY, blockDimZ,
|
||||
sharedMemBytes, hStream,
|
||||
kernelParams, extra);
|
||||
|
||||
auto ctx = ihipGetTlsDefaultCtx();
|
||||
hipError_t ret = hipSuccess;
|
||||
@@ -420,7 +414,7 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f,
|
||||
*/
|
||||
grid_launch_parm lp;
|
||||
lp.dynamic_group_mem_bytes = sharedMemBytes; // TODO - this should be part of preLaunchKernel.
|
||||
hStream = ihipPreLaunchKernel(hStream, dim3(gridDimX, gridDimY, gridDimZ), dim3(blockDimX, blockDimY, blockDimZ), &lp, f->_name.c_str());
|
||||
hStream = ihipPreLaunchKernel(hStream, dim3(globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ), dim3(localWorkSizeX, localWorkSizeY, localWorkSizeZ), &lp, f->_name.c_str());
|
||||
|
||||
|
||||
hsa_kernel_dispatch_packet_t aql;
|
||||
@@ -430,12 +424,12 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f,
|
||||
//aql.completion_signal._handle = 0;
|
||||
//aql.kernarg_address = 0;
|
||||
|
||||
aql.workgroup_size_x = blockDimX;
|
||||
aql.workgroup_size_y = blockDimY;
|
||||
aql.workgroup_size_z = blockDimZ;
|
||||
aql.grid_size_x = blockDimX * gridDimX;
|
||||
aql.grid_size_y = blockDimY * gridDimY;
|
||||
aql.grid_size_z = blockDimZ * gridDimZ;
|
||||
aql.workgroup_size_x = localWorkSizeX;
|
||||
aql.workgroup_size_y = localWorkSizeY;
|
||||
aql.workgroup_size_z = localWorkSizeZ;
|
||||
aql.grid_size_x = globalWorkSizeX;
|
||||
aql.grid_size_y = globalWorkSizeY;
|
||||
aql.grid_size_z = globalWorkSizeZ;
|
||||
aql.group_segment_size = f->_groupSegmentSize + sharedMemBytes;
|
||||
aql.private_segment_size = f->_privateSegmentSize;
|
||||
aql.kernel_object = f->_object;
|
||||
@@ -459,9 +453,40 @@ hipError_t hipModuleLaunchKernel(hipFunction_t f,
|
||||
ihipPostLaunchKernel(f->_name.c_str(), hStream, lp);
|
||||
}
|
||||
|
||||
return ihipLogStatus(ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
hipError_t hipModuleLaunchKernel(hipFunction_t f,
|
||||
uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ,
|
||||
uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ,
|
||||
uint32_t sharedMemBytes, hipStream_t hStream,
|
||||
void **kernelParams, void **extra)
|
||||
{
|
||||
HIP_INIT_API(f, gridDimX, gridDimY, gridDimZ,
|
||||
blockDimX, blockDimY, blockDimZ,
|
||||
sharedMemBytes, hStream,
|
||||
kernelParams, extra);
|
||||
return ihipLogStatus(ihipModuleLaunchKernel(f,
|
||||
blockDimX * gridDimX, blockDimY * gridDimY, gridDimZ * blockDimZ,
|
||||
blockDimX, blockDimY, blockDimZ,
|
||||
sharedMemBytes, hStream, kernelParams, extra));
|
||||
}
|
||||
|
||||
|
||||
hipError_t hipHccModuleLaunchKernel(hipFunction_t f,
|
||||
uint32_t globalWorkSizeX, uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ,
|
||||
uint32_t localWorkSizeX, uint32_t localWorkSizeY, uint32_t localWorkSizeZ,
|
||||
size_t sharedMemBytes, hipStream_t hStream,
|
||||
void **kernelParams, void **extra)
|
||||
{
|
||||
HIP_INIT_API(f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ,
|
||||
localWorkSizeX, localWorkSizeY, localWorkSizeZ,
|
||||
sharedMemBytes, hStream,
|
||||
kernelParams, extra);
|
||||
return ihipLogStatus(ihipModuleLaunchKernel(f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ,
|
||||
localWorkSizeX, localWorkSizeY, localWorkSizeZ,
|
||||
sharedMemBytes, hStream, kernelParams, extra));
|
||||
}
|
||||
|
||||
hipError_t hipModuleGetGlobal(hipDeviceptr_t *dptr, size_t *bytes,
|
||||
hipModule_t hmod, const char* name)
|
||||
|
||||
Referencia en una nueva incidencia
Block a user