SWDEV-286322 - clean up trailing space (#2361)
Change-Id: I03c07e67a8d1fa1a874718ffba43eb396c2aa05c
Este commit está contenido en:
@@ -38,9 +38,9 @@ THE SOFTWARE.
|
||||
__global__ void EmptyKernel() { }
|
||||
|
||||
void print_timing(std::string test, const std::array<float, TOTAL_RUN_COUNT> &results, int batch = 1) {
|
||||
|
||||
|
||||
float total_us = 0.0f, mean_us = 0.0f, stddev_us = 0.0f;
|
||||
|
||||
|
||||
// skip warm-up runs
|
||||
auto start_iter = std::next(results.begin(), WARMUP_RUN_COUNT);
|
||||
auto end_iter = results.end();
|
||||
@@ -48,7 +48,7 @@ void print_timing(std::string test, const std::array<float, TOTAL_RUN_COUNT> &re
|
||||
// mean
|
||||
std::for_each(start_iter, end_iter, [&](const float &run_ms) {
|
||||
total_us += (run_ms * 1000) / batch;
|
||||
});
|
||||
});
|
||||
mean_us = total_us / TIMING_RUN_COUNT;
|
||||
|
||||
// stddev
|
||||
@@ -63,18 +63,18 @@ void print_timing(std::string test, const std::array<float, TOTAL_RUN_COUNT> &re
|
||||
printf("\n %s: %.1f us, std: %.1f us\n", test.c_str(), mean_us, stddev_us);
|
||||
}
|
||||
|
||||
int main() {
|
||||
int main() {
|
||||
hipStream_t stream0 = 0;
|
||||
hipDevice_t device;
|
||||
hipDeviceGet(&device, 0);
|
||||
hipCtx_t context;
|
||||
hipCtxCreate(&context, 0, device);
|
||||
hipCtx_t context;
|
||||
hipCtxCreate(&context, 0, device);
|
||||
hipModule_t module;
|
||||
hipFunction_t function;
|
||||
hipModuleLoad(&module, FILE_NAME);
|
||||
hipModuleGetFunction(&function, module, KERNEL_NAME);
|
||||
void* params = nullptr;
|
||||
|
||||
|
||||
std::array<float, TOTAL_RUN_COUNT> results;
|
||||
hipEvent_t start, stop;
|
||||
hipEventCreate(&start);
|
||||
@@ -83,7 +83,7 @@ int main() {
|
||||
/************************************************************************************/
|
||||
/* HIP kernel launch enqueue rate: */
|
||||
/* Measure time taken to enqueue a kernel on the GPU */
|
||||
/************************************************************************************/
|
||||
/************************************************************************************/
|
||||
|
||||
// Timing hipModuleLaunchKernel
|
||||
for (auto i = 0; i < TOTAL_RUN_COUNT; ++i) {
|
||||
@@ -104,8 +104,8 @@ int main() {
|
||||
print_timing("hipLaunchKernelGGL enqueue rate", results);
|
||||
|
||||
/***********************************************************************************/
|
||||
/* Single dispatch execution latency using HIP events: */
|
||||
/* Measures latency to start & finish executing a kernel with GPU-scope visibility */
|
||||
/* Single dispatch execution latency using HIP events: */
|
||||
/* Measures latency to start & finish executing a kernel with GPU-scope visibility */
|
||||
/***********************************************************************************/
|
||||
|
||||
//Timing around the dispatch
|
||||
@@ -120,7 +120,7 @@ int main() {
|
||||
|
||||
/*********************************************************************************/
|
||||
/* Batch dispatch execution latency using HIP events: */
|
||||
/* Measures latency to start & finish executing each dispatch in a batch */
|
||||
/* Measures latency to start & finish executing each dispatch in a batch */
|
||||
/*********************************************************************************/
|
||||
|
||||
for (auto i = 0; i < TOTAL_RUN_COUNT; ++i) {
|
||||
|
||||
@@ -29,23 +29,23 @@ THE SOFTWARE.
|
||||
|
||||
// Device (Kernel) function
|
||||
__global__ void multiply(float* C, float* A, float* B, int N){
|
||||
|
||||
|
||||
int tx = blockDim.x*blockIdx.x+threadIdx.x;
|
||||
|
||||
|
||||
if (tx < N){
|
||||
C[tx] = A[tx] * B[tx];
|
||||
}
|
||||
}
|
||||
// CPU implementation
|
||||
void multiplyCPU(float* C, float* A, float* B, int N){
|
||||
|
||||
for(unsigned int i=0; i<N; i++){
|
||||
C[i] = A[i] * B[i];
|
||||
|
||||
for(unsigned int i=0; i<N; i++){
|
||||
C[i] = A[i] * B[i];
|
||||
}
|
||||
}
|
||||
|
||||
void launchKernel(float* C, float* A, float* B, bool manual){
|
||||
|
||||
|
||||
hipDeviceProp_t devProp;
|
||||
HIP_CHECK(hipGetDeviceProperties(&devProp, 0));
|
||||
|
||||
@@ -59,9 +59,9 @@ void launchKernel(float* C, float* A, float* B, bool manual){
|
||||
int mingridSize = 0;
|
||||
int gridSize = 0;
|
||||
int blockSize = 0;
|
||||
|
||||
|
||||
if (manual){
|
||||
blockSize = threadsperblock;
|
||||
blockSize = threadsperblock;
|
||||
gridSize = blocks;
|
||||
std::cout << std::endl << "Manual Configuration with block size " << blockSize << std::endl;
|
||||
}
|
||||
@@ -69,15 +69,15 @@ void launchKernel(float* C, float* A, float* B, bool manual){
|
||||
HIP_CHECK(hipOccupancyMaxPotentialBlockSize(&mingridSize, &blockSize, multiply, 0, 0));
|
||||
std::cout << std::endl << "Automatic Configuation based on hipOccupancyMaxPotentialBlockSize " << std::endl;
|
||||
std::cout << "Suggested blocksize is " << blockSize << ", Minimum gridsize is " << mingridSize << std::endl;
|
||||
gridSize = (NUM/blockSize)+1;
|
||||
gridSize = (NUM/blockSize)+1;
|
||||
}
|
||||
|
||||
// Record the start event
|
||||
HIP_CHECK(hipEventRecord(start, NULL));
|
||||
HIP_CHECK(hipEventRecord(start, NULL));
|
||||
|
||||
// Launching the Kernel from Host
|
||||
hipLaunchKernelGGL(multiply, dim3(gridSize), dim3(blockSize), 0, 0, C, A, B, NUM);
|
||||
|
||||
|
||||
// Record the stop event
|
||||
HIP_CHECK(hipEventRecord(stop, NULL));
|
||||
HIP_CHECK(hipEventSynchronize(stop));
|
||||
@@ -88,7 +88,7 @@ void launchKernel(float* C, float* A, float* B, bool manual){
|
||||
//Calculate Occupancy
|
||||
int numBlock = 0;
|
||||
HIP_CHECK(hipOccupancyMaxActiveBlocksPerMultiprocessor(&numBlock, multiply, blockSize, 0));
|
||||
|
||||
|
||||
if(devProp.maxThreadsPerMultiProcessor){
|
||||
std::cout << "Theoretical Occupancy is " << (double)numBlock* blockSize/devProp.maxThreadsPerMultiProcessor * 100 << "%" << std::endl;
|
||||
}
|
||||
@@ -106,26 +106,26 @@ int main() {
|
||||
C0 = (float *)malloc(NUM * sizeof(float));
|
||||
C1 = (float *)malloc(NUM * sizeof(float));
|
||||
cpuC = (float *)malloc(NUM * sizeof(float));
|
||||
|
||||
|
||||
for(i=0; i< NUM; i++){
|
||||
A[i] = i;
|
||||
B[i] = i;
|
||||
}
|
||||
|
||||
// allocate the memory on the device side
|
||||
|
||||
// allocate the memory on the device side
|
||||
HIP_CHECK(hipMalloc((void**)&Ad, NUM * sizeof(float)));
|
||||
HIP_CHECK(hipMalloc((void**)&Bd, NUM * sizeof(float)));
|
||||
HIP_CHECK(hipMalloc((void**)&C0d, NUM * sizeof(float)));
|
||||
HIP_CHECK(hipMalloc((void**)&C1d, NUM * sizeof(float)));
|
||||
|
||||
|
||||
// Memory transfer from host to device
|
||||
HIP_CHECK(hipMemcpy(Ad,A,NUM * sizeof(float), hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipMemcpy(Bd,B,NUM * sizeof(float), hipMemcpyHostToDevice));
|
||||
|
||||
//Kernel launch with manual/default block size
|
||||
launchKernel(C0d, Ad, Bd, 1);
|
||||
|
||||
//Kernel launch with the block size suggested by hipOccupancyMaxPotentialBlockSize
|
||||
|
||||
//Kernel launch with the block size suggested by hipOccupancyMaxPotentialBlockSize
|
||||
launchKernel(C1d, Ad, Bd, 0);
|
||||
|
||||
// Memory transfer from device to host
|
||||
@@ -137,26 +137,26 @@ int main() {
|
||||
|
||||
//verify the results
|
||||
double eps = 1.0E-6;
|
||||
|
||||
|
||||
for (i = 0; i < NUM; i++) {
|
||||
if (std::abs(C0[i] - cpuC[i]) > eps) {
|
||||
errors++;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (errors != 0){
|
||||
printf("\nManual Test FAILED: %d errors\n", errors);
|
||||
errors=0;
|
||||
} else {
|
||||
printf("\nManual Test PASSED!\n");
|
||||
}
|
||||
|
||||
|
||||
for (i = 0; i < NUM; i++) {
|
||||
if (std::abs(C1[i] - cpuC[i]) > eps) {
|
||||
errors++;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (errors != 0){
|
||||
printf("\n Automatic Test FAILED: %d errors\n", errors);
|
||||
} else {
|
||||
|
||||
Referencia en una nueva incidencia
Block a user