SWDEV-286322 - clean up trailing space (#2361)

Change-Id: I03c07e67a8d1fa1a874718ffba43eb396c2aa05c
Este commit está contenido en:
Julia Jiang
2021-09-24 06:57:51 -04:00
cometido por GitHub
padre 770b64e848
commit 43a2f75882
Se han modificado 2 ficheros con 33 adiciones y 33 borrados
@@ -38,9 +38,9 @@ THE SOFTWARE.
// Kernel with an empty body: it takes no arguments and performs no work.
// NOTE(review): presumably this is the kernel dispatched by the launch-timing
// code in this file, so that only launch overhead is measured -- confirm
// against the launch sites, which are not visible here.
__global__ void EmptyKernel() { }
void print_timing(std::string test, const std::array<float, TOTAL_RUN_COUNT> &results, int batch = 1) {
float total_us = 0.0f, mean_us = 0.0f, stddev_us = 0.0f;
// skip warm-up runs
auto start_iter = std::next(results.begin(), WARMUP_RUN_COUNT);
auto end_iter = results.end();
@@ -48,7 +48,7 @@ void print_timing(std::string test, const std::array<float, TOTAL_RUN_COUNT> &re
// mean
std::for_each(start_iter, end_iter, [&](const float &run_ms) {
total_us += (run_ms * 1000) / batch;
});
});
mean_us = total_us / TIMING_RUN_COUNT;
// stddev
@@ -63,18 +63,18 @@ void print_timing(std::string test, const std::array<float, TOTAL_RUN_COUNT> &re
printf("\n %s: %.1f us, std: %.1f us\n", test.c_str(), mean_us, stddev_us);
}
int main() {
int main() {
hipStream_t stream0 = 0;
hipDevice_t device;
hipDeviceGet(&device, 0);
hipCtx_t context;
hipCtxCreate(&context, 0, device);
hipCtx_t context;
hipCtxCreate(&context, 0, device);
hipModule_t module;
hipFunction_t function;
hipModuleLoad(&module, FILE_NAME);
hipModuleGetFunction(&function, module, KERNEL_NAME);
void* params = nullptr;
std::array<float, TOTAL_RUN_COUNT> results;
hipEvent_t start, stop;
hipEventCreate(&start);
@@ -83,7 +83,7 @@ int main() {
/************************************************************************************/
/* HIP kernel launch enqueue rate: */
/* Measure time taken to enqueue a kernel on the GPU */
/************************************************************************************/
/************************************************************************************/
// Timing hipModuleLaunchKernel
for (auto i = 0; i < TOTAL_RUN_COUNT; ++i) {
@@ -104,8 +104,8 @@ int main() {
print_timing("hipLaunchKernelGGL enqueue rate", results);
/***********************************************************************************/
/* Single dispatch execution latency using HIP events: */
/* Measures latency to start & finish executing a kernel with GPU-scope visibility */
/* Single dispatch execution latency using HIP events: */
/* Measures latency to start & finish executing a kernel with GPU-scope visibility */
/***********************************************************************************/
//Timing around the dispatch
@@ -120,7 +120,7 @@ int main() {
/*********************************************************************************/
/* Batch dispatch execution latency using HIP events: */
/* Measures latency to start & finish executing each dispatch in a batch */
/* Measures latency to start & finish executing each dispatch in a batch */
/*********************************************************************************/
for (auto i = 0; i < TOTAL_RUN_COUNT; ++i) {
+22 -22
Ver fichero
@@ -29,23 +29,23 @@ THE SOFTWARE.
// Device (Kernel) function
/**
 * Element-wise multiplication kernel: C[k] = A[k] * B[k].
 *
 * Each thread derives one flat index from its block and thread
 * coordinates along x and handles at most one element.
 *
 * @param C  output array, written at the thread's index
 * @param A  first input array
 * @param B  second input array
 * @param N  element count; threads whose index is >= N do nothing
 */
__global__ void multiply(float* C, float* A, float* B, int N){
    const int idx = blockDim.x*blockIdx.x+threadIdx.x;
    if (idx >= N){
        return;  // surplus thread past the end of the arrays
    }
    C[idx] = A[idx] * B[idx];
}
// CPU implementation
void multiplyCPU(float* C, float* A, float* B, int N){
for(unsigned int i=0; i<N; i++){
C[i] = A[i] * B[i];
for(unsigned int i=0; i<N; i++){
C[i] = A[i] * B[i];
}
}
void launchKernel(float* C, float* A, float* B, bool manual){
hipDeviceProp_t devProp;
HIP_CHECK(hipGetDeviceProperties(&devProp, 0));
@@ -59,9 +59,9 @@ void launchKernel(float* C, float* A, float* B, bool manual){
int mingridSize = 0;
int gridSize = 0;
int blockSize = 0;
if (manual){
blockSize = threadsperblock;
blockSize = threadsperblock;
gridSize = blocks;
std::cout << std::endl << "Manual Configuration with block size " << blockSize << std::endl;
}
@@ -69,15 +69,15 @@ void launchKernel(float* C, float* A, float* B, bool manual){
HIP_CHECK(hipOccupancyMaxPotentialBlockSize(&mingridSize, &blockSize, multiply, 0, 0));
std::cout << std::endl << "Automatic Configuation based on hipOccupancyMaxPotentialBlockSize " << std::endl;
std::cout << "Suggested blocksize is " << blockSize << ", Minimum gridsize is " << mingridSize << std::endl;
gridSize = (NUM/blockSize)+1;
gridSize = (NUM/blockSize)+1;
}
// Record the start event
HIP_CHECK(hipEventRecord(start, NULL));
HIP_CHECK(hipEventRecord(start, NULL));
// Launching the Kernel from Host
hipLaunchKernelGGL(multiply, dim3(gridSize), dim3(blockSize), 0, 0, C, A, B, NUM);
// Record the stop event
HIP_CHECK(hipEventRecord(stop, NULL));
HIP_CHECK(hipEventSynchronize(stop));
@@ -88,7 +88,7 @@ void launchKernel(float* C, float* A, float* B, bool manual){
//Calculate Occupancy
int numBlock = 0;
HIP_CHECK(hipOccupancyMaxActiveBlocksPerMultiprocessor(&numBlock, multiply, blockSize, 0));
if(devProp.maxThreadsPerMultiProcessor){
std::cout << "Theoretical Occupancy is " << (double)numBlock* blockSize/devProp.maxThreadsPerMultiProcessor * 100 << "%" << std::endl;
}
@@ -106,26 +106,26 @@ int main() {
C0 = (float *)malloc(NUM * sizeof(float));
C1 = (float *)malloc(NUM * sizeof(float));
cpuC = (float *)malloc(NUM * sizeof(float));
for(i=0; i< NUM; i++){
A[i] = i;
B[i] = i;
}
// allocate the memory on the device side
// allocate the memory on the device side
HIP_CHECK(hipMalloc((void**)&Ad, NUM * sizeof(float)));
HIP_CHECK(hipMalloc((void**)&Bd, NUM * sizeof(float)));
HIP_CHECK(hipMalloc((void**)&C0d, NUM * sizeof(float)));
HIP_CHECK(hipMalloc((void**)&C1d, NUM * sizeof(float)));
// Memory transfer from host to device
HIP_CHECK(hipMemcpy(Ad,A,NUM * sizeof(float), hipMemcpyHostToDevice));
HIP_CHECK(hipMemcpy(Bd,B,NUM * sizeof(float), hipMemcpyHostToDevice));
//Kernel launch with manual/default block size
launchKernel(C0d, Ad, Bd, 1);
//Kernel launch with the block size suggested by hipOccupancyMaxPotentialBlockSize
//Kernel launch with the block size suggested by hipOccupancyMaxPotentialBlockSize
launchKernel(C1d, Ad, Bd, 0);
// Memory transfer from device to host
@@ -137,26 +137,26 @@ int main() {
//verify the results
double eps = 1.0E-6;
for (i = 0; i < NUM; i++) {
if (std::abs(C0[i] - cpuC[i]) > eps) {
errors++;
}
}
if (errors != 0){
printf("\nManual Test FAILED: %d errors\n", errors);
errors=0;
} else {
printf("\nManual Test PASSED!\n");
}
for (i = 0; i < NUM; i++) {
if (std::abs(C1[i] - cpuC[i]) > eps) {
errors++;
}
}
if (errors != 0){
printf("\n Automatic Test FAILED: %d errors\n", errors);
} else {