From 13a12f65d53fe4c8a3a63d9fab0fefe152e1eaa4 Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Thu, 22 Sep 2016 12:24:55 -0500 Subject: [PATCH] Sample improvements. - Enable -O3 for hipDispatchLatency. - Use nearly-null kernel to prevent it from being optimized away. - Formatting for hipDispatchLatency. - Formatting for hipInfo. [ROCm/hip commit: 1160cefc6de506fd3a620f1af3b8f0b53fd5bb60] --- .../1_Utils/hipDispatchLatency/Makefile | 8 ++-- .../hipDispatchLatency/ResultDatabase.cpp | 14 ++++--- .../hipDispatchLatency/hipDispatchLatency.cpp | 39 +++++++++++-------- .../hip/samples/1_Utils/hipInfo/hipInfo.cpp | 2 +- 4 files changed, 38 insertions(+), 25 deletions(-) diff --git a/projects/hip/samples/1_Utils/hipDispatchLatency/Makefile b/projects/hip/samples/1_Utils/hipDispatchLatency/Makefile index 387cb9aac6..3b69c4a335 100644 --- a/projects/hip/samples/1_Utils/hipDispatchLatency/Makefile +++ b/projects/hip/samples/1_Utils/hipDispatchLatency/Makefile @@ -6,10 +6,12 @@ HIPCC=$(HIP_PATH)/bin/hipcc EXE=hipDispatchLatency -all: install +CXXFLAGS = -O3 -$(EXE): hipDispatchLatency.cpp - $(HIPCC) hipDispatchLatency.cpp ResultDatabase.cpp -o $@ +all: ${EXE} + +$(EXE): hipDispatchLatency.cpp ResultDatabase.cpp + $(HIPCC) $(CXXFLAGS) hipDispatchLatency.cpp ResultDatabase.cpp -o $@ install: $(EXE) cp $(EXE) $(HIP_PATH)/bin diff --git a/projects/hip/samples/1_Utils/hipDispatchLatency/ResultDatabase.cpp b/projects/hip/samples/1_Utils/hipDispatchLatency/ResultDatabase.cpp index 2ec686f260..d207154e39 100644 --- a/projects/hip/samples/1_Utils/hipDispatchLatency/ResultDatabase.cpp +++ b/projects/hip/samples/1_Utils/hipDispatchLatency/ResultDatabase.cpp @@ -253,10 +253,12 @@ void ResultDatabase::DumpDetailed(ostream &out) out << endl; } - out << endl - << "Note: Any results marked with (*) had missing values." << endl - << " This can occur on systems with a mixture of" << endl - << " device types or architectural capabilities." << endl; + if (0) { + out << endl + << "Note: Any results marked with (*) had missing values." << endl + << " This can occur on systems with a mixture of" << endl + << " device types or architectural capabilities." << endl; + } } @@ -330,9 +332,11 @@ void ResultDatabase::DumpSummary(ostream &out) out << endl; } - out << endl + if (0) { + out << endl << "Note: results marked with (*) had missing values such as" << endl << "might occur with a mixture of architectural capabilities." << endl; + } } // **************************************************************************** diff --git a/projects/hip/samples/1_Utils/hipDispatchLatency/hipDispatchLatency.cpp b/projects/hip/samples/1_Utils/hipDispatchLatency/hipDispatchLatency.cpp index 1c15ab51d7..65e8603a4e 100644 --- a/projects/hip/samples/1_Utils/hipDispatchLatency/hipDispatchLatency.cpp +++ b/projects/hip/samples/1_Utils/hipDispatchLatency/hipDispatchLatency.cpp @@ -30,15 +30,22 @@ if(status != hipSuccess){ \ #define LEN 1024*1024 #define SIZE LEN * sizeof(float) -#define ITER 5120 +#define ITER 10120 -__global__ void One(hipLaunchParm lp, float* Ad){ + +// HCC optimizes away fully NULL kernel calls, so run one that is nearly null: +__global__ void NearlyNull(hipLaunchParm lp, float* Ad){ + if (Ad) { + Ad[0] = 42; + } } + int main(){ hipError_t err; - float *A, *Ad; + float *A; + float *Ad = NULL; A = new float[LEN]; @@ -50,11 +57,10 @@ int main(){ err = hipStreamCreate(&stream); check("Creating stream",err); - err = hipMalloc(&Ad, SIZE); - check("Allocating Ad memory on device", err); - - err = hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); - check("Doing memory copy from A to Ad", err); + //err = hipMalloc(&Ad, SIZE); + //check("Allocating Ad memory on device", err); + //err = hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice); + //check("Doing memory copy from A to Ad", err); float mS = 0; hipEvent_t start, stop; @@ -63,15 +69,16 @@ int main(){ ResultDatabase resultDB[8]; + hipEventRecord(start); - hipLaunchKernel(HIP_KERNEL_NAME(One), dim3(LEN/512), dim3(512), 0, 0, Ad); + hipLaunchKernel(NearlyNull, dim3(LEN/512), dim3(512), 0, 0, Ad); hipEventRecord(stop); hipEventElapsedTime(&mS, start, stop); resultDB[0].AddResult(std::string("First Kernel Launch"), "", "uS", mS*1000); // std::cout<<"First Kernel Launch: \t\t"<