- Enable -O3 for hipDispatchLatency.
- Use a nearly-null kernel in hipDispatchLatency, because HCC optimizes away fully null kernel calls.
- Formatting for hipDispatchLatency.
- Formatting for hipInfo.


[ROCm/hip commit: 1160cefc6d]
Этот коммит содержится в:
Ben Sander
2016-09-22 12:24:55 -05:00
родитель f13bd90b39
Коммит 13a12f65d5
4 изменённых файлов: 38 добавлений и 25 удалений
+5 -3
Просмотреть файл
@@ -6,10 +6,12 @@ HIPCC=$(HIP_PATH)/bin/hipcc
EXE=hipDispatchLatency
all: install
CXXFLAGS = -O3
$(EXE): hipDispatchLatency.cpp
$(HIPCC) hipDispatchLatency.cpp ResultDatabase.cpp -o $@
all: ${EXE}
$(EXE): hipDispatchLatency.cpp ResultDatabase.cpp
$(HIPCC) $(CXXFLAGS) hipDispatchLatency.cpp ResultDatabase.cpp -o $@
install: $(EXE)
cp $(EXE) $(HIP_PATH)/bin
+9 -5
Просмотреть файл
@@ -253,10 +253,12 @@ void ResultDatabase::DumpDetailed(ostream &out)
out << endl;
}
out << endl
<< "Note: Any results marked with (*) had missing values." << endl
<< " This can occur on systems with a mixture of" << endl
<< " device types or architectural capabilities." << endl;
if (0) {
out << endl
<< "Note: Any results marked with (*) had missing values." << endl
<< " This can occur on systems with a mixture of" << endl
<< " device types or architectural capabilities." << endl;
}
}
@@ -330,9 +332,11 @@ void ResultDatabase::DumpSummary(ostream &out)
out << endl;
}
out << endl
if (0) {
out << endl
<< "Note: results marked with (*) had missing values such as" << endl
<< "might occur with a mixture of architectural capabilities." << endl;
}
}
// ****************************************************************************
+23 -16
Просмотреть файл
@@ -30,15 +30,22 @@ if(status != hipSuccess){ \
#define LEN 1024*1024
#define SIZE LEN * sizeof(float)
#define ITER 5120
#define ITER 10120
__global__ void One(hipLaunchParm lp, float* Ad){
// Nearly-empty kernel launched by the dispatch-latency measurements.
// HCC optimizes away fully NULL kernel calls, so run one that is nearly null:
// the only work is a single store of 42 to Ad[0], performed only when Ad is
// non-null. No thread index is used, so every thread that runs executes the
// same store. `lp` is not referenced in the body.
__global__ void NearlyNull(hipLaunchParm lp, float* Ad){
if (Ad) {
Ad[0] = 42;
}
}
int main(){
hipError_t err;
float *A, *Ad;
float *A;
float *Ad = NULL;
A = new float[LEN];
@@ -50,11 +57,10 @@ int main(){
err = hipStreamCreate(&stream);
check("Creating stream",err);
err = hipMalloc(&Ad, SIZE);
check("Allocating Ad memory on device", err);
err = hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice);
check("Doing memory copy from A to Ad", err);
//err = hipMalloc(&Ad, SIZE);
//check("Allocating Ad memory on device", err);
//err = hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice);
//check("Doing memory copy from A to Ad", err);
float mS = 0;
hipEvent_t start, stop;
@@ -63,15 +69,16 @@ int main(){
ResultDatabase resultDB[8];
hipEventRecord(start);
hipLaunchKernel(HIP_KERNEL_NAME(One), dim3(LEN/512), dim3(512), 0, 0, Ad);
hipLaunchKernel(NearlyNull, dim3(LEN/512), dim3(512), 0, 0, Ad);
hipEventRecord(stop);
hipEventElapsedTime(&mS, start, stop);
resultDB[0].AddResult(std::string("First Kernel Launch"), "", "uS", mS*1000);
// std::cout<<"First Kernel Launch: \t\t"<<mS*1000<<" uS"<<std::endl;
resultDB[0].DumpSummary(std::cout);
hipEventRecord(start);
hipLaunchKernel(HIP_KERNEL_NAME(One), dim3(LEN/512), dim3(512), 0, 0, Ad);
hipLaunchKernel(NearlyNull, dim3(LEN/512), dim3(512), 0, 0, Ad);
hipEventRecord(stop);
hipEventElapsedTime(&mS, start, stop);
resultDB[1].AddResult(std::string("Second Kernel Launch"), "", "uS", mS*1000);
@@ -79,7 +86,7 @@ int main(){
resultDB[1].DumpSummary(std::cout);
hipEventRecord(start);
for(int i=0;i<ITER;i++){
hipLaunchKernel(HIP_KERNEL_NAME(One), dim3(LEN/512), dim3(512), 0, 0, Ad);
hipLaunchKernel(NearlyNull, dim3(LEN/512), dim3(512), 0, 0, Ad);
}
hipDeviceSynchronize();
hipEventRecord(stop);
@@ -91,7 +98,7 @@ int main(){
hipEventRecord(start);
for(int i=0;i<ITER;i++){
hipLaunchKernel(HIP_KERNEL_NAME(One), dim3(LEN/512), dim3(512), 0, 0, Ad);
hipLaunchKernel(NearlyNull, dim3(LEN/512), dim3(512), 0, 0, Ad);
}
hipEventRecord(stop);
hipDeviceSynchronize();
@@ -103,7 +110,7 @@ int main(){
hipEventRecord(start);
for(int i=0;i<ITER;i++){
hipLaunchKernel(HIP_KERNEL_NAME(One), dim3(LEN/512), dim3(512), 0, stream, Ad);
hipLaunchKernel(NearlyNull, dim3(LEN/512), dim3(512), 0, stream, Ad);
hipDeviceSynchronize();
}
hipEventRecord(stop);
@@ -114,7 +121,7 @@ int main(){
hipDeviceSynchronize();
hipEventRecord(start);
for(int i=0;i<ITER;i++){
hipLaunchKernel(HIP_KERNEL_NAME(One), dim3(LEN/512), dim3(512), 0, stream, Ad);
hipLaunchKernel(NearlyNull, dim3(LEN/512), dim3(512), 0, stream, Ad);
}
hipDeviceSynchronize();
hipEventRecord(stop);
@@ -126,7 +133,7 @@ int main(){
hipEventRecord(start);
for(int i=0;i<ITER;i++){
hipLaunchKernel(HIP_KERNEL_NAME(One), dim3(LEN/512), dim3(512), 0, 0, Ad);
hipLaunchKernel(NearlyNull, dim3(LEN/512), dim3(512), 0, 0, Ad);
}
hipEventRecord(stop);
hipEventElapsedTime(&mS, start, stop);
@@ -137,7 +144,7 @@ int main(){
hipEventRecord(start);
for(int i=0;i<ITER;i++){
hipLaunchKernel(HIP_KERNEL_NAME(One), dim3(LEN/512), dim3(512), 0, stream, Ad);
hipLaunchKernel(NearlyNull, dim3(LEN/512), dim3(512), 0, stream, Ad);
}
hipEventRecord(stop);
hipEventElapsedTime(&mS, start, stop);
+1 -1
Просмотреть файл
@@ -65,7 +65,7 @@ double bytesToGB(size_t s)
void printDeviceProp (int deviceId)
{
using namespace std;
const int w1 = 30;
const int w1 = 34;
cout << left;