Sample improvements.
- Enable -O3 for hipDispatchLatency.
- Use a nearly-null kernel so that the compiler does not optimize the kernel launch away.
- Formatting for hipDispatchLatency.
- Formatting for hipInfo.
[ROCm/hip commit: 1160cefc6d]
This commit is contained in:
@@ -6,10 +6,12 @@ HIPCC=$(HIP_PATH)/bin/hipcc
|
||||
|
||||
EXE=hipDispatchLatency
|
||||
|
||||
all: install
|
||||
CXXFLAGS = -O3
|
||||
|
||||
$(EXE): hipDispatchLatency.cpp
|
||||
$(HIPCC) hipDispatchLatency.cpp ResultDatabase.cpp -o $@
|
||||
all: ${EXE}
|
||||
|
||||
$(EXE): hipDispatchLatency.cpp ResultDatabase.cpp
|
||||
$(HIPCC) $(CXXFLAGS) hipDispatchLatency.cpp ResultDatabase.cpp -o $@
|
||||
|
||||
install: $(EXE)
|
||||
cp $(EXE) $(HIP_PATH)/bin
|
||||
|
||||
@@ -253,10 +253,12 @@ void ResultDatabase::DumpDetailed(ostream &out)
|
||||
|
||||
out << endl;
|
||||
}
|
||||
out << endl
|
||||
<< "Note: Any results marked with (*) had missing values." << endl
|
||||
<< " This can occur on systems with a mixture of" << endl
|
||||
<< " device types or architectural capabilities." << endl;
|
||||
if (0) {
|
||||
out << endl
|
||||
<< "Note: Any results marked with (*) had missing values." << endl
|
||||
<< " This can occur on systems with a mixture of" << endl
|
||||
<< " device types or architectural capabilities." << endl;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -330,9 +332,11 @@ void ResultDatabase::DumpSummary(ostream &out)
|
||||
|
||||
out << endl;
|
||||
}
|
||||
out << endl
|
||||
if (0) {
|
||||
out << endl
|
||||
<< "Note: results marked with (*) had missing values such as" << endl
|
||||
<< "might occur with a mixture of architectural capabilities." << endl;
|
||||
}
|
||||
}
|
||||
|
||||
// ****************************************************************************
|
||||
|
||||
@@ -30,15 +30,22 @@ if(status != hipSuccess){ \
|
||||
|
||||
#define LEN 1024*1024
|
||||
#define SIZE LEN * sizeof(float)
|
||||
#define ITER 5120
|
||||
#define ITER 10120
|
||||
|
||||
__global__ void One(hipLaunchParm lp, float* Ad){
|
||||
|
||||
// HCC optimizes away fully NULL kernel calls, so run one that is nearly null:
|
||||
__global__ void NearlyNull(hipLaunchParm lp, float* Ad){
|
||||
if (Ad) {
|
||||
Ad[0] = 42;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int main(){
|
||||
|
||||
hipError_t err;
|
||||
float *A, *Ad;
|
||||
float *A;
|
||||
float *Ad = NULL;
|
||||
|
||||
A = new float[LEN];
|
||||
|
||||
@@ -50,11 +57,10 @@ int main(){
|
||||
err = hipStreamCreate(&stream);
|
||||
check("Creating stream",err);
|
||||
|
||||
err = hipMalloc(&Ad, SIZE);
|
||||
check("Allocating Ad memory on device", err);
|
||||
|
||||
err = hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice);
|
||||
check("Doing memory copy from A to Ad", err);
|
||||
//err = hipMalloc(&Ad, SIZE);
|
||||
//check("Allocating Ad memory on device", err);
|
||||
//err = hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice);
|
||||
//check("Doing memory copy from A to Ad", err);
|
||||
|
||||
float mS = 0;
|
||||
hipEvent_t start, stop;
|
||||
@@ -63,15 +69,16 @@ int main(){
|
||||
|
||||
ResultDatabase resultDB[8];
|
||||
|
||||
|
||||
hipEventRecord(start);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(One), dim3(LEN/512), dim3(512), 0, 0, Ad);
|
||||
hipLaunchKernel(NearlyNull, dim3(LEN/512), dim3(512), 0, 0, Ad);
|
||||
hipEventRecord(stop);
|
||||
hipEventElapsedTime(&mS, start, stop);
|
||||
resultDB[0].AddResult(std::string("First Kernel Launch"), "", "uS", mS*1000);
|
||||
// std::cout<<"First Kernel Launch: \t\t"<<mS*1000<<" uS"<<std::endl;
|
||||
resultDB[0].DumpSummary(std::cout);
|
||||
hipEventRecord(start);
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(One), dim3(LEN/512), dim3(512), 0, 0, Ad);
|
||||
hipLaunchKernel(NearlyNull, dim3(LEN/512), dim3(512), 0, 0, Ad);
|
||||
hipEventRecord(stop);
|
||||
hipEventElapsedTime(&mS, start, stop);
|
||||
resultDB[1].AddResult(std::string("Second Kernel Launch"), "", "uS", mS*1000);
|
||||
@@ -79,7 +86,7 @@ int main(){
|
||||
resultDB[1].DumpSummary(std::cout);
|
||||
hipEventRecord(start);
|
||||
for(int i=0;i<ITER;i++){
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(One), dim3(LEN/512), dim3(512), 0, 0, Ad);
|
||||
hipLaunchKernel(NearlyNull, dim3(LEN/512), dim3(512), 0, 0, Ad);
|
||||
}
|
||||
hipDeviceSynchronize();
|
||||
hipEventRecord(stop);
|
||||
@@ -91,7 +98,7 @@ int main(){
|
||||
|
||||
hipEventRecord(start);
|
||||
for(int i=0;i<ITER;i++){
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(One), dim3(LEN/512), dim3(512), 0, 0, Ad);
|
||||
hipLaunchKernel(NearlyNull, dim3(LEN/512), dim3(512), 0, 0, Ad);
|
||||
}
|
||||
hipEventRecord(stop);
|
||||
hipDeviceSynchronize();
|
||||
@@ -103,7 +110,7 @@ int main(){
|
||||
|
||||
hipEventRecord(start);
|
||||
for(int i=0;i<ITER;i++){
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(One), dim3(LEN/512), dim3(512), 0, stream, Ad);
|
||||
hipLaunchKernel(NearlyNull, dim3(LEN/512), dim3(512), 0, stream, Ad);
|
||||
hipDeviceSynchronize();
|
||||
}
|
||||
hipEventRecord(stop);
|
||||
@@ -114,7 +121,7 @@ int main(){
|
||||
hipDeviceSynchronize();
|
||||
hipEventRecord(start);
|
||||
for(int i=0;i<ITER;i++){
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(One), dim3(LEN/512), dim3(512), 0, stream, Ad);
|
||||
hipLaunchKernel(NearlyNull, dim3(LEN/512), dim3(512), 0, stream, Ad);
|
||||
}
|
||||
hipDeviceSynchronize();
|
||||
hipEventRecord(stop);
|
||||
@@ -126,7 +133,7 @@ int main(){
|
||||
|
||||
hipEventRecord(start);
|
||||
for(int i=0;i<ITER;i++){
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(One), dim3(LEN/512), dim3(512), 0, 0, Ad);
|
||||
hipLaunchKernel(NearlyNull, dim3(LEN/512), dim3(512), 0, 0, Ad);
|
||||
}
|
||||
hipEventRecord(stop);
|
||||
hipEventElapsedTime(&mS, start, stop);
|
||||
@@ -137,7 +144,7 @@ int main(){
|
||||
|
||||
hipEventRecord(start);
|
||||
for(int i=0;i<ITER;i++){
|
||||
hipLaunchKernel(HIP_KERNEL_NAME(One), dim3(LEN/512), dim3(512), 0, stream, Ad);
|
||||
hipLaunchKernel(NearlyNull, dim3(LEN/512), dim3(512), 0, stream, Ad);
|
||||
}
|
||||
hipEventRecord(stop);
|
||||
hipEventElapsedTime(&mS, start, stop);
|
||||
|
||||
@@ -65,7 +65,7 @@ double bytesToGB(size_t s)
|
||||
void printDeviceProp (int deviceId)
|
||||
{
|
||||
using namespace std;
|
||||
const int w1 = 30;
|
||||
const int w1 = 34;
|
||||
|
||||
cout << left;
|
||||
|
||||
|
||||
Reference in new issue
Block a user