SWDEV-268552 - HIP sample hipDispatchEnqueueRateMT segfaults, updated to use hipModuleLoadData

Change-Id: Ibf34c780d5bd4278adcedba1d6e057637c258485
2021-01-19 09:30:13 -05:00
@@ -23,6 +23,7 @@ THE SOFTWARE.
 #include "hip/hip_ext.h"
 #endif
 #include <iostream>
+#include <fstream>
 #include <chrono>
 #include <algorithm>
 #include <atomic>
@@ -36,9 +37,34 @@ THE SOFTWARE.
 #define WARMUP_RUN_COUNT 10
 #define TIMING_RUN_COUNT 100
 #define TOTAL_RUN_COUNT WARMUP_RUN_COUNT + TIMING_RUN_COUNT
+#define FILENAME "test_kernel.code"
+#define failed(...)                                                                                \
+    abort();
+
+#define HIPCHECK(error)                                                                            \
+    {                                                                                              \
+        hipError_t localError = error;                                                             \
+        if ((localError != hipSuccess) && (localError != hipErrorPeerAccessAlreadyEnabled)) {      \
+            printf("error: '%s'(%d) from %s at %s:%d\n",  hipGetErrorString(localError),           \
+                   localError, #error, __FILE__, __LINE__);                                        \
+                   failed("API returned error code.");                                             \
+        }                                                                                          \
+    }
+

 __global__ void EmptyKernel() {}

+std::vector<char> load_file() {
+  std::ifstream file(FILENAME, std::ios::binary | std::ios::ate);
+  std::streamsize fsize = file.tellg();
+  file.seekg(0, std::ios::beg);
+
+  std::vector<char> buffer(fsize);
+  if (!file.read(buffer.data(), fsize)) {
+    failed("could not open code object '%s'\n", FILENAME);
+  }
+  return buffer;
+}
 // Helper to print various timing metrics
 void print_timing(std::string test, std::array<float, TOTAL_RUN_COUNT> &results, int batch = 1)
 {
@@ -68,15 +94,17 @@ void print_timing(std::string test, std::array<float, TOTAL_RUN_COUNT> &results,
 }

 // Measure time taken to enqueue a kernel on the GPU using hipModuleLaunchKernel
-void hipModuleLaunchKernel_enqueue_rate(std::atomic_int* shared, int max_threads)
+void hipModuleLaunchKernel_enqueue_rate(const std::vector<char> buffer, std::atomic_int* shared, int max_threads)
 {
    //resources necessary for this thread
    hipStream_t stream;
-    hipStreamCreate(&stream);
+    HIPCHECK(hipStreamCreate(&stream));
    hipModule_t module;
    hipFunction_t function;
-    hipModuleLoad(&module, "test_kernel.code");
-    hipModuleGetFunction(&function, module, "test");
+
+    HIPCHECK(hipModuleLoadData(&module, &buffer[0]));
+    HIPCHECK(hipModuleGetFunction(&function, module, "test"));
+
    void* kernel_params = nullptr;
    std::array<float, TOTAL_RUN_COUNT> results;

@@ -86,19 +114,20 @@ void hipModuleLaunchKernel_enqueue_rate(std::atomic_int* shared, int max_threads

    for (auto i = 0; i < TOTAL_RUN_COUNT; ++i) {
        auto start = std::chrono::high_resolution_clock::now();
-        hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, stream, &kernel_params, nullptr);
+        HIPCHECK(hipModuleLaunchKernel(function, 1, 1, 1, 1, 1, 1, 0, stream, &kernel_params, nullptr));
        auto stop = std::chrono::high_resolution_clock::now();
        results[i] = std::chrono::duration<double, std::milli>(stop - start).count();
    }
    print_timing("Thread ID : " + std::to_string(tid) + " , " + "hipModuleLaunchKernel enqueue rate", results);
+    HIPCHECK(hipStreamDestroy(stream));
 }

 // Measure time taken to enqueue a kernel on the GPU using hipLaunchKernelGGL
-void hipLaunchKernelGGL_enqueue_rate(std::atomic_int* shared, int max_threads)
+void hipLaunchKernelGGL_enqueue_rate(const std::vector<char> buffer, std::atomic_int* shared, int max_threads)
 {
    //resources necessary for this thread
    hipStream_t stream;
-    hipStreamCreate(&stream);
+    HIPCHECK(hipStreamCreate(&stream));
    std::array<float, TOTAL_RUN_COUNT> results;

    //synchronize all threads, before running
@@ -112,14 +141,16 @@ void hipLaunchKernelGGL_enqueue_rate(std::atomic_int* shared, int max_threads)
        results[i] = std::chrono::duration<double, std::milli>(stop - start).count();
    }
    print_timing("Thread ID : " + std::to_string(tid) + " , " + "hipLaunchKernelGGL enqueue rate", results);
+    HIPCHECK(hipStreamDestroy(stream));
 }

 // Simple thread pool
 struct thread_pool {
    thread_pool(int total_threads) : max_threads(total_threads) {}
-    void start(std::function<void(std::atomic_int*, int)> f) {
+    void start(std::function<void(const std::vector<char>, std::atomic_int*, int)> f) {
+        auto buffer = load_file();
        for (int i = 0; i < max_threads; ++i) {
-            threads.push_back(std::async(std::launch::async, f, &shared, max_threads));
+            threads.push_back(std::async(std::launch::async, f, buffer, &shared, max_threads));
        }
    }
    void finish() {
@@ -127,7 +158,7 @@ struct thread_pool {
            thread.get();
        }
        threads.clear();
-        shared = {0};
+        shared = 0;
    }
    ~thread_pool() {
        finish();
@@ -162,7 +193,6 @@ int main(int argc, char* argv[])
        task.start(hipLaunchKernelGGL_enqueue_rate);
        task.finish();
    }
-
    return 0;
 }