SWDEV-277697 - [CatchTest] Fix Documentation, Add test to AMD specific, Add HIP Macros, New Binary for multiproc tests
Change-Id: I3783caf85c694b724ed55b778220b8ef9a39f84b
This commit is contained in:
committed by
Jatin Chaudhary
parent
f088812b6f
commit
da360c2aab
+12
-2
@@ -44,15 +44,25 @@ include_directories(
|
||||
${HIP_PATH}/include
|
||||
${JSON_PARSER}
|
||||
)
|
||||
|
||||
if(HIP_PLATFORM MATCHES "amd" AND HIP_COMPILER MATCHES "clang")
|
||||
add_compile_options(-Wall -Wextra -pedantic -Werror)
|
||||
endif()
|
||||
|
||||
cmake_policy(PUSH)
|
||||
if(POLICY CMP0037)
|
||||
cmake_policy(SET CMP0037 OLD)
|
||||
endif()
|
||||
|
||||
# Use clang as host compiler with nvcc
|
||||
if(HIP_COMPILER MATCHES "nvcc")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ccbin clang")
|
||||
endif()
|
||||
|
||||
add_custom_target(build_tests)
|
||||
add_custom_target(test COMMAND ${CMAKE_CTEST_COMMAND})
|
||||
add_dependencies(test build_tests)
|
||||
|
||||
add_subdirectory(unit)
|
||||
add_subdirectory(hipTestMain)
|
||||
add_subdirectory(stress)
|
||||
add_subdirectory(multiproc)
|
||||
cmake_policy(POP)
|
||||
|
||||
+2
-5
@@ -12,7 +12,7 @@ Tests in Catch2 are declared via ```TEST_CASE```.
|
||||
|
||||
## Taking care of existing features
|
||||
- Don’t build on platform: EXCLUDE_(HIP_PLATFORM/HIP_RUNTIME), can be done via CMAKE. Adding source in if(HIP_PLATFORM == amd/nvidia).
|
||||
- HIPCC_OPTIONS/CLANG Options: Can be done via: set_source_files_properties(src.cc PROPERTIES COMPILE_FLAGS “…”).
|
||||
- HIPCC_OPTIONS/CLANG Options: Can be done via: set_source_files_properties(src.cc PROPERTIES COMPILE_FLAGS “…”).
|
||||
- Additional libraries: Can be done via target_link_libraries()
|
||||
- Multiple runs with different args: This can be done by Catch’s Feature: GENERATE(…)
|
||||
Running Subtest: ctest –R “...” (Regex to match the subtest name)
|
||||
@@ -31,8 +31,6 @@ Some useful functions are:
|
||||
- `bool isLinux()` : true if os is linux
|
||||
- `bool isAmd()` : true if platform is AMD
|
||||
- `bool isNvidia()` : true if platform is NVIDIA
|
||||
- `std::vector<std::string> getDevices()` : returns a vector of strings that contains device names (eg: For AMD: gfx906, gfx908 etc / For NVIDIA: RTX 2070 Super)
|
||||
- `std::vector<std::string> getTargetId()` : (AMD Only) returns target id for gpus (eg: gfx906:sramecc+:xnack- etc)
|
||||
|
||||
This information can be accessed in any test via using: `TestContext::get().isAmd()`.
|
||||
|
||||
@@ -72,5 +70,4 @@ Catch2 allows multiple ways in which you can debug the test case.
|
||||
|
||||
## External Libs being used
|
||||
- [Catch2](https://github.com/catchorg/Catch2) - Testing framework
|
||||
- [taocpp/json](https://github.com/taocpp/json) - For config file parsing
|
||||
- [taocpp/PEGTL](https://github.com/taocpp/PEGTL) - Helper lib for taojson
|
||||
- [picojson](https://github.com/kazuho/picojson) - For config file parsing
|
||||
|
||||
@@ -1,11 +1,29 @@
|
||||
if(CMAKE_BUILD_TYPE MATCHES "^Debug$")
|
||||
add_definitions(-DHT_LOG_ENABLE)
|
||||
endif()
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTAO_PEGTL_STD_EXPERIMENTAL_FILESYSTEM=1")
|
||||
add_library(ht_context SHARED EXCLUDE_FROM_ALL hip_test_context.cc)
|
||||
set_property(TARGET ht_context PROPERTY CXX_STANDARD 17)
|
||||
target_link_libraries(ht_context PRIVATE stdc++fs)
|
||||
add_executable(UnitTests EXCLUDE_FROM_ALL main.cc)
|
||||
target_link_libraries(UnitTests PRIVATE ht_context DeviceLibs MemoryTest Kernels stdc++fs)
|
||||
|
||||
add_executable(UnitTests EXCLUDE_FROM_ALL main.cc hip_test_context.cc)
|
||||
set_property(TARGET UnitTests PROPERTY CXX_STANDARD 17)
|
||||
|
||||
target_link_libraries(UnitTests PRIVATE DeviceLibs
|
||||
MemoryTest
|
||||
Kernels
|
||||
stdc++fs)
|
||||
|
||||
# Add AMD Only Tests
|
||||
if(HIP_PLATFORM MATCHES "amd")
|
||||
target_link_libraries(UnitTests PRIVATE RTC)
|
||||
endif()
|
||||
|
||||
catch_discover_tests(UnitTests PROPERTIES SKIP_REGULAR_EXPRESSION "HIP_SKIP_THIS_TEST")
|
||||
add_dependencies(build_tests UnitTests)
|
||||
|
||||
# Add Multiproc tests as seperate binary
|
||||
if(UNIX)
|
||||
add_executable(MultiProcTests EXCLUDE_FROM_ALL main.cc hip_test_context.cc)
|
||||
set_property(TARGET MultiProcTests PROPERTY CXX_STANDARD 17)
|
||||
target_link_libraries(MultiProcTests PRIVATE MultiProc
|
||||
stdc++fs)
|
||||
catch_discover_tests(MultiProcTests PROPERTIES SKIP_REGULAR_EXPRESSION "HIP_SKIP_THIS_TEST")
|
||||
add_dependencies(build_tests MultiProcTests)
|
||||
endif()
|
||||
|
||||
@@ -15,26 +15,6 @@ namespace fs = std::experimental::filesystem;
|
||||
|
||||
#include <regex>
|
||||
|
||||
static std::string getValue(std::string option, const std::string& opt_str) {
|
||||
std::string s_opt = opt_str;
|
||||
return s_opt.erase(0, option.size());
|
||||
}
|
||||
|
||||
static std::string trimName(std::string input, char trim) {
|
||||
auto pos_ = input.find(trim);
|
||||
auto res = input;
|
||||
if (pos_ == std::string::npos) {
|
||||
input = "";
|
||||
} else {
|
||||
res = input.substr(0, pos_);
|
||||
input = input.substr(pos_);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
const std::vector<std::string>& TestContext::getDevices() const { return config_.devices; }
|
||||
const std::vector<std::string>& TestContext::getTargetId() const { return config_.targetId; }
|
||||
|
||||
void TestContext::detectOS() {
|
||||
#if (HT_WIN == 1)
|
||||
p_windows = true;
|
||||
@@ -57,18 +37,21 @@ void TestContext::fillConfig() {
|
||||
(env_config != nullptr) ? env_config : "Not found, using default config");
|
||||
|
||||
// Check if path has been provided
|
||||
std::string def_config_json = "config.json";
|
||||
std::string config_str;
|
||||
if (env_config != nullptr) {
|
||||
config_str = env_config;
|
||||
} else {
|
||||
config_str = "config.json";
|
||||
config_str = def_config_json;
|
||||
}
|
||||
|
||||
fs::path config_path = config_str;
|
||||
if (config_path.has_parent_path()) {
|
||||
if (config_path.has_parent_path() && config_path.has_filename()) {
|
||||
config_.json_file = config_str;
|
||||
} else if (config_path.has_parent_path()) {
|
||||
config_.json_file = config_path / def_config_json;
|
||||
} else {
|
||||
config_.json_file = exe_path + config_str;
|
||||
config_.json_file = exe_path + def_config_json;
|
||||
}
|
||||
LogPrintf("Config file path: %s", config_.json_file.c_str());
|
||||
|
||||
@@ -79,37 +62,6 @@ void TestContext::fillConfig() {
|
||||
LogPrintf("%s", "Either Config or Os is unknown, this wont end well");
|
||||
abort();
|
||||
}
|
||||
|
||||
int deviceCount = 0;
|
||||
auto res = hipGetDeviceCount(&deviceCount);
|
||||
if (res != hipSuccess) {
|
||||
LogPrintf("HIP Device Count query failed with: %s", hipGetErrorString(res));
|
||||
abort();
|
||||
}
|
||||
if (deviceCount == 0) {
|
||||
LogPrintf("%s", "No hip devices found");
|
||||
abort();
|
||||
}
|
||||
config_.devices.reserve(deviceCount);
|
||||
for (int i = 0; i < deviceCount; i++) {
|
||||
hipDeviceProp_t props;
|
||||
res = hipGetDeviceProperties(&props, i);
|
||||
if (res != hipSuccess) {
|
||||
LogPrintf("HIP Device Count query failed with: %s", hipGetErrorString(res));
|
||||
abort();
|
||||
}
|
||||
if (amd) {
|
||||
std::string tid = std::string(props.gcnArchName);
|
||||
config_.targetId.push_back(tid);
|
||||
std::string dev = trimName(tid, ':');
|
||||
config_.devices.push_back(dev);
|
||||
} else if (nvidia) {
|
||||
config_.devices.push_back(std::string(props.name));
|
||||
} else {
|
||||
LogPrintf("%s", "Unknown platform");
|
||||
abort();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TestContext::TestContext(int argc, char** argv) {
|
||||
@@ -122,6 +74,7 @@ TestContext::TestContext(int argc, char** argv) {
|
||||
}
|
||||
|
||||
void TestContext::setExePath(int argc, char** argv) {
|
||||
if (argc == 0) return;
|
||||
fs::path p = std::string(argv[0]);
|
||||
if (p.has_filename()) p.remove_filename();
|
||||
exe_path = p.string();
|
||||
@@ -168,8 +121,6 @@ bool TestContext::parseJsonFile() {
|
||||
|
||||
picojson::value v;
|
||||
std::string err;
|
||||
const char* json_end =
|
||||
picojson::parse(v, json_str.data(), json_str.data() + json_str.size(), &err);
|
||||
if (err.size() > 1) {
|
||||
LogPrintf("Error from PicoJson: %s", err.data());
|
||||
return false;
|
||||
|
||||
@@ -6,7 +6,7 @@ int main(int argc, char** argv) {
|
||||
auto& context = TestContext::get(argc, argv);
|
||||
if (context.skipTest()) {
|
||||
// CTest uses this regex to figure out if the test has been skipped
|
||||
std::cout << "HIP_SKIP_THIS_TEST" << context.getCurrentTest() << std::endl;
|
||||
std::cout << "HIP_SKIP_THIS_TEST" << std::endl;
|
||||
return 0;
|
||||
}
|
||||
return Catch::Session().run(argc, argv);
|
||||
|
||||
@@ -0,0 +1,164 @@
|
||||
#pragma once
|
||||
#include "hip_test_common.hh"
|
||||
|
||||
namespace HipTest {
|
||||
template <typename T>
|
||||
size_t checkVectors(T* A, T* B, T* Out, size_t N, T (*F)(T a, T b), bool expectMatch = true,
|
||||
bool reportMismatch = true) {
|
||||
size_t mismatchCount = 0;
|
||||
size_t firstMismatch = 0;
|
||||
size_t mismatchesToPrint = 10;
|
||||
for (size_t i = 0; i < N; i++) {
|
||||
T expected = F(A[i], B[i]);
|
||||
if (Out[i] != expected) {
|
||||
if (mismatchCount == 0) {
|
||||
firstMismatch = i;
|
||||
}
|
||||
mismatchCount++;
|
||||
if ((mismatchCount <= mismatchesToPrint) && expectMatch) {
|
||||
INFO("Mismatch at " << i << " Computed: " << Out[i] << " Expeted: " << expected);
|
||||
CHECK(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (reportMismatch) {
|
||||
if (expectMatch) {
|
||||
if (mismatchCount) {
|
||||
INFO(mismatchCount << " Mismatches First Mismatch at index : " << firstMismatch);
|
||||
REQUIRE(false);
|
||||
}
|
||||
} else {
|
||||
if (mismatchCount == 0) {
|
||||
INFO("Expected Mismatch but not found any");
|
||||
REQUIRE(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return mismatchCount;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
size_t checkVectorADD(T* A_h, T* B_h, T* result_H, size_t N, bool expectMatch = true,
|
||||
bool reportMismatch = true) {
|
||||
return checkVectors<T>(
|
||||
A_h, B_h, result_H, N, [](T a, T b) { return a + b; }, expectMatch, reportMismatch);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void checkTest(T* expected_H, T* result_H, size_t N, bool expectMatch = true) {
|
||||
checkVectors<T>(
|
||||
expected_H, expected_H, result_H, N,
|
||||
[](T a, T b) {
|
||||
assert(a == b);
|
||||
return a;
|
||||
},
|
||||
expectMatch);
|
||||
}
|
||||
|
||||
|
||||
// Setters and Memory Management
|
||||
|
||||
template <typename T> void setDefaultData(size_t numElements, T* A_h, T* B_h, T* C_h) {
|
||||
// Initialize the host data:
|
||||
for (size_t i = 0; i < numElements; i++) {
|
||||
if (A_h) (A_h)[i] = 3.146f + i; // Pi
|
||||
if (B_h) (B_h)[i] = 1.618f + i; // Phi
|
||||
if (C_h) (C_h)[i] = 0.0f + i;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool initArraysForHost(T** A_h, T** B_h, T** C_h, size_t N, bool usePinnedHost = false) {
|
||||
size_t Nbytes = N * sizeof(T);
|
||||
|
||||
if (usePinnedHost) {
|
||||
if (A_h) {
|
||||
HIPCHECK(hipHostMalloc((void**)A_h, Nbytes));
|
||||
}
|
||||
if (B_h) {
|
||||
HIPCHECK(hipHostMalloc((void**)B_h, Nbytes));
|
||||
}
|
||||
if (C_h) {
|
||||
HIPCHECK(hipHostMalloc((void**)C_h, Nbytes));
|
||||
}
|
||||
} else {
|
||||
if (A_h) {
|
||||
*A_h = (T*)malloc(Nbytes);
|
||||
REQUIRE(*A_h != NULL);
|
||||
}
|
||||
|
||||
if (B_h) {
|
||||
*B_h = (T*)malloc(Nbytes);
|
||||
REQUIRE(*B_h != NULL);
|
||||
}
|
||||
|
||||
if (C_h) {
|
||||
*C_h = (T*)malloc(Nbytes);
|
||||
REQUIRE(*C_h != NULL);
|
||||
}
|
||||
}
|
||||
|
||||
setDefaultData(N, A_h ? *A_h : NULL, B_h ? *B_h : NULL, C_h ? *C_h : NULL);
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool initArrays(T** A_d, T** B_d, T** C_d, T** A_h, T** B_h, T** C_h, size_t N,
|
||||
bool usePinnedHost = false) {
|
||||
size_t Nbytes = N * sizeof(T);
|
||||
|
||||
if (A_d) {
|
||||
HIPCHECK(hipMalloc(A_d, Nbytes));
|
||||
}
|
||||
if (B_d) {
|
||||
HIPCHECK(hipMalloc(B_d, Nbytes));
|
||||
}
|
||||
if (C_d) {
|
||||
HIPCHECK(hipMalloc(C_d, Nbytes));
|
||||
}
|
||||
|
||||
return initArraysForHost(A_h, B_h, C_h, N, usePinnedHost);
|
||||
}
|
||||
|
||||
template <typename T> bool freeArraysForHost(T* A_h, T* B_h, T* C_h, bool usePinnedHost) {
|
||||
if (usePinnedHost) {
|
||||
if (A_h) {
|
||||
HIPCHECK(hipHostFree(A_h));
|
||||
}
|
||||
if (B_h) {
|
||||
HIPCHECK(hipHostFree(B_h));
|
||||
}
|
||||
if (C_h) {
|
||||
HIPCHECK(hipHostFree(C_h));
|
||||
}
|
||||
} else {
|
||||
if (A_h) {
|
||||
free(A_h);
|
||||
}
|
||||
if (B_h) {
|
||||
free(B_h);
|
||||
}
|
||||
if (C_h) {
|
||||
free(C_h);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool freeArrays(T* A_d, T* B_d, T* C_d, T* A_h, T* B_h, T* C_h, bool usePinnedHost) {
|
||||
if (A_d) {
|
||||
HIPCHECK(hipFree(A_d));
|
||||
}
|
||||
if (B_d) {
|
||||
HIPCHECK(hipFree(B_d));
|
||||
}
|
||||
if (C_d) {
|
||||
HIPCHECK(hipFree(C_d));
|
||||
}
|
||||
|
||||
return freeArraysForHost(A_h, B_h, C_h, usePinnedHost);
|
||||
}
|
||||
} // namespace HipTest
|
||||
@@ -1,2 +1,16 @@
|
||||
#pragma once
|
||||
#include "hip_test_context.hh"
|
||||
#include <catch.hpp>
|
||||
|
||||
#define HIP_PRINT_STATUS(status) INFO(hipGetErrorName(status) << " at line: " << __LINE__);
|
||||
|
||||
#define HIPCHECK(error) \
|
||||
{ \
|
||||
hipError_t localError = error; \
|
||||
if ((localError != hipSuccess) && (localError != hipErrorPeerAccessAlreadyEnabled)) { \
|
||||
INFO("Error: " << hipGetErrorString(localError) << " Code: " << localError << " Str: " \
|
||||
<< #error << " In File: " << __FILE__ << " At line: " << __LINE__); \
|
||||
REQUIRE(false); \
|
||||
} \
|
||||
}
|
||||
|
||||
|
||||
@@ -34,12 +34,9 @@ static int _log_enable = (std::getenv("HT_LOG_ENABLE") ? 1 : 0);
|
||||
} \
|
||||
}
|
||||
|
||||
|
||||
typedef struct Config_ {
|
||||
std::string json_file; // Json file
|
||||
std::string platform; // amd/nvidia
|
||||
std::vector<std::string> devices; // gfx906, etc
|
||||
std::vector<std::string> targetId; // Target Ids, only for AMD, gfx906:sramecc+:xnack-
|
||||
std::string os; // windows/linux
|
||||
} Config;
|
||||
|
||||
@@ -73,8 +70,6 @@ class TestContext {
|
||||
bool isNvidia() const;
|
||||
bool isAmd() const;
|
||||
bool skipTest() const;
|
||||
const std::vector<std::string>& getDevices() const;
|
||||
const std::vector<std::string>& getTargetId() const;
|
||||
|
||||
const std::string& getCurrentTest() const { return current_test; }
|
||||
std::string currentPath();
|
||||
|
||||
@@ -0,0 +1,62 @@
|
||||
#pragma once
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
namespace HipTest {
|
||||
template <typename T> __global__ void vectorADD(const T* A_d, const T* B_d, T* C_d, size_t NELEM) {
|
||||
size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||
size_t stride = blockDim.x * gridDim.x;
|
||||
|
||||
for (size_t i = offset; i < NELEM; i += stride) {
|
||||
C_d[i] = A_d[i] + B_d[i];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
__global__ void vectorADDReverse(const T* A_d, const T* B_d, T* C_d, size_t NELEM) {
|
||||
size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||
size_t stride = blockDim.x * gridDim.x;
|
||||
|
||||
for (int64_t i = NELEM - stride + offset; i >= 0; i -= stride) {
|
||||
C_d[i] = A_d[i] + B_d[i];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename T> __global__ void addCount(const T* A_d, T* C_d, size_t NELEM, int count) {
|
||||
size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||
size_t stride = blockDim.x * gridDim.x;
|
||||
|
||||
// Deliberately do this in an inefficient way to increase kernel runtime
|
||||
for (int i = 0; i < count; i++) {
|
||||
for (size_t i = offset; i < NELEM; i += stride) {
|
||||
C_d[i] = A_d[i] + (T)count;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
__global__ void addCountReverse(const T* A_d, T* C_d, int64_t NELEM, int count) {
|
||||
size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||
size_t stride = blockDim.x * gridDim.x;
|
||||
|
||||
// Deliberately do this in an inefficient way to increase kernel runtime
|
||||
for (int i = 0; i < count; i++) {
|
||||
for (int64_t i = NELEM - stride + offset; i >= 0; i -= stride) {
|
||||
C_d[i] = A_d[i] + (T)count;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename T> __global__ void memsetReverse(T* C_d, T val, int64_t NELEM) {
|
||||
size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
|
||||
size_t stride = blockDim.x * gridDim.x;
|
||||
|
||||
for (int64_t i = NELEM - stride + offset; i >= 0; i -= stride) {
|
||||
C_d[i] = val;
|
||||
}
|
||||
}
|
||||
} // namespace HipTest
|
||||
@@ -0,0 +1,13 @@
|
||||
# AMD Tests
|
||||
set(LINUX_TEST_SRC
|
||||
hipMallocConcurrency.cc
|
||||
childMalloc.cc
|
||||
)
|
||||
|
||||
if(UNIX)
|
||||
# Create shared lib of all tests
|
||||
add_library(MultiProc SHARED EXCLUDE_FROM_ALL ${LINUX_TEST_SRC})
|
||||
|
||||
# Add dependency on build_tests to build it on this custom target
|
||||
add_dependencies(build_tests MultiProc)
|
||||
endif()
|
||||
@@ -0,0 +1,62 @@
|
||||
#include <hip_test_common.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_kernels.hh>
|
||||
|
||||
#ifdef __linux__
|
||||
#include <unistd.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/wait.h>
|
||||
#include <dlfcn.h>
|
||||
#endif
|
||||
|
||||
|
||||
bool testMallocFromChild() {
|
||||
int fd[2];
|
||||
pid_t childpid;
|
||||
bool testResult = false;
|
||||
|
||||
// create pipe descriptors
|
||||
pipe(fd);
|
||||
|
||||
childpid = fork();
|
||||
if (childpid > 0) { // Parent
|
||||
close(fd[1]);
|
||||
// parent will wait to read the device cnt
|
||||
read(fd[0], &testResult, sizeof(testResult));
|
||||
|
||||
// close the read-descriptor
|
||||
close(fd[0]);
|
||||
|
||||
// wait for child exit
|
||||
wait(NULL);
|
||||
|
||||
return testResult;
|
||||
|
||||
} else if (!childpid) { // Child
|
||||
// writing only, no need for read-descriptor
|
||||
close(fd[0]);
|
||||
|
||||
char* A_d = nullptr;
|
||||
hipError_t ret = hipMalloc(&A_d, 1024);
|
||||
|
||||
printf("hipMalloc returned : %s\n", hipGetErrorString(ret));
|
||||
if (ret == hipSuccess)
|
||||
testResult = true;
|
||||
else
|
||||
testResult = false;
|
||||
|
||||
// send the value on the write-descriptor:
|
||||
write(fd[1], &testResult, sizeof(testResult));
|
||||
|
||||
// close the write descriptor:
|
||||
close(fd[1]);
|
||||
exit(0);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
TEST_CASE("ChildMalloc") {
|
||||
auto res = testMallocFromChild();
|
||||
REQUIRE(res == true);
|
||||
}
|
||||
@@ -0,0 +1,188 @@
|
||||
#include <sys/types.h>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_kernels.hh>
|
||||
#ifdef __linux__
|
||||
#include <sys/wait.h>
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <limits>
|
||||
#include <atomic>
|
||||
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
size_t N = 4 * 1024 * 1024;
|
||||
unsigned blocksPerCU = 6; // to hide latency
|
||||
unsigned threadsPerBlock = 256;
|
||||
|
||||
|
||||
unsigned setNumBlocks(unsigned blocksPerCU, unsigned threadsPerBlock, size_t N) {
|
||||
int device;
|
||||
HIPCHECK(hipGetDevice(&device));
|
||||
hipDeviceProp_t props;
|
||||
HIPCHECK(hipGetDeviceProperties(&props, device));
|
||||
|
||||
unsigned blocks = props.multiProcessorCount * blocksPerCU;
|
||||
if (blocks * threadsPerBlock > N) {
|
||||
blocks = (N + threadsPerBlock - 1) / threadsPerBlock;
|
||||
}
|
||||
|
||||
return blocks;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Validates data consitency on supplied gpu
|
||||
*/
|
||||
bool validateMemoryOnGPU(int gpu, bool concurOnOneGPU = false) {
|
||||
size_t Nbytes = N * sizeof(int);
|
||||
int *A_d, *B_d, *C_d;
|
||||
int *A_h, *B_h, *C_h;
|
||||
size_t prevAvl, prevTot, curAvl, curTot;
|
||||
bool TestPassed = true;
|
||||
|
||||
HIPCHECK(hipSetDevice(gpu));
|
||||
HIPCHECK(hipMemGetInfo(&prevAvl, &prevTot));
|
||||
printf("tgs allocating..\n");
|
||||
HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false);
|
||||
|
||||
unsigned blocks = setNumBlocks(blocksPerCU, threadsPerBlock, N);
|
||||
|
||||
HIPCHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
|
||||
HIPCHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));
|
||||
|
||||
hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0,
|
||||
static_cast<const int*>(A_d), static_cast<const int*>(B_d), C_d, N);
|
||||
|
||||
HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
if (!HipTest::checkVectorADD(A_h, B_h, C_h, N)) {
|
||||
printf("Validation PASSED for gpu %d from pid %d\n", gpu, getpid());
|
||||
} else {
|
||||
printf("%s : Validation FAILED for gpu %d from pid %d\n", __func__, gpu, getpid());
|
||||
TestPassed &= false;
|
||||
}
|
||||
|
||||
HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
|
||||
HIPCHECK(hipMemGetInfo(&curAvl, &curTot));
|
||||
|
||||
if (!concurOnOneGPU && (prevAvl != curAvl || prevTot != curTot)) {
|
||||
// In concurrent calls on one GPU, we cannot verify leaking in this way
|
||||
printf(
|
||||
"%s : Memory allocation mismatch observed."
|
||||
"Possible memory leak.\n",
|
||||
__func__);
|
||||
TestPassed &= false;
|
||||
}
|
||||
|
||||
return TestPassed;
|
||||
}
|
||||
|
||||
|
||||
#if 1
|
||||
/**
|
||||
* Fetches Gpu device count
|
||||
*/
|
||||
void getDeviceCount1(int* pdevCnt) {
|
||||
#ifdef __linux__
|
||||
int fd[2], val = 0;
|
||||
pid_t childpid;
|
||||
|
||||
// create pipe descriptors
|
||||
pipe(fd);
|
||||
|
||||
// disable visible_devices env from shell
|
||||
unsetenv("ROCR_VISIBLE_DEVICES");
|
||||
unsetenv("HIP_VISIBLE_DEVICES");
|
||||
|
||||
childpid = fork();
|
||||
|
||||
if (childpid > 0) { // Parent
|
||||
close(fd[1]);
|
||||
// parent will wait to read the device cnt
|
||||
read(fd[0], &val, sizeof(val));
|
||||
|
||||
// close the read-descriptor
|
||||
close(fd[0]);
|
||||
|
||||
// wait for child exit
|
||||
wait(NULL);
|
||||
|
||||
*pdevCnt = val;
|
||||
} else if (!childpid) { // Child
|
||||
int devCnt = 1;
|
||||
// writing only, no need for read-descriptor
|
||||
close(fd[0]);
|
||||
|
||||
HIPCHECK(hipGetDeviceCount(&devCnt));
|
||||
// send the value on the write-descriptor:
|
||||
write(fd[1], &devCnt, sizeof(devCnt));
|
||||
|
||||
// close the write descriptor:
|
||||
close(fd[1]);
|
||||
exit(0);
|
||||
} else { // failure
|
||||
*pdevCnt = 1;
|
||||
return;
|
||||
}
|
||||
|
||||
#else
|
||||
HIPCHECK(hipGetDeviceCount(pdevCnt));
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
TEST_CASE("hipMallocChild_Concurrency_MultiGpu") {
|
||||
bool TestPassed = false;
|
||||
#ifdef __linux__
|
||||
// Parallel execution on multiple gpus from different child processes
|
||||
int devCnt = 1, pid = 0;
|
||||
|
||||
// Get GPU count
|
||||
getDeviceCount1(&devCnt);
|
||||
|
||||
// Spawn child for each GPU
|
||||
for (int gpu = 0; gpu < devCnt; gpu++) {
|
||||
if ((pid = fork()) < 0) {
|
||||
INFO("Child_Concurrency_MultiGpu : fork() returned error" << pid);
|
||||
REQUIRE(false);
|
||||
|
||||
} else if (!pid) { // Child process
|
||||
bool TestPassedChild = false;
|
||||
TestPassedChild = validateMemoryOnGPU(gpu);
|
||||
|
||||
if (TestPassedChild) {
|
||||
printf("returning exit(1) for success\n");
|
||||
exit(1); // child exit with success status
|
||||
} else {
|
||||
printf("Child_Concurrency_MultiGpu : childpid %d failed\n", getpid());
|
||||
exit(2); // child exit with failure status
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Parent shall wait for child to complete
|
||||
int cnt = 0;
|
||||
|
||||
for (int i = 0; i < devCnt; i++) {
|
||||
int pidwait = 0, exitStatus;
|
||||
pidwait = wait(&exitStatus);
|
||||
|
||||
printf("exitStatus for iter:%d is %d\n", i, exitStatus);
|
||||
if (pidwait < 0) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (WEXITSTATUS(exitStatus) == 1) cnt++;
|
||||
}
|
||||
|
||||
if (cnt && (cnt == devCnt)) TestPassed = true;
|
||||
|
||||
#else
|
||||
INFO("Test hipMallocChild_Concurrency_MultiGpu skipped on non-linux");
|
||||
#endif
|
||||
REQUIRE(TestPassed == true);
|
||||
}
|
||||
@@ -1,9 +1,18 @@
|
||||
# Common Tests - Test independent of all platforms
|
||||
set(TEST_SRC
|
||||
floatMath.cc
|
||||
)
|
||||
|
||||
# AMD only tests
|
||||
set(AMD_TEST_SRC
|
||||
vectorTypesDevice.cc
|
||||
)
|
||||
|
||||
if(HIP_PLATFORM MATCHES "amd")
|
||||
set(TEST_SRC ${TEST_SRC} ${AMD_TEST_SRC})
|
||||
endif()
|
||||
|
||||
|
||||
# Create shared lib of all tests
|
||||
add_library(DeviceLibs SHARED EXCLUDE_FROM_ALL ${TEST_SRC})
|
||||
|
||||
|
||||
@@ -2,12 +2,11 @@
|
||||
#include <iostream>
|
||||
|
||||
template <typename T> __global__ void add(T* a, T* b, T* c, size_t size) {
|
||||
int i = threadIdx.x;
|
||||
c[i] = a[i] + b[i];
|
||||
size_t i = threadIdx.x;
|
||||
if (i < size) c[i] = a[i] + b[i];
|
||||
}
|
||||
|
||||
TEMPLATE_TEST_CASE("Add Kernel", "[kernel][add]", int, long, float, long long, double) {
|
||||
auto addKernel = add<TestType>;
|
||||
auto size = GENERATE(as<size_t>{}, 100, 500, 1000);
|
||||
TestType *d_a, *d_b, *d_c;
|
||||
auto res = hipMalloc(&d_a, sizeof(TestType) * size);
|
||||
@@ -18,7 +17,7 @@ TEMPLATE_TEST_CASE("Add Kernel", "[kernel][add]", int, long, float, long long, d
|
||||
REQUIRE(res == hipSuccess);
|
||||
|
||||
std::vector<TestType> a, b, c;
|
||||
for (int i = 0; i < size; i++) {
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
a.push_back(i + 1);
|
||||
b.push_back(i + 1);
|
||||
c.push_back(2 * (i + 1));
|
||||
@@ -29,7 +28,7 @@ TEMPLATE_TEST_CASE("Add Kernel", "[kernel][add]", int, long, float, long long, d
|
||||
res = hipMemcpy(d_b, b.data(), sizeof(TestType) * size, hipMemcpyHostToDevice);
|
||||
REQUIRE(res == hipSuccess);
|
||||
|
||||
hipLaunchKernelGGL(addKernel, 1, size, 0, 0, d_a, d_b, d_c, size);
|
||||
hipLaunchKernelGGL(add<TestType>, 1, size, 0, 0, d_a, d_b, d_c, size);
|
||||
|
||||
res = hipMemcpy(a.data(), d_c, sizeof(TestType) * size, hipMemcpyDeviceToHost);
|
||||
REQUIRE(res == hipSuccess);
|
||||
|
||||
@@ -1,14 +1,12 @@
|
||||
# Common Tests - Test independent of all platforms
|
||||
set(TEST_SRC
|
||||
# AMD Tests
|
||||
set(AMD_TEST_SRC
|
||||
saxpy.cc
|
||||
)
|
||||
|
||||
# Set source File properties
|
||||
set_source_files_properties(saxpy.cc PROPERTIES COMPILE_FLAGS " -std=c++14 ")
|
||||
set_source_files_properties(test.cc PROPERTIES COMPILE_FLAGS " -std=c++17 ")
|
||||
if(HIP_PLATFORM MATCHES "amd")
|
||||
# Create shared lib of all tests
|
||||
add_library(RTC SHARED EXCLUDE_FROM_ALL ${AMD_TEST_SRC})
|
||||
|
||||
# Create shared lib of all tests
|
||||
add_library(RTC SHARED EXCLUDE_FROM_ALL ${TEST_SRC})
|
||||
|
||||
# Add dependency on build_tests to build it on this custom target
|
||||
add_dependencies(build_tests RTC)
|
||||
# Add dependency on build_tests to build it on this custom target
|
||||
add_dependencies(build_tests RTC)
|
||||
endif()
|
||||
|
||||
+33
-34
@@ -15,7 +15,7 @@ static constexpr auto NUM_THREADS{128};
|
||||
static constexpr auto NUM_BLOCKS{32};
|
||||
|
||||
static constexpr auto saxpy{
|
||||
R"(
|
||||
R"(
|
||||
#include <hip/hip_runtime.h>
|
||||
extern "C"
|
||||
__global__
|
||||
@@ -23,8 +23,7 @@ void saxpy(float a, float* x, float* y, float* out, size_t n)
|
||||
{
|
||||
size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (tid < n) {
|
||||
out[tid] = a * x[tid] + y[tid] ;
|
||||
|
||||
out[tid] = a * x[tid] + y[tid];
|
||||
}
|
||||
|
||||
}
|
||||
@@ -72,42 +71,42 @@ TEST_CASE("saxpy", "[hiprtc][saxpy]") {
|
||||
unique_ptr<float[]> hX{new float[n]};
|
||||
unique_ptr<float[]> hY{new float[n]};
|
||||
unique_ptr<float[]> hOut{new float[n]};
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
hX[i] = static_cast<float>(i);
|
||||
hY[i] = static_cast<float>(i * 2);
|
||||
}
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
hX[i] = static_cast<float>(i);
|
||||
hY[i] = static_cast<float>(i * 2);
|
||||
}
|
||||
|
||||
hipDeviceptr_t dX, dY, dOut;
|
||||
hipMalloc(&dX, bufferSize);
|
||||
hipMalloc(&dY, bufferSize);
|
||||
hipMalloc(&dOut, bufferSize);
|
||||
hipMemcpyHtoD(dX, hX.get(), bufferSize);
|
||||
hipMemcpyHtoD(dY, hY.get(), bufferSize);
|
||||
hipDeviceptr_t dX, dY, dOut;
|
||||
hipMalloc(&dX, bufferSize);
|
||||
hipMalloc(&dY, bufferSize);
|
||||
hipMalloc(&dOut, bufferSize);
|
||||
hipMemcpyHtoD(dX, hX.get(), bufferSize);
|
||||
hipMemcpyHtoD(dY, hY.get(), bufferSize);
|
||||
|
||||
struct {
|
||||
float a_;
|
||||
hipDeviceptr_t b_;
|
||||
hipDeviceptr_t c_;
|
||||
hipDeviceptr_t d_;
|
||||
size_t e_;
|
||||
} args{a, dX, dY, dOut, n};
|
||||
struct {
|
||||
float a_;
|
||||
hipDeviceptr_t b_;
|
||||
hipDeviceptr_t c_;
|
||||
hipDeviceptr_t d_;
|
||||
size_t e_;
|
||||
} args{a, dX, dY, dOut, n};
|
||||
|
||||
auto size = sizeof(args);
|
||||
void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args,
|
||||
HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
|
||||
HIP_LAUNCH_PARAM_END};
|
||||
auto size = sizeof(args);
|
||||
void* config[] = {HIP_LAUNCH_PARAM_BUFFER_POINTER, &args, HIP_LAUNCH_PARAM_BUFFER_SIZE, &size,
|
||||
HIP_LAUNCH_PARAM_END};
|
||||
|
||||
hipModuleLaunchKernel(kernel, NUM_BLOCKS, 1, 1, NUM_THREADS, 1, 1,
|
||||
0, nullptr, nullptr, config);
|
||||
hipMemcpyDtoH(hOut.get(), dOut, bufferSize);
|
||||
hipModuleLaunchKernel(kernel, NUM_BLOCKS, 1, 1, NUM_THREADS, 1, 1, 0, nullptr, nullptr, config);
|
||||
hipMemcpyDtoH(hOut.get(), dOut, bufferSize);
|
||||
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
REQUIRE(fabs(a * hX[i] + hY[i] - hOut[i]) > fabs(hOut[i]) * 1e-6);
|
||||
}
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
INFO("For " << i << " Value: " << fabs(a * hX[i] + hY[i] - hOut[i])
|
||||
<< " with: " << (fabs(hOut[i] * 1.0f) * 1e-6));
|
||||
REQUIRE(fabs(a * hX[i] + hY[i] - hOut[i]) <= fabs(hOut[i]) * 1e-6);
|
||||
}
|
||||
|
||||
hipFree(dX);
|
||||
hipFree(dY);
|
||||
hipFree(dOut);
|
||||
hipFree(dX);
|
||||
hipFree(dY);
|
||||
hipFree(dOut);
|
||||
|
||||
hipModuleUnload(module);
|
||||
hipModuleUnload(module);
|
||||
}
|
||||
|
||||
@@ -1,6 +0,0 @@
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
TEST_CASE("cpp17 test") {
|
||||
constexpr auto l = []() { return 2 * 10 * 30; };
|
||||
REQUIRE(l() == 600);
|
||||
}
|
||||
Reference in New Issue
Block a user