From 0b973bcfc7ff654d622c7cb4993b94951f49acbc Mon Sep 17 00:00:00 2001 From: Avinash Kethineedi Date: Tue, 2 Sep 2025 11:37:49 -0400 Subject: [PATCH] tests: remove `rocthrust` and `rocprim` dependencies from free_list unit tests (#231) --- tests/unit_tests/CMakeLists.txt | 5 - tests/unit_tests/free_list_gtest.cpp | 178 +++++++++++++++++++-------- tests/unit_tests/free_list_gtest.hpp | 5 +- 3 files changed, 127 insertions(+), 61 deletions(-) diff --git a/tests/unit_tests/CMakeLists.txt b/tests/unit_tests/CMakeLists.txt index 3510ccd1d5..a85c57c0df 100644 --- a/tests/unit_tests/CMakeLists.txt +++ b/tests/unit_tests/CMakeLists.txt @@ -101,10 +101,6 @@ target_link_libraries( ############################################################################### # GTEST DEPENDENCY ############################################################################### -# These packages are required for the unit tests -find_package(rocprim REQUIRED PATHS /opt/rocm) -find_package(rocthrust REQUIRED PATHS /opt/rocm) - include(FetchContent) FetchContent_Declare( @@ -126,5 +122,4 @@ target_link_libraries( PRIVATE gtest gtest_main - roc::rocthrust ) diff --git a/tests/unit_tests/free_list_gtest.cpp b/tests/unit_tests/free_list_gtest.cpp index f468421181..2be4bd01b2 100644 --- a/tests/unit_tests/free_list_gtest.cpp +++ b/tests/unit_tests/free_list_gtest.cpp @@ -24,8 +24,6 @@ #include "free_list_gtest.hpp" -#include - #include "../src/util.hpp" using namespace rocshmem; @@ -76,16 +74,22 @@ TYPED_TEST(FreeListTestFixture, pop_empty_device) { using T = typename TestFixture::T; auto& h_input = this->h_input; - auto& d_input = this->d_input; auto& free_list = this->free_list; + auto& hip_allocator_ = this->hip_allocator_; + bool *is_empty {nullptr}; + hip_allocator_.allocate(reinterpret_cast(&is_empty), + sizeof(bool)); + + CHECK_HIP(hipMemset(is_empty, 0, sizeof(bool))); FreeListProxy empty_list_proxy{}; FreeList* empty_free_list{empty_list_proxy.get()}; - thrust::device_vector is_empty(1); - rocshmem::pop_empty<<<1, 1>>>(empty_free_list, is_empty.data().get()); + rocshmem::pop_empty<<<1, 1>>>(empty_free_list, is_empty); CHECK_HIP(hipDeviceSynchronize()); EXPECT_TRUE(is_empty[0]); + + hip_allocator_.deallocate(is_empty); } TYPED_TEST(FreeListTestFixture, push_host_pop_device) { @@ -93,24 +97,30 @@ TYPED_TEST(FreeListTestFixture, push_host_pop_device) { using T = typename TestFixture::T; auto& h_input = this->h_input; - auto& d_input = this->d_input; auto& free_list = this->free_list; + auto& hip_allocator_ = this->hip_allocator_; - thrust::device_vector results(h_input.size()); + T *results {nullptr}; + bool *is_empty {nullptr}; + size_t size_bytes = sizeof(T) * h_input.size() + sizeof(bool); + hip_allocator_.allocate(reinterpret_cast(&results), + size_bytes); + + CHECK_HIP(hipMemset(results, 0, size_bytes)); + is_empty = reinterpret_cast(results + h_input.size()); const auto block_size = WF_SIZE; - rocshmem::pop_all<<<1, block_size>>>(free_list, results.data().get(), - results.size()); + rocshmem::pop_all<<<1, block_size>>>(free_list, results, h_input.size()); CHECK_HIP(hipDeviceSynchronize()); - for (std::size_t i = 0; i < results.size(); i++) { + for (std::size_t i = 0; i < h_input.size(); i++) { EXPECT_EQ(results[i], h_input[i]); } - thrust::device_vector is_empty(1); - rocshmem::pop_empty<<<1, 1>>>(free_list, is_empty.data().get()); + rocshmem::pop_empty<<<1, 1>>>(free_list, is_empty); CHECK_HIP(hipDeviceSynchronize()); EXPECT_TRUE(is_empty[0]); + hip_allocator_.deallocate(results); } TYPED_TEST(FreeListTestFixture, push_host_concurrent_pop_device) { @@ -118,29 +128,41 @@ TYPED_TEST(FreeListTestFixture, push_host_concurrent_pop_device) { using T = typename TestFixture::T; auto& h_input = this->h_input; - auto& d_input = this->d_input; auto& free_list = this->free_list; + auto& hip_allocator_ = this->hip_allocator_; - thrust::device_vector results(h_input.size()); + T *results {nullptr}; + bool *is_empty {nullptr}; + size_t size_bytes = sizeof(T) * h_input.size() + sizeof(bool); + hip_allocator_.allocate(reinterpret_cast(&results), + size_bytes); + + CHECK_HIP(hipMemset(results, 0, size_bytes)); + is_empty = reinterpret_cast(results + h_input.size()); const auto num_blocks = h_input.size(); const auto block_size = WF_SIZE; - rocshmem::pop_all<<>>(free_list, results.data().get(), - results.size()); + rocshmem::pop_all<<>>( + free_list, results, h_input.size()); CHECK_HIP(hipDeviceSynchronize()); - // sort to guarantee that the ordering is correct - thrust::sort(results.begin(), results.end()); - thrust::sort(h_input.begin(), h_input.end()); + std::vector h_results(h_input.size()); + CHECK_HIP(hipMemcpy(h_results.data(), results, sizeof(T) * h_input.size(), + hipMemcpyDeviceToHost)); - for (std::size_t i = 0; i < results.size(); i++) { - EXPECT_EQ(results[i], h_input[i]); + // sort to guarantee that the ordering is correct + std::sort(h_input.begin(), h_input.end()); + std::sort(h_results.begin(), h_results.end()); + + + for (std::size_t i = 0; i < h_results.size(); i++) { + EXPECT_EQ(h_results[i], h_input[i]); } - thrust::device_vector is_empty(1); - rocshmem::pop_empty<<<1, 1>>>(free_list, is_empty.data().get()); + rocshmem::pop_empty<<<1, 1>>>(free_list, is_empty); CHECK_HIP(hipDeviceSynchronize()); EXPECT_TRUE(is_empty[0]); + hip_allocator_.deallocate(results); } TYPED_TEST(FreeListTestFixture, push_host_pop_push_device) { @@ -149,24 +171,39 @@ TYPED_TEST(FreeListTestFixture, push_host_pop_push_device) { using FreeListType = FreeList; auto& h_input = this->h_input; - auto& d_input = this->d_input; auto& free_list = this->free_list; + auto& hip_allocator_ = this->hip_allocator_; + T *results {nullptr}; + T *d_input {nullptr}; + bool *is_empty {nullptr}; + size_t size_bytes = 2 * sizeof(T) * h_input.size() + sizeof(bool); + hip_allocator_.allocate(reinterpret_cast(&results), + size_bytes); + + CHECK_HIP(hipMemset(results, 0, size_bytes)); + d_input = reinterpret_cast(results + h_input.size()); + is_empty = reinterpret_cast(d_input + h_input.size()); const auto block_size = WF_SIZE; - rocshmem::pop_all<<<1, block_size>>>(free_list, nullptr, 0); + CHECK_HIP(hipMemcpy(d_input, h_input.data(), sizeof(T) * h_input.size(), + hipMemcpyHostToDevice)); + + rocshmem::pop_all<<<1, block_size>>>( + free_list, nullptr, h_input.size()); CHECK_HIP(hipDeviceSynchronize()); - rocshmem::push_all<<<1, 1>>>(free_list, d_input.data().get(), d_input.size()); + rocshmem::push_all<<<1, block_size>>>(free_list, d_input, h_input.size()); CHECK_HIP(hipDeviceSynchronize()); - thrust::device_vector results(d_input.size()); - rocshmem::pop_all<<<1, block_size>>>(free_list, results.data().get(), - results.size()); + rocshmem::pop_all<<<1, block_size>>>(free_list, results, h_input.size()); + CHECK_HIP(hipDeviceSynchronize()); - for (std::size_t i = 0; i < results.size(); i++) { + for (std::size_t i = 0; i < h_input.size(); i++) { EXPECT_EQ(results[i], h_input[i]); } + + hip_allocator_.deallocate(results); } TYPED_TEST(FreeListTestFixture, push_host_pop_concurrent_push_device) { @@ -175,30 +212,48 @@ TYPED_TEST(FreeListTestFixture, push_host_pop_concurrent_push_device) { using FreeListType = FreeList; auto& h_input = this->h_input; - auto& d_input = this->d_input; auto& free_list = this->free_list; + auto& hip_allocator_ = this->hip_allocator_; + T *results {nullptr}; + T *d_input {nullptr}; + size_t size_bytes = 2 * sizeof(T) * h_input.size(); + hip_allocator_.allocate(reinterpret_cast(&results), + size_bytes); + + CHECK_HIP(hipMemset(results, 0, size_bytes)); + d_input = reinterpret_cast(results + h_input.size()); const auto block_size = WF_SIZE; - rocshmem::pop_all<<<1, block_size>>>(free_list, nullptr, 0); + + CHECK_HIP(hipMemcpy(d_input, h_input.data(), sizeof(T) * h_input.size(), + hipMemcpyHostToDevice)); + + rocshmem::pop_all<<<1, block_size>>>( + free_list, nullptr,h_input.size()); CHECK_HIP(hipDeviceSynchronize()); // Concurrently push all values const auto num_blocks = h_input.size(); rocshmem::push_all<<>>( - free_list, d_input.data().get(), d_input.size()); + free_list, d_input, h_input.size()); CHECK_HIP(hipDeviceSynchronize()); - thrust::device_vector results(d_input.size()); - rocshmem::pop_all<<<1, block_size>>>(free_list, results.data().get(), - results.size()); + rocshmem::pop_all<<<1, block_size>>>(free_list, results, h_input.size()); + CHECK_HIP(hipDeviceSynchronize()); - // Sort to guarantee that the ordering is correct - thrust::sort(results.begin(), results.end()); - thrust::sort(h_input.begin(), h_input.end()); + std::vector h_results(h_input.size()); + CHECK_HIP(hipMemcpy(h_results.data(), results, sizeof(T) * h_input.size(), + hipMemcpyDeviceToHost)); - for (std::size_t i = 0; i < results.size(); i++) { - EXPECT_EQ(results[i], h_input[i]); + // sort to guarantee that the ordering is correct + std::sort(h_input.begin(), h_input.end()); + std::sort(h_results.begin(), h_results.end()); + + for (std::size_t i = 0; i < h_results.size(); i++) { + EXPECT_EQ(h_results[i], h_input[i]); } + + hip_allocator_.deallocate(results); } TYPED_TEST(FreeListTestFixture, push_host_concurrent_pop_push_device) { @@ -207,29 +262,48 @@ TYPED_TEST(FreeListTestFixture, push_host_concurrent_pop_push_device) { using FreeListType = FreeList; auto& h_input = this->h_input; - auto& d_input = this->d_input; auto& free_list = this->free_list; + auto& hip_allocator_ = this->hip_allocator_; + + T *results {nullptr}; + T *d_input {nullptr}; + size_t size_bytes = 2 * sizeof(T) * h_input.size(); + hip_allocator_.allocate(reinterpret_cast(&results), + size_bytes); + + CHECK_HIP(hipMemset(results, 0, size_bytes)); + d_input = reinterpret_cast(results + h_input.size()); + + CHECK_HIP(hipMemcpy(d_input, h_input.data(), sizeof(T) * h_input.size(), + hipMemcpyHostToDevice)); const auto block_size = WF_SIZE; - rocshmem::pop_all<<<1, block_size>>>(free_list, nullptr, 0); + rocshmem::pop_all<<<1, block_size>>>( + free_list, nullptr, h_input.size()); CHECK_HIP(hipDeviceSynchronize()); // Concurrently push all values const auto num_blocks = h_input.size(); rocshmem::push_all<<>>( - free_list, d_input.data().get(), d_input.size()); + free_list, d_input, h_input.size()); CHECK_HIP(hipDeviceSynchronize()); // Concurrently pop all values - thrust::device_vector results(d_input.size()); - rocshmem::pop_all<<>>(free_list, results.data().get(), - results.size()); + rocshmem::pop_all<<>>( + free_list, results, h_input.size()); + CHECK_HIP(hipDeviceSynchronize()); - // Sort to guarantee that the ordering is correct - thrust::sort(results.begin(), results.end()); - thrust::sort(h_input.begin(), h_input.end()); + std::vector h_results(h_input.size()); + CHECK_HIP(hipMemcpy(h_results.data(), results, sizeof(T) * h_input.size(), + hipMemcpyDeviceToHost)); - for (std::size_t i = 0; i < results.size(); i++) { - EXPECT_EQ(results[i], h_input[i]); + // sort to guarantee that the ordering is correct + std::sort(h_input.begin(), h_input.end()); + std::sort(h_results.begin(), h_results.end()); + + for (std::size_t i = 0; i < h_results.size(); i++) { + EXPECT_EQ(h_results[i], h_input[i]); } + + hip_allocator_.deallocate(results); } diff --git a/tests/unit_tests/free_list_gtest.hpp b/tests/unit_tests/free_list_gtest.hpp index 8850ad375b..0541a4c434 100644 --- a/tests/unit_tests/free_list_gtest.hpp +++ b/tests/unit_tests/free_list_gtest.hpp @@ -25,8 +25,6 @@ #ifndef ROCSHMEM_FREE_LIST_GTEST_HPP #define ROCSHMEM_FREE_LIST_GTEST_HPP -#include - #include #include @@ -41,7 +39,6 @@ class FreeListTestFixture : public ::testing::Test { public: FreeListTestFixture() : h_input(num_elements) { std::iota(h_input.begin(), h_input.end(), T{1}); - d_input = h_input; free_list = list_proxy.get(); } @@ -52,9 +49,9 @@ class FreeListTestFixture : public ::testing::Test { using T = ValueType; using Allocator = HIPAllocator; + Allocator hip_allocator_ {}; const std::size_t num_elements{32}; std::vector h_input{}; - thrust::device_vector d_input{}; FreeListProxy list_proxy{}; FreeList* free_list{};