diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3202833350..c86e2d2d58 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -503,7 +503,7 @@ list(APPEND HIP_SOURCES ${CMAKE_CURRENT_BINARY_DIR}/git_version.cpp)
 # Create a custom target that updates git_version.cpp and executes whenever rccl is built
 add_custom_target(git_version_check
   COMMENT "Updating git_version.cpp if necessary"
-  COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/git_version.cmake
+  COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/scripts/git_version.cmake
   VERBATIM
 )
 
@@ -583,6 +583,7 @@ if(DEMANGLE_DIR)
   target_compile_definitions(rccl PRIVATE "HAVE_DECL_BASENAME=1")
 endif()
 if(${hipcc_version_string} VERSION_GREATER_EQUAL "6.1.33591")
+  set(LL128_ENABLED ON)
   target_compile_definitions(rccl PRIVATE ENABLE_LL128)
   message(STATUS "RCCL LL128 protocol enabled")
 endif()
@@ -730,6 +731,13 @@ if(BUILD_TESTS)
   rocm_package_setup_component(clients)
   rocm_package_setup_client_component(tests PACKAGE_NAME unittests)
   add_subdirectory(test)
+
+  ## Post-build step (tests only): run the metadata extraction script once rccl is linked
+  add_custom_command(TARGET rccl POST_BUILD
+    COMMENT "Extracting metadata from librccl.so"
+    COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/scripts/extract_metadata.cmake
+    VERBATIM
+  )
 endif()
 
 rocm_create_package(
diff --git a/cmake/scripts/extract_metadata.cmake b/cmake/scripts/extract_metadata.cmake
new file mode 100644
index 0000000000..ca025ab033
--- /dev/null
+++ b/cmake/scripts/extract_metadata.cmake
@@ -0,0 +1,55 @@
+# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+## List the objects that roc-obj-ls reports for librccl.so (looked up in the working directory)
+execute_process( COMMAND roc-obj-ls librccl.so
+  RESULT_VARIABLE list_result
+  OUTPUT_VARIABLE cmd_output
+)
+
+if(list_result EQUAL 0)
+  ## Convert cmd output to a CMake list with one element per output line
+  string(REGEX REPLACE "\n$" "" cmd_output "${cmd_output}")
+  string(REPLACE "\n" ";" cmd_output "${cmd_output}")
+
+  ## Keep the file:// URI of every line that mentions one of the selected gfx archs
+  foreach(line ${cmd_output})
+    if(line MATCHES "(gfx90a|gfx940|gfx941|gfx942)")
+      string(REGEX MATCH "file://(.*)" file_match "${line}")
+      if(file_match)
+        list(APPEND file_paths ${file_match})
+      endif()
+    endif()
+  endforeach()
+
+  ## Extract objects from files; a failed extraction only warns
+  foreach(file ${file_paths})
+    execute_process(
+      COMMAND roc-obj-extract ${file}
+      RESULT_VARIABLE extraction_result
+    )
+    if(NOT extraction_result EQUAL 0)
+      message(WARNING "Could not extract objects from ${file}")
+    endif()
+  endforeach()
+else()
+  ## We don't want to stop building unit-tests if this command fails.
+  message(WARNING "Command failed with error code ${list_result}")
+endif()
\ No newline at end of file
diff --git a/cmake/git_version.cmake b/cmake/scripts/git_version.cmake
similarity index 100%
rename from cmake/git_version.cmake
rename to cmake/scripts/git_version.cmake
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 5108e4dce4..48e57ac0ee 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -68,9 +68,19 @@ if(BUILD_TESTS)
   endif()
 
   add_executable(rccl-UnitTests ${COMMON_SOURCE_FILES} ${TEST_SOURCE_FILES})
+
+  ## Set rccl-UnitTests include directories
   target_include_directories(rccl-UnitTests PRIVATE ${ROCM_PATH} ${GTEST_INCLUDE_DIRS})
-  target_include_directories(rccl-UnitTests PRIVATE ${PROJECT_BINARY_DIR}/include) # for generated rccl.h header
+  target_include_directories(rccl-UnitTests PRIVATE ${PROJECT_BINARY_DIR}/include)            # for generated rccl.h header
   target_include_directories(rccl-UnitTests PRIVATE ${PROJECT_BINARY_DIR}/hipify/src/include) # for rccl_bfloat16.h
+
+  ## Set rccl-UnitTests compile definitions
+  if(LL128_ENABLED)
+    target_compile_definitions(rccl-UnitTests PRIVATE ENABLE_LL128)
+  endif()
+  target_compile_definitions(rccl-UnitTests PRIVATE ROCM_PATH="${ROCM_PATH}")
+
+  ## Set rccl-UnitTests linked libraries
   target_link_libraries(rccl-UnitTests PRIVATE ${GTEST_BOTH_LIBRARIES})
   target_link_libraries(rccl-UnitTests PRIVATE hip::host hip::device hsa-runtime64::hsa-runtime64)
   target_link_libraries(rccl-UnitTests PRIVATE Threads::Threads)
diff --git a/test/StandaloneTests.cpp b/test/StandaloneTests.cpp
index 4db1dcacf7..2e80b865f5 100644
--- a/test/StandaloneTests.cpp
+++ b/test/StandaloneTests.cpp
@@ -9,7 +9,11 @@
 
 #include "StandaloneUtils.hpp"
 
-namespace RcclUnitTesting {
+namespace RcclUnitTesting
+{
+  /**
+   * \brief Verify that each device is assigned to the right rank using ncclCommSplit API.
+   * ******************************************************************************************/
   TEST(Standalone, SplitComms_RankCheck)
   {
     // Check for multi-gpu
@@ -52,6 +56,9 @@ namespace RcclUnitTesting {
     NCCLCHECK(ncclCommDestroy(comm));
   }
 
+  /**
+   * \brief Creates a communicator for each device and gathers them all in one rank.
+   * ******************************************************************************************/
   TEST(Standalone, SplitComms_OneColor)
   {
     // Check for multi-gpu
@@ -93,6 +100,9 @@ namespace RcclUnitTesting {
     NCCLCHECK(ncclCommDestroy(comm));
   }
 
+  /**
+   * \brief Creates a communicator for each device and reduces them into (numDevices / 2) ranks.
+   * ******************************************************************************************/
   TEST(Standalone, SplitComms_Reduce)
   {
     // Check for multi-gpu
@@ -140,7 +150,10 @@ namespace RcclUnitTesting {
     for (auto& comm : comms)
       NCCLCHECK(ncclCommDestroy(comm));
   }
-  
+
+  /**
+   * \brief Verify there is no regression in timing for each protocol [LL, LL128, Simple]
+   * ******************************************************************************************/
   TEST(Standalone, RegressionTiming)
   {
     // timing
@@ -241,4 +254,41 @@ namespace RcclUnitTesting {
     else
       unsetenv("NCCL_PROTO");
   }
+
+  /**
+   * \brief Verify rccl generic kernel stack size for each gfx architecture is less than the
+   *        expected MAX_STACK_SIZE.
+   * ******************************************************************************************/
+  TEST(Standalone, StackSize) {
+    const char* mainKernel = "rccl_main_kernel";
+
+    // Look for the .co files (searched relative to the directory the test is run from)
+    const std::vector<std::string> coFileList = splitString(executeCommand("find ../ -type f -name \"*.co\""), '\n');
+
+    // Check if the .co files exist in the build directory
+    if (coFileList.empty())
+      GTEST_SKIP() << "Skipping... Could not find required files in the build directory.";
+
+    for (const auto& file : coFileList) {
+      // Dump the ELF notes of the file; the path is quoted so that spaces survive the shell
+      const std::string cmd = std::string(ROCM_PATH) + "/llvm/bin/llvm-readelf --notes \"" + file + "\"";
+      const std::vector<std::string> metadata = splitString(executeCommand(cmd.c_str()), '\n');
+
+      // Skip if llvm is not installed
+      if (metadata.empty())
+        GTEST_SKIP() << "Skipping... llvm is not found.";
+
+      // Parse metadata from file and store it for each arch
+      const ArchInfo archInfo = parseMetadata(metadata);
+
+      // Iterate over the kernels of this arch; only names containing mainKernel are checked
+      for (const auto& kernel : archInfo.kernels) {
+        if (kernel.name.find(mainKernel) != std::string::npos) {
+          // Kernel stack size should be less than or equal to the maxStackSize value
+          printf("[ INFO ] Arch: %s Kernel: %s Size: %d\n", archInfo.archName.c_str(), kernel.name.c_str(), kernel.privateSegmentFixedSize);
+          EXPECT_LE(kernel.privateSegmentFixedSize, archInfo.archName == "gfx90a" ? MAX_STACK_SIZE_gfx90a : MAX_STACK_SIZE);
+        }
+      }
+    }
+  }
 }
diff --git a/test/common/StandaloneUtils.hpp b/test/common/StandaloneUtils.hpp
index 5be85c0c33..7fc63e70df 100644
--- a/test/common/StandaloneUtils.hpp
+++ b/test/common/StandaloneUtils.hpp
@@ -1,6 +1,13 @@
 #ifndef STANDALONE_UTILS_H
 #define STANDALONE_UTILS_H
 
+#include <cstdio>
+#include <iostream>
+#include <regex>
+#include <sstream>
+#include <string>
+#include <vector>
+
 #define HIPCALL(cmd) \
   do { \
     hipError_t error = (cmd); \
@@ -20,4 +27,108 @@
   } \
 } while(0)
 
+/** \brief Default upper bound the StackSize test accepts for .private_segment_fixed_size. */
+#define MAX_STACK_SIZE 112
+
+/** \brief Upper bound used for gfx90a; it differs from the default only when ENABLE_LL128 is defined. */
+#ifdef ENABLE_LL128
+#define MAX_STACK_SIZE_gfx90a 288
+#else
+#define MAX_STACK_SIZE_gfx90a MAX_STACK_SIZE
+#endif
+
+/** \brief Name and .private_segment_fixed_size value of one kernel found in the metadata. */
+struct KernelInfo {
+  std::string name;
+  int         privateSegmentFixedSize = 0;
+};
+
+/** \brief Target architecture name plus the kernels parsed from one metadata dump. */
+struct ArchInfo {
+  std::string             archName;
+  std::vector<KernelInfo> kernels;
+};
+
+/**
+ * \brief Run a shell command through popen() and capture what it writes to its standard output.
+ * \param cmd Command line handed to popen().
+ * \return Captured output; empty if the pipe could not be opened or nothing was written.
+ */
+inline std::string executeCommand(const char* cmd) {
+  std::string result;
+  FILE* pipe = popen(cmd, "r");
+
+  if (!pipe) {
+    std::cerr << "Error executing command: " << cmd << std::endl;
+    return result;
+  }
+
+  // Loop on fgets() itself: it returns nullptr on EOF and on a read error, so the loop
+  // always terminates (testing feof() alone would spin forever after a read error).
+  char buffer[128];
+  while (fgets(buffer, sizeof(buffer), pipe) != nullptr) {
+    result += buffer;
+  }
+
+  pclose(pipe);
+  return result;
+}
+
+/**
+ * \brief Split a string into the pieces separated by a delimiter character.
+ * \param str       Text to split.
+ * \param delimiter Separator character; it is not included in the pieces.
+ * \return Pieces in order of appearance; an empty input yields an empty vector.
+ */
+inline std::vector<std::string> splitString(const std::string& str, char delimiter) {
+  std::vector<std::string> result;
+  std::istringstream iss(str);
+
+  std::string line;
+  while (std::getline(iss, line, delimiter)) {
+    result.push_back(line);
+  }
+
+  return result;
+}
+
+/**
+ * \brief Collect the target architecture and the per-kernel private segment sizes from the
+ *        lines of a metadata dump (the StackSize test passes `llvm-readelf --notes` output).
+ * \param list Metadata text, one entry per line.
+ * \return ArchInfo holding the last matched target name and one KernelInfo for every kernel
+ *         for which both a `.name:` and a `.private_segment_fixed_size:` line were seen.
+ */
+inline ArchInfo parseMetadata(const std::vector<std::string>& list) {
+  ArchInfo archInfo;
+  KernelInfo currKernelInfo;
+  // Track "size seen" explicitly so that a kernel whose fixed size is 0 is still recorded.
+  bool haveSize = false;
+
+  const std::regex amdhsaTargetRegex("amdhsa.target:\\s+(?:'?)amdgcn-amd-amdhsa--(\\w+)(?:'?)");
+  const std::regex kernelNameRegex("\\.name:\\s+(\\w+)");
+  const std::regex privateSegmentSizeRegex("\\.private_segment_fixed_size:\\s+(\\d+)");
+
+  for (const auto& line : list) {
+    std::smatch match;
+
+    if (std::regex_search(line, match, amdhsaTargetRegex)) {
+      archInfo.archName = match[1].str();
+    } else if (std::regex_search(line, match, kernelNameRegex)) {
+      currKernelInfo.name = match[1].str();
+    } else if (std::regex_search(line, match, privateSegmentSizeRegex)) {
+      currKernelInfo.privateSegmentFixedSize = std::stoi(match[1].str());
+      haveSize = true;
+    }
+
+    if (!currKernelInfo.name.empty() && haveSize) {
+      archInfo.kernels.push_back(currKernelInfo);
+      currKernelInfo = {}; // Start collecting the next kernel
+      haveSize = false;
+    }
+  }
+
+  return archInfo;
+}
+
 #endif
\ No newline at end of file