Add stack size UT (#1081)

* Add stack size UT
This commit is contained in:
Bertan Dogancay
2024-02-12 17:56:15 -07:00
committad av GitHub
förälder 5669b0d7b6
incheckning dc2d486ba0
6 ändrade filer med 209 tillägg och 4 borttagningar
+8 -1
Visa fil
@@ -503,7 +503,7 @@ list(APPEND HIP_SOURCES ${CMAKE_CURRENT_BINARY_DIR}/git_version.cpp)
# Create a custom target that updates git_version.cpp and executes whenever rccl is built
add_custom_target(git_version_check
COMMENT "Updating git_version.cpp if necessary"
COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/git_version.cmake
COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/scripts/git_version.cmake
VERBATIM
)
@@ -583,6 +583,7 @@ if(DEMANGLE_DIR)
target_compile_definitions(rccl PRIVATE "HAVE_DECL_BASENAME=1")
endif()
## Enable the LL128 protocol only when hipcc is version 6.1.33591 or newer.
## LL128_ENABLED is recorded as a CMake variable in addition to the ENABLE_LL128
## compile definition on the rccl target.
## The version variable is quoted so that an empty or unset value simply
## evaluates to false instead of producing a malformed if() expression.
if("${hipcc_version_string}" VERSION_GREATER_EQUAL "6.1.33591")
  set(LL128_ENABLED ON)
  target_compile_definitions(rccl PRIVATE ENABLE_LL128)
  message(STATUS "RCCL LL128 protocol enabled")
endif()
@@ -730,6 +731,12 @@ if(BUILD_TESTS)
rocm_package_setup_component(clients)
rocm_package_setup_client_component(tests PACKAGE_NAME unittests)
add_subdirectory(test)
## After the rccl target is built, run the metadata extraction script in CMake
## script mode (-P).
add_custom_command(TARGET rccl POST_BUILD
  COMMENT "Extracting metadata from librccl.so"
  COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/scripts/extract_metadata.cmake
  VERBATIM
)
endif()
rocm_create_package(
+55
Visa fil
@@ -0,0 +1,55 @@
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
## List the objects for each gfx architecture
## NOTE: librccl.so is given as a relative path, so it is resolved against the
## directory this script is executed from.
execute_process( COMMAND roc-obj-ls librccl.so
                 RESULT_VARIABLE list_result
                 OUTPUT_VARIABLE cmd_output
)

if(list_result EQUAL 0)
  ## Convert cmd output to list of lines
  string(REGEX REPLACE "\n$" "" cmd_output "${cmd_output}")
  string(REPLACE "\n" ";" cmd_output "${cmd_output}")

  ## Extract file paths for the selected gfx archs
  set(file_paths "")
  foreach(line ${cmd_output})
    if(line MATCHES "(gfx90a|gfx940|gfx941|gfx942)")
      ## Keep everything from "file://" to the end of the line
      string(REGEX MATCH "\\file://(.*)" file_match "${line}")
      if(file_match)
        list(APPEND file_paths "${file_match}")
      endif()
    endif()
  endforeach()

  ## Extract objects from files
  foreach(file ${file_paths})
    execute_process(
      COMMAND roc-obj-extract ${file}
      RESULT_VARIABLE extraction_result
    )
    if(NOT extraction_result EQUAL 0)
      message(WARNING "Could not extract objects from ${file}")
    endif()
  endforeach()
else()
  ## We don't want to stop building unit-tests if this command fails.
  ## Report the status captured by RESULT_VARIABLE above (list_result).
  message(WARNING "Command failed with error code ${list_result}")
endif()
+11 -1
Visa fil
@@ -68,9 +68,19 @@ if(BUILD_TESTS)
endif()
add_executable(rccl-UnitTests ${COMMON_SOURCE_FILES} ${TEST_SOURCE_FILES})
## Set rccl-UnitTests include directories
target_include_directories(rccl-UnitTests PRIVATE ${ROCM_PATH} ${GTEST_INCLUDE_DIRS})
target_include_directories(rccl-UnitTests PRIVATE ${PROJECT_BINARY_DIR}/include) # for generated rccl.h header
target_include_directories(rccl-UnitTests PRIVATE ${PROJECT_BINARY_DIR}/include) # for generated rccl.h header
target_include_directories(rccl-UnitTests PRIVATE ${PROJECT_BINARY_DIR}/hipify/src/include) # for rccl_bfloat16.h
## Set rccl-UnitTests compile definitions
if(LL128_ENABLED)
target_compile_definitions(rccl-UnitTests PRIVATE ENABLE_LL128)
endif()
target_compile_definitions(rccl-UnitTests PRIVATE ROCM_PATH="${ROCM_PATH}")
## Set rccl-UnitTests linked libraries
target_link_libraries(rccl-UnitTests PRIVATE ${GTEST_BOTH_LIBRARIES})
target_link_libraries(rccl-UnitTests PRIVATE hip::host hip::device hsa-runtime64::hsa-runtime64)
target_link_libraries(rccl-UnitTests PRIVATE Threads::Threads)
+52 -2
Visa fil
@@ -9,7 +9,11 @@
#include "StandaloneUtils.hpp"
namespace RcclUnitTesting {
namespace RcclUnitTesting
{
/**
* \brief Verify that each device is assigned to the right rank using ncclCommSplit API.
* ******************************************************************************************/
TEST(Standalone, SplitComms_RankCheck)
{
// Check for multi-gpu
@@ -52,6 +56,9 @@ namespace RcclUnitTesting {
NCCLCHECK(ncclCommDestroy(comm));
}
/**
* \brief Creates a communicator for each device and gathers them all in one rank.
* ******************************************************************************************/
TEST(Standalone, SplitComms_OneColor)
{
// Check for multi-gpu
@@ -93,6 +100,9 @@ namespace RcclUnitTesting {
NCCLCHECK(ncclCommDestroy(comm));
}
/**
* \brief Creates a communicator for each device and reduces them into (numDevices / 2) ranks.
* ******************************************************************************************/
TEST(Standalone, SplitComms_Reduce)
{
// Check for multi-gpu
@@ -140,7 +150,10 @@ namespace RcclUnitTesting {
for (auto& comm : comms)
NCCLCHECK(ncclCommDestroy(comm));
}
/**
* \brief Verify there is no regression in timing for each protocol [LL, LL128, Simple]
* ******************************************************************************************/
TEST(Standalone, RegressionTiming)
{
// timing
@@ -241,4 +254,41 @@ namespace RcclUnitTesting {
else
unsetenv("NCCL_PROTO");
}
/**
 * \brief Verify rccl generic kernel stack size for each gfx architecture is less than or
 *        equal to the expected maximum (MAX_STACK_SIZE_gfx90a for gfx90a, MAX_STACK_SIZE
 *        for every other architecture).
 *
 * Every "*.co" file found below the parent of the current working directory is dumped with
 * llvm-readelf, and each kernel whose name contains "rccl_main_kernel" is checked. The test
 * is skipped when no such file exists or when llvm-readelf produces no output.
 * ******************************************************************************************/
TEST(Standalone, StackSize) {
  const char* mainKernel = "rccl_main_kernel";

  // Look for the .co files
  std::vector<std::string> coFileList = splitString(executeCommand("find ../ -type f -name \"*.co\""), '\n');

  // Check if the .co files exist in the build directory
  if (coFileList.empty()) {
    GTEST_SKIP() << "Skipping... Could not find required files in the build directory.";
  }

  for (const auto& file : coFileList) {
    // Quote the path so that a directory name containing spaces survives the shell.
    const std::string cmd = std::string(ROCM_PATH) + "/llvm/bin/llvm-readelf --notes \"" + file + "\"";

    // Store the output in a list, one entry per line
    const std::vector<std::string> metadata = splitString(executeCommand(cmd.c_str()), '\n');

    // Skip if llvm is not installed (the command printed nothing on stdout)
    if (metadata.empty()) {
      GTEST_SKIP() << "Skipping... llvm is not found.";
    }

    // Parse metadata from file and store it for each arch
    const ArchInfo archInfo = parseMetadata(metadata);

    // gfx90a has its own limit; every other architecture shares MAX_STACK_SIZE
    const int maxStackSize = (archInfo.archName == "gfx90a") ? MAX_STACK_SIZE_gfx90a : MAX_STACK_SIZE;

    // Iterate over the kernels of this arch
    for (const auto& kernel : archInfo.kernels) {
      if (kernel.name.find(mainKernel) == std::string::npos) {
        continue;
      }

      // Kernel stack size should be less than or equal to the maxStackSize value
      printf("[ INFO     ] Arch: %s Kernel: %s Size: %d\n", archInfo.archName.c_str(), kernel.name.c_str(), kernel.privateSegmentFixedSize);
      EXPECT_LE(kernel.privateSegmentFixedSize, maxStackSize);
    }
  }
}
}
+83
Visa fil
@@ -1,6 +1,10 @@
#ifndef STANDALONE_UTILS_H
#define STANDALONE_UTILS_H
#include <iostream>
#include <cstdio>
#include <regex>
#define HIPCALL(cmd) \
do { \
hipError_t error = (cmd); \
@@ -20,4 +24,83 @@
} \
} while(0)
// Upper bounds that the Standalone.StackSize unit test compares against a kernel's
// private_segment_fixed_size (presumably in bytes -- TODO confirm).
// MAX_STACK_SIZE is the default limit.
#define MAX_STACK_SIZE 112
// gfx90a gets a larger limit only when the build defines ENABLE_LL128;
// otherwise it falls back to the default limit.
#ifdef ENABLE_LL128
#define MAX_STACK_SIZE_gfx90a 288
#else
#define MAX_STACK_SIZE_gfx90a MAX_STACK_SIZE
#endif
/// One kernel entry collected by parseMetadata().
struct KernelInfo {
  /// Kernel name, taken from a ".name:" metadata line.
  std::string name;
  /// Value of ".private_segment_fixed_size:"; stays 0 until such a line is parsed.
  int privateSegmentFixedSize{0};
};
// Result of parseMetadata(): the target architecture of one metadata dump and the
// kernels collected from it.
struct ArchInfo {
// Architecture name captured from the "amdhsa.target" line (compared against
// e.g. "gfx90a" by the StackSize test).
std::string archName;
// Kernels in the order they were encountered in the metadata.
std::vector<KernelInfo> kernels;
};
/**
 * \brief Run a command through popen() and capture everything it writes to stdout.
 *
 * \param cmd  Command line handed to popen().
 * \return     The captured standard output; an empty string if the pipe could not be
 *             opened (an error is printed to std::cerr in that case).
 *
 * The exit status returned by pclose() is not reported to the caller.
 * Declared inline because the definition lives in a header: without it, including this
 * header from more than one translation unit yields multiple-definition link errors.
 */
inline std::string executeCommand(const char* cmd) {
  std::string result;

  FILE* pipe = popen(cmd, "r");
  if (!pipe) {
    std::cerr << "Error executing command: " << cmd << std::endl;
    return result;
  }

  // Loop on the fgets() result instead of feof(): feof() only becomes true once a read has
  // hit end-of-file, so a read *error* would otherwise keep the loop spinning forever.
  char buffer[128];
  while (fgets(buffer, sizeof(buffer), pipe) != nullptr) {
    result += buffer;
  }

  pclose(pipe);
  return result;
}
/**
 * \brief Split a string into the pieces separated by \p delimiter.
 *
 * \param str        Text to split.
 * \param delimiter  Separator character; it is not included in the pieces.
 * \return           The pieces in order. Empty pieces between consecutive delimiters (and
 *                   before a leading delimiter) are kept; a single trailing delimiter does
 *                   not add an empty piece; an empty input yields an empty vector.
 *
 * Implemented with std::string::find so the header does not rely on <sstream>, which it
 * never includes. Declared inline because the definition lives in a header.
 */
inline std::vector<std::string> splitString(const std::string& str, char delimiter) {
  std::vector<std::string> result;

  std::string::size_type start = 0;
  while (start < str.size()) {
    const std::string::size_type end = str.find(delimiter, start);
    if (end == std::string::npos) {
      // Last piece: no further delimiter.
      result.push_back(str.substr(start));
      break;
    }
    result.push_back(str.substr(start, end - start));
    start = end + 1;
  }

  return result;
}
/**
 * \brief Collect the target architecture and per-kernel private segment sizes from the
 *        lines of a metadata dump (as produced by `llvm-readelf --notes`).
 *
 * \param list  The dump, one entry per line.
 * \return      archName holds the capture of the last "amdhsa.target" line seen; kernels
 *              holds one entry per (".name:", ".private_segment_fixed_size:") pair, in
 *              input order.
 *
 * A kernel is emitted as soon as both a name and a size have been seen since the previous
 * emission; a later ".name:" line overwrites an earlier one that has not been emitted yet.
 * An explicit flag tracks whether a size was seen, so kernels whose size is 0 are recorded
 * too instead of being silently dropped.
 *
 * std::stoi may throw std::out_of_range if the digit run does not fit in an int.
 * Declared inline because the definition lives in a header.
 */
inline ArchInfo parseMetadata(const std::vector<std::string>& list) {
  // Compiling a std::regex is expensive, so build each pattern only once.
  static const std::regex amdhsaTargetRegex("amdhsa.target:\\s+(?:'?)amdgcn-amd-amdhsa--(\\w+)(?:'?)");
  static const std::regex kernelNameRegex("\\.name:\\s+(\\w+)");
  static const std::regex privateSegmentSizeRegex("\\.private_segment_fixed_size:\\s+(\\d+)");

  ArchInfo archInfo;
  KernelInfo currKernelInfo;
  bool sizeSeen = false;  // true once a size line was parsed for the pending kernel

  for (const auto& line : list) {
    std::smatch match;
    if (std::regex_search(line, match, amdhsaTargetRegex)) {
      archInfo.archName = match[1];
    } else if (std::regex_search(line, match, kernelNameRegex)) {
      currKernelInfo.name = match[1];
    } else if (std::regex_search(line, match, privateSegmentSizeRegex)) {
      currKernelInfo.privateSegmentFixedSize = std::stoi(match[1]);
      sizeSeen = true;
    }

    if (!currKernelInfo.name.empty() && sizeSeen) {
      archInfo.kernels.push_back(currKernelInfo);
      currKernelInfo = {};  // Start collecting the next kernel
      sizeSeen = false;
    }
  }

  return archInfo;
}
#endif