коммит произвёл
GitHub
родитель
5669b0d7b6
Коммит
dc2d486ba0
@@ -503,7 +503,7 @@ list(APPEND HIP_SOURCES ${CMAKE_CURRENT_BINARY_DIR}/git_version.cpp)
|
||||
# Create a custom target that updates git_version.cpp and executes whenever rccl is built
|
||||
# NOTE(review): only the cmake/scripts/ copy of git_version.cmake is invoked here;
# the additional invocation through the older cmake/git_version.cmake path was
# dropped -- confirm that the script now lives only under cmake/scripts/.
add_custom_target(git_version_check
  COMMENT "Updating git_version.cpp if necessary"
  COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/scripts/git_version.cmake
  VERBATIM
)
|
||||
|
||||
@@ -583,6 +583,7 @@ if(DEMANGLE_DIR)
|
||||
target_compile_definitions(rccl PRIVATE "HAVE_DECL_BASENAME=1")
|
||||
endif()
|
||||
# Enable the LL128 protocol when hipcc reports version 6.1.33591 or newer.
# The version string is quoted so that an empty/unset value evaluates to false
# instead of producing a malformed if() expression.
# The decision is recorded both as the ENABLE_LL128 compile definition on rccl
# and in the LL128_ENABLED variable for other CMake code to query.
if("${hipcc_version_string}" VERSION_GREATER_EQUAL "6.1.33591")
  set(LL128_ENABLED ON)
  target_compile_definitions(rccl PRIVATE ENABLE_LL128)
  message(STATUS "RCCL LL128 protocol enabled")
endif()
|
||||
@@ -730,6 +731,12 @@ if(BUILD_TESTS)
|
||||
rocm_package_setup_component(clients)
|
||||
rocm_package_setup_client_component(tests PACKAGE_NAME unittests)
|
||||
add_subdirectory(test)
|
||||
|
||||
# After rccl has been built, run the metadata extraction script in CMake script mode.
add_custom_command(TARGET rccl POST_BUILD
  COMMENT "Extracting metadata from librccl.so"
  COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/scripts/extract_metadata.cmake
  VERBATIM
)
|
||||
endif()
|
||||
|
||||
rocm_create_package(
|
||||
|
||||
@@ -0,0 +1,55 @@
|
||||
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
## List the objects for each gfx architecture
|
||||
execute_process(
  COMMAND roc-obj-ls librccl.so
  RESULT_VARIABLE list_result
  OUTPUT_VARIABLE cmd_output
)

if(list_result EQUAL 0)
  ## Convert cmd output to list of lines
  string(REGEX REPLACE "\n$" "" cmd_output "${cmd_output}")
  string(REPLACE "\n" ";" cmd_output "${cmd_output}")

  ## Extract file paths for the selected gfx archs
  ## (the whole "file://..." match is kept, not just the captured group)
  set(file_paths "")
  foreach(line IN LISTS cmd_output)
    if(line MATCHES "(gfx90a|gfx940|gfx941|gfx942)")
      string(REGEX MATCH "file://(.*)" file_match "${line}")
      if(file_match)
        list(APPEND file_paths "${file_match}")
      endif()
    endif()
  endforeach()

  ## Extract objects from files
  foreach(file IN LISTS file_paths)
    execute_process(
      COMMAND roc-obj-extract "${file}"
      RESULT_VARIABLE extraction_result
    )
    if(NOT extraction_result EQUAL 0)
      message(WARNING "Could not extract objects from ${file}")
    endif()
  endforeach()
else()
  ## We don't want to stop building unit-tests if this command fails.
  ## Report the status captured from roc-obj-ls above.
  message(WARNING "Command failed with error code ${list_result}")
endif()
|
||||
@@ -68,9 +68,19 @@ if(BUILD_TESTS)
|
||||
endif()
|
||||
|
||||
add_executable(rccl-UnitTests ${COMMON_SOURCE_FILES} ${TEST_SOURCE_FILES})

## Set rccl-UnitTests include directories
target_include_directories(rccl-UnitTests PRIVATE ${ROCM_PATH} ${GTEST_INCLUDE_DIRS})
target_include_directories(rccl-UnitTests PRIVATE ${PROJECT_BINARY_DIR}/include)            # for generated rccl.h header
target_include_directories(rccl-UnitTests PRIVATE ${PROJECT_BINARY_DIR}/hipify/src/include) # for rccl_bfloat16.h

## Set rccl-UnitTests compile definitions
## ENABLE_LL128 is only defined for the tests when LL128_ENABLED is set.
if(LL128_ENABLED)
  target_compile_definitions(rccl-UnitTests PRIVATE ENABLE_LL128)
endif()
## ROCM_PATH is exposed to the test sources as a string literal.
target_compile_definitions(rccl-UnitTests PRIVATE ROCM_PATH="${ROCM_PATH}")

## Set rccl-UnitTests linked libraries
target_link_libraries(rccl-UnitTests PRIVATE ${GTEST_BOTH_LIBRARIES})
target_link_libraries(rccl-UnitTests PRIVATE hip::host hip::device hsa-runtime64::hsa-runtime64)
target_link_libraries(rccl-UnitTests PRIVATE Threads::Threads)
|
||||
|
||||
@@ -9,7 +9,11 @@
|
||||
|
||||
#include "StandaloneUtils.hpp"
|
||||
|
||||
namespace RcclUnitTesting {
|
||||
namespace RcclUnitTesting
|
||||
{
|
||||
/**
|
||||
* \brief Verify that each device is assigned to the right rank using ncclCommSplit API.
|
||||
* ******************************************************************************************/
|
||||
TEST(Standalone, SplitComms_RankCheck)
|
||||
{
|
||||
// Check for multi-gpu
|
||||
@@ -52,6 +56,9 @@ namespace RcclUnitTesting {
|
||||
NCCLCHECK(ncclCommDestroy(comm));
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Creates a communicator for each device and gathers them all in one rank.
|
||||
* ******************************************************************************************/
|
||||
TEST(Standalone, SplitComms_OneColor)
|
||||
{
|
||||
// Check for multi-gpu
|
||||
@@ -93,6 +100,9 @@ namespace RcclUnitTesting {
|
||||
NCCLCHECK(ncclCommDestroy(comm));
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Creates a communicator for each device and reduces them into (numDevices / 2) ranks.
|
||||
* ******************************************************************************************/
|
||||
TEST(Standalone, SplitComms_Reduce)
|
||||
{
|
||||
// Check for multi-gpu
|
||||
@@ -140,7 +150,10 @@ namespace RcclUnitTesting {
|
||||
for (auto& comm : comms)
|
||||
NCCLCHECK(ncclCommDestroy(comm));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* \brief Verify there is no regression in timing for each protocol [LL, LL128, Simple]
|
||||
* ******************************************************************************************/
|
||||
TEST(Standalone, RegressionTiming)
|
||||
{
|
||||
// timing
|
||||
@@ -241,4 +254,41 @@ namespace RcclUnitTesting {
|
||||
else
|
||||
unsetenv("NCCL_PROTO");
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Verify rccl generic kernel stack size for each gfx architecture is less than the
|
||||
* expected MAX_STACK_SIZE.
|
||||
* ******************************************************************************************/
|
||||
TEST(Standalone, StackSize) {
|
||||
const char* mainKernel = "rccl_main_kernel";
|
||||
|
||||
// Look for the .co files
|
||||
std::vector<std::string> coFileList = splitString(executeCommand("find ../ -type f -name \"*.co\""), '\n');
|
||||
|
||||
// Check if the .co files exist in the build directory
|
||||
if (coFileList.empty())
|
||||
GTEST_SKIP() << "Skipping... Could not found required files in the build directory.";
|
||||
|
||||
for (const auto& file : coFileList) {
|
||||
// Store the output in a list
|
||||
std::string cmd = std::string(ROCM_PATH) + "/llvm/bin/llvm-readelf --notes " + file;
|
||||
std::vector<std::string> metadata = splitString(executeCommand(cmd.c_str()), '\n');
|
||||
|
||||
// Skip if llvm is not installed
|
||||
if (metadata.empty())
|
||||
GTEST_SKIP() << "Skipping... llvm is not found.";
|
||||
|
||||
// Parse metadata from file and store it for each arch
|
||||
ArchInfo archInfo = parseMetadata(metadata);
|
||||
|
||||
// iterate over each archs kernels
|
||||
for (const auto& kernel : archInfo.kernels) {
|
||||
if (kernel.name.find(mainKernel) != std::string::npos) {
|
||||
// Kernel stack size should be less than or equal to the maxStackSize value
|
||||
printf("[ INFO ] Arch: %s Kernel: %s Size: %d\n", archInfo.archName.c_str(), kernel.name.c_str(), kernel.privateSegmentFixedSize);
|
||||
EXPECT_LE(kernel.privateSegmentFixedSize, archInfo.archName == "gfx90a" ? MAX_STACK_SIZE_gfx90a : MAX_STACK_SIZE);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,10 @@
|
||||
#ifndef STANDALONE_UTILS_H
|
||||
#define STANDALONE_UTILS_H
|
||||
|
||||
#include <cstdio>
#include <iostream>
#include <regex>
#include <sstream>
#include <string>
#include <vector>
|
||||
|
||||
#define HIPCALL(cmd) \
|
||||
do { \
|
||||
hipError_t error = (cmd); \
|
||||
@@ -20,4 +24,83 @@
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
// Generic upper bound for a kernel's private segment (stack) size.
#define MAX_STACK_SIZE 112

// gfx90a-specific bound: raised when ENABLE_LL128 is defined, otherwise the
// same as the generic bound.
#if defined(ENABLE_LL128)
#  define MAX_STACK_SIZE_gfx90a 288
#else
#  define MAX_STACK_SIZE_gfx90a MAX_STACK_SIZE
#endif
|
||||
|
||||
// A single kernel record: the kernel's name and the integer parsed from its
// .private_segment_fixed_size metadata field (0 until a value is assigned).
struct KernelInfo {
  std::string name;
  int         privateSegmentFixedSize{0};
};
|
||||
|
||||
struct ArchInfo {
|
||||
std::string archName;
|
||||
std::vector<KernelInfo> kernels;
|
||||
};
|
||||
|
||||
/**
 * \brief Run a command through popen() and capture everything it writes to stdout.
 *
 * \param cmd Command line passed to popen().
 * \return The captured output. Empty when the pipe could not be opened (an error is
 *         written to std::cerr in that case) or when the command printed nothing.
 *
 * Declared inline because this definition lives in a header; without it, including the
 * header from more than one translation unit yields multiple-definition link errors.
 */
inline std::string executeCommand(const char* cmd) {
  std::string result;

  FILE* pipe = popen(cmd, "r");
  if (!pipe) {
    std::cerr << "Error executing command: " << cmd << std::endl;
    return result;
  }

  // Drive the loop with fgets() itself: it returns NULL on end-of-file AND on a read
  // error, so a failing stream terminates the loop instead of spinning on !feof().
  char buffer[128];
  while (fgets(buffer, sizeof(buffer), pipe) != NULL) {
    result += buffer;
  }

  pclose(pipe);
  return result;
}
|
||||
|
||||
/**
 * \brief Split a string into pieces at every occurrence of a delimiter character.
 *
 * \param str       Text to split.
 * \param delimiter Character that separates the pieces; it is not included in the output.
 * \return The pieces in order. An empty input yields an empty vector, consecutive
 *         delimiters yield empty pieces, and a trailing delimiter does not add a
 *         trailing empty piece (std::getline semantics).
 *
 * Declared inline because this definition lives in a header; without it, including the
 * header from more than one translation unit yields multiple-definition link errors.
 */
inline std::vector<std::string> splitString(const std::string& str, char delimiter) {
  std::vector<std::string> result;
  std::istringstream iss(str);

  std::string line;
  while (std::getline(iss, line, delimiter)) {
    result.push_back(line);
  }

  return result;
}
|
||||
|
||||
|
||||
/**
 * \brief Collect the architecture name and per-kernel private segment sizes from a list of
 *        metadata text lines.
 *
 * Each line is tested, in order, against three patterns:
 *   - "amdhsa.target: ... amdgcn-amd-amdhsa--<arch>"  -> sets ArchInfo::archName
 *   - ".name: <identifier>"                           -> sets the pending kernel's name
 *   - ".private_segment_fixed_size: <digits>"         -> sets the pending kernel's size
 * As soon as the pending kernel has both a name and a size, it is appended to
 * ArchInfo::kernels and a fresh pending kernel is started.
 *
 * \param list Lines to scan.
 * \return The collected ArchInfo; archName stays empty and kernels stays empty when
 *         nothing matches.
 *
 * A separate flag tracks whether a size has been seen, so that a kernel whose size is 0
 * is still recorded (testing the size against 0 would silently drop it).
 *
 * Declared inline because this definition lives in a header; without it, including the
 * header from more than one translation unit yields multiple-definition link errors.
 */
inline ArchInfo parseMetadata(const std::vector<std::string>& list) {
  // The patterns never change, so compile them once instead of on every call.
  static const std::regex amdhsaTargetRegex("amdhsa.target:\\s+(?:'?)amdgcn-amd-amdhsa--(\\w+)(?:'?)");
  static const std::regex kernelNameRegex("\\.name:\\s+(\\w+)");
  static const std::regex privateSegmentSizeRegex("\\.private_segment_fixed_size:\\s+(\\d+)");

  ArchInfo archInfo;
  KernelInfo currKernelInfo;
  bool sizeSeen = false;  // true once a size line has been parsed for the pending kernel

  for (const auto& line : list) {
    std::smatch match;

    if (std::regex_search(line, match, amdhsaTargetRegex)) {
      archInfo.archName = match[1].str();
    } else if (std::regex_search(line, match, kernelNameRegex)) {
      // A later .name line overwrites an earlier one until a size completes the record.
      currKernelInfo.name = match[1].str();
    } else if (std::regex_search(line, match, privateSegmentSizeRegex)) {
      currKernelInfo.privateSegmentFixedSize = std::stoi(match[1].str());
      sizeSeen = true;
    }

    if (!currKernelInfo.name.empty() && sizeSeen) {
      archInfo.kernels.push_back(currKernelInfo);
      currKernelInfo = {};  // Start a fresh pending kernel
      sizeSeen = false;
    }
  }

  return archInfo;
}
|
||||
|
||||
#endif
|
||||
Ссылка в новой задаче
Block a user