Merge pull request #61 from gilbertlee-amd/master
Reducing scope of linker flags to rccl target to avoid warnings. Changing to a tagged version of googletest. Checking for chrpath.
This commit is contained in:
+6
-5
@@ -68,11 +68,6 @@ list(APPEND CMAKE_PREFIX_PATH
|
||||
|
||||
find_package(hip REQUIRED)
|
||||
|
||||
link_libraries(-amdgpu-target=gfx803
|
||||
-amdgpu-target=gfx900
|
||||
-amdgpu-target=gfx906
|
||||
-hc-function-calls)
|
||||
|
||||
option(BUILD_SHARED_LIBS "Build as a shared library" ON)
|
||||
|
||||
configure_file(src/nccl.h.in ${PROJECT_BINARY_DIR}/rccl.h)
|
||||
@@ -138,6 +133,12 @@ if(TRACE)
|
||||
add_definitions(-DENABLE_TRACE)
|
||||
endif()
|
||||
|
||||
target_link_libraries(rccl
|
||||
PRIVATE -amdgpu-target=gfx803
|
||||
PRIVATE -amdgpu-target=gfx900
|
||||
PRIVATE -amdgpu-target=gfx906
|
||||
PRIVATE -hc-function-calls)
|
||||
|
||||
if(TARGET hip::device)
|
||||
target_link_libraries(rccl PRIVATE hip::device)
|
||||
target_link_libraries(rccl INTERFACE hip::host)
|
||||
|
||||
@@ -4,6 +4,12 @@ if(BUILD_TESTS)
|
||||
|
||||
message("Going to build unit tests (Installed in /test/UnitTests)")
|
||||
|
||||
# chrpath is required to properly set rpath for the UnitTests executable
|
||||
find_program(CHRPATH chrpath)
|
||||
if(NOT CHRPATH)
|
||||
message(FATAL_ERROR "chrpath is required for UnitTests. Please install (e.g. sudo apt-get install chrpath)")
|
||||
endif()
|
||||
|
||||
# OpenMP is used to drive GPUs (one per thread)
|
||||
find_package(OpenMP REQUIRED)
|
||||
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
|
||||
|
||||
@@ -5,7 +5,7 @@ project(googletest-download NONE)
|
||||
include(ExternalProject)
|
||||
ExternalProject_Add(googletest
|
||||
GIT_REPOSITORY https://github.com/google/googletest.git
|
||||
GIT_TAG master
|
||||
GIT_TAG release-1.8.1
|
||||
SOURCE_DIR "${CMAKE_BINARY_DIR}/googletest-src"
|
||||
BINARY_DIR "${CMAKE_BINARY_DIR}/googletest-build"
|
||||
CONFIGURE_COMMAND ""
|
||||
|
||||
@@ -154,7 +154,10 @@ namespace CorrectnessTests
|
||||
// Create streams
|
||||
streams.resize(numDevices);
|
||||
for (int i = 0; i < numDevices; i++)
|
||||
{
|
||||
HIP_CALL(hipSetDevice(i));
|
||||
HIP_CALL(hipStreamCreate(&streams[i]));
|
||||
}
|
||||
}
|
||||
|
||||
// Clean up per TestTuple
|
||||
@@ -219,6 +222,16 @@ namespace CorrectnessTests
|
||||
free(arrayI1);
|
||||
}
|
||||
|
||||
void Synchronize() const
|
||||
{
|
||||
// Wait for reduction to complete
|
||||
for (int i = 0; i < numDevices; i++)
|
||||
{
|
||||
HIP_CALL(hipSetDevice(i));
|
||||
HIP_CALL(hipStreamSynchronize(streams[i]));
|
||||
}
|
||||
}
|
||||
|
||||
void ValidateResults(Dataset const& dataset) const
|
||||
{
|
||||
int8_t* outputI1 = (int8_t *)malloc(dataset.NumBytes());
|
||||
|
||||
+22
-23
@@ -32,32 +32,31 @@ namespace CorrectnessTests
|
||||
}
|
||||
|
||||
// Wait for reduction to complete
|
||||
for (int i = 0; i < numDevices; i++)
|
||||
hipStreamSynchronize(streams[i]);
|
||||
Synchronize();
|
||||
|
||||
// Check results
|
||||
ValidateResults(dataset);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(AllGatherCorrectnessSweep,
|
||||
AllGatherCorrectnessTest,
|
||||
testing::Combine(
|
||||
// Reduction operator (not used)
|
||||
testing::Values(ncclSum),
|
||||
// Data types
|
||||
testing::Values(ncclInt8,
|
||||
ncclUint8,
|
||||
ncclInt32,
|
||||
ncclUint32,
|
||||
ncclInt64,
|
||||
ncclUint64,
|
||||
//ncclFloat16,
|
||||
ncclFloat32,
|
||||
ncclFloat64),
|
||||
// Number of elements
|
||||
testing::Values(3072, 3145728),
|
||||
// Number of devices
|
||||
testing::Values(2,3,4),
|
||||
// In-place or not
|
||||
testing::Values(false, true)));
|
||||
INSTANTIATE_TEST_CASE_P(AllGatherCorrectnessSweep,
|
||||
AllGatherCorrectnessTest,
|
||||
testing::Combine(
|
||||
// Reduction operator (not used)
|
||||
testing::Values(ncclSum),
|
||||
// Data types
|
||||
testing::Values(ncclInt8,
|
||||
ncclUint8,
|
||||
ncclInt32,
|
||||
ncclUint32,
|
||||
ncclInt64,
|
||||
ncclUint64,
|
||||
//ncclFloat16,
|
||||
ncclFloat32,
|
||||
ncclFloat64),
|
||||
// Number of elements
|
||||
testing::Values(3072, 3145728),
|
||||
// Number of devices
|
||||
testing::Values(2,3,4),
|
||||
// In-place or not
|
||||
testing::Values(false, true)));
|
||||
} // namespace
|
||||
|
||||
+22
-23
@@ -28,32 +28,31 @@ namespace CorrectnessTests
|
||||
}
|
||||
|
||||
// Wait for reduction to complete
|
||||
for (int i = 0; i < numDevices; i++)
|
||||
hipStreamSynchronize(streams[i]);
|
||||
Synchronize();
|
||||
|
||||
// Check results
|
||||
ValidateResults(dataset);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(AllReduceCorrectnessSweep,
|
||||
AllReduceCorrectnessTest,
|
||||
testing::Combine(
|
||||
// Reduction operator
|
||||
testing::Values(ncclSum, ncclProd, ncclMax, ncclMin),
|
||||
// Data types
|
||||
testing::Values(ncclInt8,
|
||||
ncclUint8,
|
||||
ncclInt32,
|
||||
ncclUint32,
|
||||
ncclInt64,
|
||||
ncclUint64,
|
||||
//ncclFloat16,
|
||||
ncclFloat32,
|
||||
ncclFloat64),
|
||||
// Number of elements
|
||||
testing::Values(1024, 1048576),
|
||||
// Number of devices
|
||||
testing::Values(2,3,4),
|
||||
// In-place or not
|
||||
testing::Values(false, true)));
|
||||
INSTANTIATE_TEST_CASE_P(AllReduceCorrectnessSweep,
|
||||
AllReduceCorrectnessTest,
|
||||
testing::Combine(
|
||||
// Reduction operator
|
||||
testing::Values(ncclSum, ncclProd, ncclMax, ncclMin),
|
||||
// Data types
|
||||
testing::Values(ncclInt8,
|
||||
ncclUint8,
|
||||
ncclInt32,
|
||||
ncclUint32,
|
||||
ncclInt64,
|
||||
ncclUint64,
|
||||
//ncclFloat16,
|
||||
ncclFloat32,
|
||||
ncclFloat64),
|
||||
// Number of elements
|
||||
testing::Values(1024, 1048576),
|
||||
// Number of devices
|
||||
testing::Values(2,3,4),
|
||||
// In-place or not
|
||||
testing::Values(false, true)));
|
||||
} // namespace
|
||||
|
||||
+23
-23
@@ -34,34 +34,34 @@ namespace CorrectnessTests
|
||||
root, comms[i], streams[i]);
|
||||
}
|
||||
|
||||
|
||||
// Wait for reduction to complete
|
||||
for (int i = 0; i < numDevices; i++)
|
||||
hipStreamSynchronize(streams[i]);
|
||||
Synchronize();
|
||||
|
||||
// Check results
|
||||
ValidateResults(dataset);
|
||||
}
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(BroadcastCorrectnessSweep,
|
||||
BroadcastCorrectnessTest,
|
||||
testing::Combine(
|
||||
// Reduction operator is not used
|
||||
testing::Values(ncclSum),
|
||||
// Data types
|
||||
testing::Values(ncclInt8,
|
||||
ncclUint8,
|
||||
ncclInt32,
|
||||
ncclUint32,
|
||||
ncclInt64,
|
||||
ncclUint64,
|
||||
//ncclFloat16,
|
||||
ncclFloat32,
|
||||
ncclFloat64),
|
||||
// Number of elements
|
||||
testing::Values(1024, 1048576),
|
||||
// Number of devices
|
||||
testing::Values(2,3,4),
|
||||
// In-place or not
|
||||
testing::Values(false, true)));
|
||||
INSTANTIATE_TEST_CASE_P(BroadcastCorrectnessSweep,
|
||||
BroadcastCorrectnessTest,
|
||||
testing::Combine(
|
||||
// Reduction operator is not used
|
||||
testing::Values(ncclSum),
|
||||
// Data types
|
||||
testing::Values(ncclInt8,
|
||||
ncclUint8,
|
||||
ncclInt32,
|
||||
ncclUint32,
|
||||
ncclInt64,
|
||||
ncclUint64,
|
||||
//ncclFloat16,
|
||||
ncclFloat32,
|
||||
ncclFloat64),
|
||||
// Number of elements
|
||||
testing::Values(1024, 1048576),
|
||||
// Number of devices
|
||||
testing::Values(2,3,4),
|
||||
// In-place or not
|
||||
testing::Values(false, true)));
|
||||
} // namespace
|
||||
|
||||
@@ -43,7 +43,6 @@ namespace CorrectnessTests
|
||||
size_t const elemCount = numElements / numDevices;
|
||||
for (int i = 0; i < numDevices; i++)
|
||||
{
|
||||
HIP_CALL(hipSetDevice(i));
|
||||
ncclAllGather((int8_t *)datasets[0].inputs[i] + (i * byteCount),
|
||||
datasets[0].outputs[i], elemCount,
|
||||
dataType, comms[i], streams[i]);
|
||||
@@ -52,7 +51,6 @@ namespace CorrectnessTests
|
||||
// AllReduce
|
||||
for (int i = 0; i < numDevices; i++)
|
||||
{
|
||||
HIP_CALL(hipSetDevice(i));
|
||||
ncclAllReduce(datasets[1].inputs[i], datasets[1].outputs[i],
|
||||
numElements, dataType, op, comms[i], streams[i]);
|
||||
}
|
||||
@@ -60,7 +58,6 @@ namespace CorrectnessTests
|
||||
// Broadcast
|
||||
for (int i = 0; i < numDevices; i++)
|
||||
{
|
||||
HIP_CALL(hipSetDevice(i));
|
||||
ncclBroadcast(datasets[2].inputs[i],
|
||||
datasets[2].outputs[i],
|
||||
numElements, dataType,
|
||||
@@ -70,7 +67,6 @@ namespace CorrectnessTests
|
||||
// Reduce
|
||||
for (int i = 0; i < numDevices; i++)
|
||||
{
|
||||
HIP_CALL(hipSetDevice(i));
|
||||
ncclReduce(datasets[3].inputs[i],
|
||||
datasets[3].outputs[i],
|
||||
numElements, dataType, op,
|
||||
@@ -84,15 +80,13 @@ namespace CorrectnessTests
|
||||
(int8_t *)datasets[4].outputs[i] + (i * byteCount),
|
||||
elemCount, dataType, op,
|
||||
comms[i], streams[i]);
|
||||
HIP_CALL(hipSetDevice(i));
|
||||
}
|
||||
|
||||
// Signal end of group call
|
||||
ncclGroupEnd();
|
||||
|
||||
// Wait for reduction to complete
|
||||
for (int i = 0; i < numDevices; i++)
|
||||
hipStreamSynchronize(streams[i]);
|
||||
Synchronize();
|
||||
|
||||
// Check results for each collective in the group
|
||||
for (int i = 0; i < 5; i++)
|
||||
@@ -101,25 +95,25 @@ namespace CorrectnessTests
|
||||
}
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(GroupCallsCorrectnessSweep,
|
||||
GroupCallsCorrectnessTest,
|
||||
testing::Combine(
|
||||
// Reduction operator (not used)
|
||||
testing::Values(ncclSum),
|
||||
// Data types
|
||||
testing::Values(ncclInt8,
|
||||
ncclUint8,
|
||||
ncclInt32,
|
||||
ncclUint32,
|
||||
ncclInt64,
|
||||
ncclUint64,
|
||||
//ncclFloat16,
|
||||
ncclFloat32,
|
||||
ncclFloat64),
|
||||
// Number of elements
|
||||
testing::Values(3072, 3145728),
|
||||
// Number of devices
|
||||
testing::Values(2,3,4),
|
||||
// In-place or not
|
||||
testing::Values(false, true)));
|
||||
INSTANTIATE_TEST_CASE_P(GroupCallsCorrectnessSweep,
|
||||
GroupCallsCorrectnessTest,
|
||||
testing::Combine(
|
||||
// Reduction operator (not used)
|
||||
testing::Values(ncclSum),
|
||||
// Data types
|
||||
testing::Values(ncclInt8,
|
||||
ncclUint8,
|
||||
ncclInt32,
|
||||
ncclUint32,
|
||||
ncclInt64,
|
||||
ncclUint64,
|
||||
//ncclFloat16,
|
||||
ncclFloat32,
|
||||
ncclFloat64),
|
||||
// Number of elements
|
||||
testing::Values(3072, 3145728),
|
||||
// Number of devices
|
||||
testing::Values(2,3,4),
|
||||
// In-place or not
|
||||
testing::Values(false, true)));
|
||||
} // namespace
|
||||
|
||||
+22
-23
@@ -35,33 +35,32 @@ namespace CorrectnessTests
|
||||
}
|
||||
|
||||
// Wait for reduction to complete
|
||||
for (int i = 0; i < numDevices; i++)
|
||||
hipStreamSynchronize(streams[i]);
|
||||
Synchronize();
|
||||
|
||||
// Check results
|
||||
ValidateResults(dataset);
|
||||
}
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(ReduceCorrectnessSweep,
|
||||
ReduceCorrectnessTest,
|
||||
testing::Combine(
|
||||
// Reduction operator
|
||||
testing::Values(ncclSum, ncclProd, ncclMax, ncclMin),
|
||||
// Data types
|
||||
testing::Values(ncclInt8,
|
||||
ncclUint8,
|
||||
ncclInt32,
|
||||
ncclUint32,
|
||||
ncclInt64,
|
||||
ncclUint64,
|
||||
//ncclFloat16,
|
||||
ncclFloat32,
|
||||
ncclFloat64),
|
||||
// Number of elements
|
||||
testing::Values(1024, 1048576),
|
||||
// Number of devices
|
||||
testing::Values(2,3,4),
|
||||
// In-place or not
|
||||
testing::Values(false, true)));
|
||||
INSTANTIATE_TEST_CASE_P(ReduceCorrectnessSweep,
|
||||
ReduceCorrectnessTest,
|
||||
testing::Combine(
|
||||
// Reduction operator
|
||||
testing::Values(ncclSum, ncclProd, ncclMax, ncclMin),
|
||||
// Data types
|
||||
testing::Values(ncclInt8,
|
||||
ncclUint8,
|
||||
ncclInt32,
|
||||
ncclUint32,
|
||||
ncclInt64,
|
||||
ncclUint64,
|
||||
//ncclFloat16,
|
||||
ncclFloat32,
|
||||
ncclFloat64),
|
||||
// Number of elements
|
||||
testing::Values(1024, 1048576),
|
||||
// Number of devices
|
||||
testing::Values(2,3,4),
|
||||
// In-place or not
|
||||
testing::Values(false, true)));
|
||||
} // namespace
|
||||
|
||||
@@ -33,33 +33,33 @@ namespace CorrectnessTests
|
||||
comms[i], streams[i]);
|
||||
}
|
||||
|
||||
|
||||
// Wait for reduction to complete
|
||||
for (int i = 0; i < numDevices; i++)
|
||||
hipStreamSynchronize(streams[i]);
|
||||
Synchronize();
|
||||
|
||||
// Check results
|
||||
ValidateResults(dataset);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(ReduceScatterCorrectnessSweep,
|
||||
ReduceScatterCorrectnessTest,
|
||||
testing::Combine(
|
||||
// Reduction operator
|
||||
testing::Values(ncclSum, ncclProd, ncclMax, ncclMin),
|
||||
// Data types
|
||||
testing::Values(ncclInt8,
|
||||
ncclUint8,
|
||||
ncclInt32,
|
||||
ncclUint32,
|
||||
ncclInt64,
|
||||
ncclUint64,
|
||||
//ncclFloat16,
|
||||
ncclFloat32,
|
||||
ncclFloat64),
|
||||
// Number of elements
|
||||
testing::Values(3072, 3145728),
|
||||
// Number of devices
|
||||
testing::Values(2,3,4),
|
||||
// In-place or not
|
||||
testing::Values(false, true)));
|
||||
INSTANTIATE_TEST_CASE_P(ReduceScatterCorrectnessSweep,
|
||||
ReduceScatterCorrectnessTest,
|
||||
testing::Combine(
|
||||
// Reduction operator
|
||||
testing::Values(ncclSum, ncclProd, ncclMax, ncclMin),
|
||||
// Data types
|
||||
testing::Values(ncclInt8,
|
||||
ncclUint8,
|
||||
ncclInt32,
|
||||
ncclUint32,
|
||||
ncclInt64,
|
||||
ncclUint64,
|
||||
//ncclFloat16,
|
||||
ncclFloat32,
|
||||
ncclFloat64),
|
||||
// Number of elements
|
||||
testing::Values(3072, 3145728),
|
||||
// Number of devices
|
||||
testing::Values(2,3,4),
|
||||
// In-place or not
|
||||
testing::Values(false, true)));
|
||||
} // namespace
|
||||
|
||||
مرجع در شماره جدید
Block a user