Disable Colltrace for --fast option (#778)
* Disable Colltrace for --fast option
* Limit nprocs for CI
Tento commit je obsažen v:
@@ -17,7 +17,7 @@ def runCI =
|
||||
def prj = new rocProject('rccl', 'Extended')
|
||||
|
||||
prj.timeout.test = 600
|
||||
prj.paths.build_command = './install.sh -t --npkit-enable '
|
||||
prj.paths.build_command = './install.sh -t --npkit-enable --limit-nprocs'
|
||||
|
||||
// Define test architectures, optional rocm version argument is available
|
||||
def nodes = new dockerNodes(nodeDetails, jobName, prj)
|
||||
|
||||
@@ -18,7 +18,7 @@ def runCI =
|
||||
def prj = new rocProject('rccl', 'PreCheckin')
|
||||
|
||||
prj.timeout.test = 300
|
||||
prj.paths.build_command = './install.sh -t --fast'
|
||||
prj.paths.build_command = './install.sh -t --fast --limit-nprocs'
|
||||
|
||||
// Define test architectures, optional rocm version argument is available
|
||||
def nodes = new dockerNodes(nodeDetails, jobName, prj)
|
||||
|
||||
@@ -12,7 +12,7 @@ def runCI =
|
||||
def prj = new rocProject('rccl', 'Static Library PreCheckin')
|
||||
|
||||
prj.timeout.test = 1440
|
||||
prj.paths.build_command = './install.sh -t --static'
|
||||
prj.paths.build_command = './install.sh -t --static --limit-nprocs'
|
||||
|
||||
def nodes = new dockerNodes(nodeDetails, jobName, prj)
|
||||
|
||||
|
||||
+47
-30
@@ -13,11 +13,12 @@ function display_help()
|
||||
echo " -d|--dependencies Install RCCL depdencencies"
|
||||
echo " --debug Build debug library"
|
||||
echo " --disable_backtrace Build without custom backtrace support"
|
||||
echo " --fast Quick-build RCCL (local gpu arch only, no backtrace support)"
|
||||
echo " --disable-colltrace Build without collective trace"
|
||||
echo " --fast Quick-build RCCL (local gpu arch only, no backtrace, and collective trace support)"
|
||||
echo " -h|--help Prints this help message"
|
||||
echo " -i|--install Install RCCL library (see --prefix argument below)"
|
||||
echo " -l|--limit-nprocs Limit the number of procs to 16 while building"
|
||||
echo " --local_gpu_only Only compile for local GPU architecture"
|
||||
echo " --max-jobs Use nproc instead of default number of 16"
|
||||
echo " --no_clean Don't delete files if they already exist"
|
||||
echo " --npkit-enable Compile with npkit enabled"
|
||||
echo " -p|--package_build Build RCCL package"
|
||||
@@ -38,6 +39,7 @@ ROCM_PATH=${ROCM_PATH:="/opt/rocm"}
|
||||
|
||||
build_address_sanitizer=false
|
||||
build_allreduce_only=false
|
||||
collective_trace=true
|
||||
install_dependencies=false
|
||||
build_release=true
|
||||
build_bfd=true
|
||||
@@ -53,7 +55,7 @@ build_static=false
|
||||
build_tests=false
|
||||
build_verbose=0
|
||||
time_trace=false
|
||||
enable_all_jobs=false
|
||||
enable_all_jobs=true
|
||||
enable_ninja=""
|
||||
|
||||
# #################################################
|
||||
@@ -63,7 +65,7 @@ enable_ninja=""
|
||||
# check if we have a modern version of getopt that can handle whitespace and long parameters
|
||||
getopt -T
|
||||
if [[ $? -eq 4 ]]; then
|
||||
GETOPT_PARSE=$(getopt --name "${0}" --longoptions address-sanitizer,build_allreduce_only,dependencies,debug,disable_backtrace,fast,help,install,local_gpu_only,no_clean,npkit-enable,package_build,prefix:,rm-legacy-include-dir,run_tests_all,run_tests_quick,tests_build,time-trace,max-jobs,verbose --options hidptrs -- "$@")
|
||||
GETOPT_PARSE=$(getopt --name "${0}" --longoptions address-sanitizer,build_allreduce_only,dependencies,debug,disable_backtrace,disable-colltrace,fast,help,install,limit-nprocs,local_gpu_only,no_clean,npkit-enable,package_build,prefix:,rm-legacy-include-dir,run_tests_all,run_tests_quick,tests_build,time-trace,verbose --options hidptrs -- "$@")
|
||||
else
|
||||
echo "Need a new version of getopt"
|
||||
exit 1
|
||||
@@ -78,27 +80,28 @@ eval set -- "${GETOPT_PARSE}"
|
||||
|
||||
while true; do
|
||||
case "${1}" in
|
||||
--address-sanitizer) build_address_sanitizer=true; shift ;;
|
||||
--build_allreduce_only) build_allreduce_only=true; shift ;;
|
||||
-d | --dependencies) install_dependencies=true; shift ;;
|
||||
--debug) build_release=false; shift ;;
|
||||
--disable_backtrace) build_bfd=false; shift ;;
|
||||
--fast) build_bfd=false; build_local_gpu_only=true; shift ;;
|
||||
-h | --help) display_help; exit 0 ;;
|
||||
-i | --install) install_library=true; shift ;;
|
||||
--local_gpu_only) build_local_gpu_only=true; shift ;;
|
||||
--max-jobs) enable_all_jobs=true; shift ;;
|
||||
--no_clean) clean_build=false; shift ;;
|
||||
--npkit-enable) npkit_enabled=true; shift ;;
|
||||
-p | --package_build) build_package=true; shift ;;
|
||||
--prefix) install_prefix=${2} shift 2 ;;
|
||||
--rm-legacy-include-dir) build_freorg_bkwdcomp=false; shift ;;
|
||||
-r | --run_tests_quick) run_tests=true; shift ;;
|
||||
--run_tests_all) run_tests=true; run_tests_all=true; shift ;;
|
||||
--static) build_static=true; shift ;;
|
||||
-t | --tests_build) build_tests=true; shift ;;
|
||||
--time-trace) time_trace=true; shift ;;
|
||||
--verbose) build_verbose=1; shift ;;
|
||||
--address-sanitizer) build_address_sanitizer=true; shift ;;
|
||||
--build_allreduce_only) build_allreduce_only=true; shift ;;
|
||||
-d | --dependencies) install_dependencies=true; shift ;;
|
||||
--debug) build_release=false; shift ;;
|
||||
--disable_backtrace) build_bfd=false; shift ;;
|
||||
--disable-colltrace) collective_trace=false; shift ;;
|
||||
--fast) build_bfd=false; build_local_gpu_only=true; collective_trace=false; shift ;;
|
||||
-h | --help) display_help; exit 0 ;;
|
||||
-i | --install) install_library=true; shift ;;
|
||||
-l | --limit-nprocs) enable_all_jobs=false; shift ;;
|
||||
--local_gpu_only) build_local_gpu_only=true; shift ;;
|
||||
--no_clean) clean_build=false; shift ;;
|
||||
--npkit-enable) npkit_enabled=true; shift ;;
|
||||
-p | --package_build) build_package=true; shift ;;
|
||||
--prefix) install_prefix=${2} shift 2 ;;
|
||||
--rm-legacy-include-dir) build_freorg_bkwdcomp=false; shift ;;
|
||||
-r | --run_tests_quick) run_tests=true; shift ;;
|
||||
--run_tests_all) run_tests=true; run_tests_all=true; shift ;;
|
||||
--static) build_static=true; shift ;;
|
||||
-t | --tests_build) build_tests=true; shift ;;
|
||||
--time-trace) time_trace=true; shift ;;
|
||||
--verbose) build_verbose=1; shift ;;
|
||||
--) shift ; break ;;
|
||||
*) echo "Unexpected command line parameter received; aborting";
|
||||
exit 1
|
||||
@@ -199,6 +202,10 @@ if [[ "${build_static}" == true ]]; then
|
||||
cmake_common_options="${cmake_common_options} -DBUILD_SHARED_LIBS=OFF"
|
||||
fi
|
||||
|
||||
# Disable collective trace
|
||||
if [[ "${collective_trace}" == false ]]; then
|
||||
cmake_common_options="${cmake_common_options} -DCOLLTRACE=OFF"
|
||||
fi
|
||||
|
||||
# Install dependencies
|
||||
if ($install_dependencies); then
|
||||
@@ -344,9 +351,19 @@ if ($run_tests); then
|
||||
fi
|
||||
fi
|
||||
|
||||
if ($time_trace) then
|
||||
cd ../../tools/time-trace
|
||||
chmod +x ./rccl-TimeTrace.sh
|
||||
echo "Generating RCCL-compile-timeline.html..."
|
||||
./rccl-TimeTrace.sh
|
||||
if ($time_trace); then
|
||||
search_dir="../../"
|
||||
time_trace_dir=$(find "$search_dir" -type d -name "time-trace" -print -quit)
|
||||
|
||||
if [ "$time_trace_dir" ]; then
|
||||
time_trace_script="$time_trace_dir/rccl-TimeTrace.sh"
|
||||
if [ -x "$time_trace_script" ]; then
|
||||
echo "Generating RCCL-compile-timeline.html..."
|
||||
(cd "$time_trace_dir" && ./rccl-TimeTrace.sh)
|
||||
else
|
||||
echo "Error: Unable to execute $time_trace_script. Make sure the file has the correct permissions."
|
||||
fi
|
||||
else
|
||||
echo "Error: time-trace folder not found in $search_dir."
|
||||
fi
|
||||
fi
|
||||
@@ -341,6 +341,10 @@ class ncclFunction {
|
||||
collTrace->type = ncclCollTraceDataType; \
|
||||
}
|
||||
#else
|
||||
#define traceColl(launch_type)
|
||||
#define traceKernelLaunch(firstLaunch)
|
||||
#define traceKernelEnd()
|
||||
#define traceAbort()
|
||||
#define traceData(data2, data4, data8_0, data8_1)
|
||||
#endif
|
||||
|
||||
@@ -519,7 +523,6 @@ __forceinline__ __device__ void ncclKernel(
|
||||
}
|
||||
#endif
|
||||
if (tid == 0) __insert_timestamp(__LINE__);
|
||||
|
||||
if (COLLTRACE && tid == 0) traceKernelLaunch(true);
|
||||
|
||||
while (true) {
|
||||
@@ -563,6 +566,7 @@ __forceinline__ __device__ void ncclKernel(
|
||||
if (COLLTRACE && tid == 0) traceColl(false);
|
||||
}
|
||||
if (COLLTRACE && tid == 0) traceKernelEnd();
|
||||
|
||||
#ifdef ENABLE_PROFILING
|
||||
if (ncclShmem.comm.devProf->seq < PROFILE_NUM_LAUNCHES) {
|
||||
__synclds();
|
||||
@@ -572,6 +576,7 @@ __forceinline__ __device__ void ncclKernel(
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef ENABLE_COLLTRACE
|
||||
#define IMPL_COLL_KERN(func, algo, proto, devredop, type, fIndex) \
|
||||
__launch_bounds__(NCCL_MAX_NTHREADS, 1) \
|
||||
__global__ void NCCL_KERN_NAME(func, algo, proto, devredop, type)(struct ncclDevComm* comm, uint64_t channelMask, struct ncclWork* workHead) { \
|
||||
@@ -582,6 +587,13 @@ __launch_bounds__(NCCL_MAX_NTHREADS, 1) \
|
||||
__global__ void NCCL_KERN_NAME_DEBUG(func, algo, proto, devredop, type)(struct ncclDevComm* comm, uint64_t channelMask, struct ncclWork* workHead) { \
|
||||
ncclKernel<ncclFunc##func, type, Func##devredop<type>, NCCL_ALGO_##algo, NCCL_PROTO_##proto, fIndex, true>(comm, channelMask, workHead); \
|
||||
}
|
||||
#else
|
||||
#define IMPL_COLL_KERN(func, algo, proto, devredop, type, fIndex) \
|
||||
__launch_bounds__(NCCL_MAX_NTHREADS, 1) \
|
||||
__global__ void NCCL_KERN_NAME(func, algo, proto, devredop, type)(struct ncclDevComm* comm, uint64_t channelMask, struct ncclWork* workHead) { \
|
||||
ncclKernel<ncclFunc##func, type, Func##devredop<type>, NCCL_ALGO_##algo, NCCL_PROTO_##proto, fIndex, false>(comm, channelMask, workHead); \
|
||||
}
|
||||
#endif
|
||||
|
||||
// Examples : AllReduce, RING, LL, Sum, uint8
|
||||
/* Functions for aggregation case */
|
||||
|
||||
@@ -28,11 +28,18 @@ struct ncclKernelMatch {
|
||||
};
|
||||
|
||||
typedef void(*ncclKern_t)(struct ncclDevComm* comm, uint64_t channelMask, struct ncclWork* workHead);
|
||||
|
||||
// Must be consistent with the ncclFuncSet enum
|
||||
#ifdef ENABLE_COLLTRACE
|
||||
static ncclKernelMatch const ncclKerns[2] = {
|
||||
{(void *)NCCL_KERN_NAME(SendRecv, RING, SIMPLE, Sum, int8_t), true},
|
||||
{(void *)NCCL_KERN_NAME_DEBUG(SendRecv, RING, SIMPLE, Sum, int8_t), true},
|
||||
};
|
||||
#else
|
||||
static ncclKernelMatch const ncclKerns[1] = {
|
||||
{(void*)NCCL_KERN_NAME(SendRecv, RING, SIMPLE, Sum, int8_t), true}
|
||||
};
|
||||
#endif
|
||||
|
||||
static ncclResult_t computeColl(struct ncclInfo* info /* input */, int* workFuncIndex, struct ncclWorkElem* work, struct ncclProxyOp* proxyOp /* output */);
|
||||
|
||||
|
||||
@@ -224,7 +224,10 @@ static float ncclTopoXGMISpeed(int gcn) {
|
||||
return gcn == 910 ? MI200_XGMI_WIDTH : VEGA_XGMI_WIDTH;
|
||||
}
|
||||
|
||||
#define ncclGetKernelIndex(p_comm) \
|
||||
((p_comm)->collTraceThread ? 1 : 0)
|
||||
#if ENABLE_COLLTRACE
|
||||
#define ncclGetKernelIndex(p_comm) ((p_comm)->collTraceThread ? 1 : 0)
|
||||
#else
|
||||
#define ncclGetKernelIndex(p_comm) (0)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -4,9 +4,6 @@ import pandas as pd
|
||||
import plotly.graph_objects as go
|
||||
import argparse
|
||||
|
||||
# Specify the path to the .log file
|
||||
log_file = '../../build/release/time_trace.log'
|
||||
|
||||
def generateRandomColors(df, colorList):
|
||||
|
||||
for _ in range(len(df)):
|
||||
@@ -102,8 +99,14 @@ def plotCompileTime(log_file, minVal):
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--min_val", nargs='?', default='5', type=int, help="Ignore any if it's less than the value provided.")
|
||||
parser.add_argument("--include_linking", action='store_true', help="Include linking when plotting.")
|
||||
parser.add_argument("--min_val", nargs='?', default='5', type=int, help="Ignore any if it's less than the value provided")
|
||||
parser.add_argument("--include_linking", action='store_true', help="Include linking when plotting")
|
||||
parser.add_argument("--log_file_path", type=str, help="Location of the log file generated with --time-trace flag")
|
||||
args = parser.parse_args()
|
||||
|
||||
plotCompileTime(log_file, args.min_val)
|
||||
|
||||
if args.log_file_path is not None:
|
||||
log_file_path = args.log_file_path
|
||||
else:
|
||||
log_file_path = '../../build/release/time_trace.log'
|
||||
|
||||
plotCompileTime(log_file_path, args.min_val)
|
||||
Odkázat v novém úkolu
Zablokovat Uživatele