Merge pull request #20 from edgargabriel/topic/remove-internal-dir
remove internal directory
[ROCm/rocshmem commit: 12aeab1a59]
This commit is contained in:
@@ -1,95 +0,0 @@
|
||||
###############################################################################
|
||||
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to
|
||||
# deal in the Software without restriction, including without limitation the
|
||||
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
# sell copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
###############################################################################
|
||||
|
||||
cmake_minimum_required(VERSION 3.16.3 FATAL_ERROR)
|
||||
|
||||
###############################################################################
|
||||
# GLOBAL COMPILE FLAGS
|
||||
###############################################################################
|
||||
# Require the requested C++ standard without compiler-specific extensions.
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Default to the ROCm hipcc driver, but do not clobber a compiler the user
# selected explicitly (-DCMAKE_CXX_COMPILER=... or a toolchain file), e.g. when
# ROCm is not installed under /opt/rocm.
if(NOT DEFINED CMAKE_CXX_COMPILER)
  set(CMAKE_CXX_COMPILER /opt/rocm/bin/hipcc)
endif()

# Debug builds: no optimization, gdb-flavoured debug info.
set(CMAKE_CXX_FLAGS_DEBUG "-O0 -ggdb")
|
||||
|
||||
###############################################################################
|
||||
# DEFAULT BUILD TYPE
|
||||
###############################################################################
|
||||
# Fall back to a Release build when the caller did not choose a build type.
# FORCE is needed so the (empty) cache entry is actually overwritten.
if(NOT CMAKE_BUILD_TYPE)
  message(STATUS "CMAKE_BUILD_TYPE unspecified: generating Release build")

  set(CMAKE_BUILD_TYPE "Release"
      CACHE STRING "build type: Release, Debug, RelWithDebInfo, MinSizeRel"
      FORCE)
endif()
|
||||
|
||||
###############################################################################
|
||||
# PROJECT
|
||||
###############################################################################
|
||||
project(rocshmem_example_driver VERSION 1.1.0 LANGUAGES CXX)

###############################################################################
# SOURCES
###############################################################################
# The executable takes the project name; its sources are attached below.
add_executable(${PROJECT_NAME} "")

target_sources(
  ${PROJECT_NAME}
  PRIVATE
    test_driver.cpp
    tester.cpp
    tester_arguments.cpp
    primitive_tester.cpp
)

# Headers live next to the sources.
target_include_directories(${PROJECT_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
|
||||
###############################################################################
|
||||
# ROCSHMEM
|
||||
###############################################################################
|
||||
find_package(hip REQUIRED)
find_package(rocshmem CONFIG REQUIRED)

# Link against the imported targets. An imported target's usage requirements
# (include directories, definitions) reach the executable through
# target_link_libraries; a target name must not be passed to
# target_include_directories, where it would be treated as a directory path.
target_link_libraries(
  ${PROJECT_NAME}
  PRIVATE
    rocshmem::rocshmem
    hip::host
    -fgpu-rdc
    # xnack allows address translation fault recovery
    # required option for managed heap configs
    # -mxnack
)
|
||||
@@ -1,17 +0,0 @@
|
||||
#!/bin/bash
#
# Configure and build the example driver in Debug mode in the current
# directory.
#
# Usage: <this script> [rocshmem install prefix]   (default: ~/rocshmem)

# Stop immediately if the configure step fails instead of building anyway.
set -e

# Quote every expansion so paths containing spaces are handled correctly.
install_path="${1:-$HOME/rocshmem}"

# The CMake project lives one directory above this script.
src_path="$(dirname "$(realpath "$0")")/.."

cmake \
    -DCMAKE_BUILD_TYPE=Debug \
    -DCMAKE_VERBOSE_MAKEFILE=ON \
    -Drocshmem_DIR="$install_path/share/cmake/rocshmem" \
    "$src_path"
cmake --build . --parallel 8
|
||||
@@ -1,17 +0,0 @@
|
||||
#!/bin/bash
#
# Configure and build the example driver in Release mode in the current
# directory.
#
# Usage: <this script> [rocshmem install prefix]   (default: ~/rocshmem)

# Stop immediately if the configure step fails instead of building anyway.
set -e

# Quote every expansion so paths containing spaces are handled correctly.
install_path="${1:-$HOME/rocshmem}"

# The CMake project lives one directory above this script.
src_path="$(dirname "$(realpath "$0")")/.."

cmake \
    -DCMAKE_BUILD_TYPE=Release \
    -DCMAKE_VERBOSE_MAKEFILE=OFF \
    -Drocshmem_DIR="$install_path/share/cmake/rocshmem" \
    "$src_path"
cmake --build . --parallel 8
|
||||
@@ -1,143 +0,0 @@
|
||||
/******************************************************************************
|
||||
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal in the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
* sell copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "primitive_tester.hpp"
|
||||
|
||||
#include <rocshmem/rocshmem.hpp>
|
||||
#include <rocshmem/rocshmem_debug.hpp>
|
||||
|
||||
#include <unistd.h>
|
||||
|
||||
using namespace rocshmem;
|
||||
|
||||
/******************************************************************************
|
||||
* DEVICE TEST KERNEL
|
||||
*****************************************************************************/
|
||||
/**
 * Device kernel. Each workgroup creates its own context, then for `loop`
 * iterations puts `size` bytes from s_buf to r_buf on the PE whose number
 * equals the workgroup index, after which thread 0 writes the iteration
 * count into flag[my_pe] on that PE and waits for its local flag slot to
 * reach the same count.
 */
__global__ void
PrimitiveTest(int loop,
              int *flag,
              char *s_buf,
              char *r_buf,
              int size,
              int my_pe,
              ShmemContextType ctx_type)
{
    __shared__ rocshmem_ctx_t ctx;
    rocshmem_wg_init();
    rocshmem_wg_ctx_create(ctx_type, &ctx);

    // The workgroup index is used directly as the peer PE number.
    const int peer = hipBlockIdx_x;
    char *remote_dst = &r_buf[my_pe * size];
    char *local_src = &s_buf[peer * size];

    for (int iter = 0; iter < loop; ++iter) {
        const int signal = iter + 1;

        rocshmem_ctx_putmem_nbi_wg(ctx, remote_dst, local_src, size, peer);

        if (hipThreadIdx_x == 0) {
            // NOTE(review): no quiet/fence is issued between the put and the
            // flag update (earlier attempts were commented out) -- confirm
            // the ordering guarantees this test relies on.
            rocshmem_ctx_int_p(ctx, &flag[my_pe], signal, peer);
            rocshmem_int_wait_until(&flag[peer], ROCSHMEM_CMP_EQ, signal);
        }
        __syncthreads();
    }

    rocshmem_wg_ctx_destroy(ctx);
    rocshmem_wg_finalize();
}
|
||||
|
||||
/******************************************************************************
|
||||
* HOST TESTER CLASS METHODS
|
||||
*****************************************************************************/
|
||||
/**
 * Allocates and zeroes the per-PE flag array (one int per PE) on the
 * symmetric heap. The send/receive buffers are allocated later, in
 * launchKernel(), because their size depends on the message size.
 */
PrimitiveTester::PrimitiveTester(TesterArguments args)
    : Tester(args)
{
    // rocshmem_malloc takes a size in bytes: request numprocs ints, exactly
    // the amount the memset below clears.
    const size_t flag_bytes = args.numprocs * sizeof(int);

    flag = (int *)rocshmem_malloc(flag_bytes);
    if (flag == nullptr) {
        fprintf(stderr, "Failed to allocate %zu bytes for flags\n",
                flag_bytes);
        exit(-1);
    }
    memset(flag, 0, flag_bytes);
}
|
||||
|
||||
/**
 * Releases every symmetric-heap allocation owned by the tester: the data
 * buffers and the flag array allocated in the constructor.
 */
PrimitiveTester::~PrimitiveTester()
{
    rocshmem_free(s_buf);
    rocshmem_free(r_buf);

    // The flag array was previously never released.
    if (flag != nullptr) {
        rocshmem_free(flag);
    }

    s_buf = nullptr;
    r_buf = nullptr;
    flag = nullptr;
}
|
||||
|
||||
void
|
||||
PrimitiveTester::resetBuffers(uint64_t size)
|
||||
{
|
||||
memset(s_buf, '0', size * args.numprocs);
|
||||
memset(r_buf, '1', size * args.numprocs);
|
||||
}
|
||||
|
||||
void
|
||||
PrimitiveTester::launchKernel(dim3 gridSize,
|
||||
dim3 blockSize,
|
||||
int loop,
|
||||
uint64_t size,
|
||||
int nproc, int my_pe)
|
||||
{
|
||||
|
||||
void* sendBuf = malloc(64);
|
||||
void* recvBuf = malloc(64 * nproc);
|
||||
|
||||
s_buf = (char *)rocshmem_malloc(size * nproc);
|
||||
r_buf = (char *)rocshmem_malloc(size * nproc);
|
||||
resetBuffers(size);
|
||||
|
||||
MPI_Allgather(sendBuf, 64, MPI_CHAR,
|
||||
recvBuf, 64, MPI_CHAR,
|
||||
MPI_COMM_WORLD);
|
||||
|
||||
size_t shared_bytes;
|
||||
rocshmem_dynamic_shared(&shared_bytes);
|
||||
|
||||
hipLaunchKernelGGL(PrimitiveTest,
|
||||
gridSize,
|
||||
blockSize,
|
||||
shared_bytes,
|
||||
stream,
|
||||
loop,
|
||||
flag,
|
||||
s_buf,
|
||||
r_buf,
|
||||
size,
|
||||
my_pe,
|
||||
_shmem_context);
|
||||
|
||||
//num_msgs = (loop + args.skip) * gridSize.x;
|
||||
num_timed_msgs = loop ;
|
||||
}
|
||||
|
||||
void
|
||||
PrimitiveTester::verifyResults(uint64_t size)
|
||||
{
|
||||
int check_id =0;
|
||||
if (args.myid == check_id) {
|
||||
for (int i = 0; i < size*args.numprocs; i++) {
|
||||
if (r_buf[i] != '0') {
|
||||
fprintf(stderr, "Data validation error at idx %d\n", i);
|
||||
fprintf(stderr, "Got %c, Expected %c\n", r_buf[i], '0');
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,57 +0,0 @@
|
||||
/******************************************************************************
|
||||
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal in the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
* sell copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef _PRIMITIVE_TESTER_HPP_
|
||||
#define _PRIMITIVE_TESTER_HPP_
|
||||
|
||||
#include "tester.hpp"
|
||||
#include <mpi.h>
|
||||
|
||||
/******************************************************************************
|
||||
* HOST TESTER CLASS
|
||||
*****************************************************************************/
|
||||
class PrimitiveTester : public Tester
|
||||
{
|
||||
public:
|
||||
explicit PrimitiveTester(TesterArguments args);
|
||||
virtual ~PrimitiveTester();
|
||||
|
||||
protected:
|
||||
virtual void
|
||||
resetBuffers(uint64_t size) override;
|
||||
|
||||
virtual void
|
||||
launchKernel(dim3 gridSize,
|
||||
dim3 blockSize,
|
||||
int loop,
|
||||
uint64_t size,
|
||||
int nproc, int my_pe) override;
|
||||
|
||||
virtual void
|
||||
verifyResults(uint64_t size) override;
|
||||
|
||||
char *s_buf = nullptr;
|
||||
char *r_buf = nullptr;
|
||||
int *flag = nullptr;
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -1,84 +0,0 @@
|
||||
/******************************************************************************
|
||||
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal in the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
* sell copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <cstdio>
#include <vector>

#include <rocshmem/rocshmem.hpp>

#include "tester.hpp"
#include "tester_arguments.hpp"
|
||||
|
||||
using namespace rocshmem;
|
||||
|
||||
int main(int argc, char * argv[])
|
||||
{
|
||||
/**
|
||||
* Setup the tester arguments.
|
||||
*/
|
||||
TesterArguments args(argc, argv);
|
||||
|
||||
/***
|
||||
* Select a GPU
|
||||
*/
|
||||
int rank = rocshmem_my_pe();
|
||||
int ndevices, my_device=0;
|
||||
hipGetDeviceCount (&ndevices);
|
||||
my_device = rank % ndevices;
|
||||
hipSetDevice(my_device);
|
||||
|
||||
/**
|
||||
* Must initialize rocshmem to access arguments needed by the tester.
|
||||
*/
|
||||
rocshmem_init(args.num_wgs);
|
||||
|
||||
/**
|
||||
* Now grab the arguments from rocshmem.
|
||||
*/
|
||||
args.get_rocshmem_arguments();
|
||||
|
||||
/**
|
||||
* Using the arguments we just constructed, call the tester factory
|
||||
* method to get the tester (specified by the arguments).
|
||||
*/
|
||||
std::vector<Tester *> tests = Tester::create(args);
|
||||
|
||||
/**
|
||||
* Run the tests
|
||||
*/
|
||||
for (auto test : tests) {
|
||||
test->execute();
|
||||
|
||||
/**
|
||||
* The tester factory method news the tester to create it so we clean
|
||||
* up the memory here.
|
||||
*/
|
||||
delete test;
|
||||
}
|
||||
|
||||
/**
|
||||
* The rocshmem library needs to be cleaned up with this call. It pairs
|
||||
* with the init function above.
|
||||
*/
|
||||
rocshmem_finalize();
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -1,213 +0,0 @@
|
||||
/******************************************************************************
|
||||
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal in the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
* sell copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "tester.hpp"
|
||||
|
||||
#include <functional>
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <mpi.h>
|
||||
#include <rocshmem/rocshmem.hpp>
|
||||
|
||||
//#include "broadcast_tester.hpp"
|
||||
#include "primitive_tester.hpp"
|
||||
|
||||
/**
 * Stores the arguments and creates the HIP resources used by every test:
 * a stream, the start/stop events and a timer array with one uint64_t slot
 * per workgroup.
 */
Tester::Tester(TesterArguments args)
    : args(args)
{
    _type = static_cast<TestType>(args.algorithm);
    _shmem_context = args.shmem_context;

    hipStreamCreate(&stream);

    hipEventCreate(&start_event);
    hipEventCreate(&stop_event);

    const size_t timer_bytes = sizeof(uint64_t) * args.num_wgs;
    hipMalloc(reinterpret_cast<void **>(&timer), timer_bytes);
}
|
||||
|
||||
/**
 * Tears down the HIP resources in the reverse order of their creation in
 * the constructor.
 */
Tester::~Tester()
{
    // timer array
    hipFree(timer);

    // timing events
    hipEventDestroy(stop_event);
    hipEventDestroy(start_event);

    // stream
    hipStreamDestroy(stream);
}
|
||||
|
||||
std::vector<Tester*>
|
||||
Tester::create(TesterArguments args)
|
||||
{
|
||||
int rank = args.myid;
|
||||
std::vector<Tester*> testers;
|
||||
|
||||
if (rank == 0)
|
||||
std::cout << "*** Creating Test: ";
|
||||
|
||||
TestType type = (TestType) args.algorithm;
|
||||
|
||||
switch (type) {
|
||||
case AlltoAll_Put:
|
||||
if (rank == 0)
|
||||
std::cout << "AlltoAll Puts***" << std::endl;
|
||||
testers.push_back(new PrimitiveTester(args));
|
||||
return testers;
|
||||
case AlltoAll_Get:
|
||||
if (rank == 0)
|
||||
std::cout << "AlltoAll Gets***" << std::endl;
|
||||
testers.push_back(new PrimitiveTester(args));
|
||||
return testers;
|
||||
default:
|
||||
if (rank == 0)
|
||||
std::cout << "Unknown***" << std::endl;
|
||||
testers.push_back(new PrimitiveTester(args));
|
||||
return testers;
|
||||
}
|
||||
return testers;
|
||||
}
|
||||
|
||||
void
|
||||
Tester::execute()
|
||||
{
|
||||
|
||||
int num_loops = args.loop;
|
||||
|
||||
/**
|
||||
* Some tests loop through data sizes in powers of 2 and report the
|
||||
* results for those ranges.
|
||||
*/
|
||||
for (uint64_t size = args.min_msg_size;
|
||||
size <= args.max_msg_size;
|
||||
size <<= 1) {
|
||||
|
||||
|
||||
/**
|
||||
* Restricts the number of iterations of really large messages.
|
||||
*/
|
||||
if (size > args.large_message_size)
|
||||
num_loops = args.loop_large;
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* TODO:
|
||||
* Verify that this timer type is actually uint64_t on the
|
||||
* device side.
|
||||
*/
|
||||
memset(timer, 0, sizeof(uint64_t) * args.num_wgs);
|
||||
|
||||
const dim3 blockSize(args.wg_size, 1, 1);
|
||||
const dim3 gridSize(args.num_wgs, 1, 1);
|
||||
|
||||
hipEventRecord(start_event, stream);
|
||||
|
||||
launchKernel(gridSize, blockSize, num_loops, size, args.numprocs, args.myid);
|
||||
|
||||
hipEventRecord(stop_event, stream);
|
||||
hipError_t err = hipStreamSynchronize(stream);
|
||||
if (err != hipSuccess) {
|
||||
printf("error = %d \n", err);
|
||||
}
|
||||
|
||||
// rocshmem_dump_stats();
|
||||
// rocshmem_reset_stats();
|
||||
|
||||
|
||||
|
||||
// data validation
|
||||
verifyResults(size);
|
||||
|
||||
barrier();
|
||||
resetBuffers(size);
|
||||
|
||||
print(size);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
Tester::print(uint64_t size)
|
||||
{
|
||||
if (args.myid != 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
// uint64_t timer_avg = timerAvgInMicroseconds();
|
||||
// double latency_avg = static_cast<double>(timer_avg) / num_timed_msgs;
|
||||
// double avg_msg_rate = num_timed_msgs / (timer_avg / 1e6);
|
||||
|
||||
float total_kern_time_ms;
|
||||
hipEventElapsedTime(&total_kern_time_ms, start_event, stop_event);
|
||||
float total_kern_time_s = total_kern_time_ms / 1000;
|
||||
double bandwidth_avg_gbs = num_timed_msgs * size * bw_factor / total_kern_time_s / pow(2, 30);
|
||||
|
||||
float latency_us = (total_kern_time_ms *1000) /num_timed_msgs;
|
||||
|
||||
int field_width = 20;
|
||||
int float_precision = 2;
|
||||
|
||||
printf("\n##### Message Size %lu #####\n", size);
|
||||
|
||||
printf("%*s%*s\n",
|
||||
field_width + 1, "Latency AVG (us)",
|
||||
field_width + 1, "Bandwidth (GB/s)");
|
||||
|
||||
printf("%*.*f %*.*f \n",
|
||||
field_width, float_precision, latency_us,
|
||||
field_width, float_precision, bandwidth_avg_gbs);
|
||||
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
void
|
||||
Tester::barrier()
|
||||
{
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
}
|
||||
|
||||
uint64_t
|
||||
Tester::gpuCyclesToMicroseconds(uint64_t cycles)
|
||||
{
|
||||
/**
|
||||
* The dGPU asm core timer runs at 27MHz. This is different from the
|
||||
* core clock returned by HIP. For an APU, this is different and might
|
||||
* need adjusting.
|
||||
*/
|
||||
uint64_t gpu_frequency_MHz = 27;
|
||||
|
||||
/**
|
||||
* hipDeviceGetAttribute(&gpu_frequency_khz,
|
||||
* hipDeviceAttributeClockRate,
|
||||
* 0);
|
||||
*/
|
||||
|
||||
return cycles / gpu_frequency_MHz;
|
||||
}
|
||||
|
||||
uint64_t
|
||||
Tester::timerAvgInMicroseconds()
|
||||
{
|
||||
uint64_t sum = 0;
|
||||
|
||||
for (int i = 0; i < args.num_wgs; i++) {
|
||||
sum += gpuCyclesToMicroseconds(timer[i]);
|
||||
}
|
||||
|
||||
return sum / args.num_wgs;
|
||||
}
|
||||
@@ -1,111 +0,0 @@
|
||||
/******************************************************************************
|
||||
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal in the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
* sell copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef _TESTER_HPP_
|
||||
#define _TESTER_HPP_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include <rocshmem/rocshmem.hpp>
|
||||
|
||||
#include "tester_arguments.hpp"
|
||||
|
||||
/******************************************************************************
|
||||
* TESTER CLASS TYPES
|
||||
*****************************************************************************/
|
||||
// Test variants selectable through the algorithm argument.
enum TestType
{
    AlltoAll_Put = 0,
    AlltoAll_Get = 1
};

// Integer value identifying the rocshmem context type passed to the kernel.
using ShmemContextType = int;
|
||||
|
||||
/******************************************************************************
|
||||
* TESTER INTERFACE
|
||||
*****************************************************************************/
|
||||
class Tester
|
||||
{
|
||||
public:
|
||||
explicit Tester(TesterArguments args);
|
||||
virtual ~Tester();
|
||||
|
||||
void
|
||||
execute();
|
||||
|
||||
static std::vector<Tester*>
|
||||
create(TesterArguments args);
|
||||
|
||||
protected:
|
||||
virtual void
|
||||
resetBuffers(uint64_t size) = 0;
|
||||
|
||||
virtual void
|
||||
preLaunchKernel() {}
|
||||
|
||||
virtual void
|
||||
launchKernel(dim3 gridSize,
|
||||
dim3 blockSize,
|
||||
int loop,
|
||||
uint64_t size,
|
||||
int nproc, int my_pe) = 0;
|
||||
|
||||
virtual void
|
||||
postLaunchKernel() {}
|
||||
|
||||
virtual void
|
||||
verifyResults(uint64_t size) = 0;
|
||||
|
||||
int num_msgs = 0;
|
||||
int num_timed_msgs = 0;
|
||||
int bw_factor = 1;
|
||||
|
||||
TesterArguments args;
|
||||
|
||||
TestType _type;
|
||||
ShmemContextType _shmem_context = 8; //SHMEM_CTX_WP_PRIVATE
|
||||
|
||||
hipStream_t stream;
|
||||
|
||||
uint64_t *timer = nullptr;
|
||||
|
||||
private:
|
||||
void
|
||||
print(uint64_t size);
|
||||
|
||||
void
|
||||
barrier();
|
||||
|
||||
uint64_t
|
||||
gpuCyclesToMicroseconds(uint64_t cycles);
|
||||
|
||||
uint64_t
|
||||
timerAvgInMicroseconds();
|
||||
|
||||
bool
|
||||
peLaunchesKernel();
|
||||
|
||||
hipEvent_t start_event;
|
||||
hipEvent_t stop_event;
|
||||
};
|
||||
|
||||
#endif /* _TESTER_HPP_ */
|
||||
@@ -1,84 +0,0 @@
|
||||
/******************************************************************************
|
||||
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal in the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
* sell copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "tester.hpp"
|
||||
#include "tester_arguments.hpp"
|
||||
|
||||
#include <cstdlib>
|
||||
#include <iostream>
|
||||
|
||||
#include <rocshmem/rocshmem.hpp>
|
||||
|
||||
using namespace rocshmem;
|
||||
|
||||
/**
 * Parses the command line. Every supported option (-w, -S, -s, -a, -z, -x)
 * takes exactly one value. An unknown option, or an option without a
 * value, prints the usage text and terminates with exit(-1).
 */
TesterArguments::TesterArguments(int argc, char *argv[])
{
    for (int i = 1; i < argc; i++) {
        std::string arg = argv[i];

        const bool known = arg == "-w" || arg == "-S" || arg == "-s" ||
                           arg == "-a" || arg == "-z" || arg == "-x";

        // Reject unknown flags, and flags given last without a value:
        // argv[argc] is a null pointer and must not reach atoi/atoll.
        if (!known || i + 1 >= argc) {
            show_usage(argv[0]);
            exit(-1);
        }

        const char *value = argv[++i];

        if (arg == "-w") {
            num_wgs = atoi(value);
        } else if (arg == "-S") {
            max_msg_size = atoll(value);
        } else if (arg == "-s") {
            min_msg_size = atoll(value);
        } else if (arg == "-a") {
            algorithm = atoi(value);
        } else if (arg == "-z") {
            wg_size = atoi(value);
        } else {  // "-x"
            shmem_context = atoi(value);
        }
    }
}
|
||||
|
||||
void
|
||||
TesterArguments::show_usage(std::string executable_name)
|
||||
{
|
||||
std::cout << "Usage: " << executable_name << std::endl;
|
||||
std::cout << "\t-t <number of rocshmem service threads>\n";
|
||||
std::cout << "\t-w <number of workgroups>\n";
|
||||
std::cout << "\t-s <maximum message size (in bytes)>\n";
|
||||
std::cout << "\t-a <algorithm number to test>\n";
|
||||
std::cout << "\t-z <WorkGroup Size>\n";
|
||||
std::cout << "\t-c <Coalescing Coefficient>\n";
|
||||
std::cout << "\t-o <Operation type for the random_access test>\n";
|
||||
std::cout << "\t-ta <Number of Thread Accessing the communication>\n";
|
||||
std::cout << "\t-x <shmem context>\n";
|
||||
}
|
||||
|
||||
void
|
||||
TesterArguments::get_rocshmem_arguments()
|
||||
{
|
||||
numprocs = rocshmem_n_pes();
|
||||
myid = rocshmem_my_pe();
|
||||
|
||||
}
|
||||
@@ -1,74 +0,0 @@
|
||||
/******************************************************************************
|
||||
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal in the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
* sell copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef _TESTER_ARGUMENTS_HPP_
|
||||
#define _TESTER_ARGUMENTS_HPP_
|
||||
|
||||
#include <string>
|
||||
|
||||
#include <climits>
|
||||
#include <cstdint>
|
||||
|
||||
/**
 * Plain holder for the test configuration: values parsed from the command
 * line, values queried from rocshmem, and fixed defaults.
 */
class TesterArguments
{
  public:
    TesterArguments(int argc, char *argv[]);

    /**
     * Initialize rocshmem members.
     * Valid after rocshmem_init function called.
     */
    void get_rocshmem_arguments();

    /**
     * Arguments obtained from command line
     */
    unsigned num_wgs = 1;
    unsigned algorithm = 0;
    uint64_t min_msg_size = 1;
    uint64_t max_msg_size = 1 << 20;
    unsigned wg_size = 64;
    unsigned shmem_context = 8;  // ROCSHMEM_CTX_WG_PRIVATE

    /**
     * Arguments obtained from rocshmem (UINT_MAX until they are queried)
     */
    unsigned numprocs = UINT_MAX;
    unsigned myid = UINT_MAX;

    /**
     * Default tester values
     */
    int loop = 100;
    int skip = 10;
    int loop_large = 25;
    int large_message_size = 32768;

  private:
    /**
     * Output method which displays available command line options
     */
    static void show_usage(std::string executable_name);
};
|
||||
|
||||
#endif
|
||||
@@ -1,144 +0,0 @@
|
||||
###############################################################################
|
||||
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to
|
||||
# deal in the Software without restriction, including without limitation the
|
||||
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
# sell copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
###############################################################################
|
||||
|
||||
cmake_minimum_required(VERSION 3.16.3 FATAL_ERROR)
|
||||
|
||||
###############################################################################
|
||||
# GLOBAL COMPILE FLAGS
|
||||
###############################################################################
|
||||
# Project-wide C++ dialect: strict C++14 without compiler extensions.
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# Default to the ROCm hipcc driver, but leave alone a compiler chosen with
# -DCMAKE_CXX_COMPILER=... or through the CXX environment variable.
# This has to run before project(), which is where the compiler is probed.
if(NOT DEFINED CMAKE_CXX_COMPILER AND NOT DEFINED ENV{CXX})
  set(CMAKE_CXX_COMPILER /opt/rocm/bin/hipcc)
endif()
|
||||
|
||||
###############################################################################
|
||||
# DEFAULT BUILD TYPE
|
||||
###############################################################################
|
||||
# Fall back to a Release build when no build type was requested.
if(NOT CMAKE_BUILD_TYPE)
  message(STATUS "CMAKE_BUILD_TYPE unspecified: generating Release build")

  set(CMAKE_BUILD_TYPE "Release"
      CACHE STRING "build type: Release, Debug, RelWithDebInfo, MinSizeRel"
      FORCE)
endif()
|
||||
|
||||
###############################################################################
|
||||
# PROJECT
|
||||
###############################################################################
|
||||
project(spts VERSION 1.1.0 LANGUAGES CXX)

###############################################################################
# CONFIGURATION OPTIONS
###############################################################################
# Backend selection.
option(USE_HIP "Build HIP version of the solver" OFF)
option(USE_ROCSHMEM "Build rocSHMEM enabled version of the solver" OFF)

# Data precision.
option(USE_DOUBLE "Use double precision floats for the data" OFF)

# Algorithm selection.
option(ALL_ANALYZE "Build analyze and solve algorithm" OFF)
option(ALL_LEVELSET "Build levelset algorithm" OFF)
option(ALL_LEVELSYNC "Build levelsync algorithm" OFF)
option(ALL_SYNCFREE "Build syncfree algorithm" OFF)

# Generate config.h in the build tree from the template in the source tree.
configure_file(
  "${CMAKE_CURRENT_SOURCE_DIR}/cmake/config.h.in"
  "${CMAKE_CURRENT_BINARY_DIR}/config.h"
)
|
||||
|
||||
###############################################################################
|
||||
# SOURCES
|
||||
###############################################################################
|
||||
# The executable is created empty; backend-specific sources are appended
# further down with target_sources().
add_executable(${PROJECT_NAME})

target_sources(
  ${PROJECT_NAME}
  PRIVATE
    InputFlags.cpp
    Main.cpp
)

target_include_directories(
  ${PROJECT_NAME}
  PRIVATE
    "${CMAKE_CURRENT_SOURCE_DIR}"
    # config.h is written by configure_file() into the *current* binary
    # directory, which differs from CMAKE_BINARY_DIR when this project is
    # pulled in through add_subdirectory().
    "${CMAKE_CURRENT_BINARY_DIR}"
)
|
||||
|
||||
###############################################################################
|
||||
# HIP / HIP + rocSHMEM
|
||||
###############################################################################
|
||||
if(USE_HIP)
  find_package(hip REQUIRED)

  target_sources(
    ${PROJECT_NAME}
    PRIVATE
      HIPHelper.cpp
  )

  if(USE_ROCSHMEM)
    find_package(rocshmem CONFIG REQUIRED)

    # Linking the imported target is what supplies its include directories;
    # a target name must not be handed to target_include_directories().
    target_link_libraries(
      ${PROJECT_NAME}
      PRIVATE
        rocshmem::rocshmem
    )

    # -fgpu-rdc is a driver option, not a library, so it belongs in the link
    # options rather than in the link-library list.
    target_link_options(
      ${PROJECT_NAME}
      PRIVATE
        -fgpu-rdc
    )
  endif()

  # HIPHelper.cpp calls the HIP runtime in every HIP build, so the host-side
  # HIP usage requirements are attached whether or not rocSHMEM is enabled.
  # Kept after rocshmem::rocshmem to preserve the original link order.
  target_link_libraries(
    ${PROJECT_NAME}
    PRIVATE
      hip::host
  )

###############################################################################
# OPENCL
###############################################################################
else()

  if(USE_ROCSHMEM)
    message(FATAL_ERROR "Cannot use rocSHMEM without USE_HIP")
  endif()

  # Root of the ROCm installation; override with -DROCM_PATH=... when ROCm is
  # not installed under /opt/rocm.
  set(ROCM_PATH "/opt/rocm" CACHE PATH "Root of the ROCm installation")

  target_sources(
    ${PROJECT_NAME}
    PRIVATE
      OpenCLHelper.cpp
  )

  target_include_directories(
    ${PROJECT_NAME}
    PRIVATE
      "${ROCM_PATH}/opencl/include"
  )

  target_link_directories(
    ${PROJECT_NAME}
    PRIVATE
      "${ROCM_PATH}/opencl/lib/x86_64"
  )

  target_link_libraries(
    ${PROJECT_NAME}
    PRIVATE
      OpenCL
  )

endif()
|
||||
@@ -1,85 +0,0 @@
|
||||
/********************************************************************************
|
||||
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
********************************************************************************/
|
||||
#ifndef GPUHelper_H
|
||||
#define GPUHelper_H
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include "InputFlags.h"
|
||||
|
||||
// Bit-width constants shared by the solver code.
#define ROW_BITS 32 // May be not the right place to define this macro
#define WG_BITS 24

// Wavefront geometry: the rocSHMEM build uses a single wavefront per
// workgroup, every other build uses sixteen.
#define WF_SIZE 64
#if defined(USE_ROCSHMEM)
#define WF_PER_WG 1
#else
#define WF_PER_WG 16
#endif

// Block size, 0 until assigned at runtime. Declared `static` in a header, so
// each translation unit that includes this file gets its own copy.
static int SPTS_BLOCK_SIZE = 0;
|
||||
|
||||
// Backend-neutral aliases and flag macros. Exactly one of the two branches is
// compiled, depending on whether USE_HIP is defined.
#ifdef USE_HIP
#include <hip/hip_runtime.h>
using memPointer = void *;
using memPointer_flags = int;
using gpuInt = int;
using gpuBool = bool;
using gpuEvent = hipEvent_t;
using gpuError = hipError_t;
// The HIP build defines every memory flag as 0.
#define GPU_MEM_READ_ONLY 0
#define GPU_MEM_READ_WRITE 0
#define GPU_MEM_USE_HOST_PTR 0
#define GPU_TRUE true
#define GPU_FALSE false
#else
#include <CL/cl.h>
using memPointer = cl_mem;
using memPointer_flags = cl_mem_flags;
using gpuInt = cl_int;
using gpuBool = cl_bool;
using gpuEvent = cl_event;
using gpuError = cl_int;
#define GPU_MEM_READ_ONLY CL_MEM_READ_ONLY
// Previously mapped to CL_MEM_READ_ONLY, which made every "read/write"
// buffer read-only for kernels.
#define GPU_MEM_READ_WRITE CL_MEM_READ_WRITE
#define GPU_MEM_USE_HOST_PTR CL_MEM_USE_HOST_PTR
#define GPU_TRUE CL_TRUE
#define GPU_FALSE CL_FALSE
#endif
|
||||
|
||||
class GPUHelper
|
||||
{
|
||||
public:
|
||||
GPUHelper() {}
|
||||
virtual int Init(const std::string &_filename, InputFlags &in_flags) = 0;
|
||||
virtual void checkStatus(gpuError status, const std::string errString) = 0;
|
||||
virtual void CopyToDevice(memPointer _d_buf, void *_h_buf, size_t _size, size_t _offset, gpuBool _blocking, gpuEvent *_ev) = 0;
|
||||
virtual void CopyToHost(memPointer _d_buf, void *_h_buf, size_t _size, size_t _offset, gpuBool _blocking, gpuEvent *_ev) = 0;
|
||||
virtual memPointer AllocateMem(const std::string name, size_t, memPointer_flags flags, void *) = 0;
|
||||
virtual void FreeMem(memPointer ptr) = 0;
|
||||
virtual void Flush() = 0;
|
||||
};
|
||||
|
||||
#endif //GPUHelper_H
|
||||
@@ -1,99 +0,0 @@
|
||||
/********************************************************************************
|
||||
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
********************************************************************************/
|
||||
|
||||
#include "HIPHelper.h"
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
|
||||
int HIPHelper::Init(const std::string &filename, InputFlags &in_flags)
|
||||
{
|
||||
int device = 0;
|
||||
hipSetDevice(device);
|
||||
hipDeviceProp_t props;
|
||||
hipGetDeviceProperties(&props, device /*deviceID*/);
|
||||
printf("info: running on device %s\n", props.name);
|
||||
printf("info: architecture on AMD GPU device is: %d\n", props.gcnArch);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * Abort the process when a HIP call did not succeed.
 *
 * On failure, writes "<errString> : <HIP error text>" to stderr and exits
 * with status -1. Returns normally when status is HIP_SUCCESS.
 */
void HIPHelper::checkStatus(gpuError status, const std::string errString)
{
    if (status == HIP_SUCCESS)
        return;

    std::cerr << errString << " : " << hipGetErrorString(status) << std::endl;
    exit(-1);
}
|
||||
|
||||
/**
 * Allocate `size` bytes with hipMalloc and log the resulting pointer.
 *
 * `name` is used only in the log line and the error message. `flags` and
 * `hostBuffer` are not consulted. Allocation failure goes through
 * checkStatus(), which terminates the process.
 */
memPointer HIPHelper::AllocateMem(const std::string name,
                                  size_t size,
                                  memPointer_flags flags,
                                  void *hostBuffer)
{
    void *device_ptr;
    checkStatus(hipMalloc(&device_ptr, size),
                "HIP error allocating " + name + " !");

    printf("Allocating %s of size %zu at buf %p\n", name.c_str(), size, device_ptr);
    return device_ptr;
}
|
||||
|
||||
/**
 * Copy `size` bytes from `hostBuffer` into the device allocation `devBuffer`.
 *
 * Only offset == 0 is supported. This is enforced in every build type,
 * whereas an assert() would disappear under NDEBUG. The copy always uses the
 * synchronous hipMemcpy, which satisfies both blocking and non-blocking
 * requests; `ev` is never written.
 */
void HIPHelper::CopyToDevice(memPointer devBuffer,
                             void *hostBuffer,
                             size_t size,
                             size_t offset,
                             gpuBool blocking,
                             gpuEvent *ev)
{
    (void)blocking;
    (void)ev;

    if (offset != 0)
    {
        std::cerr << "HIP error copying data to device : non-zero offset is not supported" << std::endl;
        exit(-1);
    }

    // A plain host memcpy() into hipMalloc'ed memory is only valid when that
    // memory happens to be CPU-accessible; hipMemcpy works in every case.
    checkStatus(hipMemcpy(devBuffer, hostBuffer, size, hipMemcpyHostToDevice),
                "HIP error copying data to device !");
}
|
||||
|
||||
/**
 * Copy `size` bytes from the device allocation `devBuffer` into `hostBuffer`.
 *
 * Only offset == 0 is supported. This is enforced in every build type,
 * whereas an assert() would disappear under NDEBUG. The copy always uses the
 * synchronous hipMemcpy, which satisfies both blocking and non-blocking
 * requests; `ev` is never written.
 */
void HIPHelper::CopyToHost(memPointer devBuffer,
                           void *hostBuffer,
                           size_t size,
                           size_t offset,
                           gpuBool blocking,
                           gpuEvent *ev)
{
    (void)blocking;
    (void)ev;

    if (offset != 0)
    {
        std::cerr << "HIP error copying data to host : non-zero offset is not supported" << std::endl;
        exit(-1);
    }

    // A plain host memcpy() out of hipMalloc'ed memory is only valid when
    // that memory happens to be CPU-accessible; hipMemcpy works in every case.
    checkStatus(hipMemcpy(hostBuffer, devBuffer, size, hipMemcpyDeviceToHost),
                "HIP error copying data to host !");
}
|
||||
@@ -1,50 +0,0 @@
|
||||
/********************************************************************************
|
||||
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
********************************************************************************/
|
||||
// The guard used to be CLHelper_H, a name copied from the OpenCL helper; a
// translation unit defining that macro first would silently drop this class.
#ifndef HIPHelper_H
#define HIPHelper_H

// Carried over unchanged; these macros are only read by OpenCL headers.
#define CL_USE_DEPRECATED_OPENCL_2_0_APIS
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
#define CL_USE_DEPRECATED_OPENCL_1_1_APIS

#include <string>
#include <iostream>
#include <sstream>
#include "InputFlags.h"
#include "GPUHelper.h"
#include "hip/hip_runtime.h"

/**
 * GPUHelper implementation backed by the HIP runtime.
 */
class HIPHelper : public GPUHelper
{
public:
    HIPHelper() {}

    int Init(const std::string &_filename, InputFlags &in_flags) override;
    void checkStatus(gpuError status, const std::string errString) override;
    void CopyToDevice(memPointer _d_buf, void *_h_buf, size_t _size, size_t _offset, gpuBool _blocking, gpuEvent *_ev) override;
    void CopyToHost(memPointer _d_buf, void *_h_buf, size_t _size, size_t _offset, gpuBool _blocking, gpuEvent *_ev) override;
    memPointer AllocateMem(const std::string name, size_t, memPointer_flags flags, void *) override;

    // Release memory with hipFree; the status code is deliberately discarded.
    void FreeMem(memPointer ptr) override { (void)hipFree(ptr); }

    // Block until the device is idle; the status code is deliberately discarded.
    void Flush() override { (void)hipDeviceSynchronize(); }
};

#endif //HIPHelper_H
|
||||
|
||||
@@ -1,179 +0,0 @@
|
||||
/********************************************************************************
|
||||
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
********************************************************************************/
|
||||
#include <iomanip>
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include "InputFlags.h"
|
||||
|
||||
// Every flag set starts out with the built-in help switch (--help / -h),
// registered with an empty default value.
InputFlags::InputFlags()
{
    const std::string no_default_value;
    AddInputFlag("help", 'h', no_default_value, "Print Help Message", "string");
}
|
||||
|
||||
void InputFlags::AddInputFlag(const std::string &_long_name,
|
||||
char _short_name,
|
||||
const std::string &_value,
|
||||
const std::string &_help_text,
|
||||
const std::string &_type)
|
||||
{
|
||||
Input in;
|
||||
in.long_name = _long_name;
|
||||
in.short_name = _short_name;
|
||||
in.value = _value;
|
||||
in.help_text = _help_text;
|
||||
in.type = _type;
|
||||
|
||||
if(MapInputs.count(_short_name) > 0)
|
||||
printf("Input flag: %s (%c) already exists !", _long_name.c_str(), _short_name);
|
||||
else
|
||||
MapInputs[_short_name] = in;
|
||||
}
|
||||
|
||||
void InputFlags::Print()
|
||||
{
|
||||
printf("SpTS Input Flags: \n\n");
|
||||
|
||||
for(auto &content : MapInputs)
|
||||
std::cout<<std::setw(8)<<"--"<<content.second.long_name<<std::setw(20 - content.second.long_name.length())<<"-"<<content.first<<std::setw(8)<<" "<<content.second.help_text<<"\n";
|
||||
exit(0);
|
||||
}
|
||||
|
||||
char InputFlags::FindShortName(const std::string &long_name)
|
||||
{
|
||||
char short_name = '\0';
|
||||
|
||||
for(auto &content : MapInputs)
|
||||
{
|
||||
if(content.second.long_name == long_name)
|
||||
short_name = content.first;
|
||||
}
|
||||
if(short_name == '\0')
|
||||
{
|
||||
std::cout<<"Long Name: "<<long_name<<" Not Found !";
|
||||
exit(0);
|
||||
}
|
||||
|
||||
return short_name;
|
||||
}
|
||||
|
||||
void InputFlags::Parse(int argc, char *argv[])
|
||||
{
|
||||
std::vector<std::string> args;
|
||||
for(int i = 1; i < argc; i++)
|
||||
args.push_back(argv[i]);
|
||||
|
||||
if(args.size() == 0) // No Input Flag
|
||||
Print();
|
||||
|
||||
for(int i = 0; i < args.size(); i++)
|
||||
{
|
||||
std::string temp = args[i];
|
||||
if(temp[0] != '-')
|
||||
{
|
||||
printf("Illegal input flag\n");
|
||||
Print();
|
||||
}
|
||||
else if(temp[0] == '-' && temp[1] == '-') // Long Name Input
|
||||
{
|
||||
std::string long_name = temp.substr(2);
|
||||
if(long_name == "help")
|
||||
Print();
|
||||
|
||||
char short_name = FindShortName(long_name);
|
||||
|
||||
if (short_name == 'n' || short_name == 'z' || short_name == 'v')
|
||||
{
|
||||
MapInputs[short_name].value = "true";
|
||||
}
|
||||
else
|
||||
{
|
||||
MapInputs[short_name].value = args[i+1];
|
||||
i++;
|
||||
}
|
||||
}
|
||||
else if (temp[0] == '-' && temp[1] == '?') // Help Input
|
||||
Print();
|
||||
else // Short Name Input
|
||||
{
|
||||
char short_name = temp[1];
|
||||
if(MapInputs.find(short_name) == MapInputs.end())
|
||||
{
|
||||
std::cout<<"Input Flag: "<<short_name<<" Not Found !";
|
||||
exit(0);
|
||||
}
|
||||
if(short_name == 'h')
|
||||
Print();
|
||||
|
||||
if(short_name == 'n' || short_name == 'z' || short_name == 'v' )
|
||||
{
|
||||
MapInputs[short_name].value = "true";
|
||||
}
|
||||
else
|
||||
{
|
||||
MapInputs[short_name].value = args[i+1];
|
||||
i++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::string InputFlags::GetValueStr(const std::string &long_name)
|
||||
{
|
||||
char short_name = FindShortName(long_name);
|
||||
std::string value = MapInputs[short_name].value;
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
int InputFlags::GetValueInt(const std::string &long_name)
|
||||
{
|
||||
char short_name = FindShortName(long_name);
|
||||
int value = atoi(MapInputs[short_name].value.c_str());
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
uint64_t InputFlags::GetValueUint64(const std::string &long_name)
|
||||
{
|
||||
char short_name = FindShortName(long_name);
|
||||
uint64_t value = strtoull(MapInputs[short_name].value.c_str(), NULL, 10);
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
float InputFlags::GetValueFloat(const std::string &long_name)
|
||||
{
|
||||
char short_name = FindShortName(long_name);
|
||||
float value = std::stof(MapInputs[short_name].value);
|
||||
|
||||
return value;
|
||||
|
||||
}
|
||||
|
||||
bool InputFlags::GetValueBool(const std::string &long_name)
|
||||
{
|
||||
char short_name = FindShortName(long_name);
|
||||
if (MapInputs[short_name].value == "true")
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
@@ -1,62 +0,0 @@
|
||||
/********************************************************************************
|
||||
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
********************************************************************************/
|
||||
#ifndef InputFlags_H
|
||||
#define InputFlags_H
|
||||
|
||||
#include <cstdint>
#include <map>
#include <string>
|
||||
|
||||
// One registered command-line flag.
struct Input
{
    std::string long_name;  // name used as "--long_name"
    char short_name;        // single character used as "-c"
    std::string value;      // current value, stored as text
    std::string help_text;  // description shown in the usage listing
    std::string type;       // type label supplied at registration
};
|
||||
|
||||
class InputFlags
|
||||
{
|
||||
std::map<char, Input> MapInputs;
|
||||
|
||||
public:
|
||||
InputFlags();
|
||||
virtual void AddDerivedInputFlags() = 0;
|
||||
void AddInputFlag(const std::string &_long_name,
|
||||
char _short_name,
|
||||
const std::string &_value,
|
||||
const std::string &_help_text,
|
||||
const std::string &type);
|
||||
void Parse(int argc, char *argv[]);
|
||||
char FindShortName(const std::string &long_name);
|
||||
void Print();
|
||||
|
||||
std::string GetValueStr(const std::string &long_name);
|
||||
int GetValueInt(const std::string &long_name);
|
||||
uint64_t GetValueUint64(const std::string &long_name);
|
||||
float GetValueFloat(const std::string &long_name);
|
||||
bool GetValueBool(const std::string &long_name);
|
||||
|
||||
virtual ~InputFlags() {}
|
||||
};
|
||||
|
||||
#endif //InputFlags_H
|
||||
@@ -1,19 +0,0 @@
|
||||
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
@@ -1,193 +0,0 @@
|
||||
/********************************************************************************
|
||||
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
********************************************************************************/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#ifdef USE_HIP
|
||||
#include "hip/hip_runtime.h"
|
||||
#else
|
||||
#include "OpenCLHelper.h"
|
||||
#endif
|
||||
|
||||
// MPI is needed by the rocSHMEM build, which calls MPI_Allreduce in main().
// The guard must use the same macro as that call site (USE_ROCSHMEM); the
// former spelling USE_RO_SHMEM matches no other macro used in this project.
#ifdef USE_ROCSHMEM
#include "mpi.h"
#endif
|
||||
|
||||
#include "MatrixMarketReader.h"
|
||||
#include "SpTS.h"
|
||||
#include <iostream>
|
||||
#include <unistd.h>
|
||||
#include <limits.h>
|
||||
|
||||
#ifdef USE_DOUBLE
|
||||
typedef double FPTYPE;
|
||||
#else
|
||||
typedef float FPTYPE;
|
||||
#endif
|
||||
|
||||
using namespace rocshmem;
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
SparseTriangularSolve<FPTYPE> spts_obj;
|
||||
InputFlags &in_flags = spts_obj;
|
||||
in_flags.AddDerivedInputFlags();
|
||||
in_flags.Parse(argc, argv);
|
||||
FPTYPE alpha = in_flags.GetValueFloat("alpha");
|
||||
|
||||
printf("Reading input file: %s...", in_flags.GetValueStr("filename").c_str());fflush(stdout);
|
||||
MatrixMarketReader<FPTYPE> mm_reader;
|
||||
if (mm_reader.MMReadFormat(in_flags.GetValueStr("filename"), in_flags) != 0)
|
||||
{
|
||||
fprintf(stderr, "ERROR reading input file !\n");
|
||||
exit(1);
|
||||
}
|
||||
printf("Done.\n");
|
||||
|
||||
GPUHelper *GPU;
|
||||
#ifdef USE_HIP
|
||||
printf("Initializing HIP runtime...\n\t");fflush(stdout);
|
||||
GPU = new HIPHelper();
|
||||
char buf[PATH_MAX + 1];
|
||||
readlink("/proc/self/exe", buf, sizeof(buf) - 1);
|
||||
std::string str(buf);
|
||||
printf("Going to try to open %s\n", (str.substr(0, str.rfind('/'))+"/spts_kernel.cl").c_str());
|
||||
if(GPU->Init((str.substr(0, str.rfind('/'))+ "/spts_kernel.cl").c_str(), in_flags) == 1)
|
||||
{
|
||||
fflush(stdout);
|
||||
fprintf(stderr,"\nError Initializing HIP Runtime !\n");
|
||||
exit(-1);
|
||||
}
|
||||
#else
|
||||
printf("Initializing OpenCL runtime...\n\t");fflush(stdout);
|
||||
GPU = new CLHelper();
|
||||
char buf[PATH_MAX + 1];
|
||||
readlink("/proc/self/exe", buf, sizeof(buf) - 1);
|
||||
std::string str(buf);
|
||||
printf("Going to try to open %s\n", (str.substr(0, str.rfind('/'))+"/spts_kernel.cl").c_str());
|
||||
if(GPU->Init((str.substr(0, str.rfind('/'))+ "/spts_kernel.cl").c_str(), in_flags) == 1)
|
||||
{
|
||||
fflush(stdout);
|
||||
fprintf(stderr,"\nError Initializing OpenCL Runtime !\n");
|
||||
exit(-1);
|
||||
}
|
||||
#endif
|
||||
printf("Done.\n");
|
||||
|
||||
printf("Allocating sparse matrices...");fflush(stdout);
|
||||
spts_obj.AllocateSparseMatrix(mm_reader, in_flags, GPU);
|
||||
printf("Done.\n");
|
||||
|
||||
printf("Converting COO to CSR...");fflush(stdout);
|
||||
spts_obj.ConvertFromCOOToCSR(mm_reader.GetCoordinates(), in_flags);
|
||||
printf("Done.\n");
|
||||
|
||||
SPTS_BLOCK_SIZE = in_flags.GetValueInt("block_size");
|
||||
printf("Finding Stats For Parallel Decomposition...");fflush(stdout);
|
||||
spts_obj.FindStatsForParallelDecomposition();
|
||||
printf("Done.\n");
|
||||
|
||||
printf("Allocating parallel sparse matrices...");fflush(stdout);
|
||||
spts_obj.AllocateParallelSparseMatrix(mm_reader, in_flags);
|
||||
printf("Done.\n");
|
||||
|
||||
printf("Allocating vectors...");fflush(stdout);
|
||||
spts_obj.AllocateVectors(mm_reader);
|
||||
printf("Done.\n");
|
||||
|
||||
float gflops = 0.f;
|
||||
int errors = 0;
|
||||
uint64_t ns_per_iter = 0;
|
||||
uint64_t ns_per_analysis_iter = 0;
|
||||
uint64_t ns_per_syncfree_iter = 0;
|
||||
uint64_t ns_per_levelset_iter = 0;
|
||||
uint64_t ns_per_levelsync_iter = 0;
|
||||
|
||||
printf("Performing SpTS on the CPU with alpha=%f...", (float)alpha);fflush(stdout);
|
||||
spts_obj.CSRSpTSCPU(alpha);
|
||||
printf("Done.\n");
|
||||
|
||||
printf("Checking results of CPU-side SpTS...");fflush(stdout);
|
||||
if (!spts_obj.CSRCheckCPU(alpha))
|
||||
{
|
||||
fflush(stdout);
|
||||
fprintf(stderr, "CPU-based results were 'wrong', likely due to FP rounding. Expect the CPU and GPU to differ wildly.\n");
|
||||
//exit(-1);
|
||||
}
|
||||
printf("Done.\n");
|
||||
|
||||
printf("Performing %d iterations of SpTS on the GPU with alpha=%f...", in_flags.GetValueInt("iterations"), (float)alpha);fflush(stdout);
|
||||
gflops = spts_obj.CSRSpTSGPU(ns_per_iter, ns_per_analysis_iter, ns_per_syncfree_iter, ns_per_levelset_iter, ns_per_levelsync_iter, alpha);
|
||||
printf("Done.\n");
|
||||
|
||||
if (in_flags.GetValueBool("verify")) {
|
||||
printf("Checking whether GPU SpTS caused non-deterministic errors...\n");fflush(stdout);
|
||||
int non_det_errors = spts_obj.NonDeterministicErrors();
|
||||
printf("Done.\n");
|
||||
if (non_det_errors)
|
||||
fprintf(stderr, "ERROR!! -- Saw %d GPU iterations that had non-deterministic differences.\n", non_det_errors);
|
||||
int max_errors = spts_obj.MaxErrors();
|
||||
if (max_errors)
|
||||
{
|
||||
if (max_errors > 1)
|
||||
printf(" -- %d rows differed between CPU and GPU results.\n", max_errors);
|
||||
else
|
||||
printf(" -- %d row differed between CPU and GPU results.\n", max_errors);
|
||||
}
|
||||
else
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
printf("File %s : SpTS Gflops: %f ms_per_iter: %lf ", in_flags.GetValueStr("filename").c_str(), gflops, ((double)ns_per_iter/1000000.));
|
||||
printf(" ( ms_per_analysis_iter: ");
|
||||
if (ns_per_analysis_iter == 0)
|
||||
printf("no_iter");
|
||||
else
|
||||
printf("%lf", ((double)ns_per_analysis_iter/1000000.));
|
||||
printf(" | ms_per_syncfree_iter: ");
|
||||
if (ns_per_syncfree_iter == 0)
|
||||
printf("no_iter");
|
||||
else
|
||||
printf("%lf", ((double)ns_per_syncfree_iter/1000000.));
|
||||
printf(" | ms_per_levelset_iter: ");
|
||||
if (ns_per_levelset_iter == 0)
|
||||
printf("no_iter");
|
||||
else
|
||||
printf("%lf", ((double)ns_per_levelset_iter/1000000.));
|
||||
printf(" | ms_per_levelsync_iter: ");
|
||||
if (ns_per_levelsync_iter == 0)
|
||||
printf("no_iter )");
|
||||
else
|
||||
printf("%lf )", ((double)ns_per_levelsync_iter/1000000.));
|
||||
|
||||
#ifdef USE_ROCSHMEM
|
||||
MPI_Allreduce(MPI_IN_PLACE, (void *) &ns_per_analysis_iter, 1,
|
||||
MPI_UNSIGNED_LONG, MPI_SUM, MPI_COMM_WORLD);
|
||||
|
||||
if (spts_obj.Get_this_pe() == 0) {
|
||||
printf("\nRANK 0: analysis avg ms = %lf\n",
|
||||
((double) ns_per_analysis_iter / 1000000.) / spts_obj.Get_total_pes());
|
||||
}
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -1,377 +0,0 @@
|
||||
/********************************************************************************
|
||||
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
********************************************************************************/
|
||||
#ifndef MatrixMarketReader_H
|
||||
#define MatrixMarketReader_H
|
||||
/*
|
||||
Portions of this file include code provided by The National Institute of
|
||||
Standards and Technology (NIST). The code includes
|
||||
macro definitions from mmio.h and is subject to the following disclaimer.
|
||||
|
||||
Software Disclaimer
|
||||
|
||||
NIST-developed software is provided by NIST as a public service. You may use,
|
||||
copy and distribute copies of the software in any medium, provided that you
|
||||
keep intact this entire notice. You may improve, modify and create derivative
|
||||
works of the software or any portion of the software, and you may copy and
|
||||
distribute such modifications or works. Modified works should carry a notice
|
||||
stating that you changed the software and should note the date and nature of
|
||||
any such change. Please explicitly acknowledge the National Institute of
|
||||
Standards and Technology as the source of the software.
|
||||
|
||||
NIST-developed software is expressly provided "AS IS" NIST MAKES NO WARRANTY
|
||||
OF ANY KIND, EXPRESS, IMPLIED, IN FACT OR ARISING BY OPERATION OF LAW,
|
||||
INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTY OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT AND DATA ACCURACY. NIST
|
||||
NEITHER REPRESENTS NOR WARRANTS THAT THE OPERATION OF THE SOFTWARE WILL BE
|
||||
UNINTERRUPTED OR ERROR-FREE, OR THAT ANY DEFECTS WILL BE CORRECTED. NIST DOES
|
||||
NOT WARRANT OR MAKE ANY REPRESENTATIONS REGARDING THE USE OF THE SOFTWARE OR
|
||||
THE RESULTS THEREOF, INCLUDING BUT NOT LIMITED TO THE CORRECTNESS, ACCURACY,
|
||||
RELIABILITY, OR USEFULNESS OF THE SOFTWARE.
|
||||
|
||||
You are solely responsible for determining the appropriateness of using and
|
||||
distributing the software and you assume all risks associated with its use,
|
||||
including but not limited to the risks and costs of program errors, compliance
|
||||
with applicable laws, damage to or loss of data, programs or equipment, and
|
||||
the unavailability or interruption of operation. This software is not intended
|
||||
to be used in any situation where a failure could cause risk of injury or
|
||||
damage to property. The software developed by NIST employees is not subject
|
||||
to copyright protection within the United States.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <cstdio>
|
||||
#include <iostream>
|
||||
#include "InputFlags.h"
|
||||
#include <typeinfo>
|
||||
#include "mmio.h"
|
||||
|
||||
// Class declaration
|
||||
|
||||
// One non-zero entry of a sparse matrix held in coordinate (COO) form.
// FillCoordData in this file stores zero-based indices in x and y
// (x = file row - 1, y = file column - 1).
template <typename FloatType>
struct Coordinate
{
    int       x;    // row index
    int       y;    // column index
    FloatType val;  // numeric value of the entry
};
|
||||
|
||||
template <typename FloatType>
|
||||
class MatrixMarketReader
|
||||
{
|
||||
char Typecode[4];
|
||||
int nNZ;
|
||||
int nRows;
|
||||
int nCols;
|
||||
int isSymmetric;
|
||||
int isDoubleMem;
|
||||
Coordinate<FloatType> *coords;
|
||||
bool *has_seen_diag;
|
||||
|
||||
public:
|
||||
MatrixMarketReader() : nNZ(0), nRows(0), nCols(0), isSymmetric(0), isDoubleMem(0)
|
||||
{
|
||||
for (int i = 0; i < sizeof(Typecode); i++)
|
||||
Typecode[i] = '\0';
|
||||
coords = NULL;
|
||||
}
|
||||
bool MMReadFormat(const std::string &_filename, InputFlags &_in_flags);
|
||||
bool MMReadBanner(FILE *_infile);
|
||||
bool MMReadMtxCrdSize(FILE *_infile);
|
||||
void MMGenerateCOOFromFile(FILE *_infile, InputFlags &_in_flags);
|
||||
|
||||
int GetNumRows() { return nRows; }
|
||||
int GetNumCols() { return nCols; }
|
||||
int GetNumNonZeroes() { return nNZ; }
|
||||
int GetSymmetric() { return isSymmetric; }
|
||||
|
||||
char *GetTypecode() { return Typecode; }
|
||||
Coordinate<FloatType> *GetCoordinates() { return coords; }
|
||||
|
||||
~MatrixMarketReader()
|
||||
{
|
||||
delete[] coords;
|
||||
}
|
||||
};
|
||||
|
||||
// Class definition
|
||||
|
||||
template<typename FloatType>
|
||||
bool MatrixMarketReader<FloatType>::MMReadFormat(const std::string &filename, InputFlags &in_flags)
|
||||
{
|
||||
FILE *mm_file = fopen(filename.c_str(), "r");
|
||||
if( mm_file == NULL)
|
||||
{
|
||||
printf("Cannot Open Matrix-Market File !\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
int status = MMReadBanner(mm_file);
|
||||
if(status != 0)
|
||||
{
|
||||
printf("Error Reading Banner in Matrix-Market File !\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
if(! mm_is_coordinate(Typecode))
|
||||
{printf(" only handling coordinate format\n"); return(1);}
|
||||
|
||||
if(mm_is_complex(Typecode)) {
|
||||
printf("Error: cannot handle complex format\n");
|
||||
return (1);
|
||||
}
|
||||
|
||||
if(mm_is_symmetric(Typecode))
|
||||
isSymmetric = 1;
|
||||
|
||||
status = MMReadMtxCrdSize(mm_file);
|
||||
if(status != 0) {
|
||||
printf("Error reading Matrix Market crd_size %d\n",status);
|
||||
return(1);
|
||||
}
|
||||
|
||||
if(mm_is_symmetric(Typecode))
|
||||
coords = new Coordinate<FloatType>[nNZ+nRows];
|
||||
else if (in_flags.GetValueBool("non_symmetric"))
|
||||
coords = new Coordinate<FloatType>[nNZ+nRows]; // This is too large, but oh well.
|
||||
else
|
||||
{
|
||||
fprintf(stderr, "Error: Input matrix is NOT symmetric. This will not work for SpTS.\n");
|
||||
return (1);
|
||||
}
|
||||
|
||||
has_seen_diag = new bool[nRows];
|
||||
for (int i = 0; i < nRows; i++)
|
||||
has_seen_diag[i] = false;
|
||||
|
||||
MMGenerateCOOFromFile(mm_file, in_flags);
|
||||
return 0;
|
||||
}
|
||||
|
||||
template<typename FloatType>
|
||||
void FillCoordData(char Typecode[],
|
||||
Coordinate<FloatType> *coords,
|
||||
bool *has_seen_diag,
|
||||
int &actual_nnz,
|
||||
int ir,
|
||||
int ic,
|
||||
FloatType val)
|
||||
{
|
||||
int new_x = ir - 1;
|
||||
int new_y = ic - 1;
|
||||
if (new_y > new_x)
|
||||
{
|
||||
// Skip stuff in the upper diagonal
|
||||
// Just keep our lower diag.
|
||||
return;
|
||||
}
|
||||
if (new_y == new_x)
|
||||
has_seen_diag[new_x] = true;
|
||||
coords[actual_nnz].x = new_x;
|
||||
coords[actual_nnz].y = new_y;
|
||||
coords[actual_nnz ++].val = val;
|
||||
}
|
||||
|
||||
template<typename FloatType>
|
||||
void FixupMissingDiags(char Typecode[],
|
||||
Coordinate<FloatType> *coords,
|
||||
int &actual_nnz,
|
||||
int nRows,
|
||||
bool *has_seen_diag,
|
||||
InputFlags &in_flags)
|
||||
{
|
||||
for(int i = 0; i < nRows; i++)
|
||||
{
|
||||
if (has_seen_diag[i] == false)
|
||||
{
|
||||
coords[actual_nnz].x = i;
|
||||
coords[actual_nnz].y = i;
|
||||
coords[actual_nnz ++].val = 1.;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename FloatType>
|
||||
void MatrixMarketReader<FloatType>::MMGenerateCOOFromFile(FILE *infile,
|
||||
InputFlags &in_flags)
|
||||
{
|
||||
int actual_nnz = 0;
|
||||
FloatType val;
|
||||
int ir, ic;
|
||||
|
||||
int exp_zeroes = in_flags.GetValueBool("exp_zeroes");
|
||||
|
||||
for(int i = 0; i < nNZ; i++)
|
||||
{
|
||||
if(mm_is_real(Typecode))
|
||||
{
|
||||
if(typeid(FloatType) == typeid(float))
|
||||
fscanf(infile, "%d %d %f\n", &ir, &ic, (float*)(&val));
|
||||
else if(typeid(FloatType) == typeid(double))
|
||||
fscanf(infile, "%d %d %lf\n", &ir, &ic, (double*)(&val));
|
||||
|
||||
if(exp_zeroes == 0 && val == 0)
|
||||
continue;
|
||||
else
|
||||
FillCoordData(Typecode, coords, has_seen_diag, actual_nnz, ir, ic, val);
|
||||
}
|
||||
else if (mm_is_integer(Typecode))
|
||||
{
|
||||
if(typeid(FloatType) == typeid(float))
|
||||
fscanf(infile, "%d %d %f\n", &ir, &ic, (float*)(&val));
|
||||
else if(typeid(FloatType) == typeid(double))
|
||||
fscanf(infile, "%d %d %lf\n", &ir, &ic, (double*)(&val));
|
||||
|
||||
if(exp_zeroes == 0 && val == 0)
|
||||
continue;
|
||||
else
|
||||
FillCoordData(Typecode, coords, has_seen_diag, actual_nnz, ir, ic, val);
|
||||
|
||||
}
|
||||
else if(mm_is_pattern(Typecode))
|
||||
{
|
||||
fscanf(infile, "%d %d", &ir, &ic);
|
||||
//val = ((FloatType) MAX_RAND_VAL * (rand() / (RAND_MAX + 1.0)));
|
||||
val = 3.;
|
||||
|
||||
if(exp_zeroes == 0 && val == 0)
|
||||
continue;
|
||||
else
|
||||
FillCoordData(Typecode, coords, has_seen_diag, actual_nnz, ir, ic, val);
|
||||
}
|
||||
}
|
||||
FixupMissingDiags(Typecode, coords, actual_nnz, nRows, has_seen_diag, in_flags);
|
||||
nNZ = actual_nnz;
|
||||
printf("\n\tNNZ in the lower triangular and fixedup diagonal: %d\n", nNZ);
|
||||
}
|
||||
|
||||
template<typename FloatType>
|
||||
bool MatrixMarketReader<FloatType>::MMReadBanner(FILE *infile)
|
||||
{
|
||||
char line[MM_MAX_LINE_LENGTH];
|
||||
char banner[MM_MAX_TOKEN_LENGTH];
|
||||
char mtx[MM_MAX_TOKEN_LENGTH];
|
||||
char crd[MM_MAX_TOKEN_LENGTH];
|
||||
char data_type[MM_MAX_TOKEN_LENGTH];
|
||||
char storage_scheme[MM_MAX_TOKEN_LENGTH];
|
||||
char *p;
|
||||
|
||||
mm_clear_typecode(Typecode);
|
||||
|
||||
if (fgets(line, MM_MAX_LINE_LENGTH, infile) == NULL)
|
||||
return MM_PREMATURE_EOF;
|
||||
|
||||
if (sscanf(line, "%s %s %s %s %s", banner, mtx, crd, data_type,
|
||||
storage_scheme) != 5)
|
||||
return MM_PREMATURE_EOF;
|
||||
|
||||
for (p=mtx; *p!='\0'; *p=tolower(*p),p++); /* convert to lower case */
|
||||
for (p=crd; *p!='\0'; *p=tolower(*p),p++);
|
||||
for (p=data_type; *p!='\0'; *p=tolower(*p),p++);
|
||||
for (p=storage_scheme; *p!='\0'; *p=tolower(*p),p++);
|
||||
|
||||
/* check for banner */
|
||||
if (strncmp(banner, MatrixMarketBanner, strlen(MatrixMarketBanner)) != 0)
|
||||
return MM_NO_HEADER;
|
||||
|
||||
/* first field should be "mtx" */
|
||||
if (strcmp(mtx, MM_MTX_STR) != 0)
|
||||
return MM_UNSUPPORTED_TYPE;
|
||||
mm_set_matrix(Typecode);
|
||||
|
||||
|
||||
/* second field describes whether this is a sparse matrix (in coordinate
|
||||
storgae) or a dense array */
|
||||
|
||||
|
||||
if (strcmp(crd, MM_SPARSE_STR) == 0)
|
||||
mm_set_sparse(Typecode);
|
||||
else if (strcmp(crd, MM_DENSE_STR) == 0)
|
||||
mm_set_dense(Typecode);
|
||||
else
|
||||
return MM_UNSUPPORTED_TYPE;
|
||||
|
||||
|
||||
/* third field */
|
||||
|
||||
if (strcmp(data_type, MM_REAL_STR) == 0)
|
||||
mm_set_real(Typecode);
|
||||
else
|
||||
if (strcmp(data_type, MM_COMPLEX_STR) == 0)
|
||||
mm_set_complex(Typecode);
|
||||
else
|
||||
if (strcmp(data_type, MM_PATTERN_STR) == 0)
|
||||
mm_set_pattern(Typecode);
|
||||
else
|
||||
if (strcmp(data_type, MM_INT_STR) == 0)
|
||||
mm_set_integer(Typecode);
|
||||
else
|
||||
return MM_UNSUPPORTED_TYPE;
|
||||
|
||||
|
||||
/* fourth field */
|
||||
|
||||
if (strcmp(storage_scheme, MM_GENERAL_STR) == 0)
|
||||
mm_set_general(Typecode);
|
||||
else
|
||||
if (strcmp(storage_scheme, MM_SYMM_STR) == 0)
|
||||
mm_set_symmetric(Typecode);
|
||||
else
|
||||
if (strcmp(storage_scheme, MM_HERM_STR) == 0)
|
||||
mm_set_hermitian(Typecode);
|
||||
else
|
||||
if (strcmp(storage_scheme, MM_SKEW_STR) == 0)
|
||||
mm_set_skew(Typecode);
|
||||
else
|
||||
return MM_UNSUPPORTED_TYPE;
|
||||
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
template<typename FloatType>
|
||||
bool MatrixMarketReader<FloatType>::MMReadMtxCrdSize(FILE *infile)
|
||||
{
|
||||
char line[MM_MAX_LINE_LENGTH];
|
||||
int num_items_read;
|
||||
|
||||
/* now continue scanning until you reach the end-of-comments */
|
||||
do
|
||||
{
|
||||
if (fgets(line,MM_MAX_LINE_LENGTH, infile) == NULL)
|
||||
return MM_PREMATURE_EOF;
|
||||
}while (line[0] == '%');
|
||||
|
||||
/* line[] is either blank or has M,N, nz */
|
||||
if (sscanf(line, "%d %d %d", &nRows, &nCols, &nNZ) == 3)
|
||||
return 0;
|
||||
else
|
||||
do
|
||||
{
|
||||
num_items_read = fscanf(infile, "%d %d %d", &nRows, &nCols, &nNZ);
|
||||
if (num_items_read == EOF) return MM_PREMATURE_EOF;
|
||||
}
|
||||
while (num_items_read != 3);
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif // MatrixMarketReader_H
|
||||
@@ -1,486 +0,0 @@
|
||||
/********************************************************************************
|
||||
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
********************************************************************************/
|
||||
#include "OpenCLHelper.h"
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
|
||||
cl_context CLHelper::context = NULL;
|
||||
cl_command_queue CLHelper::commandQueue = NULL;
|
||||
cl_kernel CLHelper::SpTSKernel = NULL;
|
||||
cl_kernel CLHelper::SpTSKernel_analyze = NULL;
|
||||
cl_kernel CLHelper::SpTSKernel_levelset = NULL;
|
||||
cl_kernel CLHelper::SpTSKernel_scalar = NULL;
|
||||
cl_kernel CLHelper::SpTSKernel_vector = NULL;
|
||||
cl_kernel CLHelper::SpTSKernel_levelsync = NULL;
|
||||
|
||||
// Maps an OpenCL status code to the name of its constant.
// Codes that are not listed (or not available in the OpenCL headers being
// compiled against) yield "UNKNOWN CL ERROR CODE".
const char * get_cl_err_string(cl_int err)
{
// Expands to: case <code>: return "<code>";
// The '#' operator stringifies the argument as written, so the returned text
// is the constant's name.
#define SPTS_CL_ERR_CASE(code) case code: return #code

    switch (err)
    {
        SPTS_CL_ERR_CASE(CL_SUCCESS);
        SPTS_CL_ERR_CASE(CL_DEVICE_NOT_FOUND);
        SPTS_CL_ERR_CASE(CL_DEVICE_NOT_AVAILABLE);
        SPTS_CL_ERR_CASE(CL_COMPILER_NOT_AVAILABLE);
        SPTS_CL_ERR_CASE(CL_MEM_OBJECT_ALLOCATION_FAILURE);
        SPTS_CL_ERR_CASE(CL_OUT_OF_RESOURCES);
        SPTS_CL_ERR_CASE(CL_OUT_OF_HOST_MEMORY);
        SPTS_CL_ERR_CASE(CL_PROFILING_INFO_NOT_AVAILABLE);
        SPTS_CL_ERR_CASE(CL_MEM_COPY_OVERLAP);
        SPTS_CL_ERR_CASE(CL_IMAGE_FORMAT_MISMATCH);
        SPTS_CL_ERR_CASE(CL_IMAGE_FORMAT_NOT_SUPPORTED);
        SPTS_CL_ERR_CASE(CL_BUILD_PROGRAM_FAILURE);
        SPTS_CL_ERR_CASE(CL_MAP_FAILURE);
#ifdef CL_VERSION_1_1
        SPTS_CL_ERR_CASE(CL_MISALIGNED_SUB_BUFFER_OFFSET);
        SPTS_CL_ERR_CASE(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST);
#endif
#ifdef CL_VERSION_1_2
        SPTS_CL_ERR_CASE(CL_COMPILE_PROGRAM_FAILURE);
        SPTS_CL_ERR_CASE(CL_LINKER_NOT_AVAILABLE);
        SPTS_CL_ERR_CASE(CL_LINK_PROGRAM_FAILURE);
        SPTS_CL_ERR_CASE(CL_DEVICE_PARTITION_FAILED);
        SPTS_CL_ERR_CASE(CL_KERNEL_ARG_INFO_NOT_AVAILABLE);
#endif
        SPTS_CL_ERR_CASE(CL_INVALID_VALUE);
        SPTS_CL_ERR_CASE(CL_INVALID_DEVICE_TYPE);
        SPTS_CL_ERR_CASE(CL_INVALID_PLATFORM);
        SPTS_CL_ERR_CASE(CL_INVALID_DEVICE);
        SPTS_CL_ERR_CASE(CL_INVALID_CONTEXT);
        SPTS_CL_ERR_CASE(CL_INVALID_QUEUE_PROPERTIES);
        SPTS_CL_ERR_CASE(CL_INVALID_COMMAND_QUEUE);
        SPTS_CL_ERR_CASE(CL_INVALID_HOST_PTR);
        SPTS_CL_ERR_CASE(CL_INVALID_MEM_OBJECT);
        SPTS_CL_ERR_CASE(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR);
        SPTS_CL_ERR_CASE(CL_INVALID_IMAGE_SIZE);
        SPTS_CL_ERR_CASE(CL_INVALID_SAMPLER);
        SPTS_CL_ERR_CASE(CL_INVALID_BINARY);
        SPTS_CL_ERR_CASE(CL_INVALID_BUILD_OPTIONS);
        SPTS_CL_ERR_CASE(CL_INVALID_PROGRAM);
        SPTS_CL_ERR_CASE(CL_INVALID_PROGRAM_EXECUTABLE);
        SPTS_CL_ERR_CASE(CL_INVALID_KERNEL_NAME);
        SPTS_CL_ERR_CASE(CL_INVALID_KERNEL_DEFINITION);
        SPTS_CL_ERR_CASE(CL_INVALID_KERNEL);
        SPTS_CL_ERR_CASE(CL_INVALID_ARG_INDEX);
        SPTS_CL_ERR_CASE(CL_INVALID_ARG_VALUE);
        SPTS_CL_ERR_CASE(CL_INVALID_ARG_SIZE);
        SPTS_CL_ERR_CASE(CL_INVALID_KERNEL_ARGS);
        SPTS_CL_ERR_CASE(CL_INVALID_WORK_DIMENSION);
        SPTS_CL_ERR_CASE(CL_INVALID_WORK_GROUP_SIZE);
        SPTS_CL_ERR_CASE(CL_INVALID_WORK_ITEM_SIZE);
        SPTS_CL_ERR_CASE(CL_INVALID_GLOBAL_OFFSET);
        SPTS_CL_ERR_CASE(CL_INVALID_EVENT_WAIT_LIST);
        SPTS_CL_ERR_CASE(CL_INVALID_EVENT);
        SPTS_CL_ERR_CASE(CL_INVALID_OPERATION);
        SPTS_CL_ERR_CASE(CL_INVALID_GL_OBJECT);
        SPTS_CL_ERR_CASE(CL_INVALID_BUFFER_SIZE);
#ifdef CL_VERSION_1_1
        SPTS_CL_ERR_CASE(CL_INVALID_MIP_LEVEL);
        SPTS_CL_ERR_CASE(CL_INVALID_GLOBAL_WORK_SIZE);
        SPTS_CL_ERR_CASE(CL_INVALID_PROPERTY);
#ifdef cl_ext_device_fission
        SPTS_CL_ERR_CASE(CL_DEVICE_PARTITION_FAILED_EXT);
        SPTS_CL_ERR_CASE(CL_INVALID_PARTITION_COUNT_EXT);
        SPTS_CL_ERR_CASE(CL_INVALID_PARTITION_NAME_EXT);
#endif
#endif
#ifdef CL_VERSION_1_2
        SPTS_CL_ERR_CASE(CL_INVALID_IMAGE_DESCRIPTOR);
        SPTS_CL_ERR_CASE(CL_INVALID_COMPILER_OPTIONS);
        SPTS_CL_ERR_CASE(CL_INVALID_LINKER_OPTIONS);
        SPTS_CL_ERR_CASE(CL_INVALID_DEVICE_PARTITION_COUNT);
#endif
#ifdef CL_VERSION_2_0
        SPTS_CL_ERR_CASE(CL_INVALID_PIPE_SIZE);
        SPTS_CL_ERR_CASE(CL_INVALID_DEVICE_QUEUE);
#endif
#ifdef CL_VERSION_2_2
        SPTS_CL_ERR_CASE(CL_INVALID_SPEC_ID);
        SPTS_CL_ERR_CASE(CL_MAX_SIZE_RESTRICTION_EXCEEDED);
#endif
#ifdef cl_khr_icd
        SPTS_CL_ERR_CASE(CL_PLATFORM_NOT_FOUND_KHR);
#endif
        default:
            return "UNKNOWN CL ERROR CODE";
    }

#undef SPTS_CL_ERR_CASE
}
|
||||
|
||||
// Loads the whole file fname into a freshly malloc'ed buffer.
//
// source     : receives the buffer (to be released with free()), or NULL if
//              the file could not be loaded.
// sourceSize : receives the number of bytes actually read, or 0 on failure.
// fname      : path of the file to load.
//
// The buffer is allocated one byte larger than the file and is NUL
// terminated; the terminator is not counted in *sourceSize. Failures are
// reported on stderr.
void convertToStr(char **source, size_t* sourceSize, const std::string fname)
{
    *source = NULL;
    *sourceSize = 0;

    FILE *fp = fopen(fname.c_str(), "r");
    if (fp == NULL)
    {
        fprintf(stderr, "Error: cannot open file %s\n", fname.c_str());
        return;
    }

    // Determine the file length by seeking to the end and back.
    long file_len = -1;
    if (fseek(fp, 0, SEEK_END) == 0)
        file_len = ftell(fp);
    if (file_len < 0 || fseek(fp, 0, SEEK_SET) != 0)
    {
        fprintf(stderr, "Error: cannot determine size of file %s\n", fname.c_str());
        fclose(fp);
        return;
    }

    char *buffer = (char *)malloc((size_t)file_len + 1);
    if (buffer == NULL)
    {
        fprintf(stderr, "Error: out of memory while loading file %s\n", fname.c_str());
        fclose(fp);
        return;
    }

    // Report the byte count fread delivered rather than the ftell value, so
    // the size always matches the data that is really in the buffer.
    size_t bytes_read = fread(buffer, 1, (size_t)file_len, fp);
    buffer[bytes_read] = '\0';
    fclose(fp);

    *source = buffer;
    *sourceSize = bytes_read;
}
|
||||
|
||||
int CLHelper::Init(const std::string &filename, InputFlags &in_flags)
|
||||
{
|
||||
cl_int status = 0;
|
||||
size_t deviceListSize;
|
||||
unsigned int i;
|
||||
|
||||
/*
|
||||
* Have a look at the available platforms and pick either
|
||||
* the AMD one if available or a reasonable default.
|
||||
*/
|
||||
cl_uint numPlatforms;
|
||||
platform = NULL;
|
||||
status = clGetPlatformIDs(0, NULL, &numPlatforms);
|
||||
if(status != CL_SUCCESS)
|
||||
{
|
||||
fprintf(stderr,"clGetPlatformIDs failed. %u",numPlatforms);
|
||||
return 1;
|
||||
}
|
||||
if (0 < numPlatforms)
|
||||
{
|
||||
cl_platform_id* platforms = (cl_platform_id*)malloc(numPlatforms * sizeof(cl_platform_id));
|
||||
status = clGetPlatformIDs(numPlatforms, platforms, NULL);
|
||||
if(status != CL_SUCCESS)
|
||||
{
|
||||
fprintf(stderr, "clGetPlatformIDs failed: %s\n", get_cl_err_string(status) );
|
||||
return 1;
|
||||
}
|
||||
for (i = 0; i < numPlatforms; ++i)
|
||||
{
|
||||
char pbuf[100];
|
||||
status = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, sizeof(pbuf), pbuf, NULL);
|
||||
|
||||
if(status != CL_SUCCESS)
|
||||
{
|
||||
fprintf(stderr,"clGetPlatformInfo failed: %s\n", get_cl_err_string(status));
|
||||
return 1;
|
||||
}
|
||||
|
||||
platform = platforms[i];
|
||||
if (!strcmp(pbuf, "Advanced Micro Devices, Inc."))
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
free(platforms);
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
// Create an OpenCL context
|
||||
/////////////////////////////////////////////////////////////////
|
||||
|
||||
cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 };
|
||||
cl_context_properties* cprops = (NULL == platform) ? NULL : cps;
|
||||
context = clCreateContextFromType(cprops, CL_DEVICE_TYPE_GPU, NULL, NULL, &status);
|
||||
if(status != CL_SUCCESS)
|
||||
{
|
||||
printf("status: %d", status);
|
||||
fprintf(stderr,"Error: Creating Context. (clCreateContextFromType): %s\n", get_cl_err_string(status));
|
||||
return 1;
|
||||
}
|
||||
/* First, get the size of device list data */
|
||||
status = clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES, sizeof(size_t), &deviceListSize, NULL);
|
||||
if(status != CL_SUCCESS)
|
||||
{
|
||||
fprintf(stderr,"Error: Getting Context Info (device list size, clGetContextInfo): %s\n", get_cl_err_string(status));
|
||||
return 1;
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
// Detect OpenCL devices
|
||||
/////////////////////////////////////////////////////////////////
|
||||
devices = (cl_device_id *)malloc(deviceListSize * sizeof(cl_device_id));
|
||||
if(devices == 0)
|
||||
{
|
||||
fprintf(stderr,"Error: No devices found: %s\n", get_cl_err_string(status));
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Now, get the device list data */
|
||||
status = clGetContextInfo( context, CL_CONTEXT_DEVICES, deviceListSize*sizeof(cl_device_id), devices, NULL);
|
||||
if(status != CL_SUCCESS)
|
||||
{
|
||||
fprintf(stderr,"Error: Getting Context Info (device list, clGetContextInfo): %s\n", get_cl_err_string(status));
|
||||
return 1;
|
||||
}
|
||||
|
||||
char *deviceName;
|
||||
size_t dev_name_size = 0;
|
||||
|
||||
int deviceNum = in_flags.GetValueInt("device");
|
||||
|
||||
clGetDeviceInfo(devices[deviceNum], CL_DEVICE_NAME, sizeof(char*), NULL, &dev_name_size);
|
||||
deviceName = (char *)malloc(sizeof(char)*dev_name_size);
|
||||
|
||||
clGetDeviceInfo(devices[deviceNum], CL_DEVICE_NAME, sizeof(deviceName), deviceName, NULL);
|
||||
printf("Device Name: %s\n", deviceName);
|
||||
|
||||
bool use_gcn3 = false;
|
||||
bool use_gcn2 = false;
|
||||
char *found_gfx8 = strstr(deviceName, "gfx8");
|
||||
char *found_gfx7 = strstr(deviceName, "gfx7");
|
||||
if (found_gfx8 != NULL)
|
||||
use_gcn3 = true;
|
||||
if (found_gfx7 != NULL)
|
||||
use_gcn2 = true;
|
||||
|
||||
free(deviceName);
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
// Create an OpenCL command queue
|
||||
/////////////////////////////////////////////////////////////////
|
||||
commandQueue = clCreateCommandQueue(context, devices[deviceNum], CL_QUEUE_PROFILING_ENABLE, &status);
|
||||
if(status != CL_SUCCESS)
|
||||
{
|
||||
fprintf(stderr,"Creating Command Queue. (clCreateCommandQueue): %s\n", get_cl_err_string(status));
|
||||
return 1;
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
// Load CL file, build CL program object, create CL kernel object
|
||||
/////////////////////////////////////////////////////////////////
|
||||
char* source;
|
||||
size_t sourceSize;
|
||||
convertToStr(&source, &sourceSize, filename);
|
||||
|
||||
syncfree_program = clCreateProgramWithSource(context, 1, (const char**)&source, &sourceSize, &status);
|
||||
if(status != CL_SUCCESS)
|
||||
{
|
||||
fprintf(stderr,"Error: Loading Binary into cl_program (clCreateProgramWithBinary): %s\n", get_cl_err_string(status));
|
||||
return 1;
|
||||
}
|
||||
analyze_levelset_program = clCreateProgramWithSource(context, 1, (const char**)&source, &sourceSize, &status);
|
||||
if(status != CL_SUCCESS)
|
||||
{
|
||||
fprintf(stderr,"Error: Loading Binary into cl_program (clCreateProgramWithBinary): %s\n", get_cl_err_string(status));
|
||||
return 1;
|
||||
}
|
||||
|
||||
std::string buildFlags = "-x clc++ -Dcl_khr_int64_base_atomics=1 -cl-std=CL2.0";
|
||||
if (use_gcn3)
|
||||
buildFlags += " -DGCN3 ";
|
||||
if (use_gcn2)
|
||||
buildFlags += " -DGCN2 ";
|
||||
buildFlags += " -DROW_BITS=" + std::to_string(ROW_BITS);
|
||||
buildFlags += " -DWG_BITS=" + std::to_string(WG_BITS);
|
||||
buildFlags += " -DWF_SIZE=" + std::to_string(WF_SIZE);
|
||||
buildFlags += " -DWF_PER_WG=" + std::to_string(WF_PER_WG);
|
||||
#ifdef USE_DOUBLE
|
||||
buildFlags += " -DDOUBLE";
|
||||
#endif
|
||||
|
||||
/* create a cl program executable for all the devices specified */
|
||||
status = clBuildProgram(analyze_levelset_program, 1, &devices[deviceNum], buildFlags.c_str(), NULL, NULL);
|
||||
if(status != CL_SUCCESS)
|
||||
{
|
||||
printf("Error: Building Analyze and Levelset Program (clBuildProgram): %d\n", status);
|
||||
char * errorbuf = (char*)calloc(sizeof(char),1024*1024);
|
||||
size_t size;
|
||||
clGetProgramBuildInfo(analyze_levelset_program, devices[deviceNum], CL_PROGRAM_BUILD_LOG, 1024*1024, errorbuf, &size);
|
||||
printf("%s ", errorbuf);
|
||||
return 1;
|
||||
}
|
||||
|
||||
buildFlags += " -DSYNCFREE_KERNEL";
|
||||
status = clBuildProgram(syncfree_program, 1, &devices[deviceNum], buildFlags.c_str(), NULL, NULL);
|
||||
if(status != CL_SUCCESS)
|
||||
{
|
||||
printf("Error: Building Syncfree Program (clBuildProgram): %d\n", status);
|
||||
char * errorbuf = (char*)calloc(sizeof(char),1024*1024);
|
||||
size_t size;
|
||||
clGetProgramBuildInfo(syncfree_program, devices[deviceNum], CL_PROGRAM_BUILD_LOG, 1024*1024, errorbuf, &size);
|
||||
printf("%s ", errorbuf);
|
||||
return 1;
|
||||
}
|
||||
|
||||
SpTSKernel = clCreateKernel(syncfree_program, "amd_spts_syncfree_solve", &status);
|
||||
if(status != CL_SUCCESS)
|
||||
{
|
||||
fprintf(stderr,"Error: Creating Kernel from program. (SpTS): %s\n", get_cl_err_string(status));
|
||||
return 1;
|
||||
}
|
||||
|
||||
SpTSKernel_analyze = clCreateKernel(analyze_levelset_program, "amd_spts_analyze_and_solve", &status);
|
||||
if(status != CL_SUCCESS)
|
||||
{
|
||||
fprintf(stderr,"Error: Creating Kernel from program. (SpTS_analyze): %s\n", get_cl_err_string(status));
|
||||
return 1;
|
||||
}
|
||||
|
||||
SpTSKernel_levelset = clCreateKernel(analyze_levelset_program, "amd_spts_levelset_solve", &status);
|
||||
if(status != CL_SUCCESS)
|
||||
{
|
||||
fprintf(stderr,"Error: Creating Kernel from program. (SpTS_levelset): %s\n", get_cl_err_string(status));
|
||||
return 1;
|
||||
}
|
||||
|
||||
SpTSKernel_scalar = clCreateKernel(analyze_levelset_program, "amd_spts_scalar_solve", &status);
|
||||
if(status != CL_SUCCESS)
|
||||
{
|
||||
fprintf(stderr,"Error: Creating Kernel from program. (SpTS_scalar): %s\n", get_cl_err_string(status));
|
||||
return 1;
|
||||
}
|
||||
|
||||
SpTSKernel_vector = clCreateKernel(analyze_levelset_program, "amd_spts_vector_solve", &status);
|
||||
if(status != CL_SUCCESS)
|
||||
{
|
||||
fprintf(stderr,"Error: Creating Kernel from program. (SpTS_vector): %s\n", get_cl_err_string(status));
|
||||
return 1;
|
||||
}
|
||||
|
||||
SpTSKernel_levelsync = clCreateKernel(analyze_levelset_program, "amd_spts_levelsync_solve", &status);
|
||||
if(status != CL_SUCCESS)
|
||||
{
|
||||
fprintf(stderr,"Error: Creating Kernel from program. (SpTS_levelsync): %s\n", get_cl_err_string(status));
|
||||
return 1;
|
||||
}
|
||||
|
||||
// All good
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Terminates the program with exit(-1) when status is not CL_SUCCESS, after
// writing errString and the name of the status code to std::cerr.
// Returns normally when status is CL_SUCCESS.
void CLHelper::checkStatus(cl_int status, const std::string errString)
{
    if (status == CL_SUCCESS)
        return;

    std::cerr << errString << " : " << get_cl_err_string(status) << std::endl;
    exit(-1);
}
|
||||
|
||||
// Creates an OpenCL buffer of size bytes in the helper's context.
//
// name       : label used only in the error message.
// size       : buffer size in bytes.
// flags      : passed to clCreateBuffer as its flags argument.
// hostBuffer : passed to clCreateBuffer as its host pointer.
//
// Returns the new buffer. A failed allocation is handled by checkStatus,
// which does not return.
memPointer CLHelper::AllocateMem(const std::string name,
                                 size_t size,
                                 memPointer_flags flags,
                                 void *hostBuffer)
{
    cl_int err;
    cl_mem buffer = clCreateBuffer(context, flags, size, hostBuffer, &err);

    checkStatus(err, "OpenCL error allocating " + name + " !");
    return buffer;
}
|
||||
|
||||
// Enqueues a write of size bytes from hostBuffer into devBuffer, starting
// at byte offset in the device buffer, on the helper's command queue.
//
// blocking : passed to clEnqueueWriteBuffer as its blocking flag.
// ev       : receives the event of the enqueued command (passed through).
//
// Any error is handled by checkStatus, which does not return.
void CLHelper::CopyToDevice(memPointer devBuffer,
                            void *hostBuffer,
                            size_t size,
                            size_t offset,
                            cl_bool blocking,
                            cl_event *ev)
{
    const cl_int err = clEnqueueWriteBuffer(commandQueue, devBuffer, blocking,
                                            offset, size, hostBuffer,
                                            0, NULL, ev);
    checkStatus(err, "OpenCL error copying data to device !");
}
|
||||
|
||||
// Enqueues a read of size bytes from devBuffer, starting at byte offset in
// the device buffer, into hostBuffer on the helper's command queue.
//
// blocking : passed to clEnqueueReadBuffer as its blocking flag.
// ev       : receives the event of the enqueued command (passed through).
//
// Any error is handled by checkStatus, which does not return.
void CLHelper::CopyToHost(memPointer devBuffer,
                          void *hostBuffer,
                          size_t size,
                          size_t offset,
                          cl_bool blocking,
                          cl_event *ev)
{
    cl_int status;
    status = clEnqueueReadBuffer(commandQueue, devBuffer, blocking, offset, size, hostBuffer, 0, NULL, ev);

    // The message used to say "to device", which pointed at the wrong
    // transfer direction when a read failed.
    checkStatus(status, "OpenCL error copying data to host !");
}
|
||||
|
||||
// Returns the difference between an event's CL_PROFILING_COMMAND_END and
// CL_PROFILING_COMMAND_START counters.
//
//   event - event whose profiling counters are queried.
//
// The OpenCL specification defines both counters as cl_ulong, so they are read
// into cl_ulong variables (the previous int64_t targets only worked because
// the sizes happen to match). If either query fails, a warning is printed and
// 0 is returned instead of a difference of uninitialised values.
int64_t CLHelper::ComputeTime(cl_event event)
{
    cl_ulong start_time = 0;
    cl_ulong end_time = 0;

    cl_int status = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START,
                                            sizeof(start_time), &start_time, NULL);
    if (status == CL_SUCCESS)
    {
        status = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END,
                                         sizeof(end_time), &end_time, NULL);
    }

    if (status != CL_SUCCESS)
    {
        // Deliberately non-fatal: timing is diagnostic output, not part of the solve.
        std::cerr << "OpenCL error reading event profiling info : "
                  << get_cl_err_string(status) << std::endl;
        return 0;
    }

    return static_cast<int64_t>(end_time - start_time);
}
|
||||
@@ -1,108 +0,0 @@
|
||||
/********************************************************************************
|
||||
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
********************************************************************************/
|
||||
#ifndef CLHelper_H
|
||||
#define CLHelper_H
|
||||
|
||||
#define CL_USE_DEPRECATED_OPENCL_2_0_APIS
|
||||
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
|
||||
#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
|
||||
|
||||
#include <CL/cl.h>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include "InputFlags.h"
|
||||
#include "GPUHelper.h"
|
||||
|
||||
// Tag type carrying a size for a kernel argument that has no host value.
// CLHelper::SetArgs has an overload for this type that passes GetSize() as the
// argument size together with a NULL value pointer.
struct LocalMemArg
{
    // Implicit on purpose: a plain size_t converts to LocalMemArg.
    LocalMemArg(size_t requested)
        : size(requested)
    {
    }

    // Size given at construction.
    size_t GetSize() const
    {
        return size;
    }

private:
    size_t size;
};
|
||||
|
||||
class CLHelper : public GPUHelper
|
||||
{
|
||||
cl_platform_id platform;
|
||||
cl_device_id *devices;
|
||||
cl_program syncfree_program;
|
||||
cl_program analyze_levelset_program;
|
||||
|
||||
public:
|
||||
static cl_context context;
|
||||
static cl_kernel SpTSKernel;
|
||||
static cl_kernel SpTSKernel_analyze;
|
||||
static cl_kernel SpTSKernel_levelset;
|
||||
static cl_kernel SpTSKernel_scalar;
|
||||
static cl_kernel SpTSKernel_vector;
|
||||
static cl_kernel SpTSKernel_levelsync;
|
||||
static cl_command_queue commandQueue;
|
||||
|
||||
CLHelper() {}
|
||||
int Init(const std::string &_filename, InputFlags &in_flags);
|
||||
void checkStatus(gpuError status, const std::string errString);
|
||||
void CopyToDevice(memPointer _d_buf, void *_h_buf, size_t _size, size_t _offset, cl_bool _blocking, cl_event *_ev);
|
||||
void CopyToHost(memPointer _d_buf, void *_h_buf, size_t _size, size_t _offset, cl_bool _blocking, cl_event *_ev);
|
||||
memPointer AllocateMem(const std::string name, size_t, memPointer_flags flags, void *);
|
||||
void FreeMem(memPointer ptr) { clReleaseMemObject(ptr); }
|
||||
void Flush() { clFinish(commandQueue); }
|
||||
|
||||
template<typename T, typename... Args>
|
||||
void SetArgs(cl_kernel, int i, const T& first, const Args&... rest);
|
||||
template<typename... Args>
|
||||
void SetArgs(cl_kernel, int i, const LocalMemArg &lmem, const Args&... rest);
|
||||
void SetArgs(cl_kernel, int i) {}
|
||||
|
||||
int64_t ComputeTime(cl_event event);
|
||||
|
||||
};
|
||||
|
||||
template<typename T, typename... Args>
|
||||
void CLHelper::SetArgs(cl_kernel kernel, int i, const T& first, const Args&... rest)
|
||||
{
|
||||
cl_int status;
|
||||
|
||||
status = clSetKernelArg(kernel, i++, sizeof(T), (void *)& first);
|
||||
std::stringstream errStream;
|
||||
errStream<<"OpenCL error setting kernel argument "<<i;
|
||||
checkStatus(status, errStream.str()) ;
|
||||
|
||||
SetArgs(kernel, i, rest...);
|
||||
}
|
||||
|
||||
template<typename... Args>
|
||||
void CLHelper::SetArgs(cl_kernel kernel, int i, const LocalMemArg &lmem, const Args&... rest)
|
||||
{
|
||||
cl_int status;
|
||||
status = clSetKernelArg(kernel, i++, lmem.GetSize(), NULL);
|
||||
std::stringstream errStream;
|
||||
errStream<<"OpenCL error setting kernel argument (local memory) "<<i;
|
||||
checkStatus(status, errStream.str()) ;
|
||||
|
||||
SetArgs(kernel, i, rest...);
|
||||
|
||||
}
|
||||
|
||||
#endif //CLHelper_H
|
||||
|
||||
Plik diff jest za duży
Load Diff
@@ -1,287 +0,0 @@
|
||||
/********************************************************************************
|
||||
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
********************************************************************************/
|
||||
#ifndef SparseMatrix_H
|
||||
#define SparseMatrix_H
|
||||
|
||||
#include "GPUHelper.h"
#ifndef USE_HIP
#include "OpenCLHelper.h"
#include <CL/cl.h>
#else
#include "HIPHelper.h"
#endif

#include "InputFlags.h"
#include "MatrixMarketReader.h"
#include "OpenCLHelper.h"
#include <algorithm>
#include <cassert>
#include <new>
|
||||
|
||||
template<typename FloatType>
|
||||
class SparseMatrix
|
||||
{
|
||||
|
||||
public:
|
||||
int nRows;
|
||||
int nCols;
|
||||
int nNZ;
|
||||
|
||||
int *cols;
|
||||
int *row_ptrs;
|
||||
|
||||
FloatType *vals;
|
||||
|
||||
memPointer d_cols;
|
||||
memPointer d_vals;
|
||||
memPointer d_row_ptrs;
|
||||
|
||||
// info about parallel procs
|
||||
int this_pe;
|
||||
int total_pes;
|
||||
|
||||
int nRows_p;
|
||||
int nCols_p;
|
||||
|
||||
protected:
|
||||
|
||||
GPUHelper *GPU;
|
||||
|
||||
public:
|
||||
|
||||
SparseMatrix() : nRows(0), nCols(0), nNZ(0), nRows_p(0), nCols_p(0)
|
||||
{
|
||||
cols = NULL;
|
||||
row_ptrs = NULL;
|
||||
vals = NULL;
|
||||
|
||||
d_cols = NULL;
|
||||
d_vals = NULL;
|
||||
d_row_ptrs = NULL;
|
||||
|
||||
this_pe = -1;//rocshmem_my_pe(handle); // this pe
|
||||
total_pes = -1;//rocshmem_n_pes(handle); // total number of pes
|
||||
|
||||
}
|
||||
void AllocateSparseMatrix(MatrixMarketReader<FloatType> &mm_reader,
|
||||
InputFlags &in_flags,
|
||||
GPUHelper *gpu);
|
||||
void AllocateParallelSparseMatrix(MatrixMarketReader<FloatType> &mm_reader,
|
||||
InputFlags &in_flags);
|
||||
void ConvertFromCOOToCSR(Coordinate<FloatType> *coords,
|
||||
InputFlags &in_flags);
|
||||
|
||||
void PopulateParallelSparseMatrix(MatrixMarketReader<FloatType> &mm_reader,
|
||||
InputFlags &in_flags);
|
||||
|
||||
void FindStatsForParallelDecomposition();
|
||||
|
||||
void Set_total_pes(int val){
|
||||
this->total_pes = val;
|
||||
}
|
||||
void Set_this_pe(int val){
|
||||
this->this_pe = val;
|
||||
}
|
||||
|
||||
int Get_total_pes(){
|
||||
return this->total_pes;
|
||||
}
|
||||
int Get_this_pe(){
|
||||
return this->this_pe;
|
||||
}
|
||||
|
||||
int GetNumRows_p() {return nRows_p;}
|
||||
|
||||
int *GetCols() { return cols; }
|
||||
FloatType *GetVals() { return vals; }
|
||||
int *GetRowPtrs() { return row_ptrs; }
|
||||
|
||||
memPointer GetDevCols() {return d_cols; }
|
||||
memPointer GetDevVals() {return d_vals; }
|
||||
memPointer GetDevRowPtrs() {return d_row_ptrs; }
|
||||
|
||||
~SparseMatrix()
|
||||
{
|
||||
delete[] cols;
|
||||
delete[] vals;
|
||||
delete[] row_ptrs;
|
||||
|
||||
GPU->FreeMem(d_cols);
|
||||
GPU->FreeMem(d_vals);
|
||||
GPU->FreeMem(d_row_ptrs);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename FloatType>
|
||||
void SparseMatrix<FloatType>::AllocateSparseMatrix(MatrixMarketReader<FloatType> &mm_reader,
|
||||
InputFlags &in_flags,
|
||||
GPUHelper *gpu)
|
||||
{
|
||||
GPU = gpu;
|
||||
nRows = mm_reader.GetNumRows();
|
||||
nCols = mm_reader.GetNumCols();
|
||||
nNZ = mm_reader.GetNumNonZeroes();
|
||||
printf("Allocating a sparse matrix with-- nRows: %d nCols: %d nNZ: %d\n", nRows, nCols, nNZ);
|
||||
|
||||
assert(total_pes != -1);
|
||||
assert(this_pe != -1);
|
||||
|
||||
#ifdef USE_RO_SHMEM
|
||||
if (nRows != nCols){
|
||||
fprintf(stderr, "RO_SHMEM port requires the global matrix to be "
|
||||
"square!\n");
|
||||
exit(-1);
|
||||
}
|
||||
#endif
|
||||
|
||||
cols = new int[nNZ];
|
||||
if (cols == NULL)
|
||||
{
|
||||
fprintf(stderr, "Failed to allocate host-side cols array !\n");
|
||||
exit(-1);
|
||||
}
|
||||
vals = new FloatType[nNZ];
|
||||
if (vals == NULL)
|
||||
{
|
||||
fprintf(stderr, "Failed to allocate host-side vals array !\n");
|
||||
exit(-1);
|
||||
}
|
||||
row_ptrs = new int[nRows + 1];
|
||||
if (row_ptrs == NULL)
|
||||
{
|
||||
fprintf(stderr, "Failed to allocate host-side row_ptrs array !\n");
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename FloatType>
|
||||
bool CoordinateCompare(const Coordinate<FloatType> &c1, const Coordinate<FloatType> &c2)
|
||||
{
|
||||
if(c1.x != c2.x)
|
||||
return (c1.x < c2.x);
|
||||
else
|
||||
return (c1.y < c2.y);
|
||||
}
|
||||
|
||||
template<typename FloatType>
|
||||
void SparseMatrix<FloatType>::ConvertFromCOOToCSR(Coordinate<FloatType> *coords,
|
||||
InputFlags &in_flags)
|
||||
{
|
||||
std::sort(coords, coords + nNZ, CoordinateCompare<FloatType>);
|
||||
|
||||
int current_row = 1;
|
||||
bool has_seen_diagonal = false;
|
||||
row_ptrs[0] = 0;
|
||||
for (int i = 0; i < nNZ; i++)
|
||||
{
|
||||
cols[i] = coords[i].y;
|
||||
vals[i] = coords[i].val;
|
||||
//fprintf(stderr,"Row %d Col %d Val %lf (cur_row: %d)\n", coords[i].x, coords[i].y, coords[i].val, current_row-1);
|
||||
|
||||
while(coords[i].x >= current_row)
|
||||
{
|
||||
// We've reached the end of a row. Did we see a diagonal?
|
||||
// If not, the triangular solve will be underconstrained.
|
||||
if (!has_seen_diagonal)
|
||||
{
|
||||
fprintf(stderr, "ERROR Converting the COO to CSR.\n");
|
||||
fprintf(stderr, "\tMissing diagonal on row %d\n", current_row-1);
|
||||
exit(-1);
|
||||
}
|
||||
has_seen_diagonal = false;
|
||||
row_ptrs[current_row] = i;
|
||||
current_row++;
|
||||
}
|
||||
if (coords[i].x == coords[i].y)
|
||||
has_seen_diagonal = true;
|
||||
|
||||
}
|
||||
row_ptrs[current_row++] = nNZ;
|
||||
while (current_row <= nRows)
|
||||
{
|
||||
if (!has_seen_diagonal)
|
||||
{
|
||||
fprintf(stderr, "ERROR Converting the COO to CSR.\n");
|
||||
fprintf(stderr, "\tNo values on row %d, so no diagonal.\n", current_row-1);
|
||||
exit(-1);
|
||||
}
|
||||
has_seen_diagonal = false;
|
||||
row_ptrs[current_row++] = nNZ;
|
||||
}
|
||||
}
|
||||
|
||||
template<typename FloatType>
|
||||
void SparseMatrix<FloatType>::AllocateParallelSparseMatrix(MatrixMarketReader<FloatType> &mm_reader,
|
||||
InputFlags &in_flags)
|
||||
{
|
||||
d_cols = GPU->AllocateMem("cols", nNZ*sizeof(int), 0, NULL);
|
||||
d_vals = GPU->AllocateMem("vals", nNZ*sizeof(FloatType), 0, NULL);
|
||||
d_row_ptrs = GPU->AllocateMem("row_ptrs", (nRows+1)*sizeof(int), 0, NULL);
|
||||
}
|
||||
|
||||
template<typename FloatType>
|
||||
void SparseMatrix<FloatType>::FindStatsForParallelDecomposition()
|
||||
{
|
||||
|
||||
assert(SPTS_BLOCK_SIZE % 64 == 0);
|
||||
|
||||
// Rows left over in the potentially partial final block
|
||||
int left_over_last_block = nRows % SPTS_BLOCK_SIZE;
|
||||
printf("%d: lolb %d\n", this_pe, left_over_last_block);
|
||||
// Number of complete blocks, not including any partial block at the end
|
||||
int total_blocks = nRows / SPTS_BLOCK_SIZE;
|
||||
printf("%d: totb %d\n", this_pe, total_blocks);
|
||||
|
||||
// Everyone has at least this many rows
|
||||
nRows_p = (total_blocks / total_pes) * SPTS_BLOCK_SIZE;
|
||||
printf("%d: initial nRows_p %d\n", this_pe, nRows_p);
|
||||
|
||||
// Last cycle might not assign to all PEs
|
||||
int straggler_blocks = total_blocks % total_pes;
|
||||
if (this_pe < straggler_blocks)
|
||||
nRows_p += SPTS_BLOCK_SIZE;
|
||||
printf("%d: straggler nRows_p %d\n", this_pe, nRows_p);
|
||||
|
||||
// Last block of last cycle might have less than SPTS_BLOCK_SIZE rows
|
||||
if (left_over_last_block) {
|
||||
int final_pe = ((total_blocks + 1) % total_pes) - 1;
|
||||
if (final_pe == -1)
|
||||
final_pe = total_pes - 1;
|
||||
if (this_pe == final_pe)
|
||||
nRows_p += left_over_last_block;
|
||||
}
|
||||
printf("%d: final nRows_p %d\n", this_pe, nRows_p);
|
||||
|
||||
if (nRows_p <= 0) {
|
||||
fprintf(stderr, "Block Size %d too small for input row size %d with "
|
||||
"%d number of nodes. Please decrease the block size or "
|
||||
"decrease the number of nodes\n", SPTS_BLOCK_SIZE, nRows,
|
||||
total_pes);
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
// print to check!
|
||||
printf("\nPE: %d total_rows: %d my_rows: %d\n", this_pe, nRows, nRows_p);
|
||||
|
||||
nCols_p = nCols; // 1D decomposition
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -1,16 +0,0 @@
|
||||
#!/bin/bash
# Configures the project one directory above this script into the current
# working directory and builds it with 8 parallel jobs.
# Configuration: Release, HIP on, rocSHMEM off, ALL_ANALYZE on,
# double precision and the other ALL_* options off.

# Stop at the first failing command so a failed configure is not followed by
# a build of a stale or missing build tree.
set -e

# Quoted throughout so a checkout path containing spaces still works.
src_path="$(dirname "$(realpath "$0")")/.."

cmake \
    -DCMAKE_BUILD_TYPE=Release \
    -DCMAKE_VERBOSE_MAKEFILE=OFF \
    -DUSE_ROCSHMEM=OFF \
    -DUSE_HIP=ON \
    -DALL_ANALYZE=ON \
    -DUSE_DOUBLE=OFF \
    -DALL_LEVELSET=OFF \
    -DALL_LEVELSYNC=OFF \
    -DALL_SYNCFREE=OFF \
    "$src_path"
cmake --build . --parallel 8
|
||||
@@ -1,16 +0,0 @@
|
||||
#!/bin/bash
# Configures the project one directory above this script into the current
# working directory and builds it with 8 parallel jobs.
# Configuration: Release, HIP off, rocSHMEM off, ALL_ANALYZE on,
# double precision and the other ALL_* options off.

# Stop at the first failing command so a failed configure is not followed by
# a build of a stale or missing build tree.
set -e

# Quoted throughout so a checkout path containing spaces still works.
src_path="$(dirname "$(realpath "$0")")/.."

cmake \
    -DCMAKE_BUILD_TYPE=Release \
    -DCMAKE_VERBOSE_MAKEFILE=OFF \
    -DUSE_ROCSHMEM=OFF \
    -DUSE_HIP=OFF \
    -DALL_ANALYZE=ON \
    -DUSE_DOUBLE=OFF \
    -DALL_LEVELSET=OFF \
    -DALL_LEVELSYNC=OFF \
    -DALL_SYNCFREE=OFF \
    "$src_path"
cmake --build . --parallel 8
|
||||
@@ -1,24 +0,0 @@
|
||||
#!/bin/bash
# Configures the project one directory above this script into the current
# working directory with HIP and rocSHMEM enabled, then builds it with 8
# parallel jobs.
#
# Usage: <this script> [rocshmem install prefix]
#   The prefix defaults to ~/rocshmem; CMake is pointed at
#   <prefix>/share/cmake/rocshmem through rocshmem_DIR.

# Stop at the first failing command so a failed configure is not followed by
# a build of a stale or missing build tree.
set -e

# "$1" is quoted: the unquoted test only worked for an empty argument by
# accident and broke on a path containing spaces.
if [ -z "$1" ]
then
    install_path=~/rocshmem
else
    install_path="$1"
fi

src_path="$(dirname "$(realpath "$0")")/.."

cmake \
    -DCMAKE_BUILD_TYPE=Release \
    -DCMAKE_VERBOSE_MAKEFILE=OFF \
    -DUSE_ROCSHMEM=ON \
    -DUSE_HIP=ON \
    -DALL_ANALYZE=ON \
    -DUSE_DOUBLE=OFF \
    -DALL_LEVELSET=OFF \
    -DALL_LEVELSYNC=OFF \
    -DALL_SYNCFREE=OFF \
    -Drocshmem_DIR="$install_path/share/cmake/rocshmem" \
    "$src_path"
cmake --build . --parallel 8
|
||||
@@ -1,7 +0,0 @@
|
||||
#cmakedefine USE_ROCSHMEM
|
||||
#cmakedefine USE_HIP
|
||||
#cmakedefine ALL_ANALYZE
|
||||
#cmakedefine USE_DOUBLE
|
||||
#cmakedefine ALL_LEVELSET
|
||||
#cmakedefine ALL_LEVELSYNC
|
||||
#cmakedefine ALL_SYNCFREE
|
||||
@@ -1,43 +0,0 @@
|
||||
#!/bin/bash
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

# Runs the 2-rank MPI test cases for one test type and writes one log per case.
#
# Usage: <this script> <executable> <test name> <log directory>
#   <test name> must end in "single_thread" or "multi_thread".
#
# The matrix directory defaults to /mnt/mlebeane/spts_data and can be
# overridden with the SPTS_INPUTS environment variable.
#
# Exit status: 0 when every case passed, otherwise the status of the last
# failing case; 255 for an unknown test type.
#
# The interpreter line now comes first: it used to sit below the licence
# text, where it is just a comment and has no effect.

echo "Test Name $2"

INPUTS="${SPTS_INPUTS:-/mnt/mlebeane/spts_data}"

# Remember failures from every case; previously only the status of the final
# mpirun reached the caller, hiding earlier failures.
status=0

case $2 in
    *"single_thread")
        mpirun -np 2 "$1" -f "$INPUTS/test_matrices/diagonal_large.mtx" -a 2 -b 512 -p 64 -v -i 3 > "$3/diagonal_large_bput.log" || status=$?
        mpirun -np 2 "$1" -f "$INPUTS/test_matrices/not_quite_diagonal.mtx" -a 2 -b 256 -p 64 -v -i 3 > "$3/not_quite_diagonal_bput.log" || status=$?
        ;;
    *"multi_thread")
        mpirun -np 2 "$1" -f "$INPUTS/test_matrices/diagonal_large.mtx" -a 2 -b 512 -p 64 -v -i 3 > "$3/diagonal_large_bput.log" || status=$?
        mpirun -np 2 "$1" -f "$INPUTS/test_matrices/not_quite_diagonal.mtx" -a 2 -b 256 -p 64 -v -i 3 > "$3/not_quite_diagonal_bput.log" || status=$?
        mpirun -np 2 "$1" -f "$INPUTS/test_matrices/not_quite_diagonal.mtx" -a 1 -b 256 -v -i 3 > "$3/not_quite_diagonal_get.log" || status=$?
        ;;
    *)
        echo "UNKNOWN TEST TYPE: $2"
        # 255 is what bash reports for "exit -1"; written out because a
        # negative exit argument is rejected by some other shells.
        exit 255
        ;;
esac

exit "$status"
|
||||
@@ -1,86 +0,0 @@
|
||||
/*
|
||||
* Matrix Market I/O library for ANSI C
|
||||
*
|
||||
* See http://math.nist.gov/MatrixMarket for details.
|
||||
*
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef MM_IO_H
|
||||
#define MM_IO_H
|
||||
|
||||
/********************* MM_typecode query functions ***************************/
|
||||
|
||||
#define mm_is_matrix(typecode) ((typecode)[0]=='M')
|
||||
|
||||
#define mm_is_sparse(typecode) ((typecode)[1]=='C')
|
||||
#define mm_is_coordinate(typecode)((typecode)[1]=='C')
|
||||
#define mm_is_dense(typecode) ((typecode)[1]=='A')
|
||||
#define mm_is_array(typecode) ((typecode)[1]=='A')
|
||||
|
||||
#define mm_is_complex(typecode) ((typecode)[2]=='C')
|
||||
#define mm_is_real(typecode) ((typecode)[2]=='R')
|
||||
#define mm_is_pattern(typecode) ((typecode)[2]=='P')
|
||||
#define mm_is_integer(typecode) ((typecode)[2]=='I')
|
||||
|
||||
#define mm_is_symmetric(typecode)((typecode)[3]=='S')
|
||||
#define mm_is_general(typecode) ((typecode)[3]=='G')
|
||||
#define mm_is_skew(typecode) ((typecode)[3]=='K')
|
||||
#define mm_is_hermitian(typecode)((typecode)[3]=='H')
|
||||
|
||||
/********************* MM_typecode modify functions ***************************/
|
||||
|
||||
#define mm_set_matrix(typecode) ((typecode)[0]='M')
|
||||
#define mm_set_coordinate(typecode) ((typecode)[1]='C')
|
||||
#define mm_set_array(typecode) ((typecode)[1]='A')
|
||||
#define mm_set_dense(typecode) mm_set_array(typecode)
|
||||
#define mm_set_sparse(typecode) mm_set_coordinate(typecode)
|
||||
|
||||
#define mm_set_complex(typecode)((typecode)[2]='C')
|
||||
#define mm_set_real(typecode) ((typecode)[2]='R')
|
||||
#define mm_set_pattern(typecode)((typecode)[2]='P')
|
||||
#define mm_set_integer(typecode)((typecode)[2]='I')
|
||||
|
||||
|
||||
#define mm_set_symmetric(typecode)((typecode)[3]='S')
|
||||
#define mm_set_general(typecode)((typecode)[3]='G')
|
||||
#define mm_set_skew(typecode) ((typecode)[3]='K')
|
||||
#define mm_set_hermitian(typecode)((typecode)[3]='H')
|
||||
|
||||
#define mm_clear_typecode(typecode) ((typecode)[0]=(typecode)[1]= \
|
||||
(typecode)[2]=' ',(typecode)[3]='G')
|
||||
|
||||
#define mm_initialize_typecode(typecode) mm_clear_typecode(typecode)
|
||||
|
||||
|
||||
/********************* Matrix Market error codes ***************************/
|
||||
|
||||
|
||||
#define MM_COULD_NOT_READ_FILE 11
|
||||
#define MM_PREMATURE_EOF 12
|
||||
#define MM_NOT_MTX 13
|
||||
#define MM_NO_HEADER 14
|
||||
#define MM_UNSUPPORTED_TYPE 15
|
||||
#define MM_LINE_TOO_LONG 16
|
||||
#define MM_COULD_NOT_WRITE_FILE 17
|
||||
|
||||
#define MM_MTX_STR "matrix"
|
||||
#define MM_ARRAY_STR "array"
|
||||
#define MM_DENSE_STR "array"
|
||||
#define MM_COORDINATE_STR "coordinate"
|
||||
#define MM_SPARSE_STR "coordinate"
|
||||
#define MM_COMPLEX_STR "complex"
|
||||
#define MM_REAL_STR "real"
|
||||
#define MM_INT_STR "integer"
|
||||
#define MM_GENERAL_STR "general"
|
||||
#define MM_SYMM_STR "symmetric"
|
||||
#define MM_HERM_STR "hermitian"
|
||||
#define MM_SKEW_STR "skew-symmetric"
|
||||
#define MM_PATTERN_STR "pattern"
|
||||
|
||||
#define MM_MAX_LINE_LENGTH 1025
|
||||
#define MM_MAX_TOKEN_LENGTH 64
|
||||
#define MatrixMarketBanner "%%MatrixMarket"
|
||||
#define MAX_RAND_VAL 5.0
|
||||
|
||||
#endif
|
||||
Plik diff jest za duży
Load Diff
@@ -1,118 +0,0 @@
|
||||
pipeline {
|
||||
agent { label 'sv-pdp-5' }
|
||||
environment {
|
||||
HSA_FORCE_FINE_GRAIN_PCIE = 1
|
||||
MPI_HOME="/home/resperf/mpich-4.0.1/install/global"
|
||||
PATH = "$MPI_HOME/bin:$PATH"
|
||||
LD_LIBRARY_PATH = "$MPI_HOME/lib:$LD_LIBRARY_PATH"
|
||||
build_dir = "builds/change-${GERRIT_CHANGE_NUMBER}-${GERRIT_PATCHSET_NUMBER}"
|
||||
CMAKE_PREFIX_PATH = "/opt/rocm/lib/cmake"
|
||||
}
|
||||
stages {
|
||||
stage('Synchronize Source Code') {
|
||||
steps {
|
||||
checkout changelog: false, poll: false, scm: [$class: 'GitSCM', branches: [[name: 'FETCH_HEAD']], doGenerateSubmoduleConfigurations: false, extensions: [[$class: 'CloneOption', depth: 0, noTags: false, reference: '', shallow: false]], submoduleCfg: [], userRemoteConfigs: [[name: 'origin', refspec: '${GERRIT_REFSPEC}', url: 'ssh://gerritgit/rsch/ec/shmem']]]
|
||||
}
|
||||
}
|
||||
stage('Make Build Directory') {
|
||||
steps {
|
||||
dir("library") {
|
||||
sh "mkdir -p ${build_dir}"
|
||||
}
|
||||
}
|
||||
}
|
||||
stage('Build Source Code') {
|
||||
parallel {
|
||||
stage('RC_SINGLE') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/RC_SINGLE") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/rc_single install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/RC_SINGLE") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_SINGLE/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
dir("internal/clients/spts/${build_dir}/RC_SINGLE") {
|
||||
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_SINGLE/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RC_MULTI_WF_COAL') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/RC_MULTI_WF_COAL") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/rc_multi_wf_coal install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/RC_MULTI_WF_COAL") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_MULTI_WF_COAL/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
dir("internal/clients/spts/${build_dir}/RC_MULTI_WF_COAL") {
|
||||
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_MULTI_WF_COAL/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RC_MULTI') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/RC_MULTI") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/rc_multi install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/RC_MULTI") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_MULTI/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
dir("internal/clients/spts/${build_dir}/RC_MULTI") {
|
||||
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_MULTI/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_SINGLE') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/DC_SINGLE") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/dc_single install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/DC_SINGLE") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_SINGLE/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
dir("internal/clients/spts/${build_dir}/DC_SINGLE") {
|
||||
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/DC_SINGLE/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_MULTI') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/DC_MULTI") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/dc_multi install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/DC_MULTI") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
dir("internal/clients/spts/${build_dir}/DC_MULTI") {
|
||||
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/DC_MULTI/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,93 +0,0 @@
|
||||
<?xml version='1.1' encoding='UTF-8'?>
|
||||
<flow-definition plugin="workflow-job@2.40">
|
||||
<actions>
|
||||
<org.jenkinsci.plugins.pipeline.modeldefinition.actions.DeclarativeJobAction plugin="pipeline-model-definition@1.8.4"/>
|
||||
<org.jenkinsci.plugins.pipeline.modeldefinition.actions.DeclarativeJobPropertyTrackerAction plugin="pipeline-model-definition@1.8.4">
|
||||
<jobProperties/>
|
||||
<triggers/>
|
||||
<parameters/>
|
||||
<options/>
|
||||
</org.jenkinsci.plugins.pipeline.modeldefinition.actions.DeclarativeJobPropertyTrackerAction>
|
||||
</actions>
|
||||
<description></description>
|
||||
<keepDependencies>false</keepDependencies>
|
||||
<properties>
|
||||
<org.jenkinsci.plugins.workflow.job.properties.PipelineTriggersJobProperty>
|
||||
<triggers>
|
||||
<com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.GerritTrigger plugin="gerrit-trigger@2.33.0">
|
||||
<spec></spec>
|
||||
<gerritProjects>
|
||||
<com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.data.GerritProject>
|
||||
<compareType>PLAIN</compareType>
|
||||
<pattern>rsch/ec/shmem</pattern>
|
||||
<branches>
|
||||
<com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.data.Branch>
|
||||
<compareType>PLAIN</compareType>
|
||||
<pattern>amd-master</pattern>
|
||||
</com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.data.Branch>
|
||||
</branches>
|
||||
<disableStrictForbiddenFileVerification>false</disableStrictForbiddenFileVerification>
|
||||
</com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.data.GerritProject>
|
||||
</gerritProjects>
|
||||
<dynamicGerritProjects class="empty-list"/>
|
||||
<skipVote>
|
||||
<onSuccessful>true</onSuccessful>
|
||||
<onFailed>true</onFailed>
|
||||
<onUnstable>true</onUnstable>
|
||||
<onNotBuilt>true</onNotBuilt>
|
||||
<onAborted>true</onAborted>
|
||||
</skipVote>
|
||||
<silentMode>false</silentMode>
|
||||
<enableTopicAssociation>false</enableTopicAssociation>
|
||||
<notificationLevel></notificationLevel>
|
||||
<silentStartMode>false</silentStartMode>
|
||||
<escapeQuotes>true</escapeQuotes>
|
||||
<nameAndEmailParameterMode>PLAIN</nameAndEmailParameterMode>
|
||||
<dependencyJobsNames></dependencyJobsNames>
|
||||
<commitMessageParameterMode>BASE64</commitMessageParameterMode>
|
||||
<changeSubjectParameterMode>PLAIN</changeSubjectParameterMode>
|
||||
<commentTextParameterMode>BASE64</commentTextParameterMode>
|
||||
<buildStartMessage></buildStartMessage>
|
||||
<buildFailureMessage></buildFailureMessage>
|
||||
<buildSuccessfulMessage></buildSuccessfulMessage>
|
||||
<buildUnstableMessage></buildUnstableMessage>
|
||||
<buildNotBuiltMessage></buildNotBuiltMessage>
|
||||
<buildAbortedMessage></buildAbortedMessage>
|
||||
<buildUnsuccessfulFilepath></buildUnsuccessfulFilepath>
|
||||
<customUrl></customUrl>
|
||||
<serverName>amd-gerrit</serverName>
|
||||
<triggerOnEvents>
|
||||
<com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.events.PluginCommentAddedContainsEvent>
|
||||
<commentAddedCommentContains>!COMPILE</commentAddedCommentContains>
|
||||
</com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.events.PluginCommentAddedContainsEvent>
|
||||
</triggerOnEvents>
|
||||
<dynamicTriggerConfiguration>false</dynamicTriggerConfiguration>
|
||||
<triggerConfigURL></triggerConfigURL>
|
||||
<triggerInformationAction/>
|
||||
</com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.GerritTrigger>
|
||||
</triggers>
|
||||
</org.jenkinsci.plugins.workflow.job.properties.PipelineTriggersJobProperty>
|
||||
</properties>
|
||||
<definition class="org.jenkinsci.plugins.workflow.cps.CpsScmFlowDefinition" plugin="workflow-cps@2.90">
|
||||
<scm class="hudson.plugins.git.GitSCM" plugin="git@4.7.1">
|
||||
<configVersion>2</configVersion>
|
||||
<userRemoteConfigs>
|
||||
<hudson.plugins.git.UserRemoteConfig>
|
||||
<url>ssh://gerritgit/rsch/ec/shmem</url>
|
||||
</hudson.plugins.git.UserRemoteConfig>
|
||||
</userRemoteConfigs>
|
||||
<branches>
|
||||
<hudson.plugins.git.BranchSpec>
|
||||
<name>FETCH_HEAD</name>
|
||||
</hudson.plugins.git.BranchSpec>
|
||||
</branches>
|
||||
<doGenerateSubmoduleConfigurations>false</doGenerateSubmoduleConfigurations>
|
||||
<submoduleCfg class="empty-list"/>
|
||||
<extensions/>
|
||||
</scm>
|
||||
<scriptPath>internal/continuous_integration/compile/Jenkinsfile</scriptPath>
|
||||
<lightweight>false</lightweight>
|
||||
</definition>
|
||||
<triggers/>
|
||||
<disabled>false</disabled>
|
||||
</flow-definition>
|
||||
@@ -1,221 +0,0 @@
|
||||
pipeline {
|
||||
agent { label 'sv-pdp-5' }
|
||||
environment {
    // MPI installation used by the build/test steps; its bin/ and lib/
    // directories are prepended to whatever the agent already has set.
    MPI_HOME = "/home/resperf/mpich-4.0.1/install/global"
    PATH = "$MPI_HOME/bin:$PATH"
    LD_LIBRARY_PATH = "$MPI_HOME/lib:$LD_LIBRARY_PATH"

    // Search prefix handed to CMake through the environment.
    CMAKE_PREFIX_PATH = "/opt/rocm/lib/cmake"

    // Exported unchanged to every step of the pipeline.
    HSA_FORCE_FINE_GRAIN_PCIE = 1

    // Build tree name, unique per Gerrit change and patchset; it is reused
    // (relative to library/ and to each client directory) by later stages.
    build_dir = "builds/change-${GERRIT_CHANGE_NUMBER}-${GERRIT_PATCHSET_NUMBER}"
}
|
||||
stages {
|
||||
stage('Synchronize Source Code') {
    steps {
        // Full (non-shallow) clone of the Gerrit repository, then check out
        // FETCH_HEAD. The refspec is single-quoted on purpose: Groovy does not
        // interpolate it, so the literal text reaches the Git plugin
        // (presumably expanded there from the build environment - confirm).
        checkout(
            changelog: false,
            poll: false,
            scm: [
                $class: 'GitSCM',
                branches: [[name: 'FETCH_HEAD']],
                doGenerateSubmoduleConfigurations: false,
                extensions: [[$class: 'CloneOption', depth: 0, noTags: false, reference: '', shallow: false]],
                submoduleCfg: [],
                userRemoteConfigs: [[name: 'origin', refspec: '${GERRIT_REFSPEC}', url: 'ssh://gerritgit/rsch/ec/shmem']]
            ]
        )
    }
}
|
||||
stage('Env Variables') {
    steps {
        // Print the complete step environment into the build log.
        sh 'printenv'
    }
}
|
||||
stage('Make Build Directory') {
    steps {
        dir('library') {
            // Double-quoted: build_dir is substituted by Groovy before the
            // shell sees the command.
            sh "mkdir -p ${build_dir}"
        }
    }
}
|
||||
stage('Build Source Code') {
    // Abort the remaining parallel branches as soon as one of them fails.
    failFast true

    // One branch per library configuration. Every branch runs the matching
    // library build config with 'install' as its argument, then runs the
    // functional-test client build config against that install tree.
    // The SPTS client build is disabled in every branch; when enabled it ran,
    // from internal/clients/spts/${build_dir}/<CONFIG>:
    //   ../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/<CONFIG>/install
    parallel {
        stage('RC_SINGLE') {
            steps {
                // Library
                dir("library/${build_dir}/RC_SINGLE") {
                    sh 'mkdir -p install'
                    sh '../../../build_configs/rc_single install'
                }
                // Functional-test client
                dir("clients/functional_tests/${build_dir}/RC_SINGLE") {
                    sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_SINGLE/install'
                }
            }
        }

        stage('RC_MULTI_WF_COAL') {
            steps {
                // Library
                dir("library/${build_dir}/RC_MULTI_WF_COAL") {
                    sh 'mkdir -p install'
                    sh '../../../build_configs/rc_multi_wf_coal install'
                }
                // Functional-test client
                dir("clients/functional_tests/${build_dir}/RC_MULTI_WF_COAL") {
                    sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_MULTI_WF_COAL/install'
                }
            }
        }

        stage('RC_MULTI') {
            steps {
                // Library
                dir("library/${build_dir}/RC_MULTI") {
                    sh 'mkdir -p install'
                    sh '../../../build_configs/rc_multi install'
                }
                // Functional-test client
                dir("clients/functional_tests/${build_dir}/RC_MULTI") {
                    sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_MULTI/install'
                }
            }
        }

        stage('DC_SINGLE') {
            steps {
                // Library
                dir("library/${build_dir}/DC_SINGLE") {
                    sh 'mkdir -p install'
                    sh '../../../build_configs/dc_single install'
                }
                // Functional-test client
                dir("clients/functional_tests/${build_dir}/DC_SINGLE") {
                    sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_SINGLE/install'
                }
            }
        }

        stage('DC_MULTI') {
            steps {
                // Library
                dir("library/${build_dir}/DC_MULTI") {
                    sh 'mkdir -p install'
                    sh '../../../build_configs/dc_multi install'
                }
                // Functional-test client
                dir("clients/functional_tests/${build_dir}/DC_MULTI") {
                    sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI/install'
                }
            }
        }
    }
}
|
||||
stage('Run Tests') {
    // Nested stages run one after another. Each one calls the functional-test
    // driver with: the example driver binary of that configuration, a test
    // selector, a per-configuration directory, and a trailing 'true'.
    // The SPTS runs are disabled in every stage; when enabled they ran, from
    // internal/clients/spts:
    //   ./driver.sh ${build_dir}/<CONFIG>/spts <single_thread|multi_thread> ${build_dir}/<CONFIG>
    stages {
        stage('RC_SINGLE') {
            steps {
                dir('clients/functional_tests') {
                    sh './driver.sh ${build_dir}/RC_SINGLE/rocshmem_example_driver single_thread ${build_dir}/RC_SINGLE true'
                }
            }
        }

        stage('RC_MULTI_WF_COAL') {
            steps {
                dir('clients/functional_tests') {
                    sh './driver.sh ${build_dir}/RC_MULTI_WF_COAL/rocshmem_example_driver multi_thread ${build_dir}/RC_MULTI_WF_COAL true'
                }
            }
        }

        stage('RC_MULTI') {
            steps {
                dir('clients/functional_tests') {
                    sh './driver.sh ${build_dir}/RC_MULTI/rocshmem_example_driver multi_thread ${build_dir}/RC_MULTI true'
                }
            }
        }

        stage('DC_SINGLE') {
            steps {
                dir('clients/functional_tests') {
                    sh './driver.sh ${build_dir}/DC_SINGLE/rocshmem_example_driver single_thread ${build_dir}/DC_SINGLE true'
                }
            }
        }

        stage('DC_MULTI') {
            steps {
                dir('clients/functional_tests') {
                    sh './driver.sh ${build_dir}/DC_MULTI/rocshmem_example_driver multi_thread ${build_dir}/DC_MULTI true'
                }
            }
        }

        stage('RO_NET_BASIC') {
            // RO_NET is selected at run time through environment variables, so
            // no dedicated build exists: the RC_MULTI binary is reused and only
            // the RO_NET_BASIC directory is created here.
            // Disabled SPTS equivalent (from internal/clients/spts):
            //   mkdir -p ${build_dir}/RO_NET_BASIC
            //   ROCSHMEM_RO=1 RO_NET_CPU_QUEUE=1 UCX_TLS=rc ./driver.sh ${build_dir}/RC_MULTI/spts multi_thread ${build_dir}/RO_NET_BASIC
            steps {
                dir('clients/functional_tests') {
                    sh 'mkdir -p ${build_dir}/RO_NET_BASIC'
                    sh 'ROCSHMEM_RO=1 RO_NET_CPU_QUEUE=1 UCX_TLS=rc ./driver.sh ${build_dir}/RC_MULTI/rocshmem_example_driver ro ${build_dir}/RO_NET_BASIC true'
                }
            }
        }
    }
}
|
||||
stage('Generate Checker Metadata') {
    steps {
        dir("library/${build_dir}") {
            sh 'git fetch --tags'
            // Start changeset_delta.txt with the commits that are on the
            // checked-out revision but not on origin/amd-master ...
            sh 'git log --pretty=oneline remotes/origin/amd-master.. > changeset_delta.txt'
            // ... then append the tip commit of origin/amd-master itself.
            sh 'git log --pretty=oneline remotes/origin/amd-master~1..remotes/origin/amd-master >> changeset_delta.txt'
        }
    }
}
|
||||
stage('Archive Artifacts') {
    steps {
        // Commit list written by the 'Generate Checker Metadata' stage.
        dir("library/${build_dir}") {
            archiveArtifacts artifacts: 'changeset_delta.txt'
        }
        // Functional-test logs, one archive step per configuration that the
        // 'Run Tests' stage exercises. RC_MULTI_WF_COAL was previously built
        // and tested but its logs were never archived; it is now included so
        // the list matches the set of test stages.
        dir("clients/functional_tests/${build_dir}") {
            archiveArtifacts artifacts: 'RC_SINGLE/**/*.log'
            archiveArtifacts artifacts: 'RC_MULTI_WF_COAL/**/*.log'
            archiveArtifacts artifacts: 'RC_MULTI/**/*.log'
            archiveArtifacts artifacts: 'DC_SINGLE/**/*.log'
            archiveArtifacts artifacts: 'DC_MULTI/**/*.log'
            archiveArtifacts artifacts: 'RO_NET_BASIC/**/*.log'
        }
    }
}
|
||||
}
|
||||
}
|
||||
@@ -1,413 +0,0 @@
|
||||
pipeline {
|
||||
agent { label 'sv-pdp-5' }
|
||||
environment {
    // Build tree name, unique per Gerrit change and patchset.
    build_dir = "builds/change-${GERRIT_CHANGE_NUMBER}-${GERRIT_PATCHSET_NUMBER}"

    // MPI and UCX installations used by the build and test steps.
    MPI_HOME = "/home/resperf/mpich-4.0.1/install/global"
    UCX_HOME = "/home/resperf/ucx/install"

    // Both installations are placed ahead of the agent's existing search
    // paths; PKG_CONFIG_PATH is set outright rather than extended.
    PATH = "$MPI_HOME/bin:$UCX_HOME/bin:$PATH"
    LD_LIBRARY_PATH = "$MPI_HOME/lib:$UCX_HOME/lib:$LD_LIBRARY_PATH"
    PKG_CONFIG_PATH = "$MPI_HOME/lib/pkgconfig:$UCX_HOME/lib/pkgconfig"

    // Search prefix handed to CMake through the environment.
    CMAKE_PREFIX_PATH = "/opt/rocm/lib/cmake"

    // Runtime switches exported unchanged to every step.
    UCX_WARN_UNUSED_ENV_VARS = "n"
    HSA_FORCE_FINE_GRAIN_PCIE = 1
}
|
||||
stages {
|
||||
stage('Synchronize Source Code') {
    steps {
        // Non-shallow clone followed by a checkout of FETCH_HEAD. The
        // single-quoted refspec is passed through as literal text, without
        // Groovy interpolation.
        checkout(
            changelog: false,
            poll: false,
            scm: [
                $class: 'GitSCM',
                branches: [[name: 'FETCH_HEAD']],
                doGenerateSubmoduleConfigurations: false,
                extensions: [[$class: 'CloneOption', depth: 0, noTags: false, reference: '', shallow: false]],
                submoduleCfg: [],
                userRemoteConfigs: [[name: 'origin', refspec: '${GERRIT_REFSPEC}', url: 'ssh://gerritgit/rsch/ec/shmem']]
            ]
        )
    }
}
|
||||
stage('Env Variables') {
    steps {
        // Record the effective environment in the build log.
        sh 'printenv'
    }
}
|
||||
stage('Make Build Directory') {
    steps {
        dir('library') {
            // build_dir is expanded by Groovy (double quotes), not the shell.
            sh "mkdir -p ${build_dir}"
        }
    }
}
|
||||
stage('Build Source Code') {
    // Abort the remaining parallel branches as soon as one of them fails.
    failFast true

    // One branch per library configuration. Each branch runs, in order:
    //   1. the matching library build config with 'install' as its argument,
    //   2. the functional-test client build config,
    //   3. the SOS-test client build config,
    //   4. the SPTS client build config,
    // where steps 2-4 receive the library install tree as their argument.
    parallel {
        stage('RC_SINGLE') {
            steps {
                // Library
                dir("library/${build_dir}/RC_SINGLE") {
                    sh 'mkdir -p install'
                    sh '../../../build_configs/rc_single install'
                }
                // Clients
                dir("clients/functional_tests/${build_dir}/RC_SINGLE") {
                    sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_SINGLE/install'
                }
                dir("clients/sos_tests/${build_dir}/RC_SINGLE") {
                    sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_SINGLE/install'
                }
                // SPTS
                dir("internal/clients/spts/${build_dir}/RC_SINGLE") {
                    sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_SINGLE/install'
                }
            }
        }

        stage('RC_MULTI_WF_COAL') {
            steps {
                // Library
                dir("library/${build_dir}/RC_MULTI_WF_COAL") {
                    sh 'mkdir -p install'
                    sh '../../../build_configs/rc_multi_wf_coal install'
                }
                // Clients
                dir("clients/functional_tests/${build_dir}/RC_MULTI_WF_COAL") {
                    sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_MULTI_WF_COAL/install'
                }
                dir("clients/sos_tests/${build_dir}/RC_MULTI_WF_COAL") {
                    sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_MULTI_WF_COAL/install'
                }
                // SPTS
                dir("internal/clients/spts/${build_dir}/RC_MULTI_WF_COAL") {
                    sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_MULTI_WF_COAL/install'
                }
            }
        }

        stage('RC_MULTI') {
            steps {
                // Library
                dir("library/${build_dir}/RC_MULTI") {
                    sh 'mkdir -p install'
                    sh '../../../build_configs/rc_multi install'
                }
                // Clients
                dir("clients/functional_tests/${build_dir}/RC_MULTI") {
                    sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_MULTI/install'
                }
                dir("clients/sos_tests/${build_dir}/RC_MULTI") {
                    sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_MULTI/install'
                }
                // SPTS
                dir("internal/clients/spts/${build_dir}/RC_MULTI") {
                    sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_MULTI/install'
                }
            }
        }

        stage('RC_SINGLE_DEBUG') {
            steps {
                // Library
                dir("library/${build_dir}/RC_SINGLE_DEBUG") {
                    sh 'mkdir -p install'
                    sh '../../../build_configs/rc_single_debug install'
                }
                // Clients
                dir("clients/functional_tests/${build_dir}/RC_SINGLE_DEBUG") {
                    sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_SINGLE_DEBUG/install'
                }
                dir("clients/sos_tests/${build_dir}/RC_SINGLE_DEBUG") {
                    sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_SINGLE_DEBUG/install'
                }
                // SPTS
                dir("internal/clients/spts/${build_dir}/RC_SINGLE_DEBUG") {
                    sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_SINGLE_DEBUG/install'
                }
            }
        }

        stage('RC_SINGLE_PROFILE') {
            steps {
                // Library
                dir("library/${build_dir}/RC_SINGLE_PROFILE") {
                    sh 'mkdir -p install'
                    sh '../../../build_configs/rc_single_profile install'
                }
                // Clients
                dir("clients/functional_tests/${build_dir}/RC_SINGLE_PROFILE") {
                    sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_SINGLE_PROFILE/install'
                }
                dir("clients/sos_tests/${build_dir}/RC_SINGLE_PROFILE") {
                    sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_SINGLE_PROFILE/install'
                }
                // SPTS
                dir("internal/clients/spts/${build_dir}/RC_SINGLE_PROFILE") {
                    sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_SINGLE_PROFILE/install'
                }
            }
        }

        stage('DC_SINGLE') {
            steps {
                // Library
                dir("library/${build_dir}/DC_SINGLE") {
                    sh 'mkdir -p install'
                    sh '../../../build_configs/dc_single install'
                }
                // Clients
                dir("clients/functional_tests/${build_dir}/DC_SINGLE") {
                    sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_SINGLE/install'
                }
                dir("clients/sos_tests/${build_dir}/DC_SINGLE") {
                    sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_SINGLE/install'
                }
                // SPTS
                dir("internal/clients/spts/${build_dir}/DC_SINGLE") {
                    sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/DC_SINGLE/install'
                }
            }
        }

        stage('DC_MULTI') {
            steps {
                // Library
                dir("library/${build_dir}/DC_MULTI") {
                    sh 'mkdir -p install'
                    sh '../../../build_configs/dc_multi install'
                }
                // Clients
                dir("clients/functional_tests/${build_dir}/DC_MULTI") {
                    sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI/install'
                }
                dir("clients/sos_tests/${build_dir}/DC_MULTI") {
                    sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI/install'
                }
                // SPTS
                dir("internal/clients/spts/${build_dir}/DC_MULTI") {
                    sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/DC_MULTI/install'
                }
            }
        }

        stage('DC_MULTI_IPC') {
            steps {
                // Library
                dir("library/${build_dir}/DC_MULTI_IPC") {
                    sh 'mkdir -p install'
                    sh '../../../build_configs/dc_multi_ipc install'
                }
                // Clients
                dir("clients/functional_tests/${build_dir}/DC_MULTI_IPC") {
                    sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI_IPC/install'
                }
                dir("clients/sos_tests/${build_dir}/DC_MULTI_IPC") {
                    sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI_IPC/install'
                }
                // SPTS
                dir("internal/clients/spts/${build_dir}/DC_MULTI_IPC") {
                    sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/DC_MULTI_IPC/install'
                }
            }
        }

        stage('DC_MULTI_DEBUG') {
            steps {
                // Library
                dir("library/${build_dir}/DC_MULTI_DEBUG") {
                    sh 'mkdir -p install'
                    sh '../../../build_configs/dc_multi_debug install'
                }
                // Clients
                dir("clients/functional_tests/${build_dir}/DC_MULTI_DEBUG") {
                    sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI_DEBUG/install'
                }
                dir("clients/sos_tests/${build_dir}/DC_MULTI_DEBUG") {
                    sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI_DEBUG/install'
                }
                // SPTS
                dir("internal/clients/spts/${build_dir}/DC_MULTI_DEBUG") {
                    sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/DC_MULTI_DEBUG/install'
                }
            }
        }

        stage('DC_MULTI_PROFILE') {
            steps {
                // Library
                dir("library/${build_dir}/DC_MULTI_PROFILE") {
                    sh 'mkdir -p install'
                    sh '../../../build_configs/dc_multi_profile install'
                }
                // Clients
                dir("clients/functional_tests/${build_dir}/DC_MULTI_PROFILE") {
                    sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI_PROFILE/install'
                }
                dir("clients/sos_tests/${build_dir}/DC_MULTI_PROFILE") {
                    sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI_PROFILE/install'
                }
                // SPTS
                dir("internal/clients/spts/${build_dir}/DC_MULTI_PROFILE") {
                    sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/DC_MULTI_PROFILE/install'
                }
            }
        }
    }
}
|
||||
stage('Run Tests') {
|
||||
stages {
|
||||
// Per-configuration test stages. Each one runs three drivers in order:
// functional tests, SOS tests (selector 'all'), and SPTS. The functional and
// SPTS drivers get the binary of that configuration plus a single_thread /
// multi_thread selector; every driver gets the configuration's directory as
// its last argument.
stage('RC_SINGLE') {
    steps {
        dir('clients/functional_tests') {
            sh './driver.sh ${build_dir}/RC_SINGLE/rocshmem_example_driver single_thread ${build_dir}/RC_SINGLE'
        }
        dir('clients/sos_tests') {
            sh './driver.sh ${build_dir}/RC_SINGLE all ${build_dir}/RC_SINGLE'
        }
        dir('internal/clients/spts') {
            sh './driver.sh ${build_dir}/RC_SINGLE/spts single_thread ${build_dir}/RC_SINGLE'
        }
    }
}

stage('RC_MULTI_WF_COAL') {
    steps {
        dir('clients/functional_tests') {
            sh './driver.sh ${build_dir}/RC_MULTI_WF_COAL/rocshmem_example_driver multi_thread ${build_dir}/RC_MULTI_WF_COAL'
        }
        dir('clients/sos_tests') {
            sh './driver.sh ${build_dir}/RC_MULTI_WF_COAL all ${build_dir}/RC_MULTI_WF_COAL'
        }
        dir('internal/clients/spts') {
            sh './driver.sh ${build_dir}/RC_MULTI_WF_COAL/spts multi_thread ${build_dir}/RC_MULTI_WF_COAL'
        }
    }
}

stage('RC_MULTI') {
    steps {
        dir('clients/functional_tests') {
            sh './driver.sh ${build_dir}/RC_MULTI/rocshmem_example_driver multi_thread ${build_dir}/RC_MULTI'
        }
        dir('clients/sos_tests') {
            sh './driver.sh ${build_dir}/RC_MULTI all ${build_dir}/RC_MULTI'
        }
        dir('internal/clients/spts') {
            sh './driver.sh ${build_dir}/RC_MULTI/spts multi_thread ${build_dir}/RC_MULTI'
        }
    }
}

stage('RC_SINGLE_DEBUG') {
    steps {
        dir('clients/functional_tests') {
            sh './driver.sh ${build_dir}/RC_SINGLE_DEBUG/rocshmem_example_driver single_thread ${build_dir}/RC_SINGLE_DEBUG'
        }
        dir('clients/sos_tests') {
            sh './driver.sh ${build_dir}/RC_SINGLE_DEBUG all ${build_dir}/RC_SINGLE_DEBUG'
        }
        dir('internal/clients/spts') {
            sh './driver.sh ${build_dir}/RC_SINGLE_DEBUG/spts single_thread ${build_dir}/RC_SINGLE_DEBUG'
        }
    }
}

stage('RC_SINGLE_PROFILE') {
    steps {
        dir('clients/functional_tests') {
            sh './driver.sh ${build_dir}/RC_SINGLE_PROFILE/rocshmem_example_driver single_thread ${build_dir}/RC_SINGLE_PROFILE'
        }
        dir('clients/sos_tests') {
            sh './driver.sh ${build_dir}/RC_SINGLE_PROFILE all ${build_dir}/RC_SINGLE_PROFILE'
        }
        dir('internal/clients/spts') {
            sh './driver.sh ${build_dir}/RC_SINGLE_PROFILE/spts single_thread ${build_dir}/RC_SINGLE_PROFILE'
        }
    }
}

stage('DC_SINGLE') {
    steps {
        dir('clients/functional_tests') {
            sh './driver.sh ${build_dir}/DC_SINGLE/rocshmem_example_driver single_thread ${build_dir}/DC_SINGLE'
        }
        dir('clients/sos_tests') {
            sh './driver.sh ${build_dir}/DC_SINGLE all ${build_dir}/DC_SINGLE'
        }
        dir('internal/clients/spts') {
            sh './driver.sh ${build_dir}/DC_SINGLE/spts single_thread ${build_dir}/DC_SINGLE'
        }
    }
}

stage('DC_MULTI') {
    steps {
        dir('clients/functional_tests') {
            sh './driver.sh ${build_dir}/DC_MULTI/rocshmem_example_driver multi_thread ${build_dir}/DC_MULTI'
        }
        dir('clients/sos_tests') {
            sh './driver.sh ${build_dir}/DC_MULTI all ${build_dir}/DC_MULTI'
        }
        dir('internal/clients/spts') {
            sh './driver.sh ${build_dir}/DC_MULTI/spts multi_thread ${build_dir}/DC_MULTI'
        }
    }
}

stage('DC_MULTI_IPC') {
    steps {
        dir('clients/functional_tests') {
            sh './driver.sh ${build_dir}/DC_MULTI_IPC/rocshmem_example_driver multi_thread ${build_dir}/DC_MULTI_IPC'
        }
        dir('clients/sos_tests') {
            sh './driver.sh ${build_dir}/DC_MULTI_IPC all ${build_dir}/DC_MULTI_IPC'
        }
        dir('internal/clients/spts') {
            sh './driver.sh ${build_dir}/DC_MULTI_IPC/spts multi_thread ${build_dir}/DC_MULTI_IPC'
        }
    }
}

stage('DC_MULTI_DEBUG') {
    steps {
        dir('clients/functional_tests') {
            sh './driver.sh ${build_dir}/DC_MULTI_DEBUG/rocshmem_example_driver multi_thread ${build_dir}/DC_MULTI_DEBUG'
        }
        dir('clients/sos_tests') {
            sh './driver.sh ${build_dir}/DC_MULTI_DEBUG all ${build_dir}/DC_MULTI_DEBUG'
        }
        dir('internal/clients/spts') {
            sh './driver.sh ${build_dir}/DC_MULTI_DEBUG/spts multi_thread ${build_dir}/DC_MULTI_DEBUG'
        }
    }
}

stage('DC_MULTI_PROFILE') {
    steps {
        dir('clients/functional_tests') {
            sh './driver.sh ${build_dir}/DC_MULTI_PROFILE/rocshmem_example_driver multi_thread ${build_dir}/DC_MULTI_PROFILE'
        }
        dir('clients/sos_tests') {
            sh './driver.sh ${build_dir}/DC_MULTI_PROFILE all ${build_dir}/DC_MULTI_PROFILE'
        }
        dir('internal/clients/spts') {
            sh './driver.sh ${build_dir}/DC_MULTI_PROFILE/spts multi_thread ${build_dir}/DC_MULTI_PROFILE'
        }
    }
}
|
||||
|
||||
stage('RO_NET_BASIC') {
    // RO_NET is controlled at run time through environment variables, so no
    // dedicated build is needed: the RC_MULTI binaries are reused. Each client
    // creates its own RO_NET_BASIC directory and passes it as the last driver
    // argument, keeping the RO run separate from the plain RC_MULTI run.
    steps {
        dir("clients/functional_tests") {
            sh 'mkdir -p ${build_dir}/RO_NET_BASIC'
            sh 'ROCSHMEM_RO=1 RO_NET_CPU_QUEUE=1 UCX_TLS=rc ./driver.sh ${build_dir}/RC_MULTI/rocshmem_example_driver ro ${build_dir}/RO_NET_BASIC'
        }
        dir("clients/sos_tests") {
            // Previously the last argument was ${build_dir}/RC_MULTI, the same
            // directory the RC_MULTI stage already used, unlike the two
            // sibling steps in this stage.
            // NOTE(review): assumes the third driver.sh argument is the output
            // directory, as the functional/SPTS calls suggest - confirm
            // against clients/sos_tests/driver.sh.
            sh 'mkdir -p ${build_dir}/RO_NET_BASIC'
            sh 'ROCSHMEM_RO=1 ./driver.sh ${build_dir}/RC_MULTI all ${build_dir}/RO_NET_BASIC'
        }
        dir("internal/clients/spts") {
            sh 'mkdir -p ${build_dir}/RO_NET_BASIC'
            sh 'ROCSHMEM_RO=1 RO_NET_CPU_QUEUE=1 UCX_TLS=rc ./driver.sh ${build_dir}/RC_MULTI/spts multi_thread ${build_dir}/RO_NET_BASIC'
        }
    }
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,45 +0,0 @@
|
||||
<?xml version='1.1' encoding='UTF-8'?>
<!-- Jenkins pipeline job: runs the Jenkinsfile at
     internal/continuous_integration/nightly/Jenkinsfile from the amd-master
     branch on a daily timer. -->
<flow-definition plugin="workflow-job@2.40">
  <actions>
    <org.jenkinsci.plugins.pipeline.modeldefinition.actions.DeclarativeJobAction plugin="pipeline-model-definition@1.8.4"/>
    <org.jenkinsci.plugins.pipeline.modeldefinition.actions.DeclarativeJobPropertyTrackerAction plugin="pipeline-model-definition@1.8.4">
      <jobProperties/>
      <triggers/>
      <parameters/>
      <options/>
    </org.jenkinsci.plugins.pipeline.modeldefinition.actions.DeclarativeJobPropertyTrackerAction>
  </actions>
  <description/>
  <keepDependencies>false</keepDependencies>
  <properties>
    <org.jenkinsci.plugins.workflow.job.properties.PipelineTriggersJobProperty>
      <triggers>
        <!-- Jenkins cron spec: once a day during hour 22; "H" lets Jenkins
             pick the minute. -->
        <hudson.triggers.TimerTrigger>
          <spec>H 22 * * *</spec>
        </hudson.triggers.TimerTrigger>
      </triggers>
    </org.jenkinsci.plugins.workflow.job.properties.PipelineTriggersJobProperty>
  </properties>
  <!-- Pipeline script is loaded from source control, not stored in the job. -->
  <definition class="org.jenkinsci.plugins.workflow.cps.CpsScmFlowDefinition" plugin="workflow-cps@2.90">
    <scm class="hudson.plugins.git.GitSCM" plugin="git@4.7.1">
      <configVersion>2</configVersion>
      <userRemoteConfigs>
        <hudson.plugins.git.UserRemoteConfig>
          <url>ssh://gerritgit/rsch/ec/shmem</url>
        </hudson.plugins.git.UserRemoteConfig>
      </userRemoteConfigs>
      <branches>
        <hudson.plugins.git.BranchSpec>
          <name>*/amd-master</name>
        </hudson.plugins.git.BranchSpec>
      </branches>
      <doGenerateSubmoduleConfigurations>false</doGenerateSubmoduleConfigurations>
      <submoduleCfg class="empty-list"/>
      <extensions/>
    </scm>
    <scriptPath>internal/continuous_integration/nightly/Jenkinsfile</scriptPath>
    <lightweight>false</lightweight>
  </definition>
  <triggers/>
  <disabled>false</disabled>
</flow-definition>
|
||||
@@ -1,335 +0,0 @@
|
||||
pipeline {
|
||||
agent { label 'sv-pdp-5' }
|
||||
environment {
|
||||
HSA_FORCE_FINE_GRAIN_PCIE = 1
|
||||
MPI_HOME="/home/resperf/mpich-4.0.1/install/global"
|
||||
PATH = "$MPI_HOME/bin:$PATH"
|
||||
LD_LIBRARY_PATH = "$MPI_HOME/lib:$LD_LIBRARY_PATH"
|
||||
build_dir = "builds/${BUILD_ID}"
|
||||
CMAKE_PREFIX_PATH = "/opt/rocm/lib/cmake"
|
||||
}
|
||||
stages {
|
||||
stage('Synchronize Source Code') {
|
||||
steps {
|
||||
git branch: 'amd-master', changelog: false, poll: false, url: 'ssh://gerritgit/rsch/ec/shmem'
|
||||
}
|
||||
}
|
||||
stage('Make Build Directory') {
|
||||
steps {
|
||||
dir("library") {
|
||||
sh "mkdir -p ${build_dir}"
|
||||
}
|
||||
}
|
||||
}
|
||||
stage('Build Source Code') {
|
||||
parallel {
|
||||
stage('RC_SINGLE') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/RC_SINGLE") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/rc_single install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/RC_SINGLE") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_SINGLE/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
dir("internal/clients/spts/${build_dir}/RC_SINGLE") {
|
||||
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_SINGLE/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RC_MULTI_WF_COAL') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/RC_MULTI_WF_COAL") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/rc_multi_wf_coal install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/RC_MULTI_WF_COAL") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_MULTI_WF_COAL/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
dir("internal/clients/spts/${build_dir}/RC_MULTI_WF_COAL") {
|
||||
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_MULTI_WF_COAL/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RC_MULTI') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/RC_MULTI") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/rc_multi install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/RC_MULTI") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_MULTI/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
dir("internal/clients/spts/${build_dir}/RC_MULTI") {
|
||||
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_MULTI/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RC_SINGLE_DEBUG') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/RC_SINGLE_DEBUG") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/rc_single_debug install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/RC_SINGLE_DEBUG") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_SINGLE_DEBUG/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
dir("internal/clients/spts/${build_dir}/RC_SINGLE_DEBUG") {
|
||||
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_SINGLE_DEBUG/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RC_SINGLE_PROFILE') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/RC_SINGLE_PROFILE") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/rc_single_profile install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/RC_SINGLE_PROFILE") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_SINGLE_PROFILE/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
dir("internal/clients/spts/${build_dir}/RC_SINGLE_PROFILE") {
|
||||
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_SINGLE_PROFILE/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_SINGLE') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/DC_SINGLE") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/dc_single install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/DC_SINGLE") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_SINGLE/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
dir("internal/clients/spts/${build_dir}/DC_SINGLE") {
|
||||
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/DC_SINGLE/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_MULTI') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/DC_MULTI") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/dc_multi install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/DC_MULTI") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
dir("internal/clients/spts/${build_dir}/DC_MULTI") {
|
||||
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/DC_MULTI/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_MULTI_IPC') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/DC_MULTI_IPC") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/dc_multi_ipc install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/DC_MULTI_IPC") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI_IPC/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
dir("internal/clients/spts/${build_dir}/DC_MULTI_IPC") {
|
||||
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/DC_MULTI_IPC/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_MULTI_DEBUG') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/DC_MULTI_DEBUG") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/dc_multi_debug install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/DC_MULTI_DEBUG") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI_DEBUG/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
dir("internal/clients/spts/${build_dir}/DC_MULTI_DEBUG") {
|
||||
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/DC_MULTI_DEBUG/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_MULTI_PROFILE') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/DC_MULTI_PROFILE") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/dc_multi_profile install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/DC_MULTI_PROFILE") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI_PROFILE/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
dir("internal/clients/spts/${build_dir}/DC_MULTI_PROFILE") {
|
||||
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/DC_MULTI_PROFILE/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
stage('Run Tests') {
|
||||
stages {
|
||||
stage('RC_SINGLE') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/RC_SINGLE/rocshmem_example_driver single_thread ${build_dir}/RC_SINGLE'
|
||||
}
|
||||
dir("internal/clients/spts") {
|
||||
sh './driver.sh ${build_dir}/RC_SINGLE/spts single_thread ${build_dir}/RC_SINGLE'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RC_MULTI_WF_COAL') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/RC_MULTI_WF_COAL/rocshmem_example_driver multi_thread ${build_dir}/RC_MULTI_WF_COAL'
|
||||
}
|
||||
dir("internal/clients/spts") {
|
||||
sh './driver.sh ${build_dir}/RC_MULTI_WF_COAL/spts multi_thread ${build_dir}/RC_MULTI_WF_COAL'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RC_MULTI') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/RC_MULTI/rocshmem_example_driver multi_thread ${build_dir}/RC_MULTI'
|
||||
}
|
||||
dir("internal/clients/spts") {
|
||||
sh './driver.sh ${build_dir}/RC_MULTI/spts multi_thread ${build_dir}/RC_MULTI'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RC_SINGLE_DEBUG') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/RC_SINGLE_DEBUG/rocshmem_example_driver single_thread ${build_dir}/RC_SINGLE_DEBUG'
|
||||
}
|
||||
dir("internal/clients/spts") {
|
||||
sh './driver.sh ${build_dir}/RC_SINGLE_DEBUG/spts single_thread ${build_dir}/RC_SINGLE_DEBUG'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RC_SINGLE_PROFILE') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/RC_SINGLE_PROFILE/rocshmem_example_driver single_thread ${build_dir}/RC_SINGLE_PROFILE'
|
||||
}
|
||||
dir("internal/clients/spts") {
|
||||
sh './driver.sh ${build_dir}/RC_SINGLE_PROFILE/spts single_thread ${build_dir}/RC_SINGLE_PROFILE'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_SINGLE') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/DC_SINGLE/rocshmem_example_driver single_thread ${build_dir}/DC_SINGLE'
|
||||
}
|
||||
dir("internal/clients/spts") {
|
||||
sh './driver.sh ${build_dir}/DC_SINGLE/spts single_thread ${build_dir}/DC_SINGLE'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_MULTI') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/DC_MULTI/rocshmem_example_driver multi_thread ${build_dir}/DC_MULTI'
|
||||
}
|
||||
dir("internal/clients/spts") {
|
||||
sh './driver.sh ${build_dir}/DC_MULTI/spts multi_thread ${build_dir}/DC_MULTI'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_MULTI_IPC') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/DC_MULTI_IPC/rocshmem_example_driver multi_thread ${build_dir}/DC_MULTI_IPC'
|
||||
}
|
||||
dir("internal/clients/spts") {
|
||||
sh './driver.sh ${build_dir}/DC_MULTI_IPC/spts multi_thread ${build_dir}/DC_MULTI_IPC'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_MULTI_DEBUG') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/DC_MULTI_DEBUG/rocshmem_example_driver multi_thread ${build_dir}/DC_MULTI_DEBUG'
|
||||
}
|
||||
dir("internal/clients/spts") {
|
||||
sh './driver.sh ${build_dir}/DC_MULTI_DEBUG/spts multi_thread ${build_dir}/DC_MULTI_DEBUG'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_MULTI_PROFILE') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/DC_MULTI_PROFILE/rocshmem_example_driver multi_thread ${build_dir}/DC_MULTI_PROFILE'
|
||||
}
|
||||
dir("internal/clients/spts") {
|
||||
sh './driver.sh ${build_dir}/DC_MULTI_PROFILE/spts multi_thread ${build_dir}/DC_MULTI_PROFILE'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RO_NET_BASIC') {
|
||||
// RO_NET controlled at runtime, no need for a new build. Use RC_MULTI
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh 'mkdir -p ${build_dir}/RO_NET_BASIC'
|
||||
sh 'ROCSHMEM_RO=1 RO_NET_CPU_QUEUE=1 UCX_TLS=rc ./driver.sh ${build_dir}/RC_MULTI/rocshmem_example_driver ro ${build_dir}/RO_NET_BASIC'
|
||||
}
|
||||
dir("internal/clients/spts") {
|
||||
sh 'mkdir -p ${build_dir}/RO_NET_BASIC'
|
||||
sh 'ROCSHMEM_RO=1 RO_NET_CPU_QUEUE=1 UCX_TLS=rc ./driver.sh ${build_dir}/RC_MULTI/spts multi_thread ${build_dir}/RO_NET_BASIC'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,45 +0,0 @@
|
||||
<?xml version='1.1' encoding='UTF-8'?>
|
||||
<flow-definition plugin="workflow-job@2.40">
|
||||
<actions>
|
||||
<org.jenkinsci.plugins.pipeline.modeldefinition.actions.DeclarativeJobAction plugin="pipeline-model-definition@1.8.4"/>
|
||||
<org.jenkinsci.plugins.pipeline.modeldefinition.actions.DeclarativeJobPropertyTrackerAction plugin="pipeline-model-definition@1.8.4">
|
||||
<jobProperties/>
|
||||
<triggers/>
|
||||
<parameters/>
|
||||
<options/>
|
||||
</org.jenkinsci.plugins.pipeline.modeldefinition.actions.DeclarativeJobPropertyTrackerAction>
|
||||
</actions>
|
||||
<description></description>
|
||||
<keepDependencies>false</keepDependencies>
|
||||
<properties>
|
||||
<org.jenkinsci.plugins.workflow.job.properties.PipelineTriggersJobProperty>
|
||||
<triggers>
|
||||
<hudson.triggers.TimerTrigger>
|
||||
<spec>H 22 * * *</spec>
|
||||
</hudson.triggers.TimerTrigger>
|
||||
</triggers>
|
||||
</org.jenkinsci.plugins.workflow.job.properties.PipelineTriggersJobProperty>
|
||||
</properties>
|
||||
<definition class="org.jenkinsci.plugins.workflow.cps.CpsScmFlowDefinition" plugin="workflow-cps@2.90">
|
||||
<scm class="hudson.plugins.git.GitSCM" plugin="git@4.7.1">
|
||||
<configVersion>2</configVersion>
|
||||
<userRemoteConfigs>
|
||||
<hudson.plugins.git.UserRemoteConfig>
|
||||
<url>ssh://gerritgit/rsch/ec/shmem</url>
|
||||
</hudson.plugins.git.UserRemoteConfig>
|
||||
</userRemoteConfigs>
|
||||
<branches>
|
||||
<hudson.plugins.git.BranchSpec>
|
||||
<name>*/amd-master</name>
|
||||
</hudson.plugins.git.BranchSpec>
|
||||
</branches>
|
||||
<doGenerateSubmoduleConfigurations>false</doGenerateSubmoduleConfigurations>
|
||||
<submoduleCfg class="empty-list"/>
|
||||
<extensions/>
|
||||
</scm>
|
||||
<scriptPath>internal/continuous_integration/nightly/Jenkinsfile</scriptPath>
|
||||
<lightweight>false</lightweight>
|
||||
</definition>
|
||||
<triggers/>
|
||||
<disabled>false</disabled>
|
||||
</flow-definition>
|
||||
@@ -1,288 +0,0 @@
|
||||
// Per-patchset pipeline: checks out the Gerrit change under test, builds the
// library and its clients in several configurations, runs the functional_tests
// and sos_tests drivers against every build, archives the logs together with
// the changeset metadata and, on success, triggers the 'shmem_perf_check' job.
//
// The SPTS client (internal/clients/spts) build and test steps are disabled in
// this pipeline.
pipeline {
    agent { label 'sv-pdp-7' }

    environment {
        // One build tree per Gerrit change/patchset.
        build_dir = "builds/change-${GERRIT_CHANGE_NUMBER}-${GERRIT_PATCHSET_NUMBER}"

        MPI_HOME="/home/resperf/mpich/install"
        UCX_HOME="/home/resperf/ucx/install"

        PATH="$MPI_HOME/bin:$UCX_HOME/bin:$PATH"
        LD_LIBRARY_PATH="$MPI_HOME/lib:$UCX_HOME/lib:$LD_LIBRARY_PATH"
        PKG_CONFIG_PATH="$MPI_HOME/lib/pkgconfig:$UCX_HOME/lib/pkgconfig"

        CMAKE_PREFIX_PATH="/opt/rocm/lib/cmake"

        UCX_WARN_UNUSED_ENV_VARS="n"
        HSA_FORCE_FINE_GRAIN_PCIE=1
        UCX_TLS="rc"
        ROCSHMEM_USE_SQ_GPU_MEM=0
        ROCSHMEM_USE_CQ_GPU_MEM=0
        ROCSHMEM_NUM_BLOCKS=128
    }

    stages {
        stage('Synchronize Source Code') {
            steps {
                // Fetch exactly the patchset named by GERRIT_REFSPEC and build FETCH_HEAD.
                checkout changelog: false, poll: false, scm: [$class: 'GitSCM', branches: [[name: 'FETCH_HEAD']], doGenerateSubmoduleConfigurations: false, extensions: [[$class: 'CloneOption', depth: 0, noTags: false, reference: '', shallow: false]], submoduleCfg: [], userRemoteConfigs: [[name: 'origin', refspec: '${GERRIT_REFSPEC}', url: 'ssh://gerritgit/rsch/ec/shmem']]]
            }
        }

        stage('Env Variables') {
            steps {
                // Record the effective environment in the build log.
                sh 'printenv'
            }
        }

        stage('Make Build Directory') {
            steps {
                dir("library") {
                    sh "mkdir -p ${build_dir}"
                }
            }
        }

        // Every configuration builds the library into its own install prefix
        // and then builds both client suites against that prefix.
        stage('Build Source Code') {

            failFast true

            parallel {
                stage('RC_SINGLE') {
                    steps {
                        //===================== LIBRARY =======================
                        dir("library/${build_dir}/RC_SINGLE") {
                            sh 'mkdir -p install'
                            sh '../../../build_configs/rc_single install'
                        }
                        //===================== CLIENT ========================
                        dir("clients/functional_tests/${build_dir}/RC_SINGLE") {
                            sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_SINGLE/install'
                        }
                        dir("clients/sos_tests/${build_dir}/RC_SINGLE") {
                            sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_SINGLE/install'
                        }
                    }
                }

                stage('RC_MULTI_WF_COAL') {
                    steps {
                        //===================== LIBRARY =======================
                        dir("library/${build_dir}/RC_MULTI_WF_COAL") {
                            sh 'mkdir -p install'
                            sh '../../../build_configs/rc_multi_wf_coal install'
                        }
                        //===================== CLIENT ========================
                        dir("clients/functional_tests/${build_dir}/RC_MULTI_WF_COAL") {
                            sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_MULTI_WF_COAL/install'
                        }
                        dir("clients/sos_tests/${build_dir}/RC_MULTI_WF_COAL") {
                            sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_MULTI_WF_COAL/install'
                        }
                    }
                }

                stage('RC_MULTI') {
                    steps {
                        //===================== LIBRARY =======================
                        dir("library/${build_dir}/RC_MULTI") {
                            sh 'mkdir -p install'
                            sh '../../../build_configs/rc_multi install'
                        }
                        //===================== CLIENT ========================
                        dir("clients/functional_tests/${build_dir}/RC_MULTI") {
                            sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_MULTI/install'
                        }
                        dir("clients/sos_tests/${build_dir}/RC_MULTI") {
                            sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_MULTI/install'
                        }
                    }
                }

                stage('DC_SINGLE') {
                    steps {
                        //===================== LIBRARY =======================
                        dir("library/${build_dir}/DC_SINGLE") {
                            sh 'mkdir -p install'
                            sh '../../../build_configs/dc_single install'
                        }
                        //===================== CLIENT ========================
                        dir("clients/functional_tests/${build_dir}/DC_SINGLE") {
                            sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_SINGLE/install'
                        }
                        dir("clients/sos_tests/${build_dir}/DC_SINGLE") {
                            sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_SINGLE/install'
                        }
                    }
                }

                stage('DC_MULTI') {
                    steps {
                        //===================== LIBRARY =======================
                        dir("library/${build_dir}/DC_MULTI") {
                            sh 'mkdir -p install'
                            sh '../../../build_configs/dc_multi install'
                        }
                        //===================== CLIENT ========================
                        dir("clients/functional_tests/${build_dir}/DC_MULTI") {
                            sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI/install'
                        }
                        dir("clients/sos_tests/${build_dir}/DC_MULTI") {
                            sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI/install'
                        }
                    }
                }

                stage('RO_NET') {
                    steps {
                        //===================== LIBRARY =======================
                        dir("library/${build_dir}/RO_NET") {
                            sh 'mkdir -p install'
                            sh '../../../build_configs/ro_net install'
                        }
                        //===================== CLIENT ========================
                        dir("clients/functional_tests/${build_dir}/RO_NET") {
                            sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RO_NET/install'
                        }
                        dir("clients/sos_tests/${build_dir}/RO_NET") {
                            sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RO_NET/install'
                        }
                    }
                }
            }
        }

        // The test stages run one after another (nested 'stages', not 'parallel').
        stage('Run Tests') {
            stages {
                stage('RC_SINGLE') {
                    steps {
                        dir("clients/functional_tests") {
                            sh './driver.sh ${build_dir}/RC_SINGLE/rocshmem_example_driver single_thread ${build_dir}/RC_SINGLE'
                        }
                        dir("clients/sos_tests") {
                            sh './driver.sh ${build_dir}/RC_SINGLE short ${build_dir}/RC_SINGLE'
                        }
                    }
                }

                stage('RC_MULTI_WF_COAL') {
                    steps {
                        dir("clients/functional_tests") {
                            sh './driver.sh ${build_dir}/RC_MULTI_WF_COAL/rocshmem_example_driver multi_thread ${build_dir}/RC_MULTI_WF_COAL'
                        }
                        dir("clients/sos_tests") {
                            sh './driver.sh ${build_dir}/RC_MULTI_WF_COAL short ${build_dir}/RC_MULTI_WF_COAL'
                        }
                    }
                }

                stage('RC_MULTI') {
                    steps {
                        dir("clients/functional_tests") {
                            sh './driver.sh ${build_dir}/RC_MULTI/rocshmem_example_driver multi_thread ${build_dir}/RC_MULTI'
                        }
                        dir("clients/sos_tests") {
                            sh './driver.sh ${build_dir}/RC_MULTI short ${build_dir}/RC_MULTI'
                        }
                    }
                }

                stage('DC_SINGLE') {
                    steps {
                        dir("clients/functional_tests") {
                            sh './driver.sh ${build_dir}/DC_SINGLE/rocshmem_example_driver single_thread ${build_dir}/DC_SINGLE'
                        }
                        dir("clients/sos_tests") {
                            sh './driver.sh ${build_dir}/DC_SINGLE short ${build_dir}/DC_SINGLE'
                        }
                    }
                }

                stage('DC_MULTI') {
                    steps {
                        dir("clients/functional_tests") {
                            sh './driver.sh ${build_dir}/DC_MULTI/rocshmem_example_driver multi_thread ${build_dir}/DC_MULTI'
                        }
                        dir("clients/sos_tests") {
                            sh './driver.sh ${build_dir}/DC_MULTI short ${build_dir}/DC_MULTI'
                        }
                    }
                }

                stage('RO_NET') {
                    steps {
                        dir("clients/functional_tests") {
                            sh 'ROCSHMEM_RO=1 RO_NET_CPU_QUEUE=1 UCX_TLS=rc ./driver.sh ${build_dir}/RO_NET/rocshmem_example_driver ro ${build_dir}/RO_NET'
                        }
                        dir("clients/sos_tests") {
                            sh 'ROCSHMEM_RO=1 ./driver.sh ${build_dir}/RO_NET short ${build_dir}/RO_NET'
                        }
                    }
                }
            }
        }

        stage('Generate Checker Metadata') {
            steps {
                dir("library/${build_dir}") {
                    sh 'git fetch --tags'
                    // First the commits that sit on top of amd-master, then
                    // the tip commit of amd-master itself.
                    sh 'git log --pretty=oneline remotes/origin/amd-master.. > changeset_delta.txt'
                    sh 'git log --pretty=oneline remotes/origin/amd-master~1..remotes/origin/amd-master >> changeset_delta.txt'
                }
            }
        }

        stage('Archive Artifacts') {
            steps {
                dir("library/${build_dir}") {
                    archiveArtifacts artifacts: 'changeset_delta.txt'
                }
                dir("clients/functional_tests/${build_dir}") {
                    archiveArtifacts artifacts: 'RC_SINGLE/**/*.log'
                    // RC_MULTI_WF_COAL is built and tested like the other
                    // configurations, so keep its logs as well.
                    // NOTE(review): assumes driver.sh writes *.log files
                    // under this directory as it does for the others.
                    archiveArtifacts artifacts: 'RC_MULTI_WF_COAL/**/*.log'
                    archiveArtifacts artifacts: 'RC_MULTI/**/*.log'
                    archiveArtifacts artifacts: 'DC_SINGLE/**/*.log'
                    archiveArtifacts artifacts: 'DC_MULTI/**/*.log'
                    archiveArtifacts artifacts: 'RO_NET/**/*.log'
                }
            }
        }
    }

    post {
        success {
            // Run the performance comparison job and wait for its result.
            build job: 'shmem_perf_check', wait: true
        }
    }
}
|
||||
@@ -1,21 +0,0 @@
|
||||
#!/tool/pandora64/.package/python-3.8.0/bin/python3
|
||||
|
||||
import glob
|
||||
import pprint
|
||||
|
||||
class PathGlobber():
    """Collects the filesystem paths that match a glob pattern built from fragments."""

    def __init__(self, name, *partial_paths_to_concatenate):
        """Assemble the search pattern.

        name -- text that dump() places in front of the formatted results.
        partial_paths_to_concatenate -- strings joined in order, with no
            separator inserted between them, to form the glob pattern.
        """
        self._name = name
        self._search_path = ''.join(partial_paths_to_concatenate)
        self.dirs = []

    def generate(self):
        """Expand the pattern (recursive globbing enabled) and store the matches in dirs."""
        matches = glob.glob(self._search_path, recursive=True)
        self.dirs = matches

    def dump(self):
        """Return the name followed by the pretty-printed matches and a newline."""
        formatted = pprint.pformat(self.dirs, width=120)
        return self._name + formatted + '\n'
|
||||
@@ -1,15 +0,0 @@
|
||||
#!/tool/pandora64/.package/python-3.8.0/bin/python3
|
||||
|
||||
import absolute_path
|
||||
import glob
|
||||
|
||||
class Archive(absolute_path.PathGlobber):
    """PathGlobber whose pattern is jenkins_path + archive_path + benchmark_path."""

    def __init__(self, args, name=''):
        """Build the search pattern from the three path attributes of args."""
        super().__init__(name, args.jenkins_path, args.archive_path,
                         args.benchmark_path)

    def path_of_build(self, build_id):
        """Return the first directory matching the search pattern for one build.

        The '*/archive' wildcard of the search pattern is replaced by
        '<build_id>/archive' before globbing.

        Raises IndexError (now naming the pattern) when nothing matches.
        """
        pattern = self._search_path.replace('*/archive', build_id + '/archive')
        matches = glob.glob(pattern)
        if not matches:
            # Same exception type as the bare path[0] lookup used to raise,
            # but with enough context to diagnose a missing archive.
            raise IndexError('no archive directory matches ' + pattern)
        return matches[0]
|
||||
@@ -1,54 +0,0 @@
|
||||
#!/tool/pandora64/.package/python-3.8.0/bin/python3
|
||||
|
||||
import parser
|
||||
import dictionary
|
||||
import archive_path
|
||||
import checker
|
||||
|
||||
def main():
    """Run the performance check for the most recent Jenkins build."""
    # Command line values are accepted, but every option has a reasonable
    # default for running as part of the CI infrastructure.
    args = parser.Parser().parse_command_line()

    # Jenkins archives build artifacts in a directory. Only successful
    # builds (those which run to completion) are collected here, so failed
    # build directories are ignored while searching for performance data.
    archives = archive_path.Archive(args)
    archives.generate()
    print(archives.dump())

    # Each build records its changesets in changeset_delta.txt; the commit
    # hash parsed from that file is stored per build id.
    build_to_changeset = dictionary.BuildToChangesetDict()
    build_to_changeset.generate(archives.dirs)
    print(build_to_changeset.dump())

    # Inverse mapping: changeset -> list of build numbers that built it.
    # A changeset may be built many times; the most recent build (largest
    # build number) is the one used to retrieve performance data.
    changeset_to_build = dictionary.ChangesetToBuildDict()
    changeset_to_build.generate(build_to_changeset)
    print(changeset_to_build.dump())

    # changeset_delta.txt also holds the Gerrit-style relation chain. It is
    # used to look up performance data for every changeset in the chain
    # (when possible).
    build_to_relation_chain = dictionary.BuildToRelationChainDict()
    build_to_relation_chain.generate(archives.dirs)
    print(build_to_relation_chain.dump())

    checker.Performance(args,
                        archives,
                        changeset_to_build,
                        build_to_relation_chain).run()


if __name__ == '__main__':
    main()
|
||||
@@ -1,97 +0,0 @@
|
||||
#!/tool/pandora64/.package/python-3.8.0/bin/python3
|
||||
|
||||
import archive_path
|
||||
import log
|
||||
import dictionary
|
||||
import report
|
||||
import violation
|
||||
|
||||
class Performance():
    """Compares the newest build's logs with builds of the related changesets."""

    def __init__(self, args, archives, changeset_to_build,
                 build_to_relation_chain):
        """Select the build under test and open a report destination for it.

        args -- parsed options; this class reads args.latency_max and args.logs.
        archives -- object providing path_of_build(build_id).
        changeset_to_build -- object whose .data maps changeset -> build ids.
        build_to_relation_chain -- object whose .data maps build id -> list of
            changesets and which provides most_recent_build().
        """
        self._args = args
        self._archives = archives
        self._changeset_to_build = changeset_to_build
        self._build_to_relation_chain = build_to_relation_chain
        self._build_id = build_to_relation_chain.most_recent_build()
        self._archive_path = archives.path_of_build(self._build_id)
        self._output = report.Report(self._build_id,
                                     self._archive_path,
                                     'performance_diff.txt')

    def _other_build_id(self, other_changeset):
        """Look up a build of the changeset whose hash starts with other_changeset.

        Returns (True, build_id) with the largest build number recorded for the
        first matching changeset, or (False, 0) when no build is known. The
        latter happens when builds in the relation chain have not been tested
        before this changeset.
        """
        for changeset, build_ids in self._changeset_to_build.data.items():
            if changeset.startswith(other_changeset) and build_ids:
                # Build ids are numeric strings; compare them as integers so
                # that '12' is newer than '7'.
                return True, max(build_ids, key=int)
        return False, 0

    def _log_difference(self, log_filename, other_changeset,
                        other_archive_path, violations):
        """Diff one log file between the two builds and check the latency threshold."""
        print('determining difference of log file ' + log_filename)
        self._output.record(log_filename)

        current_file_path = self._archive_path + '/' + log_filename
        other_file_path = other_archive_path + '/' + log_filename
        log_pair = log.Pair(current_file_path, other_file_path)
        log_pair.calculate_differences()

        # Entries are strings carrying a '%' sign, e.g. '3.2%'.
        latency_perc = [float(entry.strip('%'))
                        for entry in log_pair.latency_percentage_differences]
        if latency_perc:
            # max() of an empty list would raise; with no data points there
            # is nothing to check against the threshold.
            violations.check(max(latency_perc), other_changeset, log_filename)

        self._output.record(log_pair.dump())

    def _changeset_difference(self, current_changeset, other_changeset):
        """Compare every configured log against one other changeset.

        Returns the violation.Threshold collected for this pair; it is returned
        unchanged when the other changeset has no build to compare with.
        """
        violations = violation.Threshold(self._args.latency_max, 'latency')

        change_pair = '(' + current_changeset + ',' + other_changeset + ')'
        print('comparing changesets ' + change_pair)
        self._output.record(change_pair)

        status, other_build_id = self._other_build_id(other_changeset)
        if not status:
            message = 'skipping changeset ' + other_changeset
            print(message)
            self._output.record(message)
            return violations

        other_archive_path = self._archives.path_of_build(other_build_id)
        print(self._archive_path)
        print(other_archive_path)

        for filename in self._args.logs:
            self._log_difference(filename, other_changeset,
                                 other_archive_path, violations)
            print('\n')

        return violations

    def _calculate_performance_differences(self):
        """Compare the build's own changeset with the rest of its relation chain."""
        chain = self._build_to_relation_chain.data[self._build_id]
        current_changeset = chain[0]
        other_changesets = chain[1:]

        # Stays None when the chain holds only the changeset under test.
        violations = None
        for other_changeset in other_changesets:
            violations = self._changeset_difference(current_changeset,
                                                    other_changeset)

        # Only report on the last pairwise changeset combination.
        # This combination represents the changeset being tested and
        # the amd-master:HEAD.
        if violations is not None:
            violations.provide_violations_to_report(self._output)

    def run(self):
        """Open the report and perform all comparisons."""
        self._output.open()
        self._calculate_performance_differences()
|
||||
@@ -1,163 +0,0 @@
|
||||
#!/tool/pandora64/.package/python-3.8.0/bin/python3
|
||||
|
||||
import abc
|
||||
import os
|
||||
import pprint
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
class BaseDict(metaclass=abc.ABCMeta):
    """Base for dictionaries keyed by build id and filled from changeset delta files.

    Subclasses implement _changeset_delta_operations() to decide what is
    stored in self.data for each build.
    """

    def __init__(self):
        self.data = {}
        # Text placed in front of the formatted data by dump(); subclasses
        # overwrite it after calling this constructor.
        self._print_text = ''
        self._delimiter_path = 'archive'
        self._changeset_delta_filename = 'changeset_delta.txt'

    def _build_id(self, build_directory):
        """Return the path component that directly precedes the 'archive' component.

        Works for absolute and relative paths and ignores empty components
        produced by repeated slashes. When no 'archive' component follows
        another component, the last component is returned ('' for an empty
        path).
        """
        parts = [part for part in build_directory.split('/') if part]
        if self._delimiter_path in parts[1:]:
            return parts[parts.index(self._delimiter_path, 1) - 1]
        return parts[-1] if parts else ''

    def _open_changeset_delta_file(self, archive_directory):
        """Open the changeset delta file located next to archive_directory.

        Exits the program with a message when the file cannot be opened.
        The caller is responsible for closing the returned handle.
        """
        changeset_file_path = os.path.join(os.path.dirname(archive_directory),
                                           self._changeset_delta_filename)
        try:
            return open(changeset_file_path, 'r')
        except OSError:
            # Only I/O failures are turned into an exit; other exceptions
            # (e.g. KeyboardInterrupt) propagate normally.
            sys.exit('failed to open: ' + changeset_file_path)

    @abc.abstractmethod
    def _changeset_delta_operations(self, file_handle, bld_id):
        """Consume the open changeset delta file and update self.data for bld_id."""
        pass

    def generate(self, archives):
        """Process the changeset delta file of every directory in archives."""
        for directory in archives:
            bld_id = self._build_id(directory)
            # The context manager closes the handle even if parsing fails.
            with self._open_changeset_delta_file(directory) as file_handle:
                self._changeset_delta_operations(file_handle, bld_id)

    def most_recent_build(self):
        """Return the numerically largest build id as a string.

        Raises ValueError when no build has been recorded.
        """
        if not self.data:
            raise ValueError('no builds have been recorded')
        return str(max(map(int, self.data)))

    def dump(self):
        """Return the name text followed by the pretty-printed data and a newline."""
        str_out = self._print_text
        str_out += pprint.pformat(self.data, width=120)
        str_out += '\n'
        return str_out
|
||||
|
||||
class BuildToChangesetDict(BaseDict):
    """Maps each build id to the first commit hash of its changeset delta file."""

    def __init__(self, name=''):
        super().__init__()
        self._print_text = name

    def _changeset_delta_operations(self, file_handle, bld_id):
        """Store the first word of the file's first line; store nothing if it is blank."""
        fields = file_handle.readline().split()
        if fields:
            self.data[bld_id] = fields[0]
|
||||
|
||||
class BuildToRelationChainDict(BaseDict):
    """Maps each build id to the list of commit hashes in its changeset delta file."""

    def __init__(self, name=''):
        super().__init__()
        self._print_text = name

    def _changeset_delta_operations(self, file_handle, bld_id):
        """Store the first word of every non-blank line, in file order."""
        changes = []
        for line in file_handle:
            fields = line.split()
            # Blank lines carry no hash; skip them instead of raising
            # IndexError, as BuildToChangesetDict already does.
            if fields:
                changes.append(fields[0])
        self.data[bld_id] = changes
|
||||
|
||||
class ChangesetToBuildDict():
    """Inverse of a build -> changeset mapping: changeset -> list of build ids."""

    def __init__(self, name=''):
        self._print_text = name
        self.data = {}

    def _invert_dict(self, dictionary):
        """Group the keys of dictionary.data by their value, keeping iteration order."""
        grouped = {}
        for build_id, changeset in dictionary.data.items():
            grouped.setdefault(changeset, []).append(build_id)
        return grouped

    def generate(self, dictionary):
        """Replace data with the inversion of dictionary.data."""
        self.data = self._invert_dict(dictionary)

    def dump(self):
        """Return the name text followed by the pretty-printed data and a newline."""
        formatted = pprint.pformat(self.data, width=120)
        return self._print_text + formatted + '\n'
|
||||
|
||||
class ChangelogToMostRecentBuild():
    """Map each changeset from ``git log`` to its highest-numbered build id.

    ``generate`` fills ``self.data`` with ``{changeset: build_id_string}``
    for every changeset of the git log that also appears in the supplied
    changeset-to-build mapping.
    """

    def __init__(self, name=''):
        # Label prepended to the output of dump().
        self._print_text = name
        self._all_changesets = []
        self._changesets_with_builds = []
        self._changesets_without_builds = []
        self.data = {}

    def _build_id(self, changeset_to_build, changeset):
        """Return the largest build id recorded for ``changeset`` as a string.

        An empty string is returned when the changeset is unknown, has no
        builds, or has a build id that is not an integer.
        """
        try:
            build_ids = [int(build) for build in changeset_to_build.data[changeset]]
            return str(max(build_ids))
        except (KeyError, TypeError, ValueError):
            # Only lookup/conversion failures mean "no usable build id";
            # anything else (e.g. KeyboardInterrupt) must propagate.
            return ''

    def _changelog(self):
        """Load all commit hashes of the current repository, newest first.

        The hashes are requested directly with ``%H``.  Condensing
        ``--shortstat`` output three lines at a time goes out of step on
        commits that print no stat line (merge commits, for instance).
        """
        try:
            result = subprocess.run(['git', 'log', '--pretty=tformat:%H'],
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.DEVNULL,
                                    universal_newlines=True)
            hashes = result.stdout.split()
        except OSError:
            # git could not be started at all; treat as an empty history.
            hashes = []
        self._all_changesets = hashes

    def _with_builds(self, changeset_to_build):
        """Remember which changesets have at least one recorded build."""
        self._changesets_with_builds = list(changeset_to_build.data.keys())

    def _without_builds(self):
        """Remember the git-log changesets that have no recorded build.

        The result keeps git-log order so that dump() is deterministic.
        """
        built = set(self._changesets_with_builds)
        seen = set()
        remaining = []
        for changeset in self._all_changesets:
            if changeset not in built and changeset not in seen:
                seen.add(changeset)
                remaining.append(changeset)
        self._changesets_without_builds = remaining

    def generate(self, changeset_to_build):
        """Populate ``self.data`` from the git log and ``changeset_to_build``."""
        self._changelog()
        self._with_builds(changeset_to_build)
        self._without_builds()
        # A set keeps the membership test constant-time per changeset.
        built = set(self._changesets_with_builds)
        for changeset in self._all_changesets:
            if changeset in built:
                self.data[changeset] = self._build_id(changeset_to_build,
                                                      changeset)

    def dump(self):
        """Return a multi-section, pretty-printed report of all state."""
        sections = [
            self._print_text,
            'git-log_changesets_in_order:\n',
            pprint.pformat(self._all_changesets, width=120),
            '\nfilesystem_with_builds:\n',
            pprint.pformat(self._changesets_with_builds, width=120),
            '\nfilesystem_without_builds:\n',
            pprint.pformat(self._changesets_without_builds, width=120),
            '\ngit-log_changesets_to_build-id_mappings:\n',
            pprint.pformat(self.data, width=120),
            '\n',
        ]
        return ''.join(sections)
|
||||
@@ -1,107 +0,0 @@
|
||||
#!/tool/pandora64/.package/python-3.8.0/bin/python3
|
||||
|
||||
import pprint
|
||||
import re
|
||||
import sys
|
||||
|
||||
class Log():
    """Latency/bandwidth samples extracted from one benchmark log file.

    Typical use is ``open()`` followed by ``parse()``; afterwards
    ``latency`` and ``bandwidth`` hold one rounded float per matching line.
    """

    def __init__(self, logfile_abspath):
        self._file_path = logfile_abspath
        self.latency = []
        self.bandwidth = []
        # Matches lines that contain two decimal numbers (the latency and
        # bandwidth columns).  Raw string: '\.' is not a valid str escape.
        self._regex = r'.*[0-9]+\.[0-9]+.*[0-9]\.[0-9].*'

    def open(self):
        """Open the log for reading; exit the program if that fails."""
        try:
            self._file_handle = open(self._file_path, 'r')
        except OSError:
            sys.exit('failed to open: ' + self._file_path)

    def parse(self):
        """Append the first two columns of every matching line.

        Column one goes to ``latency`` and column two to ``bandwidth``,
        each rounded to four decimals.  The file handle is closed once the
        file has been consumed; calling ``parse`` again is a no-op.
        """
        if self._file_handle.closed:
            return
        with self._file_handle as handle:
            for line in handle:
                if re.match(self._regex, line):
                    entries = line.split()
                    self.latency.append(round(float(entries[0]), 4))
                    self.bandwidth.append(round(float(entries[1]), 4))
|
||||
|
||||
class Pair():
    """Two parsed logs plus their element-wise percentage differences."""

    def __init__(self, first_logfile_abspath, second_logfile_abspath):
        self.first = Log(first_logfile_abspath)
        self.first.open()
        self.first.parse()
        self.second = Log(second_logfile_abspath)
        self.second.open()
        self.second.parse()
        # Filled in by calculate_differences(); initialised here so that
        # dump() works (with empty difference rows) before it has run.
        self.latency_percentage_differences = []
        self.bandwidth_percentage_differences = []

    def _ratio(self, a, b):
        """Return ``(a[i] - b[i]) / a[i]`` for each pair, rounded to 4 places.

        A zero denominator yields ``0.0`` for that position.  Pairs are
        formed with ``zip``, so surplus entries of the longer list are
        ignored.
        """
        diff = [round((x - y), 4) for x, y in zip(a, b)]
        ratio = []
        for numerator, denominator in zip(diff, a):
            try:
                ratio.append(round(numerator / denominator, 4))
            except ZeroDivisionError:
                ratio.append(0.0)
        return ratio

    def _percent(self, ratio):
        """Format each ratio as a percentage string with two decimals."""
        return ['{0:.2%}'.format(x) for x in ratio]

    def _percentage_difference(self, a, b):
        """Return the formatted percentage difference of ``b`` relative to ``a``."""
        return self._percent(self._ratio(a, b))

    def calculate_differences(self):
        """Compute the latency and bandwidth percentage differences."""
        self.latency_percentage_differences = \
            self._percentage_difference(self.first.latency,
                                        self.second.latency)
        self.bandwidth_percentage_differences = \
            self._percentage_difference(self.first.bandwidth,
                                        self.second.bandwidth)

    def dump(self):
        """Return a tab-indented text block of both logs and their deltas."""
        delim = ', '
        row_prefix = '\n\t\t'
        output = '\tlatency:'
        for row in (self.first.latency,
                    self.second.latency,
                    self.latency_percentage_differences):
            output += row_prefix + delim.join(map(str, row))
        output += '\n\tbandwidth:'
        for row in (self.first.bandwidth,
                    self.second.bandwidth,
                    self.bandwidth_percentage_differences):
            output += row_prefix + delim.join(map(str, row))
        return output
|
||||
|
||||
class Tracker():
    """Collection of parsed logs keyed by ``(changeset, log filename)``."""

    def __init__(self, args, archives):
        self._args = args
        self._archives = archives
        self._data = {}

    def add(self, changeset, most_recent_build_id):
        """Parse every log named in ``args.logs`` for the given build.

        The archive directory comes from ``archives.path_of_build``; each
        parsed Log is stored under ``(changeset, filename)``.
        """
        build_dir = self._archives.path_of_build(most_recent_build_id)
        for log_name in self._args.logs:
            parsed = Log(build_dir + '/' + log_name)
            parsed.open()
            parsed.parse()
            self._data[(changeset, log_name)] = parsed

    def dump(self):
        """Return one ``key = latency-list`` line per stored log."""
        rendered = []
        for key, entry in self._data.items():
            rendered.append(pprint.pformat(key, width=120)
                            + ' = '
                            + pprint.pformat(entry.latency, width=120)
                            + '\n')
        return ''.join(rendered)
|
||||
@@ -1,85 +0,0 @@
|
||||
#!/tool/pandora64/.package/python-3.8.0/bin/python3
|
||||
|
||||
import argparse
|
||||
|
||||
class Parser():
    """Defaults and argparse wiring for the performance-check tools."""

    def __init__(self):
        # Parent directory that holds the log output of one configuration
        # run; output directories are meant to be named after the
        # configurations in the library's build_configs.
        self._default_config = 'RC_SINGLE'

        # Log files that are checked for performance differences.
        self._default_logs = ['get.log',
                              'get_nbi.log',
                              'get_swarm.log',
                              'put.log',
                              'put_nbi.log']

        # Largest tolerated pairwise latency difference.
        self._default_latency_max = 5.0

        # Smallest tolerated bandwidth difference.
        self._default_bandwidth_min = -50.0

        # Top-level path on the Jenkins master under which slave output is
        # archived (currently owned by the resperf account).
        self._default_jenkins_path = \
            '/proj/radl_extra/users/resperf/jenkins-2.192/'

        # Generic archive path of the 'short' job builds; the '*' stands in
        # for the Jenkins build number.
        self._default_archive_path = \
            'jobs/shmem_short/builds/*/archive/'

        # Optional suffix for the archive output location.  Empty here;
        # parse_command_line() prefixes it with the chosen config.
        self._default_benchmark_path = ''

    def setup_options(self, argparser):
        """Register every supported option on ``argparser`` and return it."""
        option_table = (
            ('-j', dict(dest='jenkins_path',
                        default=self._default_jenkins_path)),
            ('-a', dict(dest='archive_path',
                        default=self._default_archive_path)),
            ('-b', dict(dest='benchmark_path',
                        default=self._default_benchmark_path)),
            ('-c', dict(dest='config',
                        default=self._default_config)),
            ('-l', dict(dest='logs',
                        nargs='*',
                        default=self._default_logs)),
            ('-x', dict(dest='latency_max',
                        type=float,
                        default=self._default_latency_max)),
            ('-y', dict(dest='bandwidth_min',
                        type=float,
                        default=self._default_bandwidth_min)),
            ('-o', dict(dest='one_changeset')),
            ('-r', dict(dest='changeset_range',
                        nargs=2,
                        metavar=("most_recent_changeset",
                                 "least_recent_changeset"))),
        )
        for flag, settings in option_table:
            argparser.add_argument(flag, **settings)
        return argparser

    def parse_command_line(self):
        """Parse ``sys.argv`` and return the resulting namespace.

        ``benchmark_path`` is rewritten to the config name followed by the
        value given with ``-b``.
        """
        cli = self.setup_options(argparse.ArgumentParser())
        args = cli.parse_args()
        args.benchmark_path = args.config + args.benchmark_path
        return args
|
||||
@@ -1,30 +0,0 @@
|
||||
<?xml version='1.1' encoding='UTF-8'?>
|
||||
<project>
|
||||
<actions/>
|
||||
<description>shmem performance delta checker</description>
|
||||
<keepDependencies>false</keepDependencies>
|
||||
<properties/>
|
||||
<scm class="hudson.scm.NullSCM"/>
|
||||
<assignedNode>master</assignedNode>
|
||||
<canRoam>false</canRoam>
|
||||
<disabled>false</disabled>
|
||||
<blockBuildWhenDownstreamBuilding>false</blockBuildWhenDownstreamBuilding>
|
||||
<blockBuildWhenUpstreamBuilding>false</blockBuildWhenUpstreamBuilding>
|
||||
<triggers/>
|
||||
<concurrentBuild>false</concurrentBuild>
|
||||
<builders>
|
||||
<hudson.tasks.Shell>
|
||||
<command>/proj/radl_extra/users/resperf/jenkins-2.192/workspace/shmem_short@script/internal/continuous_integration/short/check_perf_delta.py -c "RC_SINGLE" -x "15.0" -l put.log put_nbi.log get.log get_nbi.log amo_add.log amo_fadd.log amo_fcswap.log amo_fetch.log amo_finc.log amo_inc.log ping_pong.log
|
||||
/proj/radl_extra/users/resperf/jenkins-2.192/workspace/shmem_short@script/internal/continuous_integration/short/check_perf_delta.py -c "RC_MULTI" -x "15.0" -l put.log put_nbi.log get.log get_nbi.log amo_add.log amo_fadd.log amo_fcswap.log amo_fetch.log amo_finc.log amo_inc.log ping_pong.log get_swarm.log
|
||||
/proj/radl_extra/users/resperf/jenkins-2.192/workspace/shmem_short@script/internal/continuous_integration/short/check_perf_delta.py -c "DC_SINGLE" -x "15.0" -l put.log put_nbi.log get.log get_nbi.log ping_pong.log
|
||||
/proj/radl_extra/users/resperf/jenkins-2.192/workspace/shmem_short@script/internal/continuous_integration/short/check_perf_delta.py -c "DC_MULTI" -x "15.0" -l put.log put_nbi.log get.log get_nbi.log ping_pong.log get_swarm.log
|
||||
/proj/radl_extra/users/resperf/jenkins-2.192/workspace/shmem_short@script/internal/continuous_integration/short/check_perf_delta.py -c "RO_NET_BASIC" -x "75.0" -l put.log put_nbi.log get.log get_nbi.log ping_pong.log
|
||||
</command>
|
||||
<configuredLocalRules/>
|
||||
</hudson.tasks.Shell>
|
||||
</builders>
|
||||
<publishers/>
|
||||
<buildWrappers>
|
||||
<hudson.plugins.timestamper.TimestamperBuildWrapper plugin="timestamper@1.12"/>
|
||||
</buildWrappers>
|
||||
</project>
|
||||
@@ -1,35 +0,0 @@
|
||||
#!/tool/pandora64/.package/python-3.8.0/bin/python3
|
||||
|
||||
import parser
|
||||
import dictionary
|
||||
import archive_path
|
||||
import plotter
|
||||
|
||||
def main():
    """Build the build/changeset mappings and produce the latency plots.

    Each intermediate structure is printed as it is created.  The
    changeset-slice plots are always generated; the single-changeset plots
    are generated in addition when ``-o`` was given.
    """
    args = parser.Parser().parse_command_line()

    archives = archive_path.Archive(args)
    archives.generate()
    print(archives.dump())

    build_to_changeset = dictionary.BuildToChangesetDict()
    build_to_changeset.generate(archives.dirs)
    print(build_to_changeset.dump())

    changeset_to_build = dictionary.ChangesetToBuildDict()
    changeset_to_build.generate(build_to_changeset)
    print(changeset_to_build.dump())

    plot = plotter.Plot(args, archives, changeset_to_build)

    # Plots either every changeset or the slice given on the command line.
    plot.changeset_slice()

    if args.one_changeset:
        plot.one_changeset_plot()


if __name__ == '__main__':
    main()
|
||||
@@ -1,230 +0,0 @@
|
||||
#!/usr/bin/env Rscript
|
||||
|
||||
# load the required libraries:
|
||||
library(tidyverse)
|
||||
library(RColorBrewer)
|
||||
library(optparse)
|
||||
|
||||
# declare some helper functions
|
||||
# Save a plot to a temporary file of type `device` and hand that file to the
# `open` command. All other arguments are forwarded to ggplot2::ggsave().
# Returns NULL invisibly.
ggpreview <- function (..., device = "png") {
  preview_file <- tempfile(fileext = paste0(".", device))
  ggplot2::ggsave(filename = preview_file, device = device, ...)
  system2("open", preview_file)
  invisible(NULL)
}
|
||||
|
||||
# Reverse every column of `df` (so the oldest commit comes first) and turn
# the Commit column into a factor whose levels follow that row order, which
# makes ggplot keep the same order on the x-axis.
set_right_order <- function(df) {
  oldest_first <- map_df(df, rev)
  commit_levels <- unique(oldest_first$Commit)
  oldest_first$Commit <- factor(oldest_first$Commit, levels = commit_levels)
  oldest_first
}
|
||||
|
||||
# Draw column `yval` of `df` against column `xval` as a line with points and
# write the figure to `filename` as a PDF.
#   df        data frame holding both columns
#   xval/yval column names, given as strings
#   title, subtitle, xlabel  text placed on the figure
#   filename  output path of the PDF
# Colours come from the global palette `mycolors`.
plot_and_save <- function(df, xval, yval, title, subtitle, xlabel, filename) {
  axis_tweaks <- theme(
    axis.text.x = element_text(angle=90,hjust=1),
    axis.title.y = element_blank()
  )

  fig <- ggplot(df, aes_string(x=xval, y=yval, group=1))
  fig <- fig + geom_line(size = 0.5, color=mycolors[1])
  fig <- fig + geom_point(size = 1.5, alpha = 1, color=mycolors[2])
  fig <- fig + theme_minimal()
  fig <- fig + expand_limits(y=0)
  fig <- fig + xlab(xlabel)
  fig <- fig + ggtitle(title, subtitle = subtitle)
  fig <- fig + axis_tweaks
  fig <- fig + scale_fill_manual(values = mycolors)

  ggsave(filename, fig, device=pdf, dpi=500)
}
|
||||
|
||||
## Set up options ##
|
||||
|
||||
option_list = list(
  make_option(c("-o", "--output"), type="character", default=NULL, action="store",
              help="path (without trailing /) to a folder that will
contain the plots", metavar="folder-path"),
  make_option(c("-a", "--changeset_a"), type="character", default=NULL, action="store",
              help="beginning (inclusive) changeset of slice", metavar="changeset"),
  make_option(c("-b", "--changeset_b"), type="character", default=NULL, action="store",
              help="ending (inclusive) changeset of slice", metavar="changeset"),
  make_option(c("-c", "--one_changeset"), type="character", default=NULL, action="store",
              help="if set, will prepare plots for one changeset; if not, plots for a changeset slice")
)

## SCRIPT START ##

# Parse the options; --output is mandatory.
opt_parser <- OptionParser(option_list=option_list)
opts <- parse_args(opt_parser)
if (is.null(opts$output)) {
  print_help(opt_parser)
  stop("Please set the --output flag.", call.=FALSE)
}

# Exactly one plotting mode must be selected: a slice (both -a and -b) or a
# single changeset (-c).
slice_opt <- !is.null(opts$changeset_a) && !is.null(opts$changeset_b)
single_opt <- !is.null(opts$one_changeset)

if (slice_opt && single_opt) {
  stop("Please supply a slice or a single changeset, not both.", call.= FALSE)
}
if (!slice_opt && !single_opt) {
  # Previously this case reused the "not both" message, which was misleading
  # when nothing (or only one end of the slice) had been given.
  stop("Please supply either a changeset slice (-a and -b) or a single changeset (-c).", call.= FALSE)
}

# choose color palette
mycolors <- brewer.pal(5, "Set2")
|
||||
|
||||
if (length(opts$one_changeset) > 0) {
  ## Plotting data for a single changeset ##

  # Read the CSV files written for the single-changeset view.
  non_amo <- read.csv("non_amo_one_changeset.csv", header=TRUE)
  amo <- read.csv("amo_one_changeset.csv", header=TRUE)
  ping_pong <- read.csv("ping_pong_one_changeset.csv", header=TRUE)

  # Fix the factor levels to file order so ggplot keeps that x-axis order.
  non_amo$size <- factor(non_amo$size, levels = unique(non_amo$size))
  amo$op <- factor(amo$op, levels = unique(amo$op))

  # One latency-versus-message-size plot per put/get operation.
  non_amo_ops <- list("put","put_nbi","get","get_nbi")
  for (op in non_amo_ops) {
    plot_and_save(df=non_amo,
                  xval="size",
                  yval=op,
                  title=op,
                  subtitle="Latency (us)",
                  xlabel="Message size (bytes)",
                  filename=paste(opts$output,"/",op,"_changeset_",opts$one_changeset,".pdf", sep="")
    )
  }

  # Prepare data for plots with a fixed message size and the operations on
  # the x-axis: transpose so that every size becomes a column.
  non_amo$bsize <- paste("b",non_amo$size,sep="") # column names must start with a character
  non_amo_t <- setNames(data.frame(t(non_amo[,2:5])), non_amo[,6]) # transpose + set column names
  non_amo_t$op <- colnames(non_amo[,2:5]) # column holding the operation names

  # Every column except the trailing `op` column is a message size.
  sizes <- colnames(non_amo_t[,-(length(colnames(non_amo_t)))])
  for (size in sizes) {
    plot_and_save(df=non_amo_t,
                  xval="op",
                  yval=size,
                  title=paste(substring(size, 2),"byte"), # drop the leading "b"
                  subtitle="Latency (us)",
                  xlabel="Operation",
                  filename=paste(opts$output,"/",size,"_changeset_",opts$one_changeset,".pdf", sep="")
    )
  }

  plot_and_save(df=amo,
                xval="op",
                yval="latency",
                title="Atomics",
                subtitle="Latency (us)",
                xlabel="Operation",
                filename=paste(opts$output,"/atomic_changeset_",opts$one_changeset,".pdf", sep="")
  )

  # Ping-pong has a single latency value, drawn as one horizontal bar.
  ping_pong$type <- c("ping_pong")
  p <- ggplot(ping_pong, aes(x=type, y=latency, fill=type)) +
    geom_bar(stat="identity", width=0.5) +
    theme_minimal() +
    ggtitle("Ping pong", subtitle = "Latency (us)") +
    theme(
      axis.title.y = element_blank(),
      axis.text.y = element_blank(),
      axis.title.x = element_blank(),
      legend.position = "none"
    ) +
    coord_flip() +
    scale_fill_manual(values = mycolors)
  ggsave(paste(opts$output,"/ping_pong_changeset_",opts$one_changeset,".pdf", sep=""), p, device=pdf, dpi=500)

} else {
  ## Plotting across a changeset slice ##

  # Read the per-operation CSV files.  Commit is forced to character so that
  # an abbreviated hash made only of digits is not converted to a number
  # (which would drop leading zeros and break the match() below).
  read_commit_csv <- function(path) {
    read.csv(path, header=TRUE, colClasses=c(Commit="character"))
  }
  put <- read_commit_csv("put.csv")
  put_nbi <- read_commit_csv("put_nbi.csv")
  get <- read_commit_csv("get.csv")
  get_nbi <- read_commit_csv("get_nbi.csv")
  amo <- read_commit_csv("amo.csv")
  ping_pong <- read_commit_csv("ping_pong.csv")

  # Locate the slice boundaries in put.csv.
  start <- match(c(opts$changeset_a), put$Commit)
  end <- match(c(opts$changeset_b), put$Commit)
  if (is.na(start) || is.na(end)) {
    # Without this check `start:end` fails with an unrelated NA error.
    stop("A changeset given with -a/-b was not found in the Commit column of put.csv.", call.=FALSE)
  }

  # (start and end should be the same for all the frames) #
  put <- set_right_order(put[start:end,])
  put_nbi <- set_right_order(put_nbi[start:end,])
  get <- set_right_order(get[start:end,])
  get_nbi <- set_right_order(get_nbi[start:end,])
  amo <- set_right_order(amo[start:end,])
  ping_pong <- set_right_order(ping_pong[start:end,])

  # Look frames up by operation name instead of eval(parse(text=...)).
  non_amo_frames <- list("put"=put,
                         "put_nbi"=put_nbi,
                         "get"=get,
                         "get_nbi"=get_nbi)
  sizes_to_subtitle_map <- list("b1"="1 byte",
                                "b2"="2 bytes",
                                "b4"="4 bytes",
                                "b8"="8 bytes",
                                "b16"="16 bytes",
                                "b32"="32 bytes",
                                "b64"="64 bytes",
                                "b128"="128 bytes",
                                "b256"="256 bytes",
                                "b512"="512 bytes",
                                "b1024"="1024 bytes",
                                "b2048"="2048 bytes",
                                "b4096"="4096 bytes",
                                "b8192"="8192 bytes",
                                "b16384"="16384 bytes",
                                "b32768"="32768 bytes")

  # One plot per (operation, message size): latency across commits.
  for (op in names(non_amo_frames)) {
    for (size in names(sizes_to_subtitle_map)) {
      plot_and_save(df=non_amo_frames[[op]],
                    xval="Commit",
                    yval=size,
                    title=op,
                    subtitle=paste("Latency (us) for ",sizes_to_subtitle_map[[size]],sep=""),
                    xlabel="Commit (older to newer)",
                    filename=paste(opts$output,"/",op,"_",size,".pdf", sep="")
      )
    }
  }

  # One plot per atomic operation.
  amo_ops <- list("add","cswap","fadd","fcswap","fetch","finc","inc")
  for (op in amo_ops) {
    plot_and_save(df=amo,
                  xval="Commit",
                  yval=op,
                  title=op,
                  subtitle="Latency (us)",
                  xlabel="Commit (older to newer)",
                  filename=paste(opts$output,"/",op,".pdf", sep="")
    )
  }

  plot_and_save(df=ping_pong,
                xval="Commit",
                yval="latency",
                title="ping_pong",
                subtitle="Latency (us)",
                xlabel="Commit (older to newer)",
                filename=paste(opts$output,"/","ping_pong.pdf", sep="")
  )
}
|
||||
|
||||
## SCRIPT END ##
|
||||
@@ -1,295 +0,0 @@
|
||||
#!/tool/pandora64/.package/python-3.8.0/bin/python3
|
||||
|
||||
import dictionary
|
||||
import log
|
||||
#import matplotlib.pyplot
|
||||
import numpy
|
||||
import csv
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
class Plot():
    """Export parsed benchmark latencies to CSV and render them via plotter.R.

    ``changeset_slice`` must run first: it builds ``self._log_tracker``,
    which ``one_changeset_plot`` reads.
    """

    # Message sizes in bytes; row i of a put/get log is plotted as size i.
    _MESSAGE_SIZES = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048,
                      4096, 8192, 16384, 32768]
    # CSV column names for those sizes: 'b1', 'b2', ..., 'b32768'.
    _SIZE_FIELD_NAMES = ['b' + str(size) for size in _MESSAGE_SIZES]
    # Log file name -> operation name for the per-size (non-atomic) logs.
    _SIZE_LOG_TO_OP = {
        'put.log': 'put',
        'put_nbi.log': 'put_nbi',
        'get.log': 'get',
        'get_nbi.log': 'get_nbi',
    }
    # Log file name -> operation name for the atomic logs.
    _AMO_LOG_TO_OP = {
        'amo_add.log': 'add',
        'amo_cswap.log': 'cswap',
        'amo_fadd.log': 'fadd',
        'amo_fcswap.log': 'fcswap',
        'amo_fetch.log': 'fetch',
        'amo_finc.log': 'finc',
        'amo_inc.log': 'inc',
    }

    def __init__(self, args, archives, changeset_to_build):
        self._args = args
        self._archives = archives
        self._changelog = dictionary.ChangelogToMostRecentBuild()
        self._changelog.generate(changeset_to_build)
        print(self._changelog.dump())

    def abbreviate_changesets(self, changesets):
        """Return the first eight characters of every changeset id.

        NOTE(review): the CSV and Rscript code below abbreviates to seven
        characters; this helper is not used inside this class.
        """
        return [changeset[0:8] for changeset in changesets]

    @staticmethod
    def write_dict_to_file(tracker, field_names, file_name):
        """Write the list of row dicts ``tracker`` to ``file_name`` as CSV."""
        # newline='' is what the csv module asks for; without it some
        # platforms emit an extra carriage return per row.
        with open(file_name, 'w', newline='') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=field_names)
            writer.writeheader()
            writer.writerows(tracker)

    @staticmethod
    def check_and_add_to_dict(dictionary, key, array):
        """Store ``array[0]`` under ``key``, or ``0`` when ``array`` is empty."""
        dictionary[key] = array[0] if len(array) > 0 else 0

    @classmethod
    def _size_row(cls, commit, latency):
        """Build one CSV row: abbreviated commit plus one latency per size.

        Raises IndexError when ``latency`` has fewer entries than there are
        message sizes.
        """
        row = {'Commit': commit[0:7]}
        for index, name in enumerate(cls._SIZE_FIELD_NAMES):
            row[name] = latency[index]
        return row

    def _resolve_changeset_range(self):
        """Return the ``(first, last)`` changesets to hand to the R script.

        Defaults to the first and last tracked changeset.  When ``-r`` was
        given, both requested ids must be tracked; otherwise the program
        exits with an explanatory message.
        """
        tracked = [key[0] for key in self._log_tracker._data]
        changeset_a, changeset_b = tracked[0], tracked[-1]
        requested = self._args.changeset_range
        if requested:
            known = set(tracked)
            # Exit if EITHER end is unknown (the message says "one of").
            if requested[0] not in known or requested[1] not in known:
                sys.exit("One of the specified changesets was not found. Please specify correct/complete commit IDs.")
            changeset_a, changeset_b = requested[0], requested[1]
        return changeset_a, changeset_b

    def _run_plotter(self, extra_args):
        """Ensure ./plots exists, then run plotter.R with ``extra_args``.

        The command is passed as an argument list (no shell), so changeset
        ids coming from the command line cannot be interpreted as shell
        syntax.
        """
        os.makedirs(os.path.join(os.getcwd(), 'plots'), exist_ok=True)
        command = ['Rscript', './plotter.R', '-o', './plots'] + list(extra_args)
        print(' '.join(command))
        subprocess.check_call(command)

    def changeset_slice(self):
        """Write the per-operation CSVs for all tracked changesets and plot.

        Produces put.csv, put_nbi.csv, get.csv, get_nbi.csv, amo.csv and
        ping_pong.csv in the current directory and then runs plotter.R in
        slice mode.
        """
        self._log_tracker = log.Tracker(self._args, self._archives)
        for changeset in self._changelog._all_changesets:
            if changeset in self._changelog.data:
                build_id = self._changelog.data[changeset]
                self._log_tracker.add(changeset, build_id)
        print(self._log_tracker.dump())

        tracked = self._log_tracker._data
        if not tracked:
            sys.exit("No logs were found for any changeset; nothing to plot.")

        # One list of CSV rows per operation.
        size_rows = {op: [] for op in self._SIZE_LOG_TO_OP.values()}
        amo_tracker = []
        ping_pong_tracker = []

        # Atomic results of one commit are spread over several logs; they
        # are gathered in amo_dict and flushed when the commit changes.
        amo_dict = {}
        prev_commit = next(iter(tracked))[0]
        for (commit, log_name), value in tracked.items():
            if log_name in self._SIZE_LOG_TO_OP:
                size_rows[self._SIZE_LOG_TO_OP[log_name]].append(
                    self._size_row(commit, value.latency))
            elif log_name == "ping_pong.log":
                row = {'Commit': commit[0:7]}
                # Same empty-log fallback as the atomic logs.
                self.check_and_add_to_dict(row, 'latency', value.latency)
                ping_pong_tracker.append(row)

            if commit != prev_commit:
                amo_dict['Commit'] = prev_commit[0:7]
                amo_tracker.append(amo_dict.copy())
                amo_dict.clear()
            prev_commit = commit

            amo_op = self._AMO_LOG_TO_OP.get(log_name)
            if amo_op is not None:
                self.check_and_add_to_dict(amo_dict, amo_op, value.latency)

        # store the last commit's amo data
        amo_dict['Commit'] = prev_commit[0:7]
        amo_tracker.append(amo_dict.copy())

        size_field_names = ['Commit'] + self._SIZE_FIELD_NAMES
        amo_field_names = ['Commit', 'add', 'cswap', 'fadd', 'fcswap',
                           'fetch', 'finc', 'inc']
        ping_pong_field_names = ['Commit', 'latency']

        for op, rows in size_rows.items():
            self.write_dict_to_file(rows, size_field_names, op + ".csv")
        self.write_dict_to_file(amo_tracker, amo_field_names, "amo.csv")
        self.write_dict_to_file(ping_pong_tracker, ping_pong_field_names,
                                "ping_pong.csv")

        changeset_a, changeset_b = self._resolve_changeset_range()
        self._run_plotter(['-a', changeset_a[0:7], '-b', changeset_b[0:7]])

    def one_changeset_plot(self):
        """Write the CSVs for ``args.one_changeset`` and plot them.

        Produces non_amo_one_changeset.csv, amo_one_changeset.csv and
        ping_pong_one_changeset.csv in the current directory and then runs
        plotter.R in single-changeset mode.  Exits when the changeset, or
        one of its put/get logs, is not tracked.
        """
        wanted = self._args.one_changeset
        found_changeset = False
        size_vals = {}
        amo_tracker = []
        ping_pong_tracker = []
        for (commit, log_name), value in self._log_tracker._data.items():
            if commit != wanted:
                continue
            found_changeset = True
            if log_name in self._SIZE_LOG_TO_OP:
                size_vals[self._SIZE_LOG_TO_OP[log_name]] = value.latency
            elif log_name in self._AMO_LOG_TO_OP:
                # Each atomic log contributes exactly one row.
                row = {'op': self._AMO_LOG_TO_OP[log_name]}
                self.check_and_add_to_dict(row, 'latency', value.latency)
                amo_tracker.append(row)
            elif log_name == "ping_pong.log":
                row = {}
                self.check_and_add_to_dict(row, 'latency', value.latency)
                ping_pong_tracker.append(row)

        if not found_changeset:
            sys.exit("The requested changeset was not found. Please specify correct/complete commit IDs.")

        non_amo_field_names = ['size', 'put', 'put_nbi', 'get', 'get_nbi']
        missing = [op for op in non_amo_field_names[1:] if op not in size_vals]
        if missing:
            sys.exit("The requested changeset has no log for: "
                     + ', '.join(missing))

        non_amo_tracker = []
        for index, size in enumerate(self._MESSAGE_SIZES):
            row = {'size': size}
            for op in non_amo_field_names[1:]:
                row[op] = size_vals[op][index]
            non_amo_tracker.append(row)

        amo_field_names = ['op', 'latency']
        ping_pong_field_names = ['latency']

        self.write_dict_to_file(non_amo_tracker, non_amo_field_names,
                                "non_amo_one_changeset.csv")
        self.write_dict_to_file(amo_tracker, amo_field_names,
                                "amo_one_changeset.csv")
        self.write_dict_to_file(ping_pong_tracker, ping_pong_field_names,
                                "ping_pong_one_changeset.csv")

        # -c tells the R script to plot figures for a single changeset.
        self._run_plotter(['-c', wanted])
|
||||
|
||||
@@ -1,21 +0,0 @@
|
||||
#!/tool/pandora64/.package/python-3.8.0/bin/python3
|
||||
|
||||
import sys
|
||||
|
||||
class Report():
    """A text report written line by line to ``path/filename``."""

    def __init__(self, identifier, path, filename):
        self._identifier = identifier
        self._path = path
        self._filename = filename

    def open(self):
        """Open the report file for writing; exit the program on failure."""
        print('opening report for ' + self._identifier)
        # Built outside the try block so the failure message can always
        # refer to it.
        report_path = self._path + '/' + self._filename
        print('report_path: ' + report_path)
        try:
            self._file_handle = open(report_path, 'w')
        except OSError:
            sys.exit('failed to open report: ' + report_path)

    def record(self, message):
        """Append ``message`` and a newline to the report."""
        self._file_handle.write(message + '\n')
|
||||
@@ -1,96 +0,0 @@
|
||||
<?xml version='1.1' encoding='UTF-8'?>
|
||||
<flow-definition plugin="workflow-job@2.40">
|
||||
<actions>
|
||||
<org.jenkinsci.plugins.pipeline.modeldefinition.actions.DeclarativeJobAction plugin="pipeline-model-definition@1.8.4"/>
|
||||
<org.jenkinsci.plugins.pipeline.modeldefinition.actions.DeclarativeJobPropertyTrackerAction plugin="pipeline-model-definition@1.8.4">
|
||||
<jobProperties/>
|
||||
<triggers/>
|
||||
<parameters/>
|
||||
<options/>
|
||||
</org.jenkinsci.plugins.pipeline.modeldefinition.actions.DeclarativeJobPropertyTrackerAction>
|
||||
</actions>
|
||||
<description></description>
|
||||
<keepDependencies>false</keepDependencies>
|
||||
<properties>
|
||||
<org.jenkinsci.plugins.workflow.job.properties.DisableConcurrentBuildsJobProperty/>
|
||||
<org.jenkinsci.plugins.workflow.job.properties.PipelineTriggersJobProperty>
|
||||
<triggers>
|
||||
<com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.GerritTrigger plugin="gerrit-trigger@2.33.0">
|
||||
<spec></spec>
|
||||
<gerritProjects>
|
||||
<com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.data.GerritProject>
|
||||
<compareType>PLAIN</compareType>
|
||||
<pattern>rsch/ec/shmem</pattern>
|
||||
<branches>
|
||||
<com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.data.Branch>
|
||||
<compareType>PLAIN</compareType>
|
||||
<pattern>amd-master</pattern>
|
||||
</com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.data.Branch>
|
||||
</branches>
|
||||
<disableStrictForbiddenFileVerification>false</disableStrictForbiddenFileVerification>
|
||||
</com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.data.GerritProject>
|
||||
</gerritProjects>
|
||||
<dynamicGerritProjects class="empty-list"/>
|
||||
<skipVote>
|
||||
<onSuccessful>false</onSuccessful>
|
||||
<onFailed>false</onFailed>
|
||||
<onUnstable>false</onUnstable>
|
||||
<onNotBuilt>false</onNotBuilt>
|
||||
<onAborted>false</onAborted>
|
||||
</skipVote>
|
||||
<silentMode>false</silentMode>
|
||||
<enableTopicAssociation>false</enableTopicAssociation>
|
||||
<notificationLevel></notificationLevel>
|
||||
<silentStartMode>false</silentStartMode>
|
||||
<escapeQuotes>true</escapeQuotes>
|
||||
<nameAndEmailParameterMode>PLAIN</nameAndEmailParameterMode>
|
||||
<dependencyJobsNames>shmem_perf_check, </dependencyJobsNames>
|
||||
<commitMessageParameterMode>BASE64</commitMessageParameterMode>
|
||||
<changeSubjectParameterMode>PLAIN</changeSubjectParameterMode>
|
||||
<commentTextParameterMode>BASE64</commentTextParameterMode>
|
||||
<buildStartMessage></buildStartMessage>
|
||||
<buildFailureMessage></buildFailureMessage>
|
||||
<buildSuccessfulMessage></buildSuccessfulMessage>
|
||||
<buildUnstableMessage></buildUnstableMessage>
|
||||
<buildNotBuiltMessage></buildNotBuiltMessage>
|
||||
<buildAbortedMessage></buildAbortedMessage>
|
||||
<buildUnsuccessfulFilepath></buildUnsuccessfulFilepath>
|
||||
<customUrl></customUrl>
|
||||
<serverName>amd-gerrit</serverName>
|
||||
<triggerOnEvents>
|
||||
<com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.events.PluginCommentAddedContainsEvent>
|
||||
<commentAddedCommentContains>!SHORT</commentAddedCommentContains>
|
||||
</com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.events.PluginCommentAddedContainsEvent>
|
||||
</triggerOnEvents>
|
||||
<dynamicTriggerConfiguration>false</dynamicTriggerConfiguration>
|
||||
<triggerConfigURL></triggerConfigURL>
|
||||
<triggerInformationAction/>
|
||||
</com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.GerritTrigger>
|
||||
</triggers>
|
||||
</org.jenkinsci.plugins.workflow.job.properties.PipelineTriggersJobProperty>
|
||||
</properties>
|
||||
<definition class="org.jenkinsci.plugins.workflow.cps.CpsScmFlowDefinition" plugin="workflow-cps@2.90">
|
||||
<scm class="hudson.plugins.git.GitSCM" plugin="git@4.7.1">
|
||||
<configVersion>2</configVersion>
|
||||
<userRemoteConfigs>
|
||||
<hudson.plugins.git.UserRemoteConfig>
|
||||
<name>origin</name>
|
||||
<refspec>${GERRIT_REFSPEC}</refspec>
|
||||
<url>ssh://gerritgit/rsch/ec/shmem</url>
|
||||
</hudson.plugins.git.UserRemoteConfig>
|
||||
</userRemoteConfigs>
|
||||
<branches>
|
||||
<hudson.plugins.git.BranchSpec>
|
||||
<name>FETCH_HEAD</name>
|
||||
</hudson.plugins.git.BranchSpec>
|
||||
</branches>
|
||||
<doGenerateSubmoduleConfigurations>false</doGenerateSubmoduleConfigurations>
|
||||
<submoduleCfg class="empty-list"/>
|
||||
<extensions/>
|
||||
</scm>
|
||||
<scriptPath>internal/continuous_integration/short/Jenkinsfile</scriptPath>
|
||||
<lightweight>false</lightweight>
|
||||
</definition>
|
||||
<triggers/>
|
||||
<disabled>false</disabled>
|
||||
</flow-definition>
|
||||
@@ -1,34 +0,0 @@
|
||||
#!/tool/pandora64/.package/python-3.8.0/bin/python3
|
||||
|
||||
import pprint
|
||||
import report
|
||||
import sys
|
||||
|
||||
class Threshold():
    """Collects values that reach or exceed a maximum threshold."""

    def __init__(self, maximum_threshold, violation_type):
        """Start with no violations recorded.

        maximum_threshold -- values >= this are treated as violations
        violation_type    -- label appended to every violation key
        """
        self._violation_type = violation_type
        self._maximum_threshold = maximum_threshold
        self._violations = {}

    def check(self, value, changeset, filename):
        """Record and print ``value`` when it reaches the threshold."""
        if value < self._maximum_threshold:
            return
        label = '|'.join([changeset, filename, self._violation_type])
        self._violations[label] = value
        print(label + ': ' + str(value) + '%')

    def provide_violations_to_report(self, report):
        """Write the verdict to ``report`` and exit the process.

        Exits with status 1 (after recording 'FAILURE' and the dump) when
        violations exist, otherwise records 'SUCCESS' and exits with 0.
        """
        if not self.has_violations():
            report.record('SUCCESS')
            sys.exit(0)
        report.record('FAILURE')
        report.record(self.dump())
        sys.exit(1)

    def has_violations(self):
        """Return True when at least one violation has been recorded."""
        return len(self._violations) > 0

    def dump(self):
        """Return the violations pretty-printed, terminated by a newline."""
        return pprint.pformat(self._violations, width=120) + '\n'
|
||||
@@ -1,151 +0,0 @@
|
||||
pipeline {
|
||||
agent { label 'sv-pdp-5' }
|
||||
environment {
|
||||
HSA_FORCE_FINE_GRAIN_PCIE = 1
|
||||
MPI_HOME="/home/resperf/mpich-4.0.1/install/global"
|
||||
PATH = "$MPI_HOME/bin:$PATH"
|
||||
LD_LIBRARY_PATH = "$MPI_HOME/lib:$LD_LIBRARY_PATH"
|
||||
build_dir = "builds/change-${GERRIT_CHANGE_NUMBER}-${GERRIT_PATCHSET_NUMBER}"
|
||||
CMAKE_PREFIX_PATH = "/opt/rocm/lib/cmake"
|
||||
}
|
||||
stages {
|
||||
stage('Synchronize Source Code') {
|
||||
steps {
|
||||
checkout changelog: false, poll: false, scm: [$class: 'GitSCM', branches: [[name: 'FETCH_HEAD']], doGenerateSubmoduleConfigurations: false, extensions: [[$class: 'CloneOption', depth: 0, noTags: false, reference: '', shallow: false]], submoduleCfg: [], userRemoteConfigs: [[name: 'origin', refspec: '${GERRIT_REFSPEC}', url: 'ssh://gerritgit/rsch/ec/shmem']]]
|
||||
}
|
||||
}
|
||||
stage('Make Build Directory') {
|
||||
steps {
|
||||
dir("library") {
|
||||
sh "mkdir -p ${build_dir}"
|
||||
}
|
||||
}
|
||||
}
|
||||
stage('Build Source Code') {
|
||||
parallel {
|
||||
stage('RC_SINGLE') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/RC_SINGLE") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/rc_single install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/RC_SINGLE") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_SINGLE/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RC_MULTI_WF_COAL') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/RC_MULTI_WF_COAL") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/rc_multi_wf_coal install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/RC_MULTI_WF_COAL") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_MULTI_WF_COAL/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RC_MULTI') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/RC_MULTI") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/rc_multi install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/RC_MULTI") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_MULTI/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_SINGLE') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/DC_SINGLE") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/dc_single install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/DC_SINGLE") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_SINGLE/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_MULTI') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/DC_MULTI") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/dc_multi install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/DC_MULTI") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
stage('Run Tests') {
|
||||
parallel {
|
||||
stage('RC_SINGLE') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/RC_SINGLE/rocshmem_example_driver single_thread ${build_dir}/RC_SINGLE'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RC_MULTI_WF_COAL') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/RC_MULTI_WF_COAL/rocshmem_example_driver multi_thread ${build_dir}/RC_MULTI_WF_COAL'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RC_MULTI') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/RC_MULTI/rocshmem_example_driver multi_thread ${build_dir}/RC_MULTI'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_SINGLE') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/DC_SINGLE/rocshmem_example_driver single_thread ${build_dir}/DC_SINGLE'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_MULTI') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/DC_MULTI/rocshmem_example_driver multi_thread ${build_dir}/DC_MULTI'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RO_NET_BASIC') {
|
||||
// RO_NET controlled at runtime, no need for a new build. Use RC_MULTI
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh 'mkdir -p ${build_dir}/RO_NET_BASIC'
|
||||
sh 'ROCSHMEM_RO=1 RO_NET_CPU_QUEUE=1 UCX_TLS=rc ./driver.sh ${build_dir}/RC_MULTI/rocshmem_example_driver ro ${build_dir}/RO_NET_BASIC'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,85 +0,0 @@
|
||||
<?xml version='1.1' encoding='UTF-8'?>
|
||||
<flow-definition plugin="workflow-job@2.40">
|
||||
<actions/>
|
||||
<description></description>
|
||||
<keepDependencies>false</keepDependencies>
|
||||
<properties>
|
||||
<org.jenkinsci.plugins.workflow.job.properties.PipelineTriggersJobProperty>
|
||||
<triggers>
|
||||
<com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.GerritTrigger plugin="gerrit-trigger@2.33.0">
|
||||
<spec></spec>
|
||||
<gerritProjects>
|
||||
<com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.data.GerritProject>
|
||||
<compareType>PLAIN</compareType>
|
||||
<pattern>rsch/ec/shmem</pattern>
|
||||
<branches>
|
||||
<com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.data.Branch>
|
||||
<compareType>PLAIN</compareType>
|
||||
<pattern>amd-master</pattern>
|
||||
</com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.data.Branch>
|
||||
</branches>
|
||||
<disableStrictForbiddenFileVerification>false</disableStrictForbiddenFileVerification>
|
||||
</com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.data.GerritProject>
|
||||
</gerritProjects>
|
||||
<dynamicGerritProjects class="empty-list"/>
|
||||
<skipVote>
|
||||
<onSuccessful>true</onSuccessful>
|
||||
<onFailed>true</onFailed>
|
||||
<onUnstable>true</onUnstable>
|
||||
<onNotBuilt>true</onNotBuilt>
|
||||
<onAborted>true</onAborted>
|
||||
</skipVote>
|
||||
<silentMode>false</silentMode>
|
||||
<enableTopicAssociation>false</enableTopicAssociation>
|
||||
<notificationLevel></notificationLevel>
|
||||
<silentStartMode>false</silentStartMode>
|
||||
<escapeQuotes>true</escapeQuotes>
|
||||
<nameAndEmailParameterMode>PLAIN</nameAndEmailParameterMode>
|
||||
<dependencyJobsNames></dependencyJobsNames>
|
||||
<commitMessageParameterMode>BASE64</commitMessageParameterMode>
|
||||
<changeSubjectParameterMode>PLAIN</changeSubjectParameterMode>
|
||||
<commentTextParameterMode>BASE64</commentTextParameterMode>
|
||||
<buildStartMessage></buildStartMessage>
|
||||
<buildFailureMessage></buildFailureMessage>
|
||||
<buildSuccessfulMessage></buildSuccessfulMessage>
|
||||
<buildUnstableMessage></buildUnstableMessage>
|
||||
<buildNotBuiltMessage></buildNotBuiltMessage>
|
||||
<buildAbortedMessage></buildAbortedMessage>
|
||||
<buildUnsuccessfulFilepath></buildUnsuccessfulFilepath>
|
||||
<customUrl></customUrl>
|
||||
<serverName>amd-gerrit</serverName>
|
||||
<triggerOnEvents>
|
||||
<com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.events.PluginCommentAddedContainsEvent>
|
||||
<commentAddedCommentContains>!SMOKE</commentAddedCommentContains>
|
||||
</com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.events.PluginCommentAddedContainsEvent>
|
||||
</triggerOnEvents>
|
||||
<dynamicTriggerConfiguration>false</dynamicTriggerConfiguration>
|
||||
<triggerConfigURL></triggerConfigURL>
|
||||
<triggerInformationAction/>
|
||||
</com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.GerritTrigger>
|
||||
</triggers>
|
||||
</org.jenkinsci.plugins.workflow.job.properties.PipelineTriggersJobProperty>
|
||||
</properties>
|
||||
<definition class="org.jenkinsci.plugins.workflow.cps.CpsScmFlowDefinition" plugin="workflow-cps@2.90">
|
||||
<scm class="hudson.plugins.git.GitSCM" plugin="git@4.7.1">
|
||||
<configVersion>2</configVersion>
|
||||
<userRemoteConfigs>
|
||||
<hudson.plugins.git.UserRemoteConfig>
|
||||
<url>ssh://gerritgit/rsch/ec/shmem</url>
|
||||
</hudson.plugins.git.UserRemoteConfig>
|
||||
</userRemoteConfigs>
|
||||
<branches>
|
||||
<hudson.plugins.git.BranchSpec>
|
||||
<name>FETCH_HEAD</name>
|
||||
</hudson.plugins.git.BranchSpec>
|
||||
</branches>
|
||||
<doGenerateSubmoduleConfigurations>false</doGenerateSubmoduleConfigurations>
|
||||
<submoduleCfg class="empty-list"/>
|
||||
<extensions/>
|
||||
</scm>
|
||||
<scriptPath>internal/continuous_integration/smoke/Jenkinsfile</scriptPath>
|
||||
<lightweight>false</lightweight>
|
||||
</definition>
|
||||
<triggers/>
|
||||
<disabled>false</disabled>
|
||||
</flow-definition>
|
||||
@@ -1,47 +0,0 @@
|
||||
#! /usr/bin/python
|
||||
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
|
||||
import os
|
||||
|
||||
# File name endings that mark a file for inclusion in the cscope file list.
suffixes = [ '.cpp', '.hpp', '.c', '.h' ]
# Directories that the script walks when building the file list.
directories = [ 'src', 'include' ]


def oksuffix(f):
    """Return True when file name ``f`` ends with one of ``suffixes``."""
    return any(f.endswith(ending) for ending in suffixes)
|
||||
|
||||
def try_index_dir(directory):
    """Append every matching file below ``cwd/directory`` to ``file_list``.

    Walks the tree with os.walk() and, for each directory that contains at
    least one file accepted by oksuffix(), writes the full paths of those
    files to the module-level ``file_list`` handle, one path per line.
    """
    for dirpath, subdirs, files in os.walk(os.path.join(cwd, directory)):
        okfiles = [f for f in files if oksuffix(f)]
        if okfiles:
            # write() with an explicit trailing newline produces the same
            # bytes as the Python-2-only "print >> file_list, ..." form and
            # works on both Python 2 and Python 3.
            file_list.write(
                '\n'.join([os.path.join(dirpath, f) for f in okfiles]) + '\n')
|
||||
|
||||
|
||||
# Build cscope.files in the current directory, then let cscope index it.
# open() is used instead of file(): file() does not exist in Python 3, while
# open() behaves the same on both interpreter lines.
file_list = open('cscope.files', 'w')
cwd = os.getcwd()
try:
    for d in directories:
        try_index_dir(d)
finally:
    # Close the list even when indexing raises, so the handle never leaks.
    file_list.close()

# "-b" asks cscope to build the cross-reference only.
os.system("cscope -b")
|
||||
@@ -1,105 +0,0 @@
|
||||
HIPCC=hipcc
|
||||
BUILD=./build
|
||||
SRC=./src
|
||||
RESULTS=./results
|
||||
|
||||
#rocshmem_DIR=${HOME}/rocshmem
|
||||
#MPI_HOME=${HOME}/mpich/install
|
||||
NCCL_HOME=${HOME}/rccl/build
|
||||
|
||||
MPI_FLAGS=-lmpi -lhsa-runtime64 -lrt -L${MPI_HOME}/lib -fgpu-rdc
|
||||
SHMEM_FLAGS=${MPI_FLAGS} -lmlx5 -libverbs
|
||||
RCCL_FLAGS=${MPI_FLAGS} -Wl,-rpath,$(NCCL_HOME) -L${NCCL_HOME} -lrccl
|
||||
|
||||
.SILENT: run_scan extract_scan run_sort run_sort_shmem run_sort_rccl extract_sort
|
||||
|
||||
all: ${BUILD}/sort_shmem ${BUILD}/sort_rccl ${BUILD}/sort_mpi
|
||||
|
||||
${BUILD}/sort_shmem: ${BUILD}/sort_shmem.o ${rocshmem_DIR}/lib/librocshmem.a
|
||||
${HIPCC} $^ ${SHMEM_FLAGS} -o $@
|
||||
|
||||
${BUILD}/sort_shmem.o: ${SRC}/sort_shmem.cu
|
||||
${HIPCC} $^ -I${rocshmem_DIR}/include -I${MPI_HOME}/include -fgpu-rdc -o $@ -c
|
||||
|
||||
${BUILD}/sort_rccl: ${BUILD}/sort_rccl.o
|
||||
${HIPCC} $^ ${RCCL_FLAGS} -o $@
|
||||
|
||||
${BUILD}/sort_rccl.o: ${SRC}/sort_rccl.cu
|
||||
${HIPCC} $^ -I$(NCCL_HOME)/include/rccl -I${MPI_HOME}/include -fgpu-rdc -o $@ -c
|
||||
|
||||
${BUILD}/sort_mpi: ${BUILD}/sort_mpi.o
|
||||
${HIPCC} $^ ${MPI_FLAGS} -o $@
|
||||
|
||||
${BUILD}/sort_mpi.o: ${SRC}/sort_mpi.cu
|
||||
${HIPCC} $^ -I${MPI_HOME}/include -fgpu-rdc -o $@ -c
|
||||
|
||||
RO_FLAGS=ROCSHMEM_RO=1 RO_NET_CPU_QUEUE=1
|
||||
ITERS?=0 1 2 3 4 5 6 7 8 9
|
||||
TIMEOUT=1m
|
||||
HOSTS=sv-pdp-0,sv-pdp-1,sv-pdp-2,sv-pdp-3
|
||||
SCAN_SIZE=1024
|
||||
PES=2 4 8 12 16
|
||||
PES_RCCL=2 4 8
|
||||
|
||||
TYPE ?= Naive
|
||||
LABEL ?= naive
|
||||
PARAM ?= 0
|
||||
NUM_PES ?= 2
|
||||
|
||||
run_sort_shmem: ${BUILD}/sort_shmem
|
||||
printf "${TYPE} ";\
|
||||
echo "" > ${RESULTS}/sort_${LABEL}_${NUM_PES}.out; \
|
||||
for j in ${ITERS}; do \
|
||||
${RO_FLAGS} timeout ${TIMEOUT} mpirun -np ${NUM_PES} -hosts ${HOSTS} ${BUILD}/sort_shmem ${PARAM} >> ${RESULTS}/sort_${LABEL}_${NUM_PES}.out;\
|
||||
done;
|
||||
|
||||
run_sort_rccl: ${BUILD}/sort_rccl
|
||||
printf "RCCL "; \
|
||||
echo "" > ${RESULTS}/sort_rccl_${NUM_PES}.out; \
|
||||
for j in ${ITERS}; do \
|
||||
timeout ${TIMEOUT} mpirun -np ${NUM_PES} -hosts ${HOSTS} ${BUILD}/sort_rccl >> ${RESULTS}/sort_rccl_${NUM_PES}.out;\
|
||||
done;
|
||||
|
||||
# Run the MPI sort binary once per entry in ${ITERS} with ${NUM_PES} ranks and
# collect its output in ${RESULTS}/sort_mpi2_${NUM_PES}.out (truncated first).
# The prerequisite is the binary the recipe actually launches, sort_mpi, so
# that it is (re)built before the runs start.
run_sort_mpi: ${BUILD}/sort_mpi
	printf "MPI2 "; \
	echo "" > ${RESULTS}/sort_mpi2_${NUM_PES}.out; \
	for j in ${ITERS}; do \
		timeout ${TIMEOUT} mpirun -np ${NUM_PES} -hosts ${HOSTS} ${BUILD}/sort_mpi >> ${RESULTS}/sort_mpi2_${NUM_PES}.out;\
	done;
|
||||
|
||||
run_sort: ${BUILD}/sort_shmem ${BUILD}/sort_rccl
|
||||
for i in ${PES}; do \
|
||||
printf "%d " $$i; \
|
||||
$(MAKE) --no-print-directory run_sort_shmem TYPE=NAIVE LABEL=naive PARAM=0 NUM_PES=$${i}; \
|
||||
$(MAKE) --no-print-directory run_sort_shmem TYPE=MPI LABEL=mpi PARAM=1 NUM_PES=$${i}; \
|
||||
$(MAKE) --no-print-directory run_sort_shmem TYPE=GCEN LABEL=gcen PARAM=2 NUM_PES=$${i}; \
|
||||
$(MAKE) --no-print-directory run_sort_shmem TYPE=GCEN2 LABEL=gcen2 PARAM=3 NUM_PES=$${i}; \
|
||||
$(MAKE) --no-print-directory run_sort_mpi NUM_PES=$${i}; \
|
||||
printf "\n";\
|
||||
done
|
||||
for i in ${PES_RCCL}; do \
|
||||
$(MAKE) --no-print-directory run_sort_rccl NUM_PES=$${i}; \
|
||||
printf "%d " $$i; \
|
||||
done
|
||||
|
||||
$(MAKE) extract_sort
|
||||
|
||||
|
||||
# Print a tab-separated table of sort latencies: one row per (PE count, type)
# pair, followed by the first decimal number found on every "Avg time" line of
# ${RESULTS}/sort_<type>_<pes>.out.
# Each result file is scanned exactly once; the former unused "latency"
# assignment (a second, discarded grep over the same file) has been dropped.
extract_sort:
	printf "Sort latency\n"
	printf "PROCS\tType\tRuns"
	for i in ${PES}; do \
		for type in mpi mpi2 rccl naive gcen gcen2; do\
			printf "\n%d\t$${type}\t" $$i; \
			file=${RESULTS}/sort_$${type}_$${i}.out;\
			grep -E "Avg time" $${file} | while read -r j; do\
				val=$$(echo $$j | grep -oE -m1 "[0-9]+\.[0-9]+");\
				printf "%s\t" $${val};\
			done; \
		done;\
	done
	printf "\n"
|
||||
|
||||
# Delete everything inside the build directory named by ${BUILD}.
# -f keeps the target from failing when the directory is already empty, and
# .PHONY makes sure a stray file called "clean" cannot disable the rule.
.PHONY: clean
clean:
	rm -f ${BUILD}/*
|
||||
@@ -1,70 +0,0 @@
|
||||
#include <chrono>
|
||||
#include <iostream>
|
||||
#include <stdio.h>
|
||||
#include <mpi.h>
|
||||
#include <unistd.h>
|
||||
#include <hip/hip_runtime.h>
|
||||
using namespace std;
|
||||
|
||||
#define TIME_NOW std::chrono::steady_clock::now()
|
||||
#define TIME_DIFF(a, b) std::chrono::duration_cast<std::chrono::nanoseconds>(a - b).count()
|
||||
|
||||
#define HIPCHECK(cmd) do { \
|
||||
hipError_t e = cmd; \
|
||||
if( e != hipSuccess ) { \
|
||||
printf("Failed: Hip error %s:%d '%s'\n", \
|
||||
__FILE__,__LINE__,hipGetErrorString(e)); \
|
||||
exit(EXIT_FAILURE); \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define NCCLCHECK(cmd) do { \
|
||||
ncclResult_t r = cmd; \
|
||||
if (r!= ncclSuccess) { \
|
||||
printf("Failed, NCCL error %s:%d '%s'\n", \
|
||||
__FILE__,__LINE__,ncclGetErrorString(r)); \
|
||||
exit(EXIT_FAILURE); \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
// Copied from rccl-tests, used to hash hostname
|
||||
static uint64_t getHash(const char* string, size_t n) {
  // DJB2a: start from 5381 and fold in each of the n bytes as
  // hash = hash * 33 ^ byte.
  uint64_t hash = 5381;
  const char* const end = string + n;
  for (const char* cur = string; cur != end; ++cur) {
    hash = (hash * 33) ^ *cur;
  }
  return hash;
}
|
||||
|
||||
/* Generate a hash of the unique identifying string for this host
|
||||
* that will be unique for both bare-metal and container instances
|
||||
* Equivalent of a hash of;
|
||||
*
|
||||
* $(hostname)$(cat /proc/sys/kernel/random/boot_id)
|
||||
*
|
||||
*/
|
||||
#define HOSTID_FILE "/proc/sys/kernel/random/boot_id"
|
||||
static uint64_t getHostHash(const char* hostname) {
|
||||
char hostHash[1024];
|
||||
|
||||
// Fall back is the hostname if something fails
|
||||
(void) strncpy(hostHash, hostname, sizeof(hostHash));
|
||||
int offset = strlen(hostHash);
|
||||
|
||||
FILE *file = fopen(HOSTID_FILE, "r");
|
||||
if (file != NULL) {
|
||||
char *p;
|
||||
if (fscanf(file, "%ms", &p) == 1) {
|
||||
strncpy(hostHash+offset, p, sizeof(hostHash)-offset-1);
|
||||
free(p);
|
||||
}
|
||||
}
|
||||
fclose(file);
|
||||
|
||||
// Make sure the string is terminated
|
||||
hostHash[sizeof(hostHash)-1]='\0';
|
||||
|
||||
return getHash(hostHash, strlen(hostHash));
|
||||
}
|
||||
@@ -1,231 +0,0 @@
|
||||
/*************************************************************************
|
||||
* *
|
||||
* N A S P A R A L L E L B E N C H M A R K S 3.3 *
|
||||
* *
|
||||
* I S *
|
||||
* *
|
||||
*************************************************************************
|
||||
* *
|
||||
* This benchmark is part of the NAS Parallel Benchmark 3.3 suite. *
|
||||
* It is described in NAS Technical Report 95-020. *
|
||||
* *
|
||||
* Permission to use, copy, distribute and modify this software *
|
||||
* for any purpose with or without fee is hereby granted. We *
|
||||
* request, however, that all derived work reference the NAS *
|
||||
* Parallel Benchmarks 3.3. This software is provided "as is" *
|
||||
* without express or implied warranty. *
|
||||
* *
|
||||
* Information on NPB 3.3, including the technical report, the *
|
||||
* original specifications, source code, results and information *
|
||||
* on how to submit new results, is available at: *
|
||||
* *
|
||||
* http://www.nas.nasa.gov/Software/NPB *
|
||||
* *
|
||||
* Send comments or suggestions to npb@nas.nasa.gov *
|
||||
* Send bug reports to npb-bugs@nas.nasa.gov *
|
||||
* *
|
||||
* NAS Parallel Benchmarks Group *
|
||||
* NASA Ames Research Center *
|
||||
* Mail Stop: T27A-1 *
|
||||
* Moffett Field, CA 94035-1000 *
|
||||
* *
|
||||
* E-mail: npb@nas.nasa.gov *
|
||||
* Fax: (650) 604-3957 *
|
||||
* *
|
||||
*************************************************************************
|
||||
* *
|
||||
* Author: M. Yarrow *
|
||||
* H. Jin *
|
||||
* *
|
||||
*************************************************************************/
|
||||
|
||||
#define NUM_WGS 1
|
||||
#define WG_SIZE 1024
|
||||
#define MAX_PES 128
|
||||
|
||||
#define MAX_KEY (1 << 11)
|
||||
|
||||
/*
|
||||
* FUNCTION RANDLC (X, A)
|
||||
*
|
||||
* This routine returns a uniform pseudorandom double precision number in the
|
||||
* range (0, 1) by using the linear congruential generator
|
||||
*
|
||||
* x_{k+1} = a x_k (mod 2^46)
|
||||
*
|
||||
* where 0 < x_k < 2^46 and 0 < a < 2^46. This scheme generates 2^44 numbers
|
||||
* before repeating. The argument A is the same as 'a' in the above formula,
|
||||
* and X is the same as x_0. A and X must be odd double precision integers
|
||||
* in the range (1, 2^46). The returned value RANDLC is normalized to be
|
||||
* between 0 and 1, i.e. RANDLC = 2^(-46) * x_1. X is updated to contain
|
||||
* the new seed x_1, so that subsequent calls to RANDLC using the same
|
||||
* arguments will generate a continuous sequence.
|
||||
*
|
||||
* This routine should produce the same results on any computer with at least
|
||||
* 48 mantissa bits in double precision floating point data. On Cray systems,
|
||||
* double precision should be disabled.
|
||||
*
|
||||
* David H. Bailey October 26, 1990
|
||||
*
|
||||
* IMPLICIT DOUBLE PRECISION (A-H, O-Z)
|
||||
* SAVE KS, R23, R46, T23, T46
|
||||
* DATA KS/0/
|
||||
*
|
||||
* If this is the first call to RANDLC, compute R23 = 2 ^ -23, R46 = 2 ^ -46,
|
||||
* T23 = 2 ^ 23, and T46 = 2 ^ 46. These are computed in loops, rather than
|
||||
* by merely using the ** operator, in order to insure that the results are
|
||||
* exact on all systems. This code assumes that 0.5D0 is represented exactly.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/*****************************************************************/
|
||||
/************* R A N D L C ************/
|
||||
/************* ************/
|
||||
/************* portable random number generator ************/
|
||||
/*****************************************************************/
|
||||
|
||||
double randlc( double *X, double *A )
{
    /* 2^-23, 2^-46, 2^23 and 2^46, built once on the first call by repeated
       halving/doubling and reused by every later call. */
    static int    initialized = 0;
    static double r23, r46, t23, t46;

    if (!initialized)
    {
        int step;

        r23 = r46 = t23 = t46 = 1.0;
        for (step = 0; step < 23; ++step)
        {
            r23 *= 0.50;
            t23 *= 2.0;
        }
        for (step = 0; step < 46; ++step)
        {
            r46 *= 0.50;
            t46 *= 2.0;
        }
        initialized = 1;
    }

    /* Split A into high and low parts: A = 2^23 * a_hi + a_lo.
       The int conversion truncates, exactly like the original "j = T1". */
    const double a_hi = (double)(int)(r23 * *A);
    const double a_lo = *A - t23 * a_hi;

    /* Split X the same way: X = 2^23 * x_hi + x_lo. */
    const double x_hi = (double)(int)(r23 * *X);
    const double x_lo = *X - t23 * x_hi;

    /* z = a_hi * x_lo + a_lo * x_hi (mod 2^23) */
    const double cross = a_hi * x_lo + a_lo * x_hi;
    const double z     = cross - t23 * (double)(int)(r23 * cross);

    /* X = 2^23 * z + a_lo * x_lo (mod 2^46) */
    const double full = t23 * z + a_lo * x_lo;
    *X = full - t46 * (double)(int)(r46 * full);

    /* Scale the new seed by 2^-46 for the returned value. */
    return r46 * *X;
}
|
||||
|
||||
|
||||
|
||||
/*****************************************************************/
|
||||
/************ F I N D _ M Y _ S E E D ************/
|
||||
/************ ************/
|
||||
/************ returns parallel random number seq seed ************/
|
||||
/*****************************************************************/
|
||||
|
||||
/*
|
||||
* Create a random number sequence of total length nn residing
|
||||
* on np number of processors. Each processor will therefore have a
|
||||
* subsequence of length nn/np. This routine returns that random
|
||||
* number which is the first random number for the subsequence belonging
|
||||
* to processor rank kn, and which is used as seed for proc kn ran # gen.
|
||||
*/
|
||||
|
||||
double find_my_seed( int kn,    /* my processor rank, 0<=kn<=num procs */
                     int np,    /* np = num procs                      */
                     long nn,   /* total num of ran numbers, all procs */
                     double s,  /* Ran num seed, for ex.: 314159265.00 */
                     double a ) /* Ran num gen mult, try 1220703125.00 */
{
    long   halvings = 0;   /* times nn/np can be halved before reaching 1 */
    long   remaining;
    long   step;
    long   kk;
    int    pass;
    double an   = a;
    double seed = s;
    double mult;

    for (remaining = nn / np; remaining > 1; remaining /= 2)
        ++halvings;

    /* Passing the same variable as both seed and multiplier makes randlc
       overwrite it in place; the returned value is not needed here. */
    for (step = 1; step <= halvings; step++)
        (void) randlc( &an, &an );

    mult = an;
    kk   = kn;

    /* Walk the bits of kn from the least significant one, at most 100
       rounds: an odd kk advances the seed with the current multiplier, and
       the multiplier is then updated in place before moving to the next
       bit. */
    for (pass = 0; pass < 100; ++pass)
    {
        const long half = kk / 2;

        if (kk % 2 != 0)
            (void) randlc( &seed, &mult );
        if (half == 0)
            break;
        (void) randlc( &mult, &mult );
        kk = half;
    }

    return seed;
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*****************************************************************/
|
||||
/************* C R E A T E _ S E Q ************/
|
||||
/*****************************************************************/
|
||||
|
||||
void create_seq( double seed, double a, int *key_array, int size )
|
||||
{
|
||||
double x;
|
||||
int i, k;
|
||||
|
||||
k = MAX_KEY/4;
|
||||
|
||||
for (i=0; i < size; i++)
|
||||
{
|
||||
x = randlc(&seed, &a);
|
||||
x += randlc(&seed, &a);
|
||||
x += randlc(&seed, &a);
|
||||
x += randlc(&seed, &a);
|
||||
|
||||
key_array[i] = k*x;
|
||||
}
|
||||
}
|
||||
@@ -1,380 +0,0 @@
|
||||
#include "mpi.h"
|
||||
#include "common.h"
|
||||
#include "sort.h"
|
||||
|
||||
//#define TIME_PERF
|
||||
#ifdef TIME_PERF
|
||||
#define TIMERS 10
|
||||
__device__ uint64_t timers[TIMERS] = {0};
|
||||
__device__ uint64_t time_start;
|
||||
#define TIMERS_START() \
|
||||
if(threadIdx.x == 0) {\
|
||||
time_start = rocshmem_timer();\
|
||||
}
|
||||
|
||||
#define TIME(TIMER_NUM) \
|
||||
if(threadIdx.x == 0) {\
|
||||
timers[TIMER_NUM] = rocshmem_timer() - time_start;\
|
||||
time_start = rocshmem_timer();\
|
||||
}
|
||||
|
||||
#define OUTPUT_TIME() \
|
||||
if(threadIdx.x == 0 && my_pe == 0) { \
|
||||
uint64_t sum = 0; \
|
||||
for(int i = 0; i < TIMERS; ++i) { \
|
||||
sum += timers[i]; \
|
||||
} \
|
||||
for(int i = 0; i < TIMERS; ++i) { \
|
||||
printf("%d: %f\n", i, (double)timers[i] / (double)sum); \
|
||||
} \
|
||||
}
|
||||
#else
|
||||
#define TIMERS_START()
|
||||
#define TIME(x)
|
||||
#define OUTPUT_TIME()
|
||||
#endif
|
||||
|
||||
/*
 * Bucket the local keys by destination PE (one bucket per PE).
 *
 * Reads `size` keys from `keys`, counts how many fall into each of the
 * `n_pes` buckets (bucket = key / (MAX_KEY / n_pes)), publishes the counts in
 * sendCount[], the start offset of every bucket in sendOffset[], and writes
 * the keys grouped by bucket into keyBuffer1[].
 *
 * keyBuffer2, recvCount, recvOffset and outputKeys are not touched by this
 * kernel; my_pe is only used by OUTPUT_TIME() when TIME_PERF is defined.
 *
 * Only threadIdx.x is used for indexing, and the prefix sum below maps one
 * thread to one bucket, so it needs blockDim.x >= n_pes.
 *
 * NOTE(review): key / K_PER_BUCK can reach n_pes when MAX_KEY is not a
 * multiple of n_pes and a key lies at the top of the range -- confirm the key
 * range / PE counts used by callers.
 */
__global__ void sort1(volatile int *keys, int *keyBuffer1,
                      int *keyBuffer2, int *sendCount,
                      int *recvCount, int *sendOffset,
                      int *recvOffset, int *outputKeys,
                      size_t size, int n_pes, int my_pe) {
    __shared__ int bucketCounter[MAX_PES];
    __shared__ int bucketPtr[MAX_PES];

    int buckets = n_pes;

    int tid = threadIdx.x; // + blockDim.x * blockIdx.x;
    const int K_PER_BUCK = (MAX_KEY / buckets);

    // Reset
    for(int i = threadIdx.x; i < buckets; i += blockDim.x) {
        bucketCounter[i] = 0;
        bucketPtr[i] = 0;
    }
    __syncthreads();
    TIMERS_START()
    // Count size of each bucket
    for(int i = tid; i < size; i += blockDim.x) {
        atomicAdd(&bucketCounter[keys[i] / K_PER_BUCK], 1);
    }
    __syncthreads();
    TIME(0)
    // Update in global memory
    for(int i = tid; i < buckets; i += blockDim.x) {
        sendCount[i] = bucketPtr[i] = bucketCounter[i];
    }
    __syncthreads();
    TIME(1)
    // Perform local scan to get ptrs set: an inclusive prefix sum over
    // bucketPtr, after which bucketPtr[b] is one past the last slot of bucket b
    for(int shift = 1; shift < buckets; shift *= 2) {
        int temp = 0;
        if(threadIdx.x >= shift && threadIdx.x < buckets) {
            temp = bucketPtr[threadIdx.x - shift];
        }
        // All reads of this round finish before any thread writes
        __syncthreads();
        if(threadIdx.x < buckets) {
            bucketPtr[threadIdx.x] += temp;
        }
        __syncthreads();
    }
    __syncthreads();
    TIME(2)
    // Find offsets of where we're sending (bucket start = bucket end - count)
    for(int i = threadIdx.x; i < buckets; i += blockDim.x) {
        sendOffset[i] = bucketPtr[i] - sendCount[i];
    }
    // The loop below decrements bucketPtr atomically; every thread must have
    // finished reading bucketPtr for sendOffset before that starts, otherwise
    // sendOffset can be computed from an already-decremented pointer.
    __syncthreads();
    // Sort keys into buckets: each key claims the next free slot counting
    // down from the end of its bucket
    for(int i = threadIdx.x; i < size; i += blockDim.x) {
        int loc = atomicAdd(&bucketPtr[keys[i] / K_PER_BUCK], -1) - 1;
        keyBuffer1[loc] = keys[i];
    }
    TIME(3)
    OUTPUT_TIME()
}
|
||||
|
||||
/*
 * Phase 2 of the bucket sort (single work-group kernel).
 *
 * Builds, in outputKeys, a histogram of the keys received in keyBuffer2
 * (indexed by key - my_pe * K_PER_BUCK) and turns it into an inclusive prefix
 * sum, which verify() later uses to place the keys in order.
 *
 * The number of received keys is the sum of recvCount[0 .. n_pes-1]; it is
 * computed here because the kernel has no other way to learn it.
 *
 * keys, keyBuffer1, sendCount, sendOffset, recvOffset and size are unused.
 *
 * NOTE(review): the scan uses one thread per key value, so it presumably
 * requires MAX_KEY / n_pes <= blockDim.x and outputKeys to hold at least
 * MAX_KEY / n_pes ints -- confirm against the allocation in main().
 */
__global__ void sort2(volatile int *keys, int *keyBuffer1,
                      int *keyBuffer2, int *sendCount,
                      int *recvCount, int *sendOffset,
                      int *recvOffset, int *outputKeys,
                      size_t size, int n_pes, int my_pe) {
    __shared__ int total_size;

    const int tid = threadIdx.x;
    const int K_PER_BUCK = (MAX_KEY / n_pes);

    // total_size was previously read without ever being written; derive it
    // from the per-PE receive counts. Published by the barrier below.
    if (tid == 0) {
        int received = 0;
        for (int pe = 0; pe < n_pes; ++pe) {
            received += recvCount[pe];
        }
        total_size = received;
    }

    // Clear the histogram.
    for (int i = tid; i < K_PER_BUCK; i += blockDim.x) {
        outputKeys[i] = 0;
    }
    __syncthreads();
    TIME(5)

    // Smallest key value owned by this PE; used to rebase keys to index 0.
    const int min_key_val = my_pe * K_PER_BUCK;

    // Histogram of received keys.
    for (int i = tid; i < total_size; i += blockDim.x) {
        atomicAdd(&outputKeys[keyBuffer2[i] - min_key_val], 1);
    }
    __syncthreads();
    TIME(6)

    // Inclusive prefix sum over the histogram.
    for (int shift = 1; shift < K_PER_BUCK; shift *= 2) {
        int temp = 0;
        if (tid >= shift && tid < K_PER_BUCK) {
            temp = outputKeys[tid - shift];
        }
        __syncthreads();
        if (tid < K_PER_BUCK) {
            outputKeys[tid] += temp;
        }
        __syncthreads();
    }
    TIME(7)
    OUTPUT_TIME()
}
|
||||
|
||||
void sort(volatile int *keys, int *keyBuffer1,
|
||||
int *keyBuffer2, int *sendCount,
|
||||
int *recvCount, int *sendOffset,
|
||||
int *recvOffset, int *outputKeys,
|
||||
size_t size, int max_iters) {
|
||||
int nProcs, my_pe;
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &nProcs);
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &my_pe);
|
||||
|
||||
hipStream_t stream;
|
||||
HIPCHECK(hipStreamCreate(&stream));
|
||||
|
||||
for(int iter = 0; iter < max_iters; ++iter) {
|
||||
//fprintf(stderr, "%d: %d %d %p %p\n", my_pe, iter, max_iters, sendCount, recvCount);
|
||||
sort1<<<1, WG_SIZE, 0, stream>>>(keys, keyBuffer1,
|
||||
keyBuffer2, sendCount, recvCount, sendOffset,
|
||||
recvOffset, outputKeys, size, nProcs, my_pe);
|
||||
HIPCHECK(hipStreamSynchronize(stream));
|
||||
MPI_Alltoall(sendCount, 1, MPI_INT, recvCount, 1,
|
||||
MPI_INT, MPI_COMM_WORLD);
|
||||
MPI_Alltoall(sendOffset, 1, MPI_INT, recvOffset, 1,
|
||||
MPI_INT, MPI_COMM_WORLD);
|
||||
int total_size = 0;
|
||||
MPI_Request *req = new MPI_Request[2 * nProcs];
|
||||
const int TAG = 10000;
|
||||
for(int i = 0; i < nProcs; ++i) {
|
||||
MPI_Isend(&keyBuffer1[sendOffset[i]], sendCount[i],
|
||||
MPI_INT, i, TAG, MPI_COMM_WORLD, &req[2 * i]);
|
||||
MPI_Irecv(&keyBuffer2[total_size], recvCount[i],
|
||||
MPI_INT, i, TAG, MPI_COMM_WORLD, &req[2 * i + 1]);
|
||||
total_size += recvCount[i];
|
||||
}
|
||||
MPI_Waitall(2 * nProcs, req, MPI_STATUS_IGNORE);
|
||||
sort2<<<1, WG_SIZE, 0, stream>>>(keys, keyBuffer1,
|
||||
keyBuffer2, sendCount, recvCount, sendOffset,
|
||||
recvOffset, outputKeys, size, nProcs, my_pe);
|
||||
}
|
||||
}
|
||||
|
||||
bool verify(int *outputKeys, int *keyBuffer2, size_t size)
|
||||
{
|
||||
int num_pes, my_pe;
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &num_pes);
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &my_pe);
|
||||
|
||||
MPI_Status status;
|
||||
MPI_Request request;
|
||||
|
||||
int min_key_val = my_pe * (MAX_KEY / num_pes);
|
||||
int max_key_val = (my_pe + 1) * (MAX_KEY / num_pes) - 1;
|
||||
|
||||
int *key_array = new int[size];
|
||||
// Perform final untimed sort on keys
|
||||
for(int i = 0; i < size; ++i)
|
||||
if(outputKeys[keyBuffer2[i] - min_key_val] > 0)
|
||||
key_array[--outputKeys[keyBuffer2[i] - min_key_val]] = keyBuffer2[i];
|
||||
else {
|
||||
fprintf(stderr, "%d: Found wrong key %d at %d with %d\n", my_pe, keyBuffer2[i], i, outputKeys[keyBuffer2[i]]);
|
||||
return false;
|
||||
}
|
||||
|
||||
if(size < 1)
|
||||
size = 1;
|
||||
|
||||
int k;
|
||||
const int MPI_TAG = 1000;
|
||||
// Check if largest key is smaller than next processor's
|
||||
if(my_pe > 0)
|
||||
MPI_Irecv(&k, 1, MPI_INT, my_pe - 1, MPI_TAG, MPI_COMM_WORLD,
|
||||
&request);
|
||||
if(my_pe < num_pes - 1)
|
||||
MPI_Send(&key_array[size - 1], 1, MPI_INT, my_pe + 1, MPI_TAG,
|
||||
MPI_COMM_WORLD );
|
||||
if(my_pe > 0)
|
||||
MPI_Wait(&request, &status);
|
||||
|
||||
// Check if it is smaller
|
||||
int j = 0;
|
||||
if( my_pe > 0 && size > 1 )
|
||||
if( k > key_array[0] )
|
||||
j++;
|
||||
|
||||
// Check if keys correctly sorted
|
||||
for(int i = 1; i < size; i++)
|
||||
if(key_array[i - 1] > key_array[i])
|
||||
j++;
|
||||
|
||||
delete[] key_array;
|
||||
|
||||
if(j != 0) {
|
||||
fprintf(stderr, "Processor %d: Full_verify: number of keys out of sort: %d\n",
|
||||
my_pe, j );
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void initGPU()
|
||||
{
|
||||
// Calculation for local rank, taken from rccl-tests
|
||||
int localRank = 0;
|
||||
int nProcs, proc;
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &nProcs);
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &proc);
|
||||
char hostname[1024];
|
||||
gethostname(hostname, 1024);
|
||||
for (int i=0; i< 1024; i++) {
|
||||
if (hostname[i] == '.') {
|
||||
hostname[i] = '\0';
|
||||
break;
|
||||
}
|
||||
}
|
||||
uint64_t hostHashs[nProcs];
|
||||
hostHashs[proc] = getHostHash(hostname);
|
||||
MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, hostHashs, sizeof(uint64_t), MPI_BYTE, MPI_COMM_WORLD);
|
||||
for (int p=0; p<nProcs; p++) {
|
||||
if (p == proc) break;
|
||||
if (hostHashs[p] == hostHashs[proc]) localRank++;
|
||||
}
|
||||
|
||||
/***
|
||||
* Select a GPU
|
||||
*/
|
||||
int ndevices, my_device=0;
|
||||
hipGetDeviceCount (&ndevices);
|
||||
my_device = localRank % ndevices;
|
||||
hipSetDevice(my_device);
|
||||
|
||||
printf("Rank %d: Device %d, Host %s\n", proc, my_device, hostname);
|
||||
fflush(stdout);
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
}
|
||||
|
||||
void *rocshmem_malloc(size_t size)
|
||||
{
|
||||
void *v;
|
||||
hipMalloc((void **)&v, size);
|
||||
return v;
|
||||
}
|
||||
|
||||
int rocshmem_free(void *v)
|
||||
{
|
||||
return hipFree(v);
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
if(argc < 1) {
|
||||
printf("Format: %s [iterations]\n", argv[0]);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Init stuff
|
||||
MPI_Init(&argc, &argv);
|
||||
initGPU();
|
||||
|
||||
int iterations = 1000;
|
||||
if(argc > 1)
|
||||
iterations = atoi(argv[1]);
|
||||
|
||||
int num_pes, my_pe;
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &num_pes);
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &my_pe);
|
||||
|
||||
// Configure input and outputs
|
||||
size_t size = 1024; //atoi(argv[1]);
|
||||
int *keys, *outputKeys;
|
||||
hipMalloc((void**)&keys, sizeof(int) * size);
|
||||
hipMalloc((void**)&outputKeys, sizeof(int) * WG_SIZE);
|
||||
|
||||
/* Generate random number sequence and subsequent keys on all procs */
|
||||
create_seq( find_my_seed( my_pe,
|
||||
num_pes,
|
||||
4*(long)size*num_pes,
|
||||
314159265.00, /* Random number gen seed */
|
||||
1220703125.00 ), /* Random number gen mult */
|
||||
1220703125.00, keys, size ); /* Random number gen mult */
|
||||
|
||||
|
||||
// Init buffers
|
||||
int *keyBuffer1, *keyBuffer2;
|
||||
keyBuffer1 = (int*)rocshmem_malloc(sizeof(int) * size);
|
||||
keyBuffer2 = (int*)rocshmem_malloc(sizeof(int) * size * 4);
|
||||
|
||||
int *sendCount = 0, *recvCount = 0, *sendOffset = 0, *recvOffset = 0;
|
||||
sendCount = (int*)rocshmem_malloc(sizeof(int) * MAX_PES);
|
||||
recvCount = (int*)rocshmem_malloc(sizeof(int) * MAX_PES);
|
||||
sendOffset = (int*)rocshmem_malloc(sizeof(int) * MAX_PES);
|
||||
recvOffset = (int*)rocshmem_malloc(sizeof(int) * MAX_PES);
|
||||
|
||||
printf("Begin untimed run\n");
|
||||
// Untimed run
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
sort((int*)keys, keyBuffer1, keyBuffer2,
|
||||
sendCount, recvCount, sendOffset, recvOffset,
|
||||
outputKeys, size, 1);
|
||||
hipDeviceSynchronize();
|
||||
|
||||
printf("Verify untimed run\n");
|
||||
// Verify correctness
|
||||
if(!verify(outputKeys, keyBuffer2, outputKeys[MAX_KEY / num_pes - 1])) {
|
||||
fprintf(stderr, "Wrong output\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
printf("Begin timed run\n");
|
||||
// Timed run
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
auto time_start = TIME_NOW;
|
||||
sort((int*)keys, keyBuffer1, keyBuffer2,
|
||||
sendCount, recvCount, sendOffset, recvOffset,
|
||||
outputKeys, size, iterations);
|
||||
hipDeviceSynchronize();
|
||||
double tot_time = (double)TIME_DIFF(TIME_NOW, time_start);
|
||||
|
||||
double all_time = 0;
|
||||
MPI_Allreduce(&tot_time, &all_time, 1,
|
||||
MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
|
||||
|
||||
if(my_pe == 0) {
|
||||
printf("Avg time:\t%.3f\tus\n", all_time / (double)(1000.0 * iterations * num_pes));
|
||||
}
|
||||
|
||||
// Verify correctness
|
||||
if(!verify(outputKeys, keyBuffer2, outputKeys[MAX_KEY / num_pes - 1])) {
|
||||
fprintf(stderr, "Wrong output\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
fprintf(stderr, "Done verify for %d\n", my_pe);
|
||||
|
||||
// Clean up
|
||||
hipFree(keys);
|
||||
hipFree(outputKeys);
|
||||
rocshmem_free(keyBuffer1);
|
||||
rocshmem_free(keyBuffer2);
|
||||
rocshmem_free(sendCount);
|
||||
rocshmem_free(recvCount);
|
||||
rocshmem_free(sendOffset);
|
||||
rocshmem_free(recvOffset);
|
||||
MPI_Finalize();
|
||||
return 0;
|
||||
}
|
||||
@@ -1,394 +0,0 @@
|
||||
#include "rccl.h"
|
||||
#include "common.h"
|
||||
#include "sort.h"
|
||||
|
||||
// Uncomment the next line to collect per-phase device timings in the kernels.
//#define TIME_PERF
#ifdef TIME_PERF
// Number of timing slots kept in device memory.
#define TIMERS 10
__device__ uint64_t timers[TIMERS] = {0};
__device__ uint64_t time_start;

// All three macros expand to a complete `if` statement and are used without a
// trailing semicolon; only thread 0 of the work-group touches the timers.
// NOTE(review): rocshmem_timer() is not declared by any include visible in
// this file -- confirm it is available before enabling TIME_PERF.

// Record the starting timestamp.
#define TIMERS_START() \
    if (threadIdx.x == 0) { \
        time_start = rocshmem_timer(); \
    }

// Store the time elapsed since the previous mark in slot TIMER_NUM and restart.
#define TIME(TIMER_NUM) \
    if (threadIdx.x == 0) { \
        timers[TIMER_NUM] = rocshmem_timer() - time_start; \
        time_start = rocshmem_timer(); \
    }

// Print every slot as a fraction of the sum of all slots (PE 0, thread 0 only).
// Requires a variable named my_pe in the expanding scope.
#define OUTPUT_TIME() \
    if (my_pe == 0 && threadIdx.x == 0) { \
        uint64_t timer_total = 0; \
        for (int slot = 0; slot < TIMERS; ++slot) { \
            timer_total += timers[slot]; \
        } \
        for (int slot = 0; slot < TIMERS; ++slot) { \
            printf("%d: %f\n", slot, (double)timers[slot] / (double)timer_total); \
        } \
    }
#else
// Timing disabled: the macros expand to nothing.
#define TIMERS_START()
#define TIME(x)
#define OUTPUT_TIME()
#endif
|
||||
|
||||
/*
 * Phase 1 of the bucket sort (single work-group kernel).
 *
 * Counts how many of the `size` local keys fall into each of the n_pes
 * buckets (bucket = key / (MAX_KEY / n_pes)), publishes the counts in
 * sendCount, the exclusive start offsets in sendOffset, and scatters the keys
 * into keyBuffer1 so that each bucket occupies a contiguous range.
 *
 * keyBuffer2, recvCount, recvOffset and outputKeys are not used here; my_pe
 * is only referenced by OUTPUT_TIME() when TIME_PERF is enabled.
 *
 * NOTE(review): the prefix scan uses one thread per bucket, so it presumably
 * requires n_pes <= blockDim.x and n_pes <= MAX_PES -- confirm at the caller.
 * NOTE(review): if MAX_KEY is not a multiple of n_pes, key / K_PER_BUCK can
 * reach n_pes and index past the bucket arrays -- confirm key range.
 */
__global__ void sort1(volatile int *keys, int *keyBuffer1,
                      int *keyBuffer2, int *sendCount,
                      int *recvCount, int *sendOffset,
                      int *recvOffset, int *outputKeys,
                      size_t size, int n_pes, int my_pe) {
    __shared__ int bucketCounter[MAX_PES];
    __shared__ int bucketPtr[MAX_PES];

    const int buckets = n_pes;
    const int tid = threadIdx.x;
    const int K_PER_BUCK = (MAX_KEY / buckets);

    // Reset the per-bucket counters.
    for (int i = tid; i < buckets; i += blockDim.x) {
        bucketCounter[i] = 0;
        bucketPtr[i] = 0;
    }
    __syncthreads();
    TIMERS_START()

    // Count size of each bucket.
    for (size_t i = tid; i < size; i += blockDim.x) {
        atomicAdd(&bucketCounter[keys[i] / K_PER_BUCK], 1);
    }
    __syncthreads();
    TIME(0)

    // Publish the counts to global memory and seed the scan input.
    for (int i = tid; i < buckets; i += blockDim.x) {
        sendCount[i] = bucketPtr[i] = bucketCounter[i];
    }
    __syncthreads();
    TIME(1)

    // Inclusive prefix sum over bucketPtr: afterwards bucketPtr[b] is the end
    // (one past the last slot) of bucket b inside keyBuffer1.
    for (int shift = 1; shift < buckets; shift *= 2) {
        int temp = 0;
        if (tid >= shift && tid < buckets) {
            temp = bucketPtr[tid - shift];
        }
        __syncthreads();
        if (tid < buckets) {
            bucketPtr[tid] += temp;
        }
        __syncthreads();
    }
    __syncthreads();
    TIME(2)

    // Start offset of each bucket = inclusive end - count.
    for (int i = tid; i < buckets; i += blockDim.x) {
        sendOffset[i] = bucketPtr[i] - sendCount[i];
    }
    // The scatter below decrements bucketPtr; every thread must have finished
    // reading it for sendOffset first, otherwise the offsets are corrupted.
    __syncthreads();

    // Scatter keys into their bucket, filling each bucket from its end.
    for (size_t i = tid; i < size; i += blockDim.x) {
        const int key = keys[i];
        const int loc = atomicAdd(&bucketPtr[key / K_PER_BUCK], -1) - 1;
        keyBuffer1[loc] = key;
    }
    TIME(3)
    OUTPUT_TIME()
}
|
||||
|
||||
/*
 * Phase 2 of the bucket sort (single work-group kernel).
 *
 * Builds, in outputKeys, a histogram of the keys received in keyBuffer2
 * (indexed by key - my_pe * K_PER_BUCK) and turns it into an inclusive prefix
 * sum, which verify() later uses to place the keys in order.
 *
 * The number of received keys is the sum of recvCount[0 .. n_pes-1]; it is
 * computed here because the kernel has no other way to learn it.
 *
 * keys, keyBuffer1, sendCount, sendOffset, recvOffset and size are unused.
 *
 * NOTE(review): the scan uses one thread per key value, so it presumably
 * requires MAX_KEY / n_pes <= blockDim.x and outputKeys to hold at least
 * MAX_KEY / n_pes ints -- confirm against the allocation in main().
 */
__global__ void sort2(volatile int *keys, int *keyBuffer1,
                      int *keyBuffer2, int *sendCount,
                      int *recvCount, int *sendOffset,
                      int *recvOffset, int *outputKeys,
                      size_t size, int n_pes, int my_pe) {
    __shared__ int total_size;

    const int tid = threadIdx.x;
    const int K_PER_BUCK = (MAX_KEY / n_pes);

    // total_size was previously read without ever being written; derive it
    // from the per-PE receive counts. Published by the barrier below.
    if (tid == 0) {
        int received = 0;
        for (int pe = 0; pe < n_pes; ++pe) {
            received += recvCount[pe];
        }
        total_size = received;
    }

    // Clear the histogram.
    for (int i = tid; i < K_PER_BUCK; i += blockDim.x) {
        outputKeys[i] = 0;
    }
    __syncthreads();
    TIME(5)

    // Smallest key value owned by this PE; used to rebase keys to index 0.
    const int min_key_val = my_pe * K_PER_BUCK;

    // Histogram of received keys.
    for (int i = tid; i < total_size; i += blockDim.x) {
        atomicAdd(&outputKeys[keyBuffer2[i] - min_key_val], 1);
    }
    __syncthreads();
    TIME(6)

    // Inclusive prefix sum over the histogram.
    for (int shift = 1; shift < K_PER_BUCK; shift *= 2) {
        int temp = 0;
        if (tid >= shift && tid < K_PER_BUCK) {
            temp = outputKeys[tid - shift];
        }
        __syncthreads();
        if (tid < K_PER_BUCK) {
            outputKeys[tid] += temp;
        }
        __syncthreads();
    }
    TIME(7)
    OUTPUT_TIME()
}
|
||||
|
||||
void sort(volatile int *keys, int *keyBuffer1,
|
||||
int *keyBuffer2, int *sendCount,
|
||||
int *recvCount, int *sendOffset,
|
||||
int *recvOffset, int *outputKeys,
|
||||
size_t size, int max_iters, ncclComm_t comm) {
|
||||
int nProcs, my_pe;
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &nProcs);
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &my_pe);
|
||||
|
||||
hipStream_t stream;
|
||||
HIPCHECK(hipStreamCreate(&stream));
|
||||
|
||||
for(int iter = 0; iter < max_iters; ++iter) {
|
||||
//fprintf(stderr, "%d: %d %d %p %p\n", my_pe, iter, max_iters, sendCount, recvCount);
|
||||
sort1<<<1, WG_SIZE, 0, stream>>>(keys, keyBuffer1,
|
||||
keyBuffer2, sendCount, recvCount, sendOffset,
|
||||
recvOffset, outputKeys, size, nProcs, my_pe);
|
||||
NCCLCHECK(ncclAllToAll(sendCount, recvCount, 1,
|
||||
ncclInt, comm, stream));
|
||||
NCCLCHECK(ncclAllToAll(sendOffset, recvOffset, 1,
|
||||
ncclInt, comm, stream));
|
||||
HIPCHECK(hipStreamSynchronize(stream));
|
||||
NCCLCHECK(ncclGroupStart());
|
||||
int total_size = 0;
|
||||
for(int i = 0; i < nProcs; ++i) {
|
||||
ncclSend(&keyBuffer1[sendOffset[i]], sendCount[i],
|
||||
ncclInt, i, comm, stream);
|
||||
ncclRecv(&keyBuffer2[total_size], recvCount[i],
|
||||
ncclInt, i, comm, stream);
|
||||
total_size += recvCount[i];
|
||||
}
|
||||
NCCLCHECK(ncclGroupEnd());
|
||||
HIPCHECK(hipStreamSynchronize(stream));
|
||||
sort2<<<1, WG_SIZE, 0, stream>>>(keys, keyBuffer1,
|
||||
keyBuffer2, sendCount, recvCount, sendOffset,
|
||||
recvOffset, outputKeys, size, nProcs, my_pe);
|
||||
HIPCHECK(hipStreamSynchronize(stream));
|
||||
}
|
||||
}
|
||||
|
||||
bool verify(int *outputKeys, int *keyBuffer2, size_t size)
|
||||
{
|
||||
int num_pes, my_pe;
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &num_pes);
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &my_pe);
|
||||
|
||||
MPI_Status status;
|
||||
MPI_Request request;
|
||||
|
||||
int min_key_val = my_pe * (MAX_KEY / num_pes);
|
||||
int max_key_val = (my_pe + 1) * (MAX_KEY / num_pes) - 1;
|
||||
|
||||
int *key_array = new int[size];
|
||||
// Perform final untimed sort on keys
|
||||
for(int i = 0; i < size; ++i)
|
||||
if(outputKeys[keyBuffer2[i] - min_key_val] > 0)
|
||||
key_array[--outputKeys[keyBuffer2[i] - min_key_val]] = keyBuffer2[i];
|
||||
else {
|
||||
fprintf(stderr, "%d: Found wrong key %d at %d with %d\n", my_pe, keyBuffer2[i], i, outputKeys[keyBuffer2[i]]);
|
||||
return false;
|
||||
}
|
||||
|
||||
if(size < 1)
|
||||
size = 1;
|
||||
|
||||
int k;
|
||||
const int MPI_TAG = 1000;
|
||||
// Check if largest key is smaller than next processor's
|
||||
if(my_pe > 0)
|
||||
MPI_Irecv(&k, 1, MPI_INT, my_pe - 1, MPI_TAG, MPI_COMM_WORLD,
|
||||
&request);
|
||||
if(my_pe < num_pes - 1)
|
||||
MPI_Send(&key_array[size - 1], 1, MPI_INT, my_pe + 1, MPI_TAG,
|
||||
MPI_COMM_WORLD );
|
||||
if(my_pe > 0)
|
||||
MPI_Wait(&request, &status);
|
||||
|
||||
// Check if it is smaller
|
||||
int j = 0;
|
||||
if( my_pe > 0 && size > 1 )
|
||||
if( k > key_array[0] )
|
||||
j++;
|
||||
|
||||
// Check if keys correctly sorted
|
||||
for(int i = 1; i < size; i++)
|
||||
if(key_array[i - 1] > key_array[i])
|
||||
j++;
|
||||
|
||||
delete[] key_array;
|
||||
|
||||
if(j != 0) {
|
||||
fprintf(stderr, "Processor %d: Full_verify: number of keys out of sort: %d\n",
|
||||
my_pe, j );
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
 * Bind this MPI rank to a GPU and create the RCCL communicator.
 *
 * The local rank is the number of lower-numbered ranks whose (domain-stripped)
 * hostname hashes to the same value as ours; the device index is
 * localRank % device-count.  Rank 0 creates the NCCL unique id and broadcasts
 * it; every rank then initialises `comms` with it.  Prints the selection and
 * ends with a barrier.  Aborts the MPI job if no usable device can be
 * selected.
 */
void initGPU(ncclComm_t &comms)
{
    // Calculation for local rank, taken from rccl-tests
    int localRank = 0;
    int nProcs, proc;
    MPI_Comm_size(MPI_COMM_WORLD, &nProcs);
    MPI_Comm_rank(MPI_COMM_WORLD, &proc);

    char hostname[1024] = {0};
    gethostname(hostname, sizeof(hostname));
    hostname[sizeof(hostname) - 1] = '\0';  // gethostname may not terminate
    // Cut the name at the first '.'; stop at the terminator instead of
    // scanning the whole buffer.
    for (size_t i = 0; hostname[i] != '\0'; i++) {
        if (hostname[i] == '.') {
            hostname[i] = '\0';
            break;
        }
    }

    // Heap array instead of a variable-length array (not standard C++).
    uint64_t *hostHashs = new uint64_t[nProcs];
    hostHashs[proc] = getHostHash(hostname);
    MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, hostHashs, sizeof(uint64_t), MPI_BYTE, MPI_COMM_WORLD);
    for (int p = 0; p < proc; p++) {
        if (hostHashs[p] == hostHashs[proc]) localRank++;
    }
    delete[] hostHashs;

    /***
     * Select a GPU
     */
    int ndevices = 0, my_device = 0;
    if (hipGetDeviceCount(&ndevices) != hipSuccess || ndevices < 1) {
        fprintf(stderr, "Rank %d: no HIP device available on host %s\n", proc, hostname);
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
    my_device = localRank % ndevices;
    if (hipSetDevice(my_device) != hipSuccess) {
        fprintf(stderr, "Rank %d: cannot select device %d on host %s\n", proc, my_device, hostname);
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    // Rank 0 generates the communicator id; everyone else receives it.
    ncclUniqueId ncclId;
    if (proc == 0) {
        NCCLCHECK(ncclGetUniqueId(&ncclId));
    }
    MPI_Bcast(&ncclId, sizeof(ncclId), MPI_BYTE, 0, MPI_COMM_WORLD);
    MPI_Barrier(MPI_COMM_WORLD);

#ifdef RCCL_MULTIRANKPERGPU
    NCCLCHECK(ncclCommInitRankMulti(&comms, nProcs, ncclId, proc, proc));
#else
    NCCLCHECK(ncclCommInitRank(&comms, nProcs, ncclId, proc));
#endif

    printf("Rank %d: Device %d, Host %s\n", proc, my_device, hostname);
    fflush(stdout);
    MPI_Barrier(MPI_COMM_WORLD);
}
|
||||
|
||||
void *rocshmem_malloc(size_t size)
|
||||
{
|
||||
void *v;
|
||||
hipMalloc((void **)&v, size);
|
||||
return v;
|
||||
}
|
||||
|
||||
int rocshmem_free(void *v)
|
||||
{
|
||||
return hipFree(v);
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
if(argc < 1) {
|
||||
printf("Format: %s [iterations]\n", argv[0]);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Init stuff
|
||||
MPI_Init(&argc, &argv);
|
||||
ncclComm_t comms;
|
||||
initGPU(comms);
|
||||
|
||||
int iterations = 1000;
|
||||
if(argc > 1)
|
||||
iterations = atoi(argv[1]);
|
||||
|
||||
int num_pes, my_pe;
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &num_pes);
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &my_pe);
|
||||
|
||||
// Configure input and outputs
|
||||
size_t size = 1024; //atoi(argv[1]);
|
||||
int *keys, *outputKeys;
|
||||
hipMalloc((void**)&keys, sizeof(int) * size);
|
||||
hipMalloc((void**)&outputKeys, sizeof(int) * WG_SIZE);
|
||||
|
||||
/* Generate random number sequence and subsequent keys on all procs */
|
||||
create_seq( find_my_seed( my_pe,
|
||||
num_pes,
|
||||
4*(long)size*num_pes,
|
||||
314159265.00, /* Random number gen seed */
|
||||
1220703125.00 ), /* Random number gen mult */
|
||||
1220703125.00, keys, size ); /* Random number gen mult */
|
||||
|
||||
|
||||
// Init buffers
|
||||
int *keyBuffer1, *keyBuffer2;
|
||||
keyBuffer1 = (int*)rocshmem_malloc(sizeof(int) * size);
|
||||
keyBuffer2 = (int*)rocshmem_malloc(sizeof(int) * size * 4);
|
||||
|
||||
int *sendCount = 0, *recvCount = 0, *sendOffset = 0, *recvOffset = 0;
|
||||
sendCount = (int*)rocshmem_malloc(sizeof(int) * MAX_PES);
|
||||
recvCount = (int*)rocshmem_malloc(sizeof(int) * MAX_PES);
|
||||
sendOffset = (int*)rocshmem_malloc(sizeof(int) * MAX_PES);
|
||||
recvOffset = (int*)rocshmem_malloc(sizeof(int) * MAX_PES);
|
||||
|
||||
printf("Begin untimed run\n");
|
||||
// Untimed run
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
sort((int*)keys, keyBuffer1, keyBuffer2,
|
||||
sendCount, recvCount, sendOffset, recvOffset,
|
||||
outputKeys, size, 1, comms);
|
||||
hipDeviceSynchronize();
|
||||
|
||||
printf("Verify untimed run\n");
|
||||
// Verify correctness
|
||||
if(!verify(outputKeys, keyBuffer2, outputKeys[MAX_KEY / num_pes - 1])) {
|
||||
fprintf(stderr, "Wrong output\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
printf("Begin timed run\n");
|
||||
// Timed run
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
auto time_start = TIME_NOW;
|
||||
sort((int*)keys, keyBuffer1, keyBuffer2,
|
||||
sendCount, recvCount, sendOffset, recvOffset,
|
||||
outputKeys, size, iterations, comms);
|
||||
hipDeviceSynchronize();
|
||||
double tot_time = (double)TIME_DIFF(TIME_NOW, time_start);
|
||||
|
||||
double all_time = 0;
|
||||
MPI_Allreduce(&tot_time, &all_time, 1,
|
||||
MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
|
||||
|
||||
if(my_pe == 0) {
|
||||
printf("Avg time:\t%.3f\tus\n", all_time / (double)(1000.0 * iterations * num_pes));
|
||||
}
|
||||
|
||||
// Verify correctness
|
||||
if(!verify(outputKeys, keyBuffer2, outputKeys[MAX_KEY / num_pes - 1])) {
|
||||
fprintf(stderr, "Wrong output\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Clean up
|
||||
hipFree(keys);
|
||||
hipFree(outputKeys);
|
||||
rocshmem_free(keyBuffer1);
|
||||
rocshmem_free(keyBuffer2);
|
||||
rocshmem_free(sendCount);
|
||||
rocshmem_free(recvCount);
|
||||
rocshmem_free(sendOffset);
|
||||
rocshmem_free(recvOffset);
|
||||
ncclCommDestroy(comms);
|
||||
MPI_Finalize();
|
||||
return 0;
|
||||
}
|
||||
@@ -1,358 +0,0 @@
|
||||
#include <iostream>
|
||||
#include <stdio.h>
|
||||
#include <mpi.h>
|
||||
#include <rocshmem/rocshmem.hpp>
|
||||
#include <unistd.h>
|
||||
using namespace std;
|
||||
using namespace rocshmem;
|
||||
|
||||
#include "common.h"
|
||||
#include "sort.h"
|
||||
|
||||
// Uncomment the next line to collect per-phase device timings in the kernel.
//#define TIME_PERF
#ifdef TIME_PERF
// Number of timing slots kept in device memory.
#define TIMERS 10
__device__ uint64_t timers[TIMERS] = {0};
__device__ uint64_t time_start;

// All three macros expand to a complete `if` statement and are used without a
// trailing semicolon; only thread 0 of the work-group touches the timers.

// Record the starting timestamp.
#define TIMERS_START() \
    if (threadIdx.x == 0) { \
        time_start = rocshmem_timer(); \
    }

// Store the time elapsed since the previous mark in slot TIMER_NUM and restart.
#define TIME(TIMER_NUM) \
    if (threadIdx.x == 0) { \
        timers[TIMER_NUM] = rocshmem_timer() - time_start; \
        time_start = rocshmem_timer(); \
    }

// Print every slot as a fraction of the sum of all slots (PE 0, thread 0 only).
// Requires a variable named my_pe in the expanding scope.
#define OUTPUT_TIME() \
    if (my_pe == 0 && threadIdx.x == 0) { \
        uint64_t timer_total = 0; \
        for (int slot = 0; slot < TIMERS; ++slot) { \
            timer_total += timers[slot]; \
        } \
        for (int slot = 0; slot < TIMERS; ++slot) { \
            printf("%d: %f\n", slot, (double)timers[slot] / (double)timer_total); \
        } \
    }
#else
// Timing disabled: the macros expand to nothing.
#define TIMERS_START()
#define TIME(x)
#define OUTPUT_TIME()
#endif
|
||||
|
||||
/*
 * Work-group all-to-all of a single int per PE: forwards to
 * rocshmem_ctx_int_wg_alltoall(ctx, team, dst, src, 1).
 */
__device__ __inline__ void alltoall(rocshmem_ctx_t &ctx,
                                    rocshmem_team_t team,
                                    int *dst, int *src) {
    // One element is exchanged with every PE of the team.
    const int elems_per_pe = 1;
    rocshmem_ctx_int_wg_alltoall(ctx, team, dst, src, elems_per_pe);
}
|
||||
|
||||
/*
 * rocSHMEM bucket sort, executed entirely inside one work-group.
 *
 * For each of max_iters iterations:
 *   1. count the `size` local keys per destination PE and publish the counts
 *      (sendCount) and start offsets (sendOffset);
 *   2. scatter the keys into keyBuffer1, grouped by destination PE;
 *   3. team sync, then all-to-all the counts and offsets;
 *   4. thread 0 fetches every PE's slice into keyBuffer2 with non-blocking
 *      gets and waits for them (rocshmem_quiet);
 *   5. build a histogram of the received keys in outputKeys and turn it into
 *      an inclusive prefix sum.
 *
 * NOTE(review): the scans use one thread per element, so this presumably
 * requires n_pes <= blockDim.x and MAX_KEY / n_pes <= blockDim.x -- confirm.
 * NOTE(review): there is no team-wide synchronisation at the end of an
 * iteration, so a fast PE may overwrite keyBuffer1 while a slower PE is still
 * fetching from it -- confirm whether this matters for max_iters > 1.
 */
__global__ void sort(volatile int *keys, int *keyBuffer1,
                     int *keyBuffer2, int *sendCount,
                     int *recvCount, int *sendOffset,
                     int *recvOffset, int *outputKeys,
                     size_t size, rocshmem_team_t team,
                     int max_iters) {
    __shared__ rocshmem_ctx_t ctx;
    __shared__ int bucketCounter[MAX_PES];
    __shared__ int bucketPtr[MAX_PES];
    __shared__ int total_size;

    rocshmem_wg_init();
    rocshmem_wg_ctx_create(ROCSHMEM_CTX_WG_PRIVATE, &ctx);

    int n_pes = rocshmem_ctx_n_pes(ctx);
    int my_pe = rocshmem_my_pe();
    const int buckets = n_pes;

    const int tid = threadIdx.x;
    const int K_PER_BUCK = (MAX_KEY / buckets);

    for (int iter = 0; iter < max_iters; ++iter) {
        // Reset the per-bucket counters.
        for (int i = tid; i < buckets; i += blockDim.x) {
            bucketCounter[i] = 0;
            bucketPtr[i] = 0;
        }
        __syncthreads();
        TIMERS_START()

        // Count size of each bucket.
        for (size_t i = tid; i < size; i += blockDim.x) {
            atomicAdd(&bucketCounter[keys[i] / K_PER_BUCK], 1);
        }
        __syncthreads();
        TIME(0)

        // Publish the counts to global memory and seed the scan input.
        for (int i = tid; i < buckets; i += blockDim.x) {
            sendCount[i] = bucketPtr[i] = bucketCounter[i];
        }
        __syncthreads();
        TIME(1)

        // Inclusive prefix sum: bucketPtr[b] becomes the end of bucket b.
        for (int shift = 1; shift < buckets; shift *= 2) {
            int temp = 0;
            if (tid >= shift && tid < buckets) {
                temp = bucketPtr[tid - shift];
            }
            __syncthreads();
            if (tid < buckets) {
                bucketPtr[tid] += temp;
            }
            __syncthreads();
        }
        __syncthreads();
        TIME(2)

        // Start offset of each bucket = inclusive end - count.
        for (int i = tid; i < buckets; i += blockDim.x) {
            sendOffset[i] = bucketPtr[i] - sendCount[i];
        }
        // The scatter below decrements bucketPtr; every thread must have
        // finished reading it for sendOffset first.
        __syncthreads();

        // Scatter keys into their bucket, filling each bucket from its end.
        for (size_t i = tid; i < size; i += blockDim.x) {
            const int key = keys[i];
            const int loc = atomicAdd(&bucketPtr[key / K_PER_BUCK], -1) - 1;
            keyBuffer1[loc] = key;
        }
        rocshmem_ctx_threadfence_system(ctx);
        // Force sync to wait for all PEs to update bucket sizes
        rocshmem_ctx_wg_team_sync(ctx, team);
        TIME(3)

        // Let all PEs know how many keys you wish to send
        alltoall(ctx, team, recvCount, sendCount);
        // Let all PEs know where the offsets are of the keys
        alltoall(ctx, team, recvOffset, sendOffset);
        __syncthreads();
        TIME(4)

        // Thread 0 pulls each PE's slice; slices are packed back to back.
        if (tid == 0) {
            total_size = 0;
            for (int i = 0; i < buckets; ++i) {
                rocshmem_int_get_nbi(&keyBuffer2[total_size],
                                     &keyBuffer1[recvOffset[i]], recvCount[i], i);
                total_size += recvCount[i];
            }
            rocshmem_quiet();
        }

        // Clear the histogram; the barrier also publishes total_size.
        for (int i = tid; i < K_PER_BUCK; i += blockDim.x) {
            outputKeys[i] = 0;
        }
        __syncthreads();
        TIME(5)

        // Smallest key value owned by this PE; rebases keys to index 0.
        const int min_key_val = my_pe * K_PER_BUCK;

        // Histogram of received keys.
        for (int i = tid; i < total_size; i += blockDim.x) {
            atomicAdd(&outputKeys[keyBuffer2[i] - min_key_val], 1);
        }
        __syncthreads();
        TIME(6)

        // Inclusive prefix sum over the histogram.
        for (int shift = 1; shift < K_PER_BUCK; shift *= 2) {
            int temp = 0;
            if (tid >= shift && tid < K_PER_BUCK) {
                temp = outputKeys[tid - shift];
            }
            __syncthreads();
            if (tid < K_PER_BUCK) {
                outputKeys[tid] += temp;
            }
            __syncthreads();
        }
        TIME(7)
    }
    OUTPUT_TIME()
    rocshmem_wg_ctx_destroy(ctx);
    rocshmem_wg_finalize();
}
|
||||
|
||||
bool verify(int *outputKeys, int *keyBuffer2, size_t size)
|
||||
{
|
||||
int num_pes = rocshmem_n_pes();
|
||||
int my_pe = rocshmem_my_pe();
|
||||
|
||||
MPI_Status status;
|
||||
MPI_Request request;
|
||||
|
||||
int min_key_val = my_pe * (MAX_KEY / num_pes);
|
||||
int max_key_val = (my_pe + 1) * (MAX_KEY / num_pes) - 1;
|
||||
|
||||
int *key_array = new int[size];
|
||||
// Perform final untimed sort on keys
|
||||
for(int i = 0; i < size; ++i)
|
||||
if(outputKeys[keyBuffer2[i] - min_key_val] > 0)
|
||||
key_array[--outputKeys[keyBuffer2[i] - min_key_val]] = keyBuffer2[i];
|
||||
else {
|
||||
fprintf(stderr, "%d: Found wrong key %d at %d with %d\n", my_pe, keyBuffer2[i], i, outputKeys[keyBuffer2[i]]);
|
||||
return false;
|
||||
}
|
||||
|
||||
if(size < 1)
|
||||
size = 1;
|
||||
|
||||
int k;
|
||||
const int MPI_TAG = 1000;
|
||||
// Check if largest key is smaller than next processor's
|
||||
if(my_pe > 0)
|
||||
MPI_Irecv(&k, 1, MPI_INT, my_pe - 1, MPI_TAG, MPI_COMM_WORLD,
|
||||
&request);
|
||||
if(my_pe < num_pes - 1)
|
||||
MPI_Send(&key_array[size - 1], 1, MPI_INT, my_pe + 1, MPI_TAG,
|
||||
MPI_COMM_WORLD );
|
||||
if(my_pe > 0)
|
||||
MPI_Wait(&request, &status);
|
||||
|
||||
// Check if it is smaller
|
||||
int j = 0;
|
||||
if( my_pe > 0 && size > 1 )
|
||||
if( k > key_array[0] )
|
||||
j++;
|
||||
|
||||
// Check if keys correctly sorted
|
||||
for(int i = 1; i < size; i++)
|
||||
if(key_array[i - 1] > key_array[i])
|
||||
j++;
|
||||
|
||||
delete[] key_array;
|
||||
|
||||
if(j != 0) {
|
||||
fprintf(stderr, "Processor %d: Full_verify: number of keys out of sort: %d\n",
|
||||
my_pe, j );
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void initGPU()
|
||||
{
|
||||
// Calculation for local rank, taken from rccl-tests
|
||||
int localRank = 0;
|
||||
int proc = rocshmem_my_pe();
|
||||
int nProcs = rocshmem_n_pes();
|
||||
char hostname[1024];
|
||||
gethostname(hostname, 1024);
|
||||
for (int i=0; i< 1024; i++) {
|
||||
if (hostname[i] == '.') {
|
||||
hostname[i] = '\0';
|
||||
break;
|
||||
}
|
||||
}
|
||||
uint64_t hostHashs[nProcs];
|
||||
hostHashs[proc] = getHostHash(hostname);
|
||||
MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, hostHashs, sizeof(uint64_t), MPI_BYTE, MPI_COMM_WORLD);
|
||||
for (int p=0; p<nProcs; p++) {
|
||||
if (p == proc) break;
|
||||
if (hostHashs[p] == hostHashs[proc]) localRank++;
|
||||
}
|
||||
|
||||
/***
|
||||
* Select a GPU
|
||||
*/
|
||||
int ndevices, my_device=0;
|
||||
hipGetDeviceCount (&ndevices);
|
||||
my_device = localRank % ndevices;
|
||||
hipSetDevice(my_device);
|
||||
|
||||
printf("Rank %d: Device %d, Host %s\n", proc, my_device, hostname);
|
||||
fflush(stdout);
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
// Init rocshmem stuff
|
||||
initGPU();
|
||||
rocshmem_init(NUM_WGS);
|
||||
int n_pes = rocshmem_team_n_pes(ROCSHMEM_TEAM_WORLD);
|
||||
rocshmem_team_t team_world_dup = ROCSHMEM_TEAM_INVALID;
|
||||
rocshmem_team_split_strided(ROCSHMEM_TEAM_WORLD,
|
||||
0,
|
||||
1,
|
||||
n_pes,
|
||||
nullptr,
|
||||
0,
|
||||
&team_world_dup);
|
||||
|
||||
int iterations = 1000;
|
||||
if(argc > 1)
|
||||
iterations = atoi(argv[1]);
|
||||
|
||||
int num_pes = rocshmem_n_pes();
|
||||
int my_pe = rocshmem_my_pe();
|
||||
|
||||
// Configure input and outputs
|
||||
size_t size = 1024; //atoi(argv[2]);
|
||||
int *keys, *outputKeys;
|
||||
hipMalloc((void**)&keys, sizeof(int) * size);
|
||||
hipMalloc((void**)&outputKeys, sizeof(int) * WG_SIZE);
|
||||
|
||||
/* Generate random number sequence and subsequent keys on all procs */
|
||||
create_seq( find_my_seed( my_pe,
|
||||
num_pes,
|
||||
4*(long)size*num_pes,
|
||||
314159265.00, /* Random number gen seed */
|
||||
1220703125.00 ), /* Random number gen mult */
|
||||
1220703125.00, keys, size ); /* Random number gen mult */
|
||||
|
||||
|
||||
// Init buffers
|
||||
int *keyBuffer1, *keyBuffer2;
|
||||
keyBuffer1 = (int*)rocshmem_malloc(sizeof(int) * size);
|
||||
keyBuffer2 = (int*)rocshmem_malloc(sizeof(int) * size * 4);
|
||||
|
||||
int *sendCount, *recvCount, *sendOffset, *recvOffset;
|
||||
sendCount = (int*)rocshmem_malloc(sizeof(int) * MAX_PES);
|
||||
recvCount = (int*)rocshmem_malloc(sizeof(int) * MAX_PES);
|
||||
sendOffset = (int*)rocshmem_malloc(sizeof(int) * MAX_PES);
|
||||
recvOffset = (int*)rocshmem_malloc(sizeof(int) * MAX_PES);
|
||||
|
||||
// Untimed run
|
||||
rocshmem_barrier_all();
|
||||
sort<<<1, WG_SIZE>>>((int*)keys, keyBuffer1, keyBuffer2,
|
||||
sendCount, recvCount, sendOffset, recvOffset,
|
||||
outputKeys, size, team_world_dup, 1);
|
||||
hipDeviceSynchronize();
|
||||
|
||||
// Verify correctness
|
||||
if(!verify(outputKeys, keyBuffer2, outputKeys[MAX_KEY / num_pes - 1])) {
|
||||
fprintf(stderr, "Wrong output\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Timed run
|
||||
rocshmem_barrier_all();
|
||||
auto time_start = TIME_NOW;
|
||||
sort<<<1, WG_SIZE>>>((int*)keys, keyBuffer1, keyBuffer2,
|
||||
sendCount, recvCount, sendOffset, recvOffset,
|
||||
outputKeys, size, team_world_dup, iterations);
|
||||
hipDeviceSynchronize();
|
||||
double tot_time = (double)TIME_DIFF(TIME_NOW, time_start);
|
||||
|
||||
double all_time = 0;
|
||||
MPI_Allreduce(&tot_time, &all_time, 1,
|
||||
MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
|
||||
|
||||
if(my_pe == 0) {
|
||||
printf("Avg time:\t%f\tus\n", all_time /
|
||||
(double)(1000.0 * iterations * num_pes));
|
||||
}
|
||||
|
||||
// Verify correctness
|
||||
if(!verify(outputKeys, keyBuffer2, outputKeys[MAX_KEY / num_pes - 1])) {
|
||||
fprintf(stderr, "Wrong output\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Clean up
|
||||
hipFree(keys);
|
||||
hipFree(outputKeys);
|
||||
rocshmem_free(keyBuffer1);
|
||||
rocshmem_free(keyBuffer2);
|
||||
rocshmem_free(sendCount);
|
||||
rocshmem_free(recvCount);
|
||||
rocshmem_free(sendOffset);
|
||||
rocshmem_free(recvOffset);
|
||||
rocshmem_finalize();
|
||||
return 0;
|
||||
}
|
||||
Reference in New Issue
Block a user