Transfer files from RAD repository

[ROCm/rocshmem commit: ea8f264a11]
This commit is contained in:
Brandon Potter
2024-07-01 09:57:08 -05:00
والد a78cfbd283
کامیت ad4ab69c19
382فایلهای تغییر یافته به همراه67034 افزوده شده و 1 حذف شده
+13
مشاهده پرونده
@@ -0,0 +1,13 @@
# This is the list of ROCSHMEM's significant contributors.
#
# This does not necessarily list everyone who has contributed code,
# especially since many employees of one corporation may be contributing.
# To see the full list of contributors, see the revision history in
# source control.
Khaled Hamidouche
Brandon Potter
Michael LeBeane
Rohit Zambre
Kishore Punniyamurthy
Ruchi Shah
Muhammad A. Awad
+12
مشاهده پرونده
@@ -0,0 +1,12 @@
Version 1.6.0 (20 June 2023)
* Support for Frontier and Crusher systems
* Multithread support with Reverse Offload backend
* Support for all AMO operations and types with Reverse Offload backend
* Support for private context on global memory
* Reduce LDS usage to improve occupancy
* Code refactoring to reduce register file usage
* Fix and enhance default context support
* Fix and enhance IPC support
* Fix race condition with multi-thread support
* Add fine-grain locking schemes
@@ -0,0 +1,432 @@
###############################################################################
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
###############################################################################
cmake_minimum_required(VERSION 3.16.3 FATAL_ERROR)
###############################################################################
# AVOID IN SOURCE BUILD
###############################################################################
if(CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR AND CMAKE_CURRENT_SOURCE_DIR STREQUAL
CMAKE_SOURCE_DIR)
set(MSG "")
message(STATUS "Warning! Building from the source directory is not recommended")
message(STATUS "If unintended, please remove 'CMakeCache.txt' and 'CMakeFiles'")
message(STATUS "and build from a separate directory")
message(FATAL_ERROR "In-source build")
endif()
###############################################################################
# SYSTEM INFO
###############################################################################
cmake_host_system_information(RESULT LOCALHOST QUERY FQDN)
message(STATUS "Hostname: ${LOCALHOST}")
###############################################################################
# VERSIONING INFO DERIVED FROM FILE
###############################################################################
file(READ "${CMAKE_CURRENT_SOURCE_DIR}/VERSION" FULL_VERSION_STRING LIMIT_COUNT 1)
string(REGEX REPLACE "(\n|\r)" "" FULL_VERSION_STRING "${FULL_VERSION_STRING}")
set(ROCSHMEM_FULL_VERSION "${FULL_VERSION_STRING}")
string(REGEX REPLACE "([0-9]+)\.([0-9]+)\.([0-9]+)(.*)" "\\1.\\2.\\3" ROCSHMEM_VERSION
"${FULL_VERSION_STRING}")
###############################################################################
# CONFIGURATION OPTIONS
###############################################################################
option(DEBUG "Enable debug trace" OFF)
option(PROFILE "Enable statistics and timing support" OFF)
option(USE_GPU_IB "Enable GPU_IB conduit. If off, RO_NET will be used" ON)
option(USE_DC "Enable IB dynamically connected transport (DC)" OFF)
option(USE_IPC "Enable IPC support (using HIP)" OFF)
option(USE_THREADS "Enable workgroup threads to share network queues" OFF)
option(USE_WF_COAL "Enable wavefront message coalescing" OFF)
option(USE_COHERENT_HEAP "Enable support for coherent systems" OFF)
option(USE_CACHED_HEAP "Enable support for cached systems" OFF)
option(USE_MANAGED_HEAP "Enable managed memory" OFF)
option(USE_HOST_HEAP "Enable host memory using malloc/free" OFF)
option(USE_HIP_HOST_HEAP "Enable host memory using hip api" OFF)
option(USE_FUNC_CALL "Force compiler to use function calls on library API" OFF)
option(USE_SHARED_CTX "Request support for shared ctx between WG" OFF)
option(USE_SINGLE_NODE "Enable single node support only." OFF)
option(USE_HOST_SIDE_HDP_FLUSH "Use a polling thread to flush the HDP cache on the host." OFF)
option(BUILD_FUNCTIONAL_TESTS "Build the functional tests" ON)
option(BUILD_SOS_TESTS "Build the host-facing tests" OFF)
option(BUILD_UNIT_TESTS "Build the unit tests" ON)
set(ROCM_PATH "" CACHE PATH "ROCm path to use")
configure_file(cmake/config.h.in config.h)
###############################################################################
# Validate user passed options
###############################################################################
if(ROCM_PATH)
set(HIPCONFIG_PATH "${ROCM_PATH}/bin/hipconfig")
execute_process(
COMMAND "${HIPCONFIG_PATH}" --version
RESULT_VARIABLE HIPCONFIG_RETURN
OUTPUT_VARIABLE ROCM_VERSION
ERROR_QUIET
OUTPUT_STRIP_TRAILING_WHITESPACE)
if(HIPCONFIG_RETURN EQUAL 0)
message(STATUS "Using ROCm with version: ${ROCM_VERSION}")
else()
message(FATAL_ERROR "ROCm at ${ROCM_PATH} is missing the hipconfig binary. "
"Use -DROCM_PATH=/path/to/rocm or ensure ROCm's hipconfig is in your path.")
endif()
else()
set(HIPCONFIG_PATH "hipconfig")
execute_process(
COMMAND "${HIPCONFIG_PATH}" --rocmpath
RESULT_VARIABLE HIPCONFIG_RETURN
OUTPUT_VARIABLE ROCM_PATH
ERROR_QUIET
OUTPUT_STRIP_TRAILING_WHITESPACE)
if(HIPCONFIG_RETURN EQUAL 0)
message(STATUS "Found ROCm at: ${ROCM_PATH}")
else()
message(FATAL_ERROR "Ensure hipconfig is in your path or use -DROCM_PATH=/path/to/rocm.")
endif()
endif()
###############################################################################
# GLOBAL COMPILE FLAGS
###############################################################################
if (NOT DEFINED CMAKE_CXX_COMPILER)
set(CMAKE_CXX_COMPILER ${ROCM_PATH}/bin/hipcc)
endif()
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_FLAGS_DEBUG "-O0 -ggdb")
###############################################################################
# MODULE SEARCH PATH
###############################################################################
set(
CMAKE_MODULE_PATH
${CMAKE_MODULE_PATH}
"${CMAKE_SOURCE_DIR}/cmake/Modules"
"{ROCM_PATH}/hip/cmake"
"{ROCM_PATH}/rocclr/lib/cmake/rocclr"
)
###############################################################################
# PROJECT
###############################################################################
project(
rocshmem
VERSION ${ROCSHMEM_VERSION}
LANGUAGES CXX
DESCRIPTION "ROCSHMEM"
HOMEPAGE_URL "https://github.com/ROCm-Developer-Tools/ROC_SHMEM")
###############################################################################
# DEFAULT BUILD TYPE
###############################################################################
if(NOT CMAKE_BUILD_TYPE)
set(
CMAKE_BUILD_TYPE
"Release"
CACHE
STRING
"build type: Release, Debug, RelWithDebInfo, MinSizeRel"
FORCE
)
message(STATUS "CMAKE_BUILD_TYPE unspecified: using ${CMAKE_BUILD_TYPE}")
endif()
if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
set(CMAKE_INSTALL_PREFIX
"/opt/lib/rocshmem"
CACHE PATH "default install path" FORCE)
endif()
message(STATUS "Installation path: ${CMAKE_INSTALL_PREFIX}")
###############################################################################
# CREATE ROCSHMEM LIBRARY
###############################################################################
add_library(
${PROJECT_NAME}
STATIC
""
)
add_library(${PROJECT_NAME}::${PROJECT_NAME} ALIAS ${PROJECT_NAME})
###############################################################################
# INCLUDE DIRECTORIES
###############################################################################
target_include_directories(
${PROJECT_NAME}
PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
$<BUILD_INTERFACE:${CMAKE_BINARY_DIR}> # CONFIG.H
$<INSTALL_INTERFACE:include>
)
###############################################################################
# SPECIFY PUBLIC HEADER FILES
###############################################################################
set_target_properties(
${PROJECT_NAME}
PROPERTIES
PUBLIC_HEADER
"${CMAKE_BINARY_DIR}/config.h;${CMAKE_CURRENT_SOURCE_DIR}/include/roc_shmem/roc_shmem.hpp;${CMAKE_CURRENT_SOURCE_DIR}/include/roc_shmem/debug.hpp"
)
###############################################################################
# SUBDIRECTORY TARGETS
###############################################################################
add_subdirectory(src)
add_subdirectory(docs)
add_subdirectory(tests)
###############################################################################
# HIP
###############################################################################
find_package(hip REQUIRED)
target_link_libraries(
${PROJECT_NAME}
PUBLIC
hip::device
hip::host
)
###############################################################################
# HSA-RUNTIME64
###############################################################################
find_package(hsa-runtime64 REQUIRED)
target_link_libraries(
${PROJECT_NAME}
PUBLIC
hsa-runtime64::hsa-runtime64
)
###############################################################################
# PTHREADS
###############################################################################
set(CMAKE_THREAD_PREFER_PTHREAD TRUE)
set(THREADS_PREFER_PTHREAD_FLAG TRUE)
find_package(Threads REQUIRED)
target_link_libraries(
${PROJECT_NAME}
PUBLIC
Threads::Threads
)
###############################################################################
# IBVERBS
###############################################################################
IF (USE_GPU_IB)
find_package(Ibverbs REQUIRED)
target_include_directories(
${PROJECT_NAME}
PUBLIC
${IBVERBS_INCLUDE_DIRS}
)
target_link_libraries(
${PROJECT_NAME}
PUBLIC
${IBVERBS_LIBRARIES}
)
ENDIF()
###############################################################################
# MPI
###############################################################################
find_package(MPI REQUIRED)
target_include_directories(
${PROJECT_NAME}
PUBLIC
${MPI_CXX_HEADER_DIR}
)
target_link_libraries(
${PROJECT_NAME}
PUBLIC
${MPI_mpi_LIBRARY}
${MPI_mpicxx_LIBRARY}
)
###############################################################################
# MODULEFILE
###############################################################################
set(MOD_INSTALL_PATH
"${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_DATAROOTDIR}/modulefiles"
CACHE STRING "Install path for modulefile")
message(STATUS "Modulefile install path: ${MOD_INSTALL_PATH}")
set(moduleFileTemplate "${PROJECT_NAME}.lua.in")
configure_file(
${PROJECT_SOURCE_DIR}/cmake/${moduleFileTemplate}
${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_DATAROOTDIR}/modulefiles/${PROJECT_NAME}/${ROCSHMEM_FULL_VERSION}.lua
@ONLY)
# Crusher
if(LOCALHOST MATCHES ".*\.crusher\.olcf\.ornl\.gov")
list(APPEND CMAKE_MESSAGE_INDENT " ")
message(STATUS "Using crusher-specific modulefile modification")
file(READ ${PROJECT_SOURCE_DIR}/cmake/modfile.crusher.mod mod_additions)
file(
APPEND
${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_DATAROOTDIR}/modulefiles/${PROJECT_NAME}/${ROCSHMEM_FULL_VERSION}.lua
${mod_additions})
list(POP_BACK CMAKE_MESSAGE_INDENT)
endif()
# Frontier
if(LOCALHOST MATCHES ".*\.frontier\.olcf\.ornl\.gov")
list(APPEND CMAKE_MESSAGE_INDENT " ")
message(STATUS "Using frontier-specific modulefile modification")
file(READ ${PROJECT_SOURCE_DIR}/cmake/modfile.frontier.mod mod_additions)
file(
APPEND
${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_DATAROOTDIR}/modulefiles/${PROJECT_NAME}/${ROCSHMEM_FULL_VERSION}.lua
${mod_additions})
list(POP_BACK CMAKE_MESSAGE_INDENT)
endif()
# hpcfund
if(LOCALHOST MATCHES ".*\.hpcfund")
list(APPEND CMAKE_MESSAGE_INDENT " ")
message(STATUS "Using hpcfund-specific modulefile modification")
file(READ ${PROJECT_SOURCE_DIR}/cmake/modfile.hpcfund.mod mod_additions)
file(
APPEND
${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_DATAROOTDIR}/modulefiles/${PROJECT_NAME}/${ROCSHMEM_FULL_VERSION}.lua
${mod_additions})
list(POP_BACK CMAKE_MESSAGE_INDENT)
endif()
###############################################################################
# INSTALL
###############################################################################
include(GNUInstallDirs)
# Specify layout within the build directory
set(
CMAKE_ARCHIVE_OUTPUT_DIRECTORY
${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_LIBDIR}
)
set(
CMAKE_LIBRARY_OUTPUT_DIRECTORY
${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_LIBDIR}
)
set(
CMAKE_RUNTIME_OUTPUT_DIRECTORY
${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_BINDIR}
)
# Offer alternatives for default installation subdirectories
set(
INSTALL_LIBDIR
${CMAKE_INSTALL_LIBDIR} CACHE PATH
"Installation directory for libraries"
)
set(
INSTALL_BINDIR
${CMAKE_INSTALL_BINDIR} CACHE PATH
"Installation directory for executables"
)
set(
INSTALL_INCLUDEDIR
${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH
"Installation directory for header files"
)
set(
DEF_INSTALL_CMAKEDIR
share/cmake/${PROJECT_NAME}
)
set(
INSTALL_CMAKEDIR
${DEF_INSTALL_CMAKEDIR} CACHE PATH
"Installation directory for CMake files"
)
install(
TARGETS ${PROJECT_NAME} EXPORT ${PROJECT_NAME}Targets
ARCHIVE
DESTINATION ${INSTALL_LIBDIR}
COMPONENT lib
LIBRARY
DESTINATION ${INSTALL_LIBDIR}
COMPONENT lib
PUBLIC_HEADER
DESTINATION ${INSTALL_INCLUDEDIR}
COMPONENT dev
RUNTIME
DESTINATION ${INSTALL_BINDIR}
COMPONENT bin
)
install(
EXPORT
${PROJECT_NAME}Targets
FILE
${PROJECT_NAME}Targets.cmake
NAMESPACE
${PROJECT_NAME}::
DESTINATION
${INSTALL_CMAKEDIR}
COMPONENT
dev
)
install(
FILES
${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake
${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake
DESTINATION
${INSTALL_CMAKEDIR}
)
install(
FILES
${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_DATAROOTDIR}/modulefiles/${PROJECT_NAME}/${ROCSHMEM_FULL_VERSION}.lua
DESTINATION ${MOD_INSTALL_PATH}/${PROJECT_NAME})
###############################################################################
# PACKAGE
###############################################################################
include(CMakePackageConfigHelpers)
write_basic_package_version_file(
${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake
VERSION
${PROJECT_VERSION}
COMPATIBILITY
SameMajorVersion
)
configure_package_config_file(
${PROJECT_SOURCE_DIR}/cmake/${PROJECT_NAME}Config.cmake.in
${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake
INSTALL_DESTINATION
${INSTALL_CMAKEDIR}
)
@@ -0,0 +1,59 @@
## How to fork from us
To keep our development fast and conflict free, we recommend you to [fork](https://github.com/ROCm-Developer-Tools/ROC_SHMEM/fork) our repository and start your work from our `dev` branch in your private repository.
Afterwards, git clone your repository to your local machine. But that is not it! To keep track of the original develop repository, add it as another remote.
```
git remote add mainline https://github.com/ROCm-Developer-Tools/ROC_SHMEM.git
git checkout dev
```
As always in git, start a new branch with
```
git checkout -b topic-<yourFeatureName>
```
and apply your changes there.
## How to contribute to ROCSHMEM
### Did you find a bug?
- Ensure the bug was not already reported by searching on GitHub under [Issues](https://github.com/ROCm-Developer-Tools/ROC_SHMEM/issues).
- If you're unable to find an open issue addressing the problem, [open a new one](https://github.com/ROCm-Developer-Tools/ROC_SHMEM/issues/new).
### Did you write a patch that fixes a bug?
- Open a new GitHub [pull request](https://github.com/ROCm-Developer-Tools/ROC_SHMEM/compare) with the patch.
- Ensure the PR description clearly describes the problem and solution. If there is an existing GitHub issue open describing this bug, please include it in the description so we can close it.
- Ensure the PR is based on the `dev` branch of the ROCSHMEM GitHub repository.
- ROCSHMEM requires new commits to include a "Signed-off-by" token in the commit message (typically enabled via the `git commit -s` option), indicating your agreement to the projects's [Developer's Certificate of Origin](https://developercertificate.org/) and compatability with the project [LICENSE](https://github.com/ROCm-Developer-Tools/ROC_SHMEM/blob/main/LICENSE):
> (a) The contribution was created in whole or in part by me and I
> have the right to submit it under the open source license
> indicated in the file; or
>
> (b) The contribution is based upon previous work that, to the best
> of my knowledge, is covered under an appropriate open source
> license and I have the right under that license to submit that
> work with modifications, whether created in whole or in part
> by me, under the same open source license (unless I am
> permitted to submit under a different license), as indicated
> in the file; or
>
> (c) The contribution was provided directly to me by some other
> person who certified (a), (b) or (c) and I have not modified
> it.
>
> (d) I understand and agree that this project and the contribution
> are public and that a record of the contribution (including all
> personal information I submit with it, including my sign-off) is
> maintained indefinitely and may be redistributed consistent with
> this project or the open source license(s) involved.
@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
+194 -1
مشاهده پرونده
@@ -1 +1,194 @@
# rocSHMEM-internal
# ROCm OpenSHMEM (ROC_SHMEM)
The ROCm OpenSHMEM (ROC_SHMEM) runtime is part of an AMD Research
initiative to provide a unified runtime for heterogeneous systems.
ROC_SHMEM supports both host-centric (a traditional host-driven
OpenSHMEM runtime) and GPU-centric networking (provided a GPU kernel
the ability to perform network operations) through an
OpenSHMEM-like interface. This intra-kernel networking simplifies application
code complexity and enables more fine-grained communication/computation
overlap than traditional host-driven networking.
ROC_SHMEM's primary target is heterogeneous computing; hence, for both
CPU-centric and GPU-centric communications, ROC_SHMEM uses a single
symmetric heap (SHEAP) that is allocated on GPU memories.
ROC_SHMEM's GPU-centric communication has two different backend designs.
The backends primarily differ in their implementations of
intra-kernel networking.
The first design will be referred to as the the GPU InfiniBand (GPU-IB)
backend. This backend implements a lightweight InfiniBand verbs interface
on the GPU. The GPU itself is responsible with building commands and ringing
the doorbell on the NIC to send network commands. GPU-IB is the default and
preferred backend design that offers the best performance.
The second design will be referred to as the Reverse Offload (RO) backend. With
the RO backend, the GPU runtime forwards ROC_SHMEM networking operations to the
host-side runtime, which calls into a traditional MPI or OpenSHMEM
implementation. This forwarding of requests is transparent to the
programmer, who only sees the GPU-side interface.
Both designs of the GPU-centric interface coexist seamlessly with the
CPU-centric interface of the unified runtime. ROC_SHMEM ensures that CPU-centric
updates to the SHEAP are consistent and visible to a GPU kernel that is executing
in parallel to host-initiated communication.
## Limitations
ROC_SHMEM is an experimental prototype from AMD Research and not an official
ROCm product. The software is provided as-is with no guarantees of support
from AMD or AMD Research.
ROC_SHMEM base requirements:
* ROCm version 4.3.1 onwards
* May work with other versions, but not tested
* AMD GFX9 GPUs (e.g.: MI25, Vega 56, Vega 64, MI50, MI60, MI100, Radeon VII)
* AMD MI200 GPUs: To enable the support on MI200, please configure the library
with USE_CACHED_HEAP
* ROCm-aware MPI as described in
[Building the Dependencies](#building-the-dependencies)
* InfiniBand adaptor compatable with ROCm RDMA technology
* UCX 1.6 or greater with ROCm support
ROC_SHMEM optional requirements
* For Documentation:
* Doxygen
ROC_SHMEM only supports HIP applications. There are no plans to port to
OpenCL.
## Building and Installation
ROC_SHMEM uses the CMake build system. The CMakeLists file contains
additional details about library options.
To create an out-of-source build:
mkdir build
cd build
Next, choose one configuration from the build_configs subdirectory. These
scripts pass configuration options to CMake to setup canonical builds which
are regularly tested:
../scripts/build_configs/dc_single
../scripts/build_configs/dc_multi
../scripts/build_configs/rc_single
../scripts/build_configs/rc_multi
../scripts/build_configs/rc_multi_wf_coal
../scripts/build_configs/ro_net_basic
By default, the library is installed in `~/rocshmem`. You may provide a
custom install path by supplying it as an argument. For example:
../scripts/build_configs/rc_single /path/to/install
## Compiling/linking and Running with ROC_SHMEM
ROC_SHMEM is built as a host and device side library that can be statically
linked to your application during compilation using hipcc.
During the compilation of your application, include the ROC_SHMEM header files
and the ROC_SHMEM library when using hipcc:
-I/path/to/rocshmem/install/include
-L/path/to/rocshmem/install/lib -lrocshmem
NOTE: ROC_SHMEM depends on MPI for its host code. So, you will need to link
to an MPI library. Since you must use the hipcc compiler, the arguments for
MPI linkage must be added manually as opposed to using mpicc. Similary,
ROC_SHMEM depends on Verbs for its device code. So, you will need to link
to a Verbs library.
When using hipcc directly (as opposed to through a build system), we
recommend performing the compilation and linking steps separately.
Here are the steps to build a standalone program, say
roc_shmem_hello.cpp.
```
# Compile
/opt/rocm/bin/hipcc ./roc_shmem_hello.cpp -I/path/to/rocshmem/install/include -fgpu-rdc -o ./roc_shmem_hello.o -c
# Link
/opt/rocm/bin/hipcc ./roc_shmem_hello.o /path/to/rocshmem/install/lib/librocshmem.a -lmpi -lmlx5 -libverbs -lhsa-runtime64 -fgpu-rdc -o roc_shmem_hello
```
If your project uses cmake, please refer to the CMakeLists.txt files
in the clients directory for examples. You may also find the
[Using CMake with AMD ROCm](https://rocmdocs.amd.com/en/latest/conceptual/cmake-packages.html)
page useful.
## Runtime Parameters
ROC_SHMEM_HEAP_SIZE (default : 1 GB)
Defines the size of the OpenSHMEM symmetric heap
Note the heap is on the GPU memory.
ROC_SHMEM_SQ_SIZE (default 1024)
Defines the size of the SQ as number of network
packet (WQE). Each WQE is 64B. This only for
GPU-IB conduit
ROC_SHMEM_USE_CQ_GPU_MEM (default : 1)
Set the placement of CQ on GPU memory (1)
or CPU memory (0)
ROC_SHMEM_USE_SQ_GPU_MEM (default : 1)
Set the placement of SQ on GPU memory (1)
or CPU memory (0)
RO_NET_CPU_QUEUE (default: not set)
Force producer/consumer queues between CPU and GPU to
be in CPU memory. RO backend only.
ROC_SHMEM also requires the following environment variable be set for ROCm:
export HSA_FORCE_FINE_GRAIN_PCIE=1
## Documentation
To generate doxygen documentation for ROC_SHMEM's API, run the following
from the library's build directory:
make docs
The doxygen output will be in the `docs` folder of the build directory.
## Examples
ROC_SHMEM is similar to OpenSHMEM and should be familiar to programmers who
have experience with OpenSHMEM or other PGAS network programming APIs in the
context of CPUs. The best way to learn how to use ROC_SHMEM is to read the
autogenerated doxygen documentation for functions described in
`roc_shmem/roc_shmem.hpp`, or to look at the provided sample applications in the
`tests/` folder. ROC_SHMEM is shipped with a basic test suite for the
supported ROC_SHMEM API. The examples test Puts, Gets, nonblocking Puts,
nonblocking Gets, Quiets, Atomics, Tests, Wai-untils, Broadcasts, and
Reductions.
To run the examples, you may use the driver scripts provided in respective
folders of device- or host-initiated communication examples. Simply
executing `./driver.sh` will show the help message on how to use the script.
Here are some example uses of the driver script:
./scripts/functional_tests/driver.sh ./build/rocshmem_example_driver single_thread ./build (for device-initiated communication)
./scripts/sos_tests/driver.sh ./build short (for host-initiated communication)
## Building the Dependencies
ROC_SHMEM requires an MPI runtime on the host that supports ROCm-Aware MPI.
Currently all ROCm-Aware MPI runtimes require the usage of ROCm-Aware UCX.
To build and configure ROCm-Aware UCX, you need to:
1. Download the latest UCX
2. Configure and build UCX with ROCm support: --with-rocm=/opt/rocm
Then, you need to build your MPI (OpenMPI or MPICH CH4) with UCX support.
For more information on OpenMPI-UCX support, please visit:
https://github.com/openucx/ucx/wiki/OpenMPI-and-OpenSHMEM-installation-with-UCX
For more information on MPICH-UCX support, please visit:
https://www.mpich.org/about/news/
@@ -0,0 +1 @@
1.6.0
@@ -0,0 +1,62 @@
###############################################################################
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
###############################################################################
find_package(PkgConfig REQUIRED QUIET)
pkg_check_modules(PC_IBVERBS QUIET libibverbs)
find_path(
IBVERBS_INCLUDE_DIR infiniband/verbs.h
HINTS ${PC_IBVERBS_INCLUDEDIR} ${PC_IBVERBS_INCLUDE_DIRS}
PATH_SUFFIXES include
)
find_library(
IBVERBS_LIBRARY
NAMES ibverbs libibverbs
HINTS ${PC_IBVERBS_LIBDIR} ${PC_IBVERBS_LIBRARY_DIRS}
PATH_SUFFIXES lib lib64
)
find_library(
MLX5_LIBRARY
NAMES mlx5 libmlx5
HINTS ${PC_IBVERBS_LIBDIR} ${PC_IBVERBS_LIBRARY_DIRS}
PATH_SUFFIXES lib lib64
)
set(
IBVERBS_LIBRARIES
${IBVERBS_LIBRARY} ${MLX5_LIBRARY}
CACHE INTERNAL ""
)
set(
IBVERBS_INCLUDE_DIRS
${IBVERBS_INCLUDE_DIR}
CACHE INTERNAL ""
)
find_package_handle_standard_args(
Ibverbs DEFAULT_MSG IBVERBS_LIBRARY IBVERBS_INCLUDE_DIR
)
mark_as_advanced(IBVERBS_LIBRARY IBVERBS_INCLUDE_DIR)
@@ -0,0 +1 @@
@ROCSHMEM_GIT_REV@
@@ -0,0 +1,16 @@
#cmakedefine DEBUG
#cmakedefine PROFILE
#cmakedefine USE_GPU_IB
#cmakedefine USE_DC
#cmakedefine USE_IPC
#cmakedefine USE_THREADS
#cmakedefine USE_SHARED_CTX
#cmakedefine USE_WF_COAL
#cmakedefine USE_COHERENT_HEAP
#cmakedefine USE_CACHED_HEAP
#cmakedefine USE_MANAGED_HEAP
#cmakedefine USE_HOST_HEAP
#cmakedefine USE_HIP_HOST_HEAP
#cmakedefine USE_FUNC_CALL
#cmakedefine USE_SINGLE_NODE
#cmakedefine USE_HOST_SIDE_HDP_FLUSH
@@ -0,0 +1,4 @@
-- Crusher-specific additions
depends_on "craype-accel-amd-gfx90a"
depends_on "amd-mixed"
depends_on "cray-mpich"
@@ -0,0 +1,3 @@
-- Crusher-specific additions
depends_on "rocm"
prereq(atleast("rocm","5.3.0"))
@@ -0,0 +1,4 @@
-- hpcfund-specific additions
depends_on "rocm"
prereq(atleast("rocm","6.0.2"))
@@ -0,0 +1,33 @@
local help_message = [[
ROC_SHMEM is an open-source GPU initiated networking library
for High Performance Computing and Machine Learning workloads.
Version @ROCSHMEM_FULL_VERSION@
]]
help(help_message,"\n")
whatis("Name: rocshmem")
whatis("Version: @ROCSHMEM_FULL_VERSION@")
whatis("Keywords: GPU, PGAS, RMA, HPC")
whatis("Description: tool for GPU initiated networking")
whatis("URL: https://github.com/ROCm-Developer-Tools/ROC_SHMEM")
-- Export environmental variables
local topDir="@CMAKE_INSTALL_PREFIX@"
local binDir="@CMAKE_INSTALL_FULL_BINDIR@"
local shareDir="@CMAKE_INSTALL_FULL_DATADIR@"
local pythonDeps="@PYTHON_DEPS@"
setenv("ROCSHMEM_DIR",topDir)
setenv("ROCSHMEM_BIN",binDir)
setenv("ROCSHMEM_SHARE",shareDir)
setenv("MPICH_GPU_SUPPORT_ENABLED",1)
-- Update relevant PATH variables
prepend_path("PATH",binDir)
if ( pythonDeps ~= "" ) then
prepend_path("PYTHONPATH",pythonDeps)
end
@@ -0,0 +1,7 @@
@PACKAGE_INIT@
include("${CMAKE_CURRENT_LIST_DIR}/rocshmemTargets.cmake")
check_required_components(
"rocshmem"
)
@@ -0,0 +1,39 @@
###############################################################################
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
###############################################################################
find_package(Doxygen)
if (DOXYGEN_FOUND)
set(DOXYGEN_IN ${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in)
set(DOXYGEN_SOURCE_BROWSER YES)
set(DOXYGEN_EXTRACT_PRIVATE YES)
set(DOXYGEN_MACRO_EXPANSION YES)
set(DOXYGEN_GENERATE_LATEX YES)
set(DOXYGEN_USE_PDFLATEX YES)
configure_file(${DOXYGEN_IN} ${DOXYGEN_OUT} @ONLY)
doxygen_add_docs(
docs
${PROJECT_SOURCE_DIR}
COMMENT "Generate man pages"
MACRO_EXPANSION YES
)
endif()
تفاوت فایلی نمایش داده نمی شود زیرا این فایل بسیار بزرگ است Diff را بارگزاری کن
@@ -0,0 +1,59 @@
# Builds for host-facing support
As of July 29, 2021 (when support for host-facing functions was merged in),
we are running it with MPICH-3.4 and UCX-1.10.0.
## Background
MPICH-3.4 does not have support for HIP (GPU support in MPICH checks whether
the buffer is on the host or the device).
UCX-1.10.0 claims it does not support GPU-aware communication for RMA
operations. As of the time of this writing, this claim remains even on the
latest UCX version (1.11.0 and the master branch). UCX developers to merge
in stable GPU-aware support by the end of 2021.
A side note: OSU microbechmarks with RoCM memory hang with UCX-1.10.0
(during ucp_mem_map() which is called by MPI_Win_create()).
I don't see this hang with the latest version of UCX.
So, for RoCM memory, MPICH-3.4 still offloads RMA operations to UCX.
## So, how does it work with the current builds?
Theoretically, there are no limitations preventing GPU-aware RDMA
communicaiton. As long as the GPU memory is registered with the NIC,
the NIC can perform operations on device memory.
Even though UCX claims to not support GPU-aware RMA communication, it does
not check whether or not the buffer being passed in is a device or host
buffer. So, as long as the device memory being used is registered with the
NIC (this does occur during MPI_Win_create), we are good.
UCX claims to not support GPU-aware communication because they have
not added in support for the different types of scenarios that could
exist in a system (eg, when a system does not have GPU-direct). The
scope of ROC_SHMEM is currenlty limited to configurations that UCX
already supports.
## But the main branch of MPICH does support HIP now?
Since MPICH is going off of UCX's claim that it does not support
GPU-aware RMA communication, MPICH executes its RMA operations
using active messages when it notices that the buffer is a GPU
buffer. So, if we use MPICH with HIP support, we end up using
active-message implementations unnecessarily, and hence lose
a lot of performance.
## Moving forward
We should switch to using MPICH "correctly" (i.e. with HIP support)
only when UCX officially claims to support GPU-aware RMA
communication because that is when MPICH will offload MPI
RMA operations to UCX RMA operations.
But if there is a need for MPICH's HIP support for GPU IPC (unsure
if this is needed for now), we will need an alternative. In the
current MPICH configuration, communication between processes on
the same node are funneled through the netmod (UCX in our case) as
well.
@@ -0,0 +1,34 @@
/******************************************************************************
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*****************************************************************************/
#ifndef LIBRARY_INCLUDE_DEBUG_HPP
#define LIBRARY_INCLUDE_DEBUG_HPP
namespace rocshmem {
void debug_print_cq(int dest_pe, int src_wg, int cqe_index);
void debug_print_sq(int dest_pe, int src_wg, int index_wqe);
} // namespace rocshmem
#endif // LIBRARY_INCLUDE_DEBUG_HPP
تفاوت فایلی نمایش داده نمی شود زیرا این فایل بسیار بزرگ است Diff را بارگزاری کن
@@ -0,0 +1,96 @@
###############################################################################
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
###############################################################################
cmake_minimum_required(VERSION 3.16.3 FATAL_ERROR)
###############################################################################
# GLOBAL COMPILE FLAGS
###############################################################################
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_COMPILER /opt/rocm/bin/hipcc )
set(CMAKE_CXX_FLAGS_DEBUG "-O0 -ggdb")
###############################################################################
# DEFAULT BUILD TYPE
###############################################################################
if(NOT CMAKE_BUILD_TYPE)
message(STATUS "CMAKE_BUILD_TYPE unspecified: generating Release build")
set(
CMAKE_BUILD_TYPE
"Release"
CACHE
STRING
"build type: Release, Debug, RelWithDebInfo, MinSizeRel"
FORCE
)
endif()
###############################################################################
# PROJECT
###############################################################################
project(rocshmem_example_driver VERSION 1.1.0 LANGUAGES CXX)
###############################################################################
# SOURCES
###############################################################################
add_executable(${PROJECT_NAME} "")
target_include_directories(
${PROJECT_NAME}
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}
)
target_sources(
${PROJECT_NAME}
PRIVATE
test_driver.cpp
tester.cpp
tester_arguments.cpp
primitive_tester.cpp
)
###############################################################################
# ROCSHMEM
###############################################################################
find_package(hip REQUIRED)
find_package(rocshmem CONFIG REQUIRED)
target_include_directories(
${PROJECT_NAME}
PRIVATE
rocshmem::rocshmem
)
target_link_libraries(
${PROJECT_NAME}
PRIVATE
rocshmem::rocshmem
hip::host
-fgpu-rdc
--hipcc-func-supp
# xnack allows address translation fault recovery
# required option for managed heap configs
# -mxnack
)
@@ -0,0 +1,17 @@
#!/bin/bash
if [ -z $1 ]
then
install_path=~/rocshmem
else
install_path=$1
fi
src_path=$(dirname "$(realpath $0)")/..
cmake \
-DCMAKE_BUILD_TYPE=Debug \
-DCMAKE_VERBOSE_MAKEFILE=ON \
-Drocshmem_DIR=$install_path/share/cmake/rocshmem \
$src_path
cmake --build . --parallel 8
@@ -0,0 +1,17 @@
#!/bin/bash
if [ -z $1 ]
then
install_path=~/rocshmem
else
install_path=$1
fi
src_path=$(dirname "$(realpath $0)")/..
cmake \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_VERBOSE_MAKEFILE=OFF \
-Drocshmem_DIR=$install_path/share/cmake/rocshmem \
$src_path
cmake --build . --parallel 8
@@ -0,0 +1,143 @@
/******************************************************************************
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*****************************************************************************/
#include "primitive_tester.hpp"
#include <roc_shmem/roc_shmem.hpp>
#include <debug.hpp>
#include <unistd.h>
using namespace rocshmem;
/******************************************************************************
* DEVICE TEST KERNEL
*****************************************************************************/
__global__ void
PrimitiveTest(int loop,
int *flag,
char *s_buf,
char *r_buf,
int size,
int my_pe,
ShmemContextType ctx_type)
{
__shared__ roc_shmem_ctx_t ctx;
roc_shmem_wg_init();
roc_shmem_wg_ctx_create(ctx_type, &ctx);
int block_id = hipBlockIdx_x;
for(int i =0; i< loop; i++){
roc_shmemx_ctx_putmem_nbi_wg(ctx, &r_buf[my_pe*size], &s_buf[block_id * size], size, block_id);
if(hipThreadIdx_x==0){
//roc_shmem_ctx_quiet(ctx);
//roc_shmem_ctx_threadfence_system(ctx);
roc_shmem_ctx_int_p(ctx, &flag[my_pe], i+1, block_id);
//roc_shmem_ctx_quiet(ctx);
roc_shmem_int_wait_until(&flag[block_id], ROC_SHMEM_CMP_EQ, i+1);
}
__syncthreads();
}
roc_shmem_wg_ctx_destroy(ctx);
roc_shmem_wg_finalize();
}
/******************************************************************************
* HOST TESTER CLASS METHODS
*****************************************************************************/
PrimitiveTester::PrimitiveTester(TesterArguments args)
: Tester(args)
{
flag = (int*) roc_shmem_malloc(args.numprocs);
memset(flag, 0, args.numprocs*sizeof(int));
// s_buf = (char *)roc_shmem_malloc(args.max_msg_size * args.wg_size);
// r_buf = (char *)roc_shmem_malloc(args.max_msg_size * args.wg_size);
}
PrimitiveTester::~PrimitiveTester()
{
roc_shmem_free(s_buf);
roc_shmem_free(r_buf);
}
void
PrimitiveTester::resetBuffers(uint64_t size)
{
memset(s_buf, '0', size * args.numprocs);
memset(r_buf, '1', size * args.numprocs);
}
void
PrimitiveTester::launchKernel(dim3 gridSize,
dim3 blockSize,
int loop,
uint64_t size,
int nproc, int my_pe)
{
void* sendBuf = malloc(64);
void* recvBuf = malloc(64 * nproc);
s_buf = (char *)roc_shmem_malloc(size * nproc);
r_buf = (char *)roc_shmem_malloc(size * nproc);
resetBuffers(size);
MPI_Allgather(sendBuf, 64, MPI_CHAR,
recvBuf, 64, MPI_CHAR,
MPI_COMM_WORLD);
size_t shared_bytes;
roc_shmem_dynamic_shared(&shared_bytes);
hipLaunchKernelGGL(PrimitiveTest,
gridSize,
blockSize,
shared_bytes,
stream,
loop,
flag,
s_buf,
r_buf,
size,
my_pe,
_shmem_context);
//num_msgs = (loop + args.skip) * gridSize.x;
num_timed_msgs = loop ;
}
void
PrimitiveTester::verifyResults(uint64_t size)
{
int check_id =0;
if (args.myid == check_id) {
for (int i = 0; i < size*args.numprocs; i++) {
if (r_buf[i] != '0') {
fprintf(stderr, "Data validation error at idx %d\n", i);
fprintf(stderr, "Got %c, Expected %c\n", r_buf[i], '0');
exit(-1);
}
}
}
}
@@ -0,0 +1,57 @@
/******************************************************************************
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*****************************************************************************/
#ifndef _PRIMITIVE_TESTER_HPP_
#define _PRIMITIVE_TESTER_HPP_
#include "tester.hpp"
#include <mpi.h>
/******************************************************************************
* HOST TESTER CLASS
*****************************************************************************/
class PrimitiveTester : public Tester
{
public:
explicit PrimitiveTester(TesterArguments args);
virtual ~PrimitiveTester();
protected:
virtual void
resetBuffers(uint64_t size) override;
virtual void
launchKernel(dim3 gridSize,
dim3 blockSize,
int loop,
uint64_t size,
int nproc, int my_pe) override;
virtual void
verifyResults(uint64_t size) override;
char *s_buf = nullptr;
char *r_buf = nullptr;
int *flag = nullptr;
};
#endif
@@ -0,0 +1,84 @@
/******************************************************************************
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*****************************************************************************/
#include <vector>
#include <roc_shmem/roc_shmem.hpp>
#include "tester.hpp"
#include "tester_arguments.hpp"
using namespace rocshmem;
int main(int argc, char * argv[])
{
/**
* Setup the tester arguments.
*/
TesterArguments args(argc, argv);
/***
* Select a GPU
*/
int rank = roc_shmem_my_pe();
int ndevices, my_device=0;
hipGetDeviceCount (&ndevices);
my_device = rank % ndevices;
hipSetDevice(my_device);
/**
* Must initialize rocshmem to access arguments needed by the tester.
*/
roc_shmem_init(args.num_wgs);
/**
* Now grab the arguments from rocshmem.
*/
args.get_rocshmem_arguments();
/**
* Using the arguments we just constructed, call the tester factory
* method to get the tester (specified by the arguments).
*/
std::vector<Tester *> tests = Tester::create(args);
/**
* Run the tests
*/
for (auto test : tests) {
test->execute();
/**
* The tester factory method news the tester to create it so we clean
* up the memory here.
*/
delete test;
}
/**
* The rocshmem library needs to be cleaned up with this call. It pairs
* with the init function above.
*/
roc_shmem_finalize();
return 0;
}
@@ -0,0 +1,213 @@
/******************************************************************************
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*****************************************************************************/
#include "tester.hpp"
#include <functional>
#include <vector>
#include <iostream>
#include <hip/hip_runtime.h>
#include <mpi.h>
#include <roc_shmem/roc_shmem.hpp>
//#include "broadcast_tester.hpp"
#include "primitive_tester.hpp"
Tester::Tester(TesterArguments args)
: args(args)
{
_type = (TestType) args.algorithm;
_shmem_context = args.shmem_context;
hipStreamCreate(&stream);
hipEventCreate(&start_event);
hipEventCreate(&stop_event);
hipMalloc((void**)&timer, sizeof(uint64_t) * args.num_wgs);
}
Tester::~Tester()
{
hipFree(timer);
hipEventDestroy(stop_event);
hipEventDestroy(start_event);
hipStreamDestroy(stream);
}
std::vector<Tester*>
Tester::create(TesterArguments args)
{
int rank = args.myid;
std::vector<Tester*> testers;
if (rank == 0)
std::cout << "*** Creating Test: ";
TestType type = (TestType) args.algorithm;
switch (type) {
case AlltoAll_Put:
if (rank == 0)
std::cout << "AlltoAll Puts***" << std::endl;
testers.push_back(new PrimitiveTester(args));
return testers;
case AlltoAll_Get:
if (rank == 0)
std::cout << "AlltoAll Gets***" << std::endl;
testers.push_back(new PrimitiveTester(args));
return testers;
default:
if (rank == 0)
std::cout << "Unknown***" << std::endl;
testers.push_back(new PrimitiveTester(args));
return testers;
}
return testers;
}
void
Tester::execute()
{
int num_loops = args.loop;
/**
* Some tests loop through data sizes in powers of 2 and report the
* results for those ranges.
*/
for (uint64_t size = args.min_msg_size;
size <= args.max_msg_size;
size <<= 1) {
/**
* Restricts the number of iterations of really large messages.
*/
if (size > args.large_message_size)
num_loops = args.loop_large;
/**
* TODO:
* Verify that this timer type is actually uint64_t on the
* device side.
*/
memset(timer, 0, sizeof(uint64_t) * args.num_wgs);
const dim3 blockSize(args.wg_size, 1, 1);
const dim3 gridSize(args.num_wgs, 1, 1);
hipEventRecord(start_event, stream);
launchKernel(gridSize, blockSize, num_loops, size, args.numprocs, args.myid);
hipEventRecord(stop_event, stream);
hipError_t err = hipStreamSynchronize(stream);
if (err != hipSuccess) {
printf("error = %d \n", err);
}
// roc_shmem_dump_stats();
// roc_shmem_reset_stats();
// data validation
verifyResults(size);
barrier();
resetBuffers(size);
print(size);
}
}
void
Tester::print(uint64_t size)
{
if (args.myid != 0) {
return;
}
// uint64_t timer_avg = timerAvgInMicroseconds();
// double latency_avg = static_cast<double>(timer_avg) / num_timed_msgs;
// double avg_msg_rate = num_timed_msgs / (timer_avg / 1e6);
float total_kern_time_ms;
hipEventElapsedTime(&total_kern_time_ms, start_event, stop_event);
float total_kern_time_s = total_kern_time_ms / 1000;
double bandwidth_avg_gbs = num_timed_msgs * size * bw_factor / total_kern_time_s / pow(2, 30);
float latency_us = (total_kern_time_ms *1000) /num_timed_msgs;
int field_width = 20;
int float_precision = 2;
printf("\n##### Message Size %lu #####\n", size);
printf("%*s%*s\n",
field_width + 1, "Latency AVG (us)",
field_width + 1, "Bandwidth (GB/s)");
printf("%*.*f %*.*f \n",
field_width, float_precision, latency_us,
field_width, float_precision, bandwidth_avg_gbs);
fflush(stdout);
}
void
Tester::barrier()
{
MPI_Barrier(MPI_COMM_WORLD);
}
uint64_t
Tester::gpuCyclesToMicroseconds(uint64_t cycles)
{
/**
* The dGPU asm core timer runs at 27MHz. This is different from the
* core clock returned by HIP. For an APU, this is different and might
* need adjusting.
*/
uint64_t gpu_frequency_MHz = 27;
/**
* hipDeviceGetAttribute(&gpu_frequency_khz,
* hipDeviceAttributeClockRate,
* 0);
*/
return cycles / gpu_frequency_MHz;
}
uint64_t
Tester::timerAvgInMicroseconds()
{
uint64_t sum = 0;
for (int i = 0; i < args.num_wgs; i++) {
sum += gpuCyclesToMicroseconds(timer[i]);
}
return sum / args.num_wgs;
}
@@ -0,0 +1,111 @@
/******************************************************************************
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*****************************************************************************/
#ifndef _TESTER_HPP_
#define _TESTER_HPP_
#include <vector>
#include <roc_shmem/roc_shmem.hpp>
#include "tester_arguments.hpp"
/******************************************************************************
* TESTER CLASS TYPES
*****************************************************************************/
enum TestType
{
AlltoAll_Put = 0,
AlltoAll_Get = 1
};
typedef int ShmemContextType;
/******************************************************************************
* TESTER INTERFACE
*****************************************************************************/
class Tester
{
public:
explicit Tester(TesterArguments args);
virtual ~Tester();
void
execute();
static std::vector<Tester*>
create(TesterArguments args);
protected:
virtual void
resetBuffers(uint64_t size) = 0;
virtual void
preLaunchKernel() {}
virtual void
launchKernel(dim3 gridSize,
dim3 blockSize,
int loop,
uint64_t size,
int nproc, int my_pe) = 0;
virtual void
postLaunchKernel() {}
virtual void
verifyResults(uint64_t size) = 0;
int num_msgs = 0;
int num_timed_msgs = 0;
int bw_factor = 1;
TesterArguments args;
TestType _type;
ShmemContextType _shmem_context = 8; //SHMEM_CTX_WP_PRIVATE
hipStream_t stream;
uint64_t *timer = nullptr;
private:
void
print(uint64_t size);
void
barrier();
uint64_t
gpuCyclesToMicroseconds(uint64_t cycles);
uint64_t
timerAvgInMicroseconds();
bool
peLaunchesKernel();
hipEvent_t start_event;
hipEvent_t stop_event;
};
#endif /* _TESTER_HPP */
@@ -0,0 +1,84 @@
/******************************************************************************
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*****************************************************************************/
#include "tester.hpp"
#include "tester_arguments.hpp"
#include <cstdlib>
#include <iostream>
#include <roc_shmem/roc_shmem.hpp>
using namespace rocshmem;
TesterArguments::TesterArguments(int argc, char *argv[])
{
for (int i = 1; i < argc; i++) {
std::string arg = argv[i];
if (arg == "-w") {
i++;
num_wgs = atoi(argv[i]);
} else if (arg == "-S") {
i++;
max_msg_size = atoll(argv[i]);
} else if (arg == "-s") {
i++;
min_msg_size = atoll(argv[i]);
} else if (arg == "-a") {
i++;
algorithm = atoi(argv[i]);
} else if (arg == "-z") {
i++;
wg_size = atoi(argv[i]);
} else if (arg == "-x") {
i++;
shmem_context = atoi(argv[i]);
} else {
show_usage(argv[0]);
exit(-1);
}
}
}
void
TesterArguments::show_usage(std::string executable_name)
{
std::cout << "Usage: " << executable_name << std::endl;
std::cout << "\t-t <number of roc_shmem service threads>\n";
std::cout << "\t-w <number of workgroups>\n";
std::cout << "\t-s <maximum message size (in bytes)>\n";
std::cout << "\t-a <algorithm number to test>\n";
std::cout << "\t-z <WorkGroup Size>\n";
std::cout << "\t-c <Coalescing Coefficient>\n";
std::cout << "\t-o <Operation type for the random_access test>\n";
std::cout << "\t-ta <Number of Thread Accessing the communication>\n";
std::cout << "\t-x <shmem context>\n";
}
void
TesterArguments::get_rocshmem_arguments()
{
numprocs = roc_shmem_n_pes();
myid = roc_shmem_my_pe();
}
@@ -0,0 +1,74 @@
/******************************************************************************
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*****************************************************************************/
#ifndef _TESTER_ARGUMENTS_HPP_
#define _TESTER_ARGUMENTS_HPP_
#include <string>
#include <climits>
#include <cstdint>
class TesterArguments
{
public:
TesterArguments(int argc, char *argv[]);
/**
* Initialize rocshmem members
* Valid after roc_shmem_init function called.
*/
void get_rocshmem_arguments();
private:
/**
* Output method which displays available command line options
*/
static void show_usage(std::string executable_name);
public:
/**
* Arguments obtained from command line
*/
unsigned num_wgs = 1;
unsigned algorithm = 0;
uint64_t min_msg_size = 1;
uint64_t max_msg_size = 1 << 20;
unsigned wg_size = 64;
unsigned shmem_context = 8; // ROC_SHMEM_CTX_WG_PRIVATE
/**
* Arguments obtained from rocshmem
*/
unsigned numprocs = UINT_MAX;
unsigned myid = UINT_MAX;
/**
* Defaults tester values
*/
int loop = 100;
int skip = 10;
int loop_large = 25;
int large_message_size = 32768;
};
#endif
@@ -0,0 +1,144 @@
###############################################################################
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
###############################################################################
cmake_minimum_required(VERSION 3.16.3 FATAL_ERROR)
###############################################################################
# GLOBAL COMPILE FLAGS
###############################################################################
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_COMPILER /opt/rocm/bin/hipcc)
###############################################################################
# DEFAULT BUILD TYPE
###############################################################################
if(NOT CMAKE_BUILD_TYPE)
message(STATUS "CMAKE_BUILD_TYPE unspecified: generating Release build")
set(
CMAKE_BUILD_TYPE
"Release"
CACHE
STRING
"build type: Release, Debug, RelWithDebInfo, MinSizeRel"
FORCE
)
endif()
###############################################################################
# PROJECT
###############################################################################
project(spts VERSION 1.1.0 LANGUAGES CXX)
###############################################################################
# CONFIGURATION OPTIONS
###############################################################################
option(USE_HIP "Build HIP version of the solver" OFF)
option(USE_ROCSHMEM "Build ROC_SHMEM enabled version of the solver" OFF)
option(ALL_ANALYZE "Build analyze and solve algorithm" OFF)
option(USE_DOUBLE "Use double precision floats for the data" OFF)
option(ALL_LEVELSET "Build levelset algorithm" OFF)
option(ALL_LEVELSYNC "Build levelsync algorithm" OFF)
option(ALL_SYNCFREE "Build syncfree algorithm" OFF)
configure_file(cmake/config.h.in config.h)
###############################################################################
# SOURCES
###############################################################################
add_executable(${PROJECT_NAME} "")
target_include_directories(
${PROJECT_NAME}
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}
$<BUILD_INTERFACE:${CMAKE_BINARY_DIR}> # CONFIG.H
)
target_sources(
${PROJECT_NAME}
PRIVATE
InputFlags.cpp
Main.cpp
)
###############################################################################
# HIP / HIP + ROC_SHMEM
###############################################################################
if(USE_HIP)
find_package(hip REQUIRED)
target_sources(
${PROJECT_NAME}
PRIVATE
HIPHelper.cpp
)
if(USE_ROC_SHMEM)
find_package(rocshmem CONFIG REQUIRED)
target_include_directories(
${PROJECT_NAME}
PRIVATE
rocshmem::rocshmem
)
target_link_libraries(
${PROJECT_NAME}
PRIVATE
rocshmem::rocshmem
hip::host
-fgpu-rdc
)
endif()
###############################################################################
# OPENCL
###############################################################################
else()
if(USE_ROC_SHMEM)
message(FATAL_ERROR "Cannot use ROC_SHMEM without USE_HIP")
endif()
target_sources(
${PROJECT_NAME}
PRIVATE
OpenCLHelper.cpp
)
target_include_directories(
${PROJECT_NAME}
PRIVATE
/opt/rocm/opencl/include
)
target_link_libraries(
${PROJECT_NAME}
PRIVATE
-L/opt/rocm/opencl/lib/x86_64
-lOpenCL
)
endif()
@@ -0,0 +1,85 @@
/********************************************************************************
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
********************************************************************************/
#ifndef GPUHelper_H
#define GPUHelper_H
#include "config.h"
#include <string>
#include <iostream>
#include <sstream>
#include "InputFlags.h"
#define ROW_BITS 32 // May be not the right place to define this macro
#define WG_BITS 24
static int SPTS_BLOCK_SIZE = 0;
#ifdef USE_ROC_SHMEM
#define WF_PER_WG 1
#else
#define WF_PER_WG 16
#endif
#define WF_SIZE 64
#ifdef USE_HIP
#include <hip/hip_runtime.h>
typedef void * memPointer;
typedef int memPointer_flags;
typedef int gpuInt;
typedef bool gpuBool;
typedef hipEvent_t gpuEvent;
typedef hipError_t gpuError;
#define GPU_MEM_READ_ONLY 0
#define GPU_MEM_READ_WRITE 0
#define GPU_MEM_USE_HOST_PTR 0
#define GPU_TRUE true
#define GPU_FALSE false
#else
#include <CL/cl.h>
typedef cl_mem memPointer;
typedef cl_mem_flags memPointer_flags;
typedef cl_int gpuInt;
typedef cl_bool gpuBool;
typedef cl_event gpuEvent;
typedef cl_int gpuError;
#define GPU_MEM_READ_ONLY CL_MEM_READ_ONLY
#define GPU_MEM_READ_WRITE CL_MEM_READ_ONLY
#define GPU_MEM_USE_HOST_PTR CL_MEM_USE_HOST_PTR
#define GPU_TRUE CL_TRUE
#define GPU_FALSE CL_FALSE
#endif
class GPUHelper
{
public:
GPUHelper() {}
virtual int Init(const std::string &_filename, InputFlags &in_flags) = 0;
virtual void checkStatus(gpuError status, const std::string errString) = 0;
virtual void CopyToDevice(memPointer _d_buf, void *_h_buf, size_t _size, size_t _offset, gpuBool _blocking, gpuEvent *_ev) = 0;
virtual void CopyToHost(memPointer _d_buf, void *_h_buf, size_t _size, size_t _offset, gpuBool _blocking, gpuEvent *_ev) = 0;
virtual memPointer AllocateMem(const std::string name, size_t, memPointer_flags flags, void *) = 0;
virtual void FreeMem(memPointer ptr) = 0;
virtual void Flush() = 0;
};
#endif //GPUHelper_H
@@ -0,0 +1,99 @@
/********************************************************************************
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
********************************************************************************/
#include "HIPHelper.h"
#include <cstring>
#include <string>
#include <iostream>
int HIPHelper::Init(const std::string &filename, InputFlags &in_flags)
{
int device = 0;
hipSetDevice(device);
hipDeviceProp_t props;
hipGetDeviceProperties(&props, device /*deviceID*/);
printf("info: running on device %s\n", props.name);
printf("info: architecture on AMD GPU device is: %d\n", props.gcnArch);
return 0;
}
void HIPHelper::checkStatus(gpuError status, const std::string errString)
{
if (status != HIP_SUCCESS)
{
std::cerr << errString << " : " << hipGetErrorString(status) << std::endl;
exit(-1);
}
}
memPointer HIPHelper::AllocateMem(const std::string name,
size_t size,
memPointer_flags flags,
void *hostBuffer)
{
void* buf;
std::string errString = "HIP error allocating " + name + " !";
checkStatus(hipMalloc(&buf, size), errString);
printf("Allocating %s of size %zu at buf %p\n", name.c_str(), size, buf);
return buf;
}
void HIPHelper::CopyToDevice(memPointer devBuffer,
void *hostBuffer,
size_t size,
size_t offset,
gpuBool blocking,
gpuEvent *ev)
{
assert(offset == 0);
memcpy(devBuffer, hostBuffer, size);
/*
if (blocking == GPU_TRUE) {
checkStatus(hipMemcpy(devBuffer, hostBuffer, size, hipMemcpyHostToDevice),
"HIP error copying data to device !");
} else {
checkStatus(hipMemcpyAsync(devBuffer, hostBuffer, size, hipMemcpyHostToDevice),
"HIP error copying data to device !");
}
*/
}
void HIPHelper::CopyToHost(memPointer devBuffer,
void *hostBuffer,
size_t size,
size_t offset,
gpuBool blocking,
gpuEvent *ev)
{
assert(offset == 0);
memcpy(hostBuffer, devBuffer, size);
/*
if (blocking == GPU_TRUE) {
checkStatus(hipMemcpy(hostBuffer, devBuffer, size, hipMemcpyDeviceToHost),
"HIP error copying data to device !");
} else {
checkStatus(hipMemcpyAsync(hostBuffer, devBuffer, size, hipMemcpyDeviceToHost),
"HIP error copying data to device !");
}
*/
}
@@ -0,0 +1,50 @@
/********************************************************************************
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
********************************************************************************/
#ifndef CLHelper_H
#define CLHelper_H
#define CL_USE_DEPRECATED_OPENCL_2_0_APIS
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
#include <string>
#include <iostream>
#include <sstream>
#include "InputFlags.h"
#include "GPUHelper.h"
#include "hip/hip_runtime.h"
class HIPHelper : public GPUHelper
{
public:
HIPHelper() {}
int Init(const std::string &_filename, InputFlags &in_flags);
void checkStatus(gpuError status, const std::string errString);
void CopyToDevice(memPointer _d_buf, void *_h_buf, size_t _size, size_t _offset, gpuBool _blocking, gpuEvent *_ev);
void CopyToHost(memPointer _d_buf, void *_h_buf, size_t _size, size_t _offset, gpuBool _blocking, gpuEvent *_ev);
memPointer AllocateMem(const std::string name, size_t, memPointer_flags flags, void *);
void FreeMem(memPointer ptr) { hipFree(ptr); }
void Flush() { hipDeviceSynchronize(); }
};
#endif //CLHelper_H
@@ -0,0 +1,179 @@
/********************************************************************************
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
********************************************************************************/
#include <iomanip>
#include <vector>
#include <iostream>
#include "InputFlags.h"
InputFlags::InputFlags()
{
AddInputFlag("help", 'h', "", "Print Help Message", "string");
}
void InputFlags::AddInputFlag(const std::string &_long_name,
char _short_name,
const std::string &_value,
const std::string &_help_text,
const std::string &_type)
{
Input in;
in.long_name = _long_name;
in.short_name = _short_name;
in.value = _value;
in.help_text = _help_text;
in.type = _type;
if(MapInputs.count(_short_name) > 0)
printf("Input flag: %s (%c) already exists !", _long_name.c_str(), _short_name);
else
MapInputs[_short_name] = in;
}
void InputFlags::Print()
{
printf("SpTS Input Flags: \n\n");
for(auto &content : MapInputs)
std::cout<<std::setw(8)<<"--"<<content.second.long_name<<std::setw(20 - content.second.long_name.length())<<"-"<<content.first<<std::setw(8)<<" "<<content.second.help_text<<"\n";
exit(0);
}
char InputFlags::FindShortName(const std::string &long_name)
{
char short_name = '\0';
for(auto &content : MapInputs)
{
if(content.second.long_name == long_name)
short_name = content.first;
}
if(short_name == '\0')
{
std::cout<<"Long Name: "<<long_name<<" Not Found !";
exit(0);
}
return short_name;
}
void InputFlags::Parse(int argc, char *argv[])
{
std::vector<std::string> args;
for(int i = 1; i < argc; i++)
args.push_back(argv[i]);
if(args.size() == 0) // No Input Flag
Print();
for(int i = 0; i < args.size(); i++)
{
std::string temp = args[i];
if(temp[0] != '-')
{
printf("Illegal input flag\n");
Print();
}
else if(temp[0] == '-' && temp[1] == '-') // Long Name Input
{
std::string long_name = temp.substr(2);
if(long_name == "help")
Print();
char short_name = FindShortName(long_name);
if (short_name == 'n' || short_name == 'z' || short_name == 'v')
{
MapInputs[short_name].value = "true";
}
else
{
MapInputs[short_name].value = args[i+1];
i++;
}
}
else if (temp[0] == '-' && temp[1] == '?') // Help Input
Print();
else // Short Name Input
{
char short_name = temp[1];
if(MapInputs.find(short_name) == MapInputs.end())
{
std::cout<<"Input Flag: "<<short_name<<" Not Found !";
exit(0);
}
if(short_name == 'h')
Print();
if(short_name == 'n' || short_name == 'z' || short_name == 'v' )
{
MapInputs[short_name].value = "true";
}
else
{
MapInputs[short_name].value = args[i+1];
i++;
}
}
}
}
std::string InputFlags::GetValueStr(const std::string &long_name)
{
char short_name = FindShortName(long_name);
std::string value = MapInputs[short_name].value;
return value;
}
int InputFlags::GetValueInt(const std::string &long_name)
{
char short_name = FindShortName(long_name);
int value = atoi(MapInputs[short_name].value.c_str());
return value;
}
uint64_t InputFlags::GetValueUint64(const std::string &long_name)
{
char short_name = FindShortName(long_name);
uint64_t value = strtoull(MapInputs[short_name].value.c_str(), NULL, 10);
return value;
}
float InputFlags::GetValueFloat(const std::string &long_name)
{
char short_name = FindShortName(long_name);
float value = std::stof(MapInputs[short_name].value);
return value;
}
bool InputFlags::GetValueBool(const std::string &long_name)
{
char short_name = FindShortName(long_name);
if (MapInputs[short_name].value == "true")
return true;
else
return false;
}
@@ -0,0 +1,62 @@
/********************************************************************************
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
********************************************************************************/
#ifndef InputFlags_H
#define InputFlags_H
#include <string>
#include <map>
struct Input
{
std::string long_name;
char short_name;
std::string value;
std::string help_text;
std::string type;
};
class InputFlags
{
std::map<char, Input> MapInputs;
public:
InputFlags();
virtual void AddDerivedInputFlags() = 0;
void AddInputFlag(const std::string &_long_name,
char _short_name,
const std::string &_value,
const std::string &_help_text,
const std::string &type);
void Parse(int argc, char *argv[]);
char FindShortName(const std::string &long_name);
void Print();
std::string GetValueStr(const std::string &long_name);
int GetValueInt(const std::string &long_name);
uint64_t GetValueUint64(const std::string &long_name);
float GetValueFloat(const std::string &long_name);
bool GetValueBool(const std::string &long_name);
virtual ~InputFlags() {}
};
#endif //InputFlags_H
@@ -0,0 +1,19 @@
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
@@ -0,0 +1,193 @@
/********************************************************************************
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
********************************************************************************/
#include "config.h"
#ifdef USE_HIP
#include "hip/hip_runtime.h"
#else
#include "OpenCLHelper.h"
#endif
#ifdef USE_RO_SHMEM
#include "mpi.h"
#endif
#include "MatrixMarketReader.h"
#include "SpTS.h"
#include <iostream>
#include <unistd.h>
#include <limits.h>
#ifdef USE_DOUBLE
typedef double FPTYPE;
#else
typedef float FPTYPE;
#endif
using namespace rocshmem;
int main(int argc, char *argv[])
{
SparseTriangularSolve<FPTYPE> spts_obj;
InputFlags &in_flags = spts_obj;
in_flags.AddDerivedInputFlags();
in_flags.Parse(argc, argv);
FPTYPE alpha = in_flags.GetValueFloat("alpha");
printf("Reading input file: %s...", in_flags.GetValueStr("filename").c_str());fflush(stdout);
MatrixMarketReader<FPTYPE> mm_reader;
if (mm_reader.MMReadFormat(in_flags.GetValueStr("filename"), in_flags) != 0)
{
fprintf(stderr, "ERROR reading input file !\n");
exit(1);
}
printf("Done.\n");
GPUHelper *GPU;
#ifdef USE_HIP
printf("Initializing HIP runtime...\n\t");fflush(stdout);
GPU = new HIPHelper();
char buf[PATH_MAX + 1];
readlink("/proc/self/exe", buf, sizeof(buf) - 1);
std::string str(buf);
printf("Going to try to open %s\n", (str.substr(0, str.rfind('/'))+"/spts_kernel.cl").c_str());
if(GPU->Init((str.substr(0, str.rfind('/'))+ "/spts_kernel.cl").c_str(), in_flags) == 1)
{
fflush(stdout);
fprintf(stderr,"\nError Initializing HIP Runtime !\n");
exit(-1);
}
#else
printf("Initializing OpenCL runtime...\n\t");fflush(stdout);
GPU = new CLHelper();
char buf[PATH_MAX + 1];
readlink("/proc/self/exe", buf, sizeof(buf) - 1);
std::string str(buf);
printf("Going to try to open %s\n", (str.substr(0, str.rfind('/'))+"/spts_kernel.cl").c_str());
if(GPU->Init((str.substr(0, str.rfind('/'))+ "/spts_kernel.cl").c_str(), in_flags) == 1)
{
fflush(stdout);
fprintf(stderr,"\nError Initializing OpenCL Runtime !\n");
exit(-1);
}
#endif
printf("Done.\n");
printf("Allocating sparse matrices...");fflush(stdout);
spts_obj.AllocateSparseMatrix(mm_reader, in_flags, GPU);
printf("Done.\n");
printf("Converting COO to CSR...");fflush(stdout);
spts_obj.ConvertFromCOOToCSR(mm_reader.GetCoordinates(), in_flags);
printf("Done.\n");
SPTS_BLOCK_SIZE = in_flags.GetValueInt("block_size");
printf("Finding Stats For Parallel Decomposition...");fflush(stdout);
spts_obj.FindStatsForParallelDecomposition();
printf("Done.\n");
printf("Allocating parallel sparse matrices...");fflush(stdout);
spts_obj.AllocateParallelSparseMatrix(mm_reader, in_flags);
printf("Done.\n");
printf("Allocating vectors...");fflush(stdout);
spts_obj.AllocateVectors(mm_reader);
printf("Done.\n");
float gflops = 0.f;
int errors = 0;
uint64_t ns_per_iter = 0;
uint64_t ns_per_analysis_iter = 0;
uint64_t ns_per_syncfree_iter = 0;
uint64_t ns_per_levelset_iter = 0;
uint64_t ns_per_levelsync_iter = 0;
printf("Performing SpTS on the CPU with alpha=%f...", (float)alpha);fflush(stdout);
spts_obj.CSRSpTSCPU(alpha);
printf("Done.\n");
printf("Checking results of CPU-side SpTS...");fflush(stdout);
if (!spts_obj.CSRCheckCPU(alpha))
{
fflush(stdout);
fprintf(stderr, "CPU-based results were 'wrong', likely due to FP rounding. Expect the CPU and GPU to differ wildly.\n");
//exit(-1);
}
printf("Done.\n");
printf("Performing %d iterations of SpTS on the GPU with alpha=%f...", in_flags.GetValueInt("iterations"), (float)alpha);fflush(stdout);
gflops = spts_obj.CSRSpTSGPU(ns_per_iter, ns_per_analysis_iter, ns_per_syncfree_iter, ns_per_levelset_iter, ns_per_levelsync_iter, alpha);
printf("Done.\n");
if (in_flags.GetValueBool("verify")) {
printf("Checking whether GPU SpTS caused non-deterministic errors...\n");fflush(stdout);
int non_det_errors = spts_obj.NonDeterministicErrors();
printf("Done.\n");
if (non_det_errors)
fprintf(stderr, "ERROR!! -- Saw %d GPU iterations that had non-deterministic differences.\n", non_det_errors);
int max_errors = spts_obj.MaxErrors();
if (max_errors)
{
if (max_errors > 1)
printf(" -- %d rows differed between CPU and GPU results.\n", max_errors);
else
printf(" -- %d row differed between CPU and GPU results.\n", max_errors);
}
else
printf("\n");
}
printf("File %s : SpTS Gflops: %f ms_per_iter: %lf ", in_flags.GetValueStr("filename").c_str(), gflops, ((double)ns_per_iter/1000000.));
printf(" ( ms_per_analysis_iter: ");
if (ns_per_analysis_iter == 0)
printf("no_iter");
else
printf("%lf", ((double)ns_per_analysis_iter/1000000.));
printf(" | ms_per_syncfree_iter: ");
if (ns_per_syncfree_iter == 0)
printf("no_iter");
else
printf("%lf", ((double)ns_per_syncfree_iter/1000000.));
printf(" | ms_per_levelset_iter: ");
if (ns_per_levelset_iter == 0)
printf("no_iter");
else
printf("%lf", ((double)ns_per_levelset_iter/1000000.));
printf(" | ms_per_levelsync_iter: ");
if (ns_per_levelsync_iter == 0)
printf("no_iter )");
else
printf("%lf )", ((double)ns_per_levelsync_iter/1000000.));
#ifdef USE_ROC_SHMEM
MPI_Allreduce(MPI_IN_PLACE, (void *) &ns_per_analysis_iter, 1,
MPI_UNSIGNED_LONG, MPI_SUM, MPI_COMM_WORLD);
if (spts_obj.Get_this_pe() == 0) {
printf("\nRANK 0: analysis avg ms = %lf\n",
((double) ns_per_analysis_iter / 1000000.) / spts_obj.Get_total_pes());
}
#endif
return 0;
}
@@ -0,0 +1,377 @@
/********************************************************************************
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
********************************************************************************/
#ifndef MatrixMarketReader_H
#define MatrixMarketReader_H
/*
Portions of this file include code provided by The National Institute of
Standards and Technology (NIST). The code includes
macro definitions from mmio.h and is subject to the following disclaimer.
Software Disclaimer
NIST-developed software is provided by NIST as a public service. You may use,
copy and distribute copies of the software in any medium, provided that you
keep intact this entire notice. You may improve, modify and create derivative
works of the software or any portion of the software, and you may copy and
distribute such modifications or works. Modified works should carry a notice
stating that you changed the software and should note the date and nature of
any such change. Please explicitly acknowledge the National Institute of
Standards and Technology as the source of the software.
NIST-developed software is expressly provided "AS IS" NIST MAKES NO WARRANTY
OF ANY KIND, EXPRESS, IMPLIED, IN FACT OR ARISING BY OPERATION OF LAW,
INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTY OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT AND DATA ACCURACY. NIST
NEITHER REPRESENTS NOR WARRANTS THAT THE OPERATION OF THE SOFTWARE WILL BE
UNINTERRUPTED OR ERROR-FREE, OR THAT ANY DEFECTS WILL BE CORRECTED. NIST DOES
NOT WARRANT OR MAKE ANY REPRESENTATIONS REGARDING THE USE OF THE SOFTWARE OR
THE RESULTS THEREOF, INCLUDING BUT NOT LIMITED TO THE CORRECTNESS, ACCURACY,
RELIABILITY, OR USEFULNESS OF THE SOFTWARE.
You are solely responsible for determining the appropriateness of using and
distributing the software and you assume all risks associated with its use,
including but not limited to the risks and costs of program errors, compliance
with applicable laws, damage to or loss of data, programs or equipment, and
the unavailability or interruption of operation. This software is not intended
to be used in any situation where a failure could cause risk of injury or
damage to property. The software developed by NIST employees is not subject
to copyright protection within the United States.
*/
#include <string>
#include <cstring>
#include <fstream>
#include <cstdio>
#include <iostream>
#include "InputFlags.h"
#include <typeinfo>
#include "mmio.h"
// Class declaration
template<typename FloatType>
struct Coordinate {
int x;
int y;
FloatType val;
};
template <typename FloatType>
class MatrixMarketReader
{
char Typecode[4];
int nNZ;
int nRows;
int nCols;
int isSymmetric;
int isDoubleMem;
Coordinate<FloatType> *coords;
bool *has_seen_diag;
public:
MatrixMarketReader() : nNZ(0), nRows(0), nCols(0), isSymmetric(0), isDoubleMem(0)
{
for (int i = 0; i < sizeof(Typecode); i++)
Typecode[i] = '\0';
coords = NULL;
}
bool MMReadFormat(const std::string &_filename, InputFlags &_in_flags);
bool MMReadBanner(FILE *_infile);
bool MMReadMtxCrdSize(FILE *_infile);
void MMGenerateCOOFromFile(FILE *_infile, InputFlags &_in_flags);
int GetNumRows() { return nRows; }
int GetNumCols() { return nCols; }
int GetNumNonZeroes() { return nNZ; }
int GetSymmetric() { return isSymmetric; }
char *GetTypecode() { return Typecode; }
Coordinate<FloatType> *GetCoordinates() { return coords; }
~MatrixMarketReader()
{
delete[] coords;
}
};
// Class definition
template<typename FloatType>
bool MatrixMarketReader<FloatType>::MMReadFormat(const std::string &filename, InputFlags &in_flags)
{
FILE *mm_file = fopen(filename.c_str(), "r");
if( mm_file == NULL)
{
printf("Cannot Open Matrix-Market File !\n");
return 1;
}
int status = MMReadBanner(mm_file);
if(status != 0)
{
printf("Error Reading Banner in Matrix-Market File !\n");
return 1;
}
if(! mm_is_coordinate(Typecode))
{printf(" only handling coordinate format\n"); return(1);}
if(mm_is_complex(Typecode)) {
printf("Error: cannot handle complex format\n");
return (1);
}
if(mm_is_symmetric(Typecode))
isSymmetric = 1;
status = MMReadMtxCrdSize(mm_file);
if(status != 0) {
printf("Error reading Matrix Market crd_size %d\n",status);
return(1);
}
if(mm_is_symmetric(Typecode))
coords = new Coordinate<FloatType>[nNZ+nRows];
else if (in_flags.GetValueBool("non_symmetric"))
coords = new Coordinate<FloatType>[nNZ+nRows]; // This is too large, but oh well.
else
{
fprintf(stderr, "Error: Input matrix is NOT symmetric. This will not work for SpTS.\n");
return (1);
}
has_seen_diag = new bool[nRows];
for (int i = 0; i < nRows; i++)
has_seen_diag[i] = false;
MMGenerateCOOFromFile(mm_file, in_flags);
return 0;
}
template<typename FloatType>
void FillCoordData(char Typecode[],
Coordinate<FloatType> *coords,
bool *has_seen_diag,
int &actual_nnz,
int ir,
int ic,
FloatType val)
{
int new_x = ir - 1;
int new_y = ic - 1;
if (new_y > new_x)
{
// Skip stuff in the upper diagonal
// Just keep our lower diag.
return;
}
if (new_y == new_x)
has_seen_diag[new_x] = true;
coords[actual_nnz].x = new_x;
coords[actual_nnz].y = new_y;
coords[actual_nnz ++].val = val;
}
template<typename FloatType>
void FixupMissingDiags(char Typecode[],
Coordinate<FloatType> *coords,
int &actual_nnz,
int nRows,
bool *has_seen_diag,
InputFlags &in_flags)
{
for(int i = 0; i < nRows; i++)
{
if (has_seen_diag[i] == false)
{
coords[actual_nnz].x = i;
coords[actual_nnz].y = i;
coords[actual_nnz ++].val = 1.;
}
}
}
template<typename FloatType>
void MatrixMarketReader<FloatType>::MMGenerateCOOFromFile(FILE *infile,
InputFlags &in_flags)
{
int actual_nnz = 0;
FloatType val;
int ir, ic;
int exp_zeroes = in_flags.GetValueBool("exp_zeroes");
for(int i = 0; i < nNZ; i++)
{
if(mm_is_real(Typecode))
{
if(typeid(FloatType) == typeid(float))
fscanf(infile, "%d %d %f\n", &ir, &ic, (float*)(&val));
else if(typeid(FloatType) == typeid(double))
fscanf(infile, "%d %d %lf\n", &ir, &ic, (double*)(&val));
if(exp_zeroes == 0 && val == 0)
continue;
else
FillCoordData(Typecode, coords, has_seen_diag, actual_nnz, ir, ic, val);
}
else if (mm_is_integer(Typecode))
{
if(typeid(FloatType) == typeid(float))
fscanf(infile, "%d %d %f\n", &ir, &ic, (float*)(&val));
else if(typeid(FloatType) == typeid(double))
fscanf(infile, "%d %d %lf\n", &ir, &ic, (double*)(&val));
if(exp_zeroes == 0 && val == 0)
continue;
else
FillCoordData(Typecode, coords, has_seen_diag, actual_nnz, ir, ic, val);
}
else if(mm_is_pattern(Typecode))
{
fscanf(infile, "%d %d", &ir, &ic);
//val = ((FloatType) MAX_RAND_VAL * (rand() / (RAND_MAX + 1.0)));
val = 3.;
if(exp_zeroes == 0 && val == 0)
continue;
else
FillCoordData(Typecode, coords, has_seen_diag, actual_nnz, ir, ic, val);
}
}
FixupMissingDiags(Typecode, coords, actual_nnz, nRows, has_seen_diag, in_flags);
nNZ = actual_nnz;
printf("\n\tNNZ in the lower triangular and fixedup diagonal: %d\n", nNZ);
}
template<typename FloatType>
bool MatrixMarketReader<FloatType>::MMReadBanner(FILE *infile)
{
char line[MM_MAX_LINE_LENGTH];
char banner[MM_MAX_TOKEN_LENGTH];
char mtx[MM_MAX_TOKEN_LENGTH];
char crd[MM_MAX_TOKEN_LENGTH];
char data_type[MM_MAX_TOKEN_LENGTH];
char storage_scheme[MM_MAX_TOKEN_LENGTH];
char *p;
mm_clear_typecode(Typecode);
if (fgets(line, MM_MAX_LINE_LENGTH, infile) == NULL)
return MM_PREMATURE_EOF;
if (sscanf(line, "%s %s %s %s %s", banner, mtx, crd, data_type,
storage_scheme) != 5)
return MM_PREMATURE_EOF;
for (p=mtx; *p!='\0'; *p=tolower(*p),p++); /* convert to lower case */
for (p=crd; *p!='\0'; *p=tolower(*p),p++);
for (p=data_type; *p!='\0'; *p=tolower(*p),p++);
for (p=storage_scheme; *p!='\0'; *p=tolower(*p),p++);
/* check for banner */
if (strncmp(banner, MatrixMarketBanner, strlen(MatrixMarketBanner)) != 0)
return MM_NO_HEADER;
/* first field should be "mtx" */
if (strcmp(mtx, MM_MTX_STR) != 0)
return MM_UNSUPPORTED_TYPE;
mm_set_matrix(Typecode);
/* second field describes whether this is a sparse matrix (in coordinate
storgae) or a dense array */
if (strcmp(crd, MM_SPARSE_STR) == 0)
mm_set_sparse(Typecode);
else if (strcmp(crd, MM_DENSE_STR) == 0)
mm_set_dense(Typecode);
else
return MM_UNSUPPORTED_TYPE;
/* third field */
if (strcmp(data_type, MM_REAL_STR) == 0)
mm_set_real(Typecode);
else
if (strcmp(data_type, MM_COMPLEX_STR) == 0)
mm_set_complex(Typecode);
else
if (strcmp(data_type, MM_PATTERN_STR) == 0)
mm_set_pattern(Typecode);
else
if (strcmp(data_type, MM_INT_STR) == 0)
mm_set_integer(Typecode);
else
return MM_UNSUPPORTED_TYPE;
/* fourth field */
if (strcmp(storage_scheme, MM_GENERAL_STR) == 0)
mm_set_general(Typecode);
else
if (strcmp(storage_scheme, MM_SYMM_STR) == 0)
mm_set_symmetric(Typecode);
else
if (strcmp(storage_scheme, MM_HERM_STR) == 0)
mm_set_hermitian(Typecode);
else
if (strcmp(storage_scheme, MM_SKEW_STR) == 0)
mm_set_skew(Typecode);
else
return MM_UNSUPPORTED_TYPE;
return 0;
}
template<typename FloatType>
bool MatrixMarketReader<FloatType>::MMReadMtxCrdSize(FILE *infile)
{
char line[MM_MAX_LINE_LENGTH];
int num_items_read;
/* now continue scanning until you reach the end-of-comments */
do
{
if (fgets(line,MM_MAX_LINE_LENGTH, infile) == NULL)
return MM_PREMATURE_EOF;
}while (line[0] == '%');
/* line[] is either blank or has M,N, nz */
if (sscanf(line, "%d %d %d", &nRows, &nCols, &nNZ) == 3)
return 0;
else
do
{
num_items_read = fscanf(infile, "%d %d %d", &nRows, &nCols, &nNZ);
if (num_items_read == EOF) return MM_PREMATURE_EOF;
}
while (num_items_read != 3);
return 0;
}
#endif // MatrixMarketReader_H
@@ -0,0 +1,486 @@
/********************************************************************************
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
********************************************************************************/
#include "OpenCLHelper.h"
#include <cstring>
#include <string>
#include <iostream>
cl_context CLHelper::context = NULL;
cl_command_queue CLHelper::commandQueue = NULL;
cl_kernel CLHelper::SpTSKernel = NULL;
cl_kernel CLHelper::SpTSKernel_analyze = NULL;
cl_kernel CLHelper::SpTSKernel_levelset = NULL;
cl_kernel CLHelper::SpTSKernel_scalar = NULL;
cl_kernel CLHelper::SpTSKernel_vector = NULL;
cl_kernel CLHelper::SpTSKernel_levelsync = NULL;
const char * get_cl_err_string(cl_int err)
{
switch (err)
{
case CL_SUCCESS:
return "CL_SUCCESS";
case CL_DEVICE_NOT_FOUND:
return "CL_DEVICE_NOT_FOUND";
case CL_DEVICE_NOT_AVAILABLE:
return "CL_DEVICE_NOT_AVAILABLE";
case CL_COMPILER_NOT_AVAILABLE:
return "CL_COMPILER_NOT_AVAILABLE";
case CL_MEM_OBJECT_ALLOCATION_FAILURE:
return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
case CL_OUT_OF_RESOURCES:
return "CL_OUT_OF_RESOURCES";
case CL_OUT_OF_HOST_MEMORY:
return "CL_OUT_OF_HOST_MEMORY";
case CL_PROFILING_INFO_NOT_AVAILABLE:
return "CL_PROFILING_INFO_NOT_AVAILABLE";
case CL_MEM_COPY_OVERLAP:
return "CL_MEM_COPY_OVERLAP";
case CL_IMAGE_FORMAT_MISMATCH:
return "CL_IMAGE_FORMAT_MISMATCH";
case CL_IMAGE_FORMAT_NOT_SUPPORTED:
return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
case CL_BUILD_PROGRAM_FAILURE:
return "CL_BUILD_PROGRAM_FAILURE";
case CL_MAP_FAILURE:
return "CL_MAP_FAILURE";
#ifdef CL_VERSION_1_1
case CL_MISALIGNED_SUB_BUFFER_OFFSET:
return "CL_MISALIGNED_SUB_BUFFER_OFFSET";
case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST:
return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";
#endif
#ifdef CL_VERSION_1_2
case CL_COMPILE_PROGRAM_FAILURE:
return "CL_COMPILE_PROGRAM_FAILURE";
case CL_LINKER_NOT_AVAILABLE:
return "CL_LINKER_NOT_AVAILABLE";
case CL_LINK_PROGRAM_FAILURE:
return "CL_LINK_PROGRAM_FAILURE";
case CL_DEVICE_PARTITION_FAILED:
return "CL_DEVICE_PARTITION_FAILED";
case CL_KERNEL_ARG_INFO_NOT_AVAILABLE:
return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE";
#endif
case CL_INVALID_VALUE:
return "CL_INVALID_VALUE";
case CL_INVALID_DEVICE_TYPE:
return "CL_INVALID_DEVICE_TYPE";
case CL_INVALID_PLATFORM:
return "CL_INVALID_PLATFORM";
case CL_INVALID_DEVICE:
return "CL_INVALID_DEVICE";
case CL_INVALID_CONTEXT:
return "CL_INVALID_CONTEXT";
case CL_INVALID_QUEUE_PROPERTIES:
return "CL_INVALID_QUEUE_PROPERTIES";
case CL_INVALID_COMMAND_QUEUE:
return "CL_INVALID_COMMAND_QUEUE";
case CL_INVALID_HOST_PTR:
return "CL_INVALID_HOST_PTR";
case CL_INVALID_MEM_OBJECT:
return "CL_INVALID_MEM_OBJECT";
case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:
return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
case CL_INVALID_IMAGE_SIZE:
return "CL_INVALID_IMAGE_SIZE";
case CL_INVALID_SAMPLER:
return "CL_INVALID_SAMPLER";
case CL_INVALID_BINARY:
return "CL_INVALID_BINARY";
case CL_INVALID_BUILD_OPTIONS:
return "CL_INVALID_BUILD_OPTIONS";
case CL_INVALID_PROGRAM:
return "CL_INVALID_PROGRAM";
case CL_INVALID_PROGRAM_EXECUTABLE:
return "CL_INVALID_PROGRAM_EXECUTABLE";
case CL_INVALID_KERNEL_NAME:
return "CL_INVALID_KERNEL_NAME";
case CL_INVALID_KERNEL_DEFINITION:
return "CL_INVALID_KERNEL_DEFINITION";
case CL_INVALID_KERNEL:
return "CL_INVALID_KERNEL";
case CL_INVALID_ARG_INDEX:
return "CL_INVALID_ARG_INDEX";
case CL_INVALID_ARG_VALUE:
return "CL_INVALID_ARG_VALUE";
case CL_INVALID_ARG_SIZE:
return "CL_INVALID_ARG_SIZE";
case CL_INVALID_KERNEL_ARGS:
return "CL_INVALID_KERNEL_ARGS";
case CL_INVALID_WORK_DIMENSION:
return "CL_INVALID_WORK_DIMENSION";
case CL_INVALID_WORK_GROUP_SIZE:
return "CL_INVALID_WORK_GROUP_SIZE";
case CL_INVALID_WORK_ITEM_SIZE:
return "CL_INVALID_WORK_ITEM_SIZE";
case CL_INVALID_GLOBAL_OFFSET:
return "CL_INVALID_GLOBAL_OFFSET";
case CL_INVALID_EVENT_WAIT_LIST:
return "CL_INVALID_EVENT_WAIT_LIST";
case CL_INVALID_EVENT:
return "CL_INVALID_EVENT";
case CL_INVALID_OPERATION:
return "CL_INVALID_OPERATION";
case CL_INVALID_GL_OBJECT:
return "CL_INVALID_GL_OBJECT";
case CL_INVALID_BUFFER_SIZE:
return "CL_INVALID_BUFFER_SIZE";
#ifdef CL_VERSION_1_1
case CL_INVALID_MIP_LEVEL:
return "CL_INVALID_MIP_LEVEL";
case CL_INVALID_GLOBAL_WORK_SIZE:
return "CL_INVALID_GLOBAL_WORK_SIZE";
case CL_INVALID_PROPERTY:
return "CL_INVALID_PROPERTY";
#ifdef cl_ext_device_fission
case CL_DEVICE_PARTITION_FAILED_EXT:
return "CL_DEVICE_PARTITION_FAILED_EXT";
case CL_INVALID_PARTITION_COUNT_EXT:
return "CL_INVALID_PARTITION_COUNT_EXT";
case CL_INVALID_PARTITION_NAME_EXT:
return "CL_INVALID_PARTITION_NAME_EXT";
#endif
#endif
#ifdef CL_VERSION_1_2
case CL_INVALID_IMAGE_DESCRIPTOR:
return "CL_INVALID_IMAGE_DESCRIPTOR";
case CL_INVALID_COMPILER_OPTIONS:
return "CL_INVALID_COMPILER_OPTIONS";
case CL_INVALID_LINKER_OPTIONS:
return "CL_INVALID_LINKER_OPTIONS";
case CL_INVALID_DEVICE_PARTITION_COUNT:
return "CL_INVALID_DEVICE_PARTITION_COUNT";
#endif
#ifdef CL_VERSION_2_0
case CL_INVALID_PIPE_SIZE:
return "CL_INVALID_PIPE_SIZE";
case CL_INVALID_DEVICE_QUEUE:
return "CL_INVALID_DEVICE_QUEUE";
#endif
#ifdef CL_VERSION_2_2
case CL_INVALID_SPEC_ID:
return "CL_INVALID_SPEC_ID";
case CL_MAX_SIZE_RESTRICTION_EXCEEDED:
return "CL_MAX_SIZE_RESTRICTION_EXCEEDED";
#endif
#ifdef cl_khr_icd
case CL_PLATFORM_NOT_FOUND_KHR:
return "CL_PLATFORM_NOT_FOUND_KHR";
#endif
default:
return "UNKNOWN CL ERROR CODE";
}
}
void convertToStr(char **source, size_t* sourceSize, const std::string fname)
{
FILE *fp = fopen(fname.c_str(), "r");
fseek(fp, 0, SEEK_END);
*sourceSize = ftell(fp);
fseek(fp , 0, SEEK_SET);
*source = (char *)malloc(*sourceSize * sizeof(char));
fread(*source, 1, *sourceSize, fp);
fclose(fp);
}
int CLHelper::Init(const std::string &filename, InputFlags &in_flags)
{
cl_int status = 0;
size_t deviceListSize;
unsigned int i;
/*
* Have a look at the available platforms and pick either
* the AMD one if available or a reasonable default.
*/
cl_uint numPlatforms;
platform = NULL;
status = clGetPlatformIDs(0, NULL, &numPlatforms);
if(status != CL_SUCCESS)
{
fprintf(stderr,"clGetPlatformIDs failed. %u",numPlatforms);
return 1;
}
if (0 < numPlatforms)
{
cl_platform_id* platforms = (cl_platform_id*)malloc(numPlatforms * sizeof(cl_platform_id));
status = clGetPlatformIDs(numPlatforms, platforms, NULL);
if(status != CL_SUCCESS)
{
fprintf(stderr, "clGetPlatformIDs failed: %s\n", get_cl_err_string(status) );
return 1;
}
for (i = 0; i < numPlatforms; ++i)
{
char pbuf[100];
status = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, sizeof(pbuf), pbuf, NULL);
if(status != CL_SUCCESS)
{
fprintf(stderr,"clGetPlatformInfo failed: %s\n", get_cl_err_string(status));
return 1;
}
platform = platforms[i];
if (!strcmp(pbuf, "Advanced Micro Devices, Inc."))
{
break;
}
}
free(platforms);
}
/////////////////////////////////////////////////////////////////
// Create an OpenCL context
/////////////////////////////////////////////////////////////////
cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 };
cl_context_properties* cprops = (NULL == platform) ? NULL : cps;
context = clCreateContextFromType(cprops, CL_DEVICE_TYPE_GPU, NULL, NULL, &status);
if(status != CL_SUCCESS)
{
printf("status: %d", status);
fprintf(stderr,"Error: Creating Context. (clCreateContextFromType): %s\n", get_cl_err_string(status));
return 1;
}
/* First, get the size of device list data */
status = clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES, sizeof(size_t), &deviceListSize, NULL);
if(status != CL_SUCCESS)
{
fprintf(stderr,"Error: Getting Context Info (device list size, clGetContextInfo): %s\n", get_cl_err_string(status));
return 1;
}
/////////////////////////////////////////////////////////////////
// Detect OpenCL devices
/////////////////////////////////////////////////////////////////
devices = (cl_device_id *)malloc(deviceListSize * sizeof(cl_device_id));
if(devices == 0)
{
fprintf(stderr,"Error: No devices found: %s\n", get_cl_err_string(status));
return 1;
}
/* Now, get the device list data */
status = clGetContextInfo( context, CL_CONTEXT_DEVICES, deviceListSize*sizeof(cl_device_id), devices, NULL);
if(status != CL_SUCCESS)
{
fprintf(stderr,"Error: Getting Context Info (device list, clGetContextInfo): %s\n", get_cl_err_string(status));
return 1;
}
char *deviceName;
size_t dev_name_size = 0;
int deviceNum = in_flags.GetValueInt("device");
clGetDeviceInfo(devices[deviceNum], CL_DEVICE_NAME, sizeof(char*), NULL, &dev_name_size);
deviceName = (char *)malloc(sizeof(char)*dev_name_size);
clGetDeviceInfo(devices[deviceNum], CL_DEVICE_NAME, sizeof(deviceName), deviceName, NULL);
printf("Device Name: %s\n", deviceName);
bool use_gcn3 = false;
bool use_gcn2 = false;
char *found_gfx8 = strstr(deviceName, "gfx8");
char *found_gfx7 = strstr(deviceName, "gfx7");
if (found_gfx8 != NULL)
use_gcn3 = true;
if (found_gfx7 != NULL)
use_gcn2 = true;
free(deviceName);
/////////////////////////////////////////////////////////////////
// Create an OpenCL command queue
/////////////////////////////////////////////////////////////////
commandQueue = clCreateCommandQueue(context, devices[deviceNum], CL_QUEUE_PROFILING_ENABLE, &status);
if(status != CL_SUCCESS)
{
fprintf(stderr,"Creating Command Queue. (clCreateCommandQueue): %s\n", get_cl_err_string(status));
return 1;
}
/////////////////////////////////////////////////////////////////
// Load CL file, build CL program object, create CL kernel object
/////////////////////////////////////////////////////////////////
char* source;
size_t sourceSize;
convertToStr(&source, &sourceSize, filename);
syncfree_program = clCreateProgramWithSource(context, 1, (const char**)&source, &sourceSize, &status);
if(status != CL_SUCCESS)
{
fprintf(stderr,"Error: Loading Binary into cl_program (clCreateProgramWithBinary): %s\n", get_cl_err_string(status));
return 1;
}
analyze_levelset_program = clCreateProgramWithSource(context, 1, (const char**)&source, &sourceSize, &status);
if(status != CL_SUCCESS)
{
fprintf(stderr,"Error: Loading Binary into cl_program (clCreateProgramWithBinary): %s\n", get_cl_err_string(status));
return 1;
}
std::string buildFlags = "-x clc++ -Dcl_khr_int64_base_atomics=1 -cl-std=CL2.0";
if (use_gcn3)
buildFlags += " -DGCN3 ";
if (use_gcn2)
buildFlags += " -DGCN2 ";
buildFlags += " -DROW_BITS=" + std::to_string(ROW_BITS);
buildFlags += " -DWG_BITS=" + std::to_string(WG_BITS);
buildFlags += " -DWF_SIZE=" + std::to_string(WF_SIZE);
buildFlags += " -DWF_PER_WG=" + std::to_string(WF_PER_WG);
#ifdef USE_DOUBLE
buildFlags += " -DDOUBLE";
#endif
/* create a cl program executable for all the devices specified */
status = clBuildProgram(analyze_levelset_program, 1, &devices[deviceNum], buildFlags.c_str(), NULL, NULL);
if(status != CL_SUCCESS)
{
printf("Error: Building Analyze and Levelset Program (clBuildProgram): %d\n", status);
char * errorbuf = (char*)calloc(sizeof(char),1024*1024);
size_t size;
clGetProgramBuildInfo(analyze_levelset_program, devices[deviceNum], CL_PROGRAM_BUILD_LOG, 1024*1024, errorbuf, &size);
printf("%s ", errorbuf);
return 1;
}
buildFlags += " -DSYNCFREE_KERNEL";
status = clBuildProgram(syncfree_program, 1, &devices[deviceNum], buildFlags.c_str(), NULL, NULL);
if(status != CL_SUCCESS)
{
printf("Error: Building Syncfree Program (clBuildProgram): %d\n", status);
char * errorbuf = (char*)calloc(sizeof(char),1024*1024);
size_t size;
clGetProgramBuildInfo(syncfree_program, devices[deviceNum], CL_PROGRAM_BUILD_LOG, 1024*1024, errorbuf, &size);
printf("%s ", errorbuf);
return 1;
}
SpTSKernel = clCreateKernel(syncfree_program, "amd_spts_syncfree_solve", &status);
if(status != CL_SUCCESS)
{
fprintf(stderr,"Error: Creating Kernel from program. (SpTS): %s\n", get_cl_err_string(status));
return 1;
}
SpTSKernel_analyze = clCreateKernel(analyze_levelset_program, "amd_spts_analyze_and_solve", &status);
if(status != CL_SUCCESS)
{
fprintf(stderr,"Error: Creating Kernel from program. (SpTS_analyze): %s\n", get_cl_err_string(status));
return 1;
}
SpTSKernel_levelset = clCreateKernel(analyze_levelset_program, "amd_spts_levelset_solve", &status);
if(status != CL_SUCCESS)
{
fprintf(stderr,"Error: Creating Kernel from program. (SpTS_levelset): %s\n", get_cl_err_string(status));
return 1;
}
SpTSKernel_scalar = clCreateKernel(analyze_levelset_program, "amd_spts_scalar_solve", &status);
if(status != CL_SUCCESS)
{
fprintf(stderr,"Error: Creating Kernel from program. (SpTS_scalar): %s\n", get_cl_err_string(status));
return 1;
}
SpTSKernel_vector = clCreateKernel(analyze_levelset_program, "amd_spts_vector_solve", &status);
if(status != CL_SUCCESS)
{
fprintf(stderr,"Error: Creating Kernel from program. (SpTS_vector): %s\n", get_cl_err_string(status));
return 1;
}
SpTSKernel_levelsync = clCreateKernel(analyze_levelset_program, "amd_spts_levelsync_solve", &status);
if(status != CL_SUCCESS)
{
fprintf(stderr,"Error: Creating Kernel from program. (SpTS_levelsync): %s\n", get_cl_err_string(status));
return 1;
}
// All good
return 0;
}
void CLHelper::checkStatus(cl_int status, const std::string errString)
{
if (status != CL_SUCCESS)
{
std::cerr << errString << " : " << get_cl_err_string(status) << std::endl;
exit(-1);
}
}
memPointer CLHelper::AllocateMem(const std::string name,
size_t size,
memPointer_flags flags,
void *hostBuffer)
{
cl_mem buf;
cl_int status;
buf = clCreateBuffer(context, flags, size, hostBuffer, &status);
std::string errString = "OpenCL error allocating " + name + " !";
checkStatus(status, errString);
return buf;
}
void CLHelper::CopyToDevice(memPointer devBuffer,
void *hostBuffer,
size_t size,
size_t offset,
cl_bool blocking,
cl_event *ev)
{
cl_int status;
status = clEnqueueWriteBuffer(commandQueue, devBuffer, blocking, offset, size, hostBuffer, 0, NULL, ev);
checkStatus(status, "OpenCL error copying data to device !");
}
void CLHelper::CopyToHost(memPointer devBuffer,
void *hostBuffer,
size_t size,
size_t offset,
cl_bool blocking,
cl_event *ev)
{
cl_int status;
status = clEnqueueReadBuffer(commandQueue, devBuffer, blocking, offset, size, hostBuffer, 0, NULL, ev);
checkStatus(status, "OpenCL error copying data to device !");
}
int64_t CLHelper::ComputeTime(cl_event event)
{
int64_t start_time, end_time;
clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START, sizeof(int64_t), &start_time, NULL);
clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, sizeof(int64_t), &end_time, NULL);
return end_time - start_time;
}
@@ -0,0 +1,108 @@
/********************************************************************************
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
********************************************************************************/
#ifndef CLHelper_H
#define CLHelper_H
#define CL_USE_DEPRECATED_OPENCL_2_0_APIS
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
#include <CL/cl.h>
#include <string>
#include <iostream>
#include <sstream>
#include "InputFlags.h"
#include "GPUHelper.h"
struct LocalMemArg
{
LocalMemArg(size_t _size) : size(_size) {}
size_t GetSize() const { return size; }
private:
size_t size;
};
class CLHelper : public GPUHelper
{
cl_platform_id platform;
cl_device_id *devices;
cl_program syncfree_program;
cl_program analyze_levelset_program;
public:
static cl_context context;
static cl_kernel SpTSKernel;
static cl_kernel SpTSKernel_analyze;
static cl_kernel SpTSKernel_levelset;
static cl_kernel SpTSKernel_scalar;
static cl_kernel SpTSKernel_vector;
static cl_kernel SpTSKernel_levelsync;
static cl_command_queue commandQueue;
CLHelper() {}
int Init(const std::string &_filename, InputFlags &in_flags);
void checkStatus(gpuError status, const std::string errString);
void CopyToDevice(memPointer _d_buf, void *_h_buf, size_t _size, size_t _offset, cl_bool _blocking, cl_event *_ev);
void CopyToHost(memPointer _d_buf, void *_h_buf, size_t _size, size_t _offset, cl_bool _blocking, cl_event *_ev);
memPointer AllocateMem(const std::string name, size_t, memPointer_flags flags, void *);
void FreeMem(memPointer ptr) { clReleaseMemObject(ptr); }
void Flush() { clFinish(commandQueue); }
template<typename T, typename... Args>
void SetArgs(cl_kernel, int i, const T& first, const Args&... rest);
template<typename... Args>
void SetArgs(cl_kernel, int i, const LocalMemArg &lmem, const Args&... rest);
void SetArgs(cl_kernel, int i) {}
int64_t ComputeTime(cl_event event);
};
template<typename T, typename... Args>
void CLHelper::SetArgs(cl_kernel kernel, int i, const T& first, const Args&... rest)
{
cl_int status;
status = clSetKernelArg(kernel, i++, sizeof(T), (void *)& first);
std::stringstream errStream;
errStream<<"OpenCL error setting kernel argument "<<i;
checkStatus(status, errStream.str()) ;
SetArgs(kernel, i, rest...);
}
template<typename... Args>
void CLHelper::SetArgs(cl_kernel kernel, int i, const LocalMemArg &lmem, const Args&... rest)
{
cl_int status;
status = clSetKernelArg(kernel, i++, lmem.GetSize(), NULL);
std::stringstream errStream;
errStream<<"OpenCL error setting kernel argument (local memory) "<<i;
checkStatus(status, errStream.str()) ;
SetArgs(kernel, i, rest...);
}
#endif //CLHelper_H
تفاوت فایلی نمایش داده نمی شود زیرا این فایل بسیار بزرگ است Diff را بارگزاری کن
@@ -0,0 +1,287 @@
/********************************************************************************
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
********************************************************************************/
#ifndef SparseMatrix_H
#define SparseMatrix_H
#include "GPUHelper.h"
#ifndef USE_HIP
#include "OpenCLHelper.h"
#include <CL/cl.h>
#else
#include "HIPHelper.h"
#endif
#include "InputFlags.h"
#include "MatrixMarketReader.h"
#include "OpenCLHelper.h"
#include <algorithm>
#include <cassert>
template<typename FloatType>
class SparseMatrix
{
public:
int nRows;
int nCols;
int nNZ;
int *cols;
int *row_ptrs;
FloatType *vals;
memPointer d_cols;
memPointer d_vals;
memPointer d_row_ptrs;
// info about parallel procs
int this_pe;
int total_pes;
int nRows_p;
int nCols_p;
protected:
GPUHelper *GPU;
public:
SparseMatrix() : nRows(0), nCols(0), nNZ(0), nRows_p(0), nCols_p(0)
{
cols = NULL;
row_ptrs = NULL;
vals = NULL;
d_cols = NULL;
d_vals = NULL;
d_row_ptrs = NULL;
this_pe = -1;//roc_shmem_my_pe(handle); // this pe
total_pes = -1;//roc_shmem_n_pes(handle); // total number of pes
}
void AllocateSparseMatrix(MatrixMarketReader<FloatType> &mm_reader,
InputFlags &in_flags,
GPUHelper *gpu);
void AllocateParallelSparseMatrix(MatrixMarketReader<FloatType> &mm_reader,
InputFlags &in_flags);
void ConvertFromCOOToCSR(Coordinate<FloatType> *coords,
InputFlags &in_flags);
void PopulateParallelSparseMatrix(MatrixMarketReader<FloatType> &mm_reader,
InputFlags &in_flags);
void FindStatsForParallelDecomposition();
void Set_total_pes(int val){
this->total_pes = val;
}
void Set_this_pe(int val){
this->this_pe = val;
}
int Get_total_pes(){
return this->total_pes;
}
int Get_this_pe(){
return this->this_pe;
}
int GetNumRows_p() {return nRows_p;}
int *GetCols() { return cols; }
FloatType *GetVals() { return vals; }
int *GetRowPtrs() { return row_ptrs; }
memPointer GetDevCols() {return d_cols; }
memPointer GetDevVals() {return d_vals; }
memPointer GetDevRowPtrs() {return d_row_ptrs; }
~SparseMatrix()
{
delete[] cols;
delete[] vals;
delete[] row_ptrs;
GPU->FreeMem(d_cols);
GPU->FreeMem(d_vals);
GPU->FreeMem(d_row_ptrs);
}
};
template<typename FloatType>
void SparseMatrix<FloatType>::AllocateSparseMatrix(MatrixMarketReader<FloatType> &mm_reader,
InputFlags &in_flags,
GPUHelper *gpu)
{
GPU = gpu;
nRows = mm_reader.GetNumRows();
nCols = mm_reader.GetNumCols();
nNZ = mm_reader.GetNumNonZeroes();
printf("Allocating a sparse matrix with-- nRows: %d nCols: %d nNZ: %d\n", nRows, nCols, nNZ);
assert(total_pes != -1);
assert(this_pe != -1);
#ifdef USE_RO_SHMEM
if (nRows != nCols){
fprintf(stderr, "RO_SHMEM port requires the global matrix to be "
"square!\n");
exit(-1);
}
#endif
cols = new int[nNZ];
if (cols == NULL)
{
fprintf(stderr, "Failed to allocate host-side cols array !\n");
exit(-1);
}
vals = new FloatType[nNZ];
if (vals == NULL)
{
fprintf(stderr, "Failed to allocate host-side vals array !\n");
exit(-1);
}
row_ptrs = new int[nRows + 1];
if (row_ptrs == NULL)
{
fprintf(stderr, "Failed to allocate host-side row_ptrs array !\n");
exit(-1);
}
}
template<typename FloatType>
bool CoordinateCompare(const Coordinate<FloatType> &c1, const Coordinate<FloatType> &c2)
{
if(c1.x != c2.x)
return (c1.x < c2.x);
else
return (c1.y < c2.y);
}
template<typename FloatType>
void SparseMatrix<FloatType>::ConvertFromCOOToCSR(Coordinate<FloatType> *coords,
InputFlags &in_flags)
{
std::sort(coords, coords + nNZ, CoordinateCompare<FloatType>);
int current_row = 1;
bool has_seen_diagonal = false;
row_ptrs[0] = 0;
for (int i = 0; i < nNZ; i++)
{
cols[i] = coords[i].y;
vals[i] = coords[i].val;
//fprintf(stderr,"Row %d Col %d Val %lf (cur_row: %d)\n", coords[i].x, coords[i].y, coords[i].val, current_row-1);
while(coords[i].x >= current_row)
{
// We've reached the end of a row. Did we see a diagonal?
// If not, the triangular solve will be underconstrained.
if (!has_seen_diagonal)
{
fprintf(stderr, "ERROR Converting the COO to CSR.\n");
fprintf(stderr, "\tMissing diagonal on row %d\n", current_row-1);
exit(-1);
}
has_seen_diagonal = false;
row_ptrs[current_row] = i;
current_row++;
}
if (coords[i].x == coords[i].y)
has_seen_diagonal = true;
}
row_ptrs[current_row++] = nNZ;
while (current_row <= nRows)
{
if (!has_seen_diagonal)
{
fprintf(stderr, "ERROR Converting the COO to CSR.\n");
fprintf(stderr, "\tNo values on row %d, so no diagonal.\n", current_row-1);
exit(-1);
}
has_seen_diagonal = false;
row_ptrs[current_row++] = nNZ;
}
}
template<typename FloatType>
void SparseMatrix<FloatType>::AllocateParallelSparseMatrix(MatrixMarketReader<FloatType> &mm_reader,
InputFlags &in_flags)
{
d_cols = GPU->AllocateMem("cols", nNZ*sizeof(int), 0, NULL);
d_vals = GPU->AllocateMem("vals", nNZ*sizeof(FloatType), 0, NULL);
d_row_ptrs = GPU->AllocateMem("row_ptrs", (nRows+1)*sizeof(int), 0, NULL);
}
template<typename FloatType>
void SparseMatrix<FloatType>::FindStatsForParallelDecomposition()
{
assert(SPTS_BLOCK_SIZE % 64 == 0);
// Rows left over in the potentially partial final block
int left_over_last_block = nRows % SPTS_BLOCK_SIZE;
printf("%d: lolb %d\n", this_pe, left_over_last_block);
// Number of complete blocks, not including any partial block at the end
int total_blocks = nRows / SPTS_BLOCK_SIZE;
printf("%d: totb %d\n", this_pe, total_blocks);
// Everyone has at least this many rows
nRows_p = (total_blocks / total_pes) * SPTS_BLOCK_SIZE;
printf("%d: initial nRows_p %d\n", this_pe, nRows_p);
// Last cycle might not assign to all PEs
int straggler_blocks = total_blocks % total_pes;
if (this_pe < straggler_blocks)
nRows_p += SPTS_BLOCK_SIZE;
printf("%d: straggler nRows_p %d\n", this_pe, nRows_p);
// Last block of last cycle might have less than SPTS_BLOCK_SIZE rows
if (left_over_last_block) {
int final_pe = ((total_blocks + 1) % total_pes) - 1;
if (final_pe == -1)
final_pe = total_pes - 1;
if (this_pe == final_pe)
nRows_p += left_over_last_block;
}
printf("%d: final nRows_p %d\n", this_pe, nRows_p);
if (nRows_p <= 0) {
fprintf(stderr, "Block Size %d too small for input row size %d with "
"%d number of nodes. Please decrease the block size or "
"decrease the number of nodes\n", SPTS_BLOCK_SIZE, nRows,
total_pes);
exit(-1);
}
// print to check!
printf("\nPE: %d total_rows: %d my_rows: %d\n", this_pe, nRows, nRows_p);
nCols_p = nCols; // 1D decomposition
}
#endif
@@ -0,0 +1,16 @@
#!/bin/bash
src_path=$(dirname "$(realpath $0)")/..
cmake \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_VERBOSE_MAKEFILE=OFF \
-DUSE_ROC_SHMEM=OFF \
-DUSE_HIP=ON \
-DALL_ANALYZE=ON \
-DUSE_DOUBLE=OFF \
-DALL_LEVELSET=OFF \
-DALL_LEVELSYNC=OFF \
-DALL_SYNCFREE=OFF \
$src_path
cmake --build . --parallel 8
@@ -0,0 +1,16 @@
#!/bin/bash
src_path=$(dirname "$(realpath $0)")/..
cmake \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_VERBOSE_MAKEFILE=OFF \
-DUSE_ROC_SHMEM=OFF \
-DUSE_HIP=OFF \
-DALL_ANALYZE=ON \
-DUSE_DOUBLE=OFF \
-DALL_LEVELSET=OFF \
-DALL_LEVELSYNC=OFF \
-DALL_SYNCFREE=OFF \
$src_path
cmake --build . --parallel 8
@@ -0,0 +1,24 @@
#!/bin/bash
if [ -z $1 ]
then
install_path=~/rocshmem
else
install_path=$1
fi
src_path=$(dirname "$(realpath $0)")/..
cmake \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_VERBOSE_MAKEFILE=OFF \
-DUSE_ROC_SHMEM=ON \
-DUSE_HIP=ON \
-DALL_ANALYZE=ON \
-DUSE_DOUBLE=OFF \
-DALL_LEVELSET=OFF \
-DALL_LEVELSYNC=OFF \
-DALL_SYNCFREE=OFF \
-Drocshmem_DIR=$install_path/share/cmake/rocshmem \
$src_path
cmake --build . --parallel 8
@@ -0,0 +1,7 @@
#cmakedefine USE_ROC_SHMEM
#cmakedefine USE_HIP
#cmakedefine ALL_ANALYZE
#cmakedefine USE_DOUBLE
#cmakedefine ALL_LEVELSET
#cmakedefine ALL_LEVELSYNC
#cmakedefine ALL_SYNCFREE
@@ -0,0 +1,43 @@
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#!/bin/bash
echo Test Name $2
INPUTS=/mnt/mlebeane/spts_data
case $2 in
*"single_thread")
mpirun -np 2 $1 -f $INPUTS/test_matrices/diagonal_large.mtx -a 2 -b 512 -p 64 -v -i 3 > $3/diagonal_large_bput.log
mpirun -np 2 $1 -f $INPUTS/test_matrices/not_quite_diagonal.mtx -a 2 -b 256 -p 64 -v -i 3 > $3/not_quite_diagonal_bput.log
;;
*"multi_thread")
mpirun -np 2 $1 -f $INPUTS/test_matrices/diagonal_large.mtx -a 2 -b 512 -p 64 -v -i 3 > $3/diagonal_large_bput.log
mpirun -np 2 $1 -f $INPUTS/test_matrices/not_quite_diagonal.mtx -a 2 -b 256 -p 64 -v -i 3 > $3/not_quite_diagonal_bput.log
mpirun -np 2 $1 -f $INPUTS/test_matrices/not_quite_diagonal.mtx -a 1 -b 256 -v -i 3 > $3/not_quite_diagonal_get.log
;;
*)
echo "UNKNOWN TEST TYPE: $2"
exit -1
;;
esac
exit $?
@@ -0,0 +1,86 @@
/*
* Matrix Market I/O library for ANSI C
*
* See http://math.nist.gov/MatrixMarket for details.
*
*
*/
#ifndef MM_IO_H
#define MM_IO_H
/********************* MM_typecode query fucntions ***************************/
#define mm_is_matrix(typecode) ((typecode)[0]=='M')
#define mm_is_sparse(typecode) ((typecode)[1]=='C')
#define mm_is_coordinate(typecode)((typecode)[1]=='C')
#define mm_is_dense(typecode) ((typecode)[1]=='A')
#define mm_is_array(typecode) ((typecode)[1]=='A')
#define mm_is_complex(typecode) ((typecode)[2]=='C')
#define mm_is_real(typecode) ((typecode)[2]=='R')
#define mm_is_pattern(typecode) ((typecode)[2]=='P')
#define mm_is_integer(typecode) ((typecode)[2]=='I')
#define mm_is_symmetric(typecode)((typecode)[3]=='S')
#define mm_is_general(typecode) ((typecode)[3]=='G')
#define mm_is_skew(typecode) ((typecode)[3]=='K')
#define mm_is_hermitian(typecode)((typecode)[3]=='H')
/********************* MM_typecode modify fucntions ***************************/
#define mm_set_matrix(typecode) ((typecode)[0]='M')
#define mm_set_coordinate(typecode) ((typecode)[1]='C')
#define mm_set_array(typecode) ((typecode)[1]='A')
#define mm_set_dense(typecode) mm_set_array(typecode)
#define mm_set_sparse(typecode) mm_set_coordinate(typecode)
#define mm_set_complex(typecode)((typecode)[2]='C')
#define mm_set_real(typecode) ((typecode)[2]='R')
#define mm_set_pattern(typecode)((typecode)[2]='P')
#define mm_set_integer(typecode)((typecode)[2]='I')
#define mm_set_symmetric(typecode)((typecode)[3]='S')
#define mm_set_general(typecode)((typecode)[3]='G')
#define mm_set_skew(typecode) ((typecode)[3]='K')
#define mm_set_hermitian(typecode)((typecode)[3]='H')
#define mm_clear_typecode(typecode) ((typecode)[0]=(typecode)[1]= \
(typecode)[2]=' ',(typecode)[3]='G')
#define mm_initialize_typecode(typecode) mm_clear_typecode(typecode)
/********************* Matrix Market error codes ***************************/
#define MM_COULD_NOT_READ_FILE 11
#define MM_PREMATURE_EOF 12
#define MM_NOT_MTX 13
#define MM_NO_HEADER 14
#define MM_UNSUPPORTED_TYPE 15
#define MM_LINE_TOO_LONG 16
#define MM_COULD_NOT_WRITE_FILE 17
#define MM_MTX_STR "matrix"
#define MM_ARRAY_STR "array"
#define MM_DENSE_STR "array"
#define MM_COORDINATE_STR "coordinate"
#define MM_SPARSE_STR "coordinate"
#define MM_COMPLEX_STR "complex"
#define MM_REAL_STR "real"
#define MM_INT_STR "integer"
#define MM_GENERAL_STR "general"
#define MM_SYMM_STR "symmetric"
#define MM_HERM_STR "hermitian"
#define MM_SKEW_STR "skew-symmetric"
#define MM_PATTERN_STR "pattern"
#define MM_MAX_LINE_LENGTH 1025
#define MM_MAX_TOKEN_LENGTH 64
#define MatrixMarketBanner "%%MatrixMarket"
#define MAX_RAND_VAL 5.0
#endif
تفاوت فایلی نمایش داده نمی شود زیرا این فایل بسیار بزرگ است Diff را بارگزاری کن
@@ -0,0 +1,118 @@
pipeline {
agent { label 'sv-pdp-5' }
environment {
HSA_FORCE_FINE_GRAIN_PCIE = 1
MPI_HOME="/home/resperf/mpich-4.0.1/install/global"
PATH = "$MPI_HOME/bin:$PATH"
LD_LIBRARY_PATH = "$MPI_HOME/lib:$LD_LIBRARY_PATH"
build_dir = "builds/change-${GERRIT_CHANGE_NUMBER}-${GERRIT_PATCHSET_NUMBER}"
CMAKE_PREFIX_PATH = "/opt/rocm/lib/cmake"
}
stages {
stage('Synchronize Source Code') {
steps {
checkout changelog: false, poll: false, scm: [$class: 'GitSCM', branches: [[name: 'FETCH_HEAD']], doGenerateSubmoduleConfigurations: false, extensions: [[$class: 'CloneOption', depth: 0, noTags: false, reference: '', shallow: false]], submoduleCfg: [], userRemoteConfigs: [[name: 'origin', refspec: '${GERRIT_REFSPEC}', url: 'ssh://gerritgit/rsch/ec/shmem']]]
}
}
stage('Make Build Directory') {
steps {
dir("library") {
sh "mkdir -p ${build_dir}"
}
}
}
stage('Build Source Code') {
parallel {
stage('RC_SINGLE') {
steps {
//===================== LIBRARY =======================
dir("library/${build_dir}/RC_SINGLE") {
sh 'mkdir -p install'
sh '../../../build_configs/rc_single install'
}
//===================== CLIENT ========================
dir("clients/functional_tests/${build_dir}/RC_SINGLE") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_SINGLE/install'
}
//===================== SPTS ==========================
dir("internal/clients/spts/${build_dir}/RC_SINGLE") {
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_SINGLE/install'
}
}
}
stage('RC_MULTI_WF_COAL') {
steps {
//===================== LIBRARY =======================
dir("library/${build_dir}/RC_MULTI_WF_COAL") {
sh 'mkdir -p install'
sh '../../../build_configs/rc_multi_wf_coal install'
}
//===================== CLIENT ========================
dir("clients/functional_tests/${build_dir}/RC_MULTI_WF_COAL") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_MULTI_WF_COAL/install'
}
//===================== SPTS ==========================
dir("internal/clients/spts/${build_dir}/RC_MULTI_WF_COAL") {
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_MULTI_WF_COAL/install'
}
}
}
stage('RC_MULTI') {
steps {
//===================== LIBRARY =======================
dir("library/${build_dir}/RC_MULTI") {
sh 'mkdir -p install'
sh '../../../build_configs/rc_multi install'
}
//===================== CLIENT ========================
dir("clients/functional_tests/${build_dir}/RC_MULTI") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_MULTI/install'
}
//===================== SPTS ==========================
dir("internal/clients/spts/${build_dir}/RC_MULTI") {
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_MULTI/install'
}
}
}
stage('DC_SINGLE') {
steps {
//===================== LIBRARY =======================
dir("library/${build_dir}/DC_SINGLE") {
sh 'mkdir -p install'
sh '../../../build_configs/dc_single install'
}
//===================== CLIENT ========================
dir("clients/functional_tests/${build_dir}/DC_SINGLE") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_SINGLE/install'
}
//===================== SPTS ==========================
dir("internal/clients/spts/${build_dir}/DC_SINGLE") {
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/DC_SINGLE/install'
}
}
}
stage('DC_MULTI') {
steps {
//===================== LIBRARY =======================
dir("library/${build_dir}/DC_MULTI") {
sh 'mkdir -p install'
sh '../../../build_configs/dc_multi install'
}
//===================== CLIENT ========================
dir("clients/functional_tests/${build_dir}/DC_MULTI") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI/install'
}
//===================== SPTS ==========================
dir("internal/clients/spts/${build_dir}/DC_MULTI") {
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/DC_MULTI/install'
}
}
}
}
}
}
}
@@ -0,0 +1,93 @@
<?xml version='1.1' encoding='UTF-8'?>
<flow-definition plugin="workflow-job@2.40">
<actions>
<org.jenkinsci.plugins.pipeline.modeldefinition.actions.DeclarativeJobAction plugin="pipeline-model-definition@1.8.4"/>
<org.jenkinsci.plugins.pipeline.modeldefinition.actions.DeclarativeJobPropertyTrackerAction plugin="pipeline-model-definition@1.8.4">
<jobProperties/>
<triggers/>
<parameters/>
<options/>
</org.jenkinsci.plugins.pipeline.modeldefinition.actions.DeclarativeJobPropertyTrackerAction>
</actions>
<description></description>
<keepDependencies>false</keepDependencies>
<properties>
<org.jenkinsci.plugins.workflow.job.properties.PipelineTriggersJobProperty>
<triggers>
<com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.GerritTrigger plugin="gerrit-trigger@2.33.0">
<spec></spec>
<gerritProjects>
<com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.data.GerritProject>
<compareType>PLAIN</compareType>
<pattern>rsch/ec/shmem</pattern>
<branches>
<com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.data.Branch>
<compareType>PLAIN</compareType>
<pattern>amd-master</pattern>
</com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.data.Branch>
</branches>
<disableStrictForbiddenFileVerification>false</disableStrictForbiddenFileVerification>
</com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.data.GerritProject>
</gerritProjects>
<dynamicGerritProjects class="empty-list"/>
<skipVote>
<onSuccessful>true</onSuccessful>
<onFailed>true</onFailed>
<onUnstable>true</onUnstable>
<onNotBuilt>true</onNotBuilt>
<onAborted>true</onAborted>
</skipVote>
<silentMode>false</silentMode>
<enableTopicAssociation>false</enableTopicAssociation>
<notificationLevel></notificationLevel>
<silentStartMode>false</silentStartMode>
<escapeQuotes>true</escapeQuotes>
<nameAndEmailParameterMode>PLAIN</nameAndEmailParameterMode>
<dependencyJobsNames></dependencyJobsNames>
<commitMessageParameterMode>BASE64</commitMessageParameterMode>
<changeSubjectParameterMode>PLAIN</changeSubjectParameterMode>
<commentTextParameterMode>BASE64</commentTextParameterMode>
<buildStartMessage></buildStartMessage>
<buildFailureMessage></buildFailureMessage>
<buildSuccessfulMessage></buildSuccessfulMessage>
<buildUnstableMessage></buildUnstableMessage>
<buildNotBuiltMessage></buildNotBuiltMessage>
<buildAbortedMessage></buildAbortedMessage>
<buildUnsuccessfulFilepath></buildUnsuccessfulFilepath>
<customUrl></customUrl>
<serverName>amd-gerrit</serverName>
<triggerOnEvents>
<com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.events.PluginCommentAddedContainsEvent>
<commentAddedCommentContains>!COMPILE</commentAddedCommentContains>
</com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.events.PluginCommentAddedContainsEvent>
</triggerOnEvents>
<dynamicTriggerConfiguration>false</dynamicTriggerConfiguration>
<triggerConfigURL></triggerConfigURL>
<triggerInformationAction/>
</com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.GerritTrigger>
</triggers>
</org.jenkinsci.plugins.workflow.job.properties.PipelineTriggersJobProperty>
</properties>
<definition class="org.jenkinsci.plugins.workflow.cps.CpsScmFlowDefinition" plugin="workflow-cps@2.90">
<scm class="hudson.plugins.git.GitSCM" plugin="git@4.7.1">
<configVersion>2</configVersion>
<userRemoteConfigs>
<hudson.plugins.git.UserRemoteConfig>
<url>ssh://gerritgit/rsch/ec/shmem</url>
</hudson.plugins.git.UserRemoteConfig>
</userRemoteConfigs>
<branches>
<hudson.plugins.git.BranchSpec>
<name>FETCH_HEAD</name>
</hudson.plugins.git.BranchSpec>
</branches>
<doGenerateSubmoduleConfigurations>false</doGenerateSubmoduleConfigurations>
<submoduleCfg class="empty-list"/>
<extensions/>
</scm>
<scriptPath>internal/continuous_integration/compile/Jenkinsfile</scriptPath>
<lightweight>false</lightweight>
</definition>
<triggers/>
<disabled>false</disabled>
</flow-definition>
@@ -0,0 +1,221 @@
pipeline {
agent { label 'sv-pdp-5' }
environment {
HSA_FORCE_FINE_GRAIN_PCIE = 1
MPI_HOME="/home/resperf/mpich-4.0.1/install/global"
PATH = "$MPI_HOME/bin:$PATH"
LD_LIBRARY_PATH = "$MPI_HOME/lib:$LD_LIBRARY_PATH"
build_dir = "builds/change-${GERRIT_CHANGE_NUMBER}-${GERRIT_PATCHSET_NUMBER}"
CMAKE_PREFIX_PATH = "/opt/rocm/lib/cmake"
}
stages {
stage('Synchronize Source Code') {
steps {
checkout changelog: false, poll: false, scm: [$class: 'GitSCM', branches: [[name: 'FETCH_HEAD']], doGenerateSubmoduleConfigurations: false, extensions: [[$class: 'CloneOption', depth: 0, noTags: false, reference: '', shallow: false]], submoduleCfg: [], userRemoteConfigs: [[name: 'origin', refspec: '${GERRIT_REFSPEC}', url: 'ssh://gerritgit/rsch/ec/shmem']]]
}
}
stage('Env Variables') {
steps {
sh 'printenv'
}
}
stage('Make Build Directory') {
steps {
dir("library") {
sh "mkdir -p ${build_dir}"
}
}
}
stage('Build Source Code') {
failFast true
parallel {
stage('RC_SINGLE') {
steps {
//===================== LIBRARY =======================
dir("library/${build_dir}/RC_SINGLE") {
sh 'mkdir -p install'
sh '../../../build_configs/rc_single install'
}
//===================== CLIENT ========================
dir("clients/functional_tests/${build_dir}/RC_SINGLE") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_SINGLE/install'
}
//===================== SPTS ==========================
//dir("internal/clients/spts/${build_dir}/RC_SINGLE") {
// sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_SINGLE/install'
//}
}
}
stage('RC_MULTI_WF_COAL') {
steps {
//===================== LIBRARY =======================
dir("library/${build_dir}/RC_MULTI_WF_COAL") {
sh 'mkdir -p install'
sh '../../../build_configs/rc_multi_wf_coal install'
}
//===================== CLIENT ========================
dir("clients/functional_tests/${build_dir}/RC_MULTI_WF_COAL") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_MULTI_WF_COAL/install'
}
//===================== SPTS ==========================
//dir("internal/clients/spts/${build_dir}/RC_MULTI_WF_COAL") {
// sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_MULTI_WF_COAL/install'
//}
}
}
stage('RC_MULTI') {
steps {
//===================== LIBRARY =======================
dir("library/${build_dir}/RC_MULTI") {
sh 'mkdir -p install'
sh '../../../build_configs/rc_multi install'
}
//===================== CLIENT ========================
dir("clients/functional_tests/${build_dir}/RC_MULTI") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_MULTI/install'
}
//===================== SPTS ==========================
//dir("internal/clients/spts/${build_dir}/RC_MULTI") {
// sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_MULTI/install'
//}
}
}
stage('DC_SINGLE') {
steps {
//===================== LIBRARY =======================
dir("library/${build_dir}/DC_SINGLE") {
sh 'mkdir -p install'
sh '../../../build_configs/dc_single install'
}
//===================== CLIENT ========================
dir("clients/functional_tests/${build_dir}/DC_SINGLE") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_SINGLE/install'
}
//===================== SPTS ==========================
//dir("internal/clients/spts/${build_dir}/DC_SINGLE") {
// sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/DC_SINGLE/install'
//}
}
}
stage('DC_MULTI') {
steps {
//===================== LIBRARY =======================
dir("library/${build_dir}/DC_MULTI") {
sh 'mkdir -p install'
sh '../../../build_configs/dc_multi install'
}
//===================== CLIENT ========================
dir("clients/functional_tests/${build_dir}/DC_MULTI") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI/install'
}
//===================== SPTS ==========================
//dir("internal/clients/spts/${build_dir}/DC_MULTI") {
// sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/DC_MULTI/install'
//}
}
}
}
}
stage('Run Tests') {
stages {
stage('RC_SINGLE') {
steps {
dir("clients/functional_tests") {
sh './driver.sh ${build_dir}/RC_SINGLE/rocshmem_example_driver single_thread ${build_dir}/RC_SINGLE true'
}
//dir("internal/clients/spts") {
// sh './driver.sh ${build_dir}/RC_SINGLE/spts single_thread ${build_dir}/RC_SINGLE'
//}
}
}
stage('RC_MULTI_WF_COAL') {
steps {
dir("clients/functional_tests") {
sh './driver.sh ${build_dir}/RC_MULTI_WF_COAL/rocshmem_example_driver multi_thread ${build_dir}/RC_MULTI_WF_COAL true'
}
//dir("internal/clients/spts") {
// sh './driver.sh ${build_dir}/RC_MULTI_WF_COAL/spts multi_thread ${build_dir}/RC_MULTI_WF_COAL'
//}
}
}
stage('RC_MULTI') {
steps {
dir("clients/functional_tests") {
sh './driver.sh ${build_dir}/RC_MULTI/rocshmem_example_driver multi_thread ${build_dir}/RC_MULTI true'
}
//dir("internal/clients/spts") {
// sh './driver.sh ${build_dir}/RC_MULTI/spts multi_thread ${build_dir}/RC_MULTI'
//}
}
}
stage('DC_SINGLE') {
steps {
dir("clients/functional_tests") {
sh './driver.sh ${build_dir}/DC_SINGLE/rocshmem_example_driver single_thread ${build_dir}/DC_SINGLE true'
}
//dir("internal/clients/spts") {
// sh './driver.sh ${build_dir}/DC_SINGLE/spts single_thread ${build_dir}/DC_SINGLE'
//}
}
}
stage('DC_MULTI') {
steps {
dir("clients/functional_tests") {
sh './driver.sh ${build_dir}/DC_MULTI/rocshmem_example_driver multi_thread ${build_dir}/DC_MULTI true'
}
//dir("internal/clients/spts") {
// sh './driver.sh ${build_dir}/DC_MULTI/spts multi_thread ${build_dir}/DC_MULTI'
//}
}
}
stage('RO_NET_BASIC') {
// RO_NET controlled at runtime, no need for a new build. Use RC_MULTI
steps {
dir("clients/functional_tests") {
sh 'mkdir -p ${build_dir}/RO_NET_BASIC'
sh 'ROC_SHMEM_RO=1 RO_NET_CPU_QUEUE=1 UCX_TLS=rc ./driver.sh ${build_dir}/RC_MULTI/rocshmem_example_driver ro ${build_dir}/RO_NET_BASIC true'
}
//dir("internal/clients/spts") {
// sh 'mkdir -p ${build_dir}/RO_NET_BASIC'
// sh 'ROC_SHMEM_RO=1 RO_NET_CPU_QUEUE=1 UCX_TLS=rc ./driver.sh ${build_dir}/RC_MULTI/spts multi_thread ${build_dir}/RO_NET_BASIC'
//}
}
}
}
}
stage('Generate Checker Metadata') {
steps {
dir("library/${build_dir}") {
sh 'git fetch --tags'
sh 'git log --pretty=oneline remotes/origin/amd-master.. > changeset_delta.txt'
sh 'git log --pretty=oneline remotes/origin/amd-master~1..remotes/origin/amd-master >> changeset_delta.txt'
}
}
}
stage('Archive Artifacts') {
steps {
dir("library/${build_dir}") {
archiveArtifacts artifacts: 'changeset_delta.txt'
}
dir("clients/functional_tests/${build_dir}") {
archiveArtifacts artifacts: 'RC_SINGLE/**/*.log'
archiveArtifacts artifacts: 'RC_MULTI/**/*.log'
archiveArtifacts artifacts: 'DC_SINGLE/**/*.log'
archiveArtifacts artifacts: 'DC_MULTI/**/*.log'
archiveArtifacts artifacts: 'RO_NET_BASIC/**/*.log'
}
}
}
}
}
@@ -0,0 +1,413 @@
pipeline {
agent { label 'sv-pdp-5' }
environment {
build_dir = "builds/change-${GERRIT_CHANGE_NUMBER}-${GERRIT_PATCHSET_NUMBER}"
MPI_HOME="/home/resperf/mpich-4.0.1/install/global"
UCX_HOME="/home/resperf/ucx/install"
PATH="$MPI_HOME/bin:$UCX_HOME/bin:$PATH"
LD_LIBRARY_PATH="$MPI_HOME/lib:$UCX_HOME/lib:$LD_LIBRARY_PATH"
PKG_CONFIG_PATH="$MPI_HOME/lib/pkgconfig:$UCX_HOME/lib/pkgconfig"
CMAKE_PREFIX_PATH="/opt/rocm/lib/cmake"
UCX_WARN_UNUSED_ENV_VARS="n"
HSA_FORCE_FINE_GRAIN_PCIE=1
}
stages {
stage('Synchronize Source Code') {
steps {
checkout changelog: false, poll: false, scm: [$class: 'GitSCM', branches: [[name: 'FETCH_HEAD']], doGenerateSubmoduleConfigurations: false, extensions: [[$class: 'CloneOption', depth: 0, noTags: false, reference: '', shallow: false]], submoduleCfg: [], userRemoteConfigs: [[name: 'origin', refspec: '${GERRIT_REFSPEC}', url: 'ssh://gerritgit/rsch/ec/shmem']]]
}
}
stage('Env Variables') {
steps {
sh 'printenv'
}
}
stage('Make Build Directory') {
steps {
dir("library") {
sh "mkdir -p ${build_dir}"
}
}
}
stage('Build Source Code') {
failFast true
parallel {
stage('RC_SINGLE') {
steps {
//===================== LIBRARY =======================
dir("library/${build_dir}/RC_SINGLE") {
sh 'mkdir -p install'
sh '../../../build_configs/rc_single install'
}
//===================== CLIENT ========================
dir("clients/functional_tests/${build_dir}/RC_SINGLE") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_SINGLE/install'
}
dir("clients/sos_tests/${build_dir}/RC_SINGLE") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_SINGLE/install'
}
//===================== SPTS ==========================
dir("internal/clients/spts/${build_dir}/RC_SINGLE") {
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_SINGLE/install'
}
}
}
stage('RC_MULTI_WF_COAL') {
steps {
//===================== LIBRARY =======================
dir("library/${build_dir}/RC_MULTI_WF_COAL") {
sh 'mkdir -p install'
sh '../../../build_configs/rc_multi_wf_coal install'
}
//===================== CLIENT ========================
dir("clients/functional_tests/${build_dir}/RC_MULTI_WF_COAL") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_MULTI_WF_COAL/install'
}
dir("clients/sos_tests/${build_dir}/RC_MULTI_WF_COAL") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_MULTI_WF_COAL/install'
}
//===================== SPTS ==========================
dir("internal/clients/spts/${build_dir}/RC_MULTI_WF_COAL") {
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_MULTI_WF_COAL/install'
}
}
}
stage('RC_MULTI') {
steps {
//===================== LIBRARY =======================
dir("library/${build_dir}/RC_MULTI") {
sh 'mkdir -p install'
sh '../../../build_configs/rc_multi install'
}
//===================== CLIENT ========================
dir("clients/functional_tests/${build_dir}/RC_MULTI") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_MULTI/install'
}
dir("clients/sos_tests/${build_dir}/RC_MULTI") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_MULTI/install'
}
//===================== SPTS ==========================
dir("internal/clients/spts/${build_dir}/RC_MULTI") {
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_MULTI/install'
}
}
}
stage('RC_SINGLE_DEBUG') {
steps {
//===================== LIBRARY =======================
dir("library/${build_dir}/RC_SINGLE_DEBUG") {
sh 'mkdir -p install'
sh '../../../build_configs/rc_single_debug install'
}
//===================== CLIENT ========================
dir("clients/functional_tests/${build_dir}/RC_SINGLE_DEBUG") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_SINGLE_DEBUG/install'
}
dir("clients/sos_tests/${build_dir}/RC_SINGLE_DEBUG") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_SINGLE_DEBUG/install'
}
//===================== SPTS ==========================
dir("internal/clients/spts/${build_dir}/RC_SINGLE_DEBUG") {
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_SINGLE_DEBUG/install'
}
}
}
stage('RC_SINGLE_PROFILE') {
steps {
//===================== LIBRARY =======================
dir("library/${build_dir}/RC_SINGLE_PROFILE") {
sh 'mkdir -p install'
sh '../../../build_configs/rc_single_profile install'
}
//===================== CLIENT ========================
dir("clients/functional_tests/${build_dir}/RC_SINGLE_PROFILE") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_SINGLE_PROFILE/install'
}
dir("clients/sos_tests/${build_dir}/RC_SINGLE_PROFILE") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_SINGLE_PROFILE/install'
}
//===================== SPTS ==========================
dir("internal/clients/spts/${build_dir}/RC_SINGLE_PROFILE") {
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_SINGLE_PROFILE/install'
}
}
}
stage('DC_SINGLE') {
steps {
//===================== LIBRARY =======================
dir("library/${build_dir}/DC_SINGLE") {
sh 'mkdir -p install'
sh '../../../build_configs/dc_single install'
}
//===================== CLIENT ========================
dir("clients/functional_tests/${build_dir}/DC_SINGLE") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_SINGLE/install'
}
dir("clients/sos_tests/${build_dir}/DC_SINGLE") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_SINGLE/install'
}
//===================== SPTS ==========================
dir("internal/clients/spts/${build_dir}/DC_SINGLE") {
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/DC_SINGLE/install'
}
}
}
stage('DC_MULTI') {
steps {
//===================== LIBRARY =======================
dir("library/${build_dir}/DC_MULTI") {
sh 'mkdir -p install'
sh '../../../build_configs/dc_multi install'
}
//===================== CLIENT ========================
dir("clients/functional_tests/${build_dir}/DC_MULTI") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI/install'
}
dir("clients/sos_tests/${build_dir}/DC_MULTI") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI/install'
}
//===================== SPTS ==========================
dir("internal/clients/spts/${build_dir}/DC_MULTI") {
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/DC_MULTI/install'
}
}
}
stage('DC_MULTI_IPC') {
steps {
//===================== LIBRARY =======================
dir("library/${build_dir}/DC_MULTI_IPC") {
sh 'mkdir -p install'
sh '../../../build_configs/dc_multi_ipc install'
}
//===================== CLIENT ========================
dir("clients/functional_tests/${build_dir}/DC_MULTI_IPC") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI_IPC/install'
}
dir("clients/sos_tests/${build_dir}/DC_MULTI_IPC") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI_IPC/install'
}
//===================== SPTS ==========================
dir("internal/clients/spts/${build_dir}/DC_MULTI_IPC") {
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/DC_MULTI_IPC/install'
}
}
}
stage('DC_MULTI_DEBUG') {
steps {
//===================== LIBRARY =======================
dir("library/${build_dir}/DC_MULTI_DEBUG") {
sh 'mkdir -p install'
sh '../../../build_configs/dc_multi_debug install'
}
//===================== CLIENT ========================
dir("clients/functional_tests/${build_dir}/DC_MULTI_DEBUG") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI_DEBUG/install'
}
dir("clients/sos_tests/${build_dir}/DC_MULTI_DEBUG") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI_DEBUG/install'
}
//===================== SPTS ==========================
dir("internal/clients/spts/${build_dir}/DC_MULTI_DEBUG") {
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/DC_MULTI_DEBUG/install'
}
}
}
stage('DC_MULTI_PROFILE') {
steps {
//===================== LIBRARY =======================
dir("library/${build_dir}/DC_MULTI_PROFILE") {
sh 'mkdir -p install'
sh '../../../build_configs/dc_multi_profile install'
}
//===================== CLIENT ========================
dir("clients/functional_tests/${build_dir}/DC_MULTI_PROFILE") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI_PROFILE/install'
}
dir("clients/sos_tests/${build_dir}/DC_MULTI_PROFILE") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI_PROFILE/install'
}
//===================== SPTS ==========================
dir("internal/clients/spts/${build_dir}/DC_MULTI_PROFILE") {
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/DC_MULTI_PROFILE/install'
}
}
}
}
}
stage('Run Tests') {
stages {
stage('RC_SINGLE') {
steps {
dir("clients/functional_tests") {
sh './driver.sh ${build_dir}/RC_SINGLE/rocshmem_example_driver single_thread ${build_dir}/RC_SINGLE'
}
dir("clients/sos_tests") {
sh './driver.sh ${build_dir}/RC_SINGLE all ${build_dir}/RC_SINGLE'
}
dir("internal/clients/spts") {
sh './driver.sh ${build_dir}/RC_SINGLE/spts single_thread ${build_dir}/RC_SINGLE'
}
}
}
stage('RC_MULTI_WF_COAL') {
steps {
dir("clients/functional_tests") {
sh './driver.sh ${build_dir}/RC_MULTI_WF_COAL/rocshmem_example_driver multi_thread ${build_dir}/RC_MULTI_WF_COAL'
}
dir("clients/sos_tests") {
sh './driver.sh ${build_dir}/RC_MULTI_WF_COAL all ${build_dir}/RC_MULTI_WF_COAL'
}
dir("internal/clients/spts") {
sh './driver.sh ${build_dir}/RC_MULTI_WF_COAL/spts multi_thread ${build_dir}/RC_MULTI_WF_COAL'
}
}
}
stage('RC_MULTI') {
steps {
dir("clients/functional_tests") {
sh './driver.sh ${build_dir}/RC_MULTI/rocshmem_example_driver multi_thread ${build_dir}/RC_MULTI'
}
dir("clients/sos_tests") {
sh './driver.sh ${build_dir}/RC_MULTI all ${build_dir}/RC_MULTI'
}
dir("internal/clients/spts") {
sh './driver.sh ${build_dir}/RC_MULTI/spts multi_thread ${build_dir}/RC_MULTI'
}
}
}
stage('RC_SINGLE_DEBUG') {
steps {
dir("clients/functional_tests") {
sh './driver.sh ${build_dir}/RC_SINGLE_DEBUG/rocshmem_example_driver single_thread ${build_dir}/RC_SINGLE_DEBUG'
}
dir("clients/sos_tests") {
sh './driver.sh ${build_dir}/RC_SINGLE_DEBUG all ${build_dir}/RC_SINGLE_DEBUG'
}
dir("internal/clients/spts") {
sh './driver.sh ${build_dir}/RC_SINGLE_DEBUG/spts single_thread ${build_dir}/RC_SINGLE_DEBUG'
}
}
}
stage('RC_SINGLE_PROFILE') {
steps {
dir("clients/functional_tests") {
sh './driver.sh ${build_dir}/RC_SINGLE_PROFILE/rocshmem_example_driver single_thread ${build_dir}/RC_SINGLE_PROFILE'
}
dir("clients/sos_tests") {
sh './driver.sh ${build_dir}/RC_SINGLE_PROFILE all ${build_dir}/RC_SINGLE_PROFILE'
}
dir("internal/clients/spts") {
sh './driver.sh ${build_dir}/RC_SINGLE_PROFILE/spts single_thread ${build_dir}/RC_SINGLE_PROFILE'
}
}
}
stage('DC_SINGLE') {
steps {
dir("clients/functional_tests") {
sh './driver.sh ${build_dir}/DC_SINGLE/rocshmem_example_driver single_thread ${build_dir}/DC_SINGLE'
}
dir("clients/sos_tests") {
sh './driver.sh ${build_dir}/DC_SINGLE all ${build_dir}/DC_SINGLE'
}
dir("internal/clients/spts") {
sh './driver.sh ${build_dir}/DC_SINGLE/spts single_thread ${build_dir}/DC_SINGLE'
}
}
}
stage('DC_MULTI') {
steps {
dir("clients/functional_tests") {
sh './driver.sh ${build_dir}/DC_MULTI/rocshmem_example_driver multi_thread ${build_dir}/DC_MULTI'
}
dir("clients/sos_tests") {
sh './driver.sh ${build_dir}/DC_MULTI all ${build_dir}/DC_MULTI'
}
dir("internal/clients/spts") {
sh './driver.sh ${build_dir}/DC_MULTI/spts multi_thread ${build_dir}/DC_MULTI'
}
}
}
stage('DC_MULTI_IPC') {
steps {
dir("clients/functional_tests") {
sh './driver.sh ${build_dir}/DC_MULTI_IPC/rocshmem_example_driver multi_thread ${build_dir}/DC_MULTI_IPC'
}
dir("clients/sos_tests") {
sh './driver.sh ${build_dir}/DC_MULTI_IPC all ${build_dir}/DC_MULTI_IPC'
}
dir("internal/clients/spts") {
sh './driver.sh ${build_dir}/DC_MULTI_IPC/spts multi_thread ${build_dir}/DC_MULTI_IPC'
}
}
}
stage('DC_MULTI_DEBUG') {
steps {
dir("clients/functional_tests") {
sh './driver.sh ${build_dir}/DC_MULTI_DEBUG/rocshmem_example_driver multi_thread ${build_dir}/DC_MULTI_DEBUG'
}
dir("clients/sos_tests") {
sh './driver.sh ${build_dir}/DC_MULTI_DEBUG all ${build_dir}/DC_MULTI_DEBUG'
}
dir("internal/clients/spts") {
sh './driver.sh ${build_dir}/DC_MULTI_DEBUG/spts multi_thread ${build_dir}/DC_MULTI_DEBUG'
}
}
}
stage('DC_MULTI_PROFILE') {
steps {
dir("clients/functional_tests") {
sh './driver.sh ${build_dir}/DC_MULTI_PROFILE/rocshmem_example_driver multi_thread ${build_dir}/DC_MULTI_PROFILE'
}
dir("clients/sos_tests") {
sh './driver.sh ${build_dir}/DC_MULTI_PROFILE all ${build_dir}/DC_MULTI_PROFILE'
}
dir("internal/clients/spts") {
sh './driver.sh ${build_dir}/DC_MULTI_PROFILE/spts multi_thread ${build_dir}/DC_MULTI_PROFILE'
}
}
}
stage('RO_NET_BASIC') {
// RO_NET controlled at runtime, no need for a new build. Use RC_MULTI
steps {
dir("clients/functional_tests") {
sh 'mkdir -p ${build_dir}/RO_NET_BASIC'
sh 'ROC_SHMEM_RO=1 RO_NET_CPU_QUEUE=1 UCX_TLS=rc ./driver.sh ${build_dir}/RC_MULTI/rocshmem_example_driver ro ${build_dir}/RO_NET_BASIC'
}
dir("clients/sos_tests") {
sh 'ROC_SHMEM_RO=1 ./driver.sh ${build_dir}/RC_MULTI all ${build_dir}/RC_MULTI'
}
dir("internal/clients/spts") {
sh 'mkdir -p ${build_dir}/RO_NET_BASIC'
sh 'ROC_SHMEM_RO=1 RO_NET_CPU_QUEUE=1 UCX_TLS=rc ./driver.sh ${build_dir}/RC_MULTI/spts multi_thread ${build_dir}/RO_NET_BASIC'
}
}
}
}
}
}
}
@@ -0,0 +1,45 @@
<?xml version='1.1' encoding='UTF-8'?>
<flow-definition plugin="workflow-job@2.40">
<actions>
<org.jenkinsci.plugins.pipeline.modeldefinition.actions.DeclarativeJobAction plugin="pipeline-model-definition@1.8.4"/>
<org.jenkinsci.plugins.pipeline.modeldefinition.actions.DeclarativeJobPropertyTrackerAction plugin="pipeline-model-definition@1.8.4">
<jobProperties/>
<triggers/>
<parameters/>
<options/>
</org.jenkinsci.plugins.pipeline.modeldefinition.actions.DeclarativeJobPropertyTrackerAction>
</actions>
<description></description>
<keepDependencies>false</keepDependencies>
<properties>
<org.jenkinsci.plugins.workflow.job.properties.PipelineTriggersJobProperty>
<triggers>
<hudson.triggers.TimerTrigger>
<spec>H 22 * * *</spec>
</hudson.triggers.TimerTrigger>
</triggers>
</org.jenkinsci.plugins.workflow.job.properties.PipelineTriggersJobProperty>
</properties>
<definition class="org.jenkinsci.plugins.workflow.cps.CpsScmFlowDefinition" plugin="workflow-cps@2.90">
<scm class="hudson.plugins.git.GitSCM" plugin="git@4.7.1">
<configVersion>2</configVersion>
<userRemoteConfigs>
<hudson.plugins.git.UserRemoteConfig>
<url>ssh://gerritgit/rsch/ec/shmem</url>
</hudson.plugins.git.UserRemoteConfig>
</userRemoteConfigs>
<branches>
<hudson.plugins.git.BranchSpec>
<name>*/amd-master</name>
</hudson.plugins.git.BranchSpec>
</branches>
<doGenerateSubmoduleConfigurations>false</doGenerateSubmoduleConfigurations>
<submoduleCfg class="empty-list"/>
<extensions/>
</scm>
<scriptPath>internal/continuous_integration/nightly/Jenkinsfile</scriptPath>
<lightweight>false</lightweight>
</definition>
<triggers/>
<disabled>false</disabled>
</flow-definition>
@@ -0,0 +1,335 @@
pipeline {
agent { label 'sv-pdp-5' }
environment {
HSA_FORCE_FINE_GRAIN_PCIE = 1
MPI_HOME="/home/resperf/mpich-4.0.1/install/global"
PATH = "$MPI_HOME/bin:$PATH"
LD_LIBRARY_PATH = "$MPI_HOME/lib:$LD_LIBRARY_PATH"
build_dir = "builds/${BUILD_ID}"
CMAKE_PREFIX_PATH = "/opt/rocm/lib/cmake"
}
stages {
stage('Synchronize Source Code') {
steps {
git branch: 'amd-master', changelog: false, poll: false, url: 'ssh://gerritgit/rsch/ec/shmem'
}
}
stage('Make Build Directory') {
steps {
dir("library") {
sh "mkdir -p ${build_dir}"
}
}
}
stage('Build Source Code') {
parallel {
stage('RC_SINGLE') {
steps {
//===================== LIBRARY =======================
dir("library/${build_dir}/RC_SINGLE") {
sh 'mkdir -p install'
sh '../../../build_configs/rc_single install'
}
//===================== CLIENT ========================
dir("clients/functional_tests/${build_dir}/RC_SINGLE") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_SINGLE/install'
}
//===================== SPTS ==========================
dir("internal/clients/spts/${build_dir}/RC_SINGLE") {
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_SINGLE/install'
}
}
}
stage('RC_MULTI_WF_COAL') {
steps {
//===================== LIBRARY =======================
dir("library/${build_dir}/RC_MULTI_WF_COAL") {
sh 'mkdir -p install'
sh '../../../build_configs/rc_multi_wf_coal install'
}
//===================== CLIENT ========================
dir("clients/functional_tests/${build_dir}/RC_MULTI_WF_COAL") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_MULTI_WF_COAL/install'
}
//===================== SPTS ==========================
dir("internal/clients/spts/${build_dir}/RC_MULTI_WF_COAL") {
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_MULTI_WF_COAL/install'
}
}
}
stage('RC_MULTI') {
steps {
//===================== LIBRARY =======================
dir("library/${build_dir}/RC_MULTI") {
sh 'mkdir -p install'
sh '../../../build_configs/rc_multi install'
}
//===================== CLIENT ========================
dir("clients/functional_tests/${build_dir}/RC_MULTI") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_MULTI/install'
}
//===================== SPTS ==========================
dir("internal/clients/spts/${build_dir}/RC_MULTI") {
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_MULTI/install'
}
}
}
stage('RC_SINGLE_DEBUG') {
steps {
//===================== LIBRARY =======================
dir("library/${build_dir}/RC_SINGLE_DEBUG") {
sh 'mkdir -p install'
sh '../../../build_configs/rc_single_debug install'
}
//===================== CLIENT ========================
dir("clients/functional_tests/${build_dir}/RC_SINGLE_DEBUG") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_SINGLE_DEBUG/install'
}
//===================== SPTS ==========================
dir("internal/clients/spts/${build_dir}/RC_SINGLE_DEBUG") {
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_SINGLE_DEBUG/install'
}
}
}
stage('RC_SINGLE_PROFILE') {
steps {
//===================== LIBRARY =======================
dir("library/${build_dir}/RC_SINGLE_PROFILE") {
sh 'mkdir -p install'
sh '../../../build_configs/rc_single_profile install'
}
//===================== CLIENT ========================
dir("clients/functional_tests/${build_dir}/RC_SINGLE_PROFILE") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_SINGLE_PROFILE/install'
}
//===================== SPTS ==========================
dir("internal/clients/spts/${build_dir}/RC_SINGLE_PROFILE") {
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_SINGLE_PROFILE/install'
}
}
}
stage('DC_SINGLE') {
steps {
//===================== LIBRARY =======================
dir("library/${build_dir}/DC_SINGLE") {
sh 'mkdir -p install'
sh '../../../build_configs/dc_single install'
}
//===================== CLIENT ========================
dir("clients/functional_tests/${build_dir}/DC_SINGLE") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_SINGLE/install'
}
//===================== SPTS ==========================
dir("internal/clients/spts/${build_dir}/DC_SINGLE") {
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/DC_SINGLE/install'
}
}
}
stage('DC_MULTI') {
steps {
//===================== LIBRARY =======================
dir("library/${build_dir}/DC_MULTI") {
sh 'mkdir -p install'
sh '../../../build_configs/dc_multi install'
}
//===================== CLIENT ========================
dir("clients/functional_tests/${build_dir}/DC_MULTI") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI/install'
}
//===================== SPTS ==========================
dir("internal/clients/spts/${build_dir}/DC_MULTI") {
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/DC_MULTI/install'
}
}
}
stage('DC_MULTI_IPC') {
steps {
//===================== LIBRARY =======================
dir("library/${build_dir}/DC_MULTI_IPC") {
sh 'mkdir -p install'
sh '../../../build_configs/dc_multi_ipc install'
}
//===================== CLIENT ========================
dir("clients/functional_tests/${build_dir}/DC_MULTI_IPC") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI_IPC/install'
}
//===================== SPTS ==========================
dir("internal/clients/spts/${build_dir}/DC_MULTI_IPC") {
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/DC_MULTI_IPC/install'
}
}
}
stage('DC_MULTI_DEBUG') {
steps {
//===================== LIBRARY =======================
dir("library/${build_dir}/DC_MULTI_DEBUG") {
sh 'mkdir -p install'
sh '../../../build_configs/dc_multi_debug install'
}
//===================== CLIENT ========================
dir("clients/functional_tests/${build_dir}/DC_MULTI_DEBUG") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI_DEBUG/install'
}
//===================== SPTS ==========================
dir("internal/clients/spts/${build_dir}/DC_MULTI_DEBUG") {
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/DC_MULTI_DEBUG/install'
}
}
}
stage('DC_MULTI_PROFILE') {
steps {
//===================== LIBRARY =======================
dir("library/${build_dir}/DC_MULTI_PROFILE") {
sh 'mkdir -p install'
sh '../../../build_configs/dc_multi_profile install'
}
//===================== CLIENT ========================
dir("clients/functional_tests/${build_dir}/DC_MULTI_PROFILE") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI_PROFILE/install'
}
//===================== SPTS ==========================
dir("internal/clients/spts/${build_dir}/DC_MULTI_PROFILE") {
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/DC_MULTI_PROFILE/install'
}
}
}
}
}
stage('Run Tests') {
stages {
stage('RC_SINGLE') {
steps {
dir("clients/functional_tests") {
sh './driver.sh ${build_dir}/RC_SINGLE/rocshmem_example_driver single_thread ${build_dir}/RC_SINGLE'
}
dir("internal/clients/spts") {
sh './driver.sh ${build_dir}/RC_SINGLE/spts single_thread ${build_dir}/RC_SINGLE'
}
}
}
stage('RC_MULTI_WF_COAL') {
steps {
dir("clients/functional_tests") {
sh './driver.sh ${build_dir}/RC_MULTI_WF_COAL/rocshmem_example_driver multi_thread ${build_dir}/RC_MULTI_WF_COAL'
}
dir("internal/clients/spts") {
sh './driver.sh ${build_dir}/RC_MULTI_WF_COAL/spts multi_thread ${build_dir}/RC_MULTI_WF_COAL'
}
}
}
stage('RC_MULTI') {
steps {
dir("clients/functional_tests") {
sh './driver.sh ${build_dir}/RC_MULTI/rocshmem_example_driver multi_thread ${build_dir}/RC_MULTI'
}
dir("internal/clients/spts") {
sh './driver.sh ${build_dir}/RC_MULTI/spts multi_thread ${build_dir}/RC_MULTI'
}
}
}
stage('RC_SINGLE_DEBUG') {
steps {
dir("clients/functional_tests") {
sh './driver.sh ${build_dir}/RC_SINGLE_DEBUG/rocshmem_example_driver single_thread ${build_dir}/RC_SINGLE_DEBUG'
}
dir("internal/clients/spts") {
sh './driver.sh ${build_dir}/RC_SINGLE_DEBUG/spts single_thread ${build_dir}/RC_SINGLE_DEBUG'
}
}
}
stage('RC_SINGLE_PROFILE') {
steps {
dir("clients/functional_tests") {
sh './driver.sh ${build_dir}/RC_SINGLE_PROFILE/rocshmem_example_driver single_thread ${build_dir}/RC_SINGLE_PROFILE'
}
dir("internal/clients/spts") {
sh './driver.sh ${build_dir}/RC_SINGLE_PROFILE/spts single_thread ${build_dir}/RC_SINGLE_PROFILE'
}
}
}
stage('DC_SINGLE') {
steps {
dir("clients/functional_tests") {
sh './driver.sh ${build_dir}/DC_SINGLE/rocshmem_example_driver single_thread ${build_dir}/DC_SINGLE'
}
dir("internal/clients/spts") {
sh './driver.sh ${build_dir}/DC_SINGLE/spts single_thread ${build_dir}/DC_SINGLE'
}
}
}
stage('DC_MULTI') {
steps {
dir("clients/functional_tests") {
sh './driver.sh ${build_dir}/DC_MULTI/rocshmem_example_driver multi_thread ${build_dir}/DC_MULTI'
}
dir("internal/clients/spts") {
sh './driver.sh ${build_dir}/DC_MULTI/spts multi_thread ${build_dir}/DC_MULTI'
}
}
}
stage('DC_MULTI_IPC') {
steps {
dir("clients/functional_tests") {
sh './driver.sh ${build_dir}/DC_MULTI_IPC/rocshmem_example_driver multi_thread ${build_dir}/DC_MULTI_IPC'
}
dir("internal/clients/spts") {
sh './driver.sh ${build_dir}/DC_MULTI_IPC/spts multi_thread ${build_dir}/DC_MULTI_IPC'
}
}
}
stage('DC_MULTI_DEBUG') {
steps {
dir("clients/functional_tests") {
sh './driver.sh ${build_dir}/DC_MULTI_DEBUG/rocshmem_example_driver multi_thread ${build_dir}/DC_MULTI_DEBUG'
}
dir("internal/clients/spts") {
sh './driver.sh ${build_dir}/DC_MULTI_DEBUG/spts multi_thread ${build_dir}/DC_MULTI_DEBUG'
}
}
}
stage('DC_MULTI_PROFILE') {
steps {
dir("clients/functional_tests") {
sh './driver.sh ${build_dir}/DC_MULTI_PROFILE/rocshmem_example_driver multi_thread ${build_dir}/DC_MULTI_PROFILE'
}
dir("internal/clients/spts") {
sh './driver.sh ${build_dir}/DC_MULTI_PROFILE/spts multi_thread ${build_dir}/DC_MULTI_PROFILE'
}
}
}
stage('RO_NET_BASIC') {
// RO_NET controlled at runtime, no need for a new build. Use RC_MULTI
steps {
dir("clients/functional_tests") {
sh 'mkdir -p ${build_dir}/RO_NET_BASIC'
sh 'ROC_SHMEM_RO=1 RO_NET_CPU_QUEUE=1 UCX_TLS=rc ./driver.sh ${build_dir}/RC_MULTI/rocshmem_example_driver ro ${build_dir}/RO_NET_BASIC'
}
dir("internal/clients/spts") {
sh 'mkdir -p ${build_dir}/RO_NET_BASIC'
sh 'ROC_SHMEM_RO=1 RO_NET_CPU_QUEUE=1 UCX_TLS=rc ./driver.sh ${build_dir}/RC_MULTI/spts multi_thread ${build_dir}/RO_NET_BASIC'
}
}
}
}
}
}
}
@@ -0,0 +1,45 @@
<?xml version='1.1' encoding='UTF-8'?>
<flow-definition plugin="workflow-job@2.40">
<actions>
<org.jenkinsci.plugins.pipeline.modeldefinition.actions.DeclarativeJobAction plugin="pipeline-model-definition@1.8.4"/>
<org.jenkinsci.plugins.pipeline.modeldefinition.actions.DeclarativeJobPropertyTrackerAction plugin="pipeline-model-definition@1.8.4">
<jobProperties/>
<triggers/>
<parameters/>
<options/>
</org.jenkinsci.plugins.pipeline.modeldefinition.actions.DeclarativeJobPropertyTrackerAction>
</actions>
<description></description>
<keepDependencies>false</keepDependencies>
<properties>
<org.jenkinsci.plugins.workflow.job.properties.PipelineTriggersJobProperty>
<triggers>
<hudson.triggers.TimerTrigger>
<spec>H 22 * * *</spec>
</hudson.triggers.TimerTrigger>
</triggers>
</org.jenkinsci.plugins.workflow.job.properties.PipelineTriggersJobProperty>
</properties>
<definition class="org.jenkinsci.plugins.workflow.cps.CpsScmFlowDefinition" plugin="workflow-cps@2.90">
<scm class="hudson.plugins.git.GitSCM" plugin="git@4.7.1">
<configVersion>2</configVersion>
<userRemoteConfigs>
<hudson.plugins.git.UserRemoteConfig>
<url>ssh://gerritgit/rsch/ec/shmem</url>
</hudson.plugins.git.UserRemoteConfig>
</userRemoteConfigs>
<branches>
<hudson.plugins.git.BranchSpec>
<name>*/amd-master</name>
</hudson.plugins.git.BranchSpec>
</branches>
<doGenerateSubmoduleConfigurations>false</doGenerateSubmoduleConfigurations>
<submoduleCfg class="empty-list"/>
<extensions/>
</scm>
<scriptPath>internal/continuous_integration/nightly/Jenkinsfile</scriptPath>
<lightweight>false</lightweight>
</definition>
<triggers/>
<disabled>false</disabled>
</flow-definition>
@@ -0,0 +1,288 @@
pipeline {
agent { label 'sv-pdp-7' }
environment {
build_dir = "builds/change-${GERRIT_CHANGE_NUMBER}-${GERRIT_PATCHSET_NUMBER}"
MPI_HOME="/home/resperf/mpich/install"
UCX_HOME="/home/resperf/ucx/install"
PATH="$MPI_HOME/bin:$UCX_HOME/bin:$PATH"
LD_LIBRARY_PATH="$MPI_HOME/lib:$UCX_HOME/lib:$LD_LIBRARY_PATH"
PKG_CONFIG_PATH="$MPI_HOME/lib/pkgconfig:$UCX_HOME/lib/pkgconfig"
CMAKE_PREFIX_PATH="/opt/rocm/lib/cmake"
UCX_WARN_UNUSED_ENV_VARS="n"
HSA_FORCE_FINE_GRAIN_PCIE=1
UCX_TLS="rc"
ROC_SHMEM_USE_SQ_GPU_MEM=0
ROC_SHMEM_USE_CQ_GPU_MEM=0
ROC_SHMEM_NUM_BLOCKS=128
}
stages {
stage('Synchronize Source Code') {
steps {
checkout changelog: false, poll: false, scm: [$class: 'GitSCM', branches: [[name: 'FETCH_HEAD']], doGenerateSubmoduleConfigurations: false, extensions: [[$class: 'CloneOption', depth: 0, noTags: false, reference: '', shallow: false]], submoduleCfg: [], userRemoteConfigs: [[name: 'origin', refspec: '${GERRIT_REFSPEC}', url: 'ssh://gerritgit/rsch/ec/shmem']]]
}
}
stage('Env Variables') {
steps {
sh 'printenv'
}
}
stage('Make Build Directory') {
steps {
dir("library") {
sh "mkdir -p ${build_dir}"
}
}
}
stage('Build Source Code') {
failFast true
parallel {
stage('RC_SINGLE') {
steps {
//===================== LIBRARY =======================
dir("library/${build_dir}/RC_SINGLE") {
sh 'mkdir -p install'
sh '../../../build_configs/rc_single install'
}
//===================== CLIENT ========================
dir("clients/functional_tests/${build_dir}/RC_SINGLE") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_SINGLE/install'
}
dir("clients/sos_tests/${build_dir}/RC_SINGLE") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_SINGLE/install'
}
//===================== SPTS ==========================
//dir("internal/clients/spts/${build_dir}/RC_SINGLE") {
// sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_SINGLE/install'
//}
}
}
stage('RC_MULTI_WF_COAL') {
steps {
//===================== LIBRARY =======================
dir("library/${build_dir}/RC_MULTI_WF_COAL") {
sh 'mkdir -p install'
sh '../../../build_configs/rc_multi_wf_coal install'
}
//===================== CLIENT ========================
dir("clients/functional_tests/${build_dir}/RC_MULTI_WF_COAL") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_MULTI_WF_COAL/install'
}
dir("clients/sos_tests/${build_dir}/RC_MULTI_WF_COAL") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_MULTI_WF_COAL/install'
}
//===================== SPTS ==========================
//dir("internal/clients/spts/${build_dir}/RC_MULTI_WF_COAL") {
// sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_MULTI_WF_COAL/install'
//}
}
}
stage('RC_MULTI') {
steps {
//===================== LIBRARY =======================
dir("library/${build_dir}/RC_MULTI") {
sh 'mkdir -p install'
sh '../../../build_configs/rc_multi install'
}
//===================== CLIENT ========================
dir("clients/functional_tests/${build_dir}/RC_MULTI") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_MULTI/install'
}
dir("clients/sos_tests/${build_dir}/RC_MULTI") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_MULTI/install'
}
//===================== SPTS ==========================
//dir("internal/clients/spts/${build_dir}/RC_MULTI") {
// sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_MULTI/install'
//}
}
}
stage('DC_SINGLE') {
steps {
//===================== LIBRARY =======================
dir("library/${build_dir}/DC_SINGLE") {
sh 'mkdir -p install'
sh '../../../build_configs/dc_single install'
}
//===================== CLIENT ========================
dir("clients/functional_tests/${build_dir}/DC_SINGLE") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_SINGLE/install'
}
dir("clients/sos_tests/${build_dir}/DC_SINGLE") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_SINGLE/install'
}
//===================== SPTS ==========================
//dir("internal/clients/spts/${build_dir}/DC_SINGLE") {
// sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/DC_SINGLE/install'
//}
}
}
stage('DC_MULTI') {
steps {
//===================== LIBRARY =======================
dir("library/${build_dir}/DC_MULTI") {
sh 'mkdir -p install'
sh '../../../build_configs/dc_multi install'
}
//===================== CLIENT ========================
dir("clients/functional_tests/${build_dir}/DC_MULTI") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI/install'
}
dir("clients/sos_tests/${build_dir}/DC_MULTI") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI/install'
}
//===================== SPTS ==========================
//dir("internal/clients/spts/${build_dir}/DC_MULTI") {
// sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/DC_MULTI/install'
//}
}
}
stage('RO_NET') {
steps {
//===================== LIBRARY =======================
dir("library/${build_dir}/RO_NET") {
sh 'mkdir -p install'
sh '../../../build_configs/ro_net install'
}
//===================== CLIENT ========================
dir("clients/functional_tests/${build_dir}/RO_NET") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RO_NET/install'
}
dir("clients/sos_tests/${build_dir}/RO_NET") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RO_NET/install'
}
//===================== SPTS ==========================
//dir("internal/clients/spts/${build_dir}/RO_NET") {
// sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RO_NET/install'
//}
}
}
}
}
stage('Run Tests') {
stages {
stage('RC_SINGLE') {
steps {
dir("clients/functional_tests") {
sh './driver.sh ${build_dir}/RC_SINGLE/rocshmem_example_driver single_thread ${build_dir}/RC_SINGLE'
}
dir("clients/sos_tests") {
sh './driver.sh ${build_dir}/RC_SINGLE short ${build_dir}/RC_SINGLE'
}
//dir("internal/clients/spts") {
// sh './driver.sh ${build_dir}/RC_SINGLE/spts single_thread ${build_dir}/RC_SINGLE'
//}
}
}
stage('RC_MULTI_WF_COAL') {
steps {
dir("clients/functional_tests") {
sh './driver.sh ${build_dir}/RC_MULTI_WF_COAL/rocshmem_example_driver multi_thread ${build_dir}/RC_MULTI_WF_COAL'
}
dir("clients/sos_tests") {
sh './driver.sh ${build_dir}/RC_MULTI_WF_COAL short ${build_dir}/RC_MULTI_WF_COAL'
}
//dir("internal/clients/spts") {
// sh './driver.sh ${build_dir}/RC_MULTI_WF_COAL/spts multi_thread ${build_dir}/RC_MULTI_WF_COAL'
//}
}
}
stage('RC_MULTI') {
steps {
dir("clients/functional_tests") {
sh './driver.sh ${build_dir}/RC_MULTI/rocshmem_example_driver multi_thread ${build_dir}/RC_MULTI'
}
dir("clients/sos_tests") {
sh './driver.sh ${build_dir}/RC_MULTI short ${build_dir}/RC_MULTI'
}
//dir("internal/clients/spts") {
// sh './driver.sh ${build_dir}/RC_MULTI/spts multi_thread ${build_dir}/RC_MULTI'
//}
}
}
stage('DC_SINGLE') {
steps {
dir("clients/functional_tests") {
sh './driver.sh ${build_dir}/DC_SINGLE/rocshmem_example_driver single_thread ${build_dir}/DC_SINGLE'
}
dir("clients/sos_tests") {
sh './driver.sh ${build_dir}/DC_SINGLE short ${build_dir}/DC_SINGLE'
}
//dir("internal/clients/spts") {
// sh './driver.sh ${build_dir}/DC_SINGLE/spts single_thread ${build_dir}/DC_SINGLE'
//}
}
}
stage('DC_MULTI') {
steps {
dir("clients/functional_tests") {
sh './driver.sh ${build_dir}/DC_MULTI/rocshmem_example_driver multi_thread ${build_dir}/DC_MULTI'
}
dir("clients/sos_tests") {
sh './driver.sh ${build_dir}/DC_MULTI short ${build_dir}/DC_MULTI'
}
//dir("internal/clients/spts") {
// sh './driver.sh ${build_dir}/DC_MULTI/spts multi_thread ${build_dir}/DC_MULTI'
//}
}
}
stage('RO_NET') {
steps {
dir("clients/functional_tests") {
sh 'ROC_SHMEM_RO=1 RO_NET_CPU_QUEUE=1 UCX_TLS=rc ./driver.sh ${build_dir}/RO_NET/rocshmem_example_driver ro ${build_dir}/RO_NET'
}
dir("clients/sos_tests") {
sh 'ROC_SHMEM_RO=1 ./driver.sh ${build_dir}/RO_NET short ${build_dir}/RO_NET'
}
//dir("internal/clients/spts") {
// sh 'ROC_SHMEM_RO=1 RO_NET_CPU_QUEUE=1 UCX_TLS=rc ./driver.sh ${build_dir}/RO_NET/spts multi_thread ${build_dir}/RO_NET'
//}
}
}
}
}
stage('Generate Checker Metadata') {
steps {
dir("library/${build_dir}") {
sh 'git fetch --tags'
sh 'git log --pretty=oneline remotes/origin/amd-master.. > changeset_delta.txt'
sh 'git log --pretty=oneline remotes/origin/amd-master~1..remotes/origin/amd-master >> changeset_delta.txt'
}
}
}
stage('Archive Artifacts') {
steps {
dir("library/${build_dir}") {
archiveArtifacts artifacts: 'changeset_delta.txt'
}
dir("clients/functional_tests/${build_dir}") {
archiveArtifacts artifacts: 'RC_SINGLE/**/*.log'
archiveArtifacts artifacts: 'RC_MULTI/**/*.log'
archiveArtifacts artifacts: 'DC_SINGLE/**/*.log'
archiveArtifacts artifacts: 'DC_MULTI/**/*.log'
archiveArtifacts artifacts: 'RO_NET/**/*.log'
}
}
}
}
post {
success {
build job: 'shmem_perf_check', wait: true
}
}
}
@@ -0,0 +1,21 @@
#!/tool/pandora64/.package/python-3.8.0/bin/python3
import glob
import pprint
class PathGlobber():
def __init__(self, name, *partial_paths_to_concatenate):
self._search_path = ''
for partial_path in partial_paths_to_concatenate:
self._search_path += partial_path
self.dirs = []
self._name = name
def generate(self):
self.dirs = glob.glob(self._search_path, recursive=True)
def dump(self):
str_out = self._name
str_out += pprint.pformat(self.dirs, width=120)
str_out += '\n'
return str_out
@@ -0,0 +1,15 @@
#!/tool/pandora64/.package/python-3.8.0/bin/python3
import absolute_path
import glob
class Archive(absolute_path.PathGlobber):
def __init__(self, args, name=''):
archive_path = args.archive_path
super().__init__(name, args.jenkins_path, archive_path,
args.benchmark_path)
def path_of_build(self, build_id):
path = self._search_path.replace('*/archive', build_id + '/archive')
path = glob.glob(path)
return path[0]
@@ -0,0 +1,54 @@
#!/tool/pandora64/.package/python-3.8.0/bin/python3
import parser
import dictionary
import archive_path
import checker
def main():
# This script accepts command line values, but has reasonable defaults
# needed to run as part of the CI infrastructure.
p = parser.Parser()
args = p.parse_command_line()
# Jenkins is configured to archive build artifacts in a directory.
# The 'archives' variable holds the set of directories for
# successful Jenkins builds (those which run to completion).
# Partitioning of successful builds is useful since we can ignore
# failed build directories while searching for performance data.
archives = archive_path.Archive(args)
archives.generate()
print(archives.dump())
# Jenkins records changeset information in a changeset_delta.txt file.
# We parse the changelog for the commit hash and save it into
# 'builds_to_changesets'.
build_to_changeset = dictionary.BuildToChangesetDict()
build_to_changeset.generate(archives.dirs)
print(build_to_changeset.dump())
# 'changeset_to_build' holds the changeset mappings with a
# list of build numbers that match the changeset value.
# Builds may be executed many times with the same changeset.
# The most recent build (identified by the largest build number) will
# be used to retrieve performance data.
changeset_to_build = dictionary.ChangesetToBuildDict()
changeset_to_build.generate(build_to_changeset)
print(changeset_to_build.dump())
# Jenkins is configured to dump Gerrit-esque relation chain changesets
# to an archived output file 'changeset-delta.txt'.
# The relation chain will be used to determine changeset performance
# data for each changeset in the relation chain (when possible).
build_to_relation_chain = dictionary.BuildToRelationChainDict()
build_to_relation_chain.generate(archives.dirs)
print(build_to_relation_chain.dump())
perf_checker = checker.Performance(args,
archives,
changeset_to_build,
build_to_relation_chain)
perf_checker.run()
if __name__ == '__main__':
main()
@@ -0,0 +1,97 @@
#!/tool/pandora64/.package/python-3.8.0/bin/python3
import archive_path
import log
import dictionary
import report
import violation
class Performance():
def __init__(self, args, archives, changeset_to_build,
build_to_relation_chain):
self._args = args
self._archives = archives
self._changeset_to_build = changeset_to_build
self._build_to_relation_chain = build_to_relation_chain
self._build_id = build_to_relation_chain.most_recent_build()
self._archive_path = archives.path_of_build(self._build_id)
self._output = report.Report(self._build_id,
self._archive_path,
'performance_diff.txt')
def _other_build_id(self, other_changeset):
packed_id = [build_id for chng,
build_id in self._changeset_to_build.data.items()
if chng.startswith(other_changeset)]
# The 'packed_id' variable is a list containing lists.
# We need the content inside the packed_id data structure.
try:
build_id = packed_id[0][0]
return True, build_id
except IndexError:
# An index error can occur if builds in the relation chain
# have not been tested before attempting to test this
# changeset.
return False, 0
def _log_difference(self, log_filename, other_changeset,
other_archive_path, violations):
print('determining difference of log file ' + log_filename)
self._output.record(log_filename)
current_file_path = self._archive_path + '/' + log_filename
other_file_path = other_archive_path + '/' + log_filename
log_pair = log.Pair(current_file_path, other_file_path)
log_pair.calculate_differences()
latency_perc = [float(i.strip('%')) \
for i in log_pair.latency_percentage_differences]
max_latency = max(latency_perc)
violations.check(max_latency, other_changeset, log_filename)
self._output.record(log_pair.dump())
def _changeset_difference(self, current_changeset, other_changeset):
violations = violation.Threshold(self._args.latency_max, 'latency')
change_pair = '(' + current_changeset + ',' + other_changeset + ')'
print('comparing changesets ' + change_pair)
self._output.record(change_pair)
status, other_build_id = self._other_build_id(other_changeset)
if status == False:
message = 'skipping changeset ' + other_changeset
print(message)
self._output.record(message)
return violations
other_archive_path = self._archives.path_of_build(other_build_id)
print(self._archive_path)
print(other_archive_path)
for filename in self._args.logs:
self._log_difference(filename, other_changeset,
other_archive_path, violations)
print('\n')
return violations
def _calculate_performance_differences(self):
current_changeset = \
self._build_to_relation_chain.data[self._build_id][0]
other_changesets = \
self._build_to_relation_chain.data[self._build_id][1:]
for other_changeset in other_changesets:
violations = self._changeset_difference(current_changeset,
other_changeset)
# Only report on the last pairwise changeset combination.
# This combination represents the changeset being tested and
# the amd-master:HEAD.
violations.provide_violations_to_report(self._output)
def run(self):
self._output.open()
self._calculate_performance_differences()
@@ -0,0 +1,163 @@
#!/tool/pandora64/.package/python-3.8.0/bin/python3
import abc
import os
import pprint
import subprocess
import sys
class BaseDict(metaclass=abc.ABCMeta):
def __init__(self):
self.data = {}
self._delimiter_path = 'archive'
self._changeset_delta_filename = 'changeset_delta.txt'
def _build_id(self, build_directory):
sub_directory_strings = build_directory.split('/')
word_count = 0
for word in sub_directory_strings:
if word == '':
continue
if word == self._delimiter_path:
break
word_count += 1
bld_id = sub_directory_strings[word_count]
return bld_id
def _open_changeset_delta_file(self, archive_directory):
build_directory, config_directory = os.path.split(archive_directory)
changeset_file_path = build_directory + '/' + \
self._changeset_delta_filename
try:
file_handle = open(changeset_file_path, 'r')
except:
sys.exit('failed to open: ' + changeset_file_path)
return file_handle
@abc.abstractmethod
def _changeset_delta_operations(self, file_handle, bld_id):
pass
def generate(self, archives):
for d in archives:
bld_id = self._build_id(d)
f = self._open_changeset_delta_file(d)
self._changeset_delta_operations(f, bld_id)
def most_recent_build(self):
build_id_strings = self.data.keys()
build_id_ints = list(map(int, build_id_strings))
most_recent_build_id_int = max(build_id_ints)
return str(most_recent_build_id_int)
def dump(self):
str_out = self._print_text
str_out += pprint.pformat(self.data, width=120)
str_out += '\n'
return str_out
class BuildToChangesetDict(BaseDict):
def __init__(self, name=''):
super().__init__()
self._print_text = name
def _changeset_delta_operations(self, file_handle, bld_id):
commit_line = file_handle.readline()
try:
commit_hash = commit_line.split()[0]
except IndexError:
commit_hash = None
if commit_hash != None:
self.data[bld_id] = commit_hash
class BuildToRelationChainDict(BaseDict):
def __init__(self, name=''):
super().__init__()
self._print_text = name
def _changeset_delta_operations(self, file_handle, bld_id):
changes = []
for line in file_handle:
changes.append(line.split()[0])
self.data[bld_id] = changes
class ChangesetToBuildDict():
def __init__(self, name=''):
self.data = {}
self._print_text = name
def _invert_dict(self, dictionary):
dict_with_duplicates = {}
for key, value in dictionary.data.items():
list_with_duplicates = dict_with_duplicates.get(value, [])
list_with_duplicates.append(key)
dict_with_duplicates[value] = list_with_duplicates
return dict_with_duplicates
def generate(self, dictionary):
self.data = self._invert_dict(dictionary)
def dump(self):
str_out = self._print_text
str_out += pprint.pformat(self.data, width=120)
str_out += '\n'
return str_out
class ChangelogToMostRecentBuild():
def __init__(self, name=''):
self._print_text = name
self._all_changesets = []
self._changesets_with_builds = []
self._changesets_without_builds = []
self.data = {}
def _build_id(self, changeset_to_build, changeset):
try:
build_id_strings = changeset_to_build.data[changeset]
build_id_ints = list(map(int, build_id_strings))
most_recent_build_id_int = max(build_id_ints)
build_id_str = str(most_recent_build_id_int)
except:
build_id_str = ''
return build_id_str
def _changelog(self):
# print git hash along with file modification stats
shellcmd = 'git log --pretty=tformat:"%H" --shortstat | '
# condense the output down to single line
shellcmd += "awk 'ORS=NR%3?\" \":\"\\n\"' | "
# parse out the git hash by itself
shellcmd += "awk '{print $1}'"
x = subprocess.getoutput(shellcmd)
self._all_changesets = x.split()
def _with_builds(self, changeset_to_build):
changesets = list(changeset_to_build.data.keys())
self._changesets_with_builds = changesets
def _without_builds(self):
self._changesets_without_builds = \
list(set(self._all_changesets) - \
set(self._changesets_with_builds))
def generate(self, changeset_to_build):
self._changelog()
self._with_builds(changeset_to_build)
self._without_builds()
for changeset in self._all_changesets:
if changeset in self._changesets_with_builds:
build = self._build_id(changeset_to_build, changeset)
self.data[changeset] = build
def dump(self):
str_out = self._print_text
str_out += 'git-log_changesets_in_order:\n'
str_out += pprint.pformat(self._all_changesets, width=120)
str_out += '\nfilesystem_with_builds:\n'
str_out += pprint.pformat(self._changesets_with_builds, width=120)
str_out += '\nfilesystem_without_builds:\n'
str_out += pprint.pformat(self._changesets_without_builds, width=120)
str_out += '\ngit-log_changesets_to_build-id_mappings:\n'
str_out += pprint.pformat(self.data, width=120)
str_out += '\n'
return str_out
@@ -0,0 +1,107 @@
#!/tool/pandora64/.package/python-3.8.0/bin/python3
import pprint
import re
import sys
class Log():
def __init__(self, logfile_abspath):
self._file_path = logfile_abspath
self.latency = []
self.bandwidth = []
# regex matches the latency and bandwidth lines in the log files
self._regex = '.*[0-9]+\.[0-9]+.*[0-9]\.[0-9].*'
def open(self):
try:
self._file_handle = open(self._file_path, 'r')
except:
sys.exit('failed to open: ' + self._file_path)
def parse(self):
for line in self._file_handle:
if re.match(self._regex, line):
entries = line.split()
self.latency.append(round(float(entries[0]), 4))
self.bandwidth.append(round(float(entries[1]), 4))
class Pair():
def __init__(self, first_logfile_abspath, second_logfile_abspath):
self.first = Log(first_logfile_abspath)
self.first.open()
self.first.parse()
self.second = Log(second_logfile_abspath)
self.second.open()
self.second.parse()
def _ratio(self, a, b):
diff = [round((x - y), 4) for x, y in zip(a, b)]
ratio = []
for numerator, denominator in zip(diff, a):
try:
ratio.append(round(numerator / denominator, 4))
except:
ratio.append(float(0.0000))
return ratio
def _percent(self, ratio):
perc = ['{0:.2%}'.format(x) for x in ratio]
return perc
def _percentage_difference(self, a, b):
ratio = self._ratio(a, b)
percent = self._percent(ratio)
return percent
def calculate_differences(self):
self.latency_percentage_differences = \
self._percentage_difference(self.first.latency,
self.second.latency)
self.bandwidth_percentage_differences = \
self._percentage_difference(self.first.bandwidth,
self.second.bandwidth)
def dump(self):
delim = ', '
output = '\tlatency:'
output += '\n\t\t'
output += delim.join(map(str, self.first.latency))
output += '\n\t\t'
output += delim.join(map(str, self.second.latency))
output += '\n\t\t'
output += delim.join(map(str, self.latency_percentage_differences))
output += '\n\tbandwidth:'
output += '\n\t\t'
output += delim.join(map(str, self.first.bandwidth))
output += '\n\t\t'
output += delim.join(map(str, self.second.bandwidth))
output += '\n\t\t'
output += delim.join(map(str, self.bandwidth_percentage_differences))
return output
class Tracker():
def __init__(self, args, archives):
self._args = args
self._archives = archives
self._data = {}
def add(self, changeset, most_recent_build_id):
archive_path = self._archives.path_of_build(most_recent_build_id)
for filename in self._args.logs:
abs_file_path = archive_path + '/' + filename
log = Log(abs_file_path)
log.open()
log.parse()
key = (changeset, filename)
self._data[key] = log
def dump(self):
out_str = ''
for key in self._data.keys():
log = self._data[key]
line_str = pprint.pformat(key, width=120)
line_str += ' = '
line_str += pprint.pformat(log.latency, width=120)
line_str += '\n'
out_str += line_str
return out_str
@@ -0,0 +1,85 @@
#!/tool/pandora64/.package/python-3.8.0/bin/python3
import argparse
class Parser():
def __init__(self):
# A parent directory containing log file output from one of the
# configuration runs. The output directories are intended to
# be symmetric in naming with the various configurations supplied
# by the library's build_configs.
self._default_config = 'RC_SINGLE'
# The list of log files which need to be checked for performance
# differences.
self._default_logs = ['get.log',
'get_nbi.log',
'get_swarm.log',
'put.log',
'put_nbi.log']
# The maximum pairwise difference for the log file latencies.
self._default_latency_max = 5.0
# The minimum bandwidth difference for the log file bandwidths.
self._default_bandwidth_min = -50.0
# The Jenkins tester archives slave output on the master's
# filesystem which currently uses this top-level path (as the
# resperf account).
self._default_jenkins_path = \
'/proj/radl_extra/users/resperf/jenkins-2.192/'
# The performance tester runs as part of the 'short' job to
# verify that no performance degradation has occurred between
# commits. This archive path is the generic archive path
# for all of the builds. The Kleene star is used as a place
# holder for the Jenkins build number.
self._default_archive_path = \
'jobs/shmem_short/builds/*/archive/'
# The default benchmark path can be used to alter archive
# output placement. Currently, this is initialized to an empty
# string, but subsequently initialized to inject the config
# path.
self._default_benchmark_path = ''
def setup_options(self, argparser):
argparser.add_argument('-j',
dest='jenkins_path',
default=self._default_jenkins_path)
argparser.add_argument('-a',
dest='archive_path',
default=self._default_archive_path)
argparser.add_argument('-b',
dest='benchmark_path',
default=self._default_benchmark_path)
argparser.add_argument('-c',
dest='config',
default=self._default_config)
argparser.add_argument('-l',
dest='logs',
nargs='*',
default=self._default_logs)
argparser.add_argument('-x',
dest='latency_max',
type=float,
default=self._default_latency_max)
argparser.add_argument('-y',
dest='bandwidth_min',
type=float,
default=self._default_bandwidth_min)
argparser.add_argument('-o',
dest='one_changeset')
argparser.add_argument('-r',
dest='changeset_range',
nargs=2,
metavar=("most_recent_changeset", "least_recent_changeset"))
return argparser
def parse_command_line(self):
p = argparse.ArgumentParser()
p = self.setup_options(p)
args = p.parse_args()
args.benchmark_path = args.config + args.benchmark_path
return args
@@ -0,0 +1,30 @@
<?xml version='1.1' encoding='UTF-8'?>
<project>
<actions/>
<description>shmem performance delta checker</description>
<keepDependencies>false</keepDependencies>
<properties/>
<scm class="hudson.scm.NullSCM"/>
<assignedNode>master</assignedNode>
<canRoam>false</canRoam>
<disabled>false</disabled>
<blockBuildWhenDownstreamBuilding>false</blockBuildWhenDownstreamBuilding>
<blockBuildWhenUpstreamBuilding>false</blockBuildWhenUpstreamBuilding>
<triggers/>
<concurrentBuild>false</concurrentBuild>
<builders>
<hudson.tasks.Shell>
<command>/proj/radl_extra/users/resperf/jenkins-2.192/workspace/shmem_short@script/internal/continuous_integration/short/check_perf_delta.py -c &quot;RC_SINGLE&quot; -x &quot;15.0&quot; -l put.log put_nbi.log get.log get_nbi.log amo_add.log amo_fadd.log amo_fcswap.log amo_fetch.log amo_finc.log amo_inc.log ping_pong.log
/proj/radl_extra/users/resperf/jenkins-2.192/workspace/shmem_short@script/internal/continuous_integration/short/check_perf_delta.py -c &quot;RC_MULTI&quot; -x &quot;15.0&quot; -l put.log put_nbi.log get.log get_nbi.log amo_add.log amo_fadd.log amo_fcswap.log amo_fetch.log amo_finc.log amo_inc.log ping_pong.log get_swarm.log
/proj/radl_extra/users/resperf/jenkins-2.192/workspace/shmem_short@script/internal/continuous_integration/short/check_perf_delta.py -c &quot;DC_SINGLE&quot; -x &quot;15.0&quot; -l put.log put_nbi.log get.log get_nbi.log ping_pong.log
/proj/radl_extra/users/resperf/jenkins-2.192/workspace/shmem_short@script/internal/continuous_integration/short/check_perf_delta.py -c &quot;DC_MULTI&quot; -x &quot;15.0&quot; -l put.log put_nbi.log get.log get_nbi.log ping_pong.log get_swarm.log
/proj/radl_extra/users/resperf/jenkins-2.192/workspace/shmem_short@script/internal/continuous_integration/short/check_perf_delta.py -c &quot;RO_NET_BASIC&quot; -x &quot;75.0&quot; -l put.log put_nbi.log get.log get_nbi.log ping_pong.log
</command>
<configuredLocalRules/>
</hudson.tasks.Shell>
</builders>
<publishers/>
<buildWrappers>
<hudson.plugins.timestamper.TimestamperBuildWrapper plugin="timestamper@1.12"/>
</buildWrappers>
</project>
@@ -0,0 +1,35 @@
#!/tool/pandora64/.package/python-3.8.0/bin/python3
import parser
import dictionary
import archive_path
import plotter
def main():
p = parser.Parser()
args = p.parse_command_line()
archives = archive_path.Archive(args)
archives.generate()
print(archives.dump())
build_to_changeset = dictionary.BuildToChangesetDict()
build_to_changeset.generate(archives.dirs)
print(build_to_changeset.dump())
changeset_to_build = dictionary.ChangesetToBuildDict()
changeset_to_build.generate(build_to_changeset)
print(changeset_to_build.dump())
plot = plotter.Plot(args,
archives,
changeset_to_build)
# either plot with all the changesets or the slice provided
plot.changeset_slice()
if (args.one_changeset):
plot.one_changeset_plot()
if __name__ == '__main__':
main()
@@ -0,0 +1,230 @@
#!/usr/bin/env Rscript
# load the required libraries:
library(tidyverse)
library(RColorBrewer)
library(optparse)
# declare some helper functions
ggpreview <- function (..., device = "png") {
fname <- tempfile(fileext = paste0(".", device))
ggplot2::ggsave(filename = fname, device = device, ...)
system2("open", fname)
invisible(NULL)
}
set_right_order <- function(df) {
# reverse the order of the rows so that oldest commit is first
df <- df %>% map_df(rev)
# ensure that ggplot plots the x-axis in the right order
df$Commit <- factor(df$Commit, levels = unique(df$Commit))
return(df)
}
plot_and_save <- function(df, xval, yval, title, subtitle, xlabel, filename) {
p <- ggplot(df, aes_string(x=xval, y=yval, group=1)) +
geom_line(size = 0.5, color=mycolors[1]) +
geom_point(size = 1.5, alpha = 1, color=mycolors[2]) +
theme_minimal() +
expand_limits(y=0) +
xlab(xlabel) +
ggtitle(title, subtitle = subtitle) +
theme(
axis.text.x = element_text(angle=90,hjust=1),
axis.title.y = element_blank()
) +
scale_fill_manual(values = mycolors)
#ggpreview(width=7.5, height=5, units="in", dpi=500)
ggsave(filename, p, device=pdf, dpi=500)
}
## Set up options ##
option_list = list(
make_option(c("-o", "--output"), type="character", default=NULL, action="store",
help="path (without trailing /) to a folder that will
contain the plots", metavar="folder-path"),
make_option(c("-a", "--changeset_a"), type="character", default=NULL, action="store",
help="beginning (inclusive) changeset of slice", metavar="changeset"),
make_option(c("-b", "--changeset_b"), type="character", default=NULL, action="store",
help="ending (inclusive) changeset of slice", metavar="changeset"),
make_option(c("-c", "--one_changeset"), type="character", default=NULL, action="store",
help="if set, will prepare plots for one changeset; if not, plots for a changeset slice")
)
## SCRIPT START ##
# parse the options
opt_parser <- OptionParser(option_list=option_list)
opts <- parse_args(opt_parser)
if (is.null(opts$output)) {
print_help(opt_parser)
stop("Please set the --output flag.", call.=FALSE)
}
slice_opt = 0
single_opt = 0
if (!is.null(opts$changeset_a) && !is.null(opts$changeset_b)) {
slice_opt = 1
}
if (!is.null(opts$one_changeset)) {
single_opt = 1
}
if ( (slice_opt && single_opt) || (!slice_opt && !single_opt) ) {
stop("Please supply a slice or a single changeset, not both.", call.= FALSE)
}
# choose color palette
mycolors <- brewer.pal(5, "Set2")
if (length(opts$one_changeset) > 0) {
## Plotting data for a single changeset ##
# read the files
non_amo <- read.csv("non_amo_one_changeset.csv", header=TRUE)
amo <- read.csv("amo_one_changeset.csv", header=TRUE)
ping_pong <- read.csv("ping_pong_one_changeset.csv", header=TRUE)
# ensure that ggplot plots the x-axis in the right order
non_amo$size <- factor(non_amo$size, levels = unique(non_amo$size))
amo$op <- factor(amo$op, levels = unique(amo$op))
# plot
non_amo_ops <- list("put","put_nbi","get","get_nbi")
for (op in non_amo_ops) {
plot_and_save(df=non_amo,
xval="size",
yval=op,
title=op,
subtitle="Latency (us)",
xlabel="Message size (bytes)",
filename=paste(opts$output,"/",op,"_changeset_",opts$one_changeset,".pdf", sep="")
)
}
# prepare data for plots with fixed message size and ops as x axis
non_amo$bsize <- paste("b",non_amo$size,sep="") # (so that the columns in non_amo_t start with a character)
non_amo_t <- setNames(data.frame(t(non_amo[,2:5])), non_amo[,6]) # transpose + set column names
non_amo_t$op <- colnames(non_amo[,2:5]) # make a column with operation names
sizes <- colnames(non_amo_t[,-(length(colnames(non_amo_t)))])
for (size in sizes) {
plot_and_save(df=non_amo_t,
xval="op",
yval=size,
title=paste(sub('.', '', size),"byte"),
subtitle="Latency (us)",
xlabel="Operation",
filename=paste(opts$output,"/",size,"_changeset_",opts$one_changeset,".pdf", sep="")
)
}
plot_and_save(df=amo,
xval="op",
yval="latency",
title="Atomics",
subtitle="Latency (us)",
xlabel="Operation",
filename=paste(opts$output,"/atomic_changeset_",opts$one_changeset,".pdf", sep="")
)
ping_pong$type <- c("ping_pong")
p<-ggplot(ping_pong, aes(x=type, y=latency, fill=type)) +
geom_bar(stat="identity", width=0.5) +
theme_minimal() +
ggtitle("Ping pong", subtitle = "Latency (us)") +
theme(
axis.title.y = element_blank(),
axis.text.y = element_blank(),
axis.title.x = element_blank(),
legend.position = "none"
) +
coord_flip() +
scale_fill_manual(values = mycolors)
#ggpreview(width=7.5, height=5, units="in", dpi=500)
ggsave(paste(opts$output,"/ping_pong_changeset_",opts$one_changeset,".pdf", sep=""), p, device=pdf, dpi=500)
} else {
## Plotting across a changeset slice ##
# read the files
put <- read.csv("put.csv", header=TRUE)
put_nbi <- read.csv("put_nbi.csv", header=TRUE)
get <- read.csv("get.csv", header=TRUE)
get_nbi <- read.csv("get_nbi.csv", header=TRUE)
amo <- read.csv("amo.csv", header=TRUE)
ping_pong <- read.csv("ping_pong.csv", header=TRUE)
# slice out the commits
start <- match(c(opts$changeset_a), put$Commit)
end <- match(c(opts$changeset_b), put$Commit)
# (start and end should be the same for all the frames) #
put <- put[start:end,]
put_nbi <- put_nbi[start:end,]
get <- get[start:end,]
get_nbi <- get_nbi[start:end,]
amo <- amo[start:end,]
ping_pong <- ping_pong[start:end,]
put <- set_right_order(put)
put_nbi <- set_right_order(put_nbi)
get <- set_right_order(get)
get_nbi <- set_right_order(get_nbi)
amo <- set_right_order(amo)
ping_pong <- set_right_order(ping_pong)
# plot
non_amo_ops <- list("put","put_nbi","get","get_nbi")
sizes_to_subtitle_map <- list("b1"="1 byte",
"b2"="2 bytes",
"b4"="4 bytes",
"b8"="8 bytes",
"b16"="16 bytes",
"b32"="32 bytes",
"b64"="64 bytes",
"b128"="128 bytes",
"b256"="256 bytes",
"b512"="512 bytes",
"b1024"="1024 bytes",
"b2048"="2048 bytes",
"b4096"="4096 bytes",
"b8192"="8192 bytes",
"b16384"="16384 bytes",
"b32768"="32768 bytes")
for (op in non_amo_ops) {
for (size in names(sizes_to_subtitle_map)) {
plot_and_save(df=eval(parse(text=op)),
xval="Commit",
yval=size,
title=op,
subtitle=paste("Latency (us) for ",sizes_to_subtitle_map[[size]],sep=""),
xlabel="Commit (older to newer)",
filename=paste(opts$output,"/",op,"_",size,".pdf", sep="")
)
}
}
amo_ops <- list("add","cswap","fadd","fcswap","fetch","finc","inc")
for (op in amo_ops) {
plot_and_save(df=amo,
xval="Commit",
yval=op,
title=op,
subtitle="Latency (us)",
xlabel="Commit (older to newer)",
filename=paste(opts$output,"/",op,".pdf", sep="")
)
}
plot_and_save(df=ping_pong,
xval="Commit",
yval="latency",
title="ping_pong",
subtitle="Latency (us)",
xlabel="Commit (older to newer)",
filename=paste(opts$output,"/","ping_pong.pdf", sep="")
)
}
## SCRIPT END ##
@@ -0,0 +1,295 @@
#!/tool/pandora64/.package/python-3.8.0/bin/python3
import dictionary
import log
#import matplotlib.pyplot
import numpy
import csv
import os
import subprocess
import sys
class Plot():
def __init__(self, args, archives, changeset_to_build):
self._args = args
self._archives = archives
self._changelog = dictionary.ChangelogToMostRecentBuild()
self._changelog.generate(changeset_to_build)
print(self._changelog.dump())
def abbreviate_changesets(self, changesets):
return [changeset[0:8] for changeset in changesets]
@staticmethod
def write_dict_to_file(tracker, field_names, file_name):
with open(file_name, 'w') as csvfile:
writer = csv.DictWriter(csvfile, fieldnames=field_names)
writer.writeheader()
writer.writerows(tracker)
@staticmethod
def check_and_add_to_dict(dictionary, key, array):
if len(array) > 0:
dictionary[key] = array[0]
else:
dictionary[key] = 0
def changeset_slice(self):
self._log_tracker = log.Tracker(self._args, self._archives)
for changeset in self._changelog._all_changesets:
if changeset in self._changelog.data.keys():
build_id = self._changelog.data[changeset]
self._log_tracker.add(changeset, build_id)
print(self._log_tracker.dump())
"""
separate out dictionaries based on operation
and prepare them in a format that works with
the csv module
"""
put_tracker = []
put_nbi_tracker = []
get_tracker = []
get_nbi_tracker = []
amo_tracker = []
ping_pong_tracker = []
prev_commit = list(self._log_tracker._data.keys())[0][0]
amo_dict = {}
for key, value in self._log_tracker._data.items():
if (key[1] == "put.log"):
put_tracker.append({'Commit':key[0][0:7],
'b1':value.latency[0],
'b2':value.latency[1],
'b4':value.latency[2],
'b8':value.latency[3],
'b16':value.latency[4],
'b32':value.latency[5],
'b64':value.latency[6],
'b128':value.latency[7],
'b256':value.latency[8],
'b512':value.latency[9],
'b1024':value.latency[10],
'b2048':value.latency[11],
'b4096':value.latency[12],
'b8192':value.latency[13],
'b16384':value.latency[14],
'b32768':value.latency[15]
})
if (key[1] == "put_nbi.log"):
put_nbi_tracker.append({'Commit':key[0][0:7],
'b1':value.latency[0],
'b2':value.latency[1],
'b4':value.latency[2],
'b8':value.latency[3],
'b16':value.latency[4],
'b32':value.latency[5],
'b64':value.latency[6],
'b128':value.latency[7],
'b256':value.latency[8],
'b512':value.latency[9],
'b1024':value.latency[10],
'b2048':value.latency[11],
'b4096':value.latency[12],
'b8192':value.latency[13],
'b16384':value.latency[14],
'b32768':value.latency[15]
})
if (key[1] == "get.log"):
get_tracker.append({'Commit':key[0][0:7],
'b1':value.latency[0],
'b2':value.latency[1],
'b4':value.latency[2],
'b8':value.latency[3],
'b16':value.latency[4],
'b32':value.latency[5],
'b64':value.latency[6],
'b128':value.latency[7],
'b256':value.latency[8],
'b512':value.latency[9],
'b1024':value.latency[10],
'b2048':value.latency[11],
'b4096':value.latency[12],
'b8192':value.latency[13],
'b16384':value.latency[14],
'b32768':value.latency[15]
})
if (key[1] == "get_nbi.log"):
get_nbi_tracker.append({'Commit':key[0][0:7],
'b1':value.latency[0],
'b2':value.latency[1],
'b4':value.latency[2],
'b8':value.latency[3],
'b16':value.latency[4],
'b32':value.latency[5],
'b64':value.latency[6],
'b128':value.latency[7],
'b256':value.latency[8],
'b512':value.latency[9],
'b1024':value.latency[10],
'b2048':value.latency[11],
'b4096':value.latency[12],
'b8192':value.latency[13],
'b16384':value.latency[14],
'b32768':value.latency[15]
})
if (key[1] == "ping_pong.log"):
ping_pong_tracker.append({'Commit':key[0][0:7],
'latency':value.latency[0]
})
# check to see if we have moved to a new commit
# if we have, store the dict in the amo_tracker
if (key[0] != prev_commit):
amo_dict['Commit'] = prev_commit[0:7]
amo_tracker.append(amo_dict.copy())
amo_dict.clear()
prev_commit = key[0]
if (key[1] == "amo_add.log"):
self.check_and_add_to_dict(amo_dict, 'add', value.latency)
if (key[1] == "amo_cswap.log"):
self.check_and_add_to_dict(amo_dict, 'cswap', value.latency)
if (key[1] == "amo_fadd.log"):
self.check_and_add_to_dict(amo_dict, 'fadd', value.latency)
if (key[1] == "amo_fcswap.log"):
self.check_and_add_to_dict(amo_dict, 'fcswap', value.latency)
if (key[1] == "amo_fetch.log"):
self.check_and_add_to_dict(amo_dict, 'fetch', value.latency)
if (key[1] == "amo_finc.log"):
self.check_and_add_to_dict(amo_dict, 'finc', value.latency)
if (key[1] == "amo_inc.log"):
self.check_and_add_to_dict(amo_dict, 'inc', value.latency)
# store the last commit's amo data
amo_dict['Commit'] = prev_commit[0:7]
amo_tracker.append(amo_dict.copy())
# write put results into a file:
size_field_names= ['Commit','b1','b2','b4','b8','b16','b32','b64','b128','b256','b512','b1024','b2048','b4096','b8192','b16384','b32768']
amo_field_names= ['Commit','add','cswap','fadd','fcswap','fetch','finc','inc']
ping_pong_field_names= ['Commit','latency']
self.write_dict_to_file(put_tracker, size_field_names, "put.csv")
self.write_dict_to_file(put_nbi_tracker, size_field_names, "put_nbi.csv")
self.write_dict_to_file(get_tracker, size_field_names, "get.csv")
self.write_dict_to_file(get_nbi_tracker, size_field_names, "get_nbi.csv")
self.write_dict_to_file(amo_tracker, amo_field_names, "amo.csv")
self.write_dict_to_file(ping_pong_tracker, ping_pong_field_names, "ping_pong.csv")
# make a directory and execute the R script to generate plots in that directory
current_dir = os.getcwd()
plot_dir = os.path.join(current_dir, 'plots')
if not os.path.exists(plot_dir):
os.makedirs(plot_dir)
changeset_a = list(self._log_tracker._data.keys())[0][0]
changeset_b = list(self._log_tracker._data.keys())[-1][0]
# check if the provided changesets are correct
if (self._args.changeset_range):
found_changeset_a = False
found_changeset_b = False
for key, value in self._log_tracker._data.items():
if (found_changeset_a and found_changeset_b):
break
if (not found_changeset_a):
if (self._args.changeset_range[0] == key[0]):
found_changeset_a = True
if (not found_changeset_b):
if (self._args.changeset_range[1] == key[0]):
found_changeset_b = True
if ((not found_changeset_a) and (not found_changeset_b)):
sys.exit("One of the specified changesets was not found. Please specify correct/complete commit IDs.")
else:
changeset_a = self._args.changeset_range[0]
changeset_b = self._args.changeset_range[1]
r_command = "Rscript ./plotter.R -o ./plots -a " + changeset_a[0:7] + " -b " + changeset_b[0:7]
print(r_command)
subprocess.check_call(r_command, shell=True)
def one_changeset_plot(self):
found_changeset = 0
non_amo_tracker = []
amo_tracker = []
ping_pong_tracker = []
for key, value in self._log_tracker._data.items():
if (key[0] == self._args.one_changeset):
found_changeset = 1
if (key[1] == "put.log"):
put_vals = value.latency
if (key[1] == "put_nbi.log"):
put_nbi_vals = value.latency
if (key[1] == "get.log"):
get_vals = value.latency
if (key[1] == "get_nbi.log"):
get_nbi_vals = value.latency
if (key[1] == "amo_add.log"):
amo_tracker.append({'op':'add',
'latency': value.latency[0] if len(value.latency) > 0 else 0
})
if (key[1] == "amo_add.log"):
amo_tracker.append({'op':'add',
'latency': value.latency[0] if len(value.latency) > 0 else 0
})
if (key[1] == "amo_cswap.log"):
amo_tracker.append({'op':'cswap',
'latency': value.latency[0] if len(value.latency) > 0 else 0
})
if (key[1] == "amo_fadd.log"):
amo_tracker.append({'op':'fadd',
'latency': value.latency[0] if len(value.latency) > 0 else 0
})
if (key[1] == "amo_fcswap.log"):
amo_tracker.append({'op':'fcswap',
'latency': value.latency[0] if len(value.latency) > 0 else 0
})
if (key[1] == "amo_fetch.log"):
amo_tracker.append({'op':'fetch',
'latency': value.latency[0] if len(value.latency) > 0 else 0
})
if (key[1] == "amo_finc.log"):
amo_tracker.append({'op':'finc',
'latency': value.latency[0] if len(value.latency) > 0 else 0
})
if (key[1] == "amo_inc.log"):
amo_tracker.append({'op':'inc',
'latency': value.latency[0] if len(value.latency) > 0 else 0
})
if (key[1] == "ping_pong.log"):
ping_pong_tracker.append({'latency': value.latency[0] if len(value.latency) > 0 else 0
})
if (not found_changeset):
sys.exit("The requested changeset was not found. Please specify correct/complete commit IDs.")
index = 0
for size in [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]:
non_amo_tracker.append({'size':size,
'put':put_vals[index],
'put_nbi':put_nbi_vals[index],
'get':get_vals[index],
'get_nbi':get_nbi_vals[index]
})
index = index + 1
# write results into a file:
non_amo_field_names= ['size','put','put_nbi','get','get_nbi']
amo_field_names= ['op','latency']
ping_pong_field_names= ['latency']
self.write_dict_to_file(non_amo_tracker, non_amo_field_names, "non_amo_one_changeset.csv")
self.write_dict_to_file(amo_tracker, amo_field_names, "amo_one_changeset.csv")
self.write_dict_to_file(ping_pong_tracker, ping_pong_field_names, "ping_pong_one_changeset.csv")
# call the R script with an option that tells it to plot figures for
r_command = "Rscript ./plotter.R -o ./plots -c " + self._args.one_changeset
print(r_command)
subprocess.check_call(r_command, shell=True)
@@ -0,0 +1,21 @@
#!/tool/pandora64/.package/python-3.8.0/bin/python3
import sys
class Report():
def __init__(self, identifier, path, filename):
self._identifier = identifier
self._path = path
self._filename = filename
def open(self):
print('opening report for ' + self._identifier)
try:
report_path = self._path + '/' + self._filename
print('report_path: ' + report_path)
self._file_handle = open(report_path, 'w')
except:
sys.exit('failed to open report: ' + report_path)
def record(self, message):
self._file_handle.write(message + '\n')
@@ -0,0 +1,96 @@
<?xml version='1.1' encoding='UTF-8'?>
<flow-definition plugin="workflow-job@2.40">
<actions>
<org.jenkinsci.plugins.pipeline.modeldefinition.actions.DeclarativeJobAction plugin="pipeline-model-definition@1.8.4"/>
<org.jenkinsci.plugins.pipeline.modeldefinition.actions.DeclarativeJobPropertyTrackerAction plugin="pipeline-model-definition@1.8.4">
<jobProperties/>
<triggers/>
<parameters/>
<options/>
</org.jenkinsci.plugins.pipeline.modeldefinition.actions.DeclarativeJobPropertyTrackerAction>
</actions>
<description></description>
<keepDependencies>false</keepDependencies>
<properties>
<org.jenkinsci.plugins.workflow.job.properties.DisableConcurrentBuildsJobProperty/>
<org.jenkinsci.plugins.workflow.job.properties.PipelineTriggersJobProperty>
<triggers>
<com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.GerritTrigger plugin="gerrit-trigger@2.33.0">
<spec></spec>
<gerritProjects>
<com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.data.GerritProject>
<compareType>PLAIN</compareType>
<pattern>rsch/ec/shmem</pattern>
<branches>
<com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.data.Branch>
<compareType>PLAIN</compareType>
<pattern>amd-master</pattern>
</com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.data.Branch>
</branches>
<disableStrictForbiddenFileVerification>false</disableStrictForbiddenFileVerification>
</com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.data.GerritProject>
</gerritProjects>
<dynamicGerritProjects class="empty-list"/>
<skipVote>
<onSuccessful>false</onSuccessful>
<onFailed>false</onFailed>
<onUnstable>false</onUnstable>
<onNotBuilt>false</onNotBuilt>
<onAborted>false</onAborted>
</skipVote>
<silentMode>false</silentMode>
<enableTopicAssociation>false</enableTopicAssociation>
<notificationLevel></notificationLevel>
<silentStartMode>false</silentStartMode>
<escapeQuotes>true</escapeQuotes>
<nameAndEmailParameterMode>PLAIN</nameAndEmailParameterMode>
<dependencyJobsNames>shmem_perf_check, </dependencyJobsNames>
<commitMessageParameterMode>BASE64</commitMessageParameterMode>
<changeSubjectParameterMode>PLAIN</changeSubjectParameterMode>
<commentTextParameterMode>BASE64</commentTextParameterMode>
<buildStartMessage></buildStartMessage>
<buildFailureMessage></buildFailureMessage>
<buildSuccessfulMessage></buildSuccessfulMessage>
<buildUnstableMessage></buildUnstableMessage>
<buildNotBuiltMessage></buildNotBuiltMessage>
<buildAbortedMessage></buildAbortedMessage>
<buildUnsuccessfulFilepath></buildUnsuccessfulFilepath>
<customUrl></customUrl>
<serverName>amd-gerrit</serverName>
<triggerOnEvents>
<com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.events.PluginCommentAddedContainsEvent>
<commentAddedCommentContains>!SHORT</commentAddedCommentContains>
</com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.events.PluginCommentAddedContainsEvent>
</triggerOnEvents>
<dynamicTriggerConfiguration>false</dynamicTriggerConfiguration>
<triggerConfigURL></triggerConfigURL>
<triggerInformationAction/>
</com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.GerritTrigger>
</triggers>
</org.jenkinsci.plugins.workflow.job.properties.PipelineTriggersJobProperty>
</properties>
<definition class="org.jenkinsci.plugins.workflow.cps.CpsScmFlowDefinition" plugin="workflow-cps@2.90">
<scm class="hudson.plugins.git.GitSCM" plugin="git@4.7.1">
<configVersion>2</configVersion>
<userRemoteConfigs>
<hudson.plugins.git.UserRemoteConfig>
<name>origin</name>
<refspec>${GERRIT_REFSPEC}</refspec>
<url>ssh://gerritgit/rsch/ec/shmem</url>
</hudson.plugins.git.UserRemoteConfig>
</userRemoteConfigs>
<branches>
<hudson.plugins.git.BranchSpec>
<name>FETCH_HEAD</name>
</hudson.plugins.git.BranchSpec>
</branches>
<doGenerateSubmoduleConfigurations>false</doGenerateSubmoduleConfigurations>
<submoduleCfg class="empty-list"/>
<extensions/>
</scm>
<scriptPath>internal/continuous_integration/short/Jenkinsfile</scriptPath>
<lightweight>false</lightweight>
</definition>
<triggers/>
<disabled>false</disabled>
</flow-definition>
@@ -0,0 +1,34 @@
#!/tool/pandora64/.package/python-3.8.0/bin/python3
import pprint
import report
import sys
class Threshold():
def __init__(self, maximum_threshold, violation_type):
self._violations = {}
self._maximum_threshold = maximum_threshold
self._violation_type = violation_type
def check(self, value, changeset, filename):
if value >= self._maximum_threshold:
key = changeset + '|' + filename + '|' + self._violation_type
self._violations[key] = value
print(key + ': ' + str(value) + '%')
def provide_violations_to_report(self, report):
if self.has_violations():
report.record('FAILURE')
report.record(self.dump())
sys.exit(1)
else:
report.record('SUCCESS')
sys.exit(0)
def has_violations(self):
return bool(self._violations)
def dump(self):
str_out = pprint.pformat(self._violations, width=120)
str_out += '\n'
return str_out
@@ -0,0 +1,151 @@
pipeline {
agent { label 'sv-pdp-5' }
environment {
HSA_FORCE_FINE_GRAIN_PCIE = 1
MPI_HOME="/home/resperf/mpich-4.0.1/install/global"
PATH = "$MPI_HOME/bin:$PATH"
LD_LIBRARY_PATH = "$MPI_HOME/lib:$LD_LIBRARY_PATH"
build_dir = "builds/change-${GERRIT_CHANGE_NUMBER}-${GERRIT_PATCHSET_NUMBER}"
CMAKE_PREFIX_PATH = "/opt/rocm/lib/cmake"
}
stages {
stage('Synchronize Source Code') {
steps {
checkout changelog: false, poll: false, scm: [$class: 'GitSCM', branches: [[name: 'FETCH_HEAD']], doGenerateSubmoduleConfigurations: false, extensions: [[$class: 'CloneOption', depth: 0, noTags: false, reference: '', shallow: false]], submoduleCfg: [], userRemoteConfigs: [[name: 'origin', refspec: '${GERRIT_REFSPEC}', url: 'ssh://gerritgit/rsch/ec/shmem']]]
}
}
stage('Make Build Directory') {
steps {
dir("library") {
sh "mkdir -p ${build_dir}"
}
}
}
stage('Build Source Code') {
parallel {
stage('RC_SINGLE') {
steps {
//===================== LIBRARY =======================
dir("library/${build_dir}/RC_SINGLE") {
sh 'mkdir -p install'
sh '../../../build_configs/rc_single install'
}
//===================== CLIENT ========================
dir("clients/functional_tests/${build_dir}/RC_SINGLE") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_SINGLE/install'
}
}
}
stage('RC_MULTI_WF_COAL') {
steps {
//===================== LIBRARY =======================
dir("library/${build_dir}/RC_MULTI_WF_COAL") {
sh 'mkdir -p install'
sh '../../../build_configs/rc_multi_wf_coal install'
}
//===================== CLIENT ========================
dir("clients/functional_tests/${build_dir}/RC_MULTI_WF_COAL") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_MULTI_WF_COAL/install'
}
}
}
stage('RC_MULTI') {
steps {
//===================== LIBRARY =======================
dir("library/${build_dir}/RC_MULTI") {
sh 'mkdir -p install'
sh '../../../build_configs/rc_multi install'
}
//===================== CLIENT ========================
dir("clients/functional_tests/${build_dir}/RC_MULTI") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_MULTI/install'
}
}
}
stage('DC_SINGLE') {
steps {
//===================== LIBRARY =======================
dir("library/${build_dir}/DC_SINGLE") {
sh 'mkdir -p install'
sh '../../../build_configs/dc_single install'
}
//===================== CLIENT ========================
dir("clients/functional_tests/${build_dir}/DC_SINGLE") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_SINGLE/install'
}
}
}
stage('DC_MULTI') {
steps {
//===================== LIBRARY =======================
dir("library/${build_dir}/DC_MULTI") {
sh 'mkdir -p install'
sh '../../../build_configs/dc_multi install'
}
//===================== CLIENT ========================
dir("clients/functional_tests/${build_dir}/DC_MULTI") {
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI/install'
}
}
}
}
}
stage('Run Tests') {
parallel {
stage('RC_SINGLE') {
steps {
dir("clients/functional_tests") {
sh './driver.sh ${build_dir}/RC_SINGLE/rocshmem_example_driver single_thread ${build_dir}/RC_SINGLE'
}
}
}
stage('RC_MULTI_WF_COAL') {
steps {
dir("clients/functional_tests") {
sh './driver.sh ${build_dir}/RC_MULTI_WF_COAL/rocshmem_example_driver multi_thread ${build_dir}/RC_MULTI_WF_COAL'
}
}
}
stage('RC_MULTI') {
steps {
dir("clients/functional_tests") {
sh './driver.sh ${build_dir}/RC_MULTI/rocshmem_example_driver multi_thread ${build_dir}/RC_MULTI'
}
}
}
stage('DC_SINGLE') {
steps {
dir("clients/functional_tests") {
sh './driver.sh ${build_dir}/DC_SINGLE/rocshmem_example_driver single_thread ${build_dir}/DC_SINGLE'
}
}
}
stage('DC_MULTI') {
steps {
dir("clients/functional_tests") {
sh './driver.sh ${build_dir}/DC_MULTI/rocshmem_example_driver multi_thread ${build_dir}/DC_MULTI'
}
}
}
stage('RO_NET_BASIC') {
// RO_NET controlled at runtime, no need for a new build. Use RC_MULTI
steps {
dir("clients/functional_tests") {
sh 'mkdir -p ${build_dir}/RO_NET_BASIC'
sh 'ROC_SHMEM_RO=1 RO_NET_CPU_QUEUE=1 UCX_TLS=rc ./driver.sh ${build_dir}/RC_MULTI/rocshmem_example_driver ro ${build_dir}/RO_NET_BASIC'
}
}
}
}
}
}
}
@@ -0,0 +1,85 @@
<?xml version='1.1' encoding='UTF-8'?>
<flow-definition plugin="workflow-job@2.40">
<actions/>
<description></description>
<keepDependencies>false</keepDependencies>
<properties>
<org.jenkinsci.plugins.workflow.job.properties.PipelineTriggersJobProperty>
<triggers>
<com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.GerritTrigger plugin="gerrit-trigger@2.33.0">
<spec></spec>
<gerritProjects>
<com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.data.GerritProject>
<compareType>PLAIN</compareType>
<pattern>rsch/ec/shmem</pattern>
<branches>
<com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.data.Branch>
<compareType>PLAIN</compareType>
<pattern>amd-master</pattern>
</com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.data.Branch>
</branches>
<disableStrictForbiddenFileVerification>false</disableStrictForbiddenFileVerification>
</com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.data.GerritProject>
</gerritProjects>
<dynamicGerritProjects class="empty-list"/>
<skipVote>
<onSuccessful>true</onSuccessful>
<onFailed>true</onFailed>
<onUnstable>true</onUnstable>
<onNotBuilt>true</onNotBuilt>
<onAborted>true</onAborted>
</skipVote>
<silentMode>false</silentMode>
<enableTopicAssociation>false</enableTopicAssociation>
<notificationLevel></notificationLevel>
<silentStartMode>false</silentStartMode>
<escapeQuotes>true</escapeQuotes>
<nameAndEmailParameterMode>PLAIN</nameAndEmailParameterMode>
<dependencyJobsNames></dependencyJobsNames>
<commitMessageParameterMode>BASE64</commitMessageParameterMode>
<changeSubjectParameterMode>PLAIN</changeSubjectParameterMode>
<commentTextParameterMode>BASE64</commentTextParameterMode>
<buildStartMessage></buildStartMessage>
<buildFailureMessage></buildFailureMessage>
<buildSuccessfulMessage></buildSuccessfulMessage>
<buildUnstableMessage></buildUnstableMessage>
<buildNotBuiltMessage></buildNotBuiltMessage>
<buildAbortedMessage></buildAbortedMessage>
<buildUnsuccessfulFilepath></buildUnsuccessfulFilepath>
<customUrl></customUrl>
<serverName>amd-gerrit</serverName>
<triggerOnEvents>
<com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.events.PluginCommentAddedContainsEvent>
<commentAddedCommentContains>!SMOKE</commentAddedCommentContains>
</com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.events.PluginCommentAddedContainsEvent>
</triggerOnEvents>
<dynamicTriggerConfiguration>false</dynamicTriggerConfiguration>
<triggerConfigURL></triggerConfigURL>
<triggerInformationAction/>
</com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.GerritTrigger>
</triggers>
</org.jenkinsci.plugins.workflow.job.properties.PipelineTriggersJobProperty>
</properties>
<definition class="org.jenkinsci.plugins.workflow.cps.CpsScmFlowDefinition" plugin="workflow-cps@2.90">
<scm class="hudson.plugins.git.GitSCM" plugin="git@4.7.1">
<configVersion>2</configVersion>
<userRemoteConfigs>
<hudson.plugins.git.UserRemoteConfig>
<url>ssh://gerritgit/rsch/ec/shmem</url>
</hudson.plugins.git.UserRemoteConfig>
</userRemoteConfigs>
<branches>
<hudson.plugins.git.BranchSpec>
<name>FETCH_HEAD</name>
</hudson.plugins.git.BranchSpec>
</branches>
<doGenerateSubmoduleConfigurations>false</doGenerateSubmoduleConfigurations>
<submoduleCfg class="empty-list"/>
<extensions/>
</scm>
<scriptPath>internal/continuous_integration/smoke/Jenkinsfile</scriptPath>
<lightweight>false</lightweight>
</definition>
<triggers/>
<disabled>false</disabled>
</flow-definition>
@@ -0,0 +1,47 @@
#! /usr/bin/python
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
import os
suffixes = [ '.cpp', '.hpp', '.c', '.h' ]
directories = [ 'src', 'include' ]
def oksuffix(f):
for s in suffixes:
if f.endswith(s):
return True
return False
def try_index_dir(directory):
for dirpath,subdirs,files in os.walk(os.path.join(cwd, directory)):
okfiles = [f for f in files if oksuffix(f)]
if okfiles:
print >> file_list, \
'\n'.join([os.path.join(dirpath, f) for f in okfiles])
file_list = file('cscope.files', 'w')
cwd = os.getcwd()
for d in directories:
try_index_dir(d)
file_list.close()
os.system("cscope -b")
@@ -0,0 +1,105 @@
HIPCC=hipcc
BUILD=./build
SRC=./src
RESULTS=./results
#rocshmem_DIR=${HOME}/rocshmem
#MPI_HOME=${HOME}/mpich/install
NCCL_HOME=${HOME}/rccl/build
MPI_FLAGS=-lmpi -lhsa-runtime64 -lrt -L${MPI_HOME}/lib -fgpu-rdc
SHMEM_FLAGS=${MPI_FLAGS} -lmlx5 -libverbs
RCCL_FLAGS=${MPI_FLAGS} -Wl,-rpath,$(NCCL_HOME) -L${NCCL_HOME} -lrccl
.SILENT: run_scan extract_scan run_sort run_sort_shmem run_sort_rccl extract_sort
all: ${BUILD}/sort_shmem ${BUILD}/sort_rccl ${BUILD}/sort_mpi
${BUILD}/sort_shmem: ${BUILD}/sort_shmem.o ${rocshmem_DIR}/lib/librocshmem.a
${HIPCC} $^ ${SHMEM_FLAGS} -o $@
${BUILD}/sort_shmem.o: ${SRC}/sort_shmem.cu
${HIPCC} $^ -I${rocshmem_DIR}/include -I${MPI_HOME}/include -fgpu-rdc -o $@ -c
${BUILD}/sort_rccl: ${BUILD}/sort_rccl.o
${HIPCC} $^ ${RCCL_FLAGS} -o $@
${BUILD}/sort_rccl.o: ${SRC}/sort_rccl.cu
${HIPCC} $^ -I$(NCCL_HOME)/include/rccl -I${MPI_HOME}/include -fgpu-rdc -o $@ -c
${BUILD}/sort_mpi: ${BUILD}/sort_mpi.o
${HIPCC} $^ ${MPI_FLAGS} -o $@
${BUILD}/sort_mpi.o: ${SRC}/sort_mpi.cu
${HIPCC} $^ -I${MPI_HOME}/include -fgpu-rdc -o $@ -c
RO_FLAGS=ROC_SHMEM_RO=1 RO_NET_CPU_QUEUE=1
ITERS?=0 1 2 3 4 5 6 7 8 9
TIMEOUT=1m
HOSTS=sv-pdp-0,sv-pdp-1,sv-pdp-2,sv-pdp-3
SCAN_SIZE=1024
PES=2 4 8 12 16
PES_RCCL=2 4 8
TYPE ?= Naive
LABEL ?= naive
PARAM ?= 0
NUM_PES ?= 2
run_sort_shmem: ${BUILD}/sort_shmem
printf "${TYPE} ";\
echo "" > ${RESULTS}/sort_${LABEL}_${NUM_PES}.out; \
for j in ${ITERS}; do \
${RO_FLAGS} timeout ${TIMEOUT} mpirun -np ${NUM_PES} -hosts ${HOSTS} ${BUILD}/sort_shmem ${PARAM} >> ${RESULTS}/sort_${LABEL}_${NUM_PES}.out;\
done;
run_sort_rccl: ${BUILD}/sort_rccl
printf "RCCL "; \
echo "" > ${RESULTS}/sort_rccl_${NUM_PES}.out; \
for j in ${ITERS}; do \
timeout ${TIMEOUT} mpirun -np ${NUM_PES} -hosts ${HOSTS} ${BUILD}/sort_rccl >> ${RESULTS}/sort_rccl_${NUM_PES}.out;\
done;
run_sort_mpi: ${BUILD}/sort_rccl
printf "MPI2 "; \
echo "" > ${RESULTS}/sort_mpi2_${NUM_PES}.out; \
for j in ${ITERS}; do \
timeout ${TIMEOUT} mpirun -np ${NUM_PES} -hosts ${HOSTS} ${BUILD}/sort_mpi >> ${RESULTS}/sort_mpi2_${NUM_PES}.out;\
done;
run_sort: ${BUILD}/sort_shmem ${BUILD}/sort_rccl
for i in ${PES}; do \
printf "%d " $$i; \
$(MAKE) --no-print-directory run_sort_shmem TYPE=NAIVE LABEL=naive PARAM=0 NUM_PES=$${i}; \
$(MAKE) --no-print-directory run_sort_shmem TYPE=MPI LABEL=mpi PARAM=1 NUM_PES=$${i}; \
$(MAKE) --no-print-directory run_sort_shmem TYPE=GCEN LABEL=gcen PARAM=2 NUM_PES=$${i}; \
$(MAKE) --no-print-directory run_sort_shmem TYPE=GCEN2 LABEL=gcen2 PARAM=3 NUM_PES=$${i}; \
$(MAKE) --no-print-directory run_sort_mpi NUM_PES=$${i}; \
printf "\n";\
done
for i in ${PES_RCCL}; do \
$(MAKE) --no-print-directory run_sort_rccl NUM_PES=$${i}; \
printf "%d " $$i; \
done
$(MAKE) extract_sort
extract_sort:
printf "Sort latency\n"
printf "PROCS\tType\tRuns"
for i in ${PES}; do \
for type in mpi mpi2 rccl naive gcen gcen2; do\
printf "\n%d\t$${type}\t" $$i; \
file=${RESULTS}/sort_$${type}_$${i}.out;\
latency=$$(grep -E "Avg time" $${file}); \
grep -E "Avg time" $${file} | while read -r j; do\
val=$$(echo $$j | grep -oE -m1 "[0-9]+\.[0-9]+");\
printf "%s\t" $${val};\
done; \
done;\
done
printf "\n"
clean:
rm build/*;
@@ -0,0 +1,70 @@
#include <chrono>
#include <iostream>
#include <stdio.h>
#include <mpi.h>
#include <unistd.h>
#include <hip/hip_runtime.h>
using namespace std;
#define TIME_NOW std::chrono::steady_clock::now()
#define TIME_DIFF(a, b) std::chrono::duration_cast<std::chrono::nanoseconds>(a - b).count()
#define HIPCHECK(cmd) do { \
hipError_t e = cmd; \
if( e != hipSuccess ) { \
printf("Failed: Hip error %s:%d '%s'\n", \
__FILE__,__LINE__,hipGetErrorString(e)); \
exit(EXIT_FAILURE); \
} \
} while(0)
#define NCCLCHECK(cmd) do { \
ncclResult_t r = cmd; \
if (r!= ncclSuccess) { \
printf("Failed, NCCL error %s:%d '%s'\n", \
__FILE__,__LINE__,ncclGetErrorString(r)); \
exit(EXIT_FAILURE); \
} \
} while(0)
// Copied from rccl-tests, used to hash hostname
static uint64_t getHash(const char* string, size_t n) {
// Based on DJB2a, result = result * 33 ^ char
uint64_t result = 5381;
for (size_t c = 0; c < n; c++) {
result = ((result << 5) + result) ^ string[c];
}
return result;
}
/* Generate a hash of the unique identifying string for this host
* that will be unique for both bare-metal and container instances
* Equivalent of a hash of;
*
* $(hostname)$(cat /proc/sys/kernel/random/boot_id)
*
*/
#define HOSTID_FILE "/proc/sys/kernel/random/boot_id"
static uint64_t getHostHash(const char* hostname) {
char hostHash[1024];
// Fall back is the hostname if something fails
(void) strncpy(hostHash, hostname, sizeof(hostHash));
int offset = strlen(hostHash);
FILE *file = fopen(HOSTID_FILE, "r");
if (file != NULL) {
char *p;
if (fscanf(file, "%ms", &p) == 1) {
strncpy(hostHash+offset, p, sizeof(hostHash)-offset-1);
free(p);
}
}
fclose(file);
// Make sure the string is terminated
hostHash[sizeof(hostHash)-1]='\0';
return getHash(hostHash, strlen(hostHash));
}
@@ -0,0 +1,231 @@
/*************************************************************************
* *
* N A S P A R A L L E L B E N C H M A R K S 3.3 *
* *
* I S *
* *
*************************************************************************
* *
* This benchmark is part of the NAS Parallel Benchmark 3.3 suite. *
* It is described in NAS Technical Report 95-020. *
* *
* Permission to use, copy, distribute and modify this software *
* for any purpose with or without fee is hereby granted. We *
* request, however, that all derived work reference the NAS *
* Parallel Benchmarks 3.3. This software is provided "as is" *
* without express or implied warranty. *
* *
* Information on NPB 3.3, including the technical report, the *
* original specifications, source code, results and information *
* on how to submit new results, is available at: *
* *
* http://www.nas.nasa.gov/Software/NPB *
* *
* Send comments or suggestions to npb@nas.nasa.gov *
* Send bug reports to npb-bugs@nas.nasa.gov *
* *
* NAS Parallel Benchmarks Group *
* NASA Ames Research Center *
* Mail Stop: T27A-1 *
* Moffett Field, CA 94035-1000 *
* *
* E-mail: npb@nas.nasa.gov *
* Fax: (650) 604-3957 *
* *
*************************************************************************
* *
* Author: M. Yarrow *
* H. Jin *
* *
*************************************************************************/
#define NUM_WGS 1
#define WG_SIZE 1024
#define MAX_PES 128
#define MAX_KEY (1 << 11)
/*
* FUNCTION RANDLC (X, A)
*
* This routine returns a uniform pseudorandom double precision number in the
* range (0, 1) by using the linear congruential generator
*
* x_{k+1} = a x_k (mod 2^46)
*
* where 0 < x_k < 2^46 and 0 < a < 2^46. This scheme generates 2^44 numbers
* before repeating. The argument A is the same as 'a' in the above formula,
* and X is the same as x_0. A and X must be odd double precision integers
* in the range (1, 2^46). The returned value RANDLC is normalized to be
* between 0 and 1, i.e. RANDLC = 2^(-46) * x_1. X is updated to contain
* the new seed x_1, so that subsequent calls to RANDLC using the same
* arguments will generate a continuous sequence.
*
* This routine should produce the same results on any computer with at least
* 48 mantissa bits in double precision floating point data. On Cray systems,
* double precision should be disabled.
*
* David H. Bailey October 26, 1990
*
* IMPLICIT DOUBLE PRECISION (A-H, O-Z)
* SAVE KS, R23, R46, T23, T46
* DATA KS/0/
*
* If this is the first call to RANDLC, compute R23 = 2 ^ -23, R46 = 2 ^ -46,
* T23 = 2 ^ 23, and T46 = 2 ^ 46. These are computed in loops, rather than
* by merely using the ** operator, in order to insure that the results are
* exact on all systems. This code assumes that 0.5D0 is represented exactly.
*/
/*****************************************************************/
/************* R A N D L C ************/
/************* ************/
/************* portable random number generator ************/
/*****************************************************************/
double randlc( double *X, double *A )
{
static int KS=0;
static double R23, R46, T23, T46;
double T1, T2, T3, T4;
double A1;
double A2;
double X1;
double X2;
double Z;
int i, j;
if (KS == 0)
{
R23 = 1.0;
R46 = 1.0;
T23 = 1.0;
T46 = 1.0;
for (i=1; i<=23; i++)
{
R23 = 0.50 * R23;
T23 = 2.0 * T23;
}
for (i=1; i<=46; i++)
{
R46 = 0.50 * R46;
T46 = 2.0 * T46;
}
KS = 1;
}
/* Break A into two parts such that A = 2^23 * A1 + A2 and set X = N. */
T1 = R23 * *A;
j = T1;
A1 = j;
A2 = *A - T23 * A1;
/* Break X into two parts such that X = 2^23 * X1 + X2, compute
Z = A1 * X2 + A2 * X1 (mod 2^23), and then
X = 2^23 * Z + A2 * X2 (mod 2^46). */
T1 = R23 * *X;
j = T1;
X1 = j;
X2 = *X - T23 * X1;
T1 = A1 * X2 + A2 * X1;
j = R23 * T1;
T2 = j;
Z = T1 - T23 * T2;
T3 = T23 * Z + A2 * X2;
j = R46 * T3;
T4 = j;
*X = T3 - T46 * T4;
return(R46 * *X);
}
/*****************************************************************/
/************ F I N D _ M Y _ S E E D ************/
/************ ************/
/************ returns parallel random number seq seed ************/
/*****************************************************************/
/*
* Create a random number sequence of total length nn residing
* on np number of processors. Each processor will therefore have a
* subsequence of length nn/np. This routine returns that random
* number which is the first random number for the subsequence belonging
* to processor rank kn, and which is used as seed for proc kn ran # gen.
*/
double find_my_seed( int kn, /* my processor rank, 0<=kn<=num procs */
int np, /* np = num procs */
long nn, /* total num of ran numbers, all procs */
double s, /* Ran num seed, for ex.: 314159265.00 */
double a ) /* Ran num gen mult, try 1220703125.00 */
{
long i;
double t1,t2,t3,an;
long mq,nq,kk,ik;
nq = nn / np;
for( mq=0; nq>1; mq++,nq/=2 )
;
t1 = a;
for( i=1; i<=mq; i++ )
t2 = randlc( &t1, &t1 );
an = t1;
kk = kn;
t1 = s;
t2 = an;
for( i=1; i<=100; i++ )
{
ik = kk / 2;
if( 2 * ik != kk )
t3 = randlc( &t1, &t2 );
if( ik == 0 )
break;
t3 = randlc( &t2, &t2 );
kk = ik;
}
return( t1 );
}
/*****************************************************************/
/************* C R E A T E _ S E Q ************/
/*****************************************************************/
void create_seq( double seed, double a, int *key_array, int size )
{
double x;
int i, k;
k = MAX_KEY/4;
for (i=0; i < size; i++)
{
x = randlc(&seed, &a);
x += randlc(&seed, &a);
x += randlc(&seed, &a);
x += randlc(&seed, &a);
key_array[i] = k*x;
}
}
@@ -0,0 +1,380 @@
#include "mpi.h"
#include "common.h"
#include "sort.h"
//#define TIME_PERF
#ifdef TIME_PERF
#define TIMERS 10
__device__ uint64_t timers[TIMERS] = {0};
__device__ uint64_t time_start;
#define TIMERS_START() \
if(threadIdx.x == 0) {\
time_start = roc_shmem_timer();\
}
#define TIME(TIMER_NUM) \
if(threadIdx.x == 0) {\
timers[TIMER_NUM] = roc_shmem_timer() - time_start;\
time_start = roc_shmem_timer();\
}
#define OUTPUT_TIME() \
if(threadIdx.x == 0 && my_pe == 0) { \
uint64_t sum = 0; \
for(int i = 0; i < TIMERS; ++i) { \
sum += timers[i]; \
} \
for(int i = 0; i < TIMERS; ++i) { \
printf("%d: %f\n", i, (double)timers[i] / (double)sum); \
} \
}
#else
#define TIMERS_START()
#define TIME(x)
#define OUTPUT_TIME()
#endif
__global__ void sort1(volatile int *keys, int *keyBuffer1,
int *keyBuffer2, int *sendCount,
int *recvCount, int *sendOffset,
int *recvOffset, int *outputKeys,
size_t size, int n_pes, int my_pe) {
__shared__ int bucketCounter[MAX_PES];
__shared__ int bucketPtr[MAX_PES];
__shared__ int total_size;
int buckets = n_pes;
int tid = threadIdx.x; // + blockDim.x * blockIdx.x;
const int K_PER_BUCK = (MAX_KEY / buckets);
// Reset
for(int i = threadIdx.x; i < buckets; i += blockDim.x) {
bucketCounter[i] = 0;
bucketPtr[i] = 0;
}
__syncthreads();
TIMERS_START()
// Count size of each bucket
for(int i = tid; i < size; i += blockDim.x) {
atomicAdd(&bucketCounter[keys[i] / K_PER_BUCK], 1);
}
__syncthreads();
TIME(0)
// Update in global memory
for(int i = tid; i < buckets; i += blockDim.x) {
sendCount[i] = bucketPtr[i] = bucketCounter[i];
}
__syncthreads();
TIME(1)
// Perform local scan to get ptrs set
for(int shift = 1; shift < buckets; shift *= 2) {
int temp = 0;
if(threadIdx.x >= shift && threadIdx.x < buckets) {
temp = bucketPtr[threadIdx.x - shift];
}
__syncthreads();
if(threadIdx.x < buckets) {
bucketPtr[threadIdx.x] += temp;
}
__syncthreads();
}
__syncthreads();
TIME(2)
// Find offsets of where we're sending
for(int i = threadIdx.x; i < buckets; i += blockDim.x) {
sendOffset[i] = bucketPtr[i] - sendCount[i];
}
// Sort keys into buckets
for(int i = threadIdx.x; i < size; i += blockDim.x) {
int loc = atomicAdd(&bucketPtr[keys[i] / K_PER_BUCK], -1) - 1;
keyBuffer1[loc] = keys[i];
}
TIME(3)
OUTPUT_TIME()
}
__global__ void sort2(volatile int *keys, int *keyBuffer1,
int *keyBuffer2, int *sendCount,
int *recvCount, int *sendOffset,
int *recvOffset, int *outputKeys,
size_t size, int n_pes, int my_pe) {
__shared__ int total_size;
int buckets = n_pes;
int tid = threadIdx.x; // + blockDim.x * blockIdx.x;
const int K_PER_BUCK = (MAX_KEY / buckets);
for(int i = threadIdx.x; i < K_PER_BUCK; i += blockDim.x)
outputKeys[i] = 0;
__syncthreads();
TIME(5)
int min_key_val = my_pe * K_PER_BUCK;
int max_key_val = (my_pe + 1) * K_PER_BUCK - 1;
int *key_buff_ptr = outputKeys - min_key_val;
for(int i = threadIdx.x; i < total_size; i += blockDim.x) {
atomicAdd(&key_buff_ptr[keyBuffer2[i]], 1);
}
__syncthreads();
TIME(6)
// Perform local scan on keys
for(int shift = 1; shift < K_PER_BUCK; shift *= 2) {
int temp = 0;
if(threadIdx.x >= shift && threadIdx.x < K_PER_BUCK) {
temp = outputKeys[threadIdx.x - shift];
}
__syncthreads();
if(threadIdx.x < K_PER_BUCK) {
outputKeys[threadIdx.x] += temp;
}
__syncthreads();
}
TIME(7)
OUTPUT_TIME()
}
void sort(volatile int *keys, int *keyBuffer1,
int *keyBuffer2, int *sendCount,
int *recvCount, int *sendOffset,
int *recvOffset, int *outputKeys,
size_t size, int max_iters) {
int nProcs, my_pe;
MPI_Comm_size(MPI_COMM_WORLD, &nProcs);
MPI_Comm_rank(MPI_COMM_WORLD, &my_pe);
hipStream_t stream;
HIPCHECK(hipStreamCreate(&stream));
for(int iter = 0; iter < max_iters; ++iter) {
//fprintf(stderr, "%d: %d %d %p %p\n", my_pe, iter, max_iters, sendCount, recvCount);
sort1<<<1, WG_SIZE, 0, stream>>>(keys, keyBuffer1,
keyBuffer2, sendCount, recvCount, sendOffset,
recvOffset, outputKeys, size, nProcs, my_pe);
HIPCHECK(hipStreamSynchronize(stream));
MPI_Alltoall(sendCount, 1, MPI_INT, recvCount, 1,
MPI_INT, MPI_COMM_WORLD);
MPI_Alltoall(sendOffset, 1, MPI_INT, recvOffset, 1,
MPI_INT, MPI_COMM_WORLD);
int total_size = 0;
MPI_Request *req = new MPI_Request[2 * nProcs];
const int TAG = 10000;
for(int i = 0; i < nProcs; ++i) {
MPI_Isend(&keyBuffer1[sendOffset[i]], sendCount[i],
MPI_INT, i, TAG, MPI_COMM_WORLD, &req[2 * i]);
MPI_Irecv(&keyBuffer2[total_size], recvCount[i],
MPI_INT, i, TAG, MPI_COMM_WORLD, &req[2 * i + 1]);
total_size += recvCount[i];
}
MPI_Waitall(2 * nProcs, req, MPI_STATUS_IGNORE);
sort2<<<1, WG_SIZE, 0, stream>>>(keys, keyBuffer1,
keyBuffer2, sendCount, recvCount, sendOffset,
recvOffset, outputKeys, size, nProcs, my_pe);
}
}
bool verify(int *outputKeys, int *keyBuffer2, size_t size)
{
int num_pes, my_pe;
MPI_Comm_size(MPI_COMM_WORLD, &num_pes);
MPI_Comm_rank(MPI_COMM_WORLD, &my_pe);
MPI_Status status;
MPI_Request request;
int min_key_val = my_pe * (MAX_KEY / num_pes);
int max_key_val = (my_pe + 1) * (MAX_KEY / num_pes) - 1;
int *key_array = new int[size];
// Perform final untimed sort on keys
for(int i = 0; i < size; ++i)
if(outputKeys[keyBuffer2[i] - min_key_val] > 0)
key_array[--outputKeys[keyBuffer2[i] - min_key_val]] = keyBuffer2[i];
else {
fprintf(stderr, "%d: Found wrong key %d at %d with %d\n", my_pe, keyBuffer2[i], i, outputKeys[keyBuffer2[i]]);
return false;
}
if(size < 1)
size = 1;
int k;
const int MPI_TAG = 1000;
// Check if largest key is smaller than next processor's
if(my_pe > 0)
MPI_Irecv(&k, 1, MPI_INT, my_pe - 1, MPI_TAG, MPI_COMM_WORLD,
&request);
if(my_pe < num_pes - 1)
MPI_Send(&key_array[size - 1], 1, MPI_INT, my_pe + 1, MPI_TAG,
MPI_COMM_WORLD );
if(my_pe > 0)
MPI_Wait(&request, &status);
// Check if it is smaller
int j = 0;
if( my_pe > 0 && size > 1 )
if( k > key_array[0] )
j++;
// Check if keys correctly sorted
for(int i = 1; i < size; i++)
if(key_array[i - 1] > key_array[i])
j++;
delete[] key_array;
if(j != 0) {
fprintf(stderr, "Processor %d: Full_verify: number of keys out of sort: %d\n",
my_pe, j );
return false;
}
return true;
}
void initGPU()
{
// Calculation for local rank, taken from rccl-tests
int localRank = 0;
int nProcs, proc;
MPI_Comm_size(MPI_COMM_WORLD, &nProcs);
MPI_Comm_rank(MPI_COMM_WORLD, &proc);
char hostname[1024];
gethostname(hostname, 1024);
for (int i=0; i< 1024; i++) {
if (hostname[i] == '.') {
hostname[i] = '\0';
break;
}
}
uint64_t hostHashs[nProcs];
hostHashs[proc] = getHostHash(hostname);
MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, hostHashs, sizeof(uint64_t), MPI_BYTE, MPI_COMM_WORLD);
for (int p=0; p<nProcs; p++) {
if (p == proc) break;
if (hostHashs[p] == hostHashs[proc]) localRank++;
}
/***
* Select a GPU
*/
int ndevices, my_device=0;
hipGetDeviceCount (&ndevices);
my_device = localRank % ndevices;
hipSetDevice(my_device);
printf("Rank %d: Device %d, Host %s\n", proc, my_device, hostname);
fflush(stdout);
MPI_Barrier(MPI_COMM_WORLD);
}
void *roc_shmem_malloc(size_t size)
{
void *v;
hipMalloc((void **)&v, size);
return v;
}
int roc_shmem_free(void *v)
{
return hipFree(v);
}
int main(int argc, char *argv[])
{
if(argc < 1) {
printf("Format: %s [iterations]\n", argv[0]);
return -1;
}
// Init stuff
MPI_Init(&argc, &argv);
initGPU();
int iterations = 1000;
if(argc > 1)
iterations = atoi(argv[1]);
int num_pes, my_pe;
MPI_Comm_size(MPI_COMM_WORLD, &num_pes);
MPI_Comm_rank(MPI_COMM_WORLD, &my_pe);
// Configure input and outputs
size_t size = 1024; //atoi(argv[1]);
int *keys, *outputKeys;
hipMalloc((void**)&keys, sizeof(int) * size);
hipMalloc((void**)&outputKeys, sizeof(int) * WG_SIZE);
/* Generate random number sequence and subsequent keys on all procs */
create_seq( find_my_seed( my_pe,
num_pes,
4*(long)size*num_pes,
314159265.00, /* Random number gen seed */
1220703125.00 ), /* Random number gen mult */
1220703125.00, keys, size ); /* Random number gen mult */
// Init buffers
int *keyBuffer1, *keyBuffer2;
keyBuffer1 = (int*)roc_shmem_malloc(sizeof(int) * size);
keyBuffer2 = (int*)roc_shmem_malloc(sizeof(int) * size * 4);
int *sendCount = 0, *recvCount = 0, *sendOffset = 0, *recvOffset = 0;
sendCount = (int*)roc_shmem_malloc(sizeof(int) * MAX_PES);
recvCount = (int*)roc_shmem_malloc(sizeof(int) * MAX_PES);
sendOffset = (int*)roc_shmem_malloc(sizeof(int) * MAX_PES);
recvOffset = (int*)roc_shmem_malloc(sizeof(int) * MAX_PES);
printf("Begin untimed run\n");
// Untimed run
MPI_Barrier(MPI_COMM_WORLD);
sort((int*)keys, keyBuffer1, keyBuffer2,
sendCount, recvCount, sendOffset, recvOffset,
outputKeys, size, 1);
hipDeviceSynchronize();
printf("Verify untimed run\n");
// Verify correctness
if(!verify(outputKeys, keyBuffer2, outputKeys[MAX_KEY / num_pes - 1])) {
fprintf(stderr, "Wrong output\n");
return -1;
}
printf("Begin timed run\n");
// Timed run
MPI_Barrier(MPI_COMM_WORLD);
auto time_start = TIME_NOW;
sort((int*)keys, keyBuffer1, keyBuffer2,
sendCount, recvCount, sendOffset, recvOffset,
outputKeys, size, iterations);
hipDeviceSynchronize();
double tot_time = (double)TIME_DIFF(TIME_NOW, time_start);
double all_time = 0;
MPI_Allreduce(&tot_time, &all_time, 1,
MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
if(my_pe == 0) {
printf("Avg time:\t%.3f\tus\n", all_time / (double)(1000.0 * iterations * num_pes));
}
// Verify correctness
if(!verify(outputKeys, keyBuffer2, outputKeys[MAX_KEY / num_pes - 1])) {
fprintf(stderr, "Wrong output\n");
return -1;
}
fprintf(stderr, "Done verify for %d\n", my_pe);
// Clean up
hipFree(keys);
hipFree(outputKeys);
roc_shmem_free(keyBuffer1);
roc_shmem_free(keyBuffer2);
roc_shmem_free(sendCount);
roc_shmem_free(recvCount);
roc_shmem_free(sendOffset);
roc_shmem_free(recvOffset);
MPI_Finalize();
return 0;
}
@@ -0,0 +1,394 @@
#include "rccl.h"
#include "common.h"
#include "sort.h"
//#define TIME_PERF
#ifdef TIME_PERF
#define TIMERS 10
__device__ uint64_t timers[TIMERS] = {0};
__device__ uint64_t time_start;
#define TIMERS_START() \
if(threadIdx.x == 0) {\
time_start = roc_shmem_timer();\
}
#define TIME(TIMER_NUM) \
if(threadIdx.x == 0) {\
timers[TIMER_NUM] = roc_shmem_timer() - time_start;\
time_start = roc_shmem_timer();\
}
#define OUTPUT_TIME() \
if(threadIdx.x == 0 && my_pe == 0) { \
uint64_t sum = 0; \
for(int i = 0; i < TIMERS; ++i) { \
sum += timers[i]; \
} \
for(int i = 0; i < TIMERS; ++i) { \
printf("%d: %f\n", i, (double)timers[i] / (double)sum); \
} \
}
#else
#define TIMERS_START()
#define TIME(x)
#define OUTPUT_TIME()
#endif
__global__ void sort1(volatile int *keys, int *keyBuffer1,
int *keyBuffer2, int *sendCount,
int *recvCount, int *sendOffset,
int *recvOffset, int *outputKeys,
size_t size, int n_pes, int my_pe) {
__shared__ int bucketCounter[MAX_PES];
__shared__ int bucketPtr[MAX_PES];
__shared__ int total_size;
int buckets = n_pes;
int tid = threadIdx.x; // + blockDim.x * blockIdx.x;
const int K_PER_BUCK = (MAX_KEY / buckets);
// Reset
for(int i = threadIdx.x; i < buckets; i += blockDim.x) {
bucketCounter[i] = 0;
bucketPtr[i] = 0;
}
__syncthreads();
TIMERS_START()
// Count size of each bucket
for(int i = tid; i < size; i += blockDim.x) {
atomicAdd(&bucketCounter[keys[i] / K_PER_BUCK], 1);
}
__syncthreads();
TIME(0)
// Update in global memory
for(int i = tid; i < buckets; i += blockDim.x) {
sendCount[i] = bucketPtr[i] = bucketCounter[i];
}
__syncthreads();
TIME(1)
// Perform local scan to get ptrs set
for(int shift = 1; shift < buckets; shift *= 2) {
int temp = 0;
if(threadIdx.x >= shift && threadIdx.x < buckets) {
temp = bucketPtr[threadIdx.x - shift];
}
__syncthreads();
if(threadIdx.x < buckets) {
bucketPtr[threadIdx.x] += temp;
}
__syncthreads();
}
__syncthreads();
TIME(2)
// Find offsets of where we're sending
for(int i = threadIdx.x; i < buckets; i += blockDim.x) {
sendOffset[i] = bucketPtr[i] - sendCount[i];
}
// Sort keys into buckets
for(int i = threadIdx.x; i < size; i += blockDim.x) {
int loc = atomicAdd(&bucketPtr[keys[i] / K_PER_BUCK], -1) - 1;
keyBuffer1[loc] = keys[i];
}
TIME(3)
OUTPUT_TIME()
}
__global__ void sort2(volatile int *keys, int *keyBuffer1,
int *keyBuffer2, int *sendCount,
int *recvCount, int *sendOffset,
int *recvOffset, int *outputKeys,
size_t size, int n_pes, int my_pe) {
__shared__ int total_size;
int buckets = n_pes;
int tid = threadIdx.x; // + blockDim.x * blockIdx.x;
const int K_PER_BUCK = (MAX_KEY / buckets);
for(int i = threadIdx.x; i < K_PER_BUCK; i += blockDim.x)
outputKeys[i] = 0;
__syncthreads();
TIME(5)
int min_key_val = my_pe * K_PER_BUCK;
int max_key_val = (my_pe + 1) * K_PER_BUCK - 1;
int *key_buff_ptr = outputKeys - min_key_val;
for(int i = threadIdx.x; i < total_size; i += blockDim.x) {
atomicAdd(&key_buff_ptr[keyBuffer2[i]], 1);
}
__syncthreads();
TIME(6)
// Perform local scan on keys
for(int shift = 1; shift < K_PER_BUCK; shift *= 2) {
int temp = 0;
if(threadIdx.x >= shift && threadIdx.x < K_PER_BUCK) {
temp = outputKeys[threadIdx.x - shift];
}
__syncthreads();
if(threadIdx.x < K_PER_BUCK) {
outputKeys[threadIdx.x] += temp;
}
__syncthreads();
}
TIME(7)
OUTPUT_TIME()
}
void sort(volatile int *keys, int *keyBuffer1,
int *keyBuffer2, int *sendCount,
int *recvCount, int *sendOffset,
int *recvOffset, int *outputKeys,
size_t size, int max_iters, ncclComm_t comm) {
int nProcs, my_pe;
MPI_Comm_size(MPI_COMM_WORLD, &nProcs);
MPI_Comm_rank(MPI_COMM_WORLD, &my_pe);
hipStream_t stream;
HIPCHECK(hipStreamCreate(&stream));
for(int iter = 0; iter < max_iters; ++iter) {
//fprintf(stderr, "%d: %d %d %p %p\n", my_pe, iter, max_iters, sendCount, recvCount);
sort1<<<1, WG_SIZE, 0, stream>>>(keys, keyBuffer1,
keyBuffer2, sendCount, recvCount, sendOffset,
recvOffset, outputKeys, size, nProcs, my_pe);
NCCLCHECK(ncclAllToAll(sendCount, recvCount, 1,
ncclInt, comm, stream));
NCCLCHECK(ncclAllToAll(sendOffset, recvOffset, 1,
ncclInt, comm, stream));
HIPCHECK(hipStreamSynchronize(stream));
NCCLCHECK(ncclGroupStart());
int total_size = 0;
for(int i = 0; i < nProcs; ++i) {
ncclSend(&keyBuffer1[sendOffset[i]], sendCount[i],
ncclInt, i, comm, stream);
ncclRecv(&keyBuffer2[total_size], recvCount[i],
ncclInt, i, comm, stream);
total_size += recvCount[i];
}
NCCLCHECK(ncclGroupEnd());
HIPCHECK(hipStreamSynchronize(stream));
sort2<<<1, WG_SIZE, 0, stream>>>(keys, keyBuffer1,
keyBuffer2, sendCount, recvCount, sendOffset,
recvOffset, outputKeys, size, nProcs, my_pe);
HIPCHECK(hipStreamSynchronize(stream));
}
}
bool verify(int *outputKeys, int *keyBuffer2, size_t size)
{
int num_pes, my_pe;
MPI_Comm_size(MPI_COMM_WORLD, &num_pes);
MPI_Comm_rank(MPI_COMM_WORLD, &my_pe);
MPI_Status status;
MPI_Request request;
int min_key_val = my_pe * (MAX_KEY / num_pes);
int max_key_val = (my_pe + 1) * (MAX_KEY / num_pes) - 1;
int *key_array = new int[size];
// Perform final untimed sort on keys
for(int i = 0; i < size; ++i)
if(outputKeys[keyBuffer2[i] - min_key_val] > 0)
key_array[--outputKeys[keyBuffer2[i] - min_key_val]] = keyBuffer2[i];
else {
fprintf(stderr, "%d: Found wrong key %d at %d with %d\n", my_pe, keyBuffer2[i], i, outputKeys[keyBuffer2[i]]);
return false;
}
if(size < 1)
size = 1;
int k;
const int MPI_TAG = 1000;
// Check if largest key is smaller than next processor's
if(my_pe > 0)
MPI_Irecv(&k, 1, MPI_INT, my_pe - 1, MPI_TAG, MPI_COMM_WORLD,
&request);
if(my_pe < num_pes - 1)
MPI_Send(&key_array[size - 1], 1, MPI_INT, my_pe + 1, MPI_TAG,
MPI_COMM_WORLD );
if(my_pe > 0)
MPI_Wait(&request, &status);
// Check if it is smaller
int j = 0;
if( my_pe > 0 && size > 1 )
if( k > key_array[0] )
j++;
// Check if keys correctly sorted
for(int i = 1; i < size; i++)
if(key_array[i - 1] > key_array[i])
j++;
delete[] key_array;
if(j != 0) {
fprintf(stderr, "Processor %d: Full_verify: number of keys out of sort: %d\n",
my_pe, j );
return false;
}
return true;
}
void initGPU(ncclComm_t &comms)
{
// Calculation for local rank, taken from rccl-tests
int localRank = 0;
int nProcs, proc;
MPI_Comm_size(MPI_COMM_WORLD, &nProcs);
MPI_Comm_rank(MPI_COMM_WORLD, &proc);
char hostname[1024];
gethostname(hostname, 1024);
for (int i=0; i< 1024; i++) {
if (hostname[i] == '.') {
hostname[i] = '\0';
break;
}
}
uint64_t hostHashs[nProcs];
hostHashs[proc] = getHostHash(hostname);
MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, hostHashs, sizeof(uint64_t), MPI_BYTE, MPI_COMM_WORLD);
for (int p=0; p<nProcs; p++) {
if (p == proc) break;
if (hostHashs[p] == hostHashs[proc]) localRank++;
}
/***
* Select a GPU
*/
int ndevices, my_device=0;
hipGetDeviceCount (&ndevices);
my_device = localRank % ndevices;
hipSetDevice(my_device);
ncclUniqueId ncclId;
if (proc == 0) {
NCCLCHECK(ncclGetUniqueId(&ncclId));
}
MPI_Bcast(&ncclId, sizeof(ncclId), MPI_BYTE, 0, MPI_COMM_WORLD);
MPI_Barrier(MPI_COMM_WORLD);
#ifdef RCCL_MULTIRANKPERGPU
NCCLCHECK(ncclCommInitRankMulti(&comms, nProcs, ncclId, proc, proc));
#else
NCCLCHECK(ncclCommInitRank(&comms, nProcs, ncclId, proc));
#endif
printf("Rank %d: Device %d, Host %s\n", proc, my_device, hostname);
fflush(stdout);
MPI_Barrier(MPI_COMM_WORLD);
}
void *roc_shmem_malloc(size_t size)
{
void *v;
hipMalloc((void **)&v, size);
return v;
}
int roc_shmem_free(void *v)
{
return hipFree(v);
}
int main(int argc, char *argv[])
{
if(argc < 1) {
printf("Format: %s [iterations]\n", argv[0]);
return -1;
}
// Init stuff
MPI_Init(&argc, &argv);
ncclComm_t comms;
initGPU(comms);
int iterations = 1000;
if(argc > 1)
iterations = atoi(argv[1]);
int num_pes, my_pe;
MPI_Comm_size(MPI_COMM_WORLD, &num_pes);
MPI_Comm_rank(MPI_COMM_WORLD, &my_pe);
// Configure input and outputs
size_t size = 1024; //atoi(argv[1]);
int *keys, *outputKeys;
hipMalloc((void**)&keys, sizeof(int) * size);
hipMalloc((void**)&outputKeys, sizeof(int) * WG_SIZE);
/* Generate random number sequence and subsequent keys on all procs */
create_seq( find_my_seed( my_pe,
num_pes,
4*(long)size*num_pes,
314159265.00, /* Random number gen seed */
1220703125.00 ), /* Random number gen mult */
1220703125.00, keys, size ); /* Random number gen mult */
// Init buffers
int *keyBuffer1, *keyBuffer2;
keyBuffer1 = (int*)roc_shmem_malloc(sizeof(int) * size);
keyBuffer2 = (int*)roc_shmem_malloc(sizeof(int) * size * 4);
int *sendCount = 0, *recvCount = 0, *sendOffset = 0, *recvOffset = 0;
sendCount = (int*)roc_shmem_malloc(sizeof(int) * MAX_PES);
recvCount = (int*)roc_shmem_malloc(sizeof(int) * MAX_PES);
sendOffset = (int*)roc_shmem_malloc(sizeof(int) * MAX_PES);
recvOffset = (int*)roc_shmem_malloc(sizeof(int) * MAX_PES);
printf("Begin untimed run\n");
// Untimed run
MPI_Barrier(MPI_COMM_WORLD);
sort((int*)keys, keyBuffer1, keyBuffer2,
sendCount, recvCount, sendOffset, recvOffset,
outputKeys, size, 1, comms);
hipDeviceSynchronize();
printf("Verify untimed run\n");
// Verify correctness
if(!verify(outputKeys, keyBuffer2, outputKeys[MAX_KEY / num_pes - 1])) {
fprintf(stderr, "Wrong output\n");
return -1;
}
printf("Begin timed run\n");
// Timed run
MPI_Barrier(MPI_COMM_WORLD);
auto time_start = TIME_NOW;
sort((int*)keys, keyBuffer1, keyBuffer2,
sendCount, recvCount, sendOffset, recvOffset,
outputKeys, size, iterations, comms);
hipDeviceSynchronize();
double tot_time = (double)TIME_DIFF(TIME_NOW, time_start);
double all_time = 0;
MPI_Allreduce(&tot_time, &all_time, 1,
MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
if(my_pe == 0) {
printf("Avg time:\t%.3f\tus\n", all_time / (double)(1000.0 * iterations * num_pes));
}
// Verify correctness
if(!verify(outputKeys, keyBuffer2, outputKeys[MAX_KEY / num_pes - 1])) {
fprintf(stderr, "Wrong output\n");
return -1;
}
// Clean up
hipFree(keys);
hipFree(outputKeys);
roc_shmem_free(keyBuffer1);
roc_shmem_free(keyBuffer2);
roc_shmem_free(sendCount);
roc_shmem_free(recvCount);
roc_shmem_free(sendOffset);
roc_shmem_free(recvOffset);
ncclCommDestroy(comms);
MPI_Finalize();
return 0;
}
@@ -0,0 +1,358 @@
#include <iostream>
#include <stdio.h>
#include <mpi.h>
#include <roc_shmem/roc_shmem.hpp>
#include <unistd.h>
using namespace std;
using namespace rocshmem;
#include "common.h"
#include "sort.h"
//#define TIME_PERF
#ifdef TIME_PERF
#define TIMERS 10
__device__ uint64_t timers[TIMERS] = {0};
__device__ uint64_t time_start;
#define TIMERS_START() \
if(threadIdx.x == 0) {\
time_start = roc_shmem_timer();\
}
#define TIME(TIMER_NUM) \
if(threadIdx.x == 0) {\
timers[TIMER_NUM] = roc_shmem_timer() - time_start;\
time_start = roc_shmem_timer();\
}
#define OUTPUT_TIME() \
if(threadIdx.x == 0 && my_pe == 0) { \
uint64_t sum = 0; \
for(int i = 0; i < TIMERS; ++i) { \
sum += timers[i]; \
} \
for(int i = 0; i < TIMERS; ++i) { \
printf("%d: %f\n", i, (double)timers[i] / (double)sum); \
} \
}
#else
#define TIMERS_START()
#define TIME(x)
#define OUTPUT_TIME()
#endif
__device__ __inline__ void alltoall(roc_shmem_ctx_t &ctx,
roc_shmem_team_t team,
int *dst, int *src) {
// Perform alltoall
roc_shmem_ctx_int_wg_alltoall(ctx,
team,
dst, // T* dest
src, // const T* source
1); // int nelement
}
__global__ void sort(volatile int *keys, int *keyBuffer1,
int *keyBuffer2, int *sendCount,
int *recvCount, int *sendOffset,
int *recvOffset, int *outputKeys,
size_t size, roc_shmem_team_t team,
int max_iters) {
__shared__ roc_shmem_ctx_t ctx;
__shared__ int bucketCounter[MAX_PES];
__shared__ int bucketPtr[MAX_PES];
__shared__ int total_size;
roc_shmem_wg_init();
roc_shmem_wg_ctx_create(ROC_SHMEM_CTX_WG_PRIVATE, &ctx);
int n_pes = roc_shmem_ctx_n_pes(ctx);
int my_pe = roc_shmem_my_pe();
int buckets = n_pes;
int tid = threadIdx.x; // + blockDim.x * blockIdx.x;
const int K_PER_BUCK = (MAX_KEY / buckets);
for(int iter = 0; iter < max_iters; ++iter) {
// Reset
for(int i = threadIdx.x; i < buckets; i += blockDim.x) {
bucketCounter[i] = 0;
bucketPtr[i] = 0;
}
__syncthreads();
TIMERS_START()
// Count size of each bucket
for(int i = tid; i < size; i += blockDim.x) {
atomicAdd(&bucketCounter[keys[i] / K_PER_BUCK], 1);
}
__syncthreads();
TIME(0)
// Update in global memory
for(int i = tid; i < buckets; i += blockDim.x) {
sendCount[i] = bucketPtr[i] = bucketCounter[i];
}
__syncthreads();
TIME(1)
// Perform local scan to get ptrs set
for(int shift = 1; shift < buckets; shift *= 2) {
int temp = 0;
if(threadIdx.x >= shift && threadIdx.x < buckets) {
temp = bucketPtr[threadIdx.x - shift];
}
__syncthreads();
if(threadIdx.x < buckets) {
bucketPtr[threadIdx.x] += temp;
}
__syncthreads();
}
__syncthreads();
TIME(2)
// Find offsets of where we're sending
for(int i = threadIdx.x; i < buckets; i += blockDim.x) {
sendOffset[i] = bucketPtr[i] - sendCount[i];
}
// Sort keys into buckets
for(int i = threadIdx.x; i < size; i += blockDim.x) {
int loc = atomicAdd(&bucketPtr[keys[i] / K_PER_BUCK], -1) - 1;
keyBuffer1[loc] = keys[i];
}
roc_shmem_ctx_threadfence_system(ctx);
// Force sync to wait for all PEs to update bucket sizes
roc_shmem_ctx_wg_team_sync(ctx, team);
TIME(3)
// Let all PEs know how many keys you wish to send
alltoall(ctx, team, recvCount, sendCount);
// Let all PEs know where the offsets are of the keys
alltoall(ctx, team, recvOffset, sendOffset);
__syncthreads();
TIME(4)
if(threadIdx.x == 0) {
total_size = 0;
for(int i = 0; i < buckets; ++i) {
roc_shmem_int_get_nbi(&keyBuffer2[total_size],
&keyBuffer1[recvOffset[i]], recvCount[i], i);
total_size += recvCount[i];
}
roc_shmem_quiet();
}
for(int i = threadIdx.x; i < K_PER_BUCK; i += blockDim.x)
outputKeys[i] = 0;
__syncthreads();
TIME(5)
int min_key_val = my_pe * K_PER_BUCK;
int max_key_val = (my_pe + 1) * K_PER_BUCK - 1;
int *key_buff_ptr = outputKeys - min_key_val;
for(int i = threadIdx.x; i < total_size; i += blockDim.x) {
atomicAdd(&key_buff_ptr[keyBuffer2[i]], 1);
}
__syncthreads();
TIME(6)
// Perform local scan on keys
for(int shift = 1; shift < K_PER_BUCK; shift *= 2) {
int temp = 0;
if(threadIdx.x >= shift && threadIdx.x < K_PER_BUCK) {
temp = outputKeys[threadIdx.x - shift];
}
__syncthreads();
if(threadIdx.x < K_PER_BUCK) {
outputKeys[threadIdx.x] += temp;
}
__syncthreads();
}
TIME(7)
}
OUTPUT_TIME()
roc_shmem_wg_ctx_destroy(ctx);
roc_shmem_wg_finalize();
}
bool verify(int *outputKeys, int *keyBuffer2, size_t size)
{
int num_pes = roc_shmem_n_pes();
int my_pe = roc_shmem_my_pe();
MPI_Status status;
MPI_Request request;
int min_key_val = my_pe * (MAX_KEY / num_pes);
int max_key_val = (my_pe + 1) * (MAX_KEY / num_pes) - 1;
int *key_array = new int[size];
// Perform final untimed sort on keys
for(int i = 0; i < size; ++i)
if(outputKeys[keyBuffer2[i] - min_key_val] > 0)
key_array[--outputKeys[keyBuffer2[i] - min_key_val]] = keyBuffer2[i];
else {
fprintf(stderr, "%d: Found wrong key %d at %d with %d\n", my_pe, keyBuffer2[i], i, outputKeys[keyBuffer2[i]]);
return false;
}
if(size < 1)
size = 1;
int k;
const int MPI_TAG = 1000;
// Check if largest key is smaller than next processor's
if(my_pe > 0)
MPI_Irecv(&k, 1, MPI_INT, my_pe - 1, MPI_TAG, MPI_COMM_WORLD,
&request);
if(my_pe < num_pes - 1)
MPI_Send(&key_array[size - 1], 1, MPI_INT, my_pe + 1, MPI_TAG,
MPI_COMM_WORLD );
if(my_pe > 0)
MPI_Wait(&request, &status);
// Check if it is smaller
int j = 0;
if( my_pe > 0 && size > 1 )
if( k > key_array[0] )
j++;
// Check if keys correctly sorted
for(int i = 1; i < size; i++)
if(key_array[i - 1] > key_array[i])
j++;
delete[] key_array;
if(j != 0) {
fprintf(stderr, "Processor %d: Full_verify: number of keys out of sort: %d\n",
my_pe, j );
return false;
}
return true;
}
void initGPU()
{
// Calculation for local rank, taken from rccl-tests
int localRank = 0;
int proc = roc_shmem_my_pe();
int nProcs = roc_shmem_n_pes();
char hostname[1024];
gethostname(hostname, 1024);
for (int i=0; i< 1024; i++) {
if (hostname[i] == '.') {
hostname[i] = '\0';
break;
}
}
uint64_t hostHashs[nProcs];
hostHashs[proc] = getHostHash(hostname);
MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, hostHashs, sizeof(uint64_t), MPI_BYTE, MPI_COMM_WORLD);
for (int p=0; p<nProcs; p++) {
if (p == proc) break;
if (hostHashs[p] == hostHashs[proc]) localRank++;
}
/***
* Select a GPU
*/
int ndevices, my_device=0;
hipGetDeviceCount (&ndevices);
my_device = localRank % ndevices;
hipSetDevice(my_device);
printf("Rank %d: Device %d, Host %s\n", proc, my_device, hostname);
fflush(stdout);
MPI_Barrier(MPI_COMM_WORLD);
}
int main(int argc, char *argv[])
{
// Init roc_shmem stuff
initGPU();
roc_shmem_init(NUM_WGS);
int n_pes = roc_shmem_team_n_pes(ROC_SHMEM_TEAM_WORLD);
roc_shmem_team_t team_world_dup = ROC_SHMEM_TEAM_INVALID;
roc_shmem_team_split_strided(ROC_SHMEM_TEAM_WORLD,
0,
1,
n_pes,
nullptr,
0,
&team_world_dup);
int iterations = 1000;
if(argc > 1)
iterations = atoi(argv[1]);
int num_pes = roc_shmem_n_pes();
int my_pe = roc_shmem_my_pe();
// Configure input and outputs
size_t size = 1024; //atoi(argv[2]);
int *keys, *outputKeys;
hipMalloc((void**)&keys, sizeof(int) * size);
hipMalloc((void**)&outputKeys, sizeof(int) * WG_SIZE);
/* Generate random number sequence and subsequent keys on all procs */
create_seq( find_my_seed( my_pe,
num_pes,
4*(long)size*num_pes,
314159265.00, /* Random number gen seed */
1220703125.00 ), /* Random number gen mult */
1220703125.00, keys, size ); /* Random number gen mult */
// Init buffers
int *keyBuffer1, *keyBuffer2;
keyBuffer1 = (int*)roc_shmem_malloc(sizeof(int) * size);
keyBuffer2 = (int*)roc_shmem_malloc(sizeof(int) * size * 4);
int *sendCount, *recvCount, *sendOffset, *recvOffset;
sendCount = (int*)roc_shmem_malloc(sizeof(int) * MAX_PES);
recvCount = (int*)roc_shmem_malloc(sizeof(int) * MAX_PES);
sendOffset = (int*)roc_shmem_malloc(sizeof(int) * MAX_PES);
recvOffset = (int*)roc_shmem_malloc(sizeof(int) * MAX_PES);
// Untimed run
roc_shmem_barrier_all();
sort<<<1, WG_SIZE>>>((int*)keys, keyBuffer1, keyBuffer2,
sendCount, recvCount, sendOffset, recvOffset,
outputKeys, size, team_world_dup, 1);
hipDeviceSynchronize();
// Verify correctness
if(!verify(outputKeys, keyBuffer2, outputKeys[MAX_KEY / num_pes - 1])) {
fprintf(stderr, "Wrong output\n");
return -1;
}
// Timed run
roc_shmem_barrier_all();
auto time_start = TIME_NOW;
sort<<<1, WG_SIZE>>>((int*)keys, keyBuffer1, keyBuffer2,
sendCount, recvCount, sendOffset, recvOffset,
outputKeys, size, team_world_dup, iterations);
hipDeviceSynchronize();
double tot_time = (double)TIME_DIFF(TIME_NOW, time_start);
double all_time = 0;
MPI_Allreduce(&tot_time, &all_time, 1,
MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
if(my_pe == 0) {
printf("Avg time:\t%f\tus\n", all_time /
(double)(1000.0 * iterations * num_pes));
}
// Verify correctness
if(!verify(outputKeys, keyBuffer2, outputKeys[MAX_KEY / num_pes - 1])) {
fprintf(stderr, "Wrong output\n");
return -1;
}
// Clean up
hipFree(keys);
hipFree(outputKeys);
roc_shmem_free(keyBuffer1);
roc_shmem_free(keyBuffer2);
roc_shmem_free(sendCount);
roc_shmem_free(recvCount);
roc_shmem_free(sendOffset);
roc_shmem_free(recvOffset);
roc_shmem_finalize();
return 0;
}
@@ -0,0 +1,27 @@
#!/bin/bash
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
if [ -z $1 ]
then
install_path=~/rocshmem
else
install_path=$1
fi
src_path=$(dirname "$(realpath $0)")/../../
cmake \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_INSTALL_PREFIX=$install_path \
-DCMAKE_VERBOSE_MAKEFILE=OFF \
-DDEBUG=OFF \
-DPROFILE=OFF \
-DUSE_GPU_IB=ON \
-DUSE_DC=ON \
-DUSE_IPC=OFF \
-DUSE_THREADS=ON \
-DUSE_WF_COAL=OFF \
$src_path
cmake --build . --parallel 8
cmake --install .
@@ -0,0 +1,27 @@
#!/bin/bash
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
if [ -z $1 ]
then
install_path=~/rocshmem
else
install_path=$1
fi
src_path=$(dirname "$(realpath $0)")/../../
cmake \
-DCMAKE_BUILD_TYPE=Debug \
-DCMAKE_INSTALL_PREFIX=$install_path \
-DCMAKE_VERBOSE_MAKEFILE=ON \
-DDEBUG=ON \
-DPROFILE=OFF \
-DUSE_GPU_IB=ON \
-DUSE_DC=ON \
-DUSE_IPC=OFF \
-DUSE_THREADS=ON \
-DUSE_WF_COAL=OFF \
$src_path
cmake --build . --parallel 8
cmake --install .
@@ -0,0 +1,26 @@
#!/bin/bash
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
if [ -z $1 ]
then
install_path=~/rocshmem
else
install_path=$1
fi
src_path=$(dirname "$(realpath $0)")/../../
cmake \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_INSTALL_PREFIX=$install_path \
-DCMAKE_VERBOSE_MAKEFILE=OFF \
-DDEBUG=OFF \
-DPROFILE=OFF \
-DUSE_GPU_IB=ON \
-DUSE_DC=ON \
-DUSE_IPC=ON \
-DUSE_THREADS=ON \
-DUSE_WF_COAL=OFF \
$src_path
cmake --build . --parallel 8
cmake --install .
@@ -0,0 +1,27 @@
#!/bin/bash
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
if [ -z $1 ]
then
install_path=~/rocshmem
else
install_path=$1
fi
src_path=$(dirname "$(realpath $0)")/../../
cmake \
-DCMAKE_BUILD_TYPE=Debug \
-DCMAKE_INSTALL_PREFIX=$install_path \
-DCMAKE_VERBOSE_MAKEFILE=OFF \
-DDEBUG=OFF \
-DPROFILE=ON \
-DUSE_GPU_IB=ON \
-DUSE_DC=ON \
-DUSE_IPC=OFF \
-DUSE_THREADS=ON \
-DUSE_WF_COAL=OFF \
$src_path
cmake --build . --parallel 8
cmake --install .
@@ -0,0 +1,26 @@
#!/bin/bash
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
if [ -z $1 ]
then
install_path=~/rocshmem
else
install_path=$1
fi
src_path=$(dirname "$(realpath $0)")/../../
cmake \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_INSTALL_PREFIX=$install_path \
-DCMAKE_VERBOSE_MAKEFILE=OFF \
-DDEBUG=OFF \
-DPROFILE=OFF \
-DUSE_GPU_IB=ON \
-DUSE_DC=ON \
-DUSE_IPC=OFF \
-DUSE_THREADS=OFF \
-DUSE_WF_COAL=OFF \
$src_path
cmake --build . --parallel 8
cmake --install .
@@ -0,0 +1,27 @@
#!/bin/bash
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
if [ -z $1 ]
then
install_path=~/rocshmem
else
install_path=$1
fi
src_path=$(dirname "$(realpath $0)")/../../
cmake \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_INSTALL_PREFIX=$install_path \
-DCMAKE_VERBOSE_MAKEFILE=OFF \
-DDEBUG=OFF \
-DPROFILE=OFF \
-DUSE_GPU_IB=ON \
-DUSE_DC=OFF \
-DUSE_IPC=OFF \
-DUSE_THREADS=ON \
-DUSE_WF_COAL=OFF \
$src_path
cmake --build . --parallel 8
cmake --install .
@@ -0,0 +1,26 @@
#!/bin/bash
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
if [ -z $1 ]
then
install_path=~/rocshmem
else
install_path=$1
fi
src_path=$(dirname "$(realpath $0)")/../../
cmake \
-DCMAKE_BUILD_TYPE=Debug \
-DCMAKE_INSTALL_PREFIX=$install_path \
-DCMAKE_VERBOSE_MAKEFILE=ON \
-DDEBUG=OFF \
-DPROFILE=OFF \
-DUSE_GPU_IB=ON \
-DUSE_DC=OFF \
-DUSE_IPC=OFF \
-DUSE_THREADS=ON \
-DUSE_WF_COAL=OFF \
$src_path
cmake --build . --parallel 8
cmake --install .
@@ -0,0 +1,26 @@
#!/bin/bash
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
if [ -z $1 ]
then
install_path=~/rocshmem
else
install_path=$1
fi
src_path=$(dirname "$(realpath $0)")/../../
cmake \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_INSTALL_PREFIX=$install_path \
-DCMAKE_VERBOSE_MAKEFILE=OFF \
-DDEBUG=OFF \
-DPROFILE=OFF \
-DUSE_GPU_IB=ON \
-DUSE_DC=OFF \
-DUSE_IPC=OFF \
-DUSE_THREADS=ON \
-DUSE_WF_COAL=ON \
$src_path
cmake --build . --parallel 8
cmake --install .
@@ -0,0 +1,28 @@
#!/bin/bash
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
if [ -z $1 ]
then
install_path=~/rocshmem
else
install_path=$1
fi
src_path=$(dirname "$(realpath $0)")/../../
cmake \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_INSTALL_PREFIX=$install_path \
-DCMAKE_VERBOSE_MAKEFILE=OFF \
-DDEBUG=OFF \
-DPROFILE=OFF \
-DUSE_GPU_IB=ON \
-DUSE_DC=OFF \
-DUSE_IPC=OFF \
-DUSE_COHERENT_HEAP=OFF \
-DUSE_CACHED_HEAP=OFF \
-DUSE_THREADS=OFF \
-DUSE_WF_COAL=OFF \
$src_path
cmake --build . --parallel 8
cmake --install .
@@ -0,0 +1,26 @@
#!/bin/bash
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
if [ -z $1 ]
then
install_path=~/rocshmem
else
install_path=$1
fi
src_path=$(dirname "$(realpath $0)")/../../
cmake \
-DCMAKE_BUILD_TYPE=Debug \
-DCMAKE_INSTALL_PREFIX=$install_path \
-DCMAKE_VERBOSE_MAKEFILE=ON \
-DDEBUG=ON \
-DPROFILE=OFF \
-DUSE_GPU_IB=ON \
-DUSE_DC=OFF \
-DUSE_IPC=OFF \
-DUSE_THREADS=OFF \
-DUSE_WF_COAL=OFF \
$src_path
cmake --build . --parallel 8
cmake --install .
@@ -0,0 +1,27 @@
#!/bin/bash
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
if [ -z $1 ]
then
install_path=~/rocshmem
else
install_path=$1
fi
src_path=$(dirname "$(realpath $0)")/../../
cmake \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_INSTALL_PREFIX=$install_path \
-DCMAKE_VERBOSE_MAKEFILE=OFF \
-DDEBUG=OFF \
-DPROFILE=OFF \
-DUSE_GPU_IB=ON \
-DUSE_DC=OFF \
-DUSE_IPC=OFF \
-DUSE_THREADS=OFF \
-DUSE_WF_COAL=OFF \
-DUSE_MANAGED_HEAP=ON \
$src_path
cmake --build . --parallel 8
cmake --install .
@@ -0,0 +1,27 @@
#!/bin/bash
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
if [ -z $1 ]
then
install_path=~/rocshmem
else
install_path=$1
fi
src_path=$(dirname "$(realpath $0)")/../../
cmake \
-DCMAKE_BUILD_TYPE=Debug \
-DCMAKE_INSTALL_PREFIX=$install_path \
-DCMAKE_VERBOSE_MAKEFILE=ON \
-DDEBUG=ON \
-DPROFILE=OFF \
-DUSE_GPU_IB=ON \
-DUSE_DC=OFF \
-DUSE_IPC=OFF \
-DUSE_THREADS=OFF \
-DUSE_WF_COAL=OFF \
-DUSE_MANAGED_HEAP=ON \
$src_path
cmake --build . --parallel 8
cmake --install .
@@ -0,0 +1,26 @@
#!/bin/bash
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
if [ -z $1 ]
then
install_path=~/rocshmem
else
install_path=$1
fi
src_path=$(dirname "$(realpath $0)")/../../
cmake \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_INSTALL_PREFIX=$install_path \
-DCMAKE_VERBOSE_MAKEFILE=ON \
-DDEBUG=OFF \
-DPROFILE=ON \
-DUSE_GPU_IB=ON \
-DUSE_DC=OFF \
-DUSE_IPC=OFF \
-DUSE_THREADS=OFF \
-DUSE_WF_COAL=OFF \
$src_path
cmake --build . --parallel 8
cmake --install .
@@ -0,0 +1,31 @@
#!/bin/bash
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
if [ -z $1 ]
then
install_path=~/rocshmem
else
install_path=$1
fi
src_path=$(dirname "$(realpath $0)")/../../
cmake \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_INSTALL_PREFIX=$install_path \
-DCMAKE_VERBOSE_MAKEFILE=OFF \
-DDEBUG=OFF \
-DPROFILE=OFF \
-DUSE_GPU_IB=ON \
-DUSE_DC=OFF \
-DUSE_IPC=ON \
-DUSE_COHERENT_HEAP=OFF \
-DUSE_CACHED_HEAP=OFF \
-DUSE_THREADS=OFF \
-DUSE_WF_COAL=OFF \
-DUSE_SINGLE_NODE=ON \
-DUSE_HOST_SIDE_HDP_FLUSH=ON\
-DROCM_PATH="/opt/rocm-5.4.2/"\
$src_path
cmake --build . --parallel 8
cmake --install .
@@ -0,0 +1,30 @@
#!/bin/bash
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
if [ -z $1 ]
then
install_path=~/rocshmem
else
install_path=$1
fi
src_path=$(dirname "$(realpath $0)")/../../
cmake \
-DCMAKE_BUILD_TYPE=Debug \
-DCMAKE_INSTALL_PREFIX=$install_path \
-DCMAKE_VERBOSE_MAKEFILE=OFF \
-DDEBUG=OFF \
-DPROFILE=OFF \
-DUSE_GPU_IB=ON \
-DUSE_DC=OFF \
-DUSE_IPC=ON \
-DUSE_COHERENT_HEAP=OFF \
-DUSE_CACHED_HEAP=OFF \
-DUSE_THREADS=OFF \
-DUSE_WF_COAL=OFF \
-DUSE_SINGLE_NODE=ON \
-DUSE_HOST_SIDE_HDP_FLUSH=ON\
$src_path
cmake --build . --parallel 8
cmake --install .
@@ -0,0 +1,27 @@
#!/bin/bash
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
if [ -z $1 ]
then
install_path=~/rocshmem
else
install_path=$1
fi
src_path=$(dirname "$(realpath $0)")/../../
cmake \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_INSTALL_PREFIX=$install_path \
-DCMAKE_VERBOSE_MAKEFILE=OFF \
-DDEBUG=OFF \
-DPROFILE=OFF \
-DUSE_GPU_IB=OFF \
-DUSE_DC=OFF \
-DUSE_IPC=OFF \
-DUSE_THREADS=ON \
-DUSE_WF_COAL=OFF \
-DUSE_COHERENT_HEAP=ON \
$src_path
cmake --build . --parallel 8
cmake --install .
@@ -0,0 +1,27 @@
#!/bin/bash
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
if [ -z $1 ]
then
install_path=~/rocshmem
else
install_path=$1
fi
src_path=$(dirname "$(realpath $0)")/../../
cmake \
-DCMAKE_BUILD_TYPE=Debug \
-DCMAKE_INSTALL_PREFIX=$install_path \
-DCMAKE_VERBOSE_MAKEFILE=OFF \
-DDEBUG=OFF \
-DPROFILE=OFF \
-DUSE_GPU_IB=OFF \
-DUSE_DC=OFF \
-DUSE_IPC=OFF \
-DUSE_THREADS=ON \
-DUSE_WF_COAL=OFF \
-DUSE_COHERENT_HEAP=OFF \
$src_path
cmake --build . --parallel 8
# cmake --install .
@@ -0,0 +1,5 @@
gdb scripts allow launching rocshmem tests repeatedly with gdb
and dump backtrace on error
- gdbscript - consists of commands which are executed on gdb launch
- gdbrun - run script, launches test in loop with gdb enabled
e.g ./gdbrun 14 10 launches pingPong 10 times

برخی از فایل ها نشان داده نشدند زیرا تعداد زیادی فایل در این تفاوت تغییر کرده اند نمایش بیشتر