@@ -0,0 +1,13 @@
|
||||
# This is the list of ROCSHMEM's significant contributors.
|
||||
#
|
||||
# This does not necessarily list everyone who has contributed code,
|
||||
# especially since many employees of one corporation may be contributing.
|
||||
# To see the full list of contributors, see the revision history in
|
||||
# source control.
|
||||
Khaled Hamidouche
|
||||
Brandon Potter
|
||||
Michael LeBeane
|
||||
Rohit Zambre
|
||||
Kishore Punniyamurthy
|
||||
Ruchi Shah
|
||||
Muhammad A. Awad
|
||||
@@ -0,0 +1,12 @@
|
||||
Version 1.6.0 (20 June 2023)
|
||||
|
||||
* Support for Frontier and Crusher systems
|
||||
* Multithread support with Reverse Offload backend
|
||||
* Support for all AMO operations and types with Reverse Offload backend
|
||||
* Support for private context on global memory
|
||||
* Reduce LDS usage to improve occupancy
|
||||
* Code refactoring to reduce register file usage
|
||||
* Fix and enhance default context support
|
||||
* Fix and enhance IPC support
|
||||
* Fix race condition with multi-thread support
|
||||
* Add fine-grain locking schemes
|
||||
@@ -0,0 +1,432 @@
|
||||
###############################################################################
|
||||
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to
|
||||
# deal in the Software without restriction, including without limitation the
|
||||
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
# sell copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
###############################################################################
|
||||
|
||||
cmake_minimum_required(VERSION 3.16.3 FATAL_ERROR)
|
||||
|
||||
###############################################################################
|
||||
# AVOID IN SOURCE BUILD
|
||||
###############################################################################
|
||||
if(CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR AND CMAKE_CURRENT_SOURCE_DIR STREQUAL
|
||||
CMAKE_SOURCE_DIR)
|
||||
set(MSG "")
|
||||
message(STATUS "Warning! Building from the source directory is not recommended")
|
||||
message(STATUS "If unintended, please remove 'CMakeCache.txt' and 'CMakeFiles'")
|
||||
message(STATUS "and build from a separate directory")
|
||||
message(FATAL_ERROR "In-source build")
|
||||
endif()
|
||||
|
||||
###############################################################################
|
||||
# SYSTEM INFO
|
||||
###############################################################################
|
||||
cmake_host_system_information(RESULT LOCALHOST QUERY FQDN)
|
||||
message(STATUS "Hostname: ${LOCALHOST}")
|
||||
|
||||
###############################################################################
|
||||
# VERSIONING INFO DERIVED FROM FILE
|
||||
###############################################################################
|
||||
file(READ "${CMAKE_CURRENT_SOURCE_DIR}/VERSION" FULL_VERSION_STRING LIMIT_COUNT 1)
|
||||
string(REGEX REPLACE "(\n|\r)" "" FULL_VERSION_STRING "${FULL_VERSION_STRING}")
|
||||
set(ROCSHMEM_FULL_VERSION "${FULL_VERSION_STRING}")
|
||||
string(REGEX REPLACE "([0-9]+)\.([0-9]+)\.([0-9]+)(.*)" "\\1.\\2.\\3" ROCSHMEM_VERSION
|
||||
"${FULL_VERSION_STRING}")
|
||||
|
||||
###############################################################################
|
||||
# CONFIGURATION OPTIONS
|
||||
###############################################################################
|
||||
option(DEBUG "Enable debug trace" OFF)
|
||||
option(PROFILE "Enable statistics and timing support" OFF)
|
||||
option(USE_GPU_IB "Enable GPU_IB conduit. If off, RO_NET will be used" ON)
|
||||
option(USE_DC "Enable IB dynamically connected transport (DC)" OFF)
|
||||
option(USE_IPC "Enable IPC support (using HIP)" OFF)
|
||||
option(USE_THREADS "Enable workgroup threads to share network queues" OFF)
|
||||
option(USE_WF_COAL "Enable wavefront message coalescing" OFF)
|
||||
option(USE_COHERENT_HEAP "Enable support for coherent systems" OFF)
|
||||
option(USE_CACHED_HEAP "Enable support for cached systems" OFF)
|
||||
option(USE_MANAGED_HEAP "Enable managed memory" OFF)
|
||||
option(USE_HOST_HEAP "Enable host memory using malloc/free" OFF)
|
||||
option(USE_HIP_HOST_HEAP "Enable host memory using hip api" OFF)
|
||||
option(USE_FUNC_CALL "Force compiler to use function calls on library API" OFF)
|
||||
option(USE_SHARED_CTX "Request support for shared ctx between WG" OFF)
|
||||
option(USE_SINGLE_NODE "Enable single node support only." OFF)
|
||||
option(USE_HOST_SIDE_HDP_FLUSH "Use a polling thread to flush the HDP cache on the host." OFF)
|
||||
option(BUILD_FUNCTIONAL_TESTS "Build the functional tests" ON)
|
||||
option(BUILD_SOS_TESTS "Build the host-facing tests" OFF)
|
||||
option(BUILD_UNIT_TESTS "Build the unit tests" ON)
|
||||
|
||||
set(ROCM_PATH "" CACHE PATH "ROCm path to use")
|
||||
|
||||
configure_file(cmake/config.h.in config.h)
|
||||
|
||||
###############################################################################
|
||||
# Validate user passed options
|
||||
###############################################################################
|
||||
if(ROCM_PATH)
|
||||
set(HIPCONFIG_PATH "${ROCM_PATH}/bin/hipconfig")
|
||||
execute_process(
|
||||
COMMAND "${HIPCONFIG_PATH}" --version
|
||||
RESULT_VARIABLE HIPCONFIG_RETURN
|
||||
OUTPUT_VARIABLE ROCM_VERSION
|
||||
ERROR_QUIET
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
if(HIPCONFIG_RETURN EQUAL 0)
|
||||
message(STATUS "Using ROCm with version: ${ROCM_VERSION}")
|
||||
else()
|
||||
message(FATAL_ERROR "ROCm at ${ROCM_PATH} is missing the hipconfig binary. "
|
||||
"Use -DROCM_PATH=/path/to/rocm or ensure ROCm's hipconfig is in your path.")
|
||||
endif()
|
||||
else()
|
||||
set(HIPCONFIG_PATH "hipconfig")
|
||||
execute_process(
|
||||
COMMAND "${HIPCONFIG_PATH}" --rocmpath
|
||||
RESULT_VARIABLE HIPCONFIG_RETURN
|
||||
OUTPUT_VARIABLE ROCM_PATH
|
||||
ERROR_QUIET
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
if(HIPCONFIG_RETURN EQUAL 0)
|
||||
message(STATUS "Found ROCm at: ${ROCM_PATH}")
|
||||
else()
|
||||
message(FATAL_ERROR "Ensure hipconfig is in your path or use -DROCM_PATH=/path/to/rocm.")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
###############################################################################
|
||||
# GLOBAL COMPILE FLAGS
|
||||
###############################################################################
|
||||
if (NOT DEFINED CMAKE_CXX_COMPILER)
|
||||
set(CMAKE_CXX_COMPILER ${ROCM_PATH}/bin/hipcc)
|
||||
endif()
|
||||
set(CMAKE_CXX_EXTENSIONS OFF)
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "-O0 -ggdb")
|
||||
|
||||
###############################################################################
|
||||
# MODULE SEARCH PATH
|
||||
###############################################################################
|
||||
set(
|
||||
CMAKE_MODULE_PATH
|
||||
${CMAKE_MODULE_PATH}
|
||||
"${CMAKE_SOURCE_DIR}/cmake/Modules"
|
||||
"{ROCM_PATH}/hip/cmake"
|
||||
"{ROCM_PATH}/rocclr/lib/cmake/rocclr"
|
||||
)
|
||||
|
||||
###############################################################################
|
||||
# PROJECT
|
||||
###############################################################################
|
||||
project(
|
||||
rocshmem
|
||||
VERSION ${ROCSHMEM_VERSION}
|
||||
LANGUAGES CXX
|
||||
DESCRIPTION "ROCSHMEM"
|
||||
HOMEPAGE_URL "https://github.com/ROCm-Developer-Tools/ROC_SHMEM")
|
||||
|
||||
###############################################################################
|
||||
# DEFAULT BUILD TYPE
|
||||
###############################################################################
|
||||
if(NOT CMAKE_BUILD_TYPE)
|
||||
set(
|
||||
CMAKE_BUILD_TYPE
|
||||
"Release"
|
||||
CACHE
|
||||
STRING
|
||||
"build type: Release, Debug, RelWithDebInfo, MinSizeRel"
|
||||
FORCE
|
||||
)
|
||||
message(STATUS "CMAKE_BUILD_TYPE unspecified: using ${CMAKE_BUILD_TYPE}")
|
||||
endif()
|
||||
|
||||
if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
|
||||
set(CMAKE_INSTALL_PREFIX
|
||||
"/opt/lib/rocshmem"
|
||||
CACHE PATH "default install path" FORCE)
|
||||
endif()
|
||||
message(STATUS "Installation path: ${CMAKE_INSTALL_PREFIX}")
|
||||
|
||||
###############################################################################
|
||||
# CREATE ROCSHMEM LIBRARY
|
||||
###############################################################################
|
||||
add_library(
|
||||
${PROJECT_NAME}
|
||||
STATIC
|
||||
""
|
||||
)
|
||||
add_library(${PROJECT_NAME}::${PROJECT_NAME} ALIAS ${PROJECT_NAME})
|
||||
|
||||
###############################################################################
|
||||
# INCLUDE DIRECTORIES
|
||||
###############################################################################
|
||||
target_include_directories(
|
||||
${PROJECT_NAME}
|
||||
PUBLIC
|
||||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
|
||||
$<BUILD_INTERFACE:${CMAKE_BINARY_DIR}> # CONFIG.H
|
||||
$<INSTALL_INTERFACE:include>
|
||||
)
|
||||
|
||||
###############################################################################
|
||||
# SPECIFY PUBLIC HEADER FILES
|
||||
###############################################################################
|
||||
set_target_properties(
|
||||
${PROJECT_NAME}
|
||||
PROPERTIES
|
||||
PUBLIC_HEADER
|
||||
"${CMAKE_BINARY_DIR}/config.h;${CMAKE_CURRENT_SOURCE_DIR}/include/roc_shmem/roc_shmem.hpp;${CMAKE_CURRENT_SOURCE_DIR}/include/roc_shmem/debug.hpp"
|
||||
)
|
||||
|
||||
###############################################################################
|
||||
# SUBDIRECTORY TARGETS
|
||||
###############################################################################
|
||||
add_subdirectory(src)
|
||||
add_subdirectory(docs)
|
||||
add_subdirectory(tests)
|
||||
|
||||
###############################################################################
|
||||
# HIP
|
||||
###############################################################################
|
||||
find_package(hip REQUIRED)
|
||||
|
||||
target_link_libraries(
|
||||
${PROJECT_NAME}
|
||||
PUBLIC
|
||||
hip::device
|
||||
hip::host
|
||||
)
|
||||
|
||||
###############################################################################
|
||||
# HSA-RUNTIME64
|
||||
###############################################################################
|
||||
find_package(hsa-runtime64 REQUIRED)
|
||||
|
||||
target_link_libraries(
|
||||
${PROJECT_NAME}
|
||||
PUBLIC
|
||||
hsa-runtime64::hsa-runtime64
|
||||
)
|
||||
|
||||
###############################################################################
|
||||
# PTHREADS
|
||||
###############################################################################
|
||||
set(CMAKE_THREAD_PREFER_PTHREAD TRUE)
|
||||
set(THREADS_PREFER_PTHREAD_FLAG TRUE)
|
||||
find_package(Threads REQUIRED)
|
||||
|
||||
target_link_libraries(
|
||||
${PROJECT_NAME}
|
||||
PUBLIC
|
||||
Threads::Threads
|
||||
)
|
||||
|
||||
###############################################################################
|
||||
# IBVERBS
|
||||
###############################################################################
|
||||
IF (USE_GPU_IB)
|
||||
find_package(Ibverbs REQUIRED)
|
||||
|
||||
target_include_directories(
|
||||
${PROJECT_NAME}
|
||||
PUBLIC
|
||||
${IBVERBS_INCLUDE_DIRS}
|
||||
)
|
||||
|
||||
target_link_libraries(
|
||||
${PROJECT_NAME}
|
||||
PUBLIC
|
||||
${IBVERBS_LIBRARIES}
|
||||
)
|
||||
ENDIF()
|
||||
|
||||
###############################################################################
|
||||
# MPI
|
||||
###############################################################################
|
||||
find_package(MPI REQUIRED)
|
||||
|
||||
target_include_directories(
|
||||
${PROJECT_NAME}
|
||||
PUBLIC
|
||||
${MPI_CXX_HEADER_DIR}
|
||||
)
|
||||
|
||||
target_link_libraries(
|
||||
${PROJECT_NAME}
|
||||
PUBLIC
|
||||
${MPI_mpi_LIBRARY}
|
||||
${MPI_mpicxx_LIBRARY}
|
||||
)
|
||||
|
||||
###############################################################################
|
||||
# MODULEFILE
|
||||
###############################################################################
|
||||
set(MOD_INSTALL_PATH
|
||||
"${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_DATAROOTDIR}/modulefiles"
|
||||
CACHE STRING "Install path for modulefile")
|
||||
message(STATUS "Modulefile install path: ${MOD_INSTALL_PATH}")
|
||||
|
||||
set(moduleFileTemplate "${PROJECT_NAME}.lua.in")
|
||||
|
||||
configure_file(
|
||||
${PROJECT_SOURCE_DIR}/cmake/${moduleFileTemplate}
|
||||
${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_DATAROOTDIR}/modulefiles/${PROJECT_NAME}/${ROCSHMEM_FULL_VERSION}.lua
|
||||
@ONLY)
|
||||
|
||||
# Crusher
|
||||
if(LOCALHOST MATCHES ".*\.crusher\.olcf\.ornl\.gov")
|
||||
list(APPEND CMAKE_MESSAGE_INDENT " ")
|
||||
message(STATUS "Using crusher-specific modulefile modification")
|
||||
file(READ ${PROJECT_SOURCE_DIR}/cmake/modfile.crusher.mod mod_additions)
|
||||
file(
|
||||
APPEND
|
||||
${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_DATAROOTDIR}/modulefiles/${PROJECT_NAME}/${ROCSHMEM_FULL_VERSION}.lua
|
||||
${mod_additions})
|
||||
list(POP_BACK CMAKE_MESSAGE_INDENT)
|
||||
endif()
|
||||
|
||||
# Frontier
|
||||
if(LOCALHOST MATCHES ".*\.frontier\.olcf\.ornl\.gov")
|
||||
list(APPEND CMAKE_MESSAGE_INDENT " ")
|
||||
message(STATUS "Using frontier-specific modulefile modification")
|
||||
file(READ ${PROJECT_SOURCE_DIR}/cmake/modfile.frontier.mod mod_additions)
|
||||
file(
|
||||
APPEND
|
||||
${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_DATAROOTDIR}/modulefiles/${PROJECT_NAME}/${ROCSHMEM_FULL_VERSION}.lua
|
||||
${mod_additions})
|
||||
list(POP_BACK CMAKE_MESSAGE_INDENT)
|
||||
endif()
|
||||
|
||||
# hpcfund
|
||||
if(LOCALHOST MATCHES ".*\.hpcfund")
|
||||
list(APPEND CMAKE_MESSAGE_INDENT " ")
|
||||
message(STATUS "Using hpcfund-specific modulefile modification")
|
||||
file(READ ${PROJECT_SOURCE_DIR}/cmake/modfile.hpcfund.mod mod_additions)
|
||||
file(
|
||||
APPEND
|
||||
${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_DATAROOTDIR}/modulefiles/${PROJECT_NAME}/${ROCSHMEM_FULL_VERSION}.lua
|
||||
${mod_additions})
|
||||
list(POP_BACK CMAKE_MESSAGE_INDENT)
|
||||
endif()
|
||||
|
||||
###############################################################################
|
||||
# INSTALL
|
||||
###############################################################################
|
||||
include(GNUInstallDirs)
|
||||
|
||||
# Specify layout within the build directory
|
||||
set(
|
||||
CMAKE_ARCHIVE_OUTPUT_DIRECTORY
|
||||
${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_LIBDIR}
|
||||
)
|
||||
set(
|
||||
CMAKE_LIBRARY_OUTPUT_DIRECTORY
|
||||
${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_LIBDIR}
|
||||
)
|
||||
set(
|
||||
CMAKE_RUNTIME_OUTPUT_DIRECTORY
|
||||
${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_BINDIR}
|
||||
)
|
||||
|
||||
# Offer alternatives for default installation subdirectories
|
||||
set(
|
||||
INSTALL_LIBDIR
|
||||
${CMAKE_INSTALL_LIBDIR} CACHE PATH
|
||||
"Installation directory for libraries"
|
||||
)
|
||||
set(
|
||||
INSTALL_BINDIR
|
||||
${CMAKE_INSTALL_BINDIR} CACHE PATH
|
||||
"Installation directory for executables"
|
||||
)
|
||||
set(
|
||||
INSTALL_INCLUDEDIR
|
||||
${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH
|
||||
"Installation directory for header files"
|
||||
)
|
||||
set(
|
||||
DEF_INSTALL_CMAKEDIR
|
||||
share/cmake/${PROJECT_NAME}
|
||||
)
|
||||
set(
|
||||
INSTALL_CMAKEDIR
|
||||
${DEF_INSTALL_CMAKEDIR} CACHE PATH
|
||||
"Installation directory for CMake files"
|
||||
)
|
||||
|
||||
install(
|
||||
TARGETS ${PROJECT_NAME} EXPORT ${PROJECT_NAME}Targets
|
||||
ARCHIVE
|
||||
DESTINATION ${INSTALL_LIBDIR}
|
||||
COMPONENT lib
|
||||
LIBRARY
|
||||
DESTINATION ${INSTALL_LIBDIR}
|
||||
COMPONENT lib
|
||||
PUBLIC_HEADER
|
||||
DESTINATION ${INSTALL_INCLUDEDIR}
|
||||
COMPONENT dev
|
||||
RUNTIME
|
||||
DESTINATION ${INSTALL_BINDIR}
|
||||
COMPONENT bin
|
||||
)
|
||||
|
||||
install(
|
||||
EXPORT
|
||||
${PROJECT_NAME}Targets
|
||||
FILE
|
||||
${PROJECT_NAME}Targets.cmake
|
||||
NAMESPACE
|
||||
${PROJECT_NAME}::
|
||||
DESTINATION
|
||||
${INSTALL_CMAKEDIR}
|
||||
COMPONENT
|
||||
dev
|
||||
)
|
||||
|
||||
install(
|
||||
FILES
|
||||
${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake
|
||||
${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake
|
||||
DESTINATION
|
||||
${INSTALL_CMAKEDIR}
|
||||
)
|
||||
|
||||
install(
|
||||
FILES
|
||||
${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_DATAROOTDIR}/modulefiles/${PROJECT_NAME}/${ROCSHMEM_FULL_VERSION}.lua
|
||||
DESTINATION ${MOD_INSTALL_PATH}/${PROJECT_NAME})
|
||||
|
||||
###############################################################################
|
||||
# PACKAGE
|
||||
###############################################################################
|
||||
include(CMakePackageConfigHelpers)
|
||||
|
||||
write_basic_package_version_file(
|
||||
${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake
|
||||
VERSION
|
||||
${PROJECT_VERSION}
|
||||
COMPATIBILITY
|
||||
SameMajorVersion
|
||||
)
|
||||
|
||||
configure_package_config_file(
|
||||
${PROJECT_SOURCE_DIR}/cmake/${PROJECT_NAME}Config.cmake.in
|
||||
${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake
|
||||
INSTALL_DESTINATION
|
||||
${INSTALL_CMAKEDIR}
|
||||
)
|
||||
@@ -0,0 +1,59 @@
|
||||
## How to fork from us
|
||||
|
||||
To keep our development fast and conflict free, we recommend you to [fork](https://github.com/ROCm-Developer-Tools/ROC_SHMEM/fork) our repository and start your work from our `dev` branch in your private repository.
|
||||
|
||||
Afterwards, git clone your repository to your local machine. But that is not it! To keep track of the original develop repository, add it as another remote.
|
||||
|
||||
```
|
||||
git remote add mainline https://github.com/ROCm-Developer-Tools/ROC_SHMEM.git
|
||||
git checkout dev
|
||||
```
|
||||
|
||||
As always in git, start a new branch with
|
||||
|
||||
```
|
||||
git checkout -b topic-<yourFeatureName>
|
||||
```
|
||||
|
||||
and apply your changes there.
|
||||
|
||||
## How to contribute to ROCSHMEM
|
||||
|
||||
### Did you find a bug?
|
||||
|
||||
- Ensure the bug was not already reported by searching on GitHub under [Issues](https://github.com/ROCm-Developer-Tools/ROC_SHMEM/issues).
|
||||
|
||||
- If you're unable to find an open issue addressing the problem, [open a new one](https://github.com/ROCm-Developer-Tools/ROC_SHMEM/issues/new).
|
||||
|
||||
### Did you write a patch that fixes a bug?
|
||||
|
||||
- Open a new GitHub [pull request](https://github.com/ROCm-Developer-Tools/ROC_SHMEM/compare) with the patch.
|
||||
|
||||
- Ensure the PR description clearly describes the problem and solution. If there is an existing GitHub issue open describing this bug, please include it in the description so we can close it.
|
||||
|
||||
- Ensure the PR is based on the `dev` branch of the ROCSHMEM GitHub repository.
|
||||
|
||||
- ROCSHMEM requires new commits to include a "Signed-off-by" token in the commit message (typically enabled via the `git commit -s` option), indicating your agreement to the projects's [Developer's Certificate of Origin](https://developercertificate.org/) and compatability with the project [LICENSE](https://github.com/ROCm-Developer-Tools/ROC_SHMEM/blob/main/LICENSE):
|
||||
|
||||
|
||||
> (a) The contribution was created in whole or in part by me and I
|
||||
> have the right to submit it under the open source license
|
||||
> indicated in the file; or
|
||||
>
|
||||
> (b) The contribution is based upon previous work that, to the best
|
||||
> of my knowledge, is covered under an appropriate open source
|
||||
> license and I have the right under that license to submit that
|
||||
> work with modifications, whether created in whole or in part
|
||||
> by me, under the same open source license (unless I am
|
||||
> permitted to submit under a different license), as indicated
|
||||
> in the file; or
|
||||
>
|
||||
> (c) The contribution was provided directly to me by some other
|
||||
> person who certified (a), (b) or (c) and I have not modified
|
||||
> it.
|
||||
>
|
||||
> (d) I understand and agree that this project and the contribution
|
||||
> are public and that a record of the contribution (including all
|
||||
> personal information I submit with it, including my sign-off) is
|
||||
> maintained indefinitely and may be redistributed consistent with
|
||||
> this project or the open source license(s) involved.
|
||||
@@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
@@ -1 +1,194 @@
|
||||
# rocSHMEM-internal
|
||||
# ROCm OpenSHMEM (ROC_SHMEM)
|
||||
|
||||
The ROCm OpenSHMEM (ROC_SHMEM) runtime is part of an AMD Research
|
||||
initiative to provide a unified runtime for heterogeneous systems.
|
||||
ROC_SHMEM supports both host-centric (a traditional host-driven
|
||||
OpenSHMEM runtime) and GPU-centric networking (provided a GPU kernel
|
||||
the ability to perform network operations) through an
|
||||
OpenSHMEM-like interface. This intra-kernel networking simplifies application
|
||||
code complexity and enables more fine-grained communication/computation
|
||||
overlap than traditional host-driven networking.
|
||||
|
||||
ROC_SHMEM's primary target is heterogeneous computing; hence, for both
|
||||
CPU-centric and GPU-centric communications, ROC_SHMEM uses a single
|
||||
symmetric heap (SHEAP) that is allocated on GPU memories.
|
||||
|
||||
ROC_SHMEM's GPU-centric communication has two different backend designs.
|
||||
The backends primarily differ in their implementations of
|
||||
intra-kernel networking.
|
||||
|
||||
The first design will be referred to as the the GPU InfiniBand (GPU-IB)
|
||||
backend. This backend implements a lightweight InfiniBand verbs interface
|
||||
on the GPU. The GPU itself is responsible with building commands and ringing
|
||||
the doorbell on the NIC to send network commands. GPU-IB is the default and
|
||||
preferred backend design that offers the best performance.
|
||||
|
||||
The second design will be referred to as the Reverse Offload (RO) backend. With
|
||||
the RO backend, the GPU runtime forwards ROC_SHMEM networking operations to the
|
||||
host-side runtime, which calls into a traditional MPI or OpenSHMEM
|
||||
implementation. This forwarding of requests is transparent to the
|
||||
programmer, who only sees the GPU-side interface.
|
||||
|
||||
Both designs of the GPU-centric interface coexist seamlessly with the
|
||||
CPU-centric interface of the unified runtime. ROC_SHMEM ensures that CPU-centric
|
||||
updates to the SHEAP are consistent and visible to a GPU kernel that is executing
|
||||
in parallel to host-initiated communication.
|
||||
|
||||
## Limitations
|
||||
|
||||
ROC_SHMEM is an experimental prototype from AMD Research and not an official
|
||||
ROCm product. The software is provided as-is with no guarantees of support
|
||||
from AMD or AMD Research.
|
||||
|
||||
ROC_SHMEM base requirements:
|
||||
* ROCm version 4.3.1 onwards
|
||||
* May work with other versions, but not tested
|
||||
* AMD GFX9 GPUs (e.g.: MI25, Vega 56, Vega 64, MI50, MI60, MI100, Radeon VII)
|
||||
* AMD MI200 GPUs: To enable the support on MI200, please configure the library
|
||||
with USE_CACHED_HEAP
|
||||
* ROCm-aware MPI as described in
|
||||
[Building the Dependencies](#building-the-dependencies)
|
||||
* InfiniBand adaptor compatable with ROCm RDMA technology
|
||||
* UCX 1.6 or greater with ROCm support
|
||||
|
||||
ROC_SHMEM optional requirements
|
||||
* For Documentation:
|
||||
* Doxygen
|
||||
|
||||
ROC_SHMEM only supports HIP applications. There are no plans to port to
|
||||
OpenCL.
|
||||
|
||||
## Building and Installation
|
||||
|
||||
ROC_SHMEM uses the CMake build system. The CMakeLists file contains
|
||||
additional details about library options.
|
||||
|
||||
To create an out-of-source build:
|
||||
|
||||
mkdir build
|
||||
cd build
|
||||
|
||||
Next, choose one configuration from the build_configs subdirectory. These
|
||||
scripts pass configuration options to CMake to setup canonical builds which
|
||||
are regularly tested:
|
||||
|
||||
../scripts/build_configs/dc_single
|
||||
../scripts/build_configs/dc_multi
|
||||
../scripts/build_configs/rc_single
|
||||
../scripts/build_configs/rc_multi
|
||||
../scripts/build_configs/rc_multi_wf_coal
|
||||
../scripts/build_configs/ro_net_basic
|
||||
|
||||
By default, the library is installed in `~/rocshmem`. You may provide a
|
||||
custom install path by supplying it as an argument. For example:
|
||||
|
||||
../scripts/build_configs/rc_single /path/to/install
|
||||
|
||||
## Compiling/linking and Running with ROC_SHMEM
|
||||
|
||||
ROC_SHMEM is built as a host and device side library that can be statically
|
||||
linked to your application during compilation using hipcc.
|
||||
|
||||
During the compilation of your application, include the ROC_SHMEM header files
|
||||
and the ROC_SHMEM library when using hipcc:
|
||||
|
||||
-I/path/to/rocshmem/install/include
|
||||
-L/path/to/rocshmem/install/lib -lrocshmem
|
||||
|
||||
NOTE: ROC_SHMEM depends on MPI for its host code. So, you will need to link
|
||||
to an MPI library. Since you must use the hipcc compiler, the arguments for
|
||||
MPI linkage must be added manually as opposed to using mpicc. Similary,
|
||||
ROC_SHMEM depends on Verbs for its device code. So, you will need to link
|
||||
to a Verbs library.
|
||||
|
||||
When using hipcc directly (as opposed to through a build system), we
|
||||
recommend performing the compilation and linking steps separately.
|
||||
Here are the steps to build a standalone program, say
|
||||
roc_shmem_hello.cpp.
|
||||
|
||||
```
|
||||
# Compile
|
||||
/opt/rocm/bin/hipcc ./roc_shmem_hello.cpp -I/path/to/rocshmem/install/include -fgpu-rdc -o ./roc_shmem_hello.o -c
|
||||
|
||||
# Link
|
||||
/opt/rocm/bin/hipcc ./roc_shmem_hello.o /path/to/rocshmem/install/lib/librocshmem.a -lmpi -lmlx5 -libverbs -lhsa-runtime64 -fgpu-rdc -o roc_shmem_hello
|
||||
|
||||
```
|
||||
|
||||
If your project uses cmake, please refer to the CMakeLists.txt files
|
||||
in the clients directory for examples. You may also find the
|
||||
[Using CMake with AMD ROCm](https://rocmdocs.amd.com/en/latest/conceptual/cmake-packages.html)
|
||||
page useful.
|
||||
|
||||
## Runtime Parameters
|
||||
|
||||
ROC_SHMEM_HEAP_SIZE (default : 1 GB)
|
||||
Defines the size of the OpenSHMEM symmetric heap
|
||||
Note the heap is on the GPU memory.
|
||||
|
||||
ROC_SHMEM_SQ_SIZE (default 1024)
|
||||
Defines the size of the SQ as number of network
|
||||
packet (WQE). Each WQE is 64B. This only for
|
||||
GPU-IB conduit
|
||||
|
||||
ROC_SHMEM_USE_CQ_GPU_MEM (default : 1)
|
||||
Set the placement of CQ on GPU memory (1)
|
||||
or CPU memory (0)
|
||||
|
||||
ROC_SHMEM_USE_SQ_GPU_MEM (default : 1)
|
||||
Set the placement of SQ on GPU memory (1)
|
||||
or CPU memory (0)
|
||||
|
||||
RO_NET_CPU_QUEUE (default: not set)
|
||||
Force producer/consumer queues between CPU and GPU to
|
||||
be in CPU memory. RO backend only.
|
||||
|
||||
ROC_SHMEM also requires the following environment variable be set for ROCm:
|
||||
|
||||
export HSA_FORCE_FINE_GRAIN_PCIE=1
|
||||
|
||||
## Documentation
|
||||
|
||||
To generate doxygen documentation for ROC_SHMEM's API, run the following
|
||||
from the library's build directory:
|
||||
|
||||
make docs
|
||||
|
||||
The doxygen output will be in the `docs` folder of the build directory.
|
||||
|
||||
## Examples
|
||||
|
||||
ROC_SHMEM is similar to OpenSHMEM and should be familiar to programmers who
|
||||
have experience with OpenSHMEM or other PGAS network programming APIs in the
|
||||
context of CPUs. The best way to learn how to use ROC_SHMEM is to read the
|
||||
autogenerated doxygen documentation for functions described in
|
||||
`roc_shmem/roc_shmem.hpp`, or to look at the provided sample applications in the
|
||||
`tests/` folder. ROC_SHMEM is shipped with a basic test suite for the
|
||||
supported ROC_SHMEM API. The examples test Puts, Gets, nonblocking Puts,
|
||||
nonblocking Gets, Quiets, Atomics, Tests, Wai-untils, Broadcasts, and
|
||||
Reductions.
|
||||
|
||||
To run the examples, you may use the driver scripts provided in respective
|
||||
folders of device- or host-initiated communication examples. Simply
|
||||
executing `./driver.sh` will show the help message on how to use the script.
|
||||
Here are some example uses of the driver script:
|
||||
|
||||
./scripts/functional_tests/driver.sh ./build/rocshmem_example_driver single_thread ./build (for device-initiated communication)
|
||||
./scripts/sos_tests/driver.sh ./build short (for host-initiated communication)
|
||||
|
||||
## Building the Dependencies
|
||||
|
||||
ROC_SHMEM requires an MPI runtime on the host that supports ROCm-Aware MPI.
|
||||
Currently all ROCm-Aware MPI runtimes require the usage of ROCm-Aware UCX.
|
||||
|
||||
To build and configure ROCm-Aware UCX, you need to:
|
||||
1. Download the latest UCX
|
||||
2. Configure and build UCX with ROCm support: --with-rocm=/opt/rocm
|
||||
|
||||
Then, you need to build your MPI (OpenMPI or MPICH CH4) with UCX support.
|
||||
|
||||
For more information on OpenMPI-UCX support, please visit:
|
||||
https://github.com/openucx/ucx/wiki/OpenMPI-and-OpenSHMEM-installation-with-UCX
|
||||
|
||||
For more information on MPICH-UCX support, please visit:
|
||||
https://www.mpich.org/about/news/
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
1.6.0
|
||||
@@ -0,0 +1,62 @@
|
||||
###############################################################################
|
||||
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to
|
||||
# deal in the Software without restriction, including without limitation the
|
||||
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
# sell copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
###############################################################################
|
||||
|
||||
find_package(PkgConfig REQUIRED QUIET)
|
||||
pkg_check_modules(PC_IBVERBS QUIET libibverbs)
|
||||
|
||||
find_path(
|
||||
IBVERBS_INCLUDE_DIR infiniband/verbs.h
|
||||
HINTS ${PC_IBVERBS_INCLUDEDIR} ${PC_IBVERBS_INCLUDE_DIRS}
|
||||
PATH_SUFFIXES include
|
||||
)
|
||||
|
||||
find_library(
|
||||
IBVERBS_LIBRARY
|
||||
NAMES ibverbs libibverbs
|
||||
HINTS ${PC_IBVERBS_LIBDIR} ${PC_IBVERBS_LIBRARY_DIRS}
|
||||
PATH_SUFFIXES lib lib64
|
||||
)
|
||||
|
||||
find_library(
|
||||
MLX5_LIBRARY
|
||||
NAMES mlx5 libmlx5
|
||||
HINTS ${PC_IBVERBS_LIBDIR} ${PC_IBVERBS_LIBRARY_DIRS}
|
||||
PATH_SUFFIXES lib lib64
|
||||
)
|
||||
|
||||
set(
|
||||
IBVERBS_LIBRARIES
|
||||
${IBVERBS_LIBRARY} ${MLX5_LIBRARY}
|
||||
CACHE INTERNAL ""
|
||||
)
|
||||
|
||||
set(
|
||||
IBVERBS_INCLUDE_DIRS
|
||||
${IBVERBS_INCLUDE_DIR}
|
||||
CACHE INTERNAL ""
|
||||
)
|
||||
|
||||
find_package_handle_standard_args(
|
||||
Ibverbs DEFAULT_MSG IBVERBS_LIBRARY IBVERBS_INCLUDE_DIR
|
||||
)
|
||||
|
||||
mark_as_advanced(IBVERBS_LIBRARY IBVERBS_INCLUDE_DIR)
|
||||
@@ -0,0 +1 @@
|
||||
@ROCSHMEM_GIT_REV@
|
||||
@@ -0,0 +1,16 @@
|
||||
#cmakedefine DEBUG
|
||||
#cmakedefine PROFILE
|
||||
#cmakedefine USE_GPU_IB
|
||||
#cmakedefine USE_DC
|
||||
#cmakedefine USE_IPC
|
||||
#cmakedefine USE_THREADS
|
||||
#cmakedefine USE_SHARED_CTX
|
||||
#cmakedefine USE_WF_COAL
|
||||
#cmakedefine USE_COHERENT_HEAP
|
||||
#cmakedefine USE_CACHED_HEAP
|
||||
#cmakedefine USE_MANAGED_HEAP
|
||||
#cmakedefine USE_HOST_HEAP
|
||||
#cmakedefine USE_HIP_HOST_HEAP
|
||||
#cmakedefine USE_FUNC_CALL
|
||||
#cmakedefine USE_SINGLE_NODE
|
||||
#cmakedefine USE_HOST_SIDE_HDP_FLUSH
|
||||
@@ -0,0 +1,4 @@
|
||||
-- Crusher-specific additions
|
||||
depends_on "craype-accel-amd-gfx90a"
|
||||
depends_on "amd-mixed"
|
||||
depends_on "cray-mpich"
|
||||
@@ -0,0 +1,3 @@
|
||||
-- Crusher-specific additions
|
||||
depends_on "rocm"
|
||||
prereq(atleast("rocm","5.3.0"))
|
||||
@@ -0,0 +1,4 @@
|
||||
-- hpcfund-specific additions
|
||||
depends_on "rocm"
|
||||
prereq(atleast("rocm","6.0.2"))
|
||||
|
||||
@@ -0,0 +1,33 @@
|
||||
local help_message = [[
|
||||
|
||||
ROC_SHMEM is an open-source GPU initiated networking library
|
||||
for High Performance Computing and Machine Learning workloads.
|
||||
|
||||
Version @ROCSHMEM_FULL_VERSION@
|
||||
]]
|
||||
|
||||
help(help_message,"\n")
|
||||
|
||||
whatis("Name: rocshmem")
|
||||
whatis("Version: @ROCSHMEM_FULL_VERSION@")
|
||||
whatis("Keywords: GPU, PGAS, RMA, HPC")
|
||||
whatis("Description: tool for GPU initiated networking")
|
||||
whatis("URL: https://github.com/ROCm-Developer-Tools/ROC_SHMEM")
|
||||
|
||||
-- Export environmental variables
|
||||
local topDir="@CMAKE_INSTALL_PREFIX@"
|
||||
local binDir="@CMAKE_INSTALL_FULL_BINDIR@"
|
||||
local shareDir="@CMAKE_INSTALL_FULL_DATADIR@"
|
||||
local pythonDeps="@PYTHON_DEPS@"
|
||||
|
||||
setenv("ROCSHMEM_DIR",topDir)
|
||||
setenv("ROCSHMEM_BIN",binDir)
|
||||
setenv("ROCSHMEM_SHARE",shareDir)
|
||||
setenv("MPICH_GPU_SUPPORT_ENABLED",1)
|
||||
|
||||
-- Update relevant PATH variables
|
||||
prepend_path("PATH",binDir)
|
||||
if ( pythonDeps ~= "" ) then
|
||||
prepend_path("PYTHONPATH",pythonDeps)
|
||||
end
|
||||
|
||||
@@ -0,0 +1,7 @@
|
||||
@PACKAGE_INIT@
|
||||
|
||||
include("${CMAKE_CURRENT_LIST_DIR}/rocshmemTargets.cmake")
|
||||
|
||||
check_required_components(
|
||||
"rocshmem"
|
||||
)
|
||||
@@ -0,0 +1,39 @@
|
||||
###############################################################################
|
||||
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to
|
||||
# deal in the Software without restriction, including without limitation the
|
||||
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
# sell copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
###############################################################################
|
||||
|
||||
find_package(Doxygen)
|
||||
if (DOXYGEN_FOUND)
|
||||
set(DOXYGEN_IN ${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in)
|
||||
set(DOXYGEN_SOURCE_BROWSER YES)
|
||||
set(DOXYGEN_EXTRACT_PRIVATE YES)
|
||||
set(DOXYGEN_MACRO_EXPANSION YES)
|
||||
set(DOXYGEN_GENERATE_LATEX YES)
|
||||
set(DOXYGEN_USE_PDFLATEX YES)
|
||||
|
||||
configure_file(${DOXYGEN_IN} ${DOXYGEN_OUT} @ONLY)
|
||||
doxygen_add_docs(
|
||||
docs
|
||||
${PROJECT_SOURCE_DIR}
|
||||
COMMENT "Generate man pages"
|
||||
MACRO_EXPANSION YES
|
||||
)
|
||||
endif()
|
||||
تفاوت فایلی نمایش داده نمی شود زیرا این فایل بسیار بزرگ است
Diff را بارگزاری کن
@@ -0,0 +1,59 @@
|
||||
# Builds for host-facing support
|
||||
|
||||
As of July 29, 2021 (when support for host-facing functions was merged in),
|
||||
we are running it with MPICH-3.4 and UCX-1.10.0.
|
||||
|
||||
## Background
|
||||
|
||||
MPICH-3.4 does not have support for HIP (GPU support in MPICH checks whether
|
||||
the buffer is on the host or the device).
|
||||
|
||||
UCX-1.10.0 claims it does not support GPU-aware communication for RMA
|
||||
operations. As of the time of this writing, this claim remains even on the
|
||||
latest UCX version (1.11.0 and the master branch). UCX developers to merge
|
||||
in stable GPU-aware support by the end of 2021.
|
||||
|
||||
A side note: OSU microbechmarks with RoCM memory hang with UCX-1.10.0
|
||||
(during ucp_mem_map() which is called by MPI_Win_create()).
|
||||
I don't see this hang with the latest version of UCX.
|
||||
|
||||
So, for RoCM memory, MPICH-3.4 still offloads RMA operations to UCX.
|
||||
|
||||
## So, how does it work with the current builds?
|
||||
|
||||
Theoretically, there are no limitations preventing GPU-aware RDMA
|
||||
communicaiton. As long as the GPU memory is registered with the NIC,
|
||||
the NIC can perform operations on device memory.
|
||||
|
||||
Even though UCX claims to not support GPU-aware RMA communication, it does
|
||||
not check whether or not the buffer being passed in is a device or host
|
||||
buffer. So, as long as the device memory being used is registered with the
|
||||
NIC (this does occur during MPI_Win_create), we are good.
|
||||
|
||||
UCX claims to not support GPU-aware communication because they have
|
||||
not added in support for the different types of scenarios that could
|
||||
exist in a system (eg, when a system does not have GPU-direct). The
|
||||
scope of ROC_SHMEM is currenlty limited to configurations that UCX
|
||||
already supports.
|
||||
|
||||
## But the main branch of MPICH does support HIP now?
|
||||
|
||||
Since MPICH is going off of UCX's claim that it does not support
|
||||
GPU-aware RMA communication, MPICH executes its RMA operations
|
||||
using active messages when it notices that the buffer is a GPU
|
||||
buffer. So, if we use MPICH with HIP support, we end up using
|
||||
active-message implementations unnecessarily, and hence lose
|
||||
a lot of performance.
|
||||
|
||||
## Moving forward
|
||||
|
||||
We should switch to using MPICH "correctly" (i.e. with HIP support)
|
||||
only when UCX officially claims to support GPU-aware RMA
|
||||
communication because that is when MPICH will offload MPI
|
||||
RMA operations to UCX RMA operations.
|
||||
|
||||
But if there is a need for MPICH's HIP support for GPU IPC (unsure
|
||||
if this is needed for now), we will need an alternative. In the
|
||||
current MPICH configuration, communication between processes on
|
||||
the same node are funneled through the netmod (UCX in our case) as
|
||||
well.
|
||||
@@ -0,0 +1,34 @@
|
||||
/******************************************************************************
|
||||
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal in the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
* sell copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef LIBRARY_INCLUDE_DEBUG_HPP
|
||||
#define LIBRARY_INCLUDE_DEBUG_HPP
|
||||
|
||||
namespace rocshmem {
|
||||
|
||||
void debug_print_cq(int dest_pe, int src_wg, int cqe_index);
|
||||
|
||||
void debug_print_sq(int dest_pe, int src_wg, int index_wqe);
|
||||
|
||||
} // namespace rocshmem
|
||||
|
||||
#endif // LIBRARY_INCLUDE_DEBUG_HPP
|
||||
تفاوت فایلی نمایش داده نمی شود زیرا این فایل بسیار بزرگ است
Diff را بارگزاری کن
@@ -0,0 +1,96 @@
|
||||
###############################################################################
|
||||
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to
|
||||
# deal in the Software without restriction, including without limitation the
|
||||
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
# sell copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
###############################################################################
|
||||
|
||||
cmake_minimum_required(VERSION 3.16.3 FATAL_ERROR)
|
||||
|
||||
###############################################################################
|
||||
# GLOBAL COMPILE FLAGS
|
||||
###############################################################################
|
||||
set(CMAKE_CXX_EXTENSIONS OFF)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
set(CMAKE_CXX_COMPILER /opt/rocm/bin/hipcc )
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "-O0 -ggdb")
|
||||
|
||||
###############################################################################
|
||||
# DEFAULT BUILD TYPE
|
||||
###############################################################################
|
||||
if(NOT CMAKE_BUILD_TYPE)
|
||||
message(STATUS "CMAKE_BUILD_TYPE unspecified: generating Release build")
|
||||
|
||||
set(
|
||||
CMAKE_BUILD_TYPE
|
||||
"Release"
|
||||
CACHE
|
||||
STRING
|
||||
"build type: Release, Debug, RelWithDebInfo, MinSizeRel"
|
||||
FORCE
|
||||
)
|
||||
endif()
|
||||
|
||||
###############################################################################
|
||||
# PROJECT
|
||||
###############################################################################
|
||||
project(rocshmem_example_driver VERSION 1.1.0 LANGUAGES CXX)
|
||||
|
||||
###############################################################################
|
||||
# SOURCES
|
||||
###############################################################################
|
||||
add_executable(${PROJECT_NAME} "")
|
||||
|
||||
target_include_directories(
|
||||
${PROJECT_NAME}
|
||||
PRIVATE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}
|
||||
)
|
||||
|
||||
target_sources(
|
||||
${PROJECT_NAME}
|
||||
PRIVATE
|
||||
test_driver.cpp
|
||||
tester.cpp
|
||||
tester_arguments.cpp
|
||||
primitive_tester.cpp
|
||||
)
|
||||
|
||||
###############################################################################
|
||||
# ROCSHMEM
|
||||
###############################################################################
|
||||
find_package(hip REQUIRED)
|
||||
find_package(rocshmem CONFIG REQUIRED)
|
||||
|
||||
target_include_directories(
|
||||
${PROJECT_NAME}
|
||||
PRIVATE
|
||||
rocshmem::rocshmem
|
||||
)
|
||||
|
||||
target_link_libraries(
|
||||
${PROJECT_NAME}
|
||||
PRIVATE
|
||||
rocshmem::rocshmem
|
||||
hip::host
|
||||
-fgpu-rdc
|
||||
--hipcc-func-supp
|
||||
# xnack allows address translation fault recovery
|
||||
# required option for managed heap configs
|
||||
# -mxnack
|
||||
)
|
||||
@@ -0,0 +1,17 @@
|
||||
#!/bin/bash
|
||||
|
||||
if [ -z $1 ]
|
||||
then
|
||||
install_path=~/rocshmem
|
||||
else
|
||||
install_path=$1
|
||||
fi
|
||||
|
||||
src_path=$(dirname "$(realpath $0)")/..
|
||||
|
||||
cmake \
|
||||
-DCMAKE_BUILD_TYPE=Debug \
|
||||
-DCMAKE_VERBOSE_MAKEFILE=ON \
|
||||
-Drocshmem_DIR=$install_path/share/cmake/rocshmem \
|
||||
$src_path
|
||||
cmake --build . --parallel 8
|
||||
@@ -0,0 +1,17 @@
|
||||
#!/bin/bash
|
||||
|
||||
if [ -z $1 ]
|
||||
then
|
||||
install_path=~/rocshmem
|
||||
else
|
||||
install_path=$1
|
||||
fi
|
||||
|
||||
src_path=$(dirname "$(realpath $0)")/..
|
||||
|
||||
cmake \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DCMAKE_VERBOSE_MAKEFILE=OFF \
|
||||
-Drocshmem_DIR=$install_path/share/cmake/rocshmem \
|
||||
$src_path
|
||||
cmake --build . --parallel 8
|
||||
@@ -0,0 +1,143 @@
|
||||
/******************************************************************************
|
||||
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal in the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
* sell copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "primitive_tester.hpp"
|
||||
|
||||
#include <roc_shmem/roc_shmem.hpp>
|
||||
#include <debug.hpp>
|
||||
|
||||
#include <unistd.h>
|
||||
|
||||
using namespace rocshmem;
|
||||
|
||||
/******************************************************************************
|
||||
* DEVICE TEST KERNEL
|
||||
*****************************************************************************/
|
||||
__global__ void
|
||||
PrimitiveTest(int loop,
|
||||
int *flag,
|
||||
char *s_buf,
|
||||
char *r_buf,
|
||||
int size,
|
||||
int my_pe,
|
||||
ShmemContextType ctx_type)
|
||||
{
|
||||
__shared__ roc_shmem_ctx_t ctx;
|
||||
roc_shmem_wg_init();
|
||||
roc_shmem_wg_ctx_create(ctx_type, &ctx);
|
||||
|
||||
int block_id = hipBlockIdx_x;
|
||||
for(int i =0; i< loop; i++){
|
||||
roc_shmemx_ctx_putmem_nbi_wg(ctx, &r_buf[my_pe*size], &s_buf[block_id * size], size, block_id);
|
||||
if(hipThreadIdx_x==0){
|
||||
//roc_shmem_ctx_quiet(ctx);
|
||||
//roc_shmem_ctx_threadfence_system(ctx);
|
||||
roc_shmem_ctx_int_p(ctx, &flag[my_pe], i+1, block_id);
|
||||
//roc_shmem_ctx_quiet(ctx);
|
||||
roc_shmem_int_wait_until(&flag[block_id], ROC_SHMEM_CMP_EQ, i+1);
|
||||
|
||||
}
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
roc_shmem_wg_ctx_destroy(ctx);
|
||||
roc_shmem_wg_finalize();
|
||||
}
|
||||
|
||||
/******************************************************************************
|
||||
* HOST TESTER CLASS METHODS
|
||||
*****************************************************************************/
|
||||
PrimitiveTester::PrimitiveTester(TesterArguments args)
|
||||
: Tester(args)
|
||||
{
|
||||
flag = (int*) roc_shmem_malloc(args.numprocs);
|
||||
memset(flag, 0, args.numprocs*sizeof(int));
|
||||
// s_buf = (char *)roc_shmem_malloc(args.max_msg_size * args.wg_size);
|
||||
// r_buf = (char *)roc_shmem_malloc(args.max_msg_size * args.wg_size);
|
||||
}
|
||||
|
||||
PrimitiveTester::~PrimitiveTester()
|
||||
{
|
||||
roc_shmem_free(s_buf);
|
||||
roc_shmem_free(r_buf);
|
||||
}
|
||||
|
||||
void
|
||||
PrimitiveTester::resetBuffers(uint64_t size)
|
||||
{
|
||||
memset(s_buf, '0', size * args.numprocs);
|
||||
memset(r_buf, '1', size * args.numprocs);
|
||||
}
|
||||
|
||||
void
|
||||
PrimitiveTester::launchKernel(dim3 gridSize,
|
||||
dim3 blockSize,
|
||||
int loop,
|
||||
uint64_t size,
|
||||
int nproc, int my_pe)
|
||||
{
|
||||
|
||||
void* sendBuf = malloc(64);
|
||||
void* recvBuf = malloc(64 * nproc);
|
||||
|
||||
s_buf = (char *)roc_shmem_malloc(size * nproc);
|
||||
r_buf = (char *)roc_shmem_malloc(size * nproc);
|
||||
resetBuffers(size);
|
||||
|
||||
MPI_Allgather(sendBuf, 64, MPI_CHAR,
|
||||
recvBuf, 64, MPI_CHAR,
|
||||
MPI_COMM_WORLD);
|
||||
|
||||
size_t shared_bytes;
|
||||
roc_shmem_dynamic_shared(&shared_bytes);
|
||||
|
||||
hipLaunchKernelGGL(PrimitiveTest,
|
||||
gridSize,
|
||||
blockSize,
|
||||
shared_bytes,
|
||||
stream,
|
||||
loop,
|
||||
flag,
|
||||
s_buf,
|
||||
r_buf,
|
||||
size,
|
||||
my_pe,
|
||||
_shmem_context);
|
||||
|
||||
//num_msgs = (loop + args.skip) * gridSize.x;
|
||||
num_timed_msgs = loop ;
|
||||
}
|
||||
|
||||
void
|
||||
PrimitiveTester::verifyResults(uint64_t size)
|
||||
{
|
||||
int check_id =0;
|
||||
if (args.myid == check_id) {
|
||||
for (int i = 0; i < size*args.numprocs; i++) {
|
||||
if (r_buf[i] != '0') {
|
||||
fprintf(stderr, "Data validation error at idx %d\n", i);
|
||||
fprintf(stderr, "Got %c, Expected %c\n", r_buf[i], '0');
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,57 @@
|
||||
/******************************************************************************
|
||||
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal in the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
* sell copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef _PRIMITIVE_TESTER_HPP_
|
||||
#define _PRIMITIVE_TESTER_HPP_
|
||||
|
||||
#include "tester.hpp"
|
||||
#include <mpi.h>
|
||||
|
||||
/******************************************************************************
|
||||
* HOST TESTER CLASS
|
||||
*****************************************************************************/
|
||||
class PrimitiveTester : public Tester
|
||||
{
|
||||
public:
|
||||
explicit PrimitiveTester(TesterArguments args);
|
||||
virtual ~PrimitiveTester();
|
||||
|
||||
protected:
|
||||
virtual void
|
||||
resetBuffers(uint64_t size) override;
|
||||
|
||||
virtual void
|
||||
launchKernel(dim3 gridSize,
|
||||
dim3 blockSize,
|
||||
int loop,
|
||||
uint64_t size,
|
||||
int nproc, int my_pe) override;
|
||||
|
||||
virtual void
|
||||
verifyResults(uint64_t size) override;
|
||||
|
||||
char *s_buf = nullptr;
|
||||
char *r_buf = nullptr;
|
||||
int *flag = nullptr;
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,84 @@
|
||||
/******************************************************************************
|
||||
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal in the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
* sell copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include <roc_shmem/roc_shmem.hpp>
|
||||
|
||||
#include "tester.hpp"
|
||||
#include "tester_arguments.hpp"
|
||||
|
||||
using namespace rocshmem;
|
||||
|
||||
int main(int argc, char * argv[])
|
||||
{
|
||||
/**
|
||||
* Setup the tester arguments.
|
||||
*/
|
||||
TesterArguments args(argc, argv);
|
||||
|
||||
/***
|
||||
* Select a GPU
|
||||
*/
|
||||
int rank = roc_shmem_my_pe();
|
||||
int ndevices, my_device=0;
|
||||
hipGetDeviceCount (&ndevices);
|
||||
my_device = rank % ndevices;
|
||||
hipSetDevice(my_device);
|
||||
|
||||
/**
|
||||
* Must initialize rocshmem to access arguments needed by the tester.
|
||||
*/
|
||||
roc_shmem_init(args.num_wgs);
|
||||
|
||||
/**
|
||||
* Now grab the arguments from rocshmem.
|
||||
*/
|
||||
args.get_rocshmem_arguments();
|
||||
|
||||
/**
|
||||
* Using the arguments we just constructed, call the tester factory
|
||||
* method to get the tester (specified by the arguments).
|
||||
*/
|
||||
std::vector<Tester *> tests = Tester::create(args);
|
||||
|
||||
/**
|
||||
* Run the tests
|
||||
*/
|
||||
for (auto test : tests) {
|
||||
test->execute();
|
||||
|
||||
/**
|
||||
* The tester factory method news the tester to create it so we clean
|
||||
* up the memory here.
|
||||
*/
|
||||
delete test;
|
||||
}
|
||||
|
||||
/**
|
||||
* The rocshmem library needs to be cleaned up with this call. It pairs
|
||||
* with the init function above.
|
||||
*/
|
||||
roc_shmem_finalize();
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,213 @@
|
||||
/******************************************************************************
|
||||
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal in the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
* sell copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "tester.hpp"
|
||||
|
||||
#include <functional>
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <mpi.h>
|
||||
#include <roc_shmem/roc_shmem.hpp>
|
||||
|
||||
//#include "broadcast_tester.hpp"
|
||||
#include "primitive_tester.hpp"
|
||||
|
||||
Tester::Tester(TesterArguments args)
|
||||
: args(args)
|
||||
{
|
||||
_type = (TestType) args.algorithm;
|
||||
_shmem_context = args.shmem_context;
|
||||
hipStreamCreate(&stream);
|
||||
hipEventCreate(&start_event);
|
||||
hipEventCreate(&stop_event);
|
||||
hipMalloc((void**)&timer, sizeof(uint64_t) * args.num_wgs);
|
||||
}
|
||||
|
||||
Tester::~Tester()
|
||||
{
|
||||
hipFree(timer);
|
||||
hipEventDestroy(stop_event);
|
||||
hipEventDestroy(start_event);
|
||||
hipStreamDestroy(stream);
|
||||
}
|
||||
|
||||
std::vector<Tester*>
|
||||
Tester::create(TesterArguments args)
|
||||
{
|
||||
int rank = args.myid;
|
||||
std::vector<Tester*> testers;
|
||||
|
||||
if (rank == 0)
|
||||
std::cout << "*** Creating Test: ";
|
||||
|
||||
TestType type = (TestType) args.algorithm;
|
||||
|
||||
switch (type) {
|
||||
case AlltoAll_Put:
|
||||
if (rank == 0)
|
||||
std::cout << "AlltoAll Puts***" << std::endl;
|
||||
testers.push_back(new PrimitiveTester(args));
|
||||
return testers;
|
||||
case AlltoAll_Get:
|
||||
if (rank == 0)
|
||||
std::cout << "AlltoAll Gets***" << std::endl;
|
||||
testers.push_back(new PrimitiveTester(args));
|
||||
return testers;
|
||||
default:
|
||||
if (rank == 0)
|
||||
std::cout << "Unknown***" << std::endl;
|
||||
testers.push_back(new PrimitiveTester(args));
|
||||
return testers;
|
||||
}
|
||||
return testers;
|
||||
}
|
||||
|
||||
void
|
||||
Tester::execute()
|
||||
{
|
||||
|
||||
int num_loops = args.loop;
|
||||
|
||||
/**
|
||||
* Some tests loop through data sizes in powers of 2 and report the
|
||||
* results for those ranges.
|
||||
*/
|
||||
for (uint64_t size = args.min_msg_size;
|
||||
size <= args.max_msg_size;
|
||||
size <<= 1) {
|
||||
|
||||
|
||||
/**
|
||||
* Restricts the number of iterations of really large messages.
|
||||
*/
|
||||
if (size > args.large_message_size)
|
||||
num_loops = args.loop_large;
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* TODO:
|
||||
* Verify that this timer type is actually uint64_t on the
|
||||
* device side.
|
||||
*/
|
||||
memset(timer, 0, sizeof(uint64_t) * args.num_wgs);
|
||||
|
||||
const dim3 blockSize(args.wg_size, 1, 1);
|
||||
const dim3 gridSize(args.num_wgs, 1, 1);
|
||||
|
||||
hipEventRecord(start_event, stream);
|
||||
|
||||
launchKernel(gridSize, blockSize, num_loops, size, args.numprocs, args.myid);
|
||||
|
||||
hipEventRecord(stop_event, stream);
|
||||
hipError_t err = hipStreamSynchronize(stream);
|
||||
if (err != hipSuccess) {
|
||||
printf("error = %d \n", err);
|
||||
}
|
||||
|
||||
// roc_shmem_dump_stats();
|
||||
// roc_shmem_reset_stats();
|
||||
|
||||
|
||||
|
||||
// data validation
|
||||
verifyResults(size);
|
||||
|
||||
barrier();
|
||||
resetBuffers(size);
|
||||
|
||||
print(size);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
Tester::print(uint64_t size)
|
||||
{
|
||||
if (args.myid != 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
// uint64_t timer_avg = timerAvgInMicroseconds();
|
||||
// double latency_avg = static_cast<double>(timer_avg) / num_timed_msgs;
|
||||
// double avg_msg_rate = num_timed_msgs / (timer_avg / 1e6);
|
||||
|
||||
float total_kern_time_ms;
|
||||
hipEventElapsedTime(&total_kern_time_ms, start_event, stop_event);
|
||||
float total_kern_time_s = total_kern_time_ms / 1000;
|
||||
double bandwidth_avg_gbs = num_timed_msgs * size * bw_factor / total_kern_time_s / pow(2, 30);
|
||||
|
||||
float latency_us = (total_kern_time_ms *1000) /num_timed_msgs;
|
||||
|
||||
int field_width = 20;
|
||||
int float_precision = 2;
|
||||
|
||||
printf("\n##### Message Size %lu #####\n", size);
|
||||
|
||||
printf("%*s%*s\n",
|
||||
field_width + 1, "Latency AVG (us)",
|
||||
field_width + 1, "Bandwidth (GB/s)");
|
||||
|
||||
printf("%*.*f %*.*f \n",
|
||||
field_width, float_precision, latency_us,
|
||||
field_width, float_precision, bandwidth_avg_gbs);
|
||||
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
void
|
||||
Tester::barrier()
|
||||
{
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
}
|
||||
|
||||
uint64_t
|
||||
Tester::gpuCyclesToMicroseconds(uint64_t cycles)
|
||||
{
|
||||
/**
|
||||
* The dGPU asm core timer runs at 27MHz. This is different from the
|
||||
* core clock returned by HIP. For an APU, this is different and might
|
||||
* need adjusting.
|
||||
*/
|
||||
uint64_t gpu_frequency_MHz = 27;
|
||||
|
||||
/**
|
||||
* hipDeviceGetAttribute(&gpu_frequency_khz,
|
||||
* hipDeviceAttributeClockRate,
|
||||
* 0);
|
||||
*/
|
||||
|
||||
return cycles / gpu_frequency_MHz;
|
||||
}
|
||||
|
||||
uint64_t
|
||||
Tester::timerAvgInMicroseconds()
|
||||
{
|
||||
uint64_t sum = 0;
|
||||
|
||||
for (int i = 0; i < args.num_wgs; i++) {
|
||||
sum += gpuCyclesToMicroseconds(timer[i]);
|
||||
}
|
||||
|
||||
return sum / args.num_wgs;
|
||||
}
|
||||
@@ -0,0 +1,111 @@
|
||||
/******************************************************************************
|
||||
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal in the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
* sell copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef _TESTER_HPP_
|
||||
#define _TESTER_HPP_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include <roc_shmem/roc_shmem.hpp>
|
||||
|
||||
#include "tester_arguments.hpp"
|
||||
|
||||
/******************************************************************************
|
||||
* TESTER CLASS TYPES
|
||||
*****************************************************************************/
|
||||
enum TestType
|
||||
{
|
||||
AlltoAll_Put = 0,
|
||||
AlltoAll_Get = 1
|
||||
};
|
||||
|
||||
typedef int ShmemContextType;
|
||||
|
||||
/******************************************************************************
|
||||
* TESTER INTERFACE
|
||||
*****************************************************************************/
|
||||
class Tester
|
||||
{
|
||||
public:
|
||||
explicit Tester(TesterArguments args);
|
||||
virtual ~Tester();
|
||||
|
||||
void
|
||||
execute();
|
||||
|
||||
static std::vector<Tester*>
|
||||
create(TesterArguments args);
|
||||
|
||||
protected:
|
||||
virtual void
|
||||
resetBuffers(uint64_t size) = 0;
|
||||
|
||||
virtual void
|
||||
preLaunchKernel() {}
|
||||
|
||||
virtual void
|
||||
launchKernel(dim3 gridSize,
|
||||
dim3 blockSize,
|
||||
int loop,
|
||||
uint64_t size,
|
||||
int nproc, int my_pe) = 0;
|
||||
|
||||
virtual void
|
||||
postLaunchKernel() {}
|
||||
|
||||
virtual void
|
||||
verifyResults(uint64_t size) = 0;
|
||||
|
||||
int num_msgs = 0;
|
||||
int num_timed_msgs = 0;
|
||||
int bw_factor = 1;
|
||||
|
||||
TesterArguments args;
|
||||
|
||||
TestType _type;
|
||||
ShmemContextType _shmem_context = 8; //SHMEM_CTX_WP_PRIVATE
|
||||
|
||||
hipStream_t stream;
|
||||
|
||||
uint64_t *timer = nullptr;
|
||||
|
||||
private:
|
||||
void
|
||||
print(uint64_t size);
|
||||
|
||||
void
|
||||
barrier();
|
||||
|
||||
uint64_t
|
||||
gpuCyclesToMicroseconds(uint64_t cycles);
|
||||
|
||||
uint64_t
|
||||
timerAvgInMicroseconds();
|
||||
|
||||
bool
|
||||
peLaunchesKernel();
|
||||
|
||||
hipEvent_t start_event;
|
||||
hipEvent_t stop_event;
|
||||
};
|
||||
|
||||
#endif /* _TESTER_HPP */
|
||||
@@ -0,0 +1,84 @@
|
||||
/******************************************************************************
|
||||
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal in the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
* sell copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "tester.hpp"
|
||||
#include "tester_arguments.hpp"
|
||||
|
||||
#include <cstdlib>
|
||||
#include <iostream>
|
||||
|
||||
#include <roc_shmem/roc_shmem.hpp>
|
||||
|
||||
using namespace rocshmem;
|
||||
|
||||
TesterArguments::TesterArguments(int argc, char *argv[])
|
||||
{
|
||||
for (int i = 1; i < argc; i++) {
|
||||
std::string arg = argv[i];
|
||||
if (arg == "-w") {
|
||||
i++;
|
||||
num_wgs = atoi(argv[i]);
|
||||
} else if (arg == "-S") {
|
||||
i++;
|
||||
max_msg_size = atoll(argv[i]);
|
||||
} else if (arg == "-s") {
|
||||
i++;
|
||||
min_msg_size = atoll(argv[i]);
|
||||
} else if (arg == "-a") {
|
||||
i++;
|
||||
algorithm = atoi(argv[i]);
|
||||
} else if (arg == "-z") {
|
||||
i++;
|
||||
wg_size = atoi(argv[i]);
|
||||
} else if (arg == "-x") {
|
||||
i++;
|
||||
shmem_context = atoi(argv[i]);
|
||||
} else {
|
||||
show_usage(argv[0]);
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
TesterArguments::show_usage(std::string executable_name)
|
||||
{
|
||||
std::cout << "Usage: " << executable_name << std::endl;
|
||||
std::cout << "\t-t <number of roc_shmem service threads>\n";
|
||||
std::cout << "\t-w <number of workgroups>\n";
|
||||
std::cout << "\t-s <maximum message size (in bytes)>\n";
|
||||
std::cout << "\t-a <algorithm number to test>\n";
|
||||
std::cout << "\t-z <WorkGroup Size>\n";
|
||||
std::cout << "\t-c <Coalescing Coefficient>\n";
|
||||
std::cout << "\t-o <Operation type for the random_access test>\n";
|
||||
std::cout << "\t-ta <Number of Thread Accessing the communication>\n";
|
||||
std::cout << "\t-x <shmem context>\n";
|
||||
}
|
||||
|
||||
void
|
||||
TesterArguments::get_rocshmem_arguments()
|
||||
{
|
||||
numprocs = roc_shmem_n_pes();
|
||||
myid = roc_shmem_my_pe();
|
||||
|
||||
}
|
||||
@@ -0,0 +1,74 @@
|
||||
/******************************************************************************
|
||||
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal in the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
* sell copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef _TESTER_ARGUMENTS_HPP_
|
||||
#define _TESTER_ARGUMENTS_HPP_
|
||||
|
||||
#include <string>
|
||||
|
||||
#include <climits>
|
||||
#include <cstdint>
|
||||
|
||||
class TesterArguments
|
||||
{
|
||||
public:
|
||||
TesterArguments(int argc, char *argv[]);
|
||||
|
||||
/**
|
||||
* Initialize rocshmem members
|
||||
* Valid after roc_shmem_init function called.
|
||||
*/
|
||||
void get_rocshmem_arguments();
|
||||
|
||||
private:
|
||||
/**
|
||||
* Output method which displays available command line options
|
||||
*/
|
||||
static void show_usage(std::string executable_name);
|
||||
|
||||
public:
|
||||
/**
|
||||
* Arguments obtained from command line
|
||||
*/
|
||||
unsigned num_wgs = 1;
|
||||
unsigned algorithm = 0;
|
||||
uint64_t min_msg_size = 1;
|
||||
uint64_t max_msg_size = 1 << 20;
|
||||
unsigned wg_size = 64;
|
||||
unsigned shmem_context = 8; // ROC_SHMEM_CTX_WG_PRIVATE
|
||||
|
||||
/**
|
||||
* Arguments obtained from rocshmem
|
||||
*/
|
||||
unsigned numprocs = UINT_MAX;
|
||||
unsigned myid = UINT_MAX;
|
||||
|
||||
/**
|
||||
* Defaults tester values
|
||||
*/
|
||||
int loop = 100;
|
||||
int skip = 10;
|
||||
int loop_large = 25;
|
||||
int large_message_size = 32768;
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,144 @@
|
||||
###############################################################################
|
||||
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to
|
||||
# deal in the Software without restriction, including without limitation the
|
||||
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
# sell copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
###############################################################################
|
||||
|
||||
cmake_minimum_required(VERSION 3.16.3 FATAL_ERROR)
|
||||
|
||||
###############################################################################
|
||||
# GLOBAL COMPILE FLAGS
|
||||
###############################################################################
|
||||
set(CMAKE_CXX_STANDARD 14)
|
||||
set(CMAKE_CXX_EXTENSIONS OFF)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
set(CMAKE_CXX_COMPILER /opt/rocm/bin/hipcc)
|
||||
|
||||
###############################################################################
|
||||
# DEFAULT BUILD TYPE
|
||||
###############################################################################
|
||||
if(NOT CMAKE_BUILD_TYPE)
|
||||
message(STATUS "CMAKE_BUILD_TYPE unspecified: generating Release build")
|
||||
|
||||
set(
|
||||
CMAKE_BUILD_TYPE
|
||||
"Release"
|
||||
CACHE
|
||||
STRING
|
||||
"build type: Release, Debug, RelWithDebInfo, MinSizeRel"
|
||||
FORCE
|
||||
)
|
||||
endif()
|
||||
|
||||
###############################################################################
|
||||
# PROJECT
|
||||
###############################################################################
|
||||
project(spts VERSION 1.1.0 LANGUAGES CXX)
|
||||
|
||||
###############################################################################
|
||||
# CONFIGURATION OPTIONS
|
||||
###############################################################################
|
||||
option(USE_HIP "Build HIP version of the solver" OFF)
|
||||
option(USE_ROCSHMEM "Build ROC_SHMEM enabled version of the solver" OFF)
|
||||
option(ALL_ANALYZE "Build analyze and solve algorithm" OFF)
|
||||
option(USE_DOUBLE "Use double precision floats for the data" OFF)
|
||||
option(ALL_LEVELSET "Build levelset algorithm" OFF)
|
||||
option(ALL_LEVELSYNC "Build levelsync algorithm" OFF)
|
||||
option(ALL_SYNCFREE "Build syncfree algorithm" OFF)
|
||||
|
||||
configure_file(cmake/config.h.in config.h)
|
||||
|
||||
###############################################################################
|
||||
# SOURCES
|
||||
###############################################################################
|
||||
add_executable(${PROJECT_NAME} "")
|
||||
|
||||
target_include_directories(
|
||||
${PROJECT_NAME}
|
||||
PRIVATE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}
|
||||
$<BUILD_INTERFACE:${CMAKE_BINARY_DIR}> # CONFIG.H
|
||||
)
|
||||
|
||||
target_sources(
|
||||
${PROJECT_NAME}
|
||||
PRIVATE
|
||||
InputFlags.cpp
|
||||
Main.cpp
|
||||
)
|
||||
|
||||
###############################################################################
|
||||
# HIP / HIP + ROC_SHMEM
|
||||
###############################################################################
|
||||
if(USE_HIP)
|
||||
find_package(hip REQUIRED)
|
||||
|
||||
target_sources(
|
||||
${PROJECT_NAME}
|
||||
PRIVATE
|
||||
HIPHelper.cpp
|
||||
)
|
||||
|
||||
if(USE_ROC_SHMEM)
|
||||
find_package(rocshmem CONFIG REQUIRED)
|
||||
|
||||
target_include_directories(
|
||||
${PROJECT_NAME}
|
||||
PRIVATE
|
||||
rocshmem::rocshmem
|
||||
)
|
||||
|
||||
target_link_libraries(
|
||||
${PROJECT_NAME}
|
||||
PRIVATE
|
||||
rocshmem::rocshmem
|
||||
hip::host
|
||||
-fgpu-rdc
|
||||
)
|
||||
endif()
|
||||
|
||||
###############################################################################
|
||||
# OPENCL
|
||||
###############################################################################
|
||||
else()
|
||||
|
||||
if(USE_ROC_SHMEM)
|
||||
message(FATAL_ERROR "Cannot use ROC_SHMEM without USE_HIP")
|
||||
endif()
|
||||
|
||||
target_sources(
|
||||
${PROJECT_NAME}
|
||||
PRIVATE
|
||||
OpenCLHelper.cpp
|
||||
)
|
||||
|
||||
target_include_directories(
|
||||
${PROJECT_NAME}
|
||||
PRIVATE
|
||||
/opt/rocm/opencl/include
|
||||
)
|
||||
|
||||
target_link_libraries(
|
||||
${PROJECT_NAME}
|
||||
PRIVATE
|
||||
-L/opt/rocm/opencl/lib/x86_64
|
||||
-lOpenCL
|
||||
)
|
||||
|
||||
endif()
|
||||
@@ -0,0 +1,85 @@
|
||||
/********************************************************************************
|
||||
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
********************************************************************************/
|
||||
#ifndef GPUHelper_H
|
||||
#define GPUHelper_H
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include "InputFlags.h"
|
||||
|
||||
#define ROW_BITS 32 // May be not the right place to define this macro
|
||||
#define WG_BITS 24
|
||||
|
||||
static int SPTS_BLOCK_SIZE = 0;
|
||||
|
||||
#ifdef USE_ROC_SHMEM
|
||||
#define WF_PER_WG 1
|
||||
#else
|
||||
#define WF_PER_WG 16
|
||||
#endif
|
||||
#define WF_SIZE 64
|
||||
|
||||
#ifdef USE_HIP
|
||||
#include <hip/hip_runtime.h>
|
||||
typedef void * memPointer;
|
||||
typedef int memPointer_flags;
|
||||
typedef int gpuInt;
|
||||
typedef bool gpuBool;
|
||||
typedef hipEvent_t gpuEvent;
|
||||
typedef hipError_t gpuError;
|
||||
#define GPU_MEM_READ_ONLY 0
|
||||
#define GPU_MEM_READ_WRITE 0
|
||||
#define GPU_MEM_USE_HOST_PTR 0
|
||||
#define GPU_TRUE true
|
||||
#define GPU_FALSE false
|
||||
#else
|
||||
#include <CL/cl.h>
|
||||
typedef cl_mem memPointer;
|
||||
typedef cl_mem_flags memPointer_flags;
|
||||
typedef cl_int gpuInt;
|
||||
typedef cl_bool gpuBool;
|
||||
typedef cl_event gpuEvent;
|
||||
typedef cl_int gpuError;
|
||||
#define GPU_MEM_READ_ONLY CL_MEM_READ_ONLY
|
||||
#define GPU_MEM_READ_WRITE CL_MEM_READ_ONLY
|
||||
#define GPU_MEM_USE_HOST_PTR CL_MEM_USE_HOST_PTR
|
||||
#define GPU_TRUE CL_TRUE
|
||||
#define GPU_FALSE CL_FALSE
|
||||
#endif
|
||||
|
||||
class GPUHelper
|
||||
{
|
||||
public:
|
||||
GPUHelper() {}
|
||||
virtual int Init(const std::string &_filename, InputFlags &in_flags) = 0;
|
||||
virtual void checkStatus(gpuError status, const std::string errString) = 0;
|
||||
virtual void CopyToDevice(memPointer _d_buf, void *_h_buf, size_t _size, size_t _offset, gpuBool _blocking, gpuEvent *_ev) = 0;
|
||||
virtual void CopyToHost(memPointer _d_buf, void *_h_buf, size_t _size, size_t _offset, gpuBool _blocking, gpuEvent *_ev) = 0;
|
||||
virtual memPointer AllocateMem(const std::string name, size_t, memPointer_flags flags, void *) = 0;
|
||||
virtual void FreeMem(memPointer ptr) = 0;
|
||||
virtual void Flush() = 0;
|
||||
};
|
||||
|
||||
#endif //GPUHelper_H
|
||||
@@ -0,0 +1,99 @@
|
||||
/********************************************************************************
|
||||
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
********************************************************************************/
|
||||
|
||||
#include "HIPHelper.h"
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
|
||||
int HIPHelper::Init(const std::string &filename, InputFlags &in_flags)
|
||||
{
|
||||
int device = 0;
|
||||
hipSetDevice(device);
|
||||
hipDeviceProp_t props;
|
||||
hipGetDeviceProperties(&props, device /*deviceID*/);
|
||||
printf("info: running on device %s\n", props.name);
|
||||
printf("info: architecture on AMD GPU device is: %d\n", props.gcnArch);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void HIPHelper::checkStatus(gpuError status, const std::string errString)
|
||||
{
|
||||
if (status != HIP_SUCCESS)
|
||||
{
|
||||
std::cerr << errString << " : " << hipGetErrorString(status) << std::endl;
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
memPointer HIPHelper::AllocateMem(const std::string name,
|
||||
size_t size,
|
||||
memPointer_flags flags,
|
||||
void *hostBuffer)
|
||||
{
|
||||
void* buf;
|
||||
std::string errString = "HIP error allocating " + name + " !";
|
||||
checkStatus(hipMalloc(&buf, size), errString);
|
||||
printf("Allocating %s of size %zu at buf %p\n", name.c_str(), size, buf);
|
||||
return buf;
|
||||
}
|
||||
|
||||
void HIPHelper::CopyToDevice(memPointer devBuffer,
|
||||
void *hostBuffer,
|
||||
size_t size,
|
||||
size_t offset,
|
||||
gpuBool blocking,
|
||||
gpuEvent *ev)
|
||||
{
|
||||
assert(offset == 0);
|
||||
memcpy(devBuffer, hostBuffer, size);
|
||||
/*
|
||||
if (blocking == GPU_TRUE) {
|
||||
checkStatus(hipMemcpy(devBuffer, hostBuffer, size, hipMemcpyHostToDevice),
|
||||
"HIP error copying data to device !");
|
||||
} else {
|
||||
checkStatus(hipMemcpyAsync(devBuffer, hostBuffer, size, hipMemcpyHostToDevice),
|
||||
"HIP error copying data to device !");
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
void HIPHelper::CopyToHost(memPointer devBuffer,
|
||||
void *hostBuffer,
|
||||
size_t size,
|
||||
size_t offset,
|
||||
gpuBool blocking,
|
||||
gpuEvent *ev)
|
||||
{
|
||||
assert(offset == 0);
|
||||
memcpy(hostBuffer, devBuffer, size);
|
||||
/*
|
||||
if (blocking == GPU_TRUE) {
|
||||
checkStatus(hipMemcpy(hostBuffer, devBuffer, size, hipMemcpyDeviceToHost),
|
||||
"HIP error copying data to device !");
|
||||
} else {
|
||||
checkStatus(hipMemcpyAsync(hostBuffer, devBuffer, size, hipMemcpyDeviceToHost),
|
||||
"HIP error copying data to device !");
|
||||
}
|
||||
*/
|
||||
}
|
||||
@@ -0,0 +1,50 @@
|
||||
/********************************************************************************
|
||||
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
********************************************************************************/
|
||||
#ifndef CLHelper_H
|
||||
#define CLHelper_H
|
||||
|
||||
#define CL_USE_DEPRECATED_OPENCL_2_0_APIS
|
||||
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
|
||||
#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
|
||||
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include "InputFlags.h"
|
||||
#include "GPUHelper.h"
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
class HIPHelper : public GPUHelper
|
||||
{
|
||||
public:
|
||||
HIPHelper() {}
|
||||
int Init(const std::string &_filename, InputFlags &in_flags);
|
||||
void checkStatus(gpuError status, const std::string errString);
|
||||
void CopyToDevice(memPointer _d_buf, void *_h_buf, size_t _size, size_t _offset, gpuBool _blocking, gpuEvent *_ev);
|
||||
void CopyToHost(memPointer _d_buf, void *_h_buf, size_t _size, size_t _offset, gpuBool _blocking, gpuEvent *_ev);
|
||||
memPointer AllocateMem(const std::string name, size_t, memPointer_flags flags, void *);
|
||||
void FreeMem(memPointer ptr) { hipFree(ptr); }
|
||||
void Flush() { hipDeviceSynchronize(); }
|
||||
};
|
||||
|
||||
#endif //CLHelper_H
|
||||
|
||||
@@ -0,0 +1,179 @@
|
||||
/********************************************************************************
|
||||
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
********************************************************************************/
|
||||
#include <iomanip>
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include "InputFlags.h"
|
||||
|
||||
InputFlags::InputFlags()
|
||||
{
|
||||
AddInputFlag("help", 'h', "", "Print Help Message", "string");
|
||||
}
|
||||
|
||||
void InputFlags::AddInputFlag(const std::string &_long_name,
|
||||
char _short_name,
|
||||
const std::string &_value,
|
||||
const std::string &_help_text,
|
||||
const std::string &_type)
|
||||
{
|
||||
Input in;
|
||||
in.long_name = _long_name;
|
||||
in.short_name = _short_name;
|
||||
in.value = _value;
|
||||
in.help_text = _help_text;
|
||||
in.type = _type;
|
||||
|
||||
if(MapInputs.count(_short_name) > 0)
|
||||
printf("Input flag: %s (%c) already exists !", _long_name.c_str(), _short_name);
|
||||
else
|
||||
MapInputs[_short_name] = in;
|
||||
}
|
||||
|
||||
void InputFlags::Print()
|
||||
{
|
||||
printf("SpTS Input Flags: \n\n");
|
||||
|
||||
for(auto &content : MapInputs)
|
||||
std::cout<<std::setw(8)<<"--"<<content.second.long_name<<std::setw(20 - content.second.long_name.length())<<"-"<<content.first<<std::setw(8)<<" "<<content.second.help_text<<"\n";
|
||||
exit(0);
|
||||
}
|
||||
|
||||
char InputFlags::FindShortName(const std::string &long_name)
|
||||
{
|
||||
char short_name = '\0';
|
||||
|
||||
for(auto &content : MapInputs)
|
||||
{
|
||||
if(content.second.long_name == long_name)
|
||||
short_name = content.first;
|
||||
}
|
||||
if(short_name == '\0')
|
||||
{
|
||||
std::cout<<"Long Name: "<<long_name<<" Not Found !";
|
||||
exit(0);
|
||||
}
|
||||
|
||||
return short_name;
|
||||
}
|
||||
|
||||
void InputFlags::Parse(int argc, char *argv[])
|
||||
{
|
||||
std::vector<std::string> args;
|
||||
for(int i = 1; i < argc; i++)
|
||||
args.push_back(argv[i]);
|
||||
|
||||
if(args.size() == 0) // No Input Flag
|
||||
Print();
|
||||
|
||||
for(int i = 0; i < args.size(); i++)
|
||||
{
|
||||
std::string temp = args[i];
|
||||
if(temp[0] != '-')
|
||||
{
|
||||
printf("Illegal input flag\n");
|
||||
Print();
|
||||
}
|
||||
else if(temp[0] == '-' && temp[1] == '-') // Long Name Input
|
||||
{
|
||||
std::string long_name = temp.substr(2);
|
||||
if(long_name == "help")
|
||||
Print();
|
||||
|
||||
char short_name = FindShortName(long_name);
|
||||
|
||||
if (short_name == 'n' || short_name == 'z' || short_name == 'v')
|
||||
{
|
||||
MapInputs[short_name].value = "true";
|
||||
}
|
||||
else
|
||||
{
|
||||
MapInputs[short_name].value = args[i+1];
|
||||
i++;
|
||||
}
|
||||
}
|
||||
else if (temp[0] == '-' && temp[1] == '?') // Help Input
|
||||
Print();
|
||||
else // Short Name Input
|
||||
{
|
||||
char short_name = temp[1];
|
||||
if(MapInputs.find(short_name) == MapInputs.end())
|
||||
{
|
||||
std::cout<<"Input Flag: "<<short_name<<" Not Found !";
|
||||
exit(0);
|
||||
}
|
||||
if(short_name == 'h')
|
||||
Print();
|
||||
|
||||
if(short_name == 'n' || short_name == 'z' || short_name == 'v' )
|
||||
{
|
||||
MapInputs[short_name].value = "true";
|
||||
}
|
||||
else
|
||||
{
|
||||
MapInputs[short_name].value = args[i+1];
|
||||
i++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::string InputFlags::GetValueStr(const std::string &long_name)
|
||||
{
|
||||
char short_name = FindShortName(long_name);
|
||||
std::string value = MapInputs[short_name].value;
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
int InputFlags::GetValueInt(const std::string &long_name)
|
||||
{
|
||||
char short_name = FindShortName(long_name);
|
||||
int value = atoi(MapInputs[short_name].value.c_str());
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
uint64_t InputFlags::GetValueUint64(const std::string &long_name)
|
||||
{
|
||||
char short_name = FindShortName(long_name);
|
||||
uint64_t value = strtoull(MapInputs[short_name].value.c_str(), NULL, 10);
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
float InputFlags::GetValueFloat(const std::string &long_name)
|
||||
{
|
||||
char short_name = FindShortName(long_name);
|
||||
float value = std::stof(MapInputs[short_name].value);
|
||||
|
||||
return value;
|
||||
|
||||
}
|
||||
|
||||
bool InputFlags::GetValueBool(const std::string &long_name)
|
||||
{
|
||||
char short_name = FindShortName(long_name);
|
||||
if (MapInputs[short_name].value == "true")
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
@@ -0,0 +1,62 @@
|
||||
/********************************************************************************
|
||||
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
********************************************************************************/
|
||||
#ifndef InputFlags_H
|
||||
#define InputFlags_H
|
||||
|
||||
#include <string>
|
||||
#include <map>
|
||||
|
||||
struct Input
|
||||
{
|
||||
std::string long_name;
|
||||
char short_name;
|
||||
std::string value;
|
||||
std::string help_text;
|
||||
std::string type;
|
||||
};
|
||||
|
||||
class InputFlags
|
||||
{
|
||||
std::map<char, Input> MapInputs;
|
||||
|
||||
public:
|
||||
InputFlags();
|
||||
virtual void AddDerivedInputFlags() = 0;
|
||||
void AddInputFlag(const std::string &_long_name,
|
||||
char _short_name,
|
||||
const std::string &_value,
|
||||
const std::string &_help_text,
|
||||
const std::string &type);
|
||||
void Parse(int argc, char *argv[]);
|
||||
char FindShortName(const std::string &long_name);
|
||||
void Print();
|
||||
|
||||
std::string GetValueStr(const std::string &long_name);
|
||||
int GetValueInt(const std::string &long_name);
|
||||
uint64_t GetValueUint64(const std::string &long_name);
|
||||
float GetValueFloat(const std::string &long_name);
|
||||
bool GetValueBool(const std::string &long_name);
|
||||
|
||||
virtual ~InputFlags() {}
|
||||
};
|
||||
|
||||
#endif //InputFlags_H
|
||||
@@ -0,0 +1,19 @@
|
||||
Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
@@ -0,0 +1,193 @@
|
||||
/********************************************************************************
|
||||
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
********************************************************************************/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#ifdef USE_HIP
|
||||
#include "hip/hip_runtime.h"
|
||||
#else
|
||||
#include "OpenCLHelper.h"
|
||||
#endif
|
||||
|
||||
#ifdef USE_RO_SHMEM
|
||||
#include "mpi.h"
|
||||
#endif
|
||||
|
||||
#include "MatrixMarketReader.h"
|
||||
#include "SpTS.h"
|
||||
#include <iostream>
|
||||
#include <unistd.h>
|
||||
#include <limits.h>
|
||||
|
||||
#ifdef USE_DOUBLE
|
||||
typedef double FPTYPE;
|
||||
#else
|
||||
typedef float FPTYPE;
|
||||
#endif
|
||||
|
||||
using namespace rocshmem;
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
SparseTriangularSolve<FPTYPE> spts_obj;
|
||||
InputFlags &in_flags = spts_obj;
|
||||
in_flags.AddDerivedInputFlags();
|
||||
in_flags.Parse(argc, argv);
|
||||
FPTYPE alpha = in_flags.GetValueFloat("alpha");
|
||||
|
||||
printf("Reading input file: %s...", in_flags.GetValueStr("filename").c_str());fflush(stdout);
|
||||
MatrixMarketReader<FPTYPE> mm_reader;
|
||||
if (mm_reader.MMReadFormat(in_flags.GetValueStr("filename"), in_flags) != 0)
|
||||
{
|
||||
fprintf(stderr, "ERROR reading input file !\n");
|
||||
exit(1);
|
||||
}
|
||||
printf("Done.\n");
|
||||
|
||||
GPUHelper *GPU;
|
||||
#ifdef USE_HIP
|
||||
printf("Initializing HIP runtime...\n\t");fflush(stdout);
|
||||
GPU = new HIPHelper();
|
||||
char buf[PATH_MAX + 1];
|
||||
readlink("/proc/self/exe", buf, sizeof(buf) - 1);
|
||||
std::string str(buf);
|
||||
printf("Going to try to open %s\n", (str.substr(0, str.rfind('/'))+"/spts_kernel.cl").c_str());
|
||||
if(GPU->Init((str.substr(0, str.rfind('/'))+ "/spts_kernel.cl").c_str(), in_flags) == 1)
|
||||
{
|
||||
fflush(stdout);
|
||||
fprintf(stderr,"\nError Initializing HIP Runtime !\n");
|
||||
exit(-1);
|
||||
}
|
||||
#else
|
||||
printf("Initializing OpenCL runtime...\n\t");fflush(stdout);
|
||||
GPU = new CLHelper();
|
||||
char buf[PATH_MAX + 1];
|
||||
readlink("/proc/self/exe", buf, sizeof(buf) - 1);
|
||||
std::string str(buf);
|
||||
printf("Going to try to open %s\n", (str.substr(0, str.rfind('/'))+"/spts_kernel.cl").c_str());
|
||||
if(GPU->Init((str.substr(0, str.rfind('/'))+ "/spts_kernel.cl").c_str(), in_flags) == 1)
|
||||
{
|
||||
fflush(stdout);
|
||||
fprintf(stderr,"\nError Initializing OpenCL Runtime !\n");
|
||||
exit(-1);
|
||||
}
|
||||
#endif
|
||||
printf("Done.\n");
|
||||
|
||||
printf("Allocating sparse matrices...");fflush(stdout);
|
||||
spts_obj.AllocateSparseMatrix(mm_reader, in_flags, GPU);
|
||||
printf("Done.\n");
|
||||
|
||||
printf("Converting COO to CSR...");fflush(stdout);
|
||||
spts_obj.ConvertFromCOOToCSR(mm_reader.GetCoordinates(), in_flags);
|
||||
printf("Done.\n");
|
||||
|
||||
SPTS_BLOCK_SIZE = in_flags.GetValueInt("block_size");
|
||||
printf("Finding Stats For Parallel Decomposition...");fflush(stdout);
|
||||
spts_obj.FindStatsForParallelDecomposition();
|
||||
printf("Done.\n");
|
||||
|
||||
printf("Allocating parallel sparse matrices...");fflush(stdout);
|
||||
spts_obj.AllocateParallelSparseMatrix(mm_reader, in_flags);
|
||||
printf("Done.\n");
|
||||
|
||||
printf("Allocating vectors...");fflush(stdout);
|
||||
spts_obj.AllocateVectors(mm_reader);
|
||||
printf("Done.\n");
|
||||
|
||||
float gflops = 0.f;
|
||||
int errors = 0;
|
||||
uint64_t ns_per_iter = 0;
|
||||
uint64_t ns_per_analysis_iter = 0;
|
||||
uint64_t ns_per_syncfree_iter = 0;
|
||||
uint64_t ns_per_levelset_iter = 0;
|
||||
uint64_t ns_per_levelsync_iter = 0;
|
||||
|
||||
printf("Performing SpTS on the CPU with alpha=%f...", (float)alpha);fflush(stdout);
|
||||
spts_obj.CSRSpTSCPU(alpha);
|
||||
printf("Done.\n");
|
||||
|
||||
printf("Checking results of CPU-side SpTS...");fflush(stdout);
|
||||
if (!spts_obj.CSRCheckCPU(alpha))
|
||||
{
|
||||
fflush(stdout);
|
||||
fprintf(stderr, "CPU-based results were 'wrong', likely due to FP rounding. Expect the CPU and GPU to differ wildly.\n");
|
||||
//exit(-1);
|
||||
}
|
||||
printf("Done.\n");
|
||||
|
||||
printf("Performing %d iterations of SpTS on the GPU with alpha=%f...", in_flags.GetValueInt("iterations"), (float)alpha);fflush(stdout);
|
||||
gflops = spts_obj.CSRSpTSGPU(ns_per_iter, ns_per_analysis_iter, ns_per_syncfree_iter, ns_per_levelset_iter, ns_per_levelsync_iter, alpha);
|
||||
printf("Done.\n");
|
||||
|
||||
if (in_flags.GetValueBool("verify")) {
|
||||
printf("Checking whether GPU SpTS caused non-deterministic errors...\n");fflush(stdout);
|
||||
int non_det_errors = spts_obj.NonDeterministicErrors();
|
||||
printf("Done.\n");
|
||||
if (non_det_errors)
|
||||
fprintf(stderr, "ERROR!! -- Saw %d GPU iterations that had non-deterministic differences.\n", non_det_errors);
|
||||
int max_errors = spts_obj.MaxErrors();
|
||||
if (max_errors)
|
||||
{
|
||||
if (max_errors > 1)
|
||||
printf(" -- %d rows differed between CPU and GPU results.\n", max_errors);
|
||||
else
|
||||
printf(" -- %d row differed between CPU and GPU results.\n", max_errors);
|
||||
}
|
||||
else
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
printf("File %s : SpTS Gflops: %f ms_per_iter: %lf ", in_flags.GetValueStr("filename").c_str(), gflops, ((double)ns_per_iter/1000000.));
|
||||
printf(" ( ms_per_analysis_iter: ");
|
||||
if (ns_per_analysis_iter == 0)
|
||||
printf("no_iter");
|
||||
else
|
||||
printf("%lf", ((double)ns_per_analysis_iter/1000000.));
|
||||
printf(" | ms_per_syncfree_iter: ");
|
||||
if (ns_per_syncfree_iter == 0)
|
||||
printf("no_iter");
|
||||
else
|
||||
printf("%lf", ((double)ns_per_syncfree_iter/1000000.));
|
||||
printf(" | ms_per_levelset_iter: ");
|
||||
if (ns_per_levelset_iter == 0)
|
||||
printf("no_iter");
|
||||
else
|
||||
printf("%lf", ((double)ns_per_levelset_iter/1000000.));
|
||||
printf(" | ms_per_levelsync_iter: ");
|
||||
if (ns_per_levelsync_iter == 0)
|
||||
printf("no_iter )");
|
||||
else
|
||||
printf("%lf )", ((double)ns_per_levelsync_iter/1000000.));
|
||||
|
||||
#ifdef USE_ROC_SHMEM
|
||||
MPI_Allreduce(MPI_IN_PLACE, (void *) &ns_per_analysis_iter, 1,
|
||||
MPI_UNSIGNED_LONG, MPI_SUM, MPI_COMM_WORLD);
|
||||
|
||||
if (spts_obj.Get_this_pe() == 0) {
|
||||
printf("\nRANK 0: analysis avg ms = %lf\n",
|
||||
((double) ns_per_analysis_iter / 1000000.) / spts_obj.Get_total_pes());
|
||||
}
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,377 @@
|
||||
/********************************************************************************
|
||||
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
********************************************************************************/
|
||||
#ifndef MatrixMarketReader_H
|
||||
#define MatrixMarketReader_H
|
||||
/*
|
||||
Portions of this file include code provided by The National Institute of
|
||||
Standards and Technology (NIST). The code includes
|
||||
macro definitions from mmio.h and is subject to the following disclaimer.
|
||||
|
||||
Software Disclaimer
|
||||
|
||||
NIST-developed software is provided by NIST as a public service. You may use,
|
||||
copy and distribute copies of the software in any medium, provided that you
|
||||
keep intact this entire notice. You may improve, modify and create derivative
|
||||
works of the software or any portion of the software, and you may copy and
|
||||
distribute such modifications or works. Modified works should carry a notice
|
||||
stating that you changed the software and should note the date and nature of
|
||||
any such change. Please explicitly acknowledge the National Institute of
|
||||
Standards and Technology as the source of the software.
|
||||
|
||||
NIST-developed software is expressly provided "AS IS" NIST MAKES NO WARRANTY
|
||||
OF ANY KIND, EXPRESS, IMPLIED, IN FACT OR ARISING BY OPERATION OF LAW,
|
||||
INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTY OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT AND DATA ACCURACY. NIST
|
||||
NEITHER REPRESENTS NOR WARRANTS THAT THE OPERATION OF THE SOFTWARE WILL BE
|
||||
UNINTERRUPTED OR ERROR-FREE, OR THAT ANY DEFECTS WILL BE CORRECTED. NIST DOES
|
||||
NOT WARRANT OR MAKE ANY REPRESENTATIONS REGARDING THE USE OF THE SOFTWARE OR
|
||||
THE RESULTS THEREOF, INCLUDING BUT NOT LIMITED TO THE CORRECTNESS, ACCURACY,
|
||||
RELIABILITY, OR USEFULNESS OF THE SOFTWARE.
|
||||
|
||||
You are solely responsible for determining the appropriateness of using and
|
||||
distributing the software and you assume all risks associated with its use,
|
||||
including but not limited to the risks and costs of program errors, compliance
|
||||
with applicable laws, damage to or loss of data, programs or equipment, and
|
||||
the unavailability or interruption of operation. This software is not intended
|
||||
to be used in any situation where a failure could cause risk of injury or
|
||||
damage to property. The software developed by NIST employees is not subject
|
||||
to copyright protection within the United States.
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <cstdio>
|
||||
#include <iostream>
|
||||
#include "InputFlags.h"
|
||||
#include <typeinfo>
|
||||
#include "mmio.h"
|
||||
|
||||
// Class declaration
|
||||
|
||||
template<typename FloatType>
|
||||
struct Coordinate {
|
||||
int x;
|
||||
int y;
|
||||
FloatType val;
|
||||
};
|
||||
|
||||
template <typename FloatType>
|
||||
class MatrixMarketReader
|
||||
{
|
||||
char Typecode[4];
|
||||
int nNZ;
|
||||
int nRows;
|
||||
int nCols;
|
||||
int isSymmetric;
|
||||
int isDoubleMem;
|
||||
Coordinate<FloatType> *coords;
|
||||
bool *has_seen_diag;
|
||||
|
||||
public:
|
||||
MatrixMarketReader() : nNZ(0), nRows(0), nCols(0), isSymmetric(0), isDoubleMem(0)
|
||||
{
|
||||
for (int i = 0; i < sizeof(Typecode); i++)
|
||||
Typecode[i] = '\0';
|
||||
coords = NULL;
|
||||
}
|
||||
bool MMReadFormat(const std::string &_filename, InputFlags &_in_flags);
|
||||
bool MMReadBanner(FILE *_infile);
|
||||
bool MMReadMtxCrdSize(FILE *_infile);
|
||||
void MMGenerateCOOFromFile(FILE *_infile, InputFlags &_in_flags);
|
||||
|
||||
int GetNumRows() { return nRows; }
|
||||
int GetNumCols() { return nCols; }
|
||||
int GetNumNonZeroes() { return nNZ; }
|
||||
int GetSymmetric() { return isSymmetric; }
|
||||
|
||||
char *GetTypecode() { return Typecode; }
|
||||
Coordinate<FloatType> *GetCoordinates() { return coords; }
|
||||
|
||||
~MatrixMarketReader()
|
||||
{
|
||||
delete[] coords;
|
||||
}
|
||||
};
|
||||
|
||||
// Class definition
|
||||
|
||||
template<typename FloatType>
|
||||
bool MatrixMarketReader<FloatType>::MMReadFormat(const std::string &filename, InputFlags &in_flags)
|
||||
{
|
||||
FILE *mm_file = fopen(filename.c_str(), "r");
|
||||
if( mm_file == NULL)
|
||||
{
|
||||
printf("Cannot Open Matrix-Market File !\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
int status = MMReadBanner(mm_file);
|
||||
if(status != 0)
|
||||
{
|
||||
printf("Error Reading Banner in Matrix-Market File !\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
if(! mm_is_coordinate(Typecode))
|
||||
{printf(" only handling coordinate format\n"); return(1);}
|
||||
|
||||
if(mm_is_complex(Typecode)) {
|
||||
printf("Error: cannot handle complex format\n");
|
||||
return (1);
|
||||
}
|
||||
|
||||
if(mm_is_symmetric(Typecode))
|
||||
isSymmetric = 1;
|
||||
|
||||
status = MMReadMtxCrdSize(mm_file);
|
||||
if(status != 0) {
|
||||
printf("Error reading Matrix Market crd_size %d\n",status);
|
||||
return(1);
|
||||
}
|
||||
|
||||
if(mm_is_symmetric(Typecode))
|
||||
coords = new Coordinate<FloatType>[nNZ+nRows];
|
||||
else if (in_flags.GetValueBool("non_symmetric"))
|
||||
coords = new Coordinate<FloatType>[nNZ+nRows]; // This is too large, but oh well.
|
||||
else
|
||||
{
|
||||
fprintf(stderr, "Error: Input matrix is NOT symmetric. This will not work for SpTS.\n");
|
||||
return (1);
|
||||
}
|
||||
|
||||
has_seen_diag = new bool[nRows];
|
||||
for (int i = 0; i < nRows; i++)
|
||||
has_seen_diag[i] = false;
|
||||
|
||||
MMGenerateCOOFromFile(mm_file, in_flags);
|
||||
return 0;
|
||||
}
|
||||
|
||||
template<typename FloatType>
|
||||
void FillCoordData(char Typecode[],
|
||||
Coordinate<FloatType> *coords,
|
||||
bool *has_seen_diag,
|
||||
int &actual_nnz,
|
||||
int ir,
|
||||
int ic,
|
||||
FloatType val)
|
||||
{
|
||||
int new_x = ir - 1;
|
||||
int new_y = ic - 1;
|
||||
if (new_y > new_x)
|
||||
{
|
||||
// Skip stuff in the upper diagonal
|
||||
// Just keep our lower diag.
|
||||
return;
|
||||
}
|
||||
if (new_y == new_x)
|
||||
has_seen_diag[new_x] = true;
|
||||
coords[actual_nnz].x = new_x;
|
||||
coords[actual_nnz].y = new_y;
|
||||
coords[actual_nnz ++].val = val;
|
||||
}
|
||||
|
||||
template<typename FloatType>
|
||||
void FixupMissingDiags(char Typecode[],
|
||||
Coordinate<FloatType> *coords,
|
||||
int &actual_nnz,
|
||||
int nRows,
|
||||
bool *has_seen_diag,
|
||||
InputFlags &in_flags)
|
||||
{
|
||||
for(int i = 0; i < nRows; i++)
|
||||
{
|
||||
if (has_seen_diag[i] == false)
|
||||
{
|
||||
coords[actual_nnz].x = i;
|
||||
coords[actual_nnz].y = i;
|
||||
coords[actual_nnz ++].val = 1.;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename FloatType>
|
||||
void MatrixMarketReader<FloatType>::MMGenerateCOOFromFile(FILE *infile,
|
||||
InputFlags &in_flags)
|
||||
{
|
||||
int actual_nnz = 0;
|
||||
FloatType val;
|
||||
int ir, ic;
|
||||
|
||||
int exp_zeroes = in_flags.GetValueBool("exp_zeroes");
|
||||
|
||||
for(int i = 0; i < nNZ; i++)
|
||||
{
|
||||
if(mm_is_real(Typecode))
|
||||
{
|
||||
if(typeid(FloatType) == typeid(float))
|
||||
fscanf(infile, "%d %d %f\n", &ir, &ic, (float*)(&val));
|
||||
else if(typeid(FloatType) == typeid(double))
|
||||
fscanf(infile, "%d %d %lf\n", &ir, &ic, (double*)(&val));
|
||||
|
||||
if(exp_zeroes == 0 && val == 0)
|
||||
continue;
|
||||
else
|
||||
FillCoordData(Typecode, coords, has_seen_diag, actual_nnz, ir, ic, val);
|
||||
}
|
||||
else if (mm_is_integer(Typecode))
|
||||
{
|
||||
if(typeid(FloatType) == typeid(float))
|
||||
fscanf(infile, "%d %d %f\n", &ir, &ic, (float*)(&val));
|
||||
else if(typeid(FloatType) == typeid(double))
|
||||
fscanf(infile, "%d %d %lf\n", &ir, &ic, (double*)(&val));
|
||||
|
||||
if(exp_zeroes == 0 && val == 0)
|
||||
continue;
|
||||
else
|
||||
FillCoordData(Typecode, coords, has_seen_diag, actual_nnz, ir, ic, val);
|
||||
|
||||
}
|
||||
else if(mm_is_pattern(Typecode))
|
||||
{
|
||||
fscanf(infile, "%d %d", &ir, &ic);
|
||||
//val = ((FloatType) MAX_RAND_VAL * (rand() / (RAND_MAX + 1.0)));
|
||||
val = 3.;
|
||||
|
||||
if(exp_zeroes == 0 && val == 0)
|
||||
continue;
|
||||
else
|
||||
FillCoordData(Typecode, coords, has_seen_diag, actual_nnz, ir, ic, val);
|
||||
}
|
||||
}
|
||||
FixupMissingDiags(Typecode, coords, actual_nnz, nRows, has_seen_diag, in_flags);
|
||||
nNZ = actual_nnz;
|
||||
printf("\n\tNNZ in the lower triangular and fixedup diagonal: %d\n", nNZ);
|
||||
}
|
||||
|
||||
template<typename FloatType>
|
||||
bool MatrixMarketReader<FloatType>::MMReadBanner(FILE *infile)
|
||||
{
|
||||
char line[MM_MAX_LINE_LENGTH];
|
||||
char banner[MM_MAX_TOKEN_LENGTH];
|
||||
char mtx[MM_MAX_TOKEN_LENGTH];
|
||||
char crd[MM_MAX_TOKEN_LENGTH];
|
||||
char data_type[MM_MAX_TOKEN_LENGTH];
|
||||
char storage_scheme[MM_MAX_TOKEN_LENGTH];
|
||||
char *p;
|
||||
|
||||
mm_clear_typecode(Typecode);
|
||||
|
||||
if (fgets(line, MM_MAX_LINE_LENGTH, infile) == NULL)
|
||||
return MM_PREMATURE_EOF;
|
||||
|
||||
if (sscanf(line, "%s %s %s %s %s", banner, mtx, crd, data_type,
|
||||
storage_scheme) != 5)
|
||||
return MM_PREMATURE_EOF;
|
||||
|
||||
for (p=mtx; *p!='\0'; *p=tolower(*p),p++); /* convert to lower case */
|
||||
for (p=crd; *p!='\0'; *p=tolower(*p),p++);
|
||||
for (p=data_type; *p!='\0'; *p=tolower(*p),p++);
|
||||
for (p=storage_scheme; *p!='\0'; *p=tolower(*p),p++);
|
||||
|
||||
/* check for banner */
|
||||
if (strncmp(banner, MatrixMarketBanner, strlen(MatrixMarketBanner)) != 0)
|
||||
return MM_NO_HEADER;
|
||||
|
||||
/* first field should be "mtx" */
|
||||
if (strcmp(mtx, MM_MTX_STR) != 0)
|
||||
return MM_UNSUPPORTED_TYPE;
|
||||
mm_set_matrix(Typecode);
|
||||
|
||||
|
||||
/* second field describes whether this is a sparse matrix (in coordinate
|
||||
storgae) or a dense array */
|
||||
|
||||
|
||||
if (strcmp(crd, MM_SPARSE_STR) == 0)
|
||||
mm_set_sparse(Typecode);
|
||||
else if (strcmp(crd, MM_DENSE_STR) == 0)
|
||||
mm_set_dense(Typecode);
|
||||
else
|
||||
return MM_UNSUPPORTED_TYPE;
|
||||
|
||||
|
||||
/* third field */
|
||||
|
||||
if (strcmp(data_type, MM_REAL_STR) == 0)
|
||||
mm_set_real(Typecode);
|
||||
else
|
||||
if (strcmp(data_type, MM_COMPLEX_STR) == 0)
|
||||
mm_set_complex(Typecode);
|
||||
else
|
||||
if (strcmp(data_type, MM_PATTERN_STR) == 0)
|
||||
mm_set_pattern(Typecode);
|
||||
else
|
||||
if (strcmp(data_type, MM_INT_STR) == 0)
|
||||
mm_set_integer(Typecode);
|
||||
else
|
||||
return MM_UNSUPPORTED_TYPE;
|
||||
|
||||
|
||||
/* fourth field */
|
||||
|
||||
if (strcmp(storage_scheme, MM_GENERAL_STR) == 0)
|
||||
mm_set_general(Typecode);
|
||||
else
|
||||
if (strcmp(storage_scheme, MM_SYMM_STR) == 0)
|
||||
mm_set_symmetric(Typecode);
|
||||
else
|
||||
if (strcmp(storage_scheme, MM_HERM_STR) == 0)
|
||||
mm_set_hermitian(Typecode);
|
||||
else
|
||||
if (strcmp(storage_scheme, MM_SKEW_STR) == 0)
|
||||
mm_set_skew(Typecode);
|
||||
else
|
||||
return MM_UNSUPPORTED_TYPE;
|
||||
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
template<typename FloatType>
|
||||
bool MatrixMarketReader<FloatType>::MMReadMtxCrdSize(FILE *infile)
|
||||
{
|
||||
char line[MM_MAX_LINE_LENGTH];
|
||||
int num_items_read;
|
||||
|
||||
/* now continue scanning until you reach the end-of-comments */
|
||||
do
|
||||
{
|
||||
if (fgets(line,MM_MAX_LINE_LENGTH, infile) == NULL)
|
||||
return MM_PREMATURE_EOF;
|
||||
}while (line[0] == '%');
|
||||
|
||||
/* line[] is either blank or has M,N, nz */
|
||||
if (sscanf(line, "%d %d %d", &nRows, &nCols, &nNZ) == 3)
|
||||
return 0;
|
||||
else
|
||||
do
|
||||
{
|
||||
num_items_read = fscanf(infile, "%d %d %d", &nRows, &nCols, &nNZ);
|
||||
if (num_items_read == EOF) return MM_PREMATURE_EOF;
|
||||
}
|
||||
while (num_items_read != 3);
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif // MatrixMarketReader_H
|
||||
@@ -0,0 +1,486 @@
|
||||
/********************************************************************************
|
||||
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
********************************************************************************/
|
||||
#include "OpenCLHelper.h"
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
|
||||
cl_context CLHelper::context = NULL;
|
||||
cl_command_queue CLHelper::commandQueue = NULL;
|
||||
cl_kernel CLHelper::SpTSKernel = NULL;
|
||||
cl_kernel CLHelper::SpTSKernel_analyze = NULL;
|
||||
cl_kernel CLHelper::SpTSKernel_levelset = NULL;
|
||||
cl_kernel CLHelper::SpTSKernel_scalar = NULL;
|
||||
cl_kernel CLHelper::SpTSKernel_vector = NULL;
|
||||
cl_kernel CLHelper::SpTSKernel_levelsync = NULL;
|
||||
|
||||
const char * get_cl_err_string(cl_int err)
|
||||
{
|
||||
switch (err)
|
||||
{
|
||||
case CL_SUCCESS:
|
||||
return "CL_SUCCESS";
|
||||
case CL_DEVICE_NOT_FOUND:
|
||||
return "CL_DEVICE_NOT_FOUND";
|
||||
case CL_DEVICE_NOT_AVAILABLE:
|
||||
return "CL_DEVICE_NOT_AVAILABLE";
|
||||
case CL_COMPILER_NOT_AVAILABLE:
|
||||
return "CL_COMPILER_NOT_AVAILABLE";
|
||||
case CL_MEM_OBJECT_ALLOCATION_FAILURE:
|
||||
return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
|
||||
case CL_OUT_OF_RESOURCES:
|
||||
return "CL_OUT_OF_RESOURCES";
|
||||
case CL_OUT_OF_HOST_MEMORY:
|
||||
return "CL_OUT_OF_HOST_MEMORY";
|
||||
case CL_PROFILING_INFO_NOT_AVAILABLE:
|
||||
return "CL_PROFILING_INFO_NOT_AVAILABLE";
|
||||
case CL_MEM_COPY_OVERLAP:
|
||||
return "CL_MEM_COPY_OVERLAP";
|
||||
case CL_IMAGE_FORMAT_MISMATCH:
|
||||
return "CL_IMAGE_FORMAT_MISMATCH";
|
||||
case CL_IMAGE_FORMAT_NOT_SUPPORTED:
|
||||
return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
|
||||
case CL_BUILD_PROGRAM_FAILURE:
|
||||
return "CL_BUILD_PROGRAM_FAILURE";
|
||||
case CL_MAP_FAILURE:
|
||||
return "CL_MAP_FAILURE";
|
||||
#ifdef CL_VERSION_1_1
|
||||
case CL_MISALIGNED_SUB_BUFFER_OFFSET:
|
||||
return "CL_MISALIGNED_SUB_BUFFER_OFFSET";
|
||||
case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST:
|
||||
return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";
|
||||
#endif
|
||||
#ifdef CL_VERSION_1_2
|
||||
case CL_COMPILE_PROGRAM_FAILURE:
|
||||
return "CL_COMPILE_PROGRAM_FAILURE";
|
||||
case CL_LINKER_NOT_AVAILABLE:
|
||||
return "CL_LINKER_NOT_AVAILABLE";
|
||||
case CL_LINK_PROGRAM_FAILURE:
|
||||
return "CL_LINK_PROGRAM_FAILURE";
|
||||
case CL_DEVICE_PARTITION_FAILED:
|
||||
return "CL_DEVICE_PARTITION_FAILED";
|
||||
case CL_KERNEL_ARG_INFO_NOT_AVAILABLE:
|
||||
return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE";
|
||||
#endif
|
||||
case CL_INVALID_VALUE:
|
||||
return "CL_INVALID_VALUE";
|
||||
case CL_INVALID_DEVICE_TYPE:
|
||||
return "CL_INVALID_DEVICE_TYPE";
|
||||
case CL_INVALID_PLATFORM:
|
||||
return "CL_INVALID_PLATFORM";
|
||||
case CL_INVALID_DEVICE:
|
||||
return "CL_INVALID_DEVICE";
|
||||
case CL_INVALID_CONTEXT:
|
||||
return "CL_INVALID_CONTEXT";
|
||||
case CL_INVALID_QUEUE_PROPERTIES:
|
||||
return "CL_INVALID_QUEUE_PROPERTIES";
|
||||
case CL_INVALID_COMMAND_QUEUE:
|
||||
return "CL_INVALID_COMMAND_QUEUE";
|
||||
case CL_INVALID_HOST_PTR:
|
||||
return "CL_INVALID_HOST_PTR";
|
||||
case CL_INVALID_MEM_OBJECT:
|
||||
return "CL_INVALID_MEM_OBJECT";
|
||||
case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:
|
||||
return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
|
||||
case CL_INVALID_IMAGE_SIZE:
|
||||
return "CL_INVALID_IMAGE_SIZE";
|
||||
case CL_INVALID_SAMPLER:
|
||||
return "CL_INVALID_SAMPLER";
|
||||
case CL_INVALID_BINARY:
|
||||
return "CL_INVALID_BINARY";
|
||||
case CL_INVALID_BUILD_OPTIONS:
|
||||
return "CL_INVALID_BUILD_OPTIONS";
|
||||
case CL_INVALID_PROGRAM:
|
||||
return "CL_INVALID_PROGRAM";
|
||||
case CL_INVALID_PROGRAM_EXECUTABLE:
|
||||
return "CL_INVALID_PROGRAM_EXECUTABLE";
|
||||
case CL_INVALID_KERNEL_NAME:
|
||||
return "CL_INVALID_KERNEL_NAME";
|
||||
case CL_INVALID_KERNEL_DEFINITION:
|
||||
return "CL_INVALID_KERNEL_DEFINITION";
|
||||
case CL_INVALID_KERNEL:
|
||||
return "CL_INVALID_KERNEL";
|
||||
case CL_INVALID_ARG_INDEX:
|
||||
return "CL_INVALID_ARG_INDEX";
|
||||
case CL_INVALID_ARG_VALUE:
|
||||
return "CL_INVALID_ARG_VALUE";
|
||||
case CL_INVALID_ARG_SIZE:
|
||||
return "CL_INVALID_ARG_SIZE";
|
||||
case CL_INVALID_KERNEL_ARGS:
|
||||
return "CL_INVALID_KERNEL_ARGS";
|
||||
case CL_INVALID_WORK_DIMENSION:
|
||||
return "CL_INVALID_WORK_DIMENSION";
|
||||
case CL_INVALID_WORK_GROUP_SIZE:
|
||||
return "CL_INVALID_WORK_GROUP_SIZE";
|
||||
case CL_INVALID_WORK_ITEM_SIZE:
|
||||
return "CL_INVALID_WORK_ITEM_SIZE";
|
||||
case CL_INVALID_GLOBAL_OFFSET:
|
||||
return "CL_INVALID_GLOBAL_OFFSET";
|
||||
case CL_INVALID_EVENT_WAIT_LIST:
|
||||
return "CL_INVALID_EVENT_WAIT_LIST";
|
||||
case CL_INVALID_EVENT:
|
||||
return "CL_INVALID_EVENT";
|
||||
case CL_INVALID_OPERATION:
|
||||
return "CL_INVALID_OPERATION";
|
||||
case CL_INVALID_GL_OBJECT:
|
||||
return "CL_INVALID_GL_OBJECT";
|
||||
case CL_INVALID_BUFFER_SIZE:
|
||||
return "CL_INVALID_BUFFER_SIZE";
|
||||
#ifdef CL_VERSION_1_1
|
||||
case CL_INVALID_MIP_LEVEL:
|
||||
return "CL_INVALID_MIP_LEVEL";
|
||||
case CL_INVALID_GLOBAL_WORK_SIZE:
|
||||
return "CL_INVALID_GLOBAL_WORK_SIZE";
|
||||
case CL_INVALID_PROPERTY:
|
||||
return "CL_INVALID_PROPERTY";
|
||||
#ifdef cl_ext_device_fission
|
||||
case CL_DEVICE_PARTITION_FAILED_EXT:
|
||||
return "CL_DEVICE_PARTITION_FAILED_EXT";
|
||||
case CL_INVALID_PARTITION_COUNT_EXT:
|
||||
return "CL_INVALID_PARTITION_COUNT_EXT";
|
||||
case CL_INVALID_PARTITION_NAME_EXT:
|
||||
return "CL_INVALID_PARTITION_NAME_EXT";
|
||||
#endif
|
||||
#endif
|
||||
#ifdef CL_VERSION_1_2
|
||||
case CL_INVALID_IMAGE_DESCRIPTOR:
|
||||
return "CL_INVALID_IMAGE_DESCRIPTOR";
|
||||
case CL_INVALID_COMPILER_OPTIONS:
|
||||
return "CL_INVALID_COMPILER_OPTIONS";
|
||||
case CL_INVALID_LINKER_OPTIONS:
|
||||
return "CL_INVALID_LINKER_OPTIONS";
|
||||
case CL_INVALID_DEVICE_PARTITION_COUNT:
|
||||
return "CL_INVALID_DEVICE_PARTITION_COUNT";
|
||||
#endif
|
||||
#ifdef CL_VERSION_2_0
|
||||
case CL_INVALID_PIPE_SIZE:
|
||||
return "CL_INVALID_PIPE_SIZE";
|
||||
case CL_INVALID_DEVICE_QUEUE:
|
||||
return "CL_INVALID_DEVICE_QUEUE";
|
||||
#endif
|
||||
#ifdef CL_VERSION_2_2
|
||||
case CL_INVALID_SPEC_ID:
|
||||
return "CL_INVALID_SPEC_ID";
|
||||
case CL_MAX_SIZE_RESTRICTION_EXCEEDED:
|
||||
return "CL_MAX_SIZE_RESTRICTION_EXCEEDED";
|
||||
#endif
|
||||
#ifdef cl_khr_icd
|
||||
case CL_PLATFORM_NOT_FOUND_KHR:
|
||||
return "CL_PLATFORM_NOT_FOUND_KHR";
|
||||
#endif
|
||||
default:
|
||||
return "UNKNOWN CL ERROR CODE";
|
||||
}
|
||||
}
|
||||
|
||||
void convertToStr(char **source, size_t* sourceSize, const std::string fname)
|
||||
{
|
||||
FILE *fp = fopen(fname.c_str(), "r");
|
||||
fseek(fp, 0, SEEK_END);
|
||||
*sourceSize = ftell(fp);
|
||||
fseek(fp , 0, SEEK_SET);
|
||||
*source = (char *)malloc(*sourceSize * sizeof(char));
|
||||
fread(*source, 1, *sourceSize, fp);
|
||||
fclose(fp);
|
||||
|
||||
}
|
||||
|
||||
int CLHelper::Init(const std::string &filename, InputFlags &in_flags)
|
||||
{
|
||||
cl_int status = 0;
|
||||
size_t deviceListSize;
|
||||
unsigned int i;
|
||||
|
||||
/*
|
||||
* Have a look at the available platforms and pick either
|
||||
* the AMD one if available or a reasonable default.
|
||||
*/
|
||||
cl_uint numPlatforms;
|
||||
platform = NULL;
|
||||
status = clGetPlatformIDs(0, NULL, &numPlatforms);
|
||||
if(status != CL_SUCCESS)
|
||||
{
|
||||
fprintf(stderr,"clGetPlatformIDs failed. %u",numPlatforms);
|
||||
return 1;
|
||||
}
|
||||
if (0 < numPlatforms)
|
||||
{
|
||||
cl_platform_id* platforms = (cl_platform_id*)malloc(numPlatforms * sizeof(cl_platform_id));
|
||||
status = clGetPlatformIDs(numPlatforms, platforms, NULL);
|
||||
if(status != CL_SUCCESS)
|
||||
{
|
||||
fprintf(stderr, "clGetPlatformIDs failed: %s\n", get_cl_err_string(status) );
|
||||
return 1;
|
||||
}
|
||||
for (i = 0; i < numPlatforms; ++i)
|
||||
{
|
||||
char pbuf[100];
|
||||
status = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, sizeof(pbuf), pbuf, NULL);
|
||||
|
||||
if(status != CL_SUCCESS)
|
||||
{
|
||||
fprintf(stderr,"clGetPlatformInfo failed: %s\n", get_cl_err_string(status));
|
||||
return 1;
|
||||
}
|
||||
|
||||
platform = platforms[i];
|
||||
if (!strcmp(pbuf, "Advanced Micro Devices, Inc."))
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
free(platforms);
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
// Create an OpenCL context
|
||||
/////////////////////////////////////////////////////////////////
|
||||
|
||||
cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 };
|
||||
cl_context_properties* cprops = (NULL == platform) ? NULL : cps;
|
||||
context = clCreateContextFromType(cprops, CL_DEVICE_TYPE_GPU, NULL, NULL, &status);
|
||||
if(status != CL_SUCCESS)
|
||||
{
|
||||
printf("status: %d", status);
|
||||
fprintf(stderr,"Error: Creating Context. (clCreateContextFromType): %s\n", get_cl_err_string(status));
|
||||
return 1;
|
||||
}
|
||||
/* First, get the size of device list data */
|
||||
status = clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES, sizeof(size_t), &deviceListSize, NULL);
|
||||
if(status != CL_SUCCESS)
|
||||
{
|
||||
fprintf(stderr,"Error: Getting Context Info (device list size, clGetContextInfo): %s\n", get_cl_err_string(status));
|
||||
return 1;
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
// Detect OpenCL devices
|
||||
/////////////////////////////////////////////////////////////////
|
||||
devices = (cl_device_id *)malloc(deviceListSize * sizeof(cl_device_id));
|
||||
if(devices == 0)
|
||||
{
|
||||
fprintf(stderr,"Error: No devices found: %s\n", get_cl_err_string(status));
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Now, get the device list data */
|
||||
status = clGetContextInfo( context, CL_CONTEXT_DEVICES, deviceListSize*sizeof(cl_device_id), devices, NULL);
|
||||
if(status != CL_SUCCESS)
|
||||
{
|
||||
fprintf(stderr,"Error: Getting Context Info (device list, clGetContextInfo): %s\n", get_cl_err_string(status));
|
||||
return 1;
|
||||
}
|
||||
|
||||
char *deviceName;
|
||||
size_t dev_name_size = 0;
|
||||
|
||||
int deviceNum = in_flags.GetValueInt("device");
|
||||
|
||||
clGetDeviceInfo(devices[deviceNum], CL_DEVICE_NAME, sizeof(char*), NULL, &dev_name_size);
|
||||
deviceName = (char *)malloc(sizeof(char)*dev_name_size);
|
||||
|
||||
clGetDeviceInfo(devices[deviceNum], CL_DEVICE_NAME, sizeof(deviceName), deviceName, NULL);
|
||||
printf("Device Name: %s\n", deviceName);
|
||||
|
||||
bool use_gcn3 = false;
|
||||
bool use_gcn2 = false;
|
||||
char *found_gfx8 = strstr(deviceName, "gfx8");
|
||||
char *found_gfx7 = strstr(deviceName, "gfx7");
|
||||
if (found_gfx8 != NULL)
|
||||
use_gcn3 = true;
|
||||
if (found_gfx7 != NULL)
|
||||
use_gcn2 = true;
|
||||
|
||||
free(deviceName);
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
// Create an OpenCL command queue
|
||||
/////////////////////////////////////////////////////////////////
|
||||
commandQueue = clCreateCommandQueue(context, devices[deviceNum], CL_QUEUE_PROFILING_ENABLE, &status);
|
||||
if(status != CL_SUCCESS)
|
||||
{
|
||||
fprintf(stderr,"Creating Command Queue. (clCreateCommandQueue): %s\n", get_cl_err_string(status));
|
||||
return 1;
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
// Load CL file, build CL program object, create CL kernel object
|
||||
/////////////////////////////////////////////////////////////////
|
||||
char* source;
|
||||
size_t sourceSize;
|
||||
convertToStr(&source, &sourceSize, filename);
|
||||
|
||||
syncfree_program = clCreateProgramWithSource(context, 1, (const char**)&source, &sourceSize, &status);
|
||||
if(status != CL_SUCCESS)
|
||||
{
|
||||
fprintf(stderr,"Error: Loading Binary into cl_program (clCreateProgramWithBinary): %s\n", get_cl_err_string(status));
|
||||
return 1;
|
||||
}
|
||||
analyze_levelset_program = clCreateProgramWithSource(context, 1, (const char**)&source, &sourceSize, &status);
|
||||
if(status != CL_SUCCESS)
|
||||
{
|
||||
fprintf(stderr,"Error: Loading Binary into cl_program (clCreateProgramWithBinary): %s\n", get_cl_err_string(status));
|
||||
return 1;
|
||||
}
|
||||
|
||||
std::string buildFlags = "-x clc++ -Dcl_khr_int64_base_atomics=1 -cl-std=CL2.0";
|
||||
if (use_gcn3)
|
||||
buildFlags += " -DGCN3 ";
|
||||
if (use_gcn2)
|
||||
buildFlags += " -DGCN2 ";
|
||||
buildFlags += " -DROW_BITS=" + std::to_string(ROW_BITS);
|
||||
buildFlags += " -DWG_BITS=" + std::to_string(WG_BITS);
|
||||
buildFlags += " -DWF_SIZE=" + std::to_string(WF_SIZE);
|
||||
buildFlags += " -DWF_PER_WG=" + std::to_string(WF_PER_WG);
|
||||
#ifdef USE_DOUBLE
|
||||
buildFlags += " -DDOUBLE";
|
||||
#endif
|
||||
|
||||
/* create a cl program executable for all the devices specified */
|
||||
status = clBuildProgram(analyze_levelset_program, 1, &devices[deviceNum], buildFlags.c_str(), NULL, NULL);
|
||||
if(status != CL_SUCCESS)
|
||||
{
|
||||
printf("Error: Building Analyze and Levelset Program (clBuildProgram): %d\n", status);
|
||||
char * errorbuf = (char*)calloc(sizeof(char),1024*1024);
|
||||
size_t size;
|
||||
clGetProgramBuildInfo(analyze_levelset_program, devices[deviceNum], CL_PROGRAM_BUILD_LOG, 1024*1024, errorbuf, &size);
|
||||
printf("%s ", errorbuf);
|
||||
return 1;
|
||||
}
|
||||
|
||||
buildFlags += " -DSYNCFREE_KERNEL";
|
||||
status = clBuildProgram(syncfree_program, 1, &devices[deviceNum], buildFlags.c_str(), NULL, NULL);
|
||||
if(status != CL_SUCCESS)
|
||||
{
|
||||
printf("Error: Building Syncfree Program (clBuildProgram): %d\n", status);
|
||||
char * errorbuf = (char*)calloc(sizeof(char),1024*1024);
|
||||
size_t size;
|
||||
clGetProgramBuildInfo(syncfree_program, devices[deviceNum], CL_PROGRAM_BUILD_LOG, 1024*1024, errorbuf, &size);
|
||||
printf("%s ", errorbuf);
|
||||
return 1;
|
||||
}
|
||||
|
||||
SpTSKernel = clCreateKernel(syncfree_program, "amd_spts_syncfree_solve", &status);
|
||||
if(status != CL_SUCCESS)
|
||||
{
|
||||
fprintf(stderr,"Error: Creating Kernel from program. (SpTS): %s\n", get_cl_err_string(status));
|
||||
return 1;
|
||||
}
|
||||
|
||||
SpTSKernel_analyze = clCreateKernel(analyze_levelset_program, "amd_spts_analyze_and_solve", &status);
|
||||
if(status != CL_SUCCESS)
|
||||
{
|
||||
fprintf(stderr,"Error: Creating Kernel from program. (SpTS_analyze): %s\n", get_cl_err_string(status));
|
||||
return 1;
|
||||
}
|
||||
|
||||
SpTSKernel_levelset = clCreateKernel(analyze_levelset_program, "amd_spts_levelset_solve", &status);
|
||||
if(status != CL_SUCCESS)
|
||||
{
|
||||
fprintf(stderr,"Error: Creating Kernel from program. (SpTS_levelset): %s\n", get_cl_err_string(status));
|
||||
return 1;
|
||||
}
|
||||
|
||||
SpTSKernel_scalar = clCreateKernel(analyze_levelset_program, "amd_spts_scalar_solve", &status);
|
||||
if(status != CL_SUCCESS)
|
||||
{
|
||||
fprintf(stderr,"Error: Creating Kernel from program. (SpTS_scalar): %s\n", get_cl_err_string(status));
|
||||
return 1;
|
||||
}
|
||||
|
||||
SpTSKernel_vector = clCreateKernel(analyze_levelset_program, "amd_spts_vector_solve", &status);
|
||||
if(status != CL_SUCCESS)
|
||||
{
|
||||
fprintf(stderr,"Error: Creating Kernel from program. (SpTS_vector): %s\n", get_cl_err_string(status));
|
||||
return 1;
|
||||
}
|
||||
|
||||
SpTSKernel_levelsync = clCreateKernel(analyze_levelset_program, "amd_spts_levelsync_solve", &status);
|
||||
if(status != CL_SUCCESS)
|
||||
{
|
||||
fprintf(stderr,"Error: Creating Kernel from program. (SpTS_levelsync): %s\n", get_cl_err_string(status));
|
||||
return 1;
|
||||
}
|
||||
|
||||
// All good
|
||||
return 0;
|
||||
}
|
||||
|
||||
void CLHelper::checkStatus(cl_int status, const std::string errString)
|
||||
{
|
||||
if (status != CL_SUCCESS)
|
||||
{
|
||||
std::cerr << errString << " : " << get_cl_err_string(status) << std::endl;
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
memPointer CLHelper::AllocateMem(const std::string name,
|
||||
size_t size,
|
||||
memPointer_flags flags,
|
||||
void *hostBuffer)
|
||||
{
|
||||
cl_mem buf;
|
||||
cl_int status;
|
||||
|
||||
buf = clCreateBuffer(context, flags, size, hostBuffer, &status);
|
||||
std::string errString = "OpenCL error allocating " + name + " !";
|
||||
checkStatus(status, errString);
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
void CLHelper::CopyToDevice(memPointer devBuffer,
|
||||
void *hostBuffer,
|
||||
size_t size,
|
||||
size_t offset,
|
||||
cl_bool blocking,
|
||||
cl_event *ev)
|
||||
{
|
||||
cl_int status;
|
||||
status = clEnqueueWriteBuffer(commandQueue, devBuffer, blocking, offset, size, hostBuffer, 0, NULL, ev);
|
||||
|
||||
checkStatus(status, "OpenCL error copying data to device !");
|
||||
}
|
||||
|
||||
void CLHelper::CopyToHost(memPointer devBuffer,
|
||||
void *hostBuffer,
|
||||
size_t size,
|
||||
size_t offset,
|
||||
cl_bool blocking,
|
||||
cl_event *ev)
|
||||
{
|
||||
cl_int status;
|
||||
status = clEnqueueReadBuffer(commandQueue, devBuffer, blocking, offset, size, hostBuffer, 0, NULL, ev);
|
||||
|
||||
checkStatus(status, "OpenCL error copying data to device !");
|
||||
}
|
||||
|
||||
int64_t CLHelper::ComputeTime(cl_event event)
|
||||
{
|
||||
int64_t start_time, end_time;
|
||||
|
||||
clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START, sizeof(int64_t), &start_time, NULL);
|
||||
clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, sizeof(int64_t), &end_time, NULL);
|
||||
|
||||
return end_time - start_time;
|
||||
}
|
||||
@@ -0,0 +1,108 @@
|
||||
/********************************************************************************
|
||||
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
********************************************************************************/
|
||||
#ifndef CLHelper_H
|
||||
#define CLHelper_H
|
||||
|
||||
#define CL_USE_DEPRECATED_OPENCL_2_0_APIS
|
||||
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
|
||||
#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
|
||||
|
||||
#include <CL/cl.h>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include "InputFlags.h"
|
||||
#include "GPUHelper.h"
|
||||
|
||||
struct LocalMemArg
|
||||
{
|
||||
LocalMemArg(size_t _size) : size(_size) {}
|
||||
size_t GetSize() const { return size; }
|
||||
|
||||
private:
|
||||
size_t size;
|
||||
};
|
||||
|
||||
class CLHelper : public GPUHelper
|
||||
{
|
||||
cl_platform_id platform;
|
||||
cl_device_id *devices;
|
||||
cl_program syncfree_program;
|
||||
cl_program analyze_levelset_program;
|
||||
|
||||
public:
|
||||
static cl_context context;
|
||||
static cl_kernel SpTSKernel;
|
||||
static cl_kernel SpTSKernel_analyze;
|
||||
static cl_kernel SpTSKernel_levelset;
|
||||
static cl_kernel SpTSKernel_scalar;
|
||||
static cl_kernel SpTSKernel_vector;
|
||||
static cl_kernel SpTSKernel_levelsync;
|
||||
static cl_command_queue commandQueue;
|
||||
|
||||
CLHelper() {}
|
||||
int Init(const std::string &_filename, InputFlags &in_flags);
|
||||
void checkStatus(gpuError status, const std::string errString);
|
||||
void CopyToDevice(memPointer _d_buf, void *_h_buf, size_t _size, size_t _offset, cl_bool _blocking, cl_event *_ev);
|
||||
void CopyToHost(memPointer _d_buf, void *_h_buf, size_t _size, size_t _offset, cl_bool _blocking, cl_event *_ev);
|
||||
memPointer AllocateMem(const std::string name, size_t, memPointer_flags flags, void *);
|
||||
void FreeMem(memPointer ptr) { clReleaseMemObject(ptr); }
|
||||
void Flush() { clFinish(commandQueue); }
|
||||
|
||||
template<typename T, typename... Args>
|
||||
void SetArgs(cl_kernel, int i, const T& first, const Args&... rest);
|
||||
template<typename... Args>
|
||||
void SetArgs(cl_kernel, int i, const LocalMemArg &lmem, const Args&... rest);
|
||||
void SetArgs(cl_kernel, int i) {}
|
||||
|
||||
int64_t ComputeTime(cl_event event);
|
||||
|
||||
};
|
||||
|
||||
template<typename T, typename... Args>
|
||||
void CLHelper::SetArgs(cl_kernel kernel, int i, const T& first, const Args&... rest)
|
||||
{
|
||||
cl_int status;
|
||||
|
||||
status = clSetKernelArg(kernel, i++, sizeof(T), (void *)& first);
|
||||
std::stringstream errStream;
|
||||
errStream<<"OpenCL error setting kernel argument "<<i;
|
||||
checkStatus(status, errStream.str()) ;
|
||||
|
||||
SetArgs(kernel, i, rest...);
|
||||
}
|
||||
|
||||
template<typename... Args>
|
||||
void CLHelper::SetArgs(cl_kernel kernel, int i, const LocalMemArg &lmem, const Args&... rest)
|
||||
{
|
||||
cl_int status;
|
||||
status = clSetKernelArg(kernel, i++, lmem.GetSize(), NULL);
|
||||
std::stringstream errStream;
|
||||
errStream<<"OpenCL error setting kernel argument (local memory) "<<i;
|
||||
checkStatus(status, errStream.str()) ;
|
||||
|
||||
SetArgs(kernel, i, rest...);
|
||||
|
||||
}
|
||||
|
||||
#endif //CLHelper_H
|
||||
|
||||
تفاوت فایلی نمایش داده نمی شود زیرا این فایل بسیار بزرگ است
Diff را بارگزاری کن
@@ -0,0 +1,287 @@
|
||||
/********************************************************************************
|
||||
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
********************************************************************************/
|
||||
#ifndef SparseMatrix_H
|
||||
#define SparseMatrix_H
|
||||
|
||||
#include "GPUHelper.h"
|
||||
#ifndef USE_HIP
|
||||
#include "OpenCLHelper.h"
|
||||
#include <CL/cl.h>
|
||||
#else
|
||||
#include "HIPHelper.h"
|
||||
#endif
|
||||
|
||||
#include "InputFlags.h"
|
||||
#include "MatrixMarketReader.h"
|
||||
#include "OpenCLHelper.h"
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
|
||||
template<typename FloatType>
|
||||
class SparseMatrix
|
||||
{
|
||||
|
||||
public:
|
||||
int nRows;
|
||||
int nCols;
|
||||
int nNZ;
|
||||
|
||||
int *cols;
|
||||
int *row_ptrs;
|
||||
|
||||
FloatType *vals;
|
||||
|
||||
memPointer d_cols;
|
||||
memPointer d_vals;
|
||||
memPointer d_row_ptrs;
|
||||
|
||||
// info about parallel procs
|
||||
int this_pe;
|
||||
int total_pes;
|
||||
|
||||
int nRows_p;
|
||||
int nCols_p;
|
||||
|
||||
protected:
|
||||
|
||||
GPUHelper *GPU;
|
||||
|
||||
public:
|
||||
|
||||
SparseMatrix() : nRows(0), nCols(0), nNZ(0), nRows_p(0), nCols_p(0)
|
||||
{
|
||||
cols = NULL;
|
||||
row_ptrs = NULL;
|
||||
vals = NULL;
|
||||
|
||||
d_cols = NULL;
|
||||
d_vals = NULL;
|
||||
d_row_ptrs = NULL;
|
||||
|
||||
this_pe = -1;//roc_shmem_my_pe(handle); // this pe
|
||||
total_pes = -1;//roc_shmem_n_pes(handle); // total number of pes
|
||||
|
||||
}
|
||||
void AllocateSparseMatrix(MatrixMarketReader<FloatType> &mm_reader,
|
||||
InputFlags &in_flags,
|
||||
GPUHelper *gpu);
|
||||
void AllocateParallelSparseMatrix(MatrixMarketReader<FloatType> &mm_reader,
|
||||
InputFlags &in_flags);
|
||||
void ConvertFromCOOToCSR(Coordinate<FloatType> *coords,
|
||||
InputFlags &in_flags);
|
||||
|
||||
void PopulateParallelSparseMatrix(MatrixMarketReader<FloatType> &mm_reader,
|
||||
InputFlags &in_flags);
|
||||
|
||||
void FindStatsForParallelDecomposition();
|
||||
|
||||
void Set_total_pes(int val){
|
||||
this->total_pes = val;
|
||||
}
|
||||
void Set_this_pe(int val){
|
||||
this->this_pe = val;
|
||||
}
|
||||
|
||||
int Get_total_pes(){
|
||||
return this->total_pes;
|
||||
}
|
||||
int Get_this_pe(){
|
||||
return this->this_pe;
|
||||
}
|
||||
|
||||
int GetNumRows_p() {return nRows_p;}
|
||||
|
||||
int *GetCols() { return cols; }
|
||||
FloatType *GetVals() { return vals; }
|
||||
int *GetRowPtrs() { return row_ptrs; }
|
||||
|
||||
memPointer GetDevCols() {return d_cols; }
|
||||
memPointer GetDevVals() {return d_vals; }
|
||||
memPointer GetDevRowPtrs() {return d_row_ptrs; }
|
||||
|
||||
~SparseMatrix()
|
||||
{
|
||||
delete[] cols;
|
||||
delete[] vals;
|
||||
delete[] row_ptrs;
|
||||
|
||||
GPU->FreeMem(d_cols);
|
||||
GPU->FreeMem(d_vals);
|
||||
GPU->FreeMem(d_row_ptrs);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename FloatType>
|
||||
void SparseMatrix<FloatType>::AllocateSparseMatrix(MatrixMarketReader<FloatType> &mm_reader,
|
||||
InputFlags &in_flags,
|
||||
GPUHelper *gpu)
|
||||
{
|
||||
GPU = gpu;
|
||||
nRows = mm_reader.GetNumRows();
|
||||
nCols = mm_reader.GetNumCols();
|
||||
nNZ = mm_reader.GetNumNonZeroes();
|
||||
printf("Allocating a sparse matrix with-- nRows: %d nCols: %d nNZ: %d\n", nRows, nCols, nNZ);
|
||||
|
||||
assert(total_pes != -1);
|
||||
assert(this_pe != -1);
|
||||
|
||||
#ifdef USE_RO_SHMEM
|
||||
if (nRows != nCols){
|
||||
fprintf(stderr, "RO_SHMEM port requires the global matrix to be "
|
||||
"square!\n");
|
||||
exit(-1);
|
||||
}
|
||||
#endif
|
||||
|
||||
cols = new int[nNZ];
|
||||
if (cols == NULL)
|
||||
{
|
||||
fprintf(stderr, "Failed to allocate host-side cols array !\n");
|
||||
exit(-1);
|
||||
}
|
||||
vals = new FloatType[nNZ];
|
||||
if (vals == NULL)
|
||||
{
|
||||
fprintf(stderr, "Failed to allocate host-side vals array !\n");
|
||||
exit(-1);
|
||||
}
|
||||
row_ptrs = new int[nRows + 1];
|
||||
if (row_ptrs == NULL)
|
||||
{
|
||||
fprintf(stderr, "Failed to allocate host-side row_ptrs array !\n");
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename FloatType>
|
||||
bool CoordinateCompare(const Coordinate<FloatType> &c1, const Coordinate<FloatType> &c2)
|
||||
{
|
||||
if(c1.x != c2.x)
|
||||
return (c1.x < c2.x);
|
||||
else
|
||||
return (c1.y < c2.y);
|
||||
}
|
||||
|
||||
template<typename FloatType>
|
||||
void SparseMatrix<FloatType>::ConvertFromCOOToCSR(Coordinate<FloatType> *coords,
|
||||
InputFlags &in_flags)
|
||||
{
|
||||
std::sort(coords, coords + nNZ, CoordinateCompare<FloatType>);
|
||||
|
||||
int current_row = 1;
|
||||
bool has_seen_diagonal = false;
|
||||
row_ptrs[0] = 0;
|
||||
for (int i = 0; i < nNZ; i++)
|
||||
{
|
||||
cols[i] = coords[i].y;
|
||||
vals[i] = coords[i].val;
|
||||
//fprintf(stderr,"Row %d Col %d Val %lf (cur_row: %d)\n", coords[i].x, coords[i].y, coords[i].val, current_row-1);
|
||||
|
||||
while(coords[i].x >= current_row)
|
||||
{
|
||||
// We've reached the end of a row. Did we see a diagonal?
|
||||
// If not, the triangular solve will be underconstrained.
|
||||
if (!has_seen_diagonal)
|
||||
{
|
||||
fprintf(stderr, "ERROR Converting the COO to CSR.\n");
|
||||
fprintf(stderr, "\tMissing diagonal on row %d\n", current_row-1);
|
||||
exit(-1);
|
||||
}
|
||||
has_seen_diagonal = false;
|
||||
row_ptrs[current_row] = i;
|
||||
current_row++;
|
||||
}
|
||||
if (coords[i].x == coords[i].y)
|
||||
has_seen_diagonal = true;
|
||||
|
||||
}
|
||||
row_ptrs[current_row++] = nNZ;
|
||||
while (current_row <= nRows)
|
||||
{
|
||||
if (!has_seen_diagonal)
|
||||
{
|
||||
fprintf(stderr, "ERROR Converting the COO to CSR.\n");
|
||||
fprintf(stderr, "\tNo values on row %d, so no diagonal.\n", current_row-1);
|
||||
exit(-1);
|
||||
}
|
||||
has_seen_diagonal = false;
|
||||
row_ptrs[current_row++] = nNZ;
|
||||
}
|
||||
}
|
||||
|
||||
template<typename FloatType>
|
||||
void SparseMatrix<FloatType>::AllocateParallelSparseMatrix(MatrixMarketReader<FloatType> &mm_reader,
|
||||
InputFlags &in_flags)
|
||||
{
|
||||
d_cols = GPU->AllocateMem("cols", nNZ*sizeof(int), 0, NULL);
|
||||
d_vals = GPU->AllocateMem("vals", nNZ*sizeof(FloatType), 0, NULL);
|
||||
d_row_ptrs = GPU->AllocateMem("row_ptrs", (nRows+1)*sizeof(int), 0, NULL);
|
||||
}
|
||||
|
||||
template<typename FloatType>
|
||||
void SparseMatrix<FloatType>::FindStatsForParallelDecomposition()
|
||||
{
|
||||
|
||||
assert(SPTS_BLOCK_SIZE % 64 == 0);
|
||||
|
||||
// Rows left over in the potentially partial final block
|
||||
int left_over_last_block = nRows % SPTS_BLOCK_SIZE;
|
||||
printf("%d: lolb %d\n", this_pe, left_over_last_block);
|
||||
// Number of complete blocks, not including any partial block at the end
|
||||
int total_blocks = nRows / SPTS_BLOCK_SIZE;
|
||||
printf("%d: totb %d\n", this_pe, total_blocks);
|
||||
|
||||
// Everyone has at least this many rows
|
||||
nRows_p = (total_blocks / total_pes) * SPTS_BLOCK_SIZE;
|
||||
printf("%d: initial nRows_p %d\n", this_pe, nRows_p);
|
||||
|
||||
// Last cycle might not assign to all PEs
|
||||
int straggler_blocks = total_blocks % total_pes;
|
||||
if (this_pe < straggler_blocks)
|
||||
nRows_p += SPTS_BLOCK_SIZE;
|
||||
printf("%d: straggler nRows_p %d\n", this_pe, nRows_p);
|
||||
|
||||
// Last block of last cycle might have less than SPTS_BLOCK_SIZE rows
|
||||
if (left_over_last_block) {
|
||||
int final_pe = ((total_blocks + 1) % total_pes) - 1;
|
||||
if (final_pe == -1)
|
||||
final_pe = total_pes - 1;
|
||||
if (this_pe == final_pe)
|
||||
nRows_p += left_over_last_block;
|
||||
}
|
||||
printf("%d: final nRows_p %d\n", this_pe, nRows_p);
|
||||
|
||||
if (nRows_p <= 0) {
|
||||
fprintf(stderr, "Block Size %d too small for input row size %d with "
|
||||
"%d number of nodes. Please decrease the block size or "
|
||||
"decrease the number of nodes\n", SPTS_BLOCK_SIZE, nRows,
|
||||
total_pes);
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
// print to check!
|
||||
printf("\nPE: %d total_rows: %d my_rows: %d\n", this_pe, nRows, nRows_p);
|
||||
|
||||
nCols_p = nCols; // 1D decomposition
|
||||
}
|
||||
|
||||
#endif
|
||||
+16
@@ -0,0 +1,16 @@
|
||||
#!/bin/bash
|
||||
|
||||
src_path=$(dirname "$(realpath $0)")/..
|
||||
|
||||
cmake \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DCMAKE_VERBOSE_MAKEFILE=OFF \
|
||||
-DUSE_ROC_SHMEM=OFF \
|
||||
-DUSE_HIP=ON \
|
||||
-DALL_ANALYZE=ON \
|
||||
-DUSE_DOUBLE=OFF \
|
||||
-DALL_LEVELSET=OFF \
|
||||
-DALL_LEVELSYNC=OFF \
|
||||
-DALL_SYNCFREE=OFF \
|
||||
$src_path
|
||||
cmake --build . --parallel 8
|
||||
+16
@@ -0,0 +1,16 @@
|
||||
#!/bin/bash
|
||||
|
||||
src_path=$(dirname "$(realpath $0)")/..
|
||||
|
||||
cmake \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DCMAKE_VERBOSE_MAKEFILE=OFF \
|
||||
-DUSE_ROC_SHMEM=OFF \
|
||||
-DUSE_HIP=OFF \
|
||||
-DALL_ANALYZE=ON \
|
||||
-DUSE_DOUBLE=OFF \
|
||||
-DALL_LEVELSET=OFF \
|
||||
-DALL_LEVELSYNC=OFF \
|
||||
-DALL_SYNCFREE=OFF \
|
||||
$src_path
|
||||
cmake --build . --parallel 8
|
||||
+24
@@ -0,0 +1,24 @@
|
||||
#!/bin/bash
|
||||
|
||||
if [ -z $1 ]
|
||||
then
|
||||
install_path=~/rocshmem
|
||||
else
|
||||
install_path=$1
|
||||
fi
|
||||
|
||||
src_path=$(dirname "$(realpath $0)")/..
|
||||
|
||||
cmake \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DCMAKE_VERBOSE_MAKEFILE=OFF \
|
||||
-DUSE_ROC_SHMEM=ON \
|
||||
-DUSE_HIP=ON \
|
||||
-DALL_ANALYZE=ON \
|
||||
-DUSE_DOUBLE=OFF \
|
||||
-DALL_LEVELSET=OFF \
|
||||
-DALL_LEVELSYNC=OFF \
|
||||
-DALL_SYNCFREE=OFF \
|
||||
-Drocshmem_DIR=$install_path/share/cmake/rocshmem \
|
||||
$src_path
|
||||
cmake --build . --parallel 8
|
||||
@@ -0,0 +1,7 @@
|
||||
#cmakedefine USE_ROC_SHMEM
|
||||
#cmakedefine USE_HIP
|
||||
#cmakedefine ALL_ANALYZE
|
||||
#cmakedefine USE_DOUBLE
|
||||
#cmakedefine ALL_LEVELSET
|
||||
#cmakedefine ALL_LEVELSYNC
|
||||
#cmakedefine ALL_SYNCFREE
|
||||
+43
@@ -0,0 +1,43 @@
|
||||
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
|
||||
#!/bin/bash
|
||||
|
||||
echo Test Name $2
|
||||
|
||||
INPUTS=/mnt/mlebeane/spts_data
|
||||
|
||||
case $2 in
|
||||
*"single_thread")
|
||||
mpirun -np 2 $1 -f $INPUTS/test_matrices/diagonal_large.mtx -a 2 -b 512 -p 64 -v -i 3 > $3/diagonal_large_bput.log
|
||||
mpirun -np 2 $1 -f $INPUTS/test_matrices/not_quite_diagonal.mtx -a 2 -b 256 -p 64 -v -i 3 > $3/not_quite_diagonal_bput.log
|
||||
;;
|
||||
*"multi_thread")
|
||||
mpirun -np 2 $1 -f $INPUTS/test_matrices/diagonal_large.mtx -a 2 -b 512 -p 64 -v -i 3 > $3/diagonal_large_bput.log
|
||||
mpirun -np 2 $1 -f $INPUTS/test_matrices/not_quite_diagonal.mtx -a 2 -b 256 -p 64 -v -i 3 > $3/not_quite_diagonal_bput.log
|
||||
mpirun -np 2 $1 -f $INPUTS/test_matrices/not_quite_diagonal.mtx -a 1 -b 256 -v -i 3 > $3/not_quite_diagonal_get.log
|
||||
;;
|
||||
*)
|
||||
echo "UNKNOWN TEST TYPE: $2"
|
||||
exit -1
|
||||
;;
|
||||
esac
|
||||
|
||||
exit $?
|
||||
@@ -0,0 +1,86 @@
|
||||
/*
|
||||
* Matrix Market I/O library for ANSI C
|
||||
*
|
||||
* See http://math.nist.gov/MatrixMarket for details.
|
||||
*
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef MM_IO_H
|
||||
#define MM_IO_H
|
||||
|
||||
/********************* MM_typecode query fucntions ***************************/
|
||||
|
||||
#define mm_is_matrix(typecode) ((typecode)[0]=='M')
|
||||
|
||||
#define mm_is_sparse(typecode) ((typecode)[1]=='C')
|
||||
#define mm_is_coordinate(typecode)((typecode)[1]=='C')
|
||||
#define mm_is_dense(typecode) ((typecode)[1]=='A')
|
||||
#define mm_is_array(typecode) ((typecode)[1]=='A')
|
||||
|
||||
#define mm_is_complex(typecode) ((typecode)[2]=='C')
|
||||
#define mm_is_real(typecode) ((typecode)[2]=='R')
|
||||
#define mm_is_pattern(typecode) ((typecode)[2]=='P')
|
||||
#define mm_is_integer(typecode) ((typecode)[2]=='I')
|
||||
|
||||
#define mm_is_symmetric(typecode)((typecode)[3]=='S')
|
||||
#define mm_is_general(typecode) ((typecode)[3]=='G')
|
||||
#define mm_is_skew(typecode) ((typecode)[3]=='K')
|
||||
#define mm_is_hermitian(typecode)((typecode)[3]=='H')
|
||||
|
||||
/********************* MM_typecode modify fucntions ***************************/
|
||||
|
||||
#define mm_set_matrix(typecode) ((typecode)[0]='M')
|
||||
#define mm_set_coordinate(typecode) ((typecode)[1]='C')
|
||||
#define mm_set_array(typecode) ((typecode)[1]='A')
|
||||
#define mm_set_dense(typecode) mm_set_array(typecode)
|
||||
#define mm_set_sparse(typecode) mm_set_coordinate(typecode)
|
||||
|
||||
#define mm_set_complex(typecode)((typecode)[2]='C')
|
||||
#define mm_set_real(typecode) ((typecode)[2]='R')
|
||||
#define mm_set_pattern(typecode)((typecode)[2]='P')
|
||||
#define mm_set_integer(typecode)((typecode)[2]='I')
|
||||
|
||||
|
||||
#define mm_set_symmetric(typecode)((typecode)[3]='S')
|
||||
#define mm_set_general(typecode)((typecode)[3]='G')
|
||||
#define mm_set_skew(typecode) ((typecode)[3]='K')
|
||||
#define mm_set_hermitian(typecode)((typecode)[3]='H')
|
||||
|
||||
#define mm_clear_typecode(typecode) ((typecode)[0]=(typecode)[1]= \
|
||||
(typecode)[2]=' ',(typecode)[3]='G')
|
||||
|
||||
#define mm_initialize_typecode(typecode) mm_clear_typecode(typecode)
|
||||
|
||||
|
||||
/********************* Matrix Market error codes ***************************/
|
||||
|
||||
|
||||
#define MM_COULD_NOT_READ_FILE 11
|
||||
#define MM_PREMATURE_EOF 12
|
||||
#define MM_NOT_MTX 13
|
||||
#define MM_NO_HEADER 14
|
||||
#define MM_UNSUPPORTED_TYPE 15
|
||||
#define MM_LINE_TOO_LONG 16
|
||||
#define MM_COULD_NOT_WRITE_FILE 17
|
||||
|
||||
#define MM_MTX_STR "matrix"
|
||||
#define MM_ARRAY_STR "array"
|
||||
#define MM_DENSE_STR "array"
|
||||
#define MM_COORDINATE_STR "coordinate"
|
||||
#define MM_SPARSE_STR "coordinate"
|
||||
#define MM_COMPLEX_STR "complex"
|
||||
#define MM_REAL_STR "real"
|
||||
#define MM_INT_STR "integer"
|
||||
#define MM_GENERAL_STR "general"
|
||||
#define MM_SYMM_STR "symmetric"
|
||||
#define MM_HERM_STR "hermitian"
|
||||
#define MM_SKEW_STR "skew-symmetric"
|
||||
#define MM_PATTERN_STR "pattern"
|
||||
|
||||
#define MM_MAX_LINE_LENGTH 1025
|
||||
#define MM_MAX_TOKEN_LENGTH 64
|
||||
#define MatrixMarketBanner "%%MatrixMarket"
|
||||
#define MAX_RAND_VAL 5.0
|
||||
|
||||
#endif
|
||||
تفاوت فایلی نمایش داده نمی شود زیرا این فایل بسیار بزرگ است
Diff را بارگزاری کن
@@ -0,0 +1,118 @@
|
||||
pipeline {
|
||||
agent { label 'sv-pdp-5' }
|
||||
environment {
|
||||
HSA_FORCE_FINE_GRAIN_PCIE = 1
|
||||
MPI_HOME="/home/resperf/mpich-4.0.1/install/global"
|
||||
PATH = "$MPI_HOME/bin:$PATH"
|
||||
LD_LIBRARY_PATH = "$MPI_HOME/lib:$LD_LIBRARY_PATH"
|
||||
build_dir = "builds/change-${GERRIT_CHANGE_NUMBER}-${GERRIT_PATCHSET_NUMBER}"
|
||||
CMAKE_PREFIX_PATH = "/opt/rocm/lib/cmake"
|
||||
}
|
||||
stages {
|
||||
stage('Synchronize Source Code') {
|
||||
steps {
|
||||
checkout changelog: false, poll: false, scm: [$class: 'GitSCM', branches: [[name: 'FETCH_HEAD']], doGenerateSubmoduleConfigurations: false, extensions: [[$class: 'CloneOption', depth: 0, noTags: false, reference: '', shallow: false]], submoduleCfg: [], userRemoteConfigs: [[name: 'origin', refspec: '${GERRIT_REFSPEC}', url: 'ssh://gerritgit/rsch/ec/shmem']]]
|
||||
}
|
||||
}
|
||||
stage('Make Build Directory') {
|
||||
steps {
|
||||
dir("library") {
|
||||
sh "mkdir -p ${build_dir}"
|
||||
}
|
||||
}
|
||||
}
|
||||
stage('Build Source Code') {
|
||||
parallel {
|
||||
stage('RC_SINGLE') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/RC_SINGLE") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/rc_single install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/RC_SINGLE") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_SINGLE/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
dir("internal/clients/spts/${build_dir}/RC_SINGLE") {
|
||||
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_SINGLE/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RC_MULTI_WF_COAL') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/RC_MULTI_WF_COAL") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/rc_multi_wf_coal install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/RC_MULTI_WF_COAL") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_MULTI_WF_COAL/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
dir("internal/clients/spts/${build_dir}/RC_MULTI_WF_COAL") {
|
||||
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_MULTI_WF_COAL/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RC_MULTI') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/RC_MULTI") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/rc_multi install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/RC_MULTI") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_MULTI/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
dir("internal/clients/spts/${build_dir}/RC_MULTI") {
|
||||
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_MULTI/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_SINGLE') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/DC_SINGLE") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/dc_single install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/DC_SINGLE") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_SINGLE/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
dir("internal/clients/spts/${build_dir}/DC_SINGLE") {
|
||||
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/DC_SINGLE/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_MULTI') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/DC_MULTI") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/dc_multi install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/DC_MULTI") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
dir("internal/clients/spts/${build_dir}/DC_MULTI") {
|
||||
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/DC_MULTI/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,93 @@
|
||||
<?xml version='1.1' encoding='UTF-8'?>
|
||||
<flow-definition plugin="workflow-job@2.40">
|
||||
<actions>
|
||||
<org.jenkinsci.plugins.pipeline.modeldefinition.actions.DeclarativeJobAction plugin="pipeline-model-definition@1.8.4"/>
|
||||
<org.jenkinsci.plugins.pipeline.modeldefinition.actions.DeclarativeJobPropertyTrackerAction plugin="pipeline-model-definition@1.8.4">
|
||||
<jobProperties/>
|
||||
<triggers/>
|
||||
<parameters/>
|
||||
<options/>
|
||||
</org.jenkinsci.plugins.pipeline.modeldefinition.actions.DeclarativeJobPropertyTrackerAction>
|
||||
</actions>
|
||||
<description></description>
|
||||
<keepDependencies>false</keepDependencies>
|
||||
<properties>
|
||||
<org.jenkinsci.plugins.workflow.job.properties.PipelineTriggersJobProperty>
|
||||
<triggers>
|
||||
<com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.GerritTrigger plugin="gerrit-trigger@2.33.0">
|
||||
<spec></spec>
|
||||
<gerritProjects>
|
||||
<com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.data.GerritProject>
|
||||
<compareType>PLAIN</compareType>
|
||||
<pattern>rsch/ec/shmem</pattern>
|
||||
<branches>
|
||||
<com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.data.Branch>
|
||||
<compareType>PLAIN</compareType>
|
||||
<pattern>amd-master</pattern>
|
||||
</com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.data.Branch>
|
||||
</branches>
|
||||
<disableStrictForbiddenFileVerification>false</disableStrictForbiddenFileVerification>
|
||||
</com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.data.GerritProject>
|
||||
</gerritProjects>
|
||||
<dynamicGerritProjects class="empty-list"/>
|
||||
<skipVote>
|
||||
<onSuccessful>true</onSuccessful>
|
||||
<onFailed>true</onFailed>
|
||||
<onUnstable>true</onUnstable>
|
||||
<onNotBuilt>true</onNotBuilt>
|
||||
<onAborted>true</onAborted>
|
||||
</skipVote>
|
||||
<silentMode>false</silentMode>
|
||||
<enableTopicAssociation>false</enableTopicAssociation>
|
||||
<notificationLevel></notificationLevel>
|
||||
<silentStartMode>false</silentStartMode>
|
||||
<escapeQuotes>true</escapeQuotes>
|
||||
<nameAndEmailParameterMode>PLAIN</nameAndEmailParameterMode>
|
||||
<dependencyJobsNames></dependencyJobsNames>
|
||||
<commitMessageParameterMode>BASE64</commitMessageParameterMode>
|
||||
<changeSubjectParameterMode>PLAIN</changeSubjectParameterMode>
|
||||
<commentTextParameterMode>BASE64</commentTextParameterMode>
|
||||
<buildStartMessage></buildStartMessage>
|
||||
<buildFailureMessage></buildFailureMessage>
|
||||
<buildSuccessfulMessage></buildSuccessfulMessage>
|
||||
<buildUnstableMessage></buildUnstableMessage>
|
||||
<buildNotBuiltMessage></buildNotBuiltMessage>
|
||||
<buildAbortedMessage></buildAbortedMessage>
|
||||
<buildUnsuccessfulFilepath></buildUnsuccessfulFilepath>
|
||||
<customUrl></customUrl>
|
||||
<serverName>amd-gerrit</serverName>
|
||||
<triggerOnEvents>
|
||||
<com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.events.PluginCommentAddedContainsEvent>
|
||||
<commentAddedCommentContains>!COMPILE</commentAddedCommentContains>
|
||||
</com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.events.PluginCommentAddedContainsEvent>
|
||||
</triggerOnEvents>
|
||||
<dynamicTriggerConfiguration>false</dynamicTriggerConfiguration>
|
||||
<triggerConfigURL></triggerConfigURL>
|
||||
<triggerInformationAction/>
|
||||
</com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.GerritTrigger>
|
||||
</triggers>
|
||||
</org.jenkinsci.plugins.workflow.job.properties.PipelineTriggersJobProperty>
|
||||
</properties>
|
||||
<definition class="org.jenkinsci.plugins.workflow.cps.CpsScmFlowDefinition" plugin="workflow-cps@2.90">
|
||||
<scm class="hudson.plugins.git.GitSCM" plugin="git@4.7.1">
|
||||
<configVersion>2</configVersion>
|
||||
<userRemoteConfigs>
|
||||
<hudson.plugins.git.UserRemoteConfig>
|
||||
<url>ssh://gerritgit/rsch/ec/shmem</url>
|
||||
</hudson.plugins.git.UserRemoteConfig>
|
||||
</userRemoteConfigs>
|
||||
<branches>
|
||||
<hudson.plugins.git.BranchSpec>
|
||||
<name>FETCH_HEAD</name>
|
||||
</hudson.plugins.git.BranchSpec>
|
||||
</branches>
|
||||
<doGenerateSubmoduleConfigurations>false</doGenerateSubmoduleConfigurations>
|
||||
<submoduleCfg class="empty-list"/>
|
||||
<extensions/>
|
||||
</scm>
|
||||
<scriptPath>internal/continuous_integration/compile/Jenkinsfile</scriptPath>
|
||||
<lightweight>false</lightweight>
|
||||
</definition>
|
||||
<triggers/>
|
||||
<disabled>false</disabled>
|
||||
</flow-definition>
|
||||
@@ -0,0 +1,221 @@
|
||||
pipeline {
|
||||
agent { label 'sv-pdp-5' }
|
||||
environment {
|
||||
HSA_FORCE_FINE_GRAIN_PCIE = 1
|
||||
MPI_HOME="/home/resperf/mpich-4.0.1/install/global"
|
||||
PATH = "$MPI_HOME/bin:$PATH"
|
||||
LD_LIBRARY_PATH = "$MPI_HOME/lib:$LD_LIBRARY_PATH"
|
||||
build_dir = "builds/change-${GERRIT_CHANGE_NUMBER}-${GERRIT_PATCHSET_NUMBER}"
|
||||
CMAKE_PREFIX_PATH = "/opt/rocm/lib/cmake"
|
||||
}
|
||||
stages {
|
||||
stage('Synchronize Source Code') {
|
||||
steps {
|
||||
checkout changelog: false, poll: false, scm: [$class: 'GitSCM', branches: [[name: 'FETCH_HEAD']], doGenerateSubmoduleConfigurations: false, extensions: [[$class: 'CloneOption', depth: 0, noTags: false, reference: '', shallow: false]], submoduleCfg: [], userRemoteConfigs: [[name: 'origin', refspec: '${GERRIT_REFSPEC}', url: 'ssh://gerritgit/rsch/ec/shmem']]]
|
||||
}
|
||||
}
|
||||
stage('Env Variables') {
|
||||
steps {
|
||||
sh 'printenv'
|
||||
}
|
||||
}
|
||||
stage('Make Build Directory') {
|
||||
steps {
|
||||
dir("library") {
|
||||
sh "mkdir -p ${build_dir}"
|
||||
}
|
||||
}
|
||||
}
|
||||
stage('Build Source Code') {
|
||||
|
||||
failFast true
|
||||
|
||||
parallel {
|
||||
stage('RC_SINGLE') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/RC_SINGLE") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/rc_single install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/RC_SINGLE") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_SINGLE/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
//dir("internal/clients/spts/${build_dir}/RC_SINGLE") {
|
||||
// sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_SINGLE/install'
|
||||
//}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RC_MULTI_WF_COAL') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/RC_MULTI_WF_COAL") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/rc_multi_wf_coal install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/RC_MULTI_WF_COAL") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_MULTI_WF_COAL/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
//dir("internal/clients/spts/${build_dir}/RC_MULTI_WF_COAL") {
|
||||
// sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_MULTI_WF_COAL/install'
|
||||
//}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RC_MULTI') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/RC_MULTI") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/rc_multi install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/RC_MULTI") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_MULTI/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
//dir("internal/clients/spts/${build_dir}/RC_MULTI") {
|
||||
// sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_MULTI/install'
|
||||
//}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_SINGLE') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/DC_SINGLE") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/dc_single install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/DC_SINGLE") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_SINGLE/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
//dir("internal/clients/spts/${build_dir}/DC_SINGLE") {
|
||||
// sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/DC_SINGLE/install'
|
||||
//}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_MULTI') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/DC_MULTI") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/dc_multi install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/DC_MULTI") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
//dir("internal/clients/spts/${build_dir}/DC_MULTI") {
|
||||
// sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/DC_MULTI/install'
|
||||
//}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
stage('Run Tests') {
|
||||
stages {
|
||||
stage('RC_SINGLE') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/RC_SINGLE/rocshmem_example_driver single_thread ${build_dir}/RC_SINGLE true'
|
||||
}
|
||||
//dir("internal/clients/spts") {
|
||||
// sh './driver.sh ${build_dir}/RC_SINGLE/spts single_thread ${build_dir}/RC_SINGLE'
|
||||
//}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RC_MULTI_WF_COAL') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/RC_MULTI_WF_COAL/rocshmem_example_driver multi_thread ${build_dir}/RC_MULTI_WF_COAL true'
|
||||
}
|
||||
//dir("internal/clients/spts") {
|
||||
// sh './driver.sh ${build_dir}/RC_MULTI_WF_COAL/spts multi_thread ${build_dir}/RC_MULTI_WF_COAL'
|
||||
//}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RC_MULTI') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/RC_MULTI/rocshmem_example_driver multi_thread ${build_dir}/RC_MULTI true'
|
||||
}
|
||||
//dir("internal/clients/spts") {
|
||||
// sh './driver.sh ${build_dir}/RC_MULTI/spts multi_thread ${build_dir}/RC_MULTI'
|
||||
//}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_SINGLE') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/DC_SINGLE/rocshmem_example_driver single_thread ${build_dir}/DC_SINGLE true'
|
||||
}
|
||||
//dir("internal/clients/spts") {
|
||||
// sh './driver.sh ${build_dir}/DC_SINGLE/spts single_thread ${build_dir}/DC_SINGLE'
|
||||
//}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_MULTI') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/DC_MULTI/rocshmem_example_driver multi_thread ${build_dir}/DC_MULTI true'
|
||||
}
|
||||
//dir("internal/clients/spts") {
|
||||
// sh './driver.sh ${build_dir}/DC_MULTI/spts multi_thread ${build_dir}/DC_MULTI'
|
||||
//}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RO_NET_BASIC') {
|
||||
// RO_NET controlled at runtime, no need for a new build. Use RC_MULTI
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh 'mkdir -p ${build_dir}/RO_NET_BASIC'
|
||||
sh 'ROC_SHMEM_RO=1 RO_NET_CPU_QUEUE=1 UCX_TLS=rc ./driver.sh ${build_dir}/RC_MULTI/rocshmem_example_driver ro ${build_dir}/RO_NET_BASIC true'
|
||||
}
|
||||
//dir("internal/clients/spts") {
|
||||
// sh 'mkdir -p ${build_dir}/RO_NET_BASIC'
|
||||
// sh 'ROC_SHMEM_RO=1 RO_NET_CPU_QUEUE=1 UCX_TLS=rc ./driver.sh ${build_dir}/RC_MULTI/spts multi_thread ${build_dir}/RO_NET_BASIC'
|
||||
//}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
stage('Generate Checker Metadata') {
|
||||
steps {
|
||||
dir("library/${build_dir}") {
|
||||
sh 'git fetch --tags'
|
||||
sh 'git log --pretty=oneline remotes/origin/amd-master.. > changeset_delta.txt'
|
||||
sh 'git log --pretty=oneline remotes/origin/amd-master~1..remotes/origin/amd-master >> changeset_delta.txt'
|
||||
}
|
||||
}
|
||||
}
|
||||
stage('Archive Artifacts') {
|
||||
steps {
|
||||
dir("library/${build_dir}") {
|
||||
archiveArtifacts artifacts: 'changeset_delta.txt'
|
||||
}
|
||||
dir("clients/functional_tests/${build_dir}") {
|
||||
archiveArtifacts artifacts: 'RC_SINGLE/**/*.log'
|
||||
archiveArtifacts artifacts: 'RC_MULTI/**/*.log'
|
||||
archiveArtifacts artifacts: 'DC_SINGLE/**/*.log'
|
||||
archiveArtifacts artifacts: 'DC_MULTI/**/*.log'
|
||||
archiveArtifacts artifacts: 'RO_NET_BASIC/**/*.log'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,413 @@
|
||||
pipeline {
|
||||
agent { label 'sv-pdp-5' }
|
||||
environment {
|
||||
build_dir = "builds/change-${GERRIT_CHANGE_NUMBER}-${GERRIT_PATCHSET_NUMBER}"
|
||||
|
||||
MPI_HOME="/home/resperf/mpich-4.0.1/install/global"
|
||||
UCX_HOME="/home/resperf/ucx/install"
|
||||
|
||||
PATH="$MPI_HOME/bin:$UCX_HOME/bin:$PATH"
|
||||
LD_LIBRARY_PATH="$MPI_HOME/lib:$UCX_HOME/lib:$LD_LIBRARY_PATH"
|
||||
PKG_CONFIG_PATH="$MPI_HOME/lib/pkgconfig:$UCX_HOME/lib/pkgconfig"
|
||||
|
||||
CMAKE_PREFIX_PATH="/opt/rocm/lib/cmake"
|
||||
|
||||
UCX_WARN_UNUSED_ENV_VARS="n"
|
||||
HSA_FORCE_FINE_GRAIN_PCIE=1
|
||||
}
|
||||
stages {
|
||||
stage('Synchronize Source Code') {
|
||||
steps {
|
||||
checkout changelog: false, poll: false, scm: [$class: 'GitSCM', branches: [[name: 'FETCH_HEAD']], doGenerateSubmoduleConfigurations: false, extensions: [[$class: 'CloneOption', depth: 0, noTags: false, reference: '', shallow: false]], submoduleCfg: [], userRemoteConfigs: [[name: 'origin', refspec: '${GERRIT_REFSPEC}', url: 'ssh://gerritgit/rsch/ec/shmem']]]
|
||||
}
|
||||
}
|
||||
stage('Env Variables') {
|
||||
steps {
|
||||
sh 'printenv'
|
||||
}
|
||||
}
|
||||
stage('Make Build Directory') {
|
||||
steps {
|
||||
dir("library") {
|
||||
sh "mkdir -p ${build_dir}"
|
||||
}
|
||||
}
|
||||
}
|
||||
stage('Build Source Code') {
|
||||
|
||||
failFast true
|
||||
|
||||
parallel {
|
||||
stage('RC_SINGLE') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/RC_SINGLE") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/rc_single install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/RC_SINGLE") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_SINGLE/install'
|
||||
}
|
||||
dir("clients/sos_tests/${build_dir}/RC_SINGLE") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_SINGLE/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
dir("internal/clients/spts/${build_dir}/RC_SINGLE") {
|
||||
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_SINGLE/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RC_MULTI_WF_COAL') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/RC_MULTI_WF_COAL") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/rc_multi_wf_coal install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/RC_MULTI_WF_COAL") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_MULTI_WF_COAL/install'
|
||||
}
|
||||
dir("clients/sos_tests/${build_dir}/RC_MULTI_WF_COAL") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_MULTI_WF_COAL/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
dir("internal/clients/spts/${build_dir}/RC_MULTI_WF_COAL") {
|
||||
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_MULTI_WF_COAL/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RC_MULTI') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/RC_MULTI") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/rc_multi install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/RC_MULTI") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_MULTI/install'
|
||||
}
|
||||
dir("clients/sos_tests/${build_dir}/RC_MULTI") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_MULTI/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
dir("internal/clients/spts/${build_dir}/RC_MULTI") {
|
||||
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_MULTI/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RC_SINGLE_DEBUG') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/RC_SINGLE_DEBUG") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/rc_single_debug install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/RC_SINGLE_DEBUG") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_SINGLE_DEBUG/install'
|
||||
}
|
||||
dir("clients/sos_tests/${build_dir}/RC_SINGLE_DEBUG") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_SINGLE_DEBUG/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
dir("internal/clients/spts/${build_dir}/RC_SINGLE_DEBUG") {
|
||||
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_SINGLE_DEBUG/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RC_SINGLE_PROFILE') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/RC_SINGLE_PROFILE") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/rc_single_profile install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/RC_SINGLE_PROFILE") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_SINGLE_PROFILE/install'
|
||||
}
|
||||
dir("clients/sos_tests/${build_dir}/RC_SINGLE_PROFILE") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_SINGLE_PROFILE/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
dir("internal/clients/spts/${build_dir}/RC_SINGLE_PROFILE") {
|
||||
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_SINGLE_PROFILE/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_SINGLE') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/DC_SINGLE") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/dc_single install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/DC_SINGLE") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_SINGLE/install'
|
||||
}
|
||||
dir("clients/sos_tests/${build_dir}/DC_SINGLE") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_SINGLE/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
dir("internal/clients/spts/${build_dir}/DC_SINGLE") {
|
||||
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/DC_SINGLE/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_MULTI') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/DC_MULTI") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/dc_multi install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/DC_MULTI") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI/install'
|
||||
}
|
||||
dir("clients/sos_tests/${build_dir}/DC_MULTI") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
dir("internal/clients/spts/${build_dir}/DC_MULTI") {
|
||||
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/DC_MULTI/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_MULTI_IPC') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/DC_MULTI_IPC") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/dc_multi_ipc install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/DC_MULTI_IPC") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI_IPC/install'
|
||||
}
|
||||
dir("clients/sos_tests/${build_dir}/DC_MULTI_IPC") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI_IPC/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
dir("internal/clients/spts/${build_dir}/DC_MULTI_IPC") {
|
||||
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/DC_MULTI_IPC/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_MULTI_DEBUG') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/DC_MULTI_DEBUG") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/dc_multi_debug install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/DC_MULTI_DEBUG") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI_DEBUG/install'
|
||||
}
|
||||
dir("clients/sos_tests/${build_dir}/DC_MULTI_DEBUG") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI_DEBUG/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
dir("internal/clients/spts/${build_dir}/DC_MULTI_DEBUG") {
|
||||
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/DC_MULTI_DEBUG/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_MULTI_PROFILE') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/DC_MULTI_PROFILE") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/dc_multi_profile install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/DC_MULTI_PROFILE") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI_PROFILE/install'
|
||||
}
|
||||
dir("clients/sos_tests/${build_dir}/DC_MULTI_PROFILE") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI_PROFILE/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
dir("internal/clients/spts/${build_dir}/DC_MULTI_PROFILE") {
|
||||
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/DC_MULTI_PROFILE/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
stage('Run Tests') {
|
||||
stages {
|
||||
stage('RC_SINGLE') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/RC_SINGLE/rocshmem_example_driver single_thread ${build_dir}/RC_SINGLE'
|
||||
}
|
||||
dir("clients/sos_tests") {
|
||||
sh './driver.sh ${build_dir}/RC_SINGLE all ${build_dir}/RC_SINGLE'
|
||||
}
|
||||
dir("internal/clients/spts") {
|
||||
sh './driver.sh ${build_dir}/RC_SINGLE/spts single_thread ${build_dir}/RC_SINGLE'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RC_MULTI_WF_COAL') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/RC_MULTI_WF_COAL/rocshmem_example_driver multi_thread ${build_dir}/RC_MULTI_WF_COAL'
|
||||
}
|
||||
dir("clients/sos_tests") {
|
||||
sh './driver.sh ${build_dir}/RC_MULTI_WF_COAL all ${build_dir}/RC_MULTI_WF_COAL'
|
||||
}
|
||||
dir("internal/clients/spts") {
|
||||
sh './driver.sh ${build_dir}/RC_MULTI_WF_COAL/spts multi_thread ${build_dir}/RC_MULTI_WF_COAL'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RC_MULTI') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/RC_MULTI/rocshmem_example_driver multi_thread ${build_dir}/RC_MULTI'
|
||||
}
|
||||
dir("clients/sos_tests") {
|
||||
sh './driver.sh ${build_dir}/RC_MULTI all ${build_dir}/RC_MULTI'
|
||||
}
|
||||
dir("internal/clients/spts") {
|
||||
sh './driver.sh ${build_dir}/RC_MULTI/spts multi_thread ${build_dir}/RC_MULTI'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RC_SINGLE_DEBUG') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/RC_SINGLE_DEBUG/rocshmem_example_driver single_thread ${build_dir}/RC_SINGLE_DEBUG'
|
||||
}
|
||||
dir("clients/sos_tests") {
|
||||
sh './driver.sh ${build_dir}/RC_SINGLE_DEBUG all ${build_dir}/RC_SINGLE_DEBUG'
|
||||
}
|
||||
dir("internal/clients/spts") {
|
||||
sh './driver.sh ${build_dir}/RC_SINGLE_DEBUG/spts single_thread ${build_dir}/RC_SINGLE_DEBUG'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RC_SINGLE_PROFILE') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/RC_SINGLE_PROFILE/rocshmem_example_driver single_thread ${build_dir}/RC_SINGLE_PROFILE'
|
||||
}
|
||||
dir("clients/sos_tests") {
|
||||
sh './driver.sh ${build_dir}/RC_SINGLE_PROFILE all ${build_dir}/RC_SINGLE_PROFILE'
|
||||
}
|
||||
dir("internal/clients/spts") {
|
||||
sh './driver.sh ${build_dir}/RC_SINGLE_PROFILE/spts single_thread ${build_dir}/RC_SINGLE_PROFILE'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_SINGLE') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/DC_SINGLE/rocshmem_example_driver single_thread ${build_dir}/DC_SINGLE'
|
||||
}
|
||||
dir("clients/sos_tests") {
|
||||
sh './driver.sh ${build_dir}/DC_SINGLE all ${build_dir}/DC_SINGLE'
|
||||
}
|
||||
dir("internal/clients/spts") {
|
||||
sh './driver.sh ${build_dir}/DC_SINGLE/spts single_thread ${build_dir}/DC_SINGLE'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_MULTI') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/DC_MULTI/rocshmem_example_driver multi_thread ${build_dir}/DC_MULTI'
|
||||
}
|
||||
dir("clients/sos_tests") {
|
||||
sh './driver.sh ${build_dir}/DC_MULTI all ${build_dir}/DC_MULTI'
|
||||
}
|
||||
dir("internal/clients/spts") {
|
||||
sh './driver.sh ${build_dir}/DC_MULTI/spts multi_thread ${build_dir}/DC_MULTI'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_MULTI_IPC') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/DC_MULTI_IPC/rocshmem_example_driver multi_thread ${build_dir}/DC_MULTI_IPC'
|
||||
}
|
||||
dir("clients/sos_tests") {
|
||||
sh './driver.sh ${build_dir}/DC_MULTI_IPC all ${build_dir}/DC_MULTI_IPC'
|
||||
}
|
||||
dir("internal/clients/spts") {
|
||||
sh './driver.sh ${build_dir}/DC_MULTI_IPC/spts multi_thread ${build_dir}/DC_MULTI_IPC'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_MULTI_DEBUG') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/DC_MULTI_DEBUG/rocshmem_example_driver multi_thread ${build_dir}/DC_MULTI_DEBUG'
|
||||
}
|
||||
dir("clients/sos_tests") {
|
||||
sh './driver.sh ${build_dir}/DC_MULTI_DEBUG all ${build_dir}/DC_MULTI_DEBUG'
|
||||
}
|
||||
dir("internal/clients/spts") {
|
||||
sh './driver.sh ${build_dir}/DC_MULTI_DEBUG/spts multi_thread ${build_dir}/DC_MULTI_DEBUG'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_MULTI_PROFILE') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/DC_MULTI_PROFILE/rocshmem_example_driver multi_thread ${build_dir}/DC_MULTI_PROFILE'
|
||||
}
|
||||
dir("clients/sos_tests") {
|
||||
sh './driver.sh ${build_dir}/DC_MULTI_PROFILE all ${build_dir}/DC_MULTI_PROFILE'
|
||||
}
|
||||
dir("internal/clients/spts") {
|
||||
sh './driver.sh ${build_dir}/DC_MULTI_PROFILE/spts multi_thread ${build_dir}/DC_MULTI_PROFILE'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RO_NET_BASIC') {
|
||||
// RO_NET controlled at runtime, no need for a new build. Use RC_MULTI
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh 'mkdir -p ${build_dir}/RO_NET_BASIC'
|
||||
sh 'ROC_SHMEM_RO=1 RO_NET_CPU_QUEUE=1 UCX_TLS=rc ./driver.sh ${build_dir}/RC_MULTI/rocshmem_example_driver ro ${build_dir}/RO_NET_BASIC'
|
||||
}
|
||||
dir("clients/sos_tests") {
|
||||
sh 'ROC_SHMEM_RO=1 ./driver.sh ${build_dir}/RC_MULTI all ${build_dir}/RC_MULTI'
|
||||
}
|
||||
dir("internal/clients/spts") {
|
||||
sh 'mkdir -p ${build_dir}/RO_NET_BASIC'
|
||||
sh 'ROC_SHMEM_RO=1 RO_NET_CPU_QUEUE=1 UCX_TLS=rc ./driver.sh ${build_dir}/RC_MULTI/spts multi_thread ${build_dir}/RO_NET_BASIC'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,45 @@
|
||||
<?xml version='1.1' encoding='UTF-8'?>
|
||||
<flow-definition plugin="workflow-job@2.40">
|
||||
<actions>
|
||||
<org.jenkinsci.plugins.pipeline.modeldefinition.actions.DeclarativeJobAction plugin="pipeline-model-definition@1.8.4"/>
|
||||
<org.jenkinsci.plugins.pipeline.modeldefinition.actions.DeclarativeJobPropertyTrackerAction plugin="pipeline-model-definition@1.8.4">
|
||||
<jobProperties/>
|
||||
<triggers/>
|
||||
<parameters/>
|
||||
<options/>
|
||||
</org.jenkinsci.plugins.pipeline.modeldefinition.actions.DeclarativeJobPropertyTrackerAction>
|
||||
</actions>
|
||||
<description></description>
|
||||
<keepDependencies>false</keepDependencies>
|
||||
<properties>
|
||||
<org.jenkinsci.plugins.workflow.job.properties.PipelineTriggersJobProperty>
|
||||
<triggers>
|
||||
<hudson.triggers.TimerTrigger>
|
||||
<spec>H 22 * * *</spec>
|
||||
</hudson.triggers.TimerTrigger>
|
||||
</triggers>
|
||||
</org.jenkinsci.plugins.workflow.job.properties.PipelineTriggersJobProperty>
|
||||
</properties>
|
||||
<definition class="org.jenkinsci.plugins.workflow.cps.CpsScmFlowDefinition" plugin="workflow-cps@2.90">
|
||||
<scm class="hudson.plugins.git.GitSCM" plugin="git@4.7.1">
|
||||
<configVersion>2</configVersion>
|
||||
<userRemoteConfigs>
|
||||
<hudson.plugins.git.UserRemoteConfig>
|
||||
<url>ssh://gerritgit/rsch/ec/shmem</url>
|
||||
</hudson.plugins.git.UserRemoteConfig>
|
||||
</userRemoteConfigs>
|
||||
<branches>
|
||||
<hudson.plugins.git.BranchSpec>
|
||||
<name>*/amd-master</name>
|
||||
</hudson.plugins.git.BranchSpec>
|
||||
</branches>
|
||||
<doGenerateSubmoduleConfigurations>false</doGenerateSubmoduleConfigurations>
|
||||
<submoduleCfg class="empty-list"/>
|
||||
<extensions/>
|
||||
</scm>
|
||||
<scriptPath>internal/continuous_integration/nightly/Jenkinsfile</scriptPath>
|
||||
<lightweight>false</lightweight>
|
||||
</definition>
|
||||
<triggers/>
|
||||
<disabled>false</disabled>
|
||||
</flow-definition>
|
||||
@@ -0,0 +1,335 @@
|
||||
pipeline {
|
||||
agent { label 'sv-pdp-5' }
|
||||
environment {
|
||||
HSA_FORCE_FINE_GRAIN_PCIE = 1
|
||||
MPI_HOME="/home/resperf/mpich-4.0.1/install/global"
|
||||
PATH = "$MPI_HOME/bin:$PATH"
|
||||
LD_LIBRARY_PATH = "$MPI_HOME/lib:$LD_LIBRARY_PATH"
|
||||
build_dir = "builds/${BUILD_ID}"
|
||||
CMAKE_PREFIX_PATH = "/opt/rocm/lib/cmake"
|
||||
}
|
||||
stages {
|
||||
stage('Synchronize Source Code') {
|
||||
steps {
|
||||
git branch: 'amd-master', changelog: false, poll: false, url: 'ssh://gerritgit/rsch/ec/shmem'
|
||||
}
|
||||
}
|
||||
stage('Make Build Directory') {
|
||||
steps {
|
||||
dir("library") {
|
||||
sh "mkdir -p ${build_dir}"
|
||||
}
|
||||
}
|
||||
}
|
||||
stage('Build Source Code') {
|
||||
parallel {
|
||||
stage('RC_SINGLE') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/RC_SINGLE") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/rc_single install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/RC_SINGLE") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_SINGLE/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
dir("internal/clients/spts/${build_dir}/RC_SINGLE") {
|
||||
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_SINGLE/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RC_MULTI_WF_COAL') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/RC_MULTI_WF_COAL") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/rc_multi_wf_coal install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/RC_MULTI_WF_COAL") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_MULTI_WF_COAL/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
dir("internal/clients/spts/${build_dir}/RC_MULTI_WF_COAL") {
|
||||
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_MULTI_WF_COAL/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RC_MULTI') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/RC_MULTI") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/rc_multi install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/RC_MULTI") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_MULTI/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
dir("internal/clients/spts/${build_dir}/RC_MULTI") {
|
||||
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_MULTI/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RC_SINGLE_DEBUG') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/RC_SINGLE_DEBUG") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/rc_single_debug install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/RC_SINGLE_DEBUG") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_SINGLE_DEBUG/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
dir("internal/clients/spts/${build_dir}/RC_SINGLE_DEBUG") {
|
||||
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_SINGLE_DEBUG/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RC_SINGLE_PROFILE') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/RC_SINGLE_PROFILE") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/rc_single_profile install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/RC_SINGLE_PROFILE") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_SINGLE_PROFILE/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
dir("internal/clients/spts/${build_dir}/RC_SINGLE_PROFILE") {
|
||||
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_SINGLE_PROFILE/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_SINGLE') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/DC_SINGLE") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/dc_single install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/DC_SINGLE") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_SINGLE/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
dir("internal/clients/spts/${build_dir}/DC_SINGLE") {
|
||||
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/DC_SINGLE/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_MULTI') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/DC_MULTI") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/dc_multi install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/DC_MULTI") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
dir("internal/clients/spts/${build_dir}/DC_MULTI") {
|
||||
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/DC_MULTI/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_MULTI_IPC') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/DC_MULTI_IPC") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/dc_multi_ipc install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/DC_MULTI_IPC") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI_IPC/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
dir("internal/clients/spts/${build_dir}/DC_MULTI_IPC") {
|
||||
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/DC_MULTI_IPC/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_MULTI_DEBUG') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/DC_MULTI_DEBUG") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/dc_multi_debug install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/DC_MULTI_DEBUG") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI_DEBUG/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
dir("internal/clients/spts/${build_dir}/DC_MULTI_DEBUG") {
|
||||
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/DC_MULTI_DEBUG/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_MULTI_PROFILE') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/DC_MULTI_PROFILE") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/dc_multi_profile install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/DC_MULTI_PROFILE") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI_PROFILE/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
dir("internal/clients/spts/${build_dir}/DC_MULTI_PROFILE") {
|
||||
sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/DC_MULTI_PROFILE/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
stage('Run Tests') {
|
||||
stages {
|
||||
stage('RC_SINGLE') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/RC_SINGLE/rocshmem_example_driver single_thread ${build_dir}/RC_SINGLE'
|
||||
}
|
||||
dir("internal/clients/spts") {
|
||||
sh './driver.sh ${build_dir}/RC_SINGLE/spts single_thread ${build_dir}/RC_SINGLE'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RC_MULTI_WF_COAL') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/RC_MULTI_WF_COAL/rocshmem_example_driver multi_thread ${build_dir}/RC_MULTI_WF_COAL'
|
||||
}
|
||||
dir("internal/clients/spts") {
|
||||
sh './driver.sh ${build_dir}/RC_MULTI_WF_COAL/spts multi_thread ${build_dir}/RC_MULTI_WF_COAL'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RC_MULTI') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/RC_MULTI/rocshmem_example_driver multi_thread ${build_dir}/RC_MULTI'
|
||||
}
|
||||
dir("internal/clients/spts") {
|
||||
sh './driver.sh ${build_dir}/RC_MULTI/spts multi_thread ${build_dir}/RC_MULTI'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RC_SINGLE_DEBUG') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/RC_SINGLE_DEBUG/rocshmem_example_driver single_thread ${build_dir}/RC_SINGLE_DEBUG'
|
||||
}
|
||||
dir("internal/clients/spts") {
|
||||
sh './driver.sh ${build_dir}/RC_SINGLE_DEBUG/spts single_thread ${build_dir}/RC_SINGLE_DEBUG'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RC_SINGLE_PROFILE') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/RC_SINGLE_PROFILE/rocshmem_example_driver single_thread ${build_dir}/RC_SINGLE_PROFILE'
|
||||
}
|
||||
dir("internal/clients/spts") {
|
||||
sh './driver.sh ${build_dir}/RC_SINGLE_PROFILE/spts single_thread ${build_dir}/RC_SINGLE_PROFILE'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_SINGLE') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/DC_SINGLE/rocshmem_example_driver single_thread ${build_dir}/DC_SINGLE'
|
||||
}
|
||||
dir("internal/clients/spts") {
|
||||
sh './driver.sh ${build_dir}/DC_SINGLE/spts single_thread ${build_dir}/DC_SINGLE'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_MULTI') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/DC_MULTI/rocshmem_example_driver multi_thread ${build_dir}/DC_MULTI'
|
||||
}
|
||||
dir("internal/clients/spts") {
|
||||
sh './driver.sh ${build_dir}/DC_MULTI/spts multi_thread ${build_dir}/DC_MULTI'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_MULTI_IPC') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/DC_MULTI_IPC/rocshmem_example_driver multi_thread ${build_dir}/DC_MULTI_IPC'
|
||||
}
|
||||
dir("internal/clients/spts") {
|
||||
sh './driver.sh ${build_dir}/DC_MULTI_IPC/spts multi_thread ${build_dir}/DC_MULTI_IPC'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_MULTI_DEBUG') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/DC_MULTI_DEBUG/rocshmem_example_driver multi_thread ${build_dir}/DC_MULTI_DEBUG'
|
||||
}
|
||||
dir("internal/clients/spts") {
|
||||
sh './driver.sh ${build_dir}/DC_MULTI_DEBUG/spts multi_thread ${build_dir}/DC_MULTI_DEBUG'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_MULTI_PROFILE') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/DC_MULTI_PROFILE/rocshmem_example_driver multi_thread ${build_dir}/DC_MULTI_PROFILE'
|
||||
}
|
||||
dir("internal/clients/spts") {
|
||||
sh './driver.sh ${build_dir}/DC_MULTI_PROFILE/spts multi_thread ${build_dir}/DC_MULTI_PROFILE'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RO_NET_BASIC') {
|
||||
// RO_NET controlled at runtime, no need for a new build. Use RC_MULTI
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh 'mkdir -p ${build_dir}/RO_NET_BASIC'
|
||||
sh 'ROC_SHMEM_RO=1 RO_NET_CPU_QUEUE=1 UCX_TLS=rc ./driver.sh ${build_dir}/RC_MULTI/rocshmem_example_driver ro ${build_dir}/RO_NET_BASIC'
|
||||
}
|
||||
dir("internal/clients/spts") {
|
||||
sh 'mkdir -p ${build_dir}/RO_NET_BASIC'
|
||||
sh 'ROC_SHMEM_RO=1 RO_NET_CPU_QUEUE=1 UCX_TLS=rc ./driver.sh ${build_dir}/RC_MULTI/spts multi_thread ${build_dir}/RO_NET_BASIC'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,45 @@
|
||||
<?xml version='1.1' encoding='UTF-8'?>
|
||||
<flow-definition plugin="workflow-job@2.40">
|
||||
<actions>
|
||||
<org.jenkinsci.plugins.pipeline.modeldefinition.actions.DeclarativeJobAction plugin="pipeline-model-definition@1.8.4"/>
|
||||
<org.jenkinsci.plugins.pipeline.modeldefinition.actions.DeclarativeJobPropertyTrackerAction plugin="pipeline-model-definition@1.8.4">
|
||||
<jobProperties/>
|
||||
<triggers/>
|
||||
<parameters/>
|
||||
<options/>
|
||||
</org.jenkinsci.plugins.pipeline.modeldefinition.actions.DeclarativeJobPropertyTrackerAction>
|
||||
</actions>
|
||||
<description></description>
|
||||
<keepDependencies>false</keepDependencies>
|
||||
<properties>
|
||||
<org.jenkinsci.plugins.workflow.job.properties.PipelineTriggersJobProperty>
|
||||
<triggers>
|
||||
<hudson.triggers.TimerTrigger>
|
||||
<spec>H 22 * * *</spec>
|
||||
</hudson.triggers.TimerTrigger>
|
||||
</triggers>
|
||||
</org.jenkinsci.plugins.workflow.job.properties.PipelineTriggersJobProperty>
|
||||
</properties>
|
||||
<definition class="org.jenkinsci.plugins.workflow.cps.CpsScmFlowDefinition" plugin="workflow-cps@2.90">
|
||||
<scm class="hudson.plugins.git.GitSCM" plugin="git@4.7.1">
|
||||
<configVersion>2</configVersion>
|
||||
<userRemoteConfigs>
|
||||
<hudson.plugins.git.UserRemoteConfig>
|
||||
<url>ssh://gerritgit/rsch/ec/shmem</url>
|
||||
</hudson.plugins.git.UserRemoteConfig>
|
||||
</userRemoteConfigs>
|
||||
<branches>
|
||||
<hudson.plugins.git.BranchSpec>
|
||||
<name>*/amd-master</name>
|
||||
</hudson.plugins.git.BranchSpec>
|
||||
</branches>
|
||||
<doGenerateSubmoduleConfigurations>false</doGenerateSubmoduleConfigurations>
|
||||
<submoduleCfg class="empty-list"/>
|
||||
<extensions/>
|
||||
</scm>
|
||||
<scriptPath>internal/continuous_integration/nightly/Jenkinsfile</scriptPath>
|
||||
<lightweight>false</lightweight>
|
||||
</definition>
|
||||
<triggers/>
|
||||
<disabled>false</disabled>
|
||||
</flow-definition>
|
||||
@@ -0,0 +1,288 @@
|
||||
pipeline {
|
||||
agent { label 'sv-pdp-7' }
|
||||
environment {
|
||||
build_dir = "builds/change-${GERRIT_CHANGE_NUMBER}-${GERRIT_PATCHSET_NUMBER}"
|
||||
|
||||
MPI_HOME="/home/resperf/mpich/install"
|
||||
UCX_HOME="/home/resperf/ucx/install"
|
||||
|
||||
PATH="$MPI_HOME/bin:$UCX_HOME/bin:$PATH"
|
||||
LD_LIBRARY_PATH="$MPI_HOME/lib:$UCX_HOME/lib:$LD_LIBRARY_PATH"
|
||||
PKG_CONFIG_PATH="$MPI_HOME/lib/pkgconfig:$UCX_HOME/lib/pkgconfig"
|
||||
|
||||
CMAKE_PREFIX_PATH="/opt/rocm/lib/cmake"
|
||||
|
||||
UCX_WARN_UNUSED_ENV_VARS="n"
|
||||
HSA_FORCE_FINE_GRAIN_PCIE=1
|
||||
UCX_TLS="rc"
|
||||
ROC_SHMEM_USE_SQ_GPU_MEM=0
|
||||
ROC_SHMEM_USE_CQ_GPU_MEM=0
|
||||
ROC_SHMEM_NUM_BLOCKS=128
|
||||
}
|
||||
stages {
|
||||
stage('Synchronize Source Code') {
|
||||
steps {
|
||||
checkout changelog: false, poll: false, scm: [$class: 'GitSCM', branches: [[name: 'FETCH_HEAD']], doGenerateSubmoduleConfigurations: false, extensions: [[$class: 'CloneOption', depth: 0, noTags: false, reference: '', shallow: false]], submoduleCfg: [], userRemoteConfigs: [[name: 'origin', refspec: '${GERRIT_REFSPEC}', url: 'ssh://gerritgit/rsch/ec/shmem']]]
|
||||
}
|
||||
}
|
||||
stage('Env Variables') {
|
||||
steps {
|
||||
sh 'printenv'
|
||||
}
|
||||
}
|
||||
stage('Make Build Directory') {
|
||||
steps {
|
||||
dir("library") {
|
||||
sh "mkdir -p ${build_dir}"
|
||||
}
|
||||
}
|
||||
}
|
||||
stage('Build Source Code') {
|
||||
|
||||
failFast true
|
||||
|
||||
parallel {
|
||||
stage('RC_SINGLE') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/RC_SINGLE") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/rc_single install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/RC_SINGLE") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_SINGLE/install'
|
||||
}
|
||||
dir("clients/sos_tests/${build_dir}/RC_SINGLE") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_SINGLE/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
//dir("internal/clients/spts/${build_dir}/RC_SINGLE") {
|
||||
// sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_SINGLE/install'
|
||||
//}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RC_MULTI_WF_COAL') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/RC_MULTI_WF_COAL") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/rc_multi_wf_coal install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/RC_MULTI_WF_COAL") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_MULTI_WF_COAL/install'
|
||||
}
|
||||
dir("clients/sos_tests/${build_dir}/RC_MULTI_WF_COAL") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_MULTI_WF_COAL/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
//dir("internal/clients/spts/${build_dir}/RC_MULTI_WF_COAL") {
|
||||
// sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_MULTI_WF_COAL/install'
|
||||
//}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RC_MULTI') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/RC_MULTI") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/rc_multi install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/RC_MULTI") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_MULTI/install'
|
||||
}
|
||||
dir("clients/sos_tests/${build_dir}/RC_MULTI") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_MULTI/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
//dir("internal/clients/spts/${build_dir}/RC_MULTI") {
|
||||
// sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RC_MULTI/install'
|
||||
//}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_SINGLE') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/DC_SINGLE") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/dc_single install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/DC_SINGLE") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_SINGLE/install'
|
||||
}
|
||||
dir("clients/sos_tests/${build_dir}/DC_SINGLE") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_SINGLE/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
//dir("internal/clients/spts/${build_dir}/DC_SINGLE") {
|
||||
// sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/DC_SINGLE/install'
|
||||
//}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_MULTI') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/DC_MULTI") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/dc_multi install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/DC_MULTI") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI/install'
|
||||
}
|
||||
dir("clients/sos_tests/${build_dir}/DC_MULTI") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
//dir("internal/clients/spts/${build_dir}/DC_MULTI") {
|
||||
// sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/DC_MULTI/install'
|
||||
//}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RO_NET') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/RO_NET") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/ro_net install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/RO_NET") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RO_NET/install'
|
||||
}
|
||||
dir("clients/sos_tests/${build_dir}/RO_NET") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RO_NET/install'
|
||||
}
|
||||
//===================== SPTS ==========================
|
||||
//dir("internal/clients/spts/${build_dir}/RO_NET") {
|
||||
// sh '../../../build_configs/analyze_single_rocshmem ${WORKSPACE}/library/${build_dir}/RO_NET/install'
|
||||
//}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
stage('Run Tests') {
|
||||
stages {
|
||||
stage('RC_SINGLE') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/RC_SINGLE/rocshmem_example_driver single_thread ${build_dir}/RC_SINGLE'
|
||||
}
|
||||
dir("clients/sos_tests") {
|
||||
sh './driver.sh ${build_dir}/RC_SINGLE short ${build_dir}/RC_SINGLE'
|
||||
}
|
||||
//dir("internal/clients/spts") {
|
||||
// sh './driver.sh ${build_dir}/RC_SINGLE/spts single_thread ${build_dir}/RC_SINGLE'
|
||||
//}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RC_MULTI_WF_COAL') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/RC_MULTI_WF_COAL/rocshmem_example_driver multi_thread ${build_dir}/RC_MULTI_WF_COAL'
|
||||
}
|
||||
dir("clients/sos_tests") {
|
||||
sh './driver.sh ${build_dir}/RC_MULTI_WF_COAL short ${build_dir}/RC_MULTI_WF_COAL'
|
||||
}
|
||||
//dir("internal/clients/spts") {
|
||||
// sh './driver.sh ${build_dir}/RC_MULTI_WF_COAL/spts multi_thread ${build_dir}/RC_MULTI_WF_COAL'
|
||||
//}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RC_MULTI') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/RC_MULTI/rocshmem_example_driver multi_thread ${build_dir}/RC_MULTI'
|
||||
}
|
||||
dir("clients/sos_tests") {
|
||||
sh './driver.sh ${build_dir}/RC_MULTI short ${build_dir}/RC_MULTI'
|
||||
}
|
||||
//dir("internal/clients/spts") {
|
||||
// sh './driver.sh ${build_dir}/RC_MULTI/spts multi_thread ${build_dir}/RC_MULTI'
|
||||
//}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_SINGLE') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/DC_SINGLE/rocshmem_example_driver single_thread ${build_dir}/DC_SINGLE'
|
||||
}
|
||||
dir("clients/sos_tests") {
|
||||
sh './driver.sh ${build_dir}/DC_SINGLE short ${build_dir}/DC_SINGLE'
|
||||
}
|
||||
//dir("internal/clients/spts") {
|
||||
// sh './driver.sh ${build_dir}/DC_SINGLE/spts single_thread ${build_dir}/DC_SINGLE'
|
||||
//}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_MULTI') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/DC_MULTI/rocshmem_example_driver multi_thread ${build_dir}/DC_MULTI'
|
||||
}
|
||||
dir("clients/sos_tests") {
|
||||
sh './driver.sh ${build_dir}/DC_MULTI short ${build_dir}/DC_MULTI'
|
||||
}
|
||||
//dir("internal/clients/spts") {
|
||||
// sh './driver.sh ${build_dir}/DC_MULTI/spts multi_thread ${build_dir}/DC_MULTI'
|
||||
//}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RO_NET') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh 'ROC_SHMEM_RO=1 RO_NET_CPU_QUEUE=1 UCX_TLS=rc ./driver.sh ${build_dir}/RO_NET/rocshmem_example_driver ro ${build_dir}/RO_NET'
|
||||
}
|
||||
dir("clients/sos_tests") {
|
||||
sh 'ROC_SHMEM_RO=1 ./driver.sh ${build_dir}/RO_NET short ${build_dir}/RO_NET'
|
||||
}
|
||||
//dir("internal/clients/spts") {
|
||||
// sh 'ROC_SHMEM_RO=1 RO_NET_CPU_QUEUE=1 UCX_TLS=rc ./driver.sh ${build_dir}/RO_NET/spts multi_thread ${build_dir}/RO_NET'
|
||||
//}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
stage('Generate Checker Metadata') {
|
||||
steps {
|
||||
dir("library/${build_dir}") {
|
||||
sh 'git fetch --tags'
|
||||
sh 'git log --pretty=oneline remotes/origin/amd-master.. > changeset_delta.txt'
|
||||
sh 'git log --pretty=oneline remotes/origin/amd-master~1..remotes/origin/amd-master >> changeset_delta.txt'
|
||||
}
|
||||
}
|
||||
}
|
||||
stage('Archive Artifacts') {
|
||||
steps {
|
||||
dir("library/${build_dir}") {
|
||||
archiveArtifacts artifacts: 'changeset_delta.txt'
|
||||
}
|
||||
dir("clients/functional_tests/${build_dir}") {
|
||||
archiveArtifacts artifacts: 'RC_SINGLE/**/*.log'
|
||||
archiveArtifacts artifacts: 'RC_MULTI/**/*.log'
|
||||
archiveArtifacts artifacts: 'DC_SINGLE/**/*.log'
|
||||
archiveArtifacts artifacts: 'DC_MULTI/**/*.log'
|
||||
archiveArtifacts artifacts: 'RO_NET/**/*.log'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
post {
|
||||
success {
|
||||
build job: 'shmem_perf_check', wait: true
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,21 @@
|
||||
#!/tool/pandora64/.package/python-3.8.0/bin/python3
|
||||
|
||||
import glob
|
||||
import pprint
|
||||
|
||||
class PathGlobber():
|
||||
def __init__(self, name, *partial_paths_to_concatenate):
|
||||
self._search_path = ''
|
||||
for partial_path in partial_paths_to_concatenate:
|
||||
self._search_path += partial_path
|
||||
self.dirs = []
|
||||
self._name = name
|
||||
|
||||
def generate(self):
|
||||
self.dirs = glob.glob(self._search_path, recursive=True)
|
||||
|
||||
def dump(self):
|
||||
str_out = self._name
|
||||
str_out += pprint.pformat(self.dirs, width=120)
|
||||
str_out += '\n'
|
||||
return str_out
|
||||
@@ -0,0 +1,15 @@
|
||||
#!/tool/pandora64/.package/python-3.8.0/bin/python3
|
||||
|
||||
import absolute_path
|
||||
import glob
|
||||
|
||||
class Archive(absolute_path.PathGlobber):
|
||||
def __init__(self, args, name=''):
|
||||
archive_path = args.archive_path
|
||||
super().__init__(name, args.jenkins_path, archive_path,
|
||||
args.benchmark_path)
|
||||
|
||||
def path_of_build(self, build_id):
|
||||
path = self._search_path.replace('*/archive', build_id + '/archive')
|
||||
path = glob.glob(path)
|
||||
return path[0]
|
||||
+54
@@ -0,0 +1,54 @@
|
||||
#!/tool/pandora64/.package/python-3.8.0/bin/python3
|
||||
|
||||
import parser
|
||||
import dictionary
|
||||
import archive_path
|
||||
import checker
|
||||
|
||||
def main():
|
||||
# This script accepts command line values, but has reasonable defaults
|
||||
# needed to run as part of the CI infrastructure.
|
||||
p = parser.Parser()
|
||||
args = p.parse_command_line()
|
||||
|
||||
# Jenkins is configured to archive build artifacts in a directory.
|
||||
# The 'archives' variable holds the set of directories for
|
||||
# successful Jenkins builds (those which run to completion).
|
||||
# Partitioning of successful builds is useful since we can ignore
|
||||
# failed build directories while searching for performance data.
|
||||
archives = archive_path.Archive(args)
|
||||
archives.generate()
|
||||
print(archives.dump())
|
||||
|
||||
# Jenkins records changeset information in a changeset_delta.txt file.
|
||||
# We parse the changelog for the commit hash and save it into
|
||||
# 'builds_to_changesets'.
|
||||
build_to_changeset = dictionary.BuildToChangesetDict()
|
||||
build_to_changeset.generate(archives.dirs)
|
||||
print(build_to_changeset.dump())
|
||||
|
||||
# 'changeset_to_build' holds the changeset mappings with a
|
||||
# list of build numbers that match the changeset value.
|
||||
# Builds may be executed many times with the same changeset.
|
||||
# The most recent build (identified by the largest build number) will
|
||||
# be used to retrieve performance data.
|
||||
changeset_to_build = dictionary.ChangesetToBuildDict()
|
||||
changeset_to_build.generate(build_to_changeset)
|
||||
print(changeset_to_build.dump())
|
||||
|
||||
# Jenkins is configured to dump Gerrit-esque relation chain changesets
|
||||
# to an archived output file 'changeset-delta.txt'.
|
||||
# The relation chain will be used to determine changeset performance
|
||||
# data for each changeset in the relation chain (when possible).
|
||||
build_to_relation_chain = dictionary.BuildToRelationChainDict()
|
||||
build_to_relation_chain.generate(archives.dirs)
|
||||
print(build_to_relation_chain.dump())
|
||||
|
||||
perf_checker = checker.Performance(args,
|
||||
archives,
|
||||
changeset_to_build,
|
||||
build_to_relation_chain)
|
||||
perf_checker.run()
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -0,0 +1,97 @@
|
||||
#!/tool/pandora64/.package/python-3.8.0/bin/python3
|
||||
|
||||
import archive_path
|
||||
import log
|
||||
import dictionary
|
||||
import report
|
||||
import violation
|
||||
|
||||
class Performance():
|
||||
def __init__(self, args, archives, changeset_to_build,
|
||||
build_to_relation_chain):
|
||||
self._args = args
|
||||
self._archives = archives
|
||||
self._changeset_to_build = changeset_to_build
|
||||
self._build_to_relation_chain = build_to_relation_chain
|
||||
self._build_id = build_to_relation_chain.most_recent_build()
|
||||
self._archive_path = archives.path_of_build(self._build_id)
|
||||
self._output = report.Report(self._build_id,
|
||||
self._archive_path,
|
||||
'performance_diff.txt')
|
||||
|
||||
def _other_build_id(self, other_changeset):
|
||||
packed_id = [build_id for chng,
|
||||
build_id in self._changeset_to_build.data.items()
|
||||
if chng.startswith(other_changeset)]
|
||||
|
||||
# The 'packed_id' variable is a list containing lists.
|
||||
# We need the content inside the packed_id data structure.
|
||||
try:
|
||||
build_id = packed_id[0][0]
|
||||
return True, build_id
|
||||
except IndexError:
|
||||
# An index error can occur if builds in the relation chain
|
||||
# have not been tested before attempting to test this
|
||||
# changeset.
|
||||
return False, 0
|
||||
|
||||
def _log_difference(self, log_filename, other_changeset,
|
||||
other_archive_path, violations):
|
||||
print('determining difference of log file ' + log_filename)
|
||||
self._output.record(log_filename)
|
||||
|
||||
current_file_path = self._archive_path + '/' + log_filename
|
||||
other_file_path = other_archive_path + '/' + log_filename
|
||||
log_pair = log.Pair(current_file_path, other_file_path)
|
||||
log_pair.calculate_differences()
|
||||
|
||||
latency_perc = [float(i.strip('%')) \
|
||||
for i in log_pair.latency_percentage_differences]
|
||||
max_latency = max(latency_perc)
|
||||
violations.check(max_latency, other_changeset, log_filename)
|
||||
|
||||
self._output.record(log_pair.dump())
|
||||
|
||||
def _changeset_difference(self, current_changeset, other_changeset):
|
||||
violations = violation.Threshold(self._args.latency_max, 'latency')
|
||||
|
||||
change_pair = '(' + current_changeset + ',' + other_changeset + ')'
|
||||
print('comparing changesets ' + change_pair)
|
||||
self._output.record(change_pair)
|
||||
|
||||
status, other_build_id = self._other_build_id(other_changeset)
|
||||
if status == False:
|
||||
message = 'skipping changeset ' + other_changeset
|
||||
print(message)
|
||||
self._output.record(message)
|
||||
return violations
|
||||
|
||||
other_archive_path = self._archives.path_of_build(other_build_id)
|
||||
print(self._archive_path)
|
||||
print(other_archive_path)
|
||||
|
||||
for filename in self._args.logs:
|
||||
self._log_difference(filename, other_changeset,
|
||||
other_archive_path, violations)
|
||||
print('\n')
|
||||
|
||||
return violations
|
||||
|
||||
def _calculate_performance_differences(self):
|
||||
current_changeset = \
|
||||
self._build_to_relation_chain.data[self._build_id][0]
|
||||
other_changesets = \
|
||||
self._build_to_relation_chain.data[self._build_id][1:]
|
||||
|
||||
for other_changeset in other_changesets:
|
||||
violations = self._changeset_difference(current_changeset,
|
||||
other_changeset)
|
||||
|
||||
# Only report on the last pairwise changeset combination.
|
||||
# This combination represents the changeset being tested and
|
||||
# the amd-master:HEAD.
|
||||
violations.provide_violations_to_report(self._output)
|
||||
|
||||
def run(self):
|
||||
self._output.open()
|
||||
self._calculate_performance_differences()
|
||||
@@ -0,0 +1,163 @@
|
||||
#!/tool/pandora64/.package/python-3.8.0/bin/python3
|
||||
|
||||
import abc
|
||||
import os
|
||||
import pprint
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
class BaseDict(metaclass=abc.ABCMeta):
|
||||
def __init__(self):
|
||||
self.data = {}
|
||||
self._delimiter_path = 'archive'
|
||||
self._changeset_delta_filename = 'changeset_delta.txt'
|
||||
|
||||
def _build_id(self, build_directory):
|
||||
sub_directory_strings = build_directory.split('/')
|
||||
word_count = 0
|
||||
for word in sub_directory_strings:
|
||||
if word == '':
|
||||
continue
|
||||
if word == self._delimiter_path:
|
||||
break
|
||||
word_count += 1
|
||||
bld_id = sub_directory_strings[word_count]
|
||||
return bld_id
|
||||
|
||||
def _open_changeset_delta_file(self, archive_directory):
|
||||
build_directory, config_directory = os.path.split(archive_directory)
|
||||
changeset_file_path = build_directory + '/' + \
|
||||
self._changeset_delta_filename
|
||||
try:
|
||||
file_handle = open(changeset_file_path, 'r')
|
||||
except:
|
||||
sys.exit('failed to open: ' + changeset_file_path)
|
||||
return file_handle
|
||||
|
||||
@abc.abstractmethod
|
||||
def _changeset_delta_operations(self, file_handle, bld_id):
|
||||
pass
|
||||
|
||||
def generate(self, archives):
|
||||
for d in archives:
|
||||
bld_id = self._build_id(d)
|
||||
f = self._open_changeset_delta_file(d)
|
||||
self._changeset_delta_operations(f, bld_id)
|
||||
|
||||
def most_recent_build(self):
|
||||
build_id_strings = self.data.keys()
|
||||
build_id_ints = list(map(int, build_id_strings))
|
||||
most_recent_build_id_int = max(build_id_ints)
|
||||
return str(most_recent_build_id_int)
|
||||
|
||||
def dump(self):
|
||||
str_out = self._print_text
|
||||
str_out += pprint.pformat(self.data, width=120)
|
||||
str_out += '\n'
|
||||
return str_out
|
||||
|
||||
class BuildToChangesetDict(BaseDict):
|
||||
def __init__(self, name=''):
|
||||
super().__init__()
|
||||
self._print_text = name
|
||||
|
||||
def _changeset_delta_operations(self, file_handle, bld_id):
|
||||
commit_line = file_handle.readline()
|
||||
try:
|
||||
commit_hash = commit_line.split()[0]
|
||||
except IndexError:
|
||||
commit_hash = None
|
||||
if commit_hash != None:
|
||||
self.data[bld_id] = commit_hash
|
||||
|
||||
class BuildToRelationChainDict(BaseDict):
|
||||
def __init__(self, name=''):
|
||||
super().__init__()
|
||||
self._print_text = name
|
||||
|
||||
def _changeset_delta_operations(self, file_handle, bld_id):
|
||||
changes = []
|
||||
for line in file_handle:
|
||||
changes.append(line.split()[0])
|
||||
self.data[bld_id] = changes
|
||||
|
||||
class ChangesetToBuildDict():
|
||||
def __init__(self, name=''):
|
||||
self.data = {}
|
||||
self._print_text = name
|
||||
|
||||
def _invert_dict(self, dictionary):
|
||||
dict_with_duplicates = {}
|
||||
for key, value in dictionary.data.items():
|
||||
list_with_duplicates = dict_with_duplicates.get(value, [])
|
||||
list_with_duplicates.append(key)
|
||||
dict_with_duplicates[value] = list_with_duplicates
|
||||
return dict_with_duplicates
|
||||
|
||||
def generate(self, dictionary):
|
||||
self.data = self._invert_dict(dictionary)
|
||||
|
||||
def dump(self):
|
||||
str_out = self._print_text
|
||||
str_out += pprint.pformat(self.data, width=120)
|
||||
str_out += '\n'
|
||||
return str_out
|
||||
|
||||
class ChangelogToMostRecentBuild():
|
||||
def __init__(self, name=''):
|
||||
self._print_text = name
|
||||
self._all_changesets = []
|
||||
self._changesets_with_builds = []
|
||||
self._changesets_without_builds = []
|
||||
self.data = {}
|
||||
|
||||
def _build_id(self, changeset_to_build, changeset):
|
||||
try:
|
||||
build_id_strings = changeset_to_build.data[changeset]
|
||||
build_id_ints = list(map(int, build_id_strings))
|
||||
most_recent_build_id_int = max(build_id_ints)
|
||||
build_id_str = str(most_recent_build_id_int)
|
||||
except:
|
||||
build_id_str = ''
|
||||
return build_id_str
|
||||
|
||||
def _changelog(self):
|
||||
# print git hash along with file modification stats
|
||||
shellcmd = 'git log --pretty=tformat:"%H" --shortstat | '
|
||||
# condense the output down to single line
|
||||
shellcmd += "awk 'ORS=NR%3?\" \":\"\\n\"' | "
|
||||
# parse out the git hash by itself
|
||||
shellcmd += "awk '{print $1}'"
|
||||
x = subprocess.getoutput(shellcmd)
|
||||
self._all_changesets = x.split()
|
||||
|
||||
def _with_builds(self, changeset_to_build):
|
||||
changesets = list(changeset_to_build.data.keys())
|
||||
self._changesets_with_builds = changesets
|
||||
|
||||
def _without_builds(self):
|
||||
self._changesets_without_builds = \
|
||||
list(set(self._all_changesets) - \
|
||||
set(self._changesets_with_builds))
|
||||
|
||||
def generate(self, changeset_to_build):
|
||||
self._changelog()
|
||||
self._with_builds(changeset_to_build)
|
||||
self._without_builds()
|
||||
for changeset in self._all_changesets:
|
||||
if changeset in self._changesets_with_builds:
|
||||
build = self._build_id(changeset_to_build, changeset)
|
||||
self.data[changeset] = build
|
||||
|
||||
def dump(self):
|
||||
str_out = self._print_text
|
||||
str_out += 'git-log_changesets_in_order:\n'
|
||||
str_out += pprint.pformat(self._all_changesets, width=120)
|
||||
str_out += '\nfilesystem_with_builds:\n'
|
||||
str_out += pprint.pformat(self._changesets_with_builds, width=120)
|
||||
str_out += '\nfilesystem_without_builds:\n'
|
||||
str_out += pprint.pformat(self._changesets_without_builds, width=120)
|
||||
str_out += '\ngit-log_changesets_to_build-id_mappings:\n'
|
||||
str_out += pprint.pformat(self.data, width=120)
|
||||
str_out += '\n'
|
||||
return str_out
|
||||
@@ -0,0 +1,107 @@
|
||||
#!/tool/pandora64/.package/python-3.8.0/bin/python3
|
||||
|
||||
import pprint
|
||||
import re
|
||||
import sys
|
||||
|
||||
class Log():
|
||||
def __init__(self, logfile_abspath):
|
||||
self._file_path = logfile_abspath
|
||||
self.latency = []
|
||||
self.bandwidth = []
|
||||
# regex matches the latency and bandwidth lines in the log files
|
||||
self._regex = '.*[0-9]+\.[0-9]+.*[0-9]\.[0-9].*'
|
||||
|
||||
def open(self):
|
||||
try:
|
||||
self._file_handle = open(self._file_path, 'r')
|
||||
except:
|
||||
sys.exit('failed to open: ' + self._file_path)
|
||||
|
||||
def parse(self):
|
||||
for line in self._file_handle:
|
||||
if re.match(self._regex, line):
|
||||
entries = line.split()
|
||||
self.latency.append(round(float(entries[0]), 4))
|
||||
self.bandwidth.append(round(float(entries[1]), 4))
|
||||
|
||||
class Pair():
|
||||
def __init__(self, first_logfile_abspath, second_logfile_abspath):
|
||||
self.first = Log(first_logfile_abspath)
|
||||
self.first.open()
|
||||
self.first.parse()
|
||||
self.second = Log(second_logfile_abspath)
|
||||
self.second.open()
|
||||
self.second.parse()
|
||||
|
||||
def _ratio(self, a, b):
|
||||
diff = [round((x - y), 4) for x, y in zip(a, b)]
|
||||
ratio = []
|
||||
for numerator, denominator in zip(diff, a):
|
||||
try:
|
||||
ratio.append(round(numerator / denominator, 4))
|
||||
except:
|
||||
ratio.append(float(0.0000))
|
||||
return ratio
|
||||
|
||||
def _percent(self, ratio):
|
||||
perc = ['{0:.2%}'.format(x) for x in ratio]
|
||||
return perc
|
||||
|
||||
def _percentage_difference(self, a, b):
|
||||
ratio = self._ratio(a, b)
|
||||
percent = self._percent(ratio)
|
||||
return percent
|
||||
|
||||
def calculate_differences(self):
|
||||
self.latency_percentage_differences = \
|
||||
self._percentage_difference(self.first.latency,
|
||||
self.second.latency)
|
||||
self.bandwidth_percentage_differences = \
|
||||
self._percentage_difference(self.first.bandwidth,
|
||||
self.second.bandwidth)
|
||||
|
||||
def dump(self):
|
||||
delim = ', '
|
||||
output = '\tlatency:'
|
||||
output += '\n\t\t'
|
||||
output += delim.join(map(str, self.first.latency))
|
||||
output += '\n\t\t'
|
||||
output += delim.join(map(str, self.second.latency))
|
||||
output += '\n\t\t'
|
||||
output += delim.join(map(str, self.latency_percentage_differences))
|
||||
output += '\n\tbandwidth:'
|
||||
output += '\n\t\t'
|
||||
output += delim.join(map(str, self.first.bandwidth))
|
||||
output += '\n\t\t'
|
||||
output += delim.join(map(str, self.second.bandwidth))
|
||||
output += '\n\t\t'
|
||||
output += delim.join(map(str, self.bandwidth_percentage_differences))
|
||||
return output
|
||||
|
||||
class Tracker():
|
||||
def __init__(self, args, archives):
|
||||
self._args = args
|
||||
self._archives = archives
|
||||
self._data = {}
|
||||
|
||||
def add(self, changeset, most_recent_build_id):
|
||||
archive_path = self._archives.path_of_build(most_recent_build_id)
|
||||
for filename in self._args.logs:
|
||||
abs_file_path = archive_path + '/' + filename
|
||||
log = Log(abs_file_path)
|
||||
log.open()
|
||||
log.parse()
|
||||
key = (changeset, filename)
|
||||
self._data[key] = log
|
||||
|
||||
def dump(self):
|
||||
out_str = ''
|
||||
for key in self._data.keys():
|
||||
log = self._data[key]
|
||||
line_str = pprint.pformat(key, width=120)
|
||||
line_str += ' = '
|
||||
line_str += pprint.pformat(log.latency, width=120)
|
||||
line_str += '\n'
|
||||
out_str += line_str
|
||||
return out_str
|
||||
@@ -0,0 +1,85 @@
|
||||
#!/tool/pandora64/.package/python-3.8.0/bin/python3
|
||||
|
||||
import argparse
|
||||
|
||||
class Parser():
|
||||
def __init__(self):
|
||||
# A parent directory containing log file output from one of the
|
||||
# configuration runs. The output directories are intended to
|
||||
# be symmetric in naming with the various configurations supplied
|
||||
# by the library's build_configs.
|
||||
self._default_config = 'RC_SINGLE'
|
||||
|
||||
# The list of log files which need to be checked for performance
|
||||
# differences.
|
||||
self._default_logs = ['get.log',
|
||||
'get_nbi.log',
|
||||
'get_swarm.log',
|
||||
'put.log',
|
||||
'put_nbi.log']
|
||||
|
||||
# The maximum pairwise difference for the log file latencies.
|
||||
self._default_latency_max = 5.0
|
||||
|
||||
# The minimum bandwidth difference for the log file bandwidths.
|
||||
self._default_bandwidth_min = -50.0
|
||||
|
||||
# The Jenkins tester archives slave output on the master's
|
||||
# filesystem which currently uses this top-level path (as the
|
||||
# resperf account).
|
||||
self._default_jenkins_path = \
|
||||
'/proj/radl_extra/users/resperf/jenkins-2.192/'
|
||||
|
||||
# The performance tester runs as part of the 'short' job to
|
||||
# verify that no performance degradation has occurred between
|
||||
# commits. This archive path is the generic archive path
|
||||
# for all of the builds. The Kleene star is used as a place
|
||||
# holder for the Jenkins build number.
|
||||
self._default_archive_path = \
|
||||
'jobs/shmem_short/builds/*/archive/'
|
||||
|
||||
# The default benchmark path can be used to alter archive
|
||||
# output placement. Currently, this is initialized to an empty
|
||||
# string, but subsequently initialized to inject the config
|
||||
# path.
|
||||
self._default_benchmark_path = ''
|
||||
|
||||
def setup_options(self, argparser):
|
||||
argparser.add_argument('-j',
|
||||
dest='jenkins_path',
|
||||
default=self._default_jenkins_path)
|
||||
argparser.add_argument('-a',
|
||||
dest='archive_path',
|
||||
default=self._default_archive_path)
|
||||
argparser.add_argument('-b',
|
||||
dest='benchmark_path',
|
||||
default=self._default_benchmark_path)
|
||||
argparser.add_argument('-c',
|
||||
dest='config',
|
||||
default=self._default_config)
|
||||
argparser.add_argument('-l',
|
||||
dest='logs',
|
||||
nargs='*',
|
||||
default=self._default_logs)
|
||||
argparser.add_argument('-x',
|
||||
dest='latency_max',
|
||||
type=float,
|
||||
default=self._default_latency_max)
|
||||
argparser.add_argument('-y',
|
||||
dest='bandwidth_min',
|
||||
type=float,
|
||||
default=self._default_bandwidth_min)
|
||||
argparser.add_argument('-o',
|
||||
dest='one_changeset')
|
||||
argparser.add_argument('-r',
|
||||
dest='changeset_range',
|
||||
nargs=2,
|
||||
metavar=("most_recent_changeset", "least_recent_changeset"))
|
||||
return argparser
|
||||
|
||||
def parse_command_line(self):
|
||||
p = argparse.ArgumentParser()
|
||||
p = self.setup_options(p)
|
||||
args = p.parse_args()
|
||||
args.benchmark_path = args.config + args.benchmark_path
|
||||
return args
|
||||
@@ -0,0 +1,30 @@
|
||||
<?xml version='1.1' encoding='UTF-8'?>
|
||||
<project>
|
||||
<actions/>
|
||||
<description>shmem performance delta checker</description>
|
||||
<keepDependencies>false</keepDependencies>
|
||||
<properties/>
|
||||
<scm class="hudson.scm.NullSCM"/>
|
||||
<assignedNode>master</assignedNode>
|
||||
<canRoam>false</canRoam>
|
||||
<disabled>false</disabled>
|
||||
<blockBuildWhenDownstreamBuilding>false</blockBuildWhenDownstreamBuilding>
|
||||
<blockBuildWhenUpstreamBuilding>false</blockBuildWhenUpstreamBuilding>
|
||||
<triggers/>
|
||||
<concurrentBuild>false</concurrentBuild>
|
||||
<builders>
|
||||
<hudson.tasks.Shell>
|
||||
<command>/proj/radl_extra/users/resperf/jenkins-2.192/workspace/shmem_short@script/internal/continuous_integration/short/check_perf_delta.py -c "RC_SINGLE" -x "15.0" -l put.log put_nbi.log get.log get_nbi.log amo_add.log amo_fadd.log amo_fcswap.log amo_fetch.log amo_finc.log amo_inc.log ping_pong.log
|
||||
/proj/radl_extra/users/resperf/jenkins-2.192/workspace/shmem_short@script/internal/continuous_integration/short/check_perf_delta.py -c "RC_MULTI" -x "15.0" -l put.log put_nbi.log get.log get_nbi.log amo_add.log amo_fadd.log amo_fcswap.log amo_fetch.log amo_finc.log amo_inc.log ping_pong.log get_swarm.log
|
||||
/proj/radl_extra/users/resperf/jenkins-2.192/workspace/shmem_short@script/internal/continuous_integration/short/check_perf_delta.py -c "DC_SINGLE" -x "15.0" -l put.log put_nbi.log get.log get_nbi.log ping_pong.log
|
||||
/proj/radl_extra/users/resperf/jenkins-2.192/workspace/shmem_short@script/internal/continuous_integration/short/check_perf_delta.py -c "DC_MULTI" -x "15.0" -l put.log put_nbi.log get.log get_nbi.log ping_pong.log get_swarm.log
|
||||
/proj/radl_extra/users/resperf/jenkins-2.192/workspace/shmem_short@script/internal/continuous_integration/short/check_perf_delta.py -c "RO_NET_BASIC" -x "75.0" -l put.log put_nbi.log get.log get_nbi.log ping_pong.log
|
||||
</command>
|
||||
<configuredLocalRules/>
|
||||
</hudson.tasks.Shell>
|
||||
</builders>
|
||||
<publishers/>
|
||||
<buildWrappers>
|
||||
<hudson.plugins.timestamper.TimestamperBuildWrapper plugin="timestamper@1.12"/>
|
||||
</buildWrappers>
|
||||
</project>
|
||||
@@ -0,0 +1,35 @@
|
||||
#!/tool/pandora64/.package/python-3.8.0/bin/python3
|
||||
|
||||
import parser
|
||||
import dictionary
|
||||
import archive_path
|
||||
import plotter
|
||||
|
||||
def main():
|
||||
p = parser.Parser()
|
||||
args = p.parse_command_line()
|
||||
|
||||
archives = archive_path.Archive(args)
|
||||
archives.generate()
|
||||
print(archives.dump())
|
||||
|
||||
build_to_changeset = dictionary.BuildToChangesetDict()
|
||||
build_to_changeset.generate(archives.dirs)
|
||||
print(build_to_changeset.dump())
|
||||
|
||||
changeset_to_build = dictionary.ChangesetToBuildDict()
|
||||
changeset_to_build.generate(build_to_changeset)
|
||||
print(changeset_to_build.dump())
|
||||
|
||||
plot = plotter.Plot(args,
|
||||
archives,
|
||||
changeset_to_build)
|
||||
|
||||
# either plot with all the changesets or the slice provided
|
||||
plot.changeset_slice()
|
||||
|
||||
if (args.one_changeset):
|
||||
plot.one_changeset_plot()
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -0,0 +1,230 @@
|
||||
#!/usr/bin/env Rscript
|
||||
|
||||
# load the required libraries:
|
||||
library(tidyverse)
|
||||
library(RColorBrewer)
|
||||
library(optparse)
|
||||
|
||||
# declare some helper functions
|
||||
ggpreview <- function (..., device = "png") {
|
||||
fname <- tempfile(fileext = paste0(".", device))
|
||||
ggplot2::ggsave(filename = fname, device = device, ...)
|
||||
system2("open", fname)
|
||||
invisible(NULL)
|
||||
}
|
||||
|
||||
set_right_order <- function(df) {
|
||||
# reverse the order of the rows so that oldest commit is first
|
||||
df <- df %>% map_df(rev)
|
||||
# ensure that ggplot plots the x-axis in the right order
|
||||
df$Commit <- factor(df$Commit, levels = unique(df$Commit))
|
||||
return(df)
|
||||
}
|
||||
|
||||
plot_and_save <- function(df, xval, yval, title, subtitle, xlabel, filename) {
|
||||
p <- ggplot(df, aes_string(x=xval, y=yval, group=1)) +
|
||||
geom_line(size = 0.5, color=mycolors[1]) +
|
||||
geom_point(size = 1.5, alpha = 1, color=mycolors[2]) +
|
||||
theme_minimal() +
|
||||
expand_limits(y=0) +
|
||||
xlab(xlabel) +
|
||||
ggtitle(title, subtitle = subtitle) +
|
||||
theme(
|
||||
axis.text.x = element_text(angle=90,hjust=1),
|
||||
axis.title.y = element_blank()
|
||||
) +
|
||||
scale_fill_manual(values = mycolors)
|
||||
#ggpreview(width=7.5, height=5, units="in", dpi=500)
|
||||
ggsave(filename, p, device=pdf, dpi=500)
|
||||
}
|
||||
|
||||
## Set up options ##
|
||||
|
||||
option_list = list(
|
||||
make_option(c("-o", "--output"), type="character", default=NULL, action="store",
|
||||
help="path (without trailing /) to a folder that will
|
||||
contain the plots", metavar="folder-path"),
|
||||
make_option(c("-a", "--changeset_a"), type="character", default=NULL, action="store",
|
||||
help="beginning (inclusive) changeset of slice", metavar="changeset"),
|
||||
make_option(c("-b", "--changeset_b"), type="character", default=NULL, action="store",
|
||||
help="ending (inclusive) changeset of slice", metavar="changeset"),
|
||||
make_option(c("-c", "--one_changeset"), type="character", default=NULL, action="store",
|
||||
help="if set, will prepare plots for one changeset; if not, plots for a changeset slice")
|
||||
|
||||
)
|
||||
|
||||
## SCRIPT START ##
|
||||
|
||||
# parse the options
|
||||
opt_parser <- OptionParser(option_list=option_list)
|
||||
opts <- parse_args(opt_parser)
|
||||
if (is.null(opts$output)) {
|
||||
print_help(opt_parser)
|
||||
stop("Please set the --output flag.", call.=FALSE)
|
||||
}
|
||||
slice_opt = 0
|
||||
single_opt = 0
|
||||
if (!is.null(opts$changeset_a) && !is.null(opts$changeset_b)) {
|
||||
slice_opt = 1
|
||||
}
|
||||
if (!is.null(opts$one_changeset)) {
|
||||
single_opt = 1
|
||||
}
|
||||
|
||||
if ( (slice_opt && single_opt) || (!slice_opt && !single_opt) ) {
|
||||
stop("Please supply a slice or a single changeset, not both.", call.= FALSE)
|
||||
}
|
||||
|
||||
# choose color palette
|
||||
mycolors <- brewer.pal(5, "Set2")
|
||||
|
||||
if (length(opts$one_changeset) > 0) {
|
||||
## Plotting data for a single changeset ##
|
||||
|
||||
# read the files
|
||||
non_amo <- read.csv("non_amo_one_changeset.csv", header=TRUE)
|
||||
amo <- read.csv("amo_one_changeset.csv", header=TRUE)
|
||||
ping_pong <- read.csv("ping_pong_one_changeset.csv", header=TRUE)
|
||||
|
||||
# ensure that ggplot plots the x-axis in the right order
|
||||
non_amo$size <- factor(non_amo$size, levels = unique(non_amo$size))
|
||||
amo$op <- factor(amo$op, levels = unique(amo$op))
|
||||
|
||||
# plot
|
||||
non_amo_ops <- list("put","put_nbi","get","get_nbi")
|
||||
for (op in non_amo_ops) {
|
||||
plot_and_save(df=non_amo,
|
||||
xval="size",
|
||||
yval=op,
|
||||
title=op,
|
||||
subtitle="Latency (us)",
|
||||
xlabel="Message size (bytes)",
|
||||
filename=paste(opts$output,"/",op,"_changeset_",opts$one_changeset,".pdf", sep="")
|
||||
)
|
||||
}
|
||||
|
||||
# prepare data for plots with fixed message size and ops as x axis
|
||||
non_amo$bsize <- paste("b",non_amo$size,sep="") # (so that the columns in non_amo_t start with a character)
|
||||
non_amo_t <- setNames(data.frame(t(non_amo[,2:5])), non_amo[,6]) # transpose + set column names
|
||||
non_amo_t$op <- colnames(non_amo[,2:5]) # make a column with operation names
|
||||
|
||||
sizes <- colnames(non_amo_t[,-(length(colnames(non_amo_t)))])
|
||||
for (size in sizes) {
|
||||
plot_and_save(df=non_amo_t,
|
||||
xval="op",
|
||||
yval=size,
|
||||
title=paste(sub('.', '', size),"byte"),
|
||||
subtitle="Latency (us)",
|
||||
xlabel="Operation",
|
||||
filename=paste(opts$output,"/",size,"_changeset_",opts$one_changeset,".pdf", sep="")
|
||||
)
|
||||
}
|
||||
|
||||
plot_and_save(df=amo,
|
||||
xval="op",
|
||||
yval="latency",
|
||||
title="Atomics",
|
||||
subtitle="Latency (us)",
|
||||
xlabel="Operation",
|
||||
filename=paste(opts$output,"/atomic_changeset_",opts$one_changeset,".pdf", sep="")
|
||||
)
|
||||
|
||||
ping_pong$type <- c("ping_pong")
|
||||
p<-ggplot(ping_pong, aes(x=type, y=latency, fill=type)) +
|
||||
geom_bar(stat="identity", width=0.5) +
|
||||
theme_minimal() +
|
||||
ggtitle("Ping pong", subtitle = "Latency (us)") +
|
||||
theme(
|
||||
axis.title.y = element_blank(),
|
||||
axis.text.y = element_blank(),
|
||||
axis.title.x = element_blank(),
|
||||
legend.position = "none"
|
||||
) +
|
||||
coord_flip() +
|
||||
scale_fill_manual(values = mycolors)
|
||||
#ggpreview(width=7.5, height=5, units="in", dpi=500)
|
||||
ggsave(paste(opts$output,"/ping_pong_changeset_",opts$one_changeset,".pdf", sep=""), p, device=pdf, dpi=500)
|
||||
|
||||
} else {
|
||||
## Plotting across a changeset slice ##
|
||||
|
||||
# read the files
|
||||
put <- read.csv("put.csv", header=TRUE)
|
||||
put_nbi <- read.csv("put_nbi.csv", header=TRUE)
|
||||
get <- read.csv("get.csv", header=TRUE)
|
||||
get_nbi <- read.csv("get_nbi.csv", header=TRUE)
|
||||
amo <- read.csv("amo.csv", header=TRUE)
|
||||
ping_pong <- read.csv("ping_pong.csv", header=TRUE)
|
||||
|
||||
# slice out the commits
|
||||
start <- match(c(opts$changeset_a), put$Commit)
|
||||
end <- match(c(opts$changeset_b), put$Commit)
|
||||
# (start and end should be the same for all the frames) #
|
||||
put <- put[start:end,]
|
||||
put_nbi <- put_nbi[start:end,]
|
||||
get <- get[start:end,]
|
||||
get_nbi <- get_nbi[start:end,]
|
||||
amo <- amo[start:end,]
|
||||
ping_pong <- ping_pong[start:end,]
|
||||
|
||||
put <- set_right_order(put)
|
||||
put_nbi <- set_right_order(put_nbi)
|
||||
get <- set_right_order(get)
|
||||
get_nbi <- set_right_order(get_nbi)
|
||||
amo <- set_right_order(amo)
|
||||
ping_pong <- set_right_order(ping_pong)
|
||||
|
||||
# plot
|
||||
non_amo_ops <- list("put","put_nbi","get","get_nbi")
|
||||
sizes_to_subtitle_map <- list("b1"="1 byte",
|
||||
"b2"="2 bytes",
|
||||
"b4"="4 bytes",
|
||||
"b8"="8 bytes",
|
||||
"b16"="16 bytes",
|
||||
"b32"="32 bytes",
|
||||
"b64"="64 bytes",
|
||||
"b128"="128 bytes",
|
||||
"b256"="256 bytes",
|
||||
"b512"="512 bytes",
|
||||
"b1024"="1024 bytes",
|
||||
"b2048"="2048 bytes",
|
||||
"b4096"="4096 bytes",
|
||||
"b8192"="8192 bytes",
|
||||
"b16384"="16384 bytes",
|
||||
"b32768"="32768 bytes")
|
||||
for (op in non_amo_ops) {
|
||||
for (size in names(sizes_to_subtitle_map)) {
|
||||
plot_and_save(df=eval(parse(text=op)),
|
||||
xval="Commit",
|
||||
yval=size,
|
||||
title=op,
|
||||
subtitle=paste("Latency (us) for ",sizes_to_subtitle_map[[size]],sep=""),
|
||||
xlabel="Commit (older to newer)",
|
||||
filename=paste(opts$output,"/",op,"_",size,".pdf", sep="")
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
amo_ops <- list("add","cswap","fadd","fcswap","fetch","finc","inc")
|
||||
for (op in amo_ops) {
|
||||
plot_and_save(df=amo,
|
||||
xval="Commit",
|
||||
yval=op,
|
||||
title=op,
|
||||
subtitle="Latency (us)",
|
||||
xlabel="Commit (older to newer)",
|
||||
filename=paste(opts$output,"/",op,".pdf", sep="")
|
||||
)
|
||||
}
|
||||
|
||||
plot_and_save(df=ping_pong,
|
||||
xval="Commit",
|
||||
yval="latency",
|
||||
title="ping_pong",
|
||||
subtitle="Latency (us)",
|
||||
xlabel="Commit (older to newer)",
|
||||
filename=paste(opts$output,"/","ping_pong.pdf", sep="")
|
||||
)
|
||||
}
|
||||
|
||||
## SCRIPT END ##
|
||||
@@ -0,0 +1,295 @@
|
||||
#!/tool/pandora64/.package/python-3.8.0/bin/python3
|
||||
|
||||
import dictionary
|
||||
import log
|
||||
#import matplotlib.pyplot
|
||||
import numpy
|
||||
import csv
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
class Plot():
|
||||
def __init__(self, args, archives, changeset_to_build):
|
||||
self._args = args
|
||||
self._archives = archives
|
||||
self._changelog = dictionary.ChangelogToMostRecentBuild()
|
||||
self._changelog.generate(changeset_to_build)
|
||||
print(self._changelog.dump())
|
||||
|
||||
def abbreviate_changesets(self, changesets):
|
||||
return [changeset[0:8] for changeset in changesets]
|
||||
|
||||
@staticmethod
|
||||
def write_dict_to_file(tracker, field_names, file_name):
|
||||
with open(file_name, 'w') as csvfile:
|
||||
writer = csv.DictWriter(csvfile, fieldnames=field_names)
|
||||
writer.writeheader()
|
||||
writer.writerows(tracker)
|
||||
|
||||
@staticmethod
|
||||
def check_and_add_to_dict(dictionary, key, array):
|
||||
if len(array) > 0:
|
||||
dictionary[key] = array[0]
|
||||
else:
|
||||
dictionary[key] = 0
|
||||
|
||||
def changeset_slice(self):
|
||||
self._log_tracker = log.Tracker(self._args, self._archives)
|
||||
for changeset in self._changelog._all_changesets:
|
||||
if changeset in self._changelog.data.keys():
|
||||
build_id = self._changelog.data[changeset]
|
||||
self._log_tracker.add(changeset, build_id)
|
||||
print(self._log_tracker.dump())
|
||||
|
||||
"""
|
||||
separate out dictionaries based on operation
|
||||
and prepare them in a format that works with
|
||||
the csv module
|
||||
"""
|
||||
put_tracker = []
|
||||
put_nbi_tracker = []
|
||||
get_tracker = []
|
||||
get_nbi_tracker = []
|
||||
amo_tracker = []
|
||||
ping_pong_tracker = []
|
||||
prev_commit = list(self._log_tracker._data.keys())[0][0]
|
||||
amo_dict = {}
|
||||
for key, value in self._log_tracker._data.items():
|
||||
if (key[1] == "put.log"):
|
||||
put_tracker.append({'Commit':key[0][0:7],
|
||||
'b1':value.latency[0],
|
||||
'b2':value.latency[1],
|
||||
'b4':value.latency[2],
|
||||
'b8':value.latency[3],
|
||||
'b16':value.latency[4],
|
||||
'b32':value.latency[5],
|
||||
'b64':value.latency[6],
|
||||
'b128':value.latency[7],
|
||||
'b256':value.latency[8],
|
||||
'b512':value.latency[9],
|
||||
'b1024':value.latency[10],
|
||||
'b2048':value.latency[11],
|
||||
'b4096':value.latency[12],
|
||||
'b8192':value.latency[13],
|
||||
'b16384':value.latency[14],
|
||||
'b32768':value.latency[15]
|
||||
})
|
||||
if (key[1] == "put_nbi.log"):
|
||||
put_nbi_tracker.append({'Commit':key[0][0:7],
|
||||
'b1':value.latency[0],
|
||||
'b2':value.latency[1],
|
||||
'b4':value.latency[2],
|
||||
'b8':value.latency[3],
|
||||
'b16':value.latency[4],
|
||||
'b32':value.latency[5],
|
||||
'b64':value.latency[6],
|
||||
'b128':value.latency[7],
|
||||
'b256':value.latency[8],
|
||||
'b512':value.latency[9],
|
||||
'b1024':value.latency[10],
|
||||
'b2048':value.latency[11],
|
||||
'b4096':value.latency[12],
|
||||
'b8192':value.latency[13],
|
||||
'b16384':value.latency[14],
|
||||
'b32768':value.latency[15]
|
||||
})
|
||||
if (key[1] == "get.log"):
|
||||
get_tracker.append({'Commit':key[0][0:7],
|
||||
'b1':value.latency[0],
|
||||
'b2':value.latency[1],
|
||||
'b4':value.latency[2],
|
||||
'b8':value.latency[3],
|
||||
'b16':value.latency[4],
|
||||
'b32':value.latency[5],
|
||||
'b64':value.latency[6],
|
||||
'b128':value.latency[7],
|
||||
'b256':value.latency[8],
|
||||
'b512':value.latency[9],
|
||||
'b1024':value.latency[10],
|
||||
'b2048':value.latency[11],
|
||||
'b4096':value.latency[12],
|
||||
'b8192':value.latency[13],
|
||||
'b16384':value.latency[14],
|
||||
'b32768':value.latency[15]
|
||||
})
|
||||
if (key[1] == "get_nbi.log"):
|
||||
get_nbi_tracker.append({'Commit':key[0][0:7],
|
||||
'b1':value.latency[0],
|
||||
'b2':value.latency[1],
|
||||
'b4':value.latency[2],
|
||||
'b8':value.latency[3],
|
||||
'b16':value.latency[4],
|
||||
'b32':value.latency[5],
|
||||
'b64':value.latency[6],
|
||||
'b128':value.latency[7],
|
||||
'b256':value.latency[8],
|
||||
'b512':value.latency[9],
|
||||
'b1024':value.latency[10],
|
||||
'b2048':value.latency[11],
|
||||
'b4096':value.latency[12],
|
||||
'b8192':value.latency[13],
|
||||
'b16384':value.latency[14],
|
||||
'b32768':value.latency[15]
|
||||
})
|
||||
if (key[1] == "ping_pong.log"):
|
||||
ping_pong_tracker.append({'Commit':key[0][0:7],
|
||||
'latency':value.latency[0]
|
||||
})
|
||||
|
||||
# check to see if we have moved to a new commit
|
||||
# if we have, store the dict in the amo_tracker
|
||||
if (key[0] != prev_commit):
|
||||
amo_dict['Commit'] = prev_commit[0:7]
|
||||
amo_tracker.append(amo_dict.copy())
|
||||
amo_dict.clear()
|
||||
|
||||
prev_commit = key[0]
|
||||
|
||||
if (key[1] == "amo_add.log"):
|
||||
self.check_and_add_to_dict(amo_dict, 'add', value.latency)
|
||||
if (key[1] == "amo_cswap.log"):
|
||||
self.check_and_add_to_dict(amo_dict, 'cswap', value.latency)
|
||||
if (key[1] == "amo_fadd.log"):
|
||||
self.check_and_add_to_dict(amo_dict, 'fadd', value.latency)
|
||||
if (key[1] == "amo_fcswap.log"):
|
||||
self.check_and_add_to_dict(amo_dict, 'fcswap', value.latency)
|
||||
if (key[1] == "amo_fetch.log"):
|
||||
self.check_and_add_to_dict(amo_dict, 'fetch', value.latency)
|
||||
if (key[1] == "amo_finc.log"):
|
||||
self.check_and_add_to_dict(amo_dict, 'finc', value.latency)
|
||||
if (key[1] == "amo_inc.log"):
|
||||
self.check_and_add_to_dict(amo_dict, 'inc', value.latency)
|
||||
|
||||
# store the last commit's amo data
|
||||
amo_dict['Commit'] = prev_commit[0:7]
|
||||
amo_tracker.append(amo_dict.copy())
|
||||
|
||||
# write put results into a file:
|
||||
size_field_names= ['Commit','b1','b2','b4','b8','b16','b32','b64','b128','b256','b512','b1024','b2048','b4096','b8192','b16384','b32768']
|
||||
amo_field_names= ['Commit','add','cswap','fadd','fcswap','fetch','finc','inc']
|
||||
ping_pong_field_names= ['Commit','latency']
|
||||
|
||||
self.write_dict_to_file(put_tracker, size_field_names, "put.csv")
|
||||
self.write_dict_to_file(put_nbi_tracker, size_field_names, "put_nbi.csv")
|
||||
self.write_dict_to_file(get_tracker, size_field_names, "get.csv")
|
||||
self.write_dict_to_file(get_nbi_tracker, size_field_names, "get_nbi.csv")
|
||||
self.write_dict_to_file(amo_tracker, amo_field_names, "amo.csv")
|
||||
self.write_dict_to_file(ping_pong_tracker, ping_pong_field_names, "ping_pong.csv")
|
||||
|
||||
# make a directory and execute the R script to generate plots in that directory
|
||||
current_dir = os.getcwd()
|
||||
plot_dir = os.path.join(current_dir, 'plots')
|
||||
if not os.path.exists(plot_dir):
|
||||
os.makedirs(plot_dir)
|
||||
|
||||
changeset_a = list(self._log_tracker._data.keys())[0][0]
|
||||
changeset_b = list(self._log_tracker._data.keys())[-1][0]
|
||||
|
||||
# check if the provided changesets are correct
|
||||
if (self._args.changeset_range):
|
||||
found_changeset_a = False
|
||||
found_changeset_b = False
|
||||
for key, value in self._log_tracker._data.items():
|
||||
if (found_changeset_a and found_changeset_b):
|
||||
break
|
||||
if (not found_changeset_a):
|
||||
if (self._args.changeset_range[0] == key[0]):
|
||||
found_changeset_a = True
|
||||
if (not found_changeset_b):
|
||||
if (self._args.changeset_range[1] == key[0]):
|
||||
found_changeset_b = True
|
||||
|
||||
if ((not found_changeset_a) and (not found_changeset_b)):
|
||||
sys.exit("One of the specified changesets was not found. Please specify correct/complete commit IDs.")
|
||||
else:
|
||||
changeset_a = self._args.changeset_range[0]
|
||||
changeset_b = self._args.changeset_range[1]
|
||||
|
||||
r_command = "Rscript ./plotter.R -o ./plots -a " + changeset_a[0:7] + " -b " + changeset_b[0:7]
|
||||
|
||||
print(r_command)
|
||||
subprocess.check_call(r_command, shell=True)
|
||||
|
||||
|
||||
def one_changeset_plot(self):
|
||||
found_changeset = 0
|
||||
non_amo_tracker = []
|
||||
amo_tracker = []
|
||||
ping_pong_tracker = []
|
||||
for key, value in self._log_tracker._data.items():
|
||||
if (key[0] == self._args.one_changeset):
|
||||
found_changeset = 1
|
||||
if (key[1] == "put.log"):
|
||||
put_vals = value.latency
|
||||
if (key[1] == "put_nbi.log"):
|
||||
put_nbi_vals = value.latency
|
||||
if (key[1] == "get.log"):
|
||||
get_vals = value.latency
|
||||
if (key[1] == "get_nbi.log"):
|
||||
get_nbi_vals = value.latency
|
||||
if (key[1] == "amo_add.log"):
|
||||
amo_tracker.append({'op':'add',
|
||||
'latency': value.latency[0] if len(value.latency) > 0 else 0
|
||||
})
|
||||
if (key[1] == "amo_add.log"):
|
||||
amo_tracker.append({'op':'add',
|
||||
'latency': value.latency[0] if len(value.latency) > 0 else 0
|
||||
})
|
||||
if (key[1] == "amo_cswap.log"):
|
||||
amo_tracker.append({'op':'cswap',
|
||||
'latency': value.latency[0] if len(value.latency) > 0 else 0
|
||||
})
|
||||
if (key[1] == "amo_fadd.log"):
|
||||
amo_tracker.append({'op':'fadd',
|
||||
'latency': value.latency[0] if len(value.latency) > 0 else 0
|
||||
})
|
||||
if (key[1] == "amo_fcswap.log"):
|
||||
amo_tracker.append({'op':'fcswap',
|
||||
'latency': value.latency[0] if len(value.latency) > 0 else 0
|
||||
})
|
||||
if (key[1] == "amo_fetch.log"):
|
||||
amo_tracker.append({'op':'fetch',
|
||||
'latency': value.latency[0] if len(value.latency) > 0 else 0
|
||||
})
|
||||
if (key[1] == "amo_finc.log"):
|
||||
amo_tracker.append({'op':'finc',
|
||||
'latency': value.latency[0] if len(value.latency) > 0 else 0
|
||||
})
|
||||
if (key[1] == "amo_inc.log"):
|
||||
amo_tracker.append({'op':'inc',
|
||||
'latency': value.latency[0] if len(value.latency) > 0 else 0
|
||||
})
|
||||
if (key[1] == "ping_pong.log"):
|
||||
ping_pong_tracker.append({'latency': value.latency[0] if len(value.latency) > 0 else 0
|
||||
})
|
||||
|
||||
|
||||
if (not found_changeset):
|
||||
sys.exit("The requested changeset was not found. Please specify correct/complete commit IDs.")
|
||||
|
||||
index = 0
|
||||
for size in [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]:
|
||||
non_amo_tracker.append({'size':size,
|
||||
'put':put_vals[index],
|
||||
'put_nbi':put_nbi_vals[index],
|
||||
'get':get_vals[index],
|
||||
'get_nbi':get_nbi_vals[index]
|
||||
})
|
||||
index = index + 1
|
||||
|
||||
# write results into a file:
|
||||
non_amo_field_names= ['size','put','put_nbi','get','get_nbi']
|
||||
amo_field_names= ['op','latency']
|
||||
ping_pong_field_names= ['latency']
|
||||
|
||||
self.write_dict_to_file(non_amo_tracker, non_amo_field_names, "non_amo_one_changeset.csv")
|
||||
self.write_dict_to_file(amo_tracker, amo_field_names, "amo_one_changeset.csv")
|
||||
self.write_dict_to_file(ping_pong_tracker, ping_pong_field_names, "ping_pong_one_changeset.csv")
|
||||
|
||||
# call the R script with an option that tells it to plot figures for
|
||||
r_command = "Rscript ./plotter.R -o ./plots -c " + self._args.one_changeset
|
||||
|
||||
print(r_command)
|
||||
subprocess.check_call(r_command, shell=True)
|
||||
|
||||
@@ -0,0 +1,21 @@
|
||||
#!/tool/pandora64/.package/python-3.8.0/bin/python3
|
||||
|
||||
import sys
|
||||
|
||||
class Report():
|
||||
def __init__(self, identifier, path, filename):
|
||||
self._identifier = identifier
|
||||
self._path = path
|
||||
self._filename = filename
|
||||
|
||||
def open(self):
|
||||
print('opening report for ' + self._identifier)
|
||||
try:
|
||||
report_path = self._path + '/' + self._filename
|
||||
print('report_path: ' + report_path)
|
||||
self._file_handle = open(report_path, 'w')
|
||||
except:
|
||||
sys.exit('failed to open report: ' + report_path)
|
||||
|
||||
def record(self, message):
|
||||
self._file_handle.write(message + '\n')
|
||||
@@ -0,0 +1,96 @@
|
||||
<?xml version='1.1' encoding='UTF-8'?>
|
||||
<flow-definition plugin="workflow-job@2.40">
|
||||
<actions>
|
||||
<org.jenkinsci.plugins.pipeline.modeldefinition.actions.DeclarativeJobAction plugin="pipeline-model-definition@1.8.4"/>
|
||||
<org.jenkinsci.plugins.pipeline.modeldefinition.actions.DeclarativeJobPropertyTrackerAction plugin="pipeline-model-definition@1.8.4">
|
||||
<jobProperties/>
|
||||
<triggers/>
|
||||
<parameters/>
|
||||
<options/>
|
||||
</org.jenkinsci.plugins.pipeline.modeldefinition.actions.DeclarativeJobPropertyTrackerAction>
|
||||
</actions>
|
||||
<description></description>
|
||||
<keepDependencies>false</keepDependencies>
|
||||
<properties>
|
||||
<org.jenkinsci.plugins.workflow.job.properties.DisableConcurrentBuildsJobProperty/>
|
||||
<org.jenkinsci.plugins.workflow.job.properties.PipelineTriggersJobProperty>
|
||||
<triggers>
|
||||
<com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.GerritTrigger plugin="gerrit-trigger@2.33.0">
|
||||
<spec></spec>
|
||||
<gerritProjects>
|
||||
<com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.data.GerritProject>
|
||||
<compareType>PLAIN</compareType>
|
||||
<pattern>rsch/ec/shmem</pattern>
|
||||
<branches>
|
||||
<com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.data.Branch>
|
||||
<compareType>PLAIN</compareType>
|
||||
<pattern>amd-master</pattern>
|
||||
</com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.data.Branch>
|
||||
</branches>
|
||||
<disableStrictForbiddenFileVerification>false</disableStrictForbiddenFileVerification>
|
||||
</com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.data.GerritProject>
|
||||
</gerritProjects>
|
||||
<dynamicGerritProjects class="empty-list"/>
|
||||
<skipVote>
|
||||
<onSuccessful>false</onSuccessful>
|
||||
<onFailed>false</onFailed>
|
||||
<onUnstable>false</onUnstable>
|
||||
<onNotBuilt>false</onNotBuilt>
|
||||
<onAborted>false</onAborted>
|
||||
</skipVote>
|
||||
<silentMode>false</silentMode>
|
||||
<enableTopicAssociation>false</enableTopicAssociation>
|
||||
<notificationLevel></notificationLevel>
|
||||
<silentStartMode>false</silentStartMode>
|
||||
<escapeQuotes>true</escapeQuotes>
|
||||
<nameAndEmailParameterMode>PLAIN</nameAndEmailParameterMode>
|
||||
<dependencyJobsNames>shmem_perf_check, </dependencyJobsNames>
|
||||
<commitMessageParameterMode>BASE64</commitMessageParameterMode>
|
||||
<changeSubjectParameterMode>PLAIN</changeSubjectParameterMode>
|
||||
<commentTextParameterMode>BASE64</commentTextParameterMode>
|
||||
<buildStartMessage></buildStartMessage>
|
||||
<buildFailureMessage></buildFailureMessage>
|
||||
<buildSuccessfulMessage></buildSuccessfulMessage>
|
||||
<buildUnstableMessage></buildUnstableMessage>
|
||||
<buildNotBuiltMessage></buildNotBuiltMessage>
|
||||
<buildAbortedMessage></buildAbortedMessage>
|
||||
<buildUnsuccessfulFilepath></buildUnsuccessfulFilepath>
|
||||
<customUrl></customUrl>
|
||||
<serverName>amd-gerrit</serverName>
|
||||
<triggerOnEvents>
|
||||
<com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.events.PluginCommentAddedContainsEvent>
|
||||
<commentAddedCommentContains>!SHORT</commentAddedCommentContains>
|
||||
</com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.events.PluginCommentAddedContainsEvent>
|
||||
</triggerOnEvents>
|
||||
<dynamicTriggerConfiguration>false</dynamicTriggerConfiguration>
|
||||
<triggerConfigURL></triggerConfigURL>
|
||||
<triggerInformationAction/>
|
||||
</com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.GerritTrigger>
|
||||
</triggers>
|
||||
</org.jenkinsci.plugins.workflow.job.properties.PipelineTriggersJobProperty>
|
||||
</properties>
|
||||
<definition class="org.jenkinsci.plugins.workflow.cps.CpsScmFlowDefinition" plugin="workflow-cps@2.90">
|
||||
<scm class="hudson.plugins.git.GitSCM" plugin="git@4.7.1">
|
||||
<configVersion>2</configVersion>
|
||||
<userRemoteConfigs>
|
||||
<hudson.plugins.git.UserRemoteConfig>
|
||||
<name>origin</name>
|
||||
<refspec>${GERRIT_REFSPEC}</refspec>
|
||||
<url>ssh://gerritgit/rsch/ec/shmem</url>
|
||||
</hudson.plugins.git.UserRemoteConfig>
|
||||
</userRemoteConfigs>
|
||||
<branches>
|
||||
<hudson.plugins.git.BranchSpec>
|
||||
<name>FETCH_HEAD</name>
|
||||
</hudson.plugins.git.BranchSpec>
|
||||
</branches>
|
||||
<doGenerateSubmoduleConfigurations>false</doGenerateSubmoduleConfigurations>
|
||||
<submoduleCfg class="empty-list"/>
|
||||
<extensions/>
|
||||
</scm>
|
||||
<scriptPath>internal/continuous_integration/short/Jenkinsfile</scriptPath>
|
||||
<lightweight>false</lightweight>
|
||||
</definition>
|
||||
<triggers/>
|
||||
<disabled>false</disabled>
|
||||
</flow-definition>
|
||||
@@ -0,0 +1,34 @@
|
||||
#!/tool/pandora64/.package/python-3.8.0/bin/python3
|
||||
|
||||
import pprint
|
||||
import report
|
||||
import sys
|
||||
|
||||
class Threshold():
|
||||
def __init__(self, maximum_threshold, violation_type):
|
||||
self._violations = {}
|
||||
self._maximum_threshold = maximum_threshold
|
||||
self._violation_type = violation_type
|
||||
|
||||
def check(self, value, changeset, filename):
|
||||
if value >= self._maximum_threshold:
|
||||
key = changeset + '|' + filename + '|' + self._violation_type
|
||||
self._violations[key] = value
|
||||
print(key + ': ' + str(value) + '%')
|
||||
|
||||
def provide_violations_to_report(self, report):
|
||||
if self.has_violations():
|
||||
report.record('FAILURE')
|
||||
report.record(self.dump())
|
||||
sys.exit(1)
|
||||
else:
|
||||
report.record('SUCCESS')
|
||||
sys.exit(0)
|
||||
|
||||
def has_violations(self):
|
||||
return bool(self._violations)
|
||||
|
||||
def dump(self):
|
||||
str_out = pprint.pformat(self._violations, width=120)
|
||||
str_out += '\n'
|
||||
return str_out
|
||||
@@ -0,0 +1,151 @@
|
||||
pipeline {
|
||||
agent { label 'sv-pdp-5' }
|
||||
environment {
|
||||
HSA_FORCE_FINE_GRAIN_PCIE = 1
|
||||
MPI_HOME="/home/resperf/mpich-4.0.1/install/global"
|
||||
PATH = "$MPI_HOME/bin:$PATH"
|
||||
LD_LIBRARY_PATH = "$MPI_HOME/lib:$LD_LIBRARY_PATH"
|
||||
build_dir = "builds/change-${GERRIT_CHANGE_NUMBER}-${GERRIT_PATCHSET_NUMBER}"
|
||||
CMAKE_PREFIX_PATH = "/opt/rocm/lib/cmake"
|
||||
}
|
||||
stages {
|
||||
stage('Synchronize Source Code') {
|
||||
steps {
|
||||
checkout changelog: false, poll: false, scm: [$class: 'GitSCM', branches: [[name: 'FETCH_HEAD']], doGenerateSubmoduleConfigurations: false, extensions: [[$class: 'CloneOption', depth: 0, noTags: false, reference: '', shallow: false]], submoduleCfg: [], userRemoteConfigs: [[name: 'origin', refspec: '${GERRIT_REFSPEC}', url: 'ssh://gerritgit/rsch/ec/shmem']]]
|
||||
}
|
||||
}
|
||||
stage('Make Build Directory') {
|
||||
steps {
|
||||
dir("library") {
|
||||
sh "mkdir -p ${build_dir}"
|
||||
}
|
||||
}
|
||||
}
|
||||
stage('Build Source Code') {
|
||||
parallel {
|
||||
stage('RC_SINGLE') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/RC_SINGLE") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/rc_single install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/RC_SINGLE") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_SINGLE/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RC_MULTI_WF_COAL') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/RC_MULTI_WF_COAL") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/rc_multi_wf_coal install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/RC_MULTI_WF_COAL") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_MULTI_WF_COAL/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RC_MULTI') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/RC_MULTI") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/rc_multi install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/RC_MULTI") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/RC_MULTI/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_SINGLE') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/DC_SINGLE") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/dc_single install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/DC_SINGLE") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_SINGLE/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_MULTI') {
|
||||
steps {
|
||||
//===================== LIBRARY =======================
|
||||
dir("library/${build_dir}/DC_MULTI") {
|
||||
sh 'mkdir -p install'
|
||||
sh '../../../build_configs/dc_multi install'
|
||||
}
|
||||
//===================== CLIENT ========================
|
||||
dir("clients/functional_tests/${build_dir}/DC_MULTI") {
|
||||
sh '../../../build_configs/release ${WORKSPACE}/library/${build_dir}/DC_MULTI/install'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
stage('Run Tests') {
|
||||
parallel {
|
||||
stage('RC_SINGLE') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/RC_SINGLE/rocshmem_example_driver single_thread ${build_dir}/RC_SINGLE'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RC_MULTI_WF_COAL') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/RC_MULTI_WF_COAL/rocshmem_example_driver multi_thread ${build_dir}/RC_MULTI_WF_COAL'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RC_MULTI') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/RC_MULTI/rocshmem_example_driver multi_thread ${build_dir}/RC_MULTI'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_SINGLE') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/DC_SINGLE/rocshmem_example_driver single_thread ${build_dir}/DC_SINGLE'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('DC_MULTI') {
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh './driver.sh ${build_dir}/DC_MULTI/rocshmem_example_driver multi_thread ${build_dir}/DC_MULTI'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('RO_NET_BASIC') {
|
||||
// RO_NET controlled at runtime, no need for a new build. Use RC_MULTI
|
||||
steps {
|
||||
dir("clients/functional_tests") {
|
||||
sh 'mkdir -p ${build_dir}/RO_NET_BASIC'
|
||||
sh 'ROC_SHMEM_RO=1 RO_NET_CPU_QUEUE=1 UCX_TLS=rc ./driver.sh ${build_dir}/RC_MULTI/rocshmem_example_driver ro ${build_dir}/RO_NET_BASIC'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,85 @@
|
||||
<?xml version='1.1' encoding='UTF-8'?>
|
||||
<flow-definition plugin="workflow-job@2.40">
|
||||
<actions/>
|
||||
<description></description>
|
||||
<keepDependencies>false</keepDependencies>
|
||||
<properties>
|
||||
<org.jenkinsci.plugins.workflow.job.properties.PipelineTriggersJobProperty>
|
||||
<triggers>
|
||||
<com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.GerritTrigger plugin="gerrit-trigger@2.33.0">
|
||||
<spec></spec>
|
||||
<gerritProjects>
|
||||
<com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.data.GerritProject>
|
||||
<compareType>PLAIN</compareType>
|
||||
<pattern>rsch/ec/shmem</pattern>
|
||||
<branches>
|
||||
<com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.data.Branch>
|
||||
<compareType>PLAIN</compareType>
|
||||
<pattern>amd-master</pattern>
|
||||
</com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.data.Branch>
|
||||
</branches>
|
||||
<disableStrictForbiddenFileVerification>false</disableStrictForbiddenFileVerification>
|
||||
</com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.data.GerritProject>
|
||||
</gerritProjects>
|
||||
<dynamicGerritProjects class="empty-list"/>
|
||||
<skipVote>
|
||||
<onSuccessful>true</onSuccessful>
|
||||
<onFailed>true</onFailed>
|
||||
<onUnstable>true</onUnstable>
|
||||
<onNotBuilt>true</onNotBuilt>
|
||||
<onAborted>true</onAborted>
|
||||
</skipVote>
|
||||
<silentMode>false</silentMode>
|
||||
<enableTopicAssociation>false</enableTopicAssociation>
|
||||
<notificationLevel></notificationLevel>
|
||||
<silentStartMode>false</silentStartMode>
|
||||
<escapeQuotes>true</escapeQuotes>
|
||||
<nameAndEmailParameterMode>PLAIN</nameAndEmailParameterMode>
|
||||
<dependencyJobsNames></dependencyJobsNames>
|
||||
<commitMessageParameterMode>BASE64</commitMessageParameterMode>
|
||||
<changeSubjectParameterMode>PLAIN</changeSubjectParameterMode>
|
||||
<commentTextParameterMode>BASE64</commentTextParameterMode>
|
||||
<buildStartMessage></buildStartMessage>
|
||||
<buildFailureMessage></buildFailureMessage>
|
||||
<buildSuccessfulMessage></buildSuccessfulMessage>
|
||||
<buildUnstableMessage></buildUnstableMessage>
|
||||
<buildNotBuiltMessage></buildNotBuiltMessage>
|
||||
<buildAbortedMessage></buildAbortedMessage>
|
||||
<buildUnsuccessfulFilepath></buildUnsuccessfulFilepath>
|
||||
<customUrl></customUrl>
|
||||
<serverName>amd-gerrit</serverName>
|
||||
<triggerOnEvents>
|
||||
<com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.events.PluginCommentAddedContainsEvent>
|
||||
<commentAddedCommentContains>!SMOKE</commentAddedCommentContains>
|
||||
</com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.events.PluginCommentAddedContainsEvent>
|
||||
</triggerOnEvents>
|
||||
<dynamicTriggerConfiguration>false</dynamicTriggerConfiguration>
|
||||
<triggerConfigURL></triggerConfigURL>
|
||||
<triggerInformationAction/>
|
||||
</com.sonyericsson.hudson.plugins.gerrit.trigger.hudsontrigger.GerritTrigger>
|
||||
</triggers>
|
||||
</org.jenkinsci.plugins.workflow.job.properties.PipelineTriggersJobProperty>
|
||||
</properties>
|
||||
<definition class="org.jenkinsci.plugins.workflow.cps.CpsScmFlowDefinition" plugin="workflow-cps@2.90">
|
||||
<scm class="hudson.plugins.git.GitSCM" plugin="git@4.7.1">
|
||||
<configVersion>2</configVersion>
|
||||
<userRemoteConfigs>
|
||||
<hudson.plugins.git.UserRemoteConfig>
|
||||
<url>ssh://gerritgit/rsch/ec/shmem</url>
|
||||
</hudson.plugins.git.UserRemoteConfig>
|
||||
</userRemoteConfigs>
|
||||
<branches>
|
||||
<hudson.plugins.git.BranchSpec>
|
||||
<name>FETCH_HEAD</name>
|
||||
</hudson.plugins.git.BranchSpec>
|
||||
</branches>
|
||||
<doGenerateSubmoduleConfigurations>false</doGenerateSubmoduleConfigurations>
|
||||
<submoduleCfg class="empty-list"/>
|
||||
<extensions/>
|
||||
</scm>
|
||||
<scriptPath>internal/continuous_integration/smoke/Jenkinsfile</scriptPath>
|
||||
<lightweight>false</lightweight>
|
||||
</definition>
|
||||
<triggers/>
|
||||
<disabled>false</disabled>
|
||||
</flow-definition>
|
||||
+47
@@ -0,0 +1,47 @@
|
||||
#! /usr/bin/python
|
||||
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
|
||||
import os
|
||||
|
||||
suffixes = [ '.cpp', '.hpp', '.c', '.h' ]
|
||||
directories = [ 'src', 'include' ]
|
||||
|
||||
def oksuffix(f):
|
||||
for s in suffixes:
|
||||
if f.endswith(s):
|
||||
return True
|
||||
return False
|
||||
|
||||
def try_index_dir(directory):
|
||||
for dirpath,subdirs,files in os.walk(os.path.join(cwd, directory)):
|
||||
okfiles = [f for f in files if oksuffix(f)]
|
||||
if okfiles:
|
||||
print >> file_list, \
|
||||
'\n'.join([os.path.join(dirpath, f) for f in okfiles])
|
||||
|
||||
|
||||
file_list = file('cscope.files', 'w')
|
||||
cwd = os.getcwd()
|
||||
for d in directories:
|
||||
try_index_dir(d)
|
||||
file_list.close()
|
||||
|
||||
os.system("cscope -b")
|
||||
@@ -0,0 +1,105 @@
|
||||
HIPCC=hipcc
|
||||
BUILD=./build
|
||||
SRC=./src
|
||||
RESULTS=./results
|
||||
|
||||
#rocshmem_DIR=${HOME}/rocshmem
|
||||
#MPI_HOME=${HOME}/mpich/install
|
||||
NCCL_HOME=${HOME}/rccl/build
|
||||
|
||||
MPI_FLAGS=-lmpi -lhsa-runtime64 -lrt -L${MPI_HOME}/lib -fgpu-rdc
|
||||
SHMEM_FLAGS=${MPI_FLAGS} -lmlx5 -libverbs
|
||||
RCCL_FLAGS=${MPI_FLAGS} -Wl,-rpath,$(NCCL_HOME) -L${NCCL_HOME} -lrccl
|
||||
|
||||
.SILENT: run_scan extract_scan run_sort run_sort_shmem run_sort_rccl extract_sort
|
||||
|
||||
all: ${BUILD}/sort_shmem ${BUILD}/sort_rccl ${BUILD}/sort_mpi
|
||||
|
||||
${BUILD}/sort_shmem: ${BUILD}/sort_shmem.o ${rocshmem_DIR}/lib/librocshmem.a
|
||||
${HIPCC} $^ ${SHMEM_FLAGS} -o $@
|
||||
|
||||
${BUILD}/sort_shmem.o: ${SRC}/sort_shmem.cu
|
||||
${HIPCC} $^ -I${rocshmem_DIR}/include -I${MPI_HOME}/include -fgpu-rdc -o $@ -c
|
||||
|
||||
${BUILD}/sort_rccl: ${BUILD}/sort_rccl.o
|
||||
${HIPCC} $^ ${RCCL_FLAGS} -o $@
|
||||
|
||||
${BUILD}/sort_rccl.o: ${SRC}/sort_rccl.cu
|
||||
${HIPCC} $^ -I$(NCCL_HOME)/include/rccl -I${MPI_HOME}/include -fgpu-rdc -o $@ -c
|
||||
|
||||
${BUILD}/sort_mpi: ${BUILD}/sort_mpi.o
|
||||
${HIPCC} $^ ${MPI_FLAGS} -o $@
|
||||
|
||||
${BUILD}/sort_mpi.o: ${SRC}/sort_mpi.cu
|
||||
${HIPCC} $^ -I${MPI_HOME}/include -fgpu-rdc -o $@ -c
|
||||
|
||||
RO_FLAGS=ROC_SHMEM_RO=1 RO_NET_CPU_QUEUE=1
|
||||
ITERS?=0 1 2 3 4 5 6 7 8 9
|
||||
TIMEOUT=1m
|
||||
HOSTS=sv-pdp-0,sv-pdp-1,sv-pdp-2,sv-pdp-3
|
||||
SCAN_SIZE=1024
|
||||
PES=2 4 8 12 16
|
||||
PES_RCCL=2 4 8
|
||||
|
||||
TYPE ?= Naive
|
||||
LABEL ?= naive
|
||||
PARAM ?= 0
|
||||
NUM_PES ?= 2
|
||||
|
||||
run_sort_shmem: ${BUILD}/sort_shmem
|
||||
printf "${TYPE} ";\
|
||||
echo "" > ${RESULTS}/sort_${LABEL}_${NUM_PES}.out; \
|
||||
for j in ${ITERS}; do \
|
||||
${RO_FLAGS} timeout ${TIMEOUT} mpirun -np ${NUM_PES} -hosts ${HOSTS} ${BUILD}/sort_shmem ${PARAM} >> ${RESULTS}/sort_${LABEL}_${NUM_PES}.out;\
|
||||
done;
|
||||
|
||||
run_sort_rccl: ${BUILD}/sort_rccl
|
||||
printf "RCCL "; \
|
||||
echo "" > ${RESULTS}/sort_rccl_${NUM_PES}.out; \
|
||||
for j in ${ITERS}; do \
|
||||
timeout ${TIMEOUT} mpirun -np ${NUM_PES} -hosts ${HOSTS} ${BUILD}/sort_rccl >> ${RESULTS}/sort_rccl_${NUM_PES}.out;\
|
||||
done;
|
||||
|
||||
run_sort_mpi: ${BUILD}/sort_rccl
|
||||
printf "MPI2 "; \
|
||||
echo "" > ${RESULTS}/sort_mpi2_${NUM_PES}.out; \
|
||||
for j in ${ITERS}; do \
|
||||
timeout ${TIMEOUT} mpirun -np ${NUM_PES} -hosts ${HOSTS} ${BUILD}/sort_mpi >> ${RESULTS}/sort_mpi2_${NUM_PES}.out;\
|
||||
done;
|
||||
|
||||
run_sort: ${BUILD}/sort_shmem ${BUILD}/sort_rccl
|
||||
for i in ${PES}; do \
|
||||
printf "%d " $$i; \
|
||||
$(MAKE) --no-print-directory run_sort_shmem TYPE=NAIVE LABEL=naive PARAM=0 NUM_PES=$${i}; \
|
||||
$(MAKE) --no-print-directory run_sort_shmem TYPE=MPI LABEL=mpi PARAM=1 NUM_PES=$${i}; \
|
||||
$(MAKE) --no-print-directory run_sort_shmem TYPE=GCEN LABEL=gcen PARAM=2 NUM_PES=$${i}; \
|
||||
$(MAKE) --no-print-directory run_sort_shmem TYPE=GCEN2 LABEL=gcen2 PARAM=3 NUM_PES=$${i}; \
|
||||
$(MAKE) --no-print-directory run_sort_mpi NUM_PES=$${i}; \
|
||||
printf "\n";\
|
||||
done
|
||||
for i in ${PES_RCCL}; do \
|
||||
$(MAKE) --no-print-directory run_sort_rccl NUM_PES=$${i}; \
|
||||
printf "%d " $$i; \
|
||||
done
|
||||
|
||||
$(MAKE) extract_sort
|
||||
|
||||
|
||||
extract_sort:
|
||||
printf "Sort latency\n"
|
||||
printf "PROCS\tType\tRuns"
|
||||
for i in ${PES}; do \
|
||||
for type in mpi mpi2 rccl naive gcen gcen2; do\
|
||||
printf "\n%d\t$${type}\t" $$i; \
|
||||
file=${RESULTS}/sort_$${type}_$${i}.out;\
|
||||
latency=$$(grep -E "Avg time" $${file}); \
|
||||
grep -E "Avg time" $${file} | while read -r j; do\
|
||||
val=$$(echo $$j | grep -oE -m1 "[0-9]+\.[0-9]+");\
|
||||
printf "%s\t" $${val};\
|
||||
done; \
|
||||
done;\
|
||||
done
|
||||
printf "\n"
|
||||
|
||||
clean:
|
||||
rm build/*;
|
||||
@@ -0,0 +1,70 @@
|
||||
#include <chrono>
|
||||
#include <iostream>
|
||||
#include <stdio.h>
|
||||
#include <mpi.h>
|
||||
#include <unistd.h>
|
||||
#include <hip/hip_runtime.h>
|
||||
using namespace std;
|
||||
|
||||
#define TIME_NOW std::chrono::steady_clock::now()
|
||||
#define TIME_DIFF(a, b) std::chrono::duration_cast<std::chrono::nanoseconds>(a - b).count()
|
||||
|
||||
#define HIPCHECK(cmd) do { \
|
||||
hipError_t e = cmd; \
|
||||
if( e != hipSuccess ) { \
|
||||
printf("Failed: Hip error %s:%d '%s'\n", \
|
||||
__FILE__,__LINE__,hipGetErrorString(e)); \
|
||||
exit(EXIT_FAILURE); \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
|
||||
#define NCCLCHECK(cmd) do { \
|
||||
ncclResult_t r = cmd; \
|
||||
if (r!= ncclSuccess) { \
|
||||
printf("Failed, NCCL error %s:%d '%s'\n", \
|
||||
__FILE__,__LINE__,ncclGetErrorString(r)); \
|
||||
exit(EXIT_FAILURE); \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
// Copied from rccl-tests, used to hash hostname
|
||||
static uint64_t getHash(const char* string, size_t n) {
|
||||
// Based on DJB2a, result = result * 33 ^ char
|
||||
uint64_t result = 5381;
|
||||
for (size_t c = 0; c < n; c++) {
|
||||
result = ((result << 5) + result) ^ string[c];
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Generate a hash of the unique identifying string for this host
|
||||
* that will be unique for both bare-metal and container instances
|
||||
* Equivalent of a hash of;
|
||||
*
|
||||
* $(hostname)$(cat /proc/sys/kernel/random/boot_id)
|
||||
*
|
||||
*/
|
||||
#define HOSTID_FILE "/proc/sys/kernel/random/boot_id"
|
||||
static uint64_t getHostHash(const char* hostname) {
|
||||
char hostHash[1024];
|
||||
|
||||
// Fall back is the hostname if something fails
|
||||
(void) strncpy(hostHash, hostname, sizeof(hostHash));
|
||||
int offset = strlen(hostHash);
|
||||
|
||||
FILE *file = fopen(HOSTID_FILE, "r");
|
||||
if (file != NULL) {
|
||||
char *p;
|
||||
if (fscanf(file, "%ms", &p) == 1) {
|
||||
strncpy(hostHash+offset, p, sizeof(hostHash)-offset-1);
|
||||
free(p);
|
||||
}
|
||||
}
|
||||
fclose(file);
|
||||
|
||||
// Make sure the string is terminated
|
||||
hostHash[sizeof(hostHash)-1]='\0';
|
||||
|
||||
return getHash(hostHash, strlen(hostHash));
|
||||
}
|
||||
@@ -0,0 +1,231 @@
|
||||
/*************************************************************************
|
||||
* *
|
||||
* N A S P A R A L L E L B E N C H M A R K S 3.3 *
|
||||
* *
|
||||
* I S *
|
||||
* *
|
||||
*************************************************************************
|
||||
* *
|
||||
* This benchmark is part of the NAS Parallel Benchmark 3.3 suite. *
|
||||
* It is described in NAS Technical Report 95-020. *
|
||||
* *
|
||||
* Permission to use, copy, distribute and modify this software *
|
||||
* for any purpose with or without fee is hereby granted. We *
|
||||
* request, however, that all derived work reference the NAS *
|
||||
* Parallel Benchmarks 3.3. This software is provided "as is" *
|
||||
* without express or implied warranty. *
|
||||
* *
|
||||
* Information on NPB 3.3, including the technical report, the *
|
||||
* original specifications, source code, results and information *
|
||||
* on how to submit new results, is available at: *
|
||||
* *
|
||||
* http://www.nas.nasa.gov/Software/NPB *
|
||||
* *
|
||||
* Send comments or suggestions to npb@nas.nasa.gov *
|
||||
* Send bug reports to npb-bugs@nas.nasa.gov *
|
||||
* *
|
||||
* NAS Parallel Benchmarks Group *
|
||||
* NASA Ames Research Center *
|
||||
* Mail Stop: T27A-1 *
|
||||
* Moffett Field, CA 94035-1000 *
|
||||
* *
|
||||
* E-mail: npb@nas.nasa.gov *
|
||||
* Fax: (650) 604-3957 *
|
||||
* *
|
||||
*************************************************************************
|
||||
* *
|
||||
* Author: M. Yarrow *
|
||||
* H. Jin *
|
||||
* *
|
||||
*************************************************************************/
|
||||
|
||||
#define NUM_WGS 1
|
||||
#define WG_SIZE 1024
|
||||
#define MAX_PES 128
|
||||
|
||||
#define MAX_KEY (1 << 11)
|
||||
|
||||
/*
|
||||
* FUNCTION RANDLC (X, A)
|
||||
*
|
||||
* This routine returns a uniform pseudorandom double precision number in the
|
||||
* range (0, 1) by using the linear congruential generator
|
||||
*
|
||||
* x_{k+1} = a x_k (mod 2^46)
|
||||
*
|
||||
* where 0 < x_k < 2^46 and 0 < a < 2^46. This scheme generates 2^44 numbers
|
||||
* before repeating. The argument A is the same as 'a' in the above formula,
|
||||
* and X is the same as x_0. A and X must be odd double precision integers
|
||||
* in the range (1, 2^46). The returned value RANDLC is normalized to be
|
||||
* between 0 and 1, i.e. RANDLC = 2^(-46) * x_1. X is updated to contain
|
||||
* the new seed x_1, so that subsequent calls to RANDLC using the same
|
||||
* arguments will generate a continuous sequence.
|
||||
*
|
||||
* This routine should produce the same results on any computer with at least
|
||||
* 48 mantissa bits in double precision floating point data. On Cray systems,
|
||||
* double precision should be disabled.
|
||||
*
|
||||
* David H. Bailey October 26, 1990
|
||||
*
|
||||
* IMPLICIT DOUBLE PRECISION (A-H, O-Z)
|
||||
* SAVE KS, R23, R46, T23, T46
|
||||
* DATA KS/0/
|
||||
*
|
||||
* If this is the first call to RANDLC, compute R23 = 2 ^ -23, R46 = 2 ^ -46,
|
||||
* T23 = 2 ^ 23, and T46 = 2 ^ 46. These are computed in loops, rather than
|
||||
* by merely using the ** operator, in order to insure that the results are
|
||||
* exact on all systems. This code assumes that 0.5D0 is represented exactly.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/*****************************************************************/
|
||||
/************* R A N D L C ************/
|
||||
/************* ************/
|
||||
/************* portable random number generator ************/
|
||||
/*****************************************************************/
|
||||
|
||||
double randlc( double *X, double *A )
|
||||
{
|
||||
static int KS=0;
|
||||
static double R23, R46, T23, T46;
|
||||
double T1, T2, T3, T4;
|
||||
double A1;
|
||||
double A2;
|
||||
double X1;
|
||||
double X2;
|
||||
double Z;
|
||||
int i, j;
|
||||
|
||||
if (KS == 0)
|
||||
{
|
||||
R23 = 1.0;
|
||||
R46 = 1.0;
|
||||
T23 = 1.0;
|
||||
T46 = 1.0;
|
||||
|
||||
for (i=1; i<=23; i++)
|
||||
{
|
||||
R23 = 0.50 * R23;
|
||||
T23 = 2.0 * T23;
|
||||
}
|
||||
for (i=1; i<=46; i++)
|
||||
{
|
||||
R46 = 0.50 * R46;
|
||||
T46 = 2.0 * T46;
|
||||
}
|
||||
KS = 1;
|
||||
}
|
||||
|
||||
/* Break A into two parts such that A = 2^23 * A1 + A2 and set X = N. */
|
||||
|
||||
T1 = R23 * *A;
|
||||
j = T1;
|
||||
A1 = j;
|
||||
A2 = *A - T23 * A1;
|
||||
|
||||
/* Break X into two parts such that X = 2^23 * X1 + X2, compute
|
||||
Z = A1 * X2 + A2 * X1 (mod 2^23), and then
|
||||
X = 2^23 * Z + A2 * X2 (mod 2^46). */
|
||||
|
||||
T1 = R23 * *X;
|
||||
j = T1;
|
||||
X1 = j;
|
||||
X2 = *X - T23 * X1;
|
||||
T1 = A1 * X2 + A2 * X1;
|
||||
|
||||
j = R23 * T1;
|
||||
T2 = j;
|
||||
Z = T1 - T23 * T2;
|
||||
T3 = T23 * Z + A2 * X2;
|
||||
j = R46 * T3;
|
||||
T4 = j;
|
||||
*X = T3 - T46 * T4;
|
||||
return(R46 * *X);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*****************************************************************/
|
||||
/************ F I N D _ M Y _ S E E D ************/
|
||||
/************ ************/
|
||||
/************ returns parallel random number seq seed ************/
|
||||
/*****************************************************************/
|
||||
|
||||
/*
|
||||
* Create a random number sequence of total length nn residing
|
||||
* on np number of processors. Each processor will therefore have a
|
||||
* subsequence of length nn/np. This routine returns that random
|
||||
* number which is the first random number for the subsequence belonging
|
||||
* to processor rank kn, and which is used as seed for proc kn ran # gen.
|
||||
*/
|
||||
|
||||
double find_my_seed( int kn, /* my processor rank, 0<=kn<=num procs */
|
||||
int np, /* np = num procs */
|
||||
long nn, /* total num of ran numbers, all procs */
|
||||
double s, /* Ran num seed, for ex.: 314159265.00 */
|
||||
double a ) /* Ran num gen mult, try 1220703125.00 */
|
||||
{
|
||||
|
||||
long i;
|
||||
|
||||
double t1,t2,t3,an;
|
||||
long mq,nq,kk,ik;
|
||||
|
||||
|
||||
|
||||
nq = nn / np;
|
||||
|
||||
for( mq=0; nq>1; mq++,nq/=2 )
|
||||
;
|
||||
|
||||
t1 = a;
|
||||
|
||||
for( i=1; i<=mq; i++ )
|
||||
t2 = randlc( &t1, &t1 );
|
||||
|
||||
an = t1;
|
||||
|
||||
kk = kn;
|
||||
t1 = s;
|
||||
t2 = an;
|
||||
|
||||
for( i=1; i<=100; i++ )
|
||||
{
|
||||
ik = kk / 2;
|
||||
if( 2 * ik != kk )
|
||||
t3 = randlc( &t1, &t2 );
|
||||
if( ik == 0 )
|
||||
break;
|
||||
t3 = randlc( &t2, &t2 );
|
||||
kk = ik;
|
||||
}
|
||||
|
||||
return( t1 );
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*****************************************************************/
|
||||
/************* C R E A T E _ S E Q ************/
|
||||
/*****************************************************************/
|
||||
|
||||
void create_seq( double seed, double a, int *key_array, int size )
|
||||
{
|
||||
double x;
|
||||
int i, k;
|
||||
|
||||
k = MAX_KEY/4;
|
||||
|
||||
for (i=0; i < size; i++)
|
||||
{
|
||||
x = randlc(&seed, &a);
|
||||
x += randlc(&seed, &a);
|
||||
x += randlc(&seed, &a);
|
||||
x += randlc(&seed, &a);
|
||||
|
||||
key_array[i] = k*x;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,380 @@
|
||||
#include "mpi.h"
|
||||
#include "common.h"
|
||||
#include "sort.h"
|
||||
|
||||
//#define TIME_PERF
|
||||
#ifdef TIME_PERF
|
||||
#define TIMERS 10
|
||||
__device__ uint64_t timers[TIMERS] = {0};
|
||||
__device__ uint64_t time_start;
|
||||
#define TIMERS_START() \
|
||||
if(threadIdx.x == 0) {\
|
||||
time_start = roc_shmem_timer();\
|
||||
}
|
||||
|
||||
#define TIME(TIMER_NUM) \
|
||||
if(threadIdx.x == 0) {\
|
||||
timers[TIMER_NUM] = roc_shmem_timer() - time_start;\
|
||||
time_start = roc_shmem_timer();\
|
||||
}
|
||||
|
||||
#define OUTPUT_TIME() \
|
||||
if(threadIdx.x == 0 && my_pe == 0) { \
|
||||
uint64_t sum = 0; \
|
||||
for(int i = 0; i < TIMERS; ++i) { \
|
||||
sum += timers[i]; \
|
||||
} \
|
||||
for(int i = 0; i < TIMERS; ++i) { \
|
||||
printf("%d: %f\n", i, (double)timers[i] / (double)sum); \
|
||||
} \
|
||||
}
|
||||
#else
|
||||
#define TIMERS_START()
|
||||
#define TIME(x)
|
||||
#define OUTPUT_TIME()
|
||||
#endif
|
||||
|
||||
__global__ void sort1(volatile int *keys, int *keyBuffer1,
|
||||
int *keyBuffer2, int *sendCount,
|
||||
int *recvCount, int *sendOffset,
|
||||
int *recvOffset, int *outputKeys,
|
||||
size_t size, int n_pes, int my_pe) {
|
||||
__shared__ int bucketCounter[MAX_PES];
|
||||
__shared__ int bucketPtr[MAX_PES];
|
||||
__shared__ int total_size;
|
||||
|
||||
int buckets = n_pes;
|
||||
|
||||
int tid = threadIdx.x; // + blockDim.x * blockIdx.x;
|
||||
const int K_PER_BUCK = (MAX_KEY / buckets);
|
||||
|
||||
// Reset
|
||||
for(int i = threadIdx.x; i < buckets; i += blockDim.x) {
|
||||
bucketCounter[i] = 0;
|
||||
bucketPtr[i] = 0;
|
||||
}
|
||||
__syncthreads();
|
||||
TIMERS_START()
|
||||
// Count size of each bucket
|
||||
for(int i = tid; i < size; i += blockDim.x) {
|
||||
atomicAdd(&bucketCounter[keys[i] / K_PER_BUCK], 1);
|
||||
}
|
||||
__syncthreads();
|
||||
TIME(0)
|
||||
// Update in global memory
|
||||
for(int i = tid; i < buckets; i += blockDim.x) {
|
||||
sendCount[i] = bucketPtr[i] = bucketCounter[i];
|
||||
}
|
||||
__syncthreads();
|
||||
TIME(1)
|
||||
// Perform local scan to get ptrs set
|
||||
for(int shift = 1; shift < buckets; shift *= 2) {
|
||||
int temp = 0;
|
||||
if(threadIdx.x >= shift && threadIdx.x < buckets) {
|
||||
temp = bucketPtr[threadIdx.x - shift];
|
||||
}
|
||||
__syncthreads();
|
||||
if(threadIdx.x < buckets) {
|
||||
bucketPtr[threadIdx.x] += temp;
|
||||
}
|
||||
__syncthreads();
|
||||
}
|
||||
__syncthreads();
|
||||
TIME(2)
|
||||
// Find offsets of where we're sending
|
||||
for(int i = threadIdx.x; i < buckets; i += blockDim.x) {
|
||||
sendOffset[i] = bucketPtr[i] - sendCount[i];
|
||||
}
|
||||
// Sort keys into buckets
|
||||
for(int i = threadIdx.x; i < size; i += blockDim.x) {
|
||||
int loc = atomicAdd(&bucketPtr[keys[i] / K_PER_BUCK], -1) - 1;
|
||||
keyBuffer1[loc] = keys[i];
|
||||
}
|
||||
TIME(3)
|
||||
OUTPUT_TIME()
|
||||
}
|
||||
|
||||
__global__ void sort2(volatile int *keys, int *keyBuffer1,
|
||||
int *keyBuffer2, int *sendCount,
|
||||
int *recvCount, int *sendOffset,
|
||||
int *recvOffset, int *outputKeys,
|
||||
size_t size, int n_pes, int my_pe) {
|
||||
__shared__ int total_size;
|
||||
|
||||
int buckets = n_pes;
|
||||
|
||||
int tid = threadIdx.x; // + blockDim.x * blockIdx.x;
|
||||
const int K_PER_BUCK = (MAX_KEY / buckets);
|
||||
|
||||
for(int i = threadIdx.x; i < K_PER_BUCK; i += blockDim.x)
|
||||
outputKeys[i] = 0;
|
||||
__syncthreads();
|
||||
TIME(5)
|
||||
int min_key_val = my_pe * K_PER_BUCK;
|
||||
int max_key_val = (my_pe + 1) * K_PER_BUCK - 1;
|
||||
|
||||
int *key_buff_ptr = outputKeys - min_key_val;
|
||||
for(int i = threadIdx.x; i < total_size; i += blockDim.x) {
|
||||
atomicAdd(&key_buff_ptr[keyBuffer2[i]], 1);
|
||||
}
|
||||
__syncthreads();
|
||||
TIME(6)
|
||||
// Perform local scan on keys
|
||||
for(int shift = 1; shift < K_PER_BUCK; shift *= 2) {
|
||||
int temp = 0;
|
||||
if(threadIdx.x >= shift && threadIdx.x < K_PER_BUCK) {
|
||||
temp = outputKeys[threadIdx.x - shift];
|
||||
}
|
||||
__syncthreads();
|
||||
if(threadIdx.x < K_PER_BUCK) {
|
||||
outputKeys[threadIdx.x] += temp;
|
||||
}
|
||||
__syncthreads();
|
||||
}
|
||||
TIME(7)
|
||||
OUTPUT_TIME()
|
||||
}
|
||||
|
||||
void sort(volatile int *keys, int *keyBuffer1,
|
||||
int *keyBuffer2, int *sendCount,
|
||||
int *recvCount, int *sendOffset,
|
||||
int *recvOffset, int *outputKeys,
|
||||
size_t size, int max_iters) {
|
||||
int nProcs, my_pe;
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &nProcs);
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &my_pe);
|
||||
|
||||
hipStream_t stream;
|
||||
HIPCHECK(hipStreamCreate(&stream));
|
||||
|
||||
for(int iter = 0; iter < max_iters; ++iter) {
|
||||
//fprintf(stderr, "%d: %d %d %p %p\n", my_pe, iter, max_iters, sendCount, recvCount);
|
||||
sort1<<<1, WG_SIZE, 0, stream>>>(keys, keyBuffer1,
|
||||
keyBuffer2, sendCount, recvCount, sendOffset,
|
||||
recvOffset, outputKeys, size, nProcs, my_pe);
|
||||
HIPCHECK(hipStreamSynchronize(stream));
|
||||
MPI_Alltoall(sendCount, 1, MPI_INT, recvCount, 1,
|
||||
MPI_INT, MPI_COMM_WORLD);
|
||||
MPI_Alltoall(sendOffset, 1, MPI_INT, recvOffset, 1,
|
||||
MPI_INT, MPI_COMM_WORLD);
|
||||
int total_size = 0;
|
||||
MPI_Request *req = new MPI_Request[2 * nProcs];
|
||||
const int TAG = 10000;
|
||||
for(int i = 0; i < nProcs; ++i) {
|
||||
MPI_Isend(&keyBuffer1[sendOffset[i]], sendCount[i],
|
||||
MPI_INT, i, TAG, MPI_COMM_WORLD, &req[2 * i]);
|
||||
MPI_Irecv(&keyBuffer2[total_size], recvCount[i],
|
||||
MPI_INT, i, TAG, MPI_COMM_WORLD, &req[2 * i + 1]);
|
||||
total_size += recvCount[i];
|
||||
}
|
||||
MPI_Waitall(2 * nProcs, req, MPI_STATUS_IGNORE);
|
||||
sort2<<<1, WG_SIZE, 0, stream>>>(keys, keyBuffer1,
|
||||
keyBuffer2, sendCount, recvCount, sendOffset,
|
||||
recvOffset, outputKeys, size, nProcs, my_pe);
|
||||
}
|
||||
}
|
||||
|
||||
bool verify(int *outputKeys, int *keyBuffer2, size_t size)
|
||||
{
|
||||
int num_pes, my_pe;
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &num_pes);
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &my_pe);
|
||||
|
||||
MPI_Status status;
|
||||
MPI_Request request;
|
||||
|
||||
int min_key_val = my_pe * (MAX_KEY / num_pes);
|
||||
int max_key_val = (my_pe + 1) * (MAX_KEY / num_pes) - 1;
|
||||
|
||||
int *key_array = new int[size];
|
||||
// Perform final untimed sort on keys
|
||||
for(int i = 0; i < size; ++i)
|
||||
if(outputKeys[keyBuffer2[i] - min_key_val] > 0)
|
||||
key_array[--outputKeys[keyBuffer2[i] - min_key_val]] = keyBuffer2[i];
|
||||
else {
|
||||
fprintf(stderr, "%d: Found wrong key %d at %d with %d\n", my_pe, keyBuffer2[i], i, outputKeys[keyBuffer2[i]]);
|
||||
return false;
|
||||
}
|
||||
|
||||
if(size < 1)
|
||||
size = 1;
|
||||
|
||||
int k;
|
||||
const int MPI_TAG = 1000;
|
||||
// Check if largest key is smaller than next processor's
|
||||
if(my_pe > 0)
|
||||
MPI_Irecv(&k, 1, MPI_INT, my_pe - 1, MPI_TAG, MPI_COMM_WORLD,
|
||||
&request);
|
||||
if(my_pe < num_pes - 1)
|
||||
MPI_Send(&key_array[size - 1], 1, MPI_INT, my_pe + 1, MPI_TAG,
|
||||
MPI_COMM_WORLD );
|
||||
if(my_pe > 0)
|
||||
MPI_Wait(&request, &status);
|
||||
|
||||
// Check if it is smaller
|
||||
int j = 0;
|
||||
if( my_pe > 0 && size > 1 )
|
||||
if( k > key_array[0] )
|
||||
j++;
|
||||
|
||||
// Check if keys correctly sorted
|
||||
for(int i = 1; i < size; i++)
|
||||
if(key_array[i - 1] > key_array[i])
|
||||
j++;
|
||||
|
||||
delete[] key_array;
|
||||
|
||||
if(j != 0) {
|
||||
fprintf(stderr, "Processor %d: Full_verify: number of keys out of sort: %d\n",
|
||||
my_pe, j );
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void initGPU()
|
||||
{
|
||||
// Calculation for local rank, taken from rccl-tests
|
||||
int localRank = 0;
|
||||
int nProcs, proc;
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &nProcs);
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &proc);
|
||||
char hostname[1024];
|
||||
gethostname(hostname, 1024);
|
||||
for (int i=0; i< 1024; i++) {
|
||||
if (hostname[i] == '.') {
|
||||
hostname[i] = '\0';
|
||||
break;
|
||||
}
|
||||
}
|
||||
uint64_t hostHashs[nProcs];
|
||||
hostHashs[proc] = getHostHash(hostname);
|
||||
MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, hostHashs, sizeof(uint64_t), MPI_BYTE, MPI_COMM_WORLD);
|
||||
for (int p=0; p<nProcs; p++) {
|
||||
if (p == proc) break;
|
||||
if (hostHashs[p] == hostHashs[proc]) localRank++;
|
||||
}
|
||||
|
||||
/***
|
||||
* Select a GPU
|
||||
*/
|
||||
int ndevices, my_device=0;
|
||||
hipGetDeviceCount (&ndevices);
|
||||
my_device = localRank % ndevices;
|
||||
hipSetDevice(my_device);
|
||||
|
||||
printf("Rank %d: Device %d, Host %s\n", proc, my_device, hostname);
|
||||
fflush(stdout);
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
}
|
||||
|
||||
void *roc_shmem_malloc(size_t size)
|
||||
{
|
||||
void *v;
|
||||
hipMalloc((void **)&v, size);
|
||||
return v;
|
||||
}
|
||||
|
||||
int roc_shmem_free(void *v)
|
||||
{
|
||||
return hipFree(v);
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
if(argc < 1) {
|
||||
printf("Format: %s [iterations]\n", argv[0]);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Init stuff
|
||||
MPI_Init(&argc, &argv);
|
||||
initGPU();
|
||||
|
||||
int iterations = 1000;
|
||||
if(argc > 1)
|
||||
iterations = atoi(argv[1]);
|
||||
|
||||
int num_pes, my_pe;
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &num_pes);
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &my_pe);
|
||||
|
||||
// Configure input and outputs
|
||||
size_t size = 1024; //atoi(argv[1]);
|
||||
int *keys, *outputKeys;
|
||||
hipMalloc((void**)&keys, sizeof(int) * size);
|
||||
hipMalloc((void**)&outputKeys, sizeof(int) * WG_SIZE);
|
||||
|
||||
/* Generate random number sequence and subsequent keys on all procs */
|
||||
create_seq( find_my_seed( my_pe,
|
||||
num_pes,
|
||||
4*(long)size*num_pes,
|
||||
314159265.00, /* Random number gen seed */
|
||||
1220703125.00 ), /* Random number gen mult */
|
||||
1220703125.00, keys, size ); /* Random number gen mult */
|
||||
|
||||
|
||||
// Init buffers
|
||||
int *keyBuffer1, *keyBuffer2;
|
||||
keyBuffer1 = (int*)roc_shmem_malloc(sizeof(int) * size);
|
||||
keyBuffer2 = (int*)roc_shmem_malloc(sizeof(int) * size * 4);
|
||||
|
||||
int *sendCount = 0, *recvCount = 0, *sendOffset = 0, *recvOffset = 0;
|
||||
sendCount = (int*)roc_shmem_malloc(sizeof(int) * MAX_PES);
|
||||
recvCount = (int*)roc_shmem_malloc(sizeof(int) * MAX_PES);
|
||||
sendOffset = (int*)roc_shmem_malloc(sizeof(int) * MAX_PES);
|
||||
recvOffset = (int*)roc_shmem_malloc(sizeof(int) * MAX_PES);
|
||||
|
||||
printf("Begin untimed run\n");
|
||||
// Untimed run
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
sort((int*)keys, keyBuffer1, keyBuffer2,
|
||||
sendCount, recvCount, sendOffset, recvOffset,
|
||||
outputKeys, size, 1);
|
||||
hipDeviceSynchronize();
|
||||
|
||||
printf("Verify untimed run\n");
|
||||
// Verify correctness
|
||||
if(!verify(outputKeys, keyBuffer2, outputKeys[MAX_KEY / num_pes - 1])) {
|
||||
fprintf(stderr, "Wrong output\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
printf("Begin timed run\n");
|
||||
// Timed run
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
auto time_start = TIME_NOW;
|
||||
sort((int*)keys, keyBuffer1, keyBuffer2,
|
||||
sendCount, recvCount, sendOffset, recvOffset,
|
||||
outputKeys, size, iterations);
|
||||
hipDeviceSynchronize();
|
||||
double tot_time = (double)TIME_DIFF(TIME_NOW, time_start);
|
||||
|
||||
double all_time = 0;
|
||||
MPI_Allreduce(&tot_time, &all_time, 1,
|
||||
MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
|
||||
|
||||
if(my_pe == 0) {
|
||||
printf("Avg time:\t%.3f\tus\n", all_time / (double)(1000.0 * iterations * num_pes));
|
||||
}
|
||||
|
||||
// Verify correctness
|
||||
if(!verify(outputKeys, keyBuffer2, outputKeys[MAX_KEY / num_pes - 1])) {
|
||||
fprintf(stderr, "Wrong output\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
fprintf(stderr, "Done verify for %d\n", my_pe);
|
||||
|
||||
// Clean up
|
||||
hipFree(keys);
|
||||
hipFree(outputKeys);
|
||||
roc_shmem_free(keyBuffer1);
|
||||
roc_shmem_free(keyBuffer2);
|
||||
roc_shmem_free(sendCount);
|
||||
roc_shmem_free(recvCount);
|
||||
roc_shmem_free(sendOffset);
|
||||
roc_shmem_free(recvOffset);
|
||||
MPI_Finalize();
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,394 @@
|
||||
#include "rccl.h"
|
||||
#include "common.h"
|
||||
#include "sort.h"
|
||||
|
||||
//#define TIME_PERF
|
||||
#ifdef TIME_PERF
|
||||
#define TIMERS 10
|
||||
__device__ uint64_t timers[TIMERS] = {0};
|
||||
__device__ uint64_t time_start;
|
||||
#define TIMERS_START() \
|
||||
if(threadIdx.x == 0) {\
|
||||
time_start = roc_shmem_timer();\
|
||||
}
|
||||
|
||||
#define TIME(TIMER_NUM) \
|
||||
if(threadIdx.x == 0) {\
|
||||
timers[TIMER_NUM] = roc_shmem_timer() - time_start;\
|
||||
time_start = roc_shmem_timer();\
|
||||
}
|
||||
|
||||
#define OUTPUT_TIME() \
|
||||
if(threadIdx.x == 0 && my_pe == 0) { \
|
||||
uint64_t sum = 0; \
|
||||
for(int i = 0; i < TIMERS; ++i) { \
|
||||
sum += timers[i]; \
|
||||
} \
|
||||
for(int i = 0; i < TIMERS; ++i) { \
|
||||
printf("%d: %f\n", i, (double)timers[i] / (double)sum); \
|
||||
} \
|
||||
}
|
||||
#else
|
||||
#define TIMERS_START()
|
||||
#define TIME(x)
|
||||
#define OUTPUT_TIME()
|
||||
#endif
|
||||
|
||||
__global__ void sort1(volatile int *keys, int *keyBuffer1,
|
||||
int *keyBuffer2, int *sendCount,
|
||||
int *recvCount, int *sendOffset,
|
||||
int *recvOffset, int *outputKeys,
|
||||
size_t size, int n_pes, int my_pe) {
|
||||
__shared__ int bucketCounter[MAX_PES];
|
||||
__shared__ int bucketPtr[MAX_PES];
|
||||
__shared__ int total_size;
|
||||
|
||||
int buckets = n_pes;
|
||||
|
||||
int tid = threadIdx.x; // + blockDim.x * blockIdx.x;
|
||||
const int K_PER_BUCK = (MAX_KEY / buckets);
|
||||
|
||||
// Reset
|
||||
for(int i = threadIdx.x; i < buckets; i += blockDim.x) {
|
||||
bucketCounter[i] = 0;
|
||||
bucketPtr[i] = 0;
|
||||
}
|
||||
__syncthreads();
|
||||
TIMERS_START()
|
||||
// Count size of each bucket
|
||||
for(int i = tid; i < size; i += blockDim.x) {
|
||||
atomicAdd(&bucketCounter[keys[i] / K_PER_BUCK], 1);
|
||||
}
|
||||
__syncthreads();
|
||||
TIME(0)
|
||||
// Update in global memory
|
||||
for(int i = tid; i < buckets; i += blockDim.x) {
|
||||
sendCount[i] = bucketPtr[i] = bucketCounter[i];
|
||||
}
|
||||
__syncthreads();
|
||||
TIME(1)
|
||||
// Perform local scan to get ptrs set
|
||||
for(int shift = 1; shift < buckets; shift *= 2) {
|
||||
int temp = 0;
|
||||
if(threadIdx.x >= shift && threadIdx.x < buckets) {
|
||||
temp = bucketPtr[threadIdx.x - shift];
|
||||
}
|
||||
__syncthreads();
|
||||
if(threadIdx.x < buckets) {
|
||||
bucketPtr[threadIdx.x] += temp;
|
||||
}
|
||||
__syncthreads();
|
||||
}
|
||||
__syncthreads();
|
||||
TIME(2)
|
||||
// Find offsets of where we're sending
|
||||
for(int i = threadIdx.x; i < buckets; i += blockDim.x) {
|
||||
sendOffset[i] = bucketPtr[i] - sendCount[i];
|
||||
}
|
||||
// Sort keys into buckets
|
||||
for(int i = threadIdx.x; i < size; i += blockDim.x) {
|
||||
int loc = atomicAdd(&bucketPtr[keys[i] / K_PER_BUCK], -1) - 1;
|
||||
keyBuffer1[loc] = keys[i];
|
||||
}
|
||||
TIME(3)
|
||||
OUTPUT_TIME()
|
||||
}
|
||||
|
||||
__global__ void sort2(volatile int *keys, int *keyBuffer1,
|
||||
int *keyBuffer2, int *sendCount,
|
||||
int *recvCount, int *sendOffset,
|
||||
int *recvOffset, int *outputKeys,
|
||||
size_t size, int n_pes, int my_pe) {
|
||||
__shared__ int total_size;
|
||||
|
||||
int buckets = n_pes;
|
||||
|
||||
int tid = threadIdx.x; // + blockDim.x * blockIdx.x;
|
||||
const int K_PER_BUCK = (MAX_KEY / buckets);
|
||||
|
||||
for(int i = threadIdx.x; i < K_PER_BUCK; i += blockDim.x)
|
||||
outputKeys[i] = 0;
|
||||
__syncthreads();
|
||||
TIME(5)
|
||||
int min_key_val = my_pe * K_PER_BUCK;
|
||||
int max_key_val = (my_pe + 1) * K_PER_BUCK - 1;
|
||||
|
||||
int *key_buff_ptr = outputKeys - min_key_val;
|
||||
for(int i = threadIdx.x; i < total_size; i += blockDim.x) {
|
||||
atomicAdd(&key_buff_ptr[keyBuffer2[i]], 1);
|
||||
}
|
||||
__syncthreads();
|
||||
TIME(6)
|
||||
// Perform local scan on keys
|
||||
for(int shift = 1; shift < K_PER_BUCK; shift *= 2) {
|
||||
int temp = 0;
|
||||
if(threadIdx.x >= shift && threadIdx.x < K_PER_BUCK) {
|
||||
temp = outputKeys[threadIdx.x - shift];
|
||||
}
|
||||
__syncthreads();
|
||||
if(threadIdx.x < K_PER_BUCK) {
|
||||
outputKeys[threadIdx.x] += temp;
|
||||
}
|
||||
__syncthreads();
|
||||
}
|
||||
TIME(7)
|
||||
OUTPUT_TIME()
|
||||
}
|
||||
|
||||
void sort(volatile int *keys, int *keyBuffer1,
|
||||
int *keyBuffer2, int *sendCount,
|
||||
int *recvCount, int *sendOffset,
|
||||
int *recvOffset, int *outputKeys,
|
||||
size_t size, int max_iters, ncclComm_t comm) {
|
||||
int nProcs, my_pe;
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &nProcs);
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &my_pe);
|
||||
|
||||
hipStream_t stream;
|
||||
HIPCHECK(hipStreamCreate(&stream));
|
||||
|
||||
for(int iter = 0; iter < max_iters; ++iter) {
|
||||
//fprintf(stderr, "%d: %d %d %p %p\n", my_pe, iter, max_iters, sendCount, recvCount);
|
||||
sort1<<<1, WG_SIZE, 0, stream>>>(keys, keyBuffer1,
|
||||
keyBuffer2, sendCount, recvCount, sendOffset,
|
||||
recvOffset, outputKeys, size, nProcs, my_pe);
|
||||
NCCLCHECK(ncclAllToAll(sendCount, recvCount, 1,
|
||||
ncclInt, comm, stream));
|
||||
NCCLCHECK(ncclAllToAll(sendOffset, recvOffset, 1,
|
||||
ncclInt, comm, stream));
|
||||
HIPCHECK(hipStreamSynchronize(stream));
|
||||
NCCLCHECK(ncclGroupStart());
|
||||
int total_size = 0;
|
||||
for(int i = 0; i < nProcs; ++i) {
|
||||
ncclSend(&keyBuffer1[sendOffset[i]], sendCount[i],
|
||||
ncclInt, i, comm, stream);
|
||||
ncclRecv(&keyBuffer2[total_size], recvCount[i],
|
||||
ncclInt, i, comm, stream);
|
||||
total_size += recvCount[i];
|
||||
}
|
||||
NCCLCHECK(ncclGroupEnd());
|
||||
HIPCHECK(hipStreamSynchronize(stream));
|
||||
sort2<<<1, WG_SIZE, 0, stream>>>(keys, keyBuffer1,
|
||||
keyBuffer2, sendCount, recvCount, sendOffset,
|
||||
recvOffset, outputKeys, size, nProcs, my_pe);
|
||||
HIPCHECK(hipStreamSynchronize(stream));
|
||||
}
|
||||
}
|
||||
|
||||
bool verify(int *outputKeys, int *keyBuffer2, size_t size)
|
||||
{
|
||||
int num_pes, my_pe;
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &num_pes);
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &my_pe);
|
||||
|
||||
MPI_Status status;
|
||||
MPI_Request request;
|
||||
|
||||
int min_key_val = my_pe * (MAX_KEY / num_pes);
|
||||
int max_key_val = (my_pe + 1) * (MAX_KEY / num_pes) - 1;
|
||||
|
||||
int *key_array = new int[size];
|
||||
// Perform final untimed sort on keys
|
||||
for(int i = 0; i < size; ++i)
|
||||
if(outputKeys[keyBuffer2[i] - min_key_val] > 0)
|
||||
key_array[--outputKeys[keyBuffer2[i] - min_key_val]] = keyBuffer2[i];
|
||||
else {
|
||||
fprintf(stderr, "%d: Found wrong key %d at %d with %d\n", my_pe, keyBuffer2[i], i, outputKeys[keyBuffer2[i]]);
|
||||
return false;
|
||||
}
|
||||
|
||||
if(size < 1)
|
||||
size = 1;
|
||||
|
||||
int k;
|
||||
const int MPI_TAG = 1000;
|
||||
// Check if largest key is smaller than next processor's
|
||||
if(my_pe > 0)
|
||||
MPI_Irecv(&k, 1, MPI_INT, my_pe - 1, MPI_TAG, MPI_COMM_WORLD,
|
||||
&request);
|
||||
if(my_pe < num_pes - 1)
|
||||
MPI_Send(&key_array[size - 1], 1, MPI_INT, my_pe + 1, MPI_TAG,
|
||||
MPI_COMM_WORLD );
|
||||
if(my_pe > 0)
|
||||
MPI_Wait(&request, &status);
|
||||
|
||||
// Check if it is smaller
|
||||
int j = 0;
|
||||
if( my_pe > 0 && size > 1 )
|
||||
if( k > key_array[0] )
|
||||
j++;
|
||||
|
||||
// Check if keys correctly sorted
|
||||
for(int i = 1; i < size; i++)
|
||||
if(key_array[i - 1] > key_array[i])
|
||||
j++;
|
||||
|
||||
delete[] key_array;
|
||||
|
||||
if(j != 0) {
|
||||
fprintf(stderr, "Processor %d: Full_verify: number of keys out of sort: %d\n",
|
||||
my_pe, j );
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void initGPU(ncclComm_t &comms)
|
||||
{
|
||||
// Calculation for local rank, taken from rccl-tests
|
||||
int localRank = 0;
|
||||
int nProcs, proc;
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &nProcs);
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &proc);
|
||||
char hostname[1024];
|
||||
gethostname(hostname, 1024);
|
||||
for (int i=0; i< 1024; i++) {
|
||||
if (hostname[i] == '.') {
|
||||
hostname[i] = '\0';
|
||||
break;
|
||||
}
|
||||
}
|
||||
uint64_t hostHashs[nProcs];
|
||||
hostHashs[proc] = getHostHash(hostname);
|
||||
MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, hostHashs, sizeof(uint64_t), MPI_BYTE, MPI_COMM_WORLD);
|
||||
for (int p=0; p<nProcs; p++) {
|
||||
if (p == proc) break;
|
||||
if (hostHashs[p] == hostHashs[proc]) localRank++;
|
||||
}
|
||||
|
||||
/***
|
||||
* Select a GPU
|
||||
*/
|
||||
int ndevices, my_device=0;
|
||||
hipGetDeviceCount (&ndevices);
|
||||
my_device = localRank % ndevices;
|
||||
hipSetDevice(my_device);
|
||||
|
||||
ncclUniqueId ncclId;
|
||||
if (proc == 0) {
|
||||
NCCLCHECK(ncclGetUniqueId(&ncclId));
|
||||
}
|
||||
MPI_Bcast(&ncclId, sizeof(ncclId), MPI_BYTE, 0, MPI_COMM_WORLD);
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
|
||||
#ifdef RCCL_MULTIRANKPERGPU
|
||||
NCCLCHECK(ncclCommInitRankMulti(&comms, nProcs, ncclId, proc, proc));
|
||||
#else
|
||||
NCCLCHECK(ncclCommInitRank(&comms, nProcs, ncclId, proc));
|
||||
#endif
|
||||
|
||||
printf("Rank %d: Device %d, Host %s\n", proc, my_device, hostname);
|
||||
fflush(stdout);
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
}
|
||||
|
||||
void *roc_shmem_malloc(size_t size)
|
||||
{
|
||||
void *v;
|
||||
hipMalloc((void **)&v, size);
|
||||
return v;
|
||||
}
|
||||
|
||||
int roc_shmem_free(void *v)
|
||||
{
|
||||
return hipFree(v);
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
if(argc < 1) {
|
||||
printf("Format: %s [iterations]\n", argv[0]);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Init stuff
|
||||
MPI_Init(&argc, &argv);
|
||||
ncclComm_t comms;
|
||||
initGPU(comms);
|
||||
|
||||
int iterations = 1000;
|
||||
if(argc > 1)
|
||||
iterations = atoi(argv[1]);
|
||||
|
||||
int num_pes, my_pe;
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &num_pes);
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &my_pe);
|
||||
|
||||
// Configure input and outputs
|
||||
size_t size = 1024; //atoi(argv[1]);
|
||||
int *keys, *outputKeys;
|
||||
hipMalloc((void**)&keys, sizeof(int) * size);
|
||||
hipMalloc((void**)&outputKeys, sizeof(int) * WG_SIZE);
|
||||
|
||||
/* Generate random number sequence and subsequent keys on all procs */
|
||||
create_seq( find_my_seed( my_pe,
|
||||
num_pes,
|
||||
4*(long)size*num_pes,
|
||||
314159265.00, /* Random number gen seed */
|
||||
1220703125.00 ), /* Random number gen mult */
|
||||
1220703125.00, keys, size ); /* Random number gen mult */
|
||||
|
||||
|
||||
// Init buffers
|
||||
int *keyBuffer1, *keyBuffer2;
|
||||
keyBuffer1 = (int*)roc_shmem_malloc(sizeof(int) * size);
|
||||
keyBuffer2 = (int*)roc_shmem_malloc(sizeof(int) * size * 4);
|
||||
|
||||
int *sendCount = 0, *recvCount = 0, *sendOffset = 0, *recvOffset = 0;
|
||||
sendCount = (int*)roc_shmem_malloc(sizeof(int) * MAX_PES);
|
||||
recvCount = (int*)roc_shmem_malloc(sizeof(int) * MAX_PES);
|
||||
sendOffset = (int*)roc_shmem_malloc(sizeof(int) * MAX_PES);
|
||||
recvOffset = (int*)roc_shmem_malloc(sizeof(int) * MAX_PES);
|
||||
|
||||
printf("Begin untimed run\n");
|
||||
// Untimed run
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
sort((int*)keys, keyBuffer1, keyBuffer2,
|
||||
sendCount, recvCount, sendOffset, recvOffset,
|
||||
outputKeys, size, 1, comms);
|
||||
hipDeviceSynchronize();
|
||||
|
||||
printf("Verify untimed run\n");
|
||||
// Verify correctness
|
||||
if(!verify(outputKeys, keyBuffer2, outputKeys[MAX_KEY / num_pes - 1])) {
|
||||
fprintf(stderr, "Wrong output\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
printf("Begin timed run\n");
|
||||
// Timed run
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
auto time_start = TIME_NOW;
|
||||
sort((int*)keys, keyBuffer1, keyBuffer2,
|
||||
sendCount, recvCount, sendOffset, recvOffset,
|
||||
outputKeys, size, iterations, comms);
|
||||
hipDeviceSynchronize();
|
||||
double tot_time = (double)TIME_DIFF(TIME_NOW, time_start);
|
||||
|
||||
double all_time = 0;
|
||||
MPI_Allreduce(&tot_time, &all_time, 1,
|
||||
MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
|
||||
|
||||
if(my_pe == 0) {
|
||||
printf("Avg time:\t%.3f\tus\n", all_time / (double)(1000.0 * iterations * num_pes));
|
||||
}
|
||||
|
||||
// Verify correctness
|
||||
if(!verify(outputKeys, keyBuffer2, outputKeys[MAX_KEY / num_pes - 1])) {
|
||||
fprintf(stderr, "Wrong output\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Clean up
|
||||
hipFree(keys);
|
||||
hipFree(outputKeys);
|
||||
roc_shmem_free(keyBuffer1);
|
||||
roc_shmem_free(keyBuffer2);
|
||||
roc_shmem_free(sendCount);
|
||||
roc_shmem_free(recvCount);
|
||||
roc_shmem_free(sendOffset);
|
||||
roc_shmem_free(recvOffset);
|
||||
ncclCommDestroy(comms);
|
||||
MPI_Finalize();
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,358 @@
|
||||
#include <iostream>
|
||||
#include <stdio.h>
|
||||
#include <mpi.h>
|
||||
#include <roc_shmem/roc_shmem.hpp>
|
||||
#include <unistd.h>
|
||||
using namespace std;
|
||||
using namespace rocshmem;
|
||||
|
||||
#include "common.h"
|
||||
#include "sort.h"
|
||||
|
||||
//#define TIME_PERF
|
||||
#ifdef TIME_PERF
|
||||
#define TIMERS 10
|
||||
__device__ uint64_t timers[TIMERS] = {0};
|
||||
__device__ uint64_t time_start;
|
||||
#define TIMERS_START() \
|
||||
if(threadIdx.x == 0) {\
|
||||
time_start = roc_shmem_timer();\
|
||||
}
|
||||
|
||||
#define TIME(TIMER_NUM) \
|
||||
if(threadIdx.x == 0) {\
|
||||
timers[TIMER_NUM] = roc_shmem_timer() - time_start;\
|
||||
time_start = roc_shmem_timer();\
|
||||
}
|
||||
|
||||
#define OUTPUT_TIME() \
|
||||
if(threadIdx.x == 0 && my_pe == 0) { \
|
||||
uint64_t sum = 0; \
|
||||
for(int i = 0; i < TIMERS; ++i) { \
|
||||
sum += timers[i]; \
|
||||
} \
|
||||
for(int i = 0; i < TIMERS; ++i) { \
|
||||
printf("%d: %f\n", i, (double)timers[i] / (double)sum); \
|
||||
} \
|
||||
}
|
||||
#else
|
||||
#define TIMERS_START()
|
||||
#define TIME(x)
|
||||
#define OUTPUT_TIME()
|
||||
#endif
|
||||
|
||||
__device__ __inline__ void alltoall(roc_shmem_ctx_t &ctx,
|
||||
roc_shmem_team_t team,
|
||||
int *dst, int *src) {
|
||||
// Perform alltoall
|
||||
roc_shmem_ctx_int_wg_alltoall(ctx,
|
||||
team,
|
||||
dst, // T* dest
|
||||
src, // const T* source
|
||||
1); // int nelement
|
||||
}
|
||||
|
||||
__global__ void sort(volatile int *keys, int *keyBuffer1,
|
||||
int *keyBuffer2, int *sendCount,
|
||||
int *recvCount, int *sendOffset,
|
||||
int *recvOffset, int *outputKeys,
|
||||
size_t size, roc_shmem_team_t team,
|
||||
int max_iters) {
|
||||
__shared__ roc_shmem_ctx_t ctx;
|
||||
__shared__ int bucketCounter[MAX_PES];
|
||||
__shared__ int bucketPtr[MAX_PES];
|
||||
__shared__ int total_size;
|
||||
|
||||
roc_shmem_wg_init();
|
||||
roc_shmem_wg_ctx_create(ROC_SHMEM_CTX_WG_PRIVATE, &ctx);
|
||||
|
||||
int n_pes = roc_shmem_ctx_n_pes(ctx);
|
||||
int my_pe = roc_shmem_my_pe();
|
||||
int buckets = n_pes;
|
||||
|
||||
int tid = threadIdx.x; // + blockDim.x * blockIdx.x;
|
||||
const int K_PER_BUCK = (MAX_KEY / buckets);
|
||||
|
||||
for(int iter = 0; iter < max_iters; ++iter) {
|
||||
// Reset
|
||||
for(int i = threadIdx.x; i < buckets; i += blockDim.x) {
|
||||
bucketCounter[i] = 0;
|
||||
bucketPtr[i] = 0;
|
||||
}
|
||||
__syncthreads();
|
||||
TIMERS_START()
|
||||
// Count size of each bucket
|
||||
for(int i = tid; i < size; i += blockDim.x) {
|
||||
atomicAdd(&bucketCounter[keys[i] / K_PER_BUCK], 1);
|
||||
}
|
||||
__syncthreads();
|
||||
TIME(0)
|
||||
// Update in global memory
|
||||
for(int i = tid; i < buckets; i += blockDim.x) {
|
||||
sendCount[i] = bucketPtr[i] = bucketCounter[i];
|
||||
}
|
||||
__syncthreads();
|
||||
TIME(1)
|
||||
// Perform local scan to get ptrs set
|
||||
for(int shift = 1; shift < buckets; shift *= 2) {
|
||||
int temp = 0;
|
||||
if(threadIdx.x >= shift && threadIdx.x < buckets) {
|
||||
temp = bucketPtr[threadIdx.x - shift];
|
||||
}
|
||||
__syncthreads();
|
||||
if(threadIdx.x < buckets) {
|
||||
bucketPtr[threadIdx.x] += temp;
|
||||
}
|
||||
__syncthreads();
|
||||
}
|
||||
__syncthreads();
|
||||
TIME(2)
|
||||
// Find offsets of where we're sending
|
||||
for(int i = threadIdx.x; i < buckets; i += blockDim.x) {
|
||||
sendOffset[i] = bucketPtr[i] - sendCount[i];
|
||||
}
|
||||
// Sort keys into buckets
|
||||
for(int i = threadIdx.x; i < size; i += blockDim.x) {
|
||||
int loc = atomicAdd(&bucketPtr[keys[i] / K_PER_BUCK], -1) - 1;
|
||||
keyBuffer1[loc] = keys[i];
|
||||
}
|
||||
roc_shmem_ctx_threadfence_system(ctx);
|
||||
// Force sync to wait for all PEs to update bucket sizes
|
||||
roc_shmem_ctx_wg_team_sync(ctx, team);
|
||||
TIME(3)
|
||||
// Let all PEs know how many keys you wish to send
|
||||
alltoall(ctx, team, recvCount, sendCount);
|
||||
// Let all PEs know where the offsets are of the keys
|
||||
alltoall(ctx, team, recvOffset, sendOffset);
|
||||
__syncthreads();
|
||||
TIME(4)
|
||||
if(threadIdx.x == 0) {
|
||||
total_size = 0;
|
||||
for(int i = 0; i < buckets; ++i) {
|
||||
roc_shmem_int_get_nbi(&keyBuffer2[total_size],
|
||||
&keyBuffer1[recvOffset[i]], recvCount[i], i);
|
||||
total_size += recvCount[i];
|
||||
}
|
||||
roc_shmem_quiet();
|
||||
}
|
||||
for(int i = threadIdx.x; i < K_PER_BUCK; i += blockDim.x)
|
||||
outputKeys[i] = 0;
|
||||
__syncthreads();
|
||||
TIME(5)
|
||||
int min_key_val = my_pe * K_PER_BUCK;
|
||||
int max_key_val = (my_pe + 1) * K_PER_BUCK - 1;
|
||||
|
||||
int *key_buff_ptr = outputKeys - min_key_val;
|
||||
for(int i = threadIdx.x; i < total_size; i += blockDim.x) {
|
||||
atomicAdd(&key_buff_ptr[keyBuffer2[i]], 1);
|
||||
}
|
||||
__syncthreads();
|
||||
TIME(6)
|
||||
// Perform local scan on keys
|
||||
for(int shift = 1; shift < K_PER_BUCK; shift *= 2) {
|
||||
int temp = 0;
|
||||
if(threadIdx.x >= shift && threadIdx.x < K_PER_BUCK) {
|
||||
temp = outputKeys[threadIdx.x - shift];
|
||||
}
|
||||
__syncthreads();
|
||||
if(threadIdx.x < K_PER_BUCK) {
|
||||
outputKeys[threadIdx.x] += temp;
|
||||
}
|
||||
__syncthreads();
|
||||
}
|
||||
TIME(7)
|
||||
}
|
||||
OUTPUT_TIME()
|
||||
roc_shmem_wg_ctx_destroy(ctx);
|
||||
roc_shmem_wg_finalize();
|
||||
}
|
||||
|
||||
bool verify(int *outputKeys, int *keyBuffer2, size_t size)
|
||||
{
|
||||
int num_pes = roc_shmem_n_pes();
|
||||
int my_pe = roc_shmem_my_pe();
|
||||
|
||||
MPI_Status status;
|
||||
MPI_Request request;
|
||||
|
||||
int min_key_val = my_pe * (MAX_KEY / num_pes);
|
||||
int max_key_val = (my_pe + 1) * (MAX_KEY / num_pes) - 1;
|
||||
|
||||
int *key_array = new int[size];
|
||||
// Perform final untimed sort on keys
|
||||
for(int i = 0; i < size; ++i)
|
||||
if(outputKeys[keyBuffer2[i] - min_key_val] > 0)
|
||||
key_array[--outputKeys[keyBuffer2[i] - min_key_val]] = keyBuffer2[i];
|
||||
else {
|
||||
fprintf(stderr, "%d: Found wrong key %d at %d with %d\n", my_pe, keyBuffer2[i], i, outputKeys[keyBuffer2[i]]);
|
||||
return false;
|
||||
}
|
||||
|
||||
if(size < 1)
|
||||
size = 1;
|
||||
|
||||
int k;
|
||||
const int MPI_TAG = 1000;
|
||||
// Check if largest key is smaller than next processor's
|
||||
if(my_pe > 0)
|
||||
MPI_Irecv(&k, 1, MPI_INT, my_pe - 1, MPI_TAG, MPI_COMM_WORLD,
|
||||
&request);
|
||||
if(my_pe < num_pes - 1)
|
||||
MPI_Send(&key_array[size - 1], 1, MPI_INT, my_pe + 1, MPI_TAG,
|
||||
MPI_COMM_WORLD );
|
||||
if(my_pe > 0)
|
||||
MPI_Wait(&request, &status);
|
||||
|
||||
// Check if it is smaller
|
||||
int j = 0;
|
||||
if( my_pe > 0 && size > 1 )
|
||||
if( k > key_array[0] )
|
||||
j++;
|
||||
|
||||
// Check if keys correctly sorted
|
||||
for(int i = 1; i < size; i++)
|
||||
if(key_array[i - 1] > key_array[i])
|
||||
j++;
|
||||
|
||||
delete[] key_array;
|
||||
|
||||
if(j != 0) {
|
||||
fprintf(stderr, "Processor %d: Full_verify: number of keys out of sort: %d\n",
|
||||
my_pe, j );
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void initGPU()
|
||||
{
|
||||
// Calculation for local rank, taken from rccl-tests
|
||||
int localRank = 0;
|
||||
int proc = roc_shmem_my_pe();
|
||||
int nProcs = roc_shmem_n_pes();
|
||||
char hostname[1024];
|
||||
gethostname(hostname, 1024);
|
||||
for (int i=0; i< 1024; i++) {
|
||||
if (hostname[i] == '.') {
|
||||
hostname[i] = '\0';
|
||||
break;
|
||||
}
|
||||
}
|
||||
uint64_t hostHashs[nProcs];
|
||||
hostHashs[proc] = getHostHash(hostname);
|
||||
MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, hostHashs, sizeof(uint64_t), MPI_BYTE, MPI_COMM_WORLD);
|
||||
for (int p=0; p<nProcs; p++) {
|
||||
if (p == proc) break;
|
||||
if (hostHashs[p] == hostHashs[proc]) localRank++;
|
||||
}
|
||||
|
||||
/***
|
||||
* Select a GPU
|
||||
*/
|
||||
int ndevices, my_device=0;
|
||||
hipGetDeviceCount (&ndevices);
|
||||
my_device = localRank % ndevices;
|
||||
hipSetDevice(my_device);
|
||||
|
||||
printf("Rank %d: Device %d, Host %s\n", proc, my_device, hostname);
|
||||
fflush(stdout);
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
// Init roc_shmem stuff
|
||||
initGPU();
|
||||
roc_shmem_init(NUM_WGS);
|
||||
int n_pes = roc_shmem_team_n_pes(ROC_SHMEM_TEAM_WORLD);
|
||||
roc_shmem_team_t team_world_dup = ROC_SHMEM_TEAM_INVALID;
|
||||
roc_shmem_team_split_strided(ROC_SHMEM_TEAM_WORLD,
|
||||
0,
|
||||
1,
|
||||
n_pes,
|
||||
nullptr,
|
||||
0,
|
||||
&team_world_dup);
|
||||
|
||||
int iterations = 1000;
|
||||
if(argc > 1)
|
||||
iterations = atoi(argv[1]);
|
||||
|
||||
int num_pes = roc_shmem_n_pes();
|
||||
int my_pe = roc_shmem_my_pe();
|
||||
|
||||
// Configure input and outputs
|
||||
size_t size = 1024; //atoi(argv[2]);
|
||||
int *keys, *outputKeys;
|
||||
hipMalloc((void**)&keys, sizeof(int) * size);
|
||||
hipMalloc((void**)&outputKeys, sizeof(int) * WG_SIZE);
|
||||
|
||||
/* Generate random number sequence and subsequent keys on all procs */
|
||||
create_seq( find_my_seed( my_pe,
|
||||
num_pes,
|
||||
4*(long)size*num_pes,
|
||||
314159265.00, /* Random number gen seed */
|
||||
1220703125.00 ), /* Random number gen mult */
|
||||
1220703125.00, keys, size ); /* Random number gen mult */
|
||||
|
||||
|
||||
// Init buffers
|
||||
int *keyBuffer1, *keyBuffer2;
|
||||
keyBuffer1 = (int*)roc_shmem_malloc(sizeof(int) * size);
|
||||
keyBuffer2 = (int*)roc_shmem_malloc(sizeof(int) * size * 4);
|
||||
|
||||
int *sendCount, *recvCount, *sendOffset, *recvOffset;
|
||||
sendCount = (int*)roc_shmem_malloc(sizeof(int) * MAX_PES);
|
||||
recvCount = (int*)roc_shmem_malloc(sizeof(int) * MAX_PES);
|
||||
sendOffset = (int*)roc_shmem_malloc(sizeof(int) * MAX_PES);
|
||||
recvOffset = (int*)roc_shmem_malloc(sizeof(int) * MAX_PES);
|
||||
|
||||
// Untimed run
|
||||
roc_shmem_barrier_all();
|
||||
sort<<<1, WG_SIZE>>>((int*)keys, keyBuffer1, keyBuffer2,
|
||||
sendCount, recvCount, sendOffset, recvOffset,
|
||||
outputKeys, size, team_world_dup, 1);
|
||||
hipDeviceSynchronize();
|
||||
|
||||
// Verify correctness
|
||||
if(!verify(outputKeys, keyBuffer2, outputKeys[MAX_KEY / num_pes - 1])) {
|
||||
fprintf(stderr, "Wrong output\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Timed run
|
||||
roc_shmem_barrier_all();
|
||||
auto time_start = TIME_NOW;
|
||||
sort<<<1, WG_SIZE>>>((int*)keys, keyBuffer1, keyBuffer2,
|
||||
sendCount, recvCount, sendOffset, recvOffset,
|
||||
outputKeys, size, team_world_dup, iterations);
|
||||
hipDeviceSynchronize();
|
||||
double tot_time = (double)TIME_DIFF(TIME_NOW, time_start);
|
||||
|
||||
double all_time = 0;
|
||||
MPI_Allreduce(&tot_time, &all_time, 1,
|
||||
MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
|
||||
|
||||
if(my_pe == 0) {
|
||||
printf("Avg time:\t%f\tus\n", all_time /
|
||||
(double)(1000.0 * iterations * num_pes));
|
||||
}
|
||||
|
||||
// Verify correctness
|
||||
if(!verify(outputKeys, keyBuffer2, outputKeys[MAX_KEY / num_pes - 1])) {
|
||||
fprintf(stderr, "Wrong output\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Clean up
|
||||
hipFree(keys);
|
||||
hipFree(outputKeys);
|
||||
roc_shmem_free(keyBuffer1);
|
||||
roc_shmem_free(keyBuffer2);
|
||||
roc_shmem_free(sendCount);
|
||||
roc_shmem_free(recvCount);
|
||||
roc_shmem_free(sendOffset);
|
||||
roc_shmem_free(recvOffset);
|
||||
roc_shmem_finalize();
|
||||
return 0;
|
||||
}
|
||||
+27
@@ -0,0 +1,27 @@
|
||||
#!/bin/bash
|
||||
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
|
||||
if [ -z $1 ]
|
||||
then
|
||||
install_path=~/rocshmem
|
||||
else
|
||||
install_path=$1
|
||||
fi
|
||||
|
||||
src_path=$(dirname "$(realpath $0)")/../../
|
||||
|
||||
cmake \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DCMAKE_INSTALL_PREFIX=$install_path \
|
||||
-DCMAKE_VERBOSE_MAKEFILE=OFF \
|
||||
-DDEBUG=OFF \
|
||||
-DPROFILE=OFF \
|
||||
-DUSE_GPU_IB=ON \
|
||||
-DUSE_DC=ON \
|
||||
-DUSE_IPC=OFF \
|
||||
-DUSE_THREADS=ON \
|
||||
-DUSE_WF_COAL=OFF \
|
||||
$src_path
|
||||
cmake --build . --parallel 8
|
||||
cmake --install .
|
||||
@@ -0,0 +1,27 @@
|
||||
#!/bin/bash
|
||||
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
|
||||
if [ -z $1 ]
|
||||
then
|
||||
install_path=~/rocshmem
|
||||
else
|
||||
install_path=$1
|
||||
fi
|
||||
|
||||
src_path=$(dirname "$(realpath $0)")/../../
|
||||
|
||||
cmake \
|
||||
-DCMAKE_BUILD_TYPE=Debug \
|
||||
-DCMAKE_INSTALL_PREFIX=$install_path \
|
||||
-DCMAKE_VERBOSE_MAKEFILE=ON \
|
||||
-DDEBUG=ON \
|
||||
-DPROFILE=OFF \
|
||||
-DUSE_GPU_IB=ON \
|
||||
-DUSE_DC=ON \
|
||||
-DUSE_IPC=OFF \
|
||||
-DUSE_THREADS=ON \
|
||||
-DUSE_WF_COAL=OFF \
|
||||
$src_path
|
||||
cmake --build . --parallel 8
|
||||
cmake --install .
|
||||
@@ -0,0 +1,26 @@
|
||||
#!/bin/bash
|
||||
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
if [ -z $1 ]
|
||||
then
|
||||
install_path=~/rocshmem
|
||||
else
|
||||
install_path=$1
|
||||
fi
|
||||
|
||||
src_path=$(dirname "$(realpath $0)")/../../
|
||||
|
||||
cmake \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DCMAKE_INSTALL_PREFIX=$install_path \
|
||||
-DCMAKE_VERBOSE_MAKEFILE=OFF \
|
||||
-DDEBUG=OFF \
|
||||
-DPROFILE=OFF \
|
||||
-DUSE_GPU_IB=ON \
|
||||
-DUSE_DC=ON \
|
||||
-DUSE_IPC=ON \
|
||||
-DUSE_THREADS=ON \
|
||||
-DUSE_WF_COAL=OFF \
|
||||
$src_path
|
||||
cmake --build . --parallel 8
|
||||
cmake --install .
|
||||
@@ -0,0 +1,27 @@
|
||||
#!/bin/bash
|
||||
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
|
||||
if [ -z $1 ]
|
||||
then
|
||||
install_path=~/rocshmem
|
||||
else
|
||||
install_path=$1
|
||||
fi
|
||||
|
||||
src_path=$(dirname "$(realpath $0)")/../../
|
||||
|
||||
cmake \
|
||||
-DCMAKE_BUILD_TYPE=Debug \
|
||||
-DCMAKE_INSTALL_PREFIX=$install_path \
|
||||
-DCMAKE_VERBOSE_MAKEFILE=OFF \
|
||||
-DDEBUG=OFF \
|
||||
-DPROFILE=ON \
|
||||
-DUSE_GPU_IB=ON \
|
||||
-DUSE_DC=ON \
|
||||
-DUSE_IPC=OFF \
|
||||
-DUSE_THREADS=ON \
|
||||
-DUSE_WF_COAL=OFF \
|
||||
$src_path
|
||||
cmake --build . --parallel 8
|
||||
cmake --install .
|
||||
+26
@@ -0,0 +1,26 @@
|
||||
#!/bin/bash
|
||||
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
if [ -z $1 ]
|
||||
then
|
||||
install_path=~/rocshmem
|
||||
else
|
||||
install_path=$1
|
||||
fi
|
||||
|
||||
src_path=$(dirname "$(realpath $0)")/../../
|
||||
|
||||
cmake \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DCMAKE_INSTALL_PREFIX=$install_path \
|
||||
-DCMAKE_VERBOSE_MAKEFILE=OFF \
|
||||
-DDEBUG=OFF \
|
||||
-DPROFILE=OFF \
|
||||
-DUSE_GPU_IB=ON \
|
||||
-DUSE_DC=ON \
|
||||
-DUSE_IPC=OFF \
|
||||
-DUSE_THREADS=OFF \
|
||||
-DUSE_WF_COAL=OFF \
|
||||
$src_path
|
||||
cmake --build . --parallel 8
|
||||
cmake --install .
|
||||
+27
@@ -0,0 +1,27 @@
|
||||
#!/bin/bash
|
||||
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
|
||||
if [ -z $1 ]
|
||||
then
|
||||
install_path=~/rocshmem
|
||||
else
|
||||
install_path=$1
|
||||
fi
|
||||
|
||||
src_path=$(dirname "$(realpath $0)")/../../
|
||||
|
||||
cmake \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DCMAKE_INSTALL_PREFIX=$install_path \
|
||||
-DCMAKE_VERBOSE_MAKEFILE=OFF \
|
||||
-DDEBUG=OFF \
|
||||
-DPROFILE=OFF \
|
||||
-DUSE_GPU_IB=ON \
|
||||
-DUSE_DC=OFF \
|
||||
-DUSE_IPC=OFF \
|
||||
-DUSE_THREADS=ON \
|
||||
-DUSE_WF_COAL=OFF \
|
||||
$src_path
|
||||
cmake --build . --parallel 8
|
||||
cmake --install .
|
||||
@@ -0,0 +1,26 @@
|
||||
#!/bin/bash
|
||||
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
if [ -z $1 ]
|
||||
then
|
||||
install_path=~/rocshmem
|
||||
else
|
||||
install_path=$1
|
||||
fi
|
||||
|
||||
src_path=$(dirname "$(realpath $0)")/../../
|
||||
|
||||
cmake \
|
||||
-DCMAKE_BUILD_TYPE=Debug \
|
||||
-DCMAKE_INSTALL_PREFIX=$install_path \
|
||||
-DCMAKE_VERBOSE_MAKEFILE=ON \
|
||||
-DDEBUG=OFF \
|
||||
-DPROFILE=OFF \
|
||||
-DUSE_GPU_IB=ON \
|
||||
-DUSE_DC=OFF \
|
||||
-DUSE_IPC=OFF \
|
||||
-DUSE_THREADS=ON \
|
||||
-DUSE_WF_COAL=OFF \
|
||||
$src_path
|
||||
cmake --build . --parallel 8
|
||||
cmake --install .
|
||||
@@ -0,0 +1,26 @@
|
||||
#!/bin/bash
|
||||
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
if [ -z $1 ]
|
||||
then
|
||||
install_path=~/rocshmem
|
||||
else
|
||||
install_path=$1
|
||||
fi
|
||||
|
||||
src_path=$(dirname "$(realpath $0)")/../../
|
||||
|
||||
cmake \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DCMAKE_INSTALL_PREFIX=$install_path \
|
||||
-DCMAKE_VERBOSE_MAKEFILE=OFF \
|
||||
-DDEBUG=OFF \
|
||||
-DPROFILE=OFF \
|
||||
-DUSE_GPU_IB=ON \
|
||||
-DUSE_DC=OFF \
|
||||
-DUSE_IPC=OFF \
|
||||
-DUSE_THREADS=ON \
|
||||
-DUSE_WF_COAL=ON \
|
||||
$src_path
|
||||
cmake --build . --parallel 8
|
||||
cmake --install .
|
||||
+28
@@ -0,0 +1,28 @@
|
||||
#!/bin/bash
|
||||
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
if [ -z $1 ]
|
||||
then
|
||||
install_path=~/rocshmem
|
||||
else
|
||||
install_path=$1
|
||||
fi
|
||||
|
||||
src_path=$(dirname "$(realpath $0)")/../../
|
||||
|
||||
cmake \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DCMAKE_INSTALL_PREFIX=$install_path \
|
||||
-DCMAKE_VERBOSE_MAKEFILE=OFF \
|
||||
-DDEBUG=OFF \
|
||||
-DPROFILE=OFF \
|
||||
-DUSE_GPU_IB=ON \
|
||||
-DUSE_DC=OFF \
|
||||
-DUSE_IPC=OFF \
|
||||
-DUSE_COHERENT_HEAP=OFF \
|
||||
-DUSE_CACHED_HEAP=OFF \
|
||||
-DUSE_THREADS=OFF \
|
||||
-DUSE_WF_COAL=OFF \
|
||||
$src_path
|
||||
cmake --build . --parallel 8
|
||||
cmake --install .
|
||||
@@ -0,0 +1,26 @@
|
||||
#!/bin/bash
|
||||
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
if [ -z $1 ]
|
||||
then
|
||||
install_path=~/rocshmem
|
||||
else
|
||||
install_path=$1
|
||||
fi
|
||||
|
||||
src_path=$(dirname "$(realpath $0)")/../../
|
||||
|
||||
cmake \
|
||||
-DCMAKE_BUILD_TYPE=Debug \
|
||||
-DCMAKE_INSTALL_PREFIX=$install_path \
|
||||
-DCMAKE_VERBOSE_MAKEFILE=ON \
|
||||
-DDEBUG=ON \
|
||||
-DPROFILE=OFF \
|
||||
-DUSE_GPU_IB=ON \
|
||||
-DUSE_DC=OFF \
|
||||
-DUSE_IPC=OFF \
|
||||
-DUSE_THREADS=OFF \
|
||||
-DUSE_WF_COAL=OFF \
|
||||
$src_path
|
||||
cmake --build . --parallel 8
|
||||
cmake --install .
|
||||
@@ -0,0 +1,27 @@
|
||||
#!/bin/bash
|
||||
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
if [ -z $1 ]
|
||||
then
|
||||
install_path=~/rocshmem
|
||||
else
|
||||
install_path=$1
|
||||
fi
|
||||
|
||||
src_path=$(dirname "$(realpath $0)")/../../
|
||||
|
||||
cmake \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DCMAKE_INSTALL_PREFIX=$install_path \
|
||||
-DCMAKE_VERBOSE_MAKEFILE=OFF \
|
||||
-DDEBUG=OFF \
|
||||
-DPROFILE=OFF \
|
||||
-DUSE_GPU_IB=ON \
|
||||
-DUSE_DC=OFF \
|
||||
-DUSE_IPC=OFF \
|
||||
-DUSE_THREADS=OFF \
|
||||
-DUSE_WF_COAL=OFF \
|
||||
-DUSE_MANAGED_HEAP=ON \
|
||||
$src_path
|
||||
cmake --build . --parallel 8
|
||||
cmake --install .
|
||||
@@ -0,0 +1,27 @@
|
||||
#!/bin/bash
|
||||
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
if [ -z $1 ]
|
||||
then
|
||||
install_path=~/rocshmem
|
||||
else
|
||||
install_path=$1
|
||||
fi
|
||||
|
||||
src_path=$(dirname "$(realpath $0)")/../../
|
||||
|
||||
cmake \
|
||||
-DCMAKE_BUILD_TYPE=Debug \
|
||||
-DCMAKE_INSTALL_PREFIX=$install_path \
|
||||
-DCMAKE_VERBOSE_MAKEFILE=ON \
|
||||
-DDEBUG=ON \
|
||||
-DPROFILE=OFF \
|
||||
-DUSE_GPU_IB=ON \
|
||||
-DUSE_DC=OFF \
|
||||
-DUSE_IPC=OFF \
|
||||
-DUSE_THREADS=OFF \
|
||||
-DUSE_WF_COAL=OFF \
|
||||
-DUSE_MANAGED_HEAP=ON \
|
||||
$src_path
|
||||
cmake --build . --parallel 8
|
||||
cmake --install .
|
||||
@@ -0,0 +1,26 @@
|
||||
#!/bin/bash
|
||||
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
if [ -z $1 ]
|
||||
then
|
||||
install_path=~/rocshmem
|
||||
else
|
||||
install_path=$1
|
||||
fi
|
||||
|
||||
src_path=$(dirname "$(realpath $0)")/../../
|
||||
|
||||
cmake \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DCMAKE_INSTALL_PREFIX=$install_path \
|
||||
-DCMAKE_VERBOSE_MAKEFILE=ON \
|
||||
-DDEBUG=OFF \
|
||||
-DPROFILE=ON \
|
||||
-DUSE_GPU_IB=ON \
|
||||
-DUSE_DC=OFF \
|
||||
-DUSE_IPC=OFF \
|
||||
-DUSE_THREADS=OFF \
|
||||
-DUSE_WF_COAL=OFF \
|
||||
$src_path
|
||||
cmake --build . --parallel 8
|
||||
cmake --install .
|
||||
@@ -0,0 +1,31 @@
|
||||
#!/bin/bash
|
||||
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
if [ -z $1 ]
|
||||
then
|
||||
install_path=~/rocshmem
|
||||
else
|
||||
install_path=$1
|
||||
fi
|
||||
|
||||
src_path=$(dirname "$(realpath $0)")/../../
|
||||
|
||||
cmake \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DCMAKE_INSTALL_PREFIX=$install_path \
|
||||
-DCMAKE_VERBOSE_MAKEFILE=OFF \
|
||||
-DDEBUG=OFF \
|
||||
-DPROFILE=OFF \
|
||||
-DUSE_GPU_IB=ON \
|
||||
-DUSE_DC=OFF \
|
||||
-DUSE_IPC=ON \
|
||||
-DUSE_COHERENT_HEAP=OFF \
|
||||
-DUSE_CACHED_HEAP=OFF \
|
||||
-DUSE_THREADS=OFF \
|
||||
-DUSE_WF_COAL=OFF \
|
||||
-DUSE_SINGLE_NODE=ON \
|
||||
-DUSE_HOST_SIDE_HDP_FLUSH=ON\
|
||||
-DROCM_PATH="/opt/rocm-5.4.2/"\
|
||||
$src_path
|
||||
cmake --build . --parallel 8
|
||||
cmake --install .
|
||||
@@ -0,0 +1,30 @@
|
||||
#!/bin/bash
|
||||
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
if [ -z $1 ]
|
||||
then
|
||||
install_path=~/rocshmem
|
||||
else
|
||||
install_path=$1
|
||||
fi
|
||||
|
||||
src_path=$(dirname "$(realpath $0)")/../../
|
||||
|
||||
cmake \
|
||||
-DCMAKE_BUILD_TYPE=Debug \
|
||||
-DCMAKE_INSTALL_PREFIX=$install_path \
|
||||
-DCMAKE_VERBOSE_MAKEFILE=OFF \
|
||||
-DDEBUG=OFF \
|
||||
-DPROFILE=OFF \
|
||||
-DUSE_GPU_IB=ON \
|
||||
-DUSE_DC=OFF \
|
||||
-DUSE_IPC=ON \
|
||||
-DUSE_COHERENT_HEAP=OFF \
|
||||
-DUSE_CACHED_HEAP=OFF \
|
||||
-DUSE_THREADS=OFF \
|
||||
-DUSE_WF_COAL=OFF \
|
||||
-DUSE_SINGLE_NODE=ON \
|
||||
-DUSE_HOST_SIDE_HDP_FLUSH=ON\
|
||||
$src_path
|
||||
cmake --build . --parallel 8
|
||||
cmake --install .
|
||||
+27
@@ -0,0 +1,27 @@
|
||||
#!/bin/bash
|
||||
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
if [ -z $1 ]
|
||||
then
|
||||
install_path=~/rocshmem
|
||||
else
|
||||
install_path=$1
|
||||
fi
|
||||
|
||||
src_path=$(dirname "$(realpath $0)")/../../
|
||||
|
||||
cmake \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DCMAKE_INSTALL_PREFIX=$install_path \
|
||||
-DCMAKE_VERBOSE_MAKEFILE=OFF \
|
||||
-DDEBUG=OFF \
|
||||
-DPROFILE=OFF \
|
||||
-DUSE_GPU_IB=OFF \
|
||||
-DUSE_DC=OFF \
|
||||
-DUSE_IPC=OFF \
|
||||
-DUSE_THREADS=ON \
|
||||
-DUSE_WF_COAL=OFF \
|
||||
-DUSE_COHERENT_HEAP=ON \
|
||||
$src_path
|
||||
cmake --build . --parallel 8
|
||||
cmake --install .
|
||||
@@ -0,0 +1,27 @@
|
||||
#!/bin/bash
|
||||
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
if [ -z $1 ]
|
||||
then
|
||||
install_path=~/rocshmem
|
||||
else
|
||||
install_path=$1
|
||||
fi
|
||||
|
||||
src_path=$(dirname "$(realpath $0)")/../../
|
||||
|
||||
cmake \
|
||||
-DCMAKE_BUILD_TYPE=Debug \
|
||||
-DCMAKE_INSTALL_PREFIX=$install_path \
|
||||
-DCMAKE_VERBOSE_MAKEFILE=OFF \
|
||||
-DDEBUG=OFF \
|
||||
-DPROFILE=OFF \
|
||||
-DUSE_GPU_IB=OFF \
|
||||
-DUSE_DC=OFF \
|
||||
-DUSE_IPC=OFF \
|
||||
-DUSE_THREADS=ON \
|
||||
-DUSE_WF_COAL=OFF \
|
||||
-DUSE_COHERENT_HEAP=OFF \
|
||||
$src_path
|
||||
cmake --build . --parallel 8
|
||||
# cmake --install .
|
||||
@@ -0,0 +1,5 @@
|
||||
gdb scripts allow launching rocshmem tests repeatedly with gdb
|
||||
and dump backtrace on error
|
||||
- gdbscript - consists of commands which are executed on gdb launch
|
||||
- gdbrun - run script, launches test in loop with gdb enabled
|
||||
e.g ./gdbrun 14 10 launches pingPong 10 times
|
||||
برخی از فایل ها نشان داده نشدند زیرا تعداد زیادی فایل در این تفاوت تغییر کرده اند نمایش بیشتر
مرجع در شماره جدید
Block a user