[ROCm/rocshmem commit: c4b7e0d91b]
Tento commit je obsažen v:
Brandon Potter
2024-08-07 11:03:18 -07:00
rodič 4610659bf1
revize cec6ba91a0
3 změnil soubory, kde provedl 140 přidání a 18 odebrání
+1 -1
Zobrazit soubor
@@ -24,4 +24,4 @@ cmake \
-DUSE_COHERENT_HEAP=ON \
$src_path
cmake --build . --parallel 8
cmake --install .
#cmake --install .
+131
Zobrazit soubor
@@ -0,0 +1,131 @@
/******************************************************************************
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*****************************************************************************/
#ifndef LIBRARY_SRC_ATOMIC_HPP
#define LIBRARY_SRC_ATOMIC_HPP
namespace rocshmem {
namespace detail {
namespace atomic {
typedef enum rocshmem_memory_scope {
memory_scope_thread = __HIP_MEMORY_SCOPE_SINGLETHREAD,
memory_scope_wavefront = __HIP_MEMORY_SCOPE_WAVEFRONT,
memory_scope_workgroup = __HIP_MEMORY_SCOPE_WORKGROUP,
memory_scope_agent = __HIP_MEMORY_SCOPE_AGENT,
memory_scope_system = __HIP_MEMORY_SCOPE_SYSTEM,
} rocshmem_memory_scope;
typedef enum rocshmem_memory_order {
memory_order_relaxed = __ATOMIC_RELAXED,
memory_order_consume = __ATOMIC_CONSUME,
memory_order_acquire = __ATOMIC_ACQUIRE,
memory_order_release = __ATOMIC_RELEASE,
memory_order_acq_rel = __ATOMIC_ACQ_REL,
memory_order_seq_cst = __ATOMIC_SEQ_CST
} rocshmem_memory_order;
template <typename T, rocshmem_memory_scope Scope>
__host__ __device__
T load(const T* address, rocshmem_memory_order order) {
return __hip_atomic_load(address, order, Scope);
}
template <typename T, rocshmem_memory_scope Scope>
__host__ __device__
void store(const T value, const T* address, rocshmem_memory_order order) {
return __hip_atomic_store(value, address, order, Scope);
}
template <typename T, rocshmem_memory_scope Scope>
__host__ __device__
bool compare_exchange_weak(T& expected, T desired, rocshmem_memory_order success,
rocshmem_memory_order failure) {
return __hip_atomic_compare_exchange_weak(expected, desired, success, failure, Scope);
}
template <typename T, rocshmem_memory_scope Scope>
__host__ __device__
bool compare_exchange_strong(T& expected, T desired, rocshmem_memory_order success,
rocshmem_memory_order failure) {
return __hip_atomic_compare_exchange_strong(expected, desired, success, failure, Scope);
}
template <class T, class U, rocshmem_memory_scope Scope>
__host__ __device__
T fetch_add(T* obj, U arg, rocshmem_memory_order order) {
return __hip_atomic_fetch_add(obj, arg, order, Scope);
}
template <class T, class U, rocshmem_memory_scope Scope>
__host__ __device__
T fetch_sub(T* obj, U arg, rocshmem_memory_order order) {
return __hip_atomic_fetch_sub(obj, arg, order, Scope);
}
template <class T, class U, rocshmem_memory_scope Scope>
__host__ __device__
T fetch_and(T* obj, U arg, rocshmem_memory_order order) {
return __hip_atomic_fetch_and(obj, arg, order, Scope);
}
template <class T, class U, rocshmem_memory_scope Scope>
__host__ __device__
T fetch_or(T* obj, U arg, rocshmem_memory_order order) {
return __hip_atomic_fetch_or(obj, arg, order, Scope);
}
template <class T, class U, rocshmem_memory_scope Scope>
__host__ __device__
T fetch_xor(T* obj, U arg, rocshmem_memory_order order) {
return __hip_atomic_fetch_xor(obj, arg, order, Scope);
}
template <class T, class U, rocshmem_memory_scope Scope>
__host__ __device__
T fetch_max(T* obj, U arg, rocshmem_memory_order order) {
return __hip_atomic_fetch_max(obj, arg, order, Scope);
}
template <class T, class U, rocshmem_memory_scope Scope>
__host__ __device__
T fetch_min(T* obj, U arg, rocshmem_memory_order order) {
return __hip_atomic_fetch_min(obj, arg, order, Scope);
}
template <rocshmem_memory_scope Scope>
__device__
void thread_fence([[maybe_unused]] rocshmem_memory_order order) {
if constexpr (Scope == memory_scope_system) {
__threadfence_system();
} else if constexpr (Scope == memory_scope_agent) {
__threadfence();
} else if constexpr (Scope == memory_scope_workgroup) {
__threadfence_block();
}
}
} // namespace atomic
} // namespace detail
} // namespace rocshmem
#endif // LIBRARY_SRC_ATOMIC_HPP_
+8 -17
Zobrazit soubor
@@ -20,30 +20,21 @@
* IN THE SOFTWARE.
*****************************************************************************/
/**
* @file notifier.hpp
*
* @brief Contains the notification memory space for threads to communicate
* results with one another.
*
* Assume one thread does work on behalf of other threads (as a leader) and
* that work needs to be communicated to the other threads as a result.
* To expose the result, the threads need to share a memory space where the
* result can be written. The leader thread writes the result out to this
* memory space and all threads synchronize on it.
*
* This class allows the leader thread to notify other threads of the update.
*/
#ifndef LIBRARY_SRC_MEMORY_NOTIFIER_HPP_
#define LIBRARY_SRC_MEMORY_NOTIFIER_HPP_
#include "../device_proxy.hpp"
#include "../util.hpp"
#include "../atomic.hpp"
namespace rocshmem {
template<detail::atomic::rocshmem_memory_scope Scope>
class Notifier {
};
template<detail::atomic::rocshmem_memory_scope Scope>
class Notifier<detail::atomic::memory_scope_workgroup> {
public:
__device__ uint64_t read() { return value_; }
@@ -67,9 +58,9 @@ class Notifier {
uint64_t value_{};
};
template <typename ALLOCATOR>
template <typename ALLOCATOR, detail::atomic::rocshmem_memory_scope Scope>
class NotifierProxy {
using ProxyT = DeviceProxy<ALLOCATOR, Notifier, 1>;
using ProxyT = DeviceProxy<ALLOCATOR, Notifier<Scope>, 1>;
public:
__host__ __device__ Notifier* get() { return proxy_.get(); }