[ROCm/rocshmem commit: 0c53a075f2]
Этот коммит содержится в:
Brandon Potter
2024-08-12 11:29:31 -07:00
родитель cec6ba91a0
Коммит ef4a14e947
4 изменённых файлов: 61 добавлений и 55 удалений
+48 -39
Просмотреть файл
@@ -44,82 +44,91 @@ typedef enum rocshmem_memory_order {
memory_order_seq_cst = __ATOMIC_SEQ_CST
} rocshmem_memory_order;
template <typename T, rocshmem_memory_scope Scope>
struct rocshmem_memory_orders {
rocshmem_memory_order load {memory_order_acquire};
rocshmem_memory_order store {memory_order_release};
rocshmem_memory_order fence {memory_order_acq_rel};
rocshmem_memory_order atomic {memory_order_acq_rel};
rocshmem_memory_order weak_cas_success {memory_order_acq_rel};
rocshmem_memory_order weak_cas_failure {memory_order_acq_rel};
rocshmem_memory_order strong_cas_success {memory_order_acq_rel};
rocshmem_memory_order strong_cas_failure {memory_order_acq_rel};
};
template <typename T, rocshmem_memory_scope s>
__host__ __device__
T load(const T* address, rocshmem_memory_order order) {
return __hip_atomic_load(address, order, Scope);
T load(const T* address, rocshmem_memory_orders o) {
return __hip_atomic_load(address, o.load, s);
}
template <typename T, rocshmem_memory_scope Scope>
template <typename T, rocshmem_memory_scope s>
__host__ __device__
void store(const T value, const T* address, rocshmem_memory_order order) {
return __hip_atomic_store(value, address, order, Scope);
void store(T* address, const T value, rocshmem_memory_orders o) {
return __hip_atomic_store(address, value, o.store, s);
}
template <typename T, rocshmem_memory_scope Scope>
template <typename T, rocshmem_memory_scope s>
__host__ __device__
bool compare_exchange_weak(T& expected, T desired, rocshmem_memory_order success,
rocshmem_memory_order failure) {
return __hip_atomic_compare_exchange_weak(expected, desired, success, failure, Scope);
bool compare_exchange_weak(T& expected, T desired, rocshmem_memory_orders o) {
return __hip_atomic_compare_exchange_weak(expected, desired, o.weak_cas_success, o.weak_cas_failure, s);
}
template <typename T, rocshmem_memory_scope Scope>
template <typename T, rocshmem_memory_scope s>
__host__ __device__
bool compare_exchange_strong(T& expected, T desired, rocshmem_memory_order success,
rocshmem_memory_order failure) {
return __hip_atomic_compare_exchange_strong(expected, desired, success, failure, Scope);
bool compare_exchange_strong(T& expected, T desired, rocshmem_memory_orders o) {
return __hip_atomic_compare_exchange_strong(expected, desired, o.strong_cas_success, o.strong_cas_failure, s);
}
template <class T, class U, rocshmem_memory_scope Scope>
template <class T, class U, rocshmem_memory_scope s>
__host__ __device__
T fetch_add(T* obj, U arg, rocshmem_memory_order order) {
return __hip_atomic_fetch_add(obj, arg, order, Scope);
T fetch_add(T* obj, U arg, rocshmem_memory_orders o) {
return __hip_atomic_fetch_add(obj, arg, o.atomic, s);
}
template <class T, class U, rocshmem_memory_scope Scope>
template <class T, class U, rocshmem_memory_scope s>
__host__ __device__
T fetch_sub(T* obj, U arg, rocshmem_memory_order order) {
return __hip_atomic_fetch_sub(obj, arg, order, Scope);
T fetch_sub(T* obj, U arg, rocshmem_memory_orders o) {
return __hip_atomic_fetch_sub(obj, arg, o.atomic, s);
}
template <class T, class U, rocshmem_memory_scope Scope>
template <class T, class U, rocshmem_memory_scope s>
__host__ __device__
T fetch_and(T* obj, U arg, rocshmem_memory_order order) {
return __hip_atomic_fetch_and(obj, arg, order, Scope);
T fetch_and(T* obj, U arg, rocshmem_memory_orders o) {
return __hip_atomic_fetch_and(obj, arg, o.atomic, s);
}
template <class T, class U, rocshmem_memory_scope Scope>
template <class T, class U, rocshmem_memory_scope s>
__host__ __device__
T fetch_or(T* obj, U arg, rocshmem_memory_order order) {
return __hip_atomic_fetch_or(obj, arg, order, Scope);
T fetch_or(T* obj, U arg, rocshmem_memory_orders o) {
return __hip_atomic_fetch_or(obj, arg, o, s);
}
template <class T, class U, rocshmem_memory_scope Scope>
template <class T, class U, rocshmem_memory_scope s>
__host__ __device__
T fetch_xor(T* obj, U arg, rocshmem_memory_order order) {
return __hip_atomic_fetch_xor(obj, arg, order, Scope);
T fetch_xor(T* obj, U arg, rocshmem_memory_orders o) {
return __hip_atomic_fetch_xor(obj, arg, o.atomic, s);
}
template <class T, class U, rocshmem_memory_scope Scope>
template <class T, class U, rocshmem_memory_scope s>
__host__ __device__
T fetch_max(T* obj, U arg, rocshmem_memory_order order) {
return __hip_atomic_fetch_max(obj, arg, order, Scope);
T fetch_max(T* obj, U arg, rocshmem_memory_orders o) {
return __hip_atomic_fetch_max(obj, arg, o.atomic, s);
}
template <class T, class U, rocshmem_memory_scope Scope>
template <class T, class U, rocshmem_memory_scope s>
__host__ __device__
T fetch_min(T* obj, U arg, rocshmem_memory_order order) {
return __hip_atomic_fetch_min(obj, arg, order, Scope);
T fetch_min(T* obj, U arg, rocshmem_memory_orders o) {
return __hip_atomic_fetch_min(obj, arg, o.atomic, s);
}
template <rocshmem_memory_scope Scope>
template <rocshmem_memory_scope s>
__device__
void thread_fence([[maybe_unused]] rocshmem_memory_order order) {
if constexpr (Scope == memory_scope_system) {
if constexpr (s == memory_scope_system) {
__threadfence_system();
} else if constexpr (Scope == memory_scope_agent) {
} else if constexpr (s == memory_scope_agent) {
__threadfence();
} else if constexpr (Scope == memory_scope_workgroup) {
} else if constexpr (s == memory_scope_workgroup) {
__threadfence_block();
}
}
+10 -13
Просмотреть файл
@@ -29,20 +29,15 @@
namespace rocshmem {
template<detail::atomic::rocshmem_memory_scope Scope>
template<detail::atomic::rocshmem_memory_scope scope>
class Notifier {
};
template<detail::atomic::rocshmem_memory_scope Scope>
class Notifier<detail::atomic::memory_scope_workgroup> {
public:
__device__ uint64_t read() { return value_; }
__device__ uint64_t read() {
return detail::atomic::load<uint64_t, scope>(&value_, orders);
}
__device__ void write(uint64_t val) {
if (is_thread_zero_in_block()) {
value_ = val;
}
publish();
detail::atomic::store<uint64_t, scope>(&value_, val, orders);
}
__device__ void done() { __syncthreads(); }
@@ -55,15 +50,17 @@ class Notifier<detail::atomic::memory_scope_workgroup> {
__syncthreads();
}
detail::atomic::rocshmem_memory_orders orders;
uint64_t value_{};
};
template <typename ALLOCATOR, detail::atomic::rocshmem_memory_scope Scope>
template <typename ALLOCATOR, detail::atomic::rocshmem_memory_scope scope>
class NotifierProxy {
using ProxyT = DeviceProxy<ALLOCATOR, Notifier<Scope>, 1>;
using ProxyT = DeviceProxy<ALLOCATOR, Notifier<scope>, 1>;
public:
__host__ __device__ Notifier* get() { return proxy_.get(); }
__host__ __device__ Notifier<scope>* get() { return proxy_.get(); }
private:
ProxyT proxy_{};
+1 -1
Просмотреть файл
@@ -48,7 +48,7 @@ class SlabHeap {
/**
* @brief Helper type for notifier
*/
using NOTIFIER_PROXY_T = NotifierProxy<HIPAllocator>;
using NOTIFIER_PROXY_T = NotifierProxy<HIPAllocator, detail::atomic::memory_scope_workgroup>;
/**
* @brief Helper type for notifier
+2 -2
Просмотреть файл
@@ -52,7 +52,7 @@ write_to_memory(uint8_t* raw_memory) {
__global__
void
all_threads_once(uint8_t* raw_memory,
Notifier* notifier) {
Notifier<detail::atomic::memory_scope_workgroup> * notifier) {
notifier->write(NOTIFIER_OFFSET);
uint64_t offset_u64 {notifier->read()};
notifier->done();
@@ -65,7 +65,7 @@ all_threads_once(uint8_t* raw_memory,
}
class NotifierTestFixture : public ::testing::Test {
using NotifierProxyT = NotifierProxy<HIPAllocator>;
using NotifierProxyT = NotifierProxy<HIPAllocator, detail::atomic::memory_scope_workgroup>;
public:
NotifierTestFixture() {