@@ -60,7 +60,6 @@ option(USE_IPC "Enable IPC support (using HIP)" OFF)
|
||||
option(USE_THREADS "Enable workgroup threads to share network queues" OFF)
|
||||
option(USE_WF_COAL "Enable wavefront message coalescing" OFF)
|
||||
option(USE_COHERENT_HEAP "Enable support for coherent systems" OFF)
|
||||
option(USE_CACHED_HEAP "Enable support for cached systems" OFF)
|
||||
option(USE_MANAGED_HEAP "Enable managed memory" OFF)
|
||||
option(USE_HOST_HEAP "Enable host memory using malloc/free" OFF)
|
||||
option(USE_HIP_HOST_HEAP "Enable host memory using hip api" OFF)
|
||||
|
||||
@@ -45,7 +45,7 @@ ROC_SHMEM base requirements:
|
||||
* May work with other versions, but not tested
|
||||
* AMD GFX9 GPUs (e.g.: MI25, Vega 56, Vega 64, MI50, MI60, MI100, Radeon VII)
|
||||
* AMD MI200 GPUs: To enable the support on MI200, please configure the library
|
||||
with USE_CACHED_HEAP
|
||||
with USE_COHERENT_HEAP
|
||||
* ROCm-aware MPI as described in
|
||||
[Building the Dependencies](#building-the-dependencies)
|
||||
* InfiniBand adaptor compatible with ROCm RDMA technology
|
||||
|
||||
@@ -7,10 +7,9 @@
|
||||
#cmakedefine USE_SHARED_CTX
|
||||
#cmakedefine USE_WF_COAL
|
||||
#cmakedefine USE_COHERENT_HEAP
|
||||
#cmakedefine USE_CACHED_HEAP
|
||||
#cmakedefine USE_MANAGED_HEAP
|
||||
#cmakedefine USE_HOST_HEAP
|
||||
#cmakedefine USE_HIP_HOST_HEAP
|
||||
#cmakedefine USE_FUNC_CALL
|
||||
#cmakedefine USE_SINGLE_NODE
|
||||
#cmakedefine USE_HOST_SIDE_HDP_FLUSH
|
||||
#cmakedefine USE_HOST_SIDE_HDP_FLUSH
|
||||
|
||||
@@ -20,7 +20,6 @@ cmake \
|
||||
-DUSE_DC=OFF \
|
||||
-DUSE_IPC=OFF \
|
||||
-DUSE_COHERENT_HEAP=OFF \
|
||||
-DUSE_CACHED_HEAP=OFF \
|
||||
-DUSE_THREADS=OFF \
|
||||
-DUSE_WF_COAL=OFF \
|
||||
$src_path
|
||||
|
||||
@@ -20,12 +20,10 @@ cmake \
|
||||
-DUSE_DC=OFF \
|
||||
-DUSE_IPC=ON \
|
||||
-DUSE_COHERENT_HEAP=OFF \
|
||||
-DUSE_CACHED_HEAP=OFF \
|
||||
-DUSE_THREADS=OFF \
|
||||
-DUSE_WF_COAL=OFF \
|
||||
-DUSE_SINGLE_NODE=ON \
|
||||
-DUSE_HOST_SIDE_HDP_FLUSH=ON\
|
||||
-DROCM_PATH="/opt/rocm-5.4.2/"\
|
||||
$src_path
|
||||
cmake --build . --parallel 8
|
||||
cmake --install .
|
||||
|
||||
@@ -20,7 +20,6 @@ cmake \
|
||||
-DUSE_DC=OFF \
|
||||
-DUSE_IPC=ON \
|
||||
-DUSE_COHERENT_HEAP=OFF \
|
||||
-DUSE_CACHED_HEAP=OFF \
|
||||
-DUSE_THREADS=OFF \
|
||||
-DUSE_WF_COAL=OFF \
|
||||
-DUSE_SINGLE_NODE=ON \
|
||||
|
||||
@@ -256,7 +256,7 @@ void* Connection::buf_alloc([[maybe_unused]] struct ibv_pd* pd,
|
||||
if (use_gpu_mem) {
|
||||
void* dev_ptr;
|
||||
if (coherent_cq == 1) {
|
||||
#if defined USE_COHERENT_HEAP || defined USE_CACHED_HEAP
|
||||
#if defined USE_COHERENT_HEAP
|
||||
CHECK_HIP(hipMalloc(reinterpret_cast<void**>(&dev_ptr), size));
|
||||
#else
|
||||
#ifdef HIP_SUPPORTS_MALLOC_UNCACHED
|
||||
|
||||
@@ -40,7 +40,7 @@ namespace rocshmem {
|
||||
|
||||
#if defined USE_MANAGED_HEAP
|
||||
using HEAP_T = HeapMemory<HIPAllocatorManaged>;
|
||||
#elif defined USE_COHERENT_HEAP || defined USE_CACHED_HEAP
|
||||
#elif defined USE_COHERENT_HEAP
|
||||
using HEAP_T = HeapMemory<HIPAllocator>;
|
||||
#elif defined USE_HOST_HEAP
|
||||
using HEAP_T = HeapMemory<HostAllocator>;
|
||||
|
||||
@@ -126,7 +126,7 @@ extern const int gpu_clock_freq_mhz;
|
||||
__device__ __forceinline__ void __roc_inv() { asm volatile("buffer_wbinvl1;"); }
|
||||
|
||||
__device__ __forceinline__ void __roc_flush() {
|
||||
#if defined USE_COHERENT_HEAP || defined USE_CACHED_HEAP
|
||||
#if defined USE_COHERENT_HEAP
|
||||
#if __gfx90a__
|
||||
asm volatile("s_dcache_wb;");
|
||||
asm volatile("buffer_wbl2;");
|
||||
|
||||
Ссылка в новой задаче
Block a user