FEAT-47686 - Changes to support new uncached memory segment in ROCr.

Change-Id: I0ba8769d6737cdf1cc8a8644b2e82109f584a430


[ROCm/clr commit: aca7d9e14a]
Этот коммит содержится в:
kjayapra-amd
2023-06-12 12:52:29 -05:00
коммит произвёл Karthik Jayaprakash
родитель 7e80d81b1e
Коммит 03504e2f6c
2 изменённых файла: 17 добавлений и 4 удаления
+15 -4
Просмотреть файл
@@ -901,6 +901,12 @@ hsa_status_t Device::iterateGpuMemoryPoolCallback(hsa_amd_memory_pool_t pool, vo
return stat;
}
// If the flag set is ext scoped fine grain, break the loop
if ((global_flag & HSA_REGION_GLOBAL_FLAG_EXTENDED_SCOPE_FINE_GRAINED) != 0) {
dev->gpu_ext_fine_grained_segment_ = pool;
break;
}
if ((global_flag & HSA_REGION_GLOBAL_FLAG_FINE_GRAINED) != 0) {
dev->gpu_fine_grained_segment_ = pool;
} else if ((global_flag & HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED) != 0) {
@@ -959,6 +965,12 @@ hsa_status_t Device::iterateCpuMemoryPoolCallback(hsa_amd_memory_pool_t pool, vo
break;
}
// If the flag set is ext scoped fine grain, break the loop
if ((global_flag & HSA_REGION_GLOBAL_FLAG_EXTENDED_SCOPE_FINE_GRAINED) != 0) {
agentInfo->ext_fine_grain_pool_ = pool;
break;
}
if ((global_flag & HSA_REGION_GLOBAL_FLAG_FINE_GRAINED) != 0) {
if (agentInfo->fine_grain_pool.handle == 0) {
agentInfo->fine_grain_pool = pool;
@@ -2158,9 +2170,8 @@ bool Device::allowPeerAccess(device::Memory* memory) const {
}
void* Device::deviceLocalAlloc(size_t size, bool atomics, bool pseudo_fine_grain) const {
const hsa_amd_memory_pool_t& pool = (atomics) ? gpu_fine_grained_segment_ : gpuvm_segment_;
uint32_t hsa_mem_flags = (atomics && pseudo_fine_grain) ? HSA_AMD_MEMORY_POOL_PCIE_FLAG
: HSA_AMD_MEMORY_POOL_STANDARD_FLAG;
const hsa_amd_memory_pool_t& pool = (pseudo_fine_grain) ? gpu_ext_fine_grained_segment_
: (atomics) ? gpu_fine_grained_segment_ : gpuvm_segment_;
if (pool.handle == 0 || gpuvm_segment_max_alloc_ == 0) {
DevLogPrintfError("Invalid argument, pool_handle: 0x%x , max_alloc: %u \n",
@@ -2169,7 +2180,7 @@ void* Device::deviceLocalAlloc(size_t size, bool atomics, bool pseudo_fine_grain
}
void* ptr = nullptr;
hsa_status_t stat = hsa_amd_memory_pool_allocate(pool, size, hsa_mem_flags, &ptr);
hsa_status_t stat = hsa_amd_memory_pool_allocate(pool, size, 0, &ptr);
ClPrint(amd::LOG_DEBUG, amd::LOG_MEM, "Allocate hsa device memory %p, size 0x%zx", ptr, size);
if (stat != HSA_STATUS_SUCCESS) {
LogError("Fail allocation local memory");
+2
Просмотреть файл
@@ -288,6 +288,7 @@ struct AgentInfo {
hsa_amd_memory_pool_t fine_grain_pool;
hsa_amd_memory_pool_t coarse_grain_pool;
hsa_amd_memory_pool_t kern_arg_pool;
hsa_amd_memory_pool_t ext_fine_grain_pool_;
};
//! A HSA device ordinal (physical HSA device)
@@ -590,6 +591,7 @@ class Device : public NullDevice {
hsa_amd_memory_pool_t system_kernarg_segment_;
hsa_amd_memory_pool_t gpuvm_segment_;
hsa_amd_memory_pool_t gpu_fine_grained_segment_;
hsa_amd_memory_pool_t gpu_ext_fine_grained_segment_;
hsa_signal_t prefetch_signal_; //!< Prefetch signal, used to explicitly prefetch SVM on device
std::atomic<int> cache_state_; //!< State of cache, kUnknown/kFlushedToDevice/kFlushedToSystem