FEAT-47686 - Changes to support new uncached memory segment in ROCr.
Change-Id: I0ba8769d6737cdf1cc8a8644b2e82109f584a430
[ROCm/clr commit: aca7d9e14a]
This commit is contained in:
committed by
Karthik Jayaprakash
parent
7e80d81b1e
commit
03504e2f6c
@@ -901,6 +901,12 @@ hsa_status_t Device::iterateGpuMemoryPoolCallback(hsa_amd_memory_pool_t pool, vo
|
||||
return stat;
|
||||
}
|
||||
|
||||
// If the extended-scope fine-grained flag is set, record this pool and break out
|
||||
if ((global_flag & HSA_REGION_GLOBAL_FLAG_EXTENDED_SCOPE_FINE_GRAINED) != 0) {
|
||||
dev->gpu_ext_fine_grained_segment_ = pool;
|
||||
break;
|
||||
}
|
||||
|
||||
if ((global_flag & HSA_REGION_GLOBAL_FLAG_FINE_GRAINED) != 0) {
|
||||
dev->gpu_fine_grained_segment_ = pool;
|
||||
} else if ((global_flag & HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED) != 0) {
|
||||
@@ -959,6 +965,12 @@ hsa_status_t Device::iterateCpuMemoryPoolCallback(hsa_amd_memory_pool_t pool, vo
|
||||
break;
|
||||
}
|
||||
|
||||
// If the extended-scope fine-grained flag is set, record this pool and break out
|
||||
if ((global_flag & HSA_REGION_GLOBAL_FLAG_EXTENDED_SCOPE_FINE_GRAINED) != 0) {
|
||||
agentInfo->ext_fine_grain_pool_ = pool;
|
||||
break;
|
||||
}
|
||||
|
||||
if ((global_flag & HSA_REGION_GLOBAL_FLAG_FINE_GRAINED) != 0) {
|
||||
if (agentInfo->fine_grain_pool.handle == 0) {
|
||||
agentInfo->fine_grain_pool = pool;
|
||||
@@ -2158,9 +2170,8 @@ bool Device::allowPeerAccess(device::Memory* memory) const {
|
||||
}
|
||||
|
||||
void* Device::deviceLocalAlloc(size_t size, bool atomics, bool pseudo_fine_grain) const {
|
||||
- const hsa_amd_memory_pool_t& pool = (atomics) ? gpu_fine_grained_segment_ : gpuvm_segment_;
|
||||
- uint32_t hsa_mem_flags = (atomics && pseudo_fine_grain) ? HSA_AMD_MEMORY_POOL_PCIE_FLAG
|
||||
- : HSA_AMD_MEMORY_POOL_STANDARD_FLAG;
|
||||
+ const hsa_amd_memory_pool_t& pool = (pseudo_fine_grain) ? gpu_ext_fine_grained_segment_
|
||||
+ : (atomics) ? gpu_fine_grained_segment_ : gpuvm_segment_;
|
||||
|
||||
if (pool.handle == 0 || gpuvm_segment_max_alloc_ == 0) {
|
||||
DevLogPrintfError("Invalid argument, pool_handle: 0x%x , max_alloc: %u \n",
|
||||
@@ -2169,7 +2180,7 @@ void* Device::deviceLocalAlloc(size_t size, bool atomics, bool pseudo_fine_grain
|
||||
}
|
||||
|
||||
void* ptr = nullptr;
|
||||
- hsa_status_t stat = hsa_amd_memory_pool_allocate(pool, size, hsa_mem_flags, &ptr);
|
||||
+ hsa_status_t stat = hsa_amd_memory_pool_allocate(pool, size, 0, &ptr);
|
||||
ClPrint(amd::LOG_DEBUG, amd::LOG_MEM, "Allocate hsa device memory %p, size 0x%zx", ptr, size);
|
||||
if (stat != HSA_STATUS_SUCCESS) {
|
||||
LogError("Fail allocation local memory");
|
||||
|
||||
@@ -288,6 +288,7 @@ struct AgentInfo {
|
||||
hsa_amd_memory_pool_t fine_grain_pool;
|
||||
hsa_amd_memory_pool_t coarse_grain_pool;
|
||||
hsa_amd_memory_pool_t kern_arg_pool;
|
||||
hsa_amd_memory_pool_t ext_fine_grain_pool_;
|
||||
};
|
||||
|
||||
//! A HSA device ordinal (physical HSA device)
|
||||
@@ -590,6 +591,7 @@ class Device : public NullDevice {
|
||||
hsa_amd_memory_pool_t system_kernarg_segment_;
|
||||
hsa_amd_memory_pool_t gpuvm_segment_;
|
||||
hsa_amd_memory_pool_t gpu_fine_grained_segment_;
|
||||
hsa_amd_memory_pool_t gpu_ext_fine_grained_segment_;
|
||||
hsa_signal_t prefetch_signal_; //!< Prefetch signal, used to explicitly prefetch SVM on device
|
||||
std::atomic<int> cache_state_; //!< State of cache, kUnknown/kFlushedToDevice/kFlushedToSystem
|
||||
|
||||
|
||||
Reference in new issue
Block a user