SWDEV-558080 - Add recommended granularity (#1176)
* Add recommended granularity * Improve granularity testing * Update based on feedback
Tento commit je obsažen v:
@@ -719,10 +719,10 @@ class Graph {
|
||||
void* ptr;
|
||||
const auto& dev_info = g_devices[0]->devices()[0]->info();
|
||||
|
||||
size = amd::alignUp(size, dev_info.virtualMemAllocGranularity_);
|
||||
size = amd::alignUp(size, dev_info.virtualMemAllocGranularityRecommended_);
|
||||
// Single virtual alloc would reserve for all devices.
|
||||
ptr = g_devices[0]->devices()[0]->virtualAlloc(startAddress, size,
|
||||
dev_info.virtualMemAllocGranularity_);
|
||||
dev_info.virtualMemAllocGranularityRecommended_);
|
||||
if (ptr == nullptr) {
|
||||
LogError("Failed to reserve Virtual Address");
|
||||
}
|
||||
@@ -2412,7 +2412,7 @@ class GraphMemAllocNode final : public GraphNode {
|
||||
}
|
||||
// Allocate real memory for mapping
|
||||
const auto& dev_info = queue()->device().info();
|
||||
auto aligned_size = amd::alignUp(size_, dev_info.virtualMemAllocGranularity_);
|
||||
auto aligned_size = amd::alignUp(size_, dev_info.virtualMemAllocGranularityRecommended_);
|
||||
auto dptr = graph_->AllocateMemory(aligned_size, static_cast<hip::Stream*>(queue()), nullptr);
|
||||
if (dptr == nullptr) {
|
||||
setStatus(CL_INVALID_OPERATION);
|
||||
@@ -2609,7 +2609,7 @@ class GraphMemFreeNode : public GraphNode {
|
||||
// Unmap virtual address from memory
|
||||
amd::Command* cmd = new VirtualMemFreeNode(
|
||||
graph, stream->DeviceId(), *stream, amd::Command::EventWaitList{}, device_ptr_,
|
||||
amd::alignUp(va->getSize(), dev_info.virtualMemAllocGranularity_), nullptr);
|
||||
amd::alignUp(va->getSize(), dev_info.virtualMemAllocGranularityRecommended_), nullptr);
|
||||
commands_.push_back(cmd);
|
||||
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_MEM_POOL, "Graph FreeMem create: %p", device_ptr_);
|
||||
}
|
||||
|
||||
@@ -60,7 +60,7 @@ hipError_t hipMemAddressReserve(void** ptr, size_t size, size_t alignment, void*
|
||||
}
|
||||
|
||||
const auto& dev_info = g_devices[0]->devices()[0]->info();
|
||||
if (size == 0 || ((size % dev_info.virtualMemAllocGranularity_) != 0) ||
|
||||
if (size == 0 || ((size % dev_info.virtualMemAllocGranularityMinimum_) != 0) ||
|
||||
((alignment & (alignment - 1)) != 0)) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
@@ -228,7 +228,11 @@ hipError_t hipMemGetAllocationGranularity(size_t* granularity, const hipMemAlloc
|
||||
amd::Context* amdContext = useHostDevice ? hip::host_context : curDevContext;
|
||||
const auto& dev_info = amdContext->devices()[0]->info();
|
||||
|
||||
*granularity = dev_info.virtualMemAllocGranularity_;
|
||||
if (option == hipMemAllocationGranularityMinimum) {
|
||||
*granularity = dev_info.virtualMemAllocGranularityMinimum_;
|
||||
} else {
|
||||
*granularity = dev_info.virtualMemAllocGranularityRecommended_;
|
||||
}
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
|
||||
@@ -650,7 +650,8 @@ struct Info : public amd::EmbeddedObject {
|
||||
bool pcie_atomics_; //!< Pcie atomics support flag
|
||||
|
||||
bool virtualMemoryManagement_; //!< Virtual memory management support
|
||||
size_t virtualMemAllocGranularity_; //!< virtual memory allocation size/addr granularity
|
||||
size_t virtualMemAllocGranularityMinimum_; //!< minimum virtual memory allocation size/addr granularity
|
||||
size_t virtualMemAllocGranularityRecommended_; //!< recommended virtual memory allocation size/addr granularity
|
||||
|
||||
uint32_t driverNodeId_;
|
||||
//! Number of Physical SGPRs per SIMD
|
||||
|
||||
@@ -630,7 +630,9 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp,
|
||||
#endif // _WIN64
|
||||
}
|
||||
info_.virtualMemoryManagement_ = true;
|
||||
info_.virtualMemAllocGranularity_ =
|
||||
info_.virtualMemAllocGranularityMinimum_ =
|
||||
static_cast<size_t>(palProp.gpuMemoryProperties.virtualMemAllocGranularity);
|
||||
info_.virtualMemAllocGranularityRecommended_ =
|
||||
static_cast<size_t>(palProp.gpuMemoryProperties.virtualMemAllocGranularity);
|
||||
info_.vgprAllocGranularity_ = palProp.gfxipProperties.shaderCore.vgprAllocGranularity;
|
||||
info_.vgprsPerSimd_ = palProp.gfxipProperties.shaderCore.vgprsPerSimd;
|
||||
|
||||
@@ -792,13 +792,22 @@ hsa_status_t Device::iterateGpuMemoryPoolCallback(hsa_amd_memory_pool_t pool, vo
|
||||
|
||||
// Query the minimum allocation granularity for this pool.
|
||||
stat = Hsa::memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE,
|
||||
&(dev->info_.virtualMemAllocGranularity_));
|
||||
&(dev->info_.virtualMemAllocGranularityMinimum_));
|
||||
if (stat != HSA_STATUS_SUCCESS) {
|
||||
LogPrintfError(
|
||||
"Cannot query HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE info"
|
||||
"failed with hsa_status: %d \n",
|
||||
stat);
|
||||
}
|
||||
// Query the recommended granularity for this pool.
|
||||
stat = Hsa::memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_REC_GRANULE,
|
||||
&(dev->info_.virtualMemAllocGranularityRecommended_));
|
||||
if (stat != HSA_STATUS_SUCCESS) {
|
||||
LogPrintfError(
|
||||
"Cannot query HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_REC_GRANULE info"
|
||||
"failed with hsa_status: %d \n",
|
||||
stat);
|
||||
}
|
||||
}
|
||||
|
||||
if (dev->gpuvm_segment_.handle == 0) {
|
||||
|
||||
@@ -51,7 +51,7 @@ bool VmHeap::ReleaseAddressRange(void* addr) {
|
||||
// ================================================================================================
|
||||
bool VmHeap::CommitMemory(void* addr, size_t size) {
|
||||
const auto& dev_info = device_->info();
|
||||
size_t granularity = dev_info.virtualMemAllocGranularity_;
|
||||
size_t granularity = dev_info.virtualMemAllocGranularityRecommended_;
|
||||
auto padded_size = alignUp(size, granularity);
|
||||
|
||||
// Allocate physical memory
|
||||
|
||||
+12
-45
@@ -47,48 +47,6 @@ void getGranularity(size_t* granularity, hipMemAllocationGranularity_flags optio
|
||||
HIP_CHECK(hipMemGetAllocationGranularity(granularity, &prop, option));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Functional Test to get granularity size for
|
||||
* hipMemAllocationGranularityMinimum option.
|
||||
* ------------------------
|
||||
* - unit/virtualMemoryManagement/hipMemGetAllocationGranularity.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 6.1
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemGetAllocationGranularity_MinGranularity") {
|
||||
HIP_CHECK(hipFree(0));
|
||||
size_t granularity = 0;
|
||||
hipDevice_t device;
|
||||
HIP_CHECK(hipDeviceGet(&device, 0));
|
||||
checkVMMSupported(device);
|
||||
getGranularity(&granularity, hipMemAllocationGranularityMinimum, 0);
|
||||
REQUIRE(granularity > 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
* - Functional Test to get granularity size for
|
||||
* hipMemAllocationGranularityRecommended option.
|
||||
* ------------------------
|
||||
* - unit/virtualMemoryManagement/hipMemGetAllocationGranularity.cc
|
||||
* Test requirements
|
||||
* ------------------------
|
||||
* - HIP_VERSION >= 6.1
|
||||
*/
|
||||
TEST_CASE("Unit_hipMemGetAllocationGranularity_RecommendedGranularity") {
|
||||
HIP_CHECK(hipFree(0));
|
||||
size_t granularity = 0;
|
||||
hipDevice_t device;
|
||||
HIP_CHECK(hipDeviceGet(&device, 0));
|
||||
checkVMMSupported(device);
|
||||
getGranularity(&granularity, hipMemAllocationGranularityRecommended, 0);
|
||||
REQUIRE(granularity > 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Description
|
||||
* ------------------------
|
||||
@@ -105,12 +63,21 @@ TEST_CASE("Unit_hipMemGetAllocationGranularity_AllGPUs") {
|
||||
int numDevices = 0;
|
||||
HIP_CHECK(hipGetDeviceCount(&numDevices));
|
||||
for (int dev = 0; dev < numDevices; dev++) {
|
||||
size_t granularity = 0;
|
||||
hipDevice_t device;
|
||||
HIP_CHECK(hipDeviceGet(&device, dev));
|
||||
checkVMMSupported(device);
|
||||
getGranularity(&granularity, hipMemAllocationGranularityRecommended, dev);
|
||||
REQUIRE(granularity > 0);
|
||||
|
||||
size_t min_granularity = 0;
|
||||
size_t recommended_granularity = 0;
|
||||
|
||||
getGranularity(&min_granularity, hipMemAllocationGranularityMinimum, dev);
|
||||
REQUIRE(min_granularity >= 1024);
|
||||
|
||||
getGranularity(&recommended_granularity, hipMemAllocationGranularityRecommended, dev);
|
||||
REQUIRE(recommended_granularity >= 1024);
|
||||
|
||||
// Check that the recommended granularity is greater than or equal to the minimum granularity
|
||||
REQUIRE(recommended_granularity >= min_granularity);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Odkázat v novém úkolu
Zablokovat Uživatele