SWDEV-558080 - Add recommended granularity (#1176)

* Add recommended granularity

* Improve granularity testing

* Update based on feedback
Tento commit je obsažen v:
AidanBeltonS
2025-11-26 16:10:58 +00:00
odevzdal GitHub
rodič 422253f871
revize d849b88aef
7 změnil soubory, kde provedl 38 přidání a 55 odebrání
+4 -4
Zobrazit soubor
@@ -719,10 +719,10 @@ class Graph {
void* ptr;
const auto& dev_info = g_devices[0]->devices()[0]->info();
size = amd::alignUp(size, dev_info.virtualMemAllocGranularity_);
size = amd::alignUp(size, dev_info.virtualMemAllocGranularityRecommended_);
// Single virtual alloc would reserve for all devices.
ptr = g_devices[0]->devices()[0]->virtualAlloc(startAddress, size,
dev_info.virtualMemAllocGranularity_);
dev_info.virtualMemAllocGranularityRecommended_);
if (ptr == nullptr) {
LogError("Failed to reserve Virtual Address");
}
@@ -2412,7 +2412,7 @@ class GraphMemAllocNode final : public GraphNode {
}
// Allocate real memory for mapping
const auto& dev_info = queue()->device().info();
auto aligned_size = amd::alignUp(size_, dev_info.virtualMemAllocGranularity_);
auto aligned_size = amd::alignUp(size_, dev_info.virtualMemAllocGranularityRecommended_);
auto dptr = graph_->AllocateMemory(aligned_size, static_cast<hip::Stream*>(queue()), nullptr);
if (dptr == nullptr) {
setStatus(CL_INVALID_OPERATION);
@@ -2609,7 +2609,7 @@ class GraphMemFreeNode : public GraphNode {
// Unmap virtual address from memory
amd::Command* cmd = new VirtualMemFreeNode(
graph, stream->DeviceId(), *stream, amd::Command::EventWaitList{}, device_ptr_,
amd::alignUp(va->getSize(), dev_info.virtualMemAllocGranularity_), nullptr);
amd::alignUp(va->getSize(), dev_info.virtualMemAllocGranularityRecommended_), nullptr);
commands_.push_back(cmd);
ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_MEM_POOL, "Graph FreeMem create: %p", device_ptr_);
}
+6 -2
Zobrazit soubor
@@ -60,7 +60,7 @@ hipError_t hipMemAddressReserve(void** ptr, size_t size, size_t alignment, void*
}
const auto& dev_info = g_devices[0]->devices()[0]->info();
if (size == 0 || ((size % dev_info.virtualMemAllocGranularity_) != 0) ||
if (size == 0 || ((size % dev_info.virtualMemAllocGranularityMinimum_) != 0) ||
((alignment & (alignment - 1)) != 0)) {
HIP_RETURN(hipErrorInvalidValue);
}
@@ -228,7 +228,11 @@ hipError_t hipMemGetAllocationGranularity(size_t* granularity, const hipMemAlloc
amd::Context* amdContext = useHostDevice ? hip::host_context : curDevContext;
const auto& dev_info = amdContext->devices()[0]->info();
*granularity = dev_info.virtualMemAllocGranularity_;
if (option == hipMemAllocationGranularityMinimum) {
*granularity = dev_info.virtualMemAllocGranularityMinimum_;
} else {
*granularity = dev_info.virtualMemAllocGranularityRecommended_;
}
HIP_RETURN(hipSuccess);
}
+2 -1
Zobrazit soubor
@@ -650,7 +650,8 @@ struct Info : public amd::EmbeddedObject {
bool pcie_atomics_; //!< Pcie atomics support flag
bool virtualMemoryManagement_; //!< Virtual memory management support
size_t virtualMemAllocGranularity_; //!< virtual memory allocation size/addr granularity
size_t virtualMemAllocGranularityMinimum_; //!< minimum virtual memory allocation size/addr granularity
size_t virtualMemAllocGranularityRecommended_; //!< recommended virtual memory allocation size/addr granularity
uint32_t driverNodeId_;
//! Number of Physical SGPRs per SIMD
+3 -1
Zobrazit soubor
@@ -630,7 +630,9 @@ void NullDevice::fillDeviceInfo(const Pal::DeviceProperties& palProp,
#endif // _WIN64
}
info_.virtualMemoryManagement_ = true;
info_.virtualMemAllocGranularity_ =
info_.virtualMemAllocGranularityMinimum_ =
static_cast<size_t>(palProp.gpuMemoryProperties.virtualMemAllocGranularity);
info_.virtualMemAllocGranularityRecommended_ =
static_cast<size_t>(palProp.gpuMemoryProperties.virtualMemAllocGranularity);
info_.vgprAllocGranularity_ = palProp.gfxipProperties.shaderCore.vgprAllocGranularity;
info_.vgprsPerSimd_ = palProp.gfxipProperties.shaderCore.vgprsPerSimd;
+10 -1
Zobrazit soubor
@@ -792,13 +792,22 @@ hsa_status_t Device::iterateGpuMemoryPoolCallback(hsa_amd_memory_pool_t pool, vo
// Query the minimum granularity for this pool.
stat = Hsa::memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE,
&(dev->info_.virtualMemAllocGranularity_));
&(dev->info_.virtualMemAllocGranularityMinimum_));
if (stat != HSA_STATUS_SUCCESS) {
LogPrintfError(
"Cannot query HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE info"
"failed with hsa_status: %d \n",
stat);
}
// Query the recommended granularity for this pool.
stat = Hsa::memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_REC_GRANULE,
&(dev->info_.virtualMemAllocGranularityRecommended_));
if (stat != HSA_STATUS_SUCCESS) {
LogPrintfError(
"Cannot query HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_REC_GRANULE info"
"failed with hsa_status: %d \n",
stat);
}
}
if (dev->gpuvm_segment_.handle == 0) {
+1 -1
Zobrazit soubor
@@ -51,7 +51,7 @@ bool VmHeap::ReleaseAddressRange(void* addr) {
// ================================================================================================
bool VmHeap::CommitMemory(void* addr, size_t size) {
const auto& dev_info = device_->info();
size_t granularity = dev_info.virtualMemAllocGranularity_;
size_t granularity = dev_info.virtualMemAllocGranularityRecommended_;
auto padded_size = alignUp(size, granularity);
// Allocate physical memory
@@ -47,48 +47,6 @@ void getGranularity(size_t* granularity, hipMemAllocationGranularity_flags optio
HIP_CHECK(hipMemGetAllocationGranularity(granularity, &prop, option));
}
/**
 * Test Description
 * ------------------------
 * - Functional test that queries the allocation granularity with the
 * hipMemAllocationGranularityMinimum option and requires the reported
 * value to be non-zero.
 * ------------------------
 * - unit/virtualMemoryManagement/hipMemGetAllocationGranularity.cc
 * Test requirements
 * ------------------------
 * - HIP_VERSION >= 6.1
 */
TEST_CASE("Unit_hipMemGetAllocationGranularity_MinGranularity") {
// NOTE(review): hipFree(0) is presumably used only to force runtime initialization - confirm.
HIP_CHECK(hipFree(0));
// Start at 0 so the final REQUIRE fails if the query never writes a value.
size_t granularity = 0;
hipDevice_t device;
// Fetch the handle for device ordinal 0.
HIP_CHECK(hipDeviceGet(&device, 0));
// NOTE(review): helper defined elsewhere; presumably bails out when virtual memory
// management is unsupported on this device - confirm.
checkVMMSupported(device);
// getGranularity wraps hipMemGetAllocationGranularity in HIP_CHECK; the trailing 0 is
// presumably the device ordinal - confirm against the helper's signature.
getGranularity(&granularity, hipMemAllocationGranularityMinimum, 0);
REQUIRE(granularity > 0);
}
/**
 * Test Description
 * ------------------------
 * - Functional test that queries the allocation granularity with the
 * hipMemAllocationGranularityRecommended option and requires the reported
 * value to be non-zero.
 * ------------------------
 * - unit/virtualMemoryManagement/hipMemGetAllocationGranularity.cc
 * Test requirements
 * ------------------------
 * - HIP_VERSION >= 6.1
 */
TEST_CASE("Unit_hipMemGetAllocationGranularity_RecommendedGranularity") {
// NOTE(review): hipFree(0) is presumably used only to force runtime initialization - confirm.
HIP_CHECK(hipFree(0));
// Start at 0 so the final REQUIRE fails if the query never writes a value.
size_t granularity = 0;
hipDevice_t device;
// Fetch the handle for device ordinal 0.
HIP_CHECK(hipDeviceGet(&device, 0));
// NOTE(review): helper defined elsewhere; presumably bails out when virtual memory
// management is unsupported on this device - confirm.
checkVMMSupported(device);
// getGranularity wraps hipMemGetAllocationGranularity in HIP_CHECK; the trailing 0 is
// presumably the device ordinal - confirm against the helper's signature.
getGranularity(&granularity, hipMemAllocationGranularityRecommended, 0);
REQUIRE(granularity > 0);
}
/**
* Test Description
* ------------------------
@@ -105,12 +63,21 @@ TEST_CASE("Unit_hipMemGetAllocationGranularity_AllGPUs") {
int numDevices = 0;
HIP_CHECK(hipGetDeviceCount(&numDevices));
for (int dev = 0; dev < numDevices; dev++) {
size_t granularity = 0;
hipDevice_t device;
HIP_CHECK(hipDeviceGet(&device, dev));
checkVMMSupported(device);
getGranularity(&granularity, hipMemAllocationGranularityRecommended, dev);
REQUIRE(granularity > 0);
size_t min_granularity = 0;
size_t recommended_granularity = 0;
getGranularity(&min_granularity, hipMemAllocationGranularityMinimum, dev);
REQUIRE(min_granularity >= 1024);
getGranularity(&recommended_granularity, hipMemAllocationGranularityRecommended, dev);
REQUIRE(recommended_granularity >= 1024);
// Check the recommended_granularity is greater than or equal to the minimum
REQUIRE(recommended_granularity >= min_granularity);
}
}