From 4b6a6ba8e83bdae9b10bd6ac8adba1c74a1724ed Mon Sep 17 00:00:00 2001 From: German Date: Thu, 17 Nov 2022 14:44:24 -0500 Subject: [PATCH] SWDEV-363074 - Adjust staging copy limits in Windows Pinned copies can cause big performance drops because pinning is slow under Windows. Use up to 128MB for staging transfers. Change staging buffer size to 4MB. Linux path should still have the old defaults. Change-Id: I954edceb3ec89e8e670be116aa2d0a9564c8b11c [ROCm/clr commit: 79d12df147fc16f592ea14191d0190bb5b439f83] --- projects/clr/rocclr/device/pal/palsettings.cpp | 11 +++-------- projects/clr/rocclr/device/rocm/rocsettings.cpp | 9 +++++---- projects/clr/rocclr/utils/flags.hpp | 10 +++++----- 3 files changed, 13 insertions(+), 17 deletions(-) diff --git a/projects/clr/rocclr/device/pal/palsettings.cpp b/projects/clr/rocclr/device/pal/palsettings.cpp index a144a7d5fa..dd1ab9700b 100644 --- a/projects/clr/rocclr/device/pal/palsettings.cpp +++ b/projects/clr/rocclr/device/pal/palsettings.cpp @@ -54,7 +54,7 @@ Settings::Settings() { stagedXferRead_ = true; stagedXferWrite_ = true; - stagedXferSize_ = GPU_STAGING_BUFFER_SIZE * Ki; + stagedXferSize_ = GPU_STAGING_BUFFER_SIZE * Mi; // We will enable staged read/write if we use local memory disablePersistent_ = false; @@ -78,14 +78,9 @@ Settings::Settings() { // By default use host blit blitEngine_ = BlitEngineHost; - constexpr size_t MaxPinnedXferSize = 64; - pinnedXferSize_ = std::min(GPU_PINNED_XFER_SIZE, MaxPinnedXferSize) * Mi; - - constexpr size_t PinnedMinXferSize = 4 * Mi; + pinnedXferSize_ = GPU_PINNED_MIN_XFER_SIZE * Mi; pinnedMinXferSize_ = flagIsDefault(GPU_PINNED_MIN_XFER_SIZE) - ? PinnedMinXferSize - : GPU_PINNED_MIN_XFER_SIZE * Ki; - pinnedMinXferSize_ = std::min(pinnedMinXferSize_, pinnedXferSize_); + ? 
128 * Mi : GPU_PINNED_MIN_XFER_SIZE * Mi; // Disable FP_FAST_FMA defines by default reportFMAF_ = false; diff --git a/projects/clr/rocclr/device/rocm/rocsettings.cpp b/projects/clr/rocclr/device/rocm/rocsettings.cpp index 228b9a84fd..222198dbec 100644 --- a/projects/clr/rocclr/device/rocm/rocsettings.cpp +++ b/projects/clr/rocclr/device/rocm/rocsettings.cpp @@ -62,14 +62,15 @@ Settings::Settings() { stagedXferRead_ = true; stagedXferWrite_ = true; - stagedXferSize_ = GPU_STAGING_BUFFER_SIZE * Ki; + stagedXferSize_ = flagIsDefault(GPU_STAGING_BUFFER_SIZE) + ? 1 * Mi : GPU_STAGING_BUFFER_SIZE * Mi; // Initialize transfer buffer size to 1MB by default xferBufSize_ = 1024 * Ki; - const static size_t MaxPinnedXferSize = 128; - pinnedXferSize_ = std::min(GPU_PINNED_XFER_SIZE, MaxPinnedXferSize) * Mi; - pinnedMinXferSize_ = std::min(GPU_PINNED_MIN_XFER_SIZE * Ki, pinnedXferSize_); + pinnedXferSize_ = GPU_PINNED_MIN_XFER_SIZE * Mi; + pinnedMinXferSize_ = flagIsDefault(GPU_PINNED_MIN_XFER_SIZE) + ? 
1 * Mi : GPU_PINNED_MIN_XFER_SIZE * Mi; sdmaCopyThreshold_ = GPU_FORCE_BLIT_COPY_SIZE * Ki; diff --git a/projects/clr/rocclr/utils/flags.hpp b/projects/clr/rocclr/utils/flags.hpp index 364d716c54..66ae1a0b27 100644 --- a/projects/clr/rocclr/utils/flags.hpp +++ b/projects/clr/rocclr/utils/flags.hpp @@ -66,8 +66,8 @@ release(uint, GPU_CP_DMA_COPY_SIZE, 1, \ "Set maximum size of CP DMA copy in KiB") \ release(uint, GPU_MAX_HEAP_SIZE, 100, \ "Set maximum size of the GPU heap to % of board memory") \ -release(uint, GPU_STAGING_BUFFER_SIZE, 1024, \ - "Size of the GPU staging buffer in KiB") \ +release(uint, GPU_STAGING_BUFFER_SIZE, 4, \ + "Size of the GPU staging buffer in MiB") \ release(bool, GPU_DUMP_BLIT_KERNELS, false, \ "Dump the kernels for blit manager") \ release(uint, GPU_BLIT_ENGINE_TYPE, 0x0, \ @@ -93,9 +93,9 @@ debug(cstring, AMD_OCL_SUBST_OBJFILE, 0, \ debug(bool, AMD_OCL_ENABLE_MESSAGE_BOX, false, \ "Enable the error dialog on Windows") \ release(size_t, GPU_PINNED_XFER_SIZE, 32, \ - "The pinned buffer size for pinning in read/write transfers") \ -release(size_t, GPU_PINNED_MIN_XFER_SIZE, 1024, \ - "The minimal buffer size for pinned read/write transfers in KBytes") \ + "The pinned buffer size for pinning in read/write transfers in MiB") \ +release(size_t, GPU_PINNED_MIN_XFER_SIZE, 128, \ + "The minimal buffer size for pinned read/write transfers in MiB") \ release(size_t, GPU_RESOURCE_CACHE_SIZE, 64, \ "The resource cache size in MB") \ release(size_t, GPU_MAX_SUBALLOC_SIZE, 4096, \