From 79d12df147fc16f592ea14191d0190bb5b439f83 Mon Sep 17 00:00:00 2001 From: German Date: Thu, 17 Nov 2022 14:44:24 -0500 Subject: [PATCH] SWDEV-363074 - Adjust staging copy limits in Windows Pinned copy can cause big performance drops because of slow pinning under Windows. Use up to 128MB for staging transfers. Change staging buffer size to 4MB. Linux path should still have the old defaults. Change-Id: I954edceb3ec89e8e670be116aa2d0a9564c8b11c --- rocclr/device/pal/palsettings.cpp | 11 +++-------- rocclr/device/rocm/rocsettings.cpp | 9 +++++---- rocclr/utils/flags.hpp | 10 +++++----- 3 files changed, 13 insertions(+), 17 deletions(-) diff --git a/rocclr/device/pal/palsettings.cpp b/rocclr/device/pal/palsettings.cpp index a144a7d5fa..dd1ab9700b 100644 --- a/rocclr/device/pal/palsettings.cpp +++ b/rocclr/device/pal/palsettings.cpp @@ -54,7 +54,7 @@ Settings::Settings() { stagedXferRead_ = true; stagedXferWrite_ = true; - stagedXferSize_ = GPU_STAGING_BUFFER_SIZE * Ki; + stagedXferSize_ = GPU_STAGING_BUFFER_SIZE * Mi; // We will enable staged read/write if we use local memory disablePersistent_ = false; @@ -78,14 +78,9 @@ Settings::Settings() { // By default use host blit blitEngine_ = BlitEngineHost; - constexpr size_t MaxPinnedXferSize = 64; - pinnedXferSize_ = std::min(GPU_PINNED_XFER_SIZE, MaxPinnedXferSize) * Mi; - - constexpr size_t PinnedMinXferSize = 4 * Mi; + pinnedXferSize_ = GPU_PINNED_MIN_XFER_SIZE * Mi; pinnedMinXferSize_ = flagIsDefault(GPU_PINNED_MIN_XFER_SIZE) - ? PinnedMinXferSize - : GPU_PINNED_MIN_XFER_SIZE * Ki; - pinnedMinXferSize_ = std::min(pinnedMinXferSize_, pinnedXferSize_); + ? 
128 * Mi : GPU_PINNED_MIN_XFER_SIZE * Mi; // Disable FP_FAST_FMA defines by default reportFMAF_ = false; diff --git a/rocclr/device/rocm/rocsettings.cpp b/rocclr/device/rocm/rocsettings.cpp index 228b9a84fd..222198dbec 100644 --- a/rocclr/device/rocm/rocsettings.cpp +++ b/rocclr/device/rocm/rocsettings.cpp @@ -62,14 +62,15 @@ Settings::Settings() { stagedXferRead_ = true; stagedXferWrite_ = true; - stagedXferSize_ = GPU_STAGING_BUFFER_SIZE * Ki; + stagedXferSize_ = flagIsDefault(GPU_STAGING_BUFFER_SIZE) + ? 1 * Mi : GPU_STAGING_BUFFER_SIZE * Mi; // Initialize transfer buffer size to 1MB by default xferBufSize_ = 1024 * Ki; - const static size_t MaxPinnedXferSize = 128; - pinnedXferSize_ = std::min(GPU_PINNED_XFER_SIZE, MaxPinnedXferSize) * Mi; - pinnedMinXferSize_ = std::min(GPU_PINNED_MIN_XFER_SIZE * Ki, pinnedXferSize_); + pinnedXferSize_ = GPU_PINNED_MIN_XFER_SIZE * Mi; + pinnedMinXferSize_ = flagIsDefault(GPU_PINNED_MIN_XFER_SIZE) + ? 1 * Mi : GPU_PINNED_MIN_XFER_SIZE * Mi; sdmaCopyThreshold_ = GPU_FORCE_BLIT_COPY_SIZE * Ki; diff --git a/rocclr/utils/flags.hpp b/rocclr/utils/flags.hpp index 364d716c54..66ae1a0b27 100644 --- a/rocclr/utils/flags.hpp +++ b/rocclr/utils/flags.hpp @@ -66,8 +66,8 @@ release(uint, GPU_CP_DMA_COPY_SIZE, 1, \ "Set maximum size of CP DMA copy in KiB") \ release(uint, GPU_MAX_HEAP_SIZE, 100, \ "Set maximum size of the GPU heap to % of board memory") \ -release(uint, GPU_STAGING_BUFFER_SIZE, 1024, \ - "Size of the GPU staging buffer in KiB") \ +release(uint, GPU_STAGING_BUFFER_SIZE, 4, \ + "Size of the GPU staging buffer in MiB") \ release(bool, GPU_DUMP_BLIT_KERNELS, false, \ "Dump the kernels for blit manager") \ release(uint, GPU_BLIT_ENGINE_TYPE, 0x0, \ @@ -93,9 +93,9 @@ debug(cstring, AMD_OCL_SUBST_OBJFILE, 0, \ debug(bool, AMD_OCL_ENABLE_MESSAGE_BOX, false, \ "Enable the error dialog on Windows") \ release(size_t, GPU_PINNED_XFER_SIZE, 32, \ - "The pinned buffer size for pinning in read/write transfers") \ -release(size_t, 
GPU_PINNED_MIN_XFER_SIZE, 1024, \ - "The minimal buffer size for pinned read/write transfers in KBytes") \ + "The pinned buffer size for pinning in read/write transfers in MiB") \ +release(size_t, GPU_PINNED_MIN_XFER_SIZE, 128, \ + "The minimal buffer size for pinned read/write transfers in MiB") \ release(size_t, GPU_RESOURCE_CACHE_SIZE, 64, \ "The resource cache size in MB") \ release(size_t, GPU_MAX_SUBALLOC_SIZE, 4096, \