From 01a8efbb76e530f5d576fb10d8ec30168eb2ada6 Mon Sep 17 00:00:00 2001 From: gilbertlee-amd <44450918+gilbertlee-amd@users.noreply.github.com> Date: Tue, 15 Jun 2021 08:41:57 -0600 Subject: [PATCH] [TransferBench] Adding ability to specify source data pattern (#394) * [TransferBench] Adding ability to specify source data pattern [ROCm/rccl commit: ff413be933653cc023c2eb05371b6b2666948a97] --- projects/rccl/tools/TransferBench/EnvVars.hpp | 64 +++++++++++++++++++ .../tools/TransferBench/TransferBench.cpp | 18 ++++-- .../tools/TransferBench/TransferBench.hpp | 2 +- 3 files changed, 79 insertions(+), 5 deletions(-) diff --git a/projects/rccl/tools/TransferBench/EnvVars.hpp b/projects/rccl/tools/TransferBench/EnvVars.hpp index 258f0ee33c..d299b93bf6 100644 --- a/projects/rccl/tools/TransferBench/EnvVars.hpp +++ b/projects/rccl/tools/TransferBench/EnvVars.hpp @@ -25,6 +25,7 @@ public: int numIterations; // Number of timed iterations to perform int samplingFactor; // Affects how many different values of N are generated (when N set to 0) int numCpuPerLink; // Number of CPU child threads to use per CPU link + std::vector<float> fillPattern; // Pattern of floats used to fill source data // Constructor that collects values EnvVars() @@ -42,6 +43,59 @@ public: samplingFactor = GetEnvVar("SAMPLING_FACTOR" , DEFAULT_SAMPLING_FACTOR); numCpuPerLink = GetEnvVar("NUM_CPU_PER_LINK" , DEFAULT_NUM_CPU_PER_LINK); + // Check for fill pattern + char* pattern = getenv("FILL_PATTERN"); + if (pattern != NULL) + { + int patternLen = strlen(pattern); + if (patternLen % 2) + { + printf("[ERROR] FILL_PATTERN must contain an even-number of hex digits\n"); + exit(1); + } + + // Figure out how many copies of the pattern are necessary to fill a 4-byte float properly + int copies; + switch (patternLen % 8) + { + case 0: copies = 1; break; + case 4: copies = 2; break; + default: copies = 4; break; + } + + fillPattern.resize(copies * patternLen / 8); + unsigned char* rawData = (unsigned char*) 
fillPattern.data(); + + unsigned char val = 0; + for (int c = 0; c < copies; c++) + { + for (int i = 0; i < patternLen; i++) + { + if ('0' <= pattern[i] && pattern[i] <= '9') + val += (pattern[i] - '0'); + else if ('A' <= pattern[i] && pattern[i] <= 'F') + val += (pattern[i] - 'A' + 10); + else if ('a' <= pattern[i] && pattern[i] <= 'f') + val += (pattern[i] - 'a' + 10); + else + { + printf("[ERROR] FILL_PATTERN must contain an even-number of hex digits (0-9'/a-f/A-F). (not %c)\n", pattern[i]); + exit(1); + } + + // Bit shift or else add and reset to 0 + if (i % 2 == 0) + val <<= 4; + else + { + rawData[(c * patternLen + i) / 2] = val; + val = 0; + } + } + } + } + else fillPattern.clear(); + // Perform some basic validation if (byteOffset % sizeof(float)) { @@ -87,6 +141,7 @@ public: printf(" NUM_ITERATIONS=I - Perform I timed iteration(s) per test\n"); printf(" SAMPLING_FACTOR=F - Add F samples (when possible) between powers of 2 when auto-generating data sizes\n"); printf(" NUM_CPU_PER_LINK=C - Use C threads per Link for CPU-executed copies\n"); + printf(" FILL_PATTERN=STR - Fill input buffer with pattern specified in hex digits (0-9,a-f,A-F). Must be even number of digits\n"); } // Display env var settings @@ -120,6 +175,15 @@ public: printf("%-20s = %12d : Running %d warmup iteration(s) per topology\n", "NUM_WARMUPS", numWarmups, numWarmups); printf("%-20s = %12d : Running %d timed iteration(s) per topology\n", "NUM_ITERATIONS", numIterations, numIterations); printf("%-20s = %12d : Using %d CPU thread(s) per CPU-based-copy Link\n", "NUM_CPU_PER_LINK", numCpuPerLink, numCpuPerLink); + printf("%-20s = %12s : ", "FILL_PATTERN", getenv("FILL_PATTERN") ? 
"(specified)" : "(unspecified)"); + if (fillPattern.size()) + { + printf("Pattern: %s", getenv("FILL_PATTERN")); + } + else + { + printf("Pseudo-random: (Element i = i modulo 383 + 31)"); + } printf("\n"); } }; diff --git a/projects/rccl/tools/TransferBench/TransferBench.cpp b/projects/rccl/tools/TransferBench/TransferBench.cpp index a6ab78c46d..cb7b4d7fa4 100644 --- a/projects/rccl/tools/TransferBench/TransferBench.cpp +++ b/projects/rccl/tools/TransferBench/TransferBench.cpp @@ -176,7 +176,7 @@ int main(int argc, char **argv) for (int i = 0; i < numLinks; i++) { // Initialize source memory with patterned data - CheckOrFill(MODE_FILL, N, ev.useMemset, ev.useHipCall, links[i].srcMem + initOffset); + CheckOrFill(MODE_FILL, N, ev.useMemset, ev.useHipCall, ev.fillPattern, links[i].srcMem + initOffset); // Each block needs to know src/dst pointers and how many elements to transfer // Figure out the sub-array each block does for this Link @@ -265,7 +265,7 @@ int main(int argc, char **argv) // Validate that each link has transferred correctly for (int i = 0; i < numLinks; i++) - CheckOrFill(MODE_CHECK, N, ev.useMemset, ev.useHipCall, links[i].dstMem + initOffset); + CheckOrFill(MODE_CHECK, N, ev.useMemset, ev.useHipCall, ev.fillPattern, links[i].dstMem + initOffset); // Report timings totalCpuTime = totalCpuTime / (1.0 * ev.numIterations) * 1000; @@ -828,7 +828,7 @@ void CheckPages(char* array, size_t numBytes, int targetId) } // Helper function to either fill a device pointer with pseudo-random data, or to check to see if it matches -void CheckOrFill(ModeType mode, int N, bool isMemset, bool isHipCall, float* ptr) +void CheckOrFill(ModeType mode, int N, bool isMemset, bool isHipCall, std::vector<float>const& fillPattern, float* ptr) { // Prepare reference resultx float* refBuffer = (float*)malloc(N * sizeof(float)); @@ -846,8 +846,18 @@ void CheckOrFill(ModeType mode, int N, bool isMemset, bool isHipCall, float* ptr } else { - for (int i = 0; i < N; i++) + // Fill with 
repeated pattern if specified + size_t patternLen = fillPattern.size(); + if (patternLen > 0) + { + for (int i = 0; i < N; i++) + refBuffer[i] = fillPattern[i % patternLen]; + } + else // Otherwise fill with pseudo-random values + { + for (int i = 0; i < N; i++) refBuffer[i] = (i % 383 + 31); + } } // Either fill the memory with the reference buffer, or compare against it diff --git a/projects/rccl/tools/TransferBench/TransferBench.hpp b/projects/rccl/tools/TransferBench/TransferBench.hpp index 11b0e851ca..129ee198c3 100644 --- a/projects/rccl/tools/TransferBench/TransferBench.hpp +++ b/projects/rccl/tools/TransferBench/TransferBench.hpp @@ -121,7 +121,7 @@ void EnablePeerAccess(int const deviceId, int const peerDeviceId); void AllocateMemory(MemType memType, int devIndex, size_t numBytes, float** memPtr); void DeallocateMemory(MemType memType, int devIndex, float* memPtr); void CheckPages(char* byteArray, size_t numBytes, int targetId); -void CheckOrFill(ModeType mode, int N, bool isMemset, bool isHipCall, float* ptr); +void CheckOrFill(ModeType mode, int N, bool isMemset, bool isHipCall, std::vector<float> const& fillPattern, float* ptr); void RunLink(EnvVars const& ev, size_t const N, int const iteration, Link& link);