[TransferBench] Adding ability to specify source data pattern (#394)
* [TransferBench] Adding ability to specify source data pattern
[ROCm/rccl commit: ff413be933]
Cette révision appartient à :
@@ -25,6 +25,7 @@ public:
|
||||
int numIterations; // Number of timed iterations to perform
|
||||
int samplingFactor; // Affects how many different values of N are generated (when N set to 0)
|
||||
int numCpuPerLink; // Number of CPU child threads to use per CPU link
|
||||
std::vector<float> fillPattern; // Pattern of floats used to fill source data
|
||||
|
||||
// Constructor that collects values
|
||||
EnvVars()
|
||||
@@ -42,6 +43,59 @@ public:
|
||||
samplingFactor = GetEnvVar("SAMPLING_FACTOR" , DEFAULT_SAMPLING_FACTOR);
|
||||
numCpuPerLink = GetEnvVar("NUM_CPU_PER_LINK" , DEFAULT_NUM_CPU_PER_LINK);
|
||||
|
||||
// Check for fill pattern
|
||||
char* pattern = getenv("FILL_PATTERN");
|
||||
if (pattern != NULL)
|
||||
{
|
||||
int patternLen = strlen(pattern);
|
||||
if (patternLen % 2)
|
||||
{
|
||||
printf("[ERROR] FILL_PATTERN must contain an even-number of hex digits\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// Figure out how many copies of the pattern are necessary to fill a 4-byte float properly
|
||||
int copies;
|
||||
switch (patternLen % 8)
|
||||
{
|
||||
case 0: copies = 1; break;
|
||||
case 4: copies = 2; break;
|
||||
default: copies = 4; break;
|
||||
}
|
||||
|
||||
fillPattern.resize(copies * patternLen / 8);
|
||||
unsigned char* rawData = (unsigned char*) fillPattern.data();
|
||||
|
||||
unsigned char val = 0;
|
||||
for (int c = 0; c < copies; c++)
|
||||
{
|
||||
for (int i = 0; i < patternLen; i++)
|
||||
{
|
||||
if ('0' <= pattern[i] && pattern[i] <= '9')
|
||||
val += (pattern[i] - '0');
|
||||
else if ('A' <= pattern[i] && pattern[i] <= 'F')
|
||||
val += (pattern[i] - 'A' + 10);
|
||||
else if ('a' <= pattern[i] && pattern[i] <= 'f')
|
||||
val += (pattern[i] - 'a' + 10);
|
||||
else
|
||||
{
|
||||
printf("[ERROR] FILL_PATTERN must contain an even-number of hex digits (0-9'/a-f/A-F). (not %c)\n", pattern[i]);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// First hex digit of a pair: shift it into the high nibble. Second digit: store the completed byte and reset to 0
|
||||
if (i % 2 == 0)
|
||||
val <<= 4;
|
||||
else
|
||||
{
|
||||
rawData[(c * patternLen + i) / 2] = val;
|
||||
val = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else fillPattern.clear();
|
||||
|
||||
// Perform some basic validation
|
||||
if (byteOffset % sizeof(float))
|
||||
{
|
||||
@@ -87,6 +141,7 @@ public:
|
||||
printf(" NUM_ITERATIONS=I - Perform I timed iteration(s) per test\n");
|
||||
printf(" SAMPLING_FACTOR=F - Add F samples (when possible) between powers of 2 when auto-generating data sizes\n");
|
||||
printf(" NUM_CPU_PER_LINK=C - Use C threads per Link for CPU-executed copies\n");
|
||||
printf(" FILL_PATTERN=STR - Fill input buffer with pattern specified in hex digits (0-9,a-f,A-F). Must be even number of digits\n");
|
||||
}
|
||||
|
||||
// Display env var settings
|
||||
@@ -120,6 +175,15 @@ public:
|
||||
printf("%-20s = %12d : Running %d warmup iteration(s) per topology\n", "NUM_WARMUPS", numWarmups, numWarmups);
|
||||
printf("%-20s = %12d : Running %d timed iteration(s) per topology\n", "NUM_ITERATIONS", numIterations, numIterations);
|
||||
printf("%-20s = %12d : Using %d CPU thread(s) per CPU-based-copy Link\n", "NUM_CPU_PER_LINK", numCpuPerLink, numCpuPerLink);
|
||||
printf("%-20s = %12s : ", "FILL_PATTERN", getenv("FILL_PATTERN") ? "(specified)" : "(unspecified)");
|
||||
if (fillPattern.size())
|
||||
{
|
||||
printf("Pattern: %s", getenv("FILL_PATTERN"));
|
||||
}
|
||||
else
|
||||
{
|
||||
printf("Pseudo-random: (Element i = i modulo 383 + 31)");
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
};
|
||||
|
||||
@@ -176,7 +176,7 @@ int main(int argc, char **argv)
|
||||
for (int i = 0; i < numLinks; i++)
|
||||
{
|
||||
// Initialize source memory with patterned data
|
||||
CheckOrFill(MODE_FILL, N, ev.useMemset, ev.useHipCall, links[i].srcMem + initOffset);
|
||||
CheckOrFill(MODE_FILL, N, ev.useMemset, ev.useHipCall, ev.fillPattern, links[i].srcMem + initOffset);
|
||||
|
||||
// Each block needs to know src/dst pointers and how many elements to transfer
|
||||
// Figure out the sub-array each block does for this Link
|
||||
@@ -265,7 +265,7 @@ int main(int argc, char **argv)
|
||||
|
||||
// Validate that each link has transferred correctly
|
||||
for (int i = 0; i < numLinks; i++)
|
||||
CheckOrFill(MODE_CHECK, N, ev.useMemset, ev.useHipCall, links[i].dstMem + initOffset);
|
||||
CheckOrFill(MODE_CHECK, N, ev.useMemset, ev.useHipCall, ev.fillPattern, links[i].dstMem + initOffset);
|
||||
|
||||
// Report timings
|
||||
totalCpuTime = totalCpuTime / (1.0 * ev.numIterations) * 1000;
|
||||
@@ -828,7 +828,7 @@ void CheckPages(char* array, size_t numBytes, int targetId)
|
||||
}
|
||||
|
||||
// Helper function to either fill a device pointer with reference data (the repeated fill pattern if one is specified, otherwise pseudo-random values), or to check that it matches
|
||||
void CheckOrFill(ModeType mode, int N, bool isMemset, bool isHipCall, float* ptr)
|
||||
void CheckOrFill(ModeType mode, int N, bool isMemset, bool isHipCall, std::vector<float>const& fillPattern, float* ptr)
|
||||
{
|
||||
// Prepare reference result
|
||||
float* refBuffer = (float*)malloc(N * sizeof(float));
|
||||
@@ -846,8 +846,18 @@ void CheckOrFill(ModeType mode, int N, bool isMemset, bool isHipCall, float* ptr
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int i = 0; i < N; i++)
|
||||
// Fill with repeated pattern if specified
|
||||
size_t patternLen = fillPattern.size();
|
||||
if (patternLen > 0)
|
||||
{
|
||||
for (int i = 0; i < N; i++)
|
||||
refBuffer[i] = fillPattern[i % patternLen];
|
||||
}
|
||||
else // Otherwise fill with pseudo-random values
|
||||
{
|
||||
for (int i = 0; i < N; i++)
|
||||
refBuffer[i] = (i % 383 + 31);
|
||||
}
|
||||
}
|
||||
|
||||
// Either fill the memory with the reference buffer, or compare against it
|
||||
|
||||
@@ -121,7 +121,7 @@ void EnablePeerAccess(int const deviceId, int const peerDeviceId);
|
||||
void AllocateMemory(MemType memType, int devIndex, size_t numBytes, float** memPtr);
|
||||
void DeallocateMemory(MemType memType, int devIndex, float* memPtr);
|
||||
void CheckPages(char* byteArray, size_t numBytes, int targetId);
|
||||
void CheckOrFill(ModeType mode, int N, bool isMemset, bool isHipCall, float* ptr);
|
||||
void CheckOrFill(ModeType mode, int N, bool isMemset, bool isHipCall, std::vector<float> const& fillPattern, float* ptr);
|
||||
void RunLink(EnvVars const& ev, size_t const N, int const iteration, Link& link);
|
||||
|
||||
|
||||
|
||||
Référencer dans un nouveau ticket
Bloquer un utilisateur