[TransferBench] Adding ability to specify source data pattern (#394)

* [TransferBench] Adding ability to specify source data pattern

[ROCm/rccl commit: ff413be933]
Cette révision appartient à :
gilbertlee-amd
2021-06-15 08:41:57 -06:00
révisé par GitHub
Parent dbb867942d
révision 01a8efbb76
3 fichiers modifiés avec 79 ajouts et 5 suppressions
+64
Voir le fichier
@@ -25,6 +25,7 @@ public:
int numIterations; // Number of timed iterations to perform
int samplingFactor; // Affects how many different values of N are generated (when N set to 0)
int numCpuPerLink; // Number of CPU child threads to use per CPU link
std::vector<float> fillPattern; // Pattern of floats used to fill source data
// Constructor that collects values
EnvVars()
@@ -42,6 +43,59 @@ public:
samplingFactor = GetEnvVar("SAMPLING_FACTOR" , DEFAULT_SAMPLING_FACTOR);
numCpuPerLink = GetEnvVar("NUM_CPU_PER_LINK" , DEFAULT_NUM_CPU_PER_LINK);
// Check for fill pattern
// Reads the optional FILL_PATTERN environment variable, a string of hex digits, decodes it
// into raw bytes and stores those bytes in fillPattern (the floats used to fill source data).
// If the variable is not set, fillPattern is left empty.
char* pattern = getenv("FILL_PATTERN");
if (pattern != NULL)
{
// NOTE(review): an empty FILL_PATTERN string passes the checks below with patternLen == 0,
// so fillPattern is resized to 0 elements and stays empty.
int patternLen = strlen(pattern);
// Two hex digits encode one byte, so an odd number of digits cannot be decoded
if (patternLen % 2)
{
printf("[ERROR] FILL_PATTERN must contain an even-number of hex digits\n");
exit(1);
}
// Figure out how many copies of the pattern are necessary to fill a 4-byte float properly
// One float is 8 hex digits, so the repeated pattern must total a multiple of 8 digits:
//   remainder 0      -> 1 copy is already a whole number of floats
//   remainder 4      -> 2 copies are needed
//   remainder 2 or 6 -> 4 copies are needed (odd remainders were rejected above)
int copies;
switch (patternLen % 8)
{
case 0: copies = 1; break;
case 4: copies = 2; break;
default: copies = 4; break;
}
// (copies * patternLen) hex digits divided by 8 digits per float gives the element count
fillPattern.resize(copies * patternLen / 8);
// Address the float storage byte-by-byte so each decoded byte can be written directly
unsigned char* rawData = (unsigned char*) fillPattern.data();
// Accumulator for the byte currently being assembled from a pair of hex digits
unsigned char val = 0;
// Decode the whole pattern once per copy, writing each copy right after the previous one
for (int c = 0; c < copies; c++)
{
for (int i = 0; i < patternLen; i++)
{
// Add the numeric value (0-15) of the current hex digit to the accumulator
if ('0' <= pattern[i] && pattern[i] <= '9')
val += (pattern[i] - '0');
else if ('A' <= pattern[i] && pattern[i] <= 'F')
val += (pattern[i] - 'A' + 10);
else if ('a' <= pattern[i] && pattern[i] <= 'f')
val += (pattern[i] - 'a' + 10);
else
{
// Any other character is rejected and the program exits
// NOTE(review): the message below contains a stray apostrophe character right after 0-9
printf("[ERROR] FILL_PATTERN must contain an even-number of hex digits (0-9'/a-f/A-F). (not %c)\n", pattern[i]);
exit(1);
}
// First digit of a pair (even i): shift it into the high nibble.
// Second digit of a pair (odd i): the byte is complete, so store it and reset the accumulator.
if (i % 2 == 0)
val <<= 4;
else
{
// Byte index = bytes written by earlier copies (c * patternLen / 2) + byte index within this copy (i / 2)
rawData[(c * patternLen + i) / 2] = val;
val = 0;
}
}
}
}
else fillPattern.clear();
// Perform some basic validation
if (byteOffset % sizeof(float))
{
@@ -87,6 +141,7 @@ public:
printf(" NUM_ITERATIONS=I - Perform I timed iteration(s) per test\n");
printf(" SAMPLING_FACTOR=F - Add F samples (when possible) between powers of 2 when auto-generating data sizes\n");
printf(" NUM_CPU_PER_LINK=C - Use C threads per Link for CPU-executed copies\n");
printf(" FILL_PATTERN=STR - Fill input buffer with pattern specified in hex digits (0-9,a-f,A-F). Must be even number of digits\n");
}
// Display env var settings
@@ -120,6 +175,15 @@ public:
printf("%-20s = %12d : Running %d warmup iteration(s) per topology\n", "NUM_WARMUPS", numWarmups, numWarmups);
printf("%-20s = %12d : Running %d timed iteration(s) per topology\n", "NUM_ITERATIONS", numIterations, numIterations);
printf("%-20s = %12d : Using %d CPU thread(s) per CPU-based-copy Link\n", "NUM_CPU_PER_LINK", numCpuPerLink, numCpuPerLink);
printf("%-20s = %12s : ", "FILL_PATTERN", getenv("FILL_PATTERN") ? "(specified)" : "(unspecified)");
if (fillPattern.size())
{
printf("Pattern: %s", getenv("FILL_PATTERN"));
}
else
{
printf("Pseudo-random: (Element i = i modulo 383 + 31)");
}
printf("\n");
}
};
+14 -4
Voir le fichier
@@ -176,7 +176,7 @@ int main(int argc, char **argv)
for (int i = 0; i < numLinks; i++)
{
// Initialize source memory with patterned data
CheckOrFill(MODE_FILL, N, ev.useMemset, ev.useHipCall, links[i].srcMem + initOffset);
CheckOrFill(MODE_FILL, N, ev.useMemset, ev.useHipCall, ev.fillPattern, links[i].srcMem + initOffset);
// Each block needs to know src/dst pointers and how many elements to transfer
// Figure out the sub-array each block does for this Link
@@ -265,7 +265,7 @@ int main(int argc, char **argv)
// Validate that each link has transferred correctly
for (int i = 0; i < numLinks; i++)
CheckOrFill(MODE_CHECK, N, ev.useMemset, ev.useHipCall, links[i].dstMem + initOffset);
CheckOrFill(MODE_CHECK, N, ev.useMemset, ev.useHipCall, ev.fillPattern, links[i].dstMem + initOffset);
// Report timings
totalCpuTime = totalCpuTime / (1.0 * ev.numIterations) * 1000;
@@ -828,7 +828,7 @@ void CheckPages(char* array, size_t numBytes, int targetId)
}
// Helper function to either fill a device pointer with reference data (the repeated fill pattern if one is specified, otherwise pseudo-random values), or to check to see if it matches
void CheckOrFill(ModeType mode, int N, bool isMemset, bool isHipCall, float* ptr)
void CheckOrFill(ModeType mode, int N, bool isMemset, bool isHipCall, std::vector<float>const& fillPattern, float* ptr)
{
// Prepare reference results
float* refBuffer = (float*)malloc(N * sizeof(float));
@@ -846,8 +846,18 @@ void CheckOrFill(ModeType mode, int N, bool isMemset, bool isHipCall, float* ptr
}
else
{
for (int i = 0; i < N; i++)
// Fill with repeated pattern if specified
size_t patternLen = fillPattern.size();
if (patternLen > 0)
{
for (int i = 0; i < N; i++)
refBuffer[i] = fillPattern[i % patternLen];
}
else // Otherwise fill with pseudo-random values
{
for (int i = 0; i < N; i++)
refBuffer[i] = (i % 383 + 31);
}
}
// Either fill the memory with the reference buffer, or compare against it
+1 -1
Voir le fichier
@@ -121,7 +121,7 @@ void EnablePeerAccess(int const deviceId, int const peerDeviceId);
void AllocateMemory(MemType memType, int devIndex, size_t numBytes, float** memPtr);
void DeallocateMemory(MemType memType, int devIndex, float* memPtr);
void CheckPages(char* byteArray, size_t numBytes, int targetId);
void CheckOrFill(ModeType mode, int N, bool isMemset, bool isHipCall, float* ptr);
void CheckOrFill(ModeType mode, int N, bool isMemset, bool isHipCall, std::vector<float> const& fillPattern, float* ptr);
void RunLink(EnvVars const& ev, size_t const N, int const iteration, Link& link);