Files
rocm-systems/samples/1_Utils/hipCommander/hipCommander.cpp
T
Rahul Garg ba8fe1675f Fixed hipcommander default execution for HCSWAP-106
Change-Id: I9fbd10dfaeeb4928b2ec23ceed131b5200a658f9
2017-01-19 15:04:32 +05:30

1097 lines
29 KiB
C++

#include <stdio.h>
#include <iostream>
#include <sstream>
#include <iomanip>
#include <algorithm>
#include <string>
#include <typeinfo>
#include <hip/hip_runtime.h>
#ifdef __HIP_PLATFORM_HCC__
#include <elf.h>
#include <hsa/hsa.h>
#include <hc.hpp>
#include <hip/hcc_detail/hcc_acc.h>
#endif
#include <sys/time.h>
#include "ResultDatabase.h"
#include "nullkernel.hip.cpp"
bool g_printedTiming = false;
// Cmdline parms:
int p_device = 0;
const char* p_command = "setstream(1); H2D; NullKernel; D2H;";
const char* p_file = nullptr;
unsigned p_verbose = 0x0;
unsigned p_db = 0x0;
unsigned p_blockingSync = 0x0;
//---
int p_iterations = 1;
#define KNRM "\x1B[0m"
#define KRED "\x1B[31m"
#define KGRN "\x1B[32m"
#define failed(...) \
printf ("error: ");\
printf (__VA_ARGS__);\
printf ("\n");\
abort();
#define HIPCHECK(error) \
{\
hipError_t localError = error; \
if (localError != hipSuccess) { \
printf("%serror: '%s'(%d) from %s at %s:%d%s\n", \
KRED,hipGetErrorString(localError), localError,\
#error,\
__FILE__, __LINE__,KNRM); \
failed("API returned error code.");\
}\
}
#define HIPASSERT(condition, msg) \
if (! (condition) ) { \
failed("%sassertion %s at %s:%d: %s%s\n", \
KRED, #condition,\
__FILE__, __LINE__,msg, KNRM); \
}
int parseInt(const char *str, int *output)
{
char *next;
*output = strtol(str, &next, 0);
return !strlen(next);
}
void printConfig() {
hipDeviceProp_t props;
HIPCHECK(hipGetDeviceProperties(&props, p_device));
printf ("Device:%s Mem=%.1fGB #CUs=%d Freq=%.0fMhz\n", props.name, props.totalGlobalMem/1024.0/1024.0/1024.0, props.multiProcessorCount, props.clockRate/1000.0);
}
void help() {
printf ("Usage: hipBusBandwidth [OPTIONS]\n");
printf (" --file, -f : Read string of commands from file\n");
printf (" --command, -c : String specifying commands to run.\n");
printf (" --iterations, -i : Number of copy iterations to run.\n");
printf (" --device, -d : Device ID to use (0..numDevices).\n");
printf (" --verbose, -v : Verbose printing of status. Fore more info, combine with HIP_TRACE_API on ROCm\n");
};
int parseStandardArguments(int argc, char *argv[])
{
for (int i = 1; i < argc; i++) {
const char *arg = argv[i];
if (!strcmp(arg, " ")) {
// skip NULL args.
} else if (!strcmp(arg, "--iterations") || (!strcmp(arg, "-i"))) {
if (++i >= argc || !parseInt(argv[i], &p_iterations)) {
failed("Bad --iterations argument");
}
} else if (!strcmp(arg, "--device") || (!strcmp(arg, "-d"))) {
if (++i >= argc || !parseInt(argv[i], &p_device)) {
failed("Bad --device argument");
}
} else if (!strcmp(arg, "--file") || (!strcmp(arg, "-f"))) {
if (++i >= argc) {
failed("Bad --file argument");
} else {
p_file = argv[i];
}
} else if (!strcmp(arg, "--commands") || (!strcmp(arg, "-c"))) {
if (++i >= argc) {
failed("Bad --commands argument");
} else {
p_command = argv[i];
}
} else if (!strcmp(arg, "--verbose") || (!strcmp(arg, "-v"))) {
p_verbose = 1;
} else if (!strcmp(arg, "--blockingSync") || (!strcmp(arg, "-B"))) {
p_blockingSync = 1;
} else if (!strcmp(arg, "--help") || (!strcmp(arg, "-h"))) {
help();
exit(EXIT_SUCCESS);
} else {
failed("Bad argument '%s'", arg);
}
}
return 0;
};
// Returns the current system time in microseconds
inline long long get_time()
{
struct timeval tv;
gettimeofday(&tv, 0);
return (tv.tv_sec * 1000000) + tv.tv_usec;
}
class Command;
//=================================================================================================
// A stream of commands , specified as a string.
class CommandStream {
public:
// State that is inherited by sub-blocks:
struct CommandStreamState {
hipStream_t _currentStream;
std::vector<hipStream_t> _streams;
vector<CommandStream *> _subBlocks;
};
public:
CommandStream(std::string commandStreamString, int iterations);
~CommandStream();
hipStream_t currentStream() const { return _state._currentStream; };
void print(const std::string &indent="") const;
void printBrief(std::ostream &s=std::cout) const ;
void run();
void recordTime();
void printTiming(int iterations=0);
CommandStream *currentCommandStream() {
return _parseInSubBlock ? _state._subBlocks.back() : this;
};
void enterSubBlock(CommandStream *commandStream) {
_parseInSubBlock = true;
_state._subBlocks.push_back(commandStream);
};
void exitSubBlock() {
_parseInSubBlock = false;
};
void setParent(CommandStream *parentCmdStream)
{
_parentCommandStream = parentCmdStream;
_state = parentCmdStream->_state;
};
CommandStream * getParent() { return _parentCommandStream; };
void setStream(int streamIndex);
CommandStreamState &getState() { return _state; };
private:
static void tokenize(const std::string &s, char delim, std::vector<std::string> &tokens);
void parse(const std::string fullCmd);
protected:
CommandStreamState _state;
private:
// List of commands to run in this stream:
std::vector<Command*> _commands;
// Number of iterations to run the command loop
int _iterations;
// Us to run the the command-stream. Only valid after run is called.
long long _startTime;
double _elapsedUs;
// Track nested loop of command streams:
CommandStream *_parentCommandStream;
// Track if we are parsing commands in the subblock.
bool _parseInSubBlock;
};
//=================================================================================================
class Command {
public:
// @p minArgs : Minimum arguments for command. -1 = don't check.
// @p maxArgs : Minimum arguments for command. 0 means min=max, ie exact #arguments expected. -1 = don't check max.
Command(CommandStream *cmdStream, const std::vector<std::string> &args, int minArgs=0, int maxArgs=0)
: _commandStream(cmdStream),
_args(args)
{
int numArgs = args.size() - 1;
if ((minArgs != -1 ) && (numArgs < minArgs)) {
// TODO - print full command here.
failed ("Not enough arguments for command %s. (Expected %d, got %d)", args[0].c_str(), minArgs, numArgs);
}
// Check for an exact number of arguments:
if (maxArgs == 0) {
maxArgs = minArgs;
}
if ((maxArgs != -1 ) && (numArgs > maxArgs)) {
failed ("Too many arguments for command %s. (Expected %d, got %d)", args[0].c_str(), maxArgs, numArgs);
}
};
void printBrief(std::ostream &s=std::cout) const
{
s << _args[0];
}
virtual ~Command() {};
virtual void print(const std::string &indent = "") const {
std::cout << indent << "[";
std::for_each(_args.begin(), _args.end(), [] (const std::string &s) {
std::cout << s;
});
std::cout << "]";
};
virtual void run() = 0;
protected:
int readIntArg(int argIndex, const std::string &argName)
{
// TODO - catch references to non-existant arguments here.
int argVal;
try {
argVal = std::stoi(_args[argIndex]);
} catch (std::invalid_argument) {
failed ("Command %s has bad %s argument ('%s')", _args[0].c_str(), argName.c_str(), _args[argIndex].c_str());
}
return argVal;
}
protected:
CommandStream *_commandStream;
std::vector<std::string> _args;
};
#define FILENAME "nullkernel.hsaco"
#define KERNEL_NAME "NullKernel"
#ifdef __HIP_PLATFORM_HCC__
//=================================================================================================
// Use Aql to launch the NULL kernel.
class AqlKernelCommand : public Command
{
public:
AqlKernelCommand(CommandStream *cmdStream, const std::vector<std::string> args) :
Command(cmdStream, args)
{
hc::accelerator_view *av;
HIPCHECK(hipHccGetAcceleratorView(cmdStream->currentStream(), &av));
hc::accelerator acc = av->get_accelerator();
hsa_region_t systemRegion = *(hsa_region_t*)acc.get_hsa_am_system_region();
_hsaAgent = *(hsa_agent_t*) acc.get_hsa_agent();
std::ifstream file(FILENAME, std::ios::binary | std::ios::ate);
std::streamsize fsize = file.tellg();
file.seekg(0, std::ios::beg);
std::vector<char> buffer(fsize);
if (file.read(buffer.data(), fsize))
{
uint64_t elfSize = ElfSize(&buffer[0]);
assert(fsize == elfSize);
//TODO - replace module load code with explicit module load and unload.
hipModule_t module;
HIPCHECK(hipModuleLoadData(&module, &buffer[0]));
HIPCHECK(hipModuleGetFunction(&_function, module, KERNEL_NAME));
} else {
failed("could not open code object '%s'\n", FILENAME);
}
};
~AqlKernelCommand() {};
void run() override {
#define LEN 64
uint32_t len = LEN;
uint32_t one = 1;
float *Ad = NULL;
size_t argSize = 36;
char argBuffer[argSize];
*(uint32_t*) (&argBuffer[0]) = len;
*(uint32_t*) (&argBuffer[4]) = one;
*(uint32_t*) (&argBuffer[8]) = one;
*(uint32_t*) (&argBuffer[12]) = len;
*(uint32_t*) (&argBuffer[16]) = one;
*(uint32_t*) (&argBuffer[20]) = one;
*(float**) (&argBuffer[24]) = Ad; // Ad pointer argument
void *config[] = {
HIP_LAUNCH_PARAM_BUFFER_POINTER, &argBuffer[0],
HIP_LAUNCH_PARAM_BUFFER_SIZE, &argSize,
HIP_LAUNCH_PARAM_END
};
hipModuleLaunchKernel(_function, len, 1, 1, LEN, 1, 1, 0, 0, NULL, (void**)&config);
};
public:
hsa_queue_t _hsaQueue;
hsa_agent_t _hsaAgent;
hipFunction_t _function;
private:
static uint64_t ElfSize(const void *emi){
const Elf64_Ehdr *ehdr = (const Elf64_Ehdr*)emi;
const Elf64_Shdr *shdr = (const Elf64_Shdr*)((char*)emi + ehdr->e_shoff);
uint64_t max_offset = ehdr->e_shoff;
uint64_t total_size = max_offset + ehdr->e_shentsize * ehdr->e_shnum;
for(uint16_t i=0;i < ehdr->e_shnum;++i){
uint64_t cur_offset = static_cast<uint64_t>(shdr[i].sh_offset);
if(max_offset < cur_offset){
max_offset = cur_offset;
total_size = max_offset;
if(SHT_NOBITS != shdr[i].sh_type){
total_size += static_cast<uint64_t>(shdr[i].sh_size);
}
}
}
return total_size;
}
};
#endif
//=================================================================================================
// HCC optimizes away fully NULL kernel calls, so run one that is nearly null:
class ModuleKernelCommand : public Command
{
public:
ModuleKernelCommand(CommandStream *cmdStream, const std::vector<std::string> args) :
Command(cmdStream, args),
_stream (cmdStream->currentStream()) {
hipModule_t module;
HIPCHECK(hipModuleLoad(&module, FILENAME));
HIPCHECK(hipModuleGetFunction(&_function, module, KERNEL_NAME));
};
~ModuleKernelCommand() {};
void run() override {
#define LEN 64
uint32_t len = LEN;
uint32_t one = 1;
float *Ad = NULL;
size_t argSize = 36;
char argBuffer[argSize];
*(uint32_t*) (&argBuffer[0]) = len;
*(uint32_t*) (&argBuffer[4]) = one;
*(uint32_t*) (&argBuffer[8]) = one;
*(uint32_t*) (&argBuffer[12]) = len;
*(uint32_t*) (&argBuffer[16]) = one;
*(uint32_t*) (&argBuffer[20]) = one;
*(float**) (&argBuffer[24]) = Ad; // Ad pointer argument
void *config[] = {
HIP_LAUNCH_PARAM_BUFFER_POINTER, &argBuffer[0],
HIP_LAUNCH_PARAM_BUFFER_SIZE, &argSize,
HIP_LAUNCH_PARAM_END
};
hipModuleLaunchKernel(_function, len, 1, 1, LEN, 1, 1, 0, 0, NULL, (void**)&config);
};
public:
hipFunction_t _function;
hipStream_t _stream;
};
class KernelCommand : public Command
{
public:
enum Type {Null, VectorAdd};
KernelCommand(CommandStream *cmdStream, const std::vector<std::string> args, Type kind) :
Command(cmdStream, args),
_kind(kind),
_stream (cmdStream->currentStream()) {
};
~KernelCommand() {};
void run() override {
static const int gridX = 64;
static const int groupX = 64;
switch (_kind) {
case Null:
hipLaunchKernel(NullKernel, dim3(gridX/groupX), dim3(gridX), 0, _stream, nullptr);
break;
case VectorAdd:
assert(0); // TODO
break;
};
}
private:
Type _kind;
hipStream_t _stream;
};
#ifdef __HIP_PLATFORM_HCC__
//=================================================================================================
class PfeCommand : public Command
{
public:
PfeCommand(CommandStream *cmdStream, const std::vector<std::string> args, hipStream_t stream = 0) :
Command(cmdStream, args)
{
HIPCHECK(hipHccGetAcceleratorView(stream, &_av));
}
~PfeCommand() { }
void run() override {
static const int gridX = 64;
static const int groupX = 64;
auto cf = hc::parallel_for_each(*_av, hc::extent<1>(gridX).tile(groupX),
[=](hc::index<1>& idx) __HC__ {
});
}
private:
hc::accelerator_view *_av;
};
#endif
//=================================================================================================
class CopyCommand : public Command
{
enum MemType {PinnedHost, UnpinnedHost, Device} ;
public:
CopyCommand(CommandStream *cmdStream, const std::vector<std::string> &args, hipMemcpyKind kind, bool isAsync, bool isPinnedHost) ;
~CopyCommand()
{
if (_dst) {
dealloc(_dst, _dstType);
_dst = NULL;
};
if (_src) {
dealloc(_src, _srcType);
_src = NULL;
}
}
void run() override {
if (_isAsync) {
HIPCHECK(hipMemcpyAsync(_dst, _src, _sizeBytes, _kind, _stream));
} else {
HIPCHECK(hipMemcpy(_dst, _src, _sizeBytes, _kind));
}
};
private:
void * alloc(size_t size, MemType memType) {
void * p;
if (memType == Device) {
HIPCHECK(hipMalloc(&p, size));
} else if (memType == PinnedHost) {
HIPCHECK(hipHostMalloc(&p, size));
} else if (memType == UnpinnedHost) {
p = (char*)malloc(size);
HIPASSERT(p, "malloc failed");
} else {
HIPASSERT(0, "unsupported memType");
}
return p;
};
void dealloc(void *p, MemType memType) {
if (memType == Device) {
HIPCHECK(hipFree(p));
} else if (memType == PinnedHost) {
HIPCHECK(hipHostFree(p));
} else if (memType == UnpinnedHost) {
free(p);
} else {
HIPASSERT(0, "unsupported memType");
}
}
private:
bool _isAsync;
hipStream_t _stream;
hipMemcpyKind _kind;
size_t _sizeBytes;
void *_dst;
MemType _dstType;
void *_src;
MemType _srcType;
};
//=================================================================================================
class DeviceSyncCommand : public Command
{
public:
DeviceSyncCommand(CommandStream *cmdStream, const std::vector<std::string> &args) :
Command(cmdStream, args) {};
void run() override {
HIPCHECK(hipDeviceSynchronize());
};
};
//=================================================================================================
class StreamSyncCommand : public Command
{
public:
StreamSyncCommand(CommandStream *cmdStream, const std::vector<std::string> &args) :
Command(cmdStream, args),
_stream(cmdStream->currentStream())
{};
const char *help() {
return "synchronizes the current stream";
};
void run() override {
HIPCHECK(hipStreamSynchronize(_stream));
};
private:
hipStream_t _stream;
};
//=================================================================================================
//=================================================================================================
class LoopCommand : public Command
{
public:
LoopCommand(CommandStream *parentCmdStream, const std::vector<std::string> &args) :
Command(parentCmdStream, args, 1)
{
int loopCnt;
try {
loopCnt = std::stoi(args[1]);
} catch (std::invalid_argument) {
failed ("bad LOOP_CNT=%s", args[1].c_str());
}
_commandStream = new CommandStream("", loopCnt);
_commandStream->setParent(parentCmdStream);
parentCmdStream->enterSubBlock(_commandStream);
};
void print(const std::string &indent = "") const override {
Command::print();
_commandStream->print (indent + " ");
};
void run() override {
_commandStream->run();
};
};
//=================================================================================================
class EndBlockCommand : public Command
{
public:
EndBlockCommand(CommandStream *blockCmdStream, CommandStream *parentCmdStream, const std::vector<std::string> &args) :
Command(parentCmdStream, args, 0, 1),
_blockCmdStream(blockCmdStream),
_printTiming(0)
{
int argCnt = args.size()-1;
if (argCnt >= 1 ) {
_printTiming = readIntArg(1, "PRINT_TIMING");
}
if (parentCmdStream == nullptr) {
failed ("%s without corresponding command to start block", args[0].c_str());
}
parentCmdStream->exitSubBlock();
};
void run() override {
if (_printTiming) {
_blockCmdStream->printTiming();
}
};
private:
CommandStream *_blockCmdStream;
// print the stream when loop exits.
int _printTiming;
};
//=================================================================================================
class SetStreamCommand : public Command
{
public:
SetStreamCommand(CommandStream *cmdStream, const std::vector<std::string> &args) :
Command(cmdStream, args, 1)
{
int streamIndex = readIntArg(1, "STREAM_INDEX");
cmdStream->setStream(streamIndex);
};
void run() override {
};
};
//=================================================================================================
class PrintTimingCommand : public Command
{
public:
PrintTimingCommand(CommandStream *cmdStream, const std::vector<std::string> &args)
: Command(cmdStream, args, 1)
{
_iterations = readIntArg(1, "ITERATIONS");
};
void run() override {
_commandStream->printTiming(_iterations);
};
private:
int _iterations;
};
//=================================================================================================
CopyCommand::CopyCommand(CommandStream *cmdStream, const std::vector<std::string> &args,
hipMemcpyKind kind, bool isAsync, bool isPinnedHost) :
Command(cmdStream, args) ,
_isAsync(isAsync),
_kind(kind),
_stream(cmdStream->currentStream())
{
switch (kind) {
case hipMemcpyDeviceToHost:
_srcType = Device;
_dstType = isPinnedHost ? PinnedHost : UnpinnedHost;
break;
case hipMemcpyHostToDevice:
_srcType = isPinnedHost ? PinnedHost : UnpinnedHost;
_dstType = Device;
break;
default:
HIPASSERT(0, "Unknown hipMemcpyKind");
};
_sizeBytes = 64; //TODO, support reading from arg.
_dst = alloc(_sizeBytes, _dstType);
_src = alloc(_sizeBytes, _srcType);
};
//=================================================================================================
//=================================================================================================
// Implementations:
//=================================================================================================
//=================================================================================================
CommandStream::CommandStream(std::string commandStreamString, int iterations)
: _iterations(iterations),
_startTime(0),
_elapsedUs(0.0),
_parentCommandStream(nullptr),
_parseInSubBlock(false)
{
std::vector<std::string> tokens;
tokenize(commandStreamString, ';', tokens);
std::for_each(tokens.begin(), tokens.end(), [&] (const std::string s) {
this->parse(s);
});
setStream(0);
}
CommandStream::~CommandStream()
{
std::for_each(_state._streams.begin(), _state._streams.end(), [&] (hipStream_t s) {
if (s) {
HIPCHECK(hipStreamDestroy(s));
}
});
std::for_each(_commands.begin(), _commands.end(), [&] (Command *c) {
delete c;
});
}
void CommandStream::setStream(int streamIndex)
{
if (streamIndex >= _state._streams.size()) {
_state._streams.resize(streamIndex+1);
}
if (streamIndex && (_state._streams[streamIndex] == nullptr)) {
// Create new stream:
hipStream_t stream;
HIPCHECK(hipStreamCreate(&stream));
_state._streams[streamIndex] = stream;
_state._currentStream = stream;
} else {
// Use existing stream:
_state._currentStream = _state._streams[streamIndex];
}
}
void CommandStream::tokenize(const std::string &s, char delim, std::vector<std::string> &tokens)
{
std::stringstream ss;
ss.str(s);
std::string item;
while (getline(ss, item, delim)) {
item.erase (std::remove (item.begin(), item.end(), ' '), item.end()); // remove whitespace.
tokens.push_back(item);
}
}
void trim(std::string *s)
{
// trim whitespace from begin and end:
const char *t = "\t\n\r\f\v";
s->erase(0, s->find_first_not_of(t));
s->erase(s->find_last_not_of(t)+1);
}
void ltrim(std::string *s)
{
// trim whitespace from begin and end:
const char *t = "\t\n\r\f\v";
s->erase(0, s->find_first_not_of(t));
}
void CommandStream::parse(std::string fullCmd)
{
//convert to lower-case:
std::transform(fullCmd.begin(), fullCmd.end(), fullCmd.begin(), ::tolower);
trim(&fullCmd);
if (p_db) {
printf ("parse: <%s>\n", fullCmd.c_str());
}
std::string c;
std::vector<std::string> args;
size_t leftParenZ = fullCmd.find_first_of('(');
if (leftParenZ == string::npos) {
c = fullCmd;
args.push_back(c);
} else {
c = fullCmd.substr(0, leftParenZ);
args.push_back(c);
size_t rightParenZ = fullCmd.find_first_of(')', leftParenZ);
std::string argStr = fullCmd.substr(leftParenZ+1, rightParenZ-leftParenZ-1);
//printf ("c=%s argstr='%s' leftParenZ=%zu rightParenZ=%zu\n", c.c_str(), argStr.c_str(), leftParenZ, rightParenZ);
tokenize(argStr, ',', args);
}
if ((args.size()==0) || (fullCmd.c_str()[0] == '#') ) {
if (p_db) {
printf (" skip comment\n");
}
return;
}
Command *cmd = NULL;
CommandStream *cmdStream = currentCommandStream();
if (c == "h2d") {
cmd = new CopyCommand(cmdStream, args, hipMemcpyHostToDevice, true/*isAsync*/, true/*isPinned*/);
//= h2d
//= Performs an async host-to-device copy of array A_h to A_d.
//= The size of these arrays may be set with the datasize command.
} else if (c == "d2h") {
cmd = new CopyCommand(cmdStream, args, hipMemcpyDeviceToHost, true/*isAsync*/, true/*isPinned*/);
//= d2h
//= Performs an async device-to-host copy of array A_d to A_h.
//= The size of these arrays may be set with the datasize command.
} else if (c == "modulekernel") {
cmd = new ModuleKernelCommand(cmdStream, args);
} else if (c == "nullkernel") {
cmd = new KernelCommand(cmdStream, args, KernelCommand::Null);
//= nullkernel
//= Dispatches a null kernel to the device.
} else if (c == "vectoraddkernel") {
cmd = new KernelCommand(cmdStream, args, KernelCommand::VectorAdd);
#ifdef __HIP_PLATFORM_HCC__
} else if (c == "nullpfe") {
cmd = new PfeCommand(cmdStream, args);
} else if (c == "aqlkernel") {
cmd = new AqlKernelCommand(cmdStream, args);
#endif
} else if (c == "devicesync") {
cmd = new DeviceSyncCommand(cmdStream, args);
} else if (c == "streamsync") {
//= streamsync
//= Execute hipStreamSynchronize.
//= This will cause the host thread to wait until the current stream
//= completes all pending operations.
cmd = new StreamSyncCommand(cmdStream, args);
} else if (c == "setstream") {
//= setstream(STREAM_INDEX);
//= Set current stream used by subsequent commands.
//= STREAM_INDEX is index starting from 0...N.
//= This function will create new stream on first call to setstream or re-use previous
//= stream if setstream has already been called with STREAM_INDEX.
//= STREAM_INDEX=0 will use the default "null" stream associated with the device, and will not create a new stream.
//= The default stream has special, conservative synchronization properties.
cmd = new SetStreamCommand(cmdStream, args);
} else if (c == "printtiming") {
cmd = new PrintTimingCommand(cmdStream, args);
} else if (c == "loop") {
//= loop(LOOP_CNT)
//= Loop over next set of commands (until 'endloop' command) for LOOP_CNT iterations.
//= Loops can be nested.
cmd = new LoopCommand(cmdStream, args);
} else if (c == "endloop") {
//= endloop
//= End a looped sequence. Must be paired with a preceding loop command.
//= Command between the `loop` and `endloop` must be executed
CommandStream * parentCmdStream = cmdStream->getParent() ;
cmd = new EndBlockCommand(cmdStream, parentCmdStream, args);
cmdStream = parentCmdStream;
} else {
std::cerr << "error: Bad command '" << fullCmd << "\n";
HIPASSERT(0, "bad command in command-stream");
}
if (cmd) {
cmdStream->_commands.push_back(cmd);
}
}
void CommandStream::print(const std::string &indent) const
{
for (auto cmdI = _commands.begin(); cmdI != _commands.end(); cmdI++) {
(*cmdI)->print(indent);
};
}
void CommandStream::printBrief(std::ostream &s) const
{
for (auto cmdI = _commands.begin(); cmdI != _commands.end(); cmdI++) {
(*cmdI)->printBrief(s);
s << ";";
};
}
void CommandStream::run()
{
_startTime = get_time();
for (int i=0; i<_iterations; i++) {
for (auto cmdI = _commands.begin(); cmdI != _commands.end(); cmdI++) {
if (p_verbose) {
(*cmdI)->print();
}
(*cmdI)->run();
}
}
// Record time, if not already stored. (an earlier printTime command will also store the time)
recordTime();
};
void CommandStream::recordTime()
{
if (_elapsedUs == 0.0) {
auto stopTime = get_time();
_elapsedUs = stopTime - _startTime;
}
}
void CommandStream::printTiming(int iterations)
{
if ((_state._subBlocks.size() == 1) && (_commands.size()==1)) {
//printf ("print just the loop\n");
_state._subBlocks.front()->printTiming(iterations);
} else {
g_printedTiming = true;
recordTime();
if (iterations == 0) {
iterations = _iterations;
}
std::cout << "command<"; printBrief(std::cout);
std::cout << ">," ;
printf (" iterations,%d, total_time,%6.3f, time/iteration,%6.3f\n", iterations, _elapsedUs, _elapsedUs/iterations);
}
};
//=================================================================================================
int main(int argc, char *argv[])
{
parseStandardArguments(argc, argv);
printConfig();
CommandStream *cs;
if (p_blockingSync) {
#ifdef __HIP_PLATFORM_HCC__
printf ("setting BlockingSync for AMD\n");
setenv("HIP_BLOCKING_SYNC", "1", 1);
#endif
#ifdef __HIP_PLATFORM_NVCC__
printf ("setting cudaDeviceBlockingSync\n");
HIPCHECK(hipSetDeviceFlags(cudaDeviceBlockingSync));
#endif
};
if (p_file) {
// TODO - catch exception on file IO here:
std::ifstream file(p_file);
std::string str;
std::string file_contents;
while (std::getline(file, str))
{
file_contents += str;
}
cs = new CommandStream(file_contents, p_iterations);
} else {
cs = new CommandStream(p_command, p_iterations);
}
cs->print();
printf ("------\n");
cs->run();
if (!g_printedTiming) {
cs->printTiming();
}
delete cs;
}
// TODO - add error checking for arguments.