Files
rocm-systems/projects/clr/hipamd/src/hip_graph_internal.cpp
T
agunashe fae697b4d6 SWDEV-293742 - Update copyrights end year for hipamd
Change-Id: I08f620f84563a9214b59f1b943ed091b67229eab


[ROCm/clr commit: d9d9e81acb]
2021-07-09 12:08:39 -04:00

377 lines
13 KiB
C++

/* Copyright (c) 2021 - 2021 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include "hip_graph_internal.hpp"
#include <queue>
// Helper for switch statements that translate an enumerator into a display string.
// Expands to `case X:` which stores the stringified token C in a variable named
// `case_string` (the enclosing scope must declare it) and then breaks out of the switch.
#define CASE_STRING(X, C) \
case X: \
case_string = #C; \
break;
// Returns a short display name for a graph node type, used in the log messages of this file.
// `op` is cast to hipGraphNodeType; any value without a case below maps to
// "Unknown node type".
const char* GetGraphNodeTypeString(uint32_t op) {
  // Start from the fallback so that only the recognised types have to overwrite it.
  const char* case_string = "Unknown node type";
  switch (static_cast<hipGraphNodeType>(op)) {
    CASE_STRING(hipGraphNodeTypeKernel, KernelNode)
    CASE_STRING(hipGraphNodeTypeMemcpy, Memcpy3DNode)
    CASE_STRING(hipGraphNodeTypeMemset, MemsetNode)
    CASE_STRING(hipGraphNodeTypeHost, HostNode)
    CASE_STRING(hipGraphNodeTypeGraph, GraphNode)
    CASE_STRING(hipGraphNodeTypeEmpty, EmptyNode)
    CASE_STRING(hipGraphNodeTypeWaitEvent, WaitEventNode)
    CASE_STRING(hipGraphNodeTypeEventRecord, EventRecordNode)
    CASE_STRING(hipGraphNodeTypeMemcpy1D, Memcpy1DNode)
    CASE_STRING(hipGraphNodeTypeMemcpyFromSymbol, MemcpyFromSymbolNode)
    CASE_STRING(hipGraphNodeTypeMemcpyToSymbol, MemcpyToSymbolNode)
    default:
      break;
  }
  return case_string;
}
// Registers `node` as a vertex of this graph.
// The node's level and its in/out degree counters are (re)set to zero, and the node is
// appended to vertices_ without checking whether it is already present.
// Always returns hipSuccess.
hipError_t hipGraph::AddNode(const Node& node) {
  // A vertex that has just been added has no edges yet.
  node->SetLevel(0);
  nodeInDegree_[node] = 0;
  nodeOutDegree_[node] = 0;
  vertices_.emplace_back(node);

  ClPrint(amd::LOG_INFO, amd::LOG_CODE, "[hipGraph] Add %s(%p)\n",
          GetGraphNodeTypeString(node->GetType()), node);
  return hipSuccess;
}
// Adds a directed edge parentNode -> childNode.
// Either endpoint that is not yet a vertex of the graph is added first. Adding an edge that
// already exists is a no-op. Otherwise the degree counters are bumped and the child's level
// is raised to at least (parent level + 1).
// Always returns hipSuccess.
hipError_t hipGraph::AddEdge(const Node& parentNode, const Node& childNode) {
  // Adds the given endpoint to the graph unless it is already one of its vertices.
  const auto ensureVertex = [this](const Node& endpoint) {
    if (std::find(vertices_.begin(), vertices_.end(), endpoint) == vertices_.end()) {
      AddNode(endpoint);
    }
  };
  ensureVertex(parentNode);
  ensureVertex(childNode);

  auto adjacency = edges_.find(parentNode);
  if (adjacency == edges_.end()) {
    // First outgoing edge of this parent.
    edges_[parentNode] = {childNode};
  } else {
    auto& children = adjacency->second;
    if (std::find(children.begin(), children.end(), childNode) != children.end()) {
      // The edge is already recorded; leave degrees and levels untouched.
      return hipSuccess;
    }
    children.emplace_back(childNode);
  }

  ++nodeOutDegree_[parentNode];
  ++nodeInDegree_[childNode];
  // A child sits at least one level below each of its parents.
  childNode->SetLevel(std::max(childNode->GetLevel(), parentNode->GetLevel() + 1));

  ClPrint(amd::LOG_INFO, amd::LOG_CODE, "[hipGraph] Add edge btwn %s(%p) - %s(%p)\n",
          GetGraphNodeTypeString(parentNode->GetType()), parentNode,
          GetGraphNodeTypeString(childNode->GetType()), childNode);
  return hipSuccess;
}
// Collects the root nodes of the graph, i.e. every vertex whose in-degree is zero,
// in the order in which they appear in vertices_. Each root is also logged.
std::vector<Node> hipGraph::GetRootNodes() const {
  std::vector<Node> roots;
  for (const auto& vertex : vertices_) {
    if (nodeInDegree_.at(vertex) != 0) {
      continue;  // has at least one parent
    }
    roots.push_back(vertex);
    ClPrint(amd::LOG_INFO, amd::LOG_CODE, "[hipGraph] root node: %s(%p)\n",
            GetGraphNodeTypeString(vertex->GetType()), vertex);
  }
  ClPrint(amd::LOG_INFO, amd::LOG_CODE, "\n");
  return roots;
}
// Collects the leaf nodes of the graph, i.e. every vertex whose out-degree is zero,
// in the order in which they appear in vertices_.
std::vector<Node> hipGraph::GetLeafNodes() const {
  std::vector<Node> leafNodes;
  for (auto it = vertices_.begin(); it != vertices_.end(); ++it) {
    const bool hasNoChildren = (nodeOutDegree_.at(*it) == 0);
    if (hasNoChildren) {
      leafNodes.push_back(*it);
    }
  }
  return leafNodes;
}
// Returns the number of leaf nodes, i.e. vertices whose out-degree is zero.
size_t hipGraph::GetLeafNodeCount() const {
  // Count in the return type so that no signed-to-unsigned conversion happens on return.
  size_t numLeafNodes = 0;
  for (const auto& entry : vertices_) {
    if (nodeOutDegree_.at(entry) == 0) {
      ++numLeafNodes;
    }
  }
  return numLeafNodes;
}
std::vector<std::pair<Node, Node>> hipGraph::GetEdges() const {
std::vector<std::pair<Node, Node>> edges;
for (const auto& i : edges_) {
for (const auto& j : i.second) {
edges.push_back(std::make_pair(i.first, j));
}
}
return edges;
}
// Recursive depth-first helper used by GetRunList().
// Marks v as visited, appends it to the chain being built (singleList) and then walks
// every child of v stored in edges_[v].
//   v             - node to visit
//   visited       - per-node visited flags, updated in place
//   singleList    - chain currently under construction; it is emptied whenever it is merged
//                   into an existing list or flushed into parallelLists
//   parallelLists - in/out: completed chains of nodes
//   dependencies  - in/out: maps a node to the nodes recorded here as its dependencies
void hipGraph::GetRunListUtil(Node v, std::unordered_map<Node, bool>& visited,
std::vector<Node>& singleList,
std::vector<std::vector<Node>>& parallelLists,
std::unordered_map<Node, std::vector<Node>>& dependencies) {
// Mark the current node as visited.
visited[v] = true;
singleList.push_back(v);
// Recurse for all the vertices adjacent to this vertex
for (auto& adjNode : edges_[v]) {
if (!visited[adjNode]) {
// For the parallel list nodes add parent as the dependency
// singleList can only be empty here when an earlier iteration of this loop already
// merged or flushed the chain containing v, so adjNode starts a new chain and v is
// recorded as its dependency.
if (singleList.empty()) {
ClPrint(amd::LOG_INFO, amd::LOG_CODE,
"[hipGraph] For %s(%p)- add parent as dependency %s(%p)\n",
GetGraphNodeTypeString(adjNode->GetType()), adjNode,
GetGraphNodeTypeString(v->GetType()), v);
dependencies[adjNode].push_back(v);
}
GetRunListUtil(adjNode, visited, singleList, parallelLists, dependencies);
} else {
// adjNode was already visited: try to attach the current chain in front of it.
for (auto& list : parallelLists) {
// Merge singleList when adjNode matches with the first element of the list in existing
// lists
if (adjNode == list[0]) {
// Inserting at begin() while iterating singleList in reverse keeps its original order.
for (auto k = singleList.rbegin(); k != singleList.rend(); ++k) {
list.insert(list.begin(), *k);
}
singleList.erase(singleList.begin(), singleList.end());
}
}
// If the list cannot be merged with the existing list add as dependency
if (!singleList.empty()) {
ClPrint(amd::LOG_INFO, amd::LOG_CODE, "[hipGraph] For %s(%p)- add dependency %s(%p)\n",
GetGraphNodeTypeString(adjNode->GetType()), adjNode,
GetGraphNodeTypeString(v->GetType()), v);
dependencies[adjNode].push_back(v);
}
}
}
// Whatever is left of the chain becomes a list of its own, and the chain is reset so the
// caller continues with an empty singleList.
if (!singleList.empty()) {
parallelLists.push_back(singleList);
singleList.erase(singleList.begin(), singleList.end());
}
}
// Builds the run lists of the graph using the recursive GetRunListUtil() helper.
//   parallelLists - in/out: receives the chains of nodes produced by the traversal
//   dependencies  - in/out: receives, per node, the nodes recorded as its dependencies
// Every vertex that the traversal has not reached yet is used as a new starting point, in
// vertices_ order. The resulting lists are logged with a 1-based list index.
void hipGraph::GetRunList(std::vector<std::vector<Node>>& parallelLists,
                          std::unordered_map<Node, std::vector<Node>>& dependencies) {
  std::vector<Node> singleList;
  // Mark all the vertices as not visited
  std::unordered_map<Node, bool> visited;
  for (auto node : vertices_) visited[node] = false;

  // Call the recursive helper function for all vertices one by one
  for (auto node : vertices_) {
    if (!visited[node]) {
      GetRunListUtil(node, visited, singleList, parallelLists, dependencies);
    }
  }

  for (size_t i = 0; i < parallelLists.size(); i++) {
    for (size_t j = 0; j < parallelLists[i].size(); j++) {
      // The list index is a size_t, so it has to be printed with %zu rather than %d.
      ClPrint(amd::LOG_INFO, amd::LOG_CODE, "[hipGraph] list %zu - %s(%p)\n", i + 1,
              GetGraphNodeTypeString(parallelLists[i][j]->GetType()), parallelLists[i][j]);
    }
  }
}
// Appends the nodes of the graph to `levelOrder` in breadth-first order, starting from the
// root nodes (see GetRootNodes()).
// A child is queued the first time it is reached from a parent that sits exactly one level
// above it; children reached only from other levels at that point are skipped for that parent.
// Always returns hipSuccess.
hipError_t hipGraph::LevelOrder(std::vector<Node>& levelOrder) {
  std::vector<Node> roots = GetRootNodes();
  std::unordered_map<Node, bool> visited;
  std::queue<Node> q;
  for (auto it = roots.begin(); it != roots.end(); it++) {
    q.push(*it);
    ClPrint(amd::LOG_INFO, amd::LOG_CODE, "[hipGraph] %s(%p) level:%d \n",
            GetGraphNodeTypeString((*it)->GetType()), *it, (*it)->GetLevel());
  }
  while (!q.empty()) {
    // Copy the front element: pop() destroys it, so a reference obtained from front()
    // must not be used after the pop.
    Node node = q.front();
    q.pop();
    levelOrder.push_back(node);
    for (const auto& i : edges_[node]) {
      if (visited.find(i) == visited.end() && i->GetLevel() == (node->GetLevel() + 1)) {
        q.push(i);
        ClPrint(amd::LOG_INFO, amd::LOG_CODE, "[hipGraph] %s(%p) level:%d \n",
                GetGraphNodeTypeString(i->GetType()), i, i->GetLevel());
        visited[i] = true;
      }
    }
  }
  return hipSuccess;
}
// Creates one host queue per entry of parallelLists_ on the current device and stores them
// in parallelQueues_ in the same order.
// Queue profiling is enabled when callbacks_table is enabled or HIP_FORCE_QUEUE_PROFILING
// is set.
// Returns hipSuccess, or hipErrorOutOfMemory as soon as a queue cannot be allocated or
// created; queues created before the failure stay in parallelQueues_.
hipError_t hipGraphExec::CreateQueues() {
  parallelQueues_.reserve(parallelLists_.size());
  for (size_t i = 0; i < parallelLists_.size(); i++) {
    cl_command_queue_properties properties =
        (callbacks_table.is_enabled() || HIP_FORCE_QUEUE_PROFILING) ? CL_QUEUE_PROFILING_ENABLE : 0;
    amd::HostQueue* queue = new amd::HostQueue(*hip::getCurrentDevice()->asContext(),
                                               *hip::getCurrentDevice()->devices()[0], properties);
    // Create a host queue
    const bool result = (queue != nullptr) ? queue->create() : false;
    if (!result) {
      if (queue != nullptr) {
        // create() failed: drop our reference so the queue object is not leaked.
        queue->release();
      }
      ClPrint(amd::LOG_ERROR, amd::LOG_CODE, "[hipGraph] Failed to create host queue\n");
      return hipErrorOutOfMemory;
    }
    parallelQueues_.push_back(queue);
  }
  return hipSuccess;
}
// Creates the commands of every node and wires up the inter-node wait lists.
// Step 1: every node of parallelLists_[k] creates its commands on parallelQueues_[k].
// Step 2: for every node of levelOrder_ that has an entry in nodeWaitLists_, all commands of
//         its dependency nodes are gathered into one wait list, which is then applied to
//         each command of that node.
// Returns hipSuccess, or the first error reported by a node's CreateCommand().
hipError_t hipGraphExec::FillCommands() {
  // Initialised so that a graph without any run list returns a defined value.
  hipError_t status = hipSuccess;

  // Create commands
  size_t queueIndex = 0;
  for (const auto& list : parallelLists_) {
    for (auto& node : list) {
      status = node->CreateCommand(parallelQueues_[queueIndex]);
      if (status != hipSuccess) return status;
    }
    ++queueIndex;
  }

  // Add waitlists for all the commands
  for (auto& node : levelOrder_) {
    auto nodeWaitList = nodeWaitLists_.find(node);
    if (nodeWaitList == nodeWaitLists_.end()) {
      continue;  // this node has no recorded dependencies
    }
    amd::Command::EventWaitList waitList;
    for (auto depNode : nodeWaitList->second) {
      for (auto command : depNode->GetCommands()) {
        waitList.push_back(command);
      }
    }
    for (auto command : nodeWaitList->first->GetCommands()) {
      command->updateEventWaitList(waitList);
    }
  }
  return status;
}
// Prepares the executable graph for launching: creates the per-list queues, creates the
// node commands, clears rootCommand_ and collects into graphLastCmdWaitList_ the final
// command of every run list.
// Returns hipSuccess, or the error reported by CreateQueues() / FillCommands().
hipError_t hipGraphExec::Init() {
  hipError_t status = CreateQueues();
  if (status != hipSuccess) {
    return status;
  }
  status = FillCommands();
  if (status != hipSuccess) {
    return status;
  }
  rootCommand_ = nullptr;
  /// stream should execute next command after graph finishes
  /// Add marker to the stream that waits for all the last commands in parallel queues of graph
  for (auto& singleList : parallelLists_) {
    // Walk backwards to the last node that actually owns a command. Calling back() on an
    // empty list or an empty command vector would be undefined behaviour, so lists/nodes
    // without commands are skipped (UpdateGraphToWaitOnRoot() applies the same guards).
    for (auto it = singleList.rbegin(); it != singleList.rend(); ++it) {
      const auto& commands = (*it)->GetCommands();
      if (!commands.empty()) {
        graphLastCmdWaitList_.push_back(commands.back());
        break;
      }
    }
  }
  return status;
}
// Event callback registered by Run() on the marker that ends a graph launch.
//   event, command_exec_status - not used by this function
//   user_data                  - the amd::Command* that Run() stored as key in
//                                activeGraphExec_
// Resets the status of every node command and of the root command to CL_INT_MAX and clears
// the "execution pending" flag. Logs an error when no graph is registered for the command.
void hipGraphExec::ResetGraph(cl_event event, cl_int command_exec_status, void* user_data) {
  ClPrint(amd::LOG_INFO, amd::LOG_CODE, "[hipGraph] Inside resetGraph!\n");

  // Look the key up with find(): operator[] would insert a null entry for an unknown command.
  hipGraphExec_t graphExec = nullptr;
  auto active = hipGraphExec::activeGraphExec_.find(static_cast<amd::Command*>(user_data));
  if (active != hipGraphExec::activeGraphExec_.end()) {
    graphExec = active->second;
  }

  if (graphExec == nullptr) {
    ClPrint(amd::LOG_ERROR, amd::LOG_CODE, "[hipGraph] graphExec is nullptr during resetGraph!\n");
    return;
  }

  for (auto& node : graphExec->levelOrder_) {
    for (auto& command : node->GetCommands()) {
      command->resetStatus(CL_INT_MAX);
    }
  }
  graphExec->rootCommand_->resetStatus(CL_INT_MAX);
  graphExec->bExecPending_.store(false);
}
// Makes the first command of the first node of every run list wait on rootCommand_.
// Run lists that are empty, or whose first node has no commands, are left untouched.
// Always returns hipSuccess.
hipError_t hipGraphExec::UpdateGraphToWaitOnRoot() {
  for (auto& runList : parallelLists_) {
    amd::Command::EventWaitList rootWaitList;
    rootWaitList.push_back(rootCommand_);

    if (runList.empty()) {
      continue;
    }
    auto headCommands = runList[0]->GetCommands();
    if (headCommands.empty()) {
      continue;
    }
    headCommands[0]->updateEventWaitList(rootWaitList);
  }
  return hipSuccess;
}
// Launches the executable graph on `stream`.
// Sequence:
//   1. If a previous launch of this graph is still pending, wait for its last command.
//   2. (Re)create rootCommand_ as a marker on the stream's queue when there is none yet or it
//      belongs to another queue, and make the run lists wait on it.
//   3. Enqueue the root command followed by every node command in levelOrder_ order.
//   4. Enqueue a marker that waits on graphLastCmdWaitList_; its CL_COMPLETE callback
//      (ResetGraph) re-arms the graph.
//   5. Enqueue a second marker that waits on the first one, stalling the stream until then.
// Returns hipSuccess, hipErrorInvalidResourceHandle when no queue can be obtained for the
// stream, hipErrorOutOfMemory / hipErrorInvalidValue when a marker cannot be allocated, or
// hipErrorInvalidHandle when the callback cannot be registered.
hipError_t hipGraphExec::Run(hipStream_t stream) {
  if (bExecPending_.load() == true) {
    ClPrint(
        amd::LOG_INFO, amd::LOG_CODE,
        "[hipGraph] Same graph launched while previous one is active, wait for it to finish!\n");
    lastEnqueuedGraphCmd_->awaitCompletion();
  }
  amd::HostQueue* queue = hip::getQueue(stream);
  if (queue == nullptr) {
    return hipErrorInvalidResourceHandle;
  }
  if (rootCommand_ == nullptr || rootCommand_->queue() != queue) {
    if (rootCommand_ != nullptr) {
      rootCommand_->release();
    }
    rootCommand_ = new amd::Marker(*queue, false, {});
    UpdateGraphToWaitOnRoot();
  }
  rootCommand_->enqueue();
  for (auto& node : levelOrder_) {
    for (auto& command : node->GetCommands()) {
      command->enqueue();
    }
  }
  amd::Command* command = new amd::Marker(*queue, false, graphLastCmdWaitList_);
  if (command == nullptr) {
    return hipErrorOutOfMemory;
  }
  amd::Event& event = command->event();
  if (!event.setCallback(CL_COMPLETE, hipGraphExec::ResetGraph, command)) {
    // The marker was never enqueued; drop our reference so it is not leaked.
    command->release();
    return hipErrorInvalidHandle;
  }
  hipGraphExec::activeGraphExec_[command] = this;
  // NOTE(review): lastEnqueuedGraphCmd_ keeps pointing at `command` after our reference is
  // released at the end of this function - confirm that something else keeps the command
  // alive until the next Run() may call awaitCompletion() on it.
  lastEnqueuedGraphCmd_ = command;
  bExecPending_.store(true);
  command->enqueue();
  // Add the new barrier to stall the stream, until the callback is done
  amd::Command::EventWaitList eventWaitList;
  eventWaitList.push_back(command);
  amd::Command* block_command = new amd::Marker(*queue, !kMarkerDisableFlush, eventWaitList);
  if (block_command == nullptr) {
    // Release our reference to the already enqueued marker before bailing out.
    command->release();
    return hipErrorInvalidValue;
  }
  block_command->enqueue();
  block_command->release();
  command->release();
  return hipSuccess;
}