SWDEV-538606 - Handle updateStreams from multiple threads (#505)
このコミットが含まれているのは:
@@ -64,6 +64,9 @@ std::unordered_set<GraphExec*> GraphExec::graphExecSet_;
|
||||
// Guards global exec graph set
|
||||
// we have graphExec object as part of child graph and we need recursive lock
|
||||
amd::Monitor GraphExec::graphExecSetLock_(true);
|
||||
// Serialize the creation of internal streams from multiple threads, ensuring that each stream is
|
||||
// mapped to different HSA queues.
|
||||
amd::Monitor GraphExec::graphExecStreamCreateLock_(true);
|
||||
std::unordered_set<UserObject*> UserObject::ObjectSet_;
|
||||
// Guards global user object
|
||||
amd::Monitor UserObject::UserObjectLock_{};
|
||||
@@ -330,6 +333,7 @@ bool GraphExec::isGraphExecValid(GraphExec* pGraphExec) {
|
||||
|
||||
// ================================================================================================
|
||||
hipError_t GraphExec::CreateStreams(uint32_t num_streams) {
|
||||
amd::ScopedLock lock(graphExecStreamCreateLock_);
|
||||
parallel_streams_.reserve(num_streams);
|
||||
for (uint32_t i = 0; i < num_streams; ++i) {
|
||||
auto stream = new hip::Stream(hip::getCurrentDevice(), hip::Stream::Priority::Normal,
|
||||
@@ -490,21 +494,32 @@ hipError_t GraphExec::EnqueueGraphWithSingleList(hip::Stream* hip_stream) {
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
void Graph::UpdateStreams(hip::Stream* launch_stream,
|
||||
const std::vector<hip::Stream*>& parallel_streams) {
|
||||
// Allocate array for parallel streams, based on the graph scheduling + current stream
|
||||
// We create extra stream to avoid collision
|
||||
streams_.resize(max_streams_);
|
||||
hipError_t Graph::UpdateStreams(hip::Stream* launch_stream,
|
||||
const std::vector<hip::Stream*>& parallel_streams) {
|
||||
// Current stream is the default in the assignment
|
||||
streams_[0] = launch_stream;
|
||||
// Assign the streams in the array of all streams
|
||||
// Avoid stream that has collision with launch stream
|
||||
for (uint32_t i = 1, j = 0; i < streams_.size(); j++) {
|
||||
assert(j != parallel_streams.size());
|
||||
if (launch_stream->getQueueID() != parallel_streams[j]->getQueueID()) {
|
||||
streams_[i++] = parallel_streams[j];
|
||||
}
|
||||
streams_.push_back(launch_stream);
|
||||
int* unique_stream_ids = new int[GPU_MAX_HW_QUEUES]();
|
||||
if (unique_stream_ids == nullptr) {
|
||||
LogError("Stream id array allocation is nullptr!");
|
||||
return hipErrorOutOfMemory;
|
||||
}
|
||||
unique_stream_ids[launch_stream->getQueueID()] = 1;
|
||||
std::vector<hip::Stream*> collided_streams;
|
||||
// Assign streams that are unique in parallel_streams and doesnt collide with launch stream
|
||||
for (uint32_t i = 0; i < parallel_streams.size(); i++) {
|
||||
if (unique_stream_ids[parallel_streams[i]->getQueueID()] == 0) {
|
||||
streams_.push_back(parallel_streams[i]);
|
||||
} else {
|
||||
collided_streams.push_back(parallel_streams[i]);
|
||||
}
|
||||
unique_stream_ids[parallel_streams[i]->getQueueID()]++;
|
||||
}
|
||||
// Assign the remaining streams for execution.
|
||||
for (int i = streams_.size(), j = 0; i < max_streams_ && j < collided_streams.size(); i++, j++) {
|
||||
streams_.push_back(collided_streams[j]);
|
||||
}
|
||||
delete[] unique_stream_ids;
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
|
||||
@@ -728,7 +743,10 @@ hipError_t GraphExec::Run(hip::Stream* launch_stream) {
|
||||
}
|
||||
} else {
|
||||
// Update streams for the graph execution
|
||||
UpdateStreams(launch_stream, parallel_streams_);
|
||||
status = UpdateStreams(launch_stream, parallel_streams_);
|
||||
if (status != hipSuccess) {
|
||||
return status;
|
||||
}
|
||||
// Execute all nodes in the graph
|
||||
if (!RunNodes()) {
|
||||
LogError("Failed to launch nodes!");
|
||||
|
||||
@@ -601,7 +601,7 @@ class Graph {
|
||||
void ScheduleNodes();
|
||||
|
||||
//! Update streams for the graph execution
|
||||
void UpdateStreams(
|
||||
hipError_t UpdateStreams(
|
||||
hip::Stream* launch_stream, //!< Launch stream from the application
|
||||
const std::vector<hip::Stream*>& parallel_stream //!< The list of parallel streams
|
||||
);
|
||||
@@ -735,6 +735,7 @@ class GraphExec : public amd::ReferenceCountedObject, public Graph {
|
||||
public:
|
||||
static std::unordered_set<GraphExec*> graphExecSet_;
|
||||
static amd::Monitor graphExecSetLock_;
|
||||
static amd::Monitor graphExecStreamCreateLock_;
|
||||
GraphExec(uint64_t flags = 0)
|
||||
: ReferenceCountedObject(), Graph(hip::getCurrentDevice()), flags_(flags) {
|
||||
amd::ScopedLock lock(graphExecSetLock_);
|
||||
|
||||
新しいイシューから参照
ユーザーをブロックする