* The QueuePair object was out of scope at the end of the for loop.
So the deconstructor was called.
* Although correct for C++ to do this, it ignores that we copied the QueuePair object into
device memory and have an instance there.
* Early deconstruction resulted in calling ibv_dereg_mr on the atomics memory region.
So when the GPU kernel tried to use the memory region it wasn't
registered which resulted in a protection domain error.
* The solution was to allocate our QueuePair obj with the new operator which leaves memory
management to us, then we can manually call the deconstructor.
[ROCm/rocshmem commit: e856fbb0eb]
Tá an tiomantas seo le fáil i:
@@ -715,16 +715,35 @@ void GDABackend::cleanup_heap_memory_rkey() {
|
||||
}
|
||||
|
||||
void GDABackend::setup_gpu_qps() {
|
||||
CHECK_HIP(hipMalloc(&gpu_qps, sizeof(QueuePair) * (maximum_num_contexts_ + 1) * num_pes));
|
||||
for (int i = 0; i < (maximum_num_contexts_ + 1) * num_pes; i++) {
|
||||
QueuePair qp(pd_orig);
|
||||
CHECK_HIP(hipMemcpy(&gpu_qps[i], &qp, sizeof(QueuePair), hipMemcpyDefault));
|
||||
size_t qp_objs_count;
|
||||
size_t qp_objs_mem_size;
|
||||
|
||||
qp_objs_count = (maximum_num_contexts_ + 1) * num_pes;
|
||||
qp_objs_mem_size = sizeof(QueuePair) * qp_objs_count;
|
||||
|
||||
CHECK_HIP(hipMalloc(&gpu_qps, qp_objs_mem_size));
|
||||
|
||||
host_qps = (QueuePair*) malloc(qp_objs_mem_size);
|
||||
CHECK_NNULL(host_qps, "malloc (host_qps)");
|
||||
|
||||
for (int i = 0; i < qp_objs_count; i++) {
|
||||
new (&host_qps[i]) QueuePair(pd_orig);
|
||||
CHECK_HIP(hipMemcpy(&gpu_qps[i], &host_qps[i], sizeof(QueuePair), hipMemcpyDefault));
|
||||
initialize_gpu_qp(&gpu_qps[i], i);
|
||||
}
|
||||
}
|
||||
|
||||
void GDABackend::cleanup_gpu_qps() {
|
||||
//TODO need to destruct qp[i]?
|
||||
size_t qp_objs_count;
|
||||
|
||||
qp_objs_count = (maximum_num_contexts_ + 1) * num_pes;
|
||||
|
||||
for (int i = 0; i < qp_objs_count; i++) {
|
||||
host_qps[i].~QueuePair();
|
||||
}
|
||||
|
||||
free(host_qps);
|
||||
|
||||
CHECK_HIP(hipFree(gpu_qps));
|
||||
gpu_qps = nullptr;
|
||||
}
|
||||
|
||||
@@ -119,6 +119,7 @@ class GDABackend : public Backend {
|
||||
|
||||
uint32_t sq_size = 1024;
|
||||
uint32_t inline_threshold = 8;
|
||||
QueuePair *host_qps = nullptr;
|
||||
QueuePair *gpu_qps = nullptr;
|
||||
std::vector<ibv_qp*> qps;
|
||||
std::vector<ibv_cq*> cqs;
|
||||
|
||||
Tagairt in Eagrán Nua
Cuir bac ar úsáideoir