95f721f8a5
* Check emulator mode at runtime * Reduce emu mode function call to one time and use result * Move function to main.cc * Address feedback * EmuMode check improvement; convert to AoS * replace g_isEmuMode with func call * Add mode check func for every sample
609 lines
19 KiB
C++
Executable File
609 lines
19 KiB
C++
Executable File
/*
|
|
* =============================================================================
|
|
* ROC Runtime Conformance Release License
|
|
* =============================================================================
|
|
* The University of Illinois/NCSA
|
|
* Open Source License (NCSA)
|
|
*
|
|
* Copyright (c) 2017, Advanced Micro Devices, Inc.
|
|
* All rights reserved.
|
|
*
|
|
* Developed by:
|
|
*
|
|
* AMD Research and AMD ROC Software Development
|
|
*
|
|
* Advanced Micro Devices, Inc.
|
|
*
|
|
* www.amd.com
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
* of this software and associated documentation files (the "Software"), to
|
|
* deal with the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* - Redistributions of source code must retain the above copyright notice,
|
|
* this list of conditions and the following disclaimers.
|
|
* - Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimers in
|
|
* the documentation and/or other materials provided with the distribution.
|
|
* - Neither the names of <Name of Development Group, Name of Institution>,
|
|
* nor the names of its contributors may be used to endorse or promote
|
|
* products derived from this Software without specific prior written
|
|
* permission.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
|
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
|
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
* DEALINGS WITH THE SOFTWARE.
|
|
*
|
|
*/
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <sys/mman.h>
|
|
#include <unistd.h>
|
|
#include <sys/wait.h>
|
|
|
|
#include <cassert>
|
|
#include <iostream>
|
|
|
|
#include "hsa/hsa.h"
|
|
#include "hsa/hsa_ext_amd.h"
|
|
|
|
|
|
// Numeric identifier for a shared-memory segment. Nothing in the visible part
// of this file references it: main() obtains its shared region with an
// anonymous mmap() instead.
// NOTE(review): presumably left over from an earlier keyed shared-memory
// implementation -- confirm before removing.
static const uint32_t kShmemID = 1594685;
|
|
|
|
// Reports a failed HSA call on stdout and returns its status from the
// enclosing function. Does nothing when the status is HSA_STATUS_SUCCESS.
//
//  - `err` is evaluated exactly once, so passing an expression with side
//    effects is safe.
//  - If hsa_status_string() cannot supply a description, the message pointer
//    stays null; a placeholder is printed instead of streaming a null
//    `const char*` (which is undefined behaviour).
//  - Wrapped in do { } while (0) so the macro behaves as a single statement;
//    every use must be terminated with ';'.
#define RET_IF_HSA_ERR(err)                                                  \
  do {                                                                       \
    const hsa_status_t ret_if_hsa_err_status_ = (err);                       \
    if (ret_if_hsa_err_status_ != HSA_STATUS_SUCCESS) {                      \
      const char* ret_if_hsa_err_msg_ = nullptr;                             \
      hsa_status_string(ret_if_hsa_err_status_, &ret_if_hsa_err_msg_);       \
      std::cout << "hsa api call failure at line " << __LINE__               \
                << ", file: " << __FILE__ << ". Call returned "              \
                << ret_if_hsa_err_status_ << std::endl;                      \
      std::cout << (ret_if_hsa_err_msg_ != nullptr                           \
                        ? ret_if_hsa_err_msg_                                \
                        : "(no status description available)")              \
                << std::endl;                                                \
      return ret_if_hsa_err_status_;                                         \
    }                                                                        \
  } while (0)
|
|
|
|
// Reports whether the amdgpu kernel module's "emu_mode" parameter is non-zero.
//
// The sysfs file is read only on the first call; the result is stored in a
// function-local static and returned unchanged on every later call. If the
// file cannot be opened or does not start with a decimal integer, a message is
// printed to stdout and the answer is false.
bool isEmuModeEnabled() {
  static bool cached_result = [] {
    FILE* param_file = fopen("/sys/module/amdgpu/parameters/emu_mode", "r");
    if (param_file == nullptr) {
      std::cout << "Failed to open file." << std::endl;
      return false;
    }

    int value = 0;
    const bool parsed = (fscanf(param_file, "%d", &value) == 1);
    if (!parsed) {
      std::cout << "Failed to parse as a decimal." << std::endl;
    }
    fclose(param_file);

    return parsed && value != 0;
  }();

  return cached_result;
}
|
|
|
|
struct callback_args {
|
|
hsa_agent_t host;
|
|
hsa_agent_t device;
|
|
hsa_amd_memory_pool_t cpu_pool;
|
|
hsa_amd_memory_pool_t gpu_pool;
|
|
size_t gpu_mem_granule;
|
|
};
|
|
|
|
// This function will test whether the provided memory pool is 1) in the
|
|
// GLOBAL segment, 2) allows allocation and 3) is accessible by the provided
|
|
// agent. If the provided pool meets these criteria, HSA_STATUS_INFO_BREAK is
|
|
// returned
|
|
static hsa_status_t
|
|
FindPool(hsa_amd_memory_pool_t in_pool, hsa_agent_t agent) {
|
|
hsa_amd_segment_t segment;
|
|
hsa_status_t err;
|
|
|
|
err = hsa_amd_memory_pool_get_info(in_pool,
|
|
HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment);
|
|
RET_IF_HSA_ERR(err);
|
|
if (segment != HSA_AMD_SEGMENT_GLOBAL) {
|
|
return HSA_STATUS_SUCCESS;
|
|
}
|
|
|
|
bool canAlloc;
|
|
err = hsa_amd_memory_pool_get_info(in_pool,
|
|
HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED, &canAlloc);
|
|
RET_IF_HSA_ERR(err);
|
|
if (!canAlloc) {
|
|
return HSA_STATUS_SUCCESS;
|
|
}
|
|
|
|
hsa_amd_memory_pool_access_t access =
|
|
HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED;
|
|
err = hsa_amd_agent_memory_pool_get_info(agent, in_pool,
|
|
HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &access);
|
|
RET_IF_HSA_ERR(err);
|
|
|
|
if (access == HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) {
|
|
return HSA_STATUS_SUCCESS;
|
|
}
|
|
|
|
return HSA_STATUS_INFO_BREAK;
|
|
}
|
|
|
|
// Callback function for hsa_amd_agent_iterate_memory_pools(). If the provided
|
|
// pool is suitable (see comments for FindPool()), HSA_STATUS_INFO_BREAK is
|
|
// returned. The input parameter "data" should point to memory for a "struct
|
|
// callback_args", which includes a gpu pool and a granule field. These fields
|
|
// will be filled in by this function if the provided pool meets all the
|
|
// requirements.
|
|
static hsa_status_t FindDevicePool(hsa_amd_memory_pool_t pool, void* data) {
|
|
hsa_status_t err;
|
|
|
|
if (nullptr == data) {
|
|
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
|
}
|
|
|
|
struct callback_args *args = (struct callback_args *)data;
|
|
|
|
err = FindPool(pool, args->device);
|
|
|
|
if (err == HSA_STATUS_INFO_BREAK) {
|
|
args->gpu_pool = pool;
|
|
|
|
if (isEmuModeEnabled()) {
|
|
args->gpu_mem_granule = 4;
|
|
} else {
|
|
err = hsa_amd_memory_pool_get_info(args->gpu_pool,
|
|
HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE, &args->gpu_mem_granule);
|
|
RET_IF_HSA_ERR(err);
|
|
}
|
|
|
|
// We found what we were looking for, so return HSA_STATUS_INFO_BREAK
|
|
return HSA_STATUS_INFO_BREAK;
|
|
}
|
|
|
|
return HSA_STATUS_SUCCESS;
|
|
}
|
|
|
|
// Callback function for hsa_amd_agent_iterate_memory_pools(). If the provided
|
|
// pool is suitable (see comments for FindPool()), HSA_STATUS_INFO_BREAK is
|
|
// returned. The input parameter "data" should point to memory for a "struct
|
|
// callback_args", which includes a cpu pool. This field will be filled in by
|
|
// this function if the provided pool meets all the requirements.
|
|
static hsa_status_t FindCPUPool(hsa_amd_memory_pool_t pool, void* data) {
|
|
hsa_status_t err;
|
|
|
|
if (nullptr == data) {
|
|
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
|
}
|
|
|
|
struct callback_args *args = (struct callback_args *)data;
|
|
|
|
err = FindPool(pool, args->host);
|
|
|
|
if (err == HSA_STATUS_INFO_BREAK) {
|
|
args->cpu_pool = pool;
|
|
}
|
|
return err;
|
|
}
|
|
|
|
|
|
// This function is meant to be a call-back to hsa_iterate_agents. Find the
|
|
// first GPU agent that has memory accessible by CPU
|
|
// Return values:
|
|
// HSA_STATUS_INFO_BREAK -- 2 GPU agents have been found and stored. Iterator
|
|
// should stop iterating
|
|
// HSA_STATUS_SUCCESS -- 2 GPU agents have not yet been found; iterator
|
|
// should keep iterating
|
|
// Other -- Some error occurred
|
|
static hsa_status_t FindGpu(hsa_agent_t agent, void *data) {
|
|
if (data == NULL) {
|
|
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
|
}
|
|
|
|
hsa_device_type_t hsa_device_type;
|
|
hsa_status_t err = hsa_agent_get_info(agent,
|
|
HSA_AGENT_INFO_DEVICE, &hsa_device_type);
|
|
RET_IF_HSA_ERR(err);
|
|
|
|
if (hsa_device_type != HSA_DEVICE_TYPE_GPU) {
|
|
return HSA_STATUS_SUCCESS;
|
|
}
|
|
|
|
struct callback_args *args = (struct callback_args *)data;
|
|
|
|
// Make sure GPU device has pool host can access
|
|
args->device = agent;
|
|
err = hsa_amd_agent_iterate_memory_pools(agent, FindDevicePool, args);
|
|
|
|
if (err == HSA_STATUS_INFO_BREAK) {
|
|
// We were looking for, so return HSA_STATUS_INFO_BREAK
|
|
return HSA_STATUS_INFO_BREAK;
|
|
} else {
|
|
args->device = {0};
|
|
}
|
|
|
|
RET_IF_HSA_ERR(err);
|
|
|
|
// Returning HSA_STATUS_SUCCESS tells the calling iterator to keep iterating
|
|
return HSA_STATUS_SUCCESS;
|
|
}
|
|
|
|
// This function is meant to be a call-back to hsa_iterate_agents. For each
|
|
// input agent the iterator provides as input, this function will check to
|
|
// see if the input agent is a CPU. If so, it will update the callback_args
|
|
// structure pointed to by the input parameter "data".
|
|
|
|
// Return values:
|
|
// HSA_STATUS_INFO_BREAK -- CPU agent has been found and stored. Iterator
|
|
// should stop iterating
|
|
// HSA_STATUS_SUCCESS -- CPU agent has not yet been found; iterator
|
|
// should keep iterating
|
|
// Other -- Some error occurred
|
|
static hsa_status_t FindCPUDevice(hsa_agent_t agent, void *data) {
|
|
if (data == NULL) {
|
|
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
|
}
|
|
|
|
hsa_device_type_t hsa_device_type;
|
|
hsa_status_t err = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE,
|
|
&hsa_device_type);
|
|
RET_IF_HSA_ERR(err);
|
|
|
|
if (hsa_device_type == HSA_DEVICE_TYPE_CPU) {
|
|
struct callback_args *args = (struct callback_args *)data;
|
|
|
|
args->host = agent;
|
|
|
|
err = hsa_amd_agent_iterate_memory_pools(agent, FindCPUPool, args);
|
|
|
|
if (err == HSA_STATUS_INFO_BREAK) { // we found what we were looking for
|
|
return HSA_STATUS_INFO_BREAK;
|
|
} else {
|
|
args->host = {0};
|
|
return err;
|
|
}
|
|
}
|
|
|
|
// Returning HSA_STATUS_SUCCESS tells the calling iterator to keep iterating
|
|
return HSA_STATUS_SUCCESS;
|
|
}
|
|
|
|
// This function will test whether the gpu-local buffer has been filled
|
|
// with an expected value and return an error if not. The expected value is
|
|
// also replaced with a new value.
|
|
// Implementation notes: We create a buffer in system memory and copy
|
|
// the gpu-local data buffer to be tested to this system memory buffer.
|
|
// We also write the system memory buffer with the new value, and then copy
|
|
// it back the gpu-local buffer.
|
|
static hsa_status_t
|
|
CheckAndFillBuffer(struct callback_args *args, void *gpu_src_ptr,
|
|
uint32_t exp_cur_val, uint32_t new_val) {
|
|
hsa_signal_t copy_signal;
|
|
size_t sz = args->gpu_mem_granule;
|
|
hsa_agent_t cpu_ag = args->host;
|
|
hsa_agent_t gpu_ag = args->device;
|
|
hsa_status_t err;
|
|
|
|
err = hsa_signal_create(1, 0, NULL, ©_signal);
|
|
RET_IF_HSA_ERR(err);
|
|
|
|
uint32_t *sysBuf;
|
|
|
|
err = hsa_amd_memory_pool_allocate(args->cpu_pool, sz, 0,
|
|
reinterpret_cast<void **>(&sysBuf));
|
|
RET_IF_HSA_ERR(err);
|
|
|
|
hsa_agent_t ag_list[2] = {args->device, args->host};
|
|
err = hsa_amd_agents_allow_access(2, ag_list, NULL, sysBuf);
|
|
RET_IF_HSA_ERR(err);
|
|
|
|
err = hsa_amd_memory_async_copy(sysBuf, cpu_ag, gpu_src_ptr, gpu_ag,
|
|
sz, 0, NULL, copy_signal);
|
|
RET_IF_HSA_ERR(err);
|
|
|
|
if (hsa_signal_wait_relaxed(copy_signal, HSA_SIGNAL_CONDITION_LT,
|
|
1, -1, HSA_WAIT_STATE_BLOCKED) != 0) {
|
|
printf("Async copy returned error value.\n");
|
|
return HSA_STATUS_ERROR;
|
|
}
|
|
|
|
uint32_t count = sz/sizeof(uint32_t);
|
|
|
|
for (uint32_t i = 0; i < count; ++i) {
|
|
if (sysBuf[i] != exp_cur_val) {
|
|
fprintf(stdout, "Expected %d but got %d in buffer.\n",
|
|
exp_cur_val, sysBuf[i]);
|
|
err = HSA_STATUS_ERROR;
|
|
break;
|
|
}
|
|
sysBuf[i] = new_val;
|
|
}
|
|
|
|
hsa_signal_store_relaxed(copy_signal, 1);
|
|
|
|
err = hsa_amd_memory_async_copy(gpu_src_ptr, gpu_ag, sysBuf, cpu_ag,
|
|
sz, 0, NULL, copy_signal);
|
|
RET_IF_HSA_ERR(err);
|
|
|
|
if (hsa_signal_wait_relaxed(copy_signal, HSA_SIGNAL_CONDITION_LT,
|
|
1, -1, HSA_WAIT_STATE_BLOCKED) != 0) {
|
|
printf("Async copy returned error value.\n");
|
|
return HSA_STATUS_ERROR;
|
|
}
|
|
|
|
err = hsa_signal_destroy(copy_signal);
|
|
RET_IF_HSA_ERR(err);
|
|
|
|
err = hsa_amd_memory_pool_free(sysBuf);
|
|
RET_IF_HSA_ERR(err);
|
|
|
|
return HSA_STATUS_SUCCESS;
|
|
}
|
|
|
|
// Publishes `newVal` through the shared token unless the token currently holds
// the error marker -1 (written by the other process), in which case a message
// is printed and this process exits with status -1.
static void CheckAndSetToken(volatile int *token, int newVal) {
  const bool peer_failed = (*token == -1);

  if (!peer_failed) {
    *token = newVal;
    return;
  }

  printf("Error in other process. Exiting.\n");
  exit(-1);
}
|
|
|
|
// Summary of this IPC Sample:
|
|
// This program demonstrates the IPC apis. Run it by executing 2 instances
|
|
// of the program.
|
|
// The first process will allocate some gpu-local memory and fill it with
|
|
// 1's. This HSA buffer will be made shareable with hsa_amd_ipc_memory_create()
|
|
// The 2nd process will access this shared buffer with
|
|
// hsa_amd_ipc_memory_attach(), verify that 1's were written, and then fill
|
|
// the buffer with 2's. Finally, the first process will then read the
|
|
// gpu-local buffer and verify that the 2's were indeed written. The main
|
|
// point is to show how hsa memory buffer handles can be shared among
|
|
// processes.
|
|
//
|
|
// Implementation Notes:
|
|
// -Standard linux shared memory is used in this sample program as a way
|
|
// of sharing info and synchronizing the 2 processes. This is independent
|
|
// of RocR IPC and should not be confused with it.
|
|
int main(int argc, char** argv) {
|
|
// IPC test
|
|
struct Shared {
|
|
volatile int token;
|
|
volatile int count;
|
|
volatile size_t size;
|
|
volatile hsa_amd_ipc_memory_t handle;
|
|
volatile hsa_amd_ipc_signal_t signal_handle;
|
|
};
|
|
|
|
// Allocate linux shared memory.
|
|
Shared* shared = (Shared*)mmap(nullptr, sizeof(Shared), PROT_READ | PROT_WRITE,
|
|
MAP_SHARED | MAP_ANONYMOUS, -1, 0);
|
|
if (shared == MAP_FAILED) {
|
|
fprintf(stdout, "Unable to allocate shared memory. Exiting.\n");
|
|
return -1;
|
|
}
|
|
|
|
// "token" is used to signal state changes between the 2 processes.
|
|
volatile int* token = &shared->token;
|
|
*token = 0;
|
|
bool processOne;
|
|
|
|
// Spawn second process and verify communication
|
|
int child = fork();
|
|
if (child == -1) {
|
|
printf("fork failed. Exiting.\n");
|
|
return -1;
|
|
}
|
|
if (child != 0) {
|
|
processOne = true;
|
|
|
|
// Signal to other process we are waiting, and then wait...
|
|
*token = 1;
|
|
while (*token == 1) {
|
|
sched_yield();
|
|
}
|
|
|
|
fprintf(stdout, "Second process observed, handshake...\n");
|
|
*token = 1;
|
|
while (*token == 1) {
|
|
sched_yield();
|
|
}
|
|
} else {
|
|
processOne = false;
|
|
fprintf(stdout, "Second process running.\n");
|
|
|
|
while (*token == 0) {
|
|
sched_yield();
|
|
}
|
|
|
|
CheckAndSetToken(token, 0);
|
|
// Wait for handshake
|
|
while (*token == 0) {
|
|
sched_yield();
|
|
}
|
|
CheckAndSetToken(token, 0);
|
|
fprintf(stdout, "Handshake complete.\n");
|
|
}
|
|
|
|
hsa_status_t err;
|
|
|
|
err = hsa_init();
|
|
RET_IF_HSA_ERR(err);
|
|
|
|
struct callback_args args = {0, 0, 0};
|
|
|
|
err = hsa_iterate_agents(FindCPUDevice, &args);
|
|
assert(err == HSA_STATUS_INFO_BREAK);
|
|
if (err != HSA_STATUS_INFO_BREAK) {
|
|
return -1;
|
|
}
|
|
|
|
err = hsa_iterate_agents(FindGpu, &args);
|
|
|
|
if (err != HSA_STATUS_INFO_BREAK) {
|
|
printf(
|
|
"No GPU with accessible VRAM required for this program found. Exiting\n");
|
|
return -1;
|
|
}
|
|
|
|
// Print out name of the device.
|
|
char name1[64] = {0};
|
|
char name2[64] = {0};
|
|
err = hsa_agent_get_info(args.host, HSA_AGENT_INFO_NAME, name1);
|
|
RET_IF_HSA_ERR(err);
|
|
err = hsa_agent_get_info(args.device, HSA_AGENT_INFO_NAME, name2);
|
|
RET_IF_HSA_ERR(err);
|
|
uint16_t loc1, loc2;
|
|
err = hsa_agent_get_info(args.host,
|
|
(hsa_agent_info_t)HSA_AMD_AGENT_INFO_BDFID, &loc1);
|
|
RET_IF_HSA_ERR(err);
|
|
err = hsa_agent_get_info(args.device,
|
|
(hsa_agent_info_t)HSA_AMD_AGENT_INFO_BDFID, &loc2);
|
|
RET_IF_HSA_ERR(err);
|
|
fprintf(stdout, "Using: %s (%d) and %s (%d)\n", name1, loc1, name2, loc2);
|
|
|
|
// Get signal for async copy
|
|
hsa_signal_t copy_signal;
|
|
err = hsa_signal_create(1, 0, NULL, ©_signal);
|
|
RET_IF_HSA_ERR(err);
|
|
|
|
// Wrap printf to add first or second process indicator
|
|
#define PROCESS_LOG(format, ...) \
|
|
fprintf(stdout, "line:%d P%u: " format, \
|
|
__LINE__, static_cast<int>(!processOne), ##__VA_ARGS__);
|
|
|
|
hsa_agent_t ag_list[2] = {args.device, args.host};
|
|
|
|
if (processOne) {
|
|
// Allocate some VRAM and fill it with 1's
|
|
uint32_t* gpuBuf = NULL;
|
|
err = hsa_amd_memory_pool_allocate(args.gpu_pool, args.gpu_mem_granule, 0,
|
|
reinterpret_cast<void**>(&gpuBuf));
|
|
RET_IF_HSA_ERR(err);
|
|
|
|
PROCESS_LOG("Allocated local memory buffer at %p\n", gpuBuf);
|
|
|
|
err = hsa_amd_agents_allow_access(2, ag_list, NULL, gpuBuf);
|
|
RET_IF_HSA_ERR(err);
|
|
|
|
err = hsa_amd_ipc_memory_create(gpuBuf, args.gpu_mem_granule,
|
|
const_cast<hsa_amd_ipc_memory_t*>(&shared->handle));
|
|
PROCESS_LOG(
|
|
"Created IPC handle associated with gpu-local buffer at P0 address %p\n",
|
|
gpuBuf);
|
|
|
|
RET_IF_HSA_ERR(err);
|
|
|
|
uint32_t count = args.gpu_mem_granule/sizeof(uint32_t);
|
|
shared->size = args.gpu_mem_granule;
|
|
shared->count = count;
|
|
|
|
err = hsa_amd_memory_fill(gpuBuf, 1, count);
|
|
RET_IF_HSA_ERR(err);
|
|
|
|
// Get IPC capable signal
|
|
hsa_signal_t ipc_signal;
|
|
err = hsa_amd_signal_create(1, 0, NULL, HSA_AMD_SIGNAL_IPC, &ipc_signal);
|
|
RET_IF_HSA_ERR(err);
|
|
|
|
err = hsa_amd_ipc_signal_create(ipc_signal,
|
|
const_cast<hsa_amd_ipc_signal_t*>(&shared->signal_handle));
|
|
PROCESS_LOG("Created IPC handle associated with ipc_signal\n");
|
|
RET_IF_HSA_ERR(err);
|
|
|
|
// Signal Process 2 that the gpu buffer is ready to read.
|
|
CheckAndSetToken(token, 1);
|
|
|
|
PROCESS_LOG("Allocated buffer and filled it with 1's. Wait for P1...\n");
|
|
hsa_signal_value_t ret =
|
|
hsa_signal_wait_acquire(ipc_signal, HSA_SIGNAL_CONDITION_NE, 1, -1, HSA_WAIT_STATE_BLOCKED);
|
|
|
|
if (ret != 2) {
|
|
hsa_signal_store_release(ipc_signal, -1);
|
|
return -1;
|
|
}
|
|
|
|
err = CheckAndFillBuffer(&args, gpuBuf, 2, 0);
|
|
RET_IF_HSA_ERR(err);
|
|
PROCESS_LOG("Confirmed P1 filled buffer with 2\n")
|
|
PROCESS_LOG("PASSED on P0\n");
|
|
|
|
hsa_signal_store_relaxed(ipc_signal, 0);
|
|
|
|
err = hsa_signal_destroy(ipc_signal);
|
|
RET_IF_HSA_ERR(err);
|
|
|
|
err = hsa_amd_memory_pool_free(gpuBuf);
|
|
RET_IF_HSA_ERR(err);
|
|
|
|
waitpid(child, nullptr, 0);
|
|
|
|
} else { // "ProcessTwo"
|
|
PROCESS_LOG("Waiting for process 0 to write 1 to token...\n");
|
|
while (*token == 0) {
|
|
sched_yield();
|
|
}
|
|
if (*token != 1) {
|
|
*token = -1;
|
|
return -1;
|
|
}
|
|
|
|
// Attach shared VRAM
|
|
void* ptr;
|
|
err = hsa_amd_ipc_memory_attach(
|
|
const_cast<hsa_amd_ipc_memory_t*>(&shared->handle), shared->size, 1,
|
|
ag_list, &ptr);
|
|
RET_IF_HSA_ERR(err);
|
|
|
|
PROCESS_LOG(
|
|
"Attached to IPC handle; P1 buffer address gpu-local memory is %p\n",
|
|
ptr);
|
|
|
|
// Attach shared signal
|
|
hsa_signal_t ipc_signal;
|
|
err = hsa_amd_ipc_signal_attach(const_cast<hsa_amd_ipc_signal_t*>(&shared->signal_handle),
|
|
&ipc_signal);
|
|
RET_IF_HSA_ERR(err);
|
|
|
|
PROCESS_LOG("Attached to signal IPC handle\n");
|
|
|
|
err = CheckAndFillBuffer(&args, reinterpret_cast<uint32_t *>(ptr), 1, 2);
|
|
RET_IF_HSA_ERR(err);
|
|
|
|
PROCESS_LOG(
|
|
"Confirmed P0 filled buffer with 1; P1 re-filled buffer with 2\n");
|
|
PROCESS_LOG("PASSED on P1\n");
|
|
|
|
hsa_signal_store_release(ipc_signal, 2);
|
|
|
|
err = hsa_amd_ipc_memory_detach(ptr);
|
|
RET_IF_HSA_ERR(err);
|
|
|
|
hsa_signal_wait_relaxed(ipc_signal, HSA_SIGNAL_CONDITION_NE, 2, -1, HSA_WAIT_STATE_BLOCKED);
|
|
|
|
err = hsa_signal_destroy(ipc_signal);
|
|
RET_IF_HSA_ERR(err);
|
|
}
|
|
|
|
err = hsa_signal_destroy(copy_signal);
|
|
RET_IF_HSA_ERR(err);
|
|
|
|
munmap(shared, sizeof(Shared));
|
|
|
|
err = hsa_shut_down();
|
|
RET_IF_HSA_ERR(err);
|
|
|
|
#undef PROCESS_LOG
|
|
return 0;
|
|
}
|