From 45d863851d154c6151a91f8ae840e0bbd7897df7 Mon Sep 17 00:00:00 2001 From: pensun Date: Tue, 16 Feb 2016 07:39:04 -0600 Subject: [PATCH 1/4] Implement to read HIP_VISIBLE_DEVICES to internal global variable --- hipamd/src/hip_hcc.cpp | 91 +++++++++++++++++++++++++++--------------- 1 file changed, 59 insertions(+), 32 deletions(-) diff --git a/hipamd/src/hip_hcc.cpp b/hipamd/src/hip_hcc.cpp index 4f95320ac3..24d6c20dfd 100644 --- a/hipamd/src/hip_hcc.cpp +++ b/hipamd/src/hip_hcc.cpp @@ -28,6 +28,7 @@ THE SOFTWARE. #include #include #include +#include #include #include #include @@ -60,7 +61,10 @@ int HIP_PRINT_ENV = 0; int HIP_TRACE_API= 0; int HIP_LAUNCH_BLOCKING = 0; int HIP_STAGING_SIZE = 64; /* size of staging buffers, in KB */ -int HIP_STAGING_BUFFERS = 2; +int HIP_STAGING_BUFFERS = 2; +int HIP_VISIBLE_DEVICES = 0; /* Contains a comma-separated sequence of GPU identifiers */ +// vector of integers that contains the visible device IDs +std::vector HIP_VISIBLE_DEVICES_IDS; #define TRACE_API 0x1 /* trace API calls and return values */ #define TRACE_SYNC 0x2 /* trace synchronization pieces */ @@ -176,9 +180,9 @@ public: //================================================================================================= // -//Reset the device - this is called from hipDeviceReset. +//Reset the device - this is called from hipDeviceReset. //Device may be reset multiple times, and may be reset after init. -void ihipDevice_t::reset() +void ihipDevice_t::reset() { _staging_buffer[0] = new StagingBuffer(this, HIP_STAGING_SIZE*1024, HIP_STAGING_BUFFERS); _staging_buffer[1] = new StagingBuffer(this, HIP_STAGING_SIZE*1024, HIP_STAGING_BUFFERS); @@ -477,17 +481,39 @@ void ihipReadEnv_I(int *var_ptr, const char *var_name1, const char *var_name2, c env = getenv(var_name2); } - // Default is set when variable is initialized (at top of this file), so only override if we find - // an environment variable. - if (env) { - long int v = strtol(env, NULL, 0); - *var_ptr = (int) (v); + // Check if the environment variable is either HIP_VISIBLE_DEVICES or CUDA_LAUNCH_BLOCKING, which + // contains a sequence of comma-separated device IDs + if (!(strcmp(var_name1,"HIP_VISIBLE_DEVICES") && strcmp(var_name2, "CUDA_VISIBLE_DEVICES")) && env){ + // Parse the string stream of env and store the device ids to HIP_VISIBLE_DEVICES_IDS global variable + std::string str = env; + std::istringstream ss(str); + std::string device_id; + + while (std::getline(ss, device_id, ',')) { + HIP_VISIBLE_DEVICES_IDS.push_back(atoi(device_id.c_str())); + } + // Print out the number of ids for debugging + if (HIP_PRINT_ENV) { + std::cout << "HIP visible device id is set to be: "; + for(int i=0;i _max_buffers ? _max_buffers : numBuffers) { - - + + for (int i=0; i<_numBuffers; i++) { // TODO - experiment with alignment here. _pinnedStagingBuffer[i] = hc::AM_alloc(_bufferSize, device->_acc, amHostPinned); @@ -1605,10 +1632,10 @@ StagingBuffer::~StagingBuffer() //--- -void StagingBuffer::CopyHostToDevice(void* dst, const void* src, size_t sizeBytes) +void StagingBuffer::CopyHostToDevice(void* dst, const void* src, size_t sizeBytes) { - const char *srcp = static_cast (src); - char *dstp = static_cast (dst); + const char *srcp = static_cast (src); + char *dstp = static_cast (dst); for (int i=0; i<_numBuffers; i++) { hsa_signal_store_relaxed(_completion_signal[i], 0); @@ -1621,7 +1648,7 @@ void StagingBuffer::CopyHostToDevice(void* dst, const void* src, size_t sizeByte size_t theseBytes = (bytesRemaining > _bufferSize) ? _bufferSize : bytesRemaining; tprintf (TRACE_COPY2, "waiting... on completion signal\n"); - hsa_signal_wait_acquire(_completion_signal[bufferIndex], HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_ACTIVE); + hsa_signal_wait_acquire(_completion_signal[bufferIndex], HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_ACTIVE); tprintf (TRACE_COPY2, "copy %zu bytes %p to stagingBuf[%d]:%p\n", theseBytes, srcp, bufferIndex, _pinnedStagingBuffer[bufferIndex]); // TODO - use uncached memcpy, someday. @@ -1632,7 +1659,7 @@ void StagingBuffer::CopyHostToDevice(void* dst, const void* src, size_t sizeByte hsa_signal_store_relaxed(_completion_signal[bufferIndex], 1); hsa_status_t hsa_status = hsa_amd_memory_async_copy(dstp, _pinnedStagingBuffer[bufferIndex], theseBytes, _device->_hsa_agent, 0, NULL, _completion_signal[bufferIndex]); - assert(hsa_status == HSA_STATUS_SUCCESS); // TODO - throw + assert(hsa_status == HSA_STATUS_SUCCESS); // TODO - throw srcp += theseBytes; dstp += theseBytes; @@ -1643,15 +1670,15 @@ void StagingBuffer::CopyHostToDevice(void* dst, const void* src, size_t sizeByte for (int i=0; i<_numBuffers; i++) { - hsa_signal_wait_acquire(_completion_signal[i], HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_ACTIVE); + hsa_signal_wait_acquire(_completion_signal[i], HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_ACTIVE); } } //--- -void StagingBuffer::CopyDeviceToHost(void* dst, const void* src, size_t sizeBytes) +void StagingBuffer::CopyDeviceToHost(void* dst, const void* src, size_t sizeBytes) { - const char *srcp0 = static_cast (src); - char *dstp1 = static_cast (dst); + const char *srcp0 = static_cast (src); + char *dstp1 = static_cast (dst); for (int i=0; i<_numBuffers; i++) { hsa_signal_store_relaxed(_completion_signal[i], 0); @@ -1671,7 +1698,7 @@ void StagingBuffer::CopyDeviceToHost(void* dst, const void* src, size_t sizeByte tprintf (TRACE_COPY2, "D2H: async_copy %zu bytes src:%p to staging:%p\n", theseBytes, srcp0, _pinnedStagingBuffer[bufferIndex]); hsa_signal_store_relaxed(_completion_signal[bufferIndex], 1); hsa_status_t hsa_status = hsa_amd_memory_async_copy(_pinnedStagingBuffer[bufferIndex], srcp0, theseBytes, _device->_hsa_agent, 0, NULL, _completion_signal[bufferIndex]); - assert(hsa_status == HSA_STATUS_SUCCESS); // TODO - throw + assert(hsa_status == HSA_STATUS_SUCCESS); // TODO - throw srcp0 += theseBytes; } @@ -1682,7 +1709,7 @@ void StagingBuffer::CopyDeviceToHost(void* dst, const void* src, size_t sizeByte size_t theseBytes = (bytesRemaining1 > _bufferSize) ? _bufferSize : bytesRemaining1; tprintf (TRACE_COPY2, "D2H: wait_completion[%d] bytesRemaining=%zu\n", bufferIndex, bytesRemaining1); - hsa_signal_wait_acquire(_completion_signal[bufferIndex], HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_ACTIVE); + hsa_signal_wait_acquire(_completion_signal[bufferIndex], HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_ACTIVE); tprintf (TRACE_COPY2, "D2H: copy %zu bytes stagingBuf[%d]:%p to dst:%p\n", theseBytes, bufferIndex, _pinnedStagingBuffer[bufferIndex], dstp1); memcpy(dstp1, _pinnedStagingBuffer[bufferIndex], theseBytes); @@ -1693,7 +1720,7 @@ void StagingBuffer::CopyDeviceToHost(void* dst, const void* src, size_t sizeByte //for (int i=0; i<_numBuffers; i++) { - // hsa_signal_wait_acquire(_completion_signal[i], HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_ACTIVE); + // hsa_signal_wait_acquire(_completion_signal[i], HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_ACTIVE); //} } @@ -1709,7 +1736,7 @@ void ihipAsyncCopy(ihipDevice_t *device, void* dst, const void* src, size_t size bool dstNotTracked = (hc::am_memtracker_getinfo(&dstPtrInfo, dst) != AM_SUCCESS); bool srcNotTracked = (hc::am_memtracker_getinfo(&srcPtrInfo, src) != AM_SUCCESS); - bool useStagingBuffer = true; // TODO - remove when new copy bakes a bit. + bool useStagingBuffer = true; // TODO - remove when new copy bakes a bit. // Resolve default to a specific Kind, since we use different algorithms: if (kind == hipMemcpyDefault) { @@ -1753,7 +1780,7 @@ void ihipAsyncCopy(ihipDevice_t *device, void* dst, const void* src, size_t size hsa_status_t hsa_status = hsa_amd_memory_async_copy(dst, src, sizeBytes, device->_hsa_agent, 0, NULL, device->_copy_signal); if (hsa_status == HSA_STATUS_SUCCESS) { - hsa_signal_wait_relaxed(device->_copy_signal, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_ACTIVE); + hsa_signal_wait_relaxed(device->_copy_signal, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_ACTIVE); } device->_copy_lock[1].unlock(); @@ -1786,7 +1813,7 @@ hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind } else { e = hipErrorInvalidResourceHandle; } - + #else // TODO-hsart - what synchronization does hsa_copy provide? From 7309e9ea6a4a45a1ea1f3ba828e667e80340f8df Mon Sep 17 00:00:00 2001 From: pensun Date: Tue, 16 Feb 2016 10:00:05 -0600 Subject: [PATCH 2/4] modify to add remove invalid devices numbers --- hipamd/src/hip_hcc.cpp | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/hipamd/src/hip_hcc.cpp b/hipamd/src/hip_hcc.cpp index 24d6c20dfd..c955629ed6 100644 --- a/hipamd/src/hip_hcc.cpp +++ b/hipamd/src/hip_hcc.cpp @@ -32,6 +32,7 @@ THE SOFTWARE. #include #include #include +#include #include #include @@ -64,7 +65,7 @@ int HIP_STAGING_SIZE = 64; /* size of staging buffers, in KB */ int HIP_STAGING_BUFFERS = 2; int HIP_VISIBLE_DEVICES = 0; /* Contains a comma-separated sequence of GPU identifiers */ // vector of integers that contains the visible device IDs -std::vector HIP_VISIBLE_DEVICES_IDS; +std::vector g_hip_visible_devices; #define TRACE_API 0x1 /* trace API calls and return values */ #define TRACE_SYNC 0x2 /* trace synchronization pieces */ @@ -484,20 +485,23 @@ void ihipReadEnv_I(int *var_ptr, const char *var_name1, const char *var_name2, c // Check if the environment variable is either HIP_VISIBLE_DEVICES or CUDA_LAUNCH_BLOCKING, which // contains a sequence of comma-separated device IDs if (!(strcmp(var_name1,"HIP_VISIBLE_DEVICES") && strcmp(var_name2, "CUDA_VISIBLE_DEVICES")) && env){ - // Parse the string stream of env and store the device ids to HIP_VISIBLE_DEVICES_IDS global variable + // Parse the string stream of env and store the device ids to g_hip_visible_devices global variable std::string str = env; std::istringstream ss(str); std::string device_id; - while (std::getline(ss, device_id, ',')) { - HIP_VISIBLE_DEVICES_IDS.push_back(atoi(device_id.c_str())); + if (atoi(device_id.c_str()) >= 0) { + g_hip_visible_devices.push_back(atoi(device_id.c_str())); + }else// Any device number after invalid number will not present + break; } - // Print out the number of ids for debugging + + // Print out the number of ids if (HIP_PRINT_ENV) { - std::cout << "HIP visible device id is set to be: "; - for(int i=0;i= g_deviceCnt){ + // Make sure any DeviceID after invalid DeviceID will be erased. + g_hip_visible_devices.resize(i); + break; + } + } tprintf(TRACE_API, "pid=%u %-30s\n", getpid(), ""); From 43785243a50f5f368e965b2f99af15efac63c916 Mon Sep 17 00:00:00 2001 From: pensun Date: Wed, 17 Feb 2016 06:59:18 -0600 Subject: [PATCH 3/4] Implementation of HIP_VISIBLE_DEVICES in runtime --- hipamd/src/hip_hcc.cpp | 54 ++++++++++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 18 deletions(-) diff --git a/hipamd/src/hip_hcc.cpp b/hipamd/src/hip_hcc.cpp index c955629ed6..ae0f00320f 100644 --- a/hipamd/src/hip_hcc.cpp +++ b/hipamd/src/hip_hcc.cpp @@ -64,8 +64,7 @@ int HIP_LAUNCH_BLOCKING = 0; int HIP_STAGING_SIZE = 64; /* size of staging buffers, in KB */ int HIP_STAGING_BUFFERS = 2; int HIP_VISIBLE_DEVICES = 0; /* Contains a comma-separated sequence of GPU identifiers */ -// vector of integers that contains the visible device IDs -std::vector g_hip_visible_devices; +std::vector g_hip_visible_devices; /* vector of integers that contains the visible device IDs */ #define TRACE_API 0x1 /* trace API calls and return values */ #define TRACE_SYNC 0x2 /* trace synchronization pieces */ @@ -489,14 +488,16 @@ void ihipReadEnv_I(int *var_ptr, const char *var_name1, const char *var_name2, c std::string str = env; std::istringstream ss(str); std::string device_id; + // Clean up the defult value + g_hip_visible_devices.clear(); + // Read the visible device numbers while (std::getline(ss, device_id, ',')) { if (atoi(device_id.c_str()) >= 0) { g_hip_visible_devices.push_back(atoi(device_id.c_str())); }else// Any device number after invalid number will not present break; } - - // Print out the number of ids + // Print out the number of ids if (HIP_PRINT_ENV) { printf ("%-30s = ", var_name1); for(int i=0;i= g_deviceCnt){ + if(g_hip_visible_devices[i] >= deviceCnt){ // Make sure any DeviceID after invalid DeviceID will be erased. g_hip_visible_devices.resize(i); break; } } + g_devices = new ihipDevice_t[deviceCnt]; + g_deviceCnt = 0; + for (int i=0; i"); } @@ -596,6 +608,12 @@ INLINE bool ihipIsValidDevice(unsigned deviceIndex) return (deviceIndex < g_deviceCnt); } +// check if the device ID is set as visible +INLINE bool ihipIsVisibleDevice(unsigned deviceIndex) +{ + return std::find(g_hip_visible_devices.begin(), g_hip_visible_devices.end(), + (int)deviceIndex) != g_hip_visible_devices.end(); +} //--- INLINE ihipDevice_t *ihipGetTlsDefaultDevice() From c1da0f1e129d47762f114b37f4cadcfb63edb3cb Mon Sep 17 00:00:00 2001 From: pensun Date: Wed, 17 Feb 2016 09:24:39 -0600 Subject: [PATCH 4/4] 1. Bug fix 2. passed initial tests on different sets of HIP_VISIBLE_DEVICES: (0),(1),(0,1),(1,2),(2,3),(1,2,3),(2,3,4),(1,5,2,3) and achieved expected choice of GPU devices at the runtime. 3. Passed HIP test suite. --- hipamd/src/hip_hcc.cpp | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/hipamd/src/hip_hcc.cpp b/hipamd/src/hip_hcc.cpp index ae0f00320f..1f2f6f0b3b 100644 --- a/hipamd/src/hip_hcc.cpp +++ b/hipamd/src/hip_hcc.cpp @@ -33,6 +33,7 @@ THE SOFTWARE. #include #include #include +#include #include #include @@ -580,15 +581,19 @@ void ihipInit() g_deviceCnt = 0; for (int i=0; i