From 07e4d8261cfc094ce1afd99c29bfb5324e69a98d Mon Sep 17 00:00:00 2001 From: pensun Date: Tue, 16 Feb 2016 07:39:04 -0600 Subject: [PATCH] Implement to read HIP_VISIBLE_DEVICES to internal global variable [ROCm/clr commit: 45d863851d154c6151a91f8ae840e0bbd7897df7] --- projects/clr/hipamd/src/hip_hcc.cpp | 91 +++++++++++++++++++---------- 1 file changed, 59 insertions(+), 32 deletions(-) diff --git a/projects/clr/hipamd/src/hip_hcc.cpp b/projects/clr/hipamd/src/hip_hcc.cpp index 4f95320ac3..24d6c20dfd 100644 --- a/projects/clr/hipamd/src/hip_hcc.cpp +++ b/projects/clr/hipamd/src/hip_hcc.cpp @@ -28,6 +28,7 @@ THE SOFTWARE. #include #include #include +#include #include #include #include @@ -60,7 +61,10 @@ int HIP_PRINT_ENV = 0; int HIP_TRACE_API= 0; int HIP_LAUNCH_BLOCKING = 0; int HIP_STAGING_SIZE = 64; /* size of staging buffers, in KB */ -int HIP_STAGING_BUFFERS = 2; +int HIP_STAGING_BUFFERS = 2; +int HIP_VISIBLE_DEVICES = 0; /* Contains a comma-separated sequence of GPU identifiers */ +// vector of integers that contains the visible device IDs +std::vector HIP_VISIBLE_DEVICES_IDS; #define TRACE_API 0x1 /* trace API calls and return values */ #define TRACE_SYNC 0x2 /* trace synchronization pieces */ @@ -176,9 +180,9 @@ public: //================================================================================================= // -//Reset the device - this is called from hipDeviceReset. +//Reset the device - this is called from hipDeviceReset. //Device may be reset multiple times, and may be reset after init. -void ihipDevice_t::reset() +void ihipDevice_t::reset() { _staging_buffer[0] = new StagingBuffer(this, HIP_STAGING_SIZE*1024, HIP_STAGING_BUFFERS); _staging_buffer[1] = new StagingBuffer(this, HIP_STAGING_SIZE*1024, HIP_STAGING_BUFFERS); @@ -477,17 +481,39 @@ void ihipReadEnv_I(int *var_ptr, const char *var_name1, const char *var_name2, c env = getenv(var_name2); } - // Default is set when variable is initialized (at top of this file), so only override if we find - // an environment variable. - if (env) { - long int v = strtol(env, NULL, 0); - *var_ptr = (int) (v); + // Check if the environment variable is either HIP_VISIBLE_DEVICES or CUDA_LAUNCH_BLOCKING, which + // contains a sequence of comma-separated device IDs + if (!(strcmp(var_name1,"HIP_VISIBLE_DEVICES") && strcmp(var_name2, "CUDA_VISIBLE_DEVICES")) && env){ + // Parse the string stream of env and store the device ids to HIP_VISIBLE_DEVICES_IDS global variable + std::string str = env; + std::istringstream ss(str); + std::string device_id; + + while (std::getline(ss, device_id, ',')) { + HIP_VISIBLE_DEVICES_IDS.push_back(atoi(device_id.c_str())); + } + // Print out the number of ids for debugging + if (HIP_PRINT_ENV) { + std::cout << "HIP visible device id is set to be: "; + for(int i=0;i _max_buffers ? _max_buffers : numBuffers) { - - + + for (int i=0; i<_numBuffers; i++) { // TODO - experiment with alignment here. _pinnedStagingBuffer[i] = hc::AM_alloc(_bufferSize, device->_acc, amHostPinned); @@ -1605,10 +1632,10 @@ StagingBuffer::~StagingBuffer() //--- -void StagingBuffer::CopyHostToDevice(void* dst, const void* src, size_t sizeBytes) +void StagingBuffer::CopyHostToDevice(void* dst, const void* src, size_t sizeBytes) { - const char *srcp = static_cast (src); - char *dstp = static_cast (dst); + const char *srcp = static_cast (src); + char *dstp = static_cast (dst); for (int i=0; i<_numBuffers; i++) { hsa_signal_store_relaxed(_completion_signal[i], 0); @@ -1621,7 +1648,7 @@ void StagingBuffer::CopyHostToDevice(void* dst, const void* src, size_t sizeByte size_t theseBytes = (bytesRemaining > _bufferSize) ? _bufferSize : bytesRemaining; tprintf (TRACE_COPY2, "waiting... on completion signal\n"); - hsa_signal_wait_acquire(_completion_signal[bufferIndex], HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_ACTIVE); + hsa_signal_wait_acquire(_completion_signal[bufferIndex], HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_ACTIVE); tprintf (TRACE_COPY2, "copy %zu bytes %p to stagingBuf[%d]:%p\n", theseBytes, srcp, bufferIndex, _pinnedStagingBuffer[bufferIndex]); // TODO - use uncached memcpy, someday. @@ -1632,7 +1659,7 @@ void StagingBuffer::CopyHostToDevice(void* dst, const void* src, size_t sizeByte hsa_signal_store_relaxed(_completion_signal[bufferIndex], 1); hsa_status_t hsa_status = hsa_amd_memory_async_copy(dstp, _pinnedStagingBuffer[bufferIndex], theseBytes, _device->_hsa_agent, 0, NULL, _completion_signal[bufferIndex]); - assert(hsa_status == HSA_STATUS_SUCCESS); // TODO - throw + assert(hsa_status == HSA_STATUS_SUCCESS); // TODO - throw srcp += theseBytes; dstp += theseBytes; @@ -1643,15 +1670,15 @@ void StagingBuffer::CopyHostToDevice(void* dst, const void* src, size_t sizeByte for (int i=0; i<_numBuffers; i++) { - hsa_signal_wait_acquire(_completion_signal[i], HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_ACTIVE); + hsa_signal_wait_acquire(_completion_signal[i], HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_ACTIVE); } } //--- -void StagingBuffer::CopyDeviceToHost(void* dst, const void* src, size_t sizeBytes) +void StagingBuffer::CopyDeviceToHost(void* dst, const void* src, size_t sizeBytes) { - const char *srcp0 = static_cast (src); - char *dstp1 = static_cast (dst); + const char *srcp0 = static_cast (src); + char *dstp1 = static_cast (dst); for (int i=0; i<_numBuffers; i++) { hsa_signal_store_relaxed(_completion_signal[i], 0); @@ -1671,7 +1698,7 @@ void StagingBuffer::CopyDeviceToHost(void* dst, const void* src, size_t sizeByte tprintf (TRACE_COPY2, "D2H: async_copy %zu bytes src:%p to staging:%p\n", theseBytes, srcp0, _pinnedStagingBuffer[bufferIndex]); hsa_signal_store_relaxed(_completion_signal[bufferIndex], 1); hsa_status_t hsa_status = hsa_amd_memory_async_copy(_pinnedStagingBuffer[bufferIndex], srcp0, theseBytes, _device->_hsa_agent, 0, NULL, _completion_signal[bufferIndex]); - assert(hsa_status == HSA_STATUS_SUCCESS); // TODO - throw + assert(hsa_status == HSA_STATUS_SUCCESS); // TODO - throw srcp0 += theseBytes; } @@ -1682,7 +1709,7 @@ void StagingBuffer::CopyDeviceToHost(void* dst, const void* src, size_t sizeByte size_t theseBytes = (bytesRemaining1 > _bufferSize) ? _bufferSize : bytesRemaining1; tprintf (TRACE_COPY2, "D2H: wait_completion[%d] bytesRemaining=%zu\n", bufferIndex, bytesRemaining1); - hsa_signal_wait_acquire(_completion_signal[bufferIndex], HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_ACTIVE); + hsa_signal_wait_acquire(_completion_signal[bufferIndex], HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_ACTIVE); tprintf (TRACE_COPY2, "D2H: copy %zu bytes stagingBuf[%d]:%p to dst:%p\n", theseBytes, bufferIndex, _pinnedStagingBuffer[bufferIndex], dstp1); memcpy(dstp1, _pinnedStagingBuffer[bufferIndex], theseBytes); @@ -1693,7 +1720,7 @@ void StagingBuffer::CopyDeviceToHost(void* dst, const void* src, size_t sizeByte //for (int i=0; i<_numBuffers; i++) { - // hsa_signal_wait_acquire(_completion_signal[i], HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_ACTIVE); + // hsa_signal_wait_acquire(_completion_signal[i], HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_ACTIVE); //} } @@ -1709,7 +1736,7 @@ void ihipAsyncCopy(ihipDevice_t *device, void* dst, const void* src, size_t size bool dstNotTracked = (hc::am_memtracker_getinfo(&dstPtrInfo, dst) != AM_SUCCESS); bool srcNotTracked = (hc::am_memtracker_getinfo(&srcPtrInfo, src) != AM_SUCCESS); - bool useStagingBuffer = true; // TODO - remove when new copy bakes a bit. + bool useStagingBuffer = true; // TODO - remove when new copy bakes a bit. // Resolve default to a specific Kind, since we use different algorithms: if (kind == hipMemcpyDefault) { @@ -1753,7 +1780,7 @@ void ihipAsyncCopy(ihipDevice_t *device, void* dst, const void* src, size_t size hsa_status_t hsa_status = hsa_amd_memory_async_copy(dst, src, sizeBytes, device->_hsa_agent, 0, NULL, device->_copy_signal); if (hsa_status == HSA_STATUS_SUCCESS) { - hsa_signal_wait_relaxed(device->_copy_signal, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_ACTIVE); + hsa_signal_wait_relaxed(device->_copy_signal, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, HSA_WAIT_STATE_ACTIVE); } device->_copy_lock[1].unlock(); @@ -1786,7 +1813,7 @@ hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind } else { e = hipErrorInvalidResourceHandle; } - + #else // TODO-hsart - what synchronization does hsa_copy provide?