From e30a4d78196da005781f3c1f4f43d65eacafb7e5 Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Mon, 7 Mar 2016 11:01:29 +0530 Subject: [PATCH 1/5] Update release notes --- hipamd/RELEASE.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/hipamd/RELEASE.md b/hipamd/RELEASE.md index e5a0e61d13..4b1dec0cbd 100644 --- a/hipamd/RELEASE.md +++ b/hipamd/RELEASE.md @@ -25,6 +25,24 @@ Next: ## Revision History: +=================================================================================================== +Release:0.82.00 +Date: 2016.03.07 +- Bump minimum required HCC workweek to 16074. +- Enable multi-GPU support. + * Use hipSetDevice to select a device for subsequent kernel calls and memory allocations. + * CUDA_VISIBLE_DEVICES / HIP_VISIBLE_DEVICE environment variable selects devices visible to the runtime. +- Support hipStreams – send sequences of copy and kernel commands to a device. + * Asynchronous copies supported. +- Optimize memory copy operations. +- Support hipPointerGetAttribute – can determine if a pointer is host or device. +- Enable atomics to local memory. +- Support for LC Direct-To-ISA path. +- Improved free memory reporting. + * hipMemGetInfo (report full memory used in current process). + * hipDeviceReset (deletes all memory allocated by current process). + + =================================================================================================== Release:0.80.01 Date: 2016.02.18 From ac57672eaae00288e6b203f3ac23fd6a52cdd647 Mon Sep 17 00:00:00 2001 From: Maneesh Gupta Date: Tue, 8 Mar 2016 11:14:03 +0530 Subject: [PATCH 2/5] Fix release notes and minimum required hcc workweek check --- hipamd/RELEASE.md | 1 + hipamd/include/hcc_detail/hip_runtime_api.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/hipamd/RELEASE.md b/hipamd/RELEASE.md index 4b1dec0cbd..8041657b8e 100644 --- a/hipamd/RELEASE.md +++ b/hipamd/RELEASE.md @@ -29,6 +29,7 @@ Next: Release:0.82.00 Date: 2016.03.07 - Bump minimum required HCC workweek to 16074. +- Bump minimum required ROCK-Kernel-Driver and ROCR-Runtime to Developer Preview 2. - Enable multi-GPU support. * Use hipSetDevice to select a device for subsequent kernel calls and memory allocations. * CUDA_VISIBLE_DEVICES / HIP_VISIBLE_DEVICE environment variable selects devices visible to the runtime. diff --git a/hipamd/include/hcc_detail/hip_runtime_api.h b/hipamd/include/hcc_detail/hip_runtime_api.h index 92aef3d213..22247af4b5 100644 --- a/hipamd/include/hcc_detail/hip_runtime_api.h +++ b/hipamd/include/hcc_detail/hip_runtime_api.h @@ -33,7 +33,7 @@ THE SOFTWARE. #include #include "hip_hcc.h" -#if defined (__HCC__) && (__hcc_workweek__ < 1602) +#if defined (__HCC__) && (__hcc_workweek__ < 16074) #error("This version of HIP requires a newer version of HCC."); #endif From 65510d6e4cd5b496e2eae328d62b2da35b86405f Mon Sep 17 00:00:00 2001 From: Aditya Atluri Date: Fri, 25 Mar 2016 05:49:33 -0500 Subject: [PATCH 3/5] added functional tests for hip device apis --- hipamd/tests/src/CMakeLists.txt | 6 +++ hipamd/tests/src/hipFuncDeviceSynchronize.cpp | 51 +++++++++++++++++++ hipamd/tests/src/hipFuncGetDevice.cpp | 38 ++++++++++++++ hipamd/tests/src/hipFuncSetDevice.cpp | 30 +++++++++++ 4 files changed, 125 insertions(+) create mode 100644 hipamd/tests/src/hipFuncDeviceSynchronize.cpp create mode 100644 hipamd/tests/src/hipFuncGetDevice.cpp create mode 100644 hipamd/tests/src/hipFuncSetDevice.cpp diff --git a/hipamd/tests/src/CMakeLists.txt b/hipamd/tests/src/CMakeLists.txt index 3846c1c287..e5adf4e4d3 100644 --- a/hipamd/tests/src/CMakeLists.txt +++ b/hipamd/tests/src/CMakeLists.txt @@ -152,6 +152,8 @@ make_hip_executable (hipHostRegister hipHostRegister.cpp) make_hip_executable (hipRandomMemcpyAsync hipRandomMemcpyAsync.cpp) make_hip_executable (hipMemoryAllocate hipMemoryAllocate.cpp) make_hip_executable (hipFuncSetDeviceFlags hipFuncSetDeviceFlags.cpp) +make_hip_executable (hipFuncGetDevice hipFuncGetDevice.cpp) +make_hip_executable (hipFuncSetDevice hipFuncSetDevice.cpp) make_test(hip_ballot " " ) make_test(hip_anyall " " ) @@ -190,4 +192,8 @@ make_test(hipRandomMemcpyAsync " ") #make_test(hipAPIStreamDisable " ") make_test(hipMemoryAllocate " ") make_test(hipFuncSetDeviceFlags " ") +make_test(hipFuncGetDevice " ") +make_test(hipFuncSetDevice " ") + + make_hipify_test(specialFunc.cu ) diff --git a/hipamd/tests/src/hipFuncDeviceSynchronize.cpp b/hipamd/tests/src/hipFuncDeviceSynchronize.cpp new file mode 100644 index 0000000000..306136d1a9 --- /dev/null +++ b/hipamd/tests/src/hipFuncDeviceSynchronize.cpp @@ -0,0 +1,51 @@ +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include"test_common.h" + +#define _SIZE sizeof(int)*1024*1024 +#define NUM_STREAMS 10 + +__global__ void Iter(hipLaunchParm lp, int *Ad, int num){ +int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; +if(tx == 0){ +for(int i = 0; i Date: Fri, 25 Mar 2016 06:41:49 -0500 Subject: [PATCH 4/5] updated hipdevicesync test --- hipamd/tests/src/CMakeLists.txt | 3 +- hipamd/tests/src/hipFuncDeviceSynchronize.cpp | 29 ++++++++++++++----- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/hipamd/tests/src/CMakeLists.txt b/hipamd/tests/src/CMakeLists.txt index e5adf4e4d3..acd2060647 100644 --- a/hipamd/tests/src/CMakeLists.txt +++ b/hipamd/tests/src/CMakeLists.txt @@ -154,6 +154,7 @@ make_hip_executable (hipMemoryAllocate hipMemoryAllocate.cpp) make_hip_executable (hipFuncSetDeviceFlags hipFuncSetDeviceFlags.cpp) make_hip_executable (hipFuncGetDevice hipFuncGetDevice.cpp) make_hip_executable (hipFuncSetDevice hipFuncSetDevice.cpp) +make_hip_executable (hipFuncDeviceSynchronize hipFuncDeviceSynchronize.cpp) make_test(hip_ballot " " ) make_test(hip_anyall " " ) @@ -194,6 +195,6 @@ make_test(hipMemoryAllocate " ") make_test(hipFuncSetDeviceFlags " ") make_test(hipFuncGetDevice " ") make_test(hipFuncSetDevice " ") - +make_test(hipFuncDeviceSynchronize " ") make_hipify_test(specialFunc.cu ) diff --git a/hipamd/tests/src/hipFuncDeviceSynchronize.cpp b/hipamd/tests/src/hipFuncDeviceSynchronize.cpp index 306136d1a9..bb274f47bc 100644 --- a/hipamd/tests/src/hipFuncDeviceSynchronize.cpp +++ b/hipamd/tests/src/hipFuncDeviceSynchronize.cpp @@ -17,18 +17,23 @@ OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/* + * Test for checking the functionality of + * hipError_t hipDeviceSynchronize(); +*/ + #include"test_common.h" #define _SIZE sizeof(int)*1024*1024 -#define NUM_STREAMS 10 +#define NUM_STREAMS 2 __global__ void Iter(hipLaunchParm lp, int *Ad, int num){ -int tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; -if(tx == 0){ -for(int i = 0; i Date: Fri, 25 Mar 2016 09:24:08 -0500 Subject: [PATCH 5/5] fix query of memoryClockRate and memoryBusWidth for both NV and HCC path --- hipamd/include/nvcc_detail/hip_runtime_api.h | 28 +++++++++++--------- hipamd/src/hip_hcc.cpp | 20 +++++++------- 2 files changed, 25 insertions(+), 23 deletions(-) diff --git a/hipamd/include/nvcc_detail/hip_runtime_api.h b/hipamd/include/nvcc_detail/hip_runtime_api.h index 89b5a2dfee..e5cfcf597f 100644 --- a/hipamd/include/nvcc_detail/hip_runtime_api.h +++ b/hipamd/include/nvcc_detail/hip_runtime_api.h @@ -43,7 +43,7 @@ hipMemcpyHostToHost // hipErrorNoDevice. -/*typedef enum hipTextureFilterMode +/*typedef enum hipTextureFilterMode { hipFilterModePoint = cudaFilterModePoint, ///< Point filter mode. //! @warning cudaFilterModeLinear is not supported. @@ -76,7 +76,7 @@ default: return hipErrorUnknown; } } -// TODO match the error enum names of hip and cuda +// TODO match the error enum names of hip and cuda inline static cudaError_t hipErrorToCudaError(hipError_t hError) { switch(hError) { case hipSuccess: @@ -214,9 +214,11 @@ inline static hipError_t hipGetDeviceProperties(hipDeviceProp_t *p_prop, int dev p_prop->maxThreadsPerMultiProcessor = cdprop.maxThreadsPerMultiProcessor ; p_prop->computeMode = cdprop.computeMode ; p_prop->canMapHostMemory = cdprop.canMapHostMemory; + p_prop->memoryClockRate = cdprop.memoryClockRate; + p_prop->memoryBusWidth = cdprop.memoryBusWidth; // Same as clock-rate: - p_prop->clockInstructionRate = cdprop.clockRate; + p_prop->clockInstructionRate = cdprop.clockRate; int ccVers = p_prop->major*100 + p_prop->minor * 10; @@ -253,7 +255,7 @@ inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t att { cudaDeviceAttr cdattr; cudaError_t cerror; - + switch (attr) { case hipDeviceAttributeMaxThreadsPerBlock: cdattr = cudaDevAttrMaxThreadsPerBlock; break; @@ -344,7 +346,7 @@ inline static hipError_t hipEventCreate( hipEvent_t* event) { return hipCUDAErrorTohipError(cudaEventCreate(event)); } - + inline static hipError_t hipEventRecord( hipEvent_t event, hipStream_t stream = NULL) { return hipCUDAErrorTohipError(cudaEventRecord(event,stream)); @@ -377,18 +379,18 @@ inline static hipError_t hipStreamCreate(hipStream_t *stream) return hipCUDAErrorTohipError(cudaStreamCreate(stream)); } -inline static hipError_t hipStreamSynchronize(hipStream_t stream) +inline static hipError_t hipStreamSynchronize(hipStream_t stream) { return hipCUDAErrorTohipError(cudaStreamSynchronize(stream)); } -inline static hipError_t hipStreamDestroy(hipStream_t stream) +inline static hipError_t hipStreamDestroy(hipStream_t stream) { return hipCUDAErrorTohipError(cudaStreamDestroy(stream)); } -inline static hipError_t hipDriverGetVersion(int *driverVersion) +inline static hipError_t hipDriverGetVersion(int *driverVersion) { cudaError_t err = cudaDriverGetVersion(driverVersion); @@ -443,11 +445,11 @@ inline static hipError_t hipBindTexture(size_t *offset, } template -inline static hipError_t hipBindTexture(size_t *offset, - struct texture *tex, - const void *devPtr, - const struct hipChannelFormatDesc *desc, - size_t size=UINT_MAX) +inline static hipError_t hipBindTexture(size_t *offset, + struct texture *tex, + const void *devPtr, + const struct hipChannelFormatDesc *desc, + size_t size=UINT_MAX) { return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, desc, size)); } diff --git a/hipamd/src/hip_hcc.cpp b/hipamd/src/hip_hcc.cpp index 0631cc6814..1c93ae48dd 100644 --- a/hipamd/src/hip_hcc.cpp +++ b/hipamd/src/hip_hcc.cpp @@ -221,7 +221,7 @@ ihipSignal_t *ihipStream_t::allocSignal() SIGSEQNUM oldSigId = _signalPool[thisCursor]._sig_id; _signalPool[thisCursor]._index = thisCursor; _signalPool[thisCursor]._sig_id = ++_stream_sig_id; // allocate it. - tprintf(DB_SIGNAL, "allocatSignal #%lu at pos:%i (old sigId:%lu < oldest_live:%lu)\n", + tprintf(DB_SIGNAL, "allocatSignal #%lu at pos:%i (old sigId:%lu < oldest_live:%lu)\n", _signalPool[thisCursor]._sig_id, thisCursor, oldSigId, _oldest_live_sig_id); @@ -627,12 +627,12 @@ hipError_t ihipDevice_t::getProperties(hipDeviceProp_t* prop) prop->maxSharedMemoryPerMultiProcessor = prop->totalGlobalMem; // Get Max memory clock frequency - //err = hsa_region_get_info(*am_region, (hsa_region_info_t)HSA_AMD_REGION_INFO_MAX_CLOCK_FREQUENCY, &prop->memoryClockRate); + err = hsa_region_get_info(*am_region, (hsa_region_info_t)HSA_AMD_REGION_INFO_MAX_CLOCK_FREQUENCY, &prop->memoryClockRate); DeviceErrorCheck(err); prop->memoryClockRate *= 1000.0; // convert Mhz to Khz. // Get global memory bus width in bits - //err = hsa_region_get_info(*am_region, (hsa_region_info_t)HSA_AMD_REGION_INFO_BUS_WIDTH, &prop->memoryBusWidth); + err = hsa_region_get_info(*am_region, (hsa_region_info_t)HSA_AMD_REGION_INFO_BUS_WIDTH, &prop->memoryBusWidth); DeviceErrorCheck(err); // Set feature flags - these are all mandatory for HIP on HCC path: @@ -676,7 +676,7 @@ void ihipDevice_t::syncDefaultStream(bool waitOnSelf) for (auto streamI=_streams.begin(); streamI!=_streams.end(); streamI++) { ihipStream_t *stream = *streamI; - + // Don't wait for streams that have "opted-out" of syncing with NULL stream. // And - don't wait for the NULL stream if (!(stream->_flags & hipStreamNonBlocking)) { @@ -769,7 +769,7 @@ void ihipReadEnv_I(int *var_ptr, const char *var_name1, const char *var_name2, c #endif // Determines if the given agent is of type HSA_DEVICE_TYPE_GPU and counts it. -static hsa_status_t findCpuAgent(hsa_agent_t agent, void *data) +static hsa_status_t findCpuAgent(hsa_agent_t agent, void *data) { hsa_device_type_t device_type; hsa_status_t status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type); @@ -794,9 +794,9 @@ static hsa_status_t findCpuAgent(hsa_agent_t agent, void *data) void ihipInit() { -#if COMPILE_TRACE_MARKER +#if COMPILE_TRACE_MARKER amdtInitializeActivityLogger(); - amdtScopedMarker("ihipInit", "HIP", NULL); + amdtScopedMarker("ihipInit", "HIP", NULL); #endif /* * Environment variables @@ -942,7 +942,7 @@ hipStream_t ihipSyncAndResolveStream(hipStream_t stream) tprintf(DB_SYNC, "stream %p wait default stream\n", stream); stream->getDevice()->_default_stream->wait(); } - + return stream; } } @@ -1138,7 +1138,7 @@ void ihipStream_t::copySync(void* dst, const void* src, size_t sizeBytes, unsign hc::am_copy(dst, src, sizeBytes); #endif } - } else if (kind == hipMemcpyHostToHost) { + } else if (kind == hipMemcpyHostToHost) { int depSignalCnt = preCopyCommand(NULL, &depSignal, ihipCommandCopyH2H); if (depSignalCnt) { @@ -1207,7 +1207,7 @@ void ihipStream_t::copyAsync(void* dst, const void* src, size_t sizeBytes, unsig bool srcTracked = (hc::am_memtracker_getinfo(&srcPtrInfo, src) == AM_SUCCESS); - // "tracked" really indicates if the pointer's virtual address is available in the GPU address space. + // "tracked" really indicates if the pointer's virtual address is available in the GPU address space. // If both pointers are not tracked, we need to fall back to a sync copy. if (!dstTracked || !srcTracked) { trueAsync = false;