From fa114274acbd541d14004da4e53ade63164e49bb Mon Sep 17 00:00:00 2001 From: pensun Date: Fri, 25 Mar 2016 09:24:08 -0500 Subject: [PATCH] fix query of memoryClockRate and memoryBusWidth for both NV and HCC path [ROCm/hip commit: 535de2ecc119de75a87a87bb3109ce210c9a0445] --- .../hip/include/nvcc_detail/hip_runtime_api.h | 28 ++++++++++--------- projects/hip/src/hip_hcc.cpp | 20 ++++++------- 2 files changed, 25 insertions(+), 23 deletions(-) diff --git a/projects/hip/include/nvcc_detail/hip_runtime_api.h b/projects/hip/include/nvcc_detail/hip_runtime_api.h index 89b5a2dfee..e5cfcf597f 100644 --- a/projects/hip/include/nvcc_detail/hip_runtime_api.h +++ b/projects/hip/include/nvcc_detail/hip_runtime_api.h @@ -43,7 +43,7 @@ hipMemcpyHostToHost // hipErrorNoDevice. -/*typedef enum hipTextureFilterMode +/*typedef enum hipTextureFilterMode { hipFilterModePoint = cudaFilterModePoint, ///< Point filter mode. //! @warning cudaFilterModeLinear is not supported. @@ -76,7 +76,7 @@ default: return hipErrorUnknown; } } -// TODO match the error enum names of hip and cuda +// TODO match the error enum names of hip and cuda inline static cudaError_t hipErrorToCudaError(hipError_t hError) { switch(hError) { case hipSuccess: @@ -214,9 +214,11 @@ inline static hipError_t hipGetDeviceProperties(hipDeviceProp_t *p_prop, int dev p_prop->maxThreadsPerMultiProcessor = cdprop.maxThreadsPerMultiProcessor ; p_prop->computeMode = cdprop.computeMode ; p_prop->canMapHostMemory = cdprop.canMapHostMemory; + p_prop->memoryClockRate = cdprop.memoryClockRate; + p_prop->memoryBusWidth = cdprop.memoryBusWidth; // Same as clock-rate: - p_prop->clockInstructionRate = cdprop.clockRate; + p_prop->clockInstructionRate = cdprop.clockRate; int ccVers = p_prop->major*100 + p_prop->minor * 10; @@ -253,7 +255,7 @@ inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t att { cudaDeviceAttr cdattr; cudaError_t cerror; - + switch (attr) { case hipDeviceAttributeMaxThreadsPerBlock: cdattr = cudaDevAttrMaxThreadsPerBlock; break; @@ -344,7 +346,7 @@ inline static hipError_t hipEventCreate( hipEvent_t* event) { return hipCUDAErrorTohipError(cudaEventCreate(event)); } - + inline static hipError_t hipEventRecord( hipEvent_t event, hipStream_t stream = NULL) { return hipCUDAErrorTohipError(cudaEventRecord(event,stream)); @@ -377,18 +379,18 @@ inline static hipError_t hipStreamCreate(hipStream_t *stream) return hipCUDAErrorTohipError(cudaStreamCreate(stream)); } -inline static hipError_t hipStreamSynchronize(hipStream_t stream) +inline static hipError_t hipStreamSynchronize(hipStream_t stream) { return hipCUDAErrorTohipError(cudaStreamSynchronize(stream)); } -inline static hipError_t hipStreamDestroy(hipStream_t stream) +inline static hipError_t hipStreamDestroy(hipStream_t stream) { return hipCUDAErrorTohipError(cudaStreamDestroy(stream)); } -inline static hipError_t hipDriverGetVersion(int *driverVersion) +inline static hipError_t hipDriverGetVersion(int *driverVersion) { cudaError_t err = cudaDriverGetVersion(driverVersion); @@ -443,11 +445,11 @@ inline static hipError_t hipBindTexture(size_t *offset, } template -inline static hipError_t hipBindTexture(size_t *offset, - struct texture *tex, - const void *devPtr, - const struct hipChannelFormatDesc *desc, - size_t size=UINT_MAX) +inline static hipError_t hipBindTexture(size_t *offset, + struct texture *tex, + const void *devPtr, + const struct hipChannelFormatDesc *desc, + size_t size=UINT_MAX) { return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, desc, size)); } diff --git a/projects/hip/src/hip_hcc.cpp b/projects/hip/src/hip_hcc.cpp index 0631cc6814..1c93ae48dd 100644 --- a/projects/hip/src/hip_hcc.cpp +++ b/projects/hip/src/hip_hcc.cpp @@ -221,7 +221,7 @@ ihipSignal_t *ihipStream_t::allocSignal() SIGSEQNUM oldSigId = _signalPool[thisCursor]._sig_id; _signalPool[thisCursor]._index = thisCursor; _signalPool[thisCursor]._sig_id = ++_stream_sig_id; // allocate it. - tprintf(DB_SIGNAL, "allocatSignal #%lu at pos:%i (old sigId:%lu < oldest_live:%lu)\n", + tprintf(DB_SIGNAL, "allocatSignal #%lu at pos:%i (old sigId:%lu < oldest_live:%lu)\n", _signalPool[thisCursor]._sig_id, thisCursor, oldSigId, _oldest_live_sig_id); @@ -627,12 +627,12 @@ hipError_t ihipDevice_t::getProperties(hipDeviceProp_t* prop) prop->maxSharedMemoryPerMultiProcessor = prop->totalGlobalMem; // Get Max memory clock frequency - //err = hsa_region_get_info(*am_region, (hsa_region_info_t)HSA_AMD_REGION_INFO_MAX_CLOCK_FREQUENCY, &prop->memoryClockRate); + err = hsa_region_get_info(*am_region, (hsa_region_info_t)HSA_AMD_REGION_INFO_MAX_CLOCK_FREQUENCY, &prop->memoryClockRate); DeviceErrorCheck(err); prop->memoryClockRate *= 1000.0; // convert Mhz to Khz. // Get global memory bus width in bits - //err = hsa_region_get_info(*am_region, (hsa_region_info_t)HSA_AMD_REGION_INFO_BUS_WIDTH, &prop->memoryBusWidth); + err = hsa_region_get_info(*am_region, (hsa_region_info_t)HSA_AMD_REGION_INFO_BUS_WIDTH, &prop->memoryBusWidth); DeviceErrorCheck(err); // Set feature flags - these are all mandatory for HIP on HCC path: @@ -676,7 +676,7 @@ void ihipDevice_t::syncDefaultStream(bool waitOnSelf) for (auto streamI=_streams.begin(); streamI!=_streams.end(); streamI++) { ihipStream_t *stream = *streamI; - + // Don't wait for streams that have "opted-out" of syncing with NULL stream. // And - don't wait for the NULL stream if (!(stream->_flags & hipStreamNonBlocking)) { @@ -769,7 +769,7 @@ void ihipReadEnv_I(int *var_ptr, const char *var_name1, const char *var_name2, c #endif // Determines if the given agent is of type HSA_DEVICE_TYPE_GPU and counts it. -static hsa_status_t findCpuAgent(hsa_agent_t agent, void *data) +static hsa_status_t findCpuAgent(hsa_agent_t agent, void *data) { hsa_device_type_t device_type; hsa_status_t status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type); @@ -794,9 +794,9 @@ static hsa_status_t findCpuAgent(hsa_agent_t agent, void *data) void ihipInit() { -#if COMPILE_TRACE_MARKER +#if COMPILE_TRACE_MARKER amdtInitializeActivityLogger(); - amdtScopedMarker("ihipInit", "HIP", NULL); + amdtScopedMarker("ihipInit", "HIP", NULL); #endif /* * Environment variables @@ -942,7 +942,7 @@ hipStream_t ihipSyncAndResolveStream(hipStream_t stream) tprintf(DB_SYNC, "stream %p wait default stream\n", stream); stream->getDevice()->_default_stream->wait(); } - + return stream; } } @@ -1138,7 +1138,7 @@ void ihipStream_t::copySync(void* dst, const void* src, size_t sizeBytes, unsign hc::am_copy(dst, src, sizeBytes); #endif } - } else if (kind == hipMemcpyHostToHost) { + } else if (kind == hipMemcpyHostToHost) { int depSignalCnt = preCopyCommand(NULL, &depSignal, ihipCommandCopyH2H); if (depSignalCnt) { @@ -1207,7 +1207,7 @@ void ihipStream_t::copyAsync(void* dst, const void* src, size_t sizeBytes, unsig bool srcTracked = (hc::am_memtracker_getinfo(&srcPtrInfo, src) == AM_SUCCESS); - // "tracked" really indicates if the pointer's virtual address is available in the GPU address space. + // "tracked" really indicates if the pointer's virtual address is available in the GPU address space. // If both pointers are not tracked, we need to fall back to a sync copy. if (!dstTracked || !srcTracked) { trueAsync = false;