fix query of memoryClockRate and memoryBusWidth for both NV and HCC path

[ROCm/hip commit: 535de2ecc1]
Этот коммит содержится в:
pensun
2016-03-25 09:24:08 -05:00
родитель fca3c4b260
Коммит fa114274ac
2 изменённых файлов: 25 добавлений и 23 удалений
+15 -13
Просмотреть файл
@@ -43,7 +43,7 @@ hipMemcpyHostToHost
// hipErrorNoDevice.
/*typedef enum hipTextureFilterMode
/*typedef enum hipTextureFilterMode
{
hipFilterModePoint = cudaFilterModePoint, ///< Point filter mode.
//! @warning cudaFilterModeLinear is not supported.
@@ -76,7 +76,7 @@ default:
return hipErrorUnknown;
}
}
// TODO match the error enum names of hip and cuda
// TODO match the error enum names of hip and cuda
inline static cudaError_t hipErrorToCudaError(hipError_t hError) {
switch(hError) {
case hipSuccess:
@@ -214,9 +214,11 @@ inline static hipError_t hipGetDeviceProperties(hipDeviceProp_t *p_prop, int dev
p_prop->maxThreadsPerMultiProcessor = cdprop.maxThreadsPerMultiProcessor ;
p_prop->computeMode = cdprop.computeMode ;
p_prop->canMapHostMemory = cdprop.canMapHostMemory;
p_prop->memoryClockRate = cdprop.memoryClockRate;
p_prop->memoryBusWidth = cdprop.memoryBusWidth;
// Same as clock-rate:
p_prop->clockInstructionRate = cdprop.clockRate;
p_prop->clockInstructionRate = cdprop.clockRate;
int ccVers = p_prop->major*100 + p_prop->minor * 10;
@@ -253,7 +255,7 @@ inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t att
{
cudaDeviceAttr cdattr;
cudaError_t cerror;
switch (attr) {
case hipDeviceAttributeMaxThreadsPerBlock:
cdattr = cudaDevAttrMaxThreadsPerBlock; break;
@@ -344,7 +346,7 @@ inline static hipError_t hipEventCreate( hipEvent_t* event)
{
return hipCUDAErrorTohipError(cudaEventCreate(event));
}
inline static hipError_t hipEventRecord( hipEvent_t event, hipStream_t stream = NULL)
{
return hipCUDAErrorTohipError(cudaEventRecord(event,stream));
@@ -377,18 +379,18 @@ inline static hipError_t hipStreamCreate(hipStream_t *stream)
return hipCUDAErrorTohipError(cudaStreamCreate(stream));
}
inline static hipError_t hipStreamSynchronize(hipStream_t stream)
inline static hipError_t hipStreamSynchronize(hipStream_t stream)
{
return hipCUDAErrorTohipError(cudaStreamSynchronize(stream));
}
inline static hipError_t hipStreamDestroy(hipStream_t stream)
inline static hipError_t hipStreamDestroy(hipStream_t stream)
{
return hipCUDAErrorTohipError(cudaStreamDestroy(stream));
}
inline static hipError_t hipDriverGetVersion(int *driverVersion)
inline static hipError_t hipDriverGetVersion(int *driverVersion)
{
cudaError_t err = cudaDriverGetVersion(driverVersion);
@@ -443,11 +445,11 @@ inline static hipError_t hipBindTexture(size_t *offset,
}
template <class T, int dim, enum cudaTextureReadMode readMode>
inline static hipError_t hipBindTexture(size_t *offset,
struct texture<T, dim, readMode> *tex,
const void *devPtr,
const struct hipChannelFormatDesc *desc,
size_t size=UINT_MAX)
inline static hipError_t hipBindTexture(size_t *offset,
struct texture<T, dim, readMode> *tex,
const void *devPtr,
const struct hipChannelFormatDesc *desc,
size_t size=UINT_MAX)
{
return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, desc, size));
}
+10 -10
Просмотреть файл
@@ -221,7 +221,7 @@ ihipSignal_t *ihipStream_t::allocSignal()
SIGSEQNUM oldSigId = _signalPool[thisCursor]._sig_id;
_signalPool[thisCursor]._index = thisCursor;
_signalPool[thisCursor]._sig_id = ++_stream_sig_id; // allocate it.
tprintf(DB_SIGNAL, "allocatSignal #%lu at pos:%i (old sigId:%lu < oldest_live:%lu)\n",
tprintf(DB_SIGNAL, "allocatSignal #%lu at pos:%i (old sigId:%lu < oldest_live:%lu)\n",
_signalPool[thisCursor]._sig_id,
thisCursor, oldSigId, _oldest_live_sig_id);
@@ -627,12 +627,12 @@ hipError_t ihipDevice_t::getProperties(hipDeviceProp_t* prop)
prop->maxSharedMemoryPerMultiProcessor = prop->totalGlobalMem;
// Get Max memory clock frequency
//err = hsa_region_get_info(*am_region, (hsa_region_info_t)HSA_AMD_REGION_INFO_MAX_CLOCK_FREQUENCY, &prop->memoryClockRate);
err = hsa_region_get_info(*am_region, (hsa_region_info_t)HSA_AMD_REGION_INFO_MAX_CLOCK_FREQUENCY, &prop->memoryClockRate);
DeviceErrorCheck(err);
prop->memoryClockRate *= 1000.0; // convert Mhz to Khz.
// Get global memory bus width in bits
//err = hsa_region_get_info(*am_region, (hsa_region_info_t)HSA_AMD_REGION_INFO_BUS_WIDTH, &prop->memoryBusWidth);
err = hsa_region_get_info(*am_region, (hsa_region_info_t)HSA_AMD_REGION_INFO_BUS_WIDTH, &prop->memoryBusWidth);
DeviceErrorCheck(err);
// Set feature flags - these are all mandatory for HIP on HCC path:
@@ -676,7 +676,7 @@ void ihipDevice_t::syncDefaultStream(bool waitOnSelf)
for (auto streamI=_streams.begin(); streamI!=_streams.end(); streamI++) {
ihipStream_t *stream = *streamI;
// Don't wait for streams that have "opted-out" of syncing with NULL stream.
// And - don't wait for the NULL stream
if (!(stream->_flags & hipStreamNonBlocking)) {
@@ -769,7 +769,7 @@ void ihipReadEnv_I(int *var_ptr, const char *var_name1, const char *var_name2, c
#endif
// Determines if the given agent is of type HSA_DEVICE_TYPE_GPU and counts it.
static hsa_status_t findCpuAgent(hsa_agent_t agent, void *data)
static hsa_status_t findCpuAgent(hsa_agent_t agent, void *data)
{
hsa_device_type_t device_type;
hsa_status_t status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type);
@@ -794,9 +794,9 @@ static hsa_status_t findCpuAgent(hsa_agent_t agent, void *data)
void ihipInit()
{
#if COMPILE_TRACE_MARKER
#if COMPILE_TRACE_MARKER
amdtInitializeActivityLogger();
amdtScopedMarker("ihipInit", "HIP", NULL);
amdtScopedMarker("ihipInit", "HIP", NULL);
#endif
/*
* Environment variables
@@ -942,7 +942,7 @@ hipStream_t ihipSyncAndResolveStream(hipStream_t stream)
tprintf(DB_SYNC, "stream %p wait default stream\n", stream);
stream->getDevice()->_default_stream->wait();
}
return stream;
}
}
@@ -1138,7 +1138,7 @@ void ihipStream_t::copySync(void* dst, const void* src, size_t sizeBytes, unsign
hc::am_copy(dst, src, sizeBytes);
#endif
}
} else if (kind == hipMemcpyHostToHost) {
} else if (kind == hipMemcpyHostToHost) {
int depSignalCnt = preCopyCommand(NULL, &depSignal, ihipCommandCopyH2H);
if (depSignalCnt) {
@@ -1207,7 +1207,7 @@ void ihipStream_t::copyAsync(void* dst, const void* src, size_t sizeBytes, unsig
bool srcTracked = (hc::am_memtracker_getinfo(&srcPtrInfo, src) == AM_SUCCESS);
// "tracked" really indicates if the pointer's virtual address is available in the GPU address space.
// "tracked" really indicates if the pointer's virtual address is available in the GPU address space.
// If both pointers are not tracked, we need to fall back to a sync copy.
if (!dstTracked || !srcTracked) {
trueAsync = false;