fix query of memoryClockRate and memoryBusWidth for both NV and HCC path
[ROCm/hip commit: 535de2ecc1]
Этот коммит содержится в:
@@ -43,7 +43,7 @@ hipMemcpyHostToHost
|
||||
|
||||
// hipErrorNoDevice.
|
||||
|
||||
/*typedef enum hipTextureFilterMode
|
||||
/*typedef enum hipTextureFilterMode
|
||||
{
|
||||
hipFilterModePoint = cudaFilterModePoint, ///< Point filter mode.
|
||||
//! @warning cudaFilterModeLinear is not supported.
|
||||
@@ -76,7 +76,7 @@ default:
|
||||
return hipErrorUnknown;
|
||||
}
|
||||
}
|
||||
// TODO match the error enum names of hip and cuda
|
||||
// TODO match the error enum names of hip and cuda
|
||||
inline static cudaError_t hipErrorToCudaError(hipError_t hError) {
|
||||
switch(hError) {
|
||||
case hipSuccess:
|
||||
@@ -214,9 +214,11 @@ inline static hipError_t hipGetDeviceProperties(hipDeviceProp_t *p_prop, int dev
|
||||
p_prop->maxThreadsPerMultiProcessor = cdprop.maxThreadsPerMultiProcessor ;
|
||||
p_prop->computeMode = cdprop.computeMode ;
|
||||
p_prop->canMapHostMemory = cdprop.canMapHostMemory;
|
||||
p_prop->memoryClockRate = cdprop.memoryClockRate;
|
||||
p_prop->memoryBusWidth = cdprop.memoryBusWidth;
|
||||
|
||||
// Same as clock-rate:
|
||||
p_prop->clockInstructionRate = cdprop.clockRate;
|
||||
p_prop->clockInstructionRate = cdprop.clockRate;
|
||||
|
||||
int ccVers = p_prop->major*100 + p_prop->minor * 10;
|
||||
|
||||
@@ -253,7 +255,7 @@ inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t att
|
||||
{
|
||||
cudaDeviceAttr cdattr;
|
||||
cudaError_t cerror;
|
||||
|
||||
|
||||
switch (attr) {
|
||||
case hipDeviceAttributeMaxThreadsPerBlock:
|
||||
cdattr = cudaDevAttrMaxThreadsPerBlock; break;
|
||||
@@ -344,7 +346,7 @@ inline static hipError_t hipEventCreate( hipEvent_t* event)
|
||||
{
|
||||
return hipCUDAErrorTohipError(cudaEventCreate(event));
|
||||
}
|
||||
|
||||
|
||||
inline static hipError_t hipEventRecord( hipEvent_t event, hipStream_t stream = NULL)
|
||||
{
|
||||
return hipCUDAErrorTohipError(cudaEventRecord(event,stream));
|
||||
@@ -377,18 +379,18 @@ inline static hipError_t hipStreamCreate(hipStream_t *stream)
|
||||
return hipCUDAErrorTohipError(cudaStreamCreate(stream));
|
||||
}
|
||||
|
||||
inline static hipError_t hipStreamSynchronize(hipStream_t stream)
|
||||
inline static hipError_t hipStreamSynchronize(hipStream_t stream)
|
||||
{
|
||||
return hipCUDAErrorTohipError(cudaStreamSynchronize(stream));
|
||||
}
|
||||
|
||||
inline static hipError_t hipStreamDestroy(hipStream_t stream)
|
||||
inline static hipError_t hipStreamDestroy(hipStream_t stream)
|
||||
{
|
||||
return hipCUDAErrorTohipError(cudaStreamDestroy(stream));
|
||||
}
|
||||
|
||||
|
||||
inline static hipError_t hipDriverGetVersion(int *driverVersion)
|
||||
inline static hipError_t hipDriverGetVersion(int *driverVersion)
|
||||
{
|
||||
cudaError_t err = cudaDriverGetVersion(driverVersion);
|
||||
|
||||
@@ -443,11 +445,11 @@ inline static hipError_t hipBindTexture(size_t *offset,
|
||||
}
|
||||
|
||||
template <class T, int dim, enum cudaTextureReadMode readMode>
|
||||
inline static hipError_t hipBindTexture(size_t *offset,
|
||||
struct texture<T, dim, readMode> *tex,
|
||||
const void *devPtr,
|
||||
const struct hipChannelFormatDesc *desc,
|
||||
size_t size=UINT_MAX)
|
||||
inline static hipError_t hipBindTexture(size_t *offset,
|
||||
struct texture<T, dim, readMode> *tex,
|
||||
const void *devPtr,
|
||||
const struct hipChannelFormatDesc *desc,
|
||||
size_t size=UINT_MAX)
|
||||
{
|
||||
return hipCUDAErrorTohipError(cudaBindTexture(offset, tex, devPtr, desc, size));
|
||||
}
|
||||
|
||||
@@ -221,7 +221,7 @@ ihipSignal_t *ihipStream_t::allocSignal()
|
||||
SIGSEQNUM oldSigId = _signalPool[thisCursor]._sig_id;
|
||||
_signalPool[thisCursor]._index = thisCursor;
|
||||
_signalPool[thisCursor]._sig_id = ++_stream_sig_id; // allocate it.
|
||||
tprintf(DB_SIGNAL, "allocatSignal #%lu at pos:%i (old sigId:%lu < oldest_live:%lu)\n",
|
||||
tprintf(DB_SIGNAL, "allocatSignal #%lu at pos:%i (old sigId:%lu < oldest_live:%lu)\n",
|
||||
_signalPool[thisCursor]._sig_id,
|
||||
thisCursor, oldSigId, _oldest_live_sig_id);
|
||||
|
||||
@@ -627,12 +627,12 @@ hipError_t ihipDevice_t::getProperties(hipDeviceProp_t* prop)
|
||||
prop->maxSharedMemoryPerMultiProcessor = prop->totalGlobalMem;
|
||||
|
||||
// Get Max memory clock frequency
|
||||
//err = hsa_region_get_info(*am_region, (hsa_region_info_t)HSA_AMD_REGION_INFO_MAX_CLOCK_FREQUENCY, &prop->memoryClockRate);
|
||||
err = hsa_region_get_info(*am_region, (hsa_region_info_t)HSA_AMD_REGION_INFO_MAX_CLOCK_FREQUENCY, &prop->memoryClockRate);
|
||||
DeviceErrorCheck(err);
|
||||
prop->memoryClockRate *= 1000.0; // convert Mhz to Khz.
|
||||
|
||||
// Get global memory bus width in bits
|
||||
//err = hsa_region_get_info(*am_region, (hsa_region_info_t)HSA_AMD_REGION_INFO_BUS_WIDTH, &prop->memoryBusWidth);
|
||||
err = hsa_region_get_info(*am_region, (hsa_region_info_t)HSA_AMD_REGION_INFO_BUS_WIDTH, &prop->memoryBusWidth);
|
||||
DeviceErrorCheck(err);
|
||||
|
||||
// Set feature flags - these are all mandatory for HIP on HCC path:
|
||||
@@ -676,7 +676,7 @@ void ihipDevice_t::syncDefaultStream(bool waitOnSelf)
|
||||
|
||||
for (auto streamI=_streams.begin(); streamI!=_streams.end(); streamI++) {
|
||||
ihipStream_t *stream = *streamI;
|
||||
|
||||
|
||||
// Don't wait for streams that have "opted-out" of syncing with NULL stream.
|
||||
// And - don't wait for the NULL stream
|
||||
if (!(stream->_flags & hipStreamNonBlocking)) {
|
||||
@@ -769,7 +769,7 @@ void ihipReadEnv_I(int *var_ptr, const char *var_name1, const char *var_name2, c
|
||||
#endif
|
||||
|
||||
// Determines if the given agent is of type HSA_DEVICE_TYPE_GPU and counts it.
|
||||
static hsa_status_t findCpuAgent(hsa_agent_t agent, void *data)
|
||||
static hsa_status_t findCpuAgent(hsa_agent_t agent, void *data)
|
||||
{
|
||||
hsa_device_type_t device_type;
|
||||
hsa_status_t status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type);
|
||||
@@ -794,9 +794,9 @@ static hsa_status_t findCpuAgent(hsa_agent_t agent, void *data)
|
||||
void ihipInit()
|
||||
{
|
||||
|
||||
#if COMPILE_TRACE_MARKER
|
||||
#if COMPILE_TRACE_MARKER
|
||||
amdtInitializeActivityLogger();
|
||||
amdtScopedMarker("ihipInit", "HIP", NULL);
|
||||
amdtScopedMarker("ihipInit", "HIP", NULL);
|
||||
#endif
|
||||
/*
|
||||
* Environment variables
|
||||
@@ -942,7 +942,7 @@ hipStream_t ihipSyncAndResolveStream(hipStream_t stream)
|
||||
tprintf(DB_SYNC, "stream %p wait default stream\n", stream);
|
||||
stream->getDevice()->_default_stream->wait();
|
||||
}
|
||||
|
||||
|
||||
return stream;
|
||||
}
|
||||
}
|
||||
@@ -1138,7 +1138,7 @@ void ihipStream_t::copySync(void* dst, const void* src, size_t sizeBytes, unsign
|
||||
hc::am_copy(dst, src, sizeBytes);
|
||||
#endif
|
||||
}
|
||||
} else if (kind == hipMemcpyHostToHost) {
|
||||
} else if (kind == hipMemcpyHostToHost) {
|
||||
int depSignalCnt = preCopyCommand(NULL, &depSignal, ihipCommandCopyH2H);
|
||||
|
||||
if (depSignalCnt) {
|
||||
@@ -1207,7 +1207,7 @@ void ihipStream_t::copyAsync(void* dst, const void* src, size_t sizeBytes, unsig
|
||||
bool srcTracked = (hc::am_memtracker_getinfo(&srcPtrInfo, src) == AM_SUCCESS);
|
||||
|
||||
|
||||
// "tracked" really indicates if the pointer's virtual address is available in the GPU address space.
|
||||
// "tracked" really indicates if the pointer's virtual address is available in the GPU address space.
|
||||
// If both pointers are not tracked, we need to fall back to a sync copy.
|
||||
if (!dstTracked || !srcTracked) {
|
||||
trueAsync = false;
|
||||
|
||||
Ссылка в новой задаче
Block a user