Add missing texturePitchAlignment member to the hipDeviceProp_t struct. (#1802)
* Add missing texturePitchAlignment member to the hipDeviceProp_t struct.
* Add missing hipDeviceAttributeTexturePitchAlignment enumerator to the hipDeviceAttribute_t enum.
* Initialize texturePitchAlignment to 256. This works for gfx9+, but is technically overaligned in most cases for pre-gfx9.
* Add the texturePitchAlignment property to the NVCC path.
[ROCm/hip commit: f72a669487]
이 커밋은 다음에 포함됨:
@@ -124,6 +124,7 @@ typedef struct hipDeviceProp_t {
|
||||
unsigned int* hdpRegFlushCntl; ///< Address of HDP_REG_COHERENCY_FLUSH_CNTL register
|
||||
size_t memPitch; ///<Maximum pitch in bytes allowed by memory copies
|
||||
size_t textureAlignment; ///<Alignment requirement for textures
|
||||
size_t texturePitchAlignment; ///<Pitch alignment requirement for texture references bound to pitched memory
|
||||
int kernelExecTimeoutEnabled; ///<Run time limit for kernels executed on the device
|
||||
int ECCEnabled; ///<Device has ECC support enabled
|
||||
int tccDriver; ///< 1:If device is Tesla device using TCC driver, else 0
|
||||
@@ -321,6 +322,7 @@ typedef enum hipDeviceAttribute_t {
|
||||
|
||||
hipDeviceAttributeMaxPitch, ///< Maximum pitch in bytes allowed by memory copies
|
||||
hipDeviceAttributeTextureAlignment, ///<Alignment requirement for textures
|
||||
hipDeviceAttributeTexturePitchAlignment, ///<Pitch alignment requirement for 2D texture references bound to pitched memory
|
||||
hipDeviceAttributeKernelExecTimeout, ///<Run time limit for kernels executed on the device
|
||||
hipDeviceAttributeCanMapHostMemory, ///<Device can map host memory into device address space
|
||||
hipDeviceAttributeEccEnabled ///<Device has ECC support enabled
|
||||
|
||||
@@ -1130,6 +1130,7 @@ inline static hipError_t hipGetDeviceProperties(hipDeviceProp_t* p_prop, int dev
|
||||
|
||||
p_prop->memPitch = cdprop.memPitch;
|
||||
p_prop->textureAlignment = cdprop.textureAlignment;
|
||||
p_prop->texturePitchAlignment = cdprop.texturePitchAlignment;
|
||||
p_prop->kernelExecTimeoutEnabled = cdprop.kernelExecTimeoutEnabled;
|
||||
p_prop->ECCEnabled = cdprop.ECCEnabled;
|
||||
p_prop->tccDriver = cdprop.tccDriver;
|
||||
@@ -1244,6 +1245,9 @@ inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t att
|
||||
case hipDeviceAttributeTextureAlignment:
|
||||
cdattr = cudaDevAttrTextureAlignment;
|
||||
break;
|
||||
case hipDeviceAttributeTexturePitchAlignment:
|
||||
cdattr = cudaDevAttrTexturePitchAlignment;
|
||||
break;
|
||||
case hipDeviceAttributeKernelExecTimeout:
|
||||
cdattr = cudaDevAttrKernelExecTimeout;
|
||||
break;
|
||||
|
||||
@@ -312,9 +312,12 @@ hipError_t ihipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device
|
||||
case hipDeviceAttributeMaxPitch:
|
||||
*pi = prop->memPitch;
|
||||
break;
|
||||
case hipDeviceAttributeTextureAlignment:
|
||||
case hipDeviceAttributeTextureAlignment:
|
||||
*pi = prop->textureAlignment;
|
||||
break;
|
||||
case hipDeviceAttributeTexturePitchAlignment:
|
||||
*pi = prop->texturePitchAlignment;
|
||||
break;
|
||||
case hipDeviceAttributeKernelExecTimeout:
|
||||
*pi = prop->kernelExecTimeoutEnabled;
|
||||
break;
|
||||
|
||||
@@ -925,6 +925,7 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop) {
|
||||
|
||||
prop->memPitch = INT_MAX; //Maximum pitch in bytes allowed by memory copies (hardcoded 128 bytes in hipMallocPitch)
|
||||
prop->textureAlignment = 0; //Alignment requirement for textures
|
||||
prop->texturePitchAlignment = IMAGE_PITCH_ALIGNMENT; //Alignment requirement for texture pitch
|
||||
prop->kernelExecTimeoutEnabled = 0; //no run time limit for running kernels on device
|
||||
|
||||
hsa_isa_t isa;
|
||||
|
||||
새 이슈에서 참조
사용자 차단