Add missing texturePitchAlignment member to the hipDeviceProp_t struct. (#1802)

* Add missing texturePitchAlignment member to the hipDeviceProp_t struct.

* Add missing hipDeviceAttributeTexturePitchAlignment enumerator to the hipDeviceAttribute_t enum.

* Initialize texturePitchAlignment to 256. This works for gfx9+, but is technically overaligned in most cases for pre-gfx9.

* Add the texturePitchAlignment property to the NVCC path.


[ROCm/hip commit: f72a669487]
이 커밋은 다음에 포함됨:
vsytch
2020-01-27 19:37:00 -05:00
커밋한 사람 Rahul Garg
부모 843711e222
커밋 1af35a6044
4개의 파일이 변경되었으며, 11줄이 추가되고 1줄이 삭제됨
+2
파일 보기
@@ -124,6 +124,7 @@ typedef struct hipDeviceProp_t {
unsigned int* hdpRegFlushCntl; ///< Address of HDP_REG_COHERENCY_FLUSH_CNTL register
size_t memPitch; ///<Maximum pitch in bytes allowed by memory copies
size_t textureAlignment; ///<Alignment requirement for textures
size_t texturePitchAlignment; ///<Pitch alignment requirement for texture references bound to pitched memory
int kernelExecTimeoutEnabled; ///<Run time limit for kernels executed on the device
int ECCEnabled; ///<Device has ECC support enabled
int tccDriver; ///< 1:If device is Tesla device using TCC driver, else 0
@@ -321,6 +322,7 @@ typedef enum hipDeviceAttribute_t {
hipDeviceAttributeMaxPitch, ///< Maximum pitch in bytes allowed by memory copies
hipDeviceAttributeTextureAlignment, ///<Alignment requirement for textures
hipDeviceAttributeTexturePitchAlignment, ///<Pitch alignment requirement for 2D texture references bound to pitched memory;
hipDeviceAttributeKernelExecTimeout, ///<Run time limit for kernels executed on the device
hipDeviceAttributeCanMapHostMemory, ///<Device can map host memory into device address space
hipDeviceAttributeEccEnabled ///<Device has ECC support enabled
+4
파일 보기
@@ -1130,6 +1130,7 @@ inline static hipError_t hipGetDeviceProperties(hipDeviceProp_t* p_prop, int dev
p_prop->memPitch = cdprop.memPitch;
p_prop->textureAlignment = cdprop.textureAlignment;
p_prop->texturePitchAlignment = cdprop.texturePitchAlignment;
p_prop->kernelExecTimeoutEnabled = cdprop.kernelExecTimeoutEnabled;
p_prop->ECCEnabled = cdprop.ECCEnabled;
p_prop->tccDriver = cdprop.tccDriver;
@@ -1244,6 +1245,9 @@ inline static hipError_t hipDeviceGetAttribute(int* pi, hipDeviceAttribute_t att
case hipDeviceAttributeTextureAlignment:
cdattr = cudaDevAttrTextureAlignment;
break;
case hipDeviceAttributeTexturePitchAlignment:
cdattr = cudaDevAttrTexturePitchAlignment;
break;
case hipDeviceAttributeKernelExecTimeout:
cdattr = cudaDevAttrKernelExecTimeout;
break;
+4 -1
파일 보기
@@ -312,9 +312,12 @@ hipError_t ihipDeviceGetAttribute(int* pi, hipDeviceAttribute_t attr, int device
case hipDeviceAttributeMaxPitch:
*pi = prop->memPitch;
break;
case hipDeviceAttributeTextureAlignment:
case hipDeviceAttributeTextureAlignment:
*pi = prop->textureAlignment;
break;
case hipDeviceAttributeTexturePitchAlignment:
*pi = prop->texturePitchAlignment;
break;
case hipDeviceAttributeKernelExecTimeout:
*pi = prop->kernelExecTimeoutEnabled;
break;
+1
파일 보기
@@ -925,6 +925,7 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop) {
prop->memPitch = INT_MAX; //Maximum pitch in bytes allowed by memory copies (hardcoded 128 bytes in hipMallocPitch)
prop->textureAlignment = 0; //Alignment requirement for textures
prop->texturePitchAlignment = IMAGE_PITCH_ALIGNMENT; //Alignment requirement for texture pitch
prop->kernelExecTimeoutEnabled = 0; //no run time limit for running kernels on device
hsa_isa_t isa;