SWDEV-299127 - Support External Mipmap

Support hipExternalMemoryGetMappedMipmappedArray().
Add ImageExternalBuffer to differiate ImageBuffer.
Currently we only support tiling_optimal mode as
vulkan driver doesn't provide tiling information.

Change-Id: I7e3524cdde53e4df9f728894bcebf4bd3f58d4d9
Этот коммит содержится в:
taosang2
2023-08-10 11:49:48 -04:00
коммит произвёл Tao Sang
родитель e63c280d4d
Коммит 6398f604b0
12 изменённых файлов: 136 добавлений и 41 удалений
+28 -1
Просмотреть файл
@@ -376,7 +376,8 @@ enum hip_api_id_t {
HIP_API_ID_hipArrayGetDescriptor = 361,
HIP_API_ID_hipArrayGetInfo = 362,
HIP_API_ID_hipStreamGetDevice = 363,
HIP_API_ID_LAST = 363,
HIP_API_ID_hipExternalMemoryGetMappedMipmappedArray = 364,
HIP_API_ID_LAST = 364,
HIP_API_ID_hipBindTexture = HIP_API_ID_NONE,
HIP_API_ID_hipBindTexture2D = HIP_API_ID_NONE,
@@ -779,6 +780,7 @@ static inline const char* hip_api_name(const uint32_t id) {
case HIP_API_ID_hipUserObjectRelease: return "hipUserObjectRelease";
case HIP_API_ID_hipUserObjectRetain: return "hipUserObjectRetain";
case HIP_API_ID_hipWaitExternalSemaphoresAsync: return "hipWaitExternalSemaphoresAsync";
case HIP_API_ID_hipExternalMemoryGetMappedMipmappedArray: return "hipExternalMemoryGetMappedMipmappedArray";
};
return "unknown";
};
@@ -1145,6 +1147,7 @@ static inline uint32_t hipApiIdByName(const char* name) {
if (strcmp("hipUserObjectRelease", name) == 0) return HIP_API_ID_hipUserObjectRelease;
if (strcmp("hipUserObjectRetain", name) == 0) return HIP_API_ID_hipUserObjectRetain;
if (strcmp("hipWaitExternalSemaphoresAsync", name) == 0) return HIP_API_ID_hipWaitExternalSemaphoresAsync;
if (strcmp("hipExternalMemoryGetMappedMipmappedArray", name) == 0) return HIP_API_ID_hipExternalMemoryGetMappedMipmappedArray;
return HIP_API_ID_NONE;
}
@@ -3266,6 +3269,12 @@ typedef struct hip_api_data_s {
unsigned int numExtSems;
hipStream_t stream;
} hipWaitExternalSemaphoresAsync;
struct {
hipMipmappedArray_t* mipmap;
hipExternalMemory_t extMem;
const hipExternalMemoryMipmappedArrayDesc* mipmapDesc;
hipExternalMemoryMipmappedArrayDesc mipmapDesc__val;
} hipExternalMemoryGetMappedMipmappedArray;
} args;
uint64_t *phase_data;
} hip_api_data_t;
@@ -3692,6 +3701,12 @@ typedef struct hip_api_data_s {
cb_data.args.hipExternalMemoryGetMappedBuffer.extMem = (hipExternalMemory_t)extMem; \
cb_data.args.hipExternalMemoryGetMappedBuffer.bufferDesc = (const hipExternalMemoryBufferDesc*)bufferDesc; \
};
// hipExternalMemoryGetMappedMipmappedArray[('hipMipmappedArray_t*', 'mipmap'), ('hipExternalMemory_t', 'extMem'), ('const hipExternalMemoryMipmappedArrayDesc*', 'mipmapDesc')]
#define INIT_hipExternalMemoryGetMappedMipmappedArray_CB_ARGS_DATA(cb_data) { \
cb_data.args.hipExternalMemoryGetMappedMipmappedArray.mipmap = (hipMipmappedArray_t*)mipmap; \
cb_data.args.hipExternalMemoryGetMappedMipmappedArray.extMem = (hipExternalMemory_t)extMem; \
cb_data.args.hipExternalMemoryGetMappedMipmappedArray.mipmapDesc = (const hipExternalMemoryMipmappedArrayDesc*)mipmapDesc; \
};
// hipFree[('void*', 'ptr')]
#define INIT_hipFree_CB_ARGS_DATA(cb_data) { \
cb_data.args.hipFree.ptr = (void*)ptr; \
@@ -5835,6 +5850,10 @@ static inline void hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) {
if (data->args.hipExternalMemoryGetMappedBuffer.devPtr) data->args.hipExternalMemoryGetMappedBuffer.devPtr__val = *(data->args.hipExternalMemoryGetMappedBuffer.devPtr);
if (data->args.hipExternalMemoryGetMappedBuffer.bufferDesc) data->args.hipExternalMemoryGetMappedBuffer.bufferDesc__val = *(data->args.hipExternalMemoryGetMappedBuffer.bufferDesc);
break;
// hipExternalMemoryGetMappedMipmappedArray[('hipMipmappedArray_t*', 'mipmap'), ('hipExternalMemory_t', 'extMem'), ('const hipExternalMemoryMipmappedArrayDesc*', 'mipmapDesc')]
case HIP_API_ID_hipExternalMemoryGetMappedMipmappedArray:
if (data->args.hipExternalMemoryGetMappedMipmappedArray.mipmapDesc) data->args.hipExternalMemoryGetMappedMipmappedArray.mipmapDesc__val = *(data->args.hipExternalMemoryGetMappedMipmappedArray.mipmapDesc);
break;
// hipFree[('void*', 'ptr')]
case HIP_API_ID_hipFree:
break;
@@ -7503,6 +7522,14 @@ static inline const char* hipApiString(hip_api_id_t id, const hip_api_data_t* da
else { oss << ", bufferDesc="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExternalMemoryGetMappedBuffer.bufferDesc__val); }
oss << ")";
break;
case HIP_API_ID_hipExternalMemoryGetMappedMipmappedArray:
oss << "hipExternalMemoryGetMappedMipmappedArray(";
oss << "mipmap="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExternalMemoryGetMappedMipmappedArray.mipmap);
oss << ", extMem="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExternalMemoryGetMappedMipmappedArray.extMem);
if (data->args.hipExternalMemoryGetMappedMipmappedArray.mipmapDesc == NULL) oss << ", mipmapDesc=NULL";
else { oss << ", mipmapDesc="; roctracer::hip_support::detail::operator<<(oss, data->args.hipExternalMemoryGetMappedMipmappedArray.mipmapDesc__val); }
oss << ")";
break;
case HIP_API_ID_hipFree:
oss << "hipFree(";
oss << "ptr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipFree.ptr);
+9
Просмотреть файл
@@ -1284,6 +1284,7 @@ typedef enum cudaExternalMemoryHandleType hipExternalMemoryHandleType;
typedef struct cudaExternalMemoryHandleDesc hipExternalMemoryHandleDesc;
typedef struct cudaExternalMemoryBufferDesc hipExternalMemoryBufferDesc;
typedef cudaExternalMemory_t hipExternalMemory_t;
typedef cudaExternalMemoryMipmappedArrayDesc hipExternalMemoryMipmappedArrayDesc;
typedef enum cudaExternalSemaphoreHandleType hipExternalSemaphoreHandleType;
#define hipExternalSemaphoreHandleTypeOpaqueFd cudaExternalSemaphoreHandleTypeOpaqueFd
@@ -2944,6 +2945,14 @@ inline static hipError_t hipExternalMemoryGetMappedBuffer(void **devPtr, hipExte
return hipCUDAErrorTohipError(cudaExternalMemoryGetMappedBuffer(devPtr, extMem, (const struct cudaExternalMemoryBufferDesc*)bufferDesc));
}
inline static hipError_t hipExternalMemoryGetMappedMipmappedArray(
hipMipmappedArray_t* mipmap, hipExternalMemory_t extMem,
const hipExternalMemoryMipmappedArrayDesc* mipmapDesc) {
return hipCUDAErrorTohipError(cudaExternalMemoryGetMappedMipmappedArray(
(cudaMipmappedArray_t*)mipmap, (cudaExternalMemory_t)extMem,
(const struct cudaExternalMemoryMipmappedArrayDesc*)mipmapDesc));
}
inline static hipError_t hipDestroyExternalMemory(hipExternalMemory_t extMem) {
return hipCUDAErrorTohipError(cudaDestroyExternalMemory(extMem));
}
+1
Просмотреть файл
@@ -31,6 +31,7 @@ hipDeviceGetUuid
hipDeviceGetPCIBusId
hipDeviceGetSharedMemConfig
hipDeviceGetP2PAttribute
hipExternalMemoryGetMappedMipmappedArray
hipDevicePrimaryCtxGetState
hipDevicePrimaryCtxRelease
hipDevicePrimaryCtxReset
+65 -7
Просмотреть файл
@@ -875,6 +875,7 @@ amd::Image* ihipImageCreate(const cl_channel_order channelOrder,
const size_t imageRowPitch,
const size_t imageSlicePitch,
const uint32_t numMipLevels,
const size_t offset,
amd::Memory* buffer,
hipError_t& status) {
status = hipSuccess;
@@ -925,9 +926,34 @@ amd::Image* ihipImageCreate(const cl_channel_order channelOrder,
}
// TODO validate the image descriptor.
bool isExternalBuffer = buffer != nullptr ? buffer->isInterop() : false;
amd::Image* image = nullptr;
if (buffer != nullptr) {
if (isExternalBuffer) {
// We will create mipmap image view on top of external buffer which is from Vulkan or D3D
// image. The buffer contains subchunks of tiled image, or it's just a linear image. Lower
// level PAL will infer memory layout (offset, pitch, slice pitch, size) for each level
// array in terms of tiling mode(linear or optimal), extent, format and mipmap levels.
// Note that RocR will support mipmap in the future.
switch (imageType) {
case CL_MEM_OBJECT_IMAGE1D:
case CL_MEM_OBJECT_IMAGE2D:
case CL_MEM_OBJECT_IMAGE3D:
image = new (context) amd::Image(*buffer->asBuffer(),
imageType,
CL_MEM_READ_WRITE,
imageFormat,
imageWidth,
(imageHeight == 0) ? 1 : imageHeight,
(imageDepth == 0) ? 1 : imageDepth,
imageRowPitch,
imageSlicePitch,
numMipLevels,
offset);
break;
default:
LogPrintfError("Cannot create image of imageType: 0x%x for external buffer\n", imageType);
}
} else if (buffer != nullptr) {
switch (imageType) {
case CL_MEM_OBJECT_IMAGE1D_BUFFER:
case CL_MEM_OBJECT_IMAGE2D:
@@ -939,7 +965,9 @@ amd::Image* ihipImageCreate(const cl_channel_order channelOrder,
(imageHeight == 0) ? 1 : imageHeight,
(imageDepth == 0) ? 1 : imageDepth,
imageRowPitch,
imageSlicePitch);
imageSlicePitch,
numMipLevels,
offset);
break;
default:
LogPrintfError("Cannot create image of imageType: 0x%x \n", imageType);
@@ -1040,7 +1068,7 @@ hipError_t ihipArrayCreate(hipArray** array,
pAllocateArray->Depth, /* array size */
0, /* row pitch */
0, /* slice pitch */
numMipmapLevels,
numMipmapLevels, 0,
nullptr, /* buffer */
status);
@@ -3933,7 +3961,8 @@ hipError_t hipDrvMemcpy2DUnaligned(const hip_Memcpy2D* pCopy) {
hipError_t ihipMipmapArrayCreate(hipMipmappedArray_t* mipmapped_array_pptr,
HIP_ARRAY3D_DESCRIPTOR* mipmapped_array_desc_ptr,
unsigned int num_mipmap_levels) {
unsigned int num_mipmap_levels,
size_t offset = 0, amd::Memory* buffer = nullptr) {
bool mipMapSupport = true;
amd::Context& context = *hip::getCurrentDevice()->asContext();
const std::vector<amd::Device*>& devices = context.devices();
@@ -3966,7 +3995,8 @@ hipError_t ihipMipmapArrayCreate(hipMipmappedArray_t* mipmapped_array_pptr,
0 /* row pitch */,
0 /* slice pitch */,
num_mipmap_levels,
nullptr, /* buffer */
offset,
buffer,
status);
if (image == nullptr) {
@@ -3985,7 +4015,7 @@ hipError_t ihipMipmapArrayCreate(hipMipmappedArray_t* mipmapped_array_pptr,
(*mipmapped_array_pptr)->height = mipmapped_array_desc_ptr->Height;
(*mipmapped_array_pptr)->depth = mipmapped_array_desc_ptr->Depth;
(*mipmapped_array_pptr)->min_mipmap_level = 0;
(*mipmapped_array_pptr)->max_mipmap_level = num_mipmap_levels;
(*mipmapped_array_pptr)->max_mipmap_level = num_mipmap_levels - 1;
(*mipmapped_array_pptr)->flags = mipmapped_array_desc_ptr->Flags;
(*mipmapped_array_pptr)->format = mipmapped_array_desc_ptr->Format;
(*mipmapped_array_pptr)->num_channels = mipmapped_array_desc_ptr->NumChannels;
@@ -4127,3 +4157,31 @@ hipError_t hipGetMipmappedArrayLevel(hipArray_t *levelArray,
const_cast<hipMipmappedArray_t>(mipmappedArray),
level));
}
// ================================================================================================
hipError_t hipExternalMemoryGetMappedMipmappedArray(
hipMipmappedArray_t* mipmap, hipExternalMemory_t extMem,
const hipExternalMemoryMipmappedArrayDesc* mipmapDesc) {
HIP_INIT_API(hipExternalMemoryGetMappedMipmappedArray, mipmap, extMem, mipmapDesc);
if (mipmap == nullptr || extMem == nullptr || mipmapDesc == nullptr ||
mipmapDesc->flags != hipArrayDefault) {
HIP_RETURN(hipErrorInvalidValue);
}
CHECK_STREAM_CAPTURE_SUPPORTED();
auto buf = reinterpret_cast<amd::ExternalBuffer*>(extMem);
const device::Memory* devMem = buf->getDeviceMemory(*hip::getCurrentDevice()->devices()[0]);
HIP_ARRAY3D_DESCRIPTOR allocateArray = {mipmapDesc->extent.width,
mipmapDesc->extent.height,
mipmapDesc->extent.depth,
hip::getArrayFormat(mipmapDesc->formatDesc),
hip::getNumChannels(mipmapDesc->formatDesc),
mipmapDesc->flags};
if (!hip::CheckArrayFormat(mipmapDesc->formatDesc)) {
return HIP_RETURN(hipErrorInvalidValue);
}
HIP_RETURN(ihipMipmapArrayCreate(mipmap, &allocateArray, mipmapDesc->numLevels,
(size_t)mipmapDesc->offset, buf));
}
+3
Просмотреть файл
@@ -67,6 +67,7 @@ amd::Image* ihipImageCreate(const cl_channel_order channelOrder,
const size_t imageRowPitch,
const size_t imageSlicePitch,
const uint32_t numMipLevels,
const size_t offset,
amd::Memory* buffer,
hipError_t& status);
@@ -301,6 +302,7 @@ hipError_t ihipCreateTextureObject(hipTextureObject_t* pTexObject,
0, /* imageRowPitch */
0, /* imageSlicePitch */
0, /* numMipLevels */
0, /* offset */
buffer,
status);
buffer->release();
@@ -328,6 +330,7 @@ hipError_t ihipCreateTextureObject(hipTextureObject_t* pTexObject,
pResDesc->res.pitch2D.pitchInBytes, /* imageRowPitch */
0, /* imageSlicePitch */
0, /* numMipLevels */
0, /* offset */
buffer,
status);
if (buffer != nullptr) {
+2 -2
Просмотреть файл
@@ -1719,9 +1719,9 @@ pal::Memory* Device::createImage(amd::Memory& owner, bool directAccess) const {
params.owner_ = &owner;
params.resource_ = buffer;
params.memory_ = buffer;
// Create memory object
result = gpuImage->create(Resource::ImageBuffer, &params);
result = gpuImage->create(amd::IS_HIP && owner.parent()->isInterop() ?
Resource::ImageExternalBuffer : Resource::ImageBuffer, &params);
} else if (directAccess && (owner.getMemFlags() & CL_MEM_ALLOC_HOST_PTR)) {
Resource::PinnedParams params;
params.owner_ = &owner;
+1
Просмотреть файл
@@ -181,6 +181,7 @@ bool Memory::create(Resource::MemoryType memType, Resource::CreateParams* params
flags_ |= SubMemoryObject | (parent_->flags_ & HostMemoryDirectAccess);
break;
}
case Resource::ImageExternalBuffer:
case Resource::ImageBuffer: {
Resource::ImageBufferParams* view = reinterpret_cast<Resource::ImageBufferParams*>(params);
parent_ = reinterpret_cast<const Memory*>(view->memory_);
+19 -4
Просмотреть файл
@@ -613,7 +613,7 @@ bool Resource::CreateImage(CreateParams* params, bool forceLinear) {
ImgSubresRange.startSubres.arraySlice = imageView->layer_;
viewOwner_ = imageView->resource_;
image_ = viewOwner_->image_;
} else if (memoryType() == ImageBuffer) {
} else if (memoryType() == ImageBuffer || memoryType() == ImageExternalBuffer) {
ImageBufferParams* imageBuffer = reinterpret_cast<ImageBufferParams*>(params);
viewOwner_ = imageBuffer->resource_;
}
@@ -639,6 +639,14 @@ bool Resource::CreateImage(CreateParams* params, bool forceLinear) {
if (((memoryType() == Persistent) && dev().settings().linearPersistentImage_) ||
(memoryType() == ImageBuffer)) {
tiling = Pal::ImageTiling::Linear;
} else if (memoryType() == ImageExternalBuffer) {
// We cannot get tiling info from vulkan/d3d driver now. So assume it to be optimal.
// When we get tiling info, we can easily update here
tiling = Pal::ImageTiling::Optimal;
// Pal will infer row pitch. If rowPitch != 0 and Pal inferred row picth isn't equal to
// rowPitch, assert(false) will be called
rowPitch = 0;
offset_ += params->owner_->getOrigin();
} else if (memoryType() == ImageView) {
tiling = viewOwner_->image_->GetImageCreateInfo().tiling;
// Find the new pitch in pixels for the new format
@@ -677,7 +685,8 @@ bool Resource::CreateImage(CreateParams* params, bool forceLinear) {
// createInfo.priority;
}
if ((memoryType() != ImageView) && (memoryType() != ImageBuffer)) {
if ((memoryType() != ImageView) && (memoryType() != ImageBuffer) &&
(memoryType() != ImageExternalBuffer)) {
Pal::GpuMemoryCreateInfo createInfo = {};
createInfo.size = amd::alignUp(req.size, MaxGpuAlignment);
createInfo.alignment = std::max(req.alignment, MaxGpuAlignment);
@@ -710,7 +719,13 @@ bool Resource::CreateImage(CreateParams* params, bool forceLinear) {
mapCount_++;
}
result = image_->BindGpuMemory(memRef_->gpuMem_, offset_);
if (result != Pal::Result::Success) {
LogPrintfError(
"BindGpuMemory return %d, offset_=%zu, req.size=%zu, "
"viewOwner_->iMem()->Desc().size=%zu\n",
result, offset_, req.size,
viewOwner_ && viewOwner_->iMem() ? viewOwner_->iMem()->Desc().size : 0);
return false;
}
@@ -1345,8 +1360,8 @@ void Resource::free() {
return;
}
const bool wait =
(memoryType() != ImageView) && (memoryType() != ImageBuffer) && (memoryType() != View);
const bool wait = (memoryType() != ImageView) && (memoryType() != ImageBuffer) &&
(memoryType() != ImageExternalBuffer) && (memoryType() != View);
// OCL has to wait, even if resource is placed in the cache, since reallocation can occur
// and resource can be reused on another async queue without a wait on a busy operation
+2 -1
Просмотреть файл
@@ -178,7 +178,8 @@ class Resource : public amd::HeapObject {
P2PAccess, //!< resource is a shared resource for P2P access
VkInterop, //!< resource is a Vulkan memory object
VaRange, //!< reousrce is a virtual address range
IpcMemory //!< reousrce is a IPC memory object
IpcMemory, //!< reousrce is a IPC memory object
ImageExternalBuffer //!< resource is an image view of an external buffer
};
//! Resource map flags
-21
Просмотреть файл
@@ -71,25 +71,4 @@ namespace amd
virtual ~ExternalBuffer() {}
};
// to be modified once image requirments are known, for now, implement like buffer
class ExternalImage final : public Buffer, public ExternalMemory
{
protected:
// Initializes device memory array, which is located after ExternalImage object in memory
void initDeviceMemory() {
deviceMemories_ =
reinterpret_cast<DeviceMemory*>(reinterpret_cast<char*>(this) + sizeof(ExternalImage));
memset(deviceMemories_, 0, context_().devices().size() * sizeof(DeviceMemory));
}
public:
ExternalImage(Context& amdContext, size_t size_in_bytes, amd::Os::FileDesc handle,
ExternalMemory::HandleType handle_type)
: Buffer(amdContext, 0, size_in_bytes), ExternalMemory(handle, handle_type) {
setInteropObj(this);
}
virtual ~ExternalImage() {}
};
}
+5 -4
Просмотреть файл
@@ -114,7 +114,7 @@ Memory::Memory(Memory& parent, Flags flags, size_t origin, size_t size, Type typ
flags_(flags),
version_(parent.getVersion()),
lastWriter_(parent.getLastWriter()),
interopObj_(parent.getInteropObj()),
interopObj_(nullptr),
vDev_(NULL),
mapCount_(0),
svmHostAddress_(parent.getSvmPtr()),
@@ -606,10 +606,11 @@ Image::Image(Context& context, Type type, Flags flags, const Format& format, siz
}
Image::Image(Buffer& buffer, Type type, Flags flags, const Format& format, size_t width,
size_t height, size_t depth, size_t rowPitch, size_t slicePitch)
: Memory(buffer, flags, 0, buffer.getSize(), type),
size_t height, size_t depth, size_t rowPitch, size_t slicePitch, uint mipLevels,
size_t offset)
: Memory(buffer, flags, offset, buffer.getSize(), type),
impl_(format, Coord3D(width, height, depth), rowPitch, slicePitch),
mipLevels_(1),
mipLevels_(mipLevels),
baseMipLevel_(0) {
initDimension();
}
+1 -1
Просмотреть файл
@@ -566,7 +566,7 @@ class Image : public Memory {
size_t depth, size_t rowPitch, size_t slicePitch, uint mipLevels = 1);
Image(Buffer& buffer, Type type, Flags flags, const Format& format, size_t width, size_t height,
size_t depth, size_t rowPitch, size_t slicePitch);
size_t depth, size_t rowPitch, size_t slicePitch, uint mipLevels = 1, size_t offset = 0);
//! Validate image dimensions with supported sizes
static bool validateDimensions(