2014-07-04 16:17:05 -04:00
//
// Copyright 2010 Advanced Micro Devices, Inc. All rights reserved.
//
# include "amdocl/cl_common.hpp"
# include "os/alloc.hpp"
# include "platform/context.hpp"
# include "platform/object.hpp"
# include "platform/memory.hpp"
# include "device/device.hpp"
namespace amd {
bool
BufferRect : : create (
const size_t * bufferOrigin ,
const size_t * region ,
size_t bufferRowPitch ,
size_t bufferSlicePitch )
{
bool valid = false ;
// Find the buffer's row pitch
rowPitch_ = ( bufferRowPitch ! = 0 ) ? bufferRowPitch : region [ 0 ] ;
// Find the buffer's slice pitch
slicePitch_ = ( bufferSlicePitch ! = 0 ) ? bufferSlicePitch :
rowPitch_ * region [ 1 ] ;
// Find the region start offset
start_ = bufferOrigin [ 2 ] * slicePitch_ +
bufferOrigin [ 1 ] * rowPitch_ + bufferOrigin [ 0 ] ;
// Find the region relative end offset
end_ = ( region [ 2 ] - 1 ) * slicePitch_ + ( region [ 1 ] - 1 ) * rowPitch_ + region [ 0 ] ;
// Make sure we have a valid region
if ( ( rowPitch_ > = region [ 0 ] ) & &
( slicePitch_ > = ( region [ 1 ] * rowPitch_ ) ) & &
( ( slicePitch_ % rowPitch_ ) = = 0 ) ) {
valid = true ;
}
return valid ;
}
bool
HostMemoryReference : : allocateMemory ( size_t size , const Context & context ) {
assert ( ! alloced_ & & " Runtime should not reallocate system memory! " ) ;
size_t memoryAlignment = ( CPU_MEMORY_ALIGNMENT_SIZE < = 0 ) ? 256 : CPU_MEMORY_ALIGNMENT_SIZE ;
size_ = amd : : alignUp ( size , memoryAlignment ) ;
//! \note memory size must be aligned for CAL pinning
hostMem_ = CPU_MEMORY_GUARD_PAGES
? GuardedMemory : : allocate ( size_ , MEMOBJ_BASE_ADDR_ALIGN , CPU_MEMORY_GUARD_PAGE_SIZE * Ki )
: context . hostAlloc ( size_ , MEMOBJ_BASE_ADDR_ALIGN ) ;
alloced_ = ( hostMem_ ! = NULL ) ;
return alloced_ ;
}
// Frees system memory if it was allocated
void
HostMemoryReference : : deallocateMemory ( const Context & context )
{
if ( alloced_ ) {
if ( CPU_MEMORY_GUARD_PAGES ) GuardedMemory : : deallocate ( hostMem_ ) ;
else context . hostFree ( hostMem_ ) ;
size_ = 0 ;
alloced_ = false ;
hostMem_ = NULL ;
}
}
//! Constructs a top-level memory object (an object without a parent).
//!
//! \param context context the object is created in
//! \param type    memory object type
//! \param flags   memory flags
//! \param size    object size
//! \param svmPtr  stored as the SVM host address of the object
Memory::Memory(
    Context& context,
    Type type,
    Flags flags,
    size_t size,
    void* svmPtr)
    : numDevices_(0)
    , deviceMemories_(NULL)
    , destructorCallbacks_(NULL)
    , context_(context)
    , parent_(NULL)
    , type_(type)
    , hostMemRef_(NULL)
    , origin_(0)
    , size_(size)
    , flags_(flags)
    , version_(0)
    , lastWriter_(NULL)
    , interopObj_(NULL)
    , isParent_(false)
    , vDev_(NULL)
    , forceSysMemAlloc_(false)
    , svmHostAddress_(svmPtr)
    , svmPtrCommited_(false)
    , canBeCached_(true)
    , lockMemoryOps_(" Memory Ops Lock ", true)
{
    // The map counter starts at zero
    std::atomic_init(&mapCount_, 0u);
}
//! Constructs a memory object that is a view into \a parent.
//!
//! The parent is retained and marked as a parent. Version, last writer,
//! interop object and SVM state are copied from the parent.
//!
//! \param parent parent memory object
//! \param flags  memory flags requested for the view
//! \param origin offset of the view inside the parent
//! \param size   size of the view
//! \param type   memory object type; 0 selects the parent's type
Memory::Memory(
    Memory& parent,
    Flags flags,
    size_t origin,
    size_t size,
    Type type)
    : numDevices_(0)
    , deviceMemories_(NULL)
    , destructorCallbacks_(NULL)
    , context_(parent.getContext())
    , parent_(&parent)
    , type_((type == 0) ? parent.type_ : type)
    , hostMemRef_(NULL)
    , origin_(origin)
    , size_(size)
    , flags_(flags)
    , version_(parent.getVersion())
    , lastWriter_(parent.getLastWriter())
    , interopObj_(parent.getInteropObj())
    , isParent_(false)
    , vDev_(NULL)
    , forceSysMemAlloc_(false)
    , svmHostAddress_(parent.getSvmPtr())
    , svmPtrCommited_(parent.isSvmPtrCommited())
    , canBeCached_(true)
    , lockMemoryOps_(" Memory Ops Lock ", true)
{
    const Flags kernelAccessMask =
        CL_MEM_READ_WRITE | CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY;
    const Flags hostPtrMask =
        CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR;
    const Flags hostAccessMask =
        CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS;

    parent_->retain();
    parent_->isParent_ = true;

    // Kernel access flags come from the parent only if none were requested
    if ((flags_ & kernelAccessMask) == 0) {
        flags_ |= parent_->getMemFlags() & kernelAccessMask;
    }

    // Host pointer flags are always taken over from the parent
    flags_ |= parent_->getMemFlags() & hostPtrMask;

    // Host access flags come from the parent only if none were requested
    if ((flags_ & hostAccessMask) == 0) {
        flags_ |= parent_->getMemFlags() & hostAccessMask;
    }

    // The map counter starts at zero
    std::atomic_init(&mapCount_, 0u);
}
void
Memory : : initDeviceMemory ( )
{
deviceMemories_ = reinterpret_cast < DeviceMemory * > (
reinterpret_cast < char * > ( this ) + sizeof ( Memory ) ) ;
memset ( deviceMemories_ , 0 ,
context_ ( ) . devices ( ) . size ( ) * sizeof ( DeviceMemory ) ) ;
}
void *
Memory : : operator new ( size_t size , const Context & context )
{
return RuntimeObject : : operator new (
size + context . devices ( ) . size ( ) * sizeof ( DeviceMemory ) ) ;
}
void
Memory : : operator delete ( void * p )
{
RuntimeObject : : operator delete ( p ) ;
}
void
Memory : : operator delete ( void * p , const Context & context )
{
Memory : : operator delete ( p ) ;
}
void
Memory : : addSubBuffer ( Memory * view )
{
amd : : ScopedLock lock ( lockMemoryOps ( ) ) ;
subBuffers_ . push_back ( view ) ;
}
void
Memory : : removeSubBuffer ( Memory * view )
{
amd : : ScopedLock lock ( lockMemoryOps ( ) ) ;
subBuffers_ . remove ( view ) ;
}
bool
Memory : : allocHostMemory ( void * initFrom , bool allocHostMem , bool forceCopy )
{
// Sanity checks (the parameters should have been prevalidated by the API)
assert ( ! ( flags_ & ( CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR ) & &
( initFrom = = NULL ) & & ! allocHostMem & & ! isSvmPtrCommited ( ) ) ) ;
assert ( ! ( ( initFrom ! = NULL ) & & ! forceCopy & &
! ( flags_ & ( CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR |
CL_MEM_EXTERNAL_PHYSICAL_AMD ) ) ) ) ;
assert ( ! ( flags_ & CL_MEM_COPY_HOST_PTR & & flags_ & CL_MEM_USE_HOST_PTR ) ) ;
const std : : vector < Device * > & devices = context_ ( ) . devices ( ) ;
// Find if a non GPU device was created with the context
for ( size_t i = 0 ; i < devices . size ( ) ; i + + ) {
if ( ! ( devices [ i ] - > info ( ) . type_ & CL_DEVICE_TYPE_GPU ) ) {
allocHostMem = true ;
break ;
}
}
// This allocation is necessary to use coherency mechanism
// for the initialization
if ( getMemFlags ( ) & ( CL_MEM_COPY_HOST_PTR | CL_MEM_ALLOC_HOST_PTR ) ) {
allocHostMem = true ;
}
// Did application request to use host memory?
if ( getMemFlags ( ) & CL_MEM_USE_HOST_PTR ) {
setHostMem ( initFrom ) ;
// Recalculate image size according to pitch
Image * image = asImage ( ) ;
if ( image ! = NULL ) {
if ( image - > getDims ( ) < 3 ) {
size_ = image - > getRowPitch ( ) * image - > getHeight ( ) ;
}
else {
size_ = image - > getSlicePitch ( ) * image - > getDepth ( ) ;
}
}
}
// Allocate host memory buffer if needed
else if ( allocHostMem & & ! isInterop ( ) ) {
if ( ! hostMemRef_ . allocateMemory ( size_ , context_ ( ) ) ) {
return false ;
}
// Copy data to the backing store if the app has requested
if ( ( ( flags_ & CL_MEM_COPY_HOST_PTR ) | | forceCopy ) & & ( initFrom ! = NULL ) ) {
copyToBackingStore ( initFrom ) ;
}
}
if ( allocHostMem & & type_ = = CL_MEM_OBJECT_PIPE )
{
// Initialize the pipe for a CPU device
clk_pipe_t * pipe = reinterpret_cast < clk_pipe_t * > ( getHostMem ( ) ) ;
pipe - > read_idx = 0 ;
pipe - > write_idx = 0 ;
pipe - > end_idx = asPipe ( ) - > getMaxNumPackets ( ) ;
}
if ( flags_ & ( CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR ) ) {
// Signal write, so coherency mechanism will initialize
// memory on all devices
signalWrite ( NULL ) ;
}
return true ;
}
bool
Memory : : create ( void * initFrom , bool sysMemAlloc )
{
static const bool forceAllocHostMem = false ;
initDeviceMemory ( ) ;
// Check if it's a subbuffer allocation
if ( parent_ ! = NULL ) {
// Find host memory pointer for subbuffer
if ( parent_ - > getHostMem ( ) ! = NULL ) {
setHostMem ( ( address ) parent_ - > getHostMem ( ) + origin_ ) ;
}
// Add a new subbuffer to the list
parent_ - > addSubBuffer ( this ) ;
}
// Allocate host memory if requested
else if ( ! allocHostMemory ( initFrom , forceAllocHostMem ) ) {
return false ;
}
bool ok = true ;
const std : : vector < Device * > & devices = context_ ( ) . devices ( ) ;
// Create memory on all available devices
for ( size_t i = 0 ; ok & & i < devices . size ( ) ; i + + ) {
deviceAlloced_ [ devices [ i ] ] = AllocInit ;
// Only GPU devices have device memory objects
if ( devices [ i ] - > info ( ) . type_ & CL_DEVICE_TYPE_GPU ) {
deviceMemories_ [ i ] . ref_ = devices [ i ] ;
deviceMemories_ [ i ] . value_ = NULL ;
}
}
// Forces system memory allocation on the device,
// instead of device memory
forceSysMemAlloc_ = sysMemAlloc ;
return ok ;
}
bool
Memory : : addDeviceMemory ( const Device * dev )
{
bool result = false ;
AllocState create = AllocCreate ;
AllocState init = AllocInit ;
if ( make_atomic ( deviceAlloced_ [ dev ] ) . compareAndSet ( init , create ) ) {
device : : Memory * dm = dev - > createMemory ( * this ) ;
// Add the new memory allocation to the device map
if ( NULL ! = dm ) {
deviceMemories_ [ numDevices_ ] . ref_ = dev ;
deviceMemories_ [ numDevices_ ] . value_ = dm ;
numDevices_ + + ;
assert ( ( numDevices ( ) < = context_ ( ) . devices ( ) . size ( ) )
& & " Too many device objects " ) ;
// Mark the allocation with the complete flag
deviceAlloced_ [ dev ] = AllocComplete ;
}
else {
// Mark the allocation as an empty
deviceAlloced_ [ dev ] = AllocInit ;
}
}
// Make sure runtime finished memory allocation.
// Loop if in the create state
while ( deviceAlloced_ [ dev ] = = AllocCreate ) {
Os : : yield ( ) ;
}
if ( deviceAlloced_ [ dev ] = = AllocComplete ) {
result = true ;
}
return result ;
}
void
Memory : : replaceDeviceMemory ( const Device * dev , device : : Memory * dm )
{
uint i ;
for ( i = 0 ; i < numDevices_ ; + + i ) {
if ( deviceMemories_ [ i ] . ref_ = = dev ) {
delete deviceMemories_ [ i ] . value_ ;
break ;
}
}
if ( numDevices_ = = 0 ) {
+ + numDevices_ ;
deviceMemories_ [ 0 ] . ref_ = dev ;
}
deviceMemories_ [ i ] . value_ = dm ;
deviceAlloced_ [ dev ] = AllocRealloced ;
}
//! Returns the device memory object registered for \a dev.
//!
//! \param dev   device to look up
//! \param alloc if true, a missing device memory object is created
//! \return the device memory object, or NULL if none is registered and
//!         either \a alloc is false or the creation failed
device::Memory*
Memory::getDeviceMemory(const Device& dev, bool alloc)
{
    device::Memory* found = NULL;

    // The first entry that references the device wins
    uint idx = 0;
    while (idx < numDevices_) {
        if (deviceMemories_[idx].ref_ == &dev) {
            found = deviceMemories_[idx].value_;
            break;
        }
        ++idx;
    }

    if ((found != NULL) || !alloc) {
        return found;
    }

    if (!addDeviceMemory(&dev)) {
        LogError(" Video memory allocation failed! ");
        return NULL;
    }

    // Take the value of the last registered entry
    return deviceMemories_[numDevices() - 1].value_;
}
//! Destroys the memory object: runs the destructor callbacks, detaches a
//! sub-buffer from its parent, deletes the device memory objects, frees the
//! callback list, releases the parent and frees the host backing store.
Memory::~Memory()
{
    // Invoke every registered destructor callback
    DestructorCallBackEntry* entry = destructorCallbacks_;
    while (entry != NULL) {
        entry->callback_(const_cast<cl_mem>(as_cl(this)), entry->data_);
        entry = entry->next_;
    }

    if (parent_ != NULL) {
        // Update cache if runtime destroys a subbuffer
        if (parent_->getHostMem() != NULL) {
            cacheWriteBack();
        }
        parent_->removeSubBuffer(this);
    }

    // Destroy all device memory objects
    if (deviceMemories_ != NULL) {
        for (uint idx = 0; idx < numDevices_; ++idx) {
            delete deviceMemories_[idx].value_;
        }
    }

    // Sanity check
    if (subBuffers_.size() != 0) {
        LogError(" Can't have views if parent is destroyed! ");
    }

    // Destroy the destructor callback entries
    for (DestructorCallBackEntry* node = destructorCallbacks_; node != NULL; ) {
        DestructorCallBackEntry* const following = node->next_;
        delete node;
        node = following;
    }

    // Make sure runtime destroys the parent only after subbuffer destruction
    if (parent_ != NULL) {
        parent_->release();
    }

    hostMemRef_.deallocateMemory(context_());
}
bool
Memory : : setDestructorCallback ( DestructorCallBackFunction callback , void * data )
{
DestructorCallBackEntry * entry = new DestructorCallBackEntry ( callback , data ) ;
if ( entry = = NULL ) {
return false ;
}
entry - > next_ = destructorCallbacks_ ;
2014-09-30 17:21:19 -04:00
while ( ! destructorCallbacks_ . compare_exchange_weak ( entry - > next_ , entry ) )
; // Someone else is also updating the head of the linked list! reload.
2014-07-04 16:17:05 -04:00
return true ;
}
void
Memory : : signalWrite ( const Device * writer )
{
// (the potential race condition below doesn't matter, no critical
// section needed)
+ + version_ ;
lastWriter_ = writer ;
}
void
Memory : : cacheWriteBack ( )
{
if ( NULL ! = lastWriter_ ) {
device : : Memory * dmem = getDeviceMemory ( * lastWriter_ ) ;
dmem - > syncHostFromCache ( ) ;
}
else if ( isParent ( ) ) {
// On CPU parent can't be synchronized, because lastWriter_ could be NULL
// and syncHostFromCache() won't be called.
for ( uint i = 0 ; i < numDevices_ ; + + i ) {
deviceMemories_ [ i ] . value_ - > syncHostFromCache ( ) ;
}
}
}
void
Memory : : copyToBackingStore ( void * initFrom )
{
memcpy ( getHostMem ( ) , initFrom , size_ ) ;
}
bool
Memory : : usesSvmPointer ( ) const
{
if ( ! ( flags_ & CL_MEM_USE_HOST_PTR ) ) {
return false ;
}
// If the application host pointer lies within a SVM region, so does the
// sub-buffer host pointer - so the following check works in both cases
return ( SvmBuffer : : malloced ( getHostMem ( ) ) | | NULL ! = svmHostAddress_ ) ;
}
void
Memory : : commitSvmMemory ( )
{
ScopedLock lock ( lockMemoryOps_ ) ;
if ( ! svmPtrCommited_ ) {
amd : : Os : : commitMemory ( svmHostAddress_ , size_ , amd : : Os : : MEM_PROT_RW ) ;
svmPtrCommited_ = true ;
}
}
void
Buffer : : initDeviceMemory ( )
{
deviceMemories_ = reinterpret_cast < DeviceMemory * > (
reinterpret_cast < char * > ( this ) + sizeof ( Buffer ) ) ;
memset ( deviceMemories_ , 0 ,
context_ ( ) . devices ( ) . size ( ) * sizeof ( DeviceMemory ) ) ;
}
bool
Buffer : : create ( void * initFrom , bool sysMemAlloc )
{
if ( ( getMemFlags ( ) & CL_MEM_EXTERNAL_PHYSICAL_AMD ) & & ( initFrom ! = NULL ) ) {
busAddress_ = * ( reinterpret_cast < cl_bus_address_amd * > ( initFrom ) ) ;
initFrom = NULL ;
}
else {
busAddress_ . surface_bus_address = 0 ;
busAddress_ . marker_bus_address = 0 ;
}
return Memory : : create ( initFrom , sysMemAlloc ) ;
}
bool
Buffer : : isEntirelyCovered ( const Coord3D & origin , const Coord3D & region ) const
{
return ( ( origin [ 0 ] = = 0 ) & & ( region [ 0 ] = = getSize ( ) ) ) ? true : false ;
}
bool
Buffer : : validateRegion ( const Coord3D & origin , const Coord3D & region ) const
{
return ( ( region [ 0 ] > 0 ) & &
( origin [ 0 ] < getSize ( ) ) & &
( ( origin [ 0 ] + region [ 0 ] ) < = getSize ( ) ) ) ? true : false ;
}
void
Pipe : : initDeviceMemory ( )
{
deviceMemories_ = reinterpret_cast < DeviceMemory * > (
reinterpret_cast < char * > ( this ) + sizeof ( Pipe ) ) ;
memset ( deviceMemories_ , 0 ,
context_ ( ) . devices ( ) . size ( ) * sizeof ( DeviceMemory ) ) ;
}
//! Constructs a view of \a parent that uses a different image format.
//!
//! The view keeps the parent's height, depth and pitches; its width is the
//! parent's width scaled by the ratio of the two element sizes.
//!
//! \param format format of the view
//! \param parent image the view is created on
Image::Image(
    const Format& format,
    Image& parent)
    : Memory(parent, 0, 0,
             parent.getWidth() * parent.getHeight() * parent.getDepth() * format.getElementSize())
    , impl_(format,
            Coord3D(parent.getWidth() * parent.getImageFormat().getElementSize() / format.getElementSize(),
                    parent.getHeight(),
                    parent.getDepth()),
            parent.getRowPitch(),
            parent.getSlicePitch(),
            parent.getBytePitch())
{
    initDimension();
}
//! Constructs a stand-alone image.
//!
//! \param context    context the image is created in
//! \param type       image object type
//! \param flags      memory flags
//! \param format     image format
//! \param width      image width
//! \param height     image height
//! \param depth      image depth
//! \param rowPitch   row pitch; 0 lets initDimension() derive it
//! \param slicePitch slice pitch; 0 lets initDimension() derive it where it applies
Image::Image(
    Context& context,
    Type type,
    Flags flags,
    const Format& format,
    size_t width,
    size_t height,
    size_t depth,
    size_t rowPitch,
    size_t slicePitch)
    : Memory(context, type, flags, width * height * depth * format.getElementSize())
    , impl_(format, Coord3D(width, height, depth), rowPitch, slicePitch)
{
    initDimension();
}
//! Constructs an image on top of an existing buffer. The image is a view of
//! the whole buffer (origin 0, size buffer.getSize()).
//!
//! \param buffer     buffer that provides the storage
//! \param type       image object type
//! \param flags      memory flags
//! \param format     image format
//! \param width      image width
//! \param height     image height
//! \param depth      image depth
//! \param rowPitch   row pitch; 0 lets initDimension() derive it
//! \param slicePitch slice pitch; 0 lets initDimension() derive it where it applies
Image::Image(
    Buffer& buffer,
    Type type,
    Flags flags,
    const Format& format,
    size_t width,
    size_t height,
    size_t depth,
    size_t rowPitch,
    size_t slicePitch)
    : Memory(buffer, flags, 0, buffer.getSize(), type)
    , impl_(format, Coord3D(width, height, depth), rowPitch, slicePitch)
{
    initDimension();
}
bool
Image : : validateDimensions (
const std : : vector < amd : : Device * > & devices ,
cl_mem_object_type type ,
size_t width ,
size_t height ,
size_t depth ,
size_t arraySize )
{
std : : vector < amd : : Device * > : : const_iterator it ;
bool sizePass = false ;
switch ( type ) {
case CL_MEM_OBJECT_IMAGE3D :
if ( ( width = = 0 ) | | ( height = = 0 ) | | ( depth < 1 ) ) {
return false ;
}
for ( it = devices . begin ( ) ; it ! = devices . end ( ) ; + + it ) {
if ( ( ( * it ) - > info ( ) . image3DMaxWidth_ > = width ) & &
( ( * it ) - > info ( ) . image3DMaxHeight_ > = height ) & &
( ( * it ) - > info ( ) . image3DMaxDepth_ > = depth ) ) {
return true ;
}
}
break ;
case CL_MEM_OBJECT_IMAGE2D_ARRAY :
if ( arraySize = = 0 ) {
return false ;
}
for ( it = devices . begin ( ) ; it ! = devices . end ( ) ; + + it ) {
if ( ( * it ) - > info ( ) . imageMaxArraySize_ > = arraySize ) {
sizePass = true ;
break ;
}
}
if ( ! sizePass ) {
return false ;
}
// Fall through...
case CL_MEM_OBJECT_IMAGE2D :
if ( ( width = = 0 ) | | ( height = = 0 ) ) {
return false ;
}
for ( it = devices . begin ( ) ; it ! = devices . end ( ) ; + + it ) {
if ( ( ( * it ) - > info ( ) . image2DMaxHeight_ > = height ) & &
( ( * it ) - > info ( ) . image2DMaxWidth_ > = width ) ) {
return true ;
}
}
break ;
case CL_MEM_OBJECT_IMAGE1D_ARRAY :
if ( arraySize = = 0 ) {
return false ;
}
for ( it = devices . begin ( ) ; it ! = devices . end ( ) ; + + it ) {
if ( ( * it ) - > info ( ) . imageMaxArraySize_ > = arraySize ) {
sizePass = true ;
break ;
}
}
if ( ! sizePass ) {
return false ;
}
// Fall through...
case CL_MEM_OBJECT_IMAGE1D :
if ( width = = 0 ) {
return false ;
}
for ( it = devices . begin ( ) ; it ! = devices . end ( ) ; + + it ) {
if ( ( * it ) - > info ( ) . image2DMaxWidth_ > = width ) {
return true ;
}
}
break ;
case CL_MEM_OBJECT_IMAGE1D_BUFFER :
if ( width = = 0 ) {
return false ;
}
for ( it = devices . begin ( ) ; it ! = devices . end ( ) ; + + it ) {
if ( ( * it ) - > info ( ) . imageMaxBufferSize_ > = width ) {
return true ;
}
}
break ;
default :
break ;
}
return false ;
}
void
Image : : initDimension ( )
{
const size_t elemSize = impl_ . format_ . getElementSize ( ) ;
if ( impl_ . rp_ = = 0 ) {
impl_ . rp_ = impl_ . region_ [ 0 ] * elemSize ;
}
switch ( type_ ) {
case CL_MEM_OBJECT_IMAGE3D :
case CL_MEM_OBJECT_IMAGE2D_ARRAY :
dim_ = 3 ;
if ( impl_ . sp_ = = 0 ) {
impl_ . sp_ = impl_ . region_ [ 0 ] * impl_ . region_ [ 1 ] * elemSize ;
}
break ;
case CL_MEM_OBJECT_IMAGE2D :
case CL_MEM_OBJECT_IMAGE1D_ARRAY :
dim_ = 2 ;
if ( ( impl_ . sp_ = = 0 ) & &
( type_ = = CL_MEM_OBJECT_IMAGE1D_ARRAY ) ) {
impl_ . sp_ = impl_ . rp_ ;
}
break ;
case CL_MEM_OBJECT_IMAGE1D :
case CL_MEM_OBJECT_IMAGE1D_BUFFER :
default :
dim_ = 1 ;
break ;
}
}
void
Image : : initDeviceMemory ( )
{
deviceMemories_ = reinterpret_cast < DeviceMemory * > (
reinterpret_cast < char * > ( this ) + sizeof ( Image ) ) ;
memset ( deviceMemories_ , 0 ,
context_ ( ) . devices ( ) . size ( ) * sizeof ( DeviceMemory ) ) ;
}
bool
Image : : create ( void * initFrom )
{
return Memory : : create ( initFrom ) ;
}
size_t
Image : : Format : : getNumChannels ( ) const
{
switch ( image_channel_order )
{
case CL_RG :
case CL_RA :
return 2 ;
case CL_RGB :
case CL_sRGB :
case CL_sRGBx :
return 3 ;
case CL_RGBA :
case CL_BGRA :
case CL_ARGB :
case CL_sRGBA :
case CL_sBGRA :
return 4 ;
}
return 1 ;
}
size_t
Image : : Format : : getElementSize ( ) const
{
size_t bytesPerPixel = getNumChannels ( ) ;
switch ( image_channel_data_type )
{
case CL_SNORM_INT8 :
case CL_UNORM_INT8 :
case CL_SIGNED_INT8 :
case CL_UNSIGNED_INT8 :
break ;
case CL_UNORM_INT_101010 :
2014-07-21 14:27:24 -04:00
bytesPerPixel = 4 ;
break ;
2014-07-04 16:17:05 -04:00
case CL_SIGNED_INT32 :
case CL_UNSIGNED_INT32 :
case CL_FLOAT :
bytesPerPixel * = 4 ;
break ;
default :
bytesPerPixel * = 2 ;
break ;
}
return bytesPerPixel ;
}
bool
Image : : Format : : isValid ( ) const
{
switch ( image_channel_data_type )
{
case CL_SNORM_INT8 :
case CL_SNORM_INT16 :
case CL_UNORM_INT8 :
case CL_UNORM_INT16 :
case CL_UNORM_SHORT_565 :
case CL_UNORM_SHORT_555 :
case CL_UNORM_INT_101010 :
case CL_SIGNED_INT8 :
case CL_SIGNED_INT16 :
case CL_SIGNED_INT32 :
case CL_UNSIGNED_INT8 :
case CL_UNSIGNED_INT16 :
case CL_UNSIGNED_INT32 :
case CL_HALF_FLOAT :
case CL_FLOAT :
break ;
default :
return false ;
}
switch ( image_channel_order )
{
case CL_R :
case CL_A :
case CL_RG :
case CL_RA :
case CL_RGBA :
break ;
case CL_INTENSITY :
case CL_LUMINANCE :
switch ( image_channel_data_type )
{
case CL_SNORM_INT8 :
case CL_SNORM_INT16 :
case CL_UNORM_INT8 :
case CL_UNORM_INT16 :
case CL_HALF_FLOAT :
case CL_FLOAT :
break ;
default :
return false ;
}
break ;
case CL_RGB :
switch ( image_channel_data_type )
{
case CL_UNORM_SHORT_565 :
case CL_UNORM_SHORT_555 :
case CL_UNORM_INT_101010 :
break ;
default :
return false ;
}
break ;
case CL_BGRA :
case CL_ARGB :
switch ( image_channel_data_type )
{
case CL_SNORM_INT8 :
case CL_UNORM_INT8 :
case CL_SIGNED_INT8 :
case CL_UNSIGNED_INT8 :
break ;
default :
return false ;
}
break ;
case CL_sRGB :
case CL_sRGBx :
case CL_sRGBA :
case CL_sBGRA :
switch ( image_channel_data_type )
{
case CL_UNORM_INT8 :
break ;
default :
return false ;
}
break ;
case CL_DEPTH :
switch ( image_channel_data_type )
{
case CL_UNORM_INT16 :
case CL_FLOAT :
break ;
default :
return false ;
}
break ;
default :
return false ;
}
return true ;
}
// definition of list of supported formats
//
// The order of the tail of this table matters: the RGB, sRGB and DEPTH
// entries are kept at the end, in that order. numSupportedFormats() and
// getSupportedFormats() drop them by subtracting the NUM_CHANNEL_ORDER_OF_*
// constants defined below from the table size, so any entry added to or
// removed from the tail must be reflected in those constants.
cl_image_format
Image : : supportedFormats [ ] = {
// R
{ CL_R , CL_SNORM_INT8 } , { CL_R , CL_SNORM_INT16 } ,
{ CL_R , CL_UNORM_INT8 } , { CL_R , CL_UNORM_INT16 } ,
{ CL_R , CL_SIGNED_INT8 } , { CL_R , CL_SIGNED_INT16 } ,
{ CL_R , CL_SIGNED_INT32 } , { CL_R , CL_UNSIGNED_INT8 } ,
{ CL_R , CL_UNSIGNED_INT16 } , { CL_R , CL_UNSIGNED_INT32 } ,
{ CL_R , CL_HALF_FLOAT } , { CL_R , CL_FLOAT } ,
// A
{ CL_A , CL_SNORM_INT8 } , { CL_A , CL_SNORM_INT16 } ,
{ CL_A , CL_UNORM_INT8 } , { CL_A , CL_UNORM_INT16 } ,
{ CL_A , CL_SIGNED_INT8 } , { CL_A , CL_SIGNED_INT16 } ,
{ CL_A , CL_SIGNED_INT32 } , { CL_A , CL_UNSIGNED_INT8 } ,
{ CL_A , CL_UNSIGNED_INT16 } , { CL_A , CL_UNSIGNED_INT32 } ,
{ CL_A , CL_HALF_FLOAT } , { CL_A , CL_FLOAT } ,
// RG
{ CL_RG , CL_SNORM_INT8 } , { CL_RG , CL_SNORM_INT16 } ,
{ CL_RG , CL_UNORM_INT8 } , { CL_RG , CL_UNORM_INT16 } ,
{ CL_RG , CL_SIGNED_INT8 } , { CL_RG , CL_SIGNED_INT16 } ,
{ CL_RG , CL_SIGNED_INT32 } , { CL_RG , CL_UNSIGNED_INT8 } ,
{ CL_RG , CL_UNSIGNED_INT16 } , { CL_RG , CL_UNSIGNED_INT32 } ,
{ CL_RG , CL_HALF_FLOAT } , { CL_RG , CL_FLOAT } ,
// RGBA
{ CL_RGBA , CL_SNORM_INT8 } , { CL_RGBA , CL_SNORM_INT16 } ,
{ CL_RGBA , CL_UNORM_INT8 } , { CL_RGBA , CL_UNORM_INT16 } ,
{ CL_RGBA , CL_SIGNED_INT8 } , { CL_RGBA , CL_SIGNED_INT16 } ,
{ CL_RGBA , CL_SIGNED_INT32 } , { CL_RGBA , CL_UNSIGNED_INT8 } ,
{ CL_RGBA , CL_UNSIGNED_INT16 } , { CL_RGBA , CL_UNSIGNED_INT32 } ,
{ CL_RGBA , CL_HALF_FLOAT } , { CL_RGBA , CL_FLOAT } ,
// ARGB
{ CL_ARGB , CL_SNORM_INT8 } , { CL_ARGB , CL_UNORM_INT8 } ,
{ CL_ARGB , CL_SIGNED_INT8 } , { CL_ARGB , CL_UNSIGNED_INT8 } ,
// BGRA
{ CL_BGRA , CL_SNORM_INT8 } , { CL_BGRA , CL_UNORM_INT8 } ,
{ CL_BGRA , CL_SIGNED_INT8 } , { CL_BGRA , CL_UNSIGNED_INT8 } ,
// LUMINANCE
{ CL_LUMINANCE , CL_SNORM_INT8 } , { CL_LUMINANCE , CL_SNORM_INT16 } ,
{ CL_LUMINANCE , CL_UNORM_INT8 } , { CL_LUMINANCE , CL_UNORM_INT16 } ,
{ CL_LUMINANCE , CL_HALF_FLOAT } , { CL_LUMINANCE , CL_FLOAT } ,
// INTENSITY
{ CL_INTENSITY , CL_SNORM_INT8 } , { CL_INTENSITY , CL_SNORM_INT16 } ,
{ CL_INTENSITY , CL_UNORM_INT8 } , { CL_INTENSITY , CL_UNORM_INT16 } ,
{ CL_INTENSITY , CL_HALF_FLOAT } , { CL_INTENSITY , CL_FLOAT } ,
2014-08-08 16:09:29 -04:00
// RGB
{ CL_RGB , CL_UNORM_INT_101010 } ,
// sRGB
2014-07-04 16:17:05 -04:00
{ CL_sRGBA , CL_UNORM_INT8 } ,
2014-08-08 16:09:29 -04:00
// DEPTH
2014-07-04 16:17:05 -04:00
{ CL_DEPTH , CL_UNORM_INT16 } , { CL_DEPTH , CL_FLOAT } ,
} ;
// Sizes of the three trailing groups of supportedFormats (RGB, sRGB, DEPTH).
2014-08-08 16:09:29 -04:00
const cl_uint NUM_CHANNEL_ORDER_OF_RGB = 1 ; // The number of channel orders of RGB at the end of the table supportedFormats above and before sRGB and depth.
2014-07-04 16:17:05 -04:00
const cl_uint NUM_CHANNEL_ORDER_OF_sRGB = 1 ; // The number of channel orders of sRGB at the end of the table supportedFormats above and before depth.
const cl_uint NUM_CHANNEL_ORDER_OF_DEPTH = 2 ; // The number of channel orders of DEPTH at the end of the table supportedFormats above.
// definition of list of supported RA formats
//
// These entries are reported in addition to supportedFormats when at least
// one device of the context has settings().supportRA_ set.
cl_image_format
Image : : supportedFormatsRA [ ] = {
{ CL_RA , CL_SNORM_INT8 } , { CL_RA , CL_SNORM_INT16 } ,
{ CL_RA , CL_UNORM_INT8 } , { CL_RA , CL_UNORM_INT16 } ,
{ CL_RA , CL_SIGNED_INT8 } , { CL_RA , CL_SIGNED_INT16 } ,
{ CL_RA , CL_SIGNED_INT32 } , { CL_RA , CL_UNSIGNED_INT8 } ,
{ CL_RA , CL_UNSIGNED_INT16 } , { CL_RA , CL_UNSIGNED_INT32 } ,
{ CL_RA , CL_HALF_FLOAT } , { CL_RA , CL_FLOAT } ,
} ;
// Depth and depth-stencil formats. Image::Format::isSupported() accepts
// them only when every device of the context has
// settings().depthMSAAInterop_ set.
cl_image_format depthFormats [ ] = {
//DEPTH
{ CL_DEPTH , CL_FLOAT } , { CL_DEPTH , CL_UNORM_INT16 } ,
//DEPTH STENCIL
{ CL_DEPTH_STENCIL , CL_FLOAT } , { CL_DEPTH_STENCIL , CL_UNORM_INT24 }
} ;
//! Returns the number of image formats that getSupportedFormats() reports
//! for the same arguments.
//!
//! \param context    context whose devices determine the optional formats
//! \param image_type image object type
//! \param flags      memory flags the image would be created with
//! \return number of supported formats
cl_uint
Image::numSupportedFormats(const Context& context, cl_mem_object_type image_type, cl_mem_flags flags)
{
    const std::vector<amd::Device*>& devices = context.devices();
    cl_uint numFormats = sizeof(supportedFormats) / sizeof(cl_image_format);

    // An optional capability counts if any device of the context has it
    bool supportRA = false;
    bool supportDepthsRGB = false;
    for (size_t i = 0; i < devices.size(); i++) {
        if (devices[i]->settings().supportRA_) {
            supportRA = true;
        }
        if (devices[i]->settings().supportDepthsRGB_) {
            supportDepthsRGB = true;
        }
    }

    if (supportDepthsRGB) {
        if ((image_type != CL_MEM_OBJECT_IMAGE2D) &&
            (image_type != CL_MEM_OBJECT_IMAGE2D_ARRAY)) {
            numFormats -= NUM_CHANNEL_ORDER_OF_DEPTH; // subtract channel order of DEPTH type.
        }
        // Currently sRGB is not supported for write_imagef (extension cl_khr_srgb_image_writes).
        // The flag set has to match the one used by getSupportedFormats(),
        // otherwise the count and the reported list disagree.
        if ((image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) ||
            ((flags & (CL_MEM_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_KERNEL_READ_AND_WRITE)) != 0)) {
            numFormats -= NUM_CHANNEL_ORDER_OF_sRGB;
        }
    }
    else {
        numFormats -= NUM_CHANNEL_ORDER_OF_RGB;   // subtract channel order of RGB type.
        numFormats -= NUM_CHANNEL_ORDER_OF_sRGB;  // subtract channel order of sRGB type.
        numFormats -= NUM_CHANNEL_ORDER_OF_DEPTH; // subtract channel order of DEPTH type.
    }

    // Add RA if RA is supported. RA isn't supported on SI.
    if (supportRA) {
        numFormats += sizeof(supportedFormatsRA) / sizeof(cl_image_format); // Add channel order of RA type.
    }

    return numFormats;
}
//! Writes the supported image formats into \a image_formats.
//!
//! \param context       context whose devices determine the optional formats
//! \param image_type    image object type
//! \param num_entries   capacity of \a image_formats
//! \param image_formats output array
//! \param flags         memory flags the image would be created with
//! \return number of formats written, at most \a num_entries
cl_uint
Image::getSupportedFormats(
    const Context& context,
    cl_mem_object_type image_type,
    const cl_uint num_entries,
    cl_image_format* image_formats,
    cl_mem_flags flags)
{
    const std::vector<amd::Device*>& devices = context.devices();

    // An optional capability counts if any device of the context has it
    bool supportRA = false;
    bool supportDepthsRGB = false;
    for (size_t d = 0; d < devices.size(); ++d) {
        if (devices[d]->settings().supportRA_) {
            supportRA = true;
        }
        if (devices[d]->settings().supportDepthsRGB_) {
            supportDepthsRGB = true;
        }
    }

    // Number of leading table entries that are candidates for the output
    cl_uint tableEntries = sizeof(supportedFormats) / sizeof(cl_image_format);
    bool skipSrgb = false;

    if (!supportDepthsRGB) {
        tableEntries -= NUM_CHANNEL_ORDER_OF_RGB;   // subtract channel order of RGB type.
        tableEntries -= NUM_CHANNEL_ORDER_OF_sRGB;  // subtract channel order of sRGB type.
        tableEntries -= NUM_CHANNEL_ORDER_OF_DEPTH; // subtract channel order of DEPTH type.
    }
    else {
        if ((image_type != CL_MEM_OBJECT_IMAGE2D) &&
            (image_type != CL_MEM_OBJECT_IMAGE2D_ARRAY)) {
            tableEntries -= NUM_CHANNEL_ORDER_OF_DEPTH;
        }
        // Currently sRGB is not supported for write_imagef (extension cl_khr_srgb_image_writes)
        if ((image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) ||
            ((flags & (CL_MEM_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_KERNEL_READ_AND_WRITE)) != 0)) {
            skipSrgb = true;
        }
    }

    cl_uint written = 0;
    cl_image_format* out = image_formats;

    for (size_t i = 0; (i < tableEntries) && (written != num_entries); ++i) {
        const cl_image_format& candidate = amd::Image::supportedFormats[i];
        const bool isSrgb =
            (candidate.image_channel_order == CL_sRGBA) ||
            (candidate.image_channel_order == CL_sRGB) ||
            (candidate.image_channel_order == CL_sRGBx) ||
            (candidate.image_channel_order == CL_sBGRA);
        if (skipSrgb && isSrgb) {
            continue;
        }
        *out++ = candidate;
        ++written;
    }

    // Add RA if RA is supported.
    if (supportRA) {
        const size_t raEntries = sizeof(supportedFormatsRA) / sizeof(cl_image_format);
        for (size_t i = 0; (i < raEntries) && (written != num_entries); ++i) {
            *out++ = amd::Image::supportedFormatsRA[i];
            ++written;
        }
    }

    return written;
}
bool
Image : : Format : : isSupported ( const Context & context , cl_mem_object_type image_type ) const
{
bool supportDepthMSAA = true ;
const std : : vector < amd : : Device * > & devices = context . devices ( ) ;
for ( size_t i = 0 ; i < devices . size ( ) ; i + + ) {
if ( ! devices [ i ] - > settings ( ) . depthMSAAInterop_ ) {
supportDepthMSAA = false ;
}
}
cl_uint numFormats = numSupportedFormats ( context , image_type ) ;
cl_image_format * image_formats = new cl_image_format [ numFormats ] ;
if ( image_formats = = NULL ) {
return false ;
}
getSupportedFormats ( context , image_type , numFormats , image_formats ) ;
for ( cl_uint i = 0 ; i < numFormats ; i + + ) {
if ( * this = = image_formats [ i ] ) {
delete image_formats ;
return true ;
}
}
delete image_formats ;
if ( supportDepthMSAA ) {
for ( cl_uint i = 0 ; i < sizeof ( depthFormats ) / sizeof ( cl_image_format ) ; i + + ) {
if ( * this = = depthFormats [ i ] ) {
return true ;
}
}
}
return false ;
}
//! Creates a view of this image with a different format.
//!
//! \param context context used to allocate the view
//! \param format  format of the view
//! \param vDev    GPU virtual device assigned to the view
//! \return the new view, or NULL if it could not be allocated
Image*
Image::createView(
    const Context& context,
    const Format& format,
    device::VirtualDevice* vDev)
{
    // Find the image dimensions and create a corresponding object
    Image* view = new (context) Image(format, *this);

    // The view must not be touched before the allocation is known to have
    // succeeded
    if (view != NULL) {
        // Set GPU virtual device for this view
        view->setVirtualDevice(vDev);

        // Initialize view
        view->initDeviceMemory();
    }

    return view;
}
bool
Image : : isEntirelyCovered ( const Coord3D & origin , const Coord3D & region ) const
{
return ( origin [ 0 ] = = 0 & & origin [ 1 ] = = 0 & & origin [ 2 ] = = 0 & &
region [ 0 ] = = getWidth ( ) & &
region [ 1 ] = = getHeight ( ) & &
region [ 2 ] = = getDepth ( ) ) ? true : false ;
}
bool
Image : : validateRegion ( const Coord3D & origin , const Coord3D & region ) const
{
return ( ( region [ 0 ] > 0 ) & & ( region [ 1 ] > 0 ) & & ( region [ 2 ] > 0 ) & &
( origin [ 0 ] < getWidth ( ) ) & & ( region [ 0 ] ! = 0 ) & &
( origin [ 1 ] < getHeight ( ) ) & & ( region [ 1 ] ! = 0 ) & &
( origin [ 2 ] < getDepth ( ) ) & & ( region [ 2 ] ! = 0 ) & &
( ( origin [ 0 ] + region [ 0 ] ) < = getWidth ( ) ) & &
( ( origin [ 1 ] + region [ 1 ] ) < = getHeight ( ) ) & &
( ( origin [ 2 ] + region [ 2 ] ) < = getDepth ( ) ) ) ? true : false ;
}
bool
Image : : isSliceValid (
const size_t & rowPitch ,
const size_t & slice ,
const size_t & height ) const
{
size_t tmpHeight =
( getType ( ) = = CL_MEM_OBJECT_IMAGE1D_ARRAY ) ? 1 : height ;
return ( ( slice = = 0 ) | |
( ( slice ! = 0 ) & &
( slice > = rowPitch * tmpHeight ) ) ) ? true : false ;
}
void
Image : : copyToBackingStore ( void * initFrom )
{
char * src ;
char * dst = reinterpret_cast < char * > ( getHostMem ( ) ) ;
size_t cpySize = getWidth ( ) * getImageFormat ( ) . getElementSize ( ) ;
for ( uint z = 0 ; z < getDepth ( ) ; + + z ) {
src = reinterpret_cast < char * > ( initFrom ) + z * getSlicePitch ( ) ;
for ( uint y = 0 ; y < getHeight ( ) ; + + y ) {
memcpy ( dst , src , cpySize ) ;
dst + = cpySize ;
src + = getRowPitch ( ) ;
}
}
impl_ . rp_ = cpySize ;
if ( impl_ . sp_ ! = 0 ) {
impl_ . sp_ = impl_ . rp_ ;
if ( getDims ( ) = = 3 ) {
impl_ . sp_ * = getHeight ( ) ;
}
}
}
// Converts a float to int, rounding to the nearest integer with ties going to
// the even neighbour (under the default floating-point rounding mode).
//
// Values outside of the int range saturate to INT_MAX / INT_MIN and NaN is
// converted to 0, so the final conversion is always well defined.
static int
round_to_even(float v)
{
    // NaN fails every comparison below and would otherwise reach the
    // float-to-int conversion, which is undefined for NaN
    if (v != v) {
        return 0;
    }
    // clamp overflow
    if (v >= -static_cast<float>(INT_MIN)) {
        return INT_MAX;
    }
    if (v <= static_cast<float>(INT_MIN)) {
        return INT_MIN;
    }
    // 2^23 (bit pattern 0x4b000000): floats of at least this magnitude have no
    // fractional bits left, so only smaller values need rounding
    static const float roundingBias = 8388608.0f;
    if (fabsf(v) < roundingBias) {
        // Adding and removing the bias (with the sign of v) drops the
        // fractional bits using the FPU's round-to-nearest-even behaviour
        const float bias = (v < 0.0f) ? -roundingBias : roundingBias;
        v += bias;
        v -= bias;
    }
    return static_cast<int>(v);
}
// Converts a 32-bit float to the bit pattern of a 16-bit half float using
// round-toward-zero.
//
//  - NaN stays NaN (bit 0x0200 of the result is forced on)
//  - +/-infinity maps to 0x7c00 | sign
//  - finite values >= 2^16 map to the largest finite half (0x7bff | sign)
//  - values below 2^-24 flush to a signed zero
//  - values below 2^-14 become half denormals
static uint16_t
float2half_rtz(float f)
{
    // Range boundaries as exact powers of two; the hexadecimal value is the
    // matching float bit pattern
    static const float halfOverflow = 65536.0f;                 // 2^16,  0x47800000
    static const float halfUnderflow = 5.9604644775390625e-8f;  // 2^-24, 0x33800000
    static const float halfMinNormal = 6.103515625e-5f;         // 2^-14, 0x38800000
    static const float denormalScale = 16777216.0f;             // 2^24,  0x4b800000

    // Fetch the raw bits with memcpy to stay within the aliasing rules
    uint32_t bits;
    memcpy(&bits, &f, sizeof(bits));

    const uint32_t sign = (bits >> 16) & 0x8000;
    float x = fabsf(f);

    //Nan
    if (x != x) {
        bits >>= (24 - 11);
        bits &= 0x7fff;
        bits |= 0x0200; //silence the NaN
        return static_cast<uint16_t>(bits | sign);
    }

    // overflow
    if (x >= halfOverflow) {
        // Infinity: all exponent bits set, zero mantissa
        if ((bits & 0x7fffffffU) == 0x7f800000U) {
            return static_cast<uint16_t>(0x7c00 | sign);
        }
        return static_cast<uint16_t>(0x7bff | sign);
    }

    // underflow
    if (x < halfUnderflow) {
        return static_cast<uint16_t>(sign); // Too small for a half denormal, truncates to 0
    }

    // half denormal
    if (x < halfMinNormal) {
        // Scaling by 2^24 turns the value into the denormal mantissa; the
        // integer conversion truncates the remaining fraction
        x *= denormalScale;
        return static_cast<uint16_t>(
            static_cast<uint32_t>(static_cast<int>(x)) | sign);
    }

    // Normal number: drop the 13 mantissa bits that do not fit (truncation)
    // and rebias the exponent from 127 to 15
    bits &= 0xFFFFE000U;
    bits -= 0x38000000U;
    // The float sign bit ends up above bit 15 and is discarded by the cast
    return static_cast<uint16_t>((bits >> (24 - 11)) | sign);
}
void
Image : : Format : : getChannelOrder ( uint8_t * channelOrder ) const
{
enum { CH_ORDER_R = 0 , CH_ORDER_G , CH_ORDER_B , CH_ORDER_A } ;
switch ( image_channel_order ) {
case CL_A :
channelOrder [ 0 ] = CH_ORDER_A ;
break ;
case CL_RA :
channelOrder [ 0 ] = CH_ORDER_R ;
channelOrder [ 1 ] = CH_ORDER_A ;
break ;
case CL_BGRA :
channelOrder [ 0 ] = CH_ORDER_B ;
channelOrder [ 1 ] = CH_ORDER_G ;
channelOrder [ 2 ] = CH_ORDER_R ;
channelOrder [ 3 ] = CH_ORDER_A ;
break ;
case CL_ARGB :
channelOrder [ 0 ] = CH_ORDER_A ;
channelOrder [ 1 ] = CH_ORDER_R ;
channelOrder [ 2 ] = CH_ORDER_G ;
channelOrder [ 3 ] = CH_ORDER_B ;
break ;
default :
channelOrder [ 0 ] = CH_ORDER_R ;
channelOrder [ 1 ] = CH_ORDER_G ;
channelOrder [ 2 ] = CH_ORDER_B ;
channelOrder [ 3 ] = CH_ORDER_A ;
break ;
}
}
// "colorRGBA" is a four component RGBA floating-point color value if the image
// channel data type is not an unnormalized signed and unsigned integer type,
// is a four component signed integer value if the image channel data type is
// an unnormalized signed integer type and is a four component unsigned integer
// value if the image channel data type is an unormalized unsigned integer type.
void
Image : : Format : : formatColor ( const void * colorRGBA , void * colorFormat ) const
{
union t565 {
struct {
uint16_t r_ : 5 ;
uint16_t g_ : 6 ;
uint16_t b_ : 5 ;
} ;
uint16_t rgba_ ;
} ;
union t555 {
struct {
uint16_t r_ : 5 ;
uint16_t g_ : 5 ;
uint16_t b_ : 5 ;
uint16_t a_ : 1 ;
} ;
uint16_t rgba_ ;
} ;
union t101010 {
struct {
uint32_t b_ : 10 ;
2014-07-21 14:27:24 -04:00
uint32_t g_ : 10 ;
uint32_t r_ : 10 ;
2014-07-04 16:17:05 -04:00
uint32_t a_ : 2 ;
} ;
uint32_t rgba_ ;
} ;
const float * colorRGBAf = reinterpret_cast < const float * > ( colorRGBA ) ;
const int32_t * colorRGBAi = reinterpret_cast < const int32_t * > ( colorRGBA ) ;
const uint32_t * colorRGBAui = reinterpret_cast < const uint32_t * > ( colorRGBA ) ;
size_t chCount = getNumChannels ( ) ;
uint8_t chOrder [ 4 ] ;
getChannelOrder ( chOrder ) ;
bool allChannels = false ;
for ( size_t i = 0 ; i < chCount & & ! allChannels ; + + i ) {
switch ( image_channel_data_type ) {
case CL_SNORM_INT8 : {
int8_t * color = reinterpret_cast < int8_t * > ( colorFormat ) ;
color [ i ] = round_to_even ( INT8_MAX * colorRGBAf [ chOrder [ i ] ] ) ;
}
break ;
case CL_SNORM_INT16 : {
int16_t * color = reinterpret_cast < int16_t * > ( colorFormat ) ;
color [ i ] = round_to_even ( INT16_MAX * colorRGBAf [ chOrder [ i ] ] ) ;
}
break ;
case CL_UNORM_INT8 : {
uint8_t * color = reinterpret_cast < uint8_t * > ( colorFormat ) ;
color [ i ] = round_to_even ( UINT8_MAX * colorRGBAf [ chOrder [ i ] ] ) ;
}
break ;
case CL_UNORM_INT16 : {
uint16_t * color = reinterpret_cast < uint16_t * > ( colorFormat ) ;
color [ i ] = round_to_even ( UINT16_MAX * colorRGBAf [ chOrder [ i ] ] ) ;
}
break ;
case CL_UNORM_SHORT_565 : {
t565 * color = reinterpret_cast < t565 * > ( colorFormat ) ;
color - > r_ = round_to_even ( 0x1F * colorRGBAf [ 0 ] ) ;
color - > g_ = round_to_even ( 0x3F * colorRGBAf [ 1 ] ) ;
color - > b_ = round_to_even ( 0x1F * colorRGBAf [ 2 ] ) ;
allChannels = true ;
}
break ;
case CL_UNORM_SHORT_555 : {
t555 * color = reinterpret_cast < t555 * > ( colorFormat ) ;
color - > r_ = round_to_even ( 0x1F * colorRGBAf [ 0 ] ) ;
color - > g_ = round_to_even ( 0x1F * colorRGBAf [ 1 ] ) ;
color - > b_ = round_to_even ( 0x1F * colorRGBAf [ 2 ] ) ;
color - > a_ = round_to_even ( colorRGBAf [ 3 ] ) ;
allChannels = true ;
}
break ;
case CL_UNORM_INT_101010 : {
t101010 * color = reinterpret_cast < t101010 * > ( colorFormat ) ;
color - > r_ = round_to_even ( 0x3FF * colorRGBAf [ 0 ] ) ;
color - > g_ = round_to_even ( 0x3FF * colorRGBAf [ 1 ] ) ;
color - > b_ = round_to_even ( 0x3FF * colorRGBAf [ 2 ] ) ;
color - > a_ = round_to_even ( 0x3 * colorRGBAf [ 3 ] ) ;
allChannels = true ;
}
break ;
case CL_SIGNED_INT8 : {
int8_t * color = reinterpret_cast < int8_t * > ( colorFormat ) ;
color [ i ] = colorRGBAi [ chOrder [ i ] ] ;
}
break ;
case CL_SIGNED_INT16 : {
int16_t * color = reinterpret_cast < int16_t * > ( colorFormat ) ;
color [ i ] = colorRGBAi [ chOrder [ i ] ] ;
}
break ;
case CL_SIGNED_INT32 : {
int32_t * color = reinterpret_cast < int32_t * > ( colorFormat ) ;
color [ i ] = colorRGBAi [ chOrder [ i ] ] ;
}
break ;
case CL_UNSIGNED_INT8 : {
uint8_t * color = reinterpret_cast < uint8_t * > ( colorFormat ) ;
color [ i ] = colorRGBAui [ chOrder [ i ] ] ;
}
break ;
case CL_UNSIGNED_INT16 : {
uint16_t * color = reinterpret_cast < uint16_t * > ( colorFormat ) ;
color [ i ] = colorRGBAui [ chOrder [ i ] ] ;
}
break ;
case CL_UNSIGNED_INT32 : {
uint32_t * color = reinterpret_cast < uint32_t * > ( colorFormat ) ;
color [ i ] = colorRGBAui [ chOrder [ i ] ] ;
}
break ;
case CL_HALF_FLOAT : {
uint16_t * color = reinterpret_cast < uint16_t * > ( colorFormat ) ;
color [ i ] = float2half_rtz ( colorRGBAf [ chOrder [ i ] ] ) ;
}
break ;
case CL_FLOAT : {
float * color = reinterpret_cast < float * > ( colorFormat ) ;
color [ i ] = colorRGBAf [ chOrder [ i ] ] ;
}
break ;
}
}
}
// Table of the live SVM allocations. SvmBuffer::malloc() stores the start
// address of an allocation as the key and "start + size" as the value.
std : : map < uintptr_t , uintptr_t > SvmBuffer : : Allocated_ ;
// Lock taken by Add(), Remove() and Contains() before they touch Allocated_.
Monitor SvmBuffer : : AllocatedLock_ ( " Guards SVM allocation list " ) ;
void
SvmBuffer : : Add ( uintptr_t k , uintptr_t v )
{
ScopedLock lock ( AllocatedLock_ ) ;
Allocated_ . insert ( std : : pair < uintptr_t , uintptr_t > ( k , v ) ) ;
}
void
SvmBuffer : : Remove ( uintptr_t k )
{
ScopedLock lock ( AllocatedLock_ ) ;
Allocated_ . erase ( k ) ;
}
bool
SvmBuffer : : Contains ( uintptr_t ptr )
{
ScopedLock lock ( AllocatedLock_ ) ;
std : : map < uintptr_t , uintptr_t > : : iterator it = Allocated_ . upper_bound ( ptr ) ;
if ( it = = Allocated_ . begin ( ) ) {
return false ;
}
- - it ;
return ptr > = it - > first & & ptr < it - > second ;
}
// The allocation flags are ignored for now.
void *
SvmBuffer : : malloc (
Context & context ,
cl_svm_mem_flags flags ,
size_t size ,
size_t alignment )
{
bool atomics = ( flags & CL_MEM_SVM_ATOMICS ) ! = 0 ;
void * ret = context . svmAlloc ( size , alignment , flags ) ;
if ( ret = = NULL ) {
LogError ( " Unable to allocate aligned memory " ) ;
return NULL ;
}
uintptr_t ret_u = reinterpret_cast < uintptr_t > ( ret ) ;
Add ( ret_u , ret_u + size ) ;
return ret ;
}
void
SvmBuffer : : free ( Context & context , void * ptr )
{
Remove ( reinterpret_cast < uintptr_t > ( ptr ) ) ;
context . svmFree ( ptr ) ;
}
void
SvmBuffer : : memFill (
void * dst ,
const void * src ,
size_t srcSize ,
size_t times )
{
address dstAddress = reinterpret_cast < address > ( dst ) ;
const_address srcAddress = reinterpret_cast < const_address > ( src ) ;
for ( size_t i = 0 ; i < times ; i + + ) {
: : memcpy ( dstAddress + i * srcSize , srcAddress , srcSize ) ;
}
}
bool SvmBuffer : : malloced ( const void * ptr )
{
return Contains ( reinterpret_cast < uintptr_t > ( ptr ) ) ;
}
} // namespace amd