// Revision timestamp: 2014-07-04 16:17:05 -04:00
//
// Copyright 2010 Advanced Micro Devices, Inc. All rights reserved.
//
#include "amdocl/cl_common.hpp"
#include "os/alloc.hpp"
#include "platform/context.hpp"
#include "platform/object.hpp"
#include "platform/memory.hpp"
#include "device/device.hpp"
namespace amd {
//! Computes the pitches and the start/end offsets that describe a rectangular
//! region inside a linear buffer, and reports whether the layout is usable.
//!
//! \param bufferOrigin     three-element origin of the region (x, y, z)
//! \param region           three-element extent of the region (x, y, z)
//! \param bufferRowPitch   row pitch; 0 selects region[0]
//! \param bufferSlicePitch slice pitch; 0 selects rowPitch_ * region[1]
//!
//! \return true if the row pitch covers a row, the slice pitch covers all
//!         rows and the slice pitch is a whole multiple of the row pitch
//!
//! \note rowPitch_, slicePitch_, start_ and end_ are always written, even
//!       when the function returns false.
bool
BufferRect::create(
    const size_t*   bufferOrigin,
    const size_t*   region,
    size_t          bufferRowPitch,
    size_t          bufferSlicePitch)
{
    // Find the buffer's row pitch
    rowPitch_ = (bufferRowPitch != 0) ? bufferRowPitch : region[0];

    // Find the buffer's slice pitch
    slicePitch_ = (bufferSlicePitch != 0)
        ? bufferSlicePitch
        : rowPitch_ * region[1];

    // Find the region start offset
    start_ = bufferOrigin[2] * slicePitch_ +
        bufferOrigin[1] * rowPitch_ + bufferOrigin[0];

    // Find the region relative end offset
    end_ = (region[2] - 1) * slicePitch_ +
        (region[1] - 1) * rowPitch_ + region[0];

    // A zero row pitch (no explicit pitch and an empty region[0]) can never
    // describe a valid rectangle, and it must be rejected before it is used
    // as the divisor of the modulo below.
    if (rowPitch_ == 0) {
        return false;
    }

    // Make sure we have a valid region
    return (rowPitch_ >= region[0]) &&
        (slicePitch_ >= (region[1] * rowPitch_)) &&
        ((slicePitch_ % rowPitch_) == 0);
}
//! Allocates the system memory backing store for this reference.
//!
//! The requested size is rounded up to CPU_MEMORY_ALIGNMENT_SIZE (256 when
//! that setting is not positive). The memory comes either from GuardedMemory
//! or from the context's host allocator, depending on CPU_MEMORY_GUARD_PAGES.
//!
//! \return true if the allocation succeeded
bool
HostMemoryReference::allocateMemory(size_t size, const Context& context)
{
    assert(!alloced_ && "Runtime should not reallocate system memory!");

    size_t memoryAlignment = 256;
    if (CPU_MEMORY_ALIGNMENT_SIZE > 0) {
        memoryAlignment = CPU_MEMORY_ALIGNMENT_SIZE;
    }

    //! \note memory size must be aligned for CAL pinning
    size_ = amd::alignUp(size, memoryAlignment);

    if (CPU_MEMORY_GUARD_PAGES) {
        hostMem_ = GuardedMemory::allocate(
            size_, MEMOBJ_BASE_ADDR_ALIGN, CPU_MEMORY_GUARD_PAGE_SIZE * Ki);
    }
    else {
        hostMem_ = context.hostAlloc(size_, MEMOBJ_BASE_ADDR_ALIGN);
    }

    alloced_ = (hostMem_ != NULL);
    return alloced_;
}
//! Releases the system memory if this reference allocated it, using the same
//! allocator family (guarded or context host allocator) that provided it,
//! and resets the reference to the empty state.
void
HostMemoryReference::deallocateMemory(const Context& context)
{
    // Nothing to do when the memory isn't owned by this reference
    if (!alloced_) {
        return;
    }

    if (CPU_MEMORY_GUARD_PAGES) {
        GuardedMemory::deallocate(hostMem_);
    }
    else {
        context.hostFree(hostMem_);
    }

    size_ = 0;
    alloced_ = false;
    hostMem_ = NULL;
}
//! Constructs a top-level memory object (one without a parent).
//!
//! \param context  context the object belongs to
//! \param type     memory object type
//! \param flags    memory flags of the object
//! \param size     size of the object
//! \param svmPtr   stored as the SVM host address of the object
//!
//! The object starts with no device memories, no parent, version 0,
//! no last writer and a zero map count.
Memory::Memory(
    Context&    context,
    Type        type,
    Flags       flags,
    size_t      size,
    void*       svmPtr)
    : numDevices_(0)
    , deviceMemories_(NULL)
    , destructorCallbacks_(NULL)
    , context_(context)
    , parent_(NULL)
    , type_(type)
    , hostMemRef_(NULL)
    , origin_(0)
    , size_(size)
    , flags_(flags)
    , version_(0)
    , lastWriter_(NULL)
    , interopObj_(NULL)
    , isParent_(false)
    , vDev_(NULL)
    , forceSysMemAlloc_(false)
    , svmHostAddress_(svmPtr)
    , svmPtrCommited_(false)
    , canBeCached_(true)
    , lockMemoryOps_("Memory Ops Lock", true)
{
    // The map counter is atomic, so it is initialized explicitly
    std::atomic_init(&mapCount_, 0u);
}
//! Constructs a memory object that is a view (sub-object) of \a parent.
//!
//! \param parent   parent memory object; it is retained and marked as a parent
//! \param flags    memory flags requested for the view
//! \param origin   origin of the view inside the parent
//! \param size     size of the view
//! \param type     memory object type; 0 means "use the parent's type"
//!
//! The view takes its context, version, last writer, interop object and SVM
//! state from the parent. Flag groups the caller left unspecified are
//! inherited from the parent as well.
Memory::Memory(
    Memory&     parent,
    Flags       flags,
    size_t      origin,
    size_t      size,
    Type        type)
    : numDevices_(0)
    , deviceMemories_(NULL)
    , destructorCallbacks_(NULL)
    , context_(parent.getContext())
    , parent_(&parent)
    , type_((type == 0) ? parent.type_ : type)
    , hostMemRef_(NULL)
    , origin_(origin)
    , size_(size)
    , flags_(flags)
    , version_(parent.getVersion())
    , lastWriter_(parent.getLastWriter())
    , interopObj_(parent.getInteropObj())
    , isParent_(false)
    , vDev_(NULL)
    , forceSysMemAlloc_(false)
    , svmHostAddress_(parent.getSvmPtr())
    , svmPtrCommited_(parent.isSvmPtrCommited())
    , canBeCached_(true)
    , lockMemoryOps_("Memory Ops Lock", true)
{
    // Keep the parent alive for as long as the view exists
    parent_->retain();
    parent_->isParent_ = true;

    // Inherit the device access flags from the parent,
    // but only if the caller didn't specify any
    if ((flags_ & (CL_MEM_READ_WRITE | CL_MEM_READ_ONLY |
            CL_MEM_WRITE_ONLY)) == 0) {
        flags_ |= parent_->getMemFlags() &
            (CL_MEM_READ_WRITE | CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY);
    }

    // The host pointer flags are always taken from the parent
    flags_ |= parent_->getMemFlags() &
        (CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR);

    // Inherit the host access flags from the parent,
    // but only if the caller didn't specify any
    if ((flags_ & (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY |
            CL_MEM_HOST_NO_ACCESS)) == 0) {
        flags_ |= parent_->getMemFlags() &
            (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY |
             CL_MEM_HOST_NO_ACCESS);
    }

    // The map counter is atomic, so it is initialized explicitly
    std::atomic_init(&mapCount_, 0u);
}
//! Points deviceMemories_ at the storage located right behind the Memory
//! object and zeroes one DeviceMemory entry per device in the context.
void
Memory::initDeviceMemory()
{
    char* const tableStart = reinterpret_cast<char*>(this) + sizeof(Memory);
    deviceMemories_ = reinterpret_cast<DeviceMemory*>(tableStart);

    const size_t tableBytes =
        context_().devices().size() * sizeof(DeviceMemory);
    memset(deviceMemories_, 0, tableBytes);
}
//! Allocates storage for the object plus one DeviceMemory entry
//! for every device in \a context.
void*
Memory::operator new(size_t size, const Context& context)
{
    const size_t deviceTableBytes =
        context.devices().size() * sizeof(DeviceMemory);
    return RuntimeObject::operator new(size + deviceTableBytes);
}
//! Returns the object's storage through RuntimeObject's deallocation function.
void
Memory::operator delete(void* p)
{
    RuntimeObject::operator delete(p);
}
//! Placement form matching operator new(size_t, const Context&).
//! The context isn't needed for the release; it forwards to the plain form.
void
Memory::operator delete(void* p, const Context& context)
{
    Memory::operator delete(p);
}
//! Appends \a view to the list of sub-buffers of this object.
//! The list is updated under the memory operations lock.
void
Memory::addSubBuffer(Memory* view)
{
    amd::ScopedLock guard(lockMemoryOps());
    subBuffers_.push_back(view);
}
//! Removes \a view from the list of sub-buffers of this object.
//! The list is updated under the memory operations lock.
void
Memory::removeSubBuffer(Memory* view)
{
    amd::ScopedLock guard(lockMemoryOps());
    subBuffers_.remove(view);
}
//! Sets up the host (system) memory of the object.
//!
//! \param initFrom      application pointer used either as the host memory
//!                      itself (CL_MEM_USE_HOST_PTR) or as the source of the
//!                      initial content
//! \param allocHostMem  request a host backing store; it is forced to true
//!                      when the context has a non GPU device or when
//!                      CL_MEM_COPY_HOST_PTR/CL_MEM_ALLOC_HOST_PTR is set
//! \param forceCopy     copy from \a initFrom even without
//!                      CL_MEM_COPY_HOST_PTR
//!
//! \return false if the backing store allocation failed, true otherwise
bool
Memory::allocHostMemory(void* initFrom, bool allocHostMem, bool forceCopy)
{
    // Sanity checks (the parameters should have been prevalidated by the API)
    assert(!(flags_ & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR) &&
        (initFrom == NULL) && !allocHostMem && !isSvmPtrCommited()));
    assert(!((initFrom != NULL) && !forceCopy &&
        !(flags_ & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR |
            CL_MEM_EXTERNAL_PHYSICAL_AMD))));
    assert(!(flags_ & CL_MEM_COPY_HOST_PTR && flags_ & CL_MEM_USE_HOST_PTR));

    const std::vector<Device*>& devices = context_().devices();

    // Find if a non GPU device was created with the context
    for (size_t i = 0; i < devices.size(); i++) {
        if (!(devices[i]->info().type_ & CL_DEVICE_TYPE_GPU)) {
            allocHostMem = true;
            break;
        }
    }

    // This allocation is necessary to use coherency mechanism
    // for the initialization
    if (getMemFlags() & (CL_MEM_COPY_HOST_PTR | CL_MEM_ALLOC_HOST_PTR)) {
        allocHostMem = true;
    }

    // Did application request to use host memory?
    if (getMemFlags() & CL_MEM_USE_HOST_PTR) {
        setHostMem(initFrom);

        // Recalculate image size according to pitch
        Image* image = asImage();
        if (image != NULL) {
            if (image->getDims() < 3) {
                size_ = image->getRowPitch() * image->getHeight();
            }
            else {
                size_ = image->getSlicePitch() * image->getDepth();
            }
        }
    }
    // Allocate host memory buffer if needed
    else if (allocHostMem && !isInterop()) {
        if (!hostMemRef_.allocateMemory(size_, context_())) {
            return false;
        }

        // Copy data to the backing store if the app has requested
        if (((flags_ & CL_MEM_COPY_HOST_PTR) || forceCopy) &&
            (initFrom != NULL)) {
            copyToBackingStore(initFrom);
        }
    }

    if (allocHostMem && type_ == CL_MEM_OBJECT_PIPE) {
        // Initialize the pipe for a CPU device
        clk_pipe_t* pipe = reinterpret_cast<clk_pipe_t*>(getHostMem());
        // Neither branch above is guaranteed to have produced host memory
        // (interop objects skip the allocation, USE_HOST_PTR takes whatever
        // the caller passed), so don't write the header through a NULL pointer
        if (pipe != NULL) {
            pipe->read_idx = 0;
            pipe->write_idx = 0;
            pipe->end_idx = asPipe()->getMaxNumPackets();
        }
    }

    if (flags_ & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)) {
        // Signal write, so coherency mechanism will initialize
        // memory on all devices
        signalWrite(NULL);
    }

    return true;
}
bool
Memory :: create ( void * initFrom , bool sysMemAlloc )
{
static const bool forceAllocHostMem = false ;
initDeviceMemory ();
// Check if it's a subbuffer allocation
if ( parent_ != NULL ) {
// Find host memory pointer for subbuffer
if ( parent_ -> getHostMem () != NULL ) {
setHostMem (( address ) parent_ -> getHostMem () + origin_ );
}
// Add a new subbuffer to the list
parent_ -> addSubBuffer ( this );
}
// Allocate host memory if requested
else if ( ! allocHostMemory ( initFrom , forceAllocHostMem )) {
return false ;
}
bool ok = true ;
const std :: vector < Device *>& devices = context_ (). devices ();
// Create memory on all available devices
for ( size_t i = 0 ; ok && i < devices . size (); i ++ ) {
deviceAlloced_ [ devices [ i ]] = AllocInit ;
// Only GPU devices have device memory objects
if ( devices [ i ] -> info (). type_ & CL_DEVICE_TYPE_GPU ) {
deviceMemories_ [ i ]. ref_ = devices [ i ];
deviceMemories_ [ i ]. value_ = NULL ;
}
}
// Forces system memory allocation on the device,
// instead of device memory
forceSysMemAlloc_ = sysMemAlloc ;
return ok ;
}
//! Creates the device memory object for \a dev and records it in the
//! deviceMemories_ table.
//!
//! The per-device state in deviceAlloced_ moves AllocInit -> AllocCreate ->
//! AllocComplete on success, or back to AllocInit if the device failed to
//! create the memory. A caller that doesn't win the AllocInit -> AllocCreate
//! transition yields until the state leaves AllocCreate.
//!
//! \return true if the state for \a dev is AllocComplete on exit
bool
Memory :: addDeviceMemory ( const Device * dev )
{
bool result = false ;
AllocState create = AllocCreate ;
AllocState init = AllocInit ;
// Only the caller that switches the state from init to create
// performs the actual device allocation
if ( make_atomic ( deviceAlloced_ [ dev ]). compareAndSet ( init , create )) {
device :: Memory * dm = dev -> createMemory ( * this );
// Add the new memory allocation to the device map
if ( NULL != dm ) {
// The new entry goes into the first unused slot of the table
deviceMemories_ [ numDevices_ ]. ref_ = dev ;
deviceMemories_ [ numDevices_ ]. value_ = dm ;
numDevices_ ++ ;
assert (( numDevices () <= context_ (). devices (). size ())
&& "Too many device objects" );
// Mark the allocation with the complete flag
deviceAlloced_ [ dev ] = AllocComplete ;
}
else {
// Mark the allocation as an empty
deviceAlloced_ [ dev ] = AllocInit ;
}
}
// Make sure runtime finished memory allocation.
// Loop if in the create state
while ( deviceAlloced_ [ dev ] == AllocCreate ) {
Os :: yield ();
}
// Any state other than complete (init, realloced) is reported as a failure
if ( deviceAlloced_ [ dev ] == AllocComplete ) {
result = true ;
}
return result ;
}
void
Memory :: replaceDeviceMemory ( const Device * dev , device :: Memory * dm )
{
uint i ;
for ( i = 0 ; i < numDevices_ ; ++ i ) {
if ( deviceMemories_ [ i ]. ref_ == dev ) {
delete deviceMemories_ [ i ]. value_ ;
break ;
}
}
if ( numDevices_ == 0 ) {
++ numDevices_ ;
deviceMemories_ [ 0 ]. ref_ = dev ;
}
deviceMemories_ [ i ]. value_ = dm ;
deviceAlloced_ [ dev ] = AllocRealloced ;
}
//! Returns the device memory object associated with \a dev.
//!
//! \param dev    device to look up
//! \param alloc  when true and no object exists yet, try to create one
//!
//! \return the device memory object, or NULL if there is none and either
//!         \a alloc is false or the allocation failed
device::Memory*
Memory::getDeviceMemory(const Device& dev, bool alloc)
{
    device::Memory* dm = NULL;
    for (uint i = 0; i < numDevices_; ++i) {
        if (deviceMemories_[i].ref_ == &dev) {
            dm = deviceMemories_[i].value_;
            break;
        }
    }

    if ((NULL == dm) && alloc) {
        if (!addDeviceMemory(&dev)) {
            LogError("Video memory allocation failed!");
            return NULL;
        }
        // Look the entry up by device. The last slot of the table doesn't
        // have to belong to this device if an entry for another device was
        // added in the meantime.
        for (uint i = 0; i < numDevices_; ++i) {
            if (deviceMemories_[i].ref_ == &dev) {
                dm = deviceMemories_[i].value_;
                break;
            }
        }
    }

    return dm;
}
//! Destroys the memory object.
//!
//! Order of operations: invoke the registered destructor callbacks, detach
//! from the parent (for a sub-buffer), delete the device memory objects,
//! free the callback entries, release the parent and finally free the host
//! memory owned by hostMemRef_.
Memory ::~ Memory ()
{
// For_each destructor callback:
DestructorCallBackEntry * entry ;
for ( entry = destructorCallbacks_ ; entry != NULL ; entry = entry -> next_ ) {
// invoke the callback function.
entry -> callback_ ( const_cast < cl_mem > ( as_cl ( this )), entry -> data_ );
}
// Release the parent.
if ( NULL != parent_ ) {
// Update cache if runtime destroys a subbuffer
if ( NULL != parent_ -> getHostMem ()) {
cacheWriteBack ();
}
// The parent must stop tracking this view before the view goes away
parent_ -> removeSubBuffer ( this );
}
if ( NULL != deviceMemories_ ) {
// Destroy all device memory objects
for ( uint i = 0 ; i < numDevices_ ; ++ i ) {
delete deviceMemories_ [ i ]. value_ ;
}
}
// Sanity check
if ( subBuffers_ . size () != 0 ) {
LogError ( "Can't have views if parent is destroyed!" );
}
// Destroy the destructor callback entries
DestructorCallBackEntry * callback = destructorCallbacks_ ;
while ( callback != NULL ) {
// Read the link before the entry is deleted
DestructorCallBackEntry * next = callback -> next_ ;
delete callback ;
callback = next ;
}
// Make sure runtime destroys the parent only after subbuffer destruction
if ( NULL != parent_ ) {
parent_ -> release ();
}
// Free the host backing store; a no-op if this object didn't allocate it
hostMemRef_ . deallocateMemory ( context_ ());
}
//! Registers a callback that is invoked from the destructor of the object.
//!
//! \param callback  function to call
//! \param data      user data passed to the callback
//!
//! The new entry becomes the head of the callback list, so the callbacks
//! are visited in the reverse order of registration.
//!
//! \return false if the entry couldn't be allocated, true otherwise
bool
Memory::setDestructorCallback(DestructorCallBackFunction callback, void* data)
{
    DestructorCallBackEntry* entry = new DestructorCallBackEntry(callback, data);
    if (entry == NULL) {
        return false;
    }

    entry->next_ = destructorCallbacks_;
    // On failure compare_exchange_weak stores the current head in
    // entry->next_, so the loop simply retries with the reloaded value
    while (!destructorCallbacks_.compare_exchange_weak(entry->next_, entry))
        ; // Someone else is also updating the head of the linked list! reload.

    return true;
}
//! Records a write to the memory object: bumps the version
//! and remembers \a writer as the last writer.
void
Memory::signalWrite(const Device* writer)
{
    // No critical section here: the potential race condition
    // between the two updates doesn't matter.
    ++version_;
    lastWriter_ = writer;
}
//! Synchronizes the host memory from the device cache.
//!
//! With a known last writer only that device's memory is synchronized.
//! Without one, a parent object synchronizes every device memory it has.
void
Memory::cacheWriteBack()
{
    if (NULL != lastWriter_) {
        device::Memory* dmem = getDeviceMemory(*lastWriter_);
        // getDeviceMemory() returns NULL when the device memory can't be
        // created. A sub-buffer inherits lastWriter_ from its parent and
        // may never have had device memory of its own.
        if (NULL != dmem) {
            dmem->syncHostFromCache();
        }
    }
    else if (isParent()) {
        // On CPU parent can't be synchronized, because lastWriter_ could be
        // NULL and syncHostFromCache() won't be called.
        for (uint i = 0; i < numDevices_; ++i) {
            // replaceDeviceMemory() stores whatever pointer it is given
            if (NULL != deviceMemories_[i].value_) {
                deviceMemories_[i].value_->syncHostFromCache();
            }
        }
    }
}
//! Copies size_ bytes from \a initFrom into the host memory of the object.
void
Memory::copyToBackingStore(void* initFrom)
{
    void* const backingStore = getHostMem();
    memcpy(backingStore, initFrom, size_);
}
//! Tells whether the object was created with CL_MEM_USE_HOST_PTR on top of
//! SVM memory: either the host pointer is known to SvmBuffer or the object
//! carries an SVM host address.
bool
Memory::usesSvmPointer() const
{
    // If the application host pointer lies within a SVM region, so does the
    // sub-buffer host pointer - so the following check works in both cases
    if (flags_ & CL_MEM_USE_HOST_PTR) {
        return (SvmBuffer::malloced(getHostMem()) || NULL != svmHostAddress_);
    }
    return false;
}
//! Commits the SVM host address range of the object with read/write
//! protection. The commit is done once, under the memory operations lock.
void
Memory::commitSvmMemory()
{
    ScopedLock guard(lockMemoryOps_);

    if (svmPtrCommited_) {
        return;
    }

    amd::Os::commitMemory(svmHostAddress_, size_, amd::Os::MEM_PROT_RW);
    svmPtrCommited_ = true;
}
//! Points deviceMemories_ at the storage located right behind the Buffer
//! object and zeroes one DeviceMemory entry per device in the context.
void
Buffer::initDeviceMemory()
{
    char* const tableStart = reinterpret_cast<char*>(this) + sizeof(Buffer);
    deviceMemories_ = reinterpret_cast<DeviceMemory*>(tableStart);

    const size_t tableBytes =
        context_().devices().size() * sizeof(DeviceMemory);
    memset(deviceMemories_, 0, tableBytes);
}
bool
Buffer :: create ( void * initFrom , bool sysMemAlloc )
{
if (( getMemFlags () & CL_MEM_EXTERNAL_PHYSICAL_AMD ) && ( initFrom != NULL )) {
busAddress_ = * ( reinterpret_cast < cl_bus_address_amd *> ( initFrom ));
initFrom = NULL ;
}
else {
busAddress_ . surface_bus_address = 0 ;
busAddress_ . marker_bus_address = 0 ;
}
return Memory :: create ( initFrom , sysMemAlloc );
}
//! Returns true if the region starts at offset 0
//! and spans the whole size of the buffer.
bool
Buffer::isEntirelyCovered(const Coord3D& origin, const Coord3D& region) const
{
    if (origin[0] != 0) {
        return false;
    }
    return region[0] == getSize();
}
//! Returns true if the region is not empty and lies entirely inside
//! the buffer. Only the first coordinate is used for buffers.
bool
Buffer::validateRegion(const Coord3D& origin, const Coord3D& region) const
{
    // The end of the region is checked as "region <= size - origin" instead
    // of "origin + region <= size": the sum can wrap around and make
    // an out of bounds region look valid. The subtraction can't underflow,
    // because origin < size was tested first.
    return (region[0] > 0) &&
        (origin[0] < getSize()) &&
        (region[0] <= (getSize() - origin[0]));
}
//! Points deviceMemories_ at the storage located right behind the Pipe
//! object and zeroes one DeviceMemory entry per device in the context.
void
Pipe::initDeviceMemory()
{
    char* const tableStart = reinterpret_cast<char*>(this) + sizeof(Pipe);
    deviceMemories_ = reinterpret_cast<DeviceMemory*>(tableStart);

    const size_t tableBytes =
        context_().devices().size() * sizeof(DeviceMemory);
    memset(deviceMemories_, 0, tableBytes);
}
//! Constructs a view of \a parent that reinterprets its data with \a format.
//!
//! The view keeps the parent's height, depth and pitches. Its width is the
//! parent's width scaled by the ratio of the parent's element size to the
//! element size of the new format.
Image::Image(
    const Format&   format,
    Image&          parent)
    : Memory(parent, 0, 0,
        parent.getWidth() * parent.getHeight() * parent.getDepth() *
        format.getElementSize())
    , impl_(format,
        Coord3D(
            parent.getWidth() * parent.getImageFormat().getElementSize() /
                format.getElementSize(),
            parent.getHeight(),
            parent.getDepth()),
        parent.getRowPitch(),
        parent.getSlicePitch(),
        parent.getBytePitch())
{
    initDimension();
}
//! Constructs a stand-alone image in \a context.
//!
//! The size of the memory object is width * height * depth elements of
//! \a format. Zero pitches are resolved later by initDimension().
Image::Image(
    Context&        context,
    Type            type,
    Flags           flags,
    const Format&   format,
    size_t          width,
    size_t          height,
    size_t          depth,
    size_t          rowPitch,
    size_t          slicePitch)
    : Memory(context, type, flags,
        width * height * depth * format.getElementSize())
    , impl_(format, Coord3D(width, height, depth), rowPitch, slicePitch)
{
    initDimension();
}
//! Constructs an image on top of an existing buffer.
//!
//! The image is a view of \a buffer that starts at offset 0
//! and covers the whole size of the buffer.
Image::Image(
    Buffer&         buffer,
    Type            type,
    Flags           flags,
    const Format&   format,
    size_t          width,
    size_t          height,
    size_t          depth,
    size_t          rowPitch,
    size_t          slicePitch)
    : Memory(buffer, flags, 0, buffer.getSize(), type)
    , impl_(format, Coord3D(width, height, depth), rowPitch, slicePitch)
{
    initDimension();
}
//! Checks the image dimensions against the limits of the devices.
//!
//! A dimension set is accepted as soon as one device in \a devices can hold
//! it. For array types the array size is checked first and must also fit on
//! at least one device.
//!
//! \return true if the dimensions are non-zero and supported by some device;
//!         false otherwise, including for unknown image types
bool
Image::validateDimensions(
    const std::vector<amd::Device*>& devices,
    cl_mem_object_type  type,
    size_t              width,
    size_t              height,
    size_t              depth,
    size_t              arraySize)
{
    const size_t deviceCount = devices.size();
    bool arrayFits = false;

    switch (type) {
    case CL_MEM_OBJECT_IMAGE3D:
        if ((0 == width) || (0 == height) || (depth < 1)) {
            return false;
        }
        for (size_t d = 0; d < deviceCount; ++d) {
            if ((devices[d]->info().image3DMaxWidth_ >= width) &&
                (devices[d]->info().image3DMaxHeight_ >= height) &&
                (devices[d]->info().image3DMaxDepth_ >= depth)) {
                return true;
            }
        }
        break;

    case CL_MEM_OBJECT_IMAGE2D_ARRAY:
        if (0 == arraySize) {
            return false;
        }
        for (size_t d = 0; (d < deviceCount) && !arrayFits; ++d) {
            arrayFits = (devices[d]->info().imageMaxArraySize_ >= arraySize);
        }
        if (!arrayFits) {
            return false;
        }
        // Fall through...
    case CL_MEM_OBJECT_IMAGE2D:
        if ((0 == width) || (0 == height)) {
            return false;
        }
        for (size_t d = 0; d < deviceCount; ++d) {
            if ((devices[d]->info().image2DMaxHeight_ >= height) &&
                (devices[d]->info().image2DMaxWidth_ >= width)) {
                return true;
            }
        }
        break;

    case CL_MEM_OBJECT_IMAGE1D_ARRAY:
        if (0 == arraySize) {
            return false;
        }
        for (size_t d = 0; (d < deviceCount) && !arrayFits; ++d) {
            arrayFits = (devices[d]->info().imageMaxArraySize_ >= arraySize);
        }
        if (!arrayFits) {
            return false;
        }
        // Fall through...
    case CL_MEM_OBJECT_IMAGE1D:
        if (0 == width) {
            return false;
        }
        // 1D images are checked against the 2D width limit
        for (size_t d = 0; d < deviceCount; ++d) {
            if (devices[d]->info().image2DMaxWidth_ >= width) {
                return true;
            }
        }
        break;

    case CL_MEM_OBJECT_IMAGE1D_BUFFER:
        if (0 == width) {
            return false;
        }
        for (size_t d = 0; d < deviceCount; ++d) {
            if (devices[d]->info().imageMaxBufferSize_ >= width) {
                return true;
            }
        }
        break;

    default:
        break;
    }

    return false;
}
//! Derives the number of dimensions from the image type
//! and fills in the pitches that were left as 0.
void
Image::initDimension()
{
    const size_t elemSize = impl_.format_.getElementSize();

    // Default row pitch: a tightly packed row
    if (0 == impl_.rp_) {
        impl_.rp_ = impl_.region_[0] * elemSize;
    }

    if ((type_ == CL_MEM_OBJECT_IMAGE3D) ||
        (type_ == CL_MEM_OBJECT_IMAGE2D_ARRAY)) {
        dim_ = 3;
        // Default slice pitch: a tightly packed 2D slice
        if (0 == impl_.sp_) {
            impl_.sp_ = impl_.region_[0] * impl_.region_[1] * elemSize;
        }
    }
    else if ((type_ == CL_MEM_OBJECT_IMAGE2D) ||
             (type_ == CL_MEM_OBJECT_IMAGE1D_ARRAY)) {
        dim_ = 2;
        // A slice of a 1D array is a single row
        if ((type_ == CL_MEM_OBJECT_IMAGE1D_ARRAY) && (0 == impl_.sp_)) {
            impl_.sp_ = impl_.rp_;
        }
    }
    else {
        // CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE1D_BUFFER
        // and any other type
        dim_ = 1;
    }
}
//! Points deviceMemories_ at the storage located right behind the Image
//! object and zeroes one DeviceMemory entry per device in the context.
void
Image::initDeviceMemory()
{
    char* const tableStart = reinterpret_cast<char*>(this) + sizeof(Image);
    deviceMemories_ = reinterpret_cast<DeviceMemory*>(tableStart);

    const size_t tableBytes =
        context_().devices().size() * sizeof(DeviceMemory);
    memset(deviceMemories_, 0, tableBytes);
}
//! Creates the image; simply forwards \a initFrom to Memory::create().
bool
Image::create(void* initFrom)
{
    const bool created = Memory::create(initFrom);
    return created;
}
//! Returns the number of channels of the channel order.
//! Every channel order that isn't listed below counts as one channel.
size_t
Image::Format::getNumChannels() const
{
    size_t numChannels = 1;

    switch (image_channel_order) {
    case CL_RG:
    case CL_RA:
        numChannels = 2;
        break;

    case CL_RGB:
    case CL_sRGB:
    case CL_sRGBx:
        numChannels = 3;
        break;

    case CL_RGBA:
    case CL_BGRA:
    case CL_ARGB:
    case CL_sRGBA:
    case CL_sBGRA:
        numChannels = 4;
        break;
    }

    return numChannels;
}
size_t
Image :: Format :: getElementSize () const
{
size_t bytesPerPixel = getNumChannels ();
switch ( image_channel_data_type )
{
case CL_SNORM_INT8 :
case CL_UNORM_INT8 :
case CL_SIGNED_INT8 :
case CL_UNSIGNED_INT8 :
break ;
case CL_UNORM_INT_101010 :
2014-07-21 14:27:24 -04:00
bytesPerPixel = 4 ;
break ;
2014-07-04 16:17:05 -04:00
case CL_SIGNED_INT32 :
case CL_UNSIGNED_INT32 :
case CL_FLOAT :
bytesPerPixel *= 4 ;
break ;
default :
bytesPerPixel *= 2 ;
break ;
}
return bytesPerPixel ;
}
//! Checks that the format is a legal channel order / data type combination.
//!
//! The data type has to be one of the known types, and the channel order has
//! to be known and compatible with the data type.
bool
Image::Format::isValid() const
{
    // Step 1: the data type must be a known one
    bool knownDataType = false;
    switch (image_channel_data_type) {
    case CL_SNORM_INT8:
    case CL_SNORM_INT16:
    case CL_UNORM_INT8:
    case CL_UNORM_INT16:
    case CL_UNORM_SHORT_565:
    case CL_UNORM_SHORT_555:
    case CL_UNORM_INT_101010:
    case CL_SIGNED_INT8:
    case CL_SIGNED_INT16:
    case CL_SIGNED_INT32:
    case CL_UNSIGNED_INT8:
    case CL_UNSIGNED_INT16:
    case CL_UNSIGNED_INT32:
    case CL_HALF_FLOAT:
    case CL_FLOAT:
        knownDataType = true;
        break;
    default:
        break;
    }
    if (!knownDataType) {
        return false;
    }

    // Step 2: the channel order restricts the usable data types
    bool valid = false;
    switch (image_channel_order) {
    // No restrictions
    case CL_R:
    case CL_A:
    case CL_RG:
    case CL_RA:
    case CL_RGBA:
        valid = true;
        break;

    // Normalized integer and floating point types only
    case CL_INTENSITY:
    case CL_LUMINANCE:
        valid = (image_channel_data_type == CL_SNORM_INT8) ||
                (image_channel_data_type == CL_SNORM_INT16) ||
                (image_channel_data_type == CL_UNORM_INT8) ||
                (image_channel_data_type == CL_UNORM_INT16) ||
                (image_channel_data_type == CL_HALF_FLOAT) ||
                (image_channel_data_type == CL_FLOAT);
        break;

    // Packed types only
    case CL_RGB:
        valid = (image_channel_data_type == CL_UNORM_SHORT_565) ||
                (image_channel_data_type == CL_UNORM_SHORT_555) ||
                (image_channel_data_type == CL_UNORM_INT_101010);
        break;

    // 8 bit types only
    case CL_BGRA:
    case CL_ARGB:
        valid = (image_channel_data_type == CL_SNORM_INT8) ||
                (image_channel_data_type == CL_UNORM_INT8) ||
                (image_channel_data_type == CL_SIGNED_INT8) ||
                (image_channel_data_type == CL_UNSIGNED_INT8);
        break;

    // sRGB orders: CL_UNORM_INT8 only
    case CL_sRGB:
    case CL_sRGBx:
    case CL_sRGBA:
    case CL_sBGRA:
        valid = (image_channel_data_type == CL_UNORM_INT8);
        break;

    // Depth: CL_UNORM_INT16 or CL_FLOAT
    case CL_DEPTH:
        valid = (image_channel_data_type == CL_UNORM_INT16) ||
                (image_channel_data_type == CL_FLOAT);
        break;

    // Unknown channel order
    default:
        valid = false;
        break;
    }

    return valid;
}
// Definition of the list of supported formats.
//
// The order of the groups at the end of the table matters: the RGB, sRGB
// and DEPTH entries are the last ones (in this order), so that they can be
// dropped by shortening the table. The NUM_CHANNEL_ORDER_OF_* constants
// below must match the number of entries in those groups.
cl_image_format
Image::supportedFormats[] = {
    // R
    { CL_R, CL_SNORM_INT8 },            { CL_R, CL_SNORM_INT16 },
    { CL_R, CL_UNORM_INT8 },            { CL_R, CL_UNORM_INT16 },
    { CL_R, CL_SIGNED_INT8 },           { CL_R, CL_SIGNED_INT16 },
    { CL_R, CL_SIGNED_INT32 },          { CL_R, CL_UNSIGNED_INT8 },
    { CL_R, CL_UNSIGNED_INT16 },        { CL_R, CL_UNSIGNED_INT32 },
    { CL_R, CL_HALF_FLOAT },            { CL_R, CL_FLOAT },

    // A
    { CL_A, CL_SNORM_INT8 },            { CL_A, CL_SNORM_INT16 },
    { CL_A, CL_UNORM_INT8 },            { CL_A, CL_UNORM_INT16 },
    { CL_A, CL_SIGNED_INT8 },           { CL_A, CL_SIGNED_INT16 },
    { CL_A, CL_SIGNED_INT32 },          { CL_A, CL_UNSIGNED_INT8 },
    { CL_A, CL_UNSIGNED_INT16 },        { CL_A, CL_UNSIGNED_INT32 },
    { CL_A, CL_HALF_FLOAT },            { CL_A, CL_FLOAT },

    // RG
    { CL_RG, CL_SNORM_INT8 },           { CL_RG, CL_SNORM_INT16 },
    { CL_RG, CL_UNORM_INT8 },           { CL_RG, CL_UNORM_INT16 },
    { CL_RG, CL_SIGNED_INT8 },          { CL_RG, CL_SIGNED_INT16 },
    { CL_RG, CL_SIGNED_INT32 },         { CL_RG, CL_UNSIGNED_INT8 },
    { CL_RG, CL_UNSIGNED_INT16 },       { CL_RG, CL_UNSIGNED_INT32 },
    { CL_RG, CL_HALF_FLOAT },           { CL_RG, CL_FLOAT },

    // RGBA
    { CL_RGBA, CL_SNORM_INT8 },         { CL_RGBA, CL_SNORM_INT16 },
    { CL_RGBA, CL_UNORM_INT8 },         { CL_RGBA, CL_UNORM_INT16 },
    { CL_RGBA, CL_SIGNED_INT8 },        { CL_RGBA, CL_SIGNED_INT16 },
    { CL_RGBA, CL_SIGNED_INT32 },       { CL_RGBA, CL_UNSIGNED_INT8 },
    { CL_RGBA, CL_UNSIGNED_INT16 },     { CL_RGBA, CL_UNSIGNED_INT32 },
    { CL_RGBA, CL_HALF_FLOAT },         { CL_RGBA, CL_FLOAT },

    // ARGB
    { CL_ARGB, CL_SNORM_INT8 },         { CL_ARGB, CL_UNORM_INT8 },
    { CL_ARGB, CL_SIGNED_INT8 },        { CL_ARGB, CL_UNSIGNED_INT8 },

    // BGRA
    { CL_BGRA, CL_SNORM_INT8 },         { CL_BGRA, CL_UNORM_INT8 },
    { CL_BGRA, CL_SIGNED_INT8 },        { CL_BGRA, CL_UNSIGNED_INT8 },

    // LUMINANCE
    { CL_LUMINANCE, CL_SNORM_INT8 },    { CL_LUMINANCE, CL_SNORM_INT16 },
    { CL_LUMINANCE, CL_UNORM_INT8 },    { CL_LUMINANCE, CL_UNORM_INT16 },
    { CL_LUMINANCE, CL_HALF_FLOAT },    { CL_LUMINANCE, CL_FLOAT },

    // INTENSITY
    { CL_INTENSITY, CL_SNORM_INT8 },    { CL_INTENSITY, CL_SNORM_INT16 },
    { CL_INTENSITY, CL_UNORM_INT8 },    { CL_INTENSITY, CL_UNORM_INT16 },
    { CL_INTENSITY, CL_HALF_FLOAT },    { CL_INTENSITY, CL_FLOAT },

    // RGB
    { CL_RGB, CL_UNORM_INT_101010 },

    // sRGB
    { CL_sRGBA, CL_UNORM_INT8 },

    // DEPTH
    { CL_DEPTH, CL_UNORM_INT16 },       { CL_DEPTH, CL_FLOAT },
};

// The number of RGB entries at the end of the table supportedFormats above,
// located before the sRGB and DEPTH entries.
const cl_uint NUM_CHANNEL_ORDER_OF_RGB = 1;
// The number of sRGB entries at the end of the table supportedFormats above,
// located before the DEPTH entries.
const cl_uint NUM_CHANNEL_ORDER_OF_sRGB = 1;
// The number of DEPTH entries at the very end of the table
// supportedFormats above.
const cl_uint NUM_CHANNEL_ORDER_OF_DEPTH = 2;
// Definition of the list of supported RA formats. These entries are
// reported in addition to supportedFormats when RA is supported.
cl_image_format
Image::supportedFormatsRA[] = {
    { CL_RA, CL_SNORM_INT8 },
    { CL_RA, CL_SNORM_INT16 },
    { CL_RA, CL_UNORM_INT8 },
    { CL_RA, CL_UNORM_INT16 },
    { CL_RA, CL_SIGNED_INT8 },
    { CL_RA, CL_SIGNED_INT16 },
    { CL_RA, CL_SIGNED_INT32 },
    { CL_RA, CL_UNSIGNED_INT8 },
    { CL_RA, CL_UNSIGNED_INT16 },
    { CL_RA, CL_UNSIGNED_INT32 },
    { CL_RA, CL_HALF_FLOAT },
    { CL_RA, CL_FLOAT },
};
// Depth and depth-stencil formats. They are matched separately from
// the regular supported formats table.
cl_image_format depthFormats[] = {
    // DEPTH
    { CL_DEPTH, CL_FLOAT },
    { CL_DEPTH, CL_UNORM_INT16 },
    // DEPTH STENCIL
    { CL_DEPTH_STENCIL, CL_FLOAT },
    { CL_DEPTH_STENCIL, CL_UNORM_INT24 }
};
//! Returns the number of image formats that getSupportedFormats() reports
//! for the same context, image type and flags.
//!
//! \param context     context whose devices define the supported features
//! \param image_type  image type the formats are queried for
//! \param flags       memory flags the formats are queried for
cl_uint
Image::numSupportedFormats(
    const Context&      context,
    cl_mem_object_type  image_type,
    cl_mem_flags        flags)
{
    const std::vector<amd::Device*>& devices = context.devices();
    cl_uint numFormats = sizeof(supportedFormats) / sizeof(cl_image_format);

    bool supportRA = false;
    bool supportDepthsRGB = false;

    // A feature counts as supported if at least one device supports it
    for (size_t i = 0; i < devices.size(); i++) {
        if (devices[i]->settings().supportRA_) {
            supportRA = true;
        }
        if (devices[i]->settings().supportDepthsRGB_) {
            supportDepthsRGB = true;
        }
    }

    if (supportDepthsRGB) {
        // DEPTH is reported for 2D images and 2D image arrays only
        if ((image_type != CL_MEM_OBJECT_IMAGE2D) &&
            (image_type != CL_MEM_OBJECT_IMAGE2D_ARRAY)) {
            numFormats -= NUM_CHANNEL_ORDER_OF_DEPTH;
        }
        // sRGB isn't supported for write_imagef yet (extension
        // cl_khr_srgb_image_writes). The flag mask has to be the one
        // getSupportedFormats() uses, otherwise the count returned here
        // doesn't match the number of formats it writes.
        if ((image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) ||
            ((flags & (CL_MEM_WRITE_ONLY | CL_MEM_READ_WRITE |
                CL_MEM_KERNEL_READ_AND_WRITE)) != 0)) {
            numFormats -= NUM_CHANNEL_ORDER_OF_sRGB;
        }
    }
    else {
        // Drop the RGB, sRGB and DEPTH groups at the end of the table
        numFormats -= NUM_CHANNEL_ORDER_OF_RGB;
        numFormats -= NUM_CHANNEL_ORDER_OF_sRGB;
        numFormats -= NUM_CHANNEL_ORDER_OF_DEPTH;
    }

    // Add RA if RA is supported. RA isn't supported on SI.
    if (supportRA) {
        numFormats += sizeof(supportedFormatsRA) / sizeof(cl_image_format);
    }

    return numFormats;
}
//! Writes the image formats supported for the given context, image type and
//! flags into \a image_formats.
//!
//! \param context        context whose devices define the supported features
//! \param image_type     image type the formats are queried for
//! \param num_entries    capacity of \a image_formats; at most this many
//!                       formats are written
//! \param image_formats  destination array
//! \param flags          memory flags the formats are queried for
//!
//! \return the number of formats written
cl_uint
Image::getSupportedFormats(
    const Context&      context,
    cl_mem_object_type  image_type,
    const cl_uint       num_entries,
    cl_image_format*    image_formats,
    cl_mem_flags        flags)
{
    const std::vector<amd::Device*>& devices = context.devices();
    cl_uint numFormats = 0;

    bool supportRA = false;
    bool supportDepthsRGB = false;

    // A feature counts as supported if at least one device supports it
    for (size_t i = 0; i < devices.size(); i++) {
        if (devices[i]->settings().supportRA_) {
            supportRA = true;
        }
        if (devices[i]->settings().supportDepthsRGB_) {
            supportDepthsRGB = true;
        }
    }

    cl_image_format* format = image_formats;
    cl_uint numSupportedFormats =
        sizeof(supportedFormats) / sizeof(cl_image_format);
    bool srgbWriteSupported = true;

    if (supportDepthsRGB) {
        // DEPTH is reported for 2D images and 2D image arrays only.
        // The DEPTH entries are the last ones, so the table is shortened.
        if ((image_type != CL_MEM_OBJECT_IMAGE2D) &&
            (image_type != CL_MEM_OBJECT_IMAGE2D_ARRAY)) {
            numSupportedFormats -= NUM_CHANNEL_ORDER_OF_DEPTH;
        }
        // sRGB isn't supported for write_imagef yet
        // (extension cl_khr_srgb_image_writes)
        if ((image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) ||
            ((flags & (CL_MEM_WRITE_ONLY | CL_MEM_READ_WRITE |
                CL_MEM_KERNEL_READ_AND_WRITE)) != 0)) {
            srgbWriteSupported = false;
        }
    }
    else {
        // Drop the RGB, sRGB and DEPTH groups at the end of the table
        numSupportedFormats -= NUM_CHANNEL_ORDER_OF_RGB;
        numSupportedFormats -= NUM_CHANNEL_ORDER_OF_sRGB;
        numSupportedFormats -= NUM_CHANNEL_ORDER_OF_DEPTH;
    }

    for (size_t i = 0; i < numSupportedFormats; i++) {
        if (numFormats == num_entries) {
            break;
        }
        // The sRGB entries sit in the middle of the remaining table,
        // so they are skipped one by one instead of being cut off
        if (!srgbWriteSupported) {
            if ((amd::Image::supportedFormats[i].image_channel_order == CL_sRGBA) ||
                (amd::Image::supportedFormats[i].image_channel_order == CL_sRGB) ||
                (amd::Image::supportedFormats[i].image_channel_order == CL_sRGBx) ||
                (amd::Image::supportedFormats[i].image_channel_order == CL_sBGRA)) {
                continue;
            }
        }
        *format++ = amd::Image::supportedFormats[i];
        numFormats++;
    }

    // Add RA if RA is supported.
    if (supportRA) {
        for (size_t i = 0;
             i < sizeof(supportedFormatsRA) / sizeof(cl_image_format); i++) {
            if (numFormats == num_entries) {
                break;
            }
            *format++ = amd::Image::supportedFormatsRA[i];
            numFormats++;
        }
    }

    return numFormats;
}
//! Tells whether this format can be used for images of \a image_type
//! in \a context.
//!
//! The format is looked up in the list returned by getSupportedFormats().
//! If it isn't there and every device in the context has depthMSAAInterop_
//! set, the depth formats table is searched as well.
bool
Image::Format::isSupported(
    const Context&      context,
    cl_mem_object_type  image_type) const
{
    // Depth/MSAA interop has to be available on all devices
    bool supportDepthMSAA = true;
    const std::vector<amd::Device*>& devices = context.devices();
    for (size_t i = 0; i < devices.size(); i++) {
        if (!devices[i]->settings().depthMSAAInterop_) {
            supportDepthMSAA = false;
        }
    }

    cl_uint numFormats = numSupportedFormats(context, image_type);
    cl_image_format* image_formats = new cl_image_format[numFormats];
    if (image_formats == NULL) {
        return false;
    }

    getSupportedFormats(context, image_type, numFormats, image_formats);

    bool found = false;
    for (cl_uint i = 0; (i < numFormats) && !found; i++) {
        found = (*this == image_formats[i]);
    }

    // The list was allocated with new[], so it must be released with
    // delete[]; a scalar delete on an array is undefined behavior
    delete [] image_formats;

    if (found) {
        return true;
    }

    if (supportDepthMSAA) {
        for (cl_uint i = 0;
             i < sizeof(depthFormats) / sizeof(cl_image_format); i++) {
            if (*this == depthFormats[i]) {
                return true;
            }
        }
    }

    return false;
}
//! Creates a view of this image that uses a different format.
//!
//! \param context  context used for the allocation of the view object
//! \param format   format of the view
//! \param vDev     virtual device assigned to the view
//!
//! \return the new view, or NULL if the object couldn't be allocated
Image*
Image::createView(
    const Context&          context,
    const Format&           format,
    device::VirtualDevice*  vDev)
{
    // Find the image dimensions and create a corresponding object
    Image* view = new (context) Image(format, *this);

    // The view must not be touched before the allocation result is checked
    if (view != NULL) {
        // Set GPU virtual device for this view
        view->setVirtualDevice(vDev);

        // Initialize view
        view->initDeviceMemory();
    }

    return view;
}
//! Returns true if the region starts at the image origin
//! and spans the full width, height and depth of the image.
bool
Image::isEntirelyCovered(const Coord3D& origin, const Coord3D& region) const
{
    const bool startsAtOrigin =
        (origin[0] == 0) && (origin[1] == 0) && (origin[2] == 0);
    if (!startsAtOrigin) {
        return false;
    }

    return (region[0] == getWidth()) &&
        (region[1] == getHeight()) &&
        (region[2] == getDepth());
}
//! Returns true if the region is not empty in any dimension
//! and lies entirely inside the image.
bool
Image::validateRegion(const Coord3D& origin, const Coord3D& region) const
{
    // The region must not be empty
    if ((region[0] == 0) || (region[1] == 0) || (region[2] == 0)) {
        return false;
    }

    // The origin must be inside the image
    if ((origin[0] >= getWidth()) ||
        (origin[1] >= getHeight()) ||
        (origin[2] >= getDepth())) {
        return false;
    }

    // The end of the region is checked as "region <= size - origin" instead
    // of "origin + region <= size": the sum can wrap around and make
    // an out of bounds region look valid. The subtractions can't underflow,
    // because the origin is known to be inside the image at this point.
    return (region[0] <= (getWidth() - origin[0])) &&
        (region[1] <= (getHeight() - origin[1])) &&
        (region[2] <= (getDepth() - origin[2]));
}
//! Validates a slice pitch. A slice pitch of 0 is always accepted; any other
//! value has to cover \a height rows of \a rowPitch (a single row for 1D
//! image arrays).
bool
Image::isSliceValid(
    const size_t&   rowPitch,
    const size_t&   slice,
    const size_t&   height) const
{
    if (slice == 0) {
        return true;
    }

    // A slice of a 1D image array consists of one row only
    const bool isArray1D = (getType() == CL_MEM_OBJECT_IMAGE1D_ARRAY);
    const size_t rows = isArray1D ? 1 : height;

    return slice >= (rowPitch * rows);
}
void
Image :: copyToBackingStore ( void * initFrom )
{
char * src ;
char * dst = reinterpret_cast < char *> ( getHostMem ());
size_t cpySize = getWidth () * getImageFormat (). getElementSize ();
for ( uint z = 0 ; z < getDepth (); ++ z ) {
src = reinterpret_cast < char *> ( initFrom ) + z * getSlicePitch ();
for ( uint y = 0 ; y < getHeight (); ++ y ) {
memcpy ( dst , src , cpySize );
dst += cpySize ;
src += getRowPitch ();
}
}
impl_ . rp_ = cpySize ;
if ( impl_ . sp_ != 0 ) {
impl_ . sp_ = impl_ . rp_ ;
if ( getDims () == 3 ) {
impl_ . sp_ *= getHeight ();
}
}
}
//! Converts a float to an int, rounding halfway cases to the nearest even
//! integer. Values outside of the int range are clamped to INT_MAX/INT_MIN.
//!
//! \note The rounding relies on the default round-to-nearest-even floating
//!       point mode.
static int
round_to_even(float v)
{
    // clamp overflow
    if (v >= -static_cast<float>(INT_MIN)) {
        return INT_MAX;
    }
    if (v <= static_cast<float>(INT_MIN)) {
        return INT_MIN;
    }

    // 2^23 (bit pattern 0x4b000000): floats of this magnitude and above
    // have no fractional bits. The constant is spelled as a float literal
    // instead of being read through a reinterpret_cast of an integer,
    // which violates the strict aliasing rules.
    const float twoPow23 = 8388608.0f;

    // round fractional values to integer value
    if (fabsf(v) < twoPow23) {
        // Adding and subtracting 2^23 with the sign of v pushes
        // the fraction out of the mantissa, which rounds the value
        const float magicVal = (v < 0.0f) ? -twoPow23 : twoPow23;
        v += magicVal;
        v -= magicVal;
    }

    return static_cast<int>(v);
}
//! Converts a 32 bit float to the bit pattern of a 16 bit half float,
//! rounding towards zero.
//!
//! - NaN is converted to a quiet half NaN
//! - infinity is converted to the half infinity
//! - finite values of 2^16 and above are clamped to the largest finite half
//! - values below 2^-24 are flushed to a (signed) zero
static uint16_t
float2half_rtz(float f)
{
    // Read the bit pattern with memcpy; reinterpreting the storage through
    // a pointer of another type violates the strict aliasing rules
    uint32_t bits;
    memcpy(&bits, &f, sizeof(bits));

    const uint32_t sign = (bits >> 16) & 0x8000;
    float x = fabsf(f);

    // NaN
    if (x != x) {
        bits >>= (24 - 11);
        bits &= 0x7fff;
        bits |= 0x0200;     // silence the NaN
        return static_cast<uint16_t>(bits | sign);
    }

    // overflow: 2^16 (bit pattern 0x47800000) and above
    if (x >= 65536.0f) {
        // infinity (bit pattern 0x7f800000) stays infinity
        if ((bits & 0x7fffffffU) == 0x7f800000U) {
            return static_cast<uint16_t>(0x7c00 | sign);
        }
        return static_cast<uint16_t>(0x7bff | sign);
    }

    // underflow: below 2^-24 (bit pattern 0x33800000),
    // the smallest half denormal
    if (x < 5.9604644775390625e-08f) {
        return static_cast<uint16_t>(sign);
    }

    // half denormal: below 2^-14 (bit pattern 0x38800000)
    if (x < 6.103515625e-05f) {
        // Scale by 2^24 (bit pattern 0x4b800000); the integer part is
        // the denormal mantissa, the conversion to int truncates
        x *= 16777216.0f;
        return static_cast<uint16_t>((int)x | sign);
    }

    // Normal value: drop the 13 low mantissa bits (truncation), rebias
    // the exponent from 127 to 15 and move the result into the low 16 bits
    bits &= 0xFFFFE000U;
    bits -= 0x38000000U;
    return static_cast<uint16_t>((bits >> (24 - 11)) | sign);
}
// Fills "channelOrder" with, for each channel slot stored by this format, the
// index (R=0, G=1, B=2, A=3) of the RGBA component that goes into that slot.
// Only the leading entries are written: one for CL_A, two for CL_RA and four
// for every other channel order.
void
Image::Format::getChannelOrder(uint8_t* channelOrder) const
{
    enum { CH_ORDER_R = 0, CH_ORDER_G, CH_ORDER_B, CH_ORDER_A };

    static const uint8_t kOrderA[1]    = { CH_ORDER_A };
    static const uint8_t kOrderRA[2]   = { CH_ORDER_R, CH_ORDER_A };
    static const uint8_t kOrderBGRA[4] = { CH_ORDER_B, CH_ORDER_G, CH_ORDER_R, CH_ORDER_A };
    static const uint8_t kOrderARGB[4] = { CH_ORDER_A, CH_ORDER_R, CH_ORDER_G, CH_ORDER_B };
    static const uint8_t kOrderRGBA[4] = { CH_ORDER_R, CH_ORDER_G, CH_ORDER_B, CH_ORDER_A };

    const uint8_t* table = kOrderRGBA;
    size_t         count = 4;

    switch (image_channel_order) {
    case CL_A:
        table = kOrderA;
        count = 1;
        break;
    case CL_RA:
        table = kOrderRA;
        count = 2;
        break;
    case CL_BGRA:
        table = kOrderBGRA;
        break;
    case CL_ARGB:
        table = kOrderARGB;
        break;
    default:
        // Every other order reads R, G, B, A in sequence.
        break;
    }

    for (size_t slot = 0; slot < count; ++slot) {
        channelOrder[slot] = table[slot];
    }
}
// "colorRGBA" is a four component RGBA floating-point color value if the image
// channel data type is not an unnormalized signed or unsigned integer type,
// is a four component signed integer value if the image channel data type is
// an unnormalized signed integer type, and is a four component unsigned integer
// value if the image channel data type is an unnormalized unsigned integer type.
// Writes the color into "colorFormat", converted to this format's channel
// data type and laid out in this format's channel order. Conversions to
// normalized and to 8/16-bit integer channel types saturate to the range of
// the destination channel rather than wrapping. Channel data types not listed
// in the switch leave "colorFormat" untouched.
void
Image::Format::formatColor(const void* colorRGBA, void* colorFormat) const
{
    // Saturating conversion helpers (local to this function).
    struct Saturate {
        // Rounds "scaled" to nearest-even, then clamps into [lo, hi].
        static int normalized(float scaled, int lo, int hi) {
            const int rounded = round_to_even(scaled);
            return (rounded < lo) ? lo : ((rounded > hi) ? hi : rounded);
        }
        // Clamps a signed value into [lo, hi].
        static int32_t signedInt(int32_t v, int32_t lo, int32_t hi) {
            return (v < lo) ? lo : ((v > hi) ? hi : v);
        }
        // Clamps an unsigned value into [0, hi].
        static uint32_t unsignedInt(uint32_t v, uint32_t hi) {
            return (v > hi) ? hi : v;
        }
    };

    // Bitfield views of the packed pixel formats.
    union t565 {
        struct {
            uint16_t r_ : 5;
            uint16_t g_ : 6;
            uint16_t b_ : 5;
        };
        uint16_t rgba_;
    };
    union t555 {
        struct {
            uint16_t r_ : 5;
            uint16_t g_ : 5;
            uint16_t b_ : 5;
            uint16_t a_ : 1;
        };
        uint16_t rgba_;
    };
    union t101010 {
        struct {
            uint32_t b_ : 10;
            uint32_t g_ : 10;
            uint32_t r_ : 10;
            uint32_t a_ : 2;
        };
        uint32_t rgba_;
    };

    // The same four 32-bit components, viewed as float / signed / unsigned.
    const float*    colorRGBAf  = reinterpret_cast<const float*>(colorRGBA);
    const int32_t*  colorRGBAi  = reinterpret_cast<const int32_t*>(colorRGBA);
    const uint32_t* colorRGBAui = reinterpret_cast<const uint32_t*>(colorRGBA);

    size_t  chCount = getNumChannels();
    uint8_t chOrder[4];
    getChannelOrder(chOrder);

    // Packed formats write every channel in a single pass and then set this
    // flag to stop the per-channel loop.
    bool allChannels = false;

    for (size_t i = 0; i < chCount && !allChannels; ++i) {
        switch (image_channel_data_type) {
        case CL_SNORM_INT8: {
            int8_t* color = reinterpret_cast<int8_t*>(colorFormat);
            color[i] = static_cast<int8_t>(Saturate::normalized(
                INT8_MAX * colorRGBAf[chOrder[i]], INT8_MIN, INT8_MAX));
            }
            break;
        case CL_SNORM_INT16: {
            int16_t* color = reinterpret_cast<int16_t*>(colorFormat);
            color[i] = static_cast<int16_t>(Saturate::normalized(
                INT16_MAX * colorRGBAf[chOrder[i]], INT16_MIN, INT16_MAX));
            }
            break;
        case CL_UNORM_INT8: {
            uint8_t* color = reinterpret_cast<uint8_t*>(colorFormat);
            color[i] = static_cast<uint8_t>(Saturate::normalized(
                UINT8_MAX * colorRGBAf[chOrder[i]], 0, UINT8_MAX));
            }
            break;
        case CL_UNORM_INT16: {
            uint16_t* color = reinterpret_cast<uint16_t*>(colorFormat);
            color[i] = static_cast<uint16_t>(Saturate::normalized(
                UINT16_MAX * colorRGBAf[chOrder[i]], 0, UINT16_MAX));
            }
            break;
        // The packed formats below read R, G, B (and A) straight from
        // colorRGBAf without consulting chOrder.
        case CL_UNORM_SHORT_565: {
            t565* color = reinterpret_cast<t565*>(colorFormat);
            color->r_ = Saturate::normalized(0x1F * colorRGBAf[0], 0, 0x1F);
            color->g_ = Saturate::normalized(0x3F * colorRGBAf[1], 0, 0x3F);
            color->b_ = Saturate::normalized(0x1F * colorRGBAf[2], 0, 0x1F);
            allChannels = true;
            }
            break;
        case CL_UNORM_SHORT_555: {
            t555* color = reinterpret_cast<t555*>(colorFormat);
            color->r_ = Saturate::normalized(0x1F * colorRGBAf[0], 0, 0x1F);
            color->g_ = Saturate::normalized(0x1F * colorRGBAf[1], 0, 0x1F);
            color->b_ = Saturate::normalized(0x1F * colorRGBAf[2], 0, 0x1F);
            color->a_ = Saturate::normalized(colorRGBAf[3], 0, 1);
            allChannels = true;
            }
            break;
        case CL_UNORM_INT_101010: {
            t101010* color = reinterpret_cast<t101010*>(colorFormat);
            color->r_ = Saturate::normalized(0x3FF * colorRGBAf[0], 0, 0x3FF);
            color->g_ = Saturate::normalized(0x3FF * colorRGBAf[1], 0, 0x3FF);
            color->b_ = Saturate::normalized(0x3FF * colorRGBAf[2], 0, 0x3FF);
            color->a_ = Saturate::normalized(0x3 * colorRGBAf[3], 0, 0x3);
            allChannels = true;
            }
            break;
        case CL_SIGNED_INT8: {
            int8_t* color = reinterpret_cast<int8_t*>(colorFormat);
            color[i] = static_cast<int8_t>(Saturate::signedInt(
                colorRGBAi[chOrder[i]], INT8_MIN, INT8_MAX));
            }
            break;
        case CL_SIGNED_INT16: {
            int16_t* color = reinterpret_cast<int16_t*>(colorFormat);
            color[i] = static_cast<int16_t>(Saturate::signedInt(
                colorRGBAi[chOrder[i]], INT16_MIN, INT16_MAX));
            }
            break;
        case CL_SIGNED_INT32: {
            int32_t* color = reinterpret_cast<int32_t*>(colorFormat);
            color[i] = colorRGBAi[chOrder[i]];
            }
            break;
        case CL_UNSIGNED_INT8: {
            uint8_t* color = reinterpret_cast<uint8_t*>(colorFormat);
            color[i] = static_cast<uint8_t>(Saturate::unsignedInt(
                colorRGBAui[chOrder[i]], UINT8_MAX));
            }
            break;
        case CL_UNSIGNED_INT16: {
            uint16_t* color = reinterpret_cast<uint16_t*>(colorFormat);
            color[i] = static_cast<uint16_t>(Saturate::unsignedInt(
                colorRGBAui[chOrder[i]], UINT16_MAX));
            }
            break;
        case CL_UNSIGNED_INT32: {
            uint32_t* color = reinterpret_cast<uint32_t*>(colorFormat);
            color[i] = colorRGBAui[chOrder[i]];
            }
            break;
        case CL_HALF_FLOAT: {
            uint16_t* color = reinterpret_cast<uint16_t*>(colorFormat);
            color[i] = float2half_rtz(colorRGBAf[chOrder[i]]);
            }
            break;
        case CL_FLOAT: {
            float* color = reinterpret_cast<float*>(colorFormat);
            color[i] = colorRGBAf[chOrder[i]];
            }
            break;
        }
    }
}
// Registry of SVM allocations made through SvmBuffer::malloc. Each entry maps
// an allocation's start address to its one-past-the-end address
// (start + size); SvmBuffer::Contains uses it for range lookups.
std :: map < uintptr_t , uintptr_t > SvmBuffer :: Allocated_ ;
// Lock taken by Add, Remove and Contains while they touch Allocated_.
Monitor SvmBuffer :: AllocatedLock_ ( "Guards SVM allocation list" );
// Records the range [k, v) in the allocation registry under the registry lock.
// If an entry keyed by k already exists it is left unchanged.
void
SvmBuffer::Add(uintptr_t k, uintptr_t v)
{
    typedef std::map<uintptr_t, uintptr_t>::value_type Range;

    ScopedLock lock(AllocatedLock_);
    const Range entry(k, v);
    Allocated_.insert(entry);
}
// Drops the registry entry whose start address is k, if there is one.
void
SvmBuffer::Remove(uintptr_t k)
{
    ScopedLock lock(AllocatedLock_);
    std::map<uintptr_t, uintptr_t>::iterator entry = Allocated_.find(k);
    if (entry != Allocated_.end()) {
        Allocated_.erase(entry);
    }
}
// Returns true when ptr lies inside a registered range, i.e. there is an
// entry with start <= ptr < end.
bool
SvmBuffer::Contains(uintptr_t ptr)
{
    ScopedLock lock(AllocatedLock_);

    // First entry that starts strictly above ptr; the only candidate range is
    // the one immediately before it.
    std::map<uintptr_t, uintptr_t>::iterator candidate =
        Allocated_.upper_bound(ptr);

    if (candidate != Allocated_.begin()) {
        --candidate;
        if (candidate->first <= ptr && ptr < candidate->second) {
            return true;
        }
    }
    return false;
}
// Allocates "size" bytes through context.svmAlloc and registers the resulting
// address range so that SvmBuffer::malloced can recognise pointers into it.
// The flags are not interpreted here; they are forwarded unchanged to
// context.svmAlloc.
// Returns the allocated pointer, or NULL (after logging an error) when the
// context could not provide the memory.
void*
SvmBuffer::malloc(
    Context& context,
    cl_svm_mem_flags flags,
    size_t size,
    size_t alignment)
{
    void* ret = context.svmAlloc(size, alignment, flags);
    if (ret == NULL) {
        LogError("Unable to allocate aligned memory");
        return NULL;
    }

    // Track the half-open range [ret, ret + size).
    const uintptr_t ret_u = reinterpret_cast<uintptr_t>(ret);
    Add(ret_u, ret_u + size);
    return ret;
}
// Unregisters the allocation starting at ptr, then hands the memory back to
// the context via svmFree.
void
SvmBuffer::free(Context& context, void* ptr)
{
    const uintptr_t key = reinterpret_cast<uintptr_t>(ptr);
    Remove(key);
    context.svmFree(ptr);
}
void
SvmBuffer :: memFill (
void * dst ,
const void * src ,
size_t srcSize ,
size_t times )
{
address dstAddress = reinterpret_cast < address > ( dst );
const_address srcAddress = reinterpret_cast < const_address > ( src );
for ( size_t i = 0 ; i < times ; i ++ ) {
:: memcpy ( dstAddress + i * srcSize , srcAddress , srcSize );
}
}
// Reports whether ptr points into a range currently registered in the SVM
// allocation registry.
bool SvmBuffer::malloced(const void* ptr)
{
    const uintptr_t addr = reinterpret_cast<uintptr_t>(ptr);
    return Contains(addr);
}
} // namespace amd