P4 to Git Change 1294021 by lmoriche@lmoriche_opencl_dev on 2016/07/21 12:28:39
SWDEV-94640 - Back out CL#1293210:
[OCL-LC-ROCm] OpenCL Runtime Library Implements OpenCL runtime API. Add HSA virtual device to ORCA.
- Rename hsa_foundation to ROCm.
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/Makefile#2 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/build/Makefile#2 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/build/Makefile.oclrocm#2 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/build/wNow64a/Makefile#2 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/mesa_glinterop.h#2 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocappprofile.cpp#2 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocappprofile.hpp#2 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocbinary.hpp#2 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocblit.cpp#2 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocblit.hpp#2 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/roccompiler.cpp#2 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/roccompilerlib.cpp#2 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/roccompilerlib.hpp#2 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdefs.hpp#3 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.cpp#3 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocdevice.hpp#2 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocglinterop.cpp#2 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocglinterop.hpp#2 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.cpp#2 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rockernel.hpp#2 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocmemory.cpp#2 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocmemory.hpp#2 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprintf.cpp#2 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprintf.hpp#2 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.cpp#2 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocprogram.hpp#2 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocregisters.hpp#2 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocsettings.cpp#2 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocsettings.hpp#2 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.cpp#2 delete
... //depot/stg/opencl/drivers/opencl/runtime/device/rocm/rocvirtual.hpp#2 delete
[ROCm/clr commit: 355257ed71]
Tá an tiomantas seo le fáil i:
@@ -1,271 +0,0 @@
|
||||
/*
|
||||
* Mesa 3-D graphics library
|
||||
*
|
||||
* Copyright 2016 Advanced Micro Devices, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/* Mesa OpenGL inter-driver interoperability interface designed for but not
|
||||
* limited to OpenCL.
|
||||
*
|
||||
* This is a driver-agnostic, backward-compatible interface. The structures
|
||||
* are only allowed to grow. They can never shrink and their members can
|
||||
* never be removed, renamed, or redefined.
|
||||
*
|
||||
* The interface doesn't return a lot of static texture parameters like
|
||||
* width, height, etc. It mainly returns mutable buffer and texture view
|
||||
* parameters that can't be part of the texture allocation (because they are
|
||||
* mutable). If drivers want to return more data or want to return static
|
||||
* allocation parameters, they can do it in one of these two ways:
|
||||
* - attaching the data to the DMABUF handle in a driver-specific way
|
||||
* - passing the data via "out_driver_data" in the "in" structure.
|
||||
*
|
||||
* Mesa is expected to do a lot of error checking on behalf of OpenCL, such
|
||||
* as checking the target, miplevel, and texture completeness.
|
||||
*
|
||||
* OpenCL, on the other hand, needs to check if the display+context combo
|
||||
* is compatible with the OpenCL driver by querying the device information.
|
||||
* It also needs to check if the texture internal format and channel ordering
|
||||
* (returned in a driver-specific way) is supported by OpenCL, among other
|
||||
* things.
|
||||
*/
|
||||
|
||||
#ifndef MESA_GLINTEROP_H
|
||||
#define MESA_GLINTEROP_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#if !defined(MESA_GLINTEROP_NO_GLX)
|
||||
#include <GL/glx.h>
|
||||
#include <EGL/egl.h>
|
||||
#else
|
||||
#include <GL/gl.h>
|
||||
#include <EGL/egl.h>
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define MESA_GLINTEROP_VERSION 1
|
||||
|
||||
/** Returned error codes. */
|
||||
enum {
|
||||
MESA_GLINTEROP_SUCCESS = 0,
|
||||
MESA_GLINTEROP_OUT_OF_RESOURCES,
|
||||
MESA_GLINTEROP_OUT_OF_HOST_MEMORY,
|
||||
MESA_GLINTEROP_INVALID_OPERATION,
|
||||
MESA_GLINTEROP_INVALID_VALUE,
|
||||
MESA_GLINTEROP_INVALID_DISPLAY,
|
||||
MESA_GLINTEROP_INVALID_CONTEXT,
|
||||
MESA_GLINTEROP_INVALID_TARGET,
|
||||
MESA_GLINTEROP_INVALID_OBJECT,
|
||||
MESA_GLINTEROP_INVALID_MIP_LEVEL,
|
||||
MESA_GLINTEROP_UNSUPPORTED
|
||||
};
|
||||
|
||||
/** Access flags. */
|
||||
enum {
|
||||
MESA_GLINTEROP_ACCESS_READ_WRITE = 0,
|
||||
MESA_GLINTEROP_ACCESS_READ_ONLY,
|
||||
MESA_GLINTEROP_ACCESS_WRITE_ONLY
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Device information returned by Mesa.
|
||||
*/
|
||||
typedef struct _mesa_glinterop_device_info {
|
||||
uint32_t size; /* size of this structure */
|
||||
|
||||
/* PCI location */
|
||||
uint32_t pci_segment_group;
|
||||
uint32_t pci_bus;
|
||||
uint32_t pci_device;
|
||||
uint32_t pci_function;
|
||||
|
||||
/* Device identification */
|
||||
uint32_t vendor_id;
|
||||
uint32_t device_id;
|
||||
} mesa_glinterop_device_info;
|
||||
|
||||
|
||||
/**
|
||||
* Input parameters to Mesa interop export functions.
|
||||
*/
|
||||
typedef struct _mesa_glinterop_export_in {
|
||||
uint32_t size; /* size of this structure */
|
||||
|
||||
/* One of the following:
|
||||
* - GL_TEXTURE_BUFFER
|
||||
* - GL_TEXTURE_1D
|
||||
* - GL_TEXTURE_2D
|
||||
* - GL_TEXTURE_3D
|
||||
* - GL_TEXTURE_RECTANGLE
|
||||
* - GL_TEXTURE_1D_ARRAY
|
||||
* - GL_TEXTURE_2D_ARRAY
|
||||
* - GL_TEXTURE_CUBE_MAP_ARRAY
|
||||
* - GL_TEXTURE_CUBE_MAP
|
||||
* - GL_TEXTURE_CUBE_MAP_POSITIVE_X
|
||||
* - GL_TEXTURE_CUBE_MAP_NEGATIVE_X
|
||||
* - GL_TEXTURE_CUBE_MAP_POSITIVE_Y
|
||||
* - GL_TEXTURE_CUBE_MAP_NEGATIVE_Y
|
||||
* - GL_TEXTURE_CUBE_MAP_POSITIVE_Z
|
||||
* - GL_TEXTURE_CUBE_MAP_NEGATIVE_Z
|
||||
* - GL_TEXTURE_2D_MULTISAMPLE
|
||||
* - GL_TEXTURE_2D_MULTISAMPLE_ARRAY
|
||||
* - GL_TEXTURE_EXTERNAL_OES
|
||||
* - GL_RENDERBUFFER
|
||||
* - GL_ARRAY_BUFFER
|
||||
*/
|
||||
GLenum target;
|
||||
|
||||
/* If target is GL_ARRAY_BUFFER, it's a buffer object.
|
||||
* If target is GL_RENDERBUFFER, it's a renderbuffer object.
|
||||
* If target is GL_TEXTURE_*, it's a texture object.
|
||||
*/
|
||||
GLuint obj;
|
||||
|
||||
/* Mipmap level. Ignored for non-texture objects. */
|
||||
GLuint miplevel;
|
||||
|
||||
/* One of MESA_GLINTEROP_ACCESS_* flags. This describes how the exported
|
||||
* object is going to be used.
|
||||
*/
|
||||
uint32_t access;
|
||||
|
||||
/* Size of memory pointed to by out_driver_data. */
|
||||
uint32_t out_driver_data_size;
|
||||
|
||||
/* If the caller wants to query driver-specific data about the OpenGL
|
||||
* object, this should point to the memory where that data will be stored.
|
||||
*/
|
||||
void *out_driver_data;
|
||||
} mesa_glinterop_export_in;
|
||||
|
||||
|
||||
/**
|
||||
* Outputs of Mesa interop export functions.
|
||||
*/
|
||||
typedef struct _mesa_glinterop_export_out {
|
||||
uint32_t size; /* size of this structure */
|
||||
|
||||
/* The DMABUF handle. It must be closed by the caller using the POSIX
|
||||
* close() function when it's not needed anymore. Mesa is not responsible
|
||||
* for closing the handle.
|
||||
*
|
||||
* Not closing the handle by the caller will lead to a resource leak,
|
||||
* prevents releasing the GPU buffer, and may prevent creating new DMABUF
|
||||
* handles until the process termination.
|
||||
*/
|
||||
int dmabuf_fd;
|
||||
|
||||
/* The mutable OpenGL internal format specified by glTextureView or
|
||||
* glTexBuffer. If the object is not one of those, the original internal
|
||||
* format specified by glTexStorage, glTexImage, or glRenderbufferStorage
|
||||
* will be returned.
|
||||
*/
|
||||
GLenum internalformat;
|
||||
|
||||
/* Parameters specified by glTexBufferRange for GL_TEXTURE_BUFFER. */
|
||||
GLintptr buf_offset;
|
||||
GLsizeiptr buf_size;
|
||||
|
||||
/* Parameters specified by glTextureView. If the object is not a texture
|
||||
* view, default parameters covering the whole texture will be returned.
|
||||
*/
|
||||
GLuint view_minlevel;
|
||||
GLuint view_numlevels;
|
||||
GLuint view_minlayer;
|
||||
GLuint view_numlayers;
|
||||
} mesa_glinterop_export_out;
|
||||
|
||||
#if !defined(MESA_GLINTEROP_NO_GLX)
|
||||
/**
|
||||
* Query device information.
|
||||
*
|
||||
* \param dpy GLX display
|
||||
* \param context GLX context
|
||||
* \param out where to return the information
|
||||
*
|
||||
* \return MESA_GLINTEROP_SUCCESS or MESA_GLINTEROP_* != 0 on error
|
||||
*/
|
||||
GLAPI int GLAPIENTRY
|
||||
MesaGLInteropGLXQueryDeviceInfo(Display *dpy, GLXContext context,
|
||||
mesa_glinterop_device_info *out);
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Same as MesaGLInteropGLXQueryDeviceInfo except that it accepts EGLDisplay
|
||||
* and EGLContext.
|
||||
*/
|
||||
GLAPI int GLAPIENTRY
|
||||
MesaGLInteropEGLQueryDeviceInfo(EGLDisplay dpy, EGLContext context,
|
||||
mesa_glinterop_device_info *out);
|
||||
|
||||
|
||||
#if !defined(MESA_GLINTEROP_NO_GLX)
|
||||
/**
|
||||
* Create and return a DMABUF handle corresponding to the given OpenGL
|
||||
* object, and return other parameters about the OpenGL object.
|
||||
*
|
||||
* \param dpy GLX display
|
||||
* \param context GLX context
|
||||
* \param in input parameters
|
||||
* \param out return values
|
||||
*
|
||||
* \return MESA_GLINTEROP_SUCCESS or MESA_GLINTEROP_* != 0 on error
|
||||
*/
|
||||
GLAPI int GLAPIENTRY
|
||||
MesaGLInteropGLXExportObject(Display *dpy, GLXContext context,
|
||||
mesa_glinterop_export_in *in,
|
||||
mesa_glinterop_export_out *out);
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Same as MesaGLInteropGLXExportObject except that it accepts
|
||||
* EGLDisplay and EGLContext.
|
||||
*/
|
||||
GLAPI int GLAPIENTRY
|
||||
MesaGLInteropEGLExportObject(EGLDisplay dpy, EGLContext context,
|
||||
mesa_glinterop_export_in *in,
|
||||
mesa_glinterop_export_out *out);
|
||||
|
||||
|
||||
#if !defined(MESA_GLINTEROP_NO_GLX)
|
||||
typedef int (APIENTRYP PFNMESAGLINTEROPGLXQUERYDEVICEINFOPROC)(Display *dpy, GLXContext context,
|
||||
mesa_glinterop_device_info *out);
|
||||
#endif
|
||||
typedef int (APIENTRYP PFNMESAGLINTEROPEGLQUERYDEVICEINFOPROC)(EGLDisplay dpy, EGLContext context,
|
||||
mesa_glinterop_device_info *out);
|
||||
#if !defined(MESA_GLINTEROP_NO_GLX)
|
||||
typedef int (APIENTRYP PFNMESAGLINTEROPGLXEXPORTOBJECTPROC)(Display *dpy, GLXContext context,
|
||||
mesa_glinterop_export_in *in,
|
||||
mesa_glinterop_export_out *out);
|
||||
#endif
|
||||
typedef int (APIENTRYP PFNMESAGLINTEROPEGLEXPORTOBJECTPROC)(EGLDisplay dpy, EGLContext context,
|
||||
mesa_glinterop_export_in *in,
|
||||
mesa_glinterop_export_out *out);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* MESA_GLINTEROP_H */
|
||||
@@ -1,61 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2009 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
|
||||
|
||||
#ifndef WITHOUT_HSA_BACKEND
|
||||
|
||||
#include "top.hpp"
|
||||
#include "device/device.hpp"
|
||||
#include "device/appprofile.hpp"
|
||||
#include "device/rocm/rocappprofile.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
//! Creates and initializes the ROCm application profile.
//!
//! \return A heap-allocated profile that the caller owns, or NULL when the
//!         allocation or the profile's init() call fails.
amd::AppProfile* rocCreateAppProfile()
{
    // Hold the concrete type so the failure path below can destroy the object
    // without depending on how the base class declares its destructor.
    roc::AppProfile* appProfile = new roc::AppProfile;

    if (appProfile == NULL) {
        return NULL;
    }

    if (!appProfile->init()) {
        // The profile used to be leaked here when initialization failed.
        delete appProfile;
        return NULL;
    }

    return appProfile;
}
|
||||
|
||||
namespace roc {
|
||||
|
||||
bool AppProfile::ParseApplicationProfile()
|
||||
{
|
||||
std::string appName("Explorer");
|
||||
|
||||
std::transform(appName.begin(), appName.end(), appName.begin(), ::tolower);
|
||||
std::transform(appFileName_.begin(), appFileName_.end(), appFileName_.begin(), ::tolower);
|
||||
|
||||
if (appFileName_.compare(appName) == 0 ) {
|
||||
hsaDeviceHint_ = CL_HSA_DISABLED_AMD;
|
||||
gpuvmHighAddr_ = false;
|
||||
noHsaInit_ = true;
|
||||
profileOverridesAllSettings_ = true;
|
||||
}
|
||||
|
||||
// Setting both bits is invalid, make it niether.
|
||||
if (hsaDeviceHint_ & CL_HSA_ENABLED_AMD
|
||||
&& hsaDeviceHint_ & CL_HSA_DISABLED_AMD) {
|
||||
hsaDeviceHint_ = 0;
|
||||
}
|
||||
|
||||
if (noHsaInit_) {
|
||||
// If no HSA initialization, then force hint flag to non-HSA device.
|
||||
// Even if this is not forced, the device selection logic will endure it.
|
||||
// After all hint flags are treated as hint only - depending on
|
||||
// availibility.
|
||||
hsaDeviceHint_ = CL_HSA_DISABLED_AMD;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -1,23 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2009 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
#pragma once
|
||||
|
||||
#ifndef WITHOUT_HSA_BACKEND
|
||||
|
||||
namespace roc {
|
||||
|
||||
class AppProfile : public amd::AppProfile
|
||||
{
|
||||
public:
|
||||
AppProfile(): amd::AppProfile() {}
|
||||
|
||||
protected:
|
||||
//! parse application profile based on application file name
|
||||
virtual bool ParseApplicationProfile();
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,51 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2009 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
#pragma once
|
||||
|
||||
#include "top.hpp"
|
||||
#include "rocdevice.hpp"
|
||||
|
||||
#ifndef WITHOUT_HSA_BACKEND
|
||||
|
||||
namespace roc {
|
||||
|
||||
typedef std::map<std::string, device::Kernel*> NameKernelMap;
|
||||
|
||||
class ClBinary : public device::ClBinary
|
||||
{
|
||||
public:
|
||||
ClBinary(const Device& dev, BinaryImageFormat bifVer = BIF_VERSION3)
|
||||
: device::ClBinary(dev, bifVer)
|
||||
{}
|
||||
|
||||
//! Destructor
|
||||
~ClBinary() {}
|
||||
|
||||
|
||||
protected:
|
||||
bool setElfTarget() {
|
||||
uint32_t target = static_cast<uint32_t>(21);//dev().calTarget());
|
||||
assert (((0xFFFF8000 & target) == 0) && "ASIC target ID >= 2^15");
|
||||
uint16_t elf_target = (uint16_t)(0x7FFF & target);
|
||||
return elfOut()->setTarget(elf_target, amd::OclElf::CAL_PLATFORM);
|
||||
return true;
|
||||
}
|
||||
|
||||
private:
|
||||
//! Disable default copy constructor
|
||||
ClBinary(const ClBinary&);
|
||||
|
||||
//! Disable default operator=
|
||||
ClBinary& operator=(const ClBinary&);
|
||||
|
||||
//! Returns the HSA device for this object
|
||||
const Device& dev() const { return static_cast<const Device&>(dev_); }
|
||||
|
||||
};
|
||||
|
||||
} // namespace roc
|
||||
|
||||
#endif // WITHOUT_HSA_BACKEND
|
||||
|
||||
|
||||
Tá difríocht comhad cosc orthu toisc go bhfuil sé ró-mhór
Difríocht Luchtaigh
@@ -1,412 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2013 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
#pragma once
|
||||
|
||||
#include "top.hpp"
|
||||
#include "platform/command.hpp"
|
||||
#include "platform/commandqueue.hpp"
|
||||
#include "device/device.hpp"
|
||||
#include "device/blit.hpp"
|
||||
|
||||
/*! \addtogroup HSA Blit Implementation
|
||||
* @{
|
||||
*/
|
||||
|
||||
//! HSA Blit Manager Implementation
|
||||
namespace roc {
|
||||
|
||||
class Device;
|
||||
class Kernel;
|
||||
class Memory;
|
||||
class VirtualGPU;
|
||||
|
||||
//! DMA Blit Manager
|
||||
class HsaBlitManager : public device::HostBlitManager
|
||||
{
|
||||
public:
|
||||
//! Constructor
|
||||
HsaBlitManager(
|
||||
device::VirtualDevice& vdev, //!< Virtual GPU to be used for blits
|
||||
Setup setup = Setup() //!< Specifies HW accelerated blits
|
||||
);
|
||||
|
||||
//! Destructor
|
||||
virtual ~HsaBlitManager() {
|
||||
if (completion_signal_.handle != 0) {
|
||||
hsa_signal_destroy(completion_signal_);
|
||||
}
|
||||
}
|
||||
|
||||
//! Creates HostBlitManager object
|
||||
virtual bool create(amd::Device& device) {
|
||||
if (HSA_STATUS_SUCCESS != hsa_signal_create(0, 0, NULL, &completion_signal_)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
//! Copies a buffer object to system memory
|
||||
virtual bool readBuffer(
|
||||
device::Memory& srcMemory, //!< Source memory object
|
||||
void* dstHost, //!< Destination host memory
|
||||
const amd::Coord3D& origin, //!< Source origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
) const;
|
||||
|
||||
//! Copies a buffer object to system memory
|
||||
virtual bool readBufferRect(
|
||||
device::Memory& srcMemory, //!< Source memory object
|
||||
void* dstHost, //!< Destination host memory
|
||||
const amd::BufferRect& bufRect, //!< Source rectangle
|
||||
const amd::BufferRect& hostRect, //!< Destination rectangle
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
) const;
|
||||
|
||||
//! Copies an image object to system memory
|
||||
virtual bool readImage(
|
||||
device::Memory& srcMemory, //!< Source memory object
|
||||
void* dstHost, //!< Destination host memory
|
||||
const amd::Coord3D& origin, //!< Source origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
size_t rowPitch, //!< Row pitch for host memory
|
||||
size_t slicePitch, //!< Slice pitch for host memory
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
) const;
|
||||
|
||||
//! Copies system memory to a buffer object
|
||||
virtual bool writeBuffer(
|
||||
const void* srcHost, //!< Source host memory
|
||||
device::Memory& dstMemory, //!< Destination memory object
|
||||
const amd::Coord3D& origin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
) const;
|
||||
|
||||
//! Copies system memory to a buffer object
|
||||
virtual bool writeBufferRect(
|
||||
const void* srcHost, //!< Source host memory
|
||||
device::Memory& dstMemory, //!< Destination memory object
|
||||
const amd::BufferRect& hostRect, //!< Source rectangle
|
||||
const amd::BufferRect& bufRect, //!< Destination rectangle
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
) const;
|
||||
|
||||
//! Copies system memory to an image object
|
||||
virtual bool writeImage(
|
||||
const void* srcHost, //!< Source host memory
|
||||
device::Memory& dstMemory, //!< Destination memory object
|
||||
const amd::Coord3D& origin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
size_t rowPitch, //!< Row pitch for host memory
|
||||
size_t slicePitch, //!< Slice pitch for host memory
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
) const;
|
||||
|
||||
//! Copies a buffer object to another buffer object
|
||||
virtual bool copyBuffer(
|
||||
device::Memory& srcMemory, //!< Source memory object
|
||||
device::Memory& dstMemory, //!< Destination memory object
|
||||
const amd::Coord3D& srcOrigin, //!< Source origin
|
||||
const amd::Coord3D& dstOrigin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
) const;
|
||||
|
||||
//! Copies a buffer object to another buffer object
|
||||
virtual bool copyBufferRect(
|
||||
device::Memory& srcMemory, //!< Source memory object
|
||||
device::Memory& dstMemory, //!< Destination memory object
|
||||
const amd::BufferRect& srcRect, //!< Source rectangle
|
||||
const amd::BufferRect& dstRect, //!< Destination rectangle
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
) const;
|
||||
|
||||
//! Copies an image object to a buffer object
|
||||
virtual bool copyImageToBuffer(
|
||||
device::Memory& srcMemory, //!< Source memory object
|
||||
device::Memory& dstMemory, //!< Destination memory object
|
||||
const amd::Coord3D& srcOrigin, //!< Source origin
|
||||
const amd::Coord3D& dstOrigin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
size_t rowPitch = 0, //!< Pitch for buffer
|
||||
size_t slicePitch = 0 //!< Slice for buffer
|
||||
) const;
|
||||
|
||||
//! Copies a buffer object to an image object
|
||||
virtual bool copyBufferToImage(
|
||||
device::Memory& srcMemory, //!< Source memory object
|
||||
device::Memory& dstMemory, //!< Destination memory object
|
||||
const amd::Coord3D& srcOrigin, //!< Source origin
|
||||
const amd::Coord3D& dstOrigin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
size_t rowPitch = 0, //!< Pitch for buffer
|
||||
size_t slicePitch = 0 //!< Slice for buffer
|
||||
) const;
|
||||
|
||||
//! Copies an image object to another image object
|
||||
virtual bool copyImage(
|
||||
device::Memory& srcMemory, //!< Source memory object
|
||||
device::Memory& dstMemory, //!< Destination memory object
|
||||
const amd::Coord3D& srcOrigin, //!< Source origin
|
||||
const amd::Coord3D& dstOrigin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
) const;
|
||||
|
||||
//! Fills a buffer memory with a pattern data
|
||||
virtual bool fillBuffer(
|
||||
device::Memory& memory, //!< Memory object to fill with pattern
|
||||
const void* pattern, //!< Pattern data
|
||||
size_t patternSize, //!< Pattern size
|
||||
const amd::Coord3D& origin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
) const;
|
||||
|
||||
//! Fills an image memory with a pattern data
|
||||
virtual bool fillImage(
|
||||
device::Memory& dstMemory, //!< Memory object to fill with pattern
|
||||
const void* pattern, //!< Pattern data
|
||||
const amd::Coord3D& origin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
) const;
|
||||
|
||||
protected:
|
||||
//! Returns the virtual GPU object
|
||||
VirtualGPU& gpu() const { return static_cast<VirtualGPU&>(vDev_); }
|
||||
|
||||
private:
|
||||
//! Handle of Hsa Device object
|
||||
const roc::Device& roc_device_;
|
||||
|
||||
hsa_signal_t completion_signal_;
|
||||
|
||||
//! Assists in transferring data from Host to Local or vice versa
|
||||
//! taking into account the Hsail profile supported by Hsa Agent
|
||||
bool hsaCopy(
|
||||
const void *hostSrc, //!< Contains source data to be copied
|
||||
void *hostDst, //!< Destination buffer address for copying
|
||||
uint32_t size, //!< Size of data to copy in bytes
|
||||
bool hostToDev //!< True if data is copied from Host To Device
|
||||
) const;
|
||||
|
||||
//! Disable copy constructor
|
||||
HsaBlitManager(const HsaBlitManager&);
|
||||
|
||||
//! Disable operator=
|
||||
HsaBlitManager& operator=(const HsaBlitManager&);
|
||||
};
|
||||
|
||||
//! Kernel Blit Manager
|
||||
//class KernelBlitManager : public HsaBlitManager
|
||||
class KernelBlitManager : public HsaBlitManager
|
||||
{
|
||||
private:
|
||||
VirtualGPU& gpu() const { return static_cast<VirtualGPU&>(vDev_); }
|
||||
public:
|
||||
enum {
|
||||
BlitCopyImage = 0,
|
||||
BlitCopyImage1DA,
|
||||
BlitCopyImageToBuffer,
|
||||
BlitCopyBufferToImage,
|
||||
BlitCopyBufferRect,
|
||||
BlitCopyBufferRectAligned,
|
||||
BlitCopyBuffer,
|
||||
BlitCopyBufferAligned,
|
||||
FillBuffer,
|
||||
FillImage,
|
||||
BlitTotal
|
||||
};
|
||||
|
||||
//! Constructor
|
||||
KernelBlitManager(
|
||||
device::VirtualDevice& vdev, //!< Virtual GPU to be used for blits
|
||||
Setup setup = Setup() //!< Specifies HW accelerated blits
|
||||
);
|
||||
|
||||
//! Destructor
|
||||
virtual ~KernelBlitManager();
|
||||
|
||||
//! Creates HostBlitManager object
|
||||
virtual bool create(amd::Device& device);
|
||||
|
||||
//! Copies a buffer object to system memory
|
||||
virtual bool readBuffer(
|
||||
device::Memory& srcMemory, //!< Source memory object
|
||||
void* dstHost, //!< Destination host memory
|
||||
const amd::Coord3D& origin, //!< Source origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
) const;
|
||||
|
||||
//! Copies a buffer object to system memory
|
||||
virtual bool readBufferRect(
|
||||
device::Memory& srcMemory, //!< Source memory object
|
||||
void* dstHost, //!< Destination host memory
|
||||
const amd::BufferRect& bufRect, //!< Source rectangle
|
||||
const amd::BufferRect& hostRect, //!< Destination rectangle
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
) const;
|
||||
|
||||
//! Copies an image object to system memory
|
||||
virtual bool readImage(
|
||||
device::Memory& srcMemory, //!< Source memory object
|
||||
void* dstHost, //!< Destination host memory
|
||||
const amd::Coord3D& origin, //!< Source origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
size_t rowPitch, //!< Row pitch for host memory
|
||||
size_t slicePitch, //!< Slice pitch for host memory
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
) const;
|
||||
|
||||
//! Copies system memory to a buffer object
|
||||
virtual bool writeBuffer(
|
||||
const void* srcHost, //!< Source host memory
|
||||
device::Memory& dstMemory, //!< Destination memory object
|
||||
const amd::Coord3D& origin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
) const;
|
||||
|
||||
//! Copies system memory to a buffer object
|
||||
virtual bool writeBufferRect(
|
||||
const void* srcHost, //!< Source host memory
|
||||
device::Memory& dstMemory, //!< Destination memory object
|
||||
const amd::BufferRect& hostRect, //!< Source rectangle
|
||||
const amd::BufferRect& bufRect, //!< Destination rectangle
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
) const;
|
||||
|
||||
//! Copies system memory to an image object
|
||||
virtual bool writeImage(
|
||||
const void* srcHost, //!< Source host memory
|
||||
device::Memory& dstMemory, //!< Destination memory object
|
||||
const amd::Coord3D& origin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
size_t rowPitch, //!< Row pitch for host memory
|
||||
size_t slicePitch, //!< Slice pitch for host memory
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
) const;
|
||||
|
||||
//! Copies a buffer object to another buffer object
|
||||
virtual bool copyBuffer(
|
||||
device::Memory& srcMemory, //!< Source memory object
|
||||
device::Memory& dstMemory, //!< Destination memory object
|
||||
const amd::Coord3D& srcOrigin, //!< Source origin
|
||||
const amd::Coord3D& dstOrigin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
) const;
|
||||
|
||||
//! Copies a buffer object to another buffer object
|
||||
virtual bool copyBufferRect(
|
||||
device::Memory& srcMemory, //!< Source memory object
|
||||
device::Memory& dstMemory, //!< Destination memory object
|
||||
const amd::BufferRect& srcRect, //!< Source rectangle
|
||||
const amd::BufferRect& dstRect, //!< Destination rectangle
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
) const;
|
||||
|
||||
//! Copies an image object to a buffer object
|
||||
virtual bool copyImageToBuffer(
|
||||
device::Memory& srcMemory, //!< Source memory object
|
||||
device::Memory& dstMemory, //!< Destination memory object
|
||||
const amd::Coord3D& srcOrigin, //!< Source origin
|
||||
const amd::Coord3D& dstOrigin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
size_t rowPitch = 0, //!< Pitch for buffer
|
||||
size_t slicePitch = 0 //!< Slice for buffer
|
||||
) const;
|
||||
|
||||
//! Copies a buffer object to an image object
|
||||
virtual bool copyBufferToImage(
|
||||
device::Memory& srcMemory, //!< Source memory object
|
||||
device::Memory& dstMemory, //!< Destination memory object
|
||||
const amd::Coord3D& srcOrigin, //!< Source origin
|
||||
const amd::Coord3D& dstOrigin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false, //!< Entire buffer will be updated
|
||||
size_t rowPitch = 0, //!< Pitch for buffer
|
||||
size_t slicePitch = 0 //!< Slice for buffer
|
||||
) const;
|
||||
|
||||
//! Copies an image object to another image object
|
||||
virtual bool copyImage(
|
||||
device::Memory& srcMemory, //!< Source memory object
|
||||
device::Memory& dstMemory, //!< Destination memory object
|
||||
const amd::Coord3D& srcOrigin, //!< Source origin
|
||||
const amd::Coord3D& dstOrigin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
) const;
|
||||
|
||||
//! Fills a buffer memory with a pattern data
|
||||
virtual bool fillBuffer(
|
||||
device::Memory& memory, //!< Memory object to fill with pattern
|
||||
const void* pattern, //!< Pattern data
|
||||
size_t patternSize, //!< Pattern size
|
||||
const amd::Coord3D& origin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
) const;
|
||||
|
||||
//! Fills an image memory with a pattern data
|
||||
virtual bool fillImage(
|
||||
device::Memory& dstMemory, //!< Memory object to fill with pattern
|
||||
const void* pattern, //!< Pattern data
|
||||
const amd::Coord3D& origin, //!< Destination origin
|
||||
const amd::Coord3D& size, //!< Size of the copy region
|
||||
bool entire = false //!< Entire buffer will be updated
|
||||
) const;
|
||||
|
||||
private:
|
||||
//! Disable copy constructor
|
||||
KernelBlitManager(const KernelBlitManager&);
|
||||
|
||||
//! Disable operator=
|
||||
KernelBlitManager& operator=(const KernelBlitManager&);
|
||||
|
||||
//! Creates a program for all blit operations
|
||||
bool createProgram(
|
||||
Device& device //!< Device object
|
||||
);
|
||||
|
||||
amd::Image::Format filterFormat(amd::Image::Format oldFormat) const;
|
||||
|
||||
device::Memory *createImageView(
|
||||
device::Memory &parent,
|
||||
amd::Image::Format newFormat) const;
|
||||
|
||||
amd::Context *context_; //!< A dummy context
|
||||
amd::Program *program_; //!< GPU program object
|
||||
amd::Kernel *kernels_[BlitTotal]; //!< GPU kernels for blit
|
||||
};
|
||||
|
||||
//! Kernel names for the blit operations; the table holds one entry per
//! operation and is sized by KernelBlitManager::BlitTotal.
//! NOTE(review): the order presumably has to follow the enum that defines
//! BlitTotal -- confirm against that enum before adding or reordering entries.
static const char* BlitName[KernelBlitManager::BlitTotal] = {
    // image copies
    "copyImage",          "copyImage1DA",
    "copyImageToBuffer",  "copyBufferToImage",
    // buffer copies
    "copyBufferRect",     "copyBufferRectAligned",
    "copyBuffer",         "copyBufferAligned",
    // fills
    "fillBuffer",         "fillImage"
};
|
||||
|
||||
/*@}*/
|
||||
} // namespace roc
|
||||
|
||||
@@ -1,160 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
#ifndef WITHOUT_HSA_BACKEND
|
||||
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
|
||||
#include "os/os.hpp"
|
||||
#include "rocdevice.hpp"
|
||||
#include "rocprogram.hpp"
|
||||
#include "roccompilerlib.hpp"
|
||||
#include "utils/options.hpp"
|
||||
#include <cstdio>
|
||||
|
||||
//CLC_IN_PROCESS_CHANGE
|
||||
extern int openclFrontEnd(const char* cmdline, std::string*, std::string* typeInfo = NULL);
|
||||
|
||||
namespace roc {
|
||||
|
||||
/*
 * Temporary log callback handed to the compiler library.
 *
 * Writes "Compiler Log: " followed by msg and a newline to std::cout and
 * flushes the stream. msg is streamed as a C string; the size argument is
 * not consulted.
 */
static void logFunction(const char* msg, size_t size)
{
    (void)size;  // accepted for the callback signature only

    std::ostream& sink = std::cout;
    sink << "Compiler Log: ";
    sink << msg;
    // newline + flush, spelled out
    sink.put(sink.widen('\n'));
    sink.flush();
}
|
||||
|
||||
// File-local counter, initialised to zero.
// NOTE(review): nothing in the visible part of this file reads or updates it;
// presumably a leftover program counter -- confirm before relying on it.
static int programsCount = 0;
|
||||
|
||||
bool
|
||||
HSAILProgram::compileImpl(const std::string& sourceCode,
|
||||
const std::vector<const std::string*>& headers,
|
||||
const char** headerIncludeNames,
|
||||
amd::option::Options* options)
|
||||
{
|
||||
acl_error errorCode;
|
||||
aclTargetInfo target;
|
||||
|
||||
//Defaulting to bonaire
|
||||
//Todo (sramalin) : Query the device for asic type-
|
||||
//Defaulting to Bonair for now.
|
||||
target = g_complibApi._aclGetTargetInfo(LP64_SWITCH("hsail","hsail64"), "Bonaire",
|
||||
&errorCode);
|
||||
|
||||
//end if asic info is ready
|
||||
// We dump the source code for each program (param: headers)
|
||||
// into their filenames (headerIncludeNames) into the TEMP
|
||||
// folder specific to the OS and add the include path while
|
||||
// compiling
|
||||
|
||||
//Find the temp folder for the OS
|
||||
std::string tempFolder = amd::Os::getEnvironment("TEMP");
|
||||
if (tempFolder.empty()) {
|
||||
tempFolder = amd::Os::getEnvironment("TMP");
|
||||
if (tempFolder.empty()) {
|
||||
tempFolder = WINDOWS_SWITCH(".","/tmp");;
|
||||
}
|
||||
}
|
||||
//Iterate through each source code and dump it into tmp
|
||||
std::fstream f;
|
||||
std::vector<std::string> headerFileNames(headers.size());
|
||||
std::vector<std::string> newDirs;
|
||||
for (size_t i = 0; i < headers.size(); ++i) {
|
||||
std::string headerPath = tempFolder;
|
||||
std::string headerIncludeName(headerIncludeNames[i]);
|
||||
// replace / in path with current os's file separator
|
||||
if ( amd::Os::fileSeparator() != '/') {
|
||||
for (std::string::iterator it = headerIncludeName.begin(),
|
||||
end = headerIncludeName.end();
|
||||
it != end;
|
||||
++it) {
|
||||
if (*it == '/') *it = amd::Os::fileSeparator();
|
||||
}
|
||||
}
|
||||
size_t pos = headerIncludeName.rfind(amd::Os::fileSeparator());
|
||||
if (pos != std::string::npos) {
|
||||
headerPath += amd::Os::fileSeparator();
|
||||
headerPath += headerIncludeName.substr(0, pos);
|
||||
headerIncludeName = headerIncludeName.substr(pos+1);
|
||||
}
|
||||
if (!amd::Os::pathExists(headerPath)) {
|
||||
bool ret = amd::Os::createPath(headerPath);
|
||||
assert(ret && "failed creating path!");
|
||||
newDirs.push_back(headerPath);
|
||||
}
|
||||
std::string headerFullName
|
||||
= headerPath + amd::Os::fileSeparator() + headerIncludeName;
|
||||
headerFileNames[i] = headerFullName;
|
||||
f.open(headerFullName.c_str(), std::fstream::out);
|
||||
//Should we allow asserts
|
||||
assert(!f.fail() && "failed creating header file!");
|
||||
f.write(headers[i]->c_str(), headers[i]->length());
|
||||
f.close();
|
||||
}
|
||||
|
||||
//Create Binary
|
||||
binaryElf_ = g_complibApi._aclBinaryInit(sizeof(aclBinary),
|
||||
&target,
|
||||
&binOpts_,
|
||||
&errorCode);
|
||||
|
||||
if( errorCode!=ACL_SUCCESS ) {
|
||||
buildLog_ += "Error while compiling opencl source:\
|
||||
aclBinary init failure \n";
|
||||
LogWarning("aclBinaryInit failed");
|
||||
return false;
|
||||
}
|
||||
|
||||
//Insert opencl into binary
|
||||
errorCode = g_complibApi._aclInsertSection(device().compiler(),
|
||||
binaryElf_,
|
||||
sourceCode.c_str(),
|
||||
strlen(sourceCode.c_str()),
|
||||
aclSOURCE);
|
||||
|
||||
if ( errorCode != ACL_SUCCESS ) {
|
||||
buildLog_ += "Error while converting to BRIG: \
|
||||
Inserting openCl Source \n";
|
||||
}
|
||||
|
||||
//Set the options for the compiler
|
||||
//Set the include path for the temp folder that contains the includes
|
||||
if(!headers.empty()) {
|
||||
this->compileOptions_.append(" -I");
|
||||
this->compileOptions_.append(tempFolder);
|
||||
}
|
||||
|
||||
//Add only for CL2.0 and later
|
||||
if (options->oVariables->CLStd[2] >= '2') {
|
||||
std::stringstream opts;
|
||||
opts << " -D" << "CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE="
|
||||
<< device().info().maxGlobalVariableSize_;
|
||||
compileOptions_.append(opts.str());
|
||||
}
|
||||
|
||||
//Compile source to IR
|
||||
this->compileOptions_.append(hsailOptions());
|
||||
|
||||
errorCode = g_complibApi._aclCompile(device().compiler(),
|
||||
binaryElf_,
|
||||
//"-Wf,--support_all_extensions",
|
||||
this->compileOptions_.c_str(),
|
||||
ACL_TYPE_OPENCL,
|
||||
ACL_TYPE_LLVMIR_BINARY,
|
||||
logFunction);
|
||||
buildLog_ += g_complibApi._aclGetCompilerLog(device().compiler());
|
||||
if( errorCode!=ACL_SUCCESS ) {
|
||||
LogWarning("aclCompile failed");
|
||||
buildLog_ += "Error while compiling \
|
||||
opencl source: Compiling CL to IR";
|
||||
return false;
|
||||
}
|
||||
// Save the binary in the interface class
|
||||
saveBinaryAndSetType(TYPE_COMPILED);
|
||||
return true;
|
||||
|
||||
}
|
||||
}
|
||||
#endif // WITHOUT_HSA_BACKEND
|
||||
@@ -1,59 +0,0 @@
|
||||
#include "roccompilerlib.hpp"
|
||||
#include "utils/flags.hpp"
|
||||
|
||||
#include "acl.h"
|
||||
|
||||
namespace roc {
|
||||
|
||||
void* g_complibModule = NULL;
|
||||
struct CompLibApi g_complibApi;
|
||||
|
||||
//
// LOADSYMBOL(api) looks up the exported function `api` in g_complibModule and
// stores it in g_complibApi._<api>. When the lookup fails it logs an error,
// unloads the library, clears g_complibModule (so that the stale handle cannot
// be unloaded a second time) and returns false from the enclosing function.
//
// Because of that embedded `return false` the macro must be used only in the
// LoadCompLib() function. It expands to complete statements; no trailing ';'
// is required at the point of use.
//
#define LOADSYMBOL(api) \
    g_complibApi._##api = (pfn_##api) amd::Os::getSymbol(g_complibModule, #api); \
    if( g_complibApi._##api == NULL ) { \
        LogError ("amd::Os::getSymbol() for exported func " #api " failed."); \
        amd::Os::unloadLibrary(g_complibModule); \
        g_complibModule = NULL; \
        return false; \
    }
|
||||
|
||||
bool LoadCompLib(bool offline)
|
||||
{
|
||||
g_complibModule = amd::Os::loadLibrary("amdhsacl" LP64_SWITCH(LINUX_SWITCH("32",""), "64"));
|
||||
if( g_complibModule == NULL ) {
|
||||
if (!offline) {
|
||||
LogError( "amd::Os::loadLibrary() for loading of amdhsacl.dll failed.");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
LOADSYMBOL(aclCompilerInit)
|
||||
LOADSYMBOL(aclGetTargetInfo)
|
||||
LOADSYMBOL(aclBinaryInit)
|
||||
LOADSYMBOL(aclInsertSection)
|
||||
LOADSYMBOL(aclCompile)
|
||||
LOADSYMBOL(aclCompilerFini)
|
||||
LOADSYMBOL(aclBinaryFini)
|
||||
LOADSYMBOL(aclWriteToMem)
|
||||
LOADSYMBOL(aclQueryInfo)
|
||||
LOADSYMBOL(aclExtractSymbol)
|
||||
LOADSYMBOL(aclGetCompilerLog)
|
||||
LOADSYMBOL(aclCreateFromBinary)
|
||||
LOADSYMBOL(aclReadFromMem)
|
||||
LOADSYMBOL(aclBinaryVersion)
|
||||
LOADSYMBOL(aclLink)
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void UnloadCompLib()
|
||||
{
|
||||
if( g_complibModule )
|
||||
{
|
||||
amd::Os::unloadLibrary(g_complibModule);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace roc
|
||||
@@ -1,77 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
//
|
||||
// This file has the code for explicitly loading amdoclcl.dll.
|
||||
// Exported functions from amdoclcl.dll can be added for usage as need-basis.
|
||||
// With explicit/dynamic loading roc will not have any linkage to amdoclcl.lib.
|
||||
//
|
||||
|
||||
#include "thread/thread.hpp"
|
||||
#include "acl.h"
|
||||
#include "utils/debug.hpp"
|
||||
|
||||
using namespace amd;
|
||||
|
||||
namespace roc {
|
||||
|
||||
//
|
||||
// To use any new exported function from amdhsacl.dll please add/make that function specific changes
|
||||
// in typedef below, struct CompLibApi and in roccompilerlib.cpp::LoadCompLib() function.
|
||||
//
|
||||
|
||||
//
|
||||
// Convention: The typedefed function name must be prefixed with pfn_
|
||||
//
|
||||
typedef aclCompiler* (ACL_API_ENTRY *pfn_aclCompilerInit) (aclCompilerOptions *opts, acl_error *error_code);
|
||||
typedef aclTargetInfo (ACL_API_ENTRY *pfn_aclGetTargetInfo) (const char*, const char*, acl_error*);
|
||||
typedef aclBinary* (ACL_API_ENTRY *pfn_aclBinaryInit) (size_t, const aclTargetInfo*, const aclBinaryOptions*, acl_error*);
|
||||
typedef acl_error (ACL_API_ENTRY *pfn_aclInsertSection) (aclCompiler *cl, aclBinary *binary, const void *data, size_t data_size, aclSections id);
|
||||
typedef acl_error (ACL_API_ENTRY *pfn_aclCompile) (aclCompiler *cl, aclBinary *bin, const char *options, aclType from, aclType to, aclLogFunction compile_callback);
|
||||
typedef acl_error (ACL_API_ENTRY *pfn_aclCompilerFini) (aclCompiler *cl);
|
||||
typedef acl_error (ACL_API_ENTRY *pfn_aclBinaryFini) (aclBinary *bin);
|
||||
typedef acl_error (ACL_API_ENTRY *pfn_aclWriteToMem) (aclBinary *bin,void **mem, size_t *size);
|
||||
typedef acl_error (ACL_API_ENTRY *pfn_aclQueryInfo) (aclCompiler *cl, const aclBinary *binary, aclQueryType query, const char *kernel, void *data_ptr, size_t *ptr_size);
|
||||
typedef const void* (ACL_API_ENTRY *pfn_aclExtractSymbol) (aclCompiler *cl,const aclBinary *binary,size_t *size,aclSections id,const char *symbol,acl_error *error_code);
|
||||
typedef aclBinary* (ACL_API_ENTRY *pfn_aclReadFromMem) (void *mem,size_t size, acl_error *error_code);
|
||||
typedef char* (ACL_API_ENTRY *pfn_aclGetCompilerLog) (aclCompiler* cl);
|
||||
typedef aclBinary* (ACL_API_ENTRY *pfn_aclCreateFromBinary) (const aclBinary *binary,aclBIFVersion version);
|
||||
typedef aclBIFVersion (ACL_API_ENTRY *pfn_aclBinaryVersion) (const aclBinary *binary);
|
||||
typedef acl_error (ACL_API_ENTRY *pfn_aclLink) (aclCompiler* cl, aclBinary *src_bin, unsigned int num_libs, aclBinary **libs, aclType link_mode,const char* options, aclLogFunction link_callback);
|
||||
//
|
||||
// Convention: prefix struct member variable with underscore '_'
|
||||
// would be nice if there was no underscore prefix, but on Linux the token
|
||||
// pasting in the macro is strict and this is the workaround.
|
||||
//
|
||||
struct CompLibApi
|
||||
{
|
||||
pfn_aclCompilerInit _aclCompilerInit;
|
||||
pfn_aclGetTargetInfo _aclGetTargetInfo;
|
||||
pfn_aclBinaryInit _aclBinaryInit;
|
||||
pfn_aclInsertSection _aclInsertSection;
|
||||
pfn_aclCompile _aclCompile;
|
||||
pfn_aclCompilerFini _aclCompilerFini;
|
||||
pfn_aclBinaryFini _aclBinaryFini;
|
||||
pfn_aclWriteToMem _aclWriteToMem;
|
||||
pfn_aclQueryInfo _aclQueryInfo;
|
||||
pfn_aclExtractSymbol _aclExtractSymbol;
|
||||
pfn_aclReadFromMem _aclReadFromMem;
|
||||
pfn_aclGetCompilerLog _aclGetCompilerLog;
|
||||
pfn_aclCreateFromBinary _aclCreateFromBinary;
|
||||
pfn_aclBinaryVersion _aclBinaryVersion;
|
||||
pfn_aclLink _aclLink;
|
||||
};
|
||||
|
||||
|
||||
//
|
||||
// Use g_ prefix for all global variables.
|
||||
//
|
||||
extern void* g_complibModule;
|
||||
extern CompLibApi g_complibApi;
|
||||
|
||||
// Note: initializes global variable g_complibApi.
|
||||
// Not sure what error values we have, for now returning false on failure.
|
||||
bool LoadCompLib(bool isOfflineDevice=false);
|
||||
void UnloadCompLib();
|
||||
|
||||
} // namespace roc
|
||||
|
||||
@@ -1,49 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#ifndef WITHOUT_HSA_BACKEND
|
||||
|
||||
namespace roc {
|
||||
|
||||
typedef uint HsaDeviceId;
|
||||
|
||||
struct AMDDeviceInfo {
|
||||
HsaDeviceId hsaDeviceId_; //!< Machine id
|
||||
const char* targetName_; //!< Target name for compilation
|
||||
const char* machineTarget_; //!< Machine target
|
||||
uint simdPerCU_; //!< Number of SIMDs per CU
|
||||
uint simdWidth_; //!< Number of workitems processed per SIMD
|
||||
uint simdInstructionWidth_; //!< Number of instructions processed per SIMD
|
||||
uint memChannelBankWidth_; //!< Memory channel bank width
|
||||
uint localMemSizePerCU_; //!< Local memory size per CU
|
||||
uint localMemBanks_; //!< Number of banks of local memory
|
||||
uint pciDeviceId; //!< PCIe device id
|
||||
};
|
||||
|
||||
//The device ID must match with the device's index into DeviceInfo
|
||||
const HsaDeviceId HSA_SPECTRE_ID = 0;
|
||||
const HsaDeviceId HSA_SPOOKY_ID = 1;
|
||||
const HsaDeviceId HSA_TONGA_ID = 2;
|
||||
const HsaDeviceId HSA_CARRIZO_ID = 3;
|
||||
const HsaDeviceId HSA_ICELAND_ID = 4;
|
||||
const HsaDeviceId HSA_FIJI_ID = 5;
|
||||
const HsaDeviceId HSA_HAWAII_ID = 6;
|
||||
const HsaDeviceId HSA_ELLESMERE_ID = 7;
|
||||
const HsaDeviceId HSA_BAFFIN_ID = 8;
|
||||
const HsaDeviceId HSA_INVALID_DEVICE_ID = -1;
|
||||
|
||||
//! One row per supported device, in device-id order.
//! Columns follow the member order of AMDDeviceInfo:
//!   id, targetName, machineTarget, simdPerCU, simdWidth,
//!   simdInstructionWidth, memChannelBankWidth, localMemSizePerCU,
//!   localMemBanks, pciDeviceId
static const AMDDeviceInfo DeviceInfo[] = {
    { HSA_SPECTRE_ID,   "Spectre",   "Spectre",   4, 16, 1, 256, 64 * Ki, 32, 0 },  // TARGET_KAVERI_SPECTRE
    { HSA_SPOOKY_ID,    "Spooky",    "Spooky",    4, 16, 1, 256, 64 * Ki, 32, 0 },  // TARGET_KAVERI_SPOOKY
    { HSA_TONGA_ID,     "Tonga",     "Tonga",     4, 16, 1, 256, 64 * Ki, 32, 0 },  // TARGET_TONGA
    { HSA_CARRIZO_ID,   "Carrizo",   "Carrizo",   4, 16, 1, 256, 64 * Ki, 32, 0 },  // TARGET_CARRIZO
    { HSA_ICELAND_ID,   "Topaz",     "Topaz",     4, 16, 1, 256, 64 * Ki, 32, 0 },  // TARGET_ICELAND
    { HSA_FIJI_ID,      "Fiji",      "Fiji",      4, 16, 1, 256, 64 * Ki, 32, 0 },  // TARGET_FIJI
    { HSA_HAWAII_ID,    "Hawaii",    "Hawaii",    4, 16, 1, 256, 64 * Ki, 32, 0 },  // TARGET HAWAII
    { HSA_ELLESMERE_ID, "Ellesmere", "Ellesmere", 4, 16, 1, 256, 64 * Ki, 32, 0 },  // TARGET ELLESMERE
    { HSA_BAFFIN_ID,    "Baffin",    "Baffin",    4, 16, 1, 256, 64 * Ki, 32, 0 }   // TARGET BAFFIN
};
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
Tá difríocht comhad cosc orthu toisc go bhfuil sé ró-mhór
Difríocht Luchtaigh
@@ -1,376 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2009 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
#pragma once
|
||||
|
||||
#ifndef WITHOUT_HSA_BACKEND
|
||||
|
||||
#include "top.hpp"
|
||||
#include "CL/cl.h"
|
||||
#include "device/device.hpp"
|
||||
#include "platform/command.hpp"
|
||||
#include "platform/program.hpp"
|
||||
#include "platform/perfctr.hpp"
|
||||
#include "platform/memory.hpp"
|
||||
#include "utils/concurrent.hpp"
|
||||
#include "thread/thread.hpp"
|
||||
#include "thread/monitor.hpp"
|
||||
#include "utils/versions.hpp"
|
||||
#include "aclTypes.h"
|
||||
|
||||
#include "device/rocm/rocsettings.hpp"
|
||||
#include "device/rocm/rocvirtual.hpp"
|
||||
#include "device/rocm/rocdefs.hpp"
|
||||
#include "device/rocm/rocprintf.hpp"
|
||||
#include "device/rocm/rocglinterop.hpp"
|
||||
|
||||
#include "hsa.h"
|
||||
#include "hsa_ext_image.h"
|
||||
#include "hsa_ext_finalize.h"
|
||||
#include "hsa_ext_amd.h"
|
||||
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
|
||||
// extern hsa::Runtime* g_hsaruntime;
|
||||
|
||||
/*! \addtogroup HSA
|
||||
* @{
|
||||
*/
|
||||
|
||||
//! HSA Device Implementation
|
||||
namespace roc {
|
||||
|
||||
/**
|
||||
* @brief List of environment variables that could be used to
|
||||
* configure the behavior of Hsa Runtime
|
||||
*/
|
||||
#define ENVVAR_HSA_POLL_KERNEL_COMPLETION "HSA_POLL_COMPLETION"
|
||||
|
||||
//! Forward declarations
|
||||
class Command;
|
||||
class Device;
|
||||
class GpuCommand;
|
||||
class Heap;
|
||||
class HeapBlock;
|
||||
class Program;
|
||||
class Kernel;
|
||||
class Memory;
|
||||
class Resource;
|
||||
class VirtualDevice;
|
||||
class PrintfDbg;
|
||||
|
||||
//A NULL Device type used only for offline compilation
|
||||
// Only functions that are used for compilation will be in this device
|
||||
class NullDevice : public amd::Device {
|
||||
public:
|
||||
//! constructor
|
||||
NullDevice(){};
|
||||
|
||||
//!create the device
|
||||
bool create(const AMDDeviceInfo& deviceInfo);
|
||||
|
||||
//! Initialise all the offline devices that can be used for compilation
|
||||
static bool init();
|
||||
//! Teardown for offline devices
|
||||
static void tearDown();
|
||||
|
||||
//! Destructor for the Null device
|
||||
virtual ~NullDevice();
|
||||
|
||||
aclCompiler *compiler() const { return compilerHandle_; }
|
||||
|
||||
//! Construct an HSAIL program object from the ELF assuming it is valid
|
||||
virtual device::Program *createProgram(amd::option::Options* options = NULL);
|
||||
const AMDDeviceInfo& deviceInfo() const {
|
||||
return deviceInfo_;
|
||||
}
|
||||
//! Gets the backend device for the NULL device type
|
||||
virtual hsa_agent_t getBackendDevice() const {
|
||||
ShouldNotReachHere();
|
||||
const hsa_agent_t kInvalidAgent = { 0 };
|
||||
return kInvalidAgent;
|
||||
}
|
||||
|
||||
//List of dummy functions which are disabled for NullDevice
|
||||
|
||||
//! Create sub-devices according to the given partition scheme.
|
||||
virtual cl_int createSubDevices(
|
||||
device::CreateSubDevicesInfo& create_info,
|
||||
cl_uint num_entries,
|
||||
cl_device_id* devices,
|
||||
cl_uint* num_devices) {
|
||||
ShouldNotReachHere();
|
||||
return CL_INVALID_VALUE; };
|
||||
|
||||
//! Create a new virtual device environment.
|
||||
virtual device::VirtualDevice* createVirtualDevice(
|
||||
amd::CommandQueue* queue = NULL) {
|
||||
ShouldNotReachHere();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
virtual bool registerSvmMemory(void* ptr, size_t size) const {
|
||||
ShouldNotReachHere();
|
||||
return false;
|
||||
}
|
||||
|
||||
virtual void deregisterSvmMemory(void* ptr) const {
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
|
||||
//! Just returns NULL for the dummy device
|
||||
virtual device::Memory* createMemory(amd::Memory& owner) const {
|
||||
ShouldNotReachHere();
|
||||
return NULL; }
|
||||
|
||||
//! Sampler object allocation
|
||||
virtual bool createSampler(
|
||||
const amd::Sampler& owner, //!< abstraction layer sampler object
|
||||
device::Sampler** sampler //!< device sampler object
|
||||
) const
|
||||
{
|
||||
ShouldNotReachHere();
|
||||
return true;
|
||||
}
|
||||
|
||||
//! Just returns NULL for the dummy device
|
||||
virtual device::Memory* createView(
|
||||
amd::Memory& owner, //!< Owner memory object
|
||||
const device::Memory& parent //!< Parent device memory object for the view
|
||||
) const {
|
||||
ShouldNotReachHere();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
//! Just returns NULL for the dummy device
|
||||
virtual void* svmAlloc(
|
||||
amd::Context& context, //!< The context used to create a buffer
|
||||
size_t size, //!< size of svm spaces
|
||||
size_t alignment, //!< alignment requirement of svm spaces
|
||||
cl_svm_mem_flags flags, //!< flags of creation svm spaces
|
||||
void* svmPtr //!< existing svm pointer for mGPU case
|
||||
) const {
|
||||
ShouldNotReachHere();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
//! Just returns NULL for the dummy device
|
||||
virtual void svmFree(
|
||||
void* ptr //!< svm pointer needed to be freed
|
||||
) const {
|
||||
ShouldNotReachHere();
|
||||
return;
|
||||
}
|
||||
|
||||
//! Reallocates the provided buffer object
|
||||
virtual bool reallocMemory(amd::Memory& owner) const {
|
||||
ShouldNotReachHere();
|
||||
return false;
|
||||
}
|
||||
|
||||
//! Acquire external graphics API object in the host thread
|
||||
//! Needed for OpenGL objects on CPU device
|
||||
|
||||
virtual bool bindExternalDevice(
|
||||
uint flags, void* const pDevice[], void* pContext, bool validateOnly) {
|
||||
ShouldNotReachHere();
|
||||
return false;
|
||||
}
|
||||
|
||||
virtual bool unbindExternalDevice(
|
||||
uint flags, void* const pDevice[], void* pContext, bool validateOnly) {
|
||||
ShouldNotReachHere();
|
||||
return false;
|
||||
}
|
||||
|
||||
//! Releases non-blocking map target memory
|
||||
virtual void freeMapTarget(amd::Memory& mem, void* target) { ShouldNotReachHere();}
|
||||
|
||||
//! Empty implementation on Null device
|
||||
virtual bool globalFreeMemory(size_t* freeMemory) const {
|
||||
ShouldNotReachHere();
|
||||
return false;
|
||||
}
|
||||
|
||||
protected:
|
||||
//! Initialize compiler instance and handle
|
||||
static bool initCompiler(bool isOffline);
|
||||
//! destroy compiler instance and handle
|
||||
static bool destroyCompiler();
|
||||
//! Handle to the the compiler
|
||||
static aclCompiler* compilerHandle_;
|
||||
//! Device Id for an HsaDevice
|
||||
AMDDeviceInfo deviceInfo_;
|
||||
private:
|
||||
static const bool offlineDevice_;
|
||||
};
|
||||
|
||||
//! A HSA device ordinal (physical HSA device)
|
||||
class Device : public NullDevice {
|
||||
public:
|
||||
//! Initialise the whole HSA device subsystem (CAL init, device enumeration, etc).
|
||||
static bool init();
|
||||
static void tearDown();
|
||||
|
||||
//! Lookup all AMD HSA devices and memory regions.
|
||||
static hsa_status_t iterateAgentCallback(hsa_agent_t agent, void *data);
|
||||
static hsa_status_t iterateGpuMemoryPoolCallback(
|
||||
hsa_amd_memory_pool_t region, void* data);
|
||||
static hsa_status_t iterateCpuMemoryPoolCallback(
|
||||
hsa_amd_memory_pool_t region, void* data);
|
||||
|
||||
static bool loadHsaModules();
|
||||
|
||||
bool create();
|
||||
|
||||
//! Construct a new physical HSA device
|
||||
Device(hsa_agent_t bkendDevice);
|
||||
virtual hsa_agent_t getBackendDevice() const { return _bkendDevice; }
|
||||
|
||||
static const std::vector<hsa_agent_t>& getGpuAgents() {
|
||||
return gpu_agents_;
|
||||
}
|
||||
|
||||
static hsa_agent_t getCpuAgent()
|
||||
{
|
||||
return cpu_agent_;
|
||||
}
|
||||
|
||||
//! Destructor for the physical HSA device
|
||||
virtual ~Device();
|
||||
|
||||
bool mapHSADeviceToOpenCLDevice(hsa_agent_t hsadevice);
|
||||
|
||||
// Temporary, delete it later when HSA Runtime and KFD is fully fucntional.
|
||||
void fake_device();
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// TODO: Below are all mocked up virtual functions from amd::Device, they may
|
||||
// need real implementation.
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// #ifdef cl_ext_device_fission
|
||||
//! Create sub-devices according to the given partition scheme.
|
||||
virtual cl_int createSubDevices(
|
||||
device::CreateSubDevicesInfo &create_inf,
|
||||
cl_uint num_entries,
|
||||
cl_device_id *devices,
|
||||
cl_uint *num_devices)
|
||||
{ return CL_INVALID_VALUE; }
|
||||
// #endif // cl_ext_device_fission
|
||||
|
||||
// bool Device::create(CALuint ordinal);
|
||||
|
||||
//! Instantiate a new virtual device
|
||||
virtual device::VirtualDevice *createVirtualDevice(
|
||||
amd::CommandQueue* queue = NULL);
|
||||
|
||||
//! Construct an HSAIL program object from the ELF assuming it is valid
|
||||
virtual device::Program *createProgram(amd::option::Options* options = NULL);
|
||||
|
||||
virtual device::Memory *createMemory(amd::Memory &owner) const;
|
||||
|
||||
//! Sampler object allocation
|
||||
virtual bool createSampler(
|
||||
const amd::Sampler& owner, //!< abstraction layer sampler object
|
||||
device::Sampler** sampler //!< device sampler object
|
||||
) const
|
||||
{
|
||||
//! \todo HSA team has to implement sampler allocation
|
||||
*sampler = NULL;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
//! Just returns NULL for the dummy device
|
||||
virtual device::Memory *createView(
|
||||
amd::Memory &owner, //!< Owner memory object
|
||||
const device::Memory &parent //!< Parent device memory object for the view
|
||||
) const { return NULL; }
|
||||
|
||||
//! Reallocates the provided buffer object
|
||||
virtual bool reallocMemory(amd::Memory &owner) const {return true; }
|
||||
|
||||
//! Acquire external graphics API object in the host thread
|
||||
//! Needed for OpenGL objects on CPU device
|
||||
virtual bool bindExternalDevice(
|
||||
uint flags, void * const pDevice[], void *pContext, bool validateOnly);
|
||||
|
||||
/**
|
||||
* @brief Removes the external device as an available device.
|
||||
*
|
||||
* @note: The current implementation is to avoid build break
|
||||
* and does not represent actual / correct implementation. This
|
||||
* needs to be done.
|
||||
*/
|
||||
bool unbindExternalDevice(
|
||||
uint flags, //!< Enum val. for ext.API type: GL, D3D10, etc.
|
||||
void * const gfxDevice[], //!< D3D device do D3D, HDC/Display handle of X Window for GL
|
||||
void *gfxContext, //!< HGLRC/GLXContext handle
|
||||
bool validateOnly //!< Only validate if the device can inter-operate with
|
||||
//!< pDevice/pContext, do not bind.
|
||||
);
|
||||
|
||||
//! Gets free memory on a GPU device
|
||||
virtual bool globalFreeMemory(size_t *freeMemory) const;
|
||||
|
||||
virtual void* hostAlloc(size_t size, size_t alignment, bool atomics = false) const;
|
||||
|
||||
virtual void hostFree(void* ptr, size_t size = 0) const;
|
||||
|
||||
void *deviceLocalAlloc(size_t size) const;
|
||||
|
||||
void deviceLocalFree(void *ptr, size_t size) const;
|
||||
|
||||
virtual void* svmAlloc(amd::Context& context, size_t size, size_t alignment, cl_svm_mem_flags flags = CL_MEM_READ_WRITE, void* svmPtr = NULL) const;
|
||||
|
||||
virtual void svmFree(void* ptr) const;
|
||||
|
||||
const Settings &settings() const { return reinterpret_cast<Settings &>(*settings_); }
|
||||
|
||||
//! Returns transfer engine object
|
||||
const device::BlitManager& xferMgr() const { return xferQueue()->blitMgr(); }
|
||||
|
||||
const size_t alloc_granularity() const { return alloc_granularity_; }
|
||||
|
||||
const hsa_profile_t agent_profile() const { return agent_profile_; }
|
||||
|
||||
const MesaInterop& mesa() const { return mesa_; }
|
||||
|
||||
//! Finds an appropriate map target
|
||||
amd::Memory* findMapTarget(size_t size) const;
|
||||
|
||||
//! Adds a map target to the cache
|
||||
bool addMapTarget(amd::Memory* memory) const;
|
||||
|
||||
private:
|
||||
amd::Monitor* mapCacheOps_; //!< Lock to serialise cache for the map resources
|
||||
std::vector<amd::Memory*>* mapCache_; //!< Map cache info structure
|
||||
|
||||
bool populateOCLDeviceConstants();
|
||||
static bool isHsaInitialized_;
|
||||
static hsa_agent_t cpu_agent_;
|
||||
static std::vector<hsa_agent_t> gpu_agents_;
|
||||
MesaInterop mesa_;
|
||||
hsa_agent_t _bkendDevice;
|
||||
hsa_profile_t agent_profile_;
|
||||
hsa_amd_memory_pool_t group_segment_;
|
||||
hsa_amd_memory_pool_t system_segment_;
|
||||
hsa_amd_memory_pool_t system_coarse_segment_;
|
||||
hsa_amd_memory_pool_t gpuvm_segment_;
|
||||
size_t gpuvm_segment_max_alloc_;
|
||||
size_t alloc_granularity_;
|
||||
static const bool offlineDevice_;
|
||||
amd::Context *context_; //!< A dummy context for internal data transfer
|
||||
VirtualGPU *xferQueue_; //!< Transfer queue, created on demand
|
||||
|
||||
VirtualGPU* xferQueue() const;
|
||||
}; // class roc::Device
|
||||
} // namespace roc
|
||||
|
||||
/**
|
||||
* @}
|
||||
*/
|
||||
#endif /*WITHOUT_HSA_BACKEND*/
|
||||
|
||||
@@ -1,120 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
|
||||
#ifndef WITHOUT_HSA_BACKEND
|
||||
|
||||
#include "os/os.hpp"
|
||||
#include "utils/debug.hpp"
|
||||
#include "utils/flags.hpp"
|
||||
#include "device/rocm/rocglinterop.hpp"
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <dlfcn.h>
|
||||
#endif
|
||||
|
||||
namespace roc
|
||||
{
|
||||
|
||||
#if !defined(_WIN32)
|
||||
static PFNMESAGLINTEROPGLXQUERYDEVICEINFOPROC GlxInfo = nullptr;
|
||||
static PFNMESAGLINTEROPGLXEXPORTOBJECTPROC GlxExport = nullptr;
|
||||
static PFNMESAGLINTEROPEGLQUERYDEVICEINFOPROC EglInfo = nullptr;
|
||||
static PFNMESAGLINTEROPEGLEXPORTOBJECTPROC EglExport = nullptr;
|
||||
#endif
|
||||
|
||||
// Counter shared by all MesaInterop objects, starting at zero; Bind()
// increments it each time it succeeds.
std::atomic<uint32_t> MesaInterop::refCount(0);
|
||||
|
||||
//! Compile-time answer: false in Windows builds, true in all other builds.
bool MesaInterop::Supported()
{
#if !defined(_WIN32)
    return true;
#else
    return false;
#endif
}
|
||||
|
||||
//Attempt to locate Mesa interop APIs. Return which of glx/egl are supported.
|
||||
bool MesaInterop::Bind(MESA_INTEROP_KIND Kind, const DisplayHandle& Display, const ContextHandle& Context)
|
||||
{
|
||||
#if defined(_WIN32)
|
||||
return false;
|
||||
#else
|
||||
if(Kind==MESA_INTEROP_NONE)
|
||||
return false;
|
||||
|
||||
if(kind!=MESA_INTEROP_NONE)
|
||||
{
|
||||
LogError("Error - MesaInterop Bind while already bound.");
|
||||
return false;
|
||||
}
|
||||
|
||||
void* glxinfo=dlsym(RTLD_DEFAULT, "MesaGLInteropGLXQueryDeviceInfo");
|
||||
void* eglinfo=dlsym(RTLD_DEFAULT, "MesaGLInteropEGLQueryDeviceInfo");
|
||||
|
||||
if(((glxinfo!=GlxInfo) || (eglinfo!=EglInfo)) && (refCount!=0))
|
||||
LogWarning("Warning - Mesa changed while holding interop contexts.");
|
||||
|
||||
GlxInfo=(PFNMESAGLINTEROPGLXQUERYDEVICEINFOPROC)glxinfo;
|
||||
EglInfo=(PFNMESAGLINTEROPEGLQUERYDEVICEINFOPROC)eglinfo;
|
||||
|
||||
GlxExport=(PFNMESAGLINTEROPGLXEXPORTOBJECTPROC)dlsym(RTLD_DEFAULT, "MesaGLInteropGLXExportObject");
|
||||
EglExport=(PFNMESAGLINTEROPEGLEXPORTOBJECTPROC)dlsym(RTLD_DEFAULT, "MesaGLInteropEGLExportObject");
|
||||
|
||||
uint32_t ret=MESA_INTEROP_NONE;
|
||||
if(GlxInfo && GlxExport)
|
||||
ret|=MESA_INTEROP_GLX;
|
||||
if(EglInfo && EglExport)
|
||||
ret|=MESA_INTEROP_EGL;
|
||||
|
||||
kind = MESA_INTEROP_KIND(ret & Kind);
|
||||
display=Display;
|
||||
context=Context;
|
||||
|
||||
if(kind!=MESA_INTEROP_NONE)
|
||||
{
|
||||
refCount++;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
//! Queries the device information through the bound interop API.
//! \return true when the bound kind is GLX or EGL and the matching query
//!         function reports MESA_GLINTEROP_SUCCESS; false otherwise
//!         (always false on Windows).
bool MesaInterop::GetInfo(mesa_glinterop_device_info& info) const
{
#ifdef _WIN32
    return false;
#else
    if (kind == MESA_INTEROP_GLX) {
        return MESA_GLINTEROP_SUCCESS == GlxInfo(display.glxDisplay, context.glxContext, &info);
    }
    if (kind == MESA_INTEROP_EGL) {
        return MESA_GLINTEROP_SUCCESS == EglInfo(display.eglDisplay, context.eglContext, &info);
    }
    // Any other kind: nothing to query.
    return false;
#endif
}
|
||||
|
||||
//! Exports a GL object through the bound interop API.
//! \return true when the bound kind is GLX or EGL and the matching export
//!         function reports MESA_GLINTEROP_SUCCESS; false otherwise
//!         (always false on Windows).
bool MesaInterop::Export (mesa_glinterop_export_in& in, mesa_glinterop_export_out& out) const
{
#ifdef _WIN32
    return false;
#else
    if (kind == MESA_INTEROP_GLX) {
        return MESA_GLINTEROP_SUCCESS == GlxExport(display.glxDisplay, context.glxContext, &in, &out);
    }
    if (kind == MESA_INTEROP_EGL) {
        return MESA_GLINTEROP_SUCCESS == EglExport(display.eglDisplay, context.eglContext, &in, &out);
    }
    // Any other kind: nothing to export through.
    return false;
#endif
}
|
||||
|
||||
}
|
||||
|
||||
#endif // WITHOUT_HSA_BACKEND
|
||||
@@ -1,158 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
#pragma once
|
||||
|
||||
#ifndef WITHOUT_HSA_BACKEND
|
||||
|
||||
#ifdef _WIN32
|
||||
//GLX header cannot be included in Windows due to X11 header dependency
|
||||
#define MESA_GLINTEROP_NO_GLX
|
||||
#include "device/rocm/mesa_glinterop.h"
|
||||
//Give GLX parameters void* size
|
||||
typedef void Display;
|
||||
typedef void* GLXContext;
|
||||
#undef MESA_GLINTEROP_NO_GLX
|
||||
#else
|
||||
#include "device/rocm/mesa_glinterop.h"
|
||||
#endif
|
||||
|
||||
#include "device/rocm/rocregisters.hpp"
|
||||
#include "hsa_ext_amd.h"
|
||||
|
||||
#include <atomic>
|
||||
|
||||
namespace roc
|
||||
{
|
||||
|
||||
//Specific typed container for version 1
|
||||
typedef struct metadata_amd_ci_vi_s {
|
||||
uint32_t version; // Must be 1
|
||||
uint32_t vendorID; // AMD | CZ
|
||||
SQ_IMG_RSRC_WORD0 word0;
|
||||
SQ_IMG_RSRC_WORD1 word1;
|
||||
SQ_IMG_RSRC_WORD2 word2;
|
||||
SQ_IMG_RSRC_WORD3 word3;
|
||||
SQ_IMG_RSRC_WORD4 word4;
|
||||
SQ_IMG_RSRC_WORD5 word5;
|
||||
SQ_IMG_RSRC_WORD6 word6;
|
||||
SQ_IMG_RSRC_WORD7 word7;
|
||||
uint32_t mip_offsets[0]; //Mip level offset bits [39:8] for each level (if any)
|
||||
} metadata_amd_ci_vi_t;
|
||||
|
||||
class image_metadata
|
||||
{
|
||||
private:
|
||||
metadata_amd_ci_vi_t* data;
|
||||
|
||||
image_metadata(const image_metadata&)=delete;
|
||||
image_metadata& operator=(const image_metadata&)=delete;
|
||||
|
||||
public:
|
||||
image_metadata() : data(nullptr) {}
|
||||
~image_metadata() { data=nullptr; }
|
||||
|
||||
bool create(hsa_amd_image_descriptor_t* image_desc)
|
||||
{
|
||||
if((image_desc->version!=1) || ((image_desc->deviceID>>16)!=0x1002)) return false;
|
||||
data=reinterpret_cast<metadata_amd_ci_vi_t*>(image_desc);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool setMipLevel(uint32_t level)
|
||||
{
|
||||
if(level>data->word3.bits.last_level)
|
||||
return false;
|
||||
data->word3.bits.base_level=level;
|
||||
data->word3.bits.last_level=level;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool setLayer(uint32_t layer)
|
||||
{
|
||||
data->word3.bits.type=SQ_RSRC_IMG_2D_ARRAY;
|
||||
data->word5.bits.last_array=layer;
|
||||
data->word5.bits.base_array=layer;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool setFace(GLenum face)
|
||||
{
|
||||
int index=face-GL_TEXTURE_CUBE_MAP_POSITIVE_X;
|
||||
if(index<0 || index>5)
|
||||
return false;
|
||||
if(data->word3.bits.type!=SQ_RSRC_IMG_CUBE)
|
||||
return false;
|
||||
return setLayer(index);
|
||||
}
|
||||
};
|
||||
|
||||
class MesaInterop
|
||||
{
|
||||
public:
|
||||
|
||||
enum MESA_INTEROP_KIND { MESA_INTEROP_NONE=0, MESA_INTEROP_GLX=1, MESA_INTEROP_EGL=2 };
|
||||
|
||||
union DisplayHandle
|
||||
{
|
||||
Display* glxDisplay;
|
||||
EGLDisplay eglDisplay;
|
||||
};
|
||||
|
||||
union ContextHandle
|
||||
{
|
||||
GLXContext glxContext;
|
||||
EGLContext eglContext;
|
||||
};
|
||||
|
||||
//True if the configuration supports the indicated interop ability.
|
||||
static bool Supported();
|
||||
|
||||
MesaInterop() { kind=MESA_INTEROP_NONE; }
|
||||
MesaInterop(const MesaInterop& rhs) { *this=rhs; }
|
||||
~MesaInterop() { Unbind(); }
|
||||
|
||||
const MesaInterop& operator=(const MesaInterop& rhs)
|
||||
{
|
||||
display=rhs.display;
|
||||
context=rhs.context;
|
||||
kind=rhs.kind;
|
||||
if(kind!=MESA_INTEROP_NONE)
|
||||
refCount++;
|
||||
return *this;
|
||||
}
|
||||
|
||||
/*
|
||||
Loads Mesa interop APIs and sets this interface object to use the indicated
|
||||
subsystem (GLX/EGL). Returns true if the required subsystem is found.
|
||||
*/
|
||||
bool Bind(MESA_INTEROP_KIND Kind, const DisplayHandle& Display, const ContextHandle& Context);
|
||||
|
||||
/*
|
||||
Releases use of Mesa interop APIs.
|
||||
Used to check for bad load/unload sequences.
|
||||
*/
|
||||
void Unbind()
|
||||
{
|
||||
if(kind==MESA_INTEROP_NONE) return;
|
||||
assert(refCount>0 && "Invalid refCount in MesaInterop.");
|
||||
refCount--;
|
||||
kind=MESA_INTEROP_NONE;
|
||||
}
|
||||
|
||||
bool GetInfo(mesa_glinterop_device_info& info) const;
|
||||
|
||||
bool Export (mesa_glinterop_export_in& in, mesa_glinterop_export_out& out) const;
|
||||
|
||||
private:
|
||||
static std::atomic<uint32_t> refCount;
|
||||
|
||||
DisplayHandle display;
|
||||
ContextHandle context;
|
||||
MESA_INTEROP_KIND kind;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif /*WITHOUT_HSA_BACKEND*/
|
||||
|
||||
@@ -1,680 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2009 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
|
||||
#include "rockernel.hpp"
|
||||
#include "SCHSAInterface.h"
|
||||
#include "amd_hsa_kernel_code.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#ifndef WITHOUT_HSA_BACKEND
|
||||
|
||||
namespace roc {
|
||||
|
||||
//! Translates the compiler library's argument kind into the HSAIL argument
//! kind. ARG_TYPE_ERROR and unknown kinds yield HSAIL_ARGTYPE_ERROR.
inline static HSAIL_ARG_TYPE
GetHSAILArgType(const aclArgData* argInfo)
{
    HSAIL_ARG_TYPE kind = HSAIL_ARGTYPE_ERROR;
    switch (argInfo->type) {
    case ARG_TYPE_POINTER: kind = HSAIL_ARGTYPE_POINTER; break;
    case ARG_TYPE_VALUE:   kind = HSAIL_ARGTYPE_VALUE;   break;
    case ARG_TYPE_IMAGE:   kind = HSAIL_ARGTYPE_IMAGE;   break;
    case ARG_TYPE_SAMPLER: kind = HSAIL_ARGTYPE_SAMPLER; break;
    default:               break;
    }
    return kind;
}
|
||||
|
||||
inline static size_t
|
||||
GetHSAILArgAlignment(const aclArgData* argInfo)
|
||||
{
|
||||
switch (argInfo->type) {
|
||||
case ARG_TYPE_POINTER:
|
||||
return argInfo->arg.pointer.align;
|
||||
default:
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
//! Returns the HSAIL access type of a pointer argument. Non-pointer arguments
//! have no access type; a pointer with an unrecognised access is read-write.
inline static HSAIL_ACCESS_TYPE
GetHSAILArgAccessType(const aclArgData* argInfo)
{
    if (argInfo->type != ARG_TYPE_POINTER) {
        return HSAIL_ACCESS_TYPE_NONE;
    }

    switch (argInfo->arg.pointer.type) {
    case ACCESS_TYPE_RO:
        return HSAIL_ACCESS_TYPE_RO;
    case ACCESS_TYPE_WO:
        return HSAIL_ACCESS_TYPE_WO;
    default:
        // ACCESS_TYPE_RW and anything else.
        return HSAIL_ACCESS_TYPE_RW;
    }
}
|
||||
|
||||
//! Returns the HSAIL address qualifier of an argument.
//! Images and samplers map to HSAIL_ADDRESS_GLOBAL; pointers map according to
//! their memory type; everything else is HSAIL_ADDRESS_ERROR. A pointer with
//! an unsupported memory type is additionally reported through LogError.
inline static HSAIL_ADDRESS_QUALIFIER
GetHSAILAddrQual(const aclArgData* argInfo)
{
    if ((argInfo->type == ARG_TYPE_IMAGE) ||
        (argInfo->type == ARG_TYPE_SAMPLER)) {
        return HSAIL_ADDRESS_GLOBAL;
    }
    if (argInfo->type != ARG_TYPE_POINTER) {
        return HSAIL_ADDRESS_ERROR;
    }

    switch (argInfo->arg.pointer.memory) {
    case PTR_MT_LDS:
    case PTR_MT_LDS_EMU:
        return HSAIL_ADDRESS_LOCAL;
    case PTR_MT_GLOBAL:
    case PTR_MT_UAV:
    case PTR_MT_CONSTANT:
    case PTR_MT_CONSTANT_EMU:
        return HSAIL_ADDRESS_GLOBAL;
    default:
        // PTR_MT_ERROR and every memory type not listed above.
        break;
    }
    LogError("Unsupported address type");
    return HSAIL_ADDRESS_ERROR;
}
|
||||
|
||||
/* f16 returns f32 - workaround due to comp lib */
|
||||
inline static HSAIL_DATA_TYPE
|
||||
GetHSAILDataType(const aclArgData* argInfo)
|
||||
{
|
||||
aclArgDataType dataType;
|
||||
|
||||
if (argInfo->type == ARG_TYPE_POINTER) {
|
||||
dataType = argInfo->arg.pointer.data;
|
||||
}
|
||||
else if (argInfo->type == ARG_TYPE_VALUE) {
|
||||
dataType = argInfo->arg.value.data;
|
||||
}
|
||||
else {
|
||||
return HSAIL_DATATYPE_ERROR;
|
||||
}
|
||||
switch (dataType) {
|
||||
case DATATYPE_i1:
|
||||
return HSAIL_DATATYPE_B1;
|
||||
case DATATYPE_i8:
|
||||
return HSAIL_DATATYPE_S8;
|
||||
case DATATYPE_i16:
|
||||
return HSAIL_DATATYPE_S16;
|
||||
case DATATYPE_i32:
|
||||
return HSAIL_DATATYPE_S32;
|
||||
case DATATYPE_i64:
|
||||
return HSAIL_DATATYPE_S64;
|
||||
case DATATYPE_u8:
|
||||
return HSAIL_DATATYPE_U8;
|
||||
case DATATYPE_u16:
|
||||
return HSAIL_DATATYPE_U16;
|
||||
case DATATYPE_u32:
|
||||
return HSAIL_DATATYPE_U32;
|
||||
case DATATYPE_u64:
|
||||
return HSAIL_DATATYPE_U64;
|
||||
case DATATYPE_f16:
|
||||
return HSAIL_DATATYPE_F32;
|
||||
case DATATYPE_f32:
|
||||
return HSAIL_DATATYPE_F32;
|
||||
case DATATYPE_f64:
|
||||
return HSAIL_DATATYPE_F64;
|
||||
case DATATYPE_struct:
|
||||
return HSAIL_DATATYPE_STRUCT;
|
||||
case DATATYPE_opaque:
|
||||
return HSAIL_DATATYPE_OPAQUE;
|
||||
case DATATYPE_ERROR:
|
||||
default:
|
||||
return HSAIL_DATATYPE_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
// returns size in number of bytes
|
||||
inline static int
|
||||
GetHSAILArgSize(const aclArgData *argInfo)
|
||||
{
|
||||
switch (argInfo->type) {
|
||||
case ARG_TYPE_VALUE:
|
||||
switch (GetHSAILDataType(argInfo)) {
|
||||
case HSAIL_DATATYPE_B1:
|
||||
return 1;
|
||||
case HSAIL_DATATYPE_B8:
|
||||
case HSAIL_DATATYPE_S8:
|
||||
case HSAIL_DATATYPE_U8:
|
||||
return 1;
|
||||
case HSAIL_DATATYPE_B16:
|
||||
case HSAIL_DATATYPE_U16:
|
||||
case HSAIL_DATATYPE_S16:
|
||||
case HSAIL_DATATYPE_F16:
|
||||
return 2;
|
||||
case HSAIL_DATATYPE_B32:
|
||||
case HSAIL_DATATYPE_U32:
|
||||
case HSAIL_DATATYPE_S32:
|
||||
case HSAIL_DATATYPE_F32:
|
||||
return 4;
|
||||
case HSAIL_DATATYPE_B64:
|
||||
case HSAIL_DATATYPE_U64:
|
||||
case HSAIL_DATATYPE_S64:
|
||||
case HSAIL_DATATYPE_F64:
|
||||
return 8;
|
||||
case HSAIL_DATATYPE_STRUCT:
|
||||
return argInfo->arg.value.numElements;
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
case ARG_TYPE_POINTER:
|
||||
case ARG_TYPE_IMAGE:
|
||||
case ARG_TYPE_SAMPLER:
|
||||
return sizeof(void*);
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
//! Returns the clk_value_type_t describing an argument.
//! Pointers and images are T_POINTER, samplers are T_SAMPLER, and values are
//! looked up by element type and vector width. Unsupported element types,
//! unsupported vector widths and unknown argument kinds yield T_VOID.
inline static clk_value_type_t
GetOclType(const aclArgData* argInfo)
{
    // Rows: element type. Columns: vector width 1, 2, 3, 4, 8, 16.
    static const clk_value_type_t ClkValueMapType[6][6] = {
        { T_CHAR,   T_CHAR2,   T_CHAR3,   T_CHAR4,   T_CHAR8,   T_CHAR16   },
        { T_SHORT,  T_SHORT2,  T_SHORT3,  T_SHORT4,  T_SHORT8,  T_SHORT16  },
        { T_INT,    T_INT2,    T_INT3,    T_INT4,    T_INT8,    T_INT16    },
        { T_LONG,   T_LONG2,   T_LONG3,   T_LONG4,   T_LONG8,   T_LONG16   },
        { T_FLOAT,  T_FLOAT2,  T_FLOAT3,  T_FLOAT4,  T_FLOAT8,  T_FLOAT16  },
        { T_DOUBLE, T_DOUBLE2, T_DOUBLE3, T_DOUBLE4, T_DOUBLE8, T_DOUBLE16 },
    };

    switch (argInfo->type) {
    case ARG_TYPE_POINTER:
    case ARG_TYPE_IMAGE:
        return T_POINTER;
    case ARG_TYPE_SAMPLER:
        return T_SAMPLER;
    case ARG_TYPE_VALUE:
        break;
    default:
        return T_VOID;
    }

    // Signed and unsigned integers share a row; f16 shares the float row.
    uint row;
    switch (argInfo->arg.value.data) {
    case DATATYPE_i8:
    case DATATYPE_u8:
        row = 0;
        break;
    case DATATYPE_i16:
    case DATATYPE_u16:
        row = 1;
        break;
    case DATATYPE_i32:
    case DATATYPE_u32:
        row = 2;
        break;
    case DATATYPE_i64:
    case DATATYPE_u64:
        row = 3;
        break;
    case DATATYPE_f16:
    case DATATYPE_f32:
        row = 4;
        break;
    case DATATYPE_f64:
        row = 5;
        break;
    default:
        return T_VOID;
    }

    uint column;
    switch (argInfo->arg.value.numElements) {
    case 1:  column = 0; break;
    case 2:  column = 1; break;
    case 3:  column = 2; break;
    case 4:  column = 3; break;
    case 8:  column = 4; break;
    case 16: column = 5; break;
    default: return T_VOID;
    }
    return ClkValueMapType[row][column];
}
|
||||
|
||||
//! Returns the OpenCL address qualifier of an argument.
//! Images are global, pointers follow their memory type, and every other
//! case (including pointers with an unlisted memory type) is private.
inline static cl_kernel_arg_address_qualifier
GetOclAddrQual(const aclArgData* argInfo)
{
    if (argInfo->type == ARG_TYPE_IMAGE) {
        return CL_KERNEL_ARG_ADDRESS_GLOBAL;
    }
    if (argInfo->type != ARG_TYPE_POINTER) {
        //default for all other cases
        return CL_KERNEL_ARG_ADDRESS_PRIVATE;
    }

    switch (argInfo->arg.pointer.memory) {
    case PTR_MT_GLOBAL:
    case PTR_MT_UAV:
        return CL_KERNEL_ARG_ADDRESS_GLOBAL;
    case PTR_MT_CONSTANT_EMU:
    case PTR_MT_UAV_CONSTANT:
    case PTR_MT_CONSTANT:
        return CL_KERNEL_ARG_ADDRESS_CONSTANT;
    case PTR_MT_LDS:
    case PTR_MT_LDS_EMU:
        return CL_KERNEL_ARG_ADDRESS_LOCAL;
    default:
        return CL_KERNEL_ARG_ADDRESS_PRIVATE;
    }
}
|
||||
|
||||
//! Returns the OpenCL access qualifier of an image argument.
//! Non-image arguments and unrecognised access types yield
//! CL_KERNEL_ARG_ACCESS_NONE.
inline static cl_kernel_arg_access_qualifier
GetOclAccessQual(const aclArgData* argInfo)
{
    if (argInfo->type != ARG_TYPE_IMAGE) {
        return CL_KERNEL_ARG_ACCESS_NONE;
    }

    cl_kernel_arg_access_qualifier qualifier = CL_KERNEL_ARG_ACCESS_NONE;
    switch (argInfo->arg.image.type) {
    case ACCESS_TYPE_RO: qualifier = CL_KERNEL_ARG_ACCESS_READ_ONLY;  break;
    case ACCESS_TYPE_WO: qualifier = CL_KERNEL_ARG_ACCESS_WRITE_ONLY; break;
    case ACCESS_TYPE_RW: qualifier = CL_KERNEL_ARG_ACCESS_READ_WRITE; break;
    default:             break;
    }
    return qualifier;
}
|
||||
|
||||
//! Returns the OpenCL type qualifier bits of an argument.
//! Only pointers carry qualifiers: volatile and restrict come from the
//! pointer metadata, const is set either by the argument's isConst flag or
//! by the pointer living in one of the constant memory types.
inline static cl_kernel_arg_type_qualifier
GetOclTypeQual(const aclArgData* argInfo)
{
    if (argInfo->type != ARG_TYPE_POINTER) {
        return CL_KERNEL_ARG_TYPE_NONE;
    }

    bool inConstantMemory = false;
    switch (argInfo->arg.pointer.memory) {
    case PTR_MT_CONSTANT:
    case PTR_MT_UAV_CONSTANT:
    case PTR_MT_CONSTANT_EMU:
        inConstantMemory = true;
        break;
    default:
        break;
    }

    cl_kernel_arg_type_qualifier rv = CL_KERNEL_ARG_TYPE_NONE;
    if (argInfo->arg.pointer.isVolatile) {
        rv |= CL_KERNEL_ARG_TYPE_VOLATILE;
    }
    if (argInfo->arg.pointer.isRestrict) {
        rv |= CL_KERNEL_ARG_TYPE_RESTRICT;
    }
    if (argInfo->isConst || inConstantMemory) {
        rv |= CL_KERNEL_ARG_TYPE_CONST;
    }
    return rv;
}
|
||||
|
||||
static int
|
||||
GetOclSize(const aclArgData* argInfo)
|
||||
{
|
||||
switch (argInfo->type) {
|
||||
case ARG_TYPE_POINTER: return sizeof(void *);
|
||||
case ARG_TYPE_VALUE:
|
||||
switch (argInfo->arg.value.data) {
|
||||
case DATATYPE_i8:
|
||||
case DATATYPE_u8:
|
||||
case DATATYPE_struct:
|
||||
return 1 * argInfo->arg.value.numElements;
|
||||
case DATATYPE_u16:
|
||||
case DATATYPE_i16:
|
||||
case DATATYPE_f16:
|
||||
return 2 * argInfo->arg.value.numElements;
|
||||
case DATATYPE_u32:
|
||||
case DATATYPE_i32:
|
||||
case DATATYPE_f32:
|
||||
return 4 * argInfo->arg.value.numElements;
|
||||
case DATATYPE_i64:
|
||||
case DATATYPE_u64:
|
||||
case DATATYPE_f64:
|
||||
return 8 * argInfo->arg.value.numElements;
|
||||
case DATATYPE_ERROR:
|
||||
default: return -1;
|
||||
}
|
||||
case ARG_TYPE_IMAGE: return sizeof(cl_mem);
|
||||
case ARG_TYPE_SAMPLER: return sizeof(cl_sampler);
|
||||
default: return -1;
|
||||
}
|
||||
}
|
||||
|
||||
//! Wraps one compiler-library argument descriptor, caching its name and type
//! name as strings. The offset starts at zero instead of being left
//! uninitialised.
KernelArg::KernelArg(aclArgData *argInfo)
    : argInfo_(argInfo),
      offset_(0),
      name_(argInfo->argStr),
      typeName_(argInfo->typeStr) {
}
|
||||
|
||||
int KernelArg::size() {
|
||||
switch (argInfo_->type) {
|
||||
case ARG_TYPE_POINTER: {
|
||||
return sizeof(void *);
|
||||
}
|
||||
case ARG_TYPE_VALUE: {
|
||||
switch (argInfo_->arg.value.data) {
|
||||
case DATATYPE_ERROR: {
|
||||
return -1;
|
||||
}
|
||||
case DATATYPE_i8:
|
||||
case DATATYPE_u8:
|
||||
case DATATYPE_struct: {
|
||||
return 1 * argInfo_->arg.value.numElements;
|
||||
}
|
||||
case DATATYPE_u16:
|
||||
case DATATYPE_i16:
|
||||
case DATATYPE_f16: {
|
||||
return 2 * argInfo_->arg.value.numElements;
|
||||
}
|
||||
case DATATYPE_u32:
|
||||
case DATATYPE_i32:
|
||||
case DATATYPE_f32: {
|
||||
return 4 * argInfo_->arg.value.numElements;
|
||||
}
|
||||
case DATATYPE_i64:
|
||||
case DATATYPE_u64:
|
||||
case DATATYPE_f64: {
|
||||
return 8 * argInfo_->arg.value.numElements;
|
||||
}
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
case ARG_TYPE_IMAGE: {
|
||||
return sizeof(cl_mem);
|
||||
}
|
||||
case ARG_TYPE_SAMPLER: {
|
||||
return sizeof(cl_sampler);
|
||||
}
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
std::string& KernelArg::name() {
|
||||
return name_;
|
||||
}
|
||||
|
||||
std::string& KernelArg::typeName()
|
||||
{
|
||||
return typeName_;
|
||||
}
|
||||
|
||||
void
|
||||
Kernel::initArgList(const aclArgData* aclArg)
|
||||
{
|
||||
// Initialize the hsail argument list too
|
||||
initHsailArgs(aclArg);
|
||||
|
||||
// Iterate through the arguments and insert into parameterList
|
||||
device::Kernel::parameters_t params;
|
||||
amd::KernelParameterDescriptor desc;
|
||||
size_t offset = 0;
|
||||
|
||||
// Reserved arguments for HSAIL launch
|
||||
aclArg += MaxExtraArgumentsNum;
|
||||
for (uint i = 0; aclArg->struct_size != 0; i++, aclArg++) {
|
||||
desc.name_ = hsailArgList_[i]->name_.c_str();
|
||||
desc.type_ = GetOclType(aclArg);
|
||||
desc.addressQualifier_ = GetOclAddrQual(aclArg);
|
||||
desc.accessQualifier_ = GetOclAccessQual(aclArg);
|
||||
desc.typeQualifier_ = GetOclTypeQual(aclArg);
|
||||
desc.typeName_ = hsailArgList_[i]->typeName_.c_str();
|
||||
|
||||
// Make a check if it is local or global
|
||||
if (desc.addressQualifier_ == CL_KERNEL_ARG_ADDRESS_LOCAL) {
|
||||
desc.size_ = 0;
|
||||
}
|
||||
else {
|
||||
desc.size_ = GetOclSize(aclArg);
|
||||
}
|
||||
|
||||
// Make offset alignment to match CPU metadata, since
|
||||
// in multidevice config abstraction layer has a single signature
|
||||
// and CPU sends the paramaters as they are allocated in memory
|
||||
size_t size = desc.size_;
|
||||
if (size == 0) {
|
||||
// Local memory for CPU
|
||||
size = sizeof(cl_mem);
|
||||
}
|
||||
offset = amd::alignUp(offset, std::min(size, size_t(16)));
|
||||
desc.offset_ = offset;
|
||||
offset += amd::alignUp(size, sizeof(uint32_t));
|
||||
params.push_back(desc);
|
||||
}
|
||||
createSignature(params);
|
||||
}
|
||||
|
||||
void
|
||||
Kernel::initHsailArgs(const aclArgData* aclArg)
|
||||
{
|
||||
int offset = 0;
|
||||
|
||||
// Reserved arguments for HSAIL launch
|
||||
aclArg += MaxExtraArgumentsNum;
|
||||
|
||||
// Iterate through the each kernel argument
|
||||
for (; aclArg->struct_size != 0; aclArg++) {
|
||||
HsailKernelArg* arg = new HsailKernelArg;
|
||||
// Initialize HSAIL kernel argument
|
||||
arg->name_ = aclArg->argStr;
|
||||
arg->typeName_ = aclArg->typeStr;
|
||||
arg->size_ = GetHSAILArgSize(aclArg);
|
||||
arg->offset_ = offset;
|
||||
arg->type_ = GetHSAILArgType(aclArg);
|
||||
arg->addrQual_ = GetHSAILAddrQual(aclArg);
|
||||
arg->dataType_ = GetHSAILDataType(aclArg);
|
||||
// If vector of args we add additional arguments to flatten it out
|
||||
arg->numElem_ = ((aclArg->type == ARG_TYPE_VALUE) &&
|
||||
(aclArg->arg.value.data != DATATYPE_struct)) ?
|
||||
aclArg->arg.value.numElements : 1;
|
||||
arg->alignment_ = GetHSAILArgAlignment(aclArg);
|
||||
arg->access_ = GetHSAILArgAccessType(aclArg);
|
||||
offset += GetHSAILArgSize(aclArg);
|
||||
hsailArgList_.push_back(arg);
|
||||
}
|
||||
}
|
||||
|
||||
//! Stores the program, code handle and segment sizes of a finalized kernel.
//! The metadata itself is loaded later by init(). kernelDirectiveOffset_ is
//! zeroed here so it never holds an indeterminate value.
Kernel::Kernel(std::string name, HSAILProgram* prog,
               const uint64_t& kernelCodeHandle,
               const uint32_t workgroupGroupSegmentByteSize,
               const uint32_t workitemPrivateSegmentByteSize,
               const uint32_t kernargSegmentByteSize,
               const uint32_t kernargSegmentAlignment,
               uint extraArgsNum)
    : device::Kernel(name),
      program_(prog),
      kernelCodeHandle_(kernelCodeHandle),
      workgroupGroupSegmentByteSize_(workgroupGroupSegmentByteSize),
      workitemPrivateSegmentByteSize_(workitemPrivateSegmentByteSize),
      kernargSegmentByteSize_(kernargSegmentByteSize),
      kernargSegmentAlignment_(kernargSegmentAlignment),
      kernelDirectiveOffset_(0),
      extraArgumentsNum_(extraArgsNum) {}
|
||||
|
||||
bool Kernel::init(){
|
||||
acl_error errorCode;
|
||||
//compile kernel down to ISA
|
||||
hsa_agent_t hsaDevice = program_->hsaDevice();
|
||||
// Pull out metadata from the ELF
|
||||
size_t sizeOfArgList;
|
||||
aclCompiler* compileHandle = program_->dev().compiler();
|
||||
std::string openClKernelName("&__OpenCL_" + name() + "_kernel");
|
||||
errorCode = g_complibApi._aclQueryInfo(compileHandle,
|
||||
program_->binaryElf(),
|
||||
RT_ARGUMENT_ARRAY,
|
||||
openClKernelName.c_str(),
|
||||
NULL,
|
||||
&sizeOfArgList);
|
||||
if (errorCode != ACL_SUCCESS) {
|
||||
return false;
|
||||
}
|
||||
std::unique_ptr<char[]> argList(new char[sizeOfArgList]);
|
||||
errorCode = g_complibApi._aclQueryInfo(compileHandle,
|
||||
program_->binaryElf(),
|
||||
RT_ARGUMENT_ARRAY,
|
||||
openClKernelName.c_str(),
|
||||
argList.get(),
|
||||
&sizeOfArgList);
|
||||
if (errorCode != ACL_SUCCESS) {
|
||||
return false;
|
||||
}
|
||||
//Set the argList
|
||||
initArgList((const aclArgData *) argList.get());
|
||||
//Set the workgroup information for the kernel
|
||||
memset(&workGroupInfo_, 0, sizeof(workGroupInfo_));
|
||||
workGroupInfo_.availableLDSSize_ = program_->dev().info().localMemSizePerCU_;
|
||||
assert(workGroupInfo_.availableLDSSize_ > 0);
|
||||
workGroupInfo_.availableSGPRs_ = 0;
|
||||
workGroupInfo_.availableVGPRs_ = 0;
|
||||
size_t sizeOfWorkGroupSize;
|
||||
errorCode = g_complibApi._aclQueryInfo(compileHandle,
|
||||
program_->binaryElf(),
|
||||
RT_WORK_GROUP_SIZE,
|
||||
openClKernelName.c_str(),
|
||||
NULL,
|
||||
&sizeOfWorkGroupSize);
|
||||
if (errorCode != ACL_SUCCESS) {
|
||||
return false;
|
||||
}
|
||||
errorCode = g_complibApi._aclQueryInfo(compileHandle,
|
||||
program_->binaryElf(),
|
||||
RT_WORK_GROUP_SIZE,
|
||||
openClKernelName.c_str(),
|
||||
workGroupInfo_.compileSize_,
|
||||
&sizeOfWorkGroupSize);
|
||||
if (errorCode != ACL_SUCCESS) {
|
||||
return false;
|
||||
}
|
||||
|
||||
uint32_t wavefront_size = 0;
|
||||
if (HSA_STATUS_SUCCESS !=
|
||||
hsa_agent_get_info(
|
||||
program_->hsaDevice(), HSA_AGENT_INFO_WAVEFRONT_SIZE,
|
||||
&wavefront_size)) {
|
||||
return false;
|
||||
}
|
||||
assert(wavefront_size > 0);
|
||||
|
||||
// Setting it the same as used LDS.
|
||||
workGroupInfo_.localMemSize_ = workgroupGroupSegmentByteSize_;
|
||||
workGroupInfo_.privateMemSize_ = workitemPrivateSegmentByteSize_;
|
||||
workGroupInfo_.usedLDSSize_ = workgroupGroupSegmentByteSize_;
|
||||
workGroupInfo_.preferredSizeMultiple_ = wavefront_size;
|
||||
workGroupInfo_.usedSGPRs_ = 0;
|
||||
workGroupInfo_.usedStackSize_ = 0;
|
||||
workGroupInfo_.usedVGPRs_ = 0;
|
||||
workGroupInfo_.wavefrontPerSIMD_ =
|
||||
program_->dev().info().maxWorkItemSizes_[0] / wavefront_size;
|
||||
workGroupInfo_.wavefrontSize_ = wavefront_size;
|
||||
if (workGroupInfo_.compileSize_[0] != 0) {
|
||||
workGroupInfo_.size_ =
|
||||
workGroupInfo_.compileSize_[0] *
|
||||
workGroupInfo_.compileSize_[1] *
|
||||
workGroupInfo_.compileSize_[2];
|
||||
}
|
||||
else {
|
||||
workGroupInfo_.size_ = program_->dev().info().maxWorkGroupSize_;
|
||||
}
|
||||
|
||||
// Pull out printf metadata from the ELF
|
||||
size_t sizeOfPrintfList;
|
||||
errorCode = g_complibApi._aclQueryInfo(compileHandle, program_->binaryElf(), RT_GPU_PRINTF_ARRAY,
|
||||
openClKernelName.c_str(), NULL, &sizeOfPrintfList);
|
||||
if (errorCode != ACL_SUCCESS){
|
||||
return false;
|
||||
}
|
||||
|
||||
// Make sure kernel has any printf info
|
||||
if (0 != sizeOfPrintfList) {
|
||||
std::unique_ptr<char[]> aclPrintfList(new char[sizeOfPrintfList]);
|
||||
if (!aclPrintfList) {
|
||||
return false;
|
||||
}
|
||||
errorCode = g_complibApi._aclQueryInfo(
|
||||
compileHandle, program_->binaryElf(), RT_GPU_PRINTF_ARRAY,
|
||||
openClKernelName.c_str(), aclPrintfList.get(), &sizeOfPrintfList);
|
||||
if (errorCode != ACL_SUCCESS) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Set the Printf List
|
||||
initPrintf(reinterpret_cast<aclPrintfFmt*>(aclPrintfList.get()));
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void Kernel::initPrintf(const aclPrintfFmt* aclPrintf) {
|
||||
PrintfInfo info;
|
||||
uint index = 0;
|
||||
for (; aclPrintf->struct_size != 0; aclPrintf++) {
|
||||
index = aclPrintf->ID;
|
||||
if (printf_.size() <= index) {
|
||||
printf_.resize(index + 1);
|
||||
}
|
||||
std::string pfmt = aclPrintf->fmtStr;
|
||||
size_t pos = 0;
|
||||
for (size_t i = 0; i < pfmt.size(); ++i) {
|
||||
char symbol = pfmt[pos++];
|
||||
if (symbol == '\\') {
|
||||
// Rest of the C escape sequences (e.g. \') are handled correctly
|
||||
// by the MDParser, we are not sure exactly how!
|
||||
switch (pfmt[pos]) {
|
||||
case 'a':
|
||||
pos++;
|
||||
symbol = '\a';
|
||||
break;
|
||||
case 'b':
|
||||
pos++;
|
||||
symbol = '\b';
|
||||
break;
|
||||
case 'f':
|
||||
pos++;
|
||||
symbol = '\f';
|
||||
break;
|
||||
case 'n':
|
||||
pos++;
|
||||
symbol = '\n';
|
||||
break;
|
||||
case 'r':
|
||||
pos++;
|
||||
symbol = '\r';
|
||||
break;
|
||||
case 'v':
|
||||
pos++;
|
||||
symbol = '\v';
|
||||
break;
|
||||
case '7':
|
||||
if (pfmt[++pos] == '2') {
|
||||
pos++;
|
||||
i++;
|
||||
symbol = '\72';
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
info.fmtString_.push_back(symbol);
|
||||
}
|
||||
info.fmtString_ += "\n";
|
||||
uint32_t* tmp_ptr = const_cast<uint32_t*>(aclPrintf->argSizes);
|
||||
for (uint i = 0; i < aclPrintf->numSizes; i++, tmp_ptr++) {
|
||||
info.arguments_.push_back(*tmp_ptr);
|
||||
}
|
||||
printf_[index] = info;
|
||||
info.arguments_.clear();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//! Frees the HsailKernelArg objects allocated by initHsailArgs, last to
//! first, and leaves hsailArgList_ empty.
Kernel::~Kernel() {
    for (size_t remaining = hsailArgList_.size(); remaining > 0; --remaining) {
        delete hsailArgList_[remaining - 1];
    }
    hsailArgList_.clear();
}
|
||||
|
||||
} // namespace roc
|
||||
#endif // WITHOUT_HSA_BACKEND
|
||||
@@ -1,195 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2009 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include "acl.h"
|
||||
#include "rocprogram.hpp"
|
||||
#include "top.hpp"
|
||||
#include "rocprintf.hpp"
|
||||
|
||||
#ifndef WITHOUT_HSA_BACKEND
|
||||
|
||||
namespace roc {
|
||||
|
||||
#define MAX_INFO_STRING_LEN 0x40
|
||||
//! Address qualifier of an HSAIL kernel argument.
enum HSAIL_ADDRESS_QUALIFIER {
    HSAIL_ADDRESS_ERROR          = 0,
    HSAIL_ADDRESS_GLOBAL         = 1,
    HSAIL_ADDRESS_LOCAL          = 2,
    HSAIL_MAX_ADDRESS_QUALIFIERS = 3  //!< Number of enumerators above
};
|
||||
|
||||
//! Kind of an HSAIL kernel argument.
enum HSAIL_ARG_TYPE {
    HSAIL_ARGTYPE_ERROR    = 0,
    HSAIL_ARGTYPE_POINTER  = 1,
    HSAIL_ARGTYPE_VALUE    = 2,
    HSAIL_ARGTYPE_IMAGE    = 3,
    HSAIL_ARGTYPE_SAMPLER  = 4,
    HSAIL_ARGMAX_ARG_TYPES = 5  //!< Number of enumerators above
};
|
||||
|
||||
//! Element type of an HSAIL kernel argument.
enum HSAIL_DATA_TYPE {
    HSAIL_DATATYPE_ERROR     = 0,
    // Bit types
    HSAIL_DATATYPE_B1        = 1,
    HSAIL_DATATYPE_B8        = 2,
    HSAIL_DATATYPE_B16       = 3,
    HSAIL_DATATYPE_B32       = 4,
    HSAIL_DATATYPE_B64       = 5,
    // Signed integers
    HSAIL_DATATYPE_S8        = 6,
    HSAIL_DATATYPE_S16       = 7,
    HSAIL_DATATYPE_S32       = 8,
    HSAIL_DATATYPE_S64       = 9,
    // Unsigned integers
    HSAIL_DATATYPE_U8        = 10,
    HSAIL_DATATYPE_U16       = 11,
    HSAIL_DATATYPE_U32       = 12,
    HSAIL_DATATYPE_U64       = 13,
    // Floating point
    HSAIL_DATATYPE_F16       = 14,
    HSAIL_DATATYPE_F32       = 15,
    HSAIL_DATATYPE_F64       = 16,
    // Aggregates
    HSAIL_DATATYPE_STRUCT    = 17,
    HSAIL_DATATYPE_OPAQUE    = 18,
    HSAIL_DATATYPE_MAX_TYPES = 19  //!< Number of enumerators above
};
|
||||
|
||||
//! Access type of an HSAIL kernel argument.
enum HSAIL_ACCESS_TYPE {
    HSAIL_ACCESS_TYPE_NONE = 0,
    HSAIL_ACCESS_TYPE_RO   = 1,
    HSAIL_ACCESS_TYPE_WO   = 2,
    HSAIL_ACCESS_TYPE_RW   = 3
};
|
||||
|
||||
struct HsailKernelArg
|
||||
{
|
||||
std::string name_; //!< Argument's name
|
||||
std::string typeName_; //!< Argument's type name
|
||||
uint size_; //!< Size in bytes
|
||||
uint offset_; //!< Argument's offset
|
||||
uint alignment_; //!< Argument's alignment
|
||||
HSAIL_ARG_TYPE type_; //!< Type of the argument
|
||||
HSAIL_ADDRESS_QUALIFIER addrQual_; //!< Address qualifier of the argument
|
||||
HSAIL_DATA_TYPE dataType_; //!< The type of data
|
||||
uint numElem_; //!< Number of elements
|
||||
HSAIL_ACCESS_TYPE access_; //!< Access type for the argument
|
||||
};
|
||||
|
||||
class KernelArg
|
||||
{
|
||||
public:
|
||||
KernelArg(aclArgData* argInfo);
|
||||
//! Return type of the argument
|
||||
clk_value_type_t amdoclType();
|
||||
//! Global, local etc - returns amdocl types
|
||||
clk_address_space_t amdoclAddrQual();
|
||||
//! Global,localetc - returns opencl type
|
||||
cl_kernel_arg_address_qualifier oclAddrQual();
|
||||
//! read , write etc - returns amdocl type
|
||||
clk_arg_qualifier_t amdoclAccessQual();
|
||||
//! read , write etc - returns opencl type type
|
||||
cl_kernel_arg_access_qualifier oclAccessQual();
|
||||
//! const,volatile,restrict etc - returns opencl type type
|
||||
cl_kernel_arg_type_qualifier oclTypeQual();
|
||||
|
||||
//! Name of the argument
|
||||
std::string& name();
|
||||
//! Name of the argument
|
||||
std::string& typeName();
|
||||
//! reflection
|
||||
std::string reflection(){ return name(); };
|
||||
//! Returns the size of the argument
|
||||
int size();
|
||||
//! returns the offset
|
||||
int offset();
|
||||
|
||||
void setOffset();
|
||||
|
||||
private:
|
||||
aclArgData* argInfo_;
|
||||
int offset_;
|
||||
std::string name_;
|
||||
std::string typeName_;
|
||||
};
|
||||
|
||||
class Kernel : public device::Kernel
|
||||
{
|
||||
public:
|
||||
Kernel(std::string name,
|
||||
HSAILProgram* prog,
|
||||
const uint64_t &kernelCodeHandle,
|
||||
const uint32_t workgroupGroupSegmentByteSize,
|
||||
const uint32_t workitemPrivateSegmentByteSize,
|
||||
const uint32_t kernargSegmentByteSize,
|
||||
const uint32_t kernargSegmentAlignment,
|
||||
uint extraArgsNum);
|
||||
|
||||
const uint64_t& KernelCodeHandle() {
|
||||
return kernelCodeHandle_;
|
||||
}
|
||||
|
||||
const uint32_t WorkgroupGroupSegmentByteSize() const {
|
||||
return workgroupGroupSegmentByteSize_;
|
||||
}
|
||||
|
||||
const uint32_t workitemPrivateSegmentByteSize() const {
|
||||
return workitemPrivateSegmentByteSize_;
|
||||
}
|
||||
|
||||
const uint64_t KernargSegmentByteSize() const {
|
||||
return kernargSegmentByteSize_;
|
||||
}
|
||||
|
||||
const uint8_t KernargSegmentAlignment() const {
|
||||
return kernargSegmentAlignment_;
|
||||
}
|
||||
|
||||
~Kernel();
|
||||
|
||||
//! Initializes the metadata required for this kernel
|
||||
bool init();
|
||||
|
||||
const HSAILProgram* program() {
|
||||
return static_cast<const HSAILProgram*>(program_);
|
||||
}
|
||||
|
||||
//! Returns a pointer to the hsail argument at the specified index
|
||||
HsailKernelArg* hsailArgAt(size_t index) const {
|
||||
return hsailArgList_[index];
|
||||
}
|
||||
|
||||
//! Max number of possible extra (hidden) kernel arguments
|
||||
static const uint MaxExtraArgumentsNum = 6;
|
||||
|
||||
uint extraArgumentsNum() const { return extraArgumentsNum_; }
|
||||
|
||||
//! Return printf info array
|
||||
const std::vector<PrintfInfo>& printfInfo() const {return printf_;}
|
||||
|
||||
private:
|
||||
//! Populates hsailArgList_
|
||||
void initArgList(const aclArgData* aclArg);
|
||||
|
||||
//! Initializes Hsail Argument metadata and info ;
|
||||
void initHsailArgs(const aclArgData* aclArg);
|
||||
|
||||
//! Initializes HSAIL Printf metadata and info
|
||||
void initPrintf(const aclPrintfFmt* aclPrintf);
|
||||
|
||||
HSAILProgram *program_; //!< The roc::HSAILProgram context
|
||||
std::vector<HsailKernelArg*> hsailArgList_; //!< Vector list of HSAIL Arguments
|
||||
std::string compileOptions_; //!< compile used for finalizing this kernel
|
||||
uint64_t kernelCodeHandle_; //!< Kernel code handle (aka amd_kernel_code_t)
|
||||
const uint32_t workgroupGroupSegmentByteSize_;
|
||||
const uint32_t workitemPrivateSegmentByteSize_;
|
||||
const uint32_t kernargSegmentByteSize_;
|
||||
const uint32_t kernargSegmentAlignment_;
|
||||
size_t kernelDirectiveOffset_;
|
||||
const uint extraArgumentsNum_; // Number of arguments in Kernenv
|
||||
std::vector<PrintfInfo> printf_;
|
||||
};
|
||||
|
||||
} // namespace roc
|
||||
|
||||
#endif // WITHOUT_HSA_BACKEND
|
||||
|
||||
|
||||
@@ -1,783 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
|
||||
#ifndef WITHOUT_HSA_BACKEND
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include "CL/cl_ext.h"
|
||||
|
||||
#include "utils/util.hpp"
|
||||
#include "device/device.hpp"
|
||||
#include "device/rocm/rocmemory.hpp"
|
||||
#include "device/rocm/rocdevice.hpp"
|
||||
#include "device/rocm/rocblit.hpp"
|
||||
#include "device/rocm/rocglinterop.hpp"
|
||||
#include "thread/monitor.hpp"
|
||||
#include "platform/memory.hpp"
|
||||
#include "platform/sampler.hpp"
|
||||
#include "api/opencl/amdocl/cl_gl_amd.hpp"
|
||||
|
||||
namespace roc {
|
||||
|
||||
/////////////////////////////////roc::Memory//////////////////////////////
|
||||
//! Binds the device memory object to its device and to the owning
//! amd::Memory. No storage is allocated here: deviceMemory_ starts out null
//! and the kind defaults to MEMORY_KIND_NORMAL.
Memory::Memory(const roc::Device &dev, amd::Memory &owner)
    : device::Memory(owner), dev_(dev), deviceMemory_(nullptr), kind_(MEMORY_KIND_NORMAL)
{}
|
||||
|
||||
//! Asks the device to remove this object from its VA cache.
Memory::~Memory()
{
    const roc::Device& device = dev_;
    device.removeVACache(this);
}
|
||||
|
||||
bool
|
||||
Memory::allocateMapMemory(size_t allocationSize)
|
||||
{
|
||||
assert(mapMemory_ == NULL);
|
||||
|
||||
void *mapData = NULL;
|
||||
|
||||
amd::Memory* mapMemory = dev_.findMapTarget(owner()->getSize());
|
||||
|
||||
if (mapMemory == nullptr) {
|
||||
// Create buffer object to contain the map target.
|
||||
mapMemory =
|
||||
new(owner()->getContext()) amd::Buffer(
|
||||
owner()->getContext(), CL_MEM_ALLOC_HOST_PTR, owner()->getSize());
|
||||
|
||||
if ((mapMemory == NULL) || (!mapMemory->create())) {
|
||||
LogError("[OCL] Fail to allocate map target object");
|
||||
dev_.hostFree(mapData);
|
||||
if (mapMemory) {
|
||||
mapMemory->release();
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
roc::Memory* hsaMapMemory = reinterpret_cast<roc::Memory *>(
|
||||
mapMemory->getDeviceMemory(dev_));
|
||||
if (hsaMapMemory == nullptr) {
|
||||
mapMemory->release();
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
mapMemory_ = mapMemory;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void*
|
||||
Memory::allocMapTarget(
|
||||
const amd::Coord3D &origin,
|
||||
const amd::Coord3D ®ion,
|
||||
uint mapFlags,
|
||||
size_t *rowPitch,
|
||||
size_t *slicePitch)
|
||||
{
|
||||
// Map/Unmap must be serialized.
|
||||
amd::ScopedLock lock(owner()->lockMemoryOps());
|
||||
|
||||
incIndMapCount();
|
||||
|
||||
// If the device backing storage is direct accessible, use it.
|
||||
if (isHostMemDirectAccess()) {
|
||||
if (owner()->getHostMem() != nullptr) {
|
||||
return (static_cast<char *>(owner()->getHostMem()) + origin[0]);
|
||||
}
|
||||
|
||||
return (static_cast<char *>(deviceMemory_) + origin[0]);
|
||||
}
|
||||
|
||||
// Otherwise, check for host memory.
|
||||
void *hostMem = owner()->getHostMem();
|
||||
if (hostMem != NULL) {
|
||||
return (static_cast<char *>(hostMem) + origin[0]);
|
||||
}
|
||||
|
||||
// Allocate one if needed.
|
||||
if (indirectMapCount_ == 1) {
|
||||
if (!allocateMapMemory(owner()->getSize())) {
|
||||
decIndMapCount();
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Did the map resource allocation fail?
|
||||
if (mapMemory_ == NULL) {
|
||||
LogError("Could not map target resource");
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
roc::Memory* hsaMapMemory = reinterpret_cast<roc::Memory *>(
|
||||
mapMemory_->getDeviceMemory(dev_));
|
||||
return reinterpret_cast<address>(hsaMapMemory->getDeviceMemory()) + origin[0];
|
||||
}
|
||||
|
||||
void
|
||||
Memory::decIndMapCount()
|
||||
{
|
||||
// Map/Unmap must be serialized.
|
||||
amd::ScopedLock lock(owner()->lockMemoryOps());
|
||||
|
||||
if (indirectMapCount_ == 0) {
|
||||
LogError("decIndMapCount() called when indirectMapCount_ already zero");
|
||||
return;
|
||||
}
|
||||
|
||||
// Decrement the counter and release indirect map if it's the last op
|
||||
if (--indirectMapCount_ == 0 &&
|
||||
mapMemory_ != NULL) {
|
||||
if (!dev_.addMapTarget(mapMemory_)) {
|
||||
// Release the buffer object containing the map data.
|
||||
mapMemory_->release();
|
||||
}
|
||||
mapMemory_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
void *
|
||||
Memory::cpuMap(
|
||||
device::VirtualDevice& vDev,
|
||||
uint flags,
|
||||
uint startLayer,
|
||||
uint numLayers,
|
||||
size_t* rowPitch,
|
||||
size_t* slicePitch)
|
||||
{
|
||||
// Create the map target.
|
||||
void * mapTarget =
|
||||
allocMapTarget(amd::Coord3D(0), amd::Coord3D(0), 0, rowPitch, slicePitch);
|
||||
|
||||
assert(mapTarget != NULL);
|
||||
|
||||
if (!isHostMemDirectAccess()) {
|
||||
if (!vDev.blitMgr().readBuffer(
|
||||
*this, mapTarget, amd::Coord3D(0), amd::Coord3D(size()), true)) {
|
||||
decIndMapCount();
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
return mapTarget;
|
||||
}
|
||||
|
||||
void
|
||||
Memory::cpuUnmap(device::VirtualDevice& vDev)
|
||||
{
|
||||
if (!isHostMemDirectAccess()) {
|
||||
if (!vDev.blitMgr().writeBuffer(
|
||||
mapMemory_->getHostMem(), *this, amd::Coord3D(0),
|
||||
amd::Coord3D(size()), true)) {
|
||||
LogError("[OCL] Fail sync the device memory on cpuUnmap");
|
||||
}
|
||||
}
|
||||
|
||||
decIndMapCount();
|
||||
}
|
||||
|
||||
// Setup an interop buffer (dmabuf handle) as an OpenCL buffer
|
||||
bool Memory::createInteropBuffer(GLenum targetType, int miplevel, size_t* metadata_size, const hsa_amd_image_descriptor_t** metadata)
|
||||
{
|
||||
#if defined(_WIN32)
|
||||
return false;
|
||||
#else
|
||||
assert(owner()->isInterop() && "Object is not an interop object.");
|
||||
|
||||
mesa_glinterop_export_in in;
|
||||
mesa_glinterop_export_out out;
|
||||
|
||||
in.size=sizeof(mesa_glinterop_export_in);
|
||||
out.size=sizeof(mesa_glinterop_export_out);
|
||||
|
||||
if(owner()->getMemFlags() & CL_MEM_READ_ONLY)
|
||||
in.access=MESA_GLINTEROP_ACCESS_READ_ONLY;
|
||||
else if(owner()->getMemFlags() & CL_MEM_WRITE_ONLY)
|
||||
in.access=MESA_GLINTEROP_ACCESS_WRITE_ONLY;
|
||||
else
|
||||
in.access=MESA_GLINTEROP_ACCESS_READ_WRITE;
|
||||
|
||||
in.target = targetType;
|
||||
in.obj=owner()->getInteropObj()->asGLObject()->getGLName();
|
||||
in.miplevel=miplevel;
|
||||
in.out_driver_data_size=0;
|
||||
in.out_driver_data=NULL;
|
||||
|
||||
if(!dev_.mesa().Export(in, out))
|
||||
return false;
|
||||
|
||||
size_t size;
|
||||
hsa_agent_t agent=dev_.getBackendDevice();
|
||||
hsa_status_t status=hsa_amd_interop_map_buffer(1, &agent, out.dmabuf_fd, 0, &size, &deviceMemory_, metadata_size, (const void**)metadata);
|
||||
close(out.dmabuf_fd);
|
||||
|
||||
if(status!=HSA_STATUS_SUCCESS)
|
||||
return false;
|
||||
|
||||
kind_=MEMORY_KIND_INTEROP;
|
||||
assert(deviceMemory_!=NULL && "Interop map failed to produce a pointer!");
|
||||
|
||||
return true;
|
||||
#endif
|
||||
}
|
||||
|
||||
void Memory::destroyInteropBuffer()
|
||||
{
|
||||
assert(kind_==MEMORY_KIND_INTEROP && "Memory must be interop type.");
|
||||
hsa_amd_interop_unmap_buffer(deviceMemory_);
|
||||
deviceMemory_=NULL;
|
||||
}
|
||||
|
||||
/////////////////////////////////roc::Buffer//////////////////////////////
|
||||
|
||||
//! Constructs a device buffer; all state is set up by roc::Memory.
Buffer::Buffer(const roc::Device &dev, amd::Memory &owner) : roc::Memory(dev, owner)
{
}
|
||||
|
||||
//! Releases the buffer's device-side resources through destroy().
Buffer::~Buffer() { destroy(); }
|
||||
|
||||
void
|
||||
Buffer::destroy()
|
||||
{
|
||||
if (owner()->parent() != NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
if(kind_==MEMORY_KIND_INTEROP)
|
||||
{
|
||||
destroyInteropBuffer();
|
||||
return;
|
||||
}
|
||||
|
||||
const cl_mem_flags memFlags = owner()->getMemFlags();
|
||||
|
||||
if ((deviceMemory_ != nullptr) &&
|
||||
(deviceMemory_ != owner()->getHostMem())) {
|
||||
// if they are identical, the host pointer will be
|
||||
// deallocated later on => avoid double deallocation
|
||||
if (isHostMemDirectAccess()) {
|
||||
if (memFlags & CL_MEM_USE_HOST_PTR) {
|
||||
if (dev_.agent_profile() != HSA_PROFILE_FULL) {
|
||||
hsa_amd_memory_unlock(owner()->getHostMem());
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
dev_.deviceLocalFree(deviceMemory_, size());
|
||||
}
|
||||
}
|
||||
|
||||
if (memFlags & CL_MEM_USE_HOST_PTR) {
|
||||
if (dev_.agent_profile() == HSA_PROFILE_FULL) {
|
||||
hsa_memory_deregister(owner()->getHostMem(), size());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
Buffer::create()
|
||||
{
|
||||
//Interop buffer
|
||||
if(owner()->isInterop())
|
||||
return createInteropBuffer(GL_ARRAY_BUFFER, 0, NULL, NULL);
|
||||
|
||||
if (owner()->parent()) {
|
||||
// Sub-Buffer creation.
|
||||
roc::Memory *parentBuffer =
|
||||
static_cast<roc::Memory *>(owner()->parent()->getDeviceMemory(dev_));
|
||||
|
||||
if (parentBuffer == NULL) {
|
||||
LogError("[OCL] Fail to allocate parent buffer");
|
||||
return false;
|
||||
}
|
||||
|
||||
const size_t offset = owner()->getOrigin();
|
||||
deviceMemory_ =
|
||||
static_cast<char *>(parentBuffer->getDeviceMemory()) + offset;
|
||||
|
||||
flags_ |= SubMemoryObject;
|
||||
flags_ |=
|
||||
parentBuffer->isHostMemDirectAccess() ? HostMemoryDirectAccess : 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Allocate backing storage in device local memory unless UHP or AHP are set
|
||||
const cl_mem_flags memFlags = owner()->getMemFlags();
|
||||
if (!(memFlags & (CL_MEM_USE_HOST_PTR |
|
||||
CL_MEM_ALLOC_HOST_PTR | CL_MEM_USE_PERSISTENT_MEM_AMD))) {
|
||||
deviceMemory_ = dev_.deviceLocalAlloc(size());
|
||||
|
||||
if (deviceMemory_ == NULL) {
|
||||
// TODO: device memory is not enabled yet.
|
||||
// Fallback to system memory if exist.
|
||||
|
||||
flags_ |= HostMemoryDirectAccess;
|
||||
if (dev_.agent_profile() == HSA_PROFILE_FULL &&
|
||||
owner()->getHostMem() != NULL) {
|
||||
deviceMemory_ = owner()->getHostMem();
|
||||
assert(
|
||||
amd::isMultipleOf(
|
||||
deviceMemory_,
|
||||
static_cast<size_t>(dev_.info().memBaseAddrAlign_)));
|
||||
return true;
|
||||
}
|
||||
|
||||
deviceMemory_ = dev_.hostAlloc(size(), 1, false);
|
||||
}
|
||||
|
||||
assert(
|
||||
amd::isMultipleOf(
|
||||
deviceMemory_,
|
||||
static_cast<size_t>(dev_.info().memBaseAddrAlign_)));
|
||||
|
||||
if (deviceMemory_ && (memFlags & CL_MEM_COPY_HOST_PTR)) {
|
||||
// To avoid recurssive call to Device::createMemory, we perform
|
||||
// data transfer to the view of the buffer.
|
||||
amd::Buffer *bufferView = new (owner()->getContext()) amd::Buffer(
|
||||
*owner(), 0, owner()->getOrigin(), owner()->getSize());
|
||||
bufferView->create();
|
||||
|
||||
roc::Buffer *devBufferView =
|
||||
new roc::Buffer(dev_, *bufferView);
|
||||
devBufferView->deviceMemory_ = deviceMemory_;
|
||||
|
||||
bufferView->replaceDeviceMemory(&dev_, devBufferView);
|
||||
|
||||
bool ret = dev_.xferMgr().writeBuffer(
|
||||
owner()->getHostMem(), *devBufferView, amd::Coord3D(0),
|
||||
amd::Coord3D(size()), true);
|
||||
|
||||
if (!ret) {
|
||||
dev_.deviceLocalFree(deviceMemory_, size());
|
||||
deviceMemory_ = NULL;
|
||||
}
|
||||
|
||||
bufferView->release();
|
||||
return ret;
|
||||
}
|
||||
|
||||
return deviceMemory_ != NULL;
|
||||
}
|
||||
else if (memFlags & CL_MEM_USE_PERSISTENT_MEM_AMD) {
|
||||
deviceMemory_ = dev_.hostAlloc(size(), 1, false);
|
||||
if (deviceMemory_ != nullptr) {
|
||||
if (owner()->getHostMem() != nullptr) {
|
||||
memcpy(deviceMemory_, owner()->getHostMem(), size());
|
||||
}
|
||||
flags_ |= HostMemoryDirectAccess;
|
||||
}
|
||||
return deviceMemory_ != nullptr;
|
||||
}
|
||||
|
||||
assert(owner()->getHostMem() != NULL);
|
||||
|
||||
flags_ |= HostMemoryDirectAccess;
|
||||
|
||||
if (dev_.agent_profile() == HSA_PROFILE_FULL) {
|
||||
deviceMemory_ = owner()->getHostMem();
|
||||
|
||||
if (memFlags & CL_MEM_USE_HOST_PTR) {
|
||||
hsa_memory_register(deviceMemory_, size());
|
||||
}
|
||||
|
||||
return deviceMemory_ != NULL;
|
||||
}
|
||||
|
||||
if (owner()->getSvmPtr() != owner()->getHostMem()) {
|
||||
if (memFlags & CL_MEM_USE_HOST_PTR) {
|
||||
hsa_agent_t agent = dev_.getBackendDevice();
|
||||
hsa_status_t status = hsa_amd_memory_lock(
|
||||
owner()->getHostMem(), owner()->getSize(), &agent, 1, &deviceMemory_);
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
deviceMemory_ = nullptr;
|
||||
}
|
||||
}
|
||||
else {
|
||||
deviceMemory_ = owner()->getHostMem();
|
||||
}
|
||||
}
|
||||
else {
|
||||
deviceMemory_ = owner()->getHostMem();
|
||||
}
|
||||
|
||||
return deviceMemory_ != NULL;
|
||||
}
|
||||
|
||||
/////////////////////////////////roc::Image//////////////////////////////
|
||||
typedef struct ChannelOrderMap {
|
||||
uint32_t cl_channel_order;
|
||||
hsa_ext_image_channel_order_t hsa_channel_order;
|
||||
} ChannelOrderMap;
|
||||
|
||||
typedef struct ChannelTypeMap {
|
||||
uint32_t cl_channel_type;
|
||||
hsa_ext_image_channel_type_t hsa_channel_type;
|
||||
} ChannelTypeMap;
|
||||
|
||||
//! Lookup table: OpenCL image channel order -> HSA image channel order.
//! Searched linearly by Image::populateImageDescriptor().
static const ChannelOrderMap kChannelOrderMapping[] = {
    { CL_R,             HSA_EXT_IMAGE_CHANNEL_ORDER_R },
    { CL_A,             HSA_EXT_IMAGE_CHANNEL_ORDER_A },
    { CL_RG,            HSA_EXT_IMAGE_CHANNEL_ORDER_RG },
    { CL_RA,            HSA_EXT_IMAGE_CHANNEL_ORDER_RA },
    { CL_RGB,           HSA_EXT_IMAGE_CHANNEL_ORDER_RGB },
    { CL_RGBA,          HSA_EXT_IMAGE_CHANNEL_ORDER_RGBA },
    { CL_BGRA,          HSA_EXT_IMAGE_CHANNEL_ORDER_BGRA },
    { CL_ARGB,          HSA_EXT_IMAGE_CHANNEL_ORDER_ARGB },
    { CL_INTENSITY,     HSA_EXT_IMAGE_CHANNEL_ORDER_INTENSITY },
    { CL_LUMINANCE,     HSA_EXT_IMAGE_CHANNEL_ORDER_LUMINANCE },
    { CL_Rx,            HSA_EXT_IMAGE_CHANNEL_ORDER_RX },
    { CL_RGx,           HSA_EXT_IMAGE_CHANNEL_ORDER_RGX },
    { CL_RGBx,          HSA_EXT_IMAGE_CHANNEL_ORDER_RGBX },
    { CL_DEPTH,         HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH },
    { CL_DEPTH_STENCIL, HSA_EXT_IMAGE_CHANNEL_ORDER_DEPTH_STENCIL },
    { CL_sRGB,          HSA_EXT_IMAGE_CHANNEL_ORDER_SRGB },
    { CL_sRGBx,         HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBX },
    { CL_sRGBA,         HSA_EXT_IMAGE_CHANNEL_ORDER_SRGBA },
    { CL_sBGRA,         HSA_EXT_IMAGE_CHANNEL_ORDER_SBGRA },
    { CL_ABGR,          HSA_EXT_IMAGE_CHANNEL_ORDER_ABGR },
};
|
||||
|
||||
//! Lookup table: OpenCL image channel data type -> HSA image channel type.
//! Searched linearly by Image::populateImageDescriptor().
static const ChannelTypeMap kChannelTypeMapping[] = {
    { CL_SNORM_INT8,       HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT8 },
    { CL_SNORM_INT16,      HSA_EXT_IMAGE_CHANNEL_TYPE_SNORM_INT16 },
    { CL_UNORM_INT8,       HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT8 },
    { CL_UNORM_INT16,      HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT16 },
    { CL_UNORM_SHORT_565,  HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_565 },
    { CL_UNORM_SHORT_555,  HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_555 },
    { CL_UNORM_INT_101010, HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_SHORT_101010 },
    { CL_SIGNED_INT8,      HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT8 },
    { CL_SIGNED_INT16,     HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT16 },
    { CL_SIGNED_INT32,     HSA_EXT_IMAGE_CHANNEL_TYPE_SIGNED_INT32 },
    { CL_UNSIGNED_INT8,    HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8 },
    { CL_UNSIGNED_INT16,   HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT16 },
    { CL_UNSIGNED_INT32,   HSA_EXT_IMAGE_CHANNEL_TYPE_UNSIGNED_INT32 },
    { CL_HALF_FLOAT,       HSA_EXT_IMAGE_CHANNEL_TYPE_HALF_FLOAT },
    { CL_FLOAT,            HSA_EXT_IMAGE_CHANNEL_TYPE_FLOAT },
    { CL_UNORM_INT24,      HSA_EXT_IMAGE_CHANNEL_TYPE_UNORM_INT24 },
};
|
||||
|
||||
|
||||
//! Maps OpenCL memory flags onto an HSA access permission.
//! CL_MEM_READ_ONLY is tested first, then CL_MEM_WRITE_ONLY; anything else
//! yields read-write.
static hsa_access_permission_t
GetHsaAccessPermission(const cl_mem_flags flags) {
    if (flags & CL_MEM_READ_ONLY) {
        return HSA_ACCESS_PERMISSION_RO;
    }
    if (flags & CL_MEM_WRITE_ONLY) {
        return HSA_ACCESS_PERMISSION_WO;
    }
    return HSA_ACCESS_PERMISSION_RW;
}
|
||||
|
||||
//! Constructs a device image. Clears the host-direct-access / registered
//! flags, fills the HSA image descriptor from the owner and resets every
//! handle and pointer so that destroy() and createView() never observe
//! indeterminate values.
Image::Image(const roc::Device& dev, amd::Memory& owner) :
    roc::Memory(dev, owner)
{
    flags_ &= (~HostMemoryDirectAccess & ~HostMemoryRegistered);
    populateImageDescriptor();
    hsaImageObject_.handle = 0;
    hsaImageData_ = NULL;
    // Previously left uninitialised: amdImageDesc_ is read by createView()
    // and freed by destroy(); deviceImageInfo_.size is read by destroy().
    amdImageDesc_ = NULL;
    deviceImageInfo_.size = 0;
    deviceImageInfo_.alignment = 0;
}
|
||||
|
||||
void
|
||||
Image::populateImageDescriptor()
|
||||
{
|
||||
amd::Image* image = owner()->asImage();
|
||||
|
||||
// build HSA runtime image descriptor
|
||||
imageDescriptor_.width = image->getWidth();
|
||||
imageDescriptor_.height = image->getHeight();
|
||||
imageDescriptor_.depth = image->getDepth();
|
||||
imageDescriptor_.array_size = 0;
|
||||
|
||||
switch (image->getType())
|
||||
{
|
||||
case CL_MEM_OBJECT_IMAGE1D:
|
||||
imageDescriptor_.geometry = HSA_EXT_IMAGE_GEOMETRY_1D;
|
||||
imageDescriptor_.height = 1;
|
||||
imageDescriptor_.depth = 1;
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE1D_BUFFER:
|
||||
imageDescriptor_.geometry = HSA_EXT_IMAGE_GEOMETRY_1DB;
|
||||
imageDescriptor_.height = 1;
|
||||
imageDescriptor_.depth = 1;
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
|
||||
//@todo - arraySize = height ?!
|
||||
imageDescriptor_.geometry = HSA_EXT_IMAGE_GEOMETRY_1DA;
|
||||
imageDescriptor_.height = 1;
|
||||
imageDescriptor_.array_size = image->getHeight();
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE2D:
|
||||
imageDescriptor_.geometry = HSA_EXT_IMAGE_GEOMETRY_2D;
|
||||
imageDescriptor_.depth = 1;
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
|
||||
//@todo - arraySize = depth ?!
|
||||
imageDescriptor_.geometry = HSA_EXT_IMAGE_GEOMETRY_2DA;
|
||||
imageDescriptor_.depth = 1;
|
||||
imageDescriptor_.array_size = image->getDepth();
|
||||
break;
|
||||
case CL_MEM_OBJECT_IMAGE3D:
|
||||
imageDescriptor_.geometry = HSA_EXT_IMAGE_GEOMETRY_3D;
|
||||
break;
|
||||
}
|
||||
|
||||
const int kChannelOrderCount =
|
||||
sizeof(kChannelOrderMapping) / sizeof(ChannelOrderMap);
|
||||
for (int i = 0; i < kChannelOrderCount; i++) {
|
||||
if (image->getImageFormat().image_channel_order ==
|
||||
kChannelOrderMapping[i].cl_channel_order) {
|
||||
imageDescriptor_.format.channel_order =
|
||||
kChannelOrderMapping[i].hsa_channel_order;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
const int kChannelTypeCount =
|
||||
sizeof(kChannelTypeMapping) / sizeof(ChannelTypeMap);
|
||||
for (int i = 0; i < kChannelTypeCount; i++) {
|
||||
if (image->getImageFormat().image_channel_data_type ==
|
||||
kChannelTypeMapping[i].cl_channel_type) {
|
||||
imageDescriptor_.format.channel_type =
|
||||
kChannelTypeMapping[i].hsa_channel_type;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
permission_ =
|
||||
GetHsaAccessPermission(owner()->getMemFlags());
|
||||
}
|
||||
|
||||
bool
|
||||
Image::createInteropImage()
|
||||
{
|
||||
auto obj=owner()->getInteropObj()->asGLObject();
|
||||
assert(obj->getCLGLObjectType()!=CL_GL_OBJECT_BUFFER && "Non-image OpenGL object used with interop image API.");
|
||||
|
||||
const hsa_amd_image_descriptor_t* meta;
|
||||
size_t size=0;
|
||||
|
||||
GLenum glTarget = obj->getGLTarget();
|
||||
if (glTarget == GL_TEXTURE_CUBE_MAP) {
|
||||
glTarget = obj->getCubemapFace();
|
||||
}
|
||||
if(!createInteropBuffer(glTarget, obj->getGLMipLevel(), &size, &meta))
|
||||
{
|
||||
assert(false && "Failed to map image buffer.");
|
||||
return false;
|
||||
}
|
||||
MAKE_SCOPE_GUARD(BufferGuard, [&](){ destroyInteropBuffer(); });
|
||||
|
||||
amdImageDesc_=(hsa_amd_image_descriptor_t*)malloc(size);
|
||||
if(amdImageDesc_==NULL)
|
||||
return false;
|
||||
MAKE_SCOPE_GUARD(DescGuard, [&](){ free(amdImageDesc_); amdImageDesc_=NULL; });
|
||||
|
||||
memcpy(amdImageDesc_, meta, size);
|
||||
|
||||
image_metadata desc;
|
||||
if(!desc.create(amdImageDesc_))
|
||||
return false;
|
||||
|
||||
if(!desc.setMipLevel(obj->getGLMipLevel()))
|
||||
return false;
|
||||
|
||||
if (obj->getGLTarget()==GL_TEXTURE_CUBE_MAP)
|
||||
desc.setFace(obj->getCubemapFace());
|
||||
|
||||
hsaImageData_=deviceMemory_;
|
||||
|
||||
hsa_status_t err=hsa_amd_image_create(dev_.getBackendDevice(), &imageDescriptor_, amdImageDesc_, hsaImageData_, permission_, &hsaImageObject_);
|
||||
if(err!=HSA_STATUS_SUCCESS)
|
||||
return false;
|
||||
|
||||
BufferGuard.Dismiss();
|
||||
DescGuard.Dismiss();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
Image::create()
|
||||
{
|
||||
if (owner()->parent()) {
|
||||
// Image view creation
|
||||
roc::Memory *parent =
|
||||
static_cast<roc::Memory *>(owner()->parent()->getDeviceMemory(dev_));
|
||||
|
||||
if (parent == NULL) {
|
||||
LogError("[OCL] Fail to allocate parent image");
|
||||
return false;
|
||||
}
|
||||
|
||||
return createView(*parent);
|
||||
}
|
||||
|
||||
//Interop image
|
||||
if(owner()->isInterop())
|
||||
return createInteropImage();
|
||||
|
||||
// Get memory size requirement for device specific image.
|
||||
hsa_status_t status = hsa_ext_image_data_get_info(
|
||||
dev_.getBackendDevice(), &imageDescriptor_,
|
||||
permission_, &deviceImageInfo_);
|
||||
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
LogError("[OCL] Fail to allocate image memory");
|
||||
return false;
|
||||
}
|
||||
|
||||
// roc::Device::hostAlloc and deviceLocalAlloc implementation does not
|
||||
// support alignment larger than HSA memory region allocation granularity.
|
||||
// In this case, the user manages the alignment.
|
||||
const size_t alloc_size =
|
||||
(deviceImageInfo_.alignment <= dev_.alloc_granularity())
|
||||
? deviceImageInfo_.size
|
||||
: deviceImageInfo_.size + deviceImageInfo_.alignment;
|
||||
|
||||
if (!(owner()->getMemFlags() & CL_MEM_ALLOC_HOST_PTR)) {
|
||||
deviceMemory_ = dev_.deviceLocalAlloc(alloc_size);
|
||||
}
|
||||
|
||||
if (deviceMemory_ == NULL) {
|
||||
deviceMemory_ =
|
||||
dev_.hostAlloc(alloc_size, 1, false);
|
||||
}
|
||||
|
||||
hsaImageData_ = reinterpret_cast<const void *>(
|
||||
amd::alignUp(reinterpret_cast<uintptr_t>(deviceMemory_),
|
||||
deviceImageInfo_.alignment));
|
||||
|
||||
assert(amd::isMultipleOf(
|
||||
hsaImageData_, static_cast<size_t>(deviceImageInfo_.alignment)));
|
||||
|
||||
status = hsa_ext_image_create(
|
||||
dev_.getBackendDevice(), &imageDescriptor_, hsaImageData_,
|
||||
permission_, &hsaImageObject_);
|
||||
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
LogError("[OCL] Fail to allocate image memory");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
Image::createView(Memory &parent)
|
||||
{
|
||||
deviceMemory_ = parent.getDeviceMemory();
|
||||
|
||||
hsaImageData_ = (parent.owner()->asBuffer() != NULL)
|
||||
? deviceMemory_
|
||||
: static_cast<Image &>(parent).hsaImageData_;
|
||||
|
||||
kind_=parent.getKind();
|
||||
|
||||
hsa_status_t status;
|
||||
if(kind_==MEMORY_KIND_INTEROP)
|
||||
status = hsa_amd_image_create(dev_.getBackendDevice(), &imageDescriptor_, amdImageDesc_, hsaImageData_, permission_, &hsaImageObject_);
|
||||
else
|
||||
status= hsa_ext_image_create(dev_.getBackendDevice(), &imageDescriptor_, hsaImageData_, permission_, &hsaImageObject_);
|
||||
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
LogError("[OCL] Fail to allocate image memory");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void*
|
||||
Image::allocMapTarget(
|
||||
const amd::Coord3D& origin,
|
||||
const amd::Coord3D& region,
|
||||
uint mapFlags,
|
||||
size_t* rowPitch,
|
||||
size_t* slicePitch)
|
||||
{
|
||||
amd::ScopedLock lock(owner()->lockMemoryOps());
|
||||
|
||||
incIndMapCount();
|
||||
|
||||
void* pHostMem = owner()->getHostMem();
|
||||
|
||||
if (pHostMem == NULL) {
|
||||
if (indirectMapCount_ == 1) {
|
||||
if (!allocateMapMemory(owner()->getSize())) {
|
||||
decIndMapCount();
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Did the map resource allocation fail?
|
||||
if (mapMemory_ == NULL) {
|
||||
LogError("Could not map target resource");
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
pHostMem = mapMemory_->getHostMem();
|
||||
}
|
||||
|
||||
amd::Image* image = owner()->asImage();
|
||||
|
||||
size_t elementSize = image->getImageFormat().getElementSize();
|
||||
|
||||
size_t offset = origin[0] * elementSize;
|
||||
|
||||
// Adjust offset with Y dimension
|
||||
offset += image->getRowPitch() * origin[1];
|
||||
|
||||
// Adjust offset with Z dimension
|
||||
offset += image->getSlicePitch() * origin[2];
|
||||
|
||||
*rowPitch = image->getRowPitch();
|
||||
if (slicePitch != NULL) {
|
||||
*slicePitch = image->getSlicePitch();
|
||||
}
|
||||
|
||||
return (static_cast<uint8_t*>(pHostMem)+offset);
|
||||
}
|
||||
|
||||
//! Releases the image's device-side resources through destroy().
Image::~Image() { destroy(); }
|
||||
|
||||
void
|
||||
Image::destroy()
|
||||
{
|
||||
if (owner()->parent() != NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
if(kind_==MEMORY_KIND_INTEROP)
|
||||
{
|
||||
hsa_ext_image_destroy(dev_.getBackendDevice(), hsaImageObject_);
|
||||
free(amdImageDesc_);
|
||||
amdImageDesc_=NULL;
|
||||
destroyInteropBuffer();
|
||||
return;
|
||||
}
|
||||
|
||||
if (deviceMemory_ != NULL) {
|
||||
dev_.hostFree(deviceMemory_, deviceImageInfo_.size);
|
||||
}
|
||||
|
||||
if (hsaImageObject_.handle != 0) {
|
||||
hsa_status_t status =
|
||||
hsa_ext_image_destroy(dev_.getBackendDevice(), hsaImageObject_);
|
||||
assert(status == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif // WITHOUT_HSA_BACKEND
|
||||
@@ -1,188 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
#pragma once
|
||||
|
||||
#ifndef WITHOUT_HSA_BACKEND
|
||||
|
||||
#include "top.hpp"
|
||||
#include "platform/memory.hpp"
|
||||
#include "utils/debug.hpp"
|
||||
#include "device/rocm/rocdevice.hpp"
|
||||
#include "device/rocm/rocglinterop.hpp"
|
||||
|
||||
namespace roc {
|
||||
class Memory : public device::Memory {
|
||||
public:
|
||||
enum MEMORY_KIND { MEMORY_KIND_NORMAL=0, MEMORY_KIND_LOCK, MEMORY_KIND_GART, MEMORY_KIND_INTEROP };
|
||||
|
||||
Memory(const roc::Device &dev, amd::Memory &owner);
|
||||
|
||||
virtual ~Memory();
|
||||
|
||||
// Getter for deviceMemory_.
|
||||
void *getDeviceMemory() const { return deviceMemory_; }
|
||||
|
||||
// Gets a pointer to a region of host-visible memory for use as the target
|
||||
// of an indirect map for a given memory object
|
||||
virtual void *allocMapTarget(const amd::Coord3D &origin,
|
||||
const amd::Coord3D ®ion,
|
||||
uint mapFlags,
|
||||
size_t *rowPitch,
|
||||
size_t *slicePitch);
|
||||
|
||||
// Create device memory according to OpenCL memory flag.
|
||||
virtual bool create() = 0;
|
||||
|
||||
// Pins system memory associated with this memory object.
|
||||
virtual bool pinSystemMemory(void *hostPtr, // System memory address
|
||||
size_t size // Size of allocated system memory
|
||||
) {
|
||||
Unimplemented();
|
||||
return true;
|
||||
}
|
||||
|
||||
// Immediate blocking write from device cache to owners's backing store.
|
||||
// Marks owner as "current" by resetting the last writer to NULL.
|
||||
virtual void syncHostFromCache(SyncFlags syncFlags = SyncFlags())
|
||||
{
|
||||
// Need to revisit this when multi-devices is supported.
|
||||
}
|
||||
|
||||
// Releases indirect map surface
|
||||
void releaseIndirectMap() { decIndMapCount(); }
|
||||
|
||||
//! Map the device memory to CPU visible
|
||||
virtual void* cpuMap(
|
||||
device::VirtualDevice& vDev, //!< Virtual device for map operaiton
|
||||
uint flags = 0, //!< flags for the map operation
|
||||
// Optimization for multilayer map/unmap
|
||||
uint startLayer = 0, //!< Start layer for multilayer map
|
||||
uint numLayers = 0, //!< End layer for multilayer map
|
||||
size_t* rowPitch = NULL,//!< Row pitch for the device memory
|
||||
size_t* slicePitch = NULL //!< Slice pitch for the device memory
|
||||
);
|
||||
|
||||
//! Unmap the device memory
|
||||
virtual void cpuUnmap(
|
||||
device::VirtualDevice& vDev //!< Virtual device for unmap operaiton
|
||||
);
|
||||
|
||||
//Mesa has already decomressed if needed and also does acquire at the start of every command batch.
|
||||
virtual bool processGLResource(GLResourceOP operation) { return true; }
|
||||
|
||||
// Accessors for indirect map memory object
|
||||
amd::Memory *mapMemory() const { return mapMemory_; }
|
||||
|
||||
MEMORY_KIND getKind() const { return kind_; }
|
||||
|
||||
protected:
|
||||
|
||||
bool allocateMapMemory(size_t allocationSize);
|
||||
|
||||
// Decrement map count
|
||||
virtual void decIndMapCount();
|
||||
|
||||
// Free / deregister device memory.
|
||||
virtual void destroy() = 0;
|
||||
|
||||
// Place interop object into HSA's flat address space
|
||||
bool createInteropBuffer(GLenum targetType, int miplevel, size_t* metadata_size, const hsa_amd_image_descriptor_t** metadata);
|
||||
|
||||
void destroyInteropBuffer();
|
||||
|
||||
// Pointer to the device associated with this memory object.
|
||||
const roc::Device &dev_;
|
||||
|
||||
// Pointer to the device memory. This could be in system or device local mem.
|
||||
void* deviceMemory_;
|
||||
|
||||
// Track if this memory is interop, lock, gart, or normal.
|
||||
MEMORY_KIND kind_;
|
||||
|
||||
private:
|
||||
// Disable copy constructor
|
||||
Memory(const Memory &);
|
||||
|
||||
// Disable operator=
|
||||
Memory &operator=(const Memory &);
|
||||
|
||||
};
|
||||
|
||||
class Buffer : public roc::Memory {
|
||||
public:
|
||||
Buffer(const roc::Device &dev, amd::Memory &owner);
|
||||
|
||||
virtual ~Buffer();
|
||||
|
||||
// Create device memory according to OpenCL memory flag.
|
||||
virtual bool create();
|
||||
|
||||
// Recreate the device memory using new size and alignment.
|
||||
bool recreate(size_t newSize, size_t newAlignment, bool forceSystem);
|
||||
|
||||
private:
|
||||
// Disable copy constructor
|
||||
Buffer(const Buffer &);
|
||||
|
||||
// Disable operator=
|
||||
Buffer &operator=(const Buffer &);
|
||||
|
||||
// Free device memory.
|
||||
void destroy();
|
||||
};
|
||||
|
||||
class Image : public roc::Memory
|
||||
{
|
||||
public:
|
||||
Image(const roc::Device& dev, amd::Memory& owner);
|
||||
|
||||
virtual ~Image();
|
||||
|
||||
//! Create device memory according to OpenCL memory flag.
|
||||
virtual bool create();
|
||||
|
||||
//! Create an image view
|
||||
bool createView(Memory &parent);
|
||||
|
||||
//! Gets a pointer to a region of host-visible memory for use as the target
|
||||
//! of an indirect map for a given memory object
|
||||
virtual void* allocMapTarget(
|
||||
const amd::Coord3D& origin,
|
||||
const amd::Coord3D& region,
|
||||
uint mapFlags,
|
||||
size_t* rowPitch,
|
||||
size_t* slicePitch);
|
||||
|
||||
size_t getDeviceDataSize() { return deviceImageInfo_.size; }
|
||||
size_t getDeviceDataAlignment() { return deviceImageInfo_.alignment; }
|
||||
|
||||
hsa_ext_image_t getHsaImageObject() { return hsaImageObject_; }
|
||||
const hsa_ext_image_descriptor_t& getHsaImageDescriptor() const { return imageDescriptor_; }
|
||||
private:
|
||||
//! Disable copy constructor
|
||||
Image(const Buffer&);
|
||||
|
||||
//! Disable operator=
|
||||
Image& operator=(const Buffer&);
|
||||
|
||||
// Setup an interop image
|
||||
bool createInteropImage();
|
||||
|
||||
// Free / deregister device memory.
|
||||
void destroy();
|
||||
|
||||
void populateImageDescriptor();
|
||||
|
||||
hsa_ext_image_descriptor_t imageDescriptor_;
|
||||
hsa_access_permission_t permission_;
|
||||
hsa_ext_image_data_info_t deviceImageInfo_;
|
||||
hsa_ext_image_t hsaImageObject_;
|
||||
hsa_amd_image_descriptor_t* amdImageDesc_;
|
||||
|
||||
const void* hsaImageData_;
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -1,467 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2010 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
|
||||
#include "top.hpp"
|
||||
#include "os/os.hpp"
|
||||
#include "device/device.hpp"
|
||||
#include "device/rocm/rocdefs.hpp"
|
||||
#include "device/rocm/rocmemory.hpp"
|
||||
#include "device/rocm/rockernel.hpp"
|
||||
#include "device/rocm/rocprogram.hpp"
|
||||
#include "device/rocm/rocdevice.hpp"
|
||||
#include "device/rocm/rocprintf.hpp"
|
||||
#include <cstdio>
|
||||
#include <algorithm>
|
||||
#include <math.h>
|
||||
|
||||
namespace roc {
|
||||
|
||||
//! Creates the printf debug helper for \p device, writing to \p file.
//! The debug buffer is not allocated here (see allocate()).
PrintfDbg::PrintfDbg(Device& device, FILE* file)
    : dbgBuffer_(NULL), dbgBuffer_size_(0), dbgFile_(file), gpuDevice_(device)
{
}
|
||||
|
||||
//! Returns the debug buffer to the device's host allocator.
PrintfDbg::~PrintfDbg()
{
    dev().hostFree(dbgBuffer_, dbgBuffer_size_);
}
|
||||
|
||||
//! Allocates the printf debug buffer, or grows it when \p realloc is set.
//!
//! - No buffer yet: allocate dev().info().printfBufferSize_ bytes.
//! - Buffer exists and \p realloc is true: free it and allocate a new one of
//!   twice the size (the old contents are not preserved).
//! - Buffer exists and \p realloc is false: nothing to do.
//!
//! \return true when a debug buffer is available afterwards.
bool PrintfDbg::allocate(bool realloc) {
    if (NULL == dbgBuffer_) {
        dbgBuffer_size_ = dev().info().printfBufferSize_;
        dbgBuffer_ = reinterpret_cast<address>(
            dev().hostAlloc(dbgBuffer_size_, sizeof(void*)));
    } else if (realloc) {
        LogWarning("Debug buffer reallocation!");
        // Double the buffer size if it's not big enough
        dev().hostFree(dbgBuffer_, dbgBuffer_size_);
        dbgBuffer_size_ = dbgBuffer_size_ << 1;
        // The allocation call was missing here: the old code cast the comma
        // expression (size, sizeof(void*)) straight to a pointer.
        dbgBuffer_ = reinterpret_cast<address>(
            dev().hostAlloc(dbgBuffer_size_, sizeof(void*)));
    }

    return NULL != dbgBuffer_;
}
|
||||
|
||||
//! Tells whether the format chunk ends in a floating point conversion.
//!
//! Only the last character of \p fmt is inspected; it must be one of
//! 'e', 'E', 'f', 'g', 'G' or 'a'.
bool PrintfDbg::checkFloat(const std::string& fmt) const {
  const char conversion = fmt[fmt.size() - 1];
  return (conversion == 'e') || (conversion == 'E') || (conversion == 'f') ||
         (conversion == 'g') || (conversion == 'G') || (conversion == 'a');
}
|
||||
|
||||
//! Tells whether the format chunk ends in the string conversion 's'.
bool PrintfDbg::checkString(const std::string& fmt) const {
  const char conversion = fmt[fmt.size() - 1];
  return conversion == 's';
}
|
||||
|
||||
int PrintfDbg::checkVectorSpecifier(const std::string& fmt, size_t startPos,
|
||||
size_t& curPos) const {
|
||||
int vectorSize = 0;
|
||||
size_t pos = curPos;
|
||||
size_t size = curPos - startPos;
|
||||
|
||||
if (size >= 3) {
|
||||
size = 0;
|
||||
// no modifiers
|
||||
if (fmt[curPos - 3] == 'v') {
|
||||
size = 2;
|
||||
}
|
||||
// the modifiers are "h" or "l"
|
||||
else if (fmt[curPos - 4] == 'v') {
|
||||
size = 3;
|
||||
}
|
||||
// the modifier is "hh"
|
||||
else if ((curPos >= 5) && (fmt[curPos - 5] == 'v')) {
|
||||
size = 4;
|
||||
}
|
||||
if (size > 0) {
|
||||
curPos = size;
|
||||
pos -= curPos;
|
||||
|
||||
// Get vector size
|
||||
vectorSize = fmt[pos++] - '0';
|
||||
// PrintfDbg supports only 2, 3, 4, 8 and 16 wide vectors
|
||||
switch (vectorSize) {
|
||||
case 1:
|
||||
if ((fmt[pos++] - '0') == 6) {
|
||||
vectorSize = 16;
|
||||
} else {
|
||||
vectorSize = 0;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
case 3:
|
||||
case 4:
|
||||
case 8:
|
||||
break;
|
||||
default:
|
||||
vectorSize = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return vectorSize;
|
||||
}
|
||||
|
||||
//! Pseudo argument size used to mark a host-side constant C string
static const size_t ConstStr = 0xFFFFFFFFu;
//! Text printed between vector elements (comma followed by two zero bytes)
static const char Separator[] = {',', '\0', '\0'};
|
||||
|
||||
size_t PrintfDbg::outputArgument(const std::string& fmt, bool printFloat,
|
||||
size_t size, const uint32_t* argument) const {
|
||||
// Serialize the output to the screen
|
||||
// amd::ScopedLock k(dev().lockAsyncOps());
|
||||
|
||||
size_t copiedBytes = size;
|
||||
// Print the string argument, using standard PrintfDbg()
|
||||
if (checkString(fmt.c_str())) {
|
||||
// copiedBytes should be as number of printed chars
|
||||
copiedBytes = 0;
|
||||
//(null) should be printed
|
||||
if (*argument == 0) {
|
||||
amd::Os::printf(fmt.data(), 0);
|
||||
// copiedBytes = strlen("(null)")
|
||||
copiedBytes = 6;
|
||||
} else {
|
||||
const unsigned char* argumentStr =
|
||||
reinterpret_cast<const unsigned char*>(argument);
|
||||
amd::Os::printf(fmt.data(), argumentStr);
|
||||
// copiedBytes = strlen(argumentStr)
|
||||
while (argumentStr[copiedBytes++] != 0)
|
||||
;
|
||||
}
|
||||
}
|
||||
|
||||
// Print the argument(except for string ), using standard PrintfDbg()
|
||||
else {
|
||||
bool hlModifier = (strstr(fmt.c_str(), "hl") != NULL);
|
||||
std::string hlFmt;
|
||||
if (hlModifier) {
|
||||
hlFmt = fmt;
|
||||
hlFmt.erase(hlFmt.find_first_of("hl"), 2);
|
||||
}
|
||||
switch (size) {
|
||||
case 0: {
|
||||
const char* str = reinterpret_cast<const char*>(argument);
|
||||
amd::Os::printf(fmt.data(), str);
|
||||
// Find the string length
|
||||
while (str[copiedBytes++] != 0)
|
||||
;
|
||||
} break;
|
||||
case 1:
|
||||
amd::Os::printf(fmt.data(),
|
||||
*(reinterpret_cast<const unsigned char*>(argument)));
|
||||
break;
|
||||
case 2:
|
||||
case 4:
|
||||
if (printFloat) {
|
||||
static const char* fSpecifiers = "eEfgGa";
|
||||
std::string fmtF = fmt;
|
||||
size_t posS = fmtF.find_first_of("%");
|
||||
size_t posE = fmtF.find_first_of(fSpecifiers);
|
||||
if (posS != std::string::npos && posE != std::string::npos) {
|
||||
fmtF.replace(posS + 1, posE - posS, "s");
|
||||
}
|
||||
float fArg = *(reinterpret_cast<const float*>(argument));
|
||||
float fSign = copysign(1.0, fArg);
|
||||
if (isinf(fArg) && !isnan(fArg)) {
|
||||
if (fSign < 0) {
|
||||
amd::Os::printf(fmtF.data(), "-infinity");
|
||||
} else {
|
||||
amd::Os::printf(fmtF.data(), "infinity");
|
||||
}
|
||||
} else if (isnan(fArg)) {
|
||||
if (fSign < 0) {
|
||||
amd::Os::printf(fmtF.data(), "-nan");
|
||||
} else {
|
||||
amd::Os::printf(fmtF.data(), "nan");
|
||||
}
|
||||
} else if (hlModifier) {
|
||||
amd::Os::printf(hlFmt.data(), fArg);
|
||||
} else {
|
||||
amd::Os::printf(fmt.data(), fArg);
|
||||
}
|
||||
} else {
|
||||
bool hhModifier = (strstr(fmt.c_str(), "hh") != NULL);
|
||||
if (hhModifier) {
|
||||
// current implementation of printf in gcc 4.5.2 runtime libraries,
|
||||
// doesn`t recognize "hh" modifier ==>
|
||||
// argument should be explicitly converted to unsigned char (uchar)
|
||||
// before printing and
|
||||
// fmt should be updated not to contain "hh" modifier
|
||||
std::string hhFmt = fmt;
|
||||
hhFmt.erase(hhFmt.find_first_of("h"), 2);
|
||||
amd::Os::printf(
|
||||
hhFmt.data(),
|
||||
*(reinterpret_cast<const unsigned char*>(argument)));
|
||||
} else if (hlModifier) {
|
||||
amd::Os::printf(hlFmt.data(), *argument);
|
||||
} else {
|
||||
amd::Os::printf(fmt.data(), *argument);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 8:
|
||||
if (printFloat) {
|
||||
if (hlModifier) {
|
||||
amd::Os::printf(hlFmt.data(),
|
||||
*(reinterpret_cast<const double*>(argument)));
|
||||
} else {
|
||||
amd::Os::printf(fmt.data(),
|
||||
*(reinterpret_cast<const double*>(argument)));
|
||||
}
|
||||
} else {
|
||||
std::string out = fmt;
|
||||
// Use 'll' for 64 bit printf
|
||||
out.insert((out.size() - 1), 1, 'l');
|
||||
amd::Os::printf(out.data(),
|
||||
*(reinterpret_cast<const uint64_t*>(argument)));
|
||||
}
|
||||
break;
|
||||
case ConstStr: {
|
||||
const char* str = reinterpret_cast<const char*>(argument);
|
||||
amd::Os::printf(fmt.data(), str);
|
||||
} break;
|
||||
default:
|
||||
amd::Os::printf("Error: Unsupported data size for PrintfDbg. %d bytes",
|
||||
static_cast<int>(size));
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
fflush(stdout);
|
||||
return copiedBytes;
|
||||
}
|
||||
|
||||
void PrintfDbg::outputDbgBuffer(const PrintfInfo& info,
|
||||
const uint32_t* workitemData, size_t& i) const {
|
||||
static const char* specifiers = "cdieEfgGaosuxXp";
|
||||
static const char* modifiers = "hl";
|
||||
static const char* special = "%n";
|
||||
static const std::string sepStr = "%s";
|
||||
const uint32_t* s = workitemData;
|
||||
size_t pos = 0;
|
||||
|
||||
// Find the format string
|
||||
std::string str = info.fmtString_;
|
||||
std::string fmt;
|
||||
size_t posStart, posEnd;
|
||||
|
||||
// Print all arguments
|
||||
// Note: the following code walks through all arguments, provided by the
|
||||
// kernel and
|
||||
// finds the corresponding specifier in the format string.
|
||||
// Then it splits the original string into substrings with a single specifier
|
||||
// and
|
||||
// uses standard PrintfDbg() to print each argument
|
||||
for (uint j = 0; j < info.arguments_.size(); ++j) {
|
||||
do {
|
||||
posStart = str.find_first_of("%", pos);
|
||||
if (posStart != std::string::npos) {
|
||||
posStart++;
|
||||
// Erase all spaces after %
|
||||
while (str[posStart] == ' ') {
|
||||
str.erase(posStart, 1);
|
||||
}
|
||||
size_t tmp = str.find_first_of(special, posStart);
|
||||
size_t tmp2 = str.find_first_of(specifiers, posStart);
|
||||
// Special cases. Special symbol is located before any specifier
|
||||
if (tmp < tmp2) {
|
||||
posEnd = posStart + 1;
|
||||
fmt = str.substr(pos, posEnd - pos);
|
||||
fmt.erase(posStart - pos - 1, 1);
|
||||
pos = posStart = posEnd;
|
||||
outputArgument(sepStr, false, ConstStr,
|
||||
reinterpret_cast<const uint32_t*>(fmt.data()));
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
} else if (pos < str.length()) {
|
||||
outputArgument(
|
||||
sepStr, false, ConstStr,
|
||||
reinterpret_cast<const uint32_t*>((str.substr(pos)).data()));
|
||||
}
|
||||
} while (posStart != std::string::npos);
|
||||
|
||||
if (posStart != std::string::npos) {
|
||||
bool printFloat = false;
|
||||
int vectorSize = 0;
|
||||
size_t length;
|
||||
size_t idPos = 0;
|
||||
|
||||
// Search for PrintfDbg specifier in the format string.
|
||||
// It will be a split point for the output
|
||||
posEnd = str.find_first_of(specifiers, posStart);
|
||||
if (posEnd == std::string::npos) {
|
||||
pos = posStart = posEnd;
|
||||
break;
|
||||
}
|
||||
posEnd++;
|
||||
|
||||
size_t curPos = posEnd;
|
||||
vectorSize = checkVectorSpecifier(str, posStart, curPos);
|
||||
|
||||
// Get substring from the last position to the current specifier
|
||||
fmt = str.substr(pos, posEnd - pos);
|
||||
|
||||
// Readjust the string pointer if PrintfDbg outputs a vector
|
||||
if (vectorSize != 0) {
|
||||
size_t posVecSpec = fmt.length() - (curPos + 1);
|
||||
size_t posVecMod = fmt.find_first_of(modifiers, posVecSpec + 1);
|
||||
size_t posMod = str.find_first_of(modifiers, posStart);
|
||||
if (posMod < posEnd) {
|
||||
fmt = fmt.erase(posVecSpec, posVecMod - posVecSpec);
|
||||
} else {
|
||||
fmt = fmt.erase(posVecSpec, curPos);
|
||||
}
|
||||
idPos = posStart - pos - 1;
|
||||
}
|
||||
pos = posStart = posEnd;
|
||||
|
||||
// Find out if the argument is a float
|
||||
printFloat = checkFloat(fmt);
|
||||
|
||||
// Is it a scalar value?
|
||||
if (vectorSize == 0) {
|
||||
length = outputArgument(fmt, printFloat, info.arguments_[j], &s[i]);
|
||||
if (0 == length) {
|
||||
return;
|
||||
}
|
||||
i += amd::alignUp(length, sizeof(uint32_t)) / sizeof(uint32_t);
|
||||
} else {
|
||||
// 3-component vector's size is defined as 4 * size of each scalar
|
||||
// component
|
||||
size_t elemSize =
|
||||
info.arguments_[j] / (vectorSize == 3 ? 4 : vectorSize);
|
||||
size_t k = i * sizeof(uint32_t);
|
||||
std::string elementStr = fmt.substr(idPos, fmt.size());
|
||||
|
||||
// Print first element with full string
|
||||
if (0 == outputArgument(fmt, printFloat, elemSize, &s[i])) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Print other elemnts with separator if available
|
||||
for (int e = 1; e < vectorSize; ++e) {
|
||||
const char* t = reinterpret_cast<const char*>(s);
|
||||
// Output the vector separator
|
||||
outputArgument(sepStr, false, ConstStr,
|
||||
reinterpret_cast<const uint32_t*>(Separator));
|
||||
|
||||
// Output the next element
|
||||
outputArgument(
|
||||
elementStr, printFloat, elemSize,
|
||||
reinterpret_cast<const uint32_t*>(&t[k + e * elemSize]));
|
||||
}
|
||||
i += (amd::alignUp(info.arguments_[j], sizeof(uint32_t))) /
|
||||
sizeof(uint32_t);
|
||||
}
|
||||
} else {
|
||||
amd::Os::printf(
|
||||
"Error: The arguments don't match the printf format string. "
|
||||
"printf(%s)",
|
||||
info.fmtString_.data());
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (pos != std::string::npos) {
|
||||
fmt = str.substr(pos, str.size() - pos);
|
||||
outputArgument(sepStr, false, ConstStr,
|
||||
reinterpret_cast<const uint32_t*>(fmt.data()));
|
||||
}
|
||||
}
|
||||
|
||||
bool PrintfDbg::init(bool printfEnabled) {
|
||||
// Set up debug output buffer (if printf active)
|
||||
if (printfEnabled) {
|
||||
if (!allocate()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// The first two DWORDs in the printf buffer are as follows:
|
||||
// First DWORD = Offset to where next information is to
|
||||
// be written, initialized to 0
|
||||
// Second DWORD = Number of bytes available for printf data
|
||||
// = buffer size \96 2*sizeof(uint32_t)
|
||||
const uint8_t initSize = 2 * sizeof(uint32_t);
|
||||
uint8_t sysMem[initSize];
|
||||
memset(sysMem, 0, initSize);
|
||||
uint32_t dbgBufferSize = dbgBuffer_size_ - initSize;
|
||||
memcpy(&sysMem[4], &dbgBufferSize, sizeof(dbgBufferSize));
|
||||
|
||||
// Copy offset and number of bytes available for printf data
|
||||
// into the corresponding location in the debug buffer
|
||||
hsa_status_t err =
|
||||
hsa_memory_copy(dbgBuffer_, sysMem, 2 * sizeof(uint32_t));
|
||||
if (err != HSA_STATUS_SUCCESS) {
|
||||
LogError("\n Can't copy offset and bytes available data to dgbBuffer_!");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
//! Prints every printf record found in the debug buffer.
//!
//! \param gpu           Virtual GPU whose memory fence is released before
//!                      the buffer is read
//! \param printfEnabled Nothing is done (and true is returned) when false
//! \param printfInfo    Table of printf descriptions, indexed by the printf
//!                      ID stored in front of each record
//! \return false if there is no buffer, the buffer is empty, or a record
//!         carries an unknown printf ID; true otherwise.
bool PrintfDbg::output(VirtualGPU& gpu, bool printfEnabled,
                       const std::vector<PrintfInfo>& printfInfo) {
  if (!printfEnabled) {
    return true;
  }

  // Wait until outstanding kernels finish
  gpu.releaseGpuMemoryFence();

  // Get memory pointer to the staged buffer
  if (NULL == dbgBuffer_) {
    return false;
  }

  // The first DWORD holds the number of bytes written by the kernel
  const uint32_t offsetSize = *reinterpret_cast<const uint32_t*>(dbgBuffer_);
  if (offsetSize == 0) {
    LogError("\n The printf buffer is empty!");
    return false;
  }

  // The records start right after the two header DWORDs
  const uint32_t* record = reinterpret_cast<const uint32_t*>(dbgBuffer_ + 2 * sizeof(uint32_t));

  // parse the debug buffer
  uint consumedBytes = 0;
  while (consumedBytes < offsetSize) {
    const uint32_t printfId = *record;
    assert((printfId < printfInfo.size()) && "Cound't find the reported PrintfID!");
    // The assert vanishes in release builds; never index past the table
    if (printfId >= printfInfo.size()) {
      LogError("\n Couldn't find the reported PrintfID!");
      return false;
    }
    const PrintfInfo& info = printfInfo[printfId];

    // Record size: the printf ID followed by all the arguments
    uint recordBytes = sizeof(uint32_t);
    for (std::vector<uint>::const_iterator ita = info.arguments_.begin();
         ita != info.arguments_.end(); ++ita) {
      recordBytes += *ita;
    }

    // The argument index is relative to the start of the current record, so
    // it has to restart at 1 (just past the printf ID) for every record.
    // Keeping one index across records made the 2nd and later records read
    // their arguments from the wrong offset.
    size_t idx = 1;
    outputDbgBuffer(info, record, idx);

    consumedBytes += recordBytes;
    record += recordBytes / sizeof(uint32_t);
  }

  return true;
}
|
||||
|
||||
} // namespace roc
|
||||
@@ -1,115 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2010 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
#pragma once
|
||||
|
||||
/*! \addtogroup GPU GPU Device Implementation
|
||||
* @{
|
||||
*/
|
||||
// Fallbacks for isinf / isnan / copysign. Each macro is defined only when
// building with MSVC and only if no macro of that name exists yet.
#if defined(_MSC_VER) && !defined(isinf)
#define isinf(X) (!_finite(X) && !_isnan(X))
#endif  // _MSC_VER && !isinf

#if defined(_MSC_VER) && !defined(isnan)
#define isnan(X) (_isnan(X))
#endif  // _MSC_VER && !isnan

#if defined(_MSC_VER) && !defined(copysign)
#define copysign(X, Y) (_copysign(X, Y))
#endif  // _MSC_VER && !copysign
|
||||
|
||||
//! GPU Device Implementation
|
||||
namespace roc {
|
||||
|
||||
//! Description of one printf() call: its format string plus one entry per
//! argument (PrintfDbg treats each entry as the argument's size in bytes).
struct PrintfInfo {
  //! Format string of the printf() call
  std::string fmtString_;
  //! Arguments passed to the printf() call
  std::vector<uint> arguments_;
};
|
||||
|
||||
class Kernel;
|
||||
class VirtualGPU;
|
||||
class Device;
|
||||
|
||||
class PrintfDbg : public amd::HeapObject {
|
||||
public:
|
||||
//! Debug buffer size per workitem
|
||||
static const uint WorkitemDebugSize = 4096;
|
||||
|
||||
//! constructor
|
||||
PrintfDbg(Device& device, FILE* file = NULL);
|
||||
|
||||
//! Destructor
|
||||
~PrintfDbg();
|
||||
|
||||
//! Initializes the debug buffer before kernel's execution
|
||||
bool init(bool printfEnabled //!< checks for printf
|
||||
);
|
||||
|
||||
//! Prints the kernel's debug informaiton from the buffer
|
||||
bool output(VirtualGPU& gpu,
|
||||
bool printfEnabled, //!< checks for printf
|
||||
const std::vector<PrintfInfo>& printfInfo //!< printf info
|
||||
);
|
||||
|
||||
//! Returns debug buffer object
|
||||
address dbgBuffer() const { return dbgBuffer_; }
|
||||
|
||||
protected:
|
||||
address dbgBuffer_; //!< Buffer to hold debug output
|
||||
size_t dbgBuffer_size_; //!< Size of the debugger buffer
|
||||
FILE* dbgFile_; //!< Debug file
|
||||
Device& gpuDevice_; //!< GPU device object
|
||||
|
||||
//! Gets GPU device object
|
||||
Device& dev() const { return gpuDevice_; }
|
||||
|
||||
//! Allocates the debug buffer
|
||||
bool allocate(
|
||||
bool realloc = false //!< If TRUE then reallocate the debug memory
|
||||
);
|
||||
|
||||
//! Returns TRUE if a float value has to be printed
|
||||
bool checkFloat(const std::string& fmt //!< Format string
|
||||
) const;
|
||||
|
||||
//! Returns TRUE if a string value has to be printed
|
||||
bool checkString(const std::string& fmt //!< Format string
|
||||
) const;
|
||||
|
||||
//! Finds the specifier in the format string
|
||||
int checkVectorSpecifier(const std::string& fmt, //!< Format string
|
||||
size_t startPos, //!< Start position for processing
|
||||
size_t& curPos //!< End position for processing
|
||||
) const;
|
||||
|
||||
//! Outputs an argument
|
||||
size_t outputArgument(const std::string& fmt, //!< Format strint
|
||||
bool printFloat, //!< Argument is a float value
|
||||
size_t size, //!< Argument's size
|
||||
const uint32_t* argument //!< Argument's location
|
||||
) const;
|
||||
|
||||
//! Displays the PrintfDbg
|
||||
void outputDbgBuffer(
|
||||
const PrintfInfo& info, //!< printf info
|
||||
const uint32_t* workitemData, //!< The PrintfDbg dump buffer
|
||||
size_t& i //!< index to the data in the buffer
|
||||
) const;
|
||||
|
||||
private:
|
||||
//! Disable copy constructor
|
||||
PrintfDbg(const PrintfDbg&);
|
||||
|
||||
//! Disable assignment
|
||||
PrintfDbg& operator=(const PrintfDbg&);
|
||||
};
|
||||
|
||||
/*@}*/} // namespace roc
|
||||
|
||||
@@ -1,845 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
|
||||
|
||||
#ifndef WITHOUT_HSA_BACKEND
|
||||
|
||||
#include "rocprogram.hpp"
|
||||
|
||||
#include "compiler/lib/loaders/elf/elf.hpp"
|
||||
#include "compiler/lib/utils/options.hpp"
|
||||
#include "rockernel.hpp"
|
||||
#include "roccompilerlib.hpp"
|
||||
#include "utils/bif_section_labels.hpp"
|
||||
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <iostream>
|
||||
#include <istream>
|
||||
|
||||
|
||||
#endif // WITHOUT_HSA_BACKEND
|
||||
|
||||
namespace roc {
|
||||
#ifndef WITHOUT_HSA_BACKEND
|
||||
/* Temporary log callback for the compiler library: writes the message to
 * std::cout behind a fixed prefix. The size argument is not used. */
static void logFunction(const char *msg, size_t size) {
    std::ostream &out = std::cout;
    out << "Compiler Library log :";
    out << msg;
    out << std::endl;
}
|
||||
|
||||
// Releases everything the program holds: the ACL binary, the HSA executable,
// code object and program handle, the BRIG module/container and the CL binary.
HSAILProgram::~HSAILProgram() {
    // Free the elf binary
    if (binaryElf_ != NULL) {
        const acl_error status = g_complibApi._aclBinaryFini(binaryElf_);
        if (status != ACL_SUCCESS) {
            LogWarning( "Error while destroying the acl binary \n" );
        }
    }

    // A zero handle means the HSA object was never created
    if (hsaExecutable_.handle != 0) {
        hsa_executable_destroy(hsaExecutable_);
    }
    if (hsaProgramCodeObject_.handle != 0) {
        hsa_code_object_destroy(hsaProgramCodeObject_);
    }
    if (hsaProgramHandle_.handle != 0) {
        hsa_ext_program_destroy(hsaProgramHandle_);
    }

    destroyBrigModule();
    destroyBrigContainer();
    releaseClBinary();
}
|
||||
|
||||
// Creates an empty program for the given device: no ACL binary, no BRIG
// data and zeroed HSA handles.
HSAILProgram::HSAILProgram(roc::NullDevice& device)
    : device::Program(device),
      llvmBinary_(),
      binaryElf_(NULL),
      device_(device),
      brigModule_(NULL),
      hsaBrigContainer_(NULL)
{
    // No HSA objects exist yet
    hsaProgramHandle_.handle = 0;
    hsaProgramCodeObject_.handle = 0;
    hsaExecutable_.handle = 0;

    // Default options for ACL binaries created by this program
    memset(&binOpts_, 0, sizeof(binOpts_));
    binOpts_.struct_size = sizeof(binOpts_);
    // The ELF class is forced to 32 bit instead of
    // LP64_SWITCH( ELFCLASS32, ELFCLASS64 ) because hsail64 returns an
    // invalid aclTargetInfo when aclGetTargetInfo is called - EPR# 377910
    binOpts_.elfclass = ELFCLASS32;
    binOpts_.bitness = ELFDATA2LSB;
    binOpts_.alloc = &::malloc;
    binOpts_.dealloc = &::free;
}
|
||||
|
||||
bool HSAILProgram::initClBinary(char *binaryIn, size_t size) { // Save the
|
||||
// original
|
||||
// binary that
|
||||
// isn't owned
|
||||
// by ClBinary
|
||||
clBinary()->saveOrigBinary(binaryIn, size);
|
||||
|
||||
char *bin = binaryIn;
|
||||
size_t sz = size;
|
||||
|
||||
int encryptCode;
|
||||
|
||||
char *decryptedBin;
|
||||
size_t decryptedSize;
|
||||
if (!clBinary()->decryptElf(binaryIn, size,
|
||||
&decryptedBin, &decryptedSize, &encryptCode)) {
|
||||
return false;
|
||||
}
|
||||
if (decryptedBin != NULL) {
|
||||
// It is decrypted binary.
|
||||
bin = decryptedBin;
|
||||
sz = decryptedSize;
|
||||
}
|
||||
|
||||
// Both 32-bit and 64-bit are allowed!
|
||||
if (!amd::isElfMagic(bin)) {
|
||||
// Invalid binary.
|
||||
if (decryptedBin != NULL) {
|
||||
delete[]decryptedBin;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
clBinary()->setFlags(encryptCode);
|
||||
|
||||
return clBinary()->setBinary(bin, sz, (decryptedBin != NULL));
|
||||
}
|
||||
|
||||
|
||||
// Pre-build setup: records the original option string, runs the base class
// setup and prepares the output ELF of the CL binary.
// Returns false if the base class setup or the ELF setup fails.
bool HSAILProgram::initBuild(amd::option::Options *options) {
    compileOptions_ = options->origOptionStr;

    if (!device::Program::initBuild(options)) {
        return false;
    }

    // true means hsail required
    clBinary()->init(options, true);

    // The ELF is written to a dump file only when BIF dumping is requested
    std::string dumpFileName;
    if (options->isDumpFlagSet(amd::option::DUMP_BIF)) {
        dumpFileName = options->getDumpFileName(".bin");
    }

    const bool useELF64 = getCompilerOptions()->oVariables->EnableGpuElf64;
    const unsigned char elfClass = useELF64 ? ELFCLASS64 : ELFCLASS32;
    const char *dumpPath = (dumpFileName.size() > 0) ? dumpFileName.c_str() : NULL;
    if (!clBinary()->setElfOut(elfClass, dumpPath)) {
        LogError("Setup elf out for gpu failed");
        return false;
    }
    return true;
}
|
||||
|
||||
// ! post-compile setup for GPU
|
||||
bool HSAILProgram::finiBuild(bool isBuildGood) {
|
||||
clBinary()->resetElfOut();
|
||||
clBinary()->resetElfIn();
|
||||
|
||||
if (!isBuildGood) {
|
||||
// Prevent the encrypted binary form leaking out
|
||||
clBinary()->setBinary(NULL, 0);
|
||||
|
||||
}
|
||||
|
||||
return device::Program::finiBuild(isBuildGood);
|
||||
}
|
||||
|
||||
// Reads a whole file into a malloc()-ed, zero terminated buffer.
//
// source_filename: path of the file, opened in binary mode
// size:            receives the number of bytes read (terminator excluded);
//                  left untouched on failure
// Returns the buffer, which the caller must release with free(), or NULL if
// the file can't be opened, sized, allocated for or read completely.
static char *readFile(std::string source_filename, size_t &size) {
    FILE *fp = ::fopen(source_filename.c_str(), "rb");
    if (!fp) {
        return NULL;
    }

    char *ptr = NULL;

    // obtain file size.
    if (::fseek(fp, 0, SEEK_END) == 0) {
        // ftell reports failure with -1, so keep the value signed until it
        // has been checked
        const long fileLength = ::ftell(fp);
        if (fileLength >= 0) {
            ::rewind(fp);

            const size_t length = static_cast<size_t>(fileLength);
            ptr = reinterpret_cast<char *>(malloc(length + 1));
            if (ptr != NULL) {
                if (length == fread(ptr, 1, length, fp)) {
                    ptr[length] = '\0';
                    size = length;
                } else {
                    // Short read: report failure instead of partial data
                    free(ptr);
                    ptr = NULL;
                }
            }
        }
    }

    // Single exit point, so the file is closed on every path
    ::fclose(fp);
    return ptr;
}
|
||||
|
||||
// Works out, from the sections found in binaryElf_, which compilation stages
// are already complete and which stage the build can continue from.
//
// completeStages:   cleared, then filled with the stages passed on the way
// needOptionsCheck: set to false when the caller doesn't have to compare the
//                   options stored in the binary with the current ones
// Returns the stage to continue from, or ACL_TYPE_DEFAULT if the current
// options can't be parsed.
aclType HSAILProgram::getCompilationStagesFromBinary(std::vector<aclType>& completeStages, bool& needOptionsCheck)
{
    completeStages.clear();
    needOptionsCheck = true;

    acl_error errorCode;
    size_t boolSize = sizeof(bool);

    //! @todo Should we also check for ACL_TYPE_OPENCL & ACL_TYPE_LLVMIR_TEXT?
    // Query what the binary contains; a failed query counts as "absent".
    // llvmir in .llvmir section
    bool containsLlvmirText = true;
    errorCode = g_complibApi._aclQueryInfo(device().compiler(), binaryElf_,
        RT_CONTAINS_LLVMIR, NULL, &containsLlvmirText, &boolSize);
    if (errorCode != ACL_SUCCESS) {
        containsLlvmirText = false;
    }
    // compile & link options in .comment section
    bool containsOpts = true;
    errorCode = g_complibApi._aclQueryInfo(device().compiler(), binaryElf_,
        RT_CONTAINS_OPTIONS, NULL, &containsOpts, &boolSize);
    if (errorCode != ACL_SUCCESS) {
        containsOpts = false;
    }
    // HSAIL in .cg section
    bool containsHsailText = true;
    errorCode = g_complibApi._aclQueryInfo(device().compiler(), binaryElf_,
        RT_CONTAINS_HSAIL, NULL, &containsHsailText, &boolSize);
    if (errorCode != ACL_SUCCESS) {
        containsHsailText = false;
    }
    // BRIG sections
    bool containsBrig = true;
    errorCode = g_complibApi._aclQueryInfo(device().compiler(), binaryElf_,
        RT_CONTAINS_BRIG, NULL, &containsBrig, &boolSize);
    if (errorCode != ACL_SUCCESS) {
        containsBrig = false;
    }
    // ISA in .text section
    bool containsShaderIsa = true;
    errorCode = g_complibApi._aclQueryInfo(device().compiler(), binaryElf_,
        RT_CONTAINS_ISA, NULL, &containsShaderIsa, &boolSize);
    if (errorCode != ACL_SUCCESS) {
        containsShaderIsa = false;
    }

    // Walk the stages in pipeline order, remembering each one left behind
    aclType from = ACL_TYPE_DEFAULT;
    if (containsLlvmirText && containsOpts) {
        completeStages.push_back(from);
        from = ACL_TYPE_LLVMIR_BINARY;
    }
    if (containsBrig) {
        completeStages.push_back(from);
        from = ACL_TYPE_HSAIL_BINARY;
        // Here we should check that CG stage was done.
        // Right now there are 2 criteria to check it (besides BRIG itself):
        // 1. metadata symbols symOpenclKernel for every kernel.
        // 2. HSAIL text in aclCODEGEN section.
        // Unfortunately there is no appropriate way in Compiler Lib to check 1.
        // because kernel names are unknown here, therefore only 2.
        if (containsHsailText) {
            completeStages.push_back(from);
            from = ACL_TYPE_CG;
        }
    } else if (containsHsailText) {
        completeStages.push_back(from);
        from = ACL_TYPE_HSAIL_TEXT;
    }
    if (containsShaderIsa) {
        completeStages.push_back(from);
        from = ACL_TYPE_ISA;
    }

    std::string sCurOptions = compileOptions_ + linkOptions_;
    amd::option::Options curOptions;
    if (!amd::option::parseAllOptions(sCurOptions, curOptions)) {
        buildLog_ += curOptions.optionsLog();
        LogError("Parsing compile options failed.");
        return ACL_TYPE_DEFAULT;
    }

    if (from == ACL_TYPE_HSAIL_TEXT) {
        // compile from HSAIL text, no matter prev. stages and options
        needOptionsCheck = false;
    } else if (from == ACL_TYPE_HSAIL_BINARY || from == ACL_TYPE_CG || from == ACL_TYPE_ISA) {
        // do not check options, if LLVMIR is absent or might be absent or options are absent
        if (curOptions.oVariables->BinLLVMIR || !containsLlvmirText || !containsOpts) {
            needOptionsCheck = false;
        }
        if (from == ACL_TYPE_ISA) {
            if (containsBrig && containsHsailText && curOptions.oVariables->BinHSAIL) {
                needOptionsCheck = false;
            } else {
                // recompile from prev. stage, if BRIG || HSAIL are absent
                from = completeStages.back();
                completeStages.pop_back();
                needOptionsCheck = true;
            }
        }
    }
    // ACL_TYPE_LLVMIR_BINARY, ACL_TYPE_DEFAULT and anything else:
    // recompilation might be needed, needOptionsCheck stays true

    return from;
}
|
||||
|
||||
// Decides which stage the build has to continue from when the program
// already has a binary.
//
// The binary is loaded into binaryElf_, its stages are inspected and, when
// required, the options stored in it are compared with the current ones; a
// mismatch moves the starting point back to the LLVMIR (or default) stage.
// Returns ACL_TYPE_DEFAULT when there is no binary, it can't be read or
// options can't be parsed.
aclType HSAILProgram::getNextCompilationStageFromBinary(amd::option::Options* options) {
    binary_t binary = this->binary();
    // Nothing to continue from without an existing binary
    if (!((binary.first != NULL) && (binary.second > 0))) {
        return ACL_TYPE_DEFAULT;
    }

    aclType continueCompileFrom = ACL_TYPE_DEFAULT;
    void *mem = const_cast<void *>(binary.first);
    acl_error errorCode;
    binaryElf_ = g_complibApi._aclReadFromMem(mem, binary.second, &errorCode);
    if (errorCode != ACL_SUCCESS) {
        buildLog_ += "Error while BRIG Codegen phase: aclReadFromMem failure \n" ;
        LogWarning("aclReadFromMem failed");
        return continueCompileFrom;
    }

    // Calculate the next stage to compile from, based on sections in binaryElf_;
    // No any validity checks here
    std::vector<aclType> completeStages;
    bool needOptionsCheck = true;
    continueCompileFrom = getCompilationStagesFromBinary(completeStages, needOptionsCheck);

    // Saving binary in the interface class,
    // which also load compile & link options from binary
    setBinary(static_cast<char*>(mem), binary.second);
    if (!options || !needOptionsCheck) {
        return continueCompileFrom;
    }

    bool recompile = false;
    //! @todo Should we also check for ACL_TYPE_OPENCL & ACL_TYPE_LLVMIR_TEXT?
    switch (continueCompileFrom) {
    case ACL_TYPE_HSAIL_BINARY:
    case ACL_TYPE_CG:
    case ACL_TYPE_ISA: {
        // Compare options loaded from binary with current ones, recompile if differ;
        // If compile options are absent in binary, do not compare and recompile
        if (compileOptions_.empty()) {
            break;
        }

        // The options are stored as a symbol of the comment section
        const oclBIFSymbolStruct* symbol = findBIF30SymStruct(symOpenclCompilerOptions);
        assert(symbol && "symbol not found");
        std::string symName = std::string(symbol->str[bif::PRE]) + std::string(symbol->str[bif::POST]);
        size_t symSize = 0;
        const void *opts = g_complibApi._aclExtractSymbol(device().compiler(),
            binaryElf_, &symSize, aclCOMMENT, symName.c_str(), &errorCode);
        if (errorCode != ACL_SUCCESS) {
            recompile = true;
            break;
        }

        std::string sBinOptions = std::string((char*)opts, symSize);
        std::string sCurOptions = compileOptions_ + linkOptions_;
        amd::option::Options curOptions, binOptions;
        if (!amd::option::parseAllOptions(sBinOptions, binOptions)) {
            buildLog_ += binOptions.optionsLog();
            LogError("Parsing compile options from binary failed.");
            return ACL_TYPE_DEFAULT;
        }
        if (!amd::option::parseAllOptions(sCurOptions, curOptions)) {
            buildLog_ += curOptions.optionsLog();
            LogError("Parsing compile options failed.");
            return ACL_TYPE_DEFAULT;
        }
        recompile = !curOptions.equals(binOptions);
        break;
    }
    default:
        break;
    }

    if (recompile) {
        // Step back through the completed stages until LLVMIR (or the
        // default stage) is reached
        while (!completeStages.empty()) {
            continueCompileFrom = completeStages.back();
            if (continueCompileFrom == ACL_TYPE_LLVMIR_BINARY ||
                continueCompileFrom == ACL_TYPE_DEFAULT) {
                break;
            }
            completeStages.pop_back();
        }
    }
    return continueCompileFrom;
}
|
||||
|
||||
// Serializes binaryElf_ to memory, stores the result as the BIF binary of
// the CL binary and sets the program type.
// Returns false (after adding a note to the build log) if the binary can't
// be written to memory.
bool HSAILProgram::saveBinaryAndSetType(type_t type) {
    // Write binary to memory
    void *rawBinary = NULL;
    size_t size;
    const acl_error status = g_complibApi._aclWriteToMem(binaryElf_, &rawBinary, &size);
    if (status != ACL_SUCCESS) {
        buildLog_ += "Failed to write binary to memory \n";
        return false;
    }

    clBinary()->saveBIFBinary(static_cast<char*>(rawBinary), size);
    // Set the type of binary
    setType(type);
    // Release the serialized image with the binary's own deallocator
    binaryElf_->binOpts.dealloc(rawBinary);
    return true;
}
|
||||
|
||||
bool HSAILProgram::linkImpl(const std::vector<Program *> &inputPrograms,
|
||||
amd::option::Options *options,
|
||||
bool createLibrary) {
|
||||
std::vector<device::Program *>::const_iterator it
|
||||
= inputPrograms.begin();
|
||||
std::vector<device::Program *>::const_iterator itEnd
|
||||
= inputPrograms.end();
|
||||
acl_error errorCode;
|
||||
|
||||
// For each program we need to extract the LLVMIR and create
|
||||
// aclBinary for each
|
||||
std::vector<aclBinary *> binaries_to_link;
|
||||
|
||||
for (size_t i = 0; it != itEnd; ++it, ++i) {
|
||||
HSAILProgram *program = (HSAILProgram *)*it;
|
||||
// Check if the program was created with clCreateProgramWIthBinary
|
||||
binary_t binary = program->binary();
|
||||
if ((binary.first != NULL) && (binary.second > 0)) {
|
||||
// Binary already exists -- we can also check if there is no
|
||||
// opencl source code
|
||||
// Need to check if LLVMIR exists in the binary
|
||||
// If LLVMIR does not exist then is it valid
|
||||
// We need to pull out all the compiled kernels
|
||||
// We cannot do this at present because we need at least
|
||||
// Hsail text to pull the kernels oout
|
||||
void *mem = const_cast<void *>(binary.first);
|
||||
binaryElf_ = g_complibApi._aclReadFromMem(mem,
|
||||
binary.second,
|
||||
&errorCode);
|
||||
|
||||
if (errorCode != ACL_SUCCESS) {
|
||||
LogWarning("Error while linking : Could not read from raw binary");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// At this stage each HSAILProgram contains a valid binary_elf
|
||||
// Check if LLVMIR is in the binary
|
||||
size_t boolSize = sizeof(bool);
|
||||
bool containsLLLVMIR = false;
|
||||
errorCode = g_complibApi._aclQueryInfo(device().compiler(), binaryElf_,
|
||||
RT_CONTAINS_LLVMIR, NULL, &containsLLLVMIR, &boolSize);
|
||||
if (errorCode != ACL_SUCCESS || !containsLLLVMIR) {
|
||||
buildLog_ +="Error while linking : Invalid binary (Missing LLVMIR section)";
|
||||
return false;
|
||||
}
|
||||
// Create a new aclBinary for each LLVMIR and save it in a list
|
||||
aclBIFVersion ver = g_complibApi._aclBinaryVersion(binaryElf_);
|
||||
aclBinary *bin = g_complibApi._aclCreateFromBinary(binaryElf_, ver);
|
||||
binaries_to_link.push_back(bin);
|
||||
}
|
||||
|
||||
// At this stage each HSAILProgram in the list has an aclBinary initialized
|
||||
// and contains LLVMIR
|
||||
// We can now go ahead and link them.
|
||||
if (binaries_to_link.size() > 1) {
|
||||
errorCode = g_complibApi._aclLink(device().compiler(),
|
||||
binaries_to_link[0],
|
||||
binaries_to_link.size() - 1,
|
||||
&binaries_to_link[1],
|
||||
ACL_TYPE_LLVMIR_BINARY,
|
||||
"-create-library",
|
||||
NULL);
|
||||
}
|
||||
else {
|
||||
errorCode = g_complibApi._aclLink(device().compiler(),
|
||||
binaries_to_link[0],
|
||||
0,
|
||||
NULL,
|
||||
ACL_TYPE_LLVMIR_BINARY,
|
||||
"-create-library",
|
||||
NULL);
|
||||
}
|
||||
if (errorCode != ACL_SUCCESS) {
|
||||
buildLog_ += "Failed to link programs";
|
||||
return false;
|
||||
}
|
||||
// Store the newly linked aclBinary for this program.
|
||||
binaryElf_ = binaries_to_link[0];
|
||||
// Free all the other aclBinaries
|
||||
for (size_t i = 1; i < binaries_to_link.size(); i++) {
|
||||
g_complibApi._aclBinaryFini(binaries_to_link[i]);
|
||||
}
|
||||
if (createLibrary) {
|
||||
saveBinaryAndSetType(TYPE_LIBRARY);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Now call linkImpl with the new options
|
||||
return linkImpl(options);
|
||||
}
|
||||
|
||||
bool HSAILProgram::initBrigModule() {
|
||||
const char *symbol_name = "__BRIG__";
|
||||
BrigModuleHeader* brig;
|
||||
acl_error error_code;
|
||||
size_t size;
|
||||
const void* symbol_data = g_complibApi._aclExtractSymbol(
|
||||
device().compiler(),
|
||||
binaryElf_,
|
||||
&size,
|
||||
aclBRIG,
|
||||
symbol_name,
|
||||
&error_code);
|
||||
if (error_code != ACL_SUCCESS) {
|
||||
std::string error = "Could not find Brig in BIF: ";
|
||||
error += symbol_name;
|
||||
LogError(error.c_str());
|
||||
buildLog_ += error;
|
||||
return false;
|
||||
}
|
||||
brig = (BrigModuleHeader*)malloc(size);
|
||||
memcpy(brig, symbol_data, size);
|
||||
brigModule_ = brig;
|
||||
return true;
|
||||
}
|
||||
void HSAILProgram::destroyBrigModule() {
|
||||
if (brigModule_ != NULL) {
|
||||
free(brigModule_);
|
||||
}
|
||||
}
|
||||
/// Wraps brigModule_ in a newly allocated BrigContainer stored in
/// hsaBrigContainer_. brigModule_ must already be set (asserted).
///
/// \return true when hsaBrigContainer_ is non-null after the allocation.
bool HSAILProgram::initBrigContainer() {
    assert(brigModule_ != NULL);

    // Create a BRIG container around the module.
    hsaBrigContainer_ = new BrigContainer(brigModule_);
    return hsaBrigContainer_ ? true : false;
}
|
||||
|
||||
/// Deletes the BrigContainer held in hsaBrigContainer_ and clears the
/// pointer so that a repeated call cannot delete the same object twice.
void HSAILProgram::destroyBrigContainer() {
    delete hsaBrigContainer_;
    hsaBrigContainer_ = NULL;
}
|
||||
|
||||
|
||||
void HSAILProgram::hsaError(const char *msg, hsa_status_t status) {
|
||||
std::string fmsg;
|
||||
fmsg += msg;
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
const char *hmsg = 0;
|
||||
hsa_status_string(status, &hmsg);
|
||||
if (hmsg) {
|
||||
fmsg += ": ";
|
||||
fmsg += hmsg;
|
||||
}
|
||||
}
|
||||
LogError(fmsg.c_str());
|
||||
buildLog_ += fmsg;
|
||||
}
|
||||
|
||||
/// Compiles the program's binary to code-gen stage (when needed), finalizes
/// it through the HSA runtime and creates a roc::Kernel for every kernel
/// directive found in the BRIG container.
///
/// \param options  Build options; origOptionStr is passed to the compiler
///                 library and oVariables->UniformWorkGroupSize is applied
///                 to every created kernel.
/// \return true on success, or immediately after compilation for an offline
///         device; false when any compile, HSA runtime, query or save step
///         fails (the reason is appended to the build log).
bool HSAILProgram::linkImpl(amd::option::Options *options) {
    acl_error errorCode;
    aclType continueCompileFrom = ACL_TYPE_LLVMIR_BINARY;
    // NOTE(review): 'finalize' is cleared for ACL_TYPE_ISA but never read
    // afterwards, so finalization below runs unconditionally -- confirm
    // whether ISA binaries are meant to skip it.
    bool finalize = true;
    // If !binaryElf_ then program must have been created using clCreateProgramWithBinary
    if (!binaryElf_) {
        continueCompileFrom = getNextCompilationStageFromBinary(options);
    }
    switch (continueCompileFrom) {
    // Compilation from ACL_TYPE_LLVMIR_BINARY to ACL_TYPE_CG in cases:
    // 1. if the program is not created with binary;
    // 2. if the program is created with binary and contains only .llvmir & .comment
    // 3. if the program is created with binary, contains .llvmir, .comment, brig sections,
    //    but the binary's compile & link options differ from current ones (recompilation);
    case ACL_TYPE_LLVMIR_BINARY:
    // Compilation from ACL_TYPE_HSAIL_BINARY to ACL_TYPE_CG in cases:
    // 1. if the program is created with binary and contains only brig sections
    case ACL_TYPE_HSAIL_BINARY:
    // Compilation from ACL_TYPE_HSAIL_TEXT to ACL_TYPE_CG in cases:
    // 1. if the program is created with binary and contains only hsail text
    case ACL_TYPE_HSAIL_TEXT: {
        std::string curOptions = options->origOptionStr + hsailOptions();
        errorCode = g_complibApi._aclCompile(device().compiler(), binaryElf_,
            curOptions.c_str(), continueCompileFrom, ACL_TYPE_CG, logFunction);
        buildLog_ += g_complibApi._aclGetCompilerLog(device().compiler());
        if (errorCode != ACL_SUCCESS) {
            buildLog_ += "Error while BRIG Codegen phase: compilation error \n" ;
            return false;
        }
        break;
    }
    case ACL_TYPE_CG:
        break;
    case ACL_TYPE_ISA:
        finalize = false;
        break;
    default:
        buildLog_ += "Error while BRIG Codegen phase: the binary is incomplete \n" ;
        return false;
    }

    // Stop compilation if it is an offline device - HSA runtime does not
    // support ISA compiled offline
    if (!dev().isOnline()) {
        return true;
    }

    hsa_agent_t hsaDevice = dev().getBackendDevice();
    if (!initBrigModule()) {
        hsaError("Failed to create Brig Module");
        return false;
    }

    // Create a BrigContainer.
    if (!initBrigContainer()) {
        hsaError("Failed to create Brig Container");
        return false;
    }

    // Create a program.
    hsa_status_t status = hsa_ext_program_create(
        HSA_MACHINE_MODEL_LARGE,
        HSA_PROFILE_FULL,
        HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO,
        NULL,
        &hsaProgramHandle_
    );
    if (status != HSA_STATUS_SUCCESS) {
        hsaError("Failed to create hsail program", status);
        return false;
    }

    // Add module to a program.
    hsa_ext_module_t programModule =
        reinterpret_cast<hsa_ext_module_t>(brigModule_);
    status = hsa_ext_program_add_module(
        hsaProgramHandle_, programModule
    );
    if (status != HSA_STATUS_SUCCESS) {
        hsaError("Failed to add a module to the program", status);
        return false;
    }

    // Obtain agent's Isa.
    hsa_isa_t hsaDeviceIsa;
    status = hsa_agent_get_info(
        hsaDevice, HSA_AGENT_INFO_ISA, &hsaDeviceIsa
    );
    if (status != HSA_STATUS_SUCCESS) {
        // Previously reported the (copy-pasted) program-creation message.
        hsaError("Failed to get agent ISA", status);
        return false;
    }

    // Finalize a program.
    hsa_ext_control_directives_t hsaControlDirectives;
    memset(&hsaControlDirectives, 0, sizeof(hsa_ext_control_directives_t));
    status = hsa_ext_program_finalize(
        hsaProgramHandle_,
        hsaDeviceIsa,
        0,
        hsaControlDirectives,
        NULL,
        HSA_CODE_OBJECT_TYPE_PROGRAM,
        &hsaProgramCodeObject_
    );
    if (status != HSA_STATUS_SUCCESS) {
        hsaError("Failed to finalize hsail program", status);
        return false;
    }

    // HLC always generates full profile
    hsa_profile_t profile = HSA_PROFILE_FULL;

    // Create an executable.
    status = hsa_executable_create(
        profile,
        HSA_EXECUTABLE_STATE_UNFROZEN,
        "",
        &hsaExecutable_
    );
    if (status != HSA_STATUS_SUCCESS) {
        hsaError("Failed to create executable", status);
        return false;
    }

    // Load the code object.
    status = hsa_executable_load_code_object(
        hsaExecutable_, hsaDevice, hsaProgramCodeObject_, NULL
    );
    if (status != HSA_STATUS_SUCCESS) {
        hsaError("Failed to load code object", status);
        return false;
    }

    // Freeze the executable.
    status = hsa_executable_freeze(hsaExecutable_, NULL);
    if (status != HSA_STATUS_SUCCESS) {
        hsaError("Failed to freeze executable", status);
        return false;
    }

    Code first_d = hsaBrigContainer_->code().begin();
    Code last_d = hsaBrigContainer_->code().end();
    // Iterate through the symbols using brig assembler
    for (; first_d != last_d; first_d = first_d.next()) {
        if (DirectiveExecutable de = first_d) {
            // Disable function compilation unconditionally.
            // TODO: May remove this after the finalizer supports function compilation.
            if (DirectiveFunction df = first_d) {
                continue;
            }

            std::string kernelName = (SRef)de.name();
            if (de.linkage() != BRIG_LINKAGE_PROGRAM) {
                kernelName.insert(0, "am::");
            }

            // Query symbol handle for this symbol.
            hsa_executable_symbol_t kernelSymbol;
            status = hsa_executable_get_symbol(
                hsaExecutable_, NULL, kernelName.c_str(), hsaDevice, 0, &kernelSymbol
            );
            if (status != HSA_STATUS_SUCCESS) {
                hsaError("Failed to get executable symbol", status);
                return false;
            }

            // Query code handle for this symbol.
            uint64_t kernelCodeHandle;
            status = hsa_executable_symbol_get_info(
                kernelSymbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &kernelCodeHandle
            );
            if (status != HSA_STATUS_SUCCESS) {
                hsaError("Failed to get executable symbol info", status);
                return false;
            }

            std::string openclKernelName = kernelName;

            // Strip the opencl and kernel name. substr() throws
            // std::out_of_range when the start offset exceeds the string
            // length, so reject names shorter than the prefix up front.
            const size_t prefixLength = strlen("&__OpenCL_");
            if (kernelName.size() < prefixLength) {
                std::string error =
                    "Error while Finalization phase: unexpected kernel symbol name ";
                error += kernelName;
                LogError(error.c_str());
                buildLog_ += error;
                return false;
            }
            kernelName = kernelName.substr(prefixLength, kernelName.size());
            kernelName = kernelName.substr(0, kernelName.size() - strlen("_kernel"));

            aclMetadata md;
            md.numHiddenKernelArgs = 0;
            size_t sizeOfnumHiddenKernelArgs = sizeof(md.numHiddenKernelArgs);
            errorCode = g_complibApi._aclQueryInfo(device().compiler(), binaryElf_, RT_NUM_KERNEL_HIDDEN_ARGS,
                openclKernelName.c_str(), &md.numHiddenKernelArgs, &sizeOfnumHiddenKernelArgs);
            if (errorCode != ACL_SUCCESS) {
                buildLog_ += "Error while Finalization phase: Kernel extra arguments count querying from the ELF failed\n";
                return false;
            }

            uint32_t workgroupGroupSegmentByteSize;
            status = hsa_executable_symbol_get_info(
                kernelSymbol,
                HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE,
                &workgroupGroupSegmentByteSize);
            if (status != HSA_STATUS_SUCCESS) {
                hsaError("Failed to get group segment size info", status);
                return false;
            }

            uint32_t workitemPrivateSegmentByteSize;
            status = hsa_executable_symbol_get_info(
                kernelSymbol,
                HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
                &workitemPrivateSegmentByteSize);
            if (status != HSA_STATUS_SUCCESS) {
                hsaError("Failed to get private segment size info", status);
                return false;
            }

            uint32_t kernargSegmentByteSize;
            status = hsa_executable_symbol_get_info(
                kernelSymbol,
                HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE,
                &kernargSegmentByteSize);
            if (status != HSA_STATUS_SUCCESS) {
                hsaError("Failed to get kernarg segment size info", status);
                return false;
            }

            uint32_t kernargSegmentAlignment;
            status = hsa_executable_symbol_get_info(
                kernelSymbol,
                HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT,
                &kernargSegmentAlignment);
            if (status != HSA_STATUS_SUCCESS) {
                hsaError("Failed to get kernarg segment alignment info", status);
                return false;
            }

            Kernel *aKernel = new roc::Kernel(
                kernelName,
                this,
                kernelCodeHandle,
                workgroupGroupSegmentByteSize,
                workitemPrivateSegmentByteSize,
                kernargSegmentByteSize,
                kernargSegmentAlignment,
                md.numHiddenKernelArgs
            );
            if (!aKernel->init()) {
                // The kernel has not been stored in kernels() yet, so it
                // must be released here to avoid leaking it.
                delete aKernel;
                return false;
            }
            aKernel->setUniformWorkGroupSize(options->oVariables->UniformWorkGroupSize);
            kernels()[kernelName] = aKernel;
        }
    }

    // Report a failed save instead of silently returning success.
    if (!saveBinaryAndSetType(TYPE_EXECUTABLE)) {
        return false;
    }
    buildLog_ += g_complibApi._aclGetCompilerLog(device().compiler());
    return true;
}
|
||||
|
||||
/// Not implemented for this program type: the options are ignored and the
/// call always reports failure.
///
/// \return false, unconditionally.
bool HSAILProgram::createBinary(amd::option::Options *options)
{
    return false;
}
|
||||
|
||||
bool HSAILProgram::initClBinary() {
|
||||
if (clBinary_ == NULL) {
|
||||
clBinary_ = new ClBinary(static_cast<const Device &>(device()));
|
||||
if (clBinary_ == NULL) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void HSAILProgram::releaseClBinary() {
|
||||
if (clBinary_ != NULL) {
|
||||
delete clBinary_;
|
||||
clBinary_ = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds the option string that is appended to the user's options before
/// they are handed to the compiler library.
///
/// The string contains the fast-FMA defines, -cl-denorms-are-zero, " -m64"
/// when LP64_ONLY expands its argument, and one " -D<name>=1" define per
/// space-separated token of device().info().extensions_.
///
/// \return The assembled option string (each option has a leading space).
std::string HSAILProgram::hsailOptions() {
    std::string opts;

    // Standard device-specific options, kept for legacy compiler code.
    // All our devices support these options now.
    opts += " -DFP_FAST_FMAF=1";
    opts += " -DFP_FAST_FMA=1";

    // TODO: this is a quick fix to restore original f32 denorm flushing.
    // Make this target/option dependent.
    opts += " -cl-denorms-are-zero";

    // TODO(sramalin): Query the device for the OpenCL version and only set
    // -cl-std if it wasn't specified in the original build options (app).
    // opts += " -cl-std=CL1.2";

    // Check if the host is 64 bit or 32 bit.
    LP64_ONLY(opts += " -m64");

    // Append a define for each extension supported by the device.
    std::istringstream extensionList("");
    extensionList.str(device().info().extensions_);
    for (std::string name; getline(extensionList, name, ' '); ) {
        if (name.empty()) {
            continue;  // consecutive spaces yield empty tokens
        }
        opts += " -D";
        opts += name;
        opts += "=1";
    }
    return opts;
}
|
||||
|
||||
#endif // WITHOUT_HSA_BACKEND
|
||||
} // namespace hsa
|
||||
|
||||
@@ -1,156 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
#pragma once
|
||||
|
||||
#ifndef WITHOUT_HSA_BACKEND
|
||||
|
||||
#include "rocbinary.hpp"
|
||||
#include "roccompilerlib.hpp"
|
||||
#include "acl.h"
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include "rocdevice.hpp"
|
||||
#include "HSAILItems.h"
|
||||
|
||||
using namespace HSAIL_ASM;
|
||||
//! \namespace roc HSA Device Implementation
|
||||
namespace roc {
|
||||
|
||||
//! \class empty program
|
||||
class HSAILProgram : public device::Program
|
||||
{
|
||||
friend class ClBinary;
|
||||
public:
|
||||
//! Default constructor
|
||||
HSAILProgram(roc::NullDevice& device);
|
||||
//! Default destructor
|
||||
~HSAILProgram();
|
||||
|
||||
// Initialize Binary for GPU (used only for clCreateProgramWithBinary()).
|
||||
virtual bool initClBinary(char *binaryIn, size_t size);
|
||||
|
||||
//! Returns the aclBinary associated with the progrm
|
||||
const aclBinary* binaryElf() const {
|
||||
return static_cast<const aclBinary*>(binaryElf_); }
|
||||
|
||||
const std::string& HsailText() {
|
||||
return hsailProgram_;
|
||||
}
|
||||
|
||||
const NullDevice& dev() const { return device_; }
|
||||
//! Returns the hsaBinary associated with the progrm
|
||||
hsa_agent_t hsaDevice() const {
|
||||
return dev().getBackendDevice();
|
||||
}
|
||||
|
||||
protected:
|
||||
//! log and append to build log an error from runtime
|
||||
void hsaError(const char *msg, hsa_status_t status = HSA_STATUS_SUCCESS);
|
||||
|
||||
//! pre-compile setup for GPU
|
||||
virtual bool initBuild(amd::option::Options* options);
|
||||
|
||||
//! post-compile setup for GPU
|
||||
virtual bool finiBuild(bool isBuildGood);
|
||||
|
||||
/*! \brief Compiles GPU CL program to LLVM binary (compiler frontend)
|
||||
*
|
||||
* \return True if we successefully compiled a GPU program
|
||||
*/
|
||||
virtual bool compileImpl(
|
||||
const std::string& sourceCode, //!< the program's source code
|
||||
const std::vector<const std::string*>& headers,
|
||||
const char** headerIncludeNames,
|
||||
amd::option::Options* options //!< compile options's object
|
||||
);
|
||||
|
||||
/*! \brief Compiles LLVM binary to HSAIL code (compiler backend: link+opt+codegen)
|
||||
*
|
||||
* \return The build error code
|
||||
*/
|
||||
int compileBinaryToHSAIL(
|
||||
amd::option::Options* options //!< options for compilation
|
||||
);
|
||||
|
||||
|
||||
virtual bool linkImpl(amd::option::Options* options);
|
||||
|
||||
//! Link the device programs.
|
||||
virtual bool linkImpl (const std::vector<Program*>& inputPrograms,
|
||||
amd::option::Options* options,
|
||||
bool createLibrary);
|
||||
|
||||
virtual bool createBinary(amd::option::Options* options);
|
||||
|
||||
//! Initialize Binary
|
||||
virtual bool initClBinary();
|
||||
|
||||
//! Release the Binary
|
||||
virtual void releaseClBinary();
|
||||
|
||||
virtual const aclTargetInfo & info(const char * str = ""){
|
||||
return info_;
|
||||
}
|
||||
|
||||
virtual bool isElf(const char* bin) const {
|
||||
return amd::isElfMagic(bin);
|
||||
//return false;
|
||||
}
|
||||
|
||||
//! Returns the binary
|
||||
// This should ensure that the binary is updated with all the kernels
|
||||
// ClBinary& clBinary() { return binary_; }
|
||||
ClBinary* clBinary() {
|
||||
return static_cast<ClBinary*>(device::Program::clBinary());
|
||||
}
|
||||
const ClBinary* clBinary() const {
|
||||
return static_cast<const ClBinary*>(device::Program::clBinary());
|
||||
}
|
||||
private:
|
||||
/* \brief Returns the next stage to compile from, based on sections in binary,
|
||||
* also returns completeStages in a vector, which contains at least ACL_TYPE_DEFAULT,
|
||||
* sets needOptionsCheck to true if options check is needed to decide whether or not to recompile
|
||||
*/
|
||||
aclType getCompilationStagesFromBinary(std::vector<aclType>& completeStages, bool& needOptionsCheck);
|
||||
|
||||
/* \brief Returns the next stage to compile from, based on sections and options in binary
|
||||
*/
|
||||
aclType getNextCompilationStageFromBinary(amd::option::Options* options);
|
||||
bool saveBinaryAndSetType(type_t type);
|
||||
bool initBrigContainer();
|
||||
void destroyBrigContainer();
|
||||
//Initializes BRIG module
|
||||
bool initBrigModule();
|
||||
void destroyBrigModule();
|
||||
//! Disable default copy constructor
|
||||
HSAILProgram(const HSAILProgram&);
|
||||
|
||||
//! Disable operator=
|
||||
HSAILProgram& operator=(const HSAILProgram&);
|
||||
|
||||
//! Returns all the options to be appended while passing to the
|
||||
//compiler library
|
||||
std::string hsailOptions();
|
||||
|
||||
std::string openCLSource_; //!< Original OpenCL source
|
||||
std::string hsailProgram_; //!< HSAIL program after compilation.
|
||||
std::string llvmBinary_; //!< LLVM IR binary code
|
||||
//!< aclBinary and aclCompiler - for the compiler libray
|
||||
aclBinary* binaryElf_; //!<Binary for the new compiler library - shreyas edit
|
||||
aclBinaryOptions binOpts_; //!<Binary options to create aclBinary
|
||||
roc::NullDevice& device_; //!< Device related to the program
|
||||
/* Brig and Brig modules */
|
||||
BrigModule_t brigModule_; //!< Brig that should be used in the HSA runtime
|
||||
BrigContainer* hsaBrigContainer_; //!< Container for the BRIG;
|
||||
hsa_ext_program_t hsaProgramHandle_; //!< Handle to HSA runtime program
|
||||
hsa_code_object_t hsaProgramCodeObject_; //!< Handle to HSA code object
|
||||
hsa_executable_t hsaExecutable_; //!< Handle to HSA executable
|
||||
};
|
||||
|
||||
/*@}*/} // namespace roc
|
||||
|
||||
#endif /*WITHOUT_HSA_BACKEND*/
|
||||
|
||||
@@ -1,198 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
|
||||
/*
|
||||
Definitions taken from Mesa radeonsi and GCN3 isa manual.
|
||||
https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/radeonsi/sid.h
|
||||
http://gpuopen.com/compute-product/amd-gcn3-isa-architecture-manual/
|
||||
|
||||
WORD7 is defined in mesa but has no fields and isn't in GCN3 doc. Can I use this?
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#ifndef WITHOUT_HSA_BACKEND
|
||||
|
||||
#if !defined(LITTLEENDIAN_CPU) && !defined(BIGENDIAN_CPU)
|
||||
#error "Must define LITTLEENDIAN_CPU or BIGENDIAN_CPU"
|
||||
#endif
|
||||
#if defined(LITTLEENDIAN_CPU) && defined(BIGENDIAN_CPU)
|
||||
#error "LITTLEENDIAN_CPU and BIGENDIAN_CPU are mutually exclusive"
|
||||
#endif
|
||||
|
||||
namespace roc {

//! Image type codes stored in the 'type' field of SQ_IMG_RSRC_WORD3.
enum SQ_RSRC_IMG_TYPES {
    SQ_RSRC_IMG_1D            = 0x08,
    SQ_RSRC_IMG_2D            = 0x09,
    SQ_RSRC_IMG_3D            = 0x0A,
    SQ_RSRC_IMG_CUBE          = 0x0B,
    SQ_RSRC_IMG_1D_ARRAY      = 0x0C,
    SQ_RSRC_IMG_2D_ARRAY      = 0x0D,
    SQ_RSRC_IMG_2D_MSAA       = 0x0E,
    SQ_RSRC_IMG_2D_MSAA_ARRAY = 0x0F
};

// Each SQ_IMG_RSRC_WORDn union below describes one 32-bit word of the image
// resource descriptor. The bit-field struct is declared in opposite order
// for LITTLEENDIAN_CPU and BIGENDIAN_CPU builds; the u32/i32/f32 members
// give access to the whole word.

//! Word 0: base address.
union SQ_IMG_RSRC_WORD0 {
    struct {
#if defined(LITTLEENDIAN_CPU)
        unsigned int base_address : 32;
#elif defined(BIGENDIAN_CPU)
        unsigned int base_address : 32;
#endif
    } bitfields, bits;
    unsigned int u32_all;
    signed int   i32_all;
    float        f32_all;
};

//! Word 1: upper base address bits, minimum LOD, formats and mtype.
union SQ_IMG_RSRC_WORD1 {
    struct {
#if defined(LITTLEENDIAN_CPU)
        unsigned int base_address_hi : 8;
        unsigned int min_lod         : 12;
        unsigned int data_format     : 6;
        unsigned int num_format      : 4;
        unsigned int mtype           : 2;
#elif defined(BIGENDIAN_CPU)
        unsigned int mtype           : 2;
        unsigned int num_format      : 4;
        unsigned int data_format     : 6;
        unsigned int min_lod         : 12;
        unsigned int base_address_hi : 8;
#endif
    } bitfields, bits;
    unsigned int u32_all;
    signed int   i32_all;
    float        f32_all;
};

//! Word 2: width, height, perf_mod and interlaced flag.
union SQ_IMG_RSRC_WORD2 {
    struct {
#if defined(LITTLEENDIAN_CPU)
        unsigned int width      : 14;
        unsigned int height     : 14;
        unsigned int perf_mod   : 3;
        unsigned int interlaced : 1;
#elif defined(BIGENDIAN_CPU)
        unsigned int interlaced : 1;
        unsigned int perf_mod   : 3;
        unsigned int height     : 14;
        unsigned int width      : 14;
#endif
    } bitfields, bits;
    unsigned int u32_all;
    signed int   i32_all;
    float        f32_all;
};

//! Word 3: destination selects, mip levels, tiling index and image type.
union SQ_IMG_RSRC_WORD3 {
    struct {
#if defined(LITTLEENDIAN_CPU)
        unsigned int dst_sel_x    : 3;
        unsigned int dst_sel_y    : 3;
        unsigned int dst_sel_z    : 3;
        unsigned int dst_sel_w    : 3;
        unsigned int base_level   : 4;
        unsigned int last_level   : 4;
        unsigned int tiling_index : 5;
        unsigned int pow2_pad     : 1;
        unsigned int mtype        : 1;
        unsigned int atc          : 1;
        unsigned int type         : 4;
#elif defined(BIGENDIAN_CPU)
        unsigned int type         : 4;
        unsigned int atc          : 1;
        unsigned int mtype        : 1;
        unsigned int pow2_pad     : 1;
        unsigned int tiling_index : 5;
        unsigned int last_level   : 4;
        unsigned int base_level   : 4;
        unsigned int dst_sel_w    : 3;
        unsigned int dst_sel_z    : 3;
        unsigned int dst_sel_y    : 3;
        unsigned int dst_sel_x    : 3;
#endif
    } bitfields, bits;
    unsigned int u32_all;
    signed int   i32_all;
    float        f32_all;
};

//! Word 4: depth and pitch (5 unnamed padding bits).
union SQ_IMG_RSRC_WORD4 {
    struct {
#if defined(LITTLEENDIAN_CPU)
        unsigned int depth : 13;
        unsigned int pitch : 14;
        unsigned int       : 5;
#elif defined(BIGENDIAN_CPU)
        unsigned int       : 5;
        unsigned int pitch : 14;
        unsigned int depth : 13;
#endif
    } bitfields, bits;
    unsigned int u32_all;
    signed int   i32_all;
    float        f32_all;
};

//! Word 5: base and last array slice (6 unnamed padding bits).
union SQ_IMG_RSRC_WORD5 {
    struct {
#if defined(LITTLEENDIAN_CPU)
        unsigned int base_array : 13;
        unsigned int last_array : 13;
        unsigned int            : 6;
#elif defined(BIGENDIAN_CPU)
        unsigned int            : 6;
        unsigned int last_array : 13;
        unsigned int base_array : 13;
#endif
    } bitfields, bits;
    unsigned int u32_all;
    signed int   i32_all;
    float        f32_all;
};

//! Word 6: LOD warning, counter bank and compression related fields.
union SQ_IMG_RSRC_WORD6 {
    struct {
#if defined(LITTLEENDIAN_CPU)
        unsigned int min_lod_warn    : 12;
        unsigned int counter_bank_id : 8;
        unsigned int lod_hdw_cnt_en  : 1;
        unsigned int compression_en  : 1;
        unsigned int alpha_is_on_msb : 1;
        unsigned int color_transform : 1;
        unsigned int lost_alpha_bits : 4;
        unsigned int lost_color_bits : 4;
#elif defined(BIGENDIAN_CPU)
        unsigned int lost_color_bits : 4;
        unsigned int lost_alpha_bits : 4;
        unsigned int color_transform : 1;
        unsigned int alpha_is_on_msb : 1;
        unsigned int compression_en  : 1;
        unsigned int lod_hdw_cnt_en  : 1;
        unsigned int counter_bank_id : 8;
        unsigned int min_lod_warn    : 12;
#endif
    } bitfields, bits;
    unsigned int u32All;
    signed int   i32All;
    float        f32All;
    // Aliases matching the member names used by SQ_IMG_RSRC_WORD0..WORD5.
    unsigned int u32_all;
    signed int   i32_all;
    float        f32_all;
};

//! Word 7: meta data address.
union SQ_IMG_RSRC_WORD7 {
    struct {
#if defined(LITTLEENDIAN_CPU)
        unsigned int meta_data_address : 32;
#elif defined(BIGENDIAN_CPU)
        unsigned int meta_data_address : 32;
#endif
    } bitfields, bits;
    unsigned int u32All;
    signed int   i32All;
    float        f32All;
    // Aliases matching the member names used by SQ_IMG_RSRC_WORD0..WORD5.
    unsigned int u32_all;
    signed int   i32_all;
    float        f32_all;
};

}
|
||||
|
||||
#endif
|
||||
@@ -1,100 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2010 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
|
||||
#ifndef WITHOUT_GPU_BACKEND
|
||||
|
||||
#include "top.hpp"
|
||||
#include "os/os.hpp"
|
||||
#include "device/device.hpp"
|
||||
#include "rocsettings.hpp"
|
||||
#include "device/rocm/rocglinterop.hpp"
|
||||
|
||||
namespace roc {
|
||||
|
||||
//! Initializes the HSA device default settings. Two of them are taken from
//! the environment: OPENCL_USE_NC_MEMORY_POLICY turns non-coherent mode on
//! and OPENCL_DISABLE_PARTIAL_DISPATCH turns partial dispatch off, each
//! merely by being present (the variable's value is not inspected).
Settings::Settings()
{
    // Set this to true when we drop the flag
    doublePrecision_ = ::CL_KHR_FP64;
    pollCompletion_ = ENVVAR_HSA_POLL_KERNEL_COMPLETION;

    enableLocalMemory_ = HSA_LOCAL_MEMORY_ENABLE;
    enableImageHandle_ = true;

    // Default workgroup limits for 1D, 2D and 3D dispatches.
    maxWorkGroupSize_ = 256;
    maxWorkGroupSize2DX_ = 16;
    maxWorkGroupSize2DY_ = 16;
    maxWorkGroupSize3DX_ = 4;
    maxWorkGroupSize3DY_ = 4;
    maxWorkGroupSize3DZ_ = 4;

    kernargPoolSize_ = HSA_KERNARG_POOL_SIZE;
    signalPoolSize_ = HSA_SIGNAL_POOL_SIZE;

    // Determine if the user is requesting Non-Coherent mode for system
    // memory. By default system memory operates in Coherent mode; users can
    // turn that off for hardware that does not support the feature
    // naturally.
    const char *ncPolicy = getenv("OPENCL_USE_NC_MEMORY_POLICY");
    enableNCMode_ = (ncPolicy != NULL);

    // Determine if the user wishes to disable support for partial dispatch.
    // It is enabled by default; users can turn it off for devices that do
    // not support the feature.
    //
    // @note Update appropriate field of device::Settings
    const char *noPartialDispatch = getenv("OPENCL_DISABLE_PARTIAL_DISPATCH");
    const bool allowPartialDispatch = (noPartialDispatch == NULL);
    enablePartialDispatch_ = allowPartialDispatch;
    partialDispatch_ = allowPartialDispatch;
}
|
||||
|
||||
bool
|
||||
Settings::create(bool doublePrecision)
|
||||
{
|
||||
customHostAllocator_ = true;
|
||||
|
||||
// Enable extensions
|
||||
enableExtension(ClKhrByteAddressableStore);
|
||||
enableExtension(ClKhrGlobalInt32BaseAtomics);
|
||||
enableExtension(ClKhrGlobalInt32ExtendedAtomics);
|
||||
enableExtension(ClKhrLocalInt32BaseAtomics);
|
||||
enableExtension(ClKhrLocalInt32ExtendedAtomics);
|
||||
enableExtension(ClKhr3DImageWrites);
|
||||
enableExtension(ClAmdMediaOps);
|
||||
enableExtension(ClAmdMediaOps2);
|
||||
if(MesaInterop::Supported())
|
||||
enableExtension(ClKhrGlSharing);
|
||||
|
||||
// Make sure device supports doubles
|
||||
doublePrecision_ &= doublePrecision;
|
||||
|
||||
if (doublePrecision_) {
|
||||
// Enable KHR double precision extension
|
||||
enableExtension(ClKhrFp64);
|
||||
// Also enable AMD double precision extension?
|
||||
enableExtension(ClAmdFp64);
|
||||
}
|
||||
|
||||
enableExtension(ClKhrDepthImages);
|
||||
supportDepthsRGB_ = true;
|
||||
|
||||
// Override current device settings
|
||||
override();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
Settings::override()
|
||||
{
|
||||
}
|
||||
|
||||
} // namespace roc
|
||||
|
||||
#endif // WITHOUT_GPU_BACKEND
|
||||
@@ -1,69 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2010 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
#pragma once
|
||||
|
||||
#ifndef WITHOUT_HSA_BACKEND
|
||||
|
||||
#include "library.hpp"
|
||||
|
||||
/*! \addtogroup HSA OCL Stub Implementation
|
||||
* @{
|
||||
*/
|
||||
|
||||
//! HSA OCL STUB Implementation
|
||||
namespace roc {
|
||||
|
||||
//! Device settings
|
||||
class Settings : public device::Settings
|
||||
{
|
||||
public:
|
||||
union {
|
||||
struct {
|
||||
uint doublePrecision_: 1; //!< Enables double precision support
|
||||
uint pollCompletion_: 1; //!< Enables polling in HSA
|
||||
uint enableLocalMemory_ : 1; //!< Enable GPUVM memory
|
||||
uint enableImageHandle_: 1; //!< Use HSAIL image/sampler pointer
|
||||
uint enableNCMode_: 1; //!< Enable Non Coherent mode for system memory
|
||||
uint enablePartialDispatch_: 1; //!< Enable support for Partial Dispatch
|
||||
uint reserved_: 26;
|
||||
};
|
||||
uint value_;
|
||||
};
|
||||
|
||||
//! Default max workgroup size for 1D
|
||||
int maxWorkGroupSize_;
|
||||
|
||||
//! Default max workgroup sizes for 2D
|
||||
int maxWorkGroupSize2DX_;
|
||||
int maxWorkGroupSize2DY_;
|
||||
|
||||
//! Default max workgroup sizes for 3D
|
||||
int maxWorkGroupSize3DX_;
|
||||
int maxWorkGroupSize3DY_;
|
||||
int maxWorkGroupSize3DZ_;
|
||||
|
||||
uint kernargPoolSize_;
|
||||
uint signalPoolSize_;
|
||||
|
||||
//! Default constructor
|
||||
Settings();
|
||||
|
||||
//! Creates settings
|
||||
bool create(bool doublePrecision);
|
||||
|
||||
private:
|
||||
//! Disable copy constructor
|
||||
Settings(const Settings&);
|
||||
|
||||
//! Disable assignment
|
||||
Settings& operator=(const Settings&);
|
||||
|
||||
//! Overrides current settings based on registry/environment
|
||||
void override();
|
||||
};
|
||||
|
||||
/*@}*/} // namespace roc
|
||||
|
||||
#endif /*WITHOUT_HSA_BACKEND*/
|
||||
|
||||
Tá difríocht comhad cosc orthu toisc go bhfuil sé ró-mhór
Difríocht Luchtaigh
@@ -1,251 +0,0 @@
|
||||
//
|
||||
// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "rocdevice.hpp"
|
||||
#include "utils/util.hpp"
|
||||
#include "hsa.h"
|
||||
#include "hsa_ext_image.h"
|
||||
#include "hsa_ext_finalize.h"
|
||||
#include "hsa_ext_amd.h"
|
||||
#include "rocprintf.hpp"
|
||||
|
||||
namespace roc {
|
||||
class Device;
|
||||
class Memory;
|
||||
class Timestamp;
|
||||
|
||||
struct ProfilingSignal : public amd::HeapObject
|
||||
{
|
||||
hsa_signal_t signal_; //!< HSA signal to track profiling information
|
||||
Timestamp* ts_; //!< Timestamp object associated with the signal
|
||||
|
||||
ProfilingSignal(): ts_(nullptr) { signal_.handle = 0; }
|
||||
};
|
||||
|
||||
// Timestamp for keeping track of some profiling information for various commands
|
||||
// including EnqueueNDRangeKernel and clEnqueueCopyBuffer.
|
||||
class Timestamp {
|
||||
private:
|
||||
uint64_t start_;
|
||||
uint64_t end_;
|
||||
ProfilingSignal* profilingSignal_;
|
||||
hsa_agent_t agent_;
|
||||
static double ticksToTime_;
|
||||
|
||||
public:
|
||||
uint64_t getStart() { checkGpuTime(); return start_; }
|
||||
|
||||
uint64_t getEnd() { checkGpuTime(); return end_; }
|
||||
|
||||
void setProfilingSignal(ProfilingSignal* signal) { profilingSignal_ = signal; }
|
||||
|
||||
const ProfilingSignal* getProfilingSignal() const { return profilingSignal_; }
|
||||
|
||||
void setAgent(hsa_agent_t agent) { agent_ = agent; }
|
||||
|
||||
Timestamp() : start_(0), end_(0), profilingSignal_(nullptr) {
|
||||
agent_.handle = 0;
|
||||
}
|
||||
|
||||
~Timestamp() {}
|
||||
|
||||
//! Finds execution ticks on GPU
|
||||
void checkGpuTime() {
|
||||
if (profilingSignal_ != nullptr) {
|
||||
hsa_amd_profiling_dispatch_time_t time;
|
||||
hsa_amd_profiling_get_dispatch_time(agent_, profilingSignal_->signal_, &time);
|
||||
start_ = time.start * ticksToTime_;
|
||||
end_ = time.end * ticksToTime_;
|
||||
profilingSignal_->ts_ = nullptr;
|
||||
profilingSignal_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
// Start a timestamp (get timestamp from OS)
|
||||
void start() {
|
||||
start_ = amd::Os::timeNanos();
|
||||
}
|
||||
|
||||
// End a timestamp (get timestamp from OS)
|
||||
void end() {
|
||||
end_ = amd::Os::timeNanos();
|
||||
}
|
||||
|
||||
static void setGpuTicksToTime(double ticksToTime) { ticksToTime_=ticksToTime; }
|
||||
static double getGpuTicksToTime() { return ticksToTime_; }
|
||||
};
|
||||
|
||||
class VirtualGPU : public device::VirtualDevice {
|
||||
public:
|
||||
//! Initial signal value
|
||||
static const hsa_signal_value_t InitSignalValue = 1;
|
||||
|
||||
class MemoryDependency : public amd::EmbeddedObject
|
||||
{
|
||||
public:
|
||||
//! Default constructor
|
||||
MemoryDependency()
|
||||
: memObjectsInQueue_(NULL)
|
||||
, numMemObjectsInQueue_(0)
|
||||
, maxMemObjectsInQueue_(0) {}
|
||||
|
||||
~MemoryDependency() { delete [] memObjectsInQueue_; }
|
||||
|
||||
//! Creates memory dependecy structure
|
||||
bool create(size_t numMemObj);
|
||||
|
||||
//! Notify the tracker about new kernel
|
||||
void newKernel() { endMemObjectsInQueue_ = numMemObjectsInQueue_; }
|
||||
|
||||
//! Validates memory object on dependency
|
||||
void validate(VirtualGPU& gpu, const Memory* memory, bool readOnly);
|
||||
|
||||
//! Clear memory dependency
|
||||
void clear(bool all = true);
|
||||
|
||||
private:
|
||||
struct MemoryState {
|
||||
uint64_t start_; //! Busy memory start address
|
||||
uint64_t end_; //! Busy memory end address
|
||||
bool readOnly_; //! Current GPU state in the queue
|
||||
};
|
||||
|
||||
MemoryState* memObjectsInQueue_; //!< Memory object state in the queue
|
||||
size_t endMemObjectsInQueue_; //!< End of mem objects in the queue
|
||||
size_t numMemObjectsInQueue_; //!< Number of mem objects in the queue
|
||||
size_t maxMemObjectsInQueue_; //!< Maximum number of mem objects in the queue
|
||||
};
|
||||
|
||||
VirtualGPU(Device &device);
|
||||
~VirtualGPU();
|
||||
|
||||
bool create(bool profilingEna);
|
||||
bool terminate();
|
||||
const Device& dev() const { return roc_device_; }
|
||||
|
||||
void profilingBegin(amd::Command &command, bool drmProfiling = false);
|
||||
void profilingEnd(amd::Command &command);
|
||||
|
||||
void updateCommandsState(amd::Command* list);
|
||||
|
||||
void submitReadMemory(amd::ReadMemoryCommand& cmd);
|
||||
void submitWriteMemory(amd::WriteMemoryCommand& cmd);
|
||||
void submitCopyMemory(amd::CopyMemoryCommand& cmd);
|
||||
void submitMapMemory(amd::MapMemoryCommand& cmd);
|
||||
void submitUnmapMemory(amd::UnmapMemoryCommand& cmd);
|
||||
void submitKernel(amd::NDRangeKernelCommand& cmd);
|
||||
bool submitKernelInternal(
|
||||
const amd::NDRangeContainer& sizes, //!< Workload sizes
|
||||
const amd::Kernel& kernel, //!< Kernel for execution
|
||||
const_address parameters, //!< Parameters for the kernel
|
||||
void *event_handle //!< Handle to OCL event for debugging
|
||||
);
|
||||
void submitNativeFn(amd::NativeFnCommand& cmd);
|
||||
void submitMarker(amd::Marker& cmd);
|
||||
|
||||
void submitAcquireExtObjects(amd::AcquireExtObjectsCommand& cmd);
|
||||
void submitReleaseExtObjects(amd::ReleaseExtObjectsCommand& cmd);
|
||||
void submitPerfCounter(amd::PerfCounterCommand& cmd){};
|
||||
|
||||
void flush(amd::Command* list = NULL, bool wait = false);
|
||||
void submitFillMemory(amd::FillMemoryCommand& cmd);
|
||||
void submitMigrateMemObjects(amd::MigrateMemObjectsCommand& cmd);
|
||||
|
||||
// { roc OpenCL integration
|
||||
// Added these stub (no-ops) implementation of pure virtual methods,
|
||||
// when integrating HSA and OpenCL branches.
|
||||
// TODO: After inegration, whoever is working on VirtualGPU should write
|
||||
// actual implemention.
|
||||
virtual void submitSignal(amd::SignalCommand &cmd) {}
|
||||
virtual void submitMakeBuffersResident(amd::MakeBuffersResidentCommand &cmd) {}
|
||||
|
||||
virtual void submitSvmFreeMemory(amd::SvmFreeMemoryCommand& cmd);
|
||||
virtual void submitSvmCopyMemory(amd::SvmCopyMemoryCommand& cmd);
|
||||
virtual void submitSvmFillMemory(amd::SvmFillMemoryCommand& cmd);
|
||||
virtual void submitSvmMapMemory(amd::SvmMapMemoryCommand& cmd);
|
||||
virtual void submitSvmUnmapMemory(amd::SvmUnmapMemoryCommand& cmd);
|
||||
|
||||
void submitThreadTraceMemObjects(amd::ThreadTraceMemObjectsCommand &cmd) {}
|
||||
void submitThreadTrace(amd::ThreadTraceCommand &vcmd) {}
|
||||
|
||||
/**
|
||||
* @brief Waits on an outstanding kernel without regard to how
|
||||
* it was dispatched - with or without a signal
|
||||
*
|
||||
* @return bool true if Wait returned successfully, false
|
||||
* otherwise
|
||||
*/
|
||||
bool releaseGpuMemoryFence();
|
||||
|
||||
hsa_agent_t gpu_device() { return gpu_device_; }
|
||||
hsa_queue_t* gpu_queue() { return gpu_queue_; }
|
||||
|
||||
// Return pointer to PrintfDbg
|
||||
PrintfDbg* printfDbg() const {return printfdbg_;}
|
||||
|
||||
//! Returns memory dependency class
|
||||
MemoryDependency& memoryDependency() { return memoryDependency_; }
|
||||
|
||||
//! Detects memory dependency for HSAIL kernels and uses appropriate AQL header
|
||||
bool processMemObjects(
|
||||
const amd::Kernel& kernel, //!< AMD kernel object for execution
|
||||
const_address params //!< Pointer to the param's store
|
||||
);
|
||||
|
||||
// } roc OpenCL integration
|
||||
private:
|
||||
bool dispatchAqlPacket(
|
||||
hsa_kernel_dispatch_packet_t* packet, bool blocking = true);
|
||||
bool dispatchAqlPacket(
|
||||
hsa_barrier_and_packet_t* packet, bool blocking = true);
|
||||
template<typename AqlPacket> bool dispatchGenericAqlPacket(
|
||||
AqlPacket* packet, bool blocking);
|
||||
void dispatchBarrierPacket(const hsa_barrier_and_packet_t* packet);
|
||||
void initializeDispatchPacket(hsa_kernel_dispatch_packet_t* packet,
|
||||
amd::NDRangeContainer& sizes);
|
||||
|
||||
bool initPool(size_t kernarg_pool_size, uint signal_pool_count);
|
||||
void destroyPool();
|
||||
|
||||
void* allocKernArg(size_t size, size_t alignment);
|
||||
void resetKernArgPool() { kernarg_pool_cur_offset_ = 0; }
|
||||
|
||||
//! Updates AQL header for the upcomming dispatch
|
||||
void setAqlHeader(uint16_t header) { aqlHeader_ = header; }
|
||||
|
||||
/**
|
||||
* @brief Maintains the list of sampler allocated for one or more kernel
|
||||
* submissions.
|
||||
*/
|
||||
std::vector<hsa_ext_sampler_t> samplerList_;
|
||||
|
||||
/**
|
||||
* @brief Indicates if a kernel dispatch is outstanding. This flag is
|
||||
* used to synchronized on kernel outputs.
|
||||
*/
|
||||
bool hasPendingDispatch_;
|
||||
Timestamp* timestamp_;
|
||||
hsa_agent_t gpu_device_; //!< Physical device
|
||||
hsa_queue_t* gpu_queue_; //!< Queue associated with a gpu
|
||||
hsa_barrier_and_packet_t barrier_packet_;
|
||||
hsa_signal_t barrier_signal_;
|
||||
uint32_t dispatch_id_; //!< This variable must be updated atomically.
|
||||
Device& roc_device_; //!< roc device object
|
||||
void * tools_lib_;
|
||||
PrintfDbg* printfdbg_;
|
||||
MemoryDependency memoryDependency_; //!< Memory dependency class
|
||||
uint16_t aqlHeader_; //!< AQL header for dispatch
|
||||
|
||||
char* kernarg_pool_base_;
|
||||
size_t kernarg_pool_size_;
|
||||
uint kernarg_pool_cur_offset_;
|
||||
|
||||
std::vector<ProfilingSignal> signal_pool_; //!< Pool of signals for profiling
|
||||
|
||||
friend class Timestamp;
|
||||
};
|
||||
}
|
||||
|
||||
Tagairt in Eagrán Nua
Cuir bac ar úsáideoir