b8c2ac4de4
Change-Id: Id7fbb638c1088c23df52fee877cd790d637b1ffb
182 baris
6.3 KiB
C++
182 baris
6.3 KiB
C++
/* Copyright (c) 2019 - 2021 Advanced Micro Devices, Inc.
|
|
|
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
of this software and associated documentation files (the "Software"), to deal
|
|
in the Software without restriction, including without limitation the rights
|
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
copies of the Software, and to permit persons to whom the Software is
|
|
furnished to do so, subject to the following conditions:
|
|
|
|
The above copyright notice and this permission notice shall be included in
|
|
all copies or substantial portions of the Software.
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
THE SOFTWARE. */
|
|
|
|
#pragma once
|
|
|
|
#include "top.hpp"
|
|
#include "device/device.hpp"
|
|
#include "device/devhcmessages.hpp"
|
|
#include <cstddef>
|
|
|
|
#if defined(__clang__)
|
|
#if __has_feature(address_sanitizer)
|
|
#include "device/devurilocator.hpp"
|
|
#endif
|
|
#endif
|
|
|
|
namespace amd {
|
|
|
|
/** \file Support for invoking host services from the device.
|
|
*
|
|
* A hostcall is a fixed-size request generated by a kernel running
|
|
* on the device, for some predefined service provided by the
|
|
* host. The life-cycle of a hostcall is as follows:
|
|
*
|
|
* 1. A workitem in some kernel dispatch submits a request as a
|
|
* "packet" in a "hostcall buffer". The workitem blocks until it
|
|
* receives a response from the host.
|
|
*
|
|
* 2. A host thread called the "hostcall listener" notices the packet
|
|
* and invokes the desired service on the host.
|
|
*
|
|
* 3. When the service completes, the listener copies the response
|
|
* into the request packet. This unblocks the workitem, and the
|
|
* hostcall is said to be completed.
|
|
*
|
|
* The hostcall listeners and buffers are managed by the VDI
|
|
* runtime. The typical flow is as follows:
|
|
*
|
|
* - Create and launch one or more hostcall listeners.
|
|
*
|
|
* - Create and initialize a distinct hostcall buffer for each
|
|
* command queue in hardware (e.g., an hsa_queue_t on ROCm).
|
|
*
|
|
* - Register this buffer with the appropriate listener.
|
|
*
|
|
* - When a buffer is no longer used, deregister and then free
|
|
* it. This usually happens when the corresponding hardware queue
|
|
* is freed.
|
|
*
|
|
* - Destroy the listener(s) when they are no longer required. This must be
|
|
* done before exiting the application, so that the listener
|
|
* threads can join() correctly.
|
|
*
|
|
* A single listener is sufficient to correctly handle all hostcall
|
|
* buffers created in the application. The client may also launch
|
|
* multiple listeners, as long as the same hostcall buffer is not
|
|
* registered with multiple listeners.
|
|
*/
|
|
|
|
/** \brief Determine the buffer size to be allocated.
 *
 *  \param num_packets Number of packets to be supported.
 *  \return Required size, including any internal padding required for
 *          the packets and their headers.
 */
std::size_t getHostcallBufferSize(uint32_t num_packets);
|
|
|
|
/** \brief Return the required alignment for a hostcall buffer.
 *
 *  Takes no arguments; the definition is not part of this header.
 */
uint32_t getHostcallBufferAlignment();
|
|
|
|
/** \brief Enable hostcalls on \p buffer for device \p dev.
 *
 *  Only the declaration is visible in this header.
 *  NOTE(review): presumably this initializes \p buffer for \p numPackets
 *  packets and registers it with a hostcall listener, following the flow
 *  described in the file-level comment -- confirm against the definition.
 *
 *  \param dev        Device the buffer is being enabled for.
 *  \param buffer     Caller-provided hostcall buffer memory.
 *  \param numPackets Number of packets the buffer is meant to support.
 *  \return NOTE(review): presumably true on success -- confirm.
 */
bool enableHostcalls(const amd::Device& dev, void* buffer, uint32_t numPackets);
|
|
/** \brief Disable hostcalls on \p buffer.
 *
 *  Only the declaration is visible in this header.
 *  NOTE(review): presumably this deregisters \p buffer from its hostcall
 *  listener so that it can be freed -- confirm against the definition.
 *
 *  \param buffer Hostcall buffer previously passed to enableHostcalls().
 */
void disableHostcalls(void* buffer);
|
|
|
|
/** \brief Named values for a hostcall signal.
 *
 *  NOTE(review): the code that reads or writes these values is not visible
 *  in this header; presumably they are states of the doorbell signal --
 *  confirm against the listener implementation.
 */
enum SignalValue {
  SIGNAL_DONE = 0,
  SIGNAL_INIT = 1
};
|
|
|
|
/** \brief Packet payload
 *
 *  Contains 64 slots of 8 ulongs each, one for each workitem in the
 *  wave. A slot with index \c i contains valid data if the
 *  corresponding bit in PacketHeader::activemask_ is set.
 */
struct Payload {
  /** Indexed as slots[workitem][ulong]: 64 slots of 8 64-bit values. */
  uint64_t slots[64][8];
};
|
|
|
|
/** \brief Packet header
 *
 *  Member order and types define the packet layout; the static_assert
 *  below requires the struct to remain standard-layout, so do not reorder
 *  or add members casually.
 */
struct PacketHeader {
  /** Tagged pointer to the next packet in an intrusive stack */
  uint64_t next_;
  /** Bitmask that represents payload slots with valid data */
  uint64_t activemask_;
  /** Service ID requested by the wave */
  uint32_t service_;
  /** Control bits.
   *  \li 0: \c READY flag. Indicates packet awaiting a host response.
   */
  std::atomic<uint32_t> control_;
};

static_assert(std::is_standard_layout<PacketHeader>::value,
              "the hostcall packet must be useable from other languages");
|
|
|
|
/** \brief Field offsets in the packet control field.
 *
 *  Each value is the position of the lowest bit of the named field inside
 *  PacketHeader::control_; the matching field sizes are in ControlWidth.
 */
enum ControlOffset {
  CONTROL_OFFSET_READY_FLAG = 0,
  CONTROL_OFFSET_RESERVED0 = 1,
};
|
|
|
|
/** \brief Field widths in the packet control field.
 *
 *  Each value is the number of bits occupied by the named field inside
 *  PacketHeader::control_; together the fields cover all 32 bits.
 */
enum ControlWidth {
  CONTROL_WIDTH_READY_FLAG = 1,
  CONTROL_WIDTH_RESERVED0 = 31,
};
|
|
|
|
/** \brief Shared buffer submitting hostcall requests.
|
|
*
|
|
* Holds hostcall packets requested by all kernels executing on the
|
|
* same device queue. Each hostcall buffer is associated with at most
|
|
* one device queue.
|
|
*
|
|
* Packets in the buffer are accessed using 64-bit tagged pointers to mitigate
|
|
* the ABA problem in lock-free stacks. The index_mask is used to extract the
|
|
* lower bits of the pointer, which form the index into the packet array. The
|
|
* remaining higher bits define a tag that is incremented on every pop from a
|
|
* stack.
|
|
*/
|
|
class HostcallBuffer {
|
|
/** Array of packet headers */
|
|
PacketHeader* headers_;
|
|
/** Array of packet payloads */
|
|
Payload* payloads_;
|
|
/** Signal used by kernels to indicate new work */
|
|
void* doorbell_;
|
|
/** Stack of free packets. Uses tagged pointers. */
|
|
uint64_t free_stack_;
|
|
/** Stack of ready packets. Uses tagged pointers */
|
|
std::atomic<uint64_t> ready_stack_;
|
|
/** Mask for accessing the packet index in the tagged pointer. */
|
|
uint64_t index_mask_;
|
|
/** Some services need a device**/
|
|
const amd::Device* device_;
|
|
|
|
PacketHeader* getHeader(uint64_t ptr) const;
|
|
Payload* getPayload(uint64_t ptr) const;
|
|
|
|
public:
|
|
void processPackets(MessageHandler& messages);
|
|
void initialize(uint32_t num_packets);
|
|
void setDoorbell(void* doorbell) { doorbell_ = doorbell; };
|
|
void setDevice(const amd::Device* dptr) { device_ = dptr; };
|
|
|
|
#if defined(__clang__)
|
|
#if __has_feature(address_sanitizer)
|
|
private:
|
|
device::UriLocator* uri_locator;
|
|
public:
|
|
void setUriLocator(device::UriLocator* uri_l) { uri_locator = uri_l; };
|
|
#endif
|
|
#endif
|
|
};
|
|
|
|
static_assert(std::is_standard_layout<HostcallBuffer>::value,
|
|
"the hostcall buffer must be useable from other languages");
|
|
|
|
}// namespace amd
|