wsl/hsakmt: move src/inc to include/impl

Signed-off-by: Flora Cui <flora.cui@amd.com>
Reviewed-by: Horatio Zhang <Hongkun.Zhang@amd.com>
Part-of: <http://10.67.69.192/wsl/rocr-runtime/-/merge_requests/15>
This commit is contained in:
Flora Cui
2024-12-05 18:02:00 +08:00
committed by Frank Min
melakukan 240dc71b91
12 mengubah file dengan 3145 tambahan dan 0 penghapusan
+1090
Melihat File
File diff ditekan karena terlalu besar Load Diff
+363
Melihat File
@@ -0,0 +1,363 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
// This file is used only for open source cmake builds, if we hardcode the
// register values in amd_aql_queue.cpp then this file won't be required. For
// now we are using this file where register details are spelled out in the
// structs/unions below.
#ifndef _WSL_INC_REGISTERS_H_
#define _WSL_INC_REGISTERS_H_
// Resource type codes (2-bit range, 0..3). Only value 0 is a named buffer
// type; the remaining three are reserved.
// NOTE(review): presumably written to the TYPE bit-field of the
// SQ_BUF_RSRC_WORD3* unions in this header - confirm against users.
typedef enum SQ_RSRC_BUF_TYPE {
  SQ_RSRC_BUF        = 0x0,
  SQ_RSRC_BUF_RSVD_1 = 0x1,
  SQ_RSRC_BUF_RSVD_2 = 0x2,
  SQ_RSRC_BUF_RSVD_3 = 0x3,
} SQ_RSRC_BUF_TYPE;
// Buffer data-format codes (0x0..0xf). Each name spells the per-component
// bit widths, e.g. BUF_DATA_FORMAT_16_16 is two 16-bit components.
typedef enum BUF_DATA_FORMAT {
  BUF_DATA_FORMAT_INVALID     = 0x0,
  BUF_DATA_FORMAT_8           = 0x1,
  BUF_DATA_FORMAT_16          = 0x2,
  BUF_DATA_FORMAT_8_8         = 0x3,
  BUF_DATA_FORMAT_32          = 0x4,
  BUF_DATA_FORMAT_16_16       = 0x5,
  BUF_DATA_FORMAT_10_11_11    = 0x6,
  BUF_DATA_FORMAT_11_11_10    = 0x7,
  BUF_DATA_FORMAT_10_10_10_2  = 0x8,
  BUF_DATA_FORMAT_2_10_10_10  = 0x9,
  BUF_DATA_FORMAT_8_8_8_8     = 0xa,
  BUF_DATA_FORMAT_32_32       = 0xb,
  BUF_DATA_FORMAT_16_16_16_16 = 0xc,
  BUF_DATA_FORMAT_32_32_32    = 0xd,
  BUF_DATA_FORMAT_32_32_32_32 = 0xe,
  BUF_DATA_FORMAT_RESERVED_15 = 0xf,
} BUF_DATA_FORMAT;
// Buffer numeric-format codes (0x0..0x7).
// Value 0x6 deliberately has two names: SNORM_OGL for the SI/CI suffix and
// RESERVED_6 for the VI suffix.
typedef enum BUF_NUM_FORMAT {
  BUF_NUM_FORMAT_UNORM             = 0x0,
  BUF_NUM_FORMAT_SNORM             = 0x1,
  BUF_NUM_FORMAT_USCALED           = 0x2,
  BUF_NUM_FORMAT_SSCALED           = 0x3,
  BUF_NUM_FORMAT_UINT              = 0x4,
  BUF_NUM_FORMAT_SINT              = 0x5,
  BUF_NUM_FORMAT_SNORM_OGL__SI__CI = 0x6,
  BUF_NUM_FORMAT_RESERVED_6__VI    = 0x6,
  BUF_NUM_FORMAT_FLOAT             = 0x7,
} BUF_NUM_FORMAT;
// Combined buffer format code; only the 32-bit unsigned integer entry
// (value 0x14) is spelled out in this header.
// NOTE(review): presumably the value for the FORMAT bit-field of the
// GFX10+ SQ_BUF_RSRC_WORD3 variants - confirm against users.
typedef enum BUF_FORMAT {
  BUF_FORMAT_32_UINT = 0x14,
} BUF_FORMAT;
// Component selector codes: constants 0 and 1, two reserved values, then
// the X/Y/Z/W components (3-bit range, 0..7).
// NOTE(review): presumably the values for the DST_SEL_* bit-fields of the
// SQ_BUF_RSRC_WORD3* unions - confirm against users.
typedef enum SQ_SEL_XYZW01 {
  SQ_SEL_0          = 0x0,
  SQ_SEL_1          = 0x1,
  SQ_SEL_RESERVED_0 = 0x2,
  SQ_SEL_RESERVED_1 = 0x3,
  SQ_SEL_X          = 0x4,
  SQ_SEL_Y          = 0x5,
  SQ_SEL_Z          = 0x6,
  SQ_SEL_W          = 0x7,
} SQ_SEL_XYZW01;
// 32-bit COMPUTE_TMPRING_SIZE value with a bit-field view and raw views.
// Declared widths in the LITTLEENDIAN_CPU branch, in order:
//   WAVES 12, WAVESIZE 13, 7 unnamed padding bits (32 bits total).
// The BIGENDIAN_CPU branch lists the same fields in reverse order.
// If neither macro is defined the bit-field struct is empty and only the
// raw views are usable.
// NOTE(review): actual bit positions depend on the compiler's bit-field
// allocation order - confirm for the target ABI.
union COMPUTE_TMPRING_SIZE {
struct {
#if defined(LITTLEENDIAN_CPU)
unsigned int WAVES : 12;
unsigned int WAVESIZE : 13;
unsigned int : 7;
#elif defined(BIGENDIAN_CPU)
unsigned int : 7;
unsigned int WAVESIZE : 13;
unsigned int WAVES : 12;
#endif
} bitfields, bits;
// Raw views of the same storage: unsigned, signed and float.
unsigned int u32All;
signed int i32All;
float f32All;
};
// GFX11 variant of the 32-bit COMPUTE_TMPRING_SIZE value.
// Declared widths in the LITTLEENDIAN_CPU branch, in order:
//   WAVES 12, WAVESIZE 15, 5 unnamed padding bits (32 bits total).
// The BIGENDIAN_CPU branch lists the same fields in reverse order.
// If neither macro is defined the bit-field struct is empty and only the
// raw views are usable.
union COMPUTE_TMPRING_SIZE_GFX11 {
struct {
#if defined(LITTLEENDIAN_CPU)
unsigned int WAVES : 12;
unsigned int WAVESIZE : 15;
unsigned int : 5;
#elif defined(BIGENDIAN_CPU)
unsigned int : 5;
unsigned int WAVESIZE : 15;
unsigned int WAVES : 12;
#endif
} bitfields, bits;
// Raw views of the same storage: unsigned, signed and float.
unsigned int u32All;
signed int i32All;
float f32All;
};
// GFX12 variant of the 32-bit COMPUTE_TMPRING_SIZE value.
// Declared widths in the LITTLEENDIAN_CPU branch, in order:
//   WAVES 12, WAVESIZE 18, 2 unnamed padding bits (32 bits total).
// The BIGENDIAN_CPU branch lists the same fields in reverse order.
// If neither macro is defined the bit-field struct is empty and only the
// raw views are usable.
union COMPUTE_TMPRING_SIZE_GFX12 {
struct {
#if defined(LITTLEENDIAN_CPU)
unsigned int WAVES : 12;
unsigned int WAVESIZE : 18;
unsigned int : 2;
#elif defined(BIGENDIAN_CPU)
unsigned int : 2;
unsigned int WAVESIZE : 18;
unsigned int WAVES : 12;
#endif
} bitfields, bits;
// Raw views of the same storage: unsigned, signed and float.
unsigned int u32All;
signed int i32All;
float f32All;
};
// Word 0 of the buffer resource descriptor: one full-width 32-bit
// BASE_ADDRESS field (the upper address bits are the BASE_ADDRESS_HI field
// of SQ_BUF_RSRC_WORD1).
// Both endian branches are identical; if neither LITTLEENDIAN_CPU nor
// BIGENDIAN_CPU is defined the bit-field struct is empty.
union SQ_BUF_RSRC_WORD0 {
struct {
#if defined(LITTLEENDIAN_CPU)
unsigned int BASE_ADDRESS : 32;
#elif defined(BIGENDIAN_CPU)
unsigned int BASE_ADDRESS : 32;
#endif
} bitfields, bits;
// Raw views of the same storage: unsigned, signed and float.
unsigned int u32All;
signed int i32All;
float f32All;
};
// Word 1 of the buffer resource descriptor.
// Declared widths in the LITTLEENDIAN_CPU branch, in order:
//   BASE_ADDRESS_HI 16, STRIDE 14, CACHE_SWIZZLE 1, SWIZZLE_ENABLE 1
//   (32 bits total).
// The BIGENDIAN_CPU branch lists the same fields in reverse order.
// If neither macro is defined the bit-field struct is empty.
union SQ_BUF_RSRC_WORD1 {
struct {
#if defined(LITTLEENDIAN_CPU)
unsigned int BASE_ADDRESS_HI : 16;
unsigned int STRIDE : 14;
unsigned int CACHE_SWIZZLE : 1;
unsigned int SWIZZLE_ENABLE : 1;
#elif defined(BIGENDIAN_CPU)
unsigned int SWIZZLE_ENABLE : 1;
unsigned int CACHE_SWIZZLE : 1;
unsigned int STRIDE : 14;
unsigned int BASE_ADDRESS_HI : 16;
#endif
} bitfields, bits;
// Raw views of the same storage: unsigned, signed and float.
unsigned int u32All;
signed int i32All;
float f32All;
};
// GFX11 variant of descriptor word 1. Compared with SQ_BUF_RSRC_WORD1 there
// is no CACHE_SWIZZLE bit and SWIZZLE_ENABLE is 2 bits wide.
// Declared widths in the LITTLEENDIAN_CPU branch, in order:
//   BASE_ADDRESS_HI 16, STRIDE 14, SWIZZLE_ENABLE 2 (32 bits total).
// The BIGENDIAN_CPU branch lists the same fields in reverse order.
// If neither macro is defined the bit-field struct is empty.
union SQ_BUF_RSRC_WORD1_GFX11 {
struct {
#if defined(LITTLEENDIAN_CPU)
unsigned int BASE_ADDRESS_HI : 16;
unsigned int STRIDE : 14;
unsigned int SWIZZLE_ENABLE : 2;
#elif defined(BIGENDIAN_CPU)
unsigned int SWIZZLE_ENABLE : 2;
unsigned int STRIDE : 14;
unsigned int BASE_ADDRESS_HI : 16;
#endif
} bitfields, bits;
// Raw views of the same storage: unsigned, signed and float.
unsigned int u32All;
signed int i32All;
float f32All;
};
// Word 2 of the buffer resource descriptor: one full-width 32-bit
// NUM_RECORDS field.
// Both endian branches are identical; if neither LITTLEENDIAN_CPU nor
// BIGENDIAN_CPU is defined the bit-field struct is empty.
union SQ_BUF_RSRC_WORD2 {
struct {
#if defined(LITTLEENDIAN_CPU)
unsigned int NUM_RECORDS : 32;
#elif defined(BIGENDIAN_CPU)
unsigned int NUM_RECORDS : 32;
#endif
} bitfields, bits;
// Raw views of the same storage: unsigned, signed and float.
unsigned int u32All;
signed int i32All;
float f32All;
};
// Word 3 of the buffer resource descriptor (base layout; the GFX10/11/12
// layouts are separate unions in this header).
// Declared widths in the LITTLEENDIAN_CPU branch, in order:
//   DST_SEL_X 3, DST_SEL_Y 3, DST_SEL_Z 3, DST_SEL_W 3, NUM_FORMAT 3,
//   DATA_FORMAT 4, ELEMENT_SIZE 2, INDEX_STRIDE 2, ADD_TID_ENABLE 1,
//   ATC__CI__VI 1, HASH_ENABLE 1, HEAP 1, MTYPE__CI__VI 3, TYPE 2
//   (32 bits total).
// The BIGENDIAN_CPU branch lists the same fields in reverse order.
// If neither macro is defined the bit-field struct is empty.
union SQ_BUF_RSRC_WORD3 {
struct {
#if defined(LITTLEENDIAN_CPU)
unsigned int DST_SEL_X : 3;
unsigned int DST_SEL_Y : 3;
unsigned int DST_SEL_Z : 3;
unsigned int DST_SEL_W : 3;
unsigned int NUM_FORMAT : 3;
unsigned int DATA_FORMAT : 4;
unsigned int ELEMENT_SIZE : 2;
unsigned int INDEX_STRIDE : 2;
unsigned int ADD_TID_ENABLE : 1;
unsigned int ATC__CI__VI : 1;
unsigned int HASH_ENABLE : 1;
unsigned int HEAP : 1;
unsigned int MTYPE__CI__VI : 3;
unsigned int TYPE : 2;
#elif defined(BIGENDIAN_CPU)
unsigned int TYPE : 2;
unsigned int MTYPE__CI__VI : 3;
unsigned int HEAP : 1;
unsigned int HASH_ENABLE : 1;
unsigned int ATC__CI__VI : 1;
unsigned int ADD_TID_ENABLE : 1;
unsigned int INDEX_STRIDE : 2;
unsigned int ELEMENT_SIZE : 2;
unsigned int DATA_FORMAT : 4;
unsigned int NUM_FORMAT : 3;
unsigned int DST_SEL_W : 3;
unsigned int DST_SEL_Z : 3;
unsigned int DST_SEL_Y : 3;
unsigned int DST_SEL_X : 3;
#endif
} bitfields, bits;
// Raw views of the same storage: unsigned, signed and float.
unsigned int u32All;
signed int i32All;
float f32All;
};
// GFX10 layout of descriptor word 3. A single 7-bit FORMAT field takes the
// place of the separate NUM_FORMAT/DATA_FORMAT fields of SQ_BUF_RSRC_WORD3.
// Declared widths in the LITTLEENDIAN_CPU branch, in order:
//   DST_SEL_X 3, DST_SEL_Y 3, DST_SEL_Z 3, DST_SEL_W 3, FORMAT 7,
//   RESERVED1 2, INDEX_STRIDE 2, ADD_TID_ENABLE 1, RESOURCE_LEVEL 1,
//   RESERVED2 3, OOB_SELECT 2, TYPE 2 (32 bits total).
// The BIGENDIAN_CPU branch lists the same fields in reverse order.
// If neither macro is defined the bit-field struct is empty.
union SQ_BUF_RSRC_WORD3_GFX10 {
struct {
#if defined(LITTLEENDIAN_CPU)
unsigned int DST_SEL_X : 3;
unsigned int DST_SEL_Y : 3;
unsigned int DST_SEL_Z : 3;
unsigned int DST_SEL_W : 3;
unsigned int FORMAT : 7;
unsigned int RESERVED1 : 2;
unsigned int INDEX_STRIDE : 2;
unsigned int ADD_TID_ENABLE : 1;
unsigned int RESOURCE_LEVEL : 1;
unsigned int RESERVED2 : 3;
unsigned int OOB_SELECT : 2;
unsigned int TYPE : 2;
#elif defined(BIGENDIAN_CPU)
unsigned int TYPE : 2;
unsigned int OOB_SELECT : 2;
unsigned int RESERVED2 : 3;
unsigned int RESOURCE_LEVEL : 1;
unsigned int ADD_TID_ENABLE : 1;
unsigned int INDEX_STRIDE : 2;
unsigned int RESERVED1 : 2;
unsigned int FORMAT : 7;
unsigned int DST_SEL_W : 3;
unsigned int DST_SEL_Z : 3;
unsigned int DST_SEL_Y : 3;
unsigned int DST_SEL_X : 3;
#endif
} bitfields, bits;
// Raw views of the same storage: unsigned, signed and float.
unsigned int u32All;
signed int i32All;
float f32All;
};
// From V# Table
// GFX11 layout of descriptor word 3. Compared with the GFX10 layout, FORMAT
// is 6 bits and there is no RESOURCE_LEVEL bit.
// Declared widths in the LITTLEENDIAN_CPU branch, in order:
//   DST_SEL_X 3, DST_SEL_Y 3, DST_SEL_Z 3, DST_SEL_W 3, FORMAT 6,
//   RESERVED1 3, INDEX_STRIDE 2, ADD_TID_ENABLE 1, RESERVED2 4,
//   OOB_SELECT 2, TYPE 2 (32 bits total).
// The BIGENDIAN_CPU branch lists the same fields in reverse order.
// If neither macro is defined the bit-field struct is empty.
union SQ_BUF_RSRC_WORD3_GFX11 {
struct {
#if defined(LITTLEENDIAN_CPU)
unsigned int DST_SEL_X : 3;
unsigned int DST_SEL_Y : 3;
unsigned int DST_SEL_Z : 3;
unsigned int DST_SEL_W : 3;
unsigned int FORMAT : 6;
unsigned int RESERVED1 : 3;
unsigned int INDEX_STRIDE : 2;
unsigned int ADD_TID_ENABLE : 1;
unsigned int RESERVED2 : 4;
unsigned int OOB_SELECT : 2;
unsigned int TYPE : 2;
#elif defined(BIGENDIAN_CPU)
unsigned int TYPE : 2;
unsigned int OOB_SELECT : 2;
unsigned int RESERVED2 : 4;
unsigned int ADD_TID_ENABLE : 1;
unsigned int INDEX_STRIDE : 2;
unsigned int RESERVED1 : 3;
unsigned int FORMAT : 6;
unsigned int DST_SEL_W : 3;
unsigned int DST_SEL_Z : 3;
unsigned int DST_SEL_Y : 3;
unsigned int DST_SEL_X : 3;
#endif
} bitfields, bits;
// Raw views of the same storage: unsigned, signed and float.
unsigned int u32All;
signed int i32All;
float f32All;
};
// From V# Table
// GFX12 layout of descriptor word 3. The 4 bits that are RESERVED2 in the
// GFX11 layout are split into WRITE_COMPRESS_ENABLE, COMPRESSION_EN and
// COMPRESSION_ACCESS_MODE here.
// Declared widths in the LITTLEENDIAN_CPU branch, in order:
//   DST_SEL_X 3, DST_SEL_Y 3, DST_SEL_Z 3, DST_SEL_W 3, FORMAT 6,
//   RESERVED1 3, INDEX_STRIDE 2, ADD_TID_ENABLE 1, WRITE_COMPRESS_ENABLE 1,
//   COMPRESSION_EN 1, COMPRESSION_ACCESS_MODE 2, OOB_SELECT 2, TYPE 2
//   (32 bits total).
// The BIGENDIAN_CPU branch lists the same fields in reverse order.
// If neither macro is defined the bit-field struct is empty.
union SQ_BUF_RSRC_WORD3_GFX12 {
struct {
#if defined(LITTLEENDIAN_CPU)
unsigned int DST_SEL_X : 3;
unsigned int DST_SEL_Y : 3;
unsigned int DST_SEL_Z : 3;
unsigned int DST_SEL_W : 3;
unsigned int FORMAT : 6;
unsigned int RESERVED1 : 3;
unsigned int INDEX_STRIDE : 2;
unsigned int ADD_TID_ENABLE : 1;
unsigned int WRITE_COMPRESS_ENABLE : 1;
unsigned int COMPRESSION_EN : 1;
unsigned int COMPRESSION_ACCESS_MODE : 2;
unsigned int OOB_SELECT : 2;
unsigned int TYPE : 2;
#elif defined(BIGENDIAN_CPU)
unsigned int TYPE : 2;
unsigned int OOB_SELECT : 2;
unsigned int COMPRESSION_ACCESS_MODE : 2;
unsigned int COMPRESSION_EN : 1;
unsigned int WRITE_COMPRESS_ENABLE : 1;
unsigned int ADD_TID_ENABLE : 1;
unsigned int INDEX_STRIDE : 2;
unsigned int RESERVED1 : 3;
unsigned int FORMAT : 6;
unsigned int DST_SEL_W : 3;
unsigned int DST_SEL_Z : 3;
unsigned int DST_SEL_Y : 3;
unsigned int DST_SEL_X : 3;
#endif
} bitfields, bits;
// Raw views of the same storage: unsigned, signed and float.
unsigned int u32All;
signed int i32All;
float f32All;
};
#endif // header guard
+128
Melihat File
@@ -0,0 +1,128 @@
#ifndef _WSL_INC_THUNK_PROXY_H_
#define _WSL_INC_THUNK_PROXY_H_
#include <vector>
namespace thunk_proxy {
// Allocation domains. kDomainCount is not a domain; it equals the number
// of real entries that precede it.
enum AllocDomain {
  kSystem      = 0,
  kLocal       = 1,
  kUserMemory  = 2,
  kUserQueue   = 3,
  kDomainCount = 4,
};
// Memory property bits; each enumerator is a distinct single bit so values
// can be OR-ed together.
enum MemFlag {
  kFineGrain = 1ULL << 0,  // bit 0
  kKernarg   = 1ULL << 1,  // bit 1
};
// Engine selection bits; each enumerator is a distinct single bit so values
// can be OR-ed together.
enum EngineFlag {
  KCOMPUTE0 = 1ULL << 0,  // bit 0
  KDRMDMA   = 1ULL << 1,  // bit 1
  KDRMDMA1  = 1ULL << 2,  // bit 2
};
// Scheduling levels in ascending numeric order (low < normal < high).
// kNormal is the default argument of CreateHwQueuePrivData.
enum SchedLevel {
  kLow    = 0,
  kNormal = 1,
  kHigh   = 2,
};
// ASIC family identifiers, numbered consecutively from zero.
enum AsicFamilyType {
  kPlumBONITO = 0,
  kNavi44     = 1,
  kNavi48     = 2,
};
// Hardware-scheduler (HWS) enablement information.
// The anonymous union overlays two views of one 32-bit word:
//   hwsMask          - four 1-bit per-engine flags (gfx, compute, dma, dma1)
//                      followed by 28 reserved bits;
//   osHwsEnableFlags - the same word as a plain integer.
// NOTE(review): which bit each flag lands on depends on the compiler's
// bit-field allocation order - confirm for the target ABI.
struct HwsInfo {
union {
struct {
uint32_t gfxHwsEnabled : 1;
uint32_t computeHwsEnabled : 1;
uint32_t dmaHwsEnabled : 1;
uint32_t dma1HwsEnabled : 1;
uint32_t reserved : 28;
} hwsMask;
uint32_t osHwsEnableFlags;
};
uint64_t engineOrdinalMask; // Indicates which engines (by ordinal) support MES HWS
};
// Per-adapter device properties. WDDMDevice stores one instance and exposes
// most fields through read-only accessors.
//
// Declared as a named struct rather than a C-style `typedef struct { ... }`:
// the type holds a std::vector, so it is C++-only. Every scalar member has a
// default initializer so a default-constructed DeviceInfo never exposes
// indeterminate values.
//
// NOTE(review): this header relies on the includer for the fixed-width
// integer types and for MAX_PATH; only <vector> is included here.
struct DeviceInfo {
  // GFX IP version triple.
  int major = 0;
  int minor = 0;
  int stepping = 0;
  bool is_dgpu = false;
  char product_name[MAX_PATH] = {};
  const char *uuid = nullptr;
  AsicFamilyType family{};  // zero value, which equals kPlumBONITO
  uint32_t device_id = 0;
  uint32_t wavefront_size = 0;
  uint32_t compute_unit_count = 0;
  uint32_t max_engine_clock_mhz = 0;
  uint32_t watch_points_num = 0;
  uint32_t pci_bus_addr = 0;
  uint32_t memory_bus_width = 0;
  uint32_t max_memory_clock_mhz = 0;
  uint64_t gpu_counter_frequency = 0;
  uint32_t wave_per_cu = 0;
  uint32_t simd_per_cu = 0;
  uint32_t max_scratch_slots_per_cu = 0;
  uint32_t num_shader_engine = 0;
  uint32_t shader_array_per_shader_engine = 0;
  uint32_t domain = 0;
  uint32_t num_gws = 0;
  uint32_t asic_revision = 0;
  // Local heap sizes; WDDMDevice::LocalHeapSize() returns their sum.
  uint64_t local_visible_heap_size = 0;
  uint64_t local_invisible_heap_size = 0;
  uint64_t private_aperture_base = 0;
  uint64_t private_aperture_size = 0;
  uint64_t shared_aperture_base = 0;
  uint64_t shared_aperture_size = 0;
  uint32_t user_queue_size = 0;
  uint32_t lds_size = 0;
  uint32_t big_page_alignment_size = 0;
  uint32_t hw_big_page_min_alignment_size = 0;
  uint32_t hw_big_page_alignment_size = 0;
  bool enable_big_page_alignment = false;
  uint32_t mec_fw_version = 0;
  uint32_t sdma_fw_version = 0;
  uint32_t l1_cache_size = 0;
  uint32_t l2_cache_size = 0;
  uint32_t l3_cache_size = 0;
  uint32_t gl2_cacheline_size = 0;
  uint32_t num_cp_queues = 0;
  HwsInfo hwsInfo = {};
  // One entry per SDMA engine; WDDMDevice::NumSdmaEngine() is its size.
  std::vector<int> sdma_schedid;
  uint32_t compute_schedid = 0;
  bool state_shadowing_by_cpfw = false;
  bool platform_atomic_support = false;
  // Opaque pointers. NOTE(review): ownership and lifetime are not visible
  // from this header - confirm against ParseAdapterInfo and its callers.
  void *adapter_info = nullptr;
  void *adapter_ex_info = nullptr;
};
// Free functions of the thunk proxy layer. Only the declarations are visible
// in this header; the notes below are inferred from names and signatures and
// should be confirmed against the implementation.

// Per-engine queries against a DeviceInfo.
int EngineOrdinal(int engine, DeviceInfo *device_info);
bool GetHwsEnabled(int engine, DeviceInfo *device_info);
bool ShouldDisableGpuTimeout(int engine, DeviceInfo *device_info);
// Adapter probing. NOTE(review): ParseAdapterInfo presumably fills
// *device_info and returns false on failure - confirm.
bool ParseAdapterInfo(D3DKMT_HANDLE adapter, DeviceInfo *device_info);
bool QueryAdapterSupported(D3DKMT_HANDLE adapter);
// NOTE(review): presumably maps a queue engine id to an EngineFlag bit - confirm.
uint32_t QueueEngine2EngineFlag(uint32_t queue_engine);
// Allocation private-data helpers. `data`, `ppdrv_priv` and `ppalloc_priv`
// are opaque buffers whose layout is not visible here.
void SetAllocationInfo(void *data, uint64_t size, AllocDomain domain,
uint64_t addr, uint32_t mem_flags, uint32_t engine_flag, const DeviceInfo &device_info);
bool CreatePrivateAllocInfo(int num_handles, void **ppdrv_priv, void **ppalloc_priv,
int *pdrv_priv_data_size, int *palloc_priv_data_size);
void DestroyPrivateAllocInfo(void *drv_priv, void *alloc_priv);
// Create*PrivData functions hand back an opaque blob through *priv_data and
// return an int.
// NOTE(review): the int is presumably the blob size, and the blob is
// presumably released with DestroyPrivData - confirm.
int CreateSubmitPrivData(void **priv_data, D3DKMT_HANDLE queue, uint64_t command_addr,
uint64_t command_size, bool is_hw_queue);
int CreateHwQueuePrivData(void **priv_data, D3DKMT_HANDLE context,
bool FwManagedGfxState, SchedLevel level = kNormal);
int CreateContextPrivData(void **priv_data, bool FwManagedGfxState);
int CreatePowerOptPrivData(void **priv_data, bool restore);
int CreateCalibratedTimestampsPrivData(void **priv_data);
// NOTE(review): presumably reads the GPU and CPU timestamps out of a blob
// produced by CreateCalibratedTimestampsPrivData - confirm.
void QueryCalibratedTimestamps(void* priv, uint64_t* gpu, uint64_t* cpu);
void DestroyPrivData(void *priv_data);
}
#endif
+155
Melihat File
@@ -0,0 +1,155 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef _WSL_INC_THUNK_PROXY_WDDM_TYPES_H_
#define _WSL_INC_THUNK_PROXY_WDDM_TYPES_H_
#include <stdint.h>
#include <no_sal2.h>
// Windows-style basic type names mapped onto <stdint.h> fixed-width types.
// NOTE(review): several of these differ from the Windows SDK definitions -
// confirm each is intentional:
//   - UINT_PTR / ULONG_PTR are declared here as pointers to a 32-bit
//     unsigned integer; in the Windows SDK they are pointer-sized unsigned
//     integers.
//   - SIZE_T is fixed at 64 bits here; in the Windows SDK it is pointer-sized.
//   - WCHAR / PCWSTR use a signed 16-bit element here.
typedef uint32_t UINT, *UINT_PTR;
typedef int32_t INT32;
typedef int32_t LONG;
typedef uint32_t ULONG, *ULONG_PTR;
typedef int64_t LONGLONG;
typedef int64_t LONG64;
typedef uint64_t ULONGLONG;
typedef uint64_t ULONG64, *ULONG64_PTR;
typedef uint8_t BYTE;
typedef uint16_t WORD;
typedef uint32_t DWORD;
typedef int32_t BOOL;
typedef int32_t NTSTATUS;
typedef uint16_t USHORT;
typedef uint16_t UINT16;
typedef uint32_t UINT32;
typedef uint64_t UINT64;
typedef int32_t INT;
typedef uint64_t SIZE_T;
typedef void VOID;
typedef float FLOAT;
typedef char CHAR;
typedef unsigned char UCHAR;
typedef UCHAR BOOLEAN;
typedef int16_t WCHAR;
// Generic pointer aliases.
typedef void *HANDLE;
typedef void *PVOID;
typedef void *LPVOID;
typedef const int16_t *PCWSTR;
// Self-referential defines: they expand to themselves, so the typedef names
// keep working while `#ifdef ULONG`-style checks in other headers see them
// as defined.
#define ULONG ULONG
#define ULONG_PTR ULONG_PTR
#define USHORT USHORT
// Declares a distinct opaque handle type: `name` is a pointer to a unique
// dummy struct, so different handle types cannot be mixed up implicitly.
#define DECLARE_HANDLE(name) struct name##__{int unused;}; typedef struct name##__ *name
// Compile-time assertion. In C++11 and later it maps to static_assert, which
// reports the failed expression and may be used repeatedly in any scope,
// including inside a class. The typedef-array form is kept as the fallback
// for C and pre-C++11 translation units.
#if defined(__cplusplus) && __cplusplus >= 201103L
#define C_ASSERT(e) static_assert((e), #e)
#else
#define C_ASSERT(e) typedef char __C_ASSERT__[(e)?1:-1]
#endif
// Stub handle types for Windows names referenced by the WDDM headers.
DECLARE_HANDLE(HWND);
DECLARE_HANDLE(HDC);
DECLARE_HANDLE(PALETTEENTRY);
// 2-D point with signed 32-bit coordinates (LONG is int32_t in this header).
typedef struct tagPOINT {
LONG x;
LONG y;
} POINT;
// Rectangle described by four signed 32-bit edge coordinates.
typedef struct tagRECT {
LONG left;
LONG top;
LONG right;
LONG bottom;
} RECT;
// Same member layout as RECT, declared as a separate type.
typedef struct tagRECTL {
LONG left;
LONG top;
LONG right;
LONG bottom;
} RECTL;
// 64-bit signed integer (QuadPart) overlaid with two 32-bit halves (u).
// NOTE(review): HighPart is unsigned (DWORD) here, whereas the Windows SDK
// declares it as LONG - confirm this is intentional. Which half is the
// low-order one depends on host byte order.
typedef union _LARGE_INTEGER {
struct {
DWORD LowPart;
DWORD HighPart;
} u;
LONGLONG QuadPart;
} LARGE_INTEGER;
typedef LARGE_INTEGER *PLARGE_INTEGER;
// Locally unique identifier split into an unsigned low part and a signed
// high part. WDDMDevice stores one of these as the adapter LUID.
typedef struct _LUID {
ULONG LowPart;
LONG HighPart;
} LUID, *PLUID;
// Device power states D0..D3 preceded by an "unspecified" zero value.
// PowerDeviceMaximum is a count/sentinel that follows the last real state.
typedef enum _DEVICE_POWER_STATE {
  PowerDeviceUnspecified = 0,
  PowerDeviceD0          = 1,
  PowerDeviceD1          = 2,
  PowerDeviceD2          = 3,
  PowerDeviceD3          = 4,
  PowerDeviceMaximum     = 5
} DEVICE_POWER_STATE, *PDEVICE_POWER_STATE;
// Windows annotation, calling-convention and direction keywords. All of
// them expand to nothing here, except CONST (plain `const`) and MAX_PATH
// (the value 260).
#define _Check_return_
#define APIENTRY
#define CONST const
#define IN
#define OUT
#define FAR
#define MAX_PATH 260
#define __stdcall
// 128-bit GUID laid out as 32 + 16 + 16 bits followed by 8 bytes.
// GUID_DEFINED guards against a second definition; it is set before
// <guiddef.h> is included right after this block.
#ifndef GUID_DEFINED
#define GUID_DEFINED
typedef struct _GUID {
uint32_t Data1;
uint16_t Data2;
uint16_t Data3;
uint8_t Data4[ 8 ];
} GUID;
#endif
#include <guiddef.h>
#endif
+83
Melihat File
@@ -0,0 +1,83 @@
/* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. */
#ifndef _WSL_INC_WDDM_CMD_UTIL_H_
#define _WSL_INC_WDDM_CMD_UTIL_H_
#include <string.h>
#include "hsa-runtime/inc/hsa.h"
#include "hsa-runtime/inc/amd_hsa_queue.h"
#include "hsa-runtime/inc/amd_hsa_kernel_code.h"
#include "impl/pm4_cmds.h"
#include "util/utils.h"
#include "libhsakmt.h"
namespace wsl {
namespace thunk {
// Parameter bundle passed to CmdUtil::BuildDispatch.
// NOTE(review): the field meanings below are inferred from names and types
// only - confirm against the BuildDispatch implementation.
struct DispatchInfo {
// presumably the GFX major version (CmdUtil::BuildAcquireMem takes the same parameter)
uint8_t major;
// AQL kernel dispatch packet being processed
hsa_kernel_dispatch_packet_t *pPacket;
// presumably the kernel entry address - TODO confirm
void *pEntry;
const amd_kernel_code_t *pKernelObject;
// presumably the LDS allocation in blocks (see WDDMDevice::LdsBlocks)
uint32_t ldsBlks;
amd_queue_t *pAmdQueue;
bool wave32;
uint32_t srd;
// scratch memory description
void *pScratchBase;
uint32_t scratchSizePerWave;
uint32_t scratchBaseOffset[2];
// presumably the number of valid scratchBaseOffset entries - TODO confirm
uint32_t offsetCnt;
};
// Stateless collection of command builders. Every Build* member takes the
// destination buffer as `pBuffer` and returns a size_t.
// NOTE(review): the return value is presumably the amount written to
// pBuffer; the unit is not visible from this header - confirm.
class CmdUtil {
 public:
  CmdUtil() {}
  ~CmdUtil() {}

  // Defaults select a 64-bit GPU clock count copied to TC L2 with write
  // confirmation, as named by the pm4 enumerators below.
  size_t BuildCopyData(uint64_t *pDstAddr, void *pBuffer,
                       uint32_t dstSel = dst_sel__mec_copy_data__tc_l2,
                       uint32_t dstCachePolicy = dst_cache_policy__mec_copy_data__stream,
                       uint32_t srcSel = src_sel__mec_copy_data__gpu_clock_count,
                       uint32_t srcCachePolicy = src_cache_policy__mec_copy_data__lru,
                       uint32_t countSel = count_sel__mec_copy_data__64_bits_of_data,
                       uint32_t wrConfirm = wr_confirm__mec_copy_data__wait_for_confirmation);

  // Defaults request a CS partial flush event.
  size_t BuildBarrier(void *pBuffer,
                      uint32_t eventIndex = event_index__mec_event_write__cs_partial_flush,
                      uint32_t eventType = CS_PARTIAL_FLUSH);

  size_t BuildWriteData64Command(void *pBuffer, uint64_t* write_addr, uint64_t write_value);

  size_t BuildAcquireMem(uint8_t major, void *pBuffer);

  size_t BuildScratch(void *pScratchBase, void *pBuffer);

  size_t BuildComputeShaderParams(void *pBuffer);

  size_t BuildDispatch(struct DispatchInfo *pInfo, void *pBuffer);

  size_t BuildAtomicMem(uint64_t *pAddr, uint32_t atomic, void *pBuffer,
                        uint32_t cachePolicy = cache_policy__mec_atomic_mem__stream,
                        uint64_t srcData = 1);
};
} // namespace thunk
} // namespace wsl
#endif
+257
Melihat File
@@ -0,0 +1,257 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef _WSL_INC_WDDM_DEVICE_H_
#define _WSL_INC_WDDM_DEVICE_H_
#include <cassert>
#include <ntstatus.h>
#include <atomic>
#include <memory>
#include <vector>
#include "impl/wddm/types.h"
#include "impl/thunk_proxy/thunk_proxy.h"
#include "impl/wddm/va_mgr.h"
#include "impl/wddm/status.h"
#include "impl/wddm/types.h"
#include "impl/wddm/gpu_memory.h"
#include "impl/wddm/cmd_util.h"
namespace wsl {
namespace thunk {
//class Queue;
// Defined elsewhere; this header only uses pointers to it.
class WDDMQueue;
// WSL2 hyperv GPADL protocol limitation
#define MAX_USERPTR_BLOCK_SIZE 0xf0000000
// Bounds of the address range [2^47, 2^64 - 2^47 - 1], i.e.
// 0x0000800000000000 .. 0xFFFF7FFFFFFFFFFF. Both use unsigned long long so
// the shift is well defined even where `long` is 32 bits wide.
#define START_NON_CANONICAL_ADDR (1ULL << 47)
#define END_NON_CANONICAL_ADDR (~0ULL - (1ULL << 47))
// True when the half-open ranges [start1, start1 + size1) and
// [start2, start2 + size2) intersect. Every argument is parenthesized so
// that expression arguments (e.g. `a ? b : c`, `x | y`) are evaluated as a
// unit. Each argument is evaluated twice - avoid side effects.
#define IS_OVERLAPPING(start1, size1, start2, size2) \
  (((start1) < ((start2) + (size2))) && ((start2) < ((start1) + (size1))))
// One WDDM adapter/device as seen by the thunk layer. Holds the adapter,
// device, paging-queue and paging-fence handles, the parsed
// thunk_proxy::DeviceInfo, and the virtual-address managers.
//
// Inline accessors that only read members are const, so they can be called
// through a `const WDDMDevice&`. IsHwsEnabled() stays non-const because
// thunk_proxy::GetHwsEnabled takes a non-const DeviceInfo pointer.
class WDDMDevice {
 public:
  static constexpr size_t GpuMemoryChunkSize = 2 * (1ULL << 30);  // 2 GB

  WDDMDevice(D3DKMT_HANDLE adapter, LUID adapter_luid);
  ~WDDMDevice();

  // ---- Read-only views of device_info_ ----
  int Major() const { return device_info_.major; }
  int Minor() const { return device_info_.minor; }
  int Stepping() const { return device_info_.stepping; }
  bool IsDgpu() const { return device_info_.is_dgpu; }
  const char *ProductName() const { return device_info_.product_name; }
  const char *Uuid() const { return device_info_.uuid; }
  thunk_proxy::AsicFamilyType GfxFamily() const { return device_info_.family; }
  uint32_t DeviceId() const { return device_info_.device_id; }
  uint32_t WavefrontSize() const { return device_info_.wavefront_size; }
  uint32_t ComputeUnitCount() const { return device_info_.compute_unit_count; }
  uint32_t MaxEngineClockMhz() const { return device_info_.max_engine_clock_mhz; }
  uint32_t WatchPointsNum() const { return device_info_.watch_points_num; }
  uint32_t PciBusAddr() const { return device_info_.pci_bus_addr; }
  uint32_t MemoryBusWidth() const { return device_info_.memory_bus_width; }
  uint32_t MaxMemoryClockMhz() const { return device_info_.max_memory_clock_mhz; }
  uint32_t WavePerCu() const { return device_info_.wave_per_cu; }
  uint32_t SimdPerCu() const { return device_info_.simd_per_cu; }
  uint32_t MaxScratchSlotsPerCu() const { return device_info_.max_scratch_slots_per_cu; }
  uint32_t NumShaderEngine() const { return device_info_.num_shader_engine; }
  uint32_t ShaderArrayPerShaderEngine() const { return device_info_.shader_array_per_shader_engine; }
  // Number of SDMA engines; explicit cast documents the size_t -> uint32_t narrowing.
  uint32_t NumSdmaEngine() const { return static_cast<uint32_t>(device_info_.sdma_schedid.size()); }
  uint32_t Domain() const { return device_info_.domain; }
  uint32_t NumGws() const { return device_info_.num_gws; }
  uint32_t AsicRevision() const { return device_info_.asic_revision; }
  // Sum of the visible and invisible local heap sizes.
  uint64_t LocalHeapSize() const { return device_info_.local_visible_heap_size + device_info_.local_invisible_heap_size; }
  uint64_t LocalVisibleHeapSize() const { return device_info_.local_visible_heap_size; }
  uint64_t LocalInvisibleHeapSize() const { return device_info_.local_invisible_heap_size; }
  uint64_t PrivateApertureBase() const { return device_info_.private_aperture_base; }
  uint64_t PrivateApertureSize() const { return device_info_.private_aperture_size; }
  uint64_t SharedApertureBase() const { return device_info_.shared_aperture_base; }
  uint64_t SharedApertureSize() const { return device_info_.shared_aperture_size; }
  uint32_t LdsSize() const { return device_info_.lds_size; }
  uint64_t GPUCounterFrequency() const { return device_info_.gpu_counter_frequency; }
  uint32_t GetSwsQueueSize(void) const { return device_info_.user_queue_size; }
  uint32_t GetMecFwVersion() const { return device_info_.mec_fw_version; }
  uint32_t GetSdmaFwVersion() const { return device_info_.sdma_fw_version; }
  uint32_t GetL1CacheSize() const { return device_info_.l1_cache_size; }
  uint32_t GetL2CacheSize() const { return device_info_.l2_cache_size; }
  uint32_t GetL3CacheSize() const { return device_info_.l3_cache_size; }
  uint32_t Gl2CacheLineSize() const { return device_info_.gl2_cacheline_size; }
  bool SupportStateShadowingByCpFw(void) const { return device_info_.state_shadowing_by_cpfw; }
  bool SupportPlatformAtomic(void) const { return device_info_.platform_atomic_support; }
  // Scheduler id of the idx-th SDMA engine; idx must be < NumSdmaEngine()
  // (checked by assert only).
  uint32_t GetSdmaEngine(uint32_t idx) const {
    assert(idx < NumSdmaEngine());
    return device_info_.sdma_schedid[idx];
  }
  uint32_t GetComputeEngine() const { return device_info_.compute_schedid; }
  uint64_t VramAvail();
  void GetClockCounters(uint64_t *gpu, uint64_t *cpu);
  uint32_t GetNumCpQueues() const { return device_info_.num_cp_queues; }

  // ---- Sync objects and queues ----
  bool CreateSyncobj(D3DKMT_HANDLE *handle, uint64_t **addr);
  void DestroySyncobj(D3DKMT_HANDLE handle);
  bool CreateQueue(WDDMQueue *queue);
  void DestroyQueue(WDDMQueue *queue);
  bool CreateHwQueue(WDDMQueue *queue);
  bool DestroyHwQueue(WDDMQueue *queue);
  bool SubmitToSwQueue(WDDMQueue *queue, uint64_t command_addr,
                       uint64_t command_size, uint64_t fence_value);
  bool SubmitToHwQueue(WDDMQueue *queue, uint64_t command_addr,
                       uint64_t command_size, uint64_t fence_value);
  // Queues a GPU wait on the paging fence when the value currently stored at
  // page_fence_addr_ is below the latest recorded page_fence_value_.
  // Returns false only if that GpuWait call fails.
  bool WaitPagingFence(WDDMQueue *queue) {
    uint64_t value = page_fence_value_;
    if (*page_fence_addr_ < value &&
        !GpuWait(queue, &page_syncobj_, &value, 1))
      return false;
    return true;
  }
  bool GpuWait(WDDMQueue *queue, const D3DKMT_HANDLE *syncobjs,
               uint64_t *values, int count);
  bool GpuSignal(D3DKMT_HANDLE context, const D3DKMT_HANDLE *syncobjs,
                 uint64_t *value, int count);
  bool CpuWait(const D3DKMT_HANDLE *syncobjs, uint64_t *value,
               int count, bool wait_any);
  bool WaitOnPagingFenceFromCpu();
  uint32_t LdsBlocks(const hsa_kernel_dispatch_packet_t *pkt);

  // ---- Command buffer geometry ----
  uint32_t GetCmdbufSize(void) const { return cmdbuf_size_; }
  uint32_t GetAqlFrameSize(void) const { return cmdbuf_aql_frame_size_; }
  static uint32_t GetAqlFrameNum(void) { return cmdbuf_aql_frame_num_; }
  // Both legacy HWS and stage 1 HWS use KMD to alloc user queue memory,
  // return false by default
  bool AllocUserQueueMemFromUMD(void) const { return false; }
  bool IsHwsEnabled(int engine) {
    return thunk_proxy::GetHwsEnabled(engine, &device_info_);
  }
  void UpdatePageFence(uint64_t fence_value);

  // ---- Raw handles ----
  D3DKMT_HANDLE PagingQueue() const { return page_queue_; }
  D3DKMT_HANDLE PagingFence() const { return page_syncobj_; }
  D3DKMT_HANDLE DeviceHandle() const { return device_; }
  LUID GetLuid() const { return adapter_luid_; }
  const thunk_proxy::DeviceInfo& DeviceInfo() const { return device_info_; }

  // ---- GPU virtual address space and memory objects ----
  ErrorCode ReserveGpuVirtualAddress(thunk_proxy::AllocDomain domain,
                                     gpusize hit_base_addr,
                                     gpusize size,
                                     gpusize *out_gpu_virtual_addr,
                                     gpusize alignment,
                                     bool lock=false);
  ErrorCode FreeGpuVirtualAddress(thunk_proxy::AllocDomain domain,
                                  gpusize base_addr,
                                  gpusize size);
  ErrorCode CreateGpuMemory(const GpuMemoryCreateInfo &create_info, GpuMemory **gpu_mem);
  ErrorCode HandleApertureAlloc(gpusize size, gpusize *out_gpu_virt_addr);
  void HandleApertureFree(gpusize gpu_addr);

 private:
  bool ParseDeviceInfo(void);
  void DestroyDeviceInfo(void);
  bool CreateDevice(void);
  bool DestroyDevice(void);
  bool CreatePagingQueue(void);
  bool DestroyPagingQueue(void);
  void *Lock(D3DKMT_HANDLE handle);
  bool Unlock(D3DKMT_HANDLE handle);
  bool CreateContext(int engine, D3DKMT_HANDLE *handle);
  bool DestroyContext(D3DKMT_HANDLE handle);
  void SetPowerOptimization(bool restore);
  void InitCmdbufInfo(void);
  bool ReserveSystemHeapSpace(void);
  bool FreeSystemHeapSpace(void);
  bool ReserveLocalHeapSpace(void);
  bool InitHandleApertureSpace(void);
  bool CommitSystemHeapSpace(void* addr, int64_t size, bool lock=false);
  bool DecommitSystemHeapSpace(void* addr, int64_t size);
  bool FreeLocalHeapSpace(void);
  void InitVaMgr();
  void InitHandleApertureMgr();

  D3DKMT_HANDLE adapter_;
  LUID adapter_luid_;
  D3DKMT_HANDLE device_;
  D3DKMT_HANDLE page_queue_;
  D3DKMT_HANDLE page_syncobj_;
  // CPU-readable location of the paging fence value (read by WaitPagingFence).
  uint64_t *page_fence_addr_;
  // Latest paging fence value recorded for this device.
  std::atomic<uint64_t> page_fence_value_;
  uint64_t handle_aperture_start_;
  uint64_t handle_aperture_size_;
  uint64_t local_heap_space_start_;
  uint64_t local_heap_space_size_;
  uint64_t system_heap_space_start_;
  uint64_t system_heap_space_size_;
  uint32_t cmdbuf_size_;
  uint32_t cmdbuf_aql_frame_size_;
  static const uint32_t cmdbuf_aql_frame_num_;
  // device info
  thunk_proxy::DeviceInfo device_info_;
  std::unique_ptr<VaMgr> local_va_mgr_;
  std::unique_ptr<VaMgr> handle_aperture_mgr_;
  //CmdUtil cmd_util;
};
// Enumerates adapters; results come back through the two reference
// parameters and the call status through the NTSTATUS return value.
// NOTE(review): ownership of the returned `adapters` array is not visible
// from this header - confirm who frees it.
NTSTATUS WDDMGetAdapters(D3DKMT_ADAPTERINFO *&adapters, int &num_adapters);
} // namespace thunk
} // namespace wsl
#endif
+227
Melihat File
@@ -0,0 +1,227 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef _WSL_INC_WDDM_GPU_MEMORY_H_
#define _WSL_INC_WDDM_GPU_MEMORY_H_
#include <cstddef>
#include <cstdint>
#include "util/utils.h"
#include "impl/wddm/types.h"
#include "impl/wddm/thunks.h"
#include "impl/thunk_proxy/thunk_proxy.h"
namespace wsl {
namespace thunk {
class WDDMDevice;
// Creation flags for a GPU memory object: five 1-bit options followed by 59
// unused bits, overlaid with a single 64-bit word (`reserved`). Assigning
// `reserved = 0`, as GpuMemoryCreateInfo's constructor does, clears all of
// them at once.
union GpuMemoryCreateFlags {
struct {
uint64_t virtual_alloc : 1; // only allocate virtual address, without physical buffer
uint64_t physical_only : 1; // only allocate physical buffer, without virtual address
uint64_t interprocess : 1; // physical buffer need share info between exporter and importer
uint64_t locked : 1; // lock virtual address space into RAM, preventing that memory from being paged to the swap area
uint64_t physical_contiguous : 1; // contiguous physical pages
uint64_t unused : 59;
};
uint64_t reserved;
};
struct GpuMemoryCreateInfo {
GpuMemoryCreateInfo() {
flags.reserved = 0;
domain = thunk_proxy::kLocal;
size = 0;
alignment = 0;
mem_flags = 0;
engine_flag = 0;
va_hint = 0;
user_ptr = nullptr;
dmabuf_fd = -1;
}
GpuMemoryCreateFlags flags;
thunk_proxy::AllocDomain domain;
gpusize size;
gpusize alignment;
int mem_flags;
int engine_flag;
int dmabuf_fd; // Import from dmabuf
void *user_ptr;
gpusize va_hint;
};
// Runtime description of a GPU memory object: its domain, the adapter that
// backs it, its GPU/CPU addresses, sizes and state flags.
struct GpuMemoryDesc {
  GpuMemoryDesc() {
    gpu_addr = 0;
    cpu_addr = nullptr;
    client_size = 0;
    size = alignment = 0;
    flags.reserved = 0;  // clears all state bits through the raw word
    mem_flags = 0;
    engine_flag = 0;
    handle_ape_addr = 0;
  }

  // `domain` and `adapter_luid` were never assigned by the constructor, so
  // accessors such as GpuMemory::IsLocal() or IsSameAdapter() could read
  // indeterminate values on a freshly constructed descriptor. They now get
  // deterministic in-class defaults; kLocal is the same default that
  // GpuMemoryCreateInfo uses.
  thunk_proxy::AllocDomain domain = thunk_proxy::kLocal;
  LUID adapter_luid{};  // Where is the backing store location
  gpusize gpu_addr;
  void *cpu_addr;
  gpusize client_size;  // user request size
  gpusize size;
  gpusize alignment;
  gpusize handle_ape_addr;
  // State bits; `reserved` is the raw 32-bit view of the same storage.
  union {
    struct {
      uint32_t is_virtual : 1;
      uint32_t is_shared : 1;
      uint32_t is_external : 1;
      uint32_t is_physical_only : 1;
      uint32_t is_locked : 1;
      uint32_t is_queue_referenced : 1;
      uint32_t is_physical_contiguous : 1;
      uint32_t unused : 25;
    };
    uint32_t reserved;
  } flags;
  int mem_flags;
  int engine_flag;
};
// Plain-data summary of a memory object (domain, adapter, sizes and flags).
// NOTE(review): presumably the metadata exchanged between exporter and
// importer of a shared allocation — confirm against the export/import code.
struct SharedHandleInfo {
thunk_proxy::AllocDomain domain;
LUID adapter_luid;
gpusize client_size; // user request size
uint64_t size;
uint32_t flags;
int mem_flags;
};
// Opaque handle type; GpuMemory::GetGpuMemoryHandle() and GpuMemory::Convert()
// translate between a handle and the GpuMemory object it designates.
using GpuMemoryHandle = void *;

// A GPU memory object belonging to a WDDMDevice. Construction is restricted
// (protected constructor, WDDMDevice is a friend); the object's state lives in
// a GpuMemoryDesc plus the list of allocation handles that back it.
class GpuMemory {
 public:
  static size_t CalcChunkNumbers(gpusize size);

  ErrorCode Init(const GpuMemoryCreateInfo &create_info);

  // --- Simple accessors over the descriptor -------------------------------
  WDDMDevice *GetDevice() const { return device_; }
  gpusize Size() const { return desc_.size; }
  gpusize ClientSize() const { return desc_.client_size; }
  uint64_t GpuAddress() const { return desc_.gpu_addr; }
  void *CpuAddress() const { return desc_.cpu_addr; }
  uint64_t HandleApeAddress() const { return desc_.handle_ape_addr; }

  // --- Domain queries ------------------------------------------------------
  bool IsLocal() const { return thunk_proxy::kLocal == desc_.domain; }
  bool IsUserMemory() const { return thunk_proxy::kUserMemory == desc_.domain; }
  bool IsSystem() const { return thunk_proxy::kSystem == desc_.domain; }
  bool IsUserQueue() const { return thunk_proxy::kUserQueue == desc_.domain; }

  // --- Flag queries --------------------------------------------------------
  bool IsPhysicalOnly() const { return desc_.flags.is_physical_only != 0; }
  bool IsPhysicalContiguous() const { return desc_.flags.is_physical_contiguous != 0; }
  bool IsVirtual() const { return desc_.flags.is_virtual != 0; }
  bool IsShared() const { return desc_.flags.is_shared != 0; }
  bool IsExternal() const { return desc_.flags.is_external != 0; }
  uint32_t Flags() const { return desc_.flags.reserved; }
  int GetAllocInfo() const { return desc_.mem_flags; }
  bool IsFineGrain() const { return (desc_.mem_flags & thunk_proxy::kFineGrain) != 0; }

  // True when both halves of `luid` equal the adapter LUID in the descriptor.
  bool IsSameAdapter(const LUID &luid) const {
    const LUID &own = desc_.adapter_luid;
    return own.HighPart == luid.HighPart && own.LowPart == luid.LowPart;
  }

  // The "queue reference" is a single flag bit, not a counter: Get sets it,
  // Put clears it.
  void GetQueueReference() { desc_.flags.is_queue_referenced = 1; }
  void PutQueueReference() { desc_.flags.is_queue_referenced = 0; }
  bool IsQueueReferenced() const { return desc_.flags.is_queue_referenced != 0; }

  // No bounds check is performed on `index`.
  WinAllocationHandle GetAllocationHandle(size_t index) const {
    return alloc_handles_ptr_[index];
  }
  size_t NumChunks() const { return num_allocations_; }

  // The handle is simply the object's own address.
  const GpuMemoryHandle GetGpuMemoryHandle() const {
    return const_cast<GpuMemory *>(this);
  }
  static GpuMemory *Convert(GpuMemoryHandle handle) {
    return static_cast<GpuMemory *>(handle);
  }

  ErrorCode ReserveGpuVirtualAddress(gpusize base_virt_addr, gpusize va_size, gpusize alignment);
  ErrorCode FreeGpuVirtualAddress(gpusize va_start_address, gpusize va_size);
  ErrorCode MapGpuVirtualAddress(const gpusize map_addr, const gpusize size, gpusize offset = 0);
  ErrorCode UnmapGpuVirtualAddress(const gpusize map_addr, const gpusize size, gpusize offset = 0);
  ErrorCode MakeResident();
  ErrorCode Evict();
  ErrorCode ExportPhysicalHandle(int* dmabuf_fd, uint32_t flags = SHARED_ALLOCATION_ALL_ACCESS);
  ErrorCode ImportPhysicalHandle(int dmabuf_fd);

  ~GpuMemory();

 protected:
  explicit GpuMemory(WDDMDevice *device);

 private:
  ErrorCode CreatePhysicalMemory();
  ErrorCode FreePhysicalMemory();
  uint64_t AdjustSize(gpusize size) const;

 private:
  friend class WDDMDevice;

  WDDMDevice *const device_;
  GpuMemoryDesc desc_;
  size_t num_allocations_;
  WinAllocationHandle *alloc_handles_ptr_;
  WinAllocationHandle alloc_handle_;  // Optimization for num_allocations_ is 1
  WinResourceHandle resource_;  // Handle to a resource object that wraps the allocation. Used for shared resources

  DISALLOW_COPY_AND_ASSIGN(GpuMemory);
};
} // namespace thunk
} // namespace wsl
#endif
+363
Melihat File
@@ -0,0 +1,363 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef _WSL_INC_WDDM_QUEUE_H_
#define _WSL_INC_WDDM_QUEUE_H_
#include <cinttypes>
#include <condition_variable>
#include <iostream>
#include <queue>
#include <utility>
#include "impl/wddm/types.h"
#include "impl/wddm/device.h"
#include "impl/wddm/gpu_memory.h"
#include "hsa-runtime/inc/hsa_ext_amd.h"
#include "hsa-runtime/inc/amd_hsa_queue.h"
#include "hsa-runtime/inc/amd_hsa_signal.h"
#include "impl/wddm/cmd_util.h"
namespace wsl {
namespace thunk {
class Queue;
class WDDMDevice;
// Common base for the queue types in this header. It stores the device, the
// kernel handles (context / queue / sync object), the command buffer address
// and size, the engine index and the scheduling priority. The virtual hooks
// default to no-ops that report success.
class WDDMQueue {
 public:
  // All kernel handles start at 0 and all pointers at nullptr; the priority
  // starts at thunk_proxy::kNormal.
  WDDMQueue(WDDMDevice *device,
            uint64_t cmdbuf_addr,
            uint32_t cmdbuf_size,
            uint32_t engine,
            bool use_hws = true) :
    device(device),
    context(0),
    queue(0),
    syncobj(0),
    sync_addr(nullptr),
    cmdbuf(nullptr),
    cmdbuf_addr(cmdbuf_addr),
    cmdbuf_size(cmdbuf_size),
    queue_engine(engine),
    use_hws(use_hws),
    prio(thunk_proxy::kNormal) {
  }
  virtual ~WDDMQueue() { }

  virtual hsa_status_t Init(void) { return HSA_STATUS_SUCCESS; }
  virtual hsa_status_t Fini(void) { return HSA_STATUS_SUCCESS; }
  virtual void RingDoorbell() { }
  // Base implementation reports the command buffer address as a pointer.
  virtual void* GetHsaQueueAddr(void) const { return reinterpret_cast<void*>(GetCmdbufAddr()); }

  // NOTE(review): "Sws"/"Hws" presumably mean software/hardware scheduling,
  // matching the `use_hws` flag — confirm against the implementation.
  hsa_status_t SwsInit(void);
  hsa_status_t SwsFini(void);
  hsa_status_t SwsSubmit(uint64_t command_addr,
                         uint64_t command_size,
                         uint64_t fence_value);
  hsa_status_t HwsInit(void);
  hsa_status_t HwsFini(void);
  hsa_status_t HwsSubmit(uint64_t command_addr,
                         uint64_t command_size,
                         uint64_t fence_value);
  hsa_status_t SetPriority(hsa_amd_queue_priority_t priority);

  uint64_t *GetSyncAddr(void) const { return sync_addr; }
  uint64_t GetCmdbufAddr(void) const { return cmdbuf_addr; }

  // Maps an HSA queue priority to a thunk_proxy scheduling level. NORMAL and
  // any unrecognized value both map to kNormal.
  thunk_proxy::SchedLevel ConvertSchedLevel(hsa_amd_queue_priority_t prio) const {
    switch (prio) {
      case HSA_AMD_QUEUE_PRIORITY_LOW:
        return thunk_proxy::kLow;
      case HSA_AMD_QUEUE_PRIORITY_HIGH:
        return thunk_proxy::kHigh;
      case HSA_AMD_QUEUE_PRIORITY_NORMAL:
      default:
        return thunk_proxy::kNormal;
    }
  }

  WDDMDevice *device;
  D3DKMT_HANDLE context;
  D3DKMT_HANDLE queue;
  D3DKMT_HANDLE syncobj;
  uint64_t *sync_addr;
  GpuMemoryHandle cmdbuf;
  uint64_t cmdbuf_addr;
  uint32_t cmdbuf_size;
  // These two members were not covered by the constructor's initializer list
  // and therefore started out indeterminate; give them defined defaults.
  GpuMemoryHandle queue_mem = nullptr;
  uint64_t queue_addr = 0;
  uint32_t queue_engine;
  bool use_hws;
  thunk_proxy::SchedLevel prio;
};
// Compute queue built on WDDMQueue. It is handed a caller-provided ring with
// atomic read/write pointers and keeps the state used to convert ring packets
// into command-buffer contents (see the *AqlToPm4 helpers below).
class ComputeQueue : public WDDMQueue {
 public:
  ComputeQueue(WDDMDevice *device,
               void *ring,
               uint64_t ring_size,
               std::atomic<uint64_t> *ring_wptr,
               std::atomic<uint64_t> *ring_rptr,
               volatile int64_t *error_addr,
               uint32_t cmdbuf_size,
               uint32_t engine,
               bool use_hws = true);
  ~ComputeQueue();

  virtual hsa_status_t Init();
  virtual hsa_status_t Fini();
  virtual hsa_status_t Submit();

  void* GetRing() const { return ring; }
  uint64_t GetRingSize() const { return ring_size; }
  std::atomic<uint64_t>* GetRingWptr() const { return ring_wptr; }
  std::atomic<uint64_t>* GetRingRptr() const { return ring_rptr; }
  uint64_t GetAqlWriteIndex() const { return cmdbuf_aql_frame_write_index; }
  uint32_t GetAqlFrameSize() const { return cmdbuf_aql_frame_size; }
  // For a compute queue the HSA queue address is the ring itself.
  void* GetHsaQueueAddr() const { return ring; }

  // Reads the 16-bit header of the ring slot selected by
  // cmdbuf_aql_frame_write_index (wrapped by ring_size, 64 bytes per slot) and
  // reports whether its type field equals HSA_PACKET_TYPE_INVALID.
  bool IsInvalidPacket() const {
    const uint64_t slot = cmdbuf_aql_frame_write_index % ring_size;
    const uint16_t *header =
        reinterpret_cast<const uint16_t *>(static_cast<const char *>(ring) + slot * 64);
    const int type_mask = (1 << HSA_PACKET_HEADER_WIDTH_TYPE) - 1;
    const int packet_type = (*header >> HSA_PACKET_HEADER_TYPE) & type_mask;
    return packet_type == HSA_PACKET_TYPE_INVALID;
  }

  hsa_status_t Process();

  // Hands out a mutable pointer to the doorbell value even from a const
  // object, exactly as the original C-style cast did.
  uint64_t * GetDoorbellPtr() const {
    return const_cast<uint64_t *>(&doorbell_signal_value_);
  }
  void RingDoorbell();

 private:
  hsa_status_t KernelDispatchAqlToPm4(char *cpu, hsa_kernel_dispatch_packet_t *packet);
  hsa_status_t BarrierGenericAqlToPm4(char *cpu, hsa_barrier_and_packet_t *packet, bool is_or = false);

  // Layout of the vendor-specific packet consumed by VendorSpecificAqlToPm4().
  struct amd_aql_pm4_ib {
    uint16_t header;
    uint16_t ven_hdr;
    uint32_t ib_jump_cmd[4];
    uint32_t dw_cnt_remain;
    uint32_t reserved[8];
    hsa_signal_t completion_signal;
  };
  hsa_status_t VendorSpecificAqlToPm4(char *cpu, amd_aql_pm4_ib *packet);
  hsa_status_t SwitchAql2PM4();
  hsa_status_t PreSubmit();
  hsa_status_t EndSubmit();

  void *ring;
  uint64_t ring_size;
  std::atomic<uint64_t> *ring_wptr;
  std::atomic<uint64_t> *ring_rptr;
  // ib_start_addr is the current ib start address
  uint64_t ib_start_addr;
  // ib_size is the current ib size.
  uint64_t ib_size;
  // record the last submitted aql frame write index
  uint64_t sync_point;
  uint64_t cmdbuf_aql_frame_write_index;
  uint32_t cmdbuf_aql_frame_size;
  uint64_t *signal_addr_;
  bool platform_atomic_support_;
  bool needs_barrier;
  bool ready_to_submit;
  CmdUtil cmd_util;

  // Tests the ENABLE_PROFILING bit in amd_queue_rocr_->queue_properties.
  bool EnableProfiling() {
    return AMD_HSA_BITS_GET(amd_queue_rocr_->queue_properties, AMD_QUEUE_PROPERTIES_ENABLE_PROFILING);
  }
  void HandleError(hsa_status_t status);
  bool UpdateScratch(uint32_t private_segment_size, bool wave32);
  uint32_t UpdateIndexStride(uint32_t srd, bool wave32);
  void *ScratchBase() { return scratch_base_; }
  // Records one more offset in scratch_base_offset_array_.
  void AppendCmdbufSratchBaseOffset(int offset) {
    scratch_base_offset_array_.push_back(offset);
  }
  bool RelocateCmdbufScratchBase(uint64_t addr);
  uint32_t ScratchSizePerWave() { return scratch_size_per_wave_; }
  uint64_t GetKernelObjAddr(uint64_t addr) const;
  void InitScratchSRD();

  GpuMemoryHandle amd_queue_mem_;
  amd_queue_t *amd_queue_;
  amd_queue_t *amd_queue_rocr_;
  uint64_t doorbell_signal_value_;
  volatile std::atomic<int64_t> *error_code_;

  // Worker thread state; AqlToPm4Thread is the thread's entry function.
  std::thread aql_to_pm4_thread_;
  bool thread_stop_;
  std::mutex thread_cond_lock_;
  std::condition_variable thread_cond_;
  static void AqlToPm4Thread(ComputeQueue *queue);

  uint32_t scratch_waves_;
  uint32_t scratch_size_per_wave_;
  uint32_t scratch_size_;
  void *scratch_base_;
  GpuMemoryHandle scratch_mem_;
  std::vector<int> scratch_base_offset_array_;
};
// SDMA queue built on WDDMQueue. It exposes its own write pointer and doorbell
// storage and uses the base class sync address as its read pointer.
class SDMAQueue : public WDDMQueue {
 public:
  SDMAQueue(WDDMDevice *device,
            void *ring,
            uint64_t cmdbuf_size,
            uint32_t engine,
            bool use_hws = true);
  virtual ~SDMAQueue();

  hsa_status_t Init();
  hsa_status_t Fini();
  hsa_status_t Submit();
  int PreparePacket(uint32_t offset, uint64_t size);

  // Waits on the CPU for the queue's sync object to reach rptr_next.
  void WaitQueue() {
    device->CpuWait(&syncobj, &rptr_next, 1, false);
  }

  uint64_t * GetRingWptr() { return &wptr_next_; }
  uint64_t * GetRingRptr() { return WDDMQueue::GetSyncAddr(); }
  uint64_t * GetDoorbellPtr() { return &doorbell_; }
  void RingDoorbell();
  void* GetHsaQueueAddr() const {
    const uint64_t addr = GetCmdbufAddr();
    return reinterpret_cast<void*>(addr);
  }

 private:
  uint64_t wptr_next_;
  uint64_t wptr_pre_;
  uint64_t rptr_next;
  uint64_t doorbell_;
  std::vector<std::pair<uint64_t, uint64_t>> wptr_queue_;
  uint64_t ib_size;
  uint64_t ib_start_addr;

  // Worker thread state; SdmaThread is the thread's entry function.
  std::thread thread_;
  bool thread_stop_;
  std::mutex thread_cond_lock_;
  std::condition_variable thread_cond_;
  static void SdmaThread(SDMAQueue *queue);

  // Six-dword POLL_REGMEM packet; each dword can be accessed as named
  // bitfields or through its raw DW_n_DATA word.
  struct SDMA_PKT_POLL_REGMEM {
    union {
      struct {
        unsigned int op : 8;
        unsigned int sub_op : 8;
        unsigned int reserved_0 : 10;
        unsigned int hdp_flush : 1;
        unsigned int reserved_1 : 1;
        unsigned int func : 3;
        unsigned int mem_poll : 1;
      };
      unsigned int DW_0_DATA;
    } HEADER_UNION;
    union {
      struct {
        unsigned int addr_31_0 : 32;
      };
      unsigned int DW_1_DATA;
    } ADDR_LO_UNION;
    union {
      struct {
        unsigned int addr_63_32 : 32;
      };
      unsigned int DW_2_DATA;
    } ADDR_HI_UNION;
    union {
      struct {
        unsigned int value : 32;
      };
      unsigned int DW_3_DATA;
    } VALUE_UNION;
    union {
      struct {
        unsigned int mask : 32;
      };
      unsigned int DW_4_DATA;
    } MASK_UNION;
    union {
      struct {
        unsigned int interval : 16;
        unsigned int retry_count : 12;
        unsigned int reserved_0 : 4;
      };
      unsigned int DW_5_DATA;
    } DW5_UNION;
  };

  const unsigned int SDMA_OP_POLL_REGMEM = 8;

  // A packet counts as a poll packet when its opcode is SDMA_OP_POLL_REGMEM,
  // mem_poll is set and func equals 3.
  bool IsPollPacket(SDMA_PKT_POLL_REGMEM* pkt) {
    const auto &hdr = pkt->HEADER_UNION;
    return hdr.op == SDMA_OP_POLL_REGMEM &&
           hdr.mem_poll == 1 &&
           hdr.func == 3;
  }

  // Masks `idx` with (cmdbuf_size - 1).
  // NOTE(review): this only wraps correctly if cmdbuf_size is a power of two — confirm.
  uint32_t WrapIntoRocrRing(uint64_t idx) {
    const uint64_t wrapped = idx & (cmdbuf_size - 1);
    return wrapped;
  }
};
} // namespace thunk
} // namespace wsl
#endif
+60
Melihat File
@@ -0,0 +1,60 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef _WSL_INC_WDDM_STATUS_H
#define _WSL_INC_WDDM_STATUS_H
// Result codes reported by the WDDM thunk helpers and the GPU memory API in
// this code base. `Success` is the only non-failure value; `Unknown` is the
// catch-all for failures without a more specific code.
enum class ErrorCode {
Success,
DeviceLost,
UnSupported,
NotReady,
OutOfMemory,
OutOfGpuMemory,
OutOfHandleApeMemory,
Timeout,
SyscallFail,
InvalidateParams,
Unknown,
};
#endif
+232
Melihat File
@@ -0,0 +1,232 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef _WSL_INC_WDDM_THUNKS_H
#define _WSL_INC_WDDM_THUNKS_H
#include "impl/wddm/status.h"
#include "impl/wddm/types.h"
namespace wsl {
namespace thunk {
// Converts an NTSTATUS value into the thunk layer's ErrorCode. Any status
// that has no dedicated mapping below is reported as ErrorCode::Unknown.
inline ErrorCode TranslateNtStatus(NTSTATUS status) {
  ErrorCode translated = ErrorCode::Unknown;
  switch (status) {
    case STATUS_SUCCESS:
      translated = ErrorCode::Success;
      break;
    case STATUS_PENDING:
      translated = ErrorCode::NotReady;
      break;
    case STATUS_NO_MEMORY:
      translated = ErrorCode::OutOfMemory;
      break;
    case STATUS_DEVICE_REMOVED:
      translated = ErrorCode::DeviceLost;
      break;
    case STATUS_GRAPHICS_NO_VIDEO_MEMORY:
      translated = ErrorCode::OutOfGpuMemory;
      break;
    case STATUS_TIMEOUT:
      translated = ErrorCode::Timeout;
      break;
    case STATUS_INVALID_PARAMETER:
      translated = ErrorCode::InvalidateParams;
      break;
    default:
      // Keep the Unknown fallback.
      break;
  }
  return translated;
}
namespace d3dthunk {
// Shorter names for the D3DKMT argument structures used by the thunk wrappers.
using CreateAllocationArgs = D3DKMT_CREATEALLOCATION;
using CreateContextArgs = D3DKMT_CREATECONTEXT;
using CreateContextVirtualArgs = D3DKMT_CREATECONTEXTVIRTUAL;
using CreatePagingQueueArgs = D3DKMT_CREATEPAGINGQUEUE;
using CreateSynchronizationObjectArgs = D3DKMT_CREATESYNCHRONIZATIONOBJECT;
using CreateSynchronizationObject2Args = D3DKMT_CREATESYNCHRONIZATIONOBJECT2;
using EscapeArgs = D3DKMT_ESCAPE;
using EvictArgs = D3DKMT_EVICT;
using FreeGpuVirtualAddressArgs = D3DKMT_FREEGPUVIRTUALADDRESS;
using LockArgs = D3DKMT_LOCK;
using Lock2Args = D3DKMT_LOCK2;
using OpenResourceArgs = D3DKMT_OPENRESOURCE;
using OpenResourceFromNtHandleArgs = D3DKMT_OPENRESOURCEFROMNTHANDLE;
using QueryAdapterInfoArgs = D3DKMT_QUERYADAPTERINFO;
using SignalSynchronizationObjectArgs = D3DKMT_SIGNALSYNCHRONIZATIONOBJECT;
using SignalSynchronizationObject2Args = D3DKMT_SIGNALSYNCHRONIZATIONOBJECT2;
using SignalSynchronizationObjectFromCpuArgs = D3DKMT_SIGNALSYNCHRONIZATIONOBJECTFROMCPU;
using SignalSynchronizationObjectFromGpuArgs = D3DKMT_SIGNALSYNCHRONIZATIONOBJECTFROMGPU2;
using SubmitCommandArgs = D3DKMT_SUBMITCOMMAND;
using UnlockArgs = D3DKMT_UNLOCK;
using Unlock2Args = D3DKMT_UNLOCK2;
using UpdateGpuVirtualAddressArgs = D3DKMT_UPDATEGPUVIRTUALADDRESS;
using WaitForSynchronizationObjectArgs = D3DKMT_WAITFORSYNCHRONIZATIONOBJECT;
using WaitForSynchronizationObject2Args = D3DKMT_WAITFORSYNCHRONIZATIONOBJECT2;
using WaitForSynchronizationObjectFromCpuArgs = D3DKMT_WAITFORSYNCHRONIZATIONOBJECTFROMCPU;
using WaitForSynchronizationObjectFromGpuArgs = D3DKMT_WAITFORSYNCHRONIZATIONOBJECTFROMGPU;
using AcquireKeyedMutexArgs = D3DKMT_ACQUIREKEYEDMUTEX;
using ReleaseKeyedMutexArgs = D3DKMT_RELEASEKEYEDMUTEX;
using OpenKeyedMutexArgs = D3DKMT_OPENKEYEDMUTEX;
using DestroyKeyedMutexArgs = D3DKMT_DESTROYKEYEDMUTEX;
using QueryVideoMemoryInfoArgs = D3DKMT_QUERYVIDEOMEMORYINFO;
using CreateHwQueueArgs = D3DKMT_CREATEHWQUEUE;
using DestroyHwQueueArgs = D3DKMT_DESTROYHWQUEUE;
using SubmitCommandToHwQueueArgs = D3DKMT_SUBMITCOMMANDTOHWQUEUE;
using SubmitPresentToHwQueueArgs = D3DKMT_SUBMITPRESENTTOHWQUEUE;
using SubmitSignalSyncObjectsToHwQueueArgs = D3DKMT_SUBMITSIGNALSYNCOBJECTSTOHWQUEUE;
using SubmitWaitForSyncObjectsToHwQueueArgs = D3DKMT_SUBMITWAITFORSYNCOBJECTSTOHWQUEUE;
using CreateSyncFileArgs = D3DKMT_CREATESYNCFILE;
// Calls D3DKMTMapGpuVirtualAddress with `args` and returns the translated status.
inline ErrorCode MapGpuVirtualAddress(D3DDDI_MAPGPUVIRTUALADDRESS *args) {
  const auto nt_status = D3DKMTMapGpuVirtualAddress(args);
  return TranslateNtStatus(nt_status);
}
// Calls D3DKMTCreateAllocation2 with `args` and returns the translated status.
inline ErrorCode CreateAllocation(CreateAllocationArgs *args) {
  const auto nt_status = D3DKMTCreateAllocation2(args);
  return TranslateNtStatus(nt_status);
}
// Destroys allocations on `device` through D3DKMTDestroyAllocation2.
//
// When `resource` is non-zero only the resource handle is passed and the
// allocation list is left empty; otherwise the `num_allocations` handles in
// `alloc_handles` are passed. Returns the translated NTSTATUS.
inline ErrorCode DestroyAllocation(
    WinDeviceHandle device,
    WinResourceHandle resource,
    size_t num_allocations,
    const WinAllocationHandle *alloc_handles) {
  // Value-initialization already zeroes every member, as the other wrappers in
  // this header rely on; the former extra memset() was redundant and depended
  // on <cstring>, which this header does not include.
  D3DKMT_DESTROYALLOCATION2 args{};
  args.hDevice = device;
  if (resource) {
    args.hResource = resource;
  } else {
    args.phAllocationList = alloc_handles;
    // Make the size_t -> field-type narrowing explicit.
    args.AllocationCount = static_cast<decltype(args.AllocationCount)>(num_allocations);
  }
  return TranslateNtStatus(D3DKMTDestroyAllocation2(&args));
}
// Calls D3DKMTReserveGpuVirtualAddress with `args` and returns the translated status.
inline ErrorCode ReserveGpuVirtualAddress(D3DDDI_RESERVEGPUVIRTUALADDRESS *args) {
  const auto nt_status = D3DKMTReserveGpuVirtualAddress(args);
  return TranslateNtStatus(nt_status);
}

// Reserves `size` bytes of GPU virtual address space at `base_address`.
// `*out_addr` receives the reserved address on success and is left untouched
// on failure.
inline ErrorCode ReserveGpuVirtualAddress(WinAdapterHandle handle,
                                          gpusize size,
                                          gpusize base_address,
                                          gpusize *out_addr) {
  D3DDDI_RESERVEGPUVIRTUALADDRESS request{};
  request.hPagingQueue = handle;
  request.Size = size;
  request.BaseAddress = base_address;

  const ErrorCode result = ReserveGpuVirtualAddress(&request);
  if (result != ErrorCode::Success) {
    return result;
  }
  *out_addr = request.VirtualAddress;
  return result;
}

// Reserves `size` bytes of GPU virtual address space inside the range given by
// `minimum_address` and `maximum_address`. `*out_addr` receives the reserved
// address on success and is left untouched on failure.
inline ErrorCode ReserveGpuVirtualAddress(WinAdapterHandle handle,
                                          gpusize size,
                                          gpusize minimum_address,
                                          gpusize maximum_address,
                                          gpusize *out_addr) {
  D3DDDI_RESERVEGPUVIRTUALADDRESS request{};
  request.hPagingQueue = handle;
  request.Size = size;
  request.MinimumAddress = minimum_address;
  request.MaximumAddress = maximum_address;

  const ErrorCode result = ReserveGpuVirtualAddress(&request);
  if (result != ErrorCode::Success) {
    return result;
  }
  *out_addr = request.VirtualAddress;
  return result;
}
// Calls D3DKMTFreeGpuVirtualAddress with `args` and returns the translated status.
inline ErrorCode FreeGpuVirtualAddress(FreeGpuVirtualAddressArgs *args) {
  const auto nt_status = D3DKMTFreeGpuVirtualAddress(args);
  return TranslateNtStatus(nt_status);
}

// Convenience overload: releases the `size`-byte range starting at
// `base_address` on the adapter identified by `handle`.
inline ErrorCode FreeGpuVirtualAddress(WinAdapterHandle handle,
                                       gpusize base_address,
                                       gpusize size) {
  FreeGpuVirtualAddressArgs request{};
  request.hAdapter = handle;
  request.BaseAddress = base_address;
  request.Size = size;
  return FreeGpuVirtualAddress(&request);
}
// Calls D3DKMTMakeResident with `args` and returns the translated status.
inline ErrorCode MakeResident(D3DDDI_MAKERESIDENT *args) {
  const auto nt_status = D3DKMTMakeResident(args);
  return TranslateNtStatus(nt_status);
}
// Calls D3DKMTEvict with `args` and returns the translated status.
inline ErrorCode Evict(EvictArgs *args) {
  const auto nt_status = D3DKMTEvict(args);
  return TranslateNtStatus(nt_status);
}
// Shares `resource` through D3DKMTShareObjects and hands the resulting handle
// back as an int descriptor.
//
// num_allocations - object count forwarded to D3DKMTShareObjects
// resource        - resource handle to share
// flags           - access flags forwarded to D3DKMTShareObjects
// dmabuf_fd       - out: the returned handle on success, -1 on failure
//
// Returns the translated NTSTATUS of the call.
inline ErrorCode ShareObjects(size_t num_allocations,
                              WinResourceHandle resource,
                              uint32_t flags,
                              int* dmabuf_fd) {
  OBJECT_ATTRIBUTES obj_attr;
  // Start from a defined value in case the call fails without writing it.
  HANDLE nt_handle = nullptr;
  InitializeObjectAttributes(&obj_attr, nullptr, OBJ_INHERIT, nullptr, nullptr);

  const ErrorCode ret = TranslateNtStatus(D3DKMTShareObjects(num_allocations,
      &resource, &obj_attr, flags, &nt_handle));

  if (ret == ErrorCode::Success) {
    // The previous code read the HANDLE object through an int*, which violates
    // the strict-aliasing rule. Converting the pointer value to an integer and
    // narrowing yields the same low 32 bits on little-endian targets without
    // undefined behavior.
    *dmabuf_fd = static_cast<int>(reinterpret_cast<intptr_t>(nt_handle));
  } else {
    *dmabuf_fd = -1;
  }
  return ret;
}
// Calls D3DKMTQueryResourceInfoFromNtHandle with `args` and returns the translated status.
inline ErrorCode QueryResourceInfoFromNtHandle(D3DKMT_QUERYRESOURCEINFOFROMNTHANDLE *args) {
  const auto nt_status = D3DKMTQueryResourceInfoFromNtHandle(args);
  return TranslateNtStatus(nt_status);
}
// Calls D3DKMTOpenResourceFromNtHandle with `args` and returns the translated status.
inline ErrorCode OpenResourceFromNtHandle(D3DKMT_OPENRESOURCEFROMNTHANDLE *args) {
  const auto nt_status = D3DKMTOpenResourceFromNtHandle(args);
  return TranslateNtStatus(nt_status);
}
} // namespace d3dthunk
} // namespace thunk
} // namespace wsl
#endif
+101
Melihat File
@@ -0,0 +1,101 @@
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2020, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef _WSL_INC_WDDM_TYPES_H_
#define _WSL_INC_WDDM_TYPES_H_
#include <cstdint>
#include <ntstatus.h>
#include "impl/thunk_proxy/wddm_types.h"
// Windows wchar is 16-bit, but Linux wchar_t is 32-bit.
// It seems that libdxcore (not dxgkrnl.ko) converts the thunk's Windows wchar
// to the Linux one, so only 32-bit wchar arguments are accepted. Note that the
// driver's private data structures still use 16-bit wchar.
#define WCHAR wchar_t
#define PCWSTR const wchar_t *
#include <d3dkmthk.h>
#undef WCHAR
#undef PCWSTR
using gpusize = uint64_t; // Used to specify GPU addresses and sizes of GPU allocations
// The handle aliases below all name the same underlying D3DKMT_HANDLE type;
// the distinct names only document which kind of object a handle refers to
// and provide no compile-time type safety.
using WinAllocationHandle = D3DKMT_HANDLE;
using WinResourceHandle = D3DKMT_HANDLE;
using WinContextHandle = D3DKMT_HANDLE;
using WinDeviceHandle = D3DKMT_HANDLE;
using WinAdapterHandle = D3DKMT_HANDLE;
//reference dk/winnt.h
#define STANDARD_RIGHTS_REQUIRED (0x000F0000L)
//reference dk/ntdef.h
#define OBJ_INHERIT (0x00000002L)
typedef WCHAR *PWCHAR, *LPWCH, *PWCH;
// Counted wide-character string. Per the field annotation, Buffer holds up to
// MaximumLength bytes of which the first Length bytes are in use.
// NOTE(review): appears to mirror the Windows NT UNICODE_STRING definition — confirm.
typedef struct _UNICODE_STRING {
USHORT Length;
USHORT MaximumLength;
#ifdef MIDL_PASS
[size_is(MaximumLength / 2), length_is((Length) / 2) ] USHORT * Buffer;
#else // MIDL_PASS
_Field_size_bytes_part_opt_(MaximumLength, Length) PWCH Buffer;
#endif // MIDL_PASS
} UNICODE_STRING;
// Pointer and pointer-to-const aliases for UNICODE_STRING.
typedef UNICODE_STRING *PUNICODE_STRING;
typedef const UNICODE_STRING *PCUNICODE_STRING;
// Attribute block passed along when creating or sharing an object handle.
// Use InitializeObjectAttributes() to fill it in; that macro sets Length to
// sizeof(OBJECT_ATTRIBUTES) and SecurityQualityOfService to NULL.
typedef struct _OBJECT_ATTRIBUTES {
ULONG Length;
HANDLE RootDirectory;
PUNICODE_STRING ObjectName;
ULONG Attributes; // bit flags such as OBJ_INHERIT
PVOID SecurityDescriptor;
PVOID SecurityQualityOfService;
} OBJECT_ATTRIBUTES;
// Fills in the OBJECT_ATTRIBUTES structure pointed to by `p`:
//   n -> ObjectName, a -> Attributes, r -> RootDirectory,
//   s -> SecurityDescriptor.
// Length is set to sizeof(OBJECT_ATTRIBUTES) and SecurityQualityOfService is
// always set to NULL. The expansion is a braced block, not an expression, so
// the macro can only be used as a statement; `p` is evaluated several times.
#define InitializeObjectAttributes( p, n, a, r, s ) { \
(p)->Length = sizeof( OBJECT_ATTRIBUTES ); \
(p)->RootDirectory = r; \
(p)->Attributes = a; \
(p)->ObjectName = n; \
(p)->SecurityDescriptor = s; \
(p)->SecurityQualityOfService = NULL; \
}
#endif
+86
Melihat File
@@ -0,0 +1,86 @@
#ifndef _WSL_INC_WDDM_VA_MGR_H_
#define _WSL_INC_WDDM_VA_MGR_H_
#include <mutex>
#include <map>
#include "util/utils.h"
namespace wsl {
namespace thunk {
// Virtual-address range manager. The range is tracked as non-overlapping
// fragments keyed by base address (frag_map_); free fragments are additionally
// indexed by size (free_list_).
class VaMgr {
 public:
  VaMgr(uint64_t start, uint64_t size, uint64_t min_align);
  ~VaMgr();

  /* Allocates `bytes` of VA. A non-zero `align` requests that the returned
   * address be aligned to `align`. A non-zero `addr` asks the manager to try
   * its best to place the allocation at that fixed address.
   */
  uint64_t Alloc(uint64_t bytes, uint64_t align, uint64_t addr = 0);
  void Free(uint64_t addr);

 private:
  uint64_t AllocImpl(uint64_t bytes, uint64_t align);

  // One contiguous piece of the VA range. A free fragment remembers its entry
  // in free_list_; a used fragment stores free_list_.end() instead.
  struct Fragment {
    using ptr = std::multimap<uint64_t, uint64_t>::iterator;

    ptr free_list_entry_;
    struct {
      uint64_t size : 63;
      bool is_free : 1;
    };

    Fragment() : size(0), is_free(false) {}
    Fragment(ptr iterator, uint64_t len, bool is_free)
        : free_list_entry_(iterator), size(len), is_free(is_free) {}
  };

  // Builds a free fragment linked to free-list entry `iter`.
  static inline Fragment make_fragment(typename Fragment::ptr iter, uint64_t len) {
    return Fragment(iter, len, true);
  }
  // Builds a used fragment (no free-list entry).
  inline Fragment make_fragment(uint64_t len) {
    return Fragment(free_list_.end(), len, false);
  }

  static inline bool is_free(const Fragment& f) { return f.is_free; }

  // Marks `f` as used and detaches it from any free-list entry.
  void set_used(Fragment& f) {
    f.free_list_entry_ = free_list_.end();
    f.is_free = false;
  }
  // Marks `f` as free and links it to free-list entry `iter`.
  static void set_free(Fragment& f, typename Fragment::ptr iter) {
    f.is_free = true;
    f.free_list_entry_ = iter;
  }

  // Erases the fragment's free-list entry, if it has one.
  inline void remove_free_list_entry(Fragment& frag) {
    if (frag.free_list_entry_ == free_list_.end()) {
      return;
    }
    free_list_.erase(frag.free_list_entry_);
    frag.free_list_entry_ = free_list_.end();
  }

  // Registers [base, base + size) as free in both indexes.
  inline void add_free_fragment(uint64_t size, uint64_t base) {
    const auto entry = free_list_.emplace(size, base);
    frag_map_[base] = make_fragment(entry, size);
  }
  // Registers [base, base + size) as used.
  inline void add_used_fragment(uint64_t size, uint64_t base) {
    frag_map_[base] = make_fragment(size);
  }

  // Indexed by size
  std::multimap<uint64_t, uint64_t> free_list_;
  // Indexed by VA, each fragment has no overlap
  std::map<uint64_t, Fragment> frag_map_;
  uint64_t min_align_;
  std::mutex lock_;  // Mutex protecting allocation and free of va

  DISALLOW_COPY_AND_ASSIGN(VaMgr);
};
} // namespace thunk
} // namespace wsl
#endif