wsl/hsakmt: move src/inc to include/impl
Signed-off-by: Flora Cui <flora.cui@amd.com> Reviewed-by: Horatio Zhang <Hongkun.Zhang@amd.com> Part-of: <http://10.67.69.192/wsl/rocr-runtime/-/merge_requests/15>
This commit is contained in:
gecommit door
Frank Min
bovenliggende
ce64cf0314
commit
7734c0baad
Diff onderdrukt omdat het te groot bestand
Laad Diff
@@ -1,363 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// This file is used only for open source cmake builds. If the register values
// were hardcoded in amd_aql_queue.cpp, this file would not be required. For
// now this file is used, and the register details are spelled out in the
// structs/unions below.
|
||||
#ifndef _WSL_INC_REGISTERS_H_
|
||||
#define _WSL_INC_REGISTERS_H_
|
||||
|
||||
// Values 0-3 for a buffer resource type selector; going by the enumerator
// names, 1-3 are reserved encodings.
typedef enum SQ_RSRC_BUF_TYPE {
  SQ_RSRC_BUF        = 0x0,
  SQ_RSRC_BUF_RSVD_1 = 0x1,
  SQ_RSRC_BUF_RSVD_2 = 0x2,
  SQ_RSRC_BUF_RSVD_3 = 0x3,
} SQ_RSRC_BUF_TYPE;
|
||||
|
||||
// Buffer data-format codes, 0x0 through 0xf. The numeric suffix of each name
// lists the per-component bit widths.
typedef enum BUF_DATA_FORMAT {
  BUF_DATA_FORMAT_INVALID     = 0x0,
  BUF_DATA_FORMAT_8           = 0x1,
  BUF_DATA_FORMAT_16          = 0x2,
  BUF_DATA_FORMAT_8_8         = 0x3,
  BUF_DATA_FORMAT_32          = 0x4,
  BUF_DATA_FORMAT_16_16       = 0x5,
  BUF_DATA_FORMAT_10_11_11    = 0x6,
  BUF_DATA_FORMAT_11_11_10    = 0x7,
  BUF_DATA_FORMAT_10_10_10_2  = 0x8,
  BUF_DATA_FORMAT_2_10_10_10  = 0x9,
  BUF_DATA_FORMAT_8_8_8_8     = 0xa,
  BUF_DATA_FORMAT_32_32       = 0xb,
  BUF_DATA_FORMAT_16_16_16_16 = 0xc,
  BUF_DATA_FORMAT_32_32_32    = 0xd,
  BUF_DATA_FORMAT_32_32_32_32 = 0xe,
  BUF_DATA_FORMAT_RESERVED_15 = 0xf,
} BUF_DATA_FORMAT;
|
||||
|
||||
// Buffer numeric-format codes, 0x0 through 0x7. Code 0x6 carries two names
// (one suffixed __SI__CI, one suffixed __VI) that share the same value.
typedef enum BUF_NUM_FORMAT {
  BUF_NUM_FORMAT_UNORM             = 0x0,
  BUF_NUM_FORMAT_SNORM             = 0x1,
  BUF_NUM_FORMAT_USCALED           = 0x2,
  BUF_NUM_FORMAT_SSCALED           = 0x3,
  BUF_NUM_FORMAT_UINT              = 0x4,
  BUF_NUM_FORMAT_SINT              = 0x5,
  BUF_NUM_FORMAT_SNORM_OGL__SI__CI = 0x6,
  BUF_NUM_FORMAT_RESERVED_6__VI    = 0x6,
  BUF_NUM_FORMAT_FLOAT             = 0x7,
} BUF_NUM_FORMAT;
|
||||
|
||||
// Combined buffer format code; only the 32-bit unsigned-integer entry (0x14)
// is declared here.
typedef enum BUF_FORMAT {
  BUF_FORMAT_32_UINT = 0x14,
} BUF_FORMAT;
|
||||
|
||||
// Component-select codes: constants 0 and 1, two reserved values, then the
// X/Y/Z/W selectors at 4-7.
typedef enum SQ_SEL_XYZW01 {
  SQ_SEL_0          = 0x0,
  SQ_SEL_1          = 0x1,
  SQ_SEL_RESERVED_0 = 0x2,
  SQ_SEL_RESERVED_1 = 0x3,
  SQ_SEL_X          = 0x4,
  SQ_SEL_Y          = 0x5,
  SQ_SEL_Z          = 0x6,
  SQ_SEL_W          = 0x7,
} SQ_SEL_XYZW01;
|
||||
|
||||
// 32-bit COMPUTE_TMPRING_SIZE value: WAVES (12 bits), WAVESIZE (13 bits) and
// 7 unnamed padding bits. The bit-field members are declared only when
// LITTLEENDIAN_CPU or BIGENDIAN_CPU is defined; the big-endian branch lists
// the same fields in reverse order.
union COMPUTE_TMPRING_SIZE {
  struct {
#if defined(LITTLEENDIAN_CPU)
    unsigned int WAVES    : 12;
    unsigned int WAVESIZE : 13;
    unsigned int          : 7;
#elif defined(BIGENDIAN_CPU)
    unsigned int          : 7;
    unsigned int WAVESIZE : 13;
    unsigned int WAVES    : 12;
#endif
  } bitfields, bits;  // two members of the same bit-field struct type

  // Whole-word views of the same storage.
  unsigned int u32All;
  signed int   i32All;
  float        f32All;
};
|
||||
|
||||
// GFX11 layout of COMPUTE_TMPRING_SIZE: WAVES (12 bits), WAVESIZE (15 bits)
// and 5 unnamed padding bits. Bit-field members exist only when
// LITTLEENDIAN_CPU or BIGENDIAN_CPU is defined.
union COMPUTE_TMPRING_SIZE_GFX11 {
  struct {
#if defined(LITTLEENDIAN_CPU)
    unsigned int WAVES    : 12;
    unsigned int WAVESIZE : 15;
    unsigned int          : 5;
#elif defined(BIGENDIAN_CPU)
    unsigned int          : 5;
    unsigned int WAVESIZE : 15;
    unsigned int WAVES    : 12;
#endif
  } bitfields, bits;  // two members of the same bit-field struct type

  // Whole-word views of the same storage.
  unsigned int u32All;
  signed int   i32All;
  float        f32All;
};
|
||||
|
||||
// GFX12 layout of COMPUTE_TMPRING_SIZE: WAVES (12 bits), WAVESIZE (18 bits)
// and 2 unnamed padding bits. Bit-field members exist only when
// LITTLEENDIAN_CPU or BIGENDIAN_CPU is defined.
union COMPUTE_TMPRING_SIZE_GFX12 {
  struct {
#if defined(LITTLEENDIAN_CPU)
    unsigned int WAVES    : 12;
    unsigned int WAVESIZE : 18;
    unsigned int          : 2;
#elif defined(BIGENDIAN_CPU)
    unsigned int          : 2;
    unsigned int WAVESIZE : 18;
    unsigned int WAVES    : 12;
#endif
  } bitfields, bits;  // two members of the same bit-field struct type

  // Whole-word views of the same storage.
  unsigned int u32All;
  signed int   i32All;
  float        f32All;
};
|
||||
|
||||
// Word 0 of the buffer resource descriptor: a single 32-bit BASE_ADDRESS
// field. Both endianness branches declare the same field; it is absent when
// neither LITTLEENDIAN_CPU nor BIGENDIAN_CPU is defined.
union SQ_BUF_RSRC_WORD0 {
  struct {
#if defined(LITTLEENDIAN_CPU)
    unsigned int BASE_ADDRESS : 32;
#elif defined(BIGENDIAN_CPU)
    unsigned int BASE_ADDRESS : 32;
#endif
  } bitfields, bits;  // two members of the same bit-field struct type

  // Whole-word views of the same storage.
  unsigned int u32All;
  signed int   i32All;
  float        f32All;
};
|
||||
|
||||
|
||||
// Word 1 of the buffer resource descriptor: BASE_ADDRESS_HI (16 bits),
// STRIDE (14 bits) and two 1-bit flags, CACHE_SWIZZLE and SWIZZLE_ENABLE.
// Bit-field members exist only when LITTLEENDIAN_CPU or BIGENDIAN_CPU is
// defined.
union SQ_BUF_RSRC_WORD1 {
  struct {
#if defined(LITTLEENDIAN_CPU)
    unsigned int BASE_ADDRESS_HI : 16;
    unsigned int STRIDE          : 14;
    unsigned int CACHE_SWIZZLE   : 1;
    unsigned int SWIZZLE_ENABLE  : 1;
#elif defined(BIGENDIAN_CPU)
    unsigned int SWIZZLE_ENABLE  : 1;
    unsigned int CACHE_SWIZZLE   : 1;
    unsigned int STRIDE          : 14;
    unsigned int BASE_ADDRESS_HI : 16;
#endif
  } bitfields, bits;  // two members of the same bit-field struct type

  // Whole-word views of the same storage.
  unsigned int u32All;
  signed int   i32All;
  float        f32All;
};
|
||||
|
||||
// GFX11 layout of descriptor word 1: BASE_ADDRESS_HI (16 bits), STRIDE
// (14 bits) and a 2-bit SWIZZLE_ENABLE field. Bit-field members exist only
// when LITTLEENDIAN_CPU or BIGENDIAN_CPU is defined.
union SQ_BUF_RSRC_WORD1_GFX11 {
  struct {
#if defined(LITTLEENDIAN_CPU)
    unsigned int BASE_ADDRESS_HI : 16;
    unsigned int STRIDE          : 14;
    unsigned int SWIZZLE_ENABLE  : 2;
#elif defined(BIGENDIAN_CPU)
    unsigned int SWIZZLE_ENABLE  : 2;
    unsigned int STRIDE          : 14;
    unsigned int BASE_ADDRESS_HI : 16;
#endif
  } bitfields, bits;  // two members of the same bit-field struct type

  // Whole-word views of the same storage.
  unsigned int u32All;
  signed int   i32All;
  float        f32All;
};
|
||||
|
||||
|
||||
// Word 2 of the buffer resource descriptor: a single 32-bit NUM_RECORDS
// field. Both endianness branches declare the same field; it is absent when
// neither LITTLEENDIAN_CPU nor BIGENDIAN_CPU is defined.
union SQ_BUF_RSRC_WORD2 {
  struct {
#if defined(LITTLEENDIAN_CPU)
    unsigned int NUM_RECORDS : 32;
#elif defined(BIGENDIAN_CPU)
    unsigned int NUM_RECORDS : 32;
#endif
  } bitfields, bits;  // two members of the same bit-field struct type

  // Whole-word views of the same storage.
  unsigned int u32All;
  signed int   i32All;
  float        f32All;
};
|
||||
|
||||
|
||||
// Word 3 of the buffer resource descriptor (base layout). Fourteen fields
// whose widths total 32 bits; the big-endian branch lists them in reverse
// order. Bit-field members exist only when LITTLEENDIAN_CPU or BIGENDIAN_CPU
// is defined.
union SQ_BUF_RSRC_WORD3 {
  struct {
#if defined(LITTLEENDIAN_CPU)
    unsigned int DST_SEL_X      : 3;
    unsigned int DST_SEL_Y      : 3;
    unsigned int DST_SEL_Z      : 3;
    unsigned int DST_SEL_W      : 3;
    unsigned int NUM_FORMAT     : 3;
    unsigned int DATA_FORMAT    : 4;
    unsigned int ELEMENT_SIZE   : 2;
    unsigned int INDEX_STRIDE   : 2;
    unsigned int ADD_TID_ENABLE : 1;
    unsigned int ATC__CI__VI    : 1;
    unsigned int HASH_ENABLE    : 1;
    unsigned int HEAP           : 1;
    unsigned int MTYPE__CI__VI  : 3;
    unsigned int TYPE           : 2;
#elif defined(BIGENDIAN_CPU)
    unsigned int TYPE           : 2;
    unsigned int MTYPE__CI__VI  : 3;
    unsigned int HEAP           : 1;
    unsigned int HASH_ENABLE    : 1;
    unsigned int ATC__CI__VI    : 1;
    unsigned int ADD_TID_ENABLE : 1;
    unsigned int INDEX_STRIDE   : 2;
    unsigned int ELEMENT_SIZE   : 2;
    unsigned int DATA_FORMAT    : 4;
    unsigned int NUM_FORMAT     : 3;
    unsigned int DST_SEL_W      : 3;
    unsigned int DST_SEL_Z      : 3;
    unsigned int DST_SEL_Y      : 3;
    unsigned int DST_SEL_X      : 3;
#endif
  } bitfields, bits;  // two members of the same bit-field struct type

  // Whole-word views of the same storage.
  unsigned int u32All;
  signed int   i32All;
  float        f32All;
};
|
||||
|
||||
// GFX10 layout of descriptor word 3. A 7-bit FORMAT field is declared where
// the base layout has NUM_FORMAT and DATA_FORMAT; RESOURCE_LEVEL and
// OOB_SELECT are added. Field widths total 32 bits. Bit-field members exist
// only when LITTLEENDIAN_CPU or BIGENDIAN_CPU is defined.
union SQ_BUF_RSRC_WORD3_GFX10 {
  struct {
#if defined(LITTLEENDIAN_CPU)
    unsigned int DST_SEL_X      : 3;
    unsigned int DST_SEL_Y      : 3;
    unsigned int DST_SEL_Z      : 3;
    unsigned int DST_SEL_W      : 3;
    unsigned int FORMAT         : 7;
    unsigned int RESERVED1      : 2;
    unsigned int INDEX_STRIDE   : 2;
    unsigned int ADD_TID_ENABLE : 1;
    unsigned int RESOURCE_LEVEL : 1;
    unsigned int RESERVED2      : 3;
    unsigned int OOB_SELECT     : 2;
    unsigned int TYPE           : 2;
#elif defined(BIGENDIAN_CPU)
    unsigned int TYPE           : 2;
    unsigned int OOB_SELECT     : 2;
    unsigned int RESERVED2      : 3;
    unsigned int RESOURCE_LEVEL : 1;
    unsigned int ADD_TID_ENABLE : 1;
    unsigned int INDEX_STRIDE   : 2;
    unsigned int RESERVED1      : 2;
    unsigned int FORMAT         : 7;
    unsigned int DST_SEL_W      : 3;
    unsigned int DST_SEL_Z      : 3;
    unsigned int DST_SEL_Y      : 3;
    unsigned int DST_SEL_X      : 3;
#endif
  } bitfields, bits;  // two members of the same bit-field struct type

  // Whole-word views of the same storage.
  unsigned int u32All;
  signed int   i32All;
  float        f32All;
};
|
||||
|
||||
// From V# Table.
// GFX11 layout of descriptor word 3: FORMAT is 6 bits wide and there is no
// RESOURCE_LEVEL field; RESERVED1 and RESERVED2 are 3 and 4 bits. Field
// widths total 32 bits. Bit-field members exist only when LITTLEENDIAN_CPU
// or BIGENDIAN_CPU is defined.
union SQ_BUF_RSRC_WORD3_GFX11 {
  struct {
#if defined(LITTLEENDIAN_CPU)
    unsigned int DST_SEL_X      : 3;
    unsigned int DST_SEL_Y      : 3;
    unsigned int DST_SEL_Z      : 3;
    unsigned int DST_SEL_W      : 3;
    unsigned int FORMAT         : 6;
    unsigned int RESERVED1      : 3;
    unsigned int INDEX_STRIDE   : 2;
    unsigned int ADD_TID_ENABLE : 1;
    unsigned int RESERVED2      : 4;
    unsigned int OOB_SELECT     : 2;
    unsigned int TYPE           : 2;
#elif defined(BIGENDIAN_CPU)
    unsigned int TYPE           : 2;
    unsigned int OOB_SELECT     : 2;
    unsigned int RESERVED2      : 4;
    unsigned int ADD_TID_ENABLE : 1;
    unsigned int INDEX_STRIDE   : 2;
    unsigned int RESERVED1      : 3;
    unsigned int FORMAT         : 6;
    unsigned int DST_SEL_W      : 3;
    unsigned int DST_SEL_Z      : 3;
    unsigned int DST_SEL_Y      : 3;
    unsigned int DST_SEL_X      : 3;
#endif
  } bitfields, bits;  // two members of the same bit-field struct type

  // Whole-word views of the same storage.
  unsigned int u32All;
  signed int   i32All;
  float        f32All;
};
|
||||
// From V# Table.
// GFX12 layout of descriptor word 3: the 4 bits that are RESERVED2 in the
// GFX11 layout are declared as WRITE_COMPRESS_ENABLE, COMPRESSION_EN and a
// 2-bit COMPRESSION_ACCESS_MODE. Field widths total 32 bits. Bit-field
// members exist only when LITTLEENDIAN_CPU or BIGENDIAN_CPU is defined.
union SQ_BUF_RSRC_WORD3_GFX12 {
  struct {
#if defined(LITTLEENDIAN_CPU)
    unsigned int DST_SEL_X               : 3;
    unsigned int DST_SEL_Y               : 3;
    unsigned int DST_SEL_Z               : 3;
    unsigned int DST_SEL_W               : 3;
    unsigned int FORMAT                  : 6;
    unsigned int RESERVED1               : 3;
    unsigned int INDEX_STRIDE            : 2;
    unsigned int ADD_TID_ENABLE          : 1;
    unsigned int WRITE_COMPRESS_ENABLE   : 1;
    unsigned int COMPRESSION_EN          : 1;
    unsigned int COMPRESSION_ACCESS_MODE : 2;
    unsigned int OOB_SELECT              : 2;
    unsigned int TYPE                    : 2;
#elif defined(BIGENDIAN_CPU)
    unsigned int TYPE                    : 2;
    unsigned int OOB_SELECT              : 2;
    unsigned int COMPRESSION_ACCESS_MODE : 2;
    unsigned int COMPRESSION_EN          : 1;
    unsigned int WRITE_COMPRESS_ENABLE   : 1;
    unsigned int ADD_TID_ENABLE          : 1;
    unsigned int INDEX_STRIDE            : 2;
    unsigned int RESERVED1               : 3;
    unsigned int FORMAT                  : 6;
    unsigned int DST_SEL_W               : 3;
    unsigned int DST_SEL_Z               : 3;
    unsigned int DST_SEL_Y               : 3;
    unsigned int DST_SEL_X               : 3;
#endif
  } bitfields, bits;  // two members of the same bit-field struct type

  // Whole-word views of the same storage.
  unsigned int u32All;
  signed int   i32All;
  float        f32All;
};
|
||||
#endif // header guard
|
||||
@@ -1,128 +0,0 @@
|
||||
#ifndef _WSL_INC_THUNK_PROXY_H_
|
||||
#define _WSL_INC_THUNK_PROXY_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
namespace thunk_proxy {
|
||||
// Allocation domains, numbered from 0. kDomainCount is the trailing sentinel
// and therefore equals the number of real domains (4).
enum AllocDomain {
  kSystem,       // 0
  kLocal,        // 1
  kUserMemory,   // 2
  kUserQueue,    // 3
  kDomainCount,  // 4 (sentinel)
};
|
||||
|
||||
// Memory attribute bits; each enumerator is a distinct single bit so values
// can be OR-ed together.
enum MemFlag {
  kFineGrain = (1ULL << 0),  // bit 0
  kKernarg   = (1ULL << 1),  // bit 1
};
|
||||
|
||||
// Engine selection bits; each enumerator is a distinct single bit.
// NOTE(review): these use an upper-case 'K' prefix while the sibling enums
// use 'k' -- kept as-is because callers reference these names.
enum EngineFlag {
  KCOMPUTE0 = (1ULL << 0),  // bit 0
  KDRMDMA   = (1ULL << 1),  // bit 1
  KDRMDMA1  = (1ULL << 2),  // bit 2
};
|
||||
|
||||
// Scheduling levels in ascending numeric order (0, 1, 2).
enum SchedLevel {
  kLow    = 0,
  kNormal = 1,
  kHigh   = 2,
};
|
||||
|
||||
// ASIC family identifiers, numbered from 0 in declaration order.
enum AsicFamilyType {
  kPlumBONITO,  // 0
  kNavi44,      // 1
  kNavi48,      // 2
};
|
||||
|
||||
// Hardware-scheduler (HWS) enablement for a device.
//
// The anonymous union exposes the same 32 bits either as per-engine 1-bit
// flags (hwsMask) or as one raw word (osHwsEnableFlags).
struct HwsInfo {
  union {
    struct {
      uint32_t gfxHwsEnabled     : 1;
      uint32_t computeHwsEnabled : 1;
      uint32_t dmaHwsEnabled     : 1;
      uint32_t dma1HwsEnabled    : 1;
      uint32_t reserved          : 28;
    } hwsMask;
    uint32_t osHwsEnableFlags;  // raw view of hwsMask
  };

  // Indicates which engines (by ordinal) support MES HWS.
  uint64_t engineOrdinalMask;
};
|
||||
|
||||
typedef struct {
|
||||
int major;
|
||||
int minor;
|
||||
int stepping;
|
||||
bool is_dgpu;
|
||||
char product_name[MAX_PATH];
|
||||
const char *uuid;
|
||||
AsicFamilyType family;
|
||||
uint32_t device_id;
|
||||
uint32_t wavefront_size;
|
||||
uint32_t compute_unit_count;
|
||||
uint32_t max_engine_clock_mhz;
|
||||
uint32_t watch_points_num;
|
||||
uint32_t pci_bus_addr;
|
||||
uint32_t memory_bus_width;
|
||||
uint32_t max_memory_clock_mhz;
|
||||
uint64_t gpu_counter_frequency;
|
||||
uint32_t wave_per_cu;
|
||||
uint32_t simd_per_cu;
|
||||
uint32_t max_scratch_slots_per_cu;
|
||||
uint32_t num_shader_engine;
|
||||
uint32_t shader_array_per_shader_engine;
|
||||
uint32_t domain;
|
||||
uint32_t num_gws;
|
||||
uint32_t asic_revision;
|
||||
uint64_t local_visible_heap_size;
|
||||
uint64_t local_invisible_heap_size;
|
||||
uint64_t private_aperture_base;
|
||||
uint64_t private_aperture_size;
|
||||
uint64_t shared_aperture_base;
|
||||
uint64_t shared_aperture_size;
|
||||
uint32_t user_queue_size;
|
||||
uint32_t lds_size;
|
||||
uint32_t big_page_alignment_size;
|
||||
uint32_t hw_big_page_min_alignment_size;
|
||||
uint32_t hw_big_page_alignment_size;
|
||||
bool enable_big_page_alignment;
|
||||
uint32_t mec_fw_version;
|
||||
uint32_t sdma_fw_version;
|
||||
uint32_t l1_cache_size;
|
||||
uint32_t l2_cache_size;
|
||||
uint32_t l3_cache_size;
|
||||
uint32_t gl2_cacheline_size;
|
||||
uint32_t num_cp_queues;
|
||||
HwsInfo hwsInfo;
|
||||
std::vector<int> sdma_schedid;
|
||||
uint32_t compute_schedid;
|
||||
bool state_shadowing_by_cpfw;
|
||||
bool platform_atomic_support;
|
||||
void *adapter_info;
|
||||
void *adapter_ex_info;
|
||||
} DeviceInfo;
|
||||
|
||||
int EngineOrdinal(int engine, DeviceInfo *device_info);
|
||||
bool GetHwsEnabled(int engine, DeviceInfo *device_info);
|
||||
bool ShouldDisableGpuTimeout(int engine, DeviceInfo *device_info);
|
||||
bool ParseAdapterInfo(D3DKMT_HANDLE adapter, DeviceInfo *device_info);
|
||||
bool QueryAdapterSupported(D3DKMT_HANDLE adapter);
|
||||
|
||||
uint32_t QueueEngine2EngineFlag(uint32_t queue_engine);
|
||||
void SetAllocationInfo(void *data, uint64_t size, AllocDomain domain,
|
||||
uint64_t addr, uint32_t mem_flags, uint32_t engine_flag, const DeviceInfo &device_info);
|
||||
bool CreatePrivateAllocInfo(int num_handles, void **ppdrv_priv, void **ppalloc_priv,
|
||||
int *pdrv_priv_data_size, int *palloc_priv_data_size);
|
||||
void DestroyPrivateAllocInfo(void *drv_priv, void *alloc_priv);
|
||||
|
||||
int CreateSubmitPrivData(void **priv_data, D3DKMT_HANDLE queue, uint64_t command_addr,
|
||||
uint64_t command_size, bool is_hw_queue);
|
||||
int CreateHwQueuePrivData(void **priv_data, D3DKMT_HANDLE context,
|
||||
bool FwManagedGfxState, SchedLevel level = kNormal);
|
||||
int CreateContextPrivData(void **priv_data, bool FwManagedGfxState);
|
||||
int CreatePowerOptPrivData(void **priv_data, bool restore);
|
||||
int CreateCalibratedTimestampsPrivData(void **priv_data);
|
||||
void QueryCalibratedTimestamps(void* priv, uint64_t* gpu, uint64_t* cpu);
|
||||
void DestroyPrivData(void *priv_data);
|
||||
}
|
||||
#endif
|
||||
@@ -1,155 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef _WSL_INC_THUNK_PROXY_WDDM_TYPES_H_
|
||||
#define _WSL_INC_THUNK_PROXY_WDDM_TYPES_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include <no_sal2.h>
|
||||
|
||||
// Fixed-width stand-ins for the Win32 scalar type names used by the WDDM
// headers, one declarator per line.
//
// NOTE(review): UINT_PTR, ULONG_PTR and ULONG64_PTR are declared here as
// pointers to the base type. In the Windows SDK, UINT_PTR and ULONG_PTR are
// pointer-sized unsigned integers -- confirm no user relies on integer
// semantics before changing them.
// NOTE(review): WCHAR and PCWSTR use a signed 16-bit element -- confirm that
// is intended.

// 32-bit integers.
typedef uint32_t UINT;
typedef uint32_t *UINT_PTR;
typedef int32_t INT32;
typedef int32_t LONG;
typedef uint32_t ULONG;
typedef uint32_t *ULONG_PTR;

// 64-bit integers.
typedef int64_t LONGLONG;
typedef int64_t LONG64;
typedef uint64_t ULONGLONG;
typedef uint64_t ULONG64;
typedef uint64_t *ULONG64_PTR;

// Small integers, status codes and sized aliases.
typedef uint8_t BYTE;
typedef uint16_t WORD;
typedef uint32_t DWORD;
typedef int32_t BOOL;
typedef int32_t NTSTATUS;
typedef uint16_t USHORT;
typedef uint16_t UINT16;
typedef uint32_t UINT32;
typedef uint64_t UINT64;
typedef int32_t INT;
typedef uint64_t SIZE_T;

// Non-integer basics and characters.
typedef void VOID;
typedef float FLOAT;
typedef char CHAR;
typedef unsigned char UCHAR;
typedef UCHAR BOOLEAN;
typedef int16_t WCHAR;

// Untyped pointers and wide-string pointer.
typedef void *HANDLE;
typedef void *PVOID;
typedef void *LPVOID;
typedef const int16_t *PCWSTR;
|
||||
|
||||
// Self-referential defines: they expand to the typedef name unchanged but
// make the names visible to #ifdef / defined() checks.
#define ULONG ULONG
#define ULONG_PTR ULONG_PTR
#define USHORT USHORT

// DECLARE_HANDLE(name): declares struct name##__ and makes `name` a pointer
// to it, giving each handle kind its own distinct pointer type.
#define DECLARE_HANDLE(name) \
  struct name##__{int unused;}; typedef struct name##__ *name

// C_ASSERT(e): compile-time check; the array bound becomes -1 (ill-formed)
// when `e` is false.
#define C_ASSERT(e) \
  typedef char __C_ASSERT__[(e)?1:-1]

// Handle types declared through the macro above.
DECLARE_HANDLE(HWND);
DECLARE_HANDLE(HDC);
DECLARE_HANDLE(PALETTEENTRY);
|
||||
|
||||
typedef struct tagPOINT {
|
||||
LONG x;
|
||||
LONG y;
|
||||
} POINT;
|
||||
|
||||
typedef struct tagRECT {
|
||||
LONG left;
|
||||
LONG top;
|
||||
LONG right;
|
||||
LONG bottom;
|
||||
} RECT;
|
||||
|
||||
typedef struct tagRECTL {
|
||||
LONG left;
|
||||
LONG top;
|
||||
LONG right;
|
||||
LONG bottom;
|
||||
} RECTL;
|
||||
|
||||
typedef union _LARGE_INTEGER {
|
||||
struct {
|
||||
DWORD LowPart;
|
||||
DWORD HighPart;
|
||||
} u;
|
||||
LONGLONG QuadPart;
|
||||
} LARGE_INTEGER;
|
||||
|
||||
typedef LARGE_INTEGER *PLARGE_INTEGER;
|
||||
|
||||
typedef struct _LUID {
|
||||
ULONG LowPart;
|
||||
LONG HighPart;
|
||||
} LUID, *PLUID;
|
||||
|
||||
// Device power states, numbered consecutively from 0; PowerDeviceMaximum is
// the trailing sentinel (5).
typedef enum _DEVICE_POWER_STATE {
  PowerDeviceUnspecified = 0,
  PowerDeviceD0,       // 1
  PowerDeviceD1,       // 2
  PowerDeviceD2,       // 3
  PowerDeviceD3,       // 4
  PowerDeviceMaximum   // 5
} DEVICE_POWER_STATE, *PDEVICE_POWER_STATE;
|
||||
|
||||
// Annotation and calling-convention keywords: all expand to nothing here
// except CONST, which maps to the `const` keyword.
#define _Check_return_
#define APIENTRY
#define CONST const
#define IN
#define OUT
#define FAR
#define __stdcall

// Path buffer length used for fixed-size name arrays.
#define MAX_PATH 260
|
||||
|
||||
// 128-bit GUID laid out as 32 + 16 + 16 bits followed by 8 bytes. The
// GUID_DEFINED guard skips this definition when the macro is already defined.
#ifndef GUID_DEFINED
#define GUID_DEFINED
typedef struct _GUID {
  uint32_t Data1;
  uint16_t Data2;
  uint16_t Data3;
  uint8_t  Data4[ 8 ];
} GUID;
#endif
|
||||
|
||||
#include <guiddef.h>
|
||||
|
||||
#endif
|
||||
@@ -1,83 +0,0 @@
|
||||
/* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. */
|
||||
|
||||
#ifndef _WSL_INC_WDDM_CMD_UTIL_H_
|
||||
#define _WSL_INC_WDDM_CMD_UTIL_H_
|
||||
|
||||
#include <string.h>
|
||||
#include "hsa-runtime/inc/hsa.h"
|
||||
#include "hsa-runtime/inc/amd_hsa_queue.h"
|
||||
#include "hsa-runtime/inc/amd_hsa_kernel_code.h"
|
||||
#include "inc/pm4_cmds.h"
|
||||
#include "util/utils.h"
|
||||
#include "libhsakmt.h"
|
||||
|
||||
namespace wsl {
|
||||
namespace thunk {
|
||||
|
||||
struct DispatchInfo {
|
||||
uint8_t major;
|
||||
hsa_kernel_dispatch_packet_t *pPacket;
|
||||
void *pEntry;
|
||||
const amd_kernel_code_t *pKernelObject;
|
||||
uint32_t ldsBlks;
|
||||
amd_queue_t *pAmdQueue;
|
||||
bool wave32;
|
||||
uint32_t srd;
|
||||
void *pScratchBase;
|
||||
uint32_t scratchSizePerWave;
|
||||
uint32_t scratchBaseOffset[2];
|
||||
uint32_t offsetCnt;
|
||||
};
|
||||
|
||||
class CmdUtil {
|
||||
public:
|
||||
CmdUtil() {};
|
||||
~CmdUtil() {};
|
||||
|
||||
size_t BuildCopyData(
|
||||
uint64_t *pDstAddr,
|
||||
void *pBuffer,
|
||||
uint32_t dstSel = dst_sel__mec_copy_data__tc_l2,
|
||||
uint32_t dstCachePolicy = dst_cache_policy__mec_copy_data__stream,
|
||||
uint32_t srcSel = src_sel__mec_copy_data__gpu_clock_count,
|
||||
uint32_t srcCachePolicy = src_cache_policy__mec_copy_data__lru,
|
||||
uint32_t countSel = count_sel__mec_copy_data__64_bits_of_data,
|
||||
uint32_t wrConfirm = wr_confirm__mec_copy_data__wait_for_confirmation);
|
||||
|
||||
size_t BuildBarrier(
|
||||
void *pBuffer,
|
||||
uint32_t eventIndex = event_index__mec_event_write__cs_partial_flush,
|
||||
uint32_t eventType = CS_PARTIAL_FLUSH);
|
||||
|
||||
size_t BuildWriteData64Command(
|
||||
void *pBuffer,
|
||||
uint64_t* write_addr,
|
||||
uint64_t write_value);
|
||||
|
||||
size_t BuildAcquireMem(
|
||||
uint8_t major,
|
||||
void *pBuffer);
|
||||
|
||||
size_t BuildScratch(
|
||||
void *pScratchBase,
|
||||
void *pBuffer);
|
||||
|
||||
size_t BuildComputeShaderParams(
|
||||
void *pBuffer);
|
||||
|
||||
size_t BuildDispatch(
|
||||
struct DispatchInfo *pInfo,
|
||||
void *pBuffer);
|
||||
|
||||
size_t BuildAtomicMem(
|
||||
uint64_t *pAddr,
|
||||
uint32_t atomic,
|
||||
void *pBuffer,
|
||||
uint32_t cachePolicy = cache_policy__mec_atomic_mem__stream,
|
||||
uint64_t srcData = 1);
|
||||
};
|
||||
|
||||
} // namespace thunk
|
||||
} // namespace wsl
|
||||
|
||||
#endif
|
||||
@@ -1,257 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef _WSL_INC_WDDM_DEVICE_H_
|
||||
#define _WSL_INC_WDDM_DEVICE_H_
|
||||
|
||||
#include <cassert>
|
||||
#include <ntstatus.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "inc/wddm/types.h"
|
||||
#include "inc/thunk_proxy/thunk_proxy.h"
|
||||
#include "inc/wddm/va_mgr.h"
|
||||
#include "inc/wddm/status.h"
|
||||
#include "inc/wddm/types.h"
|
||||
#include "inc/wddm/gpu_memory.h"
|
||||
#include "inc/wddm/cmd_util.h"
|
||||
|
||||
namespace wsl {
|
||||
namespace thunk {
|
||||
|
||||
//class Queue;
|
||||
class WDDMQueue;
|
||||
|
||||
// WSL2 hyperv GPADL protocol limitation
|
||||
// Largest userptr block size.
#define MAX_USERPTR_BLOCK_SIZE 0xf0000000

// Bounds of the non-canonical hole in a 48-bit virtual address space:
// [1 << 47, ~0 - (1 << 47)]. Both use ULL so the constants are 64-bit even
// where `unsigned long` is 32 bits wide; shifting a 32-bit 1UL by 47 would be
// undefined.
#define START_NON_CANONICAL_ADDR (1ULL << 47)
#define END_NON_CANONICAL_ADDR (~0ULL - (1ULL << 47))
|
||||
// True when the half-open ranges [start1, start1 + size1) and
// [start2, start2 + size2) share at least one address. Every parameter is
// parenthesized so that expression arguments (e.g. `a & mask`) are evaluated
// as a unit instead of being re-associated by operator precedence.
// Note: each argument is evaluated twice, so avoid side effects in them.
#define IS_OVERLAPPING(start1, size1, start2, size2) \
  (((start1) < ((start2) + (size2))) && ((start2) < ((start1) + (size1))))
|
||||
|
||||
class WDDMDevice {
|
||||
public:
|
||||
static constexpr size_t GpuMemoryChunkSize = 2ULL << 30;  // 2 GB (2 * 2^30 bytes)
|
||||
|
||||
WDDMDevice(D3DKMT_HANDLE adapter, LUID adapter_luid);
|
||||
~WDDMDevice();
|
||||
|
||||
int Major() { return device_info_.major; }
|
||||
int Minor() { return device_info_.minor; }
|
||||
int Stepping() { return device_info_.stepping; }
|
||||
bool IsDgpu() { return device_info_.is_dgpu; }
|
||||
const char *ProductName() { return device_info_.product_name; }
|
||||
const char *Uuid() { return device_info_.uuid; }
|
||||
thunk_proxy::AsicFamilyType GfxFamily() { return device_info_.family; }
|
||||
uint32_t DeviceId() { return device_info_.device_id; }
|
||||
uint32_t WavefrontSize() { return device_info_.wavefront_size; }
|
||||
uint32_t ComputeUnitCount() { return device_info_.compute_unit_count; }
|
||||
uint32_t MaxEngineClockMhz() { return device_info_.max_engine_clock_mhz; }
|
||||
uint32_t WatchPointsNum() { return device_info_.watch_points_num; }
|
||||
uint32_t PciBusAddr() { return device_info_.pci_bus_addr; }
|
||||
|
||||
uint32_t MemoryBusWidth() { return device_info_.memory_bus_width; }
|
||||
uint32_t MaxMemoryClockMhz() { return device_info_.max_memory_clock_mhz; }
|
||||
uint32_t WavePerCu() { return device_info_.wave_per_cu; }
|
||||
uint32_t SimdPerCu() { return device_info_.simd_per_cu; }
|
||||
uint32_t MaxScratchSlotsPerCu() { return device_info_.max_scratch_slots_per_cu; }
|
||||
uint32_t NumShaderEngine() { return device_info_.num_shader_engine; }
|
||||
uint32_t ShaderArrayPerShaderEngine() { return device_info_.shader_array_per_shader_engine; }
|
||||
uint32_t NumSdmaEngine() { return device_info_.sdma_schedid.size(); }
|
||||
uint32_t Domain() { return device_info_.domain; }
|
||||
uint32_t NumGws() { return device_info_.num_gws; }
|
||||
uint32_t AsicRevision() { return device_info_.asic_revision; }
|
||||
uint64_t LocalHeapSize() { return device_info_.local_visible_heap_size + device_info_.local_invisible_heap_size; }
|
||||
uint64_t LocalVisibleHeapSize() { return device_info_.local_visible_heap_size; }
|
||||
uint64_t LocalInvisibleHeapSize() { return device_info_.local_invisible_heap_size; }
|
||||
uint64_t PrivateApertureBase() { return device_info_.private_aperture_base; }
|
||||
uint64_t PrivateApertureSize() { return device_info_.private_aperture_size; }
|
||||
uint64_t SharedApertureBase() { return device_info_.shared_aperture_base; }
|
||||
uint64_t SharedApertureSize() { return device_info_.shared_aperture_size; }
|
||||
uint32_t LdsSize() { return device_info_.lds_size; }
|
||||
uint64_t GPUCounterFrequency() { return device_info_.gpu_counter_frequency; }
|
||||
uint32_t GetSwsQueueSize(void) const { return device_info_.user_queue_size; }
|
||||
uint32_t GetMecFwVersion() { return device_info_.mec_fw_version; }
|
||||
uint32_t GetSdmaFwVersion() { return device_info_.sdma_fw_version; }
|
||||
uint32_t GetL1CacheSize() { return device_info_.l1_cache_size; }
|
||||
uint32_t GetL2CacheSize() { return device_info_.l2_cache_size; }
|
||||
uint32_t GetL3CacheSize() { return device_info_.l3_cache_size; }
|
||||
uint32_t Gl2CacheLineSize() { return device_info_.gl2_cacheline_size; }
|
||||
bool SupportStateShadowingByCpFw(void) const { return device_info_.state_shadowing_by_cpfw; }
|
||||
bool SupportPlatformAtomic(void) const { return device_info_.platform_atomic_support; }
|
||||
uint32_t GetSdmaEngine(uint32_t idx) {
|
||||
assert(idx < NumSdmaEngine());
|
||||
return device_info_.sdma_schedid[idx];
|
||||
}
|
||||
uint32_t GetComputeEngine() { return device_info_.compute_schedid; }
|
||||
|
||||
uint64_t VramAvail();
|
||||
|
||||
void GetClockCounters(uint64_t *gpu, uint64_t *cpu);
|
||||
uint32_t GetNumCpQueues() { return device_info_.num_cp_queues; }
|
||||
|
||||
bool CreateSyncobj(D3DKMT_HANDLE *handle, uint64_t **addr);
|
||||
void DestroySyncobj(D3DKMT_HANDLE handle);
|
||||
|
||||
bool CreateQueue(WDDMQueue *queue);
|
||||
void DestroyQueue(WDDMQueue *queue);
|
||||
bool CreateHwQueue(WDDMQueue *queue);
|
||||
bool DestroyHwQueue(WDDMQueue *queue);
|
||||
bool SubmitToSwQueue(WDDMQueue *queue, uint64_t command_addr,
|
||||
uint64_t command_size, uint64_t fence_value);
|
||||
bool SubmitToHwQueue(WDDMQueue *queue, uint64_t command_addr,
|
||||
uint64_t command_size, uint64_t fence_value);
|
||||
|
||||
// Makes `queue` wait on the paging fence when the fence has not yet reached
// the most recently recorded paging fence value.
//
// Returns true when no wait is needed (the value at page_fence_addr_ is
// already >= the recorded value) or when GpuWait() succeeds; returns false
// only when GpuWait() fails.
bool WaitPagingFence(WDDMQueue *queue) {
  // Snapshot the atomic target once; GpuWait() takes it by pointer.
  uint64_t target = page_fence_value_;

  if (*page_fence_addr_ >= target) {
    return true;
  }

  return GpuWait(queue, &page_syncobj_, &target, 1);
}
|
||||
|
||||
bool GpuWait(WDDMQueue *queue, const D3DKMT_HANDLE *syncobjs,
|
||||
uint64_t *values, int count);
|
||||
bool GpuSignal(D3DKMT_HANDLE context, const D3DKMT_HANDLE *syncobjs,
|
||||
uint64_t *value, int count);
|
||||
bool CpuWait(const D3DKMT_HANDLE *syncobjs, uint64_t *value,
|
||||
int count, bool wait_any);
|
||||
bool WaitOnPagingFenceFromCpu();
|
||||
|
||||
uint32_t LdsBlocks(const hsa_kernel_dispatch_packet_t *pkt);
|
||||
uint32_t GetCmdbufSize(void) const { return cmdbuf_size_; }
|
||||
uint32_t GetAqlFrameSize(void) const { return cmdbuf_aql_frame_size_; }
|
||||
static uint32_t GetAqlFrameNum(void) { return cmdbuf_aql_frame_num_; }
|
||||
|
||||
// Both legacy HWS and stage 1 HWS use KMD to alloc use queue memory,
|
||||
// return false by default
|
||||
bool AllocUserQueueMemFromUMD(void) const { return false; }
|
||||
|
||||
bool IsHwsEnabled(int engine) {
|
||||
return thunk_proxy::GetHwsEnabled(engine, &device_info_);
|
||||
}
|
||||
|
||||
void UpdatePageFence(uint64_t fence_value);
|
||||
|
||||
D3DKMT_HANDLE PagingQueue() const { return page_queue_; }
|
||||
D3DKMT_HANDLE PagingFence() const { return page_syncobj_; }
|
||||
D3DKMT_HANDLE DeviceHandle() const { return device_; }
|
||||
LUID GetLuid() const { return adapter_luid_; }
|
||||
|
||||
const thunk_proxy::DeviceInfo& DeviceInfo() const { return device_info_; }
|
||||
|
||||
ErrorCode ReserveGpuVirtualAddress(thunk_proxy::AllocDomain domain,
|
||||
gpusize hit_base_addr,
|
||||
gpusize size,
|
||||
gpusize *out_gpu_virtual_addr,
|
||||
gpusize alignment,
|
||||
bool lock=false);
|
||||
|
||||
ErrorCode FreeGpuVirtualAddress(thunk_proxy::AllocDomain domain,
|
||||
gpusize base_addr,
|
||||
gpusize size);
|
||||
|
||||
ErrorCode CreateGpuMemory(const GpuMemoryCreateInfo &create_info, GpuMemory **gpu_mem);
|
||||
ErrorCode HandleApertureAlloc(gpusize size, gpusize *out_gpu_virt_addr);
|
||||
void HandleApertureFree(gpusize gpu_addr);
|
||||
|
||||
private:
|
||||
bool ParseDeviceInfo(void);
|
||||
void DestroyDeviceInfo(void);
|
||||
bool CreateDevice(void);
|
||||
bool DestroyDevice(void);
|
||||
bool CreatePagingQueue(void);
|
||||
bool DestroyPagingQueue(void);
|
||||
void *Lock(D3DKMT_HANDLE handle);
|
||||
bool Unlock(D3DKMT_HANDLE handle);
|
||||
bool CreateContext(int engine, D3DKMT_HANDLE *handle);
|
||||
bool DestroyContext(D3DKMT_HANDLE handle);
|
||||
|
||||
void SetPowerOptimization(bool restore);
|
||||
void InitCmdbufInfo(void);
|
||||
bool ReserveSystemHeapSpace(void);
|
||||
bool FreeSystemHeapSpace(void);
|
||||
bool ReserveLocalHeapSpace(void);
|
||||
bool InitHandleApertureSpace(void);
|
||||
bool CommitSystemHeapSpace(void* addr, int64_t size, bool lock=false);
|
||||
bool DecommitSystemHeapSpace(void* addr, int64_t size);
|
||||
bool FreeLocalHeapSpace(void);
|
||||
void InitVaMgr();
|
||||
void InitHandleApertureMgr();
|
||||
|
||||
D3DKMT_HANDLE adapter_;
|
||||
LUID adapter_luid_;
|
||||
D3DKMT_HANDLE device_;
|
||||
|
||||
D3DKMT_HANDLE page_queue_;
|
||||
D3DKMT_HANDLE page_syncobj_;
|
||||
uint64_t *page_fence_addr_;
|
||||
std::atomic<uint64_t> page_fence_value_;
|
||||
|
||||
uint64_t handle_aperture_start_;
|
||||
uint64_t handle_aperture_size_;
|
||||
uint64_t local_heap_space_start_;
|
||||
uint64_t local_heap_space_size_;
|
||||
uint64_t system_heap_space_start_;
|
||||
uint64_t system_heap_space_size_;
|
||||
uint32_t cmdbuf_size_;
|
||||
uint32_t cmdbuf_aql_frame_size_;
|
||||
static const uint32_t cmdbuf_aql_frame_num_;
|
||||
// device info
|
||||
thunk_proxy::DeviceInfo device_info_;
|
||||
|
||||
std::unique_ptr<VaMgr> local_va_mgr_;
|
||||
std::unique_ptr<VaMgr> handle_aperture_mgr_;
|
||||
//CmdUtil cmd_util;
|
||||
};
|
||||
|
||||
// Declared here, defined elsewhere. Both arguments are passed by non-const
// reference, so the callee can write `adapters` and `num_adapters`.
// NOTE(review): presumably enumerates the available adapters and returns the
// array plus its length; who owns/frees `adapters` is not visible here — confirm
// against the definition.
NTSTATUS WDDMGetAdapters(D3DKMT_ADAPTERINFO *&adapters, int &num_adapters);
|
||||
|
||||
} // namespace thunk
|
||||
} // namespace wsl
|
||||
|
||||
#endif
|
||||
@@ -1,227 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef _WSL_INC_WDDM_GPU_MEMORY_H_
|
||||
#define _WSL_INC_WDDM_GPU_MEMORY_H_
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include "util/utils.h"
|
||||
#include "inc/wddm/types.h"
|
||||
#include "inc/wddm/thunks.h"
|
||||
#include "inc/thunk_proxy/thunk_proxy.h"
|
||||
|
||||
namespace wsl {
|
||||
namespace thunk {
|
||||
|
||||
class WDDMDevice;
|
||||
|
||||
// Creation-time option bits for a GPU memory object. The named one-bit
// fields overlay the 64-bit `reserved` word, so writing `reserved = 0`
// clears every option at once.
union GpuMemoryCreateFlags {
  struct {
    // Only allocate a virtual address, without a physical buffer.
    uint64_t virtual_alloc : 1;
    // Only allocate a physical buffer, without a virtual address.
    uint64_t physical_only : 1;
    // The physical buffer needs to share info between exporter and importer.
    uint64_t interprocess : 1;
    // Lock the virtual address space into RAM, preventing that memory from
    // being paged to the swap area.
    uint64_t locked : 1;
    // Contiguous physical pages.
    uint64_t physical_contiguous : 1;
    // Remaining bits, currently unassigned.
    uint64_t unused : 59;
  };

  // Whole-word view of all the bits above.
  uint64_t reserved;
};
|
||||
|
||||
struct GpuMemoryCreateInfo {
|
||||
GpuMemoryCreateInfo() {
|
||||
flags.reserved = 0;
|
||||
domain = thunk_proxy::kLocal;
|
||||
size = 0;
|
||||
alignment = 0;
|
||||
mem_flags = 0;
|
||||
engine_flag = 0;
|
||||
va_hint = 0;
|
||||
user_ptr = nullptr;
|
||||
dmabuf_fd = -1;
|
||||
}
|
||||
|
||||
GpuMemoryCreateFlags flags;
|
||||
thunk_proxy::AllocDomain domain;
|
||||
gpusize size;
|
||||
gpusize alignment;
|
||||
int mem_flags;
|
||||
int engine_flag;
|
||||
int dmabuf_fd; // Import from dmabuf
|
||||
|
||||
void *user_ptr;
|
||||
gpusize va_hint;
|
||||
};
|
||||
|
||||
struct GpuMemoryDesc {
|
||||
GpuMemoryDesc() {
|
||||
gpu_addr = 0;
|
||||
cpu_addr = nullptr;
|
||||
client_size = 0;
|
||||
size = alignment = 0;
|
||||
flags.reserved = 0;
|
||||
mem_flags = 0;
|
||||
engine_flag = 0;
|
||||
handle_ape_addr = 0;
|
||||
}
|
||||
|
||||
thunk_proxy::AllocDomain domain;
|
||||
LUID adapter_luid; // Where is the backing store location
|
||||
gpusize gpu_addr;
|
||||
void *cpu_addr;
|
||||
gpusize client_size; // user request size
|
||||
gpusize size;
|
||||
gpusize alignment;
|
||||
gpusize handle_ape_addr;
|
||||
|
||||
union {
|
||||
struct {
|
||||
uint32_t is_virtual : 1;
|
||||
uint32_t is_shared : 1;
|
||||
uint32_t is_external : 1;
|
||||
uint32_t is_physical_only : 1;
|
||||
uint32_t is_locked : 1;
|
||||
uint32_t is_queue_referenced : 1;
|
||||
uint32_t is_physical_contiguous : 1;
|
||||
uint32_t unused : 25;
|
||||
};
|
||||
|
||||
uint32_t reserved;
|
||||
} flags;
|
||||
|
||||
int mem_flags;
|
||||
int engine_flag;
|
||||
};
|
||||
|
||||
struct SharedHandleInfo {
|
||||
thunk_proxy::AllocDomain domain;
|
||||
LUID adapter_luid;
|
||||
gpusize client_size; // user request size
|
||||
uint64_t size;
|
||||
uint32_t flags;
|
||||
int mem_flags;
|
||||
};
|
||||
|
||||
using GpuMemoryHandle = void *;
|
||||
|
||||
class GpuMemory {
|
||||
public:
|
||||
static size_t CalcChunkNumbers(gpusize size);
|
||||
|
||||
ErrorCode Init(const GpuMemoryCreateInfo &create_info);
|
||||
|
||||
WDDMDevice *GetDevice() const { return device_; }
|
||||
gpusize Size() const { return desc_.size; }
|
||||
gpusize ClientSize() const { return desc_.client_size; }
|
||||
uint64_t GpuAddress() const { return desc_.gpu_addr; }
|
||||
void *CpuAddress() const { return desc_.cpu_addr; }
|
||||
uint64_t HandleApeAddress() const { return desc_.handle_ape_addr; }
|
||||
|
||||
inline bool IsLocal() const { return desc_.domain == thunk_proxy::kLocal; }
|
||||
inline bool IsUserMemory() const { return desc_.domain == thunk_proxy::kUserMemory; }
|
||||
inline bool IsSystem() const { return desc_.domain == thunk_proxy::kSystem; }
|
||||
inline bool IsUserQueue() const { return desc_.domain == thunk_proxy::kUserQueue; }
|
||||
inline bool IsPhysicalOnly() const { return desc_.flags.is_physical_only; }
|
||||
inline bool IsPhysicalContiguous() const { return desc_.flags.is_physical_contiguous; }
|
||||
inline bool IsVirtual() const { return desc_.flags.is_virtual; }
|
||||
inline bool IsShared() const { return desc_.flags.is_shared; }
|
||||
inline bool IsExternal() const { return desc_.flags.is_external; }
|
||||
|
||||
inline uint32_t Flags() const { return desc_.flags.reserved; }
|
||||
inline int GetAllocInfo() const { return desc_.mem_flags; }
|
||||
inline bool IsFineGrain() const { return (desc_.mem_flags & thunk_proxy::kFineGrain); }
|
||||
inline bool IsSameAdapter(const LUID &luid) const {
|
||||
return (desc_.adapter_luid.HighPart == luid.HighPart &&
|
||||
desc_.adapter_luid.LowPart == luid.LowPart);
|
||||
}
|
||||
inline void GetQueueReference() { desc_.flags.is_queue_referenced = 1; }
|
||||
inline void PutQueueReference() { desc_.flags.is_queue_referenced = 0; }
|
||||
inline bool IsQueueReferenced() const { return desc_.flags.is_queue_referenced; }
|
||||
|
||||
WinAllocationHandle GetAllocationHandle(size_t index) const { return alloc_handles_ptr_[index]; }
|
||||
size_t NumChunks() const { return num_allocations_; }
|
||||
|
||||
const GpuMemoryHandle GetGpuMemoryHandle() const {
|
||||
return reinterpret_cast<GpuMemoryHandle>(const_cast<GpuMemory*>(this));
|
||||
}
|
||||
|
||||
static GpuMemory *Convert(GpuMemoryHandle handle) { return reinterpret_cast<GpuMemory *>(handle); }
|
||||
|
||||
ErrorCode ReserveGpuVirtualAddress(gpusize base_virt_addr, gpusize va_size, gpusize alignment);
|
||||
ErrorCode FreeGpuVirtualAddress(gpusize va_start_address, gpusize va_size);
|
||||
|
||||
ErrorCode MapGpuVirtualAddress(const gpusize map_addr, const gpusize size, gpusize offset = 0);
|
||||
ErrorCode UnmapGpuVirtualAddress(const gpusize map_addr, const gpusize size, gpusize offset = 0);
|
||||
|
||||
ErrorCode MakeResident();
|
||||
ErrorCode Evict();
|
||||
|
||||
ErrorCode ExportPhysicalHandle(int* dmabuf_fd, uint32_t flags = SHARED_ALLOCATION_ALL_ACCESS);
|
||||
ErrorCode ImportPhysicalHandle(int dmabuf_fd);
|
||||
~GpuMemory();
|
||||
protected:
|
||||
explicit GpuMemory(WDDMDevice *device);
|
||||
private:
|
||||
ErrorCode CreatePhysicalMemory();
|
||||
ErrorCode FreePhysicalMemory();
|
||||
|
||||
uint64_t AdjustSize(gpusize size) const;
|
||||
private:
|
||||
friend class WDDMDevice;
|
||||
|
||||
WDDMDevice *const device_;
|
||||
|
||||
GpuMemoryDesc desc_;
|
||||
|
||||
size_t num_allocations_;
|
||||
WinAllocationHandle *alloc_handles_ptr_;
|
||||
WinAllocationHandle alloc_handle_; // Optimization for num_allocations_ is 1
|
||||
|
||||
WinResourceHandle resource_; // Handle to a resource object that wraps the allocation. Used for shared resources
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(GpuMemory);
|
||||
};
|
||||
|
||||
} // namespace thunk
|
||||
} // namespace wsl
|
||||
|
||||
#endif
|
||||
@@ -1,363 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
#ifndef _WSL_INC_WDDM_QUEUE_H_
|
||||
#define _WSL_INC_WDDM_QUEUE_H_
|
||||
|
||||
#include <cinttypes>
|
||||
#include <condition_variable>
|
||||
#include <iostream>
|
||||
#include <queue>
|
||||
#include <utility>
|
||||
#include "inc/wddm/types.h"
|
||||
#include "inc/wddm/device.h"
|
||||
#include "inc/wddm/gpu_memory.h"
|
||||
#include "hsa-runtime/inc/hsa_ext_amd.h"
|
||||
#include "hsa-runtime/inc/amd_hsa_queue.h"
|
||||
#include "hsa-runtime/inc/amd_hsa_signal.h"
|
||||
#include "inc/wddm/cmd_util.h"
|
||||
|
||||
namespace wsl {
|
||||
namespace thunk {
|
||||
|
||||
class Queue;
|
||||
class WDDMDevice;
|
||||
|
||||
class WDDMQueue {
|
||||
public:
|
||||
WDDMQueue(WDDMDevice *device,
|
||||
uint64_t cmdbuf_addr,
|
||||
uint32_t cmdbuf_size,
|
||||
uint32_t engine,
|
||||
bool use_hws = true) :
|
||||
device(device),
|
||||
context(0),
|
||||
queue(0),
|
||||
syncobj(0),
|
||||
sync_addr(NULL),
|
||||
cmdbuf(0),
|
||||
cmdbuf_addr(cmdbuf_addr),
|
||||
cmdbuf_size(cmdbuf_size),
|
||||
queue_engine(engine),
|
||||
use_hws(use_hws),
|
||||
prio(thunk_proxy::kNormal) {
|
||||
|
||||
}
|
||||
|
||||
virtual ~WDDMQueue() { }
|
||||
|
||||
virtual hsa_status_t Init(void) { return HSA_STATUS_SUCCESS; }
|
||||
virtual hsa_status_t Fini(void) { return HSA_STATUS_SUCCESS; }
|
||||
virtual void RingDoorbell() { }
|
||||
virtual void* GetHsaQueueAddr(void) const { return reinterpret_cast<void*>(GetCmdbufAddr()); }
|
||||
|
||||
hsa_status_t SwsInit(void);
|
||||
hsa_status_t SwsFini(void);
|
||||
hsa_status_t SwsSubmit(uint64_t command_addr,
|
||||
uint64_t command_size,
|
||||
uint64_t fence_value);
|
||||
|
||||
hsa_status_t HwsInit(void);
|
||||
hsa_status_t HwsFini(void);
|
||||
hsa_status_t HwsSubmit(uint64_t command_addr,
|
||||
uint64_t command_size,
|
||||
uint64_t fence_value);
|
||||
hsa_status_t SetPriority(hsa_amd_queue_priority_t priority);
|
||||
|
||||
uint64_t *GetSyncAddr(void) const { return sync_addr; }
|
||||
uint64_t GetCmdbufAddr(void) const { return cmdbuf_addr; }
|
||||
|
||||
thunk_proxy::SchedLevel ConvertSchedLevel(hsa_amd_queue_priority_t prio) const {
|
||||
switch (prio) {
|
||||
case HSA_AMD_QUEUE_PRIORITY_LOW:
|
||||
return thunk_proxy::kLow;
|
||||
case HSA_AMD_QUEUE_PRIORITY_HIGH:
|
||||
return thunk_proxy::kHigh;
|
||||
case HSA_AMD_QUEUE_PRIORITY_NORMAL:
|
||||
default:
|
||||
return thunk_proxy::kNormal;
|
||||
}
|
||||
}
|
||||
|
||||
WDDMDevice *device;
|
||||
|
||||
D3DKMT_HANDLE context;
|
||||
D3DKMT_HANDLE queue;
|
||||
|
||||
D3DKMT_HANDLE syncobj;
|
||||
uint64_t *sync_addr;
|
||||
|
||||
GpuMemoryHandle cmdbuf;
|
||||
uint64_t cmdbuf_addr;
|
||||
uint32_t cmdbuf_size;
|
||||
|
||||
GpuMemoryHandle queue_mem;
|
||||
uint64_t queue_addr;
|
||||
|
||||
uint32_t queue_engine;
|
||||
|
||||
bool use_hws;
|
||||
thunk_proxy::SchedLevel prio;
|
||||
};
|
||||
|
||||
class ComputeQueue : public WDDMQueue {
|
||||
public:
|
||||
ComputeQueue(WDDMDevice *device,
|
||||
void *ring,
|
||||
uint64_t ring_size,
|
||||
std::atomic<uint64_t> *ring_wptr,
|
||||
std::atomic<uint64_t> *ring_rptr,
|
||||
volatile int64_t *error_addr,
|
||||
uint32_t cmdbuf_size,
|
||||
uint32_t engine,
|
||||
bool use_hws = true);
|
||||
|
||||
~ComputeQueue();
|
||||
|
||||
virtual hsa_status_t Init(void);
|
||||
virtual hsa_status_t Fini(void);
|
||||
virtual hsa_status_t Submit(void);
|
||||
|
||||
void* GetRing(void) const { return ring; }
|
||||
uint64_t GetRingSize(void) const { return ring_size; }
|
||||
std::atomic<uint64_t>* GetRingWptr(void) const { return ring_wptr; }
|
||||
std::atomic<uint64_t>* GetRingRptr(void) const { return ring_rptr; }
|
||||
|
||||
uint64_t GetAqlWriteIndex(void) const { return cmdbuf_aql_frame_write_index; }
|
||||
uint32_t GetAqlFrameSize(void) const { return cmdbuf_aql_frame_size; }
|
||||
void* GetHsaQueueAddr(void) const { return ring; }
|
||||
|
||||
bool IsInvalidPacket(void) const {
|
||||
uint16_t *packet = (uint16_t *)((char *)ring +
|
||||
(cmdbuf_aql_frame_write_index % ring_size) * 64);
|
||||
return ((*packet >> HSA_PACKET_HEADER_TYPE) & ((1 << HSA_PACKET_HEADER_WIDTH_TYPE) - 1))
|
||||
== HSA_PACKET_TYPE_INVALID;
|
||||
}
|
||||
|
||||
hsa_status_t Process(void);
|
||||
uint64_t * GetDoorbellPtr() const { return (uint64_t *)&doorbell_signal_value_; }
|
||||
void RingDoorbell();
|
||||
private:
|
||||
hsa_status_t KernelDispatchAqlToPm4(char *cpu, hsa_kernel_dispatch_packet_t *packet);
|
||||
hsa_status_t BarrierGenericAqlToPm4(char *cpu, hsa_barrier_and_packet_t *packet, bool is_or = false);
|
||||
struct amd_aql_pm4_ib {
|
||||
uint16_t header;
|
||||
uint16_t ven_hdr;
|
||||
uint32_t ib_jump_cmd[4];
|
||||
uint32_t dw_cnt_remain;
|
||||
uint32_t reserved[8];
|
||||
hsa_signal_t completion_signal;
|
||||
};
|
||||
hsa_status_t VendorSpecificAqlToPm4(char *cpu, amd_aql_pm4_ib *packet);
|
||||
hsa_status_t SwitchAql2PM4(void);
|
||||
|
||||
hsa_status_t PreSubmit(void);
|
||||
hsa_status_t EndSubmit(void);
|
||||
|
||||
void *ring;
|
||||
uint64_t ring_size;
|
||||
std::atomic<uint64_t> *ring_wptr;
|
||||
std::atomic<uint64_t> *ring_rptr;
|
||||
|
||||
// ib_start_addr is the current ib start address
|
||||
uint64_t ib_start_addr;
|
||||
|
||||
// ib_size is the current ib size.
|
||||
uint64_t ib_size;
|
||||
|
||||
// record the last submitted aql frame write index
|
||||
uint64_t sync_point;
|
||||
|
||||
uint64_t cmdbuf_aql_frame_write_index;
|
||||
uint32_t cmdbuf_aql_frame_size;
|
||||
|
||||
uint64_t *signal_addr_;
|
||||
bool platform_atomic_support_;
|
||||
bool needs_barrier;
|
||||
bool ready_to_submit;
|
||||
|
||||
CmdUtil cmd_util;
|
||||
|
||||
private:
|
||||
bool EnableProfiling() {
|
||||
return AMD_HSA_BITS_GET(amd_queue_rocr_->queue_properties, AMD_QUEUE_PROPERTIES_ENABLE_PROFILING);
|
||||
}
|
||||
void HandleError(hsa_status_t status);
|
||||
bool UpdateScratch(uint32_t private_segment_size, bool wave32);
|
||||
|
||||
uint32_t UpdateIndexStride(uint32_t srd, bool wave32);
|
||||
|
||||
void *ScratchBase() { return scratch_base_; }
|
||||
|
||||
void AppendCmdbufSratchBaseOffset(int offset) {
|
||||
scratch_base_offset_array_.push_back(offset);
|
||||
}
|
||||
|
||||
bool RelocateCmdbufScratchBase(uint64_t addr);
|
||||
|
||||
uint32_t ScratchSizePerWave() { return scratch_size_per_wave_; }
|
||||
uint64_t GetKernelObjAddr(uint64_t addr) const;
|
||||
void InitScratchSRD();
|
||||
GpuMemoryHandle amd_queue_mem_;
|
||||
amd_queue_t *amd_queue_;
|
||||
amd_queue_t *amd_queue_rocr_;
|
||||
uint64_t doorbell_signal_value_;
|
||||
volatile std::atomic<int64_t> *error_code_;
|
||||
std::thread aql_to_pm4_thread_;
|
||||
bool thread_stop_;
|
||||
std::mutex thread_cond_lock_;
|
||||
std::condition_variable thread_cond_;
|
||||
static void AqlToPm4Thread(ComputeQueue *queue);
|
||||
|
||||
uint32_t scratch_waves_;
|
||||
uint32_t scratch_size_per_wave_;
|
||||
uint32_t scratch_size_;
|
||||
void *scratch_base_;
|
||||
GpuMemoryHandle scratch_mem_;
|
||||
|
||||
std::vector<int> scratch_base_offset_array_;
|
||||
};
|
||||
|
||||
class SDMAQueue : public WDDMQueue {
|
||||
public:
|
||||
SDMAQueue(WDDMDevice *device,
|
||||
void *ring,
|
||||
uint64_t cmdbuf_size,
|
||||
uint32_t engine,
|
||||
bool use_hws = true);
|
||||
|
||||
virtual ~SDMAQueue();
|
||||
|
||||
hsa_status_t Init(void);
|
||||
hsa_status_t Fini(void);
|
||||
hsa_status_t Submit(void);
|
||||
|
||||
int PreparePacket(uint32_t offset, uint64_t size);
|
||||
|
||||
void WaitQueue(void) {
|
||||
device->CpuWait(&syncobj, &rptr_next, 1, false);
|
||||
}
|
||||
|
||||
uint64_t * GetRingWptr(void) { return &wptr_next_; }
|
||||
uint64_t * GetRingRptr(void) { return WDDMQueue::GetSyncAddr(); }
|
||||
uint64_t * GetDoorbellPtr() { return &doorbell_; }
|
||||
void RingDoorbell();
|
||||
void* GetHsaQueueAddr(void) const { return reinterpret_cast<void*>(GetCmdbufAddr()); }
|
||||
|
||||
private:
|
||||
uint64_t wptr_next_;
|
||||
uint64_t wptr_pre_;
|
||||
uint64_t rptr_next;
|
||||
uint64_t doorbell_;
|
||||
std::vector<std::pair<uint64_t, uint64_t>> wptr_queue_;
|
||||
uint64_t ib_size;
|
||||
uint64_t ib_start_addr;
|
||||
|
||||
std::thread thread_;
|
||||
bool thread_stop_;
|
||||
std::mutex thread_cond_lock_;
|
||||
std::condition_variable thread_cond_;
|
||||
static void SdmaThread(SDMAQueue *queue);
|
||||
|
||||
struct SDMA_PKT_POLL_REGMEM {
|
||||
union {
|
||||
struct {
|
||||
unsigned int op : 8;
|
||||
unsigned int sub_op : 8;
|
||||
unsigned int reserved_0 : 10;
|
||||
unsigned int hdp_flush : 1;
|
||||
unsigned int reserved_1 : 1;
|
||||
unsigned int func : 3;
|
||||
unsigned int mem_poll : 1;
|
||||
};
|
||||
unsigned int DW_0_DATA;
|
||||
} HEADER_UNION;
|
||||
|
||||
union {
|
||||
struct {
|
||||
unsigned int addr_31_0 : 32;
|
||||
};
|
||||
unsigned int DW_1_DATA;
|
||||
} ADDR_LO_UNION;
|
||||
|
||||
union {
|
||||
struct {
|
||||
unsigned int addr_63_32 : 32;
|
||||
};
|
||||
unsigned int DW_2_DATA;
|
||||
} ADDR_HI_UNION;
|
||||
|
||||
union {
|
||||
struct {
|
||||
unsigned int value : 32;
|
||||
};
|
||||
unsigned int DW_3_DATA;
|
||||
} VALUE_UNION;
|
||||
|
||||
union {
|
||||
struct {
|
||||
unsigned int mask : 32;
|
||||
};
|
||||
unsigned int DW_4_DATA;
|
||||
} MASK_UNION;
|
||||
|
||||
union {
|
||||
struct {
|
||||
unsigned int interval : 16;
|
||||
unsigned int retry_count : 12;
|
||||
unsigned int reserved_0 : 4;
|
||||
};
|
||||
unsigned int DW_5_DATA;
|
||||
} DW5_UNION;
|
||||
};
|
||||
const unsigned int SDMA_OP_POLL_REGMEM = 8;
|
||||
bool IsPollPacket(SDMA_PKT_POLL_REGMEM* pkt) {
|
||||
return pkt->HEADER_UNION.op == SDMA_OP_POLL_REGMEM &&
|
||||
pkt->HEADER_UNION.mem_poll == 1 &&
|
||||
pkt->HEADER_UNION.func == 3;
|
||||
}
|
||||
uint32_t WrapIntoRocrRing(uint64_t idx) { return (idx & (cmdbuf_size - 1)); }
|
||||
};
|
||||
|
||||
} // namespace thunk
|
||||
} // namespace wsl
|
||||
|
||||
#endif
|
||||
@@ -1,60 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef _WSL_INC_WDDM_STATUS_H
|
||||
#define _WSL_INC_WDDM_STATUS_H
|
||||
|
||||
// Status codes returned by the WDDM thunk layer. The numeric values are
// spelled out; they are the same values the implicit numbering produced.
enum class ErrorCode {
  Success              = 0,
  DeviceLost           = 1,
  UnSupported          = 2,
  NotReady             = 3,
  OutOfMemory          = 4,
  OutOfGpuMemory       = 5,
  OutOfHandleApeMemory = 6,
  Timeout              = 7,
  SyscallFail          = 8,
  InvalidateParams     = 9,
  Unknown              = 10,
};
|
||||
|
||||
#endif
|
||||
@@ -1,232 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef _WSL_INC_WDDM_THUNKS_H
|
||||
#define _WSL_INC_WDDM_THUNKS_H
|
||||
|
||||
#include "inc/wddm/status.h"
|
||||
#include "inc/wddm/types.h"
|
||||
|
||||
namespace wsl {
|
||||
namespace thunk {
|
||||
|
||||
// Converts an NTSTATUS value into the thunk layer's ErrorCode.
// Seven status values have a dedicated mapping (listed in the table below);
// every other value yields ErrorCode::Unknown.
inline ErrorCode TranslateNtStatus(NTSTATUS status) {
  struct StatusMapping {
    NTSTATUS nt_status;
    ErrorCode code;
  };

  static const StatusMapping kStatusTable[] = {
      {STATUS_SUCCESS, ErrorCode::Success},
      {STATUS_PENDING, ErrorCode::NotReady},
      {STATUS_NO_MEMORY, ErrorCode::OutOfMemory},
      {STATUS_DEVICE_REMOVED, ErrorCode::DeviceLost},
      {STATUS_GRAPHICS_NO_VIDEO_MEMORY, ErrorCode::OutOfGpuMemory},
      {STATUS_TIMEOUT, ErrorCode::Timeout},
      {STATUS_INVALID_PARAMETER, ErrorCode::InvalidateParams},
  };

  for (const StatusMapping &entry : kStatusTable) {
    if (entry.nt_status == status) {
      return entry.code;
    }
  }

  return ErrorCode::Unknown;
}
|
||||
|
||||
namespace d3dthunk {
|
||||
|
||||
typedef D3DKMT_CREATEALLOCATION CreateAllocationArgs;
|
||||
typedef D3DKMT_CREATECONTEXT CreateContextArgs;
|
||||
typedef D3DKMT_CREATECONTEXTVIRTUAL CreateContextVirtualArgs;
|
||||
typedef D3DKMT_CREATEPAGINGQUEUE CreatePagingQueueArgs;
|
||||
typedef D3DKMT_CREATESYNCHRONIZATIONOBJECT CreateSynchronizationObjectArgs;
|
||||
typedef D3DKMT_CREATESYNCHRONIZATIONOBJECT2 CreateSynchronizationObject2Args;
|
||||
typedef D3DKMT_ESCAPE EscapeArgs;
|
||||
typedef D3DKMT_EVICT EvictArgs;
|
||||
typedef D3DKMT_FREEGPUVIRTUALADDRESS FreeGpuVirtualAddressArgs;
|
||||
typedef D3DKMT_LOCK LockArgs;
|
||||
typedef D3DKMT_LOCK2 Lock2Args;
|
||||
typedef D3DKMT_OPENRESOURCE OpenResourceArgs;
|
||||
typedef D3DKMT_OPENRESOURCEFROMNTHANDLE OpenResourceFromNtHandleArgs;
|
||||
typedef D3DKMT_QUERYADAPTERINFO QueryAdapterInfoArgs;
|
||||
typedef D3DKMT_SIGNALSYNCHRONIZATIONOBJECT SignalSynchronizationObjectArgs;
|
||||
typedef D3DKMT_SIGNALSYNCHRONIZATIONOBJECT2 SignalSynchronizationObject2Args;
|
||||
typedef D3DKMT_SIGNALSYNCHRONIZATIONOBJECTFROMCPU SignalSynchronizationObjectFromCpuArgs;
|
||||
typedef D3DKMT_SIGNALSYNCHRONIZATIONOBJECTFROMGPU2 SignalSynchronizationObjectFromGpuArgs;
|
||||
typedef D3DKMT_SUBMITCOMMAND SubmitCommandArgs;
|
||||
typedef D3DKMT_UNLOCK UnlockArgs;
|
||||
typedef D3DKMT_UNLOCK2 Unlock2Args;
|
||||
typedef D3DKMT_UPDATEGPUVIRTUALADDRESS UpdateGpuVirtualAddressArgs;
|
||||
typedef D3DKMT_WAITFORSYNCHRONIZATIONOBJECT WaitForSynchronizationObjectArgs;
|
||||
typedef D3DKMT_WAITFORSYNCHRONIZATIONOBJECT2 WaitForSynchronizationObject2Args;
|
||||
typedef D3DKMT_WAITFORSYNCHRONIZATIONOBJECTFROMCPU WaitForSynchronizationObjectFromCpuArgs;
|
||||
typedef D3DKMT_WAITFORSYNCHRONIZATIONOBJECTFROMGPU WaitForSynchronizationObjectFromGpuArgs;
|
||||
typedef D3DKMT_ACQUIREKEYEDMUTEX AcquireKeyedMutexArgs;
|
||||
typedef D3DKMT_RELEASEKEYEDMUTEX ReleaseKeyedMutexArgs;
|
||||
typedef D3DKMT_OPENKEYEDMUTEX OpenKeyedMutexArgs;
|
||||
typedef D3DKMT_DESTROYKEYEDMUTEX DestroyKeyedMutexArgs;
|
||||
typedef D3DKMT_QUERYVIDEOMEMORYINFO QueryVideoMemoryInfoArgs;
|
||||
typedef D3DKMT_CREATEHWQUEUE CreateHwQueueArgs;
|
||||
typedef D3DKMT_DESTROYHWQUEUE DestroyHwQueueArgs;
|
||||
typedef D3DKMT_SUBMITCOMMANDTOHWQUEUE SubmitCommandToHwQueueArgs;
|
||||
typedef D3DKMT_SUBMITPRESENTTOHWQUEUE SubmitPresentToHwQueueArgs;
|
||||
typedef D3DKMT_SUBMITSIGNALSYNCOBJECTSTOHWQUEUE SubmitSignalSyncObjectsToHwQueueArgs;
|
||||
typedef D3DKMT_SUBMITWAITFORSYNCOBJECTSTOHWQUEUE SubmitWaitForSyncObjectsToHwQueueArgs;
|
||||
typedef D3DKMT_CREATESYNCFILE CreateSyncFileArgs;
|
||||
|
||||
/// Calls D3DKMTMapGpuVirtualAddress with `args` and converts the returned
/// NTSTATUS into an ErrorCode via TranslateNtStatus.
inline ErrorCode MapGpuVirtualAddress(D3DDDI_MAPGPUVIRTUALADDRESS *args) {
  const NTSTATUS status = D3DKMTMapGpuVirtualAddress(args);
  return TranslateNtStatus(status);
}
|
||||
|
||||
/// Calls D3DKMTCreateAllocation2 with `args` and converts the returned
/// NTSTATUS into an ErrorCode via TranslateNtStatus.
inline ErrorCode CreateAllocation(CreateAllocationArgs *args) {
  const NTSTATUS status = D3DKMTCreateAllocation2(args);
  return TranslateNtStatus(status);
}
|
||||
|
||||
/// Destroys either a resource or an explicit list of allocations on `device`
/// by calling D3DKMTDestroyAllocation2.
///
/// @param device           Device handle stored in the request.
/// @param resource         When non-zero, only this resource handle is passed
///                         and the allocation list arguments are ignored.
/// @param num_allocations  Number of entries in `alloc_handles`; used only
///                         when `resource` is zero.
/// @param alloc_handles    Allocation handles to destroy; used only when
///                         `resource` is zero.
/// @return The translated status of D3DKMTDestroyAllocation2.
inline ErrorCode DestroyAllocation(
    WinDeviceHandle device,
    WinResourceHandle resource,
    size_t num_allocations,
    const WinAllocationHandle *alloc_handles) {
  // `{}` already zeroes every field, so no additional memset is required
  // (this matches how the sibling wrappers build their argument structs).
  D3DKMT_DESTROYALLOCATION2 args{};

  args.hDevice = device;
  if (resource) {
    args.hResource = resource;
  } else {
    args.phAllocationList = alloc_handles;
    // Convert explicitly to the field's own type instead of relying on an
    // implicit conversion from size_t.
    args.AllocationCount =
        static_cast<decltype(args.AllocationCount)>(num_allocations);
  }

  return TranslateNtStatus(D3DKMTDestroyAllocation2(&args));
}
|
||||
|
||||
/// Calls D3DKMTReserveGpuVirtualAddress with `args` and converts the returned
/// NTSTATUS into an ErrorCode via TranslateNtStatus.
inline ErrorCode ReserveGpuVirtualAddress(D3DDDI_RESERVEGPUVIRTUALADDRESS *args) {
  const NTSTATUS status = D3DKMTReserveGpuVirtualAddress(args);
  return TranslateNtStatus(status);
}
|
||||
|
||||
/// Reserves `size` bytes of GPU virtual address space at `base_address`.
///
/// @param handle        Handle stored in the request's hPagingQueue field.
/// @param size          Size of the reservation.
/// @param base_address  Requested base address of the reservation.
/// @param out_addr      Receives the reserved virtual address; written only
///                      when the call succeeds.
/// @return The translated status of the underlying reserve call.
inline ErrorCode ReserveGpuVirtualAddress(WinAdapterHandle handle,
                                          gpusize size,
                                          gpusize base_address,
                                          gpusize *out_addr) {
  D3DDDI_RESERVEGPUVIRTUALADDRESS request{};
  request.hPagingQueue = handle;
  request.BaseAddress = base_address;
  request.Size = size;

  const auto result = ReserveGpuVirtualAddress(&request);
  if (result != ErrorCode::Success) {
    return result;  // leave *out_addr untouched on failure
  }

  *out_addr = request.VirtualAddress;
  return result;
}
|
||||
|
||||
/// Reserves `size` bytes of GPU virtual address space inside the range
/// described by `minimum_address` and `maximum_address`.
///
/// @param handle           Handle stored in the request's hPagingQueue field.
/// @param size             Size of the reservation.
/// @param minimum_address  Value stored in the request's MinimumAddress field.
/// @param maximum_address  Value stored in the request's MaximumAddress field.
/// @param out_addr         Receives the reserved virtual address; written only
///                         when the call succeeds.
/// @return The translated status of the underlying reserve call.
inline ErrorCode ReserveGpuVirtualAddress(WinAdapterHandle handle,
                                          gpusize size,
                                          gpusize minimum_address,
                                          gpusize maximum_address,
                                          gpusize *out_addr) {
  D3DDDI_RESERVEGPUVIRTUALADDRESS request{};
  request.hPagingQueue = handle;
  request.MinimumAddress = minimum_address;
  request.MaximumAddress = maximum_address;
  request.Size = size;

  const auto result = ReserveGpuVirtualAddress(&request);
  if (result != ErrorCode::Success) {
    return result;  // leave *out_addr untouched on failure
  }

  *out_addr = request.VirtualAddress;
  return result;
}
|
||||
|
||||
/// Calls D3DKMTFreeGpuVirtualAddress with `args` and converts the returned
/// NTSTATUS into an ErrorCode via TranslateNtStatus.
inline ErrorCode FreeGpuVirtualAddress(FreeGpuVirtualAddressArgs *args) {
  const NTSTATUS status = D3DKMTFreeGpuVirtualAddress(args);
  return TranslateNtStatus(status);
}
|
||||
|
||||
/// Frees the GPU virtual address range [`base_address`, `base_address` + `size`)
/// description held in a FreeGpuVirtualAddressArgs request.
///
/// @param handle        Adapter handle stored in the request.
/// @param base_address  Base address of the range to free.
/// @param size          Size of the range to free.
/// @return The translated status of the underlying free call.
inline ErrorCode FreeGpuVirtualAddress(WinAdapterHandle handle,
                                       gpusize base_address,
                                       gpusize size) {
  FreeGpuVirtualAddressArgs request{};
  request.hAdapter = handle;
  request.BaseAddress = base_address;
  request.Size = size;

  const ErrorCode result = FreeGpuVirtualAddress(&request);
  return result;
}
|
||||
|
||||
/// Calls D3DKMTMakeResident with `args` and converts the returned NTSTATUS
/// into an ErrorCode via TranslateNtStatus.
inline ErrorCode MakeResident(D3DDDI_MAKERESIDENT *args) {
  const NTSTATUS status = D3DKMTMakeResident(args);
  return TranslateNtStatus(status);
}
|
||||
|
||||
/// Calls D3DKMTEvict with `args` and converts the returned NTSTATUS into an
/// ErrorCode via TranslateNtStatus.
inline ErrorCode Evict(EvictArgs *args) {
  const NTSTATUS status = D3DKMTEvict(args);
  return TranslateNtStatus(status);
}
|
||||
|
||||
/// Shares `resource` through D3DKMTShareObjects and returns the resulting
/// handle as an int in `*dmabuf_fd`.
///
/// @param num_allocations  Object count passed to D3DKMTShareObjects.
/// @param resource         Resource handle to share.
/// @param flags            Passed through unchanged to D3DKMTShareObjects.
/// @param dmabuf_fd        Receives the low `sizeof(int)` bytes of the returned
///                         handle on success, or -1 on failure. Must not be null.
/// @return The translated status of D3DKMTShareObjects.
inline ErrorCode ShareObjects(size_t num_allocations,
                              WinResourceHandle resource,
                              uint32_t flags,
                              int* dmabuf_fd) {
  static_assert(sizeof(HANDLE) >= sizeof(int),
                "HANDLE must be at least as wide as int to carry a descriptor");

  // Value-initialize so nothing indeterminate is ever read, even on failure paths.
  OBJECT_ATTRIBUTES obj_attr{};
  HANDLE nt_handle{};

  InitializeObjectAttributes(&obj_attr, nullptr, OBJ_INHERIT, nullptr, nullptr);
  const ErrorCode ret = TranslateNtStatus(D3DKMTShareObjects(
      num_allocations, &resource, &obj_attr, flags, &nt_handle));

  int fd = -1;
  if (ret == ErrorCode::Success) {
    // Copy the leading bytes of the handle instead of dereferencing it through
    // an int*: same bytes as before, but without the strict-aliasing violation.
    std::memcpy(&fd, &nt_handle, sizeof(fd));
  }
  *dmabuf_fd = fd;

  return ret;
}
|
||||
|
||||
/// Calls D3DKMTQueryResourceInfoFromNtHandle with `args` and converts the
/// returned NTSTATUS into an ErrorCode via TranslateNtStatus.
inline ErrorCode QueryResourceInfoFromNtHandle(D3DKMT_QUERYRESOURCEINFOFROMNTHANDLE *args) {
  const NTSTATUS status = D3DKMTQueryResourceInfoFromNtHandle(args);
  return TranslateNtStatus(status);
}
|
||||
|
||||
/// Calls D3DKMTOpenResourceFromNtHandle with `args` and converts the returned
/// NTSTATUS into an ErrorCode via TranslateNtStatus.
inline ErrorCode OpenResourceFromNtHandle(D3DKMT_OPENRESOURCEFROMNTHANDLE *args) {
  const NTSTATUS status = D3DKMTOpenResourceFromNtHandle(args);
  return TranslateNtStatus(status);
}
|
||||
|
||||
} // namespace d3dthunk
|
||||
} // namespace thunk
|
||||
} // namespace wsl
|
||||
|
||||
#endif
|
||||
@@ -1,101 +0,0 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
// Copyright (c) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimers in
|
||||
// the documentation and/or other materials provided with the distribution.
|
||||
// - Neither the names of Advanced Micro Devices, Inc,
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
// DEALINGS WITH THE SOFTWARE.
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef _WSL_INC_WDDM_TYPES_H_
|
||||
#define _WSL_INC_WDDM_TYPES_H_
|
||||
|
||||
#include <cstdint>
|
||||
#include <ntstatus.h>
|
||||
#include "inc/thunk_proxy/wddm_types.h"
|
||||
// Windows wchar is 16 bits wide, but Linux wchar_t is 32 bits wide.
|
||||
// It appears that libdxcore (not dxgkrnl.ko) converts Windows-style wchar thunk
|
||||
// arguments to the Linux representation, so only 32-bit wchar arguments are
|
||||
// accepted here. Note that driver-private data structures still use 16-bit wchar.
|
||||
#define WCHAR wchar_t
|
||||
#define PCWSTR const wchar_t *
|
||||
#include <d3dkmthk.h>
|
||||
#undef WCHAR
|
||||
#undef PCWSTR
|
||||
|
||||
using gpusize = uint64_t; // Used to specify GPU addresses and sizes of GPU allocations
|
||||
using WinAllocationHandle = D3DKMT_HANDLE;
|
||||
using WinResourceHandle = D3DKMT_HANDLE;
|
||||
using WinContextHandle = D3DKMT_HANDLE;
|
||||
using WinDeviceHandle = D3DKMT_HANDLE;
|
||||
using WinAdapterHandle = D3DKMT_HANDLE;
|
||||
|
||||
//reference dk/winnt.h
|
||||
#define STANDARD_RIGHTS_REQUIRED (0x000F0000L)
|
||||
|
||||
//reference dk/ntdef.h
|
||||
#define OBJ_INHERIT (0x00000002L)
|
||||
typedef WCHAR *PWCHAR, *LPWCH, *PWCH;
|
||||
typedef struct _UNICODE_STRING {
|
||||
USHORT Length;
|
||||
USHORT MaximumLength;
|
||||
#ifdef MIDL_PASS
|
||||
[size_is(MaximumLength / 2), length_is((Length) / 2) ] USHORT * Buffer;
|
||||
#else // MIDL_PASS
|
||||
_Field_size_bytes_part_opt_(MaximumLength, Length) PWCH Buffer;
|
||||
#endif // MIDL_PASS
|
||||
} UNICODE_STRING;
|
||||
typedef UNICODE_STRING *PUNICODE_STRING;
|
||||
typedef const UNICODE_STRING *PCUNICODE_STRING;
|
||||
|
||||
typedef struct _OBJECT_ATTRIBUTES {
|
||||
ULONG Length;
|
||||
HANDLE RootDirectory;
|
||||
PUNICODE_STRING ObjectName;
|
||||
ULONG Attributes;
|
||||
PVOID SecurityDescriptor;
|
||||
PVOID SecurityQualityOfService;
|
||||
} OBJECT_ATTRIBUTES;
|
||||
// Fills every field of the OBJECT_ATTRIBUTES pointed to by `p`:
//   n -> ObjectName, a -> Attributes, r -> RootDirectory, s -> SecurityDescriptor;
//   Length is set to sizeof(OBJECT_ATTRIBUTES) and SecurityQualityOfService is cleared.
// The body is wrapped in do { } while (0) so the macro behaves as a single
// statement (safe inside an unbraced if/else); invoke it with a trailing ';'.
#define InitializeObjectAttributes( p, n, a, r, s )   \
  do {                                                \
    (p)->Length = sizeof( OBJECT_ATTRIBUTES );        \
    (p)->RootDirectory = (r);                         \
    (p)->Attributes = (a);                            \
    (p)->ObjectName = (n);                            \
    (p)->SecurityDescriptor = (s);                    \
    (p)->SecurityQualityOfService = nullptr;          \
  } while (0)
|
||||
|
||||
#endif
|
||||
@@ -1,86 +0,0 @@
|
||||
#ifndef _WSL_INC_WDDM_VA_MGR_H_
|
||||
#define _WSL_INC_WDDM_VA_MGR_H_
|
||||
|
||||
#include <mutex>
|
||||
#include <map>
|
||||
#include "util/utils.h"
|
||||
|
||||
namespace wsl {
|
||||
namespace thunk {
|
||||
|
||||
class VaMgr {
|
||||
public:
|
||||
VaMgr(uint64_t start, uint64_t size, uint64_t min_align);
|
||||
~VaMgr();
|
||||
|
||||
/* Allocate `bytes` VA, if `align` is not zero, the returned address is aligned by `align`.
|
||||
* If `addr` parameter is not zero, try best to allocate VA from fixed address `addr`.
|
||||
*/
|
||||
uint64_t Alloc(uint64_t bytes, uint64_t align, uint64_t addr = 0);
|
||||
|
||||
void Free(uint64_t addr);
|
||||
|
||||
private:
|
||||
uint64_t AllocImpl(uint64_t bytes, uint64_t align);
|
||||
|
||||
struct Fragment {
|
||||
using ptr = std::multimap<uint64_t, uint64_t>::iterator;
|
||||
ptr free_list_entry_;
|
||||
|
||||
struct {
|
||||
uint64_t size : 63;
|
||||
bool is_free : 1;
|
||||
};
|
||||
|
||||
Fragment() : size(0), is_free(false) {}
|
||||
Fragment(ptr iterator, uint64_t len, bool is_free)
|
||||
: free_list_entry_(iterator), size(len), is_free(is_free) {}
|
||||
};
|
||||
|
||||
static inline Fragment make_fragment(typename Fragment::ptr iter, uint64_t len) {
|
||||
return {iter, len, true};
|
||||
}
|
||||
|
||||
inline Fragment make_fragment(uint64_t len) { return {free_list_.end(), len, false}; }
|
||||
|
||||
static inline bool is_free(const Fragment& f) { return f.is_free; }
|
||||
void set_used(Fragment& f) {
|
||||
f.is_free = false;
|
||||
f.free_list_entry_ = free_list_.end();
|
||||
}
|
||||
static void set_free(Fragment& f, typename Fragment::ptr iter) {
|
||||
f.free_list_entry_ = iter;
|
||||
f.is_free = true;
|
||||
}
|
||||
|
||||
inline void remove_free_list_entry(Fragment& frag) {
|
||||
if (frag.free_list_entry_ != free_list_.end()) {
|
||||
free_list_.erase(frag.free_list_entry_);
|
||||
frag.free_list_entry_ = free_list_.end();
|
||||
}
|
||||
}
|
||||
|
||||
inline void add_free_fragment(uint64_t size, uint64_t base) {
|
||||
auto it = free_list_.insert(std::make_pair(size, base));
|
||||
frag_map_[base] = make_fragment(it, size);
|
||||
}
|
||||
|
||||
inline void add_used_fragment(uint64_t size, uint64_t base) {
|
||||
frag_map_[base] = make_fragment(size);
|
||||
}
|
||||
// Indexed by size
|
||||
std::multimap<uint64_t, uint64_t> free_list_;
|
||||
// Indexed by VA, each fragment has no overlap
|
||||
std::map<uint64_t, Fragment> frag_map_;
|
||||
|
||||
uint64_t min_align_;
|
||||
|
||||
std::mutex lock_; // Mutex protecting allocation and free of va
|
||||
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(VaMgr);
|
||||
};
|
||||
|
||||
} // namespace thunk
|
||||
} // namespace wsl
|
||||
#endif
|
||||
+2
-2
@@ -41,8 +41,8 @@
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
#include <cstdint>
|
||||
|
||||
#include "inc/wddm/types.h"
|
||||
#include "inc/wddm/device.h"
|
||||
#include "impl/wddm/types.h"
|
||||
#include "impl/wddm/device.h"
|
||||
#include "libhsakmt.h"
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtGetAMDGPUDeviceHandle(
|
||||
|
||||
+2
-2
@@ -32,8 +32,8 @@
|
||||
#include "hsakmt/hsakmt.h"
|
||||
#include "hsakmt/hsakmt_drm.h"
|
||||
|
||||
#include "inc/wddm/types.h"
|
||||
#include "inc/wddm/device.h"
|
||||
#include "impl/wddm/types.h"
|
||||
#include "impl/wddm/device.h"
|
||||
|
||||
wsl::thunk::WDDMDevice* get_wddmdev(uint32_t node_id);
|
||||
|
||||
|
||||
+1
-1
@@ -32,7 +32,7 @@
|
||||
#include <sys/mman.h>
|
||||
#include <sys/sysinfo.h>
|
||||
#include <fcntl.h>
|
||||
#include "inc/wddm/gpu_memory.h"
|
||||
#include "impl/wddm/gpu_memory.h"
|
||||
#include "util/simple_heap.h"
|
||||
|
||||
struct Allocation {
|
||||
|
||||
+2
-2
@@ -24,8 +24,8 @@
|
||||
*/
|
||||
#include <cinttypes>
|
||||
#include "libhsakmt.h"
|
||||
#include "inc/wddm/device.h"
|
||||
#include "inc/wddm/queue.h"
|
||||
#include "impl/wddm/device.h"
|
||||
#include "impl/wddm/queue.h"
|
||||
#include "hsa-runtime/inc/amd_hsa_signal.h"
|
||||
|
||||
uint32_t get_vgpr_size_per_cu(HSA_ENGINE_ID id) {
|
||||
|
||||
Binair bestand niet weergegeven.
+1
-1
@@ -27,7 +27,7 @@
|
||||
#include <cstring>
|
||||
#include <cassert>
|
||||
#include "libhsakmt.h"
|
||||
#include "inc/wddm/device.h"
|
||||
#include "impl/wddm/device.h"
|
||||
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtGetClockCounters(HSAuint32 NodeId,
|
||||
HsaClockCounters *Counters) {
|
||||
|
||||
@@ -39,8 +39,8 @@
|
||||
#include <sys/sysinfo.h>
|
||||
|
||||
#include "libhsakmt.h"
|
||||
#include "inc/wddm/types.h"
|
||||
#include "inc/wddm/device.h"
|
||||
#include "impl/wddm/types.h"
|
||||
#include "impl/wddm/device.h"
|
||||
#include "util/utils.h"
|
||||
|
||||
/* Number of memory banks added by thunk on top of topology
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. */
|
||||
|
||||
#include "inc/wddm/cmd_util.h"
|
||||
#include "impl/wddm/cmd_util.h"
|
||||
|
||||
namespace wsl {
|
||||
namespace thunk {
|
||||
|
||||
@@ -48,10 +48,10 @@
|
||||
#include <linux/mman.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
#include "inc/wddm/status.h"
|
||||
#include "inc/wddm/types.h"
|
||||
#include "inc/wddm/device.h"
|
||||
#include "inc/wddm/queue.h"
|
||||
#include "impl/wddm/status.h"
|
||||
#include "impl/wddm/types.h"
|
||||
#include "impl/wddm/device.h"
|
||||
#include "impl/wddm/queue.h"
|
||||
|
||||
namespace wsl {
|
||||
namespace thunk {
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#include <cinttypes>
|
||||
#include <cassert>
|
||||
#include "inc/wddm/gpu_memory.h"
|
||||
#include "inc/wddm/device.h"
|
||||
#include "impl/wddm/gpu_memory.h"
|
||||
#include "impl/wddm/device.h"
|
||||
#include "util/utils.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
@@ -44,8 +44,8 @@
|
||||
#include <cinttypes>
|
||||
#include <cstddef>
|
||||
|
||||
#include "inc/wddm/queue.h"
|
||||
#include "inc/registers.h"
|
||||
#include "impl/wddm/queue.h"
|
||||
#include "impl/registers.h"
|
||||
#include "libhsakmt.h"
|
||||
|
||||
#include "hsa-runtime/inc/hsa.h"
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#include <cassert>
|
||||
#include <map>
|
||||
#include <algorithm>
|
||||
#include "inc/wddm/va_mgr.h"
|
||||
#include "impl/wddm/va_mgr.h"
|
||||
#include "libhsakmt.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
Verwijs in nieuw issue
Block a user