Files
rocm-systems/inc/thunk_proxy/thunk_proxy.h
T
2025-11-05 18:53:35 +08:00

129 wiersze
3.6 KiB
C++

#ifndef _WSL_INC_THUNK_PROXY_H_
#define _WSL_INC_THUNK_PROXY_H_
#include <cstdint>
#include <vector>
namespace thunk_proxy {
// Domain selector for an allocation; passed as the `domain` argument of
// SetAllocationInfo().
// NOTE(review): the meaning of each domain is implied only by its name here --
// confirm against the implementation before relying on it.
enum AllocDomain {
  kSystem = 0,
  kLocal,
  kUserMemory,
  kUserQueue,
  kDomainCount,  // Not a domain: equals the number of enumerators above. Keep last.
};
// Single-bit memory flags; each enumerator occupies its own bit so values can
// be OR-ed together.
// NOTE(review): presumably these are what SetAllocationInfo() expects in its
// `mem_flags` argument -- confirm against the implementation.
enum MemFlag {
  kFineGrain = 0x1ULL,  // bit 0
  kKernarg = 0x2ULL,    // bit 1
};
// Single-bit engine flags; each enumerator occupies its own bit so values can
// be OR-ed together.
// NOTE(review): presumably these are the values carried by the `engine_flag`
// argument of SetAllocationInfo() and returned by QueueEngine2EngineFlag() --
// confirm against the implementation.
// NOTE(review): these names use an upper-case `K` prefix, unlike the `kName`
// style of the other enums in this header. Renaming would break callers.
enum EngineFlag {
  KCOMPUTE0 = 0x1ULL,  // bit 0
  KDRMDMA = 0x2ULL,    // bit 1
  KDRMDMA1 = 0x4ULL,   // bit 2
};
// Scheduling level, ordered from lowest to highest numeric value.
// CreateHwQueuePrivData() takes one of these and defaults to kNormal.
enum SchedLevel {
  kLow = 0,
  kNormal,  // == 1
  kHigh,    // == 2
};
// ASIC family identifier stored in DeviceInfo::family.
// NOTE(review): `kPlumBONITO` has unusual mixed casing compared with the other
// enumerators; it is kept as-is because renaming would break callers.
enum AsicFamilyType {
  kPlumBONITO = 0,
  kNavi44 = 1,
  kNavi48 = 2,
};
// Hardware-scheduler (HWS) enablement data; embedded in DeviceInfo as
// `hwsInfo`.
struct HwsInfo {
  // One 32-bit word with two views: per-engine one-bit fields (`hwsMask`) or
  // the raw word (`osHwsEnableFlags`). Both union members share storage.
  union {
    struct {
      uint32_t gfxHwsEnabled : 1;
      uint32_t computeHwsEnabled : 1;
      uint32_t dmaHwsEnabled : 1;
      uint32_t dma1HwsEnabled : 1;
      uint32_t reserved : 28;  // Fills the remaining bits: 4 + 28 = 32.
    } hwsMask;
    uint32_t osHwsEnableFlags;
  };

  // Indicates which engines (by ordinal) support MES HWS
  uint64_t engineOrdinalMask;
};
// Properties of one device, passed by pointer or reference to the functions
// declared in this header (e.g. ParseAdapterInfo, SetAllocationInfo).
//
// Every scalar and pointer member has a default initializer (0 / false /
// nullptr), so a default-constructed DeviceInfo never holds indeterminate
// values. The struct contains a std::vector and is therefore not trivially
// copyable: do not memset() or memcpy() it.
//
// NOTE(review): MAX_PATH is not defined by this header; it must come from a
// header included before this one.
// NOTE(review): ownership and lifetime of `uuid`, `adapter_info` and
// `adapter_ex_info` cannot be determined from this header -- confirm with the
// code that populates them.
// NOTE(review): member meanings and units below are implied only by their
// names; confirm against the code that fills the struct.
struct DeviceInfo {
  int major = 0;
  int minor = 0;
  int stepping = 0;
  bool is_dgpu = false;
  char product_name[MAX_PATH] = {};
  const char *uuid = nullptr;
  AsicFamilyType family = {};  // Zero value, i.e. the first enumerator.
  uint32_t device_id = 0;
  uint32_t wavefront_size = 0;
  uint32_t compute_unit_count = 0;
  uint32_t max_engine_clock_mhz = 0;
  uint32_t watch_points_num = 0;
  uint32_t pci_bus_addr = 0;
  uint32_t memory_bus_width = 0;
  uint32_t max_memory_clock_mhz = 0;
  uint64_t gpu_counter_frequency = 0;
  uint32_t wave_per_cu = 0;
  uint32_t simd_per_cu = 0;
  uint32_t max_scratch_slots_per_cu = 0;
  uint32_t num_shader_engine = 0;
  uint32_t shader_array_per_shader_engine = 0;
  uint32_t domain = 0;
  uint32_t num_gws = 0;
  uint32_t asic_revision = 0;
  uint64_t local_visible_heap_size = 0;
  uint64_t local_invisible_heap_size = 0;
  uint64_t private_aperture_base = 0;
  uint64_t private_aperture_size = 0;
  uint64_t shared_aperture_base = 0;
  uint64_t shared_aperture_size = 0;
  uint32_t user_queue_size = 0;
  uint32_t lds_size = 0;
  uint32_t big_page_alignment_size = 0;
  uint32_t hw_big_page_min_alignment_size = 0;
  uint32_t hw_big_page_alignment_size = 0;
  bool enable_big_page_alignment = false;
  uint32_t mec_fw_version = 0;
  uint32_t sdma_fw_version = 0;
  uint32_t l1_cache_size = 0;
  uint32_t l2_cache_size = 0;
  uint32_t l3_cache_size = 0;
  uint32_t gl2_cacheline_size = 0;
  uint32_t num_cp_queues = 0;
  HwsInfo hwsInfo = {};
  std::vector<int> sdma_schedid;  // Default-constructed: empty.
  uint32_t compute_schedid = 0;
  bool state_shadowing_by_cpfw = false;
  bool platform_atomic_support = false;
  void *adapter_info = nullptr;     // Opaque; pointee type not visible here.
  void *adapter_ex_info = nullptr;  // Opaque; pointee type not visible here.
};
// Declaration only; the definition is not part of this header.
// Takes an engine value and a device description and returns an int.
// NOTE(review): presumably the result is the engine's ordinal as used by
// HwsInfo::engineOrdinalMask -- confirm against the implementation, including
// which values `engine` accepts and what is returned on failure.
int EngineOrdinal(int engine, DeviceInfo *device_info);
// Declaration only; the definition is not part of this header.
// NOTE(review): presumably reports whether hardware scheduling (HWS) is enabled
// for `engine` according to device_info->hwsInfo -- confirm against the
// implementation.
bool GetHwsEnabled(int engine, DeviceInfo *device_info);
// Declaration only; the definition is not part of this header.
// NOTE(review): presumably tells the caller whether the GPU timeout should be
// disabled for `engine` on this device; the criteria are not visible here.
bool ShouldDisableGpuTimeout(int engine, DeviceInfo *device_info);
// Declaration only; the definition is not part of this header.
// Takes an adapter handle and a pointer to a DeviceInfo; returns bool.
// NOTE(review): presumably fills *device_info from the adapter and returns
// true on success -- confirm against the implementation.
// NOTE(review): D3DKMT_HANDLE is not declared in this header; it must come
// from a header included before this one.
bool ParseAdapterInfo(D3DKMT_HANDLE adapter, DeviceInfo *device_info);
// Declaration only; the definition is not part of this header.
// NOTE(review): presumably returns true when `adapter` is a device this
// library can drive; the support criteria are not visible here.
bool QueryAdapterSupported(D3DKMT_HANDLE adapter);
// Declaration only; the definition is not part of this header.
// Maps a uint32_t queue-engine value to a uint32_t.
// NOTE(review): presumably the result is an EngineFlag bit (KCOMPUTE0 /
// KDRMDMA / KDRMDMA1); the accepted `queue_engine` values and the result for
// unknown inputs are not visible here.
uint32_t QueueEngine2EngineFlag(uint32_t queue_engine);
// Declaration only; the definition is not part of this header.
// `device_info` is taken by const reference; `data` is an untyped pointer.
// NOTE(review): presumably writes an allocation description (size, domain,
// address, flags) into the buffer at `data`; the required buffer layout and
// size are not visible here. `mem_flags` and `engine_flag` presumably carry
// MemFlag and EngineFlag bits respectively -- confirm.
void SetAllocationInfo(void *data, uint64_t size, AllocDomain domain,
uint64_t addr, uint32_t mem_flags, uint32_t engine_flag, const DeviceInfo &device_info);
// Declaration only; the definition is not part of this header.
// All parameters except `num_handles` are pointers to caller variables.
// NOTE(review): presumably allocates driver-private and allocation-private
// buffers for `num_handles` handles, returns them through *ppdrv_priv /
// *ppalloc_priv with their sizes in *pdrv_priv_data_size /
// *palloc_priv_data_size, and expects release via DestroyPrivateAllocInfo()
// -- confirm against the implementation.
bool CreatePrivateAllocInfo(int num_handles, void **ppdrv_priv, void **ppalloc_priv,
int *pdrv_priv_data_size, int *palloc_priv_data_size);
// Declaration only; the definition is not part of this header.
// NOTE(review): presumably releases the two buffers produced by
// CreatePrivateAllocInfo(); whether null pointers are accepted is not visible
// here.
void DestroyPrivateAllocInfo(void *drv_priv, void *alloc_priv);
// Declaration only; the definition is not part of this header.
// NOTE(review): presumably builds a private-data blob for a command
// submission, stores it in *priv_data and returns its size or a status code;
// the meaning of the int result is not visible here. The blob is presumably
// released with DestroyPrivData() -- confirm.
int CreateSubmitPrivData(void **priv_data, D3DKMT_HANDLE queue, uint64_t command_addr,
uint64_t command_size, bool is_hw_queue);
// Declaration only; the definition is not part of this header.
// `level` may be omitted by callers and then defaults to kNormal.
// NOTE(review): presumably builds a private-data blob for hardware-queue
// creation and stores it in *priv_data; the meaning of the int result and of
// `FwManagedGfxState` is not visible here. The blob is presumably released
// with DestroyPrivData() -- confirm.
int CreateHwQueuePrivData(void **priv_data, D3DKMT_HANDLE context,
bool FwManagedGfxState, SchedLevel level = kNormal);
// Declaration only; the definition is not part of this header.
// NOTE(review): presumably builds a private-data blob for context creation and
// stores it in *priv_data; the meaning of the int result is not visible here.
// The blob is presumably released with DestroyPrivData() -- confirm.
int CreateContextPrivData(void **priv_data, bool FwManagedGfxState);
// Declaration only; the definition is not part of this header.
// NOTE(review): presumably builds a private-data blob for a power-optimization
// request, with `restore` selecting between applying and restoring a setting;
// the meaning of the int result is not visible here -- confirm.
int CreatePowerOptPrivData(void **priv_data, bool restore);
// Declaration only; the definition is not part of this header.
// NOTE(review): presumably builds the private-data blob later read by
// QueryCalibratedTimestamps() and stores it in *priv_data; the meaning of the
// int result is not visible here -- confirm.
int CreateCalibratedTimestampsPrivData(void **priv_data);
// Declaration only; the definition is not part of this header.
// `gpu` and `cpu` are pointers to caller-owned uint64_t values.
// NOTE(review): presumably extracts a GPU/CPU timestamp pair from the blob
// created by CreateCalibratedTimestampsPrivData() into *gpu and *cpu; units
// and null handling are not visible here -- confirm.
void QueryCalibratedTimestamps(void* priv, uint64_t* gpu, uint64_t* cpu);
// Declaration only; the definition is not part of this header.
// NOTE(review): presumably releases a blob returned through `priv_data` by one
// of the Create*PrivData() functions above; whether nullptr is accepted is not
// visible here -- confirm.
void DestroyPrivData(void *priv_data);
}
#endif