2024-12-05 18:02:00 +08:00
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
//
|
|
|
|
|
// The University of Illinois/NCSA
|
|
|
|
|
// Open Source License (NCSA)
|
|
|
|
|
//
|
|
|
|
|
// Copyright (c) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
|
|
|
|
//
|
|
|
|
|
// Developed by:
|
|
|
|
|
//
|
|
|
|
|
// AMD Research and AMD HSA Software Development
|
|
|
|
|
//
|
|
|
|
|
// Advanced Micro Devices, Inc.
|
|
|
|
|
//
|
|
|
|
|
// www.amd.com
|
|
|
|
|
//
|
|
|
|
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
|
|
|
// of this software and associated documentation files (the "Software"), to
|
|
|
|
|
// deal with the Software without restriction, including without limitation
|
|
|
|
|
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
|
|
|
// and/or sell copies of the Software, and to permit persons to whom the
|
|
|
|
|
// Software is furnished to do so, subject to the following conditions:
|
|
|
|
|
//
|
|
|
|
|
// - Redistributions of source code must retain the above copyright notice,
|
|
|
|
|
// this list of conditions and the following disclaimers.
|
|
|
|
|
// - Redistributions in binary form must reproduce the above copyright
|
|
|
|
|
// notice, this list of conditions and the following disclaimers in
|
|
|
|
|
// the documentation and/or other materials provided with the distribution.
|
|
|
|
|
// - Neither the names of Advanced Micro Devices, Inc,
|
|
|
|
|
// nor the names of its contributors may be used to endorse or promote
|
|
|
|
|
// products derived from this Software without specific prior written
|
|
|
|
|
// permission.
|
|
|
|
|
//
|
|
|
|
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
|
|
|
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
|
|
|
|
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
|
|
|
|
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
|
|
|
// DEALINGS WITH THE SOFTWARE.
|
|
|
|
|
//
|
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
|
|
#ifndef _WSL_INC_WDDM_DEVICE_H_
|
|
|
|
|
#define _WSL_INC_WDDM_DEVICE_H_
|
|
|
|
|
|
|
|
|
|
#include <cassert>
|
|
|
|
|
#include <ntstatus.h>
|
|
|
|
|
|
|
|
|
|
#include <atomic>
|
|
|
|
|
#include <memory>
|
|
|
|
|
#include <vector>
|
|
|
|
|
|
|
|
|
|
#include "impl/wddm/types.h"
|
|
|
|
|
#include "impl/thunk_proxy/thunk_proxy.h"
|
|
|
|
|
#include "impl/wddm/va_mgr.h"
|
|
|
|
|
#include "impl/wddm/status.h"
|
|
|
|
|
#include "impl/wddm/types.h"
|
|
|
|
|
#include "impl/wddm/gpu_memory.h"
|
|
|
|
|
#include "impl/wddm/cmd_util.h"
|
|
|
|
|
|
|
|
|
|
namespace wsl {
|
|
|
|
|
namespace thunk {
|
|
|
|
|
|
|
|
|
|
//class Queue;
|
|
|
|
|
// Forward declaration: this header only refers to WDDMQueue through pointers,
// so the full definition is not required here.
class WDDMQueue;
|
|
|
|
|
|
|
|
|
|
// Upper bound on a single userptr block, imposed by the WSL2 Hyper-V GPADL
// protocol. NOTE(review): presumably expressed in bytes -- confirm at the
// call sites.
#define MAX_USERPTR_BLOCK_SIZE 0xf0000000

// Bounds of the non-canonical address range:
//   START_NON_CANONICAL_ADDR == 0x0000800000000000
//   END_NON_CANONICAL_ADDR   == 0xFFFF7FFFFFFFFFFF
// Both are built from unsigned long long literals so that the 47-bit shift is
// well defined even where `unsigned long` is only 32 bits wide.
#define START_NON_CANONICAL_ADDR (1ULL << 47)
#define END_NON_CANONICAL_ADDR (~0ULL - (1ULL << 47))

// Evaluates to true when the ranges [start1, start1 + size1) and
// [start2, start2 + size2) intersect.
// Every argument is parenthesized so that expressions containing low-precedence
// operators (e.g. `a | b`) expand correctly. Each argument is still evaluated
// more than once, so do not pass expressions with side effects. The sums are
// not guarded against wrap-around.
#define IS_OVERLAPPING(start1, size1, start2, size2) \
  (((start1) < ((start2) + (size2))) && ((start2) < ((start1) + (size1))))
|
|
|
|
|
|
2025-11-06 17:31:47 +08:00
|
|
|
// Plain record describing one memory segment. A freshly constructed instance
// has every numeric field set to zero and both flags cleared.
struct SegmentInfo {
  uint32_t segment_id = 0;
  uint32_t segment_type = 0;  // 0=aperture, 1=gpu memory, 2=system memory
  bool aperture = false;
  bool system_memory = false;
  uint64_t commit_limit = 0;

  // Intentionally user-provided (not `= default`) so the type keeps the same
  // non-aggregate status it had with the explicit initializer list.
  SegmentInfo() {}
};
|
|
|
|
|
|
2024-12-05 18:02:00 +08:00
|
|
|
class WDDMDevice {
|
|
|
|
|
public:
|
|
|
|
|
static constexpr size_t GpuMemoryChunkSize = 2 * (1ULL << 30); // 2 GB
|
|
|
|
|
|
2025-06-25 12:08:35 +08:00
|
|
|
WDDMDevice(D3DKMT_HANDLE adapter, LUID adapter_luid, uint32_t node_id);
|
2024-12-05 18:02:00 +08:00
|
|
|
~WDDMDevice();
|
|
|
|
|
|
2025-06-25 12:08:35 +08:00
|
|
|
int NodeId() const { return node_id_; }
|
2024-12-05 18:02:00 +08:00
|
|
|
int Major() { return device_info_.major; }
|
|
|
|
|
int Minor() { return device_info_.minor; }
|
|
|
|
|
int Stepping() { return device_info_.stepping; }
|
|
|
|
|
bool IsDgpu() { return device_info_.is_dgpu; }
|
|
|
|
|
const char *ProductName() { return device_info_.product_name; }
|
2025-01-06 16:44:41 +08:00
|
|
|
uint64_t Uuid() { return device_info_.uuid; }
|
2025-04-10 16:21:17 +08:00
|
|
|
uint32_t GfxFamily() { return device_info_.family; }
|
2024-12-05 18:02:00 +08:00
|
|
|
uint32_t DeviceId() { return device_info_.device_id; }
|
|
|
|
|
uint32_t WavefrontSize() { return device_info_.wavefront_size; }
|
|
|
|
|
uint32_t ComputeUnitCount() { return device_info_.compute_unit_count; }
|
|
|
|
|
uint32_t MaxEngineClockMhz() { return device_info_.max_engine_clock_mhz; }
|
|
|
|
|
uint32_t WatchPointsNum() { return device_info_.watch_points_num; }
|
|
|
|
|
uint32_t PciBusAddr() { return device_info_.pci_bus_addr; }
|
|
|
|
|
|
|
|
|
|
uint32_t MemoryBusWidth() { return device_info_.memory_bus_width; }
|
|
|
|
|
uint32_t MaxMemoryClockMhz() { return device_info_.max_memory_clock_mhz; }
|
|
|
|
|
uint32_t WavePerCu() { return device_info_.wave_per_cu; }
|
|
|
|
|
uint32_t SimdPerCu() { return device_info_.simd_per_cu; }
|
|
|
|
|
uint32_t MaxScratchSlotsPerCu() { return device_info_.max_scratch_slots_per_cu; }
|
|
|
|
|
uint32_t NumShaderEngine() { return device_info_.num_shader_engine; }
|
|
|
|
|
uint32_t ShaderArrayPerShaderEngine() { return device_info_.shader_array_per_shader_engine; }
|
|
|
|
|
uint32_t NumSdmaEngine() { return device_info_.sdma_schedid.size(); }
|
|
|
|
|
uint32_t Domain() { return device_info_.domain; }
|
|
|
|
|
uint32_t NumGws() { return device_info_.num_gws; }
|
|
|
|
|
uint32_t AsicRevision() { return device_info_.asic_revision; }
|
|
|
|
|
uint64_t LocalHeapSize() { return device_info_.local_visible_heap_size + device_info_.local_invisible_heap_size; }
|
|
|
|
|
uint64_t LocalVisibleHeapSize() { return device_info_.local_visible_heap_size; }
|
|
|
|
|
uint64_t LocalInvisibleHeapSize() { return device_info_.local_invisible_heap_size; }
|
2025-10-27 10:59:00 +08:00
|
|
|
uint64_t NonLocalHeapSize() { return device_info_.non_local_heap_size; }
|
2024-12-05 18:02:00 +08:00
|
|
|
uint64_t PrivateApertureBase() { return device_info_.private_aperture_base; }
|
|
|
|
|
uint64_t PrivateApertureSize() { return device_info_.private_aperture_size; }
|
|
|
|
|
uint64_t SharedApertureBase() { return device_info_.shared_aperture_base; }
|
|
|
|
|
uint64_t SharedApertureSize() { return device_info_.shared_aperture_size; }
|
|
|
|
|
uint32_t LdsSize() { return device_info_.lds_size; }
|
|
|
|
|
uint64_t GPUCounterFrequency() { return device_info_.gpu_counter_frequency; }
|
|
|
|
|
uint32_t GetSwsQueueSize(void) const { return device_info_.user_queue_size; }
|
|
|
|
|
uint32_t GetMecFwVersion() { return device_info_.mec_fw_version; }
|
|
|
|
|
uint32_t GetSdmaFwVersion() { return device_info_.sdma_fw_version; }
|
|
|
|
|
uint32_t GetL1CacheSize() { return device_info_.l1_cache_size; }
|
|
|
|
|
uint32_t GetL2CacheSize() { return device_info_.l2_cache_size; }
|
|
|
|
|
uint32_t GetL3CacheSize() { return device_info_.l3_cache_size; }
|
|
|
|
|
uint32_t Gl2CacheLineSize() { return device_info_.gl2_cacheline_size; }
|
|
|
|
|
bool SupportStateShadowingByCpFw(void) const { return device_info_.state_shadowing_by_cpfw; }
|
|
|
|
|
bool SupportPlatformAtomic(void) const { return device_info_.platform_atomic_support; }
|
|
|
|
|
uint32_t GetSdmaEngine(uint32_t idx) {
|
|
|
|
|
assert(idx < NumSdmaEngine());
|
|
|
|
|
return device_info_.sdma_schedid[idx];
|
|
|
|
|
}
|
|
|
|
|
uint32_t GetComputeEngine() { return device_info_.compute_schedid; }
|
|
|
|
|
|
|
|
|
|
uint64_t VramAvail();
|
|
|
|
|
|
|
|
|
|
void GetClockCounters(uint64_t *gpu, uint64_t *cpu);
|
|
|
|
|
uint32_t GetNumCpQueues() { return device_info_.num_cp_queues; }
|
|
|
|
|
|
|
|
|
|
bool CreateSyncobj(D3DKMT_HANDLE *handle, uint64_t **addr);
|
|
|
|
|
void DestroySyncobj(D3DKMT_HANDLE handle);
|
|
|
|
|
|
|
|
|
|
bool CreateQueue(WDDMQueue *queue);
|
|
|
|
|
void DestroyQueue(WDDMQueue *queue);
|
|
|
|
|
bool CreateHwQueue(WDDMQueue *queue);
|
|
|
|
|
bool DestroyHwQueue(WDDMQueue *queue);
|
|
|
|
|
bool SubmitToSwQueue(WDDMQueue *queue, uint64_t command_addr,
|
|
|
|
|
uint64_t command_size, uint64_t fence_value);
|
|
|
|
|
bool SubmitToHwQueue(WDDMQueue *queue, uint64_t command_addr,
|
|
|
|
|
uint64_t command_size, uint64_t fence_value);
|
|
|
|
|
|
|
|
|
|
bool WaitPagingFence(WDDMQueue *queue) {
|
|
|
|
|
uint64_t value = page_fence_value_;
|
|
|
|
|
|
|
|
|
|
if (*page_fence_addr_ < value &&
|
|
|
|
|
!GpuWait(queue, &page_syncobj_, &value, 1))
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool GpuWait(WDDMQueue *queue, const D3DKMT_HANDLE *syncobjs,
|
|
|
|
|
uint64_t *values, int count);
|
|
|
|
|
bool GpuSignal(D3DKMT_HANDLE context, const D3DKMT_HANDLE *syncobjs,
|
|
|
|
|
uint64_t *value, int count);
|
|
|
|
|
bool CpuWait(const D3DKMT_HANDLE *syncobjs, uint64_t *value,
|
|
|
|
|
int count, bool wait_any);
|
|
|
|
|
bool WaitOnPagingFenceFromCpu();
|
|
|
|
|
|
|
|
|
|
uint32_t LdsBlocks(const hsa_kernel_dispatch_packet_t *pkt);
|
|
|
|
|
uint32_t GetCmdbufSize(void) const { return cmdbuf_size_; }
|
|
|
|
|
uint32_t GetAqlFrameSize(void) const { return cmdbuf_aql_frame_size_; }
|
|
|
|
|
static uint32_t GetAqlFrameNum(void) { return cmdbuf_aql_frame_num_; }
|
|
|
|
|
|
|
|
|
|
// Both legacy HWS and stage 1 HWS use KMD to alloc use queue memory,
|
|
|
|
|
// return false by default
|
|
|
|
|
bool AllocUserQueueMemFromUMD(void) const { return false; }
|
|
|
|
|
|
|
|
|
|
bool IsHwsEnabled(int engine) {
|
|
|
|
|
return thunk_proxy::GetHwsEnabled(engine, &device_info_);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void UpdatePageFence(uint64_t fence_value);
|
|
|
|
|
|
|
|
|
|
D3DKMT_HANDLE PagingQueue() const { return page_queue_; }
|
|
|
|
|
D3DKMT_HANDLE PagingFence() const { return page_syncobj_; }
|
|
|
|
|
D3DKMT_HANDLE DeviceHandle() const { return device_; }
|
|
|
|
|
LUID GetLuid() const { return adapter_luid_; }
|
2025-06-27 09:08:19 +08:00
|
|
|
D3DKMT_HANDLE GetAdapter() const { return adapter_; }
|
2024-12-05 18:02:00 +08:00
|
|
|
|
|
|
|
|
const thunk_proxy::DeviceInfo& DeviceInfo() const { return device_info_; }
|
|
|
|
|
|
2025-07-05 12:38:07 +08:00
|
|
|
ErrorCode CreateGpuMemory(const GpuMemoryCreateInfo &create_info, GpuMemory **gpu_mem, gpusize *gpu_va = nullptr);
|
2024-12-05 18:02:00 +08:00
|
|
|
|
|
|
|
|
private:
|
|
|
|
|
bool ParseDeviceInfo(void);
|
|
|
|
|
void DestroyDeviceInfo(void);
|
|
|
|
|
bool CreateDevice(void);
|
|
|
|
|
bool DestroyDevice(void);
|
|
|
|
|
bool CreatePagingQueue(void);
|
|
|
|
|
bool DestroyPagingQueue(void);
|
|
|
|
|
void *Lock(D3DKMT_HANDLE handle);
|
|
|
|
|
bool Unlock(D3DKMT_HANDLE handle);
|
|
|
|
|
bool CreateContext(int engine, D3DKMT_HANDLE *handle);
|
|
|
|
|
bool DestroyContext(D3DKMT_HANDLE handle);
|
|
|
|
|
|
|
|
|
|
void SetPowerOptimization(bool restore);
|
|
|
|
|
void InitCmdbufInfo(void);
|
|
|
|
|
|
2025-11-06 17:31:47 +08:00
|
|
|
bool QuerySegmentInfo();
|
|
|
|
|
bool GetSegmentId(D3DKMT_QUERYSTATISTICS_SEGMENT_TYPE segment_type, uint32_t &segment_id);
|
|
|
|
|
|
2024-12-05 18:02:00 +08:00
|
|
|
D3DKMT_HANDLE adapter_;
|
|
|
|
|
LUID adapter_luid_;
|
|
|
|
|
D3DKMT_HANDLE device_;
|
|
|
|
|
|
|
|
|
|
D3DKMT_HANDLE page_queue_;
|
|
|
|
|
D3DKMT_HANDLE page_syncobj_;
|
|
|
|
|
uint64_t *page_fence_addr_;
|
|
|
|
|
std::atomic<uint64_t> page_fence_value_;
|
|
|
|
|
|
|
|
|
|
uint32_t cmdbuf_size_;
|
|
|
|
|
uint32_t cmdbuf_aql_frame_size_;
|
|
|
|
|
static const uint32_t cmdbuf_aql_frame_num_;
|
2025-06-25 12:08:35 +08:00
|
|
|
uint32_t node_id_;
|
2024-12-05 18:02:00 +08:00
|
|
|
// device info
|
|
|
|
|
thunk_proxy::DeviceInfo device_info_;
|
2025-11-06 17:31:47 +08:00
|
|
|
std::vector<struct SegmentInfo> segment_infos_;
|
2024-12-05 18:02:00 +08:00
|
|
|
//CmdUtil cmd_util;
|
|
|
|
|
};
|
|
|
|
|
|
2025-07-25 11:42:16 +08:00
|
|
|
// Takes the device list by non-const reference and reports the outcome as an
// NTSTATUS.
// NOTE(review): presumably appends one WDDMDevice per usable adapter and leaves
// ownership of the raw pointers with the caller -- confirm against the
// implementation.
NTSTATUS WDDMCreateDevices(std::vector<WDDMDevice *> &devices);
|
2024-12-05 18:02:00 +08:00
|
|
|
|
|
|
|
|
} // namespace thunk
|
|
|
|
|
} // namespace wsl
|
|
|
|
|
|
|
|
|
|
#endif
|