Dosyalar
rocm-systems/rocclr/runtime/device/gpu/gpusched.hpp
T
foreman 4e5c499ace P4 to Git Change 1182228 by gandryey@gera-dev-w7 on 2015/08/19 12:22:39
EPR #419072 - [OpenCL2.0] Enable 16MB large on device queues
	- Add mask_groups argument into the library for the empty slot spreading

Affected files ...

... //depot/stg/opencl/drivers/opencl/library/hsa/hsail/src/devenq/devenq.h#11 edit
... //depot/stg/opencl/drivers/opencl/library/hsa/hsail/src/devenq/enqueue.cl#10 edit
... //depot/stg/opencl/drivers/opencl/library/hsa/hsail/src/devenq/eprep.cl#6 edit
... //depot/stg/opencl/drivers/opencl/library/hsa/hsail/src/devenq/events.cl#4 edit
... //depot/stg/opencl/drivers/opencl/library/hsa/hsail/src/devenq/schedule.cl#11 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#520 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusched.hpp#18 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#373 edit
2015-08-19 12:37:48 -04:00

79 satır
3.9 KiB
C++
Ham Suçlama Geçmiş

Bu dosya muğlak Evrensel Kodlu karakter içeriyor
Bu dosya, başka karakterlerle karıştırılabilecek evrensel kodlu karakter içeriyor. Eğer bunu kasıtlı olarak yaptıysanız bu uyarıyı yok sayabilirsiniz. Gizli karakterleri göstermek için Kaçış Karakterli düğmesine tıklayın.
//
// Copyright (c) 2010 Advanced Micro Devices, Inc. All rights reserved.
//
#ifndef GPUSCHED_HPP_
#define GPUSCHED_HPP_
#include "hsa.h"
namespace gpu {
//! AmdAqlWrap slot state
enum AqlWrapState {
AQL_WRAP_FREE = 0,
AQL_WRAP_RESERVED,
AQL_WRAP_READY,
AQL_WRAP_MARKER,
AQL_WRAP_BUSY,
AQL_WRAP_DONE
};
struct AmdVQueueHeader {
uint32_t aql_slot_num; //!< [LRO/SRO] The total number of the AQL slots (multiple of 64).
uint32_t event_slot_num; //!< [LRO] The number of kernel events in the events buffer
uint64_t event_slot_mask; //!< [LRO] A pointer to the allocation bitmask array for the events
uint64_t event_slots; //!< [LRO] Pointer to a buffer for the events.
// Array of event_slot_num entries of AmdEvent
uint64_t aql_slot_mask; //!< [LRO/SRO]A pointer to the allocation bitmask for aql_warp slots
uint32_t command_counter; //!< [LRW] The global counter for the submitted commands into the queue
uint32_t wait_size; //!< [LRO] The wait list size (in clk_event_t)
uint32_t arg_size; //!< [LRO] The size of argument buffer (in bytes)
uint32_t mask_groups; //!< Processed mask groups by one thread
uint64_t kernel_table; //!< [LRO] Pointer to an array with all kernel objects (ulong for each entry)
uint32_t reserved[2]; //!< For the future usage
};
struct AmdAqlWrap {
uint32_t state; //!< [LRW/SRW] The current state of the AQL wrapper: FREE, RESERVED, READY,
// MARKER, BUSY and DONE. The block could be returned back to a free state.
uint32_t enqueue_flags; //!< [LWO/SRO] Contains the flags for the kernel execution start
uint32_t command_id; //!< [LWO/SRO] The unique command ID
uint32_t child_counter; //!< [LRW/SRW] Counter that determine the launches of child kernels.
// It’s incremented on the
// start and decremented on the finish. The parent kernel can be considered as
// done when the value is 0 and the state is DONE
uint64_t completion; //!< [LWO/SRO] CL event for the current execution (clk_event_t)
uint64_t parent_wrap; //!< [LWO/SRO] Pointer to the parent AQL wrapper (AmdAqlWrap*)
uint64_t wait_list; //!< [LRO/SRO] Pointer to an array of clk_event_t objects (64 bytes default)
uint32_t wait_num; //!< [LWO/SRO] The number of cl_event_wait objects
uint32_t reserved[5]; //!< For the future usage
hsa_kernel_dispatch_packet_t aql; //!< [LWO/SRO] AQL packet 64 bytes AQL packet
};
struct AmdEvent {
uint32_t state; //!< [LRO/SRW] Event state: START, END, COMPLETE
uint32_t counter; //!< [LRW] Event retain/release counter. 0 means the event is free
uint64_t timer[3]; //!< [LRO/SWO] Timer values for profiling for each state
uint64_t captureInfo; //!< [LRW/SRO] Profiling capture info for CLK_PROFILING_COMMAND_EXEC_TIME
};
struct SchedulerParam {
uint32_t signal; //!< Signal to stop the child queue(address must be 16 bytes aligned)
uint32_t eng_clk; //!< Engine clock in Mhz
uint64_t hw_queue; //!< Address to HW queue
uint64_t hsa_queue; //!< Address to HSA dummy queue
uint32_t useATC; //!< GPU access to shader program by ATC.
uint32_t scratchSize; //!< Scratch buffer size
uint64_t scratch; //!< GPU address to the scratch buffer
uint32_t numMaxWaves; //!< The max number of possible waves
uint32_t releaseHostCP; //!< Releases CP on the host queue
uint64_t parentAQL; //!< Host parent AmdAqlWrap packet
uint32_t dedicatedQueue; //!< Scheduler uses a dedicated queue
uint32_t scratchOffset; //!< Scratch buffer offset
uint32_t reserved[2]; //!< Reserved
};
} // namespace gpu
#endif