P4 to Git Change 1182228 by gandryey@gera-dev-w7 on 2015/08/19 12:22:39
EPR #419072 - [OpenCL2.0] Enable large (16 MB) on-device queues
- Add a mask_groups argument to the library for empty-slot spreading

Affected files ...
... //depot/stg/opencl/drivers/opencl/library/hsa/hsail/src/devenq/devenq.h#11 edit
... //depot/stg/opencl/drivers/opencl/library/hsa/hsail/src/devenq/enqueue.cl#10 edit
... //depot/stg/opencl/drivers/opencl/library/hsa/hsail/src/devenq/eprep.cl#6 edit
... //depot/stg/opencl/drivers/opencl/library/hsa/hsail/src/devenq/events.cl#4 edit
... //depot/stg/opencl/drivers/opencl/library/hsa/hsail/src/devenq/schedule.cl#11 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpudevice.cpp#520 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusched.hpp#18 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#373 edit
This commit is contained in:
@@ -489,7 +489,7 @@ void NullDevice::fillDeviceInfo(
|
||||
info_.queueOnDeviceProperties_ =
|
||||
CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE;
|
||||
info_.queueOnDevicePreferredSize_ = 256 * Ki;
|
||||
info_.queueOnDeviceMaxSize_ = 12 * Mi;
|
||||
info_.queueOnDeviceMaxSize_ = 16 * Mi;
|
||||
info_.maxOnDeviceQueues_ = 1;
|
||||
info_.maxOnDeviceEvents_ = settings().numDeviceEvents_;
|
||||
info_.globalVariablePreferredTotalSize_ = static_cast<size_t>(info_.globalMemSize_);
|
||||
|
||||
@@ -28,7 +28,7 @@ struct AmdVQueueHeader {
|
||||
uint32_t command_counter; //!< [LRW] The global counter for the submitted commands into the queue
|
||||
uint32_t wait_size; //!< [LRO] The wait list size (in clk_event_t)
|
||||
uint32_t arg_size; //!< [LRO] The size of argument buffer (in bytes)
|
||||
uint32_t reserved0; //!< For the future usage
|
||||
uint32_t mask_groups; //!< Processed mask groups by one thread
|
||||
uint64_t kernel_table; //!< [LRO] Pointer to an array with all kernel objects (ulong for each entry)
|
||||
uint32_t reserved[2]; //!< For the future usage
|
||||
};
|
||||
@@ -70,8 +70,7 @@ struct SchedulerParam {
|
||||
uint64_t parentAQL; //!< Host parent AmdAqlWrap packet
|
||||
uint32_t dedicatedQueue; //!< Scheduler uses a dedicated queue
|
||||
uint32_t scratchOffset; //!< Scratch buffer offset
|
||||
uint32_t mask_groups; //!< Processed mask groups by one thread
|
||||
uint32_t reserved; //!< Reserved
|
||||
uint32_t reserved[2]; //!< Reserved
|
||||
};
|
||||
|
||||
} // namespace gpu
|
||||
|
||||
@@ -358,6 +358,7 @@ VirtualGPU::createVirtualQueue(uint deviceQueueSize)
|
||||
header->aql_slot_mask = vaBase + slotMaskOffs;
|
||||
header->wait_size = dev().settings().numWaitEvents_;
|
||||
header->arg_size = dev().info().maxParameterSize_ + 64;
|
||||
header->mask_groups = maskGroups_;
|
||||
vqHeader_ = new AmdVQueueHeader;
|
||||
if (NULL == vqHeader_) {
|
||||
return false;
|
||||
@@ -1938,7 +1939,6 @@ VirtualGPU::submitKernelInternalHSA(
|
||||
param->parentAQL = vmParentWrap;
|
||||
param->dedicatedQueue = dev().settings().useDeviceQueue_;
|
||||
param->useATC = dev().settings().svmFineGrainSystem_;
|
||||
param->mask_groups = maskGroups_;
|
||||
|
||||
// Fill the scratch buffer information
|
||||
if (hsaKernel.prog().maxScratchRegs() > 0) {
|
||||
|
||||
Reference in New Issue
Block a user