P4 to Git Change 1095935 by bwicakso@opencl-hsa-stg-bwicakso on 2014/11/11 16:00:10
ECR #333755 - Part 2- Update to foundation spec 1.0 20141019:
- hsa_dispatch_packet_t now becomes hsa_kernel_dispatch_packet_t
- all bit mask in a struct are removed and replaced by enums that indicates the bit position and width.
Test: TC precheckin
Review: Hari, Fan, Shucai, German, Yunjun.
Affected files ...
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.cpp#268 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.hpp#103 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusched.hpp#15 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#338 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsavirtual.cpp#25 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsavirtual.hpp#12 edit
[ROCm/clr commit: c7988f7209]
Bu işleme şunda yer alıyor:
@@ -3791,10 +3791,13 @@ WriteAqlArg(
|
||||
*dst += size;
|
||||
}
|
||||
|
||||
const hsa_packet_header_t DISPATCH_PACKET_HEADER = {
|
||||
HSA_PACKET_TYPE_DISPATCH, 1, HSA_FENCE_SCOPE_SYSTEM, HSA_FENCE_SCOPE_COMPONENT, 0 };
|
||||
const uint16_t kDispatchPacketHeader =
|
||||
(HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE) |
|
||||
(1 << HSA_PACKET_HEADER_BARRIER) |
|
||||
(HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) |
|
||||
(HSA_FENCE_SCOPE_COMPONENT << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE);
|
||||
|
||||
hsa_dispatch_packet_t*
|
||||
hsa_kernel_dispatch_packet_t*
|
||||
HSAILKernel::loadArguments(
|
||||
VirtualGPU& gpu,
|
||||
const amd::Kernel& kernel,
|
||||
@@ -4028,7 +4031,8 @@ HSAILKernel::loadArguments(
|
||||
"Size and the number of arguments don't match!");
|
||||
uint argBufSize = amd::alignUp(
|
||||
static_cast<uint>(argsBufferSize()), sizeof(uint32_t));
|
||||
hsa_dispatch_packet_t* hsaDisp = reinterpret_cast<hsa_dispatch_packet_t*>(
|
||||
hsa_kernel_dispatch_packet_t* hsaDisp =
|
||||
reinterpret_cast<hsa_kernel_dispatch_packet_t*>(
|
||||
gpu.cb(0)->sysMemCopy() + argBufSize);
|
||||
|
||||
amd::NDRange local(sizes.local());
|
||||
@@ -4037,9 +4041,8 @@ HSAILKernel::loadArguments(
|
||||
// Check if runtime has to find local workgroup size
|
||||
findLocalWorkSize(sizes.dimensions(), sizes.global(), local);
|
||||
|
||||
hsaDisp->header = DISPATCH_PACKET_HEADER;
|
||||
hsaDisp->dimensions = sizes.dimensions();
|
||||
hsaDisp->reserved = 0;
|
||||
hsaDisp->header = kDispatchPacketHeader;
|
||||
hsaDisp->setup = sizes.dimensions();
|
||||
|
||||
hsaDisp->workgroup_size_x = local[0];
|
||||
hsaDisp->workgroup_size_y = (sizes.dimensions() > 1) ? local[1] : 1;
|
||||
@@ -4053,14 +4056,14 @@ HSAILKernel::loadArguments(
|
||||
// Initialize kernel ISA and execution buffer requirements
|
||||
hsaDisp->private_segment_size = spillSegSize();
|
||||
hsaDisp->group_segment_size = ldsAddress - ldsSize();
|
||||
hsaDisp->kernel_object_address = gpuAqlCode()->vmAddress();
|
||||
hsaDisp->kernel_object = gpuAqlCode()->vmAddress();
|
||||
|
||||
ConstBuffer* cb = gpu.constBufs_[0];
|
||||
cb->uploadDataToHw(argBufSize + sizeof(hsa_dispatch_packet_t));
|
||||
cb->uploadDataToHw(argBufSize + sizeof(hsa_kernel_dispatch_packet_t));
|
||||
uint64_t argList = cb->vmAddress() + cb->wrtOffset();
|
||||
|
||||
hsaDisp->kernarg_address = argList;
|
||||
hsaDisp->reserved3 = 0;
|
||||
hsaDisp->kernarg_address = reinterpret_cast<void*>(argList);
|
||||
hsaDisp->reserved2 = 0;
|
||||
hsaDisp->completion_signal = 0;
|
||||
|
||||
memList.push_back(cb);
|
||||
|
||||
@@ -885,7 +885,7 @@ public:
|
||||
|
||||
//! Returns AQL packet in CPU memory
|
||||
//! if the kerenl arguments were successfully loaded, otherwise NULL
|
||||
hsa_dispatch_packet_t* loadArguments(
|
||||
hsa_kernel_dispatch_packet_t* loadArguments(
|
||||
VirtualGPU& gpu, //!< Running GPU context
|
||||
const amd::Kernel& kernel, //!< AMD kernel object
|
||||
const amd::NDRangeContainer& sizes, //!< NDrange container
|
||||
|
||||
@@ -47,7 +47,7 @@ struct AmdAqlWrap {
|
||||
uint64_t wait_list; //!< [LRO/SRO] Pointer to an array of clk_event_t objects (64 bytes default)
|
||||
uint32_t wait_num; //!< [LWO/SRO] The number of cl_event_wait objects
|
||||
uint32_t reserved[5]; //!< For the future usage
|
||||
hsa_dispatch_packet_t aql; //!< [LWO/SRO] AQL packet – 64 bytes AQL packet
|
||||
hsa_kernel_dispatch_packet_t aql; //!< [LWO/SRO] AQL packet – 64 bytes AQL packet
|
||||
};
|
||||
|
||||
struct AmdEvent {
|
||||
|
||||
@@ -347,7 +347,7 @@ VirtualGPU::createVirtualQueue(uint deviceQueueSize)
|
||||
AmdAqlWrap* slots = reinterpret_cast<AmdAqlWrap*>(&header[1]);
|
||||
for (uint i = 0; i < numSlots; ++i) {
|
||||
uint64_t argStart = vaBase + argOffs + i * singleArgSize;
|
||||
slots[i].aql.kernarg_address = argStart;
|
||||
slots[i].aql.kernarg_address = reinterpret_cast<void*>(argStart);
|
||||
slots[i].wait_list = argStart + dev().info().maxParameterSize_ + 64;
|
||||
}
|
||||
// Upload data back to local memory
|
||||
@@ -1735,7 +1735,7 @@ VirtualGPU::submitKernelInternalHSA(
|
||||
}
|
||||
|
||||
// Program the kernel arguments for the GPU execution
|
||||
hsa_dispatch_packet_t* aqlPkt =
|
||||
hsa_kernel_dispatch_packet_t* aqlPkt =
|
||||
hsaKernel.loadArguments(*this, kernel, sizes, parameters, nativeMem,
|
||||
vmDefQueue, &vmParentWrap, memList);
|
||||
if (NULL == aqlPkt) {
|
||||
@@ -1812,7 +1812,7 @@ VirtualGPU::submitKernelInternalHSA(
|
||||
uint64_t* kernels = (uint64_t*)(
|
||||
const_cast<Memory*>(hsaKernel.prog().kernelTable())->map(this));
|
||||
for (j = 0; j < hsaKernel.prog().kernels().size(); ++j) {
|
||||
if (kernels[j] == wraps[i].aql.kernel_object_address) {
|
||||
if (kernels[j] == wraps[i].aql.kernel_object) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -1828,7 +1828,9 @@ VirtualGPU::submitKernelInternalHSA(
|
||||
printf("Error: couldn't find child kernel!\n");
|
||||
continue;
|
||||
}
|
||||
uint offsArg = wraps[i].aql.kernarg_address -
|
||||
const uint64_t kernarg_address =
|
||||
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(wraps[i].aql.kernarg_address));
|
||||
uint offsArg = kernarg_address -
|
||||
gpuDefQueue->virtualQueue_->vmAddress();
|
||||
address argum = gpuDefQueue->virtualQueue_->data() + offsArg;
|
||||
print << "Kernel: " << child->name() << "\n";
|
||||
|
||||
Yeni konuda referans
Bir kullanıcı engelle