P4 to Git Change 1095935 by bwicakso@opencl-hsa-stg-bwicakso on 2014/11/11 16:00:10

ECR #333755 - Part 2- Update to foundation spec 1.0 20141019:
	- hsa_dispatch_packet_t now becomes hsa_kernel_dispatch_packet_t
	- all bit mask in a struct are removed and replaced by enums that indicates the bit position and width.

	Test: TC precheckin

	Review: Hari, Fan, Shucai, German, Yunjun.

Affected files ...

... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.cpp#268 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpukernel.hpp#103 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpusched.hpp#15 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuvirtual.cpp#338 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsavirtual.cpp#25 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/hsa_foundation/hsavirtual.hpp#12 edit


[ROCm/clr commit: c7988f7209]
Bu işleme şunda yer alıyor:
foreman
2014-11-11 16:27:13 -05:00
ebeveyn a3cd8fcbb3
işleme 91b247014f
4 değiştirilmiş dosya ile 22 ekleme ve 17 silme
+14 -11
Dosyayı Görüntüle
@@ -3791,10 +3791,13 @@ WriteAqlArg(
*dst += size;
}
const hsa_packet_header_t DISPATCH_PACKET_HEADER = {
HSA_PACKET_TYPE_DISPATCH, 1, HSA_FENCE_SCOPE_SYSTEM, HSA_FENCE_SCOPE_COMPONENT, 0 };
const uint16_t kDispatchPacketHeader =
(HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE) |
(1 << HSA_PACKET_HEADER_BARRIER) |
(HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE) |
(HSA_FENCE_SCOPE_COMPONENT << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE);
hsa_dispatch_packet_t*
hsa_kernel_dispatch_packet_t*
HSAILKernel::loadArguments(
VirtualGPU& gpu,
const amd::Kernel& kernel,
@@ -4028,7 +4031,8 @@ HSAILKernel::loadArguments(
"Size and the number of arguments don't match!");
uint argBufSize = amd::alignUp(
static_cast<uint>(argsBufferSize()), sizeof(uint32_t));
hsa_dispatch_packet_t* hsaDisp = reinterpret_cast<hsa_dispatch_packet_t*>(
hsa_kernel_dispatch_packet_t* hsaDisp =
reinterpret_cast<hsa_kernel_dispatch_packet_t*>(
gpu.cb(0)->sysMemCopy() + argBufSize);
amd::NDRange local(sizes.local());
@@ -4037,9 +4041,8 @@ HSAILKernel::loadArguments(
// Check if runtime has to find local workgroup size
findLocalWorkSize(sizes.dimensions(), sizes.global(), local);
hsaDisp->header = DISPATCH_PACKET_HEADER;
hsaDisp->dimensions = sizes.dimensions();
hsaDisp->reserved = 0;
hsaDisp->header = kDispatchPacketHeader;
hsaDisp->setup = sizes.dimensions();
hsaDisp->workgroup_size_x = local[0];
hsaDisp->workgroup_size_y = (sizes.dimensions() > 1) ? local[1] : 1;
@@ -4053,14 +4056,14 @@ HSAILKernel::loadArguments(
// Initialize kernel ISA and execution buffer requirements
hsaDisp->private_segment_size = spillSegSize();
hsaDisp->group_segment_size = ldsAddress - ldsSize();
hsaDisp->kernel_object_address = gpuAqlCode()->vmAddress();
hsaDisp->kernel_object = gpuAqlCode()->vmAddress();
ConstBuffer* cb = gpu.constBufs_[0];
cb->uploadDataToHw(argBufSize + sizeof(hsa_dispatch_packet_t));
cb->uploadDataToHw(argBufSize + sizeof(hsa_kernel_dispatch_packet_t));
uint64_t argList = cb->vmAddress() + cb->wrtOffset();
hsaDisp->kernarg_address = argList;
hsaDisp->reserved3 = 0;
hsaDisp->kernarg_address = reinterpret_cast<void*>(argList);
hsaDisp->reserved2 = 0;
hsaDisp->completion_signal = 0;
memList.push_back(cb);
+1 -1
Dosyayı Görüntüle
@@ -885,7 +885,7 @@ public:
//! Returns AQL packet in CPU memory
//! if the kerenl arguments were successfully loaded, otherwise NULL
hsa_dispatch_packet_t* loadArguments(
hsa_kernel_dispatch_packet_t* loadArguments(
VirtualGPU& gpu, //!< Running GPU context
const amd::Kernel& kernel, //!< AMD kernel object
const amd::NDRangeContainer& sizes, //!< NDrange container
+1 -1
Dosyayı Görüntüle
@@ -47,7 +47,7 @@ struct AmdAqlWrap {
uint64_t wait_list; //!< [LRO/SRO] Pointer to an array of clk_event_t objects (64 bytes default)
uint32_t wait_num; //!< [LWO/SRO] The number of cl_event_wait objects
uint32_t reserved[5]; //!< For the future usage
hsa_dispatch_packet_t aql; //!< [LWO/SRO] AQL packet 64 bytes AQL packet
hsa_kernel_dispatch_packet_t aql; //!< [LWO/SRO] AQL packet 64 bytes AQL packet
};
struct AmdEvent {
+6 -4
Dosyayı Görüntüle
@@ -347,7 +347,7 @@ VirtualGPU::createVirtualQueue(uint deviceQueueSize)
AmdAqlWrap* slots = reinterpret_cast<AmdAqlWrap*>(&header[1]);
for (uint i = 0; i < numSlots; ++i) {
uint64_t argStart = vaBase + argOffs + i * singleArgSize;
slots[i].aql.kernarg_address = argStart;
slots[i].aql.kernarg_address = reinterpret_cast<void*>(argStart);
slots[i].wait_list = argStart + dev().info().maxParameterSize_ + 64;
}
// Upload data back to local memory
@@ -1735,7 +1735,7 @@ VirtualGPU::submitKernelInternalHSA(
}
// Program the kernel arguments for the GPU execution
hsa_dispatch_packet_t* aqlPkt =
hsa_kernel_dispatch_packet_t* aqlPkt =
hsaKernel.loadArguments(*this, kernel, sizes, parameters, nativeMem,
vmDefQueue, &vmParentWrap, memList);
if (NULL == aqlPkt) {
@@ -1812,7 +1812,7 @@ VirtualGPU::submitKernelInternalHSA(
uint64_t* kernels = (uint64_t*)(
const_cast<Memory*>(hsaKernel.prog().kernelTable())->map(this));
for (j = 0; j < hsaKernel.prog().kernels().size(); ++j) {
if (kernels[j] == wraps[i].aql.kernel_object_address) {
if (kernels[j] == wraps[i].aql.kernel_object) {
break;
}
}
@@ -1828,7 +1828,9 @@ VirtualGPU::submitKernelInternalHSA(
printf("Error: couldn't find child kernel!\n");
continue;
}
uint offsArg = wraps[i].aql.kernarg_address -
const uint64_t kernarg_address =
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(wraps[i].aql.kernarg_address));
uint offsArg = kernarg_address -
gpuDefQueue->virtualQueue_->vmAddress();
address argum = gpuDefQueue->virtualQueue_->data() + offsArg;
print << "Kernel: " << child->name() << "\n";