P4 to Git Change 1076493 by yaxunl@yaxunl_stg_win50 on 2014/09/12 14:45:24
ECR #377625 - AMDIL Function support: Calculate the total private memory usage of a kernel, including memory used by called functions. This cannot be done by IPA, since the stack size is known only after register allocation due to potential register spills, but MachineFunctionAnalysis cannot persist after a CGSCC pass with the current LLVM version. This change adds private memory usage metadata for non-kernel functions. The total private memory usage of a kernel is calculated when the AMDIL is split for different kernels. The BIF will contain the total private memory size. Affected files ... ... //depot/stg/opencl/drivers/opencl/compiler/lib/utils/amdilUtils.cpp#1 add ... //depot/stg/opencl/drivers/opencl/compiler/lib/utils/amdilUtils.hpp#1 add ... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILKernelManager.cpp#451 edit ... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILKernelManager.h#51 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.cpp#175 edit ... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.hpp#54 edit
Этот коммит содержится в:
@@ -5,6 +5,7 @@
|
||||
#include "os/os.hpp"
|
||||
#include "utils/flags.hpp"
|
||||
#include "include/aclTypes.h"
|
||||
#include "utils/amdilUtils.hpp"
|
||||
#include "utils/bif_section_labels.hpp"
|
||||
#include "device/gpu/gpuprogram.hpp"
|
||||
#include "device/gpu/gpublit.hpp"
|
||||
@@ -232,6 +233,21 @@ NullProgram::isCalled(const ILFunc* base, const ILFunc* func)
|
||||
return false;
|
||||
}
|
||||
|
||||
// Returns this function's own HW private size plus the largest total
// reported by any function in calls_. The result is stored in
// totalHwPrivateSize_ and reused on later calls.
// NOTE(review): the ">= 0" cache test implies totalHwPrivateSize_ is a signed
// field that starts out negative; its declaration is not visible here, so
// confirm against the header.
uint
ILFunc::totalHwPrivateUsage() {
    // Only walk the callees when no cached (non-negative) total exists yet.
    if (totalHwPrivateSize_ < 0) {
        uint deepestCallee = 0;
        for (size_t idx = 0; idx < calls_.size(); ++idx) {
            // Each callee reports its own total, computed recursively.
            const uint calleeUsage = calls_[idx]->totalHwPrivateUsage();
            deepestCallee =
                (calleeUsage > deepestCallee) ? calleeUsage : deepestCallee;
        }
        // Only the largest callee total is added, not the sum of all of them.
        totalHwPrivateSize_ = hwPrivateSize_ + deepestCallee;
    }
    return totalHwPrivateSize_;
}
|
||||
|
||||
void
|
||||
NullProgram::patchMain(std::string& kernel, uint index)
|
||||
{
|
||||
@@ -540,7 +556,6 @@ NullProgram::linkImpl(amd::option::Options* options)
|
||||
// Accumulate all emulated local and private sizes,
|
||||
// necessary for the kernel execution
|
||||
initData.localSize_ += func->localSize_;
|
||||
initData.privateSize_ += func->privateSize_;
|
||||
|
||||
// Accumulate all HW local and private sizes,
|
||||
// necessary for the kernel execution
|
||||
@@ -548,6 +563,9 @@ NullProgram::linkImpl(amd::option::Options* options)
|
||||
initData.hwPrivateSize_ += func->hwPrivateSize_;
|
||||
initData.flags_ |= func->flags_;
|
||||
}
|
||||
initData.privateSize_ = baseFunc->totalHwPrivateUsage();
|
||||
amdilUtils::changePrivateUAVLength(kernel,
|
||||
initData.privateSize_);
|
||||
|
||||
// Create a GPU kernel
|
||||
bool created;
|
||||
@@ -913,7 +931,6 @@ NullProgram::linkImpl(const std::vector<device::Program*>& inputPrograms,
|
||||
// Accumulate all emulated local and private sizes,
|
||||
// necessary for the kernel execution
|
||||
initData.localSize_ += func->localSize_;
|
||||
initData.privateSize_ += func->privateSize_;
|
||||
|
||||
// Accumulate all HW local and private sizes,
|
||||
// necessary for the kernel execution
|
||||
@@ -921,6 +938,9 @@ NullProgram::linkImpl(const std::vector<device::Program*>& inputPrograms,
|
||||
initData.hwPrivateSize_ += func->hwPrivateSize_;
|
||||
initData.flags_ |= func->flags_;
|
||||
}
|
||||
initData.privateSize_ = baseFunc->totalHwPrivateUsage();
|
||||
amdilUtils::changePrivateUAVLength(kernel,
|
||||
initData.privateSize_);
|
||||
|
||||
// Create a GPU kernel
|
||||
bool created;
|
||||
|
||||
Ссылка в новой задаче
Block a user