P4 to Git Change 1076493 by yaxunl@yaxunl_stg_win50 on 2014/09/12 14:45:24

ECR #377625 - AMDIL Function support: Calculate total private memory usage by a kernel including memory used by called functions.

	This cannot be done by IPA for two reasons: the stack size is known only after register allocation (registers may be spilled), and MachineFunctionAnalysis cannot persist after a CGSCC pass with the current LLVM version.

	This change adds private memory usage metadata for non-kernel functions. The total private memory usage by a kernel is calculated when AMDIL is split for different kernels. BIF will contain total private memory size.

Affected files ...

... //depot/stg/opencl/drivers/opencl/compiler/lib/utils/amdilUtils.cpp#1 add
... //depot/stg/opencl/drivers/opencl/compiler/lib/utils/amdilUtils.hpp#1 add
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILKernelManager.cpp#451 edit
... //depot/stg/opencl/drivers/opencl/compiler/llvm/lib/Target/AMDIL/AMDILKernelManager.h#51 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.cpp#175 edit
... //depot/stg/opencl/drivers/opencl/runtime/device/gpu/gpuprogram.hpp#54 edit
Tento commit je obsažen v:
foreman
2014-09-12 15:02:32 -04:00
rodič fea6100aa9
revize 42f4b2af97
4 změnil soubory, kde provedl 53 přidání a 4 odebrání
+14
Zobrazit soubor
@@ -0,0 +1,14 @@
#include "amdilUtils.hpp"
#include <regex>
#include <string>
#include <sstream>
namespace amdilUtils {

// Change all private uav length in a kernel.
//
// Every declaration in `kernel` of the form
//   dcl_typeless_uav_id(<id>)_stride(<stride>)_length(<old>)_access(private)
// is rewritten so that its length field becomes `length`. The id and stride
// fields are carried over unchanged, and text that does not match the
// pattern is left as it was.
//
// kernel - IL text to patch; overwritten with the patched text.
// length - new value written into the length(...) field of each match.
void changePrivateUAVLength(std::string& kernel, unsigned length) {
  const std::regex pattern("dcl_typeless_uav_id\\(([[:digit:]]+)\\)_stride"
                           "\\(([[:digit:]]+)\\)_length\\([[:digit:]]+\\)_access\\(private\\)");

  // Format the number with std::to_string instead of a stream: a stream is
  // imbued with the global C++ locale, whose numpunct facet may insert
  // digit-grouping separators (e.g. "1,024") into the generated IL.
  // $1 and $2 re-emit the captured id and stride.
  const std::string replacement =
      "dcl_typeless_uav_id($1)_stride($2)_length(" + std::to_string(length) +
      ")_access(private)";

  kernel = std::regex_replace(kernel, pattern, replacement);
}

}  // namespace amdilUtils
+11
Zobrazit soubor
@@ -0,0 +1,11 @@
#ifndef AMDILUTILS_H_
#define AMDILUTILS_H_
#include <string>
namespace amdilUtils {
// Change all private uav length in a kernel.
// Rewrites the length(...) field of every
//   dcl_typeless_uav_id(<id>)_stride(<stride>)_length(<n>)_access(private)
// declaration found in `kernel` to `length`, keeping the id and stride
// fields as they were. `kernel` is overwritten with the patched text.
void changePrivateUAVLength(std::string& kernel, unsigned length);
}
#endif /* AMDILUTILS_H_ */
+22 -2
Zobrazit soubor
@@ -5,6 +5,7 @@
#include "os/os.hpp"
#include "utils/flags.hpp"
#include "include/aclTypes.h"
#include "utils/amdilUtils.hpp"
#include "utils/bif_section_labels.hpp"
#include "device/gpu/gpuprogram.hpp"
#include "device/gpu/gpublit.hpp"
@@ -232,6 +233,21 @@ NullProgram::isCalled(const ILFunc* base, const ILFunc* func)
return false;
}
uint
ILFunc::totalHwPrivateUsage() {
    //! A negative cached value means the total has not been computed yet.
    //! Once computed it is stored in totalHwPrivateSize_ and reused.
    if (totalHwPrivateSize_ < 0) {
        //! Find the largest total reported by any function in calls_.
        uint largestCallee = 0;
        const size_t numCalls = calls_.size();
        for (size_t idx = 0; idx != numCalls; ++idx) {
            const uint calleeTotal = calls_[idx]->totalHwPrivateUsage();
            largestCallee = (calleeTotal > largestCallee) ? calleeTotal : largestCallee;
        }
        //! Total = this function's own HW private size + the largest callee total.
        totalHwPrivateSize_ = hwPrivateSize_ + largestCallee;
    }
    return totalHwPrivateSize_;
}
void
NullProgram::patchMain(std::string& kernel, uint index)
{
@@ -540,7 +556,6 @@ NullProgram::linkImpl(amd::option::Options* options)
// Accumulate all emulated local and private sizes,
// necessary for the kernel execution
initData.localSize_ += func->localSize_;
initData.privateSize_ += func->privateSize_;
// Accumulate all HW local and private sizes,
// necessary for the kernel execution
@@ -548,6 +563,9 @@ NullProgram::linkImpl(amd::option::Options* options)
initData.hwPrivateSize_ += func->hwPrivateSize_;
initData.flags_ |= func->flags_;
}
initData.privateSize_ = baseFunc->totalHwPrivateUsage();
amdilUtils::changePrivateUAVLength(kernel,
initData.privateSize_);
// Create a GPU kernel
bool created;
@@ -913,7 +931,6 @@ NullProgram::linkImpl(const std::vector<device::Program*>& inputPrograms,
// Accumulate all emulated local and private sizes,
// necessary for the kernel execution
initData.localSize_ += func->localSize_;
initData.privateSize_ += func->privateSize_;
// Accumulate all HW local and private sizes,
// necessary for the kernel execution
@@ -921,6 +938,9 @@ NullProgram::linkImpl(const std::vector<device::Program*>& inputPrograms,
initData.hwPrivateSize_ += func->hwPrivateSize_;
initData.flags_ |= func->flags_;
}
initData.privateSize_ = baseFunc->totalHwPrivateUsage();
amdilUtils::changePrivateUAVLength(kernel,
initData.privateSize_);
// Create a GPU kernel
bool created;
+6 -2
Zobrazit soubor
@@ -50,6 +50,7 @@ public:
, hwPrivateSize_(0)
, hwLocalSize_(0)
, flags_(0)
, totalHwPrivateSize_(-1)
{
code_.begin_ = code_.end_ = 0;
metadata_.begin_ = metadata_.end_ = 0;
@@ -74,6 +75,7 @@ public:
hwPrivateSize_ = func.hwPrivateSize_;
hwLocalSize_ = func.hwLocalSize_;
flags_ = func.flags_;
totalHwPrivateSize_ = func.totalHwPrivateSize_;
// Note: we don't copy calls_ and macros_
return *this;
@@ -89,9 +91,11 @@ public:
uint hwPrivateSize_; //!< HW private ring allocation by the function
uint hwLocalSize_; //!< HW local ring allocation by the function
uint flags_; //!< The IL func flags/properties
std::vector<const ILFunc*> calls_; //! Functions called from the current
long long totalHwPrivateSize_; //!< total HW private usage including called functions
std::vector<ILFunc*> calls_; //! Functions called from the current
std::vector<uint> macros_; //! Macros, used in the IL function
uint totalHwPrivateUsage(); //!< total HW private usage including called functions
};
//! \class empty program