P4 to Git Change 1170011 by kebai@kebai-lnx-desktop on 2015/07/13 13:47:29

ECR #304775 - Bug 10752 kernel caching feature (AMDIL and HSAIL path)
	1. For the stage we want to cache, call getCacheEntry() followed by makeCacheEntry() if the get fails; otherwise directly return cached data.
	a. Each device has a separate cache directory
	b. It logs caching errors, so we can debug the cache and/or detect collisions
	2. Implemented cache size tracking, so we can evict old data when cache files are too large
	3. Added file/path access permission control on both windows and linux
	4. Have read/write file lock protection
	5. -kcache-disable flag can be used to turn on/off the caching functionality
	6. AMD_FORCE_KCACHE_TEST env variable is used for internal testing

	TO DO:
	1. Tracking of timestamps for cache entries
	-LRU eviction when cache grows too large
	2. Track cache entries per application

Affected files ...

... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/common/cache.cpp#1 add
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/common/cache.hpp#1 add
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/common/frontend.cpp#34 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/common/frontend_clang.cpp#20 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/common/v0_8/if_acl.cpp#68 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/amdil_be.cpp#43 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/backends/gpu/hsail_be.cpp#42 edit
... //depot/stg/opencl/drivers/opencl/compiler/lib/utils/OPTIONS.def#124 edit
... //depot/stg/opencl/drivers/opencl/tests/kcache/Makefile#1 add
... //depot/stg/opencl/drivers/opencl/tests/kcache/build/Makefile#1 add
... //depot/stg/opencl/drivers/opencl/tests/kcache/build/Makefile.kcache#1 add
... //depot/stg/opencl/drivers/opencl/tests/kcache/kCacheTest_std.txt#1 add
... //depot/stg/opencl/drivers/opencl/tests/kcache/kernel.cl#1 add
... //depot/stg/opencl/drivers/opencl/tests/kcache/main.cpp#1 add


[ROCm/clr commit: 4b6f2324d0]
This commit is contained in:
foreman
2015-07-13 18:11:44 -04:00
bovenliggende 7ccd0ea843
commit 509bd691bb
6 gewijzigde bestanden met toevoegingen van 1508 en 11 verwijderingen
Diff onderdrukt omdat het te groot bestand Laad Diff
@@ -0,0 +1,123 @@
#ifndef AMD_KERNEL_CACHE_H_
#define AMD_KERNEL_CACHE_H_
#include <cstdio>
#include <cstring>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#ifdef __linux__
#include <unistd.h>
#include <fcntl.h>
#include <pwd.h>
#include <sys/file.h>
#include <sys/types.h>
#else
#include <windows.h>
#include <shlobj.h>
#include <Lmcons.h>
#include <aclapi.h>
#endif
#include "os/os.hpp"
// A single chunk of bytes used as kernel-cache input (e.g. OpenCL source or
// LLVM IR). This is a plain aggregate with no constructor or destructor: the
// members start out uninitialized and the struct never frees `data`.
struct _KernelCacheData {
  char *data;             // First byte of the buffer
  unsigned int dataSize;  // Length of the buffer, in bytes
};
typedef struct _KernelCacheData KernelCacheData;
// On-disk cache of compiler stage outputs.
//
// Intended usage for a stage that should be cached: call cacheInit() once,
// then getCacheEntry(); if that returns false, run the stage and hand its
// output to makeCacheEntry(). Failures are reported through the bool return
// values; the accompanying text is available via ErrorMsg() and can be
// persisted with saveLogToFile().
class KernelCache {
private:
  static const char ESCAPE = 0x7f;
  // TODO: the default cache size (512MB) might be changed later
  static const unsigned int DEFAULT_CACHE_SIZE = 512 * 1024 * 1024;

  unsigned int version;
  unsigned int cacheSize;
  std::string rootPath;
  std::string indexName;
  std::string errorMsg;

  // Set the root path for the cache
  bool setRootPath(const std::string &chipName);
  // Check if file exists
  bool fileExists(const std::string &fileName);
  // Wipe the cache folder structure
  bool wipeCacheFolders();
  // Setup cache tree structure
  bool setUpCacheFolders();
  // Get the cache version and size from the index file
  bool getCacheInfo();
  // Set the cache version and size in the index file
  bool setCacheInfo(unsigned int newVersion, unsigned int newSize);
  // Read contents of a file
  bool readFile(const std::string &fileName, char **contents, size_t &fileSize);
  // Write data to a file
  bool writeFile(const std::string &fileName, const char *data, size_t fileSize);
  // Compute hash value for chunks of data
  unsigned int computeHash(const KernelCacheData *data, const unsigned int numData,
                           const std::string &buildOpts, const std::string &kernelName);
  // Compares two sets of data: true when the first `size` bytes of both
  // buffers are identical.
  inline bool compareData(const char *data0, const char *data1, const unsigned int size) {
    return (std::memcmp(data0, data1, size) == 0);
  }
  // Computes hash and file name from given data
  void makeFileName(const KernelCacheData *data, const unsigned int numData,
                    const std::string &buildOpts, const std::string &kernelName,
                    std::string &pathToFile);
  // Finds path to a file from a given hash value
  void getFilePathFromHash(const unsigned int hash, std::string &pathToFile);
  // Finds the cache entry for a chunk of data
  bool findCacheEntry(const KernelCacheData *data, const unsigned int numData,
                      const std::string &buildOpts, const std::string &kernelName,
                      std::string &pathToFile);
  // Creates a cache file in the cache hierarchy
  bool makeCacheEntry(const KernelCacheData *srcData, const unsigned int srcNum,
                      const std::string &buildOpts, const char *dstData,
                      unsigned int dstSize, bool fromModule);
  // Builds a file for storage into the cache
  bool buildFile(const KernelCacheData *srcData, const unsigned int srcNum,
                 const std::string &buildOpts, const std::string &kernelName,
                 const char *dstData, const unsigned int dstSize,
                 const unsigned int dstHash, char **fileData, unsigned int &dataSize);
  // Parses data from a file
  bool parseFile(const char *fileData, const unsigned int dataSize,
                 KernelCacheData **srcData, unsigned int &srcNum,
                 std::string &buildOpts, std::string &kernelName,
                 char **dstData, unsigned int &dstSize, unsigned int &dstHash);
#if defined(_WIN32)
  // Get Sid of account
  bool getSid(TCHAR *userName, PSID &sid);
#endif
  // Set file to only owner accessible
  // NOTE(review): fileName is taken by value; switching to a const reference
  // would also require changing the out-of-line definition.
  bool setAccessPermission(const std::string fileName, bool isFile = false);

public:
  // Starts with version and size zeroed and all strings empty. cacheSize was
  // previously left uninitialized; the std::string members are already empty
  // after default construction, so they need no explicit clear().
  KernelCache() : version(0), cacheSize(0) {}

  bool cacheInit(unsigned int compilerVersion, const std::string &chipName);
  // Get cache entry corresponding to srcData, if it exists
  bool getCacheEntry(const KernelCacheData *srcData, const unsigned int srcNum,
                     const std::string &buildOpts, const std::string &kernelName,
                     char **dstData, unsigned int &dstSize, unsigned int &dstHash);
  // Make cache entry corresponding to srcData, dstData, buildOpts and kernelName
  bool makeCacheEntry(const KernelCacheData *srcData, const unsigned int srcNum,
                      const std::string &buildOpts, const std::string &kernelName,
                      const char *dstData, const unsigned int dstSize);
  // Returns a copy of the accumulated error text (empty when nothing was logged).
  std::string ErrorMsg() const { return errorMsg; }
  // Control kernel cache test
  bool internalKCacheTestSwitch(bool &canUseCache);
  // Log caching error messages for debugging the cache and/or detecting collisions.
  // Appends extraMsg to the accumulated error text and writes the whole text to
  // "cacheError.log" under the cache root. Does nothing when the root path does
  // not exist. Logging is best effort: the result of writeFile() is ignored.
  void saveLogToFile(std::string extraMsg = " ") {
    if (amd::Os::pathExists(rootPath)) {
      std::string fileName = rootPath + amd::Os::fileSeparator() + "cacheError.log";
      errorMsg += extraMsg;
      writeFile(fileName, errorMsg.c_str(), errorMsg.length());
    }
  }
};
#endif // AMD_KERNEL_CACHE_H_
@@ -4,6 +4,8 @@
#include "top.hpp"
#include "frontend.hpp"
#include "bif/bifbase.hpp"
#include "cache.hpp"
#include "../../../sc/Interface/SCLib_Ver.h"
#include "utils/libUtils.h"
#include "utils/target_mappings.h"
#include "utils/options.hpp"
@@ -219,7 +221,55 @@ amdcl::OCLFrontend::compileCommand(const std::string& singleSrc)
if (!checkFlag(aclutGetCaps(Elf()), capSaveSOURCE)) {
CL()->clAPI.remSec(CL(), Elf(), aclSOURCE);
}
int ret = openclFrontEnd(frontendCmd.c_str(), &Source(), NULL);
int ret = 0;
KernelCache kc;
KernelCacheData clSrc;
std::string kernelID;
char *llvmIR = NULL;
unsigned int llvmIRSize = 0;
bool isCacheReady = false, kernelCached = false;
bool canUseCache = !Options()->oVariables->DisableKernelCaching;
bool kCacheTest = kc.internalKCacheTestSwitch(canUseCache);
size_t pos = frontendCmd.find("--error_output");
std::string buildOpts = frontendCmd.substr(0, pos);
if (canUseCache && Options()->oVariables->OptLevel > 0) {
std::string deviceName(getDeviceName(Elf()->target));
isCacheReady = kc.cacheInit(SC_BUILD_NUMBER, deviceName);
if (!isCacheReady) {
kc.saveLogToFile();
} else {
unsigned int hashVal = 0;
const char *name = Options()->getCurrKernelName();
kernelID = name ? name : " ";
clSrc.data = const_cast<char *>(singleSrc.c_str());
clSrc.dataSize = singleSrc.size();
kernelCached = kc.getCacheEntry((const KernelCacheData *)&clSrc, 1, buildOpts, kernelID, &llvmIR, llvmIRSize, hashVal);
if (!kc.ErrorMsg().empty()) {
kc.saveLogToFile();
}
}
}
if (kernelCached) {
if (kCacheTest) {
fprintf(stdout, "FE to IR stage is cached!\n");
fflush(stdout);
}
source_.assign(llvmIR, llvmIRSize);
if (llvmIR) delete[] llvmIR;
} else {
if (kCacheTest) {
fprintf(stdout, "FE to IR stage is not cached!\n");
fflush(stdout);
}
ret = openclFrontEnd(frontendCmd.c_str(), &Source(), NULL);
// Caching LLVM IR
if (isCacheReady && !kc.makeCacheEntry((const KernelCacheData *)&clSrc, 1, buildOpts, kernelID, Source().data(), Source().size())) {
kc.saveLogToFile();
}
}
// We dump the preprocessed code by invoking clc a second time after the
// original call, just in case something really bad happens in the original
@@ -5,6 +5,8 @@
#include "OpenCLFE.h"
#include "bif/bifbase.hpp"
#include "cache.hpp"
#include "../../../sc/Interface/SCLib_Ver.h"
#include "frontend.hpp"
#include "os/os.hpp"
#include "top.hpp"
@@ -158,11 +160,62 @@ int amdcl::ClangOCLFrontend::compileCommand(const std::string& src) {
ret |= 1;
return ret;
#else
if (!parseOCLSource(ClangOptions, argsToClang, &Source(), &logFromClang)) {
KernelCache kc;
KernelCacheData clSrc;
std::string kernelID, buildOpts;
char *llvmIR = NULL;
unsigned int llvmIRSize = 0;
bool isCacheReady = false, kernelCached = false;
bool canUseCache = !Options()->oVariables->DisableKernelCaching;
bool kCacheTest = kc.internalKCacheTestSwitch(canUseCache);
if (canUseCache && Options()->oVariables->OptLevel > 0) {
std::string deviceName(getDeviceName(Elf()->target));
isCacheReady = kc.cacheInit(SC_BUILD_NUMBER, deviceName);
if (!isCacheReady) {
kc.saveLogToFile();
} else {
unsigned int hashVal = 0;
const char *name = Options()->getCurrKernelName();
kernelID = name ? name : " ";
clSrc.data = const_cast<char *>(src.c_str());
clSrc.dataSize = src.size();
for (std::vector<const char*>::const_iterator it = argsToClang.begin();
it != argsToClang.end(); ++it) {
std::string arg(*it);
buildOpts += arg;
}
kernelCached = kc.getCacheEntry((const KernelCacheData *)&clSrc, 1,
buildOpts, kernelID, &llvmIR, llvmIRSize, hashVal);
if (!kc.ErrorMsg().empty()) {
kc.saveLogToFile();
}
}
}
if (kernelCached) {
if (kCacheTest) {
fprintf(stdout, "FE to IR stage is cached!\n");
fflush(stdout);
}
source_.assign(llvmIR, llvmIRSize);
if (llvmIR) delete[] llvmIR;
} else {
if (kCacheTest) {
fprintf(stdout, "FE to IR stage is not cached!\n");
fflush(stdout);
}
if (!parseOCLSource(ClangOptions, argsToClang, &Source(), &logFromClang)) {
log_ += logFromClang;
log_ += "\nerror: Clang front-end compilation failed!\n";
ret |= 1;
return ret;
}
// Caching LLVM IR
if (isCacheReady && !kc.makeCacheEntry((const KernelCacheData *)&clSrc, 1, buildOpts, kernelID, Source().data(), Source().size())) {
kc.saveLogToFile();
}
}
#endif
@@ -13,6 +13,8 @@
#include "acl.h"
#include "aclTypes.h"
#include "cache.hpp"
#include "../../../../sc/Interface/SCLib_Ver.h"
#include "compiler_stage.hpp"
#include "frontend.hpp"
#include "spir.hpp"
@@ -1106,6 +1108,16 @@ aclCompileInternal(
acl_error error_code = ACL_SUCCESS;
aclLoaderData *ald;
amd::option::Options* Opts = reinterpret_cast<amd::option::Options*>(bin->options);;
KernelCache kc;
KernelCacheData llvmIR;
std::string kernelName;
char *IL = NULL;
unsigned int ILSize = 0;
bool kCacheTest = false;
bool isCacheReady = false, kernelCached = false, llvmIRbinPath = true, canUseCache = true;
bool bHsailTextInput = false;
char *hsail_text_input = NULL;
// Load the frontend to convert from Source to LLVM-IR
if (useFE) {
@@ -1133,6 +1145,103 @@ aclCompileInternal(
}
}
if (useCG) {
ald = cl->cgAPI.init(cl, bin, compile_callback, &error_code);
#ifdef WITH_TARGET_HSAIL
if (isHSAILTarget(bin->target)) {
bool hsailBinary = (!useFE && !useLinker && !useOpt);
hsail_text_input = getenv("AMD_DEBUG_HSAIL_TEXT_INPUT");
// Verify that the internal (blit) kernel is not being compiled
if (hsail_text_input && strcmp(hsail_text_input, "") != 0 && !Opts->oVariables->clInternalKernel) {
bHsailTextInput = true;
}
llvmIRbinPath = !bHsailTextInput && !hsailBinary;
} else
#endif
cl->cgAPI.fini(ald);
canUseCache = !Opts->oVariables->DisableKernelCaching;
kCacheTest = kc.internalKCacheTestSwitch(canUseCache);
if (canUseCache && llvmIRbinPath && Opts->oVariables->OptLevel > 0) {
unsigned int hashVal = 0;
std::string deviceName(getDeviceName(bin->target));
isCacheReady = kc.cacheInit(SC_BUILD_NUMBER, deviceName);
if (!isCacheReady) {
kc.saveLogToFile();
} else {
const char *name = Opts->getCurrKernelName();
kernelName.assign(name ? name : " ");
llvmIR.data = const_cast<char *>(data);
llvmIR.dataSize = data_size;
kernelCached = kc.getCacheEntry((const KernelCacheData *)&llvmIR, 1,
Opts->origOptionStr, kernelName, &IL, ILSize, hashVal);
if (!kc.ErrorMsg().empty()) {
kc.saveLogToFile();
}
}
}
}
if (kernelCached) {
if (kCacheTest) {
fprintf(stdout, "IR to IL stage is cached!\n");
fflush(stdout);
}
uint64_t start_time = 0, stop_time = 0;
if (Opts->oVariables->EnableBuildTiming) {
start_time = amd::Os::timeNanos();
}
ald = cl->cgAPI.init(cl, bin, compile_callback, &error_code);
amdcl::CLCodeGen *aclCG = reinterpret_cast<amdcl::CLCodeGen*>(ald);
#ifdef WITH_TARGET_HSAIL
amdcl::HSAIL *acl = reinterpret_cast<amdcl::HSAIL*>(ald);
if (isHSAILTarget(acl->Elf()->target)) {
// from ACL_TYPE_LLVMIR_BINARY
aclCG->Source().assign(IL, ILSize);
if (IL) delete[] IL;
if (!acl->insertBRIG(aclCG->Source())) {
appendLogToCL(cl, "ERROR: BRIG inserting failed.");
error_code = ACL_CODEGEN_ERROR;
goto internal_compile_failure;
}
char* dumpFileName = ::getenv("AMD_DEBUG_DUMP_HSAIL_ALL_KERNELS");
if (Opts->isDumpFlagSet(amd::option::DUMP_CGIL) || dumpFileName) {
acl->dumpHSAIL(acl->disassembleBRIG(), ".hsail");
}
bifbase *elfBin = reinterpret_cast<bifbase*>(bin->bin);
elfBin->setType(ET_EXEC);
} else
#endif
{
dataStr.assign(IL, ILSize);
if (IL) delete[] IL;
if (checkFlag(aclutGetCaps(aclCG->Elf()), capSaveCG)) {
aclCG->CL()->clAPI.insSec(aclCG->CL(), aclCG->Elf(),
dataStr.data(), dataStr.size(), aclCODEGEN);
}
}
if (!checkFlag(aclutGetCaps(bin), capSaveLLVMIR) || !Opts->oVariables->BinLLVMIR) {
cl->clAPI.remSec(cl, bin, aclLLVMIR);
}
cl->cgAPI.fini(ald);
if (Opts->oVariables->EnableBuildTiming) {
stop_time = amd::Os::timeNanos();
std::stringstream tmp_ss;
tmp_ss << " LLVM time (link+opt+codegen): "
<< (stop_time - start_time)/1000ULL
<< "us\n";
appendLogToCL(cl, tmp_ss.str());
}
} else {
if (kCacheTest) {
fprintf(stdout, "IR to IL stage is not cached!\n");
fflush(stdout);
}
// Use the linker to link in the libraries to the current module.
if (useLinker) {
ald = cl->linkAPI.init(cl, bin, compile_callback, &error_code);
@@ -1159,12 +1268,6 @@ aclCompileInternal(
#ifdef WITH_TARGET_HSAIL
amdcl::HSAIL *acl = reinterpret_cast<amdcl::HSAIL*>(ald);
if (isHSAILTarget(acl->Elf()->target)) {
bool bHsailTextInput = false;
const char *hsail_text_input = getenv("AMD_DEBUG_HSAIL_TEXT_INPUT");
// Verify that the internal (blit) kernel is not being compiled
if (hsail_text_input && strcmp(hsail_text_input, "") != 0 && !acl->Options()->oVariables->clInternalKernel) {
bHsailTextInput = true;
}
if (!bHsailTextInput) {
// from ACL_TYPE_HSAIL_BINARY
if (!useFE && !useLinker && !useOpt) {
@@ -1199,6 +1302,10 @@ aclCompileInternal(
if (!cg || error_code != ACL_SUCCESS) {
goto internal_compile_failure;
}
// Caching HSAIL
if (isCacheReady && !kc.makeCacheEntry((const KernelCacheData *)&llvmIR, 1, Opts->origOptionStr, kernelName, (*cg).c_str(), (*cg).size())) {
kc.saveLogToFile();
}
if (!acl->insertBRIG(*cg)) {
appendLogToCL(cl, "ERROR: BRIG inserting failed.");
error_code = ACL_CODEGEN_ERROR;
@@ -1250,7 +1357,7 @@ aclCompileInternal(
}
}
char* dumpFileName = ::getenv("AMD_DEBUG_DUMP_HSAIL_ALL_KERNELS");
if (acl->Options()->isDumpFlagSet(amd::option::DUMP_CGIL) || dumpFileName) {
if (Opts->isDumpFlagSet(amd::option::DUMP_CGIL) || dumpFileName) {
acl->dumpHSAIL(acl->disassembleBRIG(), ".hsail");
}
bifbase *elfBin = reinterpret_cast<bifbase*>(bin->bin);
@@ -1263,9 +1370,12 @@ aclCompileInternal(
goto internal_compile_failure;
}
dataStr = *cg;
// Caching AMDIL
if (isCacheReady && !kc.makeCacheEntry((const KernelCacheData *)&llvmIR, 1, Opts->origOptionStr, kernelName, (*cg).c_str(), (*cg).size())) {
kc.saveLogToFile();
}
}
if (!checkFlag(aclutGetCaps(bin), capSaveLLVMIR) ||
!(reinterpret_cast<amdcl::CompilerStage*>(ald))->Options()->oVariables->BinLLVMIR) {
if (!checkFlag(aclutGetCaps(bin), capSaveLLVMIR) || !Opts->oVariables->BinLLVMIR) {
cl->clAPI.remSec(cl, bin, aclLLVMIR);
}
cl->cgAPI.fini(ald);
@@ -1273,6 +1383,7 @@ aclCompileInternal(
goto internal_compile_failure;
}
}
}
if (useISA) {
ald = cl->beAPI.init(cl, bin, compile_callback, &error_code);
@@ -1032,6 +1032,14 @@ OPTION(OT_BOOL, \
false, 0, 0, NULL, \
"Enable timing for Kernel build.")
// -kcache-disable
// Boolean option exposed to the compiler as oVariables->DisableKernelCaching.
// When set, the front-end and codegen stages skip kernel caching.
// NOTE(review): the `false` argument is presumably the default value (caching
// stays enabled unless the flag is passed) — confirm against the OPTION macro.
OPTION(OT_BOOL, \
OA_RUNTIME|OVIS_SUPPORT|OVA_DISALLOWED, \
"kcache-disable", NULL, \
DisableKernelCaching, \
false, 0, 0, NULL, \
"Disable kernel caching functionality.")
// -print-compile-phases
OPTION(OT_BOOL, \
OA_RUNTIME|OVIS_SUPPORT|OVA_DISALLOWED, \