Add clean up of buffered_storage files (#1738)

* Add clean up of buffered_storage files
* Add step to workflows to test for remaining temp files after tests
* Applied suggestions from code review
* Add deletion of all cache files

---------

Co-authored-by: David Galiffi <David.Galiffi@amd.com>
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
6b73f6ab5c
Коммит
d9b00da102
@@ -74,14 +74,14 @@ jobs:
|
||||
packages: read
|
||||
container:
|
||||
image: ghcr.io/rocm/rocprofiler-ubuntu:${{ matrix.system.os-release }}-systems-ci-${{ matrix.system.arch }}
|
||||
options:
|
||||
--privileged
|
||||
options:
|
||||
--privileged
|
||||
--ipc host
|
||||
--group-add video
|
||||
--device /dev/kfd
|
||||
--device /dev/dri
|
||||
--cap-add CAP_SYS_ADMIN
|
||||
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
with:
|
||||
@@ -171,6 +171,19 @@ jobs:
|
||||
-L "rocm" \
|
||||
-LE "rccl|runtime|ompvv"
|
||||
|
||||
- name: Check for Leftover Buffered Files
  # Fails the job when any top-level entry in /tmp whose name starts with
  # "buffered" is still present once the preceding steps have run.
  working-directory: projects/rocprofiler-systems/
  timeout-minutes: 5
  run: |
    set -v
    if ! find /tmp -maxdepth 1 -name 'buffered*' -print -quit | grep -q .; then
      echo "✓ No buffered storage files found in /tmp"
    else
      echo "Error: Found leftover buffered storage files in /tmp:"
      ls -lh /tmp/buffered*
      exit 1
    fi
|
||||
|
||||
- name: Output Logs
|
||||
if: failure() && steps.run_ci.outcome == 'failure'
|
||||
working-directory: projects/rocprofiler-systems
|
||||
|
||||
@@ -136,6 +136,19 @@ jobs:
|
||||
-- \
|
||||
-LE "transpose|rccl|videodecode|jpegdecode|network"
|
||||
|
||||
- name: Check for Leftover Buffered Files
  # Fails the job when any top-level entry in /tmp whose name starts with
  # "buffered" is still present once the preceding steps have run.
  working-directory: projects/rocprofiler-systems/
  timeout-minutes: 5
  run: |
    set -v
    if ! find /tmp -maxdepth 1 -name 'buffered*' -print -quit | grep -q .; then
      echo "✓ No buffered storage files found in /tmp"
    else
      echo "Error: Found leftover buffered storage files in /tmp:"
      ls -lh /tmp/buffered*
      exit 1
    fi
|
||||
|
||||
- name: Test Clean Up
|
||||
timeout-minutes: 10
|
||||
working-directory: projects/rocprofiler-systems/
|
||||
|
||||
@@ -136,6 +136,19 @@ jobs:
|
||||
--
|
||||
-LE "transpose|rccl|videodecode|jpegdecode|network"
|
||||
|
||||
- name: Check for Leftover Buffered Files
  # Fails the job when any top-level entry in /tmp whose name starts with
  # "buffered" is still present once the preceding steps have run.
  working-directory: projects/rocprofiler-systems/
  timeout-minutes: 5
  run: |
    set -v
    if ! find /tmp -maxdepth 1 -name 'buffered*' -print -quit | grep -q .; then
      echo "✓ No buffered storage files found in /tmp"
    else
      echo "Error: Found leftover buffered storage files in /tmp:"
      ls -lh /tmp/buffered*
      exit 1
    fi
|
||||
|
||||
- name: Test Clean Up
|
||||
timeout-minutes: 10
|
||||
working-directory: projects/rocprofiler-systems/
|
||||
|
||||
@@ -320,6 +320,19 @@ jobs:
|
||||
-- \
|
||||
-LE "transpose|rccl|videodecode|jpegdecode|network"
|
||||
|
||||
- name: Check for Leftover Buffered Files
  # Fails the job when any top-level entry in /tmp whose name starts with
  # "buffered" is still present once the preceding steps have run.
  working-directory: projects/rocprofiler-systems/
  timeout-minutes: 5
  run: |
    set -v
    if ! find /tmp -maxdepth 1 -name 'buffered*' -print -quit | grep -q .; then
      echo "✓ No buffered storage files found in /tmp"
    else
      echo "Error: Found leftover buffered storage files in /tmp:"
      ls -lh /tmp/buffered*
      exit 1
    fi
|
||||
|
||||
- name: Test Clean Up
|
||||
timeout-minutes: 10
|
||||
working-directory: projects/rocprofiler-systems/
|
||||
|
||||
@@ -135,6 +135,19 @@ jobs:
|
||||
-- \
|
||||
-LE "transpose|rccl|videodecode|jpegdecode|network"
|
||||
|
||||
- name: Check for Leftover Buffered Files
  # Fails the job when any top-level entry in /tmp whose name starts with
  # "buffered" is still present once the preceding steps have run.
  working-directory: projects/rocprofiler-systems/
  timeout-minutes: 5
  run: |
    set -v
    if ! find /tmp -maxdepth 1 -name 'buffered*' -print -quit | grep -q .; then
      echo "✓ No buffered storage files found in /tmp"
    else
      echo "Error: Found leftover buffered storage files in /tmp:"
      ls -lh /tmp/buffered*
      exit 1
    fi
|
||||
|
||||
- name: Test Clean Up
|
||||
timeout-minutes: 10
|
||||
working-directory: projects/rocprofiler-systems/
|
||||
|
||||
@@ -39,6 +39,31 @@ namespace trace_cache
|
||||
{
|
||||
namespace
|
||||
{
|
||||
void
|
||||
remove_if_exists(const std::string& fname)
|
||||
{
|
||||
if(fname.empty()) return;
|
||||
std::ifstream file(fname);
|
||||
if(file.is_open())
|
||||
{
|
||||
file.close();
|
||||
auto result = std::remove(fname.c_str());
|
||||
if(result == 0)
|
||||
{
|
||||
ROCPROFSYS_DEBUG("Removed file: %s\n", fname.c_str());
|
||||
}
|
||||
else if(errno == ENOENT)
|
||||
{
|
||||
ROCPROFSYS_DEBUG("File does not exist: %s\n", fname.c_str());
|
||||
}
|
||||
else
|
||||
{
|
||||
ROCPROFSYS_WARNING(0, "Failed to remove file: %s (errno: %d - %s)\n",
|
||||
fname.c_str(), errno, std::strerror(errno));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<std::string>
|
||||
list_dir_files(const std::string& path)
|
||||
{
|
||||
@@ -105,6 +130,30 @@ get_cache_files()
|
||||
std::for_each(tmp_files.begin(), tmp_files.end(), parse_and_fill_cache);
|
||||
return cache_map;
|
||||
}
|
||||
|
||||
std::vector<std::string>
|
||||
get_all_cache_files()
|
||||
{
|
||||
const auto tmp_files = list_dir_files(tmp_directory);
|
||||
std::vector<std::string> result{};
|
||||
auto parse_and_fill_cache = [&](const std::string& filename) {
|
||||
const std::regex buff_regex(R"(buffered_storage.*\.bin)");
|
||||
const std::regex meta_regex(R"(metadata.*\.json)");
|
||||
std::smatch match;
|
||||
|
||||
if(std::regex_match(filename, match, buff_regex))
|
||||
{
|
||||
result.push_back(tmp_directory + filename);
|
||||
}
|
||||
else if(std::regex_match(filename, match, meta_regex))
|
||||
{
|
||||
result.push_back(tmp_directory + filename);
|
||||
}
|
||||
};
|
||||
std::for_each(tmp_files.begin(), tmp_files.end(), parse_and_fill_cache);
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
cache_manager&
|
||||
@@ -127,13 +176,13 @@ cache_manager::post_process_bulk()
|
||||
shutdown();
|
||||
}
|
||||
|
||||
auto _cache_files = get_cache_files();
|
||||
|
||||
if(get_use_rocpd())
|
||||
{
|
||||
ROCPROFSYS_PRINT(
|
||||
"Generating rocpd with collected data. This may take a while..\n");
|
||||
|
||||
auto _cache_files = get_cache_files();
|
||||
|
||||
std::vector<std::thread> rocpd_threads;
|
||||
ROCPROFSYS_SCOPED_SAMPLING_ON_CHILD_THREADS(false);
|
||||
|
||||
@@ -179,7 +228,6 @@ cache_manager::post_process_bulk()
|
||||
_post_processing.register_parser_callback(_parser);
|
||||
_post_processing.post_process_metadata();
|
||||
_parser.consume_storage();
|
||||
std::remove(files.metadata.c_str()); // Remove metadata file
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -189,6 +237,15 @@ cache_manager::post_process_bulk()
|
||||
thread.join();
|
||||
}
|
||||
}
|
||||
|
||||
ROCPROFSYS_PRINT("Removing all cached temporary files...\n");
|
||||
|
||||
auto all_cache_files = get_all_cache_files();
|
||||
for(const auto& filename : all_cache_files)
|
||||
{
|
||||
ROCPROFSYS_PRINT("Removing cached temporary file: %s\n", filename.c_str());
|
||||
remove_if_exists(filename);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -248,9 +248,6 @@ storage_parser::consume_storage()
|
||||
}
|
||||
|
||||
ifs.close();
|
||||
ROCPROFSYS_DEBUG("File parsing finished. Removing %s from file system\n",
|
||||
m_filename.c_str());
|
||||
std::remove(m_filename.c_str());
|
||||
|
||||
if(m_on_finished_callback != nullptr)
|
||||
{
|
||||
|
||||
Ссылка в новой задаче
Block a user