Add clean up of buffered_storage files (#1738)

* Add clean up of buffered_storage files

* Add step to workflows to test for remaining temp files after tests

* Applied suggestions from code review

* Add deletion of all cache files

---------

Co-authored-by: David Galiffi <David.Galiffi@amd.com>
Этот коммит содержится в:
Milan Radosavljevic
2025-11-07 17:51:09 +01:00
коммит произвёл GitHub
родитель 6b73f6ab5c
Коммит d9b00da102
7 изменённых файлов: 128 добавлений и 9 удалений
+16 -3
Просмотреть файл
@@ -74,14 +74,14 @@ jobs:
packages: read
container:
image: ghcr.io/rocm/rocprofiler-ubuntu:${{ matrix.system.os-release }}-systems-ci-${{ matrix.system.arch }}
options:
--privileged
options:
--privileged
--ipc host
--group-add video
--device /dev/kfd
--device /dev/dri
--cap-add CAP_SYS_ADMIN
steps:
- uses: actions/checkout@v5
with:
@@ -171,6 +171,19 @@ jobs:
-L "rocm" \
-LE "rccl|runtime|ompvv"
# Guard step: fails the job (exit 1) when find reports any entry at depth <= 1
# under /tmp whose name starts with "buffered", after listing the matches with
# `ls -lh`. Otherwise it prints a confirmation message and succeeds.
- name: Check for Leftover Buffered Files
timeout-minutes: 5
working-directory: projects/rocprofiler-systems/
run: |
set -v
if find /tmp -maxdepth 1 -name 'buffered*' -print -quit | grep -q .; then
echo "Error: Found leftover buffered storage files in /tmp:"
ls -lh /tmp/buffered*
exit 1
else
echo "✓ No buffered storage files found in /tmp"
fi
- name: Output Logs
if: failure() && steps.run_ci.outcome == 'failure'
working-directory: projects/rocprofiler-systems
+13
Просмотреть файл
@@ -136,6 +136,19 @@ jobs:
-- \
-LE "transpose|rccl|videodecode|jpegdecode|network"
# Guard step: fails the job (exit 1) when find reports any entry at depth <= 1
# under /tmp whose name starts with "buffered", after listing the matches with
# `ls -lh`. Otherwise it prints a confirmation message and succeeds.
- name: Check for Leftover Buffered Files
timeout-minutes: 5
working-directory: projects/rocprofiler-systems/
run: |
set -v
if find /tmp -maxdepth 1 -name 'buffered*' -print -quit | grep -q .; then
echo "Error: Found leftover buffered storage files in /tmp:"
ls -lh /tmp/buffered*
exit 1
else
echo "✓ No buffered storage files found in /tmp"
fi
- name: Test Clean Up
timeout-minutes: 10
working-directory: projects/rocprofiler-systems/
+13
Просмотреть файл
@@ -136,6 +136,19 @@ jobs:
--
-LE "transpose|rccl|videodecode|jpegdecode|network"
# Guard step: fails the job (exit 1) when find reports any entry at depth <= 1
# under /tmp whose name starts with "buffered", after listing the matches with
# `ls -lh`. Otherwise it prints a confirmation message and succeeds.
- name: Check for Leftover Buffered Files
timeout-minutes: 5
working-directory: projects/rocprofiler-systems/
run: |
set -v
if find /tmp -maxdepth 1 -name 'buffered*' -print -quit | grep -q .; then
echo "Error: Found leftover buffered storage files in /tmp:"
ls -lh /tmp/buffered*
exit 1
else
echo "✓ No buffered storage files found in /tmp"
fi
- name: Test Clean Up
timeout-minutes: 10
working-directory: projects/rocprofiler-systems/
+13
Просмотреть файл
@@ -320,6 +320,19 @@ jobs:
-- \
-LE "transpose|rccl|videodecode|jpegdecode|network"
# Guard step: fails the job (exit 1) when find reports any entry at depth <= 1
# under /tmp whose name starts with "buffered", after listing the matches with
# `ls -lh`. Otherwise it prints a confirmation message and succeeds.
- name: Check for Leftover Buffered Files
timeout-minutes: 5
working-directory: projects/rocprofiler-systems/
run: |
set -v
if find /tmp -maxdepth 1 -name 'buffered*' -print -quit | grep -q .; then
echo "Error: Found leftover buffered storage files in /tmp:"
ls -lh /tmp/buffered*
exit 1
else
echo "✓ No buffered storage files found in /tmp"
fi
- name: Test Clean Up
timeout-minutes: 10
working-directory: projects/rocprofiler-systems/
+13
Просмотреть файл
@@ -135,6 +135,19 @@ jobs:
-- \
-LE "transpose|rccl|videodecode|jpegdecode|network"
# Guard step: fails the job (exit 1) when find reports any entry at depth <= 1
# under /tmp whose name starts with "buffered", after listing the matches with
# `ls -lh`. Otherwise it prints a confirmation message and succeeds.
- name: Check for Leftover Buffered Files
timeout-minutes: 5
working-directory: projects/rocprofiler-systems/
run: |
set -v
if find /tmp -maxdepth 1 -name 'buffered*' -print -quit | grep -q .; then
echo "Error: Found leftover buffered storage files in /tmp:"
ls -lh /tmp/buffered*
exit 1
else
echo "✓ No buffered storage files found in /tmp"
fi
- name: Test Clean Up
timeout-minutes: 10
working-directory: projects/rocprofiler-systems/
+60 -3
Просмотреть файл
@@ -39,6 +39,31 @@ namespace trace_cache
{
namespace
{
/// Removes the file at `fname` and logs the outcome.
///
/// An empty path is ignored. A path that is already gone is reported at debug
/// level only; any other removal failure is reported as a warning together with
/// the errno value and its description.
///
/// @param fname  Path of the file to delete.
void
remove_if_exists(const std::string& fname)
{
    if(fname.empty()) return;

    // Attempt the removal directly instead of probing with an ifstream first:
    // the probe silently skipped files that exist but cannot be opened for
    // reading, and left a gap between the existence check and the removal.
    errno            = 0;
    const int result = std::remove(fname.c_str());
    // Snapshot errno before any logging call gets a chance to overwrite it.
    const int err = errno;

    if(result == 0)
    {
        ROCPROFSYS_DEBUG("Removed file: %s\n", fname.c_str());
    }
    else if(err == ENOENT)
    {
        ROCPROFSYS_DEBUG("File does not exist: %s\n", fname.c_str());
    }
    else
    {
        ROCPROFSYS_WARNING(0, "Failed to remove file: %s (errno: %d - %s)\n",
                           fname.c_str(), err, std::strerror(err));
    }
}
std::vector<std::string>
list_dir_files(const std::string& path)
{
@@ -105,6 +130,30 @@ get_cache_files()
std::for_each(tmp_files.begin(), tmp_files.end(), parse_and_fill_cache);
return cache_map;
}
/// Collects the paths of all temporary cache files listed for `tmp_directory`.
///
/// An entry returned by `list_dir_files(tmp_directory)` is selected when its
/// entire name matches either `buffered_storage.*\.bin` or `metadata.*\.json`.
///
/// @return `tmp_directory + filename` for every selected entry, in the order
///         the entries were listed.
std::vector<std::string>
get_all_cache_files()
{
    // Compile the patterns once up front; constructing a std::regex for every
    // directory entry is needlessly expensive.
    const std::regex buff_regex(R"(buffered_storage.*\.bin)");
    const std::regex meta_regex(R"(metadata.*\.json)");

    const auto tmp_files = list_dir_files(tmp_directory);

    std::vector<std::string> result{};
    for(const auto& filename : tmp_files)
    {
        // Both kinds of cache file are handled the same way, so one test suffices.
        if(std::regex_match(filename, buff_regex) ||
           std::regex_match(filename, meta_regex))
        {
            result.push_back(tmp_directory + filename);
        }
    }
    return result;
}
} // namespace
cache_manager&
@@ -127,13 +176,13 @@ cache_manager::post_process_bulk()
shutdown();
}
auto _cache_files = get_cache_files();
if(get_use_rocpd())
{
ROCPROFSYS_PRINT(
"Generating rocpd with collected data. This may take a while..\n");
auto _cache_files = get_cache_files();
std::vector<std::thread> rocpd_threads;
ROCPROFSYS_SCOPED_SAMPLING_ON_CHILD_THREADS(false);
@@ -179,7 +228,6 @@ cache_manager::post_process_bulk()
_post_processing.register_parser_callback(_parser);
_post_processing.post_process_metadata();
_parser.consume_storage();
std::remove(files.metadata.c_str()); // Remove metadata file
});
}
}
@@ -189,6 +237,15 @@ cache_manager::post_process_bulk()
thread.join();
}
}
ROCPROFSYS_PRINT("Removing all cached temporary files...\n");
auto all_cache_files = get_all_cache_files();
for(const auto& filename : all_cache_files)
{
ROCPROFSYS_PRINT("Removing cached temporary file: %s\n", filename.c_str());
remove_if_exists(filename);
}
}
}
-3
Просмотреть файл
@@ -248,9 +248,6 @@ storage_parser::consume_storage()
}
ifs.close();
ROCPROFSYS_DEBUG("File parsing finished. Removing %s from file system\n",
m_filename.c_str());
std::remove(m_filename.c_str());
if(m_on_finished_callback != nullptr)
{