From d9b00da102711f68306176f6e620a54bbdf68c7e Mon Sep 17 00:00:00 2001 From: Milan Radosavljevic Date: Fri, 7 Nov 2025 17:51:09 +0100 Subject: [PATCH] Add clean up of buffered_storage files (#1738) * Add clean up of buffered_storage files * Add step to workflows to test for remaining temp files after tests * Applied suggestions from code review * add deletion of all cache files --------- Co-authored-by: David Galiffi --- ...rofiler-systems-continuous-integration.yml | 19 +++++- .../workflows/rocprofiler-systems-debian.yml | 13 ++++ .../workflows/rocprofiler-systems-redhat.yml | 13 ++++ .../rocprofiler-systems-ubuntu-jammy.yml | 13 ++++ .../rocprofiler-systems-ubuntu-noble.yml | 13 ++++ .../lib/core/trace_cache/cache_manager.cpp | 63 ++++++++++++++++++- .../lib/core/trace_cache/storage_parser.cpp | 3 - 7 files changed, 128 insertions(+), 9 deletions(-) diff --git a/.github/workflows/rocprofiler-systems-continuous-integration.yml b/.github/workflows/rocprofiler-systems-continuous-integration.yml index 59bc426f49..c7bfe1f83e 100644 --- a/.github/workflows/rocprofiler-systems-continuous-integration.yml +++ b/.github/workflows/rocprofiler-systems-continuous-integration.yml @@ -74,14 +74,14 @@ jobs: packages: read container: image: ghcr.io/rocm/rocprofiler-ubuntu:${{ matrix.system.os-release }}-systems-ci-${{ matrix.system.arch }} - options: - --privileged + options: + --privileged --ipc host --group-add video --device /dev/kfd --device /dev/dri --cap-add CAP_SYS_ADMIN - + steps: - uses: actions/checkout@v5 with: @@ -171,6 +171,19 @@ jobs: -L "rocm" \ -LE "rccl|runtime|ompvv" + - name: Check for Leftover Buffered Files + timeout-minutes: 5 + working-directory: projects/rocprofiler-systems/ + run: | + set -v + if find /tmp -maxdepth 1 -name 'buffered*' -print -quit | grep -q .; then + echo "Error: Found leftover buffered storage files in /tmp:" + ls -lh /tmp/buffered* + exit 1 + else + echo "✓ No buffered storage files found in /tmp" + fi + - name: Output Logs if: failure() && steps.run_ci.outcome == 'failure' working-directory: projects/rocprofiler-systems diff --git a/.github/workflows/rocprofiler-systems-debian.yml b/.github/workflows/rocprofiler-systems-debian.yml index 38c05a3e02..d766ee001c 100644 --- a/.github/workflows/rocprofiler-systems-debian.yml +++ b/.github/workflows/rocprofiler-systems-debian.yml @@ -136,6 +136,19 @@ jobs: -- \ -LE "transpose|rccl|videodecode|jpegdecode|network" + - name: Check for Leftover Buffered Files + timeout-minutes: 5 + working-directory: projects/rocprofiler-systems/ + run: | + set -v + if find /tmp -maxdepth 1 -name 'buffered*' -print -quit | grep -q .; then + echo "Error: Found leftover buffered storage files in /tmp:" + ls -lh /tmp/buffered* + exit 1 + else + echo "✓ No buffered storage files found in /tmp" + fi + - name: Test Clean Up timeout-minutes: 10 working-directory: projects/rocprofiler-systems/ diff --git a/.github/workflows/rocprofiler-systems-redhat.yml b/.github/workflows/rocprofiler-systems-redhat.yml index b1a51808eb..14edd44b10 100644 --- a/.github/workflows/rocprofiler-systems-redhat.yml +++ b/.github/workflows/rocprofiler-systems-redhat.yml @@ -136,6 +136,19 @@ jobs: -- -LE "transpose|rccl|videodecode|jpegdecode|network" + - name: Check for Leftover Buffered Files + timeout-minutes: 5 + working-directory: projects/rocprofiler-systems/ + run: | + set -v + if find /tmp -maxdepth 1 -name 'buffered*' -print -quit | grep -q .; then + echo "Error: Found leftover buffered storage files in /tmp:" + ls -lh /tmp/buffered* + exit 1 + else + echo "✓ No buffered storage files found in /tmp" + fi + - name: Test Clean Up timeout-minutes: 10 working-directory: projects/rocprofiler-systems/ diff --git a/.github/workflows/rocprofiler-systems-ubuntu-jammy.yml b/.github/workflows/rocprofiler-systems-ubuntu-jammy.yml index caf95ccd55..7c608b4c1f 100644 --- a/.github/workflows/rocprofiler-systems-ubuntu-jammy.yml +++ b/.github/workflows/rocprofiler-systems-ubuntu-jammy.yml @@ -320,6 +320,19 @@ jobs: -- \ -LE "transpose|rccl|videodecode|jpegdecode|network" + - name: Check for Leftover Buffered Files + timeout-minutes: 5 + working-directory: projects/rocprofiler-systems/ + run: | + set -v + if find /tmp -maxdepth 1 -name 'buffered*' -print -quit | grep -q .; then + echo "Error: Found leftover buffered storage files in /tmp:" + ls -lh /tmp/buffered* + exit 1 + else + echo "✓ No buffered storage files found in /tmp" + fi + - name: Test Clean Up timeout-minutes: 10 working-directory: projects/rocprofiler-systems/ diff --git a/.github/workflows/rocprofiler-systems-ubuntu-noble.yml b/.github/workflows/rocprofiler-systems-ubuntu-noble.yml index e60a67bb70..b11adf84d6 100644 --- a/.github/workflows/rocprofiler-systems-ubuntu-noble.yml +++ b/.github/workflows/rocprofiler-systems-ubuntu-noble.yml @@ -135,6 +135,19 @@ jobs: -- \ -LE "transpose|rccl|videodecode|jpegdecode|network" + - name: Check for Leftover Buffered Files + timeout-minutes: 5 + working-directory: projects/rocprofiler-systems/ + run: | + set -v + if find /tmp -maxdepth 1 -name 'buffered*' -print -quit | grep -q .; then + echo "Error: Found leftover buffered storage files in /tmp:" + ls -lh /tmp/buffered* + exit 1 + else + echo "✓ No buffered storage files found in /tmp" + fi + - name: Test Clean Up timeout-minutes: 10 working-directory: projects/rocprofiler-systems/ diff --git a/projects/rocprofiler-systems/source/lib/core/trace_cache/cache_manager.cpp b/projects/rocprofiler-systems/source/lib/core/trace_cache/cache_manager.cpp index 73aa345bf7..3993c54179 100644 --- a/projects/rocprofiler-systems/source/lib/core/trace_cache/cache_manager.cpp +++ b/projects/rocprofiler-systems/source/lib/core/trace_cache/cache_manager.cpp @@ -39,6 +39,31 @@ namespace trace_cache { namespace { +void +remove_if_exists(const std::string& fname) +{ + if(fname.empty()) return; + std::ifstream file(fname); + if(file.is_open()) + { + file.close(); + auto result = std::remove(fname.c_str()); + if(result == 0) + { + ROCPROFSYS_DEBUG("Removed file: %s\n", fname.c_str()); + } + else if(errno == ENOENT) + { + ROCPROFSYS_DEBUG("File does not exist: %s\n", fname.c_str()); + } + else + { + ROCPROFSYS_WARNING(0, "Failed to remove file: %s (errno: %d - %s)\n", + fname.c_str(), errno, std::strerror(errno)); + } + } +} + std::vector list_dir_files(const std::string& path) { @@ -105,6 +130,30 @@ get_cache_files() std::for_each(tmp_files.begin(), tmp_files.end(), parse_and_fill_cache); return cache_map; } + +std::vector +get_all_cache_files() +{ + const auto tmp_files = list_dir_files(tmp_directory); + std::vector result{}; + auto parse_and_fill_cache = [&](const std::string& filename) { + const std::regex buff_regex(R"(buffered_storage.*\.bin)"); + const std::regex meta_regex(R"(metadata.*\.json)"); + std::smatch match; + + if(std::regex_match(filename, match, buff_regex)) + { + result.push_back(tmp_directory + filename); + } + else if(std::regex_match(filename, match, meta_regex)) + { + result.push_back(tmp_directory + filename); + } + }; + std::for_each(tmp_files.begin(), tmp_files.end(), parse_and_fill_cache); + return result; +} + } // namespace cache_manager& @@ -127,13 +176,13 @@ cache_manager::post_process_bulk() shutdown(); } + auto _cache_files = get_cache_files(); + if(get_use_rocpd()) { ROCPROFSYS_PRINT( "Generating rocpd with collected data. This may take a while..\n"); - auto _cache_files = get_cache_files(); - std::vector rocpd_threads; ROCPROFSYS_SCOPED_SAMPLING_ON_CHILD_THREADS(false); @@ -179,7 +228,6 @@ cache_manager::post_process_bulk() _post_processing.register_parser_callback(_parser); _post_processing.post_process_metadata(); _parser.consume_storage(); - std::remove(files.metadata.c_str()); // Remove metadata file }); } } @@ -189,6 +237,15 @@ cache_manager::post_process_bulk() thread.join(); } } + + ROCPROFSYS_PRINT("Removing all cached temporary files...\n"); + + auto all_cache_files = get_all_cache_files(); + for(const auto& filename : all_cache_files) + { + ROCPROFSYS_PRINT("Removing cached temporary file: %s\n", filename.c_str()); + remove_if_exists(filename); + } } } diff --git a/projects/rocprofiler-systems/source/lib/core/trace_cache/storage_parser.cpp b/projects/rocprofiler-systems/source/lib/core/trace_cache/storage_parser.cpp index 9c388a7e22..10bbfead5f 100644 --- a/projects/rocprofiler-systems/source/lib/core/trace_cache/storage_parser.cpp +++ b/projects/rocprofiler-systems/source/lib/core/trace_cache/storage_parser.cpp @@ -248,9 +248,6 @@ storage_parser::consume_storage() } ifs.close(); - ROCPROFSYS_DEBUG("File parsing finished. Removing %s from file system\n", - m_filename.c_str()); - std::remove(m_filename.c_str()); if(m_on_finished_callback != nullptr) {