diff --git a/.github/workflows/rocprofiler-systems-formatting.yml b/.github/workflows/rocprofiler-systems-formatting.yml index 77b28d7f9f..499419a621 100644 --- a/.github/workflows/rocprofiler-systems-formatting.yml +++ b/.github/workflows/rocprofiler-systems-formatting.yml @@ -70,7 +70,7 @@ jobs: runs-on: ubuntu-22.04 strategy: matrix: - python-version: [3.8] + python-version: ['3.10'] steps: - uses: actions/checkout@v4 diff --git a/projects/rocprofiler-systems/.pre-commit-config.yaml b/projects/rocprofiler-systems/.pre-commit-config.yaml index 3f954dab5f..b17f0bcdd8 100644 --- a/projects/rocprofiler-systems/.pre-commit-config.yaml +++ b/projects/rocprofiler-systems/.pre-commit-config.yaml @@ -30,9 +30,12 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v5.0.0 hooks: + - id: check-json # Check JSON files for syntax errors - id: check-yaml # Check YAML files for syntax errors - id: trailing-whitespace # Remove trailing whitespace - id: end-of-file-fixer # Fix files to have a newline at the end + - id: pretty-format-json # Pretty-format JSON files + args: ['--indent', '4', '--autofix'] - repo: https://github.com/pre-commit/mirrors-clang-format rev: v18.1.8 # Version 18 as specified in contributor guide diff --git a/projects/rocprofiler-systems/tests/rocpd-validation-flow.md b/projects/rocprofiler-systems/tests/rocpd-validation-flow.md new file mode 100644 index 0000000000..ed858a4c47 --- /dev/null +++ b/projects/rocprofiler-systems/tests/rocpd-validation-flow.md @@ -0,0 +1,113 @@ +# ROCpd Validation Flow + +```mermaid +flowchart TD + A[Start: validate-rocpd.py] --> B{Parse Arguments} + B --> |--help| C[Display Help & Exit] + B --> |Missing --database| D[Show Error & Exit] + B --> |Valid Args| E[Load Validation Rules] + + E --> F{Rules File Exists?} + F --> |No| G[Use Default Rules
default-rules.json] + F --> |Yes| H[Load Custom Rules] + G --> I[Parse JSON Rules] + H --> I + + I --> J[Create Rule Objects:
• required_table
• validation_rule] + + J --> K{Database File Exists?} + K --> |No| L[Error: File Not Found] + K --> |Yes| M[Connect to SQLite Database] + + M --> N[Get All Tables from Database
SELECT name FROM sqlite_master] + + N --> O[Start Validation Loop] + O --> P[For Each Required Table Rule] + + P --> Q{Table Exists
in Database?} + Q --> |No| R[❌ FAIL: Table Missing] + Q --> |Yes| S[Check Required Columns
PRAGMA table_info] + + S --> T{All Required
Columns Present?} + T --> |No| U[❌ FAIL: Missing Columns] + T --> |Yes| V[Check Minimum Row Count] + + V --> W{Meets Minimum
Row Count?} + W --> |No| X[❌ FAIL: Insufficient Rows] + W --> |Yes| Y[Execute Validation Queries] + + Y --> Z[For Each Query in Rule] + Z --> AA[Execute SQL Query] + AA --> BB[Get Result] + BB --> CC{Validation
Comparison Pass?} + + CC --> |No| DD[❌ FAIL: Query Failed
Log Error Message] + CC --> |Yes| EE[✅ PASS: Query Passed] + + EE --> FF{More Queries?} + DD --> FF + FF --> |Yes| Z + FF --> |No| GG{More Tables?} + + R --> GG + U --> GG + X --> GG + + GG --> |Yes| P + GG --> |No| HH{All Validations
Passed?} + + HH --> |Yes| II[✅ SUCCESS
Exit Code: 0] + HH --> |No| JJ[❌ FAILURE
Exit Code: 65] + + L --> KK[Exit Code: 1] + + subgraph "Validation Rules Structure" + LL[JSON Rules File] + LL --> MM["required_tables[]"] + MM --> NN["Table Definition:
• name
• required_columns
• min_rows
• validation_queries"] + NN --> OO["Validation Query:
• description
• query (SQL)
• expected_result
• comparison
• error_message"] + end + + subgraph "Database Structure" + PP[ROCpd SQLite Database] + PP --> QQ[Tables:
• kernel_summary
• kernels
• threads
• ...] + QQ --> RR[Columns per Table] + RR --> SS[Data Rows] + end + + subgraph "Comparison Operations" + TT[Supported Comparisons:
• equals
• greater_than
• less_than
• greater_than_or_equal
• less_than_or_equal
• not_equals] + end +``` + +## Input Phase + +- Takes a ROCpd database file (.db) as input +- Optionally accepts custom validation rules (JSON file) +- Uses default rules if no custom rules provided + +## Validation Rules Structure + +- JSON-based configuration with required tables +- Each table has: + - Required columns to check for + - Minimum row count requirements + - Custom SQL validation queries + +## Validation Process + +- For each required table, the tool: + + - Checks table existence in the database + - Verifies required columns are present + - Validates minimum row count + - Executes custom SQL queries with various comparison operations + +## Output & Results + +- Real-time feedback with ✅/❌ indicators +- Detailed error messages for failures +- Exit codes: + - **0**: All validations passed + - **65**: Validation failures + - **1**: General errors (file not found, etc.) diff --git a/projects/rocprofiler-systems/tests/rocpd-validation-rules/default-rules.json b/projects/rocprofiler-systems/tests/rocpd-validation-rules/default-rules.json new file mode 100644 index 0000000000..5a8725c8fe --- /dev/null +++ b/projects/rocprofiler-systems/tests/rocpd-validation-rules/default-rules.json @@ -0,0 +1,73 @@ +{ + "required_tables": [ + { + "name": "kernel_summary", + "required_columns": [ + "name", + "calls", + "DURATION (nsec)" + ], + "validation_queries": [ + { + "comparison": "equals", + "description": "Check for null function names", + "error_message": "Found API calls with null function names", + "expected_result": 0, + "query": "SELECT COUNT(*) as count FROM kernel_summary WHERE name IS NULL" + }, + { + "comparison": "greater_than", + "description": "Check that we have some kernel calls", + "error_message": "No kernel calls found in summary", + "expected_result": 0, + "query": "SELECT COUNT(*) as count FROM kernel_summary" + } + ] + }, + { + "name": "kernels", + "required_columns": [ + "id", + "category", + "name", + "start", + "end", + "queue", + "stream" + ], 
+ "validation_queries": [ + { + "comparison": "equals", + "description": "Check for null function names", + "error_message": "Found kernels with null function names", + "expected_result": 0, + "query": "SELECT COUNT(*) as count FROM kernels WHERE name IS NULL" + }, + { + "comparison": "greater_than", + "description": "Check that we have kernel entries", + "error_message": "No kernel entries found", + "expected_result": 0, + "query": "SELECT COUNT(*) as count FROM kernels" + }, + { + "comparison": "equals", + "description": "Check for kernels with no active time", + "error_message": "Kernels with no active execution times found", + "expected_result": 0, + "query": "SELECT COUNT(*) as count FROM kernels WHERE (end - start) = 0" + } + ] + }, + { + "min_rows": 0, + "name": "threads", + "required_columns": [ + "tid", + "start", + "end", + "name" + ] + } + ] +} diff --git a/projects/rocprofiler-systems/tests/rocpd-validation-rules/jpeg-decode/amd-smi-rules.json b/projects/rocprofiler-systems/tests/rocpd-validation-rules/jpeg-decode/amd-smi-rules.json new file mode 100644 index 0000000000..9ffcb9ef64 --- /dev/null +++ b/projects/rocprofiler-systems/tests/rocpd-validation-rules/jpeg-decode/amd-smi-rules.json @@ -0,0 +1,58 @@ +{ + "required_tables": [ + { + "min_rows": 1, + "name_prefix": "rocpd_info_pmc_", + "required_columns": [ + "agent_id", + "target_arch", + "name", + "symbol", + "description", + "units", + "value_type" + ], + "validation_queries": [ + { + "comparison": "greater_than", + "description": "Check for jpeg_activity amd-smi metrics", + "error_message": "Did not find jpeg_activity in amd-smi metrics", + "expected_result": 1, + "query": "SELECT COUNT(*) as count FROM {table_name} WHERE symbol LIKE 'JpegAct%'" + } + ] + }, + { + "min_rows": 500, + "name_prefix": "rocpd_pmc_event_", + "required_columns": [ + "event_id", + "pmc_id", + "value" + ], + "validation_queries": [ + { + "comparison": "greater_than", + "description": "Check for amd-smi monitoring busy 
times", + "error_message": "Less than expected number of captured amd-smi mm-busy samples!", + "expected_result": 50, + "query": "SELECT COUNT(*) as count FROM {table_name} event JOIN rocpd_info_pmc info ON event.pmc_id = info.id WHERE info.name = 'device_busy_mm'" + }, + { + "comparison": "greater_than", + "description": "Check for amd-smi monitoring GPU memory usage", + "error_message": "Less than expected number of captured amd-smi memory-usage samples!", + "expected_result": 50, + "query": "SELECT COUNT(*) as count FROM {table_name} event JOIN rocpd_info_pmc info ON event.pmc_id = info.id WHERE info.name = 'device_memory_usage'" + }, + { + "comparison": "greater_than", + "description": "Check for amd-smi monitoring JPEG activity", + "error_message": "Less than expected activity in amd-smi jpeg-activity samples!", + "expected_result": 50, + "query": "SELECT COUNT(*) as count FROM {table_name} event JOIN rocpd_info_pmc info ON event.pmc_id = info.id WHERE info.name LIKE 'device_jpeg_activity_%' and event.value > 0" + } + ] + } + ] +} diff --git a/projects/rocprofiler-systems/tests/rocpd-validation-rules/jpeg-decode/sdk-metrics-rules.json b/projects/rocprofiler-systems/tests/rocpd-validation-rules/jpeg-decode/sdk-metrics-rules.json new file mode 100644 index 0000000000..7fba791fb8 --- /dev/null +++ b/projects/rocprofiler-systems/tests/rocpd-validation-rules/jpeg-decode/sdk-metrics-rules.json @@ -0,0 +1,133 @@ +{ + "required_tables": [ + { + "commit": "Validation rules for rocm_rocjpeg_api", + "name": "regions", + "required_columns": [ + "id", + "guid", + "category", + "name" + ], + "validation_queries": [ + { + "comparison": "greater_than", + "description": "Verify that 'rocm_rocjpeg_api' appears in category at least 500 times in table regions", + "error_message": "'rocm_rocjpeg_api' category entries are fewer than expected in regions", + "expected_result": 100, + "query": "SELECT COUNT(*) FROM regions WHERE category = 'rocm_rocjpeg_api';" + }, + { + "comparison": 
"equals", + "description": "Ensure there are no rocJPEG API calls that last 0 seconds", + "error_message": "Found rocJPEG API captures where duration is 0", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM regions WHERE category = 'rocm_rocjpeg_api' AND duration = 0;" + }, + { + "comparison": "equals", + "description": "Check for any NULL values in the 'name' column of regions", + "error_message": "NULL entries found in the name column of regions", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM regions WHERE name IS NULL;" + } + ] + }, + { + "commit": "Validation rules for rocm_rocjpeg_api", + "name": "rocpd_string", + "required_columns": [ + "id", + "guid", + "string" + ], + "validation_queries": [ + { + "comparison": "greater_than", + "description": "Verify that 'rocm_rocjpeg_api' string is present in the table", + "error_message": "'rocm_rocjpeg_api' string not found in the table rocpd_string", + "expected_result": 1, + "query": "SELECT COUNT(*) FROM rocpd_string WHERE string LIKE '%rocm_rocjpeg_api%';" + } + ] + }, + { + "commit": "Validation rules for hip_api", + "name": "events_args", + "required_columns": [ + "event_id", + "category", + "stack_id", + "parent_stack_id", + "correlation_id" + ], + "validation_queries": [ + { + "comparison": "greater_than", + "description": "Verify that 'rocm_hip_api' appears in category more than 100 times in table events_args", + "error_message": "'rocm_hip_api' category entries are fewer than expected in events_args", + "expected_result": 100, + "query": "SELECT COUNT(*) FROM events_args WHERE category = 'rocm_hip_api';" + }, + { + "comparison": "equals", + "description": "Check for missing category entries", + "error_message": "Empty or NULL category entries found in events_args", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM events_args WHERE category IS NULL OR TRIM(category) = '';" + } + ] + }, + { + "commit": "Validation rules for hip_api", + "name": "regions", + "required_columns": [ + "id", + 
"guid", + "category", + "name" + ], + "validation_queries": [ + { + "comparison": "greater_than", + "description": "Verify that 'rocm_hip_api' appears in category at least 50 times in table regions", + "error_message": "'rocm_hip_api' category entries are fewer than expected in regions", + "expected_result": 50, + "query": "SELECT COUNT(*) FROM regions WHERE category = 'rocm_hip_api';" + }, + { + "comparison": "equals", + "description": "Ensure there are no HIP API calls that last 0 seconds", + "error_message": "Found HIP API captures where duration is 0", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM regions WHERE category = 'rocm_hip_api' AND duration = 0;" + }, + { + "comparison": "equals", + "description": "Check for any NULL values in the 'name' column of regions", + "error_message": "NULL entries found in the name column of regions", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM regions WHERE name IS NULL;" + } + ] + }, + { + "commit": "Validation rule for hip_api", + "name": "rocpd_string", + "required_columns": [ + "id", + "guid", + "string" + ], + "validation_queries": [ + { + "comparison": "greater_than_or_equal", + "description": "Verify that 'rocm_hip_api' string is present in the table", + "error_message": "'rocm_hip_api' string not found in the table rocpd_string", + "expected_result": 1, + "query": "SELECT COUNT(*) FROM rocpd_string WHERE string LIKE '%rocm_hip_api%';" + } + ] + } + ] +} diff --git a/projects/rocprofiler-systems/tests/rocpd-validation-rules/jpeg-decode/validation-rules.json b/projects/rocprofiler-systems/tests/rocpd-validation-rules/jpeg-decode/validation-rules.json new file mode 100644 index 0000000000..97c1f4473a --- /dev/null +++ b/projects/rocprofiler-systems/tests/rocpd-validation-rules/jpeg-decode/validation-rules.json @@ -0,0 +1,74 @@ +{ + "required_tables": [ + { + "name": "kernel_summary", + "required_columns": [ + "name", + "calls", + "DURATION (nsec)" + ], + "validation_queries": [ + { + "comparison": 
"equals", + "description": "Check for null function names", + "error_message": "Found API calls with null function names", + "expected_result": 0, + "query": "SELECT COUNT(*) as count FROM kernel_summary WHERE name IS NULL" + }, + { + "comparison": "greater_than", + "description": "Check that we have some kernel calls", + "error_message": "No kernel calls found in summary", + "expected_result": 0, + "query": "SELECT COUNT(*) as count FROM kernel_summary" + } + ] + }, + { + "min_rows": 50, + "name": "kernels", + "required_columns": [ + "id", + "category", + "name", + "start", + "end", + "queue", + "stream" + ], + "validation_queries": [ + { + "comparison": "equals", + "description": "Check for null function names", + "error_message": "Found kernels with null function names", + "expected_result": 0, + "query": "SELECT COUNT(*) as count FROM kernels WHERE name IS NULL" + }, + { + "comparison": "greater_than", + "description": "Check that we have kernel entries", + "error_message": "No kernel entries found", + "expected_result": 50, + "query": "SELECT COUNT(*) as count FROM kernels" + }, + { + "comparison": "equals", + "description": "Check for kernels with no active time", + "error_message": "Kernels with no active execution times found", + "expected_result": 0, + "query": "SELECT COUNT(*) as count FROM kernels WHERE (end - start) = 0" + } + ] + }, + { + "min_rows": 3, + "name": "threads", + "required_columns": [ + "tid", + "start", + "end", + "name" + ] + } + ] +} diff --git a/projects/rocprofiler-systems/tests/rocpd-validation-rules/openmp-target/kernel-rules.json b/projects/rocprofiler-systems/tests/rocpd-validation-rules/openmp-target/kernel-rules.json new file mode 100644 index 0000000000..32d3451632 --- /dev/null +++ b/projects/rocprofiler-systems/tests/rocpd-validation-rules/openmp-target/kernel-rules.json @@ -0,0 +1,85 @@ +{ + "required_tables": [ + { + "name": "kernel_summary", + "required_columns": [ + "name", + "calls", + "DURATION (nsec)" + ], + 
"validation_queries": [ + { + "comparison": "equals", + "description": "Check for null function names", + "error_message": "Found API calls with null function names", + "expected_result": 0, + "query": "SELECT COUNT(*) as count FROM kernel_summary WHERE name IS NULL" + }, + { + "comparison": "equals", + "description": "Check for 3 unique kernels", + "error_message": "Expecting 3 unique kernels", + "expected_result": 3, + "query": "SELECT COUNT(*) as count FROM kernel_summary" + } + ] + }, + { + "min_rows": 12, + "name": "kernels", + "required_columns": [ + "id", + "category", + "name", + "start", + "end", + "queue", + "stream" + ], + "validation_queries": [ + { + "comparison": "equals", + "description": "Check for null function names", + "error_message": "Found kernels with null function names", + "expected_result": 0, + "query": "SELECT COUNT(*) as count FROM kernels WHERE name IS NULL" + }, + { + "comparison": "equals", + "description": "Check that we have 12 kernel dispatches", + "error_message": "Expecting 12 kernel dispatches", + "expected_result": 12, + "query": "SELECT COUNT(*) as count FROM kernels" + }, + { + "comparison": "equals", + "description": "Check for kernels with no active time", + "error_message": "Kernels with no active execution times found", + "expected_result": 0, + "query": "SELECT COUNT(*) as count FROM kernels WHERE (end - start) = 0" + }, + { + "comparison": "equals", + "description": "Check we have 4 kernels named %Z4vmulIiEvPT_S1_S1_i_l51.kd", + "error_message": "Unexpected %Z4vmulIiEvPT_S1_S1_i_l51.kd kernel dispatches", + "expected_result": 4, + "query": "SELECT COUNT(*) as count FROM kernels WHERE name LIKE '__omp_offloading_%Z4vmulIiEvPT_S1_S1_i_l51.kd'" + }, + { + "comparison": "equals", + "description": "Check we have 4 kernels named %Z4vmulIfEvPT_S1_S1_i_l51.kd", + "error_message": "Unexpected %Z4vmulIfEvPT_S1_S1_i_l51.kd kernel dispatches", + "expected_result": 4, + "query": "SELECT COUNT(*) as count FROM kernels WHERE name 
LIKE '__omp_offloading_%Z4vmulIfEvPT_S1_S1_i_l51.kd'" + }, + { + "comparison": "equals", + "description": "Check we have 4 kernels named %Z4vmulIdEvPT_S1_S1_i_l51.kd", + "error_message": "Unexpected %Z4vmulIdEvPT_S1_S1_i_l51.kd kernel dispatches", + "expected_result": 4, + "query": "SELECT COUNT(*) as count FROM kernels WHERE name LIKE '__omp_offloading_%Z4vmulIdEvPT_S1_S1_i_l51.kd'" + } + ] + } + ] +} diff --git a/projects/rocprofiler-systems/tests/rocpd-validation-rules/openmp-target/sdk-metrics-rules.json b/projects/rocprofiler-systems/tests/rocpd-validation-rules/openmp-target/sdk-metrics-rules.json new file mode 100644 index 0000000000..8f290700f7 --- /dev/null +++ b/projects/rocprofiler-systems/tests/rocpd-validation-rules/openmp-target/sdk-metrics-rules.json @@ -0,0 +1,99 @@ +{ + "required_tables": [ + { + "commit": "Validation rules for rocm_ompt_api", + "name": "regions", + "required_columns": [ + "id", + "guid", + "category", + "name" + ], + "validation_queries": [ + { + "comparison": "greater_than", + "description": "Verify that 'rocm_ompt_api' appears in category at least 100 times in table regions", + "error_message": "'rocm_ompt_api' category entries are fewer than expected in regions", + "expected_result": 100, + "query": "SELECT COUNT(*) FROM regions WHERE category = 'rocm_ompt_api';" + }, + { + "comparison": "equals", + "description": "Check for any NULL values in the 'name' column of regions", + "error_message": "NULL entries found in the name column of regions", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM regions WHERE name IS NULL;" + } + ] + }, + { + "commit": "Validation rules for rocm_ompt_api", + "name": "rocpd_string", + "required_columns": [ + "id", + "guid", + "string" + ], + "validation_queries": [ + { + "comparison": "greater_than_or_equal", + "description": "Verify that 'rocm_ompt_api' string is present in the table", + "error_message": "'rocm_ompt_api' string not found in the table rocpd_string", + "expected_result": 1, + 
"query": "SELECT COUNT(*) FROM rocpd_string WHERE string LIKE '%rocm_ompt_api%';" + } + ] + }, + { + "commit": "Validation rules for hsa_api", + "name": "regions", + "required_columns": [ + "id", + "guid", + "category", + "name" + ], + "validation_queries": [ + { + "comparison": "greater_than", + "description": "Verify that 'rocm_hsa_api' appears in category at least 500 times in table regions", + "error_message": "'rocm_hsa_api' category entries are fewer than expected in regions", + "expected_result": 500, + "query": "SELECT COUNT(*) FROM regions WHERE category = 'rocm_hsa_api';" + }, + { + "comparison": "equals", + "description": "Ensure there are no HSA API calls that last 0 seconds", + "error_message": "Found HSA API captures where duration is 0", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM regions WHERE category = 'rocm_hsa_api' AND duration = 0;" + }, + { + "comparison": "equals", + "description": "Check for any NULL values in the 'name' column of regions", + "error_message": "NULL entries found in the name column of regions", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM regions WHERE name IS NULL;" + } + ] + }, + { + "commit": "Validation rule for hsa_api", + "name": "rocpd_string", + "required_columns": [ + "id", + "guid", + "string" + ], + "validation_queries": [ + { + "comparison": "greater_than_or_equal", + "description": "Verify that 'rocm_hsa_api' string is present in the table", + "error_message": "'rocm_hsa_api' string not found in the table rocpd_string", + "expected_result": 1, + "query": "SELECT COUNT(*) FROM rocpd_string WHERE string LIKE '%rocm_hsa_api%';" + } + ] + } + ] +} diff --git a/projects/rocprofiler-systems/tests/rocpd-validation-rules/roctx/amd-smi-rules.json b/projects/rocprofiler-systems/tests/rocpd-validation-rules/roctx/amd-smi-rules.json new file mode 100644 index 0000000000..5f5874cc4a --- /dev/null +++ b/projects/rocprofiler-systems/tests/rocpd-validation-rules/roctx/amd-smi-rules.json @@ -0,0 +1,65 @@ 
+{ + "required_tables": [ + { + "min_rows": 4, + "name_prefix": "rocpd_info_pmc_", + "required_columns": [ + "agent_id", + "target_arch", + "name", + "symbol", + "description", + "units", + "value_type" + ], + "validation_queries": [ + { + "comparison": "greater_than", + "description": "Check for amd-smi monitoring categories", + "error_message": "Found none of the amd-smi categories", + "expected_result": 4, + "query": "SELECT COUNT(*) as count FROM {table_name} WHERE target_arch is 'GPU'" + } + ] + }, + { + "min_rows": 100, + "name_prefix": "rocpd_pmc_event_", + "required_columns": [ + "event_id", + "pmc_id", + "value" + ], + "validation_queries": [ + { + "comparison": "greater_than", + "description": "Check for amd-smi monitoring busy times", + "error_message": "Less than expected number of captured amd-smi-busy samples!", + "expected_result": 10, + "query": "SELECT COUNT(*) as count FROM {table_name} event JOIN rocpd_info_pmc info ON event.pmc_id = info.id WHERE info.name = 'device_busy_mm'" + }, + { + "comparison": "greater_than", + "description": "Check for amd-smi monitoring GPU temperature", + "error_message": "Less than expected number of captured amd-smi-temperature samples!", + "expected_result": 10, + "query": "SELECT COUNT(*) as count FROM {table_name} event JOIN rocpd_info_pmc info ON event.pmc_id = info.id WHERE info.name = 'device_temp'" + }, + { + "comparison": "greater_than", + "description": "Check for amd-smi monitoring GPU power consumption", + "error_message": "Less than expected number of captured amd-smi-power samples!", + "expected_result": 10, + "query": "SELECT COUNT(*) as count FROM {table_name} event JOIN rocpd_info_pmc info ON event.pmc_id = info.id WHERE info.name = 'device_power'" + }, + { + "comparison": "greater_than", + "description": "Check for amd-smi monitoring GPU memory usage", + "error_message": "Less than expected number of captured amd-smi-memory-usage samples!", + "expected_result": 10, + "query": "SELECT COUNT(*) as 
count FROM {table_name} event JOIN rocpd_info_pmc info ON event.pmc_id = info.id WHERE info.name = 'device_memory_usage'" + } + ] + } + ] +} diff --git a/projects/rocprofiler-systems/tests/rocpd-validation-rules/roctx/sdk-metrics-rules.json b/projects/rocprofiler-systems/tests/rocpd-validation-rules/roctx/sdk-metrics-rules.json new file mode 100644 index 0000000000..b0eca381d9 --- /dev/null +++ b/projects/rocprofiler-systems/tests/rocpd-validation-rules/roctx/sdk-metrics-rules.json @@ -0,0 +1,110 @@ +{ + "required_tables": [ + { + "commit": "Validation rules for rocm_marker_api", + "name": "events_args", + "required_columns": [ + "event_id", + "category", + "stack_id", + "parent_stack_id", + "correlation_id" + ], + "validation_queries": [ + { + "comparison": "greater_than", + "description": "Verify that 'rocm_marker_api' appears in category at least 5 times in table events_args", + "error_message": "'rocm_marker_api' category entries are fewer than expected in events_args", + "expected_result": 5, + "query": "SELECT COUNT(*) FROM events_args WHERE category = 'rocm_marker_api';" + }, + { + "comparison": "equals", + "description": "Check for missing category entries", + "error_message": "Empty or NULL category entries found in events_args", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM events_args WHERE category IS NULL OR TRIM(category) = '';" + } + ] + }, + { + "commit": "Validation rules for rocm_marker_api", + "name": "regions", + "required_columns": [ + "id", + "guid", + "category", + "name" + ], + "validation_queries": [ + { + "comparison": "equals", + "description": "Verify that 'rocm_marker_api' appears in category 11 times in 'regions' table", + "error_message": "Expected 11 'rocm_marker_api' entries in the 'regions' table", + "expected_result": 11, + "query": "SELECT COUNT(*) FROM regions WHERE category = 'rocm_marker_api';" + }, + { + "comparison": "equals", + "description": "Ensure there are no rocTX API calls that last 0 seconds", + 
"error_message": "Found rocTX API captures where duration is 0", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM regions WHERE category = 'rocm_marker_api' AND duration = 0;" + }, + { + "comparison": "equals", + "description": "Check for any NULL values in the 'name' column of regions", + "error_message": "NULL entries found in the name column of regions", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM regions WHERE name IS NULL;" + }, + { + "comparison": "equals", + "description": "Verify that 'roctxMarkA' appears 5 times in table 'regions'", + "error_message": "Expected 5 'roctxMarkA' entries in `regions` table", + "expected_result": 5, + "query": "SELECT COUNT(*) FROM regions WHERE category = 'rocm_marker_api' AND name = 'roctxMarkA';" + }, + { + "comparison": "equals", + "description": "Verify that 'roctxRangePop' appears 3 times in table 'regions'", + "error_message": "Expected 3 'roctxRangePop' entries in `regions` table", + "expected_result": 3, + "query": "SELECT COUNT(*) FROM regions WHERE category = 'rocm_marker_api' AND name = 'roctxRangePop';" + }, + { + "comparison": "equals", + "description": "Verify that 'roctxRangeStop' appears 2 times in table 'regions'", + "error_message": "Expected 2 'roctxRangeStop' entries in `regions` table", + "expected_result": 2, + "query": "SELECT COUNT(*) FROM regions WHERE category = 'rocm_marker_api' AND name = 'roctxRangeStop';" + } + ] + }, + { + "commit": "Validation rule for rocm_marker_api", + "name": "rocpd_string", + "required_columns": [ + "id", + "guid", + "string" + ], + "validation_queries": [ + { + "comparison": "greater_than_or_equal", + "description": "Verify that 'rocm_marker_api' string is present in the table", + "error_message": "'rocm_marker_api' string not found in the table rocpd_string", + "expected_result": 1, + "query": "SELECT COUNT(*) FROM rocpd_string WHERE string LIKE '%rocm_marker_api%';" + }, + { + "comparison": "greater_than", + "description": "Verify that 
'roctx' string is present in the table", + "error_message": "'roctx' string not found in the table rocpd_string", + "expected_result": 1, + "query": "SELECT COUNT(*) FROM rocpd_string WHERE string LIKE '%roctx%';" + } + ] + } + ] +} diff --git a/projects/rocprofiler-systems/tests/rocpd-validation-rules/roctx/validation-rules.json b/projects/rocprofiler-systems/tests/rocpd-validation-rules/roctx/validation-rules.json new file mode 100644 index 0000000000..b6e437e719 --- /dev/null +++ b/projects/rocprofiler-systems/tests/rocpd-validation-rules/roctx/validation-rules.json @@ -0,0 +1,95 @@ +{ + "required_tables": [ + { + "name": "kernel_summary", + "required_columns": [ + "name", + "calls", + "DURATION (nsec)" + ], + "validation_queries": [ + { + "comparison": "equals", + "description": "Check for null function names", + "error_message": "Found API calls with null function names", + "expected_result": 0, + "query": "SELECT COUNT(*) as count FROM kernel_summary WHERE name IS NULL" + }, + { + "comparison": "greater_than", + "description": "Check that we have some kernel calls", + "error_message": "No kernel calls found in summary", + "expected_result": 0, + "query": "SELECT COUNT(*) as count FROM kernel_summary" + }, + { + "comparison": "equals", + "description": "Check that we have 'hipKernelLaunch' kernel captured", + "error_message": "No kernel calls found in summary", + "expected_result": 1, + "query": "SELECT COUNT(*) as count FROM kernel_summary WHERE name LIKE 'hipKernelLaunch%'" + }, + { + "comparison": "equals", + "description": "Check that we have predefined number of kernel calls", + "error_message": "No kernel calls found in summary", + "expected_result": 2, + "query": "SELECT calls as num_calls FROM kernel_summary WHERE name LIKE 'hipKernelLaunch%'" + } + ] + }, + { + "min_rows": 2, + "name": "kernels", + "required_columns": [ + "id", + "category", + "name", + "start", + "end", + "queue", + "stream" + ], + "validation_queries": [ + { + "comparison": 
"equals", + "description": "Check for null function names", + "error_message": "Found kernels with null function names", + "expected_result": 0, + "query": "SELECT COUNT(*) as count FROM kernels WHERE name IS NULL" + }, + { + "comparison": "equals", + "description": "Check that we have kernel entries", + "error_message": "No kernel entries found", + "expected_result": 2, + "query": "SELECT COUNT(*) as count FROM kernels" + }, + { + "comparison": "equals", + "description": "Check for kernels with no active time", + "error_message": "Kernels with no active execution times found", + "expected_result": 0, + "query": "SELECT COUNT(*) as count FROM kernels WHERE (end - start) = 0" + }, + { + "comparison": "equals", + "description": "Check that we have number of kernel entries as expected number of calls", + "error_message": "Mismatch in expected numbers of kernels entries", + "expected_result": 2, + "query": "SELECT COUNT(*) as count FROM kernels WHERE name LIKE 'hipKernelLaunch%'" + } + ] + }, + { + "min_rows": 3, + "name": "threads", + "required_columns": [ + "tid", + "start", + "end", + "name" + ] + } + ] +} diff --git a/projects/rocprofiler-systems/tests/rocpd-validation-rules/transpose/amd-smi-rules.json b/projects/rocprofiler-systems/tests/rocpd-validation-rules/transpose/amd-smi-rules.json new file mode 100644 index 0000000000..a76b73f124 --- /dev/null +++ b/projects/rocprofiler-systems/tests/rocpd-validation-rules/transpose/amd-smi-rules.json @@ -0,0 +1,65 @@ +{ + "required_tables": [ + { + "min_rows": 4, + "name_prefix": "rocpd_info_pmc_", + "required_columns": [ + "agent_id", + "target_arch", + "name", + "symbol", + "description", + "units", + "value_type" + ], + "validation_queries": [ + { + "comparison": "greater_than", + "description": "Check for amd-smi monitoring categories", + "error_message": "Found none of the amd-smi categories", + "expected_result": 4, + "query": "SELECT COUNT(*) as count FROM {table_name} WHERE target_arch is 'GPU'" + } + ] + }, + { 
+ "min_rows": 2000, + "name_prefix": "rocpd_pmc_event_", + "required_columns": [ + "event_id", + "pmc_id", + "value" + ], + "validation_queries": [ + { + "comparison": "greater_than", + "description": "Check for amd-smi monitoring busy times", + "error_message": "Less than expected number of captured amd-smi-busy samples!", + "expected_result": 150, + "query": "SELECT COUNT(*) as count FROM {table_name} event JOIN rocpd_info_pmc info ON event.pmc_id = info.id WHERE info.name = 'device_busy_mm'" + }, + { + "comparison": "greater_than", + "description": "Check for amd-smi monitoring GPU temperature", + "error_message": "Less than expected number of captured amd-smi-temperature samples!", + "expected_result": 150, + "query": "SELECT COUNT(*) as count FROM {table_name} event JOIN rocpd_info_pmc info ON event.pmc_id = info.id WHERE info.name = 'device_temp'" + }, + { + "comparison": "greater_than", + "description": "Check for amd-smi monitoring GPU power consumption", + "error_message": "Less than expected number of captured amd-smi-power samples!", + "expected_result": 150, + "query": "SELECT COUNT(*) as count FROM {table_name} event JOIN rocpd_info_pmc info ON event.pmc_id = info.id WHERE info.name = 'device_power'" + }, + { + "comparison": "greater_than", + "description": "Check for amd-smi monitoring GPU memory usage", + "error_message": "Less than expected number of captured amd-smi-memory-usage samples!", + "expected_result": 150, + "query": "SELECT COUNT(*) as count FROM {table_name} event JOIN rocpd_info_pmc info ON event.pmc_id = info.id WHERE info.name = 'device_memory_usage'" + } + ] + } + ] +} diff --git a/projects/rocprofiler-systems/tests/rocpd-validation-rules/transpose/cpu-metrics-rules.json b/projects/rocprofiler-systems/tests/rocpd-validation-rules/transpose/cpu-metrics-rules.json new file mode 100644 index 0000000000..82a2adb6c5 --- /dev/null +++ b/projects/rocprofiler-systems/tests/rocpd-validation-rules/transpose/cpu-metrics-rules.json @@ -0,0 
+1,138 @@ +{ + "required_tables": [ + { + "name": "pmc_info", + "required_columns": [ + "id", + "guid", + "nid", + "pid", + "agent_abs_index", + "is_constant", + "is_derived", + "name", + "description", + "block", + "expression" + ], + "validation_queries": [ + { + "comparison": "equals", + "description": "Check for missing PMC names", + "error_message": "PMC entries are missing a name", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM rocpd_info_pmc WHERE name IS NULL OR name = ''" + }, + { + "comparison": "equals", + "description": "Validate agent absolute index is non-negative", + "error_message": "Negative absolute_index found in agent metadata", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM rocpd_info_agent WHERE absolute_index < 0" + }, + { + "comparison": "equals", + "description": "Check derived PMCs have expressions", + "error_message": "Derived PMC missing valid expression", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM rocpd_info_pmc WHERE is_derived = 1 AND (expression IS NULL OR expression = '')" + }, + { + "comparison": "equals", + "description": "Validate description presence", + "error_message": "PMC entries missing description field", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM rocpd_info_pmc WHERE description IS NULL OR description = ''" + } + ] + }, + { + "name": "rocpd_pmc_event", + "required_columns": [ + "id", + "guid", + "event_id", + "pmc_id", + "value", + "extdata" + ], + "validation_queries": [ + { + "comparison": "equals", + "description": "Check for NULL values in 'id'", + "error_message": "NULL value found in 'id' column", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM rocpd_pmc_event WHERE id IS NULL" + }, + { + "comparison": "equals", + "description": "Check for NULL values in 'guid' where guid count > 3000", + "error_message": "NULL 'guid' found for guid values with more than 3000 occurrences", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM rocpd_pmc_event WHERE guid IS 
NULL AND (SELECT COUNT(*) FROM rocpd_pmc_event WHERE guid = rocpd_pmc_event.guid) > 3000" + }, + { + "comparison": "equals", + "description": "Check for NULL values in 'event_id'", + "error_message": "NULL value found in 'event_id' column", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM rocpd_pmc_event WHERE event_id IS NULL" + }, + { + "comparison": "equals", + "description": "Check for NULL values in 'pmc_id'", + "error_message": "NULL value found in 'pmc_id' column", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM rocpd_pmc_event WHERE pmc_id IS NULL" + } + ] + }, + { + "name": "rocpd_sample", + "required_columns": [ + "id", + "guid", + "track_id", + "timestamp", + "event_id" + ], + "validation_queries": [ + { + "comparison": "equals", + "description": "Check for NULL values in 'id'", + "error_message": "NULL value found in 'id' column", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM rocpd_sample WHERE id IS NULL" + }, + { + "comparison": "equals", + "description": "Check for NULL values in 'guid' where guid count > 3000", + "error_message": "NULL 'guid' found for guid values with more than 3000 occurrences", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM rocpd_sample WHERE guid IS NULL AND (SELECT COUNT(*) FROM rocpd_sample WHERE guid = rocpd_sample.guid) > 3000" + }, + { + "comparison": "equals", + "description": "Check for NULL values in 'track_id'", + "error_message": "NULL value found in 'track_id' column", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM rocpd_sample WHERE track_id IS NULL" + }, + { + "comparison": "equals", + "description": "Check for NULL values in 'timestamp'", + "error_message": "NULL value found in 'timestamp' column", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM rocpd_sample WHERE timestamp IS NULL" + }, + { + "comparison": "equals", + "description": "Check for NULL values in 'event_id'", + "error_message": "NULL value found in 'event_id' column", + "expected_result": 0, + 
"query": "SELECT COUNT(*) FROM rocpd_sample WHERE event_id IS NULL" + } + ] + } + ] +} diff --git a/projects/rocprofiler-systems/tests/rocpd-validation-rules/transpose/sdk-metrics-rules.json b/projects/rocprofiler-systems/tests/rocpd-validation-rules/transpose/sdk-metrics-rules.json new file mode 100644 index 0000000000..feca976bf1 --- /dev/null +++ b/projects/rocprofiler-systems/tests/rocpd-validation-rules/transpose/sdk-metrics-rules.json @@ -0,0 +1,327 @@ +{ + "required_tables": [ + { + "commit": "Validation rules for hip_api", + "name": "events_args", + "required_columns": [ + "event_id", + "category", + "stack_id", + "parent_stack_id", + "correlation_id" + ], + "validation_queries": [ + { + "comparison": "greater_than", + "description": "Verify that 'rocm_hip_api' appears in category at least 1500 times in table events_args", + "error_message": "'rocm_hip_api' category entries are fewer than expected in events_args", + "expected_result": 1500, + "query": "SELECT COUNT(*) FROM events_args WHERE category = 'rocm_hip_api';" + }, + { + "comparison": "equals", + "description": "Check for missing category entries", + "error_message": "Empty or NULL category entries found in events_args", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM events_args WHERE category IS NULL OR TRIM(category) = '';" + } + ] + }, + { + "commit": "Validation rules for hip_api", + "name": "regions", + "required_columns": [ + "id", + "guid", + "category", + "name" + ], + "validation_queries": [ + { + "comparison": "greater_than", + "description": "Verify that 'rocm_hip_api' appears in category at least 500 times in table regions", + "error_message": "'rocm_hip_api' category entries are fewer than expected in regions", + "expected_result": 500, + "query": "SELECT COUNT(*) FROM regions WHERE category = 'rocm_hip_api';" + }, + { + "comparison": "equals", + "description": "Ensure there are no HIP API calls that last 0 seconds", + "error_message": "Found HIP API captures where duration is 
0", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM regions WHERE category = 'rocm_hip_api' AND duration = 0;" + }, + { + "comparison": "equals", + "description": "Check for any NULL values in the 'name' column of regions", + "error_message": "NULL entries found in the name column of regions", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM regions WHERE name IS NULL;" + } + ] + }, + { + "commit": "Validation rule for hip_api", + "name": "rocpd_string", + "required_columns": [ + "id", + "guid", + "string" + ], + "validation_queries": [ + { + "comparison": "greater_than", + "description": "Verify that 'rocm_hip_api' string is present in the table", + "error_message": "'rocm_hip_api' string not found in the table rocpd_string", + "expected_result": 1, + "query": "SELECT COUNT(*) FROM rocpd_string WHERE string LIKE '%rocm_hip_api%';" + } + ] + }, + { + "commit": "Validation rules for hsa_api", + "name": "events_args", + "required_columns": [ + "event_id", + "category", + "stack_id", + "parent_stack_id", + "correlation_id" + ], + "validation_queries": [ + { + "comparison": "greater_than", + "description": "Verify that 'rocm_hsa_api' appears in category at least 1000 times in table events_args", + "error_message": "'rocm_hsa_api' category entries are fewer than expected in events_args", + "expected_result": 1000, + "query": "SELECT COUNT(*) FROM events_args WHERE category = 'rocm_hsa_api';" + } + ] + }, + { + "commit": "Validation rules for hsa_api", + "name": "regions", + "required_columns": [ + "id", + "guid", + "category", + "name" + ], + "validation_queries": [ + { + "comparison": "greater_than", + "description": "Verify that 'rocm_hsa_api' appears in category at least 500 times in table regions", + "error_message": "'rocm_hsa_api' category entries are fewer than expected in regions", + "expected_result": 500, + "query": "SELECT COUNT(*) FROM regions WHERE category = 'rocm_hsa_api';" + }, + { + "comparison": "equals", + "description": "Ensure there 
are no HSA API calls that last 0 seconds", + "error_message": "Found HSA API captures where duration is 0", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM regions WHERE category = 'rocm_hsa_api' AND duration = 0;" + }, + { + "comparison": "equals", + "description": "Check for any NULL values in the 'name' column of regions", + "error_message": "NULL entries found in the name column of regions", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM regions WHERE name IS NULL;" + } + ] + }, + { + "commit": "Validation rule for hsa_api", + "name": "rocpd_string", + "required_columns": [ + "id", + "guid", + "string" + ], + "validation_queries": [ + { + "comparison": "greater_than", + "description": "Verify that 'rocm_hsa_api' string is present in the table", + "error_message": "'rocm_hsa_api' string not found in the table rocpd_string", + "expected_result": 1, + "query": "SELECT COUNT(*) FROM rocpd_string WHERE string LIKE '%rocm_hsa_api%';" + } + ] + }, + { + "commit": "Validation rules for memory_allocations", + "name": "memory_allocations", + "required_columns": [ + "id", + "guid", + "category", + "nid", + "pid", + "tid", + "start", + "end", + "duration", + "type", + "level", + "agent_name" + ], + "validation_queries": [ + { + "comparison": "equals", + "description": "Check that all IDs are not NULL", + "error_message": "NULL entries found in the id column of memory_allocations", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM memory_allocations WHERE id IS NULL;" + }, + { + "comparison": "equals", + "description": "Check the sizes of executed memory allocate calls", + "error_message": "Entries found where allocated size is 0 or NULL", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM memory_allocations WHERE size IS NULL or 0;" + }, + { + "comparison": "greater_than", + "description": "Verify that 'rocm_memory_allocate' appears more than 1 times in category", + "error_message": "'rocm_memory_allocate' string appears fewer than 1 times 
in category column of memory_allocations", + "expected_result": 1, + "query": "SELECT COUNT(*) FROM memory_allocations WHERE category LIKE '%rocm_memory_allocate%';" + } + ] + }, + { + "name": "rocpd_string", + "required_columns": [ + "id", + "guid", + "string" + ], + "validation_queries": [ + { + "comparison": "greater_than", + "description": "Verify that 'rocm_memory_allocate' is present in the string column", + "error_message": "'rocm_memory_allocate' string not found in the string column of rocpd_string", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM rocpd_string WHERE string LIKE '%rocm_memory_allocate%';" + } + ] + }, + { + "commit": "Validation rules for memory_copies", + "name": "rocpd_string", + "required_columns": [ + "id", + "guid", + "string" + ], + "validation_queries": [ + { + "comparison": "greater_than", + "description": "Verify that 'rocm_memory_copy' is present in the string column", + "error_message": "'rocm_memory_copy' string not found in the string column of rocpd_string", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM rocpd_string WHERE string LIKE '%rocm_memory_copy%';" + }, + { + "comparison": "greater_than", + "description": "Verify that 'MEMORY_COPY_DEVICE_TO_HOST' is present in the string column", + "error_message": "'MEMORY_COPY_DEVICE_TO_HOST' string not found in the string column of rocpd_string", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM rocpd_string WHERE string LIKE '%MEMORY_COPY_DEVICE_TO_HOST%';" + }, + { + "comparison": "greater_than", + "description": "Verify that 'MEMORY_COPY_HOST_TO_DEVICE' is present in the string column", + "error_message": "'MEMORY_COPY_HOST_TO_DEVICE' string not found in the string column of rocpd_string", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM rocpd_string WHERE string LIKE '%MEMORY_COPY_HOST_TO_DEVICE%';" + } + ] + }, + { + "commit": "Validation rules for memory_copies", + "name": "memory_copies", + "required_columns": [ + "id", + "guid", + 
"category", + "nid", + "pid", + "tid", + "start", + "end" + ], + "validation_queries": [ + { + "comparison": "greater_than", + "description": "Verify that 'rocm_memory_copy' appears in category column", + "error_message": "'rocm_memory_copy' string not found in category column of memory_copies", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM memory_copies WHERE category LIKE '%rocm_memory_copy%';" + }, + { + "comparison": "equals", + "description": "Check that all IDs are not NULL", + "error_message": "NULL entries found in the id column of memory_copies", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM memory_copies WHERE id IS NULL;" + }, + { + "comparison": "equals", + "description": "Check the sizes of executed memory_copy calls", + "error_message": "NULL entries found where copied size is 0 or non-existing field", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM memory_copies WHERE size IS NULL or 0;" + }, + { + "comparison": "equals", + "description": "Check the agents executing memory_copy calls", + "error_message": "NULL entries found where copied size is 0 or non-existing field", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM memory_copies WHERE size IS NULL or 0;" + } + ] + }, + { + "commit": "Validation rules for memory_copies - agent verification", + "name": "memory_copies", + "required_columns": [ + "id", + "guid", + "dst_agent_abs_index", + "src_agent_abs_index" + ], + "validation_queries": [ + { + "comparison": "equals", + "description": "Verify that all dst_agent_abs_index values exist in rocpd_info_agent table", + "error_message": "Found dst_agent_abs_index values in memory_copies that do not exist in rocpd_info_agent table", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM memory_copies mc LEFT JOIN rocpd_info_agent ag ON mc.dst_agent_abs_index = ag.absolute_index AND mc.guid = ag.guid WHERE mc.dst_agent_abs_index IS NOT NULL AND ag.absolute_index IS NULL;" + }, + { + "comparison": 
"equals", + "description": "Verify that all src_agent_abs_index values exist in rocpd_info_agent table", + "error_message": "Found src_agent_abs_index values in memory_copies that do not exist in rocpd_info_agent table", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM memory_copies mc LEFT JOIN rocpd_info_agent ag ON mc.src_agent_abs_index = ag.absolute_index AND mc.guid = ag.guid WHERE mc.src_agent_abs_index IS NOT NULL AND ag.absolute_index IS NULL;" + }, + { + "comparison": "equals", + "description": "Check that dst_agent_abs_index is not NULL for memory copy operations", + "error_message": "NULL entries found in dst_agent_abs_index column of memory_copies", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM memory_copies WHERE dst_agent_abs_index IS NULL;" + }, + { + "comparison": "equals", + "description": "Check that src_agent_abs_index is not NULL for memory copy operations", + "error_message": "NULL entries found in src_agent_abs_index column of memory_copies", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM memory_copies WHERE src_agent_abs_index IS NULL;" + } + ] + } + ] +} diff --git a/projects/rocprofiler-systems/tests/rocpd-validation-rules/transpose/timer-sampling-rules.json b/projects/rocprofiler-systems/tests/rocpd-validation-rules/transpose/timer-sampling-rules.json new file mode 100644 index 0000000000..887b132d7c --- /dev/null +++ b/projects/rocprofiler-systems/tests/rocpd-validation-rules/transpose/timer-sampling-rules.json @@ -0,0 +1,53 @@ +{ + "required_tables": [ + { + "name": "rocpd_string", + "required_columns": [ + "id", + "guid", + "string" + ], + "validation_queries": [ + { + "comparison": "greater_than", + "description": "Check if 'timer sampling' exists in string entries", + "error_message": "'timer sampling' string not found in rocpd_string", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM rocpd_string WHERE string LIKE '%timer_sampling%';" + }, + { + "comparison": "equals", + "description": "Verify no 
empty string values", + "error_message": "Empty or NULL string entries found in rocpd_string", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM rocpd_string WHERE string IS NULL OR TRIM(string) = '';" + } + ] + }, + { + "name": "regions", + "required_columns": [ + "guid", + "category", + "id", + "name" + ], + "validation_queries": [ + { + "comparison": "greater_than", + "description": "Check that category column contains more than 1000 'timer_sampling' entries", + "error_message": "Less than 1001 'timer_sampling' entries found in category column of regions", + "expected_result": 1000, + "query": "SELECT COUNT(*) FROM regions WHERE category = 'timer_sampling';" + }, + { + "comparison": "equals", + "description": "Verify no NULL or empty values in guid column", + "error_message": "NULL or empty guid values found in regions", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM regions WHERE guid IS NULL OR TRIM(guid) = '';" + } + ] + } + ] +} diff --git a/projects/rocprofiler-systems/tests/rocpd-validation-rules/transpose/validation-rules.json b/projects/rocprofiler-systems/tests/rocpd-validation-rules/transpose/validation-rules.json new file mode 100644 index 0000000000..48fee021d8 --- /dev/null +++ b/projects/rocprofiler-systems/tests/rocpd-validation-rules/transpose/validation-rules.json @@ -0,0 +1,95 @@ +{ + "required_tables": [ + { + "name": "kernel_summary", + "required_columns": [ + "name", + "calls", + "DURATION (nsec)" + ], + "validation_queries": [ + { + "comparison": "equals", + "description": "Check for null function names", + "error_message": "Found API calls with null function names", + "expected_result": 0, + "query": "SELECT COUNT(*) as count FROM kernel_summary WHERE name IS NULL" + }, + { + "comparison": "greater_than", + "description": "Check that we have some kernel calls", + "error_message": "No kernel calls found in summary", + "expected_result": 0, + "query": "SELECT COUNT(*) as count FROM kernel_summary" + }, + { + 
"comparison": "equals", + "description": "Check that we have transpose kernel captured", + "error_message": "Transpose kernel not found in summary", + "expected_result": 1, + "query": "SELECT COUNT(*) as count FROM kernel_summary WHERE name LIKE 'transpose%'" + }, + { + "comparison": "equals", + "description": "Check that we have predefined number of kernel calls", + "error_message": "Unexpected number of transpose kernel calls in summary", + "expected_result": 1000, + "query": "SELECT calls as num_calls FROM kernel_summary WHERE name LIKE 'transpose%'" + } + ] + }, + { + "min_rows": 1000, + "name": "kernels", + "required_columns": [ + "id", + "category", + "name", + "start", + "end", + "queue", + "stream" + ], + "validation_queries": [ + { + "comparison": "equals", + "description": "Check for null function names", + "error_message": "Found kernels with null function names", + "expected_result": 0, + "query": "SELECT COUNT(*) as count FROM kernels WHERE name IS NULL" + }, + { + "comparison": "greater_than", + "description": "Check that we have kernel entries", + "error_message": "No kernel entries found", + "expected_result": 1000, + "query": "SELECT COUNT(*) as count FROM kernels" + }, + { + "comparison": "equals", + "description": "Check for kernels with no active time", + "error_message": "Kernels with no active execution times found", + "expected_result": 0, + "query": "SELECT COUNT(*) as count FROM kernels WHERE (end - start) = 0" + }, + { + "comparison": "equals", + "description": "Check that we have number of kernel entries as expected number of calls", + "error_message": "Mismatch in expected numbers of kernels entries", + "expected_result": 1000, + "query": "SELECT COUNT(*) as count FROM kernels WHERE name LIKE 'transpose%'" + } + ] + }, + { + "min_rows": 3, + "name": "threads", + "required_columns": [ + "tid", + "start", + "end", + "name" + ] + } + ] +} diff --git a/projects/rocprofiler-systems/tests/rocpd-validation-rules/video-decode/amd-smi-rules.json 
b/projects/rocprofiler-systems/tests/rocpd-validation-rules/video-decode/amd-smi-rules.json new file mode 100644 index 0000000000..5be074e2d4 --- /dev/null +++ b/projects/rocprofiler-systems/tests/rocpd-validation-rules/video-decode/amd-smi-rules.json @@ -0,0 +1,58 @@ +{ + "required_tables": [ + { + "min_rows": 1, + "name_prefix": "rocpd_info_pmc_", + "required_columns": [ + "agent_id", + "target_arch", + "name", + "symbol", + "description", + "units", + "value_type" + ], + "validation_queries": [ + { + "comparison": "greater_than", + "description": "Check for vcn_activity amd-smi metrics", + "error_message": "Did not find vcn_activity in amd-smi metrics", + "expected_result": 1, + "query": "SELECT COUNT(*) as count FROM {table_name} WHERE symbol LIKE 'VcnAct%'" + } + ] + }, + { + "min_rows": 500, + "name_prefix": "rocpd_pmc_event_", + "required_columns": [ + "event_id", + "pmc_id", + "value" + ], + "validation_queries": [ + { + "comparison": "greater_than", + "description": "Check for amd-smi monitoring busy times", + "error_message": "Less than expected number of captured amd-smi mm-busy samples!", + "expected_result": 150, + "query": "SELECT COUNT(*) as count FROM {table_name} event JOIN rocpd_info_pmc info ON event.pmc_id = info.id WHERE info.name = 'device_busy_mm'" + }, + { + "comparison": "greater_than", + "description": "Check for amd-smi monitoring GPU memory usage", + "error_message": "Less than expected number of captured amd-smi memory-usage samples!", + "expected_result": 150, + "query": "SELECT COUNT(*) as count FROM {table_name} event JOIN rocpd_info_pmc info ON event.pmc_id = info.id WHERE info.name = 'device_memory_usage'" + }, + { + "comparison": "greater_than", + "description": "Check for amd-smi monitoring VCN activity", + "error_message": "Less than expected activity in amd-smi vcn-activity samples!", + "expected_result": 100, + "query": "SELECT COUNT(*) as count FROM {table_name} event JOIN rocpd_info_pmc info ON event.pmc_id = info.id WHERE 
info.name LIKE 'device_vcn_activity_%' and event.value > 0" + } + ] + } + ] +} diff --git a/projects/rocprofiler-systems/tests/rocpd-validation-rules/video-decode/sdk-metrics-rules.json b/projects/rocprofiler-systems/tests/rocpd-validation-rules/video-decode/sdk-metrics-rules.json new file mode 100644 index 0000000000..4f38e2d6c7 --- /dev/null +++ b/projects/rocprofiler-systems/tests/rocpd-validation-rules/video-decode/sdk-metrics-rules.json @@ -0,0 +1,160 @@ +{ + "required_tables": [ + { + "commit": "Validation rules for rocm_rocdecode_api", + "name": "events_args", + "required_columns": [ + "event_id", + "category", + "stack_id", + "parent_stack_id", + "correlation_id" + ], + "validation_queries": [ + { + "comparison": "greater_than", + "description": "Verify that 'rocm_rocdecode_api' appears in category at least 1500 times in table events_args", + "error_message": "'rocm_rocdecode_api' category entries are fewer than expected in events_args", + "expected_result": 1500, + "query": "SELECT COUNT(*) FROM events_args WHERE category = 'rocm_rocdecode_api';" + }, + { + "comparison": "equals", + "description": "Check for missing category entries", + "error_message": "Empty or NULL category entries found in events_args", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM events_args WHERE category IS NULL OR TRIM(category) = '';" + } + ] + }, + { + "commit": "Validation rules for rocm_rocdecode_api", + "name": "regions", + "required_columns": [ + "id", + "guid", + "category", + "name" + ], + "validation_queries": [ + { + "comparison": "greater_than", + "description": "Verify that 'rocm_rocdecode_api' appears in category at least 500 times in table regions", + "error_message": "'rocm_rocdecode_api' category entries are fewer than expected in regions", + "expected_result": 500, + "query": "SELECT COUNT(*) FROM regions WHERE category = 'rocm_rocdecode_api';" + }, + { + "comparison": "equals", + "description": "Ensure there are no rocDecode API calls that last 0 seconds", + 
"error_message": "Found rocDecode API captures where duration is 0", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM regions WHERE category = 'rocm_rocdecode_api' AND duration = 0;" + }, + { + "comparison": "equals", + "description": "Check for any NULL values in the 'name' column of regions", + "error_message": "NULL entries found in the name column of regions", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM regions WHERE name IS NULL;" + } + ] + }, + { + "commit": "Validation rules for rocm_rocdecode_api", + "name": "rocpd_string", + "required_columns": [ + "id", + "guid", + "string" + ], + "validation_queries": [ + { + "comparison": "greater_than", + "description": "Verify that 'rocm_rocdecode_api' string is present in the table", + "error_message": "'rocm_rocdecode_api' string not found in the table rocpd_string", + "expected_result": 1, + "query": "SELECT COUNT(*) FROM rocpd_string WHERE string LIKE '%rocm_rocdecode_api%';" + } + ] + }, + { + "commit": "Validation rules for hip_api", + "name": "events_args", + "required_columns": [ + "event_id", + "category", + "stack_id", + "parent_stack_id", + "correlation_id" + ], + "validation_queries": [ + { + "comparison": "greater_than", + "description": "Verify that 'rocm_hip_api' appears in category at least 100 times in table events_args", + "error_message": "'rocm_hip_api' category entries are fewer than expected in events_args", + "expected_result": 100, + "query": "SELECT COUNT(*) FROM events_args WHERE category = 'rocm_hip_api';" + }, + { + "comparison": "equals", + "description": "Check for missing category entries", + "error_message": "Empty or NULL category entries found in events_args", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM events_args WHERE category IS NULL OR TRIM(category) = '';" + } + ] + }, + { + "commit": "Validation rules for hip_api", + "name": "regions", + "required_columns": [ + "id", + "guid", + "category", + "name" + ], + "validation_queries": [ + { + 
"comparison": "greater_than", + "description": "Verify that 'rocm_hip_api' appears in category at least 50 times in table regions", + "error_message": "'rocm_hip_api' category entries are fewer than expected in regions", + "expected_result": 50, + "query": "SELECT COUNT(*) FROM regions WHERE category = 'rocm_hip_api';" + }, + { + "comparison": "equals", + "description": "Ensure there are no HIP API calls that last 0 seconds", + "error_message": "Found HIP API captures where duration is 0", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM regions WHERE category = 'rocm_hip_api' AND duration = 0;" + }, + { + "comparison": "equals", + "description": "Check for any NULL values in the 'name' column of regions", + "error_message": "NULL entries found in the name column of regions", + "expected_result": 0, + "query": "SELECT COUNT(*) FROM regions WHERE name IS NULL;" + } + ] + }, + { + "commit": "Validation rule for hip_api", + "name": "rocpd_string", + "required_columns": [ + "id", + "guid", + "string" + ], + "validation_queries": [ + { + "comparison": "greater_than_or_equal", + "description": "Verify that 'rocm_hip_api' string is present in the table", + "error_message": "'rocm_hip_api' string not found in the table rocpd_string", + "expected_result": 1, + "query": "SELECT COUNT(*) FROM rocpd_string WHERE string LIKE '%rocm_hip_api%';" + } + ] + } + ] +} diff --git a/projects/rocprofiler-systems/tests/rocpd-validation-rules/video-decode/validation-rules.json b/projects/rocprofiler-systems/tests/rocpd-validation-rules/video-decode/validation-rules.json new file mode 100644 index 0000000000..2b6f52f0c4 --- /dev/null +++ b/projects/rocprofiler-systems/tests/rocpd-validation-rules/video-decode/validation-rules.json @@ -0,0 +1,14 @@ +{ + "required_tables": [ + { + "min_rows": 10, + "name": "threads", + "required_columns": [ + "tid", + "start", + "end", + "name" + ] + } + ] +} diff --git a/projects/rocprofiler-systems/tests/rocprof-sys-decode-tests.cmake 
b/projects/rocprofiler-systems/tests/rocprof-sys-decode-tests.cmake index b164504ed6..4c86388726 100644 --- a/projects/rocprofiler-systems/tests/rocprof-sys-decode-tests.cmake +++ b/projects/rocprofiler-systems/tests/rocprof-sys-decode-tests.cmake @@ -27,34 +27,57 @@ # -------------------------------------------------------------------------------------- # set(_video_decode_environment - "${_base_environment}" "ROCPROFSYS_ROCM_DOMAINS=hip_runtime_api,kernel_dispatch,memory_copy,rocdecode_api" "ROCPROFSYS_AMD_SMI_METRICS=busy,temp,power,vcn_activity,mem_usage" "ROCPROFSYS_SAMPLING_CPUS=none" ) set(_jpeg_decode_environment - "${_base_environment}" "ROCPROFSYS_ROCM_DOMAINS=hip_runtime_api,kernel_dispatch,memory_copy,rocjpeg_api" "ROCPROFSYS_AMD_SMI_METRICS=busy,temp,power,jpeg_activity,mem_usage" "ROCPROFSYS_SAMPLING_CPUS=none" ) +set(_vcn_rocpd_validation_rules + "${CMAKE_CURRENT_LIST_DIR}/rocpd-validation-rules/video-decode/validation-rules.json" + "${CMAKE_CURRENT_LIST_DIR}/rocpd-validation-rules/video-decode/sdk-metrics-rules.json" +) + +set(_jpeg_rocpd_validation_rules + "${CMAKE_CURRENT_LIST_DIR}/rocpd-validation-rules/default-rules.json" + "${CMAKE_CURRENT_LIST_DIR}/rocpd-validation-rules/jpeg-decode/validation-rules.json" + "${CMAKE_CURRENT_LIST_DIR}/rocpd-validation-rules/jpeg-decode/sdk-metrics-rules.json" +) + +# Enable ROCPD for tests only if valid ROCm is installed and a valid GPU is detected +if(${ENABLE_ROCPD_TEST} AND ${_VALID_GPU}) + list(APPEND _video_decode_environment "ROCPROFSYS_USE_ROCPD=ON") + list(APPEND _jpeg_decode_environment "ROCPROFSYS_USE_ROCPD=ON") +endif() + +# Engine activity counters are only supported on MI300 and later GPUs rocprofiler_systems_get_gfx_archs(MI300_DETECTED GFX_MATCH "gfx9[4-9][A-Fa-f0-9]" ECHO) if(MI300_DETECTED) - list(APPEND VCN_COUNTER_NAMES_ARG --counter-names "VCN Activity") - list(APPEND JPEG_COUNTER_NAMES_ARG --counter-names "JPEG Activity") + list(APPEND _vcn_counter_names --counter-names "VCN Activity") + 
list(APPEND _jpeg_counter_names --counter-names "JPEG Activity") + list( + APPEND + _vcn_rocpd_validation_rules + "${CMAKE_CURRENT_LIST_DIR}/rocpd-validation-rules/video-decode/amd-smi-rules.json" + ) + list( + APPEND + _jpeg_rocpd_validation_rules + "${CMAKE_CURRENT_LIST_DIR}/rocpd-validation-rules/jpeg-decode/amd-smi-rules.json" + ) endif() -# check_gpu("MI100" MI100_DETECTED) if(MI100_DETECTED) list(APPEND VCN_COUNTER_NAMES_ARG -# --counter-names "VCN Activity") endif() - rocprofiler_systems_add_test( SKIP_BASELINE SKIP_RUNTIME SKIP_REWRITE NAME video-decode TARGET videodecode GPU ON - ENVIRONMENT "${_video_decode_environment}" + ENVIRONMENT "${_base_environment};${_video_decode_environment}" RUN_ARGS -i ${PROJECT_BINARY_DIR}/videos -t 1 LABELS "decode" ) @@ -64,9 +87,21 @@ rocprofiler_systems_add_validation_test( PERFETTO_METRIC "rocm_rocdecode_api" PERFETTO_FILE "perfetto-trace.proto" LABELS "decode" - ARGS -l rocDecCreateVideoParser -c 2 -d 1 ${VCN_COUNTER_NAMES_ARG} -p + ARGS -l rocDecCreateVideoParser -c 2 -d 1 ${_vcn_counter_names} -p ) +if(${ENABLE_ROCPD_TEST} AND ${_VALID_GPU}) + set_property(TEST video-decode-sampling APPEND PROPERTY LABELS rocpd) + + rocprofiler_systems_add_validation_test( + NAME video-decode-sampling + ROCPD_FILE "rocpd.db" + LABELS "decode;rocpd" + ARGS --validation-rules + ${_vcn_rocpd_validation_rules} + ) +endif() + # -------------------------------------------------------------------------------------- # # # jpeg decode tests @@ -78,7 +113,7 @@ rocprofiler_systems_add_test( NAME jpeg-decode TARGET jpegdecode GPU ON - ENVIRONMENT "${_jpeg_decode_environment}" + ENVIRONMENT "${_base_environment};${_jpeg_decode_environment}" RUN_ARGS -i ${PROJECT_BINARY_DIR}/images -b 32 LABELS "decode" ) @@ -88,5 +123,17 @@ rocprofiler_systems_add_validation_test( PERFETTO_METRIC "rocm_rocjpeg_api" PERFETTO_FILE "perfetto-trace.proto" LABELS "decode" - ARGS -l rocJpegCreate -c 1 -d 1 ${JPEG_COUNTER_NAMES_ARG} -p + ARGS -l rocJpegCreate -c 1 -d 1 
${_jpeg_counter_names} -p ) + +if(${ENABLE_ROCPD_TEST} AND ${_VALID_GPU}) + set_property(TEST jpeg-decode-sampling APPEND PROPERTY LABELS rocpd) + + rocprofiler_systems_add_validation_test( + NAME jpeg-decode-sampling + ROCPD_FILE "rocpd.db" + LABELS "decode;rocpd" + ARGS --validation-rules + ${_jpeg_rocpd_validation_rules} + ) +endif() diff --git a/projects/rocprofiler-systems/tests/rocprof-sys-openmp-tests.cmake b/projects/rocprofiler-systems/tests/rocprof-sys-openmp-tests.cmake index 58e39528d6..f44d909642 100644 --- a/projects/rocprofiler-systems/tests/rocprof-sys-openmp-tests.cmake +++ b/projects/rocprofiler-systems/tests/rocprof-sys-openmp-tests.cmake @@ -16,6 +16,21 @@ if(NOT EXISTS "${ROCM_LLVM_LIB_PATH}/libomptarget.so" AND ROCPROFSYS_USE_ROCM) ) endif() +set(_ompt_environment + "ROCPROFSYS_TRACE=ON" + "ROCPROFSYS_PROFILE=ON" + "ROCPROFSYS_TIME_OUTPUT=OFF" + "ROCPROFSYS_USE_OMPT=ON" + "ROCPROFSYS_TIMEMORY_COMPONENTS=wall_clock,trip_count,peak_rss" + "${_test_openmp_env}" + "${_test_library_path}" +) + +# Enable ROCPD for tests only if valid ROCm is installed and a valid GPU is detected +if(${ENABLE_ROCPD_TEST} AND ${_VALID_GPU}) + list(APPEND _ompt_environment "ROCPROFSYS_USE_ROCPD=ON") +endif() + if(ROCPROFSYS_OPENMP_USING_LIBOMP_LIBRARY AND ROCPROFSYS_USE_OMPT) set(_OMPT_PASS_REGEX "\\|_omp_") set(_OMPVV_TARGET_PASS_REGEX "_+omp_offloading") @@ -25,6 +40,7 @@ else() endif() rocprofiler_systems_add_test( + SKIP_RUNTIME NAME openmp-cg TARGET openmp-cg LABELS "openmp" @@ -61,7 +77,7 @@ rocprofiler_systems_add_test( GPU ON LABELS "openmp;openmp-target" ENVIRONMENT - "${_ompt_environment};${_rocm_ld_env};ROCPROFSYS_ROCM_DOMAINS=hip_runtime_api,kernel_dispatch" + "${_ompt_environment};ROCPROFSYS_ROCM_DOMAINS=hip_api,hsa_api,kernel_dispatch" ) rocprofiler_systems_add_validation_test( @@ -69,7 +85,6 @@ rocprofiler_systems_add_validation_test( PERFETTO_METRIC "rocm_kernel_dispatch" PERFETTO_FILE "perfetto-trace.proto" LABELS "openmp;openmp-target" - ENVIRONMENT 
"${_rocm_ld_env}" ARGS --label-substrings Z4vmulIiEvPT_S1_S1_i_l51.kd @@ -80,6 +95,19 @@ rocprofiler_systems_add_validation_test( -p ) +if(${ENABLE_ROCPD_TEST} AND ${_VALID_GPU}) + set_property(TEST openmp-target-sampling APPEND PROPERTY LABELS rocpd) + + rocprofiler_systems_add_validation_test( + NAME openmp-target-sampling + ROCPD_FILE "rocpd.db" + LABELS "openmp;openmp-target;rocpd" + ARGS --validation-rules + "${CMAKE_CURRENT_LIST_DIR}/rocpd-validation-rules/openmp-target/kernel-rules.json" + "${CMAKE_CURRENT_LIST_DIR}/rocpd-validation-rules/openmp-target/sdk-metrics-rules.json" + ) +endif() + # OpenMP tests generated using OMPVV binaries if(ROCPROFSYS_OMPVV_HOST_TESTS) foreach(HOST_TEST_NAME ${ROCPROFSYS_OMPVV_HOST_TESTS}) @@ -103,7 +131,6 @@ if(ROCPROFSYS_OMPVV_HOST_TESTS) set(_ompvv_offload_environment "${_ompt_environment}" - "${_rocm_ld_env}" "ROCPROFSYS_USE_SAMPLING=ON" "ROCPROFSYS_SAMPLING_FREQ=50" "ROCPROFSYS_COUT_OUTPUT=ON" diff --git a/projects/rocprofiler-systems/tests/rocprof-sys-rocm-tests.cmake b/projects/rocprofiler-systems/tests/rocprof-sys-rocm-tests.cmake index 37c383e79f..9823d798f2 100644 --- a/projects/rocprofiler-systems/tests/rocprof-sys-rocm-tests.cmake +++ b/projects/rocprofiler-systems/tests/rocprof-sys-rocm-tests.cmake @@ -22,10 +22,20 @@ # -------------------------------------------------------------------------------------- # # -# ROCm tests +# ROCm transpose tests # # -------------------------------------------------------------------------------------- # +set(_transpose_environment + "${_base_environment}" + "ROCPROFSYS_ROCM_DOMAINS=hip_runtime_api,kernel_dispatch,memory_copy,memory_allocation,hsa_api" +) + +# Enable ROCPD for tests only if valid ROCm is installed and a valid GPU is detected +if(${ENABLE_ROCPD_TEST} AND ${_VALID_GPU}) + list(APPEND _transpose_environment "ROCPROFSYS_USE_ROCPD=ON") +endif() + rocprofiler_systems_add_test( NAME transpose TARGET transpose @@ -44,7 +54,7 @@ rocprofiler_systems_add_test( args -E 
uniform_int_distribution - ENVIRONMENT "${_base_environment}" + ENVIRONMENT "${_transpose_environment}" RUNTIME_TIMEOUT 480 ) @@ -56,7 +66,7 @@ rocprofiler_systems_add_test( GPU ON NUM_PROCS 1 RUN_ARGS 1 2 2 - ENVIRONMENT "${_base_environment}" + ENVIRONMENT "${_transpose_environment}" ) rocprofiler_systems_add_test( @@ -80,10 +90,16 @@ rocprofiler_systems_add_test( -E uniform_int_distribution RUN_ARGS 2 100 50 - ENVIRONMENT "${_base_environment}" + ENVIRONMENT "${_transpose_environment}" REWRITE_FAIL_REGEX "0 instrumented loops in procedure transpose" ) +# -------------------------------------------------------------------------------------- # +# +# ROCProfiler tests (counter collection) +# +# -------------------------------------------------------------------------------------- # + if(ROCPROFSYS_USE_ROCM) set(NAVI_REGEX "gfx(10|11|12)[A-Fa-f0-9][A-Fa-f0-9]") rocprofiler_systems_get_gfx_archs(NAVI_DETECTED GFX_MATCH ${NAVI_REGEX} ECHO) @@ -120,7 +136,7 @@ if(ROCPROFSYS_USE_ROCM) NUM_PROCS ${NUM_PROCS} REWRITE_ARGS -e -v 2 -E uniform_int_distribution ENVIRONMENT - "${_base_environment};ROCPROFSYS_ROCM_EVENTS=${ROCPROFSYS_ROCM_EVENTS_TEST}" + "${_transpose_environment};ROCPROFSYS_ROCM_EVENTS=${ROCPROFSYS_ROCM_EVENTS_TEST}" REWRITE_RUN_PASS_REGEX "${_ROCP_PASS_REGEX}" SAMPLING_PASS_REGEX "${_ROCP_PASS_REGEX}" ) @@ -141,3 +157,26 @@ if(ROCPROFSYS_USE_ROCM) LABELS "rocprofiler" ) endif() + +# -------------------------------------------------------------------------------------- # +# +# ROCpd tests +# +# -------------------------------------------------------------------------------------- # + +if(${ENABLE_ROCPD_TEST} AND ${_VALID_GPU}) + set_property(TEST transpose-sampling APPEND PROPERTY LABELS rocpd) + + rocprofiler_systems_add_validation_test( + NAME transpose-sampling + ROCPD_FILE "rocpd.db" + ARGS --validation-rules + "${CMAKE_CURRENT_LIST_DIR}/rocpd-validation-rules/transpose/validation-rules.json" + 
"${CMAKE_CURRENT_LIST_DIR}/rocpd-validation-rules/default-rules.json" + "${CMAKE_CURRENT_LIST_DIR}/rocpd-validation-rules/transpose/amd-smi-rules.json" + "${CMAKE_CURRENT_LIST_DIR}/rocpd-validation-rules/transpose/cpu-metrics-rules.json" + "${CMAKE_CURRENT_LIST_DIR}/rocpd-validation-rules/transpose/timer-sampling-rules.json" + "${CMAKE_CURRENT_LIST_DIR}/rocpd-validation-rules/transpose/sdk-metrics-rules.json" + LABELS "rocpd" + ) +endif() diff --git a/projects/rocprofiler-systems/tests/rocprof-sys-roctx-tests.cmake b/projects/rocprofiler-systems/tests/rocprof-sys-roctx-tests.cmake index 7d6a9337af..740d1df0ab 100644 --- a/projects/rocprofiler-systems/tests/rocprof-sys-roctx-tests.cmake +++ b/projects/rocprofiler-systems/tests/rocprof-sys-roctx-tests.cmake @@ -20,23 +20,42 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. +find_package(ROCmVersion) + +if(NOT ROCmVersion_FOUND) + message( + WARNING + "ROCmVersion_FOUND not found, skipping tests in ${CMAKE_CURRENT_LIST_FILE}" + ) + return() +endif() + # -------------------------------------------------------------------------------------- # # # roctx tests # # -------------------------------------------------------------------------------------- # + # Ensure ROCPROFSYS_ROCM_DOMAINS is defined set(_roctx_environment "${_base_environment}" "ROCPROFSYS_ROCM_DOMAINS=hip_runtime_api,marker_api,kernel_dispatch" ) + +# Enable ROCPD for tests only if valid ROCm is installed and a valid GPU is detected +if(${ENABLE_ROCPD_TEST} AND ${_VALID_GPU}) + list(APPEND _roctx_environment "ROCPROFSYS_USE_ROCPD=ON") +endif() + rocprofiler_systems_add_test( - # SKIP_BASELINE SKIP_RUNTIME SKIP_REWRITE SKIP_RUNTIME + SKIP_RUNTIME NAME roctx-api TARGET roctx GPU ON + LABELS "roctx" ENVIRONMENT "${_roctx_environment}" ) + set(ROCTX_LABEL roctxMark_GPU_workload roctxRangePush_run_profiling @@ -86,3 +105,18 @@ rocprofiler_systems_add_validation_test( LABELS "roctx" ARGS -l ${ROCTX_LABEL} -c 
${ROCTX_COUNT} -d ${ROCTX_DEPTH} -p ) + +if(${ENABLE_ROCPD_TEST} AND ${_VALID_GPU}) + set_property(TEST roctx-api-sampling APPEND PROPERTY LABELS rocpd) + + rocprofiler_systems_add_validation_test( + NAME roctx-api-sampling + ROCPD_FILE "rocpd.db" + ARGS --validation-rules + "${CMAKE_CURRENT_LIST_DIR}/rocpd-validation-rules/default-rules.json" + "${CMAKE_CURRENT_LIST_DIR}/rocpd-validation-rules/roctx/amd-smi-rules.json" + "${CMAKE_CURRENT_LIST_DIR}/rocpd-validation-rules/roctx/validation-rules.json" + "${CMAKE_CURRENT_LIST_DIR}/rocpd-validation-rules/roctx/sdk-metrics-rules.json" + LABELS "roctx;rocpd" + ) +endif() diff --git a/projects/rocprofiler-systems/tests/rocprof-sys-testing.cmake b/projects/rocprofiler-systems/tests/rocprof-sys-testing.cmake index bf28044d52..89866c3fde 100644 --- a/projects/rocprofiler-systems/tests/rocprof-sys-testing.cmake +++ b/projects/rocprofiler-systems/tests/rocprof-sys-testing.cmake @@ -68,6 +68,20 @@ if(MAX_CAUSAL_ITERATIONS GREATER 100) set(MAX_CAUSAL_ITERATIONS 100) endif() +if( + DEFINED ROCmVersion_FULL_VERSION + AND ROCmVersion_FULL_VERSION VERSION_GREATER_EQUAL "7.0" +) + set(ENABLE_ROCPD_TEST YES) +else() + set(ENABLE_ROCPD_TEST NO) +endif() + +rocprofiler_systems_message( + STATUS + "ROCm ${ROCmVersion_FULL_VERSION} - Including ROCPD Test: ${ENABLE_ROCPD_TEST}" +) + if(DEFINED ROCM_PATH) set(ROCM_LLVM_LIB_PATH "${ROCM_PATH}/lib/llvm/lib") set(_test_library_path @@ -122,16 +136,6 @@ set(_lock_environment "${_test_library_path}" ) -set(_ompt_environment - "ROCPROFSYS_TRACE=ON" - "ROCPROFSYS_PROFILE=ON" - "ROCPROFSYS_TIME_OUTPUT=OFF" - "ROCPROFSYS_USE_OMPT=ON" - "ROCPROFSYS_TIMEMORY_COMPONENTS=wall_clock,trip_count,peak_rss" - "${_test_openmp_env}" - "${_test_library_path}" -) - set(_perfetto_environment "ROCPROFSYS_TRACE=ON" "ROCPROFSYS_PROFILE=OFF" @@ -1178,7 +1182,7 @@ function(ROCPROFILER_SYSTEMS_ADD_VALIDATION_TEST) cmake_parse_arguments( TEST "" - 
"NAME;TIMEOUT;TIMEMORY_METRIC;TIMEMORY_FILE;PERFETTO_METRIC;PERFETTO_FILE" + "NAME;TIMEOUT;TIMEMORY_METRIC;TIMEMORY_FILE;PERFETTO_METRIC;PERFETTO_FILE;ROCPD_FILE" "ENVIRONMENT;LABELS;PROPERTIES;PASS_REGEX;FAIL_REGEX;SKIP_REGEX;DEPENDS;EXIST_FILES;ARGS" ${ARGN} ) @@ -1211,7 +1215,7 @@ function(ROCPROFILER_SYSTEMS_ADD_VALIDATION_TEST) if(NOT TEST_PASS_REGEX) set(TEST_PASS_REGEX - "rocprof-sys-tests-output/${TEST_NAME}/(${TEST_TIMEMORY_FILE}|${TEST_PERFETTO_FILE}) validated" + "rocprof-sys-tests-output/${TEST_NAME}/(${TEST_TIMEMORY_FILE}|${TEST_PERFETTO_FILE}|${TEST_ROCPD_FILE}) validated" ) endif() @@ -1250,13 +1254,46 @@ function(ROCPROFILER_SYSTEMS_ADD_VALIDATION_TEST) ) endif() + if(TEST_ROCPD_FILE) + add_test( + NAME validate-${TEST_NAME}-rocpd + COMMAND + ${ROCPROFSYS_VALIDATION_PYTHON} + ${CMAKE_CURRENT_LIST_DIR}/validate-rocpd.py -db + ${PROJECT_BINARY_DIR}/rocprof-sys-tests-output/${TEST_NAME}/${TEST_ROCPD_FILE} + ${TEST_ARGS} + WORKING_DIRECTORY ${PROJECT_BINARY_DIR} + ) + endif() + list(APPEND TEST_ENVIRONMENT "ROCPROFSYS_CI_TIMEOUT=${TEST_TIMEOUT}") - foreach(_TEST validate-${TEST_NAME}-timemory validate-${TEST_NAME}-perfetto) + foreach( + _TEST + validate-${TEST_NAME}-timemory + validate-${TEST_NAME}-perfetto + validate-${TEST_NAME}-rocpd + ) + # Skip tests that don't exist if(NOT TEST "${_TEST}") continue() endif() + # Skip timemory validation if no timemory file is specified + if("${_TEST}" MATCHES "-timemory" AND NOT TEST_TIMEMORY_FILE) + continue() + endif() + + # Skip perfetto validation if no perfetto file is specified + if("${_TEST}" MATCHES "-perfetto" AND NOT TEST_PERFETTO_FILE) + continue() + endif() + + # Skip rocpd validation if no rocpd file is specified + if("${_TEST}" MATCHES "-rocpd" AND NOT TEST_ROCPD_FILE) + continue() + endif() + rocprofiler_systems_check_pass_fail_regex("${_TEST}" "TEST_PASS_REGEX" "TEST_FAIL_REGEX" ) diff --git a/projects/rocprofiler-systems/tests/validate-rocpd.py 
#!/usr/bin/env python3

# Copyright (c) Advanced Micro Devices, Inc.
# SPDX-License-Identifier: MIT

"""Validate a ROCPD SQLite database against one or more JSON rules files."""

import argparse
import json
import operator
import os
import sqlite3
import sys
from contextlib import closing
from pathlib import Path


class validation_rule:
    """A single validation query and the result it is expected to produce.

    Instances are created by ``load_validation_rules`` from the
    ``validation_queries`` entries of a JSON rules file.
    """

    # Maps the comparison name used in the rules file to the binary predicate
    # applied as ``predicate(actual_result, expected_result)``.
    _COMPARISONS = {
        "equals": operator.eq,
        "greater_than": operator.gt,
        "less_than": operator.lt,
        "greater_than_or_equal": operator.ge,
        "less_than_or_equal": operator.le,
        "not_equals": operator.ne,
    }

    def __init__(self, description, query, expected_result, comparison, error_message):
        self.description = description
        self.query = query
        self.expected_result = expected_result
        self.comparison = comparison
        self.error_message = error_message

    def __repr__(self):
        return f"validation_rule(description={self.description}, query={self.query})"

    def validate_query(self, result):
        """Compare ``result`` with ``expected_result`` using ``comparison``.

        Args:
            result: Value produced by the validation query. May be ``None``
                when the query returned no row.

        Returns:
            bool: True when the comparison holds. Values that cannot be
            ordered against the expected result (e.g. ``None`` vs. a number)
            count as a failed check instead of raising.

        Raises:
            ValueError: If ``comparison`` is not a supported operator name.
        """
        try:
            compare = self._COMPARISONS[self.comparison]
        except (KeyError, TypeError):
            # TypeError covers an unhashable comparison value coming from JSON.
            raise ValueError(
                f"Unknown comparison operator: {self.comparison}"
            ) from None

        try:
            return bool(compare(result, self.expected_result))
        except TypeError:
            # e.g. ``None > 0``: report a validation failure, do not abort the run.
            return False


class required_table:
    """A table (exact name or name prefix) that must exist in the database."""

    def __init__(
        self, name, name_prefix, required_columns, min_rows=1, validation_queries=None
    ):
        # Exactly one of ``name`` / ``name_prefix`` identifies the table(s).
        if name is None and name_prefix is None:
            raise ValueError("Either 'name' or 'name_prefix' must be specified")
        if name is not None and name_prefix is not None:
            raise ValueError("Cannot specify both 'name' and 'name_prefix'")

        self.name = name
        self.name_prefix = name_prefix
        self.required_columns = required_columns
        self.min_rows = min_rows
        self.validation_queries = validation_queries or []

    def __repr__(self):
        identifier = (
            f"name={self.name}" if self.name else f"name_prefix={self.name_prefix}"
        )
        return f"required_table({identifier}, required_columns={self.required_columns})"

    def get_table_identifier(self):
        """Return the table name, or ``<prefix>*`` for prefix rules (display only)."""
        return self.name if self.name else f"{self.name_prefix}*"


def print_help():
    """Print the usage/help text of this script to stdout."""
    script = os.path.basename(__file__)
    print(
        f"""
    ROCPD Database Validation Tool

    DESCRIPTION:
        This tool validates ROCm Profiler Database (ROCPD) files against a set of predefined rules.
        It checks for required tables, columns, minimum row counts, and executes custom validation queries.

    USAGE:
        {script} --database PATH [OPTIONS]

    REQUIRED ARGUMENTS:
        -db, --database PATH                      Path to the ROCPD database file (.db) to validate

    OPTIONAL ARGUMENTS:
        -r, --validation-rules PATH [PATH ...]    One or more JSON rules files
                                                  (default: rocpd-validation-rules/default-rules.json)
        -h, --help                                Show this help message and exit

    EXAMPLES:
        # Validate database with default rules
        {script} --database my_profile.db

        # Validate database with custom rules file
        {script} --database my_profile.db -r custom_rules.json

        # Validate database with multiple rules files
        {script} --database my_profile.db -r validation_rules.json amd_smi_rules.json

    VALIDATION FEATURES:
        - Checks for presence of required tables
        - Verifies required columns exist in each table
        - Ensures minimum row count requirements are met
        - Executes custom SQL validation queries
        - Supports various comparison operators (equals, greater_than, less_than, etc.)

    EXIT CODES:
        0  - All validations passed successfully
        64 - Invalid command line arguments (EX_USAGE)
        65 - Validation failures detected (EX_DATAERR)
        1  - General error (database connection, file not found, etc.)
    """
    )


def _quote_identifier(name):
    """Return ``name`` as a double-quoted SQL identifier (embedded quotes doubled)."""
    return '"' + name.replace('"', '""') + '"'


def _find_matching_tables(rule, tables):
    """Return the rows of ``tables`` selected by ``rule`` (exact name or prefix)."""
    if rule.name:
        for table in tables:
            if table["name"] == rule.name:
                return [table]
        return []
    if rule.name_prefix:
        return [t for t in tables if t["name"].startswith(rule.name_prefix)]
    return []


def _run_validation_queries(cursor, rule, table_name):
    """Run every validation query of ``rule`` on ``table_name``; True if all pass."""
    all_queries_passed = True

    for validation_query in rule.validation_queries:
        try:
            # The table name is substituted verbatim: the rules file owns the
            # SQL text and may already quote the ``{table_name}`` placeholder.
            query = validation_query.query.replace("{table_name}", table_name)
            cursor.execute(query)
            result = cursor.fetchone()

            # Prefer a column named "count"; otherwise use the first column.
            if result and "count" in result.keys():
                actual_result = result["count"]
            else:
                actual_result = result[0] if result else None

            if not validation_query.validate_query(actual_result):
                print(
                    f"❌ ERROR: {validation_query.error_message} (Table: '{table_name}')"
                )
                print(
                    f"   Expected: {validation_query.comparison} {validation_query.expected_result}, Got: {actual_result}"
                )
                all_queries_passed = False
            else:
                print(
                    f"✅ Validation query passed for '{table_name}': {validation_query.description}"
                )

        except sqlite3.Error as e:
            print(
                f"❌ ERROR: Failed to execute validation query on '{table_name}': {e}"
            )
            print(f"Query: {validation_query.query}")
            all_queries_passed = False

    return all_queries_passed


def validate_table(cursor, rule, tables) -> bool:
    """
    Validate the database table(s) selected by ``rule``.

    For every table matching the rule this checks, in order: that all required
    columns exist, that the table holds at least ``rule.min_rows`` rows, and
    that every validation query of the rule passes. A table failing the column
    or row-count check is reported and its remaining checks are skipped.

    Args:
        cursor: Database cursor used to execute SQL queries. Rows must support
            access by column name (the script uses ``sqlite3.Row``).
        rule: ``required_table`` object describing the expected table(s).
        tables: Rows with a ``name`` column listing the tables/views present
            in the database.

    Returns:
        bool: True if at least one matching table exists and all matching
              tables pass every check, False otherwise.
    """

    matching_tables = _find_matching_tables(rule, tables)

    if not matching_tables:
        if rule.name:
            print(f"❌ ERROR: Required table '{rule.name}' not found in database")
        elif rule.name_prefix:
            print(
                f"❌ ERROR: No tables found with prefix '{rule.name_prefix}' in database"
            )
        return False

    all_tables_passed = True

    for matching_table in matching_tables:
        table_name = matching_table["name"]
        # Quote the name so tables that are not bare identifiers (e.g. names
        # containing '-') do not produce an SQL syntax error.
        quoted_name = _quote_identifier(table_name)

        try:
            cursor.execute(f"PRAGMA table_info({quoted_name})")
            column_names = [col["name"] for col in cursor.fetchall()]

            missing_columns = [
                col for col in rule.required_columns if col not in column_names
            ]
            if missing_columns:
                print(
                    f"❌ ERROR: Table '{table_name}' missing required columns: {missing_columns}"
                )
                all_tables_passed = False
                continue

            print(
                f"✅ All required columns present in '{table_name}': {rule.required_columns}"
            )

            cursor.execute(f"SELECT COUNT(*) as count FROM {quoted_name}")
            row_count = cursor.fetchone()["count"]

            if row_count < rule.min_rows:
                print(
                    f"❌ ERROR: Table '{table_name}' has {row_count} rows, minimum required: {rule.min_rows}"
                )
                all_tables_passed = False
                continue

            print(
                f"✅ Row count check passed for '{table_name}': {row_count} rows (minimum: {rule.min_rows})"
            )

            if not _run_validation_queries(cursor, rule, table_name):
                all_tables_passed = False

        except sqlite3.Error as e:
            print(f"❌ ERROR: Failed to validate table '{table_name}': {e}")
            all_tables_passed = False

    return all_tables_passed


def validate_rocpd(cursor, rules, tables) -> bool:
    """
    Apply every rule in ``rules`` to the database and report the overall result.

    Args:
        cursor: Database cursor object for executing SQL queries.
        rules: List of ``required_table`` objects to check.
        tables: Rows with a ``name`` column listing the tables/views present
            in the database.

    Returns:
        bool: True if all validation checks pass for all rules,
              False if any validation fails.
    """

    print("Starting ROCPD database validation...")
    db_valid = True

    for rule in rules:
        print(f"\nValidating table: {rule.get_table_identifier()}")
        # Always run the check (even after a failure) so every rule is reported.
        table_valid = validate_table(cursor, rule, tables)
        db_valid = db_valid and table_valid

    if db_valid:
        print("\n✅ All validation checks passed!")
    else:
        print("\n❌ Some validation checks failed!")

    return db_valid


def _parse_required_table(table_data):
    """Build a ``required_table`` from one ``required_tables`` JSON entry."""
    validation_queries = [
        validation_rule(
            description=vq["description"],
            query=vq["query"],
            expected_result=vq["expected_result"],
            comparison=vq.get("comparison", "equals"),
            error_message=vq["error_message"],
        )
        for vq in table_data.get("validation_queries", [])
    ]

    return required_table(
        name=table_data.get("name", None),
        name_prefix=table_data.get("name_prefix", None),
        required_columns=table_data["required_columns"],
        min_rows=table_data.get("min_rows", 1),
        validation_queries=validation_queries,
    )


def load_validation_rules(validation_rules) -> list:
    """
    Load validation rules from JSON files and convert them to rule objects.

    Loading is all-or-nothing: if any file is missing or malformed, an error
    is printed and an empty list is returned.

    Args:
        validation_rules: Iterable of paths to JSON rules files. Each file
            must contain a top-level ``required_tables`` list.

    Returns:
        list: ``required_table`` objects from all files, in file order.
              Empty list if any file doesn't exist or on error.
    """
    all_rules = []

    for rules_file in validation_rules:
        rules_path = Path(rules_file)
        if not rules_path.exists():
            print(f"Error: Rules file '{rules_file}' not found")
            return []

        try:
            with open(rules_path, "r", encoding="utf-8") as f:
                rules_data = json.load(f)

            rules = []
            for table_data in rules_data["required_tables"]:
                required_table_obj = _parse_required_table(table_data)
                rules.append(required_table_obj)
                print(f"Loaded required table rule: {required_table_obj}")
        except (OSError, ValueError, KeyError, TypeError, AttributeError) as e:
            # ValueError also covers json.JSONDecodeError and decoding errors.
            print(f"Error loading rules file '{rules_file}': {e!r}")
            return []

        all_rules.extend(rules)

    if not all_rules:
        print("Warning: No validation rules loaded from any file")
    else:
        print(f"Total rules loaded: {len(all_rules)}")

    return all_rules


def _build_parser():
    """Create the command line parser (help is handled manually via print_help)."""
    parser = argparse.ArgumentParser(add_help=False)

    parser.add_argument(
        "-db", "--database", type=Path, help="Database file to validate", default=None
    )

    parser.add_argument(
        "-r",
        "--validation-rules",
        type=Path,
        nargs="+",
        help="Rules against which to validate database",
        default=[
            Path(
                f"{os.path.dirname(os.path.abspath(__file__))}/rocpd-validation-rules/default-rules.json"
            )
        ],
    )

    parser.add_argument(
        "-h", "--help", action="store_true", help="Prints out the help message"
    )

    return parser


def main(argv=None) -> int:
    """
    Run the validation tool.

    Args:
        argv: Command line arguments without the program name; ``None`` means
            ``sys.argv[1:]``.

    Returns:
        int: Process exit code: ``os.EX_OK`` on success, ``os.EX_USAGE`` when
             no database was given, ``os.EX_DATAERR`` on validation failure
             and 1 on any other error.
    """
    args = _build_parser().parse_args(argv)

    if args.help:
        print_help()
        return os.EX_OK

    if not args.database:
        print("Database file not provided!")
        print_help()
        return os.EX_USAGE

    db_path = args.database
    print(f"Validating ROCPD. Database file: {db_path}")
    rules = load_validation_rules(args.validation_rules)

    if not rules:
        print("❌ No validation rules loaded. Exiting.")
        return 1

    try:
        if not Path(db_path).exists():
            print(f"❌ Error: Database file '{db_path}' not found")
            return 1

        # closing() guarantees the connection is released even if validation raises.
        with closing(sqlite3.connect(db_path)) as conn:
            conn.row_factory = sqlite3.Row
            cursor = conn.cursor()

            print(f"✅ Successfully connected to database: {db_path}")

            cursor.execute(
                "SELECT name FROM sqlite_master WHERE type IN ('table', 'view');"
            )
            tables = cursor.fetchall()

            validation_result = validate_rocpd(cursor, rules, tables)

    except sqlite3.Error as e:
        print(f"SQLite error: {e}")
        return 1
    except Exception as e:  # top-level boundary: report and fail
        print(f"Error: {e}")
        return 1

    # NOTE: the "<path> validated" line is what the CTest PASS_REGULAR_EXPRESSION
    # for rocpd validation tests matches; keep its format stable.
    if validation_result:
        print(f"✅ {db_path} validated")
        return os.EX_OK

    print(f"❌ Failure validating {db_path}")
    return os.EX_DATAERR


if __name__ == "__main__":
    sys.exit(main())