feat: Implement dynamic model discovery from benchmark results, add benchmark notes, and include dialog dependency.

This commit is contained in:
Donato Capitella
2025-12-20 12:31:20 +00:00
parent 711de530f6
commit 3b0e736c94
2 changed files with 37 additions and 1 deletions
+36
View File
@@ -31,6 +31,42 @@ else:
HOST = os.getenv("HOST", "0.0.0.0")
PORT = os.getenv("PORT", "8000")
def get_discovered_models():
    """Return the models that have at least one successful benchmark result.

    Reads RESULTS_FILE as JSON and collects the ``"model"`` value of every
    record whose ``"status"`` is ``"success"``. Only names that are also
    present in MODEL_TABLE are kept, so that config/valid_tp is available
    for every returned model. This lets the UI show all verified models,
    not just those currently enabled for benchmarking.

    Returns:
        A sorted list of discovered model names, or the existing
        MODELS_TO_RUN when the results file is missing, unreadable, or
        yields no usable model.
    """
    if not RESULTS_FILE.exists():
        return MODELS_TO_RUN

    try:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(RESULTS_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)

        # 1. Find all models with at least one success. Records that are not
        #    objects, or that lack a "model" key, are skipped so a single
        #    malformed entry does not throw away every other result.
        verified_models = {
            record["model"]
            for record in data
            if isinstance(record, dict)
            and record.get("status") == "success"
            and "model" in record
        }

        # 2. Filter: must be in MODEL_TABLE (so we have config/valid_tp).
        #    Filtering before sorting keeps unknown, oddly-typed names from
        #    breaking the sort.
        final_list = sorted(m for m in verified_models if m in MODEL_TABLE)
        if final_list:
            return final_list
    except Exception as e:
        # Deliberately broad: discovery is best-effort and must never stop
        # the program from starting with the default list.
        print(f"Warning: Model discovery failed ({e}). Using default list.")

    return MODELS_TO_RUN
# Rebind MODELS_TO_RUN to the list derived from recorded benchmark results.
# get_discovered_models() hands back the current MODELS_TO_RUN when the
# results file does not exist, so in that case this assignment is a no-op.
MODELS_TO_RUN = get_discovered_models()
def check_dependencies():
if not shutil.which("dialog"):
print("Error: 'dialog' is required. Please install it (apt-get install dialog).")