Tento commit je obsažen v:
Donato Capitella
2026-02-18 15:22:12 +00:00
rodič 290beffb05
revize 49b85fc1fb
Změněny 3 soubory: 50 přidání a 45 odebrání
+11 -1
Zobrazit soubor
@@ -68,7 +68,7 @@ MODEL_TABLE = {
# 5. Qwen3-Next 80B GPTQ (Int4A16)
# Size: ~48GB. Fits on 2x32GB (64GB). Leftover for Cache: ~16GB.
# Config: 20k ctx fits in that cache. Eager mode required for stability.
"dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16": {
"dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16": {
"trust_remote": True,
"valid_tp": [1], # Too big for single GPU
"max_num_seqs": "64", # Large Model / Bandwidth Constrained
@@ -77,6 +77,15 @@ MODEL_TABLE = {
"env": {"VLLM_USE_TRITON_AWQ": "1"} # Fixes "Unsupported Hardware" error
},
"mratsim/MiniMax-M2.5-BF16-INT4-AWQ": {
"trust_remote": True,
"valid_tp": [2],
"max_num_seqs": "64",
"max_tokens": "16384",
"enforce_eager": False,
"env": {"VLLM_USE_TRITON_AWQ": "1"} # Fixes "Unsupported Hardware" error
},
}
MODELS_TO_RUN = [
@@ -89,6 +98,7 @@ MODELS_TO_RUN = [
"btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
"btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
"dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
"mratsim/MiniMax-M2.5-BF16-INT4-AWQ",
]
# Hardware / Global Defaults
+37 -21
Zobrazit soubor
@@ -41,33 +41,49 @@ def get_discovered_models():
Overrides the hardcoded MODELS_TO_RUN by looking at what we actually have results for.
This allows the UI to show all verified models, not just what's enabled for benchmarking.
"""
if not RESULTS_FILE.exists():
return MODELS_TO_RUN
try:
with open(RESULTS_FILE, "r") as f:
data = json.load(f)
# 1. Find all models with at least one success
verified_models = set()
for r in data:
if r.get("status") == "success":
verified_models.add(r["model"])
# 2. Filter: Must be in MODEL_TABLE (so we have config/valid_tp)
# and must be in our verified list
final_list = []
for m in sorted(list(verified_models)):
if m in MODEL_TABLE:
final_list.append(m)
if RESULTS_FILE.exists():
with open(RESULTS_FILE, "r") as f:
data = json.load(f)
if final_list:
return final_list
# 1. Find all models with at least one success
verified_models = set()
for r in data:
if r.get("status") == "success":
verified_models.add(r["model"])
# 2. Filter: Must be in MODEL_TABLE (so we have config/valid_tp)
# and must be in our verified list (if results exist)
final_list = []
gpu_count = detect_gpus()
for m in sorted(list(verified_models)):
if m in MODEL_TABLE:
# Check valid_tp
valid_tps = MODEL_TABLE[m].get("valid_tp", [1])
min_required = min(valid_tps)
if min_required <= gpu_count:
final_list.append(m)
if final_list:
return final_list
except Exception as e:
print(f"Warning: Model discovery failed ({e}). Using default list.")
return MODELS_TO_RUN
# Fallback if no results file or error: return all models compatible with current hardware
gpu_count = detect_gpus()
compatible_models = []
for m in MODELS_TO_RUN:
if m in MODEL_TABLE:
valid_tps = MODEL_TABLE[m].get("valid_tp", [1])
min_required = min(valid_tps)
if min_required <= gpu_count:
compatible_models.append(m)
return compatible_models
# Refresh the list of models to run based on what we found
MODELS_TO_RUN = get_discovered_models()
+2 -23
Zobrazit soubor
@@ -41,29 +41,8 @@ def get_discovered_models():
"""
Overrides the hardcoded MODELS_TO_RUN by looking at what we actually have results for.
"""
if not RESULTS_FILE.exists():
return MODELS_TO_RUN
try:
with open(RESULTS_FILE, "r") as f:
data = json.load(f)
verified_models = set()
for r in data:
if r.get("status") == "success":
verified_models.add(r["model"])
final_list = []
for m in sorted(list(verified_models)):
if m in MODEL_TABLE:
final_list.append(m)
if final_list:
return final_list
except Exception as e:
print(f"Warning: Model discovery failed ({e}). Using default list.")
# Bypass verification check for Cluster Launcher
# We want to see ALL models, including those that require TP > 1 (which find_max_context might have skipped)
return MODELS_TO_RUN
# Refresh the list of models to run based on what we found