add MiniMax
Tento commit je obsažen v:
+11
-1
@@ -68,7 +68,7 @@ MODEL_TABLE = {
|
||||
# 5. Qwen3-Next 80B GPTQ (Int4A16)
|
||||
# Size: ~48GB. Fits on 2x32GB (64GB). Leftover for Cache: ~16GB.
|
||||
# Config: 20k ctx fits in that cache. Eager mode required for stability.
|
||||
"dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16": {
|
||||
"dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16": {
|
||||
"trust_remote": True,
|
||||
"valid_tp": [1], # Too big for single GPU
|
||||
"max_num_seqs": "64", # Large Model / Bandwidth Constrained
|
||||
@@ -77,6 +77,15 @@ MODEL_TABLE = {
|
||||
"env": {"VLLM_USE_TRITON_AWQ": "1"} # Fixes "Unsupported Hardware" error
|
||||
},
|
||||
|
||||
"mratsim/MiniMax-M2.5-BF16-INT4-AWQ": {
|
||||
"trust_remote": True,
|
||||
"valid_tp": [2],
|
||||
"max_num_seqs": "64",
|
||||
"max_tokens": "16384",
|
||||
"enforce_eager": False,
|
||||
"env": {"VLLM_USE_TRITON_AWQ": "1"} # Fixes "Unsupported Hardware" error
|
||||
},
|
||||
|
||||
}
|
||||
|
||||
MODELS_TO_RUN = [
|
||||
@@ -89,6 +98,7 @@ MODELS_TO_RUN = [
|
||||
"btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
||||
"btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
||||
"dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
||||
"mratsim/MiniMax-M2.5-BF16-INT4-AWQ",
|
||||
]
|
||||
|
||||
# Hardware / Global Defaults
|
||||
|
||||
+37
-21
@@ -41,33 +41,49 @@ def get_discovered_models():
|
||||
Overrides the hardcoded MODELS_TO_RUN by looking at what we actually have results for.
|
||||
This allows the UI to show all verified models, not just what's enabled for benchmarking.
|
||||
"""
|
||||
if not RESULTS_FILE.exists():
|
||||
return MODELS_TO_RUN
|
||||
|
||||
try:
|
||||
with open(RESULTS_FILE, "r") as f:
|
||||
data = json.load(f)
|
||||
|
||||
# 1. Find all models with at least one success
|
||||
verified_models = set()
|
||||
for r in data:
|
||||
if r.get("status") == "success":
|
||||
verified_models.add(r["model"])
|
||||
|
||||
# 2. Filter: Must be in MODEL_TABLE (so we have config/valid_tp)
|
||||
# and must be in our verified list
|
||||
final_list = []
|
||||
for m in sorted(list(verified_models)):
|
||||
if m in MODEL_TABLE:
|
||||
final_list.append(m)
|
||||
if RESULTS_FILE.exists():
|
||||
with open(RESULTS_FILE, "r") as f:
|
||||
data = json.load(f)
|
||||
|
||||
if final_list:
|
||||
return final_list
|
||||
# 1. Find all models with at least one success
|
||||
verified_models = set()
|
||||
for r in data:
|
||||
if r.get("status") == "success":
|
||||
verified_models.add(r["model"])
|
||||
|
||||
# 2. Filter: Must be in MODEL_TABLE (so we have config/valid_tp)
|
||||
# and must be in our verified list (if results exist)
|
||||
final_list = []
|
||||
gpu_count = detect_gpus()
|
||||
|
||||
for m in sorted(list(verified_models)):
|
||||
if m in MODEL_TABLE:
|
||||
# Check valid_tp
|
||||
valid_tps = MODEL_TABLE[m].get("valid_tp", [1])
|
||||
min_required = min(valid_tps)
|
||||
|
||||
if min_required <= gpu_count:
|
||||
final_list.append(m)
|
||||
|
||||
if final_list:
|
||||
return final_list
|
||||
|
||||
except Exception as e:
|
||||
print(f"Warning: Model discovery failed ({e}). Using default list.")
|
||||
|
||||
return MODELS_TO_RUN
|
||||
# Fallback if there is no results file, discovery raised an error, or no verified model qualified: return all models compatible with the current hardware
|
||||
gpu_count = detect_gpus()
|
||||
compatible_models = []
|
||||
|
||||
for m in MODELS_TO_RUN:
|
||||
if m in MODEL_TABLE:
|
||||
valid_tps = MODEL_TABLE[m].get("valid_tp", [1])
|
||||
min_required = min(valid_tps)
|
||||
if min_required <= gpu_count:
|
||||
compatible_models.append(m)
|
||||
|
||||
return compatible_models
|
||||
|
||||
# Refresh the list of models to run based on what we found
|
||||
MODELS_TO_RUN = get_discovered_models()
|
||||
|
||||
@@ -41,29 +41,8 @@ def get_discovered_models():
|
||||
"""
|
||||
Overrides the hardcoded MODELS_TO_RUN by looking at what we actually have results for.
|
||||
"""
|
||||
if not RESULTS_FILE.exists():
|
||||
return MODELS_TO_RUN
|
||||
|
||||
try:
|
||||
with open(RESULTS_FILE, "r") as f:
|
||||
data = json.load(f)
|
||||
|
||||
verified_models = set()
|
||||
for r in data:
|
||||
if r.get("status") == "success":
|
||||
verified_models.add(r["model"])
|
||||
|
||||
final_list = []
|
||||
for m in sorted(list(verified_models)):
|
||||
if m in MODEL_TABLE:
|
||||
final_list.append(m)
|
||||
|
||||
if final_list:
|
||||
return final_list
|
||||
|
||||
except Exception as e:
|
||||
print(f"Warning: Model discovery failed ({e}). Using default list.")
|
||||
|
||||
# Bypass verification check for Cluster Launcher
|
||||
# We want to see ALL models, including those that require TP > 1 (which find_max_context might have skipped)
|
||||
return MODELS_TO_RUN
|
||||
|
||||
# Refresh the list of models to run based on what we found
|
||||
|
||||
Odkázat v novém úkolu
Zablokovat Uživatele