Add MiniMax-M2.5 (BF16-INT4-AWQ) to the model table and run list

2026-02-18 15:22:12 +00:00
@@ -68,7 +68,7 @@ MODEL_TABLE = {
    # 5. Qwen 80B AWQ
    # Size: ~48GB. Fits on 2x32GB (64GB). Leftover for Cache: ~16GB.
    # Config: 20k ctx fits in that cache. Eager mode required for stability.
-     "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16": {
+    "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16": {
        "trust_remote": True,
        "valid_tp": [1], # Too big for single GPU
        "max_num_seqs": "64", # Large Model / Bandwidth Constrained
@@ -77,6 +77,15 @@ MODEL_TABLE = {
        "env": {"VLLM_USE_TRITON_AWQ": "1"} # Fixes "Unsupported Hardware" error
    },

+    "mratsim/MiniMax-M2.5-BF16-INT4-AWQ": {
+        "trust_remote": True,
+        "valid_tp": [2],
+        "max_num_seqs": "64",
+        "max_tokens": "16384",
+        "enforce_eager": False,
+        "env": {"VLLM_USE_TRITON_AWQ": "1"} # Fixes "Unsupported Hardware" error
+    },
+
 }

 MODELS_TO_RUN = [
@@ -89,6 +98,7 @@ MODELS_TO_RUN = [
    "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
    "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
    "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
+    "mratsim/MiniMax-M2.5-BF16-INT4-AWQ",
 ]

 # Hardware / Global Defaults
@@ -41,33 +41,49 @@ def get_discovered_models():
    Overrides the hardcoded MODELS_TO_RUN by looking at what we actually have results for.
    This allows the UI to show all verified models, not just what's enabled for benchmarking.
    """
-    if not RESULTS_FILE.exists():
-        return MODELS_TO_RUN
-        
    try:
-        with open(RESULTS_FILE, "r") as f:
-            data = json.load(f)
-            
-        # 1. Find all models with at least one success
-        verified_models = set()
-        for r in data:
-            if r.get("status") == "success":
-                verified_models.add(r["model"])
-        
-        # 2. Filter: Must be in MODEL_TABLE (so we have config/valid_tp)
-        #    and must be in our verified list
-        final_list = []
-        for m in sorted(list(verified_models)):
-            if m in MODEL_TABLE:
-                final_list.append(m)
+        if RESULTS_FILE.exists():
+            with open(RESULTS_FILE, "r") as f:
+                data = json.load(f)
                
-        if final_list:
-            return final_list
+            # 1. Find all models with at least one success
+            verified_models = set()
+            for r in data:
+                if r.get("status") == "success":
+                    verified_models.add(r["model"])
+            
+            # 2. Filter: Must be in MODEL_TABLE (so we have config/valid_tp)
+            #    and must be in our verified list (if results exist)
+            final_list = []
+            gpu_count = detect_gpus()
+            
+            for m in sorted(list(verified_models)):
+                if m in MODEL_TABLE:
+                    # Keep the model only if its smallest valid TP size fits on the detected GPUs
+                    valid_tps = MODEL_TABLE[m].get("valid_tp", [1])
+                    min_required = min(valid_tps)
+                    
+                    if min_required <= gpu_count:
+                        final_list.append(m)
+                    
+            if final_list:
+                return final_list
            
    except Exception as e:
        print(f"Warning: Model discovery failed ({e}). Using default list.")
        
-    return MODELS_TO_RUN
+    # Fallback (no results file, discovery error, or no verified model fits): return every MODELS_TO_RUN entry compatible with the current hardware
+    gpu_count = detect_gpus()
+    compatible_models = []
+    
+    for m in MODELS_TO_RUN:
+        if m in MODEL_TABLE:
+            valid_tps = MODEL_TABLE[m].get("valid_tp", [1])
+            min_required = min(valid_tps)
+            if min_required <= gpu_count:
+                compatible_models.append(m)
+                
+    return compatible_models

 # Refresh the list of models to run based on what we found
 MODELS_TO_RUN = get_discovered_models()
@@ -41,29 +41,8 @@ def get_discovered_models():
    """
    Overrides the hardcoded MODELS_TO_RUN by looking at what we actually have results for.
    """
-    if not RESULTS_FILE.exists():
-        return MODELS_TO_RUN
-        
-    try:
-        with open(RESULTS_FILE, "r") as f:
-            data = json.load(f)
-            
-        verified_models = set()
-        for r in data:
-            if r.get("status") == "success":
-                verified_models.add(r["model"])
-        
-        final_list = []
-        for m in sorted(list(verified_models)):
-            if m in MODEL_TABLE:
-                final_list.append(m)
-                
-        if final_list:
-            return final_list
-            
-    except Exception as e:
-        print(f"Warning: Model discovery failed ({e}). Using default list.")
-        
+    # Bypass verification check for Cluster Launcher
+    # We want to see ALL models, including those that require TP > 1 (which find_max_context might have skipped)
    return MODELS_TO_RUN

 # Refresh the list of models to run based on what we found