amd-strix-halo-vllm-toolboxes/benchmarks/max_context_results.json

[
  {
    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
    "tp": 1,
    "util": "0.95",
    "max_seqs": 1,
    "model_limit": 131072,
    "configured_len": 131072,
    "real_capacity": 829952,
    "status": "success",
    "error": "",
    "max_context_1_user": 131072
  },
  {
    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
    "tp": 1,
    "util": "0.95",
    "max_seqs": 4,
    "model_limit": 131072,
    "configured_len": 131072,
    "real_capacity": 830064,
    "status": "success",
    "error": "",
    "max_context_1_user": 131072
  },
  {
    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
    "tp": 1,
    "util": "0.95",
    "max_seqs": 8,
    "model_limit": 131072,
    "configured_len": 131072,
    "real_capacity": 830080,
    "status": "success",
    "error": "",
    "max_context_1_user": 131072
  },
  {
    "model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
    "tp": 1,
    "util": "0.95",
    "max_seqs": 16,
    "model_limit": 131072,
    "configured_len": 131072,
    "real_capacity": 830064,
    "status": "success",
    "error": "",
    "max_context_1_user": 131072
  },
  {
    "model": "google/gemma-3-12b-it",
    "tp": 1,
    "util": "0.95",
    "max_seqs": 1,
    "model_limit": 131072,
    "configured_len": 131072,
    "real_capacity": 246032,
    "status": "success",
    "error": "",
    "max_context_1_user": 131072
  },
  {
    "model": "google/gemma-3-12b-it",
    "tp": 1,
    "util": "0.95",
    "max_seqs": 4,
    "model_limit": 131072,
    "configured_len": 131072,
    "real_capacity": 246064,
    "status": "success",
    "error": "",
    "max_context_1_user": 131072
  },
  {
    "model": "google/gemma-3-12b-it",
    "tp": 1,
    "util": "0.95",
    "max_seqs": 8,
    "model_limit": 131072,
    "configured_len": 131072,
    "real_capacity": 246064,
    "status": "success",
    "error": "",
    "max_context_1_user": 131072
  },
  {
    "model": "google/gemma-3-12b-it",
    "tp": 1,
    "util": "0.95",
    "max_seqs": 16,
    "model_limit": 131072,
    "configured_len": 131072,
    "real_capacity": 246064,
    "status": "success",
    "error": "",
    "max_context_1_user": 131072
  },
  {
    "model": "Qwen/Qwen3-14B-AWQ",
    "tp": 1,
    "util": "0.95",
    "max_seqs": 1,
    "model_limit": 40960,
    "configured_len": 0,
    "real_capacity": 0,
    "status": "fail",
    "error": "Verification Failed"
  },
  {
    "model": "Qwen/Qwen3-14B-AWQ",
    "tp": 1,
    "util": "0.90",
    "max_seqs": 1,
    "model_limit": 40960,
    "configured_len": 40960,
    "real_capacity": 655712,
    "status": "success",
    "error": "",
    "max_context_1_user": 40960
  },
  {
    "model": "Qwen/Qwen3-14B-AWQ",
    "tp": 1,
    "util": "0.90",
    "max_seqs": 4,
    "model_limit": 40960,
    "configured_len": 40960,
    "real_capacity": 655616,
    "status": "success",
    "error": "",
    "max_context_1_user": 40960
  },
  {
    "model": "Qwen/Qwen3-14B-AWQ",
    "tp": 1,
    "util": "0.90",
    "max_seqs": 8,
    "model_limit": 40960,
    "configured_len": 40960,
    "real_capacity": 655600,
    "status": "success",
    "error": "",
    "max_context_1_user": 40960
  },
  {
    "model": "Qwen/Qwen3-14B-AWQ",
    "tp": 1,
    "util": "0.90",
    "max_seqs": 16,
    "model_limit": 40960,
    "configured_len": 40960,
    "real_capacity": 655600,
    "status": "success",
    "error": "",
    "max_context_1_user": 40960
  },
  {
    "model": "openai/gpt-oss-20b",
    "tp": 1,
    "util": "0.95",
    "max_seqs": 1,
    "model_limit": 131072,
    "configured_len": 131072,
    "real_capacity": 2232848,
    "status": "success",
    "error": "",
    "max_context_1_user": 131072
  },
  {
    "model": "openai/gpt-oss-20b",
    "tp": 1,
    "util": "0.95",
    "max_seqs": 4,
    "model_limit": 131072,
    "configured_len": 131072,
    "real_capacity": 2232560,
    "status": "success",
    "error": "",
    "max_context_1_user": 131072
  },
  {
    "model": "openai/gpt-oss-20b",
    "tp": 1,
    "util": "0.95",
    "max_seqs": 8,
    "model_limit": 131072,
    "configured_len": 131072,
    "real_capacity": 2232544,
    "status": "success",
    "error": "",
    "max_context_1_user": 131072
  },
  {
    "model": "openai/gpt-oss-20b",
    "tp": 1,
    "util": "0.95",
    "max_seqs": 16,
    "model_limit": 131072,
    "configured_len": 131072,
    "real_capacity": 2232544,
    "status": "success",
    "error": "",
    "max_context_1_user": 131072
  },
  {
    "model": "openai/gpt-oss-120b",
    "tp": 1,
    "util": "0.95",
    "max_seqs": 1,
    "model_limit": 131072,
    "configured_len": 131072,
    "real_capacity": 711360,
    "status": "success",
    "error": "",
    "max_context_1_user": 131072
  },
  {
    "model": "openai/gpt-oss-120b",
    "tp": 1,
    "util": "0.95",
    "max_seqs": 4,
    "model_limit": 131072,
    "configured_len": 131072,
    "real_capacity": 711168,
    "status": "success",
    "error": "",
    "max_context_1_user": 131072
  },
  {
    "model": "openai/gpt-oss-120b",
    "tp": 1,
    "util": "0.95",
    "max_seqs": 8,
    "model_limit": 131072,
    "configured_len": 131072,
    "real_capacity": 711168,
    "status": "success",
    "error": "",
    "max_context_1_user": 131072
  },
  {
    "model": "openai/gpt-oss-120b",
    "tp": 1,
    "util": "0.95",
    "max_seqs": 16,
    "model_limit": 131072,
    "configured_len": 131072,
    "real_capacity": 711168,
    "status": "success",
    "error": "",
    "max_context_1_user": 131072
  },
  {
    "model": "cpatonn/Qwen3-Coder-30B-A3B-Instruct-GPTQ-4bit",
    "tp": 1,
    "util": "0.95",
    "max_seqs": 1,
    "model_limit": 262144,
    "configured_len": 262144,
    "real_capacity": 1097712,
    "status": "success",
    "error": "",
    "max_context_1_user": 262144
  },
  {
    "model": "cpatonn/Qwen3-Coder-30B-A3B-Instruct-GPTQ-4bit",
    "tp": 1,
    "util": "0.95",
    "max_seqs": 4,
    "model_limit": 262144,
    "configured_len": 0,
    "real_capacity": 0,
    "status": "fail",
    "error": "Verification Failed"
  },
  {
    "model": "cpatonn/Qwen3-Coder-30B-A3B-Instruct-GPTQ-4bit",
    "tp": 1,
    "util": "0.90",
    "max_seqs": 4,
    "model_limit": 262144,
    "configured_len": 209715,
    "real_capacity": 1029856,
    "status": "success",
    "error": "Process died or timed out",
    "max_context_1_user": 209715
  },
  {
    "model": "cpatonn/Qwen3-Coder-30B-A3B-Instruct-GPTQ-4bit",
    "tp": 1,
    "util": "0.90",
    "max_seqs": 8,
    "model_limit": 262144,
    "configured_len": 0,
    "real_capacity": 0,
    "status": "fail",
    "error": "Verification Failed"
  },
  {
    "model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
    "tp": 1,
    "util": "0.95",
    "max_seqs": 1,
    "model_limit": 262144,
    "configured_len": 0,
    "real_capacity": 0,
    "status": "fail",
    "error": "Verification Failed"
  },
  {
    "model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
    "tp": 1,
    "util": "0.90",
    "max_seqs": 1,
    "model_limit": 262144,
    "configured_len": 262144,
    "real_capacity": 696320,
    "status": "success",
    "error": "",
    "max_context_1_user": 262144
  },
  {
    "model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
    "tp": 1,
    "util": "0.90",
    "max_seqs": 4,
    "model_limit": 262144,
    "configured_len": 0,
    "real_capacity": 0,
    "status": "fail",
    "error": "Verification Failed"
  }
]