[ { "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "tp": 1, "util": "0.95", "max_seqs": 1, "model_limit": 131072, "configured_len": 131072, "real_capacity": 828144, "status": "success", "error": "", "max_context_1_user": 131072 }, { "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "tp": 1, "util": "0.95", "max_seqs": 4, "model_limit": 131072, "configured_len": 131072, "real_capacity": 830128, "status": "success", "error": "", "max_context_1_user": 131072 }, { "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "tp": 1, "util": "0.95", "max_seqs": 8, "model_limit": 131072, "configured_len": 131072, "real_capacity": 830144, "status": "success", "error": "", "max_context_1_user": 131072 }, { "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "tp": 1, "util": "0.95", "max_seqs": 16, "model_limit": 131072, "configured_len": 131072, "real_capacity": 830128, "status": "success", "error": "", "max_context_1_user": 131072 }, { "model": "google/gemma-3-12b-it", "tp": 1, "util": "0.95", "max_seqs": 1, "model_limit": 131072, "configured_len": 131072, "real_capacity": 240512, "status": "success", "error": "", "max_context_1_user": 131072 }, { "model": "google/gemma-3-12b-it", "tp": 1, "util": "0.95", "max_seqs": 4, "model_limit": 131072, "configured_len": 131072, "real_capacity": 240544, "status": "success", "error": "", "max_context_1_user": 131072 }, { "model": "google/gemma-3-12b-it", "tp": 1, "util": "0.95", "max_seqs": 8, "model_limit": 131072, "configured_len": 131072, "real_capacity": 240544, "status": "success", "error": "", "max_context_1_user": 131072 }, { "model": "google/gemma-3-12b-it", "tp": 1, "util": "0.95", "max_seqs": 16, "model_limit": 131072, "configured_len": 131072, "real_capacity": 240544, "status": "success", "error": "", "max_context_1_user": 131072 }, { "model": "Qwen/Qwen3-14B-AWQ", "tp": 1, "util": "0.95", "max_seqs": 1, "model_limit": 40960, "configured_len": 40960, "real_capacity": 695200, "status": "success", "error": "", "max_context_1_user": 40960 }, { "model": "Qwen/Qwen3-14B-AWQ", "tp": 1, "util": "0.95", "max_seqs": 4, "model_limit": 40960, "configured_len": 40960, "real_capacity": 696240, "status": "success", "error": "", "max_context_1_user": 40960 }, { "model": "Qwen/Qwen3-14B-AWQ", "tp": 1, "util": "0.95", "max_seqs": 8, "model_limit": 40960, "configured_len": 40960, "real_capacity": 696240, "status": "success", "error": "", "max_context_1_user": 40960 }, { "model": "Qwen/Qwen3-14B-AWQ", "tp": 1, "util": "0.95", "max_seqs": 16, "model_limit": 40960, "configured_len": 40960, "real_capacity": 696240, "status": "success", "error": "", "max_context_1_user": 40960 }, { "model": "openai/gpt-oss-20b", "tp": 1, "util": "0.95", "max_seqs": 1, "model_limit": 131072, "configured_len": 131072, "real_capacity": 2222384, "status": "success", "error": "", "max_context_1_user": 131072 }, { "model": "openai/gpt-oss-20b", "tp": 1, "util": "0.95", "max_seqs": 4, "model_limit": 131072, "configured_len": 131072, "real_capacity": 2232672, "status": "success", "error": "", "max_context_1_user": 131072 }, { "model": "openai/gpt-oss-20b", "tp": 1, "util": "0.95", "max_seqs": 8, "model_limit": 131072, "configured_len": 131072, "real_capacity": 2232672, "status": "success", "error": "", "max_context_1_user": 131072 }, { "model": "openai/gpt-oss-20b", "tp": 1, "util": "0.95", "max_seqs": 16, "model_limit": 131072, "configured_len": 131072, "real_capacity": 2232656, "status": "success", "error": "", "max_context_1_user": 131072 }, { "model": "openai/gpt-oss-120b", "tp": 1, "util": "0.95", "max_seqs": 1, "model_limit": 131072, "configured_len": 131072, "real_capacity": 711296, "status": "success", "error": "", "max_context_1_user": 131072 }, { "model": "openai/gpt-oss-120b", "tp": 1, "util": "0.95", "max_seqs": 4, "model_limit": 131072, "configured_len": 131072, "real_capacity": 711184, "status": "success", "error": "", "max_context_1_user": 131072 }, { "model": "openai/gpt-oss-120b", "tp": 1, "util": "0.95", "max_seqs": 8, "model_limit": 131072, "configured_len": 131072, "real_capacity": 711168, "status": "success", "error": "", "max_context_1_user": 131072 }, { "model": "openai/gpt-oss-120b", "tp": 1, "util": "0.95", "max_seqs": 16, "model_limit": 131072, "configured_len": 131072, "real_capacity": 711168, "status": "success", "error": "", "max_context_1_user": 131072 }, { "model": "zai-org/GLM-4.7-Flash", "tp": 1, "util": "0.95", "max_seqs": 1, "model_limit": 202752, "configured_len": 202752, "real_capacity": 791088, "status": "success", "error": "", "max_context_1_user": 202752 }, { "model": "zai-org/GLM-4.7-Flash", "tp": 1, "util": "0.95", "max_seqs": 4, "model_limit": 202752, "configured_len": 202752, "real_capacity": 791008, "status": "success", "error": "", "max_context_1_user": 202752 }, { "model": "zai-org/GLM-4.7-Flash", "tp": 1, "util": "0.95", "max_seqs": 8, "model_limit": 202752, "configured_len": 202752, "real_capacity": 791008, "status": "success", "error": "", "max_context_1_user": 202752 }, { "model": "zai-org/GLM-4.7-Flash", "tp": 1, "util": "0.95", "max_seqs": 16, "model_limit": 202752, "configured_len": 202752, "real_capacity": 790992, "status": "success", "error": "", "max_context_1_user": 202752 }, { "model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit", "tp": 1, "util": "0.95", "max_seqs": 1, "model_limit": 262144, "configured_len": 262144, "real_capacity": 1088288, "status": "success", "error": "", "max_context_1_user": 262144 }, { "model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit", "tp": 1, "util": "0.95", "max_seqs": 4, "model_limit": 262144, "configured_len": 262144, "real_capacity": 1089072, "status": "success", "error": "", "max_context_1_user": 262144 }, { "model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit", "tp": 1, "util": "0.95", "max_seqs": 8, "model_limit": 262144, "configured_len": 262144, "real_capacity": 1089072, "status": "success", "error": "", "max_context_1_user": 262144 }, { "model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit", "tp": 1, "util": "0.95", "max_seqs": 16, "model_limit": 262144, "configured_len": 262144, "real_capacity": 1089056, "status": "success", "error": "", "max_context_1_user": 262144 }, { "model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit", "tp": 1, "util": "0.95", "max_seqs": 1, "model_limit": 262144, "configured_len": 262144, "real_capacity": 936544, "status": "success", "error": "", "max_context_1_user": 262144 }, { "model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit", "tp": 1, "util": "0.95", "max_seqs": 4, "model_limit": 262144, "configured_len": 262144, "real_capacity": 937312, "status": "success", "error": "", "max_context_1_user": 262144 }, { "model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit", "tp": 1, "util": "0.95", "max_seqs": 8, "model_limit": 262144, "configured_len": 262144, "real_capacity": 937296, "status": "success", "error": "", "max_context_1_user": 262144 }, { "model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit", "tp": 1, "util": "0.95", "max_seqs": 16, "model_limit": 262144, "configured_len": 262144, "real_capacity": 937280, "status": "success", "error": "", "max_context_1_user": 262144 }, { "model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16", "tp": 1, "util": "0.95", "max_seqs": 1, "model_limit": 262144, "configured_len": 262144, "real_capacity": 763776, "status": "success", "error": "", "max_context_1_user": 262144 }, { "model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16", "tp": 1, "util": "0.95", "max_seqs": 4, "model_limit": 262144, "configured_len": 262144, "real_capacity": 763776, "status": "success", "error": "", "max_context_1_user": 262144 }, { "model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16", "tp": 1, "util": "0.95", "max_seqs": 8, "model_limit": 262144, "configured_len": 262144, "real_capacity": 763776, "status": "success", "error": "", "max_context_1_user": 262144 }, { "model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16", "tp": 1, "util": "0.95", "max_seqs": 16, "model_limit": 262144, "configured_len": 262144, "real_capacity": 763776, "status": "success", "error": "", "max_context_1_user": 262144 } ]