Files
amd-strix-halo-vllm-toolboxes/benchmarks/max_context_results.json
T
2026-02-02 11:56:26 +00:00

434 regels
9.5 KiB
JSON

[
{
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"tp": 1,
"util": "0.95",
"max_seqs": 1,
"model_limit": 131072,
"configured_len": 131072,
"real_capacity": 828144,
"status": "success",
"error": "",
"max_context_1_user": 131072
},
{
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"tp": 1,
"util": "0.95",
"max_seqs": 4,
"model_limit": 131072,
"configured_len": 131072,
"real_capacity": 830128,
"status": "success",
"error": "",
"max_context_1_user": 131072
},
{
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"tp": 1,
"util": "0.95",
"max_seqs": 8,
"model_limit": 131072,
"configured_len": 131072,
"real_capacity": 830144,
"status": "success",
"error": "",
"max_context_1_user": 131072
},
{
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"tp": 1,
"util": "0.95",
"max_seqs": 16,
"model_limit": 131072,
"configured_len": 131072,
"real_capacity": 830128,
"status": "success",
"error": "",
"max_context_1_user": 131072
},
{
"model": "google/gemma-3-12b-it",
"tp": 1,
"util": "0.95",
"max_seqs": 1,
"model_limit": 131072,
"configured_len": 131072,
"real_capacity": 240512,
"status": "success",
"error": "",
"max_context_1_user": 131072
},
{
"model": "google/gemma-3-12b-it",
"tp": 1,
"util": "0.95",
"max_seqs": 4,
"model_limit": 131072,
"configured_len": 131072,
"real_capacity": 240544,
"status": "success",
"error": "",
"max_context_1_user": 131072
},
{
"model": "google/gemma-3-12b-it",
"tp": 1,
"util": "0.95",
"max_seqs": 8,
"model_limit": 131072,
"configured_len": 131072,
"real_capacity": 240544,
"status": "success",
"error": "",
"max_context_1_user": 131072
},
{
"model": "google/gemma-3-12b-it",
"tp": 1,
"util": "0.95",
"max_seqs": 16,
"model_limit": 131072,
"configured_len": 131072,
"real_capacity": 240544,
"status": "success",
"error": "",
"max_context_1_user": 131072
},
{
"model": "Qwen/Qwen3-14B-AWQ",
"tp": 1,
"util": "0.95",
"max_seqs": 1,
"model_limit": 40960,
"configured_len": 40960,
"real_capacity": 695200,
"status": "success",
"error": "",
"max_context_1_user": 40960
},
{
"model": "Qwen/Qwen3-14B-AWQ",
"tp": 1,
"util": "0.95",
"max_seqs": 4,
"model_limit": 40960,
"configured_len": 40960,
"real_capacity": 696240,
"status": "success",
"error": "",
"max_context_1_user": 40960
},
{
"model": "Qwen/Qwen3-14B-AWQ",
"tp": 1,
"util": "0.95",
"max_seqs": 8,
"model_limit": 40960,
"configured_len": 40960,
"real_capacity": 696240,
"status": "success",
"error": "",
"max_context_1_user": 40960
},
{
"model": "Qwen/Qwen3-14B-AWQ",
"tp": 1,
"util": "0.95",
"max_seqs": 16,
"model_limit": 40960,
"configured_len": 40960,
"real_capacity": 696240,
"status": "success",
"error": "",
"max_context_1_user": 40960
},
{
"model": "openai/gpt-oss-20b",
"tp": 1,
"util": "0.95",
"max_seqs": 1,
"model_limit": 131072,
"configured_len": 131072,
"real_capacity": 2222384,
"status": "success",
"error": "",
"max_context_1_user": 131072
},
{
"model": "openai/gpt-oss-20b",
"tp": 1,
"util": "0.95",
"max_seqs": 4,
"model_limit": 131072,
"configured_len": 131072,
"real_capacity": 2232672,
"status": "success",
"error": "",
"max_context_1_user": 131072
},
{
"model": "openai/gpt-oss-20b",
"tp": 1,
"util": "0.95",
"max_seqs": 8,
"model_limit": 131072,
"configured_len": 131072,
"real_capacity": 2232672,
"status": "success",
"error": "",
"max_context_1_user": 131072
},
{
"model": "openai/gpt-oss-20b",
"tp": 1,
"util": "0.95",
"max_seqs": 16,
"model_limit": 131072,
"configured_len": 131072,
"real_capacity": 2232656,
"status": "success",
"error": "",
"max_context_1_user": 131072
},
{
"model": "openai/gpt-oss-120b",
"tp": 1,
"util": "0.95",
"max_seqs": 1,
"model_limit": 131072,
"configured_len": 131072,
"real_capacity": 711296,
"status": "success",
"error": "",
"max_context_1_user": 131072
},
{
"model": "openai/gpt-oss-120b",
"tp": 1,
"util": "0.95",
"max_seqs": 4,
"model_limit": 131072,
"configured_len": 131072,
"real_capacity": 711184,
"status": "success",
"error": "",
"max_context_1_user": 131072
},
{
"model": "openai/gpt-oss-120b",
"tp": 1,
"util": "0.95",
"max_seqs": 8,
"model_limit": 131072,
"configured_len": 131072,
"real_capacity": 711168,
"status": "success",
"error": "",
"max_context_1_user": 131072
},
{
"model": "openai/gpt-oss-120b",
"tp": 1,
"util": "0.95",
"max_seqs": 16,
"model_limit": 131072,
"configured_len": 131072,
"real_capacity": 711168,
"status": "success",
"error": "",
"max_context_1_user": 131072
},
{
"model": "zai-org/GLM-4.7-Flash",
"tp": 1,
"util": "0.95",
"max_seqs": 1,
"model_limit": 202752,
"configured_len": 202752,
"real_capacity": 791088,
"status": "success",
"error": "",
"max_context_1_user": 202752
},
{
"model": "zai-org/GLM-4.7-Flash",
"tp": 1,
"util": "0.95",
"max_seqs": 4,
"model_limit": 202752,
"configured_len": 202752,
"real_capacity": 791008,
"status": "success",
"error": "",
"max_context_1_user": 202752
},
{
"model": "zai-org/GLM-4.7-Flash",
"tp": 1,
"util": "0.95",
"max_seqs": 8,
"model_limit": 202752,
"configured_len": 202752,
"real_capacity": 791008,
"status": "success",
"error": "",
"max_context_1_user": 202752
},
{
"model": "zai-org/GLM-4.7-Flash",
"tp": 1,
"util": "0.95",
"max_seqs": 16,
"model_limit": 202752,
"configured_len": 202752,
"real_capacity": 790992,
"status": "success",
"error": "",
"max_context_1_user": 202752
},
{
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
"tp": 1,
"util": "0.95",
"max_seqs": 1,
"model_limit": 262144,
"configured_len": 262144,
"real_capacity": 1088288,
"status": "success",
"error": "",
"max_context_1_user": 262144
},
{
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
"tp": 1,
"util": "0.95",
"max_seqs": 4,
"model_limit": 262144,
"configured_len": 262144,
"real_capacity": 1089072,
"status": "success",
"error": "",
"max_context_1_user": 262144
},
{
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
"tp": 1,
"util": "0.95",
"max_seqs": 8,
"model_limit": 262144,
"configured_len": 262144,
"real_capacity": 1089072,
"status": "success",
"error": "",
"max_context_1_user": 262144
},
{
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
"tp": 1,
"util": "0.95",
"max_seqs": 16,
"model_limit": 262144,
"configured_len": 262144,
"real_capacity": 1089056,
"status": "success",
"error": "",
"max_context_1_user": 262144
},
{
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
"tp": 1,
"util": "0.95",
"max_seqs": 1,
"model_limit": 262144,
"configured_len": 262144,
"real_capacity": 936544,
"status": "success",
"error": "",
"max_context_1_user": 262144
},
{
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
"tp": 1,
"util": "0.95",
"max_seqs": 4,
"model_limit": 262144,
"configured_len": 262144,
"real_capacity": 937312,
"status": "success",
"error": "",
"max_context_1_user": 262144
},
{
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
"tp": 1,
"util": "0.95",
"max_seqs": 8,
"model_limit": 262144,
"configured_len": 262144,
"real_capacity": 937296,
"status": "success",
"error": "",
"max_context_1_user": 262144
},
{
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
"tp": 1,
"util": "0.95",
"max_seqs": 16,
"model_limit": 262144,
"configured_len": 262144,
"real_capacity": 937280,
"status": "success",
"error": "",
"max_context_1_user": 262144
},
{
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
"tp": 1,
"util": "0.95",
"max_seqs": 1,
"model_limit": 262144,
"configured_len": 262144,
"real_capacity": 763776,
"status": "success",
"error": "",
"max_context_1_user": 262144
},
{
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
"tp": 1,
"util": "0.95",
"max_seqs": 4,
"model_limit": 262144,
"configured_len": 262144,
"real_capacity": 763776,
"status": "success",
"error": "",
"max_context_1_user": 262144
},
{
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
"tp": 1,
"util": "0.95",
"max_seqs": 8,
"model_limit": 262144,
"configured_len": 262144,
"real_capacity": 763776,
"status": "success",
"error": "",
"max_context_1_user": 262144
},
{
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
"tp": 1,
"util": "0.95",
"max_seqs": 16,
"model_limit": 262144,
"configured_len": 262144,
"real_capacity": 763776,
"status": "success",
"error": "",
"max_context_1_user": 262144
}
]