434 regels
9.5 KiB
JSON
434 regels
9.5 KiB
JSON
[
|
|
{
|
|
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
|
"tp": 1,
|
|
"util": "0.95",
|
|
"max_seqs": 1,
|
|
"model_limit": 131072,
|
|
"configured_len": 131072,
|
|
"real_capacity": 828144,
|
|
"status": "success",
|
|
"error": "",
|
|
"max_context_1_user": 131072
|
|
},
|
|
{
|
|
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
|
"tp": 1,
|
|
"util": "0.95",
|
|
"max_seqs": 4,
|
|
"model_limit": 131072,
|
|
"configured_len": 131072,
|
|
"real_capacity": 830128,
|
|
"status": "success",
|
|
"error": "",
|
|
"max_context_1_user": 131072
|
|
},
|
|
{
|
|
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
|
"tp": 1,
|
|
"util": "0.95",
|
|
"max_seqs": 8,
|
|
"model_limit": 131072,
|
|
"configured_len": 131072,
|
|
"real_capacity": 830144,
|
|
"status": "success",
|
|
"error": "",
|
|
"max_context_1_user": 131072
|
|
},
|
|
{
|
|
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
|
"tp": 1,
|
|
"util": "0.95",
|
|
"max_seqs": 16,
|
|
"model_limit": 131072,
|
|
"configured_len": 131072,
|
|
"real_capacity": 830128,
|
|
"status": "success",
|
|
"error": "",
|
|
"max_context_1_user": 131072
|
|
},
|
|
{
|
|
"model": "google/gemma-3-12b-it",
|
|
"tp": 1,
|
|
"util": "0.95",
|
|
"max_seqs": 1,
|
|
"model_limit": 131072,
|
|
"configured_len": 131072,
|
|
"real_capacity": 240512,
|
|
"status": "success",
|
|
"error": "",
|
|
"max_context_1_user": 131072
|
|
},
|
|
{
|
|
"model": "google/gemma-3-12b-it",
|
|
"tp": 1,
|
|
"util": "0.95",
|
|
"max_seqs": 4,
|
|
"model_limit": 131072,
|
|
"configured_len": 131072,
|
|
"real_capacity": 240544,
|
|
"status": "success",
|
|
"error": "",
|
|
"max_context_1_user": 131072
|
|
},
|
|
{
|
|
"model": "google/gemma-3-12b-it",
|
|
"tp": 1,
|
|
"util": "0.95",
|
|
"max_seqs": 8,
|
|
"model_limit": 131072,
|
|
"configured_len": 131072,
|
|
"real_capacity": 240544,
|
|
"status": "success",
|
|
"error": "",
|
|
"max_context_1_user": 131072
|
|
},
|
|
{
|
|
"model": "google/gemma-3-12b-it",
|
|
"tp": 1,
|
|
"util": "0.95",
|
|
"max_seqs": 16,
|
|
"model_limit": 131072,
|
|
"configured_len": 131072,
|
|
"real_capacity": 240544,
|
|
"status": "success",
|
|
"error": "",
|
|
"max_context_1_user": 131072
|
|
},
|
|
{
|
|
"model": "Qwen/Qwen3-14B-AWQ",
|
|
"tp": 1,
|
|
"util": "0.95",
|
|
"max_seqs": 1,
|
|
"model_limit": 40960,
|
|
"configured_len": 40960,
|
|
"real_capacity": 695200,
|
|
"status": "success",
|
|
"error": "",
|
|
"max_context_1_user": 40960
|
|
},
|
|
{
|
|
"model": "Qwen/Qwen3-14B-AWQ",
|
|
"tp": 1,
|
|
"util": "0.95",
|
|
"max_seqs": 4,
|
|
"model_limit": 40960,
|
|
"configured_len": 40960,
|
|
"real_capacity": 696240,
|
|
"status": "success",
|
|
"error": "",
|
|
"max_context_1_user": 40960
|
|
},
|
|
{
|
|
"model": "Qwen/Qwen3-14B-AWQ",
|
|
"tp": 1,
|
|
"util": "0.95",
|
|
"max_seqs": 8,
|
|
"model_limit": 40960,
|
|
"configured_len": 40960,
|
|
"real_capacity": 696240,
|
|
"status": "success",
|
|
"error": "",
|
|
"max_context_1_user": 40960
|
|
},
|
|
{
|
|
"model": "Qwen/Qwen3-14B-AWQ",
|
|
"tp": 1,
|
|
"util": "0.95",
|
|
"max_seqs": 16,
|
|
"model_limit": 40960,
|
|
"configured_len": 40960,
|
|
"real_capacity": 696240,
|
|
"status": "success",
|
|
"error": "",
|
|
"max_context_1_user": 40960
|
|
},
|
|
{
|
|
"model": "openai/gpt-oss-20b",
|
|
"tp": 1,
|
|
"util": "0.95",
|
|
"max_seqs": 1,
|
|
"model_limit": 131072,
|
|
"configured_len": 131072,
|
|
"real_capacity": 2222384,
|
|
"status": "success",
|
|
"error": "",
|
|
"max_context_1_user": 131072
|
|
},
|
|
{
|
|
"model": "openai/gpt-oss-20b",
|
|
"tp": 1,
|
|
"util": "0.95",
|
|
"max_seqs": 4,
|
|
"model_limit": 131072,
|
|
"configured_len": 131072,
|
|
"real_capacity": 2232672,
|
|
"status": "success",
|
|
"error": "",
|
|
"max_context_1_user": 131072
|
|
},
|
|
{
|
|
"model": "openai/gpt-oss-20b",
|
|
"tp": 1,
|
|
"util": "0.95",
|
|
"max_seqs": 8,
|
|
"model_limit": 131072,
|
|
"configured_len": 131072,
|
|
"real_capacity": 2232672,
|
|
"status": "success",
|
|
"error": "",
|
|
"max_context_1_user": 131072
|
|
},
|
|
{
|
|
"model": "openai/gpt-oss-20b",
|
|
"tp": 1,
|
|
"util": "0.95",
|
|
"max_seqs": 16,
|
|
"model_limit": 131072,
|
|
"configured_len": 131072,
|
|
"real_capacity": 2232656,
|
|
"status": "success",
|
|
"error": "",
|
|
"max_context_1_user": 131072
|
|
},
|
|
{
|
|
"model": "openai/gpt-oss-120b",
|
|
"tp": 1,
|
|
"util": "0.95",
|
|
"max_seqs": 1,
|
|
"model_limit": 131072,
|
|
"configured_len": 131072,
|
|
"real_capacity": 711296,
|
|
"status": "success",
|
|
"error": "",
|
|
"max_context_1_user": 131072
|
|
},
|
|
{
|
|
"model": "openai/gpt-oss-120b",
|
|
"tp": 1,
|
|
"util": "0.95",
|
|
"max_seqs": 4,
|
|
"model_limit": 131072,
|
|
"configured_len": 131072,
|
|
"real_capacity": 711184,
|
|
"status": "success",
|
|
"error": "",
|
|
"max_context_1_user": 131072
|
|
},
|
|
{
|
|
"model": "openai/gpt-oss-120b",
|
|
"tp": 1,
|
|
"util": "0.95",
|
|
"max_seqs": 8,
|
|
"model_limit": 131072,
|
|
"configured_len": 131072,
|
|
"real_capacity": 711168,
|
|
"status": "success",
|
|
"error": "",
|
|
"max_context_1_user": 131072
|
|
},
|
|
{
|
|
"model": "openai/gpt-oss-120b",
|
|
"tp": 1,
|
|
"util": "0.95",
|
|
"max_seqs": 16,
|
|
"model_limit": 131072,
|
|
"configured_len": 131072,
|
|
"real_capacity": 711168,
|
|
"status": "success",
|
|
"error": "",
|
|
"max_context_1_user": 131072
|
|
},
|
|
{
|
|
"model": "zai-org/GLM-4.7-Flash",
|
|
"tp": 1,
|
|
"util": "0.95",
|
|
"max_seqs": 1,
|
|
"model_limit": 202752,
|
|
"configured_len": 202752,
|
|
"real_capacity": 791088,
|
|
"status": "success",
|
|
"error": "",
|
|
"max_context_1_user": 202752
|
|
},
|
|
{
|
|
"model": "zai-org/GLM-4.7-Flash",
|
|
"tp": 1,
|
|
"util": "0.95",
|
|
"max_seqs": 4,
|
|
"model_limit": 202752,
|
|
"configured_len": 202752,
|
|
"real_capacity": 791008,
|
|
"status": "success",
|
|
"error": "",
|
|
"max_context_1_user": 202752
|
|
},
|
|
{
|
|
"model": "zai-org/GLM-4.7-Flash",
|
|
"tp": 1,
|
|
"util": "0.95",
|
|
"max_seqs": 8,
|
|
"model_limit": 202752,
|
|
"configured_len": 202752,
|
|
"real_capacity": 791008,
|
|
"status": "success",
|
|
"error": "",
|
|
"max_context_1_user": 202752
|
|
},
|
|
{
|
|
"model": "zai-org/GLM-4.7-Flash",
|
|
"tp": 1,
|
|
"util": "0.95",
|
|
"max_seqs": 16,
|
|
"model_limit": 202752,
|
|
"configured_len": 202752,
|
|
"real_capacity": 790992,
|
|
"status": "success",
|
|
"error": "",
|
|
"max_context_1_user": 202752
|
|
},
|
|
{
|
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
|
"tp": 1,
|
|
"util": "0.95",
|
|
"max_seqs": 1,
|
|
"model_limit": 262144,
|
|
"configured_len": 262144,
|
|
"real_capacity": 1088288,
|
|
"status": "success",
|
|
"error": "",
|
|
"max_context_1_user": 262144
|
|
},
|
|
{
|
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
|
"tp": 1,
|
|
"util": "0.95",
|
|
"max_seqs": 4,
|
|
"model_limit": 262144,
|
|
"configured_len": 262144,
|
|
"real_capacity": 1089072,
|
|
"status": "success",
|
|
"error": "",
|
|
"max_context_1_user": 262144
|
|
},
|
|
{
|
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
|
"tp": 1,
|
|
"util": "0.95",
|
|
"max_seqs": 8,
|
|
"model_limit": 262144,
|
|
"configured_len": 262144,
|
|
"real_capacity": 1089072,
|
|
"status": "success",
|
|
"error": "",
|
|
"max_context_1_user": 262144
|
|
},
|
|
{
|
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
|
"tp": 1,
|
|
"util": "0.95",
|
|
"max_seqs": 16,
|
|
"model_limit": 262144,
|
|
"configured_len": 262144,
|
|
"real_capacity": 1089056,
|
|
"status": "success",
|
|
"error": "",
|
|
"max_context_1_user": 262144
|
|
},
|
|
{
|
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
|
"tp": 1,
|
|
"util": "0.95",
|
|
"max_seqs": 1,
|
|
"model_limit": 262144,
|
|
"configured_len": 262144,
|
|
"real_capacity": 936544,
|
|
"status": "success",
|
|
"error": "",
|
|
"max_context_1_user": 262144
|
|
},
|
|
{
|
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
|
"tp": 1,
|
|
"util": "0.95",
|
|
"max_seqs": 4,
|
|
"model_limit": 262144,
|
|
"configured_len": 262144,
|
|
"real_capacity": 937312,
|
|
"status": "success",
|
|
"error": "",
|
|
"max_context_1_user": 262144
|
|
},
|
|
{
|
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
|
"tp": 1,
|
|
"util": "0.95",
|
|
"max_seqs": 8,
|
|
"model_limit": 262144,
|
|
"configured_len": 262144,
|
|
"real_capacity": 937296,
|
|
"status": "success",
|
|
"error": "",
|
|
"max_context_1_user": 262144
|
|
},
|
|
{
|
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
|
"tp": 1,
|
|
"util": "0.95",
|
|
"max_seqs": 16,
|
|
"model_limit": 262144,
|
|
"configured_len": 262144,
|
|
"real_capacity": 937280,
|
|
"status": "success",
|
|
"error": "",
|
|
"max_context_1_user": 262144
|
|
},
|
|
{
|
|
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
|
"tp": 1,
|
|
"util": "0.95",
|
|
"max_seqs": 1,
|
|
"model_limit": 262144,
|
|
"configured_len": 262144,
|
|
"real_capacity": 763776,
|
|
"status": "success",
|
|
"error": "",
|
|
"max_context_1_user": 262144
|
|
},
|
|
{
|
|
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
|
"tp": 1,
|
|
"util": "0.95",
|
|
"max_seqs": 4,
|
|
"model_limit": 262144,
|
|
"configured_len": 262144,
|
|
"real_capacity": 763776,
|
|
"status": "success",
|
|
"error": "",
|
|
"max_context_1_user": 262144
|
|
},
|
|
{
|
|
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
|
"tp": 1,
|
|
"util": "0.95",
|
|
"max_seqs": 8,
|
|
"model_limit": 262144,
|
|
"configured_len": 262144,
|
|
"real_capacity": 763776,
|
|
"status": "success",
|
|
"error": "",
|
|
"max_context_1_user": 262144
|
|
},
|
|
{
|
|
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
|
"tp": 1,
|
|
"util": "0.95",
|
|
"max_seqs": 16,
|
|
"model_limit": 262144,
|
|
"configured_len": 262144,
|
|
"real_capacity": 763776,
|
|
"status": "success",
|
|
"error": "",
|
|
"max_context_1_user": 262144
|
|
}
|
|
] |