updated benchmarks, fix start-vllm
Bu işleme şunda yer alıyor:
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"elapsed_time": 524.2037815230142,
|
||||||
|
"num_requests": 200,
|
||||||
|
"total_num_tokens": 146805,
|
||||||
|
"requests_per_second": 0.3815310134141399,
|
||||||
|
"tokens_per_second": 280.05330212131406
|
||||||
|
}
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"elapsed_time": 421.75657659699937,
|
"elapsed_time": 424.04632396099623,
|
||||||
"num_requests": 200,
|
"num_requests": 200,
|
||||||
"total_num_tokens": 146805,
|
"total_num_tokens": 146805,
|
||||||
"requests_per_second": 0.4742071875054738,
|
"requests_per_second": 0.4716465836369236,
|
||||||
"tokens_per_second": 348.0799308087054
|
"tokens_per_second": 346.2003835540928
|
||||||
}
|
}
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"elapsed_time": 868.8101008250001,
|
"elapsed_time": 918.187000697013,
|
||||||
"num_requests": 200,
|
"num_requests": 200,
|
||||||
"total_num_tokens": 146805,
|
"total_num_tokens": 146805,
|
||||||
"requests_per_second": 0.2301999019234296,
|
"requests_per_second": 0.21782055272855774,
|
||||||
"tokens_per_second": 168.9724830093454
|
"tokens_per_second": 159.8857312165796
|
||||||
}
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"elapsed_time": 456.08530166203855,
|
||||||
|
"num_requests": 200,
|
||||||
|
"total_num_tokens": 146805,
|
||||||
|
"requests_per_second": 0.4385144604993234,
|
||||||
|
"tokens_per_second": 321.88057686801585
|
||||||
|
}
|
||||||
+3
-3
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"elapsed_time": 457.7749735690013,
|
"elapsed_time": 458.737264430034,
|
||||||
"num_requests": 200,
|
"num_requests": 200,
|
||||||
"total_num_tokens": 146805,
|
"total_num_tokens": 146805,
|
||||||
"requests_per_second": 0.4368958801760569,
|
"requests_per_second": 0.4359794058773347,
|
||||||
"tokens_per_second": 320.69249844623016
|
"tokens_per_second": 320.0197833991106
|
||||||
}
|
}
|
||||||
+3
-3
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"elapsed_time": 644.1538858940003,
|
"elapsed_time": 686.8188757880125,
|
||||||
"num_requests": 200,
|
"num_requests": 200,
|
||||||
"total_num_tokens": 146805,
|
"total_num_tokens": 146805,
|
||||||
"requests_per_second": 0.3104848148551126,
|
"requests_per_second": 0.29119758796747197,
|
||||||
"tokens_per_second": 227.90361622402403
|
"tokens_per_second": 213.74630950782364
|
||||||
}
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"elapsed_time": 534.8865945799625,
|
||||||
|
"num_requests": 200,
|
||||||
|
"total_num_tokens": 146805,
|
||||||
|
"requests_per_second": 0.3739110346503573,
|
||||||
|
"tokens_per_second": 274.46004720922855
|
||||||
|
}
|
||||||
+3
-3
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"elapsed_time": 534.4193308840004,
|
"elapsed_time": 524.8208868440124,
|
||||||
"num_requests": 200,
|
"num_requests": 200,
|
||||||
"total_num_tokens": 146805,
|
"total_num_tokens": 146805,
|
||||||
"requests_per_second": 0.3742379596733028,
|
"requests_per_second": 0.38108239403864297,
|
||||||
"tokens_per_second": 274.7000183491961
|
"tokens_per_second": 279.7240042842149
|
||||||
}
|
}
|
||||||
+3
-3
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"elapsed_time": 733.5017090729998,
|
"elapsed_time": 789.1420173590304,
|
||||||
"num_requests": 200,
|
"num_requests": 200,
|
||||||
"total_num_tokens": 146805,
|
"total_num_tokens": 146805,
|
||||||
"requests_per_second": 0.2726646680247824,
|
"requests_per_second": 0.2534398062712803,
|
||||||
"tokens_per_second": 200.1426829468909
|
"tokens_per_second": 186.03115379827653
|
||||||
}
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"elapsed_time": 805.9022228560061,
|
||||||
|
"num_requests": 200,
|
||||||
|
"total_num_tokens": 146805,
|
||||||
|
"requests_per_second": 0.24816906360082697,
|
||||||
|
"tokens_per_second": 182.16229690959702
|
||||||
|
}
|
||||||
+3
-3
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"elapsed_time": 879.0596038709991,
|
"elapsed_time": 748.1414223780157,
|
||||||
"num_requests": 200,
|
"num_requests": 200,
|
||||||
"total_num_tokens": 146805,
|
"total_num_tokens": 146805,
|
||||||
"requests_per_second": 0.22751585799106944,
|
"requests_per_second": 0.2673291359329993,
|
||||||
"tokens_per_second": 167.00232766189475
|
"tokens_per_second": 196.2262690032198
|
||||||
}
|
}
|
||||||
+3
-3
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"elapsed_time": 1109.9732099440007,
|
"elapsed_time": 1168.3619703819859,
|
||||||
"num_requests": 200,
|
"num_requests": 200,
|
||||||
"total_num_tokens": 146805,
|
"total_num_tokens": 146805,
|
||||||
"requests_per_second": 0.18018452896722634,
|
"requests_per_second": 0.17117982703135376,
|
||||||
"tokens_per_second": 132.2599488751683
|
"tokens_per_second": 125.65027253668944
|
||||||
}
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"elapsed_time": 510.63144373201067,
|
||||||
|
"num_requests": 200,
|
||||||
|
"total_num_tokens": 148857,
|
||||||
|
"requests_per_second": 0.391671923958063,
|
||||||
|
"tokens_per_second": 291.5155379231269
|
||||||
|
}
|
||||||
+3
-3
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"elapsed_time": 504.69023761399876,
|
"elapsed_time": 520.7929677469656,
|
||||||
"num_requests": 200,
|
"num_requests": 200,
|
||||||
"total_num_tokens": 148857,
|
"total_num_tokens": 148857,
|
||||||
"requests_per_second": 0.39628268013570256,
|
"requests_per_second": 0.3840297630462106,
|
||||||
"tokens_per_second": 294.9472545848014
|
"tokens_per_second": 285.8275921888489
|
||||||
}
|
}
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"elapsed_time": 876.911706677,
|
"elapsed_time": 930.6109793490032,
|
||||||
"num_requests": 200,
|
"num_requests": 200,
|
||||||
"total_num_tokens": 148857,
|
"total_num_tokens": 148857,
|
||||||
"requests_per_second": 0.22807313265081958,
|
"requests_per_second": 0.2149125729635249,
|
||||||
"tokens_per_second": 169.75141153501525
|
"tokens_per_second": 159.95620436815713
|
||||||
}
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"elapsed_time": 237.61095946098794,
|
||||||
|
"num_requests": 200,
|
||||||
|
"total_num_tokens": 145877,
|
||||||
|
"requests_per_second": 0.8417120172137385,
|
||||||
|
"tokens_per_second": 613.9321196754427
|
||||||
|
}
|
||||||
+3
-3
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"elapsed_time": 244.51837097500174,
|
"elapsed_time": 247.22850671299966,
|
||||||
"num_requests": 200,
|
"num_requests": 200,
|
||||||
"total_num_tokens": 145877,
|
"total_num_tokens": 145877,
|
||||||
"requests_per_second": 0.8179344529513773,
|
"requests_per_second": 0.8089681997399035,
|
||||||
"tokens_per_second": 596.5891209659404
|
"tokens_per_second": 590.0492703672895
|
||||||
}
|
}
|
||||||
+3
-3
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"elapsed_time": 380.55349342600005,
|
"elapsed_time": 395.08209386101225,
|
||||||
"num_requests": 200,
|
"num_requests": 200,
|
||||||
"total_num_tokens": 145877,
|
"total_num_tokens": 145877,
|
||||||
"requests_per_second": 0.5255502930730307,
|
"requests_per_second": 0.5062239041143659,
|
||||||
"tokens_per_second": 383.3285005130725
|
"tokens_per_second": 369.23212230245684
|
||||||
}
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"elapsed_time": 1361.426551499986,
|
||||||
|
"num_requests": 200,
|
||||||
|
"total_num_tokens": 146523,
|
||||||
|
"requests_per_second": 0.14690473002722398,
|
||||||
|
"tokens_per_second": 107.62460878889469
|
||||||
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"elapsed_time": 1482.2689266130328,
|
||||||
|
"num_requests": 200,
|
||||||
|
"total_num_tokens": 146523,
|
||||||
|
"requests_per_second": 0.13492828218223374,
|
||||||
|
"tokens_per_second": 98.85048345093716
|
||||||
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"elapsed_time": 1724.1368565150187,
|
||||||
|
"num_requests": 200,
|
||||||
|
"total_num_tokens": 147036,
|
||||||
|
"requests_per_second": 0.11600007229371459,
|
||||||
|
"tokens_per_second": 85.2809331488931
|
||||||
|
}
|
||||||
+3
-3
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"elapsed_time": 1307.2402118169994,
|
"elapsed_time": 1199.1163451180328,
|
||||||
"num_requests": 200,
|
"num_requests": 200,
|
||||||
"total_num_tokens": 147036,
|
"total_num_tokens": 147036,
|
||||||
"requests_per_second": 0.15299406963775225,
|
"requests_per_second": 0.16678948695367285,
|
||||||
"tokens_per_second": 112.4781801162827
|
"tokens_per_second": 122.62029501860121
|
||||||
}
|
}
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"elapsed_time": 1886.751298176,
|
"elapsed_time": 1959.4152568069985,
|
||||||
"num_requests": 200,
|
"num_requests": 200,
|
||||||
"total_num_tokens": 147036,
|
"total_num_tokens": 147036,
|
||||||
"requests_per_second": 0.10600231211890418,
|
"requests_per_second": 0.10207126810164463,
|
||||||
"tokens_per_second": 77.93077982357597
|
"tokens_per_second": 75.0407548829671
|
||||||
}
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"elapsed_time": 243.98866786801955,
|
||||||
|
"num_requests": 200,
|
||||||
|
"total_num_tokens": 147036,
|
||||||
|
"requests_per_second": 0.819710201082723,
|
||||||
|
"tokens_per_second": 602.6345456319963
|
||||||
|
}
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"elapsed_time": 247.62527259899798,
|
"elapsed_time": 242.14750060701044,
|
||||||
"num_requests": 200,
|
"num_requests": 200,
|
||||||
"total_num_tokens": 147036,
|
"total_num_tokens": 147036,
|
||||||
"requests_per_second": 0.8076720033495051,
|
"requests_per_second": 0.825942863331829,
|
||||||
"tokens_per_second": 593.7843034224891
|
"tokens_per_second": 607.216674264294
|
||||||
}
|
}
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"elapsed_time": 341.2666312900001,
|
"elapsed_time": 357.72086531698005,
|
||||||
"num_requests": 200,
|
"num_requests": 200,
|
||||||
"total_num_tokens": 147036,
|
"total_num_tokens": 147036,
|
||||||
"requests_per_second": 0.5860520240258851,
|
"requests_per_second": 0.5590951476167821,
|
||||||
"tokens_per_second": 430.8537270233502
|
"tokens_per_second": 411.03557062490586
|
||||||
}
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"elapsed_time": 486.3392907420057,
|
||||||
|
"num_requests": 200,
|
||||||
|
"total_num_tokens": 146278,
|
||||||
|
"requests_per_second": 0.41123553824915293,
|
||||||
|
"tokens_per_second": 300.773560320048
|
||||||
|
}
|
||||||
+3
-3
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"elapsed_time": 422.7612150579989,
|
"elapsed_time": 398.827027003048,
|
||||||
"num_requests": 200,
|
"num_requests": 200,
|
||||||
"total_num_tokens": 146278,
|
"total_num_tokens": 146278,
|
||||||
"requests_per_second": 0.47308029420949094,
|
"requests_per_second": 0.5014705284716613,
|
||||||
"tokens_per_second": 346.0061963818796
|
"tokens_per_second": 366.77052981888835
|
||||||
}
|
}
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"elapsed_time": 594.5536415039987,
|
"elapsed_time": 610.5734472059994,
|
||||||
"num_requests": 200,
|
"num_requests": 200,
|
||||||
"total_num_tokens": 146278,
|
"total_num_tokens": 146278,
|
||||||
"requests_per_second": 0.33638680522429343,
|
"requests_per_second": 0.32756091984544267,
|
||||||
"tokens_per_second": 246.02994547299596
|
"tokens_per_second": 239.57478116575834
|
||||||
}
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"elapsed_time": 497.111974740983,
|
||||||
|
"num_requests": 200,
|
||||||
|
"total_num_tokens": 146805,
|
||||||
|
"requests_per_second": 0.40232384284085837,
|
||||||
|
"tokens_per_second": 295.31575874126105
|
||||||
|
}
|
||||||
+3
-3
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"elapsed_time": 395.26841144900027,
|
"elapsed_time": 399.3928133630543,
|
||||||
"num_requests": 200,
|
"num_requests": 200,
|
||||||
"total_num_tokens": 146805,
|
"total_num_tokens": 146805,
|
||||||
"requests_per_second": 0.5059852854591319,
|
"requests_per_second": 0.5007601371589951,
|
||||||
"tokens_per_second": 371.4058491591393
|
"tokens_per_second": 367.5704596781314
|
||||||
}
|
}
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"elapsed_time": 769.1666062429999,
|
"elapsed_time": 813.6141017450136,
|
||||||
"num_requests": 200,
|
"num_requests": 200,
|
||||||
"total_num_tokens": 146805,
|
"total_num_tokens": 146805,
|
||||||
"requests_per_second": 0.260021688898978,
|
"requests_per_second": 0.24581678165489804,
|
||||||
"tokens_per_second": 190.86242019407229
|
"tokens_per_second": 180.43566315423652
|
||||||
}
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"elapsed_time": 456.45958357997006,
|
||||||
|
"num_requests": 200,
|
||||||
|
"total_num_tokens": 146805,
|
||||||
|
"requests_per_second": 0.4381548929949473,
|
||||||
|
"tokens_per_second": 321.6166453306162
|
||||||
|
}
|
||||||
+3
-3
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"elapsed_time": 464.71097393700256,
|
"elapsed_time": 440.66104900900973,
|
||||||
"num_requests": 200,
|
"num_requests": 200,
|
||||||
"total_num_tokens": 146805,
|
"total_num_tokens": 146805,
|
||||||
"requests_per_second": 0.43037503139986644,
|
"requests_per_second": 0.4538635771184551,
|
||||||
"tokens_per_second": 315.906032423287
|
"tokens_per_second": 333.147212194374
|
||||||
}
|
}
|
||||||
+3
-3
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"elapsed_time": 638.3282979609994,
|
"elapsed_time": 683.9224744850071,
|
||||||
"num_requests": 200,
|
"num_requests": 200,
|
||||||
"total_num_tokens": 146805,
|
"total_num_tokens": 146805,
|
||||||
"requests_per_second": 0.31331839844615444,
|
"requests_per_second": 0.29243080533447857,
|
||||||
"tokens_per_second": 229.9835374194385
|
"tokens_per_second": 214.65152188564062
|
||||||
}
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"elapsed_time": 517.5916094129789,
|
||||||
|
"num_requests": 200,
|
||||||
|
"total_num_tokens": 146805,
|
||||||
|
"requests_per_second": 0.38640502736670695,
|
||||||
|
"tokens_per_second": 283.6309502128471
|
||||||
|
}
|
||||||
+3
-3
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"elapsed_time": 502.6907218439992,
|
"elapsed_time": 497.59323585999664,
|
||||||
"num_requests": 200,
|
"num_requests": 200,
|
||||||
"total_num_tokens": 146805,
|
"total_num_tokens": 146805,
|
||||||
"requests_per_second": 0.3978589444944367,
|
"requests_per_second": 0.4019347241614679,
|
||||||
"tokens_per_second": 292.0384117325289
|
"tokens_per_second": 295.0301359026215
|
||||||
}
|
}
|
||||||
+3
-3
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"elapsed_time": 721.7994779089986,
|
"elapsed_time": 780.1687226030044,
|
||||||
"num_requests": 200,
|
"num_requests": 200,
|
||||||
"total_num_tokens": 146805,
|
"total_num_tokens": 146805,
|
||||||
"requests_per_second": 0.2770852655357769,
|
"requests_per_second": 0.2563548040386794,
|
||||||
"tokens_per_second": 203.38751203489863
|
"tokens_per_second": 188.17083503449163
|
||||||
}
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"elapsed_time": 802.5698999410379,
|
||||||
|
"num_requests": 200,
|
||||||
|
"total_num_tokens": 146805,
|
||||||
|
"requests_per_second": 0.24919947784572202,
|
||||||
|
"tokens_per_second": 182.9186467257061
|
||||||
|
}
|
||||||
+3
-3
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"elapsed_time": 886.8526372269989,
|
"elapsed_time": 757.2171181479935,
|
||||||
"num_requests": 200,
|
"num_requests": 200,
|
||||||
"total_num_tokens": 146805,
|
"total_num_tokens": 146805,
|
||||||
"requests_per_second": 0.2255166096425645,
|
"requests_per_second": 0.2641250378612165,
|
||||||
"tokens_per_second": 165.5348293928834
|
"tokens_per_second": 193.87438091607942
|
||||||
}
|
}
|
||||||
+3
-3
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"elapsed_time": 1084.3601952080007,
|
"elapsed_time": 1144.2253085140255,
|
||||||
"num_requests": 200,
|
"num_requests": 200,
|
||||||
"total_num_tokens": 146805,
|
"total_num_tokens": 146805,
|
||||||
"requests_per_second": 0.18444055848217136,
|
"requests_per_second": 0.1747907501362075,
|
||||||
"tokens_per_second": 135.3839809398758
|
"tokens_per_second": 128.30078036872973
|
||||||
}
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"elapsed_time": 373.92354663898004,
|
||||||
|
"num_requests": 200,
|
||||||
|
"total_num_tokens": 148857,
|
||||||
|
"requests_per_second": 0.5348686965496139,
|
||||||
|
"tokens_per_second": 398.09474781142933
|
||||||
|
}
|
||||||
+3
-3
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"elapsed_time": 369.2837602610016,
|
"elapsed_time": 374.03978066996206,
|
||||||
"num_requests": 200,
|
"num_requests": 200,
|
||||||
"total_num_tokens": 148857,
|
"total_num_tokens": 148857,
|
||||||
"requests_per_second": 0.5415889392445647,
|
"requests_per_second": 0.5347024844303181,
|
||||||
"tokens_per_second": 403.09652364564084
|
"tokens_per_second": 397.9710386242193
|
||||||
}
|
}
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"elapsed_time": 509.0738683320001,
|
"elapsed_time": 555.4390292470343,
|
||||||
"num_requests": 200,
|
"num_requests": 200,
|
||||||
"total_num_tokens": 148857,
|
"total_num_tokens": 148857,
|
||||||
"requests_per_second": 0.39287029337276264,
|
"requests_per_second": 0.36007552488906747,
|
||||||
"tokens_per_second": 292.4074663029466
|
"tokens_per_second": 267.99881204205957
|
||||||
}
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"elapsed_time": 213.75922767800512,
|
||||||
|
"num_requests": 200,
|
||||||
|
"total_num_tokens": 145877,
|
||||||
|
"requests_per_second": 0.9356321229849724,
|
||||||
|
"tokens_per_second": 682.4360360233941
|
||||||
|
}
|
||||||
+3
-3
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"elapsed_time": 224.76228898300178,
|
"elapsed_time": 224.3753512299736,
|
||||||
"num_requests": 200,
|
"num_requests": 200,
|
||||||
"total_num_tokens": 145877,
|
"total_num_tokens": 145877,
|
||||||
"requests_per_second": 0.8898289873490544,
|
"requests_per_second": 0.8913635071929533,
|
||||||
"tokens_per_second": 649.02791593759
|
"tokens_per_second": 650.1471716939323
|
||||||
}
|
}
|
||||||
+3
-3
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"elapsed_time": 322.171811016,
|
"elapsed_time": 336.45260514499387,
|
||||||
"num_requests": 200,
|
"num_requests": 200,
|
||||||
"total_num_tokens": 145877,
|
"total_num_tokens": 145877,
|
||||||
"requests_per_second": 0.620786776376495,
|
"requests_per_second": 0.5944373648520577,
|
||||||
"tokens_per_second": 452.7925628873698
|
"tokens_per_second": 433.5736973626181
|
||||||
}
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"elapsed_time": 1484.8385301349917,
|
||||||
|
"num_requests": 200,
|
||||||
|
"total_num_tokens": 146523,
|
||||||
|
"requests_per_second": 0.1346947805710681,
|
||||||
|
"tokens_per_second": 98.67941666807306
|
||||||
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"elapsed_time": 1707.9124416089617,
|
||||||
|
"num_requests": 200,
|
||||||
|
"total_num_tokens": 147036,
|
||||||
|
"requests_per_second": 0.11710202181769186,
|
||||||
|
"tokens_per_second": 86.0910643999307
|
||||||
|
}
|
||||||
+3
-3
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"elapsed_time": 1315.035868578001,
|
"elapsed_time": 1242.463667072996,
|
||||||
"num_requests": 200,
|
"num_requests": 200,
|
||||||
"total_num_tokens": 147036,
|
"total_num_tokens": 147036,
|
||||||
"requests_per_second": 0.15208710635115047,
|
"requests_per_second": 0.16097050183460196,
|
||||||
"tokens_per_second": 111.8113988472388
|
"tokens_per_second": 118.34229353876268
|
||||||
}
|
}
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"elapsed_time": 1923.4690410719995,
|
"elapsed_time": 1966.935257990961,
|
||||||
"num_requests": 200,
|
"num_requests": 200,
|
||||||
"total_num_tokens": 147036,
|
"total_num_tokens": 147036,
|
||||||
"requests_per_second": 0.10397879858182421,
|
"requests_per_second": 0.10168102848706935,
|
||||||
"tokens_per_second": 76.44313314138553
|
"tokens_per_second": 74.75385852312364
|
||||||
}
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"elapsed_time": 299.5004001749912,
|
||||||
|
"num_requests": 200,
|
||||||
|
"total_num_tokens": 147036,
|
||||||
|
"requests_per_second": 0.6677787404729495,
|
||||||
|
"tokens_per_second": 490.93757442090305
|
||||||
|
}
|
||||||
+3
-3
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"elapsed_time": 246.0529060009976,
|
"elapsed_time": 244.54776988498634,
|
||||||
"num_requests": 200,
|
"num_requests": 200,
|
||||||
"total_num_tokens": 147036,
|
"total_num_tokens": 147036,
|
||||||
"requests_per_second": 0.8128333180474167,
|
"requests_per_second": 0.8178361229548825,
|
||||||
"tokens_per_second": 597.5787987620997
|
"tokens_per_second": 601.2567608739705
|
||||||
}
|
}
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"elapsed_time": 333.59849170300004,
|
"elapsed_time": 362.9645123449736,
|
||||||
"num_requests": 200,
|
"num_requests": 200,
|
||||||
"total_num_tokens": 147036,
|
"total_num_tokens": 147036,
|
||||||
"requests_per_second": 0.5995230943012126,
|
"requests_per_second": 0.5510180560294371,
|
||||||
"tokens_per_second": 440.75738846836555
|
"tokens_per_second": 405.0974544317216
|
||||||
}
|
}
|
||||||
@@ -15,18 +15,21 @@ except ImportError:
|
|||||||
print("Error: 'transformers' not found. Please install it or run in vLLM environment.")
|
print("Error: 'transformers' not found. Please install it or run in vLLM environment.")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
# Import path handling for scripts/models.py
|
# Import path handling for scripts/models.py
|
||||||
try:
|
try:
|
||||||
import sys, os
|
import sys, os
|
||||||
sys.path.append(str(Path(__file__).parent.parent / "scripts"))
|
sys.path.append(str(Path(__file__).parent.parent / "scripts"))
|
||||||
import models
|
import models
|
||||||
|
import cluster_manager # Import shared cluster logic
|
||||||
except ImportError:
|
except ImportError:
|
||||||
print("Error: Could not import scripts/models.py.")
|
print("Error: Could not import scripts/models.py or cluster_manager.py.")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
# Import Utils from run_vllm_bench (keep utils shared)
|
# Import Utils from run_vllm_bench (keep utils shared)
|
||||||
try:
|
try:
|
||||||
from run_vllm_bench import get_gpu_count, kill_vllm
|
from run_vllm_bench import kill_vllm
|
||||||
|
# We do NOT import get_gpu_count because we are overriding it for cluster awareness
|
||||||
except ImportError:
|
except ImportError:
|
||||||
print("Error: Could not import run_vllm_bench.py.")
|
print("Error: Could not import run_vllm_bench.py.")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
@@ -65,7 +68,30 @@ CONCURRENCY_STEPS = [1, 4, 8, 16]
|
|||||||
|
|
||||||
def log(msg): print(f"[MAX-CTX] {msg}", flush=True)
|
def log(msg): print(f"[MAX-CTX] {msg}", flush=True)
|
||||||
|
|
||||||
|
def get_gpu_count():
|
||||||
|
"""
|
||||||
|
Returns total GPUs.
|
||||||
|
If Ray Cluster is active, returns TOTAL cluster GPUs (e.g., 2).
|
||||||
|
Otherwise returns local AMD GPUs.
|
||||||
|
"""
|
||||||
|
if cluster_manager.check_ray_status():
|
||||||
|
# Ideally we'd query Ray for total resources, but for this specific 2-node setup:
|
||||||
|
# If cluster is up, we assume 2 nodes x 1 GPU = 2 GPUs.
|
||||||
|
# Constructing a Ray client just to count is slow/complex here.
|
||||||
|
log("Ray Cluster Detected: Assuming 2 GPUs available.")
|
||||||
|
return 2
|
||||||
|
|
||||||
|
# Local Fallback
|
||||||
|
try:
|
||||||
|
res = subprocess.run("rocm-smi --showid", shell=True, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||||
|
if res.returncode == 0:
|
||||||
|
return res.stdout.count("GPU")
|
||||||
|
except: pass
|
||||||
|
return 1
|
||||||
|
|
||||||
|
|
||||||
def get_hf_context_limit(model_name, trust_remote=False):
|
def get_hf_context_limit(model_name, trust_remote=False):
|
||||||
|
# ... (Keep existing implementation)
|
||||||
try:
|
try:
|
||||||
cfg = AutoConfig.from_pretrained(model_name, trust_remote_code=trust_remote)
|
cfg = AutoConfig.from_pretrained(model_name, trust_remote_code=trust_remote)
|
||||||
|
|
||||||
@@ -95,6 +121,7 @@ def get_hf_context_limit(model_name, trust_remote=False):
|
|||||||
def get_vllm_server_cmd(model, tp_size, util, max_len, max_seqs):
|
def get_vllm_server_cmd(model, tp_size, util, max_len, max_seqs):
|
||||||
"""
|
"""
|
||||||
Constructs the vLLM serve command.
|
Constructs the vLLM serve command.
|
||||||
|
Using Ray Backend if tp_size > 1 (Cluster Mode).
|
||||||
"""
|
"""
|
||||||
config = MODEL_TABLE[model]
|
config = MODEL_TABLE[model]
|
||||||
|
|
||||||
@@ -105,16 +132,46 @@ def get_vllm_server_cmd(model, tp_size, util, max_len, max_seqs):
|
|||||||
"--tensor-parallel-size", str(tp_size),
|
"--tensor-parallel-size", str(tp_size),
|
||||||
"--max-num-seqs", str(max_seqs),
|
"--max-num-seqs", str(max_seqs),
|
||||||
"--dtype", "auto",
|
"--dtype", "auto",
|
||||||
# "--disable-log-stats" # Cleaner output, but user managed without it
|
# "--disable-log-stats"
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# Env Setup
|
||||||
|
env = os.environ.copy()
|
||||||
|
env.update(config.get("env", {}))
|
||||||
|
|
||||||
|
# CLUSTER / RAY LOGIC
|
||||||
|
# Only if we need more than 1 GPU do we engage the cluster machinery
|
||||||
|
if tp_size > 1:
|
||||||
|
log(f"TP={tp_size} > 1: Using Ray Distributed Backend")
|
||||||
|
cmd.extend(["--distributed-executor-backend", "ray"])
|
||||||
|
|
||||||
|
# Inject Cluster Env Vars (similar to start_vllm_cluster.py)
|
||||||
|
# We need to know Head IP and RDMA Interface
|
||||||
|
rdma_iface = cluster_manager.get_net_iface()
|
||||||
|
head_ip = cluster_manager.get_local_ip(rdma_iface) # Assuming we run this ON HEAD
|
||||||
|
|
||||||
|
# IMPORTANT: vLLM needs to bind to the Head IP for Ray workers to reach it?
|
||||||
|
# Or at least we should be explicit.
|
||||||
|
cmd.extend(["--host", head_ip])
|
||||||
|
|
||||||
|
# Update our own process env so verify_context knows where to look?
|
||||||
|
# No, verify_context runs in THIS process. We need to export it or pass it.
|
||||||
|
# Simplest is to set it in os.environ for OUR process too, but that might be messy.
|
||||||
|
# Better: We rely on standard PORT.
|
||||||
|
|
||||||
|
env["RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES"] = "1"
|
||||||
|
env["VLLM_HOST_IP"] = head_ip
|
||||||
|
env["NCCL_SOCKET_IFNAME"] = rdma_iface
|
||||||
|
env["NCCL_IB_GID_INDEX"] = "1"
|
||||||
|
env["NCCL_IB_DISABLE"] = "0"
|
||||||
|
env["NCCL_NET_GDR_LEVEL"] = "0"
|
||||||
|
else:
|
||||||
|
# Default Localhost bind for single node safety
|
||||||
|
cmd.extend(["--host", "127.0.0.1"])
|
||||||
|
|
||||||
if config.get("trust_remote"): cmd.append("--trust-remote-code")
|
if config.get("trust_remote"): cmd.append("--trust-remote-code")
|
||||||
if config.get("enforce_eager"): cmd.append("--enforce-eager")
|
if config.get("enforce_eager"): cmd.append("--enforce-eager")
|
||||||
|
|
||||||
# Add model specific env vars
|
|
||||||
env = os.environ.copy()
|
|
||||||
env.update(config.get("env", {}))
|
|
||||||
|
|
||||||
return cmd, env
|
return cmd, env
|
||||||
|
|
||||||
def is_port_free(port):
|
def is_port_free(port):
|
||||||
@@ -300,7 +357,14 @@ def verify_context(model, context_len):
|
|||||||
"""
|
"""
|
||||||
Sends a request to the server with length ~context_len to verify stability.
|
Sends a request to the server with length ~context_len to verify stability.
|
||||||
"""
|
"""
|
||||||
url = f"http://{HOST}:{PORT}/v1/completions"
|
# Use dynamic host if set (by cluster logic), else localhost
|
||||||
|
# But wait, the env var is set for the SERVER process, not necessarily us?
|
||||||
|
# Actually, we (the client script) need to know where to send requests.
|
||||||
|
# If we are on Head, localhost is fine for Head-based server.
|
||||||
|
# But if we use Ray, vLLM head usually binds to HOST IP.
|
||||||
|
|
||||||
|
target_host = os.getenv("VLLM_HOST_IP", "127.0.0.1")
|
||||||
|
url = f"http://{target_host}:{PORT}/v1/completions"
|
||||||
|
|
||||||
# We use a simple "A " * N prompt.
|
# We use a simple "A " * N prompt.
|
||||||
# Llama 3 tokenizer: "A" is usually 1 token.
|
# Llama 3 tokenizer: "A" is usually 1 token.
|
||||||
@@ -529,9 +593,22 @@ def main():
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
config = MODEL_TABLE[model]
|
config = MODEL_TABLE[model]
|
||||||
valid_tps = [t for t in config["valid_tp"] if t <= gpu_count]
|
|
||||||
|
|
||||||
for tp in valid_tps:
|
# KEY CHANGES:
|
||||||
|
# We only want to test the MINIMUM required TP.
|
||||||
|
# If model supports 1 and 2, we ONLY test 1 (local is faster/easier).
|
||||||
|
# We only test 2 if model VALID_TP *starts* with 2 (or higher).
|
||||||
|
|
||||||
|
valid_tps = config.get("valid_tp", [1])
|
||||||
|
min_tp = min(valid_tps)
|
||||||
|
|
||||||
|
if min_tp > gpu_count:
|
||||||
|
log(f"Skipping {model}: Requires TP={min_tp} but only {gpu_count} GPUs available.")
|
||||||
|
continue
|
||||||
|
|
||||||
|
tps_to_test = [min_tp]
|
||||||
|
|
||||||
|
for tp in tps_to_test:
|
||||||
# Track successful seqs for this TP to skip lower utils
|
# Track successful seqs for this TP to skip lower utils
|
||||||
# effectively: {seqs_count: max_working_util}
|
# effectively: {seqs_count: max_working_util}
|
||||||
# Since we iterate high-util -> low-util, if we succeeded already for this 'seqs', we skip.
|
# Since we iterate high-util -> low-util, if we succeeded already for this 'seqs', we skip.
|
||||||
|
|||||||
+21
-9
@@ -469,6 +469,10 @@
|
|||||||
style="font-size: 0.9rem; font-weight: 500; display: flex; align-items: center; gap: 4px; cursor: pointer;">
|
style="font-size: 0.9rem; font-weight: 500; display: flex; align-items: center; gap: 4px; cursor: pointer;">
|
||||||
<input type="checkbox" id="toggleTP2" checked> TP2
|
<input type="checkbox" id="toggleTP2" checked> TP2
|
||||||
</label>
|
</label>
|
||||||
|
<label
|
||||||
|
style="font-size: 0.9rem; font-weight: 500; display: flex; align-items: center; gap: 4px; cursor: pointer;">
|
||||||
|
<input type="checkbox" id="toggleTP2Eth" checked> TP2 (Eth)
|
||||||
|
</label>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- Attention Group -->
|
<!-- Attention Group -->
|
||||||
@@ -544,6 +548,7 @@
|
|||||||
activeTab: "Throughput",
|
activeTab: "Throughput",
|
||||||
showTP1: true,
|
showTP1: true,
|
||||||
showTP2: true,
|
showTP2: true,
|
||||||
|
showTP2Eth: true,
|
||||||
showTriton: true,
|
showTriton: true,
|
||||||
showRocm: false
|
showRocm: false
|
||||||
};
|
};
|
||||||
@@ -615,6 +620,7 @@
|
|||||||
// Toggles
|
// Toggles
|
||||||
$('toggleTP1').addEventListener('change', e => { state.showTP1 = e.target.checked; render(); });
|
$('toggleTP1').addEventListener('change', e => { state.showTP1 = e.target.checked; render(); });
|
||||||
$('toggleTP2').addEventListener('change', e => { state.showTP2 = e.target.checked; render(); });
|
$('toggleTP2').addEventListener('change', e => { state.showTP2 = e.target.checked; render(); });
|
||||||
|
$('toggleTP2Eth').addEventListener('change', e => { state.showTP2Eth = e.target.checked; render(); });
|
||||||
$('toggleTriton').addEventListener('change', e => { state.showTriton = e.target.checked; render(); });
|
$('toggleTriton').addEventListener('change', e => { state.showTriton = e.target.checked; render(); });
|
||||||
$('toggleRocm').addEventListener('change', e => { state.showRocm = e.target.checked; render(); });
|
$('toggleRocm').addEventListener('change', e => { state.showRocm = e.target.checked; render(); });
|
||||||
}
|
}
|
||||||
@@ -636,13 +642,17 @@
|
|||||||
params: run.params_b || run.name_params_b,
|
params: run.params_b || run.name_params_b,
|
||||||
results: {
|
results: {
|
||||||
1: { triton: null, rocm: null },
|
1: { triton: null, rocm: null },
|
||||||
2: { triton: null, rocm: null }
|
2: { triton: null, rocm: null },
|
||||||
|
"2_eth": { triton: null, rocm: null }
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
const m = testGroups[testName].models[modelName];
|
const m = testGroups[testName].models[modelName];
|
||||||
const tp = run.tp || 1;
|
let tp = run.tp || 1;
|
||||||
|
if (tp === 2 && run.network === "Ethernet") {
|
||||||
|
tp = "2_eth";
|
||||||
|
}
|
||||||
|
|
||||||
if (!m.results[tp]) m.results[tp] = { triton: null, rocm: null };
|
if (!m.results[tp]) m.results[tp] = { triton: null, rocm: null };
|
||||||
|
|
||||||
@@ -749,8 +759,12 @@
|
|||||||
if (state.showRocm) cols.push({ id: "tp1_rocm", label: "TP1 ROCm" });
|
if (state.showRocm) cols.push({ id: "tp1_rocm", label: "TP1 ROCm" });
|
||||||
}
|
}
|
||||||
if (state.showTP2) {
|
if (state.showTP2) {
|
||||||
if (state.showTriton) cols.push({ id: "tp2_triton", label: "TP2 Triton" });
|
if (state.showTriton) cols.push({ id: "tp2_triton", label: "TP2 RoCE Triton" });
|
||||||
if (state.showRocm) cols.push({ id: "tp2_rocm", label: "TP2 ROCm" });
|
if (state.showRocm) cols.push({ id: "tp2_rocm", label: "TP2 RoCE ROCm" });
|
||||||
|
}
|
||||||
|
if (state.showTP2Eth) {
|
||||||
|
if (state.showTriton) cols.push({ id: "tp2_eth_triton", label: "TP2 Eth Triton" });
|
||||||
|
if (state.showRocm) cols.push({ id: "tp2_eth_rocm", label: "TP2 Eth ROCm" });
|
||||||
}
|
}
|
||||||
|
|
||||||
// Thead
|
// Thead
|
||||||
@@ -790,11 +804,7 @@
|
|||||||
|
|
||||||
// Data Cells
|
// Data Cells
|
||||||
cols.forEach(c => {
|
cols.forEach(c => {
|
||||||
let val = null;
|
let val = getVal(m, c.id);
|
||||||
if (c.id === "tp1_triton") val = m.results[1]?.triton;
|
|
||||||
if (c.id === "tp1_rocm") val = m.results[1]?.rocm;
|
|
||||||
if (c.id === "tp2_triton") val = m.results[2]?.triton;
|
|
||||||
if (c.id === "tp2_rocm") val = m.results[2]?.rocm;
|
|
||||||
|
|
||||||
const bg = c.id.startsWith("tp2") ? 'style="background:#fbfdff;"' : "";
|
const bg = c.id.startsWith("tp2") ? 'style="background:#fbfdff;"' : "";
|
||||||
rowHtml += `<td class="col-data" ${bg}>${formatVal(val, unit)}</td>`;
|
rowHtml += `<td class="col-data" ${bg}>${formatVal(val, unit)}</td>`;
|
||||||
@@ -823,6 +833,8 @@
|
|||||||
if (colId === "tp1_rocm") return m.results[1]?.rocm;
|
if (colId === "tp1_rocm") return m.results[1]?.rocm;
|
||||||
if (colId === "tp2_triton") return m.results[2]?.triton;
|
if (colId === "tp2_triton") return m.results[2]?.triton;
|
||||||
if (colId === "tp2_rocm") return m.results[2]?.rocm;
|
if (colId === "tp2_rocm") return m.results[2]?.rocm;
|
||||||
|
if (colId === "tp2_eth_triton") return m.results["2_eth"]?.triton;
|
||||||
|
if (colId === "tp2_eth_rocm") return m.results["2_eth"]?.rocm;
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -66,6 +66,11 @@ def parse_logs():
|
|||||||
if not tp_match: continue
|
if not tp_match: continue
|
||||||
tp = int(tp_match.group(1))
|
tp = int(tp_match.group(1))
|
||||||
|
|
||||||
|
# Network
|
||||||
|
network = "RoCE"
|
||||||
|
if "_eth" in rest:
|
||||||
|
network = "Ethernet"
|
||||||
|
|
||||||
# Model Name
|
# Model Name
|
||||||
if "_" in model_part:
|
if "_" in model_part:
|
||||||
model_display = model_part.replace("_", "/", 1)
|
model_display = model_part.replace("_", "/", 1)
|
||||||
@@ -87,6 +92,7 @@ def parse_logs():
|
|||||||
"params_b": params_b,
|
"params_b": params_b,
|
||||||
"name_params_b": params_b,
|
"name_params_b": params_b,
|
||||||
"backend": backend_name, # "Triton" or "ROCm"
|
"backend": backend_name, # "Triton" or "ROCm"
|
||||||
|
"network": network,
|
||||||
"error": False
|
"error": False
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
+505
-171
@@ -1,131 +1,5 @@
|
|||||||
{
|
{
|
||||||
"runs": [
|
"runs": [
|
||||||
{
|
|
||||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
|
||||||
"model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
|
||||||
"env": "TP1",
|
|
||||||
"gpu_config": "single",
|
|
||||||
"quant": "BF16",
|
|
||||||
"params_b": 8.0,
|
|
||||||
"name_params_b": 8.0,
|
|
||||||
"backend": "Triton",
|
|
||||||
"error": false,
|
|
||||||
"test": "Throughput",
|
|
||||||
"tp": 1,
|
|
||||||
"tps_mean": 383.3285005130725
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"model": "google/gemma-3-12b-it",
|
|
||||||
"model_clean": "google/gemma-3-12b-it",
|
|
||||||
"env": "TP1",
|
|
||||||
"gpu_config": "single",
|
|
||||||
"quant": "BF16",
|
|
||||||
"params_b": 12.0,
|
|
||||||
"name_params_b": 12.0,
|
|
||||||
"backend": "Triton",
|
|
||||||
"error": false,
|
|
||||||
"test": "Throughput",
|
|
||||||
"tp": 1,
|
|
||||||
"tps_mean": 169.75141153501525
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"model": "Qwen/Qwen3-14B-AWQ",
|
|
||||||
"model_clean": "Qwen/Qwen3-14B-AWQ",
|
|
||||||
"env": "TP1",
|
|
||||||
"gpu_config": "single",
|
|
||||||
"quant": "AWQ",
|
|
||||||
"params_b": 14.0,
|
|
||||||
"name_params_b": 14.0,
|
|
||||||
"backend": "Triton",
|
|
||||||
"error": false,
|
|
||||||
"test": "Throughput",
|
|
||||||
"tp": 1,
|
|
||||||
"tps_mean": 168.9724830093454
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"model": "openai/gpt-oss-20b",
|
|
||||||
"model_clean": "openai/gpt-oss-20b",
|
|
||||||
"env": "TP1",
|
|
||||||
"gpu_config": "single",
|
|
||||||
"quant": "BF16",
|
|
||||||
"params_b": 20.0,
|
|
||||||
"name_params_b": 20.0,
|
|
||||||
"backend": "Triton",
|
|
||||||
"error": false,
|
|
||||||
"test": "Throughput",
|
|
||||||
"tp": 1,
|
|
||||||
"tps_mean": 430.8537270233502
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"model": "openai/gpt-oss-120b",
|
|
||||||
"model_clean": "openai/gpt-oss-120b",
|
|
||||||
"env": "TP1",
|
|
||||||
"gpu_config": "single",
|
|
||||||
"quant": "BF16",
|
|
||||||
"params_b": 120.0,
|
|
||||||
"name_params_b": 120.0,
|
|
||||||
"backend": "Triton",
|
|
||||||
"error": false,
|
|
||||||
"test": "Throughput",
|
|
||||||
"tp": 1,
|
|
||||||
"tps_mean": 77.93077982357597
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"model": "zai-org/GLM-4.7-Flash",
|
|
||||||
"model_clean": "zai-org/GLM-4.7-Flash",
|
|
||||||
"env": "TP1",
|
|
||||||
"gpu_config": "single",
|
|
||||||
"quant": "BF16",
|
|
||||||
"params_b": null,
|
|
||||||
"name_params_b": null,
|
|
||||||
"backend": "Triton",
|
|
||||||
"error": false,
|
|
||||||
"test": "Throughput",
|
|
||||||
"tp": 1,
|
|
||||||
"tps_mean": 246.02994547299596
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
|
||||||
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
|
||||||
"env": "TP1",
|
|
||||||
"gpu_config": "single",
|
|
||||||
"quant": "GPTQ",
|
|
||||||
"params_b": 30.0,
|
|
||||||
"name_params_b": 30.0,
|
|
||||||
"backend": "Triton",
|
|
||||||
"error": false,
|
|
||||||
"test": "Throughput",
|
|
||||||
"tp": 1,
|
|
||||||
"tps_mean": 227.90361622402403
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
|
||||||
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
|
||||||
"env": "TP1",
|
|
||||||
"gpu_config": "single",
|
|
||||||
"quant": "GPTQ",
|
|
||||||
"params_b": 30.0,
|
|
||||||
"name_params_b": 30.0,
|
|
||||||
"backend": "Triton",
|
|
||||||
"error": false,
|
|
||||||
"test": "Throughput",
|
|
||||||
"tp": 1,
|
|
||||||
"tps_mean": 200.1426829468909
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
|
||||||
"model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
|
||||||
"env": "TP1",
|
|
||||||
"gpu_config": "single",
|
|
||||||
"quant": "GPTQ",
|
|
||||||
"params_b": 80.0,
|
|
||||||
"name_params_b": 80.0,
|
|
||||||
"backend": "Triton",
|
|
||||||
"error": false,
|
|
||||||
"test": "Throughput",
|
|
||||||
"tp": 1,
|
|
||||||
"tps_mean": 132.2599488751683
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||||
"model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
"model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||||
@@ -135,10 +9,11 @@
|
|||||||
"params_b": 8.0,
|
"params_b": 8.0,
|
||||||
"name_params_b": 8.0,
|
"name_params_b": 8.0,
|
||||||
"backend": "Triton",
|
"backend": "Triton",
|
||||||
|
"network": "Ethernet",
|
||||||
"error": false,
|
"error": false,
|
||||||
"test": "Throughput",
|
"test": "Throughput",
|
||||||
"tp": 2,
|
"tp": 2,
|
||||||
"tps_mean": 596.5891209659404
|
"tps_mean": 613.9321196754427
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"model": "google/gemma-3-12b-it",
|
"model": "google/gemma-3-12b-it",
|
||||||
@@ -149,10 +24,11 @@
|
|||||||
"params_b": 12.0,
|
"params_b": 12.0,
|
||||||
"name_params_b": 12.0,
|
"name_params_b": 12.0,
|
||||||
"backend": "Triton",
|
"backend": "Triton",
|
||||||
|
"network": "Ethernet",
|
||||||
"error": false,
|
"error": false,
|
||||||
"test": "Throughput",
|
"test": "Throughput",
|
||||||
"tp": 2,
|
"tp": 2,
|
||||||
"tps_mean": 294.9472545848014
|
"tps_mean": 291.5155379231269
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"model": "Qwen/Qwen3-14B-AWQ",
|
"model": "Qwen/Qwen3-14B-AWQ",
|
||||||
@@ -163,10 +39,11 @@
|
|||||||
"params_b": 14.0,
|
"params_b": 14.0,
|
||||||
"name_params_b": 14.0,
|
"name_params_b": 14.0,
|
||||||
"backend": "Triton",
|
"backend": "Triton",
|
||||||
|
"network": "Ethernet",
|
||||||
"error": false,
|
"error": false,
|
||||||
"test": "Throughput",
|
"test": "Throughput",
|
||||||
"tp": 2,
|
"tp": 2,
|
||||||
"tps_mean": 348.0799308087054
|
"tps_mean": 280.05330212131406
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"model": "openai/gpt-oss-20b",
|
"model": "openai/gpt-oss-20b",
|
||||||
@@ -177,10 +54,11 @@
|
|||||||
"params_b": 20.0,
|
"params_b": 20.0,
|
||||||
"name_params_b": 20.0,
|
"name_params_b": 20.0,
|
||||||
"backend": "Triton",
|
"backend": "Triton",
|
||||||
|
"network": "Ethernet",
|
||||||
"error": false,
|
"error": false,
|
||||||
"test": "Throughput",
|
"test": "Throughput",
|
||||||
"tp": 2,
|
"tp": 2,
|
||||||
"tps_mean": 593.7843034224891
|
"tps_mean": 602.6345456319963
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"model": "openai/gpt-oss-120b",
|
"model": "openai/gpt-oss-120b",
|
||||||
@@ -191,10 +69,11 @@
|
|||||||
"params_b": 120.0,
|
"params_b": 120.0,
|
||||||
"name_params_b": 120.0,
|
"name_params_b": 120.0,
|
||||||
"backend": "Triton",
|
"backend": "Triton",
|
||||||
|
"network": "Ethernet",
|
||||||
"error": false,
|
"error": false,
|
||||||
"test": "Throughput",
|
"test": "Throughput",
|
||||||
"tp": 2,
|
"tp": 2,
|
||||||
"tps_mean": 112.4781801162827
|
"tps_mean": 85.2809331488931
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"model": "zai-org/GLM-4.7-Flash",
|
"model": "zai-org/GLM-4.7-Flash",
|
||||||
@@ -205,10 +84,11 @@
|
|||||||
"params_b": null,
|
"params_b": null,
|
||||||
"name_params_b": null,
|
"name_params_b": null,
|
||||||
"backend": "Triton",
|
"backend": "Triton",
|
||||||
|
"network": "Ethernet",
|
||||||
"error": false,
|
"error": false,
|
||||||
"test": "Throughput",
|
"test": "Throughput",
|
||||||
"tp": 2,
|
"tp": 2,
|
||||||
"tps_mean": 346.0061963818796
|
"tps_mean": 300.773560320048
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
||||||
@@ -219,10 +99,11 @@
|
|||||||
"params_b": 30.0,
|
"params_b": 30.0,
|
||||||
"name_params_b": 30.0,
|
"name_params_b": 30.0,
|
||||||
"backend": "Triton",
|
"backend": "Triton",
|
||||||
|
"network": "Ethernet",
|
||||||
"error": false,
|
"error": false,
|
||||||
"test": "Throughput",
|
"test": "Throughput",
|
||||||
"tp": 2,
|
"tp": 2,
|
||||||
"tps_mean": 320.69249844623016
|
"tps_mean": 321.88057686801585
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
||||||
@@ -233,10 +114,11 @@
|
|||||||
"params_b": 30.0,
|
"params_b": 30.0,
|
||||||
"name_params_b": 30.0,
|
"name_params_b": 30.0,
|
||||||
"backend": "Triton",
|
"backend": "Triton",
|
||||||
|
"network": "Ethernet",
|
||||||
"error": false,
|
"error": false,
|
||||||
"test": "Throughput",
|
"test": "Throughput",
|
||||||
"tp": 2,
|
"tp": 2,
|
||||||
"tps_mean": 274.7000183491961
|
"tps_mean": 274.46004720922855
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
||||||
@@ -247,10 +129,176 @@
|
|||||||
"params_b": 80.0,
|
"params_b": 80.0,
|
||||||
"name_params_b": 80.0,
|
"name_params_b": 80.0,
|
||||||
"backend": "Triton",
|
"backend": "Triton",
|
||||||
|
"network": "Ethernet",
|
||||||
"error": false,
|
"error": false,
|
||||||
"test": "Throughput",
|
"test": "Throughput",
|
||||||
"tp": 2,
|
"tp": 2,
|
||||||
"tps_mean": 167.00232766189475
|
"tps_mean": 182.16229690959702
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "mratsim/MiniMax-M2.5-BF16-INT4-AWQ",
|
||||||
|
"model_clean": "mratsim/MiniMax-M2.5-BF16-INT4-AWQ",
|
||||||
|
"env": "TP2",
|
||||||
|
"gpu_config": "dual",
|
||||||
|
"quant": "BF16",
|
||||||
|
"params_b": null,
|
||||||
|
"name_params_b": null,
|
||||||
|
"backend": "Triton",
|
||||||
|
"network": "Ethernet",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 2,
|
||||||
|
"tps_mean": 107.62460878889469
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||||
|
"model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||||
|
"env": "TP2",
|
||||||
|
"gpu_config": "dual",
|
||||||
|
"quant": "BF16",
|
||||||
|
"params_b": 8.0,
|
||||||
|
"name_params_b": 8.0,
|
||||||
|
"backend": "Triton",
|
||||||
|
"network": "RoCE",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 2,
|
||||||
|
"tps_mean": 590.0492703672895
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "google/gemma-3-12b-it",
|
||||||
|
"model_clean": "google/gemma-3-12b-it",
|
||||||
|
"env": "TP2",
|
||||||
|
"gpu_config": "dual",
|
||||||
|
"quant": "BF16",
|
||||||
|
"params_b": 12.0,
|
||||||
|
"name_params_b": 12.0,
|
||||||
|
"backend": "Triton",
|
||||||
|
"network": "RoCE",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 2,
|
||||||
|
"tps_mean": 285.8275921888489
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "Qwen/Qwen3-14B-AWQ",
|
||||||
|
"model_clean": "Qwen/Qwen3-14B-AWQ",
|
||||||
|
"env": "TP2",
|
||||||
|
"gpu_config": "dual",
|
||||||
|
"quant": "AWQ",
|
||||||
|
"params_b": 14.0,
|
||||||
|
"name_params_b": 14.0,
|
||||||
|
"backend": "Triton",
|
||||||
|
"network": "RoCE",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 2,
|
||||||
|
"tps_mean": 346.2003835540928
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "openai/gpt-oss-20b",
|
||||||
|
"model_clean": "openai/gpt-oss-20b",
|
||||||
|
"env": "TP2",
|
||||||
|
"gpu_config": "dual",
|
||||||
|
"quant": "BF16",
|
||||||
|
"params_b": 20.0,
|
||||||
|
"name_params_b": 20.0,
|
||||||
|
"backend": "Triton",
|
||||||
|
"network": "RoCE",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 2,
|
||||||
|
"tps_mean": 607.216674264294
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "openai/gpt-oss-120b",
|
||||||
|
"model_clean": "openai/gpt-oss-120b",
|
||||||
|
"env": "TP2",
|
||||||
|
"gpu_config": "dual",
|
||||||
|
"quant": "BF16",
|
||||||
|
"params_b": 120.0,
|
||||||
|
"name_params_b": 120.0,
|
||||||
|
"backend": "Triton",
|
||||||
|
"network": "RoCE",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 2,
|
||||||
|
"tps_mean": 122.62029501860121
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "zai-org/GLM-4.7-Flash",
|
||||||
|
"model_clean": "zai-org/GLM-4.7-Flash",
|
||||||
|
"env": "TP2",
|
||||||
|
"gpu_config": "dual",
|
||||||
|
"quant": "BF16",
|
||||||
|
"params_b": null,
|
||||||
|
"name_params_b": null,
|
||||||
|
"backend": "Triton",
|
||||||
|
"network": "RoCE",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 2,
|
||||||
|
"tps_mean": 366.77052981888835
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
||||||
|
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
||||||
|
"env": "TP2",
|
||||||
|
"gpu_config": "dual",
|
||||||
|
"quant": "GPTQ",
|
||||||
|
"params_b": 30.0,
|
||||||
|
"name_params_b": 30.0,
|
||||||
|
"backend": "Triton",
|
||||||
|
"network": "RoCE",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 2,
|
||||||
|
"tps_mean": 320.0197833991106
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
||||||
|
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
||||||
|
"env": "TP2",
|
||||||
|
"gpu_config": "dual",
|
||||||
|
"quant": "GPTQ",
|
||||||
|
"params_b": 30.0,
|
||||||
|
"name_params_b": 30.0,
|
||||||
|
"backend": "Triton",
|
||||||
|
"network": "RoCE",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 2,
|
||||||
|
"tps_mean": 279.7240042842149
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
||||||
|
"model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
||||||
|
"env": "TP2",
|
||||||
|
"gpu_config": "dual",
|
||||||
|
"quant": "GPTQ",
|
||||||
|
"params_b": 80.0,
|
||||||
|
"name_params_b": 80.0,
|
||||||
|
"backend": "Triton",
|
||||||
|
"network": "RoCE",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 2,
|
||||||
|
"tps_mean": 196.2262690032198
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "mratsim/MiniMax-M2.5-BF16-INT4-AWQ",
|
||||||
|
"model_clean": "mratsim/MiniMax-M2.5-BF16-INT4-AWQ",
|
||||||
|
"env": "TP2",
|
||||||
|
"gpu_config": "dual",
|
||||||
|
"quant": "BF16",
|
||||||
|
"params_b": null,
|
||||||
|
"name_params_b": null,
|
||||||
|
"backend": "Triton",
|
||||||
|
"network": "RoCE",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 2,
|
||||||
|
"tps_mean": 98.85048345093716
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||||
@@ -260,25 +308,12 @@
|
|||||||
"quant": "BF16",
|
"quant": "BF16",
|
||||||
"params_b": 8.0,
|
"params_b": 8.0,
|
||||||
"name_params_b": 8.0,
|
"name_params_b": 8.0,
|
||||||
"backend": "ROCm",
|
"backend": "Triton",
|
||||||
|
"network": "RoCE",
|
||||||
"error": false,
|
"error": false,
|
||||||
"test": "Throughput",
|
"test": "Throughput",
|
||||||
"tp": 1,
|
"tp": 1,
|
||||||
"tps_mean": 452.7925628873698
|
"tps_mean": 369.23212230245684
|
||||||
},
|
|
||||||
{
|
|
||||||
"model": "google/gemma-3-12b-it",
|
|
||||||
"model_clean": "google/gemma-3-12b-it",
|
|
||||||
"env": "TP1",
|
|
||||||
"gpu_config": "single",
|
|
||||||
"quant": "BF16",
|
|
||||||
"params_b": 12.0,
|
|
||||||
"name_params_b": 12.0,
|
|
||||||
"backend": "ROCm",
|
|
||||||
"error": false,
|
|
||||||
"test": "Throughput",
|
|
||||||
"tp": 1,
|
|
||||||
"tps_mean": 292.4074663029466
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"model": "Qwen/Qwen3-14B-AWQ",
|
"model": "Qwen/Qwen3-14B-AWQ",
|
||||||
@@ -288,11 +323,12 @@
|
|||||||
"quant": "AWQ",
|
"quant": "AWQ",
|
||||||
"params_b": 14.0,
|
"params_b": 14.0,
|
||||||
"name_params_b": 14.0,
|
"name_params_b": 14.0,
|
||||||
"backend": "ROCm",
|
"backend": "Triton",
|
||||||
|
"network": "RoCE",
|
||||||
"error": false,
|
"error": false,
|
||||||
"test": "Throughput",
|
"test": "Throughput",
|
||||||
"tp": 1,
|
"tp": 1,
|
||||||
"tps_mean": 190.86242019407229
|
"tps_mean": 159.8857312165796
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"model": "openai/gpt-oss-20b",
|
"model": "openai/gpt-oss-20b",
|
||||||
@@ -302,11 +338,12 @@
|
|||||||
"quant": "BF16",
|
"quant": "BF16",
|
||||||
"params_b": 20.0,
|
"params_b": 20.0,
|
||||||
"name_params_b": 20.0,
|
"name_params_b": 20.0,
|
||||||
"backend": "ROCm",
|
"backend": "Triton",
|
||||||
|
"network": "RoCE",
|
||||||
"error": false,
|
"error": false,
|
||||||
"test": "Throughput",
|
"test": "Throughput",
|
||||||
"tp": 1,
|
"tp": 1,
|
||||||
"tps_mean": 440.75738846836555
|
"tps_mean": 411.03557062490586
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"model": "openai/gpt-oss-120b",
|
"model": "openai/gpt-oss-120b",
|
||||||
@@ -316,11 +353,27 @@
|
|||||||
"quant": "BF16",
|
"quant": "BF16",
|
||||||
"params_b": 120.0,
|
"params_b": 120.0,
|
||||||
"name_params_b": 120.0,
|
"name_params_b": 120.0,
|
||||||
"backend": "ROCm",
|
"backend": "Triton",
|
||||||
|
"network": "RoCE",
|
||||||
"error": false,
|
"error": false,
|
||||||
"test": "Throughput",
|
"test": "Throughput",
|
||||||
"tp": 1,
|
"tp": 1,
|
||||||
"tps_mean": 76.44313314138553
|
"tps_mean": 75.0407548829671
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "zai-org/GLM-4.7-Flash",
|
||||||
|
"model_clean": "zai-org/GLM-4.7-Flash",
|
||||||
|
"env": "TP1",
|
||||||
|
"gpu_config": "single",
|
||||||
|
"quant": "BF16",
|
||||||
|
"params_b": null,
|
||||||
|
"name_params_b": null,
|
||||||
|
"backend": "Triton",
|
||||||
|
"network": "RoCE",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 1,
|
||||||
|
"tps_mean": 239.57478116575834
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
||||||
@@ -330,11 +383,12 @@
|
|||||||
"quant": "GPTQ",
|
"quant": "GPTQ",
|
||||||
"params_b": 30.0,
|
"params_b": 30.0,
|
||||||
"name_params_b": 30.0,
|
"name_params_b": 30.0,
|
||||||
"backend": "ROCm",
|
"backend": "Triton",
|
||||||
|
"network": "RoCE",
|
||||||
"error": false,
|
"error": false,
|
||||||
"test": "Throughput",
|
"test": "Throughput",
|
||||||
"tp": 1,
|
"tp": 1,
|
||||||
"tps_mean": 229.9835374194385
|
"tps_mean": 213.74630950782364
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
||||||
@@ -344,11 +398,12 @@
|
|||||||
"quant": "GPTQ",
|
"quant": "GPTQ",
|
||||||
"params_b": 30.0,
|
"params_b": 30.0,
|
||||||
"name_params_b": 30.0,
|
"name_params_b": 30.0,
|
||||||
"backend": "ROCm",
|
"backend": "Triton",
|
||||||
|
"network": "RoCE",
|
||||||
"error": false,
|
"error": false,
|
||||||
"test": "Throughput",
|
"test": "Throughput",
|
||||||
"tp": 1,
|
"tp": 1,
|
||||||
"tps_mean": 203.38751203489863
|
"tps_mean": 186.03115379827653
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
||||||
@@ -358,11 +413,27 @@
|
|||||||
"quant": "GPTQ",
|
"quant": "GPTQ",
|
||||||
"params_b": 80.0,
|
"params_b": 80.0,
|
||||||
"name_params_b": 80.0,
|
"name_params_b": 80.0,
|
||||||
"backend": "ROCm",
|
"backend": "Triton",
|
||||||
|
"network": "RoCE",
|
||||||
"error": false,
|
"error": false,
|
||||||
"test": "Throughput",
|
"test": "Throughput",
|
||||||
"tp": 1,
|
"tp": 1,
|
||||||
"tps_mean": 135.3839809398758
|
"tps_mean": 125.65027253668944
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "google/gemma-3-12b-it",
|
||||||
|
"model_clean": "google/gemma-3-12b-it",
|
||||||
|
"env": "TP1",
|
||||||
|
"gpu_config": "single",
|
||||||
|
"quant": "BF16",
|
||||||
|
"params_b": 12.0,
|
||||||
|
"name_params_b": 12.0,
|
||||||
|
"backend": "Triton",
|
||||||
|
"network": "RoCE",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 1,
|
||||||
|
"tps_mean": 159.95620436815713
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||||
@@ -373,10 +444,11 @@
|
|||||||
"params_b": 8.0,
|
"params_b": 8.0,
|
||||||
"name_params_b": 8.0,
|
"name_params_b": 8.0,
|
||||||
"backend": "ROCm",
|
"backend": "ROCm",
|
||||||
|
"network": "Ethernet",
|
||||||
"error": false,
|
"error": false,
|
||||||
"test": "Throughput",
|
"test": "Throughput",
|
||||||
"tp": 2,
|
"tp": 2,
|
||||||
"tps_mean": 649.02791593759
|
"tps_mean": 682.4360360233941
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"model": "google/gemma-3-12b-it",
|
"model": "google/gemma-3-12b-it",
|
||||||
@@ -387,10 +459,11 @@
|
|||||||
"params_b": 12.0,
|
"params_b": 12.0,
|
||||||
"name_params_b": 12.0,
|
"name_params_b": 12.0,
|
||||||
"backend": "ROCm",
|
"backend": "ROCm",
|
||||||
|
"network": "Ethernet",
|
||||||
"error": false,
|
"error": false,
|
||||||
"test": "Throughput",
|
"test": "Throughput",
|
||||||
"tp": 2,
|
"tp": 2,
|
||||||
"tps_mean": 403.09652364564084
|
"tps_mean": 398.09474781142933
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"model": "Qwen/Qwen3-14B-AWQ",
|
"model": "Qwen/Qwen3-14B-AWQ",
|
||||||
@@ -401,10 +474,11 @@
|
|||||||
"params_b": 14.0,
|
"params_b": 14.0,
|
||||||
"name_params_b": 14.0,
|
"name_params_b": 14.0,
|
||||||
"backend": "ROCm",
|
"backend": "ROCm",
|
||||||
|
"network": "Ethernet",
|
||||||
"error": false,
|
"error": false,
|
||||||
"test": "Throughput",
|
"test": "Throughput",
|
||||||
"tp": 2,
|
"tp": 2,
|
||||||
"tps_mean": 371.4058491591393
|
"tps_mean": 295.31575874126105
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"model": "openai/gpt-oss-20b",
|
"model": "openai/gpt-oss-20b",
|
||||||
@@ -415,10 +489,11 @@
|
|||||||
"params_b": 20.0,
|
"params_b": 20.0,
|
||||||
"name_params_b": 20.0,
|
"name_params_b": 20.0,
|
||||||
"backend": "ROCm",
|
"backend": "ROCm",
|
||||||
|
"network": "Ethernet",
|
||||||
"error": false,
|
"error": false,
|
||||||
"test": "Throughput",
|
"test": "Throughput",
|
||||||
"tp": 2,
|
"tp": 2,
|
||||||
"tps_mean": 597.5787987620997
|
"tps_mean": 490.93757442090305
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"model": "openai/gpt-oss-120b",
|
"model": "openai/gpt-oss-120b",
|
||||||
@@ -429,10 +504,11 @@
|
|||||||
"params_b": 120.0,
|
"params_b": 120.0,
|
||||||
"name_params_b": 120.0,
|
"name_params_b": 120.0,
|
||||||
"backend": "ROCm",
|
"backend": "ROCm",
|
||||||
|
"network": "Ethernet",
|
||||||
"error": false,
|
"error": false,
|
||||||
"test": "Throughput",
|
"test": "Throughput",
|
||||||
"tp": 2,
|
"tp": 2,
|
||||||
"tps_mean": 111.8113988472388
|
"tps_mean": 86.0910643999307
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
||||||
@@ -443,10 +519,11 @@
|
|||||||
"params_b": 30.0,
|
"params_b": 30.0,
|
||||||
"name_params_b": 30.0,
|
"name_params_b": 30.0,
|
||||||
"backend": "ROCm",
|
"backend": "ROCm",
|
||||||
|
"network": "Ethernet",
|
||||||
"error": false,
|
"error": false,
|
||||||
"test": "Throughput",
|
"test": "Throughput",
|
||||||
"tp": 2,
|
"tp": 2,
|
||||||
"tps_mean": 315.906032423287
|
"tps_mean": 321.6166453306162
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
||||||
@@ -457,10 +534,11 @@
|
|||||||
"params_b": 30.0,
|
"params_b": 30.0,
|
||||||
"name_params_b": 30.0,
|
"name_params_b": 30.0,
|
||||||
"backend": "ROCm",
|
"backend": "ROCm",
|
||||||
|
"network": "Ethernet",
|
||||||
"error": false,
|
"error": false,
|
||||||
"test": "Throughput",
|
"test": "Throughput",
|
||||||
"tp": 2,
|
"tp": 2,
|
||||||
"tps_mean": 292.0384117325289
|
"tps_mean": 283.6309502128471
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
||||||
@@ -471,10 +549,266 @@
|
|||||||
"params_b": 80.0,
|
"params_b": 80.0,
|
||||||
"name_params_b": 80.0,
|
"name_params_b": 80.0,
|
||||||
"backend": "ROCm",
|
"backend": "ROCm",
|
||||||
|
"network": "Ethernet",
|
||||||
"error": false,
|
"error": false,
|
||||||
"test": "Throughput",
|
"test": "Throughput",
|
||||||
"tp": 2,
|
"tp": 2,
|
||||||
"tps_mean": 165.5348293928834
|
"tps_mean": 182.9186467257061
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "mratsim/MiniMax-M2.5-BF16-INT4-AWQ",
|
||||||
|
"model_clean": "mratsim/MiniMax-M2.5-BF16-INT4-AWQ",
|
||||||
|
"env": "TP2",
|
||||||
|
"gpu_config": "dual",
|
||||||
|
"quant": "BF16",
|
||||||
|
"params_b": null,
|
||||||
|
"name_params_b": null,
|
||||||
|
"backend": "ROCm",
|
||||||
|
"network": "Ethernet",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 2,
|
||||||
|
"tps_mean": 98.67941666807306
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||||
|
"model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||||
|
"env": "TP2",
|
||||||
|
"gpu_config": "dual",
|
||||||
|
"quant": "BF16",
|
||||||
|
"params_b": 8.0,
|
||||||
|
"name_params_b": 8.0,
|
||||||
|
"backend": "ROCm",
|
||||||
|
"network": "RoCE",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 2,
|
||||||
|
"tps_mean": 650.1471716939323
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "google/gemma-3-12b-it",
|
||||||
|
"model_clean": "google/gemma-3-12b-it",
|
||||||
|
"env": "TP2",
|
||||||
|
"gpu_config": "dual",
|
||||||
|
"quant": "BF16",
|
||||||
|
"params_b": 12.0,
|
||||||
|
"name_params_b": 12.0,
|
||||||
|
"backend": "ROCm",
|
||||||
|
"network": "RoCE",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 2,
|
||||||
|
"tps_mean": 397.9710386242193
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "Qwen/Qwen3-14B-AWQ",
|
||||||
|
"model_clean": "Qwen/Qwen3-14B-AWQ",
|
||||||
|
"env": "TP2",
|
||||||
|
"gpu_config": "dual",
|
||||||
|
"quant": "AWQ",
|
||||||
|
"params_b": 14.0,
|
||||||
|
"name_params_b": 14.0,
|
||||||
|
"backend": "ROCm",
|
||||||
|
"network": "RoCE",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 2,
|
||||||
|
"tps_mean": 367.5704596781314
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "openai/gpt-oss-20b",
|
||||||
|
"model_clean": "openai/gpt-oss-20b",
|
||||||
|
"env": "TP2",
|
||||||
|
"gpu_config": "dual",
|
||||||
|
"quant": "BF16",
|
||||||
|
"params_b": 20.0,
|
||||||
|
"name_params_b": 20.0,
|
||||||
|
"backend": "ROCm",
|
||||||
|
"network": "RoCE",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 2,
|
||||||
|
"tps_mean": 601.2567608739705
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "openai/gpt-oss-120b",
|
||||||
|
"model_clean": "openai/gpt-oss-120b",
|
||||||
|
"env": "TP2",
|
||||||
|
"gpu_config": "dual",
|
||||||
|
"quant": "BF16",
|
||||||
|
"params_b": 120.0,
|
||||||
|
"name_params_b": 120.0,
|
||||||
|
"backend": "ROCm",
|
||||||
|
"network": "RoCE",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 2,
|
||||||
|
"tps_mean": 118.34229353876268
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
||||||
|
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
||||||
|
"env": "TP2",
|
||||||
|
"gpu_config": "dual",
|
||||||
|
"quant": "GPTQ",
|
||||||
|
"params_b": 30.0,
|
||||||
|
"name_params_b": 30.0,
|
||||||
|
"backend": "ROCm",
|
||||||
|
"network": "RoCE",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 2,
|
||||||
|
"tps_mean": 333.147212194374
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
||||||
|
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
||||||
|
"env": "TP2",
|
||||||
|
"gpu_config": "dual",
|
||||||
|
"quant": "GPTQ",
|
||||||
|
"params_b": 30.0,
|
||||||
|
"name_params_b": 30.0,
|
||||||
|
"backend": "ROCm",
|
||||||
|
"network": "RoCE",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 2,
|
||||||
|
"tps_mean": 295.0301359026215
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
||||||
|
"model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
||||||
|
"env": "TP2",
|
||||||
|
"gpu_config": "dual",
|
||||||
|
"quant": "GPTQ",
|
||||||
|
"params_b": 80.0,
|
||||||
|
"name_params_b": 80.0,
|
||||||
|
"backend": "ROCm",
|
||||||
|
"network": "RoCE",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 2,
|
||||||
|
"tps_mean": 193.87438091607942
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||||
|
"model_clean": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||||
|
"env": "TP1",
|
||||||
|
"gpu_config": "single",
|
||||||
|
"quant": "BF16",
|
||||||
|
"params_b": 8.0,
|
||||||
|
"name_params_b": 8.0,
|
||||||
|
"backend": "ROCm",
|
||||||
|
"network": "RoCE",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 1,
|
||||||
|
"tps_mean": 433.5736973626181
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "Qwen/Qwen3-14B-AWQ",
|
||||||
|
"model_clean": "Qwen/Qwen3-14B-AWQ",
|
||||||
|
"env": "TP1",
|
||||||
|
"gpu_config": "single",
|
||||||
|
"quant": "AWQ",
|
||||||
|
"params_b": 14.0,
|
||||||
|
"name_params_b": 14.0,
|
||||||
|
"backend": "ROCm",
|
||||||
|
"network": "RoCE",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 1,
|
||||||
|
"tps_mean": 180.43566315423652
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "openai/gpt-oss-20b",
|
||||||
|
"model_clean": "openai/gpt-oss-20b",
|
||||||
|
"env": "TP1",
|
||||||
|
"gpu_config": "single",
|
||||||
|
"quant": "BF16",
|
||||||
|
"params_b": 20.0,
|
||||||
|
"name_params_b": 20.0,
|
||||||
|
"backend": "ROCm",
|
||||||
|
"network": "RoCE",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 1,
|
||||||
|
"tps_mean": 405.0974544317216
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "openai/gpt-oss-120b",
|
||||||
|
"model_clean": "openai/gpt-oss-120b",
|
||||||
|
"env": "TP1",
|
||||||
|
"gpu_config": "single",
|
||||||
|
"quant": "BF16",
|
||||||
|
"params_b": 120.0,
|
||||||
|
"name_params_b": 120.0,
|
||||||
|
"backend": "ROCm",
|
||||||
|
"network": "RoCE",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 1,
|
||||||
|
"tps_mean": 74.75385852312364
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
||||||
|
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-4bit",
|
||||||
|
"env": "TP1",
|
||||||
|
"gpu_config": "single",
|
||||||
|
"quant": "GPTQ",
|
||||||
|
"params_b": 30.0,
|
||||||
|
"name_params_b": 30.0,
|
||||||
|
"backend": "ROCm",
|
||||||
|
"network": "RoCE",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 1,
|
||||||
|
"tps_mean": 214.65152188564062
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
||||||
|
"model_clean": "btbtyler09/Qwen3-Coder-30B-A3B-Instruct-gptq-8bit",
|
||||||
|
"env": "TP1",
|
||||||
|
"gpu_config": "single",
|
||||||
|
"quant": "GPTQ",
|
||||||
|
"params_b": 30.0,
|
||||||
|
"name_params_b": 30.0,
|
||||||
|
"backend": "ROCm",
|
||||||
|
"network": "RoCE",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 1,
|
||||||
|
"tps_mean": 188.17083503449163
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
||||||
|
"model_clean": "dazipe/Qwen3-Next-80B-A3B-Instruct-GPTQ-Int4A16",
|
||||||
|
"env": "TP1",
|
||||||
|
"gpu_config": "single",
|
||||||
|
"quant": "GPTQ",
|
||||||
|
"params_b": 80.0,
|
||||||
|
"name_params_b": 80.0,
|
||||||
|
"backend": "ROCm",
|
||||||
|
"network": "RoCE",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 1,
|
||||||
|
"tps_mean": 128.30078036872973
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model": "google/gemma-3-12b-it",
|
||||||
|
"model_clean": "google/gemma-3-12b-it",
|
||||||
|
"env": "TP1",
|
||||||
|
"gpu_config": "single",
|
||||||
|
"quant": "BF16",
|
||||||
|
"params_b": 12.0,
|
||||||
|
"name_params_b": 12.0,
|
||||||
|
"backend": "ROCm",
|
||||||
|
"network": "RoCE",
|
||||||
|
"error": false,
|
||||||
|
"test": "Throughput",
|
||||||
|
"tp": 1,
|
||||||
|
"tps_mean": 267.99881204205957
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
@@ -36,6 +36,22 @@ else:
|
|||||||
HOST = os.getenv("HOST", "0.0.0.0")
|
HOST = os.getenv("HOST", "0.0.0.0")
|
||||||
PORT = os.getenv("PORT", "8000")
|
PORT = os.getenv("PORT", "8000")
|
||||||
|
|
||||||
|
def detect_gpus():
    """Return the number of AMD GPUs detected on this host.

    Detection is best-effort and proceeds in two steps:

    1. Run ``rocm-smi --showid --csv`` and count occurrences of the
       substring ``"GPU"`` in its standard output. A positive count from a
       successful (exit code 0) run is returned immediately.
    2. Otherwise count the ``renderD*`` entries under ``/dev/dri``.

    Returns:
        int: The detected GPU count. This may be 0 when ``rocm-smi`` is
        unavailable and no render nodes are found; 1 is returned only if
        reading ``/dev/dri`` itself raises an OS-level error.
    """
    try:
        # Try rocm-smi first.
        res = subprocess.run(
            ["rocm-smi", "--showid", "--csv"],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )
    except (OSError, ValueError, subprocess.SubprocessError):
        # rocm-smi is missing or not executable (OSError), or its output
        # could not be decoded as text (UnicodeDecodeError is a ValueError).
        # Fall through to the /dev/dri probe; unlike a bare ``except`` this
        # lets KeyboardInterrupt and SystemExit propagate.
        pass
    else:
        if res.returncode == 0:
            count = res.stdout.count("GPU")
            if count > 0:
                return count

    # Fallback to /dev/dri/render*
    try:
        return len(list(Path("/dev/dri").glob("renderD*")))
    except OSError:
        return 1
|
||||||
|
|
||||||
def get_discovered_models():
|
def get_discovered_models():
|
||||||
"""
|
"""
|
||||||
Overrides the hardcoded MODELS_TO_RUN by looking at what we actually have results for.
|
Overrides the hardcoded MODELS_TO_RUN by looking at what we actually have results for.
|
||||||
@@ -93,22 +109,6 @@ def check_dependencies():
|
|||||||
print("Error: 'dialog' is required. Please install it (apt-get install dialog).")
|
print("Error: 'dialog' is required. Please install it (apt-get install dialog).")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
def detect_gpus():
    """Return the number of AMD GPUs detected on this host.

    Detection is best-effort and proceeds in two steps:

    1. Run ``rocm-smi --showid --csv`` and count occurrences of the
       substring ``"GPU"`` in its standard output. A positive count from a
       successful (exit code 0) run is returned immediately.
    2. Otherwise count the ``renderD*`` entries under ``/dev/dri``.

    Returns:
        int: The detected GPU count. This may be 0 when ``rocm-smi`` is
        unavailable and no render nodes are found; 1 is returned only if
        reading ``/dev/dri`` itself raises an OS-level error.
    """
    try:
        # Try rocm-smi first.
        res = subprocess.run(
            ["rocm-smi", "--showid", "--csv"],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )
    except (OSError, ValueError, subprocess.SubprocessError):
        # rocm-smi is missing or not executable (OSError), or its output
        # could not be decoded as text (UnicodeDecodeError is a ValueError).
        # Fall through to the /dev/dri probe; unlike a bare ``except`` this
        # lets KeyboardInterrupt and SystemExit propagate.
        pass
    else:
        if res.returncode == 0:
            count = res.stdout.count("GPU")
            if count > 0:
                return count

    # Fallback to /dev/dri/render*
    try:
        return len(list(Path("/dev/dri").glob("renderD*")))
    except OSError:
        return 1
|
|
||||||
|
|
||||||
def get_verified_config(model_id, tp_size, max_seqs):
|
def get_verified_config(model_id, tp_size, max_seqs):
|
||||||
"""
|
"""
|
||||||
Reads max_context_results.json to find the best verified configuration.
|
Reads max_context_results.json to find the best verified configuration.
|
||||||
@@ -334,7 +334,15 @@ def configure_and_launch(model_idx, gpu_count):
|
|||||||
print(f" Backend: {'ROCm' if use_rocm_attn else 'Triton'}")
|
print(f" Backend: {'ROCm' if use_rocm_attn else 'Triton'}")
|
||||||
if clear_cache:
|
if clear_cache:
|
||||||
print(f" Action: Clearing vLLM Cache (~/.cache/vllm)")
|
print(f" Action: Clearing vLLM Cache (~/.cache/vllm)")
|
||||||
print(f" Command: {' '.join(cmd)}")
|
|
||||||
|
# Variables that represent the custom environment overrides for models
|
||||||
|
custom_env = config.get("env", {})
|
||||||
|
if custom_env:
|
||||||
|
print("\n --- Environment Variables ---")
|
||||||
|
for k, v in custom_env.items():
|
||||||
|
print(f" export {k}={v}")
|
||||||
|
|
||||||
|
print(f"\n Command: {' '.join(cmd)}")
|
||||||
print("="*60 + "\n")
|
print("="*60 + "\n")
|
||||||
|
|
||||||
os.execvpe("vllm", cmd, env)
|
os.execvpe("vllm", cmd, env)
|
||||||
|
|||||||
Yeni konuda referans
Bir kullanıcı engelle