Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
57c14f8
Updating nic setup-1
amd-droy Jun 1, 2026
205ea3f
Patching gsm8k task in sglang
amd-droy Jun 1, 2026
e06b8dc
Patching gsm8k task in sglang-2
amd-droy Jun 1, 2026
7b45763
Patching gsm8k task in sglang-3
amd-droy Jun 1, 2026
7e987f8
Patching gsm8k task in sglang-4
amd-droy Jun 1, 2026
184aad0
restoring sglang_disagg_lib
amd-droy Jun 1, 2026
a49f7ce
updating nic cmd
amd-droy Jun 1, 2026
ad7f1c9
updating benchmark tests
amd-droy Jun 2, 2026
09fb427
updating benchmark tests-2
amd-droy Jun 2, 2026
ba12ea6
updating benchmark tests-3
amd-droy Jun 2, 2026
4ebe206
updating benchmark tests-4
amd-droy Jun 2, 2026
3456f2e
updating benchmark tests-5
amd-droy Jun 2, 2026
2eaca78
updating benchmark tests-6
amd-droy Jun 2, 2026
c2df174
updating benchmark tests-7
amd-droy Jun 2, 2026
6a25525
updating benchmark tests-8
amd-droy Jun 2, 2026
2e62b92
updating benchmark tests-9
amd-droy Jun 3, 2026
f2e35d2
updating benchmark tests-10
amd-droy Jun 3, 2026
41f3efe
updating benchmark tests-11
amd-droy Jun 3, 2026
5a49e81
updating benchmark tests-12
amd-droy Jun 3, 2026
89a6abc
updating benchmark tests-13
amd-droy Jun 3, 2026
3a086d5
updating benchmark tests-14
amd-droy Jun 4, 2026
0b21195
updating benchmark tests-15
amd-droy Jun 4, 2026
621e2f2
updating benchmark tests-15
amd-droy Jun 4, 2026
140d7d3
updating benchmark tests-16
amd-droy Jun 4, 2026
a0fa043
updating benchmark tests-17
amd-droy Jun 4, 2026
8ad760d
updating benchmark tests-18
amd-droy Jun 4, 2026
81578ba
updating benchmark tests-19
amd-droy Jun 4, 2026
b82a54b
updating benchmark tests-20
amd-droy Jun 4, 2026
aee8c5f
updating benchmark tests-25
amd-droy Jun 5, 2026
7f5cb94
updating benchmark tests-26
amd-droy Jun 5, 2026
cbed4bb
updating benchmark tests-27
amd-droy Jun 5, 2026
2b23758
updating benchmark tests-28
amd-droy Jun 5, 2026
88744eb
updating benchmark tests-29
amd-droy Jun 5, 2026
dfe3f62
updating benchmark tests-30
amd-droy Jun 5, 2026
38157ce
updating benchmark tests-31
amd-droy Jun 5, 2026
0c695b7
updating benchmark tests-32
amd-droy Jun 5, 2026
9fb77b2
updating benchmark tests-39
amd-droy Jun 8, 2026
7996314
updating benchmark tests-40
amd-droy Jun 9, 2026
c776975
updating benchmark tests-41
amd-droy Jun 9, 2026
8a7b0c5
updating benchmark tests-42
amd-droy Jun 9, 2026
637ddbd
updating benchmark tests-43
amd-droy Jun 9, 2026
5e55421
updating benchmark tests-44
amd-droy Jun 9, 2026
4a5261b
updating benchmark tests-45
amd-droy Jun 9, 2026
29f5d15
Apply formatting fixes
amd-droy Jun 9, 2026
0bc5069
Apply formatting fixes-1
amd-droy Jun 9, 2026
669bc4b
updating benchmark tests-46
amd-droy Jun 10, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
188 changes: 188 additions & 0 deletions cvs/input/config_file/inference/sglang/mi30x_sglang_distributed.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
{
"config":
{
"container_image": "rocm/sgl-dev:v0.5.12.post1-rocm720-mi30x-20260603",
"_container_image": "rocm/sgl-dev:v0.5.12.post1-rocm720-mi30x-20260601",
"container_name": "sglang_container",
"_example_nnodes": "4",
"nnodes": "2",
"hf_token_file": "/home/{user-id}/.hf_token",
"shm_size": "128G",
"_log_dir_comments": "Provide some common file system that is accessible from any node",
"log_dir": "/home/{user-id}/LOGS/sglang",
"log_level": "info",
"nic_type": "thor2",
"_example_nccl_ib_hca_list": "rdma0,rdma1,rdma2,rdma3,rdma4,rdma5,rdma6,rdma7",
"nccl_ib_hca_list": "<changeme>",
"_example_nccl_ib_hca": "rdma0,rdma1,rdma2,rdma3,rdma4,rdma5,rdma6,rdma7",
"nccl_ib_hca": "<changeme>",
"_example_nccl_socket_ifname": "eno0",
"nccl_socket_ifname": "<changeme>",
"_example_gloo_socket_ifname": "eno0",
"gloo_socket_ifname": "<changeme>",
"_example_gloo_tcp_ifname": "eno0",
"gloo_tcp_ifname": "<changeme>",
"nccl_ib_gid_index": "3",
"nccl_debug": "ERROR",
"prefill_node_list": ["<changeme>", "<changeme>"],
"decode_node_list": ["<changeme>", "<changeme>"],
"proxy_router_node": "<changeme>",
"benchmark_serv_node": "<changeme>",
"prefill_serv_port": "30001",
"decode_serv_port": "30002",
"proxy_router_port": "8000",
"_prefill_coordinator_addr": "This is the master address for co-ordination among all prefill nodes",
"prefill_coordinator_addr": "<changeme>",
"_decode_coordinator_addr": "This is the master address for co-ordination among all decode nodes",
"decode_coordinator_addr": "<changeme>",
"prefill_coordinator_port": "40001",
"decode_coordinator_port": "40002",
"proxy_router_serv_port": "8000",
"container_config":
{
"device_list": [ "/dev/dri", "/dev/kfd", "/dev/infiniband/rdma_cm" ],
"volume_dict":
{
"/home/{user-id}": "/home/{user-id}",
"/mnt/dtni/models": "/root/models",
"/dev/infiniband": "/dev/infiniband",
"/usr/local/lib/libbnxt_re-rdmav34.so": "/usr/lib/x86_64-linux-gnu/libibverbs/libbnxt_re-rdmav34.so.host",
"/lib/libibverbs.d": "/lib/libibverbs.d"
},
"env_dict":
{
}
}

},
"benchmark_params":
{
"llama-70b":
{
"backend": "sglang",
"max_concurrency": "25",
"model": "meta-llama/Llama-3.1-70B-Instruct",
"prefill_policy": "cache_aware",
"decode_policy": "cache_aware",
"tensor_parallelism": "8",
"memory_fraction": "0.85",
"tokenizer_mode": "auto",
"inference_poll_iterations": "16",
"inference_tests":
{
"gsm8k":
{
"backend": "sglang",
"num_questions": "1000",
"max_concurrency": "25",
"expected_results":
{
"auto":
{
"tokens_per_sec": "350"
}
}
},
"bench_serv_random":
{
"backend": "sglang",
"data_set_name": "random",
"num_prompts": "100",
"input_length": "1024",
"output_length": "1024",
"random_range_ratio": "0.5",
"expected_results":
{
"auto":
{
"output_throughput_per_sec": "1000",
"mean_ttft_ms": "60000",
"mean_tpot_ms": "150"
}
}
},
"bench_serv_generated_shared_prefix":
{
"backend": "sglang",
"gsp_num_groups": "1",
"gsp_prompts_per_group": "16",
"gsp_system_prompt_len": "0",
"gsp_question_len": "1024",
"gsp_output_len": "1024",
"expected_results":
{
"auto":
{
}
}
}

}
},
"deepseek-r1":
{
"backend": "sglang",
"max_concurrency": "64",
"_comments_model": "If the model is local, specify the full path of the model",
"model": "/root/models/DeepSeek-R1-0528",
"prefill_policy": "cache_aware",
"decode_policy": "cache_aware",
"tensor_parallelism": "16",
"memory_fraction": "0.85",
"tokenizer_mode": "auto",
"inference_poll_iterations": "16",
"inference_tests":
{
"gsm8k":
{
"backend": "sglang",
"num_questions": "1000",
"max_concurrency": "100",
"expected_results":
{
"auto":
{
"tokens_per_sec": "700"
}
}
},
"bench_serv_random":
{
"backend": "sglang",
"data_set_name": "random",
"num_prompts": "100",
"input_length": "1024",
"output_length": "1024",
"random_range_ratio": "0.5",
"expected_results":
{
"auto":
{
"output_throughput_per_sec": "1400",
"mean_ttft_ms": "60000",
"mean_tpot_ms": "110"
}
}
},
"bench_serv_generated_shared_prefix":
{
"backend": "sglang",
"gsp_num_groups": "1",
"gsp_prompts_per_group": "16",
"gsp_system_prompt_len": "0",
"gsp_question_len": "1024",
"gsp_output_len": "1024",
"expected_results":
{
"auto":
{
}
}
}

}
}

}

}
Loading
Loading