Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
c8b3888
qwen3_vl_moe support prefill_cudagraph
WANDY666 Mar 26, 2026
e7fba3a
add audio dp
WANDY666 Mar 27, 2026
671b5aa
Add startup warmups for HTTP audio preload and per-rank audio workers…
WANDY666 Mar 27, 2026
a387259
add http client cache
WANDY666 Mar 30, 2026
cd89cd6
reduce polling time
WANDY666 Mar 30, 2026
4788980
Optimize audio shm payload handling and cache lookups
WANDY666 Mar 30, 2026
7b05403
cache hann_window/mel_filters
WANDY666 Mar 30, 2026
713c45d
Fix audio preload config to follow tokenizer settings
WANDY666 Mar 30, 2026
65a3ec6
Optimize qwen3 omni audio preprocessing fast path
WANDY666 Mar 31, 2026
2e48008
Add audio server fast path for single pending requests
WANDY666 Mar 31, 2026
456a71a
fix num_frames
WANDY666 Apr 1, 2026
479367d
tune fp8
WANDY666 Apr 2, 2026
2c09aa2
set default model
WANDY666 Apr 2, 2026
5168dae
add prompt_text_cache to QWen3OmniTokenizer
WANDY666 Apr 2, 2026
167f8b0
multi images or audios use asyncio
WANDY666 Apr 2, 2026
30d8603
single file without _resource_lock
WANDY666 Apr 2, 2026
db3e63b
use deque instead of list
WANDY666 Apr 2, 2026
2fbd55d
Merge branch 'main' of https://github.com/ModelTC/LightLLM into optim…
WANDY666 Apr 3, 2026
878c2f9
chore: format merged audio/httpserver files
WANDY666 Apr 3, 2026
ab788d9
chore: improve qwen3 omni audio formatting
WANDY666 Apr 3, 2026
0570b96
fixâ€
WANDY666 Apr 3, 2026
70aad72
fix
WANDY666 Apr 3, 2026
86a16f7
fix md5 and
WANDY666 Apr 7, 2026
4601637
fix md5
WANDY666 Apr 7, 2026
16203e4
format
WANDY666 Apr 7, 2026
93421d2
using asyncio.to_thread preventing the server from handling other con…
WANDY666 Apr 7, 2026
f7b0589
fix
WANDY666 Apr 7, 2026
0ea2156
fix
hiworldwzj Apr 7, 2026
6856540
fix
WANDY666 Apr 7, 2026
9d0671b
use details_log to log
WANDY666 Apr 7, 2026
8e21207
delete warmup
WANDY666 Apr 7, 2026
fe39faa
delete audio_preload_config
WANDY666 Apr 7, 2026
f1c9f07
delete _preprocess_single_padded
WANDY666 Apr 7, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
{
"1024": {
"BLOCK_SIZE_K": 32,
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 64,
"GROUP_SIZE_M": 32,
"NEED_TRANS": false,
"num_stages": 3,
"num_warps": 4
},
"128": {
"BLOCK_SIZE_K": 32,
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
"GROUP_SIZE_M": 1,
"NEED_TRANS": false,
"num_stages": 3,
"num_warps": 8
},
"16384": {
"BLOCK_SIZE_K": 64,
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"GROUP_SIZE_M": 32,
"NEED_TRANS": false,
"num_stages": 3,
"num_warps": 4
},
"2048": {
"BLOCK_SIZE_K": 32,
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 32,
"GROUP_SIZE_M": 64,
"NEED_TRANS": false,
"num_stages": 3,
"num_warps": 4
},
"256": {
"BLOCK_SIZE_K": 32,
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
"GROUP_SIZE_M": 1,
"NEED_TRANS": false,
"num_stages": 3,
"num_warps": 8
},
"512": {
"BLOCK_SIZE_K": 32,
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 64,
"GROUP_SIZE_M": 64,
"NEED_TRANS": false,
"num_stages": 4,
"num_warps": 4
},
"64": {
"BLOCK_SIZE_K": 32,
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
"GROUP_SIZE_M": 1,
"NEED_TRANS": false,
"num_stages": 2,
"num_warps": 4
},
"8": {
"BLOCK_SIZE_K": 32,
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
"GROUP_SIZE_M": 1,
"NEED_TRANS": false,
"num_stages": 3,
"num_warps": 4
},
"800": {
"BLOCK_SIZE_K": 32,
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 64,
"GROUP_SIZE_M": 32,
"NEED_TRANS": false,
"num_stages": 5,
"num_warps": 4
},
"8192": {
"BLOCK_SIZE_K": 64,
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"GROUP_SIZE_M": 32,
"NEED_TRANS": false,
"num_stages": 2,
"num_warps": 4
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
{
"1": {
"BLOCK_SIZE_K": 128,
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 32,
"GROUP_SIZE_M": 16,
"NEED_TRANS": false,
"num_stages": 5,
"num_warps": 4
},
"100": {
"BLOCK_SIZE_K": 64,
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 16,
"GROUP_SIZE_M": 16,
"NEED_TRANS": false,
"num_stages": 3,
"num_warps": 4
},
"1024": {
"BLOCK_SIZE_K": 128,
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"GROUP_SIZE_M": 32,
"NEED_TRANS": false,
"num_stages": 2,
"num_warps": 4
},
"128": {
"BLOCK_SIZE_K": 64,
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 32,
"GROUP_SIZE_M": 1,
"NEED_TRANS": false,
"num_stages": 5,
"num_warps": 4
},
"16": {
"BLOCK_SIZE_K": 128,
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 32,
"GROUP_SIZE_M": 1,
"NEED_TRANS": false,
"num_stages": 2,
"num_warps": 4
},
"2048": {
"BLOCK_SIZE_K": 64,
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"GROUP_SIZE_M": 64,
"NEED_TRANS": false,
"num_stages": 3,
"num_warps": 8
},
"256": {
"BLOCK_SIZE_K": 128,
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 32,
"GROUP_SIZE_M": 1,
"NEED_TRANS": false,
"num_stages": 4,
"num_warps": 4
},
"32": {
"BLOCK_SIZE_K": 64,
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
"GROUP_SIZE_M": 64,
"NEED_TRANS": false,
"num_stages": 3,
"num_warps": 8
},
"64": {
"BLOCK_SIZE_K": 64,
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 32,
"GROUP_SIZE_M": 1,
"NEED_TRANS": false,
"num_stages": 5,
"num_warps": 4
},
"8": {
"BLOCK_SIZE_K": 64,
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
"GROUP_SIZE_M": 32,
"NEED_TRANS": false,
"num_stages": 5,
"num_warps": 8
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
{
"1": {
"BLOCK_SIZE_K": 128,
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 64,
"GROUP_SIZE_M": 16,
"NEED_TRANS": false,
"num_stages": 3,
"num_warps": 4
},
"100": {
"BLOCK_SIZE_K": 128,
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 32,
"GROUP_SIZE_M": 1,
"NEED_TRANS": true,
"num_stages": 4,
"num_warps": 4
},
"1024": {
"BLOCK_SIZE_K": 128,
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"GROUP_SIZE_M": 1,
"NEED_TRANS": true,
"num_stages": 2,
"num_warps": 4
},
"128": {
"BLOCK_SIZE_K": 128,
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 64,
"GROUP_SIZE_M": 16,
"NEED_TRANS": false,
"num_stages": 3,
"num_warps": 4
},
"16": {
"BLOCK_SIZE_K": 128,
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 32,
"GROUP_SIZE_M": 32,
"NEED_TRANS": false,
"num_stages": 3,
"num_warps": 4
},
"2048": {
"BLOCK_SIZE_K": 128,
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 64,
"GROUP_SIZE_M": 16,
"NEED_TRANS": true,
"num_stages": 3,
"num_warps": 4
},
"256": {
"BLOCK_SIZE_K": 128,
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 64,
"GROUP_SIZE_M": 64,
"NEED_TRANS": false,
"num_stages": 3,
"num_warps": 4
},
"32": {
"BLOCK_SIZE_K": 128,
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 32,
"GROUP_SIZE_M": 32,
"NEED_TRANS": false,
"num_stages": 3,
"num_warps": 4
},
"4096": {
"BLOCK_SIZE_K": 128,
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"GROUP_SIZE_M": 16,
"NEED_TRANS": false,
"num_stages": 3,
"num_warps": 8
},
"64": {
"BLOCK_SIZE_K": 128,
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 32,
"GROUP_SIZE_M": 1,
"NEED_TRANS": true,
"num_stages": 3,
"num_warps": 4
},
"8": {
"BLOCK_SIZE_K": 128,
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
"GROUP_SIZE_M": 1,
"NEED_TRANS": true,
"num_stages": 3,
"num_warps": 8
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
{
"1024": {
"BLOCK_SIZE_K": 64,
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
"GROUP_SIZE_M": 64,
"NEED_TRANS": true,
"num_stages": 4,
"num_warps": 8
},
"128": {
"BLOCK_SIZE_K": 64,
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
"GROUP_SIZE_M": 1,
"NEED_TRANS": false,
"num_stages": 3,
"num_warps": 8
},
"16384": {
"BLOCK_SIZE_K": 128,
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"GROUP_SIZE_M": 64,
"NEED_TRANS": true,
"num_stages": 3,
"num_warps": 4
},
"2048": {
"BLOCK_SIZE_K": 128,
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 64,
"GROUP_SIZE_M": 64,
"NEED_TRANS": false,
"num_stages": 3,
"num_warps": 4
},
"256": {
"BLOCK_SIZE_K": 64,
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
"GROUP_SIZE_M": 64,
"NEED_TRANS": false,
"num_stages": 3,
"num_warps": 8
},
"32768": {
"BLOCK_SIZE_K": 128,
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"GROUP_SIZE_M": 64,
"NEED_TRANS": false,
"num_stages": 3,
"num_warps": 8
},
"512": {
"BLOCK_SIZE_K": 64,
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 64,
"GROUP_SIZE_M": 64,
"NEED_TRANS": false,
"num_stages": 4,
"num_warps": 4
},
"64": {
"BLOCK_SIZE_K": 64,
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
"GROUP_SIZE_M": 1,
"NEED_TRANS": false,
"num_stages": 2,
"num_warps": 4
},
"8": {
"BLOCK_SIZE_K": 64,
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 64,
"GROUP_SIZE_M": 16,
"NEED_TRANS": true,
"num_stages": 3,
"num_warps": 4
},
"800": {
"BLOCK_SIZE_K": 64,
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 32,
"GROUP_SIZE_M": 64,
"NEED_TRANS": true,
"num_stages": 4,
"num_warps": 4
},
"8192": {
"BLOCK_SIZE_K": 128,
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 64,
"GROUP_SIZE_M": 32,
"NEED_TRANS": false,
"num_stages": 2,
"num_warps": 4
}
}
Loading
Loading