-
Notifications
You must be signed in to change notification settings - Fork 488
Open
Description
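Currently, the Liger embedding operator is supported on the NPU (Ascend910B4) and passes the accuracy test. However, it runs much slower than the Hugging Face and torch.compile baselines: in the benchmark data below, the median forward pass takes about 43 ms versus roughly 0.08–0.24 ms for the baselines, and the median backward pass takes about 62–64 ms versus roughly 1.6–4.3 ms. Further performance optimization is needed.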
********** Benchmark Data **********
[
{
"kernel_name": "embedding",
"kernel_provider": "liger",
"metric_name": "speed",
"metric_unit": "ms",
"gpu_name": "Ascend910B4",
"x_name": "V",
"x_label": "embedding dimension",
"x_values": [
1024,
2048,
4096,
8192,
16384,
32768,
65536,
131072
],
"y_values_50": [
43.11292266845703,
42.90717315673828,
43.2876091003418,
43.323081970214844,
42.62879943847656,
43.2666015625,
43.26323699951172,
43.450679779052734
],
"y_values_20": [
43.11175537109375,
42.90643310546875,
43.28743362426758,
43.32281494140625,
42.62324523925781,
43.2645263671875,
43.26128387451172,
43.43910217285156
],
"y_values_80": [
43.11408615112305,
42.90790939331055,
43.287784576416016,
43.32334518432617,
42.63435745239258,
43.2686767578125,
43.265193939208984,
43.462257385253906
],
"timestamp": "2026-01-21 03:48:04",
"kernel_operation_mode": "forward",
"extra_benchmark_config_str": "{\"B\": 32, \"T\": 512, \"D\": 768, \"dtype\": \"torch.float32\"}",
"liger_version": "0.0.0"
},
{
"kernel_name": "embedding",
"kernel_provider": "huggingface",
"metric_name": "speed",
"metric_unit": "ms",
"gpu_name": "Ascend910B4",
"x_name": "V",
"x_label": "embedding dimension",
"x_values": [
1024,
2048,
4096,
8192,
16384,
32768,
65536,
131072
],
"y_values_50": [
0.08064000308513641,
0.09166000038385391,
0.11357000470161438,
0.1482200026512146,
0.18525999784469604,
0.21186000108718872,
0.2272000014781952,
0.23625999689102173
],
"y_values_20": [
0.08020000159740448,
0.09121999889612198,
0.1130559965968132,
0.14762000739574432,
0.18479999899864197,
0.21121999621391296,
0.22665999829769135,
0.2358199954032898
],
"y_values_80": [
0.08157600462436676,
0.09262000024318695,
0.11438000202178955,
0.14905999600887299,
0.18602000176906586,
0.21264000236988068,
0.22774000465869904,
0.2370000034570694
],
"timestamp": "2026-01-21 03:48:17",
"kernel_operation_mode": "forward",
"extra_benchmark_config_str": "{\"B\": 32, \"T\": 512, \"D\": 768, \"dtype\": \"torch.float32\"}",
"liger_version": "0.0.0"
},
{
"kernel_name": "embedding",
"kernel_provider": "torch_compile",
"metric_name": "speed",
"metric_unit": "ms",
"gpu_name": "Ascend910B4",
"x_name": "V",
"x_label": "embedding dimension",
"x_values": [
1024,
2048,
4096,
8192,
16384,
32768,
65536,
131072
],
"y_values_50": [
0.1696000099182129,
0.20161999762058258,
0.20436999201774597,
0.20262999832630157,
0.20093999803066254,
0.21164000034332275,
0.23170000314712524,
0.23863999545574188
],
"y_values_20": [
0.16475999355316162,
0.1967879980802536,
0.19750800728797913,
0.19844000041484833,
0.19786399602890015,
0.21121999621391296,
0.2281000018119812,
0.23797999322414398
],
"y_values_80": [
0.17654000222682953,
0.20855599641799927,
0.2127159982919693,
0.20847199857234955,
0.20453999936580658,
0.21206000447273254,
0.2365799993276596,
0.23921999335289001
],
"timestamp": "2026-01-21 03:48:32",
"kernel_operation_mode": "forward",
"extra_benchmark_config_str": "{\"B\": 32, \"T\": 512, \"D\": 768, \"dtype\": \"torch.float32\"}",
"liger_version": "0.0.0"
},
{
"kernel_name": "embedding",
"kernel_provider": "liger",
"metric_name": "speed",
"metric_unit": "ms",
"gpu_name": "Ascend910B4",
"x_name": "V",
"x_label": "embedding dimension",
"x_values": [
1024,
2048,
4096,
8192,
16384,
32768,
65536,
131072
],
"y_values_50": [
61.94499969482422,
62.33565902709961,
61.86201858520508,
62.008819580078125,
62.232479095458984,
62.473838806152344,
63.21195983886719,
64.28494262695312
],
"y_values_20": [
61.94499969482422,
62.33565902709961,
61.86201858520508,
62.008819580078125,
62.232479095458984,
62.473838806152344,
63.21195983886719,
64.28494262695312
],
"y_values_80": [
61.94499969482422,
62.33565902709961,
61.86201858520508,
62.008819580078125,
62.232479095458984,
62.473838806152344,
63.21195983886719,
64.28494262695312
],
"timestamp": "2026-01-21 03:48:50",
"kernel_operation_mode": "backward",
"extra_benchmark_config_str": "{\"B\": 32, \"T\": 512, \"D\": 768, \"dtype\": \"torch.float32\"}",
"liger_version": "0.0.0"
},
{
"kernel_name": "embedding",
"kernel_provider": "huggingface",
"metric_name": "speed",
"metric_unit": "ms",
"gpu_name": "Ascend910B4",
"x_name": "V",
"x_label": "embedding dimension",
"x_values": [
1024,
2048,
4096,
8192,
16384,
32768,
65536,
131072
],
"y_values_50": [
1.5649499893188477,
1.5831799507141113,
1.6187798976898193,
1.6978700160980225,
1.8488800525665283,
2.2740001678466797,
2.995260000228882,
4.282050132751465
],
"y_values_20": [
1.5638200044631958,
1.581808090209961,
1.6171720027923584,
1.6966240406036377,
1.8473479747772217,
2.271728038787842,
2.993760108947754,
4.2774481773376465
],
"y_values_80": [
1.5658999681472778,
1.585103988647461,
1.6203559637069702,
1.6992720365524292,
1.8494240045547485,
2.2756519317626953,
2.9975199699401855,
4.283827781677246
],
"timestamp": "2026-01-21 03:49:03",
"kernel_operation_mode": "backward",
"extra_benchmark_config_str": "{\"B\": 32, \"T\": 512, \"D\": 768, \"dtype\": \"torch.float32\"}",
"liger_version": "0.0.0"
},
{
"kernel_name": "embedding",
"kernel_provider": "torch_compile",
"metric_name": "speed",
"metric_unit": "ms",
"gpu_name": "Ascend910B4",
"x_name": "V",
"x_label": "embedding dimension",
"x_values": [
1024,
2048,
4096,
8192,
16384,
32768,
65536,
131072
],
"y_values_50": [
1.5654499530792236,
1.579859972000122,
1.6195299625396729,
1.7007999420166016,
1.8484400510787964,
2.296339988708496,
3.0023000240325928,
4.276410102844238
],
"y_values_20": [
1.5641800165176392,
1.5789759159088135,
1.6186840534210205,
1.6993600130081177,
1.8475240468978882,
2.295095920562744,
3.000540018081665,
4.273176193237305
],
"y_values_80": [
1.5664000511169434,
1.5810760259628296,
1.6209640502929688,
1.7026759386062622,
1.8497200012207031,
2.298271894454956,
3.0036399364471436,
4.278992176055908
],
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels