sgl-project · dcw02 · Oct 31, 2025 · Nov 4, 2025
@@ -0,0 +1,46 @@
+{
+  "architectures": [
+    "LlamaForCausalLMEagle3"
+  ],
+  "eagle_config": {
+    "eagle_aux_hidden_state_layer_ids": [
+      1,
+      23,
+      45
+    ]
+  },
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "eos_token_id": 151645,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2048,
+  "initializer_range": 0.02,
+  "intermediate_size": 6144,
+  "max_position_embeddings": 262144,
+  "model_type": "llama",
+  "target_model_type": "qwen3_vl_moe",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 1,
+  "num_key_value_heads": 4,
+  "rms_norm_eps": 1e-06,
+  "pretraining_tp": 1,
+  "rope_scaling": {
+    "rope_type": "default",
+    "mrope_section": [
+      24,
+      20,
+      20
+    ],
+    "mrope_interleaved": true
+  },
+  "rope_theta": 5000000,
+  "tie_word_embeddings": false,
+  "dtype": "bfloat16",
+  "transformers_version": "4.57.1",
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151936,
+  "draft_vocab_size": 32000
+}
@@ -0,0 +1,39 @@
+{
+  "architectures": [
+    "LlamaForCausalLMEagle3"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "eos_token_id": 151645,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 12288,
+  "max_position_embeddings": 262144,
+  "model_type": "llama",
+  "target_model_type": "qwen3_vl",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 1,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "pretraining_tp": 1,
+  "rope_scaling": {
+    "rope_type": "default",
+    "mrope_section": [
+      24,
+      20,
+      20
+    ],
+    "mrope_interleaved": true
+  },
+  "rope_theta": 5000000,
+  "tie_word_embeddings": false,
+  "dtype": "bfloat16",
+  "transformers_version": "4.57.1",
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151936,
+  "draft_vocab_size": 32000
+}
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+ROOT_DIR=$(dirname $SCRIPT_DIR)
+
+# support tp1 train eagle3 for Qwen3-VL-8B-Instruct
+NUM_GPUS=${1:-1}
+
+torchrun \
+    --standalone \
+    --nproc_per_node $NUM_GPUS \
+    $ROOT_DIR/scripts/train_eagle3_online.py \
+    --target-model-path Qwen/Qwen3-VL-8B-Instruct \
+    --draft-model-config $ROOT_DIR/configs/qwen3-vl-8b-eagle3.json \
+    --train-data-path $ROOT_DIR/cache/dataset/allava4v_train.jsonl \
+    --output-dir $ROOT_DIR/outputs/Qwen3-VL-8B-eagle3 \
+    --build-dataset-num-proc 0 \
+    --num-epochs 10 \
+    --batch-size 1 \
+    --learning-rate 1e-4 \
+    --max-length 8192 \
+    --chat-template qwen3-vl \
+    --cache-dir $ROOT_DIR/cache \
+    --embedding-key model.language_model.embed_tokens.weight \
+    --tp-size 1 \
+    --is-vlm \
+    --min-pixels 50176 \
+    --max-pixels 802816
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+ROOT_DIR=$(dirname $SCRIPT_DIR)
+
+# support tp1 train eagle3 for Qwen3-VL-30B-A3B-Instruct
+NUM_GPUS=${1:-1}
+
+torchrun \
+    --standalone \
+    --nproc_per_node $NUM_GPUS \
+    $ROOT_DIR/scripts/train_eagle3_online.py \
+    --target-model-path Qwen/Qwen3-VL-30B-A3B-Instruct \
+    --draft-model-config $ROOT_DIR/configs/qwen3-vl-30b-a3b-eagle3.json \
+    --train-data-path $ROOT_DIR/cache/dataset/allava4v_train.jsonl \
+    --output-dir $ROOT_DIR/outputs/Qwen3-VL-30B-A3B-eagle3 \
+    --build-dataset-num-proc 0 \
+    --num-epochs 10 \
+    --batch-size 1 \
+    --learning-rate 1e-4 \
+    --max-length 8192 \
+    --chat-template qwen3-vl \
+    --cache-dir $ROOT_DIR/cache \
+    --embedding-key model.language_model.embed_tokens.weight \
+    --tp-size 1 \
+    --is-vlm \
+    --min-pixels 50176 \
+    --max-pixels 802816
@@ -230,6 +230,36 @@ def build_target_model(
                 .eval()
                 .cuda()
             )
+        elif (
+            args.is_vlm
+            and draft_model_config.target_model_type == "qwen3_vl"
+            and args.tp_size == 1
+        ):
+            from transformers import Qwen3VLForConditionalGeneration
+
+            target_model = (
+                Qwen3VLForConditionalGeneration.from_pretrained(
+                    pretrained_model_name_or_path=args.target_model_path,
+                    dtype=torch.bfloat16,
+                )
+                .eval()
+                .cuda()
+            )
+        elif (
+            args.is_vlm
+            and draft_model_config.target_model_type == "qwen3_vl_moe"
+            and args.tp_size == 1
+        ):
+            from transformers import Qwen3VLMoeForConditionalGeneration
+
+            target_model = (
+                Qwen3VLMoeForConditionalGeneration.from_pretrained(
+                    pretrained_model_name_or_path=args.target_model_path,
+                    dtype=torch.bfloat16,
+                )
+                .eval()
+                .cuda()
+            )
         else:
             if args.target_model_backend == "sglang":
                 target_model_kwargs = SGLangBackendArgs.from_args(args).to_kwargs()
@@ -625,14 +655,20 @@ def main():
     # ================================================
     if (
         args.is_vlm
-        and getattr(draft_model_config, "target_model_type", None) == "qwen2_5_vl"
+        and getattr(draft_model_config, "target_model_type", None)
+        in {
+            "qwen2_5_vl",
+            "qwen3_vl",
+            "qwen3_vl_moe",
+        }
     ):
         eagle3_model = QwenVLOnlineEagle3Model(
             target_model=target_model,
             draft_model=draft_model,
             processor=processor,
             length=args.ttt_length,
             attention_backend=args.attention_backend,
+            target_model_type=getattr(draft_model_config, "target_model_type", None),
         )
     else:
         eagle3_model = OnlineEagle3Model(