Skip to content

Commit 7e442d5

Browse files
committed
decouple onnx-download from onnx-cuda feature
1 parent b2b42b9 commit 7e442d5

File tree

4 files changed

+9
-46
lines changed

4 files changed

+9
-46
lines changed

crates/wasi-nn/Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,8 @@ openvino = ["dep:openvino"]
7171
onnx = ["dep:ort"]
7272
# Use prebuilt ONNX Runtime binaries from ort.
7373
onnx-download = ["onnx", "ort/download-binaries"]
74-
# CUDA execution provider for NVIDIA GPU support (download prebuilt binaries)
75-
onnx-cuda = ["onnx", "ort/cuda", "ort/download-binaries"]
74+
# CUDA execution provider for NVIDIA GPU support (requires CUDA toolkit)
75+
onnx-cuda = ["onnx", "ort/cuda"]
7676
# WinML is only available on Windows 10 1809 and later.
7777
winml = ["dep:windows"]
7878
# PyTorch is available on all platforms; requires Libtorch to be installed

crates/wasi-nn/examples/classification-component-onnx/README.md

Lines changed: 3 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,7 @@ This example demonstrates how to use the `wasi-nn` crate to run a classification
66
It supports CPU and GPU (Nvidia CUDA) execution targets.
77

88
**Note:**
9-
For the wasi-nn GPU execution target, CUDA (onnx-cuda) is the only supported ONNX execution provider (EP).
10-
TPU execution target is not supported and will fall back to CPU execution.
9+
The GPU execution target currently supports only Nvidia CUDA (onnx-cuda) as the execution provider (EP).
1110

1211
## Build
1312

@@ -27,7 +26,8 @@ cargo build --features component-model,wasi-nn,wasmtime-wasi-nn/onnx-download
2726

2827
#### For GPU (Nvidia CUDA) support:
2928
```sh
30-
cargo build --features component-model,wasi-nn,wasmtime-wasi-nn/onnx-cuda
29+
# This will automatically download the onnxruntime dynamic shared library from cdn.pyke.io
30+
cargo build --features component-model,wasi-nn,wasmtime-wasi-nn/onnx-cuda,wasmtime-wasi-nn/onnx-download
3131
```
3232

3333
### Running with Different Execution Targets
@@ -46,15 +46,6 @@ Arguments:
4646
./crates/wasi-nn/examples/classification-component-onnx/target/wasm32-wasip1/debug/classification-component-onnx.wasm
4747
```
4848

49-
Or explicitly specify CPU:
50-
```sh
51-
./target/debug/wasmtime run \
52-
-Snn \
53-
--dir ./crates/wasi-nn/examples/classification-component-onnx/fixture/::fixture \
54-
./crates/wasi-nn/examples/classification-component-onnx/target/wasm32-wasip1/debug/classification-component-onnx.wasm \
55-
cpu
56-
```
57-
5849
#### GPU (CUDA) Execution:
5950
```sh
6051
# path to `libonnxruntime_providers_cuda.so` downloaded by `ort-sys`
@@ -66,12 +57,6 @@ export LD_LIBRARY_PATH={wasmtime_workspace}/target/debug
6657
./crates/wasi-nn/examples/classification-component-onnx/target/wasm32-wasip1/debug/classification-component-onnx.wasm \
6758
gpu
6859

69-
# With debug logging
70-
WASMTIME_LOG=wasmtime_wasi_nn=debug ./target/debug/wasmtime run -Snn \
71-
--dir ./crates/wasi-nn/examples/classification-component-onnx/fixture/::fixture \
72-
./crates/wasi-nn/examples/classification-component-onnx/target/wasm32-wasip1/debug/classification-component-onnx.wasm \
73-
gpu
74-
7560
```
7661

7762
## Expected Output
@@ -97,12 +82,3 @@ You can monitor GPU usage using cmd `watch -n 1 nvidia-smi`.
9782
- NVIDIA GPU with CUDA support
9883
- CUDA Toolkit 12.x with cuDNN 9.x
9984
- Build wasmtime with `wasmtime-wasi-nn/onnx-cuda` feature
100-
101-
## Troubleshooting
102-
103-
If you see an error like:
104-
```
105-
ONNX GPU execution target requested, but 'onnx-cuda' feature is not enabled
106-
```
107-
108-
Make sure you've built wasmtime with the appropriate feature flag (see "Building Wasmtime" section above).

crates/wasi-nn/examples/classification-component-onnx/src/main.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ fn get_execution_target() -> ExecutionTarget {
3535
return ExecutionTarget::Cpu;
3636
}
3737
_ => {
38-
println!("Unknown/Unsupported execution target '{}', defaulting to CPU", args[1]);
38+
println!("Unknown execution target '{}', defaulting to CPU", args[1]);
3939
}
4040
}
4141
} else {

crates/wasi-nn/src/backend/onnx.rs

Lines changed: 3 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -35,23 +35,11 @@ impl BackendInner for OnnxBackend {
3535
// Configure execution providers based on target
3636
let execution_providers = configure_execution_providers(target)?;
3737

38-
tracing::info!(
39-
"Configuring ONNX session with {} execution provider(s)",
40-
execution_providers.len()
41-
);
42-
4338
let session = Session::builder()?
4439
.with_execution_providers(execution_providers)?
4540
.with_optimization_level(GraphOptimizationLevel::Level3)?
4641
.commit_from_memory(builders[0])?;
4742

48-
// Log which execution providers were actually used
49-
tracing::info!(
50-
"ONNX session created successfully. Model inputs: {}, outputs: {}",
51-
session.inputs.len(),
52-
session.outputs.len()
53-
);
54-
5543
let box_: Box<dyn BackendGraph> =
5644
Box::new(OnnxGraph(Arc::new(Mutex::new(session)), target));
5745
Ok(box_.into())
@@ -76,19 +64,18 @@ fn configure_execution_providers(
7664
#[cfg(feature = "onnx-cuda")]
7765
{
7866
// Use CUDA execution provider for GPU acceleration
79-
// Fallback to CPU if CUDA initialization fails
80-
tracing::info!("Configuring CUDA execution provider for GPU target");
67+
tracing::debug!("Configuring ONNX Nvidia CUDA execution provider for GPU target");
8168
Ok(vec![CUDAExecutionProvider::default().build()])
8269
}
8370
#[cfg(not(feature = "onnx-cuda"))]
8471
{
8572
Err(BackendError::BackendAccess(anyhow::anyhow!(
86-
"ONNX GPU execution target requested, but 'onnx-cuda' feature is not enabled"
73+
"GPU execution target is requested, but 'onnx-cuda' feature is not enabled"
8774
)))
8875
}
8976
}
9077
ExecutionTarget::Tpu => {
91-
unimplemented!("ONNX TPU execution target is not supported yet");
78+
unimplemented!("TPU execution target is not supported for ONNX backend yet");
9279
}
9380
}
9481
}

0 commit comments

Comments
 (0)